@@ -713,14 +713,16 @@ template <typename T>
 static void SoftmaxWithCrossEntropySoftLabel(const GPUContext& dev_ctx,
                                              const int rank,
                                              const int axis,
-                                             const T* logits_data,
+                                             const DenseTensor& logits,
                                              const T* labels_data,
-                                             T* softmax_data,
+                                             DenseTensor* softmax,
                                              T* loss_data,
                                              int N,
                                              int dim,
                                              int D) {
   constexpr int kMaxBlockDim = 512;
+  auto* logits_data = logits.data<T>();
+  auto* softmax_data = softmax->data<T>();
   int64_t block_dim = dim >= kMaxBlockDim
                           ? kMaxBlockDim
                           : (1 << static_cast<int>(std::log2(dim)));
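Side note on the unchanged lines above: block_dim is the largest power of two not exceeding dim, capped at kMaxBlockDim (512). A minimal standalone sketch of that selection (plain C++, not part of this patch; the function name is made up for illustration):

#include <cmath>

// Largest power of two <= dim, capped at max_block_dim, mirroring the
// block_dim computation in SoftmaxWithCrossEntropySoftLabel above.
static int PickBlockDim(int dim, int max_block_dim = 512) {
  return dim >= max_block_dim
             ? max_block_dim
             : (1 << static_cast<int>(std::log2(dim)));
}
// Examples: PickBlockDim(300) == 256, PickBlockDim(4096) == 512 (capped).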
@@ -762,13 +764,7 @@ static void SoftmaxWithCrossEntropySoftLabel(const GPUContext& dev_ctx,
   GPUDNNDataLayout layout = GPUDNNDataLayout::kNCHW;
 #ifdef PADDLE_WITH_HIP
   miopenTensorDescriptor_t descp = desc.descriptor<T>(layout, tensor_dims);
-#else
-  cudnnTensorDescriptor_t descp = desc.descriptor<T>(layout, tensor_dims);
-#endif
-
   auto handle = dev_ctx.cudnn_handle();
-
-#ifdef PADDLE_WITH_HIP
   auto mode = axis == rank - 1 ? MIOPEN_SOFTMAX_MODE_INSTANCE
                                : MIOPEN_SOFTMAX_MODE_CHANNEL;
   PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSoftmaxForward_V2(
@@ -782,18 +778,8 @@ static void SoftmaxWithCrossEntropySoftLabel(const GPUContext& dev_ctx,
       MIOPEN_SOFTMAX_LOG,
       mode));
 #else
-  auto mode = axis == rank - 1 ? CUDNN_SOFTMAX_MODE_INSTANCE
-                               : CUDNN_SOFTMAX_MODE_CHANNEL;
-  PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSoftmaxForward(
-      handle,
-      CUDNN_SOFTMAX_LOG,
-      mode,
-      phi::backends::gpu::CudnnDataType<T>::kOne(),
-      descp,
-      logits_data,
-      phi::backends::gpu::CudnnDataType<T>::kZero(),
-      descp,
-      softmax_data));
+  SoftmaxForwardCUDAKernelDriver<T, true>(dev_ctx, logits, axis, softmax);
+  softmax_data = softmax->data<T>();
 #endif
 
   const int kDimLog2 = static_cast<int>(Log2Ceil(dim));
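The last context line above uses Log2Ceil(dim); presumably this helper returns the smallest k with (1 << k) >= dim. A standalone equivalent, for reference only (not Paddle's implementation):

// Smallest k such that (1 << k) >= n; reference-only stand-in for Log2Ceil.
static int Log2CeilRef(int n) {
  int k = 0;
  while ((1 << k) < n) ++k;
  return k;
}
// Examples: Log2CeilRef(1) == 0, Log2CeilRef(5) == 3, Log2CeilRef(512) == 9.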
@@ -1170,7 +1156,7 @@ static void SoftmaxWithCrossEntropyHardLabel(const GPUContext& dev_ctx,
   VLOG(7) << "rank=" << rank << ", axis = " << axis << ", N = " << N
           << ", dim = " << dim << ", D = " << D;
   auto* logits_data = logits.data<T>();
-  auto* softmax_data = dev_ctx.template Alloc<T>(softmax);
+  auto* softmax_data = softmax->data<T>();
   auto stream = dev_ctx.stream();
   constexpr int max_dim = 320;
   if (D == 1) {
@@ -1216,8 +1202,6 @@ static void SoftmaxWithCrossEntropyHardLabel(const GPUContext& dev_ctx,
         MIOPEN_SOFTMAX_LOG,
         mode));
 #else
-    auto mode = axis == rank - 1 ? CUDNN_SOFTMAX_MODE_INSTANCE
-                                 : CUDNN_SOFTMAX_MODE_CHANNEL;
     SoftmaxForwardCUDAKernelDriver<T, true>(dev_ctx, logits, axis, softmax);
     softmax_data = softmax->data<T>();
 #endif
@@ -1352,14 +1336,13 @@ void CrossEntropyWithSoftmaxCUDAKernel(const GPUContext& dev_ctx,
   }
 
   if (soft_label) {
-    auto* logits_data = logits.data<T>();
     auto* labels_data = label.data<T>();
     SoftmaxWithCrossEntropySoftLabel<T>(dev_ctx,
                                         rank,
                                         axis_v,
-                                        logits_data,
+                                        logits,
                                         labels_data,
-                                        softmax_data,
+                                        softmax,
                                         loss_data,
                                         n,
                                         axis_dim,
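For context on what the soft-label path computes: loss[i] = -sum_j labels[i][j] * log_softmax(logits[i])[j]. In the CUDA branch the softmax buffer first holds the log-softmax (the true template argument to SoftmaxForwardCUDAKernelDriver appears to select log mode, matching the CUDNN_SOFTMAX_LOG / MIOPEN_SOFTMAX_LOG calls it replaces), and a follow-up kernel performs the weighted reduction. A minimal CPU reference sketch of that math, independent of the Paddle types in this patch:

#include <algorithm>
#include <cmath>
#include <vector>

// Reference-only: soft-label cross entropy for one non-empty row of logits,
// loss = -sum_j labels[j] * log_softmax(logits)[j].
static float SoftLabelCrossEntropyRow(const std::vector<float>& logits,
                                      const std::vector<float>& labels) {
  // Log-softmax with the usual max subtraction for numerical stability.
  float max_logit = logits[0];
  for (float x : logits) max_logit = std::max(max_logit, x);
  float sum_exp = 0.f;
  for (float x : logits) sum_exp += std::exp(x - max_logit);
  const float log_sum_exp = std::log(sum_exp);

  float loss = 0.f;
  for (size_t j = 0; j < logits.size(); ++j) {
    const float log_softmax = (logits[j] - max_logit) - log_sum_exp;
    loss -= labels[j] * log_softmax;
  }
  return loss;
}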