Skip to content

Commit e554b1e

Browse files
authored
[BIG tensor] fix overflow in cudnn_softmax by replacing the api (#74329)
* [BIG tensor] fix overflow in cudnn_softmax by replacing the api * [BIG tensor] minor fix
1 parent 76bcf45 commit e554b1e

File tree

1 file changed

+9
-26
lines changed

1 file changed

+9
-26
lines changed

paddle/phi/kernels/gpu/cross_entropy_kernel.cu

Lines changed: 9 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -713,14 +713,16 @@ template <typename T>
713713
static void SoftmaxWithCrossEntropySoftLabel(const GPUContext& dev_ctx,
714714
const int rank,
715715
const int axis,
716-
const T* logits_data,
716+
const DenseTensor& logits,
717717
const T* labels_data,
718-
T* softmax_data,
718+
DenseTensor* softmax,
719719
T* loss_data,
720720
int N,
721721
int dim,
722722
int D) {
723723
constexpr int kMaxBlockDim = 512;
724+
auto* logits_data = logits.data<T>();
725+
auto* softmax_data = softmax->data<T>();
724726
int64_t block_dim = dim >= kMaxBlockDim
725727
? kMaxBlockDim
726728
: (1 << static_cast<int>(std::log2(dim)));
@@ -762,13 +764,7 @@ static void SoftmaxWithCrossEntropySoftLabel(const GPUContext& dev_ctx,
762764
GPUDNNDataLayout layout = GPUDNNDataLayout::kNCHW;
763765
#ifdef PADDLE_WITH_HIP
764766
miopenTensorDescriptor_t descp = desc.descriptor<T>(layout, tensor_dims);
765-
#else
766-
cudnnTensorDescriptor_t descp = desc.descriptor<T>(layout, tensor_dims);
767-
#endif
768-
769767
auto handle = dev_ctx.cudnn_handle();
770-
771-
#ifdef PADDLE_WITH_HIP
772768
auto mode = axis == rank - 1 ? MIOPEN_SOFTMAX_MODE_INSTANCE
773769
: MIOPEN_SOFTMAX_MODE_CHANNEL;
774770
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSoftmaxForward_V2(
@@ -782,18 +778,8 @@ static void SoftmaxWithCrossEntropySoftLabel(const GPUContext& dev_ctx,
782778
MIOPEN_SOFTMAX_LOG,
783779
mode));
784780
#else
785-
auto mode = axis == rank - 1 ? CUDNN_SOFTMAX_MODE_INSTANCE
786-
: CUDNN_SOFTMAX_MODE_CHANNEL;
787-
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSoftmaxForward(
788-
handle,
789-
CUDNN_SOFTMAX_LOG,
790-
mode,
791-
phi::backends::gpu::CudnnDataType<T>::kOne(),
792-
descp,
793-
logits_data,
794-
phi::backends::gpu::CudnnDataType<T>::kZero(),
795-
descp,
796-
softmax_data));
781+
SoftmaxForwardCUDAKernelDriver<T, true>(dev_ctx, logits, axis, softmax);
782+
softmax_data = softmax->data<T>();
797783
#endif
798784

799785
const int kDimLog2 = static_cast<int>(Log2Ceil(dim));
@@ -1170,7 +1156,7 @@ static void SoftmaxWithCrossEntropyHardLabel(const GPUContext& dev_ctx,
11701156
VLOG(7) << "rank=" << rank << ", axis = " << axis << ", N = " << N
11711157
<< ", dim = " << dim << ", D = " << D;
11721158
auto* logits_data = logits.data<T>();
1173-
auto* softmax_data = dev_ctx.template Alloc<T>(softmax);
1159+
auto* softmax_data = softmax->data<T>();
11741160
auto stream = dev_ctx.stream();
11751161
constexpr int max_dim = 320;
11761162
if (D == 1) {
@@ -1216,8 +1202,6 @@ static void SoftmaxWithCrossEntropyHardLabel(const GPUContext& dev_ctx,
12161202
MIOPEN_SOFTMAX_LOG,
12171203
mode));
12181204
#else
1219-
auto mode = axis == rank - 1 ? CUDNN_SOFTMAX_MODE_INSTANCE
1220-
: CUDNN_SOFTMAX_MODE_CHANNEL;
12211205
SoftmaxForwardCUDAKernelDriver<T, true>(dev_ctx, logits, axis, softmax);
12221206
softmax_data = softmax->data<T>();
12231207
#endif
@@ -1352,14 +1336,13 @@ void CrossEntropyWithSoftmaxCUDAKernel(const GPUContext& dev_ctx,
13521336
}
13531337

13541338
if (soft_label) {
1355-
auto* logits_data = logits.data<T>();
13561339
auto* labels_data = label.data<T>();
13571340
SoftmaxWithCrossEntropySoftLabel<T>(dev_ctx,
13581341
rank,
13591342
axis_v,
1360-
logits_data,
1343+
logits,
13611344
labels_data,
1362-
softmax_data,
1345+
softmax,
13631346
loss_data,
13641347
n,
13651348
axis_dim,

0 commit comments

Comments
 (0)