Skip to content

Commit da3ed67

Browse files
committed
Limited the optimization to builds guarded by PADDLE_WITH_CUDA.
1 parent 561be44 commit da3ed67

File tree

2 files changed

+6
-1
lines changed

2 files changed

+6
-1
lines changed

paddle/fluid/operators/layer_norm_kernel.cu.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ __inline__ __device__ half rsqrt_(const half val) {
172172
}
173173
#endif
174174

175+
#ifdef PADDLE_WITH_CUDA
175176
template <typename T, typename U, typename ScaleT = U, int VecSize = 8,
176177
int WARPS_M = 4, int WARPS_N = 1, int BYTES_PER_LDG = 16,
177178
int ELTS_PER_ROW = 1024, int THREADS_PER_WARP = 32,
@@ -281,6 +282,7 @@ __global__ __launch_bounds__(THREADS_PER_CTA) void ln_fwd_1024_kernel(
281282
}
282283
}
283284
}
285+
#endif
284286

285287
template <typename T, typename U, bool ScaleBiasWithSameTypeX>
286288
using LayerNormScaleBiasT =

paddle/fluid/operators/layer_norm_op.cu

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,11 +112,11 @@ class LayerNormKernel<platform::CUDADeviceContext, T>
112112
} \
113113
} while (0)
114114

115+
#ifdef PADDLE_WITH_CUDA
115116
bool can_call_1024_kernel = false;
116117
if (feature_size == 1024 && scale != nullptr && bias != nullptr) {
117118
can_call_1024_kernel = true;
118119
}
119-
120120
if (can_call_1024_kernel) {
121121
const int WARPS_M = 4;
122122
const int WARPS_N = 1;
@@ -145,12 +145,15 @@ class LayerNormKernel<platform::CUDADeviceContext, T>
145145
y_data);
146146
}
147147
} else {
148+
#endif
148149
if (is_scale_bias_same_dtype_with_x) {
149150
PADDLE_LAUNCH_LAYERNORM_FWD(T, true);
150151
} else {
151152
PADDLE_LAUNCH_LAYERNORM_FWD(U, false);
152153
}
154+
#ifdef PADDLE_WITH_CUDA
153155
}
156+
#endif
154157

155158
#undef PADDLE_LAUNCH_LAYERNORM_FWD
156159
}

0 commit comments

Comments
 (0)