Skip to content

Commit da3ed67

Browse files
committed
Limited the optimization to builds guarded by PADDLE_WITH_CUDA.
1 parent 561be44 commit da3ed67

File tree

2 files changed

+6
-1
lines changed

2 files changed

+6
-1
lines changed

paddle/fluid/operators/layer_norm_kernel.cu.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ __inline__ __device__ half rsqrt_(const half val) {
172172
}
173173
#endif
174174

175+
#ifdef PADDLE_WITH_CUDA
175176
template <typename T, typename U, typename ScaleT = U, int VecSize = 8,
176177
int WARPS_M = 4, int WARPS_N = 1, int BYTES_PER_LDG = 16,
177178
int ELTS_PER_ROW = 1024, int THREADS_PER_WARP = 32,
@@ -281,6 +282,7 @@ __global__ __launch_bounds__(THREADS_PER_CTA) void ln_fwd_1024_kernel(
281282
}
282283
}
283284
}
285+
#endif
284286

285287
template <typename T, typename U, bool ScaleBiasWithSameTypeX>
286288
using LayerNormScaleBiasT =

paddle/fluid/operators/layer_norm_op.cu

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,11 +112,11 @@ class LayerNormKernel<platform::CUDADeviceContext, T>
112112
} \
113113
} while (0)
114114

115+
#ifdef PADDLE_WITH_CUDA
115116
bool can_call_1024_kernel = false;
116117
if (feature_size == 1024 && scale != nullptr && bias != nullptr) {
117118
can_call_1024_kernel = true;
118119
}
119-
120120
if (can_call_1024_kernel) {
121121
const int WARPS_M = 4;
122122
const int WARPS_N = 1;
@@ -145,12 +145,15 @@ class LayerNormKernel<platform::CUDADeviceContext, T>
145145
y_data);
146146
}
147147
} else {
148+
#endif
148149
if (is_scale_bias_same_dtype_with_x) {
149150
PADDLE_LAUNCH_LAYERNORM_FWD(T, true);
150151
} else {
151152
PADDLE_LAUNCH_LAYERNORM_FWD(U, false);
152153
}
154+
#ifdef PADDLE_WITH_CUDA
153155
}
156+
#endif
154157

155158
#undef PADDLE_LAUNCH_LAYERNORM_FWD
156159
}

0 commit comments

Comments
 (0)