@@ -336,7 +336,7 @@ void AddReluKernel(gpuStream_t stream,
 #endif
 
 template <typename DeviceContext, typename T>
-void FCFunctor<DeviceContext, T>::operator()(const DeviceContext& context,
+void FCFunctor<DeviceContext, T>::operator()(const DeviceContext& dev_ctx,
                                              const int M,
                                              const int N,
                                              const int K,
@@ -350,7 +350,7 @@ void FCFunctor<DeviceContext, T>::operator()(const DeviceContext& context,
                     false,
                     errors::PermissionDenied(
                         "Weight padding in fc can not be used in GPU scope."));
-  auto blas = phi::funcs::GetBlas<DeviceContext, T>(context);
+  auto blas = phi::funcs::GetBlas<DeviceContext, T>(dev_ctx);
   blas.GEMM(CblasNoTrans,
             CblasNoTrans,
             M,
@@ -366,7 +366,7 @@ void FCFunctor<DeviceContext, T>::operator()(const DeviceContext& context,
   }
 
   // M * N
-  AddReluKernel(context.stream(), M, N, Y, B, relu);
+  AddReluKernel(dev_ctx.stream(), M, N, Y, B, relu);
 }
 
 template class FCFunctor<GPUContext, float16>;
@@ -375,7 +375,7 @@ template class FCFunctor<GPUContext, double>;
 
 template <typename DeviceContext, typename T>
 void FCInt8Functor<DeviceContext, T>::operator()(
-    const DeviceContext& context,
+    const DeviceContext& dev_ctx,
     const int M,
     const int N,
     const int K,
@@ -399,9 +399,9 @@ void FCInt8Functor<DeviceContext, T>::operator()(
   DenseTensor quant_x_tensor, quant_y_tensor;
   quant_x_tensor.Resize(common::make_ddim({M, K}));
   quant_y_tensor.Resize(common::make_ddim({M, N}));
-  context.template Alloc<int8_t>(&quant_x_tensor,
+  dev_ctx.template Alloc<int8_t>(&quant_x_tensor,
                                  quant_x_tensor.numel() * sizeof(int8_t));
-  context.template Alloc<int32_t>(&quant_y_tensor,
+  dev_ctx.template Alloc<int32_t>(&quant_y_tensor,
                                   quant_y_tensor.numel() * sizeof(int32_t));
   LaunchQuantKernelWithVecSize<T>(X,
                                   quant_x_tensor.data<int8_t>(),
@@ -411,14 +411,14 @@ void FCInt8Functor<DeviceContext, T>::operator()(
                                   quant_round_type,
                                   quant_max_bound,
                                   quant_min_bound,
-                                  context.stream());
+                                  dev_ctx.stream());
 
   MatmulKernel<int8_t, GPUContext>(
-      context, quant_x_tensor, *w_tensor, false, false, &quant_y_tensor);
+      dev_ctx, quant_x_tensor, *w_tensor, false, false, &quant_y_tensor);
 
   DenseTensor scale_weights_dev;
   scale_weights_dev.Resize(common::make_ddim({N}));
-  context.template Alloc<float>(&scale_weights_dev,
+  dev_ctx.template Alloc<float>(&scale_weights_dev,
                                 scale_weights_dev.numel() * sizeof(float));
   float* scale_weights_dev_ptr = scale_weights_dev.data<float>();
 #ifdef PADDLE_WITH_HIP
@@ -436,15 +436,15 @@ void FCInt8Functor<DeviceContext, T>::operator()(
   phi::backends::gpu::GpuLaunchConfig config;
   if (N % DequantKernelVecSize == 0) {
     config = phi::backends::gpu::GetGpuLaunchConfig1D(
-        context, M * N, DequantKernelVecSize);
+        dev_ctx, M * N, DequantKernelVecSize);
   } else {
-    config = phi::backends::gpu::GetGpuLaunchConfig1D(context, M * N, 1);
+    config = phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, M * N, 1);
   }
   LaunchDequantKernelWithScaleOfInputAndWeight(quant_y_tensor.data<int32_t>(),
                                                Y,
                                                M,
                                                N,
-                                               context.stream(),
+                                               dev_ctx.stream(),
                                                &config,
                                                scale_in,
                                                scale_weights_dev_ptr,
@@ -455,7 +455,7 @@ void FCInt8Functor<DeviceContext, T>::operator()(
   }
 
   // M * N
-  AddReluKernel(context.stream(), M, N, Y, B, relu);
+  AddReluKernel(dev_ctx.stream(), M, N, Y, B, relu);
 }
 
 template class FCInt8Functor<GPUContext, float16>;