
Commit 4f7ef98

Rename context to dev_ctx in paddle/phi/kernels/funcs/ [fluid_ops] (#73823)
1 parent 5ab73cc commit 4f7ef98

17 files changed: +190 −187 lines
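
The rename is mechanical: every device-context parameter in paddle/phi/kernels/funcs/ now uses the single name dev_ctx in place of the older mix of context and ctx. A minimal sketch of the resulting convention, with stand-in types so it compiles on its own (the real types live in phi):

#include <vector>

// Stand-ins for phi::GPUContext and phi::DenseTensor, for illustration only.
struct GPUContext {};
struct DenseTensor {};

// After this commit, funcs/ helpers uniformly spell the parameter dev_ctx,
// mirroring the ConcatFunctor signature in the diff below.
template <typename T>
struct ConcatFunctor {
  void operator()(const GPUContext& dev_ctx,
                  const std::vector<DenseTensor>& input,
                  int axis,
                  DenseTensor* output);
};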

paddle/phi/kernels/funcs/concat_and_split_functor.cu

Lines changed: 8 additions & 8 deletions

@@ -24,7 +24,7 @@ limitations under the License. */
 namespace phi {
 namespace funcs {

-static inline void GetBlockDims(const phi::GPUContext& context,
+static inline void GetBlockDims(const phi::GPUContext& dev_ctx,
                                 int64_t num_rows,
                                 int64_t num_cols,
                                 dim3* block_dims,
@@ -39,7 +39,7 @@ static inline void GetBlockDims(const phi::GPUContext& context,
   *block_dims = dim3(block_cols, block_rows, 1);

   constexpr int waves = 1;
-  int max_threads = context.GetMaxPhysicalThreadCount() * waves;
+  int max_threads = dev_ctx.GetMaxPhysicalThreadCount() * waves;
   int64_t max_blocks = std::max(max_threads / kThreadsPerBlock, 1);

   int grid_cols =
@@ -605,14 +605,14 @@ void ConcatFunctorWithIndexType(const phi::GPUContext& dev_ctx,

 template <typename T>
 struct ConcatFunctor<phi::GPUContext, T> {
-  void operator()(const phi::GPUContext& context,
+  void operator()(const phi::GPUContext& dev_ctx,
                   const std::vector<phi::DenseTensor>& input,
                   int axis,
                   phi::DenseTensor* output) {
     if (output->numel() < std::numeric_limits<int32_t>::max()) {
-      ConcatFunctorWithIndexType<T, int32_t>(context, input, axis, output);
+      ConcatFunctorWithIndexType<T, int32_t>(dev_ctx, input, axis, output);
     } else {
-      ConcatFunctorWithIndexType<T, int64_t>(context, input, axis, output);
+      ConcatFunctorWithIndexType<T, int64_t>(dev_ctx, input, axis, output);
     }
   }
 };
@@ -805,7 +805,7 @@ void SplitFunctorDispatchWithIndexType(
 template <typename T>
 class SplitFunctor<phi::GPUContext, T> {
  public:
-  void operator()(const phi::GPUContext& context,
+  void operator()(const phi::GPUContext& dev_ctx,
                   const phi::DenseTensor& input,
                   const std::vector<const phi::DenseTensor*>& ref_inputs,
                   int axis,
@@ -819,10 +819,10 @@ class SplitFunctor<phi::GPUContext, T> {

     if (numel < std::numeric_limits<int32_t>::max()) {
       SplitFunctorDispatchWithIndexType<T, int32_t>(
-          context, axis, input, ref_inputs, outputs);
+          dev_ctx, axis, input, ref_inputs, outputs);
     } else {
       SplitFunctorDispatchWithIndexType<T, int64_t>(
-          context, axis, input, ref_inputs, outputs);
+          dev_ctx, axis, input, ref_inputs, outputs);
     }
   }
 };
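
Both functors above gate on the element count to choose an index type: 32-bit offsets keep GPU index arithmetic cheap, and 64-bit indexing is the fallback for tensors whose numel exceeds int32 range. A standalone sketch of the dispatch pattern (LaunchWithIndexType is a hypothetical stand-in for the *WithIndexType helpers):

#include <cstdint>
#include <limits>

// Hypothetical stand-in for ConcatFunctorWithIndexType /
// SplitFunctorDispatchWithIndexType: computes element offsets in IndexT.
template <typename T, typename IndexT>
void LaunchWithIndexType(int64_t numel) { /* kernel launch using IndexT */ }

template <typename T>
void Dispatch(int64_t numel) {
  // Same test as the functors above: prefer 32-bit indexing when the
  // element count fits, otherwise fall back to 64-bit.
  if (numel < std::numeric_limits<int32_t>::max()) {
    LaunchWithIndexType<T, int32_t>(numel);
  } else {
    LaunchWithIndexType<T, int64_t>(numel);
  }
}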

paddle/phi/kernels/funcs/elementwise/elementwise_op_broadcast.cu.h

Lines changed: 2 additions & 2 deletions

@@ -21,7 +21,7 @@ namespace funcs {

 template <typename OutT, typename Functor, int NumOuts = 1>
 void LaunchElementwiseCudaKernel(
-    const KPDevice &ctx,
+    const KPDevice &dev_ctx,
     const std::vector<const phi::DenseTensor *> &ins,
     std::vector<phi::DenseTensor *> *outs,
     Functor func,
@@ -50,7 +50,7 @@ void LaunchElementwiseCudaKernel(
     pt_outputs.push_back(pt_outputs_tmp[i].get());
   }
   phi::funcs::BroadcastKernel<OutT, Functor, NumOuts>(
-      ctx, pt_inputs, &pt_outputs, func, axis);
+      dev_ctx, pt_inputs, &pt_outputs, func, axis);
 }

 }  // namespace funcs

paddle/phi/kernels/funcs/elementwise/elementwise_op_impl.cu.h

Lines changed: 2 additions & 2 deletions

@@ -24,7 +24,7 @@ namespace funcs {

 template <typename OutT, typename Functor, int NumOuts = 1>
 void LaunchSameDimsElementwiseCudaKernel(
-    const KPDevice &ctx,
+    const KPDevice &dev_ctx,
     const std::vector<const phi::DenseTensor *> &ins,
     std::vector<phi::DenseTensor *> *outs,
     Functor func) {
@@ -52,7 +52,7 @@ void LaunchSameDimsElementwiseCudaKernel(
     pt_outputs.push_back(pt_outputs_tmp[i].get());
   }
   phi::funcs::ElementwiseKernel<OutT, Functor, NumOuts>(
-      ctx, pt_inputs, &pt_outputs, func);
+      dev_ctx, pt_inputs, &pt_outputs, func);
 }

 }  // namespace funcs
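
Both elementwise wrappers above only repackage the tensor vectors before forwarding to phi::funcs::BroadcastKernel or phi::funcs::ElementwiseKernel. A hypothetical call site, assuming an addition functor like the sketch below (illustrative, not a Paddle API):

// Illustrative functor; in Paddle the call operator would be marked
// HOSTDEVICE so it can run on the device.
template <typename T>
struct AddFunctor {
  inline T operator()(T a, T b) const { return a + b; }
};

// With ins/outs gathered by the caller and dev_ctx the KPDevice context:
//   LaunchSameDimsElementwiseCudaKernel<float>(dev_ctx, ins, &outs,
//                                              AddFunctor<float>());
//   LaunchElementwiseCudaKernel<float>(dev_ctx, ins, &outs,
//                                      AddFunctor<float>(), axis);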

paddle/phi/kernels/funcs/fc_functor.cc

Lines changed: 4 additions & 4 deletions

@@ -22,7 +22,7 @@ namespace phi {
 namespace funcs {

 template <typename DeviceContext, typename T>
-void FCFunctor<DeviceContext, T>::operator()(const DeviceContext& context,
+void FCFunctor<DeviceContext, T>::operator()(const DeviceContext& dev_ctx,
                                              const int M,
                                              const int N,
                                              const int K,
@@ -32,18 +32,18 @@ void FCFunctor<DeviceContext, T>::operator()(const DeviceContext& context,
                                              const T* B,
                                              bool relu,
                                              bool padding_weights) {
-  auto blas = GetBlas<DeviceContext, T>(context);
+  auto blas = GetBlas<DeviceContext, T>(dev_ctx);
   phi::DenseTensor Y1;
   T* Y1_data = nullptr;
   if (padding_weights) {
     const int NN = N + 4;
     const int KK = K + 4;
     phi::DenseTensor X1;
     X1.Resize({M * KK});
-    T* X1_data = context.template HostAlloc<T>(&X1);
+    T* X1_data = dev_ctx.template HostAlloc<T>(&X1);

     Y1.Resize({M * (N + 4)});
-    Y1_data = context.template HostAlloc<T>(&Y1);
+    Y1_data = dev_ctx.template HostAlloc<T>(&Y1);
 #ifdef PADDLE_WITH_MKLML
 #pragma omp parallel for
 #endif
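
For context, FCFunctor computes a fully connected layer, Y[M,N] = X[M,K] * W[K,N] + B, with an optional ReLU; the GPU version below makes this explicit as a GEMM followed by AddReluKernel. A naive scalar reference of that math (a sketch assuming the usual row-major FC layout, not Paddle code):

#include <algorithm>

// Reference semantics for FCFunctor: Y = X * W + B, optionally ReLU'd.
// The real functor delegates the matrix product to a BLAS GEMM.
void FCReference(int M, int N, int K, const float* X, const float* W,
                 const float* B, float* Y, bool relu) {
  for (int m = 0; m < M; ++m) {
    for (int n = 0; n < N; ++n) {
      float acc = (B != nullptr) ? B[n] : 0.0f;
      for (int k = 0; k < K; ++k) acc += X[m * K + k] * W[k * N + n];
      Y[m * N + n] = relu ? std::max(acc, 0.0f) : acc;
    }
  }
}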

paddle/phi/kernels/funcs/fc_functor.cu

Lines changed: 13 additions & 13 deletions

@@ -336,7 +336,7 @@ void AddReluKernel(gpuStream_t stream,
 #endif

 template <typename DeviceContext, typename T>
-void FCFunctor<DeviceContext, T>::operator()(const DeviceContext& context,
+void FCFunctor<DeviceContext, T>::operator()(const DeviceContext& dev_ctx,
                                              const int M,
                                              const int N,
                                              const int K,
@@ -350,7 +350,7 @@ void FCFunctor<DeviceContext, T>::operator()(const DeviceContext& context,
       false,
       errors::PermissionDenied(
           "Weight padding in fc can not be used in GPU scope."));
-  auto blas = phi::funcs::GetBlas<DeviceContext, T>(context);
+  auto blas = phi::funcs::GetBlas<DeviceContext, T>(dev_ctx);
   blas.GEMM(CblasNoTrans,
             CblasNoTrans,
             M,
@@ -366,7 +366,7 @@ void FCFunctor<DeviceContext, T>::operator()(const DeviceContext& context,
   }

   // M * N
-  AddReluKernel(context.stream(), M, N, Y, B, relu);
+  AddReluKernel(dev_ctx.stream(), M, N, Y, B, relu);
 }

 template class FCFunctor<GPUContext, float16>;
@@ -375,7 +375,7 @@ template class FCFunctor<GPUContext, double>;

 template <typename DeviceContext, typename T>
 void FCInt8Functor<DeviceContext, T>::operator()(
-    const DeviceContext& context,
+    const DeviceContext& dev_ctx,
     const int M,
     const int N,
     const int K,
@@ -399,9 +399,9 @@ void FCInt8Functor<DeviceContext, T>::operator()(
   DenseTensor quant_x_tensor, quant_y_tensor;
   quant_x_tensor.Resize(common::make_ddim({M, K}));
   quant_y_tensor.Resize(common::make_ddim({M, N}));
-  context.template Alloc<int8_t>(&quant_x_tensor,
+  dev_ctx.template Alloc<int8_t>(&quant_x_tensor,
                                  quant_x_tensor.numel() * sizeof(int8_t));
-  context.template Alloc<int32_t>(&quant_y_tensor,
+  dev_ctx.template Alloc<int32_t>(&quant_y_tensor,
                                   quant_y_tensor.numel() * sizeof(int32_t));
   LaunchQuantKernelWithVecSize<T>(X,
                                   quant_x_tensor.data<int8_t>(),
@@ -411,14 +411,14 @@ void FCInt8Functor<DeviceContext, T>::operator()(
                                   quant_round_type,
                                   quant_max_bound,
                                   quant_min_bound,
-                                  context.stream());
+                                  dev_ctx.stream());

   MatmulKernel<int8_t, GPUContext>(
-      context, quant_x_tensor, *w_tensor, false, false, &quant_y_tensor);
+      dev_ctx, quant_x_tensor, *w_tensor, false, false, &quant_y_tensor);

   DenseTensor scale_weights_dev;
   scale_weights_dev.Resize(common::make_ddim({N}));
-  context.template Alloc<float>(&scale_weights_dev,
+  dev_ctx.template Alloc<float>(&scale_weights_dev,
                                 scale_weights_dev.numel() * sizeof(float));
   float* scale_weights_dev_ptr = scale_weights_dev.data<float>();
 #ifdef PADDLE_WITH_HIP
@@ -436,15 +436,15 @@ void FCInt8Functor<DeviceContext, T>::operator()(
   phi::backends::gpu::GpuLaunchConfig config;
   if (N % DequantKernelVecSize == 0) {
     config = phi::backends::gpu::GetGpuLaunchConfig1D(
-        context, M * N, DequantKernelVecSize);
+        dev_ctx, M * N, DequantKernelVecSize);
   } else {
-    config = phi::backends::gpu::GetGpuLaunchConfig1D(context, M * N, 1);
+    config = phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, M * N, 1);
   }
   LaunchDequantKernelWithScaleOfInputAndWeight(quant_y_tensor.data<int32_t>(),
                                                Y,
                                                M,
                                                N,
-                                               context.stream(),
+                                               dev_ctx.stream(),
                                                &config,
                                                scale_in,
                                                scale_weights_dev_ptr,
@@ -455,7 +455,7 @@ void FCInt8Functor<DeviceContext, T>::operator()(
   }

   // M * N
-  AddReluKernel(context.stream(), M, N, Y, B, relu);
+  AddReluKernel(dev_ctx.stream(), M, N, Y, B, relu);
 }

 template class FCInt8Functor<GPUContext, float16>;
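
FCInt8Functor runs the same fully connected layer in int8: quantize X with scale_in, take an int8 x int8 -> int32 matmul against the pre-quantized weight, dequantize with the input and per-channel weight scales, then add bias and apply ReLU. The flow as hypothetical pseudocode (Quantize/Int8Gemm/Dequantize are stand-ins for LaunchQuantKernelWithVecSize, MatmulKernel<int8_t, GPUContext>, and LaunchDequantKernelWithScaleOfInputAndWeight in the diff above; the real code allocates every temporary through dev_ctx.template Alloc<> and launches on dev_ctx.stream()):

// quant_x = Quantize(X, scale_in);                    // T      -> int8
// quant_y = Int8Gemm(quant_x, *w_tensor);             // int8^2 -> int32
// Dequantize(quant_y, Y, scale_in, scale_weights);    // int32  -> T
// AddReluKernel(dev_ctx.stream(), M, N, Y, B, relu);  // bias + ReLU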

paddle/phi/kernels/funcs/fft_fill_conj.h

Lines changed: 11 additions & 11 deletions

@@ -138,7 +138,7 @@ struct FFTFillConjFunctor {
 };

 template <typename DeviceContext, typename C>
-void FFTFillConj(const DeviceContext& ctx,
+void FFTFillConj(const DeviceContext& dev_ctx,
                  const DenseTensor* src,
                  DenseTensor* dst,
                  const std::vector<int64_t>& axes) {
@@ -160,49 +160,49 @@ void FFTFillConj(const DeviceContext& ctx,
 #if defined(__NVCC__) || defined(__HIPCC__)
   DenseTensor src_strides_g;
   src_strides_g.Resize({(int64_t)src_strides_v.size()});
-  int64_t* src_strides = ctx.template Alloc<int64_t>(&src_strides_g);
+  int64_t* src_strides = dev_ctx.template Alloc<int64_t>(&src_strides_g);
   DenseTensor dst_strides_g;
   dst_strides_g.Resize({(int64_t)dst_strides_v.size()});
-  int64_t* dst_strides = ctx.template Alloc<int64_t>(&dst_strides_g);
+  int64_t* dst_strides = dev_ctx.template Alloc<int64_t>(&dst_strides_g);
   DenseTensor dst_shape_g;
   dst_shape_g.Resize({(int64_t)dst_shape_v.size()});
-  int64_t* dst_shape = ctx.template Alloc<int64_t>(&dst_shape_g);
+  int64_t* dst_shape = dev_ctx.template Alloc<int64_t>(&dst_shape_g);
   DenseTensor is_fft_axis_g;
   is_fft_axis_g.Resize({rank});
-  bool* p_is_fft_axis = ctx.template Alloc<bool>(&is_fft_axis_g);
+  bool* p_is_fft_axis = dev_ctx.template Alloc<bool>(&is_fft_axis_g);
   auto cplace = phi::CPUPlace();
-  const auto gplace = ctx.GetPlace();
+  const auto gplace = dev_ctx.GetPlace();
   memory_utils::Copy(gplace,
                      src_strides,
                      cplace,
                      src_strides_v.data(),
                      sizeof(int64_t) * src_strides_v.size(),
-                     ctx.stream());
+                     dev_ctx.stream());
   memory_utils::Copy(gplace,
                      dst_strides,
                      cplace,
                      dst_strides_v.data(),
                      sizeof(int64_t) * dst_strides_v.size(),
-                     ctx.stream());
+                     dev_ctx.stream());
   memory_utils::Copy(gplace,
                      dst_shape,
                      cplace,
                      dst_shape_v.data(),
                      sizeof(int64_t) * dst_shape_v.size(),
-                     ctx.stream());
+                     dev_ctx.stream());
   memory_utils::Copy(gplace,
                      p_is_fft_axis,
                      cplace,
                      _is_fft_axis.get(),
                      sizeof(bool) * rank,
-                     ctx.stream());
+                     dev_ctx.stream());
 #else
   const auto src_strides = src_strides_v.data();
   const auto dst_strides = dst_strides_v.data();
   const auto dst_shape = dst_shape_v.data();
   const auto p_is_fft_axis = _is_fft_axis.get();
 #endif
-  ForRange<DeviceContext> for_range(ctx, dst->numel());
+  ForRange<DeviceContext> for_range(dev_ctx, dst->numel());
   FFTFillConjFunctor<C> fill_conj_functor(src_data,
                                           dst_data,
                                           src_strides,
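
The __NVCC__/__HIPCC__ branch above stages small host-side metadata (strides, shape, FFT-axis mask) into device buffers, with each copy enqueued on dev_ctx.stream() so it is ordered before the ForRange launch on the same stream. The recurring pattern, condensed into one sketch that reuses the phi calls from the diff (meta_v is a hypothetical host std::vector<int64_t>; this assumes the file's own headers and is not a standalone snippet):

// 1. Size a DenseTensor to hold the metadata.
DenseTensor meta_g;
meta_g.Resize({static_cast<int64_t>(meta_v.size())});
// 2. Allocate device memory through the context.
int64_t* meta = dev_ctx.template Alloc<int64_t>(&meta_g);
// 3. Async-copy host -> device on the context's stream.
memory_utils::Copy(dev_ctx.GetPlace(),  // dst place: device
                   meta,
                   phi::CPUPlace(),     // src place: host
                   meta_v.data(),
                   sizeof(int64_t) * meta_v.size(),
                   dev_ctx.stream());
// 4. Hand `meta` to the functor run by ForRange on the same stream.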

paddle/phi/kernels/funcs/fft_fill_conj_xpu.h

Lines changed: 3 additions & 3 deletions

@@ -44,7 +44,7 @@ int FFTFillConjGrad(int N,
 namespace phi {
 namespace funcs {
 template <typename DeviceContext, typename C>
-void FFTFillConj(const DeviceContext& ctx,
+void FFTFillConj(const DeviceContext& dev_ctx,
                  DenseTensor* src,
                  DenseTensor* dst,
                  const std::vector<int64_t>& axes) {
@@ -63,7 +63,7 @@ void FFTFillConj(const DeviceContext& ctx,
     _is_fft_axis[i] = true;
   }

-  xpu::ctx_guard RAII_GUARD(ctx.x_context());
+  xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
   int64_t* src_strides_ptr =
       RAII_GUARD.alloc_l3_or_gm<int64_t>(src_strides_v.size());
   PADDLE_ENFORCE_NOT_NULL(src_strides_ptr,
@@ -111,7 +111,7 @@ void FFTFillConj(const DeviceContext& ctx,
 }

 template <typename DeviceContext, typename C>
-void FFTFillConjGrad(const DeviceContext& ctx,
+void FFTFillConjGrad(const DeviceContext& dev_ctx,
                      const DenseTensor& out_grad,
                      const std::vector<int64_t>& axes,
                      DenseTensor* x_grad) {
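
On XPU the equivalent staging goes through xpu::ctx_guard: alloc_l3_or_gm<T>() hands back L3- or global-memory scratch that is released automatically when the guard goes out of scope, so FFTFillConj needs no manual frees. Condensed from the calls visible above (the PADDLE_ENFORCE message is elided in the diff and stays elided here):

xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
int64_t* src_strides_ptr =
    RAII_GUARD.alloc_l3_or_gm<int64_t>(src_strides_v.size());
PADDLE_ENFORCE_NOT_NULL(src_strides_ptr, /* ... */);
// ...scratch buffers are freed when RAII_GUARD is destroyed.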
