@@ -336,7 +336,7 @@ void AddReluKernel(gpuStream_t stream,
 #endif
 
 template <typename DeviceContext, typename T>
-void FCFunctor<DeviceContext, T>::operator()(const DeviceContext& context,
+void FCFunctor<DeviceContext, T>::operator()(const DeviceContext& dev_ctx,
                                              const int M,
                                              const int N,
                                              const int K,
@@ -350,7 +350,7 @@ void FCFunctor<DeviceContext, T>::operator()(const DeviceContext& context,
                     false,
                     errors::PermissionDenied(
                         "Weight padding in fc can not be used in GPU scope."));
-  auto blas = phi::funcs::GetBlas<DeviceContext, T>(context);
+  auto blas = phi::funcs::GetBlas<DeviceContext, T>(dev_ctx);
   blas.GEMM(CblasNoTrans,
             CblasNoTrans,
             M,
@@ -366,7 +366,7 @@ void FCFunctor<DeviceContext, T>::operator()(const DeviceContext& context,
   }
 
   // M * N
-  AddReluKernel(context.stream(), M, N, Y, B, relu);
+  AddReluKernel(dev_ctx.stream(), M, N, Y, B, relu);
 }
 
 template class FCFunctor<GPUContext, float16>;
@@ -375,7 +375,7 @@ template class FCFunctor<GPUContext, double>;
 
 template <typename DeviceContext, typename T>
 void FCInt8Functor<DeviceContext, T>::operator()(
-    const DeviceContext& context,
+    const DeviceContext& dev_ctx,
     const int M,
     const int N,
     const int K,
@@ -399,9 +399,9 @@ void FCInt8Functor<DeviceContext, T>::operator()(
   DenseTensor quant_x_tensor, quant_y_tensor;
   quant_x_tensor.Resize(common::make_ddim({M, K}));
   quant_y_tensor.Resize(common::make_ddim({M, N}));
-  context.template Alloc<int8_t>(&quant_x_tensor,
+  dev_ctx.template Alloc<int8_t>(&quant_x_tensor,
                                  quant_x_tensor.numel() * sizeof(int8_t));
-  context.template Alloc<int32_t>(&quant_y_tensor,
+  dev_ctx.template Alloc<int32_t>(&quant_y_tensor,
                                   quant_y_tensor.numel() * sizeof(int32_t));
   LaunchQuantKernelWithVecSize<T>(X,
                                   quant_x_tensor.data<int8_t>(),
@@ -411,14 +411,14 @@ void FCInt8Functor<DeviceContext, T>::operator()(
                                   quant_round_type,
                                   quant_max_bound,
                                   quant_min_bound,
-                                  context.stream());
+                                  dev_ctx.stream());
 
   MatmulKernel<int8_t, GPUContext>(
-      context, quant_x_tensor, *w_tensor, false, false, &quant_y_tensor);
+      dev_ctx, quant_x_tensor, *w_tensor, false, false, &quant_y_tensor);
 
   DenseTensor scale_weights_dev;
   scale_weights_dev.Resize(common::make_ddim({N}));
-  context.template Alloc<float>(&scale_weights_dev,
+  dev_ctx.template Alloc<float>(&scale_weights_dev,
                                 scale_weights_dev.numel() * sizeof(float));
   float* scale_weights_dev_ptr = scale_weights_dev.data<float>();
 #ifdef PADDLE_WITH_HIP
@@ -436,15 +436,15 @@ void FCInt8Functor<DeviceContext, T>::operator()(
   phi::backends::gpu::GpuLaunchConfig config;
   if (N % DequantKernelVecSize == 0) {
     config = phi::backends::gpu::GetGpuLaunchConfig1D(
-        context, M * N, DequantKernelVecSize);
+        dev_ctx, M * N, DequantKernelVecSize);
   } else {
-    config = phi::backends::gpu::GetGpuLaunchConfig1D(context, M * N, 1);
+    config = phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, M * N, 1);
   }
   LaunchDequantKernelWithScaleOfInputAndWeight(quant_y_tensor.data<int32_t>(),
                                                Y,
                                                M,
                                                N,
-                                               context.stream(),
+                                               dev_ctx.stream(),
                                                &config,
                                                scale_in,
                                                scale_weights_dev_ptr,
@@ -455,7 +455,7 @@ void FCInt8Functor<DeviceContext, T>::operator()(
   }
 
   // M * N
-  AddReluKernel(context.stream(), M, N, Y, B, relu);
+  AddReluKernel(dev_ctx.stream(), M, N, Y, B, relu);
 }
 
 template class FCInt8Functor<GPUContext, float16>;