@@ -436,15 +436,15 @@ __global__ void VectorizedBroadcastKernel(
436436
437437template <typename OutT, typename Functor, int Arity, int NumOuts, int VecSize>
438438void LaunchBroadcastKernel (
439- const KPDevice &ctx ,
439+ const KPDevice &dev_ctx ,
440440 const BroadcastTypeClassifier<OutT, Functor, Arity, NumOuts> &classifier,
441441 Functor func) {
442442#ifdef PADDLE_WITH_XPU_KP
443443 int numel = classifier.numel ;
444444 const int threads = 64 ;
445445 const int blocks = 8 ;
446446 int read_lens = configs[0 ].buf_len ;
447- auto stream = ctx .x_context ()->xpu_stream ;
447+ auto stream = dev_ctx .x_context ()->xpu_stream ;
448448 int main_offset = (numel / (read_lens * threads)) * read_lens * threads;
449449 int tail_tid = numel % (read_lens * threads);
450450
@@ -461,8 +461,8 @@ void LaunchBroadcastKernel(
461461#else
462462 const int &numel = classifier.numel ;
463463 auto gpu_config =
464- phi::backends::gpu::GetGpuLaunchConfig1D (ctx , numel, VecSize);
465- auto stream = ctx .stream ();
464+ phi::backends::gpu::GetGpuLaunchConfig1D (dev_ctx , numel, VecSize);
465+ auto stream = dev_ctx .stream ();
466466 auto threads = gpu_config.GetBlockSize ();
467467 auto blocks = gpu_config.block_per_grid ;
468468 int main_offset = (numel / (VecSize * threads)) * VecSize * threads;
@@ -513,20 +513,20 @@ void LaunchBroadcastKernel(
513513
// Overload of BroadcastKernelForDifferentVecSize that participates in overload
// resolution only when NeedVectorized<OutT>::value is false (selected through
// std::enable_if); its return type is void.
//
// It builds a BroadcastTypeClassifier from `ins`, `outs` and `axis`, then
// forwards the device context, the classifier and `func` to
// LaunchBroadcastKernel instantiated with VecSizeS.
//
// NOTE(review): this span is a diff view; each "-"/"+" pair below shows the
// same line before and after the device-context parameter was renamed from
// `ctx` to `dev_ctx`. No other token differs between the two versions.
514514template <typename OutT, typename Functor, int Arity, int NumOuts = 1 >
515515typename std::enable_if<!NeedVectorized<OutT>::value, void >::type
516- BroadcastKernelForDifferentVecSize (const KPDevice &ctx ,
516+ BroadcastKernelForDifferentVecSize (const KPDevice &dev_ctx ,
517517 const std::vector<const DenseTensor *> &ins,
518518 std::vector<DenseTensor *> *outs,
519519 int axis,
520520 Functor func) {
// Classify the broadcast from the inputs, outputs and axis.
521521 auto classifier =
522522 BroadcastTypeClassifier<OutT, Functor, Arity, NumOuts>(ins, outs, axis);
// This overload always launches with VecSizeS; no vec-size switch is performed
// here (presumably VecSizeS is the narrowest width -- TODO confirm).
523523 LaunchBroadcastKernel<OutT, Functor, Arity, NumOuts, VecSizeS>(
524- ctx , classifier, func);
524+ dev_ctx , classifier, func);
525525}
526526
527527template <typename OutT, typename Functor, int Arity, int NumOuts = 1 >
528528typename std::enable_if<NeedVectorized<OutT>::value, void >::type
529- BroadcastKernelForDifferentVecSize (const KPDevice &ctx ,
529+ BroadcastKernelForDifferentVecSize (const KPDevice &dev_ctx ,
530530 const std::vector<const DenseTensor *> &ins,
531531 std::vector<DenseTensor *> *outs,
532532 int axis,
@@ -545,17 +545,17 @@ BroadcastKernelForDifferentVecSize(const KPDevice &ctx,
545545 switch (vec_size) {
546546 case VecSizeL: {
547547 LaunchBroadcastKernel<OutT, Functor, Arity, NumOuts, VecSizeL>(
548- ctx , classifier, func);
548+ dev_ctx , classifier, func);
549549 break ;
550550 }
551551 case VecSizeM: {
552552 LaunchBroadcastKernel<OutT, Functor, Arity, NumOuts, VecSizeM>(
553- ctx , classifier, func);
553+ dev_ctx , classifier, func);
554554 break ;
555555 }
556556 case VecSizeS: {
557557 LaunchBroadcastKernel<OutT, Functor, Arity, NumOuts, VecSizeS>(
558- ctx , classifier, func);
558+ dev_ctx , classifier, func);
559559 break ;
560560 }
561561 default : {
@@ -591,7 +591,7 @@ static void SliceTensor(DenseTensor *x,
591591}
592592
593593template <typename OutT, typename Functor, int kArity , int NumOuts = 1 >
594- void BroadcastKernelSplit (const KPDevice &ctx ,
594+ void BroadcastKernelSplit (const KPDevice &dev_ctx ,
595595 const std::vector<const DenseTensor *> &ins,
596596 std::vector<DenseTensor *> *outs,
597597 int axis,
@@ -728,12 +728,12 @@ void BroadcastKernelSplit(const KPDevice &ctx,
728728 }
729729
730730 BroadcastKernelForDifferentVecSize<OutT, Functor, kArity , NumOuts>(
731- ctx , new_ins, &new_outs, axis, func);
731+ dev_ctx , new_ins, &new_outs, axis, func);
732732 }
733733}
734734
735735template <typename OutT, typename Functor, int kArity , int NumOuts = 1 >
736- void BroadcastKernelApply (const KPDevice &ctx ,
736+ void BroadcastKernelApply (const KPDevice &dev_ctx ,
737737 const std::vector<const DenseTensor *> &ins,
738738 std::vector<DenseTensor *> *outs,
739739 int axis,
@@ -748,16 +748,16 @@ void BroadcastKernelApply(const KPDevice &ctx,
748748 }
749749 if (use_int64_index_kernel) { // use_int64_index_kernel
750750 BroadcastKernelSplit<OutT, Functor, kArity , NumOuts>(
751- ctx , ins, outs, axis, func, compute_size);
751+ dev_ctx , ins, outs, axis, func, compute_size);
752752 return ;
753753 }
754754#endif
755755 BroadcastKernelForDifferentVecSize<OutT, Functor, kArity , NumOuts>(
756- ctx , ins, outs, axis, func);
756+ dev_ctx , ins, outs, axis, func);
757757}
758758
759759template <typename OutT, typename Functor, int NumOuts = 1 >
760- void BroadcastKernel (const KPDevice &ctx ,
760+ void BroadcastKernel (const KPDevice &dev_ctx ,
761761 const std::vector<const DenseTensor *> &ins,
762762 std::vector<DenseTensor *> *outs,
763763 Functor func,
@@ -805,7 +805,7 @@ void BroadcastKernel(const KPDevice &ctx,
805805 " %d-th output tensor`s shape is not." ,
806806 i));
807807 }
808- ctx .template Alloc <OutT>((*outs)[i]);
808+ dev_ctx .template Alloc <OutT>((*outs)[i]);
809809 }
810810 if ((*outs)[0 ]->numel () == 0 ) {
811811 return ;
@@ -823,7 +823,7 @@ void BroadcastKernel(const KPDevice &ctx,
823823 }
824824 axis = axis == -1 ? max_rank - min_rank : axis;
825825 BroadcastKernelApply<OutT, Functor, kArity , NumOuts>(
826- ctx , ins, outs, axis, func);
826+ dev_ctx , ins, outs, axis, func);
827827}
828828
829829template <typename Functor, typename T, typename OutType = T>
0 commit comments