@@ -107,17 +107,17 @@ __global__ void ScatterAssignGPUKernel(tensor_t* self_data,
107107 int  dim,
108108 const  index_t * index_data,
109109 tensor_t * src_data,
110-  int  select_dim_size,
111-  int  self_select_dim_size,
112-  int  src_select_dim_size,
110+  int64_t  select_dim_size,
111+  int64_t  self_select_dim_size,
112+  int64_t  src_select_dim_size,
113113 int64_t  outer_dim_size,
114114 int64_t  outer_dim_size_self,
115115 int64_t  outer_dim_size_src,
116116 int64_t  numel,
117117 int64_t  numel_data,
118118 const  func_t & reduce_op,
119119 int * thread_ids) {
120-  int64_t  tid = threadIdx .x  + blockIdx .x  * blockDim .x ;
120+  int64_t  tid = threadIdx .x  + static_cast < int64_t >( blockIdx .x )  * blockDim .x ;
121121 if  (tid >= numel) return ;
122122 int64_t  i, j, k; //  The i, j, k here is the index of the 3 layers loop
123123 //  squeezed from the N layers loop.
@@ -199,9 +199,9 @@ __global__ void GatherScatterGPUKernel(tensor_t* self_data,
199199 int  dim,
200200 const  index_t * index_data,
201201 tensor_t * src_data,
202-  int  select_dim_size,
203-  int  self_select_dim_size,
204-  int  src_select_dim_size,
202+  int64_t  select_dim_size,
203+  int64_t  self_select_dim_size,
204+  int64_t  src_select_dim_size,
205205 int64_t  outer_dim_size,
206206 int64_t  outer_dim_size_self,
207207 int64_t  outer_dim_size_src,
@@ -305,9 +305,9 @@ __global__ void ScatterMeanGPUKernel(tensor_t* self_data,
305305 int  dim,
306306 const  index_t * index_data,
307307 tensor_t * src_data,
308-  int  select_dim_size,
309-  int  self_select_dim_size,
310-  int  src_select_dim_size,
308+  int64_t  select_dim_size,
309+  int64_t  self_select_dim_size,
310+  int64_t  src_select_dim_size,
311311 int64_t  outer_dim_size,
312312 int64_t  outer_dim_size_self,
313313 int64_t  outer_dim_size_src,
@@ -316,7 +316,7 @@ __global__ void ScatterMeanGPUKernel(tensor_t* self_data,
316316 bool  include_self,
317317 const  func_t & reduce_op,
318318 int * shared_mem) {
319-  int64_t  tid = threadIdx .x  + blockIdx .x  * blockDim .x ;
319+  int64_t  tid = threadIdx .x  + static_cast < int64_t >( blockIdx .x )  * blockDim .x ;
320320 if  (tid >= numel) return ;
321321
322322 int64_t  i, j, k; //  The i, j, k here is the index of the 3 layers loop
@@ -425,10 +425,10 @@ struct gpu_gather_scatter_functor {
425425 auto  index_dims = index.dims ();
426426 auto  src_dims = src.dims ();
427427 if  (self_size == 0  || src_size == 0  || index_size == 0 ) return ;
428-  int  select_dim_size = index_dims[dim];
428+  int64_t  select_dim_size = index_dims[dim];
429429 //  index matrix has different shape with self matrix or src matrix.
430-  int  self_select_dim_size = self_dims[dim];
431-  int  src_select_dim_size = src_dims[dim];
430+  int64_t  self_select_dim_size = self_dims[dim];
431+  int64_t  src_select_dim_size = src_dims[dim];
432432 int64_t  outer_dim_size_self = 1 ;
433433 int64_t  outer_dim_size_src = 1 ;
434434 int64_t  inner_dim_size = 1 ;
0 commit comments