Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions paddle/phi/kernels/gpu/cum_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,9 @@ struct LogAddExp {
// Numerically stable log(exp(a) + exp(b)).
//
// The larger operand is factored out, log(exp(a) + exp(b)) =
// max + log1p(exp(min - max)), so the argument of exp() is never positive
// and cannot overflow. std::log1p(x) is used instead of std::log(1 + x)
// because 1 + x rounds to exactly 1 when x is very small, which would
// discard the contribution of the smaller operand.
//
// NOTE(review): for IEEE floating-point T, if a and b are the same infinity
// then min_val - max_val is NaN and so is the result -- confirm whether
// callers need that case handled.
template <typename T>
__host__ __device__ __forceinline__ T operator()(const T& a,
                                                 const T& b) const {
  // Evaluate min/max once and reuse them.
  T min_val = std::min(a, b);
  T max_val = std::max(a, b);
  return std::log1p(std::exp(min_val - max_val)) + max_val;
}
};

Expand Down Expand Up @@ -325,7 +326,7 @@ void ScanKernel(const Context& dev_ctx,
for (size_t i = axis + 1; i < out_dims.size(); i++) {
width *= out_dims[i];
}
int scan_size = out_dims[axis];
int64_t scan_size = out_dims[axis];
bool transpose = (axis != out_dims.size() - 1);

DenseTensor tmp_tensor;
Expand All @@ -341,7 +342,7 @@ void ScanKernel(const Context& dev_ctx,
int64_t max_grid_x = dev_ctx.GetCUDAMaxGridDimSize()[0];

// Do pre-process transpose
int tile_size = 32;
int64_t tile_size = 32;
dim3 blocks(32, 8);
int64_t transpose_grids = ((width + tile_size - 1) / tile_size) *
((height + tile_size - 1) / tile_size);
Expand Down
Loading