Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions paddle/phi/kernels/impl/baddbmm_grad_kernel_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ void BaddbmmGradKernel(const Context& dev_ctx,
in_dims = {input.dims()[0], 1, input.dims()[1]};
input_grad->Resize(in_dims);
}
int total_elems = 0;
int64_t total_elems = 0;

VLOG(3) << "alpha: " << alpha << " beta: " << beta;

Expand Down Expand Up @@ -212,7 +212,7 @@ void BaddbmmGradKernel(const Context& dev_ctx,
total_elems = x.dims()[0] * x.dims()[1] * x.dims()[2];
// x_grad = out_grad * y'. x_grad: B x M x K, out_grad : B x M x N, y : B x
// K x N
for (int i = 0; i < x.dims()[0]; ++i) {
for (int64_t i = 0; i < x.dims()[0]; ++i) {
auto out_grad_slice = out_grad.Slice(i, i + 1);
auto y_slice = y.Slice(i, i + 1);
auto x_grad_slice = x_grad->Slice(i, i + 1);
Expand All @@ -238,7 +238,7 @@ void BaddbmmGradKernel(const Context& dev_ctx,
total_elems = y.dims()[0] * y.dims()[1] * y.dims()[2];
// y_grad = x' * out_grad. y_grad: B x K x N, out_grad : B x M x N, x : B x
// M x K
for (int i = 0; i < x.dims()[0]; ++i) {
for (int64_t i = 0; i < x.dims()[0]; ++i) {
auto out_grad_slice = out_grad.Slice(i, i + 1);
auto x_slice = x.Slice(i, i + 1);
auto y_grad_slice = y_grad->Slice(i, i + 1);
Expand Down
36 changes: 18 additions & 18 deletions paddle/phi/kernels/impl/matmul_kernel_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -520,8 +520,8 @@ void MatMulFunctionImplWithCublasLt(
using blaslt = phi::funcs::MatmulWithCublasLt<T>;

if (x_ndim == 1 && y_ndim == 1) {
const int M = X.numel();
const int N = Y.numel();
const int64_t M = X.numel();
const int64_t N = Y.numel();
PADDLE_ENFORCE_EQ(
M,
N,
Expand Down Expand Up @@ -550,7 +550,7 @@ void MatMulFunctionImplWithCublasLt(
}

if (x_ndim == 1) {
const int N = X.numel();
const int64_t N = X.numel();
if (trans_y) {
PADDLE_ENFORCE_EQ(
y_dims[y_ndim - 1],
Expand Down Expand Up @@ -633,7 +633,7 @@ void MatMulFunctionImplWithCublasLt(
}

if (y_ndim == 1) {
const int N = Y.numel();
const int64_t N = Y.numel();
if (trans_x) {
PADDLE_ENFORCE_EQ(
x_dims[x_ndim - 2],
Expand Down Expand Up @@ -1008,8 +1008,8 @@ bool inline MatMulInt8Function(const phi::GPUContext& dev_ctx,
return false;
}
#if CUDA_VERSION >= 11060
const int x_ndim = x_dims.size();
const int y_ndim = y_dims.size();
const int64_t x_ndim = x_dims.size();
const int64_t y_ndim = y_dims.size();
const int8_t* x_data = x.data<int8_t>();
const int8_t* y_data = y.data<int8_t>();
using blaslt = phi::funcs::MatmulWithCublasLt<int8_t, int32_t>;
Expand All @@ -1027,8 +1027,8 @@ bool inline MatMulInt8Function(const phi::GPUContext& dev_ctx,
/* no_exchange */ true);

if (x_ndim == 1 && y_ndim == 1) {
const int M = x.numel();
const int N = y.numel();
const int64_t M = x.numel();
const int64_t N = y.numel();
PADDLE_ENFORCE_EQ(
M,
N,
Expand Down Expand Up @@ -1057,7 +1057,7 @@ bool inline MatMulInt8Function(const phi::GPUContext& dev_ctx,
return true;
}
if (x_ndim == 1) {
const int N = x.numel();
const int64_t N = x.numel();
if (trans_y) {
PADDLE_ENFORCE_EQ(
y_dims[y_ndim - 1],
Expand All @@ -1083,7 +1083,7 @@ bool inline MatMulInt8Function(const phi::GPUContext& dev_ctx,
N,
y_ndim - 2,
y_dims[y_ndim - 2]));
const int M = y.numel() / N;
const int64_t M = y.numel() / N;
if (!(M == 1 || M % 4 == 0)) {
return false;
}
Expand All @@ -1098,7 +1098,7 @@ bool inline MatMulInt8Function(const phi::GPUContext& dev_ctx,
out->ResizeAndAllocate(common::make_ddim(out_dims));
dev_ctx.template Alloc<int32_t>(out);
if (trans_y) {
const int M = y.numel() / N;
const int64_t M = y.numel() / N;
blaslt::Run(dev_ctx,
y_data,
x_data,
Expand All @@ -1110,7 +1110,7 @@ bool inline MatMulInt8Function(const phi::GPUContext& dev_ctx,
false,
&matmul_planner);
} else {
const int M = y_dims[y_ndim - 1];
const int64_t M = y_dims[y_ndim - 1];
const int batch_size = y.numel() / (M * N);
if (batch_size == 1) {
blaslt::Run(dev_ctx,
Expand Down Expand Up @@ -1144,7 +1144,7 @@ bool inline MatMulInt8Function(const phi::GPUContext& dev_ctx,
}

if (y_ndim == 1) {
const int N = y.numel();
const int64_t N = y.numel();
if (trans_x) {
PADDLE_ENFORCE_EQ(
x_dims[x_ndim - 2],
Expand All @@ -1156,7 +1156,7 @@ bool inline MatMulInt8Function(const phi::GPUContext& dev_ctx,
N,
x_ndim - 2,
x_dims[x_ndim - 2]));
const int M = x.numel() / N;
const int64_t M = x.numel() / N;
if (!((M == 1 || M % 4 == 0))) {
return false;
}
Expand Down Expand Up @@ -1459,8 +1459,8 @@ bool inline MatMulInt8Function(const phi::GPUContext& dev_ctx,
const int8_t* y_data = y.data<int8_t>();

if (x_ndim == 1 && y_ndim == 1) {
const int M = x.numel();
const int N = y.numel();
const int64_t M = x.numel();
const int64_t N = y.numel();
PADDLE_ENFORCE_EQ(
M,
N,
Expand Down Expand Up @@ -1488,7 +1488,7 @@ bool inline MatMulInt8Function(const phi::GPUContext& dev_ctx,
}

if (x_ndim == 1) {
const int N = x.numel();
const int64_t N = x.numel();
if (trans_y) {
PADDLE_ENFORCE_EQ(
y_dims[y_ndim - 1],
Expand Down Expand Up @@ -1569,7 +1569,7 @@ bool inline MatMulInt8Function(const phi::GPUContext& dev_ctx,
}

if (y_ndim == 1) {
const int N = y.numel();
const int64_t N = y.numel();
if (trans_x) {
PADDLE_ENFORCE_EQ(
x_dims[x_ndim - 2],
Expand Down
4 changes: 2 additions & 2 deletions paddle/phi/kernels/impl/multi_dot_kernel_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ inline DenseTensor MatChainMul(const Context& dev_ctx,
template <typename Context, typename T>
std::vector<uint64_t> GetOrder(const std::vector<const DenseTensor*>& ins,
const std::vector<phi::DDim>& ins_dims) {
auto n = ins.size();
uint64_t n = ins.size();
// p: save the ins shape, the ins[i] shape is (p[i], p[i+1])
std::vector<uint64_t> p(n + 1);
for (uint64_t i = 0; i < n; i++) {
Expand Down Expand Up @@ -329,7 +329,7 @@ void MultiDotGradMatChainOrder(const Context& dev_ctx,
std::vector<DenseTensor*>* dx) {
auto order = GetOrder<Context, T>(ins, ins_dims);
auto n = ins.size();
std::vector<DenseTensor> results(n * n);
std::vector<DenseTensor> results(static_cast<int64_t>(n) * n);
MatChainMul<Context, T>(
dev_ctx, ins, ins_dims, order, 0, n - 1, true, &results);
MatChainMulGrad<Context, T>(
Expand Down