32 changes: 16 additions & 16 deletions paddle/phi/kernels/funcs/batch_norm_utils.h
@@ -23,7 +23,7 @@ namespace phi {
using Tensor = DenseTensor;

template <typename DeviceContext, typename T>
- inline void ResizeToChannelFirst(const DeviceContext& context,
+ inline void ResizeToChannelFirst(const DeviceContext& dev_ctx,
const Tensor* input,
Tensor* transformed_input) {
int dim = input->dims().size() - 2;
@@ -37,7 +37,7 @@ inline void ResizeToChannelFirst(const DeviceContext& context,
in_dims_vec[3] = input->dims()[2];
in_dims_vec[4] = input->dims()[3];
transformed_input->Resize(common::make_ddim(in_dims_vec));
- context.template Alloc<T>(transformed_input);
+ dev_ctx.template Alloc<T>(transformed_input);
} else if (dim == 2) {
// input
transformed_input->Resize(input->dims());
@@ -47,20 +47,20 @@ inline void ResizeToChannelFirst(const DeviceContext& context,
in_dims_vec[2] = input->dims()[1];
in_dims_vec[3] = input->dims()[2];
transformed_input->Resize(common::make_ddim(in_dims_vec));
- context.template Alloc<T>(transformed_input);
+ dev_ctx.template Alloc<T>(transformed_input);
} else if (dim == 1) {
transformed_input->Resize(input->dims());

auto in_dims_vec = common::vectorize(input->dims());
in_dims_vec[1] = input->dims()[2];
in_dims_vec[2] = input->dims()[1];
transformed_input->Resize(common::make_ddim(in_dims_vec));
- context.template Alloc<T>(transformed_input);
+ dev_ctx.template Alloc<T>(transformed_input);
}
}

template <typename DeviceContext, typename T>
- inline void ResizeToChannelLast(const DeviceContext& context,
+ inline void ResizeToChannelLast(const DeviceContext& dev_ctx,
const Tensor* input,
Tensor* transformed_input) {
int dim = input->dims().size() - 2;
@@ -74,7 +74,7 @@ inline void ResizeToChannelLast(const DeviceContext& context,
in_dims_vec[3] = input->dims()[4];
in_dims_vec[4] = input->dims()[1];
transformed_input->Resize(common::make_ddim(in_dims_vec));
- context.template Alloc<T>(transformed_input);
+ dev_ctx.template Alloc<T>(transformed_input);

} else if (dim == 2) {
// input
@@ -85,58 +85,58 @@ inline void ResizeToChannelLast(const DeviceContext& context,
in_dims_vec[2] = input->dims()[3];
in_dims_vec[3] = input->dims()[1];
transformed_input->Resize(common::make_ddim(in_dims_vec));
- context.template Alloc<T>(transformed_input);
+ dev_ctx.template Alloc<T>(transformed_input);
} else if (dim == 1) {
transformed_input->Resize(input->dims());

auto in_dims_vec = common::vectorize(input->dims());
in_dims_vec[1] = input->dims()[2];
in_dims_vec[2] = input->dims()[1];
transformed_input->Resize(common::make_ddim(in_dims_vec));
- context.template Alloc<T>(transformed_input);
+ dev_ctx.template Alloc<T>(transformed_input);
}
}

template <typename DeviceContext, typename T>
- inline void TransToChannelFirst(const DeviceContext& context,
+ inline void TransToChannelFirst(const DeviceContext& dev_ctx,
const Tensor* input,
Tensor* transformed_input) {
VLOG(5) << "Why am I called?";
int dim = input->dims().size() - 2;
if (dim == 3) {
std::vector<int> axis{0, 4, 1, 2, 3};
phi::funcs::Transpose<DeviceContext, T, 5> trans5;
- trans5(context, *input, transformed_input, axis);
+ trans5(dev_ctx, *input, transformed_input, axis);

} else if (dim == 2) {
std::vector<int> axis{0, 3, 1, 2};
phi::funcs::Transpose<DeviceContext, T, 4> trans4;
- trans4(context, *input, transformed_input, axis);
+ trans4(dev_ctx, *input, transformed_input, axis);
} else if (dim == 1) {
std::vector<int> axis{0, 2, 1};
phi::funcs::Transpose<DeviceContext, T, 3> trans3;
- trans3(context, *input, transformed_input, axis);
+ trans3(dev_ctx, *input, transformed_input, axis);
}
}

template <typename DeviceContext, typename T>
- inline void TransToChannelLast(const DeviceContext& context,
+ inline void TransToChannelLast(const DeviceContext& dev_ctx,
const Tensor* input,
Tensor* transformed_input) {
int dim = input->dims().size() - 2;
if (dim == 3) {
std::vector<int> axis{0, 2, 3, 4, 1};
phi::funcs::Transpose<DeviceContext, T, 5> trans5;
- trans5(context, *input, transformed_input, axis);
+ trans5(dev_ctx, *input, transformed_input, axis);

} else if (dim == 2) {
std::vector<int> axis{0, 2, 3, 1};
phi::funcs::Transpose<DeviceContext, T, 4> trans4;
- trans4(context, *input, transformed_input, axis);
+ trans4(dev_ctx, *input, transformed_input, axis);
} else if (dim == 1) {
std::vector<int> axis{0, 2, 1};
phi::funcs::Transpose<DeviceContext, T, 3> trans3;
- trans3(context, *input, transformed_input, axis);
+ trans3(dev_ctx, *input, transformed_input, axis);
}
}

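For readers skimming the rename, the following is a hypothetical caller sketch, not code from this PR: it shows how the renamed dev_ctx argument is threaded through the two channel-layout helpers above. It assumes phi::CPUContext as the concrete device context, float data, and a 4-D NHWC input; the include paths follow the file names shown in this diff.

```cpp
// Hypothetical usage sketch (assumptions: phi::CPUContext as the device
// context, float data, 4-D NHWC input). Not part of this PR.
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/kernels/funcs/batch_norm_utils.h"

void ToChannelFirst(const phi::CPUContext& dev_ctx,
                    const phi::DenseTensor& nhwc_input,
                    phi::DenseTensor* nchw_output) {
  // Resize and allocate the destination with the channel-first (NCHW) shape...
  phi::ResizeToChannelFirst<phi::CPUContext, float>(
      dev_ctx, &nhwc_input, nchw_output);
  // ...then physically transpose NHWC -> NCHW into the allocated buffer.
  phi::TransToChannelFirst<phi::CPUContext, float>(
      dev_ctx, &nhwc_input, nchw_output);
}
```
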
24 changes: 12 additions & 12 deletions paddle/phi/kernels/funcs/detail/gru_cpu_kernel.h
@@ -285,10 +285,10 @@ void hl_avx_gru_forward_final_output(OpFinalOutput op_final_output,
}

template <typename T, typename Context>
- inline void forward_reset_outputV2(const Context &context,
+ inline void forward_reset_outputV2(const Context &dev_ctx,
phi::funcs::GRUMetaValue<T> value,
int frame_size) {
- auto &place = *context.eigen_device();
+ auto &place = *dev_ctx.eigen_device();
auto value_reset_gate =
typename EigenVector<T>::Type(value.gate_value, Array1(frame_size));
auto value_update_gate = typename EigenVector<T>::Type(
@@ -310,11 +310,11 @@ inline void forward_reset_output(OpResetOutput op_reset_output,
int batch_size,
ActivationType active_gate,
bool old_version = true,
- const Context *context = nullptr) {
+ const Context *dev_ctx = nullptr) {
for (int b = 0; b < batch_size; b++) {
if (!old_version) {
// use eigen
- forward_reset_outputV2(*context, value, frame_size);
+ forward_reset_outputV2(*dev_ctx, value, frame_size);
} else {
if (OpResetOutput::avx && (frame_size > static_cast<int>(8 - 1)) &&
(sizeof(T) == 4)) {
@@ -346,10 +346,10 @@
}

template <typename T, typename Context>
- inline void forward_final_outputV2(const Context &context,
+ inline void forward_final_outputV2(const Context &dev_ctx,
phi::funcs::GRUMetaValue<T> value,
int frame_size) {
- auto &place = *context.eigen_device();
+ auto &place = *dev_ctx.eigen_device();
auto value_update_gate = typename EigenVector<T>::Type(
value.gate_value + frame_size, Array1(frame_size));
auto value_frame_state = typename EigenVector<T>::Type(
@@ -375,11 +375,11 @@ inline void forward_final_output(OpFinalOutput op_final_output,
ActivationType active_node,
bool origin_mode,
bool old_version = true,
- const Context *context = nullptr) {
+ const Context *dev_ctx = nullptr) {
for (int b = 0; b < batch_size; b++) {
if (!old_version) {
// eigen
- forward_final_outputV2(*context, value, frame_size);
+ forward_final_outputV2(*dev_ctx, value, frame_size);
} else {
if (OpFinalOutput::avx && (frame_size > static_cast<int>(8 - 1)) &&
(sizeof(T) == 4)) {
@@ -866,11 +866,11 @@ inline void backward_reset_grad(OpResetGrad op_reset_grad,
}

template <typename T, typename Context>
- inline void gru_backward(const Context &context,
+ inline void gru_backward(const Context &dev_ctx,
phi::funcs::GRUMetaValue<T> value,
phi::funcs::GRUMetaGrad<T> grad,
int frame_size) {
- auto &place = *context.eigen_device();
+ auto &place = *dev_ctx.eigen_device();

auto value_reset_gate =
typename EigenVector<T>::Type(value.gate_value, Array1(frame_size));
@@ -931,7 +931,7 @@ inline void gru_backward(const Context &context,
}

template <class OpGruGrad, typename T, typename Context>
- inline void cpu_gru_backward(const Context &context,
+ inline void cpu_gru_backward(const Context &dev_ctx,
OpGruGrad op_gru_grad UNUSED,
phi::funcs::GRUMetaValue<T> value,
phi::funcs::GRUMetaGrad<T> grad,
@@ -941,7 +941,7 @@ inline void cpu_gru_backward(const Context &context,
ActivationType active_gate UNUSED) {
for (int b = 0; b < batch_size; ++b) {
// eigen
- gru_backward(context, value, grad, frame_size);
+ gru_backward(dev_ctx, value, grad, frame_size);

value.gate_value += frame_size * 3;
value.reset_output_value += frame_size;
16 changes: 8 additions & 8 deletions paddle/phi/kernels/funcs/detail/lstm_cpu_kernel.h
@@ -411,7 +411,7 @@ void avx_lstm_backward_one_sequence(Op op,
}

template <class T, class Context>
- void eigen_lstm_forward_one_sequence(const Context &context,
+ void eigen_lstm_forward_one_sequence(const Context &dev_ctx,
phi::funcs::LstmMetaValue<T> value,
int frame_size) {
auto eigen_value_ig =
@@ -429,7 +429,7 @@ void eigen_lstm_forward_one_sequence(const Context &context,
auto eigen_output =
typename EigenVector<T>::Type(value.output_value, Array1(frame_size));

- auto &place = *context.eigen_device();
+ auto &place = *dev_ctx.eigen_device();
TanhFunctor<T>()(place, eigen_value_in, eigen_value_in);
SigmoidFunctor<T>()(place, eigen_value_ig, eigen_value_ig);
SigmoidFunctor<T>()(place, eigen_value_fg, eigen_value_fg);
@@ -447,7 +447,7 @@ void eigen_lstm_forward_one_sequence(const Context &context,
}

template <class T, class Context>
- void eigen_lstm_backward_one_sequence(const Context &context,
+ void eigen_lstm_backward_one_sequence(const Context &dev_ctx,
phi::funcs::LstmMetaValue<T> value,
phi::funcs::LstmMetaGrad<T> grad,
int frame_size) {
@@ -475,7 +475,7 @@ void eigen_lstm_backward_one_sequence(const Context &context,
auto eigen_grad_state =
typename EigenVector<T>::Type(grad.state_grad, Array1(frame_size));

- auto &place = *context.eigen_device();
+ auto &place = *dev_ctx.eigen_device();
SigmoidGradFunctor<T>()(place,
1 /*useless*/,
eigen_value_og,
@@ -514,7 +514,7 @@ void eigen_lstm_backward_one_sequence(const Context &context,
}

template <class T, class Op, class Context>
- void cpu_lstm_forward(const Context &context,
+ void cpu_lstm_forward(const Context &dev_ctx,
Op op,
phi::funcs::LstmMetaValue<T> value,
int frame_size,
@@ -524,7 +524,7 @@ void cpu_lstm_forward(const Context &context,
ActivationType active_state,
bool old_api_version) {
if (!old_api_version) {
- eigen_lstm_forward_one_sequence<T>(context, value, frame_size);
+ eigen_lstm_forward_one_sequence<T>(dev_ctx, value, frame_size);
} else {
if (Op::avx && !(frame_size & (8 - 1)) && (std::is_same<T, float>::value)) {
avx_lstm_forward_one_sequence<T>(op,
@@ -549,7 +549,7 @@ void cpu_lstm_forward(const Context &context,
}

template <class T, class Op, class Context>
- void cpu_lstm_backward(const Context &context,
+ void cpu_lstm_backward(const Context &dev_ctx,
Op op,
phi::funcs::LstmMetaValue<T> value,
phi::funcs::LstmMetaGrad<T> grad,
@@ -560,7 +560,7 @@ void cpu_lstm_backward(const Context &context,
ActivationType active_state,
bool old_api_version) {
if (!old_api_version) {
- eigen_lstm_backward_one_sequence<T>(context, value, grad, frame_size);
+ eigen_lstm_backward_one_sequence<T>(dev_ctx, value, grad, frame_size);
} else {
if (Op::avx && !(frame_size & (8 - 1)) && (std::is_same<T, float>::value)) {
avx_lstm_backward_one_sequence<T>(op,
8 changes: 4 additions & 4 deletions paddle/phi/kernels/funcs/detail/lstm_gpu_kernel.h
@@ -218,7 +218,7 @@ __global__ void KeLstmBackward(Op op,
}

template <class T, class Op>
- void gpu_lstm_forward(const phi::DeviceContext& context,
+ void gpu_lstm_forward(const phi::DeviceContext& dev_ctx,
Op op,
phi::funcs::LstmMetaValue<T> value,
int frame_size,
@@ -240,7 +240,7 @@ void gpu_lstm_forward(const phi::DeviceContext& context,
grid = dim3((frame_size + 32 - 1) / 32, (batch_size + 16 - 1) / 16);
}

- auto stream = reinterpret_cast<const phi::GPUContext&>(context).stream();
+ auto stream = reinterpret_cast<const phi::GPUContext&>(dev_ctx).stream();
if (batch_size == 1) {
KeLstmForward<T,
Op,
@@ -269,7 +269,7 @@ void gpu_lstm_forward(const phi::DeviceContext& context,
}

template <class T, class Op>
- void gpu_lstm_backward(const phi::DeviceContext& context,
+ void gpu_lstm_backward(const phi::DeviceContext& dev_ctx,
Op op,
phi::funcs::LstmMetaValue<T> value,
phi::funcs::LstmMetaGrad<T> grad,
@@ -292,7 +292,7 @@ void gpu_lstm_backward(const phi::DeviceContext& context,
grid = dim3((frame_size + 32 - 1) / 32, (batch_size + 16 - 1) / 16);
}

- auto stream = reinterpret_cast<const phi::GPUContext&>(context).stream();
+ auto stream = reinterpret_cast<const phi::GPUContext&>(dev_ctx).stream();
if (batch_size == 1) {
KeLstmBackward<T,
Op,
8 changes: 4 additions & 4 deletions paddle/phi/kernels/funcs/diagonal.h
@@ -63,7 +63,7 @@ struct DiagonalFunctor {
};

template <typename T, typename DeviceContext>
- DenseTensor Diagonal(const DeviceContext& context,
+ DenseTensor Diagonal(const DeviceContext& dev_ctx,
const DenseTensor* input,
int64_t offset,
int64_t dim1,
@@ -105,7 +105,7 @@ DenseTensor Diagonal(const DeviceContext& context,
DDim diag_dims = common::make_ddim(ret_dims);
auto dig_stride = common::stride(diag_dims);
diag.Resize(diag_dims);
- auto diag_data = context.template Alloc<T>(&diag);
+ auto diag_data = dev_ctx.template Alloc<T>(&diag);

int64_t pos = std::abs(offset) * offset_stride;
int64_t dim_size = ret_strides.size();
@@ -119,8 +119,8 @@ DenseTensor Diagonal(const DeviceContext& context,
const auto* ret_arr = ret_strides.data();
#endif

- // auto& dev_ctx = context.template device_context<DeviceContext>();
- phi::funcs::ForRange<DeviceContext> for_range(context, diag.numel());
+ // auto& dev_ctx2 = dev_ctx.template device_context<DeviceContext>();
+ phi::funcs::ForRange<DeviceContext> for_range(dev_ctx, diag.numel());
DiagonalFunctor<T> functor(
input_data, diag_arr, ret_arr, pos, dim_size, diag_data);
for_range(functor);
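As a small illustration of the renamed parameter at a call site, here is a hypothetical caller sketch, not part of this PR. Two assumptions are labeled explicitly: that Diagonal is declared in phi::funcs (as the header path suggests), and that the trailing parameter truncated in the diff above is a second dimension index.

```cpp
// Hypothetical caller sketch. Assumptions: Diagonal lives in phi::funcs, and
// its final parameter (cut off in the diff above) is a second axis index.
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/kernels/funcs/diagonal.h"

phi::DenseTensor MainDiagonal(const phi::CPUContext& dev_ctx,
                              const phi::DenseTensor& matrix) {
  // offset 0 selects the main diagonal; axes 0 and 1 are the paired dimensions.
  return phi::funcs::Diagonal<float, phi::CPUContext>(
      dev_ctx, &matrix, /*offset=*/0, /*dim1=*/0, /*dim2=*/1);
}
```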