1 change: 1 addition & 0 deletions paddle/fluid/pir/dialect/op_generator/ops_api_gen.py
@@ -115,6 +115,7 @@
'c_identity',
'c_reduce_sum',
'c_reducescatter',
'c_softmax_with_cross_entropy',
'decayed_adagrad',
'dpsgd',
'embedding_grad_sparse',
10 changes: 10 additions & 0 deletions paddle/fluid/pir/dialect/operator/ir/ops.yaml
@@ -1399,6 +1399,16 @@
output : Tensor(out)
invoke : full_like(x, 0, dtype, place)

- op: c_softmax_with_cross_entropy
args: (Tensor logits, Tensor label, int64_t ignore_index=-100, int ring_id=0, int rank=0, int nranks=0)
output: Tensor(softmax), Tensor(loss)
infer_meta:
func : CSoftmaxWithCrossEntropyInferMeta
kernel:
func: c_softmax_with_cross_entropy
data_type : logits
Comment on lines +1402 to +1409
Contributor: Doesn't this op need a backward config?

backward: c_softmax_with_cross_entropy_grad
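
The yaml entry only declares the op's interface; the numerics live in the kernel. As a reader aid, here is a minimal single-process NumPy sketch of what `c_softmax_with_cross_entropy` computes when the class dimension of `logits` is sharded across `nranks` ranks (the shard layout, the natural-log loss convention, and the `ignore_index` handling are assumptions, not taken from the kernel source):

```python
import numpy as np

def c_softmax_with_cross_entropy_ref(shards, label, ignore_index=-100):
    """Reference semantics. shards[r]: [N, C_local] slice of the class
    axis owned by rank r; label: [N, 1] global class indices."""
    n, c_local = shards[0].shape

    # allreduce-max: global max over the full (sharded) class axis.
    gmax = np.max([s.max(axis=1) for s in shards], axis=0)        # [N]

    # allreduce-sum: global normalizer of the shifted exponentials.
    exps = [np.exp(s - gmax[:, None]) for s in shards]
    z = np.sum([e.sum(axis=1) for e in exps], axis=0)             # [N]

    # Each rank keeps only its own softmax shard.
    softmax = [e / z[:, None] for e in exps]

    # The rank owning the label's class contributes that logit
    # (an allreduce-sum of a masked term on the real op).
    lbl = label.reshape(-1)
    picked = np.zeros(n)
    for r, s in enumerate(shards):
        local = lbl - r * c_local
        owned = (local >= 0) & (local < c_local)
        picked[owned] = s[owned, local[owned]]

    loss = np.log(z) - (picked - gmax)          # -log softmax[label]
    loss[lbl == ignore_index] = 0.0             # assumed ignore behavior
    return softmax, loss.reshape(n, 1)
```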

- op: dpsgd
args: (Tensor param, Tensor grad, Tensor learning_rate, float clip = 10.0f, float batch_size = 16.0f, float sigma = 1.0f, int seed = 0)
output: Tensor(param_out)
Expand Down
10 changes: 10 additions & 0 deletions paddle/fluid/pir/dialect/operator/ir/ops_backward.yaml
@@ -119,6 +119,16 @@
func : c_embedding_grad
no_need_buffer : weight

- backward_op : c_softmax_with_cross_entropy_grad
forward: c_softmax_with_cross_entropy (Tensor logits, Tensor label, int64_t ignore_index=-100, int ring_id=0, int rank=0, int nranks=0) -> Tensor(softmax), Tensor(loss)
args: (Tensor softmax, Tensor label, Tensor loss_grad, int64_t ignore_index=-100, int ring_id=0, int rank=0, int nranks=0)
output: Tensor(logits_grad)
infer_meta :
func: CSoftmaxWithCrossEntropyGradInferMeta
kernel:
func: c_softmax_with_cross_entropy_grad
data_type: loss_grad
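
For orientation, the backward pass this entry wires up has a simple closed form: with softmax `p` and one-hot label `y`, `d loss / d logits = p - y`, scaled by the incoming `loss_grad`. A hedged per-rank NumPy sketch (names and the `ignore_index` behaviour are assumptions):

```python
import numpy as np

def c_softmax_with_cross_entropy_grad_ref(
    softmax_shard, label, loss_grad, rank, c_local, ignore_index=-100
):
    # softmax_shard: [N, C_local]; label: [N, 1]; loss_grad: [N, 1].
    lbl = label.reshape(-1)
    grad = softmax_shard.copy()
    local = lbl - rank * c_local                 # label index inside this shard
    owned = (local >= 0) & (local < c_local)
    grad[owned, local[owned]] -= 1.0             # subtract one-hot where owned
    grad *= loss_grad.reshape(-1, 1)             # chain rule through the loss
    grad[lbl == ignore_index] = 0.0              # assumed: no grad for ignored rows
    return grad
```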

- backward_op : cast_grad
forward : cast (Tensor x, DataType dtype) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
2 changes: 2 additions & 0 deletions paddle/fluid/pir/dialect/operator/utils/utils.cc
@@ -49,6 +49,8 @@ const std::unordered_set<std::string> LegacyOpList = {
CReduceSum_Op::name(),
CAllreduceMax_Op::name(),
CAllgatherOp::name(),
CSoftmaxWithCrossEntropyOp::name(),
CSoftmaxWithCrossEntropyGradOp::name(),
SeedOp::name(),
ShareDataOp::name(),
SparseMomentumOp::name(),
7 changes: 7 additions & 0 deletions paddle/phi/api/yaml/op_compat.yaml
@@ -452,6 +452,13 @@
outputs :
out : Out

- op : c_softmax_with_cross_entropy
backward : c_softmax_with_cross_entropy_grad
inputs :
{logits : Logits, label : Label}
outputs :
{softmax : Softmax, loss : Loss}

- op : cast
inputs :
x : X
12 changes: 11 additions & 1 deletion paddle/phi/infermeta/backward.cc
@@ -201,7 +201,17 @@ void CropGradInferMeta(const MetaTensor& out_grad,
x_grad->set_dtype(x.dtype());
}
}

void CSoftmaxWithCrossEntropyGradInferMeta(const MetaTensor& softmax,
const MetaTensor& label,
const MetaTensor& loss_grad,
int64_t ignore_index,
int ring_id,
int rank,
int nranks,
MetaTensor* logits_grad,
MetaConfig config) {
logits_grad->set_dims(softmax.dims());
}
void FlashAttnGradInferMeta(const MetaTensor& q,
const MetaTensor& k,
const MetaTensor& v,
10 changes: 10 additions & 0 deletions paddle/phi/infermeta/backward.h
@@ -123,6 +123,16 @@ void CrossEntropyWithSoftmaxGradInferMeta(const MetaTensor& label,
MetaTensor* logits_grad,
MetaConfig config = MetaConfig());

void CSoftmaxWithCrossEntropyGradInferMeta(const MetaTensor& softmax,
const MetaTensor& label,
const MetaTensor& loss_grad,
int64_t ignore_index,
int ring_id,
int rank,
int nranks,
MetaTensor* logits_grad,
MetaConfig config = MetaConfig());

void DeformableConvGradInferMeta(const MetaTensor& x,
const MetaTensor& offset,
const MetaTensor& filter,
43 changes: 43 additions & 0 deletions paddle/phi/infermeta/binary.cc
@@ -1009,6 +1009,49 @@ void CrossEntropyWithSoftmaxInferMeta(const MetaTensor& logits,
loss->share_lod(logits);
}

void CSoftmaxWithCrossEntropyInferMeta(const MetaTensor& logits,
const MetaTensor& label,
int64_t ignore_index,
int ring_id,
int rank,
int nranks,
MetaTensor* softmax,
MetaTensor* loss,
MetaConfig config) {
auto logits_dims = logits.dims();
auto labels_dims = label.dims();

auto logits_rank = logits_dims.size();
auto axis = logits_rank - 1;
for (int i = 0; i < logits_rank; i++) {
if (i != axis) {
if (config.is_runtime || (logits_dims[i] > 0 && labels_dims[i] > 0)) {
PADDLE_ENFORCE_EQ(logits_dims[i],
labels_dims[i],
phi::errors::InvalidArgument(
"Input(Logits) and Input(Label) should in "
"same shape in dimensions except axis."));
}
}
}

PADDLE_ENFORCE_EQ(
labels_dims[logits_rank - 1],
1UL,
phi::errors::InvalidArgument(
"the last dimension of Input(Label) should be 1."
"But received: the last dimension of Input(Label) is [%d],"
"the last dimension is [%d]",
labels_dims[logits_rank - 1],
logits_rank - 1));

softmax->set_dims(logits_dims);
logits_dims[axis] = 1;
loss->set_dims(logits_dims);
softmax->share_lod(logits);
loss->share_lod(logits);
}
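
To make the shape rule concrete: every dimension of `logits` and `label` must match except the last, the last dimension of `label` must be 1, `softmax` inherits the full `logits` shape, and `loss` collapses the class axis to 1. With illustrative sizes:

```python
# Illustrative shapes only; the sizes are made up.
logits_shape = (8, 4096)              # [batch, local classes on this rank]
label_shape = (8, 1)                  # last dim must be 1
softmax_shape = logits_shape          # (8, 4096)
loss_shape = (*logits_shape[:-1], 1)  # (8, 1)
```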

void DepthwiseConvInferMeta(const MetaTensor& input,
const MetaTensor& filter,
const std::vector<int>& strides,
10 changes: 10 additions & 0 deletions paddle/phi/infermeta/binary.h
@@ -159,6 +159,16 @@ void CrossEntropyWithSoftmaxInferMeta(const MetaTensor& logits,
MetaTensor* loss,
MetaConfig config = MetaConfig());

void CSoftmaxWithCrossEntropyInferMeta(const MetaTensor& logits,
const MetaTensor& label,
int64_t ignore_index,
int ring_id,
int rank,
int nranks,
MetaTensor* softmax,
MetaTensor* loss,
MetaConfig config = MetaConfig());

void DepthwiseConvInferMeta(const MetaTensor& input,
const MetaTensor& filter,
const std::vector<int>& strides,
@@ -35,6 +35,11 @@ def test_mp(self):
"semi_auto_parallel_c_cross_entropy_mp.py",
)

def test_mp_pir(self):
# Re-run the mp test with the PIR executor enabled, then restore the flag.
os.environ["FLAGS_enable_pir_in_executor"] = "True"
self.test_mp()
os.environ["FLAGS_enable_pir_in_executor"] = "False"


class TestParallelCrossEntropyHybrid(test_base.CommunicationTestDistBase):
def setUp(self):