Skip to content

Commit a23ce42

Browse files
committed
[Auto Parallel] Add spmd rule for topk and topk_grad ops
1 parent 441816a commit a23ce42

File tree

8 files changed

+232
-0
lines changed

8 files changed

+232
-0
lines changed

paddle/phi/infermeta/spmd_rules/rules.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -705,6 +705,11 @@ PD_REGISTER_SPMD_RULE(
705705
PD_INFER_SPMD(phi::distributed::ArgMaxInferSpmdBase),
706706
PD_INFER_SPMD(phi::distributed::ArgMaxInferSpmdReverseBase));
707707

708+
// argmax
709+
PD_REGISTER_SPMD_RULE(topk,
710+
PD_INFER_SPMD(phi::distributed::TopkInferSpmd),
711+
PD_INFER_SPMD(phi::distributed::TopkGradInferSpmd));
712+
708713
// unbind
709714
PD_REGISTER_SPMD_RULE(unbind,
710715
PD_INFER_SPMD(phi::distributed::UnbindInferSpmd),

paddle/phi/infermeta/spmd_rules/rules.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ limitations under the License. */
6666
#include "paddle/phi/infermeta/spmd_rules/squeeze.h"
6767
#include "paddle/phi/infermeta/spmd_rules/stack.h"
6868
#include "paddle/phi/infermeta/spmd_rules/tile.h"
69+
#include "paddle/phi/infermeta/spmd_rules/topk.h"
6970
#include "paddle/phi/infermeta/spmd_rules/transpose.h"
7071
#include "paddle/phi/infermeta/spmd_rules/triu.h"
7172
#include "paddle/phi/infermeta/spmd_rules/unbind.h"
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
/* Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#include "paddle/phi/infermeta/spmd_rules/topk.h"
16+
#include "paddle/phi/infermeta/spmd_rules/spmd_rule_macro_define.h"
17+
#include "paddle/phi/infermeta/spmd_rules/utils.h"
18+
19+
namespace phi {
20+
namespace distributed {
21+
22+
SpmdInfo TopkInferSpmd(const DistMetaTensor& x,
23+
const Scalar& k,
24+
int axis,
25+
bool largest,
26+
bool sorted) {
27+
// Verify input args
28+
EXTRACT_SHAPE_AND_DIST_ATTR(x);
29+
axis = axis < 0 ? x_ndim + axis : axis;
30+
31+
// Infer output dims mapping from merged input dims mapping
32+
std::vector<int64_t> x_dims_mapping_dst(x_dims_mapping_src);
33+
std::vector<int64_t> out_dims_mapping;
34+
std::vector<int64_t> indices_dims_mapping;
35+
x_dims_mapping_dst[axis] = -1;
36+
out_dims_mapping.assign(x_dims_mapping_dst.begin(), x_dims_mapping_dst.end());
37+
indices_dims_mapping = out_dims_mapping;
38+
39+
TensorDistAttr x_dist_attr_dst = CopyTensorDistAttrForOutput(x_dist_attr_src);
40+
x_dist_attr_dst.set_dims_mapping(x_dims_mapping_dst);
41+
TensorDistAttr out_dist_attr = CopyTensorDistAttrForOutput(x_dist_attr_src);
42+
out_dist_attr.set_dims_mapping(out_dims_mapping);
43+
TensorDistAttr indices_dist_attr =
44+
CopyTensorDistAttrForOutput(x_dist_attr_src);
45+
indices_dist_attr.set_dims_mapping(indices_dims_mapping);
46+
47+
return {{x_dist_attr_dst}, {out_dist_attr, indices_dist_attr}};
48+
}
49+
50+
// Infers the SPMD (sharding) info for topk_grad.
// Inputs: dist attrs of x, indices, out_grad; attrs k/axis/largest/sorted.
// Output: {dst dist attrs for x, indices, out_grad}, {dist attr for x_grad}.
//
// Fixes vs. the original version:
//   * `out_grad` is taken as `const DistMetaTensor&` (the header declares it
//     that way, and EXTRACT_SHAPE_AND_DIST_ATTR requires a DistMetaTensor;
//     `Tensor out_grad` would not define the declared symbol).
//   * `axis` is normalized for negative values and the top-k axis is forced
//     to be replicated (-1), consistent with TopkInferSpmd.
SpmdInfo TopkGradInferSpmd(const DistMetaTensor& x,
                           const DistMetaTensor& indices,
                           const DistMetaTensor& out_grad,
                           Scalar k,
                           int axis,
                           bool largest,
                           bool sorted) {
  // Brings <name>_ndim, <name>_dims_mapping_src, <name>_dist_attr_src into
  // scope for each tensor.
  EXTRACT_SHAPE_AND_DIST_ATTR(x);
  EXTRACT_SHAPE_AND_DIST_ATTR(indices);
  EXTRACT_SHAPE_AND_DIST_ATTR(out_grad);
  // topk preserves rank, so out_grad has the same ndim as x.
  axis = axis < 0 ? out_grad_ndim + axis : axis;

  // Follow out_grad's sharding for every tensor, but keep the top-k axis
  // replicated so the rule stays consistent with the forward rule (which
  // always emits -1 on that axis).
  std::vector<int64_t> dims_mapping_dst(out_grad_dims_mapping_src);
  dims_mapping_dst[axis] = -1;

  TensorDistAttr out_grad_dist_attr_dst =
      CopyTensorDistAttrForOutput(out_grad_dist_attr_src);
  out_grad_dist_attr_dst.set_dims_mapping(dims_mapping_dst);

  TensorDistAttr x_dist_attr_dst = CopyTensorDistAttrForOutput(x_dist_attr_src);
  x_dist_attr_dst.set_dims_mapping(dims_mapping_dst);

  TensorDistAttr indices_dist_attr_dst =
      CopyTensorDistAttrForOutput(indices_dist_attr_src);
  indices_dist_attr_dst.set_dims_mapping(dims_mapping_dst);

  // x_grad mirrors the (axis-replicated) out_grad sharding.
  TensorDistAttr x_grad_dist_attr_dst =
      CopyTensorDistAttrForOutput(x_dist_attr_src);
  x_grad_dist_attr_dst.set_dims_mapping(dims_mapping_dst);

  return {{x_dist_attr_dst, indices_dist_attr_dst, out_grad_dist_attr_dst},
          {x_grad_dist_attr_dst}};
}

}  // namespace distributed
}  // namespace phi
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/* Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#pragma once
16+
17+
#include <vector>
18+
#include "paddle/phi/common/scalar.h"
19+
#include "paddle/phi/core/distributed/auto_parallel/dist_meta_tensor.h"
20+
#include "paddle/phi/core/distributed/type_defs.h"
21+
22+
namespace phi {
23+
namespace distributed {
24+
25+
SpmdInfo TopkInferSpmd(const DistMetaTensor& x,
26+
const Scalar& k,
27+
int axis,
28+
bool largest,
29+
bool sorted);
30+
31+
SpmdInfo TopkGradInferSpmd(const DistMetaTensor& x,
32+
const DistMetaTensor& indices,
33+
const DistMetaTensor& out_grad,
34+
Scalar k,
35+
int axis,
36+
bool largest,
37+
bool sorted);
38+
39+
} // namespace distributed
40+
} // namespace phi

paddle/phi/ops/yaml/backward.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3516,6 +3516,7 @@
35163516
infer_meta :
35173517
func : UnchangedInferMeta
35183518
param : [x]
3519+
spmd_rule: TopkGradInferSpmd
35193520
kernel :
35203521
func : topk_grad
35213522
data_type : out_grad

paddle/phi/ops/yaml/ops.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5128,6 +5128,7 @@
51285128
output : Tensor(out), Tensor(indices)
51295129
infer_meta :
51305130
func : TopKInferMeta
5131+
spmd_rule: TopkInferSpmd
51315132
kernel :
51325133
func : topk
51335134
data_type : x

test/auto_parallel/spmd_rules/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ if(WITH_DISTRIBUTE)
3535
py_test_modules(test_gather_rule MODULES test_gather_rule)
3636
py_test_modules(test_cumsum_rule MODULES test_cumsum_rule)
3737
py_test_modules(test_argmax_rule MODULES test_argmax_rule)
38+
py_test_modules(test_topk_rule MODULES test_topk_rule)
3839
py_test_modules(test_unbind_rule MODULES test_unbind_rule)
3940
py_test_modules(test_stack_rule MODULES test_stack_rule)
4041
py_test_modules(test_gather_nd_rule MODULES test_gather_nd_rule)
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import unittest
16+
from collections import OrderedDict
17+
18+
from paddle.distributed.auto_parallel.static.dist_attribute import (
19+
DistTensorSpec,
20+
TensorDistAttr,
21+
)
22+
from paddle.distributed.fleet import auto
23+
from paddle.framework import core
24+
25+
26+
class TestTopkSPMDRule(unittest.TestCase):
    """Tests for the auto-parallel SPMD (sharding) rule of the topk op."""

    def setUp(self):
        x_shape = [16, 16, 16]
        out_shape = [16, 2, 16]  # k = 2 along axis 1
        process_mesh = auto.ProcessMesh(mesh=[[0, 1], [2, 3]])

        x_tensor_dist_attr = TensorDistAttr()
        x_tensor_dist_attr.dims_mapping = [-1, -1, -1]
        x_tensor_dist_attr.process_mesh = process_mesh
        self.x_dist_tensor_spec = DistTensorSpec(x_shape, x_tensor_dist_attr)

        out_tensor_dist_attr = TensorDistAttr()
        out_tensor_dist_attr.dims_mapping = [-1, -1, -1]
        out_tensor_dist_attr.process_mesh = process_mesh
        # BUG FIX: was built from x_tensor_dist_attr, leaving
        # out_tensor_dist_attr unused.
        self.out_dist_tensor_spec = DistTensorSpec(
            out_shape, out_tensor_dist_attr
        )

        self.rule = core.get_phi_spmd_rule("topk")
        self.attrs = OrderedDict()
        self.attrs['k'] = 2
        self.attrs['axis'] = 1
        self.attrs['largest'] = True
        self.attrs['sorted'] = True

    def test_topk_forward(self):
        # axis = 1: the top-k axis must end up replicated everywhere.
        # x [0, 1, -1] --> x_dst [0, -1, -1]; out/indices [0, -1, -1]
        self.attrs['axis'] = 1
        self.x_dist_tensor_spec.set_dims_mapping([0, 1, -1])
        result_dist_attrs = self.rule.infer_forward(
            self.x_dist_tensor_spec,
            self.attrs['k'],
            self.attrs['axis'],
            self.attrs['largest'],
            self.attrs['sorted'],
        )
        inferred_input_dist_attrs = result_dist_attrs[0]
        inferred_output_dist_attrs = result_dist_attrs[1]

        self.assertEqual(len(result_dist_attrs), 2)
        self.assertEqual(len(inferred_input_dist_attrs), 1)
        self.assertEqual(len(inferred_output_dist_attrs), 2)

        self.assertEqual(inferred_input_dist_attrs[0].dims_mapping, [0, -1, -1])
        # BUG FIX: check both outputs (out and indices); the original asserted
        # the input twice and never inspected indices.
        self.assertEqual(
            inferred_output_dist_attrs[0].dims_mapping, [0, -1, -1]
        )
        self.assertEqual(
            inferred_output_dist_attrs[1].dims_mapping, [0, -1, -1]
        )

    def test_topk_backward(self):
        # axis = 1
        # out_grad [0, -1, 1] --> x/indices/out_grad dst [0, -1, 1],
        # x_grad [0, -1, 1]
        self.attrs['axis'] = 1
        self.out_dist_tensor_spec.shape = [16, 2, 16]
        self.out_dist_tensor_spec.set_dims_mapping([0, -1, 1])
        result_dist_attrs = self.rule.infer_backward(
            self.x_dist_tensor_spec,
            # out spec doubles as the indices spec (same shape/mapping).
            self.out_dist_tensor_spec,
            self.out_dist_tensor_spec,
            self.attrs['k'],
            self.attrs['axis'],
            self.attrs['largest'],
            self.attrs['sorted'],
        )
        inferred_input_dist_attrs = result_dist_attrs[0]
        inferred_output_dist_attrs = result_dist_attrs[1]

        self.assertEqual(len(result_dist_attrs), 2)
        self.assertEqual(len(inferred_input_dist_attrs), 3)
        self.assertEqual(len(inferred_output_dist_attrs), 1)

        # BUG FIX: assert all three input attrs (x, indices, out_grad);
        # the original checked index 0 three times.
        self.assertEqual(inferred_input_dist_attrs[0].dims_mapping, [0, -1, 1])
        self.assertEqual(inferred_input_dist_attrs[1].dims_mapping, [0, -1, 1])
        self.assertEqual(inferred_input_dist_attrs[2].dims_mapping, [0, -1, 1])
        self.assertEqual(inferred_output_dist_attrs[0].dims_mapping, [0, -1, 1])
101+
102+
# Run the suite when invoked directly.
if __name__ == "__main__":
    unittest.main()

0 commit comments

Comments
 (0)