Skip to content

Commit 6fc7d44

Browse files
authored
[Auto Parallel] Add spmd rule No.1 for topk and topk_grad ops (#72499)
1 parent 387259f commit 6fc7d44

File tree

9 files changed

+375
-0
lines changed

9 files changed

+375
-0
lines changed

paddle/phi/infermeta/spmd_rules/rules.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -719,6 +719,11 @@ PD_REGISTER_SPMD_RULE(
719719
PD_INFER_SPMD(phi::distributed::ArgMaxInferSpmdBase),
720720
PD_INFER_SPMD(phi::distributed::ArgMaxInferSpmdReverseBase));
721721

722+
// topk
723+
PD_REGISTER_SPMD_RULE(topk,
724+
PD_INFER_SPMD(phi::distributed::TopkInferSpmd),
725+
PD_INFER_SPMD(phi::distributed::TopkGradInferSpmd));
726+
722727
// unbind
723728
PD_REGISTER_SPMD_RULE(unbind,
724729
PD_INFER_SPMD(phi::distributed::UnbindInferSpmd),

paddle/phi/infermeta/spmd_rules/rules.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ limitations under the License. */
6868
#include "paddle/phi/infermeta/spmd_rules/squeeze.h"
6969
#include "paddle/phi/infermeta/spmd_rules/stack.h"
7070
#include "paddle/phi/infermeta/spmd_rules/tile.h"
71+
#include "paddle/phi/infermeta/spmd_rules/topk.h"
7172
#include "paddle/phi/infermeta/spmd_rules/transpose.h"
7273
#include "paddle/phi/infermeta/spmd_rules/triu.h"
7374
#include "paddle/phi/infermeta/spmd_rules/unbind.h"
Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
/* Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#include "paddle/phi/infermeta/spmd_rules/topk.h"
16+
#include "glog/logging.h"
17+
#include "paddle/phi/infermeta/spmd_rules/spmd_rule_macro_define.h"
18+
#include "paddle/phi/infermeta/spmd_rules/utils.h"
19+
20+
namespace phi {
21+
namespace distributed {
22+
23+
SpmdInfo TopkInferSpmd(
    const DistMetaTensor& x, int k, int axis, bool largest, bool sorted) {
  // Forward SPMD rule for topk.
  //
  // The topk axis cannot remain sharded: every rank needs the full extent
  // of that axis to select the global top-k elements. So the axis is forced
  // to replicated (-1) on the input and on both outputs, while all other
  // dims keep the input's sharding.
  //
  // Returns: {{x_dist_attr_dst}, {out_dist_attr_dst, indices_dist_attr_dst}}.

  // Verify input args.
  EXTRACT_SHAPE_AND_DIST_ATTR(x);
  // Normalize a negative axis into [0, x_ndim).
  axis = axis < 0 ? x_ndim + axis : axis;
  PADDLE_ENFORCE_EQ(
      0 <= axis && axis < x_ndim,
      true,
      // NOTE: common::errors for consistency with TopkGradInferSpmd below
      // (was phi::errors).
      common::errors::InvalidArgument(
          "The axis of topk should be in range [0, %d), but got %d.",
          x_ndim,
          axis));

  // Create destination dist attrs.
  TensorDistAttr x_dist_attr_dst = CopyTensorDistAttrForOutput(x_dist_attr_src);
  TensorDistAttr out_dist_attr_dst =
      CopyTensorDistAttrForOutput(x_dist_attr_src);
  TensorDistAttr indices_dist_attr_dst =
      CopyTensorDistAttrForOutput(x_dist_attr_src);

  // Infer dims_mapping: replicate the topk axis, keep the other dims.
  std::vector<int64_t> x_dims_mapping_dst = x_dims_mapping_src;
  x_dims_mapping_dst[axis] = -1;
  std::vector<int64_t> out_dims_mapping_dst = x_dims_mapping_dst;
  std::vector<int64_t> indices_dims_mapping_dst = x_dims_mapping_dst;

  // Set the dims mapping for outputs.
  out_dist_attr_dst.set_dims_mapping(out_dims_mapping_dst);
  indices_dist_attr_dst.set_dims_mapping(indices_dims_mapping_dst);

  // Update the dims mapping for inputs.
  x_dist_attr_dst.set_dims_mapping(x_dims_mapping_dst);
  VLOG(4) << "TopkInferSpmd: Done.";
  LOG_SPMD_INPUT(x);
  LOG_SPMD_OUTPUT(out_dist_attr_dst);
  LOG_SPMD_OUTPUT(indices_dist_attr_dst);

  return {{x_dist_attr_dst}, {out_dist_attr_dst, indices_dist_attr_dst}};
}
62+
63+
SpmdInfo TopkGradInferSpmd(const DistMetaTensor& x,
                           const DistMetaTensor& indices,
                           const DistMetaTensor& out_grad,
                           int k,
                           int axis,
                           bool largest,
                           bool sorted) {
  // Backward SPMD rule for topk_grad.
  //
  // x_grad has the same rank as x. The topk axis must be replicated on all
  // tensors; the remaining dims are merged across x, indices and out_grad so
  // that every input agrees on a single sharding.
  //
  // Returns: {{x_dst, indices_dst, out_grad_dst}, {x_grad_dst}}.

  // Verify input args.
  EXTRACT_SHAPE_AND_DIST_ATTR(x);
  EXTRACT_SHAPE_AND_DIST_ATTR(indices);
  EXTRACT_SHAPE_AND_DIST_ATTR(out_grad);
  PADDLE_ENFORCE_EQ(indices_ndim,
                    out_grad_ndim,
                    common::errors::InvalidArgument(
                        "TopKGrad: The rank of Indices [%d] and OutGrad [%d] "
                        "must be the same.",
                        indices_ndim,
                        out_grad_ndim));
  PADDLE_ENFORCE_EQ(x_ndim,
                    indices_ndim,
                    common::errors::InvalidArgument(
                        "TopKGrad: The rank of Input [%d] and Indices [%d] "
                        "must be the same.",
                        x_ndim,
                        indices_ndim));
  // Normalize a negative axis into [0, x_ndim).
  axis = axis < 0 ? x_ndim + axis : axis;
  PADDLE_ENFORCE_EQ(
      0 <= axis && axis < x_ndim,
      true,
      // NOTE: common::errors for consistency with the checks above
      // (was phi::errors).
      common::errors::InvalidArgument(
          "The axis of topk_grad should be in range [0, %d), but got %d.",
          x_ndim,
          axis));

  // Build einsum notation: one letter per non-topk dim (the topk axis is
  // excluded from the notation and re-inserted as replicated below).
  // FIX: the alphabet previously skipped 'k', 'm' and 'n', which would yield
  // wrong axis letters for high-rank tensors.
  const std::string alphabet = "abcdefghijklmnopqrstuvwxyz";
  std::string x_axes = alphabet.substr(0, x_ndim - 1);
  std::string indices_axes = x_axes;
  std::string out_grad_axes = x_axes;

  // FIX: erase the topk-axis entry from every source dims mapping so each
  // mapping has the same length as the (x_ndim - 1)-letter notation.
  // Previously the full-rank source mappings were paired with the shorter
  // axes strings, misaligning every dim after `axis` during sharding merge.
  std::vector<int64_t> x_dims_mapping = x_dims_mapping_src;
  x_dims_mapping.erase(x_dims_mapping.begin() + axis);
  std::vector<int64_t> indices_dims_mapping = indices_dims_mapping_src;
  indices_dims_mapping.erase(indices_dims_mapping.begin() + axis);
  std::vector<int64_t> out_grad_dims_mapping = out_grad_dims_mapping_src;
  out_grad_dims_mapping.erase(out_grad_dims_mapping.begin() + axis);

  // Merge sharding across all inputs.
  std::pair<std::string, std::vector<int64_t>> indices_pair(
      indices_axes, indices_dims_mapping);
  std::pair<std::string, std::vector<int64_t>> out_grad_pair(
      out_grad_axes, out_grad_dims_mapping);
  std::pair<std::string, std::vector<int64_t>> x_pair(x_axes, x_dims_mapping);
  auto axis_to_dim_map =
      ShardingMergeForTensors({x_pair, indices_pair, out_grad_pair});

  // Infer dims mapping: merged mapping for the non-topk dims, then re-insert
  // a replicated (-1) entry at the topk axis.
  std::vector<int64_t> x_grad_dims_mapping_dst =
      GetDimsMappingForAxes(x_axes, axis_to_dim_map);
  x_grad_dims_mapping_dst.insert(x_grad_dims_mapping_dst.begin() + axis, -1);
  std::vector<int64_t> x_dims_mapping_dst = x_grad_dims_mapping_dst;
  std::vector<int64_t> indices_dims_mapping_dst = x_grad_dims_mapping_dst;
  std::vector<int64_t> out_grad_dims_mapping_dst = x_grad_dims_mapping_dst;

  // Set the dims mapping.
  TensorDistAttr x_grad_dist_attr_dst =
      CopyTensorDistAttrForOutput(out_grad_dist_attr_src);
  TensorDistAttr x_dist_attr_dst =
      CopyTensorDistAttrForOutput(out_grad_dist_attr_src);
  TensorDistAttr indices_dist_attr_dst =
      CopyTensorDistAttrForOutput(out_grad_dist_attr_src);
  TensorDistAttr out_grad_dist_attr_dst =
      CopyTensorDistAttrForOutput(out_grad_dist_attr_src);

  x_grad_dist_attr_dst.set_dims_mapping(x_grad_dims_mapping_dst);
  x_dist_attr_dst.set_dims_mapping(x_dims_mapping_dst);
  indices_dist_attr_dst.set_dims_mapping(indices_dims_mapping_dst);
  out_grad_dist_attr_dst.set_dims_mapping(out_grad_dims_mapping_dst);

  VLOG(4) << "TopkGradInferSpmd: Done.";
  LOG_SPMD_INPUT(x);
  LOG_SPMD_INPUT(indices);
  LOG_SPMD_INPUT(out_grad);
  LOG_SPMD_OUTPUT(x_grad_dist_attr_dst);

  return {{x_dist_attr_dst, indices_dist_attr_dst, out_grad_dist_attr_dst},
          {x_grad_dist_attr_dst}};
}
144+
SpmdInfo TopkInferSpmdDynamic(const DistMetaTensor& x,
                              const Scalar& k,
                              int axis,
                              bool largest,
                              bool sorted) {
  // Dynamic-graph entry point: `k` arrives wrapped in a Scalar.
  // Unwrap it and delegate to the static-form rule.
  const int k_value = k.to<int>();
  return TopkInferSpmd(x, k_value, axis, largest, sorted);
}
151+
152+
SpmdInfo TopkGradInferSpmdDynamic(const DistMetaTensor& x,
                                  const DistMetaTensor& indices,
                                  const DistMetaTensor& out_grad,
                                  const Scalar& k,
                                  int axis,
                                  bool largest,
                                  bool sorted) {
  // Dynamic-graph entry point for the backward rule: `k` arrives wrapped in
  // a Scalar. Unwrap it and delegate to the static-form rule.
  const int k_value = k.to<int>();
  return TopkGradInferSpmd(x, indices, out_grad, k_value, axis, largest, sorted);
}
162+
163+
} // namespace distributed
164+
} // namespace phi
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/* Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#pragma once
16+
17+
#include "paddle/phi/common/scalar.h"
18+
#include "paddle/phi/core/distributed/auto_parallel/dist_meta_tensor.h"
19+
#include "paddle/phi/core/distributed/type_defs.h"
20+
21+
namespace phi {
22+
namespace distributed {
23+
24+
SpmdInfo TopkInferSpmd(
25+
const DistMetaTensor& x, int k, int axis, bool largest, bool sorted);
26+
27+
SpmdInfo TopkGradInferSpmd(const DistMetaTensor& x,
28+
const DistMetaTensor& indices,
29+
const DistMetaTensor& out_grad,
30+
int k,
31+
int axis,
32+
bool largest,
33+
bool sorted);
34+
35+
SpmdInfo TopkInferSpmdDynamic(const DistMetaTensor& x,
36+
const Scalar& k,
37+
int axis,
38+
bool largest,
39+
bool sorted);
40+
41+
SpmdInfo TopkGradInferSpmdDynamic(const DistMetaTensor& x,
42+
const DistMetaTensor& indices,
43+
const DistMetaTensor& out_grad,
44+
const Scalar& k,
45+
int axis,
46+
bool largest,
47+
bool sorted);
48+
49+
} // namespace distributed
50+
} // namespace phi

paddle/phi/ops/yaml/backward.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3519,6 +3519,7 @@
35193519
infer_meta :
35203520
func : UnchangedInferMeta
35213521
param : [x]
3522+
spmd_rule: TopkGradInferSpmdDynamic
35223523
kernel :
35233524
func : topk_grad
35243525
data_type : out_grad

paddle/phi/ops/yaml/ops.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5144,6 +5144,7 @@
51445144
output : Tensor(out), Tensor(indices)
51455145
infer_meta :
51465146
func : TopKInferMeta
5147+
spmd_rule: TopkInferSpmdDynamic
51475148
kernel :
51485149
func : topk
51495150
data_type : x

test/auto_parallel/spmd_rules/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ if(WITH_DISTRIBUTE)
4444
py_test_modules(test_logsumexp_rule MODULES test_logsumexp_rule)
4545
py_test_modules(test_nonzero_rule MODULES test_nonzero_rule)
4646
if(NOT WITH_ROCM)
47+
py_test_modules(test_topk_rule MODULES test_topk_rule)
4748
py_test_modules(test_add_n_rule MODULES test_add_n_rule)
4849
py_test_modules(test_mean_all_rule MODULES test_mean_all_rule)
4950
py_test_modules(test_argmin_rule MODULES test_argmin_rule)
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import unittest
16+
from collections import OrderedDict
17+
18+
from paddle.distributed.auto_parallel.static.dist_attribute import (
19+
DistTensorSpec,
20+
TensorDistAttr,
21+
)
22+
from paddle.distributed.fleet import auto
23+
from paddle.framework import core
24+
25+
26+
class TestTopkSPMDRule(unittest.TestCase):
    """Unit tests for the topk / topk_grad SPMD (auto-parallel) rules."""

    def setUp(self):
        x_shape = [16, 16, 16]
        out_shape = [16, 2, 16]  # k = 2 along axis 1
        process_mesh = auto.ProcessMesh(mesh=[[0, 1], [2, 3]])

        x_tensor_dist_attr = TensorDistAttr()
        x_tensor_dist_attr.dims_mapping = [-1, -1, -1]
        x_tensor_dist_attr.process_mesh = process_mesh
        self.x_dist_tensor_spec = DistTensorSpec(x_shape, x_tensor_dist_attr)
        out_tensor_dist_attr = TensorDistAttr()
        out_tensor_dist_attr.dims_mapping = [-1, -1, -1]
        out_tensor_dist_attr.process_mesh = process_mesh
        # BUGFIX: use out_tensor_dist_attr for the output spec; the original
        # built out_tensor_dist_attr but then passed x_tensor_dist_attr.
        self.out_dist_tensor_spec = DistTensorSpec(
            out_shape, out_tensor_dist_attr
        )

        self.rule = core.get_phi_spmd_rule("topk")
        self.attrs = OrderedDict()
        self.attrs['k'] = 2
        self.attrs['axis'] = 1
        self.attrs['largest'] = True
        self.attrs['sorted'] = True

    def test_topk_forward(self):
        # axis = 1: the sharding on the topk axis must be cleared.
        # x: [0, 1, -1] --> x: [0, -1, -1]; out/indices: [0, -1, -1]
        self.attrs['axis'] = 1
        self.x_dist_tensor_spec.set_dims_mapping([0, 1, -1])
        result_dist_attrs = self.rule.infer_forward(
            self.x_dist_tensor_spec,
            self.attrs['k'],
            self.attrs['axis'],
            self.attrs['largest'],
            self.attrs['sorted'],
        )

        self.assertEqual(len(result_dist_attrs), 2)
        inferred_input_dist_attrs = result_dist_attrs[0]
        inferred_output_dist_attrs = result_dist_attrs[1]

        self.assertEqual(len(inferred_input_dist_attrs), 1)
        self.assertEqual(len(inferred_output_dist_attrs), 2)

        self.assertEqual(inferred_input_dist_attrs[0].dims_mapping, [0, -1, -1])
        # BUGFIX: check BOTH outputs (out and indices); the original asserted
        # the input twice and only checked the first output.
        self.assertEqual(
            inferred_output_dist_attrs[0].dims_mapping, [0, -1, -1]
        )
        self.assertEqual(
            inferred_output_dist_attrs[1].dims_mapping, [0, -1, -1]
        )

    def test_topk_backward(self):
        # axis = 1
        # x: [0, -1, 1], indices/out_grad: [-1, 1, -1]
        #   --> all inputs and x_grad: [0, -1, 1]
        self.attrs['axis'] = 1
        self.x_dist_tensor_spec.set_dims_mapping([0, -1, 1])
        self.out_dist_tensor_spec.shape = [16, 2, 16]
        self.out_dist_tensor_spec.set_dims_mapping([-1, 1, -1])
        result_dist_attrs = self.rule.infer_backward(
            self.x_dist_tensor_spec,
            self.out_dist_tensor_spec,  # indices (same shape/sharding)
            self.out_dist_tensor_spec,  # out_grad
            self.attrs['k'],
            self.attrs['axis'],
            self.attrs['largest'],
            self.attrs['sorted'],
        )

        self.assertEqual(len(result_dist_attrs), 2)
        inferred_input_dist_attrs = result_dist_attrs[0]
        inferred_output_dist_attrs = result_dist_attrs[1]
        self.assertEqual(len(inferred_input_dist_attrs), 3)
        self.assertEqual(len(inferred_output_dist_attrs), 1)
        # BUGFIX: check all three inputs; the original asserted index [0]
        # three times.
        self.assertEqual(inferred_input_dist_attrs[0].dims_mapping, [0, -1, 1])
        self.assertEqual(inferred_input_dist_attrs[1].dims_mapping, [0, -1, 1])
        self.assertEqual(inferred_input_dist_attrs[2].dims_mapping, [0, -1, 1])
        self.assertEqual(inferred_output_dist_attrs[0].dims_mapping, [0, -1, 1])
102+
103+
104+
# Allow running this test module directly from the command line.
if __name__ == "__main__":
    unittest.main()

0 commit comments

Comments
 (0)