Commit 85453d2

[SPMD] named partition spec support (#5415)
[SPMD] named partition spec
1 parent 1835771 commit 85453d2

2 files changed: +42 -3 lines changed

test/spmd/test_xla_sharding.py

Lines changed: 12 additions & 0 deletions
@@ -718,6 +718,18 @@ def test_sharded_tensor_to_cpu_int_type(self):
                             partition_spec)
     self.assertTrue(torch.allclose(t1, xst1.cpu()))
 
+  def test_named_partition_spec(self):
+    xt1 = torch.arange(64).reshape(8, 8).to(xm.xla_device())
+    mesh = xs.Mesh(
+        list(range(self.n_devices)), (1, self.n_devices), ('data', 'model'))
+    partition_spec = ('model', 'data')
+    xs.mark_sharding(xt1, mesh, partition_spec)
+    sharding_spec = torch_xla._XLAC._get_xla_sharding_spec(xt1)
+    if self.n_devices > 1:
+      self.assertTrue(f"devices=[{self.n_devices},1]" in sharding_spec)
+    else:
+      self.assertTrue("replicated" in sharding_spec)
+
 
 if __name__ == '__main__':
   test = unittest.main()
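
For reference, a minimal usage sketch of the named-axis flow this test exercises. This is a sketch only: `num_devices` is a placeholder for the actual device count, and any SPMD runtime setup required by your torch_xla build is omitted.

```python
import torch
import torch_xla
import torch_xla.core.xla_model as xm
import torch_xla.experimental.xla_sharding as xs

num_devices = 8  # placeholder; set to the number of addressable XLA devices

# A 1 x num_devices logical mesh with named axes instead of positional indices.
mesh = xs.Mesh(list(range(num_devices)), (1, num_devices), ('data', 'model'))

t = torch.arange(64).reshape(8, 8).to(xm.xla_device())

# Shard dim 0 along the 'model' axis and dim 1 along the 'data' axis;
# on this mesh that is equivalent to the positional spec (1, 0).
xs.mark_sharding(t, mesh, ('model', 'data'))

# With more than one device the annotation contains "devices=[<n>,1]";
# on a single device the tensor is simply replicated (see the test assertions above).
print(torch_xla._XLAC._get_xla_sharding_spec(t))
```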

torch_xla/experimental/xla_sharding.py

Lines changed: 30 additions & 3 deletions
@@ -53,6 +53,7 @@ def __init__(self,
     if not isinstance(device_ids, np.ndarray):
       device_ids = np.array(device_ids)
     assert (axis_names is None) or (len(mesh_shape) == len(axis_names))
+    assert axis_names is None or (len(set(axis_names)) == len(axis_names))
     assert (len(device_ids) == np.prod(mesh_shape))
     assert len(device_ids) == len(np.unique(device_ids))
     self.device_ids = device_ids
@@ -64,12 +65,20 @@ def size(self):
     return np.prod(self.mesh_shape)
 
   def shape(self):
+    if self.axis_names is None:
+      return OrderedDict(
+          (dim, size) for dim, size in enumerate(self.mesh_shape))
     return OrderedDict(
         (name, size) for name, size in zip(self.axis_names, self.mesh_shape))
 
   def get_logical_mesh(self):
     return self.device_ids.reshape(self.mesh_shape)
 
+  def get_axis_name_idx(self, name: str) -> int:
+    if name not in self.axis_names:
+      return None
+    return self.axis_names.index(name)
+
 
 # HybridDevice class has been inspired from jax's mesh_utils: https://github.com/google/jax/blob/fc5960f2b8b7a0ef74dbae4e27c5c08ff1564cff/jax/experimental/mesh_utils.py#L4
 
@@ -359,9 +368,26 @@ def _get_group_assignment(
   return group_assignment, replication_groups
 
 
+def _translate_named_partition_spec(mesh: Mesh, partition_spec: Tuple):
+  _partition_spec = list()
+  for p in partition_spec:
+    if (p is None) or (type(p) is int):
+      _partition_spec.append(p)
+    elif type(p) is str:
+      idx = mesh.get_axis_name_idx(p)
+      if idx is None:
+        raise ValueError(f"Axis name {p} is not defined in the given mesh")
+      _partition_spec.append(idx)
+    else:
+      raise ValueError(
+          f"Spec type {type(p)} is not supported in partition spec")
+  return _partition_spec
+
+
 @xr.requires_pjrt
-def mark_sharding(t: Union[torch.Tensor, XLAShardedTensor], mesh: Mesh,
-                  partition_spec: Tuple[Union[int, None]]) -> XLAShardedTensor:
+def mark_sharding(
+    t: Union[torch.Tensor, XLAShardedTensor], mesh: Mesh,
+    partition_spec: Tuple[Union[int, str, None]]) -> XLAShardedTensor:
   """
   Annotates the tensor provided with XLA partition spec. Internally,
   it annotates the corresponding XLATensor as sharded for the XLA SpmdPartitioner pass.
@@ -370,7 +396,7 @@ def mark_sharding(t: Union[torch.Tensor, XLAShardedTensor], mesh: Mesh,
 
       mesh (Mesh): describes the logical XLA device topology and the underlying device IDs.
 
-      partition_spec (Tuple[int, None]): A tuple of device_mesh dimension index or `None`.
+      partition_spec (Tuple[int, str, None]): A tuple of device_mesh dimension index or `None`. Each index is an int or str if the mesh axis is named.
       This specifies how each input rank is sharded (index to mesh_shape) or replicated (None).
       For example, we can shard an 8x10 tensor 4-way row-wise, and replicate column-wise.
       >> input = torch.randn(8, 10)
@@ -396,6 +422,7 @@ def mark_sharding(t: Union[torch.Tensor, XLAShardedTensor], mesh: Mesh,
   assert num_devices > 0, "This requires XLA supported device(s)."
   assert mesh.size() == num_devices, \
     f"{mesh.mesh_shape} is not mappable over {num_devices} devices."
+  partition_spec = _translate_named_partition_spec(mesh, partition_spec)
   assert all((d >= 0 and d < len(mesh.mesh_shape)) for d in partition_spec if d), \
     f"partition_spec ({partition_spec}) contains out of bound index into mesh_shape."
   # We only allow fully specified `partition_spec` to be applicable, as opposed
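
The new `_translate_named_partition_spec` step simply rewrites axis names into mesh-dimension indices before the existing integer-based validation in `mark_sharding` runs. Below is a standalone sketch of that mapping, using a hypothetical `SimpleMesh` stand-in (not part of torch_xla) so it runs without XLA devices.

```python
from collections import OrderedDict
from typing import Optional, Tuple


class SimpleMesh:
  """Hypothetical stand-in mirroring only the Mesh pieces the translation uses."""

  def __init__(self, mesh_shape: Tuple[int, ...], axis_names: Tuple[str, ...]):
    assert len(set(axis_names)) == len(axis_names), "axis names must be unique"
    self.mesh_shape = mesh_shape
    self.axis_names = axis_names

  def shape(self) -> OrderedDict:
    # Named meshes map axis name -> axis size, mirroring Mesh.shape() above.
    return OrderedDict(zip(self.axis_names, self.mesh_shape))

  def get_axis_name_idx(self, name: str) -> Optional[int]:
    if name not in self.axis_names:
      return None
    return self.axis_names.index(name)


def translate_named_partition_spec(mesh: SimpleMesh, partition_spec: Tuple):
  # Same shape as the new _translate_named_partition_spec: ints and None pass
  # through, strings resolve to mesh-axis indices, anything else is an error.
  out = []
  for p in partition_spec:
    if p is None or isinstance(p, int):
      out.append(p)
    elif isinstance(p, str):
      idx = mesh.get_axis_name_idx(p)
      if idx is None:
        raise ValueError(f"Axis name {p} is not defined in the given mesh")
      out.append(idx)
    else:
      raise ValueError(f"Spec type {type(p)} is not supported in partition spec")
  return out


mesh = SimpleMesh((1, 8), ('data', 'model'))
print(mesh.shape())                                             # OrderedDict([('data', 1), ('model', 8)])
print(translate_named_partition_spec(mesh, ('model', 'data')))  # [1, 0]
print(translate_named_partition_spec(mesh, ('model', None)))    # [1, None]
```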
