Skip to content

Commit f3f305e

Browse files
mikaylagawarecki authored and pytorchmergebot committed
Fix condition for weights_only unpickler for DTensor (pytorch#140740)
Same as pytorch#140739 but for DTensor (move safe globals for DTensor to `torch.distributed.tensor.__init__` and update error message to let user know `torch.distributed.tensor` must be imported to load DTensor) Differential Revision: [D65961690](https://our.internmc.facebook.com/intern/diff/D65961690) Pull Request resolved: pytorch#140740 Approved by: https://github.com/malfet ghstack dependencies: pytorch#140739
1 parent b63a848 commit f3f305e

File tree

3 files changed

+66
-17
lines changed

3 files changed

+66
-17
lines changed

test/distributed/_tensor/test_dtensor.py

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
# Owner(s): ["oncall: distributed"]
33

44
import os
5+
import pathlib
6+
import tempfile
7+
import unittest
58

69
from numpy.testing import assert_array_equal
710

@@ -28,7 +31,7 @@
2831
parallelize_module,
2932
RowwiseParallel,
3033
)
31-
from torch.testing._internal.common_utils import run_tests
34+
from torch.testing._internal.common_utils import IS_FBCODE, run_tests
3235
from torch.testing._internal.distributed._tensor.common_dtensor import (
3336
DTensorTestBase,
3437
with_comms,
@@ -542,6 +545,33 @@ def test_dtensor_save_load(self):
542545
reloaded_st = torch.load(buffer, weights_only=True)
543546
self.assertEqual(sharded_tensor, reloaded_st)
544547

548+
@with_comms
549+
@unittest.skipIf(
550+
IS_FBCODE,
551+
"subprocess import torch fails with ModuleNotFoundError: No module named 'torch' in fbcode",
552+
)
553+
def test_dtensor_save_load_import(self):
554+
for should_import in [True, False]:
555+
device_mesh = self.build_device_mesh()
556+
placements = [Shard(0)]
557+
local_tensor = torch.randn(3, 3)
558+
sharded_tensor = DTensor.from_local(local_tensor, device_mesh, placements)
559+
with tempfile.NamedTemporaryFile() as f:
560+
torch.save(sharded_tensor, f)
561+
import_string = (
562+
"import torch.distributed.tensor;" if should_import else ""
563+
)
564+
filename = pathlib.Path(f.name)
565+
err_msg = (
566+
(
567+
"_pickle.UnpicklingError: Weights only load failed. "
568+
"``torch.distributed.tensor`` must be imported to load DTensors"
569+
)
570+
if not should_import
571+
else None
572+
)
573+
self._attempt_load_from_subprocess(filename, import_string, err_msg)
574+
545575

546576
class DTensorMeshTest(DTensorTestBase):
547577
@property
@@ -943,9 +973,11 @@ def test_split_tensor_1D(self) -> None:
943973
from torch.distributed.tensor._collective_utils import unpad_tensor
944974

945975
unpadded_list = [
946-
unpad_tensor(tensor, shard_placement.dim, pad_sizes[i])
947-
if pad_sizes[i] > 0
948-
else tensor
976+
(
977+
unpad_tensor(tensor, shard_placement.dim, pad_sizes[i])
978+
if pad_sizes[i] > 0
979+
else tensor
980+
)
949981
for i, tensor in enumerate(splitted_tensor_list)
950982
]
951983
expected_is_tensor_empty = [

torch/_weights_only_unpickler.py

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -169,19 +169,6 @@ def _get_allowed_globals():
169169
"builtins.bytearray": bytearray, # for bytearray
170170
"builtins.set": set, # for set
171171
}
172-
# Only add the dtensor related classes if the dtensor module is available
173-
if hasattr(torch.distributed, "tensor"):
174-
dtensor_rc: Dict[str, Any] = {
175-
# DTensor related
176-
"torch.distributed.device_mesh.DeviceMesh": torch.distributed.device_mesh.DeviceMesh,
177-
"torch.distributed.tensor._dtensor_spec.DTensorSpec": torch.distributed.tensor._dtensor_spec.DTensorSpec,
178-
"torch.distributed.tensor._dtensor_spec.TensorMeta": torch.distributed.tensor._dtensor_spec.TensorMeta,
179-
"torch.distributed.tensor.DTensor": torch.distributed.tensor.DTensor,
180-
"torch.distributed.tensor.placement_types.Partial": torch.distributed.tensor.placement_types.Partial,
181-
"torch.distributed.tensor.placement_types.Replicate": torch.distributed.tensor.placement_types.Replicate,
182-
"torch.distributed.tensor.placement_types.Shard": torch.distributed.tensor.placement_types.Shard,
183-
}
184-
rc.update(dtensor_rc)
185172

186173
# dtype
187174
for t in torch.storage._dtype_to_storage_type_map().keys():
@@ -341,6 +328,20 @@ def load(self):
341328
raise UnpicklingError(
342329
"``torch.nested`` and ``torch._dynamo`` must be imported to load nested jagged tensors (NJTs)"
343330
)
331+
elif full_path in (
332+
[
333+
"torch.distributed.device_mesh.DeviceMesh",
334+
"torch.distributed.tensor._dtensor_spec.DTensorSpec",
335+
"torch.distributed.tensor._dtensor_spec.TensorMeta",
336+
"torch.distributed.tensor.DTensor",
337+
"torch.distributed.tensor.placement_types.Partial",
338+
"torch.distributed.tensor.placement_types.Replicate",
339+
"torch.distributed.tensor.placement_types.Shard",
340+
]
341+
):
342+
raise UnpicklingError(
343+
"``torch.distributed.tensor`` must be imported to load DTensors"
344+
)
344345
else:
345346
raise UnpicklingError(
346347
f"Unsupported global: GLOBAL {full_path} was not an allowed global by default. "

torch/distributed/tensor/__init__.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,22 @@
4545
"zeros",
4646
]
4747

48+
# For weights_only torch.load
49+
from ._dtensor_spec import DTensorSpec as _DTensorSpec, TensorMeta as _TensorMeta
50+
51+
52+
torch.serialization.add_safe_globals(
53+
[
54+
DeviceMesh,
55+
_DTensorSpec,
56+
_TensorMeta,
57+
DTensor,
58+
Partial,
59+
Replicate,
60+
Shard,
61+
]
62+
)
63+
4864

4965
# Append DTensor to the list of supported types for foreach implementation for optimizer
5066
# and clip_grad_norm_ so that we will try to use foreach over the for-loop implementation on CUDA.

0 commit comments

Comments (0)