Commit 29abe6e

matsumotosan, SkafteNicki, Borda, pre-commit-ci[bot], and tchaton authored
Expose weights_only for loading checkpoints with Trainer, LightningModule, LightningDataModule (#21072)
* change weights_only default to True
* add docs on weights_only arg
* add weights_only arg to checkpoint save. weights_only during test set based on ckpt version
* add weights_only arg to checkpoint_io
* woops, reverting changes
* permissions too
* fix link
* fix another link
* datamodule weights_only args
* wip: try safe_globals context manager for tests
* add weights_only arg to _run_standard_hparams_test
* weights_only=False when adding extra_args
* switch to lightning_utilities.cli requirements set-oldest (#21077)
* bump: try `deepspeed >=0.14.1,<=0.15.0` (#21076)
* try `deepspeed >=0.14.1,<=0.15.0`
* drop from oldest
* pip uninstall -y deepspeed
* error::DeprecationWarning
* weights_only=True default for torch>=2.6
* changelog
* ignore torch.load futurewarning
* add .*
* will this woork
* weights_only according pl version
* set env var TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD=1 for pl < 1.5.0
* weights_only=False for omegaconf hparams test
* default to weights_only=true for loading from state_dict from url
* weights_only=False for hydra
* Update src/lightning/fabric/utilities/cloud_io.py
  Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com>
* defaults for weights_only in torch.hub.load_state_dict_from_url
* default to weights_only=False for torch.hub.load_state_dict_from_url
* add weights_only to trainer.fit, validate, test, predict
* fix tests
* add weights_only arg
* specify weights_only kwarg
* weights_only for fsdp load
* Apply suggestions from code review
* Apply suggestions from code review
* default is none
* add weights_only args to strategies
* trainer default to weights_only=None
* wip: fix typing dump_checkpoint
* Apply suggestions from code review
* [pre-commit.ci] auto fixes from pre-commit.com hooks
  for more information, see https://pre-commit.ci
* weights_only as last arg
* asset called with none
* weights_only=False for torch>=2.6 in tests
* fix changelog description
* Empty-Commit
* Empty-Commit
* trigger ci
* skip ddp_fork on macos

---------

Co-authored-by: Nicki Skafte Detlefsen <skaftenicki@gmail.com>
Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: thomas chaton <thomas@grid.ai>
Co-authored-by: Jirka B <j.borovec+github@gmail.com>
Co-authored-by: jirka <jirka.borovec@seznam.cz>
Co-authored-by: Deependu <deependujha21@gmail.com>
1 parent b82db78 commit 29abe6e
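
For illustration, a minimal sketch of the user-facing surface this commit exposes. The model and checkpoint names are hypothetical, and the exact keyword placement is an assumption based on the PR title and the changelog entry below:

import lightning.pytorch as pl

trainer = pl.Trainer()

# Resume training; weights_only is forwarded down to torch.load.
# weights_only=None (the default) defers to torch's own default,
# which is True for torch>=2.6 and False for older versions.
trainer.fit(model, ckpt_path="last.ckpt", weights_only=None)

# Evaluating a checkpoint from an untrusted source: restrict
# unpickling to tensors and other primitive types.
trainer.test(model, ckpt_path="downloaded.ckpt", weights_only=True)

# A trusted checkpoint that pickles richer objects (e.g. an nn.Module
# or custom hparams) may require weights_only=False.
model = MyLightningModule.load_from_checkpoint("trusted.ckpt", weights_only=False)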

29 files changed: +228 -78 lines changed


pyproject.toml

Lines changed: 1 addition & 0 deletions
@@ -172,6 +172,7 @@ filterwarnings = [
     # "error::DeprecationWarning",
     "error::FutureWarning",
     "ignore::FutureWarning:onnxscript",  # Temporary ignore until onnxscript is updated
+    "ignore:You are using `torch.load` with `weights_only=False`.*:FutureWarning",
 ]
 xfail_strict = true
 junit_duration_report = "call"
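
The added filter silences the FutureWarning that torch.load emits for weights_only=False loads while the suite otherwise escalates FutureWarning to errors. For reference, pytest filterwarnings entries use the stdlib warning-filter syntax (action:message:category), so the same filter expressed in Python would be:

import warnings

warnings.filterwarnings(
    "ignore",
    message=r"You are using `torch.load` with `weights_only=False`.*",
    category=FutureWarning,
)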

src/lightning/fabric/CHANGELOG.md

Lines changed: 1 addition & 3 deletions
@@ -19,9 +19,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 ### Changed
 
--
-
-
+- Expose `weights_only` argument for `Trainer.{fit,validate,test,predict}` and let `torch` handle the default value ([#21072](https://github.com/Lightning-AI/pytorch-lightning/pull/21072))
 - Set `_DeviceDtypeModuleMixin._device` from torch's default device function ([#21164](https://github.com/Lightning-AI/pytorch-lightning/pull/21164))
 
 

src/lightning/fabric/plugins/io/checkpoint_io.py

Lines changed: 8 additions & 1 deletion
@@ -47,13 +47,20 @@ def save_checkpoint(self, checkpoint: dict[str, Any], path: _PATH, storage_optio
         """
 
     @abstractmethod
-    def load_checkpoint(self, path: _PATH, map_location: Optional[Any] = None) -> dict[str, Any]:
+    def load_checkpoint(
+        self, path: _PATH, map_location: Optional[Any] = None, weights_only: Optional[bool] = None
+    ) -> dict[str, Any]:
         """Load checkpoint from a path when resuming or loading ckpt for test/validate/predict stages.
 
         Args:
             path: Path to checkpoint
             map_location: a function, :class:`torch.device`, string or a dict specifying how to remap storage
                 locations.
+            weights_only: Defaults to ``None``. If ``True``, restricts loading to ``state_dicts`` of plain
+                ``torch.Tensor`` and other primitive types. If loading a checkpoint from a trusted source that
+                contains an ``nn.Module``, use ``weights_only=False``. If loading a checkpoint from an untrusted
+                source, we recommend using ``weights_only=True``. For more information, please refer to the
+                `PyTorch Developer Notes on Serialization Semantics <https://docs.pytorch.org/docs/main/notes/serialization.html#id3>`_.
 
         Returns: The loaded checkpoint.
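
For plugin authors, a minimal sketch of a custom CheckpointIO honoring the widened interface. The subclass and its file handling are hypothetical; only the load_checkpoint signature comes from the diff above:

import os
from typing import Any, Optional

import torch
from lightning.fabric.plugins.io.checkpoint_io import CheckpointIO
from lightning.fabric.utilities.types import _PATH


class MyCheckpointIO(CheckpointIO):
    def save_checkpoint(self, checkpoint: dict[str, Any], path: _PATH, storage_options: Optional[Any] = None) -> None:
        torch.save(checkpoint, path)

    def load_checkpoint(
        self, path: _PATH, map_location: Optional[Any] = None, weights_only: Optional[bool] = None
    ) -> dict[str, Any]:
        if weights_only is None:
            # None means: defer to torch.load's version-dependent default.
            return torch.load(path, map_location=map_location)
        return torch.load(path, map_location=map_location, weights_only=weights_only)

    def remove_checkpoint(self, path: _PATH) -> None:
        os.remove(path)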

src/lightning/fabric/plugins/io/torch_io.py

Lines changed: 10 additions & 2 deletions
@@ -59,14 +59,22 @@ def save_checkpoint(self, checkpoint: dict[str, Any], path: _PATH, storage_optio
 
     @override
     def load_checkpoint(
-        self, path: _PATH, map_location: Optional[Callable] = lambda storage, loc: storage
+        self,
+        path: _PATH,
+        map_location: Optional[Callable] = lambda storage, loc: storage,
+        weights_only: Optional[bool] = None,
     ) -> dict[str, Any]:
         """Loads checkpoint using :func:`torch.load`, with additional handling for ``fsspec`` remote loading of files.
 
         Args:
             path: Path to checkpoint
             map_location: a function, :class:`torch.device`, string or a dict specifying how to remap storage
                 locations.
+            weights_only: Defaults to ``None``. If ``True``, restricts loading to ``state_dicts`` of plain
+                ``torch.Tensor`` and other primitive types. If loading a checkpoint from a trusted source that
+                contains an ``nn.Module``, use ``weights_only=False``. If loading a checkpoint from an untrusted
+                source, we recommend using ``weights_only=True``. For more information, please refer to the
+                `PyTorch Developer Notes on Serialization Semantics <https://docs.pytorch.org/docs/main/notes/serialization.html#id3>`_.
 
         Returns: The loaded checkpoint.

@@ -80,7 +88,7 @@ def load_checkpoint(
         if not fs.exists(path):
             raise FileNotFoundError(f"Checkpoint file not found: {path}")
 
-        return pl_load(path, map_location=map_location)
+        return pl_load(path, map_location=map_location, weights_only=weights_only)
 
     @override
     def remove_checkpoint(self, path: _PATH) -> None:
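
A usage sketch for the plugin above (the checkpoint paths are placeholders):

from lightning.fabric.plugins.io.torch_io import TorchCheckpointIO

ckpt_io = TorchCheckpointIO()

# Checkpoint from an untrusted source: permit only tensors and
# other primitive types during unpickling.
state = ckpt_io.load_checkpoint("downloaded.ckpt", weights_only=True)

# Trusted checkpoint that pickles richer objects (e.g. an nn.Module):
state = ckpt_io.load_checkpoint("my_run.ckpt", weights_only=False)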

src/lightning/fabric/strategies/deepspeed.py

Lines changed: 2 additions & 1 deletion
@@ -473,6 +473,7 @@ def load_checkpoint(
         path: _PATH,
         state: Optional[Union[Module, Optimizer, dict[str, Union[Module, Optimizer, Any]]]] = None,
         strict: bool = True,
+        weights_only: Optional[bool] = None,
     ) -> dict[str, Any]:
         """Load the contents from a checkpoint and restore the state of the given objects.
 
@@ -498,7 +499,7 @@ def load_checkpoint(
             # This code path enables loading a checkpoint from a non-deepspeed checkpoint or from
             # a consolidated checkpoint
             path = self.broadcast(path)
-            return super().load_checkpoint(path=path, state=state, strict=strict)
+            return super().load_checkpoint(path=path, state=state, strict=strict, weights_only=weights_only)
 
         if not state:
             raise ValueError(

src/lightning/fabric/strategies/fsdp.py

Lines changed: 2 additions & 1 deletion
@@ -516,6 +516,7 @@ def load_checkpoint(
         path: _PATH,
         state: Optional[Union[Module, Optimizer, dict[str, Union[Module, Optimizer, Any]]]] = None,
         strict: bool = True,
+        weights_only: Optional[bool] = None,
     ) -> dict[str, Any]:
         """Load the contents from a checkpoint and restore the state of the given objects."""
         if not state:

@@ -586,7 +587,7 @@ def load_checkpoint(
             optim.load_state_dict(flattened_osd)
 
         # Load metadata (anything not a module or optimizer)
-        metadata = torch.load(path / _METADATA_FILENAME)
+        metadata = torch.load(path / _METADATA_FILENAME, weights_only=weights_only)
         requested_metadata_keys = state.keys() - modules.keys() - optimizers.keys()
         _validate_keys_for_strict_loading(requested_metadata_keys, metadata.keys(), strict=strict)
         for key in requested_metadata_keys:

src/lightning/fabric/strategies/model_parallel.py

Lines changed: 5 additions & 3 deletions
@@ -275,6 +275,7 @@ def load_checkpoint(
         path: _PATH,
         state: Optional[Union[Module, Optimizer, dict[str, Union[Module, Optimizer, Any]]]] = None,
         strict: bool = True,
+        weights_only: Optional[bool] = None,
     ) -> dict[str, Any]:
         """Load the contents from a checkpoint and restore the state of the given objects."""
         if not state:

@@ -295,7 +296,7 @@ def load_checkpoint(
                 f"Loading a single optimizer object from a checkpoint is not supported yet with {type(self).__name__}."
             )
 
-        return _load_checkpoint(path=path, state=state, strict=strict)
+        return _load_checkpoint(path=path, state=state, strict=strict, weights_only=weights_only)
 
     def _setup_distributed(self) -> None:
         reset_seed()

@@ -411,6 +412,7 @@ def _load_checkpoint(
     state: dict[str, Union[Module, Optimizer, Any]],
     strict: bool = True,
     optimizer_states_from_list: bool = False,
+    weights_only: Optional[bool] = None,
 ) -> dict[str, Any]:
     from torch.distributed.checkpoint.state_dict import (
         StateDictOptions,

@@ -449,7 +451,7 @@ def _load_checkpoint(
         set_optimizer_state_dict(module, optim, optim_state_dict=optim_state[optim_key], options=state_dict_options)
 
     # Load metadata (anything not a module or optimizer)
-    metadata = torch.load(path / _METADATA_FILENAME)
+    metadata = torch.load(path / _METADATA_FILENAME, weights_only=weights_only)
     requested_metadata_keys = state.keys() - modules.keys() - optimizers.keys()
     _validate_keys_for_strict_loading(requested_metadata_keys, metadata.keys(), strict=strict)
     for key in requested_metadata_keys:

@@ -461,7 +463,7 @@ def _load_checkpoint(
         return metadata
 
     if _is_full_checkpoint(path):
-        checkpoint = torch.load(path, mmap=True, map_location="cpu", weights_only=False)
+        checkpoint = torch.load(path, mmap=True, map_location="cpu", weights_only=weights_only)
         _load_raw_module_state(checkpoint.pop(module_key), module, strict=strict)
 
         state_dict_options = StateDictOptions(

src/lightning/fabric/strategies/strategy.py

Lines changed: 2 additions & 1 deletion
@@ -310,6 +310,7 @@ def load_checkpoint(
         path: _PATH,
         state: Optional[Union[Module, Optimizer, dict[str, Union[Module, Optimizer, Any]]]] = None,
         strict: bool = True,
+        weights_only: Optional[bool] = None,
     ) -> dict[str, Any]:
         """Load the contents from a checkpoint and restore the state of the given objects.
 
@@ -330,7 +331,7 @@ def load_checkpoint(
 
         """
         torch.cuda.empty_cache()
-        checkpoint = self.checkpoint_io.load_checkpoint(path)
+        checkpoint = self.checkpoint_io.load_checkpoint(path, weights_only=weights_only)
         if not state:
             return checkpoint
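
The commit's test changes mention a safe_globals context manager. As background, a self-contained sketch of how weights_only=True interacts with non-tensor objects in a checkpoint; it requires torch>=2.5 for torch.serialization.safe_globals, and the RunConfig class is hypothetical:

import torch
from torch.serialization import safe_globals


class RunConfig:  # stands in for any custom object pickled into a checkpoint
    def __init__(self, lr: float) -> None:
        self.lr = lr


torch.save({"state_dict": {"w": torch.zeros(2)}, "config": RunConfig(1e-3)}, "demo.ckpt")

# weights_only=True rejects arbitrary classes during unpickling...
try:
    torch.load("demo.ckpt", weights_only=True)
except Exception as err:
    print("rejected:", type(err).__name__)

# ...unless they are explicitly allowlisted.
with safe_globals([RunConfig]):
    ckpt = torch.load("demo.ckpt", weights_only=True)
print(ckpt["config"].lr)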

src/lightning/fabric/strategies/xla_fsdp.py

Lines changed: 2 additions & 1 deletion
@@ -516,6 +516,7 @@ def load_checkpoint(
         path: _PATH,
         state: Optional[Union[Module, Optimizer, dict[str, Union[Module, Optimizer, Any]]]] = None,
         strict: bool = True,
+        weights_only: Optional[bool] = None,
     ) -> dict[str, Any]:
         """Given a folder, load the contents from a checkpoint and restore the state of the given objects.
 
@@ -608,7 +609,7 @@ def load_checkpoint(
         )
         if "model" not in state or not isinstance(model := state["model"], torch.nn.Module):
             raise NotImplementedError("XLAFSDP only supports a single model instance with 'model' as the key.")
-        full_ckpt = torch.load(path)
+        full_ckpt = torch.load(path, weights_only=weights_only)
         model.load_state_dict(full_ckpt.pop("model"), strict=strict)
         return full_ckpt

src/lightning/fabric/utilities/cloud_io.py

Lines changed: 15 additions & 3 deletions
@@ -17,7 +17,7 @@
 import io
 import logging
 from pathlib import Path
-from typing import IO, Any, Union
+from typing import IO, Any, Optional, Union
 
 import fsspec
 import fsspec.utils

@@ -34,13 +34,18 @@
 def _load(
     path_or_url: Union[IO, _PATH],
     map_location: _MAP_LOCATION_TYPE = None,
-    weights_only: bool = False,
+    weights_only: Optional[bool] = None,
 ) -> Any:
     """Loads a checkpoint.
 
     Args:
         path_or_url: Path or URL of the checkpoint.
         map_location: a function, ``torch.device``, string or a dict specifying how to remap storage locations.
+        weights_only: If ``True``, restricts loading to ``state_dicts`` of plain ``torch.Tensor`` and other primitive
+            types. If loading a checkpoint from a trusted source that contains an ``nn.Module``, use
+            ``weights_only=False``. If loading a checkpoint from an untrusted source, we recommend using
+            ``weights_only=True``. For more information, please refer to the
+            `PyTorch Developer Notes on Serialization Semantics <https://docs.pytorch.org/docs/main/notes/serialization.html#id3>`_.
 
     """
     if not isinstance(path_or_url, (str, Path)):

@@ -51,6 +56,13 @@ def _load(
             weights_only=weights_only,
         )
     if str(path_or_url).startswith("http"):
+        if weights_only is None:
+            weights_only = False
+            log.debug(
+                f"Defaulting to `weights_only=False` for remote checkpoint: {path_or_url}."
+                f" If loading a checkpoint from an untrusted source, we recommend using `weights_only=True`."
+            )
+
         return torch.hub.load_state_dict_from_url(
             str(path_or_url),
             map_location=map_location,  # type: ignore[arg-type]

@@ -70,7 +82,7 @@ def get_filesystem(path: _PATH, **kwargs: Any) -> AbstractFileSystem:
     return fs
 
 
-def _atomic_save(checkpoint: dict[str, Any], filepath: Union[str, Path]) -> None:
+def _atomic_save(checkpoint: dict[str, Any], filepath: _PATH) -> None:
     """Saves a checkpoint atomically, avoiding the creation of incomplete checkpoints.
 
     Args:
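
Putting the pieces together: with weights_only=None, remote (http) checkpoints are now explicitly coerced to weights_only=False (with a debug log), while other inputs defer to torch.load's version-dependent default. A hedged usage sketch of this private helper, which other modules import as pl_load; the URL and paths are placeholders:

from lightning.fabric.utilities.cloud_io import _load as pl_load

# Remote checkpoint: None falls back to weights_only=False before
# torch.hub.load_state_dict_from_url is called.
ckpt = pl_load("https://example.com/model.ckpt")

# Local checkpoint: None is passed through, so torch's own default
# applies (True for torch>=2.6, False for older versions).
ckpt = pl_load("checkpoints/epoch=3.ckpt")

# An explicit value always wins, remote or local.
ckpt = pl_load("https://example.com/model.ckpt", weights_only=True)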
