
Commit bf2e4af

Merge branch 'master' into ci/fabric-T4_x_4

2 parents 0b0233a + 92544c3

File tree

30 files changed: +498 additions, -81 deletions

.lightning/workflows/fabric.yml

Lines changed: 2 additions & 2 deletions

@@ -1,8 +1,8 @@
 trigger:
   push:
-    branches: ["master"]
+    branches: ["master", "release/stable"]
   pull_request:
-    branches: ["master"]
+    branches: ["master", "release/stable"]

 timeout: "55" # minutes
 parametrize:

.lightning/workflows/pytorch.yml

Lines changed: 2 additions & 2 deletions

@@ -1,8 +1,8 @@
 trigger:
   push:
-    branches: ["master"]
+    branches: ["master", "release/stable"]
   pull_request:
-    branches: ["master"]
+    branches: ["master", "release/stable"]

 timeout: "55" # minutes
 parametrize:

Makefile

Lines changed: 6 additions & 10 deletions

@@ -45,12 +45,8 @@ clean:
     rm -rf src/lightning_fabric/*/
     rm -rf src/pytorch_lightning/*/

-test: clean
+test: clean setup
     # Review the CONTRIBUTING documentation for other ways to test.
-    pip install -e . \
-        -r requirements/pytorch/base.txt \
-        -r requirements/fabric/base.txt \
-        -r requirements/pytorch/test.txt \

     # run tests with coverage
     python -m coverage run --source src/lightning/pytorch -m pytest src/lightning/pytorch tests/tests_pytorch -v
@@ -59,18 +55,18 @@ test: clean

 docs: docs-pytorch

-sphinx-theme:
-    pip install -q awscli
+sphinx-theme: setup
+    uv pip install -q awscli
     mkdir -p dist/
     aws s3 sync --no-sign-request s3://sphinx-packages/ dist/
-    pip install lai-sphinx-theme -f dist/
+    uv pip install lai-sphinx-theme -f dist/

 docs-fabric: clean sphinx-theme
-    pip install -e .[all] --quiet -r requirements/fabric/docs.txt
+    uv pip install -e '.[all]' --quiet -r requirements/fabric/docs.txt
     cd docs/source-fabric && $(MAKE) html --jobs $(nproc)

 docs-pytorch: clean sphinx-theme
-    pip install -e .[all] --quiet -r requirements/pytorch/docs.txt
+    uv pip install -e '.[all]' --quiet -r requirements/pytorch/docs.txt
     cd docs/source-pytorch && $(MAKE) html --jobs $(nproc)

 update:

docs/source-pytorch/advanced/speed.rst

Lines changed: 12 additions & 1 deletion

@@ -297,7 +297,8 @@ Validation Within Training Epoch

 For large datasets, it's often desirable to check validation multiple times within a training epoch.
 Pass in a float to check that often within one training epoch. Pass in an int ``K`` to check every ``K`` training batch.
-Must use an ``int`` if using an :class:`~torch.utils.data.IterableDataset`.
+Must use an ``int`` if using an :class:`~torch.utils.data.IterableDataset`. Alternatively, pass a string ("DD:HH:MM:SS"),
+a dict of ``datetime.timedelta`` kwargs, or a ``datetime.timedelta`` to check validation after a given amount of wall-clock time.

 .. testcode::

@@ -310,6 +311,16 @@ Must use an ``int`` if using an :class:`~torch.utils.data.IterableDataset`.
     # check every 100 train batches (ie: for IterableDatasets or fixed frequency)
     trainer = Trainer(val_check_interval=100)

+    # check validation every 15 minutes of wall-clock time
+    trainer = Trainer(val_check_interval="00:00:15:00")
+
+    # alternatively, pass a dict of timedelta kwargs
+    trainer = Trainer(val_check_interval={"minutes": 1})
+
+    # or use a timedelta object directly
+    from datetime import timedelta
+    trainer = Trainer(val_check_interval=timedelta(hours=1))
+
 Learn more in our :ref:`trainer_flags` guide.

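As a side note on the accepted formats above, here is a minimal sketch (not Lightning's actual parser; the to_timedelta helper is hypothetical) showing how the three documented forms - a "DD:HH:MM:SS" string, a dict of timedelta kwargs, and a datetime.timedelta - all map onto the same duration:

from datetime import timedelta

# Hypothetical helper for illustration only: normalize the documented input
# forms into a single datetime.timedelta.
def to_timedelta(interval):
    if isinstance(interval, timedelta):
        return interval
    if isinstance(interval, dict):
        return timedelta(**interval)
    if isinstance(interval, str):  # "DD:HH:MM:SS"
        days, hours, minutes, seconds = (int(part) for part in interval.split(":"))
        return timedelta(days=days, hours=hours, minutes=minutes, seconds=seconds)
    raise ValueError(f"Unsupported interval: {interval!r}")

print(to_timedelta("00:00:15:00"))       # 0:15:00
print(to_timedelta({"minutes": 1}))      # 0:01:00
print(to_timedelta(timedelta(hours=1)))  # 1:00:00
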
docs/source-pytorch/common/trainer.rst

Lines changed: 28 additions & 1 deletion

@@ -991,11 +991,23 @@ val_check_interval
     :muted:

 How often within one training epoch to check the validation set.
-Can specify as float or int.
+Can specify as float, int, or a time-based duration.

 - pass a ``float`` in the range [0.0, 1.0] to check after a fraction of the training epoch.
 - pass an ``int`` to check after a fixed number of training batches. An ``int`` value can only be higher than the number of training
   batches when ``check_val_every_n_epoch=None``, which validates after every ``N`` training batches across epochs or iteration-based training.
+- pass a ``string`` duration in the format "DD:HH:MM:SS", a ``datetime.timedelta`` object, or a ``dictionary`` of keyword arguments that can be passed
+  to ``datetime.timedelta`` for time-based validation. When using a time-based duration, validation will trigger once the elapsed wall-clock time
+  since the last validation exceeds the interval. The validation check occurs after the current batch completes, the validation loop runs, and
+  the timer resets.
+
+**Time-based validation behavior with check_val_every_n_epoch:** When used together with ``val_check_interval`` (time-based) and
+``check_val_every_n_epoch > 1``, validation is aligned to epoch multiples:
+
+- If the time-based interval elapses **before** the next multiple-N epoch, validation runs at the start of that epoch (after the first batch),
+  and the timer resets.
+- If the interval elapses **during** a multiple-N epoch, validation runs after the current batch.
+- For cases where ``check_val_every_n_epoch=None`` or ``1``, the time-based behavior of ``val_check_interval`` applies without additional alignment.

 .. testcode::

@@ -1013,10 +1025,25 @@ Can specify as float or int.
     # (ie: production cases with streaming data)
     trainer = Trainer(val_check_interval=1000, check_val_every_n_epoch=None)

+    # check validation every 15 minutes of wall-clock time using a string-based approach
+    trainer = Trainer(val_check_interval="00:00:15:00")
+
+    # check validation every 15 minutes of wall-clock time using a dictionary-based approach
+    trainer = Trainer(val_check_interval={"minutes": 15})
+
+    # check validation every 1 hour of wall-clock time using a dictionary-based approach
+    trainer = Trainer(val_check_interval={"hours": 1})
+
+    # check validation every 1 hour of wall-clock time using a datetime.timedelta object
+    from datetime import timedelta
+    trainer = Trainer(val_check_interval=timedelta(hours=1))
+

 .. code-block:: python

     # Here is the computation to estimate the total number of batches seen within an epoch.
+    # This logic applies when `val_check_interval` is specified as an integer or a float.

     # Find the total number of train batches
     total_train_batches = total_train_samples // (train_batch_size * world_size)

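To make the wall-clock semantics described above concrete, here is a simplified sketch of the documented trigger logic (an illustration only, not the Trainer's internal loop code; the TimeBasedValCheck name is hypothetical): validation runs after the batch during which the elapsed time exceeds the interval, and the timer then resets.

import time
from datetime import timedelta

class TimeBasedValCheck:
    """Illustrative stand-in for the documented time-based `val_check_interval` behavior."""

    def __init__(self, interval: timedelta) -> None:
        self.interval = interval.total_seconds()
        self.last_check = time.monotonic()

    def should_validate(self) -> bool:
        # Called after each training batch completes.
        if time.monotonic() - self.last_check >= self.interval:
            self.last_check = time.monotonic()  # reset the timer
            return True
        return False

checker = TimeBasedValCheck(timedelta(minutes=15))
# inside the training loop, after each batch:
#     if checker.should_validate():
#         run_validation_loop()
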
requirements/fabric/extra.txt

Lines changed: 4 additions & 0 deletions

@@ -0,0 +1,4 @@
+# NOTE: the upper bound for the package version is only set for CI stability, and it is dropped while installing this package
+# in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment
+
+hydra-core >=1.2.0, <1.4.0

src/lightning/__setup__.py

Lines changed: 1 addition & 1 deletion

@@ -41,7 +41,7 @@ def _prepare_extras() -> dict[str, Any]:
     }

     # project specific extras groups
-    extras["fabric-all"] = extras["fabric-strategies"] + extras["fabric-examples"]
+    extras["fabric-all"] = extras["fabric-extra"] + extras["fabric-strategies"] + extras["fabric-examples"]
     extras["fabric-dev"] = extras["fabric-all"] + extras["fabric-test"]
     extras["pytorch-all"] = extras["pytorch-extra"] + extras["pytorch-strategies"] + extras["pytorch-examples"]
     extras["pytorch-dev"] = extras["pytorch-all"] + extras["pytorch-test"]

src/lightning/fabric/CHANGELOG.md

Lines changed: 5 additions & 1 deletion

@@ -22,14 +22,18 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

 ### Changed

--
+- let `_get_default_process_group_backend_for_device` support more hardware platforms
+  ([#21057](https://github.com/Lightning-AI/pytorch-lightning/pull/21057), [#21093](https://github.com/Lightning-AI/pytorch-lightning/pull/21093))


 ### Fixed

 - Fixed with adding a missing device id for pytorch 2.8 ([#21105](https://github.com/Lightning-AI/pytorch-lightning/pull/21105))


+- Respect `verbose=False` in `seed_everything` when no seed is provided
+
 ---

 ## [2.5.4] - 2025-08-29

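For the `seed_everything` entry above, the corresponding usage looks roughly like this (a sketch; the exact logging behavior depends on the installed Lightning version):

from lightning.fabric import seed_everything

# No seed given: Lightning selects one. With verbose=False, the changelog entry
# above says the informational message about the chosen seed should be suppressed.
seed = seed_everything(verbose=False)
print(seed)
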
src/lightning/fabric/strategies/ddp.py

Lines changed: 11 additions & 1 deletion

@@ -160,7 +160,17 @@ def barrier(self, *args: Any, **kwargs: Any) -> None:
         if torch.distributed.get_backend() == "nccl":
             torch.distributed.barrier(device_ids=self._determine_ddp_device_ids())
         else:
-            torch.distributed.barrier()
+            # Handle PyTorch bug where barrier() fails on CPU with "PrivateUse1HooksInterface" error
+            try:
+                torch.distributed.barrier()
+            except RuntimeError as e:
+                if "PrivateUse1HooksInterface" in str(e):
+                    # Fallback: Use all_reduce as barrier - all processes must participate
+                    # This achieves the same synchronization effect as barrier()
+                    dummy_tensor = torch.tensor(0.0, device=self.root_device)
+                    torch.distributed.all_reduce(dummy_tensor)
+                else:
+                    raise

     @override
     def broadcast(self, obj: TBroadcast, src: int = 0) -> TBroadcast:

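For context on the fallback in the hunk above, this is the all_reduce-as-barrier pattern as a standalone sketch (assuming a process group has already been initialized, e.g. with the "gloo" backend on CPU; the function name is illustrative):

import torch
import torch.distributed as dist

def barrier_via_all_reduce(device: torch.device = torch.device("cpu")) -> None:
    # An all_reduce over a dummy tensor blocks every rank until all ranks have
    # contributed, giving the same synchronization effect as a barrier.
    dummy = torch.tensor(0.0, device=device)
    dist.all_reduce(dummy)
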
src/lightning/fabric/utilities/distributed.py

Lines changed: 5 additions & 1 deletion

@@ -319,7 +319,11 @@ def _destroy_dist_connection() -> None:


 def _get_default_process_group_backend_for_device(device: torch.device) -> str:
-    return "nccl" if device.type == "cuda" else "gloo"
+    """Return corresponding distributed backend for a given device."""
+    device_backend_map = torch.distributed.Backend.default_device_backend_map
+    if device.type in device_backend_map:
+        return device_backend_map[device.type]
+    return "gloo"


 class _DatasetSamplerWrapper(Dataset):

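To see what the new lookup resolves to in practice, a small example (output depends on the installed PyTorch build; unknown device types fall back to "gloo"):

import torch
import torch.distributed as dist

# The class attribute consulted by the new implementation, e.g.
# {"cpu": "gloo", "cuda": "nccl", ...} on a typical CUDA-enabled build.
print(dist.Backend.default_device_backend_map)

for device in (torch.device("cpu"), torch.device("cuda")):
    backend = dist.Backend.default_device_backend_map.get(device.type, "gloo")
    print(device.type, "->", backend)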