
Commit 300abb3

Adding non-layer param count to summary (Lightning-AI#17005)

Authored-by: rhiga2
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com>
Co-authored-by: awaelchli <aedu.waelchli@gmail.com>

1 parent d7b668e commit 300abb3

File tree: 4 files changed, +81 −1 lines

src/lightning/pytorch/CHANGELOG.md (3 additions, 0 deletions)

```diff
@@ -51,6 +51,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Added CLI option `--map-to-cpu` to the checkpoint upgrade script to enable converting GPU checkpoints on a CPU-only machine ([#17527](https://github.com/Lightning-AI/lightning/pull/17527))
 
 
+- Added non-layer param count to the model summary ([#17005](https://github.com/Lightning-AI/lightning/pull/17005))
+
+
 ### Changed
 
 - Removed the limitation to call `self.trainer.model.parameters()` in `LightningModule.configure_optimizers()` ([#17309](https://github.com/Lightning-AI/lightning/pull/17309))
```

src/lightning/pytorch/utilities/model_summary/model_summary.py (25 additions, 0 deletions)

```diff
@@ -32,6 +32,8 @@
 
 PARAMETER_NUM_UNITS = [" ", "K", "M", "B", "T"]
 UNKNOWN_SIZE = "?"
+LEFTOVER_PARAMS_NAME = "other params"
+NOT_APPLICABLE = "n/a"
 
 
 class LayerSummary:
@@ -141,6 +143,9 @@ class ModelSummary:
     intermediate input- and output shapes of all layers. Supported are tensors and
     nested lists and tuples of tensors. All other types of inputs will be skipped and show as `?`
     in the summary table. The summary will also display `?` for layers not used in the forward pass.
+    If there are parameters not associated with any layers or modules, the count of those parameters
+    will be displayed in the table under `other params`. The summary will display `n/a` for module type,
+    in size, and out size.
 
     Example::
 
@@ -235,6 +240,10 @@ def trainable_parameters(self) -> int:
             p.numel() if not _is_lazy_weight_tensor(p) else 0 for p in self._model.parameters() if p.requires_grad
         )
 
+    @property
+    def total_layer_params(self) -> int:
+        return sum(self.param_nums)
+
     @property
     def model_size(self) -> float:
         return self.total_parameters * self._precision_megabytes
@@ -292,8 +301,24 @@ def _get_summary_data(self) -> List[Tuple[str, List[str]]]:
             arrays.append(("In sizes", [str(x) for x in self.in_sizes]))
             arrays.append(("Out sizes", [str(x) for x in self.out_sizes]))
 
+        total_leftover_params = self.total_parameters - self.total_layer_params
+        if total_leftover_params > 0:
+            self._add_leftover_params_to_summary(arrays, total_leftover_params)
+
         return arrays
 
+    def _add_leftover_params_to_summary(self, arrays: List[Tuple[str, List[str]]], total_leftover_params: int) -> None:
+        """Add summary of params not associated with module or layer to model summary."""
+        layer_summaries = dict(arrays)
+        layer_summaries[" "].append(" ")
+        layer_summaries["Name"].append(LEFTOVER_PARAMS_NAME)
+        layer_summaries["Type"].append(NOT_APPLICABLE)
+        layer_summaries["Params"].append(get_human_readable_count(total_leftover_params))
+        if "In sizes" in layer_summaries:
+            layer_summaries["In sizes"].append(NOT_APPLICABLE)
+        if "Out sizes" in layer_summaries:
+            layer_summaries["Out sizes"].append(NOT_APPLICABLE)
+
     def __str__(self) -> str:
         arrays = self._get_summary_data()
```

src/lightning/pytorch/utilities/model_summary/model_summary_deepspeed.py (11 additions, 0 deletions)

```diff
@@ -25,6 +25,7 @@
     get_human_readable_count,
     LayerSummary,
     ModelSummary,
+    NOT_APPLICABLE,
 )
 
 
@@ -96,4 +97,14 @@ def _get_summary_data(self) -> List[Tuple[str, List[str]]]:
             arrays.append(("In sizes", [str(x) for x in self.in_sizes]))
             arrays.append(("Out sizes", [str(x) for x in self.out_sizes]))
 
+        total_leftover_params = self.total_parameters - self.total_layer_params
+        if total_leftover_params > 0:
+            self._add_leftover_params_to_summary(arrays, total_leftover_params)
+
         return arrays
+
+    def _add_leftover_params_to_summary(self, arrays: List[Tuple[str, List[str]]], total_leftover_params: int) -> None:
+        """Add summary of params not associated with module or layer to model summary."""
+        super()._add_leftover_params_to_summary(arrays, total_leftover_params)
+        layer_summaries = dict(arrays)
+        layer_summaries["Params per Device"].append(NOT_APPLICABLE)
```

tests/tests_pytorch/utilities/test_model_summary.py (42 additions, 1 deletion)

```diff
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from collections import OrderedDict
 from typing import Any
 
 import pytest
@@ -19,7 +20,13 @@
 
 from lightning.pytorch import LightningModule, Trainer
 from lightning.pytorch.demos.boring_classes import BoringModel
-from lightning.pytorch.utilities.model_summary.model_summary import ModelSummary, summarize, UNKNOWN_SIZE
+from lightning.pytorch.utilities.model_summary.model_summary import (
+    LEFTOVER_PARAMS_NAME,
+    ModelSummary,
+    NOT_APPLICABLE,
+    summarize,
+    UNKNOWN_SIZE,
+)
 from tests_pytorch.helpers.advanced_models import ParityModuleRNN
 from tests_pytorch.helpers.runif import RunIf
 
@@ -137,6 +144,18 @@ def forward(self, inp):
         return self.head(self.branch1(inp), self.branch2(inp))
 
 
+class NonLayerParamsModel(LightningModule):
+    """A model with parameters not associated with pytorch layer."""
+
+    def __init__(self):
+        super().__init__()
+        self.param = torch.nn.Parameter(torch.ones(2, 2))
+        self.layer = torch.nn.Linear(2, 2)
+
+    def forward(self, inp):
+        self.layer(self.param @ inp)
+
+
 def test_invalid_max_depth():
     """Test that invalid value for max_depth raises an error."""
     model = LightningModule()
@@ -358,3 +377,25 @@ def example_input_array(self) -> Any:
     summary_data = summary._get_summary_data()
     for column_name, entries in summary_data:
         assert all(isinstance(entry, str) for entry in entries)
+
+
+@pytest.mark.parametrize("example_input", [None, torch.ones(2, 2)])
+def test_summary_data_with_non_layer_params(example_input):
+    model = NonLayerParamsModel()
+    model.example_input_array = example_input
+
+    summary = summarize(model)
+    summary_data = OrderedDict(summary._get_summary_data())
+    assert summary_data[" "][-1] == " "
+    assert summary_data["Name"][-1] == LEFTOVER_PARAMS_NAME
+    assert summary_data["Type"][-1] == NOT_APPLICABLE
+    assert int(summary_data["Params"][-1]) == 4
+    if example_input is not None:
+        assert summary_data["In sizes"][-1] == NOT_APPLICABLE
+        assert summary_data["Out sizes"][-1] == NOT_APPLICABLE
+
+
+def test_summary_data_with_no_non_layer_params():
+    summary = summarize(PreCalculatedModel())
+    summary_data = OrderedDict(summary._get_summary_data())
+    assert summary_data["Name"][-1] != LEFTOVER_PARAMS_NAME
```
