MathieuHaller
diff --git a/.github/workflows/pythonapp-gpu.yml b/.github/workflows/pythonapp-gpu.yml
Lines changed: 1 addition & 1 deletion
diff --git a/monai/apps/auto3dseg/data_analyzer.py b/monai/apps/auto3dseg/data_analyzer.py
Lines changed: 10 additions & 7 deletions
diff --git a/monai/auto3dseg/__init__.py b/monai/auto3dseg/__init__.py
Lines changed: 1 addition & 0 deletions
diff --git a/monai/auto3dseg/analyzer.py b/monai/auto3dseg/analyzer.py
Lines changed: 29 additions & 9 deletions
diff --git a/monai/auto3dseg/seg_summarizer.py b/monai/auto3dseg/seg_summarizer.py
Lines changed: 1 addition & 1 deletion
diff --git a/monai/auto3dseg/utils.py b/monai/auto3dseg/utils.py
Lines changed: 4 additions & 1 deletion
@@ -23,7 +23,7 @@ jobs:
  - "PT17+CUDA102"
  - "PT18+CUDA102"
  - "PT18+CUDA112"
- - "PT112+CUDA117"
+ - "PT112+CUDA118"
  - "PT110+CUDA102"
  - "PT112+CUDA102"
  include:
 
@@ -122,8 +122,8 @@ def __init__(
  output_path: str = "./data_stats.yaml",
  average: bool = True,
  do_ccp: bool = True,
- device: Union[str, torch.device] = "cuda",
- worker: int = 0,
+ device: Union[str, torch.device] = "cpu",
+ worker: int = 2,
  image_key: str = "image",
  label_key: Optional[str] = "label",
  ):
@@ -137,13 +137,10 @@ def __init__(
  self.average = average
  self.do_ccp = do_ccp
  self.device = torch.device(device)
- self.worker = worker
+ self.worker = 0 if (self.device.type == "cuda") else worker
  self.image_key = image_key
  self.label_key = label_key
 
- if (self.device.type == "cuda") and (worker > 0):
- raise ValueError("CUDA does not support multiple subprocess. If device is GPU, please set worker to 0")
-
  @staticmethod
  def _check_data_uniformity(keys: List[str], result: Dict):
  """
@@ -232,8 +229,14 @@ def get_all_case_stats(self):
  result[DataStatsKeys.SUMMARY] = summarizer.summarize(result[DataStatsKeys.BY_CASE])
 
  if not self._check_data_uniformity([ImageStatsKeys.SPACING], result):
- logger.warning("Data is not completely uniform. MONAI transforms may provide unexpected result")
+ logger.warning("data spacing is not completely uniform. MONAI transforms may provide unexpected result")
 
  ConfigParser.export_config_file(result, self.output_path, fmt="yaml", default_flow_style=None)
 
+ del d["image"], d["label"]
+ if self.device.type == "cuda":
+ # release unreferenced tensors to mitigate OOM
+ # limitation: https://github.com/pytorch/pytorch/issues/12873#issuecomment-482916237
+ torch.cuda.empty_cache()
+
  return result
@@ -11,6 +11,7 @@
 
 from .algo_gen import Algo, AlgoGen
 from .analyzer import (
+ Analyzer,
  FgImageStats,
  FgImageStatsSumm,
  FilenameStats,
 
@@ -229,8 +229,10 @@ def __call__(self, data):
  """
  d = dict(data)
  start = time.time()
- ndas = data[self.image_key]
- ndas = [ndas[i] for i in range(ndas.shape[0])]
+ restore_grad_state = torch.is_grad_enabled()
+ torch.set_grad_enabled(False)
+
+ ndas = [d[self.image_key][i] for i in range(d[self.image_key].shape[0])]
  if "nda_croppeds" not in d:
  nda_croppeds = [get_foreground_image(nda) for nda in ndas]
 
@@ -250,8 +252,10 @@ def __call__(self, data):
  if not verify_report_format(report, self.get_report_format()):
  raise RuntimeError(f"report generated by {self.__class__} differs from the report format.")
 
- logger.debug(f"Get image stats spent {time.time()-start}")
  d[self.stats_name] = report
+
+ torch.set_grad_enabled(restore_grad_state)
+ logger.debug(f"Get image stats spent {time.time()-start}")
  return d
 
 
@@ -307,9 +311,11 @@ def __call__(self, data) -> dict:
  """
 
  d = dict(data)
+ start = time.time()
+ restore_grad_state = torch.is_grad_enabled()
+ torch.set_grad_enabled(False)
 
- ndas = d[self.image_key] # (1,H,W,D) or (C,H,W,D)
- ndas = [ndas[i] for i in range(ndas.shape[0])]
+ ndas = [d[self.image_key][i] for i in range(d[self.image_key].shape[0])]
  ndas_label = d[self.label_key] # (H,W,D)
  nda_foregrounds = [get_foreground_label(nda, ndas_label) for nda in ndas]
 
@@ -324,6 +330,9 @@ def __call__(self, data) -> dict:
  raise RuntimeError(f"report generated by {self.__class__} differs from the report format.")
 
  d[self.stats_name] = report
+
+ torch.set_grad_enabled(restore_grad_state)
+ logger.debug(f"Get foreground image stats spent {time.time()-start}")
  return d
 
 
@@ -423,9 +432,12 @@ def __call__(self, data):
  functions. If the input has nan/inf, the stats results will be nan/inf.
  """
  d = dict(data)
+ start = time.time()
+ using_cuda = True if d[self.image_key].device.type == "cuda" else False
+ restore_grad_state = torch.is_grad_enabled()
+ torch.set_grad_enabled(False)
 
- ndas = d[self.image_key] # (1,H,W,D) or (C,H,W,D)
- ndas = [ndas[i] for i in range(ndas.shape[0])]
+ ndas = [d[self.image_key][i] for i in range(d[self.image_key].shape[0])]
  ndas_label = d[self.label_key] # (H,W,D)
  nda_foregrounds = [get_foreground_label(nda, ndas_label) for nda in ndas]
 
@@ -435,7 +447,6 @@ def __call__(self, data):
 
  unique_label = unique_label.astype(np.int8).tolist()
 
- start = time.time()
  label_substats = [] # each element is one label
  pixel_sum = 0
  pixel_arr = []
@@ -444,13 +455,20 @@ def __call__(self, data):
  label_dict: Dict[str, Any] = {}
  mask_index = ndas_label == index
 
+ nda_masks = [nda[mask_index] for nda in ndas]
  label_dict[LabelStatsKeys.IMAGE_INTST] = [
- self.ops[LabelStatsKeys.IMAGE_INTST].evaluate(nda[mask_index]) for nda in ndas
+ self.ops[LabelStatsKeys.IMAGE_INTST].evaluate(nda_m) for nda_m in nda_masks
  ]
+
  pixel_count = sum(mask_index)
  pixel_arr.append(pixel_count)
  pixel_sum += pixel_count
  if self.do_ccp: # apply connected component
+ if using_cuda:
+ # The back end of get_label_ccp is CuPy
+ # which is unable to automatically release CUDA GPU memory held by PyTorch
+ del nda_masks
+ torch.cuda.empty_cache()
  shape_list, ncomponents = get_label_ccp(mask_index)
  label_dict[LabelStatsKeys.LABEL_SHAPE] = shape_list
  label_dict[LabelStatsKeys.LABEL_NCOMP] = ncomponents
@@ -472,6 +490,8 @@ def __call__(self, data):
  raise RuntimeError(f"report generated by {self.__class__} differs from the report format.")
 
  d[self.stats_name] = report
+
+ torch.set_grad_enabled(restore_grad_state)
  logger.debug(f"Get label stats spent {time.time()-start}")
  return d
 
 
@@ -104,7 +104,7 @@ def add_analyzer(self, case_analyzer, summary_analyzer) -> None:
 
  .. code-block:: python
 
- from monai.auto3dseg.analyzer import Analyzer
+ from monai.auto3dseg import Analyzer
  from monai.auto3dseg.utils import concat_val_to_np
  from monai.auto3dseg.analyzer_engine import SegSummarizer
 
 
@@ -106,6 +106,9 @@ def get_label_ccp(mask_index: MetaTensor, use_gpu: bool = True) -> Tuple[List[An
  shape_list.append(bbox_shape)
  ncomponents = len(vals)
 
+ del mask_cupy, labeled, vals, comp_idx, ncomp
+ cp.get_default_memory_pool().free_all_blocks()
+
  elif has_measure:
  labeled, ncomponents = measure_np.label(mask_index.data.cpu().numpy(), background=-1, return_num=True)
  for ncomp in range(1, ncomponents + 1):
@@ -174,7 +177,7 @@ def concat_val_to_np(
  elif ragged:
  return np.concatenate(np_list, **kwargs) # type: ignore
  else:
- return np.concatenate([np_list], **kwargs)
+ return np.concatenate([np_list], **kwargs) # type: ignore
 
 
 def concat_multikeys_to_dict(