Merged
11 changes: 3 additions & 8 deletions templates/_base/_argparse.py
@@ -58,11 +58,6 @@
     },
 
     # ignite handlers options
-    "output_path": {
-        "default": "{{output_path}}",
-        "type": str,
-        "help": "output path to indicate where to_save objects are stored ({{output_path}})",
-    },
     "save_every_iters": {
         "default": {{save_every_iters}},
         "type": int,
@@ -115,10 +110,10 @@
     },
 
     # ignite logger options
-    "filepath": {
-        "default": "{{ filepath }}",
+    "output_dir": {
+        "default": "{{ output_dir }}",
         "type": str,
-        "help": "logging file path ({{ filepath }})",
+        "help": "directory to save all outputs ({{ output_dir }})",
     },
     "logger_log_every_iters": {
         "default": {{logger_log_every_iters}},
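Note (editor, not part of the diff): each entry in this options dict becomes one CLI flag. A minimal sketch of that mapping, with `DEFAULTS` and `get_default_parser` as assumed, illustrative names — the real template code that consumes the dict is outside this diff:

```python
from argparse import ArgumentParser

# assumed shape: mirrors the "output_dir" entry added above, with the
# template placeholders already rendered to concrete values
DEFAULTS = {
    "output_dir": {
        "default": "./logs",
        "type": str,
        "help": "directory to save all outputs (./logs)",
    },
}

def get_default_parser() -> ArgumentParser:
    parser = ArgumentParser()
    for name, opts in DEFAULTS.items():
        parser.add_argument(f"--{name}", **opts)  # each dict key becomes a --flag
    return parser

config = get_default_parser().parse_args(["--output_dir", "./runs"])
print(config.output_dir)  # ./runs
```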
8 changes: 4 additions & 4 deletions templates/_base/_handlers.py
@@ -34,7 +34,7 @@ def get_handlers(
         Config object for setting up handlers
 
         `config` has to contain
-        - `output_path`: output path to indicate where to_save objects are stored
+        - `output_dir`: output path to indicate where to_save objects are stored
         - `save_every_iters`: saving iteration interval
         - `n_saved`: number of best models to store
         - `log_every_iters`: logging interval for iteration progress bar and `GpuInfo` if true
@@ -83,7 +83,7 @@ def get_handlers(
         to_save=to_save,
         lr_scheduler=lr_scheduler,
         output_names=output_names,
-        output_path=config.output_path,
+        output_path=config.output_dir / 'checkpoints',
         save_every_iters=config.save_every_iters,
         n_saved=config.n_saved,
         log_every_iters=config.log_every_iters,
@@ -98,7 +98,7 @@ def get_handlers(
 
     # https://pytorch.org/ignite/contrib/engines.html#ignite.contrib.engines.common.save_best_model_by_val_score
     best_model_handler = common.save_best_model_by_val_score(
-        output_path=config.output_path,
+        output_path=config.output_dir / 'checkpoints',
        evaluator=eval_engine,
        model=model,
        metric_name=metric_name,
@@ -209,7 +209,7 @@ def get_logger(
     )
 {% elif logger_deps == 'tensorboard' %}
     logger_handler = common.setup_tb_logging(
-        output_path=config.filepath,
+        output_path=config.output_dir,
         trainer=train_engine,
         optimizers=optimizers,
         evaluators=eval_engine,
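For reference, the layout the rename produces: one `output_dir` per run now fans out to checkpoints, the log file, and logger output. A minimal sketch, assuming a run directory created by `main()` (the directory name below is illustrative):

```python
from argparse import Namespace
from pathlib import Path

# illustrative run directory; main() builds a timestamped name like this
config = Namespace(output_dir=Path("./logs/cifar10-backend-None-20240101-120000"))

checkpoint_dir = config.output_dir / "checkpoints"  # to_save objects (_handlers.py)
log_file = config.output_dir / "training-info.log"  # python logging (utils.py)
tb_dir = config.output_dir                          # tensorboard event files (get_logger)
print(checkpoint_dir, log_file, tb_dir, sep="\n")
```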
12 changes: 4 additions & 8 deletions templates/_base/_sidebar.py
@@ -80,14 +80,14 @@ def ignite_handlers_options(config):
 
 def ignite_loggers_options(config):
     st.markdown("## Ignite Loggers Options")
-    config["filepath"] = st.text_input(
-        "Logging file path (filepath)",
+    config["output_dir"] = st.text_input(
+        "Directory to save all outputs (output_dir)",
         "./logs",
-        help="This option will be used by both python logging and ignite loggers if possible",
+        help="This option will be used by python logging, saving checkpoints, and ignite loggers if possible",
     )
     if st.checkbox("Use experiment tracking system ?", value=True):
         config["logger_deps"] = st.selectbox(
-            "Select experiment eracking system",
+            "Select experiment tracking system",
             ["ClearML", "MLflow", "Neptune", "Polyaxon", "TensorBoard", "Visdom", "WandB"],
             index=4,
         ).lower()
@@ -104,10 +104,6 @@ def ignite_loggers_options(config):
 
 
 def _setup_common_training_handlers_options(config):
-    config["output_path"] = st.text_input(
-        "Output path to indicate where to_save objects are stored (output_path)",
-        value="./logs",
-    )
     config["save_every_iters"] = st.number_input(
         "Saving iteration interval (save_every_iters)", min_value=1, value=1000
     )
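A standalone sketch of the consolidated sidebar field (the `app.py` framing and the `st.json` echo are illustrative, not from this PR):

```python
# save as app.py and run: streamlit run app.py
import streamlit as st

config = {}
config["output_dir"] = st.text_input(
    "Directory to save all outputs (output_dir)",
    "./logs",
    help="Used by python logging, checkpoint saving, and ignite loggers",
)
st.json(config)  # the generator renders values like this into the templates
```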
16 changes: 7 additions & 9 deletions templates/gan/README.md
@@ -75,7 +75,7 @@ python main.py --verbose
 
 ### Single Node, Multiple GPUs
 
-- Using `torch.distributed.launch` (preferred)
+- Using `torch.distributed.launch` (recommended)
 
 ```sh
 python -m torch.distributed.launch \
@@ -144,14 +144,14 @@ usage: main.py [-h] [--use_amp] [--resume_from RESUME_FROM] [--seed SEED]
                [--verbose] [--backend BACKEND]
                [--nproc_per_node NPROC_PER_NODE] [--nnodes NNODES]
                [--node_rank NODE_RANK] [--master_addr MASTER_ADDR]
-               [--master_port MASTER_PORT] [--output_path OUTPUT_PATH]
+               [--master_port MASTER_PORT]
                [--save_every_iters SAVE_EVERY_ITERS] [--n_saved N_SAVED]
                [--log_every_iters LOG_EVERY_ITERS] [--with_pbars WITH_PBARS]
                [--with_pbar_on_iters WITH_PBAR_ON_ITERS]
                [--stop_on_nan STOP_ON_NAN]
                [--clear_cuda_cache CLEAR_CUDA_CACHE]
                [--with_gpu_stats WITH_GPU_STATS] [--patience PATIENCE]
-               [--limit_sec LIMIT_SEC] [--filepath FILEPATH]
+               [--limit_sec LIMIT_SEC] [--output_dir OUTPUT_DIR]
                [--logger_log_every_iters LOGGER_LOG_EVERY_ITERS]
                [--dataset {cifar10,lsun,imagenet,folder,lfw,fake,mnist}]
                [--data_path DATA_PATH] [--batch_size BATCH_SIZE]
@@ -182,17 +182,14 @@ optional arguments:
                         (None)
   --master_port MASTER_PORT
                         master node port for torch native backends (None)
-  --output_path OUTPUT_PATH
-                        output path to indicate where to_save objects are
-                        stored (./logs)
   --save_every_iters SAVE_EVERY_ITERS
                         Saving iteration interval (1000)
   --n_saved N_SAVED     number of best models to store (2)
   --log_every_iters LOG_EVERY_ITERS
                         logging interval for iteration progress bar (100)
   --with_pbars WITH_PBARS
                         show epoch-wise and iteration-wise progress bars
-                        (False)
+                        (True)
   --with_pbar_on_iters WITH_PBAR_ON_ITERS
                         show iteration progress bar or not (True)
   --stop_on_nan STOP_ON_NAN
@@ -207,9 +204,10 @@
   --limit_sec LIMIT_SEC
                         maximum time before training terminates in seconds
                         (None)
-  --filepath FILEPATH   logging file path (./logs)
+  --output_dir OUTPUT_DIR
+                        directory to save all outputs (./logs)
   --logger_log_every_iters LOGGER_LOG_EVERY_ITERS
-                        logging interval for experiment tracking system (None)
+                        logging interval for experiment tracking system (100)
   --dataset {cifar10,lsun,imagenet,folder,lfw,fake,mnist}
                         dataset to use (cifar10)
   --data_path DATA_PATH
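A usage example implied by the new flags (the flag set follows the usage text above; the run-directory name is produced by `main()`):

```sh
# all artifacts for this run land under ./runs/cifar10-backend-None-<timestamp>/
python main.py --output_dir ./runs --dataset cifar10 --verbose
```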
24 changes: 11 additions & 13 deletions templates/gan/gan/main.py
@@ -3,6 +3,7 @@
"""
import warnings
from argparse import ArgumentParser
from datetime import datetime
from pathlib import Path
from typing import Any
from ignite.contrib.handlers.wandb_logger import WandBLogger
@@ -109,7 +110,7 @@ def run(local_rank: int, config: Any, *args: Any, **kwargs: Any):
     @train_engine.on(Events.EPOCH_COMPLETED)
     def save_fake_example(engine):
         fake = netG(fixed_noise)
-        path = config.filepath / (FAKE_IMG_FNAME.format(engine.state.epoch))
+        path = config.output_dir / (FAKE_IMG_FNAME.format(engine.state.epoch))
         vutils.save_image(fake.detach(), path, normalize=True)
 
     # --------------------------------------------------
@@ -118,7 +119,7 @@ def save_fake_example(engine):
     @train_engine.on(Events.EPOCH_COMPLETED)
     def save_real_example(engine):
         img, y = engine.state.batch
-        path = config.filepath / (REAL_IMG_FNAME.format(engine.state.epoch))
+        path = config.output_dir / (REAL_IMG_FNAME.format(engine.state.epoch))
         vutils.save_image(img, path, normalize=True)
 
     # -------------------------------------------------------------
@@ -147,11 +148,11 @@ def create_plots(engine):
             warnings.warn("Loss plots will not be generated -- pandas or matplotlib not found")
 
         else:
-            df = pd.read_csv(config.filepath / LOGS_FNAME, delimiter="\t", index_col="iteration")
+            df = pd.read_csv(config.output_dir / LOGS_FNAME, delimiter="\t", index_col="iteration")
             _ = df.plot(subplots=True, figsize=(20, 20))
             _ = plt.xlabel("Iteration number")
             fig = plt.gcf()
-            path = config.filepath / PLOT_FNAME
+            path = config.output_dir / PLOT_FNAME
 
             fig.savefig(path)
 
@@ -161,7 +162,7 @@ def create_plots(engine):
     # for training stats
     # --------------------------------
 
-    train_engine.add_event_handler(Events.ITERATION_COMPLETED(config.log_every_iters), log_metrics, tag="train")
+    train_engine.add_event_handler(Events.ITERATION_COMPLETED(every=config.log_every_iters), log_metrics, tag="train")
 
     # ------------------------------------------
     # setup if done. let's run the training
@@ -192,15 +193,12 @@ def main():
     config = parser.parse_args()
     manual_seed(config.seed)
 
-    if config.filepath:
-        path = Path(config.filepath)
+    if config.output_dir:
+        now = datetime.now().strftime("%Y%m%d-%H%M%S")
+        name = f'{config.dataset}-backend-{idist.backend()}-{now}'
+        path = Path(config.output_dir, name)
         path.mkdir(parents=True, exist_ok=True)
-        config.filepath = path
-
-    if config.output_path:
-        path = Path(config.output_path)
-        path.mkdir(parents=True, exist_ok=True)
-        config.output_path = path
+        config.output_dir = path
 
     with idist.Parallel(
         backend=config.backend,
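The new run-directory logic above, extracted as a standalone sketch (values hard-coded where the script reads config or `idist`):

```python
from datetime import datetime
from pathlib import Path

output_dir, dataset = "./logs", "cifar10"
backend = None  # idist.backend() returns None outside a distributed context

now = datetime.now().strftime("%Y%m%d-%H%M%S")
name = f"{dataset}-backend-{backend}-{now}"
path = Path(output_dir, name)
path.mkdir(parents=True, exist_ok=True)
print(path)  # e.g. logs/cifar10-backend-None-20240101-120000
```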
9 changes: 3 additions & 6 deletions templates/gan/gan/utils.py
@@ -4,7 +4,6 @@
 import hashlib
 import logging
 import shutil
-from datetime import datetime
 from logging import Logger
 from pathlib import Path
 from pprint import pformat
@@ -30,7 +29,7 @@
 # TODO : PLEASE provide your custom model, optimizer, and loss function
 
 
-def initialize(config: Optional[Any]) -> Tuple[Module, Optimizer, Module, Union[_LRScheduler, ParamScheduler]]:
+def initialize(config: Optional[Any], num_channels: int) -> Tuple[Module, Optimizer, Module, Union[_LRScheduler, ParamScheduler]]:
     """Initializing model, optimizer, loss function, and lr scheduler
     with correct settings.
 
@@ -46,7 +45,6 @@ def initialize(config: Optional[Any], num_channels: int) -> Tuple[Module, Optimizer, Module, Union[
     netG = idist.auto_model(Generator(config.z_dim, config.g_filters, num_channels))
     netD = idist.auto_model(Discriminator(num_channels, config.d_filters))
     loss_fn = nn.BCELoss()
-    model = idist.auto_model(model)
     optimizerG = optim.Adam(netG.parameters(), lr=config.lr, betas=(config.beta_1, 0.999))
     optimizerD = optim.Adam(netD.parameters(), lr=config.lr, betas=(config.beta_1, 0.999))
     loss_fn = loss_fn.to(idist.device())
@@ -111,18 +109,17 @@ def setup_logging(config: Any) -> Logger:
     ----------
     config
         config object. config has to contain
-        `verbose` and `filepath` attributes.
+        `verbose` and `output_dir` attributes.
 
     Returns
     -------
     logger
         an instance of `Logger`
     """
-    now = datetime.now().strftime("%Y%m%d-%X")
     logger = setup_logger(
         level=logging.INFO if config.verbose else logging.WARNING,
         format="%(message)s",
-        filepath=config.filepath / f"{now}.log",
+        filepath=config.output_dir / "training-info.log",
     )
     return logger
 
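The logging change in isolation: the log file name is now fixed because the run directory itself carries the timestamp. A minimal sketch (directory name illustrative):

```python
import logging
from pathlib import Path

from ignite.utils import setup_logger

output_dir = Path("./logs/example-run")  # in the templates this is config.output_dir
output_dir.mkdir(parents=True, exist_ok=True)

logger = setup_logger(
    level=logging.INFO,
    format="%(message)s",
    filepath=output_dir / "training-info.log",
)
logger.info("file name is fixed; the run directory carries the timestamp")
```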
6 changes: 4 additions & 2 deletions templates/gan/tests/test_handlers.py
@@ -1,5 +1,6 @@
 import unittest
 from argparse import Namespace
+from pathlib import Path
 from tempfile import TemporaryDirectory
 
 from ignite.contrib.handlers import (
@@ -26,8 +27,9 @@ class TestHandlers(unittest.TestCase):
     def test_get_handlers(self):
         train_engine = Engine(lambda e, b: b)
         with TemporaryDirectory() as tmp:
+            tmp = Path(tmp)
             config = Namespace(
-                output_path=tmp,
+                output_dir=tmp,
                 save_every_iters=1,
                 n_saved=2,
                 log_every_iters=1,
@@ -53,7 +55,7 @@ def test_get_handlers(self):
 
     def test_get_logger(self):
         with TemporaryDirectory() as tmp:
-            config = Namespace(filepath=tmp, logger_log_every_iters=1)
+            config = Namespace(output_dir=tmp, logger_log_every_iters=1)
             train_engine = Engine(lambda e, b: b)
             optimizer = optim.Adam(nn.Linear(1, 1).parameters())
             logger_handler = get_logger(
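Why the test now wraps `tmp` in `Path`: `get_handlers` joins `config.output_dir / 'checkpoints'`, and the `/` operator needs a `Path` on the left. A standalone check (not from the PR):

```python
from pathlib import Path
from tempfile import TemporaryDirectory

with TemporaryDirectory() as tmp:
    try:
        _ = tmp / "checkpoints"  # str / str raises TypeError
    except TypeError:
        pass
    print(Path(tmp) / "checkpoints")  # Path / str works
```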
2 changes: 1 addition & 1 deletion templates/gan/tests/test_utils.py
@@ -35,7 +35,7 @@ def test_log_metrics(self):
     def test_setup_logging(self):
         with TemporaryDirectory() as tmp:
             tmp = Path(tmp)
-            config = Namespace(verbose=True, filepath=tmp)
+            config = Namespace(verbose=True, output_dir=tmp)
             logger = setup_logging(config)
             self.assertEqual(logger.level, logging.INFO)
             self.assertIsInstance(logger, logging.Logger)
12 changes: 5 additions & 7 deletions templates/image_classification/README.md
@@ -143,14 +143,14 @@ usage: main.py [-h] [--use_amp] [--resume_from RESUME_FROM] [--seed SEED]
                [--verbose] [--backend BACKEND]
                [--nproc_per_node NPROC_PER_NODE] [--nnodes NNODES]
                [--node_rank NODE_RANK] [--master_addr MASTER_ADDR]
-               [--master_port MASTER_PORT] [--output_path OUTPUT_PATH]
+               [--master_port MASTER_PORT]
                [--save_every_iters SAVE_EVERY_ITERS] [--n_saved N_SAVED]
                [--log_every_iters LOG_EVERY_ITERS] [--with_pbars WITH_PBARS]
                [--with_pbar_on_iters WITH_PBAR_ON_ITERS]
                [--stop_on_nan STOP_ON_NAN]
                [--clear_cuda_cache CLEAR_CUDA_CACHE]
                [--with_gpu_stats WITH_GPU_STATS] [--patience PATIENCE]
-               [--limit_sec LIMIT_SEC] [--filepath FILEPATH]
+               [--limit_sec LIMIT_SEC] [--output_dir OUTPUT_DIR]
                [--logger_log_every_iters LOGGER_LOG_EVERY_ITERS]
                [--data_path DATA_PATH] [--train_batch_size TRAIN_BATCH_SIZE]
                [--eval_batch_size EVAL_BATCH_SIZE] [--num_workers NUM_WORKERS]
@@ -181,17 +181,14 @@ optional arguments:
                         (None)
   --master_port MASTER_PORT
                         master node port for torch native backends (None)
-  --output_path OUTPUT_PATH
-                        output path to indicate where to_save objects are
-                        stored (./logs)
   --save_every_iters SAVE_EVERY_ITERS
                         Saving iteration interval (1000)
   --n_saved N_SAVED     number of best models to store (2)
   --log_every_iters LOG_EVERY_ITERS
                         logging interval for iteration progress bar (100)
   --with_pbars WITH_PBARS
                         show epoch-wise and iteration-wise progress bars
-                        (False)
+                        (True)
   --with_pbar_on_iters WITH_PBAR_ON_ITERS
                         show iteration progress bar or not (True)
   --stop_on_nan STOP_ON_NAN
@@ -206,7 +203,8 @@
   --limit_sec LIMIT_SEC
                         maximum time before training terminates in seconds
                         (None)
-  --filepath FILEPATH   logging file path (./logs)
+  --output_dir OUTPUT_DIR
+                        directory to save all outputs (./logs)
   --logger_log_every_iters LOGGER_LOG_EVERY_ITERS
                         logging interval for experiment tracking system (None)
   --data_path DATA_PATH
16 changes: 7 additions & 9 deletions templates/image_classification/image_classification/main.py
@@ -2,6 +2,7 @@
 main entrypoint training
 """
 from argparse import ArgumentParser
+from datetime import datetime
 from pathlib import Path
 from typing import Any
 from ignite.contrib.handlers.wandb_logger import WandBLogger
@@ -148,7 +149,7 @@ def _():
     # for training stats
     # --------------------------------
 
-    train_engine.add_event_handler(Events.ITERATION_COMPLETED(config.log_every_iters), log_metrics, tag="train")
+    train_engine.add_event_handler(Events.ITERATION_COMPLETED(every=config.log_every_iters), log_metrics, tag="train")
 
     # ---------------------------------------------
     # run evaluation at every training epoch end
@@ -192,15 +193,12 @@ def main():
     config = parser.parse_args()
     manual_seed(config.seed)
 
-    if config.filepath:
-        path = Path(config.filepath)
+    if config.output_dir:
+        now = datetime.now().strftime("%Y%m%d-%H%M%S")
+        name = f'{config.model}-backend-{idist.backend()}-{now}'
+        path = Path(config.output_dir, name)
         path.mkdir(parents=True, exist_ok=True)
-        config.filepath = path
-
-    if config.output_path:
-        path = Path(config.output_path)
-        path.mkdir(parents=True, exist_ok=True)
-        config.output_path = path
+        config.output_dir = path
 
     with idist.Parallel(
         backend=config.backend,
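The `Events.ITERATION_COMPLETED(every=...)` fix in both `main.py` files, shown in isolation: the first positional argument of an ignite event-filter call is `event_filter`, a callable, so the interval must be passed as the `every` keyword. A minimal check:

```python
from ignite.engine import Engine, Events

engine = Engine(lambda e, b: b)

@engine.on(Events.ITERATION_COMPLETED(every=100))
def log_metrics(engine):
    print(f"iteration {engine.state.iteration}")

# Events.ITERATION_COMPLETED(100) would fail: 100 is not a callable event_filter
engine.run(range(1000), max_epochs=1)  # prints at iterations 100, 200, ..., 1000
```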