 PREFETCH_SIZE = 4096  # samples to prefetch
 
 
+def even_split_indices(split, n, num_samples):
+    partitions = [round(i * num_samples / n) for i in range(n + 1)]
+    return [f"{split}[{partitions[i]}:{partitions[i + 1]}]" for i in range(n)]
+
+
 class ParserTfds(Parser):
     """ Wrap Tensorflow Datasets for use in PyTorch
 
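A quick worked example of the new even_split_indices helper: it cuts a named split into TFDS absolute-index subsplits, one per global pipeline, with per-pipeline sample counts that differ by at most one. Illustrative values only:

    >>> even_split_indices('validation', 4, 10)
    ['validation[0:2]', 'validation[2:5]', 'validation[5:8]', 'validation[8:10]']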
@@ -52,7 +57,7 @@ class ParserTfds(Parser):
         components.
 
     """
-    def __init__(self, root, name, split='train', shuffle=False, is_training=False, batch_size=None):
+    def __init__(self, root, name, split='train', shuffle=False, is_training=False, batch_size=None, repeats=0):
         super().__init__()
         self.root = root
         self.split = split
@@ -62,6 +67,8 @@ def __init__(self, root, name, split='train', shuffle=False, is_training=False,
             assert batch_size is not None,\
                 "Must specify batch_size in training mode for reasonable behaviour w/ TFDS wrapper"
         self.batch_size = batch_size
+        self.repeats = repeats
+        self.subsplit = None
 
         self.builder = tfds.builder(name, data_dir=root)
         # NOTE: please use tfds command line app to download & prepare datasets, I don't want to call
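For context, a hedged sketch of constructing the wrapper with the new repeats argument (the root path and dataset name are hypothetical, and the dataset is assumed to already be prepared via the tfds CLI as the note above says):

    parser = ParserTfds(
        root='/data/tfds',      # hypothetical data_dir already prepared via the tfds CLI
        name='imagenet2012',    # hypothetical dataset name
        split='validation',
        is_training=False,
        batch_size=None,
        repeats=2)              # iterate the split twice per pass; 0 (default) keeps the old behaviour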
@@ -95,6 +102,7 @@ def _lazy_init(self):
         if worker_info is not None:
             self.worker_info = worker_info
             num_workers = worker_info.num_workers
+            global_num_workers = self.dist_num_replicas * num_workers
             worker_id = worker_info.id
 
             # FIXME I need to spend more time figuring out the best way to distribute/split data across
@@ -114,19 +122,31 @@ def _lazy_init(self):
             #     split = split + '[{}:]'.format(start)
             # else:
             #     split = split + '[{}:{}]'.format(start, start + split_size)
-
-            input_context = tf.distribute.InputContext(
-                num_input_pipelines=self.dist_num_replicas * num_workers,
-                input_pipeline_id=self.dist_rank * num_workers + worker_id,
-                num_replicas_in_sync=self.dist_num_replicas  # FIXME does this have any impact?
-            )
-
-        read_config = tfds.ReadConfig(input_context=input_context)
-        ds = self.builder.as_dataset(split=split, shuffle_files=self.shuffle, read_config=read_config)
+            if not self.is_training and '[' not in self.split:
+                # If not training, and split doesn't define a subsplit, manually split the dataset
+                # for more even samples / worker
+                self.subsplit = even_split_indices(self.split, global_num_workers, self.num_samples)[
+                    self.dist_rank * num_workers + worker_id]
+
+        if self.subsplit is None:
+            input_context = tf.distribute.InputContext(
+                num_input_pipelines=self.dist_num_replicas * num_workers,
+                input_pipeline_id=self.dist_rank * num_workers + worker_id,
+                num_replicas_in_sync=self.dist_num_replicas  # FIXME does this arg have any impact?
+            )
+        else:
+            input_context = None
+
+        read_config = tfds.ReadConfig(
+            shuffle_seed=42,
+            shuffle_reshuffle_each_iteration=True,
+            input_context=input_context)
+        ds = self.builder.as_dataset(
+            split=self.subsplit or self.split, shuffle_files=self.shuffle, read_config=read_config)
         # avoid overloading threading w/ combo of TF ds threads + PyTorch workers
         ds.options().experimental_threading.private_threadpool_size = max(1, MAX_TP_SIZE // num_workers)
         ds.options().experimental_threading.max_intra_op_parallelism = 1
-        if self.is_training:
+        if self.is_training or self.repeats > 1:
             # to prevent excessive drop_last batch behaviour w/ IterableDatasets
             # see warnings at https://pytorch.org/docs/stable/data.html#multi-process-data-loading
             ds = ds.repeat()  # allow wrap around and break iteration manually
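To illustrate the pipeline indexing above, a standalone sketch (replica/worker counts and sample count are made up) of which subsplit each validation worker would end up reading; it relies only on the even_split_indices helper added earlier in this diff:

    num_samples = 50000
    dist_num_replicas, num_workers = 2, 2
    global_num_workers = dist_num_replicas * num_workers
    for dist_rank in range(dist_num_replicas):
        for worker_id in range(num_workers):
            subsplit = even_split_indices('validation', global_num_workers, num_samples)[
                dist_rank * num_workers + worker_id]
            print(f'rank {dist_rank}, worker {worker_id} -> {subsplit}')
    # rank 0, worker 0 -> validation[0:12500]
    # rank 0, worker 1 -> validation[12500:25000]
    # rank 1, worker 0 -> validation[25000:37500]
    # rank 1, worker 1 -> validation[37500:50000]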
@@ -143,7 +163,7 @@ def __iter__(self):
         # This adds extra samples and will slightly alter validation results.
         # 2. determine loop ending condition in training w/ repeat enabled so that only full batch_size
         #    batches are produced (underlying tfds iter wraps around)
-        target_sample_count = math.ceil(self.num_samples / self._num_pipelines)
+        target_sample_count = math.ceil(max(1, self.repeats) * self.num_samples / self._num_pipelines)
         if self.is_training:
             # round up to nearest batch_size per worker-replica
             target_sample_count = math.ceil(target_sample_count / self.batch_size) * self.batch_size
@@ -160,8 +180,8 @@ def __iter__(self):
         if not self.is_training and self.dist_num_replicas and 0 < sample_count < target_sample_count:
             # Validation batch padding only done for distributed training where results are reduced across nodes.
             # For single process case, it won't matter if workers return different batch sizes.
-            # FIXME this needs more testing, possible for sharding / split api to cause differences of > 1?
-            assert target_sample_count - sample_count == 1  # should only be off by 1 or sharding is not optimal
+            # FIXME if using input_context or % based subsplits, sample count can vary by more than +/- 1 and this
+            # approach is not optimal
             yield img, sample['label']  # yield prev sample again
             sample_count += 1
 
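As a worked example of the target count arithmetic above (all numbers illustrative): with repeats=2, 10000 samples, 8 global pipelines and batch_size=32,

    import math
    repeats, num_samples, num_pipelines, batch_size = 2, 10000, 8, 32
    target = math.ceil(max(1, repeats) * num_samples / num_pipelines)  # 2500 samples per pipeline
    target = math.ceil(target / batch_size) * batch_size               # 2528 after rounding up to full batches (training only)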
@@ -176,7 +196,7 @@ def _num_pipelines(self):
     def __len__(self):
         # this is just an estimate and does not factor in extra samples added to pad batches based on
         # complete worker & replica info (not available until init in dataloader).
-        return math.ceil(self.num_samples / self.dist_num_replicas)
+        return math.ceil(max(1, self.repeats) * self.num_samples / self.dist_num_replicas)
 
     def _filename(self, index, basename=False, absolute=False):
         assert False, "Not supported"  # no random access to samples
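Mirroring the __len__ change, a small sanity-check sketch with illustrative numbers:

    import math

    def est_len(num_samples, dist_num_replicas, repeats):
        # same estimate as __len__ above: repeats scales the per-replica sample count
        return math.ceil(max(1, repeats) * num_samples / dist_num_replicas)

    assert est_len(50000, 4, 0) == 12500  # repeats=0 (default) matches the previous behaviour
    assert est_len(50000, 4, 2) == 25000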