DEV Community

Super Kai (Kazuya Ito)
Super Kai (Kazuya Ito)

Posted on • Edited on

DataLoader in PyTorch

Buy Me a Coffee

DataLoader() can flexibly produce batches of tensors from a dataset given as a 0D-or-higher tensor of zero or more elements, as shown below:

*Memos:

  • The 1st argument is dataset(Required-Type:Dataset or tensor of int, float, complex or bool). *A 0D-or-higher tensor of zero or more elements can be used.
  • The 2nd argument is batch_size(Optional-Default:1-Type:int): *Memos:
    • It must be 1 <= x.
    • None can be set.
  • The 3rd argument is shuffle(Optional-Default:False-Type:bool). *If it's True, dataset is randomly shuffled.
  • The 4th argument is sampler(Optional-Default:None-Type:Sampler or Iterable(tuple or list of int or bool, etc)). *If it's not None, shuffle must be False.
  • The 5th argument is batch_sampler(Optional-Default:None-Type:Sampler or Iterable(tuple or list of int or bool, etc)): *Memos:
    • It must be at least 1D (an iterable that yields batches of indices).
    • If it's not None, batch_size must be 1, shuffle must be False or None, sampler must be None and drop_last must be False.
  • The 6th argument is num_workers(Optional-Default:0-Type:int). *It must be 0 <= x.
  • The 7th argument is collate_fn(Optional-Default:None-Type:function).
  • The 8th argument is pin_memory(Optional-Default:False-Type:bool).
  • The 9th argument is drop_last(Optional-Default:False-Type:bool). *If it's True, the last incomplete batch is dropped(removed).
  • The 10th argument is timeout(Optional-Default:0-Type:int or float): *Memos:
    • It must be 0 <= x.
    • 0 disables it.
  • The 11th argument is worker_init_fn(Optional-Default:None-Type:function).
  • The 12th argument is multiprocessing_context(Optional-Default:None-Type:str or multiprocessing.context.BaseContext): *Memos:
    • spawn can be set for str.
    • If it's None, the default multiprocessing context of your operating system will be used.
  • The 13th argument is generator(Optional-Default:None-Type:torch.Generator). *Only cpu can be set for Generator.
  • There is prefetch_factor argument(Optional-Type:int): *Memos:
    • It must be 0 <= x.
    • Its default depends on num_workers: *Memos:
    • If num_workers = 0, its default is None.
    • If num_workers > 0, its default is 2.
    • If num_workers = 0, it must be None.
    • prefetch_factor= must be used.
  • There is persistent_workers argument(Optional-Default:False-Type:bool). *persistent_workers= must be used.
  • There is pin_memory_device argument(Optional-Default:''-Type:str). *pin_memory_device= must be used.
  • dl.shuffle doesn't work (a DataLoader instance doesn't expose a shuffle attribute).
"""Runnable demo of torch.utils.data.DataLoader, reconstructed from the
article's flattened code.

Covers: default batching, every constructor argument's default value,
seeded shuffling, drop_last behaviour as batch_size grows, and that
1D/2D/3D tensors of int, float, complex and bool dtypes all work as
datasets.  Expected values are shown as comments.
"""
import torch
from torch.utils.data import DataLoader

# ---- 1D int tensor, all-default DataLoader: batch_size=1, no shuffle. ----
my_tensor = torch.tensor([8, -3, 0, 1, 5, -2, -1, 4])

dl = DataLoader(dataset=my_tensor)
list(dl)
# [tensor([8]), tensor([-3]), tensor([0]), tensor([1]),
#  tensor([5]), tensor([-2]), tensor([-1]), tensor([4])]

# Default value of every public attribute:
dl                          # <torch.utils.data.dataloader.DataLoader at 0x...>
dl.dataset                  # tensor([8, -3, 0, 1, 5, -2, -1, 4])
dl.batch_size               # 1
dl.sampler                  # <torch.utils.data.sampler.SequentialSampler at 0x...>
dl.batch_sampler            # <torch.utils.data.sampler.BatchSampler at 0x...>
dl.num_workers              # 0
dl.collate_fn               # <function torch.utils.data._utils.collate.default_collate(batch)>
dl.pin_memory               # False
dl.drop_last                # False
dl.timeout                  # 0
dl.worker_init_fn           # None
dl.multiprocessing_context  # None
dl.generator                # None
dl.prefetch_factor          # None
dl.persistent_workers       # False
dl.pin_memory_device        # ''

# Passing every argument explicitly with its default value is equivalent:
dl = DataLoader(dataset=my_tensor, batch_size=1, shuffle=False, sampler=None,
                batch_sampler=None, num_workers=0, collate_fn=None,
                pin_memory=False, drop_last=False, timeout=0,
                worker_init_fn=None, multiprocessing_context=None,
                generator=None, prefetch_factor=None,
                persistent_workers=False, pin_memory_device='')
list(dl)
# [tensor([8]), tensor([-3]), tensor([0]), tensor([1]),
#  tensor([5]), tensor([-2]), tensor([-1]), tensor([4])]

# ---- shuffle=True with a fixed seed; drop_last drops the final partial batch.
torch.manual_seed(47)
dl = DataLoader(dataset=my_tensor, batch_size=1, shuffle=True, drop_last=True)
list(dl)
# [tensor([-2]), tensor([5]), tensor([8]), tensor([4]),
#  tensor([-1]), tensor([1]), tensor([-3]), tensor([0])]

torch.manual_seed(47)
dl = DataLoader(dataset=my_tensor, batch_size=2, shuffle=True, drop_last=True)
list(dl)
# [tensor([-2, 5]), tensor([8, 4]), tensor([-1, 1]), tensor([-3, 0])]

torch.manual_seed(47)
dl = DataLoader(dataset=my_tensor, batch_size=3, shuffle=True, drop_last=True)
list(dl)
# [tensor([-2, 5, 8]), tensor([4, -1, 1])]   <- last 2 elements dropped

torch.manual_seed(47)
dl = DataLoader(dataset=my_tensor, batch_size=4, shuffle=True, drop_last=True)
list(dl)
# [tensor([-2, 5, 8, 4]), tensor([-1, 1, -3, 0])]

torch.manual_seed(47)
dl = DataLoader(dataset=my_tensor, batch_size=5, shuffle=True, drop_last=True)
list(dl)
# [tensor([-2, 5, 8, 4, -1])]   <- last 3 elements dropped

torch.manual_seed(47)
dl = DataLoader(dataset=my_tensor, batch_size=6, shuffle=True, drop_last=True)
list(dl)
# [tensor([-2, 5, 8, 4, -1, 1])]

torch.manual_seed(47)
dl = DataLoader(dataset=my_tensor, batch_size=7, shuffle=True, drop_last=True)
list(dl)
# [tensor([-2, 5, 8, 4, -1, 1, -3])]

torch.manual_seed(47)
dl = DataLoader(dataset=my_tensor, batch_size=8, shuffle=True, drop_last=True)
list(dl)
# [tensor([-2, 5, 8, 4, -1, 1, -3, 0])]

torch.manual_seed(47)
dl = DataLoader(dataset=my_tensor, batch_size=9, shuffle=True, drop_last=True)
list(dl)
# []   <- batch_size > len(dataset): the only batch is incomplete, so dropped

# ---- 2D int tensor: the first dimension indexes the samples. ----
my_tensor = torch.tensor([[8, -3, 0, 1], [5, -2, -1, 4]])
dl = DataLoader(dataset=my_tensor)
list(dl)
# [tensor([[8, -3, 0, 1]]), tensor([[5, -2, -1, 4]])]

torch.manual_seed(47)
dl = DataLoader(dataset=my_tensor, batch_size=1, shuffle=True, drop_last=True)
list(dl)
# [tensor([[5, -2, -1, 4]]), tensor([[8, -3, 0, 1]])]

torch.manual_seed(47)
dl = DataLoader(dataset=my_tensor, batch_size=2, shuffle=True, drop_last=True)
list(dl)
# [tensor([[5, -2, -1, 4],
#          [8, -3, 0, 1]])]

torch.manual_seed(47)
dl = DataLoader(dataset=my_tensor, batch_size=3, shuffle=True, drop_last=True)
list(dl)
# []

# ---- 3D int tensor. ----
my_tensor = torch.tensor([[[8, -3], [0, 1]], [[5, -2], [-1, 4]]])
dl = DataLoader(dataset=my_tensor)
list(dl)
# [tensor([[[8, -3], [0, 1]]]), tensor([[[5, -2], [-1, 4]]])]

torch.manual_seed(47)
dl = DataLoader(dataset=my_tensor, batch_size=1, shuffle=True, drop_last=True)
list(dl)
# [tensor([[[5, -2], [-1, 4]]]), tensor([[[8, -3], [0, 1]]])]

torch.manual_seed(47)
dl = DataLoader(dataset=my_tensor, batch_size=2, shuffle=True, drop_last=True)
list(dl)
# [tensor([[[5, -2], [-1, 4]],
#          [[8, -3], [0, 1]]])]

torch.manual_seed(47)
dl = DataLoader(dataset=my_tensor, batch_size=3, shuffle=True, drop_last=True)
list(dl)
# []

# ---- float, complex and bool dtypes work the same way. ----
my_tensor = torch.tensor([[[8., -3.], [0., 1.]], [[5., -2.], [-1., 4.]]])
dl = DataLoader(dataset=my_tensor)
list(dl)
# [tensor([[[8., -3.], [0., 1.]]]), tensor([[[5., -2.], [-1., 4.]]])]

my_tensor = torch.tensor([[[8.+0.j, -3.+0.j], [0.+0.j, 1.+0.j]],
                          [[5.+0.j, -2.+0.j], [-1.+0.j, 4.+0.j]]])
dl = DataLoader(dataset=my_tensor)
list(dl)
# [tensor([[[8.+0.j, -3.+0.j], [0.+0.j, 1.+0.j]]]),
#  tensor([[[5.+0.j, -2.+0.j], [-1.+0.j, 4.+0.j]]])]

my_tensor = torch.tensor([[[True, False], [True, False]],
                          [[False, True], [False, True]]])
dl = DataLoader(dataset=my_tensor)
list(dl)
# [tensor([[[True, False], [True, False]]]),
#  tensor([[[False, True], [False, True]]])]
Enter fullscreen mode Exit fullscreen mode

Top comments (0)