*Memos:
- My post explains Compose().
- My post explains ToImage().
- My post explains ToDtype() about scale=True.
- My post explains ToTensor().
- My post explains PILToTensor().
- My post explains ToPILImage() about no arguments.
- My post explains OxfordIIITPet().
Compose(transforms=[ToImage(), ToDtype(dtype=torch.float32, scale=True)]) can convert a PIL (Pillow library) image ([H, W, C]), tensor or ndarray to an Image ([..., C, H, W]) and scale its values to [0.0, 1.0] as shown below:
*Memos:
- ToTensor() can convert a PIL image or ndarray to a tensor and scale the values of a PIL image or ndarray, but it's deprecated, so use Compose(transforms=[ToImage(), ToDtype(dtype=torch.float32, scale=True)]) instead, according to the doc.
- v2 is recommended, according to "V1 or V2? Which one should I use?".
from torchvision.datasets import OxfordIIITPet from torchvision.transforms.v2 import Compose, ToImage, ToDtype import torch import numpy as np PILImage_data = OxfordIIITPet( root="data", transform=None ) ImageScaleTrue_data = OxfordIIITPet( root="data", transform=Compose(transforms=[ToImage(), ToDtype(dtype=torch.float32, scale=True)]) ) ImageScaleFalse_data = OxfordIIITPet( root="data", transform=Compose(transforms=[ToImage(), ToDtype(dtype=torch.float32, scale=False)]) ) ImageScaleTrue_data # Dataset OxfordIIITPet # Number of datapoints: 3680 # Root location: data # StandardTransform # Transform: Compose( # ToImage() # ToDtype(scale=True) # ) ImageScaleTrue_data[0] # (Image([[[0.1451, 0.1373, 0.1412, ..., 0.9686, 0.9765, 0.9765], # [0.1373, 0.1373, 0.1451, ..., 0.9647, 0.9725, 0.9765], # ..., # [0.1098, 0.1098, 0.1059, ..., 0.2314, 0.2549, 0.2980]], # [[0.0784, 0.0706, 0.0745, ..., 0.9725, 0.9725, 0.9725], # [0.0706, 0.0706, 0.0784, ..., 0.9686, 0.9686, 0.9725], # ..., # [0.1059, 0.1059, 0.1059, ..., 0.3686, 0.4157, 0.4588]], # [[0.0471, 0.0392, 0.0431, ..., 0.9922, 0.9922, 0.9922], # [0.0392, 0.0392, 0.0471, ..., 0.9843, 0.9882, 0.9922], # ..., # [0.1373, 0.1373, 0.1373, ..., 0.8392, 0.9098, 0.8745]]],), 0) ImageScaleTrue_data[0][0].size() # torchtorch.Size([3, 500, 394]) ImageScaleTrue_data[0][0] # Image([[[0.1451, 0.1373, 0.1412, ..., 0.9686, 0.9765, 0.9765], # [0.1373, 0.1373, 0.1451, ..., 0.9647, 0.9725, 0.9765], # ..., # [0.1098, 0.1098, 0.1059, ..., 0.2314, 0.2549, 0.2980]], # [[0.0784, 0.0706, 0.0745, ..., 0.9725, 0.9725, 0.9725], # [0.0706, 0.0706, 0.0784, ..., 0.9686, 0.9686, 0.9725], # ..., # [0.1059, 0.1059, 0.1059, ..., 0.3686, 0.4157, 0.4588]], # [[0.0471, 0.0392, 0.0431, ..., 0.9922, 0.9922, 0.9922], # [0.0392, 0.0392, 0.0471, ..., 0.9843, 0.9882, 0.9922], # ..., # [0.1373, 0.1373, 0.1373, ..., 0.8392, 0.9098, 0.8745]]],) ImageScaleTrue_data[0][1] # 0 import matplotlib.pyplot as plt plt.imshow(X=ImageScaleTrue_data[0][0]) # TypeError: Invalid shape 
(3, 500, 394) for image data c = Compose(transforms=[ToImage(), ToDtype(dtype=torch.float32, scale=True)]) c(PILImage_data) # It's still PIL image. # Dataset OxfordIIITPet # Number of datapoints: 3680 # Root location: data c(PILImage_data[0]) # (Image([[[0.1451, 0.1373, 0.1412, ..., 0.9686, 0.9765, 0.9765], # [0.1373, 0.1373, 0.1451, ..., 0.9647, 0.9725, 0.9765], # ..., # [0.1098, 0.1098, 0.1059, ..., 0.2314, 0.2549, 0.2980]], # [[0.0784, 0.0706, 0.0745, ..., 0.9725, 0.9725, 0.9725], # [0.0706, 0.0706, 0.0784, ..., 0.9686, 0.9686, 0.9725], # ..., # [0.1059, 0.1059, 0.1059, ..., 0.3686, 0.4157, 0.4588]], # [[0.0471, 0.0392, 0.0431, ..., 0.9922, 0.9922, 0.9922], # [0.0392, 0.0392, 0.0471, ..., 0.9843, 0.9882, 0.9922], # ..., # [0.1373, 0.1373, 0.1373, ..., 0.8392, 0.9098, 0.8745]]],), 0) c(PILImage_data[0][0]) # Image([[[0.1451, 0.1373, 0.1412, ..., 0.9686, 0.9765, 0.9765], # [0.1373, 0.1373, 0.1451, ..., 0.9647, 0.9725, 0.9765], # ..., # [0.1098, 0.1098, 0.1059, ..., 0.2314, 0.2549, 0.2980]], # [[0.0784, 0.0706, 0.0745, ..., 0.9725, 0.9725, 0.9725], # [0.0706, 0.0706, 0.0784, ..., 0.9686, 0.9686, 0.9725], # ..., # [0.1059, 0.1059, 0.1059, ..., 0.3686, 0.4157, 0.4588]], # [[0.0471, 0.0392, 0.0431, ..., 0.9922, 0.9922, 0.9922], # [0.0392, 0.0392, 0.0471, ..., 0.9843, 0.9882, 0.9922], # ..., # [0.1373, 0.1373, 0.1373, ..., 0.8392, 0.9098, 0.8745]]],) plt.imshow(X=c(PILImage_data[0][0])) # TypeError: Invalid shape (3, 500, 394) for image data c((torch.tensor([[0, 1, 2, 3]]), 0)) # int64 c((torch.tensor([[0, 1, 2, 3]], dtype=torch.int64), 0)) c((torch.tensor([[[0, 1, 2, 3]]]), 0)) # (Image([[[0.0000e+00, 1.0842e-19, 2.1684e-19, 3.2526e-19]]],), 0) c(torch.tensor([[0, 1, 2, 3]])) c(torch.tensor([[[0, 1, 2, 3]]])) # Image([[[0.0000e+00, 1.0842e-19, 2.1684e-19, 3.2526e-19]]],) c((torch.tensor([[[[0, 1, 2, 3]]]]), 0)) # (Image([[[[0.0000e+00, 1.0842e-19, 2.1684e-19, 3.2526e-19]]]],), 0) c(torch.tensor([[[[0, 1, 2, 3]]]])) # Image([[[[0.0000e+00, 1.0842e-19, 2.1684e-19, 
3.2526e-19]]]],) c((torch.tensor([[[[[0, 1, 2, 3]]]]]), 0)) # (Image([[[[[0.0000e+00, 1.0842e-19, 2.1684e-19, 3.2526e-19]]]]],), 0) c(torch.tensor([[[[[0, 1, 2, 3]]]]])) # Image([[[[[0.0000e+00, 1.0842e-19, 2.1684e-19, 3.2526e-19]]]]],) c((torch.tensor([[0, 1, 2, 3]], dtype=torch.int32), 0)) # (Image([[[0.0000e+00, 4.6566e-10, 9.3132e-10, 1.3970e-09]]],), 0) c((torch.tensor([[0., 1., 2., 3.]]), 0)) # float32 c((torch.tensor([[0., 1., 2., 3.]], dtype=torch.float32), 0)) c((torch.tensor([[0., 1., 2., 3.]], dtype=torch.float64), 0)) c((torch.tensor([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]]), 0)) # complex64 c((torch.tensor([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]], dtype=torch.complex64), 0)) c((torch.tensor([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]], dtype=torch.complex32), 0)) # (Image([[[0., 1., 2., 3.]]],), 0) c((torch.tensor([[True, False, True, False]]), 0)) # bool c((torch.tensor([[True, False, True, False]], dtype=torch.bool), 0)) # (Image([[[1., 0., 1., 0.]]],), 0) c((np.array(3), 0)) # int32 c((np.array(3, dtype=np.int32), 0)) # (Image([[[1.3970e-09]]],), 0) c(np.array(3)) # Image([[[1.3970e-09]]],) c((np.array([0, 1, 2, 3]), 0)) c((np.array([[0, 1, 2, 3]]), 0)) # (Image([[[0.0000e+00, 4.6566e-10, 9.3132e-10, 1.3970e-09]]],), 0) c(np.array([0, 1, 2, 3])) c(np.array([[0, 1, 2, 3]])) # Image([[[0.0000e+00, 4.6566e-10, 9.3132e-10, 1.3970e-09]]],) c((np.array([[[0, 1, 2, 3]]]), 0)) # (Image([[[0.0000e+00]], [[4.6566e-10]], [[9.3132e-10]], [[1.3970e-09]]],), 0) c(np.array([[[0, 1, 2, 3]]])) # Image([[[0.0000e+00]], [[4.6566e-10]], [[9.3132e-10]], [[1.3970e-09]]],) c((np.array([[0, 1, 2, 3]], dtype=np.int64), 0)) # (Image([[[0.0000e+00, 1.0842e-19, 2.1684e-19, 3.2526e-19]]],), 0) c((np.array([[0., 1., 2., 3.]]), 0)) # float64 c((np.array([[0., 1., 2., 3.]], dtype=np.float64), 0)) c((np.array([[0., 1., 2., 3.]], dtype=np.float32), 0)) c((np.array([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]]), 0)) # complex128 c((np.array([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]], dtype=np.complex128), 0)) 
c((np.array([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]], dtype=np.complex64), 0)) # (Image([[[0., 1., 2., 3.]]],), 0) c((np.array([[True, False, True, False]]), 0)) # bool c((np.array([[True, False, True, False]], dtype=bool), 0)) # (Image([[[1., 0., 1., 0.]]],), 0) ImageScaleFalse_data # Dataset OxfordIIITPet # Number of datapoints: 3680 # Root location: data # StandardTransform # Transform: Compose( # ToImage() # ToDtype(scale=False) # ) ImageScaleFalse_data[0] # (Image([[[37., 35., 36., ..., 247., 249., 249.], # [35., 35., 37., ..., 246., 248., 249.], # ..., # [28., 28., 27., ..., 59., 65., 76.]], # [[20., 18., 19., ..., 248., 248., 248.], # [18., 18., 20., ..., 247., 247., 248.], # ..., # [27., 27., 27., ..., 94., 106., 117.]], # [[12., 10., 11., ..., 253., 253., 253.], # [10., 10., 12., ..., 251., 252., 253.], # ..., # [35., 35., 35., ..., 214., 232., 223.]]],), 0) ImageScaleFalse_data[0][0].size() # torch.Size([3, 500, 394]) ImageScaleFalse_data[0][0] # Image([[[37., 35., 36., ..., 247., 249., 249.], # [35., 35., 37., ..., 246., 248., 249.], # ..., # [28., 28., 27., ..., 59., 65., 76.]], # [[20., 18., 19., ..., 248., 248., 248.], # [18., 18., 20., ..., 247., 247., 248.], # ..., # [27., 27., 27., ..., 94., 106., 117.]], # [[12., 10., 11., ..., 253., 253., 253.], # [10., 10., 12., ..., 251., 252., 253.], # ..., # [35., 35., 35., ..., 214., 232., 223.]]],) ImageScaleFalse_data[0][1] # 0 plt.imshow(X=ImageScaleFalse_data[0][0]) # TypeError: Invalid shape (3, 500, 394) for image data c = Compose(transforms=[ToImage(), ToDtype(dtype=torch.float32, scale=False)]) c(PILImage_data) # It's still PIL image. 
# Dataset OxfordIIITPet # Number of datapoints: 3680 # Root location: data c(PILImage_data[0]) # (Image([[[37., 35., 36., ..., 247., 249., 249.], # [35., 35., 37., ..., 246., 248., 249.], # ..., # [28., 28., 27., ..., 59., 65., 76.]], # [[20., 18., 19., ..., 248., 248., 248.], # [18., 18., 20., ..., 247., 247., 248.], # ..., # [27., 27., 27., ..., 94., 106., 117.]], # [[12., 10., 11., ..., 253., 253., 253.], # [10., 10., 12., ..., 251., 252., 253.], # ..., # [35., 35., 35., ..., 214., 232., 223.]]],), 0) c(PILImage_data[0][0]) # Image([[[37., 35., 36., ..., 247., 249., 249.], # [35., 35., 37., ..., 246., 248., 249.], # ..., # [28., 28., 27., ..., 59., 65., 76.]], # [[20., 18., 19., ..., 248., 248., 248.], # [18., 18., 20., ..., 247., 247., 248.], # ..., # [27., 27., 27., ..., 94., 106., 117.]], # [[12., 10., 11., ..., 253., 253., 253.], # [10., 10., 12., ..., 251., 252., 253.], # ..., # [35., 35., 35., ..., 214., 232., 223.]]],) plt.imshow(X=c(PILImage_data[0][0])) # TypeError: Invalid shape (3, 500, 394) for image data c((torch.tensor([[0, 1, 2, 3]]), 0)) # int64 c((torch.tensor([[0, 1, 2, 3]], dtype=torch.int64), 0)) c((torch.tensor([[[0, 1, 2, 3]]]), 0)) c((torch.tensor([[0, 1, 2, 3]], dtype=torch.int32), 0)) c((torch.tensor([[0., 1., 2., 3.]]), 0)) # float32 c((torch.tensor([[0., 1., 2., 3.]], dtype=torch.float32), 0)) c((torch.tensor([[0., 1., 2., 3.]], dtype=torch.float64), 0)) c((torch.tensor([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]]), 0)) # complex64 c((torch.tensor([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]], dtype=torch.complex64), 0)) c((torch.tensor([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]], dtype=torch.complex32), 0)) # (Image([[[0., 1., 2., 3.]]],), 0) c(torch.tensor([[0, 1, 2, 3]])) c(torch.tensor([[[0, 1, 2, 3]]])) # Image([[[0., 1., 2., 3.]]],) c((torch.tensor([[[[0, 1, 2, 3]]]]), 0)) # (Image([[[[0., 1., 2., 3.]]]], ), 0) c(torch.tensor([[[[0, 1, 2, 3]]]])) # Image([[[[0., 1., 2., 3.]]]],) c((torch.tensor([[[[[0, 1, 2, 3]]]]]), 0)) # (Image([[[[[0., 1., 2., 3.]]]]],), 
0) c(torch.tensor([[[[[0, 1, 2, 3]]]]])) # Image([[[[[0., 1., 2., 3.]]]]],) c((torch.tensor([[True, False, True, False]]), 0)) # bool c((torch.tensor([[True, False, True, False]], dtype=torch.bool), 0)) # (Image([[[1., 0., 1., 0.]]],), 0) c((np.array(3), 0)) # int32 c((np.array(3, dtype=np.int32), 0)) # (Image([[[3.]]],), 0) c(np.array(3)) # int32 # Image([[[3.]]],) c((np.array([0, 1, 2, 3]), 0)) c((np.array([[0, 1, 2, 3]]), 0)) c((np.array([[0, 1, 2, 3]], dtype=np.int64), 0)) c((np.array([[0., 1., 2., 3.]]), 0)) # float64 c((np.array([[0., 1., 2., 3.]], dtype=np.float64), 0)) c((np.array([[0., 1., 2., 3.]], dtype=np.float32), 0)) c((np.array([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]]), 0)) # complex128 c((np.array([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]], dtype=np.complex128), 0)) c((np.array([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]], dtype=np.complex64), 0)) # (Image([[[0., 1., 2., 3.]]],), 0) c(np.array([0, 1, 2, 3])) c(np.array([[0, 1, 2, 3]])) # Image([[[0., 1., 2., 3.]]],) c((np.array([[[0, 1, 2, 3]]]), 0)) # (Image([[[0.]], [[1.]], [[2.]], [[3.]]],), 0) c(np.array([[[0, 1, 2, 3]]])) # Image([[[0.]], [[1.]], [[2.]], [[3.]]],) c((np.array([[True, False, True, False]]), 0)) # bool c((np.array([[True, False, True, False]], dtype=bool), 0)) # (Image([[[1., 0., 1., 0.]]],), 0)
Top comments (0)