pytorch
diff --git a/‎test/test_image.py‎
Lines changed: 11 additions & 2 deletions b/‎test/test_image.py‎
Lines changed: 11 additions & 2 deletions
diff --git a/‎torchvision/csrc/io/image/cpu/decode_gif.cpp‎
Lines changed: 50 additions & 34 deletions b/‎torchvision/csrc/io/image/cpu/decode_gif.cpp‎
Lines changed: 50 additions & 34 deletions
@@ -551,7 +551,9 @@ def test_pathlib_support(tmpdir):
  write_png(img, write_path)
 
 
-@pytest.mark.parametrize("name", ("gifgrid", "fire", "porsche", "treescap", "treescap-interlaced", "solid2", "x-trans"))
+@pytest.mark.parametrize(
+ "name", ("gifgrid", "fire", "porsche", "treescap", "treescap-interlaced", "solid2", "x-trans", "earth")
+)
 @pytest.mark.parametrize("scripted", (True, False))
 def test_decode_gif(tmpdir, name, scripted):
  # Using test images from GIFLIB
@@ -560,9 +562,16 @@ def test_decode_gif(tmpdir, name, scripted):
  # We're not testing against "welcome2" because PIL and GIFLIB disagee on what
  # the background color should be (likely a difference in the way they handle
  # transparency?)
+ # 'earth' image is from wikipedia, licensed under CC BY-SA 3.0
+ # https://creativecommons.org/licenses/by-sa/3.0/
+ # it allows to properly test for transparency, TOP-LEFT offsets, and
+ # disposal modes.
 
  path = tmpdir / f"{name}.gif"
- url = f"https://sourceforge.net/p/giflib/code/ci/master/tree/pic/{name}.gif?format=raw"
+ if name == "earth":
+ url = "https://upload.wikimedia.org/wikipedia/commons/2/2c/Rotating_earth_%28large%29.gif"
+ else:
+ url = f"https://sourceforge.net/p/giflib/code/ci/master/tree/pic/{name}.gif?format=raw"
  with open(path, "wb") as f:
  f.write(requests.get(url).content)
 
 
@@ -86,32 +86,19 @@ torch::Tensor decode_gif(const torch::Tensor& encoded_data) {
  // This check should already done within DGifSlurp(), just to be safe
  TORCH_CHECK(num_images > 0, "GIF file should contain at least one image!");
 
- // Note:
- // The GIF format has this notion of "canvas" and "canvas size", where each
- // image could be displayed on the canvas at different offsets, forming a
- // mosaic/picture wall like so:
- //
- // <--- canvas W --->
- // ------------------------ ^
- // | | | |
- // | img1 | img3 | |
- // | |------------| canvas H
- // |---------- | |
- // | img2 | img4 | |
- // | | | |
- // ------------------------ v
- // The GifLib docs indicate that this is mostly vestigial
- // (https://giflib.sourceforge.net/whatsinagif/bits_and_bytes.html), and
- // modern viewers ignore the canvas size as well as image offsets. Hence,
- // we're ignoring that too:
- // - We're ignoring the canvas width and height and assume that the shape of
- // the canvas and of all images is the shape of the first image.
- // - We're enforcing that all images have the same shape.
- // - Left and Top offsets of each image are ignored as well and assumed to be
- // 0.
-
- auto out_h = gifFile->SavedImages[0].ImageDesc.Height;
- auto out_w = gifFile->SavedImages[0].ImageDesc.Width;
+ GifColorType bg = {0, 0, 0};
+ if (gifFile->SColorMap) {
+ bg = gifFile->SColorMap->Colors[gifFile->SBackGroundColor];
+ }
+
+ // The GIFLIB docs say that the canvas's height and width are potentially
+ // ignored by modern viewers, so to be on the safe side we set the output
+ // height to max(canvas_heigh, first_image_height). Same for width.
+ // https://giflib.sourceforge.net/whatsinagif/bits_and_bytes.html
+ auto out_h =
+ std::max(gifFile->SHeight, gifFile->SavedImages[0].ImageDesc.Height);
+ auto out_w =
+ std::max(gifFile->SWidth, gifFile->SavedImages[0].ImageDesc.Width);
 
  // We output a channels-last tensor for consistency with other image decoders.
  // Torchvision's resize tends to be is faster on uint8 channels-last tensors.
@@ -121,30 +108,59 @@ torch::Tensor decode_gif(const torch::Tensor& encoded_data) {
  auto out = torch::empty(
  {int64_t(num_images), 3, int64_t(out_h), int64_t(out_w)}, options);
  auto out_a = out.accessor<uint8_t, 4>();
-
  for (int i = 0; i < num_images; i++) {
  const SavedImage& img = gifFile->SavedImages[i];
- const GifImageDesc& desc = img.ImageDesc;
- TORCH_CHECK(
- desc.Width == out_w && desc.Height == out_h,
- "All images in the gif should have the same dimensions.");
 
+ GraphicsControlBlock gcb;
+ DGifSavedExtensionToGCB(gifFile, i, &gcb);
+
+ const GifImageDesc& desc = img.ImageDesc;
  const ColorMapObject* cmap =
  desc.ColorMap ? desc.ColorMap : gifFile->SColorMap;
  TORCH_CHECK(
  cmap != nullptr,
  "Global and local color maps are missing. This should never happen!");
 
+ // When going from one image to another, there is a "disposal method" which
+ // specifies how to handle the transition. E.g. DISPOSE_DO_NOT means that
+ // the current image should essentially be drawn on top of the previous
+ // canvas. The pixels of that previous canvas will appear on the new one if
+ // either:
+ // - a pixel is transparent in the current image
+ // - the current image is smaller than the canvas, hence exposing its pixels
+ // The "background" disposal method means that the current canvas should be
+ // set to the background color.
+ // We only support these 2 modes and default to "background" when the
+ // disposal method is unspecified, or when it's set to "DISPOSE_PREVIOUS"
+ // which according to GIFLIB is not widely supported.
+ // (https://giflib.sourceforge.net/whatsinagif/animation_and_transparency.html).
+ if (i > 0 && gcb.DisposalMode == DISPOSE_DO_NOT) {
+ out[i] = out[i - 1];
+ } else {
+ // Background. If bg wasn't defined, it will be (0, 0, 0)
+ for (int h = 0; h < gifFile->SHeight; h++) {
+ for (int w = 0; w < gifFile->SWidth; w++) {
+ out_a[i][0][h][w] = bg.Red;
+ out_a[i][1][h][w] = bg.Green;
+ out_a[i][2][h][w] = bg.Blue;
+ }
+ }
+ }
+
  for (int h = 0; h < desc.Height; h++) {
  for (int w = 0; w < desc.Width; w++) {
  auto c = img.RasterBits[h * desc.Width + w];
+ if (c == gcb.TransparentColor) {
+ continue;
+ }
  GifColorType rgb = cmap->Colors[c];
- out_a[i][0][h][w] = rgb.Red;
- out_a[i][1][h][w] = rgb.Green;
- out_a[i][2][h][w] = rgb.Blue;
+ out_a[i][0][h + desc.Top][w + desc.Left] = rgb.Red;
+ out_a[i][1][h + desc.Top][w + desc.Left] = rgb.Green;
+ out_a[i][2][h + desc.Top][w + desc.Left] = rgb.Blue;
  }
  }
  }
+
  out = out.squeeze(0); // remove batch dim if there's only one image
 
  DGifCloseFile(gifFile, &error);