Commit f17cfe4

martinraison authored and apaszke committed
sparse tensor operations (pytorch#735)
1 parent c93c884 commit f17cfe4

52 files changed (+2556, -271 lines)

test/common.py

Lines changed: 12 additions & 5 deletions
@@ -118,11 +118,18 @@ def assertEqual(self, x, y, prec=None, message=''):
             y = y.data
 
         if torch.is_tensor(x) and torch.is_tensor(y):
-            max_err = 0
-            super(TestCase, self).assertEqual(x.size(), y.size())
-            for index in iter_indices(x):
-                max_err = max(max_err, abs(x[index] - y[index]))
-            self.assertLessEqual(max_err, prec, message)
+            def assertTensorsEqual(a, b):
+                max_err = 0
+                super(TestCase, self).assertEqual(a.size(), b.size())
+                for index in iter_indices(a):
+                    max_err = max(max_err, abs(a[index] - b[index]))
+                self.assertLessEqual(max_err, prec, message)
+            self.assertEqual(x.is_sparse, y.is_sparse, message)
+            if x.is_sparse:
+                assertTensorsEqual(x.indices(), y.indices())
+                assertTensorsEqual(x.values(), y.values())
+            else:
+                assertTensorsEqual(x, y)
         elif type(x) == str and type(y) == str:
             super(TestCase, self).assertEqual(x, y)
         elif is_iterable(x) and is_iterable(y):
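
Note: the new branch compares a sparse tensor by its COO components (indices and values) instead of iterating over every dense position. A minimal standalone sketch of the same comparison, assuming the constructor and accessors used elsewhere in this commit (torch.sparse.DoubleTensor, .indices(), .values()); the tensors are made up for illustration:

import torch

size = torch.Size([3, 3])
i = torch.LongTensor([[0, 2], [1, 0]])   # 2 x nnz matrix of coordinates
v = torch.DoubleTensor([1.0, 2.0])       # one value per coordinate
a = torch.sparse.DoubleTensor(i, v, size)
b = torch.sparse.DoubleTensor(i.clone(), v.clone(), size)

# A dense comparison would have to materialize every entry; comparing the
# COO components only touches the nnz stored elements.
assert a.is_sparse and b.is_sparse
assert a.indices().equal(b.indices())
assert a.values().equal(b.values())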

test/common_nn.py

Lines changed: 6 additions & 3 deletions
@@ -337,15 +337,18 @@ def _jacobian(self, input, num_out):
 
     def _flatten_tensors(self, x):
         if torch.is_tensor(x):
-            return x.view(-1)
+            if x.is_sparse:
+                return x.to_dense().view(-1)
+            else:
+                return x.view(-1)
         elif isinstance(x, Variable):
-            return x.data.view(-1)
+            return self._flatten_tensors(x.data)
         else:
             return tuple(self._flatten_tensors(a) for a in x)
 
     def _zero_grad_input(self, input):
         if isinstance(input, Variable):
-            if input.requires_grad:
+            if input.requires_grad and input.grad is not None:
                 input.grad.data.zero_()
         elif torch.is_tensor(input):
             return
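
Note: view(-1) is only defined for dense layouts, so _flatten_tensors now densifies sparse inputs first. A rough illustration under the same assumptions (legacy torch.sparse.DoubleTensor constructor, made-up values):

import torch

i = torch.LongTensor([[0, 2], [1, 0]])
v = torch.DoubleTensor([1.0, 2.0])
s = torch.sparse.DoubleTensor(i, v, torch.Size([3, 3]))

# Calling view(-1) on the sparse tensor itself is not supported; convert first.
flat = s.to_dense().view(-1)   # length-9 dense vector with zeros filled in
print(flat)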

test/test_autograd.py

Lines changed: 45 additions & 1 deletion
@@ -128,6 +128,49 @@ def _test_backward(self):
     def test_backward(self):
         self._test_backward()
 
+    def test_sparse_backward(self):
+        class FixedGradientFunction(Function):
+
+            def __init__(self, grad):
+                self.grad = grad
+
+            def forward(self, x):
+                return x
+
+            def backward(self, grad_x):
+                return self.grad
+
+        size = torch.Size([6, 3, 2])
+        i1 = torch.LongTensor([
+            [0, 3, 4],
+            [0, 2, 2],
+        ])
+        v1 = torch.DoubleTensor([[1, 2], [4, 5], [7, 8]])
+        sparse_grad1 = torch.sparse.DoubleTensor(i1, v1, size)
+        i2 = torch.LongTensor([
+            [0, 1, 3, 4],
+            [0, 1, 2, 2],
+        ])
+        v2 = torch.DoubleTensor([[1, 2], [4, 3], [4, 5], [7, 8]])
+        sparse_grad2 = torch.sparse.DoubleTensor(i2, v2, size)
+        dense_grad = torch.rand(size).double()
+        sparse_fn1 = FixedGradientFunction(sparse_grad1)
+        sparse_fn2 = FixedGradientFunction(sparse_grad2)
+        dense_fn = FixedGradientFunction(dense_grad)
+
+        # sparse first
+        x = Variable(torch.randn(5, 5), requires_grad=True)
+        (sparse_fn1(x) + dense_fn(x) + sparse_fn2(x)).sum().backward()
+        self.assertEqual(x.grad.data, dense_grad + sparse_grad1 + sparse_grad2)
+        # dense first
+        x = Variable(torch.randn(5, 5), requires_grad=True)
+        (dense_fn(x) + sparse_fn1(x) + sparse_fn2(x)).sum().backward()
+        self.assertEqual(x.grad.data, dense_grad + sparse_grad1 + sparse_grad2)
+        # sparse only
+        x = Variable(torch.randn(5, 5), requires_grad=True)
+        (sparse_fn1(x) + sparse_fn2(x)).sum().backward()
+        self.assertEqual(x.grad.data, sparse_grad1 + sparse_grad2)
+
     @unittest.skip("BasicEngine is out of date")
     def test_backward_basic_engine(self):
         with backward_engine(torch.autograd.engine.BasicEngine):
@@ -197,7 +240,8 @@ def test_indexing(self):
         y = Variable(x, requires_grad=True)
 
         def check_index(idx):
-            y.grad.data.zero_()
+            if y.grad is not None:
+                y.grad.data.zero_()
             indexed_tensor = x[idx]
             indexed_var = y[idx]
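
Note: test_sparse_backward relies on the engine accumulating a mix of sparse and dense gradients into a single .grad. A condensed sketch of the same pattern, reusing the commit's FixedGradientFunction idea with made-up 5x5 gradients and the old-style Function/Variable API of this PyTorch version:

import torch
from torch.autograd import Variable, Function

class FixedGradientFunction(Function):
    # Identity forward; backward ignores the incoming gradient and returns a
    # predetermined one, which may be sparse or dense.
    def __init__(self, grad):
        self.grad = grad

    def forward(self, x):
        return x

    def backward(self, grad_output):
        return self.grad

size = torch.Size([5, 5])
i = torch.LongTensor([[0, 4], [0, 4]])
v = torch.DoubleTensor([1, 2])
sparse_grad = torch.sparse.DoubleTensor(i, v, size)   # entries at (0,0) and (4,4)
dense_grad = torch.rand(size).double()

x = Variable(torch.randn(5, 5).double(), requires_grad=True)
(FixedGradientFunction(sparse_grad)(x) + FixedGradientFunction(dense_grad)(x)).sum().backward()
# Sparse and dense contributions are summed into one gradient on x.
print(x.grad.data)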

test/test_multiprocessing.py

Lines changed: 3 additions & 3 deletions
@@ -80,8 +80,8 @@ def autograd_sharing(queue, ready, master_modified):
     is_ok = var.data.equal(expected_var)
     var.data[:] = torch.ones(5, 5)
 
-    is_ok &= var.grad.data.equal(torch.zeros(5, 5))
-    var.grad.data[:] = torch.ones(5, 5)
+    is_ok &= var.grad is None
+    var._grad = Variable(torch.ones(5, 5), requires_grad=False)
 
     queue.put(is_ok)
 
@@ -358,7 +358,7 @@ def _test_autograd_sharing(self, var):
         queue = mp.Queue()
         p = mp.Process(target=autograd_sharing, args=(queue, ready, master_modified))
        p.start()
-        var.grad.data.zero_()
+        var._grad = Variable(torch.zeros(5, 5), requires_grad=False)
         queue.put(var)
 
         ready.wait()
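
Note: these edits follow from .grad now starting out as None instead of a pre-allocated zero buffer, which is why the worker asserts var.grad is None and the parent installs a gradient explicitly through the internal _grad slot. A small sketch of the lazy behaviour, assuming the Variable API of this version:

import torch
from torch.autograd import Variable

v = Variable(torch.randn(5, 5), requires_grad=True)
print(v.grad)        # None: no gradient buffer has been allocated yet

v.sum().backward()
print(v.grad.data)   # allocated on the first backward; all ones here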

test/test_nn.py

Lines changed: 29 additions & 17 deletions
@@ -196,7 +196,8 @@ def _forward_criterion(self, criterion, input, target):
     def _backward_criterion(self, criterion, input, target):
         input_tuple = input if isinstance(input, tuple) else (input,)
         for i in input_tuple:
-            i.grad.data.zero_()
+            if i.grad is not None:
+                i.grad.data.zero_()
         args = input_tuple + (target,)
         criterion(*args).backward()
         if isinstance(input, tuple):
@@ -206,18 +207,24 @@ def _backward_criterion(self, criterion, input, target):
 
     def _zero_grad_parameters(self, module):
         if hasattr(module, 'weight') and module.weight is not None:
-            module.weight.grad.data.zero_()
+            if module.weight.grad is not None:
+                module.weight.grad.data.zero_()
         if hasattr(module, 'bias') and module.bias is not None:
-            module.bias.grad.data.zero_()
+            if module.bias.grad is not None:
+                module.bias.grad.data.zero_()
 
     def _get_parameters(self, module):
         params = []
         d_params = []
         if hasattr(module, 'weight') and module.weight is not None:
             params += [module.weight.data]
+            if module.weight.grad is None:
+                module.weight._grad = Variable(module.weight.data.clone().zero_())
             d_params += [module.weight.grad.data]
         if hasattr(module, 'bias') and module.bias is not None:
             params += [module.bias.data]
+            if module.bias.grad is None:
+                module.bias._grad = Variable(module.bias.data.clone().zero_())
             d_params += [module.bias.grad.data]
         return params, d_params
 
@@ -356,13 +363,13 @@ def test_zero_grad(self):
         module.zero_grad()
 
         module.weight.requires_grad = True
-        module.weight.grad.data.fill_(1)
+        module.weight._grad = Variable(module.weight.data.clone().fill_(1))
         module.zero_grad()
         self.assertEqual(module.weight.grad.data, module.weight.data.clone().zero_())
 
         module.bias.requires_grad = True
-        module.weight.grad.data.fill_(1)
-        module.bias.grad.data.fill_(1)
+        module.weight._grad = Variable(module.weight.data.clone().fill_(1))
+        module.bias._grad = Variable(module.bias.data.clone().fill_(1))
         module.zero_grad()
         self.assertEqual(module.weight.grad.data, module.weight.data.clone().zero_())
         self.assertEqual(module.bias.grad.data, module.bias.data.clone().zero_())
@@ -586,7 +593,7 @@ def compare_scaling(grads):
         grads = torch.range(1, 100), torch.ones(10).div(1000)
         for norm_type in [0.5, 1.5, 2, 4, 'inf']:
             for p, g in zip(l.parameters(), grads):
-                p.grad.data.copy_(g)
+                p._grad = Variable(g.clone())
             norm_before = compute_norm(norm_type)
             clip_grad_norm(l.parameters(), max_norm, norm_type=norm_type)
             norm_after = compute_norm(norm_type)
@@ -1167,7 +1174,8 @@ def pad(tensor, length):
         self.assertEqual(unpacked_len, lengths)
 
         # check grad
-        padded.grad.data.zero_()
+        if padded.grad is not None:
+            padded.grad.data.zero_()
         grad_output = unpacked.data.clone().normal_()
         unpacked.backward(grad_output)
         if batch_first:
@@ -1185,13 +1193,15 @@ def pad(var, length):
 
         lengths = [10, 10, 6, 2, 2, 1, 1]
         max_length = lengths[0]
-        x = Variable(torch.randn(max_length, len(lengths), 3), requires_grad=True)
+        x_leaf = Variable(torch.randn(max_length, len(lengths), 3), requires_grad=True)
         lstm = nn.LSTM(3, 4, bidirectional=True, num_layers=2)
         lstm2 = deepcopy(lstm)
         if cuda:
-            x = x.cuda()
+            x = x_leaf.cuda()
             lstm.cuda()
             lstm2.cuda()
+        else:
+            x = x_leaf
 
         # Compute sequences separately
         seq_outs = []
@@ -1216,11 +1226,11 @@ def pad(var, length):
 
         # Check backward
         seq_out.sum().backward()
-        grad_x = x.grad.data.clone()
-        x.grad.data.zero_()
+        grad_x = x_leaf.grad.data.clone()
+        x_leaf.grad.data.zero_()
         unpacked.sum().backward()
 
-        self.assertEqual(x.grad.data, grad_x)
+        self.assertEqual(x_leaf.grad.data, grad_x)
         for p1, p2 in zip(lstm.parameters(), lstm2.parameters()):
             self.assertEqual(p1.grad, p2.grad)
 
@@ -1576,11 +1586,12 @@ def test_noncontig_conv_grad(self):
         grad = torch.randn(2, 2, 5, 10, 10).cuda()[:, 1]
         assert not grad.is_contiguous()
         output.backward(grad, retain_variables=True)
-        result = output.grad.data.clone()
-        output.grad.data.zero_()
+        self.assertIsNotNone(input.grad)
+        result = input.grad.data.clone()
+        input.grad.data.zero_()
 
         output.backward(grad.contiguous())
-        self.assertEqual(result, output.grad.data)
+        self.assertEqual(result, input.grad.data)
 
     def test_pixel_shuffle(self):
         batch_size = random.randint(1, 3)
@@ -1613,7 +1624,8 @@ def test_batchnorm_eval(self):
         grad1 = data.grad.data.clone()
 
         # 2nd pass
-        data.grad.data.zero_()
+        if data.grad is not None:
+            data.grad.data.zero_()
 
         res2 = module(data)
         res2.backward(grad)
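
Note: most of the test_nn.py edits are the same mechanical guard: code that used to call p.grad.data.zero_() unconditionally now checks whether the gradient exists, or lazily creates one through the internal _grad slot as the tests do. A hypothetical helper in that spirit (zero_or_init_grad is not part of this commit):

import torch
from torch.autograd import Variable

def zero_or_init_grad(param):
    # Hypothetical helper: zero an existing gradient, or attach a fresh
    # zero-filled one through the internal _grad slot, mirroring the tests.
    if param.grad is not None:
        param.grad.data.zero_()
    else:
        param._grad = Variable(param.data.clone().zero_())

w = Variable(torch.randn(3, 3), requires_grad=True)
zero_or_init_grad(w)   # creates a zero gradient the first time
zero_or_init_grad(w)   # zeroes the existing one afterwards
print(w.grad.data)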
