
Commit b5d1329

addressing comments
1 parent 8628826 commit b5d1329

13 files changed, +409 -456 lines changed

test/test_autograd.py

Lines changed: 14 additions & 0 deletions
@@ -230,6 +230,20 @@ def test_setitem_mask(self):
         mask = Variable(torch.ByteTensor(5, 5).bernoulli_(), requires_grad=False)
         self._test_setitem(mask)
 
+    def test_setitem_tensor(self):
+        x = Variable(torch.ones(5, 5), requires_grad=True)
+        y = x + 2
+        y_version = y._version
+        value = Variable(torch.Tensor(5).fill_(7), requires_grad=True)
+        index = 3
+        y[index] = value
+        self.assertNotEqual(y._version, y_version)
+        y.backward(torch.ones(5, 5))
+        expected_grad_input = torch.ones(5, 5)
+        expected_grad_input[index] = 0
+        self.assertEqual(x.grad, expected_grad_input)
+        self.assertEqual(value.grad, torch.ones(5))
+
     def test_type_conversions(self):
         import torch.cuda
         x = Variable(torch.randn(5, 5))
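Note on the new test above: it pins down how gradients flow through y[index] = value when value is a Variable, namely that the overwritten row routes its gradient to value and is zeroed for x. A hand-computed sketch of the expected values (this mirrors the SetItem.backward change in torch/autograd/functions/tensor.py later in this commit; it is an illustration, not part of the diff):

import torch

grad_output = torch.ones(5, 5)         # what the test passes to y.backward(...)
grad_value = grad_output[3].clone()    # row overwritten by y[3] = value -> torch.ones(5)
grad_input = grad_output.clone()
grad_input[3].fill_(0)                 # the overwritten row no longer depends on x
# the test asserts x.grad == grad_input and value.grad == grad_value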

test/test_nn.py

Lines changed: 53 additions & 43 deletions
@@ -5,6 +5,7 @@
 import contextlib
 from copy import deepcopy
 from itertools import repeat
+from functools import wraps
 
 import torch.nn as nn
 import torch.nn.parallel as dp
@@ -13,14 +14,19 @@
     module_tests, criterion_tests, TEST_CUDA, TEST_CUDNN, PRECISION
 from common import freeze_rng_state
 
-@contextlib.contextmanager
-def set_default_tensor_type(type):
-    old_type = torch.typename(torch.Tensor())
-    torch.set_default_tensor_type(type)
-    try:
-        yield
-    finally:
-        torch.set_default_tensor_type(old_type)
+def default_tensor_type(type):
+    type_str = torch.typename(type)
+    def decorator(fn):
+        @wraps(fn)
+        def wrapper(*args, **kwargs):
+            old_type = torch.typename(torch.Tensor())
+            torch.set_default_tensor_type(type_str)
+            try:
+                return fn(*args, **kwargs)
+            finally:
+                torch.set_default_tensor_type(old_type)
+        return wrapper
+    return decorator
 
 class InputVariableMixin(object):
     def _get_input(self):
@@ -621,7 +627,7 @@ def test_MaxUnpool2d_output_size(self):
     def test_RNN_cell(self):
         # this is just a smoke test; these modules are implemented through
         # autograd so no Jacobian test is needed
-        for module in (nn.rnn.cell.RNN, nn.rnn.cell.RNNReLU, nn.rnn.cell.GRU):
+        for module in (nn.RNNCell, nn.GRUCell):
             for bias in (True, False):
                 input = Variable(torch.randn(3, 10))
                 hx = Variable(torch.randn(3, 20))
@@ -638,18 +644,18 @@ def test_LSTM_cell(self):
             input = Variable(torch.randn(3, 10))
             hx = Variable(torch.randn(3, 20))
             cx = Variable(torch.randn(3, 20))
-            lstm = nn.rnn.cell.LSTM(10, 20, bias=bias)
+            lstm = nn.LSTMCell(10, 20, bias=bias)
             for i in range(6):
                 hx, cx = lstm(input, (hx, cx))
 
         (hx+cx).sum().backward()
 
     @unittest.skipIf(not TEST_CUDNN, "needs cudnn")
+    @default_tensor_type(torch.FloatTensor)  # FIXME: just until torch.cuda.DoubleTensor.sum() implemented
     def test_RNN_cpu_vs_cudnn(self):
 
-        def forward_backward(cuda, module, bias, input_val, hx_val, weights_val):
-            rnn = module(input_size, hidden_size, num_layers, bias=bias)
-            is_lstm = module == nn.rnn.LSTM
+        def forward_backward(cuda, rnn, input_val, hx_val, weights_val):
+            is_lstm = type(rnn) == nn.LSTM
 
             for x_layer, y_layer in zip(rnn.all_weights, weights_val):
                 for x, y in zip(x_layer, y_layer):
@@ -689,42 +695,46 @@ def forward_backward(cuda, module, bias, input_val, hx_val, weights_val):
                        'cy': hy[1].data if is_lstm else None,
                        'grad_cx': hx[1].grad if is_lstm else None}
 
-        def diff(t_cpu, t_gpu, name):
-            self.assertTrue(torch.is_tensor(t_cpu))
-            self.assertTrue(torch.is_tensor(t_gpu))
-            delta = t_gpu.cpu().add(-1, t_cpu).abs().max()
-            # print("{:30s} cpu: {:10g} gpu: {:10g} diff: {:10g}".format(name, t_cpu.abs().max(), t_gpu.abs().max(), delta))
-            self.assertLess(delta, 2 * PRECISION)
-
         input_size = 10
         hidden_size = 20
         num_layers = 2
         seq_length = 7
         batch = 5
 
-        # FIXME: we can't use torch.cuda.DoubleTensor because sum() is not yet defined on it
-        with set_default_tensor_type('torch.FloatTensor'):
-            for module in (nn.rnn.RNNTanh, nn.rnn.RNNReLU, nn.rnn.LSTM, nn.rnn.GRU):
-                for bias in (True, False):
-                    input_val = torch.randn(seq_length, batch, input_size)
-                    hx_val = torch.randn(num_layers, batch, hidden_size)
-
-                    weights_val = module(input_size, hidden_size, num_layers).all_weights
-
-                    outputs_cpu = forward_backward(False, module, bias, input_val, hx_val, weights_val)
-                    outputs_gpu = forward_backward(True, module, bias, input_val, hx_val, weights_val)
-
-                    diff(outputs_cpu['output'], outputs_gpu['output'], 'output')
-                    diff(outputs_cpu['hy'], outputs_gpu['hy'], 'hy')
-                    diff(outputs_cpu['grad_input'], outputs_gpu['grad_input'], 'grad_input')
-                    diff(outputs_cpu['grad_hx'], outputs_gpu['grad_hx'], 'grad_hx')
-                    if outputs_cpu['cy'] is not None:
-                        diff(outputs_cpu['cy'], outputs_gpu['cy'], 'cy')
-                        diff(outputs_cpu['grad_cx'], outputs_gpu['grad_cx'], 'grad_cx')
-
-                    for i, (cpu_layer_weight, gpu_layer_weight) in enumerate(zip(outputs_cpu['weights'], outputs_gpu['weights'])):
-                        for j, (cpu_weight, gpu_weight) in enumerate(zip(cpu_layer_weight, gpu_layer_weight)):
-                            diff(cpu_weight.grad, gpu_weight.grad, 'grad_weight[{},{}]'.format(i, j))
+        def compare_cpu_gpu(outputs_cpu, outputs_gpu):
+            self.assertEqual(list(outputs_cpu.keys()), list(outputs_gpu.keys()))
+            for key in outputs_cpu.keys():
+                if key != 'weights':
+                    self.assertEqual(outputs_cpu[key], outputs_gpu[key], prec=5e-5)
+
+            # check grad weights separately, as nested dict
+            for cpu_layer_weight, gpu_layer_weight in zip(outputs_cpu['weights'], outputs_gpu['weights']):
+                for (cpu_weight, gpu_weight) in zip(cpu_layer_weight, gpu_layer_weight):
+                    self.assertEqual(cpu_weight.grad, gpu_weight.grad, prec=5e-5)
+
+
+        input_val = torch.randn(seq_length, batch, input_size)
+        hx_val = torch.randn(num_layers, batch, hidden_size)
+        # FIXME: add bidirectional
+        # FIXME: add dropout
+        for module in (nn.RNN, nn.LSTM, nn.GRU):
+            for bias in (True, False):
+                rnn = module(input_size, hidden_size, num_layers, bias=bias)
+                outputs_cpu = forward_backward(False, rnn, input_val, hx_val, rnn.all_weights)
+
+                rnn_gpu = module(input_size, hidden_size, num_layers, bias=bias)
+                outputs_gpu = forward_backward(True, rnn_gpu, input_val, hx_val, rnn.all_weights)
+
+                compare_cpu_gpu(outputs_cpu, outputs_gpu)
+
+                for nonlinearity in ('tanh', 'relu'):
+                    rnn = nn.rnn.RNN(input_size, hidden_size, num_layers, bias=bias, nonlinearity=nonlinearity)
+                    outputs_cpu = forward_backward(False, rnn, input_val, hx_val, rnn.all_weights)
+
+                    rnn_gpu = nn.rnn.RNN(input_size, hidden_size, num_layers, bias=bias, nonlinearity=nonlinearity)
+                    outputs_gpu = forward_backward(True, rnn_gpu, input_val, hx_val, rnn.all_weights)
+
+                    compare_cpu_gpu(outputs_cpu, outputs_gpu)
 
 
 def add_test(test):
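Note on this file's diff: the set_default_tensor_type context manager is replaced by the default_tensor_type decorator so that a whole test method can run under a temporary default tensor type (it is applied to test_RNN_cpu_vs_cudnn above). A minimal usage sketch, assuming the decorator from the first hunk is in scope; make_tensor is a hypothetical stand-in for a decorated test and not part of the diff:

import torch

@default_tensor_type(torch.FloatTensor)   # decorator introduced in this diff
def make_tensor():                         # hypothetical helper
    return torch.Tensor(3)                 # allocated while FloatTensor is the default

t = make_tensor()
assert torch.typename(t) == 'torch.FloatTensor'
# after the call returns, the previous default tensor type has been restored,
# even if the wrapped function raised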

torch/autograd/function.py

Lines changed: 3 additions & 7 deletions
@@ -1,9 +1,8 @@
+import torch
 import torch._C as _C
 from collections import OrderedDict
 from itertools import chain
 
-import torch # FIXME: is this ok? Needed for torch.is_tensor
-import collections
 
 class Function(_C._FunctionBase):
 
@@ -78,12 +77,9 @@ def _iter(obj):
 _iter_tensors = _iter_filter(torch.is_tensor)
 _iter_None_tensors = _iter_filter(lambda o: o is None or torch.is_tensor(o))
 _map_variable_tensor = _nested_map(lambda o: isinstance(o, torch.autograd.Variable), lambda o: o.data)
-_map_tensor_type = _nested_map(lambda o: torch.is_tensor(o), lambda o: o.type())
 
 def _map_tensor_fromiter(itr):
-    return _nested_map(lambda o: torch.is_tensor(o), lambda o: itr.next())
-def _map_variable_fromiter(itr):
-    return _nested_map(lambda o: isinstance(o, torch.autograd.Variable), lambda o: itr.next())
+    return _nested_map(lambda o: torch.is_tensor(o), lambda o: next(itr))
 
 class NestedIOFunction(Function):
 
@@ -99,6 +95,7 @@ def backward(self, *gradients):
         nested_gradients = _map_tensor_fromiter(iter(gradients))(self._nested_output)
         del self._nested_output
         result = self.backward_extended(*nested_gradients)
+        del self._to_save_nested
         return tuple(_iter_None_tensors(result))
 
     __call__ = _do_forward
@@ -130,4 +127,3 @@ def forward_extended(self, *input):
 
     def backward_extended(self, *grad_output):
         raise NotImplementedError
-        raise NotImplementedError
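Note on the itr.next() to next(itr) change above: _map_tensor_fromiter rebuilds a nested structure by drawing tensors, in order, from a flat iterator, and the builtin next() works on both Python 2 and 3 while the .next() method is Python 2 only. A rough sketch of the intended behaviour, assuming _map_tensor_fromiter from this module is in scope and that _nested_map recurses through tuples/lists applying the mapping function to elements matching the predicate, as its other uses in this file suggest:

import torch

# flat gradients arrive in the order the forward outputs were flattened
flat_grads = iter([torch.ones(2), torch.zeros(3)])
nested_output = (torch.ones(2), [torch.zeros(3)])   # nesting recorded during forward
# each tensor slot in nested_output is replaced by the next item drawn from
# flat_grads, keeping the tuple/list structure intact
nested_grads = _map_tensor_fromiter(flat_grads)(nested_output)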

torch/autograd/functions/tensor.py

Lines changed: 9 additions & 3 deletions
@@ -44,9 +44,15 @@ def forward(self, i, value=None):
         return i
 
     def backward(self, grad_output):
-        grad_input = grad_output.clone()
-        grad_input[self.index].fill_(0)
-        return grad_input
+        if self.value is None:
+            grad_input = grad_output.clone()
+            grad_value = grad_output[self.index].clone()
+            grad_input[self.index].fill_(0)
+            return grad_input, grad_value
+        else:
+            grad_input = grad_output.clone()
+            grad_input[self.index].fill_(0)
+            return grad_input
 
 
 class Transpose(Function):
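The two branches line up with how Variable.__setitem__ constructs SetItem (see the torch/autograd/variable.py hunk below): when the assigned value is itself a Variable it is passed as a second input and self.value stays None, so backward must also hand back a gradient for the value; when the value is a plain number or tensor constant, only the indexed input needs one. Schematically (hypothetical variables, for illustration only):

y[3] = some_variable   # SetItem(3)(y, some_variable) -> backward returns (grad_input, grad_value)
y[3] = 7               # SetItem(3, 7)(y)             -> backward returns grad_input only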

torch/autograd/variable.py

Lines changed: 2 additions & 1 deletion
@@ -58,7 +58,8 @@ def __setitem__(self, key, value):
             return MaskedFill(value, inplace=True)(self, key)
         if isinstance(value, Variable):
             return SetItem(key)(self, value)
-        return SetItem(key, value)(self)
+        else:
+            return SetItem(key, value)(self)
 
     def __iter__(self):
         return iter(map(lambda i: self[i], range(self.size(0))))

torch/backends/cudnn/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -280,7 +280,7 @@ def int_array(itr):
     return array_type(*itr)
 
 def descriptor(tensor, N=None):
-    if N:
+    if N is not None:
         descriptor = TensorDescriptorArray(N)
     else:
         descriptor = TensorDescriptor()
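The tightened check matters because "if N:" treats any falsy N like an omitted argument, while "if N is not None:" only falls back to the single-descriptor branch when N is genuinely absent; an explicitly passed falsy value (say a hypothetical N=0) still selects the array branch. A minimal illustration of the difference, not tied to this module:

N = 0
if N:              # False: an explicit 0 would be silently ignored
    pass
if N is not None:  # True: only a missing N (None) means "no array requested"
    pass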
