import torch
import random
import unittest
+import contextlib
from copy import deepcopy
from itertools import repeat

import torch.nn as nn
import torch.nn.parallel as dp
from torch.autograd import Variable
from common_nn import NNTestCase, ModuleTest, CriterionTest, TestBase, \
-    module_tests, criterion_tests, TEST_CUDA, PRECISION
+    module_tests, criterion_tests, TEST_CUDA, TEST_CUDNN, PRECISION
from common import freeze_rng_state

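+# Temporarily set the default tensor type; the previous default is restored on exit.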
+@contextlib.contextmanager
+def set_default_tensor_type(type):
+    old_type = torch.typename(torch.Tensor())
+    torch.set_default_tensor_type(type)
+    try:
+        yield
+    finally:
+        torch.set_default_tensor_type(old_type)

class InputVariableMixin(object):
    def _get_input(self):
@@ -609,6 +618,87 @@ def test_MaxUnpool2d_output_size(self): |
                                      mu(output_small, indices_small, (h, w)))


+    @unittest.skipIf(not TEST_CUDNN, "needs cudnn")
+    def test_RNN_cpu_vs_cudnn(self):
+
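+        # Build the RNN, copy in the given weights, and run one forward/backward
+        # pass on CPU or GPU, returning the outputs and gradients to compare.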
+        def forwardBackward(cuda, mode, input_val, hx_val, weights_val):
+            rnn = nn.RNNBase(mode, input_size, hidden_size, num_layers)
+
+            for x_layer, y_layer in zip(rnn.all_weights, weights_val):
+                for x, y in zip(x_layer, y_layer):
+                    x.data.copy_(y.data)
+
+            input = Variable(input_val.clone(), requires_grad=True)
+            if mode == 'LSTM':
+                hx = (Variable(hx_val.clone(), requires_grad=True),
+                      Variable(hx_val.add(1), requires_grad=True))
+            else:
+                hx = Variable(hx_val.clone(), requires_grad=True)
+
+            if cuda:
+                rnn.cuda()
+                input.data = input.data.cuda()
+                if mode == 'LSTM':
+                    hx[0].data = hx[0].data.cuda()
+                    hx[1].data = hx[1].data.cuda()
+                else:
+                    hx.data = hx.data.cuda()
+
+            output, hy = rnn(input, hx)
+            # FIXME this is because of a pytorch bug
+            if mode == 'LSTM':
+                fake_loss = 0 * (hy[0] + hy[1]).sum()
+            else:
+                fake_loss = 0 * hy.sum()
+
+            loss = output.sum() + fake_loss
+            loss.backward()
+
+            return {'output': output.data,
+                    'hy': hy[0].data if mode == 'LSTM' else hy.data,
+                    'weights': rnn.all_weights,
+                    'grad_input': input.grad,
+                    'grad_hx': hx[0].grad if mode == 'LSTM' else hx.grad,
+                    'cy': hy[1].data if mode == 'LSTM' else None,
+                    'grad_cx': hx[1].grad if mode == 'LSTM' else None}
+
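+        # Check that a CPU and a GPU tensor agree elementwise within tolerance.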
+        def diff(t_cpu, t_gpu, name):
+            self.assertTrue(torch.is_tensor(t_cpu))
+            self.assertTrue(torch.is_tensor(t_gpu))
+            delta = t_gpu.cpu().add(-1, t_cpu).abs().max()
+            # print("{:30s} cpu: {:10g} gpu: {:10g} diff: {:10g}".format(name, t_cpu.abs().max(), t_gpu.abs().max(), delta))
+            self.assertLess(delta, 2 * PRECISION)
+
+        input_size = 10
+        hidden_size = 20
+        num_layers = 2
+        seq_length = 7
+        batch = 5
+
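+        # Run every RNN flavour on CPU and on cuDNN with identical inputs and
+        # weights, then compare outputs and gradients.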
+        # FIXME: we can't use torch.cuda.DoubleTensor because sum() is not yet defined on it
+        with set_default_tensor_type('torch.FloatTensor'):
+            for mode in ("RNN_RELU", "RNN_TANH", "GRU", "LSTM"):
+                input_val = torch.randn(seq_length, batch, input_size)
+                hx_val = torch.randn(num_layers, batch, hidden_size)
+
+                weights_val = nn.RNNBase(mode, input_size, hidden_size, num_layers).all_weights
+
+                outputs_cpu = forwardBackward(False, mode, input_val, hx_val, weights_val)
+                outputs_gpu = forwardBackward(True, mode, input_val, hx_val, weights_val)
+
+                diff(outputs_cpu['output'], outputs_gpu['output'], 'output')
+                diff(outputs_cpu['hy'], outputs_gpu['hy'], 'hy')
+                diff(outputs_cpu['grad_input'], outputs_gpu['grad_input'], 'grad_input')
+                diff(outputs_cpu['grad_hx'], outputs_gpu['grad_hx'], 'grad_hx')
+                if outputs_cpu['cy'] is not None:
+                    diff(outputs_cpu['cy'], outputs_gpu['cy'], 'cy')
+                    diff(outputs_cpu['grad_cx'], outputs_gpu['grad_cx'], 'grad_cx')
+
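+                # Also compare the gradients accumulated on every weight and bias.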
+                for i, (cpu_layer_weight, gpu_layer_weight) in enumerate(zip(outputs_cpu['weights'], outputs_gpu['weights'])):
+                    for j, (cpu_weight, gpu_weight) in enumerate(zip(cpu_layer_weight, gpu_layer_weight)):
+                        diff(cpu_weight.grad, gpu_weight.grad, mode + ' grad_weight[{},{}]'.format(i, j))
+
+
def add_test(test):
    test_name = test.get_name()
    cuda_test_name = test_name + '_cuda'