 import torch.backends.cudnn.rnn


-
-
 def _getCudnnMode(mode):
     if mode == 'RNN_RELU':
         return cudnn.CUDNN_RNN_RELU
@@ -48,11 +46,11 @@ def linear(input, w, b):

 def RNNReLUCell(input, hidden, w_ih, w_hh, b_ih=None, b_hh=None):
     hy = ReLU(linear(input, w_ih, b_ih) + linear(hidden, w_hh, b_hh))
-    return hy, hy
+    return hy

 def RNNTanhCell(input, hidden, w_ih, w_hh, b_ih=None, b_hh=None):
     hy = tanh(linear(input, w_ih, b_ih) + linear(hidden, w_hh, b_hh))
-    return hy, hy
+    return hy

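Note (illustration, not part of the commit): both elementwise cells now return the new hidden state once instead of the duplicated `(hy, hy)` pair. A minimal sketch of the same tanh-cell math with current torch ops, with all names and shapes assumed:

```python
# Illustrative only; mirrors RNNTanhCell above with assumed shapes.
import torch
import torch.nn.functional as F

batch, input_size, hidden_size = 4, 10, 20
x = torch.randn(batch, input_size)
hx = torch.randn(batch, hidden_size)
w_ih = torch.randn(hidden_size, input_size)   # F.linear expects (out, in)
w_hh = torch.randn(hidden_size, hidden_size)

hy = torch.tanh(F.linear(x, w_ih) + F.linear(hx, w_hh))  # single tensor out
```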
 def LSTMCell(input, hidden, w_ih, w_hh, b_ih=None, b_hh=None):
     hx, cx = hidden
@@ -63,25 +61,23 @@ def LSTMCell(input, hidden, w_ih, w_hh, b_ih=None, b_hh=None):
     forgetgate = sigmoid(gates[:,1 * hsz:2 * hsz])
     cellgate = tanh(gates[:,2 * hsz:3 * hsz])
     outgate = sigmoid(gates[:,3 * hsz:4 * hsz])
-    nextc = (forgetgate * cx) + (ingate * cellgate)
-    nexth = outgate * tanh(nextc)
+    cy = (forgetgate * cx) + (ingate * cellgate)
+    hy = outgate * tanh(cy)

-    return (nexth, nextc), nexth
+    return hy, cy

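For reference, the same LSTM update with current torch ops, showing why the flat `(hy, cy)` return is enough for callers (shapes and names below are assumptions, not part of this change):

```python
# Illustrative only; mirrors LSTMCell above with assumed shapes.
import torch
import torch.nn.functional as F

batch, input_size, hidden_size = 4, 10, 20
x = torch.randn(batch, input_size)
hx = torch.randn(batch, hidden_size)
cx = torch.randn(batch, hidden_size)
w_ih = torch.randn(4 * hidden_size, input_size)
w_hh = torch.randn(4 * hidden_size, hidden_size)

gates = F.linear(x, w_ih) + F.linear(hx, w_hh)
i, f, g, o = gates.chunk(4, 1)                 # same four slices as above
cy = torch.sigmoid(f) * cx + torch.sigmoid(i) * torch.tanh(g)
hy = torch.sigmoid(o) * torch.tanh(cy)
# callers unpack the pair directly:  hy, cy = LSTMCell(...)
```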
 def GRUCell(input, hidden, w_ih, w_hh, b_ih=None, b_hh=None):
     hsz = hidden.size(1)
     gi = linear(input, w_ih, b_ih)
     gh = linear(hidden, w_hh, b_hh)
     # FIXME: chunk

-    # this is a bit weird, it doesn't match the order of parameters
-    # implied by the cudnn docs, and it also uses nexth for output...
     resetgate = sigmoid(gi[:,0 * hsz:1 * hsz] + gh[:,0 * hsz:1 * hsz])
     inputgate = sigmoid(gi[:,1 * hsz:2 * hsz] + gh[:,1 * hsz:2 * hsz])
     newgate = tanh(gi[:,2 * hsz:3 * hsz] + resetgate * gh[:,2 * hsz:3 * hsz])
-    nexth = newgate + inputgate * (hidden - newgate)
+    hy = newgate + inputgate * (hidden - newgate)

-    return nexth, nexth  # FIXME: nexth, nexth ???
+    return hy

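The `# FIXME: chunk` above could be addressed with `Tensor.chunk`; a sketch of the same GRU update using it (illustrative only, assuming current torch ops):

```python
# Illustrative only; same GRU math as above, using chunk() for the slices.
import torch
import torch.nn.functional as F

batch, input_size, hidden_size = 4, 10, 20
x = torch.randn(batch, input_size)
hx = torch.randn(batch, hidden_size)
w_ih = torch.randn(3 * hidden_size, input_size)
w_hh = torch.randn(3 * hidden_size, hidden_size)

gi = F.linear(x, w_ih)
gh = F.linear(hx, w_hh)
i_r, i_z, i_n = gi.chunk(3, 1)
h_r, h_z, h_n = gh.chunk(3, 1)
resetgate = torch.sigmoid(i_r + h_r)
inputgate = torch.sigmoid(i_z + h_z)
newgate = torch.tanh(i_n + resetgate * h_n)
hy = newgate + inputgate * (hx - newgate)   # single tensor, as returned above
```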
 def StackedRNN(cell, num_layers, lstm=False):
     def forward(input, hidden, weight):
@@ -92,8 +88,9 @@ def forward(input, hidden, weight):
             hidden = zip(*hidden)

         for i in range(num_layers):
-            hy, input = cell(input, hidden[i], *weight[i])
+            hy = cell(input, hidden[i], *weight[i])
             next_hidden.append(hy)
+            input = hy[0] if lstm else hy

         if lstm:
             next_h, next_c = zip(*next_hidden)
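Since each cell now returns only its state, the loop picks the next layer's input explicitly: `hy[0]` (the h half of an LSTM's `(hy, cy)` pair) or `hy` itself. A small sketch of the `zip(*next_hidden)` regrouping used in the LSTM branch (values assumed for illustration):

```python
# Illustrative only: regrouping per-layer (h, c) pairs into (all h, all c).
import torch

next_hidden = [(torch.zeros(4, 20), torch.ones(4, 20)) for _ in range(2)]
next_h, next_c = zip(*next_hidden)   # ((h0, h1), (c0, c1))
```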
@@ -222,8 +219,6 @@ def backward_extended(self, grad_output, grad_hy):
                 weight,
                 grad_weight)

-        # FIXME: zero out grad_bias if necessary :)
-
         return grad_input, grad_weight, grad_hx
