 import contextlib
 from copy import deepcopy
 from itertools import repeat
+from functools import wraps

 import torch.nn as nn
 import torch.nn.parallel as dp
     module_tests, criterion_tests, TEST_CUDA, TEST_CUDNN, PRECISION
 from common import freeze_rng_state

-@contextlib.contextmanager
-def set_default_tensor_type(type):
-    old_type = torch.typename(torch.Tensor())
-    torch.set_default_tensor_type(type)
-    try:
-        yield
-    finally:
-        torch.set_default_tensor_type(old_type)
+def default_tensor_type(type):
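+    """Run the decorated test with `type` as the default tensor type,
+    restoring the previous default afterwards."""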
+    type_str = torch.typename(type)
+    def decorator(fn):
+        @wraps(fn)
+        def wrapper(*args, **kwargs):
+            old_type = torch.typename(torch.Tensor())
+            torch.set_default_tensor_type(type_str)
+            try:
+                return fn(*args, **kwargs)
+            finally:
+                torch.set_default_tensor_type(old_type)
+        return wrapper
+    return decorator

 class InputVariableMixin(object):
     def _get_input(self):
@@ -621,7 +627,7 @@ def test_MaxUnpool2d_output_size(self):
     def test_RNN_cell(self):
         # this is just a smoke test; these modules are implemented through
         # autograd so no Jacobian test is needed
-        for module in (nn.rnn.cell.RNN, nn.rnn.cell.RNNReLU, nn.rnn.cell.GRU):
+        for module in (nn.RNNCell, nn.GRUCell):
             for bias in (True, False):
                 input = Variable(torch.randn(3, 10))
                 hx = Variable(torch.randn(3, 20))
@@ -638,18 +644,18 @@ def test_LSTM_cell(self):
             input = Variable(torch.randn(3, 10))
             hx = Variable(torch.randn(3, 20))
             cx = Variable(torch.randn(3, 20))
-            lstm = nn.rnn.cell.LSTM(10, 20, bias=bias)
+            lstm = nn.LSTMCell(10, 20, bias=bias)
             for i in range(6):
                 hx, cx = lstm(input, (hx, cx))

             (hx + cx).sum().backward()

     @unittest.skipIf(not TEST_CUDNN, "needs cudnn")
+    @default_tensor_type(torch.FloatTensor)  # FIXME: just until torch.cuda.DoubleTensor.sum() is implemented
     def test_RNN_cpu_vs_cudnn(self):

-        def forward_backward(cuda, module, bias, input_val, hx_val, weights_val):
-            rnn = module(input_size, hidden_size, num_layers, bias=bias)
-            is_lstm = module == nn.rnn.LSTM
+        def forward_backward(cuda, rnn, input_val, hx_val, weights_val):
+            is_lstm = type(rnn) == nn.LSTM
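+            # an LSTM's hidden state is an (h, c) tuple, so its outputs and
+            # gradients are unpacked separately below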

             for x_layer, y_layer in zip(rnn.all_weights, weights_val):
                 for x, y in zip(x_layer, y_layer):
@@ -689,42 +695,46 @@ def forward_backward(cuda, module, bias, input_val, hx_val, weights_val):
                     'cy': hy[1].data if is_lstm else None,
                     'grad_cx': hx[1].grad if is_lstm else None}

-        def diff(t_cpu, t_gpu, name):
-            self.assertTrue(torch.is_tensor(t_cpu))
-            self.assertTrue(torch.is_tensor(t_gpu))
-            delta = t_gpu.cpu().add(-1, t_cpu).abs().max()
-            # print("{:30s} cpu: {:10g} gpu: {:10g} diff: {:10g}".format(name, t_cpu.abs().max(), t_gpu.abs().max(), delta))
-            self.assertLess(delta, 2 * PRECISION)
-
         input_size = 10
         hidden_size = 20
         num_layers = 2
         seq_length = 7
         batch = 5

-        # FIXME: we can't use torch.cuda.DoubleTensor because sum() is not yet defined on it
-        with set_default_tensor_type('torch.FloatTensor'):
-            for module in (nn.rnn.RNNTanh, nn.rnn.RNNReLU, nn.rnn.LSTM, nn.rnn.GRU):
-                for bias in (True, False):
-                    input_val = torch.randn(seq_length, batch, input_size)
-                    hx_val = torch.randn(num_layers, batch, hidden_size)
-
-                    weights_val = module(input_size, hidden_size, num_layers).all_weights
-
-                    outputs_cpu = forward_backward(False, module, bias, input_val, hx_val, weights_val)
-                    outputs_gpu = forward_backward(True, module, bias, input_val, hx_val, weights_val)
-
-                    diff(outputs_cpu['output'], outputs_gpu['output'], 'output')
-                    diff(outputs_cpu['hy'], outputs_gpu['hy'], 'hy')
-                    diff(outputs_cpu['grad_input'], outputs_gpu['grad_input'], 'grad_input')
-                    diff(outputs_cpu['grad_hx'], outputs_gpu['grad_hx'], 'grad_hx')
-                    if outputs_cpu['cy'] is not None:
-                        diff(outputs_cpu['cy'], outputs_gpu['cy'], 'cy')
-                        diff(outputs_cpu['grad_cx'], outputs_gpu['grad_cx'], 'grad_cx')
-
-                    for i, (cpu_layer_weight, gpu_layer_weight) in enumerate(zip(outputs_cpu['weights'], outputs_gpu['weights'])):
-                        for j, (cpu_weight, gpu_weight) in enumerate(zip(cpu_layer_weight, gpu_layer_weight)):
-                            diff(cpu_weight.grad, gpu_weight.grad, 'grad_weight[{},{}]'.format(i, j))
+        def compare_cpu_gpu(outputs_cpu, outputs_gpu):
+            self.assertEqual(list(outputs_cpu.keys()), list(outputs_gpu.keys()))
+            for key in outputs_cpu.keys():
+                if key != 'weights':
+                    self.assertEqual(outputs_cpu[key], outputs_gpu[key], prec=5e-5)
+
+            # check grad weights separately, since all_weights is a nested list
+            for cpu_layer_weight, gpu_layer_weight in zip(outputs_cpu['weights'], outputs_gpu['weights']):
+                for cpu_weight, gpu_weight in zip(cpu_layer_weight, gpu_layer_weight):
+                    self.assertEqual(cpu_weight.grad, gpu_weight.grad, prec=5e-5)
+
+
+        input_val = torch.randn(seq_length, batch, input_size)
+        hx_val = torch.randn(num_layers, batch, hidden_size)
+        # FIXME: add bidirectional
+        # FIXME: add dropout
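+        # run each RNN flavour with identical inputs and weights on the CPU and
+        # on cuDNN, then check that outputs and gradients match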
+        for module in (nn.RNN, nn.LSTM, nn.GRU):
+            for bias in (True, False):
+                rnn = module(input_size, hidden_size, num_layers, bias=bias)
+                outputs_cpu = forward_backward(False, rnn, input_val, hx_val, rnn.all_weights)
+
+                rnn_gpu = module(input_size, hidden_size, num_layers, bias=bias)
+                outputs_gpu = forward_backward(True, rnn_gpu, input_val, hx_val, rnn.all_weights)
+
+                compare_cpu_gpu(outputs_cpu, outputs_gpu)
+
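+        # also exercise nn.RNN's explicit nonlinearity argument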
730+ for nonlinearity in ('tanh' , 'relu' ):
731+ rnn = nn .rnn .RNN (input_size , hidden_size , num_layers , bias = bias , nonlinearity = nonlinearity )
732+ outputs_cpu = forward_backward (False , rnn , input_val , hx_val , rnn .all_weights )
733+
734+ rnn_gpu = nn .rnn .RNN (input_size , hidden_size , num_layers , bias = bias , nonlinearity = nonlinearity )
735+ outputs_gpu = forward_backward (True , rnn_gpu , input_val , hx_val , rnn .all_weights )
736+
737+ compare_cpu_gpu (outputs_cpu , outputs_gpu )


 def add_test(test):