import torch
import random
import unittest
+import contextlib
from copy import deepcopy
from itertools import repeat

import torch.nn as nn
import torch.nn.parallel as dp
from torch.autograd import Variable
from common_nn import NNTestCase, ModuleTest, CriterionTest, TestBase, \
-    module_tests, criterion_tests, TEST_CUDA, PRECISION
+    module_tests, criterion_tests, TEST_CUDA, TEST_CUDNN, PRECISION
from common import freeze_rng_state

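+# Temporarily set the default tensor type; the previous default is restored on exit.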
+@contextlib.contextmanager
+def set_default_tensor_type(type):
+    old_type = torch.typename(torch.Tensor())
+    torch.set_default_tensor_type(type)
+    try:
+        yield
+    finally:
+        torch.set_default_tensor_type(old_type)

class InputVariableMixin(object):
    def _get_input(self):
@@ -609,6 +618,87 @@ def test_MaxUnpool2d_output_size(self): |
                                      mu(output_small, indices_small, (h, w)))


+    @unittest.skipIf(not TEST_CUDNN, "needs cudnn")
+    def test_RNN_cpu_vs_cudnn(self):
+
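+        # Build the RNN, copy in the given weights, and run one forward/backward
+        # pass on CPU or GPU, returning the outputs and gradients to compare.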
+        def forwardBackward(cuda, mode, input_val, hx_val, weights_val):
+            rnn = nn.RNNBase(mode, input_size, hidden_size, num_layers)
+
+            for x_layer, y_layer in zip(rnn.all_weights, weights_val):
+                for x, y in zip(x_layer, y_layer):
+                    x.data.copy_(y.data)
+
+            input = Variable(input_val.clone(), requires_grad=True)
+            if mode == 'LSTM':
+                hx = (Variable(hx_val.clone(), requires_grad=True),
+                      Variable(hx_val.add(1), requires_grad=True))
+            else:
+                hx = Variable(hx_val.clone(), requires_grad=True)
+
+            if cuda:
+                rnn.cuda()
+                input.data = input.data.cuda()
+                if mode == 'LSTM':
+                    hx[0].data = hx[0].data.cuda()
+                    hx[1].data = hx[1].data.cuda()
+                else:
+                    hx.data = hx.data.cuda()
+
+            output, hy = rnn(input, hx)
+            # FIXME this is because of a pytorch bug
+            if mode == 'LSTM':
+                fake_loss = 0 * (hy[0] + hy[1]).sum()
+            else:
+                fake_loss = 0 * hy.sum()
+
+            loss = output.sum() + fake_loss
+            loss.backward()
+
+            return {'output': output.data,
+                    'hy': hy[0].data if mode == 'LSTM' else hy.data,
+                    'weights': rnn.all_weights,
+                    'grad_input': input.grad,
+                    'grad_hx': hx[0].grad if mode == 'LSTM' else hx.grad,
+                    'cy': hy[1].data if mode == 'LSTM' else None,
+                    'grad_cx': hx[1].grad if mode == 'LSTM' else None}
+
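+        # Check that a CPU and a GPU tensor agree elementwise within tolerance.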
+        def diff(t_cpu, t_gpu, name):
+            self.assertTrue(torch.is_tensor(t_cpu))
+            self.assertTrue(torch.is_tensor(t_gpu))
+            delta = t_gpu.cpu().add(-1, t_cpu).abs().max()
+            # print("{:30s} cpu: {:10g} gpu: {:10g} diff: {:10g}".format(name, t_cpu.abs().max(), t_gpu.abs().max(), delta))
+            self.assertLess(delta, 2 * PRECISION)
+
+        input_size = 10
+        hidden_size = 20
+        num_layers = 2
+        seq_length = 7
+        batch = 5
+
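+        # Run every RNN flavour on CPU and on cuDNN with identical inputs and
+        # weights, then compare outputs and gradients.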
+        # FIXME: we can't use torch.cuda.DoubleTensor because sum() is not yet defined on it
+        with set_default_tensor_type('torch.FloatTensor'):
+            for mode in ("RNN_RELU", "RNN_TANH", "GRU", "LSTM"):
+                input_val = torch.randn(seq_length, batch, input_size)
+                hx_val = torch.randn(num_layers, batch, hidden_size)
+
+                weights_val = nn.RNNBase(mode, input_size, hidden_size, num_layers).all_weights
+
+                outputs_cpu = forwardBackward(False, mode, input_val, hx_val, weights_val)
+                outputs_gpu = forwardBackward(True, mode, input_val, hx_val, weights_val)
+
+                diff(outputs_cpu['output'], outputs_gpu['output'], 'output')
+                diff(outputs_cpu['hy'], outputs_gpu['hy'], 'hy')
+                diff(outputs_cpu['grad_input'], outputs_gpu['grad_input'], 'grad_input')
+                diff(outputs_cpu['grad_hx'], outputs_gpu['grad_hx'], 'grad_hx')
+                if outputs_cpu['cy'] is not None:
+                    diff(outputs_cpu['cy'], outputs_gpu['cy'], 'cy')
+                    diff(outputs_cpu['grad_cx'], outputs_gpu['grad_cx'], 'grad_cx')
+
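+                # Also compare the gradients accumulated on every weight and bias.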
+                for i, (cpu_layer_weight, gpu_layer_weight) in enumerate(zip(outputs_cpu['weights'], outputs_gpu['weights'])):
+                    for j, (cpu_weight, gpu_weight) in enumerate(zip(cpu_layer_weight, gpu_layer_weight)):
+                        diff(cpu_weight.grad, gpu_weight.grad, mode + ' grad_weight[{},{}]'.format(i, j))
+
+
def add_test(test):
    test_name = test.get_name()
    cuda_test_name = test_name + '_cuda'