
Commit 75f1989

uridah authored and soumith committed
Add nn.Bilinear and tests
1 parent e221536 commit 75f1989

5 files changed: 148 additions, 4 deletions

test/test_nn.py

Lines changed: 30 additions & 0 deletions
@@ -14,6 +14,7 @@
 import torch.nn.parallel as dp
 import torch.nn.init as init
 import torch.nn.utils.rnn as rnn_utils
+import torch.legacy.nn as legacy
 from torch.nn.utils import clip_grad_norm
 from torch.autograd import Variable, gradcheck
 from torch.nn import Parameter
@@ -2048,6 +2049,35 @@ def test_triplet_margin_swap_loss(self):
         self.assertTrue(gradcheck(lambda x1, x2, x3: F.triplet_margin_loss(
             x1, x2, x3, swap=True), (input1, input2, input3)))
 
+    def test_bilinear(self):
+        module = nn.Bilinear(10, 10, 8)
+        module2 = legacy.Bilinear(10, 10, 8)
+
+        module2.weight.copy_(module.weight.data)
+        module2.bias.copy_(module.bias.data)
+
+        input1 = torch.randn(4, 10)
+        input2 = torch.randn(4, 10)
+
+        output = module(Variable(input1), Variable(input2))
+        output2 = module2.forward([input1, input2])
+
+        input1_1 = Variable(input1, requires_grad=True)
+        input2_1 = Variable(input2, requires_grad=True)
+
+        output3 = module(input1_1, input2_1)
+        grad = torch.randn(*output3.size())
+        output3.backward(grad)
+        gi1 = input1_1.grad.data.clone()
+        gi2 = input2_1.grad.data.clone()
+
+        self.assertEqual(output.data, output2)
+        self.assertEqual([gi1, gi2], module2.backward([input1, input2], grad))
+
+        def forward(x1, x2):
+            return F.bilinear(x1, x2, module.weight, module.bias)
+        self.assertTrue(gradcheck(forward, (input1_1, input2_1)))
+
 
 class TestNNInit(TestCase):
     def setUp(self):

torch/nn/_functions/linear.py

Lines changed: 53 additions & 0 deletions
@@ -29,3 +29,56 @@ def backward(self, grad_output):
             return grad_input, grad_weight, grad_bias
         else:
             return grad_input, grad_weight
+
+
+class Bilinear(Function):
+
+    def forward(self, input1, input2, weight, bias=None):
+        self.save_for_backward(input1, input2, weight, bias)
+
+        output = input1.new(input1.size(0), weight.size(0))
+
+        buff = input1.new()
+
+        # compute output scores:
+        for k, w in enumerate(weight):
+            torch.mm(input1, w, out=buff)
+            buff.mul_(input2)
+            torch.sum(buff, 1, out=output.narrow(1, k, 1))
+
+        if bias is not None:
+            output.add_(bias.expand_as(output))
+
+        return output
+
+    def backward(self, grad_output):
+        input1, input2, weight, bias = self.saved_tensors
+        grad_input1 = grad_input2 = grad_weight = grad_bias = None
+
+        buff = input1.new()
+
+        if self.needs_input_grad[0] or self.needs_input_grad[1]:
+            grad_input1 = torch.mm(input2, weight[0].t())
+            grad_input1.mul_(grad_output.narrow(1, 0, 1).expand(grad_input1.size()))
+            grad_input2 = torch.mm(input1, weight[0])
+            grad_input2.mul_(grad_output.narrow(1, 0, 1).expand(grad_input2.size()))
+
+            for k in range(1, weight.size(0)):
+                torch.mm(input2, weight[k].t(), out=buff)
+                buff.mul_(grad_output.narrow(1, k, 1).expand(grad_input1.size()))
+                grad_input1.add_(buff)
+
+                torch.mm(input1, weight[k], out=buff)
+                buff.mul_(grad_output.narrow(1, k, 1).expand(grad_input2.size()))
+                grad_input2.add_(buff)
+
+        if self.needs_input_grad[2]:
+            # accumulate parameter gradients, one (in1 x in2) slice per output unit:
+            grad_weight = weight.new(weight.size())
+            for k in range(weight.size(0)):
+                torch.mul(input1, grad_output.narrow(1, k, 1).expand_as(input1), out=buff)
+                torch.mm(buff.t(), input2, out=grad_weight[k])
+
+        if bias is not None and self.needs_input_grad[3]:
+            grad_bias = grad_output.sum(0)
+
+        return grad_input1, grad_input2, grad_weight, grad_bias
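
For each output unit k, the loop in Bilinear.forward above computes the bilinear form x1ᵀ·W_k·x2 for every batch row, i.e. output[n, k] = Σ_ij input1[n, i]·W[k, i, j]·input2[n, j]. As a quick sanity sketch (not part of this commit; it uses the current tensor API and torch.einsum, and the sizes are arbitrary illustrative choices), the same contraction can be written in a single call and checked against the explicit loop:

    import torch

    N, D1, D2, K = 4, 10, 10, 8          # batch, in1_features, in2_features, out_features
    x1, x2 = torch.randn(N, D1), torch.randn(N, D2)
    weight, bias = torch.randn(K, D1, D2), torch.randn(K)

    # explicit loop, mirroring Bilinear.forward above
    out_loop = torch.empty(N, K)
    for k, w in enumerate(weight):
        out_loop[:, k] = (x1 @ w * x2).sum(dim=1)
    out_loop += bias

    # same bilinear form as one einsum contraction
    out_einsum = torch.einsum('ni,kij,nj->nk', x1, weight, x2) + bias

    assert torch.allclose(out_loop, out_einsum)

The backward pass follows from the same expression: the gradient with respect to input1 is Σ_k grad_output[:, k]·(input2·W_kᵀ), which is exactly what the accumulation loop over k computes, and symmetrically for input2.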

torch/nn/functional.py

Lines changed: 8 additions & 0 deletions
@@ -449,6 +449,14 @@ def linear(input, weight, bias=None):
     return state(input, weight) if bias is None else state(input, weight, bias)
 
 
+def bilinear(input1, input2, weight, bias=None):
+    state = _functions.linear.Bilinear()
+    if bias is None:
+        return state(input1, input2, weight)
+    else:
+        return state(input1, input2, weight, bias)
+
+
 def batch_norm(input, running_mean, running_var, weight=None, bias=None,
                training=False, momentum=0.1, eps=1e-5):
     f = torch._C._functions.BatchNorm(running_mean, running_var, training, momentum, eps, torch.backends.cudnn.enabled)
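
The new functional entry point mirrors F.linear: it instantiates the Bilinear autograd function and applies it to the two inputs plus the parameters. A minimal usage sketch (my own example, not from the commit; written against the current tensor API rather than the Variable wrapper shown in the diff):

    import torch
    import torch.nn.functional as F

    x1 = torch.randn(4, 10, requires_grad=True)    # (batch, in1_features)
    x2 = torch.randn(4, 10, requires_grad=True)    # (batch, in2_features)
    weight = torch.randn(8, 10, 10)                # (out_features, in1_features, in2_features)
    bias = torch.randn(8)                          # (out_features,)

    out = F.bilinear(x1, x2, weight, bias)         # -> shape (4, 8)
    out.sum().backward()                           # populates x1.grad and x2.grad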

torch/nn/modules/__init__.py

Lines changed: 2 additions & 2 deletions
@@ -1,5 +1,5 @@
 from .module import Module
-from .linear import Linear
+from .linear import Linear, Bilinear
 from .conv import Conv1d, Conv2d, Conv3d, \
     ConvTranspose1d, ConvTranspose2d, ConvTranspose3d
 from .activation import Threshold, ReLU, Hardtanh, ReLU6, Sigmoid, Tanh, \
@@ -43,5 +43,5 @@
     'Embedding', 'RNNBase', 'RNN', 'LSTM', 'GRU', 'RNNCell', 'LSTMCell', 'GRUCell',
     'PixelShuffle', 'UpsamplingNearest2d', 'UpsamplingBilinear2d', 'PairwiseDistance',
     'AdaptiveMaxPool1d', 'AdaptiveMaxPool2d', 'AdaptiveAvgPool1d', 'AdaptiveAvgPool2d',
-    'TripletMarginLoss', 'ZeroPad2d', 'ConstantPad2d'
+    'TripletMarginLoss', 'ZeroPad2d', 'ConstantPad2d', 'Bilinear',
 ]

torch/nn/modules/linear.py

Lines changed: 55 additions & 2 deletions
@@ -2,7 +2,7 @@
 
 import torch
 from torch.nn.parameter import Parameter
-
+from .. import functional as F
 from .module import Module
 
 
@@ -59,5 +59,58 @@ def __repr__(self):
             + str(self.out_features) + ')'
 
 
-# TODO: Bilinear
+class Bilinear(Module):
+    r"""Applies a bilinear transformation to the incoming data: :math:`y = x_1 * A * x_2 + b`
+
+    Args:
+        in1_features: size of each first input sample
+        in2_features: size of each second input sample
+        out_features: size of each output sample
+        bias: If set to False, the layer will not learn an additive bias. Default: True
+
+    Shape:
+        - Input: :math:`(N, in1\_features)`, :math:`(N, in2\_features)`
+        - Output: :math:`(N, out\_features)`
+
+    Attributes:
+        weight: the learnable weights of the module of shape (out_features x in1_features x in2_features)
+        bias: the learnable bias of the module of shape (out_features)
+
+    Examples::
+
+        >>> m = nn.Bilinear(20, 30, 40)
+        >>> input1 = autograd.Variable(torch.randn(128, 20))
+        >>> input2 = autograd.Variable(torch.randn(128, 30))
+        >>> output = m(input1, input2)
+        >>> print(output.size())
+    """
+
+    def __init__(self, in1_features, in2_features, out_features, bias=True):
+        super(Bilinear, self).__init__()
+        self.in1_features = in1_features
+        self.in2_features = in2_features
+        self.out_features = out_features
+        self.weight = Parameter(torch.Tensor(out_features, in1_features, in2_features))
+
+        if bias:
+            self.bias = Parameter(torch.Tensor(out_features))
+        else:
+            self.register_parameter('bias', None)
+        self.reset_parameters()
+
+    def reset_parameters(self):
+        stdv = 1. / math.sqrt(self.weight.size(1))
+        self.weight.data.uniform_(-stdv, stdv)
+        if self.bias is not None:
+            self.bias.data.uniform_(-stdv, stdv)
+
+    def forward(self, input1, input2):
+        return F.bilinear(input1, input2, self.weight, self.bias)
+
+    def __repr__(self):
+        return self.__class__.__name__ + ' (' \
+            + 'in1_features=' + str(self.in1_features) \
+            + ', in2_features=' + str(self.in2_features) \
+            + ', out_features=' + str(self.out_features) + ')'
+
 # TODO: PartialLinear - maybe in sparse?
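
To verify the module end to end, a numerical gradient check similar to the one in the new test can be run directly on nn.Bilinear. A hedged sketch (my own, not part of the commit; the double precision and small sizes are chosen only to keep the finite-difference comparison accurate):

    import torch
    from torch.autograd import gradcheck

    m = torch.nn.Bilinear(5, 4, 3).double()
    x1 = torch.randn(2, 5, dtype=torch.double, requires_grad=True)
    x2 = torch.randn(2, 4, dtype=torch.double, requires_grad=True)

    # compares the analytical gradients from Bilinear's backward with finite differences
    assert gradcheck(lambda a, b: m(a, b), (x1, x2))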
