
Commit 1213149

adamlerer authored and apaszke committed
add bias option to linear; allow modules to return nested lists/tuples of tensors (pytorch#106)
1 parent 398b6f7 commit 1213149

6 files changed, +43 -27 lines

test/common_nn.py

Lines changed: 7 additions & 0 deletions
@@ -24,6 +24,13 @@
         input_size=(4, 10),
         reference_fn=lambda i,p: torch.mm(i, p[0].t()) + p[1].view(1, -1).expand(4, 8)
     ),
+    dict(
+        module_name='Linear',
+        constructor_args=(10, 8, False),
+        input_size=(4, 10),
+        desc='no_bias',
+        reference_fn=lambda i,p: torch.mm(i, p[0].t())
+    ),
     dict(
         module_name='Threshold',
         constructor_args=(2, 1),
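The two reference_fn lambdas pin down the expected math: with a bias, the output is a matrix product plus a row-broadcast bias; without one, it is the matrix product alone. A minimal standalone sketch of the same check (the tensor names here are illustrative, not from the test harness):

    import torch

    # i: a batch matching input_size=(4, 10); W, b: parameters of Linear(10, 8).
    i = torch.randn(4, 10)
    W = torch.randn(8, 10)
    b = torch.randn(8)

    out_bias = torch.mm(i, W.t()) + b.view(1, -1).expand(4, 8)  # default case
    out_no_bias = torch.mm(i, W.t())                            # bias=False case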

test/test_nn.py

Lines changed: 3 additions & 3 deletions
@@ -498,7 +498,7 @@ def test_data_parallel(self):
     def test_parameter_dict(self):
         l = nn.Linear(5, 5)
         block = nn.Container(
-            conv=nn.Conv2d(3, 3, 3, no_bias=True)
+            conv=nn.Conv2d(3, 3, 3, bias=False)
         )
         net = nn.Container(
             linear1=l,
@@ -530,7 +530,7 @@ def test_parameter_dict(self):
     def test_load_parameter_dict(self):
         l = nn.Linear(5, 5)
         block = nn.Container(
-            conv=nn.Conv2d(3, 3, 3, no_bias=True)
+            conv=nn.Conv2d(3, 3, 3, bias=False)
         )
         net = nn.Container(
             linear1=l,
@@ -606,7 +606,7 @@ def add_test(test):
     ),
     dict(
         module_name='Conv2d',
-        constructor_args=(3, 4, (3, 3), 1, 0, None, 1, True),
+        constructor_args=(3, 4, (3, 3), 1, 0, None, 1, False),
         input_size=(2, 3, 6, 6),
         desc='no_bias',
     ),
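Note why the last positional argument flips from True to False: constructor_args are splatted into the constructor, and with the signature change below the eighth positional parameter is now bias rather than no_bias. Expanded with keywords (a sketch using the new signature from torch/nn/modules/conv.py):

    # constructor_args=(3, 4, (3, 3), 1, 0, None, 1, False) is equivalent to:
    m = nn.Conv2d(in_channels=3, out_channels=4, kernel_size=(3, 3),
                  stride=1, padding=0, dilation=None, groups=1, bias=False)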

torch/nn/functions/linear.py

Lines changed: 13 additions & 9 deletions
@@ -24,13 +24,17 @@ def backward(self, grad_output):
             bias = None
         else:
             input, weight, bias = tensors
-        grad_tuple = (
-            torch.mm(grad_output, weight) if \
-                self.needs_input_grad[0] else None,
-            torch.mm(grad_output.t(), input) if \
-                self.needs_input_grad[1] else None,
-            torch.mv(grad_output.t(), self.add_buffer) if \
-                bias is not None and self.needs_input_grad[2] else None,
-        )
-        return grad_tuple
+
+        grad_input = grad_weight = grad_bias = None
+        if self.needs_input_grad[0]:
+            grad_input = torch.mm(grad_output, weight)
+        if self.needs_input_grad[1]:
+            grad_weight = torch.mm(grad_output.t(), input)
+        if bias is not None and self.needs_input_grad[2]:
+            grad_bias = torch.mv(grad_output.t(), self.add_buffer)
+
+        if bias is not None:
+            return grad_input, grad_weight, grad_bias
+        else:
+            return grad_input, grad_weight
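For output = input.mm(weight.t()) + bias, the three formulas in the rewritten backward are the standard linear-layer gradients; self.add_buffer is effectively a ones vector used to sum grad_output over the batch. A shape-checked sketch, with add_buffer replaced by an explicit ones tensor:

    import torch

    N, I, O = 4, 10, 8                    # batch, in_features, out_features
    input = torch.randn(N, I)
    weight = torch.randn(O, I)
    grad_output = torch.randn(N, O)

    grad_input = torch.mm(grad_output, weight)       # N x I
    grad_weight = torch.mm(grad_output.t(), input)   # O x I
    ones = torch.ones(N)                             # plays the role of self.add_buffer
    grad_bias = torch.mv(grad_output.t(), ones)      # O: batch-sum of grad_output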

torch/nn/modules/conv.py

Lines changed: 6 additions & 6 deletions
@@ -91,7 +91,7 @@ class Conv2d(Module):
     stride: the stride of the convolving kernel. Can be a single number s or a tuple (sh x sw). Default: 1
     padding: implicit zero padding on the input. Can be a single number s or a tuple. Default: 0
     dilation: If given, will do dilated (or atrous) convolutions. Can be a single number s or a tuple. Default: None
-    no_bias: If set to true, the layer will not learn an additive bias. Default: False
+    bias: If set to False, the layer will not learn an additive bias. Default: True
     Input Shape: [ * , in_channels , * , * ] : Input is minibatch x in_channels x iH x iW
     Output Shape:[ * , out_channels , * , * ] : Output shape is precisely minibatch x out_channels x floor((iH + 2*padH - kH) / dH + 1) x floor((iW + 2*padW - kW) / dW + 1)
     Members:
@@ -108,7 +108,7 @@ class Conv2d(Module):
     >>> output = m(input)
     """
     def __init__(self, in_channels, out_channels, kernel_size, stride=1,
-                 padding=0, dilation=None, groups=1, no_bias=False):
+                 padding=0, dilation=None, groups=1, bias=True):
         self.in_channels = in_channels
         self.out_channels = out_channels
         self.kh, self.kw = _pair(kernel_size)
@@ -121,7 +121,7 @@ def __init__(self, in_channels, out_channels, kernel_size, stride=1,
 
         weight = torch.Tensor(self.out_channels, self.in_channels, self.kh,
                               self.kw)
-        bias = None if no_bias else torch.Tensor(self.out_channels)
+        bias = torch.Tensor(self.out_channels) if bias else None
         super(Conv2d, self).__init__(
             weight=weight,
             bias=bias,
@@ -166,7 +166,7 @@ class FullConv2d(Conv2d):
     stride: the stride of the convolving kernel. Can be a single number or a tuple (sh x sw). Default: 1
     padding: implicit zero padding on the input. Can be a single number or a tuple. Default: 0
     output_padding: A padding of 0 or 1 pixels that should be added to the output. Can be a single number or a tuple. Default: 0
-    no_bias: If set to true, the layer will not learn an additive bias. Default: False
+    bias: If set to False, the layer will not learn an additive bias. Default: True
     Input Shape: [ * , in_channels , * , * ] : Input is minibatch x in_channels x iH x iW
     Output Shape:[ * , out_channels , * , * ] : Output shape is precisely minibatch x out_channels x (iH - 1) * sH - 2*padH + kH + output_paddingH x (iW - 1) * sW - 2*padW + kW
     Members:
@@ -181,9 +181,9 @@ class FullConv2d(Conv2d):
     >>> output = m(input)
     """
     def __init__(self, in_channels, out_channels, kernel_size, stride=1,
-                 padding=0, output_padding=0, no_bias=False):
+                 padding=0, output_padding=0, bias=True):
         super(FullConv2d, self).__init__(in_channels, out_channels, kernel_size,
-                                         stride, padding, no_bias)
+                                         stride, padding, bias)
         self.out_padh, self.out_padw = _pair(output_padding)
 
     def forward(self, input):
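As a concrete check of the docstring's output-shape formula against the updated test above: the Conv2d no_bias entry uses input_size=(2, 3, 6, 6), kernel (3, 3), stride 1, padding 0, so oH = floor((iH + 2*padH - kH) / dH + 1) = floor((6 + 0 - 3) / 1 + 1) = 4, and likewise oW = 4, giving an output of shape 2 x 4 x 4 x 4.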

torch/nn/modules/linear.py

Lines changed: 9 additions & 4 deletions
@@ -14,6 +14,7 @@ class Linear(Module):
     Args:
         in_features: size of each input sample
         out_features: size of each output sample
+        bias: If set to False, the layer will not learn an additive bias. Default: True
     Input Shape: [*, in_features] : Input can be of shape minibatch x in_features
     Output Shape:[*, out_features] : Output is of shape minibatch x out_features
     Members:
@@ -25,23 +26,27 @@ class Linear(Module):
     >>> output = m(input)
     >>> print(output.size())
     """
-    def __init__(self, in_features, out_features):
+    def __init__(self, in_features, out_features, bias=True):
         self.in_features = in_features
         self.out_features = out_features
 
         super(Linear, self).__init__(
             weight=torch.Tensor(out_features, in_features),
-            bias=torch.Tensor(out_features)
+            bias=torch.Tensor(out_features) if bias else None
         )
         self.reset_parameters()
 
     def reset_parameters(self):
         stdv = 1./math.sqrt(self.weight.size(1))
         self.weight.data.uniform_(-stdv, stdv)
-        self.bias.data.uniform_(-stdv, stdv)
+        if self.bias is not None:
+            self.bias.data.uniform_(-stdv, stdv)
 
     def forward(self, input):
-        return self._backend.Linear()(input, self.weight, self.bias)
+        if self.bias is None:
+            return self._backend.Linear()(input, self.weight)
+        else:
+            return self._backend.Linear()(input, self.weight, self.bias)
 
 
 # TODO: Bilinear
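Taken together, the Linear changes make the flag a real on/off switch: construction, initialization, and forward all branch on whether self.bias exists. A hedged usage sketch:

    import torch.nn as nn

    m = nn.Linear(10, 8)                 # bias=True by default: weight and bias params
    m_nb = nn.Linear(10, 8, bias=False)  # self.bias is None; no bias is initialized

    # forward() now passes only (input, weight) to the backend when bias is None,
    # matching the two-element gradient tuple returned by backward() above.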

torch/nn/modules/module.py

Lines changed: 5 additions & 5 deletions
@@ -73,12 +73,12 @@ def __call__(self, *input):
         result = self.forward(*input)
         for hook in self.forward_hooks.values():
             hook(self, input, result)
-        if isinstance(result, tuple):
-            fn = result[0].creator
-        else:
-            fn = result.creator
+        var = result
+        while not isinstance(var, Variable):
+            var = var[0]
+        creator = var.creator
         for key, hook in self.backward_hooks.items():
-            fn.register_hook(key, lambda gi,go,hook=hook: hook(self, gi, go))
+            creator.register_hook(key, lambda gi,go,hook=hook: hook(self, gi, go))
         return result
 
     def __getattr__(self, name):
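This new loop is what allows modules to return nested lists/tuples of tensors, the second half of the commit message: instead of handling only a flat tuple, it descends into position 0 of each container until it reaches a Variable, then registers the backward hooks on that Variable's creator. A standalone sketch of the same unwrapping (assuming Variable is in scope, as in module.py):

    # Descend through nested lists/tuples to the first Variable; e.g. for a
    # forward() that returns ([out1, out2], aux), this yields out1, whose
    # .creator receives the backward hooks.
    def first_variable(result):
        var = result
        while not isinstance(var, Variable):
            var = var[0]
        return var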
