@ddbourgin I have an issue where gradient updates cannot be performed because the shapes conflict during backprop, specifically in the optimizer file.
The error reads:

```
C[param_name]["mean"] = d1 * mean + (1 - d1) * param_grad
ValueError: operands could not be broadcast together with shapes (100,10) (3072,100)
```

The model architecture is as follows:
- Input -> n_samples, 3072
- FC1 -> 3072, 100
- FC2 -> 100, 10
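If it helps with debugging, here is a self-contained snippet that reproduces the same broadcast error outside the library. The `cache` dict and `momentum_update` helper are just my illustration (not the library's API), but the failing line has the same form: a momentum cache keyed only by a parameter name like `"W"` ends up holding fc2's (100, 10) state and then gets hit with fc1's (3072, 100) gradient (fc2 is updated before fc1 because of the reversed iteration in `update`):

```python
import numpy as np

# Illustration only: a momentum cache keyed by parameter name, shared across layers.
cache = {}

def momentum_update(param_name, param_grad, d1=0.9):
    # Same form as the failing line: mean = d1 * mean + (1 - d1) * param_grad
    mean = cache.get(param_name, np.zeros_like(param_grad))
    cache[param_name] = d1 * mean + (1 - d1) * param_grad
    return cache[param_name]

momentum_update("W", np.random.randn(100, 10))    # fc2's weight gradient fills the cache first
momentum_update("W", np.random.randn(3072, 100))  # fc1's gradient then hits the same key
# ValueError: operands could not be broadcast together with shapes (100,10) (3072,100)
```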
The model code is as follows:
```python
def _build_model(self):
    self.model = OrderedDict()
    self.model['fc1'] = FullyConnected(n_out=self.layers[0], act_fn=ReLU(),
                                       init=self.initializer, optimizer=self.optimizer)
    self.model['fc2'] = FullyConnected(n_out=self.layers[1],
                                       act_fn=Affine(slope=1, intercept=0),
                                       init=self.initializer, optimizer=self.optimizer)
    self.model['out'] = Softmax(dim=-1, optimizer=self.optimizer)

@property
def parameters(self):
    return {k: v.parameters for k, v in self.model.items()}

@property
def hyperparameters(self):
    return {k: v.hyperparameters for k, v in self.model.items()}

@property
def derived_variables(self):
    return {k: v.derived_variables for k, v in self.model.items()}

@property
def gradients(self):
    return {k: v.gradients for k, v in self.model.items()}

def forward(self, x):
    out = x
    for k, v in self.model.items():
        out = v.forward(out)
    return out

def backward(self, y, y_pred):
    """Compute dLdy and then backprop through the layers in self.model"""
    dY_pred = self.loss.grad(y, y_pred)
    for k, v in reversed(list(self.model.items())):
        dY_pred = v.backward(dY_pred)
        self._dv['d' + k] = dY_pred
    return dY_pred

def update(self, cur_loss):
    """Perform gradient updates"""
    for k, v in reversed(list(self.model.items())):
        v.update(cur_loss)
    self.flush_gradients()
```

Hoping we can fix this and also create an example for people to follow. Thanks
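In case it's useful, this is the kind of change I was planning to try, on the assumption that each layer keeps a reference to the optimizer object it is given and that the momentum cache lives on that object; I haven't verified this against the optimizer internals, so treat it as a sketch rather than a fix:

```python
from copy import deepcopy

def _build_model(self):
    self.model = OrderedDict()
    # Sketch: hand each layer its own copy of the optimizer so any per-parameter
    # state (e.g. the momentum "mean" keyed by "W"/"b") is not shared between
    # fc1's (3072, 100) weights and fc2's (100, 10) weights.
    self.model['fc1'] = FullyConnected(n_out=self.layers[0], act_fn=ReLU(),
                                       init=self.initializer,
                                       optimizer=deepcopy(self.optimizer))
    self.model['fc2'] = FullyConnected(n_out=self.layers[1],
                                       act_fn=Affine(slope=1, intercept=0),
                                       init=self.initializer,
                                       optimizer=deepcopy(self.optimizer))
    self.model['out'] = Softmax(dim=-1, optimizer=deepcopy(self.optimizer))
```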