Hi, I know it has been a year since this was done, but I am not sure if you can help me. When using implicit calls, I get the following error during training, after calling `loss.backward()`:
```
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [1000, 1]], which is output 0 of NormBackward1, is at version 1; expected version 0 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
```
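For context, this class of error can be reproduced in isolation. Here is a minimal sketch, with shapes chosen only to mirror the `[1000, 1]` tensor in the message; this is my own illustration, not the actual bnn internals:

```python
import torch

w = torch.randn(1000, 64, requires_grad=True)
n = w.norm(dim=1, keepdim=True)  # shape [1000, 1]; analogous to "output 0 of NormBackward1"
y = w * n                        # mul saves n for its backward pass
n /= 2                           # in-place op bumps n's version counter: 0 -> 1
y.sum().backward()               # RuntimeError: ... is at version 1; expected version 0
```

Replacing the in-place update with an out-of-place one (`n = n / 2`, or operating on `n.clone()`) lets the backward pass go through, which suggests a scale tensor inside the binarized model is being modified in place after autograd has saved it.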
I basically just grabbed the VGG19 model off torchvision and converted it. ResNet-18 has the same issue.
```python
import torch
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from time import time  # needed for t_begin below

model = models.vgg19()

from bnn import BConfig, prepare_binary_model
# Import a few examples of quantizers
from bnn.ops import *

# Define the binarization configuration and assign it to the model
bconfig = BConfig(
    activation_pre_process=BasicInputBinarizer,
    activation_post_process=BasicScaleBinarizer,
    # optionally, one can pass certain custom variables
    weight_pre_process=XNORWeightBinarizer.with_args(center_weights=True)
)
# Convert the model appropriately, propagating the changes from parent node to leafs.
# The custom_config_layers_name syntax will perform a match based on the layer name,
# setting a custom quantization function.
bmodel = prepare_binary_model(model, bconfig, custom_config_layers_name=[{'conv1': BConfig()}])

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(bmodel.parameters(), lr=0.001, momentum=0.9)

print("Training begin!")

# Select GPU 4 as execution device
device = torch.device("cuda:4" if torch.cuda.is_available() else "cpu")
print("The model will be running on", device, "device")

# Move model parameters and buffers to CPU or CUDA
bmodel.to(device)

save_path = './models/vgg19.pth'
bestaccuracy = 0.0
#break_epoch = 0
t_begin = time()

for epoch in range(50):  # loop over the dataset multiple times
    running_loss = 0.0
    break_epoch = epoch + 1
    correct = 0
    total = 0
    # trainloader (the training DataLoader) is defined elsewhere and not shown here
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)  # keep tensors on the same device as the model

        # zero the parameter gradients
        optimizer.zero_grad()
        #print(inputs.size(1))

        # forward + backward + optimize
        outputs = bmodel(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # check for correct answers
        _, predictions = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predictions == labels).sum().item()

        # print statistics
        running_loss += loss.item()
        if i % 50 == 49:  # print every 50 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 50:.3f}')
            running_loss = 0.0

    # calculate accuracy of the epoch
    accuracy = 100 * correct / total
    print(f'Epoch {epoch + 1} accuracy: {accuracy:.3f}')

    # If accuracy is better than the last, save the model
    if accuracy > bestaccuracy:
        torch.save(bmodel.state_dict(), save_path)
        bestaccuracy = accuracy

time_taken = int(time() - t_begin)
time_min = int(time_taken / 60)
time_sec = time_taken - (time_min * 60)
print(f'Finished Training! Best accuracy: {bestaccuracy:.3f} - Training time (mm:ss): {time_min}:{time_sec}')
```
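For what it's worth, one way to localize the offending operation is PyTorch's built-in anomaly detection, which extends the backward error with a traceback of the forward op that produced the modified tensor. A sketch of wrapping a single step for debugging (it reuses `bmodel`, `criterion`, `trainloader`, and `device` from the script above, and is slow, so only for debugging):

```python
import torch

inputs, labels = next(iter(trainloader))          # one batch from the loader above
inputs, labels = inputs.to(device), labels.to(device)

# detect_anomaly() makes the RuntimeError point at the forward operation
# whose output was later modified in place
with torch.autograd.detect_anomaly():
    outputs = bmodel(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
```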