
Commit 888ec84

misc
1 parent b516a80 commit 888ec84

File tree

8 files changed: +10 -29 lines changed


dqn/args.py

Lines changed: 1 addition & 9 deletions
@@ -3,22 +3,14 @@
 
 import argparse
 
+
 parser = argparse.ArgumentParser()
 
 parser.add_argument("-b", "--batch_size", type=int, default=128)
-
 parser.add_argument("-r", "--replay_buffer_size", type=int, default=10 ** 4)
-
-
 parser.add_argument("-i", "--iterations", type=int, default=10 ** 3)
-
 parser.add_argument("-d", "--discount_rate", "--gamma", type=float, default=0.999)
-
 parser.add_argument("-e", "--exploration_rate", "--epsilon", type=float, default=0.9)
-
 parser.add_argument("-l", "--lr", "--learning_rate", type=float, default=1e-7)
 
 args = parser.parse_args()
-
-if __name__ == "__main__":
-    pass
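
One detail worth knowing about the aliases above: argparse stores each value under the first long option string, so -d/--discount_rate/--gamma is read back as args.discount_rate, and -l/--lr/--learning_rate as args.lr. A minimal usage sketch (the invocation below is hypothetical):

# python some_script.py -b 64 --gamma 0.99 --learning_rate 1e-4
from args import args

print(args.batch_size)     # 64
print(args.discount_rate)  # 0.99
print(args.lr)             # 0.0001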

dqn/dqn.py

Lines changed: 4 additions & 4 deletions
@@ -2,7 +2,6 @@
 # -*- coding: utf-8 -*-
 
 import random
-from copy import deepcopy
 
 import gym
 import numpy as np
@@ -50,10 +49,11 @@ def __init__(self, env):
 
         S = self.state_size = int(np.product(env.observation_space.shape))
         A = self.action_size = env.action_space.n
+        H = 50
 
-        self.fc1 = nn.Linear(S, 50)
-        self.fc2 = nn.Linear(50, 50)
-        self.fc3 = nn.Linear(50, A)
+        self.fc1 = nn.Linear(S, H)
+        self.fc2 = nn.Linear(H, H)
+        self.fc3 = nn.Linear(H, A)
 
         self.loss = nn.functional.mse_loss
         self.opt = torch.optim.Adam(self.parameters())
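
For context, the hunk above touches only __init__; the forward pass is not part of this commit. A minimal sketch of how such a three-layer Q-network is typically completed, where the class name and the ReLU activations are assumptions rather than anything shown in the diff:

import numpy as np
import torch
from torch import nn


class QNetwork(nn.Module):  # hypothetical name; the diff shows only __init__
    def __init__(self, env):
        super().__init__()
        S = self.state_size = int(np.prod(env.observation_space.shape))
        A = self.action_size = env.action_space.n
        H = 50  # hidden width, as introduced by this commit

        self.fc1 = nn.Linear(S, H)
        self.fc2 = nn.Linear(H, H)
        self.fc3 = nn.Linear(H, A)

        self.loss = nn.functional.mse_loss
        self.opt = torch.optim.Adam(self.parameters())

    def forward(self, x):  # assumed: ReLU between layers, raw Q-values out
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)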

dqn/exploration.py

Lines changed: 0 additions & 1 deletion
@@ -34,5 +34,4 @@ def decay_exploration(i, epsilon=epsilon):
 
 
 if __name__ == "__main__":
-    pass
     print(epsilon_greedy(env.observation_space.sample()))
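
Neither epsilon_greedy nor decay_exploration appears in the hunk itself. Purely as a sketch of the usual pattern (the body below is an assumption, not the repository's implementation; env and a Q-network Q are taken to be in scope in this module):

import random

import torch


def epsilon_greedy(state, epsilon=0.9):
    # Explore with probability epsilon: uniform random action.
    if random.random() < epsilon:
        return env.action_space.sample()  # env assumed in scope
    # Exploit otherwise: greedy action under the current Q-network.
    q = Q(torch.as_tensor(state, dtype=torch.float32))  # Q assumed in scope
    return int(q.argmax())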

dqn/main.py

Lines changed: 0 additions & 3 deletions
@@ -8,10 +8,7 @@
 import torchvision.datasets as dset
 import torchvision.transforms as transforms
 from torch import nn
-from torch.autograd import Variable as V
-from torch.nn import Parameter as P
 from torch.utils.data import DataLoader
-from torch.autograd import Variable
 
 from args import args
 from env import env

dqn/now

Lines changed: 0 additions & 1 deletion
This file was deleted.

dqn/replay_buffer.py

Lines changed: 0 additions & 3 deletions
@@ -5,6 +5,3 @@
 
 REPLAY_SIZE = 10 ** 6
 replay_buffer = deque(maxlen=REPLAY_SIZE)
-
-if __name__ == "__main__":
-    pass
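
As background for the lines kept above: deque(maxlen=...) evicts its oldest entry on append once full, which is exactly the FIFO behavior a DQN replay buffer needs, so no explicit size management is required. A small usage sketch (the transition layout and batch size are assumptions):

import random
from collections import deque

REPLAY_SIZE = 10 ** 6
replay_buffer = deque(maxlen=REPLAY_SIZE)

# Hypothetical transition layout: (state, action, reward, next_state, done).
for t in range(200):
    replay_buffer.append((t, 0, 1.0, t + 1, False))

# Uniform random minibatch; random.sample works directly on a deque.
batch = random.sample(replay_buffer, 128)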

dqn/train.py

Lines changed: 0 additions & 4 deletions
@@ -67,7 +67,3 @@ def train(buffer, Q):
 
     states.volatile, td_estimates.volatile = False, False
     return states, td_estimates
-
-
-if __name__ == "__main__":
-    pass
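
A side note on the surviving context lines: the volatile flag belongs to the pre-0.4 torch.autograd.Variable API, where it disabled graph construction during inference and had to be flipped back by hand, as done above. In PyTorch 0.4 and later the same effect is spelled with torch.no_grad(), roughly:

import torch
from torch import nn

Q = nn.Linear(4, 2)        # stand-in network; the real one lives in dqn.py
states = torch.randn(8, 4)

# Modern replacement for Variable(..., volatile=True):
with torch.no_grad():      # autograd records nothing inside this block
    td_estimates = Q(states)

# No flag to reset afterwards: outside the block, gradients flow as usual.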

xor_lstm/xor_lstm.py

Lines changed: 5 additions & 4 deletions
@@ -9,10 +9,11 @@
 import torch
 import torch.nn.functional as F
 import torch.optim as optim
-from torch import Tensor, nn
+from torch import nn
 from torch.autograd import Variable
 from torch.nn import Linear, ReLU, Softmax
 
+
 CUDA_AVAILABLE = torch.cuda.is_available()
 
 N = NUM_SAMPLES = 100_000
@@ -25,7 +26,7 @@
 
 
 def foldr(arr: np.ndarray, op) -> np.ndarray:
-    """Specific version of foldr that's only for Numpy arrays"""
+    """Specific version of foldr that's only for Numpy arrays."""
 
     return np.fromiter(itertools.accumulate(arr, op), dtype=np.float32, count=len(arr))
 
@@ -75,8 +76,8 @@ def argmax(tensor, dim=1):
 # Hack to check if we've already trained a model (assumed to be a good one.
 model_path = Path(f"model-{OP.__name__}" + ("cuda" if CUDA_AVAILABLE else "") + ".pth")
 
-test_mode = model_path.exists()
-train_mode = not test_mode
+test_mode: bool = model_path.exists()
+train_mode: bool = not test_mode
 
 if test_mode:
     model = torch.load(model_path)
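
A usage note on the foldr helper whose docstring was touched above: itertools.accumulate folds left-to-right and yields every intermediate value, so despite its name the function returns the running accumulation, which for the XOR task is the running parity of the bit sequence. For example:

import itertools

import numpy as np


def foldr(arr: np.ndarray, op) -> np.ndarray:
    """Specific version of foldr that's only for Numpy arrays."""
    return np.fromiter(itertools.accumulate(arr, op), dtype=np.float32, count=len(arr))


bits = np.array([1, 0, 1, 1], dtype=np.float32)
print(foldr(bits, np.logical_xor))  # [1. 1. 0. 1.], the running XOR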
