arp95
diff --git a/‎code/__pycache__/td3.cpython-37.pyc‎
0 Bytes b/‎code/__pycache__/td3.cpython-37.pyc‎
0 Bytes
diff --git a/‎code/envs/__pycache__/robot_target_tracking_env.cpython-37.pyc‎
622 Bytes b/‎code/envs/__pycache__/robot_target_tracking_env.cpython-37.pyc‎
622 Bytes
diff --git a/‎code/envs/robot_target_tracking_env.py‎
Lines changed: 17 additions & 17 deletions b/‎code/envs/robot_target_tracking_env.py‎
Lines changed: 17 additions & 17 deletions
diff --git a/‎code/eval.py‎
Lines changed: 1 addition & 1 deletion b/‎code/eval.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎code/td3.py‎
Lines changed: 6 additions & 6 deletions b/‎code/td3.py‎
Lines changed: 6 additions & 6 deletions
diff --git a/‎code/train.py‎
Lines changed: 4 additions & 4 deletions b/‎code/train.py‎
Lines changed: 4 additions & 4 deletions
@@ -48,7 +48,7 @@ def __init__(self):
  #self.model.to(self.device)
 
 
- def env_parametrization(self, num_targets=2, num_sensors=1, target_motion_omegas=None, meas_model='range'):
+ def env_parametrization(self, num_targets=4, num_sensors=1, target_motion_omegas=None, meas_model='range'):
  """ 
  Function for parametrizing the environment
  """
@@ -112,10 +112,10 @@ def env_parametrization(self, num_targets=2, num_sensors=1, target_motion_omegas
  self.y1_list.append(float(self.true_targets_pos[0, 1]))
  self.x2_list.append(float(self.true_targets_pos[1, 0]))
  self.y2_list.append(float(self.true_targets_pos[1, 1]))
- #self.x3_list.append(float(self.true_targets_pos[2, 0]))
- #self.y3_list.append(float(self.true_targets_pos[2, 1]))
- #self.x4_list.append(float(self.true_targets_pos[3, 0]))
- #self.y4_list.append(float(self.true_targets_pos[3, 1]))
+ self.x3_list.append(float(self.true_targets_pos[2, 0]))
+ self.y3_list.append(float(self.true_targets_pos[2, 1]))
+ self.x4_list.append(float(self.true_targets_pos[3, 0]))
+ self.y4_list.append(float(self.true_targets_pos[3, 1]))
 
  self.meas_model = meas_model
  if self.meas_model == 'bearing':
@@ -145,10 +145,10 @@ def step(self, action, step_size):
  self.y1_list.append(float(self.true_targets_pos[0, 1]))
  self.x2_list.append(float(self.true_targets_pos[1, 0]))
  self.y2_list.append(float(self.true_targets_pos[1, 1]))
- #self.x3_list.append(float(self.true_targets_pos[2, 0]))
- #self.y3_list.append(float(self.true_targets_pos[2, 1]))
- #self.x4_list.append(float(self.true_targets_pos[3, 0]))
- #self.y4_list.append(float(self.true_targets_pos[3, 1]))
+ self.x3_list.append(float(self.true_targets_pos[2, 0]))
+ self.y3_list.append(float(self.true_targets_pos[2, 1]))
+ self.x4_list.append(float(self.true_targets_pos[3, 0]))
+ self.y4_list.append(float(self.true_targets_pos[3, 1]))
 
  self.heatmap = torch.zeros(self.len_workspace, self.len_workspace)
  for index in range(0, self.num_targets):
@@ -218,10 +218,10 @@ def reset(self, **kwargs):
  self.y1_list.append(float(self.true_targets_pos[0, 1]))
  self.x2_list.append(float(self.true_targets_pos[1, 0]))
  self.y2_list.append(float(self.true_targets_pos[1, 1]))
- #self.x3_list.append(float(self.true_targets_pos[2, 0]))
- #self.y3_list.append(float(self.true_targets_pos[2, 1]))
- #self.x4_list.append(float(self.true_targets_pos[3, 0]))
- #self.y4_list.append(float(self.true_targets_pos[3, 1]))
+ self.x3_list.append(float(self.true_targets_pos[2, 0]))
+ self.y3_list.append(float(self.true_targets_pos[2, 1]))
+ self.x4_list.append(float(self.true_targets_pos[3, 0]))
+ self.y4_list.append(float(self.true_targets_pos[3, 1]))
 
  self.heatmap = torch.zeros(self.len_workspace, self.len_workspace)
  for index in range(0, self.num_targets):
@@ -273,10 +273,10 @@ def render(self):
  plt.plot(self.x1_list[len(self.x1_list) - 1], self.y1_list[len(self.y1_list) - 1], 'o', c='b', marker='*')
  plt.plot(self.x2_list, self.y2_list, 'b--')
  plt.plot(self.x2_list[len(self.x2_list) - 1], self.y2_list[len(self.y2_list) - 1], 'o', c='b', marker='*')
- #plt.plot(self.x3_list, self.y3_list, 'b--')
- #plt.plot(self.x3_list[len(self.x3_list) - 1], self.y3_list[len(self.y3_list) - 1], 'o', c='b', marker='*')
- #plt.plot(self.x4_list, self.y4_list, 'b--')
- #plt.plot(self.x4_list[len(self.x4_list) - 1], self.y4_list[len(self.y4_list) - 1], 'o', c='b', marker='*')
+ plt.plot(self.x3_list, self.y3_list, 'b--')
+ plt.plot(self.x3_list[len(self.x3_list) - 1], self.y3_list[len(self.y3_list) - 1], 'o', c='b', marker='*')
+ plt.plot(self.x4_list, self.y4_list, 'b--')
+ plt.plot(self.x4_list[len(self.x4_list) - 1], self.y4_list[len(self.y4_list) - 1], 'o', c='b', marker='*')
  if(len(self.robot_movement_x) < 8):
  plt.plot(self.robot_movement_x, self.robot_movement_y, 'r--')
  else:
 
@@ -37,7 +37,7 @@
 action_dim = env.action_space.shape[0]
 max_action = float(env.action_space.high[0])
 policy = TD3(0.0005, state_dim, 2, max_action)
-policy.load_actor("/home/arpitdec5/Desktop/robot_target_tracking/", "model_sensors_1_targets_2")
+policy.load_actor("/home/arpitdec5/Desktop/robot_target_tracking/", "model_sensors_1_targets_4")
 
 # eval loop
 state = env.reset()
 
@@ -15,9 +15,9 @@ class Actor(nn.Module):
  def __init__(self, state_dim, action_dim, max_action):
  super(Actor, self).__init__()
 
- self.l1 = nn.Linear(state_dim, 800)
- self.l2 = nn.Linear(800, 800)
- self.l3 = nn.Linear(800, action_dim)
+ self.l1 = nn.Linear(state_dim, 1024)
+ self.l2 = nn.Linear(1024, 1024)
+ self.l3 = nn.Linear(1024, action_dim)
 
  self.max_action = max_action
 
@@ -33,9 +33,9 @@ class Critic(nn.Module):
  def __init__(self, state_dim, action_dim):
  super(Critic, self).__init__()
 
- self.l1 = nn.Linear(state_dim + action_dim, 800)
- self.l2 = nn.Linear(800, 800)
- self.l3 = nn.Linear(800, action_dim)
+ self.l1 = nn.Linear(state_dim + action_dim, 1024)
+ self.l2 = nn.Linear(1024, 1024)
+ self.l3 = nn.Linear(1024, action_dim)
 
  def forward(self, state, action):
  q = F.relu(self.l1(torch.cat([state, action], 1)))
 
@@ -34,8 +34,8 @@
 
 # constants
 lr = 0.0005
-epochs = 2000
-iters = 300
+epochs = 4000
+iters = 600
 
 # create TD3 object
 state_dim = env.observation_space.shape[0]
@@ -73,7 +73,7 @@
 
  # save actor and critic models
  if(epoch > 1000 and epoch%10==0):
- policy.save("/home/arpitdec5/Desktop/robot_target_tracking/", "model_sensors_1_targets_2")
+ policy.save("/home/arpitdec5/Desktop/robot_target_tracking/", "model_sensors_1_targets_4")
 
  # print reward
  print()
@@ -101,4 +101,4 @@
 plt.plot(m_e, m_r, c='orange', label='Mean Reward')
 #plt.plot(g_e, g_r, c='red', label='Greedy Algorithm')
 plt.legend()
-plt.savefig("/home/arpitdec5/Desktop/robot_target_tracking/reward_sensors_1_targets_2.png")
+plt.savefig("/home/arpitdec5/Desktop/robot_target_tracking/reward_sensors_1_targets_4.png")