Skip to content

Commit a36a9e3

Browse files
committed
Fixed bugs in the action space; interactive mode now works on simple.py
1 parent 0ff36f2 commit a36a9e3

File tree

3 files changed

+20
-21
lines changed

3 files changed

+20
-21
lines changed

bin/interactive.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
# create world
1919
world = scenario.make_world()
2020
# create multiagent environment
21-
env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation, shared_viewer = False)
21+
env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation, info_callback=None, shared_viewer = False)
2222
# render call to create viewer window (necessary only for interactive policies)
2323
env.render()
2424
# create interactive policies for each agent

multiagent/environment.py

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,8 @@
44
import numpy as np
55
import tensorflow as tf
66

7-
# TODO: make description of class?
8-
97
# environment for all agents in the multiagent world
10-
# TODO: currently code assumes that no agents will be created/destroyed at runtime!
8+
# currently code assumes that no agents will be created/destroyed at runtime!
119
class MultiAgentEnv(gym.Env):
1210
metadata = {
1311
'render.modes' : ['human', 'rgb_array']
@@ -94,6 +92,7 @@ def _step(self, action_n):
9492
obs_n.append(self._get_obs(agent))
9593
reward_n.append(self._get_reward(agent))
9694
done_n.append(False)
95+
9796
info_n['n'].append(self._get_info(agent))
9897

9998
# all agents get total reward in cooperative case
@@ -146,13 +145,12 @@ def _set_action(self, action, agent, action_space, time=None):
146145
act.append(action[index:(index+s)])
147146
index += s
148147
action = act
149-
#else:
150-
# action = [action] # TODO: why is this necessary??
148+
else:
149+
action = [action] # TODO: why is this necessary??
151150

152151
if agent.movable:
153152
# physical action
154153
if self.discrete_action_input:
155-
print(action)
156154
agent.action.u = np.zeros(self.world.dim_p)
157155
# process discrete action
158156
if action[0] == 1: agent.action.u[0] = -1.0
@@ -190,7 +188,6 @@ def _reset_render(self):
190188

191189
# render environment
192190
def _render(self, mode='human', close=True):
193-
# TODO: render text in viewer instead
194191
if mode == 'human':
195192
alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
196193
message = ''
@@ -231,10 +228,7 @@ def _render(self, mode='human', close=True):
231228
for entity in self.world.entities:
232229
geom = rendering.make_circle(entity.size)
233230
xform = rendering.Transform()
234-
if 'forest' in entity.name:
235-
print(entity.color)
236-
geom.set_color(*entity.color, alpha=0.5)
237-
elif 'agent' in entity.name:
231+
if 'agent' in entity.name:
238232
geom.set_color(*entity.color, alpha=0.5)
239233
else:
240234
geom.set_color(*entity.color)
@@ -252,7 +246,7 @@ def _render(self, mode='human', close=True):
252246
for i in range(len(self.viewers)):
253247
from multiagent import rendering
254248
# update bounds to center around agent
255-
cam_range = 1.2
249+
cam_range = 1
256250
if self.shared_viewer:
257251
pos = np.zeros(self.world.dim_p)
258252
else:

multiagent/policy.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,20 @@ def action(self, obs):
2929
if self.move[2]: u = 4
3030
if self.move[3]: u = 3
3131
else:
32-
u = np.array([0.0,0.0,0.0,0.0])
33-
if self.move[0]: u[0] += 1.0
34-
if self.move[1]: u[1] += 1.0
35-
if self.move[2]: u[2] += 1.0
32+
u = np.array([0.0,0.0,0.0,0.0,0.0]) # 5-d because of no-move action
33+
if self.move[0]: u[1] += 1.0
34+
if self.move[1]: u[2] += 1.0
3635
if self.move[3]: u[3] += 1.0
37-
c = 0
38-
for i in range(len(self.comm)):
39-
if self.comm[i]: c = i+1
40-
return [u, c]
36+
if self.move[2]: u[4] += 1.0
37+
if True not in self.move:
38+
u[0] += 1.0
39+
if self.env.world.dim_c == 0:
40+
return u
41+
else:
42+
c = 0
43+
for i in range(len(self.comm)):
44+
if self.comm[i]: c = i+1
45+
return [u, c]
4146

4247
# keyboard event callbacks
4348
def key_press(self, k, mod):

0 commit comments

Comments
 (0)