
Commit ba72459

fixed some interactive.py bugs
1 parent dc2d761 commit ba72459

5 files changed: +24 -25 lines changed


bin/interactive.py

Lines changed: 5 additions & 5 deletions

@@ -17,7 +17,7 @@
     scenario = scenarios.load(args.scenario).Scenario()
     # create world
     world = scenario.make_world()
-    # create multiagent environment
+    # create multiagent environment
     env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation, shared_viewer = False)
     # render call to create viewer window (necessary only for interactive policies)
     env.render()
@@ -33,7 +33,7 @@
         # step environment
         obs_n, reward_n, done_n, _ = env.step(act_n)
         # render all agent views
-        env.render()
-        # display rewards
-        for agent in env.world.agents:
-            print(agent.name + " reward: %0.3f" % env._get_reward(agent))
+        env.render()
+        # display rewards
+        #for agent in env.world.agents:
+        #    print(agent.name + " reward: %0.3f" % env._get_reward(agent))
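
For context, a minimal sketch of the driver loop this file implements, reconstructed from the two hunks above. The argument parsing, the use of env.n and env.reset(), and the InteractivePolicy construction are not shown in this diff and are assumptions.

import argparse
import multiagent.scenarios as scenarios
from multiagent.environment import MultiAgentEnv
from multiagent.policy import InteractivePolicy

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--scenario', default='simple.py')  # assumed flag
    args = parser.parse_args()

    scenario = scenarios.load(args.scenario).Scenario()
    # create world
    world = scenario.make_world()
    # create multiagent environment
    env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation, shared_viewer = False)
    # render call to create viewer window (necessary only for interactive policies)
    env.render()
    # one keyboard-driven policy per agent (assumed)
    policies = [InteractivePolicy(env, i) for i in range(env.n)]
    obs_n = env.reset()
    while True:
        # query an action for each agent from its keyboard policy
        act_n = [policy.action(obs) for policy, obs in zip(policies, obs_n)]
        # step environment
        obs_n, reward_n, done_n, _ = env.step(act_n)
        # render all agent views
        env.render()
        # display rewards (commented out by this commit)
        #for agent in env.world.agents:
        #    print(agent.name + " reward: %0.3f" % env._get_reward(agent))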

multiagent/environment.py

Lines changed: 4 additions & 4 deletions

@@ -70,7 +70,6 @@ def __init__(self, world, reset_callback=None, reward_callback=None,
             self.observation_space.append(spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim),))
             agent.action.c = np.zeros(self.world.dim_c)
 
-
         # rendering
         self.shared_viewer = shared_viewer
         if self.shared_viewer:
@@ -90,7 +89,7 @@ def _step(self, action_n):
             self._set_action(action_n[i], agent, self.action_space[i])
         # advance world state
         self.world.step()
-        # record observation for each agent # TODO: clean up
+        # record observation for each agent
         for agent in self.agents:
             obs_n.append(self._get_obs(agent))
             reward_n.append(self._get_reward(agent))
@@ -147,12 +146,13 @@ def _set_action(self, action, agent, action_space, time=None):
                 act.append(action[index:(index+s)])
                 index += s
             action = act
-        else:
-            action = [action]
+        #else:
+        #    action = [action] # TODO: why is this necessary??
 
         if agent.movable:
             # physical action
             if self.discrete_action_input:
+                print(action)
                 agent.action.u = np.zeros(self.world.dim_p)
                 # process discrete action
                 if action[0] == 1: agent.action.u[0] = -1.0
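
As a quick illustration of what the discrete-action branch of _set_action does with the incoming index: only the action[0] == 1 case is visible in this hunk, so the remaining branches in the sketch below are assumptions that follow the same pattern.

import numpy as np

def discrete_to_force(a, dim_p=2):
    # map a discrete action index to a physical force vector,
    # mirroring the `if self.discrete_action_input:` branch above
    u = np.zeros(dim_p)
    if a == 1: u[0] = -1.0  # shown in the hunk
    if a == 2: u[0] = +1.0  # assumed
    if a == 3: u[1] = -1.0  # assumed
    if a == 4: u[1] = +1.0  # assumed
    return u

print(discrete_to_force(1))  # -> [-1.  0.]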

multiagent/policy.py

Lines changed: 10 additions & 10 deletions

@@ -13,33 +13,33 @@ class InteractivePolicy(Policy):
     def __init__(self, env, agent_index):
         super(InteractivePolicy, self).__init__()
         self.env = env
-        # hard-coded keyborad events
+        # hard-coded keyboard events
         self.move = [False for i in range(4)]
         self.comm = [False for i in range(env.world.dim_c)]
-        # register keyboard events with this envornment's window
+        # register keyboard events with this environment's window
         env.viewers[agent_index].window.on_key_press = self.key_press
         env.viewers[agent_index].window.on_key_release = self.key_release
 
     def action(self, obs):
         # ignore observation and just act based on keyboard events
-        if self.env.discrete_action_space:
+        if self.env.discrete_action_input:
             u = 0
             if self.move[0]: u = 1
             if self.move[1]: u = 2
             if self.move[2]: u = 4
             if self.move[3]: u = 3
         else:
-            u = np.array([0.0,0.0])
-            if self.move[0]: u[0] -= 1.0
-            if self.move[1]: u[0] += 1.0
-            if self.move[2]: u[1] += 1.0
-            if self.move[3]: u[1] -= 1.0
+            u = np.array([0.0,0.0,0.0,0.0])
+            if self.move[0]: u[0] += 1.0
+            if self.move[1]: u[1] += 1.0
+            if self.move[2]: u[2] += 1.0
+            if self.move[3]: u[3] += 1.0
         c = 0
         for i in range(len(self.comm)):
             if self.comm[i]: c = i+1
         return [u, c]
 
-    # keyborad event callbacks
+    # keyborad event callbacks
     def key_press(self, k, mod):
         if k==key.LEFT: self.move[0] = True
         if k==key.RIGHT: self.move[1] = True
@@ -53,4 +53,4 @@ def key_release(self, k, mod):
         if k==key.UP: self.move[2] = False
         if k==key.DOWN: self.move[3] = False
         for i in range(len(self.comm)):
-            if k==key._1+i: self.comm[i] = False
+            if k==key._1+i: self.comm[i] = False
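
The action method now branches on env.discrete_action_input (rather than env.discrete_action_space) and, in the continuous branch, returns one non-negative entry per movement direction instead of a signed 2-D force. Below is a standalone sketch of the same key-to-action mapping, assuming move holds the [left, right, up, down] flags set by the key callbacks.

import numpy as np

def keys_to_action(move, discrete_action_input):
    # move: [left, right, up, down] booleans, as in InteractivePolicy.move
    if discrete_action_input:
        # a single integer index; 0 means no key pressed
        u = 0
        if move[0]: u = 1
        if move[1]: u = 2
        if move[2]: u = 4
        if move[3]: u = 3
        return u
    # one non-negative magnitude per movement direction
    u = np.zeros(4)
    for i, pressed in enumerate(move):
        if pressed:
            u[i] += 1.0
    return u

print(keys_to_action([True, False, False, False], discrete_action_input=False))  # -> [1. 0. 0. 0.]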

multiagent/scenarios/simple.py

Lines changed: 4 additions & 5 deletions

@@ -10,7 +10,7 @@ def make_world(self):
         for i, agent in enumerate(world.agents):
             agent.name = 'agent %d' % i
             agent.collide = False
-            agent.silent = True
+            agent.silent = True
         # add landmarks
         world.landmarks = [Landmark() for i in range(1)]
         for i, landmark in enumerate(world.landmarks):
@@ -24,11 +24,11 @@ def make_world(self):
     def reset_world(self, world):
         # random properties for agents
         for i, agent in enumerate(world.agents):
-            agent.color = np.array([0.25,0.25,0.25])
+            agent.color = np.array([0.25,0.25,0.25])
         # random properties for landmarks
         for i, landmark in enumerate(world.landmarks):
             landmark.color = np.array([0.75,0.75,0.75])
-        world.landmarks[0].color = np.array([0.75,0.25,0.25])
+        world.landmarks[0].color = np.array([0.75,0.25,0.25])
         # set random initial states
         for agent in world.agents:
             agent.state.p_pos = np.random.uniform(-1,+1, world.dim_p)
@@ -42,10 +42,9 @@ def reward(self, agent, world):
         dist2 = np.sum(np.square(agent.state.p_pos - world.landmarks[0].state.p_pos))
         return -dist2 #np.exp(-dist2)
 
-    def observation(self, agent, world):
+    def observation(self, agent, world):
         # get positions of all entities in this agent's reference frame
         entity_pos = []
         for entity in world.landmarks:
             entity_pos.append(entity.state.p_pos - agent.state.p_pos)
         return np.concatenate([agent.state.p_vel] + entity_pos)
-
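
A brief usage sketch for this scenario, built with the same construction pattern as bin/interactive.py above; printing the observation shape is purely illustrative.

import multiagent.scenarios as scenarios
from multiagent.environment import MultiAgentEnv

scenario = scenarios.load('simple.py').Scenario()
world = scenario.make_world()
env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation)
obs_n = env.reset()
# one observation per agent: [own velocity, landmark offsets], per observation() above
print(len(obs_n), obs_n[0].shape)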

setup.py

Lines changed: 1 addition & 1 deletion

@@ -3,7 +3,7 @@
 setup(name='multiagent',
       version='0.0.1',
       description='Multi-Agent Goal-Driven Communication Environment',
-      url='https://github.com/openai/multiagent',
+      url='https://github.com/openai/multiagent-public',
       author='Igor Mordatch',
       author_email='mordatch@openai.com',
       packages=find_packages(),
