Commit 6ed7cac (1 parent: 17fd2f7)

Fixed bugs, now works with latest gym. Fixed shared reward bug. Cleaned up code
File tree: 11 files changed, +64 -59 lines

README.md
Lines changed: 1 addition & 1 deletion

@@ -10,7 +10,7 @@ Used in the paper [Multi-Agent Actor-Critic for Mixed Cooperative-Competitive En
 - To interactively view moving to landmark scenario (see others in ./scenarios/):
 `bin/interactive.py --scenario simple.py`
 
-- Known dependencies: OpenAI gym version >=0.10, numpy
+- Known dependencies: Python (3.5.4), OpenAI gym (0.10.5), numpy (1.14.5)
 
 - To use the environments, look at the code for importing them in `make_env.py`.

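For orientation, here is a minimal usage sketch of the pattern the README points to in `make_env.py`; the exact `make_env` helper signature and the rollout loop below are illustrative assumptions, not part of this diff.

```python
# Hypothetical usage sketch based on the README's pointer to make_env.py;
# the make_env call and rollout loop are assumptions, not part of this commit.
from make_env import make_env

env = make_env('simple')              # load ./scenarios/simple.py
obs_n = env.reset()                   # one observation per agent
for _ in range(25):
    # sample one action per agent from its (possibly MultiDiscrete) action space
    act_n = [space.sample() for space in env.action_space]
    obs_n, reward_n, done_n, info_n = env.step(act_n)
```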
multiagent/environment.py
Lines changed: 4 additions & 3 deletions

@@ -2,6 +2,7 @@
 from gym import spaces
 from gym.envs.registration import EnvSpec
 import numpy as np
+from multiagent.multi_discrete import MultiDiscrete
 
 # environment for all agents in the multiagent world
 # currently code assumes that no agents will be created/destroyed at runtime!
@@ -31,7 +32,7 @@ def __init__(self, world, reset_callback=None, reward_callback=None,
         # if true, even the action is continuous, action will be performed discretely
         self.force_discrete_action = world.discrete_action if hasattr(world, 'discrete_action') else False
         # if true, every agent has the same reward
-        self.shared_reward = False
+        self.shared_reward = world.collaborative if hasattr(world, 'collaborative') else False
         self.time = 0
 
         # configure spaces
@@ -57,7 +58,7 @@ def __init__(self, world, reset_callback=None, reward_callback=None,
             if len(total_action_space) > 1:
                 # all action spaces are discrete, so simplify to MultiDiscrete action space
                 if all([isinstance(act_space, spaces.Discrete) for act_space in total_action_space]):
-                    act_space = spaces.MultiDiscrete([[0,act_space.n-1] for act_space in total_action_space])
+                    act_space = MultiDiscrete([[0, act_space.n - 1] for act_space in total_action_space])
                 else:
                     act_space = spaces.Tuple(total_action_space)
             self.action_space.append(act_space)
@@ -144,7 +145,7 @@ def _set_action(self, action, agent, action_space, time=None):
         agent.action.u = np.zeros(self.world.dim_p)
         agent.action.c = np.zeros(self.world.dim_c)
         # process action
-        if isinstance(action_space, spaces.MultiDiscrete):
+        if isinstance(action_space, MultiDiscrete):
             act = []
             size = action_space.high - action_space.low + 1
             index = 0

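The `shared_reward` change above means reward sharing is now opted into per scenario through a `world.collaborative` flag instead of being hard-coded off. A small self-contained sketch of the resulting behaviour follows; the summed-reward detail is an assumption about the rest of `environment.py`, which this diff does not show.

```python
# Sketch of the opt-in logic introduced above; the reward-summing behaviour for
# shared_reward=True is an assumption (that part of environment.py is not in this diff).
class World:
    pass

world = World()
world.collaborative = True   # set by e.g. simple_spread / simple_reference / simple_speaker_listener

shared_reward = world.collaborative if hasattr(world, 'collaborative') else False

per_agent_rewards = [-1.5, -0.5, -2.0]       # hypothetical per-agent scenario rewards
if shared_reward:
    # every agent receives the same global reward
    per_agent_rewards = [sum(per_agent_rewards)] * len(per_agent_rewards)
print(per_agent_rewards)                      # [-4.0, -4.0, -4.0]
```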
multiagent/multi_discrete.py
Lines changed: 44 additions & 0 deletions

@@ -0,0 +1,44 @@
+# An old version of OpenAI Gym's multi_discrete.py. (Was getting affected by Gym updates)
+# (https://github.com/openai/gym/blob/1fb81d4e3fb780ccf77fec731287ba07da35eb84/gym/spaces/multi_discrete.py)
+
+import numpy as np
+
+import gym
+from gym.spaces import prng
+
+class MultiDiscrete(gym.Space):
+    """
+    - The multi-discrete action space consists of a series of discrete action spaces with different parameters
+    - It can be adapted to both a Discrete action space or a continuous (Box) action space
+    - It is useful to represent game controllers or keyboards where each key can be represented as a discrete action space
+    - It is parametrized by passing an array of arrays containing [min, max] for each discrete action space
+       where the discrete action space can take any integers from `min` to `max` (both inclusive)
+    Note: A value of 0 always need to represent the NOOP action.
+    e.g. Nintendo Game Controller
+    - Can be conceptualized as 3 discrete action spaces:
+        1) Arrow Keys: Discrete 5 - NOOP[0], UP[1], RIGHT[2], DOWN[3], LEFT[4] - params: min: 0, max: 4
+        2) Button A: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1
+        3) Button B: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1
+    - Can be initialized as
+        MultiDiscrete([ [0,4], [0,1], [0,1] ])
+    """
+    def __init__(self, array_of_param_array):
+        self.low = np.array([x[0] for x in array_of_param_array])
+        self.high = np.array([x[1] for x in array_of_param_array])
+        self.num_discrete_space = self.low.shape[0]
+
+    def sample(self):
+        """ Returns a array with one sample from each discrete action space """
+        # For each row: round(random .* (max - min) + min, 0)
+        random_array = prng.np_random.rand(self.num_discrete_space)
+        return [int(x) for x in np.floor(np.multiply((self.high - self.low + 1.), random_array) + self.low)]
+    def contains(self, x):
+        return len(x) == self.num_discrete_space and (np.array(x) >= self.low).all() and (np.array(x) <= self.high).all()
+
+    @property
+    def shape(self):
+        return self.num_discrete_space
+    def __repr__(self):
+        return "MultiDiscrete" + str(self.num_discrete_space)
+    def __eq__(self, other):
+        return np.array_equal(self.low, other.low) and np.array_equal(self.high, other.high)

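A short usage sketch of the vendored class above; it relies on the pinned gym 0.10.5, where `gym.spaces.prng` still exists, and the sampled values shown in the comments are illustrative only.

```python
# Usage sketch for the vendored MultiDiscrete space above; requires the
# pinned gym 0.10.5 (gym.spaces.prng was removed in later gym releases).
from multiagent.multi_discrete import MultiDiscrete

# Nintendo-controller example from the docstring: arrow keys, button A, button B
act_space = MultiDiscrete([[0, 4], [0, 1], [0, 1]])

sample = act_space.sample()      # e.g. [3, 0, 1] -- one integer per sub-space
assert act_space.contains(sample)
assert act_space.shape == 3      # number of discrete sub-spaces
print(act_space)                 # MultiDiscrete3
```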
multiagent/scenarios/simple.py
Lines changed: 1 addition & 1 deletion

@@ -40,7 +40,7 @@ def reset_world(self, world):
 
     def reward(self, agent, world):
         dist2 = np.sum(np.square(agent.state.p_pos - world.landmarks[0].state.p_pos))
-        return -dist2 #np.exp(-dist2)
+        return -dist2
 
     def observation(self, agent, world):
         # get positions of all entities in this agent's reference frame

multiagent/scenarios/simple_adversary.py
Lines changed: 0 additions & 1 deletion

@@ -1,7 +1,6 @@
 import numpy as np
 from multiagent.core import World, Agent, Landmark
 from multiagent.scenario import BaseScenario
-import random
 
 
 class Scenario(BaseScenario):

multiagent/scenarios/simple_crypto.py
Lines changed: 0 additions & 2 deletions

@@ -127,8 +127,6 @@ def observation(self, agent, world):
         if agent.goal_a is not None:
             goal_color = agent.goal_a.color
 
-        #print('goal color in obs is {}'.format(goal_color))
-
         # get positions of all entities in this agent's reference frame
         entity_pos = []
         for entity in world.landmarks:

multiagent/scenarios/simple_push.py
Lines changed: 0 additions & 8 deletions

@@ -1,12 +1,6 @@
 import numpy as np
 from multiagent.core import World, Agent, Landmark
 from multiagent.scenario import BaseScenario
-import random
-
-#
-# # the non-ensemble version of <ensemble_push>
-#
-#
 
 class Scenario(BaseScenario):
     def make_world(self):
@@ -26,8 +20,6 @@ def make_world(self):
                 agent.adversary = True
             else:
                 agent.adversary = False
-                # agent.u_noise = 1e-1
-                # agent.c_noise = 1e-1
         # add landmarks
         world.landmarks = [Landmark() for i in range(num_landmarks)]
         for i, landmark in enumerate(world.landmarks):

multiagent/scenarios/simple_reference.py
Lines changed: 5 additions & 14 deletions

@@ -6,14 +6,13 @@ class Scenario(BaseScenario):
     def make_world(self):
         world = World()
         # set any world properties first
-        world.dim_c = 10
+        world.dim_c = 10
+        world.collaborative = True  # whether agents share rewards
         # add agents
         world.agents = [Agent() for i in range(2)]
         for i, agent in enumerate(world.agents):
             agent.name = 'agent %d' % i
             agent.collide = False
-            # agent.u_noise = 1e-1
-            # agent.c_noise = 1e-1
         # add landmarks
         world.landmarks = [Landmark() for i in range(3)]
         for i, landmark in enumerate(world.landmarks):
@@ -57,29 +56,21 @@ def reward(self, agent, world):
         if agent.goal_a is None or agent.goal_b is None:
             return 0.0
         dist2 = np.sum(np.square(agent.goal_a.state.p_pos - agent.goal_b.state.p_pos))
-        return -dist2 #np.exp(-dist2)
+        return -dist2
 
     def observation(self, agent, world):
-        # goal positions
-        # goal_pos = [np.zeros(world.dim_p), np.zeros(world.dim_p)]
-        # if agent.goal_a is not None:
-        #     goal_pos[0] = agent.goal_a.state.p_pos - agent.state.p_pos
-        # if agent.goal_b is not None:
-        #     goal_pos[1] = agent.goal_b.state.p_pos - agent.state.p_pos
         # goal color
         goal_color = [np.zeros(world.dim_color), np.zeros(world.dim_color)]
-        # if agent.goal_a is not None:
-        #     goal_color[0] = agent.goal_a.color
         if agent.goal_b is not None:
             goal_color[1] = agent.goal_b.color
 
         # get positions of all entities in this agent's reference frame
         entity_pos = []
-        for entity in world.landmarks:  #world.entities:
+        for entity in world.landmarks:
             entity_pos.append(entity.state.p_pos - agent.state.p_pos)
         # entity colors
         entity_color = []
-        for entity in world.landmarks:  #world.entities:
+        for entity in world.landmarks:
             entity_color.append(entity.color)
         # communication of all other agents
         comm = []

multiagent/scenarios/simple_speaker_listener.py
Lines changed: 2 additions & 1 deletion

@@ -8,6 +8,7 @@ def make_world(self):
         # set any world properties first
         world.dim_c = 3
         num_landmarks = 3
+        world.collaborative = True
         # add agents
         world.agents = [Agent() for i in range(2)]
         for i, agent in enumerate(world.agents):
@@ -57,7 +58,7 @@ def reset_world(self, world):
 
     def benchmark_data(self, agent, world):
        # returns data for benchmarking purposes
-        return reward(agent, reward)
+        return self.reward(agent, reward)
 
     def reward(self, agent, world):
         # squared distance from listener to landmark

multiagent/scenarios/simple_spread.py
Lines changed: 1 addition & 0 deletions

@@ -10,6 +10,7 @@ def make_world(self):
         world.dim_c = 2
         num_agents = 3
         num_landmarks = 3
+        world.collaborative = True
         # add agents
         world.agents = [Agent() for i in range(num_agents)]
         for i, agent in enumerate(world.agents):
