Commit 6ed7cac (1 parent: 17fd2f7)

Fixed bugs, now works with latest gym. Fixed shared reward bug. Cleaned up code
File tree: 11 files changed, +64 -59 lines

README.md
Lines changed: 1 addition & 1 deletion

@@ -10,7 +10,7 @@ Used in the paper [Multi-Agent Actor-Critic for Mixed Cooperative-Competitive En
 - To interactively view moving to landmark scenario (see others in ./scenarios/):
 `bin/interactive.py --scenario simple.py`
 
-- Known dependencies: OpenAI gym version >=0.10, numpy
+- Known dependencies: Python (3.5.4), OpenAI gym (0.10.5), numpy (1.14.5)
 
 - To use the environments, look at the code for importing them in `make_env.py`.

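For orientation, here is a minimal usage sketch of the pattern the README points to in `make_env.py`; the exact `make_env` helper signature and the rollout loop below are illustrative assumptions, not part of this diff.

```python
# Hypothetical usage sketch based on the README's pointer to make_env.py;
# the make_env call and rollout loop are assumptions, not part of this commit.
from make_env import make_env

env = make_env('simple')              # load ./scenarios/simple.py
obs_n = env.reset()                   # one observation per agent
for _ in range(25):
    # sample one action per agent from its (possibly MultiDiscrete) action space
    act_n = [space.sample() for space in env.action_space]
    obs_n, reward_n, done_n, info_n = env.step(act_n)
```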
multiagent/environment.py
Lines changed: 4 additions & 3 deletions

@@ -2,6 +2,7 @@
 from gym import spaces
 from gym.envs.registration import EnvSpec
 import numpy as np
+from multiagent.multi_discrete import MultiDiscrete
 
 # environment for all agents in the multiagent world
 # currently code assumes that no agents will be created/destroyed at runtime!
@@ -31,7 +32,7 @@ def __init__(self, world, reset_callback=None, reward_callback=None,
         # if true, even the action is continuous, action will be performed discretely
         self.force_discrete_action = world.discrete_action if hasattr(world, 'discrete_action') else False
         # if true, every agent has the same reward
-        self.shared_reward = False
+        self.shared_reward = world.collaborative if hasattr(world, 'collaborative') else False
         self.time = 0
 
         # configure spaces
@@ -57,7 +58,7 @@ def __init__(self, world, reset_callback=None, reward_callback=None,
             if len(total_action_space) > 1:
                 # all action spaces are discrete, so simplify to MultiDiscrete action space
                 if all([isinstance(act_space, spaces.Discrete) for act_space in total_action_space]):
-                    act_space = spaces.MultiDiscrete([[0,act_space.n-1] for act_space in total_action_space])
+                    act_space = MultiDiscrete([[0, act_space.n - 1] for act_space in total_action_space])
                 else:
                     act_space = spaces.Tuple(total_action_space)
             self.action_space.append(act_space)
@@ -144,7 +145,7 @@ def _set_action(self, action, agent, action_space, time=None):
         agent.action.u = np.zeros(self.world.dim_p)
         agent.action.c = np.zeros(self.world.dim_c)
         # process action
-        if isinstance(action_space, spaces.MultiDiscrete):
+        if isinstance(action_space, MultiDiscrete):
             act = []
             size = action_space.high - action_space.low + 1
             index = 0

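The `shared_reward` change above means reward sharing is now opted into per scenario through a `world.collaborative` flag instead of being hard-coded off. A small self-contained sketch of the resulting behaviour follows; the summed-reward detail is an assumption about the rest of `environment.py`, which this diff does not show.

```python
# Sketch of the opt-in logic introduced above; the reward-summing behaviour for
# shared_reward=True is an assumption (that part of environment.py is not in this diff).
class World:
    pass

world = World()
world.collaborative = True   # set by e.g. simple_spread / simple_reference / simple_speaker_listener

shared_reward = world.collaborative if hasattr(world, 'collaborative') else False

per_agent_rewards = [-1.5, -0.5, -2.0]       # hypothetical per-agent scenario rewards
if shared_reward:
    # every agent receives the same global reward
    per_agent_rewards = [sum(per_agent_rewards)] * len(per_agent_rewards)
print(per_agent_rewards)                      # [-4.0, -4.0, -4.0]
```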
multiagent/multi_discrete.py
Lines changed: 44 additions & 0 deletions

@@ -0,0 +1,44 @@
+# An old version of OpenAI Gym's multi_discrete.py. (Was getting affected by Gym updates)
+# (https://github.com/openai/gym/blob/1fb81d4e3fb780ccf77fec731287ba07da35eb84/gym/spaces/multi_discrete.py)
+
+import numpy as np
+
+import gym
+from gym.spaces import prng
+
+class MultiDiscrete(gym.Space):
+    """
+    - The multi-discrete action space consists of a series of discrete action spaces with different parameters
+    - It can be adapted to both a Discrete action space or a continuous (Box) action space
+    - It is useful to represent game controllers or keyboards where each key can be represented as a discrete action space
+    - It is parametrized by passing an array of arrays containing [min, max] for each discrete action space
+       where the discrete action space can take any integers from `min` to `max` (both inclusive)
+    Note: A value of 0 always need to represent the NOOP action.
+    e.g. Nintendo Game Controller
+    - Can be conceptualized as 3 discrete action spaces:
+        1) Arrow Keys: Discrete 5 - NOOP[0], UP[1], RIGHT[2], DOWN[3], LEFT[4] - params: min: 0, max: 4
+        2) Button A: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1
+        3) Button B: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1
+    - Can be initialized as
+        MultiDiscrete([ [0,4], [0,1], [0,1] ])
+    """
+    def __init__(self, array_of_param_array):
+        self.low = np.array([x[0] for x in array_of_param_array])
+        self.high = np.array([x[1] for x in array_of_param_array])
+        self.num_discrete_space = self.low.shape[0]
+
+    def sample(self):
+        """ Returns a array with one sample from each discrete action space """
+        # For each row: round(random .* (max - min) + min, 0)
+        random_array = prng.np_random.rand(self.num_discrete_space)
+        return [int(x) for x in np.floor(np.multiply((self.high - self.low + 1.), random_array) + self.low)]
+    def contains(self, x):
+        return len(x) == self.num_discrete_space and (np.array(x) >= self.low).all() and (np.array(x) <= self.high).all()
+
+    @property
+    def shape(self):
+        return self.num_discrete_space
+    def __repr__(self):
+        return "MultiDiscrete" + str(self.num_discrete_space)
+    def __eq__(self, other):
+        return np.array_equal(self.low, other.low) and np.array_equal(self.high, other.high)

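A short usage sketch of the vendored class above; it relies on the pinned gym 0.10.5, where `gym.spaces.prng` still exists, and the sampled values shown in the comments are illustrative only.

```python
# Usage sketch for the vendored MultiDiscrete space above; requires the
# pinned gym 0.10.5 (gym.spaces.prng was removed in later gym releases).
from multiagent.multi_discrete import MultiDiscrete

# Nintendo-controller example from the docstring: arrow keys, button A, button B
act_space = MultiDiscrete([[0, 4], [0, 1], [0, 1]])

sample = act_space.sample()      # e.g. [3, 0, 1] -- one integer per sub-space
assert act_space.contains(sample)
assert act_space.shape == 3      # number of discrete sub-spaces
print(act_space)                 # MultiDiscrete3
```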
multiagent/scenarios/simple.py
Lines changed: 1 addition & 1 deletion

@@ -40,7 +40,7 @@ def reset_world(self, world):
 
     def reward(self, agent, world):
         dist2 = np.sum(np.square(agent.state.p_pos - world.landmarks[0].state.p_pos))
-        return -dist2 #np.exp(-dist2)
+        return -dist2
 
     def observation(self, agent, world):
         # get positions of all entities in this agent's reference frame

multiagent/scenarios/simple_adversary.py
Lines changed: 0 additions & 1 deletion

@@ -1,7 +1,6 @@
 import numpy as np
 from multiagent.core import World, Agent, Landmark
 from multiagent.scenario import BaseScenario
-import random
 
 
 class Scenario(BaseScenario):

multiagent/scenarios/simple_crypto.py
Lines changed: 0 additions & 2 deletions

@@ -127,8 +127,6 @@ def observation(self, agent, world):
         if agent.goal_a is not None:
             goal_color = agent.goal_a.color
 
-        #print('goal color in obs is {}'.format(goal_color))
-
         # get positions of all entities in this agent's reference frame
         entity_pos = []
         for entity in world.landmarks:

multiagent/scenarios/simple_push.py
Lines changed: 0 additions & 8 deletions

@@ -1,12 +1,6 @@
 import numpy as np
 from multiagent.core import World, Agent, Landmark
 from multiagent.scenario import BaseScenario
-import random
-
-#
-# # the non-ensemble version of <ensemble_push>
-#
-#
 
 class Scenario(BaseScenario):
     def make_world(self):
@@ -26,8 +20,6 @@ def make_world(self):
                 agent.adversary = True
             else:
                 agent.adversary = False
-                # agent.u_noise = 1e-1
-                # agent.c_noise = 1e-1
         # add landmarks
         world.landmarks = [Landmark() for i in range(num_landmarks)]
         for i, landmark in enumerate(world.landmarks):

multiagent/scenarios/simple_reference.py
Lines changed: 5 additions & 14 deletions

@@ -6,14 +6,13 @@ class Scenario(BaseScenario):
     def make_world(self):
         world = World()
         # set any world properties first
-        world.dim_c = 10
+        world.dim_c = 10
+        world.collaborative = True  # whether agents share rewards
         # add agents
         world.agents = [Agent() for i in range(2)]
         for i, agent in enumerate(world.agents):
             agent.name = 'agent %d' % i
             agent.collide = False
-            # agent.u_noise = 1e-1
-            # agent.c_noise = 1e-1
         # add landmarks
         world.landmarks = [Landmark() for i in range(3)]
         for i, landmark in enumerate(world.landmarks):
@@ -57,29 +56,21 @@ def reward(self, agent, world):
         if agent.goal_a is None or agent.goal_b is None:
             return 0.0
         dist2 = np.sum(np.square(agent.goal_a.state.p_pos - agent.goal_b.state.p_pos))
-        return -dist2 #np.exp(-dist2)
+        return -dist2
 
     def observation(self, agent, world):
-        # goal positions
-        # goal_pos = [np.zeros(world.dim_p), np.zeros(world.dim_p)]
-        # if agent.goal_a is not None:
-        #     goal_pos[0] = agent.goal_a.state.p_pos - agent.state.p_pos
-        # if agent.goal_b is not None:
-        #     goal_pos[1] = agent.goal_b.state.p_pos - agent.state.p_pos
         # goal color
         goal_color = [np.zeros(world.dim_color), np.zeros(world.dim_color)]
-        # if agent.goal_a is not None:
-        #     goal_color[0] = agent.goal_a.color
         if agent.goal_b is not None:
             goal_color[1] = agent.goal_b.color
 
         # get positions of all entities in this agent's reference frame
         entity_pos = []
-        for entity in world.landmarks:  #world.entities:
+        for entity in world.landmarks:
             entity_pos.append(entity.state.p_pos - agent.state.p_pos)
         # entity colors
         entity_color = []
-        for entity in world.landmarks:  #world.entities:
+        for entity in world.landmarks:
             entity_color.append(entity.color)
         # communication of all other agents
         comm = []

multiagent/scenarios/simple_speaker_listener.py
Lines changed: 2 additions & 1 deletion

@@ -8,6 +8,7 @@ def make_world(self):
         # set any world properties first
         world.dim_c = 3
         num_landmarks = 3
+        world.collaborative = True
         # add agents
         world.agents = [Agent() for i in range(2)]
         for i, agent in enumerate(world.agents):
@@ -57,7 +58,7 @@ def reset_world(self, world):
 
     def benchmark_data(self, agent, world):
        # returns data for benchmarking purposes
-        return reward(agent, reward)
+        return self.reward(agent, reward)
 
     def reward(self, agent, world):
         # squared distance from listener to landmark

multiagent/scenarios/simple_spread.py
Lines changed: 1 addition & 0 deletions

@@ -10,6 +10,7 @@ def make_world(self):
         world.dim_c = 2
         num_agents = 3
         num_landmarks = 3
+        world.collaborative = True
         # add agents
         world.agents = [Agent() for i in range(num_agents)]
         for i, agent in enumerate(world.agents):
