
Commit ba72459

fixed some interactive.py bugs
1 parent dc2d761 commit ba72459

5 files changed: +24 -25 lines changed


bin/interactive.py

Lines changed: 5 additions & 5 deletions

@@ -17,7 +17,7 @@
     scenario = scenarios.load(args.scenario).Scenario()
     # create world
     world = scenario.make_world()
-    # create multiagent environment
+    # create multiagent environment
     env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation, shared_viewer = False)
     # render call to create viewer window (necessary only for interactive policies)
     env.render()
@@ -33,7 +33,7 @@
         # step environment
         obs_n, reward_n, done_n, _ = env.step(act_n)
         # render all agent views
-        env.render()
-        # display rewards
-        for agent in env.world.agents:
-            print(agent.name + " reward: %0.3f" % env._get_reward(agent))
+        env.render()
+        # display rewards
+        #for agent in env.world.agents:
+        #    print(agent.name + " reward: %0.3f" % env._get_reward(agent))
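
For context, a minimal sketch of the driver loop this file implements, reconstructed from the two hunks above. The argument parsing, the use of env.n and env.reset(), and the InteractivePolicy construction are not shown in this diff and are assumptions.

import argparse
import multiagent.scenarios as scenarios
from multiagent.environment import MultiAgentEnv
from multiagent.policy import InteractivePolicy

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--scenario', default='simple.py')  # assumed flag
    args = parser.parse_args()

    scenario = scenarios.load(args.scenario).Scenario()
    # create world
    world = scenario.make_world()
    # create multiagent environment
    env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation, shared_viewer = False)
    # render call to create viewer window (necessary only for interactive policies)
    env.render()
    # one keyboard-driven policy per agent (assumed)
    policies = [InteractivePolicy(env, i) for i in range(env.n)]
    obs_n = env.reset()
    while True:
        # query an action for each agent from its keyboard policy
        act_n = [policy.action(obs) for policy, obs in zip(policies, obs_n)]
        # step environment
        obs_n, reward_n, done_n, _ = env.step(act_n)
        # render all agent views
        env.render()
        # display rewards (commented out by this commit)
        #for agent in env.world.agents:
        #    print(agent.name + " reward: %0.3f" % env._get_reward(agent))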

multiagent/environment.py

Lines changed: 4 additions & 4 deletions

@@ -70,7 +70,6 @@ def __init__(self, world, reset_callback=None, reward_callback=None,
             self.observation_space.append(spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim),))
             agent.action.c = np.zeros(self.world.dim_c)
 
-
         # rendering
         self.shared_viewer = shared_viewer
         if self.shared_viewer:
@@ -90,7 +89,7 @@ def _step(self, action_n):
             self._set_action(action_n[i], agent, self.action_space[i])
         # advance world state
         self.world.step()
-        # record observation for each agent # TODO: clean up
+        # record observation for each agent
         for agent in self.agents:
             obs_n.append(self._get_obs(agent))
             reward_n.append(self._get_reward(agent))
@@ -147,12 +146,13 @@ def _set_action(self, action, agent, action_space, time=None):
                 act.append(action[index:(index+s)])
                 index += s
             action = act
-        else:
-            action = [action]
+        #else:
+        #    action = [action] # TODO: why is this necessary??
 
         if agent.movable:
             # physical action
             if self.discrete_action_input:
+                print(action)
                 agent.action.u = np.zeros(self.world.dim_p)
                 # process discrete action
                 if action[0] == 1: agent.action.u[0] = -1.0
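
As a quick illustration of what the discrete-action branch of _set_action does with the incoming index: only the action[0] == 1 case is visible in this hunk, so the remaining branches in the sketch below are assumptions that follow the same pattern.

import numpy as np

def discrete_to_force(a, dim_p=2):
    # map a discrete action index to a physical force vector,
    # mirroring the `if self.discrete_action_input:` branch above
    u = np.zeros(dim_p)
    if a == 1: u[0] = -1.0  # shown in the hunk
    if a == 2: u[0] = +1.0  # assumed
    if a == 3: u[1] = -1.0  # assumed
    if a == 4: u[1] = +1.0  # assumed
    return u

print(discrete_to_force(1))  # -> [-1.  0.]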

multiagent/policy.py

Lines changed: 10 additions & 10 deletions

@@ -13,33 +13,33 @@ class InteractivePolicy(Policy):
     def __init__(self, env, agent_index):
         super(InteractivePolicy, self).__init__()
         self.env = env
-        # hard-coded keyborad events
+        # hard-coded keyboard events
         self.move = [False for i in range(4)]
         self.comm = [False for i in range(env.world.dim_c)]
-        # register keyboard events with this envornment's window
+        # register keyboard events with this environment's window
         env.viewers[agent_index].window.on_key_press = self.key_press
         env.viewers[agent_index].window.on_key_release = self.key_release
 
     def action(self, obs):
         # ignore observation and just act based on keyboard events
-        if self.env.discrete_action_space:
+        if self.env.discrete_action_input:
             u = 0
             if self.move[0]: u = 1
             if self.move[1]: u = 2
             if self.move[2]: u = 4
             if self.move[3]: u = 3
         else:
-            u = np.array([0.0,0.0])
-            if self.move[0]: u[0] -= 1.0
-            if self.move[1]: u[0] += 1.0
-            if self.move[2]: u[1] += 1.0
-            if self.move[3]: u[1] -= 1.0
+            u = np.array([0.0,0.0,0.0,0.0])
+            if self.move[0]: u[0] += 1.0
+            if self.move[1]: u[1] += 1.0
+            if self.move[2]: u[2] += 1.0
+            if self.move[3]: u[3] += 1.0
         c = 0
         for i in range(len(self.comm)):
             if self.comm[i]: c = i+1
         return [u, c]
 
-    # keyborad event callbacks
+    # keyborad event callbacks
     def key_press(self, k, mod):
         if k==key.LEFT: self.move[0] = True
         if k==key.RIGHT: self.move[1] = True
@@ -53,4 +53,4 @@ def key_release(self, k, mod):
         if k==key.UP: self.move[2] = False
         if k==key.DOWN: self.move[3] = False
         for i in range(len(self.comm)):
-            if k==key._1+i: self.comm[i] = False
+            if k==key._1+i: self.comm[i] = False
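
The action method now branches on env.discrete_action_input (rather than env.discrete_action_space) and, in the continuous branch, returns one non-negative entry per movement direction instead of a signed 2-D force. Below is a standalone sketch of the same key-to-action mapping, assuming move holds the [left, right, up, down] flags set by the key callbacks.

import numpy as np

def keys_to_action(move, discrete_action_input):
    # move: [left, right, up, down] booleans, as in InteractivePolicy.move
    if discrete_action_input:
        # a single integer index; 0 means no key pressed
        u = 0
        if move[0]: u = 1
        if move[1]: u = 2
        if move[2]: u = 4
        if move[3]: u = 3
        return u
    # one non-negative magnitude per movement direction
    u = np.zeros(4)
    for i, pressed in enumerate(move):
        if pressed:
            u[i] += 1.0
    return u

print(keys_to_action([True, False, False, False], discrete_action_input=False))  # -> [1. 0. 0. 0.]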

multiagent/scenarios/simple.py

Lines changed: 4 additions & 5 deletions

@@ -10,7 +10,7 @@ def make_world(self):
         for i, agent in enumerate(world.agents):
             agent.name = 'agent %d' % i
             agent.collide = False
-            agent.silent = True
+            agent.silent = True
         # add landmarks
         world.landmarks = [Landmark() for i in range(1)]
         for i, landmark in enumerate(world.landmarks):
@@ -24,11 +24,11 @@ def make_world(self):
     def reset_world(self, world):
         # random properties for agents
         for i, agent in enumerate(world.agents):
-            agent.color = np.array([0.25,0.25,0.25])
+            agent.color = np.array([0.25,0.25,0.25])
         # random properties for landmarks
         for i, landmark in enumerate(world.landmarks):
             landmark.color = np.array([0.75,0.75,0.75])
-        world.landmarks[0].color = np.array([0.75,0.25,0.25])
+        world.landmarks[0].color = np.array([0.75,0.25,0.25])
         # set random initial states
         for agent in world.agents:
             agent.state.p_pos = np.random.uniform(-1,+1, world.dim_p)
@@ -42,10 +42,9 @@ def reward(self, agent, world):
         dist2 = np.sum(np.square(agent.state.p_pos - world.landmarks[0].state.p_pos))
         return -dist2 #np.exp(-dist2)
 
-    def observation(self, agent, world):
+    def observation(self, agent, world):
         # get positions of all entities in this agent's reference frame
         entity_pos = []
         for entity in world.landmarks:
             entity_pos.append(entity.state.p_pos - agent.state.p_pos)
         return np.concatenate([agent.state.p_vel] + entity_pos)
-
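
A brief usage sketch for this scenario, built with the same construction pattern as bin/interactive.py above; printing the observation shape is purely illustrative.

import multiagent.scenarios as scenarios
from multiagent.environment import MultiAgentEnv

scenario = scenarios.load('simple.py').Scenario()
world = scenario.make_world()
env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation)
obs_n = env.reset()
# one observation per agent: [own velocity, landmark offsets], per observation() above
print(len(obs_n), obs_n[0].shape)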

setup.py

Lines changed: 1 addition & 1 deletion

@@ -3,7 +3,7 @@
 setup(name='multiagent',
       version='0.0.1',
       description='Multi-Agent Goal-Driven Communication Environment',
-      url='https://github.com/openai/multiagent',
+      url='https://github.com/openai/multiagent-public',
       author='Igor Mordatch',
       author_email='mordatch@openai.com',
       packages=find_packages(),
