
Commit dc2d761

ensemble
1 parent ef51242 commit dc2d761

File tree

3 files changed (+3, -64 lines)

README.md

Lines changed: 1 addition & 4 deletions
@@ -26,7 +26,7 @@ Used in the paper [Multi-Agent Actor-Critic for Mixed Cooperative-Competitive En
 
 - `./multiagent/policy.py`: contains code for interactive policy based on keyboard input.
 
-- `./multiagent/scenario.py`: contains base scenario object that is extended for all scenarios. Also contains base code for the ensemble scenarios.
+- `./multiagent/scenario.py`: contains base scenario object that is extended for all scenarios.
 
 - `./multiagent/scenarios/`: folder where various scenarios/ environments are stored. scenario code consists of several functions:
     1) `make_world()`: creates all of the entities that inhabit the world (landmarks, agents, etc.), assigns their capabilities (whether they can communicate, or move, or both).
@@ -46,9 +46,6 @@ You can create new scenarios by implementing the first 4 functions above (`make_
 
 | Env name in code (name in paper) | Communication? | Competitive? | Notes |
 | --- | --- | --- | --- |
-| `ensemble_adversary.py` (Physical deception) | N | Y | Same as simple_adversary below, where agents are trained with an ensemble of policies. |
-| `ensemble_push.py` (Keep-away) | N | Y | Same as simple_push below, where agents are trained with an ensemble of policies. |
-| `ensemble_tag.py` (Predator-prey) | N | Y | Same as simple_tag below, where agents are trained with an ensemble of policies. |
 | `simple.py` | N | N | Single agent sees landmark position, rewarded based on how close it gets to landmark. Not a multiagent environment -- used for debugging policies. |
 | `simple_adversary.py` (Physical deception) | N | Y | 1 adversary (red), N good agents (green), N landmarks (usually N=2). All agents observe position of landmarks and other agents. One landmark is the ‘target landmark’ (colored green). Good agents rewarded based on how close one of them is to the target landmark, but negatively rewarded if the adversary is close to target landmark. Adversary is rewarded based on how close it is to the target, but it doesn’t know which landmark is the target landmark. So good agents have to learn to ‘split up’ and cover all landmarks to deceive the adversary. |
 | `simple_crypto.py` (Covert communication) | Y | Y | Two good agents (alice and bob), one adversary (eve). Alice must send a private message to bob over a public channel. Alice and bob are rewarded based on how well bob reconstructs the message, but negatively rewarded if eve can reconstruct the message. Alice and bob have a private key (randomly generated at beginning of each episode), which they must learn to use to encrypt the message. |
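
As a quick orientation for this part of the diff: the scenario interface the README describes is small, and new environments are added by subclassing `BaseScenario` inside `./multiagent/scenarios/`. Below is a minimal sketch in the style of the repo's `simple.py` (one agent, one landmark, reward equal to negative distance). The class body and parameter choices are illustrative, not part of this commit.

```python
# Illustrative only -- a hypothetical scenario in the style of simple.py,
# not code from this commit.
import numpy as np
from multiagent.core import World, Agent, Landmark
from multiagent.scenario import BaseScenario


class Scenario(BaseScenario):
    def make_world(self):
        # create the world and the entities that inhabit it
        world = World()
        world.agents = [Agent()]
        world.landmarks = [Landmark()]
        for i, agent in enumerate(world.agents):
            agent.name = 'agent %d' % i
            agent.collide = False
            agent.silent = True          # no communication in this toy scenario
        for i, landmark in enumerate(world.landmarks):
            landmark.name = 'landmark %d' % i
            landmark.collide = False
            landmark.movable = False
        self.reset_world(world)
        return world

    def reset_world(self, world):
        # random initial positions, zero velocities and communication state
        for agent in world.agents:
            agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
            agent.state.p_vel = np.zeros(world.dim_p)
            agent.state.c = np.zeros(world.dim_c)
        for landmark in world.landmarks:
            landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
            landmark.state.p_vel = np.zeros(world.dim_p)

    def reward(self, agent, world):
        # negative distance to the single landmark
        return -np.sqrt(np.sum(np.square(agent.state.p_pos - world.landmarks[0].state.p_pos)))

    def observation(self, agent, world):
        # own velocity plus the landmark position relative to the agent
        entity_pos = [lm.state.p_pos - agent.state.p_pos for lm in world.landmarks]
        return np.concatenate([agent.state.p_vel] + entity_pos)
```

A file like this dropped into `./multiagent/scenarios/` can then be loaded by name like the built-in scenarios.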

multiagent/environment.py

Lines changed: 2 additions & 3 deletions
@@ -137,8 +137,7 @@ def _get_reward(self, agent):
     # set env action for a particular agent
     def _set_action(self, action, agent, action_space, time=None):
         agent.action.u = np.zeros(self.world.dim_p)
-        #agent.action.c = np.zeros(self.world.dim_c)
-        agent.action.c *= self.comm_decay
+        agent.action.c = np.zeros(self.world.dim_c)
         # process action
         if isinstance(action_space, spaces.MultiDiscrete):
             act = []
@@ -170,7 +169,7 @@ def _set_action(self, action, agent, action_space, time=None):
                     agent.action.u[1] += action[0][3] - action[0][4]
                 else:
                     agent.action.u = action[0]
-            sensitivity = 5.0 #5.0 #1.0 #0.25
+            sensitivity = 5.0
             if agent.accel is not None:
                 sensitivity = agent.accel
             agent.action.u *= sensitivity
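
The substantive change in this file is how the communication part of the action is initialized on each `_set_action` call: the deleted lines carried the previous step's communication vector forward, scaled by a `comm_decay` attribute (which this commit also drops), whereas the restored line clears it every step. A toy numeric sketch of the difference, with made-up values:

```python
# Made-up numbers, only to contrast the two behaviours touched by this hunk.
import numpy as np

dim_c = 3
prev_c = np.array([0.4, -0.2, 0.1])  # communication emitted on the previous step

# removed behaviour: carry a decayed copy of the previous utterance forward
comm_decay = 0.9                      # hypothetical value; the attribute is gone after this commit
decayed_c = prev_c * comm_decay       # array([ 0.36, -0.18,  0.09])

# restored behaviour: start every step from silence
fresh_c = np.zeros(dim_c)             # array([0., 0., 0.])
```

The second hunk only strips stale alternative values from an inline comment; `sensitivity` still defaults to 5.0 and is overridden by `agent.accel` when that is set.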

multiagent/scenario.py

Lines changed: 0 additions & 57 deletions
@@ -8,60 +8,3 @@ def make_world(self):
     # create initial conditions of the world
     def reset_world(self, world):
         raise NotImplementedError()
-
-class EnsembleBaseScenario(BaseScenario):
-    def __init__(self):
-        self.partition = 'rand'
-        self.partition_flag = -1
-        self.measure_success = False
-
-    def select_agents(self, world):
-        good_agents = [agent for agent in world.all_agents if not agent.adversary]
-        adversary_agents = [agent for agent in world.all_agents if agent.adversary]
-        n_good = world.good_part_n
-        n_bad = world.adversary_part_n
-        if self.partition == 'rand':
-            np.random.shuffle(good_agents)
-            np.random.shuffle(adversary_agents)
-            world.agents = adversary_agents[:world.num_adversaries] + \
-                good_agents[:(world.num_agents - world.num_adversaries)]
-        elif self.partition == 'fix':
-            k = np.random.choice(world.partition_n)
-            bad_part = adversary_agents[k * n_bad: (k + 1) * n_bad]
-            np.random.shuffle(bad_part)
-            good_part = good_agents[k * n_good: (k + 1) * n_good]
-            np.random.shuffle(good_part)
-            world.agents = bad_part[:world.num_adversaries] + good_part[:(world.num_agents - world.num_adversaries)]
-        else:
-            fix_good = good_agents[:n_good]
-            rand_good_all = good_agents[n_good:]
-            np.random.shuffle(fix_good)
-            fix_bad = adversary_agents[:n_bad]
-            rand_bad_all = adversary_agents[n_bad:]
-            np.random.shuffle(fix_bad)
-            # pick a team from rand-good/bad
-            t = np.random.choice(world.partition_n - 1)  # excluding fix-team
-            rand_good = rand_good_all[t * n_good: (t+1) * n_good]
-            t = np.random.choice(world.partition_n - 1)
-            rand_bad = rand_bad_all[t * n_bad: (t+1) * n_bad]
-            np.random.shuffle(rand_good)
-            np.random.shuffle(rand_bad)
-            if self.partition == 'mix':
-                k = np.random.choice(world.partition_n)
-                if self.partition_flag > -1:  # only use fixed partition
-                    k = self.partition_flag
-                if k == 0:
-                    world.agents = fix_bad[:world.num_adversaries] + fix_good[:(world.num_agents - world.num_adversaries)]
-                else:
-                    world.agents = rand_bad[:world.num_adversaries] + \
-                        rand_good[:(world.num_agents - world.num_adversaries)]
-            else:
-                if self.partition_flag > -1:
-                    k = self.partition_flag
-                else:
-                    k = np.random.choice(2)
-                if k == 0:
-                    world.agents = fix_bad[:world.num_adversaries] + rand_good[:(world.num_agents - world.num_adversaries)]
-                else:
-                    world.agents = rand_bad[:world.num_adversaries] + fix_good[:(world.num_agents - world.num_adversaries)]
-        assert (len(world.agents) == world.num_agents)
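
For context, the deleted `EnsembleBaseScenario.select_agents` drew each episode's active agents from a larger pool: `'rand'` shuffles the whole pool, `'fix'` samples one pre-defined sub-team per side, `'mix'` chooses between the designated fixed sub-team and a randomly drawn one, and any other value pairs the fixed sub-team on one side with a random sub-team on the other. The sketch below shows the world attributes that code read; the attribute names come from the removed lines, while the numbers and the commented usage are assumptions for illustration only.

```python
# Illustration of the inputs the removed select_agents expected; all values are made up.
from multiagent.core import World, Agent

world = World()
world.num_agents = 4             # agents active in one episode
world.num_adversaries = 1
world.partition_n = 3            # sub-teams per side in the ensemble
world.good_part_n = 3            # good agents per sub-team
world.adversary_part_n = 1       # adversaries per sub-team

# the full pool of ensemble agents; select_agents picked a subset each episode
world.all_agents = []
for _ in range(world.adversary_part_n * world.partition_n):
    a = Agent()
    a.adversary = True
    world.all_agents.append(a)
for _ in range(world.good_part_n * world.partition_n):
    a = Agent()
    a.adversary = False
    world.all_agents.append(a)

# scenario = SomeEnsembleScenario()  # hypothetical subclass of the removed class
# scenario.partition = 'rand'        # or 'fix' / 'mix', as handled above
# scenario.select_agents(world)      # would fill world.agents with num_agents picks
```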
