4 changes: 4 additions & 0 deletions com.unity.ml-agents/CHANGELOG.md
@@ -40,6 +40,10 @@ and this project adheres to
asymmetric example environment Strikers Vs. Goalie has been added.
- CameraSensorComponent.m_Grayscale and RenderTextureSensorComponent.m_Grayscale
were changed from `public` to `private` (#3808).
- The `UnityEnv` class from the `gym-unity` package was renamed
`UnityToGymWrapper` and no longer creates the `UnityEnvironment`.
Instead, the `UnityEnvironment` must be passed as input to the
constructor of `UnityToGymWrapper`.

### Minor Changes

8 changes: 7 additions & 1 deletion docs/Migrating.md
@@ -34,7 +34,10 @@ double-check that the versions are the same. The versions can be found in
parameter, instead of returning the array. This was done to prevent a common
source of error where users would return arrays of the wrong size.
- `num_updates` and `train_interval` for SAC have been replaced with `steps_per_update`.

- The `UnityEnv` class from the `gym-unity` package was renamed
`UnityToGymWrapper` and no longer creates the `UnityEnvironment`. Instead,
the `UnityEnvironment` must be passed as input to the
constructor of `UnityToGymWrapper`.

### Steps to Migrate

@@ -58,6 +61,9 @@ double-check that the versions are the same. The versions can be found in
`actionsOut` instead of returning an array.
- Set `steps_per_update` to be around equal to the number of agents in your environment,
times `num_updates` and divided by `train_interval` (a worked example follows this list).
- Replace `UnityEnv` with `UnityToGymWrapper` in your code. The constructor
no longer takes a file name as input; pass a fully constructed
`UnityEnvironment` instead, as sketched after this list.
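
For the `steps_per_update` rule of thumb, with illustrative numbers: 12 agents,
`num_updates=2`, and `train_interval=3` gives `steps_per_update` of roughly
12 * 2 / 3 = 8.

A minimal before/after sketch of the `UnityEnv` migration (the binary path
`./envs/GridWorld` is a placeholder):

```python
# Before: the gym wrapper created the UnityEnvironment itself.
# from gym_unity.envs import UnityEnv
# env = UnityEnv("./envs/GridWorld", worker_id=0, use_visual=False)

# After: construct the UnityEnvironment yourself and wrap it.
from mlagents_envs.environment import UnityEnvironment
from gym_unity.envs import UnityToGymWrapper

unity_env = UnityEnvironment("./envs/GridWorld", worker_id=0)
env = UnityToGymWrapper(unity_env, use_visual=False)
```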

## Migrating from 0.14 to 0.15

35 changes: 19 additions & 16 deletions gym-unity/README.md
@@ -29,15 +29,12 @@ The gym interface is available from `gym_unity.envs`. To launch an environment
from the root of the project repository use:

```python
from gym_unity.envs import UnityEnv
from gym_unity.envs import UnityToGymWrapper

env = UnityEnv(environment_filename, worker_id, use_visual, uint8_visual)
env = UnityToGymWrapper(unity_environment, use_visual, uint8_visual)
```

* `environment_filename` refers to the path to the Unity environment.

* `worker_id` refers to the port to use for communication with the environment.
Defaults to `0`.
* `unity_environment` refers to the Unity environment to be wrapped.

* `use_visual` refers to whether to use visual observations (True) or vector
observations (False) as the default observation provided by the `reset` and
@@ -103,10 +100,12 @@ import gym
from baselines import deepq
from baselines import logger

from gym_unity.envs import UnityEnv
from mlagents_envs.environment import UnityEnvironment
from gym_unity.envs import UnityToGymWrapper

def main():
env = UnityEnv("./envs/GridWorld", 0, use_visual=True, uint8_visual=True)
unity_env = UnityEnvironment("./envs/GridWorld")
env = UnityToGymWrapper(unity_env, use_visual=True, uint8_visual=True)
logger.configure('./logs') # Change to log in a different directory
act = deepq.learn(
env,
@@ -144,9 +143,9 @@ python -m train_unity

Other algorithms in the Baselines repository can be run using scripts similar to
the examples from the baselines package. In most cases, the primary changes needed
to use a Unity environment are to import `UnityEnv`, and to replace the environment
creation code, typically `gym.make()`, with a call to `UnityEnv(env_path)`
passing the environment binary path.
to use a Unity environment are to import `UnityToGymWrapper`, and to replace the
environment creation code, typically `gym.make()`, with a call to
`UnityToGymWrapper(unity_environment)` passing the environment as input.
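
A hedged sketch of that substitution (the path to the Unity binary is a
placeholder):

```python
from mlagents_envs.environment import UnityEnvironment
from gym_unity.envs import UnityToGymWrapper

# Replaces the usual Gym creation code, e.g. env = gym.make("PongNoFrameskip-v4")
unity_env = UnityEnvironment("./envs/GridWorld")
env = UnityToGymWrapper(unity_env)
```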

A typical rule of thumb is that for vision-based environments, modification
should be done to Atari training scripts, and for vector observation
@@ -157,7 +156,8 @@ functions. You can define a similar function for Unity environments. An example
such a method using the PPO2 baseline:

```python
from gym_unity.envs import UnityEnv
from mlagents_envs.environment import UnityEnvironment
from gym_unity.envs import UnityToGymWrapper
from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv
from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
from baselines.bench import Monitor
@@ -177,7 +177,8 @@ def make_unity_env(env_directory, num_env, visual, start_index=0):
"""
def make_env(rank, use_visual=True): # pylint: disable=C0111
def _thunk():
env = UnityEnv(env_directory, rank, use_visual=use_visual, uint8_visual=True)
unity_env = UnityEnvironment(env_directory, worker_id=rank)
env = UnityToGymWrapper(unity_env, use_visual=use_visual, uint8_visual=True)
env = Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
return env
return _thunk
@@ -228,7 +229,8 @@ Within `run_experiment.py`, we will need to make changes to which environment is
instantiated, just as in the Baselines example. At the top of the file, insert

```python
from gym_unity.envs import UnityEnv
from mlagents_envs.environment import UnityEnvironment
from gym_unity.envs import UnityToGymWrapper
```

to import the Gym Wrapper. Navigate to the `create_atari_environment` method
@@ -238,7 +240,8 @@ the method with the following code.
```python
game_version = 'v0' if sticky_actions else 'v4'
full_game_name = '{}NoFrameskip-{}'.format(game_name, game_version)
env = UnityEnv('./envs/GridWorld', 0, use_visual=True, uint8_visual=True)
unity_env = UnityEnvironment('./envs/GridWorld')
env = UnityToGymWrapper(unity_env, use_visual=True, uint8_visual=True)
return env
```

Expand All @@ -256,7 +259,7 @@ Since Dopamine is designed around variants of DQN, it is only compatible
with discrete action spaces, and specifically the Discrete Gym space. For environments
that use branched discrete action spaces (e.g.
[VisualBanana](../docs/Learning-Environment-Examples.md)), you can enable the
`flatten_branched` parameter in `UnityEnv`, which treats each combination of branched
`flatten_branched` parameter in `UnityToGymWrapper`, which treats each combination of branched
actions as separate actions.
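
As a sketch, assuming an already-constructed `unity_env` whose action space
has three discrete branches of sizes [2, 2, 3]:

```python
from gym_unity.envs import UnityToGymWrapper

# flatten_branched=True turns the branched space into Discrete(2 * 2 * 3),
# i.e. Discrete(12); each flattened index maps back to one combination of
# branch values.
env = UnityToGymWrapper(unity_env, flatten_branched=True)
print(env.action_space)  # Discrete(12)

# With flatten_branched=False (the default), the same space is
# MultiDiscrete([2, 2, 3]).
```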

Furthermore, when building your environments, ensure that your Agent is using visual
23 changes: 5 additions & 18 deletions gym-unity/gym_unity/envs/__init__.py
@@ -5,7 +5,7 @@
import gym
from gym import error, spaces

from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.base_env import BaseEnv
from mlagents_envs.base_env import DecisionSteps, TerminalSteps
from mlagents_envs import logging_util

@@ -24,42 +24,29 @@ class UnityGymException(error.Error):
GymStepResult = Tuple[np.ndarray, float, bool, Dict]


class UnityEnv(gym.Env):
class UnityToGymWrapper(gym.Env):
"""
Provides Gym wrapper for Unity Learning Environments.
"""

def __init__(
self,
environment_filename: str,
worker_id: int = 0,
unity_env: BaseEnv,
use_visual: bool = False,
uint8_visual: bool = False,
flatten_branched: bool = False,
no_graphics: bool = False,
allow_multiple_visual_obs: bool = False,
):
"""
Environment initialization
:param environment_filename: The UnityEnvironment path or file to be wrapped in the gym.
:param worker_id: Worker number for environment.
:param unity_env: The Unity BaseEnv to be wrapped in the gym.
Review comment (Contributor): Add a note that the environment will be closed when the `UnityToGymWrapper` is closed.
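
# Sketch of the lifecycle the reviewer asks to document (assumption: this
# wrapper's close() forwards to self._env.close(), so the wrapped
# UnityEnvironment is torn down together with the wrapper):
#
#     env = UnityToGymWrapper(unity_env)
#     env.close()  # also closes unity_env; no separate unity_env.close() needed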

:param use_visual: Whether to use visual observation or vector observation.
:param uint8_visual: Return visual observations as uint8 (0-255) matrices instead of float (0.0-1.0).
:param flatten_branched: If True, turn branched discrete action spaces into a Discrete space rather than
MultiDiscrete.
:param no_graphics: Whether to run the Unity simulator in no-graphics mode
:param allow_multiple_visual_obs: If True, return a list of visual observations instead of only one.
"""
base_port = UnityEnvironment.BASE_ENVIRONMENT_PORT
if environment_filename is None:
base_port = UnityEnvironment.DEFAULT_EDITOR_PORT

self._env = UnityEnvironment(
environment_filename,
worker_id,
base_port=base_port,
no_graphics=no_graphics,
)
self._env = unity_env

# Take a single step so that the brain information will be sent over
if not self._env.get_behavior_names():
34 changes: 18 additions & 16 deletions gym-unity/gym_unity/tests/test_gym.py
@@ -3,7 +3,7 @@
import numpy as np

from gym import spaces
from gym_unity.envs import UnityEnv
from gym_unity.envs import UnityToGymWrapper
from mlagents_envs.base_env import (
BehaviorSpec,
ActionType,
@@ -12,16 +12,18 @@
)


@mock.patch("gym_unity.envs.UnityEnvironment")
def test_gym_wrapper(mock_env):
def test_gym_wrapper():
mock_env = mock.MagicMock()
mock_spec = create_mock_group_spec()
mock_decision_step, mock_terminal_step = create_mock_vector_steps(mock_spec)
setup_mock_unityenvironment(
mock_env, mock_spec, mock_decision_step, mock_terminal_step
)

env = UnityEnv(" ", use_visual=False)
assert isinstance(env, UnityEnv)
print(mock_env.get_behavior_names())

env = UnityToGymWrapper(mock_env, use_visual=False)
assert isinstance(env, UnityToGymWrapper)
assert isinstance(env.reset(), np.ndarray)
actions = env.action_space.sample()
assert actions.shape[0] == 2
@@ -33,8 +35,8 @@ def test_gym_wrapper(mock_env):
assert isinstance(info, dict)


@mock.patch("gym_unity.envs.UnityEnvironment")
def test_branched_flatten(mock_env):
def test_branched_flatten():
mock_env = mock.MagicMock()
mock_spec = create_mock_group_spec(
vector_action_space_type="discrete", vector_action_space_size=[2, 2, 3]
)
Expand All @@ -45,20 +47,20 @@ def test_branched_flatten(mock_env):
mock_env, mock_spec, mock_decision_step, mock_terminal_step
)

env = UnityEnv(" ", use_visual=False, flatten_branched=True)
env = UnityToGymWrapper(mock_env, use_visual=False, flatten_branched=True)
assert isinstance(env.action_space, spaces.Discrete)
assert env.action_space.n == 12
assert env._flattener.lookup_action(0) == [0, 0, 0]
assert env._flattener.lookup_action(11) == [1, 1, 2]

# Check that False produces a MultiDiscrete
env = UnityEnv(" ", use_visual=False, flatten_branched=False)
env = UnityToGymWrapper(mock_env, use_visual=False, flatten_branched=False)
assert isinstance(env.action_space, spaces.MultiDiscrete)


@pytest.mark.parametrize("use_uint8", [True, False], ids=["float", "uint8"])
@mock.patch("gym_unity.envs.UnityEnvironment")
def test_gym_wrapper_visual(mock_env, use_uint8):
def test_gym_wrapper_visual(use_uint8):
mock_env = mock.MagicMock()
mock_spec = create_mock_group_spec(number_visual_observations=1)
mock_decision_step, mock_terminal_step = create_mock_vector_steps(
mock_spec, number_visual_observations=1
@@ -67,8 +69,8 @@ def test_gym_wrapper_visual(mock_env, use_uint8):
mock_env, mock_spec, mock_decision_step, mock_terminal_step
)

env = UnityEnv(" ", use_visual=True, uint8_visual=use_uint8)
assert isinstance(env, UnityEnv)
env = UnityToGymWrapper(mock_env, use_visual=True, uint8_visual=use_uint8)
assert isinstance(env, UnityToGymWrapper)
assert isinstance(env.reset(), np.ndarray)
actions = env.action_space.sample()
assert actions.shape[0] == 2
@@ -137,6 +139,6 @@ def setup_mock_unityenvironment(mock_env, mock_spec, mock_decision, mock_termina
:Mock mock_decision: A DecisionSteps object that will be returned at each step and reset.
:Mock mock_termination: A TerminationSteps object that will be returned at each step and reset.
"""
mock_env.return_value.get_behavior_names.return_value = ["MockBrain"]
mock_env.return_value.get_behavior_spec.return_value = mock_spec
mock_env.return_value.get_steps.return_value = (mock_decision, mock_termination)
mock_env.get_behavior_names.return_value = ["MockBrain"]
mock_env.get_behavior_spec.return_value = mock_spec
mock_env.get_steps.return_value = (mock_decision, mock_termination)
18 changes: 13 additions & 5 deletions ml-agents/tests/yamato/scripts/run_gym.py
@@ -1,14 +1,16 @@
import argparse

from gym_unity.envs import UnityEnv
from mlagents_envs.environment import UnityEnvironment
from gym_unity.envs import UnityToGymWrapper


def test_run_environment(env_name):
"""
Run the gym test using the specified environment
:param env_name: Name of the Unity environment binary to launch
"""
env = UnityEnv(env_name, worker_id=1, use_visual=False, no_graphics=True)
u_env = UnityEnvironment(env_name, worker_id=1, no_graphics=True)
env = UnityToGymWrapper(u_env, use_visual=False)

try:
# Examine environment parameters
@@ -41,10 +43,16 @@ def test_closing(env_name):
"""

try:
env1 = UnityEnv(env_name, worker_id=1, use_visual=False, no_graphics=True)
env1 = UnityToGymWrapper(
UnityEnvironment(env_name, worker_id=1, no_graphics=True), use_visual=False
)
env1.close()
env1 = UnityEnv(env_name, worker_id=1, use_visual=False, no_graphics=True)
env2 = UnityEnv(env_name, worker_id=2, use_visual=False, no_graphics=True)
env1 = UnityToGymWrapper(
UnityEnvironment(env_name, worker_id=1, no_graphics=True), use_visual=False
)
env2 = UnityToGymWrapper(
UnityEnvironment(env_name, worker_id=2, no_graphics=True), use_visual=False
)
env2.reset()
finally:
env1.close()