4 changes: 4 additions & 0 deletions com.unity.ml-agents/CHANGELOG.md
@@ -40,6 +40,10 @@ and this project adheres to
asymmetric example environment Strikers Vs. Goalie has been added.
- CameraSensorComponent.m_Grayscale and RenderTextureSensorComponent.m_Grayscale
were changed from `public` to `private` (#3808).
- The `UnityEnv` class from the `gym-unity` package was renamed
`UnityToGymWrapper` and no longer creates the `UnityEnvironment`.
Instead, the `UnityEnvironment` must be passed as input to the
constructor of `UnityToGymWrapper`.

### Minor Changes

8 changes: 7 additions & 1 deletion docs/Migrating.md
@@ -34,7 +34,10 @@ double-check that the versions are the same. The versions can be found in
parameter, instead of returning the array. This was done to prevent a common
source of error where users would return arrays of the wrong size.
- `num_updates` and `train_interval` for SAC have been replaced with `steps_per_update`.

- The `UnityEnv` class from the `gym-unity` package was renamed
`UnityToGymWrapper` and no longer creates the `UnityEnvironment`. Instead,
the `UnityEnvironment` must be passed as input to the
constructor of `UnityToGymWrapper`.

### Steps to Migrate

@@ -58,6 +61,9 @@ double-check that the versions are the same. The versions can be found in
`actionsOut` instead of returning an array.
- Set `steps_per_update` to be around equal to the number of agents in your environment,
times `num_updates` and divided by `train_interval` (a worked example follows this list).
- Replace `UnityEnv` with `UnityToGymWrapper` in your code. The constructor
no longer takes a file name as input; pass a fully constructed
`UnityEnvironment` instead, as sketched after this list.
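
For the `steps_per_update` rule of thumb, with illustrative numbers: 12 agents,
`num_updates=2`, and `train_interval=3` gives `steps_per_update` of roughly
12 * 2 / 3 = 8.

A minimal before/after sketch of the `UnityEnv` migration (the binary path
`./envs/GridWorld` is a placeholder):

```python
# Before: the gym wrapper created the UnityEnvironment itself.
# from gym_unity.envs import UnityEnv
# env = UnityEnv("./envs/GridWorld", worker_id=0, use_visual=False)

# After: construct the UnityEnvironment yourself and wrap it.
from mlagents_envs.environment import UnityEnvironment
from gym_unity.envs import UnityToGymWrapper

unity_env = UnityEnvironment("./envs/GridWorld", worker_id=0)
env = UnityToGymWrapper(unity_env, use_visual=False)
```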

## Migrating from 0.14 to 0.15

35 changes: 19 additions & 16 deletions gym-unity/README.md
@@ -29,15 +29,12 @@ The gym interface is available from `gym_unity.envs`. To launch an environment
from the root of the project repository use:

```python
from gym_unity.envs import UnityEnv
from gym_unity.envs import UnityToGymWrapper

env = UnityEnv(environment_filename, worker_id, use_visual, uint8_visual)
env = UnityToGymWrapper(unity_environment, use_visual, uint8_visual)
```

* `environment_filename` refers to the path to the Unity environment.

* `worker_id` refers to the port to use for communication with the environment.
Defaults to `0`.
* `unity_environment` refers to the Unity environment to be wrapped.

* `use_visual` refers to whether to use visual observations (True) or vector
observations (False) as the default observation provided by the `reset` and
@@ -103,10 +100,12 @@ import gym
from baselines import deepq
from baselines import logger

from gym_unity.envs import UnityEnv
from mlagents_envs.environment import UnityEnvironment
from gym_unity.envs import UnityToGymWrapper

def main():
env = UnityEnv("./envs/GridWorld", 0, use_visual=True, uint8_visual=True)
unity_env = UnityEnvironment("./envs/GridWorld")
env = UnityToGymWrapper(unity_env, use_visual=True, uint8_visual=True)
logger.configure('./logs') # Change to log in a different directory
act = deepq.learn(
env,
@@ -144,9 +143,9 @@ python -m train_unity

Other algorithms in the Baselines repository can be run using scripts similar to
the examples from the baselines package. In most cases, the primary changes needed
to use a Unity environment are to import `UnityEnv`, and to replace the environment
creation code, typically `gym.make()`, with a call to `UnityEnv(env_path)`
passing the environment binary path.
to use a Unity environment are to import `UnityToGymWrapper`, and to replace the
environment creation code, typically `gym.make()`, with a call to
`UnityToGymWrapper(unity_environment)` passing the environment as input.
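
A hedged sketch of that substitution (the path to the Unity binary is a
placeholder):

```python
from mlagents_envs.environment import UnityEnvironment
from gym_unity.envs import UnityToGymWrapper

# Replaces the usual Gym creation code, e.g. env = gym.make("PongNoFrameskip-v4")
unity_env = UnityEnvironment("./envs/GridWorld")
env = UnityToGymWrapper(unity_env)
```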

A typical rule of thumb is that for vision-based environments, modification
should be done to Atari training scripts, and for vector observation
@@ -157,7 +156,8 @@ functions. You can define a similar function for Unity environments. An example
such a method using the PPO2 baseline:

```python
from gym_unity.envs import UnityEnv
from mlagents_envs.environment import UnityEnvironment
from gym_unity.envs import UnityToGymWrapper
from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv
from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
from baselines.bench import Monitor
@@ -177,7 +177,8 @@ def make_unity_env(env_directory, num_env, visual, start_index=0):
"""
def make_env(rank, use_visual=True): # pylint: disable=C0111
def _thunk():
env = UnityEnv(env_directory, rank, use_visual=use_visual, uint8_visual=True)
unity_env = UnityEnvironment(env_directory, worker_id=rank)
env = UnityToGymWrapper(unity_env, use_visual=use_visual, uint8_visual=True)
env = Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
return env
return _thunk
@@ -228,7 +229,8 @@ Within `run_experiment.py`, we will need to make changes to which environment is
instantiated, just as in the Baselines example. At the top of the file, insert

```python
from gym_unity.envs import UnityEnv
from mlagents_envs.environment import UnityEnvironment
from gym_unity.envs import UnityToGymWrapper
```

to import the Gym Wrapper. Navigate to the `create_atari_environment` method
@@ -238,7 +240,8 @@ the method with the following code.
```python
game_version = 'v0' if sticky_actions else 'v4'
full_game_name = '{}NoFrameskip-{}'.format(game_name, game_version)
env = UnityEnv('./envs/GridWorld', 0, use_visual=True, uint8_visual=True)
unity_env = UnityEnvironment('./envs/GridWorld')
env = UnityToGymWrapper(unity_env, use_visual=True, uint8_visual=True)
return env
```

Expand All @@ -256,7 +259,7 @@ Since Dopamine is designed around variants of DQN, it is only compatible
with discrete action spaces, and specifically the Discrete Gym space. For environments
that use branched discrete action spaces (e.g.
[VisualBanana](../docs/Learning-Environment-Examples.md)), you can enable the
`flatten_branched` parameter in `UnityEnv`, which treats each combination of branched
`flatten_branched` parameter in `UnityToGymWrapper`, which treats each combination of branched
actions as separate actions.
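
As a sketch, assuming an already-constructed `unity_env` whose action space
has three discrete branches of sizes [2, 2, 3]:

```python
from gym_unity.envs import UnityToGymWrapper

# flatten_branched=True turns the branched space into Discrete(2 * 2 * 3),
# i.e. Discrete(12); each flattened index maps back to one combination of
# branch values.
env = UnityToGymWrapper(unity_env, flatten_branched=True)
print(env.action_space)  # Discrete(12)

# With flatten_branched=False (the default), the same space is
# MultiDiscrete([2, 2, 3]).
```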

Furthermore, when building your environments, ensure that your Agent is using visual
23 changes: 5 additions & 18 deletions gym-unity/gym_unity/envs/__init__.py
@@ -5,7 +5,7 @@
import gym
from gym import error, spaces

from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.base_env import BaseEnv
from mlagents_envs.base_env import DecisionSteps, TerminalSteps
from mlagents_envs import logging_util

@@ -24,42 +24,29 @@ class UnityGymException(error.Error):
GymStepResult = Tuple[np.ndarray, float, bool, Dict]


class UnityEnv(gym.Env):
class UnityToGymWrapper(gym.Env):
"""
Provides Gym wrapper for Unity Learning Environments.
"""

def __init__(
self,
environment_filename: str,
worker_id: int = 0,
unity_env: BaseEnv,
use_visual: bool = False,
uint8_visual: bool = False,
flatten_branched: bool = False,
no_graphics: bool = False,
allow_multiple_visual_obs: bool = False,
):
"""
Environment initialization
:param environment_filename: The UnityEnvironment path or file to be wrapped in the gym.
:param worker_id: Worker number for environment.
:param unity_env: The Unity BaseEnv to be wrapped in the gym.
Review comment (Contributor): Add a note that the environment will be closed when the `UnityToGymWrapper` is closed.
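
# Sketch of the lifecycle the reviewer asks to document (assumption: this
# wrapper's close() forwards to self._env.close(), so the wrapped
# UnityEnvironment is torn down together with the wrapper):
#
#     env = UnityToGymWrapper(unity_env)
#     env.close()  # also closes unity_env; no separate unity_env.close() needed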

:param use_visual: Whether to use visual observation or vector observation.
:param uint8_visual: Return visual observations as uint8 (0-255) matrices instead of float (0.0-1.0).
:param flatten_branched: If True, turn branched discrete action spaces into a Discrete space rather than
MultiDiscrete.
:param no_graphics: Whether to run the Unity simulator in no-graphics mode
:param allow_multiple_visual_obs: If True, return a list of visual observations instead of only one.
"""
base_port = UnityEnvironment.BASE_ENVIRONMENT_PORT
if environment_filename is None:
base_port = UnityEnvironment.DEFAULT_EDITOR_PORT

self._env = UnityEnvironment(
environment_filename,
worker_id,
base_port=base_port,
no_graphics=no_graphics,
)
self._env = unity_env

# Take a single step so that the brain information will be sent over
if not self._env.get_behavior_names():
34 changes: 18 additions & 16 deletions gym-unity/gym_unity/tests/test_gym.py
@@ -3,7 +3,7 @@
import numpy as np

from gym import spaces
from gym_unity.envs import UnityEnv
from gym_unity.envs import UnityToGymWrapper
from mlagents_envs.base_env import (
BehaviorSpec,
ActionType,
@@ -12,16 +12,18 @@
)


@mock.patch("gym_unity.envs.UnityEnvironment")
def test_gym_wrapper(mock_env):
def test_gym_wrapper():
mock_env = mock.MagicMock()
mock_spec = create_mock_group_spec()
mock_decision_step, mock_terminal_step = create_mock_vector_steps(mock_spec)
setup_mock_unityenvironment(
mock_env, mock_spec, mock_decision_step, mock_terminal_step
)

env = UnityEnv(" ", use_visual=False)
assert isinstance(env, UnityEnv)
print(mock_env.get_behavior_names())

env = UnityToGymWrapper(mock_env, use_visual=False)
assert isinstance(env, UnityToGymWrapper)
assert isinstance(env.reset(), np.ndarray)
actions = env.action_space.sample()
assert actions.shape[0] == 2
@@ -33,8 +35,8 @@ def test_gym_wrapper(mock_env):
assert isinstance(info, dict)


@mock.patch("gym_unity.envs.UnityEnvironment")
def test_branched_flatten(mock_env):
def test_branched_flatten():
mock_env = mock.MagicMock()
mock_spec = create_mock_group_spec(
vector_action_space_type="discrete", vector_action_space_size=[2, 2, 3]
)
Expand All @@ -45,20 +47,20 @@ def test_branched_flatten(mock_env):
mock_env, mock_spec, mock_decision_step, mock_terminal_step
)

env = UnityEnv(" ", use_visual=False, flatten_branched=True)
env = UnityToGymWrapper(mock_env, use_visual=False, flatten_branched=True)
assert isinstance(env.action_space, spaces.Discrete)
assert env.action_space.n == 12
assert env._flattener.lookup_action(0) == [0, 0, 0]
assert env._flattener.lookup_action(11) == [1, 1, 2]

# Check that False produces a MultiDiscrete
env = UnityEnv(" ", use_visual=False, flatten_branched=False)
env = UnityToGymWrapper(mock_env, use_visual=False, flatten_branched=False)
assert isinstance(env.action_space, spaces.MultiDiscrete)


@pytest.mark.parametrize("use_uint8", [True, False], ids=["float", "uint8"])
@mock.patch("gym_unity.envs.UnityEnvironment")
def test_gym_wrapper_visual(mock_env, use_uint8):
def test_gym_wrapper_visual(use_uint8):
mock_env = mock.MagicMock()
mock_spec = create_mock_group_spec(number_visual_observations=1)
mock_decision_step, mock_terminal_step = create_mock_vector_steps(
mock_spec, number_visual_observations=1
@@ -67,8 +69,8 @@ def test_gym_wrapper_visual(mock_env, use_uint8):
mock_env, mock_spec, mock_decision_step, mock_terminal_step
)

env = UnityEnv(" ", use_visual=True, uint8_visual=use_uint8)
assert isinstance(env, UnityEnv)
env = UnityToGymWrapper(mock_env, use_visual=True, uint8_visual=use_uint8)
assert isinstance(env, UnityToGymWrapper)
assert isinstance(env.reset(), np.ndarray)
actions = env.action_space.sample()
assert actions.shape[0] == 2
@@ -137,6 +139,6 @@ def setup_mock_unityenvironment(mock_env, mock_spec, mock_decision, mock_termina
:Mock mock_decision: A DecisionSteps object that will be returned at each step and reset.
:Mock mock_termination: A TerminationSteps object that will be returned at each step and reset.
"""
mock_env.return_value.get_behavior_names.return_value = ["MockBrain"]
mock_env.return_value.get_behavior_spec.return_value = mock_spec
mock_env.return_value.get_steps.return_value = (mock_decision, mock_termination)
mock_env.get_behavior_names.return_value = ["MockBrain"]
mock_env.get_behavior_spec.return_value = mock_spec
mock_env.get_steps.return_value = (mock_decision, mock_termination)
18 changes: 13 additions & 5 deletions ml-agents/tests/yamato/scripts/run_gym.py
@@ -1,14 +1,16 @@
import argparse

from gym_unity.envs import UnityEnv
from mlagents_envs.environment import UnityEnvironment
from gym_unity.envs import UnityToGymWrapper


def test_run_environment(env_name):
"""
Run the gym test using the specified environment
:param env_name: Name of the Unity environment binary to launch
"""
env = UnityEnv(env_name, worker_id=1, use_visual=False, no_graphics=True)
u_env = UnityEnvironment(env_name, worker_id=1, no_graphics=True)
env = UnityToGymWrapper(u_env, use_visual=False)

try:
# Examine environment parameters
@@ -41,10 +43,16 @@ def test_closing(env_name):
"""

try:
env1 = UnityEnv(env_name, worker_id=1, use_visual=False, no_graphics=True)
env1 = UnityToGymWrapper(
UnityEnvironment(env_name, worker_id=1, no_graphics=True), use_visual=False
)
env1.close()
env1 = UnityEnv(env_name, worker_id=1, use_visual=False, no_graphics=True)
env2 = UnityEnv(env_name, worker_id=2, use_visual=False, no_graphics=True)
env1 = UnityToGymWrapper(
UnityEnvironment(env_name, worker_id=1, no_graphics=True), use_visual=False
)
env2 = UnityToGymWrapper(
UnityEnvironment(env_name, worker_id=2, no_graphics=True), use_visual=False
)
env2.reset()
finally:
env1.close()