Commit 22249e9 (1 parent: 8f3eb8c)

5 files changed: +414 / -0 lines

deep-learning/Deep-Reinforcement-Learning-Complete-Collection/PyTorch-cpp/gym_server/__init__.py

Whitespace-only changes.
deep-learning/Deep-Reinforcement-Learning-Complete-Collection/PyTorch-cpp/gym_server/envs.py

Lines changed: 162 additions & 0 deletions
@@ -0,0 +1,162 @@
"""
Adapted from:
github.com/ikostrikov/pytorch-a2c-ppo-acktr/blob/master/a2c_ppo_acktr/envs.py

Provides utility functions for making Gym environments.
"""
import gym
from gym.spaces import Box
import numpy as np

from baselines.common.vec_env import VecEnvWrapper
from baselines.common.atari_wrappers import make_atari, wrap_deepmind
from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv
from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
from baselines.common.vec_env.vec_normalize import (VecNormalize
                                                    as VecNormalize_)


class TransposeImage(gym.ObservationWrapper):
    def __init__(self, env=None):
        super(TransposeImage, self).__init__(env)
        obs_shape = self.observation_space.shape
        self.observation_space = Box(
            self.observation_space.low[0, 0, 0],
            self.observation_space.high[0, 0, 0],
            [obs_shape[2], obs_shape[1], obs_shape[0]],
            dtype=self.observation_space.dtype)

    def observation(self, observation):
        return observation.transpose(2, 0, 1)


class VecFrameStack(VecEnvWrapper):
    def __init__(self, venv, nstack):
        self.venv = venv
        self.nstack = nstack
        wos = venv.observation_space  # wrapped ob space
        low = np.repeat(wos.low, self.nstack, axis=0)
        high = np.repeat(wos.high, self.nstack, axis=0)
        self.stackedobs = np.zeros((venv.num_envs,) + low.shape, low.dtype)
        observation_space = gym.spaces.Box(
            low=low, high=high, dtype=venv.observation_space.dtype)
        VecEnvWrapper.__init__(self, venv, observation_space=observation_space)

    def step_wait(self):
        obs, rews, news, infos = self.venv.step_wait()
        # Frames are stacked along the channel axis (axis 1 of the batched
        # observations), so shift the oldest frame's channels out and write
        # the newest observation into the last channel slots.
        self.stackedobs = np.roll(self.stackedobs, shift=-obs.shape[1],
                                  axis=1)
        for (i, new) in enumerate(news):
            if new:
                self.stackedobs[i] = 0
        self.stackedobs[:, -obs.shape[1]:] = obs
        return self.stackedobs, rews, news, infos

    def reset(self):
        obs = self.venv.reset()
        self.stackedobs[...] = 0
        self.stackedobs[:, -obs.shape[1]:] = obs
        return self.stackedobs


class VecRewardInfo(VecEnvWrapper):
    def __init__(self, venv):
        self.venv = venv
        VecEnvWrapper.__init__(self, venv)

    def step_wait(self):
        obs, rews, news, infos = self.venv.step_wait()
        infos = {'reward': np.expand_dims(rews, -1)}
        return obs, rews, news, infos

    def reset(self):
        obs = self.venv.reset()
        return obs


class VecNormalize(VecNormalize_):
    def __init__(self, *args, **kwargs):
        super(VecNormalize, self).__init__(*args, **kwargs)
        self.training = True

    def _obfilt(self, obs):
        if self.ob_rms:
            if self.training:
                self.ob_rms.update(obs)
            obs = np.clip((obs - self.ob_rms.mean)
                          / np.sqrt(self.ob_rms.var + self.epsilon),
                          -self.clipob, self.clipob)
        return obs

    def train(self):
        self.training = True

    def eval(self):
        self.training = False

    def step_wait(self):
        obs, rews, news, infos = self.venv.step_wait()
        infos = {'reward': np.expand_dims(rews, -1)}
        self.ret = self.ret * self.gamma + rews
        obs = self._obfilt(obs)
        if self.ret_rms:
            self.ret_rms.update(self.ret)
            rews = np.clip(rews / np.sqrt(self.ret_rms.var + self.epsilon),
                           -self.cliprew,
                           self.cliprew)
        self.ret[news] = 0.
        return obs, rews, news, infos


def make_env(env_id, seed, rank):
    def _thunk():
        env = gym.make(env_id)

        is_atari = hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        if is_atari:
            env = make_atari(env_id)

        env.seed(seed + rank)

        obs_shape = env.observation_space.shape

        if is_atari:
            if len(env.observation_space.shape) == 3:
                env = wrap_deepmind(env)
        elif len(env.observation_space.shape) == 3:
            raise NotImplementedError("CNN models work only for atari,\n"
                                      "please use a custom wrapper for a "
                                      "custom pixel input env.\n See "
                                      "wrap_deepmind for an example.")

        # If the input has shape (W,H,3), wrap for PyTorch convolutions
        obs_shape = env.observation_space.shape
        if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
            env = TransposeImage(env)

        return env
    return _thunk


def make_vec_envs(env_name, seed, num_processes, gamma, num_frame_stack=None):
    envs = [make_env(env_name, seed, i) for i in range(num_processes)]

    if len(envs) > 1:
        envs = SubprocVecEnv(envs)
    else:
        envs = DummyVecEnv(envs)

    if len(envs.observation_space.shape) == 1:
        if gamma is None or gamma == -1:
            envs = VecNormalize(envs, ret=False)
        else:
            envs = VecNormalize(envs, gamma=gamma)
    else:
        envs = VecRewardInfo(envs)

    if num_frame_stack is not None:
        envs = VecFrameStack(envs, num_frame_stack)
    elif len(envs.observation_space.shape) == 3:
        envs = VecFrameStack(envs, 4)

    return envs
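
A minimal usage sketch for make_vec_envs, not part of the commit: it assumes the baselines package and the Atari dependencies are installed, and the environment id, seed, process count and discount factor below are illustrative values only.

from gym_server.envs import make_vec_envs

# Build 8 Atari environments in subprocesses; observations come back
# transposed to (C, H, W) and stacked to 4 frames by VecFrameStack.
envs = make_vec_envs("PongNoFrameskip-v4", seed=0, num_processes=8,
                     gamma=0.99)
obs = envs.reset()
print(obs.shape)  # expected (8, 4, 84, 84) with the default deepmind wrappers
envs.close()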
deep-learning/Deep-Reinforcement-Learning-Complete-Collection/PyTorch-cpp/gym_server/messages.py

Lines changed: 91 additions & 0 deletions
@@ -0,0 +1,91 @@
"""
Classes for building requests to send to pytorch-cpp-rl.
"""
from abc import ABC, abstractmethod
import numpy as np
import msgpack


class Message(ABC):
    """
    Base class for messages.
    """
    @abstractmethod
    def to_msg(self) -> bytes:
        """
        Creates the serialized (MessagePack) bytes for the request.
        """


class InfoMessage(Message):
    """
    Builds the message returning the result of an info() action.
    """

    def __init__(self, action_space_type, action_space_shape,
                 observation_space_type, observation_space_shape):
        self.action_space_type = action_space_type
        self.action_space_shape = action_space_shape
        self.observation_space_type = observation_space_type
        self.observation_space_shape = observation_space_shape

    def to_msg(self) -> bytes:
        request = {
            "action_space_type": self.action_space_type,
            "action_space_shape": self.action_space_shape,
            "observation_space_type": self.observation_space_type,
            "observation_space_shape": self.observation_space_shape
        }
        return msgpack.packb(request)


class MakeMessage(Message):
    """
    Builds the message returning the result of a make_env() action.
    """

    def to_msg(self) -> bytes:
        request = {
            "result": "OK"
        }
        return msgpack.packb(request)


class ResetMessage(Message):
    """
    Builds the message returning the result of an env.reset() action.
    """

    def __init__(self, observation: np.ndarray):
        self.observation = observation

    def to_msg(self) -> bytes:
        request = {
            "observation": self.observation.tolist()
        }
        return msgpack.packb(request)


class StepMessage(Message):
    """
    Builds the message returning the result of an env.step() action.
    """

    def __init__(self,
                 observation: np.ndarray,
                 reward: np.ndarray,
                 done: np.ndarray,
                 real_reward: np.ndarray):
        self.observation = observation
        self.reward = reward
        self.done = done
        self.real_reward = real_reward

    def to_msg(self) -> bytes:
        request = {
            "observation": self.observation.tolist(),
            "reward": self.reward.tolist(),
            "done": self.done.tolist(),
            "real_reward": self.real_reward.tolist()
        }
        return msgpack.packb(request)
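
A quick round-trip sketch, not part of the commit, showing the wire format these classes produce; the observation, reward and done values are made-up examples for two environments.

import numpy as np
import msgpack

from gym_server.messages import StepMessage

# Example payload for two environments with 3-dimensional observations.
msg = StepMessage(observation=np.zeros((2, 3), dtype=np.float32),
                  reward=np.array([[0.5], [-1.0]]),
                  done=np.array([[False], [True]]),
                  real_reward=np.array([[0.5], [-1.0]]))

payload = msg.to_msg()                       # MessagePack-encoded bytes
decoded = msgpack.unpackb(payload, raw=False)
print(decoded["done"])                       # [[False], [True]]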
Lines changed: 124 additions & 0 deletions
@@ -0,0 +1,124 @@
"""
Contains a class that serves Gym environments for training an agent.
"""
import logging
from typing import Tuple
import numpy as np
import gym

from gym_server.envs import make_vec_envs
from gym_server.messages import (InfoMessage, MakeMessage, ResetMessage,
                                 StepMessage)
from gym_server.zmq_client import ZmqClient


RUNNING_REWARD_HORIZON = 10


class Server:
    """
    When `Server.serve()` is called, provides a ZMQ-based API for training
    RL agents on OpenAI gym environments.
    """

    def __init__(self, zmq_client: ZmqClient):
        self.zmq_client: ZmqClient = zmq_client
        self.env: gym.Env = None
        logging.info("Gym server initialized")

    def serve(self):
        """
        Run the server.
        """
        logging.info("Serving")
        try:
            self.__serve()
        except KeyboardInterrupt:
            pass

    def _serve(self):
        while True:
            request = self.zmq_client.receive()
            method = request['method']
            param = request['param']

            if method == 'info':
                (action_space_type,
                 action_space_shape,
                 observation_space_type,
                 observation_space_shape) = self.__info()
                self.zmq_client.send(InfoMessage(action_space_type,
                                                 action_space_shape,
                                                 observation_space_type,
                                                 observation_space_shape))

            elif method == 'make':
                self.__make(param['env_name'], param['num_envs'],
                            param['gamma'])
                self.zmq_client.send(MakeMessage())

            elif method == 'reset':
                observation = self.__reset()
                self.zmq_client.send(ResetMessage(observation))

            elif method == 'step':
                if 'render' in param:
                    result = self.__step(
                        np.array(param['actions']), param['render'])
                else:
                    result = self.__step(np.array(param['actions']))
                self.zmq_client.send(StepMessage(result[0],
                                                 result[1],
                                                 result[2],
                                                 result[3]['reward']))

    def info(self):
        """
        Return info about the currently loaded environment.
        """
        action_space_type = self.env.action_space.__class__.__name__
        if action_space_type == 'Discrete':
            action_space_shape = [self.env.action_space.n]
        else:
            action_space_shape = self.env.action_space.shape
        observation_space_type = self.env.observation_space.__class__.__name__
        observation_space_shape = self.env.observation_space.shape
        return (action_space_type, action_space_shape, observation_space_type,
                observation_space_shape)

    def make(self, env_name, num_envs, gamma):
        """
        Makes a vectorized environment of the type and number specified.
        """
        logging.info("Making %d %ss", num_envs, env_name)
        self.env = make_vec_envs(env_name, 0, num_envs, gamma)

    def reset(self) -> np.ndarray:
        """
        Resets the environments.
        """
        logging.info("Resetting environments")
        return self.env.reset()

    def step(self,
             actions: np.ndarray,
             render: bool = False) -> Tuple[np.ndarray, np.ndarray,
                                            np.ndarray, np.ndarray]:
        """
        Steps the environments.
        """
        if isinstance(self.env.action_space, gym.spaces.Discrete):
            actions = actions.squeeze(-1)
        observation, reward, done, info = self.env.step(actions)
        if isinstance(self.env.action_space, gym.spaces.Discrete):
            reward = np.expand_dims(reward, -1)
            done = np.expand_dims(done, -1)
        if render:
            self.env.render()
        return observation, reward, done, info

    # Name-mangled private aliases: the dispatch loop calls these copies so
    # that subclass overrides of the public methods do not change its behavior.
    __info = info
    __make = make
    __reset = reset
    __serve = _serve
    __step = step
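
For reference, a sketch of the request format the dispatch loop above expects, not part of the commit: each request is a dict with 'method' and 'param' keys, presumably MessagePack-encoded by the C++ client before ZmqClient.receive() decodes it (the ZmqClient implementation is not shown in this diff); the environment id, counts and actions below are illustrative values.

import msgpack

make_request = {
    "method": "make",
    "param": {"env_name": "PongNoFrameskip-v4", "num_envs": 8,
              "gamma": 0.99},
}
step_request = {
    "method": "step",
    "param": {"actions": [[0], [1], [2], [3], [0], [1], [2], [3]],
              "render": False},
}

# Encode as a client might, then decode as the server side would.
wire = msgpack.packb(make_request)
print(msgpack.unpackb(wire, raw=False)["method"])  # 'make'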
