@@ -43,14 +43,14 @@ def __init__(self, world, reset_callback=None, reward_callback=None,
4343 if self .discrete_action_space :
4444 u_action_space = spaces .Discrete (world .dim_p * 2 + 1 )
4545 else :
46- u_action_space = spaces .Box (low = - agent .u_range , high = + agent .u_range , shape = (world .dim_p ,))
46+ u_action_space = spaces .Box (low = - agent .u_range , high = + agent .u_range , shape = (world .dim_p ,), dtype = np . float32 )
4747 if agent .movable :
4848 total_action_space .append (u_action_space )
4949 # communication action space
5050 if self .discrete_action_space :
5151 c_action_space = spaces .Discrete (world .dim_c )
5252 else :
53- c_action_space = spaces .Box (low = 0.0 , high = 1.0 , shape = (world .dim_c ,))
53+ c_action_space = spaces .Box (low = 0.0 , high = 1.0 , shape = (world .dim_c ,), dtype = np . float32 )
5454 if not agent .silent :
5555 total_action_space .append (c_action_space )
5656 # total action space
@@ -65,7 +65,7 @@ def __init__(self, world, reset_callback=None, reward_callback=None,
6565 self .action_space .append (total_action_space [0 ])
6666 # observation space
6767 obs_dim = len (observation_callback (agent , self .world ))
68- self .observation_space .append (spaces .Box (low = - np .inf , high = + np .inf , shape = (obs_dim ,)))
68+ self .observation_space .append (spaces .Box (low = - np .inf , high = + np .inf , shape = (obs_dim ,), dtype = np . float32 ))
6969 agent .action .c = np .zeros (self .world .dim_c )
7070
7171 # rendering
@@ -76,7 +76,7 @@ def __init__(self, world, reset_callback=None, reward_callback=None,
7676 self .viewers = [None ] * self .n
7777 self ._reset_render ()
7878
79- def _step (self , action_n ):
79+ def step (self , action_n ):
8080 obs_n = []
8181 reward_n = []
8282 done_n = []
@@ -102,7 +102,7 @@ def _step(self, action_n):
102102
103103 return obs_n , reward_n , done_n , info_n
104104
105- def _reset (self ):
105+ def reset (self ):
106106 # reset world
107107 self .reset_callback (self .world )
108108 # reset renderer
@@ -196,7 +196,7 @@ def _reset_render(self):
196196 self .render_geoms_xform = None
197197
198198 # render environment
199- def _render (self , mode = 'human' , close = True ):
199+ def render (self , mode = 'human' ):
200200 if mode == 'human' :
201201 alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
202202 message = ''
@@ -211,14 +211,6 @@ def _render(self, mode='human', close=True):
211211 message += (other .name + ' to ' + agent .name + ': ' + word + ' ' )
212212 print (message )
213213
214- if close :
215- # close any existic renderers
216- for i ,viewer in enumerate (self .viewers ):
217- if viewer is not None :
218- viewer .close ()
219- self .viewers [i ] = None
220- return []
221-
222214 for i in range (len (self .viewers )):
223215 # create viewers (if necessary)
224216 if self .viewers [i ] is None :
@@ -313,7 +305,7 @@ def action_space(self):
313305 def observation_space (self ):
314306 return self .env_batch [0 ].observation_space
315307
316- def _step (self , action_n , time ):
308+ def step (self , action_n , time ):
317309 obs_n = []
318310 reward_n = []
319311 done_n = []
@@ -328,14 +320,14 @@ def _step(self, action_n, time):
328320 done_n += done
329321 return obs_n , reward_n , done_n , info_n
330322
331- def _reset (self ):
323+ def reset (self ):
332324 obs_n = []
333325 for env in self .env_batch :
334326 obs_n += env .reset ()
335327 return obs_n
336328
337329 # render environment
338- def _render (self , mode = 'human' , close = True ):
330+ def render (self , mode = 'human' , close = True ):
339331 results_n = []
340332 for env in self .env_batch :
341333 results_n += env .render (mode , close )
0 commit comments