@@ -1,5 +1,5 @@
 import argparse
-import gym
+import gymnasium as gym
 import os
 import threading
 import time
@@ -68,7 +68,7 @@ class Observer:
     def __init__(self, batch=True):
         self.id = rpc.get_worker_info().id - 1
         self.env = gym.make('CartPole-v1')
-        self.env.seed(args.seed)
+        self.env.reset(seed=args.seed)
         self.select_action = Agent.select_action_batch if batch else Agent.select_action

     def run_episode(self, agent_rref, n_steps):
@@ -92,10 +92,10 @@ def run_episode(self, agent_rref, n_steps):
             )

             # apply the action to the environment, and get the reward
-            state, reward, done, _ = self.env.step(action)
+            state, reward, terminated, truncated, _ = self.env.step(action)
             rewards[step] = reward

-            if done or step + 1 >= n_steps:
+            if terminated or truncated or step + 1 >= n_steps:
                 curr_rewards = rewards[start_step:(step + 1)]
                 R = 0
                 for i in range(curr_rewards.numel() - 1, -1, -1):
@@ -226,8 +226,7 @@ def run_worker(rank, world_size, n_episode, batch, print_log=True):
             last_reward, running_reward = agent.run_episode(n_steps=NUM_STEPS)

             if print_log:
-                print('Episode {}\tLast reward: {:.2f}\tAverage reward: {:.2f}'.format(
-                    i_episode, last_reward, running_reward))
+                print(f'Episode {i_episode}\tLast reward: {last_reward:.2f}\tAverage reward: {running_reward:.2f}')
     else:
         # other ranks are the observer
         rpc.init_rpc(OBSERVER_NAME.format(rank), rank=rank, world_size=world_size)
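
For reference, a minimal standalone sketch of the Gymnasium API this patch migrates to; the seed value and the random policy below are illustrative, not taken from the patched example:

import gymnasium as gym

env = gym.make('CartPole-v1')

# Gymnasium seeds the env's RNG through reset() rather than the removed
# env.seed(); reset() also returns an (observation, info) pair.
obs, info = env.reset(seed=0)  # illustrative seed

done = False
while not done:
    action = env.action_space.sample()  # random policy, stands in for the agent
    # step() returns five values: the old `done` flag is split into
    # `terminated` (a terminal state was reached) and `truncated`
    # (the episode was cut off, e.g. by a time limit).
    obs, reward, terminated, truncated, info = env.step(action)
    done = terminated or truncated

env.close()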