@@ -63,7 +63,10 @@ def __init__(self):
6363 self .distance_from_border_1 = 0
6464 self .distance_from_border_2 = 0
6565 self .steps = 0
66+ self .episodes = 0
6667 self .progress_at_beginning_of_race = 0
68+ self .prev_closest_waypoint_index = 0
69+ self .closest_waypoint_index = 0
6770
6871 # actions -> steering angle, throttle
6972 self .action_space = spaces .Box (low = np .array ([- 1 , 0 ]), high = np .array ([+ 1 , + 1 ]), dtype = np .float32 )
@@ -84,11 +87,12 @@ def __init__(self):
8487 rospy .Subscriber ('/camera/zed/rgb/image_rect_color' , sensor_image , self .callback_image )
8588 self .world_name = rospy .get_param ('WORLD_NAME' )
8689 self .set_waypoints ()
90+ self .track_length = self .calculate_track_length ()
91+
8792 self .aws_region = rospy .get_param ('ROS_AWS_REGION' )
8893
8994 self .reward_in_episode = 0
9095 self .prev_progress = 0
91- self .steps = 0
9296
9397 def reset (self ):
9498 if node_type == SAGEMAKER_TRAINING_WORKER :
@@ -103,11 +107,20 @@ def reset(self):
103107 self .next_state = None
104108 self .image = None
105109 self .steps = 0
110+ self .episodes += 1
106111 self .prev_progress = 0
112+ self .total_progress = 0
113+ self .action_taken = 2 #straight
114+ self .prev_action = 2 #straight
115+ self .prev_closest_waypoint_index = 0 #always starts from first waypoint
116+ self .closest_waypoint_index = 0
107117
108118 # Reset car in Gazebo
109119 self .send_action (0 , 0 ) # set the throttle to 0
110120 self .racecar_reset ()
121+ self .steering_angle = 0.0
122+ self .throttle = 0.0
123+ self .action_taken = 2.0
111124
112125 self .infer_reward_state (0 , 0 )
113126 return self .next_state
@@ -138,6 +151,30 @@ def racecar_reset(self):
138151 elif self .world_name .startswith (HARD_TRACK_WORLD ):
139152 modelState .pose .position .x = 1.75
140153 modelState .pose .position .y = 0.6
154+
155+ def toQuaternion (pitch , roll , yaw ):
156+ cy = np .cos (yaw * 0.5 )
157+ sy = np .sin (yaw * 0.5 )
158+ cr = np .cos (roll * 0.5 )
159+ sr = np .sin (roll * 0.5 )
160+ cp = np .cos (pitch * 0.5 )
161+ sp = np .sin (pitch * 0.5 )
162+
163+ w = cy * cr * cp + sy * sr * sp
164+ x = cy * sr * cp - sy * cr * sp
165+ y = cy * cr * sp + sy * sr * cp
166+ z = sy * cr * cp - cy * sr * sp
167+ return [x , y , z , w ]
168+
169+ #clockwise
170+ quaternion = toQuaternion (roll = 0.0 , pitch = 0.0 , yaw = np .pi )
171+ #anti-clockwise
172+ quaternion = toQuaternion (roll = 0.0 , pitch = 0.0 , yaw = 0.0 )
173+ modelState .pose .orientation .x = quaternion [0 ]
174+ modelState .pose .orientation .y = quaternion [1 ]
175+ modelState .pose .orientation .z = quaternion [2 ]
176+ modelState .pose .orientation .w = quaternion [3 ]
177+
141178 else :
142179 raise ValueError ("Unknown simulation world: {}" .format (self .world_name ))
143180
@@ -203,32 +240,48 @@ def infer_reward_state(self, steering_angle, throttle):
203240 # Car environment spits out BGR images by default. Converting to the
204241 # image to RGB.
205242 image = Image .frombytes ('RGB' , (self .image .width , self .image .height ),
206- self .image .data , 'raw' , 'BGR ' , 0 , 1 )
243+ self .image .data , 'raw' , 'RGB ' , 0 , 1 )
207244 # resize image ans perform anti-aliasing
208245 image = image .resize (TRAINING_IMAGE_SIZE , resample = 2 ).convert ("RGB" )
209246 state = np .array (image )
210247
211- on_track = self .on_track
212- total_progress = self .progress - self .progress_at_beginning_of_race
213- done = False
214-
215- self .prev_progress = total_progress
248+
249+ #total_progress = self.progress - self.progress_at_beginning_of_race
250+ #self.prev_progress = total_progress
251+
252+ # calculate the closest way point
253+ self .closest_waypoint_index = self .get_closest_waypoint ()
254+ # calculate the current progress with respect to the way points
255+ current_progress = self .calculate_current_progress (self .closest_waypoint_index , self .prev_closest_waypoint_index )
256+ self .total_progress = current_progress + self .prev_progress
257+ # re-assign the prev progress and way point variables
258+ self .prev_progress = self .total_progress
259+ self .prev_closest_waypoint_index = self .closest_waypoint_index
216260
261+ done = False
262+ on_track = self .on_track
217263 if on_track != 1 :
218264 reward = CRASHED
219265 done = True
220- elif total_progress >= FINISH_LINE : # reached max waypoints
221- print ("Congratulations! You finished the race!" )
222- if self .steps == 0 :
223- reward = 0.0
224- done = False
225- else :
226- reward = FINISHED / self .steps
227- done = True
266+ # elif total_progress >= FINISH_LINE: # reached max waypoints
267+ # print("Congratulations! You finished the race!")
268+ # if self.steps == 0:
269+ # reward = 0.0
270+ # done = False
271+ # else:
272+ # reward = FINISHED / self.steps
273+ # done = True
228274 else :
229275 reward = self .reward_function (on_track , self .x , self .y , self .distance_from_center , self .yaw ,
230- total_progress , self .steps , throttle , steering_angle , self .road_width ,
276+ self . total_progress , self .steps , throttle , steering_angle , self .road_width ,
231277 list (self .waypoints ), self .get_closest_waypoint ())
278+
279+ reward += 0.5 #reward bonus for surviving
280+
281+ #smooth
282+ #if self.action_taken == self.prev_action:
283+ # reward += 0.5
284+ self .prev_action = self .action_taken
232285
233286 print ('Step No=%.2f' % self .steps ,
234287 'Step Reward=%.2f' % reward )
@@ -237,6 +290,25 @@ def infer_reward_state(self, steering_angle, throttle):
237290 self .reward = reward
238291 self .done = done
239292 self .next_state = state
293+
294+ # Trace logs to help us debug and visualize the training runs
295+ stdout_ = 'SIM_TRACE_LOG:%d,%d,%.4f,%.4f,%.4f,%.2f,%.2f,%d,%.4f,%.4f,%d,%s,%s,%.4f,%d,%d,%.2f,%s\n ' % (
296+ self .episodes , self .steps , self .x , self .y ,
297+ self .yaw ,
298+ self .steering_angle ,
299+ self .throttle ,
300+ self .action_taken ,
301+ self .reward ,
302+ self .total_progress ,
303+ 0 , #self.get_waypoint_action(), #the expert action at the next waypoint
304+ self .done ,
305+ self .on_track ,
306+ current_progress ,
307+ 0 , #self.initidxWayPoint, #starting waypoint for an episode
308+ self .closest_waypoint_index ,
309+ self .track_length ,
310+ time .time ())
311+ print (stdout_ )
240312
241313 def send_reward_to_cloudwatch (self , reward ):
242314 session = boto3 .session .Session ()
@@ -317,34 +389,188 @@ def get_closest_waypoint(self):
317389 index = index + 1
318390 return res
319391
392+ def calculate_current_progress (self , closest_waypoint_index , prev_closest_waypoint_index ):
393+ current_progress = 0.0
394+
395+ # calculate distance in meters
396+ coor1 = self .waypoints [closest_waypoint_index ]
397+ coor2 = self .waypoints [prev_closest_waypoint_index ]
398+ current_progress = math .sqrt ((coor1 [0 ] - coor2 [0 ]) * (coor1 [0 ] - coor2 [0 ]) + (coor1 [1 ] - coor2 [1 ]) * (coor1 [1 ] - coor2 [1 ]))
399+
400+ # convert to ratio and then percentage
401+ current_progress /= self .track_length
402+ current_progress *= 100.0
403+
404+ return current_progress
405+
406+ def calculate_track_length (self ):
407+ track_length = 0.0
408+ prev_row = self .waypoints [0 ]
409+ for row in self .waypoints [1 :]:
410+ track_length += math .sqrt ((row [0 ] - prev_row [0 ]) * (row [0 ] - prev_row [0 ]) + (row [1 ] - prev_row [1 ]) * (row [1 ] - prev_row [1 ]))
411+ prev_row = row
412+
413+ if track_length == 0.0 :
414+ print ('ERROR: Track length is zero.' )
415+ raise
416+
417+ return track_length
418+
320419class DeepRacerDiscreteEnv (DeepRacerEnv ):
321420 def __init__ (self ):
322421 DeepRacerEnv .__init__ (self )
323422
324- # actions -> straight, left, right
325- self .action_space = spaces .Discrete (5 )
423+ self .action_space = spaces .Discrete (6 )
326424
327425 def step (self , action ):
328426
329427 # Convert discrete to continuous
428+ throttle = 1.0
429+ throttle_multiplier = 0.8
430+ throttle = throttle * throttle_multiplier
431+ steering_angle = 0.8
432+
433+ self .throttle , self .steering_angle = self .default_6_actions (throttle , steering_angle , action )
434+
435+ self .action_taken = action
436+
437+ continous_action = [self .steering_angle , self .throttle ]
438+
439+ return super ().step (continous_action )
440+
441+ def default_6_actions (self , throttle , steering_angle , action ):
330442 if action == 0 : # move left
331443 steering_angle = 0.8
332- throttle = 0.3
333444 elif action == 1 : # move right
334- steering_angle = - 0.8 # -1 #-0.5 #-1
335- throttle = 0.3
445+ steering_angle = - 0.8
336446 elif action == 2 : # straight
337447 steering_angle = 0
338- throttle = 0.3
339- elif action == 3 : # move left
340- steering_angle = 0.4
341- throttle = 0.3
342- elif action == 4 : # move right
343- steering_angle = - 0.4 # -1 #-0.5 #-1
344- throttle = 0.3
448+ elif action == 3 : # move slight left
449+ steering_angle = 0.2
450+ elif action == 4 : # move slight right
451+ steering_angle = - 0.2
452+ elif action == 5 : # slow straight
453+ steering_angle = 0
454+ throttle = throttle / 2
345455 else : # should not be here
346456 raise ValueError ("Invalid action" )
347-
348- continous_action = [steering_angle , throttle ]
349-
350- return super ().step (continous_action )
457+
458+ return throttle , steering_angle
459+
460+ def two_steering_one_throttle_5_states (self ,throttle_ , steering_angle_ , action ):
461+ if action == 0 : # move left
462+ steering_angle = 1 * steering_angle_
463+ throttle = throttle_
464+ elif action == 1 : # move right
465+ steering_angle = - 1 * steering_angle_
466+ throttle = throttle_
467+ elif action == 2 : # move left
468+ steering_angle = 0.5 * steering_angle_
469+ throttle = throttle_
470+ elif action == 3 : # move right
471+ steering_angle = - 0.5 * steering_angle_
472+ throttle = throttle_
473+ elif action == 4 : # straight
474+ steering_angle = 0
475+ throttle = throttle_
476+
477+ else : # should not be here
478+ raise ValueError ("Invalid action" )
479+
480+ return throttle , steering_angle
481+
482+
483+ def two_steering_two_throttle_10_states (self ,throttle_ , steering_angle_ , action ):
484+ if action == 0 : # move left
485+ steering_angle = 1 * steering_angle_
486+ throttle = throttle_
487+ elif action == 1 : # move right
488+ steering_angle = - 1 * steering_angle_
489+ throttle = throttle_
490+ elif action == 2 : # move left
491+ steering_angle = 0.5 * steering_angle_
492+ throttle = throttle_
493+ elif action == 3 : # move right
494+ steering_angle = - 0.5 * steering_angle_
495+ throttle = throttle_
496+ elif action == 4 : # straight
497+ steering_angle = 0
498+ throttle = throttle_
499+ elif action == 5 : # move left
500+ steering_angle = 1 * steering_angle_
501+ throttle = throttle_ * 0.5
502+ elif action == 6 : # move right
503+ steering_angle = - 1 * steering_angle_
504+ throttle = throttle_ * 0.5
505+ elif action == 7 : # move left
506+ steering_angle = 0.5 * steering_angle_
507+ throttle = throttle_ * 0.5
508+ elif action == 8 : # move right
509+ steering_angle = - 0.5 * steering_angle_
510+ throttle = throttle_ * 0.5
511+ elif action == 9 : # straight
512+ steering_angle = 0
513+ throttle = throttle_ * 0.5
514+
515+ else : # should not be here
516+ raise ValueError ("Invalid action" )
517+
518+ return throttle , steering_angle
519+
520+
521+ def two_steering_three_throttle_15_states (self ,throttle_ , steering_angle_ , action ):
522+
523+ # Convert discrete to continuous
524+ if action == 0 : # move left
525+ steering_angle = steering_angle_
526+ throttle = throttle_
527+ elif action == 1 : # move right
528+ steering_angle = - 1 * steering_angle_
529+ throttle = throttle_
530+ elif action == 2 : # move left
531+ steering_angle = 0.5 * steering_angle_
532+ throttle = throttle_
533+ elif action == 3 : # move right
534+ steering_angle = - 0.5 * steering_angle_
535+ throttle = throttle_
536+ elif action == 4 : # straight
537+ steering_angle = 0
538+ throttle = throttle_
539+
540+
541+ elif action == 5 : # move left
542+ steering_angle = steering_angle_
543+ throttle = 0.5 * throttle_
544+ elif action == 6 : # move right
545+ steering_angle = - 1 * steering_angle_
546+ throttle = 0.5 * throttle_
547+ elif action == 7 : # move left
548+ steering_angle = 0.5 * steering_angle_
549+ throttle = 0.5 * throttle_
550+ elif action == 8 : # move right
551+ steering_angle = - 0.5 * steering_angle_
552+ throttle = 0.5 * throttle_
553+ elif action == 9 : # slow straight
554+ steering_angle = 0
555+ throttle = throttle_ * 0.5
556+
557+ elif action == 10 : # move left
558+ steering_angle = 1 * steering_angle_
559+ throttle = throttle_ * 2.0
560+ elif action == 11 : # move right
561+ steering_angle = - 1 * steering_angle_
562+ throttle = throttle_ * 2.0
563+ elif action == 12 : # move left
564+ steering_angle = 0.5 * steering_angle_
565+ throttle = throttle_ * 2.0
566+ elif action == 13 : # move right
567+ steering_angle = - 0.5 * steering_angle_
568+ throttle = throttle_ * 2.0
569+ elif action == 14 : # fast straight
570+ steering_angle = 0
571+ throttle = throttle_ * 2.0
572+
573+ else : # should not be here
574+ raise ValueError ("Invalid action" )
575+
576+ return throttle , steering_angle
0 commit comments