Skip to content

Commit 2ae1c0c

Browse files
authored
Merge pull request aws#661 from bbalaji-ucsd/master
Update the DeepRacer notebook to have the updated URDF model and fix the progress calculation bug.
2 parents a8bacfd + 331a937 commit 2ae1c0c

File tree

2 files changed

+261
-35
lines changed

2 files changed

+261
-35
lines changed

reinforcement_learning/rl_deepracer_robomaker_coach_gazebo/rl_deepracer_coach_robomaker.ipynb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -506,11 +506,11 @@
506506
"metadata": {},
507507
"outputs": [],
508508
"source": [
509-
"simulation_application_bundle_location = \"https://s3-us-west-2.amazonaws.com/robomaker-applications-us-west-2-11d8d0439f6a/deep-racer/deep-racer-1.0.80.0.1.0.106.0/simulation_ws.tar.gz\"\n",
509+
"simulation_application_bundle_location = \"https://s3.amazonaws.com/deepracer-managed-resources/deepracer-github-simapp.tar.gz\"\n",
510510
"\n",
511511
"!wget {simulation_application_bundle_location}\n",
512-
"!aws s3 cp simulation_ws.tar.gz s3://{s3_bucket}/{bundle_s3_key}\n",
513-
"!rm simulation_ws.tar.gz"
512+
"!aws s3 cp deepracer-github-simapp.tar.gz s3://{s3_bucket}/{bundle_s3_key}\n",
513+
"!rm deepracer-github-simapp.tar.gz"
514514
]
515515
},
516516
{

reinforcement_learning/rl_deepracer_robomaker_coach_gazebo/src/robomaker/environments/deepracer_env.py

Lines changed: 258 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,10 @@ def __init__(self):
6363
self.distance_from_border_1 = 0
6464
self.distance_from_border_2 = 0
6565
self.steps = 0
66+
self.episodes = 0
6667
self.progress_at_beginning_of_race = 0
68+
self.prev_closest_waypoint_index = 0
69+
self.closest_waypoint_index = 0
6770

6871
# actions -> steering angle, throttle
6972
self.action_space = spaces.Box(low=np.array([-1, 0]), high=np.array([+1, +1]), dtype=np.float32)
@@ -84,11 +87,12 @@ def __init__(self):
8487
rospy.Subscriber('/camera/zed/rgb/image_rect_color', sensor_image, self.callback_image)
8588
self.world_name = rospy.get_param('WORLD_NAME')
8689
self.set_waypoints()
90+
self.track_length = self.calculate_track_length()
91+
8792
self.aws_region = rospy.get_param('ROS_AWS_REGION')
8893

8994
self.reward_in_episode = 0
9095
self.prev_progress = 0
91-
self.steps = 0
9296

9397
def reset(self):
9498
if node_type == SAGEMAKER_TRAINING_WORKER:
@@ -103,11 +107,20 @@ def reset(self):
103107
self.next_state = None
104108
self.image = None
105109
self.steps = 0
110+
self.episodes += 1
106111
self.prev_progress = 0
112+
self.total_progress = 0
113+
self.action_taken = 2 #straight
114+
self.prev_action = 2 #straight
115+
self.prev_closest_waypoint_index = 0 #always starts from first waypoint
116+
self.closest_waypoint_index = 0
107117

108118
# Reset car in Gazebo
109119
self.send_action(0, 0) # set the throttle to 0
110120
self.racecar_reset()
121+
self.steering_angle = 0.0
122+
self.throttle = 0.0
123+
self.action_taken = 2.0
111124

112125
self.infer_reward_state(0, 0)
113126
return self.next_state
@@ -138,6 +151,30 @@ def racecar_reset(self):
138151
elif self.world_name.startswith(HARD_TRACK_WORLD):
139152
modelState.pose.position.x = 1.75
140153
modelState.pose.position.y = 0.6
154+
155+
def toQuaternion(pitch, roll, yaw):
156+
cy = np.cos(yaw * 0.5)
157+
sy = np.sin(yaw * 0.5)
158+
cr = np.cos(roll * 0.5)
159+
sr = np.sin(roll * 0.5)
160+
cp = np.cos(pitch * 0.5)
161+
sp = np.sin(pitch * 0.5)
162+
163+
w = cy * cr * cp + sy * sr * sp
164+
x = cy * sr * cp - sy * cr * sp
165+
y = cy * cr * sp + sy * sr * cp
166+
z = sy * cr * cp - cy * sr * sp
167+
return [x, y, z, w]
168+
169+
#clockwise
170+
quaternion = toQuaternion(roll=0.0, pitch=0.0, yaw=np.pi)
171+
#anti-clockwise
172+
quaternion = toQuaternion(roll=0.0, pitch=0.0, yaw=0.0)
173+
modelState.pose.orientation.x = quaternion[0]
174+
modelState.pose.orientation.y = quaternion[1]
175+
modelState.pose.orientation.z = quaternion[2]
176+
modelState.pose.orientation.w = quaternion[3]
177+
141178
else:
142179
raise ValueError("Unknown simulation world: {}".format(self.world_name))
143180

@@ -203,32 +240,48 @@ def infer_reward_state(self, steering_angle, throttle):
203240
# Car environment spits out BGR images by default. Converting to the
204241
# image to RGB.
205242
image = Image.frombytes('RGB', (self.image.width, self.image.height),
206-
self.image.data, 'raw', 'BGR', 0, 1)
243+
self.image.data, 'raw', 'RGB', 0, 1)
207244
# resize image and perform anti-aliasing
208245
image = image.resize(TRAINING_IMAGE_SIZE, resample=2).convert("RGB")
209246
state = np.array(image)
210247

211-
on_track = self.on_track
212-
total_progress = self.progress - self.progress_at_beginning_of_race
213-
done = False
214-
215-
self.prev_progress = total_progress
248+
249+
#total_progress = self.progress - self.progress_at_beginning_of_race
250+
#self.prev_progress = total_progress
251+
252+
# calculate the closest way point
253+
self.closest_waypoint_index = self.get_closest_waypoint()
254+
# calculate the current progress with respect to the way points
255+
current_progress = self.calculate_current_progress(self.closest_waypoint_index, self.prev_closest_waypoint_index)
256+
self.total_progress = current_progress + self.prev_progress
257+
# re-assign the prev progress and way point variables
258+
self.prev_progress = self.total_progress
259+
self.prev_closest_waypoint_index = self.closest_waypoint_index
216260

261+
done = False
262+
on_track = self.on_track
217263
if on_track != 1:
218264
reward = CRASHED
219265
done = True
220-
elif total_progress >= FINISH_LINE: # reached max waypoints
221-
print("Congratulations! You finished the race!")
222-
if self.steps == 0:
223-
reward = 0.0
224-
done = False
225-
else:
226-
reward = FINISHED / self.steps
227-
done = True
266+
#elif total_progress >= FINISH_LINE: # reached max waypoints
267+
# print("Congratulations! You finished the race!")
268+
# if self.steps == 0:
269+
# reward = 0.0
270+
# done = False
271+
# else:
272+
# reward = FINISHED / self.steps
273+
# done = True
228274
else:
229275
reward = self.reward_function(on_track, self.x, self.y, self.distance_from_center, self.yaw,
230-
total_progress, self.steps, throttle, steering_angle, self.road_width,
276+
self.total_progress, self.steps, throttle, steering_angle, self.road_width,
231277
list(self.waypoints), self.get_closest_waypoint())
278+
279+
reward += 0.5 #reward bonus for surviving
280+
281+
#smooth
282+
#if self.action_taken == self.prev_action:
283+
# reward += 0.5
284+
self.prev_action = self.action_taken
232285

233286
print('Step No=%.2f' % self.steps,
234287
'Step Reward=%.2f' % reward)
@@ -237,6 +290,25 @@ def infer_reward_state(self, steering_angle, throttle):
237290
self.reward = reward
238291
self.done = done
239292
self.next_state = state
293+
294+
# Trace logs to help us debug and visualize the training runs
295+
stdout_ = 'SIM_TRACE_LOG:%d,%d,%.4f,%.4f,%.4f,%.2f,%.2f,%d,%.4f,%.4f,%d,%s,%s,%.4f,%d,%d,%.2f,%s\n' % (
296+
self.episodes, self.steps, self.x, self.y,
297+
self.yaw,
298+
self.steering_angle,
299+
self.throttle,
300+
self.action_taken,
301+
self.reward,
302+
self.total_progress,
303+
0, #self.get_waypoint_action(), #the expert action at the next waypoint
304+
self.done,
305+
self.on_track,
306+
current_progress,
307+
0, #self.initidxWayPoint, #starting waypoint for an episode
308+
self.closest_waypoint_index,
309+
self.track_length,
310+
time.time())
311+
print(stdout_)
240312

241313
def send_reward_to_cloudwatch(self, reward):
242314
session = boto3.session.Session()
@@ -317,34 +389,188 @@ def get_closest_waypoint(self):
317389
index = index + 1
318390
return res
319391

392+
def calculate_current_progress(self, closest_waypoint_index, prev_closest_waypoint_index):
393+
current_progress = 0.0
394+
395+
# calculate distance in meters
396+
coor1 = self.waypoints[closest_waypoint_index]
397+
coor2 = self.waypoints[prev_closest_waypoint_index]
398+
current_progress = math.sqrt((coor1[0] - coor2[0]) *(coor1[0] - coor2[0]) + (coor1[1] - coor2[1]) * (coor1[1] - coor2[1]))
399+
400+
# convert to ratio and then percentage
401+
current_progress /= self.track_length
402+
current_progress *= 100.0
403+
404+
return current_progress
405+
406+
def calculate_track_length(self):
407+
track_length = 0.0
408+
prev_row = self.waypoints[0]
409+
for row in self.waypoints[1:]:
410+
track_length += math.sqrt((row[0] - prev_row[0]) * (row[0] - prev_row[0]) + (row[1] - prev_row[1]) * (row[1] - prev_row[1]))
411+
prev_row = row
412+
413+
if track_length == 0.0:
414+
print('ERROR: Track length is zero.')
415+
raise
416+
417+
return track_length
418+
320419
class DeepRacerDiscreteEnv(DeepRacerEnv):
321420
def __init__(self):
322421
DeepRacerEnv.__init__(self)
323422

324-
# actions -> straight, left, right
325-
self.action_space = spaces.Discrete(5)
423+
self.action_space = spaces.Discrete(6)
326424

327425
def step(self, action):
328426

329427
# Convert discrete to continuous
428+
throttle = 1.0
429+
throttle_multiplier = 0.8
430+
throttle = throttle*throttle_multiplier
431+
steering_angle = 0.8
432+
433+
self.throttle, self.steering_angle = self.default_6_actions(throttle, steering_angle, action)
434+
435+
self.action_taken = action
436+
437+
continous_action = [self.steering_angle, self.throttle]
438+
439+
return super().step(continous_action)
440+
441+
def default_6_actions(self, throttle, steering_angle, action):
330442
if action == 0: # move left
331443
steering_angle = 0.8
332-
throttle = 0.3
333444
elif action == 1: # move right
334-
steering_angle = -0.8 # -1 #-0.5 #-1
335-
throttle = 0.3
445+
steering_angle = -0.8
336446
elif action == 2: # straight
337447
steering_angle = 0
338-
throttle = 0.3
339-
elif action == 3: # move left
340-
steering_angle = 0.4
341-
throttle = 0.3
342-
elif action == 4: # move right
343-
steering_angle = -0.4 # -1 #-0.5 #-1
344-
throttle = 0.3
448+
elif action == 3: # move slight left
449+
steering_angle = 0.2
450+
elif action == 4: # move slight right
451+
steering_angle = -0.2
452+
elif action == 5: # slow straight
453+
steering_angle = 0
454+
throttle = throttle/2
345455
else: # should not be here
346456
raise ValueError("Invalid action")
347-
348-
continous_action = [steering_angle, throttle]
349-
350-
return super().step(continous_action)
457+
458+
return throttle, steering_angle
459+
460+
def two_steering_one_throttle_5_states(self,throttle_, steering_angle_, action):
461+
if action == 0: # move left
462+
steering_angle = 1 * steering_angle_
463+
throttle = throttle_
464+
elif action == 1: # move right
465+
steering_angle = -1 * steering_angle_
466+
throttle = throttle_
467+
elif action == 2: # move left
468+
steering_angle = 0.5 * steering_angle_
469+
throttle = throttle_
470+
elif action == 3: # move right
471+
steering_angle = -0.5 * steering_angle_
472+
throttle = throttle_
473+
elif action == 4: # straight
474+
steering_angle = 0
475+
throttle = throttle_
476+
477+
else: # should not be here
478+
raise ValueError("Invalid action")
479+
480+
return throttle, steering_angle
481+
482+
483+
def two_steering_two_throttle_10_states(self,throttle_, steering_angle_, action):
484+
if action == 0: # move left
485+
steering_angle = 1 * steering_angle_
486+
throttle = throttle_
487+
elif action == 1: # move right
488+
steering_angle = -1 * steering_angle_
489+
throttle = throttle_
490+
elif action == 2: # move left
491+
steering_angle = 0.5 * steering_angle_
492+
throttle = throttle_
493+
elif action == 3: # move right
494+
steering_angle = -0.5 * steering_angle_
495+
throttle = throttle_
496+
elif action == 4: # straight
497+
steering_angle = 0
498+
throttle = throttle_
499+
elif action == 5: # move left
500+
steering_angle = 1 * steering_angle_
501+
throttle = throttle_ * 0.5
502+
elif action == 6: # move right
503+
steering_angle = -1 * steering_angle_
504+
throttle = throttle_ * 0.5
505+
elif action == 7: # move left
506+
steering_angle = 0.5 * steering_angle_
507+
throttle = throttle_ * 0.5
508+
elif action == 8: # move right
509+
steering_angle = -0.5 * steering_angle_
510+
throttle = throttle_ * 0.5
511+
elif action == 9: # straight
512+
steering_angle = 0
513+
throttle = throttle_ * 0.5
514+
515+
else: # should not be here
516+
raise ValueError("Invalid action")
517+
518+
return throttle, steering_angle
519+
520+
521+
def two_steering_three_throttle_15_states(self,throttle_, steering_angle_, action):
522+
523+
# Convert discrete to continuous
524+
if action == 0: # move left
525+
steering_angle = steering_angle_
526+
throttle = throttle_
527+
elif action == 1: # move right
528+
steering_angle = -1 * steering_angle_
529+
throttle = throttle_
530+
elif action == 2: # move left
531+
steering_angle = 0.5 * steering_angle_
532+
throttle = throttle_
533+
elif action == 3: # move right
534+
steering_angle = -0.5 * steering_angle_
535+
throttle = throttle_
536+
elif action == 4: # straight
537+
steering_angle = 0
538+
throttle = throttle_
539+
540+
541+
elif action == 5: # move left
542+
steering_angle = steering_angle_
543+
throttle = 0.5 * throttle_
544+
elif action == 6: # move right
545+
steering_angle = -1 * steering_angle_
546+
throttle = 0.5 * throttle_
547+
elif action == 7: # move left
548+
steering_angle = 0.5 * steering_angle_
549+
throttle = 0.5 * throttle_
550+
elif action == 8: # move right
551+
steering_angle = -0.5 * steering_angle_
552+
throttle = 0.5 * throttle_
553+
elif action == 9: # slow straight
554+
steering_angle = 0
555+
throttle = throttle_ *0.5
556+
557+
elif action == 10: # move left
558+
steering_angle = 1 * steering_angle_
559+
throttle = throttle_ * 2.0
560+
elif action == 11: # move right
561+
steering_angle = -1 * steering_angle_
562+
throttle = throttle_ * 2.0
563+
elif action == 12: # move left
564+
steering_angle = 0.5 * steering_angle_
565+
throttle = throttle_ * 2.0
566+
elif action == 13: # move right
567+
steering_angle = -0.5 * steering_angle_
568+
throttle = throttle_ * 2.0
569+
elif action == 14: # fast straight
570+
steering_angle = 0
571+
throttle = throttle_ * 2.0
572+
573+
else: # should not be here
574+
raise ValueError("Invalid action")
575+
576+
return throttle, steering_angle

0 commit comments

Comments
 (0)