3
3
class CartPoleConfigModule ():
4
4
# parameters
5
5
ENV_NAME = "CartPole-v0"
6
+ PLANNER_TYPE = "Const"
6
7
TYPE = "Nonlinear"
7
8
TASK_HORIZON = 500
8
9
PRED_LEN = 50
9
10
STATE_SIZE = 4
10
11
INPUT_SIZE = 1
11
12
DT = 0.02
12
13
# cost parameters
13
- R = np .diag ([1. ]) # 0.01 is worked for MPPI and CEM and MPPIWilliams
14
+ R = np .diag ([0.01 ]) # 0.01 works for MPPI, CEM and MPPIWilliams
14
15
# 1. works for iLQR
15
- Terminal_Weight = 1.
16
+ TERMINAL_WEIGHT = 1.
16
17
Q = None
17
18
Sf = None
18
19
# bounds
@@ -23,6 +24,7 @@ class CartPoleConfigModule():
23
24
MC = 1.
24
25
L = 0.5
25
26
G = 9.81
27
+ CART_SIZE = (0.15 , 0.1 )
26
28
27
29
def __init__ (self ):
28
30
"""
@@ -76,6 +78,7 @@ def __init__(self):
76
78
@staticmethod
77
79
def input_cost_fn (u ):
78
80
""" input cost function
81
+
79
82
Args:
80
83
u (numpy.ndarray): input, shape(pred_len, input_size)
81
84
or shape(pop_size, pred_len, input_size)
@@ -88,6 +91,7 @@ def input_cost_fn(u):
88
91
@staticmethod
89
92
def state_cost_fn (x , g_x ):
90
93
""" state cost function
94
+
91
95
Args:
92
96
x (numpy.ndarray): state, shape(pred_len, state_size)
93
97
or shape(pop_size, pred_len, state_size)
@@ -118,6 +122,7 @@ def state_cost_fn(x, g_x):
118
122
@staticmethod
119
123
def terminal_state_cost_fn (terminal_x , terminal_g_x ):
120
124
"""
125
+
121
126
Args:
122
127
terminal_x (numpy.ndarray): terminal state,
123
128
shape(state_size, ) or shape(pop_size, state_size)
@@ -133,13 +138,13 @@ def terminal_state_cost_fn(terminal_x, terminal_g_x):
133
138
+ 12. * ((np .cos (terminal_x [:, 2 ]) + 1. )** 2 ) \
134
139
+ 0.1 * (terminal_x [:, 1 ]** 2 ) \
135
140
+ 0.1 * (terminal_x [:, 3 ]** 2 ))[:, np .newaxis ] \
136
- * CartPoleConfigModule .Terminal_Weight
141
+ * CartPoleConfigModule .TERMINAL_WEIGHT
137
142
138
143
return (6. * (terminal_x [0 ]** 2 ) \
139
144
+ 12. * ((np .cos (terminal_x [2 ]) + 1. )** 2 ) \
140
145
+ 0.1 * (terminal_x [1 ]** 2 ) \
141
146
+ 0.1 * (terminal_x [3 ]** 2 )) \
142
- * CartPoleConfigModule .Terminal_Weight
147
+ * CartPoleConfigModule .TERMINAL_WEIGHT
143
148
144
149
@staticmethod
145
150
def gradient_cost_fn_with_state (x , g_x , terminal = False ):
@@ -168,7 +173,7 @@ def gradient_cost_fn_with_state(x, g_x, terminal=False):
168
173
cost_dx3 = 0.2 * x [3 ]
169
174
cost_dx = np .array ([[cost_dx0 , cost_dx1 , cost_dx2 , cost_dx3 ]])
170
175
171
- return cost_dx * CartPoleConfigModule .Terminal_Weight
176
+ return cost_dx * CartPoleConfigModule .TERMINAL_WEIGHT
172
177
173
178
@staticmethod
174
179
def gradient_cost_fn_with_input (x , u ):
@@ -177,7 +182,6 @@ def gradient_cost_fn_with_input(x, u):
177
182
Args:
178
183
x (numpy.ndarray): state, shape(pred_len, state_size)
179
184
u (numpy.ndarray): input, shape(pred_len, input_size)
180
-
181
185
Returns:
182
186
l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size)
183
187
"""
@@ -190,7 +194,6 @@ def hessian_cost_fn_with_state(x, g_x, terminal=False):
190
194
Args:
191
195
x (numpy.ndarray): state, shape(pred_len, state_size)
192
196
g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
193
-
194
197
Returns:
195
198
l_xx (numpy.ndarray): hessian of cost,
196
199
shape(pred_len, state_size, state_size) or
@@ -220,7 +223,7 @@ def hessian_cost_fn_with_state(x, g_x, terminal=False):
220
223
* - np .cos (x [2 ])
221
224
hessian [3 , 3 ] = 0.2
222
225
223
- return hessian [np .newaxis , :, :] * CartPoleConfigModule .Terminal_Weight
226
+ return hessian [np .newaxis , :, :] * CartPoleConfigModule .TERMINAL_WEIGHT
224
227
225
228
@staticmethod
226
229
def hessian_cost_fn_with_input (x , u ):
@@ -229,7 +232,6 @@ def hessian_cost_fn_with_input(x, u):
229
232
Args:
230
233
x (numpy.ndarray): state, shape(pred_len, state_size)
231
234
u (numpy.ndarray): input, shape(pred_len, input_size)
232
-
233
235
Returns:
234
236
l_uu (numpy.ndarray): hessian of cost,
235
237
shape(pred_len, input_size, input_size)
@@ -245,7 +247,6 @@ def hessian_cost_fn_with_input_state(x, u):
245
247
Args:
246
248
x (numpy.ndarray): state, shape(pred_len, state_size)
247
249
u (numpy.ndarray): input, shape(pred_len, input_size)
248
-
249
250
Returns:
250
251
l_ux (numpy.ndarray): hessian of cost,
251
252
shape(pred_len, input_size, state_size)
0 commit comments