
Commit 89facbe

quanvuong authored and soumith committed
replace model by policy (pytorch#154)
1 parent 140a8bf commit 89facbe

File tree: 1 file changed (+10 −10 lines changed)


reinforcement_learning/reinforce.py

Lines changed: 10 additions & 10 deletions
@@ -43,33 +43,33 @@ def forward(self, x):
         return F.softmax(action_scores)


-model = Policy()
-optimizer = optim.Adam(model.parameters(), lr=1e-2)
+policy = Policy()
+optimizer = optim.Adam(policy.parameters(), lr=1e-2)


 def select_action(state):
     state = torch.from_numpy(state).float().unsqueeze(0)
-    probs = model(Variable(state))
+    probs = policy(Variable(state))
     action = probs.multinomial()
-    model.saved_actions.append(action)
+    policy.saved_actions.append(action)
     return action.data


 def finish_episode():
     R = 0
     rewards = []
-    for r in model.rewards[::-1]:
+    for r in policy.rewards[::-1]:
         R = r + args.gamma * R
         rewards.insert(0, R)
     rewards = torch.Tensor(rewards)
     rewards = (rewards - rewards.mean()) / (rewards.std() + np.finfo(np.float32).eps)
-    for action, r in zip(model.saved_actions, rewards):
+    for action, r in zip(policy.saved_actions, rewards):
         action.reinforce(r)
     optimizer.zero_grad()
-    autograd.backward(model.saved_actions, [None for _ in model.saved_actions])
+    autograd.backward(policy.saved_actions, [None for _ in policy.saved_actions])
     optimizer.step()
-    del model.rewards[:]
-    del model.saved_actions[:]
+    del policy.rewards[:]
+    del policy.saved_actions[:]


 running_reward = 10
@@ -80,7 +80,7 @@ def finish_episode():
         state, reward, done, _ = env.step(action[0,0])
         if args.render:
             env.render()
-        model.rewards.append(reward)
+        policy.rewards.append(reward)
         if done:
             break
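Note: the sampling and update calls in this file (probs.multinomial() on a Variable, action.reinforce(r), and autograd.backward with None gradients) use PyTorch's old stochastic-Variable API, which later releases removed in favor of torch.distributions. Below is a minimal sketch of the same REINFORCE update against the modern API; it is not part of this commit. Policy, optimizer, and gamma are assumed to match the surrounding script, and saved_log_probs stands in for policy.saved_actions:

import torch
from torch.distributions import Categorical

saved_log_probs = []  # stands in for policy.saved_actions


def select_action(policy, state):
    # policy(state) is assumed to return action probabilities,
    # as the F.softmax output in the diff above does
    state = torch.from_numpy(state).float().unsqueeze(0)
    dist = Categorical(policy(state))
    action = dist.sample()                      # replaces probs.multinomial()
    saved_log_probs.append(dist.log_prob(action))
    return action.item()


def finish_episode(policy, optimizer, gamma, eps=1e-8):
    # Discounted returns, computed back to front: R_t = r_t + gamma * R_{t+1}
    R, returns = 0.0, []
    for r in reversed(policy.rewards):          # policy.rewards as in the script
        R = r + gamma * R
        returns.insert(0, R)
    returns = torch.tensor(returns)
    # Normalize returns to reduce gradient variance, as in the diff above
    returns = (returns - returns.mean()) / (returns.std() + eps)
    # Score-function loss: -sum_t log pi(a_t | s_t) * R_t; loss.backward()
    # replaces the old action.reinforce(r) / autograd.backward pair
    loss = torch.stack([-lp * R for lp, R in zip(saved_log_probs, returns)]).sum()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    del policy.rewards[:]
    del saved_log_probs[:]

The estimator itself is unchanged: each action's log-probability is weighted by its normalized discounted return, so loss.backward() accumulates the same policy gradient the old reinforce() call did.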

Comments (0)