alok
diff --git a/pg/main.py b/pg/main.py
Lines changed: 2 additions & 2 deletions
diff --git a/ppo/ppo.py b/ppo/ppo.py
Lines changed: 1 addition & 1 deletion
@@ -69,12 +69,12 @@ def G(rewards, start=0, end=None):
  ]
 
  states = torch.stack(states)
- state_values = critic(states).reshape(-1)
+ state_values = critic(states).flatten()
 
  cumulative_returns = tensor(cumulative_returns)
  Adv = cumulative_returns - state_values
 
- log_probs = torch.stack(log_probs).reshape(-1)
+ log_probs = torch.stack(log_probs).flatten()
 
  loss = -(Adv @ log_probs) / len(rewards)
  if episode > 500 and loss.item() < -1000:
 
@@ -122,7 +122,7 @@ def train(model, old_model, data) -> float:
  [sum(discounted_rewards[t:]) for t, _ in enumerate(discounted_rewards)]
  )
 
- state_values = model.vf(states).reshape(-1)
+ state_values = model.vf(states).flatten()
 
  adv = cumulative_returns - state_values
  vf_loss = F.mse_loss(state_values, cumulative_returns)
Original file line number	Diff line number	Diff line change
`@@ -122,7 +122,7 @@ def train(model, old_model, data) -> float:`
`122`	`122`	`[sum(discounted_rewards[t:]) for t, _ in enumerate(discounted_rewards)]`
`123`	`123`	`)`
`124`	`124`
`125`		`- state_values = model.vf(states).reshape(-1)`
	`125`	`+ state_values = model.vf(states).flatten()`
`126`	`126`
`127`	`127`	`adv = cumulative_returns - state_values`
`128`	`128`	`vf_loss = F.mse_loss(state_values, cumulative_returns)`