Commit 8ded27a

Update code comments
1 parent 9f3de0b commit 8ded27a

2 files changed: 9 additions & 3 deletions

README.md

Lines changed: 6 additions & 0 deletions
@@ -77,6 +77,12 @@ else:
 ```
 #### Step 3: Apply both diffusion training loss and reward loss:
 ```python
+# reward model inference
+if args.task_name == 'canny':
+    outputs = reward_model(image.to(accelerator.device), low_threshold, high_threshold)
+else:
+    outputs = reward_model(image.to(accelerator.device))
+
 # Determine which samples in the current batch need to calculate reward loss
 timestep_mask = (args.min_timestep_rewarding <= timesteps.reshape(-1, 1)) & (timesteps.reshape(-1, 1) <= args.max_timestep_rewarding)
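
For orientation, a minimal sketch of how Step 3 might sum the two objectives; `combine_losses`, `target`, and `reward_weight` are illustrative names, and the actual loss-combination code is not part of this commit:

```python
import torch
import torch.nn.functional as F

def combine_losses(model_pred: torch.Tensor,
                   target: torch.Tensor,
                   reward_loss: torch.Tensor,
                   reward_weight: float = 1.0) -> torch.Tensor:
    # Standard diffusion (noise-prediction) loss over the whole batch
    diffusion_loss = F.mse_loss(model_pred.float(), target.float(), reduction="mean")
    # Add the reward loss, scaled by an assumed weighting coefficient
    return diffusion_loss + reward_weight * reward_loss
```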

train/reward_control.py

Lines changed: 3 additions & 3 deletions
@@ -1428,14 +1428,14 @@ def load_model_hook(models, input_dir):
 """
 Rewarding ControlNet
 """
-# compute the original image
+# Predict the single-step denoised latents
 pred_original_sample = [
     noise_scheduler.step(noise, t, noisy_latent).pred_original_sample.to(weight_dtype) \
     for (noise, t, noisy_latent) in zip(model_pred, timesteps, noisy_latents)
 ]
 pred_original_sample = torch.stack(pred_original_sample)

-# compute the original image
+# Map the denoised latents into RGB images
 pred_original_sample = 1 / vae.config.scaling_factor * pred_original_sample
 image = vae.decode(pred_original_sample.to(weight_dtype)).sample
 image = (image / 2 + 0.5).clamp(0, 1)
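
For reference, `pred_original_sample` returned by a diffusers scheduler's `step()` call is the one-step estimate of the clean latent; for an epsilon-prediction model it reduces to the closed form sketched below (an illustration, not the repository's code):

```python
import torch

def predict_original_sample(noise_pred: torch.Tensor,
                            noisy_latent: torch.Tensor,
                            alpha_prod_t: torch.Tensor) -> torch.Tensor:
    # x0_hat = (x_t - sqrt(1 - alpha_bar_t) * eps_hat) / sqrt(alpha_bar_t)
    return (noisy_latent - (1.0 - alpha_prod_t) ** 0.5 * noise_pred) / alpha_prod_t ** 0.5
```
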
@@ -1500,7 +1500,7 @@ def load_model_hook(models, input_dir):

 labels = [x.to(accelerator.device) for x in labels] if isinstance(labels, list) else labels.to(accelerator.device)

-# timestep-based filtering
+# Determine which samples in the current batch need to calculate reward loss
 timestep_mask = (args.min_timestep_rewarding <= timesteps.reshape(-1, 1)) & (timesteps.reshape(-1, 1) <= args.max_timestep_rewarding)

 # calculate the reward loss
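
The `# calculate the reward loss` step itself is outside this diff; a minimal sketch of how `timestep_mask` could gate a per-sample reward loss, assuming an MSE-style objective between the reward-model `outputs` and `labels` (the actual per-task loss is not shown here):

```python
import torch
import torch.nn.functional as F

def masked_reward_loss(outputs: torch.Tensor,
                       labels: torch.Tensor,
                       timestep_mask: torch.Tensor) -> torch.Tensor:
    # Per-sample loss, averaged over all non-batch dimensions
    per_sample = F.mse_loss(outputs.float(), labels.float(), reduction="none")
    per_sample = per_sample.reshape(per_sample.shape[0], -1).mean(dim=1)
    # Keep only samples whose timestep falls inside the rewarding window
    mask = timestep_mask.reshape(-1).float()
    return (per_sample * mask).sum() / mask.sum().clamp(min=1.0)
```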
