IBMer
diff --git a/backends/stable_diffusion/schedulers/scheduling_ddim.py b/backends/stable_diffusion/schedulers/scheduling_ddim.py
Lines changed: 12 additions & 4 deletions
diff --git a/backends/stable_diffusion/stable_diffusion.py b/backends/stable_diffusion/stable_diffusion.py
Lines changed: 15 additions & 1 deletion
diff --git a/backends/stable_diffusion/tests.py b/backends/stable_diffusion/tests.py
Lines changed: 4 additions & 5 deletions
@@ -88,6 +88,7 @@ def __init__(
  clip_sample: bool = True,
  set_alpha_to_one: bool = True,
  tensor_format: str = "pt",
+ prediction_type: str = "epsilon"
  ):
  if trained_betas is not None:
  self.betas = np.asarray(trained_betas)
@@ -115,6 +116,7 @@ def __init__(
  self.clip_sample = clip_sample
  self.set_alpha_to_one = set_alpha_to_one
  self.tensor_format = tensor_format
+ self.prediction_type = prediction_type
 
  # At every step in ddim, we are looking into the previous alphas_cumprod
  # For the final step, there is no previous alphas_cumprod because we are already at 0
@@ -217,8 +219,14 @@ def step(
 
  # 3. compute predicted original sample from predicted noise also called
  # "predicted x_0" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
- pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5)
-
+ if self.config.prediction_type == "epsilon":
+ pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5)
+ pred_epsilon = model_output
+ elif self.config.prediction_type == "v_prediction":
+ pred_original_sample = (alpha_prod_t**0.5) * sample - (beta_prod_t**0.5) * model_output
+ pred_epsilon = (alpha_prod_t**0.5) * model_output + (beta_prod_t**0.5) * sample
+ else:
+ raise ValueError("Unknown prediction_type")
  # 4. Clip "predicted x_0"
  if self.config.clip_sample:
  pred_original_sample = self.clip(pred_original_sample, -1, 1)
@@ -230,10 +238,10 @@ def step(
 
  if use_clipped_model_output:
  # the model_output is always re-derived from the clipped x_0 in Glide
- model_output = (sample - alpha_prod_t ** (0.5) * pred_original_sample) / beta_prod_t ** (0.5)
+ pred_epsilon = (sample - alpha_prod_t ** (0.5) * pred_original_sample) / beta_prod_t ** (0.5)
 
  # 6. compute "direction pointing to x_t" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
- pred_sample_direction = (1 - alpha_prod_t_prev - std_dev_t**2) ** (0.5) * model_output
+ pred_sample_direction = (1 - alpha_prod_t_prev - std_dev_t**2) ** (0.5) * pred_epsilon
 
  # 7. compute x_t without "random noise" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
  prev_sample = alpha_prod_t_prev ** (0.5) * pred_original_sample + pred_sample_direction
 
@@ -111,7 +111,21 @@ def get_scheduler(name):
  set_alpha_to_one=False,
  # steps_offset= 1,
  trained_betas= None,
- tensor_format="np"
+ tensor_format="np",
+ )
+
+ if name == "ddim_v":
+ return DDIMScheduler(
+ beta_start=0.00085,
+ beta_end=0.012,
+ beta_schedule="scaled_linear",
+ clip_sample= False,
+ num_train_timesteps= 1000,
+ set_alpha_to_one=False,
+ # steps_offset= 1,
+ trained_betas= None,
+ tensor_format="np",
+ prediction_type="v_prediction"
  )
 
  if name == "lmsd":
 
@@ -287,15 +287,14 @@ def test_sd2_2():
 def test_sd2_4():
 
  img = sd.generate(
- prompt="A Tree" , 
+ prompt="a tree" , 
  img_height=512, 
  img_width=512, 
- seed=1, 
- num_steps=10,
+ seed=13, 
+ num_steps=30,
  tdict_path="/Volumes/ext_drive_1/sd_data_models/v2-1_768-nonema-pruned.tdict",
- batch_size=1,
  dtype="float32",
- scheduler='pndm',
+ scheduler='ddim_v',
  mode="txt2img" )
 
  # sd2_a_cat_111_test_sd2_3.png