
Commit 24b0fce

Do padding of audio embed in model for humo for more flexibility. (#9935)
1 parent 1ea8c54 commit 24b0fce

File tree: 2 files changed, +3 −4 lines
lines changed


comfy/ldm/wan/model.py

Lines changed: 3 additions & 0 deletions

@@ -1551,6 +1551,9 @@ def forward_orig(
         context_img_len = None

         if audio_embed is not None:
+            if reference_latent is not None:
+                zero_audio_pad = torch.zeros(audio_embed.shape[0], reference_latent.shape[-3], *audio_embed.shape[2:], device=audio_embed.device, dtype=audio_embed.dtype)
+                audio_embed = torch.cat([audio_embed, zero_audio_pad], dim=1)
             audio = self.audio_proj(audio_embed).permute(0, 3, 1, 2).flatten(2).transpose(1, 2)
         else:
             audio = None
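In effect, the model now appends zero frames to the audio embedding along its frame axis whenever a reference latent is present, so the embedding's length matches the latent sequence once the reference frames are accounted for. Below is a minimal standalone sketch of that tensor operation; the variable names mirror the diff, but the shapes (batch 1, 20 audio frames, a 5-feature window, 1280-dim features, a reference latent with one temporal frame at dim -3) are illustrative assumptions, not values taken from the model:

import torch

# Illustrative stand-ins for the real tensors (shapes are assumptions):
# a [batch, frames, window, feat] audio embedding and a reference latent
# whose temporal axis sits at dim -3.
audio_embed = torch.randn(1, 20, 5, 1280)
reference_latent = torch.randn(1, 16, 1, 60, 104)

# Pad the audio embedding with zeros along dim=1 (frames) by the number of
# reference-latent frames, keeping device and dtype consistent.
zero_audio_pad = torch.zeros(
    audio_embed.shape[0], reference_latent.shape[-3], *audio_embed.shape[2:],
    device=audio_embed.device, dtype=audio_embed.dtype,
)
audio_embed = torch.cat([audio_embed, zero_audio_pad], dim=1)

print(audio_embed.shape)  # torch.Size([1, 21, 5, 1280])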

comfy_extras/nodes_wan.py

Lines changed: 0 additions & 4 deletions

@@ -1095,10 +1095,6 @@ def execute(cls, positive, negative, vae, width, height, length, batch_size, ref
         audio_emb = torch.stack([feat0, feat1, feat2, feat3, feat4], dim=2)[0] # [T, 5, 1280]
         audio_emb, _ = get_audio_emb_window(audio_emb, length, frame0_idx=0)

-        # pad for ref latent
-        zero_audio_pad = torch.zeros(ref_latent.shape[2], *audio_emb.shape[1:], device=audio_emb.device, dtype=audio_emb.dtype)
-        audio_emb = torch.cat([audio_emb, zero_audio_pad], dim=0)
-
         audio_emb = audio_emb.unsqueeze(0)
         audio_emb_neg = torch.zeros_like(audio_emb)
         positive = node_helpers.conditioning_set_values(positive, {"audio_embed": audio_emb})
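With the padding moved into the model, the node side simply stacks the five audio feature streams into a [T, 5, 1280] embedding, windows it, adds a batch dimension, and passes it on via the conditioning; it no longer needs to know whether a reference latent is attached. A rough sketch of the remaining shape flow, with random tensors standing in for feat0..feat4 and the get_audio_emb_window step omitted:

import torch

# Hypothetical stand-ins for feat0..feat4: five [1, T, 1280] feature streams.
T = 20
feats = [torch.randn(1, T, 1280) for _ in range(5)]

# Stack into [T, 5, 1280], as the node does (windowing omitted in this sketch).
audio_emb = torch.stack(feats, dim=2)[0]
print(audio_emb.shape)  # torch.Size([20, 5, 1280])

# No reference-latent padding here anymore; just add the batch dim and hand
# the embedding to the conditioning under the "audio_embed" key.
audio_emb = audio_emb.unsqueeze(0)  # [1, T, 5, 1280]
audio_emb_neg = torch.zeros_like(audio_emb)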
