Fix joint attention mask being applied to image-only self-attention

cursoragent · cursoragent · commit 01eaf44c8f78 · 2026-06-21T11:50:52.000Z
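Only prepare and apply the joint attention mask when encoder_hidden_states
is present; otherwise drop the mask (set it to None) before attention.
SD3.5 dual-attention blocks pass joint_attention_kwargs (including the text
mask) to the attn2 self-attention call as well. That call attends over image
tokens only, so it must ignore the mask.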
diff --git a/src/diffusers/models/attention_processor.py b/src/diffusers/models/attention_processor.py
@@ -1513,7 +1513,10 @@ def __call__(
             value = torch.cat([value, encoder_hidden_states_value_proj], dim=2)
 
         if attention_mask is not None:
-            attention_mask = attn.prepare_joint_attention_mask(attention_mask, key.shape[2], key.dtype)
+            if encoder_hidden_states is not None:
+                attention_mask = attn.prepare_joint_attention_mask(attention_mask, key.shape[2], key.dtype)
+            else:
+                attention_mask = None
 
         hidden_states = F.scaled_dot_product_attention(
             query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False