2 changes: 2 additions & 0 deletions src/diffusers/__init__.py
@@ -535,6 +535,7 @@
"LucyEditPipeline",
"Lumina2Pipeline",
"Lumina2Text2ImgPipeline",
"NewbiePipeline",
"LuminaPipeline",
"LuminaText2ImgPipeline",
"MarigoldDepthPipeline",
@@ -1235,6 +1236,7 @@
LucyEditPipeline,
Lumina2Pipeline,
Lumina2Text2ImgPipeline,
NewbiePipeline,
LuminaPipeline,
LuminaText2ImgPipeline,
MarigoldDepthPipeline,
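With `NewbiePipeline` added to both the `_import_structure` dictionary and the type-checking import block, the new pipeline is exposed at the package root and resolved lazily on first access. A minimal check, assuming this branch of diffusers is installed:

```python
# Assumes this branch of diffusers is installed; the class itself lives in
# diffusers.pipelines.newbie and is resolved lazily on first access.
from diffusers import NewbiePipeline

print(NewbiePipeline.__module__)
```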
2 changes: 2 additions & 0 deletions src/diffusers/loaders/single_file_utils.py
@@ -2966,6 +2966,8 @@ def convert_lumina2_to_diffusers(checkpoint, **kwargs):
"cap_embedder": "time_caption_embed.caption_embedder",
"t_embedder.mlp.0": "time_caption_embed.timestep_embedder.linear_1",
"t_embedder.mlp.2": "time_caption_embed.timestep_embedder.linear_2",
"clip_text_pooled_proj": "time_caption_embed.clip_text_pooled_proj",
"time_text_embed": "time_caption_embed.time_text_embed",
"attention": "attn",
".out.": ".to_out.0.",
"k_norm": "norm_k",
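The two new entries extend the key-rename map that `convert_lumina2_to_diffusers` applies to single-file checkpoints, so the original `clip_text_pooled_proj` and `time_text_embed` weights land under the `time_caption_embed` module of the diffusers layout. A hypothetical sketch of how such a substring rename map is applied (illustration only, not the actual conversion code):

```python
# Illustration of substring-based key renaming; this is NOT the real
# convert_lumina2_to_diffusers implementation, just the idea behind the map.
RENAME_MAP = {
    "clip_text_pooled_proj": "time_caption_embed.clip_text_pooled_proj",
    "time_text_embed": "time_caption_embed.time_text_embed",
}

def rename_keys(state_dict):
    converted = {}
    for key, value in state_dict.items():
        new_key = key
        for old, new in RENAME_MAP.items():
            if old in new_key:
                new_key = new_key.replace(old, new)
        converted[new_key] = value
    return converted

# Example: {"clip_text_pooled_proj.1.weight": ...} becomes
# {"time_caption_embed.clip_text_pooled_proj.1.weight": ...}
print(rename_keys({"clip_text_pooled_proj.1.weight": 0}))
```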
35 changes: 32 additions & 3 deletions src/diffusers/models/transformers/transformer_lumina2.py
@@ -39,6 +39,7 @@ def __init__(
self,
hidden_size: int = 4096,
cap_feat_dim: int = 2048,
pooled_projection_dim: Optional[int] = None,
frequency_embedding_size: int = 256,
norm_eps: float = 1e-5,
) -> None:
@@ -56,12 +57,30 @@ def __init__(
RMSNorm(cap_feat_dim, eps=norm_eps), nn.Linear(cap_feat_dim, hidden_size, bias=True)
)

if pooled_projection_dim is not None:
self.clip_text_pooled_proj = nn.Sequential(
RMSNorm(pooled_projection_dim, eps=norm_eps),
nn.Linear(pooled_projection_dim, pooled_projection_dim, bias=True),
)
self.time_text_embed = nn.Sequential(
nn.SiLU(),
nn.Linear(min(hidden_size, 1024) + pooled_projection_dim, min(hidden_size, 1024)),
)

def forward(
self, hidden_states: torch.Tensor, timestep: torch.Tensor, encoder_hidden_states: torch.Tensor
self,
hidden_states: torch.Tensor,
timestep: torch.Tensor,
encoder_hidden_states: torch.Tensor,
pooled_projections: Optional[torch.Tensor] = None,
) -> Tuple[torch.Tensor, torch.Tensor]:
timestep_proj = self.time_proj(timestep).type_as(hidden_states)
time_embed = self.timestep_embedder(timestep_proj)
caption_embed = self.caption_embedder(encoder_hidden_states)
if pooled_projections is not None:
pooled_projections = self.clip_text_pooled_proj(pooled_projections)
time_embed = torch.cat([time_embed, pooled_projections], dim=-1)
time_embed = self.time_text_embed(time_embed)
return time_embed, caption_embed
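A hedged sketch of the new pooled-text path in this class, assuming the branch above is installed; the module sizes and tensor shapes are illustrative, not taken from a real checkpoint:

```python
import torch
from diffusers.models.transformers.transformer_lumina2 import (
    Lumina2CombinedTimestepCaptionEmbedding,
)

# Illustrative sizes only; real Lumina2 configs are larger.
embed = Lumina2CombinedTimestepCaptionEmbedding(
    hidden_size=2304, cap_feat_dim=1024, pooled_projection_dim=768
)

hidden_states = torch.randn(2, 16, 32, 32)         # only consulted for dtype via .type_as(...)
timestep = torch.tensor([1.0, 250.0])
encoder_hidden_states = torch.randn(2, 77, 1024)   # caption features, (batch, seq_len, cap_feat_dim)
pooled_projections = torch.randn(2, 768)           # pooled text embedding, (batch, pooled_projection_dim)

# pooled_projections is RMSNorm+Linear projected, concatenated with the timestep
# embedding, and fused back to the timestep-embedding width by time_text_embed.
time_embed, caption_embed = embed(
    hidden_states, timestep, encoder_hidden_states, pooled_projections=pooled_projections
)
print(time_embed.shape, caption_embed.shape)       # (2, 1024) and (2, 77, 2304) with these sizes
```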


@@ -381,6 +400,7 @@ def __init__(
axes_dim_rope: Tuple[int, int, int] = (32, 32, 32),
axes_lens: Tuple[int, int, int] = (300, 512, 512),
cap_feat_dim: int = 1024,
pooled_projection_dim: Optional[int] = None,
) -> None:
super().__init__()
self.out_channels = out_channels or in_channels
@@ -393,7 +413,10 @@ def __init__(
self.x_embedder = nn.Linear(in_features=patch_size * patch_size * in_channels, out_features=hidden_size)

self.time_caption_embed = Lumina2CombinedTimestepCaptionEmbedding(
hidden_size=hidden_size, cap_feat_dim=cap_feat_dim, norm_eps=norm_eps
hidden_size=hidden_size,
cap_feat_dim=cap_feat_dim,
pooled_projection_dim=pooled_projection_dim,
norm_eps=norm_eps,
)

# 2. Noise and context refinement blocks
@@ -461,6 +484,7 @@ def forward(
timestep: torch.Tensor,
encoder_hidden_states: torch.Tensor,
encoder_attention_mask: torch.Tensor,
pooled_projections: Optional[torch.Tensor] = None,
attention_kwargs: Optional[Dict[str, Any]] = None,
return_dict: bool = True,
) -> Union[torch.Tensor, Transformer2DModelOutput]:
@@ -482,7 +506,12 @@
# 1. Condition, positional & patch embedding
batch_size, _, height, width = hidden_states.shape

temb, encoder_hidden_states = self.time_caption_embed(hidden_states, timestep, encoder_hidden_states)
temb, encoder_hidden_states = self.time_caption_embed(
hidden_states,
timestep,
encoder_hidden_states,
pooled_projections=pooled_projections,
)

(
hidden_states,
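At the model boundary the only caller-visible change is the new optional `pooled_projections` keyword on `Lumina2Transformer2DModel.forward`. A hedged sketch of the expected inputs; the shapes, dtypes, and the `transformer` call in the comment are assumptions, since no pipeline code appears in this hunk:

```python
import torch

# Placeholder inputs illustrating the expected argument shapes; sizes are assumptions.
batch = 1
model_inputs = dict(
    hidden_states=torch.randn(batch, 16, 64, 64),          # latents, (batch, in_channels, H, W)
    timestep=torch.tensor([500.0]),
    encoder_hidden_states=torch.randn(batch, 77, 1024),    # caption features, (batch, seq_len, cap_feat_dim)
    encoder_attention_mask=torch.ones(batch, 77, dtype=torch.bool),
    pooled_projections=torch.randn(batch, 768),            # new in this PR; omit it to keep the old behavior
)

# With a loaded Lumina2Transformer2DModel (or a checkpoint configured with
# pooled_projection_dim), the call would look like:
#     sample = transformer(**model_inputs, return_dict=False)[0]
```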
2 changes: 2 additions & 0 deletions src/diffusers/pipelines/__init__.py
@@ -290,6 +290,7 @@
]
_import_structure["lumina"] = ["LuminaPipeline", "LuminaText2ImgPipeline"]
_import_structure["lumina2"] = ["Lumina2Pipeline", "Lumina2Text2ImgPipeline"]
_import_structure["newbie"] = ["NewbiePipeline"]
_import_structure["lucy"] = ["LucyEditPipeline"]
_import_structure["marigold"].extend(
[
@@ -722,6 +723,7 @@
from .lucy import LucyEditPipeline
from .lumina import LuminaPipeline, LuminaText2ImgPipeline
from .lumina2 import Lumina2Pipeline, Lumina2Text2ImgPipeline
from .newbie import NewbiePipeline
from .marigold import (
MarigoldDepthPipeline,
MarigoldIntrinsicsPipeline,
48 changes: 48 additions & 0 deletions src/diffusers/pipelines/newbie/__init__.py
@@ -0,0 +1,48 @@
from typing import TYPE_CHECKING

from ...utils import (
DIFFUSERS_SLOW_IMPORT,
OptionalDependencyNotAvailable,
_LazyModule,
get_objects_from_module,
is_torch_available,
is_transformers_available,
)


_dummy_objects = {}
_import_structure = {}


try:
if not (is_transformers_available() and is_torch_available()):
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
from ...utils import dummy_torch_and_transformers_objects # noqa F403

_dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
else:
_import_structure["pipeline_newbie"] = ["NewbiePipeline"]

if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
try:
if not (is_transformers_available() and is_torch_available()):
raise OptionalDependencyNotAvailable()

except OptionalDependencyNotAvailable:
from ...utils.dummy_torch_and_transformers_objects import *
else:
from .pipeline_newbie import NewbiePipeline

else:
import sys

sys.modules[__name__] = _LazyModule(
__name__,
globals()["__file__"],
_import_structure,
module_spec=__spec__,
)

for name, value in _dummy_objects.items():
setattr(sys.modules[__name__], name, value)
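The new subpackage follows the same lazy-import scaffolding as the other pipeline folders, so once `pipeline_newbie.py` (not shown in this diff) is in place the pipeline loads the usual way. A hedged usage sketch; the repository id, call arguments, and output attribute are assumptions based on the common text-to-image pipeline convention, not on code in this PR:

```python
import torch
from diffusers import NewbiePipeline

# "some-org/newbie-checkpoint" is a placeholder repo id, not a real checkpoint.
pipe = NewbiePipeline.from_pretrained("some-org/newbie-checkpoint", torch_dtype=torch.bfloat16)
pipe.to("cuda")

# The prompt keyword and .images output follow the usual diffusers convention;
# the actual signature is defined in pipeline_newbie.py.
image = pipe(prompt="a watercolor fox in a snowy forest").images[0]
image.save("newbie_sample.png")
```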