Instructions to use Overworld/Waypoint-1-Small with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Diffusers
How to use Overworld/Waypoint-1-Small with Diffusers:
pip install -U diffusers transformers accelerate
import torch
from diffusers import DiffusionPipeline

# switch to "mps" for apple devices
pipe = DiffusionPipeline.from_pretrained("Overworld/Waypoint-1-Small", dtype=torch.bfloat16, device_map="cuda")

prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
image = pipe(prompt).images[0]
- Notebooks
- Google Colab
- Kaggle
| # Copyright (C) 2025 Hugging Face Team and Overworld | |
| # | |
| # This program is free software: you can redistribute it and/or modify | |
| # it under the terms of the GNU General Public License as published by | |
| # the Free Software Foundation, either version 3 of the License, or | |
| # (at your option) any later version. | |
| # | |
| # This program is distributed in the hope that it will be useful, | |
| # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| # GNU General Public License for more details. | |
| # | |
| # You should have received a copy of the GNU General Public License | |
| # along with this program. If not, see <https://www.gnu.org/licenses/>. | |
| """Neural network building blocks for WorldModel transformer.""" | |
| import warnings | |
| import einops as eo | |
| import torch | |
| from torch import nn | |
| import torch.nn.functional as F | |
class NoCastModule(torch.nn.Module):
    """Module that prevents dtype casting during .to() calls.

    Device moves are still honoured. Any dtype change requested through
    ``.to()`` or applied through ``_apply`` (which ``.half()``/``.float()``
    use) is dropped or undone, and a warning is emitted where the code
    detects it.
    """

    def _apply(self, fn, *args, **kwargs):
        """Apply ``fn`` to every tensor, restoring each tensor's original dtype.

        Extra positional/keyword arguments are forwarded unchanged so that
        callers passing ``recurse=...`` (for example ``nn.Module.to_empty``)
        keep working.
        """

        def keep_dtype(t):
            old_dtype = t.dtype
            out = fn(t)
            if out.dtype is not old_dtype:
                warnings.warn(
                    f"{self.__class__.__name__}: requested dtype cast ignored; "
                    f"keeping {old_dtype}.",
                    stacklevel=3,
                )
                # Undo the cast but keep whatever else fn did (e.g. device move).
                out = out.to(dtype=old_dtype)
            return out

        return super()._apply(keep_dtype, *args, **kwargs)

    def to(self, *args, **kwargs):
        """Move the module like ``nn.Module.to`` but strip every dtype request.

        Handles the three ways a dtype can be supplied: a reference tensor as
        the first positional argument, a ``dtype=`` keyword, or a positional
        ``torch.dtype``. Returns whatever ``nn.Module.to`` returns.
        """
        warn_cast = False

        # m.to(ref_tensor): use ref's device, ignore its dtype
        if args and isinstance(args[0], torch.Tensor):
            ref, *rest = args
            args = (ref.device, *rest)
            # Explicit None checks: `a or b` would evaluate bool(tensor), which
            # raises for multi-element tensors and is False for a lone zero.
            base = next(self.parameters(), None)
            if base is None:
                base = next(self.buffers(), None)
            if base is not None and ref.dtype is not base.dtype:
                warn_cast = True

        # keyword dtype
        if kwargs.pop("dtype", None) is not None:
            warn_cast = True

        # positional dtype (dropped without a warning, as before)
        args = tuple(a for a in args if not isinstance(a, torch.dtype))

        if warn_cast:
            warnings.warn(
                f"{self.__class__.__name__}.to: requested dtype cast ignored; "
                "keeping existing dtypes.",
                stacklevel=2,
            )
        return super().to(*args, **kwargs)
| def rms_norm(x: torch.Tensor) -> torch.Tensor: | |
| """Root mean square layer normalization.""" | |
| return F.rms_norm(x, (x.size(-1),)) | |
class MLP(nn.Module):
    """Two bias-free linear layers with a SiLU activation between them."""

    def __init__(self, dim_in, dim_middle, dim_out):
        super().__init__()
        # Attribute names and creation order are kept as-is: they determine
        # the state-dict keys and the order in which random init is drawn.
        self.fc1 = nn.Linear(in_features=dim_in, out_features=dim_middle, bias=False)
        self.fc2 = nn.Linear(in_features=dim_middle, out_features=dim_out, bias=False)

    def forward(self, x):
        hidden = self.fc1(x)
        activated = F.silu(hidden)
        return self.fc2(activated)
class AdaLN(nn.Module):
    """Adaptive layer norm: RMS-normalize ``x``, then scale and shift it.

    The scale and shift come from a bias-free linear projection of
    ``silu(cond)``. Each of the ``n`` conditioning vectors is shared by
    ``m = nm // n`` consecutive tokens of ``x``.
    """

    def __init__(self, dim):
        super().__init__()
        self.fc = nn.Linear(dim, 2 * dim, bias=False)

    def forward(self, x, cond):
        # cond: [b, n, d], x: [b, n*m, d]
        batch, groups, width = cond.shape
        _, tokens, _ = x.shape
        per_group = tokens // groups

        modulation = self.fc(F.silu(cond))  # [b, n, 2d]
        # Repeat each group's modulation for every token in that group.
        modulation = (
            modulation.view(batch, groups, 1, 2 * width)  # [b, n, 1, 2d]
            .expand(-1, -1, per_group, -1)  # [b, n, m, 2d]
            .reshape(batch, tokens, 2 * width)  # [b, nm, 2d]
        )
        scale, shift = modulation.chunk(2, dim=-1)  # [b, nm, d] each
        return rms_norm(x) * (1 + scale) + shift
def ada_rmsnorm(x, scale, bias):
    """RMS-normalize ``x`` and modulate it with per-group ``scale`` and ``bias``.

    ``x`` is split along dim 1 into ``scale.size(1)`` groups. ``scale`` and
    ``bias`` each get a singleton axis at dim 2 so that one value is
    broadcast over all tokens of its group. The result has ``x``'s layout.
    """
    groups = scale.size(1)
    grouped = eo.rearrange(x, "b (n m) d -> b n m d", n=groups)
    normed = rms_norm(grouped)
    modulated = normed * (1 + scale.unsqueeze(2)) + bias.unsqueeze(2)
    return eo.rearrange(modulated, "b n m d -> b (n m) d")
def ada_gate(x, gate):
    """Multiply ``x`` by ``gate``, with one gate shared per group of tokens.

    ``x`` is split along dim 1 into ``gate.size(1)`` groups. ``gate`` gets a
    singleton axis at dim 2 so it broadcasts over the tokens of each group.
    The result has ``x``'s layout.
    """
    groups = gate.size(1)
    grouped = eo.rearrange(x, "b (n m) d -> b n m d", n=groups)
    gated = grouped * gate.unsqueeze(2)
    return eo.rearrange(gated, "b n m d -> b (n m) d")
class NoiseConditioner(NoCastModule):
    """Sigma -> Fourier features -> dense embedding.

    The input is scaled by 1000, turned into sin/cos Fourier features and
    projected by an MLP. No logSNR conversion is performed in this code;
    sigma is used directly.
    """

    def __init__(self, dim, fourier_dim=512, base=10_000.0):
        """Build the frequency table and the projection MLP.

        Args:
            dim: Size of the output embedding.
            fourier_dim: Number of Fourier features (sin + cos); must be even.
            base: Base of the log-spaced frequencies, which run from
                ``base**0`` down to ``base**-1``.
        """
        super().__init__()
        assert fourier_dim % 2 == 0
        half = fourier_dim // 2
        # Non-persistent: the table is rebuilt here, never stored in state_dict.
        self.freq = nn.Buffer(
            torch.logspace(0, -1, steps=half, base=base, dtype=torch.float32),
            persistent=False,
        )
        self.mlp = MLP(fourier_dim, dim * 4, dim)

    def forward(self, s, eps=torch.finfo(torch.float32).eps):
        """Embed noise levels ``s``.

        Args:
            s: Tensor of any shape.
            eps: Accepted for interface compatibility; not used by this
                implementation.

        Returns:
            Tensor of shape ``(*s.shape, E)`` in ``s``'s dtype, where ``E`` is
            the MLP's output width.
        """
        assert self.freq.dtype == torch.float32
        orig_dtype, shape = s.dtype, s.shape

        with torch.autocast("cuda", enabled=False):
            s = s.reshape(-1).float()  # fp32 for fourier numerical stability
            s = s * 1000  # expressive rotation range

            # calculate fourier features
            phase = s[:, None] * self.freq[None, :]
            emb = torch.cat((torch.sin(phase), torch.cos(phase)), dim=-1)
            emb = emb * 2**0.5  # Ensure unit variance
            emb = self.mlp(emb)

        # Pass the feature size explicitly: view(..., -1) is ambiguous, and
        # therefore raises, when `s` has zero elements.
        return emb.to(orig_dtype).view(*shape, emb.size(-1))