|
1 | 1 | #!/usr/bin/env python3 |
2 | | -from diffusers import UNetModel, GaussianDiffusion |
| 2 | +from diffusers import UNetModel, GaussianDDPMScheduler |
3 | 3 | import torch |
4 | 4 | import torch.nn.functional as F |
5 | | - |
6 | | -unet = UNetModel.from_pretrained("fusing/ddpm_dummy") |
7 | | -diffusion = GaussianDiffusion.from_config("fusing/ddpm_dummy") |
8 | | - |
| 5 | +import numpy as np |
| 6 | +import PIL.Image |
| 7 | +import tqdm |
| 8 | + |
| 9 | +#torch_device = "cuda" |
| 10 | +# |
| 11 | +#unet = UNetModel.from_pretrained("/home/patrick/ddpm-lsun-church") |
| 12 | +#unet.to(torch_device) |
| 13 | +# |
| 14 | +#TIME_STEPS = 10 |
| 15 | +# |
| 16 | +#scheduler = GaussianDDPMScheduler.from_config("/home/patrick/ddpm-lsun-church", timesteps=TIME_STEPS) |
| 17 | +# |
| 18 | +#diffusion_config = { |
| 19 | +# "beta_start": 0.0001, |
| 20 | +# "beta_end": 0.02, |
| 21 | +# "num_diffusion_timesteps": TIME_STEPS, |
| 22 | +#} |
| 23 | +# |
9 | 24 | # 2. Do one denoising step with model |
10 | | -batch_size, num_channels, height, width = 1, 3, 32, 32 |
11 | | -dummy_noise = torch.ones((batch_size, num_channels, height, width)) |
12 | | - |
13 | | - |
14 | | -TIME_STEPS = 10 |
15 | | - |
16 | | - |
| 25 | +#batch_size, num_channels, height, width = 1, 3, 256, 256 |
| 26 | +# |
| 27 | +#torch.manual_seed(0) |
| 28 | +#noise_image = torch.randn(batch_size, num_channels, height, width, device="cuda") |
| 29 | +# |
| 30 | +# |
17 | 31 | # Helper |
18 | | -def extract(a, t, x_shape): |
19 | | - b, *_ = t.shape |
20 | | - out = a.gather(-1, t) |
21 | | - return out.reshape(b, *((1,) * (len(x_shape) - 1))) |
| 32 | +#def noise_like(shape, device, repeat=False): |
| 33 | +# def repeat_noise(): |
| 34 | +# return torch.randn((1, *shape[1:]), device=device).repeat(shape[0], *((1,) * (len(shape) - 1))) |
| 35 | +# |
| 36 | +# def noise(): |
| 37 | +# return torch.randn(shape, device=device) |
| 38 | +# |
| 39 | +# return repeat_noise() if repeat else noise() |
| 40 | +# |
| 41 | +# |
| 42 | +#betas = np.linspace(diffusion_config["beta_start"], diffusion_config["beta_end"], diffusion_config["num_diffusion_timesteps"], dtype=np.float64) |
| 43 | +#betas = torch.tensor(betas, device=torch_device) |
| 44 | +#alphas = 1.0 - betas |
| 45 | +# |
| 46 | +#alphas_cumprod = torch.cumprod(alphas, axis=0) |
| 47 | +#alphas_cumprod_prev = F.pad(alphas_cumprod[:-1], (1, 0), value=1.0) |
| 48 | +# |
| 49 | +#posterior_mean_coef1 = betas * torch.sqrt(alphas_cumprod_prev) / (1.0 - alphas_cumprod) |
| 50 | +#posterior_mean_coef2 = (1.0 - alphas_cumprod_prev) * torch.sqrt(alphas) / (1.0 - alphas_cumprod) |
| 51 | +# |
| 52 | +#posterior_variance = betas * (1.0 - alphas_cumprod_prev) / (1.0 - alphas_cumprod) |
| 53 | +#posterior_log_variance_clipped = torch.log(posterior_variance.clamp(min=1e-20)) |
| 54 | +# |
| 55 | +# |
| 56 | +#sqrt_recip_alphas_cumprod = torch.sqrt(1.0 / alphas_cumprod) |
| 57 | +#sqrt_recipm1_alphas_cumprod = torch.sqrt(1.0 / alphas_cumprod - 1) |
| 58 | +# |
| 59 | +# |
| 60 | +#noise_coeff = (1 - alphas) / torch.sqrt(1 - alphas_cumprod) |
| 61 | +#coeff = 1 / torch.sqrt(alphas) |
| 62 | + |
| 63 | + |
def real_fn():
    """Manual DDPM ancestral-sampling loop (Algorithm 2 of https://arxiv.org/pdf/2006.11239.pdf).

    NOTE(review): this relies on module-level globals that are currently
    commented out above (noise_image, TIME_STEPS, torch_device, unet,
    noise_like, coeff, noise_coeff, posterior_log_variance_clipped) — calling
    it as-is raises NameError. Re-enable those definitions before use.
    """
    # Compare the following to Algorithm 2 Sampling of paper: https://arxiv.org/pdf/2006.11239.pdf
    # 1: x_t ~ N(0,1)
    x_t = noise_image
    # 2: for t = T, ...., 1 do
    for i in reversed(range(TIME_STEPS)):
        t = torch.tensor([i]).to(torch_device)
        # 3: z ~ N(0, 1)
        noise = noise_like(x_t.shape, torch_device)

        # 4: √1αtxt − √1−αt1−α¯tθ(xt, t) + σtz
        # ------------------------- MODEL ------------------------------------#
        with torch.no_grad():
            pred_noise = unet(x_t, t)  # pred epsilon_theta

        # pred_x = sqrt_recip_alphas_cumprod[t] * x_t - sqrt_recipm1_alphas_cumprod[t] * pred_noise
        # pred_x.clamp_(-1.0, 1.0)
        # pred mean
        # posterior_mean = posterior_mean_coef1[t] * pred_x + posterior_mean_coef2[t] * x_t
        # --------------------------------------------------------------------#

        # Posterior mean written directly in terms of x_t and epsilon_theta
        # (the un-clipped form of the commented-out computation above).
        posterior_mean = coeff[t] * (x_t - noise_coeff[t] * pred_noise)

        # ------------------------- Variance Scheduler -----------------------#
        # pred variance (log-space, pre-clamped against log(0))
        posterior_log_variance = posterior_log_variance_clipped[t]

        b, *_, device = *x_t.shape, x_t.device
        # Mask out the noise term at t == 0 (final step is deterministic).
        nonzero_mask = (1 - (t == 0).float()).reshape(b, *((1,) * (len(x_t.shape) - 1)))
        posterior_variance = nonzero_mask * (0.5 * posterior_log_variance).exp()
        # --------------------------------------------------------------------#

        x_t_1 = (posterior_mean + posterior_variance * noise).to(torch.float32)
        x_t = x_t_1

    # Debug check: scalar summary of the final sample.
    print(x_t.abs().sum())
| 100 | + |
| 101 | + |
def post_process_to_image(x_t):
    """Convert a batched image tensor in [-1, 1] (N, C, H, W) to a PIL image.

    Only the first element of the batch is returned.
    """
    # detach() so tensors that still track gradients can be converted;
    # NCHW -> NHWC for PIL, and move off the accelerator first.
    image = x_t.detach().cpu().permute(0, 2, 3, 1)
    # Map [-1, 1] -> [0, 255]. Clamp so out-of-range values saturate instead
    # of wrapping around when cast to uint8.
    image = ((image + 1.0) * 127.5).clamp(0, 255)
    image = image.numpy().astype(np.uint8)

    return PIL.Image.fromarray(image[0])
| 108 | + |
| 109 | + |
# Reference implementation kept around for cross-checking scheduler output.
from pytorch_diffusion import Diffusion

# diffusion = Diffusion.from_pretrained("lsun_church")
# samples = diffusion.denoise(1)
#
# image = post_process_to_image(samples)
# image.save("check.png")


device = "cuda"

# Load scheduler and model from a local checkpoint.
# NOTE(review): paths are machine-specific — parameterize before sharing.
scheduler = GaussianDDPMScheduler.from_config("/home/patrick/ddpm-lsun-church", timesteps=10)
model = UNetModel.from_pretrained("/home/patrick/ddpm-lsun-church").to(device)


# 1: x_T ~ N(0, I) — fixed seed for reproducible samples.
torch.manual_seed(0)
next_image = scheduler.sample_noise((1, model.in_channels, model.resolution, model.resolution), device=device)

# 2: for t = T, ..., 1 — ancestral sampling (Algorithm 2, https://arxiv.org/pdf/2006.11239.pdf)
for t in tqdm.tqdm(reversed(range(len(scheduler))), total=len(scheduler)):
    # Coefficients for estimating x_0 from (x_t, eps) and for the posterior
    # mean of x_{t-1} given x_t and the clipped x_0 estimate.
    clip_image_coeff = 1 / torch.sqrt(scheduler.get_alpha_prod(t))
    clip_noise_coeff = torch.sqrt(1 / scheduler.get_alpha_prod(t) - 1)
    image_coeff = (1 - scheduler.get_alpha_prod(t - 1)) * torch.sqrt(scheduler.get_alpha(t)) / (1 - scheduler.get_alpha_prod(t))
    clip_coeff = torch.sqrt(scheduler.get_alpha_prod(t - 1)) * scheduler.get_beta(t) / (1 - scheduler.get_alpha_prod(t))

    # Predict the noise residual eps_theta(x_t, t); inference only.
    with torch.no_grad():
        noise_residual = model(next_image, t)

    # Estimate x_0, clip it to the valid image range, then form the
    # posterior mean of x_{t-1}.
    pred_mean = clip_image_coeff * next_image - clip_noise_coeff * noise_residual
    pred_mean = torch.clamp(pred_mean, -1, 1)
    image = clip_coeff * pred_mean + image_coeff * next_image

    # Sample the variance term (sigma_t * z).
    # NOTE(review): assuming sample_variance returns the already-scaled noise
    # and is zero at t == 0 — confirm against the scheduler implementation.
    variance = scheduler.sample_variance(t, image.shape, device=device)

    # Sample the previous image x_{t-1}.
    sampled_image = image + variance

    next_image = sampled_image


image = post_process_to_image(next_image)
image.save("example_new.png")
0 commit comments