# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This sentinel is a reminder to choose a real run name.
run_name: ''

metrics_file: "" # For testing: a local file that stores scalar metrics. If empty, no metrics are written.
# If True, save metrics such as loss and TFLOPS to GCS under {base_output_directory}/{run_name}/metrics/
gcs_metrics: True
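# For example (hypothetical values): with base_output_directory: "gs://my-maxtext-outputs/" and
# run_name: 'my-sd2-run', the scalar metrics would land under gs://my-maxtext-outputs/my-sd2-run/metrics/.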
log_period: 100

pretrained_model_name_or_path: 'stabilityai/stable-diffusion-2-base'
revision: 'main'
dtype: 'bfloat16'
# Set to True to load the weights from PyTorch checkpoints.
from_pt: True
split_head_dim: True

# Output directory
# Create a GCS bucket, e.g. my-maxtext-outputs, and set this to "gs://my-maxtext-outputs/"
base_output_directory: ""

# Parallelism
mesh_axes: ['data', 'fsdp', 'tensor']

# Logical axis names:
# batch : batch dimension of data and activations
# hidden :
# embed : hidden dim of the attention qkv dense layers, named 'embed'
# heads : attention head dim = num_heads * head_dim
# length : attention sequence length
# temb_in : dense.shape[0] of the resnet dense before conv
# out_c : dense.shape[1] of the resnet dense before conv
# out_channels : conv.shape[-1] activation
# keep_1 : conv.shape[0] weight
# keep_2 : conv.shape[1] weight
# conv_in : conv.shape[2] weight
# conv_out : conv.shape[-1] weight
logical_axis_rules: [
                      ['batch', 'data'],
                      ['activation_batch', 'data'],
                      ['activation_length', 'fsdp'],
                      ['out_channels', 'fsdp'],
                      ['conv_out', 'fsdp'],
                      ['length', 'fsdp']
                    ]
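# As a sketch of how these rules are applied (assuming standard Flax logical partitioning): an array
# annotated with logical axes ('activation_batch', 'activation_length') would be sharded along the
# 'data' mesh axis on its batch dimension and the 'fsdp' mesh axis on its length dimension, per the
# ['activation_batch', 'data'] and ['activation_length', 'fsdp'] rules above.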
data_sharding: [['data', 'fsdp', 'tensor']]

# One axis for each parallelism type may hold a placeholder (-1)
# value to auto-shard based on available slices and devices.
# By default, the product of the DCN axes should equal the number of slices
# and the product of the ICI axes should equal the number of devices per slice.
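# Worked example (hypothetical topology): on 2 slices with 4 chips each, you could set
# dcn_data_parallelism: 2 (or leave it at -1 to infer it) and ici_fsdp_parallelism: 4, keeping every
# other axis at 1, since 2 * 1 * 1 = 2 slices and 1 * 4 * 1 = 4 devices per slice.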
dcn_data_parallelism: -1 # recommended DCN axis to be auto-sharded
dcn_fsdp_parallelism: 1
dcn_tensor_parallelism: 1
ici_data_parallelism: -1 # recommended ICI axis to be auto-sharded for TPUv5e
ici_fsdp_parallelism: 1 # recommended ICI axis to be auto-sharded
ici_tensor_parallelism: 1

# Dataset
# Set either dataset_name (e.g. a Hugging Face dataset id) or train_data_dir; one of the two has to be set.
dataset_name: 'lambdalabs/pokemon-blip-captions'
train_data_dir: ''
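# For instance (hypothetical path), to train on your own images instead of the dataset above, you
# might set dataset_name: '' and point train_data_dir at a directory of images,
# e.g. train_data_dir: '/data/my-training-images'.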
dataset_config_name: ''
cache_dir: ''
image_column: 'image'
caption_column: 'text'
resolution: 512
center_crop: False
random_flip: False
# If cache_latents_text_encoder_outputs is True,
# num_proc is set to 1.
tokenize_captions_num_proc: 4
transform_images_num_proc: 4
reuse_example_batch: False
enable_data_shuffling: True

# Prepare image latents and text encoder outputs
# during dataset creation to reduce memory consumption.
cache_latents_text_encoder_outputs: True


# Training loop
learning_rate: 1.e-7
scale_lr: False
max_train_samples: -1
# max_train_steps takes priority over num_train_epochs.
max_train_steps: 800
seed: 0
output_dir: 'sd-model-finetuned'
tensorboard_dir: 'gs://shahrokhi-maxdiffusion-v5'
per_device_batch_size: 1

cosine_learning_rate_final_fraction: 0.1
warmup_steps_fraction: 0.1
learning_rate_schedule_steps: -1 # By default the length of the schedule is set to the number of training steps.
# However, you may choose a longer schedule (learning_rate_schedule_steps > max_train_steps), in which case
# training will end before the learning rate has fully decayed. Or you may choose a shorter schedule,
# where the remaining steps run with a learning rate of 0.
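# A worked example under the settings above (assuming linear warmup followed by cosine decay):
# with max_train_steps: 800, learning_rate_schedule_steps: -1 (i.e. 800) and warmup_steps_fraction: 0.1,
# the learning rate warms up over the first 0.1 * 800 = 80 steps, then follows a cosine curve down to
# cosine_learning_rate_final_fraction * learning_rate = 0.1 * 1e-7 = 1e-8 by step 800.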

# AdamW optimizer parameters
adam_b1: 0.9 # Exponential decay rate to track the first moment of past gradients.
adam_b2: 0.999 # Exponential decay rate to track the second moment of past gradients.
adam_eps: 1.e-8 # A small constant applied to denominator outside of the square root.
adam_weight_decay: 1.e-2 # AdamW weight decay
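# For reference, a sketch of the AdamW update these parameters drive (standard formulation, not
# maxdiffusion-specific code): with gradient g_t,
#   m_t = b1 * m_{t-1} + (1 - b1) * g_t
#   v_t = b2 * v_{t-1} + (1 - b2) * g_t^2
#   update = m_hat_t / (sqrt(v_hat_t) + eps)   # eps added outside the square root
#   theta_t = theta_{t-1} - lr * (update + weight_decay * theta_{t-1})
# where m_hat_t and v_hat_t are the bias-corrected moments.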

max_grad_norm: 1.0

enable_profiler: True

# Generation parameters
prompt: "A magical castle in the middle of a forest, artistic drawing"
negative_prompt: "purple, red"
guidance_scale: 7.5
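# For context (standard classifier-free guidance; the exact implementation may differ):
# noise_pred = noise_uncond + guidance_scale * (noise_text - noise_uncond),
# so guidance_scale: 7.5 pushes the prediction toward the prompt-conditioned direction, and the
# negative_prompt is used for the unconditioned branch.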
num_inference_steps: 30
# Note: 'seed' also appears in the training-loop section above; with most YAML loaders the
# later value below is the one that takes effect.
seed: 47