---
# Job definition with a single "diffusion_trainer" process named "Character_2".
job: "extension"
config:
  name: "Character_2"
  process:
    - type: "diffusion_trainer"
      # Absolute path under /workspace/pinokio; machine-specific, adjust when
      # running this config elsewhere.
      training_folder: "/workspace/pinokio/api/ai-toolkit.git/app/output"
      # Relative path. NOTE(review): presumably resolved against the
      # launcher's working directory -- confirm.
      sqlite_db_path: "./aitk_db.db"
      device: "cuda"
      # Explicitly null: no trigger word is configured for this run.
      trigger_word: null
      performance_log_every: 10
      # Adapter settings: type "lokr", with linear 32 / alpha 32 and
      # conv 16 / alpha 16.
      network:
        type: "lokr"
        linear: 32
        linear_alpha: 32
        conv: 16
        conv_alpha: 16
        # NOTE(review): full-rank is enabled while explicit linear/conv
        # values are also set. Presumably the rank values are not used in
        # full-rank mode -- confirm against the trainer.
        lokr_full_rank: true
        lokr_factor: 4
        network_kwargs:
          # Empty list: no ignore patterns are configured.
          ignore_if_contains: []
      # Checkpoint output settings.
      save:
        dtype: "bf16"
        # NOTE(review): presumably counted in training steps (train.steps is
        # 8000 in this file) -- confirm the unit.
        save_every: 1000
        # NOTE(review): presumably older step saves beyond this count are
        # removed -- confirm before relying on early checkpoints.
        max_step_saves_to_keep: 4
        save_format: "diffusers"
        push_to_hub: false
      # Dataset list; this config defines exactly one entry.
      datasets:
        # Absolute, machine-specific path (folder is named "temp").
        - folder_path: "/workspace/pinokio/api/ai-toolkit.git/app/datasets/temp"
          mask_path: null
          mask_min_value: 0.1
          # Empty string: no default caption text is supplied.
          default_caption: ""
          caption_ext: "txt"
          caption_dropout_rate: 0.05
          cache_latents_to_disk: true
          is_reg: false
          network_weight: 1
          # Three resolutions are listed. NOTE(review): presumably used as
          # multi-resolution training buckets -- confirm.
          resolution:
            - 1024
            - 768
            - 512
          controls: []
          # num_frames is 1, i.e. single-frame (still image) settings.
          shrink_video_to_frames: true
          num_frames: 1
          flip_x: false
          flip_y: false
          num_repeats: 50
          diffusion_loss_weight: 1
          # Same value as depth_consistency.loss_weight (0.005) in this file.
          depth_loss_weight: 0.005
          # NOTE(review): train.loss_split is null in this file while this
          # dataset sets "sum"; which one takes precedence is not visible
          # here -- confirm.
          loss_split: "sum"
      # Training loop settings: 8000 steps, batch size 1, no gradient
      # accumulation, bf16, adamw8bit at lr 0.00025.
      train:
        # Weight-noise feature is enabled in "relative" mode.
        # NOTE(review): this is not a commonly documented option; presumably
        # specific to this build of the trainer -- confirm it is recognised.
        weight_noise:
          enabled: true
          mode: "relative"
          sigma: 0.0125
          log_every: 1
        max_grad_norm: 1
        batch_size: 1
        bypass_guidance_embedding: false
        steps: 8000
        gradient_accumulation: 1
        # Only the diffusion model is trained; the text encoder is not.
        train_unet: true
        train_text_encoder: false
        gradient_checkpointing: true
        noise_scheduler: "flowmatch"
        optimizer: "adamw8bit"
        timestep_type: "sigmoid"
        content_or_style: "balanced"
        optimizer_params:
          weight_decay: 0.0001
        # NOTE(review): the text encoder is unloaded while embedding caching
        # is off; confirm this combination is intended.
        unload_text_encoder: true
        cache_text_embeddings: false
        lr: 0.00025
        # EMA is off; ema_decay is presumably unused while use_ema is false.
        ema_config:
          use_ema: false
          ema_decay: 0.99
        skip_first_sample: true
        force_first_sample: false
        # Sampling is disabled here even though a `sample` section with
        # sample_every: 100 exists in this file.
        disable_sampling: true
        dtype: "bf16"
        # Preservation is off; the multiplier and class below are presumably
        # unused while it stays false.
        diff_output_preservation: false
        diff_output_preservation_multiplier: 1
        diff_output_preservation_class: "person"
        switch_boundary_every: 1
        loss_type: "mse"
        diffusion_loss_weight: 1
        # min_t 0 and max_t 1. NOTE(review): presumably the full timestep
        # range, i.e. no restriction -- confirm.
        diffusion_loss_max_t: 1
        diffusion_loss_min_t: 0
        # Four control points, not normalized, source preset "high_90s".
        # NOTE(review): both this section and custom_timestep_curve are
        # present; how they combine (or which wins) is not visible here.
        custom_timestep_distribution:
          points:
            - x: 0
              y: 1
            - x: 0.048801606351679024
              y: 3
            - x: 0.12443620508367365
              y: 1.1754955492521588
            - x: 1
              y: 1.124230987147281
          normalize: false
          sourceName: "high_90s"
        # Four control points, normalized, source preset "boost_low_mid_t".
        custom_timestep_curve:
          points:
            - x: 0
              y: 1
            # NOTE(review): the next two points are identical (same x and y).
            # Possibly an editor artifact; left unchanged because the
            # consumer's handling of duplicate points is not visible here.
            - x: 0.6226329803466797
              y: 2.0341763245432
            - x: 0.6226329803466797
              y: 2.0341763245432
            - x: 1
              y: 1
          normalize: true
          sourceName: "boost_low_mid_t"
        max_denoising_steps: 999
        min_denoising_steps: 0
        # Null here, while the dataset entry sets loss_split: "sum".
        loss_split: null
        do_differential_guidance: true
        differential_guidance_scale: 3
      # Logging settings: log_every is 1 and the UI logger is enabled.
      logging:
        log_every: 1
        use_ui_logger: true
      # Base model selection and memory options.
      model:
        # NOTE(review): looks like a Hugging Face Hub repo id rather than a
        # local path -- confirm it is reachable from the training host.
        name_or_path: "Tongyi-MAI/Z-Image"
        # Quantization is off for both the model and the text encoder; the
        # qtype values are presumably unused while the flags are false.
        quantize: false
        qtype: "qfloat8"
        quantize_te: false
        qtype_te: "qfloat8"
        arch: "zimage"
        low_vram: false
        model_kwargs: {}
        # Layer offloading is off; the two percentages below are presumably
        # unused while it stays false.
        layer_offloading: false
        layer_offloading_text_encoder_percent: 1
        layer_offloading_transformer_percent: 0.55
      # Preview sampling settings: one prompt, 1024x1024, 30 steps, seed 42.
      # NOTE(review): train.disable_sampling is true in this file, so these
      # settings are presumably inactive for this run -- confirm.
      sample:
        sampler: "flowmatch"
        sample_every: 100
        width: 1024
        height: 1024
        samples:
          - prompt: "a photo of a person"
        # Empty negative prompt.
        neg: ""
        seed: 42
        walk_seed: false
        guidance_scale: 4
        sample_steps: 30
        num_frames: 1
        fps: 1
      # Face/body identity loss settings. Disabled, and every *_loss_weight
      # below is 0, so these presumably contribute nothing to this run.
      face_id:
        enabled: false
        init_scale: 0.3
        identity_loss_weight: 0
        landmark_loss_weight: 0
        body_proportion_loss_weight: 0
        body_proportion_loss_min_t: 0.8
        body_proportion_loss_max_t: 1
        body_shape_loss_weight: 0
        # Note the key order: max_t is listed before min_t for body_shape.
        body_shape_loss_max_t: 1
        body_shape_loss_min_t: 0.8
        identity_loss_min_t: 0
        identity_loss_max_t: 0.9
        identity_metrics: false
        identity_loss_use_average: true
        identity_loss_min_cos: 0.4
      # Depth-consistency loss settings. There is no `enabled` key in this
      # section. NOTE(review): presumably a non-zero loss_weight is what
      # activates it -- confirm.
      depth_consistency:
        # Same value as the dataset entry's depth_loss_weight (0.005).
        loss_weight: 0.005
        input_size: 518
        preview_every: 1
        # "subject" -- presumably refers to the subject_mask section; verify.
        mask_source: "subject"
        loss_max_t: 1
        # NOTE(review): looks like a Hugging Face Hub model id -- confirm it
        # is reachable from the training host.
        model_id: "depth-anything/Depth-Anything-V2-Large-hf"
        loss_min_t: 0
        grad_checkpoint: true
        # ssi_weight is 0 and grad_weight is 1.
        ssi_weight: 0
        grad_weight: 1
        grad_scales: 6
      # Subject masking settings (enabled).
      subject_mask:
        enabled: true
        # Background weight 0, clothing weight 1.
        # NOTE(review): presumably background regions are excluded from the
        # loss while clothing is weighted normally -- confirm.
        background_loss_weight: 0
        clothing_loss_weight: 1
        # Debug previews are written; consider disabling for long runs if
        # disk usage matters.
        save_debug_previews: true
        # Both resolutions are 768.
        cache_resolution: 768
        segformer_res: 768
        sam_size: "small"
# Metadata; name matches config.name ("Character_2").
meta:
  name: "Character_2"
  # Quoted so the version stays a string rather than parsing as a float.
  version: "1.0"