---
# Training configuration optimized for RTX3070 8GB VRAM - Qwen3-8B Model
# AI Trainer for unsloth/Qwen3-8B-bnb-4bit
#
# Layout: four top-level sections.
#   model    - which checkpoint to load and how to tokenize
#   training - batch size, schedule, optimizer, precision, logging, output
#   dataset  - which source files are collected for training
#   memory   - VRAM budget and attention memory options

model:
  name: "unsloth/Qwen3-8B-bnb-4bit"
  max_seq_length: 2048
  trust_remote_code: true
  use_fast_tokenizer: true
  padding_side: "left"
  truncation_side: "left"

training:
  # Memory-optimized batch size for RTX3070 8GB with Qwen3-8B
  per_device_train_batch_size: 1  # More conservative for larger model
  # Higher accumulation to maintain effective batch size (1 x 8 = 8 per device)
  gradient_accumulation_steps: 8

  # Training parameters
  num_train_epochs: 3
  # Keep the ".0" and the signed exponent: YAML 1.1 loaders (e.g. PyYAML)
  # read "1e-4" as a string, not a float.
  learning_rate: 1.0e-4  # Slightly lower for larger model
  # NOTE(review): warmup_steps and warmup_ratio are both set. If these keys are
  # forwarded to Hugging Face TrainingArguments, a non-zero warmup_steps takes
  # precedence and warmup_ratio is ignored - confirm which one is intended.
  warmup_steps: 15
  warmup_ratio: 0.1

  # Logging and saving
  logging_steps: 1
  save_steps: 100
  save_total_limit: 3

  # Evaluation
  # eval_steps matches save_steps, so every evaluation has a checkpoint that
  # load_best_model_at_end can restore.
  evaluation_strategy: "steps"
  eval_steps: 100
  load_best_model_at_end: true
  metric_for_best_model: "loss"
  greater_is_better: false  # lower loss is better

  # Data loading
  dataloader_num_workers: 2
  dataloader_pin_memory: true
  remove_unused_columns: false

  # Memory optimization - CRITICAL for RTX3070 8GB with 8B model
  use_gradient_checkpointing: true
  offload_to_cpu: false  # Explicitly no CPU offloading

  # Optimizer settings
  optim: "adamw_torch"
  weight_decay: 0.01
  adam_beta1: 0.9
  adam_beta2: 0.999
  adam_epsilon: 1.0e-8
  max_grad_norm: 1.0

  # Learning rate scheduler
  lr_scheduler_type: "cosine"

  # Precision - BF16 for better stability on modern GPUs
  bf16: true
  fp16: false
  tf32: true

  # Dataset settings
  dataset_shuffle: true
  dataset_seed: 42

  # Output settings
  output_dir: "./models"
  logging_dir: "./logs"
  report_to: ["tensorboard"]

dataset:
  # File filtering
  # NOTE(review): units are not stated here (bytes? lines?) - TODO confirm
  # against the code that reads these keys.
  min_file_size: 10
  max_file_size: 10000

  # Supported programming languages
  supported_languages:
    - python
    - javascript
    - typescript
    - java
    - cpp
    - c
    - csharp
    - php
    - ruby
    - go
    - rust
    - swift
    - kotlin
    - scala
    - sql
    - bash
    - yaml
    - json
    - xml
    - html
    - css
    - markdown

  # Files and directories to exclude
  # Entries are written as regular expressions; inside double quotes "\\."
  # yields the two characters "\." (a literal dot in the pattern).
  exclude_patterns:
    - "\\.git/"
    - "__pycache__/"
    - "\\.pytest_cache/"
    - "node_modules/"
    - "\\.venv/"
    - "venv/"
    - "package-lock\\.json$"
    - "yarn\\.lock$"
    - "\\.log$"
    - "\\.tmp$"
    - "\\.bak$"
    - "~\\$.*"
    - "\\.swp$"
    - "\\.swo$"
    - "\\.DS_Store"
    - "\\.pyc$"
    - "\\.pyo$"
    - "\\.pyd$"
    - "\\.so$"
    - "\\.dll$"
    - "\\.exe$"

memory:
  # Memory management for RTX3070 8GB with Qwen3-8B
  max_memory_usage: 0.95  # Use up to 95% for more aggressive memory usage
  enable_memory_tracking: true
  clear_cache_between_epochs: true

  # Attention optimization
  use_memory_efficient_attention: true
  attention_slicing: true
  slice_size: 1