# Training configuration optimized for an RTX 3070 with 8 GB of VRAM.
# AI Trainer for unsloth/Qwen2.5-Coder-7B-Instruct-bnb-4bit
---
# Base model and tokenizer settings.
model:
  name: "unsloth/Qwen2.5-Coder-7B-Instruct-bnb-4bit"
  max_seq_length: 512
  trust_remote_code: true
  use_fast_tokenizer: true
  padding_side: "left"
  truncation_side: "left"

# Trainer hyperparameters.
# NOTE(review): several keys below share names with Hugging Face
# TrainingArguments; comments about precedence assume the trainer forwards
# them unchanged -- confirm against the consuming code.
training:
  # Memory-optimized batch size for RTX 3070 8 GB: one sample per step,
  # accumulated over 16 steps before each optimizer update.
  per_device_train_batch_size: 1
  gradient_accumulation_steps: 16
  # Hard cap on optimizer steps. In Hugging Face semantics a positive
  # max_steps takes precedence over num_train_epochs.
  max_steps: 50

  # Training parameters
  num_train_epochs: 1
  learning_rate: 2.0e-4
  # Both warmup settings are kept for consumers that read either key. In
  # Hugging Face semantics a non-zero warmup_steps overrides warmup_ratio.
  warmup_steps: 10
  warmup_ratio: 0.1

  # Logging and saving
  logging_steps: 1
  # Must not exceed max_steps, otherwise no checkpoint is written before
  # training stops. Kept a multiple of eval_steps, which Hugging Face
  # requires when load_best_model_at_end is true.
  save_steps: 25
  save_total_limit: 3

  # Evaluation
  eval_strategy: "steps"
  # Must not exceed max_steps, otherwise no evaluation runs and
  # load_best_model_at_end has no metric to compare.
  eval_steps: 25
  load_best_model_at_end: true
  metric_for_best_model: "loss"
  greater_is_better: false

  # Data loading
  dataloader_num_workers: 0  # Temporarily disabled for debugging
  dataloader_pin_memory: true
  remove_unused_columns: false

  # Memory optimization - CRITICAL for RTX 3070 8 GB
  use_gradient_checkpointing: true
  offload_to_cpu: false  # Explicitly no CPU offloading

  # Additional memory optimizations
  dataloader_drop_last: true

  # Aggressive memory settings for 8 GB GPU
  per_device_eval_batch_size: 1
  eval_accumulation_steps: 1

  # Optimizer settings
  optim: "adamw_torch"
  weight_decay: 0.01
  adam_beta1: 0.9
  adam_beta2: 0.999
  adam_epsilon: 1.0e-8
  max_grad_norm: 0.5

  # Learning rate scheduler
  lr_scheduler_type: "linear"

  # Precision - BF16 for better stability on modern GPUs
  bf16: true
  fp16: false
  tf32: true

  # Disable torch compilation to avoid generator tracing issues
  torch_compile: false

  # Dataset settings
  dataset_shuffle: true
  dataset_seed: 3407

  # Output settings
  output_dir: "./models"
  logging_dir: "./logs"
  report_to: ["tensorboard"]

# Source-file selection for building the training corpus.
dataset:
  # File filtering
  # NOTE(review): units are not stated here (bytes vs. characters) -- confirm.
  min_file_size: 10
  max_file_size: 10000

  # Supported programming languages
  supported_languages:
    - python
    - javascript
    - typescript
    - java
    - cpp
    - c
    - csharp
    - php
    - ruby
    - go
    - rust
    - swift
    - kotlin
    - scala
    - sql
    - bash
    - yaml
    - json
    - xml
    - html
    - css
    - markdown

  # Files and directories to exclude.
  # Entries are regular expressions. Single quotes keep each backslash
  # literal, so '\.git/' is the regex \.git/ with no double-escaping.
  exclude_patterns:
    - '\.git/'
    - '__pycache__/'
    - '\.pytest_cache/'
    - 'node_modules/'
    - '\.venv/'
    - 'venv/'
    - 'package-lock\.json$'
    - 'yarn\.lock$'
    - '\.log$'
    - '\.tmp$'
    - '\.bak$'
    - '~\$.*'
    - '\.swp$'
    - '\.swo$'
    - '\.DS_Store'
    - '\.pyc$'
    - '\.pyo$'
    - '\.pyd$'
    - '\.so$'
    - '\.dll$'
    - '\.exe$'

# Runtime GPU memory management.
memory:
  # Memory management for RTX 3070 8 GB
  max_memory_usage: 0.85  # Use up to 85% of GPU memory
  enable_memory_tracking: true
  clear_cache_between_epochs: true

  # Attention optimization
  use_memory_efficient_attention: true
  attention_slicing: true
  slice_size: 1