# File: ai_github_trainer/configs/training_config.yaml
# Last commit: aaa0f1b51e by Suherdy Yacob, 2025-08-23 16:44:33 +07:00
#   1. add 2 data processor type: standard and synthetic
#   2. add DataProcessorSynthetic class to format github repo to QA ChatML format
# Size: 143 lines, 2.8 KiB (YAML)

# Training configuration optimized for RTX3070 8GB VRAM
# AI Trainer for unsloth/Qwen2.5-Coder-7B-Instruct-bnb-4bit
# Base model and tokenizer settings.
# All keys below are nested under `model`; they must stay indented so that
# `model` is a mapping and not a null value followed by stray top-level keys.
model:
  # Model identifier; per its name, a bnb 4-bit build of
  # Qwen2.5-Coder-7B-Instruct from the unsloth namespace.
  name: "unsloth/Qwen2.5-Coder-7B-Instruct-bnb-4bit"
  # Upper bound on sequence length (presumably tokens - confirm in loader).
  max_seq_length: 512
  trust_remote_code: true
  use_fast_tokenizer: true
  # Tokenizer padding/truncation direction.
  # NOTE(review): left padding is customary for generation; confirm that the
  # training code really wants "left" here rather than "right".
  padding_side: "left"
  truncation_side: "left"
# Trainer hyper-parameters, tuned for an RTX3070 with 8GB of VRAM.
# All keys below are nested under `training`.
training:
  # Memory-optimized batch size for RTX3070 8GB.
  # With standard gradient accumulation the effective batch is 2 x 16 = 32
  # samples per optimizer step (assumes the consumer multiplies these).
  per_device_train_batch_size: 2
  gradient_accumulation_steps: 16
  # NOTE(review): both max_steps and num_train_epochs are set. If these are
  # forwarded to Hugging Face TrainingArguments, a positive max_steps
  # overrides num_train_epochs - confirm which one is intended to win.
  max_steps: 120

  # Training parameters
  num_train_epochs: 3
  learning_rate: 1.0e-4
  # NOTE(review): both warmup_steps and warmup_ratio are set. In Hugging Face
  # TrainingArguments warmup_steps overrides warmup_ratio - confirm intent.
  warmup_steps: 10
  warmup_ratio: 0.03

  # Logging and saving
  logging_steps: 1
  save_steps: 100
  save_total_limit: 3

  # Evaluation
  # eval_steps matches save_steps, so every evaluation point has a checkpoint
  # available for load_best_model_at_end.
  eval_strategy: "steps"
  eval_steps: 100
  load_best_model_at_end: true
  # Lower loss is better, hence greater_is_better: false.
  metric_for_best_model: "loss"
  greater_is_better: false

  # Data loading
  dataloader_num_workers: 0  # Temporarily disabled for debugging
  dataloader_pin_memory: true
  remove_unused_columns: false

  # Memory optimization - CRITICAL for RTX3070 8GB
  use_gradient_checkpointing: true
  offload_to_cpu: false  # Explicitly no CPU offloading

  # Additional memory optimizations
  dataloader_drop_last: true

  # Aggressive memory settings for 8GB GPU
  per_device_eval_batch_size: 1
  eval_accumulation_steps: 1

  # Optimizer settings
  optim: "adamw_torch"
  weight_decay: 0.01
  adam_beta1: 0.9
  adam_beta2: 0.999
  adam_epsilon: 1.0e-8
  max_grad_norm: 0.5

  # Learning rate scheduler
  lr_scheduler_type: "linear"

  # Precision - BF16 for better stability on modern GPUs
  # bf16 and fp16 are mutually exclusive; only bf16 is enabled.
  bf16: true
  fp16: false
  tf32: true

  # Disable torch compilation to avoid generator tracing issues
  torch_compile: false

  # Dataset settings
  dataset_shuffle: true
  dataset_seed: 3407

  # Output settings (paths are relative; presumably to the working directory)
  output_dir: "./models"
  logging_dir: "./logs"
  report_to: ["tensorboard"]
# Source-file selection rules used when building the training dataset.
# All keys below are nested under `dataset`.
dataset:
  # File filtering
  # Size bounds for accepted files (unit presumably bytes - confirm in the
  # data processor).
  min_file_size: 10
  max_file_size: 10000

  # Supported programming languages
  supported_languages:
    - python
    - javascript
    - typescript
    - java
    - cpp
    - c
    - csharp
    - php
    - ruby
    - go
    - rust
    - swift
    - kotlin
    - scala
    - sql
    - bash
    - yaml
    - json
    - xml
    - html
    - css
    - markdown

  # Files and directories to exclude
  # Entries look like regular expressions (escaped dots, `$` anchors). They
  # are double-quoted, so each `\\` is read as a single backslash.
  exclude_patterns:
    # Version control, caches and virtual environments
    - "\\.git/"
    - "__pycache__/"
    - "\\.pytest_cache/"
    - "node_modules/"
    - "\\.venv/"
    - "venv/"
    # Dependency lock files
    - "package-lock\\.json$"
    - "yarn\\.lock$"
    # Logs, temporary, backup and editor swap files
    - "\\.log$"
    - "\\.tmp$"
    - "\\.bak$"
    - "~\\$.*"
    - "\\.swp$"
    - "\\.swo$"
    - "\\.DS_Store"
    # Compiled / binary artifacts
    - "\\.pyc$"
    - "\\.pyo$"
    - "\\.pyd$"
    - "\\.so$"
    - "\\.dll$"
    - "\\.exe$"
# GPU memory management settings.
# All keys below are nested under `memory`.
memory:
  # Memory management for RTX3070 8GB
  max_memory_usage: 0.85  # Use up to 85% of GPU memory
  enable_memory_tracking: true
  clear_cache_between_epochs: true

  # Attention optimization
  use_memory_efficient_attention: true
  attention_slicing: true
  # Slice size used when attention_slicing is enabled (exact semantics depend
  # on the consumer - confirm).
  slice_size: 1