ai_github_trainer/compare_configs.py

#!/usr/bin/env python3
"""
Compare training configurations for different models
"""
import yaml
from pathlib import Path
from colorama import init, Fore, Style
init(autoreset=True)
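
# The two YAML files compared below are assumed to share the layout sketched
# here (the keys are exactly the ones this script reads; the values are
# illustrative only -- the real numbers live in configs/ in the repository):
#
#   model:
#     name: Qwen/Qwen2.5-Coder-7B        # or Qwen3-8B in the second file
#     max_seq_length: 4096
#   training:
#     per_device_train_batch_size: 1
#     gradient_accumulation_steps: 8
#     learning_rate: 2.0e-4
#     warmup_steps: 100
#     num_train_epochs: 3
#   memory:
#     max_memory_usage: 0.85
#     use_gradient_checkpointing: true
#     offload_to_cpu: false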


def load_config(config_path):
    """Load YAML configuration"""
    with open(config_path, 'r') as f:
        return yaml.safe_load(f)


def compare_configs():
    """Compare the two training configurations"""
    print(f"\n{Fore.CYAN}{'='*80}{Style.RESET_ALL}")
    print(f"{Fore.CYAN}AI TRAINER - MODEL CONFIGURATION COMPARISON{Style.RESET_ALL}")
    print(f"{Fore.CYAN}{'='*80}{Style.RESET_ALL}")

    # Load configurations, resolved relative to this script so the comparison
    # works regardless of the current working directory
    config_dir = Path(__file__).resolve().parent / 'configs'
    qwen25_config = load_config(config_dir / 'training_config.yaml')
    qwen3_config = load_config(config_dir / 'training_config_qwen3.yaml')

    # Model comparison
    print(f"\n{Fore.GREEN}📊 MODEL COMPARISON{Style.RESET_ALL}")
    print(f"{'Setting':<25} {'Qwen2.5-Coder-7B':<20} {'Qwen3-8B':<15}")
    print(f"{'-'*60}")
    print(f"{'Model Name':<25} {qwen25_config['model']['name']:<20} {qwen3_config['model']['name']:<15}")
    print(f"{'Max Seq Length':<25} {qwen25_config['model']['max_seq_length']:<20} {qwen3_config['model']['max_seq_length']:<15}")

    # Training comparison
    print(f"\n{Fore.GREEN}⚙️ TRAINING PARAMETERS{Style.RESET_ALL}")
    print(f"{'Parameter':<25} {'Qwen2.5-Coder-7B':<20} {'Qwen3-8B':<15} {'Difference':<15}")
    print(f"{'-'*75}")
    training_params = [
        ('Batch Size', 'per_device_train_batch_size'),
        ('Gradient Accumulation', 'gradient_accumulation_steps'),
        ('Learning Rate', 'learning_rate'),
        ('Warmup Steps', 'warmup_steps'),
        ('Epochs', 'num_train_epochs'),
    ]
    for param_name, param_key in training_params:
        qwen25_val = qwen25_config['training'][param_key]
        qwen3_val = qwen3_config['training'][param_key]
        diff = "🔻" if qwen3_val < qwen25_val else "🔺" if qwen3_val > qwen25_val else "➡️"
        print(f"{param_name:<25} {qwen25_val:<20} {qwen3_val:<15} {diff}")

    # Memory comparison
    print(f"\n{Fore.GREEN}🧠 MEMORY SETTINGS{Style.RESET_ALL}")
    print(f"{'Setting':<25} {'Qwen2.5-Coder-7B':<20} {'Qwen3-8B':<15}")
    print(f"{'-'*60}")
    memory_params = [
        ('Max Memory Usage', 'max_memory_usage'),
        ('Gradient Checkpointing', 'use_gradient_checkpointing'),
        ('CPU Offloading', 'offload_to_cpu'),
    ]
    for param_name, param_key in memory_params:
        qwen25_val = qwen25_config['memory'][param_key]
        qwen3_val = qwen3_config['memory'][param_key]
        # Cast to str so booleans render as True/False rather than 1/0
        print(f"{param_name:<25} {str(qwen25_val):<20} {str(qwen3_val):<15}")

    # Usage guide
    print(f"\n{Fore.YELLOW}💡 RECOMMENDATION GUIDE{Style.RESET_ALL}")
    print(f"{'='*80}")
    print(f"\n{Fore.BLUE}Use Qwen2.5-Coder-7B when:{Style.RESET_ALL}")
    print(" • You want to fine-tune for code generation tasks")
    print(" • Working primarily with programming languages")
    print(" • Need code completion and understanding")
    print(" • Prefer moderate memory usage (~6-7GB VRAM)")
    print(f"\n{Fore.BLUE}Use Qwen3-8B when:{Style.RESET_ALL}")
    print(" • You need general instruction following")
    print(" • Working with mixed code and natural language")
    print(" • Want broader language understanding")
    print(" • Have sufficient VRAM (~7-8GB)")

    print(f"\n{Fore.GREEN}🚀 QUICK START COMMANDS{Style.RESET_ALL}")
    print(f"{'='*80}")
    print(f"\n{Fore.CYAN}For Qwen2.5-Coder-7B:{Style.RESET_ALL}")
    print("python run_training.py --repo1 <repo1> --repo2 <repo2>")
    print(f"\n{Fore.CYAN}For Qwen3-8B:{Style.RESET_ALL}")
    print("python run_training_qwen3.py --repo1 <repo1> --repo2 <repo2>")
    print(f"\n{Fore.CYAN}{'='*80}{Style.RESET_ALL}")


if __name__ == "__main__":
    compare_configs()
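
# Usage (a sketch; adjust the path if the script lives elsewhere in the repo):
#   python ai_github_trainer/compare_configs.py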