ai_github_trainer/compare_configs.py

#!/usr/bin/env python3
"""
Compare training configurations for different models
"""

import yaml
from pathlib import Path
from colorama import init, Fore, Style

init(autoreset=True)

def load_config(config_path):
    """Load YAML configuration"""
    with open(config_path, 'r') as f:
        return yaml.safe_load(f)

def compare_configs():
    """Compare the two training configurations"""
    print(f"\n{Fore.CYAN}{'='*80}{Style.RESET_ALL}")
    print(f"{Fore.CYAN}AI TRAINER - MODEL CONFIGURATION COMPARISON{Style.RESET_ALL}")
    print(f"{Fore.CYAN}{'='*80}{Style.RESET_ALL}")

    # Load configurations
    qwen25_config = load_config('configs/training_config.yaml')
    qwen3_config = load_config('configs/training_config_qwen3.yaml')

    # Model comparison
    print(f"\n{Fore.GREEN}📊 MODEL COMPARISON{Style.RESET_ALL}")
    print(f"{'Setting':<25} {'Qwen2.5-Coder-7B':<20} {'Qwen3-8B':<15}")
    print(f"{'-'*60}")

    print(f"{'Model Name':<25} {qwen25_config['model']['name']:<20} {qwen3_config['model']['name']:<15}")
    print(f"{'Max Seq Length':<25} {qwen25_config['model']['max_seq_length']:<20} {qwen3_config['model']['max_seq_length']:<15}")

    # Training comparison
    print(f"\n{Fore.GREEN}⚙️ TRAINING PARAMETERS{Style.RESET_ALL}")
    print(f"{'Parameter':<25} {'Qwen2.5-Coder-7B':<20} {'Qwen3-8B':<15} {'Difference':<15}")
    print(f"{'-'*75}")

    training_params = [
        ('Batch Size', 'per_device_train_batch_size'),
        ('Gradient Accumulation', 'gradient_accumulation_steps'),
        ('Learning Rate', 'learning_rate'),
        ('Warmup Steps', 'warmup_steps'),
        ('Epochs', 'num_train_epochs')
    ]

    for param_name, param_key in training_params:
        qwen25_val = qwen25_config['training'][param_key]
        qwen3_val = qwen3_config['training'][param_key]
        diff = "🔻" if qwen3_val < qwen25_val else "🔺" if qwen3_val > qwen25_val else "➡️"

        print(f"{param_name:<25} {qwen25_val:<20} {qwen3_val:<15} {diff}")

    # Memory comparison
    print(f"\n{Fore.GREEN}🧠 MEMORY SETTINGS{Style.RESET_ALL}")
    print(f"{'Setting':<25} {'Qwen2.5-Coder-7B':<20} {'Qwen3-8B':<15}")
    print(f"{'-'*60}")

    memory_params = [
        ('Max Memory Usage', 'max_memory_usage'),
        ('Gradient Checkpointing', 'use_gradient_checkpointing'),
        ('CPU Offloading', 'offload_to_cpu')
    ]

    for param_name, param_key in memory_params:
        qwen25_val = qwen25_config['memory'][param_key]
        qwen3_val = qwen3_config['memory'][param_key]
        print(f"{param_name:<25} {qwen25_val:<20} {qwen3_val:<15}")

    # Usage guide
    print(f"\n{Fore.YELLOW}💡 RECOMMENDATION GUIDE{Style.RESET_ALL}")
    print(f"{'='*80}")

    print(f"\n{Fore.BLUE}Use Qwen2.5-Coder-7B when:{Style.RESET_ALL}")
    print(f"  • You want to fine-tune for code generation tasks")
    print(f"  • Working primarily with programming languages")
    print(f"  • Need code completion and understanding")
    print(f"  • Prefer moderate memory usage (~6-7GB VRAM)")

    print(f"\n{Fore.BLUE}Use Qwen3-8B when:{Style.RESET_ALL}")
    print(f"  • You need general instruction following")
    print(f"  • Working with mixed code and natural language")
    print(f"  • Want broader language understanding")
    print(f"  • Have sufficient VRAM (~7-8GB)")

    print(f"\n{Fore.GREEN}🚀 QUICK START COMMANDS{Style.RESET_ALL}")
    print(f"{'='*80}")

    print(f"\n{Fore.CYAN}For Qwen2.5-Coder-7B:{Style.RESET_ALL}")
    print(f"python run_training.py --repo1 <repo1> --repo2 <repo2>")

    print(f"\n{Fore.CYAN}For Qwen3-8B:{Style.RESET_ALL}")
    print(f"python run_training_qwen3.py --repo1 <repo1> --repo2 <repo2>")

    print(f"\n{Fore.CYAN}{'='*80}{Style.RESET_ALL}")

if __name__ == "__main__":
    compare_configs()