#!/usr/bin/env python3
"""
Compare training configurations for different models
"""
import yaml
from colorama import init, Fore, Style

init(autoreset=True)


def load_config(config_path):
    """Load YAML configuration"""
    with open(config_path, 'r') as f:
        return yaml.safe_load(f)


def compare_configs():
    """Compare the two training configurations"""
    print(f"\n{Fore.CYAN}{'='*80}{Style.RESET_ALL}")
    print(f"{Fore.CYAN}AI TRAINER - MODEL CONFIGURATION COMPARISON{Style.RESET_ALL}")
    print(f"{Fore.CYAN}{'='*80}{Style.RESET_ALL}")

    # Load configurations
    qwen25_config = load_config('configs/training_config.yaml')
    qwen3_config = load_config('configs/training_config_qwen3.yaml')

    # Model comparison
    print(f"\n{Fore.GREEN}📊 MODEL COMPARISON{Style.RESET_ALL}")
    print(f"{'Setting':<25} {'Qwen2.5-Coder-7B':<20} {'Qwen3-8B':<15}")
    print(f"{'-'*60}")
    print(f"{'Model Name':<25} {qwen25_config['model']['name']:<20} {qwen3_config['model']['name']:<15}")
    print(f"{'Max Seq Length':<25} {qwen25_config['model']['max_seq_length']:<20} {qwen3_config['model']['max_seq_length']:<15}")

    # Training comparison
    print(f"\n{Fore.GREEN}⚙️ TRAINING PARAMETERS{Style.RESET_ALL}")
    print(f"{'Parameter':<25} {'Qwen2.5-Coder-7B':<20} {'Qwen3-8B':<15} {'Difference':<15}")
    print(f"{'-'*75}")

    training_params = [
        ('Batch Size', 'per_device_train_batch_size'),
        ('Gradient Accumulation', 'gradient_accumulation_steps'),
        ('Learning Rate', 'learning_rate'),
        ('Warmup Steps', 'warmup_steps'),
        ('Epochs', 'num_train_epochs'),
    ]

    for param_name, param_key in training_params:
        qwen25_val = qwen25_config['training'][param_key]
        qwen3_val = qwen3_config['training'][param_key]
        diff = "🔻" if qwen3_val < qwen25_val else "🔺" if qwen3_val > qwen25_val else "➡️"
        print(f"{param_name:<25} {qwen25_val:<20} {qwen3_val:<15} {diff}")

    # Memory comparison
    print(f"\n{Fore.GREEN}🧠 MEMORY SETTINGS{Style.RESET_ALL}")
    print(f"{'Setting':<25} {'Qwen2.5-Coder-7B':<20} {'Qwen3-8B':<15}")
    print(f"{'-'*60}")

    memory_params = [
        ('Max Memory Usage', 'max_memory_usage'),
        ('Gradient Checkpointing', 'use_gradient_checkpointing'),
        ('CPU Offloading', 'offload_to_cpu'),
    ]

    for param_name, param_key in memory_params:
        # Cast to str so booleans render as True/False instead of 1/0
        # when padded with a width format spec.
        qwen25_val = str(qwen25_config['memory'][param_key])
        qwen3_val = str(qwen3_config['memory'][param_key])
        print(f"{param_name:<25} {qwen25_val:<20} {qwen3_val:<15}")

    # Usage guide
    print(f"\n{Fore.YELLOW}💡 RECOMMENDATION GUIDE{Style.RESET_ALL}")
    print(f"{'='*80}")

    print(f"\n{Fore.BLUE}Use Qwen2.5-Coder-7B when:{Style.RESET_ALL}")
    print(f"  • You want to fine-tune for code generation tasks")
    print(f"  • Working primarily with programming languages")
    print(f"  • Need code completion and understanding")
    print(f"  • Prefer moderate memory usage (~6-7GB VRAM)")

    print(f"\n{Fore.BLUE}Use Qwen3-8B when:{Style.RESET_ALL}")
    print(f"  • You need general instruction following")
    print(f"  • Working with mixed code and natural language")
    print(f"  • Want broader language understanding")
    print(f"  • Have sufficient VRAM (~7-8GB)")

    print(f"\n{Fore.GREEN}🚀 QUICK START COMMANDS{Style.RESET_ALL}")
    print(f"{'='*80}")

    print(f"\n{Fore.CYAN}For Qwen2.5-Coder-7B:{Style.RESET_ALL}")
    print(f"python run_training.py --repo1 --repo2 ")

    print(f"\n{Fore.CYAN}For Qwen3-8B:{Style.RESET_ALL}")
    print(f"python run_training_qwen3.py --repo1 --repo2 ")

    print(f"\n{Fore.CYAN}{'='*80}{Style.RESET_ALL}")


if __name__ == "__main__":
    compare_configs()
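
# ---------------------------------------------------------------------------
# Reference sketch of the YAML layout this script assumes. The section and
# key names below are exactly the ones compare_configs() reads; the values
# are illustrative placeholders, not the project's actual settings.
#
# model:
#   name: Qwen/Qwen2.5-Coder-7B          # placeholder
#   max_seq_length: 2048                 # placeholder
# training:
#   per_device_train_batch_size: 1       # placeholder
#   gradient_accumulation_steps: 8       # placeholder
#   learning_rate: 2.0e-4                # placeholder
#   warmup_steps: 10                     # placeholder
#   num_train_epochs: 1                  # placeholder
# memory:
#   max_memory_usage: 0.85               # placeholder
#   use_gradient_checkpointing: true     # placeholder
#   offload_to_cpu: false                # placeholder
# ---------------------------------------------------------------------------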