#!/usr/bin/env python3
"""
Compare the training configurations for Qwen2.5-Coder-7B and Qwen3-8B.
"""

import yaml
from colorama import init, Fore, Style

# Reset colorama styling automatically after each print.
init(autoreset=True)

def load_config(config_path):
    """Load YAML configuration"""
    with open(config_path, 'r') as f:
        return yaml.safe_load(f)

def compare_configs():
    """Compare the two training configurations"""
    print(f"\n{Fore.CYAN}{'='*80}{Style.RESET_ALL}")
    print(f"{Fore.CYAN}AI TRAINER - MODEL CONFIGURATION COMPARISON{Style.RESET_ALL}")
    print(f"{Fore.CYAN}{'='*80}{Style.RESET_ALL}")

    # Load configurations
    qwen25_config = load_config('configs/training_config.yaml')
    qwen3_config = load_config('configs/training_config_qwen3.yaml')

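    # The comparisons below assume each YAML file exposes at least these keys:
    #   model:    name, max_seq_length
    #   training: per_device_train_batch_size, gradient_accumulation_steps,
    #             learning_rate, warmup_steps, num_train_epochs
    #   memory:   max_memory_usage, use_gradient_checkpointing, offload_to_cpu
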
    # Model comparison
    print(f"\n{Fore.GREEN}📊 MODEL COMPARISON{Style.RESET_ALL}")
    print(f"{'Setting':<25} {'Qwen2.5-Coder-7B':<20} {'Qwen3-8B':<15}")
    print(f"{'-'*60}")

    print(f"{'Model Name':<25} {qwen25_config['model']['name']:<20} {qwen3_config['model']['name']:<15}")
    print(f"{'Max Seq Length':<25} {qwen25_config['model']['max_seq_length']:<20} {qwen3_config['model']['max_seq_length']:<15}")

    # Training comparison
    print(f"\n{Fore.GREEN}⚙️ TRAINING PARAMETERS{Style.RESET_ALL}")
    print(f"{'Parameter':<25} {'Qwen2.5-Coder-7B':<20} {'Qwen3-8B':<15} {'Difference':<15}")
    print(f"{'-'*75}")

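    # Each entry pairs a display label with the key it is read from in the
    # 'training' section of both configs.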
    training_params = [
        ('Batch Size', 'per_device_train_batch_size'),
        ('Gradient Accumulation', 'gradient_accumulation_steps'),
        ('Learning Rate', 'learning_rate'),
        ('Warmup Steps', 'warmup_steps'),
        ('Epochs', 'num_train_epochs')
    ]

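    # 🔻 = the Qwen3-8B value is lower, 🔺 = higher, ➡️ = unchanged relative
    # to Qwen2.5-Coder-7B.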
    for param_name, param_key in training_params:
        qwen25_val = qwen25_config['training'][param_key]
        qwen3_val = qwen3_config['training'][param_key]
        diff = "🔻" if qwen3_val < qwen25_val else "🔺" if qwen3_val > qwen25_val else "➡️"
        print(f"{param_name:<25} {qwen25_val:<20} {qwen3_val:<15} {diff}")

    # Memory comparison
    print(f"\n{Fore.GREEN}🧠 MEMORY SETTINGS{Style.RESET_ALL}")
    print(f"{'Setting':<25} {'Qwen2.5-Coder-7B':<20} {'Qwen3-8B':<15}")
    print(f"{'-'*60}")

    memory_params = [
        ('Max Memory Usage', 'max_memory_usage'),
        ('Gradient Checkpointing', 'use_gradient_checkpointing'),
        ('CPU Offloading', 'offload_to_cpu')
    ]

    for param_name, param_key in memory_params:
        qwen25_val = qwen25_config['memory'][param_key]
        qwen3_val = qwen3_config['memory'][param_key]
        # str() so any boolean settings print as True/False rather than 1/0
        # under the width format spec.
        print(f"{param_name:<25} {str(qwen25_val):<20} {str(qwen3_val):<15}")

    # Usage guide
    print(f"\n{Fore.YELLOW}💡 RECOMMENDATION GUIDE{Style.RESET_ALL}")
    print(f"{'='*80}")

    print(f"\n{Fore.BLUE}Use Qwen2.5-Coder-7B when:{Style.RESET_ALL}")
    print(f" • You want to fine-tune for code generation tasks")
    print(f" • Working primarily with programming languages")
    print(f" • Need code completion and understanding")
    print(f" • Prefer moderate memory usage (~6-7GB VRAM)")

    print(f"\n{Fore.BLUE}Use Qwen3-8B when:{Style.RESET_ALL}")
    print(f" • You need general instruction following")
    print(f" • Working with mixed code and natural language")
    print(f" • Want broader language understanding")
    print(f" • Have sufficient VRAM (~7-8GB)")

print(f"\n{Fore.GREEN}🚀 QUICK START COMMANDS{Style.RESET_ALL}")
|
|
print(f"{'='*80}")
|
|
|
|
print(f"\n{Fore.CYAN}For Qwen2.5-Coder-7B:{Style.RESET_ALL}")
|
|
print(f"python run_training.py --repo1 <repo1> --repo2 <repo2>")
|
|
|
|
print(f"\n{Fore.CYAN}For Qwen3-8B:{Style.RESET_ALL}")
|
|
print(f"python run_training_qwen3.py --repo1 <repo1> --repo2 <repo2>")
|
|
|
|
print(f"\n{Fore.CYAN}{'='*80}{Style.RESET_ALL}")
|
|
|
|
if __name__ == "__main__":
    compare_configs()