# ai_html_document_trainer/main.py

#!/usr/bin/env python3
"""
Odoo AI Model Trainer - Main Orchestrator Script
Trains an AI model on Odoo documentation using Unsloth
"""

import os
import sys
import argparse
from data_scraper import OdooDocScraper
from data_preprocessor import DataPreprocessor
from train_model import OdooModelTrainer

def run_data_collection():
    """Step 1: Collect data from Odoo documentation.

    Skips scraping when 'odoo_docs_data.csv' already exists in the current
    working directory. Otherwise it runs ``OdooDocScraper.scrape_documentation()``
    and passes a non-empty result to ``OdooDocScraper.save_data()``.

    An empty scrape result is reported as a failure and is never handed to
    ``save_data()``. The skip check above only tests that the file exists,
    so a data file produced for an empty result would make every later run
    skip collection and report success.

    Returns:
        bool: True if the data file already exists or non-empty data was
        scraped and saved; False if nothing was scraped or an exception
        was raised by the scraper.
    """
    print("=== Step 1: Data Collection ===")

    if os.path.exists('odoo_docs_data.csv'):
        print("Data file already exists. Skipping data collection.")
        print("To re-scrape data, delete 'odoo_docs_data.csv' and run again.")
        return True

    try:
        scraper = OdooDocScraper()
        data = scraper.scrape_documentation()

        # len() rather than truthiness: the concrete type returned by the
        # scraper is not visible here and may not define a usable __bool__.
        if len(data) == 0:
            print("No data was collected. Nothing was saved.")
            return False

        scraper.save_data(data)
        return True
    except Exception as e:
        # Top-level step boundary: report the failure and let the caller
        # decide how to proceed based on the boolean result.
        print(f"Error during data collection: {e}")
        return False

def run_data_preprocessing():
    """Step 2: Preprocess and format the collected data.

    Requires 'odoo_docs_data.csv' in the current working directory and
    skips work when 'training_data.json' already exists. Otherwise it runs
    ``DataPreprocessor.process_csv_data()``, saves a non-empty result via
    ``DataPreprocessor.save_training_data()`` and prints summary statistics
    obtained from ``DataPreprocessor.get_statistics()``.

    An empty preprocessing result is reported as a failure and is never
    handed to ``save_training_data()``. The skip check only tests that the
    file exists, so a training file produced for an empty result would make
    later runs skip this step and let training start with no samples.

    Returns:
        bool: True if training data already exists or non-empty training
        data was produced, saved and summarized; False if the raw data is
        missing, no samples were produced, or an exception was raised.
    """
    print("\n=== Step 2: Data Preprocessing ===")

    if not os.path.exists('odoo_docs_data.csv'):
        print("No raw data found. Please run data collection first.")
        return False

    if os.path.exists('training_data.json'):
        print("Training data already exists. Skipping preprocessing.")
        print("To reprocess data, delete 'training_data.json' and run again.")
        return True

    try:
        preprocessor = DataPreprocessor()
        training_data = preprocessor.process_csv_data()

        # Reject empty output before saving or computing statistics, so a
        # failed run leaves nothing behind that looks like a finished one.
        if len(training_data) == 0:
            print("No training samples were produced. Nothing was saved.")
            return False

        preprocessor.save_training_data(training_data)

        stats = preprocessor.get_statistics(training_data)
        print("\nTraining Data Statistics:")
        print(f"Total samples: {stats['total_samples']}")
        print(f"Language distribution: {stats['language_distribution']}")
        print(f"Average length: {stats['average_length']:.2f}")

        return True
    except Exception as e:
        # Top-level step boundary: report the failure and let the caller
        # decide how to proceed based on the boolean result.
        print(f"Error during data preprocessing: {e}")
        return False

def run_model_training(skip_training=False):
    """Step 3: Train the AI model.

    Args:
        skip_training: When true, announce the skip and report success
            without touching the trainer.

    Returns:
        bool: True when training was skipped or ran to completion; False
        when 'training_data.json' is missing from the current working
        directory or the trainer raised an exception (the traceback is
        printed in that case).
    """
    print("\n=== Step 3: Model Training ===")

    if skip_training:
        print("Training skipped as requested.")
        return True

    has_training_file = os.path.exists('training_data.json')
    if not has_training_file:
        print("No training data found. Please run data preprocessing first.")
        return False

    try:
        model_trainer = OdooModelTrainer()
        model_trainer.load_model()
        prepared = model_trainer.prepare_data()
        model_trainer.train(prepared)
    except Exception as exc:
        print(f"Error during model training: {exc}")
        # Imported lazily: only needed on the failure path.
        from traceback import print_exc
        print_exc()
        return False
    else:
        return True

def main():
    """Main orchestrator function.

    Parses the command line and runs the requested pipeline steps.

    Modes:
        * ``--only-collection`` / ``--only-preprocessing`` /
          ``--only-training``: run that single step and exit with status 0
          on success or 1 on failure. If several are given, the first one
          in that order wins.
        * Otherwise: run collection, preprocessing and training in order,
          omitting any step disabled with its ``--skip-*`` flag, and stop
          at the first step that fails.

    Returns:
        None when every selected step succeeded, or when every step was
        skipped.

    Raises:
        SystemExit: always in the ``--only-*`` modes (status 0 or 1), and
            with status 1 in pipeline mode when a step fails, so that
            shells and CI see the failure just as they do for the
            single-step modes.
    """
    parser = argparse.ArgumentParser(description='Odoo AI Model Trainer')
    parser.add_argument('--skip-collection', action='store_true',
                       help='Skip data collection step')
    parser.add_argument('--skip-preprocessing', action='store_true',
                       help='Skip data preprocessing step')
    parser.add_argument('--skip-training', action='store_true',
                       help='Skip model training step')
    parser.add_argument('--only-collection', action='store_true',
                       help='Only run data collection')
    parser.add_argument('--only-preprocessing', action='store_true',
                       help='Only run data preprocessing')
    parser.add_argument('--only-training', action='store_true',
                       help='Only run model training')

    args = parser.parse_args()

    print("🚀 Odoo AI Model Trainer")
    print("=" * 50)

    # Single-step modes: run exactly one step and exit with its status.
    if args.only_collection:
        success = run_data_collection()
        sys.exit(0 if success else 1)

    if args.only_preprocessing:
        success = run_data_preprocessing()
        sys.exit(0 if success else 1)

    if args.only_training:
        success = run_model_training()
        sys.exit(0 if success else 1)

    # Full pipeline mode: assemble the steps that were not skipped.
    steps = []
    if not args.skip_collection:
        steps.append(("Data Collection", run_data_collection))
    if not args.skip_preprocessing:
        steps.append(("Data Preprocessing", run_data_preprocessing))
    if not args.skip_training:
        steps.append(("Model Training", run_model_training))

    if not steps:
        print("No steps to run. Use --help to see available options.")
        return

    success_count = 0
    for step_name, step_func in steps:
        if step_func():
            success_count += 1
            print(f"✅ {step_name} completed successfully")
        else:
            print(f"❌ {step_name} failed")
            # Later steps depend on the output of earlier ones.
            break

    print("\n=== Final Results ===")
    print(f"Completed steps: {success_count}/{len(steps)}")

    if success_count == len(steps):
        print("🎉 All steps completed successfully!")
        print("\nNext steps:")
        print("1. Check the 'odoo_model_output' directory for trained model")
        print("2. Use the model for Odoo-related questions")
    else:
        print("❌ Some steps failed. Check the output above for details.")
        # Match the --only-* modes: a failed run must not exit with 0.
        sys.exit(1)

# Run the pipeline only when this file is executed as a script, so that
# importing the module does not parse arguments or start any step.
if __name__ == "__main__":
    main()