ai_html_document_trainer/main.py
2025-08-22 16:30:56 +07:00

153 lines
5.1 KiB
Python

#!/usr/bin/env python3
"""
Odoo AI Model Trainer - Main Orchestrator Script
Trains an AI model on Odoo documentation using Unsloth
"""
import os
import sys
import argparse
from data_scraper import OdooDocScraper
from data_preprocessor import DataPreprocessor
from train_model import OdooModelTrainer
def run_data_collection():
    """Run pipeline step 1: scrape the Odoo documentation.

    If 'odoo_docs_data.csv' already exists in the current working
    directory the scrape is skipped and the step counts as successful.
    Otherwise an ``OdooDocScraper`` is created, its
    ``scrape_documentation()`` result is handed to ``save_data()``
    (presumably what writes the CSV -- confirm in data_scraper), and the
    step succeeds only if at least one record was scraped.

    Returns:
        bool: True when cached data exists or the scrape produced a
        non-empty result; False when the result was empty or any
        exception was raised (the error is printed, not propagated).
    """
    print("=== Step 1: Data Collection ===")

    if not os.path.exists('odoo_docs_data.csv'):
        try:
            doc_scraper = OdooDocScraper()
            scraped = doc_scraper.scrape_documentation()
            doc_scraper.save_data(scraped)
            got_records = len(scraped) > 0
        except Exception as err:
            # Step boundary: report the problem and signal failure to the
            # orchestrator instead of crashing the whole pipeline.
            print(f"Error during data collection: {err}")
            return False
        else:
            return got_records

    # A previous run already produced the raw data; reuse it.
    print("Data file already exists. Skipping data collection.")
    print("To re-scrape data, delete 'odoo_docs_data.csv' and run again.")
    return True
def run_data_preprocessing():
    """Run pipeline step 2: turn the scraped CSV into training data.

    Requires 'odoo_docs_data.csv' to exist. If 'training_data.json' is
    already present the step is skipped and counts as successful.
    Otherwise a ``DataPreprocessor`` processes the CSV, the result is
    passed to ``save_training_data()``, and summary statistics from
    ``get_statistics()`` are printed.

    Returns:
        bool: True when training data already exists or preprocessing
        yielded at least one sample; False when the raw data is missing,
        the result is empty, or any exception was raised (the error is
        printed, not propagated).
    """
    print("\n=== Step 2: Data Preprocessing ===")

    if os.path.exists('odoo_docs_data.csv'):
        if os.path.exists('training_data.json'):
            # Output of an earlier run is still around; do not redo the work.
            print("Training data already exists. Skipping preprocessing.")
            print("To reprocess data, delete 'training_data.json' and run again.")
            return True
    else:
        print("No raw data found. Please run data collection first.")
        return False

    try:
        processor = DataPreprocessor()
        samples = processor.process_csv_data()
        processor.save_training_data(samples)

        summary = processor.get_statistics(samples)
        print("\nTraining Data Statistics:")
        print(f"Total samples: {summary['total_samples']}")
        print(f"Language distribution: {summary['language_distribution']}")
        print(f"Average length: {summary['average_length']:.2f}")

        has_samples = len(samples) > 0
    except Exception as err:
        # Step boundary: report and signal failure rather than propagate.
        print(f"Error during data preprocessing: {err}")
        return False
    else:
        return has_samples
def run_model_training(skip_training=False):
    """Run pipeline step 3: train the model on the prepared data.

    Args:
        skip_training: When true, nothing is trained and the step is
            reported as successful.

    Returns:
        bool: True when training was skipped or ran to completion; False
        when 'training_data.json' is missing or any exception was raised
        while loading, preparing or training (the error and its traceback
        are printed, not propagated).
    """
    print("\n=== Step 3: Model Training ===")

    if not skip_training:
        if not os.path.exists('training_data.json'):
            print("No training data found. Please run data preprocessing first.")
            return False

        try:
            model_trainer = OdooModelTrainer()
            model_trainer.load_model()
            train_set = model_trainer.prepare_data()
            model_trainer.train(train_set)
        except Exception as err:
            print(f"Error during model training: {err}")
            # The full stack is printed as well: a one-line message is
            # rarely enough to diagnose a failure inside the trainer.
            import traceback
            traceback.print_exc()
            return False
        return True

    print("Training skipped as requested.")
    return True
def main(argv=None):
    """Parse the command line and run the requested pipeline steps.

    Two modes are supported:

    * ``--only-collection`` / ``--only-preprocessing`` / ``--only-training``
      run exactly one step and terminate the process with status 0 on
      success or 1 on failure. If several ``--only-*`` flags are given,
      the first one in that order wins.
    * Otherwise the full pipeline runs (collection, preprocessing,
      training), minus any step disabled with ``--skip-*``. The pipeline
      stops at the first failing step.

    Args:
        argv: Optional list of argument strings. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``, so calling ``main()``
            with no arguments behaves as before.

    Raises:
        SystemExit: With status 0 or 1 in the ``--only-*`` modes, and with
            status 1 when the full pipeline did not complete every step.
            A fully successful pipeline (or one with nothing to run)
            returns normally.
    """
    parser = argparse.ArgumentParser(description='Odoo AI Model Trainer')
    parser.add_argument('--skip-collection', action='store_true',
                        help='Skip data collection step')
    parser.add_argument('--skip-preprocessing', action='store_true',
                        help='Skip data preprocessing step')
    parser.add_argument('--skip-training', action='store_true',
                        help='Skip model training step')
    parser.add_argument('--only-collection', action='store_true',
                        help='Only run data collection')
    parser.add_argument('--only-preprocessing', action='store_true',
                        help='Only run data preprocessing')
    parser.add_argument('--only-training', action='store_true',
                        help='Only run model training')
    args = parser.parse_args(argv)

    print("🚀 Odoo AI Model Trainer")
    print("=" * 50)

    # Single-step modes: run the one step and exit with its status.
    if args.only_collection:
        success = run_data_collection()
        sys.exit(0 if success else 1)
    if args.only_preprocessing:
        success = run_data_preprocessing()
        sys.exit(0 if success else 1)
    if args.only_training:
        success = run_model_training()
        sys.exit(0 if success else 1)

    # Full pipeline mode: assemble the steps that were not skipped.
    steps = []
    if not args.skip_collection:
        steps.append(("Data Collection", run_data_collection))
    if not args.skip_preprocessing:
        steps.append(("Data Preprocessing", run_data_preprocessing))
    if not args.skip_training:
        steps.append(("Model Training", run_model_training))

    if not steps:
        print("No steps to run. Use --help to see available options.")
        return

    success_count = 0
    for step_name, step_func in steps:
        if not step_func():
            print(f"{step_name} failed")
            # Later steps depend on the output of earlier ones.
            break
        success_count += 1
        print(f"{step_name} completed successfully")

    print("\n=== Final Results ===")
    print(f"Completed steps: {success_count}/{len(steps)}")

    if success_count == len(steps):
        print("🎉 All steps completed successfully!")
        print("\nNext steps:")
        print("1. Check the 'odoo_model_output' directory for trained model")
        print("2. Use the model for Odoo-related questions")
        return

    print("❌ Some steps failed. Check the output above for details.")
    # Report the failure to the calling shell / CI job, consistent with
    # the --only-* modes; previously a failed pipeline still exited 0.
    sys.exit(1)
# Run the orchestrator only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()