153 lines
5.1 KiB
Python
153 lines
5.1 KiB
Python
#!/usr/bin/env python3
|
|
"""
Odoo AI Model Trainer - Main Orchestrator Script

Trains an AI model on Odoo documentation using Unsloth.
"""
|
|
|
|
import os
|
|
import sys
|
|
import argparse
|
|
from data_scraper import OdooDocScraper
|
|
from data_preprocessor import DataPreprocessor
|
|
from train_model import OdooModelTrainer
|
|
|
|
def run_data_collection():
    """Step 1: collect raw data from the Odoo documentation.

    Scraping is skipped when 'odoo_docs_data.csv' is already present in the
    current working directory.

    Returns:
        bool: True if the data file already exists or the scraper returned
        at least one item; False if the scrape came back empty or any
        exception was raised along the way (the error is printed).
    """
    print("=== Step 1: Data Collection ===")

    if not os.path.exists('odoo_docs_data.csv'):
        try:
            doc_scraper = OdooDocScraper()
            scraped = doc_scraper.scrape_documentation()
            doc_scraper.save_data(scraped)
            # Kept inside the try so a result without a length is reported
            # as a failure rather than propagating.
            collected_any = len(scraped) > 0
        except Exception as exc:
            print(f"Error during data collection: {exc}")
            collected_any = False
        return collected_any

    # A previous run already produced the file; reuse it.
    for notice in (
        "Data file already exists. Skipping data collection.",
        "To re-scrape data, delete 'odoo_docs_data.csv' and run again.",
    ):
        print(notice)
    return True
|
|
|
|
def run_data_preprocessing():
    """Step 2: turn the collected raw data into training data.

    Requires 'odoo_docs_data.csv' to exist and is skipped when
    'training_data.json' is already present (both looked up in the current
    working directory).

    Returns:
        bool: False when the raw data file is missing, when preprocessing
        yields no samples, or when any exception is raised (the error is
        printed); True when training data already exists or at least one
        sample was produced.
    """
    print("\n=== Step 2: Data Preprocessing ===")

    raw_available = os.path.exists('odoo_docs_data.csv')
    if not raw_available:
        print("No raw data found. Please run data collection first.")
        return False

    already_processed = os.path.exists('training_data.json')
    if already_processed:
        print("Training data already exists. Skipping preprocessing.")
        print("To reprocess data, delete 'training_data.json' and run again.")
        return True

    try:
        processor = DataPreprocessor()
        samples = processor.process_csv_data()
        processor.save_training_data(samples)

        # Report a short summary of what was produced.
        summary = processor.get_statistics(samples)
        print("\nTraining Data Statistics:")
        total = summary['total_samples']
        print(f"Total samples: {total}")
        languages = summary['language_distribution']
        print(f"Language distribution: {languages}")
        mean_length = summary['average_length']
        print(f"Average length: {mean_length:.2f}")

        produced_any = len(samples) > 0
        return produced_any
    except Exception as exc:
        print(f"Error during data preprocessing: {exc}")
        return False
|
|
|
|
def run_model_training(skip_training=False):
    """Step 3: train the AI model on the prepared training data.

    Args:
        skip_training: When true, nothing is trained and the step is
            reported as successful.

    Returns:
        bool: True when training was skipped or ran to completion; False
        when 'training_data.json' is missing from the current working
        directory or any exception is raised (the error and its traceback
        are printed).
    """
    print("\n=== Step 3: Model Training ===")

    if skip_training:
        print("Training skipped as requested.")
        return True

    have_training_data = os.path.exists('training_data.json')
    if not have_training_data:
        print("No training data found. Please run data preprocessing first.")
        return False

    try:
        model_trainer = OdooModelTrainer()
        model_trainer.load_model()
        # prepare_data() runs first and its result is handed to train().
        model_trainer.train(model_trainer.prepare_data())
    except Exception as exc:
        print(f"Error during model training: {exc}")
        # Imported lazily: only needed on the failure path.
        import traceback
        traceback.print_exc()
        return False
    return True
|
|
|
|
def main(argv=None):
    """Main orchestrator: parse the command line and run the chosen steps.

    Args:
        argv: Optional list of command-line argument strings. When None
            (the default) argparse reads the arguments from ``sys.argv``,
            so existing ``main()`` callers behave as before.

    Behaviour:
        * ``--only-collection`` / ``--only-preprocessing`` /
          ``--only-training`` run that single step and exit with status 0
          on success or 1 on failure. If several are given, the first one
          in that order wins.
        * Otherwise every step not disabled by a ``--skip-*`` flag runs in
          order (collection, preprocessing, training), stopping at the
          first failure.
        * If every step is skipped, a hint is printed and the function
          returns normally.

    Raises:
        SystemExit: in the ``--only-*`` modes (status 0 or 1), on argparse
            errors, and with status 1 when a step of the full pipeline
            fails, so shells and CI jobs can detect the failure.
    """
    parser = argparse.ArgumentParser(description='Odoo AI Model Trainer')
    parser.add_argument('--skip-collection', action='store_true',
                        help='Skip data collection step')
    parser.add_argument('--skip-preprocessing', action='store_true',
                        help='Skip data preprocessing step')
    parser.add_argument('--skip-training', action='store_true',
                        help='Skip model training step')
    parser.add_argument('--only-collection', action='store_true',
                        help='Only run data collection')
    parser.add_argument('--only-preprocessing', action='store_true',
                        help='Only run data preprocessing')
    parser.add_argument('--only-training', action='store_true',
                        help='Only run model training')

    args = parser.parse_args(argv)

    print("🚀 Odoo AI Model Trainer")
    print("=" * 50)

    # Check for specific modes
    if args.only_collection:
        success = run_data_collection()
        sys.exit(0 if success else 1)

    if args.only_preprocessing:
        success = run_data_preprocessing()
        sys.exit(0 if success else 1)

    if args.only_training:
        success = run_model_training()
        sys.exit(0 if success else 1)

    # Full pipeline mode
    steps = []
    if not args.skip_collection:
        steps.append(("Data Collection", run_data_collection))
    if not args.skip_preprocessing:
        steps.append(("Data Preprocessing", run_data_preprocessing))
    if not args.skip_training:
        steps.append(("Model Training", run_model_training))

    if not steps:
        print("No steps to run. Use --help to see available options.")
        return

    success_count = 0
    for step_name, step_func in steps:
        if step_func():
            success_count += 1
            print(f"✅ {step_name} completed successfully")
        else:
            print(f"❌ {step_name} failed")
            # Later steps depend on the output of earlier ones.
            break

    print("\n=== Final Results ===")
    print(f"Completed steps: {success_count}/{len(steps)}")

    if success_count == len(steps):
        print("🎉 All steps completed successfully!")
        print("\nNext steps:")
        print("1. Check the 'odoo_model_output' directory for trained model")
        print("2. Use the model for Odoo-related questions")
    else:
        print("❌ Some steps failed. Check the output above for details.")
        # Match the --only-* modes: a failed run must not exit with status 0.
        sys.exit(1)
|
|
|
|
# Run the orchestrator only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()