import torch
import json
import os
from typing import Dict, List
from datasets import Dataset
from data_preprocessor import DataPreprocessor
import gc

# Import Unsloth components
from unsloth import FastLanguageModel
from unsloth.chat_templates import get_chat_template
from transformers import TrainingArguments
from trl import SFTTrainer


class OdooModelTrainer:
    def __init__(self):
        self.model_name = "unsloth/Qwen3-8B-bnb-4bit"
        self.max_seq_length = 2048
        self.load_in_4bit = True
        self.model = None
        self.tokenizer = None

    def load_model(self):
        """Load the Qwen model with Unsloth optimizations"""
        print("Loading model...")

        self.model, self.tokenizer = FastLanguageModel.from_pretrained(
            model_name=self.model_name,
            max_seq_length=self.max_seq_length,
            dtype=None,  # Auto-detect dtype
            load_in_4bit=self.load_in_4bit,
        )

        # Enable gradient checkpointing for memory efficiency
        self.model.gradient_checkpointing_enable()

        print("Model loaded successfully!")

    def prepare_data(self, data_file: str = 'training_data.json') -> Dataset:
        """Prepare training data"""
        if not os.path.exists(data_file):
            print(f"Data file {data_file} not found. Running data preprocessing...")
            preprocessor = DataPreprocessor()
            training_data = preprocessor.process_csv_data()
            preprocessor.save_training_data(training_data, data_file)

        print(f"Loading data from {data_file}")
        with open(data_file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Convert to HuggingFace Dataset format
        dataset_dict = {
            'instruction': [item['instruction'] for item in data],
            'input': [item['input'] for item in data],
            'output': [item['output'] for item in data]
        }

        dataset = Dataset.from_dict(dataset_dict)
        print(f"Prepared dataset with {len(dataset)} samples")
        return dataset

    def format_chat_template(self, example):
        """Format a single example with the model's chat template"""
        messages = [
            {"role": "system", "content": "You are a helpful assistant specialized in Odoo documentation."},
            {"role": "user", "content": f"{example['instruction']}\n\n{example['input']}"},
            {"role": "assistant", "content": example['output']}
        ]

        # Apply chat template
        formatted_text = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=False
        )
        return {"text": formatted_text}

    def train(self, dataset: Dataset, output_dir: str = './odoo_model_output'):
        """Train the model"""
        print("Preparing for training...")

        # Apply chat template to dataset
        formatted_dataset = dataset.map(self.format_chat_template)

        # Configure LoRA for efficient training
        self.model = FastLanguageModel.get_peft_model(
            self.model,
            r=16,  # LoRA rank
            target_modules=[
                "q_proj", "k_proj", "v_proj", "o_proj",
                "gate_proj", "up_proj", "down_proj"
            ],
            lora_alpha=16,
            lora_dropout=0,
            bias="none",
            use_gradient_checkpointing=True,
            random_state=3407,
            use_rslora=False,
            loftq_config=None,
        )

        # Training arguments tuned for an RTX 3070 with 8 GB VRAM
        training_args = TrainingArguments(
            per_device_train_batch_size=1,   # Very small batch size for 8 GB VRAM
            gradient_accumulation_steps=4,   # Effective batch size of 4
            warmup_steps=5,
            max_steps=100,                   # Limit steps for testing; increase as needed
            learning_rate=2e-4,
            fp16=not torch.cuda.is_bf16_supported(),
            bf16=torch.cuda.is_bf16_supported(),
            logging_steps=1,
            optim="adamw_8bit",
            weight_decay=0.01,
            lr_scheduler_type="linear",
            seed=3407,
            output_dir=output_dir,
            save_steps=50,
            save_total_limit=2,
            report_to="none",                # Disable wandb/tensorboard for simplicity
        )

        # Initialize trainer
        trainer = SFTTrainer(
            model=self.model,
            tokenizer=self.tokenizer,
            train_dataset=formatted_dataset,
            dataset_text_field="text",
            max_seq_length=self.max_seq_length,
            dataset_num_proc=2,
            packing=False,  # Set to True to pack short sequences and speed up training
            args=training_args,
        )

        # Clear cache before training
        gc.collect()
        torch.cuda.empty_cache()

        print("Starting training...")
        trainer.train()

        # Save the model (LoRA adapters and trainer state)
        print(f"Saving model to {output_dir}")
        trainer.save_model(output_dir)

        # Save in GGUF format for compatibility
        self.model.save_pretrained_gguf(
            output_dir + "_gguf",
            self.tokenizer,
            quantization_method="q4_k_m"  # 4-bit quantization
        )

        print("Training completed!")

    def generate_response(self, prompt: str, max_new_tokens: int = 256) -> str:
        """Generate a response from the trained model"""
        if self.model is None:
            print("Model not loaded!")
            return ""

        # Enable faster inference
        FastLanguageModel.for_inference(self.model)

        messages = [
            {"role": "system", "content": "You are a helpful assistant specialized in Odoo documentation."},
            {"role": "user", "content": prompt}
        ]

        inputs = self.tokenizer.apply_chat_template(
            messages,
            tokenize=True,
            add_generation_prompt=True,
            return_tensors="pt"
        ).to("cuda")

        outputs = self.model.generate(
            input_ids=inputs,
            max_new_tokens=max_new_tokens,
            use_cache=True,
            temperature=0.7,
            min_p=0.1
        )

        response = self.tokenizer.batch_decode(outputs)[0]
        return response


def main():
    # Check CUDA availability
    if not torch.cuda.is_available():
        print("CUDA is not available. Please ensure you have a CUDA-compatible GPU.")
        return

    print(f"Using GPU: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")

    # Initialize trainer
    trainer = OdooModelTrainer()

    try:
        # Load model
        trainer.load_model()

        # Prepare data
        dataset = trainer.prepare_data()
        if len(dataset) == 0:
            print("No training data available!")
            return

        # Train model
        trainer.train(dataset)

        # Test the model
        print("\nTesting the trained model:")
        test_prompt = "How do I install Odoo?"
        response = trainer.generate_response(test_prompt)
        print(f"Prompt: {test_prompt}")
        print(f"Response: {response}")

    except Exception as e:
        print(f"Error during training: {e}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    main()