bug fixes
parent 1f1c183884
commit dc14dc4c2c

.gitignore (vendored): 2 additions
@@ -28,6 +28,8 @@ share/python-wheels/
 MANIFEST
 /models
 /unsloth_compiled_cache
+/gitclone/enterprise
+/gitclone/odoo
 
 # PyInstaller
 # Usually these files are written by a python script from a template

@@ -13,9 +13,10 @@ training:
   # Memory-optimized batch size for RTX3070 8GB
   per_device_train_batch_size: 2
   gradient_accumulation_steps: 4
+  max_steps: 30
 
   # Training parameters
-  num_train_epochs: 3
+  num_train_epochs: 1
   learning_rate: 2.0e-4
   warmup_steps: 10
   warmup_ratio: 0.1

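Note: with per_device_train_batch_size: 2 and gradient_accumulation_steps: 4 the effective batch size is 8, so max_steps: 30 corresponds to roughly 240 training samples; in the Hugging Face trainer a positive max_steps takes precedence over num_train_epochs.
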
@@ -42,7 +43,7 @@ training:
   offload_to_cpu: false # Explicitly no CPU offloading
 
   # Optimizer settings
-  optim: "adamw_torch"
+  optim: "paged_adamw_8bit"
   weight_decay: 0.01
   adam_beta1: 0.9
   adam_beta2: 0.999

@@ -50,7 +51,7 @@ training:
   max_grad_norm: 1.0
 
   # Learning rate scheduler
-  lr_scheduler_type: "cosine"
+  lr_scheduler_type: "linear"
 
   # Precision - BF16 for better stability on modern GPUs
   bf16: true

@@ -59,7 +60,7 @@ training:
 
   # Dataset settings
   dataset_shuffle: true
-  dataset_seed: 42
+  dataset_seed: 3407
 
   # Output settings
   output_dir: "./models"

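Taken together, the config now trains for at most 30 steps with the paged 8-bit AdamW optimizer (which relies on the bitsandbytes package), a linear schedule, and seed 3407. A minimal sketch of how the updated training block could be mapped onto trl's SFTConfig; the file path, the use of PyYAML, and the dataset_seed-to-seed mapping are assumptions for illustration, not taken from the repository:

    import yaml  # assumes PyYAML is available
    from trl import SFTConfig

    with open("config.yaml") as fh:  # hypothetical path to the config shown above
        cfg = yaml.safe_load(fh)["training"]

    training_args = SFTConfig(
        output_dir=cfg["output_dir"],                                     # "./models"
        per_device_train_batch_size=cfg["per_device_train_batch_size"],  # 2
        gradient_accumulation_steps=cfg["gradient_accumulation_steps"],  # 4
        max_steps=cfg["max_steps"],                                      # 30
        num_train_epochs=cfg["num_train_epochs"],                        # 1
        learning_rate=cfg["learning_rate"],                              # 2.0e-4
        warmup_steps=cfg["warmup_steps"],                                # 10
        optim=cfg["optim"],                                              # "paged_adamw_8bit"
        lr_scheduler_type=cfg["lr_scheduler_type"],                      # "linear"
        weight_decay=cfg["weight_decay"],                                # 0.01
        max_grad_norm=cfg["max_grad_norm"],                              # 1.0
        bf16=cfg["bf16"],                                                # true
        seed=cfg["dataset_seed"],                                        # 3407 (mapping assumed)
    )
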
@@ -179,8 +179,9 @@ class DatasetProcessor:
 
         finally:
             # Cleanup temporary directories, but keep gitclone folder
-            if temp_dir != "./gitclone":
-                shutil.rmtree(temp_dir, ignore_errors=True)
+            # if temp_dir != "./gitclone":
+            #     shutil.rmtree(temp_dir, ignore_errors=True)
+            pass  # a finally block containing only comments is a syntax error
 
     def _extract_code_samples(self, repo_path: str, config: AppConfig) -> List[Dict]:
         """

@@ -16,8 +16,10 @@ from transformers import (
     AutoTokenizer,
     Trainer,
     TrainingArguments,
-    DataCollatorForLanguageModeling
+    DataCollatorForLanguageModeling,
+    DataCollatorForSeq2Seq
 )
+from trl import SFTConfig, SFTTrainer
 from datasets import Dataset
 from unsloth import FastLanguageModel, is_bfloat16_supported
 

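DataCollatorForSeq2Seq, which replaces DataCollatorForLanguageModeling below, pads both input_ids and labels to a common length per batch and fills label padding with -100 so it is ignored by the loss. A small self-contained sketch; the tokenizer name is only illustrative:

    from transformers import AutoTokenizer, DataCollatorForSeq2Seq

    tok = AutoTokenizer.from_pretrained("gpt2")  # any causal-LM tokenizer
    tok.pad_token = tok.eos_token                # GPT-2 ships without a pad token

    collator = DataCollatorForSeq2Seq(tokenizer=tok)
    batch = collator([
        {"input_ids": [1, 2, 3], "labels": [1, 2, 3]},
        {"input_ids": [4, 5], "labels": [4, 5]},
    ])
    print(batch["input_ids"].shape, batch["labels"].shape)  # both padded to length 3
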
@@ -178,7 +180,7 @@ class ModelTrainer:
         self.logger.info("Setting up trainer...")
 
         # Training arguments optimized for RTX3070 8GB
-        training_args = TrainingArguments(
+        training_args = SFTConfig(
             output_dir=str(self.output_dir / "checkpoints"),
             num_train_epochs=self.config.training.num_train_epochs,
             per_device_train_batch_size=self.config.training.per_device_train_batch_size,

@@ -216,22 +218,26 @@ class ModelTrainer:
             # Memory optimization settings
             ddp_find_unused_parameters=False,
             per_device_eval_batch_size=self.config.training.per_device_train_batch_size,
+            max_steps=self.config.training.max_steps if hasattr(self.config.training, 'max_steps') else 10
         )
 
         # Data collator
-        data_collator = DataCollatorForLanguageModeling(
-            tokenizer=self.tokenizer,
-            mlm=False # Causal language modeling
-        )
+        # data_collator = DataCollatorForLanguageModeling(
+        #     tokenizer=self.tokenizer,
+        #     mlm=False # Causal language modeling
+        # )
+        data_collator = DataCollatorForSeq2Seq(tokenizer=self.tokenizer)
 
         # Initialize trainer
-        self.trainer = Trainer(
+        self.trainer = SFTTrainer(
             model=self.model,
             args=training_args,
             train_dataset=tokenized_dataset,
             eval_dataset=tokenized_dataset, # Using same dataset for eval (for demo)
             data_collator=data_collator,
             tokenizer=self.tokenizer,
+            dataset_text_field="text",
+            packing=False # Can make training 5x faster for short sequences.
         )
 
         self.logger.info("Trainer setup completed")

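The hasattr fallback above can be written more compactly as getattr(self.config.training, 'max_steps', 10). Also, whether dataset_text_field and packing are accepted directly by SFTTrainer depends on the installed trl release; in newer versions they are set on SFTConfig instead, roughly as in this sketch (output directory assumed to mirror the diff):

    from trl import SFTConfig

    training_args = SFTConfig(
        output_dir="./models/checkpoints",
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        max_steps=30,
        dataset_text_field="text",
        packing=False,
    )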