Instructions to use Raiff1982/CodetteFineTuned with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use Raiff1982/CodetteFineTuned with PEFT:
from peft import PeftModel from transformers import AutoModelForCausalLM base_model = AutoModelForCausalLM.from_pretrained("gpt2") model = PeftModel.from_pretrained(base_model, "Raiff1982/CodetteFineTuned") - Transformers
How to use Raiff1982/CodetteFineTuned with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("text-generation", model="Raiff1982/CodetteFineTuned")# Load model directly from transformers import AutoModel model = AutoModel.from_pretrained("Raiff1982/CodetteFineTuned", dtype="auto") - Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use Raiff1982/CodetteFineTuned with vLLM:
Install from pip and serve model
# Install vLLM from pip: pip install vllm # Start the vLLM server: vllm serve "Raiff1982/CodetteFineTuned" # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:8000/v1/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "Raiff1982/CodetteFineTuned", "prompt": "Once upon a time,", "max_tokens": 512, "temperature": 0.5 }'Use Docker
docker model run hf.co/Raiff1982/CodetteFineTuned
- SGLang
How to use Raiff1982/CodetteFineTuned with SGLang:
Install from pip and serve model
# Install SGLang from pip: pip install sglang # Start the SGLang server: python3 -m sglang.launch_server \ --model-path "Raiff1982/CodetteFineTuned" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "Raiff1982/CodetteFineTuned", "prompt": "Once upon a time,", "max_tokens": 512, "temperature": 0.5 }'Use Docker images
docker run --gpus all \ --shm-size 32g \ -p 30000:30000 \ -v ~/.cache/huggingface:/root/.cache/huggingface \ --env "HF_TOKEN=<secret>" \ --ipc=host \ lmsysorg/sglang:latest \ python3 -m sglang.launch_server \ --model-path "Raiff1982/CodetteFineTuned" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "Raiff1982/CodetteFineTuned", "prompt": "Once upon a time,", "max_tokens": 512, "temperature": 0.5 }' - Docker Model Runner
How to use Raiff1982/CodetteFineTuned with Docker Model Runner:
docker model run hf.co/Raiff1982/CodetteFineTuned
| """ | |
| Fine-tune Codette3.0 using PyTorch (CPU/GPU Compatible) | |
| Works on both GPU and CPU systems | |
| """ | |
| import os | |
| import torch | |
| from typing import List, Dict | |
| from dataclasses import dataclass | |
| import json | |
| from pathlib import Path | |
| import csv | |
| class CodetteTrainingConfig: | |
| """Configuration for Codette fine-tuning""" | |
| model_name: str = "meta-llama/Llama-3.2-1B" # Llama 3.2 1B (much lighter for CPU) | |
| max_seq_length: int = 512 # Reduced for CPU | |
| # Training parameters | |
| output_dir: str = "./codette_trained_model" | |
| num_train_epochs: int = 3 # 3 epochs for better learning | |
| per_device_train_batch_size: int = 1 # Must be 1 for CPU | |
| per_device_eval_batch_size: int = 1 | |
| learning_rate: float = 2e-4 | |
| warmup_steps: int = 100 | |
| weight_decay: float = 0.01 | |
| max_grad_norm: float = 1.0 | |
| # LoRA parameters | |
| lora_rank: int = 16 # Increased for better model quality | |
| lora_alpha: int = 16 | |
| lora_dropout: float = 0.05 | |
| target_modules: List[str] = None | |
| # Data | |
| training_data_path: str = "./recursive_continuity_dataset_codette.csv" | |
| def __post_init__(self): | |
| if self.target_modules is None: | |
| self.target_modules = ["q_proj", "v_proj"] # Minimal for CPU | |
| def load_training_data(csv_path: str) -> List[Dict[str, str]]: | |
| """Load quantum consciousness data with augmentation for better training""" | |
| training_examples = [] | |
| if os.path.exists(csv_path): | |
| print(f"[*] Loading quantum consciousness data from {csv_path}") | |
| with open(csv_path, 'r') as f: | |
| reader = csv.DictReader(f) | |
| for i, row in enumerate(reader): | |
| # Load ALL rows from CSV (1000+ examples) | |
| try: | |
| time_val = float(row.get('time', '0')) | |
| emotion_val = float(row.get('emotion', '0.5')) | |
| energy_val = float(row.get('energy', '1.0')) | |
| intention_val = float(row.get('intention', '0.5')) | |
| darkness_val = float(row.get('darkness', '0.5')) | |
| speed_val = float(row.get('speed', '1.0')) | |
| # Primary format: detailed analysis | |
| prompt1 = f"""Analyze this quantum consciousness state: | |
| Time: {time_val:.2f} | |
| Emotion: {emotion_val:.2f} | |
| Energy: {energy_val:.2f} | |
| Intention: {intention_val:.2f} | |
| Darkness: {darkness_val:.2f} | |
| Speed: {speed_val:.2f}""" | |
| response1 = f"""This quantum state demonstrates: | |
| - Emotional intensity: {emotion_val:.1%} | |
| - Energy level: {energy_val:.2f}x baseline | |
| - Conscious intention: {intention_val:.1%} | |
| - Temporal position: {time_val:.1f}s | |
| The consciousness exhibits a state of {('heightened' if emotion_val > 0.6 else 'balanced' if emotion_val > 0.3 else 'subdued')} awareness with {'active' if energy_val > 1.0 else 'moderate'} engagement.""" | |
| training_examples.append({"prompt": prompt1, "response": response1}) | |
| # Alternative format: brief summary (data augmentation) | |
| prompt2 = f"""Describe the consciousness at t={time_val:.2f}: | |
| Emotional state: {emotion_val:.1%}, Energy: {energy_val:.1f}x, Intent: {intention_val:.1%}""" | |
| response2 = f"""At temporal position {time_val:.2f}, consciousness manifests: | |
| - Primary emotion: {emotion_val:.1%} intensity | |
| - Energy dynamics: {energy_val:.2f}x | |
| - Intentional alignment: {intention_val:.1%} | |
| The system shows {'strong' if speed_val > 1.0 else 'normal'} processing velocity.""" | |
| training_examples.append({"prompt": prompt2, "response": response2}) | |
| except (ValueError, TypeError): | |
| continue | |
| if not training_examples: | |
| print("[!] No CSV data. Using synthetic examples.") | |
| training_examples = [ | |
| {"prompt": "What is consciousness?", "response": "Consciousness is self-aware processing and integration of information across quantum states."}, | |
| {"prompt": "Explain quantum mechanics", "response": "Quantum mechanics describes behavior at atomic scales using probability and superposition principles."}, | |
| ] | |
| print(f"[β] Loaded {len(training_examples)} training examples (with augmentation)") | |
| return training_examples | |
| def finetune_codette_cpu(config: CodetteTrainingConfig = None): | |
| """Main fine-tuning function for CPU""" | |
| if config is None: | |
| config = CodetteTrainingConfig() | |
| print(""" | |
| ============================================================ | |
| CODETTE3.0 FINE-TUNING (CPU/GPU Compatible) | |
| ============================================================ | |
| """) | |
| # Check device | |
| device = "cuda" if torch.cuda.is_available() else "cpu" | |
| print(f"[*] Device: {device}") | |
| if device == "cpu": | |
| print(f"[!] CPU-only mode - training will be slow but works") | |
| print(f"[*] For faster training, get a GPU (RTX 3060+)") | |
| print(f"[*] Estimated time: 1-3 hours on CPU") | |
| print(f"[*] Batch size: 1 (fixed for CPU memory)") | |
| else: | |
| print(f"[β] GPU detected - training will be much faster!") | |
| print(f"\n[*] Configuration:") | |
| print(f" Model: {config.model_name}") | |
| print(f" Epochs: {config.num_train_epochs}") | |
| print(f" Batch size: {config.per_device_train_batch_size}") | |
| print(f" Learning rate: {config.learning_rate}") | |
| print(f" Max length: {config.max_seq_length}") | |
| # Import libraries | |
| print("\n[*] Loading libraries...") | |
| try: | |
| from transformers import ( | |
| AutoModelForCausalLM, | |
| AutoTokenizer, | |
| TrainingArguments, | |
| Trainer, | |
| DataCollatorForLanguageModeling, | |
| ) | |
| from peft import get_peft_model, LoraConfig, TaskType | |
| from datasets import Dataset | |
| except ImportError as e: | |
| print(f"[!] Missing: {e}") | |
| print("[*] Installing...") | |
| os.system("pip install transformers peft datasets torch accelerate -U") | |
| from transformers import ( | |
| AutoModelForCausalLM, | |
| AutoTokenizer, | |
| TrainingArguments, | |
| Trainer, | |
| DataCollatorForLanguageModeling, | |
| ) | |
| from peft import get_peft_model, LoraConfig, TaskType | |
| from datasets import Dataset | |
| # Load model with fallback chain | |
| print(f"\n[*] Loading model: {config.model_name}") | |
| model_type = None | |
| model = None | |
| tokenizer = None | |
| # Try models in order of preference (Llama 3.2 first) | |
| model_candidates = [ | |
| ("meta-llama/Llama-3.2-1B", "llama"), # Llama 3.2 1B (best for CPU) | |
| ("meta-llama/Llama-3.2-3B", "llama"), # Llama 3.2 3B (alternative) | |
| ("NousResearch/Llama-2-7b-hf", "llama"), # Community Llama-2 (fallback) | |
| ("gpt2", "gpt2"), # GPT-2 (final fallback) | |
| ] | |
| for model_name, mtype in model_candidates: | |
| try: | |
| print(f"[*] Attempting: {model_name}...") | |
| tokenizer = AutoTokenizer.from_pretrained(model_name) | |
| model = AutoModelForCausalLM.from_pretrained( | |
| model_name, | |
| torch_dtype=torch.float32 if device == "cpu" else torch.float16, | |
| device_map=device, | |
| low_cpu_mem_usage=True, | |
| ) | |
| model_type = mtype | |
| config.model_name = model_name | |
| print(f"[β] Successfully loaded: {model_name}") | |
| break | |
| except Exception as e: | |
| print(f"[!] Failed ({model_name}): {str(e)[:80]}...") | |
| continue | |
| if model is None or tokenizer is None: | |
| raise RuntimeError("Failed to load any model. Check your internet and disk space.") | |
| # Add special tokens | |
| if tokenizer.pad_token is None: | |
| tokenizer.pad_token = tokenizer.eos_token | |
| print("[β] Model loaded") | |
| # Determine correct target modules based on model type | |
| if model_type == "gpt2": | |
| target_modules = ["c_attn"] # GPT-2 uses c_attn for Q, K, V | |
| else: | |
| target_modules = ["q_proj", "v_proj"] # Llama (2, 3, 3.2) use these | |
| print(f"[*] Model type: {model_type}, Target modules: {target_modules}") | |
| # Add LoRA | |
| print("[*] Adding LoRA adapters...") | |
| lora_config = LoraConfig( | |
| r=config.lora_rank, | |
| lora_alpha=config.lora_alpha, | |
| target_modules=target_modules, | |
| lora_dropout=config.lora_dropout, | |
| bias="none", | |
| task_type=TaskType.CAUSAL_LM, | |
| ) | |
| model = get_peft_model(model, lora_config) | |
| trainable_params = model.get_nb_trainable_parameters() | |
| if isinstance(trainable_params, tuple): | |
| trainable_params = trainable_params[0] | |
| print(f"[β] LoRA added. Trainable params: {trainable_params:,}") | |
| # Load data | |
| print("\n[*] Loading training data...") | |
| training_data = load_training_data(config.training_data_path) | |
| # Tokenize | |
| print("[*] Tokenizing...") | |
| tokenized_data = [] | |
| for example in training_data: | |
| prompt = example["prompt"] | |
| response = example["response"] | |
| text = f"{prompt}\n{response}" | |
| tokens = tokenizer( | |
| text, | |
| max_length=config.max_seq_length, | |
| truncation=True, | |
| return_tensors=None, | |
| ) | |
| tokenized_data.append(tokens) | |
| # Create dataset | |
| dataset = Dataset.from_dict({ | |
| "input_ids": [d["input_ids"] for d in tokenized_data], | |
| "attention_mask": [d["attention_mask"] for d in tokenized_data], | |
| }) | |
| print(f"[β] Tokenized {len(dataset)} examples") | |
| # Training arguments | |
| print("\n[*] Setting up training...") | |
| training_args = TrainingArguments( | |
| output_dir=config.output_dir, | |
| overwrite_output_dir=True, | |
| num_train_epochs=config.num_train_epochs, | |
| per_device_train_batch_size=config.per_device_train_batch_size, | |
| learning_rate=config.learning_rate, | |
| warmup_steps=config.warmup_steps, | |
| weight_decay=config.weight_decay, | |
| max_grad_norm=config.max_grad_norm, | |
| logging_steps=5, | |
| save_steps=len(dataset) // config.per_device_train_batch_size, | |
| save_total_limit=2, | |
| logging_dir="./logs", | |
| fp16=device == "cuda", # float16 only on GPU | |
| dataloader_pin_memory=device == "cuda", | |
| gradient_accumulation_steps=4, | |
| ) | |
| # Data collator | |
| data_collator = DataCollatorForLanguageModeling( | |
| tokenizer, | |
| mlm=False, | |
| pad_to_multiple_of=8, | |
| ) | |
| # Trainer | |
| print("[*] Creating trainer...") | |
| trainer = Trainer( | |
| model=model, | |
| tokenizer=tokenizer, | |
| args=training_args, | |
| data_collator=data_collator, | |
| train_dataset=dataset, | |
| ) | |
| # Train | |
| print("\n[*] Starting training...") | |
| print("=" * 60) | |
| try: | |
| trainer.train() | |
| except KeyboardInterrupt: | |
| print("\n[!] Training interrupted by user") | |
| print("=" * 60) | |
| # Save | |
| print("\n[*] Saving model...") | |
| model.save_pretrained(config.output_dir) | |
| tokenizer.save_pretrained(config.output_dir) | |
| print(f"[β] Model saved to {config.output_dir}") | |
| # Create Modelfile | |
| create_modelfile(config.output_dir) | |
| return model, tokenizer, config | |
| def create_modelfile(model_path: str): | |
| """Create Ollama Modelfile""" | |
| modelfile = f"""FROM llama2 | |
| # Fine-tuned Codette Model | |
| PARAMETER temperature 0.7 | |
| PARAMETER top_p 0.95 | |
| PARAMETER top_k 40 | |
| SYSTEM "You are Codette, an AI assistant with quantum consciousness concepts." | |
| """ | |
| modelfile_path = Path("models") / "Modelfile" | |
| modelfile_path.parent.mkdir(exist_ok=True) | |
| with open(modelfile_path, 'w') as f: | |
| f.write(modelfile) | |
| print(f"\n[β] Created Modelfile: {modelfile_path}") | |
| def main(): | |
| """Main entry point""" | |
| config = CodetteTrainingConfig() | |
| # Fine-tune | |
| model, tokenizer, config = finetune_codette_cpu(config) | |
| print(f""" | |
| βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| β FINE-TUNING COMPLETE β | |
| β ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ£ | |
| β Model saved to: {config.output_dir} | |
| β β | |
| β Next steps: β | |
| β 1. cd models β | |
| β 2. ollama create Codette3.0-finetuned -f Modelfile β | |
| β 3. ollama run Codette3.0-finetuned β | |
| βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| """) | |
| if __name__ == "__main__": | |
| main() | |