Update README.md
README.md (CHANGED)
````diff
@@ -10,7 +10,8 @@ tags:
 - code
 - qwen
 - text-generation-inference
-
+- transformers
+library_name: transformers
 ---
 
 # Qwen3-Coder-30B-A3B-Kubernetes-Instruct
@@ -72,26 +73,25 @@ Use the code below to get started with the model.
 ```python
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
-from peft import PeftModel
 
-base_model_id = "..."
-adapter_id = "..."
+# Path to your merged model (no base model needed)
+model_id = "Dogacel/Qwen3-Coder-30B-A3B-Kubernetes-Instruct"
 
-# Load
+# 1. Load the Full Model
+# Use device_map="auto" to handle the 30B size efficiently
 model = AutoModelForCausalLM.from_pretrained(
-    base_model_id,
+    model_id,
     torch_dtype=torch.float16,
     device_map="auto",
     low_cpu_mem_usage=True
 )
 
-
-model = PeftModel.from_pretrained(model, adapter_id)
-tokenizer = AutoTokenizer.from_pretrained(base_model_id)
+tokenizer = AutoTokenizer.from_pretrained(model_id)
 
+# 2. Run Inference
 messages = [
     {"role": "system", "content": "You are a Kubernetes expert. Diagnose issues step-by-step, then provide the fixed YAML configuration."},
-    {"role": "user", "content": "
+    {"role": "user", "content": "My Pod is in Pending state and describing it says 'Insufficient cpu'. How do I fix this?"}
 ]
 
 text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
````
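The diffed snippet ends at `apply_chat_template`, so the generation call itself sits outside the hunk's context. As a rough sketch of the step that typically follows, using only standard `transformers` calls (the `max_new_tokens` value is illustrative, not taken from the README):

```python
# Continuation sketch, not part of the commit: run the chat-formatted
# prompt through the model. max_new_tokens is an illustrative choice.
inputs = tokenizer([text], return_tensors="pt").to(model.device)
output_ids = model.generate(**inputs, max_new_tokens=512)

# Decode only the newly generated tokens, skipping the prompt tokens.
response = tokenizer.decode(output_ids[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
print(response)
```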
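The metadata hunk also sets `library_name: transformers`, which tells the Hub which library the checkpoint targets and drives the auto-generated usage snippet. A minimal sketch of the generic `pipeline` entry point that this metadata points to, assuming the merged model id from the second hunk (the `torch_dtype` and `device_map` settings here echo the README's `from_pretrained` arguments):

```python
from transformers import pipeline

# Hypothetical quick-start via the generic pipeline API; the dtype and
# device settings mirror the from_pretrained call in the README example.
pipe = pipeline(
    "text-generation",
    model="Dogacel/Qwen3-Coder-30B-A3B-Kubernetes-Instruct",
    torch_dtype="auto",
    device_map="auto",
)
```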