The model is fine-tuned on the full TDK (Turkish Language Association) dictionary dataset of Turkish words.
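As the inference script below illustrates, the adapter is used for dictionary-style completion: given a headword, the model continues with its definition from a plain `Word: ...\nDefinition:` prompt.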
Inference Code
```python
# coding: utf-8
import sys

import torch
from huggingface_hub import hf_hub_download
from peft import PeftModel
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig


def main():
    # Fix Unicode printing on Windows
    sys.stdout.reconfigure(encoding="utf-8")

    # 1. Configuration for the Hugging Face load
    hf_model_id = "uisikdag/qwen3-8b-tr-dict-full"
    print(f"Loading model from Hugging Face: {hf_model_id}...")

    # Download the chat-template utility file shipped with the repo
    filename = "utils_chat_templates.py"
    local_file_path = hf_hub_download(
        repo_id=hf_model_id,
        filename=filename,
        local_dir=".",  # save it directly to the current directory
    )
    print(f"Downloaded chat template utility to: {local_file_path}")

    # 2. Tokenizer loading
    try:
        tokenizer = AutoTokenizer.from_pretrained(hf_model_id)
        from utils_chat_templates import get_chat_template
        tokenizer.chat_template = get_chat_template(hf_model_id)
    except OSError as e:
        print(f"Error: Could not find tokenizer at {hf_model_id}. Check the ID and access permissions.")
        print(f"Details: {e}")
        return

    # 3. Model loading: 4-bit quantized base model + fine-tuned adapter
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
    )
    try:
        base_model_name = "Qwen/Qwen3-8B"
        base_model = AutoModelForCausalLM.from_pretrained(
            base_model_name,
            quantization_config=bnb_config,
            device_map="auto",
            trust_remote_code=True,
        )
        # Attach the fine-tuned adapter weights on top of the quantized base model
        model = PeftModel.from_pretrained(base_model, hf_model_id)
    except Exception as e:
        print(f"Error loading model from Hugging Face: {e}")
        return

    # 4. Test cases
    test_words = [
        "kalem",
        "bilgisayar",
        "sevgi",
        "okul",
        "kitap",
        "agac",
        "deniz",
    ]

    # Set model to evaluation mode
    model.eval()

    for word in test_words:
        prompt = f"Word: {word}\nDefinition:"
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=100,
                do_sample=True,
                temperature=0.5,
                top_k=50,
                top_p=0.95,
                pad_token_id=tokenizer.eos_token_id,
            )

        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Extract just the definition part
        definition_part = response.split("Definition:")[-1].strip()

        print("-" * 30)
        print(f"Word: {word}")
        print("-" * 10)
        print(f"Definition: {definition_part}")
        print("-" * 30)


if __name__ == "__main__":
    main()
```
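Running the script requires `torch`, `transformers`, `peft`, `bitsandbytes`, `accelerate`, and `huggingface_hub`, and a CUDA GPU is effectively required for the 4-bit quantized load.

The script sets the tokenizer's chat template but generates from a raw `Word:` / `Definition:` prompt. If you prefer chat-style inference, a minimal sketch follows; it assumes `model` and `tokenizer` are already loaded as above and that the downloaded template accepts standard role/content messages, so treat the exact message wording as an assumption rather than the format the adapter was trained on.

```python
import torch


def define_word_chat(model, tokenizer, word: str) -> str:
    # Hypothetical user message; the exact instruction phrasing is an assumption.
    messages = [{"role": "user", "content": f"Word: {word}\nDefinition:"}]
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,  # append the assistant turn marker
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=100,
            do_sample=True,
            temperature=0.5,
            top_p=0.95,
            pad_token_id=tokenizer.eos_token_id,
        )
    # Decode only the tokens generated after the prompt
    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
```

Decoding only the newly generated tokens avoids having to split the response on the `Definition:` marker.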