Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from transformers import AutoModelForSeq2SeqLM, NllbTokenizerFast | |
| import torch | |
# Checkpoint identifier handed to both from_pretrained() calls below.
model_id = "ClaudBarbara/Open_Access_Khmer"

# The tokenizer and the seq2seq model are loaded once at import time and
# shared by every call to translate().
tokenizer = NllbTokenizerFast.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
def translate(text, direction):
    """Translate ``text`` between English and Khmer using the module-level model.

    Args:
        text: Text to translate. Tokenization truncates it to 512 tokens.
        direction: ``"English to Khmer"`` selects English -> Khmer; any other
            value selects Khmer -> English.

    Returns:
        The decoded translation with special tokens stripped, or ``""`` when
        ``text`` is ``None``, empty, or whitespace-only.
    """
    # Nothing to translate: skip tokenization and beam search instead of
    # decoding whatever the model would generate for an empty sequence.
    if text is None or not text.strip():
        return ""

    if direction == "English to Khmer":
        src_lang, tgt_lang = "eng_Latn", "khm_Khmr"
    else:
        src_lang, tgt_lang = "khm_Khmr", "eng_Latn"

    # Set the source language on the shared tokenizer before encoding.
    tokenizer.src_lang = src_lang
    inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)

    # Inference only, so gradient tracking is disabled.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            # Force generation to begin with the target-language code token.
            forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt_lang),
            max_length=512,
            num_beams=4,
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Input widgets, listed in the positional order of translate(text, direction).
text_input = gr.Textbox(label="Input Text", lines=5)
direction_input = gr.Radio(
    ["English to Khmer", "Khmer to English"],
    label="Direction",
    value="English to Khmer",
)

# Output widget that shows the string returned by translate().
translation_output = gr.Textbox(label="Translation", lines=5)

# Wire the widgets to translate() and start the app.
demo = gr.Interface(
    fn=translate,
    inputs=[text_input, direction_input],
    outputs=translation_output,
    title="Khmer Legal Bridge",
    description="English-Khmer Legal Translation",
)
demo.launch()