File size: 2,242 Bytes
d69485a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import html

import gradio as gr
import torch
from transformers import AutoModel, AutoTokenizer


# Hugging Face Hub repository id used for both the model and the tokenizer.
model_path = 'CjangCjengh/NomBert-hn2qn-v0.1'

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# trust_remote_code=True lets transformers run the modelling code shipped in
# the repository; parse_text() relies on the model's parse_nom_text method.
model = AutoModel.from_pretrained(
    model_path,
    torch_dtype='auto',
    trust_remote_code=True,
)
# Switch to evaluation mode first, then move the model to the chosen device.
model = model.eval().to(device)

tokenizer = AutoTokenizer.from_pretrained(
    model_path,
    trust_remote_code=True,
)

def parse_text(input_text):
    """Run the model on one text and render its per-character candidates.

    The text is passed to ``model.parse_nom_text`` as a single-element batch
    with gradient tracking disabled. The first entry of each returned
    sequence is used.

    Args:
        input_text: The text entered in the UI (a single string).

    Returns:
        A ``(text, html)`` tuple: the first converted text returned by the
        model, and an HTML fragment that shows, for every analysed
        character, each candidate with its probability as a percentage and
        a proportional bar.
    """
    with torch.inference_mode():
        output_text, output_probs = model.parse_nom_text(tokenizer, [input_text])

    # Collect fragments and join once instead of growing a string with +=.
    parts = ['<div>']
    for item in output_probs[0]:
        # The fragment is rendered as raw HTML, so anything that is text
        # (presumably derived from the user's input / the model vocabulary)
        # must be escaped to avoid broken layout or markup injection.
        char = html.escape(str(item['char']))
        parts.append(f'<h3>{char}</h3>')
        parts.append('<div style=\'display: flex; flex-wrap: wrap; gap: 10px;\'>')

        for candidate, prob in item['candidates']:
            candidate = html.escape(str(candidate))
            # The percentage is used both as the label and as the bar width.
            prob_percent = prob * 100
            parts.append(f'''
            <div style='margin-bottom: 15px; width: 170px;'>
                <div style='margin-bottom: 5px;'>{candidate}: {prob_percent:.2f}%</div>
                <div style='background-color: #f0f0f0; width: 100%; height: 15px; border-radius: 3px;'>
                    <div style='background-color: #4caf50; width: {prob_percent}%; height: 100%; border-radius: 3px;'></div>
                </div>
            </div>
            ''')
        parts.append('</div>')
    parts.append('</div>')

    return output_text[0], ''.join(parts)

# Script entry point: build the Gradio interface and start serving it.
if __name__ == '__main__':
    # Fixed-height, scrollable container for the candidates panel (#viz).
    viz_css = '#viz {height: 500px; overflow-y: scroll;}'

    with gr.Blocks(css=viz_css) as app:
        gr.Markdown('## NomBERT - Hán Nôm to Quốc Ngữ Converter')

        with gr.Row():
            # Left column: input box, trigger button and read-only result.
            with gr.Column(scale=1):
                nom_box = gr.Textbox(
                    label='Input Hán Nôm Text',
                    lines=5,
                    placeholder='Enter Hán Nôm text here...',
                )
                run_button = gr.Button('Parse')
                result_box = gr.Textbox(
                    label='Output Quốc Ngữ Text',
                    lines=5,
                    interactive=False,
                )

            # Right column (twice as wide): candidate probability bars.
            with gr.Column(scale=2):
                candidates_panel = gr.HTML(
                    label='Candidates Probabilities',
                    elem_id='viz',
                )

        # parse_text returns (text, html), matching the two outputs in order.
        run_button.click(
            fn=parse_text,
            inputs=nom_box,
            outputs=[result_box, candidates_panel],
        )

    app.launch()