Spaces:
Build error
Build error
| import sys | |
| tabpfn_path = 'TabPFN' | |
| sys.path.insert(0, tabpfn_path) # our submodule of the TabPFN repo (at 045c8400203ebd062346970b4f2c0ccda5a40618) | |
| from TabPFN.scripts.transformer_prediction_interface import TabPFNClassifier | |
| import numpy as np | |
| import pandas as pd | |
| import torch | |
| import gradio as gr | |
| import openml | |
def _blank_cells(table: np.ndarray) -> np.ndarray:
    """Return a boolean mask marking the cells of *table* that count as blank.

    A cell is blank when it is ``None`` or the empty string. Inspecting the
    cells as Python objects avoids calling ``len`` on numeric cells.
    """
    cells = np.asarray(table, dtype=object)
    flags = [
        cell is None or (isinstance(cell, str) and cell == '')
        for cell in cells.ravel()
    ]
    return np.array(flags, dtype=bool).reshape(cells.shape)


def compute(table: np.ndarray):
    """Predict the blank cells of one column from the fully filled rows.

    Rows that are entirely blank are dropped first. Every remaining row with
    a blank cell becomes an evaluation row; all other rows are the training
    set. The column holding the blanks is the label column, every other
    column is converted to ``float32`` and used as a feature.

    Args:
        table: 2-D table of cells (strings, numbers or ``None``).

    Returns:
        A ``(message, table)`` pair. On invalid input ``message`` is a
        Markdown error string and ``table`` is ``None``. On success
        ``message`` is ``None`` and ``table`` is the stringified input
        (without the all-blank rows) whose blank cells were replaced by
        ``"<label> (p=<winning probability>)"``.
    """
    table = np.asarray(table)
    if table.ndim != 2 or table.size == 0:
        # Nothing to predict; same answer as for a table with only blank rows.
        return "**Please leave at least one field blank for prediction.**", None

    blank = _blank_cells(table)
    has_content = ~blank.all(axis=1)
    table, blank = table[has_content], blank[has_content]

    blank_rows, blank_cols = np.nonzero(blank)
    if blank_rows.size == 0:
        return "**Please leave at least one field blank for prediction.**", None
    if not np.all(blank_cols == blank_cols[0]):
        return "**Please only leave fields of one column blank for prediction.**", None

    y_column = blank_cols[0]
    eval_lines = blank_rows
    train_table = np.delete(table, eval_lines, axis=0)
    if train_table.shape[0] == 0:
        # Every row is missing its label, so there is nothing to fit on.
        return "**Please fill in the label of at least one row to learn from.**", None
    eval_table = table[eval_lines]

    try:
        x_train = torch.tensor(np.delete(train_table, y_column, axis=1).astype(np.float32))
        x_eval = torch.tensor(np.delete(eval_table, y_column, axis=1).astype(np.float32))
    except (ValueError, TypeError):
        return "**Please only add numbers (to the inputs) or leave fields empty.**", None
    y_train = train_table[:, y_column]

    classifier = TabPFNClassifier(base_path=tabpfn_path, device='cpu')
    classifier.fit(x_train, y_train)
    y_eval, p_eval = classifier.predict(x_eval, return_winning_probability=True)

    # Stringify, then widen to object dtype: a fixed-width unicode array would
    # silently truncate the longer "label (p=...)" strings written below.
    out_table = table.astype(str).astype(object)
    out_table[eval_lines, y_column] = [f"{y_e} (p={p_e:.2f})" for y_e, p_e in zip(y_eval, p_eval)]
    return None, out_table
def upload_file(file):
    """Load an uploaded ``.arff``, ``.csv`` or ``.data`` file into a DataFrame.

    Args:
        file: Either a path string or an object whose ``name`` attribute is
            the path of the uploaded file. ``None`` is accepted (presumably
            what the file widget sends when it is cleared - confirm against
            the Gradio version in use).

    Returns:
        A ``pandas.DataFrame`` with the file contents, or ``None`` when no
        file was given or the extension is not supported. CSV-like files are
        read without a header row and get integer column labels.
    """
    if file is None:
        return None
    path = file if isinstance(file, str) else file.name
    lowered = path.lower()  # match extensions case-insensitively

    if lowered.endswith('.arff'):
        dataset = openml.datasets.OpenMLDataset('t', 'test', data_file=path)
        # Only the feature matrix and the attribute names are needed here.
        features, _, _, attribute_names = dataset.get_data(dataset_format="array")
        return pd.DataFrame(features, columns=attribute_names)

    if lowered.endswith(('.csv', '.data')):
        df = pd.read_csv(path, header=None)
        df.columns = np.arange(len(df.columns))
        return df

    return None
# Toy table shown when the demo starts. The first two columns are the inputs
# and the last column is the label: rows whose inputs sum up to 3 carry
# label 1, larger sums carry label 2. The final row leaves the label blank
# so that it gets predicted.
example = [
    [1, 2, 1],
    [2, 1, 1],
    [1, 1, 1],
    [2, 2, 2],
    [3, 4, 2],
    [3, 2, 2],
    [2, 3, ''],
]
# Gradio front end: an editable input table, a file drop zone with two
# bundled example files, a predict button and the outputs (a message area
# and the result table).
with gr.Blocks() as demo:
    # --- layout: components appear in the order they are created ---
    gr.Markdown("""This demo allows you to play with the **TabPFN**.
You can either change the table manually (we have filled it with a toy benchmark, sum up to 3 has label 1 and over that label 2).
The network predicts fields you leave empty. Only one column can have empty entries that are predicted.
Please, provide everything but the label column as numeric values. It is ok to encode classes as integers.
""")
    inp_table = gr.DataFrame(
        type='numpy',
        value=example,
        headers=[''] * 3,
    )
    inp_file = gr.File(
        label='Drop either a .csv (without header, only numeric values for all but the labels) or a .arff file.',
    )
    # Example files are run through upload_file to fill the input table.
    examples = gr.Examples(
        examples=['iris.csv', 'balance-scale.arff'],
        inputs=[inp_file],
        outputs=[inp_table],
        fn=upload_file,
        cache_examples=True,
    )
    btn = gr.Button("Predict Empty Table Cells")
    out_text = gr.Markdown()
    out_table = gr.DataFrame()

    # --- wiring ---
    # A change of the file widget reloads the input table from that file.
    inp_file.change(fn=upload_file, inputs=inp_file, outputs=inp_table)
    # The button sends the input table to compute, which yields a message
    # and a table for the two output components.
    btn.click(fn=compute, inputs=inp_table, outputs=[out_text, out_table])

demo.launch()