Spaces:

mib-bench
/

leaderboard

Running

jasonshaoshun committed on Jan 29

Commit

8b19d83

1 Parent(s): 85aecd7

debug

Files changed (1) hide show

src/leaderboard/read_evals.py CHANGED Viewed

@@ -118,11 +118,11 @@ class EvalResult_MIB_SUBGRAPH:
         #         # if model == "gemma2" and (task.startswith("arithmetic") or task == "arc_challenge"):
         #         #     continue
         #         data_dict[f"{task}_{model}"] = '-'
         for task in TasksMib_Subgraph:
             for model in task.value.models:
-                print(f"task is {task}, model is {model}")
-                data_dict[f"{task}_{model}"] = '-'
         all_scores = []
         for task, task_results in self.results.items():

         #         # if model == "gemma2" and (task.startswith("arithmetic") or task == "arc_challenge"):
         #         #     continue
         #         data_dict[f"{task}_{model}"] = '-'
         for task in TasksMib_Subgraph:
             for model in task.value.models:
+                print(f"task is {task}, task.value.benchmark is {task.value.benchmark}, model is {model}")
+                data_dict[f"{task.value.benchmark}_{model}"] = '-'
         all_scores = []
         for task, task_results in self.results.items():