Spaces:

mib-bench
/

leaderboard

Running

jasonshaoshun committed on Jan 29

Commit

85aecd7

1 Parent(s): 6c09a9f

debug

Files changed (1) hide show

src/leaderboard/read_evals.py CHANGED Viewed

@@ -109,14 +109,19 @@ class EvalResult_MIB_SUBGRAPH:
         # Initialize all possible columns with '-'
         expected_models = TasksMib_Subgraph.get_all_models()
         expected_tasks = TasksMib_Subgraph.get_all_tasks()
-        for task in expected_tasks:
             for model in task.value.models:
-                # if model == "gpt2" and task != "ioi":
-                #     continue
-                # if model == "qwen2_5" and task.startswith(("arithmetic", "arc")):
-                #     continue
-                # if model == "gemma2" and (task.startswith("arithmetic") or task == "arc_challenge"):
-                #     continue
                 data_dict[f"{task}_{model}"] = '-'
         all_scores = []

         # Initialize all possible columns with '-'
         expected_models = TasksMib_Subgraph.get_all_models()
         expected_tasks = TasksMib_Subgraph.get_all_tasks()
+        # for task in expected_tasks:
+        #     for model in task.value.models:
+        #         # if model == "gpt2" and task != "ioi":
+        #         #     continue
+        #         # if model == "qwen2_5" and task.startswith(("arithmetic", "arc")):
+        #         #     continue
+        #         # if model == "gemma2" and (task.startswith("arithmetic") or task == "arc_challenge"):
+        #         #     continue
+        #         data_dict[f"{task}_{model}"] = '-'
+        for task in TasksMib_Subgraph:
             for model in task.value.models:
+                print(f"task is {task}, model is {model}")
                 data_dict[f"{task}_{model}"] = '-'
         all_scores = []