Spaces:
Running
Running
jasonshaoshun
committed on
Commit
·
85aecd7
1
Parent(s):
6c09a9f
debug
Browse files
src/leaderboard/read_evals.py
CHANGED
|
@@ -109,14 +109,19 @@ class EvalResult_MIB_SUBGRAPH:
|
|
| 109 |
# Initialize all possible columns with '-'
|
| 110 |
expected_models = TasksMib_Subgraph.get_all_models()
|
| 111 |
expected_tasks = TasksMib_Subgraph.get_all_tasks()
|
| 112 |
-
for task in expected_tasks:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
for model in task.value.models:
|
| 114 |
-
|
| 115 |
-
# continue
|
| 116 |
-
# if model == "qwen2_5" and task.startswith(("arithmetic", "arc")):
|
| 117 |
-
# continue
|
| 118 |
-
# if model == "gemma2" and (task.startswith("arithmetic") or task == "arc_challenge"):
|
| 119 |
-
# continue
|
| 120 |
data_dict[f"{task}_{model}"] = '-'
|
| 121 |
|
| 122 |
all_scores = []
|
|
|
|
| 109 |
# Initialize all possible columns with '-'
|
| 110 |
expected_models = TasksMib_Subgraph.get_all_models()
|
| 111 |
expected_tasks = TasksMib_Subgraph.get_all_tasks()
|
| 112 |
+
# for task in expected_tasks:
|
| 113 |
+
# for model in task.value.models:
|
| 114 |
+
# # if model == "gpt2" and task != "ioi":
|
| 115 |
+
# # continue
|
| 116 |
+
# # if model == "qwen2_5" and task.startswith(("arithmetic", "arc")):
|
| 117 |
+
# # continue
|
| 118 |
+
# # if model == "gemma2" and (task.startswith("arithmetic") or task == "arc_challenge"):
|
| 119 |
+
# # continue
|
| 120 |
+
# data_dict[f"{task}_{model}"] = '-'
|
| 121 |
+
|
| 122 |
+
for task in TasksMib_Subgraph:
|
| 123 |
for model in task.value.models:
|
| 124 |
+
print(f"task is {task}, model is {model}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
data_dict[f"{task}_{model}"] = '-'
|
| 126 |
|
| 127 |
all_scores = []
|