Spaces:
Running
Running
jasonshaoshun
committed on
Commit
·
8b19d83
1
Parent(s):
85aecd7
debug
Browse files
src/leaderboard/read_evals.py
CHANGED
|
@@ -118,11 +118,11 @@ class EvalResult_MIB_SUBGRAPH:
|
|
| 118 |
# # if model == "gemma2" and (task.startswith("arithmetic") or task == "arc_challenge"):
|
| 119 |
# # continue
|
| 120 |
# data_dict[f"{task}_{model}"] = '-'
|
| 121 |
-
|
| 122 |
for task in TasksMib_Subgraph:
|
| 123 |
for model in task.value.models:
|
| 124 |
-
print(f"task is {task}, model is {model}")
|
| 125 |
-
data_dict[f"{task}_{model}"] = '-'
|
| 126 |
|
| 127 |
all_scores = []
|
| 128 |
for task, task_results in self.results.items():
|
|
|
|
| 118 |
# # if model == "gemma2" and (task.startswith("arithmetic") or task == "arc_challenge"):
|
| 119 |
# # continue
|
| 120 |
# data_dict[f"{task}_{model}"] = '-'
|
| 121 |
+
|
| 122 |
for task in TasksMib_Subgraph:
|
| 123 |
for model in task.value.models:
|
| 124 |
+
print(f"task is {task}, task.value.benchmark is {task.value.benchmark}, model is {model}")
|
| 125 |
+
data_dict[f"{task.value.benchmark}_{model}"] = '-'
|
| 126 |
|
| 127 |
all_scores = []
|
| 128 |
for task, task_results in self.results.items():
|