Spaces:
Running
Running
Update afrobench lite
Browse files
data/leaderboard_json/afrobench_lite.json
CHANGED
|
@@ -18,9 +18,10 @@
|
|
| 18 |
"GPT-4.1 (April)": 67.5,
|
| 19 |
"LLaMa 4 405B": 45.5,
|
| 20 |
"Lugha-Llama 8B": 36.7,
|
| 21 |
-
"Gemini-2.5 Flash": 69.
|
| 22 |
-
"Claude
|
| 23 |
-
"Claude
|
|
|
|
| 24 |
}
|
| 25 |
},
|
| 26 |
"Intent": {
|
|
@@ -42,9 +43,10 @@
|
|
| 42 |
"GPT-4.1 (April)": 84.4,
|
| 43 |
"LLaMa 4 405B": 73.9,
|
| 44 |
"Lugha-Llama 8B": 4.1,
|
| 45 |
-
"Gemini-2.5 Flash": 87.
|
| 46 |
-
"Claude
|
| 47 |
-
"Claude
|
|
|
|
| 48 |
}
|
| 49 |
},
|
| 50 |
"MT(en/fr-xx)": {
|
|
@@ -66,9 +68,9 @@
|
|
| 66 |
"GPT-4.1 (April)": 47.3,
|
| 67 |
"LLaMa 4 405B": 42.8,
|
| 68 |
"Lugha-Llama 8B": 22.1,
|
| 69 |
-
"Gemini-2.5 Flash": 46.
|
| 70 |
-
"Claude
|
| 71 |
-
"Claude
|
| 72 |
}
|
| 73 |
},
|
| 74 |
"MMLU": {
|
|
@@ -90,9 +92,10 @@
|
|
| 90 |
"GPT-4.1 (April)": 60.2,
|
| 91 |
"LLaMa 4 405B": 15.8,
|
| 92 |
"Lugha-Llama 8B": 25.2,
|
| 93 |
-
"Gemini-2.5 Flash":
|
| 94 |
-
"Claude
|
| 95 |
-
"Claude
|
|
|
|
| 96 |
}
|
| 97 |
},
|
| 98 |
"Math": {
|
|
@@ -114,9 +117,10 @@
|
|
| 114 |
"GPT-4.1 (April)": 59.5,
|
| 115 |
"LLaMa 4 405B": 65.0,
|
| 116 |
"Lugha-Llama 8B": 1.8,
|
| 117 |
-
"Gemini-2.5 Flash": 70.
|
| 118 |
-
"Claude
|
| 119 |
-
"Claude
|
|
|
|
| 120 |
}
|
| 121 |
},
|
| 122 |
"Topic": {
|
|
@@ -139,8 +143,9 @@
|
|
| 139 |
"LLaMa 4 405B": 80.6,
|
| 140 |
"Lugha-Llama 8B": 34.1,
|
| 141 |
"Gemini-2.5 Flash": 87.2,
|
| 142 |
-
"Claude
|
| 143 |
-
"Claude
|
|
|
|
| 144 |
}
|
| 145 |
},
|
| 146 |
"RC": {
|
|
@@ -162,9 +167,10 @@
|
|
| 162 |
"GPT-4.1 (April)": 64.8,
|
| 163 |
"LLaMa 4 405B": 24.6,
|
| 164 |
"Lugha-Llama 8B": 23.0,
|
| 165 |
-
"Gemini-2.5 Flash":
|
| 166 |
-
"Claude
|
| 167 |
-
"Claude
|
|
|
|
| 168 |
}
|
| 169 |
}
|
| 170 |
}
|
|
|
|
| 18 |
"GPT-4.1 (April)": 67.5,
|
| 19 |
"LLaMa 4 405B": 45.5,
|
| 20 |
"Lugha-Llama 8B": 36.7,
|
| 21 |
+
"Gemini-2.5 Flash": 69.9,
|
| 22 |
+
"Claude 4.0 Sonnet": 68.1,
|
| 23 |
+
"Claude 3.7 Sonnet": 59.8,
|
| 24 |
+
"Claude 4.5 Sonnet": 71.7
|
| 25 |
}
|
| 26 |
},
|
| 27 |
"Intent": {
|
|
|
|
| 43 |
"GPT-4.1 (April)": 84.4,
|
| 44 |
"LLaMa 4 405B": 73.9,
|
| 45 |
"Lugha-Llama 8B": 4.1,
|
| 46 |
+
"Gemini-2.5 Flash": 87.9,
|
| 47 |
+
"Claude 4.0 Sonnet": 80.4,
|
| 48 |
+
"Claude 3.7 Sonnet": 73.4,
|
| 49 |
+
"Claude 4.5 Sonnet": 81.6
|
| 50 |
}
|
| 51 |
},
|
| 52 |
"MT(en/fr-xx)": {
|
|
|
|
| 68 |
"GPT-4.1 (April)": 47.3,
|
| 69 |
"LLaMa 4 405B": 42.8,
|
| 70 |
"Lugha-Llama 8B": 22.1,
|
| 71 |
+
"Gemini-2.5 Flash": 46.5,
|
| 72 |
+
"Claude 4.0 Sonnet": 46.0,
|
| 73 |
+
"Claude 3.7 Sonnet": 44.0
|
| 74 |
}
|
| 75 |
},
|
| 76 |
"MMLU": {
|
|
|
|
| 92 |
"GPT-4.1 (April)": 60.2,
|
| 93 |
"LLaMa 4 405B": 15.8,
|
| 94 |
"Lugha-Llama 8B": 25.2,
|
| 95 |
+
"Gemini-2.5 Flash": 67.7,
|
| 96 |
+
"Claude 4.0 Sonnet": 75.5,
|
| 97 |
+
"Claude 3.7 Sonnet": 66.7,
|
| 98 |
+
"Claude 4.5 Sonnet": 58.6
|
| 99 |
}
|
| 100 |
},
|
| 101 |
"Math": {
|
|
|
|
| 117 |
"GPT-4.1 (April)": 59.5,
|
| 118 |
"LLaMa 4 405B": 65.0,
|
| 119 |
"Lugha-Llama 8B": 1.8,
|
| 120 |
+
"Gemini-2.5 Flash": 70.6,
|
| 121 |
+
"Claude 4.0 Sonnet": 66.9,
|
| 122 |
+
"Claude 3.7 Sonnet": 35.2,
|
| 123 |
+
"Claude 4.5 Sonnet": 73.1
|
| 124 |
}
|
| 125 |
},
|
| 126 |
"Topic": {
|
|
|
|
| 143 |
"LLaMa 4 405B": 80.6,
|
| 144 |
"Lugha-Llama 8B": 34.1,
|
| 145 |
"Gemini-2.5 Flash": 87.2,
|
| 146 |
+
"Claude 4.0 Sonnet": 83.2,
|
| 147 |
+
"Claude 3.7 Sonnet": 84.9,
|
| 148 |
+
"Claude 4.5 Sonnet": 84.2
|
| 149 |
}
|
| 150 |
},
|
| 151 |
"RC": {
|
|
|
|
| 167 |
"GPT-4.1 (April)": 64.8,
|
| 168 |
"LLaMa 4 405B": 24.6,
|
| 169 |
"Lugha-Llama 8B": 23.0,
|
| 170 |
+
"Gemini-2.5 Flash": 42.2,
|
| 171 |
+
"Claude 4.0 Sonnet": 76.2,
|
| 172 |
+
"Claude 3.7 Sonnet": 65.1,
|
| 173 |
+
"Claude 4.5 Sonnet": 74.8
|
| 174 |
}
|
| 175 |
}
|
| 176 |
}
|