leaderboard / eval-results-mib-subgraph /submissions /results_2024-10-2T13-36-121.json
Aaron Mueller
updated filtering, add F= tab
1d8e193
raw
history blame
973 Bytes
{"method_name": "EAP-IG (mean)", "results": [
{"model_id": "meta-llama/Llama-3.1-8B", "scores": {
"ioi": {
"edge_counts": [10.0, 29.0, 117.0, 269.0, 561.0, 1570.0, 3194.0, 6386.0, 16245.0, 32491.0],
"faithfulness": [0.11454112510535433,0.14123527363014815,0.3197643850972241,0.47765884872924175,0.7701570853704176,1.3201798748760563,2.037825774185549,2.651813181821849,3.27612042118584,1.0]},
"mcqa": {
"edge_counts": [10.0, 21.0, 94.0, 241.0, 527.0, 1469.0, 3046.0, 6036.0, 14832.0, 32491.0],
"faithfulness": [[0.02677059664121319,0.1965060952906922,0.449060470868564,0.7604756153676078,0.786575587658478,1.106011020720112,1.3436645156597262,1.5466349080478032,1.4914126224418107,1.0]]}
}},
{"model_id": "Qwen/Qwen2-1.5B", "scores": {
"ioi": {
"edge_counts": [],
"faithfulness": []},
"mcqa": {
"edge_counts": [],
"faithfulness": []}
}}
]
}