Spaces:
Runtime error
Runtime error
Commit
·
d10cc3f
1
Parent(s):
a379dd4
improved viewer tab
Browse files - src/components/viewer-tab.tsx +101 -18
src/components/viewer-tab.tsx
CHANGED
|
@@ -15,6 +15,7 @@ import {
|
|
| 15 |
SelectValue
|
| 16 |
} from "@/components/ui/select";
|
| 17 |
import { Run as ForceGraphRun } from "@/components/reasoning-trace";
|
|
|
|
| 18 |
|
| 19 |
const models = {
|
| 20 |
"Qwen3-14B": q3Results,
|
|
@@ -29,6 +30,18 @@ interface Run {
|
|
| 29 |
result: string;
|
| 30 |
}
|
| 31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
export default function ViewerTab({
|
| 33 |
handleTryRun,
|
| 34 |
}: {
|
|
@@ -37,6 +50,7 @@ export default function ViewerTab({
|
|
| 37 |
const [selectedRun, setSelectedRun] = useState<number | null>(null);
|
| 38 |
const [runs, setRuns] = useState<Run[]>([]);
|
| 39 |
const [selectedModel, setSelectedModel] = useState<string>("Qwen3-14B");
|
|
|
|
| 40 |
|
| 41 |
useEffect(() => {
|
| 42 |
// Convert the model data to the format expected by RunsList
|
|
@@ -52,6 +66,45 @@ export default function ViewerTab({
|
|
| 52 |
result: run.result
|
| 53 |
}));
|
| 54 |
setRuns(convertedRuns);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
}, [selectedModel]);
|
| 56 |
|
| 57 |
const handleRunSelect = (runId: number) => {
|
|
@@ -73,24 +126,54 @@ export default function ViewerTab({
|
|
| 73 |
|
| 74 |
return (
|
| 75 |
<div className="grid grid-cols-1 md:grid-cols-12 gap-4 h-[calc(100vh-200px)] max-h-[calc(100vh-200px)] overflow-hidden p-2">
|
| 76 |
-
<Card className="p-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
</Card>
|
| 95 |
<div className="md:col-span-3 flex flex-col max-h-full overflow-hidden">
|
| 96 |
<div className="bg-card rounded-lg p-3 border flex-grow overflow-hidden flex flex-col">
|
|
|
|
| 15 |
SelectValue
|
| 16 |
} from "@/components/ui/select";
|
| 17 |
import { Run as ForceGraphRun } from "@/components/reasoning-trace";
|
| 18 |
+
import { Badge } from "@/components/ui/badge";
|
| 19 |
|
| 20 |
const models = {
|
| 21 |
"Qwen3-14B": q3Results,
|
|
|
|
| 30 |
result: string;
|
| 31 |
}
|
| 32 |
|
| 33 |
+
// Aggregate statistics for the selected model's runs; the step metrics (mean, std dev, median, min, max) cover winning runs only
|
| 34 |
+
interface ModelStats {
|
| 35 |
+
winPercentage: number;
|
| 36 |
+
avgSteps: number;
|
| 37 |
+
stdDevSteps: number;
|
| 38 |
+
totalRuns: number;
|
| 39 |
+
wins: number;
|
| 40 |
+
medianSteps: number;
|
| 41 |
+
minSteps: number;
|
| 42 |
+
maxSteps: number;
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
export default function ViewerTab({
|
| 46 |
handleTryRun,
|
| 47 |
}: {
|
|
|
|
| 50 |
const [selectedRun, setSelectedRun] = useState<number | null>(null);
|
| 51 |
const [runs, setRuns] = useState<Run[]>([]);
|
| 52 |
const [selectedModel, setSelectedModel] = useState<string>("Qwen3-14B");
|
| 53 |
+
const [modelStats, setModelStats] = useState<ModelStats | null>(null);
|
| 54 |
|
| 55 |
useEffect(() => {
|
| 56 |
// Convert the model data to the format expected by RunsList
|
|
|
|
| 66 |
result: run.result
|
| 67 |
}));
|
| 68 |
setRuns(convertedRuns);
|
| 69 |
+
|
| 70 |
+
// Calculate model statistics
|
| 71 |
+
const winRuns = convertedRuns.filter(run => run.result === "win");
|
| 72 |
+
const totalRuns = convertedRuns.length;
|
| 73 |
+
const wins = winRuns.length;
|
| 74 |
+
const winPercentage = totalRuns > 0 ? (wins / totalRuns) * 100 : 0;
|
| 75 |
+
|
| 76 |
+
// Calculate steps statistics for winning runs
|
| 77 |
+
const stepCounts = winRuns.map(run => run.steps.length);
|
| 78 |
+
const avgSteps = stepCounts.length > 0
|
| 79 |
+
? stepCounts.reduce((sum, count) => sum + count, 0) / stepCounts.length
|
| 80 |
+
: 0;
|
| 81 |
+
|
| 82 |
+
// Calculate population standard deviation of winning-run step counts (divides by N, not N - 1)
|
| 83 |
+
const variance = stepCounts.length > 0
|
| 84 |
+
? stepCounts.reduce((sum, count) => sum + Math.pow(count - avgSteps, 2), 0) / stepCounts.length
|
| 85 |
+
: 0;
|
| 86 |
+
const stdDevSteps = Math.sqrt(variance);
|
| 87 |
+
|
| 88 |
+
// Calculate median, min, max steps
|
| 89 |
+
const sortedSteps = [...stepCounts].sort((a, b) => a - b);
|
| 90 |
+
const medianSteps = stepCounts.length > 0
|
| 91 |
+
? stepCounts.length % 2 === 0
|
| 92 |
+
? (sortedSteps[stepCounts.length / 2 - 1] + sortedSteps[stepCounts.length / 2]) / 2
|
| 93 |
+
: sortedSteps[Math.floor(stepCounts.length / 2)]
|
| 94 |
+
: 0;
|
| 95 |
+
const minSteps = stepCounts.length > 0 ? Math.min(...stepCounts) : 0;
|
| 96 |
+
const maxSteps = stepCounts.length > 0 ? Math.max(...stepCounts) : 0;
|
| 97 |
+
|
| 98 |
+
setModelStats({
|
| 99 |
+
winPercentage,
|
| 100 |
+
avgSteps,
|
| 101 |
+
stdDevSteps,
|
| 102 |
+
totalRuns,
|
| 103 |
+
wins,
|
| 104 |
+
medianSteps,
|
| 105 |
+
minSteps,
|
| 106 |
+
maxSteps
|
| 107 |
+
});
|
| 108 |
}, [selectedModel]);
|
| 109 |
|
| 110 |
const handleRunSelect = (runId: number) => {
|
|
|
|
| 126 |
|
| 127 |
return (
|
| 128 |
<div className="grid grid-cols-1 md:grid-cols-12 gap-4 h-[calc(100vh-200px)] max-h-[calc(100vh-200px)] overflow-hidden p-2">
|
| 129 |
+
<Card className="p-3 col-span-12 row-start-1">
|
| 130 |
+
<div className="flex flex-col sm:flex-row items-start sm:items-center gap-3">
|
| 131 |
+
<div className="flex-shrink-0">
|
| 132 |
+
<Select value={selectedModel} onValueChange={setSelectedModel}>
|
| 133 |
+
<SelectTrigger className="w-[180px]">
|
| 134 |
+
<SelectValue placeholder="Select model" />
|
| 135 |
+
</SelectTrigger>
|
| 136 |
+
<SelectContent>
|
| 137 |
+
{Object.keys(models).map((modelName) => (
|
| 138 |
+
<SelectItem key={modelName} value={modelName}>
|
| 139 |
+
{modelName}
|
| 140 |
+
</SelectItem>
|
| 141 |
+
))}
|
| 142 |
+
</SelectContent>
|
| 143 |
+
</Select>
|
| 144 |
+
</div>
|
| 145 |
+
|
| 146 |
+
{modelStats && (
|
| 147 |
+
<div className="flex flex-wrap gap-1.5 items-center">
|
| 148 |
+
<Badge variant="outline" className="px-2 py-0.5 flex gap-1 items-center">
|
| 149 |
+
<span className="text-xs font-medium">Success:</span>
|
| 150 |
+
<span className="text-xs font-semibold">{modelStats.winPercentage.toFixed(1)}%</span>
|
| 151 |
+
<span className="text-xs text-muted-foreground">({modelStats.wins}/{modelStats.totalRuns})</span>
|
| 152 |
+
</Badge>
|
| 153 |
+
|
| 154 |
+
<Badge variant="outline" className="px-2 py-0.5 flex gap-1 items-center">
|
| 155 |
+
<span className="text-xs font-medium">Mean:</span>
|
| 156 |
+
<span className="text-xs font-semibold">{modelStats.avgSteps.toFixed(1)}</span>
|
| 157 |
+
<span className="text-xs text-muted-foreground">±{modelStats.stdDevSteps.toFixed(1)}</span>
|
| 158 |
+
</Badge>
|
| 159 |
+
|
| 160 |
+
<Badge variant="outline" className="px-2 py-0.5 flex gap-1 items-center">
|
| 161 |
+
<span className="text-xs font-medium">Median:</span>
|
| 162 |
+
<span className="text-xs font-semibold">{modelStats.medianSteps.toFixed(1)}</span>
|
| 163 |
+
</Badge>
|
| 164 |
+
|
| 165 |
+
<Badge variant="outline" className="px-2 py-0.5 flex gap-1 items-center">
|
| 166 |
+
<span className="text-xs font-medium">Min:</span>
|
| 167 |
+
<span className="text-xs font-semibold">{modelStats.minSteps}</span>
|
| 168 |
+
</Badge>
|
| 169 |
+
|
| 170 |
+
<Badge variant="outline" className="px-2 py-0.5 flex gap-1 items-center">
|
| 171 |
+
<span className="text-xs font-medium">Max:</span>
|
| 172 |
+
<span className="text-xs font-semibold">{modelStats.maxSteps}</span>
|
| 173 |
+
</Badge>
|
| 174 |
+
</div>
|
| 175 |
+
)}
|
| 176 |
+
</div>
|
| 177 |
</Card>
|
| 178 |
<div className="md:col-span-3 flex flex-col max-h-full overflow-hidden">
|
| 179 |
<div className="bg-card rounded-lg p-3 border flex-grow overflow-hidden flex flex-col">
|