maurocarlu committed on
Commit
4634e2c
·
1 Parent(s): b98ed92

Updating Grafana dashboards

Browse files
monitoring/grafana/dashboards/hopcroft_dashboard.json CHANGED
@@ -62,7 +62,7 @@
62
  "pluginVersion": "9.0.0",
63
  "targets": [
64
  {
65
- "expr": "rate(fastapi_requests_total[1m])",
66
  "refId": "A"
67
  }
68
  ],
@@ -131,12 +131,12 @@
131
  "pluginVersion": "9.0.0",
132
  "targets": [
133
  {
134
- "expr": "histogram_quantile(0.95, rate(fastapi_request_duration_seconds_bucket[5m])) * 1000",
135
  "legendFormat": "p95",
136
  "refId": "A"
137
  },
138
  {
139
- "expr": "histogram_quantile(0.50, rate(fastapi_request_duration_seconds_bucket[5m])) * 1000",
140
  "legendFormat": "p50 (median)",
141
  "refId": "B"
142
  }
@@ -152,32 +152,25 @@
152
  "color": {
153
  "mode": "thresholds"
154
  },
155
- "mappings": [
156
- {
157
- "options": {
158
- "0": {
159
- "color": "red",
160
- "index": 1,
161
- "text": "No Drift"
162
- },
163
- "1": {
164
- "color": "green",
165
- "index": 0,
166
- "text": "Drift Detected"
167
- }
168
- },
169
- "type": "value"
170
- }
171
- ],
172
  "thresholds": {
173
  "mode": "absolute",
174
  "steps": [
175
  {
176
  "color": "green",
177
  "value": null
 
 
 
 
 
 
 
 
178
  }
179
  ]
180
- }
 
181
  }
182
  },
183
  "gridPos": {
@@ -201,13 +194,13 @@
201
  "pluginVersion": "9.0.0",
202
  "targets": [
203
  {
204
- "expr": "drift_detected",
205
  "refId": "A"
206
  }
207
  ],
208
- "title": "Data Drift Status",
209
  "type": "stat",
210
- "description": "Current data drift detection status (1 = drift detected, 0 = no drift)"
211
  },
212
  {
213
  "datasource": "Prometheus",
@@ -216,7 +209,7 @@
216
  "color": {
217
  "mode": "thresholds"
218
  },
219
- "decimals": 4,
220
  "mappings": [],
221
  "thresholds": {
222
  "mode": "absolute",
@@ -235,7 +228,7 @@
235
  }
236
  ]
237
  },
238
- "unit": "short"
239
  }
240
  },
241
  "gridPos": {
@@ -259,13 +252,13 @@
259
  "pluginVersion": "9.0.0",
260
  "targets": [
261
  {
262
- "expr": "drift_p_value",
263
  "refId": "A"
264
  }
265
  ],
266
- "title": "Drift P-Value",
267
  "type": "stat",
268
- "description": "Statistical significance of detected drift (lower = more significant)"
269
  },
270
  {
271
  "datasource": "Prometheus",
@@ -305,7 +298,7 @@
305
  }
306
  ]
307
  },
308
- "unit": "short"
309
  }
310
  },
311
  "gridPos": {
@@ -328,14 +321,84 @@
328
  "pluginVersion": "9.0.0",
329
  "targets": [
330
  {
331
- "expr": "drift_distance",
332
- "legendFormat": "Distance",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
333
  "refId": "A"
334
  }
335
  ],
336
- "title": "Drift Distance Over Time",
337
  "type": "timeseries",
338
- "description": "Statistical distance between baseline and current data distribution"
339
  }
340
  ],
341
  "refresh": "10s",
@@ -353,6 +416,6 @@
353
  "timezone": "",
354
  "title": "Hopcroft ML Model Monitoring",
355
  "uid": "hopcroft-ml-dashboard",
356
- "version": 1,
357
  "weekStart": ""
358
  }
 
62
  "pluginVersion": "9.0.0",
63
  "targets": [
64
  {
65
+ "expr": "sum(rate(hopcroft_requests_total[1m]))",
66
  "refId": "A"
67
  }
68
  ],
 
131
  "pluginVersion": "9.0.0",
132
  "targets": [
133
  {
134
+ "expr": "histogram_quantile(0.95, sum(rate(hopcroft_request_duration_seconds_bucket[5m])) by (le)) * 1000",
135
  "legendFormat": "p95",
136
  "refId": "A"
137
  },
138
  {
139
+ "expr": "histogram_quantile(0.50, sum(rate(hopcroft_request_duration_seconds_bucket[5m])) by (le)) * 1000",
140
  "legendFormat": "p50 (median)",
141
  "refId": "B"
142
  }
 
152
  "color": {
153
  "mode": "thresholds"
154
  },
155
+ "mappings": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
  "thresholds": {
157
  "mode": "absolute",
158
  "steps": [
159
  {
160
  "color": "green",
161
  "value": null
162
+ },
163
+ {
164
+ "color": "yellow",
165
+ "value": 1
166
+ },
167
+ {
168
+ "color": "red",
169
+ "value": 5
170
  }
171
  ]
172
+ },
173
+ "unit": "short"
174
  }
175
  },
176
  "gridPos": {
 
194
  "pluginVersion": "9.0.0",
195
  "targets": [
196
  {
197
+ "expr": "sum(hopcroft_in_progress_requests)",
198
  "refId": "A"
199
  }
200
  ],
201
+ "title": "In-Progress Requests",
202
  "type": "stat",
203
+ "description": "Number of requests currently being processed"
204
  },
205
  {
206
  "datasource": "Prometheus",
 
209
  "color": {
210
  "mode": "thresholds"
211
  },
212
+ "decimals": 2,
213
  "mappings": [],
214
  "thresholds": {
215
  "mode": "absolute",
 
228
  }
229
  ]
230
  },
231
+ "unit": "percentunit"
232
  }
233
  },
234
  "gridPos": {
 
252
  "pluginVersion": "9.0.0",
253
  "targets": [
254
  {
255
+ "expr": "sum(rate(hopcroft_requests_total{http_status=~\"5..\"}[5m])) / sum(rate(hopcroft_requests_total[5m]))",
256
  "refId": "A"
257
  }
258
  ],
259
+ "title": "Error Rate (5xx)",
260
  "type": "stat",
261
+ "description": "Percentage of requests resulting in 5xx errors"
262
  },
263
  {
264
  "datasource": "Prometheus",
 
298
  }
299
  ]
300
  },
301
+ "unit": "s"
302
  }
303
  },
304
  "gridPos": {
 
321
  "pluginVersion": "9.0.0",
322
  "targets": [
323
  {
324
+ "expr": "rate(hopcroft_prediction_processing_seconds_sum[5m]) / rate(hopcroft_prediction_processing_seconds_count[5m])",
325
+ "legendFormat": "Avg Prediction Time",
326
+ "refId": "A"
327
+ }
328
+ ],
329
+ "title": "Model Prediction Time",
330
+ "type": "timeseries",
331
+ "description": "Average time spent processing model predictions"
332
+ },
333
+ {
334
+ "datasource": "Prometheus",
335
+ "fieldConfig": {
336
+ "defaults": {
337
+ "color": {
338
+ "mode": "palette-classic"
339
+ },
340
+ "custom": {
341
+ "axisLabel": "",
342
+ "axisPlacement": "auto",
343
+ "barAlignment": 0,
344
+ "drawStyle": "bars",
345
+ "fillOpacity": 80,
346
+ "gradientMode": "none",
347
+ "hideFrom": {
348
+ "tooltip": false,
349
+ "viz": false,
350
+ "legend": false
351
+ },
352
+ "lineInterpolation": "linear",
353
+ "lineWidth": 1,
354
+ "pointSize": 5,
355
+ "scaleDistribution": {
356
+ "type": "linear"
357
+ },
358
+ "showPoints": "never",
359
+ "spanNulls": false
360
+ },
361
+ "mappings": [],
362
+ "thresholds": {
363
+ "mode": "absolute",
364
+ "steps": [
365
+ {
366
+ "color": "green",
367
+ "value": null
368
+ }
369
+ ]
370
+ },
371
+ "unit": "short"
372
+ }
373
+ },
374
+ "gridPos": {
375
+ "h": 8,
376
+ "w": 24,
377
+ "x": 0,
378
+ "y": 14
379
+ },
380
+ "id": 6,
381
+ "options": {
382
+ "legend": {
383
+ "calcs": ["sum"],
384
+ "displayMode": "table",
385
+ "placement": "right"
386
+ },
387
+ "tooltip": {
388
+ "mode": "multi"
389
+ }
390
+ },
391
+ "pluginVersion": "9.0.0",
392
+ "targets": [
393
+ {
394
+ "expr": "sum by (endpoint) (increase(hopcroft_requests_total[5m]))",
395
+ "legendFormat": "{{endpoint}}",
396
  "refId": "A"
397
  }
398
  ],
399
+ "title": "Requests by Endpoint",
400
  "type": "timeseries",
401
+ "description": "Number of requests per endpoint over time"
402
  }
403
  ],
404
  "refresh": "10s",
 
416
  "timezone": "",
417
  "title": "Hopcroft ML Model Monitoring",
418
  "uid": "hopcroft-ml-dashboard",
419
+ "version": 2,
420
  "weekStart": ""
421
  }