mirror of
https://github.com/pacnpal/thrilltrack-explorer.git
synced 2025-12-20 04:51:11 -05:00
feat: Integrate Grafana Loki
This commit is contained in:
266
monitoring/grafana-dashboard.json
Normal file
266
monitoring/grafana-dashboard.json
Normal file
@@ -0,0 +1,266 @@
|
||||
{
|
||||
"dashboard": {
|
||||
"title": "Playwright Test Execution Dashboard",
|
||||
"tags": ["playwright", "testing", "e2e"],
|
||||
"timezone": "browser",
|
||||
"refresh": "30s",
|
||||
"time": {
|
||||
"from": "now-24h",
|
||||
"to": "now"
|
||||
},
|
||||
"panels": [
|
||||
{
|
||||
"id": 1,
|
||||
"title": "Test Execution Overview",
|
||||
"type": "stat",
|
||||
"gridPos": { "x": 0, "y": 0, "w": 6, "h": 4 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(count_over_time({job=\"playwright-tests\", event=\"test_end\"}[$__range]))",
|
||||
"legendFormat": "Total Tests"
|
||||
}
|
||||
],
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"textMode": "auto"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"title": "Pass Rate %",
|
||||
"type": "stat",
|
||||
"gridPos": { "x": 6, "y": 0, "w": 6, "h": 4 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum(count_over_time({job=\"playwright-tests\", status=\"passed\"}[$__range])) / sum(count_over_time({job=\"playwright-tests\", event=\"test_end\"}[$__range]))) * 100",
|
||||
"legendFormat": "Pass Rate"
|
||||
}
|
||||
],
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"textMode": "auto"
},
"fieldConfig": {
"defaults": {
"unit": "percent",
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "value": 0, "color": "red" },
|
||||
{ "value": 80, "color": "yellow" },
|
||||
{ "value": 95, "color": "green" }
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"title": "Failure Rate %",
|
||||
"type": "stat",
|
||||
"gridPos": { "x": 12, "y": 0, "w": 6, "h": 4 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum(count_over_time({job=\"playwright-tests\", status=\"failed\"}[$__range])) / sum(count_over_time({job=\"playwright-tests\", event=\"test_end\"}[$__range]))) * 100",
|
||||
"legendFormat": "Failure Rate"
|
||||
}
|
||||
],
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"textMode": "auto"
},
"fieldConfig": {
"defaults": {
"unit": "percent",
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "value": 0, "color": "green" },
|
||||
{ "value": 5, "color": "yellow" },
|
||||
{ "value": 20, "color": "red" }
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"title": "Avg Test Duration",
|
||||
"type": "stat",
|
||||
"gridPos": { "x": 18, "y": 0, "w": 6, "h": 4 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "avg_over_time({job=\"playwright-tests\", event=\"test_end\"} | json | unwrap duration_ms [$__range]) by (job)",
|
||||
"legendFormat": "Avg Duration"
|
||||
}
|
||||
],
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"textMode": "auto"
},
"fieldConfig": {
"defaults": {
"unit": "ms"
}
}
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"title": "Test Status Over Time",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "x": 0, "y": 4, "w": 12, "h": 8 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (status) (count_over_time({job=\"playwright-tests\", event=\"test_end\"} | json [$__interval]))",
|
||||
"legendFormat": "{{status}}"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"lineInterpolation": "smooth",
|
||||
"fillOpacity": 20
|
||||
}
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": { "id": "byName", "options": "passed" },
|
||||
"properties": [{ "id": "color", "value": { "mode": "fixed", "fixedColor": "green" } }]
|
||||
},
|
||||
{
|
||||
"matcher": { "id": "byName", "options": "failed" },
|
||||
"properties": [{ "id": "color", "value": { "mode": "fixed", "fixedColor": "red" } }]
|
||||
},
|
||||
{
|
||||
"matcher": { "id": "byName", "options": "skipped" },
|
||||
"properties": [{ "id": "color", "value": { "mode": "fixed", "fixedColor": "yellow" } }]
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"title": "Browser Comparison",
|
||||
"type": "bargauge",
|
||||
"gridPos": { "x": 12, "y": 4, "w": 12, "h": 8 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (browser) (count_over_time({job=\"playwright-tests\", status=\"passed\"} [$__range]))",
|
||||
"legendFormat": "{{browser}}"
|
||||
}
|
||||
],
|
||||
"options": {
|
||||
"orientation": "horizontal",
|
||||
"displayMode": "gradient"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 7,
|
||||
"title": "Test Duration Distribution",
|
||||
"type": "histogram",
|
||||
"gridPos": { "x": 0, "y": 12, "w": 12, "h": 8 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "max_over_time({job=\"playwright-tests\", event=\"test_end\"} | json | unwrap duration_ms [$__interval]) by (test_name)",
|
||||
"legendFormat": "Duration"
|
||||
}
|
||||
],
|
||||
"options": {
|
||||
"bucketOffset": 0,
|
||||
"bucketSize": 1000
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 8,
|
||||
"title": "Top 10 Failing Tests",
|
||||
"type": "bargauge",
|
||||
"gridPos": { "x": 12, "y": 12, "w": 12, "h": 8 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk(10, sum by (test_name) (count_over_time({job=\"playwright-tests\", status=\"failed\"} | json [$__range])))",
|
||||
"legendFormat": "{{test_name}}"
|
||||
}
|
||||
],
|
||||
"options": {
|
||||
"orientation": "horizontal",
|
||||
"displayMode": "gradient",
|
||||
"showUnfilled": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 9,
|
||||
"title": "Recent Test Runs",
|
||||
"type": "table",
|
||||
"gridPos": { "x": 0, "y": 20, "w": 24, "h": 8 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "{job=\"playwright-tests\", event=\"test_end\"} | json",
|
||||
"legendFormat": ""
|
||||
}
|
||||
],
|
||||
"options": {
|
||||
"showHeader": true,
|
||||
"sortBy": [{ "displayName": "Time", "desc": true }]
|
||||
},
|
||||
"transformations": [
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {},
|
||||
"indexByName": {
|
||||
"Time": 0,
|
||||
"test_name": 1,
|
||||
"test_file": 2,
|
||||
"browser": 3,
|
||||
"status": 4,
|
||||
"duration_ms": 5,
|
||||
"branch": 6,
|
||||
"commit": 7
|
||||
},
|
||||
"renameByName": {
|
||||
"test_name": "Test Name",
|
||||
"test_file": "File",
|
||||
"browser": "Browser",
|
||||
"status": "Status",
|
||||
"duration_ms": "Duration (ms)",
|
||||
"branch": "Branch",
|
||||
"commit": "Commit"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "limit",
|
||||
"options": {
|
||||
"limitField": 20
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 10,
|
||||
"title": "Slowest Tests (P95)",
|
||||
"type": "table",
|
||||
"gridPos": { "x": 0, "y": 28, "w": 12, "h": 6 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk(10, quantile_over_time(0.95, {job=\"playwright-tests\", event=\"test_end\"} | json | unwrap duration_ms [$__range]) by (test_name))",
|
||||
"legendFormat": "{{test_name}}"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 11,
|
||||
"title": "Flaky Tests Detection",
|
||||
"type": "table",
|
||||
"gridPos": { "x": 12, "y": 28, "w": 12, "h": 6 },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum by (test_name) (count_over_time({job=\"playwright-tests\", status=\"failed\"} | json [$__range])) and sum by (test_name) (count_over_time({job=\"playwright-tests\", status=\"passed\"} | json [$__range])))",
|
||||
"legendFormat": "{{test_name}}"
|
||||
}
|
||||
],
|
||||
"description": "Tests that have both passed and failed runs (potential flaky tests)"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
166
monitoring/loki-alerts.yml
Normal file
166
monitoring/loki-alerts.yml
Normal file
@@ -0,0 +1,166 @@
|
||||
# Grafana Loki Alert Rules for Playwright Tests
|
||||
# Load these rules into the Loki ruler (or Grafana Cloud alerting); firing alerts are then routed through Alertmanager
|
||||
|
||||
groups:
|
||||
- name: playwright_test_alerts
|
||||
interval: 1m
|
||||
rules:
|
||||
# Critical: tests are finishing but none of them pass.
# A LogQL range aggregation over a selection with no log lines returns an
# empty vector, not 0, so a "sum(...) == 0" comparison can never match.
# Instead: start from "at least one test finished in the window" and use
# `unless` to drop that result whenever a passing test was also logged.
# $value is therefore the number of test_end events in the last 15 minutes.
- alert: AllPlaywrightTestsFailing
  expr: |
    sum(count_over_time({job="playwright-tests", event="test_end"}[15m])) > 0
    unless
    sum(count_over_time({job="playwright-tests", status="passed"}[15m])) > 0
  for: 5m
  labels:
    severity: critical
    team: qa
    component: playwright
  annotations:
    summary: "All Playwright tests are failing"
    description: "No passing tests detected in the last 15 minutes. Test count: {{ $value }}"
    runbook_url: "https://wiki.internal/runbooks/playwright-all-tests-failing"
    dashboard_url: "https://grafana.internal/d/playwright-dashboard"
|
||||
|
||||
# Warning: overall failure ratio above 20%.
# Ratio = rate of status="failed" lines / rate of event="test_end" lines,
# both summed across every stream of the job over a 30 minute window.
# The condition must hold for 10 minutes before the alert fires.
- alert: HighPlaywrightFailureRate
  for: 10m
  expr: |
    (
      sum(rate({job="playwright-tests", status="failed"}[30m]))
      /
      sum(rate({job="playwright-tests", event="test_end"}[30m]))
    ) > 0.20
  annotations:
    description: "{{ $value | humanizePercentage }} of tests are failing over the last 30 minutes"
    summary: "High Playwright test failure rate detected"
    runbook_url: "https://wiki.internal/runbooks/playwright-high-failure-rate"
  labels:
    component: playwright
    severity: warning
    team: qa
|
||||
|
||||
# Warning: a single browser exceeds a 30% failure ratio.
# Same failed / test_end ratio as the overall failure-rate rule, but both
# sides are grouped by the `browser` label so each browser is judged on its
# own; {{ $labels.browser }} names the offender in the annotations.
- alert: BrowserSpecificFailures
  for: 10m
  expr: |
    (
      sum by (browser) (rate({job="playwright-tests", status="failed"}[30m]))
      /
      sum by (browser) (rate({job="playwright-tests", event="test_end"}[30m]))
    ) > 0.30
  annotations:
    description: "{{ $labels.browser }} browser has {{ $value | humanizePercentage }} failure rate"
    summary: "High failure rate in {{ $labels.browser }}"
  labels:
    component: playwright
    severity: warning
    team: qa
|
||||
|
||||
# Warning: P95 test duration above 5 minutes over the last 30 minutes.
# - `by (job)` collapses everything into one series; without a grouping an
#   unwrapped range aggregation keeps every label extracted by `| json`,
#   producing a separate "P95" per distinct label set instead of a suite-wide one.
# - duration_ms is in milliseconds, while the humanizeDuration template
#   function expects seconds, so the value is divided by 1000 and compared
#   against 300 s (the same 5 minute threshold as before).
- alert: SlowPlaywrightTests
  expr: |
    (
      quantile_over_time(0.95,
        {job="playwright-tests", event="test_end"} | json | unwrap duration_ms
        [30m]) by (job)
      / 1000
    ) > 300
  for: 15m
  labels:
    severity: warning
    team: qa
    component: playwright
  annotations:
    summary: "Playwright tests are running slowly"
    description: "P95 test duration is {{ $value | humanizeDuration }} (threshold: 5 minutes)"
    runbook_url: "https://wiki.internal/runbooks/playwright-slow-tests"
|
||||
|
||||
# Warning: a test suite run took longer than 1 hour.
# `unwrap` is only valid inside a range aggregation, and alert rules must be
# metric queries, so the longest suite duration reported in the last 5 minutes
# is taken with max_over_time. The value is converted from milliseconds to
# seconds because humanizeDuration expects seconds (3600 s = 1 hour).
- alert: PlaywrightSuiteTimeout
  expr: |
    (
      max_over_time(
        {job="playwright-tests", event="test_suite_end"} | json | unwrap duration_ms
        [5m]) by (job)
      / 1000
    ) > 3600
  labels:
    severity: warning
    team: qa
    component: playwright
  annotations:
    summary: "Playwright test suite exceeded 1 hour"
    description: "Test suite took {{ $value | humanizeDuration }} to complete"
|
||||
|
||||
# Info: no test_start event has been logged for 2 hours.
# absent_over_time yields a result only when the selector matched no log
# lines in the window; the condition must then persist for 5 minutes.
# NOTE(review): the original heading said "during business hours", but the
# expression has no time-of-day restriction, so this fires at any hour.
- alert: NoPlaywrightTestsRunning
  for: 5m
  expr: |
    absent_over_time({job="playwright-tests", event="test_start"}[2h])
  annotations:
    description: "No test executions detected in the last 2 hours. CI/CD pipeline may be broken."
    summary: "No Playwright tests have run recently"
    runbook_url: "https://wiki.internal/runbooks/playwright-no-tests"
  labels:
    component: playwright
    severity: info
    team: qa
|
||||
|
||||
# Warning: a test failed on its first retry more than 3 times in the last hour.
# `count by (...)` is a vector aggregation and cannot be applied directly to
# a log selector; log lines are first counted over a 1h window with
# count_over_time and then summed per test_name (a label extracted by `| json`).
- alert: FlakyPlaywrightTest
  expr: |
    sum by (test_name) (
      count_over_time({job="playwright-tests", status="failed", retry="1"} | json [1h])
    ) > 3
  for: 1h
  labels:
    severity: warning
    team: qa
    component: playwright
  annotations:
    summary: "Flaky test detected: {{ $labels.test_name }}"
    description: "Test '{{ $labels.test_name }}' has failed {{ $value }} times on retry in the last hour"
    runbook_url: "https://wiki.internal/runbooks/playwright-flaky-tests"
|
||||
|
||||
# Critical: the job is emitting logs but no suite has started in 30 minutes.
# count_over_time returns no series (rather than 0) when nothing matches, so
# an "== 0" comparison never fires, and an `and` between per-stream results
# with different label sets never matches either. Both sides are therefore
# summed to a single label-less series, and `unless` removes the
# "job is logging" result whenever a test_suite_start event was seen.
- alert: PlaywrightInfrastructureFailure
  expr: |
    sum(count_over_time({job="playwright-tests"}[30m])) > 0
    unless
    sum(count_over_time({job="playwright-tests", event="test_suite_start"}[30m])) > 0
  for: 5m
  labels:
    severity: critical
    team: devops
    component: playwright
  annotations:
    summary: "Playwright test infrastructure may be failing"
    description: "Tests are attempting to run but test suite is not starting properly"
    runbook_url: "https://wiki.internal/runbooks/playwright-infrastructure"
|
||||
|
||||
# Warning: more than 15% of finished tests were retries.
# - A negative matcher such as retry!="0" also selects streams that have no
#   `retry` label at all, which would count every unrelated log line as a
#   retry. The regex requires a non-zero numeric retry value instead.
#   (Assumes retry label values are plain integers, as in retry="1" used by
#   the flaky-test rule - confirm against the log shipper.)
# - The numerator is limited to event="test_end" so it counts the same kind
#   of log line as the denominator.
- alert: HighPlaywrightRetryRate
  expr: |
    (
      sum(rate({job="playwright-tests", event="test_end", retry=~"[1-9][0-9]*"}[30m]))
      /
      sum(rate({job="playwright-tests", event="test_end"}[30m]))
    ) > 0.15
  for: 10m
  labels:
    severity: warning
    team: qa
    component: playwright
  annotations:
    summary: "High test retry rate detected"
    description: "{{ $value | humanizePercentage }} of tests are being retried"
|
||||
|
||||
# Info: average test duration over the last hour is more than 50% above the
# average of the 24 hours before it.
# - Both averages are grouped `by (job)`; without a grouping each side keeps
#   every label extracted by `| json`, so the two sides of the division
#   would only match on identical label sets.
# - 1 is subtracted from the ratio so that $value is the relative increase
#   (0.6 -> "60%"), which is what the description reports; the threshold
#   "ratio > 1.5" becomes "increase > 0.5".
- alert: PlaywrightDurationIncreasing
  expr: |
    (
      avg_over_time({job="playwright-tests", event="test_end"} | json | unwrap duration_ms [1h]) by (job)
      /
      avg_over_time({job="playwright-tests", event="test_end"} | json | unwrap duration_ms [24h] offset 1h) by (job)
    ) - 1 > 0.5
  for: 30m
  labels:
    severity: info
    team: qa
    component: playwright
  annotations:
    summary: "Playwright test duration is increasing"
    description: "Average test duration has increased by {{ $value | humanizePercentage }} compared to previous day"
|
||||
Reference in New Issue
Block a user