feat: Integrate Grafana Loki

This commit is contained in:
gpt-engineer-app[bot]
2025-10-30 15:54:32 +00:00
parent 8ac61e01e3
commit 72a7cb7f7c
9 changed files with 1261 additions and 2 deletions

View File

@@ -0,0 +1,266 @@
{
"dashboard": {
"title": "Playwright Test Execution Dashboard",
"tags": ["playwright", "testing", "e2e"],
"timezone": "browser",
"refresh": "30s",
"time": {
"from": "now-24h",
"to": "now"
},
"panels": [
{
  "id": 1,
  "title": "Test Execution Overview",
  "type": "stat",
  "description": "Total number of finished tests (test_end events) in the selected time range.",
  "gridPos": { "x": 0, "y": 0, "w": 6, "h": 4 },
  "targets": [
    {
      "expr": "sum(count_over_time({job=\"playwright-tests\", event=\"test_end\"}[$__range]))",
      "legendFormat": "Total Tests"
    }
  ],
  "options": {
    "colorMode": "value",
    "graphMode": "area",
    "textMode": "auto"
  }
},
{
  "id": 2,
  "title": "Pass Rate %",
  "type": "stat",
  "description": "Share of finished tests that passed in the selected time range.",
  "gridPos": { "x": 6, "y": 0, "w": 6, "h": 4 },
  "targets": [
    {
      "expr": "(sum(count_over_time({job=\"playwright-tests\", status=\"passed\"}[$__range])) / sum(count_over_time({job=\"playwright-tests\", event=\"test_end\"}[$__range]))) * 100",
      "legendFormat": "Pass Rate"
    }
  ],
  "options": {
    "colorMode": "value",
    "graphMode": "area",
    "textMode": "auto"
  },
  "fieldConfig": {
    "defaults": {
      "unit": "percent",
      "thresholds": {
        "mode": "absolute",
        "steps": [
          { "value": 0, "color": "red" },
          { "value": 80, "color": "yellow" },
          { "value": 95, "color": "green" }
        ]
      }
    }
  }
},
{
  "id": 3,
  "title": "Failure Rate %",
  "type": "stat",
  "description": "Share of finished tests that failed in the selected time range.",
  "gridPos": { "x": 12, "y": 0, "w": 6, "h": 4 },
  "targets": [
    {
      "expr": "(sum(count_over_time({job=\"playwright-tests\", status=\"failed\"}[$__range])) / sum(count_over_time({job=\"playwright-tests\", event=\"test_end\"}[$__range]))) * 100",
      "legendFormat": "Failure Rate"
    }
  ],
  "options": {
    "colorMode": "value",
    "graphMode": "area",
    "textMode": "auto"
  },
  "fieldConfig": {
    "defaults": {
      "unit": "percent",
      "thresholds": {
        "mode": "absolute",
        "steps": [
          { "value": 0, "color": "green" },
          { "value": 5, "color": "yellow" },
          { "value": 20, "color": "red" }
        ]
      }
    }
  }
},
{
  "id": 4,
  "title": "Avg Test Duration",
  "type": "stat",
  "description": "Mean duration_ms of finished tests over the selected time range, aggregated into a single series.",
  "gridPos": { "x": 18, "y": 0, "w": 6, "h": 4 },
  "targets": [
    {
      "expr": "avg_over_time({job=\"playwright-tests\", event=\"test_end\"} | json | unwrap duration_ms | __error__=\"\" [$__range]) by (job)",
      "legendFormat": "Avg Duration"
    }
  ],
  "options": {
    "colorMode": "value",
    "graphMode": "area",
    "textMode": "auto"
  },
  "fieldConfig": {
    "defaults": {
      "unit": "ms"
    }
  }
},
{
  "id": 5,
  "type": "timeseries",
  "title": "Test Status Over Time",
  "gridPos": { "h": 8, "w": 12, "x": 0, "y": 4 },
  "fieldConfig": {
    "defaults": {
      "custom": {
        "fillOpacity": 20,
        "lineInterpolation": "smooth"
      }
    },
    "overrides": [
      {
        "matcher": { "id": "byName", "options": "passed" },
        "properties": [
          {
            "id": "color",
            "value": { "mode": "fixed", "fixedColor": "green" }
          }
        ]
      },
      {
        "matcher": { "id": "byName", "options": "failed" },
        "properties": [
          {
            "id": "color",
            "value": { "mode": "fixed", "fixedColor": "red" }
          }
        ]
      },
      {
        "matcher": { "id": "byName", "options": "skipped" },
        "properties": [
          {
            "id": "color",
            "value": { "mode": "fixed", "fixedColor": "yellow" }
          }
        ]
      }
    ]
  },
  "targets": [
    {
      "legendFormat": "{{status}}",
      "expr": "sum by (status) (count_over_time({job=\"playwright-tests\", event=\"test_end\"} | json [$__interval]))"
    }
  ]
},
{
  "id": 6,
  "type": "bargauge",
  "title": "Browser Comparison",
  "gridPos": { "h": 8, "w": 12, "x": 12, "y": 4 },
  "options": {
    "displayMode": "gradient",
    "orientation": "horizontal"
  },
  "targets": [
    {
      "legendFormat": "{{browser}}",
      "expr": "sum by (browser) (count_over_time({job=\"playwright-tests\", status=\"passed\"} [$__range]))"
    }
  ]
},
{
  "id": 7,
  "title": "Test Duration Distribution",
  "type": "histogram",
  "description": "Distribution of per-test average duration_ms, sampled once per query interval.",
  "gridPos": { "x": 0, "y": 12, "w": 12, "h": 8 },
  "targets": [
    {
      "expr": "avg_over_time({job=\"playwright-tests\", event=\"test_end\"} | json | unwrap duration_ms | __error__=\"\" [$__interval]) by (test_name)",
      "legendFormat": "Duration"
    }
  ],
  "options": {
    "bucketOffset": 0,
    "bucketSize": 1000
  },
  "fieldConfig": {
    "defaults": {
      "unit": "ms"
    }
  }
},
{
  "id": 8,
  "type": "bargauge",
  "title": "Top 10 Failing Tests",
  "gridPos": { "h": 8, "w": 12, "x": 12, "y": 12 },
  "options": {
    "displayMode": "gradient",
    "orientation": "horizontal",
    "showUnfilled": true
  },
  "targets": [
    {
      "legendFormat": "{{test_name}}",
      "expr": "topk(10, sum by (test_name) (count_over_time({job=\"playwright-tests\", status=\"failed\"} | json [$__range])))"
    }
  ]
},
{
  "id": 9,
  "type": "table",
  "title": "Recent Test Runs",
  "gridPos": { "h": 8, "w": 24, "x": 0, "y": 20 },
  "options": {
    "sortBy": [
      { "desc": true, "displayName": "Time" }
    ],
    "showHeader": true
  },
  "targets": [
    {
      "legendFormat": "",
      "expr": "{job=\"playwright-tests\", event=\"test_end\"} | json"
    }
  ],
  "transformations": [
    {
      "id": "organize",
      "options": {
        "renameByName": {
          "test_name": "Test Name",
          "test_file": "File",
          "browser": "Browser",
          "status": "Status",
          "duration_ms": "Duration (ms)",
          "branch": "Branch",
          "commit": "Commit"
        },
        "indexByName": {
          "Time": 0,
          "test_name": 1,
          "test_file": 2,
          "browser": 3,
          "status": 4,
          "duration_ms": 5,
          "branch": 6,
          "commit": 7
        },
        "excludeByName": {}
      }
    },
    {
      "id": "limit",
      "options": { "limitField": 20 }
    }
  ]
},
{
  "id": 10,
  "title": "Slowest Tests (P95)",
  "type": "table",
  "description": "The ten tests with the highest 95th-percentile duration_ms in the selected time range.",
  "gridPos": { "x": 0, "y": 28, "w": 12, "h": 6 },
  "targets": [
    {
      "expr": "topk(10, quantile_over_time(0.95, {job=\"playwright-tests\", event=\"test_end\"} | json | unwrap duration_ms | __error__=\"\" [$__range]) by (test_name))",
      "legendFormat": "{{test_name}}"
    }
  ]
},
{
  "id": 11,
  "title": "Flaky Tests Detection",
  "type": "table",
  "gridPos": { "x": 12, "y": 28, "w": 12, "h": 6 },
  "targets": [
    {
      "expr": "sum by (test_name) (count_over_time({job=\"playwright-tests\", status=\"failed\"} | json [$__range])) and sum by (test_name) (count_over_time({job=\"playwright-tests\", status=\"passed\"} | json [$__range]))",
      "legendFormat": "{{test_name}}"
    }
  ],
  "description": "Tests that have both passed and failed runs (potential flaky tests). The value shown is the number of failed runs."
}
]
}
}

166
monitoring/loki-alerts.yml Normal file
View File

@@ -0,0 +1,166 @@
# Grafana Loki alerting rules for Playwright tests
# Load this file into the Loki ruler (or Grafana Cloud's Loki rules); Alertmanager only receives the alerts these rules fire.
# Top-level list of rule groups.
groups:
# Group containing the Playwright alert rules that follow.
- name: playwright_test_alerts
# How often the rules in this group are evaluated.
interval: 1m
# Alerting rules, one list entry per alert.
rules:
# Critical: tests are finishing but none of them pass
- alert: AllPlaywrightTestsFailing
  # LogQL returns an empty result (not 0) when no stream matches, so a
  # "passed == 0" comparison can never be true. Select "some tests finished"
  # and discard it with `unless` whenever at least one passing test exists.
  # The resulting value is the number of finished tests in the window.
  expr: |
    sum(count_over_time({job="playwright-tests", event="test_end"}[15m])) > 0
    unless
    sum(count_over_time({job="playwright-tests", status="passed"}[15m])) > 0
  for: 5m
  labels:
    severity: critical
    team: qa
    component: playwright
  annotations:
    summary: "All Playwright tests are failing"
    description: "No passing tests detected in the last 15 minutes. Test count: {{ $value }}"
    runbook_url: "https://wiki.internal/runbooks/playwright-all-tests-failing"
    dashboard_url: "https://grafana.internal/d/playwright-dashboard"
# Warning: more than 20% of finished tests failed over the last 30 minutes
- alert: HighPlaywrightFailureRate
  for: 10m
  # Ratio of failed-test log rate to finished-test log rate.
  expr: |
    (
      sum(rate({job="playwright-tests", status="failed"}[30m]))
      /
      sum(rate({job="playwright-tests", event="test_end"}[30m]))
    ) > 0.20
  labels:
    component: playwright
    team: qa
    severity: warning
  annotations:
    summary: 'High Playwright test failure rate detected'
    description: '{{ $value | humanizePercentage }} of tests are failing over the last 30 minutes'
    runbook_url: 'https://wiki.internal/runbooks/playwright-high-failure-rate'
# Warning: a single browser exceeds a 30% failure ratio over 30 minutes
- alert: BrowserSpecificFailures
  for: 10m
  # Same failure ratio as the overall rule, but computed per `browser` label
  # so that one alert instance is produced for each affected browser.
  expr: |
    (
      sum by (browser) (rate({job="playwright-tests", status="failed"}[30m]))
      /
      sum by (browser) (rate({job="playwright-tests", event="test_end"}[30m]))
    ) > 0.30
  labels:
    component: playwright
    team: qa
    severity: warning
  annotations:
    summary: 'High failure rate in {{ $labels.browser }}'
    description: '{{ $labels.browser }} browser has {{ $value | humanizePercentage }} failure rate'
# Warning: Slow test execution (P95 above 5 minutes)
- alert: SlowPlaywrightTests
  # - `| __error__=""` drops lines whose JSON/number parsing failed.
  # - `by (job)` collapses the labels extracted by `| json` into one series,
  #   giving a single overall P95 instead of one per label combination.
  # - duration_ms is divided by 1000 because `humanizeDuration` in the
  #   description expects seconds; the threshold is therefore 300 s.
  expr: |
    (
      quantile_over_time(0.95,
        {job="playwright-tests", event="test_end"} | json | unwrap duration_ms | __error__=""
      [30m]) by (job)
      / 1000
    ) > 300
  for: 15m
  labels:
    severity: warning
    team: qa
    component: playwright
  annotations:
    summary: "Playwright tests are running slowly"
    description: "P95 test duration is {{ $value | humanizeDuration }} (threshold: 5 minutes)"
    runbook_url: "https://wiki.internal/runbooks/playwright-slow-tests"
# Warning: Test suite timeout (a suite run took longer than 1 hour)
- alert: PlaywrightSuiteTimeout
  # An alert expression must be a metric query, so the unwrapped duration is
  # wrapped in a range aggregation: the longest suite run reported in the last
  # 15 minutes. The value is converted from ms to seconds because
  # `humanizeDuration` in the description expects seconds (3600 s = 1 hour).
  expr: |
    (
      max_over_time(
        {job="playwright-tests", event="test_suite_end"} | json | unwrap duration_ms | __error__=""
      [15m]) by (job)
      / 1000
    ) > 3600
  labels:
    severity: warning
    team: qa
    component: playwright
  annotations:
    summary: "Playwright test suite exceeded 1 hour"
    description: "Test suite took {{ $value | humanizeDuration }} to complete"
# Info: no test_start event has been logged for 2 hours.
# The expression itself is not limited to business hours, so it is evaluated
# around the clock.
- alert: NoPlaywrightTestsRunning
  for: 5m
  expr: |
    absent_over_time({job="playwright-tests", event="test_start"}[2h])
  labels:
    component: playwright
    team: qa
    severity: info
  annotations:
    summary: 'No Playwright tests have run recently'
    description: 'No test executions detected in the last 2 hours. CI/CD pipeline may be broken.'
    runbook_url: 'https://wiki.internal/runbooks/playwright-no-tests'
# Warning: Flaky test detected (more than 3 retry failures within an hour)
- alert: FlakyPlaywrightTest
  # A vector aggregation cannot be applied to a bare log stream, so the
  # retry failures are counted over a 1h window first and then summed per
  # test_name. `| __error__=""` drops lines that failed JSON parsing.
  expr: |
    sum by (test_name) (
      count_over_time(
        {job="playwright-tests", status="failed", retry="1"} | json | __error__="" [1h]
      )
    ) > 3
  # NOTE(review): combined with the 1h counting window, `for: 1h` requires the
  # count to stay above 3 for a further hour before firing - confirm intended.
  for: 1h
  labels:
    severity: warning
    team: qa
    component: playwright
  annotations:
    summary: "Flaky test detected: {{ $labels.test_name }}"
    description: "Test '{{ $labels.test_name }}' has failed {{ $value }} times on retry in the last hour"
    runbook_url: "https://wiki.internal/runbooks/playwright-flaky-tests"
# Critical: Test infrastructure failure (logs arrive but no suite ever starts)
- alert: PlaywrightInfrastructureFailure
  # count_over_time yields no series (rather than 0) when nothing matches, and
  # un-aggregated operands of `and` only pair up on identical label sets, so
  # an "== 0 and > 0" form can never fire. Both sides are summed to a single
  # series and `unless` removes the result when a suite start was logged.
  expr: |
    sum(count_over_time({job="playwright-tests"}[30m])) > 0
    unless
    sum(count_over_time({job="playwright-tests", event="test_suite_start"}[30m])) > 0
  for: 5m
  labels:
    severity: critical
    team: devops
    component: playwright
  annotations:
    summary: "Playwright test infrastructure may be failing"
    description: "Tests are attempting to run but test suite is not starting properly"
    runbook_url: "https://wiki.internal/runbooks/playwright-infrastructure"
# Warning: High retry rate (more than 15% of finished tests were retries)
- alert: HighPlaywrightRetryRate
  # `retry!="0"` on its own also matches every stream that has no `retry`
  # label at all, which inflates the numerator. `retry=~".+"` requires the
  # label to be present before excluding "0".
  # NOTE(review): the numerator is restricted to test_end events so that it is
  # a subset of the denominator - assumes test_end streams carry the `retry`
  # label; confirm against the log shipper.
  expr: |
    (
      sum(rate({job="playwright-tests", event="test_end", retry=~".+", retry!="0"}[30m]))
      /
      sum(rate({job="playwright-tests", event="test_end"}[30m]))
    ) > 0.15
  for: 10m
  labels:
    severity: warning
    team: qa
    component: playwright
  annotations:
    summary: "High test retry rate detected"
    description: "{{ $value | humanizePercentage }} of tests are being retried"
# Info: Test duration increasing (last hour is 50% slower than the prior 24h)
- alert: PlaywrightDurationIncreasing
  # - `by (job)` reduces both sides to one series each; without it the labels
  #   extracted by `| json` differ between the two windows and the division
  #   finds no matching pairs.
  # - `| __error__=""` drops lines whose JSON/number parsing failed.
  # - Subtracting 1 turns the ratio into the fractional increase, so that
  #   `humanizePercentage` reports "50%" rather than "150%" at the threshold.
  expr: |
    (
      (
        avg_over_time({job="playwright-tests", event="test_end"} | json | unwrap duration_ms | __error__="" [1h]) by (job)
        /
        avg_over_time({job="playwright-tests", event="test_end"} | json | unwrap duration_ms | __error__="" [24h] offset 1h) by (job)
      ) - 1
    ) > 0.5
  for: 30m
  labels:
    severity: info
    team: qa
    component: playwright
  annotations:
    summary: "Playwright test duration is increasing"
    description: "Average test duration has increased by {{ $value | humanizePercentage }} compared to previous day"