# Mirror of https://github.com/pacnpal/thrilltrack-explorer.git
# (synced 2025-12-20 07:11:12 -05:00; 167 lines, 6.1 KiB, YAML)
# Grafana Loki Alert Rules for Playwright Tests
# Load this file with the Loki ruler (or Grafana Cloud alerting); alerts that
# fire are then delivered through Alertmanager.
#
# Notes that apply to every rule below:
#   * Loki returns NO sample (not 0) when a selector matches no log lines in
#     the range, so "nothing happened" is expressed with `unless` or
#     `absent_over_time`, never with `== 0`.
#   * `| json` turns every extracted field into a label. Unwrapped range
#     aggregations therefore carry an explicit `by (...)`, otherwise each
#     distinct label set is evaluated as its own series.
#   * `| __error__=""` drops lines that fail JSON parsing or numeric
#     conversion so that one malformed line cannot fail the rule evaluation.
#   * `duration_ms` is divided by 1000 before it reaches `humanizeDuration`,
#     which expects seconds.
#   * NOTE(review): the selectors assume `event`, `status`, `retry` and
#     `browser` are stream labels; confirm against the log shipper config.

groups:
  - name: playwright_test_alerts
    interval: 1m
    rules:
      # Critical: All tests are failing
      # Fires when tests finished in the window but none of them passed.
      # $value is the number of `test_end` lines seen in the last 15 minutes.
      - alert: AllPlaywrightTestsFailing
        expr: |
          sum(count_over_time({job="playwright-tests", event="test_end"}[15m])) > 0
          unless
          sum(count_over_time({job="playwright-tests", status="passed"}[15m])) > 0
        for: 5m
        labels:
          severity: critical
          team: qa
          component: playwright
        annotations:
          summary: "All Playwright tests are failing"
          description: "No passing tests detected in the last 15 minutes. Test count: {{ $value }}"
          runbook_url: "https://wiki.internal/runbooks/playwright-all-tests-failing"
          dashboard_url: "https://grafana.internal/d/playwright-dashboard"

      # Warning: High failure rate
      # Ratio of failed lines to finished tests over 30 minutes; above 20%.
      - alert: HighPlaywrightFailureRate
        expr: |
          (
            sum(rate({job="playwright-tests", status="failed"}[30m]))
            /
            sum(rate({job="playwright-tests", event="test_end"}[30m]))
          ) > 0.20
        for: 10m
        labels:
          severity: warning
          team: qa
          component: playwright
        annotations:
          summary: "High Playwright test failure rate detected"
          description: "{{ $value | humanizePercentage }} of tests are failing over the last 30 minutes"
          runbook_url: "https://wiki.internal/runbooks/playwright-high-failure-rate"

      # Warning: Specific browser has high failure rate
      # Same ratio as above, evaluated per `browser` label; above 30%.
      - alert: BrowserSpecificFailures
        expr: |
          (
            sum by (browser) (rate({job="playwright-tests", status="failed"}[30m]))
            /
            sum by (browser) (rate({job="playwright-tests", event="test_end"}[30m]))
          ) > 0.30
        for: 10m
        labels:
          severity: warning
          team: qa
          component: playwright
        annotations:
          summary: "High failure rate in {{ $labels.browser }}"
          description: "{{ $labels.browser }} browser has {{ $value | humanizePercentage }} failure rate"

      # Warning: Slow test execution
      # P95 of `duration_ms` across all finished tests, converted to seconds;
      # the threshold is 300 s (5 minutes).
      - alert: SlowPlaywrightTests
        expr: |
          (
            quantile_over_time(0.95,
              {job="playwright-tests", event="test_end"}
                | json | unwrap duration_ms | __error__="" [30m]
            ) by (job)
            / 1000
          ) > 300
        for: 15m
        labels:
          severity: warning
          team: qa
          component: playwright
        annotations:
          summary: "Playwright tests are running slowly"
          description: "P95 test duration is {{ $value | humanizeDuration }} (threshold: 5 minutes)"
          runbook_url: "https://wiki.internal/runbooks/playwright-slow-tests"

      # Warning: Test suite timeout
      # Longest suite duration reported in the last 5 minutes, in seconds;
      # the threshold is 3600 s (1 hour). A rule expression has to be a metric
      # query, so the unwrapped value is wrapped in `max_over_time`.
      - alert: PlaywrightSuiteTimeout
        expr: |
          (
            max_over_time(
              {job="playwright-tests", event="test_suite_end"}
                | json | unwrap duration_ms | __error__="" [5m]
            ) by (job)
            / 1000
          ) > 3600
        labels:
          severity: warning
          team: qa
          component: playwright
        annotations:
          summary: "Playwright test suite exceeded 1 hour"
          description: "Test suite took {{ $value | humanizeDuration }} to complete"

      # Info: No tests running
      # NOTE(review): the expression has no time-of-day condition, so this can
      # fire outside business hours as well. If it should only notify during
      # business hours, restrict it in alert routing (for example Alertmanager
      # time intervals or a Grafana mute timing).
      - alert: NoPlaywrightTestsRunning
        expr: |
          absent_over_time({job="playwright-tests", event="test_start"}[2h])
        for: 5m
        labels:
          severity: info
          team: qa
          component: playwright
        annotations:
          summary: "No Playwright tests have run recently"
          description: "No test executions detected in the last 2 hours. CI/CD pipeline may be broken."
          runbook_url: "https://wiki.internal/runbooks/playwright-no-tests"

      # Warning: Flaky test detected
      # Counts failed first-retry lines per `test_name` over the last hour and
      # fires above 3. `test_name` is taken from the JSON payload.
      # NOTE(review): together with the 1h range, `for: 1h` requires the count
      # to stay above 3 for a further hour before firing; confirm that delay
      # is intended.
      - alert: FlakyPlaywrightTest
        expr: |
          sum by (test_name) (
            count_over_time(
              {job="playwright-tests", status="failed", retry="1"}
                | json | __error__="" [1h]
            )
          ) > 3
        for: 1h
        labels:
          severity: warning
          team: qa
          component: playwright
        annotations:
          summary: "Flaky test detected: {{ $labels.test_name }}"
          description: "Test '{{ $labels.test_name }}' has failed {{ $value }} times on retry in the last hour"
          runbook_url: "https://wiki.internal/runbooks/playwright-flaky-tests"

      # Critical: Test infrastructure failure
      # Fires when the job produced log lines in the window but none of them
      # was a `test_suite_start` event. Both sides are summed so they share
      # the same (empty) label set and can be compared with `unless`.
      - alert: PlaywrightInfrastructureFailure
        expr: |
          sum(count_over_time({job="playwright-tests"}[30m])) > 0
          unless
          sum(count_over_time({job="playwright-tests", event="test_suite_start"}[30m])) > 0
        for: 5m
        labels:
          severity: critical
          team: devops
          component: playwright
        annotations:
          summary: "Playwright test infrastructure may be failing"
          description: "Tests are attempting to run but test suite is not starting properly"
          runbook_url: "https://wiki.internal/runbooks/playwright-infrastructure"

      # Warning: High retry rate
      # Share of finished tests that were retries; above 15%. The numerator is
      # limited to `test_end` lines that carry a non-zero `retry` label:
      # `retry!="0"` on its own also matches streams with no `retry` label.
      - alert: HighPlaywrightRetryRate
        expr: |
          (
            sum(rate({job="playwright-tests", event="test_end", retry!="0", retry!=""}[30m]))
            /
            sum(rate({job="playwright-tests", event="test_end"}[30m]))
          ) > 0.15
        for: 10m
        labels:
          severity: warning
          team: qa
          component: playwright
        annotations:
          summary: "High test retry rate detected"
          description: "{{ $value | humanizePercentage }} of tests are being retried"

      # Info: Test duration increasing
      # Compares the average duration of the last hour with the average of the
      # 24 hours before it. $value is the relative increase (ratio minus 1),
      # so the 0.5 threshold corresponds to the last hour being 1.5x slower
      # and the annotation renders the increase itself (e.g. "50%").
      - alert: PlaywrightDurationIncreasing
        expr: |
          (
            avg_over_time(
              {job="playwright-tests", event="test_end"}
                | json | unwrap duration_ms | __error__="" [1h]
            ) by (job)
            /
            avg_over_time(
              {job="playwright-tests", event="test_end"}
                | json | unwrap duration_ms | __error__="" [24h] offset 1h
            ) by (job)
          ) - 1 > 0.5
        for: 30m
        labels:
          severity: info
          team: qa
          component: playwright
        annotations:
          summary: "Playwright test duration is increasing"
          description: "Average test duration has increased by {{ $value | humanizePercentage }} compared to previous day"