---
# Grafana Loki alert rules for Playwright tests.
#
# These are LogQL alerting rules. Load them into the Loki ruler (self-hosted
# or Grafana Cloud); the ruler evaluates the expressions and hands firing
# alerts to Alertmanager for routing. Alertmanager itself does not evaluate
# rules.
#
# NOTE(review): the selectors assume `job`, `event`, `status`, `browser` and
# `retry` are stream labels, and that `duration_ms` and `test_name` are
# available after the `json` parser stage -- confirm against the log shipper.
#
# Conventions used below:
#   * "Nothing matched" is detected with `unless`, not `== 0`: a LogQL
#     aggregation over a selector that matches no streams returns no series
#     at all, so a `== 0` comparison can never be true.
#   * Unwrapped aggregations are grouped `by (job)` so the labels extracted
#     by `json` do not split the result into one series per log-line shape,
#     and `__error__=""` drops lines that fail to parse or convert.
#   * Durations are converted from milliseconds to seconds inside the
#     expression because the `humanizeDuration` template expects seconds.
groups:
  - name: playwright_test_alerts
    interval: 1m
    rules:
      # Critical: all tests are failing.
      # Fires when test_end events were logged in the window but no entry
      # with status="passed" was. $value is the number of test_end events.
      - alert: AllPlaywrightTestsFailing
        expr: |
          sum(count_over_time({job="playwright-tests", event="test_end"}[15m])) > 0
          unless
          sum(count_over_time({job="playwright-tests", status="passed"}[15m])) > 0
        for: 5m
        labels:
          severity: critical
          team: qa
          component: playwright
        annotations:
          summary: "All Playwright tests are failing"
          description: "No passing tests detected in the last 15 minutes. Test count: {{ $value }}"
          runbook_url: "https://wiki.internal/runbooks/playwright-all-tests-failing"
          dashboard_url: "https://grafana.internal/d/playwright-dashboard"

      # Warning: high failure rate.
      # Ratio of failed entries to finished tests over 30 minutes.
      - alert: HighPlaywrightFailureRate
        expr: |
          (
            sum(rate({job="playwright-tests", status="failed"}[30m]))
            /
            sum(rate({job="playwright-tests", event="test_end"}[30m]))
          ) > 0.20
        for: 10m
        labels:
          severity: warning
          team: qa
          component: playwright
        annotations:
          summary: "High Playwright test failure rate detected"
          description: "{{ $value | humanizePercentage }} of tests are failing over the last 30 minutes"
          runbook_url: "https://wiki.internal/runbooks/playwright-high-failure-rate"

      # Warning: a specific browser has a high failure rate.
      # Same ratio as above, computed per `browser` label value.
      - alert: BrowserSpecificFailures
        expr: |
          (
            sum by (browser) (rate({job="playwright-tests", status="failed"}[30m]))
            /
            sum by (browser) (rate({job="playwright-tests", event="test_end"}[30m]))
          ) > 0.30
        for: 10m
        labels:
          severity: warning
          team: qa
          component: playwright
        annotations:
          summary: "High failure rate in {{ $labels.browser }}"
          description: "{{ $labels.browser }} browser has {{ $value | humanizePercentage }} failure rate"

      # Warning: slow test execution.
      # P95 of duration_ms over 30 minutes, converted to seconds; the
      # threshold of 300 s is the 5 minutes quoted in the description.
      - alert: SlowPlaywrightTests
        expr: |
          (
            quantile_over_time(
              0.95,
              {job="playwright-tests", event="test_end"}
                | json | unwrap duration_ms | __error__="" [30m]
            ) by (job)
            / 1000
          ) > 300
        for: 15m
        labels:
          severity: warning
          team: qa
          component: playwright
        annotations:
          summary: "Playwright tests are running slowly"
          description: "P95 test duration is {{ $value | humanizeDuration }} (threshold: 5 minutes)"
          runbook_url: "https://wiki.internal/runbooks/playwright-slow-tests"

      # Warning: test suite timeout.
      # A bare `unwrap` is a log query and cannot drive an alert, so the
      # longest suite duration seen in the last 5 minutes is used instead.
      # Value is in seconds; 3600 s is the 1 hour limit. No `for:` because a
      # single test_suite_end event should alert on the next evaluation.
      - alert: PlaywrightSuiteTimeout
        expr: |
          (
            max_over_time(
              {job="playwright-tests", event="test_suite_end"}
                | json | unwrap duration_ms | __error__="" [5m]
            ) by (job)
            / 1000
          ) > 3600
        labels:
          severity: warning
          team: qa
          component: playwright
        annotations:
          summary: "Playwright test suite exceeded 1 hour"
          description: "Test suite took {{ $value | humanizeDuration }} to complete"

      # Info: no tests running.
      # The expression is evaluated around the clock. If this should only
      # notify during business hours, restrict it in the Alertmanager route
      # (time intervals / mute timings); LogQL cannot express time of day.
      - alert: NoPlaywrightTestsRunning
        expr: |
          absent_over_time({job="playwright-tests", event="test_start"}[2h])
        for: 5m
        labels:
          severity: info
          team: qa
          component: playwright
        annotations:
          summary: "No Playwright tests have run recently"
          description: "No test executions detected in the last 2 hours. CI/CD pipeline may be broken."
          runbook_url: "https://wiki.internal/runbooks/playwright-no-tests"

      # Warning: flaky test detected.
      # Counts failed first-retry entries per test over the last hour. The
      # one-hour window lives in the range selector, so `for:` only needs to
      # debounce a few evaluations.
      - alert: FlakyPlaywrightTest
        expr: |
          sum by (test_name) (
            count_over_time(
              {job="playwright-tests", status="failed", retry="1"}
                | json | __error__="" [1h]
            )
          ) > 3
        for: 5m
        labels:
          severity: warning
          team: qa
          component: playwright
        annotations:
          summary: "Flaky test detected: {{ $labels.test_name }}"
          description: "Test '{{ $labels.test_name }}' has failed {{ $value }} times on retry in the last hour"
          runbook_url: "https://wiki.internal/runbooks/playwright-flaky-tests"

      # Critical: test infrastructure failure.
      # Fires when the job produced log lines in the window but none of them
      # was a test_suite_start event.
      - alert: PlaywrightInfrastructureFailure
        expr: |
          sum(count_over_time({job="playwright-tests"}[30m])) > 0
          unless
          sum(count_over_time({job="playwright-tests", event="test_suite_start"}[30m])) > 0
        for: 5m
        labels:
          severity: critical
          team: devops
          component: playwright
        annotations:
          summary: "Playwright test infrastructure may be failing"
          description: "Tests are attempting to run but test suite is not starting properly"
          runbook_url: "https://wiki.internal/runbooks/playwright-infrastructure"

      # Warning: high retry rate.
      # `retry!=""` is required because `retry!="0"` on its own also matches
      # every stream that has no retry label. Both sides are limited to
      # test_end events so the result is a true fraction of finished tests.
      # NOTE(review): assumes the retry label is set on test_end events --
      # confirm.
      - alert: HighPlaywrightRetryRate
        expr: |
          (
            sum(rate({job="playwright-tests", event="test_end", retry!="", retry!="0"}[30m]))
            /
            sum(rate({job="playwright-tests", event="test_end"}[30m]))
          ) > 0.15
        for: 10m
        labels:
          severity: warning
          team: qa
          component: playwright
        annotations:
          summary: "High test retry rate detected"
          description: "{{ $value | humanizePercentage }} of tests are being retried"

      # Info: test duration increasing.
      # Compares the last hour's average duration with the average of the
      # 24 hours before it. The expression yields the relative increase
      # (0.5 == +50%) so the "increased by" wording in the description is
      # accurate.
      - alert: PlaywrightDurationIncreasing
        expr: |
          (
            (
              avg_over_time(
                {job="playwright-tests", event="test_end"}
                  | json | unwrap duration_ms | __error__="" [1h]
              ) by (job)
              /
              avg_over_time(
                {job="playwright-tests", event="test_end"}
                  | json | unwrap duration_ms | __error__="" [24h] offset 1h
              ) by (job)
            ) - 1
          ) > 0.5
        for: 30m
        labels:
          severity: info
          team: qa
          component: playwright
        annotations:
          summary: "Playwright test duration is increasing"
          description: "Average test duration has increased by {{ $value | humanizePercentage }} compared to previous day"