diff --git a/src/hooks/useCronitorHealth.ts b/src/hooks/useCronitorHealth.ts
index e3063329..65804e01 100644
--- a/src/hooks/useCronitorHealth.ts
+++ b/src/hooks/useCronitorHealth.ts
@@ -1,16 +1,39 @@
 import { useQuery } from '@tanstack/react-query';
+import { isRetryableError } from '@/lib/retryHelpers';
+import { handleNonCriticalError } from '@/lib/errorHandler';
+import { logger } from '@/lib/logger';
 
 const CRONITOR_API_URL = 'https://cronitor.io/api/monitors/88kG4W?env=production&format=json';
 const POLL_INTERVAL = 60000; // 60 seconds
 
+// Retry configuration
+const MAX_RETRIES = 3;
+const BASE_DELAY = 1000; // 1 second
+const MAX_DELAY = 10000; // 10 seconds
+
 interface CronitorResponse {
   passing: boolean;
   [key: string]: any; // Other fields we don't need
 }
 
+/**
+ * Calculate exponential backoff delay with jitter
+ */
+function calculateRetryDelay(failureCount: number): number {
+  const exponentialDelay = BASE_DELAY * Math.pow(2, failureCount - 1);
+  const cappedDelay = Math.min(exponentialDelay, MAX_DELAY);
+
+  // Add ±30% jitter to prevent thundering herd
+  const jitterAmount = cappedDelay * 0.3;
+  const jitterOffset = (Math.random() * 2 - 1) * jitterAmount;
+
+  return Math.max(0, cappedDelay + jitterOffset);
+}
+
 /**
  * Hook to poll Cronitor API for health status
  * Returns the monitor's passing status (true = healthy, false = down)
+ * Implements exponential backoff retry with jitter
  */
 export function useCronitorHealth() {
   return useQuery({
@@ -29,8 +52,70 @@ export function useCronitorHealth() {
       return response.json();
     },
+    retry: (failureCount, error) => {
+      // Use existing retry logic to determine if error is retryable
+      if (!isRetryableError(error)) {
+        logger.warn('Cronitor health check: Non-retryable error', { error });
+
+        // Log non-retryable errors to error monitoring (non-critical)
+        handleNonCriticalError(error, {
+          action: 'Cronitor Health Check - Non-Retryable Error',
+          metadata: {
+            failureCount,
+            errorType: 'non_retryable',
+          },
+        });
+
+        return false;
+      }
+
+      // Retry up to MAX_RETRIES times
+      if (failureCount >= MAX_RETRIES) {
+        logger.error('Cronitor health check: Max retries exhausted', {
+          error,
+          totalAttempts: MAX_RETRIES,
+        });
+
+        // Track exhausted retries in Cronitor RUM and error monitoring
+        if (typeof window !== 'undefined' && window.cronitor) {
+          window.cronitor.track('cronitor_health_check_failed', {
+            totalAttempts: MAX_RETRIES,
+            error: error instanceof Error ? error.message : String(error),
+            severity: 'high',
+          });
+        }
+
+        // Log to error monitoring system (non-critical, background operation)
+        handleNonCriticalError(error, {
+          action: 'Cronitor Health Check - Max Retries Exhausted',
+          metadata: {
+            totalAttempts: MAX_RETRIES,
+            errorType: 'max_retries_exhausted',
+          },
+        });
+
+        return false;
+      }
+
+      // Track retry attempt in Cronitor RUM
+      if (typeof window !== 'undefined' && window.cronitor) {
+        window.cronitor.track('cronitor_health_check_retry', {
+          attempt: failureCount + 1,
+          maxAttempts: MAX_RETRIES,
+          error: error instanceof Error ? error.message : String(error),
+        });
+      }
+
+      logger.info(`Cronitor health check: Retry attempt ${failureCount + 1}/${MAX_RETRIES}`, {
+        attempt: failureCount + 1,
+        maxAttempts: MAX_RETRIES,
+        error,
+      });
+
+      return true;
+    },
+    retryDelay: calculateRetryDelay, // Use exponential backoff with jitter
     refetchInterval: POLL_INTERVAL, // Auto-poll every 60 seconds
-    retry: 2, // Retry failed requests twice
     staleTime: 30000, // Consider data stale after 30 seconds
     gcTime: 5 * 60 * 1000, // Keep in cache for 5 minutes
   });
 }
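
Note on the `window.cronitor.track(...)` calls above: they assume the Cronitor RUM client is attached to `window` and declared for TypeScript elsewhere in the project. If no such declaration exists yet, a minimal sketch of a global augmentation might look like the following; the `CronitorRUM` interface name and the `track(event, properties)` signature are assumptions inferred only from the usage in this diff, not Cronitor's published types.

```ts
// types/cronitor.d.ts — hypothetical ambient declaration, inferred from the
// window.cronitor.track(...) calls in useCronitorHealth.
interface CronitorRUM {
  track(event: string, properties?: Record<string, unknown>): void;
}

declare global {
  interface Window {
    // Optional because the RUM script may not have loaded (or runs server-side).
    cronitor?: CronitorRUM;
  }
}

export {};
```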
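
For context on how the hook is consumed, here is a minimal sketch of a caller. The `HealthIndicator` component is hypothetical and not part of this diff; it relies only on the `passing` field and React Query's `isError` flag, which flips once the `retry` callback above returns `false`.

```tsx
// HealthIndicator.tsx — hypothetical consumer, shown for illustration only.
import { useCronitorHealth } from '@/hooks/useCronitorHealth';

export function HealthIndicator() {
  const { data, isError, isLoading } = useCronitorHealth();

  if (isLoading) return <span>Checking status…</span>;

  // Treat exhausted retries (isError) the same as a failing monitor.
  const healthy = !isError && data?.passing === true;
  return <span>{healthy ? 'Operational' : 'Service disruption'}</span>;
}
```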