Refactor: Improve Cronitor health check error handling

gpt-engineer-app[bot]
2025-11-05 15:42:43 +00:00
parent 6c9cd57190
commit 09de0772ea


@@ -1,16 +1,39 @@
import { useQuery } from '@tanstack/react-query';
import { isRetryableError } from '@/lib/retryHelpers';
import { handleNonCriticalError } from '@/lib/errorHandler';
import { logger } from '@/lib/logger';
const CRONITOR_API_URL = 'https://cronitor.io/api/monitors/88kG4W?env=production&format=json';
const POLL_INTERVAL = 60000; // 60 seconds
// Retry configuration
const MAX_RETRIES = 3;
const BASE_DELAY = 1000; // 1 second
const MAX_DELAY = 10000; // 10 seconds
interface CronitorResponse {
passing: boolean;
[key: string]: any; // Other fields we don't need
}
/**
* Calculate exponential backoff delay with jitter
*/
function calculateRetryDelay(failureCount: number): number {
const exponentialDelay = BASE_DELAY * Math.pow(2, failureCount - 1);
const cappedDelay = Math.min(exponentialDelay, MAX_DELAY);
// Add ±30% jitter to prevent thundering herd
const jitterAmount = cappedDelay * 0.3;
const jitterOffset = (Math.random() * 2 - 1) * jitterAmount;
return Math.max(0, cappedDelay + jitterOffset);
}
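// Resulting schedule before jitter: 1s, 2s, 4s, 8s, then capped at 10s.
// With the ±30% jitter applied, e.g. calculateRetryDelay(1) falls roughly in
// 700-1300 ms and calculateRetryDelay(3) roughly in 2800-5200 ms; the jitter
// is added after the cap, so a capped delay can exceed MAX_DELAY by up to 30%.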
/**
* Hook to poll Cronitor API for health status
* Returns the monitor's passing status (true = healthy, false = down)
* Implements exponential backoff retry with jitter
*/
export function useCronitorHealth() {
return useQuery({
@@ -29,8 +52,70 @@ export function useCronitorHealth() {
return response.json();
},
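// Custom retry policy: skip retries for non-retryable errors, cap attempts
// at MAX_RETRIES, and report each retry and final exhaustion to Cronitor RUM,
// the logger, and the non-critical error handler.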
retry: (failureCount, error) => {
// Use existing retry logic to determine if error is retryable
if (!isRetryableError(error)) {
logger.warn('Cronitor health check: Non-retryable error', { error });
// Log non-retryable errors to error monitoring (non-critical)
handleNonCriticalError(error, {
action: 'Cronitor Health Check - Non-Retryable Error',
metadata: {
failureCount,
errorType: 'non_retryable',
},
});
return false;
}
// Retry up to MAX_RETRIES times
if (failureCount >= MAX_RETRIES) {
logger.error('Cronitor health check: Max retries exhausted', {
error,
totalAttempts: MAX_RETRIES,
});
// Track exhausted retries in Cronitor RUM and error monitoring
if (typeof window !== 'undefined' && window.cronitor) {
window.cronitor.track('cronitor_health_check_failed', {
totalAttempts: MAX_RETRIES,
error: error instanceof Error ? error.message : String(error),
severity: 'high',
});
}
// Log to error monitoring system (non-critical, background operation)
handleNonCriticalError(error, {
action: 'Cronitor Health Check - Max Retries Exhausted',
metadata: {
totalAttempts: MAX_RETRIES,
errorType: 'max_retries_exhausted',
},
});
return false;
}
// Track retry attempt in Cronitor RUM
if (typeof window !== 'undefined' && window.cronitor) {
window.cronitor.track('cronitor_health_check_retry', {
attempt: failureCount + 1,
maxAttempts: MAX_RETRIES,
error: error instanceof Error ? error.message : String(error),
});
}
logger.info(`Cronitor health check: Retry attempt ${failureCount + 1}/${MAX_RETRIES}`, {
attempt: failureCount + 1,
maxAttempts: MAX_RETRIES,
error,
});
return true;
},
retryDelay: calculateRetryDelay, // Use exponential backoff with jitter
refetchInterval: POLL_INTERVAL, // Auto-poll every 60 seconds
staleTime: 30000, // Consider data stale after 30 seconds
gcTime: 5 * 60 * 1000, // Keep in cache for 5 minutes
});
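
For reference, a minimal consumption sketch of the hook. The component name, import path, and status copy below are illustrative assumptions, not part of this commit:

import { useCronitorHealth } from '@/hooks/useCronitorHealth'; // assumed path

// Hypothetical status badge driven by the polled health state.
export function HealthBadge() {
  const { data, isLoading, isError } = useCronitorHealth();

  // No status to show until the first request settles.
  if (isLoading) return <span>Checking status…</span>;

  // isError is only reached once the retry callback above returns false
  // (non-retryable error or MAX_RETRIES exhausted).
  if (isError || !data) return <span>Status unavailable</span>;

  return <span>{data.passing ? 'All systems operational' : 'Service disruption'}</span>;
}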