mirror of
https://github.com/pacnpal/thrilltrack-explorer.git
synced 2025-12-20 06:31:13 -05:00
Refactor: Improve Cronitor health check error handling
This commit is contained in:
@@ -1,16 +1,39 @@
|
||||
import { useQuery } from '@tanstack/react-query';
|
||||
import { isRetryableError } from '@/lib/retryHelpers';
|
||||
import { handleNonCriticalError } from '@/lib/errorHandler';
|
||||
import { logger } from '@/lib/logger';
|
||||
|
||||
// Cronitor monitor endpoint polled by the health check (env=production, JSON format).
const CRONITOR_API_URL = 'https://cronitor.io/api/monitors/88kG4W?env=production&format=json';

// Interval between automatic polls, in milliseconds (60 seconds).
const POLL_INTERVAL = 60000;

// Retry configuration (all delays are in milliseconds)
const MAX_RETRIES = 3; // Upper bound checked by the retry callback before giving up
const BASE_DELAY = 1000; // 1 second: starting point for exponential backoff
const MAX_DELAY = 10000; // 10 seconds: cap applied before jitter is added
|
||||
|
||||
/**
 * Shape of the monitor payload this module cares about.
 * Presumably describes the JSON returned by the Cronitor monitor endpoint —
 * confirm against the query function that parses the response.
 */
interface CronitorResponse {
  /** Monitor status flag (true = healthy, false = down). */
  passing: boolean;
  // Other fields are not needed here. They are typed as `unknown` rather than
  // `any` so that any future access has to narrow the value first.
  [key: string]: unknown;
}
|
||||
|
||||
/**
 * Calculate the exponential backoff delay (with jitter) before the next retry.
 *
 * `failureCount` is treated as zero-based, matching how the retry callback in
 * this module reports attempts as `failureCount + 1`: the first retry waits
 * about BASE_DELAY, and each further failure doubles the delay up to MAX_DELAY.
 * NOTE(review): zero-based counting is also what TanStack Query documents for
 * `retryDelay` (attemptIndex) — confirm against the installed version.
 *
 * @param failureCount - Number of failures so far (0 for the first retry).
 *   Negative or NaN values are treated as 0.
 * @returns Delay in milliseconds: the capped exponential delay ±30% jitter,
 *   never negative.
 */
function calculateRetryDelay(failureCount: number): number {
  // Guard the exponent: a negative count would shrink the delay below
  // BASE_DELAY and NaN would propagate into the returned value.
  const attemptIndex = Number.isNaN(failureCount) ? 0 : Math.max(0, failureCount);

  // A very large count overflows to Infinity, which Math.min then caps.
  const exponentialDelay = BASE_DELAY * Math.pow(2, attemptIndex);
  const cappedDelay = Math.min(exponentialDelay, MAX_DELAY);

  // Add ±30% jitter to prevent thundering herd
  const jitterAmount = cappedDelay * 0.3;
  const jitterOffset = (Math.random() * 2 - 1) * jitterAmount;

  return Math.max(0, cappedDelay + jitterOffset);
}
|
||||
|
||||
/**
|
||||
* Hook to poll Cronitor API for health status
|
||||
* Returns the monitor's passing status (true = healthy, false = down)
|
||||
* Implements exponential backoff retry with jitter
|
||||
*/
|
||||
export function useCronitorHealth() {
|
||||
return useQuery({
|
||||
@@ -29,8 +52,70 @@ export function useCronitorHealth() {
|
||||
|
||||
return response.json();
|
||||
},
|
||||
retry: (failureCount, error) => {
|
||||
// Use existing retry logic to determine if error is retryable
|
||||
if (!isRetryableError(error)) {
|
||||
logger.warn('Cronitor health check: Non-retryable error', { error });
|
||||
|
||||
// Log non-retryable errors to error monitoring (non-critical)
|
||||
handleNonCriticalError(error, {
|
||||
action: 'Cronitor Health Check - Non-Retryable Error',
|
||||
metadata: {
|
||||
failureCount,
|
||||
errorType: 'non_retryable',
|
||||
},
|
||||
});
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Retry up to MAX_RETRIES times
|
||||
if (failureCount >= MAX_RETRIES) {
|
||||
logger.error('Cronitor health check: Max retries exhausted', {
|
||||
error,
|
||||
totalAttempts: MAX_RETRIES,
|
||||
});
|
||||
|
||||
// Track exhausted retries in Cronitor RUM and error monitoring
|
||||
if (typeof window !== 'undefined' && window.cronitor) {
|
||||
window.cronitor.track('cronitor_health_check_failed', {
|
||||
totalAttempts: MAX_RETRIES,
|
||||
error: error instanceof Error ? error.message : String(error),
|
||||
severity: 'high',
|
||||
});
|
||||
}
|
||||
|
||||
// Log to error monitoring system (non-critical, background operation)
|
||||
handleNonCriticalError(error, {
|
||||
action: 'Cronitor Health Check - Max Retries Exhausted',
|
||||
metadata: {
|
||||
totalAttempts: MAX_RETRIES,
|
||||
errorType: 'max_retries_exhausted',
|
||||
},
|
||||
});
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Track retry attempt in Cronitor RUM
|
||||
if (typeof window !== 'undefined' && window.cronitor) {
|
||||
window.cronitor.track('cronitor_health_check_retry', {
|
||||
attempt: failureCount + 1,
|
||||
maxAttempts: MAX_RETRIES,
|
||||
error: error instanceof Error ? error.message : String(error),
|
||||
});
|
||||
}
|
||||
|
||||
logger.info(`Cronitor health check: Retry attempt ${failureCount + 1}/${MAX_RETRIES}`, {
|
||||
attempt: failureCount + 1,
|
||||
maxAttempts: MAX_RETRIES,
|
||||
error,
|
||||
});
|
||||
|
||||
return true;
|
||||
},
|
||||
retryDelay: calculateRetryDelay, // Use exponential backoff with jitter
|
||||
refetchInterval: POLL_INTERVAL, // Auto-poll every 60 seconds
|
||||
retry: 2, // Retry failed requests twice
|
||||
staleTime: 30000, // Consider data stale after 30 seconds
|
||||
gcTime: 5 * 60 * 1000, // Keep in cache for 5 minutes
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user