Refactor: Improve Cronitor health check error handling

gpt-engineer-app[bot]
2025-11-05 15:42:43 +00:00
parent 6c9cd57190
commit 09de0772ea


@@ -1,16 +1,39 @@
import { useQuery } from '@tanstack/react-query';
import { isRetryableError } from '@/lib/retryHelpers';
import { handleNonCriticalError } from '@/lib/errorHandler';
import { logger } from '@/lib/logger';
const CRONITOR_API_URL = 'https://cronitor.io/api/monitors/88kG4W?env=production&format=json';
const POLL_INTERVAL = 60000; // 60 seconds
// Retry configuration
const MAX_RETRIES = 3;
const BASE_DELAY = 1000; // 1 second
const MAX_DELAY = 10000; // 10 seconds
interface CronitorResponse {
passing: boolean;
[key: string]: any; // Other fields we don't need
}
/**
* Calculate exponential backoff delay with jitter
*/
function calculateRetryDelay(failureCount: number): number {
const exponentialDelay = BASE_DELAY * Math.pow(2, failureCount - 1);
const cappedDelay = Math.min(exponentialDelay, MAX_DELAY);
// Add ±30% jitter to prevent thundering herd
const jitterAmount = cappedDelay * 0.3;
const jitterOffset = (Math.random() * 2 - 1) * jitterAmount;
return Math.max(0, cappedDelay + jitterOffset);
}
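// Resulting schedule before jitter: 1s, 2s, 4s, 8s, then capped at 10s.
// With the ±30% jitter applied, e.g. calculateRetryDelay(1) falls roughly in
// 700-1300 ms and calculateRetryDelay(3) roughly in 2800-5200 ms; the jitter
// is added after the cap, so a capped delay can exceed MAX_DELAY by up to 30%.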
/**
* Hook to poll Cronitor API for health status
* Returns the monitor's passing status (true = healthy, false = down)
* Implements exponential backoff retry with jitter
*/
export function useCronitorHealth() {
return useQuery({
@@ -29,8 +52,70 @@ export function useCronitorHealth() {
return response.json();
},
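// Custom retry policy: skip retries for non-retryable errors, cap attempts
// at MAX_RETRIES, and report each retry and final exhaustion to Cronitor RUM,
// the logger, and the non-critical error handler.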
retry: (failureCount, error) => {
// Use existing retry logic to determine if error is retryable
if (!isRetryableError(error)) {
logger.warn('Cronitor health check: Non-retryable error', { error });
// Log non-retryable errors to error monitoring (non-critical)
handleNonCriticalError(error, {
action: 'Cronitor Health Check - Non-Retryable Error',
metadata: {
failureCount,
errorType: 'non_retryable',
},
});
return false;
}
// Retry up to MAX_RETRIES times
if (failureCount >= MAX_RETRIES) {
logger.error('Cronitor health check: Max retries exhausted', {
error,
totalAttempts: MAX_RETRIES,
});
// Track exhausted retries in Cronitor RUM and error monitoring
if (typeof window !== 'undefined' && window.cronitor) {
window.cronitor.track('cronitor_health_check_failed', {
totalAttempts: MAX_RETRIES,
error: error instanceof Error ? error.message : String(error),
severity: 'high',
});
}
// Log to error monitoring system (non-critical, background operation)
handleNonCriticalError(error, {
action: 'Cronitor Health Check - Max Retries Exhausted',
metadata: {
totalAttempts: MAX_RETRIES,
errorType: 'max_retries_exhausted',
},
});
return false;
}
// Track retry attempt in Cronitor RUM
if (typeof window !== 'undefined' && window.cronitor) {
window.cronitor.track('cronitor_health_check_retry', {
attempt: failureCount + 1,
maxAttempts: MAX_RETRIES,
error: error instanceof Error ? error.message : String(error),
});
}
logger.info(`Cronitor health check: Retry attempt ${failureCount + 1}/${MAX_RETRIES}`, {
attempt: failureCount + 1,
maxAttempts: MAX_RETRIES,
error,
});
return true;
},
retryDelay: calculateRetryDelay, // Use exponential backoff with jitter
refetchInterval: POLL_INTERVAL, // Auto-poll every 60 seconds
staleTime: 30000, // Consider data stale after 30 seconds
gcTime: 5 * 60 * 1000, // Keep in cache for 5 minutes
});
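
For reference, a minimal consumption sketch of the hook. The component name, import path, and status copy below are illustrative assumptions, not part of this commit:

import { useCronitorHealth } from '@/hooks/useCronitorHealth'; // assumed path

// Hypothetical status badge driven by the polled health state.
export function HealthBadge() {
  const { data, isLoading, isError } = useCronitorHealth();

  // No status to show until the first request settles.
  if (isLoading) return <span>Checking status…</span>;

  // isError is only reached once the retry callback above returns false
  // (non-retryable error or MAX_RETRIES exhausted).
  if (isError || !data) return <span>Status unavailable</span>;

  return <span>{data.passing ? 'All systems operational' : 'Service disruption'}</span>;
}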