/**
 * Header-based bot detection
 */

/** Outcome of scoring one request's headers against the bot heuristics. */
export interface HeaderAnalysisResult {
  /** True when `confidence` reaches the header-detection threshold (30). */
  isBot: boolean;
  /** Sum of the weights of every matched signal, capped at 100 (0-100). */
  confidence: number;
  /** Identifiers of the matched heuristics, in the order they were evaluated. */
  signals: string[];
}

/**
 * Analyze request headers for bot indicators.
 *
 * Header names are matched case-insensitively. When a header carries several
 * values (an array), only the first value is considered. Header values are
 * compared exactly as received (case-sensitive).
 *
 * Each matched heuristic appends its identifier to `signals` and adds a fixed
 * weight to `confidence`; the total is capped at 100 and the request is
 * reported as a bot once the total reaches 30.
 *
 * @param headers - Request headers keyed by name; values may be a string, an
 *   array of strings, or undefined.
 * @returns The bot verdict, the capped confidence score and the matched signals.
 */
export function analyzeHeaders(
  headers: Record<string, string | string[] | undefined>,
): HeaderAnalysisResult {
  const signals: string[] = [];
  let confidence = 0;

  // Record a matched heuristic together with its weight.
  const flag = (signal: string, weight: number): void => {
    signals.push(signal);
    confidence += weight;
  };

  // Normalize header names to lowercase, keeping the first value of
  // multi-valued headers. Empty values (undefined, '', []) are skipped so they
  // read as "header absent" and never overwrite a real value that was supplied
  // under a differently cased name.
  const normalizedHeaders: Record<string, string | undefined> = {};
  for (const [key, value] of Object.entries(headers)) {
    const first = Array.isArray(value) ? value[0] : value;
    if (first) {
      normalizedHeaders[key.toLowerCase()] = first;
    }
  }

  const has = (name: string): boolean => Boolean(normalizedHeaders[name]);
  const accept = normalizedHeaders['accept'];

  // Explicit bot-identifying header
  if (normalizedHeaders['x-purpose'] === 'preview') {
    flag('x-purpose-preview', 40);
  }

  // Headless Chrome DevTools Protocol
  if (has('x-devtools-emulate-network-conditions-client-id')) {
    flag('devtools-protocol', 30);
  }

  // Missing typical browser headers
  if (!has('accept-language')) {
    flag('missing-accept-language', 15);
  }
  if (!has('accept-encoding')) {
    flag('missing-accept-encoding', 10);
  }

  // Suspicious Accept header (present, but neither HTML nor a wildcard)
  if (accept && !accept.includes('text/html') && !accept.includes('*/*')) {
    flag('non-html-accept', 15);
  }

  // Direct access without referer (common for bots); the misspelled
  // 'referrer' variant is accepted as well.
  if (!has('referer') && !has('referrer')) {
    flag('no-referer', 5);
  }

  // XHR requests might be legitimate AJAX but also automation
  if (normalizedHeaders['x-requested-with'] === 'XMLHttpRequest') {
    flag('xhr-request', 5);
  }

  // Very simple Accept header (typical of scrapers). Note that
  // 'application/json' also matches 'non-html-accept' above, so both weights
  // apply to it.
  if (accept === '*/*' || accept === 'application/json') {
    flag('simple-accept', 10);
  }

  // Neither Cookie nor DNT present (bots often don't send these)
  if (!has('cookie') && !has('dnt')) {
    flag('no-cookie-or-dnt', 5);
  }

  // Forwarding header from proxies/CDNs (could indicate bot)
  if (has('x-forwarded-for')) {
    flag('has-x-forwarded-for', 5);
  }

  // Cloudflare in front of the request is normal by itself; only the
  // combination of a resolved country with no Accept-Language is scored.
  if (has('cf-ray') && has('cf-ipcountry') && !has('accept-language')) {
    flag('cloudflare-without-language', 10);
  }

  // Cap confidence at 100
  confidence = Math.min(confidence, 100);

  // Threshold for header-based detection
  const isBot = confidence >= 30;

  return { isBot, confidence, signals };
}