/**
 * Comprehensive user-agent bot patterns organized by category.
 */

/**
 * A single user-agent signature and the platform/category it is attributed to.
 */
export interface BotPattern {
  /** Lowercase token to look for in a user-agent string. */
  pattern: string;
  /** Identifier of the product or service the token is attributed to. */
  platform: string;
  /** Coarse grouping of the platform. */
  category:
    | 'social'
    | 'seo'
    | 'monitoring'
    | 'preview'
    | 'ai'
    | 'dev'
    | 'archive'
    | 'email'
    | 'generic';
}

/**
 * Known bot signatures.
 *
 * Array order is significant: sections are listed from highest to lowest
 * priority, and within a platform the more specific token is listed before
 * the more general one (e.g. `twitterbot` before `twitter`). The generic
 * catch-all tokens are kept at the very end.
 *
 * NOTE(review): this presumably feeds a first-match, case-insensitive
 * substring lookup — the matcher is not part of this file, so verify
 * against the consumer before relying on that.
 */
export const BOT_PATTERNS: BotPattern[] = [
  // Social Media Preview Bots (HIGH PRIORITY)
  { pattern: 'facebookexternalhit', platform: 'facebook', category: 'social' },
  { pattern: 'facebot', platform: 'facebook', category: 'social' },
  { pattern: 'twitterbot', platform: 'twitter', category: 'social' },
  { pattern: 'twitter', platform: 'twitter', category: 'social' },
  { pattern: 'linkedinbot', platform: 'linkedin', category: 'social' },
  { pattern: 'linkedin', platform: 'linkedin', category: 'social' },
  { pattern: 'slackbot', platform: 'slack', category: 'social' },
  { pattern: 'slack-imgproxy', platform: 'slack', category: 'social' },
  { pattern: 'telegrambot', platform: 'telegram', category: 'social' },
  { pattern: 'whatsapp', platform: 'whatsapp', category: 'social' },
  { pattern: 'discordbot', platform: 'discord', category: 'social' },
  { pattern: 'discord', platform: 'discord', category: 'social' },
  { pattern: 'pinterestbot', platform: 'pinterest', category: 'social' },
  { pattern: 'pinterest', platform: 'pinterest', category: 'social' },
  { pattern: 'redditbot', platform: 'reddit', category: 'social' },
  { pattern: 'reddit', platform: 'reddit', category: 'social' },
  { pattern: 'instagram', platform: 'instagram', category: 'social' },
  { pattern: 'snapchat', platform: 'snapchat', category: 'social' },
  { pattern: 'tiktokbot', platform: 'tiktok', category: 'social' },
  { pattern: 'bytespider', platform: 'tiktok', category: 'social' },
  { pattern: 'tumblr', platform: 'tumblr', category: 'social' },
  { pattern: 'vkshare', platform: 'vk', category: 'social' },
  // NOTE(review): 'line' is a very short token; if the consumer does plain
  // substring matching it may also hit unrelated user agents — confirm.
  { pattern: 'line', platform: 'line', category: 'social' },
  { pattern: 'kakaotalk', platform: 'kakaotalk', category: 'social' },
  { pattern: 'wechat', platform: 'wechat', category: 'social' },

  // Search Engine Crawlers
  { pattern: 'googlebot', platform: 'google', category: 'seo' },
  { pattern: 'bingbot', platform: 'bing', category: 'seo' },
  { pattern: 'bingpreview', platform: 'bing', category: 'preview' },
  { pattern: 'slurp', platform: 'yahoo', category: 'seo' },
  { pattern: 'duckduckbot', platform: 'duckduckgo', category: 'seo' },
  { pattern: 'baiduspider', platform: 'baidu', category: 'seo' },
  { pattern: 'yandexbot', platform: 'yandex', category: 'seo' },

  // SEO & Analytics Crawlers
  { pattern: 'ahrefsbot', platform: 'ahrefs', category: 'seo' },
  { pattern: 'ahrefs', platform: 'ahrefs', category: 'seo' },
  { pattern: 'semrushbot', platform: 'semrush', category: 'seo' },
  { pattern: 'dotbot', platform: 'moz', category: 'seo' },
  { pattern: 'rogerbot', platform: 'moz', category: 'seo' },
  { pattern: 'screaming frog', platform: 'screaming-frog', category: 'seo' },
  { pattern: 'majestic', platform: 'majestic', category: 'seo' },
  // Majestic's crawler identifies itself as "MJ12bot".
  { pattern: 'mj12bot', platform: 'majestic', category: 'seo' },
  { pattern: 'similarweb', platform: 'similarweb', category: 'seo' },
  { pattern: 'dataforseo', platform: 'dataforseo', category: 'seo' },

  // Monitoring & Uptime Services
  { pattern: 'pingdom', platform: 'pingdom', category: 'monitoring' },
  { pattern: 'statuscake', platform: 'statuscake', category: 'monitoring' },
  { pattern: 'uptimerobot', platform: 'uptimerobot', category: 'monitoring' },
  { pattern: 'newrelic', platform: 'newrelic', category: 'monitoring' },
  { pattern: 'datadog', platform: 'datadog', category: 'monitoring' },

  // Preview & Unfurling Services
  { pattern: 'embedly', platform: 'embedly', category: 'preview' },
  { pattern: 'nuzzel', platform: 'nuzzel', category: 'preview' },
  { pattern: 'qwantify', platform: 'qwantify', category: 'preview' },
  { pattern: 'skypeuripreview', platform: 'skype', category: 'preview' },
  { pattern: 'outbrain', platform: 'outbrain', category: 'preview' },
  { pattern: 'flipboard', platform: 'flipboard', category: 'preview' },

  // AI & LLM Crawlers
  { pattern: 'gptbot', platform: 'openai', category: 'ai' },
  { pattern: 'chatgpt', platform: 'openai', category: 'ai' },
  { pattern: 'claudebot', platform: 'anthropic', category: 'ai' },
  { pattern: 'anthropic-ai', platform: 'anthropic', category: 'ai' },
  { pattern: 'google-extended', platform: 'google-bard', category: 'ai' },
  { pattern: 'cohere-ai', platform: 'cohere', category: 'ai' },
  { pattern: 'perplexitybot', platform: 'perplexity', category: 'ai' },
  { pattern: 'ccbot', platform: 'commoncrawl', category: 'ai' },

  // Development & Testing Tools
  { pattern: 'postman', platform: 'postman', category: 'dev' },
  { pattern: 'insomnia', platform: 'insomnia', category: 'dev' },
  { pattern: 'httpie', platform: 'httpie', category: 'dev' },
  { pattern: 'curl', platform: 'curl', category: 'dev' },
  { pattern: 'wget', platform: 'wget', category: 'dev' },
  { pattern: 'apache-httpclient', platform: 'apache', category: 'dev' },
  { pattern: 'python-requests', platform: 'python', category: 'dev' },
  { pattern: 'node-fetch', platform: 'nodejs', category: 'dev' },
  { pattern: 'axios', platform: 'axios', category: 'dev' },

  // Headless Browsers & Automation
  { pattern: 'headless', platform: 'headless-browser', category: 'dev' },
  { pattern: 'chrome-lighthouse', platform: 'lighthouse', category: 'dev' },
  { pattern: 'puppeteer', platform: 'puppeteer', category: 'dev' },
  { pattern: 'playwright', platform: 'playwright', category: 'dev' },
  { pattern: 'selenium', platform: 'selenium', category: 'dev' },
  { pattern: 'phantomjs', platform: 'phantomjs', category: 'dev' },

  // Vercel & Deployment Platforms
  // The more specific token comes first, matching the ordering used for the
  // other specific/general pairs in this table.
  { pattern: 'vercel-screenshot', platform: 'vercel', category: 'preview' },
  { pattern: 'vercel', platform: 'vercel', category: 'preview' },
  { pattern: 'prerender', platform: 'prerender', category: 'preview' },
  { pattern: 'netlify', platform: 'netlify', category: 'preview' },

  // Archive & Research
  { pattern: 'ia_archiver', platform: 'internet-archive', category: 'archive' },
  { pattern: 'archive.org_bot', platform: 'internet-archive', category: 'archive' },

  // Email Clients (for link previews)
  { pattern: 'outlook', platform: 'outlook', category: 'email' },
  { pattern: 'googleimageproxy', platform: 'gmail', category: 'email' },
  { pattern: 'apple mail', platform: 'apple-mail', category: 'email' },
  { pattern: 'yahoo', platform: 'yahoo-mail', category: 'email' },

  // Generic patterns (LOWEST PRIORITY - check last)
  { pattern: 'bot', platform: 'generic-bot', category: 'generic' },
  { pattern: 'crawler', platform: 'generic-crawler', category: 'generic' },
  { pattern: 'spider', platform: 'generic-spider', category: 'generic' },
  { pattern: 'scraper', platform: 'generic-scraper', category: 'generic' },
];

/**
 * Regex pattern for faster generic matching.
 *
 * Case-insensitive, and broader than the generic entries of `BOT_PATTERNS`:
 * besides `bot`, `crawler`, `spider` and `scraper` it also matches `curl`,
 * `wget`, `http` and `fetch` anywhere in the tested string. The capture group
 * is kept so existing callers that read the matched token keep working.
 */
export const GENERIC_BOT_REGEX = /(bot|crawler|spider|scraper|curl|wget|http|fetch)/i;