mirror of
https://github.com/pacnpal/thrilltrack-explorer.git
synced 2025-12-20 17:11:13 -05:00
131 lines
6.9 KiB
TypeScript
131 lines
6.9 KiB
TypeScript
/**
 * Comprehensive user-agent bot patterns, organized by category.
 */
|
|
|
|
/**
 * One entry of the bot-pattern table: a user-agent token plus the platform
 * and category it is attributed to.
 */
export interface BotPattern {
  /**
   * Token to look for in a user-agent string.
   * NOTE(review): every entry in this file is lowercase, so the matcher
   * (not visible here) presumably lowercases the user-agent first — confirm.
   */
  pattern: string;
  /** Identifier of the service or tool the token is attributed to (e.g. 'facebook'). */
  platform: string;
  /** Coarse grouping of the entry; limited to the literals listed below. */
  category:
    | 'social'
    | 'seo'
    | 'monitoring'
    | 'preview'
    | 'ai'
    | 'dev'
    | 'archive'
    | 'email'
    | 'generic';
}
|
|
|
|
/**
 * Table of known user-agent tokens, grouped by kind of client.
 *
 * Entries are listed with specific tokens before broader ones (for example
 * 'twitterbot' before 'twitter'), and the catch-all generic tokens come last.
 * NOTE(review): the code that consumes this table is not visible here; the
 * ordering suggests a first-match lookup over a lowercased user-agent —
 * confirm against the caller before reordering entries.
 */
export const BOT_PATTERNS: BotPattern[] = [
  // Social Media Preview Bots (HIGH PRIORITY)
  { pattern: 'facebookexternalhit', platform: 'facebook', category: 'social' },
  { pattern: 'facebot', platform: 'facebook', category: 'social' },
  { pattern: 'twitterbot', platform: 'twitter', category: 'social' },
  { pattern: 'twitter', platform: 'twitter', category: 'social' },
  { pattern: 'linkedinbot', platform: 'linkedin', category: 'social' },
  { pattern: 'linkedin', platform: 'linkedin', category: 'social' },
  { pattern: 'slackbot', platform: 'slack', category: 'social' },
  { pattern: 'slack-imgproxy', platform: 'slack', category: 'social' },
  { pattern: 'telegrambot', platform: 'telegram', category: 'social' },
  { pattern: 'whatsapp', platform: 'whatsapp', category: 'social' },
  { pattern: 'discordbot', platform: 'discord', category: 'social' },
  { pattern: 'discord', platform: 'discord', category: 'social' },
  { pattern: 'pinterestbot', platform: 'pinterest', category: 'social' },
  { pattern: 'pinterest', platform: 'pinterest', category: 'social' },
  { pattern: 'redditbot', platform: 'reddit', category: 'social' },
  { pattern: 'reddit', platform: 'reddit', category: 'social' },
  { pattern: 'instagram', platform: 'instagram', category: 'social' },
  { pattern: 'snapchat', platform: 'snapchat', category: 'social' },
  { pattern: 'tiktokbot', platform: 'tiktok', category: 'social' },
  { pattern: 'bytespider', platform: 'tiktok', category: 'social' },
  { pattern: 'tumblr', platform: 'tumblr', category: 'social' },
  { pattern: 'vkshare', platform: 'vk', category: 'social' },
  { pattern: 'line', platform: 'line', category: 'social' },
  { pattern: 'kakaotalk', platform: 'kakaotalk', category: 'social' },
  { pattern: 'wechat', platform: 'wechat', category: 'social' },

  // Search Engine Crawlers
  { pattern: 'googlebot', platform: 'google', category: 'seo' },
  { pattern: 'bingbot', platform: 'bing', category: 'seo' },
  { pattern: 'bingpreview', platform: 'bing', category: 'preview' },
  { pattern: 'slurp', platform: 'yahoo', category: 'seo' },
  { pattern: 'duckduckbot', platform: 'duckduckgo', category: 'seo' },
  { pattern: 'baiduspider', platform: 'baidu', category: 'seo' },
  { pattern: 'yandexbot', platform: 'yandex', category: 'seo' },

  // SEO & Analytics Crawlers
  { pattern: 'ahrefsbot', platform: 'ahrefs', category: 'seo' },
  { pattern: 'ahrefs', platform: 'ahrefs', category: 'seo' },
  { pattern: 'semrushbot', platform: 'semrush', category: 'seo' },
  { pattern: 'dotbot', platform: 'moz', category: 'seo' },
  { pattern: 'rogerbot', platform: 'moz', category: 'seo' },
  { pattern: 'screaming frog', platform: 'screaming-frog', category: 'seo' },
  { pattern: 'majestic', platform: 'majestic', category: 'seo' },
  // Majestic's crawler identifies itself as "MJ12bot" (previously misspelled
  // here as 'mjl12bot', which could never match that token).
  { pattern: 'mj12bot', platform: 'majestic', category: 'seo' },
  { pattern: 'similarweb', platform: 'similarweb', category: 'seo' },
  { pattern: 'dataforseo', platform: 'dataforseo', category: 'seo' },

  // Monitoring & Uptime Services
  { pattern: 'pingdom', platform: 'pingdom', category: 'monitoring' },
  { pattern: 'statuscake', platform: 'statuscake', category: 'monitoring' },
  { pattern: 'uptimerobot', platform: 'uptimerobot', category: 'monitoring' },
  { pattern: 'newrelic', platform: 'newrelic', category: 'monitoring' },
  { pattern: 'datadog', platform: 'datadog', category: 'monitoring' },

  // Preview & Unfurling Services
  { pattern: 'embedly', platform: 'embedly', category: 'preview' },
  { pattern: 'nuzzel', platform: 'nuzzel', category: 'preview' },
  { pattern: 'qwantify', platform: 'qwantify', category: 'preview' },
  { pattern: 'skypeuripreview', platform: 'skype', category: 'preview' },
  { pattern: 'outbrain', platform: 'outbrain', category: 'preview' },
  { pattern: 'flipboard', platform: 'flipboard', category: 'preview' },

  // AI & LLM Crawlers
  { pattern: 'gptbot', platform: 'openai', category: 'ai' },
  { pattern: 'chatgpt', platform: 'openai', category: 'ai' },
  { pattern: 'claudebot', platform: 'anthropic', category: 'ai' },
  { pattern: 'anthropic-ai', platform: 'anthropic', category: 'ai' },
  { pattern: 'google-extended', platform: 'google-bard', category: 'ai' },
  { pattern: 'cohere-ai', platform: 'cohere', category: 'ai' },
  { pattern: 'perplexitybot', platform: 'perplexity', category: 'ai' },
  { pattern: 'ccbot', platform: 'commoncrawl', category: 'ai' },

  // Development & Testing Tools
  { pattern: 'postman', platform: 'postman', category: 'dev' },
  { pattern: 'insomnia', platform: 'insomnia', category: 'dev' },
  { pattern: 'httpie', platform: 'httpie', category: 'dev' },
  { pattern: 'curl', platform: 'curl', category: 'dev' },
  { pattern: 'wget', platform: 'wget', category: 'dev' },
  { pattern: 'apache-httpclient', platform: 'apache', category: 'dev' },
  { pattern: 'python-requests', platform: 'python', category: 'dev' },
  { pattern: 'node-fetch', platform: 'nodejs', category: 'dev' },
  { pattern: 'axios', platform: 'axios', category: 'dev' },

  // Headless Browsers & Automation
  { pattern: 'headless', platform: 'headless-browser', category: 'dev' },
  { pattern: 'chrome-lighthouse', platform: 'lighthouse', category: 'dev' },
  { pattern: 'puppeteer', platform: 'puppeteer', category: 'dev' },
  { pattern: 'playwright', platform: 'playwright', category: 'dev' },
  { pattern: 'selenium', platform: 'selenium', category: 'dev' },
  { pattern: 'phantomjs', platform: 'phantomjs', category: 'dev' },

  // Vercel & Deployment Platforms
  // 'vercel-screenshot' is listed before its substring 'vercel' to follow the
  // specific-before-broad ordering used by the other sections.
  { pattern: 'vercel-screenshot', platform: 'vercel', category: 'preview' },
  { pattern: 'vercel', platform: 'vercel', category: 'preview' },
  { pattern: 'prerender', platform: 'prerender', category: 'preview' },
  { pattern: 'netlify', platform: 'netlify', category: 'preview' },

  // Archive & Research
  { pattern: 'ia_archiver', platform: 'internet-archive', category: 'archive' },
  { pattern: 'archive.org_bot', platform: 'internet-archive', category: 'archive' },

  // Email Clients (for link previews)
  { pattern: 'outlook', platform: 'outlook', category: 'email' },
  { pattern: 'googleimageproxy', platform: 'gmail', category: 'email' },
  { pattern: 'apple mail', platform: 'apple-mail', category: 'email' },
  { pattern: 'yahoo', platform: 'yahoo-mail', category: 'email' },

  // Generic patterns (LOWEST PRIORITY - check last)
  { pattern: 'bot', platform: 'generic-bot', category: 'generic' },
  { pattern: 'crawler', platform: 'generic-crawler', category: 'generic' },
  { pattern: 'spider', platform: 'generic-spider', category: 'generic' },
  { pattern: 'scraper', platform: 'generic-scraper', category: 'generic' },
];
|
|
|
|
/**
 * Case-insensitive regex that matches when any of the listed generic tokens
 * occurs anywhere in the tested string. The alternatives are wrapped in a
 * single capturing group, so the matched token is available as group 1.
 */
export const GENERIC_BOT_REGEX = new RegExp(
  `(${['bot', 'crawler', 'spider', 'scraper', 'curl', 'wget', 'http', 'fetch'].join('|')})`,
  'i',
);
|