feat: Implement comprehensive bot detection

This commit is contained in:
gpt-engineer-app[bot]
2025-10-29 20:49:26 +00:00
parent d362fa7537
commit 2918f9d280
5 changed files with 515 additions and 76 deletions

View File

@@ -14,68 +14,7 @@ type VercelResponse = ServerResponse & {
send: (body: string) => VercelResponse;
};
/**
 * Bot detection configuration: maps a lowercase User-Agent substring to the
 * platform label reported for it.
 *
 * Entry order is significant. `detectBot` walks the entries in insertion
 * order and stops at the first substring hit, so named crawlers are listed
 * first and the catch-all fragments (`bot`, `crawler`, ...) come last.
 */
const SOCIAL_BOTS = {
  // Social / messaging link-preview fetchers
  facebookexternalhit: 'facebook',
  facebot: 'facebook',
  facebookcatalog: 'facebook',
  twitterbot: 'twitter',
  'x-bot': 'twitter',
  linkedinbot: 'linkedin',
  discordbot: 'discord',
  slackbot: 'slack',
  'slack-imgproxy': 'slack',
  whatsapp: 'whatsapp',
  telegrambot: 'telegram',
  pinterestbot: 'pinterest',
  redditbot: 'reddit',
  'apple-pcs': 'imessage',
  mastodon: 'mastodon',
  'ms-teams': 'teams',

  // Search engine crawlers
  googlebot: 'google',
  bingbot: 'bing',
  slurp: 'yahoo',
  duckduckbot: 'duckduckgo',
  baiduspider: 'baidu',
  yandexbot: 'yandex',

  // Headless browsers & automation tooling
  headless: 'headless-browser',
  'chrome-lighthouse': 'lighthouse',
  puppeteer: 'puppeteer',
  playwright: 'playwright',
  selenium: 'selenium',
  phantomjs: 'phantomjs',

  // Vercel & deployment platforms
  vercel: 'vercel',
  'vercel-screenshot': 'vercel',
  prerender: 'prerender',

  // Generic fallbacks; kept last so the specific names above take precedence
  bot: 'generic-bot',
  crawler: 'generic-crawler',
  spider: 'generic-spider',
  scraper: 'generic-scraper',
};
/** Result returned by `detectBot` for a single User-Agent string. */
interface BotDetection {
  /** True when the User-Agent contained one of the `SOCIAL_BOTS` patterns. */
  isBot: boolean;
  /** Platform label of the matched pattern, or `null` when nothing matched. */
  platform: string | null;
}
/**
 * Classifies a User-Agent string against the `SOCIAL_BOTS` table.
 *
 * Matching is a case-insensitive substring test; the first table entry (in
 * insertion order) whose pattern occurs in the User-Agent wins.
 *
 * @param userAgent - Raw User-Agent header value; an empty value is treated
 *   as "not a bot".
 * @returns `{ isBot: true, platform }` for the first matching pattern,
 *   otherwise `{ isBot: false, platform: null }`.
 */
function detectBot(userAgent: string): BotDetection {
  // Missing/empty header: nothing to match against.
  if (!userAgent) return { isBot: false, platform: null };

  const normalized = userAgent.toLowerCase();
  const firstHit = Object.entries(SOCIAL_BOTS).find(([needle]) =>
    normalized.includes(needle),
  );

  if (firstHit === undefined) {
    return { isBot: false, platform: null };
  }

  const [, platform] = firstHit;
  return { isBot: true, platform };
}
import { detectBot } from './botDetection/index';
interface PageData {
title: string;
@@ -245,24 +184,25 @@ export default async function handler(req: VercelRequest, res: VercelResponse) {
const fullUrl = `https://${req.headers.host}${req.url}`;
const pathname = new URL(fullUrl).pathname;
// Bot detection
const botDetection = detectBot(userAgent);
// Comprehensive bot detection with headers
const botDetection = detectBot(userAgent, req.headers as Record<string, string | string[] | undefined>);
// Enhanced logging
// Enhanced logging with detection details
if (botDetection.isBot) {
console.log(`[SSR-OG] ✅ Bot detected: ${botDetection.platform} | ${req.method} ${pathname}`);
console.log(`[SSR-OG] Full UA: ${userAgent}`);
console.log(`[SSR-OG] ✅ Bot detected: ${botDetection.platform || 'unknown'} | Confidence: ${botDetection.confidence} (${botDetection.score}%) | Method: ${botDetection.detectionMethod}`);
console.log(`[SSR-OG] Path: ${req.method} ${pathname}`);
console.log(`[SSR-OG] UA: ${userAgent}`);
if (botDetection.metadata.signals.length > 0) {
console.log(`[SSR-OG] Signals: ${botDetection.metadata.signals.slice(0, 5).join(', ')}${botDetection.metadata.signals.length > 5 ? '...' : ''}`);
}
} else {
// Log undetected potential bots for debugging
const looksLikeBot = !userAgent.includes('Mozilla') ||
userAgent.includes('http') ||
userAgent.length < 50;
if (looksLikeBot) {
console.warn(`[SSR-OG] ⚠️ Possible undetected bot | ${req.method} ${pathname}`);
console.warn(`[SSR-OG] Full UA: ${userAgent}`);
// Log potential false negatives
if (botDetection.score > 30) {
console.warn(`[SSR-OG] ⚠️ Low confidence bot (${botDetection.score}%) - not serving SSR | ${req.method} ${pathname}`);
console.warn(`[SSR-OG] UA: ${userAgent}`);
console.warn(`[SSR-OG] Signals: ${botDetection.metadata.signals.join(', ')}`);
} else {
console.log(`[SSR-OG] Regular user | ${req.method} ${pathname} | UA: ${userAgent.substring(0, 60)}...`);
console.log(`[SSR-OG] Regular user (score: ${botDetection.score}%) | ${req.method} ${pathname}`);
}
}
@@ -280,6 +220,9 @@ export default async function handler(req: VercelRequest, res: VercelResponse) {
html = injectOGTags(html, ogTags);
res.setHeader('X-Bot-Platform', botDetection.platform || 'unknown');
res.setHeader('X-Bot-Confidence', botDetection.confidence);
res.setHeader('X-Bot-Score', botDetection.score.toString());
res.setHeader('X-Bot-Method', botDetection.detectionMethod);
res.setHeader('X-SSR-Modified', 'true');
}