mirror of
https://github.com/pacnpal/thrilltrack-explorer.git
synced 2025-12-20 06:11:11 -05:00
feat: Implement comprehensive bot detection
This commit is contained in:
95
api/ssrOG.ts
95
api/ssrOG.ts
@@ -14,68 +14,7 @@ type VercelResponse = ServerResponse & {
|
||||
send: (body: string) => VercelResponse;
|
||||
};
|
||||
|
||||
// Bot detection configuration
//
// Maps a lower-case user-agent substring to the platform label reported for
// it. detectBot() walks the entries in declaration order and returns the label
// of the FIRST substring it finds, so specific patterns must stay ahead of the
// generic catch-alls ('bot', 'crawler', ...) at the bottom of the table.
const SOCIAL_BOTS = {
  // Social / messaging link-preview fetchers
  'facebookexternalhit': 'facebook',
  'facebot': 'facebook',
  'facebookcatalog': 'facebook',
  'twitterbot': 'twitter',
  'x-bot': 'twitter',
  'linkedinbot': 'linkedin',
  'discordbot': 'discord',
  'slackbot': 'slack',
  'slack-imgproxy': 'slack',
  'whatsapp': 'whatsapp',
  'telegrambot': 'telegram',
  'pinterestbot': 'pinterest',
  'redditbot': 'reddit',
  'apple-pcs': 'imessage',
  'mastodon': 'mastodon',
  'ms-teams': 'teams',
  // Search engines
  'googlebot': 'google',
  'bingbot': 'bing',
  'slurp': 'yahoo',
  'duckduckbot': 'duckduckgo',
  'baiduspider': 'baidu',
  'yandexbot': 'yandex',
  // Headless browsers & crawlers
  'headless': 'headless-browser',
  'chrome-lighthouse': 'lighthouse',
  'puppeteer': 'puppeteer',
  'playwright': 'playwright',
  'selenium': 'selenium',
  'phantomjs': 'phantomjs',
  // Vercel & deployment platforms
  // 'vercel-screenshot' is listed before its prefix 'vercel'; in the reverse
  // order it could never match. Both map to the same label, so results are
  // unaffected either way.
  'vercel-screenshot': 'vercel',
  'vercel': 'vercel',
  'prerender': 'prerender',
  // Generic crawler patterns (must remain last: they match almost any bot UA)
  'bot': 'generic-bot',
  'crawler': 'generic-crawler',
  'spider': 'generic-spider',
  'scraper': 'generic-scraper'
};

/** Result of classifying a request's User-Agent header. */
interface BotDetection {
  /** True when the user agent contains one of the SOCIAL_BOTS substrings. */
  isBot: boolean;
  /** Label of the first matching pattern, or null when nothing matched. */
  platform: string | null;
}

/**
 * Classifies a User-Agent string by case-insensitive substring matching
 * against SOCIAL_BOTS.
 *
 * @param userAgent - Raw User-Agent header value. Missing, empty or
 *   non-string values are treated as "not a bot" instead of throwing.
 * @returns `{ isBot: true, platform }` for the first matching pattern in
 *   table order, otherwise `{ isBot: false, platform: null }`.
 */
function detectBot(userAgent: string | null | undefined): BotDetection {
  // Header values can be absent or arrive as string[]; calling toLowerCase()
  // on anything but a string would throw, so such input is rejected up front.
  if (typeof userAgent !== 'string' || userAgent.length === 0) {
    return { isBot: false, platform: null };
  }

  const ua = userAgent.toLowerCase();

  for (const [pattern, platform] of Object.entries(SOCIAL_BOTS)) {
    if (ua.includes(pattern)) {
      return { isBot: true, platform };
    }
  }

  return { isBot: false, platform: null };
}
|
||||
import { detectBot } from './botDetection/index';
|
||||
|
||||
interface PageData {
|
||||
title: string;
|
||||
@@ -245,24 +184,25 @@ export default async function handler(req: VercelRequest, res: VercelResponse) {
|
||||
const fullUrl = `https://${req.headers.host}${req.url}`;
|
||||
const pathname = new URL(fullUrl).pathname;
|
||||
|
||||
// Bot detection
|
||||
const botDetection = detectBot(userAgent);
|
||||
// Comprehensive bot detection with headers
|
||||
const botDetection = detectBot(userAgent, req.headers as Record<string, string | string[] | undefined>);
|
||||
|
||||
// Enhanced logging
|
||||
// Enhanced logging with detection details
|
||||
if (botDetection.isBot) {
|
||||
console.log(`[SSR-OG] ✅ Bot detected: ${botDetection.platform} | ${req.method} ${pathname}`);
|
||||
console.log(`[SSR-OG] Full UA: ${userAgent}`);
|
||||
console.log(`[SSR-OG] ✅ Bot detected: ${botDetection.platform || 'unknown'} | Confidence: ${botDetection.confidence} (${botDetection.score}%) | Method: ${botDetection.detectionMethod}`);
|
||||
console.log(`[SSR-OG] Path: ${req.method} ${pathname}`);
|
||||
console.log(`[SSR-OG] UA: ${userAgent}`);
|
||||
if (botDetection.metadata.signals.length > 0) {
|
||||
console.log(`[SSR-OG] Signals: ${botDetection.metadata.signals.slice(0, 5).join(', ')}${botDetection.metadata.signals.length > 5 ? '...' : ''}`);
|
||||
}
|
||||
} else {
|
||||
// Log undetected potential bots for debugging
|
||||
const looksLikeBot = !userAgent.includes('Mozilla') ||
|
||||
userAgent.includes('http') ||
|
||||
userAgent.length < 50;
|
||||
|
||||
if (looksLikeBot) {
|
||||
console.warn(`[SSR-OG] ⚠️ Possible undetected bot | ${req.method} ${pathname}`);
|
||||
console.warn(`[SSR-OG] Full UA: ${userAgent}`);
|
||||
// Log potential false negatives
|
||||
if (botDetection.score > 30) {
|
||||
console.warn(`[SSR-OG] ⚠️ Low confidence bot (${botDetection.score}%) - not serving SSR | ${req.method} ${pathname}`);
|
||||
console.warn(`[SSR-OG] UA: ${userAgent}`);
|
||||
console.warn(`[SSR-OG] Signals: ${botDetection.metadata.signals.join(', ')}`);
|
||||
} else {
|
||||
console.log(`[SSR-OG] Regular user | ${req.method} ${pathname} | UA: ${userAgent.substring(0, 60)}...`);
|
||||
console.log(`[SSR-OG] Regular user (score: ${botDetection.score}%) | ${req.method} ${pathname}`);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -280,6 +220,9 @@ export default async function handler(req: VercelRequest, res: VercelResponse) {
|
||||
html = injectOGTags(html, ogTags);
|
||||
|
||||
res.setHeader('X-Bot-Platform', botDetection.platform || 'unknown');
|
||||
res.setHeader('X-Bot-Confidence', botDetection.confidence);
|
||||
res.setHeader('X-Bot-Score', botDetection.score.toString());
|
||||
res.setHeader('X-Bot-Method', botDetection.detectionMethod);
|
||||
res.setHeader('X-SSR-Modified', 'true');
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user