From d362fa75371d3b8d5b358e3cd2767f4b4258d560 Mon Sep 17 00:00:00 2001 From: "gpt-engineer-app[bot]" <159125892+gpt-engineer-app[bot]@users.noreply.github.com> Date: Wed, 29 Oct 2025 20:45:21 +0000 Subject: [PATCH] feat: Improve bot detection for Vercel OG preview --- api/ssrOG.ts | 43 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/api/ssrOG.ts b/api/ssrOG.ts index d225fb4f..40e85b01 100644 --- a/api/ssrOG.ts +++ b/api/ssrOG.ts @@ -37,7 +37,23 @@ const SOCIAL_BOTS = { 'slurp': 'yahoo', 'duckduckbot': 'duckduckgo', 'baiduspider': 'baidu', - 'yandexbot': 'yandex' + 'yandexbot': 'yandex', + // Headless browsers & crawlers + 'headless': 'headless-browser', + 'chrome-lighthouse': 'lighthouse', + 'puppeteer': 'puppeteer', + 'playwright': 'playwright', + 'selenium': 'selenium', + 'phantomjs': 'phantomjs', + // Vercel & deployment platforms + 'vercel': 'vercel', + 'vercel-screenshot': 'vercel', + 'prerender': 'prerender', + // Generic crawler patterns + 'bot': 'generic-bot', + 'crawler': 'generic-crawler', + 'spider': 'generic-spider', + 'scraper': 'generic-scraper' }; interface BotDetection { @@ -229,20 +245,35 @@ export default async function handler(req: VercelRequest, res: VercelResponse) { const fullUrl = `https://${req.headers.host}${req.url}`; const pathname = new URL(fullUrl).pathname; - console.log(`[SSR-OG] ${req.method} ${pathname} | UA: ${userAgent.substring(0, 60)}`); - // Bot detection const botDetection = detectBot(userAgent); + // Enhanced logging + if (botDetection.isBot) { + console.log(`[SSR-OG] ✅ Bot detected: ${botDetection.platform} | ${req.method} ${pathname}`); + console.log(`[SSR-OG] Full UA: ${userAgent}`); + } else { + // Log undetected potential bots for debugging + const looksLikeBot = !userAgent.includes('Mozilla') || + userAgent.includes('http') || + userAgent.length < 50; + + if (looksLikeBot) { + console.warn(`[SSR-OG] ⚠️ Possible undetected bot | ${req.method} ${pathname}`); + console.warn(`[SSR-OG] Full UA: ${userAgent}`); + } else { + console.log(`[SSR-OG] Regular user | ${req.method} ${pathname} | UA: ${userAgent.substring(0, 60)}...`); + } + } + // Read the built index.html const htmlPath = join(process.cwd(), 'dist', 'index.html'); let html = readFileSync(htmlPath, 'utf-8'); if (botDetection.isBot) { - console.log(`[SSR-OG] Bot detected: ${botDetection.platform}`); - // Fetch page-specific data const pageData = await getPageData(pathname, fullUrl); + console.log(`[SSR-OG] Generated OG tags: ${pageData.title}`); // Generate and inject OG tags const ogTags = generateOGTags(pageData); @@ -250,8 +281,6 @@ export default async function handler(req: VercelRequest, res: VercelResponse) { res.setHeader('X-Bot-Platform', botDetection.platform || 'unknown'); res.setHeader('X-SSR-Modified', 'true'); - } else { - console.log('[SSR-OG] Regular user - serving original HTML'); } res.setHeader('Content-Type', 'text/html; charset=utf-8');