/**
 * Next.js API route: Fetch URL content for comprehensive SEO/GEO auditing.
 *
 * Endpoint: GET /api/fetch-url?url=https://example.com
 * Returns: Full SEO audit data (technical, content, performance signals, accessibility)
 */
import { NextRequest, NextResponse } from "next/server";

/** Attributes of one opening tag, keyed by lower-cased attribute name. */
type TagAttributes = Map<string, string>;

type IssueSeverity = "critical" | "warning" | "info";

interface AuditIssue {
  severity: IssueSeverity;
  category: string;
  message: string;
}

interface FetchedPage {
  status: number;
  /** Response body, or null when the upstream status was not 2xx (body is not read). */
  html: string | null;
  headers: Headers;
  /** Milliseconds from request start until response headers arrived. */
  responseTime: number;
}

/** Upper bound for the whole upstream request, including reading the body. */
const FETCH_TIMEOUT_MS = 15000;

/** Points deducted from a category sub-score per issue of each severity. */
const SEVERITY_WEIGHT: Record<IssueSeverity, number> = { critical: 15, warning: 7, info: 2 };

/**
 * Counts matches of `regex` in `text`.
 * The regex must carry the `g` flag; without it `String.prototype.match`
 * reports a single match plus capture groups rather than every occurrence.
 */
function countOccurrences(text: string, regex: RegExp): number {
  return (text.match(regex) || []).length;
}

/**
 * Turns the raw `url` query parameter into an absolute http(s) URL string.
 * Input without an explicit `scheme://` gets `https://` prepended.
 *
 * @returns the URL string to fetch, or null when it does not parse or uses a
 *          scheme other than http/https.
 */
function normalizeTargetUrl(rawUrl: string): string | null {
  const trimmed = rawUrl.trim();
  const candidate = /^[a-z][a-z0-9+.-]*:\/\//i.test(trimmed) ? trimmed : "https://" + trimmed;
  try {
    const parsed = new URL(candidate);
    return parsed.protocol === "http:" || parsed.protocol === "https:" ? candidate : null;
  } catch {
    return null;
  }
}

/**
 * Parses the attribute section of an opening tag (everything between the tag
 * name and the closing `>`). Handles double-quoted, single-quoted, unquoted and
 * value-less attributes. When an attribute is repeated, the first one wins.
 */
function parseAttributes(attributeText: string): TagAttributes {
  const attributes: TagAttributes = new Map();
  // Created per call so the stateful `lastIndex` of a global regex is never shared.
  const attributePattern = /([^\s"'<>\/=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'>]+)))?/g;
  let match: RegExpExecArray | null;
  while ((match = attributePattern.exec(attributeText)) !== null) {
    const name = (match[1] ?? "").toLowerCase();
    if (name && !attributes.has(name)) {
      attributes.set(name, match[2] ?? match[3] ?? match[4] ?? "");
    }
  }
  return attributes;
}

/**
 * Returns the parsed attributes of every `<tagName ...>` opening tag in `html`,
 * in document order. `tagName` must be a plain tag name (it is inserted into a
 * regular expression unescaped). Quoted attribute values may contain `>`.
 */
function collectTags(html: string, tagName: string): TagAttributes[] {
  const tagPattern = new RegExp(
    "<" + tagName + "(?=[\\s/>])((?:[^>\"']|\"[^\"]*\"|'[^']*')*)>",
    "gi",
  );
  const tags: TagAttributes[] = [];
  let match: RegExpExecArray | null;
  while ((match = tagPattern.exec(html)) !== null) {
    tags.push(parseAttributes(match[1] ?? ""));
  }
  return tags;
}

/** Lower-cased, whitespace-separated tokens of a tag's `rel` attribute. */
function relTokens(tag: TagAttributes): string[] {
  return (tag.get("rel") ?? "")
    .toLowerCase()
    .split(/\s+/)
    .filter((token) => token.length > 0);
}

/**
 * Finds the `content` of the first `<meta>` whose `key` attribute equals
 * `value` (case-insensitive), regardless of attribute order.
 *
 * @returns the raw content string (possibly empty), or null when no such tag
 *          with a `content` attribute exists.
 */
function metaContent(
  metaTags: TagAttributes[],
  key: "name" | "property",
  value: string,
): string | null {
  const wanted = value.toLowerCase();
  for (const tag of metaTags) {
    const content = tag.get("content");
    if (content !== undefined && (tag.get(key) ?? "").trim().toLowerCase() === wanted) {
      return content;
    }
  }
  return null;
}

/** Reads the document charset from `<meta charset>` or a `charset=` inside a meta `content`. */
function detectCharset(metaTags: TagAttributes[]): string | null {
  for (const tag of metaTags) {
    const declared = (tag.get("charset") ?? "").trim();
    if (declared) return declared;
    const fromContent = /charset\s*=\s*([^"'\s;]+)/i.exec(tag.get("content") ?? "");
    if (fromContent && fromContent[1]) return fromContent[1].trim();
  }
  return null;
}

/**
 * Decides whether a canonical href points somewhere other than the audited page.
 * Root-relative canonicals (leading "/") are never reported as a mismatch.
 * Other relative values are resolved against the page URL. A canonical that
 * cannot be parsed at all is reported as a mismatch instead of throwing.
 */
function isCanonicalMismatch(canonical: string | null, pageUrl: string): boolean {
  if (!canonical || canonical === pageUrl || canonical.startsWith("/")) return false;
  try {
    return new URL(canonical, pageUrl).href !== new URL(pageUrl).href;
  } catch {
    return true;
  }
}

/**
 * Downloads `url` with the auditor's request headers.
 * The abort timer covers both the request and the body read and is always
 * cleared, including when `fetch` rejects.
 *
 * @throws whatever `fetch` / `Response.text` throw (network failure, abort on timeout).
 */
async function fetchPage(url: string): Promise<FetchedPage> {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
  const startTime = Date.now();
  try {
    const res = await fetch(url, {
      signal: controller.signal,
      headers: {
        "User-Agent": "Mozilla/5.0 (compatible; PromptArch-SEOAudit/1.6; +https://rommark.dev)",
        Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9",
      },
    });
    const responseTime = Date.now() - startTime;
    const html = res.ok ? await res.text() : null;
    return { status: res.status, html, headers: res.headers, responseTime };
  } finally {
    clearTimeout(timer);
  }
}

/**
 * Runs every audit check against the downloaded HTML and assembles the JSON
 * payload returned by the route. Pure with respect to its inputs (no I/O).
 *
 * @param normalizedUrl absolute http(s) URL that was fetched
 * @param html          response body
 * @param headers       response headers
 * @param responseTime  milliseconds until response headers arrived
 */
function buildAuditReport(
  normalizedUrl: string,
  html: string,
  headers: Headers,
  responseTime: number,
) {
  const pageUrl = new URL(normalizedUrl);
  const baseDomain = pageUrl.hostname;

  const metaTags = collectTags(html, "meta");
  const linkTags = collectTags(html, "link");
  const scriptTags = collectTags(html, "script");
  const trimmedMeta = (name: string): string | null => {
    const content = metaContent(metaTags, "name", name);
    return content === null ? null : content.trim();
  };
  const hasLinkRel = (token: string): boolean =>
    linkTags.some((tag) => relTokens(tag).includes(token));

  // === HTTP HEADERS ===
  const isHttps = pageUrl.protocol === "https:";
  const server = headers.get("server") || "Unknown";
  const xFrameOptions = headers.get("x-frame-options") || null;
  const contentEncoding = headers.get("content-encoding") || "";

  // === ROBOTS & CANONICAL ===
  const robotsDirectives = trimmedMeta("robots");
  const canonicalTag = linkTags.find(
    (tag) => relTokens(tag).includes("canonical") && tag.has("href"),
  );
  const canonical = canonicalTag ? canonicalTag.get("href") ?? null : null;
  const hasCanonicalMismatch = isCanonicalMismatch(canonical, normalizedUrl);

  // === META TAGS ===
  const titleMatch = html.match(/<title(?=[\s>])[^>]*>([\s\S]*?)<\/title>/i);
  const title = titleMatch ? (titleMatch[1] ?? "").trim() : null;
  const titleLength = title ? title.length : 0;
  const metaDescription = trimmedMeta("description");
  const descLength = metaDescription ? metaDescription.length : 0;
  const metaKeywords = trimmedMeta("keywords");
  const viewport = trimmedMeta("viewport");
  const charset = detectCharset(metaTags);

  // === OPEN GRAPH ===
  const ogTitle = metaContent(metaTags, "property", "og:title");
  const ogDesc = metaContent(metaTags, "property", "og:description");
  const ogImage = metaContent(metaTags, "property", "og:image");
  const ogType = metaContent(metaTags, "property", "og:type");
  const ogUrl = metaContent(metaTags, "property", "og:url");

  // === TWITTER CARD ===
  const twCard = metaContent(metaTags, "name", "twitter:card");
  const twTitle = metaContent(metaTags, "name", "twitter:title");
  const twDesc = metaContent(metaTags, "name", "twitter:description");

  // === HEADING STRUCTURE ===
  const headings: { level: number; text: string }[] = [];
  const headingPattern = /<h([1-6])(?=[\s>])[^>]*>([\s\S]*?)<\/h[1-6]>/gi;
  let headingMatch: RegExpExecArray | null;
  while ((headingMatch = headingPattern.exec(html)) !== null) {
    const text = (headingMatch[2] ?? "").replace(/<[^>]*>/g, "").trim();
    if (text) headings.push({ level: parseInt(headingMatch[1] ?? "", 10), text });
  }
  const countLevel = (level: number): number => headings.filter((h) => h.level === level).length;
  const h1Count = countLevel(1);
  const h2Count = countLevel(2);
  const h3Count = countLevel(3);
  const h4Count = countLevel(4);
  const headingHierarchy = headings.map((h) => ({ level: h.level, text: h.text.substring(0, 100) }));

  // === LINKS ===
  const links: { href: string; text: string; internal: boolean; nofollow: boolean }[] = [];
  const anchorPattern = /<a(?=[\s\/>])((?:[^>"']|"[^"]*"|'[^']*')*)>([\s\S]*?)<\/a>/gi;
  let anchorMatch: RegExpExecArray | null;
  while ((anchorMatch = anchorPattern.exec(html)) !== null) {
    const attrs = parseAttributes(anchorMatch[1] ?? "");
    const href = (attrs.get("href") ?? "").trim();
    if (!href || href.startsWith("#")) continue;
    let resolved: URL;
    try {
      resolved = new URL(href, normalizedUrl);
    } catch {
      continue; // unparsable href: not a navigable link
    }
    // Only web links count; this drops javascript:, mailto:, tel:, data:, etc.
    if (resolved.protocol !== "http:" && resolved.protocol !== "https:") continue;
    links.push({
      href,
      text: (anchorMatch[2] ?? "").replace(/<[^>]*>/g, "").trim().substring(0, 100),
      internal: resolved.hostname === baseDomain,
      nofollow: /nofollow/i.test(attrs.get("rel") ?? ""),
    });
  }
  const internalLinks = links.filter((l) => l.internal);
  const externalLinks = links.filter((l) => !l.internal);
  const nofollowLinks = links.filter((l) => l.nofollow);

  // === IMAGES ===
  const images: { src: string; alt: string; loading?: string }[] = [];
  for (const attrs of collectTags(html, "img")) {
    // data-src keeps lazy-loader markup (no real src yet) in the image count.
    const src = attrs.get("src") ?? attrs.get("data-src");
    if (src === undefined) continue;
    images.push({ src, alt: attrs.get("alt") ?? "", loading: attrs.get("loading") });
  }
  const imagesWithAlt = images.filter((img) => img.alt.trim().length > 0);
  const imagesWithoutAlt = images.filter((img) => img.alt.trim().length === 0);
  const lazyLoadedImages = images.filter((img) => (img.loading ?? "").toLowerCase() === "lazy");
  const firstImage = images[0];

  // === CONTENT ANALYSIS ===
  const plainText = html
    .replace(/<script\b[\s\S]*?<\/script>/gi, "")
    .replace(/<style\b[\s\S]*?<\/style>/gi, "")
    .replace(/<noscript\b[\s\S]*?<\/noscript>/gi, "")
    .replace(/<[^>]*>/g, " ")
    .replace(/&nbsp;/g, " ")
    .replace(/&lt;/g, "<")
    .replace(/&gt;/g, ">")
    .replace(/&quot;/g, '"')
    .replace(/&(?:#39|apos);/g, "'")
    // Decoded last so "&amp;lt;" becomes the literal text "&lt;", not "<".
    .replace(/&amp;/g, "&")
    .replace(/\s+/g, " ")
    .trim();
  const wordCount = plainText ? plainText.split(/\s+/).length : 0;
  const sentenceCount = plainText ? (plainText.match(/[.!?]+/g) || []).length : 0;
  // plainText has all whitespace collapsed, so blank-line detection cannot work
  // on it; paragraphs are counted from the markup instead.
  const paragraphCount = countOccurrences(html, /<p(?=[\s>])[^>]*>/gi);
  const avgWordsPerSentence = sentenceCount > 0 ? Math.round(wordCount / sentenceCount) : 0;

  // === STRUCTURED DATA ===
  const sdTypes = [
    "Article", "BlogPosting", "FAQPage", "HowTo", "Product", "LocalBusiness",
    "Organization", "BreadcrumbList", "WebSite", "SearchAction", "VideoObject", "Review",
  ];
  const structuredData = sdTypes.map((sdType) => ({
    type: sdType,
    found:
      new RegExp('"@type"\\s*:\\s*"' + sdType + '"', "i").test(html) ||
      new RegExp('"@type"\\s*:\\s*\\["' + sdType + '"', "i").test(html),
  }));
  const hasJsonLd = scriptTags.some(
    (tag) => (tag.get("type") ?? "").trim().toLowerCase() === "application/ld+json",
  );
  const hasMicrodata = /itemscope/i.test(html);

  // === HREFLANG ===
  const hreflangTags: { lang: string; href: string }[] = [];
  for (const tag of linkTags) {
    const lang = tag.get("hreflang");
    const href = tag.get("href");
    if (lang !== undefined && href !== undefined && relTokens(tag).includes("alternate")) {
      hreflangTags.push({ lang, href });
    }
  }

  // === PERFORMANCE SIGNALS ===
  const htmlSize = html.length;
  const inlineStyleCount = countOccurrences(html, /style\s*=\s*"/g);
  const externalScripts = scriptTags.filter((tag) => tag.has("src")).length;
  const inlineScriptCount = scriptTags.length - externalScripts;
  const externalStylesheets = linkTags.filter((tag) => relTokens(tag).includes("stylesheet")).length;
  const hasPreconnect = hasLinkRel("preconnect");
  const hasPreload = hasLinkRel("preload");
  const hasDnsPrefetch = hasLinkRel("dns-prefetch");
  // async/defer are boolean attributes and are normally written without a value.
  const usesAsyncScripts = scriptTags.some((tag) => tag.has("async"));
  const usesDeferScripts = scriptTags.some((tag) => tag.has("defer"));

  // === ACCESSIBILITY ===
  const hasLangAttr = collectTags(html, "html").some((tag) => tag.has("lang"));
  const hasAriaLabels = /aria-label|aria-labelledby|aria-describedby/i.test(html);

  // === SCORE CALCULATION ===
  let score = 100;
  const issues: AuditIssue[] = [];
  const report = (penalty: number, severity: IssueSeverity, category: string, message: string): void => {
    score -= penalty;
    issues.push({ severity, category, message });
  };

  if (!title) report(10, "critical", "Meta", "Missing title tag");
  else if (titleLength > 60) report(3, "warning", "Meta", "Title too long (" + titleLength + " chars, max 60)");
  if (!metaDescription) report(10, "critical", "Meta", "Missing meta description");
  else if (descLength > 160) report(3, "warning", "Meta", "Meta description too long (" + descLength + " chars, max 160)");
  if (h1Count === 0) report(10, "critical", "Content", "Missing H1 heading");
  if (h1Count > 1) report(5, "critical", "Content", "Multiple H1 tags (" + h1Count + " found)");
  if (!viewport) report(10, "critical", "Mobile", "Missing viewport meta tag");
  if (!isHttps) report(10, "critical", "Security", "Not using HTTPS");
  if (imagesWithoutAlt.length > 0) report(5, "warning", "Accessibility", imagesWithoutAlt.length + " images missing alt text");
  if (!canonical) report(3, "warning", "Technical", "Missing canonical tag");
  if (hasCanonicalMismatch) report(5, "warning", "Technical", "Canonical URL mismatch");
  if (inlineStyleCount > 10) report(3, "warning", "Performance", inlineStyleCount + " inline styles detected");
  if (!hasPreconnect && externalScripts > 3) report(3, "warning", "Performance", "Missing preconnect hints for external resources");
  if (wordCount < 300 && wordCount > 0) report(3, "warning", "Content", "Thin content (" + wordCount + " words)");
  if (ogTitle === null && ogDesc === null) report(3, "warning", "Social", "Missing Open Graph tags");
  if (twCard === null) report(2, "warning", "Social", "Missing Twitter Card tags");
  if (externalLinks.length === 0) report(2, "warning", "Links", "No external links found");
  if (robotsDirectives && /noindex/i.test(robotsDirectives)) report(10, "critical", "Technical", "Page has noindex directive");
  if (!hasJsonLd && !hasMicrodata) report(1, "info", "Structured Data", "No structured data found");
  if (!hasLangAttr) report(1, "info", "Accessibility", "Missing html lang attribute");
  if (!lazyLoadedImages.length && images.length > 5) report(2, "info", "Performance", "Consider lazy loading for images");
  score = Math.max(0, Math.min(100, score));

  // Sub-scores start at 100 and lose a severity-weighted amount per issue in the listed categories.
  const categoryScore = (...categories: string[]): number => {
    const deduction = issues
      .filter((issue) => categories.includes(issue.category))
      .reduce((sum, issue) => sum + SEVERITY_WEIGHT[issue.severity], 0);
    return Math.max(0, Math.min(100, 100 - deduction));
  };
  const technicalScore = categoryScore("Technical", "Security");
  const contentScore = categoryScore("Content");
  const performanceScore = categoryScore("Performance", "Mobile");
  const socialScore = categoryScore("Social", "Structured Data");

  return {
    url: normalizedUrl,
    domain: baseDomain,
    protocol: isHttps ? "HTTPS" : "HTTP",
    responseTime,
    server,
    htmlSize,
    title,
    titleLength,
    titleStatus: !title ? "missing" : titleLength > 60 ? "too_long" : "good",
    metaDescription,
    descLength,
    descStatus: !metaDescription ? "missing" : descLength > 160 ? "too_long" : "good",
    metaKeywords,
    viewport,
    charset,
    robotsDirectives,
    canonical,
    hasCanonicalMismatch,
    xFrameOptions,
    openGraph: { title: ogTitle, description: ogDesc, image: ogImage, type: ogType, url: ogUrl },
    twitterCard: { card: twCard, title: twTitle, description: twDesc },
    headings: headingHierarchy,
    h1Count,
    h2Count,
    h3Count,
    h4Count,
    headingStatus: h1Count === 0 ? "missing_h1" : h1Count > 1 ? "multiple_h1" : "good",
    links: {
      total: links.length,
      internal: internalLinks.length,
      external: externalLinks.length,
      nofollow: nofollowLinks.length,
      sampleExternal: externalLinks
        .slice(0, 20)
        .map((l) => ({ href: l.href, text: l.text, nofollow: l.nofollow })),
    },
    images: {
      total: images.length,
      withAlt: imagesWithAlt.length,
      withoutAlt: imagesWithoutAlt.length,
      lazyLoaded: lazyLoadedImages.length,
      altCoverage: images.length > 0 ? Math.round((imagesWithAlt.length / images.length) * 100) : 100,
      sampleWithoutAlt: imagesWithoutAlt.slice(0, 10).map((img) => img.src),
    },
    content: {
      wordCount,
      sentenceCount,
      paragraphCount,
      avgWordsPerSentence,
      textPreview: plainText.substring(0, 2000),
    },
    structuredData: { hasJsonLd, hasMicrodata, types: structuredData },
    hreflang: hreflangTags,
    performance: {
      inlineStyles: inlineStyleCount,
      inlineScripts: inlineScriptCount,
      externalScripts,
      externalStylesheets,
      hasPreconnect,
      hasPreload,
      hasDnsPrefetch,
      usesAsyncScripts,
      usesDeferScripts,
      contentEncoding,
    },
    accessibility: {
      hasLangAttr,
      hasAriaLabels,
      hasAltOnFirstImage: firstImage !== undefined && firstImage.alt.trim().length > 0,
    },
    scores: {
      overall: score,
      technical: technicalScore,
      content: contentScore,
      performance: performanceScore,
      social: socialScore,
    },
    issues,
  };
}

/**
 * GET /api/fetch-url?url=<target>
 *
 * Fetches the target page server-side and responds with the SEO audit JSON.
 *
 * Responses:
 * - 400 `{ error }` when `url` is missing, unparsable, or not http(s).
 * - 200 `{ error, status, url, responseTime }` when the target answered with a
 *   non-2xx status (the upstream status is carried in the body).
 * - 200 with the full audit report on success.
 * - 500 `{ error }` when the fetch or the analysis throws (including timeout).
 *
 * NOTE(review): the target URL is caller-supplied and fetched from the server;
 * nothing here blocks loopback, private, or link-local hosts (SSRF). Add a
 * check on the resolved address if this route is reachable by untrusted clients.
 */
export async function GET(request: NextRequest) {
  const targetUrl = request.nextUrl.searchParams.get("url");
  if (!targetUrl) {
    return NextResponse.json({ error: "URL parameter required" }, { status: 400 });
  }

  const normalizedUrl = normalizeTargetUrl(targetUrl);
  if (normalizedUrl === null) {
    return NextResponse.json({ error: "Invalid URL" }, { status: 400 });
  }

  try {
    const page = await fetchPage(normalizedUrl);
    if (page.html === null) {
      return NextResponse.json({
        error: `HTTP ${page.status}`,
        status: page.status,
        url: normalizedUrl,
        responseTime: page.responseTime,
      });
    }
    return NextResponse.json(
      buildAuditReport(normalizedUrl, page.html, page.headers, page.responseTime),
    );
  } catch (error) {
    const msg = error instanceof Error ? error.message : "Fetch failed";
    return NextResponse.json({ error: msg }, { status: 500 });
  }
}