Files
PromptArch/app/api/fetch-url/route.ts

270 lines
18 KiB
TypeScript

/**
* Next.js API route: Fetch URL content for comprehensive SEO/GEO auditing.
* Endpoint: GET /api/fetch-url?url=https://example.com
* Returns: Full SEO audit data (technical, content, performance signals, accessibility)
*/
import { NextRequest, NextResponse } from "next/server";
/**
 * Counts the non-overlapping matches of `regex` in `text`.
 *
 * A pattern without the `g` flag is counted through a global clone, because
 * `String.prototype.match` with a non-global pattern returns the first match
 * plus its capture groups, whose length is not an occurrence count.
 *
 * @param text - The string to search.
 * @param regex - The pattern to count; the caller's instance is not modified.
 * @returns The number of matches, or 0 when there are none.
 */
function countOccurrences(text: string, regex: RegExp): number {
  const pattern = regex.global ? regex : new RegExp(regex.source, regex.flags + "g");
  const matches = text.match(pattern);
  return matches === null ? 0 : matches.length;
}
/** Maximum time allowed for fetching and reading the target page, in milliseconds. */
const FETCH_TIMEOUT_MS = 15000;

/**
 * Regex fragment for the attribute section of an opening tag. Quoted values
 * may contain ">" or the other quote character. The three alternatives start
 * with different characters, so matching cannot backtrack exponentially on
 * hostile markup.
 */
const TAG_ATTRS = `(?:[^>"']|"[^"]*"|'[^']*')*`;

/** Attributes of one opening tag, keyed by lower-cased attribute name. */
type TagAttributes = Map<string, string>;

/** One finding reported by the audit. */
type AuditIssue = { severity: "critical" | "warning" | "info"; category: string; message: string };

/** Points removed from a category sub-score per issue of each severity. */
const SEVERITY_PENALTY: Record<AuditIssue["severity"], number> = { critical: 15, warning: 7, info: 2 };

/**
 * Parses the attribute section of an opening tag. Handles double-quoted,
 * single-quoted, unquoted and valueless (boolean) attributes; the first
 * occurrence of a duplicated attribute wins.
 */
function parseAttributes(attrText: string): TagAttributes {
  const attrs: TagAttributes = new Map();
  const attrRegex = /([^\s"'<>\/=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'<>=`]+)))?/g;
  let m: RegExpExecArray | null;
  while ((m = attrRegex.exec(attrText)) !== null) {
    const name = m[1].toLowerCase();
    if (!attrs.has(name)) attrs.set(name, m[2] || m[3] || m[4] || "");
  }
  return attrs;
}

/** Returns the parsed attributes of every `<tagName ...>` opening tag, in document order. */
function collectTagAttributes(html: string, tagName: string): TagAttributes[] {
  const tagRegex = new RegExp("<" + tagName + "(?=[\\s/>])(" + TAG_ATTRS + ")>", "gi");
  const tags: TagAttributes[] = [];
  let m: RegExpExecArray | null;
  while ((m = tagRegex.exec(html)) !== null) tags.push(parseAttributes(m[1]));
  return tags;
}

/** True when the tag's space-separated `rel` attribute contains `token` (case-insensitive). */
function hasRelToken(attrs: TagAttributes, token: string): boolean {
  const rel = attrs.get("rel");
  return rel !== undefined && rel.toLowerCase().split(/\s+/).includes(token);
}

/**
 * Returns the raw `content` of the first meta tag whose `keys` attribute
 * (e.g. "name" or "property") equals `wanted`, or null when there is none.
 * Attribute order inside the tag does not matter.
 */
function findMetaContent(metaTags: TagAttributes[], keys: string[], wanted: string): string | null {
  for (const tag of metaTags) {
    const content = tag.get("content");
    if (content === undefined) continue;
    if (keys.some(key => (tag.get(key) || "").toLowerCase() === wanted)) return content;
  }
  return null;
}

/** Returns the declared charset from `<meta charset>` or an http-equiv style `content`, or null. */
function findCharset(metaTags: TagAttributes[]): string | null {
  for (const tag of metaTags) {
    const direct = (tag.get("charset") || "").trim();
    if (direct) return direct;
    const viaContent = /charset\s*=\s*([^\s;"']+)/i.exec(tag.get("content") || "");
    if (viaContent) return viaContent[1];
  }
  return null;
}

/**
 * True when `canonical`, resolved against `pageUrl`, names a different URL.
 * A canonical that cannot be parsed at all is reported as a mismatch rather
 * than throwing.
 */
function isCanonicalMismatch(canonical: string, pageUrl: string): boolean {
  try {
    return new URL(canonical, pageUrl).href !== new URL(pageUrl).href;
  } catch {
    return true;
  }
}

/** Computes a 0-100 sub-score from the issues that belong to `categories`. */
function categoryScore(issues: AuditIssue[], categories: string[]): number {
  const penalty = issues
    .filter(issue => categories.includes(issue.category))
    .reduce((sum, issue) => sum + SEVERITY_PENALTY[issue.severity], 0);
  return Math.max(0, Math.min(100, 100 - penalty));
}

/**
 * Fetches the page named by the `url` query parameter and returns an SEO audit
 * of its HTML: meta tags, headings, links, images, content statistics,
 * structured data, hreflang, performance/accessibility signals and scores.
 *
 * @param request - Incoming request; `url` may omit the scheme, in which case
 *   `https://` is assumed.
 * @returns JSON audit data. Responds 400 when `url` is missing or invalid and
 *   500 when the fetch fails or times out. A non-2xx upstream response is
 *   reported in the body (`error`, `status`) with this route's default status.
 */
export async function GET(request: NextRequest) {
  const targetUrl = request.nextUrl.searchParams.get("url");
  if (!targetUrl) {
    return NextResponse.json({ error: "URL parameter required" }, { status: 400 });
  }

  // Only prepend a scheme when none is present. A bare startsWith("http")
  // check would wrongly treat hosts such as "httpbin.org" as already absolute.
  const requested = targetUrl.trim();
  const normalizedUrl = /^https?:\/\//i.test(requested) ? requested : "https://" + requested;
  let urlObj: URL;
  try {
    urlObj = new URL(normalizedUrl);
  } catch {
    return NextResponse.json({ error: "Invalid URL" }, { status: 400 });
  }

  // NOTE(security): the server fetches a caller-supplied URL. Nothing here
  // prevents requests to loopback/private/metadata addresses (SSRF); restrict
  // the allowed hosts at this point if the route is reachable by untrusted users.
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
  const startTime = Date.now();
  try {
    const res = await fetch(normalizedUrl, {
      signal: controller.signal,
      headers: {
        "User-Agent": "Mozilla/5.0 (compatible; PromptArch-SEOAudit/1.6; +https://rommark.dev)",
        Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9",
      },
    });
    const responseTime = Date.now() - startTime;
    if (!res.ok) {
      return NextResponse.json({ error: `HTTP ${res.status}`, status: res.status, url: normalizedUrl, responseTime });
    }
    // The abort timer stays armed while the body is read, so a stalled body
    // cannot hang the request; it is cleared in `finally`.
    const html = await res.text();

    const baseDomain = urlObj.hostname;
    // Relative links resolve against the URL that was actually served, which
    // differs from the requested one after a redirect.
    const documentUrl = res.url || normalizedUrl;
    const documentHost = new URL(documentUrl).hostname;

    // === HTTP HEADERS ===
    const isHttps = urlObj.protocol === "https:";
    const server = res.headers.get("server") || "Unknown";
    const xFrameOptions = res.headers.get("x-frame-options") || null;
    const contentEncoding = res.headers.get("content-encoding") || "";

    // Parse each tag family once; every lookup below works on parsed
    // attributes, so attribute order and quoting style do not matter.
    const metaTags = collectTagAttributes(html, "meta");
    const linkTags = collectTagAttributes(html, "link");
    const scriptTags = collectTagAttributes(html, "script");
    const trimmedMeta = (name: string): string | null => {
      const content = findMetaContent(metaTags, ["name"], name);
      return content !== null ? content.trim() : null;
    };

    // === ROBOTS & CANONICAL ===
    const robotsDirectives = trimmedMeta("robots");
    const canonicalTag = linkTags.find(tag => hasRelToken(tag, "canonical") && tag.has("href"));
    const canonical = canonicalTag ? (canonicalTag.get("href") || "") : null;
    const hasCanonicalMismatch = canonical ? isCanonicalMismatch(canonical, normalizedUrl) : false;

    // === META TAGS ===
    const titleMatch = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
    const title = titleMatch ? titleMatch[1].trim() : null;
    const titleLength = title ? title.length : 0;
    const metaDescription = trimmedMeta("description");
    const descLength = metaDescription ? metaDescription.length : 0;
    const metaKeywords = trimmedMeta("keywords");
    const viewport = trimmedMeta("viewport");
    const charset = findCharset(metaTags);

    // === OPEN GRAPH ===
    const ogKeys = ["property", "name"];
    const ogTitle = findMetaContent(metaTags, ogKeys, "og:title");
    const ogDesc = findMetaContent(metaTags, ogKeys, "og:description");
    const ogImage = findMetaContent(metaTags, ogKeys, "og:image");
    const ogType = findMetaContent(metaTags, ogKeys, "og:type");
    const ogUrl = findMetaContent(metaTags, ogKeys, "og:url");

    // === TWITTER CARD ===
    const twKeys = ["name", "property"];
    const twCard = findMetaContent(metaTags, twKeys, "twitter:card");
    const twTitle = findMetaContent(metaTags, twKeys, "twitter:title");
    const twDesc = findMetaContent(metaTags, twKeys, "twitter:description");

    // === HEADING STRUCTURE ===
    const headings: { level: number; text: string }[] = [];
    const headingRegex = /<h([1-6])[^>]*>([\s\S]*?)<\/h[1-6]>/gi;
    let hMatch: RegExpExecArray | null;
    while ((hMatch = headingRegex.exec(html)) !== null) {
      const text = hMatch[2].replace(/<[^>]*>/g, "").trim();
      if (text) headings.push({ level: Number(hMatch[1]), text });
    }
    const h1Count = headings.filter(h => h.level === 1).length;
    const h2Count = headings.filter(h => h.level === 2).length;
    const h3Count = headings.filter(h => h.level === 3).length;
    const h4Count = headings.filter(h => h.level === 4).length;
    const headingHierarchy = headings.map(h => ({ level: h.level, text: h.text.substring(0, 100) }));

    // === LINKS ===
    const links: { href: string; text: string; internal: boolean; nofollow: boolean }[] = [];
    // "<a" must be followed by whitespace so <area>, <article>, <abbr> etc. are not mistaken for anchors.
    const anchorRegex = new RegExp("<a(?=\\s)(" + TAG_ATTRS + ")>([\\s\\S]*?)<\\/a\\s*>", "gi");
    let aMatch: RegExpExecArray | null;
    while ((aMatch = anchorRegex.exec(html)) !== null) {
      const attrs = parseAttributes(aMatch[1]);
      const href = (attrs.get("href") || "").trim();
      if (!href || href.startsWith("#")) continue;
      let resolved: URL;
      try {
        resolved = new URL(href, documentUrl);
      } catch {
        continue;
      }
      // Skips javascript:, mailto:, tel:, data: and any other non-web scheme.
      if (resolved.protocol !== "http:" && resolved.protocol !== "https:") continue;
      const text = aMatch[2].replace(/<[^>]*>/g, "").trim().substring(0, 100);
      const internal = resolved.hostname === baseDomain || resolved.hostname === documentHost;
      links.push({ href, text, internal, nofollow: hasRelToken(attrs, "nofollow") });
    }
    const internalLinks = links.filter(l => l.internal);
    const externalLinks = links.filter(l => !l.internal);
    const nofollowLinks = links.filter(l => l.nofollow);

    // === IMAGES ===
    const images: { src: string; alt: string; loading?: string }[] = [];
    for (const attrs of collectTagAttributes(html, "img")) {
      const src = attrs.get("src");
      if (src === undefined) continue;
      images.push({ src, alt: attrs.get("alt") || "", loading: attrs.get("loading") });
    }
    const imagesWithAlt = images.filter(img => img.alt.trim().length > 0);
    const imagesWithoutAlt = images.filter(img => !img.alt.trim());
    const lazyLoadedImages = images.filter(img => img.loading === "lazy");

    // === CONTENT ANALYSIS ===
    const plainText = html
      .replace(/<script[\s\S]*?<\/script>/gi, "")
      .replace(/<style[\s\S]*?<\/style>/gi, "")
      .replace(/<noscript[\s\S]*?<\/noscript>/gi, "")
      .replace(/<[^>]*>/g, " ")
      // "&amp;" is decoded last so "&amp;lt;" yields the text "&lt;", not "<".
      .replace(/&nbsp;/g, " ").replace(/&lt;/g, "<").replace(/&gt;/g, ">")
      .replace(/&quot;/g, '"').replace(/&#39;/g, "'").replace(/&amp;/g, "&")
      .replace(/\s+/g, " ").trim();
    const wordCount = plainText ? plainText.split(/\s+/).length : 0;
    const sentenceCount = plainText ? (plainText.match(/[.!?]+/g) || []).length : 0;
    // Counted from <p> elements: plainText has its whitespace collapsed, so it
    // no longer contains blank lines to count.
    const paragraphCount = countOccurrences(html, /<p(?=[\s>])/gi);
    const avgWordsPerSentence = sentenceCount > 0 ? Math.round(wordCount / sentenceCount) : 0;

    // === STRUCTURED DATA ===
    const sdTypes = ["Article", "BlogPosting", "FAQPage", "HowTo", "Product", "LocalBusiness", "Organization", "BreadcrumbList", "WebSite", "SearchAction", "VideoObject", "Review"];
    const structuredData = sdTypes.map(sdType => ({
      type: sdType,
      // Accepts both "@type": "X" and "@type": ["X", ...].
      found: new RegExp('"@type"\\s*:\\s*(?:\\[\\s*)?"' + sdType + '"', "i").test(html),
    }));
    const hasJsonLd = scriptTags.some(tag => (tag.get("type") || "").trim().toLowerCase() === "application/ld+json");
    const hasMicrodata = /itemscope/i.test(html);

    // === HREFLANG ===
    const hreflangTags = linkTags
      .filter(tag => hasRelToken(tag, "alternate") && tag.has("hreflang") && tag.has("href"))
      .map(tag => ({ lang: tag.get("hreflang") || "", href: tag.get("href") || "" }));

    // === PERFORMANCE SIGNALS ===
    const htmlSize = html.length;
    const inlineStyleCount = countOccurrences(html, /\sstyle\s*=\s*["']/gi);
    const externalScripts = scriptTags.filter(tag => tag.has("src")).length;
    const inlineScriptCount = scriptTags.length - externalScripts;
    const externalStylesheets = linkTags.filter(tag => hasRelToken(tag, "stylesheet")).length;
    const hasPreconnect = linkTags.some(tag => hasRelToken(tag, "preconnect"));
    const hasPreload = linkTags.some(tag => hasRelToken(tag, "preload"));
    const hasDnsPrefetch = linkTags.some(tag => hasRelToken(tag, "dns-prefetch"));
    // async/defer are boolean attributes and are usually written without "=".
    const usesAsyncScripts = scriptTags.some(tag => tag.has("async"));
    const usesDeferScripts = scriptTags.some(tag => tag.has("defer"));

    // === ACCESSIBILITY ===
    const hasLangAttr = /<html[^>]*lang\s*=/i.test(html);
    const hasAriaLabels = /aria-label|aria-labelledby|aria-describedby/i.test(html);

    // === SCORE CALCULATION ===
    let score = 100;
    const issues: AuditIssue[] = [];
    const flag = (penalty: number, severity: AuditIssue["severity"], category: string, message: string): void => {
      score -= penalty;
      issues.push({ severity, category, message });
    };
    if (!title) flag(10, "critical", "Meta", "Missing title tag");
    else if (titleLength > 60) flag(3, "warning", "Meta", "Title too long (" + titleLength + " chars, max 60)");
    if (!metaDescription) flag(10, "critical", "Meta", "Missing meta description");
    else if (descLength > 160) flag(3, "warning", "Meta", "Meta description too long (" + descLength + " chars, max 160)");
    if (h1Count === 0) flag(10, "critical", "Content", "Missing H1 heading");
    if (h1Count > 1) flag(5, "critical", "Content", "Multiple H1 tags (" + h1Count + " found)");
    if (!viewport) flag(10, "critical", "Mobile", "Missing viewport meta tag");
    if (!isHttps) flag(10, "critical", "Security", "Not using HTTPS");
    if (imagesWithoutAlt.length > 0) flag(5, "warning", "Accessibility", imagesWithoutAlt.length + " images missing alt text");
    if (!canonical) flag(3, "warning", "Technical", "Missing canonical tag");
    if (hasCanonicalMismatch) flag(5, "warning", "Technical", "Canonical URL mismatch");
    if (inlineStyleCount > 10) flag(3, "warning", "Performance", inlineStyleCount + " inline styles detected");
    if (!hasPreconnect && externalScripts > 3) flag(3, "warning", "Performance", "Missing preconnect hints for external resources");
    if (wordCount < 300 && wordCount > 0) flag(3, "warning", "Content", "Thin content (" + wordCount + " words)");
    if (ogTitle === null && ogDesc === null) flag(3, "warning", "Social", "Missing Open Graph tags");
    if (twCard === null) flag(2, "warning", "Social", "Missing Twitter Card tags");
    if (externalLinks.length === 0) flag(2, "warning", "Links", "No external links found");
    if (robotsDirectives && /noindex/i.test(robotsDirectives)) flag(10, "critical", "Technical", "Page has noindex directive");
    if (!hasJsonLd && !hasMicrodata) flag(1, "info", "Structured Data", "No structured data found");
    if (!hasLangAttr) flag(1, "info", "Accessibility", "Missing html lang attribute");
    if (!lazyLoadedImages.length && images.length > 5) flag(2, "info", "Performance", "Consider lazy loading for images");
    score = Math.max(0, Math.min(100, score));
    const technicalScore = categoryScore(issues, ["Technical", "Security"]);
    const contentScore = categoryScore(issues, ["Content"]);
    const performanceScore = categoryScore(issues, ["Performance", "Mobile"]);
    const socialScore = categoryScore(issues, ["Social", "Structured Data"]);

    return NextResponse.json({
      url: normalizedUrl,
      domain: baseDomain,
      protocol: isHttps ? "HTTPS" : "HTTP",
      responseTime,
      server,
      htmlSize,
      title, titleLength,
      titleStatus: !title ? "missing" : titleLength > 60 ? "too_long" : "good",
      metaDescription, descLength,
      descStatus: !metaDescription ? "missing" : descLength > 160 ? "too_long" : "good",
      metaKeywords, viewport, charset, robotsDirectives,
      canonical, hasCanonicalMismatch, xFrameOptions,
      openGraph: { title: ogTitle, description: ogDesc, image: ogImage, type: ogType, url: ogUrl },
      twitterCard: { card: twCard, title: twTitle, description: twDesc },
      headings: headingHierarchy, h1Count, h2Count, h3Count, h4Count,
      headingStatus: h1Count === 0 ? "missing_h1" : h1Count > 1 ? "multiple_h1" : "good",
      links: {
        total: links.length,
        internal: internalLinks.length,
        external: externalLinks.length,
        nofollow: nofollowLinks.length,
        sampleExternal: externalLinks.slice(0, 20).map(l => ({ href: l.href, text: l.text, nofollow: l.nofollow })),
      },
      images: {
        total: images.length,
        withAlt: imagesWithAlt.length,
        withoutAlt: imagesWithoutAlt.length,
        lazyLoaded: lazyLoadedImages.length,
        altCoverage: images.length > 0 ? Math.round((imagesWithAlt.length / images.length) * 100) : 100,
        sampleWithoutAlt: imagesWithoutAlt.slice(0, 10).map(img => img.src),
      },
      content: { wordCount, sentenceCount, paragraphCount, avgWordsPerSentence, textPreview: plainText.substring(0, 2000) },
      structuredData: { hasJsonLd, hasMicrodata, types: structuredData },
      hreflang: hreflangTags,
      performance: { inlineStyles: inlineStyleCount, inlineScripts: inlineScriptCount, externalScripts, externalStylesheets, hasPreconnect, hasPreload, hasDnsPrefetch, usesAsyncScripts, usesDeferScripts, contentEncoding },
      accessibility: { hasLangAttr, hasAriaLabels, hasAltOnFirstImage: images.length > 0 && images[0].alt.trim().length > 0 },
      scores: { overall: score, technical: technicalScore, content: contentScore, performance: performanceScore, social: socialScore },
      issues,
    });
  } catch (error) {
    const msg = error instanceof Error ? error.message : "Fetch failed";
    return NextResponse.json({ error: msg }, { status: 500 });
  } finally {
    // Runs on every path (success, upstream error, thrown fetch) so the abort timer never leaks.
    clearTimeout(timeout);
  }
}