270 lines
18 KiB
TypeScript
270 lines
18 KiB
TypeScript
/**
 * Next.js API route: Fetch URL content for comprehensive SEO/GEO auditing.
 *
 * Endpoint: GET /api/fetch-url?url=https://example.com
 *
 * Returns: Full SEO audit data (technical, content, performance signals, accessibility)
 */
|
|
|
|
import { NextRequest, NextResponse } from "next/server";
|
|
|
|
/**
 * Counts the non-overlapping matches of `regex` in `text`.
 *
 * The pattern is always applied globally. Without the `g` flag,
 * `String.prototype.match` returns a single match plus its capture groups,
 * whose length is unrelated to the number of occurrences, so a non-global
 * regex is re-created with `g` added before matching.
 *
 * @param text - The string to search.
 * @param regex - The pattern to count; any flags other than `g` are preserved.
 * @returns The number of matches, or 0 when there are none.
 */
function countOccurrences(text: string, regex: RegExp): number {
  const pattern = regex.global ? regex : new RegExp(regex.source, regex.flags + "g");
  const matches = text.match(pattern);
  return matches === null ? 0 : matches.length;
}
|
|
|
|
/** Maximum time allowed for the upstream request, including reading the body. */
const FETCH_TIMEOUT_MS = 15000;

/** Attributes of one HTML opening tag, keyed by lower-cased attribute name. */
type TagAttributes = Record<string, string | undefined>;

/** One finding produced by the audit. */
interface AuditIssue {
  severity: "critical" | "warning" | "info";
  category: string;
  message: string;
}

/** Points removed from a category sub-score for each issue of a given severity. */
const SEVERITY_PENALTY: Record<AuditIssue["severity"], number> = { critical: 15, warning: 7, info: 2 };

/** Replacement text for the handful of HTML entities decoded in the text preview. */
const BASIC_ENTITIES: Record<string, string | undefined> = {
  nbsp: " ",
  amp: "&",
  lt: "<",
  gt: ">",
  quot: '"',
  "#39": "'",
};

/** Parses the attribute portion of an opening tag into a name -> value map. */
function parseTagAttributes(source: string): TagAttributes {
  const attributes: TagAttributes = {};
  // name, optionally followed by = and a double-quoted, single-quoted or unquoted value.
  const pattern = /([^\s"'<>\/=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'>]+)))?/g;
  let match: RegExpExecArray | null;
  while ((match = pattern.exec(source)) !== null) {
    const name = match[1].toLowerCase();
    // Keep the first occurrence when an attribute is repeated.
    if (Object.prototype.hasOwnProperty.call(attributes, name)) continue;
    const value = [match[2], match[3], match[4]].find(candidate => candidate !== undefined);
    // Value-less (boolean) attributes are recorded with an empty string.
    attributes[name] = value === undefined ? "" : value;
  }
  return attributes;
}

/** Returns the parsed attributes of every `<tagName ...>` opening tag in `html`, in document order. */
function collectTagAttributes(html: string, tagName: string): TagAttributes[] {
  // Quoted attribute values are skipped as a unit so a ">" inside them does not end the tag.
  const pattern = new RegExp("<" + tagName + "(?=[\\s/>])((?:\"[^\"]*\"|'[^']*'|[^'\">])*)>", "gi");
  const tags: TagAttributes[] = [];
  let match: RegExpExecArray | null;
  while ((match = pattern.exec(html)) !== null) {
    tags.push(parseTagAttributes(match[1]));
  }
  return tags;
}

/** Returns the `content` of the first meta tag whose `name` or `property` equals `key` (case-insensitive), or null. */
function findMetaContent(metaTags: TagAttributes[], key: string): string | null {
  const wanted = key.toLowerCase();
  for (const tag of metaTags) {
    const identifiers = [tag.name, tag.property];
    const matches = identifiers.some(id => id !== undefined && id.trim().toLowerCase() === wanted);
    if (matches && tag.content !== undefined) return tag.content;
  }
  return null;
}

/** Tells whether the tag's space-separated `rel` attribute contains `token` (lower-case). */
function hasRelToken(tag: TagAttributes, token: string): boolean {
  return (tag.rel || "").toLowerCase().split(/\s+/).includes(token);
}

/** Decodes the entities listed in BASIC_ENTITIES in a single pass, so "&amp;lt;" becomes "&lt;" rather than "<". */
function decodeBasicEntities(text: string): string {
  return text.replace(/&(nbsp|amp|lt|gt|quot|#39);/g, (entity: string, name: string) => {
    const replacement = BASIC_ENTITIES[name];
    return replacement === undefined ? entity : replacement;
  });
}

/** Tells whether `canonical`, resolved against `pageUrl`, points somewhere other than `pageUrl`; unparseable values count as a mismatch. */
function canonicalDiffers(canonical: string, pageUrl: string): boolean {
  try {
    return new URL(canonical, pageUrl).href !== new URL(pageUrl).href;
  } catch {
    return true;
  }
}

/** Computes a 0-100 sub-score from the issues belonging to `categories`. */
function categoryScore(issues: AuditIssue[], categories: string[]): number {
  const penalty = issues
    .filter(issue => categories.includes(issue.category))
    .reduce((sum, issue) => sum + SEVERITY_PENALTY[issue.severity], 0);
  return Math.max(0, Math.min(100, 100 - penalty));
}

/** Fetches `url` with a timeout covering both headers and body; `html` is null when the response status is not OK. */
async function fetchPage(url: string): Promise<{ response: Response; html: string | null; responseTime: number }> {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
  const startedAt = Date.now();
  try {
    const response = await fetch(url, {
      signal: controller.signal,
      headers: {
        "User-Agent": "Mozilla/5.0 (compatible; PromptArch-SEOAudit/1.6; +https://rommark.dev)",
        Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9",
      },
    });
    // Measured once the response headers are available, before the body is read.
    const responseTime = Date.now() - startedAt;
    const html = response.ok ? await response.text() : null;
    return { response, html, responseTime };
  } finally {
    // Runs on success, HTTP error and thrown fetch alike, so the timer never outlives the request.
    clearTimeout(timer);
  }
}

/**
 * GET handler: downloads the page named by the `url` query parameter and
 * returns a JSON SEO audit of its HTML.
 *
 * Responses:
 * - 400 `{ error }` when `url` is missing, cannot be parsed, or is not http(s).
 * - `{ error, status, url, responseTime }` (no explicit HTTP status set) when
 *   the upstream server answers with a non-OK status.
 * - 500 `{ error }` when the fetch or the analysis throws (including timeout).
 * - Otherwise the full audit: meta tags, headings, links, images, content
 *   statistics, structured data, hreflang, performance and accessibility
 *   signals, scores and the list of issues.
 *
 * A URL without a scheme is treated as https.
 *
 * NOTE(security): this route fetches caller-supplied URLs from the server
 * (an SSRF surface). Only the scheme is restricted here; if the route is
 * reachable by untrusted users, private/loopback targets should also be
 * blocked at the network or resolver level.
 *
 * @param request - Incoming request; only the `url` search parameter is read.
 */
export async function GET(request: NextRequest) {
  const targetUrl = request.nextUrl.searchParams.get("url");
  if (!targetUrl) {
    return NextResponse.json({ error: "URL parameter required" }, { status: 400 });
  }

  // Only prepend https:// when the input carries no scheme at all, so hosts
  // such as "httpbin.org" are accepted and "HTTP://..." is not double-prefixed.
  const trimmedUrl = targetUrl.trim();
  const hasScheme = /^[a-z][a-z\d+.-]*:\/\//i.test(trimmedUrl);
  const normalizedUrl = hasScheme ? trimmedUrl : "https://" + trimmedUrl;

  let urlObj: URL;
  try {
    urlObj = new URL(normalizedUrl);
  } catch {
    return NextResponse.json({ error: "Invalid URL" }, { status: 400 });
  }
  if (urlObj.protocol !== "http:" && urlObj.protocol !== "https:") {
    return NextResponse.json({ error: "Invalid URL" }, { status: 400 });
  }

  try {
    const page = await fetchPage(normalizedUrl);
    const res = page.response;
    const responseTime = page.responseTime;

    if (page.html === null) {
      return NextResponse.json({ error: `HTTP ${res.status}`, status: res.status, url: normalizedUrl, responseTime });
    }

    const html = page.html;
    const baseDomain = urlObj.hostname;

    // === HTTP HEADERS ===
    const isHttps = urlObj.protocol === "https:";
    const server = res.headers.get("server") || "Unknown";
    const xFrameOptions = res.headers.get("x-frame-options") || null;
    const contentEncoding = res.headers.get("content-encoding") || "";

    // Opening tags parsed once and reused by the sections below.
    const metaTags = collectTagAttributes(html, "meta");
    const linkTags = collectTagAttributes(html, "link");
    const scriptTags = collectTagAttributes(html, "script");

    // === ROBOTS & CANONICAL ===
    const robotsContent = findMetaContent(metaTags, "robots");
    const robotsDirectives = robotsContent !== null ? robotsContent.trim() : null;

    const canonicalTag = linkTags.find(tag => hasRelToken(tag, "canonical") && tag.href !== undefined);
    const canonical = canonicalTag && canonicalTag.href !== undefined ? canonicalTag.href : null;
    const hasCanonicalMismatch = canonical ? canonicalDiffers(canonical, normalizedUrl) : false;

    // === META TAGS ===
    const titleMatch = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
    const title = titleMatch ? titleMatch[1].trim() : null;
    const titleLength = title ? title.length : 0;

    const descContent = findMetaContent(metaTags, "description");
    const metaDescription = descContent !== null ? descContent.trim() : null;
    const descLength = metaDescription ? metaDescription.length : 0;

    const keywordsContent = findMetaContent(metaTags, "keywords");
    const metaKeywords = keywordsContent !== null ? keywordsContent.trim() : null;

    const viewportContent = findMetaContent(metaTags, "viewport");
    const viewport = viewportContent !== null ? viewportContent.trim() : null;

    const charsetMatch = html.match(/<meta[^>]*charset\s*=\s*["']?([^"'\s>]+)/i)
      || html.match(/<meta[^>]*content\s*=\s*["'][^"']*charset=([^"'\s]+)/i);
    const charset = charsetMatch ? charsetMatch[1].trim() : null;

    // === OPEN GRAPH ===
    const ogTitle = findMetaContent(metaTags, "og:title");
    const ogDesc = findMetaContent(metaTags, "og:description");
    const ogImage = findMetaContent(metaTags, "og:image");
    const ogType = findMetaContent(metaTags, "og:type");
    const ogUrl = findMetaContent(metaTags, "og:url");

    // === TWITTER CARD ===
    const twCard = findMetaContent(metaTags, "twitter:card");
    const twTitle = findMetaContent(metaTags, "twitter:title");
    const twDesc = findMetaContent(metaTags, "twitter:description");

    // === HEADING STRUCTURE ===
    const headings: { level: number; text: string }[] = [];
    // The back-reference makes the closing tag match the opening level.
    const headingRegex = /<h([1-6])\b[^>]*>([\s\S]*?)<\/h\1\s*>/gi;
    let hMatch: RegExpExecArray | null;
    while ((hMatch = headingRegex.exec(html)) !== null) {
      const text = hMatch[2].replace(/<[^>]*>/g, "").trim();
      if (text) headings.push({ level: Number(hMatch[1]), text });
    }
    const countLevel = (level: number): number => headings.filter(h => h.level === level).length;
    const h1Count = countLevel(1);
    const h2Count = countLevel(2);
    const h3Count = countLevel(3);
    const h4Count = countLevel(4);
    const headingHierarchy = headings.map(h => ({ level: h.level, text: h.text.substring(0, 100) }));

    // === LINKS ===
    const links: { href: string; text: string; internal: boolean; nofollow: boolean }[] = [];
    // "\s" after "<a" keeps <area>, <abbr>, <article>, ... from being treated as anchors.
    const linkRegex = /<a\s([^>]*)>([\s\S]*?)<\/a\s*>/gi;
    let lMatch: RegExpExecArray | null;
    while ((lMatch = linkRegex.exec(html)) !== null) {
      const anchor = parseTagAttributes(lMatch[1]);
      const href = (anchor.href || "").trim();
      if (!href || href.startsWith("#")) continue;

      let resolved: URL;
      try {
        resolved = new URL(href, normalizedUrl);
      } catch {
        continue;
      }
      // Skips javascript:, mailto:, tel:, data: and other non-web targets.
      if (resolved.protocol !== "http:" && resolved.protocol !== "https:") continue;

      links.push({
        href,
        text: lMatch[2].replace(/<[^>]*>/g, "").trim().substring(0, 100),
        internal: resolved.hostname === baseDomain,
        nofollow: /nofollow/i.test(anchor.rel || ""),
      });
    }
    const internalLinks = links.filter(l => l.internal);
    const externalLinks = links.filter(l => !l.internal);
    const nofollowLinks = links.filter(l => l.nofollow);

    // === IMAGES ===
    const images: { src: string; alt: string; loading?: string }[] = [];
    const imgRegex = /<img\s([^>]*)>/gi;
    let iMatch: RegExpExecArray | null;
    while ((iMatch = imgRegex.exec(html)) !== null) {
      const img = parseTagAttributes(iMatch[1]);
      // Falls back to data-src so images that only declare a lazy-loader source are still counted.
      const src = img.src !== undefined ? img.src : img["data-src"];
      if (src !== undefined) {
        images.push({ src, alt: img.alt !== undefined ? img.alt : "", loading: img.loading });
      }
    }
    const imagesWithAlt = images.filter(img => img.alt.trim().length > 0);
    const imagesWithoutAlt = images.filter(img => img.alt.trim().length === 0);
    const lazyLoadedImages = images.filter(img => img.loading === "lazy");

    // === CONTENT ANALYSIS ===
    const strippedHtml = html
      .replace(/<script[\s\S]*?<\/script>/gi, "")
      .replace(/<style[\s\S]*?<\/style>/gi, "")
      .replace(/<noscript[\s\S]*?<\/noscript>/gi, "")
      .replace(/<[^>]*>/g, " ");
    const plainText = decodeBasicEntities(strippedHtml).replace(/\s+/g, " ").trim();

    const wordCount = plainText ? plainText.split(/\s+/).length : 0;
    const sentenceCount = plainText ? (plainText.match(/[.!?]+/g) || []).length : 0;
    // Counted from <p> tags: plainText has its whitespace collapsed, so blank lines no longer exist in it.
    const paragraphCount = countOccurrences(html, /<p(?=[\s>])/gi);
    const avgWordsPerSentence = sentenceCount > 0 ? Math.round(wordCount / sentenceCount) : 0;

    // === STRUCTURED DATA ===
    const sdTypes = ["Article", "BlogPosting", "FAQPage", "HowTo", "Product", "LocalBusiness", "Organization", "BreadcrumbList", "WebSite", "SearchAction", "VideoObject", "Review"];
    const structuredData = sdTypes.map(sdType => ({
      type: sdType,
      // Matches both "@type": "X" and "@type": ["X", ...].
      found: new RegExp('"@type"\\s*:\\s*\\[?\\s*"' + sdType + '"', "i").test(html),
    }));
    const hasJsonLd = scriptTags.some(tag => (tag.type || "").trim().toLowerCase() === "application/ld+json");
    const hasMicrodata = /itemscope/i.test(html);

    // === HREFLANG ===
    const hreflangTags: { lang: string; href: string }[] = [];
    for (const tag of linkTags) {
      if (hasRelToken(tag, "alternate") && tag.hreflang !== undefined && tag.href !== undefined) {
        hreflangTags.push({ lang: tag.hreflang, href: tag.href });
      }
    }

    // === PERFORMANCE SIGNALS ===
    const htmlSize = html.length;
    const inlineStyleCount = countOccurrences(html, /\sstyle\s*=\s*["']/gi);
    const externalScripts = scriptTags.filter(tag => tag.src !== undefined).length;
    const inlineScriptCount = scriptTags.length - externalScripts;
    const externalStylesheets = linkTags.filter(tag => hasRelToken(tag, "stylesheet")).length;
    const hasPreconnect = linkTags.some(tag => hasRelToken(tag, "preconnect"));
    const hasPreload = linkTags.some(tag => hasRelToken(tag, "preload"));
    const hasDnsPrefetch = linkTags.some(tag => hasRelToken(tag, "dns-prefetch"));
    // async/defer are boolean attributes, so presence is what matters, not "=".
    const usesAsyncScripts = scriptTags.some(tag => tag.async !== undefined);
    const usesDeferScripts = scriptTags.some(tag => tag.defer !== undefined);

    // === ACCESSIBILITY ===
    const hasLangAttr = /<html\b[^>]*\slang\s*=/i.test(html);
    const hasAriaLabels = /aria-label|aria-labelledby|aria-describedby/i.test(html);

    // === SCORE CALCULATION ===
    let score = 100;
    const issues: AuditIssue[] = [];
    const report = (penalty: number, severity: AuditIssue["severity"], category: string, message: string): void => {
      score -= penalty;
      issues.push({ severity, category, message });
    };

    if (!title) report(10, "critical", "Meta", "Missing title tag");
    else if (titleLength > 60) report(3, "warning", "Meta", "Title too long (" + titleLength + " chars, max 60)");

    if (!metaDescription) report(10, "critical", "Meta", "Missing meta description");
    else if (descLength > 160) report(3, "warning", "Meta", "Meta description too long (" + descLength + " chars, max 160)");

    if (h1Count === 0) report(10, "critical", "Content", "Missing H1 heading");
    if (h1Count > 1) report(5, "critical", "Content", "Multiple H1 tags (" + h1Count + " found)");
    if (!viewport) report(10, "critical", "Mobile", "Missing viewport meta tag");
    if (!isHttps) report(10, "critical", "Security", "Not using HTTPS");
    if (imagesWithoutAlt.length > 0) report(5, "warning", "Accessibility", imagesWithoutAlt.length + " images missing alt text");
    if (!canonical) report(3, "warning", "Technical", "Missing canonical tag");
    if (hasCanonicalMismatch) report(5, "warning", "Technical", "Canonical URL mismatch");
    if (inlineStyleCount > 10) report(3, "warning", "Performance", inlineStyleCount + " inline styles detected");
    if (!hasPreconnect && externalScripts > 3) report(3, "warning", "Performance", "Missing preconnect hints for external resources");
    if (wordCount < 300 && wordCount > 0) report(3, "warning", "Content", "Thin content (" + wordCount + " words)");
    if (ogTitle === null && ogDesc === null) report(3, "warning", "Social", "Missing Open Graph tags");
    if (twCard === null) report(2, "warning", "Social", "Missing Twitter Card tags");
    if (externalLinks.length === 0) report(2, "warning", "Links", "No external links found");
    if (robotsDirectives && /noindex/i.test(robotsDirectives)) report(10, "critical", "Technical", "Page has noindex directive");
    if (!hasJsonLd && !hasMicrodata) report(1, "info", "Structured Data", "No structured data found");
    if (!hasLangAttr) report(1, "info", "Accessibility", "Missing html lang attribute");
    if (lazyLoadedImages.length === 0 && images.length > 5) report(2, "info", "Performance", "Consider lazy loading for images");

    score = Math.max(0, Math.min(100, score));

    const technicalScore = categoryScore(issues, ["Technical", "Security"]);
    const contentScore = categoryScore(issues, ["Content"]);
    const performanceScore = categoryScore(issues, ["Performance", "Mobile"]);
    const socialScore = categoryScore(issues, ["Social", "Structured Data"]);

    return NextResponse.json({
      url: normalizedUrl,
      domain: baseDomain,
      protocol: isHttps ? "HTTPS" : "HTTP",
      responseTime,
      server,
      htmlSize,
      title, titleLength,
      titleStatus: !title ? "missing" : titleLength > 60 ? "too_long" : "good",
      metaDescription, descLength,
      descStatus: !metaDescription ? "missing" : descLength > 160 ? "too_long" : "good",
      metaKeywords, viewport, charset, robotsDirectives,
      canonical, hasCanonicalMismatch, xFrameOptions,
      openGraph: { title: ogTitle, description: ogDesc, image: ogImage, type: ogType, url: ogUrl },
      twitterCard: { card: twCard, title: twTitle, description: twDesc },
      headings: headingHierarchy, h1Count, h2Count, h3Count, h4Count,
      headingStatus: h1Count === 0 ? "missing_h1" : h1Count > 1 ? "multiple_h1" : "good",
      links: {
        total: links.length,
        internal: internalLinks.length,
        external: externalLinks.length,
        nofollow: nofollowLinks.length,
        sampleExternal: externalLinks.slice(0, 20).map(l => ({ href: l.href, text: l.text, nofollow: l.nofollow })),
      },
      images: {
        total: images.length,
        withAlt: imagesWithAlt.length,
        withoutAlt: imagesWithoutAlt.length,
        lazyLoaded: lazyLoadedImages.length,
        altCoverage: images.length > 0 ? Math.round((imagesWithAlt.length / images.length) * 100) : 100,
        sampleWithoutAlt: imagesWithoutAlt.slice(0, 10).map(img => img.src),
      },
      content: { wordCount, sentenceCount, paragraphCount, avgWordsPerSentence, textPreview: plainText.substring(0, 2000) },
      structuredData: { hasJsonLd, hasMicrodata, types: structuredData },
      hreflang: hreflangTags,
      performance: { inlineStyles: inlineStyleCount, inlineScripts: inlineScriptCount, externalScripts, externalStylesheets, hasPreconnect, hasPreload, hasDnsPrefetch, usesAsyncScripts, usesDeferScripts, contentEncoding },
      accessibility: { hasLangAttr, hasAriaLabels, hasAltOnFirstImage: images.length > 0 && images[0].alt.trim().length > 0 },
      scores: { overall: score, technical: technicalScore, content: contentScore, performance: performanceScore, social: socialScore },
      issues,
    });
  } catch (error) {
    const msg = error instanceof Error ? error.message : "Fetch failed";
    return NextResponse.json({ error: msg }, { status: 500 });
  }
}
|