Files
PromptArch/app/api/fetch-url/route.ts

118 lines
4.7 KiB
TypeScript

/**
* Next.js API route: Fetch URL content for SEO/web auditing.
* Endpoint: GET /api/fetch-url?url=https://example.com
* Returns: { url, title, metaDescription, metaKeywords, canonical, ogTitle, ogDescription,
*            headings, links, images, text, htmlLength, status }
*/
import { NextRequest, NextResponse } from "next/server";
/** Upper bound, in milliseconds, for fetching the target page (headers and body). */
const FETCH_TIMEOUT_MS = 12000;

/** Compiled attribute matchers, keyed by attribute name, so they are built once. */
const attributePatternCache = new Map<string, RegExp>();

/**
 * Reads one quoted attribute value from the source text of an opening tag.
 *
 * The value ends at the quote character that opened it, so an apostrophe inside
 * a double-quoted value (e.g. `content="Don't miss"`) is kept intact.
 *
 * @param tagSource - Full opening tag text, e.g. `<img src="a.png" alt="">`.
 * @param attribute - Attribute name to look up (matched case-insensitively).
 * @returns The raw attribute value, or `undefined` when the attribute is absent
 *          or not quoted.
 */
function readAttribute(tagSource: string, attribute: string): string | undefined {
  let pattern = attributePatternCache.get(attribute);
  if (!pattern) {
    // Leading \s keeps `src` from matching inside `data-src`.
    pattern = new RegExp(`\\s${attribute}\\s*=\\s*(?:"([^"]*)"|'([^']*)')`, "i");
    attributePatternCache.set(attribute, pattern);
  }
  const match = pattern.exec(tagSource);
  if (!match) return undefined;
  return match[1] !== undefined ? match[1] : match[2];
}

/**
 * Collects the source text of every opening `<tagName ...>` tag that carries attributes.
 * Quoted attribute values may contain `>` without ending the tag early.
 */
function findTags(html: string, tagName: string): string[] {
  const pattern = new RegExp(`<${tagName}\\s(?:[^>"']|"[^"]*"|'[^']*')*>`, "gi");
  return html.match(pattern) ?? [];
}

/**
 * Returns the trimmed `content` of the first `<meta>` tag whose `keyAttribute`
 * equals `keyValue` (case-insensitive), regardless of attribute order.
 * Returns an empty string when no such tag exists.
 */
function findMetaContent(html: string, keyAttribute: "name" | "property", keyValue: string): string {
  for (const tag of findTags(html, "meta")) {
    const key = readAttribute(tag, keyAttribute);
    if (key === undefined || key.toLowerCase() !== keyValue) continue;
    const content = readAttribute(tag, "content");
    if (content !== undefined) return content.trim();
  }
  return "";
}

/** Removes every `<...>` tag from a fragment and trims the result. */
function stripTags(fragment: string): string {
  return fragment.replace(/<[^>]*>/g, "").trim();
}

/**
 * GET /api/fetch-url?url=<absolute http(s) URL>
 *
 * Downloads the target page and extracts SEO-relevant data with regular
 * expressions (no DOM parser is used).
 *
 * Responses:
 * - 400 `{ error }` when `url` is missing, unparsable, or not http/https.
 * - `{ error: "HTTP <code>", status: <code> }` when the upstream response is not OK.
 *   No status is passed to `NextResponse.json` here, so the route's own status is
 *   the framework default; the upstream code is reported in the body only.
 * - 504 `{ error }` when the fetch (including the body download) exceeds the timeout.
 * - 500 `{ error }` for any other failure while fetching or parsing.
 * - Otherwise `{ url, title, metaDescription, metaKeywords, canonical, ogTitle,
 *   ogDescription, headings, links, images, text, htmlLength, status }`, with
 *   `links` capped at 100 entries, `images` at 50 and `text` at 5000 characters.
 *
 * @param request - Incoming request; only the `url` query parameter is read.
 */
export async function GET(request: NextRequest) {
  const targetUrl = request.nextUrl.searchParams.get("url");
  if (!targetUrl) {
    return NextResponse.json({ error: "URL parameter required" }, { status: 400 });
  }

  let parsedUrl: URL;
  try {
    parsedUrl = new URL(targetUrl);
  } catch {
    return NextResponse.json({ error: "Invalid URL" }, { status: 400 });
  }

  // Only web URLs make sense for an audit; this also rejects schemes such as data:.
  // NOTE(review): hosts on private/loopback networks are NOT blocked here, and
  // redirects are followed, so this endpoint can still be used for SSRF. Add an
  // allow/deny list at the deployment or application level if it is publicly reachable.
  if (parsedUrl.protocol !== "http:" && parsedUrl.protocol !== "https:") {
    return NextResponse.json({ error: "Only http and https URLs are supported" }, { status: 400 });
  }

  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);

  try {
    const res = await fetch(targetUrl, {
      signal: controller.signal,
      headers: {
        "User-Agent": "PromptArch-SEO-Bot/1.5 (https://rommark.dev)",
        Accept: "text/html,application/xhtml+xml,text/plain;q=0.9",
      },
    });

    if (!res.ok) {
      return NextResponse.json({ error: `HTTP ${res.status}`, status: res.status });
    }

    // The timer is still armed here, so a stalled body download is aborted too.
    const html = await res.text();

    const titleMatch = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
    const title = titleMatch ? titleMatch[1].trim() : "";

    const metaDescription = findMetaContent(html, "name", "description");
    const metaKeywords = findMetaContent(html, "name", "keywords");
    const ogTitle = findMetaContent(html, "property", "og:title");
    const ogDescription = findMetaContent(html, "property", "og:description");

    let canonical = "";
    for (const tag of findTags(html, "link")) {
      const rel = readAttribute(tag, "rel");
      if (rel === undefined || rel.toLowerCase() !== "canonical") continue;
      const href = readAttribute(tag, "href");
      if (href !== undefined) {
        canonical = href;
        break;
      }
    }

    const headings: { level: number; text: string }[] = [];
    for (const [, level, inner] of html.matchAll(/<h([1-6])[^>]*>([\s\S]*?)<\/h[1-6]>/gi)) {
      const text = stripTags(inner);
      if (text) headings.push({ level: parseInt(level, 10), text });
    }

    const links: { href: string; text: string; internal: boolean }[] = [];
    const baseDomain = parsedUrl.hostname;
    // `<a\s` (not `<a[^>]*`) so tags such as <area> or <abbr> are not mistaken for anchors.
    const anchorPattern = /(<a\s(?:[^>"']|"[^"]*"|'[^']*')*>)([\s\S]*?)<\/a>/gi;
    for (const [, openingTag, inner] of html.matchAll(anchorPattern)) {
      const href = (readAttribute(openingTag, "href") ?? "").trim();
      if (!href || href.startsWith("#") || href.startsWith("javascript:")) continue;
      const text = stripTags(inner).substring(0, 100);
      try {
        // Relative hrefs are resolved against the requested URL before comparing hosts.
        const linkDomain = new URL(href, targetUrl).hostname;
        links.push({ href, text, internal: linkDomain === baseDomain });
      } catch {
        // Unresolvable href: leave it out of the report.
        continue;
      }
    }

    // src and alt are read independently so images WITHOUT alt text are reported
    // (with alt: "") instead of being dropped.
    const images: { src: string; alt: string }[] = [];
    for (const tag of findTags(html, "img")) {
      const src = readAttribute(tag, "src");
      if (src === undefined) continue;
      images.push({ src, alt: readAttribute(tag, "alt") ?? "" });
    }

    const plainText = html
      .replace(/<script[\s\S]*?<\/script>/gi, "")
      .replace(/<style[\s\S]*?<\/style>/gi, "")
      .replace(/<[^>]*>/g, " ")
      .replace(/\s+/g, " ")
      .trim()
      .substring(0, 5000);

    return NextResponse.json({
      url: targetUrl,
      title,
      metaDescription,
      metaKeywords,
      canonical,
      ogTitle,
      ogDescription,
      headings,
      links: links.slice(0, 100),
      images: images.slice(0, 50),
      text: plainText,
      htmlLength: html.length,
      status: res.status,
    });
  } catch (error) {
    // The controller is only ever aborted by the timeout timer above.
    if (controller.signal.aborted) {
      return NextResponse.json(
        { error: `Request timed out after ${FETCH_TIMEOUT_MS / 1000}s` },
        { status: 504 },
      );
    }
    const msg = error instanceof Error ? error.message : "Fetch failed";
    return NextResponse.json({ error: msg }, { status: 500 });
  } finally {
    // Always release the timer, whether the fetch succeeded, failed or returned early.
    clearTimeout(timeout);
  }
}