118 lines
4.7 KiB
TypeScript
118 lines
4.7 KiB
TypeScript
/**
 * Next.js API route: Fetch URL content for SEO/web auditing.
 * Endpoint: GET /api/fetch-url?url=https://example.com
 * Returns: { url, title, metaDescription, metaKeywords, canonical, ogTitle,
 *            ogDescription, headings, links, images, text, htmlLength, status }
 */
|
|
|
|
import { NextRequest, NextResponse } from "next/server";
|
|
|
|
/** Time budget, in milliseconds, for the upstream request including reading its body. */
const FETCH_TIMEOUT_MS = 12000;

/** Attributes of one HTML tag, keyed by lower-cased attribute name. */
type TagAttributes = Map<string, string>;

/**
 * Builds regex source matching one opening `<tagName ...>` tag.
 * Quoted attribute values are consumed as a unit so a ">" or the other quote
 * character inside a value does not end the tag early. Capture group 1 is the
 * raw attribute text.
 */
function openTagSource(tagName: string): string {
  return `<${tagName}(?=[\\s/>])((?:[^>"']|"[^"]*"|'[^']*')*)>`;
}

/**
 * Parses the raw attribute text of a tag into a name -> value map.
 * Handles double-quoted, single-quoted, unquoted and valueless attributes;
 * when a name is repeated, the first occurrence wins.
 */
function parseAttributes(attributeSource: string): TagAttributes {
  const attributes: TagAttributes = new Map();
  const attributeRegex =
    /([^\s"'<>\/=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+)))?/g;
  for (const match of attributeSource.matchAll(attributeRegex)) {
    const name = match[1].toLowerCase();
    if (!attributes.has(name)) {
      attributes.set(name, match[2] ?? match[3] ?? match[4] ?? "");
    }
  }
  return attributes;
}

/** Returns the parsed attributes of every opening `<tagName>` tag in `html`, in document order. */
function collectTagAttributes(html: string, tagName: string): TagAttributes[] {
  const tagRegex = new RegExp(openTagSource(tagName), "gi");
  return Array.from(html.matchAll(tagRegex), (match) => parseAttributes(match[1]));
}

/**
 * Returns the trimmed `content` of the first `<meta>` tag whose `key` attribute
 * equals `value` (case-insensitive), or "" when there is none.
 * Attribute order inside the tag does not matter.
 */
function findMetaContent(
  metaTags: TagAttributes[],
  key: "name" | "property",
  value: string,
): string {
  for (const attributes of metaTags) {
    const content = attributes.get("content");
    if (content !== undefined && attributes.get(key)?.trim().toLowerCase() === value) {
      return content.trim();
    }
  }
  return "";
}

/** Removes every tag from an HTML fragment and trims the result. */
function stripTags(fragment: string): string {
  return fragment.replace(/<[^>]*>/g, "").trim();
}

/**
 * Extracts the SEO-relevant parts of a page from its raw HTML using regular
 * expressions (no DOM parser is involved, so malformed markup is handled on a
 * best-effort basis).
 *
 * @param html    Raw HTML of the page.
 * @param pageUrl Parsed URL of the page; used to resolve relative links and to
 *                decide whether a link stays on the same hostname.
 */
function extractPageData(html: string, pageUrl: URL) {
  const titleMatch = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
  const title = titleMatch ? titleMatch[1].trim() : "";

  const metaTags = collectTagAttributes(html, "meta");
  const metaDescription = findMetaContent(metaTags, "name", "description");
  const metaKeywords = findMetaContent(metaTags, "name", "keywords");
  const ogTitle = findMetaContent(metaTags, "property", "og:title");
  const ogDescription = findMetaContent(metaTags, "property", "og:description");

  // `rel` is a space-separated token list, so look for the "canonical" token.
  const canonicalTag = collectTagAttributes(html, "link").find((attributes) =>
    (attributes.get("rel") ?? "").toLowerCase().split(/\s+/).includes("canonical"),
  );
  const canonical = canonicalTag?.get("href") ?? "";

  // The back-reference makes the closing tag match the opening heading level.
  const headings: { level: number; text: string }[] = [];
  const headingRegex = /<h([1-6])(?=[\s/>])[^>]*>([\s\S]*?)<\/h\1\s*>/gi;
  for (const match of html.matchAll(headingRegex)) {
    const text = stripTags(match[2]);
    if (text) headings.push({ level: Number(match[1]), text });
  }

  // The look-ahead in openTagSource keeps <area>, <abbr>, <article>, ... from
  // being mistaken for anchors.
  const links: { href: string; text: string; internal: boolean }[] = [];
  const linkRegex = new RegExp(`${openTagSource("a")}([\\s\\S]*?)<\\/a\\s*>`, "gi");
  for (const match of html.matchAll(linkRegex)) {
    const href = parseAttributes(match[1]).get("href")?.trim();
    if (!href || href.startsWith("#") || /^javascript:/i.test(href)) continue;
    const text = stripTags(match[2]).substring(0, 100);
    try {
      const linkHost = new URL(href, pageUrl).hostname;
      links.push({ href, text, internal: linkHost === pageUrl.hostname });
    } catch {
      // Unresolvable hrefs are skipped on purpose.
      continue;
    }
  }

  // Images are reported whether or not they carry an alt attribute (missing
  // alt is reported as ""), and regardless of attribute order.
  const images: { src: string; alt: string }[] = [];
  for (const attributes of collectTagAttributes(html, "img")) {
    const src = attributes.get("src");
    if (src !== undefined) images.push({ src, alt: attributes.get("alt") ?? "" });
  }

  const text = html
    .replace(/<script[\s\S]*?<\/script>/gi, "")
    .replace(/<style[\s\S]*?<\/style>/gi, "")
    .replace(/<[^>]*>/g, " ")
    .replace(/\s+/g, " ")
    .trim()
    .substring(0, 5000);

  return {
    title,
    metaDescription,
    metaKeywords,
    canonical,
    ogTitle,
    ogDescription,
    headings,
    links: links.slice(0, 100),
    images: images.slice(0, 50),
    text,
  };
}

/**
 * GET handler: downloads the page given in the `url` query parameter and
 * returns a JSON summary of it.
 *
 * Responses:
 * - 400 `{ error }` when `url` is missing, unparsable, or not http(s).
 * - `{ error: "HTTP <code>", status }` when the upstream answers with a non-OK
 *   status; no status override is passed, so the route itself answers with the
 *   default status.
 * - 500 `{ error }` when the request throws (network failure, timeout abort).
 * - Otherwise `{ url, title, metaDescription, metaKeywords, canonical, ogTitle,
 *   ogDescription, headings, links (max 100), images (max 50),
 *   text (max 5000 chars), htmlLength, status }`.
 *
 * @param request Incoming request; only `nextUrl.searchParams` is read.
 */
export async function GET(request: NextRequest) {
  const targetUrl = request.nextUrl.searchParams.get("url");
  if (!targetUrl) {
    return NextResponse.json({ error: "URL parameter required" }, { status: 400 });
  }

  let pageUrl: URL;
  try {
    pageUrl = new URL(targetUrl);
  } catch {
    return NextResponse.json({ error: "Invalid URL" }, { status: 400 });
  }

  // The URL comes straight from the caller: refuse anything that is not web
  // content before handing it to fetch.
  // NOTE(review): internal/private hosts are not filtered here; if this route is
  // publicly reachable, consider an allow/deny list to limit SSRF exposure.
  if (pageUrl.protocol !== "http:" && pageUrl.protocol !== "https:") {
    return NextResponse.json(
      { error: "Only http and https URLs are supported" },
      { status: 400 },
    );
  }

  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);

  try {
    const res = await fetch(targetUrl, {
      signal: controller.signal,
      headers: {
        "User-Agent": "PromptArch-SEO-Bot/1.5 (https://rommark.dev)",
        Accept: "text/html,application/xhtml+xml,text/plain;q=0.9",
      },
    });

    if (!res.ok) {
      return NextResponse.json({ error: `HTTP ${res.status}`, status: res.status });
    }

    // Still inside the timeout window: a stalled body is aborted as well.
    const html = await res.text();
    const page = extractPageData(html, pageUrl);

    return NextResponse.json({
      url: targetUrl,
      title: page.title,
      metaDescription: page.metaDescription,
      metaKeywords: page.metaKeywords,
      canonical: page.canonical,
      ogTitle: page.ogTitle,
      ogDescription: page.ogDescription,
      headings: page.headings,
      links: page.links,
      images: page.images,
      text: page.text,
      htmlLength: html.length,
      status: res.status,
    });
  } catch (error) {
    const msg = error instanceof Error ? error.message : "Fetch failed";
    return NextResponse.json({ error: msg }, { status: 500 });
  } finally {
    // Runs on every path so the timer never outlives the request.
    clearTimeout(timeout);
  }
}
|