/**
 * Next.js API route: Fetch URL content for SEO/web auditing.
 * Endpoint: GET /api/fetch-url?url=https://example.com
 * Returns: { url, title, metaDescription, metaKeywords, canonical, ogTitle,
 *            ogDescription, headings, links, images, text, htmlLength, status }
 */
import { NextRequest, NextResponse } from "next/server";

/** A heading found in the page, e.g. level 2 for an h2 element. */
interface Heading {
  level: number;
  text: string;
}

/** An anchor found in the page; `internal` is true when it resolves to the audited host. */
interface PageLink {
  href: string;
  text: string;
  internal: boolean;
}

/** An image found in the page; `alt` is "" when the attribute is empty or absent. */
interface PageImage {
  src: string;
  alt: string;
}

/** Quoted attribute values of one opening tag, keyed by lower-cased attribute name. */
type TagAttributes = Map<string, string>;

const FETCH_TIMEOUT_MS = 12000;
const MAX_LINKS = 100;
const MAX_IMAGES = 50;
const MAX_TEXT_CHARS = 5000;
const MAX_LINK_TEXT_CHARS = 100;

/**
 * Regex fragment for the attribute portion of an opening tag. Quoted values are
 * consumed as a unit so that a ">" inside an attribute value does not end the tag.
 */
const TAG_ATTRIBUTES = `(?:[^>"']|"[^"]*"|'[^']*')*`;

/**
 * Collects the quoted attributes of a single opening tag.
 * Unquoted values are ignored; when a name is repeated the first occurrence wins.
 */
function parseAttributes(tag: string): TagAttributes {
  const attributes: TagAttributes = new Map();
  // Scanning sequentially consumes each quoted value whole, so text inside one
  // attribute's value is never mistaken for another attribute.
  const attributeRegex = /([^\s"'=<>\/]+)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;
  let match: RegExpExecArray | null;
  while ((match = attributeRegex.exec(tag)) !== null) {
    const name = (match[1] ?? "").toLowerCase();
    if (!attributes.has(name)) {
      attributes.set(name, match[2] ?? match[3] ?? "");
    }
  }
  return attributes;
}

/** Returns the parsed attributes of every `<tagName ...>` opening tag, in document order. */
function findTagAttributes(html: string, tagName: string): TagAttributes[] {
  const tagRegex = new RegExp(String.raw`<${tagName}\b${TAG_ATTRIBUTES}>`, "gi");
  const found: TagAttributes[] = [];
  let match: RegExpExecArray | null;
  while ((match = tagRegex.exec(html)) !== null) {
    found.push(parseAttributes(match[0] ?? ""));
  }
  return found;
}

/** Removes all tags from an HTML fragment and trims the remainder. */
function stripTags(fragment: string): string {
  return fragment.replace(/<[^>]*>/g, "").trim();
}

/** Returns the trimmed contents of the first title element, or "" when there is none. */
function extractTitle(html: string): string {
  const titleRegex = new RegExp(String.raw`<title\b${TAG_ATTRIBUTES}>([\s\S]*?)</title>`, "i");
  const match = titleRegex.exec(html);
  return match?.[1]?.trim() ?? "";
}

/**
 * Returns the trimmed `content` of the first meta tag whose `keyAttr` attribute
 * equals `keyValue` (case-insensitive), regardless of attribute order; "" if none.
 */
function metaContent(
  metaTags: TagAttributes[],
  keyAttr: "name" | "property",
  keyValue: string,
): string {
  for (const attributes of metaTags) {
    const content = attributes.get("content");
    if (content === undefined) continue;
    if (attributes.get(keyAttr)?.trim().toLowerCase() === keyValue) {
      return content.trim();
    }
  }
  return "";
}

/** Returns the href of the first `<link rel="canonical">` tag, or "" when there is none. */
function extractCanonical(html: string): string {
  for (const attributes of findTagAttributes(html, "link")) {
    const rel = attributes.get("rel");
    const href = attributes.get("href");
    if (rel !== undefined && href !== undefined && rel.trim().toLowerCase() === "canonical") {
      return href;
    }
  }
  return "";
}

/** Lists h1-h6 elements with non-empty text, in document order. */
function extractHeadings(html: string): Heading[] {
  const headings: Heading[] = [];
  const headingRegex = new RegExp(
    String.raw`<h([1-6])\b${TAG_ATTRIBUTES}>([\s\S]*?)</h[1-6]>`,
    "gi",
  );
  let match: RegExpExecArray | null;
  while ((match = headingRegex.exec(html)) !== null) {
    const text = stripTags(match[2] ?? "");
    if (text) {
      headings.push({ level: parseInt(match[1] ?? "", 10), text });
    }
  }
  return headings;
}

/**
 * Lists anchors that carry a usable href. Empty, fragment-only ("#...") and
 * "javascript:" hrefs are skipped, as are hrefs that cannot be resolved against
 * `pageUrl`. Link text has its tags removed and is capped at MAX_LINK_TEXT_CHARS.
 */
function extractLinks(html: string, pageUrl: string): PageLink[] {
  const links: PageLink[] = [];
  const baseHost = new URL(pageUrl).hostname;
  const openAnchor = new RegExp(String.raw`<a\b${TAG_ATTRIBUTES}>`, "gi");
  const closeAnchor = /<\/a>/gi;
  let opening: RegExpExecArray | null;
  while ((opening = openAnchor.exec(html)) !== null) {
    const href = parseAttributes(opening[0] ?? "").get("href")?.trim();
    if (!href || href.startsWith("#") || href.startsWith("javascript:")) continue;

    const innerStart = openAnchor.lastIndex;
    closeAnchor.lastIndex = innerStart;
    const closing = closeAnchor.exec(html);
    // Without a closing tag after this point no later anchor can be complete either.
    if (closing === null) break;
    // Resume scanning after the closing tag so the anchor body is not rescanned.
    openAnchor.lastIndex = closeAnchor.lastIndex;

    const text = stripTags(html.slice(innerStart, closing.index)).substring(
      0,
      MAX_LINK_TEXT_CHARS,
    );
    try {
      const linkHost = new URL(href, pageUrl).hostname;
      links.push({ href, text, internal: linkHost === baseHost });
    } catch {
      // href cannot be resolved against the page URL; leave it out of the report.
    }
  }
  return links;
}

/**
 * Lists img tags that have a quoted src attribute, in any attribute order.
 * Images without an alt attribute are kept with alt "" so they are not hidden
 * from the audit.
 */
function extractImages(html: string): PageImage[] {
  const images: PageImage[] = [];
  for (const attributes of findTagAttributes(html, "img")) {
    const src = attributes.get("src");
    if (src === undefined) continue;
    images.push({ src, alt: attributes.get("alt") ?? "" });
  }
  return images;
}

/**
 * Reduces the document to plain text: script and style elements are dropped with
 * their contents, remaining tags become spaces, whitespace is collapsed, and the
 * result is capped at MAX_TEXT_CHARS.
 */
function extractPlainText(html: string): string {
  return html
    .replace(/<script\b[^>]*>[\s\S]*?<\/script>/gi, "")
    .replace(/<style\b[^>]*>[\s\S]*?<\/style>/gi, "")
    .replace(/<[^>]*>/g, " ")
    .replace(/\s+/g, " ")
    .trim()
    .substring(0, MAX_TEXT_CHARS);
}

/**
 * Handles GET /api/fetch-url?url=<absolute http(s) URL>.
 *
 * Fetches the target page and responds with JSON describing it: title, meta
 * description/keywords, canonical URL, Open Graph title/description, headings,
 * up to MAX_LINKS links, up to MAX_IMAGES images, up to MAX_TEXT_CHARS of plain
 * text, the raw HTML length and the upstream status code.
 *
 * Error responses:
 * - 400 when the `url` parameter is missing, unparsable, or not http/https.
 * - 200 with `{ error, status }` when the upstream answers with a non-2xx status.
 * - 500 with `{ error }` when the fetch fails or exceeds FETCH_TIMEOUT_MS.
 */
export async function GET(request: NextRequest) {
  const targetUrl = request.nextUrl.searchParams.get("url");
  if (!targetUrl) {
    return NextResponse.json({ error: "URL parameter required" }, { status: 400 });
  }

  let parsedUrl: URL;
  try {
    parsedUrl = new URL(targetUrl);
  } catch {
    return NextResponse.json({ error: "Invalid URL" }, { status: 400 });
  }

  // The URL comes from the caller and is fetched server-side, so only web schemes are accepted.
  // NOTE(review): the host itself is not restricted, so this route can still be pointed at
  // internal/private addresses; consider an allow-list or private-range check if that matters.
  if (parsedUrl.protocol !== "http:" && parsedUrl.protocol !== "https:") {
    return NextResponse.json({ error: "Only http and https URLs are supported" }, { status: 400 });
  }

  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);

  try {
    const res = await fetch(targetUrl, {
      signal: controller.signal,
      headers: {
        "User-Agent": "PromptArch-SEO-Bot/1.5 (https://rommark.dev)",
        Accept: "text/html,application/xhtml+xml,text/plain;q=0.9",
      },
    });

    if (!res.ok) {
      return NextResponse.json({ error: `HTTP ${res.status}`, status: res.status });
    }

    // The timer stays armed while the body is read so a stalled download is aborted too.
    const html = await res.text();
    const metaTags = findTagAttributes(html, "meta");

    return NextResponse.json({
      url: targetUrl,
      title: extractTitle(html),
      metaDescription: metaContent(metaTags, "name", "description"),
      metaKeywords: metaContent(metaTags, "name", "keywords"),
      canonical: extractCanonical(html),
      ogTitle: metaContent(metaTags, "property", "og:title"),
      ogDescription: metaContent(metaTags, "property", "og:description"),
      headings: extractHeadings(html),
      links: extractLinks(html, targetUrl).slice(0, MAX_LINKS),
      images: extractImages(html).slice(0, MAX_IMAGES),
      text: extractPlainText(html),
      htmlLength: html.length,
      status: res.status,
    });
  } catch (error) {
    const msg = error instanceof Error ? error.message : "Fetch failed";
    return NextResponse.json({ error: msg }, { status: 500 });
  } finally {
    // Runs on every exit path so the timer never outlives the request.
    clearTimeout(timeout);
  }
}