// PromptArch/app/api/fetch-url/route.ts

/**
 * Next.js API route: Fetch URL content for SEO/web auditing.
 * Endpoint: GET /api/fetch-url?url=https://example.com
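 * Returns: { url, title, metaDescription, metaKeywords, canonical, ogTitle,
 *            ogDescription, headings, links, images, text, htmlLength, status }
 *          or { error } when the request cannot be fulfilled.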
 */

import { NextRequest, NextResponse } from "next/server";

/** Upper bound, in milliseconds, for the whole upstream request (headers and body). */
const FETCH_TIMEOUT_MS = 12000;
/** Caps applied to the returned collections and text so responses stay small. */
const MAX_LINKS = 100;
const MAX_IMAGES = 50;
const MAX_TEXT_LENGTH = 5000;
const MAX_LINK_TEXT_LENGTH = 100;

type PageHeading = { level: number; text: string };
type PageLink = { href: string; text: string; internal: boolean };
type PageImage = { src: string; alt: string };

/**
 * Reads a quoted attribute value out of the markup of a single tag.
 *
 * Handles single- and double-quoted values independently, so a value such as
 * `content="It's here"` is not cut short at the apostrophe. Unquoted attribute
 * values are not supported.
 *
 * @param tag - Markup of one opening tag (or just its attribute list).
 * @param name - Attribute name; must not contain regex metacharacters.
 * @returns The raw attribute value, or `null` when the attribute is absent.
 */
function readAttribute(tag: string, name: string): string | null {
    // Leading \s keeps `src` from matching inside `data-src`, `href` inside `data-href`, etc.
    const match = new RegExp(`\\s${name}\\s*=\\s*(?:"([^"]*)"|'([^']*)')`, "i").exec(tag);
    if (!match) return null;
    return match[1] ?? match[2] ?? "";
}

/**
 * Finds the first `<tagName>` whose `keyAttr` equals `keyValue` (case-insensitively)
 * and returns its `valueAttr`, regardless of the order the attributes appear in.
 *
 * @returns The raw attribute value, or `""` when no matching tag carries it.
 */
function findTagAttribute(
    html: string,
    tagName: string,
    keyAttr: string,
    keyValue: string,
    valueAttr: string,
): string {
    const tagRegex = new RegExp(`<${tagName}\\b[^>]*>`, "gi");
    let match: RegExpExecArray | null;
    while ((match = tagRegex.exec(html)) !== null) {
        const key = readAttribute(match[0], keyAttr);
        if (key === null || key.toLowerCase() !== keyValue) continue;
        const value = readAttribute(match[0], valueAttr);
        if (value !== null) return value;
    }
    return "";
}

/** Removes every tag from a markup fragment, leaving only its text content. */
function stripTags(fragment: string): string {
    return fragment.replace(/<[^>]*>/g, "");
}

/** Collects the non-empty `<h1>`–`<h6>` headings in document order. */
function extractHeadings(html: string): PageHeading[] {
    const headings: PageHeading[] = [];
    const headingRegex = /<h([1-6])[^>]*>([\s\S]*?)<\/h[1-6]>/gi;
    let match: RegExpExecArray | null;
    while ((match = headingRegex.exec(html)) !== null) {
        const text = stripTags(match[2] ?? "").trim();
        if (text) headings.push({ level: Number(match[1]), text });
    }
    return headings;
}

/**
 * Collects up to MAX_LINKS anchors, skipping fragment-only and `javascript:` links.
 * A link is `internal` when it resolves (relative to the page) to the page's hostname.
 */
function extractLinks(html: string, pageUrl: URL): PageLink[] {
    const links: PageLink[] = [];
    // `<a\b` avoids treating <area>, <abbr>, <article>, ... as anchors.
    const anchorRegex = /<a\b([^>]*)>([\s\S]*?)<\/a>/gi;
    let match: RegExpExecArray | null;
    while (links.length < MAX_LINKS && (match = anchorRegex.exec(html)) !== null) {
        const href = (readAttribute(match[1] ?? "", "href") ?? "").trim();
        if (!href || href.startsWith("#") || href.toLowerCase().startsWith("javascript:")) continue;
        const text = stripTags(match[2] ?? "").trim().substring(0, MAX_LINK_TEXT_LENGTH);
        try {
            const linkHost = new URL(href, pageUrl).hostname;
            links.push({ href, text, internal: linkHost === pageUrl.hostname });
        } catch {
            // href cannot be resolved to a URL; leave it out of the report.
            continue;
        }
    }
    return links;
}

/**
 * Collects up to MAX_IMAGES `<img>` tags that have a `src`.
 * Images without an `alt` attribute are included with `alt: ""` so that audits
 * can see them; attribute order within the tag does not matter.
 */
function extractImages(html: string): PageImage[] {
    const images: PageImage[] = [];
    const imgRegex = /<img\b[^>]*>/gi;
    let match: RegExpExecArray | null;
    while (images.length < MAX_IMAGES && (match = imgRegex.exec(html)) !== null) {
        const src = readAttribute(match[0], "src");
        if (src === null) continue;
        images.push({ src, alt: readAttribute(match[0], "alt") ?? "" });
    }
    return images;
}

/** Reduces a page to whitespace-normalised visible text, capped at MAX_TEXT_LENGTH characters. */
function extractPlainText(html: string): string {
    return html
        .replace(/<script[\s\S]*?<\/script>/gi, "")
        .replace(/<style[\s\S]*?<\/style>/gi, "")
        .replace(/<[^>]*>/g, " ")
        .replace(/\s+/g, " ")
        .trim()
        .substring(0, MAX_TEXT_LENGTH);
}

/**
 * Extracts the SEO-relevant parts of a page using regular expressions.
 * This is a best-effort scan, not a full HTML parse: attribute values that
 * contain `>` and unquoted attribute values are not recognised.
 */
function analyzeHtml(html: string, pageUrl: URL) {
    const titleMatch = /<title[^>]*>([\s\S]*?)<\/title>/i.exec(html);
    return {
        title: titleMatch?.[1]?.trim() ?? "",
        metaDescription: findTagAttribute(html, "meta", "name", "description", "content").trim(),
        metaKeywords: findTagAttribute(html, "meta", "name", "keywords", "content").trim(),
        canonical: findTagAttribute(html, "link", "rel", "canonical", "href"),
        ogTitle: findTagAttribute(html, "meta", "property", "og:title", "content").trim(),
        ogDescription: findTagAttribute(html, "meta", "property", "og:description", "content").trim(),
        headings: extractHeadings(html),
        links: extractLinks(html, pageUrl),
        images: extractImages(html),
        text: extractPlainText(html),
    };
}

/**
 * GET /api/fetch-url?url=<absolute http(s) URL>
 *
 * Fetches the target page and returns a JSON summary of it: title, meta
 * description/keywords, canonical URL, Open Graph title/description, headings,
 * links, images, a plain-text excerpt, the HTML length and the upstream status.
 *
 * Responses:
 * - 400 `{ error }` when `url` is missing, unparsable, or not http/https.
 * - 200 `{ error, status }` when the upstream answers with a non-2xx status
 *   (the upstream status is reported in the body; kept for existing callers).
 * - 504 `{ error }` when the upstream does not finish within FETCH_TIMEOUT_MS.
 * - 500 `{ error }` on any other failure (DNS, connection, ...).
 */
export async function GET(request: NextRequest) {
    const targetUrl = request.nextUrl.searchParams.get("url");

    if (!targetUrl) {
        return NextResponse.json({ error: "URL parameter required" }, { status: 400 });
    }

    let pageUrl: URL;
    try {
        pageUrl = new URL(targetUrl);
    } catch {
        return NextResponse.json({ error: "Invalid URL" }, { status: 400 });
    }

    // The URL comes from the caller: refuse data:, blob:, file: and other non-web schemes.
    // NOTE(security): any http(s) host is still fetched, including loopback and
    // private-network addresses. If this route is publicly reachable, add a host
    // allow-list or block internal address ranges to prevent SSRF.
    if (pageUrl.protocol !== "http:" && pageUrl.protocol !== "https:") {
        return NextResponse.json({ error: "Only http and https URLs are supported" }, { status: 400 });
    }

    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);

    try {
        const res = await fetch(targetUrl, {
            signal: controller.signal,
            headers: {
                "User-Agent": "PromptArch-SEO-Bot/1.5 (https://rommark.dev)",
                Accept: "text/html,application/xhtml+xml,text/plain;q=0.9",
            },
        });

        if (!res.ok) {
            return NextResponse.json({ error: `HTTP ${res.status}`, status: res.status });
        }

        // Read the body while the abort timer is still armed so a stalled
        // response cannot hang the request indefinitely.
        const html = await res.text();

        return NextResponse.json({
            url: targetUrl,
            ...analyzeHtml(html, pageUrl),
            htmlLength: html.length,
            status: res.status,
        });
    } catch (error) {
        if (controller.signal.aborted) {
            return NextResponse.json(
                { error: `Request timed out after ${FETCH_TIMEOUT_MS}ms` },
                { status: 504 },
            );
        }
        const msg = error instanceof Error ? error.message : "Fetch failed";
        return NextResponse.json({ error: msg }, { status: 500 });
    } finally {
        // Runs on every path, so the timer never outlives the request.
        clearTimeout(timeout);
    }
}