2025-09-17 00:19:13 +05:30

84 lines
2.8 KiB
TypeScript

import { NextRequest, NextResponse } from "next/server";
// Lightweight sitemap XML counting for the Edge/Node runtime.
// If your project needs robust XML handling, switch to a dedicated library; this is fine for sitemaps.
/**
 * Downloads `url` and resolves with the response body as text.
 *
 * The request advertises a preference for XML via the Accept header and
 * opts out of the fetch cache (`cache: "no-store"`).
 *
 * @param url Absolute URL to download.
 * @returns The response body decoded as text.
 * @throws Error with message `Failed <status>` when the response is not OK.
 */
async function fetchText(url: string) {
  const requestOptions = {
    headers: { Accept: "application/xml, text/xml, */*" },
    cache: "no-store",
  } as const;

  const response = await fetch(url, requestOptions);
  if (response.ok) {
    return response.text();
  }
  throw new Error(`Failed ${response.status}`);
}
/**
 * Classifies a sitemap document and counts its entries.
 *
 * `DOMParser` is a browser API and is not defined as a global in Node.js or
 * the Next.js Edge runtime, so the document is scanned with regular
 * expressions instead of being parsed into a DOM. That is sufficient for the
 * flat structure of sitemap files; it is not a general XML parser.
 *
 * @param xml Raw sitemap XML text.
 * @returns
 *  - `<urlset>` document: `isIndex: false`, `count` = number of `<url>` elements.
 *  - `<sitemapindex>` document: `isIndex: true`, `count: 0`, and `locs` holding
 *    every non-empty `<loc>` value (trimmed, CDATA unwrapped, entities decoded).
 *  - anything else: `isIndex: false`, `count` = number of `<loc>` elements.
 */
function countFromXml(xml: string): { isIndex: boolean; count: number; locs: string[] } {
  // Strip comments first so commented-out entries are not counted.
  const body = xml.replace(/<!--[\s\S]*?-->/g, "");

  // Counts opening (or self-closing) tags with exactly this unprefixed name;
  // the lookahead keeps `<url` from matching `<urlset`.
  const countTag = (tag: string): number =>
    (body.match(new RegExp(`<${tag}(?=[\\s/>])`, "g")) ?? []).length;

  const namedEntities: Record<string, string> = { amp: "&", lt: "<", gt: ">", quot: '"', apos: "'" };
  const decodeEntities = (text: string): string =>
    text.replace(/&(amp|lt|gt|quot|apos|#\d+|#x[0-9a-fA-F]+);/g, (whole: string, ref: string) => {
      if (ref[0] !== "#") return namedEntities[ref] ?? whole;
      const code = ref[1] === "x" ? parseInt(ref.slice(2), 16) : parseInt(ref.slice(1), 10);
      // Leave out-of-range character references untouched instead of throwing.
      return code <= 0x10ffff ? String.fromCodePoint(code) : whole;
    });

  // Turns the raw inner text of a <loc> element into the URL it denotes.
  const readLoc = (raw: string): string => {
    const trimmed = raw.trim();
    const cdata = /^<!\[CDATA\[([\s\S]*?)\]\]>$/.exec(trimmed);
    // CDATA content is literal text, so it must not be entity-decoded.
    return cdata ? (cdata[1] ?? "").trim() : decodeEntities(trimmed);
  };

  if (countTag("urlset") > 0) {
    return { isIndex: false, count: countTag("url"), locs: [] };
  }

  if (countTag("sitemapindex") > 0) {
    // The lookbehind skips self-closing `<loc />`, which has no content to read.
    const locPattern = /<loc(?:\s+[^>]*?)?(?<!\/)>([\s\S]*?)<\/loc\s*>/g;
    const locs = Array.from(body.matchAll(locPattern), (m) => readLoc(m[1] ?? "")).filter(Boolean);
    return { isIndex: true, count: 0, locs };
  }

  // Fallback for documents without a recognised root: count <loc> elements.
  return { isIndex: false, count: countTag("loc"), locs: [] };
}
/**
 * GET handler: reports how many URLs a site's sitemap lists.
 *
 * Query parameter `u` is either a direct sitemap URL (path ends in
 * `/sitemap*.xml`) or any http(s) URL on the site, in which case the
 * conventional locations `/sitemap.xml`, `/sitemap_index.xml` and
 * `/sitemap-index.xml` on that origin are tried in order.
 *
 * Responses (all JSON, all carrying the CORS header):
 *  - 200 `{ count, sitemapUsed }` for the first candidate that could be fetched and read
 *  - 400 `{ error }` when `u` is missing, unparsable, or not http(s)
 *  - 404 `{ error }` when no candidate could be fetched and read
 *
 * For a sitemap index, up to 25 child sitemaps are fetched and their counts
 * summed; children that fail are skipped, and nested indexes are not followed.
 *
 * NOTE(review): `u` and the child <loc> values are caller-controlled, so this
 * endpoint will fetch arbitrary hosts (SSRF). Consider rejecting loopback and
 * private-network addresses before exposing it in production.
 */
export async function GET(req: NextRequest) {
  const MAX_CHILD_SITEMAPS = 25;

  // Build every response through one helper so error bodies are readable by
  // cross-origin browser callers too, not only the success response.
  const respond = (body: Record<string, unknown>, status = 200) => {
    const res = NextResponse.json(body, { status });
    res.headers.set("Access-Control-Allow-Origin", "*"); // dev-friendly; tighten in prod
    return res;
  };

  const isHttpUrl = (value: string): boolean => {
    try {
      const { protocol } = new URL(value);
      return protocol === "http:" || protocol === "https:";
    } catch {
      return false;
    }
  };

  const u = req.nextUrl.searchParams.get("u");
  if (!u) return respond({ error: "Missing ?u" }, 400);
  if (!isHttpUrl(u)) return respond({ error: "Invalid URL" }, 400);
  const target = new URL(u);

  // A path that already names a sitemap file is used as-is; otherwise probe
  // the conventional sitemap locations on the same origin.
  const candidates: string[] = /\/sitemap.*\.xml$/i.test(target.pathname)
    ? [target.toString()]
    : [
        new URL("/sitemap.xml", target.origin).toString(),
        new URL("/sitemap_index.xml", target.origin).toString(),
        new URL("/sitemap-index.xml", target.origin).toString(),
      ];

  let used: string | null = null;
  let count = 0;

  for (const candidate of candidates) {
    try {
      const xml = await fetchText(candidate);
      const { isIndex, count: directCount, locs } = countFromXml(xml);
      used = candidate;

      if (!isIndex) {
        count = directCount;
      } else {
        // Follow a bounded number of child sitemaps to keep it fast/safe, and
        // apply the same http(s)-only rule to them as to `u`.
        const children = locs.filter(isHttpUrl).slice(0, MAX_CHILD_SITEMAPS);
        const childCounts = await Promise.all(
          children.map(async (loc) => {
            try {
              // A nested index reports count 0, so it adds nothing to the total.
              return countFromXml(await fetchText(loc)).count;
            } catch {
              // Best effort: an unreachable or unreadable child is skipped.
              return 0;
            }
          }),
        );
        count = childCounts.reduce((sum, n) => sum + n, 0);
      }
      break;
    } catch {
      // Fetching or reading this candidate failed; try the next one.
    }
  }

  if (!used) return respond({ error: "Sitemap not reachable" }, 404);
  return respond({ count, sitemapUsed: used });
}