84 lines
2.8 KiB
TypeScript
84 lines
2.8 KiB
TypeScript
import { NextRequest, NextResponse } from "next/server";
|
|
|
|
// Lightweight sitemap XML counting for the Edge/Node runtime. Note: DOMParser is a browser-only API and is not provided by either runtime.
|
|
// If your project needs robust XML, switch to a library, but this is fine for sitemaps.
|
|
/**
 * Downloads `url` and returns the response body as text.
 *
 * The request advertises a preference for XML via the `Accept` header and is
 * issued with `cache: "no-store"`.
 *
 * @param url - Absolute URL to fetch.
 * @param timeoutMs - Upper bound, in milliseconds, for the whole request
 *   (headers and body). The request is aborted once it elapses, so a stalled
 *   remote host cannot hold the caller open indefinitely.
 * @returns The response body decoded as text.
 * @throws Error with message `Failed <status>` when the response is not OK;
 *   whatever `fetch` rejects with on network failure or abort.
 */
async function fetchText(url: string, timeoutMs = 10000): Promise<string> {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const r = await fetch(url, {
      headers: { Accept: "application/xml, text/xml, */*" },
      cache: "no-store",
      signal: controller.signal,
    });
    if (!r.ok) throw new Error(`Failed ${r.status}`);
    // Await here (not just return the promise) so the timeout also covers the body read.
    return await r.text();
  } finally {
    // Always release the timer so a finished request leaves nothing pending.
    clearTimeout(timer);
  }
}
|
|
|
|
/** The five entities predefined by XML, keyed by name. */
const XML_NAMED_ENTITIES: Record<string, string> = { amp: "&", lt: "<", gt: ">", quot: '"', apos: "'" };

/**
 * Classifies a sitemap document and counts its entries.
 *
 * This works on the raw text instead of a DOM because `DOMParser` is a browser
 * API and is not defined in the server runtimes this handler runs in; calling
 * it there throws a `ReferenceError`.
 *
 * - A document containing `<urlset>` yields the number of `<url>` elements.
 * - A document containing `<sitemapindex>` yields the non-empty `<loc>` values
 *   (entity-decoded, CDATA-unwrapped, trimmed) with `count` set to 0; the
 *   caller is expected to follow those URLs.
 * - Anything else falls back to the number of `<loc>` elements.
 *
 * Only unprefixed tag names are matched, so namespaced elements such as
 * `<image:loc>` are not counted. Tag matching is case-sensitive, as in XML.
 *
 * @param xml - Raw sitemap document text.
 * @returns `isIndex` telling whether the document is a sitemap index, the
 *   direct URL `count`, and the child sitemap `locs` (empty unless `isIndex`).
 */
function countFromXml(xml: string): { isIndex: boolean; count: number; locs: string[] } {
  // Remove comments first so commented-out entries are not counted.
  const body = xml.replace(/<!--[\s\S]*?-->/g, "");

  // The lookahead requires the tag name to end here, so "<url" never matches "<urlset".
  const countTag = (tag: string): number =>
    body.match(new RegExp(`<${tag}(?=[\\s>/])`, "g"))?.length ?? 0;

  if (countTag("urlset") > 0) {
    return { isIndex: false, count: countTag("url"), locs: [] };
  }

  if (countTag("sitemapindex") > 0) {
    const locs: string[] = [];
    for (const match of body.matchAll(/<loc(?:\s[^>]*)?>([\s\S]*?)<\/loc\s*>/g)) {
      const value = decodeXmlText(match[1] ?? "").trim();
      if (value) locs.push(value);
    }
    return { isIndex: true, count: 0, locs };
  }

  // fallback: count <loc>
  return { isIndex: false, count: countTag("loc"), locs: [] };
}

/**
 * Converts raw XML character data to plain text: unwraps CDATA sections
 * verbatim and decodes the predefined and numeric character references.
 * Both are handled in a single pass so CDATA content is never entity-decoded
 * and already-decoded text is never decoded twice.
 */
function decodeXmlText(raw: string): string {
  return raw.replace(
    /<!\[CDATA\[([\s\S]*?)\]\]>|&(#x[0-9a-fA-F]+|#\d+|amp|lt|gt|quot|apos);/g,
    (match: string, cdata: string | undefined, entity: string | undefined): string => {
      if (cdata !== undefined) return cdata;
      if (entity === undefined) return match;
      if (!entity.startsWith("#")) return XML_NAMED_ENTITIES[entity] ?? match;
      const codePoint = entity[1] === "x" ? parseInt(entity.slice(2), 16) : parseInt(entity.slice(1), 10);
      // Leave out-of-range references untouched instead of throwing a RangeError.
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match;
    },
  );
}
|
|
|
|
/** Matches a request path that already points at a sitemap file, e.g. `/sitemap-posts.xml`. */
const SITEMAP_PATH = /\/sitemap.*\.xml$/i;

/** Upper bound on child sitemaps followed from a sitemap index, to keep the request fast/safe. */
const MAX_CHILD_SITEMAPS = 25;

/**
 * Builds a JSON response carrying the CORS header, so browser callers can read
 * error payloads as well as successful ones.
 */
function jsonWithCors(body: unknown, status = 200): NextResponse {
  const res = NextResponse.json(body, { status });
  res.headers.set("Access-Control-Allow-Origin", "*"); // dev-friendly; tighten in prod
  return res;
}

/** Returns true when `value` parses as an absolute http(s) URL. */
function isHttpUrl(value: string): boolean {
  try {
    return /^https?:$/.test(new URL(value).protocol);
  } catch {
    return false;
  }
}

/**
 * GET handler: counts the URLs listed in a site's sitemap.
 *
 * Query parameter `u` is an absolute http(s) URL. When its path already looks
 * like a sitemap file it is used directly; otherwise `/sitemap.xml`,
 * `/sitemap_index.xml` and `/sitemap-index.xml` on the same origin are tried
 * in that order, and the first one that can be fetched and parsed wins.
 *
 * For a sitemap index, up to `MAX_CHILD_SITEMAPS` children are fetched and
 * their URL counts summed. Children that fail to load are skipped, and a child
 * that is itself an index contributes 0 (nested indexes are not followed).
 *
 * Responses (all JSON):
 * - 200 `{ count, sitemapUsed }`
 * - 400 `{ error }` when `u` is missing or not a valid http(s) URL
 * - 404 `{ error }` when no candidate sitemap could be fetched
 *
 * NOTE(security): the fetched host is caller-controlled and nothing here blocks
 * private or internal addresses; add an allow-list or address filtering before
 * exposing this endpoint publicly.
 */
export async function GET(req: NextRequest): Promise<NextResponse> {
  const u = req.nextUrl.searchParams.get("u");
  if (!u) return jsonWithCors({ error: "Missing ?u" }, 400);

  let target: URL;
  try {
    target = new URL(u);
    if (!/^https?:$/.test(target.protocol)) throw new Error("bad protocol");
  } catch {
    return jsonWithCors({ error: "Invalid URL" }, 400);
  }

  const candidates: string[] = SITEMAP_PATH.test(target.pathname)
    ? [target.toString()]
    : [
        new URL("/sitemap.xml", target.origin).toString(),
        new URL("/sitemap_index.xml", target.origin).toString(),
        new URL("/sitemap-index.xml", target.origin).toString(),
      ];

  let used: string | null = null;
  let count = 0;

  for (const c of candidates) {
    try {
      const xml = await fetchText(c);
      const { isIndex, count: directCount, locs } = countFromXml(xml);
      used = c;
      if (!isIndex) {
        count = directCount;
      } else {
        let total = 0;
        for (const loc of locs.slice(0, MAX_CHILD_SITEMAPS)) {
          // Child URLs come from the remote document, so hold them to the same http(s) rule as ?u.
          if (!isHttpUrl(loc)) continue;
          try {
            const child = countFromXml(await fetchText(loc));
            total += child.count;
          } catch {
            /* skip: one unreachable child should not fail the whole count */
          }
        }
        count = total;
      }
      break;
    } catch {
      // try next candidate
    }
  }

  if (!used) return jsonWithCors({ error: "Sitemap not reachable" }, 404);

  return jsonWithCors({ count, sitemapUsed: used });
}
|