'use client';

import { useEffect, useMemo, useState } from "react";

// Path: app/(defaults)/crawl/page.tsx (App Router)
// TailwindCSS assumed.

type Row = Record<string, any>;

function csvEscape(v: unknown) {
  if (v === undefined || v === null) return "";
  const s = String(v);
  return /[",\n]/.test(s) ? `"${s.replace(/"/g, '""')}"` : s;
}

function toCSV(rows: Row[], headers: string[]) {
  const lines = [headers.join(",")];
  for (const r of rows) lines.push(headers.map((h) => csvEscape(r[h])).join(","));
  return lines.join("\n");
}

function download(filename: string, mime: string, text: string) {
  const blob = new Blob([text], { type: mime });
  const url = URL.createObjectURL(blob);
  const a = document.createElement("a");
  a.href = url;
  a.download = filename;
  a.click();
  URL.revokeObjectURL(url);
}

function getUnionKeys(rows: Row[]): string[] {
  const set = new Set<string>();
  for (const r of rows) Object.keys(r || {}).forEach((k) => set.add(k));
  // Prefer a useful order if present:
  const preferred = [
    "url", "status", "status_text", "time_ms", "bytes", "content_type", "http_version",
    "title", "title_length", "title_pixel_width",
    "meta_description", "meta_description_length", "meta_description_pixel_width",
    "h1_1", "h1_1_length", "h1_1_pixel_width", "h1_2", "h1_2_length", "h1_2_pixel_width",
    "h2_1", "h2_2",
    "canonical", "robots_meta", "x_robots_tag", "noindex", "nofollow",
    "lang", "word_count", "flesch_reading_ease", "flesch_kincaid_grade",
    "gunning_fog", "coleman_liau", "ari", "smog",
    "schema_types", "inlinks", "outlinks", "render_mode",
    "last_modified", "set_cookie", "crawl_timestamp",
    "duplicate_title_exact", "nearest_title_similarity", "nearest_title_url",
    "duplicate_description_exact", "nearest_description_similarity", "nearest_description_url",
  ];
  const others = [...set].filter((k) => !preferred.includes(k)).sort();
  return [...preferred.filter((k) => set.has(k)), ...others];
}

function coerce(value: any): string {
  if (Array.isArray(value)) return value.join(" | ");
  if (typeof value === "object" && value !== null) return JSON.stringify(value);
  return String(value ?? "");
}

function truncate(s: string, n: number) {
  return s.length > n ? s.slice(0, n - 1) + "…" : s;
}

function renderCell(value: any) {
  if (value == null) return <span className="text-gray-400">—</span>;
  if (typeof value === "string") {
    if (/^https?:\/\//i.test(value)) {
      return (
        <a href={value} target="_blank" rel="noreferrer" className="break-all text-blue-600 underline">
          {value}
        </a>
      );
    }
    return <span>{truncate(value, 220)}</span>;
  }
  if (typeof value === "number" || typeof value === "boolean") return <span>{String(value)}</span>;
  if (Array.isArray(value)) return <span>[{value.length} items]</span>;
  return (
    <details>
      <summary className="cursor-pointer text-gray-500">object</summary>
      <pre className="whitespace-pre-wrap text-xs">{JSON.stringify(value, null, 2)}</pre>
    </details>
  );
}

function summaryChips(report: any): { label: string; value: string | number }[] {
  const chips: { label: string; value: string | number }[] = [];
  const arr = Array.isArray(report) ? report : Array.isArray(report?.results) ? report.results : null;
  if (arr) chips.push({ label: "Pages crawled", value: arr.length });
  const totals = report?.totals || report?.summary || {};
  for (const [k, v] of Object.entries(totals)) {
    if (typeof v === "number") chips.push({ label: k, value: v });
  }
  return chips.slice(0, 8);
}

export default function CrawlPage() {
  const [siteUrl, setSiteUrl] = useState("");
  const [maxUrls, setMaxUrls] = useState<number | "">("");
  const [autoMaxLoading, setAutoMaxLoading] = useState(false);
  const [crawlLoading, setCrawlLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const [report, setReport] = useState<any>(null);
  const [query, setQuery] = useState(""); // ✅ NEW: quick search
  const [visible, setVisible] = useState<Record<string, boolean>>({}); // ✅ NEW: column toggles
  const [sortBy, setSortBy] = useState("url"); // ✅ NEW: sorting
  const [sortDir, setSortDir] = useState<"asc" | "desc">("asc");

  const apiBase = "https://app.crawlerx.co/crawl";

  const normalizedUrl = useMemo(() => {
    if (!siteUrl) return "";
    return siteUrl.match(/^https?:\/\//i) ? siteUrl : `https://${siteUrl}`;
  }, [siteUrl]);

  const isValidUrl = useMemo(() => {
    try {
      if (!normalizedUrl) return false;
      return !!new URL(normalizedUrl).hostname;
    } catch {
      return false;
    }
  }, [normalizedUrl]);

  // Hostname used in download filenames; falls back when the URL is unparsable.
  function hostForFilename() {
    try {
      return new URL(normalizedUrl).hostname;
    } catch {
      return "report";
    }
  }

  async function autoDetectMaxFromSitemap() {
    setError(null);
    setAutoMaxLoading(true);
    try {
      if (!isValidUrl) throw new Error("Enter a valid website URL first.");
      const res = await fetch(`/api/sitemap?u=${encodeURIComponent(normalizedUrl)}`);
      if (!res.ok) throw new Error(`Sitemap probe failed (${res.status})`);
      const json = await res.json();
      if (typeof json.count !== "number" || json.count < 1) {
        throw new Error("Sitemap found but contains no URLs.");
      }
      setMaxUrls(json.count);
    } catch (e: any) {
      setError(e?.message || "Failed to detect Max from sitemap.");
    } finally {
      setAutoMaxLoading(false);
    }
  }

  async function handleCrawl() {
    setError(null);
    setCrawlLoading(true);
    setReport(null);
    try {
      if (!isValidUrl) throw new Error("Please enter a valid website URL (with or without https://).");
      const max = typeof maxUrls === "number" && maxUrls > 0 ? maxUrls : 50;
      const apiUrl = `${apiBase}?url=${encodeURIComponent(normalizedUrl)}&max=${max}`;
      const res = await fetch(apiUrl);
      if (!res.ok) throw new Error(`Crawler API error: ${res.status} ${res.statusText}`);
      const data = await res.json();
      setReport(data);
    } catch (e: any) {
      setError(e?.message || "Failed to crawl the site.");
    } finally {
      setCrawlLoading(false);
    }
  }

  // Downloads the raw, unfiltered report exactly as the API returned it.
  function downloadJson() {
    if (!report) return;
    download(`crawlerx-report-${hostForFilename()}.json`, "application/json", JSON.stringify(report, null, 2));
  }

  // ✅ Build table rows from report (supports either array or { results: [] })
  const rawRows: Row[] = useMemo(() => {
    if (!report) return [];
    if (Array.isArray(report)) return report as Row[];
    if (Array.isArray(report?.results)) return report.results as Row[];
    return [];
  }, [report]);

  // ✅ Determine columns = union of keys across rows
  const columns = useMemo(() => getUnionKeys(rawRows), [rawRows]);

  // ✅ Initialize column visibility when columns change (default all visible)
  useEffect(() => {
    if (!columns.length) return;
    setVisible((prev) => {
      const next: Record<string, boolean> = { ...prev };
      let changed = false;
      for (const c of columns) {
        if (next[c] === undefined) {
          next[c] = true;
          changed = true;
        }
      }
      return changed ? next : prev;
    });
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [columns.join("|")]);

  // ✅ Search (filters visible columns)
  const filtered = useMemo(() => {
    if (!query.trim()) return rawRows;
    const q = query.toLowerCase();
    return rawRows.filter((r) => {
      for (const h of columns) {
        if (!visible[h]) continue;
        if (coerce(r[h]).toLowerCase().includes(q)) return true;
      }
      return false;
    });
  }, [rawRows, query, columns, visible]);

  // ✅ Sorting (numbers compare numerically, everything else as strings; nulls sort first ascending)
  const sorted = useMemo(() => {
    const copy = [...filtered];
    copy.sort((a, b) => {
      const va = a[sortBy];
      const vb = b[sortBy];
      if (va == null && vb == null) return 0;
      if (va == null) return sortDir === "asc" ? -1 : 1;
      if (vb == null) return sortDir === "asc" ? 1 : -1;
      const sa = typeof va === "number" ? va : String(va);
      const sb = typeof vb === "number" ? vb : String(vb);
      if (sa < sb) return sortDir === "asc" ? -1 : 1;
      if (sa > sb) return sortDir === "asc" ? 1 : -1;
      return 0;
    });
    return copy;
  }, [filtered, sortBy, sortDir]);

  function onHeaderClick(h: string) {
    if (sortBy === h) setSortDir((d) => (d === "asc" ? "desc" : "asc"));
    else {
      setSortBy(h);
      setSortDir("asc");
    }
  }

  // Exports honor the current filter, sort, and column visibility.
  function exportCSV() {
    if (!sorted.length) return;
    const activeHeaders = columns.filter((h) => visible[h]);
    download(`crawlerx-report-${hostForFilename()}.csv`, "text/csv;charset=utf-8", toCSV(sorted, activeHeaders));
  }

  function exportJSON() {
    if (!sorted.length) return;
    download(`crawlerx-report-${hostForFilename()}.json`, "application/json", JSON.stringify(sorted, null, 2));
  }
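
  // Assumption: the CrawlerX response shape is inferred from how this page
  // consumes it (a bare array of rows, or an envelope carrying `results` plus
  // optional numeric `totals`/`summary`); the API contract itself is not
  // documented here. A hypothetical alias, should you want to type `report`
  // more strictly than `any`:
  //
  //   type CrawlReport =
  //     | Row[]
  //     | { results: Row[]; totals?: Record<string, number>; summary?: Record<string, number> };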

  return (
    <div className="mx-auto max-w-6xl px-4 py-8">
      <h1 className="text-2xl font-semibold">CrawlerX — Crawl & Report</h1>

      <p className="mt-1 text-sm text-gray-600">
        Enter a website, auto-detect the sitemap size for Max, then run a crawl via the CrawlerX API and
        download the JSON report.
      </p>

      <div className="mt-4 flex flex-wrap items-end gap-3">
        <div className="min-w-64 flex-1">
          <label className="mb-1 block text-sm text-gray-600">Website URL</label>
          <input
            value={siteUrl}
            onChange={(e) => setSiteUrl(e.target.value)}
            placeholder="https://example.com"
            className="w-full rounded-xl border border-gray-300 px-3 py-2 focus:outline-none focus:ring-4 focus:ring-blue-100 focus:border-blue-500"
          />
        </div>
        <div>
          <label className="mb-1 block text-sm text-gray-600">Max URLs</label>
          <input
            type="number"
            min={1}
            value={maxUrls}
            onChange={(e) => setMaxUrls(e.target.value ? Number(e.target.value) : "")}
            placeholder="e.g. 50"
            className="w-40 rounded-xl border border-gray-300 px-3 py-2 focus:outline-none focus:ring-4 focus:ring-blue-100 focus:border-blue-500"
          />
        </div>
        <button
          onClick={autoDetectMaxFromSitemap}
          disabled={autoMaxLoading}
          className="rounded-xl border border-gray-300 px-4 py-2 text-sm font-medium disabled:opacity-50"
        >
          {autoMaxLoading ? "Detecting…" : "Auto-detect Max"}
        </button>
        <button
          onClick={handleCrawl}
          disabled={crawlLoading}
          className="rounded-xl bg-blue-600 px-4 py-2 text-sm font-medium text-white disabled:opacity-50"
        >
          {crawlLoading ? "Crawling…" : "Crawl"}
        </button>
      </div>

      {error && (
        <div className="mt-4 rounded-xl border border-red-200 bg-red-50 px-3 py-2 text-sm text-red-700">
          {error}
        </div>
      )}

      {report && (
        <div className="mt-6">
          <div className="flex flex-wrap items-center justify-between gap-3">
            <h2 className="text-lg font-semibold">Crawler Report</h2>

            <div className="flex flex-wrap items-center gap-2">
              <input
                value={query}
                onChange={(e) => setQuery(e.target.value)}
                placeholder="Search (filters visible columns)"
                className="border rounded-xl px-3 py-2 text-sm"
              />
              <button onClick={exportCSV} className="rounded-xl border px-3 py-2 text-sm">
                Export CSV
              </button>
              <button onClick={exportJSON} className="rounded-xl border px-3 py-2 text-sm">
                Export JSON
              </button>
              <button onClick={downloadJson} className="rounded-xl border px-3 py-2 text-sm">
                Download raw JSON
              </button>
            </div>
          </div>

          <div className="mt-3 flex flex-wrap gap-2">
            {summaryChips(report).map((c) => (
              <div key={c.label} className="rounded-xl bg-gray-100 px-3 py-1 text-sm">
                <span className="font-semibold">{c.value}</span>{" "}
                <span className="text-gray-600">{c.label}</span>
              </div>
            ))}
          </div>

          {/* Column toggles */}
          <details className="mt-3">
            <summary className="cursor-pointer text-sm font-medium">Columns</summary>
            <div className="mt-2 flex flex-wrap gap-3">
              {columns.map((h) => (
                <label key={h} className="flex items-center gap-1 text-xs">
                  <input
                    type="checkbox"
                    checked={!!visible[h]}
                    onChange={() => setVisible((v) => ({ ...v, [h]: !v[h] }))}
                  />
                  {h}
                </label>
              ))}
            </div>
          </details>
          {/* Table */}
          {sorted.length > 0 ? (
            <div className="mt-4 overflow-auto rounded-xl border">
              <table className="min-w-full text-left text-sm">
                <thead className="bg-gray-50">
                  <tr>
                    {columns.filter((c) => visible[c]).map((c) => (
                      <th
                        key={c}
                        className="cursor-pointer select-none whitespace-nowrap px-3 py-2"
                        onClick={() => onHeaderClick(c)}
                        title="Click to sort"
                      >
                        {c} {sortBy === c && <span>{sortDir === "asc" ? "▲" : "▼"}</span>}
                      </th>
                    ))}
                  </tr>
                </thead>
                <tbody>
                  {sorted.map((r: any, idx: number) => (
                    <tr key={idx} className="border-t align-top">
                      {columns.filter((c) => visible[c]).map((c) => (
                        <td key={c} className="px-3 py-2">
                          {renderCell(r[c])}
                        </td>
                      ))}
                    </tr>
                  ))}
                </tbody>
              </table>
            </div>
          ) : (
            <pre className="mt-4 overflow-auto rounded-xl bg-gray-50 p-3 text-xs">
              {JSON.stringify(report, null, 2)}
            </pre>
          )}
        </div>
      )}

      <p className="mt-6 text-xs text-gray-500">
        Tip: If sitemap auto-detection fails due to server restrictions, enter Max manually or use the
        /api/sitemap proxy.
      </p>
    </div>
  );
}
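
/*
 * For reference: autoDetectMaxFromSitemap() above calls an /api/sitemap proxy
 * (also mentioned in the tip) and assumes it responds with { count: number }.
 * That route is not part of this file. What follows is only a minimal sketch
 * of what it could look like at app/api/sitemap/route.ts, under stated
 * assumptions: the sitemap lives at /sitemap.xml, it is a flat <urlset> with
 * one <loc> per URL, and no <sitemapindex> recursion or robots.txt lookup is
 * attempted.
 *
 *   import { NextResponse } from "next/server";
 *
 *   export async function GET(req: Request) {
 *     const u = new URL(req.url).searchParams.get("u");
 *     if (!u) return NextResponse.json({ error: "Missing ?u=" }, { status: 400 });
 *     try {
 *       const origin = new URL(u).origin;
 *       const res = await fetch(`${origin}/sitemap.xml`, { cache: "no-store" });
 *       if (!res.ok) {
 *         return NextResponse.json({ error: `Sitemap fetch failed (${res.status})` }, { status: 502 });
 *       }
 *       const xml = await res.text();
 *       // Count <loc> entries; a fuller version would recurse into sitemap indexes.
 *       const count = (xml.match(/<loc>/g) ?? []).length;
 *       return NextResponse.json({ count });
 *     } catch (e: any) {
 *       return NextResponse.json({ error: e?.message ?? "Sitemap probe failed" }, { status: 500 });
 *     }
 *   }
 */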