"use client";

// Path: app/(defaults)/crawl/page.tsx (App Router)
// If using the Pages Router, place this at pages/crawl.tsx instead.
// TailwindCSS is assumed for styling.

import { useMemo, useState } from "react";

export default function CrawlPage() {
  const [siteUrl, setSiteUrl] = useState("");
  // "" means "not set"; handleCrawl falls back to a default of 50.
  const [maxUrls, setMaxUrls] = useState<number | "">("");
  const [autoMaxLoading, setAutoMaxLoading] = useState(false);
  const [crawlLoading, setCrawlLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const [report, setReport] = useState<any>(null);

  const apiBase = "https://app.crawlerx.co/crawl";

  // Accept URLs typed with or without a scheme by defaulting to https://.
  const normalizedUrl = useMemo(() => {
    if (!siteUrl) return "";
    return /^https?:\/\//i.test(siteUrl) ? siteUrl : `https://${siteUrl}`;
  }, [siteUrl]);

  const isValidUrl = useMemo(() => {
    try {
      if (!normalizedUrl) return false;
      return !!new URL(normalizedUrl).hostname;
    } catch {
      return false;
    }
  }, [normalizedUrl]);

  async function autoDetectMaxFromSitemap() {
    setError(null);
    setAutoMaxLoading(true);
    try {
      if (!isValidUrl) throw new Error("Enter a valid website URL first.");
      // Probe the sitemap through a server-side proxy to avoid CORS
      // (a minimal sketch of app/api/sitemap/route.ts follows this file).
      const res = await fetch(`/api/sitemap?u=${encodeURIComponent(normalizedUrl)}`);
      if (!res.ok) throw new Error(`Sitemap probe failed (${res.status})`);
      const json = await res.json();
      if (typeof json.count !== "number" || json.count < 1) {
        throw new Error("Sitemap found but contains no URLs.");
      }
      setMaxUrls(json.count);
    } catch (e: any) {
      setError(e?.message || "Failed to detect Max from sitemap.");
    } finally {
      setAutoMaxLoading(false);
    }
  }

  async function handleCrawl() {
    setError(null);
    setCrawlLoading(true);
    setReport(null);
    try {
      if (!isValidUrl) {
        throw new Error("Please enter a valid website URL (with or without https://).");
      }
      const max = typeof maxUrls === "number" && maxUrls > 0 ? maxUrls : 50;
      // e.g. GET https://app.crawlerx.co/crawl?url=https%3A%2F%2Fexample.com&max=50
      const apiUrl = `${apiBase}?url=${encodeURIComponent(normalizedUrl)}&max=${max}`;
      const res = await fetch(apiUrl);
      if (!res.ok) throw new Error(`Crawler API error: ${res.status} ${res.statusText}`);
      const data = await res.json();
      setReport(data);
    } catch (e: any) {
      setError(e?.message || "Failed to crawl the site.");
    } finally {
      setCrawlLoading(false);
    }
  }

  // Serialize the report to a Blob and trigger a browser download.
  function downloadJson() {
    if (!report) return;
    const blob = new Blob([JSON.stringify(report, null, 2)], { type: "application/json" });
    const url = URL.createObjectURL(blob);
    const a = document.createElement("a");
    a.href = url;
    const host = (() => {
      try {
        return new URL(normalizedUrl).hostname;
      } catch {
        return "report";
      }
    })();
    a.download = `crawlerx-report-${host}.json`;
    document.body.appendChild(a);
    a.click();
    a.remove();
    URL.revokeObjectURL(url);
  }

  // Flatten the report into table rows/columns. The API may return either a
  // bare array of page entries or an object with a `results` array, so handle
  // both shapes. Preferred columns come first; up to six extra keys (sampled
  // from the first 25 rows) follow.
  const { rows, columns } = useMemo(() => {
    if (!report) return { rows: [] as any[], columns: [] as string[] };
    const data = Array.isArray(report)
      ? report
      : Array.isArray(report?.results)
      ? report.results
      : null;
    if (!data || data.length === 0) return { rows: [], columns: [] };
    const preferred = ["url", "status", "title", "description", "h1", "issues", "links", "loadTime"];
    const colset = new Set<string>();
    data.slice(0, 25).forEach((r: any) => Object.keys(r || {}).forEach((k) => colset.add(k)));
    const cols = preferred
      .filter((k) => colset.has(k))
      .concat(Array.from(colset).filter((k) => !preferred.includes(k)).slice(0, 6));
    return { rows: data, columns: cols };
  }, [report]);

  return (
    <main className="mx-auto max-w-5xl px-4 py-10">
      <h1 className="text-2xl font-semibold">CrawlerX — Crawl & Report</h1>

      <p className="mt-2 text-gray-600">
        Enter a website, auto-detect the sitemap size for Max, then run a crawl via the
        CrawlerX API and download the JSON report.
      </p>

      <div className="mt-6 flex flex-col gap-4 sm:flex-row sm:items-end">
        <div className="flex-1">
          <label className="mb-1 block text-sm font-medium">Website URL</label>
          <input
            type="url"
            value={siteUrl}
            onChange={(e) => setSiteUrl(e.target.value)}
            placeholder="https://example.com"
            className="w-full rounded-xl border border-gray-300 px-3 py-2 focus:outline-none focus:ring-4 focus:ring-blue-100 focus:border-blue-500"
          />
        </div>
        <div>
          <label className="mb-1 block text-sm font-medium">Max URLs</label>
          <input
            type="number"
            min={1}
            value={maxUrls}
            onChange={(e) => setMaxUrls(e.target.value ? Number(e.target.value) : "")}
            placeholder="e.g. 50"
            className="w-40 rounded-xl border border-gray-300 px-3 py-2 focus:outline-none focus:ring-4 focus:ring-blue-100 focus:border-blue-500"
          />
        </div>
        <button
          type="button"
          onClick={autoDetectMaxFromSitemap}
          disabled={autoMaxLoading || !isValidUrl}
          className="rounded-xl border border-gray-300 px-4 py-2 disabled:opacity-50"
        >
          {autoMaxLoading ? "Detecting…" : "Auto-detect Max"}
        </button>
        <button
          type="button"
          onClick={handleCrawl}
          disabled={crawlLoading || !isValidUrl}
          className="rounded-xl bg-blue-600 px-4 py-2 text-white disabled:opacity-50"
        >
          {crawlLoading ? "Crawling…" : "Crawl"}
        </button>
      </div>

      {error && (
        <div className="mt-4 rounded-xl border border-red-200 bg-red-50 px-4 py-3 text-red-700">
          {error}
        </div>
      )}

      {report && (
        <section className="mt-8">
          <div className="flex items-center justify-between">
            <h2 className="text-xl font-semibold">Crawler Report</h2>
            <button
              type="button"
              onClick={downloadJson}
              className="rounded-xl border border-gray-300 px-4 py-2"
            >
              Download JSON
            </button>
          </div>

          <div className="mt-4 flex flex-wrap gap-2">
            {summaryChips(report).map((c) => (
              <div key={c.label} className="rounded-xl border border-gray-200 px-3 py-2 text-center">
                <div className="font-semibold">{c.value}</div>
                <div className="text-xs text-gray-500">{c.label}</div>
              </div>
            ))}
          </div>

          {rows.length > 0 ? (
            <div className="mt-4 overflow-x-auto">
              <table className="w-full text-left text-sm">
                <thead>
                  <tr>
                    {columns.map((c) => (
                      <th key={c} className="border-b border-gray-200 px-3 py-2 font-medium">
                        {c}
                      </th>
                    ))}
                  </tr>
                </thead>
                <tbody>
                  {rows.map((r: any, idx: number) => (
                    <tr key={idx} className="align-top">
                      {columns.map((c) => (
                        <td key={c} className="border-b border-gray-100 px-3 py-2">
                          {renderCell(r[c])}
                        </td>
                      ))}
                    </tr>
                  ))}
                </tbody>
              </table>
            </div>
          ) : (
            <pre className="mt-4 overflow-x-auto rounded-xl bg-gray-50 p-4 text-xs">
              {JSON.stringify(report, null, 2)}
            </pre>
          )}
        </section>
      )}

      <p className="mt-8 text-sm text-gray-500">
        Tip: If sitemap auto-detection fails due to server restrictions, enter Max manually
        or use the /api/sitemap proxy.
      </p>
    </main>
  );
}

// Render a single table cell based on the value's runtime type: links become
// anchors, long strings are truncated, arrays show a count, and nested
// objects collapse behind a <details> disclosure.
function renderCell(value: any) {
  if (value == null) return <span className="text-gray-400">—</span>;
  if (typeof value === "string") {
    if (/^https?:\/\//i.test(value)) {
      return (
        <a href={value} target="_blank" rel="noreferrer" className="text-blue-600 underline">
          {value}
        </a>
      );
    }
    return <span>{truncate(value, 220)}</span>;
  }
  if (typeof value === "number" || typeof value === "boolean") return <span>{String(value)}</span>;
  if (Array.isArray(value)) return <span>[{value.length} items]</span>;
  return (
    <details>
      <summary className="cursor-pointer">object</summary>
      <pre className="text-xs">{JSON.stringify(value, null, 2)}</pre>
    </details>
  );
}

function truncate(s: string, n: number) {
  return s.length > n ? s.slice(0, n - 1) + "…" : s;
}

// Build up to eight summary chips: a "Pages crawled" count plus any numeric
// fields found on report.totals or report.summary.
function summaryChips(report: any): { label: string; value: string | number }[] {
  const chips: { label: string; value: string | number }[] = [];
  const arr = Array.isArray(report)
    ? report
    : Array.isArray(report?.results)
    ? report.results
    : null;
  if (arr) chips.push({ label: "Pages crawled", value: arr.length });
  const totals = report?.totals || report?.summary || {};
  for (const [k, v] of Object.entries(totals)) {
    if (typeof v === "number") chips.push({ label: k, value: v });
  }
  return chips.slice(0, 8);
}
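// ---------------------------------------------------------------------------
// Path: app/api/sitemap/route.ts (companion proxy used by autoDetectMaxFromSitemap)
//
// The page above fetches /api/sitemap, but that route isn't part of this file,
// so here is a minimal sketch of one way it could look. Assumptions (not from
// the original): the target site serves a plain sitemap at {origin}/sitemap.xml,
// and a naive count of <loc> entries is an acceptable value for Max. Sitemap
// indexes, gzipped sitemaps, and robots.txt discovery are out of scope here.

import { NextResponse } from "next/server";

export async function GET(req: Request) {
  const target = new URL(req.url).searchParams.get("u");
  if (!target) {
    return NextResponse.json({ error: "Missing ?u= query parameter" }, { status: 400 });
  }
  try {
    const origin = new URL(target).origin;
    const res = await fetch(`${origin}/sitemap.xml`, {
      headers: { "User-Agent": "CrawlerX-SitemapProbe/1.0" },
      cache: "no-store", // always probe fresh; add caching if this gets chatty
    });
    if (!res.ok) {
      return NextResponse.json(
        { error: `Sitemap fetch failed (${res.status})` },
        { status: 502 }
      );
    }
    const xml = await res.text();
    // Naive <loc> count; the client only needs a number for the Max field.
    const count = (xml.match(/<loc>/gi) || []).length;
    return NextResponse.json({ count });
  } catch {
    return NextResponse.json({ error: "Invalid URL or fetch failure" }, { status: 502 });
  }
}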