// app/(defaults)/crawl/page.tsx
'use client';

import React, { useEffect, useMemo, useState } from 'react';

/**
 * Screaming-Frog style UI (tabs + summary + big table + details panel)
 * - Left sidebar: views (Internal, External, Response Codes, Page Titles, Meta Description, H1, H2, Links, Issues, Performance, Render)
 * - Top toolbar: search, export (JSON/CSV), column visibility quick toggles
 * - Main table: sticky header; every filtered row is rendered (no virtualisation),
 *   clicking a row selects it into the Details panel
 * - Details panel (bottom): key/value for the selected URL
 *
 * Assumptions:
 * - API GET https://api.crawlerx.co/crawl?url=...&max=... returns { ok, results: Row[], ... }
 * - Row shape is the one produced by crawler.js in this project
 *
 * NOTE(review): layout wrappers, button labels and the Tailwind classes on
 * everything except the three text inputs and the table rows are a best-effort
 * layout — confirm against the intended design.
 */

/* ---------------- types & constants ---------------- */

/** One crawled URL; the column set is open-ended, so values are narrowed at use. */
type Row = Record<string, unknown>;

const API_BASE = 'https://api.crawlerx.co/crawl';

/** Crawl size used when the "max" field is empty or not a positive number. */
const DEFAULT_MAX_URLS = 50;

/** Row fields matched by the free-text filter. */
const SEARCH_FIELDS = ['url', 'title', 'meta_description', 'h1_1', 'h2_1'] as const;

const VIEWS = {
  Internal: 'Internal',
  External: 'External',
  'Response Codes': 'Response Codes',
  'Page Titles': 'Page Titles',
  'Meta Description': 'Meta Description',
  H1: 'H1',
  H2: 'H2',
  Links: 'Links',
  Issues: 'Issues',
  Performance: 'Performance',
  Render: 'Render',
} as const;

type ViewKey = keyof typeof VIEWS;

const VIEW_KEYS = Object.keys(VIEWS) as ViewKey[];

/** Column preset shown when a view is activated. Keyed by view so a missing preset is a compile error. */
const PRESETS: Record<ViewKey, { columns: string[] }> = {
  Internal: {
    columns: ['url', 'status', 'content_type', 'title', 'meta_description', 'h1_1', 'inlinks', 'outlinks'],
  },
  External: {
    columns: ['url', 'status', 'content_type', 'title', 'meta_description'],
  },
  'Response Codes': {
    columns: ['url', 'status', 'status_text', 'last_modified', 'set_cookie'],
  },
  'Page Titles': {
    columns: [
      'url',
      'title',
      'title_length',
      'title_pixel_width',
      'duplicate_title_exact',
      'nearest_title_similarity',
      'nearest_title_url',
    ],
  },
  'Meta Description': {
    columns: [
      'url',
      'meta_description',
      'meta_description_length',
      'meta_description_pixel_width',
      'duplicate_description_exact',
      'nearest_description_similarity',
      'nearest_description_url',
    ],
  },
  H1: { columns: ['url', 'h1_1', 'h1_1_length', 'h1_1_pixel_width', 'h1_2'] },
  H2: { columns: ['url', 'h2_1', 'h2_2'] },
  Links: { columns: ['url', 'inlinks', 'outlinks', 'nearest_title_url', 'nearest_description_url'] },
  Issues: {
    columns: [
      'url',
      'noindex',
      'nofollow',
      'robots_meta',
      'x_robots_tag',
      'canonical',
      'duplicate_title_exact',
      'duplicate_description_exact',
    ],
  },
  Performance: {
    columns: ['url', 'time_ms', 'bytes', 'word_count', 'flesch_reading_ease', 'flesch_kincaid_grade', 'gunning_fog'],
  },
  Render: { columns: ['url', 'render_mode', 'content_type', 'http_version', 'lang', 'crawl_timestamp'] },
};

const DEFAULT_COLS = PRESETS.Internal.columns;

const INPUT_CLASS =
  'rounded-lg border border-gray-300 px-3 py-2 focus:outline-none focus:ring-4 focus:ring-blue-100 focus:border-blue-500';
const BUTTON_CLASS =
  'rounded-lg border border-gray-300 bg-white px-3 py-2 text-sm hover:bg-gray-50 disabled:cursor-not-allowed disabled:opacity-50';

/* ---------------- page ---------------- */

/**
 * Crawl page: collects a site URL and a URL budget, calls the crawler API and
 * shows the result as a filterable table with per-view column presets.
 */
export default function CrawlPage() {
  const [siteUrl, setSiteUrl] = useState('');
  const [maxUrls, setMaxUrls] = useState<number | ''>('');
  const [autoMaxLoading, setAutoMaxLoading] = useState(false);
  const [crawlLoading, setCrawlLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const [report, setReport] = useState<unknown>(null);
  // Host the current report was crawled for. Kept separately from the URL input
  // so that typing a new URL does not re-classify rows of the report on screen.
  const [crawledHost, setCrawledHost] = useState('');
  const [query, setQuery] = useState('');
  const [view, setView] = useState<ViewKey>('Internal');
  const [visibleCols, setVisibleCols] = useState<string[]>([]);
  const [selectedIndex, setSelectedIndex] = useState<number | null>(null);

  /* ---------------- URL helpers ---------------- */
  const normalizedUrl = useMemo(() => normalizeUrl(siteUrl), [siteUrl]);
  const startHost = useMemo(() => hostOf(normalizedUrl), [normalizedUrl]);
  // The input is usable exactly when it parses to a URL with a hostname.
  const isValidUrl = startHost !== '';

  /* ---------------- actions ---------------- */

  /** Asks the local sitemap probe how many URLs the site lists and uses that as "max". */
  async function autoDetectMaxFromSitemap(): Promise<void> {
    setError(null);
    setAutoMaxLoading(true);
    try {
      if (!isValidUrl) throw new Error('Enter a valid website URL first.');
      const res = await fetch(`/api/sitemap?u=${encodeURIComponent(normalizedUrl)}`);
      if (!res.ok) throw new Error(`Sitemap probe failed (${res.status})`);
      const json: unknown = await res.json();
      const count = (json as { count?: unknown } | null)?.count;
      if (typeof count !== 'number' || count < 1) throw new Error('Sitemap found but contains no URLs.');
      setMaxUrls(count);
    } catch (e: unknown) {
      setError(errorMessage(e, 'Failed to detect Max from sitemap.'));
    } finally {
      setAutoMaxLoading(false);
    }
  }

  /** Runs a crawl for the entered URL and replaces the current report. */
  async function handleCrawl(): Promise<void> {
    setError(null);
    setCrawlLoading(true);
    setReport(null);
    setSelectedIndex(null);
    try {
      if (!isValidUrl) throw new Error('Please enter a valid website URL (with or without https://).');
      // Capture the host now: the input may change while the request is in flight.
      const host = startHost;
      const max =
        typeof maxUrls === 'number' && Number.isFinite(maxUrls) && maxUrls >= 1
          ? Math.floor(maxUrls)
          : DEFAULT_MAX_URLS;
      const apiUrl = `${API_BASE}?url=${encodeURIComponent(normalizedUrl)}&max=${max}`;
      const res = await fetch(apiUrl);
      if (!res.ok) throw new Error(`Crawler API error: ${res.status} ${res.statusText}`);
      const data: unknown = await res.json();
      // The API wraps results as { ok, results }; a false flag is a failed crawl
      // even though the HTTP status was 2xx.
      if ((data as { ok?: unknown } | null)?.ok === false) {
        throw new Error('Crawler API reported a failed crawl.');
      }
      setCrawledHost(host);
      setReport(data);
    } catch (e: unknown) {
      setError(errorMessage(e, 'Failed to crawl the site.'));
    } finally {
      setCrawlLoading(false);
    }
  }

  /** Downloads every row of the report (ignoring view and filter) as pretty-printed JSON. */
  function downloadJson(): void {
    if (!rows.length) return;
    const blob = new Blob([JSON.stringify(rows, null, 2)], { type: 'application/json' });
    const host = crawledHost || 'report';
    triggerDownload(blob, `crawlerx-report-${host}.json`);
  }

  /** Downloads the rows currently shown in the table, restricted to the visible columns, as CSV. */
  function exportCSV(): void {
    if (!filteredRows.length) return;
    const cols = visibleCols.length ? visibleCols : DEFAULT_COLS;
    const blob = new Blob([toCsv(filteredRows, cols)], { type: 'text/csv;charset=utf-8' });
    triggerDownload(blob, 'crawl-report.csv');
  }

  /** Switches the sidebar view; the selection is positional, so it is cleared. */
  function changeView(next: ViewKey): void {
    setView(next);
    setSelectedIndex(null);
  }

  /** Updates the free-text filter; the selection is positional, so it is cleared. */
  function changeQuery(next: string): void {
    setQuery(next);
    setSelectedIndex(null);
  }

  /* ---------------- data shaping ---------------- */
  const rows = useMemo(() => dataRows(report), [report]);

  // Establish the column universe from a sample of the data.
  const allColumns = useMemo(() => {
    const keys = new Set<string>();
    for (const r of rows.slice(0, 40)) {
      for (const k of Object.keys(r)) keys.add(k);
    }
    return Array.from(keys);
  }, [rows]);

  // When data arrives and no column is visible, fall back to the view preset.
  useEffect(() => {
    if (rows.length === 0) return;
    setVisibleCols((prev) => (prev.length ? prev : PRESETS[view].columns));
  }, [rows.length, view]);

  // Switching views swaps in that view's preset (also runs once on mount).
  useEffect(() => {
    setVisibleCols(PRESETS[view].columns);
  }, [view]);

  const filteredRows = useMemo(
    () => filterRows(rows, { view, startHost: crawledHost, query }),
    [rows, view, crawledHost, query],
  );

  const counts = useMemo(() => makeCounts(rows, crawledHost), [rows, crawledHost]);

  const selected = selectedIndex != null ? filteredRows[selectedIndex] ?? null : null;

  /* ---------------- render ---------------- */
  return (
    <div className="flex min-h-screen flex-col gap-4 p-4">
      {/* Header / Controls */}
      <header className="flex flex-col gap-3">
        <h1 className="text-xl font-semibold">CrawlerX — Crawl &amp; Report</h1>

        {/* URL + Max bar */}
        <div className="flex flex-wrap items-center gap-2">
          <div className="min-w-0 flex-1">
            <input
              type="text"
              value={siteUrl}
              onChange={(e) => setSiteUrl(e.target.value)}
              placeholder="https://example.com"
              className={`w-full ${INPUT_CLASS}`}
            />
          </div>
          <input
            type="number"
            min={1}
            value={maxUrls}
            onChange={(e) => setMaxUrls(e.target.value ? Number(e.target.value) : '')}
            placeholder="e.g. 50"
            className={`w-36 ${INPUT_CLASS}`}
          />
          <button type="button" onClick={autoDetectMaxFromSitemap} disabled={autoMaxLoading} className={BUTTON_CLASS}>
            {autoMaxLoading ? 'Detecting…' : 'Auto-detect Max'}
          </button>
          <button type="button" onClick={handleCrawl} disabled={crawlLoading} className={BUTTON_CLASS}>
            {crawlLoading ? 'Crawling…' : 'Crawl'}
          </button>
        </div>

        {/* Toolbar: search + export */}
        <div className="flex flex-wrap items-center gap-2">
          <input
            type="search"
            value={query}
            onChange={(e) => changeQuery(e.target.value)}
            placeholder="Filter rows (url, title, h1, description)…"
            className={`w-64 ${INPUT_CLASS}`}
          />
          <button type="button" onClick={downloadJson} disabled={!rows.length} className={BUTTON_CLASS}>
            Export JSON
          </button>
          <button type="button" onClick={exportCSV} disabled={!filteredRows.length} className={BUTTON_CLASS}>
            Export CSV
          </button>
        </div>

        {error && (
          <p role="alert" className="rounded-lg border border-red-200 bg-red-50 px-3 py-2 text-sm text-red-700">
            {error}
          </p>
        )}
      </header>

      {/* Main layout */}
      <div className="flex min-h-0 flex-1 gap-4">
        {/* Sidebar (views) */}
        <aside className="w-56 shrink-0">
          <nav className="flex flex-col gap-1">
            {VIEW_KEYS.map((key) => (
              <button
                key={key}
                type="button"
                onClick={() => changeView(key)}
                className={`flex items-center justify-between rounded-lg px-3 py-2 text-left text-sm ${
                  view === key ? 'bg-blue-50 text-blue-700' : 'hover:bg-gray-50'
                }`}
              >
                <span>{VIEWS[key]}</span>
                <span className="text-xs text-gray-500">{badgeCount(key, counts)}</span>
              </button>
            ))}
          </nav>
        </aside>

        {/* Content */}
        <section className="flex min-w-0 flex-1 flex-col gap-4">
          {/* Summary cards */}
          <SummaryBar counts={counts} total={rows.length} />

          {/* Column toggles */}
          <ColumnPicker
            allColumns={allColumns}
            preset={PRESETS[view].columns}
            visible={visibleCols}
            setVisible={setVisibleCols}
          />

          {/* Table */}
          <div className="max-h-[60vh] overflow-auto rounded-lg border border-gray-200">
            <table className="min-w-full text-left text-sm">
              <thead className="sticky top-0 bg-gray-100">
                <tr>
                  {visibleCols.map((c) => (
                    <th key={c} className="whitespace-nowrap px-3 py-2 font-medium">
                      {c}
                    </th>
                  ))}
                </tr>
              </thead>
              <tbody>
                {filteredRows.map((r, i) => (
                  <tr
                    key={i}
                    onClick={() => setSelectedIndex(i)}
                    className={`cursor-pointer ${i % 2 ? 'bg-gray-50' : 'bg-white'} ${selectedIndex === i ? 'ring-1 ring-blue-500' : ''}`}
                  >
                    {visibleCols.map((c) => (
                      <td key={c} className="max-w-md px-3 py-2 align-top">
                        {renderCell(r[c])}
                      </td>
                    ))}
                  </tr>
                ))}
              </tbody>
            </table>
          </div>

          {/* Details panel */}
          <div className="rounded-lg border border-gray-200 p-3">
            <h2 className="mb-2 text-sm font-semibold">URL Details</h2>
            {selected ? (
              <DetailGrid row={selected} />
            ) : (
              <p className="text-sm text-gray-500">
                Select a row to see full details (headers, H1/H2, robots, schema, links, timings…)
              </p>
            )}
          </div>
        </section>
      </div>
    </div>
  );
}

/* ---------------- components ---------------- */

/** Row of headline numbers for the whole report (not affected by view or filter). */
function SummaryBar({ counts, total }: { counts: ReturnType<typeof makeCounts>; total: number }) {
  const items = [
    { label: 'Pages crawled', value: total },
    { label: '2xx', value: counts.codes['2xx'] },
    { label: '3xx', value: counts.codes['3xx'] },
    { label: '4xx', value: counts.codes['4xx'] },
    { label: '5xx', value: counts.codes['5xx'] },
    { label: 'Noindex', value: counts.noindex },
    { label: 'Nofollow', value: counts.nofollow },
    { label: 'Duplicate titles', value: counts.dupTitles },
    { label: 'Duplicate desc', value: counts.dupDesc },
  ];
  return (
    <div className="grid grid-cols-3 gap-2 md:grid-cols-5 xl:grid-cols-9">
      {items.map((c) => (
        <div key={c.label} className="rounded-lg border border-gray-200 px-3 py-2">
          <div className="text-lg font-semibold">{c.value ?? 0}</div>
          <div className="text-xs text-gray-500">{c.label}</div>
        </div>
      ))}
    </div>
  );
}

/**
 * Collapsible list of checkboxes, one per known column. The list is the union
 * of the view preset, the currently visible columns and the columns seen in
 * the data, in that order, without duplicates.
 */
function ColumnPicker({
  allColumns,
  preset,
  visible,
  setVisible,
}: {
  allColumns: string[];
  preset: string[];
  visible: string[];
  setVisible: (v: string[]) => void;
}) {
  const [open, setOpen] = useState(true);
  const cols = useMemo(
    () => Array.from(new Set([...preset, ...visible, ...allColumns])),
    [preset, visible, allColumns],
  );
  const toggle = (key: string) => {
    setVisible(visible.includes(key) ? visible.filter((c) => c !== key) : [...visible, key]);
  };
  return (
    <div className="rounded-lg border border-gray-200 p-3">
      <div className="flex items-center justify-between">
        <span className="text-sm font-semibold">Columns</span>
        <button type="button" onClick={() => setOpen((o) => !o)} className="text-xs text-blue-600 hover:underline">
          {open ? 'Hide' : 'Show'}
        </button>
      </div>
      {open && (
        <div className="mt-2 flex flex-wrap gap-x-4 gap-y-1">
          {cols.map((c) => (
            <label key={c} className="flex items-center gap-1 text-xs">
              <input type="checkbox" checked={visible.includes(c)} onChange={() => toggle(c)} />
              {c}
            </label>
          ))}
        </div>
      )}
    </div>
  );
}

/** Key/value listing of every field of one row. */
function DetailGrid({ row }: { row: Record<string, unknown> }) {
  const entries = Object.entries(row);
  return (
    <div className="grid grid-cols-1 gap-2 md:grid-cols-2">
      {entries.map(([k, v]) => (
        <div key={k} className="rounded border border-gray-100 p-2">
          <div className="text-xs font-medium text-gray-500">{k}</div>
          <div className="break-words text-sm">{renderCell(v)}</div>
        </div>
      ))}
    </div>
  );
}

/**
 * Renders one cell value:
 * - null/undefined: empty placeholder
 * - http(s) string: link opening in a new tab
 * - other string: text, cut to 220 characters
 * - number/boolean: its string form
 * - array: "[n items]"
 * - anything else: collapsible pretty-printed JSON
 */
function renderCell(value: unknown): React.ReactNode {
  if (value == null) return <span className="text-gray-300" />;
  if (typeof value === 'string') {
    // Only absolute http(s) URLs become links, which also keeps other schemes out of href.
    if (/^https?:\/\//i.test(value)) {
      return (
        <a href={value} target="_blank" rel="noopener noreferrer" className="break-all text-blue-600 hover:underline">
          {value}
        </a>
      );
    }
    return <span>{value.length > 220 ? value.slice(0, 220) + '…' : value}</span>;
  }
  if (typeof value === 'number' || typeof value === 'boolean') return <span>{String(value)}</span>;
  if (Array.isArray(value)) return <span>[{value.length} items]</span>;
  return (
    <details>
      <summary className="cursor-pointer text-xs text-gray-500">object</summary>
      <pre className="overflow-auto text-xs">{JSON.stringify(value, null, 2)}</pre>
    </details>
  );
}

/* ---------------- helpers ---------------- */

/** Trims the input and prefixes `https://` when no http(s) scheme is present; '' for blank input. */
function normalizeUrl(input: string): string {
  const trimmed = input.trim();
  if (!trimmed) return '';
  return /^https?:\/\//i.test(trimmed) ? trimmed : `https://${trimmed}`;
}

/** Message of a thrown value if it is an Error with a non-empty message, otherwise `fallback`. */
function errorMessage(e: unknown, fallback: string): string {
  return e instanceof Error && e.message ? e.message : fallback;
}

/**
 * Extracts the row list from an API response. Accepts either a bare array or
 * an object with a `results` array; anything else yields []. Entries that are
 * not plain objects are dropped so callers can read fields without guarding.
 */
function dataRows(report: unknown): Row[] {
  const data: unknown = Array.isArray(report) ? report : (report as { results?: unknown } | null)?.results;
  if (!Array.isArray(data)) return [];
  return data.filter((r): r is Row => typeof r === 'object' && r !== null && !Array.isArray(r));
}

/** Hostname of an absolute URL string; '' when the value is missing, not a string or not parseable. */
function hostOf(u?: unknown): string {
  if (typeof u !== 'string' || !u) return '';
  try {
    return new URL(u).hostname;
  } catch {
    return '';
  }
}

/**
 * Applies the view scope and the free-text filter.
 * - Internal / External: split by hostname against `startHost` (skipped when it is empty)
 * - Response Codes: only rows with a numeric status
 * - query: case-insensitive substring match on url/title/description/h1/h2
 */
function filterRows(rows: Row[], opts: { view: ViewKey; startHost: string; query: string }): Row[] {
  const { view, startHost, query } = opts;
  let base = rows;

  if (view === 'Internal' && startHost) {
    base = base.filter((r) => hostOf(r.url) === startHost);
  } else if (view === 'External' && startHost) {
    base = base.filter((r) => Boolean(r.url) && hostOf(r.url) !== startHost);
  }

  if (view === 'Response Codes') {
    base = base.filter((r) => typeof r.status === 'number');
  }

  const q = query.trim().toLowerCase();
  if (q) {
    base = base.filter((r) => SEARCH_FIELDS.some((f) => String(r[f] || '').toLowerCase().includes(q)));
  }
  return base;
}

/**
 * Aggregates report-wide counters.
 * `internal` / `external` use the same rule as the Internal / External views
 * and `withStatus` the same rule as the Response Codes view, so sidebar badges
 * match the number of rows each view lists before the text filter.
 */
function makeCounts(rows: Row[], startHost: string) {
  const codes: Record<'2xx' | '3xx' | '4xx' | '5xx', number> = { '2xx': 0, '3xx': 0, '4xx': 0, '5xx': 0 };
  let noindex = 0;
  let nofollow = 0;
  let dupTitles = 0;
  let dupDesc = 0;
  let internal = 0;
  let external = 0;
  let withStatus = 0;

  for (const r of rows) {
    const s = r.status;
    if (typeof s === 'number') {
      withStatus++;
      if (s >= 200 && s < 300) codes['2xx']++;
      else if (s >= 300 && s < 400) codes['3xx']++;
      else if (s >= 400 && s < 500) codes['4xx']++;
      else if (s >= 500) codes['5xx']++;
    }
    if (r.noindex) noindex++;
    if (r.nofollow) nofollow++;
    if (r.duplicate_title_exact === 'yes') dupTitles++;
    if (r.duplicate_description_exact === 'yes') dupDesc++;

    if (startHost) {
      if (hostOf(r.url) === startHost) internal++;
      // Rows without a URL belong to neither side, as in the External view.
      else if (r.url) external++;
    }
  }
  return { codes, noindex, nofollow, dupTitles, dupDesc, internal, external, withStatus };
}

/** Badge shown next to a sidebar view; '' for views that have no count. */
function badgeCount(key: keyof typeof VIEWS, counts: ReturnType<typeof makeCounts>): number | '' {
  switch (key) {
    case 'Internal':
      return counts.internal;
    case 'External':
      return counts.external;
    case 'Response Codes':
      return counts.withStatus;
    default:
      return '';
  }
}

/** One CSV field: empty for null/undefined, JSON for objects/arrays, quoted when it contains `"`, `,`, CR or LF. */
function csvEscape(v: unknown): string {
  if (v == null) return '';
  const s = typeof v === 'object' ? JSON.stringify(v) : String(v);
  // NOTE: keep this regex on one line!
  return /[",\r\n]/.test(s) ? `"${s.replace(/"/g, '""')}"` : s;
}

/** CSV text with a header line followed by one line per row, restricted to `cols`. */
function toCsv(rows: Row[], cols: string[]): string {
  const header = cols.map(csvEscape).join(',');
  const lines = rows.map((r) => cols.map((c) => csvEscape(r[c])).join(','));
  return [header, ...lines].join('\n');
}

/** Saves a blob through a temporary anchor and releases the object URL afterwards. */
function triggerDownload(blob: Blob, filename: string): void {
  const url = URL.createObjectURL(blob);
  const a = document.createElement('a');
  a.href = url;
  a.download = filename;
  document.body.appendChild(a);
  a.click();
  a.remove();
  // Revoke on the next tick so the browser has picked up the download first.
  setTimeout(() => URL.revokeObjectURL(url), 0);
}