// app/(defaults)/crawl/page.tsx
'use client';
import React, { useEffect, useMemo, useState } from 'react';

/**
 * Screaming-Frog style UI (tabs + summary + big table + details panel)
 * - Left sidebar: views (Internal, External, Response Codes, Page Titles, Meta Description, H1, H2, Links, Issues, Performance, Render)
 * - Top toolbar: search, export (JSON/CSV), column visibility quick toggles
 * - Main table: sticky header, virtualish rendering by slice, clickable row selects into Details panel
 * - Details panel (bottom): key/value for the selected URL
 *
 * Assumptions:
 * - API GET https://api.crawlerx.co/crawl?url=...&max=... returns { ok, results: Row[], ... }
 * - Row shape is the one produced by crawler.js in this project
 */

/**
 * Pulls a displayable message out of an unknown thrown value.
 * Accepts anything carrying a non-empty string `message`; otherwise returns `fallback`.
 */
function errorMessage(e: unknown, fallback: string): string {
  const msg = (e as { message?: unknown } | null | undefined)?.message;
  return typeof msg === 'string' && msg ? msg : fallback;
}

/**
 * Serialises one value as a CSV field.
 * null/undefined become an empty field; objects and arrays are written as JSON
 * (instead of "[object Object]"); fields containing a quote, comma, CR or LF are
 * quoted with embedded quotes doubled.
 */
function csvCell(v: unknown): string {
  if (v == null) return '';
  const s = typeof v === 'object' ? JSON.stringify(v) : String(v);
  // NOTE: keep this regex on one line!
  return /[",\r\n]/.test(s) ? `"${s.replace(/"/g, '""')}"` : s;
}

/**
 * Triggers a browser download of `blob` under `filename` through a temporary
 * anchor element, then releases the object URL so the blob can be collected.
 */
function saveBlob(blob: Blob, filename: string): void {
  const url = URL.createObjectURL(blob);
  const a = document.createElement('a');
  a.href = url;
  a.download = filename;
  document.body.appendChild(a);
  a.click();
  a.remove();
  URL.revokeObjectURL(url);
}

/**
 * Crawl page: runs a crawl against the crawler API and presents the result as
 * a view sidebar, summary chips, column picker, results table and a details
 * panel for the selected row. Also offers JSON and CSV export.
 */
export default function CrawlPage() {
  const [siteUrl, setSiteUrl] = useState('');
  const [maxUrls, setMaxUrls] = useState<number | ''>('');
  const [autoMaxLoading, setAutoMaxLoading] = useState(false);
  const [crawlLoading, setCrawlLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const [report, setReport] = useState<unknown>(null);
  const [query, setQuery] = useState('');
  const [view, setView] = useState<keyof typeof VIEWS>('Internal');
  const [visibleCols, setVisibleCols] = useState<string[]>([]);
  const [selectedIndex, setSelectedIndex] = useState<number | null>(null);

  const apiBase = 'https://api.crawlerx.co/crawl';

  /* ---------------- URL helpers ---------------- */
  // The input is accepted with or without a scheme; https:// is assumed when missing.
  const normalizedUrl = useMemo(() => {
    const raw = siteUrl.trim();
    if (!raw) return '';
    return /^https?:\/\//i.test(raw) ? raw : `https://${raw}`;
  }, [siteUrl]);

  // Hostname of the entered URL, or '' when it does not parse.
  const startHost = useMemo(() => {
    try {
      return normalizedUrl ? new URL(normalizedUrl).hostname : '';
    } catch {
      return '';
    }
  }, [normalizedUrl]);

  // A URL is considered valid exactly when it parses to a non-empty hostname.
  const isValidUrl = startHost !== '';

  /* ---------------- actions ---------------- */
  /** Asks the local /api/sitemap endpoint for a URL count and uses it as Max URLs. */
  async function autoDetectMaxFromSitemap() {
    setError(null);
    setAutoMaxLoading(true);
    try {
      if (!isValidUrl) throw new Error('Enter a valid website URL first.');
      const res = await fetch(`/api/sitemap?u=${encodeURIComponent(normalizedUrl)}`);
      if (!res.ok) throw new Error(`Sitemap probe failed (${res.status})`);
      const json = await res.json();
      if (typeof json.count !== 'number' || json.count < 1) throw new Error('Sitemap found but contains no URLs.');
      setMaxUrls(json.count);
    } catch (e: unknown) {
      setError(errorMessage(e, 'Failed to detect Max from sitemap.'));
    } finally {
      setAutoMaxLoading(false);
    }
  }

  /** Clears the previous report and selection, then fetches a fresh crawl (max defaults to 50). */
  async function handleCrawl() {
    setError(null);
    setCrawlLoading(true);
    setReport(null);
    setSelectedIndex(null);

    try {
      if (!isValidUrl) throw new Error('Please enter a valid website URL (with or without https://).');
      const max = typeof maxUrls === 'number' && maxUrls > 0 ? maxUrls : 50;
      const apiUrl = `${apiBase}?url=${encodeURIComponent(normalizedUrl)}&max=${max}`;
      const res = await fetch(apiUrl);
      if (!res.ok) throw new Error(`Crawler API error: ${res.status} ${res.statusText}`);
      const data = await res.json();
      setReport(data);
    } catch (e: unknown) {
      setError(errorMessage(e, 'Failed to crawl the site.'));
    } finally {
      setCrawlLoading(false);
    }
  }

  /** Downloads every crawled row (unfiltered) as pretty-printed JSON. */
  function downloadJson() {
    const all = dataRows(report);
    if (!all.length) return;
    const blob = new Blob([JSON.stringify(all, null, 2)], { type: 'application/json' });
    const host = startHost || 'report';
    saveBlob(blob, `crawlerx-report-${host}.json`);
  }

  /** Downloads the currently filtered rows, restricted to the visible columns, as CSV. */
  function exportCSV() {
    const out = filteredRows;
    if (!out.length) return;
    const cols = visibleCols.length ? visibleCols : defaultCols;

    const header = cols.join(',');
    const lines = out.map((r) => cols.map((c) => csvCell(r[c])).join(','));
    const csv = [header, ...lines].join('\n');

    saveBlob(new Blob([csv], { type: 'text/csv;charset=utf-8' }), 'crawl-report.csv');
  }

  /* ---------------- data shaping ---------------- */
  const rows = useMemo(() => dataRows(report), [report]);

  // establish columns from data sample (first 40 rows)
  const allColumns = useMemo(() => {
    const keys = new Set<string>();
    for (const r of rows.slice(0, 40)) {
      for (const k of Object.keys(r)) keys.add(k);
    }
    return Array.from(keys);
  }, [rows]);

  const defaultCols = useMemo(() => PRESets['Internal'].columns, []);

  // initialize visible cols on first load (only when nothing is visible yet)
  useEffect(() => {
    if (!rows.length) return;
    if (!visibleCols.length) setVisibleCols(PRESets[view]?.columns ?? defaultCols);
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [rows.length]);

  // recompute on view change
  useEffect(() => {
    setVisibleCols(PRESets[view]?.columns ?? defaultCols);
  }, [view, defaultCols]);

  const filteredRows = useMemo(() => {
    let base = [...rows];

    // view-scoped filtering (Internal/External)
    if (view === 'Internal' && startHost) {
      base = base.filter((r) => hostOf(r.url) === startHost);
    } else if (view === 'External' && startHost) {
      base = base.filter((r) => r.url && hostOf(r.url) !== startHost);
    }

    // Response Codes tab: only include rows with status
    if (view === 'Response Codes') {
      base = base.filter((r) => typeof r.status === 'number');
    }
    // Text tabs could keep everything; columns drive the UI

    // text search across url/title/desc/h1/h2
    const q = query.trim().toLowerCase();
    if (q) {
      base = base.filter((r) =>
        [r.url, r.title, r.meta_description, r.h1_1, r.h2_1]
          .map((v) => String(v || '').toLowerCase())
          .some((s) => s.includes(q))
      );
    }
    return base;
  }, [rows, query, view, startHost]);

  // The selection is an index into filteredRows, so it must be dropped whenever
  // that list changes; otherwise the details panel would show a different row.
  useEffect(() => {
    setSelectedIndex(null);
  }, [filteredRows]);

  const counts = useMemo(() => makeCounts(rows, startHost), [rows, startHost]);

  const selected = selectedIndex != null ? filteredRows[selectedIndex] : null;

  const inputClass =
    'rounded-lg border border-gray-300 px-3 py-2 focus:outline-none focus:ring-4 focus:ring-blue-100 focus:border-blue-500';
  const exportButtonClass =
    'rounded-lg border border-gray-300 bg-white px-3 py-2 text-sm hover:bg-gray-50 disabled:opacity-50';

  /* ---------------- render ---------------- */
  return (
    <div className="min-h-screen bg-white">
      <div className="mx-auto max-w-[1400px] px-4 py-6">
        {/* Header / Controls */}
        <div className="mb-3 flex items-center justify-between gap-2">
          <h1 className="text-2xl font-semibold text-gray-900">CrawlerX — Crawl & Report</h1>
          <div className="flex items-center gap-2">
            <button onClick={downloadJson} disabled={!rows.length} className={exportButtonClass}>Export JSON</button>
            <button onClick={exportCSV} disabled={!filteredRows.length} className={exportButtonClass}>Export CSV</button>
            <button
              onClick={handleCrawl}
              disabled={!isValidUrl || crawlLoading}
              className="rounded-lg bg-green-600 px-3 py-2 text-white text-sm hover:bg-green-700 disabled:opacity-50"
            >
              {crawlLoading ? 'Crawling…' : 'Run Crawl'}
            </button>
          </div>
        </div>

        {/* URL + Max bar */}
        <div className="grid gap-3 sm:grid-cols-[1fr_auto_auto] items-end bg-gray-50 p-3 rounded-xl border border-gray-200">
          <div className="flex flex-col">
            <label htmlFor="crawl-site-url" className="text-xs font-medium text-gray-700 mb-1">Website URL</label>
            <input
              id="crawl-site-url"
              type="url"
              value={siteUrl}
              onChange={(e) => setSiteUrl(e.target.value)}
              placeholder="https://example.com"
              className={`w-full ${inputClass}`}
            />
          </div>
          <div className="flex flex-col">
            {/* The button sits beside the label, not inside it: a button nested in a
                <label> becomes the labelled control and fires when the text is clicked. */}
            <div className="text-xs font-medium text-gray-700 mb-1 flex items-center gap-2">
              <label htmlFor="crawl-max-urls">Max URLs</label>
              <button
                type="button"
                onClick={autoDetectMaxFromSitemap}
                disabled={!isValidUrl || autoMaxLoading}
                className="text-[11px] rounded-md px-2 py-1 border border-gray-300 hover:bg-white disabled:opacity-50"
              >
                {autoMaxLoading ? 'Detecting…' : 'Auto‑detect'}
              </button>
            </div>
            <input
              id="crawl-max-urls"
              type="number"
              min={1}
              value={maxUrls}
              onChange={(e) => setMaxUrls(e.target.value ? Number(e.target.value) : '')}
              placeholder="e.g. 50"
              className={`w-36 ${inputClass}`}
            />
          </div>
          <div className="flex flex-col">
            <label htmlFor="crawl-search" className="text-xs font-medium text-gray-700 mb-1">Search</label>
            <input
              id="crawl-search"
              type="search"
              value={query}
              onChange={(e) => setQuery(e.target.value)}
              placeholder="Filter rows (url, title, h1, description)…"
              className={`w-64 ${inputClass}`}
            />
          </div>
        </div>

        {error && <div className="mt-3 rounded-lg border border-red-200 bg-red-50 px-4 py-2 text-sm text-red-700">{error}</div>}

        {/* Main layout */}
        <div className="mt-4 grid grid-cols-12 gap-4">
          {/* Sidebar (views) */}
          <aside className="col-span-12 md:col-span-3 lg:col-span-2">
            <nav className="sticky top-4 space-y-1">
              {Object.entries(VIEWS).map(([key, label]) => (
                <button
                  key={key}
                  onClick={() => setView(key as keyof typeof VIEWS)}
                  className={`w-full text-left px-3 py-2 rounded-md border ${view === key ? 'bg-blue-600 text-white border-blue-600' : 'bg-white text-gray-800 border-gray-200 hover:bg-gray-50'}`}
                >
                  <div className="flex items-center justify-between">
                    <span>{label}</span>
                    <span className="text-xs opacity-70">{badgeCount(key as keyof typeof VIEWS, counts)}</span>
                  </div>
                </button>
              ))}
            </nav>
          </aside>

          {/* Content */}
          <section className="col-span-12 md:col-span-9 lg:col-span-10">
            {/* Summary cards */}
            <SummaryBar counts={counts} total={rows.length} />

            {/* Column toggles */}
            <ColumnPicker
              allColumns={allColumns}
              preset={PRESets[view]?.columns ?? defaultCols}
              visible={visibleCols}
              setVisible={setVisibleCols}
            />

            {/* Table */}
            <div className="mt-2 overflow-auto rounded-xl border border-gray-200" style={{ maxHeight: '60vh' }}>
              <table className="min-w-full text-xs">
                <thead className="sticky top-0 bg-gray-50 z-10 border-b">
                  <tr>
                    {visibleCols.map((c) => (
                      <th key={c} className="px-3 py-2 font-semibold text-gray-700 whitespace-nowrap">{c}</th>
                    ))}
                  </tr>
                </thead>
                <tbody>
                  {filteredRows.map((r, i) => (
                    <tr
                      key={i}
                      onClick={() => setSelectedIndex(i)}
                      className={`cursor-pointer ${i % 2 ? 'bg-gray-50' : 'bg-white'} ${selectedIndex === i ? 'ring-1 ring-blue-500' : ''}`}
                    >
                      {visibleCols.map((c) => (
                        <td key={c} className="px-3 py-2 align-top text-gray-800 max-w-[36rem]">
                          {renderCell(r[c])}
                        </td>
                      ))}
                    </tr>
                  ))}
                </tbody>
              </table>
            </div>

            {/* Details panel */}
            <div className="mt-3 rounded-xl border border-gray-200">
              <header className="px-3 py-2 border-b bg-gray-50 text-sm font-medium text-gray-800">URL Details</header>
              <div className="p-3 text-sm">
                {selected ? (
                  <DetailGrid row={selected} />
                ) : (
                  <div className="text-gray-500">Select a row to see full details (headers, H1/H2, robots, schema, links, timings…)</div>
                )}
              </div>
            </div>
          </section>
        </div>
      </div>
    </div>
  );
}

/* ---------------- components ---------------- */
/**
 * Row of summary chips shown above the table.
 *
 * @param counts Aggregates produced by `makeCounts` (status-code buckets, robots flags, duplicates).
 * @param total  Number of crawled rows, shown as "Pages crawled".
 */
function SummaryBar({ counts, total }: { counts: ReturnType<typeof makeCounts>; total: number }) {
  const items = [
    { label: 'Pages crawled', value: total },
    { label: '2xx', value: counts.codes['2xx'] },
    { label: '3xx', value: counts.codes['3xx'] },
    { label: '4xx', value: counts.codes['4xx'] },
    { label: '5xx', value: counts.codes['5xx'] },
    { label: 'Noindex', value: counts.noindex },
    { label: 'Nofollow', value: counts.nofollow },
    { label: 'Duplicate titles', value: counts.dupTitles },
    { label: 'Duplicate desc', value: counts.dupDesc },
  ];

  return (
    <div className="mb-3 flex flex-wrap gap-2">
      {items.map(({ label, value }) => (
        <span
          key={label}
          className="inline-flex items-center gap-2 rounded-full border border-gray-200 bg-gray-50 px-3 py-1 text-xs text-gray-700"
        >
          <span className="font-semibold">{value ?? 0}</span>
          <span className="text-gray-500">{label}</span>
        </span>
      ))}
    </div>
  );
}

/**
 * Collapsible column chooser for the results table.
 *
 * @param allColumns Column keys discovered in the data.
 * @param preset     Columns of the active view's preset; listed first and used by the
 *                   "Preset" and "Minimal" (first six) shortcuts.
 * @param visible    Currently visible column keys.
 * @param setVisible Callback receiving the next list of visible column keys.
 */
function ColumnPicker({ allColumns, preset, visible, setVisible }: { allColumns: string[]; preset: string[]; visible: string[]; setVisible: (v: string[]) => void; }) {
  const [open, setOpen] = useState(true);

  // Union of preset, visible and discovered columns, de-duplicated in that order.
  const cols = useMemo(() => Array.from(new Set([...preset, ...visible, ...allColumns])), [preset, visible, allColumns]);

  // Adds the column when hidden, removes it when shown.
  const toggle = (key: string) => {
    setVisible(visible.includes(key) ? visible.filter((c) => c !== key) : [...visible, key]);
  };

  const shortcutClass = 'text-xs rounded-md border px-2 py-1';

  return (
    <div className="rounded-xl border border-gray-200 bg-white">
      <div className="flex items-center justify-between px-3 py-2">
        <div className="text-sm font-medium text-gray-800">Columns</div>
        <div className="flex gap-2">
          <button type="button" className={shortcutClass} onClick={() => setVisible(preset)}>Preset</button>
          <button type="button" className={shortcutClass} onClick={() => setVisible(cols)}>All</button>
          <button type="button" className={shortcutClass} onClick={() => setVisible(preset.slice(0, 6))}>Minimal</button>
          <button type="button" className={shortcutClass} onClick={() => setOpen((o) => !o)}>{open ? 'Hide' : 'Show'}</button>
        </div>
      </div>
      {open && (
        <div className="px-3 pb-2 grid grid-cols-2 md:grid-cols-3 lg:grid-cols-4 gap-2 max-h-40 overflow-auto">
          {cols.map((c) => (
            <label key={c} className="flex gap-2 text-xs items-center">
              <input type="checkbox" className="rounded border-gray-300" checked={visible.includes(c)} onChange={() => toggle(c)} />
              <span className="truncate" title={c}>{c}</span>
            </label>
          ))}
        </div>
      )}
    </div>
  );
}

/**
 * Details panel body: one card per field of the selected row, with the field
 * name as a small heading and the value rendered through `renderCell`.
 *
 * @param row The selected result row.
 */
function DetailGrid({ row }: { row: Record<string, any> }) {
  const entries = Object.entries(row);
  return (
    <div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-3">
      {entries.map(([k, v]) => (
        <div key={k} className="rounded-lg border border-gray-200 p-2">
          <div className="text-[11px] uppercase tracking-wide text-gray-500">{k}</div>
          <div className="mt-1 text-[13px] text-gray-900 break-words">{renderCell(v)}</div>
        </div>
      ))}
    </div>
  );
}

/**
 * Renders one cell value according to its runtime type:
 * - null/undefined: a grey dash
 * - string starting with http:// or https://: a link opening in a new tab
 * - other strings: the text, cut to 220 characters plus an ellipsis when longer
 * - number/boolean: its string form
 * - array: "[N items]"
 * - anything else: a collapsible block with the pretty-printed JSON
 */
function renderCell(value: any) {
  if (value == null) return <span className="text-gray-400">—</span>;

  if (typeof value === 'string') {
    if (/^https?:\/\//i.test(value)) {
      return (
        <a href={value} target="_blank" rel="noreferrer" className="text-blue-600 hover:underline break-all">
          {value}
        </a>
      );
    }
    return <span className="break-words">{value.length > 220 ? value.slice(0, 220) + '…' : value}</span>;
  }

  if (typeof value === 'number' || typeof value === 'boolean') return <span>{String(value)}</span>;

  if (Array.isArray(value)) return <span className="text-gray-700">[{value.length} items]</span>;

  return (
    <details>
      <summary className="cursor-pointer text-gray-700">object</summary>
      <pre className="mt-1 whitespace-pre-wrap break-words bg-gray-100 rounded-lg p-2 text-[11px]">{JSON.stringify(value, null, 2)}</pre>
    </details>
  );
}

/* ---------------- helpers ---------------- */
/**
 * Sidebar views, in display order. Each key is the view id (used as the
 * `view` state and as the lookup key into the column presets) and each value
 * is the label shown on the sidebar button. `as const` keeps the keys as
 * literal types so `keyof typeof VIEWS` is the union of view ids.
 */
const VIEWS = {
  Internal: 'Internal',
  External: 'External',
  'Response Codes': 'Response Codes',
  'Page Titles': 'Page Titles',
  'Meta Description': 'Meta Description',
  H1: 'H1',
  H2: 'H2',
  Links: 'Links',
  Issues: 'Issues',
  Performance: 'Performance',
  Render: 'Render',
} as const;

/**
 * Default table columns per view, keyed by view id. The column names are row
 * field keys; a view without an entry falls back to the caller's default.
 */
const PRESets: Record<string, { columns: string[] }> = {
  Internal: {
    columns: ['url', 'status', 'content_type', 'title', 'meta_description', 'h1_1', 'inlinks', 'outlinks'],
  },
  External: {
    columns: ['url', 'status', 'content_type', 'title', 'meta_description'],
  },
  'Response Codes': {
    columns: ['url', 'status', 'status_text', 'last_modified', 'set_cookie'],
  },
  'Page Titles': {
    columns: ['url', 'title', 'title_length', 'title_pixel_width', 'duplicate_title_exact', 'nearest_title_similarity', 'nearest_title_url'],
  },
  'Meta Description': {
    columns: ['url', 'meta_description', 'meta_description_length', 'meta_description_pixel_width', 'duplicate_description_exact', 'nearest_description_similarity', 'nearest_description_url'],
  },
  H1: { columns: ['url', 'h1_1', 'h1_1_length', 'h1_1_pixel_width', 'h1_2'] },
  H2: { columns: ['url', 'h2_1', 'h2_2'] },
  Links: { columns: ['url', 'inlinks', 'outlinks', 'nearest_title_url', 'nearest_description_url'] },
  Issues: { columns: ['url', 'noindex', 'nofollow', 'robots_meta', 'x_robots_tag', 'canonical', 'duplicate_title_exact', 'duplicate_description_exact'] },
  Performance: { columns: ['url', 'time_ms', 'bytes', 'word_count', 'flesch_reading_ease', 'flesch_kincaid_grade', 'gunning_fog'] },
  Render: { columns: ['url', 'render_mode', 'content_type', 'http_version', 'lang', 'crawl_timestamp'] },
};

/**
 * Extracts the result rows from a crawl report.
 *
 * @param report Either the rows array itself or an object carrying the rows
 *               under `results`.
 * @returns The rows array, or an empty array for any other shape (including
 *          null/undefined or a non-array `results`).
 */
function dataRows(report: unknown): any[] {
  if (Array.isArray(report)) return report;
  if (typeof report === 'object' && report !== null) {
    const results = (report as { results?: unknown }).results;
    if (Array.isArray(results)) return results;
  }
  return [];
}

/**
 * Returns the hostname of an absolute URL.
 *
 * @param u URL string; may be missing.
 * @returns The hostname as parsed by `URL`, or '' when `u` is empty or does
 *          not parse as an absolute URL.
 */
function hostOf(u?: string) {
  if (!u) return '';
  try {
    return new URL(u).hostname;
  } catch {
    return '';
  }
}

/**
 * Aggregates the numbers shown in the summary bar and sidebar badges.
 *
 * @param rows      Crawl result rows.
 * @param startHost Hostname of the crawled site; when empty, internal/external
 *                  are not counted at all.
 * @returns Status-code buckets (numeric statuses of 200 and above only), counts of rows with
 *          truthy `noindex` / `nofollow`, counts of rows whose
 *          `duplicate_title_exact` / `duplicate_description_exact` equals 'yes',
 *          and internal/external row counts relative to `startHost`.
 */
function makeCounts(rows: any[], startHost: string) {
  const codes: Record<'2xx' | '3xx' | '4xx' | '5xx', number> = { '2xx': 0, '3xx': 0, '4xx': 0, '5xx': 0 };
  let noindex = 0;
  let nofollow = 0;
  let dupTitles = 0;
  let dupDesc = 0;
  let internal = 0;
  let external = 0;

  for (const r of rows) {
    const s: unknown = r.status;
    if (typeof s === 'number') {
      if (s >= 200 && s < 300) codes['2xx']++;
      else if (s >= 300 && s < 400) codes['3xx']++;
      else if (s >= 400 && s < 500) codes['4xx']++;
      else if (s >= 500) codes['5xx']++;
    }

    if (r.noindex) noindex++;
    if (r.nofollow) nofollow++;
    if (r.duplicate_title_exact === 'yes') dupTitles++;
    if (r.duplicate_description_exact === 'yes') dupDesc++;

    // Rows without a URL appear in neither the Internal nor the External view,
    // so they are left out of both counts to keep the badges equal to what the
    // tables show.
    if (startHost && r.url) {
      if (hostOf(r.url) === startHost) internal++;
      else external++;
    }
  }

  return { codes, noindex, nofollow, dupTitles, dupDesc, internal, external };
}

/**
 * Badge value shown next to a sidebar view.
 *
 * @param key    View id.
 * @param counts Aggregates produced by `makeCounts`.
 * @returns The internal or external row count for those views, the sum of all
 *          status-code buckets for 'Response Codes', and '' (no badge) for
 *          every other view.
 */
function badgeCount(key: keyof typeof VIEWS, counts: ReturnType<typeof makeCounts>) {
  switch (key) {
    case 'Internal':
      return counts.internal ?? 0;
    case 'External':
      return counts.external ?? 0;
    case 'Response Codes':
      return Object.values(counts.codes).reduce((a, b) => a + b, 0);
    default:
      return '';
  }
}