493 lines
20 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// app/(defaults)/crawl/page.tsx
'use client';
import { useRouter } from 'next/navigation';
import React, { useEffect, useMemo, useState } from 'react';
/**
* Screaming-Frog style UI (tabs + summary + big table + details panel)
* - Left sidebar: views (Internal, External, Response Codes, Page Titles, Meta Description, H1, H2, Links, Issues, Performance, Render)
* - Top toolbar: search, export (JSON/CSV), column visibility quick toggles
* - Main table: sticky header, renders every filtered row (no slicing/virtualization yet); clicking a row selects it for the Details panel
* - Details panel (bottom): key/value for the selected URL
*
* Assumptions:
* - API GET https://api.crawlerx.co/crawl?url=...&max=... returns { ok, results: Row[], ... }
* - Row shape is the one produced by crawler.js in this project
*/
/**
 * Crawl page component.
 *
 * Lets the user enter a site URL and a max-URL budget, runs the crawl against
 * `apiBase`, and shows the returned rows in a filterable table with a sidebar
 * of views, a column picker, summary badges and a details panel for the
 * selected row. Redirects to `/login` when no `token` is in localStorage.
 *
 * Exports:
 * - "Export JSON" saves every crawled row.
 * - "Export CSV" saves only the currently filtered rows and visible columns.
 */
export default function CrawlPage() {
  const [siteUrl, setSiteUrl] = useState('');
  const [maxUrls, setMaxUrls] = useState<number | ''>('');
  const [autoMaxLoading, setAutoMaxLoading] = useState(false);
  const [crawlLoading, setCrawlLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const [report, setReport] = useState<unknown>(null);
  const [query, setQuery] = useState('');
  const [view, setView] = useState<keyof typeof VIEWS>('Internal');
  const [visibleCols, setVisibleCols] = useState<string[]>([]);
  const [selectedIndex, setSelectedIndex] = useState<number | null>(null);
  const apiBase = 'https://api.crawlerx.co/crawl';
  const router = useRouter();

  useEffect(() => {
    const token = localStorage.getItem("token");
    if (!token) {
      // If no token, redirect to login page
      router.push("/login");
    }
  }, [router]);

  /* ---------------- URL helpers ---------------- */
  // True when the input (with https:// prepended if no scheme was typed) parses as a URL with a hostname.
  const isValidUrl = useMemo(() => {
    try {
      if (!siteUrl) return false;
      const normalized = siteUrl.match(/^https?:\/\//i) ? siteUrl : `https://${siteUrl}`;
      const u = new URL(normalized);
      return !!u.hostname;
    } catch {
      return false;
    }
  }, [siteUrl]);

  // The input with a default https:// scheme applied; empty string for empty input.
  const normalizedUrl = useMemo(() => {
    if (!siteUrl) return '';
    return siteUrl.match(/^https?:\/\//i) ? siteUrl : `https://${siteUrl}`;
  }, [siteUrl]);

  // Hostname of the entered URL; used to split rows into Internal/External.
  const startHost = useMemo(() => {
    try { return normalizedUrl ? new URL(normalizedUrl).hostname : ''; } catch { return ''; }
  }, [normalizedUrl]);

  /* ---------------- actions ---------------- */
  /** Asks the local `/api/sitemap` route for a URL count and stores it as the max-URL budget. */
  async function autoDetectMaxFromSitemap() {
    setError(null);
    setAutoMaxLoading(true);
    try {
      if (!isValidUrl) throw new Error('Enter a valid website URL first.');
      const res = await fetch(`/api/sitemap?u=${encodeURIComponent(normalizedUrl)}`);
      if (!res.ok) throw new Error(`Sitemap probe failed (${res.status})`);
      const json = await res.json();
      if (typeof json.count !== 'number' || json.count < 1) throw new Error('Sitemap found but contains no URLs.');
      setMaxUrls(json.count);
    } catch (e: unknown) {
      setError(e instanceof Error && e.message ? e.message : 'Failed to detect Max from sitemap.');
    } finally {
      setAutoMaxLoading(false);
    }
  }

  /** Runs the crawl and stores the raw response; falls back to a budget of 50 URLs when none is set. */
  async function handleCrawl() {
    setError(null);
    setCrawlLoading(true);
    setReport(null);
    setSelectedIndex(null);
    try {
      if (!isValidUrl) throw new Error('Please enter a valid website URL (with or without https://).');
      const max = typeof maxUrls === 'number' && maxUrls > 0 ? maxUrls : 50;
      const apiUrl = `${apiBase}?url=${encodeURIComponent(normalizedUrl)}&max=${max}`;
      const res = await fetch(apiUrl);
      if (!res.ok) throw new Error(`Crawler API error: ${res.status} ${res.statusText}`);
      const data = await res.json();
      setReport(data);
    } catch (e: unknown) {
      setError(e instanceof Error && e.message ? e.message : 'Failed to crawl the site.');
    } finally {
      setCrawlLoading(false);
    }
  }

  /**
   * Triggers a browser download of `blob` under `filename`.
   * The anchor is attached to the document before the click and the object
   * URL is revoked afterwards so repeated exports do not leak blobs.
   */
  function saveBlob(blob: Blob, filename: string) {
    const url = URL.createObjectURL(blob);
    const a = document.createElement('a');
    a.href = url;
    a.download = filename;
    document.body.appendChild(a);
    a.click();
    a.remove();
    URL.revokeObjectURL(url);
  }

  /** Downloads every crawled row (unfiltered) as pretty-printed JSON. */
  function downloadJson() {
    const rows = dataRows(report);
    if (!rows.length) return;
    const blob = new Blob([JSON.stringify(rows, null, 2)], { type: 'application/json' });
    const host = startHost || 'report';
    saveBlob(blob, `crawlerx-report-${host}.json`);
  }

  /** Downloads the filtered rows, restricted to the visible columns, as CSV. */
  function exportCSV() {
    const rows = filteredRows;
    if (!rows.length) return;
    const cols = visibleCols.length ? visibleCols : defaultCols;
    const csvEscape = (v: unknown): string => {
      if (v == null) return '';
      // Objects/arrays would otherwise stringify to "[object Object]".
      const s = typeof v === 'object' ? JSON.stringify(v) : String(v);
      // NOTE: keep this regex on one line!
      return /[",\r\n]/.test(s) ? `"${s.replace(/"/g, '""')}"` : s;
    };
    const header = cols.join(',');
    const lines = rows.map((r) => cols.map((c) => csvEscape(r[c])).join(','));
    const csv = [header, ...lines].join('\n');
    saveBlob(new Blob([csv], { type: 'text/csv;charset=utf-8' }), 'crawl-report.csv');
  }

  /* ---------------- data shaping ---------------- */
  const rows = useMemo(() => dataRows(report), [report]);

  // establish columns from data sample (first 40 rows only)
  const allColumns = useMemo(() => {
    const sample = rows.slice(0, 40);
    const set = new Set<string>();
    sample.forEach((r) => Object.keys(r).forEach((k) => set.add(k)));
    return Array.from(set);
  }, [rows]);

  const defaultCols = useMemo(() => PRESets['Internal'].columns, []);

  // initialize visible cols on first load
  useEffect(() => {
    if (!rows.length) return;
    if (!visibleCols.length) setVisibleCols(PRESets[view]?.columns ?? defaultCols);
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [rows.length]);

  // recompute on view change
  useEffect(() => {
    setVisibleCols(PRESets[view]?.columns ?? defaultCols);
  }, [view, defaultCols]);

  const filteredRows = useMemo(() => {
    let base = [...rows];
    // view-scoped filtering (Internal/External)
    if (view === 'Internal' && startHost) {
      base = base.filter((r) => hostOf(r.url) === startHost);
    } else if (view === 'External' && startHost) {
      base = base.filter((r) => r.url && hostOf(r.url) !== startHost);
    }
    // Response Codes tab: only include rows with status
    if (view === 'Response Codes') {
      base = base.filter((r) => typeof r.status === 'number');
    }
    // Text tabs could keep everything; columns drive the UI
    // text search across url/title/desc/h1/h2
    const q = query.trim().toLowerCase();
    if (q) {
      base = base.filter((r) =>
        [r.url, r.title, r.meta_description, r.h1_1, r.h2_1]
          .map((v) => String(v || '').toLowerCase())
          .some((s) => s.includes(q))
      );
    }
    return base;
  }, [rows, query, view, startHost]);

  const counts = useMemo(() => makeCounts(rows, startHost), [rows, startHost]);

  // selectedIndex is a position inside filteredRows, so it is only meaningful
  // for the list it was clicked in; clear it whenever that list changes.
  useEffect(() => {
    setSelectedIndex(null);
  }, [filteredRows]);

  const selected = selectedIndex != null ? filteredRows[selectedIndex] : null;

  /* ---------------- render ---------------- */
  return (
    <div className="min-h-screen bg-white">
      <div className="mx-auto max-w-[1400px] px-4 py-6">
        {/* Header / Controls */}
        <div className="mb-3 flex items-center justify-between gap-2">
          <h1 className="text-2xl font-semibold text-gray-900">CrawlerX Crawl & Report</h1>
          <div className="flex items-center gap-2">
            <button onClick={downloadJson} className="rounded-lg border border-gray-300 bg-white px-3 py-2 text-sm hover:bg-gray-50">Export JSON</button>
            <button onClick={exportCSV} className="rounded-lg border border-gray-300 bg-white px-3 py-2 text-sm hover:bg-gray-50">Export CSV</button>
            <button onClick={handleCrawl} disabled={!isValidUrl || crawlLoading} className="rounded-lg bg-green-600 px-3 py-2 text-white text-sm hover:bg-green-700 disabled:opacity-50">{crawlLoading ? 'Crawling…' : 'Run Crawl'}</button>
          </div>
        </div>
        {/* URL + Max bar */}
        <div className="grid gap-3 sm:grid-cols-[1fr_auto_auto] items-end bg-gray-50 p-3 rounded-xl border border-gray-200">
          <div className="flex flex-col">
            <label className="text-xs font-medium text-gray-700 mb-1">Website URL</label>
            <input type="url" value={siteUrl} onChange={(e) => setSiteUrl(e.target.value)} placeholder="https://example.com" className="w-full rounded-lg border border-gray-300 px-3 py-2 focus:outline-none focus:ring-4 focus:ring-blue-100 focus:border-blue-500" />
          </div>
          <div className="flex flex-col">
            <label className="text-xs font-medium text-gray-700 mb-1 flex items-center gap-2">Max URLs
              <button type="button" onClick={autoDetectMaxFromSitemap} disabled={!isValidUrl || autoMaxLoading} className="text-[11px] rounded-md px-2 py-1 border border-gray-300 hover:bg-white disabled:opacity-50">{autoMaxLoading ? 'Detecting…' : 'Autodetect'}</button>
            </label>
            <input type="number" min={1} value={maxUrls} onChange={(e) => setMaxUrls(e.target.value ? Number(e.target.value) : '')} placeholder="e.g. 50" className="w-36 rounded-lg border border-gray-300 px-3 py-2 focus:outline-none focus:ring-4 focus:ring-blue-100 focus:border-blue-500" />
          </div>
          <div className="flex flex-col">
            <label className="text-xs font-medium text-gray-700 mb-1">Search</label>
            <input type="search" value={query} onChange={(e) => setQuery(e.target.value)} placeholder="Filter rows (url, title, h1, description)…" className="w-64 rounded-lg border border-gray-300 px-3 py-2 focus:outline-none focus:ring-4 focus:ring-blue-100 focus:border-blue-500" />
          </div>
        </div>
        {error && <div className="mt-3 rounded-lg border border-red-200 bg-red-50 px-4 py-2 text-sm text-red-700">{error}</div>}
        {/* Main layout */}
        <div className="mt-4 grid grid-cols-12 gap-4">
          {/* Sidebar (views) */}
          <aside className="col-span-12 md:col-span-3 lg:col-span-2">
            <nav className="sticky top-4 space-y-1">
              {Object.entries(VIEWS).map(([key, label]) => (
                <button
                  key={key}
                  onClick={() => setView(key as keyof typeof VIEWS)}
                  className={`w-full text-left px-3 py-2 rounded-md border ${view === key ? 'bg-blue-600 text-white border-blue-600' : 'bg-white text-gray-800 border-gray-200 hover:bg-gray-50'}`}
                >
                  <div className="flex items-center justify-between">
                    <span>{label}</span>
                    <span className="text-xs opacity-70">{badgeCount(key as keyof typeof VIEWS, counts)}</span>
                  </div>
                </button>
              ))}
            </nav>
          </aside>
          {/* Content */}
          <section className="col-span-12 md:col-span-9 lg:col-span-10">
            {/* Summary cards */}
            <SummaryBar counts={counts} total={rows.length} />
            {/* Column toggles */}
            <ColumnPicker
              allColumns={allColumns}
              preset={PRESets[view]?.columns ?? defaultCols}
              visible={visibleCols}
              setVisible={setVisibleCols}
            />
            {/* Table */}
            <div className="mt-2 overflow-auto rounded-xl border border-gray-200" style={{ maxHeight: '60vh' }}>
              <table className="min-w-full text-xs">
                <thead className="sticky top-0 bg-gray-50 z-10 border-b">
                  <tr>
                    {visibleCols.map((c) => (
                      <th key={c} className="px-3 py-2 font-semibold text-gray-700 whitespace-nowrap">{c}</th>
                    ))}
                  </tr>
                </thead>
                <tbody>
                  {filteredRows.map((r, i) => (
                    <tr key={i} onClick={() => setSelectedIndex(i)} className={`cursor-pointer ${i % 2 ? 'bg-gray-50' : 'bg-white'} ${selectedIndex === i ? 'ring-1 ring-blue-500' : ''}`}>
                      {visibleCols.map((c) => (
                        <td key={c} className="px-3 py-2 align-top text-gray-800 max-w-[36rem]">
                          {renderCell(r[c])}
                        </td>
                      ))}
                    </tr>
                  ))}
                </tbody>
              </table>
            </div>
            {/* Details panel */}
            <div className="mt-3 rounded-xl border border-gray-200">
              <header className="px-3 py-2 border-b bg-gray-50 text-sm font-medium text-gray-800">URL Details</header>
              <div className="p-3 text-sm">
                {selected ? <DetailGrid row={selected} /> : <div className="text-gray-500">Select a row to see full details (headers, H1/H2, robots, schema, links, timings)</div>}
              </div>
            </div>
          </section>
        </div>
      </div>
    </div>
  );
}
/* ---------------- components ---------------- */
/**
 * Row of pill-shaped badges summarising the crawl: total pages, status-code
 * buckets, noindex/nofollow counts and duplicate title/description counts.
 * A missing value is displayed as 0.
 */
function SummaryBar({ counts, total }: { counts: ReturnType<typeof makeCounts>; total: number }) {
  const { codes } = counts;
  // [label, value] pairs in display order.
  const pills: [string, number][] = [
    ['Pages crawled', total],
    ['2xx', codes['2xx']],
    ['3xx', codes['3xx']],
    ['4xx', codes['4xx']],
    ['5xx', codes['5xx']],
    ['Noindex', counts.noindex],
    ['Nofollow', counts.nofollow],
    ['Duplicate titles', counts.dupTitles],
    ['Duplicate desc', counts.dupDesc],
  ];

  return (
    <div className="mb-3 flex flex-wrap gap-2">
      {pills.map(([label, value]) => (
        <span key={label} className="inline-flex items-center gap-2 rounded-full border border-gray-200 bg-gray-50 px-3 py-1 text-xs text-gray-700">
          <span className="font-semibold">{value ?? 0}</span>
          <span className="text-gray-500">{label}</span>
        </span>
      ))}
    </div>
  );
}
/**
 * Collapsible checklist for choosing which columns the table shows.
 *
 * The checklist offers the union of `preset`, `visible` and `allColumns`
 * (in that order, de-duplicated). The header buttons replace the selection
 * with the preset, every known column, or the first six preset columns.
 */
function ColumnPicker({ allColumns, preset, visible, setVisible }: { allColumns: string[]; preset: string[]; visible: string[]; setVisible: (v: string[]) => void; }) {
  const [expanded, setExpanded] = useState(true);

  const choices = useMemo(() => {
    const unique = new Set([...preset, ...visible, ...allColumns]);
    return Array.from(unique);
  }, [preset, visible, allColumns]);

  // Remove the column if it is currently shown, otherwise append it.
  function flip(name: string) {
    if (visible.includes(name)) {
      setVisible(visible.filter((c) => c !== name));
    } else {
      setVisible([...visible, name]);
    }
  }

  const showPreset = () => setVisible(preset);
  const showAll = () => setVisible(choices);
  const showMinimal = () => setVisible(preset.slice(0, 6));
  const flipExpanded = () => setExpanded((o) => !o);

  return (
    <div className="rounded-xl border border-gray-200 bg-white">
      <div className="flex items-center justify-between px-3 py-2">
        <div className="text-sm font-medium text-gray-800">Columns</div>
        <div className="flex gap-2">
          <button className="text-xs rounded-md border px-2 py-1" onClick={showPreset}>Preset</button>
          <button className="text-xs rounded-md border px-2 py-1" onClick={showAll}>All</button>
          <button className="text-xs rounded-md border px-2 py-1" onClick={showMinimal}>Minimal</button>
          <button className="text-xs rounded-md border px-2 py-1" onClick={flipExpanded}>{expanded ? 'Hide' : 'Show'}</button>
        </div>
      </div>
      {expanded && (
        <div className="px-3 pb-2 grid grid-cols-2 md:grid-cols-3 lg:grid-cols-4 gap-2 max-h-40 overflow-auto">
          {choices.map((name) => (
            <label key={name} className="flex gap-2 text-xs items-center">
              <input type="checkbox" className="rounded border-gray-300" checked={visible.includes(name)} onChange={() => flip(name)} />
              <span className="truncate" title={name}>{name}</span>
            </label>
          ))}
        </div>
      )}
    </div>
  );
}
/**
 * Responsive grid of cards, one per own enumerable property of `row`:
 * the property name as a small caption and its value rendered via `renderCell`.
 */
function DetailGrid({ row }: { row: Record<string, any> }) {
  const cards = Object.entries(row).map(([field, content]) => (
    <div key={field} className="rounded-lg border border-gray-200 p-2">
      <div className="text-[11px] uppercase tracking-wide text-gray-500">{field}</div>
      <div className="mt-1 text-[13px] text-gray-900 break-words">{renderCell(content)}</div>
    </div>
  ));

  return <div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-3">{cards}</div>;
}
/**
 * Turns an arbitrary cell value into JSX:
 * - null/undefined: an empty grey span
 * - http(s) string: a link opening in a new tab
 * - other string: text, cut to 220 characters plus an ellipsis when longer
 * - number/boolean: its string form
 * - array: an "[N items]" placeholder
 * - anything else: a collapsible block with the pretty-printed JSON
 */
function renderCell(value: any) {
  if (value == null) return <span className="text-gray-400"></span>;

  switch (typeof value) {
    case 'string': {
      const looksLikeLink = /^https?:\/\//i.test(value);
      if (!looksLikeLink) {
        const shown = value.length > 220 ? value.slice(0, 220) + '…' : value;
        return <span className="break-words">{shown}</span>;
      }
      return (
        <a href={value} target="_blank" rel="noreferrer" className="text-blue-600 hover:underline break-all">
          {value}
        </a>
      );
    }
    case 'number':
    case 'boolean':
      return <span>{String(value)}</span>;
    default:
      break;
  }

  if (Array.isArray(value)) {
    return <span className="text-gray-700">[{value.length} items]</span>;
  }

  const pretty = JSON.stringify(value, null, 2);
  return (
    <details>
      <summary className="cursor-pointer text-gray-700">object</summary>
      <pre className="mt-1 whitespace-pre-wrap break-words bg-gray-100 rounded-lg p-2 text-[11px]">{pretty}</pre>
    </details>
  );
}
/* ---------------- helpers ---------------- */
/**
 * Sidebar views, keyed by view id with the label shown on the button as value.
 * `keyof typeof VIEWS` is the type of the page's `view` state, and the sidebar
 * renders entries in this declaration order. `as const` keeps keys and labels
 * as literal types.
 */
const VIEWS = {
Internal: 'Internal',
External: 'External',
'Response Codes': 'Response Codes',
'Page Titles': 'Page Titles',
'Meta Description': 'Meta Description',
H1: 'H1',
H2: 'H2',
Links: 'Links',
Issues: 'Issues',
Performance: 'Performance',
Render: 'Render',
} as const;
/**
 * Preset column list per view. The page shows these columns whenever the
 * view changes, and the column picker's "Preset"/"Minimal" buttons are fed
 * from the active view's list. The 'Internal' entry doubles as the fallback
 * when a view has no preset. Column names are row property keys.
 */
const PRESets: Record<string, { columns: string[] }> = {
Internal: {
columns: ['url', 'status', 'content_type', 'title', 'meta_description', 'h1_1', 'inlinks', 'outlinks'],
},
External: {
columns: ['url', 'status', 'content_type', 'title', 'meta_description'],
},
'Response Codes': {
columns: ['url', 'status', 'status_text', 'last_modified', 'set_cookie'],
},
'Page Titles': {
columns: ['url', 'title', 'title_length', 'title_pixel_width', 'duplicate_title_exact', 'nearest_title_similarity', 'nearest_title_url'],
},
'Meta Description': {
columns: ['url', 'meta_description', 'meta_description_length', 'meta_description_pixel_width', 'duplicate_description_exact', 'nearest_description_similarity', 'nearest_description_url'],
},
H1: { columns: ['url', 'h1_1', 'h1_1_length', 'h1_1_pixel_width', 'h1_2'] },
H2: { columns: ['url', 'h2_1', 'h2_2'] },
Links: { columns: ['url', 'inlinks', 'outlinks', 'nearest_title_url', 'nearest_description_url'] },
Issues: { columns: ['url', 'noindex', 'nofollow', 'robots_meta', 'x_robots_tag', 'canonical', 'duplicate_title_exact', 'duplicate_description_exact'] },
Performance: { columns: ['url', 'time_ms', 'bytes', 'word_count', 'flesch_reading_ease', 'flesch_kincaid_grade', 'gunning_fog'] },
Render: { columns: ['url', 'render_mode', 'content_type', 'http_version', 'lang', 'crawl_timestamp'] },
};
/**
 * Extracts the row array from a crawl response.
 * Accepts either a bare array or an object whose `results` property is an
 * array; anything else (including null/undefined) yields an empty array.
 * The returned array is the same instance that was passed in, not a copy.
 */
function dataRows(report: any): any[] {
  if (Array.isArray(report)) return report;
  const nested = report?.results;
  if (Array.isArray(nested)) return nested;
  return [];
}
/**
 * Hostname of an absolute URL string.
 * Returns '' when the argument is missing/empty or cannot be parsed by `URL`.
 */
function hostOf(u?: string) {
  if (!u) return '';
  let parsed: URL;
  try {
    parsed = new URL(u);
  } catch {
    return '';
  }
  return parsed.hostname;
}
/**
 * Aggregates summary counters over the crawl rows.
 *
 * @param rows Crawl rows; reads `status`, `noindex`, `nofollow`,
 *   `duplicate_title_exact`, `duplicate_description_exact` and `url`.
 * @param startHost Hostname of the crawl's start URL. When empty, rows are
 *   not classified and `internal`/`external` stay 0.
 * @returns Status-code buckets plus noindex/nofollow, duplicate and
 *   internal/external counts.
 *
 * Rows without a `url` count as neither internal nor external. This matches
 * the page's Internal/External row filters, so the sidebar badges agree with
 * the number of rows each view shows.
 */
function makeCounts(rows: any[], startHost: string) {
  const codes: Record<'2xx' | '3xx' | '4xx' | '5xx', number> = { '2xx': 0, '3xx': 0, '4xx': 0, '5xx': 0 };
  let noindex = 0,
    nofollow = 0,
    dupTitles = 0,
    dupDesc = 0,
    internal = 0,
    external = 0;

  for (const r of rows) {
    const s: unknown = r.status;
    // Non-numeric statuses and anything below 200 fall into no bucket.
    if (typeof s === 'number') {
      if (s >= 200 && s < 300) codes['2xx']++;
      else if (s >= 300 && s < 400) codes['3xx']++;
      else if (s >= 400 && s < 500) codes['4xx']++;
      else if (s >= 500) codes['5xx']++;
    }

    if (r.noindex) noindex++;
    if (r.nofollow) nofollow++;
    if (r.duplicate_title_exact === 'yes') dupTitles++;
    if (r.duplicate_description_exact === 'yes') dupDesc++;

    // Only resolve the host when there is something to classify.
    if (startHost && r.url) {
      if (hostOf(r.url) === startHost) internal++;
      else external++;
    }
  }

  return { codes, noindex, nofollow, dupTitles, dupDesc, internal, external };
}
/**
 * Number shown next to a sidebar view label.
 * Internal/External use the matching counter, Response Codes sums all
 * status-code buckets, and every other view shows no badge ('').
 */
function badgeCount(key: keyof typeof VIEWS, counts: ReturnType<typeof makeCounts>) {
  if (key === 'Internal') return counts.internal ?? 0;
  if (key === 'External') return counts.external ?? 0;
  if (key === 'Response Codes') {
    let sum = 0;
    for (const bucket of Object.values(counts.codes)) sum += bucket;
    return sum;
  }
  return '';
}