updated the front end code

This commit is contained in:
parent 5866084fb1
commit a6d89ca7bd
@@ -1,406 +0,0 @@
'use client';

import { useMemo, useRef, useState, useEffect } from "react";

// Path: app/(defaults)/crawl/page.tsx (App Router)
// TailwindCSS assumed.

type Row = Record<string, any>;

function csvEscape(v: unknown) {
  if (v === undefined || v === null) return "";
  const s = String(v);
  return /[",\n]/.test(s) ? `"${s.replace(/"/g, '""')}"` : s;
}

function toCSV(rows: Row[], headers: string[]) {
  const lines = [headers.join(",")];
  for (const r of rows) lines.push(headers.map(h => csvEscape(r[h])).join(","));
  return lines.join("\n");
}

function download(filename: string, mime: string, text: string) {
  const blob = new Blob([text], { type: mime });
  const url = URL.createObjectURL(blob);
  const a = document.createElement("a");
  a.href = url;
  a.download = filename;
  a.click();
  URL.revokeObjectURL(url);
}

function getUnionKeys(rows: Row[]): string[] {
  const set = new Set<string>();
  for (const r of rows) Object.keys(r || {}).forEach(k => set.add(k));
  // Prefer a useful order if present:
  const preferred = [
    "url","status","status_text","time_ms","bytes","content_type","http_version",
    "title","title_length","title_pixel_width",
    "meta_description","meta_description_length","meta_description_pixel_width",
    "h1_1","h1_1_length","h1_1_pixel_width","h1_2","h1_2_length","h1_2_pixel_width",
    "h2_1","h2_2",
    "canonical","robots_meta","x_robots_tag","noindex","nofollow",
    "lang","word_count","flesch_reading_ease","flesch_kincaid_grade",
    "gunning_fog","coleman_liau","ari","smog",
    "schema_types","inlinks","outlinks","render_mode",
    "last_modified","set_cookie","crawl_timestamp",
    "duplicate_title_exact","nearest_title_similarity","nearest_title_url",
    "duplicate_description_exact","nearest_description_similarity","nearest_description_url"
  ];
  const others = [...set].filter(k => !preferred.includes(k)).sort();
  return [...preferred.filter(k => set.has(k)), ...others];
}

function coerce(value: any): string {
  if (Array.isArray(value)) return value.join(" | ");
  if (typeof value === "object" && value !== null) return JSON.stringify(value);
  return String(value ?? "");
}

function truncate(s: string, n: number) {
  return s.length > n ? s.slice(0, n - 1) + "…" : s;
}

function renderCell(value: any) {
  if (value == null) return <span className="text-gray-400">—</span>;
  if (typeof value === "string") {
    if (/^https?:\/\//i.test(value)) {
      return <a href={value} target="_blank" rel="noreferrer" className="text-blue-600 hover:underline break-all">{value}</a>;
    }
    return <span className="break-words">{truncate(value, 220)}</span>;
  }
  if (typeof value === "number" || typeof value === "boolean") return <span>{String(value)}</span>;
  if (Array.isArray(value)) return <span className="text-gray-700">[{value.length} items]</span>;
  return (
    <details>
      <summary className="cursor-pointer text-gray-700">object</summary>
      <pre className="mt-1 whitespace-pre-wrap break-words bg-gray-100 rounded-lg p-2 text-[11px]">{JSON.stringify(value, null, 2)}</pre>
    </details>
  );
}

function summaryChips(report: any): { label: string; value: string | number }[] {
  const chips: { label: string; value: string | number }[] = [];
  const arr = Array.isArray(report) ? report : Array.isArray(report?.results) ? report.results : null;
  if (arr) chips.push({ label: "Pages crawled", value: arr.length });
  const totals = report?.totals || report?.summary || {};
  for (const [k, v] of Object.entries(totals)) {
    if (typeof v === "number") chips.push({ label: k, value: v });
  }
  return chips.slice(0, 8);
}

export default function CrawlPage() {
  const [siteUrl, setSiteUrl] = useState("");
  const [maxUrls, setMaxUrls] = useState<number | "">("");
  const [autoMaxLoading, setAutoMaxLoading] = useState(false);
  const [crawlLoading, setCrawlLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const [report, setReport] = useState<any>(null);

  const [query, setQuery] = useState(""); // ✅ NEW: quick search
  const [visible, setVisible] = useState<Record<string, boolean>>({}); // ✅ NEW: column toggles
  const [sortBy, setSortBy] = useState<string>("url"); // ✅ NEW: sorting
  const [sortDir, setSortDir] = useState<"asc"|"desc">("asc");

  const apiBase = "https://app.crawlerx.co/crawl";

  const isValidUrl = useMemo(() => {
    try {
      if (!siteUrl) return false;
      const normalized = siteUrl.match(/^https?:\/\//i) ? siteUrl : `https://${siteUrl}`;
      const u = new URL(normalized);
      return !!u.hostname;
    } catch {
      return false;
    }
  }, [siteUrl]);

  const normalizedUrl = useMemo(() => {
    if (!siteUrl) return "";
    return siteUrl.match(/^https?:\/\//i) ? siteUrl : `https://${siteUrl}`;
  }, [siteUrl]);

  async function autoDetectMaxFromSitemap() {
    setError(null);
    setAutoMaxLoading(true);
    try {
      if (!isValidUrl) throw new Error("Enter a valid website URL first.");
      const res = await fetch(`/api/sitemap?u=${encodeURIComponent(normalizedUrl)}`);
      if (!res.ok) throw new Error(`Sitemap probe failed (${res.status})`);
      const json = await res.json();
      if (typeof json.count !== "number" || json.count < 1) throw new Error("Sitemap found but contains no URLs.");
      setMaxUrls(json.count);
    } catch (e: any) {
      setError(e?.message || "Failed to detect Max from sitemap.");
    } finally {
      setAutoMaxLoading(false);
    }
  }

  async function handleCrawl() {
    setError(null);
    setCrawlLoading(true);
    setReport(null);
    try {
      if (!isValidUrl) throw new Error("Please enter a valid website URL (with or without https://).");
      const max = typeof maxUrls === "number" && maxUrls > 0 ? maxUrls : 50;
      const apiUrl = `${apiBase}?url=${encodeURIComponent(normalizedUrl)}&max=${max}`;
      const res = await fetch(apiUrl);
      if (!res.ok) throw new Error(`Crawler API error: ${res.status} ${res.statusText}`);
      const data = await res.json();
      setReport(data);
    } catch (e: any) {
      setError(e?.message || "Failed to crawl the site.");
    } finally {
      setCrawlLoading(false);
    }
  }

  function downloadJson() {
    if (!report) return;
    const blob = new Blob([JSON.stringify(report, null, 2)], { type: "application/json" });
    const url = URL.createObjectURL(blob);
    const a = document.createElement("a");
    a.href = url;
    const host = (() => { try { return new URL(normalizedUrl).hostname; } catch { return "report"; }})();
    a.download = `crawlerx-report-${host}.json`;
    document.body.appendChild(a);
    a.click();
    a.remove();
    URL.revokeObjectURL(url);
  }

  // ✅ Build table rows from report (supports either array or { results: [] })
  const rawRows: Row[] = useMemo(() => {
    if (!report) return [];
    if (Array.isArray(report)) return report as Row[];
    if (Array.isArray(report?.results)) return report.results as Row[];
    return [];
  }, [report]);

  // ✅ Determine columns = union of keys across rows
  const columns = useMemo(() => getUnionKeys(rawRows), [rawRows]);

  // ✅ Initialize column visibility when columns change (default all visible)
  useEffect(() => {
    if (!columns.length) return;
    setVisible(prev => {
      const next: Record<string, boolean> = { ...prev };
      let changed = false;
      for (const c of columns) if (next[c] === undefined) { next[c] = true; changed = true; }
      return changed ? next : prev;
    });
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [columns.join("|")]);

  // ✅ Search (filters visible columns)
  const filtered = useMemo(() => {
    if (!query.trim()) return rawRows;
    const q = query.toLowerCase();
    return rawRows.filter(r => {
      for (const h of columns) {
        if (!visible[h]) continue;
        const v = coerce(r[h]).toLowerCase();
        if (v.includes(q)) return true;
      }
      return false;
    });
  }, [rawRows, query, columns, visible]);

  // ✅ Sorting
  const sorted = useMemo(() => {
    const copy = [...filtered];
    copy.sort((a, b) => {
      const va = a[sortBy], vb = b[sortBy];
      if (va == null && vb == null) return 0;
      if (va == null) return sortDir === "asc" ? -1 : 1;
      if (vb == null) return sortDir === "asc" ? 1 : -1;
      const sa = typeof va === "number" ? va : String(va);
      const sb = typeof vb === "number" ? vb : String(vb);
      if (sa < sb) return sortDir === "asc" ? -1 : 1;
      if (sa > sb) return sortDir === "asc" ? 1 : -1;
      return 0;
    });
    return copy;
  }, [filtered, sortBy, sortDir]);

  function onHeaderClick(h: string) {
    if (sortBy === h) setSortDir(d => (d === "asc" ? "desc" : "asc"));
    else { setSortBy(h); setSortDir("asc"); }
  }

  function exportCSV() {
    if (!sorted.length) return;
    const activeHeaders = columns.filter(h => visible[h]);
    const csv = toCSV(sorted, activeHeaders);
    const host = (() => { try { return new URL(normalizedUrl).hostname; } catch { return "report"; }})();
    download(`crawlerx-report-${host}.csv`, "text/csv;charset=utf-8", csv);
  }

  function exportJSON() {
    if (!sorted.length) return;
    const host = (() => { try { return new URL(normalizedUrl).hostname; } catch { return "report"; }})();
    download(`crawlerx-report-${host}.json`, "application/json", JSON.stringify(sorted, null, 2));
  }

  return (
    <div className="min-h-screen bg-gradient-to-b from-gray-50 to-white">
      <div className="mx-auto max-w-6xl px-4 py-10">
        <header className="mb-8">
          <h1 className="text-3xl sm:text-4xl font-semibold tracking-tight text-gray-900">CrawlerX — Crawl & Report</h1>
          <p className="mt-2 text-gray-600">
            Enter a website, auto-detect the sitemap size for <span className="font-medium">Max</span>, then run a crawl via the CrawlerX API and download the JSON report.
          </p>
        </header>

        <div className="grid gap-4 sm:grid-cols-[1fr_auto_auto] items-end bg-white p-4 rounded-2xl shadow-sm border border-gray-200">
          <div className="flex flex-col">
            <label className="text-sm font-medium text-gray-700 mb-1">Website URL</label>
            <input
              type="url"
              value={siteUrl}
              onChange={(e) => setSiteUrl(e.target.value)}
              placeholder="https://example.com"
              className="w-full rounded-xl border border-gray-300 px-3 py-2 focus:outline-none focus:ring-4 focus:ring-blue-100 focus:border-blue-500"
            />
          </div>

          <div className="flex flex-col">
            <label className="text-sm font-medium text-gray-700 mb-1 flex items-center gap-2">
              Max URLs
              <button
                type="button"
                onClick={autoDetectMaxFromSitemap}
                disabled={!isValidUrl || autoMaxLoading}
                className="text-xs rounded-lg px-2 py-1 border border-gray-300 hover:bg-gray-50 disabled:opacity-50"
                title="Fetch and count URLs from the site's sitemap.xml"
              >
                {autoMaxLoading ? "Detecting…" : "Auto-detect from sitemap"}
              </button>
            </label>
            <input
              type="number"
              min={1}
              value={maxUrls}
              onChange={(e) => setMaxUrls(e.target.value ? Number(e.target.value) : "")}
              placeholder="e.g. 50"
              className="w-40 rounded-xl border border-gray-300 px-3 py-2 focus:outline-none focus:ring-4 focus:ring-blue-100 focus:border-blue-500"
            />
          </div>

          <div className="flex gap-3 sm:justify-end">
            <button
              type="button"
              onClick={handleCrawl}
              disabled={!isValidUrl || crawlLoading}
              className="h-10 mt-6 inline-flex items-center justify-center rounded-xl bg-blue-600 px-4 text-white font-medium shadow-sm hover:bg-blue-700 disabled:opacity-50"
            >
              {crawlLoading ? "Crawling…" : "Run Crawl"}
            </button>
          </div>
        </div>

        {error && (
          <div className="mt-4 rounded-xl border border-red-200 bg-red-50 px-4 py-3 text-sm text-red-700">{error}</div>
        )}

        {report && (
          <section className="mt-8">
            <div className="flex items-center justify-between mb-3">
              <h2 className="text-2xl font-semibold text-gray-900">Crawler Report</h2>
              <div className="flex gap-2">
                <input
                  value={query}
                  onChange={(e) => setQuery(e.target.value)}
                  placeholder="Search (filters visible columns)"
                  className="border rounded-xl px-3 py-2 text-sm"
                />
                <button
                  onClick={exportCSV}
                  className="inline-flex items-center rounded-xl border border-gray-300 bg-white px-3 py-2 text-sm font-medium hover:bg-gray-50"
                >
                  Export CSV
                </button>
                <button
                  onClick={exportJSON}
                  className="inline-flex items-center rounded-xl border border-gray-300 bg-white px-3 py-2 text-sm font-medium hover:bg-gray-50"
                >
                  Export JSON
                </button>
                <button
                  onClick={downloadJson}
                  className="inline-flex items-center rounded-xl border border-gray-300 bg-white px-3 py-2 text-sm font-medium hover:bg-gray-50"
                >
                  Download Raw (original)
                </button>
              </div>
            </div>

            <div className="flex flex-wrap gap-2 mb-4">
              {summaryChips(report).map((c) => (
                <span key={c.label} className="inline-flex items-center gap-2 rounded-full border border-gray-200 bg-gray-50 px-3 py-1 text-xs text-gray-700">
                  <span className="font-semibold">{c.value}</span>
                  <span className="text-gray-500">{c.label}</span>
                </span>
              ))}
            </div>

            {/* Column toggles */}
            <div className="mb-3 max-h-40 overflow-auto border rounded-xl p-2 bg-white">
              <div className="text-xs font-medium mb-2">Columns</div>
              <div className="grid grid-cols-2 sm:grid-cols-3 md:grid-cols-4 gap-2 text-xs">
                {columns.map((h) => (
                  <label key={h} className="flex items-center gap-2">
                    <input
                      type="checkbox"
                      checked={!!visible[h]}
                      onChange={(e) => setVisible(v => ({ ...v, [h]: e.target.checked }))}
                    />
                    <span className="truncate" title={h}>{h}</span>
                  </label>
                ))}
              </div>
            </div>

            {/* Table */}
            {sorted.length > 0 ? (
              <div className="overflow-auto rounded-2xl border border-gray-200">
                <table className="min-w-full text-sm">
                  <thead className="bg-gray-50 text-left sticky top-0 z-10">
                    <tr>
                      {columns.filter(c => visible[c]).map((c) => (
                        <th
                          key={c}
                          className="px-3 py-2 font-semibold text-gray-700 whitespace-nowrap cursor-pointer select-none"
                          onClick={() => onHeaderClick(c)}
                          title="Click to sort"
                        >
                          <div className="flex items-center gap-1">
                            <span>{c}</span>
                            {sortBy === c && <span className="text-xs">{sortDir === "asc" ? "▲" : "▼"}</span>}
                          </div>
                        </th>
                      ))}
                    </tr>
                  </thead>
                  <tbody>
                    {sorted.map((r: any, idx: number) => (
                      <tr key={idx} className="odd:bg-white even:bg-gray-50">
                        {columns.filter(c => visible[c]).map((c) => (
                          <td key={c} className="px-3 py-2 align-top text-gray-800 max-w-[28rem] whitespace-pre-wrap">
                            {renderCell(r[c])}
                          </td>
                        ))}
                      </tr>
                    ))}
                  </tbody>
                </table>
              </div>
            ) : (
              <pre className="mt-3 whitespace-pre-wrap break-words rounded-2xl bg-gray-900 text-gray-100 p-4 text-xs overflow-auto">
                {JSON.stringify(report, null, 2)}
              </pre>
            )}
          </section>
        )}

        <p className="mt-6 text-xs text-gray-500">
          Tip: If sitemap auto-detection fails due to server restrictions, enter Max manually or use the /api/sitemap proxy.
        </p>
      </div>
    </div>
  );
}
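
A note on the CSV helpers above: csvEscape quotes a value only when it contains a comma, double quote, or newline, and doubles embedded quotes, which matches RFC 4180. A small standalone illustration using the same toCSV/csvEscape definitions (the sample rows are made up):

// Example input: one value needs quoting, one does not.
const sample: Row[] = [
  { url: "https://example.com/a", title: 'Say "hi", world' },
  { url: "https://example.com/b", title: "plain title" },
];

// toCSV emits a header row, then one escaped line per row:
//   url,title
//   https://example.com/a,"Say ""hi"", world"
//   https://example.com/b,plain title
console.log(toCSV(sample, ["url", "title"]));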
@@ -1,257 +0,0 @@
'use client';

import { useMemo, useState } from "react";

// Path: app/(defaults)/crawl/page.tsx (App Router)
// If using Pages Router, place at pages/crawl.tsx
// TailwindCSS assumed.

export default function CrawlPage() {
  const [siteUrl, setSiteUrl] = useState("");
  const [maxUrls, setMaxUrls] = useState<number | "">("");
  const [autoMaxLoading, setAutoMaxLoading] = useState(false);
  const [crawlLoading, setCrawlLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const [report, setReport] = useState<any>(null);

  const apiBase = "https://app.crawlerx.co/crawl";

  const isValidUrl = useMemo(() => {
    try {
      if (!siteUrl) return false;
      const normalized = siteUrl.match(/^https?:\/\//i) ? siteUrl : `https://${siteUrl}`;
      const u = new URL(normalized);
      return !!u.hostname;
    } catch {
      return false;
    }
  }, [siteUrl]);

  const normalizedUrl = useMemo(() => {
    if (!siteUrl) return "";
    return siteUrl.match(/^https?:\/\//i) ? siteUrl : `https://${siteUrl}`;
  }, [siteUrl]);

  async function autoDetectMaxFromSitemap() {
    setError(null);
    setAutoMaxLoading(true);
    try {
      if (!isValidUrl) throw new Error("Enter a valid website URL first.");
      // Server-side proxy avoids CORS
      const res = await fetch(`/api/sitemap?u=${encodeURIComponent(normalizedUrl)}`);
      if (!res.ok) throw new Error(`Sitemap probe failed (${res.status})`);
      const json = await res.json();
      if (typeof json.count !== "number" || json.count < 1) throw new Error("Sitemap found but contains no URLs.");
      setMaxUrls(json.count);
    } catch (e: any) {
      setError(e?.message || "Failed to detect Max from sitemap.");
    } finally {
      setAutoMaxLoading(false);
    }
  }

  async function handleCrawl() {
    setError(null);
    setCrawlLoading(true);
    setReport(null);

    try {
      if (!isValidUrl) throw new Error("Please enter a valid website URL (with or without https://).");
      const max = typeof maxUrls === "number" && maxUrls > 0 ? maxUrls : 50;
      const apiUrl = `${apiBase}?url=${encodeURIComponent(normalizedUrl)}&max=${max}`;
      const res = await fetch(apiUrl);
      if (!res.ok) throw new Error(`Crawler API error: ${res.status} ${res.statusText}`);
      const data = await res.json();
      setReport(data);
    } catch (e: any) {
      setError(e?.message || "Failed to crawl the site.");
    } finally {
      setCrawlLoading(false);
    }
  }

  function downloadJson() {
    if (!report) return;
    const blob = new Blob([JSON.stringify(report, null, 2)], { type: "application/json" });
    const url = URL.createObjectURL(blob);
    const a = document.createElement("a");
    a.href = url;
    const host = (() => {
      try { return new URL(normalizedUrl).hostname; } catch { return "report"; }
    })();
    a.download = `crawlerx-report-${host}.json`;
    document.body.appendChild(a);
    a.click();
    a.remove();
    URL.revokeObjectURL(url);
  }

  const { rows, columns } = useMemo(() => {
    if (!report) return { rows: [] as any[], columns: [] as string[] };
    const data = Array.isArray(report) ? report : Array.isArray(report?.results) ? report.results : null;
    if (!data || !Array.isArray(data) || data.length === 0) return { rows: [], columns: [] };

    const preferred = ["url", "status", "title", "description", "h1", "issues", "links", "loadTime"];
    const colset = new Set<string>();
    data.slice(0, 25).forEach((r: any) => Object.keys(r || {}).forEach((k) => colset.add(k)));
    const cols = preferred.filter((k) => colset.has(k)).concat(Array.from(colset).filter((k) => !preferred.includes(k)).slice(0, 6));
    return { rows: data, columns: cols };
  }, [report]);

  return (
    <div className="min-h-screen bg-gradient-to-b from-gray-50 to-white">
      <div className="mx-auto max-w-6xl px-4 py-10">
        <header className="mb-8">
          <h1 className="text-3xl sm:text-4xl font-semibold tracking-tight text-gray-900">CrawlerX — Crawl & Report</h1>
          <p className="mt-2 text-gray-600">Enter a website, auto-detect the sitemap size for <span className="font-medium">Max</span>, then run a crawl via the CrawlerX API and download the JSON report.</p>
        </header>

        <div className="grid gap-4 sm:grid-cols-[1fr_auto_auto] items-end bg-white p-4 rounded-2xl shadow-sm border border-gray-200">
          <div className="flex flex-col">
            <label className="text-sm font-medium text-gray-700 mb-1">Website URL</label>
            <input
              type="url"
              value={siteUrl}
              onChange={(e) => setSiteUrl(e.target.value)}
              placeholder="https://example.com"
              className="w-full rounded-xl border border-gray-300 px-3 py-2 focus:outline-none focus:ring-4 focus:ring-blue-100 focus:border-blue-500"
            />
          </div>

          <div className="flex flex-col">
            <label className="text-sm font-medium text-gray-700 mb-1 flex items-center gap-2">
              Max URLs
              <button
                type="button"
                onClick={autoDetectMaxFromSitemap}
                disabled={!isValidUrl || autoMaxLoading}
                className="text-xs rounded-lg px-2 py-1 border border-gray-300 hover:bg-gray-50 disabled:opacity-50"
                title="Fetch and count URLs from the site's sitemap.xml"
              >
                {autoMaxLoading ? "Detecting…" : "Auto‑detect from sitemap"}
              </button>
            </label>
            <input
              type="number"
              min={1}
              value={maxUrls}
              onChange={(e) => setMaxUrls(e.target.value ? Number(e.target.value) : "")}
              placeholder="e.g. 50"
              className="w-40 rounded-xl border border-gray-300 px-3 py-2 focus:outline-none focus:ring-4 focus:ring-blue-100 focus:border-blue-500"
            />
          </div>

          <div className="flex gap-3 sm:justify-end">
            <button
              type="button"
              onClick={handleCrawl}
              disabled={!isValidUrl || crawlLoading}
              className="h-10 mt-6 inline-flex items-center justify-center rounded-xl bg-blue-600 px-4 text-white font-medium shadow-sm hover:bg-blue-700 disabled:opacity-50"
            >
              {crawlLoading ? "Crawling…" : "Run Crawl"}
            </button>
          </div>
        </div>

        {error && (
          <div className="mt-4 rounded-xl border border-red-200 bg-red-50 px-4 py-3 text-sm text-red-700">{error}</div>
        )}

        {report && (
          <section className="mt-8">
            <div className="flex items-center justify-between mb-3">
              <h2 className="text-2xl font-semibold text-gray-900">Crawler Report</h2>
              <div className="flex gap-2">
                <button
                  onClick={downloadJson}
                  className="inline-flex items-center rounded-xl border border-gray-300 bg-white px-3 py-2 text-sm font-medium hover:bg-gray-50"
                >
                  Download JSON
                </button>
              </div>
            </div>

            <div className="flex flex-wrap gap-2 mb-4">
              {summaryChips(report).map((c) => (
                <span key={c.label} className="inline-flex items-center gap-2 rounded-full border border-gray-200 bg-gray-50 px-3 py-1 text-xs text-gray-700">
                  <span className="font-semibold">{c.value}</span>
                  <span className="text-gray-500">{c.label}</span>
                </span>
              ))}
            </div>

            {rows.length > 0 ? (
              <div className="overflow-auto rounded-2xl border border-gray-200">
                <table className="min-w-full text-sm">
                  <thead className="bg-gray-50 text-left sticky top-0">
                    <tr>
                      {columns.map((c) => (
                        <th key={c} className="px-3 py-2 font-semibold text-gray-700 whitespace-nowrap">{c}</th>
                      ))}
                    </tr>
                  </thead>
                  <tbody>
                    {rows.map((r: any, idx: number) => (
                      <tr key={idx} className="odd:bg-white even:bg-gray-50">
                        {columns.map((c) => (
                          <td key={c} className="px-3 py-2 align-top text-gray-800 max-w-[28rem]">
                            {renderCell(r[c])}
                          </td>
                        ))}
                      </tr>
                    ))}
                  </tbody>
                </table>
              </div>
            ) : (
              <pre className="mt-3 whitespace-pre-wrap break-words rounded-2xl bg-gray-900 text-gray-100 p-4 text-xs overflow-auto">
                {JSON.stringify(report, null, 2)}
              </pre>
            )}
          </section>
        )}

        <p className="mt-6 text-xs text-gray-500">
          Tip: If sitemap auto‑detection fails due to server restrictions, enter Max manually or use the /api/sitemap proxy.
        </p>
      </div>
    </div>
  );
}

function renderCell(value: any) {
  if (value == null) return <span className="text-gray-400">—</span>;
  if (typeof value === "string") {
    if (/^https?:\/\//i.test(value)) {
      return (
        <a href={value} target="_blank" rel="noreferrer" className="text-blue-600 hover:underline break-all">
          {value}
        </a>
      );
    }
    return <span className="break-words">{truncate(value, 220)}</span>;
  }
  if (typeof value === "number" || typeof value === "boolean") return <span>{String(value)}</span>;
  if (Array.isArray(value)) return <span className="text-gray-700">[{value.length} items]</span>;
  return (
    <details>
      <summary className="cursor-pointer text-gray-700">object</summary>
      <pre className="mt-1 whitespace-pre-wrap break-words bg-gray-100 rounded-lg p-2 text-[11px]">{JSON.stringify(value, null, 2)}</pre>
    </details>
  );
}

function truncate(s: string, n: number) {
  return s.length > n ? s.slice(0, n - 1) + "…" : s;
}

function summaryChips(report: any): { label: string; value: string | number }[] {
  const chips: { label: string; value: string | number }[] = [];
  const arr = Array.isArray(report) ? report : Array.isArray(report?.results) ? report.results : null;
  if (arr) chips.push({ label: "Pages crawled", value: arr.length });
  const totals = report?.totals || report?.summary || {};
  for (const [k, v] of Object.entries(totals)) {
    if (typeof v === "number") chips.push({ label: k, value: v });
  }
  return chips.slice(0, 8);
}
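
Both versions of the page accept either a bare array or a { results: [...] } envelope from the crawl endpoint, and summaryChips additionally reads numeric fields off totals or summary. A minimal sketch of that contract as a type plus a narrowing helper — CrawlReport and reportRows are illustrative names, not part of this commit:

// Sketch of the response shapes the pages above tolerate (assumed from the code).
type Row = Record<string, any>;
type CrawlReport =
  | Row[] // bare array of per-URL rows
  | { ok?: boolean; results: Row[]; totals?: Record<string, number>; summary?: Record<string, number> };

// Mirrors the rawRows / { rows, columns } narrowing used in both components.
function reportRows(report: CrawlReport | null): Row[] {
  if (!report) return [];
  if (Array.isArray(report)) return report;
  return Array.isArray(report.results) ? report.results : [];
}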
@@ -11,7 +11,7 @@ import React, { useEffect, useMemo, useState } from 'react';
  * - Details panel (bottom): key/value for the selected URL
  *
  * Assumptions:
- * - API GET https://app.crawlerx.co/crawl?url=...&max=... returns { ok, results: Row[], ... }
+ * - API GET https://api.crawlerx.co/crawl?url=...&max=... returns { ok, results: Row[], ... }
  * - Row shape is the one produced by crawler.js in this project
  */

@@ -27,7 +27,7 @@ export default function CrawlPage() {
   const [visibleCols, setVisibleCols] = useState<string[]>([]);
   const [selectedIndex, setSelectedIndex] = useState<number | null>(null);

-  const apiBase = 'https://app.crawlerx.co/crawl';
+  const apiBase = 'https://api.crawlerx.co/crawl';

   /* ---------------- URL helpers ---------------- */
   const isValidUrl = useMemo(() => {

@@ -18,7 +18,8 @@ export async function GET(req: Request) {
   ];

   // fetch & parse each candidate; you can also call your backend util if exposed
-  const urls = new Set<string>();
+  const urls:any = new Set<string>();
+
   // very light probe: just check existence; swap to real parser if needed
   for (const href of candidates) {
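
The last hunk touches the project's /api/sitemap proxy, which both versions of the page call as /api/sitemap?u=<site> and from which they read a numeric count. For reference, a self-contained sketch of such a route handler — assuming a plain <loc> scan of /sitemap.xml is enough, with no sitemap-index, gzip, or robots.txt discovery (the project's real parser may differ):

// app/api/sitemap/route.ts — hypothetical minimal proxy, not the commit's actual code.
export async function GET(req: Request) {
  const u = new URL(req.url).searchParams.get("u");
  if (!u) return Response.json({ error: "missing u" }, { status: 400 });

  let origin: string;
  try {
    origin = new URL(u).origin;
  } catch {
    return Response.json({ error: "invalid url" }, { status: 400 });
  }

  // Fetch server-side so the browser never hits the target site directly (avoids CORS).
  const res = await fetch(`${origin}/sitemap.xml`, { cache: "no-store" });
  if (!res.ok) return Response.json({ error: `sitemap fetch failed (${res.status})` }, { status: 502 });

  // Count unique <loc> entries; the client only reads `count`.
  const xml = await res.text();
  const urls = new Set<string>();
  for (const m of xml.matchAll(/<loc>\s*([^<]+?)\s*<\/loc>/gi)) urls.add(m[1].trim());
  return Response.json({ count: urls.size });
}

Since the client only inspects count, swapping in a real XML parser or a sitemap-index walker would not change this contract.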