2025-09-17 00:19:13 +05:30

258 lines
10 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

'use client';
import { useMemo, useState } from "react";
// Path: app/(defaults)/crawl/page.tsx (App Router)
// If using Pages Router, place at pages/crawl.tsx
// TailwindCSS assumed.
/**
 * Crawl page: collects a website URL and an optional page limit, can fill the
 * limit from the site's sitemap (through the `/api/sitemap` route), runs a
 * crawl against the CrawlerX endpoint and renders / downloads the JSON report.
 *
 * All state is local to the component; nothing is persisted.
 */
export default function CrawlPage() {
  const [siteUrl, setSiteUrl] = useState("");
  const [maxUrls, setMaxUrls] = useState<number | "">("");
  const [autoMaxLoading, setAutoMaxLoading] = useState(false);
  const [crawlLoading, setCrawlLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const [report, setReport] = useState<any>(null);
  const apiBase = "https://app.crawlerx.co/crawl";

  // Static ids are enough here: the page renders a single instance of each field.
  const siteUrlInputId = "crawl-site-url";
  const maxUrlsInputId = "crawl-max-urls";

  // Single source of truth for the URL sent to the APIs: surrounding whitespace
  // (common when pasting) is dropped and a scheme is added when missing.
  const normalizedUrl = useMemo(() => {
    const trimmed = siteUrl.trim();
    if (!trimmed) return "";
    return /^https?:\/\//i.test(trimmed) ? trimmed : `https://${trimmed}`;
  }, [siteUrl]);

  // Valid means the normalized value parses as a URL with a non-empty hostname.
  const isValidUrl = useMemo(() => {
    if (!normalizedUrl) return false;
    try {
      return !!new URL(normalizedUrl).hostname;
    } catch {
      return false;
    }
  }, [normalizedUrl]);

  /** Asks the sitemap route for the number of URLs and stores it as Max. */
  async function autoDetectMaxFromSitemap() {
    setError(null);
    setAutoMaxLoading(true);
    try {
      if (!isValidUrl) throw new Error("Enter a valid website URL first.");
      // Server-side proxy avoids CORS
      const res = await fetch(`/api/sitemap?u=${encodeURIComponent(normalizedUrl)}`);
      if (!res.ok) throw new Error(`Sitemap probe failed (${res.status})`);
      const json = await res.json();
      if (typeof json.count !== "number" || json.count < 1) throw new Error("Sitemap found but contains no URLs.");
      setMaxUrls(json.count);
    } catch (e: unknown) {
      setError(toErrorMessage(e, "Failed to detect Max from sitemap."));
    } finally {
      setAutoMaxLoading(false);
    }
  }

  /** Runs the crawl and stores the parsed JSON response as the report. */
  async function handleCrawl() {
    setError(null);
    setCrawlLoading(true);
    setReport(null);
    try {
      if (!isValidUrl) throw new Error("Please enter a valid website URL (with or without https://).");
      // The limit is a page count: send a whole number, and fall back to 50
      // when the field is empty or below 1.
      const max =
        typeof maxUrls === "number" && Number.isFinite(maxUrls) && maxUrls >= 1 ? Math.floor(maxUrls) : 50;
      const apiUrl = `${apiBase}?url=${encodeURIComponent(normalizedUrl)}&max=${max}`;
      const res = await fetch(apiUrl);
      if (!res.ok) throw new Error(`Crawler API error: ${res.status} ${res.statusText}`);
      const data = await res.json();
      setReport(data);
    } catch (e: unknown) {
      setError(toErrorMessage(e, "Failed to crawl the site."));
    } finally {
      setCrawlLoading(false);
    }
  }

  /** Serializes the current report and triggers a browser download of it. */
  function downloadJson() {
    if (!report) return;
    const blob = new Blob([JSON.stringify(report, null, 2)], { type: "application/json" });
    const url = URL.createObjectURL(blob);
    const a = document.createElement("a");
    a.href = url;
    const host = (() => {
      try {
        return new URL(normalizedUrl).hostname;
      } catch {
        return "report";
      }
    })();
    a.download = `crawlerx-report-${host}.json`;
    document.body.appendChild(a);
    a.click();
    a.remove();
    URL.revokeObjectURL(url);
  }

  // Table model: rows are the report itself (when it is an array) or its
  // `results` array; columns are discovered from the first 25 rows, preferred
  // keys first, followed by at most 6 other keys.
  const { rows, columns } = useMemo(() => {
    if (!report) return { rows: [] as any[], columns: [] as string[] };
    const data = Array.isArray(report) ? report : Array.isArray(report?.results) ? report.results : null;
    if (!data || !Array.isArray(data) || data.length === 0) return { rows: [], columns: [] };
    const preferred = ["url", "status", "title", "description", "h1", "issues", "links", "loadTime"];
    const colset = new Set<string>();
    data.slice(0, 25).forEach((r: any) => Object.keys(r || {}).forEach((k) => colset.add(k)));
    const cols = preferred
      .filter((k) => colset.has(k))
      .concat(Array.from(colset).filter((k) => !preferred.includes(k)).slice(0, 6));
    return { rows: data, columns: cols };
  }, [report]);

  return (
    <div className="min-h-screen bg-gradient-to-b from-gray-50 to-white">
      <div className="mx-auto max-w-6xl px-4 py-10">
        <header className="mb-8">
          <h1 className="text-3xl sm:text-4xl font-semibold tracking-tight text-gray-900">CrawlerX Crawl & Report</h1>
          <p className="mt-2 text-gray-600">Enter a website, auto-detect the sitemap size for <span className="font-medium">Max</span>, then run a crawl via the CrawlerX API and download the JSON report.</p>
        </header>
        <div className="grid gap-4 sm:grid-cols-[1fr_auto_auto] items-end bg-white p-4 rounded-2xl shadow-sm border border-gray-200">
          <div className="flex flex-col">
            <label htmlFor={siteUrlInputId} className="text-sm font-medium text-gray-700 mb-1">Website URL</label>
            <input
              id={siteUrlInputId}
              type="url"
              value={siteUrl}
              onChange={(e) => setSiteUrl(e.target.value)}
              placeholder="https://example.com"
              className="w-full rounded-xl border border-gray-300 px-3 py-2 focus:outline-none focus:ring-4 focus:ring-blue-100 focus:border-blue-500"
            />
          </div>
          <div className="flex flex-col">
            {/* The button sits next to the label, not inside it: a button nested
                in a label becomes the label's control, so clicking the caption
                would start sitemap detection. */}
            <div className="text-sm font-medium text-gray-700 mb-1 flex items-center gap-2">
              <label htmlFor={maxUrlsInputId}>Max URLs</label>
              <button
                type="button"
                onClick={autoDetectMaxFromSitemap}
                disabled={!isValidUrl || autoMaxLoading}
                className="text-xs rounded-lg px-2 py-1 border border-gray-300 hover:bg-gray-50 disabled:opacity-50"
                title="Fetch and count URLs from the site's sitemap.xml"
              >
                {autoMaxLoading ? "Detecting…" : "Autodetect from sitemap"}
              </button>
            </div>
            <input
              id={maxUrlsInputId}
              type="number"
              min={1}
              value={maxUrls}
              onChange={(e) => setMaxUrls(e.target.value ? Number(e.target.value) : "")}
              placeholder="e.g. 50"
              className="w-40 rounded-xl border border-gray-300 px-3 py-2 focus:outline-none focus:ring-4 focus:ring-blue-100 focus:border-blue-500"
            />
          </div>
          <div className="flex gap-3 sm:justify-end">
            <button
              type="button"
              onClick={handleCrawl}
              disabled={!isValidUrl || crawlLoading}
              className="h-10 mt-6 inline-flex items-center justify-center rounded-xl bg-blue-600 px-4 text-white font-medium shadow-sm hover:bg-blue-700 disabled:opacity-50"
            >
              {crawlLoading ? "Crawling…" : "Run Crawl"}
            </button>
          </div>
        </div>
        {error && (
          <div className="mt-4 rounded-xl border border-red-200 bg-red-50 px-4 py-3 text-sm text-red-700">{error}</div>
        )}
        {report && (
          <section className="mt-8">
            <div className="flex items-center justify-between mb-3">
              <h2 className="text-2xl font-semibold text-gray-900">Crawler Report</h2>
              <div className="flex gap-2">
                <button
                  type="button"
                  onClick={downloadJson}
                  className="inline-flex items-center rounded-xl border border-gray-300 bg-white px-3 py-2 text-sm font-medium hover:bg-gray-50"
                >
                  Download JSON
                </button>
              </div>
            </div>
            <div className="flex flex-wrap gap-2 mb-4">
              {summaryChips(report).map((c) => (
                <span key={c.label} className="inline-flex items-center gap-2 rounded-full border border-gray-200 bg-gray-50 px-3 py-1 text-xs text-gray-700">
                  <span className="font-semibold">{c.value}</span>
                  <span className="text-gray-500">{c.label}</span>
                </span>
              ))}
            </div>
            {rows.length > 0 ? (
              <div className="overflow-auto rounded-2xl border border-gray-200">
                <table className="min-w-full text-sm">
                  <thead className="bg-gray-50 text-left sticky top-0">
                    <tr>
                      {columns.map((c) => (
                        <th key={c} className="px-3 py-2 font-semibold text-gray-700 whitespace-nowrap">{c}</th>
                      ))}
                    </tr>
                  </thead>
                  <tbody>
                    {rows.map((r: any, idx: number) => (
                      <tr key={idx} className="odd:bg-white even:bg-gray-50">
                        {columns.map((c) => (
                          <td key={c} className="px-3 py-2 align-top text-gray-800 max-w-[28rem]">
                            {renderCell(r[c])}
                          </td>
                        ))}
                      </tr>
                    ))}
                  </tbody>
                </table>
              </div>
            ) : (
              <pre className="mt-3 whitespace-pre-wrap break-words rounded-2xl bg-gray-900 text-gray-100 p-4 text-xs overflow-auto">
                {JSON.stringify(report, null, 2)}
              </pre>
            )}
          </section>
        )}
        <p className="mt-6 text-xs text-gray-500">
          Tip: If sitemap autodetection fails due to server restrictions, enter Max manually or use the /api/sitemap proxy.
        </p>
      </div>
    </div>
  );
}

/** Returns the message of a thrown `Error`, or `fallback` for anything else or an empty message. */
function toErrorMessage(e: unknown, fallback: string): string {
  return e instanceof Error && e.message ? e.message : fallback;
}
/**
 * Turns one report value into the JSX shown in a table cell.
 *
 * - null / undefined: an empty, greyed-out span
 * - string starting with http:// or https://: a link opening in a new tab
 * - any other string: the text, shortened to 220 characters
 * - number / boolean: its string form
 * - array: an "[N items]" marker
 * - everything else: a collapsible block with the pretty-printed JSON
 */
function renderCell(value: any) {
  if (value == null) {
    return <span className="text-gray-400" />;
  }

  switch (typeof value) {
    case "string": {
      const looksLikeLink = /^https?:\/\//i.test(value);
      if (!looksLikeLink) {
        return <span className="break-words">{truncate(value, 220)}</span>;
      }
      return (
        <a href={value} target="_blank" rel="noreferrer" className="text-blue-600 hover:underline break-all">
          {value}
        </a>
      );
    }
    case "number":
    case "boolean":
      return <span>{String(value)}</span>;
    default:
      break;
  }

  if (Array.isArray(value)) {
    return <span className="text-gray-700">[{value.length} items]</span>;
  }

  const prettyJson = JSON.stringify(value, null, 2);
  return (
    <details>
      <summary className="cursor-pointer text-gray-700">object</summary>
      <pre className="mt-1 whitespace-pre-wrap break-words bg-gray-100 rounded-lg p-2 text-[11px]">{prettyJson}</pre>
    </details>
  );
}
/**
 * Shortens `s` to at most `n` UTF-16 code units, ending with "…" when text was cut.
 *
 * @param s Text to shorten.
 * @param n Maximum length of the result, ellipsis included.
 * @returns `s` unchanged when it already fits; an empty string when `n` is zero
 *          or negative; otherwise a prefix of `s` followed by "…".
 */
function truncate(s: string, n: number) {
  if (!(s.length > n)) return s;
  // No room for even the ellipsis. Without this guard a negative slice end
  // would count from the end of the string and keep almost all of it.
  if (n <= 0) return "";
  let end = n - 1;
  // Do not cut between the two halves of a surrogate pair (e.g. an emoji):
  // a dangling high surrogate renders as a replacement character.
  if (end > 0) {
    const last = s.charCodeAt(end - 1);
    if (last >= 0xd800 && last <= 0xdbff) end -= 1;
  }
  return s.slice(0, end) + "…";
}
/**
 * Builds the small label/value chips shown above the report table.
 *
 * The first chip is the number of pages (length of the report array, or of
 * `report.results`) when such an array exists. It is followed by every numeric
 * entry of `report.totals`, or of `report.summary` when `totals` is falsy.
 * At most 8 chips are returned.
 */
function summaryChips(report: any): { label: string; value: string | number }[] {
  type Chip = { label: string; value: string | number };
  const chipLimit = 8;

  let pages: any[] | null = null;
  if (Array.isArray(report)) {
    pages = report;
  } else if (Array.isArray(report?.results)) {
    pages = report.results;
  }

  const pageChips: Chip[] = pages ? [{ label: "Pages crawled", value: pages.length }] : [];

  const counters = report?.totals || report?.summary || {};
  const counterChips: Chip[] = Object.entries(counters)
    .filter(([, amount]) => typeof amount === "number")
    .map(([name, amount]) => ({ label: name, value: amount as number }));

  return [...pageChips, ...counterChips].slice(0, chipLimit);
}