diff --git a/app/(defaults)/crawl/page copy 2.tsx b/app/(defaults)/crawl/page copy 2.tsx
deleted file mode 100644
index 8ca4d43..0000000
--- a/app/(defaults)/crawl/page copy 2.tsx
+++ /dev/null
@@ -1,406 +0,0 @@
-'use client';
-
-import { useMemo, useRef, useState, useEffect } from "react";
-
-// Path: app/(defaults)/crawl/page.tsx (App Router)
-// TailwindCSS assumed.
-
-type Row = Record<string, any>;
-
-function csvEscape(v: unknown) {
-  if (v === undefined || v === null) return "";
-  const s = String(v);
-  return /[",\n]/.test(s) ? `"${s.replace(/"/g, '""')}"` : s;
-}
-function toCSV(rows: Row[], headers: string[]) {
-  const lines = [headers.join(",")];
-  for (const r of rows) lines.push(headers.map(h => csvEscape(r[h])).join(","));
-  return lines.join("\n");
-}
-function download(filename: string, mime: string, text: string) {
-  const blob = new Blob([text], { type: mime });
-  const url = URL.createObjectURL(blob);
-  const a = document.createElement("a");
-  a.href = url;
-  a.download = filename;
-  a.click();
-  URL.revokeObjectURL(url);
-}
-function getUnionKeys(rows: Row[]): string[] {
-  const set = new Set<string>();
-  for (const r of rows) Object.keys(r || {}).forEach(k => set.add(k));
-  // Prefer a useful order if present:
-  const preferred = [
-    "url","status","status_text","time_ms","bytes","content_type","http_version",
-    "title","title_length","title_pixel_width",
-    "meta_description","meta_description_length","meta_description_pixel_width",
-    "h1_1","h1_1_length","h1_1_pixel_width","h1_2","h1_2_length","h1_2_pixel_width",
-    "h2_1","h2_2",
-    "canonical","robots_meta","x_robots_tag","noindex","nofollow",
-    "lang","word_count","flesch_reading_ease","flesch_kincaid_grade",
-    "gunning_fog","coleman_liau","ari","smog",
-    "schema_types","inlinks","outlinks","render_mode",
-    "last_modified","set_cookie","crawl_timestamp",
-    "duplicate_title_exact","nearest_title_similarity","nearest_title_url",
-    "duplicate_description_exact","nearest_description_similarity","nearest_description_url"
-  ];
-  const others = [...set].filter(k => !preferred.includes(k)).sort();
-  return [...preferred.filter(k => set.has(k)), ...others];
-}
-function coerce(value: any): string {
-  if (Array.isArray(value)) return value.join(" | ");
-  if (typeof value === "object" && value !== null) return JSON.stringify(value);
-  return String(value ?? "");
-}
-function truncate(s: string, n: number) {
-  return s.length > n ? s.slice(0, n - 1) + "…" : s;
-}
-function renderCell(value: any) {
-  if (value == null) return ;
-  if (typeof value === "string") {
-    if (/^https?:\/\//i.test(value)) {
-      return {value};
-    }
-    return {truncate(value, 220)};
-  }
-  if (typeof value === "number" || typeof value === "boolean") return {String(value)};
-  if (Array.isArray(value)) return [{value.length} items];
-  return (
-
-      object
-      {JSON.stringify(value, null, 2)}
-
-  );
-}
-function summaryChips(report: any): { label: string; value: string | number }[] {
-  const chips: { label: string; value: string | number }[] = [];
-  const arr = Array.isArray(report) ? report : Array.isArray(report?.results) ? report.results : null;
-  if (arr) chips.push({ label: "Pages crawled", value: arr.length });
-  const totals = report?.totals || report?.summary || {};
-  for (const [k, v] of Object.entries(totals)) {
-    if (typeof v === "number") chips.push({ label: k, value: v });
-  }
-  return chips.slice(0, 8);
-}
-
-export default function CrawlPage() {
-  const [siteUrl, setSiteUrl] = useState("");
-  const [maxUrls, setMaxUrls] = useState<number | "">("");
-  const [autoMaxLoading, setAutoMaxLoading] = useState(false);
-  const [crawlLoading, setCrawlLoading] = useState(false);
-  const [error, setError] = useState<string | null>(null);
-  const [report, setReport] = useState<any>(null);
-
-  const [query, setQuery] = useState(""); // ✅ NEW: quick search
-  const [visible, setVisible] = useState<Record<string, boolean>>({}); // ✅ NEW: column toggles
-  const [sortBy, setSortBy] = useState("url"); // ✅ NEW: sorting
-  const [sortDir, setSortDir] = useState<"asc"|"desc">("asc");
-
-  const apiBase = "https://app.crawlerx.co/crawl";
-
-  const isValidUrl = useMemo(() => {
-    try {
-      if (!siteUrl) return false;
-      const normalized = siteUrl.match(/^https?:\/\//i) ? siteUrl : `https://${siteUrl}`;
-      const u = new URL(normalized);
-      return !!u.hostname;
-    } catch {
-      return false;
-    }
-  }, [siteUrl]);
-
-  const normalizedUrl = useMemo(() => {
-    if (!siteUrl) return "";
-    return siteUrl.match(/^https?:\/\//i) ? siteUrl : `https://${siteUrl}`;
-  }, [siteUrl]);
-
-  async function autoDetectMaxFromSitemap() {
-    setError(null);
-    setAutoMaxLoading(true);
-    try {
-      if (!isValidUrl) throw new Error("Enter a valid website URL first.");
-      const res = await fetch(`/api/sitemap?u=${encodeURIComponent(normalizedUrl)}`);
-      if (!res.ok) throw new Error(`Sitemap probe failed (${res.status})`);
-      const json = await res.json();
-      if (typeof json.count !== "number" || json.count < 1) throw new Error("Sitemap found but contains no URLs.");
-      setMaxUrls(json.count);
-    } catch (e: any) {
-      setError(e?.message || "Failed to detect Max from sitemap.");
-    } finally {
-      setAutoMaxLoading(false);
-    }
-  }
-
-  async function handleCrawl() {
-    setError(null);
-    setCrawlLoading(true);
-    setReport(null);
-    try {
-      if (!isValidUrl) throw new Error("Please enter a valid website URL (with or without https://).");
-      const max = typeof maxUrls === "number" && maxUrls > 0 ? maxUrls : 50;
-      const apiUrl = `${apiBase}?url=${encodeURIComponent(normalizedUrl)}&max=${max}`;
-      const res = await fetch(apiUrl);
-      if (!res.ok) throw new Error(`Crawler API error: ${res.status} ${res.statusText}`);
-      const data = await res.json();
-      setReport(data);
-    } catch (e: any) {
-      setError(e?.message || "Failed to crawl the site.");
-    } finally {
-      setCrawlLoading(false);
-    }
-  }
-
-  function downloadJson() {
-    if (!report) return;
-    const blob = new Blob([JSON.stringify(report, null, 2)], { type: "application/json" });
-    const url = URL.createObjectURL(blob);
-    const a = document.createElement("a");
-    a.href = url;
-    const host = (() => { try { return new URL(normalizedUrl).hostname; } catch { return "report"; }})();
-    a.download = `crawlerx-report-${host}.json`;
-    document.body.appendChild(a);
-    a.click();
-    a.remove();
-    URL.revokeObjectURL(url);
-  }
-
-  // ✅ Build table rows from report (supports either array or { results: [] })
-  const rawRows: Row[] = useMemo(() => {
-    if (!report) return [];
-    if (Array.isArray(report)) return report as Row[];
-    if (Array.isArray(report?.results)) return report.results as Row[];
-    return [];
-  }, [report]);
-
-  // ✅ Determine columns = union of keys across rows
-  const columns = useMemo(() => getUnionKeys(rawRows), [rawRows]);
-
-  // ✅ Initialize column visibility when columns change (default all visible)
-  useEffect(() => {
-    if (!columns.length) return;
-    setVisible(prev => {
-      const next: Record<string, boolean> = { ...prev };
-      let changed = false;
-      for (const c of columns) if (next[c] === undefined) { next[c] = true; changed = true; }
-      return changed ? next : prev;
-    });
-    // eslint-disable-next-line react-hooks/exhaustive-deps
-  }, [columns.join("|")]);
-
-  // ✅ Search (filters visible columns)
-  const filtered = useMemo(() => {
-    if (!query.trim()) return rawRows;
-    const q = query.toLowerCase();
-    return rawRows.filter(r => {
-      for (const h of columns) {
-        if (!visible[h]) continue;
-        const v = coerce(r[h]).toLowerCase();
-        if (v.includes(q)) return true;
-      }
-      return false;
-    });
-  }, [rawRows, query, columns, visible]);
-
-  // ✅ Sorting
-  const sorted = useMemo(() => {
-    const copy = [...filtered];
-    copy.sort((a, b) => {
-      const va = a[sortBy], vb = b[sortBy];
-      if (va == null && vb == null) return 0;
-      if (va == null) return sortDir === "asc" ? -1 : 1;
-      if (vb == null) return sortDir === "asc" ? 1 : -1;
-      const sa = typeof va === "number" ? va : String(va);
-      const sb = typeof vb === "number" ? vb : String(vb);
-      if (sa < sb) return sortDir === "asc" ? -1 : 1;
-      if (sa > sb) return sortDir === "asc" ? 1 : -1;
-      return 0;
-    });
-    return copy;
-  }, [filtered, sortBy, sortDir]);
-
-  function onHeaderClick(h: string) {
-    if (sortBy === h) setSortDir(d => (d === "asc" ? "desc" : "asc"));
-    else { setSortBy(h); setSortDir("asc"); }
-  }
-
-  function exportCSV() {
-    if (!sorted.length) return;
-    const activeHeaders = columns.filter(h => visible[h]);
-    const csv = toCSV(sorted, activeHeaders);
-    const host = (() => { try { return new URL(normalizedUrl).hostname; } catch { return "report"; }})();
-    download(`crawlerx-report-${host}.csv`, "text/csv;charset=utf-8", csv);
-  }
-  function exportJSON() {
-    if (!sorted.length) return;
-    const host = (() => { try { return new URL(normalizedUrl).hostname; } catch { return "report"; }})();
-    download(`crawlerx-report-${host}.json`, "application/json", JSON.stringify(sorted, null, 2));
-  }
-
-  return (
-
-
-
-

CrawlerX — Crawl & Report

-

- Enter a website, auto-detect the sitemap size for Max, then run a crawl via the CrawlerX API and download the JSON report. -

-
- -
-
- - setSiteUrl(e.target.value)} - placeholder="https://example.com" - className="w-full rounded-xl border border-gray-300 px-3 py-2 focus:outline-none focus:ring-4 focus:ring-blue-100 focus:border-blue-500" - /> -
- -
- - setMaxUrls(e.target.value ? Number(e.target.value) : "")} - placeholder="e.g. 50" - className="w-40 rounded-xl border border-gray-300 px-3 py-2 focus:outline-none focus:ring-4 focus:ring-blue-100 focus:border-blue-500" - /> -
- -
- -
-
- - {error && ( -
{error}
- )} - - {report && ( -
-
-

Crawler Report

-
- setQuery(e.target.value)} - placeholder="Search (filters visible columns)" - className="border rounded-xl px-3 py-2 text-sm" - /> - - - -
-
- -
- {summaryChips(report).map((c) => ( - - {c.value} - {c.label} - - ))} -
- - {/* Column toggles */} -
-
Columns
-
- {columns.map((h) => ( - - ))} -
-
- - {/* Table */} - {sorted.length > 0 ? ( -
- - - - {columns.filter(c => visible[c]).map((c) => ( - - ))} - - - - {sorted.map((r: any, idx: number) => ( - - {columns.filter(c => visible[c]).map((c) => ( - - ))} - - ))} - -
onHeaderClick(c)} - title="Click to sort" - > -
- {c} - {sortBy === c && {sortDir === "asc" ? "▲" : "▼"}} -
-
- {renderCell(r[c])} -
-
- ) : ( -
-                {JSON.stringify(report, null, 2)}
-              
- )} -
- )} - -

- Tip: If sitemap auto-detection fails due to server restrictions, enter Max manually or use the /api/sitemap proxy. -

-
-
-  );
-}
diff --git a/app/(defaults)/crawl/page copy.tsx b/app/(defaults)/crawl/page copy.tsx
deleted file mode 100644
index f0808de..0000000
--- a/app/(defaults)/crawl/page copy.tsx
+++ /dev/null
@@ -1,257 +0,0 @@
-'use client';
-
-import { useMemo, useState } from "react";
-
-// Path: app/(defaults)/crawl/page.tsx (App Router)
-// If using Pages Router, place at pages/crawl.tsx
-// TailwindCSS assumed.
-
-export default function CrawlPage() {
-  const [siteUrl, setSiteUrl] = useState("");
-  const [maxUrls, setMaxUrls] = useState<number | "">("");
-  const [autoMaxLoading, setAutoMaxLoading] = useState(false);
-  const [crawlLoading, setCrawlLoading] = useState(false);
-  const [error, setError] = useState<string | null>(null);
-  const [report, setReport] = useState<any>(null);
-
-  const apiBase = "https://app.crawlerx.co/crawl";
-
-  const isValidUrl = useMemo(() => {
-    try {
-      if (!siteUrl) return false;
-      const normalized = siteUrl.match(/^https?:\/\//i) ? siteUrl : `https://${siteUrl}`;
-      const u = new URL(normalized);
-      return !!u.hostname;
-    } catch {
-      return false;
-    }
-  }, [siteUrl]);
-
-  const normalizedUrl = useMemo(() => {
-    if (!siteUrl) return "";
-    return siteUrl.match(/^https?:\/\//i) ? siteUrl : `https://${siteUrl}`;
-  }, [siteUrl]);
-
-  async function autoDetectMaxFromSitemap() {
-    setError(null);
-    setAutoMaxLoading(true);
-    try {
-      if (!isValidUrl) throw new Error("Enter a valid website URL first.");
-      // Server-side proxy avoids CORS
-      const res = await fetch(`/api/sitemap?u=${encodeURIComponent(normalizedUrl)}`);
-      if (!res.ok) throw new Error(`Sitemap probe failed (${res.status})`);
-      const json = await res.json();
-      if (typeof json.count !== "number" || json.count < 1) throw new Error("Sitemap found but contains no URLs.");
-      setMaxUrls(json.count);
-    } catch (e: any) {
-      setError(e?.message || "Failed to detect Max from sitemap.");
-    } finally {
-      setAutoMaxLoading(false);
-    }
-  }
-
-  async function handleCrawl() {
-    setError(null);
-    setCrawlLoading(true);
-    setReport(null);
-
-    try {
-      if (!isValidUrl) throw new Error("Please enter a valid website URL (with or without https://).");
-      const max = typeof maxUrls === "number" && maxUrls > 0 ? maxUrls : 50;
-      const apiUrl = `${apiBase}?url=${encodeURIComponent(normalizedUrl)}&max=${max}`;
-      const res = await fetch(apiUrl);
-      if (!res.ok) throw new Error(`Crawler API error: ${res.status} ${res.statusText}`);
-      const data = await res.json();
-      setReport(data);
-    } catch (e: any) {
-      setError(e?.message || "Failed to crawl the site.");
-    } finally {
-      setCrawlLoading(false);
-    }
-  }
-
-  function downloadJson() {
-    if (!report) return;
-    const blob = new Blob([JSON.stringify(report, null, 2)], { type: "application/json" });
-    const url = URL.createObjectURL(blob);
-    const a = document.createElement("a");
-    a.href = url;
-    const host = (() => {
-      try { return new URL(normalizedUrl).hostname; } catch { return "report"; }
-    })();
-    a.download = `crawlerx-report-${host}.json`;
-    document.body.appendChild(a);
-    a.click();
-    a.remove();
-    URL.revokeObjectURL(url);
-  }
-
-  const { rows, columns } = useMemo(() => {
-    if (!report) return { rows: [] as any[], columns: [] as string[] };
-    const data = Array.isArray(report) ? report : Array.isArray(report?.results) ? report.results : null;
-    if (!data || !Array.isArray(data) || data.length === 0) return { rows: [], columns: [] };
-
-    const preferred = ["url", "status", "title", "description", "h1", "issues", "links", "loadTime"];
-    const colset = new Set<string>();
-    data.slice(0, 25).forEach((r: any) => Object.keys(r || {}).forEach((k) => colset.add(k)));
-    const cols = preferred.filter((k) => colset.has(k)).concat(Array.from(colset).filter((k) => !preferred.includes(k)).slice(0, 6));
-    return { rows: data, columns: cols };
-  }, [report]);
-
-  return (
-
-
-
-

CrawlerX — Crawl & Report

-

Enter a website, auto-detect the sitemap size for Max, then run a crawl via the CrawlerX API and download the JSON report.

-
- -
-
- - setSiteUrl(e.target.value)} - placeholder="https://example.com" - className="w-full rounded-xl border border-gray-300 px-3 py-2 focus:outline-none focus:ring-4 focus:ring-blue-100 focus:border-blue-500" - /> -
- -
- - setMaxUrls(e.target.value ? Number(e.target.value) : "")} - placeholder="e.g. 50" - className="w-40 rounded-xl border border-gray-300 px-3 py-2 focus:outline-none focus:ring-4 focus:ring-blue-100 focus:border-blue-500" - /> -
- -
- -
-
- - {error && ( -
{error}
- )} - - {report && ( -
-
-

Crawler Report

-
- -
-
- -
- {summaryChips(report).map((c) => ( - - {c.value} - {c.label} - - ))} -
- - {rows.length > 0 ? ( -
- - - - {columns.map((c) => ( - - ))} - - - - {rows.map((r: any, idx: number) => ( - - {columns.map((c) => ( - - ))} - - ))} - -
{c}
- {renderCell(r[c])} -
-
- ) : ( -
-                {JSON.stringify(report, null, 2)}
-              
- )} -
- )} - -

- Tip: If sitemap auto‑detection fails due to server restrictions, enter Max manually or use the /api/sitemap proxy. -

-
-
- ); -} - -function renderCell(value: any) { - if (value == null) return ; - if (typeof value === "string") { - if (/^https?:\/\//i.test(value)) { - return ( - - {value} - - ); - } - return {truncate(value, 220)}; - } - if (typeof value === "number" || typeof value === "boolean") return {String(value)}; - if (Array.isArray(value)) return [{value.length} items]; - return ( -
-      object
-      {JSON.stringify(value, null, 2)}
-
-  );
-}
-
-function truncate(s: string, n: number) {
-  return s.length > n ? s.slice(0, n - 1) + "…" : s;
-}
-
-function summaryChips(report: any): { label: string; value: string | number }[] {
-  const chips: { label: string; value: string | number }[] = [];
-  const arr = Array.isArray(report) ? report : Array.isArray(report?.results) ? report.results : null;
-  if (arr) chips.push({ label: "Pages crawled", value: arr.length });
-  const totals = report?.totals || report?.summary || {};
-  for (const [k, v] of Object.entries(totals)) {
-    if (typeof v === "number") chips.push({ label: k, value: v });
-  }
-  return chips.slice(0, 8);
-}
diff --git a/app/(defaults)/crawl/page.tsx b/app/(defaults)/crawl/page.tsx
index 87f5406..e62fb38 100644
--- a/app/(defaults)/crawl/page.tsx
+++ b/app/(defaults)/crawl/page.tsx
@@ -11,7 +11,7 @@ import React, { useEffect, useMemo, useState } from 'react';
  * - Details panel (bottom): key/value for the selected URL
  *
  * Assumptions:
- * - API GET https://app.crawlerx.co/crawl?url=...&max=... returns { ok, results: Row[], ... }
+ * - API GET https://api.crawlerx.co/crawl?url=...&max=... returns { ok, results: Row[], ... }
  * - Row shape is the one produced by crawler.js in this project
  */
 
@@ -27,7 +27,7 @@ export default function CrawlPage() {
     const [visibleCols, setVisibleCols] = useState([]);
     const [selectedIndex, setSelectedIndex] = useState(null);
 
-    const apiBase = 'https://app.crawlerx.co/crawl';
+    const apiBase = 'https://api.crawlerx.co/crawl';
 
     /* ---------------- URL helpers ---------------- */
     const isValidUrl = useMemo(() => {
diff --git a/app/api/sitemap/route.ts b/app/api/sitemap/route.ts
index 248a36a..d4f286d 100644
--- a/app/api/sitemap/route.ts
+++ b/app/api/sitemap/route.ts
@@ -18,7 +18,8 @@ export async function GET(req: Request) {
     ];
 
     // fetch & parse each candidate; you can also call your backend util if exposed
-    const urls = new Set();
+    const urls:any = new Set();
+    // very light probe: just check existence; swap to real parser if needed
 
     for (const href of candidates) {
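
Reviewer note: the updated header comment in app/(defaults)/crawl/page.tsx states the contract this page relies on: GET https://api.crawlerx.co/crawl?url=...&max=... returning { ok, results: Row[], ... }. A minimal TypeScript sketch of that contract follows; the CrawlRow and CrawlResponse names, the fetchCrawlReport helper, and any field beyond ok and results are illustrative assumptions, not part of this patch.

// Hypothetical types for the response shape described in the page.tsx comment.
type CrawlRow = Record<string, unknown>;

interface CrawlResponse {
  ok: boolean;
  results: CrawlRow[];
  [extra: string]: unknown; // the API may return additional summary fields
}

// Thin wrapper mirroring how handleCrawl() builds the request URL.
async function fetchCrawlReport(url: string, max = 50): Promise<CrawlResponse> {
  const endpoint = `https://api.crawlerx.co/crawl?url=${encodeURIComponent(url)}&max=${max}`;
  const res = await fetch(endpoint);
  if (!res.ok) throw new Error(`Crawler API error: ${res.status} ${res.statusText}`);
  return (await res.json()) as CrawlResponse;
}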
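
Reviewer note: the new comment in app/api/sitemap/route.ts keeps the endpoint as a light existence probe and defers the "real parser". A hedged sketch of what that swap could look like, assuming the sitemap lives at /sitemap.xml and that the client only needs the { count } field which crawl/page.tsx already reads:

// Sketch only: counts <loc> entries in /sitemap.xml; a sitemap index would need
// one extra fetch per child sitemap, which this intentionally skips.
export async function GET(req: Request) {
  const target = new URL(req.url).searchParams.get('u');
  if (!target) return Response.json({ error: 'missing u' }, { status: 400 });

  const origin = new URL(target).origin;
  const res = await fetch(`${origin}/sitemap.xml`, { cache: 'no-store' });
  if (!res.ok) return Response.json({ count: 0 });

  const xml = await res.text();
  const count = (xml.match(/<loc>/g) || []).length;
  return Response.json({ count });
}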