first commit
This commit is contained in:
commit
6c345df1c2
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
@ -0,0 +1,4 @@
|
||||
# .gitignore
|
||||
node_modules
|
||||
.env
|
||||
reports
|
||||
11
config/db.js
Normal file
11
config/db.js
Normal file
@ -0,0 +1,11 @@
|
||||
import mongoose from 'mongoose';
|
||||
|
||||
/**
 * Connects mongoose to the MongoDB deployment named in MONGODB_URI,
 * selecting the 'crawlerX' database. On failure the process exits with
 * code 1 — the app cannot run without a database.
 */
export async function connectDB() {
  const uri = process.env.MONGODB_URI;
  try {
    await mongoose.connect(uri, { dbName: 'crawlerX' });
  } catch (err) {
    console.error('❌ MongoDB connection error:', err);
    process.exit(1);
  }
  console.log('✅ MongoDB connected');
}
150
controllers/auth.controller.js
Normal file
150
controllers/auth.controller.js
Normal file
@ -0,0 +1,150 @@
|
||||
import bcrypt from "bcrypt";
|
||||
import jwt from "jsonwebtoken";
|
||||
import User from "../models/user.model.js";
|
||||
import { sendResetPasswordMail, sendSignupMail, } from "../utils/mailer.js";
|
||||
import crypto from "crypto";
|
||||
|
||||
|
||||
/**
 * POST /api/auth/signup — registers a new user.
 * Body: { email, password }
 * Responds 201 with the new user id; the welcome email is fire-and-forget.
 */
export async function signup(req, res) {
  try {
    const { email, password } = req.body;
    if (!email || !password)
      return res.status(400).json({ error: "Email and password required" });

    if (await User.findOne({ email })) {
      return res.status(400).json({ error: "User already exists" });
    }

    const passwordHash = await bcrypt.hash(password, 10);
    const newUser = await User.create({ email, passwordHash });

    // Fire-and-forget: signup succeeds even if the mail provider fails.
    sendSignupMail(email)
      .then(() => console.log("Signup email sent to", email))
      .catch((err) => console.error("Email send failed:", err));

    res.status(201).json({ message: "Signup success, email sent", id: newUser._id });
  } catch (err) {
    console.error(err);
    res.status(500).json({ error: "Signup failed" });
  }
}
|
||||
|
||||
/**
 * POST /api/auth/login — verifies credentials and issues a 1-hour JWT.
 * Body: { email, password }
 * Responds 401 with an identical message for unknown email and wrong
 * password, so the endpoint does not reveal which one failed.
 */
export async function login(req, res) {
  try {
    const { email, password } = req.body;

    const account = await User.findOne({ email });
    if (!account) return res.status(401).json({ error: "Invalid credentials" });

    const passwordOk = await bcrypt.compare(password, account.passwordHash);
    if (!passwordOk) return res.status(401).json({ error: "Invalid credentials" });

    const payload = { id: account._id, email: account.email };
    const token = jwt.sign(payload, process.env.JWT_SECRET, { expiresIn: "1h" });

    res.json({ message: "Login success", token });
  } catch (err) {
    console.error(err);
    res.status(500).json({ error: "Login failed" });
  }
}
|
||||
|
||||
/**
 * POST /api/auth/change-password
 * Body: { currentPassword, newPassword }
 * Header: Authorization: Bearer <token>
 * Requires the current password to match before replacing the hash.
 */
export async function changePassword(req, res) {
  try {
    const { currentPassword, newPassword } = req.body;

    // With FormData these fields only exist after a multipart parser ran.
    if (!currentPassword || !newPassword) {
      return res.status(400).json({ error: "Current password and new password are required" });
    }

    const account = await User.findById(req.user.id);
    if (!account) return res.status(404).json({ error: "User not found" });

    const currentOk = await bcrypt.compare(currentPassword, account.passwordHash);
    if (!currentOk)
      return res.status(401).json({ error: "Current password is incorrect" });

    account.passwordHash = await bcrypt.hash(newPassword, 10);
    await account.save();

    res.json({ message: "Password updated successfully" });
  } catch (err) {
    console.error("changePassword error:", err); // surface the real failure
    res.status(500).json({ error: "Failed to change password" });
  }
}
|
||||
|
||||
/**
 * POST /api/auth/forgot-password
 * Body: { email }
 * Issues a 4-digit reset code valid for one hour and emails it to the user.
 * Responds with the same message whether or not the email is registered
 * (avoids account enumeration).
 */
export async function forgotPassword(req, res) {
  try {
    const { email } = req.body;
    if (!email) return res.status(400).json({ error: "Email is required" });

    const user = await User.findOne({ email });

    if (!user)
      return res.json({
        message: "If the email is registered, a reset link has been sent.",
        verificationCode: null, // user not found
      });

    // FIX: generate the 4-digit code with a CSPRNG. Math.random() is
    // predictable and must not be used for security-sensitive tokens.
    const verificationCode = crypto.randomInt(1000, 10000).toString();

    // Save code and expiry in DB
    user.resetPasswordToken = verificationCode;
    user.resetPasswordExpires = Date.now() + 60 * 60 * 1000; // 1 hour
    await user.save();

    // Send code via email
    await sendResetPasswordMail(email, verificationCode);

    // SECURITY(review): returning the code in the HTTP response lets anyone
    // reset any account without access to the mailbox. Kept only for
    // client compatibility — remove once the frontend relies on email alone.
    res.json({
      message: "If the email is registered, a reset link has been sent.",
      verificationCode, // This is the 4-digit code
    });
  } catch (err) {
    console.error("forgotPassword error:", err);
    res.status(500).json({ error: "Failed to send reset link" });
  }
}
|
||||
|
||||
/**
 * POST /api/auth/reset-password
 * Body: { token, newPassword }
 * Consumes an unexpired reset token and replaces the password hash.
 */
export async function resetPassword(req, res) {
  try {
    const { token, newPassword } = req.body;
    if (!token || !newPassword)
      return res.status(400).json({ error: "Token and new password are required" });

    // Token must match AND still be inside its validity window.
    const account = await User.findOne({
      resetPasswordToken: token,
      resetPasswordExpires: { $gt: Date.now() },
    });
    if (!account) return res.status(400).json({ error: "Invalid or expired token" });

    account.passwordHash = await bcrypt.hash(newPassword, 10);
    // Clear the token so it cannot be replayed.
    account.resetPasswordToken = undefined;
    account.resetPasswordExpires = undefined;
    await account.save();

    res.json({ message: "Password has been reset successfully" });
  } catch (err) {
    console.error("resetPassword error:", err);
    res.status(500).json({ error: "Failed to reset password" });
  }
}
|
||||
111
controllers/blog.controller.js
Normal file
111
controllers/blog.controller.js
Normal file
@ -0,0 +1,111 @@
|
||||
import Blog from '../models/blog.model.js';
|
||||
import Category from '../models/category.model.js';
|
||||
import slugify from 'slugify';
|
||||
|
||||
// ✅ Create Blog for particular project
// Body: { projectId, title, description, categoryId, tags }
// Optional multipart uploads arrive via multer under req.files.
export const createBlog = async (req, res) => {
  try {
    const { projectId, title, description, categoryId, tags } = req.body;
    if (!projectId) return res.status(400).json({ message: 'projectId is required' });

    const slug = slugify(title, { lower: true, strict: true });

    const imageUrl = req.files?.imageUrl ? `/uploads/${req.files.imageUrl[0].filename}` : '';
    const bigImageUrl = req.files?.bigImageUrl ? `/uploads/${req.files.bigImageUrl[0].filename}` : '';

    const created = await Blog.create({
      projectId,
      title,
      description,
      slug,
      category: categoryId,
      tags,
      imageUrl,
      bigImageUrl,
    });

    res.status(201).json(created);
  } catch (err) {
    res.status(500).json({ message: err.message });
  }
};
|
||||
|
||||
// ✅ Get All Blogs for a particular project
/**
 * GET /blogs?projectId=&page=&limit=&search=&category=
 * Paginated newest-first listing, filtered by a case-insensitive title
 * search and an optional category slug.
 */
export const getAllBlogs = async (req, res) => {
  try {
    const { page = 1, limit = 10, search = '', category, projectId } = req.query;
    if (!projectId) return res.status(400).json({ message: 'projectId is required' });

    // FIX: escape regex metacharacters so user input matches literally
    // (the raw value was fed into $regex — regex injection / ReDoS risk).
    const safeSearch = String(search).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

    const query = {
      projectId,
      title: { $regex: safeSearch, $options: 'i' }
    };

    if (category) {
      const cat = await Category.findOne({ slug: category });
      if (cat) query.category = cat._id;
    }

    // Normalize pagination params once; fall back to the defaults on junk.
    const pageNum = parseInt(page, 10) || 1;
    const limitNum = parseInt(limit, 10) || 10;

    const blogs = await Blog.find(query)
      .populate('category', 'name slug')
      .sort({ createdAt: -1 })
      .skip((pageNum - 1) * limitNum)
      .limit(limitNum);

    const total = await Blog.countDocuments(query);

    res.json({ total, page: pageNum, blogs });
  } catch (err) {
    res.status(500).json({ message: err.message });
  }
};
|
||||
|
||||
// ✅ Get Single Blog by Slug + projectId
export const getBlogBySlug = async (req, res) => {
  try {
    const { projectId } = req.query; // projectId is passed as a query param
    if (!projectId) return res.status(400).json({ message: 'projectId is required' });

    const found = await Blog.findOne({ slug: req.params.slug, projectId })
      .populate('category', 'name slug');
    if (!found) return res.status(404).json({ message: 'Blog not found' });

    res.json(found);
  } catch (err) {
    res.status(500).json({ message: err.message });
  }
};
|
||||
|
||||
// ✅ Add Comment to Blog (projectId check optional – id already unique)
export const addComment = async (req, res) => {
  try {
    const { text, name } = req.body;

    const target = await Blog.findById(req.params.id);
    if (!target) return res.status(404).json({ message: 'Blog not found' });

    const comment = {
      user: req.user?._id,
      name: name || 'Anonymous', // anonymous commenting is allowed
      text,
    };
    target.comments.push(comment);
    await target.save();

    res.json(target.comments);
  } catch (err) {
    res.status(500).json({ message: err.message });
  }
};
|
||||
|
||||
// ✅ Like / Unlike
// Toggle: a second like from the same user removes the first one.
export const likeBlog = async (req, res) => {
  try {
    const target = await Blog.findById(req.params.id);
    if (!target) return res.status(404).json({ message: 'Blog not found' });

    const userId = req.user._id;
    const alreadyLiked = target.likes.includes(userId);
    if (alreadyLiked) {
      target.likes.pull(userId);
    } else {
      target.likes.push(userId);
    }

    await target.save();
    res.json({ likesCount: target.likes.length });
  } catch (err) {
    res.status(500).json({ message: err.message });
  }
};
|
||||
46
controllers/category.controller.js
Normal file
46
controllers/category.controller.js
Normal file
@ -0,0 +1,46 @@
|
||||
import Category from '../models/category.model.js';
|
||||
import slugify from "slugify";
|
||||
|
||||
// Create a new category (Admin only)
// Body: { name, projectId } — the slug is derived from the name.
export const createCategory = async (req, res) => {
  try {
    const { name, projectId } = req.body;
    if (!projectId) return res.status(400).json({ message: "projectId is required" });

    const created = await Category.create({
      name,
      slug: slugify(name, { lower: true, strict: true }),
      projectId,
    });

    res.status(201).json(created);
  } catch (err) {
    res.status(500).json({ message: err.message });
  }
};
|
||||
|
||||
// Get all categories for a project, alphabetically by name.
export const getCategories = async (req, res) => {
  try {
    const { projectId } = req.query;
    if (!projectId) return res.status(400).json({ message: "projectId is required" });

    res.json(await Category.find({ projectId }).sort({ name: 1 }));
  } catch (err) {
    res.status(500).json({ message: err.message });
  }
};
|
||||
|
||||
// Delete a category (Admin only)
export const deleteCategory = async (req, res) => {
  try {
    const { id } = req.params;
    const deleted = await Category.findByIdAndDelete(id);
    // FIX: report a miss instead of claiming success for a nonexistent id.
    if (!deleted) return res.status(404).json({ message: "Category not found" });
    res.json({ message: "Category deleted" });
  } catch (err) {
    res.status(500).json({ message: err.message });
  }
};
|
||||
54
controllers/comment.controller.js
Normal file
54
controllers/comment.controller.js
Normal file
@ -0,0 +1,54 @@
|
||||
import Blog from "../models/blog.model.js";
|
||||
|
||||
// Add comment to a blog
// Params: blogId. Body: { text, name }. Responds 201 with all comments.
export const addComment = async (req, res) => {
  try {
    const { blogId } = req.params;
    const { text, name } = req.body;

    const target = await Blog.findById(blogId);
    if (!target) return res.status(404).json({ message: "Blog not found" });

    target.comments.push({
      user: req.user?._id || null, // anonymous comments allowed
      name: name || "Anonymous",
      text,
    });
    await target.save();

    res.status(201).json(target.comments);
  } catch (err) {
    res.status(500).json({ message: err.message });
  }
};
|
||||
|
||||
// Get all comments for a blog, newest first.
export const getComments = async (req, res) => {
  try {
    const { blogId } = req.params;

    const blog = await Blog.findById(blogId);
    if (!blog) return res.status(404).json({ message: "Blog not found" });

    // FIX: sort a copy — Array.prototype.sort mutates in place, and
    // reordering the live subdocument array dirties the document state.
    const newestFirst = [...blog.comments].sort((a, b) => b.createdAt - a.createdAt);
    res.json(newestFirst);
  } catch (err) {
    res.status(500).json({ message: err.message });
  }
};
|
||||
|
||||
// Delete a comment (Admin only)
export const deleteComment = async (req, res) => {
  try {
    const { blogId, commentId } = req.params;

    const blog = await Blog.findById(blogId);
    if (!blog) return res.status(404).json({ message: "Blog not found" });

    // FIX: subdocument .remove() was dropped in Mongoose 7 — use pull().
    // Also report a miss rather than claiming success for an unknown id.
    const comment = blog.comments.id(commentId);
    if (!comment) return res.status(404).json({ message: "Comment not found" });

    blog.comments.pull(commentId);
    await blog.save();

    res.json({ message: "Comment deleted" });
  } catch (err) {
    res.status(500).json({ message: err.message });
  }
};
|
||||
115
controllers/crawl.controller.js
Normal file
115
controllers/crawl.controller.js
Normal file
@ -0,0 +1,115 @@
|
||||
import path from "node:path";
|
||||
import fs from "node:fs";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import { crawl } from "../crawler.js";
|
||||
|
||||
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
||||
|
||||
// Filesystem-safe timestamp: ISO-8601 with ':' and '.' swapped for '-'.
const ts = () => new Date().toISOString().replace(/[:.]/g, "-");
|
||||
|
||||
/**
 * Sends `obj` as a pretty-printed JSON attachment named `filename`.
 * Returns the result of res.send so callers can `return attachJson(...)`.
 */
function attachJson(res, filename, obj) {
  const json = JSON.stringify(obj, null, 2);
  res.setHeader("Content-Type", "application/json; charset=utf-8");
  // FIX: interpolate the filename — the header previously contained a
  // literal "$(...)" placeholder instead of a ${filename} template slot.
  res.setHeader("Content-Disposition", `attachment; filename="${filename}"`);
  return res.send(json);
}
|
||||
/**
 * True when `p` is an absolute path. Any value path.isAbsolute rejects
 * (non-strings throw a TypeError) is treated as "not absolute".
 */
function isAbs(p) {
  let absolute = false;
  try {
    absolute = path.isAbsolute(p);
  } catch {
    // non-string input → treat as relative
  }
  return absolute;
}
|
||||
|
||||
/**
 * GET /crawl?url=&max=&stream=&download=&nostore=
 * Crawls `url` up to `max` pages (clamped to [1, 500], default 50).
 * - stream=1 (or Accept: text/event-stream): progress over SSE.
 * - download=1: result returned as a JSON attachment (no SSE).
 * - nostore=1: skip persisting reports to disk.
 */
export async function crawlHandler(req, res) {
  try {
    const { url, max, stream, download, nostore } = req.query;
    if (!url) return res.status(400).json({ error: "Missing url param" });

    const target = new URL(String(url)); // throws on malformed URLs → 500 below
    const limit = Math.min(Math.max(parseInt(max ?? "50", 10), 1), 500);
    const wantsStream =
      String(stream) === "1" ||
      (req.get("accept") || "").includes("text/event-stream");

    /* ---------- SSE mode ---------- */
    if (wantsStream) {
      if (String(download) === "1") {
        return res.status(400).json({ error: "download not supported with stream=1" });
      }

      res.setHeader("Content-Type", "text/event-stream");
      res.setHeader("Cache-Control", "no-cache, no-transform");
      res.setHeader("Connection", "keep-alive");
      res.flushHeaders?.();

      const send = (obj, evt) => {
        if (evt) res.write(`event: ${evt}\n`);
        res.write(`data: ${JSON.stringify(obj)}\n\n`);
      };

      // Comment-only heartbeat keeps proxies from closing an idle stream.
      const heartbeat = setInterval(() => res.write(":\n\n"), 15000);
      let finished = false;

      req.on("close", () => {
        clearInterval(heartbeat);
        if (!finished) console.warn("SSE client disconnected.");
      });

      const onProgress = (tick) => send(tick, "tick");
      send({ ok: true, message: "Crawl started", url: target.toString(), limit }, "started");

      const result = await crawl(target.toString(), limit, onProgress, {
        persistReports: false,
        collectPages: true,
      });

      finished = true;
      clearInterval(heartbeat);
      send({ ok: true, done: true, result }, "done");
      return res.end();
    }

    /* ---------- Non-streaming mode ---------- */
    const preferMemory = String(nostore) === "1" || String(download) === "1";
    const result = await crawl(
      target.toString(),
      limit,
      undefined,
      preferMemory
        ? { persistReports: false, collectPages: true }
        : { persistReports: true, collectPages: true }
    );

    if (String(download) === "1") {
      const filename = `crawl-${ts()}.json`;

      // Prefer the in-memory result when the crawler collected pages.
      if (Array.isArray(result?.results)) {
        return attachJson(res, filename, result.results);
      }

      // Otherwise stream the persisted report file from disk, if present.
      const jsonPath = result?.files?.json;
      if (jsonPath) {
        const abs = isAbs(jsonPath) ? jsonPath : path.join(__dirname, jsonPath);
        if (fs.existsSync(abs)) {
          res.setHeader("Content-Type", "application/json; charset=utf-8");
          // FIX: interpolate the filename — the header previously contained
          // a literal "$(...)" placeholder instead of ${filename}.
          res.setHeader("Content-Disposition", `attachment; filename="${filename}"`);
          return fs.createReadStream(abs).pipe(res);
        }
      }
      return attachJson(res, filename, result ?? {});
    }

    return res.json({
      ok: true,
      message: "Crawl completed",
      url: target.toString(),
      limit,
      ...result,
    });
  } catch (err) {
    console.error("Crawl error:", err);
    res.status(500).json({ error: "Crawl failed", details: String(err?.message ?? err) });
  }
}
|
||||
113
controllers/lighthouseController.js
Normal file
113
controllers/lighthouseController.js
Normal file
@ -0,0 +1,113 @@
|
||||
import lighthouse from 'lighthouse';
|
||||
import { launch } from 'chrome-launcher';
|
||||
import PageSpeedTest from '../models/pageSpeedTest.model.js';
|
||||
import path from 'path';
|
||||
import fs from 'fs';
|
||||
|
||||
const reportsDir = path.join(process.cwd(), 'public', 'lighthouse-treemap');
|
||||
// Ensure folder exists
|
||||
if (!fs.existsSync(reportsDir)) fs.mkdirSync(reportsDir, { recursive: true });
|
||||
|
||||
/**
 * Runs Lighthouse against `url` for the given form factor, persists a
 * structured result to MongoDB, and (mobile only) writes an HTML treemap
 * report under public/lighthouse-treemap.
 * @param {string} url    Page to audit.
 * @param {'mobile'|'desktop'} device  Emulated form factor.
 * @returns {Promise<{report: object}>} The stored PageSpeedTest document.
 */
const launchChromeAndRunLighthouse = async (url, device = 'mobile') => {
  const chrome = await launch({ chromeFlags: ['--headless'] });

  let lhr;
  let treemapFile = null;
  try {
    const options = {
      port: chrome.port,
      emulatedFormFactor: device,
      throttlingMethod: device === 'mobile' ? 'simulate' : 'devtools',
      output: 'json', // JSON for metrics
    };

    const runnerResult = await lighthouse(url, options);
    lhr = runnerResult.lhr;

    // Create HTML treemap report (only once, for mobile)
    if (device === 'mobile') {
      const fileName = `treemap-${Date.now()}.html`;
      treemapFile = `/lighthouse-treemap/${fileName}`;

      // Generate HTML report
      const htmlReport = await lighthouse(url, {
        port: chrome.port,
        emulatedFormFactor: device,
        throttlingMethod: 'simulate',
        output: 'html',
      });

      fs.writeFileSync(path.join(reportsDir, fileName), htmlReport.report);
    }
  } finally {
    // FIX: Chrome previously leaked when lighthouse threw — always kill it.
    await chrome.kill();
  }

  // Structured result
  const result = {
    url,
    device,
    scores: {
      performance: Math.round(lhr.categories.performance?.score * 100),
      accessibility: Math.round(lhr.categories.accessibility?.score * 100),
      bestPractices: Math.round(lhr.categories['best-practices']?.score * 100),
      seo: Math.round(lhr.categories.seo?.score * 100),
      pwa: lhr.categories.pwa?.score ? Math.round(lhr.categories.pwa.score * 100) : null,
    },
    metrics: {
      firstContentfulPaint: lhr.audits['first-contentful-paint']?.displayValue || null,
      largestContentfulPaint: lhr.audits['largest-contentful-paint']?.displayValue || null,
      totalBlockingTime: lhr.audits['total-blocking-time']?.displayValue || null,
      timeToInteractive: lhr.audits['interactive']?.displayValue || null,
      speedIndex: lhr.audits['speed-index']?.displayValue || null,
      cumulativeLayoutShift: lhr.audits['cumulative-layout-shift']?.displayValue || null,
    },
    opportunities: Object.values(lhr.audits)
      .filter(a => a.details?.type === 'opportunity')
      .map(a => ({
        title: a.title,
        description: a.description,
        estimatedSavings: a.details?.overallSavingsMs
          ? `${Math.round(a.details.overallSavingsMs)} ms`
          : null,
      })),
    diagnostics: {
      usesHTTPS: lhr.audits['is-on-https']?.score === 1,
      usesEfficientCachePolicy: lhr.audits['uses-long-cache-ttl']?.score === 1,
      imageCompression: lhr.audits['uses-optimized-images']?.score === 1,
    },
    failedAudits: Object.values(lhr.audits)
      .filter(a => a.score !== null && a.score !== 1 && a.scoreDisplayMode !== 'notApplicable')
      .map(a => ({ title: a.title, description: a.description })),
    passedAudits: Object.values(lhr.audits)
      .filter(a => a.score === 1 && a.scoreDisplayMode !== 'notApplicable' && !a.details?.type)
      .map(a => a.title),
    notApplicableAudits: Object.values(lhr.audits)
      .filter(a => a.scoreDisplayMode === 'notApplicable')
      .map(a => a.title),
    screenshot: lhr.audits['final-screenshot']?.details?.data || null,
    createdAt: new Date(),
    treemapPath: treemapFile,
  };

  const report = await PageSpeedTest.create(result);
  return { report };
};
|
||||
|
||||
/**
 * POST /audit — runs Lighthouse for both form factors and returns the
 * stored reports plus the mobile treemap path.
 * Body: { url }
 */
export const runAudit = async (req, res, next) => {
  try {
    const { url } = req.body;
    if (!url) return res.status(400).json({ message: 'URL is required' });

    // Each call launches its own Chrome instance; run one after the other.
    const mobile = await launchChromeAndRunLighthouse(url, 'mobile');
    const desktop = await launchChromeAndRunLighthouse(url, 'desktop');

    res.status(200).json({
      message: 'Audit completed successfully',
      results: {
        mobile: mobile.report,
        desktop: desktop.report,
        treemap: mobile.report.treemapPath, // HTML report
      },
    });
  } catch (err) {
    next(err);
  }
};
|
||||
39
controllers/maisondetreats/cakeOrder.controller.js
Normal file
39
controllers/maisondetreats/cakeOrder.controller.js
Normal file
@ -0,0 +1,39 @@
|
||||
import { CakeOrder } from "../../models/maisondetreats/cakeOrder.model.js";
|
||||
import { sendCakeOrderMail } from "../../utils/mailer.js";
|
||||
|
||||
/**
 * POST /api/cake-orders — stores a cake order and optionally emails a
 * confirmation. Body: { order: object, email?: string }
 */
export const createCakeOrder = async (req, res) => {
  try {
    const { order, email } = req.body;

    if (!order || typeof order !== "object") {
      return res.status(400).json({ message: "Order data is required" });
    }

    const saved = await CakeOrder.create({ order, email });

    // Confirmation mail is fire-and-forget; order creation never waits on it.
    if (email) {
      sendCakeOrderMail(email, order)
        .then(() => console.log("Cake order email sent to", email))
        .catch((err) => console.error("Email send failed:", err));
    }

    res.status(201).json({
      message: "Cake order created successfully",
      data: saved,
    });
  } catch (err) {
    console.error("Error creating cake order:", err);
    res.status(500).json({ message: "Server error", error: err.message });
  }
};
|
||||
// GET /api/cake-orders → List all orders, newest first.
export const getAllCakeOrders = async (_req, res) => {
  try {
    res.json({ data: await CakeOrder.find().sort({ createdAt: -1 }) });
  } catch (err) {
    console.error("Error fetching cake orders:", err);
    res.status(500).json({ message: "Server error", error: err.message });
  }
};
|
||||
68
controllers/message.controller.js
Normal file
68
controllers/message.controller.js
Normal file
@ -0,0 +1,68 @@
|
||||
// message.controller.js
|
||||
|
||||
import dotenv from "dotenv";
|
||||
import axios from "axios";
|
||||
import Message from "../models/message.model.js";
|
||||
|
||||
dotenv.config();
|
||||
|
||||
/**
 * POST /messages — stores a contact message and forwards it as a
 * WhatsApp template notification via the Meta Graph API.
 * Body: { project, name, email, message }
 */
export const sendMessage = async (req, res) => {
  try {
    const { project, name, email, message } = req.body;

    if (!project) return res.status(400).json({ success: false, error: "Project is required" });
    if (!message) return res.status(400).json({ success: false, error: "Message is required" });

    // Save message to MongoDB
    const newMessage = await Message.create({ project, name, email, message });

    // Send WhatsApp Template Message
    const url = `https://graph.facebook.com/v22.0/774121419125441/messages`;
    const payload = {
      messaging_product: "whatsapp",
      to: process.env.WHATSAPP_TO || "917871207631",
      type: "template",
      template: {
        name: "new_message_alert",
        language: { code: "en_US" },
        components: [
          {
            type: "body",
            parameters: [
              { type: "text", text: project || "Project" },
              { type: "text", text: name || "Guest" },
              { type: "text", text: email || "N/A" },
              { type: "text", text: message || "No message" },
            ],
          },
        ],
      },
    };
    const headers = {
      // SECURITY FIX: the access token was previously hard-coded and
      // committed to the repo — it must be rotated in the Meta dashboard
      // and supplied only via the environment.
      Authorization: `Bearer ${process.env.WHATSAPP_ACCESS_TOKEN}`,
      "Content-Type": "application/json",
    };

    const response = await axios.post(url, payload, { headers });
    console.log("✅ WhatsApp API Response:", response.data);

    return res.status(201).json({ success: true, data: newMessage });
  } catch (err) {
    console.error("❌ WhatsApp API Error:", err.response?.data || err.message);
    return res.status(500).json({ success: false, error: "Server Error" });
  }
};
|
||||
|
||||
// ✅ Fetch all stored messages for a project, newest first.
export const getMessages = async (req, res) => {
  try {
    const { project } = req.query;
    if (!project) return res.status(400).json({ success: false, error: "Project is required" });

    const rows = await Message.find({ project }).sort({ createdAt: -1 });
    return res.status(200).json({ success: true, data: rows });
  } catch (err) {
    console.error(err);
    return res.status(500).json({ success: false, error: "Server Error" });
  }
};
|
||||
132
controllers/payment.controller.js
Normal file
132
controllers/payment.controller.js
Normal file
@ -0,0 +1,132 @@
|
||||
// controllers/payment.controller.js
import Stripe from "stripe";
import { Payment } from "../models/payment.model.js";

// ✅ Load Stripe Secret Key from .env
// SECURITY FIX: the secret key was previously hard-coded and committed to
// the repo — revoke that key in the Stripe dashboard and supply the
// replacement only via STRIPE_SECRET_KEY.
const stripe = new Stripe(process.env.STRIPE_SECRET_KEY, {
  apiVersion: "2022-11-15",
});
|
||||
|
||||
/**
 * 🔹 Option 1: PaymentIntent API (client uses clientSecret)
 * Body: { amount } in dollars; stored and charged in cents.
 */
export async function createPaymentIntent(req, res) {
  try {
    const { amount } = req.body;
    if (!amount) return res.status(400).json({ error: "amount is required" });

    const amountInCents = Math.round(amount * 100); // dollars → cents

    const paymentIntent = await stripe.paymentIntents.create({
      amount: amountInCents,
      currency: "usd",
      automatic_payment_methods: { enabled: true },
    });

    // Record the intent so the webhook can mark it succeeded later.
    await Payment.create({
      amount: amountInCents,
      stripePaymentIntentId: paymentIntent.id,
      status: "pending",
    });

    res.json({ clientSecret: paymentIntent.client_secret });
  } catch (err) {
    console.error("❌ Error creating PaymentIntent:", err);
    res.status(500).json({ error: "Internal Server Error" });
  }
}
|
||||
|
||||
/**
 * 🔹 Option 2: Stripe Checkout Session (redirect flow)
 * Body: { email, amount, planId? } — amount in dollars.
 */
export async function createCheckoutSession(req, res) {
  try {
    const { email, amount, planId } = req.body;
    if (!email || !amount) {
      return res.status(400).json({ error: "email and amount are required" });
    }

    const amountInCents = Math.round(amount * 100); // dollars → cents

    const session = await stripe.checkout.sessions.create({
      payment_method_types: ["card"],
      mode: "payment",
      customer_email: email,
      line_items: [
        {
          price_data: {
            currency: "usd",
            product_data: { name: planId || "SEO Plan" },
            unit_amount: amountInCents,
          },
          quantity: 1,
        },
      ],
      success_url: "https://app.crawlerx.co/success",
      cancel_url: "https://app.crawlerx.co/cancel",
    });

    // Save to DB using stripeSessionId instead of stripePaymentIntentId
    await Payment.create({
      email,
      amount: amountInCents,
      stripeSessionId: session.id, // ✅ use session id
      status: "pending",
    });

    res.json({ sessionId: session.id });
  } catch (err) {
    console.error("❌ Error creating checkout session:", err);
    res.status(500).json({ error: "Internal Server Error" });
  }
}
|
||||
|
||||
|
||||
/**
 * 🔹 Stripe Webhook
 * Stripe requires `express.raw({ type: "application/json" })` in route
 * so that req.rawBody is available for signature verification.
 */
export async function handleWebhook(req, res) {
  const sig = req.headers["stripe-signature"];

  let event;
  try {
    // Signature check requires the unparsed request body.
    event = stripe.webhooks.constructEvent(
      req.rawBody, // Must be raw body
      sig,
      process.env.STRIPE_WEBHOOK_SECRET
    );
  } catch (err) {
    console.error("❌ Webhook signature verification failed:", err.message);
    return res.status(400).send(`Webhook Error: ${err.message}`);
  }

  if (event.type === "payment_intent.succeeded") {
    const paymentIntent = event.data.object;
    console.log("✅ PaymentIntent succeeded:", paymentIntent.id);

    await Payment.findOneAndUpdate(
      { stripePaymentIntentId: paymentIntent.id },
      { status: "succeeded" }
    );
  } else if (event.type === "checkout.session.completed") {
    const session = event.data.object;
    console.log("✅ Checkout session completed:", session.id);

    // Update DB record created earlier
    await Payment.findOneAndUpdate(
      { email: session.customer_email, status: "pending" },
      {
        stripePaymentIntentId: session.payment_intent,
        status: "succeeded",
      }
    );
  } else {
    console.log(`Unhandled event type ${event.type}`);
  }

  res.json({ received: true });
}
|
||||
20
controllers/sitemap.controller.js
Normal file
20
controllers/sitemap.controller.js
Normal file
@ -0,0 +1,20 @@
|
||||
import { getSitemapUrls } from "../utils/sitemap.js";
|
||||
|
||||
/**
 * GET /sitemap?u=<origin> — returns every URL found in the site's sitemap.
 */
export async function sitemapHandler(req, res) {
  try {
    const { u } = req.query;
    if (!u) return res.status(400).json({ error: "Missing ?u=https://site.com" });

    const origin = new URL(String(u)); // malformed input throws → 500 below
    const urls = await getSitemapUrls(origin.toString());

    res.json({ ok: true, origin: origin.origin, count: urls.length, urls });
  } catch (err) {
    console.error("sitemap error:", err);
    res.status(500).json({ error: "Failed to fetch sitemap", details: String(err?.message ?? err) });
  }
}
|
||||
709
crawler copy.js
Normal file
709
crawler copy.js
Normal file
@ -0,0 +1,709 @@
|
||||
import got from "got";
|
||||
import * as cheerio from "cheerio";
|
||||
import normalizeUrl from "normalize-url";
|
||||
import { isInternal } from "./utils/urlHelpers.js";
|
||||
import { getSitemapUrls } from "./utils/sitemap.js";
|
||||
import fs from "node:fs";
|
||||
import path from "node:path";
|
||||
import { chromium } from "playwright";
|
||||
|
||||
// NEW libs
|
||||
import pixelWidth from "string-pixel-width";
|
||||
import * as readability from "text-readability";
|
||||
import stringSimilarity from "string-similarity";
|
||||
|
||||
/* ------------------------------ globals --------------------------------- */
// Module-level crawl state. NOTE(review): nothing visible here resets these
// between crawl() invocations, so a second crawl in the same process would
// reuse the same visited set / queue / results — confirm before reusing.
const visited = new Set();   // normalized URLs already dequeued for fetching
const queue = [];            // crawl frontier (FIFO via push/shift)
const results = [];          // one report row object per attempted page

// Link provenance: every discovered edge (source -> target)
const edges = []; // { from, raw_href, to, discovered_by }

// Quick referrer map for error report
const referrers = new Map(); // url -> Array<{from, raw_href, discovered_by}>

// Desktop-Chrome identity sent on every request (both got and Playwright)
// so bot-sensitive sites serve the same markup they would to a real browser.
const REAL_UA =
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36";
const REAL_HEADERS = {
  "user-agent": REAL_UA,
  "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
  "accept-language": "en-US,en;q=0.9",
  "upgrade-insecure-requests": "1",
};
|
||||
|
||||
/* ------------------------------ utils ----------------------------------- */
|
||||
// Escape one value for a CSV cell (RFC 4180 style): null/undefined become
// the empty string; a value containing a double quote, comma, or newline is
// wrapped in quotes with embedded quotes doubled; everything else passes through.
function csvEscape(v) {
  if (v == null) return "";
  const text = String(v);
  if (!/[",\n]/.test(text)) return text;
  return `"${text.replace(/"/g, '""')}"`;
}
|
||||
// Create `dir` (including missing parents) if it does not already exist.
function ensureDir(dir) {
  if (fs.existsSync(dir)) return;
  fs.mkdirSync(dir, { recursive: true });
}
|
||||
/**
 * Persist the per-page crawl results.
 *
 * Writes the full result set as pretty-printed JSON to
 * reports/crawl-<timestamp>.json. A Screaming-Frog-style CSV is also
 * assembled into `lines`, but the CSV write at the bottom is commented out,
 * so that work is currently dead code kept for easy re-enabling.
 *
 * @param {Array<object>} results - page rows produced by crawl()
 */
function writePageReports(results) {
  ensureDir("reports");
  // Filesystem-safe timestamp, e.g. "2024-01-31-12-00-00"
  const stamp = new Date().toISOString().replace(/[:T]/g, "-").slice(0, 19);
  const base = path.join("reports", `crawl-${stamp}`);

  fs.writeFileSync(`${base}.json`, JSON.stringify(results, null, 2), "utf8");
  console.log(`\n📝 Full JSON report saved: ${base}.json`);

  // Columns (a Screaming-Frog-ish shape with our extras)
  const headers = [
    "url", "status", "status_text", "time_ms", "bytes", "content_type", "http_version",
    "title", "title_length", "title_pixel_width",
    "meta_description", "meta_description_length", "meta_description_pixel_width",
    "h1_1", "h1_1_length", "h1_1_pixel_width", "h1_2", "h1_2_length", "h1_2_pixel_width",
    "h2_1", "h2_2",
    "canonical", "robots_meta", "x_robots_tag", "noindex", "nofollow",
    "lang", "word_count", "flesch_reading_ease", "flesch_kincaid_grade",
    "gunning_fog", "coleman_liau", "ari", "smog",
    "schema_types", "inlinks", "outlinks", "render_mode",
    "last_modified", "set_cookie", "crawl_timestamp",
    "duplicate_title_exact", "nearest_title_similarity", "nearest_title_url",
    "duplicate_description_exact", "nearest_description_similarity", "nearest_description_url"
  ];
  const lines = [headers.join(",")];
  for (const r of results) {
    // Cell order below must match `headers` above exactly.
    lines.push([
      r.url,
      r.status,
      r.status_text ?? "",
      r.time_ms,
      r.bytes,
      r.content_type,
      r.http_version ?? "",
      r.title,
      r.title_length,
      r.title_pixel_width,
      r.meta_description,
      r.meta_description_length,
      r.meta_description_pixel_width,
      r.h1_1 ?? "",
      r.h1_1_length ?? 0,
      r.h1_1_pixel_width ?? "",
      r.h1_2 ?? "",
      r.h1_2_length ?? 0,
      r.h1_2_pixel_width ?? "",
      r.h2_1 ?? "",
      r.h2_2 ?? "",
      r.canonical,
      r.robots_meta,
      r.x_robots_tag ?? "",
      r.noindex,
      r.nofollow,
      r.lang ?? "",
      r.word_count ?? "",
      r.flesch_reading_ease ?? "",
      r.flesch_kincaid_grade ?? "",
      r.gunning_fog ?? "",
      r.coleman_liau ?? "",
      r.ari ?? "",
      r.smog ?? "",
      // schema_types is an array in the row; CSV cell uses "|" as separator
      Array.isArray(r.schema_types) ? r.schema_types.join("|") : "",
      r.inlinks ?? 0,
      r.outlinks ?? 0,
      r.render_mode,
      r.last_modified ?? "",
      r.set_cookie ? "yes" : "no",
      r.crawl_timestamp ?? "",
      r.duplicate_title_exact ?? "",
      r.nearest_title_similarity ?? "",
      r.nearest_title_url ?? "",
      r.duplicate_description_exact ?? "",
      r.nearest_description_similarity ?? "",
      r.nearest_description_url ?? ""
    ].map(csvEscape).join(","));
  }
  // CSV output intentionally disabled; only the JSON report is written.
  //fs.writeFileSync(`${base}.csv`, lines.join("\n"), "utf8");
  //console.log(`\n📝 Page reports saved:\n - ${base}.csv\n - ${base}.json`);
}
|
||||
// Write the crawl's link-provenance edges to reports/links-<timestamp>.csv.
// Each row records the source page, the href exactly as authored, the
// resolved target URL, and how the link was discovered.
function writeLinkEdges(edges) {
  ensureDir("reports");
  const stamp = new Date().toISOString().replace(/[:T]/g, "-").slice(0, 19);
  const outFile = path.join("reports", `links-${stamp}.csv`);
  const rows = [["from", "raw_href", "to", "discovered_by"].join(",")];
  edges.forEach((edge) => {
    const cells = [edge.from, edge.raw_href, edge.to, edge.discovered_by];
    rows.push(cells.map(csvEscape).join(","));
  });
  fs.writeFileSync(outFile, rows.join("\n"), "utf8");
  console.log(`🔗 Link provenance saved: ${outFile}`);
}
|
||||
// Write reports/errors-<timestamp>.csv listing every page with an HTTP
// status >= 400, one row per known referrer so broken links can be traced
// back to the page that emitted them. A failed fetch (status === null) is
// excluded; an error page with no recorded referrer still gets one row with
// blank provenance columns.
function writeErrors(results) {
  ensureDir("reports");
  const stamp = new Date().toISOString().replace(/[:T]/g, "-").slice(0, 19);
  const outFile = path.join("reports", `errors-${stamp}.csv`);
  const rows = [["url", "status", "title", "from_page", "raw_href", "discovered_by"].join(",")];

  for (const page of results) {
    if (!(page && page.status !== null && page.status >= 400)) continue;
    const sources = referrers.get(page.url) || [];
    if (sources.length === 0) {
      rows.push([page.url, page.status, page.title, "", "", ""].map(csvEscape).join(","));
      continue;
    }
    for (const src of sources) {
      rows.push(
        [page.url, page.status, page.title, src.from, src.raw_href, src.discovered_by]
          .map(csvEscape)
          .join(",")
      );
    }
  }
  fs.writeFileSync(outFile, rows.join("\n"), "utf8");
  console.log(`❗ Error report saved: ${outFile}`);
}
|
||||
// Record one discovered link edge (source -> target) in the global `edges`
// list, and index it by target in `referrers` for the error report.
function addEdge(from, rawHref, to, discovered_by) {
  const raw_href = rawHref || "";
  edges.push({ from, raw_href, to, discovered_by });
  const refs = referrers.get(to);
  if (refs) {
    refs.push({ from, raw_href, discovered_by });
  } else {
    referrers.set(to, [{ from, raw_href, discovered_by }]);
  }
}
|
||||
|
||||
/* ---------------------- parse HTML without JS --------------------------- */
|
||||
// JSON.parse that yields null instead of throwing on malformed input.
// (Note: the literal "null" also parses to null, indistinguishable from failure.)
function safeJsonParse(txt) {
  let parsed = null;
  try {
    parsed = JSON.parse(txt);
  } catch {
    // malformed JSON — fall through with null
  }
  return parsed;
}
|
||||
// Collect every schema.org "@type" value appearing in the page's JSON-LD
// <script> blocks, walking nested objects and arrays. Malformed JSON blocks
// are skipped (safeJsonParse returns null). Returns a de-duplicated array.
function parseSchemaTypes($) {
  const found = new Set();

  const walk = (node) => {
    if (!node) return;
    if (Array.isArray(node)) {
      for (const item of node) walk(item);
      return;
    }
    if (typeof node !== "object") return;
    const t = node["@type"];
    if (typeof t === "string") {
      found.add(t);
    } else if (Array.isArray(t)) {
      for (const x of t) {
        if (typeof x === "string") found.add(x);
      }
    }
    // descend into nested values too
    for (const value of Object.values(node)) walk(value);
  };

  $('script[type="application/ld+json"]').each((_, el) => {
    const parsed = safeJsonParse($(el).contents().text());
    if (parsed) walk(parsed);
  });

  return [...found];
}
|
||||
/**
 * Extract SEO-relevant fields from a static HTML string with cheerio
 * (no JavaScript execution).
 *
 * @param {string} html - raw HTML document
 * @param {string} url - page URL, used as base for resolving relative hrefs
 * @returns {object} title/meta/heading/robots/link/schema fields consumed by crawl()
 */
function parseHtml(html, url) {
  const $ = cheerio.load(html);

  let title = ($("title").first().text() || "").trim();
  const ogTitle = $('meta[property="og:title"]').attr("content") || "";
  const twTitle = $('meta[name="twitter:title"]').attr("content") || "";

  // Headings (capture top two H1s and H2s)
  const h1s = $("h1").map((_, el) => $(el).text().trim()).get();
  const h2s = $("h2").map((_, el) => $(el).text().trim()).get();

  const h1_1 = h1s[0] || "";
  const h1_2 = h1s[1] || "";
  const h2_1 = h2s[0] || "";
  const h2_2 = h2s[1] || "";

  // Count ALL heading-like elements; used by shouldRender() as an emptiness signal.
  const totalHeadings = $("h1,h2,h3,h4,h5,h6,[role='heading']").length;

  // Fallback chain when <title> is empty: og:title -> twitter:title -> first H1.
  if (!title) title = (ogTitle || twTitle || h1_1 || "").trim();

  const metaDesc = ($('meta[name="description"]').attr("content") || "").trim();
  const canonical = ($('link[rel="canonical"]').attr("href") || "").trim();
  const robotsMeta = ($('meta[name="robots"]').attr("content") || "").trim();
  const robotsLower = robotsMeta.toLowerCase();
  // Token-boundary match so e.g. "noindexing" does not count as noindex.
  const noindex = /(^|[,;\s])noindex([,;\s]|$)/.test(robotsLower);
  const nofollow = /(^|[,;\s])nofollow([,;\s]|$)/.test(robotsLower);

  const lang = ($("html").attr("lang") || "").trim();

  // Basic text body for word count / readability (prefers <main> over <body>)
  const bodyText = ($("main").text() || $("body").text() || "").replace(/\s+/g, " ").trim();
  const wordCount = bodyText ? bodyText.split(/\s+/).length : 0;

  // Internal links + raw href.
  // NOTE(review): despite its name, `internalLinks` collects EVERY resolvable
  // absolute link; the internal/external filtering happens at the caller
  // (crawl() applies isInternal before enqueuing).
  const internalLinks = new Set();
  const rawLinks = [];
  $("a[href]").each((_, el) => {
    const href = $(el).attr("href");
    if (!href) return;
    try {
      const abs = new URL(href, url).toString();
      rawLinks.push({ raw: href, abs });
      internalLinks.add(abs);
    } catch { }  // unresolvable href (e.g. "javascript:") — ignore
  });

  // Schema.org JSON-LD types
  const schemaTypes = parseSchemaTypes($);

  return {
    title,
    metaDesc,
    h1_1, h1_2, h2_1, h2_2,
    totalHeadings,
    canonical, robotsMeta, noindex, nofollow,
    internalLinks, rawLinks,
    lang,
    wordCount,
    schemaTypes,
    bodyText
  };
}
|
||||
|
||||
/* ------------------------------ fetchers -------------------------------- */
|
||||
// Plain HTTP fetch (no JS execution) via got, using the browser-like header
// set. HTTP error statuses do not throw; timing, byte size (Content-Length
// when present, otherwise the body length), and headers are captured so the
// caller can build a report row.
async function fetchWithGot(url) {
  const startedAt = Date.now();

  const res = await got(url, {
    timeout: { request: 20000 },
    throwHttpErrors: false,
    headers: REAL_HEADERS,
    http2: false,
  });

  const elapsedMs = Date.now() - startedAt;
  const contentType = (res.headers["content-type"] || "").toLowerCase();
  const declaredLength = res.headers["content-length"];
  const bytes = declaredLength
    ? Number(declaredLength)
    : Buffer.byteLength(res.body || "", "utf8");

  return {
    status: res.statusCode ?? null,
    status_text: res.statusMessage ?? "",
    time_ms: elapsedMs,
    contentType,
    body: res.body,
    bytes,
    render_mode: "http",
    httpVersion: res.httpVersion ?? "",
    headers: res.headers,
  };
}
|
||||
|
||||
// Launch headless Chromium and return a browser context configured to look
// like a real desktop browser (UA, viewport, locale, headers) with common
// headless-detection signals masked before any page script runs.
async function createBrowserContext() {
  const launchArgs = ["--disable-blink-features=AutomationControlled"];
  const browser = await chromium.launch({ headless: true, args: launchArgs });

  const context = await browser.newContext({
    ignoreHTTPSErrors: true, // tolerate sites with broken certificates
    userAgent: REAL_UA,
    viewport: { width: 1366, height: 768 },
    deviceScaleFactor: 1,
    isMobile: false,
    locale: "en-US",
    extraHTTPHeaders: REAL_HEADERS,
  });

  // Spoof the navigator properties most commonly probed by bot detectors.
  await context.addInitScript(() => {
    Object.defineProperty(navigator, "webdriver", { get: () => false });
    Object.defineProperty(navigator, "plugins", { get: () => [1, 2, 3] });
    Object.defineProperty(navigator, "languages", { get: () => ["en-US", "en"] });
  });

  return { browser: context.browser(), context };
}
|
||||
|
||||
/**
 * Render a page in the shared Playwright context and extract the same fields
 * parseHtml() produces, but from the live DOM after JS has run.
 *
 * @param {string} url - page to render
 * @param {{context: import('playwright').BrowserContext}} shared - from createBrowserContext()
 * @returns {Promise<object>} response metadata plus a `domExtract` payload;
 *   render_mode is always "rendered" and contentType is assumed "text/html"
 */
async function fetchWithPlaywrightAndExtract(url, shared) {
  const page = await shared.context.newPage();
  const t0 = Date.now();
  let status = null, mainHeaders = {}, statusText = "";

  try {
    const resp = await page.goto(url, { waitUntil: "domcontentloaded", timeout: 30000 });
    status = resp?.status() ?? null;
    statusText = resp?.statusText() ?? "";
    try { mainHeaders = resp ? await resp.headers() : {}; } catch { }

    // Best-effort settling: network idle, then wait until the page shows real
    // content (some body text or any heading-ish element). All waits are
    // optional — timeouts are swallowed and extraction proceeds regardless.
    try { await page.waitForLoadState("networkidle", { timeout: 12000 }); } catch { }
    try {
      await page.waitForFunction(() => {
        const main = document.querySelector("main") || document.body;
        const textLen = (main?.innerText || "").replace(/\s+/g, " ").trim().length;
        const hasHeading = !!document.querySelector("h1, h2, [role='heading'], [class*='title'], [class*='heading'], [class*='hero'], [class*='banner']");
        return textLen > 160 || hasHeading;
      }, { timeout: 8000 });
    } catch { }

    // Single in-page pass: everything below runs inside the browser and must
    // return only JSON-serializable data.
    const dom = await page.evaluate(() => {
      const clean = s => (s || "").replace(/\s+/g, " ").trim();
      const getTextList = sel => Array.from(document.querySelectorAll(sel))
        .map(el => clean(el.textContent)).filter(Boolean);

      const title = document.title || "";
      const ogTitle = document.querySelector('meta[property="og:title"]')?.content || "";
      const twTitle = document.querySelector('meta[name="twitter:title"]')?.content || "";
      const metaDesc = document.querySelector('meta[name="description"]')?.content || "";
      const canonical = document.querySelector('link[rel="canonical"]')?.href || "";
      const robotsMeta = document.querySelector('meta[name="robots"]')?.content || "";
      const lang = document.documentElement.getAttribute("lang") || "";

      const h1 = getTextList("h1");
      const h2 = getTextList("h2");
      const h3 = getTextList("h3"); // NOTE(review): collected but never returned/used
      const totalHeadings = document.querySelectorAll("h1,h2,h3,h4,h5,h6,[role='heading']").length;

      const links = Array.from(document.querySelectorAll("a[href]"))
        .map(a => {
          const raw = a.getAttribute("href");
          try { return { raw, abs: new URL(raw, location.href).toString() }; }
          catch { return null; }
        })
        .filter(Boolean);

      const firstHeading = h1[0] || h2[0] || ""; // NOTE(review): unused
      const bodyText = clean((document.querySelector("main") || document.body).innerText || "");

      const schemaScripts = Array.from(document.querySelectorAll('script[type="application/ld+json"]')).map(s => s.textContent || "");

      return {
        htmlLen: (document.documentElement.outerHTML || "").length,
        title, ogTitle, twTitle, metaDesc, canonical, robotsMeta, lang,
        h1, h2, totalHeadings,
        links,
        bodyText,
        schemaScripts
      };
    });

    // Parse schema types from strings (outside of page)
    const schemaTypes = [];
    for (const raw of dom.schemaScripts || []) {
      try {
        const parsed = JSON.parse(raw);
        // Recursively collect every "@type" (string or string array) in the JSON-LD.
        const collect = (obj) => {
          if (!obj) return;
          if (Array.isArray(obj)) { obj.forEach(collect); return; }
          if (typeof obj === "object") {
            const t = obj["@type"];
            if (typeof t === "string") schemaTypes.push(t);
            else if (Array.isArray(t)) t.forEach(x => typeof x === "string" && schemaTypes.push(x));
            Object.values(obj).forEach(collect);
          }
        };
        collect(parsed);
      } catch { }  // malformed JSON-LD block — skip it
    }

    const dt = Date.now() - t0;
    const robotsLower = (dom.robotsMeta || "").toLowerCase();
    // Token-boundary match, same regex as the static parser.
    const noindex = /(^|[,;\s])noindex([,;\s]|$)/.test(robotsLower);
    const nofollow = /(^|[,;\s])nofollow([,;\s]|$)/.test(robotsLower);
    const finalTitle = (dom.title || dom.ogTitle || dom.twTitle || dom.h1?.[0] || "").trim();

    return {
      status,
      status_text: statusText,
      time_ms: dt,
      contentType: "text/html",
      bytes: dom.htmlLen || 0,  // rendered DOM size, not transfer size
      render_mode: "rendered",
      headers: mainHeaders,
      domExtract: {
        title: finalTitle,
        metaDesc: dom.metaDesc || "",
        canonical: dom.canonical || "",
        robotsMeta: dom.robotsMeta || "",
        lang: dom.lang || "",
        noindex, nofollow,
        h1_1: dom.h1?.[0] || "",
        h1_2: dom.h1?.[1] || "",
        h2_1: dom.h2?.[0] || "",
        h2_2: dom.h2?.[1] || "",
        totalHeadings: dom.totalHeadings || 0,
        links: new Set((dom.links || []).map(l => l.abs)),
        rawLinks: dom.links || [],
        bodyText: dom.bodyText || "",
        schemaTypes: Array.from(new Set(schemaTypes))
      }
    };
  } finally {
    // Always release the page even when navigation/extraction throws.
    await page.close();
  }
}
|
||||
|
||||
/* ------------------------- render decision ------------------------------ */
|
||||
// Decide whether a page fetched over plain HTTP needs a JS render pass.
// Heuristics: tiny HTML (likely an SPA shell), no heading-like elements at
// all, or a non-home page whose title merely echoes the home page's title.
function shouldRender(currentUrl, httpRes, parsed, homeTitle) {
  const pathname = new URL(currentUrl).pathname;
  const tinyShell = (httpRes.bytes ?? 0) < 4000;
  const noHeadings = parsed.totalHeadings === 0;
  const echoesHomeTitle = Boolean(
    homeTitle && parsed.title && parsed.title === homeTitle && pathname !== "/"
  );
  return tinyShell || noHeadings || echoesHomeTitle;
}
|
||||
// Return the same URL with a "www." host prefix added when it is absent.
// Input that cannot be parsed as a URL is returned unchanged.
function withWWW(urlStr) {
  try {
    const parsed = new URL(urlStr);
    if (!parsed.hostname.startsWith("www.")) {
      parsed.hostname = `www.${parsed.hostname}`;
    }
    return parsed.toString();
  } catch {
    return urlStr;
  }
}
|
||||
|
||||
/* ------------------------ per-page enrichers ---------------------------- */
|
||||
// Approximate rendered pixel width of `text` at the given font size.
// Falls back to a crude half-em-per-character estimate if string-pixel-width
// is unavailable or rejects the font. Empty/falsy text measures 0.
function measurePixelWidth(text, size = 16, font = "arial") {
  if (!text) return 0;
  let width;
  try {
    width = pixelWidth(text, { font, size });
  } catch {
    // fallback: assume each character is half the font size wide
    width = Math.round(text.length * size * 0.5);
  }
  return width;
}
|
||||
// Compute a battery of readability scores over the page body text.
// The text is capped at 200k characters; any metric whose computation
// throws is simply omitted from the result. Falsy text yields {}.
function computeReadability(text) {
  if (!text) return {};
  const sample = text.slice(0, 200000); // cap
  const out = {};
  const metrics = [
    ["flesch_reading_ease", () => readability.fleschReadingEase(sample)],
    ["flesch_kincaid_grade", () => readability.fleschKincaidGrade(sample)],
    ["gunning_fog", () => readability.gunningFog(sample)],
    ["coleman_liau", () => readability.colemanLiauIndex(sample)],
    ["ari", () => readability.automatedReadabilityIndex(sample)],
    ["smog", () => readability.smogIndex(sample)],
  ];
  for (const [key, compute] of metrics) {
    try {
      out[key] = compute();
    } catch {
      // metric unavailable for this text — leave the key out
    }
  }
  return out;
}
|
||||
|
||||
/* -------------------------------- main ---------------------------------- */
|
||||
// async function crawl(startUrl, maxPages = 50) {
|
||||
|
||||
/**
 * Crawl a site starting at `startUrl`, up to `maxPages` pages.
 *
 * Pipeline per page: plain HTTP fetch -> static parse -> optional Playwright
 * render (when the HTTP version looks like an empty JS shell) -> one-time
 * "www." retry -> enqueue internal links -> build a metrics row. After the
 * frontier is exhausted, duplicate/similarity columns are computed and the
 * JSON/CSV reports are written.
 *
 * NOTE(review): relies on module-level state (visited/queue/results/edges/
 * referrers) that is not reset here — a second call in the same process
 * continues from the previous run's state; confirm before reuse.
 *
 * @param {string} startUrl - site root to crawl
 * @param {number} [maxPages=50] - upper bound on pages visited
 */
export async function crawl(startUrl, maxPages = 50) {
  const start = normalizeUrl(startUrl, { stripHash: true });
  queue.push(start);

  // Seed from sitemap.xml + record provenance
  try {
    const sitemapUrls = await getSitemapUrls(start);
    for (const u of sitemapUrls) {
      queue.push(u);
      addEdge("sitemap.xml", u, u, "sitemap");
    }
    console.log(`📌 Seeded ${sitemapUrls.length} URL(s) from sitemap.xml`);
  } catch (e) {
    console.log("⚠️ Sitemap step skipped:", e.message);
  }

  // Lazily create the Playwright context only if some page needs rendering.
  let shared = null;
  async function getShared() { if (!shared) shared = await createBrowserContext(); return shared; }

  // Title of the home page ("/"), used by shouldRender() to spot SPA shells
  // that echo the same title on every route.
  let homeTitle = null;

  while (queue.length > 0 && visited.size < maxPages) {
    const url = queue.shift();
    if (!url) continue;

    const normUrl = normalizeUrl(url, { stripHash: true });
    if (visited.has(normUrl)) continue;
    visited.add(normUrl);

    // attemptUrls may grow by one (the www. variant) during the loop.
    let attemptUrls = [normUrl];
    let usedWWWRetry = false;

    for (let attempt = 0; attempt < attemptUrls.length; attempt++) {
      const currentUrl = attemptUrls[attempt];
      try {
        // 1) HTTP fetch
        let pageRes = await fetchWithGot(currentUrl);

        // Default (empty) extraction, filled in only for HTML responses.
        let parsed = {
          title: "", metaDesc: "", h1_1: "", h1_2: "", h2_1: "", h2_2: "",
          totalHeadings: 0, canonical: "", robotsMeta: "", noindex: false, nofollow: false,
          internalLinks: new Set(), rawLinks: [],
          lang: "", wordCount: 0, bodyText: "", schemaTypes: []
        };
        if (pageRes.contentType.includes("text/html")) {
          const p = parseHtml(pageRes.body || "", currentUrl);
          parsed = { ...parsed, ...p };
        }

        // Capture the home page title the first time we see "/".
        if (!homeTitle && new URL(currentUrl).pathname === "/") {
          homeTitle = parsed.title || "";
        }

        // 2) Render if needed
        if (pageRes.contentType.includes("text/html") && shouldRender(currentUrl, pageRes, parsed, homeTitle)) {
          const s = await getShared();
          const rendered = await fetchWithPlaywrightAndExtract(currentUrl, s);
          if (rendered.domExtract) {
            // Rendered result replaces the HTTP response (body dropped).
            pageRes = { ...rendered, body: null };
            parsed = {
              ...parsed,
              title: rendered.domExtract.title,
              metaDesc: rendered.domExtract.metaDesc,
              h1_1: rendered.domExtract.h1_1,
              h1_2: rendered.domExtract.h1_2,
              h2_1: rendered.domExtract.h2_1,
              h2_2: rendered.domExtract.h2_2,
              totalHeadings: rendered.domExtract.totalHeadings,
              canonical: rendered.domExtract.canonical,
              robotsMeta: rendered.domExtract.robotsMeta,
              noindex: rendered.domExtract.noindex,
              nofollow: rendered.domExtract.nofollow,
              internalLinks: rendered.domExtract.links,
              rawLinks: rendered.domExtract.rawLinks,
              lang: rendered.domExtract.lang || parsed.lang,
              bodyText: rendered.domExtract.bodyText || parsed.bodyText,
              wordCount: (rendered.domExtract.bodyText || "").split(/\s+/).filter(Boolean).length,
              schemaTypes: rendered.domExtract.schemaTypes
            };
          }
        }

        // If still looks empty, try www once
        if (!usedWWWRetry && parsed.totalHeadings === 0 && !parsed.h1_1) {
          attemptUrls.push(withWWW(currentUrl));
          usedWWWRetry = true;
          continue;
        }

        // Enqueue internal links + record provenance
        for (const link of parsed.internalLinks) {
          if (isInternal(start, link)) {
            const ln = normalizeUrl(link, { stripHash: true });
            const rawMatch = (parsed.rawLinks || []).find(r => r.abs === link)?.raw ?? "";
            addEdge(currentUrl, rawMatch, ln, pageRes.render_mode);
            if (!visited.has(ln)) queue.push(ln);
          }
        }

        // ---- Per-page metrics & enrichers ----
        const title = parsed.title || "";
        const metaDesc = parsed.metaDesc || "";
        const h1_1 = parsed.h1_1 || "";
        const h1_2 = parsed.h1_2 || "";
        const lang = parsed.lang || "";
        const bodyText = parsed.bodyText || "";
        const wordCount = parsed.wordCount || (bodyText ? bodyText.split(/\s+/).filter(Boolean).length : 0);

        // SERP-style pixel widths (title 16px, description 14px, H1s 24px).
        const titlePx = measurePixelWidth(title, 16, "arial");
        const descPx = measurePixelWidth(metaDesc, 14, "arial");
        const h1_1_px = measurePixelWidth(h1_1, 24, "arial");
        const h1_2_px = measurePixelWidth(h1_2, 24, "arial");

        const read = computeReadability(bodyText);

        const headers = pageRes.headers || {};
        const xRobots = (headers["x-robots-tag"] || headers["x-robots-tag".toLowerCase()]) ?? "";
        const lastModified = headers["last-modified"] ?? headers["Last-Modified"] ?? "";
        const setCookie = !!headers["set-cookie"];

        // inlinks counts only referrers recorded so far, i.e. from pages
        // crawled BEFORE this one; the final CSV reflects that partial count.
        const outlinks = parsed.internalLinks.size;
        const inlinks = (referrers.get(currentUrl) || []).length;

        // Save page row
        results.push({
          url: currentUrl,
          status: pageRes.status,
          status_text: pageRes.status_text ?? "",
          time_ms: pageRes.time_ms,
          bytes: pageRes.bytes,
          content_type: pageRes.contentType,
          http_version: pageRes.httpVersion ?? "",
          title,
          title_length: title.length,
          title_pixel_width: titlePx,
          meta_description: metaDesc,
          meta_description_length: metaDesc.length,
          meta_description_pixel_width: descPx,
          h1_1,
          h1_1_length: h1_1.length,
          h1_1_pixel_width: h1_1_px,
          h1_2,
          h1_2_length: h1_2.length,
          h1_2_pixel_width: h1_2_px,
          h2_1: parsed.h2_1 || "",
          h2_2: parsed.h2_2 || "",
          canonical: parsed.canonical,
          robots_meta: parsed.robotsMeta,
          x_robots_tag: Array.isArray(xRobots) ? xRobots.join("; ") : xRobots,
          noindex: parsed.noindex,
          nofollow: parsed.nofollow,
          lang,
          word_count: wordCount,
          flesch_reading_ease: read.flesch_reading_ease ?? "",
          flesch_kincaid_grade: read.flesch_kincaid_grade ?? "",
          gunning_fog: read.gunning_fog ?? "",
          coleman_liau: read.coleman_liau ?? "",
          ari: read.ari ?? "",
          smog: read.smog ?? "",
          schema_types: parsed.schemaTypes || [],
          inlinks,
          outlinks,
          render_mode: pageRes.render_mode,
          last_modified: lastModified,
          set_cookie: setCookie,
          crawl_timestamp: new Date().toISOString()
        });

        console.log(
          `[${pageRes.status ?? "ERR"}] ${pageRes.time_ms}ms ${String(pageRes.render_mode).padEnd(8)} H:${parsed.totalHeadings} ${currentUrl} ${title || h1_1}`
        );
        break; // success for this URL; stop attempts
      } catch (err) {
        // Fetch/parse failure: record an empty "error" row so the page still
        // appears in reports, then fall through to the next attempt (if any).
        console.error(`[ERROR] ${currentUrl} -> ${err.message}`);
        results.push({
          url: currentUrl,
          status: null, status_text: "", time_ms: null, bytes: null, content_type: "",
          http_version: "", title: "", title_length: 0, title_pixel_width: "",
          meta_description: "", meta_description_length: 0, meta_description_pixel_width: "",
          h1_1: "", h1_1_length: 0, h1_1_pixel_width: "", h1_2: "", h1_2_length: 0, h1_2_pixel_width: "",
          h2_1: "", h2_2: "",
          canonical: "", robots_meta: "", x_robots_tag: "", noindex: false, nofollow: false,
          lang: "", word_count: "", flesch_reading_ease: "", flesch_kincaid_grade: "",
          gunning_fog: "", coleman_liau: "", ari: "", smog: "",
          schema_types: [], inlinks: 0, outlinks: 0, render_mode: "error",
          last_modified: "", set_cookie: "", crawl_timestamp: new Date().toISOString()
        });
      }
    }
  }

  if (shared) await shared.browser.close();

  // -------------------- Post-process: duplicates & similarity -------------
  // Titles
  const titleMap = new Map();
  for (const r of results) {
    const key = (r.title || "").trim();
    if (!titleMap.has(key)) titleMap.set(key, []);
    titleMap.get(key).push(r);
  }
  for (const [t, arr] of titleMap.entries()) {
    if (!t) continue; // empty titles are not flagged as duplicates
    const isDup = arr.length > 1;
    for (const row of arr) row.duplicate_title_exact = isDup ? "yes" : "no";
  }

  // Meta descriptions
  const descMap = new Map();
  for (const r of results) {
    const key = (r.meta_description || "").trim();
    if (!descMap.has(key)) descMap.set(key, []);
    descMap.get(key).push(r);
  }
  for (const [d, arr] of descMap.entries()) {
    if (!d) continue;
    const isDup = arr.length > 1;
    for (const row of arr) row.duplicate_description_exact = isDup ? "yes" : "no";
  }

  // Nearest neighbor similarities (within site, lightweight).
  // NOTE(review): O(n^2) over pages via findBestMatch — fine for small crawls.
  const titleList = results.map(r => ({ url: r.url, text: (r.title || "").trim() }));
  const descList = results.map(r => ({ url: r.url, text: (r.meta_description || "").trim() }));
  for (const r of results) {
    // titles
    const others = titleList.filter(x => x.url !== r.url && x.text);
    let bestT = { rating: 0, target: "" };
    if (r.title && others.length) {
      const ratings = stringSimilarity.findBestMatch(r.title, others.map(x => x.text));
      const best = ratings.bestMatch;
      bestT.rating = best.rating;
      const idx = ratings.ratings.findIndex(x => x.rating === best.rating);
      bestT.target = others[idx]?.url || "";
    }
    r.nearest_title_similarity = bestT.rating ? bestT.rating.toFixed(3) : "";
    r.nearest_title_url = bestT.target;

    // descriptions
    const othersD = descList.filter(x => x.url !== r.url && x.text);
    let bestD = { rating: 0, target: "" };
    if (r.meta_description && othersD.length) {
      const ratingsD = stringSimilarity.findBestMatch(r.meta_description, othersD.map(x => x.text));
      const best = ratingsD.bestMatch;
      bestD.rating = best.rating;
      const idx = ratingsD.ratings.findIndex(x => x.rating === best.rating);
      bestD.target = othersD[idx]?.url || "";
    }
    r.nearest_description_similarity = bestD.rating ? bestD.rating.toFixed(3) : "";
    r.nearest_description_url = bestD.target;
  }

  console.log(`\n✅ Crawl finished. Total pages: ${visited.size}`);
  writePageReports(results);
  writeLinkEdges(edges);
  writeErrors(results);
}
|
||||
|
||||
// // CLI: node crawler.js https://site.com 200
|
||||
// const START_URL = process.argv[2] || "https://example.com";
|
||||
// const MAX_PAGES = Number(process.argv[3] || 100);
|
||||
// crawl(START_URL, MAX_PAGES);
|
||||
921
crawler.js
Normal file
921
crawler.js
Normal file
@ -0,0 +1,921 @@
|
||||
// crawler.js
|
||||
import got from "got";
|
||||
import * as cheerio from "cheerio";
|
||||
import normalizeUrl from "normalize-url";
|
||||
import { isInternal } from "./utils/urlHelpers.js";
|
||||
import { getSitemapUrls } from "./utils/sitemap.js";
|
||||
import fs from "node:fs";
|
||||
import path from "node:path";
|
||||
import { chromium } from "playwright";
|
||||
|
||||
// NEW libs
|
||||
import pixelWidth from "string-pixel-width";
|
||||
import * as readability from "text-readability";
|
||||
import stringSimilarity from "string-similarity";
|
||||
|
||||
/* ------------------------------ globals --------------------------------- */
// NOTE: We'll reset these at the start of crawl() so repeated runs don't share state.
// (NOTE(review): crawl() is outside this view — confirm the reset actually exists.)
const visited = new Set();   // normalized URLs already dequeued for fetching
const queue = [];            // crawl frontier (FIFO via push/shift)
const results = [];          // one report row object per attempted page

// Link provenance: every discovered edge (source -> target)
const edges = []; // { from, raw_href, to, discovered_by }

// Quick referrer map for error report
const referrers = new Map(); // url -> Array<{from, raw_href, discovered_by}>

// Desktop-Chrome identity sent on every request so bot-sensitive sites
// serve the same markup they would to a real browser.
const REAL_UA =
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36";
const REAL_HEADERS = {
  "user-agent": REAL_UA,
  accept:
    "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
  "accept-language": "en-US,en;q=0.9",
  "upgrade-insecure-requests": "1",
};
|
||||
|
||||
/* ------------------------------ utils ----------------------------------- */
|
||||
// CSV-escape one cell value: null/undefined -> "", and any value containing
// a double quote, comma, or newline is quoted with internal quotes doubled.
function csvEscape(v) {
  if (v === undefined || v === null) return "";
  const s = String(v);
  const needsQuoting = /[",\n]/.test(s);
  return needsQuoting ? `"${s.replace(/"/g, '""')}"` : s;
}
|
||||
// Create the directory (and any missing parents) unless it already exists.
function ensureDir(dir) {
  if (!fs.existsSync(dir)) {
    fs.mkdirSync(dir, { recursive: true });
  }
}
|
||||
/**
 * Persist the per-page crawl results.
 *
 * Writes the full result set as pretty-printed JSON to
 * reports/crawl-<timestamp>.json and returns the absolute path. A
 * Screaming-Frog-style CSV is also assembled into `lines`, but the CSV
 * write is commented out, so that work is currently dead code kept for
 * easy re-enabling.
 *
 * @param {Array<object>} rows - page rows produced by the crawler
 * @returns {{json: string}} absolute path of the JSON report
 */
function writePageReports(rows) {
  ensureDir("reports");
  // Filesystem-safe timestamp, e.g. "2024-01-31-12-00-00"
  const stamp = new Date().toISOString().replace(/[:T]/g, "-").slice(0, 19);
  const base = path.join("reports", `crawl-${stamp}`);

  fs.writeFileSync(`${base}.json`, JSON.stringify(rows, null, 2), "utf8");
  console.log(`\n📝 Full JSON report saved: ${base}.json`);

  // Columns (a Screaming-Frog-ish shape with our extras)
  const headers = [
    "url",
    "status",
    "status_text",
    "time_ms",
    "bytes",
    "content_type",
    "http_version",
    "title",
    "title_length",
    "title_pixel_width",
    "meta_description",
    "meta_description_length",
    "meta_description_pixel_width",
    "h1_1",
    "h1_1_length",
    "h1_1_pixel_width",
    "h1_2",
    "h1_2_length",
    "h1_2_pixel_width",
    "h2_1",
    "h2_2",
    "canonical",
    "robots_meta",
    "x_robots_tag",
    "noindex",
    "nofollow",
    "lang",
    "word_count",
    "flesch_reading_ease",
    "flesch_kincaid_grade",
    "gunning_fog",
    "coleman_liau",
    "ari",
    "smog",
    "schema_types",
    "inlinks",
    "outlinks",
    "render_mode",
    "last_modified",
    "set_cookie",
    "crawl_timestamp",
    "duplicate_title_exact",
    "nearest_title_similarity",
    "nearest_title_url",
    "duplicate_description_exact",
    "nearest_description_similarity",
    "nearest_description_url",
  ];
  const lines = [headers.join(",")];
  for (const r of rows) {
    // Cell order below must match `headers` above exactly.
    lines.push(
      [
        r.url,
        r.status,
        r.status_text ?? "",
        r.time_ms,
        r.bytes,
        r.content_type,
        r.http_version ?? "",
        r.title,
        r.title_length,
        r.title_pixel_width,
        r.meta_description,
        r.meta_description_length,
        r.meta_description_pixel_width,
        r.h1_1 ?? "",
        r.h1_1_length ?? 0,
        r.h1_1_pixel_width ?? "",
        r.h1_2 ?? "",
        r.h1_2_length ?? 0,
        r.h1_2_pixel_width ?? "",
        r.h2_1 ?? "",
        r.h2_2 ?? "",
        r.canonical,
        r.robots_meta,
        r.x_robots_tag ?? "",
        r.noindex,
        r.nofollow,
        r.lang ?? "",
        r.word_count ?? "",
        r.flesch_reading_ease ?? "",
        r.flesch_kincaid_grade ?? "",
        r.gunning_fog ?? "",
        r.coleman_liau ?? "",
        r.ari ?? "",
        r.smog ?? "",
        // schema_types is an array in the row; CSV cell uses "|" as separator
        Array.isArray(r.schema_types) ? r.schema_types.join("|") : "",
        r.inlinks ?? 0,
        r.outlinks ?? 0,
        r.render_mode,
        r.last_modified ?? "",
        r.set_cookie ? "yes" : "no",
        r.crawl_timestamp ?? "",
        r.duplicate_title_exact ?? "",
        r.nearest_title_similarity ?? "",
        r.nearest_title_url ?? "",
        r.duplicate_description_exact ?? "",
        r.nearest_description_similarity ?? "",
        r.nearest_description_url ?? "",
      ]
        .map(csvEscape)
        .join(",")
    );
  }
  // If you also want CSV persisted, uncomment:
  // fs.writeFileSync(`${base}.csv`, lines.join("\n"), "utf8");
  // console.log(`📝 CSV report saved: ${base}.csv`);

  return { json: path.resolve(`${base}.json`) /*, csv: path.resolve(`${base}.csv`)*/ };
}
|
||||
/**
 * Persist link provenance — every discovered edge (source page, raw href,
 * resolved target, discovery mechanism) — as a timestamped CSV in reports/.
 * @param {Array<{from:string, raw_href:string, to:string, discovered_by:string}>} edges
 * @returns {{linksCsv: string}} absolute path of the CSV written
 */
function writeLinkEdges(edges) {
  ensureDir("reports");
  const stamp = new Date().toISOString().replace(/[:T]/g, "-").slice(0, 19);
  const file = path.join("reports", `links-${stamp}.csv`);

  const rows = edges.map((e) =>
    [e.from, e.raw_href, e.to, e.discovered_by].map(csvEscape).join(",")
  );
  const csv = ["from,raw_href,to,discovered_by", ...rows].join("\n");

  fs.writeFileSync(file, csv, "utf8");
  console.log(`🔗 Link provenance saved: ${file}`);
  return { linksCsv: path.resolve(file) };
}
|
||||
/**
 * Persist every crawled row with an HTTP status >= 400 to a timestamped
 * CSV in reports/. Each error URL is expanded to one line per referrer
 * (looked up in the module-level `referrers` map); an unreferenced error
 * URL still gets one line with empty referrer columns.
 * @param {Array<object>} rows per-page crawl result rows
 * @returns {{errorsCsv: string}} absolute path of the CSV written
 */
function writeErrors(rows) {
  ensureDir("reports");
  const stamp = new Date().toISOString().replace(/[:T]/g, "-").slice(0, 19);
  const file = path.join("reports", `errors-${stamp}.csv`);

  const lines = ["url,status,title,from_page,raw_href,discovered_by"];

  for (const r of rows) {
    // status === null means a transport failure, not an HTTP error row
    if (!r || r.status === null || r.status < 400) continue;

    const refs = referrers.get(r.url) || [];
    if (refs.length === 0) {
      lines.push([r.url, r.status, r.title, "", "", ""].map(csvEscape).join(","));
      continue;
    }
    for (const ref of refs) {
      lines.push(
        [r.url, r.status, r.title, ref.from, ref.raw_href, ref.discovered_by]
          .map(csvEscape)
          .join(",")
      );
    }
  }

  fs.writeFileSync(file, lines.join("\n"), "utf8");
  console.log(`❗ Error report saved: ${file}`);
  return { errorsCsv: path.resolve(file) };
}
|
||||
/**
 * Record one discovered link: append it to the module-level `edges` list
 * and index it by target URL in the `referrers` map so inlink counts and
 * error provenance can be looked up later.
 * @param {string} from source page (or "sitemap.xml")
 * @param {string} rawHref href exactly as written in the source document
 * @param {string} to normalized absolute target URL
 * @param {string} discovered_by discovery mechanism ("http" | "rendered" | "sitemap")
 */
function addEdge(from, rawHref, to, discovered_by) {
  const raw_href = rawHref || "";
  edges.push({ from, raw_href, to, discovered_by });

  const existing = referrers.get(to);
  if (existing) {
    existing.push({ from, raw_href, discovered_by });
  } else {
    referrers.set(to, [{ from, raw_href, discovered_by }]);
  }
}
|
||||
|
||||
/* ---------------------- parse HTML without JS --------------------------- */
|
||||
/**
 * JSON.parse that never throws: returns the parsed value on success,
 * or null when the input is not valid JSON.
 * @param {string} txt raw JSON text
 * @returns {*} parsed value, or null on parse failure
 */
function safeJsonParse(txt) {
  let parsed = null;
  try {
    parsed = JSON.parse(txt);
  } catch {
    // malformed JSON — caller treats null as "nothing usable"
  }
  return parsed;
}
|
||||
/**
 * Collect every schema.org @type declared in the page's JSON-LD
 * (<script type="application/ld+json">) blocks. Nested objects and
 * arrays are walked recursively so embedded entities are included.
 * @param {*} $ cheerio root of the parsed document
 * @returns {string[]} de-duplicated @type names
 */
function parseSchemaTypes($) {
  const types = new Set();

  // Depth-first walk over parsed JSON-LD, harvesting @type values.
  const walk = (node) => {
    if (!node) return;
    if (Array.isArray(node)) {
      node.forEach(walk);
      return;
    }
    if (typeof node === "object") {
      const t = node["@type"];
      if (typeof t === "string") {
        types.add(t);
      } else if (Array.isArray(t)) {
        for (const x of t) {
          if (typeof x === "string") types.add(x);
        }
      }
      // recurse into every value to catch nested entities
      Object.values(node).forEach(walk);
    }
  };

  $('script[type="application/ld+json"]').each((_, el) => {
    const parsed = safeJsonParse($(el).contents().text());
    if (parsed) walk(parsed);
  });

  return [...types];
}
|
||||
/**
 * Extract SEO-relevant fields from static (non-rendered) HTML via cheerio.
 * Produces titles/headings/meta tags, robots directives, body text stats,
 * link lists, and JSON-LD schema types for a single page.
 * @param {string} html raw HTML body
 * @param {string} url page URL, used as the base for resolving relative hrefs
 * @returns {object} parsed page fields (see return literal below)
 */
function parseHtml(html, url) {
  const $ = cheerio.load(html);

  let title = ($("title").first().text() || "").trim();
  // Social-card titles kept as fallbacks when <title> is empty.
  const ogTitle = $('meta[property="og:title"]').attr("content") || "";
  const twTitle = $('meta[name="twitter:title"]').attr("content") || "";

  // Headings (capture top two H1s and H2s)
  const h1s = $("h1")
    .map((_, el) => $(el).text().trim())
    .get();
  const h2s = $("h2")
    .map((_, el) => $(el).text().trim())
    .get();

  const h1_1 = h1s[0] || "";
  const h1_2 = h1s[1] || "";
  const h2_1 = h2s[0] || "";
  const h2_2 = h2s[1] || "";

  // Used downstream by shouldRender(): zero headings hints at a JS shell.
  const totalHeadings = $("h1,h2,h3,h4,h5,h6,[role='heading']").length;

  // Fallback chain: og:title -> twitter:title -> first H1.
  if (!title) title = (ogTitle || twTitle || h1_1 || "").trim();

  const metaDesc = ($('meta[name="description"]').attr("content") || "").trim();
  const canonical = ($('link[rel="canonical"]').attr("href") || "").trim();
  const robotsMeta = ($('meta[name="robots"]').attr("content") || "").trim();
  const robotsLower = robotsMeta.toLowerCase();
  // Directive must appear as a whole token (delimited by , ; or whitespace).
  const noindex = /(^|[,;\s])noindex([,;\s]|$)/.test(robotsLower);
  const nofollow = /(^|[,;\s])nofollow([,;\s]|$)/.test(robotsLower);

  const lang = ($("html").attr("lang") || "").trim();

  // Basic text body for word count / readability
  const bodyText = ($("main").text() || $("body").text() || "").replace(/\s+/g, " ").trim();
  const wordCount = bodyText ? bodyText.split(/\s+/).length : 0;

  // Internal links + raw href
  // NOTE(review): despite the name, internalLinks holds ALL resolvable
  // absolute hrefs; the internal/external filter happens at enqueue time
  // (isInternal in the crawl loop) — confirm before relying on the name.
  const internalLinks = new Set();
  const rawLinks = [];
  $("a[href]").each((_, el) => {
    const href = $(el).attr("href");
    if (!href) return;
    try {
      const abs = new URL(href, url).toString();
      rawLinks.push({ raw: href, abs });
      internalLinks.add(abs);
    } catch {}
  });

  // Schema.org JSON-LD types
  const schemaTypes = parseSchemaTypes($);

  return {
    title,
    metaDesc,
    h1_1,
    h1_2,
    h2_1,
    h2_2,
    totalHeadings,
    canonical,
    robotsMeta,
    noindex,
    nofollow,
    internalLinks,
    rawLinks,
    lang,
    wordCount,
    schemaTypes,
    bodyText,
  };
}
|
||||
|
||||
/* ------------------------------ fetchers -------------------------------- */
|
||||
/**
 * Fetch a URL over plain HTTP(S) with got, using browser-like headers.
 * Never throws on 4xx/5xx (throwHttpErrors: false) — status is reported
 * in the returned record instead.
 * @param {string} url absolute URL to fetch
 * @returns {Promise<object>} status, timing, body, byte size, headers,
 *   and render_mode "http" (distinguishes this from a Playwright render)
 */
async function fetchWithGot(url) {
  const startedAt = Date.now();

  const res = await got(url, {
    timeout: { request: 20000 },
    throwHttpErrors: false,
    headers: REAL_HEADERS,
    http2: false,
  });

  const elapsed = Date.now() - startedAt;
  const contentType = (res.headers["content-type"] || "").toLowerCase();

  // Prefer the declared Content-Length; otherwise measure the body.
  const declaredLength = res.headers["content-length"];
  const bytes = declaredLength
    ? Number(declaredLength)
    : Buffer.byteLength(res.body || "", "utf8");

  return {
    status: res.statusCode ?? null,
    status_text: res.statusMessage ?? "",
    time_ms: elapsed,
    contentType,
    body: res.body,
    bytes,
    render_mode: "http",
    httpVersion: res.httpVersion ?? "",
    headers: res.headers,
  };
}
|
||||
|
||||
/**
 * Launch headless Chromium and build a browser context that masks common
 * automation fingerprints: realistic UA/headers/viewport plus init scripts
 * that hide navigator.webdriver and fake plugins/languages.
 * @returns {Promise<{browser: object, context: object}>} the launched
 *   browser (via context.browser()) and the stealth-configured context
 */
async function createBrowserContext() {
  const browser = await chromium.launch({
    headless: true,
    // Removes the navigator.webdriver automation hint at the Blink level.
    args: ["--disable-blink-features=AutomationControlled"],
  });
  const context = await browser.newContext({
    ignoreHTTPSErrors: true, // Ignore SSL certificate errors
    userAgent: REAL_UA,
    viewport: { width: 1366, height: 768 },
    deviceScaleFactor: 1,
    isMobile: false,
    locale: "en-US",
    extraHTTPHeaders: REAL_HEADERS,
  });
  // Runs before any page script: patch the most common headless-detection
  // probes so bot-wall checks see a "normal" browser.
  await context.addInitScript(() => {
    Object.defineProperty(navigator, "webdriver", { get: () => false });
    Object.defineProperty(navigator, "plugins", { get: () => [1, 2, 3] });
    Object.defineProperty(navigator, "languages", { get: () => ["en-US", "en"] });
  });
  return { browser: context.browser(), context };
}
|
||||
|
||||
/**
 * Fetch a page with a real (headless) browser and extract SEO fields from
 * the rendered DOM. Used when the plain-HTTP fetch looks like a JS shell
 * (see shouldRender). Waits — best-effort, each with its own timeout —
 * for network idle and for meaningful content (text or a heading) before
 * extracting. The page is always closed, even on error.
 * @param {string} url page URL to render
 * @param {{context: object}} shared shared Playwright browser context
 * @returns {Promise<object>} fetch metadata plus a `domExtract` payload
 *   mirroring parseHtml()'s shape (links as a Set, rawLinks, schemaTypes…)
 */
async function fetchWithPlaywrightAndExtract(url, shared) {
  const page = await shared.context.newPage();
  const t0 = Date.now();
  let status = null,
    mainHeaders = {},
    statusText = "";

  try {
    const resp = await page.goto(url, { waitUntil: "domcontentloaded", timeout: 30000 });
    status = resp?.status() ?? null;
    statusText = resp?.statusText() ?? "";
    try {
      mainHeaders = resp ? await resp.headers() : {};
    } catch {}

    // Best-effort settling: neither wait is fatal if it times out.
    try {
      await page.waitForLoadState("networkidle", { timeout: 12000 });
    } catch {}
    try {
      // Consider the page "hydrated" once it has non-trivial text or any
      // heading-ish element (covers SPAs that render titles late).
      await page.waitForFunction(
        () => {
          const main = document.querySelector("main") || document.body;
          const textLen = (main?.innerText || "").replace(/\s+/g, " ").trim().length;
          const hasHeading = !!document.querySelector(
            "h1, h2, [role='heading'], [class*='title'], [class*='heading'], [class*='hero'], [class*='banner']"
          );
          return textLen > 160 || hasHeading;
        },
        { timeout: 8000 }
      );
    } catch {}

    // Single round-trip DOM extraction. This closure is serialized and run
    // in the page — it can only use browser globals, nothing from this file.
    const dom = await page.evaluate(() => {
      const clean = (s) => (s || "").replace(/\s+/g, " ").trim();
      const getTextList = (sel) =>
        Array.from(document.querySelectorAll(sel))
          .map((el) => clean(el.textContent))
          .filter(Boolean);

      const title = document.title || "";
      const ogTitle = document.querySelector('meta[property="og:title"]')?.content || "";
      const twTitle = document.querySelector('meta[name="twitter:title"]')?.content || "";
      const metaDesc = document.querySelector('meta[name="description"]')?.content || "";
      const canonical = document.querySelector('link[rel="canonical"]')?.href || "";
      const robotsMeta = document.querySelector('meta[name="robots"]')?.content || "";
      const lang = document.documentElement.getAttribute("lang") || "";

      const h1 = getTextList("h1");
      const h2 = getTextList("h2");
      const totalHeadings = document.querySelectorAll("h1,h2,h3,h4,h5,h6,[role='heading']").length;

      // Resolve every href against the current location; unparsable ones
      // are dropped via the null filter.
      const links = Array.from(document.querySelectorAll("a[href]"))
        .map((a) => {
          const raw = a.getAttribute("href");
          try {
            return { raw, abs: new URL(raw, location.href).toString() };
          } catch {
            return null;
          }
        })
        .filter(Boolean);

      const bodyText = clean((document.querySelector("main") || document.body).innerText || "");

      // JSON-LD is shipped back as raw strings; parsing happens outside
      // the page so a bad script can't break the evaluate call.
      const schemaScripts = Array.from(
        document.querySelectorAll('script[type="application/ld+json"]')
      ).map((s) => s.textContent || "");

      return {
        htmlLen: (document.documentElement.outerHTML || "").length,
        title,
        ogTitle,
        twTitle,
        metaDesc,
        canonical,
        robotsMeta,
        lang,
        h1,
        h2,
        totalHeadings,
        links,
        bodyText,
        schemaScripts,
      };
    });

    // Parse schema types from strings (outside of page)
    const schemaTypes = [];
    for (const raw of dom.schemaScripts || []) {
      try {
        const parsed = JSON.parse(raw);
        // Same recursive @type harvest as parseSchemaTypes, inlined here
        // because that helper expects a cheerio root, not parsed JSON.
        const collect = (obj) => {
          if (!obj) return;
          if (Array.isArray(obj)) {
            obj.forEach(collect);
            return;
          }
          if (typeof obj === "object") {
            const t = obj["@type"];
            if (typeof t === "string") schemaTypes.push(t);
            else if (Array.isArray(t)) t.forEach((x) => typeof x === "string" && schemaTypes.push(x));
            Object.values(obj).forEach(collect);
          }
        };
        collect(parsed);
      } catch {}
    }

    const dt = Date.now() - t0;
    const robotsLower = (dom.robotsMeta || "").toLowerCase();
    const noindex = /(^|[,;\s])noindex([,;\s]|$)/.test(robotsLower);
    const nofollow = /(^|[,;\s])nofollow([,;\s]|$)/.test(robotsLower);
    // Same fallback chain as parseHtml: title -> og -> twitter -> first H1.
    const finalTitle = (dom.title || dom.ogTitle || dom.twTitle || dom.h1?.[0] || "").trim();

    return {
      status,
      status_text: statusText,
      time_ms: dt,
      contentType: "text/html",
      // NOTE(review): bytes here is the serialized DOM length (chars),
      // not transfer size — not directly comparable to fetchWithGot's bytes.
      bytes: dom.htmlLen || 0,
      render_mode: "rendered",
      headers: mainHeaders,
      domExtract: {
        title: finalTitle,
        metaDesc: dom.metaDesc || "",
        canonical: dom.canonical || "",
        robotsMeta: dom.robotsMeta || "",
        lang: dom.lang || "",
        noindex,
        nofollow,
        h1_1: dom.h1?.[0] || "",
        h1_2: dom.h1?.[1] || "",
        h2_1: dom.h2?.[0] || "",
        h2_2: dom.h2?.[1] || "",
        totalHeadings: dom.totalHeadings || 0,
        links: new Set((dom.links || []).map((l) => l.abs)),
        rawLinks: dom.links || [],
        bodyText: dom.bodyText || "",
        schemaTypes: Array.from(new Set(schemaTypes)),
      },
    };
  } finally {
    await page.close();
  }
}
|
||||
|
||||
/* ------------------------- render decision ------------------------------ */
|
||||
/**
 * Decide whether a plain-HTTP fetch should be retried with a headless
 * browser render. Heuristics for a client-side-rendered shell:
 *   - tiny HTML payload (< 4000 bytes),
 *   - no headings at all in the static markup,
 *   - a non-home page whose <title> merely echoes the homepage title.
 * @param {string} currentUrl page URL (only its pathname is inspected)
 * @param {{bytes?: number}} httpRes result of the HTTP fetch
 * @param {{totalHeadings: number, title: string}} parsed static-HTML parse
 * @param {string|null} homeTitle title captured on the site root, if known
 * @returns {boolean} true when a rendered fetch should be attempted
 */
function shouldRender(currentUrl, httpRes, parsed, homeTitle) {
  const pathname = new URL(currentUrl).pathname;

  const tinyShell = (httpRes.bytes ?? 0) < 4000;
  const noHeadings = parsed.totalHeadings === 0;
  const echoesHomeTitle =
    Boolean(homeTitle) &&
    Boolean(parsed.title) &&
    parsed.title === homeTitle &&
    pathname !== "/";

  return tinyShell || noHeadings || echoesHomeTitle;
}
|
||||
/**
 * Return the URL with a "www." prefix added to its hostname (used for a
 * one-shot retry when the bare host serves an empty shell). URLs that
 * already have the prefix, and unparsable input, are returned unchanged.
 * @param {string} urlStr absolute URL string
 * @returns {string} the (possibly) rewritten URL
 */
function withWWW(urlStr) {
  let parsed;
  try {
    parsed = new URL(urlStr);
  } catch {
    return urlStr; // not a valid absolute URL — leave as-is
  }
  if (!parsed.hostname.startsWith("www.")) {
    parsed.hostname = `www.${parsed.hostname}`;
  }
  return parsed.toString();
}
|
||||
|
||||
/* ------------------------ per-page enrichers ---------------------------- */
|
||||
/**
 * Approximate the rendered pixel width of a string — used to flag titles
 * and meta descriptions likely to truncate in search results.
 * Delegates to the string-pixel-width library; if that call fails for any
 * reason, falls back to a crude half-font-size-per-character estimate.
 * @param {string} text string to measure (falsy -> 0)
 * @param {number} [size=16] font size in px
 * @param {string} [font="arial"] font name understood by pixelWidth
 * @returns {number} estimated width in pixels
 */
function measurePixelWidth(text, size = 16, font = "arial") {
  if (!text) return 0;

  let px;
  try {
    px = pixelWidth(text, { font, size });
  } catch {
    // Rough estimate: average glyph ≈ half the font size.
    px = Math.round(text.length * size * 0.5);
  }
  return px;
}
|
||||
/**
 * Compute a bundle of readability scores for a page's body text.
 * Each metric is computed independently; any metric whose computation
 * throws is simply omitted from the result, so the returned object may
 * contain any subset of the six keys. Input is capped at 200k chars.
 * @param {string} text plain body text (falsy -> {})
 * @returns {object} map of metric name -> numeric score
 */
function computeReadability(text) {
  if (!text) return {};
  const sample = text.slice(0, 200000); // cap work on huge pages

  const metrics = [
    ["flesch_reading_ease", () => readability.fleschReadingEase(sample)],
    ["flesch_kincaid_grade", () => readability.fleschKincaidGrade(sample)],
    ["gunning_fog", () => readability.gunningFog(sample)],
    ["coleman_liau", () => readability.colemanLiauIndex(sample)],
    ["ari", () => readability.automatedReadabilityIndex(sample)],
    ["smog", () => readability.smogIndex(sample)],
  ];

  const out = {};
  for (const [key, compute] of metrics) {
    try {
      out[key] = compute();
    } catch {
      // metric unavailable for this text — omit it, as before
    }
  }
  return out;
}
|
||||
|
||||
/* -------------------------------- main ---------------------------------- */
|
||||
/**
 * Crawl a site breadth-first and return a structured SEO report.
 * Pipeline per page: HTTP fetch (got) -> static parse (cheerio) ->
 * optional headless render (Playwright) when the page looks like a JS
 * shell -> per-page metrics (pixel widths, readability, headers) -> row.
 * After the loop: exact-duplicate and nearest-neighbor similarity passes
 * over titles/descriptions, then optional report files under reports/.
 *
 * Uses module-level state (visited, queue, results, edges, referrers),
 * reset at the top of each run — so concurrent crawls are NOT safe.
 *
 * @param {string} startUrl site root (or any starting page)
 * @param {number} maxPages cap on pages visited (default 50)
 * @param {(tick:any)=>void} [onProgress] optional callback for progress events
 * @param {{persistReports?: boolean, collectPages?: boolean}} [options]
 * @returns {Promise<{ results: any[], files: Record<string,string>, total: number }>}
 */
export async function crawl(startUrl, maxPages = 50, onProgress, options = {}) {
  const persistReports = options.persistReports !== false; // default true

  // Reset global state per run
  visited.clear();
  queue.length = 0;
  results.length = 0;
  edges.length = 0;
  referrers.clear();

  const start = normalizeUrl(startUrl, { stripHash: true });
  queue.push(start);

  // Seed from sitemap.xml + record provenance (best-effort; a missing or
  // broken sitemap never aborts the crawl).
  try {
    const sitemapUrls = await getSitemapUrls(start);
    for (const u of sitemapUrls) {
      queue.push(u);
      addEdge("sitemap.xml", u, u, "sitemap");
    }
    console.log(`📌 Seeded ${sitemapUrls.length} URL(s) from sitemap.xml`);
  } catch (e) {
    console.log("⚠️ Sitemap step skipped:", e.message);
  }

  // Lazily launch the headless browser: only pages that need rendering
  // pay the startup cost, and the context is shared across all of them.
  let shared = null;
  async function getShared() {
    if (!shared) shared = await createBrowserContext();
    return shared;
  }

  // Title of the site root, used by shouldRender() to spot SPA shells.
  let homeTitle = null;

  while (queue.length > 0 && visited.size < maxPages) {
    const url = queue.shift();
    if (!url) continue;

    const normUrl = normalizeUrl(url, { stripHash: true });
    if (visited.has(normUrl)) continue;
    visited.add(normUrl);

    // attemptUrls may grow by one "www." variant below (single retry).
    const attemptUrls = [normUrl];
    let usedWWWRetry = false;

    for (let attempt = 0; attempt < attemptUrls.length; attempt++) {
      const currentUrl = attemptUrls[attempt];
      try {
        // 1) HTTP fetch
        let pageRes = await fetchWithGot(currentUrl);

        // Neutral defaults so non-HTML responses still produce a full row.
        let parsed = {
          title: "",
          metaDesc: "",
          h1_1: "",
          h1_2: "",
          h2_1: "",
          h2_2: "",
          totalHeadings: 0,
          canonical: "",
          robotsMeta: "",
          noindex: false,
          nofollow: false,
          internalLinks: new Set(),
          rawLinks: [],
          lang: "",
          wordCount: 0,
          bodyText: "",
          schemaTypes: [],
        };
        if (pageRes.contentType.includes("text/html")) {
          const p = parseHtml(pageRes.body || "", currentUrl);
          parsed = { ...parsed, ...p };
        }

        // Capture the homepage title once, for the SPA-shell heuristic.
        if (!homeTitle && new URL(currentUrl).pathname === "/") {
          homeTitle = parsed.title || "";
        }

        // 2) Render if needed (JS-shell heuristics in shouldRender)
        if (pageRes.contentType.includes("text/html") && shouldRender(currentUrl, pageRes, parsed, homeTitle)) {
          const s = await getShared();
          const rendered = await fetchWithPlaywrightAndExtract(currentUrl, s);
          if (rendered.domExtract) {
            // Rendered data wins; raw body is dropped (already extracted).
            pageRes = { ...rendered, body: null };
            parsed = {
              ...parsed,
              title: rendered.domExtract.title,
              metaDesc: rendered.domExtract.metaDesc,
              h1_1: rendered.domExtract.h1_1,
              h1_2: rendered.domExtract.h1_2,
              h2_1: rendered.domExtract.h2_1,
              h2_2: rendered.domExtract.h2_2,
              totalHeadings: rendered.domExtract.totalHeadings,
              canonical: rendered.domExtract.canonical,
              robotsMeta: rendered.domExtract.robotsMeta,
              noindex: rendered.domExtract.noindex,
              nofollow: rendered.domExtract.nofollow,
              internalLinks: rendered.domExtract.links,
              rawLinks: rendered.domExtract.rawLinks,
              lang: rendered.domExtract.lang || parsed.lang,
              bodyText: rendered.domExtract.bodyText || parsed.bodyText,
              wordCount: (rendered.domExtract.bodyText || "")
                .split(/\s+/)
                .filter(Boolean).length,
              schemaTypes: rendered.domExtract.schemaTypes,
            };
          }
        }

        // If still looks empty, try the "www." variant once: `continue`
        // advances the attempt loop to the URL just pushed.
        if (!usedWWWRetry && parsed.totalHeadings === 0 && !parsed.h1_1) {
          attemptUrls.push(withWWW(currentUrl));
          usedWWWRetry = true;
          continue;
        }

        // Enqueue internal links + record provenance
        for (const link of parsed.internalLinks) {
          if (isInternal(start, link)) {
            const ln = normalizeUrl(link, { stripHash: true });
            const rawMatch = (parsed.rawLinks || []).find((r) => r.abs === link)?.raw ?? "";
            addEdge(currentUrl, rawMatch, ln, pageRes.render_mode);
            if (!visited.has(ln)) queue.push(ln);
          }
        }

        // ---- Per-page metrics & enrichers ----
        const title = parsed.title || "";
        const metaDesc = parsed.metaDesc || "";
        const h1_1 = parsed.h1_1 || "";
        const h1_2 = parsed.h1_2 || "";
        const lang = parsed.lang || "";
        const bodyText = parsed.bodyText || "";
        const wordCount = parsed.wordCount || (bodyText ? bodyText.split(/\s+/).filter(Boolean).length : 0);

        // SERP-truncation estimates at typical Google font sizes.
        const titlePx = measurePixelWidth(title, 16, "arial");
        const descPx = measurePixelWidth(metaDesc, 14, "arial");
        const h1_1_px = measurePixelWidth(h1_1, 24, "arial");
        const h1_2_px = measurePixelWidth(h1_2, 24, "arial");

        const read = computeReadability(bodyText);

        const headers = pageRes.headers || {};
        const xRobots = (headers["x-robots-tag"] || headers["x-robots-tag".toLowerCase()]) ?? "";
        const lastModified = headers["last-modified"] ?? headers["Last-Modified"] ?? "";
        const setCookie = !!headers["set-cookie"];

        const outlinks = parsed.internalLinks.size;
        // NOTE(review): inlinks is counted mid-crawl, so pages processed
        // early under-count links from pages crawled later — confirm if a
        // post-pass recount is wanted.
        const inlinks = (referrers.get(currentUrl) || []).length;

        // Save page row
        results.push({
          url: currentUrl,
          status: pageRes.status,
          status_text: pageRes.status_text ?? "",
          time_ms: pageRes.time_ms,
          bytes: pageRes.bytes,
          content_type: pageRes.contentType,
          http_version: pageRes.httpVersion ?? "",
          title,
          title_length: title.length,
          title_pixel_width: titlePx,
          meta_description: metaDesc,
          meta_description_length: metaDesc.length,
          meta_description_pixel_width: descPx,
          h1_1,
          h1_1_length: h1_1.length,
          h1_1_pixel_width: h1_1_px,
          h1_2,
          h1_2_length: h1_2.length,
          h1_2_pixel_width: h1_2_px,
          h2_1: parsed.h2_1 || "",
          h2_2: parsed.h2_2 || "",
          canonical: parsed.canonical,
          robots_meta: parsed.robotsMeta,
          x_robots_tag: Array.isArray(xRobots) ? xRobots.join("; ") : xRobots,
          noindex: parsed.noindex,
          nofollow: parsed.nofollow,
          lang,
          word_count: wordCount,
          flesch_reading_ease: read.flesch_reading_ease ?? "",
          flesch_kincaid_grade: read.flesch_kincaid_grade ?? "",
          gunning_fog: read.gunning_fog ?? "",
          coleman_liau: read.coleman_liau ?? "",
          ari: read.ari ?? "",
          smog: read.smog ?? "",
          schema_types: parsed.schemaTypes || [],
          inlinks,
          outlinks,
          render_mode: pageRes.render_mode,
          last_modified: lastModified,
          set_cookie: setCookie,
          crawl_timestamp: new Date().toISOString(),
        });

        console.log(
          `[${pageRes.status ?? "ERR"}] ${pageRes.time_ms}ms ${String(pageRes.render_mode).padEnd(8)} H:${parsed.totalHeadings} ${currentUrl} ${
            title || h1_1
          }`
        );

        // optional progress callback (non-fatal)
        try {
          onProgress?.({
            url: currentUrl,
            status: pageRes.status,
            title,
            inlinks,
            outlinks,
            visited: visited.size,
            queued: queue.length,
          });
        } catch {}

        break; // success for this URL; stop attempts
      } catch (err) {
        console.error(`[ERROR] ${currentUrl} -> ${err.message}`);
        // Emit a fully-shaped error row so reports keep a uniform schema.
        results.push({
          url: currentUrl,
          status: null,
          status_text: "",
          time_ms: null,
          bytes: null,
          content_type: "",
          http_version: "",
          title: "",
          title_length: 0,
          title_pixel_width: "",
          meta_description: "",
          meta_description_length: 0,
          meta_description_pixel_width: "",
          h1_1: "",
          h1_1_length: 0,
          h1_1_pixel_width: "",
          h1_2: "",
          h1_2_length: 0,
          h1_2_pixel_width: "",
          h2_1: "",
          h2_2: "",
          canonical: "",
          robots_meta: "",
          x_robots_tag: "",
          noindex: false,
          nofollow: false,
          lang: "",
          word_count: "",
          flesch_reading_ease: "",
          flesch_kincaid_grade: "",
          gunning_fog: "",
          coleman_liau: "",
          ari: "",
          smog: "",
          schema_types: [],
          inlinks: 0,
          outlinks: 0,
          render_mode: "error",
          last_modified: "",
          set_cookie: "",
          crawl_timestamp: new Date().toISOString(),
        });

        try {
          onProgress?.({
            url: currentUrl,
            error: String(err?.message || err),
            visited: visited.size,
            queued: queue.length,
          });
        } catch {}
      }
    }
  }

  if (shared) await shared.browser.close();

  // -------------------- Post-process: duplicates & similarity -------------
  // Titles: group rows by exact (trimmed) title, flag groups of size > 1.
  const titleMap = new Map();
  for (const r of results) {
    const key = (r.title || "").trim();
    if (!titleMap.has(key)) titleMap.set(key, []);
    titleMap.get(key).push(r);
  }
  for (const [t, arr] of titleMap.entries()) {
    if (!t) continue; // empty titles are not "duplicates" of each other
    const isDup = arr.length > 1;
    for (const row of arr) row.duplicate_title_exact = isDup ? "yes" : "no";
  }

  // Meta descriptions: same exact-duplicate pass.
  const descMap = new Map();
  for (const r of results) {
    const key = (r.meta_description || "").trim();
    if (!descMap.has(key)) descMap.set(key, []);
    descMap.get(key).push(r);
  }
  for (const [d, arr] of descMap.entries()) {
    if (!d) continue;
    const isDup = arr.length > 1;
    for (const row of arr) row.duplicate_description_exact = isDup ? "yes" : "no";
  }

  // Nearest neighbor similarities (within site, lightweight) — O(n²) in
  // page count, acceptable at the default maxPages scale.
  const titleList = results.map((r) => ({ url: r.url, text: (r.title || "").trim() }));
  const descList = results.map((r) => ({ url: r.url, text: (r.meta_description || "").trim() }));
  for (const r of results) {
    // titles
    const others = titleList.filter((x) => x.url !== r.url && x.text);
    let bestT = { rating: 0, target: "" };
    if (r.title && others.length) {
      const ratings = stringSimilarity.findBestMatch(r.title, others.map((x) => x.text));
      const best = ratings.bestMatch;
      bestT.rating = best.rating;
      // map the winning rating back to the URL it came from
      const idx = ratings.ratings.findIndex((x) => x.rating === best.rating);
      bestT.target = others[idx]?.url || "";
    }
    r.nearest_title_similarity = bestT.rating ? bestT.rating.toFixed(3) : "";
    r.nearest_title_url = bestT.target;

    // descriptions
    const othersD = descList.filter((x) => x.url !== r.url && x.text);
    let bestD = { rating: 0, target: "" };
    if (r.meta_description && othersD.length) {
      const ratingsD = stringSimilarity.findBestMatch(r.meta_description, othersD.map((x) => x.text));
      const best = ratingsD.bestMatch;
      bestD.rating = best.rating;
      const idx = ratingsD.ratings.findIndex((x) => x.rating === best.rating);
      bestD.target = othersD[idx]?.url || "";
    }
    r.nearest_description_similarity = bestD.rating ? bestD.rating.toFixed(3) : "";
    r.nearest_description_url = bestD.target;
  }

  console.log(`\n✅ Crawl finished. Total pages: ${visited.size}`);

  let files = {};
  if (persistReports) {
    const a = writePageReports(results);
    const b = writeLinkEdges(edges);
    const c = writeErrors(results);
    files = { ...a, ...b, ...c };
  }

  return { results, files, total: results.length };
}
|
||||
|
||||
// // CLI: node crawler.js https://site.com 200
|
||||
// const START_URL = process.argv[2] || "https://example.com";
|
||||
// const MAX_PAGES = Number(process.argv[3] || 100);
|
||||
// crawl(START_URL, MAX_PAGES);
|
||||
19
middlewares/auth.middleware.js
Normal file
19
middlewares/auth.middleware.js
Normal file
@ -0,0 +1,19 @@
|
||||
import jwt from "jsonwebtoken";
|
||||
|
||||
/**
 * Express middleware guarding protected routes with a JWT bearer token.
 * Expects "Authorization: Bearer <token>". On success the decoded payload
 * is attached as req.user and the chain continues; otherwise a 401 is
 * returned ("Missing token" for absent/non-Bearer headers, "Invalid or
 * expired token" when verification fails).
 */
export function authMiddleware(req, res, next) {
  const header = req.headers.authorization;

  // Only Bearer-scheme credentials are accepted.
  if (!header || !header.startsWith("Bearer ")) {
    return res.status(401).json({ error: "Missing token" });
  }

  const token = header.slice("Bearer ".length).split(" ")[0];

  try {
    const payload = jwt.verify(token, process.env.JWT_SECRET);
    req.user = payload;
    next();
  } catch (err) {
    console.error("JWT verification failed:", err.message);
    return res.status(401).json({ error: "Invalid or expired token" });
  }
}
|
||||
4
middlewares/pageSpeedErrorHandler.js
Normal file
4
middlewares/pageSpeedErrorHandler.js
Normal file
@ -0,0 +1,4 @@
|
||||
/**
 * Express catch-all error handler: logs the error server-side and replies
 * 500 with the error's message (or a generic fallback when absent).
 * Must keep the 4-arg signature so Express treats it as error middleware.
 */
export const errorHandler = (err, req, res, next) => {
  console.error(err);
  const message = err.message || 'Internal Server Error';
  res.status(500).json({ message });
};
|
||||
41
models/blog.model.js
Normal file
41
models/blog.model.js
Normal file
@ -0,0 +1,41 @@
|
||||
import mongoose from 'mongoose';

// Embedded comment subdocument: either a registered user (ref) or a guest
// identified only by `name`.
const commentSchema = new mongoose.Schema({
  user: { type: mongoose.Schema.Types.ObjectId, ref: 'User' },
  name: String,
  text: { type: String, required: true },
  createdAt: { type: Date, default: Date.now }
});

// Blog post. Multi-tenant via projectId: slugs are only unique within a
// project (see the compound index below), not globally.
const blogSchema = new mongoose.Schema({
  projectId: { type: String, required: true, index: true },
  title: { type: String, required: true },
  // unique:false on purpose — uniqueness is enforced per-project below
  slug: { type: String, required: true, unique: false },
  description: { type: String, required: true },
  imageUrl: String,
  bigImageUrl: String, // ✅ New field
  category: { type: mongoose.Schema.Types.ObjectId, ref: 'Category' },
  tags: [String],
  comments: [commentSchema],
  likes: [{ type: mongoose.Schema.Types.ObjectId, ref: 'User' }],
  author: { type: mongoose.Schema.Types.ObjectId, ref: 'User' }
}, { timestamps: true });

// 👇 projectId + slug combo unique
blogSchema.index({ projectId: 1, slug: 1 }, { unique: true });

// 👇 Add base URL when converting to JSON: stored image paths are relative;
// clients always receive absolute URLs.
blogSchema.set('toJSON', {
  transform: (doc, ret) => {
    const baseUrl = process.env.BACKEND_URL || 'http://localhost:3010';
    if (ret.imageUrl && !ret.imageUrl.startsWith('http')) {
      ret.imageUrl = `${baseUrl}${ret.imageUrl}`;
    }
    if (ret.bigImageUrl && !ret.bigImageUrl.startsWith('http')) {
      ret.bigImageUrl = `${baseUrl}${ret.bigImageUrl}`;
    }
    return ret;
  }
});

export default mongoose.model('Blog', blogSchema);
|
||||
21
models/category.model.js
Normal file
21
models/category.model.js
Normal file
@ -0,0 +1,21 @@
|
||||
import mongoose from "mongoose";

// Blog category.
// NOTE(review): `name` is globally unique here while `projectId` suggests
// multi-project scoping — two projects cannot share a category name.
// Confirm whether a compound { projectId, name } unique index was intended
// (cf. blog.model.js, which scopes slug uniqueness per project).
const categorySchema = new mongoose.Schema(
  {
    name: {
      type: String,
      required: true,
      unique: true,
      trim: true,
    },
    projectId: {
      type: String, // For multi-project support
      required: true,
    },
  },
  { timestamps: true }
);

const Category = mongoose.model("Category", categorySchema);

export default Category;
|
||||
10
models/comments.model.js
Normal file
10
models/comments.model.js
Normal file
@ -0,0 +1,10 @@
|
||||
import mongoose from "mongoose";

// Standalone comment collection referencing its Blog post.
// NOTE(review): blog.model.js also embeds a `comments` subdocument array —
// confirm which of the two storage schemes is authoritative.
const commentSchema = new mongoose.Schema({
  blog: { type: mongoose.Schema.Types.ObjectId, ref: "Blog" },
  name: String,
  text: String,
  createdAt: { type: Date, default: Date.now }
});

export default mongoose.model("Comment", commentSchema);
|
||||
18
models/maisondetreats/cakeOrder.model.js
Normal file
18
models/maisondetreats/cakeOrder.model.js
Normal file
@ -0,0 +1,18 @@
|
||||
import mongoose from "mongoose";

// Free-form cake order: the entire order is stored as one untyped object
// keyed by category -> item -> quantity (see example below), so the schema
// performs no validation beyond presence.
const CakeOrderSchema = new mongoose.Schema(
  {
    order: {
      type: Object,
      required: true,
      // Example format:
      // {
      //   "Mini Cakes": { "Thandai Cake": 1, "Mango Cardamom": 1 },
      //   "Mithai-Inspired Macarons": { "Mango macarons (pack of 6)": 1, "Pista (pack of 6)": 10 }
      // }
    },
  },
  { timestamps: true }
);

export const CakeOrder = mongoose.model("CakeOrder", CakeOrderSchema);
|
||||
12
models/message.model.js
Normal file
12
models/message.model.js
Normal file
@ -0,0 +1,12 @@
|
||||
import mongoose from "mongoose";

// Message — a contact-form submission, tagged with the originating project.
const MessageSchema = new mongoose.Schema({
  project: { type: String, required: true }, // new field to identify project
  name: { type: String, default: "Guest" },
  email: { type: String },
  message: { type: String, required: true },
}, { timestamps: true });

// Reuse an already-compiled model if present (guards against
// "Cannot overwrite model" on hot reload / repeated imports).
const Message = mongoose.models.Message || mongoose.model("Message", MessageSchema);

export default Message;
|
||||
42
models/pageSpeedTest.model.js
Normal file
42
models/pageSpeedTest.model.js
Normal file
@ -0,0 +1,42 @@
|
||||
import mongoose from 'mongoose';

// PageSpeedTest — one Lighthouse/PageSpeed run for a URL on a given device.
// Stores category scores, key lab metrics, audit outcomes and report assets.
const pageSpeedTestSchema = new mongoose.Schema({
  url: { type: String, required: true },                                 // audited page
  device: { type: String, enum: ['mobile', 'desktop'], required: true }, // emulated form factor
  // Category scores as reported by the audit tool.
  scores: {
    performance: Number,
    accessibility: Number,
    bestPractices: Number,
    seo: Number,
    pwa: Number,
  },
  // Lab metrics kept as display strings (units included, e.g. "1.2 s").
  metrics: {
    firstContentfulPaint: String,
    largestContentfulPaint: String,
    totalBlockingTime: String,
    timeToInteractive: String,
    speedIndex: String,
    cumulativeLayoutShift: String,
  },
  // Improvement suggestions with estimated savings.
  opportunities: [
    {
      title: String,
      description: String,
      estimatedSavings: String,
    },
  ],
  diagnostics: Object, // raw diagnostics payload from the audit run
  // Audits that failed, with explanations.
  failedAudits: [
    {
      title: String,
      description: String,
    },
  ],
  passedAudits: [String],        // titles of passing audits
  notApplicableAudits: [String], // titles of audits that did not apply
  screenshot: String,            // screenshot reference (path or data URI — not verifiable here)
  treemapPath: { type: String }, // location of the generated treemap report
  createdAt: { type: Date, default: Date.now },
});

export default mongoose.model('PageSpeedTest', pageSpeedTestSchema);
|
||||
12
models/payment.model.js
Normal file
12
models/payment.model.js
Normal file
@ -0,0 +1,12 @@
|
||||
import mongoose from "mongoose";

// Payment — one Stripe payment attempt (PaymentIntent or Checkout Session).
const paymentSchema = new mongoose.Schema({
  email: { type: String, required: true },
  amount: { type: Number, required: true }, // store in cents
  currency: { type: String, default: "usd" },
  stripePaymentIntentId: { type: String }, // optional — Checkout flows may not create an intent up front
  stripeSessionId: { type: String }, // store Checkout Session ID
  status: { type: String, default: "pending" }, // pending, succeeded, failed
}, { timestamps: true });

export const Payment = mongoose.model("Payment", paymentSchema);
|
||||
14
models/user.model.js
Normal file
14
models/user.model.js
Normal file
@ -0,0 +1,14 @@
|
||||
import mongoose from "mongoose";

// User — application account; password is stored only as a bcrypt hash.
const userSchema = new mongoose.Schema(
  {
    email: { type: String, required: true, unique: true, lowercase: true },
    passwordHash: { type: String, required: true }, // bcrypt hash, never plaintext
    // Password-reset flow: opaque token emailed to the user, plus expiry.
    resetPasswordToken: { type: String },
    resetPasswordExpires: { type: Date },
  },
  { timestamps: true }
);

export default mongoose.model("User", userSchema);
|
||||
5093
package-lock.json
generated
Normal file
5093
package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
36
package.json
Normal file
36
package.json
Normal file
@ -0,0 +1,36 @@
|
||||
{
|
||||
"name": "crawlerx",
|
||||
"version": "1.0.0",
|
||||
"type": "module",
|
||||
"main": "crawler.js",
|
||||
"scripts": {
|
||||
"start": "node crawler.js https://example.com 200",
|
||||
"dev": "nodemon crawler.js https://example.com 200"
|
||||
},
|
||||
"dependencies": {
|
||||
"axios": "^1.12.2",
|
||||
"bcrypt": "^6.0.0",
|
||||
"cheerio": "^1.1.0",
|
||||
"chrome-launcher": "^1.2.1",
|
||||
"cors": "^2.8.5",
|
||||
"dotenv": "^17.2.2",
|
||||
"express": "^5.1.0",
|
||||
"got": "^14.4.7",
|
||||
"jsonwebtoken": "^9.0.2",
|
||||
"lighthouse": "^12.8.2",
|
||||
"mongoose": "^8.18.1",
|
||||
"multer": "^2.0.2",
|
||||
"nodemailer": "^7.0.6",
|
||||
"normalize-url": "^8.0.2",
|
||||
"sitemapper": "^3.2.7",
|
||||
"slugify": "^1.6.6",
|
||||
"string-pixel-width": "^1.11.0",
|
||||
"string-similarity": "^4.0.4",
|
||||
"stripe": "^18.5.0",
|
||||
"text-readability": "^1.1.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"nodemon": "^3.1.10",
|
||||
"playwright": "^1.55.0"
|
||||
}
|
||||
}
|
||||
2897
public/lighthouse-treemap/treemap-1758885492002.html
Normal file
2897
public/lighthouse-treemap/treemap-1758885492002.html
Normal file
File diff suppressed because one or more lines are too long
2897
public/lighthouse-treemap/treemap-1758885890915.html
Normal file
2897
public/lighthouse-treemap/treemap-1758885890915.html
Normal file
File diff suppressed because one or more lines are too long
2897
public/lighthouse-treemap/treemap-1758958684569.html
Normal file
2897
public/lighthouse-treemap/treemap-1758958684569.html
Normal file
File diff suppressed because one or more lines are too long
18
routes/auth.routes.js
Normal file
18
routes/auth.routes.js
Normal file
@ -0,0 +1,18 @@
|
||||
import express from "express";
import { signup, login, changePassword, forgotPassword, resetPassword } from "../controllers/auth.controller.js";
import { authMiddleware } from "../middlewares/auth.middleware.js";

// Router mounted at /api/auth: account lifecycle + password management.
const router = express.Router();

// Public endpoints (no token required).
router.post("/signup", signup);
router.post("/login", login);
router.post("/forgot-password", forgotPassword);
router.post("/reset-password", resetPassword);

// Endpoints that require a valid auth token.
router.post("/change-password", authMiddleware, changePassword);

// example protected route
router.get("/profile", authMiddleware, (req, res) => {
  res.json({ user: req.user });
});

export default router;
|
||||
54
routes/blog.routes.js
Normal file
54
routes/blog.routes.js
Normal file
@ -0,0 +1,54 @@
|
||||
import express from "express";
import multer from "multer";
import {
  createBlog,
  getAllBlogs,
  getBlogBySlug,
  likeBlog,
} from "../controllers/blog.controller.js";

import {
  createCategory,
  getCategories,
  deleteCategory,
} from "../controllers/category.controller.js";

import {
  addComment as addCommentController,
  getComments,
  deleteComment,
} from "../controllers/comment.controller.js";

// Router mounted at /api/blog: blog posts, categories, comments.
const router = express.Router();
const upload = multer({ dest: "uploads/" }); // disk storage for blog images

// =======================
// Category Routes
// =======================
// BUGFIX: these MUST be registered before "/:slug" below. Express matches
// routes in registration order, so with "/:slug" first, GET /category was
// captured as slug === "category" and dispatched to getBlogBySlug instead
// of getCategories.
router.post("/category", createCategory); // Create Category (admin)
router.get("/category", getCategories); // List Categories
router.delete("/category/:id", deleteCategory); // Delete Category (admin)

// =======================
// Blog Routes
// =======================
// Create a blog (multipart form; optional "image" file field)
router.post("/", upload.single("image"), createBlog);

// Get all blogs
router.get("/", getAllBlogs);

// Get blog by slug — keep LAST among single-segment GET routes (catch-all).
router.get("/:slug", getBlogBySlug);

// Like a blog
router.post("/:id/like", likeBlog);

// =======================
// Comment Routes
// =======================
router.post("/:blogId/comments", addCommentController); // Add Comment
router.get("/:blogId/comments", getComments); // Get Comments
router.delete("/:blogId/comments/:commentId", deleteComment); // Delete Comment (admin)

export default router;
|
||||
6
routes/crawl.routes.js
Normal file
6
routes/crawl.routes.js
Normal file
@ -0,0 +1,6 @@
|
||||
import { Router } from "express";
import { crawlHandler } from "../controllers/crawl.controller.js";

// Router mounted at /crawl — a single GET endpoint that runs a crawl.
const crawlRouter = Router();

crawlRouter.get("/", crawlHandler);

export default crawlRouter;
|
||||
8
routes/lighthouse.routes.js
Normal file
8
routes/lighthouse.routes.js
Normal file
@ -0,0 +1,8 @@
|
||||
import express from 'express';
import { runAudit } from '../controllers/lighthouseController.js';

// Router mounted at /api/lighthouse — POST /audit triggers a Lighthouse run.
const lighthouseRouter = express.Router();

lighthouseRouter.post('/audit', runAudit);

export default lighthouseRouter;
|
||||
12
routes/maisondetreats/cakeOrder.routes.js
Normal file
12
routes/maisondetreats/cakeOrder.routes.js
Normal file
@ -0,0 +1,12 @@
|
||||
import express from "express";
import { createCakeOrder, getAllCakeOrders } from "../../controllers/maisondetreats/cakeOrder.controller.js";

// Router mounted at /api/cake-orders (Maison de Treats).
const cakeOrderRouter = express.Router();

// Create a new cake order
cakeOrderRouter.post("/", createCakeOrder);

// Get all cake orders
cakeOrderRouter.get("/", getAllCakeOrders);

export default cakeOrderRouter;
|
||||
12
routes/message.routes.js
Normal file
12
routes/message.routes.js
Normal file
@ -0,0 +1,12 @@
|
||||
import express from "express";
import { sendMessage, getMessages } from "../controllers/message.controller.js";

// Router mounted at /api/messages — contact-form submissions.
const messageRouter = express.Router();

// POST /api/messages → Save a message
messageRouter.post("/", sendMessage);

// GET /api/messages → Get all messages (optional)
messageRouter.get("/", getMessages);

export default messageRouter;
|
||||
14
routes/payment.route.js
Normal file
14
routes/payment.route.js
Normal file
@ -0,0 +1,14 @@
|
||||
import express from "express";
import {
  createPaymentIntent,
  createCheckoutSession,
  handleWebhook
} from "../controllers/payment.controller.js";

// Router mounted at /api/payment.
const router = express.Router();

router.post("/create-intent", createPaymentIntent);
router.post("/create-checkout-session", createCheckoutSession);
// NOTE(review): Stripe signature verification needs the RAW body, which
// express.raw() here only provides if no upstream parser (e.g. an
// app-level express.json()) has already consumed the stream — confirm
// the mount order in server.js exempts this path from JSON parsing.
router.post("/webhook", express.raw({ type: "application/json" }), handleWebhook);

export default router;
|
||||
6
routes/sitemap.routes.js
Normal file
6
routes/sitemap.routes.js
Normal file
@ -0,0 +1,6 @@
|
||||
import { Router } from "express";
import { sitemapHandler } from "../controllers/sitemap.controller.js";

// Router mounted at /sitemap — a single GET endpoint.
const sitemapRouter = Router();

sitemapRouter.get("/", sitemapHandler);

export default sitemapRouter;
|
||||
237
server copy.js
Normal file
237
server copy.js
Normal file
@ -0,0 +1,237 @@
|
||||
// // server.js
|
||||
// import express from "express";
|
||||
// import { Queue } from "bullmq";
|
||||
// import { connection } from "./redis.js";
|
||||
// import crypto from "crypto";
|
||||
|
||||
// const app = express();
|
||||
// app.use(express.json());
|
||||
|
||||
// const crawlQueue = new Queue("crawl", { connection });
|
||||
|
||||
// // Start a new crawl
|
||||
// app.post("/crawl", async (req, res) => {
|
||||
// const { startUrl } = req.body;
|
||||
// if (!startUrl) return res.status(400).json({ error: "Missing startUrl" });
|
||||
|
||||
// const crawlId = crypto.randomUUID();
|
||||
// await crawlQueue.add("fetch", { crawlId, url: startUrl });
|
||||
|
||||
// res.json({ crawlId, message: "Crawl started" });
|
||||
// });
|
||||
|
||||
// // (Optional) Check progress
|
||||
// app.get("/status/:id", async (req, res) => {
|
||||
// // For now just reply with "in progress"
|
||||
// res.json({ crawlId: req.params.id, status: "in progress" });
|
||||
// });
|
||||
|
||||
// app.listen(3000, () => {
|
||||
// console.log("Crawler API running at http://localhost:3000");
|
||||
// });
|
||||
|
||||
|
||||
// // server.js
|
||||
// import express from "express";
|
||||
// import cors from "cors"; // ← optional but recommended
|
||||
// import { crawl } from "./crawler.js"; // ensure crawl is a NAMED export; if default, use: import crawl from "./crawler.js";
|
||||
|
||||
// const app = express();
|
||||
// const PORT = process.env.PORT || 3010;
|
||||
|
||||
// /* Parse JSON BEFORE any middleware that might read req.body */
|
||||
// app.use(express.json());
|
||||
|
||||
// /* CORS (adjust origins as needed) */
|
||||
// app.use(cors({
|
||||
// origin: [
|
||||
// "http://localhost:3000",
|
||||
// "https://your-frontend.example" // ← replace or remove
|
||||
// ],
|
||||
// }));
|
||||
|
||||
// /* Safe request logger */
|
||||
// app.use((req, res, next) => {
|
||||
// console.log(`[${new Date().toISOString()}] ${req.method} ${req.originalUrl}`);
|
||||
// if (req.query && Object.keys(req.query).length) console.log("Query:", req.query);
|
||||
// if (req.body && typeof req.body === "object" && Object.keys(req.body).length) console.log("Body:", req.body);
|
||||
// next();
|
||||
// });
|
||||
|
||||
// /* GET /crawl?url=https://site.com&max=50 */
|
||||
// app.get("/crawl", async (req, res) => {
|
||||
// try {
|
||||
// const { url, max } = req.query;
|
||||
// if (!url) return res.status(400).json({ error: "Missing url param" });
|
||||
|
||||
// // validate & normalize
|
||||
// const target = new URL(String(url)); // throws if invalid
|
||||
// const limit = Math.min(Math.max(parseInt(max ?? "50", 10), 1), 500);
|
||||
|
||||
// await crawl(target.toString(), limit);
|
||||
// res.json({ ok: true, message: `Crawl started`, url: target.toString(), limit });
|
||||
// } catch (err) {
|
||||
// console.error("Crawl error:", err);
|
||||
// res.status(500).json({ error: "Crawl failed", details: String(err?.message ?? err) });
|
||||
// }
|
||||
// });
|
||||
|
||||
// /* Global safety nets so crashes don’t become silent restart loops */
|
||||
// process.on("unhandledRejection", (err) => console.error("unhandledRejection:", err));
|
||||
// process.on("uncaughtException", (err) => console.error("uncaughtException:", err));
|
||||
|
||||
// /* Bind to all interfaces so remote calls work */
|
||||
// app.listen(PORT, "0.0.0.0", () => {
|
||||
// console.log(`🚀 Server running at http://localhost:${PORT}`);
|
||||
// });
|
||||
|
||||
// server.js
import express from "express";
import cors from "cors";
import path from "node:path";
import fs from "node:fs";
import fsp from "node:fs/promises"; // NOTE(review): unused in this file as shown — confirm before removing
import { fileURLToPath } from "node:url";
import { crawl } from "./crawler.js"; // crawl(target, limit, onProgress?, options?)

const app = express();
const PORT = process.env.PORT || 3010;
// ESM equivalent of CommonJS __dirname.
const __dirname = path.dirname(fileURLToPath(import.meta.url));

/* ------------ Middleware ------------ */
app.use(express.json());
app.use(cors({ origin: ["http://localhost:3000", "https://app.crawlerx.co"] }));
app.use(express.static(path.join(__dirname, "public")));
// Root: serve the bundled viewer UI when present, else a plain-text banner.
app.get("/", (_req, res) => {
  const viewer = path.join(__dirname, "public", "crawlerx_viewer.html");
  return fs.existsSync(viewer)
    ? res.sendFile(viewer)
    : res.type("text/plain").send("CrawlerX backend is running.");
});
// Liveness probe.
app.get("/healthz", (_req, res) => res.json({ ok: true, time: new Date().toISOString() }));
|
||||
|
||||
/* ------------ Helpers ------------ */
// ISO timestamp with ":" and "." replaced so it is safe inside a filename.
const ts = () =>
  new Date().toISOString().replaceAll(":", "-").replaceAll(".", "-"); // safe filename

/**
 * Send `obj` as a pretty-printed JSON attachment named `filename`.
 * BUGFIX: the Content-Disposition header previously contained a literal
 * placeholder instead of interpolating `filename`, so downloads were
 * mis-named.
 */
function attachJson(res, filename, obj) {
  const json = JSON.stringify(obj, null, 2);
  res.setHeader("Content-Type", "application/json; charset=utf-8");
  res.setHeader("Content-Disposition", `attachment; filename="${filename}"`);
  return res.send(json);
}

// True for absolute filesystem paths; false for relative or invalid input.
function isAbs(p) {
  try { return path.isAbsolute(p); } catch { return false; }
}
|
||||
|
||||
/* ------------ Crawl endpoint ------------ */
|
||||
/**
 * GET /crawl?url=https://site.com&max=50[&stream=1][&download=1][&nostore=1]
 * - stream=1   : SSE live progress (no download)
 * - download=1 : respond as a JSON download (attachment)
 * - nostore=1  : ask crawler not to write files (if supported by your crawler)
 */
app.get("/crawl", async (req, res) => {
  try {
    const { url, max, stream, download, nostore } = req.query;
    if (!url) return res.status(400).json({ error: "Missing url param" });

    const target = new URL(String(url)); // validate; throws on malformed input

    // Clamp the page limit to [1, 500]; fall back to 50 on non-numeric input
    // (previously NaN would propagate through Math.min/Math.max).
    const parsedMax = Number.parseInt(max ?? "50", 10);
    const limit = Math.min(Math.max(Number.isNaN(parsedMax) ? 50 : parsedMax, 1), 500);

    const wantsStream =
      String(stream) === "1" ||
      (req.get("accept") || "").includes("text/event-stream");

    /* ---------- SSE mode ---------- */
    if (wantsStream) {
      if (String(download) === "1") {
        return res.status(400).json({ error: "download not supported with stream=1" });
      }
      res.setHeader("Content-Type", "text/event-stream");
      res.setHeader("Cache-Control", "no-cache, no-transform");
      res.setHeader("Connection", "keep-alive");
      res.flushHeaders?.();
      // Comment-only SSE frames keep proxies from timing out the stream.
      const heartbeat = setInterval(() => res.write(":\n\n"), 15000);
      const send = (obj, evt) => {
        if (evt) res.write(`event: ${evt}\n`);
        res.write(`data: ${JSON.stringify(obj)}\n\n`);
      };
      send({ ok: true, message: "Crawl started", url: target.toString(), limit }, "started");

      let finished = false;
      req.on("close", () => { clearInterval(heartbeat); if (!finished) console.warn("SSE client disconnected."); });

      const result = await crawl(
        target.toString(),
        limit,
        (tick) => send(tick),
        // If your crawler supports it, this avoids writing files during SSE runs:
        { persistReports: false, collectPages: true }
      );

      finished = true;
      clearInterval(heartbeat);
      send({ ok: true, done: true, result }, "done");
      return res.end();
    }

    /* ---------- Non-streaming mode ---------- */
    // Ask crawler (if it supports options) to avoid writing files when nostore=1 or download requested.
    const preferMemory = String(nostore) === "1" || String(download) === "1";
    const result = await crawl(
      target.toString(),
      limit,
      undefined,
      preferMemory ? { persistReports: false, collectPages: true } : undefined
    );

    // If caller wants a downloadable JSON file...
    if (String(download) === "1") {
      const filename = `crawl-${ts()}.json`;

      // 1) Best case: crawler returned in-memory data (no disk IO).
      //    Try the common shapes a crawler result may expose.
      const inMemory =
        result?.jsonData ??
        result?.pages ??
        result?.report ??
        (Array.isArray(result) ? result : null);

      if (inMemory) {
        return attachJson(res, filename, inMemory);
      }

      // 2) Fallback: crawler saved a JSON report path that we can stream.
      const jsonPath = result?.reports?.json;
      if (jsonPath && fs.existsSync(isAbs(jsonPath) ? jsonPath : path.join(__dirname, jsonPath))) {
        const abs = isAbs(jsonPath) ? jsonPath : path.join(__dirname, jsonPath);
        res.setHeader("Content-Type", "application/json; charset=utf-8");
        // BUGFIX: this header previously emitted a literal placeholder instead
        // of interpolating the generated filename.
        res.setHeader("Content-Disposition", `attachment; filename="${filename}"`);
        return fs.createReadStream(abs).pipe(res);
      }

      // 3) Last resort: send the entire result itself as JSON.
      return attachJson(res, filename, result);
    }

    // Default JSON (inline, not attachment)
    return res.json({
      ok: true,
      message: "Crawl completed",
      url: target.toString(),
      limit,
      result
    });
  } catch (err) {
    console.error("Crawl error:", err);
    return res.status(500).json({ error: "Crawl failed", details: String(err?.message ?? err) });
  }
});
|
||||
|
||||
/* ------------ Safety nets ------------ */
// Log-and-continue on unhandled failures so a stray rejection doesn't
// silently become a restart loop.
process.on("unhandledRejection", (e) => console.error("unhandledRejection:", e));
process.on("uncaughtException", (e) => console.error("uncaughtException:", e));

/* ------------ Start server ------------ */
// Bind to all interfaces so the API is reachable from other hosts.
app.listen(PORT, "0.0.0.0", () => {
  console.log(`🚀 Server running at http://localhost:${PORT}`);
});
|
||||
80
server.js
Normal file
80
server.js
Normal file
@ -0,0 +1,80 @@
|
||||
import express from "express";
|
||||
import cors from "cors";
|
||||
import path from "node:path";
|
||||
import fs from "node:fs";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import dotenv from "dotenv";
|
||||
|
||||
import crawlRoutes from "./routes/crawl.routes.js";
|
||||
import sitemapRoutes from "./routes/sitemap.routes.js";
|
||||
import authRoutes from "./routes/auth.routes.js"; // Login & Signup endpoints
|
||||
import paymentRoutes from "./routes/payment.route.js";
|
||||
import lighthouseRoutes from "./routes/lighthouse.routes.js"; // <-- ADD THIS
|
||||
import messageRoutes from "./routes/message.routes.js";
|
||||
import cakeOrderRoutes from "./routes/maisondetreats/cakeOrder.routes.js";
|
||||
import blogRoutes from "./routes/blog.routes.js";
|
||||
import { connectDB } from "./config/db.js";
|
||||
import { mailer } from "./utils/mailer.js";
|
||||
|
||||
// ------------------ Load environment ------------------
|
||||
dotenv.config(); // Must be first
|
||||
|
||||
// ------------------ Connect database ------------------
|
||||
await connectDB();
|
||||
|
||||
// ------------------ Express setup ------------------
|
||||
const app = express();
|
||||
const PORT = process.env.PORT || 3010;
|
||||
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
||||
|
||||
app.use(express.json());
|
||||
app.use(
|
||||
cors({
|
||||
origin: [
|
||||
"http://localhost:3000",
|
||||
"http://127.0.0.1:3000",
|
||||
"https://api.crawlerx.co",
|
||||
"https://app.crawlerx.co",
|
||||
],
|
||||
})
|
||||
);
|
||||
app.use(express.static(path.join(__dirname, "public")));
|
||||
|
||||
// ------------------ SMTP verification ------------------
|
||||
|
||||
console.log("SMTP Host:", process.env.SMTP_HOST);
|
||||
console.log("SMTP Port:", process.env.SMTP_PORT);
|
||||
// ------------------ Routes ------------------
|
||||
app.get("/", (_req, res) => {
|
||||
const viewer = path.join(__dirname, "public", "crawlerx_viewer.html");
|
||||
if (fs.existsSync(viewer)) {
|
||||
return res.sendFile(viewer);
|
||||
} else {
|
||||
return res
|
||||
.type("text/plain")
|
||||
.send("CrawlerX backend is running.");
|
||||
}
|
||||
});
|
||||
|
||||
app.get("/healthz", (_req, res) => res.json({ ok: true, time: new Date().toISOString() }));
|
||||
|
||||
app.use("/crawl", crawlRoutes);
|
||||
app.use("/sitemap", sitemapRoutes);
|
||||
app.use("/api/auth", authRoutes); // Login & Signup endpoints
|
||||
app.use("/api/payment", paymentRoutes);
|
||||
app.use("/api/lighthouse", lighthouseRoutes);
|
||||
app.use("/api/blog", blogRoutes); // All blog/category/comment routes now prefixed with /api/blog
|
||||
app.use("/api/messages", messageRoutes);
|
||||
app.use("/api/cake-orders", cakeOrderRoutes);
|
||||
|
||||
// Serve uploaded files
|
||||
app.use('/uploads', express.static(path.join(process.cwd(), 'uploads')));
|
||||
|
||||
// ------------------ Safety nets ------------------
|
||||
process.on("unhandledRejection", (err) => console.error("Unhandled Rejection:", err));
|
||||
process.on("uncaughtException", (err) => console.error("Uncaught Exception:", err));
|
||||
|
||||
// ------------------ Start server ------------------
|
||||
app.listen(PORT, "0.0.0.0", () => {
|
||||
console.log(`🚀 Server running at http://localhost:${PORT}`);
|
||||
});
|
||||
BIN
uploads/2176fe75251a06cf07a08e32a845b537
Normal file
BIN
uploads/2176fe75251a06cf07a08e32a845b537
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 34 KiB |
BIN
uploads/240f5f275adf05f94ee3cb81d3a4c829
Normal file
BIN
uploads/240f5f275adf05f94ee3cb81d3a4c829
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 34 KiB |
BIN
uploads/3d2a70806f726d28bd1814bce39b0906
Normal file
BIN
uploads/3d2a70806f726d28bd1814bce39b0906
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 30 KiB |
BIN
uploads/866ff6699ca2b462d73b20c4d150b2e0
Normal file
BIN
uploads/866ff6699ca2b462d73b20c4d150b2e0
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 34 KiB |
BIN
uploads/8930208f2d9cc158282fc31953abc6a3
Normal file
BIN
uploads/8930208f2d9cc158282fc31953abc6a3
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 34 KiB |
BIN
uploads/d6d36b979d636c9f38a712bd3c298563
Normal file
BIN
uploads/d6d36b979d636c9f38a712bd3c298563
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 34 KiB |
105
utils/mailer.js
Normal file
105
utils/mailer.js
Normal file
@ -0,0 +1,105 @@
|
||||
import nodemailer from "nodemailer";
|
||||
|
||||
//
|
||||
// Create reusable transporter object
|
||||
//
|
||||
//
// Create reusable transporter object.
// Connection settings come from the environment; the literal fallbacks
// preserve the previous behavior when a variable is unset.
// FIXME(security): remove the hard-coded password fallback — secrets must
// live only in .env, never in source control.
//
export const mailer = nodemailer.createTransport({
  host: process.env.SMTP_HOST || "mail.crawlerx.co", // Hestia mail host
  port: Number(process.env.SMTP_PORT) || 587, // STARTTLS
  secure: false, // must be false for 587 (connection upgraded via STARTTLS)
  auth: {
    user: process.env.SMTP_USER || "info@crawlerx.co", // e.g. info@crawlerx.co
    pass: process.env.SMTP_PASS || "CrawlerX@2025", // mailbox password
  },
  name: process.env.SMTP_HOST || "mail.crawlerx.co", // explicitly set hostname
  tls: {
    rejectUnauthorized: false, // allow self-signed certs
  },
  logger: true, // optional: logs connection steps
  debug: true, // optional: debug SMTP connection
});
|
||||
|
||||
|
||||
//
|
||||
// Send welcome / signup email
|
||||
//
|
||||
//
// Send welcome / signup email
//
export async function sendSignupMail(toEmail) {
  // Best-effort: failures are logged, never thrown, so signup itself
  // is not blocked by a mail outage.
  const message = {
    from: `"CrawlerX" <${process.env.SMTP_USER}>`,
    to: toEmail,
    subject: "Welcome to CrawlerX",
    html: `
      <h2>Welcome!</h2>
      <p>Your signup was successful. You can now log in and start using the app.</p>
      `,
  };
  try {
    await mailer.sendMail(message);
    console.log(`✅ Signup email sent to ${toEmail}`);
  } catch (err) {
    console.error("❌ Error sending signup email:", err);
  }
}
|
||||
|
||||
//
|
||||
// Send reset-password email with 4-digit code or token link
|
||||
//
|
||||
//
// Send reset-password email with a tokenized link.
//
export async function sendResetPasswordMail(email, token) {
  try {
    // BUGFIX: build the link via URL/searchParams so the email address and
    // token are percent-encoded — a "+" in an address (or URL-unsafe token
    // characters) would otherwise corrupt the query string.
    const link = new URL("/reset-password", process.env.FRONTEND_URL);
    link.searchParams.set("email", email);
    link.searchParams.set("token", token);
    const resetURL = link.toString();
    await mailer.sendMail({
      from: `"CrawlerX" <${process.env.SMTP_USER}>`,
      to: email,
      subject: "Reset your password",
      html: `
      <p>You requested a password reset.</p>
      <p>Click here to reset: <a href="${resetURL}">${resetURL}</a></p>
      <p>This link is valid for 1 hour.</p>
      `,
    });
    console.log(`✅ Reset password email sent to ${email}`);
  } catch (err) {
    console.error("❌ Error sending reset password email:", err);
  }
}
|
||||
|
||||
|
||||
//
// Send a cake-order confirmation email (Maison de Treats).
// `orderData` shape: { category: { flavourName: quantity, ... }, ... }
//
export const sendCakeOrderMail = async (toEmail, orderData) => {
  try {
    // Render the nested order object as a bulleted plain-text list,
    // one section per category.
    const orderItems = Object.entries(orderData)
      .map(([category, flavours]) => {
        const items = Object.entries(flavours)
          .map(([flavour, qty]) => `• ${flavour}: ${qty}`)
          .join("\n");
        return `${category}:\n${items}`;
      })
      .join("\n\n");

    // BUGFIX/consistency: reuse the module-level `mailer` transporter
    // instead of re-creating one here with duplicated hard-coded
    // credentials (a second copy of the SMTP secret in source).
    await mailer.sendMail({
      from: `"Maison de Treats" <info@crawlerx.co>`,
      to: toEmail,
      subject: "🎉 Your Cake Order Confirmation",
      text: `Thank you for your order! Here are the details:\n\n${orderItems}`,
      html: `<h2>Thank you for your order!</h2>
       <p>Here are your cake order details:</p>
       <pre>${orderItems}</pre>`,
    });
    console.log("Cake order email sent to", toEmail);
  } catch (err) {
    // Best-effort: never throw — order creation must not fail on mail errors.
    console.error("Failed to send cake order email:", err);
  }
};
|
||||
20
utils/sitemap.js
Normal file
20
utils/sitemap.js
Normal file
@ -0,0 +1,20 @@
|
||||
import Sitemapper from "sitemapper";
import normalizeUrl from "normalize-url";

/** Return an array of normalized URLs from <origin>/sitemap.xml (or [] if none). */
export async function getSitemapUrls(startUrl) {
  const sitemapUrl = `${new URL(startUrl).origin}/sitemap.xml`;
  const mapper = new Sitemapper({ url: sitemapUrl, timeout: 15000 });

  try {
    const { sites } = await mapper.fetch();
    const normalized = [];
    for (const site of sites || []) {
      try {
        normalized.push(normalizeUrl(site, { stripHash: true }));
      } catch {
        // Skip entries that cannot be normalized (malformed URLs).
      }
    }
    return normalized;
  } catch {
    // No sitemap, fetch failure, or parse error — treat as "no URLs".
    return [];
  }
}
|
||||
8
utils/stripe.js
Normal file
8
utils/stripe.js
Normal file
@ -0,0 +1,8 @@
|
||||
import Stripe from "stripe";
import dotenv from "dotenv";

// Load .env here too, so STRIPE_SECRET_KEY is available even when this
// module is imported before the app-level dotenv.config() runs.
dotenv.config();

// Shared Stripe client pinned to a fixed API version so response shapes
// don't change when the account's default version is upgraded.
export const stripe = new Stripe(process.env.STRIPE_SECRET_KEY, {
  apiVersion: "2024-06-20",
});
|
||||
11
utils/urlHelpers.js
Normal file
11
utils/urlHelpers.js
Normal file
@ -0,0 +1,11 @@
|
||||
/**
 * True when `candidate` resolves (relative to `base`) to an http(s) URL on
 * the same host as `base`, ignoring a leading "www.". Returns false for
 * non-http(s) schemes or unparsable input.
 */
export function isInternal(base, candidate) {
  const bareHost = (hostname) => hostname.replace(/^www\./i, "");
  try {
    const origin = new URL(base);
    const resolved = new URL(candidate, base);
    if (resolved.protocol !== "http:" && resolved.protocol !== "https:") {
      return false;
    }
    return bareHost(origin.hostname) === bareHost(resolved.hostname);
  } catch {
    return false;
  }
}
|
||||
Loading…
x
Reference in New Issue
Block a user