// Full SEO + Broken Link + 404 + Accessibility + Image Alt CSV Export
// Run with: node seo_full_audit.js
const { Builder, By } = require("selenium-webdriver");
const chrome = require("selenium-webdriver/chrome");
const axios = require("axios");
const xml2js = require("xml2js");
const fs = require("fs");
const path = require("path");
// Image-alt findings are collected in a CSV file that lives next to this
// script. The file is recreated with only its header row on every start-up,
// so results from a previous run are discarded.
const CSV_HEADER = "Page URL,Image Src,Alt Text,Issue Type\n";
const csvPath = path.resolve(__dirname, "image_alt_issues.csv");
fs.writeFileSync(csvPath, CSV_HEADER, { encoding: "utf8" });
// ==========================
// 1️⃣ Fetch URLs from sitemap.xml
// ==========================
/**
 * Downloads a sitemap and returns the page URLs listed in it.
 *
 * Only a plain `<urlset>` document is understood. Anything else (for example
 * a sitemap index) is reported on stderr and yields an empty list.
 *
 * @param {string} sitemapUrl - Absolute URL of the sitemap XML document.
 * @returns {Promise<string[]>} Trimmed, non-empty `<loc>` values in document
 *   order; an empty array when the sitemap cannot be fetched or parsed.
 */
async function getUrlsFromSitemap(sitemapUrl) {
  try {
    // Bound the request so an unresponsive server cannot hang the audit.
    const res = await axios.get(sitemapUrl, { timeout: 30000 });
    const parsed = await xml2js.parseStringPromise(res.data);
    const entries = parsed?.urlset?.url;
    if (!Array.isArray(entries)) {
      throw new Error("document has no <urlset> with <url> entries");
    }
    return entries
      .map((entry) => entry?.loc?.[0])
      // xml2js stores the text under "_" when the element carries attributes.
      .map((loc) => (typeof loc === "string" ? loc : loc?._))
      // Skip entries without a usable <loc> instead of failing the whole sitemap.
      .filter((loc) => typeof loc === "string")
      // Pretty-printed sitemaps keep whitespace/newlines around the URL text.
      .map((loc) => loc.trim())
      .filter((loc) => loc.length > 0);
  } catch (err) {
    console.error("❌ Failed to load sitemap:", err.message);
    return [];
  }
}
// ==========================
// 2️⃣ Check HTTP Status
// ==========================
/**
 * Requests a URL and classifies the outcome.
 *
 * @param {string} url - Absolute URL to request.
 * @returns {Promise<number|string>} The HTTP status code; the string
 *   "Soft 404" when the server answers 200 but the HTML `<title>` contains
 *   "404"; or a string ending in "No Response" when the request fails without
 *   any HTTP response.
 */
async function checkLinkStatus(url) {
  try {
    const res = await axios.get(url, {
      timeout: 10000,
      // Accept every status so 4xx/5xx are returned instead of thrown.
      validateStatus: () => true,
    });
    // Only textual bodies can be searched for a title; axios hands back parsed
    // objects for JSON responses, and those must not be treated as failures.
    if (res.status === 200 && typeof res.data === "string") {
      // Tag names are case-insensitive and the title text may span lines.
      const title = res.data.match(/<title[^>]*>([\s\S]*?)<\/title>/i)?.[1] ?? "";
      if (title.includes("404")) {
        return "Soft 404";
      }
    }
    return res.status;
  } catch (err) {
    return err.response ? err.response.status : "❌ No Response";
  }
}
// ==========================
// 3️⃣ Main SEO + Accessibility + Image Alt Audit
// ==========================
/**
 * Builds one CSV line with every field quoted.
 * Embedded double quotes are doubled so the row stays parseable; null and
 * undefined become empty fields.
 */
function formatCsvRow(fields) {
  const quoted = fields.map((field) => `"${String(field ?? "").replace(/"/g, '""')}"`);
  return `${quoted.join(",")}\n`;
}

/** Rates a text length against an inclusive [min, max] range for the report. */
function rateTextLength(length, min, max) {
  if (length === 0) return "❌ Missing";
  if (length < min) return `⚠️ Too short (${length})`;
  if (length > max) return `⚠️ Too long (${length})`;
  return "✅ Perfect";
}

/**
 * Turns an anchor href into the absolute URL to check, or null when the link
 * should be skipped (fragment-only, non-HTTP scheme, unparsable, or not
 * containing `siteDomain`). The fragment is removed so in-page anchors of the
 * same document collapse into a single URL.
 */
function resolveInternalLink(href, siteDomain) {
  if (!href || href.startsWith("#")) return null;
  let parsed;
  try {
    parsed = new URL(href);
  } catch {
    try {
      // Relative hrefs are resolved against the site root.
      parsed = new URL(href, siteDomain);
    } catch {
      return null;
    }
  }
  // mailto:, tel:, javascript: and similar schemes cannot be fetched over HTTP.
  if (parsed.protocol !== "http:" && parsed.protocol !== "https:") return null;
  parsed.hash = "";
  return parsed.href.includes(siteDomain) ? parsed.href : null;
}

/**
 * Reads the `alt` attribute as written in the markup; null when it is absent.
 * Selenium's getAttribute can fall back to the reflected DOM property, which
 * is "" for an <img> without alt, so getDomAttribute is preferred whenever the
 * installed selenium-webdriver provides it.
 */
async function readAltAttribute(img) {
  const raw =
    typeof img.getDomAttribute === "function"
      ? await img.getDomAttribute("alt")
      : await img.getAttribute("alt");
  return raw ?? null;
}

/**
 * Audits a single page and prints a report to the console: title and meta
 * description length, canonical/robots/viewport/charset/lang, headings, image
 * alt texts, lazy loading, social/schema/tracking tags and broken internal
 * links. Image alt findings are also appended to the CSV file at `csvPath`.
 *
 * Pages that answer 404 (or look like a soft 404) are reported and skipped
 * without starting a browser. Errors raised while auditing are logged, not
 * rethrown; the browser session is always closed.
 *
 * @param {string} url - Absolute URL of the page to audit.
 * @param {string} siteDomain - Site root (e.g. "https://example.com"); only
 *   links whose absolute URL contains it are checked for breakage.
 * @returns {Promise<void>}
 */
async function checkSEO(url, siteDomain) {
  // Probe the page first so no browser is launched for pages that do not exist.
  const pageStatus = await checkLinkStatus(url);
  if (pageStatus === 404 || pageStatus === "Soft 404") {
    console.log(`\n🚫 ${url} → ❌ Page not found (${pageStatus})`);
    return;
  }

  const options = new chrome.Options();
  options.addArguments("--headless", "--no-sandbox", "--disable-gpu");
  const driver = await new Builder()
    .forBrowser("chrome")
    .setChromeOptions(options)
    .build();

  try {
    await driver.get(url);
    const pageSource = await driver.getPageSource();

    // Basic SEO elements
    const title = await driver.getTitle();
    const descElem = await driver.findElements(By.css('meta[name="description"]'));
    const canonicalElem = await driver.findElements(By.css('link[rel="canonical"]'));
    const robotsElem = await driver.findElements(By.css('meta[name="robots"]'));
    const viewportElem = await driver.findElements(By.css('meta[name="viewport"]'));
    const charset = await driver.findElements(By.css("meta[charset]"));
    const htmlTag = await driver.findElement(By.css("html"));
    const langAttr = await htmlTag.getAttribute("lang").catch(() => "");
    const h1Tags = await driver.findElements(By.css("h1"));
    const h2Tags = await driver.findElements(By.css("h2"));

    // A description tag without a content attribute yields null; treat it as empty.
    const descContent =
      (descElem.length > 0 ? await descElem[0].getAttribute("content") : "") ?? "";
    const descStatus = rateTextLength(descContent.length, 50, 160);
    const titleStatus = rateTextLength((title ?? "").length, 30, 65);

    const canonicalURL =
      canonicalElem.length > 0 ? await canonicalElem[0].getAttribute("href") : "❌ Missing";

    // Image accessibility audit
    const imgs = await driver.findElements(By.css("img"));
    let missingAlt = 0;
    let emptyAlt = 0;
    const altCounts = new Map();
    for (const img of imgs) {
      const src = await img.getAttribute("src");
      const rawAlt = await readAltAttribute(img);
      if (rawAlt === null) {
        missingAlt += 1;
        fs.appendFileSync(csvPath, formatCsvRow([url, src, "", "Missing Alt"]), "utf8");
        continue;
      }
      const alt = rawAlt.trim();
      if (alt === "") {
        emptyAlt += 1;
        fs.appendFileSync(csvPath, formatCsvRow([url, src, "(empty)", "Empty Alt"]), "utf8");
        continue;
      }
      altCounts.set(alt, (altCounts.get(alt) ?? 0) + 1);
    }

    let duplicateAltCount = 0;
    for (const [altText, count] of altCounts) {
      if (count > 1) {
        duplicateAltCount += 1;
        fs.appendFileSync(
          csvPath,
          formatCsvRow([url, "", altText, `Duplicate Alt (${count} times)`]),
          "utf8"
        );
      }
    }

    // Tracking, social and schema tags
    const hasGTM = pageSource.includes("googletagmanager.com/gtm.js");
    const hasClarity = pageSource.includes("clarity.ms/tag");
    const hasFBPixel = pageSource.includes("fbevents.js") || pageSource.includes("fbq(");
    const hasAnalytics = pageSource.includes("www.googletagmanager.com/gtag/js");
    const ogTags = await driver.findElements(By.css("meta[property^='og:']"));
    const twitterTags = await driver.findElements(By.css("meta[name^='twitter:']"));
    const schemaScripts = await driver.findElements(By.css('script[type="application/ld+json"]'));

    // Links check: collect distinct targets first so a link repeated in the
    // header, body and footer is requested only once.
    const anchorTags = await driver.findElements(By.css("a[href]"));
    const linkTargets = new Set();
    for (const anchor of anchorTags) {
      const target = resolveInternalLink(await anchor.getAttribute("href"), siteDomain);
      if (target !== null) {
        linkTargets.add(target);
      }
    }
    const brokenLinks = [];
    for (const link of linkTargets) {
      const status = await checkLinkStatus(link);
      // Numeric statuses other than 404 are accepted; every string status
      // ("Soft 404" or the no-response marker) denotes a failed check.
      if (status === 404 || typeof status === "string") {
        brokenLinks.push({ link, status });
      }
    }

    // Lazy loading check
    const lazyCandidates = await driver.findElements(By.css("img, video, iframe"));
    const lazyFlags = await Promise.all(
      lazyCandidates.map(async (element) => (await element.getAttribute("loading")) === "lazy")
    );
    const lazyLoaded = lazyFlags.filter(Boolean).length;

    // Console summary
    const presence = (isFound, missingLabel = "⚠️ Missing") =>
      isFound ? "✅ Found" : missingLabel;

    console.log(`\n🔍 Checking: ${url}`);
    console.log("-------------------------------------------");
    console.log("Title:", titleStatus);
    console.log("Meta Description:", descStatus);
    console.log("Canonical URL:", canonicalURL);
    console.log("Meta Robots:", presence(robotsElem.length > 0));
    console.log("Viewport:", presence(viewportElem.length > 0));
    console.log("Charset:", presence(charset.length > 0, "❌ Missing"));
    console.log("HTML lang:", langAttr ? `✅ ${langAttr}` : "⚠️ Missing");
    console.log("H1 Tags:", h1Tags.length > 0 ? `✅ ${h1Tags.length}` : "❌ Missing");
    console.log("H2 Tags:", h2Tags.length > 0 ? `ℹ️ ${h2Tags.length}` : "⚠️ None");
    console.log("Images:", imgs.length);
    console.log("Missing Alt:", missingAlt > 0 ? `❌ ${missingAlt}` : "✅ None");
    console.log("Empty Alt:", emptyAlt > 0 ? `⚠️ ${emptyAlt}` : "✅ None");
    console.log("Duplicate Alt:", duplicateAltCount > 0 ? `⚠️ ${duplicateAltCount}` : "✅ None");
    console.log("Lazy Loaded Images:", lazyLoaded > 0 ? `✅ ${lazyLoaded}` : "⚠️ None");
    console.log("Open Graph Tags:", presence(ogTags.length > 0));
    console.log("Twitter Tags:", presence(twitterTags.length > 0));
    console.log("Schema Markup:", presence(schemaScripts.length > 0));
    console.log("Google Analytics:", presence(hasAnalytics));
    console.log("GTM:", presence(hasGTM));
    console.log("Clarity:", presence(hasClarity));
    console.log("Facebook Pixel:", presence(hasFBPixel));

    if (brokenLinks.length > 0) {
      console.log("\n❌ Broken Links:");
      brokenLinks.forEach((b) => console.log(`  → ${b.link} [${b.status}]`));
    } else {
      console.log("✅ No broken links found.");
    }
  } catch (err) {
    console.error(`❌ Error on ${url}:`, err.message);
  } finally {
    await driver.quit();
  }
}
// ==========================
// 4️⃣ Run Full Site Audit
// ==========================
// Entry point: load the sitemap, audit every listed page one after another,
// then print where the image-alt CSV report was written.
(async () => {
  const sitemapUrl = "https://rapharehab.ca/sitemap.xml";
  // Links are classified as internal by comparing against the site the sitemap
  // belongs to, so the audited pages and the link filter refer to the same host.
  const siteDomain = new URL(sitemapUrl).origin;

  console.log("🔍 Fetching URLs from sitemap...");
  const urls = await getUrlsFromSitemap(sitemapUrl);
  if (urls.length === 0) {
    console.error("❌ No URLs found in sitemap.");
    return;
  }
  console.log(`✅ Found ${urls.length} URLs in sitemap.`);
  console.log("🚀 Starting Full SEO + Accessibility + Broken Link Audit...");

  // Pages are audited sequentially; each call starts and stops its own browser.
  for (const url of urls) {
    await checkSEO(url, siteDomain);
  }

  console.log("\n✅ Full SEO Audit Completed!");
  console.log(`📁 CSV Report: ${csvPath}`);
})().catch((err) => {
  // Failures that escape the per-page handling (e.g. the browser cannot be
  // started) are reported here instead of surfacing as an unhandled rejection.
  console.error("❌ Audit aborted:", err.message);
  process.exitCode = 1;
});