Add 24h cache to motousher and dirtstreet fetchWebsiteData
Subsequent pipeline runs within 24 hours reuse the existing 01_products_aggregated.json instead of re-scraping all brands, eliminating redundant HTTP requests and 429 rate-limit retries. Cache lifetime is controlled per source via environment variables (default: 24h): MOTOUSHER_CACHE_HOURS=0 → always re-scrape; DIRTSTREET_CACHE_HOURS=0 → always re-scrape. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
6eac0b92ed
commit
4e536f08b3
@ -22,6 +22,35 @@ function paths() {
|
||||
}
|
||||
|
||||
async function fetchWebsiteData({ paths: runPaths }) {
|
||||
// Use cached aggregated JSON if it exists and is fresh (default 24h).
|
||||
// Set DIRTSTREET_CACHE_HOURS=0 to always force a full re-scrape.
|
||||
const cacheMaxHours = Number(process.env.DIRTSTREET_CACHE_HOURS ?? 24);
|
||||
const absAggregatedPath = path.resolve(process.cwd(), runPaths.aggregatedJson);
|
||||
|
||||
if (cacheMaxHours > 0) {
|
||||
try {
|
||||
const existing = JSON.parse(await fs.readFile(absAggregatedPath, "utf8"));
|
||||
if (existing?.generatedAt && Array.isArray(existing?.products) && existing.products.length > 0) {
|
||||
const ageMs = Date.now() - new Date(existing.generatedAt).getTime();
|
||||
const ageHours = ageMs / (1000 * 60 * 60);
|
||||
if (ageHours < cacheMaxHours) {
|
||||
console.log(
|
||||
`[DIRTSTREET] Using cached data (${ageHours.toFixed(1)}h old, limit ${cacheMaxHours}h) — ${existing.products.length} products. Set DIRTSTREET_CACHE_HOURS=0 to force re-scrape.`
|
||||
);
|
||||
return {
|
||||
analysis: existing.analysis || {},
|
||||
outputJsonPath: absAggregatedPath,
|
||||
historyPath: path.resolve(process.cwd(), runPaths.historyJson),
|
||||
cached: true,
|
||||
};
|
||||
}
|
||||
console.log(`[DIRTSTREET] Cache expired (${ageHours.toFixed(1)}h old). Re-scraping all brands...`);
|
||||
}
|
||||
} catch {
|
||||
// no cache file yet, proceed with scraping
|
||||
}
|
||||
}
|
||||
|
||||
const brandsToScrape = process.env.DIRTSTREET_BRANDS
|
||||
? BRANDS.filter((b) => process.env.DIRTSTREET_BRANDS.split(",").map((s) => s.trim()).includes(b.slug))
|
||||
: BRANDS;
|
||||
|
||||
@ -22,6 +22,35 @@ function paths() {
|
||||
}
|
||||
|
||||
async function fetchWebsiteData({ paths: runPaths }) {
|
||||
// Use cached aggregated JSON if it exists and is fresh (default 24h).
|
||||
// Set MOTOUSHER_CACHE_HOURS=0 to always force a full re-scrape.
|
||||
const cacheMaxHours = Number(process.env.MOTOUSHER_CACHE_HOURS ?? 24);
|
||||
const absAggregatedPath = path.resolve(process.cwd(), runPaths.aggregatedJson);
|
||||
|
||||
if (cacheMaxHours > 0) {
|
||||
try {
|
||||
const existing = JSON.parse(await fs.readFile(absAggregatedPath, "utf8"));
|
||||
if (existing?.generatedAt && Array.isArray(existing?.products) && existing.products.length > 0) {
|
||||
const ageMs = Date.now() - new Date(existing.generatedAt).getTime();
|
||||
const ageHours = ageMs / (1000 * 60 * 60);
|
||||
if (ageHours < cacheMaxHours) {
|
||||
console.log(
|
||||
`[MOTOUSHER] Using cached data (${ageHours.toFixed(1)}h old, limit ${cacheMaxHours}h) — ${existing.products.length} products. Set MOTOUSHER_CACHE_HOURS=0 to force re-scrape.`
|
||||
);
|
||||
return {
|
||||
analysis: existing.analysis || {},
|
||||
outputJsonPath: absAggregatedPath,
|
||||
historyPath: path.resolve(process.cwd(), runPaths.historyJson),
|
||||
cached: true,
|
||||
};
|
||||
}
|
||||
console.log(`[MOTOUSHER] Cache expired (${ageHours.toFixed(1)}h old). Re-scraping all brands...`);
|
||||
}
|
||||
} catch {
|
||||
// no cache file yet, proceed with scraping
|
||||
}
|
||||
}
|
||||
|
||||
const brandsToScrape = process.env.MOTOUSHER_BRANDS
|
||||
? BRANDS.filter((b) => process.env.MOTOUSHER_BRANDS.split(",").map((s) => s.trim()).includes(b.slug))
|
||||
: BRANDS;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user