Previously only lived on servers at /opt/playwright/. Now tracked in repo. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
71 lines
2.2 KiB
JavaScript
71 lines
2.2 KiB
JavaScript
const { chromium } = require("playwright");
|
|
|
|
/**
 * Command-line page fetcher: renders a URL in headless Chromium and prints a
 * single JSON document to stdout.
 *
 * Usage: node <this-script> <url> [waitMs]
 *   url    - page to load (required; the script exits with status 1 without it).
 *   waitMs - milliseconds to pause after the load event so client-side
 *            rendering can run; falls back to 3000 when absent, non-numeric,
 *            or negative.
 *
 * Output on success: { html, images, url }, where `images` merges the URLs of
 * every response whose content-type contains "image/" with the http(s) `src`
 * of every <img> element found in the DOM.
 * Output on failure: { error, url }.
 */
(async () => {
  const DEFAULT_WAIT_MS = 3000;
  const MAX_SCROLL_STEPS = 20;

  const url = process.argv[2];
  if (!url) {
    console.log(JSON.stringify({ error: "URL required" }));
    process.exit(1);
  }

  // Always parse base-10 and reject NaN/negative values so a malformed
  // argument cannot reach waitForTimeout().
  const requestedWait = Number.parseInt(process.argv[3], 10);
  const wait =
    Number.isFinite(requestedWait) && requestedWait >= 0
      ? requestedWait
      : DEFAULT_WAIT_MS;

  const browser = await chromium.launch({ headless: true });

  try {
    const context = await browser.newContext({
      userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    });
    const page = await context.newPage();

    // Track image URLs as they load
    const images = new Set();
    page.on("response", (response) => {
      const contentType = response.headers()["content-type"] || "";
      if (contentType.includes("image/")) {
        images.add(response.url());
      }
    });

    try {
      // Use load instead of networkidle - more reliable for sites with persistent connections
      await page.goto(url, { waitUntil: "load", timeout: 45000 });

      // Wait for initial JS rendering
      await page.waitForTimeout(wait);

      // Scroll the page to trigger lazy-loaded images (DoorDash, etc.).
      // Both measurements are read in one round-trip to the page.
      const { scrollHeight, viewportHeight } = await page.evaluate(() => ({
        scrollHeight: document.body.scrollHeight,
        viewportHeight: window.innerHeight
      }));
      // Cap the number of steps so very tall pages cannot scroll indefinitely.
      const scrollSteps = Math.min(
        Math.ceil(scrollHeight / viewportHeight),
        MAX_SCROLL_STEPS
      );

      for (let step = 1; step <= scrollSteps; step++) {
        await page.evaluate((n) => {
          window.scrollTo(0, n * window.innerHeight);
        }, step);
        await page.waitForTimeout(300);
      }

      // Scroll back to top and wait for any final images
      await page.evaluate(() => window.scrollTo(0, 0));
      await page.waitForTimeout(1000);

      // Extract images from DOM as well
      const domImages = await page.evaluate(() => {
        return Array.from(document.querySelectorAll("img"))
          .map((img) => img.src)
          .filter((src) => src && src.startsWith("http"));
      });
      for (const src of domImages) {
        images.add(src);
      }

      const html = await page.content();

      console.log(JSON.stringify({
        html: html,
        images: Array.from(images),
        url: url
      }));
    } catch (e) {
      // Page-level failures are reported as JSON; the exit status is not
      // modified on this path.
      console.log(JSON.stringify({ error: e.message, url: url }));
    }
  } finally {
    // Close the browser even when context/page creation fails, so no
    // Chromium process is left behind.
    try {
      await browser.close();
    } catch (closeError) {
      // Report on stderr only: stdout may already hold the JSON result.
      console.error(closeError);
      process.exitCode = 1;
    }
  }
})().catch((e) => {
  // Setup failures (e.g. launching the browser or creating the context/page) land here;
  // emit the same JSON error shape instead of an unhandled rejection.
  const message = e instanceof Error ? e.message : String(e);
  console.log(JSON.stringify({ error: message, url: process.argv[2] }));
  process.exitCode = 1;
});
|