// Headless page-snapshot tool.
//
// Usage: node <script> <url> [waitMs]
//
// Loads <url> in headless Chromium, scrolls through the page to trigger
// lazy-loaded images, and prints one JSON object to stdout:
//   { html, images, url }  on success
//   { error, url }         on navigation/render failure
const { chromium } = require("playwright");

(async () => {
  const url = process.argv[2];

  // Radix-10 parse; fall back to 3000 ms when the arg is missing or non-numeric
  // (the old `parseInt(argv[3] || 3000)` produced NaN for bad input, which
  // would make waitForTimeout throw).
  const parsedWait = Number.parseInt(process.argv[3], 10);
  const wait = Number.isNaN(parsedWait) ? 3000 : parsedWait;

  if (!url) {
    console.log(JSON.stringify({ error: "URL required" }));
    process.exit(1);
  }

  const browser = await chromium.launch({ headless: true });
  try {
    const context = await browser.newContext({
      userAgent:
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    });
    const page = await context.newPage();

    // Track image URLs as they arrive over the network.
    const images = new Set();
    page.on("response", (response) => {
      const ct = response.headers()["content-type"] || "";
      if (ct.includes("image/")) {
        images.add(response.url());
      }
    });

    try {
      // Use load instead of networkidle - more reliable for sites with
      // persistent connections (websockets, long-polling) that never go idle.
      await page.goto(url, { waitUntil: "load", timeout: 45000 });

      // Wait for initial JS rendering.
      await page.waitForTimeout(wait);

      // Scroll the page viewport-by-viewport to trigger lazy-loaded images
      // (DoorDash, etc.). Capped at 20 steps so infinite-scroll pages
      // terminate; innerHeight guarded against 0 to avoid a division blowup.
      const scrollHeight = await page.evaluate(() => document.body.scrollHeight);
      const viewportHeight = await page.evaluate(() => window.innerHeight);
      const scrollSteps = Math.min(
        Math.ceil(scrollHeight / Math.max(viewportHeight, 1)),
        20
      );
      for (let i = 0; i < scrollSteps; i++) {
        await page.evaluate((step) => {
          window.scrollTo(0, step * window.innerHeight);
        }, i + 1);
        await page.waitForTimeout(300);
      }

      // Scroll back to top and allow any final images to finish loading.
      await page.evaluate(() => window.scrollTo(0, 0));
      await page.waitForTimeout(1000);

      // Also collect image URLs straight from the DOM — some images may be
      // served from cache and never show up as a network response.
      const domImages = await page.evaluate(() =>
        Array.from(document.querySelectorAll("img"))
          .map((img) => img.src)
          .filter((src) => src && src.startsWith("http"))
      );
      domImages.forEach((img) => images.add(img));

      const html = await page.content();
      console.log(JSON.stringify({ html, images: Array.from(images), url }));
    } catch (e) {
      // Report the failure as JSON on stdout rather than crashing, so callers
      // always get a parseable result.
      console.log(JSON.stringify({ error: e.message, url }));
    }
  } finally {
    // Always release the browser process, even if context/page setup throws
    // (the original leaked the browser in that case).
    await browser.close();
  }
})();