From 3d9084d848d08aa3dd1357d12e99fc9e68848c36 Mon Sep 17 00:00:00 2001
From: John Mizerek
Date: Sat, 14 Mar 2026 20:22:10 -0700
Subject: [PATCH] Add platform_images mode with stealth Playwright for order.online

- Stealth Playwright bypasses Cloudflare bot protection
- Multiple selector strategies for menu item cards
- Fuzzy name matching (exact, normalized, partial)
- Background-image extraction as fallback
- Script auto-created at /opt/playwright/platform-images.js

Co-Authored-By: Claude Opus 4.6
---
 api/setup/analyzeMenuUrl.php | 188 +++++++++++++++++++++++++++++++++++
 1 file changed, 188 insertions(+)

diff --git a/api/setup/analyzeMenuUrl.php b/api/setup/analyzeMenuUrl.php
index 15ffb5e..8da61e8 100644
--- a/api/setup/analyzeMenuUrl.php
+++ b/api/setup/analyzeMenuUrl.php
@@ -36,6 +36,194 @@ try {
     $data = readJsonBody();
     if (empty($data)) throw new Exception('No request body provided');
 
+    // ============================================================
+    // PLATFORM IMAGES MODE: Use stealth Playwright to grab food photos
+    // ============================================================
+    // Reads 'mode', 'url' and optional 'items' (menu item names) from the
+    // request body and responds with OK, mode, imageMap and totalFound.
+    if (!empty($data['mode']) && $data['mode'] === 'platform_images' && !empty($data['url'])) {
+        $platformUrl = trim($data['url']);
+        // Re-index so the names are always encoded as a JSON list
+        $itemNames = is_array($data['items'] ?? null) ? array_values($data['items']) : [];
+
+        // Write item names to temp file for the stealth script
+        $tempFile = '/tmp/platform-items-' . uniqid() . '.json';
+        file_put_contents($tempFile, json_encode($itemNames));
+
+        // Use stealth Playwright to bypass Cloudflare
+        $scriptPath = '/opt/playwright/platform-images.js';
+        // Only written when missing: an existing copy is never overwritten,
+        // so remove it on the server to pick up changes to the script below.
+        if (!file_exists($scriptPath)) {
+            // Create the stealth image extraction script
+            $script = <<<'JSEOF'
+const { chromium } = require("playwright-extra");
+const stealth = require("puppeteer-extra-plugin-stealth");
+const fs = require("fs");
+chromium.use(stealth());
+
+// Usage: node platform-images.js <url> [itemsFile]
+// Prints one JSON object on stdout: { imageMap, totalFound[, error] }.
+// Progress messages go to stderr so they never mix with the JSON.
+(async () => {
+  const url = process.argv[2];
+  const itemsFile = process.argv[3];
+  const log = (msg) => process.stderr.write("[platform-img] " + msg + "\n");
+
+  let itemNames = [];
+  if (itemsFile && fs.existsSync(itemsFile)) {
+    try {
+      const parsed = JSON.parse(fs.readFileSync(itemsFile, "utf8"));
+      // Only strings can be matched against page text; drop anything else
+      if (Array.isArray(parsed)) itemNames = parsed.filter((n) => typeof n === "string");
+    } catch (e) {
+      log("Could not read items file: " + e.message);
+    }
+  }
+  log("Looking for images for " + itemNames.length + " items at " + url);
+
+  // Declared outside try so the finally block can close it
+  let browser = null;
+  try {
+    // Only web pages are scraped; refuse file:, chrome: and similar schemes
+    if (!/^https?:\/\//i.test(url || "")) throw new Error("URL must start with http:// or https://");
+
+    browser = await chromium.launch({ headless: true });
+    const page = await browser.newPage({
+      userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
+    });
+
+    await page.goto(url, { waitUntil: "domcontentloaded", timeout: 30000 });
+    // Fixed 8s pause before scraping; presumably lets the page finish rendering
+    await page.waitForTimeout(8000);
+
+    // Scroll to load lazy images
+    for (let i = 0; i < 10; i++) {
+      await page.evaluate((s) => window.scrollTo(0, s * window.innerHeight), i + 1);
+      await page.waitForTimeout(500);
+    }
+    await page.evaluate(() => window.scrollTo(0, 0));
+    await page.waitForTimeout(2000);
+
+    // Try multiple selector strategies for menu item cards
+    const imageMap = await page.evaluate(() => {
+      const pairs = {};
+      // Strategy 1: Cards with name + image
+      const selectors = [
+        '[data-testid*="MenuItem"]', '[class*="menu-item"]', '[class*="MenuItem"]',
+        '[class*="product-card"]', '[class*="item-card"]', 'article', '.menuItem',
+        '[class*="StoreMenuItem"]', '[class*="menu_item"]',
+        '[class*="MenuProduct"]', '[class*="menuProduct"]',
+        '[class*="sc-"]' // styled-components
+      ];
+      for (const sel of selectors) {
+        const cards = document.querySelectorAll(sel);
+        for (const card of cards) {
+          const nameEl = card.querySelector('h3, h4, h2, [class*="name"], [class*="title"], [class*="Name"], [class*="Title"], span[class]');
+          const imgEl = card.querySelector('img[src*="http"], [style*="background-image"]');
+          if (nameEl && imgEl) {
+            const name = nameEl.textContent.trim();
+            let imgUrl = imgEl.src || '';
+            if (!imgUrl && imgEl.style.backgroundImage) {
+              imgUrl = imgEl.style.backgroundImage.replace(/url\(["']?/, '').replace(/["']?\)/, '');
+            }
+            if (name && imgUrl && imgUrl.startsWith('http')) {
+              pairs[name] = imgUrl;
+            }
+          }
+        }
+        if (Object.keys(pairs).length > 5) break;
+      }
+
+      // Strategy 2: All images with alt text
+      if (Object.keys(pairs).length === 0) {
+        for (const img of document.querySelectorAll('img[src*="http"]')) {
+          if (/(icon|favicon|logo|sprite|pixel|tracking|badge|button|\.svg|avatar)/i.test(img.src)) continue;
+          if (img.naturalWidth > 80 && img.naturalHeight > 80 && img.alt && img.alt.length > 2) {
+            pairs[img.alt.trim()] = img.src;
+          }
+        }
+      }
+
+      // Strategy 3: Background images in divs near text
+      if (Object.keys(pairs).length === 0) {
+        const allDivs = document.querySelectorAll('div[style*="background-image"]');
+        for (const div of allDivs) {
+          const bgUrl = div.style.backgroundImage.replace(/url\(["']?/, '').replace(/["']?\)/, '');
+          if (!bgUrl.startsWith('http')) continue;
+          const textEl = div.closest('[class]')?.querySelector('h3, h4, span, p');
+          if (textEl) {
+            const name = textEl.textContent.trim();
+            if (name.length > 2 && name.length < 60) {
+              pairs[name] = bgUrl;
+            }
+          }
+        }
+      }
+
+      return pairs;
+    });
+
+    log("Found " + Object.keys(imageMap).length + " image pairs");
+
+    // Fuzzy match item names to found images
+    const result = {};
+    const normalize = (s) => s.toLowerCase().replace(/[^a-z0-9]/g, '');
+    // Keys that normalize to "" are skipped: "" is a substring of every
+    // string, so such a key would "partially match" any item name
+    const candidates = Object.keys(imageMap)
+      .map((key) => ({ key, norm: normalize(key) }))
+      .filter((c) => c.norm !== '');
+
+    for (const itemName of itemNames) {
+      // Exact match first (own keys only, so names like "constructor" are safe)
+      if (Object.prototype.hasOwnProperty.call(imageMap, itemName)) {
+        result[itemName] = imageMap[itemName];
+        continue;
+      }
+      const normItem = normalize(itemName);
+      // Nothing left to compare (e.g. punctuation-only name): exact match only
+      if (normItem === '') continue;
+      // Normalized match, then partial match (item name contained in key or vice versa)
+      const hit =
+        candidates.find((c) => c.norm === normItem) ||
+        candidates.find((c) => c.norm.includes(normItem) || normItem.includes(c.norm));
+      if (hit) result[itemName] = imageMap[hit.key];
+    }
+
+    log("Matched " + Object.keys(result).length + " items to images");
+    console.log(JSON.stringify({ imageMap: result, totalFound: Object.keys(imageMap).length }));
+  } catch (e) {
+    log("Error: " + e.message);
+    console.log(JSON.stringify({ imageMap: {}, totalFound: 0, error: e.message }));
+  } finally {
+    // Always release the browser, even if navigation or scraping failed
+    if (browser) await browser.close().catch((e) => log("Close failed: " + e.message));
+  }
+})();
+JSEOF;
+            file_put_contents($scriptPath, $script);
+        }
+
+        // stderr is discarded: the script logs progress there, and merging it
+        // into stdout (2>&1) put log lines ahead of the JSON, which made
+        // json_decode() below fail on every run.
+        $output = shell_exec("PLAYWRIGHT_BROWSERS_PATH=/opt/playwright/browsers /usr/bin/node $scriptPath " . escapeshellarg($platformUrl) . " " . escapeshellarg($tempFile) . " 2>/dev/null");
+        @unlink($tempFile);
+
+        $result = json_decode(trim($output ?? ''), true);
+        if (!is_array($result)) {
+            $result = ['imageMap' => [], 'totalFound' => 0];
+        }
+
+        jsonResponse([
+            'OK' => true,
+            'mode' => 'platform_images',
+            'imageMap' => $result['imageMap'] ?? [],
+            'totalFound' => $result['totalFound'] ?? 0,
+        ]);
+    }
+
     // ============================================================
     // DISCOVERY MODE: Crawl site, detect menu sub-pages, return without Claude
     // ============================================================