Add platform_images mode with stealth Playwright for order.online

- Stealth Playwright bypasses Cloudflare bot protection
- Multiple selector strategies for menu item cards
- Fuzzy name matching (exact, normalized, partial)
- Background-image extraction as fallback
- Script auto-created at /opt/playwright/platform-images.js

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
John Mizerek 2026-03-14 20:22:10 -07:00
parent 24849c01e4
commit 3d9084d848

View file

@ -36,6 +36,176 @@ try {
// Parse the JSON request body once and reject empty requests.
// (Each statement previously appeared twice on its line, so the body was
// parsed and checked twice.)
$data = readJsonBody();
if (empty($data)) throw new Exception('No request body provided');
// ============================================================
// PLATFORM IMAGES MODE: Use stealth Playwright to grab food photos
// ============================================================
if (!empty($data['mode']) && $data['mode'] === 'platform_images' && !empty($data['url'])) {
// The URL is handed to a headless browser, so accept only http(s) strings;
// this rejects non-string input as well as file://, javascript:, etc.
$platformUrl = is_string($data['url']) ? trim($data['url']) : '';
if (!preg_match('#^https?://#i', $platformUrl)) {
    throw new Exception('platform_images mode requires an http(s) url');
}
// Item names must reach the script as a JSON *array*: array_values() drops
// any string keys so json_encode() cannot emit an object instead.
$itemNames = $data['items'] ?? [];
$itemNames = is_array($itemNames) ? array_values($itemNames) : [];
$itemsJson = json_encode($itemNames);
if ($itemsJson === false) {
    // Unencodable input (e.g. invalid UTF-8): fall back to "no items".
    $itemsJson = '[]';
}
// Write item names to temp file for the stealth script.
// tempnam() atomically creates a uniquely named file with mode 0600, unlike
// a predictable uniqid()-based name in the world-writable /tmp.
$tempFile = tempnam('/tmp', 'platform-items-');
if ($tempFile === false) {
    throw new Exception('Could not create temp file for item names');
}
if (file_put_contents($tempFile, $itemsJson) === false) {
    @unlink($tempFile);
    throw new Exception('Could not write item names to temp file');
}
// Use stealth Playwright to bypass Cloudflare
$scriptPath = '/opt/playwright/platform-images.js';
if (!file_exists($scriptPath)) {
// Create the stealth image extraction script
$script = <<<'JSEOF'
const { chromium } = require("playwright-extra");
const stealth = require("puppeteer-extra-plugin-stealth");
const fs = require("fs");
chromium.use(stealth());
(async () => {
const url = process.argv[2];
const itemsFile = process.argv[3];
const log = (msg) => process.stderr.write("[platform-img] " + msg + "\n");
let itemNames = [];
if (itemsFile && fs.existsSync(itemsFile)) {
try { itemNames = JSON.parse(fs.readFileSync(itemsFile, "utf8")); } catch(e) {}
}
log("Looking for images for " + itemNames.length + " items at " + url);
const browser = await chromium.launch({ headless: true });
const page = await browser.newPage({
userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
});
try {
await page.goto(url, { waitUntil: "domcontentloaded", timeout: 30000 });
await page.waitForTimeout(8000);
// Scroll to load lazy images
for (let i = 0; i < 10; i++) {
await page.evaluate((s) => window.scrollTo(0, s * window.innerHeight), i + 1);
await page.waitForTimeout(500);
}
await page.evaluate(() => window.scrollTo(0, 0));
await page.waitForTimeout(2000);
// Try multiple selector strategies for menu item cards
const imageMap = await page.evaluate(() => {
const pairs = {};
// Strategy 1: Cards with name + image
const selectors = [
'[data-testid*="MenuItem"]', '[class*="menu-item"]', '[class*="MenuItem"]',
'[class*="product-card"]', '[class*="item-card"]', 'article', '.menuItem',
'[class*="StoreMenuItem"]', '[class*="menu_item"]',
'[class*="MenuProduct"]', '[class*="menuProduct"]',
'[class*="sc-"]' // styled-components
];
for (const sel of selectors) {
const cards = document.querySelectorAll(sel);
for (const card of cards) {
const nameEl = card.querySelector('h3, h4, h2, [class*="name"], [class*="title"], [class*="Name"], [class*="Title"], span[class]');
const imgEl = card.querySelector('img[src*="http"], [style*="background-image"]');
if (nameEl && imgEl) {
const name = nameEl.textContent.trim();
let imgUrl = imgEl.src || '';
if (!imgUrl && imgEl.style.backgroundImage) {
imgUrl = imgEl.style.backgroundImage.replace(/url\(["']?/, '').replace(/["']?\)/, '');
}
if (name && imgUrl && imgUrl.startsWith('http')) {
pairs[name] = imgUrl;
}
}
}
if (Object.keys(pairs).length > 5) break;
}
// Strategy 2: All images with alt text
if (Object.keys(pairs).length === 0) {
for (const img of document.querySelectorAll('img[src*="http"]')) {
if (/(icon|favicon|logo|sprite|pixel|tracking|badge|button|\.svg|avatar)/i.test(img.src)) continue;
if (img.naturalWidth > 80 && img.naturalHeight > 80 && img.alt && img.alt.length > 2) {
pairs[img.alt.trim()] = img.src;
}
}
}
// Strategy 3: Background images in divs near text
if (Object.keys(pairs).length === 0) {
const allDivs = document.querySelectorAll('div[style*="background-image"]');
for (const div of allDivs) {
const bgUrl = div.style.backgroundImage.replace(/url\(["']?/, '').replace(/["']?\)/, '');
if (!bgUrl.startsWith('http')) continue;
const textEl = div.closest('[class]')?.querySelector('h3, h4, span, p');
if (textEl) {
const name = textEl.textContent.trim();
if (name.length > 2 && name.length < 60) {
pairs[name] = bgUrl;
}
}
}
}
return pairs;
});
log("Found " + Object.keys(imageMap).length + " image pairs");
// Fuzzy match item names to found images
const result = {};
const normalize = (s) => s.toLowerCase().replace(/[^a-z0-9]/g, '');
const imageKeys = Object.keys(imageMap);
for (const itemName of itemNames) {
const normItem = normalize(itemName);
// Exact match first
if (imageMap[itemName]) {
result[itemName] = imageMap[itemName];
continue;
}
// Normalized match
for (const key of imageKeys) {
if (normalize(key) === normItem) {
result[itemName] = imageMap[key];
break;
}
}
// Partial match (item name contained in key or vice versa)
if (!result[itemName]) {
for (const key of imageKeys) {
const normKey = normalize(key);
if (normKey.includes(normItem) || normItem.includes(normKey)) {
result[itemName] = imageMap[key];
break;
}
}
}
}
log("Matched " + Object.keys(result).length + " items to images");
console.log(JSON.stringify({ imageMap: result, totalFound: Object.keys(imageMap).length }));
} catch (e) {
log("Error: " + e.message);
console.log(JSON.stringify({ imageMap: {}, totalFound: 0, error: e.message }));
}
await browser.close();
})();
JSEOF;
file_put_contents($scriptPath, $script);
}
// Run the scraper. The script writes its "[platform-img]" progress log to
// stderr and its JSON result to stdout, so stderr must NOT be merged into
// stdout (2>&1): the log lines would precede the JSON and json_decode() of
// the combined output would always fail, yielding an empty image map.
$command = 'PLAYWRIGHT_BROWSERS_PATH=/opt/playwright/browsers /usr/bin/node '
    . escapeshellarg($scriptPath) . ' '
    . escapeshellarg($platformUrl) . ' '
    . escapeshellarg($tempFile) . ' 2>/dev/null';
$output = shell_exec($command);
@unlink($tempFile);
// Take the last stdout line that decodes to a JSON array/object, so any stray
// output printed before the result does not break parsing.
$result = null;
$outputLines = explode("\n", trim((string) $output));
foreach (array_reverse($outputLines) as $outputLine) {
    $decoded = json_decode(trim($outputLine), true);
    if (is_array($decoded)) {
        $result = $decoded;
        break;
    }
}
// No parseable output (node missing, script crashed, ...): report "nothing found".
if (!is_array($result)) {
    $result = ['imageMap' => [], 'totalFound' => 0];
}
jsonResponse([
    'OK' => true,
    'mode' => 'platform_images',
    'imageMap' => $result['imageMap'] ?? [],
    'totalFound' => $result['totalFound'] ?? 0,
]);
}
// ============================================================
// DISCOVERY MODE: Crawl site, detect menu sub-pages, return without Claude
// ============================================================