This repository has been archived on 2026-03-21. You can view files and clone it, but cannot push or open issues or pull requests.
payfrit-biz/playwright/doordash-modifiers.js
John Mizerek b14f26ed47 Add DoorDash modifier extraction via stealth Playwright
- New doordash-modifiers.js: stealth Playwright script that clicks each
  menu item on a DoorDash page, captures itemPage GraphQL responses,
  and extracts optionLists (modifier groups with options and prices)
- Wire modifier extraction into DoorDash fast-path in analyzeMenuUrl.cfm:
  after parsing items/categories, runs modifier script and maps results
- Improved business info extraction: address, phone, and hours now use
  position-based parsing of StoreHeaderAddress, StoreHeaderPhoneNumber,
  and StoreOperationHoursRange embedded data (fixes intermittent missing info)
- Add playwright-extra and stealth plugin to package.json

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-10 12:58:00 -07:00

271 lines
10 KiB
JavaScript

const { chromium } = require("playwright-extra");
const stealth = require("puppeteer-extra-plugin-stealth");
chromium.use(stealth());
/**
 * DoorDash modifier extraction.
 *
 * Usage: node <this script> <store-url>
 *
 * Opens the given URL in headless Chromium, clicks each discovered menu item,
 * listens for JSON responses whose payload contains `data.itemPage`, and
 * collects the `optionLists` found there as modifier groups.
 *
 * Output contract:
 *   - stdout: exactly one JSON document, either
 *       { modifiers, itemModifierMap, stats }               on success, or
 *       { error, modifiers: [], itemModifierMap: {} }       on failure.
 *   - stderr: progress/diagnostic lines prefixed with "[dd-mod] ".
 *   - exit code: 1 when the URL argument is missing, 0 otherwise.
 */
(async () => {
  const url = process.argv[2];

  /** Writes a diagnostic line to stderr so stdout stays pure JSON. */
  const log = (msg) => process.stderr.write("[dd-mod] " + msg + "\n");

  /** Writes the single JSON result document to stdout. */
  const emit = (payload) => console.log(JSON.stringify(payload));

  /** Writes the error-shaped result document to stdout. */
  const emitError = (message) => emit({ error: message, modifiers: [], itemModifierMap: {} });

  /**
   * Returns every `data.itemPage` object found in a parsed response body.
   * The body may be a single object or an array of such objects.
   */
  const findItemPages = (body) => {
    const entries = Array.isArray(body) ? body : [body];
    const pages = [];
    for (const entry of entries) {
      if (entry && entry.data && entry.data.itemPage) {
        pages.push(entry.data.itemPage);
      }
    }
    return pages;
  };

  /**
   * Converts a raw option price to a number: numeric values are divided by
   * 100, anything else is run through parseFloat, falling back to 0.
   */
  const toPrice = (raw) => {
    if (!raw) {
      return 0;
    }
    if (typeof raw === "number") {
      return raw / 100;
    }
    return parseFloat(raw) || 0;
  };

  /** Builds the output record for one option list under the given group name. */
  const toModifierGroup = (optionList, groupName) => {
    const rawOptions = Array.isArray(optionList.options) ? optionList.options : [];
    return {
      name: groupName,
      required: optionList.isRequired || false,
      minSelections: optionList.minNumOptions || 0,
      maxSelections: optionList.maxNumOptions || 0,
      options: rawOptions.map((opt) => ({ name: opt.name || "", price: toPrice(opt.price) }))
    };
  };

  if (!url) {
    emitError("URL required");
    // Set the exit code instead of calling process.exit() so stdout can flush.
    process.exitCode = 1;
    return;
  }

  let browser;
  try {
    browser = await chromium.launch({ headless: true });
    const context = await browser.newContext({
      userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
      viewport: { width: 1280, height: 900 }
    });
    const page = await context.newPage();

    // Every click starts a new "generation". Each request is tagged with the
    // generation in which it was issued, and a response is only accepted while
    // that generation is still current. Without this, a slow itemPage response
    // for one item could be attributed to the next item clicked.
    let generation = 0;
    const requestGeneration = new WeakMap();
    let latestItemPage = null;

    page.on("request", (request) => {
      requestGeneration.set(request, generation);
    });

    page.on("response", async (response) => {
      try {
        const responseUrl = response.url();
        if (!responseUrl.includes("graphql") && !responseUrl.includes("api/v2")) {
          return;
        }
        const contentType = response.headers()["content-type"] || "";
        if (!contentType.includes("json")) {
          return;
        }
        const issuedIn = requestGeneration.get(response.request());
        const body = await response.json();
        if (issuedIn !== generation) {
          return; // Belongs to an earlier click (or to the initial page load).
        }
        const pages = findItemPages(body);
        if (pages.length > 0) {
          latestItemPage = pages[pages.length - 1];
        }
      } catch (e) {
        // Deliberately best-effort: bodies can be unparseable or already
        // discarded by the browser, and that must not abort the crawl.
      }
    });

    log("Navigating to " + url);
    await page.goto(url, { waitUntil: "load", timeout: 60000 });
    await page.waitForTimeout(5000);

    // Scroll through the page (at most 20 viewport-sized steps) so that
    // lazily rendered items are present before discovery.
    const scrollHeight = await page.evaluate(() => document.body.scrollHeight);
    const viewportHeight = await page.evaluate(() => window.innerHeight);
    const scrollSteps = Math.min(Math.ceil(scrollHeight / viewportHeight), 20);
    for (let step = 1; step <= scrollSteps; step++) {
      await page.evaluate((n) => {
        window.scrollTo(0, n * window.innerHeight);
      }, step);
      await page.waitForTimeout(300);
    }
    await page.evaluate(() => window.scrollTo(0, 0));
    await page.waitForTimeout(1000);

    // Discover clickable menu items. Three strategies are tried in order and
    // a later one only runs when the earlier ones found nothing.
    const itemElements = await page.evaluate(() => {
      const items = [];
      const seen = new Set();

      // Records a visible element under a unique, plausibly sized name.
      // A name is only marked as seen once it is actually recorded, so a
      // hidden duplicate cannot suppress the visible card of the same name.
      const addItem = (rawName, el) => {
        const name = (rawName || "").trim();
        if (name.length <= 1 || name.length >= 200 || seen.has(name)) {
          return;
        }
        const rect = el.getBoundingClientRect();
        if (!(rect.width > 0 && rect.height > 0)) {
          return;
        }
        seen.add(name);
        items.push({
          name,
          x: rect.x + rect.width / 2,
          // Stored as a document coordinate so it can be passed to scrollTo later.
          y: rect.y + rect.height / 2 + window.scrollY
        });
      };

      // Strategy 1: item cards with data-anchor-id containing "MenuItem".
      document.querySelectorAll('[data-anchor-id*="MenuItem"]').forEach((el) => {
        const nameEl = el.querySelector('[data-telemetry-id="storeMenuItem.title"]') ||
          el.querySelector('span[class*="Text"]') ||
          el.querySelector('h3') ||
          el.querySelector('span');
        if (nameEl) {
          addItem(nameEl.textContent, el);
        }
      });

      // Strategy 2: buttons/test-id elements whose text contains a $X.XX price.
      if (items.length === 0) {
        document.querySelectorAll('button, [role="button"], [data-testid*="item"], [data-testid*="menu"]').forEach((el) => {
          const text = el.textContent || "";
          if (!text.match(/\$\d+\.\d{2}/) || text.length >= 500) {
            return;
          }
          const lines = text.split("\n").map((l) => l.trim()).filter((l) => l.length > 0);
          const name = lines[0];
          if (name && !name.startsWith("$")) {
            addItem(name, el);
          }
        });
      }

      // Strategy 3: images served from a "cdn4dd" URL inside a clickable ancestor.
      if (items.length === 0) {
        document.querySelectorAll('img[src*="cdn4dd"]').forEach((img) => {
          const clickable = img.closest('a, button, [role="button"], [tabindex="0"]');
          // Guard the two-level parent walk: a missing parent must not throw
          // and abort the whole discovery pass.
          const parent = clickable || (img.parentElement && img.parentElement.parentElement);
          if (!parent) {
            return;
          }
          const nameEl = parent.querySelector('span, h3, h4, p');
          if (nameEl) {
            addItem(nameEl.textContent, parent);
          }
        });
      }

      return items;
    });

    log("Found " + itemElements.length + " clickable items on page");
    if (itemElements.length === 0) {
      log("No clickable items found, aborting");
      emitError("No clickable items found");
      return; // The finally block closes the browser; exit code stays 0.
    }

    const allModifierGroups = new Map(); // group name -> modifier group record
    const itemModifierMap = {}; // item name -> [modifier group names]
    let clickedCount = 0;
    let modItemCount = 0;
    // Cap the number of clicks so large menus cannot run unbounded.
    const maxClicks = Math.min(itemElements.length, 200);

    for (let i = 0; i < maxClicks; i++) {
      const item = itemElements[i];
      try {
        generation++;
        latestItemPage = null;

        // Bring the item's region into view first so it is rendered.
        await page.evaluate((y) => window.scrollTo(0, y - 300), item.y);
        await page.waitForTimeout(200);

        // Re-locate the element and compute fresh viewport coordinates.
        const target = await page.evaluate((itemName) => {
          const isVisible = (el) => {
            const r = el.getBoundingClientRect();
            return r.width > 0 && r.height > 0;
          };
          const titleOf = (card) => {
            const nameEl = card.querySelector('[data-telemetry-id="storeMenuItem.title"]') ||
              card.querySelector('span[class*="Text"]') ||
              card.querySelector('h3') ||
              card.querySelector('span');
            return nameEl ? nameEl.textContent.trim() : "";
          };

          // Prefer a card whose title equals the name exactly, so that
          // "Fries" does not resolve to the "Cheese Fries" card.
          let match = null;
          for (const card of document.querySelectorAll('[data-anchor-id*="MenuItem"]')) {
            if (titleOf(card) === itemName && isVisible(card)) {
              match = card;
              break;
            }
          }
          // Otherwise fall back to the first visible element containing the name.
          if (!match) {
            for (const el of document.querySelectorAll('[data-anchor-id*="MenuItem"], button, [role="button"]')) {
              if (el.textContent.includes(itemName) && isVisible(el)) {
                match = el;
                break;
              }
            }
          }
          if (!match) {
            return { found: false };
          }
          // Coordinate clicks only work inside the viewport, so centre the
          // target (without animation) before measuring it.
          match.scrollIntoView({ block: "center", inline: "nearest", behavior: "instant" });
          const rect = match.getBoundingClientRect();
          return { found: true, x: rect.x + rect.width / 2, y: rect.y + rect.height / 2 };
        }, item.name);

        if (!target.found) {
          continue;
        }

        await page.mouse.click(target.x, target.y);
        clickedCount++;

        // Poll for an itemPage response for up to 4 seconds.
        const deadline = Date.now() + 4000;
        while (!latestItemPage && Date.now() < deadline) {
          await page.waitForTimeout(150);
        }

        const optionLists = latestItemPage && Array.isArray(latestItemPage.optionLists)
          ? latestItemPage.optionLists
          : [];
        if (optionLists.length > 0) {
          const modNames = [];
          for (const optionList of optionLists) {
            const groupName = optionList.name || "Options";
            // Groups are keyed by name; the first definition seen is kept.
            if (!allModifierGroups.has(groupName)) {
              allModifierGroups.set(groupName, toModifierGroup(optionList, groupName));
            }
            modNames.push(groupName);
          }
          itemModifierMap[item.name] = modNames;
          modItemCount++;
        }

        // Close the item modal: Escape first, then a click near the top-left
        // corner if an overlay-like element is still present.
        await page.keyboard.press("Escape");
        await page.waitForTimeout(400);
        const modalStillOpen = await page.evaluate(() => {
          const overlay = document.querySelector('[data-testid="modal-overlay"], [class*="ModalOverlay"], [class*="overlay"]');
          return !!overlay;
        });
        if (modalStillOpen) {
          await page.mouse.click(10, 10);
          await page.waitForTimeout(300);
        }
      } catch (e) {
        log("Error clicking " + item.name + ": " + e.message);
        try {
          await page.keyboard.press("Escape");
        } catch (e2) {
          // Nothing more to do for this item; carry on with the next one.
        }
        await page.waitForTimeout(300);
      }

      // Progress log every 20 items.
      if ((i + 1) % 20 === 0) {
        log("Progress: " + (i + 1) + "/" + maxClicks + " clicked, " + modItemCount + " with modifiers");
      }
    }

    log("Done: " + clickedCount + " clicked, " + modItemCount + " items with modifiers, " + allModifierGroups.size + " unique modifier groups");
    const modifiers = Array.from(allModifierGroups.values());
    emit({
      modifiers: modifiers,
      itemModifierMap: itemModifierMap,
      stats: {
        clickableItems: itemElements.length,
        clickedCount: clickedCount,
        itemsWithModifiers: modItemCount,
        uniqueModifierGroups: modifiers.length
      }
    });
  } catch (e) {
    log("Fatal error: " + e.message);
    emitError(e.message);
  } finally {
    // Always release the browser, and never let a failed close turn into an
    // unhandled rejection after the JSON result has been written.
    if (browser) {
      try {
        await browser.close();
      } catch (closeErr) {
        log("Error closing browser: " + closeErr.message);
      }
    }
  }
})();