const { chromium } = require("playwright-extra");
const stealth = require("puppeteer-extra-plugin-stealth");
const fs = require("fs");

chromium.use(stealth());

/*
 * Modifier (option list) scraper for a DoorDash store page.
 *
 * Usage:   node <this-script> <storeUrl> [itemsFile]
 *
 *   storeUrl   Store page URL; the numeric store ID is read from it
 *              (e.g. /store/name-2545 -> 2545).
 *   itemsFile  Optional JSON file holding an array of either
 *              {id, name} objects or plain item-name strings.
 *
 * stdout receives exactly one JSON document:
 *   success: { modifiers, itemModifierMap, stats }
 *   failure: { error, modifiers: [], itemModifierMap: {} }
 * Progress and diagnostics go to stderr prefixed with "[dd-mod] ".
 *
 * Exit code is 1 for a missing URL or an unparseable store ID, 0 otherwise
 * (including the "no items" and fatal-error cases, which report via JSON).
 */

const USER_AGENT =
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";

// The menu data is embedded in the HTML as JSON inside a string, so every
// quote shows up as backslash + double quote.
const ESCAPED_QUOTE = '\\"';
const ITEM_MARKER =
  ESCAPED_QUOTE + "__typename" + ESCAPED_QUOTE + ":" + ESCAPED_QUOTE + "MenuPageItem" + ESCAPED_QUOTE;
const ID_KEY = ESCAPED_QUOTE + "id" + ESCAPED_QUOTE + ":" + ESCAPED_QUOTE;
const NAME_KEY = ESCAPED_QUOTE + "name" + ESCAPED_QUOTE + ":" + ESCAPED_QUOTE;

const MAX_EMBEDDED_ITEMS = 500;
const BATCH_SIZE = 5;
const BATCH_DELAY_MS = 200;
const PROGRESS_EVERY = 25;

// Minimal query used only when no live itemPage request could be captured.
// Whitespace between the pieces is insignificant to GraphQL.
const FALLBACK_ITEM_PAGE_QUERY = [
  "query itemPage($storeId: ID!, $itemId: ID!, $consumerId: ID, $isMerchantPreview: Boolean, $isNested: Boolean!, $fulfillmentType: FulfillmentType, $shouldFetchPresetCarousels: Boolean!, $cursorContext: ItemPageCursorContextInput, $shouldFetchStoreLiteData: Boolean!)",
  "{ itemPage(storeId: $storeId, itemId: $itemId, consumerId: $consumerId, isMerchantPreview: $isMerchantPreview, fulfillmentType: $fulfillmentType, cursorContext: $cursorContext)",
  "{ optionLists { name minNumOptions maxNumOptions isOptional options { name unitAmount currency decimalPlaces displayString __typename } __typename } __typename } }",
].join(" ");

/** Writes a diagnostic line to stderr so stdout stays pure JSON. */
const log = (msg) => process.stderr.write("[dd-mod] " + msg + "\n");

/** Prints the single JSON result document on stdout. */
function emit(payload) {
  console.log(JSON.stringify(payload));
}

/** Builds the failure document shape shared by every error path. */
function failure(message) {
  return { error: message, modifiers: [], itemModifierMap: {} };
}

/**
 * Extracts the numeric store ID from a store URL.
 *
 * Prefers the digits that end the path segment following "/store/", so a
 * slug such as "7-eleven-chicago-123" yields "123" rather than "7". Falls
 * back to the first run of digits preceded by "-" or "/" for other URL shapes.
 *
 * @param {string} url
 * @returns {string|null} The store ID, or null when none is found.
 */
function extractStoreId(url) {
  const storeSegment = url.match(/\/store\/([^/?#]+)/);
  if (storeSegment) {
    const trailingDigits = storeSegment[1].match(/(\d+)$/);
    if (trailingDigits) return trailingDigits[1];
  }
  const firstDigits = url.match(/[-/](\d+)/);
  return firstDigits ? firstDigits[1] : null;
}

/**
 * Loads the optional items file.
 *
 * @param {string|undefined} itemsFile Path to a JSON array file.
 * @returns {Array} The parsed array, or [] when the file is absent,
 *   unreadable, not valid JSON, or not an array.
 */
function loadProvidedItems(itemsFile) {
  if (!itemsFile || !fs.existsSync(itemsFile)) return [];
  try {
    const parsed = JSON.parse(fs.readFileSync(itemsFile, "utf8"));
    if (!Array.isArray(parsed)) {
      log("Items file does not contain a JSON array; ignoring it");
      return [];
    }
    log("Loaded " + parsed.length + " items from file");
    return parsed;
  } catch (e) {
    log("Failed to load items file: " + e.message);
    return [];
  }
}

/** Decodes JSON-style \uXXXX escapes (e.g. \u0026 -> &) left in scraped text. */
function decodeUnicodeEscapes(text) {
  return text.replace(/\\u([0-9a-fA-F]{4})/g, (_, hex) => String.fromCharCode(parseInt(hex, 16)));
}

/**
 * Reads the escaped-quoted string value that follows `key`, provided the key
 * starts no more than `maxDistance` characters after `from`.
 *
 * @returns {string|null} The raw value, or null when the key is missing, too
 *   far away, or its closing quote cannot be found.
 */
function readEscapedValue(html, key, from, maxDistance) {
  const keyPos = html.indexOf(key, from);
  if (keyPos === -1 || keyPos - from > maxDistance) return null;
  const valueStart = keyPos + key.length;
  const valueEnd = html.indexOf(ESCAPED_QUOTE, valueStart);
  return valueEnd === -1 ? null : html.substring(valueStart, valueEnd);
}

/**
 * Scans page HTML for embedded MenuPageItem records and pulls out the id and
 * name that follow each marker (id within 200 chars, name within 500).
 *
 * @param {string} html Full page markup.
 * @returns {{id: string, name: string}[]} At most MAX_EMBEDDED_ITEMS entries.
 */
function extractEmbeddedItems(html) {
  const items = [];
  let cursor = 0;
  while (items.length < MAX_EMBEDDED_ITEMS) {
    const markerPos = html.indexOf(ITEM_MARKER, cursor);
    if (markerPos === -1) break;
    // Always step past this marker, whether or not it yields an item.
    cursor = markerPos + ITEM_MARKER.length;

    const id = readEscapedValue(html, ID_KEY, markerPos, 200);
    if (id === null) continue;
    const rawName = readEscapedValue(html, NAME_KEY, markerPos, 500);
    if (rawName === null) continue;

    items.push({ id, name: decodeUnicodeEscapes(rawName) });
  }
  return items;
}

/**
 * Decides which items to query.
 *
 * With no provided items, every embedded item is used. Otherwise each
 * provided entry keeps its own id when it has one, or is matched by exact
 * name against the embedded items; entries with no resolvable id are dropped.
 * Entries that are neither strings nor objects (including null) are skipped.
 *
 * @param {Array} providedItems Strings or {id, name} objects.
 * @param {{id: string, name: string}[]} embeddedItems
 * @returns {{id: *, name: *}[]}
 */
function resolveItems(providedItems, embeddedItems) {
  if (providedItems.length === 0) return embeddedItems;

  // First occurrence wins when several embedded items share a name.
  const nameToId = new Map();
  for (const { id, name } of embeddedItems) {
    if (!nameToId.has(name)) nameToId.set(name, id);
  }

  const resolved = [];
  for (const entry of providedItems) {
    let name;
    let ownId;
    if (typeof entry === "string") {
      name = entry;
    } else if (entry !== null && typeof entry === "object") {
      name = entry.name;
      ownId = entry.id;
    } else {
      continue;
    }
    const id = ownId || nameToId.get(name);
    if (id) resolved.push({ id, name });
  }
  log("Matched " + resolved.length + "/" + providedItems.length + " provided items to IDs");
  return resolved;
}

/**
 * Clicks the first button-like element that looks like a priced menu item so
 * the page issues an itemPage request, then waits up to 8 s for the request
 * listener to fill `capture.query`, and finally dismisses the dialog.
 *
 * @param {object} page Playwright page.
 * @param {{query: (string|null)}} capture Shared holder written by the request listener.
 */
async function triggerQueryCapture(page, capture) {
  const clicked = await page.evaluate(() => {
    const candidates = document.querySelectorAll("button, [role=button], [data-anchor-id]");
    for (const el of candidates) {
      const text = el.textContent;
      if (text.includes("$") && text.length < 300 && text.length > 5) {
        el.click();
        return true;
      }
    }
    return false;
  });
  if (!clicked) return;

  const startedAt = Date.now();
  while (!capture.query && Date.now() - startedAt < 8000) {
    await page.waitForTimeout(200);
  }
  await page.keyboard.press("Escape");
  await page.waitForTimeout(500);
}

/**
 * Collapses items that share an ID.
 *
 * @returns {{uniqueItems: Array, idToNames: Map}} One item per distinct ID (first
 *   occurrence) plus a map from ID to every name seen for it.
 */
function dedupeById(items) {
  const idToNames = new Map();
  const uniqueItems = [];
  for (const item of items) {
    const names = idToNames.get(item.id);
    if (names) {
      names.push(item.name);
    } else {
      idToNames.set(item.id, [item.name]);
      uniqueItems.push(item);
    }
  }
  return { uniqueItems, idToNames };
}

/**
 * Fetches option lists for a batch of items concurrently from inside the page
 * (so the requests carry the page's origin and cookies).
 *
 * @returns {Promise<Array>} One entry per item: {id, name, optionLists} on
 *   success or {id, name, error} on failure.
 */
function fetchOptionLists(page, batch, storeId, query) {
  // The callback is serialized and run in the browser: it must not reference
  // anything from this module's scope.
  return page.evaluate(
    ({ items, storeId, query }) =>
      Promise.all(
        items.map(async (item) => {
          try {
            const resp = await fetch("/graphql/itemPage?operation=itemPage", {
              method: "POST",
              headers: {
                "content-type": "application/json",
                "x-channel-id": "marketplace",
                "x-experience-id": "storefront",
                "apollographql-client-name": "@doordash/app-consumer-production-ssr-client",
                "apollographql-client-version": "3.0",
              },
              body: JSON.stringify({
                operationName: "itemPage",
                variables: {
                  itemId: item.id,
                  storeId: storeId,
                  consumerId: null,
                  isMerchantPreview: false,
                  isNested: false,
                  shouldFetchPresetCarousels: false,
                  fulfillmentType: "Pickup",
                  cursorContext: {},
                  shouldFetchStoreLiteData: false,
                },
                query: query,
              }),
            });
            const data = await resp.json();
            const optionLists = data && data.data && data.data.itemPage && data.data.itemPage.optionLists;
            if (optionLists) {
              return { id: item.id, name: item.name, optionLists: optionLists };
            }
            // A failed HTTP status without usable data is an error, not
            // "this item has no modifiers".
            if (!resp.ok) {
              return { id: item.id, name: item.name, error: "HTTP " + resp.status };
            }
            return { id: item.id, name: item.name, optionLists: [] };
          } catch (e) {
            return { id: item.id, name: item.name, error: e.message };
          }
        })
      ),
    { items: batch, storeId, query }
  );
}

/**
 * Computes an option's price in major currency units.
 *
 * Uses unitAmount scaled by decimalPlaces when both are present (a
 * decimalPlaces of 0 is honoured; only null falls back to 2), otherwise
 * parses a "$"-bearing displayString, otherwise 0.
 */
function optionPrice(opt) {
  if (opt.unitAmount && opt.decimalPlaces !== undefined) {
    const places = opt.decimalPlaces === null ? 2 : opt.decimalPlaces;
    return opt.unitAmount / Math.pow(10, places);
  }
  if (opt.displayString && opt.displayString.includes("$")) {
    return parseFloat(opt.displayString.replace(/[^0-9.]/g, "")) || 0;
  }
  return 0;
}

/** Converts one raw option list into the output modifier-group shape. */
function buildModifierGroup(groupName, optionList) {
  const options = [];
  if (Array.isArray(optionList.options)) {
    for (const opt of optionList.options) {
      if (opt === null || typeof opt !== "object") continue;
      options.push({ name: opt.name || "", price: optionPrice(opt) });
    }
  }
  return {
    name: groupName,
    required: !optionList.isOptional,
    minSelections: optionList.minNumOptions || 0,
    maxSelections: optionList.maxNumOptions || 0,
    options: options,
  };
}

/** Entry point: parses arguments, scrapes the store page, prints the result. */
async function main() {
  const url = process.argv[2];
  const itemsFile = process.argv[3]; // JSON file: [{id, name}, ...] or ["name1", ...]

  // Early failures set exitCode and return instead of calling process.exit(),
  // so pending stdout writes are flushed before the process ends.
  if (!url) {
    emit(failure("URL required"));
    process.exitCode = 1;
    return;
  }

  const storeId = extractStoreId(url);
  if (!storeId) {
    emit(failure("Could not extract storeId from URL"));
    process.exitCode = 1;
    return;
  }
  log("Store ID: " + storeId);

  const providedItems = loadProvidedItems(itemsFile);

  let browser;
  try {
    browser = await chromium.launch({ headless: true });
    const context = await browser.newContext({
      userAgent: USER_AGENT,
      viewport: { width: 1280, height: 900 },
    });
    const page = await context.newPage();

    // Capture the GraphQL query text from any itemPage request the page makes.
    const capture = { query: null };
    page.on("request", (req) => {
      const postData = req.postData();
      if (!postData || !postData.includes("itemPage") || !postData.includes("optionLists")) return;
      try {
        const parsed = JSON.parse(postData);
        // Only accept a real query string, so a later request without one
        // cannot erase a template that was already captured.
        if (parsed && typeof parsed.query === "string" && parsed.query) {
          capture.query = parsed.query;
        }
      } catch (e) {
        log("Ignoring unparseable itemPage request body: " + e.message);
      }
    });

    log("Navigating to " + url);
    await page.goto(url, { waitUntil: "load", timeout: 60000 });
    await page.waitForTimeout(5000);

    // Extract item IDs from embedded JSON in page HTML.
    const embeddedItems = extractEmbeddedItems(await page.content());
    log("Extracted " + embeddedItems.length + " items with IDs from embedded data");

    const itemsWithIds = resolveItems(providedItems, embeddedItems);
    if (itemsWithIds.length === 0) {
      emit(failure("No items with IDs found"));
      return; // exit code stays 0; the browser is closed in finally
    }

    // We need the GraphQL query template. Trigger one click to capture it.
    if (!capture.query) {
      log("Capturing GraphQL query template via click...");
      await triggerQueryCapture(page, capture);
    }

    let queryTemplate = capture.query;
    if (!queryTemplate) {
      log("Could not capture query template, using hardcoded minimal query");
      queryTemplate = FALLBACK_ITEM_PAGE_QUERY;
    } else {
      log("Captured query template (" + queryTemplate.length + " chars)");
    }

    const { uniqueItems, idToNames } = dedupeById(itemsWithIds);
    log("Deduplicated: " + itemsWithIds.length + " -> " + uniqueItems.length + " unique IDs");

    // Groups are keyed by name: the first definition seen for a name wins.
    const allModifierGroups = new Map();
    // Null prototype so a scraped item name such as "__proto__" is stored as
    // an ordinary key.
    const itemModifierMap = Object.create(null);
    let successCount = 0;
    let modItemCount = 0;

    // Make direct GraphQL calls, BATCH_SIZE concurrent requests at a time.
    for (let i = 0; i < uniqueItems.length; i += BATCH_SIZE) {
      const batch = uniqueItems.slice(i, i + BATCH_SIZE);
      const results = await fetchOptionLists(page, batch, storeId, queryTemplate);

      for (const result of results) {
        if (result.error) continue;
        successCount++;

        const optionLists = Array.isArray(result.optionLists) ? result.optionLists : [];
        if (optionLists.length === 0) continue;

        const modNames = [];
        for (const optionList of optionLists) {
          if (optionList === null || typeof optionList !== "object") continue;
          const groupName = optionList.name || "Options";
          if (!allModifierGroups.has(groupName)) {
            allModifierGroups.set(groupName, buildModifierGroup(groupName, optionList));
          }
          modNames.push(groupName);
        }

        // Map to all names sharing this ID.
        const names = idToNames.get(result.id) || [result.name];
        for (const name of names) {
          itemModifierMap[name] = modNames;
        }
        modItemCount++;
      }

      const processed = i + BATCH_SIZE;
      if (processed % PROGRESS_EVERY < BATCH_SIZE) {
        log(
          "Progress: " + Math.min(processed, uniqueItems.length) + "/" + uniqueItems.length +
            " | " + modItemCount + " with mods, " + allModifierGroups.size + " groups"
        );
      }

      // Small delay between batches to avoid rate limiting.
      if (processed < uniqueItems.length) {
        await page.waitForTimeout(BATCH_DELAY_MS);
      }
    }

    const itemsWithModifiers = Object.keys(itemModifierMap).length;
    log(
      "Done: " + successCount + "/" + uniqueItems.length + " fetched, " +
        itemsWithModifiers + " items with modifiers, " + allModifierGroups.size + " unique groups"
    );

    const modifiers = Array.from(allModifierGroups.values());
    emit({
      modifiers: modifiers,
      itemModifierMap: itemModifierMap,
      stats: {
        totalItems: itemsWithIds.length,
        uniqueItemIds: uniqueItems.length,
        fetchedCount: successCount,
        itemsWithModifiers: itemsWithModifiers,
        uniqueModifierGroups: modifiers.length,
      },
    });
  } catch (e) {
    log("Fatal error: " + e.message);
    emit(failure(e.message));
  } finally {
    // Always release the browser, and never let a close failure mask the result.
    if (browser) {
      try {
        await browser.close();
      } catch (e) {
        log("Failed to close browser: " + e.message);
      }
    }
  }
}

main().catch((e) => {
  // main() handles its own errors; this only guards against the unexpected.
  log("Unhandled error: " + (e && e.message ? e.message : String(e)));
  process.exitCode = 1;
});