DoorDash modifiers: direct GraphQL API calls instead of clicking

Instead of clicking each menu item (broken by virtual scrolling),
extract item IDs from embedded JSON and make direct fetch() calls
to the itemPage GraphQL endpoint. 5 concurrent requests per batch.
Much faster and 100% reliable - no per-item DOM interaction needed (at most one click, used only to capture the GraphQL query template).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
John Mizerek 2026-03-10 13:11:03 -07:00
parent f5974a5fa2
commit 2cf5039c0f
2 changed files with 256 additions and 230 deletions

View file

@ -1255,6 +1255,18 @@
<cfif nextItemPos EQ 0><cfset nextItemPos = len(catSection)></cfif>
<cfset itemEntry = mid(catSection, itemPos, nextItemPos - itemPos)>
<!--- Extract DoorDash item ID: the value that follows the first BQ-delimited "id" key inside
      this item's entry. ddItemId stays "" when the key is absent, or when its value is empty
      or has no closing BQ.
      NOTE(review): BQ is defined outside this hunk; presumably the escaped-quote sequence used
      by the embedded JSON - confirm. --->
<cfset ddIdKey = BQ & "id" & BQ & ":" & BQ>
<!--- Case-insensitive search; a result of 0 means the key was not found --->
<cfset ddIdPos = findNoCase(ddIdKey, itemEntry)>
<cfset ddItemId = "">
<cfif ddIdPos GT 0>
<!--- The value starts right after the key and runs up to the next BQ --->
<cfset ddIdStart = ddIdPos + len(ddIdKey)>
<cfset ddIdEnd = find(BQ, itemEntry, ddIdStart)>
<!--- GT (not GTE): rejects both "no closing BQ" (find returned 0) and a zero-length value --->
<cfif ddIdEnd GT ddIdStart>
<cfset ddItemId = mid(itemEntry, ddIdStart, ddIdEnd - ddIdStart)>
</cfif>
</cfif>
<!--- Extract item name --->
<cfset inPos = findNoCase(nameKey, itemEntry)>
<cfif inPos EQ 0><cfset itemPos = itemPos + len(itemMarker)><cfcontinue></cfif>
@ -1322,6 +1334,7 @@
<cfset ddItem["category"] = catName>
<cfset ddItem["modifiers"] = arrayNew(1)>
<cfset ddItem["id"] = "item_" & ddItemCounter>
<cfset ddItem["ddItemId"] = ddItemId>
<cfset ddItem["imageUrl"] = ipImg>
<cfset ddItem["imageSrc"] = ipImg>
<cfif len(ipImg)>
@ -1442,13 +1455,13 @@
<cfset ddItemModMap = structNew()>
<cftry>
<cfset arrayAppend(response.steps, "Running stealth Playwright for modifier extraction...")>
<!--- Write item names to temp file so Playwright knows what to click --->
<cfset ddItemNames = arrayNew(1)>
<!--- Write items with DoorDash IDs to temp file for Playwright --->
<cfset ddItemsForPw = arrayNew(1)>
<cfloop array="#ddItems#" index="ddi">
<cfset arrayAppend(ddItemNames, ddi.name)>
<cfset arrayAppend(ddItemsForPw, { "id": ddi.ddItemId, "name": ddi.name })>
</cfloop>
<cfset ddTempFile = "/tmp/dd-items-#createUUID()#.json">
<cffile action="write" file="#ddTempFile#" output="#serializeJSON(ddItemNames)#" charset="utf-8">
<cffile action="write" file="#ddTempFile#" output="#serializeJSON(ddItemsForPw)#" charset="utf-8">
<cfset modTimeout = 180000 + (arrayLen(ddItems) * 1500)>
<cfif modTimeout GT 600000><cfset modTimeout = 600000></cfif>

View file

@ -5,7 +5,7 @@ chromium.use(stealth());
(async () => {
const url = process.argv[2];
const itemNamesFile = process.argv[3]; // Optional: JSON file with array of item names
const itemsFile = process.argv[3]; // JSON file: [{id, name}, ...] or ["name1", ...]
if (!url) {
console.log(JSON.stringify({ error: "URL required", modifiers: [], itemModifierMap: {} }));
process.exit(1);
@ -13,14 +13,23 @@ chromium.use(stealth());
const log = (msg) => process.stderr.write("[dd-mod] " + msg + "\n");
// Load item names if provided (from CFML fast-path)
let knownItemNames = [];
if (itemNamesFile && fs.existsSync(itemNamesFile)) {
// Extract the numeric storeId from the URL (e.g. /store/name-2545 -> 2545).
// The ID is the digit run that ends the path segment right after "/store/",
// so digits inside the slug are not mistaken for it
// ("/store/7-eleven-2545" -> 2545, not 7). URLs without such a segment fall
// back to the first "-<digits>" or "/<digits>" run, which is what this code
// matched previously.
const storeIdMatch =
  url.match(/\/store\/(?:[^\/?#]*-)?(\d+)(?:[\/?#]|$)/) ||
  url.match(/[-/](\d+)/);
const storeId = storeIdMatch ? storeIdMatch[1] : null;
if (!storeId) {
  // Keep stdout parseable JSON with the modifiers/itemModifierMap keys even on failure.
  console.log(JSON.stringify({ error: "Could not extract storeId from URL", modifiers: [], itemModifierMap: {} }));
  process.exit(1);
}
log("Store ID: " + storeId);
// Load item names/IDs if provided
let providedItems = [];
if (itemsFile && fs.existsSync(itemsFile)) {
try {
knownItemNames = JSON.parse(fs.readFileSync(itemNamesFile, "utf8"));
log("Loaded " + knownItemNames.length + " item names from file");
providedItems = JSON.parse(fs.readFileSync(itemsFile, "utf8"));
log("Loaded " + providedItems.length + " items from file");
} catch (e) {
log("Failed to load item names file: " + e.message);
log("Failed to load items file: " + e.message);
}
}
@ -33,245 +42,249 @@ chromium.use(stealth());
});
const page = await context.newPage();
// Intercept itemPage GraphQL responses
let latestItemPage = null;
let responseCount = 0;
page.on("response", async (response) => {
try {
const responseUrl = response.url();
if (responseUrl.includes("graphql") || responseUrl.includes("api/v2")) {
const ct = response.headers()["content-type"] || "";
if (ct.includes("json")) {
const body = await response.json();
if (body && body.data && body.data.itemPage) {
latestItemPage = body.data.itemPage;
responseCount++;
}
if (Array.isArray(body)) {
for (const entry of body) {
if (entry && entry.data && entry.data.itemPage) {
latestItemPage = entry.data.itemPage;
responseCount++;
}
}
}
}
}
} catch (e) {}
// Capture the GraphQL query template from any itemPage request.
// queryTemplate receives the `query` text of the most recent matching request
// body; capturedHeaders receives that same request's headers.
let queryTemplate = null;
let capturedHeaders = null;
page.on("request", (req) => {
  const pd = req.postData();
  // Cheap substring pre-filter so only itemPage payloads are parsed.
  if (!pd || !pd.includes("itemPage") || !pd.includes("optionLists")) return;
  try {
    const parsed = JSON.parse(pd);
    // Only accept bodies that actually carry a query string, so a later
    // matching request without one cannot overwrite a good capture with
    // undefined.
    if (parsed && typeof parsed.query === "string" && parsed.query.length > 0) {
      queryTemplate = parsed.query;
      capturedHeaders = req.headers();
    }
  } catch (e) {
    // Best-effort capture: report and move on rather than swallowing silently.
    log("Ignoring unparseable itemPage request body: " + e.message);
  }
});
log("Navigating to " + url);
await page.goto(url, { waitUntil: "load", timeout: 60000 });
await page.waitForTimeout(5000);
// Aggressive scroll to force DoorDash to render all items
log("Scrolling to load all items...");
let lastHeight = 0;
for (let round = 0; round < 3; round++) {
const scrollHeight = await page.evaluate(() => document.body.scrollHeight);
const viewportHeight = await page.evaluate(() => window.innerHeight);
const scrollSteps = Math.ceil(scrollHeight / viewportHeight);
// Extract item IDs from embedded JSON in page HTML.
// Every search key below is delimited by BQ, so it only matches text whose
// double quotes are backslash-escaped in the markup (presumably JSON that was
// serialized inside a string literal — confirm against a live page).
const html = await page.content();
// BQ is the two-character sequence: backslash followed by a double quote.
const BQ = '\\"';
// marker matches \"__typename\":\"MenuPageItem\"; each occurrence is treated as one menu item.
const marker = BQ + '__typename' + BQ + ':' + BQ + 'MenuPageItem' + BQ;
// idKey / nameKey match the opening of the "id" and "name" string fields (\"id\":\" and \"name\":\").
const idKey = BQ + 'id' + BQ + ':' + BQ;
const nameKey = BQ + 'name' + BQ + ':' + BQ;
for (let i = 0; i <= scrollSteps; i++) {
await page.evaluate((y) => window.scrollTo(0, y), i * viewportHeight);
await page.waitForTimeout(250);
// Scan the page HTML for MenuPageItem markers and collect one { id, name }
// pair per marker, stopping once 500 entries have been gathered.
const embeddedItems = [];

// Returns the text between `key` and the next BQ, provided `key` begins no
// more than `maxGap` characters after `from`. Yields null when the key is
// missing, too far away, or its value is never terminated.
const readValueAfter = (key, from, maxGap) => {
  const keyAt = html.indexOf(key, from);
  if (keyAt === -1 || keyAt - from > maxGap) return null;
  const valueStart = keyAt + key.length;
  const valueEnd = html.indexOf(BQ, valueStart);
  return valueEnd === -1 ? null : html.substring(valueStart, valueEnd);
};

for (
  let cursor = html.indexOf(marker);
  cursor !== -1 && embeddedItems.length < 500;
  cursor = html.indexOf(marker, cursor + marker.length)
) {
  // The id must start within 200 characters of the marker, the name within 500.
  const id = readValueAfter(idKey, cursor, 200);
  if (id === null) continue;
  const rawName = readValueAfter(nameKey, cursor, 500);
  if (rawName === null) continue;
  // Names carry ampersands as a literal \u0026 escape; turn them back into "&".
  embeddedItems.push({ id, name: rawName.replace(/\\u0026/g, "&") });
}
log("Extracted " + embeddedItems.length + " items with IDs from embedded data");
// Build the items list with IDs
// If providedItems has names only (strings), match them against embedded items
let itemsWithIds = [];
if (embeddedItems.length > 0) {
const nameToId = new Map();
for (const ei of embeddedItems) {
if (!nameToId.has(ei.name)) nameToId.set(ei.name, ei.id);
}
await page.waitForTimeout(500);
const newHeight = await page.evaluate(() => document.body.scrollHeight);
if (newHeight === lastHeight) break;
lastHeight = newHeight;
}
await page.evaluate(() => window.scrollTo(0, 0));
await page.waitForTimeout(500);
// If we have known item names, use them to find and click elements
// Otherwise fall back to DOM auto-detection
let itemsToClick = [];
if (knownItemNames.length > 0) {
// Find each known item in the DOM by text content
log("Searching DOM for " + knownItemNames.length + " known items...");
itemsToClick = await page.evaluate((names) => {
const found = [];
const allElements = document.querySelectorAll('span, h3, h4, p, div');
// Build a map of text -> element for fast lookup
const textMap = new Map();
for (const el of allElements) {
// Only use leaf-ish elements (avoid containers that contain the whole menu)
if (el.children.length > 5) continue;
const text = el.textContent.trim();
if (text.length > 1 && text.length < 200 && !textMap.has(text)) {
textMap.set(text, el);
if (providedItems.length > 0) {
// Match provided names to embedded IDs
for (const pi of providedItems) {
const name = typeof pi === "string" ? pi : pi.name;
const id = typeof pi === "object" && pi.id ? pi.id : nameToId.get(name);
if (id) {
itemsWithIds.push({ id, name });
}
}
for (const name of names) {
const el = textMap.get(name);
if (el) {
// Find the clickable parent (the item card)
const clickable = el.closest('a, button, [role="button"], [tabindex="0"], [data-anchor-id]')
|| el.parentElement?.closest('a, button, [role="button"], [tabindex="0"], [data-anchor-id]')
|| el.parentElement;
if (clickable) {
const rect = clickable.getBoundingClientRect();
if (rect.width > 0 && rect.height > 0) {
found.push({ name, y: rect.y + window.scrollY });
}
}
}
}
return found;
}, knownItemNames);
log("Found " + itemsToClick.length + "/" + knownItemNames.length + " items in DOM");
} else {
// Auto-detect from DOM (fallback)
itemsToClick = await page.evaluate(() => {
const items = [];
const seen = new Set();
document.querySelectorAll('[data-anchor-id*="MenuItem"], button, [role="button"]').forEach(el => {
const text = el.textContent || "";
if (text.match(/\$\d+\.\d{2}/) && text.length < 500) {
const lines = text.split("\n").map(l => l.trim()).filter(l => l.length > 0);
const name = lines[0];
if (name && !seen.has(name) && name.length > 1 && name.length < 200 && !name.startsWith("$")) {
seen.add(name);
const rect = el.getBoundingClientRect();
if (rect.width > 0 && rect.height > 0) {
items.push({ name, y: rect.y + window.scrollY });
}
}
}
});
return items;
});
log("Auto-detected " + itemsToClick.length + " clickable items");
log("Matched " + itemsWithIds.length + "/" + providedItems.length + " provided items to IDs");
} else {
itemsWithIds = embeddedItems;
}
}
if (itemsToClick.length === 0) {
console.log(JSON.stringify({ error: "No clickable items found", modifiers: [], itemModifierMap: {} }));
if (itemsWithIds.length === 0) {
console.log(JSON.stringify({ error: "No items with IDs found", modifiers: [], itemModifierMap: {} }));
await browser.close();
process.exit(0);
}
// Click each item, capture modifier data
const allModifierGroups = new Map();
const itemModifierMap = {};
let clickedCount = 0;
let modItemCount = 0;
let noModCount = 0;
const maxClicks = Math.min(itemsToClick.length, 250);
for (let i = 0; i < maxClicks; i++) {
const item = itemsToClick[i];
try {
latestItemPage = null;
// Scroll to item position
await page.evaluate((y) => window.scrollTo(0, Math.max(0, y - 300)), item.y);
await page.waitForTimeout(200);
// Find the item element by name and click it
const clicked = await page.evaluate((itemName) => {
// Find the text node with this exact name
const walker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT);
while (walker.nextNode()) {
if (walker.currentNode.textContent.trim() === itemName) {
const el = walker.currentNode.parentElement;
const clickable = el.closest('a, button, [role="button"], [tabindex="0"], [data-anchor-id]')
|| el.parentElement?.closest('a, button, [role="button"], [tabindex="0"], [data-anchor-id]')
|| el;
const rect = clickable.getBoundingClientRect();
if (rect.width > 0 && rect.height > 0) {
clickable.click();
return true;
}
}
// We need the GraphQL query template. Trigger one click to capture it.
if (!queryTemplate) {
log("Capturing GraphQL query template via click...");
const clicked = await page.evaluate(() => {
const els = document.querySelectorAll("button, [role=button], [data-anchor-id]");
for (const el of els) {
if (el.textContent.includes("$") && el.textContent.length < 300 && el.textContent.length > 5) {
el.click();
return true;
}
return false;
}, item.name);
if (!clicked) continue;
clickedCount++;
// Wait for GraphQL response (up to 5s)
const startTime = Date.now();
while (!latestItemPage && Date.now() - startTime < 5000) {
await page.waitForTimeout(150);
}
if (latestItemPage && latestItemPage.optionLists && Array.isArray(latestItemPage.optionLists)) {
const optionLists = latestItemPage.optionLists;
if (optionLists.length > 0) {
const modNames = [];
for (const ol of optionLists) {
const olName = ol.name || "Options";
if (!allModifierGroups.has(olName)) {
const options = [];
if (ol.options && Array.isArray(ol.options)) {
for (const opt of ol.options) {
// DoorDash prices: sometimes cents (int), sometimes dollars (string like "$1.50")
let price = 0;
if (opt.price) {
if (typeof opt.price === "number") {
price = opt.price > 100 ? opt.price / 100 : opt.price;
} else if (typeof opt.price === "string") {
price = parseFloat(opt.price.replace(/[^0-9.]/g, "")) || 0;
}
}
if (opt.displayPrice) {
price = parseFloat(String(opt.displayPrice).replace(/[^0-9.]/g, "")) || price;
}
options.push({ name: opt.name || "", price: price });
}
}
allModifierGroups.set(olName, {
name: olName,
required: ol.isRequired || false,
minSelections: ol.minNumOptions || 0,
maxSelections: ol.maxNumOptions || 0,
options: options
});
}
modNames.push(olName);
}
itemModifierMap[item.name] = modNames;
modItemCount++;
} else {
noModCount++;
}
} else {
noModCount++;
return false;
});
if (clicked) {
// Wait for the request to be captured
const start = Date.now();
while (!queryTemplate && Date.now() - start < 8000) {
await page.waitForTimeout(200);
}
// Close modal
await page.keyboard.press("Escape");
await page.waitForTimeout(350);
// Double-check modal is closed
const stillOpen = await page.evaluate(() => {
const overlay = document.querySelector('[data-testid="modal-overlay"], [class*="ModalOverlay"], [class*="Overlay"]');
if (overlay) { overlay.click(); return true; }
return false;
});
if (stillOpen) await page.waitForTimeout(300);
} catch (e) {
log("Error clicking " + item.name + ": " + e.message);
try { await page.keyboard.press("Escape"); } catch (e2) {}
await page.waitForTimeout(300);
}
if ((i + 1) % 25 === 0) {
log("Progress: " + (i + 1) + "/" + maxClicks + " | " + modItemCount + " with mods, " + noModCount + " without");
await page.waitForTimeout(500);
}
}
log("Done: " + clickedCount + "/" + maxClicks + " clicked, " + modItemCount + " with modifiers, " + allModifierGroups.size + " unique groups");
// Choose the query text for the direct calls: the captured template when one
// was seen, otherwise a hard-coded minimal query.
if (!queryTemplate) {
  log("Could not capture query template, using hardcoded minimal query");
  // Minimal query that gets optionLists.
  // It declares exactly the variables it references: the GraphQL validation
  // rule "All Variables Used" rejects an operation that declares a variable
  // and never uses it. Additional values sent in the request's `variables`
  // object are harmless, since only declared variables are coerced.
  queryTemplate = `query itemPage($storeId: ID!, $itemId: ID!, $consumerId: ID, $isMerchantPreview: Boolean, $fulfillmentType: FulfillmentType, $cursorContext: ItemPageCursorContextInput) {
  itemPage(storeId: $storeId, itemId: $itemId, consumerId: $consumerId, isMerchantPreview: $isMerchantPreview, fulfillmentType: $fulfillmentType, cursorContext: $cursorContext) {
    optionLists {
      name minNumOptions maxNumOptions isOptional
      options { name unitAmount currency decimalPlaces displayString __typename }
      __typename
    }
    __typename
  }
}`;
} else {
  log("Captured query template (" + queryTemplate.length + " chars)");
}
// Accumulators for the direct GraphQL pass over the items.
const allModifierGroups = new Map();
const itemModifierMap = {};
let successCount = 0;
let modItemCount = 0;

// Collapse entries that share an ID: the first entry seen for an ID is kept
// in uniqueItems, while idToNames records every name that appeared with it.
const seenIds = new Set();
const uniqueItems = [];
const idToNames = new Map();
itemsWithIds.forEach((entry) => {
  const { id, name } = entry;
  if (seenIds.has(id)) {
    idToNames.get(id).push(name);
    return;
  }
  seenIds.add(id);
  uniqueItems.push(entry);
  idToNames.set(id, [name]);
});
log("Deduplicated: " + itemsWithIds.length + " -> " + uniqueItems.length + " unique IDs");
// Fetch modifier data for every unique item ID through in-page fetch() calls,
// 5 concurrent requests per batch, and fold the results into
// allModifierGroups (group name -> group) and itemModifierMap
// (item name -> [group names]).

// Converts one option entry of an optionList into { name, price }.
// Price is unitAmount scaled by decimalPlaces when both are present, otherwise
// the digits of a "$"-bearing displayString, otherwise 0.
const toOption = (opt) => {
  let price = 0;
  if (opt.unitAmount && opt.decimalPlaces !== undefined) {
    // ?? rather than ||: a decimalPlaces of 0 is a real value and must not be
    // replaced by 2; only null falls back to 2.
    price = opt.unitAmount / Math.pow(10, opt.decimalPlaces ?? 2);
  } else if (opt.displayString && opt.displayString.includes("$")) {
    price = parseFloat(opt.displayString.replace(/[^0-9.]/g, "")) || 0;
  }
  return { name: opt.name || "", price: price };
};

// Builds the output record for one optionList under the given group name.
const toModifierGroup = (ol, groupName) => ({
  name: groupName,
  required: !ol.isOptional,
  minSelections: ol.minNumOptions || 0,
  maxSelections: ol.maxNumOptions || 0,
  options: Array.isArray(ol.options) ? ol.options.map(toOption) : []
});

// Failures are counted, but only the first few are logged in detail so a
// fully failing run does not flood stderr.
const maxFailureLogs = 5;
let failedCount = 0;

// Process in batches of 5 concurrent requests
const batchSize = 5;
for (let i = 0; i < uniqueItems.length; i += batchSize) {
  const batch = uniqueItems.slice(i, i + batchSize);
  // Runs inside the page so the requests are same-origin with the loaded site.
  // Each item resolves to { id, name, optionLists } or { id, name, error }.
  const results = await page.evaluate(async (params) => {
    const { items, storeId, query } = params;
    const fetchOne = async (item) => {
      try {
        const resp = await fetch("/graphql/itemPage?operation=itemPage", {
          method: "POST",
          headers: {
            "content-type": "application/json",
            "x-channel-id": "marketplace",
            "x-experience-id": "storefront",
            "apollographql-client-name": "@doordash/app-consumer-production-ssr-client",
            "apollographql-client-version": "3.0"
          },
          body: JSON.stringify({
            operationName: "itemPage",
            variables: {
              itemId: item.id,
              storeId: storeId,
              consumerId: null,
              isMerchantPreview: false,
              isNested: false,
              shouldFetchPresetCarousels: false,
              fulfillmentType: "Pickup",
              cursorContext: {},
              shouldFetchStoreLiteData: false
            },
            query: query
          })
        });
        // A non-2xx answer is a failed fetch, not "item has no modifiers".
        if (!resp.ok) {
          return { id: item.id, name: item.name, error: "HTTP " + resp.status };
        }
        const data = await resp.json();
        const itemPage = data && data.data && data.data.itemPage;
        if (itemPage && itemPage.optionLists) {
          return { id: item.id, name: item.name, optionLists: itemPage.optionLists };
        }
        // A GraphQL-level error without usable data is reported as a failure too.
        if (data && Array.isArray(data.errors) && data.errors.length > 0) {
          const first = data.errors[0];
          return { id: item.id, name: item.name, error: String((first && first.message) || "GraphQL error") };
        }
        return { id: item.id, name: item.name, optionLists: [] };
      } catch (e) {
        return { id: item.id, name: item.name, error: e.message };
      }
    };
    return Promise.all(items.map(fetchOne));
  }, { items: batch, storeId, query: queryTemplate });

  for (const result of results) {
    if (result.error) {
      failedCount++;
      if (failedCount <= maxFailureLogs) {
        log("Fetch failed for " + result.name + " (" + result.id + "): " + result.error);
      }
      continue;
    }
    successCount++;
    const optionLists = result.optionLists || [];
    if (optionLists.length === 0) continue;

    const modNames = [];
    for (const ol of optionLists) {
      const olName = ol.name || "Options";
      // Groups are keyed by name alone: the first definition seen for a name
      // is kept and later lists with the same name reuse it.
      if (!allModifierGroups.has(olName)) {
        allModifierGroups.set(olName, toModifierGroup(ol, olName));
      }
      modNames.push(olName);
    }
    // Map to all names sharing this ID
    const names = idToNames.get(result.id) || [result.name];
    for (const name of names) {
      itemModifierMap[name] = modNames;
    }
    modItemCount++;
  }

  // With a batch size of 5 this is true once every 25 items.
  if ((i + batchSize) % 25 < batchSize) {
    log("Progress: " + Math.min(i + batchSize, uniqueItems.length) + "/" + uniqueItems.length + " | " + modItemCount + " with mods, " + allModifierGroups.size + " groups");
  }
  // Small delay between batches to avoid rate limiting
  if (i + batchSize < uniqueItems.length) {
    await page.waitForTimeout(200);
  }
}
if (failedCount > 0) {
  log("Failed fetches: " + failedCount + "/" + uniqueItems.length);
}
log("Done: " + successCount + "/" + uniqueItems.length + " fetched, " + Object.keys(itemModifierMap).length + " items with modifiers, " + allModifierGroups.size + " unique groups");
const modifiers = Array.from(allModifierGroups.values());
@ -279,10 +292,10 @@ chromium.use(stealth());
modifiers: modifiers,
itemModifierMap: itemModifierMap,
stats: {
totalItems: itemsToClick.length,
clickedCount: clickedCount,
itemsWithModifiers: modItemCount,
itemsWithoutModifiers: noModCount,
totalItems: itemsWithIds.length,
uniqueItemIds: uniqueItems.length,
fetchedCount: successCount,
itemsWithModifiers: Object.keys(itemModifierMap).length,
uniqueModifierGroups: modifiers.length
}
}));