Rewrite DoorDash fast-path: use MenuPageItemList for full menu

- Extract items from MenuPageItemList (171 items) instead of StorePageCarouselItem (54)
- Categories already mapped to items via MenuPageItemList sections
- Cross-reference images from carousel entries by item name
- No need for Claude category assignment - data already structured

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
John Mizerek 2026-03-10 12:02:04 -07:00
parent 67e2079550
commit 33040c9cd3

View file

@ -1148,74 +1148,164 @@
<!--- ========== END WOOCOMMERCE FAST PATH ========== --->
<!--- ========== DOORDASH / ORDER.ONLINE FAST PATH ========== --->
<cfif findNoCase("StorePageCarouselItem", pageHtml) AND findNoCase("MenuBookCategory", pageHtml)>
<cfif findNoCase("MenuPageItem", pageHtml) AND findNoCase("MenuPageItemList", pageHtml)>
<cfset arrayAppend(response.steps, "DoorDash/order.online site detected - extracting embedded data")>
<cftry>
<!--- Extract categories from MenuBookCategory --->
<!--- HTML contains escaped JSON: \"__typename\":\"MenuBookCategory\" etc. --->
<cfset ddCatMatches = reMatchNoCase('\\"__typename\\":\\"MenuBookCategory\\",\\"id\\":\\"([^\\"]+)\\",\\"name\\":\\"([^\\"]+)\\",\\"numItems\\":(\d+)', pageHtml)>
<!--- DoorDash embeds menu data as escaped JSON in script tags --->
<!--- The backslash-quote delimiter used throughout --->
<cfset BQ = '\"'><!--- literal backslash-quote as it appears in the HTML --->
<!---
    Build an image map (item name to image URL) from StorePageCarouselItem
    entries; these are the entries that carry an imgUrl field.
    Keys receive the same \u0026 to & normalization that is applied to
    MenuPageItem names further down, so names containing an ampersand still
    match when the map is looked up by item name.
--->
<cfset ddImageMap = structNew()>
<cfset ddCarouselMarker = BQ & "__typename" & BQ & ":" & BQ & "StorePageCarouselItem" & BQ & "," & BQ & "id" & BQ & ":" & BQ>
<cfset cpNameKey = BQ & "name" & BQ & ":" & BQ>
<cfset cpImgKey = BQ & "imgUrl" & BQ & ":" & BQ>
<!---
    4th argument (multiCharacterDelimiter=true): split on the marker as ONE
    string. By default listToArray treats every character of the delimiter
    argument as its own delimiter, which would shred the page instead of
    yielding one chunk per carousel entry.
    3rd argument (includeEmptyFields=true): element 1 is always the text that
    precedes the first marker, so the loop below can safely start at 2.
--->
<cfset ddCarouselParts = listToArray(pageHtml, ddCarouselMarker, true, true)>
<cfloop from="2" to="#arrayLen(ddCarouselParts)#" index="cpIdx">
    <cfset cp = ddCarouselParts[cpIdx]>
    <!--- Extract name --->
    <cfset cpNameStart = findNoCase(cpNameKey, cp)>
    <cfif cpNameStart GT 0>
        <cfset cpNameStart = cpNameStart + len(cpNameKey)>
        <cfset cpNameEnd = find(BQ, cp, cpNameStart)>
        <cfif cpNameEnd GT cpNameStart>
            <cfset cpName = mid(cp, cpNameStart, cpNameEnd - cpNameStart)>
            <!--- Normalize the key the same way the lookup name is normalized --->
            <cfset cpName = replace(cpName, '\u0026', '&', 'all')>
            <!--- Extract imgUrl --->
            <cfset cpImgStart = findNoCase(cpImgKey, cp)>
            <cfif cpImgStart GT 0>
                <cfset cpImgStart = cpImgStart + len(cpImgKey)>
                <cfset cpImgEnd = find(BQ, cp, cpImgStart)>
                <cfif cpImgEnd GT cpImgStart>
                    <cfset cpImgUrl = mid(cp, cpImgStart, cpImgEnd - cpImgStart)>
                    <cfif len(cpImgUrl) AND cpImgUrl NEQ "null">
                        <!--- Upgrade to larger size --->
                        <cfif findNoCase("width=", cpImgUrl)>
                            <cfset cpImgUrl = reReplaceNoCase(cpImgUrl, 'width=\d+', 'width=600')>
                            <cfset cpImgUrl = reReplaceNoCase(cpImgUrl, 'height=\d+', 'height=600')>
                        </cfif>
                        <cfset ddImageMap[cpName] = cpImgUrl>
                    </cfif>
                </cfif>
            </cfif>
        </cfif>
    </cfif>
</cfloop>
<cfset arrayAppend(response.steps, "Built image map with " & structCount(ddImageMap) & " entries from carousel")>
<!--- Extract full menu from MenuPageItemList (categories) and MenuPageItem (items) --->
<cfset ddCategories = arrayNew(1)>
<cfset ddCatSeen = structNew()>
<cfloop array="#ddCatMatches#" index="ddCatMatch">
<cfset ddCatName = reReplaceNoCase(ddCatMatch, '.*\\"name\\":\\"([^\\"]+)\\".*', '\1')>
<cfset ddCatName = replace(ddCatName, '\u0026', '&', 'all')>
<cfset ddCatName = replace(ddCatName, '&amp;', '&', 'all')>
<cfif NOT structKeyExists(ddCatSeen, ddCatName) AND ddCatName NEQ "Most Ordered">
<cfset ddCatSeen[ddCatName] = true>
<cfset arrayAppend(ddCategories, { "name": ddCatName, "parentCategoryName": "" })>
</cfif>
</cfloop>
<cfset arrayAppend(response.steps, "Found " & arrayLen(ddCategories) & " DoorDash categories")>
<!--- Extract items from StorePageCarouselItem --->
<cfset ddItemMatches = reMatchNoCase('\\"__typename\\":\\"StorePageCarouselItem\\",\\"id\\":\\"(\d+)\\",\\"name\\":\\"([^\\"]+)\\",\\"description\\":\\"([^\\"]*)\\",\\"displayPrice\\":\\"([^\\"]*)\\",\\"displayStrikethroughPrice\\":\\"[^\\"]*\\",\\"imgUrl\\":\\"([^\\"]*?)\\"', pageHtml)>
<cfset ddItems = arrayNew(1)>
<cfset ddItemSeen = structNew()>
<cfloop array="#ddItemMatches#" index="ddItemMatch">
<cfset ddItemName = reReplaceNoCase(ddItemMatch, '.*\\"name\\":\\"([^\\"]+)\\".*', '\1')>
<cfset ddItemName = replace(ddItemName, '\u0026', '&', 'all')>
<cfset ddItemName = replace(ddItemName, '&amp;', '&', 'all')>
<cfif structKeyExists(ddItemSeen, ddItemName)>
<cfset ddItemCounter = 0>
<!--- Split on MenuPageItemList to get category sections --->
<!---
    multiCharacterDelimiter=true (4th argument) makes listToArray split on the
    whole marker string; by default each character of the delimiter argument is
    a separate delimiter, which would break the page into meaningless fragments.
    includeEmptyFields=true (3rd argument) keeps element 1 as the text before
    the first marker, so iterating from index 2 visits exactly the sections.
--->
<cfset ddCatParts = listToArray(pageHtml, BQ & "__typename" & BQ & ":" & BQ & "MenuPageItemList" & BQ & "," & BQ & "id" & BQ & ":" & BQ, true, true)>
<cfloop from="2" to="#arrayLen(ddCatParts)#" index="catIdx">
<cfset catPart = ddCatParts[catIdx]>
<!--- Extract category name --->
<cfset catNameStart = findNoCase(BQ & "name" & BQ & ":" & BQ, catPart)>
<cfif catNameStart EQ 0><cfcontinue></cfif>
<cfset catNameStart = catNameStart + len(BQ & "name" & BQ & ":" & BQ)>
<cfset catNameEnd = find(BQ, catPart, catNameStart)>
<cfif catNameEnd LTE catNameStart><cfcontinue></cfif>
<cfset catName = mid(catPart, catNameStart, catNameEnd - catNameStart)>
<cfset catName = replace(catName, '\u0026', '&', 'all')>
<cfset catName = replace(catName, '&amp;', '&', 'all')>
<!--- Skip "Most Ordered" and duplicates --->
<cfif catName EQ "Most Ordered" OR structKeyExists(ddCatSeen, catName)>
<cfcontinue>
</cfif>
<cfset ddItemSeen[ddItemName] = true>
<cfset ddCatSeen[catName] = true>
<cfset arrayAppend(ddCategories, { "name": catName, "parentCategoryName": "" })>
<cfset ddItemDesc = reReplaceNoCase(ddItemMatch, '.*\\"description\\":\\"([^\\"]*)\\"\s*,\s*\\"displayPrice.*', '\1')>
<cfset ddItemDesc = replace(ddItemDesc, '\u0026', '&', 'all')>
<!--- Extract items within this category section --->
<!---
    multiCharacterDelimiter=true (4th argument) is required so the section is
    split on the complete MenuPageItem marker rather than on each of its
    characters. includeEmptyFields=true (3rd argument) keeps element 1 as the
    category header text that precedes the first item, so items start at index 2.
--->
<cfset itemParts = listToArray(catPart, BQ & "__typename" & BQ & ":" & BQ & "MenuPageItem" & BQ & "," & BQ & "id" & BQ & ":" & BQ, true, true)>
<cfloop from="2" to="#arrayLen(itemParts)#" index="ipIdx">
<cfset ip = itemParts[ipIdx]>
<cfset ddItemPrice = reReplaceNoCase(ddItemMatch, '.*\\"displayPrice\\":\\"([^\\"]*)\\"\s*,.*', '\1')>
<cfset ddItemPrice = reReplace(ddItemPrice, '[^0-9.]', '', 'all')>
<!--- Extract item name --->
<cfset ipNameStart = findNoCase(BQ & "name" & BQ & ":" & BQ, ip)>
<cfif ipNameStart EQ 0><cfcontinue></cfif>
<cfset ipNameStart = ipNameStart + len(BQ & "name" & BQ & ":" & BQ)>
<cfset ipNameEnd = find(BQ, ip, ipNameStart)>
<cfif ipNameEnd LTE ipNameStart><cfcontinue></cfif>
<cfset ipName = mid(ip, ipNameStart, ipNameEnd - ipNameStart)>
<cfset ipName = replace(ipName, '\u0026', '&', 'all')>
<cfset ddItemImg = reReplaceNoCase(ddItemMatch, '.*\\"imgUrl\\":\\"([^\\"]*)\\"\s*,?.*', '\1')>
<!--- Upgrade image to larger size --->
<cfif len(ddItemImg) AND findNoCase("width=", ddItemImg)>
<cfset ddItemImg = reReplaceNoCase(ddItemImg, 'width=\d+', 'width=600')>
<cfset ddItemImg = reReplaceNoCase(ddItemImg, 'height=\d+', 'height=600')>
<!--- Skip duplicates --->
<cfif structKeyExists(ddItemSeen, ipName)><cfcontinue></cfif>
<cfset ddItemSeen[ipName] = true>
<!---
    Pull description, price and image for the current MenuPageItem chunk (ip).
    Each field is read as the text between its escaped-JSON key prefix and the
    next backslash-quote; a field that is missing or empty keeps its default.
--->
<!--- Description: defaults to an empty string --->
<cfset ipDesc = "">
<cfset descKey = BQ & "description" & BQ & ":" & BQ>
<cfset descFrom = findNoCase(descKey, ip)>
<cfif descFrom GT 0>
    <cfset descFrom = descFrom + len(descKey)>
    <cfset descTo = find(BQ, ip, descFrom)>
    <cfif descTo GT descFrom>
        <!--- Decode the JSON-escaped ampersand --->
        <cfset ipDesc = replace(mid(ip, descFrom, descTo - descFrom), '\u0026', '&', 'all')>
    </cfif>
</cfif>
<!--- Price: defaults to 0; everything except digits and dots is stripped before val() --->
<cfset ipPrice = 0>
<cfset priceKey = BQ & "displayPrice" & BQ & ":" & BQ>
<cfset priceFrom = findNoCase(priceKey, ip)>
<cfif priceFrom GT 0>
    <cfset priceFrom = priceFrom + len(priceKey)>
    <cfset priceTo = find(BQ, ip, priceFrom)>
    <cfif priceTo GT priceFrom>
        <cfset priceText = reReplace(mid(ip, priceFrom, priceTo - priceFrom), '[^0-9.]', '', 'all')>
        <cfset ipPrice = val(priceText)>
    </cfif>
</cfif>
<!--- Image: prefer the carousel image map entry for this item name --->
<cfif structKeyExists(ddImageMap, ipName)>
    <cfset ipImg = ddImageMap[ipName]>
<cfelse>
    <cfset ipImg = "">
</cfif>
<!--- Otherwise fall back to an imgUrl found directly on this MenuPageItem --->
<cfif len(ipImg) EQ 0>
    <cfset imgKey = BQ & "imgUrl" & BQ & ":" & BQ>
    <cfset imgFrom = findNoCase(imgKey, ip)>
    <cfif imgFrom GT 0>
        <cfset imgFrom = imgFrom + len(imgKey)>
        <cfset imgTo = find(BQ, ip, imgFrom)>
        <cfif imgTo GT imgFrom>
            <cfset ipImg = mid(ip, imgFrom, imgTo - imgFrom)>
            <cfif ipImg EQ "null">
                <!--- A literal "null" value means no image --->
                <cfset ipImg = "">
            <cfelseif findNoCase("width=", ipImg)>
                <!--- Request the 600x600 rendition instead of the thumbnail size --->
                <cfset ipImg = reReplaceNoCase(ipImg, 'width=\d+', 'width=600')>
                <cfset ipImg = reReplaceNoCase(ipImg, 'height=\d+', 'height=600')>
            </cfif>
        </cfif>
    </cfif>
</cfif>
<cfset ddItemCounter = ddItemCounter + 1>
<cfset ddItem = structNew()>
<cfset ddItem["name"] = ddItemName>
<cfset ddItem["description"] = ddItemDesc>
<cfset ddItem["price"] = val(ddItemPrice)>
<cfset ddItem["imageUrl"] = ddItemImg>
<cfset ddItem["imageSrc"] = ddItemImg>
<cfif len(ddItemImg)>
<cfset ddItem["imageFilename"] = listLast(ddItemImg, "/")>
</cfif>
<cfset ddItem["category"] = "">
<cfset ddItem["name"] = ipName>
<cfset ddItem["description"] = ipDesc>
<cfset ddItem["price"] = ipPrice>
<cfset ddItem["category"] = catName>
<cfset ddItem["modifiers"] = arrayNew(1)>
<cfset ddItem["id"] = "item_" & arrayLen(ddItems) + 1>
<cfset ddItem["id"] = "item_" & ddItemCounter>
<cfset ddItem["imageUrl"] = ipImg>
<cfset ddItem["imageSrc"] = ipImg>
<cfif len(ipImg)>
<cfset ddItem["imageFilename"] = listLast(ipImg, "/")>
</cfif>
<cfset arrayAppend(ddItems, ddItem)>
</cfloop>
<cfset arrayAppend(response.steps, "Found " & arrayLen(ddItems) & " DoorDash items with images")>
</cfloop>
<!--- Try to assign categories to items using category button labels from HTML --->
<!--- DoorDash renders category sections with aria-labels matching category names --->
<!--- Items in the carousel don't have explicit category assignment, so use Claude for that --->
<!--- Tally the extracted items that ended up with a non-empty image URL --->
<cfset ddItemsWithImg = 0>
<cfloop array="#ddItems#" index="ddi">
    <cfset ddItemsWithImg = ddItemsWithImg + (len(ddi.imageUrl) GT 0 ? 1 : 0)>
</cfloop>
<!--- Record category / item / image totals in the response step log --->
<cfset ddSummary = "Found " & arrayLen(ddCategories) & " categories, " & arrayLen(ddItems) & " items (" & ddItemsWithImg & " with images)">
<cfset arrayAppend(response.steps, ddSummary)>
<!--- Extract business info --->
<cfset ddBusiness = structNew()>
<!--- Business name from title or og:title --->
<cfset ddTitleMatch = reMatchNoCase('<title>([^<]+)</title>', pageHtml)>
<cfif arrayLen(ddTitleMatch)>
<cfset ddTitle = reReplaceNoCase(ddTitleMatch[1], '<title>([^<]+)</title>', '\1')>
@ -1225,8 +1315,6 @@
<cfset ddBusiness["name"] = ddTitle>
</cfif>
</cfif>
<!--- Address from StoreHeaderAddress --->
<cfset ddAddrMatch = reMatchNoCase('\\"__typename\\":\\"StoreHeaderAddress\\",\\"street\\":\\"([^\\"]+)\\",\\"displayAddress\\":\\"([^\\"]+)\\"', pageHtml)>
<cfif arrayLen(ddAddrMatch)>
<cfset ddAddr = reReplaceNoCase(ddAddrMatch[1], '.*\\"displayAddress\\":\\"([^\\"]+)\\".*', '\1')>
@ -1234,61 +1322,6 @@
</cfif>
<cfif arrayLen(ddItems) GT 0>
<!--- Use Claude to assign categories to items --->
<cfset ddCatNames = arrayNew(1)>
<cfloop array="#ddCategories#" index="ddCat">
<cfset arrayAppend(ddCatNames, ddCat.name)>
</cfloop>
<cfset ddItemNames = arrayNew(1)>
<cfloop array="#ddItems#" index="ddI">
<cfset arrayAppend(ddItemNames, ddI.name & " - " & left(ddI.description, 60))>
</cfloop>
<cfset ddCatPrompt = "Given these restaurant menu categories: " & arrayToList(ddCatNames, ", ") & chr(10) & chr(10) & "Assign each of these items to the best matching category. Return ONLY a JSON array of category names in the same order as the items:" & chr(10) & serializeJSON(ddItemNames)>
<cfset ddCatRequest = {
"model": "claude-sonnet-4-20250514",
"max_tokens": 4096,
"temperature": 0,
"messages": [{ "role": "user", "content": ddCatPrompt }]
}>
<cftry>
<cfhttp url="https://api.anthropic.com/v1/messages" method="POST" timeout="60" result="ddCatResult">
<cfhttpparam type="header" name="Content-Type" value="application/json">
<cfhttpparam type="header" name="x-api-key" value="#CLAUDE_API_KEY#">
<cfhttpparam type="header" name="anthropic-version" value="2023-06-01">
<cfhttpparam type="body" value="#serializeJSON(ddCatRequest)#">
</cfhttp>
<cfif findNoCase("200", ddCatResult.statusCode)>
<cfset ddCatResponse = deserializeJSON(ddCatResult.fileContent)>
<cfset ddCatText = ddCatResponse.content[1].text>
<!--- Strip code fences --->
<cfset ddCatText = trim(ddCatText)>
<cfif left(ddCatText, 7) EQ "```json">
<cfset ddCatText = mid(ddCatText, 8, len(ddCatText) - 7)>
</cfif>
<cfif left(ddCatText, 3) EQ "```">
<cfset ddCatText = mid(ddCatText, 4, len(ddCatText) - 3)>
</cfif>
<cfif right(ddCatText, 3) EQ "```">
<cfset ddCatText = left(ddCatText, len(ddCatText) - 3)>
</cfif>
<cfset ddCatText = trim(ddCatText)>
<cfset ddCatAssignments = deserializeJSON(ddCatText)>
<cfif isArray(ddCatAssignments) AND arrayLen(ddCatAssignments) EQ arrayLen(ddItems)>
<cfloop from="1" to="#arrayLen(ddItems)#" index="ddIdx">
<cfset ddItems[ddIdx]["category"] = ddCatAssignments[ddIdx]>
</cfloop>
<cfset arrayAppend(response.steps, "Claude assigned categories to all items")>
</cfif>
</cfif>
<cfcatch>
<cfset arrayAppend(response.steps, "Category assignment failed: " & cfcatch.message)>
</cfcatch>
</cftry>
<!--- Build image URL list --->
<cfset ddImageUrls = arrayNew(1)>
<cfloop array="#ddItems#" index="ddI">