Rewrite DoorDash fast-path: use MenuPageItemList for full menu
- Extract items from MenuPageItemList (171 items) instead of StorePageCarouselItem (54)
- Categories already mapped to items via MenuPageItemList sections
- Cross-reference images from carousel entries by item name
- No need for Claude category assignment - data already structured

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
67e2079550
commit
33040c9cd3
1 changed file with 141 additions and 108 deletions
|
|
@ -1148,74 +1148,164 @@
|
|||
<!--- ========== END WOOCOMMERCE FAST PATH ========== --->
|
||||
|
||||
<!--- ========== DOORDASH / ORDER.ONLINE FAST PATH ========== --->
|
||||
<cfif findNoCase("StorePageCarouselItem", pageHtml) AND findNoCase("MenuBookCategory", pageHtml)>
|
||||
<cfif findNoCase("MenuPageItem", pageHtml) AND findNoCase("MenuPageItemList", pageHtml)>
|
||||
<cfset arrayAppend(response.steps, "DoorDash/order.online site detected - extracting embedded data")>
|
||||
<cftry>
|
||||
<!--- Extract categories from MenuBookCategory --->
|
||||
<!--- HTML contains escaped JSON: \"__typename\":\"MenuBookCategory\" etc. --->
|
||||
<cfset ddCatMatches = reMatchNoCase('\\"__typename\\":\\"MenuBookCategory\\",\\"id\\":\\"([^\\"]+)\\",\\"name\\":\\"([^\\"]+)\\",\\"numItems\\":(\d+)', pageHtml)>
|
||||
<!--- DoorDash embeds menu data as escaped JSON in script tags, so every JSON quote
      shows up in the page source as a backslash followed by a double quote. --->
<cfset BQ = '\"'><!--- literal backslash-quote as it appears in the HTML --->

<!---
    Build an image map (item name -> image URL) from the StorePageCarouselItem
    entries, which carry an imgUrl field. The map is keyed by the decoded item
    name so that a later lookup by item name can find the image.

    Reads:  pageHtml (raw page source), BQ
    Writes: ddImageMap, and appends one progress line to response.steps
--->
<cfset ddImageMap = structNew()>
<cfset cpNameKey = BQ & "name" & BQ & ":" & BQ>
<cfset cpImgKey = BQ & "imgUrl" & BQ & ":" & BQ>
<cfset ddCarouselDelim = BQ & "__typename" & BQ & ":" & BQ & "StorePageCarouselItem" & BQ & "," & BQ & "id" & BQ & ":" & BQ>

<!--- listToArray() treats each character of a multi-character delimiter as a separate
      separator unless the 4th argument (multiCharacterDelimiter) is true. Without it the
      page is cut at every backslash, quote, colon and letter of the delimiter instead of
      once per carousel entry. The 3rd argument (includeEmptyFields) is true so element 1 is
      always the text before the first entry, which is why the loop starts at 2. --->
<cfset ddCarouselParts = listToArray(pageHtml, ddCarouselDelim, true, true)>

<cfloop from="2" to="#arrayLen(ddCarouselParts)#" index="cpIdx">
    <cfset cp = ddCarouselParts[cpIdx]>

    <!--- Item name: the text between \"name\":\" and the next backslash-quote --->
    <cfset cpNameStart = findNoCase(cpNameKey, cp)>
    <cfif cpNameStart EQ 0><cfcontinue></cfif>
    <cfset cpNameStart = cpNameStart + len(cpNameKey)>
    <cfset cpNameEnd = find(BQ, cp, cpNameStart)>
    <cfif cpNameEnd LTE cpNameStart><cfcontinue></cfif>
    <cfset cpName = mid(cp, cpNameStart, cpNameEnd - cpNameStart)>
    <!--- Decode the JSON-escaped ampersand so the key matches item names that have had
          the same decoding applied before they are looked up in this map. --->
    <cfset cpName = replace(cpName, '\u0026', '&', 'all')>

    <!--- Image URL: the text between \"imgUrl\":\" and the next backslash-quote.
          NOTE(review): a chunk runs until the next carousel delimiter (or end of page), so
          an entry without a quoted imgUrl could pick up a later, unrelated imgUrl - confirm
          against real page data. --->
    <cfset cpImgStart = findNoCase(cpImgKey, cp)>
    <cfif cpImgStart EQ 0><cfcontinue></cfif>
    <cfset cpImgStart = cpImgStart + len(cpImgKey)>
    <cfset cpImgEnd = find(BQ, cp, cpImgStart)>
    <cfif cpImgEnd LTE cpImgStart><cfcontinue></cfif>
    <cfset cpImgUrl = mid(cp, cpImgStart, cpImgEnd - cpImgStart)>
    <cfif NOT len(cpImgUrl) OR cpImgUrl EQ "null"><cfcontinue></cfif>

    <!--- Upgrade to larger size when the URL carries explicit dimensions --->
    <cfif findNoCase("width=", cpImgUrl)>
        <cfset cpImgUrl = reReplaceNoCase(cpImgUrl, 'width=\d+', 'width=600')>
        <cfset cpImgUrl = reReplaceNoCase(cpImgUrl, 'height=\d+', 'height=600')>
    </cfif>

    <cfset ddImageMap[cpName] = cpImgUrl>
</cfloop>
<cfset arrayAppend(response.steps, "Built image map with " & structCount(ddImageMap) & " entries from carousel")>
|
||||
|
||||
<!--- Extract full menu from MenuPageItemList (categories) and MenuPageItem (items) --->
|
||||
<cfset ddCategories = arrayNew(1)>
|
||||
<cfset ddCatSeen = structNew()>
|
||||
<cfloop array="#ddCatMatches#" index="ddCatMatch">
|
||||
<cfset ddCatName = reReplaceNoCase(ddCatMatch, '.*\\"name\\":\\"([^\\"]+)\\".*', '\1')>
|
||||
<cfset ddCatName = replace(ddCatName, '\u0026', '&', 'all')>
|
||||
<cfset ddCatName = replace(ddCatName, '&', '&', 'all')>
|
||||
<cfif NOT structKeyExists(ddCatSeen, ddCatName) AND ddCatName NEQ "Most Ordered">
|
||||
<cfset ddCatSeen[ddCatName] = true>
|
||||
<cfset arrayAppend(ddCategories, { "name": ddCatName, "parentCategoryName": "" })>
|
||||
</cfif>
|
||||
</cfloop>
|
||||
<cfset arrayAppend(response.steps, "Found " & arrayLen(ddCategories) & " DoorDash categories")>
|
||||
|
||||
<!--- Extract items from StorePageCarouselItem --->
|
||||
<cfset ddItemMatches = reMatchNoCase('\\"__typename\\":\\"StorePageCarouselItem\\",\\"id\\":\\"(\d+)\\",\\"name\\":\\"([^\\"]+)\\",\\"description\\":\\"([^\\"]*)\\",\\"displayPrice\\":\\"([^\\"]*)\\",\\"displayStrikethroughPrice\\":\\"[^\\"]*\\",\\"imgUrl\\":\\"([^\\"]*?)\\"', pageHtml)>
|
||||
<cfset ddItems = arrayNew(1)>
|
||||
<cfset ddItemSeen = structNew()>
|
||||
<cfloop array="#ddItemMatches#" index="ddItemMatch">
|
||||
<cfset ddItemName = reReplaceNoCase(ddItemMatch, '.*\\"name\\":\\"([^\\"]+)\\".*', '\1')>
|
||||
<cfset ddItemName = replace(ddItemName, '\u0026', '&', 'all')>
|
||||
<cfset ddItemName = replace(ddItemName, '&', '&', 'all')>
|
||||
<cfif structKeyExists(ddItemSeen, ddItemName)>
|
||||
<cfset ddItemCounter = 0>
|
||||
|
||||
<!--- Split on MenuPageItemList to get category sections.
      The delimiter has to be matched as one whole string: by default listToArray() treats
      every character of a multi-character delimiter as its own separator, so the 4th
      argument (multiCharacterDelimiter) is set to true. The 3rd argument (includeEmptyFields)
      is true so element 1 is always the text before the first section and section data
      starts at index 2. --->
<cfset ddCatParts = listToArray(pageHtml, BQ & "__typename" & BQ & ":" & BQ & "MenuPageItemList" & BQ & "," & BQ & "id" & BQ & ":" & BQ, true, true)>
|
||||
<cfloop from="2" to="#arrayLen(ddCatParts)#" index="catIdx">
|
||||
<cfset catPart = ddCatParts[catIdx]>
|
||||
|
||||
<!--- Extract category name --->
|
||||
<cfset catNameStart = findNoCase(BQ & "name" & BQ & ":" & BQ, catPart)>
|
||||
<cfif catNameStart EQ 0><cfcontinue></cfif>
|
||||
<cfset catNameStart = catNameStart + len(BQ & "name" & BQ & ":" & BQ)>
|
||||
<cfset catNameEnd = find(BQ, catPart, catNameStart)>
|
||||
<cfif catNameEnd LTE catNameStart><cfcontinue></cfif>
|
||||
<cfset catName = mid(catPart, catNameStart, catNameEnd - catNameStart)>
|
||||
<!--- Normalize ampersands in the category name: decode the JSON unicode escape and the
      HTML entity form. (Replacing '&' with '&' is a no-op, so the entity is spelled out.) --->
<cfset catName = replace(catName, '\u0026', '&', 'all')>
<cfset catName = replace(catName, '&amp;', '&', 'all')>
|
||||
|
||||
<!--- Skip "Most Ordered" and duplicates --->
|
||||
<cfif catName EQ "Most Ordered" OR structKeyExists(ddCatSeen, catName)>
|
||||
<cfcontinue>
|
||||
</cfif>
|
||||
<cfset ddItemSeen[ddItemName] = true>
|
||||
<cfset ddCatSeen[catName] = true>
|
||||
<cfset arrayAppend(ddCategories, { "name": catName, "parentCategoryName": "" })>
|
||||
|
||||
<cfset ddItemDesc = reReplaceNoCase(ddItemMatch, '.*\\"description\\":\\"([^\\"]*)\\"\s*,\s*\\"displayPrice.*', '\1')>
|
||||
<cfset ddItemDesc = replace(ddItemDesc, '\u0026', '&', 'all')>
|
||||
<!--- Extract items within this category section.
      The MenuPageItem marker is passed as a single multi-character delimiter (4th argument
      true); otherwise listToArray() would split on each individual character of it.
      Empty fields are kept (3rd argument true) so element 1 is always the category header
      text that precedes the first item and item data starts at index 2. --->
<cfset itemParts = listToArray(catPart, BQ & "__typename" & BQ & ":" & BQ & "MenuPageItem" & BQ & "," & BQ & "id" & BQ & ":" & BQ, true, true)>
|
||||
<cfloop from="2" to="#arrayLen(itemParts)#" index="ipIdx">
|
||||
<cfset ip = itemParts[ipIdx]>
|
||||
|
||||
<cfset ddItemPrice = reReplaceNoCase(ddItemMatch, '.*\\"displayPrice\\":\\"([^\\"]*)\\"\s*,.*', '\1')>
|
||||
<cfset ddItemPrice = reReplace(ddItemPrice, '[^0-9.]', '', 'all')>
|
||||
<!--- Extract item name --->
|
||||
<cfset ipNameStart = findNoCase(BQ & "name" & BQ & ":" & BQ, ip)>
|
||||
<cfif ipNameStart EQ 0><cfcontinue></cfif>
|
||||
<cfset ipNameStart = ipNameStart + len(BQ & "name" & BQ & ":" & BQ)>
|
||||
<cfset ipNameEnd = find(BQ, ip, ipNameStart)>
|
||||
<cfif ipNameEnd LTE ipNameStart><cfcontinue></cfif>
|
||||
<cfset ipName = mid(ip, ipNameStart, ipNameEnd - ipNameStart)>
|
||||
<cfset ipName = replace(ipName, '\u0026', '&', 'all')>
|
||||
|
||||
<cfset ddItemImg = reReplaceNoCase(ddItemMatch, '.*\\"imgUrl\\":\\"([^\\"]*)\\"\s*,?.*', '\1')>
|
||||
<!--- Upgrade image to larger size --->
|
||||
<cfif len(ddItemImg) AND findNoCase("width=", ddItemImg)>
|
||||
<cfset ddItemImg = reReplaceNoCase(ddItemImg, 'width=\d+', 'width=600')>
|
||||
<cfset ddItemImg = reReplaceNoCase(ddItemImg, 'height=\d+', 'height=600')>
|
||||
</cfif>
|
||||
<!--- Skip duplicates --->
|
||||
<cfif structKeyExists(ddItemSeen, ipName)><cfcontinue></cfif>
|
||||
<cfset ddItemSeen[ipName] = true>
|
||||
|
||||
<cfset ddItem = structNew()>
|
||||
<cfset ddItem["name"] = ddItemName>
|
||||
<cfset ddItem["description"] = ddItemDesc>
|
||||
<cfset ddItem["price"] = val(ddItemPrice)>
|
||||
<cfset ddItem["imageUrl"] = ddItemImg>
|
||||
<cfset ddItem["imageSrc"] = ddItemImg>
|
||||
<cfif len(ddItemImg)>
|
||||
<cfset ddItem["imageFilename"] = listLast(ddItemImg, "/")>
|
||||
</cfif>
|
||||
<cfset ddItem["category"] = "">
|
||||
<cfset ddItem["modifiers"] = arrayNew(1)>
|
||||
<cfset ddItem["id"] = "item_" & arrayLen(ddItems) + 1>
|
||||
<cfset arrayAppend(ddItems, ddItem)>
|
||||
<!--- Extract description --->
|
||||
<cfset ipDesc = "">
|
||||
<cfset ipDescStart = findNoCase(BQ & "description" & BQ & ":" & BQ, ip)>
|
||||
<cfif ipDescStart GT 0>
|
||||
<cfset ipDescStart = ipDescStart + len(BQ & "description" & BQ & ":" & BQ)>
|
||||
<cfset ipDescEnd = find(BQ, ip, ipDescStart)>
|
||||
<cfif ipDescEnd GT ipDescStart>
|
||||
<cfset ipDesc = mid(ip, ipDescStart, ipDescEnd - ipDescStart)>
|
||||
<cfset ipDesc = replace(ipDesc, '\u0026', '&', 'all')>
|
||||
</cfif>
|
||||
</cfif>
|
||||
|
||||
<!--- Extract displayPrice --->
|
||||
<cfset ipPrice = 0>
|
||||
<cfset ipPriceStart = findNoCase(BQ & "displayPrice" & BQ & ":" & BQ, ip)>
|
||||
<cfif ipPriceStart GT 0>
|
||||
<cfset ipPriceStart = ipPriceStart + len(BQ & "displayPrice" & BQ & ":" & BQ)>
|
||||
<cfset ipPriceEnd = find(BQ, ip, ipPriceStart)>
|
||||
<cfif ipPriceEnd GT ipPriceStart>
|
||||
<cfset ipPriceStr = mid(ip, ipPriceStart, ipPriceEnd - ipPriceStart)>
|
||||
<cfset ipPriceStr = reReplace(ipPriceStr, '[^0-9.]', '', 'all')>
|
||||
<cfset ipPrice = val(ipPriceStr)>
|
||||
</cfif>
|
||||
</cfif>
|
||||
|
||||
<!--- Look up image from carousel image map --->
|
||||
<cfset ipImg = structKeyExists(ddImageMap, ipName) ? ddImageMap[ipName] : "">
|
||||
|
||||
<!--- Also check for imgUrl directly on this MenuPageItem --->
|
||||
<cfif NOT len(ipImg)>
|
||||
<cfset ipImgStart = findNoCase(BQ & "imgUrl" & BQ & ":" & BQ, ip)>
|
||||
<cfif ipImgStart GT 0>
|
||||
<cfset ipImgStart = ipImgStart + len(BQ & "imgUrl" & BQ & ":" & BQ)>
|
||||
<cfset ipImgEnd = find(BQ, ip, ipImgStart)>
|
||||
<cfif ipImgEnd GT ipImgStart>
|
||||
<cfset ipImg = mid(ip, ipImgStart, ipImgEnd - ipImgStart)>
|
||||
<cfif ipImg EQ "null"><cfset ipImg = ""></cfif>
|
||||
<cfif len(ipImg) AND findNoCase("width=", ipImg)>
|
||||
<cfset ipImg = reReplaceNoCase(ipImg, 'width=\d+', 'width=600')>
|
||||
<cfset ipImg = reReplaceNoCase(ipImg, 'height=\d+', 'height=600')>
|
||||
</cfif>
|
||||
</cfif>
|
||||
</cfif>
|
||||
</cfif>
|
||||
|
||||
<cfset ddItemCounter = ddItemCounter + 1>
|
||||
<cfset ddItem = structNew()>
|
||||
<cfset ddItem["name"] = ipName>
|
||||
<cfset ddItem["description"] = ipDesc>
|
||||
<cfset ddItem["price"] = ipPrice>
|
||||
<cfset ddItem["category"] = catName>
|
||||
<cfset ddItem["modifiers"] = arrayNew(1)>
|
||||
<cfset ddItem["id"] = "item_" & ddItemCounter>
|
||||
<cfset ddItem["imageUrl"] = ipImg>
|
||||
<cfset ddItem["imageSrc"] = ipImg>
|
||||
<cfif len(ipImg)>
|
||||
<cfset ddItem["imageFilename"] = listLast(ipImg, "/")>
|
||||
</cfif>
|
||||
<cfset arrayAppend(ddItems, ddItem)>
|
||||
</cfloop>
|
||||
</cfloop>
|
||||
<cfset arrayAppend(response.steps, "Found " & arrayLen(ddItems) & " DoorDash items with images")>
|
||||
|
||||
<!--- Try to assign categories to items using category button labels from HTML --->
|
||||
<!--- DoorDash renders category sections with aria-labels matching category names --->
|
||||
<!--- Items in the carousel don't have explicit category assignment, so use Claude for that --->
|
||||
<cfset ddItemsWithImg = 0>
|
||||
<cfloop array="#ddItems#" index="ddi">
|
||||
<cfif len(ddi.imageUrl)><cfset ddItemsWithImg = ddItemsWithImg + 1></cfif>
|
||||
</cfloop>
|
||||
<cfset arrayAppend(response.steps, "Found " & arrayLen(ddCategories) & " categories, " & arrayLen(ddItems) & " items (" & ddItemsWithImg & " with images)")>
|
||||
|
||||
<!--- Extract business info --->
|
||||
<cfset ddBusiness = structNew()>
|
||||
<!--- Business name from title or og:title --->
|
||||
<cfset ddTitleMatch = reMatchNoCase('<title>([^<]+)</title>', pageHtml)>
|
||||
<cfif arrayLen(ddTitleMatch)>
|
||||
<cfset ddTitle = reReplaceNoCase(ddTitleMatch[1], '<title>([^<]+)</title>', '\1')>
|
||||
|
|
@ -1225,8 +1315,6 @@
|
|||
<cfset ddBusiness["name"] = ddTitle>
|
||||
</cfif>
|
||||
</cfif>
|
||||
|
||||
<!--- Address from StoreHeaderAddress --->
|
||||
<cfset ddAddrMatch = reMatchNoCase('\\"__typename\\":\\"StoreHeaderAddress\\",\\"street\\":\\"([^\\"]+)\\",\\"displayAddress\\":\\"([^\\"]+)\\"', pageHtml)>
|
||||
<cfif arrayLen(ddAddrMatch)>
|
||||
<cfset ddAddr = reReplaceNoCase(ddAddrMatch[1], '.*\\"displayAddress\\":\\"([^\\"]+)\\".*', '\1')>
|
||||
|
|
@ -1234,61 +1322,6 @@
|
|||
</cfif>
|
||||
|
||||
<cfif arrayLen(ddItems) GT 0>
|
||||
<!--- Use Claude to assign categories to items --->
|
||||
<cfset ddCatNames = arrayNew(1)>
|
||||
<cfloop array="#ddCategories#" index="ddCat">
|
||||
<cfset arrayAppend(ddCatNames, ddCat.name)>
|
||||
</cfloop>
|
||||
<cfset ddItemNames = arrayNew(1)>
|
||||
<cfloop array="#ddItems#" index="ddI">
|
||||
<cfset arrayAppend(ddItemNames, ddI.name & " - " & left(ddI.description, 60))>
|
||||
</cfloop>
|
||||
|
||||
<cfset ddCatPrompt = "Given these restaurant menu categories: " & arrayToList(ddCatNames, ", ") & chr(10) & chr(10) & "Assign each of these items to the best matching category. Return ONLY a JSON array of category names in the same order as the items:" & chr(10) & serializeJSON(ddItemNames)>
|
||||
|
||||
<cfset ddCatRequest = {
|
||||
"model": "claude-sonnet-4-20250514",
|
||||
"max_tokens": 4096,
|
||||
"temperature": 0,
|
||||
"messages": [{ "role": "user", "content": ddCatPrompt }]
|
||||
}>
|
||||
|
||||
<cftry>
|
||||
<cfhttp url="https://api.anthropic.com/v1/messages" method="POST" timeout="60" result="ddCatResult">
|
||||
<cfhttpparam type="header" name="Content-Type" value="application/json">
|
||||
<cfhttpparam type="header" name="x-api-key" value="#CLAUDE_API_KEY#">
|
||||
<cfhttpparam type="header" name="anthropic-version" value="2023-06-01">
|
||||
<cfhttpparam type="body" value="#serializeJSON(ddCatRequest)#">
|
||||
</cfhttp>
|
||||
|
||||
<cfif findNoCase("200", ddCatResult.statusCode)>
|
||||
<cfset ddCatResponse = deserializeJSON(ddCatResult.fileContent)>
|
||||
<cfset ddCatText = ddCatResponse.content[1].text>
|
||||
<!--- Strip code fences --->
|
||||
<cfset ddCatText = trim(ddCatText)>
|
||||
<cfif left(ddCatText, 7) EQ "```json">
|
||||
<cfset ddCatText = mid(ddCatText, 8, len(ddCatText) - 7)>
|
||||
</cfif>
|
||||
<cfif left(ddCatText, 3) EQ "```">
|
||||
<cfset ddCatText = mid(ddCatText, 4, len(ddCatText) - 3)>
|
||||
</cfif>
|
||||
<cfif right(ddCatText, 3) EQ "```">
|
||||
<cfset ddCatText = left(ddCatText, len(ddCatText) - 3)>
|
||||
</cfif>
|
||||
<cfset ddCatText = trim(ddCatText)>
|
||||
<cfset ddCatAssignments = deserializeJSON(ddCatText)>
|
||||
<cfif isArray(ddCatAssignments) AND arrayLen(ddCatAssignments) EQ arrayLen(ddItems)>
|
||||
<cfloop from="1" to="#arrayLen(ddItems)#" index="ddIdx">
|
||||
<cfset ddItems[ddIdx]["category"] = ddCatAssignments[ddIdx]>
|
||||
</cfloop>
|
||||
<cfset arrayAppend(response.steps, "Claude assigned categories to all items")>
|
||||
</cfif>
|
||||
</cfif>
|
||||
<cfcatch>
|
||||
<cfset arrayAppend(response.steps, "Category assignment failed: " & cfcatch.message)>
|
||||
</cfcatch>
|
||||
</cftry>
|
||||
|
||||
<!--- Build image URL list --->
|
||||
<cfset ddImageUrls = arrayNew(1)>
|
||||
<cfloop array="#ddItems#" index="ddI">
|
||||
|
|
|
|||
Reference in a new issue