Fix DoorDash parser: use find() loops instead of listToArray

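listToArray treats each character of its delimiter argument as a separate
single-character delimiter by default, not as one multi-character string, so
the long backslash-quote markers split the page HTML in the wrong places.
The parser is rewritten to walk the HTML with position-based find() /
findNoCase() loops so that each multi-character marker is matched as a whole.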

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
John Mizerek 2026-03-10 12:23:21 -07:00
parent 33040c9cd3
commit a830a0820a

View file

@ -1152,127 +1152,160 @@
<cfset arrayAppend(response.steps, "DoorDash/order.online site detected - extracting embedded data")> <cfset arrayAppend(response.steps, "DoorDash/order.online site detected - extracting embedded data")>
<cftry> <cftry>
<!--- DoorDash embeds menu data as escaped JSON in script tags ---> <!--- DoorDash embeds menu data as escaped JSON in script tags --->
<!--- The backslash-quote delimiter used throughout ---> <!--- Backslash-quote as it appears in the HTML: chr(92) & chr(34) --->
<cfset BQ = '\"'><!--- literal backslash-quote as it appears in the HTML ---> <cfset BQ = chr(92) & chr(34)>
<!--- Build an image map from StorePageCarouselItem entries (these have imgUrl) ---> <!--- Value extraction pattern used below (inline, not a helper function): locate a \"key\":\" prefix --->
<!--- then take the text between that opening \" and the next closing \" --->
<!--- Build image map from StorePageCarouselItem entries --->
<cfset ddImageMap = structNew()> <cfset ddImageMap = structNew()>
<cfset ddCarouselParts = listToArray(pageHtml, BQ & "__typename" & BQ & ":" & BQ & "StorePageCarouselItem" & BQ & "," & BQ & "id" & BQ & ":" & BQ)> <cfset carouselMarker = BQ & "__typename" & BQ & ":" & BQ & "StorePageCarouselItem" & BQ>
<cfloop from="2" to="#arrayLen(ddCarouselParts)#" index="cpIdx"> <cfset searchPos = 1>
<cfset cp = ddCarouselParts[cpIdx]> <cfloop condition="true">
<cfset searchPos = findNoCase(carouselMarker, pageHtml, searchPos)>
<cfif searchPos EQ 0><cfbreak></cfif>
<!--- Find the end of this entry: the next \"__typename\" marker, or the end of pageHtml if none follows --->
<cfset nextMarker = findNoCase(BQ & "__typename" & BQ, pageHtml, searchPos + len(carouselMarker))>
<cfif nextMarker EQ 0><cfset nextMarker = len(pageHtml)></cfif>
<cfset entryText = mid(pageHtml, searchPos, nextMarker - searchPos)>
<!--- Extract name ---> <!--- Extract name --->
<cfset cpNameStart = findNoCase(BQ & "name" & BQ & ":" & BQ, cp)> <cfset nameKey = BQ & "name" & BQ & ":" & BQ>
<cfif cpNameStart GT 0> <cfset nPos = findNoCase(nameKey, entryText)>
<cfset cpNameStart = cpNameStart + len(BQ & "name" & BQ & ":" & BQ)> <cfif nPos GT 0>
<cfset cpNameEnd = find(BQ, cp, cpNameStart)> <cfset nStart = nPos + len(nameKey)>
<cfif cpNameEnd GT cpNameStart> <cfset nEnd = find(BQ, entryText, nStart)>
<cfset cpName = mid(cp, cpNameStart, cpNameEnd - cpNameStart)> <cfif nEnd GT nStart>
<cfset cpName = mid(entryText, nStart, nEnd - nStart)>
<!--- Extract imgUrl ---> <!--- Extract imgUrl --->
<cfset cpImgStart = findNoCase(BQ & "imgUrl" & BQ & ":" & BQ, cp)> <cfset imgKey = BQ & "imgUrl" & BQ & ":" & BQ>
<cfif cpImgStart GT 0> <cfset iPos = findNoCase(imgKey, entryText)>
<cfset cpImgStart = cpImgStart + len(BQ & "imgUrl" & BQ & ":" & BQ)> <cfif iPos GT 0>
<cfset cpImgEnd = find(BQ, cp, cpImgStart)> <cfset iStart = iPos + len(imgKey)>
<cfif cpImgEnd GT cpImgStart> <cfset iEnd = find(BQ, entryText, iStart)>
<cfset cpImgUrl = mid(cp, cpImgStart, cpImgEnd - cpImgStart)> <cfif iEnd GT iStart>
<cfif len(cpImgUrl) AND cpImgUrl NEQ "null"> <cfset cpImg = mid(entryText, iStart, iEnd - iStart)>
<!--- Upgrade to larger size ---> <cfif len(cpImg) AND cpImg NEQ "null" AND findNoCase("http", cpImg)>
<cfif findNoCase("width=", cpImgUrl)> <cfif findNoCase("width=", cpImg)>
<cfset cpImgUrl = reReplaceNoCase(cpImgUrl, 'width=\d+', 'width=600')> <cfset cpImg = reReplaceNoCase(cpImg, 'width=\d+', 'width=600')>
<cfset cpImgUrl = reReplaceNoCase(cpImgUrl, 'height=\d+', 'height=600')> <cfset cpImg = reReplaceNoCase(cpImg, 'height=\d+', 'height=600')>
</cfif> </cfif>
<cfset ddImageMap[cpName] = cpImgUrl> <cfset ddImageMap[cpName] = cpImg>
</cfif> </cfif>
</cfif> </cfif>
</cfif> </cfif>
</cfif> </cfif>
</cfif> </cfif>
<cfset searchPos = searchPos + len(carouselMarker)>
</cfloop> </cfloop>
<cfset arrayAppend(response.steps, "Built image map with " & structCount(ddImageMap) & " entries from carousel")> <cfset arrayAppend(response.steps, "Built image map with " & structCount(ddImageMap) & " entries from carousel")>
<!--- Extract full menu from MenuPageItemList (categories) and MenuPageItem (items) ---> <!--- Extract full menu from MenuPageItemList (categories with items) --->
<cfset ddCategories = arrayNew(1)> <cfset ddCategories = arrayNew(1)>
<cfset ddCatSeen = structNew()> <cfset ddCatSeen = structNew()>
<cfset ddItems = arrayNew(1)> <cfset ddItems = arrayNew(1)>
<cfset ddItemSeen = structNew()> <cfset ddItemSeen = structNew()>
<cfset ddItemCounter = 0> <cfset ddItemCounter = 0>
<!--- Split on MenuPageItemList to get category sections ---> <cfset catMarker = BQ & "__typename" & BQ & ":" & BQ & "MenuPageItemList" & BQ>
<cfset ddCatParts = listToArray(pageHtml, BQ & "__typename" & BQ & ":" & BQ & "MenuPageItemList" & BQ & "," & BQ & "id" & BQ & ":" & BQ)> <cfset itemMarker = BQ & "__typename" & BQ & ":" & BQ & "MenuPageItem" & BQ>
<cfloop from="2" to="#arrayLen(ddCatParts)#" index="catIdx"> <cfset nameKey = BQ & "name" & BQ & ":" & BQ>
<cfset catPart = ddCatParts[catIdx]> <cfset descKey = BQ & "description" & BQ & ":" & BQ>
<cfset priceKey = BQ & "displayPrice" & BQ & ":" & BQ>
<cfset imgKey = BQ & "imgUrl" & BQ & ":" & BQ>
<!--- Find each MenuPageItemList section --->
<cfset catPos = 1>
<cfloop condition="true">
<cfset catPos = findNoCase(catMarker, pageHtml, catPos)>
<cfif catPos EQ 0><cfbreak></cfif>
<!--- Find end of this category section (next MenuPageItemList or end) --->
<cfset nextCatPos = findNoCase(catMarker, pageHtml, catPos + len(catMarker))>
<cfif nextCatPos EQ 0><cfset nextCatPos = len(pageHtml)></cfif>
<cfset catSection = mid(pageHtml, catPos, nextCatPos - catPos)>
<!--- Extract category name ---> <!--- Extract category name --->
<cfset catNameStart = findNoCase(BQ & "name" & BQ & ":" & BQ, catPart)> <cfset cnPos = findNoCase(nameKey, catSection)>
<cfif catNameStart EQ 0><cfcontinue></cfif> <cfif cnPos EQ 0><cfset catPos = catPos + len(catMarker)><cfcontinue></cfif>
<cfset catNameStart = catNameStart + len(BQ & "name" & BQ & ":" & BQ)> <cfset cnStart = cnPos + len(nameKey)>
<cfset catNameEnd = find(BQ, catPart, catNameStart)> <cfset cnEnd = find(BQ, catSection, cnStart)>
<cfif catNameEnd LTE catNameStart><cfcontinue></cfif> <cfif cnEnd LTE cnStart><cfset catPos = catPos + len(catMarker)><cfcontinue></cfif>
<cfset catName = mid(catPart, catNameStart, catNameEnd - catNameStart)> <cfset catName = mid(catSection, cnStart, cnEnd - cnStart)>
<cfset catName = replace(catName, '\u0026', '&', 'all')> <cfset catName = replace(catName, '\u0026', '&', 'all')>
<cfset catName = replace(catName, '&amp;', '&', 'all')> <cfset catName = replace(catName, '&amp;', '&', 'all')>
<!--- Skip "Most Ordered" and duplicates ---> <!--- Skip "Most Ordered" and duplicates --->
<cfif catName EQ "Most Ordered" OR structKeyExists(ddCatSeen, catName)> <cfif catName EQ "Most Ordered" OR structKeyExists(ddCatSeen, catName)>
<cfset catPos = catPos + len(catMarker)>
<cfcontinue> <cfcontinue>
</cfif> </cfif>
<cfset ddCatSeen[catName] = true> <cfset ddCatSeen[catName] = true>
<cfset arrayAppend(ddCategories, { "name": catName, "parentCategoryName": "" })> <cfset arrayAppend(ddCategories, { "name": catName, "parentCategoryName": "" })>
<!--- Extract items within this category section ---> <!--- Find all MenuPageItem entries within this category section --->
<cfset itemParts = listToArray(catPart, BQ & "__typename" & BQ & ":" & BQ & "MenuPageItem" & BQ & "," & BQ & "id" & BQ & ":" & BQ)> <cfset itemPos = 1>
<cfloop from="2" to="#arrayLen(itemParts)#" index="ipIdx"> <cfloop condition="true">
<cfset ip = itemParts[ipIdx]> <cfset itemPos = findNoCase(itemMarker, catSection, itemPos)>
<cfif itemPos EQ 0><cfbreak></cfif>
<!--- Find end of this item entry --->
<cfset nextItemPos = findNoCase(itemMarker, catSection, itemPos + len(itemMarker))>
<cfif nextItemPos EQ 0><cfset nextItemPos = len(catSection)></cfif>
<cfset itemEntry = mid(catSection, itemPos, nextItemPos - itemPos)>
<!--- Extract item name ---> <!--- Extract item name --->
<cfset ipNameStart = findNoCase(BQ & "name" & BQ & ":" & BQ, ip)> <cfset inPos = findNoCase(nameKey, itemEntry)>
<cfif ipNameStart EQ 0><cfcontinue></cfif> <cfif inPos EQ 0><cfset itemPos = itemPos + len(itemMarker)><cfcontinue></cfif>
<cfset ipNameStart = ipNameStart + len(BQ & "name" & BQ & ":" & BQ)> <cfset inStart = inPos + len(nameKey)>
<cfset ipNameEnd = find(BQ, ip, ipNameStart)> <cfset inEnd = find(BQ, itemEntry, inStart)>
<cfif ipNameEnd LTE ipNameStart><cfcontinue></cfif> <cfif inEnd LTE inStart><cfset itemPos = itemPos + len(itemMarker)><cfcontinue></cfif>
<cfset ipName = mid(ip, ipNameStart, ipNameEnd - ipNameStart)> <cfset ipName = mid(itemEntry, inStart, inEnd - inStart)>
<cfset ipName = replace(ipName, '\u0026', '&', 'all')> <cfset ipName = replace(ipName, '\u0026', '&', 'all')>
<!--- Skip duplicates ---> <!--- Skip duplicates --->
<cfif structKeyExists(ddItemSeen, ipName)><cfcontinue></cfif> <cfif structKeyExists(ddItemSeen, ipName)>
<cfset itemPos = itemPos + len(itemMarker)>
<cfcontinue>
</cfif>
<cfset ddItemSeen[ipName] = true> <cfset ddItemSeen[ipName] = true>
<!--- Extract description ---> <!--- Extract description --->
<cfset ipDesc = ""> <cfset ipDesc = "">
<cfset ipDescStart = findNoCase(BQ & "description" & BQ & ":" & BQ, ip)> <cfset idPos = findNoCase(descKey, itemEntry)>
<cfif ipDescStart GT 0> <cfif idPos GT 0>
<cfset ipDescStart = ipDescStart + len(BQ & "description" & BQ & ":" & BQ)> <cfset idStart = idPos + len(descKey)>
<cfset ipDescEnd = find(BQ, ip, ipDescStart)> <cfset idEnd = find(BQ, itemEntry, idStart)>
<cfif ipDescEnd GT ipDescStart> <cfif idEnd GT idStart>
<cfset ipDesc = mid(ip, ipDescStart, ipDescEnd - ipDescStart)> <cfset ipDesc = mid(itemEntry, idStart, idEnd - idStart)>
<cfset ipDesc = replace(ipDesc, '\u0026', '&', 'all')> <cfset ipDesc = replace(ipDesc, '\u0026', '&', 'all')>
</cfif> </cfif>
</cfif> </cfif>
<!--- Extract displayPrice ---> <!--- Extract displayPrice --->
<cfset ipPrice = 0> <cfset ipPrice = 0>
<cfset ipPriceStart = findNoCase(BQ & "displayPrice" & BQ & ":" & BQ, ip)> <cfset ipPos = findNoCase(priceKey, itemEntry)>
<cfif ipPriceStart GT 0> <cfif ipPos GT 0>
<cfset ipPriceStart = ipPriceStart + len(BQ & "displayPrice" & BQ & ":" & BQ)> <cfset ipStart = ipPos + len(priceKey)>
<cfset ipPriceEnd = find(BQ, ip, ipPriceStart)> <cfset ipEnd = find(BQ, itemEntry, ipStart)>
<cfif ipPriceEnd GT ipPriceStart> <cfif ipEnd GT ipStart>
<cfset ipPriceStr = mid(ip, ipPriceStart, ipPriceEnd - ipPriceStart)> <cfset ipPriceStr = mid(itemEntry, ipStart, ipEnd - ipStart)>
<cfset ipPriceStr = reReplace(ipPriceStr, '[^0-9.]', '', 'all')> <cfset ipPriceStr = reReplace(ipPriceStr, '[^0-9.]', '', 'all')>
<cfset ipPrice = val(ipPriceStr)> <cfset ipPrice = val(ipPriceStr)>
</cfif> </cfif>
</cfif> </cfif>
<!--- Look up image from carousel image map ---> <!--- Look up image from carousel map, or check imgUrl on item --->
<cfset ipImg = structKeyExists(ddImageMap, ipName) ? ddImageMap[ipName] : ""> <cfset ipImg = structKeyExists(ddImageMap, ipName) ? ddImageMap[ipName] : "">
<!--- Also check for imgUrl directly on this MenuPageItem --->
<cfif NOT len(ipImg)> <cfif NOT len(ipImg)>
<cfset ipImgStart = findNoCase(BQ & "imgUrl" & BQ & ":" & BQ, ip)> <cfset iiPos = findNoCase(imgKey, itemEntry)>
<cfif ipImgStart GT 0> <cfif iiPos GT 0>
<cfset ipImgStart = ipImgStart + len(BQ & "imgUrl" & BQ & ":" & BQ)> <cfset iiStart = iiPos + len(imgKey)>
<cfset ipImgEnd = find(BQ, ip, ipImgStart)> <cfset iiEnd = find(BQ, itemEntry, iiStart)>
<cfif ipImgEnd GT ipImgStart> <cfif iiEnd GT iiStart>
<cfset ipImg = mid(ip, ipImgStart, ipImgEnd - ipImgStart)> <cfset ipImg = mid(itemEntry, iiStart, iiEnd - iiStart)>
<cfif ipImg EQ "null"><cfset ipImg = ""></cfif> <cfif ipImg EQ "null" OR NOT findNoCase("http", ipImg)><cfset ipImg = ""></cfif>
<cfif len(ipImg) AND findNoCase("width=", ipImg)> <cfif len(ipImg) AND findNoCase("width=", ipImg)>
<cfset ipImg = reReplaceNoCase(ipImg, 'width=\d+', 'width=600')> <cfset ipImg = reReplaceNoCase(ipImg, 'width=\d+', 'width=600')>
<cfset ipImg = reReplaceNoCase(ipImg, 'height=\d+', 'height=600')> <cfset ipImg = reReplaceNoCase(ipImg, 'height=\d+', 'height=600')>
@ -1295,7 +1328,11 @@
<cfset ddItem["imageFilename"] = listLast(ipImg, "/")> <cfset ddItem["imageFilename"] = listLast(ipImg, "/")>
</cfif> </cfif>
<cfset arrayAppend(ddItems, ddItem)> <cfset arrayAppend(ddItems, ddItem)>
<cfset itemPos = itemPos + len(itemMarker)>
</cfloop> </cfloop>
<cfset catPos = catPos + len(catMarker)>
</cfloop> </cfloop>
<cfset ddItemsWithImg = 0> <cfset ddItemsWithImg = 0>