Fix DoorDash parser: use find() loops instead of listToArray

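listToArray treats each character of its delimiter argument as a separate
single-character delimiter instead of matching the argument as one string,
so the page was being split on every backslash, quote, colon, and letter in
the marker. Rewritten to walk the page with position-based find() calls so
that sections are split only on the complete multi-character marker.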

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
John Mizerek 2026-03-10 12:23:21 -07:00
parent 33040c9cd3
commit a830a0820a

View file

@ -1152,127 +1152,160 @@
<cfset arrayAppend(response.steps, "DoorDash/order.online site detected - extracting embedded data")>
<cftry>
<!--- DoorDash embeds menu data as escaped JSON in script tags --->
<!--- The backslash-quote delimiter used throughout --->
<cfset BQ = '\"'><!--- literal backslash-quote as it appears in the HTML --->
<!--- Backslash-quote as it appears in the HTML: chr(92) & chr(34) --->
<cfset BQ = chr(92) & chr(34)>
<!--- Build an image map from StorePageCarouselItem entries (these have imgUrl) --->
<!--- Helper function: extract value after a \"key\":\" pattern --->
<!--- Returns text between the opening \" and closing \" --->
<!--- Build image map from StorePageCarouselItem entries --->
<cfset ddImageMap = structNew()>
<cfset ddCarouselParts = listToArray(pageHtml, BQ & "__typename" & BQ & ":" & BQ & "StorePageCarouselItem" & BQ & "," & BQ & "id" & BQ & ":" & BQ)>
<cfloop from="2" to="#arrayLen(ddCarouselParts)#" index="cpIdx">
<cfset cp = ddCarouselParts[cpIdx]>
<cfset carouselMarker = BQ & "__typename" & BQ & ":" & BQ & "StorePageCarouselItem" & BQ>
<cfset searchPos = 1>
<cfloop condition="true">
<cfset searchPos = findNoCase(carouselMarker, pageHtml, searchPos)>
<cfif searchPos EQ 0><cfbreak></cfif>
<!--- Find the end of this entry (next typename marker or reasonable limit) --->
<cfset nextMarker = findNoCase(BQ & "__typename" & BQ, pageHtml, searchPos + len(carouselMarker))>
<cfif nextMarker EQ 0><cfset nextMarker = len(pageHtml)></cfif>
<cfset entryText = mid(pageHtml, searchPos, nextMarker - searchPos)>
<!--- Extract name --->
<cfset cpNameStart = findNoCase(BQ & "name" & BQ & ":" & BQ, cp)>
<cfif cpNameStart GT 0>
<cfset cpNameStart = cpNameStart + len(BQ & "name" & BQ & ":" & BQ)>
<cfset cpNameEnd = find(BQ, cp, cpNameStart)>
<cfif cpNameEnd GT cpNameStart>
<cfset cpName = mid(cp, cpNameStart, cpNameEnd - cpNameStart)>
<cfset nameKey = BQ & "name" & BQ & ":" & BQ>
<cfset nPos = findNoCase(nameKey, entryText)>
<cfif nPos GT 0>
<cfset nStart = nPos + len(nameKey)>
<cfset nEnd = find(BQ, entryText, nStart)>
<cfif nEnd GT nStart>
<cfset cpName = mid(entryText, nStart, nEnd - nStart)>
<!--- Extract imgUrl --->
<cfset cpImgStart = findNoCase(BQ & "imgUrl" & BQ & ":" & BQ, cp)>
<cfif cpImgStart GT 0>
<cfset cpImgStart = cpImgStart + len(BQ & "imgUrl" & BQ & ":" & BQ)>
<cfset cpImgEnd = find(BQ, cp, cpImgStart)>
<cfif cpImgEnd GT cpImgStart>
<cfset cpImgUrl = mid(cp, cpImgStart, cpImgEnd - cpImgStart)>
<cfif len(cpImgUrl) AND cpImgUrl NEQ "null">
<!--- Upgrade to larger size --->
<cfif findNoCase("width=", cpImgUrl)>
<cfset cpImgUrl = reReplaceNoCase(cpImgUrl, 'width=\d+', 'width=600')>
<cfset cpImgUrl = reReplaceNoCase(cpImgUrl, 'height=\d+', 'height=600')>
<cfset imgKey = BQ & "imgUrl" & BQ & ":" & BQ>
<cfset iPos = findNoCase(imgKey, entryText)>
<cfif iPos GT 0>
<cfset iStart = iPos + len(imgKey)>
<cfset iEnd = find(BQ, entryText, iStart)>
<cfif iEnd GT iStart>
<cfset cpImg = mid(entryText, iStart, iEnd - iStart)>
<cfif len(cpImg) AND cpImg NEQ "null" AND findNoCase("http", cpImg)>
<cfif findNoCase("width=", cpImg)>
<cfset cpImg = reReplaceNoCase(cpImg, 'width=\d+', 'width=600')>
<cfset cpImg = reReplaceNoCase(cpImg, 'height=\d+', 'height=600')>
</cfif>
<cfset ddImageMap[cpName] = cpImgUrl>
<cfset ddImageMap[cpName] = cpImg>
</cfif>
</cfif>
</cfif>
</cfif>
</cfif>
<cfset searchPos = searchPos + len(carouselMarker)>
</cfloop>
<cfset arrayAppend(response.steps, "Built image map with " & structCount(ddImageMap) & " entries from carousel")>
<!--- Extract full menu from MenuPageItemList (categories) and MenuPageItem (items) --->
<!--- Extract full menu from MenuPageItemList (categories with items) --->
<cfset ddCategories = arrayNew(1)>
<cfset ddCatSeen = structNew()>
<cfset ddItems = arrayNew(1)>
<cfset ddItemSeen = structNew()>
<cfset ddItemCounter = 0>
<!--- Split on MenuPageItemList to get category sections --->
<cfset ddCatParts = listToArray(pageHtml, BQ & "__typename" & BQ & ":" & BQ & "MenuPageItemList" & BQ & "," & BQ & "id" & BQ & ":" & BQ)>
<cfloop from="2" to="#arrayLen(ddCatParts)#" index="catIdx">
<cfset catPart = ddCatParts[catIdx]>
<cfset catMarker = BQ & "__typename" & BQ & ":" & BQ & "MenuPageItemList" & BQ>
<cfset itemMarker = BQ & "__typename" & BQ & ":" & BQ & "MenuPageItem" & BQ>
<cfset nameKey = BQ & "name" & BQ & ":" & BQ>
<cfset descKey = BQ & "description" & BQ & ":" & BQ>
<cfset priceKey = BQ & "displayPrice" & BQ & ":" & BQ>
<cfset imgKey = BQ & "imgUrl" & BQ & ":" & BQ>
<!--- Find each MenuPageItemList section --->
<cfset catPos = 1>
<cfloop condition="true">
<cfset catPos = findNoCase(catMarker, pageHtml, catPos)>
<cfif catPos EQ 0><cfbreak></cfif>
<!--- Find end of this category section (next MenuPageItemList or end) --->
<cfset nextCatPos = findNoCase(catMarker, pageHtml, catPos + len(catMarker))>
<cfif nextCatPos EQ 0><cfset nextCatPos = len(pageHtml)></cfif>
<cfset catSection = mid(pageHtml, catPos, nextCatPos - catPos)>
<!--- Extract category name --->
<cfset catNameStart = findNoCase(BQ & "name" & BQ & ":" & BQ, catPart)>
<cfif catNameStart EQ 0><cfcontinue></cfif>
<cfset catNameStart = catNameStart + len(BQ & "name" & BQ & ":" & BQ)>
<cfset catNameEnd = find(BQ, catPart, catNameStart)>
<cfif catNameEnd LTE catNameStart><cfcontinue></cfif>
<cfset catName = mid(catPart, catNameStart, catNameEnd - catNameStart)>
<cfset cnPos = findNoCase(nameKey, catSection)>
<cfif cnPos EQ 0><cfset catPos = catPos + len(catMarker)><cfcontinue></cfif>
<cfset cnStart = cnPos + len(nameKey)>
<cfset cnEnd = find(BQ, catSection, cnStart)>
<cfif cnEnd LTE cnStart><cfset catPos = catPos + len(catMarker)><cfcontinue></cfif>
<cfset catName = mid(catSection, cnStart, cnEnd - cnStart)>
<cfset catName = replace(catName, '\u0026', '&', 'all')>
<cfset catName = replace(catName, '&amp;', '&', 'all')>
<!--- Skip "Most Ordered" and duplicates --->
<cfif catName EQ "Most Ordered" OR structKeyExists(ddCatSeen, catName)>
<cfset catPos = catPos + len(catMarker)>
<cfcontinue>
</cfif>
<cfset ddCatSeen[catName] = true>
<cfset arrayAppend(ddCategories, { "name": catName, "parentCategoryName": "" })>
<!--- Extract items within this category section --->
<cfset itemParts = listToArray(catPart, BQ & "__typename" & BQ & ":" & BQ & "MenuPageItem" & BQ & "," & BQ & "id" & BQ & ":" & BQ)>
<cfloop from="2" to="#arrayLen(itemParts)#" index="ipIdx">
<cfset ip = itemParts[ipIdx]>
<!--- Find all MenuPageItem entries within this category section --->
<cfset itemPos = 1>
<cfloop condition="true">
<cfset itemPos = findNoCase(itemMarker, catSection, itemPos)>
<cfif itemPos EQ 0><cfbreak></cfif>
<!--- Find end of this item entry --->
<cfset nextItemPos = findNoCase(itemMarker, catSection, itemPos + len(itemMarker))>
<cfif nextItemPos EQ 0><cfset nextItemPos = len(catSection)></cfif>
<cfset itemEntry = mid(catSection, itemPos, nextItemPos - itemPos)>
<!--- Extract item name --->
<cfset ipNameStart = findNoCase(BQ & "name" & BQ & ":" & BQ, ip)>
<cfif ipNameStart EQ 0><cfcontinue></cfif>
<cfset ipNameStart = ipNameStart + len(BQ & "name" & BQ & ":" & BQ)>
<cfset ipNameEnd = find(BQ, ip, ipNameStart)>
<cfif ipNameEnd LTE ipNameStart><cfcontinue></cfif>
<cfset ipName = mid(ip, ipNameStart, ipNameEnd - ipNameStart)>
<cfset inPos = findNoCase(nameKey, itemEntry)>
<cfif inPos EQ 0><cfset itemPos = itemPos + len(itemMarker)><cfcontinue></cfif>
<cfset inStart = inPos + len(nameKey)>
<cfset inEnd = find(BQ, itemEntry, inStart)>
<cfif inEnd LTE inStart><cfset itemPos = itemPos + len(itemMarker)><cfcontinue></cfif>
<cfset ipName = mid(itemEntry, inStart, inEnd - inStart)>
<cfset ipName = replace(ipName, '\u0026', '&', 'all')>
<!--- Skip duplicates --->
<cfif structKeyExists(ddItemSeen, ipName)><cfcontinue></cfif>
<cfif structKeyExists(ddItemSeen, ipName)>
<cfset itemPos = itemPos + len(itemMarker)>
<cfcontinue>
</cfif>
<cfset ddItemSeen[ipName] = true>
<!--- Extract description --->
<cfset ipDesc = "">
<cfset ipDescStart = findNoCase(BQ & "description" & BQ & ":" & BQ, ip)>
<cfif ipDescStart GT 0>
<cfset ipDescStart = ipDescStart + len(BQ & "description" & BQ & ":" & BQ)>
<cfset ipDescEnd = find(BQ, ip, ipDescStart)>
<cfif ipDescEnd GT ipDescStart>
<cfset ipDesc = mid(ip, ipDescStart, ipDescEnd - ipDescStart)>
<cfset idPos = findNoCase(descKey, itemEntry)>
<cfif idPos GT 0>
<cfset idStart = idPos + len(descKey)>
<cfset idEnd = find(BQ, itemEntry, idStart)>
<cfif idEnd GT idStart>
<cfset ipDesc = mid(itemEntry, idStart, idEnd - idStart)>
<cfset ipDesc = replace(ipDesc, '\u0026', '&', 'all')>
</cfif>
</cfif>
<!--- Extract displayPrice --->
<cfset ipPrice = 0>
<cfset ipPriceStart = findNoCase(BQ & "displayPrice" & BQ & ":" & BQ, ip)>
<cfif ipPriceStart GT 0>
<cfset ipPriceStart = ipPriceStart + len(BQ & "displayPrice" & BQ & ":" & BQ)>
<cfset ipPriceEnd = find(BQ, ip, ipPriceStart)>
<cfif ipPriceEnd GT ipPriceStart>
<cfset ipPriceStr = mid(ip, ipPriceStart, ipPriceEnd - ipPriceStart)>
<cfset ipPos = findNoCase(priceKey, itemEntry)>
<cfif ipPos GT 0>
<cfset ipStart = ipPos + len(priceKey)>
<cfset ipEnd = find(BQ, itemEntry, ipStart)>
<cfif ipEnd GT ipStart>
<cfset ipPriceStr = mid(itemEntry, ipStart, ipEnd - ipStart)>
<cfset ipPriceStr = reReplace(ipPriceStr, '[^0-9.]', '', 'all')>
<cfset ipPrice = val(ipPriceStr)>
</cfif>
</cfif>
<!--- Look up image from carousel image map --->
<!--- Look up image from carousel map, or check imgUrl on item --->
<cfset ipImg = structKeyExists(ddImageMap, ipName) ? ddImageMap[ipName] : "">
<!--- Also check for imgUrl directly on this MenuPageItem --->
<cfif NOT len(ipImg)>
<cfset ipImgStart = findNoCase(BQ & "imgUrl" & BQ & ":" & BQ, ip)>
<cfif ipImgStart GT 0>
<cfset ipImgStart = ipImgStart + len(BQ & "imgUrl" & BQ & ":" & BQ)>
<cfset ipImgEnd = find(BQ, ip, ipImgStart)>
<cfif ipImgEnd GT ipImgStart>
<cfset ipImg = mid(ip, ipImgStart, ipImgEnd - ipImgStart)>
<cfif ipImg EQ "null"><cfset ipImg = ""></cfif>
<cfset iiPos = findNoCase(imgKey, itemEntry)>
<cfif iiPos GT 0>
<cfset iiStart = iiPos + len(imgKey)>
<cfset iiEnd = find(BQ, itemEntry, iiStart)>
<cfif iiEnd GT iiStart>
<cfset ipImg = mid(itemEntry, iiStart, iiEnd - iiStart)>
<cfif ipImg EQ "null" OR NOT findNoCase("http", ipImg)><cfset ipImg = ""></cfif>
<cfif len(ipImg) AND findNoCase("width=", ipImg)>
<cfset ipImg = reReplaceNoCase(ipImg, 'width=\d+', 'width=600')>
<cfset ipImg = reReplaceNoCase(ipImg, 'height=\d+', 'height=600')>
@ -1295,7 +1328,11 @@
<cfset ddItem["imageFilename"] = listLast(ipImg, "/")>
</cfif>
<cfset arrayAppend(ddItems, ddItem)>
<cfset itemPos = itemPos + len(itemMarker)>
</cfloop>
<cfset catPos = catPos + len(catMarker)>
</cfloop>
<cfset ddItemsWithImg = 0>