Add direct Toast menu parsing via __OO_STATE__
Skip Claude AI for Toast menus - parse the embedded JSON directly. This extracts all items, categories, and images from the structured __OO_STATE__ data, which is faster and more complete than AI extraction.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
1b16dd8671
commit
b5abbe43b4
1 changed files with 103 additions and 0 deletions
|
|
@ -68,6 +68,109 @@
|
|||
<cfset playwrightImages = arrayNew(1)>
|
||||
<cfset arrayAppend(response.steps, "Read " & len(pageHtml) & " bytes from local file")>
|
||||
|
||||
<!---
    Toast fast path.
    When the page HTML contains a "window.__OO_STATE__" assignment, parse that
    embedded JSON directly and build the menu response from it instead of going
    through the Claude extraction.

    Reads:  pageHtml, targetUrl, response (with response.steps) from the enclosing page.
    On success (at least one item extracted): writes the serialized response and
    aborts the request.
    On any failure (assignment not matched, invalid JSON, unexpected shape, or no
    items found): appends a note to response.steps and falls through to the code
    that follows this block.
--->
<cfif findNoCase("window.__OO_STATE__", pageHtml)>
    <cfset arrayAppend(response.steps, "Toast menu detected - parsing __OO_STATE__ directly")>

    <!--- Extract the JSON using regex.
          reMatchNoCase returns whole matches (not capture groups), so the
          assignment prefix and the trailing ";window." are stripped afterwards. --->
    <cfset ooStateMatch = reMatchNoCase("window\.__OO_STATE__\s*=\s*(\{.*?\});\s*window\.", pageHtml)>
    <cfif arrayLen(ooStateMatch)>
        <cfset ooStateJson = reReplaceNoCase(ooStateMatch[1], "window\.__OO_STATE__\s*=\s*", "")>
        <cfset ooStateJson = reReplace(ooStateJson, ";\s*window\.$", "")>

        <cftry>
            <cfset ooState = deserializeJSON(ooStateJson)>
            <cfset arrayAppend(response.steps, "Parsed __OO_STATE__ JSON successfully")>

            <!--- Extract menus, categories, and items --->
            <cfset toastBusiness = structNew()>
            <cfset toastCategories = arrayNew(1)>
            <cfset toastItems = arrayNew(1)>
            <!--- Maps category name to its position in toastCategories: de-duplicates
                  categories and lets us update itemCount as items are added. --->
            <cfset categorySet = structNew()>
            <cfset itemId = 1>

            <cfloop collection="#ooState#" item="key">
                <!--- Only top-level entries whose key starts with "Menu:" are treated as menus --->
                <cfif left(key, 5) EQ "Menu:">
                    <cfset menu = ooState[key]>
                    <cfif structKeyExists(menu, "groups") AND isArray(menu.groups)>
                        <cfloop array="#menu.groups#" index="group">
                            <cfset groupName = structKeyExists(group, "name") ? group.name : "">
                            <cfif len(groupName) AND NOT structKeyExists(categorySet, groupName)>
                                <cfset arrayAppend(toastCategories, { "name": groupName, "itemCount": 0 })>
                                <cfset categorySet[groupName] = arrayLen(toastCategories)>
                            </cfif>

                            <cfif structKeyExists(group, "items") AND isArray(group.items)>
                                <cfloop array="#group.items#" index="item">
                                    <cfset itemStruct = structNew()>
                                    <cfset itemStruct["id"] = "item_" & itemId>
                                    <cfset itemStruct["name"] = structKeyExists(item, "name") ? item.name : "">
                                    <cfset itemStruct["description"] = structKeyExists(item, "description") ? item.description : "">
                                    <cfset itemStruct["category"] = groupName>
                                    <cfset itemStruct["modifiers"] = arrayNew(1)>

                                    <!--- Handle prices: a non-empty "prices" array wins (first entry is used),
                                          then a scalar "price", otherwise 0. --->
                                    <cfif structKeyExists(item, "prices") AND isArray(item.prices) AND arrayLen(item.prices)>
                                        <cfset itemStruct["price"] = item.prices[1]>
                                    <cfelseif structKeyExists(item, "price")>
                                        <cfset itemStruct["price"] = item.price>
                                    <cfelse>
                                        <cfset itemStruct["price"] = 0>
                                    </cfif>

                                    <!--- Handle images: prefer the "medium" rendition, fall back to "large".
                                          The filename is the last "/"-separated segment of the URL. --->
                                    <cfif structKeyExists(item, "imageUrls") AND isStruct(item.imageUrls)>
                                        <cfset imgUrls = item.imageUrls>
                                        <cfif structKeyExists(imgUrls, "medium")>
                                            <cfset itemStruct["imageUrl"] = imgUrls.medium>
                                            <cfset itemStruct["imageSrc"] = imgUrls.medium>
                                            <cfset itemStruct["imageFilename"] = listLast(imgUrls.medium, "/")>
                                        <cfelseif structKeyExists(imgUrls, "large")>
                                            <cfset itemStruct["imageUrl"] = imgUrls.large>
                                            <cfset itemStruct["imageSrc"] = imgUrls.large>
                                            <cfset itemStruct["imageFilename"] = listLast(imgUrls.large, "/")>
                                        </cfif>
                                    </cfif>

                                    <!--- Items without a name are dropped and do not consume an id --->
                                    <cfif len(itemStruct.name)>
                                        <cfset arrayAppend(toastItems, itemStruct)>
                                        <cfset itemId++>
                                        <!--- Keep the category's itemCount in step with the items actually emitted --->
                                        <cfif len(groupName) AND structKeyExists(categorySet, groupName)>
                                            <cfset categoryIndex = categorySet[groupName]>
                                            <cfset toastCategories[categoryIndex]["itemCount"] = toastCategories[categoryIndex]["itemCount"] + 1>
                                        </cfif>
                                    </cfif>
                                </cfloop>
                            </cfif>
                        </cfloop>
                    </cfif>
                </cfif>
            </cfloop>

            <cfset arrayAppend(response.steps, "Extracted " & arrayLen(toastItems) & " items from " & arrayLen(toastCategories) & " categories")>

            <cfif arrayLen(toastItems)>
                <!--- Return directly without Claude --->
                <cfset response["OK"] = true>
                <cfset response["DATA"] = {
                    "business": toastBusiness,
                    "categories": toastCategories,
                    "modifiers": arrayNew(1),
                    "items": toastItems,
                    "imageUrls": arrayNew(1),
                    "headerCandidateIndices": arrayNew(1),
                    "imageMappings": arrayNew(1)
                }>
                <cfset response["sourceUrl"] = targetUrl>
                <cfset response["pagesProcessed"] = 1>
                <cfset response["imagesFound"] = 0>
                <cfset response["playwrightImagesCount"] = 0>
                <cfset response["toastDirect"] = true>
                <cfoutput>#serializeJSON(response)#</cfoutput>
                <cfabort>
            <cfelse>
                <!--- Parsed fine but nothing usable was found: do not report an empty menu as success --->
                <cfset arrayAppend(response.steps, "Toast __OO_STATE__ contained no menu items - falling back to Claude")>
            </cfif>

            <cfcatch type="any">
                <cfset arrayAppend(response.steps, "Toast JSON parse failed: " & cfcatch.message & " - falling back to Claude")>
            </cfcatch>
        </cftry>
    <cfelse>
        <!--- Marker present but the assignment did not match the expected shape --->
        <cfset arrayAppend(response.steps, "Toast __OO_STATE__ assignment not matched - falling back to Claude")>
    </cfif>
</cfif>
|
||||
|
||||
<!--- Extract base URL for resolving relative links --->
|
||||
<cfset baseUrl = reReplace(targetUrl, "(https?://[^/]+).*", "\1")>
|
||||
<cfset basePath = reReplace(targetUrl, "(https?://[^/]+/[^?]*/?).*", "\1")>
|
||||
|
|
|
|||
Reference in a new issue