This repository has been archived on 2026-03-21. You can view files and clone it, but cannot push or open issues or pull requests.
payfrit-biz/api/setup/analyzeMenuUrl.cfm
John Mizerek 4684936595 Add parent/child category hierarchy for Toast menus
Toast pages with multiple menus (e.g. "Food", "Beverages", "Merchandise")
now produce parent categories from the menu names with subcategories from
the groups within each menu, using the parentCategoryName field the wizard
already supports.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-01 18:18:13 -08:00

1801 lines
116 KiB
Text

<!--- Endpoint setup: no debug output, emit only explicit cfoutput content, allow up to 300 seconds --->
<cfsetting showdebugoutput="false" enablecfoutputonly="true" requesttimeout="300">
<!--- The endpoint always answers with JSON; reset discards anything already buffered --->
<cfcontent type="application/json; charset=utf-8" reset="true">
<!--- Response envelope; OK stays false until a code path completes successfully --->
<cfset response = { "OK": false }>
<cftry>
<!--- Load the Claude API key from config/claude.json, located two directories above this template --->
<cfscript>
    CLAUDE_API_KEY = "";
    configPath = getDirectoryFromPath(getCurrentTemplatePath()) & "../../config/claude.json";
    if (fileExists(configPath)) {
        configData = deserializeJSON(fileRead(configPath));
        if (structKeyExists(configData, "apiKey")) {
            CLAUDE_API_KEY = configData.apiKey;
        }
    }
    // A missing file, a missing key, or an empty value all end up here.
    if (NOT len(CLAUDE_API_KEY)) {
        throw(message="Claude API key not configured");
    }
</cfscript>
<!--- Read the JSON request body and set up the step log, debug info and page state --->
<cfscript>
    requestBody = toString(getHttpRequestData().content);
    if (NOT len(requestBody)) {
        throw(message="No request body provided");
    }
    requestData = deserializeJSON(requestBody);

    // steps is a human-readable trace of what the import did; debug echoes what the caller sent.
    response["steps"] = [];
    response["debug"] = {
        "hasHtmlKey": structKeyExists(requestData, "html"),
        "hasUrlKey": structKeyExists(requestData, "url"),
        "htmlLength": structKeyExists(requestData, "html") ? len(requestData.html) : 0,
        "urlValue": structKeyExists(requestData, "url") ? requestData.url : ""
    };

    // Filled in by whichever input branch (html / local file / remote url) runs next.
    pageHtml = "";
    baseUrl = "";
    basePath = "";
    targetUrl = "";
</cfscript>
<!--- Check if HTML content was provided directly (uploaded file or pasted) --->
<cfif structKeyExists(requestData, "html") AND len(trim(requestData.html))>
<cfset pageHtml = trim(requestData.html)>
<cfset arrayAppend(response.steps, "Using provided HTML content: " & len(pageHtml) & " bytes")>
<!--- No base URL for local content - images won't be fetched --->
<cfset baseUrl = "">
<cfset basePath = "">
<cfelseif structKeyExists(requestData, "url") AND len(trim(requestData.url))>
<cfset targetUrl = trim(requestData.url)>
<!--- Validate URL format --->
<cfif NOT reFindNoCase("^https?://", targetUrl)>
<cfset targetUrl = "https://" & targetUrl>
</cfif>
<!--- Check if this is a local temp file (ZIP upload) - read directly, skip Playwright --->
<cfif findNoCase("/temp/menu-import/", targetUrl)>
<!---
    Map the URL onto the upload area on disk and read the HTML file directly.
    targetUrl comes from the request body, so the path is validated before it touches the
    file system: it must sit directly under /temp/menu-import/ on the host, must not contain
    ".." segments or backslashes, and any query string or fragment is dropped.
--->
<cfset localUrlPath = reReplaceNoCase(targetUrl, "^https?://[^/]+(/temp/menu-import/[^?##]*).*$", "\1")>
<cfif compareNoCase(left(localUrlPath, 18), "/temp/menu-import/") NEQ 0
      OR reFind("(^|/)\.\.(/|$)", localUrlPath)
      OR find("\", localUrlPath)>
    <cfthrow message="Invalid local file URL">
</cfif>
<cfset localFilePath = expandPath(localUrlPath)>
<cfset arrayAppend(response.steps, "Local temp file detected: " & localFilePath)>
<cfif NOT fileExists(localFilePath)>
    <cfthrow message="Local file not found: #localFilePath#">
</cfif>
<cfset pageHtml = fileRead(localFilePath, "utf-8")>
<!--- Local files are never rendered by Playwright, so there are no captured images --->
<cfset playwrightImages = arrayNew(1)>
<cfset arrayAppend(response.steps, "Read " & len(pageHtml) & " bytes from local file")>
<!--- Extract base path for local files (relative to the HTML file) --->
<cfset localDir = getDirectoryFromPath(localFilePath)>
<cfset basePath = reReplaceNoCase(targetUrl, "/[^/]*$", "/")>
<cfif NOT reFindNoCase("/$", basePath)>
    <cfset basePath = basePath & "/">
</cfif>
<!--- Check for Toast menu page - extract from visible HTML for most complete data --->
<cfif findNoCase("class=""headerText""", pageHtml) AND findNoCase("toasttab", pageHtml)>
<cfset arrayAppend(response.steps, "Toast menu detected - parsing visible HTML items")>
<cftry>
<!--- Extract visible items from rendered HTML (most complete) --->
<cfset toastBusiness = structNew()>
<cfset toastCategories = arrayNew(1)>
<cfset toastItems = arrayNew(1)>
<cfset categorySet = structNew()>
<cfset itemNameSet = structNew()>
<cfset itemId = 1>
<cfset currentCategory = "Menu">
<!--- Collect distinct category names from h2 elements whose class contains "groupHeader" --->
<cfscript>
    categoryMatches = reMatchNoCase('<h2[^>]*class="[^"]*groupHeader[^"]*"[^>]*>([^<]+)</h2>', pageHtml);
    for (catMatch in categoryMatches) {
        // Keep only the text between the opening tag and </h2>.
        catName = trim(reReplaceNoCase(catMatch, '.*>([^<]+)</h2>.*', '\1'));
        // categorySet de-duplicates names across this pass and the later __OO_STATE__ pass.
        if (len(catName) AND NOT structKeyExists(categorySet, catName)) {
            categorySet[catName] = true;
            arrayAppend(toastCategories, { "name": catName, "itemCount": 0 });
        }
    }
</cfscript>
<!--- Extract item blocks with name, price, description, image --->
<!--- Toast pattern: li.item containing headerText for name, price span, itemImage img --->
<cfset itemBlocks = reMatchNoCase('<li[^>]*class="[^"]*item[^"]*"[^>]*>.*?</li>', pageHtml)>
<cfset arrayAppend(response.steps, "Found " & arrayLen(itemBlocks) & " item blocks in HTML")>

<!---
    Record where every group header sits in the page. Each item is then assigned to the
    header that precedes it, instead of every item landing in the first category.
--->
<cfset headerPositions = arrayNew(1)>
<cfset headerScanPos = 1>
<cfloop condition="headerScanPos LTE len(pageHtml)">
    <cfset headerHit = reFindNoCase('<h2[^>]*class="[^"]*groupHeader[^"]*"[^>]*>([^<]+)</h2>', pageHtml, headerScanPos, true)>
    <cfif headerHit.pos[1] EQ 0>
        <cfbreak>
    </cfif>
    <cfset headerName = trim(mid(pageHtml, headerHit.pos[2], headerHit.len[2]))>
    <cfif len(headerName)>
        <cfset arrayAppend(headerPositions, { "pos": headerHit.pos[1], "name": headerName })>
    </cfif>
    <cfset headerScanPos = headerHit.pos[1] + headerHit.len[1]>
</cfloop>

<!--- Items that appear before any header keep the previous default --->
<cfset defaultCategory = arrayLen(toastCategories) ? toastCategories[1].name : "Menu">
<cfset blockSearchPos = 1>
<cfset headerIdx = 0>

<cfloop array="#itemBlocks#" index="block">
    <!--- reMatch returns blocks in document order, so search forward from the previous block only --->
    <cfset blockPos = find(block, pageHtml, blockSearchPos)>
    <cfif blockPos GT 0>
        <cfset blockSearchPos = blockPos + len(block)>
        <!--- Advance to the last header that starts before this block --->
        <cfloop condition="headerIdx LT arrayLen(headerPositions) AND headerPositions[headerIdx + 1].pos LT blockPos">
            <cfset headerIdx++>
        </cfloop>
    </cfif>

    <!--- Extract item name --->
    <cfset nameMatch = reMatchNoCase('<span class="headerText">([^<]+)</span>', block)>
    <cfif arrayLen(nameMatch)>
        <cfset itemName = reReplaceNoCase(nameMatch[1], '.*>([^<]+)</span>.*', '\1')>
        <cfset itemName = trim(itemName)>
        <!--- Skip duplicates: the first occurrence of a name wins --->
        <cfif len(itemName) AND NOT structKeyExists(itemNameSet, itemName)>
            <cfset itemNameSet[itemName] = true>
            <cfset itemStruct = structNew()>
            <cfset itemStruct["id"] = "item_" & itemId>
            <cfset itemStruct["name"] = itemName>
            <cfset itemStruct["modifiers"] = arrayNew(1)>

            <!--- Extract price - the first dollar amount found in the block --->
            <cfset itemStruct["price"] = 0>
            <cfset priceMatch = reMatchNoCase('\$([0-9]+\.?[0-9]*)', block)>
            <cfif arrayLen(priceMatch)>
                <!--- priceMatch[1] is like "$12.99", strip the $ --->
                <cfset priceStr = replace(priceMatch[1], "$", "")>
                <cfif isNumeric(priceStr) AND val(priceStr) GT 0>
                    <cfset itemStruct["price"] = val(priceStr)>
                </cfif>
            </cfif>

            <!--- Extract description --->
            <cfset descMatch = reMatchNoCase('<div[^>]*class="[^"]*description[^"]*"[^>]*>([^<]+)</div>', block)>
            <cfif arrayLen(descMatch)>
                <cfset itemStruct["description"] = trim(reReplaceNoCase(descMatch[1], '.*>([^<]+)</div>.*', '\1'))>
            <cfelse>
                <cfset itemStruct["description"] = "">
            </cfif>

            <!--- Extract image URL from a src attribute pointing into the saved page's Menu_files folder --->
            <cfset imgMatch = reMatchNoCase('src="(Menu_files/[^"]+)"', block)>
            <cfif arrayLen(imgMatch)>
                <cfset imgSrc = reReplaceNoCase(imgMatch[1], '.*src="([^"]+)".*', '\1')>
                <!--- Convert to full URL --->
                <cfset itemStruct["imageUrl"] = basePath & imgSrc>
                <cfset itemStruct["imageSrc"] = basePath & imgSrc>
                <cfset itemStruct["imageFilename"] = listLast(imgSrc, "/")>
            </cfif>

            <!--- Category comes from the nearest preceding group header, else the default --->
            <cfset itemStruct["category"] = (headerIdx GT 0) ? headerPositions[headerIdx].name : defaultCategory>
            <cfset arrayAppend(toastItems, itemStruct)>
            <cfset itemId++>
        </cfif>
    </cfif>
</cfloop>
<!--- Fallback when no item blocks produced anything: treat every headerText span in the page as an item name --->
<cfscript>
    if (arrayLen(toastItems) EQ 0) {
        nameMatches = reMatchNoCase('<span class="headerText">([^<]+)</span>', pageHtml);
        for (nameMatch in nameMatches) {
            itemName = trim(reReplaceNoCase(nameMatch, '.*>([^<]+)</span>.*', '\1'));
            if (len(itemName) AND NOT structKeyExists(itemNameSet, itemName)) {
                itemNameSet[itemName] = true;
                // No price, description or category is available on this path.
                itemStruct = { "id": "item_" & itemId, "name": itemName, "price": 0, "description": "", "category": "Menu", "modifiers": [] };
                arrayAppend(toastItems, itemStruct);
                itemId++;
            }
        }
    }
</cfscript>
<!--- Business name, source 1 of several: the page title tag --->
<cfscript>
    titleMatch = reMatchNoCase('<title[^>]*>([^<]+)</title>', pageHtml);
    arrayAppend(response.steps, "Title tag matches: " & arrayLen(titleMatch));
    if (arrayLen(titleMatch)) {
        titleText = trim(reReplaceNoCase(titleMatch[1], '.*<title[^>]*>([^<]+)</title>.*', '\1'));
        arrayAppend(response.steps, "Raw title: " & left(titleText, 100));
        // Toast titles look like "Restaurant Name | Online Ordering": keep the part before the pipe.
        if (findNoCase("|", titleText)) {
            titleText = trim(listFirst(titleText, "|"));
        }
        // Drop trailing " - Menu", " - Order...", " - Online..." suffixes.
        titleText = reReplaceNoCase(titleText, "\s*-\s*(Menu|Order|Online).*$", "");
        if (len(titleText) AND NOT structKeyExists(toastBusiness, "name")) {
            toastBusiness["name"] = titleText;
            arrayAppend(response.steps, "Business name from title: " & titleText);
        }
    }
</cfscript>
<!--- 2. Try og:title or og:site_name meta tags --->
<cfif NOT structKeyExists(toastBusiness, "name") OR NOT len(toastBusiness.name)>
    <!--- Match the whole meta tag so the order of the property and content attributes does not matter --->
    <cfset ogMatch = reMatchNoCase('<meta[^>]*property=["'']og:(site_name|title)["''][^>]*>', pageHtml)>
    <cfset ogText = "">
    <cfloop array="#ogMatch#" index="ogTag">
        <!---
            Read the content value using the quote character that actually delimits it.
            A shared [^"']+ class would cut "Joe's Pizza" down to "Joe".
        --->
        <cfset ogContent = reFindNoCase('\scontent="([^"]*)"', ogTag, 1, true)>
        <cfif ogContent.pos[1] EQ 0>
            <cfset ogContent = reFindNoCase("\scontent='([^']*)'", ogTag, 1, true)>
        </cfif>
        <cfif arrayLen(ogContent.pos) GTE 2 AND ogContent.pos[2] GT 0 AND ogContent.len[2] GT 0>
            <cfset ogText = trim(mid(ogTag, ogContent.pos[2], ogContent.len[2]))>
        </cfif>
        <!--- The first tag with a non-empty value wins --->
        <cfif len(ogText)>
            <cfbreak>
        </cfif>
    </cfloop>
    <!--- Same "Name | Suffix" convention as the title tag --->
    <cfif findNoCase("|", ogText)>
        <cfset ogText = trim(listFirst(ogText, "|"))>
    </cfif>
    <cfif len(ogText)>
        <cfset toastBusiness["name"] = ogText>
        <cfset arrayAppend(response.steps, "Business name from og:meta: " & ogText)>
    </cfif>
</cfif>
<!--- Business name, source 3: an h1/div whose class mentions restaurant, location or brand --->
<cfscript>
    if (NOT structKeyExists(toastBusiness, "name") OR NOT len(toastBusiness.name)) {
        headerMatch = reMatchNoCase('<(?:h1|div)[^>]*class="[^"]*(?:restaurant|location|brand)[^"]*"[^>]*>([^<]+)<', pageHtml);
        if (arrayLen(headerMatch)) {
            headerText = trim(reReplaceNoCase(headerMatch[1], '.*>([^<]+)<.*', '\1'));
            // Anything 100 characters or longer is not accepted as a name.
            if (len(headerText) AND len(headerText) LT 100) {
                toastBusiness["name"] = headerText;
                arrayAppend(response.steps, "Business name from header: " & headerText);
            }
        }
    }
</cfscript>
<!--- Business name, source 4 (last resort): the first h1 that contains plain text only --->
<cfscript>
    if (NOT structKeyExists(toastBusiness, "name") OR NOT len(toastBusiness.name)) {
        h1Match = reMatchNoCase('<h1[^>]*>([^<]+)</h1>', pageHtml);
        if (arrayLen(h1Match)) {
            h1Text = trim(reReplaceNoCase(h1Match[1], '.*<h1[^>]*>([^<]+)</h1>.*', '\1'));
            if (len(h1Text) AND len(h1Text) LT 100) {
                toastBusiness["name"] = h1Text;
                arrayAppend(response.steps, "Business name from h1: " & h1Text);
            }
        }
    }
</cfscript>
<!--- Address: text of the first element whose class attribute contains "address" --->
<cfscript>
    if (NOT structKeyExists(toastBusiness, "addressLine1")) {
        addrMatch = reMatchNoCase('<[^>]*class="[^"]*address[^"]*"[^>]*>([^<]+)</[^>]+>', pageHtml);
        if (arrayLen(addrMatch)) {
            addrText = trim(reReplaceNoCase(addrMatch[1], '.*>([^<]+)</.*', '\1'));
            if (len(addrText) AND len(addrText) LT 200) {
                toastBusiness["addressLine1"] = addrText;
                // Only the first 50 characters go into the step log.
                arrayAppend(response.steps, "Address from HTML: " & left(addrText, 50));
            }
        }
    }
</cfscript>
<!--- Phone: a 3-3-4 digit number following "tel:" or an attribute value containing "phone" --->
<cfscript>
    if (NOT structKeyExists(toastBusiness, "phone")) {
        phoneMatch = reMatchNoCase('(?:tel:|phone[^"]*">)\s*\(?(\d{3})\)?[-.\s]?(\d{3})[-.\s]?(\d{4})', pageHtml);
        if (arrayLen(phoneMatch)) {
            // Normalise to xxx-xxx-xxxx.
            phoneText = reReplaceNoCase(phoneMatch[1], '.*(\d{3}).*(\d{3}).*(\d{4}).*', '\1-\2-\3');
            if (len(phoneText) GTE 10) {
                toastBusiness["phone"] = phoneText;
                arrayAppend(response.steps, "Phone from HTML: " & phoneText);
            }
        }
    }
</cfscript>
<!--- Check if __OO_STATE__ exists in the saved HTML --->
<cfset hasOoState = findNoCase("window.__OO_STATE__", pageHtml) GT 0>
<cfset arrayAppend(response.steps, "Has __OO_STATE__: " & hasOoState)>
<!--- Also try to extract from __OO_STATE__ for images and business info --->
<cfif hasOoState>
<cfset ooStateMatch = reMatchNoCase("window\.__OO_STATE__\s*=\s*(\{.*?\});\s*window\.", pageHtml)>
<cfif arrayLen(ooStateMatch)>
<cfset ooStateJson = reReplaceNoCase(ooStateMatch[1], "window\.__OO_STATE__\s*=\s*", "")>
<cfset ooStateJson = reReplace(ooStateJson, ";\s*window\.$", "")>
<cftry>
<cfset ooState = deserializeJSON(ooStateJson)>
<!--- Debug: log all top-level keys in OO_STATE --->
<cfset ooStateKeys = structKeyList(ooState)>
<cfset arrayAppend(response.steps, "OO_STATE keys: " & left(ooStateKeys, 500))>
<!--- Build name -> image URL map, name -> category map, and name -> price map from OO_STATE --->
<cfset imageMap = structNew()>
<cfset itemCategoryMap = structNew()>
<cfset itemPriceMap = structNew()>
<cfloop collection="#ooState#" item="key">
<!--- Extract restaurant/business info --->
<cfif left(key, 11) EQ "Restaurant:">
<cfset restaurant = ooState[key]>
<cfif structKeyExists(restaurant, "name")>
<cfset toastBusiness["name"] = restaurant.name>
</cfif>
<cfif structKeyExists(restaurant, "location")>
<cfset loc = restaurant.location>
<cfif structKeyExists(loc, "address1")>
<cfset toastBusiness["addressLine1"] = loc.address1>
</cfif>
<cfif structKeyExists(loc, "city")>
<cfset toastBusiness["city"] = loc.city>
</cfif>
<cfif structKeyExists(loc, "state")>
<cfset toastBusiness["state"] = loc.state>
</cfif>
<cfif structKeyExists(loc, "zipCode")>
<cfset toastBusiness["zip"] = loc.zipCode>
</cfif>
<cfif structKeyExists(loc, "phone")>
<cfset toastBusiness["phone"] = loc.phone>
</cfif>
</cfif>
<cfif structKeyExists(restaurant, "brandColor")>
<cfset toastBusiness["brandColor"] = replace(restaurant.brandColor, "##", "")>
</cfif>
</cfif>
<!--- Extract menu items, images, and CATEGORIES --->
<cfif left(key, 5) EQ "Menu:">
<cfset menu = ooState[key]>
<cfif structKeyExists(menu, "groups") AND isArray(menu.groups)>
<cfloop array="#menu.groups#" index="group">
<!--- Extract category name from group --->
<cfset groupName = "">
<cfif structKeyExists(group, "name") AND len(trim(group.name))>
<cfset groupName = trim(group.name)>
<!--- Add to categories if not already there --->
<cfif NOT structKeyExists(categorySet, groupName)>
<cfset categorySet[groupName] = true>
<cfset arrayAppend(toastCategories, { "name": groupName, "itemCount": 0 })>
</cfif>
</cfif>
<!--- Debug: log group keys to help identify subgroup field names --->
<cfif isStruct(group) AND NOT structKeyExists(variables, "loggedGroupKeys")>
<cfset variables.loggedGroupKeys = true>
<cfset arrayAppend(response.steps, "Group keys: " & structKeyList(group))>
</cfif>
<!--- Check for subgroups (nested categories within this group) --->
<!--- Try multiple field names: subgroups, children, childGroups --->
<cfset subgroupsArr = arrayNew(1)>
<cfif structKeyExists(group, "subgroups") AND isArray(group.subgroups)>
<cfset subgroupsArr = group.subgroups>
<cfelseif structKeyExists(group, "children") AND isArray(group.children)>
<cfset subgroupsArr = group.children>
<cfelseif structKeyExists(group, "childGroups") AND isArray(group.childGroups)>
<cfset subgroupsArr = group.childGroups>
</cfif>
<cfset hasSubgroups = false>
<cfif arrayLen(subgroupsArr) GT 0>
<cfset hasSubgroups = true>
<cfset arrayAppend(response.steps, "Group '" & groupName & "' has " & arrayLen(subgroupsArr) & " subgroups")>
<!---
    Walk each subgroup: register it as a child category of the enclosing group, then index
    its items by name into the category, price and image maps.
--->
<cfloop array="#subgroupsArr#" index="subgroup">
    <cfset subgroupName = "">
    <cfif structKeyExists(subgroup, "name") AND len(trim(subgroup.name))>
        <cfset subgroupName = trim(subgroup.name)>
        <cfif NOT structKeyExists(categorySet, subgroupName)>
            <cfset categorySet[subgroupName] = true>
            <cfset arrayAppend(toastCategories, { "name": subgroupName, "parentCategoryName": groupName, "itemCount": 0 })>
        </cfif>
    </cfif>
    <!--- Extract items from subgroup --->
    <cfif structKeyExists(subgroup, "items") AND isArray(subgroup.items)>
        <!--- Items of an unnamed subgroup fall back to the parent group's name --->
        <cfset effectiveName = len(subgroupName) ? subgroupName : groupName>
        <cfloop array="#subgroup.items#" index="item">
            <cfif isStruct(item) AND structKeyExists(item, "name")>
                <!--- Never map an item to an empty category name (same rule as the direct-item loop) --->
                <cfif len(effectiveName)>
                    <cfset itemCategoryMap[item.name] = effectiveName>
                </cfif>
                <!--- Extract price - first usable field wins --->
                <cfif structKeyExists(item, "price") AND isNumeric(item.price)>
                    <cfset itemPriceMap[item.name] = val(item.price)>
                <cfelseif structKeyExists(item, "unitPrice") AND isNumeric(item.unitPrice)>
                    <cfset itemPriceMap[item.name] = val(item.unitPrice)>
                <cfelseif structKeyExists(item, "basePrice") AND isNumeric(item.basePrice)>
                    <cfset itemPriceMap[item.name] = val(item.basePrice)>
                <cfelseif structKeyExists(item, "displayPrice") AND isSimpleValue(item.displayPrice)>
                    <!--- displayPrice may be a string like "$12.99" --->
                    <cfset priceStr = reReplace(item.displayPrice, "[^0-9.]", "", "all")>
                    <cfif len(priceStr) AND isNumeric(priceStr)>
                        <cfset itemPriceMap[item.name] = val(priceStr)>
                    </cfif>
                </cfif>
                <!--- Extract image URLs; a non-struct value must not abort the whole enrichment pass --->
                <cfif structKeyExists(item, "imageUrls") AND isStruct(item.imageUrls)>
                    <cfset imgUrls = item.imageUrls>
                    <cfif structKeyExists(imgUrls, "medium")>
                        <cfset imageMap[item.name] = imgUrls.medium>
                    <cfelseif structKeyExists(imgUrls, "large")>
                        <cfset imageMap[item.name] = imgUrls.large>
                    </cfif>
                </cfif>
            </cfif>
        </cfloop>
    </cfif>
</cfloop>
</cfif>
<!--- Extract direct items from group (not in subgroups) --->
<cfif structKeyExists(group, "items") AND isArray(group.items)>
<!--- Debug: log first item's structure --->
<cfif arrayLen(group.items) GT 0 AND NOT structKeyExists(variables, "loggedItemKeys")>
<cfset variables.loggedItemKeys = true>
<cfset firstItem = group.items[1]>
<cfif isStruct(firstItem)>
<cfset arrayAppend(response.steps, "First item keys: " & structKeyList(firstItem))>
<cfif structKeyExists(firstItem, "price")>
<cfset arrayAppend(response.steps, "item.price = " & firstItem.price)>
</cfif>
<cfif structKeyExists(firstItem, "basePrice")>
<cfset arrayAppend(response.steps, "item.basePrice = " & firstItem.basePrice)>
</cfif>
<cfif structKeyExists(firstItem, "displayPrice")>
<cfset arrayAppend(response.steps, "item.displayPrice = " & firstItem.displayPrice)>
</cfif>
</cfif>
</cfif>
<!--- Index the group's direct items by name into the category, price and image maps --->
<cfloop array="#group.items#" index="item">
    <cfif isStruct(item) AND structKeyExists(item, "name")>
        <!--- Map item name to category --->
        <cfif len(groupName)>
            <cfset itemCategoryMap[item.name] = groupName>
        </cfif>
        <!--- Extract price - try multiple field names, first usable one wins --->
        <cfif structKeyExists(item, "price") AND isNumeric(item.price)>
            <cfset itemPriceMap[item.name] = val(item.price)>
        <cfelseif structKeyExists(item, "unitPrice") AND isNumeric(item.unitPrice)>
            <cfset itemPriceMap[item.name] = val(item.unitPrice)>
        <cfelseif structKeyExists(item, "basePrice") AND isNumeric(item.basePrice)>
            <cfset itemPriceMap[item.name] = val(item.basePrice)>
        <cfelseif structKeyExists(item, "displayPrice") AND isSimpleValue(item.displayPrice)>
            <!--- displayPrice might be a string like "$12.99" --->
            <cfset priceStr = reReplace(item.displayPrice, "[^0-9.]", "", "all")>
            <cfif len(priceStr) AND isNumeric(priceStr)>
                <cfset itemPriceMap[item.name] = val(priceStr)>
            </cfif>
        </cfif>
        <!--- Extract image URLs; a non-struct value must not abort the whole enrichment pass --->
        <cfif structKeyExists(item, "imageUrls") AND isStruct(item.imageUrls)>
            <cfset imgUrls = item.imageUrls>
            <cfif structKeyExists(imgUrls, "medium")>
                <cfset imageMap[item.name] = imgUrls.medium>
            <cfelseif structKeyExists(imgUrls, "large")>
                <cfset imageMap[item.name] = imgUrls.large>
            </cfif>
        </cfif>
    </cfif>
</cfloop>
</cfif>
</cfloop>
</cfif>
</cfif>
</cfloop>
<!--- Apply images, categories, and prices to items --->
<cfset imagesMatched = 0>
<cfset categoriesMatched = 0>
<cfset pricesMatched = 0>
<cfloop from="1" to="#arrayLen(toastItems)#" index="i">
<cfif structKeyExists(imageMap, toastItems[i].name)>
<cfset toastItems[i]["imageUrl"] = imageMap[toastItems[i].name]>
<cfset toastItems[i]["imageSrc"] = imageMap[toastItems[i].name]>
<cfset toastItems[i]["imageFilename"] = listLast(imageMap[toastItems[i].name], "/")>
<cfset imagesMatched++>
</cfif>
<!--- Apply category from __OO_STATE__ --->
<cfif structKeyExists(itemCategoryMap, toastItems[i].name)>
<cfset toastItems[i]["category"] = itemCategoryMap[toastItems[i].name]>
<cfset categoriesMatched++>
</cfif>
<!--- Apply price from __OO_STATE__ if not already set or is 0 --->
<cfif structKeyExists(itemPriceMap, toastItems[i].name) AND (NOT structKeyExists(toastItems[i], "price") OR toastItems[i].price EQ 0)>
<cfset toastItems[i]["price"] = itemPriceMap[toastItems[i].name]>
<cfset pricesMatched++>
</cfif>
</cfloop>
<cfset arrayAppend(response.steps, "Matched " & imagesMatched & " images, " & categoriesMatched & " categories, " & pricesMatched & " prices from __OO_STATE__")>
<cfif structCount(toastBusiness) GT 0>
<cfset arrayAppend(response.steps, "Extracted business info: " & structKeyList(toastBusiness))>
</cfif>
<cfcatch type="any">
    <!--- __OO_STATE__ enrichment is best-effort: keep the HTML-derived data, but record why it failed --->
    <cfset arrayAppend(response.steps, "Could not process __OO_STATE__: " & cfcatch.message)>
</cfcatch>
</cftry>
</cfif>
</cfif>
<!---
    Categories are created with itemCount 0 and nothing updates it afterwards, so recount
    from the final item list (item categories may have been reassigned from __OO_STATE__).
--->
<cfset categoryItemCounts = structNew()>
<cfloop array="#toastItems#" index="countedItem">
    <cfif structKeyExists(countedItem, "category") AND len(countedItem.category)>
        <cfif structKeyExists(categoryItemCounts, countedItem.category)>
            <cfset categoryItemCounts[countedItem.category] = categoryItemCounts[countedItem.category] + 1>
        <cfelse>
            <cfset categoryItemCounts[countedItem.category] = 1>
        </cfif>
    </cfif>
</cfloop>
<cfloop from="1" to="#arrayLen(toastCategories)#" index="catIdx">
    <cfset toastCategories[catIdx]["itemCount"] = structKeyExists(categoryItemCounts, toastCategories[catIdx].name) ? categoryItemCounts[toastCategories[catIdx].name] : 0>
</cfloop>
<!--- If we have items but no categories, add a default "Menu" category --->
<cfif arrayLen(toastItems) GT 0 AND arrayLen(toastCategories) EQ 0>
    <cfset arrayAppend(toastCategories, { "name": "Menu", "itemCount": arrayLen(toastItems) })>
    <cfset arrayAppend(response.steps, "Added default 'Menu' category for " & arrayLen(toastItems) & " items")>
</cfif>
<!--- Scan ALL HTML files in the ZIP for business info --->
<!--- Get the extraction directory from the URL (the UUID folder) --->
<cfset extractDir = reReplaceNoCase(targetUrl, "https?://[^/]+(/temp/menu-import/[a-f0-9]+/).*", "\1")>
<cfset extractDir = expandPath(extractDir)>
<cftry>
<cfdirectory action="list" directory="#extractDir#" name="allHtmlFiles" filter="*.htm*" recurse="true" type="file">
<cfset arrayAppend(response.steps, "Found " & allHtmlFiles.recordCount & " HTML files in ZIP")>
<cfloop query="allHtmlFiles">
<!--- Skip the main menu file we already processed --->
<cfset otherFilePath = "#allHtmlFiles.directory#/#allHtmlFiles.name#">
<cfif otherFilePath EQ localFilePath>
<cfcontinue>
</cfif>
<cftry>
<cfset otherHtml = fileRead(otherFilePath, "utf-8")>
<cfset arrayAppend(response.steps, "Scanning " & allHtmlFiles.name & " for business info...")>
<!--- Extract business name from title tag --->
<cfif NOT structKeyExists(toastBusiness, "name") OR NOT len(toastBusiness.name)>
<cfset otherTitleMatch = reMatchNoCase('<title[^>]*>([^<]+)</title>', otherHtml)>
<cfif arrayLen(otherTitleMatch)>
<cfset otherTitle = reReplaceNoCase(otherTitleMatch[1], '.*<title[^>]*>([^<]+)</title>.*', '\1')>
<cfset otherTitle = trim(otherTitle)>
<!--- Skip generic titles --->
<cfif len(otherTitle) AND NOT reFindNoCase("^(Menu|Home|About|Contact|Order|Online)$", otherTitle)>
<cfif findNoCase("|", otherTitle)>
<cfset otherTitle = trim(listFirst(otherTitle, "|"))>
</cfif>
<cfset otherTitle = reReplaceNoCase(otherTitle, "\s*-\s*(Menu|Order|Online).*$", "")>
<cfif len(otherTitle) AND len(otherTitle) LT 100>
<cfset toastBusiness["name"] = otherTitle>
<cfset arrayAppend(response.steps, "Found business name in " & allHtmlFiles.name & ": " & otherTitle)>
</cfif>
</cfif>
</cfif>
</cfif>
<!--- Extract address - look for common patterns --->
<cfif NOT structKeyExists(toastBusiness, "addressLine1") OR NOT len(toastBusiness.addressLine1)>
<!--- Look for street address patterns (number + street name) --->
<cfset addrMatch = reMatchNoCase('(\d+\s+[A-Za-z0-9\s]+(?:St(?:reet)?|Ave(?:nue)?|Rd|Road|Blvd|Boulevard|Dr(?:ive)?|Ln|Lane|Way|Ct|Court|Pl(?:ace)?|Pkwy|Parkway)[.,]?\s*(?:Suite|Ste|##|Unit|Apt)?\s*[A-Za-z0-9\-]*)', otherHtml)>
<cfif arrayLen(addrMatch)>
<cfset addrText = trim(addrMatch[1])>
<cfif len(addrText) GT 5 AND len(addrText) LT 100>
<cfset toastBusiness["addressLine1"] = addrText>
<cfset arrayAppend(response.steps, "Found address in " & allHtmlFiles.name & ": " & addrText)>
</cfif>
</cfif>
</cfif>
<!--- Extract phone number --->
<cfif NOT structKeyExists(toastBusiness, "phone") OR NOT len(toastBusiness.phone)>
<cfset phoneMatch = reMatchNoCase('\(?(\d{3})\)?[-.\s]?(\d{3})[-.\s]?(\d{4})', otherHtml)>
<cfif arrayLen(phoneMatch)>
<cfset phoneText = reReplaceNoCase(phoneMatch[1], '.*\(?(\d{3})\)?[-.\s]?(\d{3})[-.\s]?(\d{4}).*', '\1-\2-\3')>
<cfif len(phoneText) GTE 10>
<cfset toastBusiness["phone"] = phoneText>
<cfset arrayAppend(response.steps, "Found phone in " & allHtmlFiles.name & ": " & phoneText)>
</cfif>
</cfif>
</cfif>
<!--- Check for __OO_STATE__ in other files too (might have Restaurant info) --->
<cfif findNoCase("window.__OO_STATE__", otherHtml)>
<cfset otherOoMatch = reMatchNoCase("window\.__OO_STATE__\s*=\s*(\{.*?\});\s*window\.", otherHtml)>
<cfif arrayLen(otherOoMatch)>
<cfset otherOoJson = reReplaceNoCase(otherOoMatch[1], "window\.__OO_STATE__\s*=\s*", "")>
<cfset otherOoJson = reReplace(otherOoJson, ";\s*window\.$", "")>
<cftry>
<cfset otherOoState = deserializeJSON(otherOoJson)>
<cfloop collection="#otherOoState#" item="otherKey">
<cfif left(otherKey, 11) EQ "Restaurant:">
<cfset otherRest = otherOoState[otherKey]>
<cfif structKeyExists(otherRest, "name") AND (NOT structKeyExists(toastBusiness, "name") OR NOT len(toastBusiness.name))>
<cfset toastBusiness["name"] = otherRest.name>
<cfset arrayAppend(response.steps, "Found business name in " & allHtmlFiles.name & " __OO_STATE__: " & otherRest.name)>
</cfif>
<cfif structKeyExists(otherRest, "location")>
<cfset otherLoc = otherRest.location>
<cfif structKeyExists(otherLoc, "address1") AND (NOT structKeyExists(toastBusiness, "addressLine1") OR NOT len(toastBusiness.addressLine1))>
<cfset toastBusiness["addressLine1"] = otherLoc.address1>
</cfif>
<cfif structKeyExists(otherLoc, "city") AND (NOT structKeyExists(toastBusiness, "city") OR NOT len(toastBusiness.city))>
<cfset toastBusiness["city"] = otherLoc.city>
</cfif>
<cfif structKeyExists(otherLoc, "state") AND (NOT structKeyExists(toastBusiness, "state") OR NOT len(toastBusiness.state))>
<cfset toastBusiness["state"] = otherLoc.state>
</cfif>
<cfif structKeyExists(otherLoc, "zipCode") AND (NOT structKeyExists(toastBusiness, "zip") OR NOT len(toastBusiness.zip))>
<cfset toastBusiness["zip"] = otherLoc.zipCode>
</cfif>
<cfif structKeyExists(otherLoc, "phone") AND (NOT structKeyExists(toastBusiness, "phone") OR NOT len(toastBusiness.phone))>
<cfset toastBusiness["phone"] = otherLoc.phone>
</cfif>
</cfif>
<cfif structKeyExists(otherRest, "brandColor") AND (NOT structKeyExists(toastBusiness, "brandColor") OR NOT len(toastBusiness.brandColor))>
<cfset toastBusiness["brandColor"] = replace(otherRest.brandColor, "##", "")>
</cfif>
</cfif>
</cfloop>
<cfcatch type="any">
    <!--- Secondary files are optional sources: carry on, but leave a trace of the failure --->
    <cfset arrayAppend(response.steps, "Could not process __OO_STATE__ in " & allHtmlFiles.name & ": " & cfcatch.message)>
</cfcatch>
</cftry>
</cfif>
</cfif>
<cfcatch>
<!--- Skip files that can't be read --->
</cfcatch>
</cftry>
</cfloop>
<cfcatch>
<cfset arrayAppend(response.steps, "Could not scan other HTML files: " & cfcatch.message)>
</cfcatch>
</cftry>
<cfset arrayAppend(response.steps, "Extracted " & arrayLen(toastItems) & " unique items from " & arrayLen(toastCategories) & " categories")>
<!--- Scan for images in ZIP and analyze them for business info --->
<cftry>
<cfdirectory action="list" directory="#extractDir#" name="zipImages" recurse="true" type="file">
<cfset imageExtensions = "jpg,jpeg,png,gif,webp">
<cfset zipImageFiles = []>
<cfloop query="zipImages">
<cfset imgExt = lCase(listLast(zipImages.name, "."))>
<cfif listFindNoCase(imageExtensions, imgExt)>
<!--- Skip small files (likely icons) and _files folder assets --->
<cfif zipImages.size GT 10000 AND NOT findNoCase("_files", zipImages.directory)>
<cfset arrayAppend(zipImageFiles, "#zipImages.directory#/#zipImages.name#")>
</cfif>
</cfif>
</cfloop>
<cfif arrayLen(zipImageFiles) GT 0>
<cfset arrayAppend(response.steps, "Found " & arrayLen(zipImageFiles) & " images in ZIP to analyze for business info")>
<!--- Analyze up to 3 images for business info --->
<cfset imgLimit = min(arrayLen(zipImageFiles), 3)>
<cfloop from="1" to="#imgLimit#" index="imgIdx">
<cfset imgPath = zipImageFiles[imgIdx]>
<cftry>
<cfset imgContent = fileReadBinary(imgPath)>
<cfset imgExt = lCase(listLast(imgPath, "."))>
<cfset mediaType = "image/jpeg">
<cfif imgExt EQ "png"><cfset mediaType = "image/png">
<cfelseif imgExt EQ "gif"><cfset mediaType = "image/gif">
<cfelseif imgExt EQ "webp"><cfset mediaType = "image/webp">
</cfif>
<cfset base64Img = toBase64(imgContent)>
<cfset arrayAppend(response.steps, "Analyzing image: " & listLast(imgPath, "/\"))>
<!--- Build Claude request for business info extraction --->
<cfset imgMsgContent = []>
<cfset arrayAppend(imgMsgContent, {
"type": "image",
"source": {
"type": "base64",
"media_type": mediaType,
"data": base64Img
}
})>
<cfset arrayAppend(imgMsgContent, {
"type": "text",
"text": "Extract ALL business information visible in this image. Look carefully for: 1) Business NAME (the restaurant/store name), 2) PHONE number (format: xxx-xxx-xxxx), 3) Full ADDRESS (street, city, state, zip), 4) HOURS of operation (all days shown). Return JSON: {""name"":"""",""addressLine1"":"""",""city"":"""",""state"":"""",""zip"":"""",""phone"":"""",""hours"":"""",""brandColor"":""""}. For hours, format as single string like 'Mon-Thu 7am-10pm, Fri-Sat 7am-11pm'. Return ONLY valid JSON."
})>
<cfset imgRequest = {
"model": "claude-sonnet-4-20250514",
"max_tokens": 1024,
"temperature": 0,
"messages": [{
"role": "user",
"content": imgMsgContent
}]
}>
<cfhttp url="https://api.anthropic.com/v1/messages" method="POST" timeout="60" result="imgHttpResult">
<cfhttpparam type="header" name="Content-Type" value="application/json">
<cfhttpparam type="header" name="x-api-key" value="#CLAUDE_API_KEY#">
<cfhttpparam type="header" name="anthropic-version" value="2023-06-01">
<cfhttpparam type="body" value="#serializeJSON(imgRequest)#">
</cfhttp>
<!---
    Handle the Claude response for one image. statusCode is a string such as "200 OK";
    val() reads the leading number so only a real 200 is treated as success.
--->
<cfif val(imgHttpResult.statusCode) EQ 200>
    <cfset imgResponse = deserializeJSON(imgHttpResult.fileContent)>
    <cfif structKeyExists(imgResponse, "content") AND isArray(imgResponse.content) AND arrayLen(imgResponse.content) AND structKeyExists(imgResponse.content[1], "text")>
        <cfset imgText = imgResponse.content[1].text>
        <!--- Clean up JSON: strip an optional Markdown code fence around the payload --->
        <cfset imgText = trim(imgText)>
        <cfif left(imgText, 7) EQ "```json">
            <cfset imgText = mid(imgText, 8, len(imgText) - 7)>
        </cfif>
        <cfif left(imgText, 3) EQ "```">
            <cfset imgText = mid(imgText, 4, len(imgText) - 3)>
        </cfif>
        <cfif right(imgText, 3) EQ "```">
            <cfset imgText = left(imgText, len(imgText) - 3)>
        </cfif>
        <cfset imgText = trim(imgText)>
        <cftry>
            <cfset imgBizData = deserializeJSON(imgText)>
            <!--- Image data OVERWRITES HTML-extracted data (more reliable) --->
            <cfset bizFieldsToCheck = "name,addressLine1,city,state,zip,phone,hours,brandColor">
            <cfloop list="#bizFieldsToCheck#" index="bizField">
                <cfif structKeyExists(imgBizData, bizField) AND isSimpleValue(imgBizData[bizField]) AND len(trim(imgBizData[bizField]))>
                    <cfset toastBusiness[bizField] = trim(imgBizData[bizField])>
                    <cfset arrayAppend(response.steps, "Found " & bizField & " from image: " & left(toastBusiness[bizField], 50))>
                </cfif>
            </cfloop>
            <cfcatch>
                <cfset arrayAppend(response.steps, "Could not parse image analysis JSON")>
            </cfcatch>
        </cftry>
    </cfif>
<cfelse>
    <!--- Surface API failures (bad key, rate limit, timeout) instead of dropping them silently --->
    <cfset arrayAppend(response.steps, "Image analysis request failed: " & imgHttpResult.statusCode)>
</cfif>
<cfcatch>
<cfset arrayAppend(response.steps, "Error analyzing image: " & cfcatch.message)>
</cfcatch>
</cftry>
</cfloop>
</cfif>
<cfcatch>
<cfset arrayAppend(response.steps, "Could not scan ZIP for images: " & cfcatch.message)>
</cfcatch>
</cftry>
<!--- Summary of business info found --->
<cfset bizKeys = structKeyList(toastBusiness)>
<cfset arrayAppend(response.steps, "Business info keys: " & (len(bizKeys) ? bizKeys : "(none)"))>
<cfif structKeyExists(toastBusiness, "name")>
<cfset arrayAppend(response.steps, "Business name: " & toastBusiness.name)>
</cfif>
<!--- Return directly without Claude --->
<cfset response["OK"] = true>
<cfset response["DATA"] = {
"business": toastBusiness,
"categories": toastCategories,
"modifiers": arrayNew(1),
"items": toastItems,
"imageUrls": arrayNew(1),
"headerCandidateIndices": arrayNew(1),
"imageMappings": arrayNew(1)
}>
<cfset response["sourceUrl"] = targetUrl>
<cfset response["pagesProcessed"] = 1>
<cfset response["imagesFound"] = 0>
<cfset response["playwrightImagesCount"] = 0>
<cfset response["toastDirect"] = true>
<cfoutput>#serializeJSON(response)#</cfoutput>
<cfabort>
<cfcatch type="any">
<cfset arrayAppend(response.steps, "Toast HTML parse failed: " & cfcatch.message & " - falling back to Claude")>
</cfcatch>
</cftry>
</cfif>
<!--- Extract base URL for resolving relative links --->
<!--- baseUrl  = scheme + host, used for root-relative ("/x.jpg") references.
      basePath = directory of the page, always ending in "/", used for document-relative references. --->
<cfset baseUrl = reReplace(targetUrl, "(https?://[^/?##]+).*", "\1")>
<!--- Drop the query string and fragment first so neither can be mistaken for a path segment --->
<cfset basePath = reReplace(targetUrl, "[?##].*$", "")>
<cfif reFindNoCase("^https?://[^/]+$", basePath)>
    <!--- Bare host with no path: the directory is the site root.
          Stripping the "last segment" here would eat the host and leave just "https://". --->
    <cfset basePath = basePath & "/">
<cfelseif NOT reFindNoCase("/$", basePath)>
    <!--- Page URL points at a file: remove the file name to get its directory --->
    <cfset basePath = reReplace(basePath, "/[^/]*$", "/")>
</cfif>
<cfelse>
<!--- Remote URL - use Playwright for JS-rendered content --->
<cfset arrayAppend(response.steps, "Fetching URL with Playwright: " & targetUrl)>
<!--- targetUrl is spliced into a single-quoted command-line argument below.
      Percent-encode every character that could close the quoted argument or start another one,
      so a crafted URL cannot inject extra arguments into the script invocation.
      NOTE(review): the URL scheme is not restricted here - confirm it is validated upstream. --->
<cfset playwrightUrlArg = trim(targetUrl)>
<cfset playwrightUrlArg = replace(playwrightUrlArg, "'", "%27", "all")>
<cfset playwrightUrlArg = replace(playwrightUrlArg, """", "%22", "all")>
<cfset playwrightUrlArg = replace(playwrightUrlArg, "\", "%5C", "all")>
<cfset playwrightUrlArg = replace(playwrightUrlArg, "`", "%60", "all")>
<cfset playwrightUrlArg = reReplace(playwrightUrlArg, "\s", "%20", "all")>
<cfset playwrightOutput = "">
<cfexecute name="/opt/playwright/run.sh" arguments="'#playwrightUrlArg#' 4000" timeout="90" variable="playwrightOutput" />
<cfif NOT len(trim(playwrightOutput))>
    <cfthrow message="Playwright returned empty response">
</cfif>
<!--- The script's output is parsed as a JSON object; this block reads its "error", "html" and optional "images" keys --->
<cfset playwrightResult = deserializeJSON(playwrightOutput)>
<cfif structKeyExists(playwrightResult, "error")>
    <cfthrow message="Playwright error: #playwrightResult.error#">
</cfif>
<cfset pageHtml = playwrightResult.html>
<cfset playwrightImages = structKeyExists(playwrightResult, "images") ? playwrightResult.images : arrayNew(1)>
<cfset arrayAppend(response.steps, "Fetched " & len(pageHtml) & " bytes via Playwright, " & arrayLen(playwrightImages) & " images captured")>
<!--- Extract base URL for resolving relative links --->
<!--- baseUrl  = scheme + host, used for root-relative ("/x.jpg") references.
      basePath = directory of the page, always ending in "/", used for document-relative references. --->
<cfset baseUrl = reReplace(targetUrl, "(https?://[^/?##]+).*", "\1")>
<!--- Drop the query string and fragment first so neither can be mistaken for a path segment --->
<cfset basePath = reReplace(targetUrl, "[?##].*$", "")>
<cfif reFindNoCase("^https?://[^/]+$", basePath)>
    <!--- Bare host with no path (e.g. "https://example.com"): the directory is the site root.
          Stripping the "last segment" here would eat the host and leave just "https://". --->
    <cfset basePath = basePath & "/">
<cfelseif NOT reFindNoCase("/$", basePath)>
    <!--- Page URL points at a file: remove the file name to get its directory --->
    <cfset basePath = reReplace(basePath, "/[^/]*$", "/")>
</cfif>
</cfif>
<cfelse>
<cfthrow message="Either 'url' or 'html' content is required">
</cfif>
<cfscript>
// The HTML-upload path never assigns playwrightImages, so give it an empty list here.
if (NOT isDefined("playwrightImages")) {
    playwrightImages = arrayNew(1);
}

// Page list consumed by the scanning loops below. It holds a single entry:
// the fetched/uploaded HTML, labelled with the target URL or "uploaded" when there is none.
menuPages = [ { url: (isDefined("targetUrl") ? targetUrl : "uploaded"), html: pageHtml } ];
</cfscript>
<!--- Extract images from all pages --->
<cfset allImages = arrayNew(1)>
<!--- imageUrls is used as a set: each key is an absolute image URL, so duplicates collapse --->
<cfset imageUrls = structNew()>
<cfset imageMappings = arrayNew(1)><!--- For local HTML: filename -> alt text mappings --->
<!--- Add images captured by Playwright (network requests) --->
<cfloop array="#playwrightImages#" index="pwImg">
    <!--- Skip URLs whose text suggests UI chrome (icons, logos, trackers) or SVG files --->
    <cfif NOT reFindNoCase("(icon|favicon|logo|sprite|pixel|tracking|badge|button|\.svg)", pwImg)>
        <cfset imageUrls[pwImg] = true>
    </cfif>
</cfloop>
<cfloop array="#menuPages#" index="menuPage">
    <!--- Find all img tags --->
    <cfset imgMatches = reMatchNoCase('<img[^>]+src=["'']([^"'']+)["''][^>]*>', menuPage.html)>
    <cfloop array="#imgMatches#" index="imgTag">
        <cfset imgSrc = reReplaceNoCase(imgTag, '.*src=["'']([^"'']+)["''].*', "\1")>
        <!--- Extract alt text for image mapping --->
        <cfset imgAlt = "">
        <cfif reFindNoCase('alt=["'']([^"'']+)["'']', imgTag)>
            <cfset imgAlt = reReplaceNoCase(imgTag, '.*alt=["'']([^"'']+)["''].*', "\1")>
        </cfif>
        <!--- Extract just the filename for matching local uploads --->
        <cfset imgFilename = listLast(imgSrc, "/\")>
        <!--- Mappings keep the src exactly as written in the page (before URL resolution) --->
        <cfif len(imgFilename) AND len(imgAlt) AND NOT reFindNoCase("(icon|favicon|logo|sprite|pixel|tracking|badge|button)", imgSrc)>
            <cfset mapping = structNew()>
            <cfset mapping["filename"] = imgFilename>
            <cfset mapping["alt"] = imgAlt>
            <cfset mapping["src"] = imgSrc>
            <cfset arrayAppend(imageMappings, mapping)>
        </cfif>
        <!--- Resolve relative URLs --->
        <cfif left(imgSrc, 2) EQ "//">
            <!--- Protocol-relative reference ("//cdn.example.com/x.jpg"): it already names a host,
                  so only the scheme is missing. Reuse the page's scheme, defaulting to https.
                  Without this branch the value would be glued onto baseUrl and point at the wrong host. --->
            <cfset imgSrc = (reFindNoCase("^http://", baseUrl) ? "http:" : "https:") & imgSrc>
        <cfelseif left(imgSrc, 1) EQ "/">
            <!--- Root-relative: attach scheme + host --->
            <cfset imgSrc = baseUrl & imgSrc>
        <cfelseif NOT reFindNoCase("^https?://", imgSrc) AND NOT reFindNoCase("^data:", imgSrc)>
            <!--- Document-relative: attach the page's directory --->
            <cfset imgSrc = basePath & imgSrc>
        </cfif>
        <!--- Skip data URLs, icons, and already-processed images --->
        <cfif reFindNoCase("^https?://", imgSrc) AND NOT structKeyExists(imageUrls, imgSrc)>
            <!--- Skip common icon/logo patterns that are too small --->
            <cfif NOT reFindNoCase("(icon|favicon|logo|sprite|pixel|tracking|badge|button)", imgSrc)>
                <cfset imageUrls[imgSrc] = true>
            </cfif>
        </cfif>
    </cfloop>
</cfloop>
<cfset arrayAppend(response.steps, "Found #structCount(imageUrls)# unique images")>
<cfscript>
// A target URL under /temp/menu-import/ is treated as a local scan (ZIP upload per the original note),
// which lets the image loader read those files from disk instead of over HTTP.
isLocalScan = isDefined("targetUrl") AND findNoCase("/temp/menu-import/", targetUrl);
localBasePath = "";
if (isLocalScan) {
    // Keep only "/temp/menu-import/<folder>/" from the URL and map it to a filesystem path.
    localBasePath = expandPath(
        reReplaceNoCase(targetUrl, "https?://[^/]+(/temp/menu-import/[^/]+/).*", "\1")
    );
    arrayAppend(response.steps, "Local scan detected, base path: " & localBasePath);
}
</cfscript>
<!--- Download/read images (limit to 20) --->
<!--- Walks the collected image URLs and loads each one either from the local import folder
      (local scans) or over HTTP. Images larger than 5000 bytes are base64-encoded and appended
      to imageDataArray as { type, source{type, media_type, data}, url } structs. --->
<cfset imageDataArray = arrayNew(1)>
<cfset downloadedCount = 0>
<cfset localReadCount = 0>
<cfloop collection="#imageUrls#" item="imgUrl">
    <cfif downloadedCount GTE 20>
        <cfbreak>
    </cfif>
    <cftry>
        <cfset imgBytes = 0>
        <cfset imgContent = "">
        <cfset mediaType = "image/jpeg">
        <!--- Check if this is a local file we can read directly --->
        <cfif isLocalScan AND findNoCase("/temp/menu-import/", imgUrl)>
            <!--- Convert URL to local path --->
            <cfset localRelPath = reReplaceNoCase(imgUrl, "https?://[^/]+(/temp/menu-import/.*)", "\1")>
            <!--- imgUrl originates from page markup, so treat it as untrusted: only read the file
                  when the path stays under /temp/menu-import/ and contains no ".." segment that
                  could climb out of the import folder. --->
            <cfif reFindNoCase("^/temp/menu-import/", localRelPath) AND NOT find("..", localRelPath)>
                <cfset localPath = expandPath(localRelPath)>
                <cfif fileExists(localPath)>
                    <cfset imgContent = fileReadBinary(localPath)>
                    <cfset imgBytes = len(imgContent)>
                    <!--- Determine media type from extension --->
                    <cfset ext = lCase(listLast(localPath, "."))>
                    <cfif ext EQ "png"><cfset mediaType = "image/png">
                    <cfelseif ext EQ "gif"><cfset mediaType = "image/gif">
                    <cfelseif ext EQ "webp"><cfset mediaType = "image/webp">
                    </cfif>
                    <cfset localReadCount = localReadCount + 1>
                </cfif>
            <cfelse>
                <cfset arrayAppend(response.steps, "Skipped local image with unexpected path")>
            </cfif>
        <cfelse>
            <!--- Fetch remote image via HTTP --->
            <cfhttp url="#imgUrl#" method="GET" timeout="10" result="imgResult" getasbinary="yes">
            </cfhttp>
            <cfif findNoCase("200", imgResult.statusCode) AND isBinary(imgResult.fileContent)>
                <cfset contentType = structKeyExists(imgResult.responseHeader, "Content-Type") ? imgResult.responseHeader["Content-Type"] : "">
                <!--- Only accept responses that declare one of the handled image types --->
                <cfif reFindNoCase("image/(jpeg|jpg|png|gif|webp)", contentType)>
                    <cfset imgContent = imgResult.fileContent>
                    <cfset imgBytes = len(imgContent)>
                    <cfif findNoCase("png", contentType)><cfset mediaType = "image/png"></cfif>
                    <cfif findNoCase("gif", contentType)><cfset mediaType = "image/gif"></cfif>
                    <cfif findNoCase("webp", contentType)><cfset mediaType = "image/webp"></cfif>
                </cfif>
            </cfif>
        </cfif>
        <!--- Process the image if we got valid content (anything of 5000 bytes or less is ignored) --->
        <cfif imgBytes GT 5000>
            <cfset base64Content = toBase64(imgContent)>
            <cfset imgSource = structNew()>
            <cfset imgSource["type"] = "base64">
            <cfset imgSource["media_type"] = mediaType>
            <cfset imgSource["data"] = base64Content>
            <cfset imgStruct = structNew()>
            <cfset imgStruct["type"] = "image">
            <cfset imgStruct["source"] = imgSource>
            <cfset imgStruct["url"] = imgUrl>
            <cfset arrayAppend(imageDataArray, imgStruct)>
            <cfset downloadedCount = downloadedCount + 1>
        </cfif>
        <cfcatch>
            <!--- Skip failed downloads: image loading is best-effort and must not fail the request --->
        </cfcatch>
    </cftry>
</cfloop>
<cfset arrayAppend(response.steps, "Loaded #arrayLen(imageDataArray)# valid images (#localReadCount# from local disk)")>
<!--- ============================================================ --->
<!--- TOAST FAST PATH: Parse __OO_STATE__ directly instead of Claude --->
<!--- ============================================================ --->
<cfif findNoCase("window.__OO_STATE__", pageHtml) AND findNoCase("toasttab", pageHtml)>
<cfset arrayAppend(response.steps, "Toast page detected - extracting menu data from __OO_STATE__")>
<cftry>
<cfset ooStateMatch = reMatchNoCase("window\.__OO_STATE__\s*=\s*(\{.*?\});\s*window\.", pageHtml)>
<cfif arrayLen(ooStateMatch)>
<cfscript>
// The regex match still carries the "window.__OO_STATE__ =" prefix and the trailing
// "; window." it was anchored on; strip both so only the JSON object text remains.
ooStateJson = reReplaceNoCase(ooStateMatch[1], "window\.__OO_STATE__\s*=\s*", "");
ooStateJson = reReplace(ooStateJson, ";\s*window\.$", "");
ooState = deserializeJSON(ooStateJson);

// Accumulators filled while walking the state object.
toastBusiness = structNew();
toastCategories = arrayNew(1);
toastItems = arrayNew(1);
categorySet = structNew();   // category names already added to toastCategories
itemId = 1;                  // running counter behind the "item_N" ids
menuNames = arrayNew(1);

// Extract restaurant info from ROOT_QUERY (Apollo cache format)
if (structKeyExists(ooState, "ROOT_QUERY")) {
    rootQuery = ooState["ROOT_QUERY"];
    for (rqKey in rootQuery) {
        // Only struct-valued entries whose key contains "restaurantV2By" or "restaurantV2(" are read.
        if (NOT ((findNoCase("restaurantV2By", rqKey) OR findNoCase("restaurantV2(", rqKey)) AND isStruct(rootQuery[rqKey]))) {
            continue;
        }
        restaurant = rootQuery[rqKey];

        // The first name found wins; later matching entries do not overwrite it.
        if (structKeyExists(restaurant, "name") AND NOT structKeyExists(toastBusiness, "name")) {
            toastBusiness["name"] = restaurant.name;
        }
        if (structKeyExists(restaurant, "description") AND NOT isNull(restaurant.description) AND len(trim(toString(restaurant.description)))) {
            toastBusiness["description"] = trim(toString(restaurant.description));
        }

        if (structKeyExists(restaurant, "location") AND isStruct(restaurant.location)) {
            loc = restaurant.location;
            if (structKeyExists(loc, "address1")) {
                // Single-line address: "address1, city, state zip", skipping missing parts.
                toastBusiness["address"] = loc.address1;
                if (structKeyExists(loc, "city") AND NOT isNull(loc.city)) {
                    toastBusiness["address"] = toastBusiness.address & ", " & loc.city;
                }
                if (structKeyExists(loc, "state") AND NOT isNull(loc.state)) {
                    toastBusiness["address"] = toastBusiness.address & ", " & loc.state;
                }
                if (structKeyExists(loc, "zip") AND NOT isNull(loc.zip)) {
                    toastBusiness["address"] = toastBusiness.address & " " & loc.zip;
                }
            }
            if (structKeyExists(loc, "phone") AND NOT isNull(loc.phone)) {
                toastBusiness["phone"] = loc.phone;
            }
        }

        // Brand colour is stored without the first "#" character.
        if (structKeyExists(restaurant, "brandColor") AND NOT isNull(restaurant.brandColor)) {
            toastBusiness["brandColor"] = replace(restaurant.brandColor, "##", "");
        }
    }
}
</cfscript>
<cfscript>
/*
 * Walk every top-level key of the parsed __OO_STATE__ object.
 *   "Restaurant:*" entries (older Toast format) supply business details while no name is known yet.
 *   "Menu:*" entries supply groups -> categories, and group/subgroup items -> toastItems.
 * Items of a group and of its subgroups are built by one shared loop (previously two verbatim
 * copies). Entries that are not structs are skipped instead of throwing, so a single malformed
 * entry no longer aborts the whole fast path.
 */
for (ooKey in ooState) {

    // Also check for Restaurant: keys (older Toast format)
    if (left(ooKey, 11) EQ "Restaurant:" AND NOT structKeyExists(toastBusiness, "name") AND isStruct(ooState[ooKey])) {
        restaurant = ooState[ooKey];
        if (structKeyExists(restaurant, "name")) {
            toastBusiness["name"] = restaurant.name;
        }
        if (structKeyExists(restaurant, "location") AND isStruct(restaurant.location)) {
            loc = restaurant.location;
            if (structKeyExists(loc, "address1")) {
                // Single-line address: "address1, city, state zipCode", skipping missing parts.
                toastBusiness["address"] = loc.address1;
                if (structKeyExists(loc, "city")) {
                    toastBusiness["address"] = toastBusiness.address & ", " & loc.city;
                }
                if (structKeyExists(loc, "state")) {
                    toastBusiness["address"] = toastBusiness.address & ", " & loc.state;
                }
                if (structKeyExists(loc, "zipCode")) {
                    toastBusiness["address"] = toastBusiness.address & " " & loc.zipCode;
                }
            }
            if (structKeyExists(loc, "phone")) {
                toastBusiness["phone"] = loc.phone;
            }
        }
        if (structKeyExists(restaurant, "brandColor")) {
            toastBusiness["brandColor"] = replace(restaurant.brandColor, "##", "");
        }
    }

    // Extract menu data
    if (left(ooKey, 5) EQ "Menu:" AND isStruct(ooState[ooKey])) {
        menu = ooState[ooKey];
        if (structKeyExists(menu, "groups") AND isArray(menu.groups)) {
            // Use menu name as parent category if multiple menus
            menuName = structKeyExists(menu, "name") ? menu.name : "";
            if (len(menuName)) {
                arrayAppend(menuNames, menuName);
            }

            for (group in menu.groups) {
                if (NOT isStruct(group)) {
                    continue;
                }
                groupName = structKeyExists(group, "name") ? trim(group.name) : "Menu";
                if (NOT structKeyExists(categorySet, groupName)) {
                    categorySet[groupName] = true;
                    catObj = { "name": groupName, "itemCount": 0 };
                    // Store menu name for parent category assignment later
                    catObj["menuName"] = menuName;
                    arrayAppend(toastCategories, catObj);
                }

                // Sections whose items are extracted: the group itself first, then its subgroups in order.
                menuSections = arrayNew(1);
                arrayAppend(menuSections, { "name": groupName, "source": group });

                // Subgroups may be stored under any of three keys; the first one present is used.
                subgroupsArr = arrayNew(1);
                if (structKeyExists(group, "subgroups") AND isArray(group.subgroups)) {
                    subgroupsArr = group.subgroups;
                } else if (structKeyExists(group, "children") AND isArray(group.children)) {
                    subgroupsArr = group.children;
                } else if (structKeyExists(group, "childGroups") AND isArray(group.childGroups)) {
                    subgroupsArr = group.childGroups;
                }
                for (subgroup in subgroupsArr) {
                    if (NOT isStruct(subgroup)) {
                        continue;
                    }
                    // An unnamed subgroup folds into its parent group's category.
                    subName = structKeyExists(subgroup, "name") ? trim(subgroup.name) : groupName;
                    if (len(subName) AND NOT structKeyExists(categorySet, subName)) {
                        categorySet[subName] = true;
                        arrayAppend(toastCategories, { "name": subName, "parentCategoryName": groupName, "itemCount": 0 });
                    }
                    arrayAppend(menuSections, { "name": subName, "source": subgroup });
                }

                // Build one item struct per named item in every section.
                for (menuSection in menuSections) {
                    sectionSource = menuSection.source;
                    if (NOT (structKeyExists(sectionSource, "items") AND isArray(sectionSource.items))) {
                        continue;
                    }
                    for (item in sectionSource.items) {
                        if (NOT isStruct(item) OR NOT structKeyExists(item, "name") OR NOT len(trim(item.name))) {
                            continue;
                        }
                        itemStruct = structNew();
                        itemStruct["id"] = "item_" & itemId;
                        itemStruct["name"] = trim(item.name);
                        itemStruct["category"] = menuSection.name;
                        itemStruct["modifiers"] = arrayNew(1);
                        itemStruct["hasModifiers"] = structKeyExists(item, "hasModifiers") AND item.hasModifiers EQ true;
                        itemStruct["guid"] = structKeyExists(item, "guid") ? item.guid : "";
                        itemStruct["itemGroupGuid"] = structKeyExists(item, "itemGroupGuid") ? item.itemGroupGuid : "";
                        itemStruct["description"] = "";
                        if (structKeyExists(item, "description") AND NOT isNull(item.description)) {
                            itemStruct["description"] = trim(toString(item.description));
                        }

                        // Extract price: Toast uses "prices" array [4.50] or scalar "price".
                        // Further fallbacks: unitPrice, basePrice, then digits parsed out of displayPrice.
                        itemStruct["price"] = 0;
                        if (structKeyExists(item, "prices") AND isArray(item.prices) AND arrayLen(item.prices) GT 0 AND isNumeric(item.prices[1])) {
                            itemStruct["price"] = val(item.prices[1]);
                        } else if (structKeyExists(item, "price") AND isNumeric(item.price)) {
                            itemStruct["price"] = val(item.price);
                        } else if (structKeyExists(item, "unitPrice") AND isNumeric(item.unitPrice)) {
                            itemStruct["price"] = val(item.unitPrice);
                        } else if (structKeyExists(item, "basePrice") AND isNumeric(item.basePrice)) {
                            itemStruct["price"] = val(item.basePrice);
                        } else if (structKeyExists(item, "displayPrice") AND len(trim(toString(item.displayPrice)))) {
                            priceStr = reReplace(toString(item.displayPrice), "[^0-9.]", "", "all");
                            if (len(priceStr) AND isNumeric(priceStr)) {
                                itemStruct["price"] = val(priceStr);
                            }
                        }

                        // Extract image URL: the first of medium, large, small that is present.
                        itemStruct["imageUrl"] = "";
                        if (structKeyExists(item, "imageUrls") AND NOT isNull(item.imageUrls) AND isStruct(item.imageUrls)) {
                            if (structKeyExists(item.imageUrls, "medium")) {
                                itemStruct["imageUrl"] = item.imageUrls.medium;
                            } else if (structKeyExists(item.imageUrls, "large")) {
                                itemStruct["imageUrl"] = item.imageUrls.large;
                            } else if (structKeyExists(item.imageUrls, "small")) {
                                itemStruct["imageUrl"] = item.imageUrls.small;
                            }
                            if (len(itemStruct.imageUrl)) {
                                itemStruct["imageSrc"] = itemStruct.imageUrl;
                                itemStruct["imageFilename"] = listLast(itemStruct.imageUrl, "/");
                            }
                        }

                        arrayAppend(toastItems, itemStruct);
                        itemId++;
                    }
                }
            }
        }
    }
}
</cfscript>
<cfscript>
// Fallback: take the business name from the <title> tag when __OO_STATE__ supplied none.
if (NOT structKeyExists(toastBusiness, "name") OR NOT len(toastBusiness.name)) {
    titleMatch = reMatchNoCase('<title[^>]*>([^<]+)</title>', pageHtml);
    if (arrayLen(titleMatch)) {
        titleText = trim(reReplaceNoCase(titleMatch[1], '.*<title[^>]*>([^<]+)</title>.*', '\1'));
        // Keep only the part before the first "|" ...
        if (findNoCase("|", titleText)) {
            titleText = trim(listFirst(titleText, "|"));
        }
        // ... and drop a trailing " - Menu / Order / Online ..." suffix.
        titleText = reReplaceNoCase(titleText, "\s*-\s*(Menu|Order|Online).*$", "");
        if (len(titleText)) {
            toastBusiness["name"] = titleText;
        }
    }
}

// Clean business name: strip the street address when it was embedded in the name.
if (structKeyExists(toastBusiness, "name") AND structKeyExists(toastBusiness, "address")) {
    // First comma-separated piece of the address, i.e. the street line.
    bizAddr1 = listFirst(toastBusiness.address, ",");
    if (len(bizAddr1) AND findNoCase(bizAddr1, toastBusiness.name)) {
        toastBusiness["name"] = trim(replaceNoCase(toastBusiness.name, bizAddr1, ""));
    }
    // Remove dashes or pipes left dangling at either end of the name.
    toastBusiness["name"] = trim(reReplace(toastBusiness.name, "[\-\|]+$", ""));
    toastBusiness["name"] = trim(reReplace(toastBusiness.name, "^[\-\|]+", ""));
}
</cfscript>
<cfscript>
/*
 * Build parent/child category hierarchy if multiple menus.
 *
 * Each distinct menu name becomes a parent category; the groups recorded for that menu follow it
 * with parentCategoryName set to the menu name. Compared with a plain menuName filter this also:
 *   - keeps subgroup categories (they carry parentCategoryName = group name but no menuName),
 *     placing them directly after their group;
 *   - keeps categories that belong to a menu without a name, appended at the end unchanged;
 *   - emits each menu name once even if several Menu entries share it;
 *   - does not emit a group as a child of itself when it has the same name as its menu.
 * Without these, items could reference categories missing from the returned list.
 */
uniqueMenuNames = arrayNew(1);
seenMenuNames = structNew();
for (mn in menuNames) {
    if (NOT structKeyExists(seenMenuNames, mn)) {
        seenMenuNames[mn] = true;
        arrayAppend(uniqueMenuNames, mn);
    }
}

if (arrayLen(uniqueMenuNames) GT 1) {
    hierarchicalCategories = arrayNew(1);
    placedCategories = structNew();   // names of categories already emitted

    for (mn in uniqueMenuNames) {
        // Add parent category (the menu name)
        parentCat = { "name": mn, "itemCount": 0 };
        arrayAppend(hierarchicalCategories, parentCat);

        // Add subcategories under this parent
        for (tc in toastCategories) {
            if (NOT (structKeyExists(tc, "menuName") AND tc.menuName EQ mn)) {
                continue;
            }
            if (structKeyExists(placedCategories, tc.name)) {
                continue;
            }
            placedCategories[tc.name] = true;
            if (tc.name NEQ mn) {
                tc["parentCategoryName"] = mn;
                arrayAppend(hierarchicalCategories, tc);
            }
            // When the group shares the menu's name, the parent entry above stands in for it.

            // Subgroup categories of this group keep their existing parent (the group name).
            for (sc in toastCategories) {
                if (NOT structKeyExists(sc, "menuName")
                    AND structKeyExists(sc, "parentCategoryName")
                    AND sc.parentCategoryName EQ tc.name
                    AND NOT structKeyExists(placedCategories, sc.name)) {
                    placedCategories[sc.name] = true;
                    arrayAppend(hierarchicalCategories, sc);
                }
            }
        }
    }

    // Anything not attached to a named menu is kept as-is so no item loses its category.
    for (tc in toastCategories) {
        if (NOT structKeyExists(placedCategories, tc.name)) {
            placedCategories[tc.name] = true;
            arrayAppend(hierarchicalCategories, tc);
        }
    }

    toastCategories = hierarchicalCategories;
}
</cfscript>
<cfscript>
// Update category item counts: for each category, count the items whose "category" equals its name.
for (ci = 1; ci LTE arrayLen(toastCategories); ci++) {
    catName = toastCategories[ci].name;
    count = 0;
    for (ti in toastItems) {
        if (ti.category EQ catName) {
            count = count + 1;
        }
    }
    toastCategories[ci]["itemCount"] = count;
}
arrayAppend(
    response.steps,
    "Extracted " & arrayLen(toastItems) & " items from " & arrayLen(toastCategories) & " categories via __OO_STATE__"
);
</cfscript>
<!--- Extract Toast modifiers via Playwright if items have modifiers --->
<!--- Counts the items flagged hasModifiers; when there are any, runs the external modifier script
      against the restaurant's Toast ordering URL and copies the returned modifier groups onto the
      matching items (matched by item name). Any failure is logged and processing continues
      without modifiers. --->
<cfset toastModifiers = arrayNew(1)>
<cfset modifierItemCount = 0>
<cfloop array="#toastItems#" index="ti">
    <cfif structKeyExists(ti, "hasModifiers") AND ti.hasModifiers>
        <cfset modifierItemCount++>
    </cfif>
</cfloop>
<cfif modifierItemCount GT 0>
    <cfset arrayAppend(response.steps, modifierItemCount & " items have modifiers - extracting via Playwright")>
    <cftry>
        <!--- Determine Toast URL for Playwright --->
        <cfset toastUrl = "">
        <cfif isDefined("targetUrl") AND reFindNoCase("order\.toasttab\.com", targetUrl)>
            <!--- URL mode: use original URL --->
            <cfset toastUrl = targetUrl>
        <cfelse>
            <!--- Saved HTML mode: extract slug from HTML --->
            <!--- Try __APOLLO_STATE__ shortUrl first --->
            <cfset slugMatch = reMatchNoCase('"shortUrl"\s*:\s*"([^"]+)"', pageHtml)>
            <cfif arrayLen(slugMatch)>
                <cfset slug = reReplaceNoCase(slugMatch[1], '.*"shortUrl"\s*:\s*"([^"]+)".*', '\1')>
                <!--- The slug comes from page content and ends up on a command line, so accept it
                      only when it consists of plain URL-segment characters. --->
                <cfif reFind("^[A-Za-z0-9._-]+$", slug)>
                    <cfset toastUrl = "https://order.toasttab.com/online/" & slug>
                </cfif>
            </cfif>
            <!--- Try gift card URL pattern --->
            <cfif NOT len(toastUrl)>
                <cfset giftMatch = reMatchNoCase('toasttab\.com/([a-zA-Z0-9_-]+)/giftcards', pageHtml)>
                <cfif arrayLen(giftMatch)>
                    <cfset slug = reReplaceNoCase(giftMatch[1], '.*toasttab\.com/([a-zA-Z0-9_-]+)/giftcards.*', '\1')>
                    <cfset toastUrl = "https://order.toasttab.com/online/" & slug>
                </cfif>
            </cfif>
        </cfif>
        <cfif len(toastUrl)>
            <cfset arrayAppend(response.steps, "Fetching modifiers from: " & toastUrl)>
            <!--- The URL is spliced into a single-quoted command-line argument: percent-encode every
                  character that could close the quoted argument or start another one. --->
            <cfset toastUrlArg = trim(toastUrl)>
            <cfset toastUrlArg = replace(toastUrlArg, "'", "%27", "all")>
            <cfset toastUrlArg = replace(toastUrlArg, """", "%22", "all")>
            <cfset toastUrlArg = replace(toastUrlArg, "\", "%5C", "all")>
            <cfset toastUrlArg = replace(toastUrlArg, "`", "%60", "all")>
            <cfset toastUrlArg = reReplace(toastUrlArg, "\s", "%20", "all")>
            <cfset modOutput = "">
            <cfexecute name="/opt/playwright/run-toast-modifiers.sh" arguments="'#toastUrlArg#'" timeout="180" variable="modOutput" />
            <cfif len(trim(modOutput))>
                <!--- Script output is parsed as JSON; the keys read below are "modifiers",
                      "itemModifierMap" and "stats", each optional --->
                <cfset modResult = deserializeJSON(modOutput)>
                <!--- Extract modifiers --->
                <cfif structKeyExists(modResult, "modifiers") AND isArray(modResult.modifiers)>
                    <cfset toastModifiers = modResult.modifiers>
                    <cfset arrayAppend(response.steps, "Extracted " & arrayLen(toastModifiers) & " unique modifier groups")>
                </cfif>
                <!--- Map modifiers to items --->
                <cfif structKeyExists(modResult, "itemModifierMap") AND isStruct(modResult.itemModifierMap)>
                    <cfset modMap = modResult.itemModifierMap>
                    <!--- Count the items that actually received modifiers; the size of the map can
                          differ from that when names do not match or repeat --->
                    <cfset mappedItemCount = 0>
                    <cfloop from="1" to="#arrayLen(toastItems)#" index="mi">
                        <cfif structKeyExists(modMap, toastItems[mi].name)>
                            <cfset toastItems[mi]["modifiers"] = modMap[toastItems[mi].name]>
                            <cfset mappedItemCount = mappedItemCount + 1>
                        </cfif>
                    </cfloop>
                    <cfset arrayAppend(response.steps, "Mapped modifiers to " & mappedItemCount & " items")>
                </cfif>
                <!--- Log stats --->
                <cfif structKeyExists(modResult, "stats") AND isStruct(modResult.stats)>
                    <cfset arrayAppend(response.steps, "Modifier stats: " & serializeJSON(modResult.stats))>
                </cfif>
            <cfelse>
                <cfset arrayAppend(response.steps, "Playwright modifier script returned empty output")>
            </cfif>
        <cfelse>
            <cfset arrayAppend(response.steps, "Could not determine Toast URL for modifier extraction")>
        </cfif>
        <cfcatch>
            <cfset arrayAppend(response.steps, "Modifier extraction failed: " & cfcatch.message & " - continuing without modifiers")>
        </cfcatch>
    </cftry>
</cfif>
<!--- Build and return response directly - skip Claude --->
<!--- Only taken when at least one item was extracted; otherwise execution continues past this block --->
<cfif arrayLen(toastItems)>
    <cfscript>
    menuData = {
        "business": toastBusiness,
        "categories": toastCategories,
        "items": toastItems,
        "modifiers": toastModifiers,
        "imageUrls": arrayNew(1),
        "imageMappings": imageMappings,
        "headerCandidateIndices": arrayNew(1)
    };
    response["OK"] = true;
    response["DATA"] = menuData;
    if (isDefined("targetUrl")) {
        response["sourceUrl"] = targetUrl;
    } else {
        response["sourceUrl"] = "uploaded";
    }
    response["pagesProcessed"] = 1;
    response["imagesFound"] = arrayLen(imageDataArray);
    response["playwrightImagesCount"] = arrayLen(playwrightImages);
    // Marks the response as produced by the __OO_STATE__ parser.
    response["parsedVia"] = "toast_oo_state";
    </cfscript>
    <!--- Reset the output buffer, emit the JSON document and stop the request --->
    <cfcontent type="application/json" reset="true">
    <cfoutput>#serializeJSON(response)#</cfoutput>
    <cfabort>
</cfif>
</cfif>
<cfcatch>
<cfset arrayAppend(response.steps, "Toast __OO_STATE__ parsing failed: " & cfcatch.message & " - falling back to Claude")>
</cfcatch>
</cftry>
</cfif>
<cfscript>
// Look for embedded JSON data (Next.js __NEXT_DATA__, window state, data-* attributes, JSON-LD).
// Every hit is appended to embeddedJsonData under a "--- LABEL ---" header line.
embeddedJsonData = "";
embeddedMenuItems = arrayNew(1);

for (menuPage in menuPages) {
    // __NEXT_DATA__ script tag (Next.js apps): keep only the script body of the first match.
    nextDataMatch = reMatchNoCase('<script[^>]*id=["'']__NEXT_DATA__["''][^>]*>([^<]+)</script>', menuPage.html);
    if (arrayLen(nextDataMatch)) {
        scriptContent = reReplaceNoCase(nextDataMatch[1], '<script[^>]*>([^<]+)</script>', '\1');
        embeddedJsonData &= chr(10) & "--- __NEXT_DATA__ ---" & chr(10) & scriptContent;
    }

    // window.__SOMETHING__ = {...}; assignments: the whole matched statement is kept.
    stateMatches = reMatchNoCase('window\.__[A-Z_]+__\s*=\s*(\{[^;]+\});', menuPage.html);
    for (stateMatch in stateMatches) {
        embeddedJsonData &= chr(10) & "--- WINDOW_STATE ---" & chr(10) & stateMatch;
    }

    // data-props / data-page / data-state attributes holding a JSON object.
    dataPropsMatches = reMatchNoCase('data-(?:props|page|state)=["''](\{[^"'']+\})["'']', menuPage.html);
    for (propsMatch in dataPropsMatches) {
        embeddedJsonData &= chr(10) & "--- DATA_PROPS ---" & chr(10) & propsMatch;
    }

    // JSON-LD structured data: kept only when the script body mentions "menu" or "MenuItem".
    jsonLdMatches = reMatchNoCase('<script[^>]*type=["'']application/ld\+json["''][^>]*>([^<]+)</script>', menuPage.html);
    for (jsonLdMatch in jsonLdMatches) {
        scriptContent = reReplaceNoCase(jsonLdMatch, '<script[^>]*>([^<]+)</script>', '\1');
        if (findNoCase("menu", scriptContent) OR findNoCase("MenuItem", scriptContent)) {
            embeddedJsonData &= chr(10) & "--- JSON_LD_MENU ---" & chr(10) & scriptContent;
        }
    }
}

// Debug fields: whether anything was found, its total length and the first 2000 characters.
if (len(embeddedJsonData)) {
    response["DEBUG_EMBEDDED_JSON_FOUND"] = true;
    response["DEBUG_EMBEDDED_JSON_LENGTH"] = len(embeddedJsonData);
    response["DEBUG_EMBEDDED_JSON_PREVIEW"] = left(embeddedJsonData, 2000);
} else {
    response["DEBUG_EMBEDDED_JSON_FOUND"] = false;
}
</cfscript>
<cfscript>
// Combine all page HTML into one text block, each page introduced by a "--- PAGE: <url> ---" line.
combinedHtml = "";
for (menuPage in menuPages) {
    // Remove script blocks, style blocks and HTML comments before appending the page.
    cleanHtml = reReplaceNoCase(menuPage.html, "<script[^>]*>.*?</script>", "", "all");
    cleanHtml = reReplaceNoCase(cleanHtml, "<style[^>]*>.*?</style>", "", "all");
    cleanHtml = reReplaceNoCase(cleanHtml, "<!--.*?-->", "", "all");
    combinedHtml &= chr(10) & "--- PAGE: " & menuPage.url & " ---" & chr(10) & cleanHtml;
}

// If embedded JSON was found, append it after the HTML to help Claude find all menu items.
if (len(embeddedJsonData)) {
    combinedHtml &= chr(10) & chr(10) & "=== EMBEDDED JSON DATA (may contain full menu) ===" & chr(10) & embeddedJsonData;
}

// Limit the text sent to Claude to the first 100000 characters.
if (len(combinedHtml) GT 100000) {
    combinedHtml = left(combinedHtml, 100000);
}
</cfscript>
<cfscript>
// Debug: collect the trimmed, non-empty text of every h3 tag whose content has no nested markup.
h3Tags = reMatchNoCase("<h3[^>]*>([^<]*)</h3>", combinedHtml);
h3Texts = arrayNew(1);
for (h3Tag in h3Tags) {
    h3Text = trim(reReplaceNoCase(h3Tag, "<h3[^>]*>([^<]*)</h3>", "\1"));
    if (len(h3Text)) {
        arrayAppend(h3Texts, h3Text);
    }
}
response["DEBUG_H3_TAGS"] = h3Texts;

// Same collection for h4 tags.
h4Tags = reMatchNoCase("<h4[^>]*>([^<]*)</h4>", combinedHtml);
h4Texts = arrayNew(1);
for (h4Tag in h4Tags) {
    h4Text = trim(reReplaceNoCase(h4Tag, "<h4[^>]*>([^<]*)</h4>", "\1"));
    if (len(h4Text)) {
        arrayAppend(h4Texts, h4Text);
    }
}
response["DEBUG_H4_TAGS"] = h4Texts;

// Debug: only the number of h2 / h5 / h6 tags is reported.
h2Tags = reMatchNoCase("<h2[^>]*>([^<]*)</h2>", combinedHtml);
h5Tags = reMatchNoCase("<h5[^>]*>([^<]*)</h5>", combinedHtml);
h6Tags = reMatchNoCase("<h6[^>]*>([^<]*)</h6>", combinedHtml);
response["DEBUG_H2_COUNT"] = arrayLen(h2Tags);
response["DEBUG_H5_COUNT"] = arrayLen(h5Tags);
response["DEBUG_H6_COUNT"] = arrayLen(h6Tags);

// Debug: excerpt of the HTML around the first "Beverages" occurrence
// (from up to 100 characters before it to up to 900 after) to show the page structure.
bevPos = findNoCase("Beverages", combinedHtml);
if (bevPos GT 0) {
    bevStart = max(1, bevPos - 100);
    bevEnd = min(len(combinedHtml), bevPos + 900);
    response["DEBUG_BEVERAGES_HTML"] = mid(combinedHtml, bevStart, bevEnd - bevStart);
}

arrayAppend(response.steps, "Found " & arrayLen(h3Texts) & " h3 and " & arrayLen(h4Texts) & " h4 tags");
</cfscript>
<cfscript>
// Server-side heading hierarchy detection from the HTML h2/h3 structure.
// Scans combinedHtml left to right: each h2 sets the current parent, and every h3 met while a
// parent is set is recorded under it in headingHierarchy (parent text -> array of h3 texts).
headingHierarchy = structNew();
hierarchyDesc = "";
scanPos = 1;
currentH2 = "";

while (scanPos LT len(combinedHtml)) {
    nextH2 = reFindNoCase("<h2[^>]*>", combinedHtml, scanPos);
    nextH3 = reFindNoCase("<h3[^>]*>", combinedHtml, scanPos);
    if (nextH2 EQ 0 AND nextH3 EQ 0) {
        break;
    }

    if (nextH2 GT 0 AND (nextH3 EQ 0 OR nextH2 LT nextH3)) {
        // The next heading is an h2.
        closePos = findNoCase("</h2>", combinedHtml, nextH2);
        if (closePos EQ 0) {
            break;
        }
        tagContent = mid(combinedHtml, nextH2, closePos + 5 - nextH2);
        // Text of the heading with all markup removed.
        h2Raw = trim(reReplaceNoCase(tagContent, "<[^>]+>", "", "all"));
        // Letters, digits and spaces only: used to test whether the heading is a real category.
        h2Clean = trim(reReplace(h2Raw, "[^a-zA-Z0-9 ]", "", "all"));
        // Empty headings, "MENU" and copyright lines clear the parent instead of becoming one.
        if (len(h2Clean) AND h2Clean NEQ "MENU" AND NOT findNoCase("copyright", h2Clean)) {
            currentH2 = h2Raw;
        } else {
            currentH2 = "";
        }
        scanPos = closePos + 5;
    } else {
        // The next heading is an h3.
        closePos = findNoCase("</h3>", combinedHtml, nextH3);
        if (closePos EQ 0) {
            break;
        }
        tagContent = mid(combinedHtml, nextH3, closePos + 5 - nextH3);
        h3Text = trim(reReplaceNoCase(tagContent, "<[^>]+>", "", "all"));
        if (len(currentH2) AND len(h3Text)) {
            if (NOT structKeyExists(headingHierarchy, currentH2)) {
                headingHierarchy[currentH2] = arrayNew(1);
            }
            arrayAppend(headingHierarchy[currentH2], h3Text);
        }
        scanPos = closePos + 5;
    }
}

// Render the hierarchy as one line per parent for the prompt, and expose it for debugging.
if (structCount(headingHierarchy) GT 0) {
    for (hParent in headingHierarchy) {
        hierarchyDesc &= "- """ & hParent & """ contains subsections: " & arrayToList(headingHierarchy[hParent], ", ") & chr(10);
    }
    response["DEBUG_HEADING_HIERARCHY"] = headingHierarchy;
    arrayAppend(response.steps, "Detected " & structCount(headingHierarchy) & " parent categories with subcategories from h2/h3 structure");
}
</cfscript>
<!--- System prompt for URL analysis --->
<!--- Sent as the "system" field of the Claude request. It fixes the JSON contract of the reply:
      top-level keys business / categories / modifiers / items, categories given either as plain
      strings or as objects with 'name' and 'subcategories', items carrying the SUBCATEGORY name in
      'category', an 'imageUrl' taken from the img src, numeric prices, and a 6-digit brandColor
      without a hash. Doubled double quotes inside the literal are CFML escapes for a single quote mark. --->
<cfset systemPrompt = "You are an expert at extracting structured menu data from restaurant website HTML. Extract ALL menu data visible in the HTML. Return valid JSON with these keys: business (object with name, address, phone, hours, brandColor), categories (array), modifiers (array), items (array with name, description, price, category, modifiers array, and imageUrl). CATEGORIES FORMAT: Each entry in the categories array can be either a simple string (for flat categories) OR an object with 'name' and optional 'subcategories' array. Example: [""Appetizers"", {""name"": ""Drinks"", ""subcategories"": [""Hot Drinks"", ""Cold Drinks""]}, ""Desserts""]. SUBCATEGORY DETECTION: If a section header contains nested titled sections beneath it (sub-headers with their own items), the outer section is the PARENT and inner sections are SUBCATEGORIES. For items in subcategories, set their 'category' field to the SUBCATEGORY name (not the parent). CRITICAL FOR IMAGES: Each menu item in the HTML is typically in a container (div, li, article) that also contains an img tag. Extract the img src URL and include it as 'imageUrl' for that item. Look for img tags that are siblings or children within the same menu-item container. The image URL should be the full or relative src value from the img tag - NOT the alt text. CRITICAL: Extract EVERY menu item from ALL sources including embedded JSON (__NEXT_DATA__, window state, JSON-LD). For brandColor: suggest a vibrant hex (6 digits, no hash). For prices: numbers (e.g., 12.99). Return ONLY valid JSON.">
<!--- Build message content --->
<!--- Prompt text comes first: base instruction, optional heading-hierarchy hint, then the HTML itself --->
<cfset userText = "Extract menu data from this restaurant website HTML. The images above are from the same website - identify which ones are food photos that could be used as item images, and which could be header/banner images.">
<cfif len(hierarchyDesc)>
    <!--- Headings were detected earlier: tell the model which sections are parents of which subsections --->
    <cfset userText &= chr(10) & chr(10) & "IMPORTANT - DETECTED SECTION HIERARCHY FROM HTML HEADINGS:" & chr(10) & "The following h2 sections contain h3 sub-sections. Use these as parent-subcategory relationships in your categories output:" & chr(10) & hierarchyDesc & "For each parent above, include it in the categories array as an OBJECT with 'name' and 'subcategories' array. Items belonging to a subsection should have their 'category' field set to the SUBCATEGORY name (not the parent).">
</cfif>
<cfset userText &= chr(10) & chr(10) & "Here is the HTML content:" & chr(10) & chr(10) & combinedHtml>

<!--- Content blocks: image blocks go first (at most 10), the single text block goes last --->
<cfset messagesContent = arrayNew(1)>
<cfset imgLimit = min(arrayLen(imageDataArray), 10)>
<cfloop from="1" to="#imgLimit#" index="imgIdx">
    <cfset imageBlock = structNew()>
    <cfset imageBlock["type"] = "image">
    <cfset imageBlock["source"] = imageDataArray[imgIdx].source>
    <cfset arrayAppend(messagesContent, imageBlock)>
</cfloop>
<cfset promptBlock = structNew()>
<cfset promptBlock["type"] = "text">
<cfset promptBlock["text"] = userText>
<cfset arrayAppend(messagesContent, promptBlock)>

<!--- One user turn carrying every content block (assigned only after the array is complete) --->
<cfset userMessage = structNew()>
<cfset userMessage["role"] = "user">
<cfset userMessage["content"] = messagesContent>
<cfset conversation = arrayNew(1)>
<cfset arrayAppend(conversation, userMessage)>

<!--- Request payload for the Messages endpoint --->
<cfset requestBody = structNew()>
<cfset requestBody["model"] = "claude-sonnet-4-20250514">
<cfset requestBody["max_tokens"] = 16384>
<cfset requestBody["temperature"] = 0>
<cfset requestBody["system"] = systemPrompt>
<cfset requestBody["messages"] = conversation>
<cfset arrayAppend(response.steps, "Sending to Claude API...")>
<!---
  Call Claude API.
  POSTs the serialized requestBody to the Messages endpoint and throws
  "Claude API error: <status> - <detail>" for any status other than 200.
--->
<cfhttp url="https://api.anthropic.com/v1/messages" method="POST" timeout="120" result="httpResult">
    <cfhttpparam type="header" name="Content-Type" value="application/json">
    <cfhttpparam type="header" name="x-api-key" value="#CLAUDE_API_KEY#">
    <cfhttpparam type="header" name="anthropic-version" value="2023-06-01">
    <cfhttpparam type="body" value="#serializeJSON(requestBody)#">
</cfhttp>
<!---
  Derive the numeric status from the status string (e.g. "200 OK").
  The first standalone three-digit number is taken as the code, so a status text that
  merely contains "200" somewhere is no longer treated as success; strings with no
  code at all (connection failures) yield 0.
--->
<cfset statusDigits = reMatch("\b[0-9]{3}\b", toString(httpResult.statusCode))>
<cfif arrayLen(statusDigits) GT 0>
    <cfset httpStatusCode = int(statusDigits[1])>
<cfelse>
    <cfset httpStatusCode = 0>
</cfif>
<cfif httpStatusCode NEQ 200>
    <!--- Default to the raw body; replace it with error.message when the body has that shape --->
    <cfset errorDetail = httpResult.fileContent>
    <cftry>
        <cfset errorResponse = deserializeJSON(httpResult.fileContent)>
        <cfif isStruct(errorResponse) AND structKeyExists(errorResponse, "error") AND isStruct(errorResponse.error) AND structKeyExists(errorResponse.error, "message")>
            <cfset errorDetail = errorResponse.error.message>
        </cfif>
        <cfcatch>
            <!--- Body was not JSON: report it verbatim --->
            <cfset errorDetail = httpResult.fileContent>
        </cfcatch>
    </cftry>
    <cfthrow message="Claude API error: #httpResult.statusCode# - #errorDetail#">
</cfif>
<!---
  Parse response.
  Decodes the API body, requires a non-empty "content" array, and copies the text of the
  first block whose type is "text" into responseText (left empty if there is none).
--->
<cfset claudeResponse = deserializeJSON(httpResult.fileContent)>
<cfif NOT isStruct(claudeResponse) OR NOT structKeyExists(claudeResponse, "content") OR NOT isArray(claudeResponse.content) OR NOT arrayLen(claudeResponse.content)>
    <cfthrow message="Empty response from Claude">
</cfif>
<!--- Surface truncation: output cut off at max_tokens usually shows up later as a JSON parse error --->
<cfif structKeyExists(claudeResponse, "stop_reason") AND isSimpleValue(claudeResponse.stop_reason) AND claudeResponse.stop_reason EQ "max_tokens">
    <cfset arrayAppend(response.steps, "Warning: Claude stopped at max_tokens - the menu JSON may be truncated")>
</cfif>
<cfset responseText = "">
<cfloop array="#claudeResponse.content#" index="block">
    <!--- Skip anything that is not a well-formed text block --->
    <cfif isStruct(block) AND structKeyExists(block, "type") AND block.type EQ "text" AND structKeyExists(block, "text")>
        <cfset responseText = block.text>
        <cfbreak>
    </cfif>
</cfloop>
<!---
  Clean up JSON response.
  Normalizes the model text so deserializeJSON has the best chance of succeeding:
  strips markdown fences, keeps only the span from the first "{" to the last "}",
  drops trailing commas and replaces raw control characters with spaces.
--->
<cfset responseText = trim(responseText)>
<!--- Strip markdown code fences (leading ``` or ```json, trailing ```) --->
<cfset responseText = reReplaceNoCase(responseText, "^```(json)?", "")>
<cfset responseText = trim(reReplace(responseText, "```$", ""))>
<!---
  Keep only the outermost JSON object. This removes any prose before the first "{"
  and any prose or fence after the last "}", whether or not the text already
  starts with "{".
--->
<cfset jsonStart = find("{", responseText)>
<!--- Position of the last "}" counted from the end of the string; 0 when there is none --->
<cfset lastBraceFromEnd = find("}", reverse(responseText))>
<cfif jsonStart GT 0 AND lastBraceFromEnd GT 0>
    <cfset jsonEnd = len(responseText) - lastBraceFromEnd + 1>
    <cfif jsonEnd GT jsonStart>
        <cfset responseText = mid(responseText, jsonStart, jsonEnd - jsonStart + 1)>
    </cfif>
<cfelseif jsonStart GT 1>
    <!--- No closing brace at all: still drop the leading prose so the debug output starts at the JSON --->
    <cfset responseText = mid(responseText, jsonStart, len(responseText) - jsonStart + 1)>
</cfif>
<cfset responseText = trim(responseText)>
<!--- Remove trailing commas before ] or } --->
<cfset responseText = reReplace(responseText, ",(\s*[\]\}])", "\1", "all")>
<!--- Remove control characters that break JSON --->
<cfset responseText = reReplace(responseText, "[\x00-\x1F]", " ", "all")>
<!---
  Try to parse JSON with error handling.
  On success menuData holds the decoded object. On failure the request ends here:
  a JSON error response is written and processing is aborted.
--->
<cftry>
    <cfset menuData = deserializeJSON(responseText)>
    <!--- Everything downstream treats menuData as a struct; reject arrays and scalars up front --->
    <cfif NOT isStruct(menuData)>
        <cfthrow message="Expected a JSON object at the top level">
    </cfif>
    <cfcatch type="any">
        <!--- Report with the same OK/MESSAGE keys the rest of this endpoint uses --->
        <cfset response["OK"] = false>
        <cfset response["MESSAGE"] = "JSON parse error: #cfcatch.message#">
        <!--- Legacy keys kept for callers that already read them --->
        <cfset response["success"] = false>
        <cfset response["error"] = "JSON parse error: #cfcatch.message#">
        <!--- Return the start of the raw response for debugging --->
        <cfset response["DEBUG_RAW_RESPONSE"] = left(responseText, 2000)>
        <cfset response["DEBUG_RESPONSE_LENGTH"] = len(responseText)>
        <cfcontent type="application/json; charset=utf-8" reset="true">
        <cfoutput>#serializeJSON(response)#</cfoutput>
        <cfabort>
    </cfcatch>
</cftry>
<!--- Debug: expose the Claude text exactly as it was handed to the JSON parser --->
<cfset response["DEBUG_RAW_CLAUDE"] = responseText>
<!--- Build image URL list for the wizard: one entry per image record that carries a "url" key --->
<cfset imageUrlList = arrayNew(1)>
<cfloop from="1" to="#arrayLen(imageDataArray)#" index="imgPos">
    <cfif structKeyExists(imageDataArray[imgPos], "url")>
        <cfset arrayAppend(imageUrlList, imageDataArray[imgPos].url)>
    </cfif>
</cfloop>
<!---
  Ensure expected structure.
  Guarantees menuData.business is a struct and that categories, modifiers and items
  are arrays. A key that is missing OR holds the wrong type is reset to an empty
  value, because the code below loops over these with cfloop array / arrayLen.
--->
<cfif NOT structKeyExists(menuData, "business") OR NOT isStruct(menuData.business)>
    <cfset menuData["business"] = structNew()>
</cfif>
<cfloop list="categories,modifiers,items" index="listKey">
    <cfif NOT structKeyExists(menuData, listKey) OR NOT isArray(menuData[listKey])>
        <cfset menuData[listKey] = arrayNew(1)>
    </cfif>
</cfloop>
<!--- Convert categories to expected format - preserve subcategory hierarchy --->
<cfscript>
// Flatten Claude's categories into a single list of { name, itemCount } structs.
// A string entry becomes one flat category. An object entry with a non-empty name
// becomes a parent entry followed by one entry per named subcategory, each tagged
// with parentCategoryName. Entries of any other shape are dropped.
formattedCategories = arrayNew(1);
for (catIdx = 1; catIdx <= arrayLen(menuData.categories); catIdx++) {
    rawCat = menuData.categories[catIdx];

    if (isSimpleValue(rawCat)) {
        flatCat = structNew();
        flatCat["name"] = rawCat;
        flatCat["itemCount"] = 0;
        arrayAppend(formattedCategories, flatCat);
        continue;
    }
    if (NOT isStruct(rawCat)) {
        continue;
    }

    parentName = "";
    if (structKeyExists(rawCat, "name")) {
        parentName = rawCat.name;
    }
    if (NOT len(parentName)) {
        continue;
    }

    parentCat = structNew();
    parentCat["name"] = parentName;
    parentCat["itemCount"] = 0;
    arrayAppend(formattedCategories, parentCat);

    // Subcategories may be plain strings or objects carrying a name
    if (structKeyExists(rawCat, "subcategories") AND isArray(rawCat.subcategories)) {
        for (subIdx = 1; subIdx <= arrayLen(rawCat.subcategories); subIdx++) {
            rawSub = rawCat.subcategories[subIdx];
            subcatName = "";
            if (isSimpleValue(rawSub)) {
                subcatName = rawSub;
            } else if (isStruct(rawSub) AND structKeyExists(rawSub, "name")) {
                subcatName = rawSub.name;
            }
            if (len(subcatName)) {
                childCat = structNew();
                childCat["name"] = subcatName;
                childCat["parentCategoryName"] = parentName;
                childCat["itemCount"] = 0;
                arrayAppend(formattedCategories, childCat);
            }
        }
    }
}
menuData["categories"] = formattedCategories;
</cfscript>
<!---
  Server-side hierarchy enforcement from HTML heading structure (backup if Claude returns flat).
  For every category that has no parentCategoryName and whose name matches a detected
  h3 heading, look for another category matching that heading's h2 parent and record it
  as parentCategoryName. Nothing is changed when no matching parent category exists.
--->
<cfif structCount(headingHierarchy) GT 0>
    <!--- Build reverse map: lowercase h3 name -> raw h2 parent name --->
    <cfset h3ToParent = structNew()>
    <cfloop collection="#headingHierarchy#" item="hParentName">
        <cfloop array="#headingHierarchy[hParentName]#" index="hChildName">
            <cfset h3ToParent[lCase(trim(hChildName))] = hParentName>
        </cfloop>
    </cfloop>
    <!--- Check if any categories match h3 names but lack parentCategoryName --->
    <cfset hierarchyApplied = 0>
    <cfloop from="1" to="#arrayLen(formattedCategories)#" index="i">
        <cfset cat = formattedCategories[i]>
        <cfif NOT structKeyExists(cat, "parentCategoryName") OR NOT len(cat.parentCategoryName)>
            <cfset catLower = lCase(trim(cat.name))>
            <cfif structKeyExists(h3ToParent, catLower)>
                <cfset rawParent = h3ToParent[catLower]>
                <!--- Normalize the parent once per child: strip punctuation and a trailing "menu" --->
                <cfset parentNorm = lCase(reReplace(rawParent, "[^a-zA-Z0-9 ]", "", "all"))>
                <cfset parentNorm = trim(reReplaceNoCase(parentNorm, "\s*menu\s*$", ""))>
                <cfset rawParentLower = lCase(rawParent)>
                <!--- Find matching parent category in the list --->
                <cfset matchedParent = "">
                <cfloop array="#formattedCategories#" index="pcat">
                    <cfset pcatLower = lCase(trim(pcat.name))>
                    <!---
                      Never let a category become its own parent. Because the "menu" suffix is
                      stripped, an h2 "Drinks Menu" containing an h3 "Drinks" would otherwise
                      match the "Drinks" category against itself.
                    --->
                    <cfif pcatLower NEQ catLower>
                        <cfset pcatNorm = trim(reReplaceNoCase(pcatLower, "\s*menu\s*$", ""))>
                        <cfif pcatNorm EQ parentNorm OR pcatLower EQ rawParentLower>
                            <cfset matchedParent = pcat.name>
                            <cfbreak>
                        </cfif>
                    </cfif>
                </cfloop>
                <cfif len(matchedParent)>
                    <cfset formattedCategories[i]["parentCategoryName"] = matchedParent>
                    <cfset hierarchyApplied = hierarchyApplied + 1>
                </cfif>
            </cfif>
        </cfif>
    </cfloop>
    <cfif hierarchyApplied GT 0>
        <cfset menuData["categories"] = formattedCategories>
        <cfset arrayAppend(response.steps, "Server-side hierarchy: applied " & hierarchyApplied & " parent-child relationships")>
    </cfif>
</cfif>
<!---
  Per-item normalization in a single pass:
  - when Claude supplied a non-empty "subcategory", it becomes the item's category
  - every item gets a positional id of the form item_<index>
--->
<cfloop from="1" to="#arrayLen(menuData.items)#" index="itemPos">
    <cfset currentItem = menuData.items[itemPos]>
    <cfif structKeyExists(currentItem, "subcategory") AND len(currentItem.subcategory)>
        <cfset currentItem["category"] = currentItem.subcategory>
    </cfif>
    <cfset currentItem["id"] = "item_" & itemPos>
</cfloop>
<!---
  Process item images.
  For each item, resolve one primary image URL and derive its filename (last path segment).
  Sources are tried in this order:
    1. the "images" struct, preferring the keys src, large, medium, small
    2. "imageUrl" (most common)
    3. "imageSrc" (legacy)
  Only non-empty string values count. An item is counted in itemsWithImages only when a
  usable URL was actually found, so an empty or malformed "images" object no longer
  inflates the count or blocks the imageUrl fallback.
--->
<cfset itemsWithImages = 0>
<cfloop from="1" to="#arrayLen(menuData.items)#" index="i">
    <cfset item = menuData.items[i]>
    <cfset primarySrc = "">
    <!--- Check if Claude found images object with URLs from HTML --->
    <cfif structKeyExists(item, "images") AND isStruct(item.images)>
        <cfset imgObj = item.images>
        <!--- Extract filenames for each image size --->
        <cfset filenames = structNew()>
        <cfloop collection="#imgObj#" item="sizeKey">
            <cfset imgUrl = imgObj[sizeKey]>
            <cfif isSimpleValue(imgUrl) AND len(trim(imgUrl))>
                <cfset filenames[sizeKey] = listLast(imgUrl, "/\")>
            </cfif>
        </cfloop>
        <cfset menuData.items[i]["imageFilenames"] = filenames>
        <!--- Pick the primary image: first preferred size that holds a usable URL --->
        <cfloop list="src,large,medium,small" index="preferredKey">
            <cfif structKeyExists(imgObj, preferredKey) AND isSimpleValue(imgObj[preferredKey]) AND len(trim(imgObj[preferredKey]))>
                <cfset primarySrc = imgObj[preferredKey]>
                <cfbreak>
            </cfif>
        </cfloop>
    </cfif>
    <!--- Handle imageUrl from Claude (most common) --->
    <cfif NOT len(primarySrc) AND structKeyExists(item, "imageUrl") AND isSimpleValue(item.imageUrl) AND len(trim(item.imageUrl))>
        <cfset primarySrc = item.imageUrl>
    </cfif>
    <!--- Legacy: handle if Claude returned imageSrc directly --->
    <cfif NOT len(primarySrc) AND structKeyExists(item, "imageSrc") AND isSimpleValue(item.imageSrc) AND len(trim(item.imageSrc))>
        <cfset primarySrc = item.imageSrc>
    </cfif>
    <cfif len(primarySrc)>
        <!--- imageSrc / imageFilename are the fields kept for backwards compatibility --->
        <cfset menuData.items[i]["imageSrc"] = primarySrc>
        <cfset menuData.items[i]["imageFilename"] = listLast(primarySrc, "/\")>
        <cfset itemsWithImages = itemsWithImages + 1>
    </cfif>
</cfloop>
<cfset arrayAppend(response.steps, "Found images for " & itemsWithImages & " of " & arrayLen(menuData.items) & " items")>
<!---
  Assemble the success response.
  Attaches the image lists to menuData, then copies menuData and the run diagnostics
  into the response. OK is set to true only as the very last step: if any lookup
  below throws, the outer cfcatch must not be left with OK=true next to an error MESSAGE.
--->
<!--- Add image URLs to response --->
<cfset menuData["imageUrls"] = imageUrlList>
<cfset menuData["headerCandidateIndices"] = arrayNew(1)>
<!--- Add image mappings for local HTML uploads (filename -> alt text) --->
<cfset menuData["imageMappings"] = imageMappings>
<cfset response["DATA"] = menuData>
<cfset response["sourceUrl"] = isDefined("targetUrl") ? targetUrl : "uploaded">
<cfset response["pagesProcessed"] = arrayLen(menuPages)>
<cfset response["imagesFound"] = arrayLen(imageDataArray)>
<cfset response["playwrightImagesCount"] = arrayLen(playwrightImages)>
<cfset response["DEBUG_PLAYWRIGHT_IMAGES"] = playwrightImages>
<cfset response["DEBUG_RAW_CATEGORIES"] = menuData.categories>
<!--- Everything above succeeded: mark the response as OK --->
<cfset response["OK"] = true>
<cfcatch type="any">
    <!---
      Top-level error handler for the whole request.
      Reports the exception message (plus detail and the first tag-context frame when
      available) through the response struct that is serialized after the cftry.
    --->
    <!--- Force the failure flag: the exception may have been raised after OK was set to true --->
    <cfset response["OK"] = false>
    <cfset response["MESSAGE"] = cfcatch.message>
    <cfif len(cfcatch.detail)>
        <cfset response["DETAIL"] = cfcatch.detail>
    </cfif>
    <!--- Line and template of the innermost frame, to locate the failure in this file --->
    <cfif structKeyExists(cfcatch, "tagContext") AND arrayLen(cfcatch.tagContext) GT 0>
        <cfset response["DEBUG_LINE"] = cfcatch.tagContext[1].line>
        <cfset response["DEBUG_TEMPLATE"] = cfcatch.tagContext[1].template>
    </cfif>
</cfcatch>
</cftry>
<!--- Write the response struct as the JSON body. Runs after the cftry above, both on success and after its cfcatch. --->
<cfoutput>#serializeJSON(response)#</cfoutput>