CFML was failing to compile analyzeMenuUrl.cfm because `&#39;` contains a `#` character, which Lucee interprets as the start of a variable expression. Escaped all 4 occurrences to `&##39;`. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2501 lines
161 KiB
Text
2501 lines
161 KiB
Text
<!--- Request settings: no debug output, emit only explicit cfoutput content, 300 second timeout --->
<cfsetting showdebugoutput="false" enablecfoutputonly="true" requesttimeout="300">
<!--- Reset the output buffer and declare a UTF-8 JSON response --->
<cfcontent type="application/json; charset=utf-8" reset="true">

<!--- Response envelope; OK stays false until a code path succeeds --->
<cfset response = { "OK": false }>
|
|
|
|
<cftry>
|
|
<!--- Load the Claude API key from ../../config/claude.json (relative to this template).
      A missing file, a document that is not a JSON object, a missing or non-string
      "apiKey" entry and a blank value all end in the same "not configured" error. --->
<cfset CLAUDE_API_KEY = "">
<cfset configPath = getDirectoryFromPath(getCurrentTemplatePath()) & "../../config/claude.json">
<cfif fileExists(configPath)>
    <cfset configData = deserializeJSON(fileRead(configPath))>
    <cfif isStruct(configData) AND structKeyExists(configData, "apiKey") AND isSimpleValue(configData.apiKey)>
        <!--- Trim so a key consisting only of whitespace cannot slip past the emptiness check below --->
        <cfset CLAUDE_API_KEY = trim(configData.apiKey)>
    </cfif>
</cfif>

<cfif NOT len(CLAUDE_API_KEY)>
    <cfthrow message="Claude API key not configured">
</cfif>
|
|
|
|
<!--- Read the raw request body and validate it before anything dereferences it.
      The body must be a JSON object; "html" and "url", when present, must be simple
      (string-like) values because later code passes them to len() and trim(). --->
<cfset requestBody = toString(getHttpRequestData().content)>
<cfif NOT len(requestBody)>
    <cfthrow message="No request body provided">
</cfif>
<cfif NOT isJSON(requestBody)>
    <cfthrow message="Request body is not valid JSON">
</cfif>

<cfset requestData = deserializeJSON(requestBody)>
<cfif NOT isStruct(requestData)>
    <cfthrow message="Request body must be a JSON object">
</cfif>
<cfloop list="html,url" index="requestField">
    <cfif structKeyExists(requestData, requestField) AND NOT isSimpleValue(requestData[requestField])>
        <cfthrow message="Request field '#requestField#' must be a string">
    </cfif>
</cfloop>

<!--- Progress log and request diagnostics returned to the caller --->
<cfset response["steps"] = arrayNew(1)>
<cfset response["debug"] = structNew()>
<cfset response["debug"]["hasHtmlKey"] = structKeyExists(requestData, "html")>
<cfset response["debug"]["hasUrlKey"] = structKeyExists(requestData, "url")>
<cfset response["debug"]["htmlLength"] = structKeyExists(requestData, "html") ? len(requestData.html) : 0>
<cfset response["debug"]["urlValue"] = structKeyExists(requestData, "url") ? requestData.url : "">

<!--- Working values filled in by whichever input branch runs below --->
<cfset pageHtml = "">
<cfset baseUrl = "">
<cfset basePath = "">
<cfset targetUrl = "">
|
|
|
|
<!--- Check if HTML content was provided directly (uploaded file or pasted) --->
|
|
<cfif structKeyExists(requestData, "html") AND len(trim(requestData.html))>
|
|
<cfset pageHtml = trim(requestData.html)>
|
|
<cfset arrayAppend(response.steps, "Using provided HTML content: " & len(pageHtml) & " bytes")>
|
|
<!--- No base URL for local content - images won't be fetched --->
|
|
<cfset baseUrl = "">
|
|
<cfset basePath = "">
|
|
<cfelseif structKeyExists(requestData, "url") AND len(trim(requestData.url))>
|
|
<!--- Take the URL from the request; when it does not start with http:// or https://, prefix https:// --->
<cfset targetUrl = trim(requestData.url)>
<cfset targetUrl = reFindNoCase("^https?://", targetUrl) ? targetUrl : ("https://" & targetUrl)>
|
|
|
|
<!--- ========== GRUBHUB FAST PATH ========== --->
|
|
<cfif reFindNoCase("grubhub\.com/restaurant/", targetUrl)>
|
|
<cfset arrayAppend(response.steps, "Grubhub URL detected - using API")>
|
|
|
|
<!--- Extract the restaurant ID: an all-digit path segment that ends the URL path.
      An optional trailing slash, a query string or a fragment may follow the digits. --->
<cfset ghRestaurantId = "">
<cfset ghIdMatch = reMatchNoCase("/(\d+)/?(\?|##|$)", targetUrl)>
<cfif arrayLen(ghIdMatch)>
    <!--- reMatchNoCase returns the whole match (for example "/12345?"), so keep only the digits --->
    <cfset ghRestaurantId = reReplaceNoCase(ghIdMatch[1], "[^0-9]", "", "all")>
</cfif>
<cfif NOT len(ghRestaurantId)>
    <cfthrow message="Could not extract Grubhub restaurant ID from URL">
</cfif>
<cfset arrayAppend(response.steps, "Grubhub restaurant ID: " & ghRestaurantId)>
|
|
|
|
<!--- Step 1: Get anonymous access token.
      Posts the fixed client credentials to the Grubhub auth endpoint and reads
      session_handle.access_token from the JSON reply into ghToken. --->
<cfhttp url="https://api-gtm.grubhub.com/auth" method="POST" timeout="15" result="ghAuthResult">
    <cfhttpparam type="header" name="Content-Type" value="application/json">
    <cfhttpparam type="body" value='{"brand":"GRUBHUB","client_id":"beta_UmWlpstzQSFmocLy3h1UieYcVST","scope":"anonymous"}'>
</cfhttp>
<!--- val() reads the leading number of statusCode, so only an actual 200 passes
      (the previous substring test accepted any status text containing "200") --->
<cfif val(ghAuthResult.statusCode) NEQ 200>
    <cfthrow message="Grubhub auth failed: #ghAuthResult.statusCode#">
</cfif>
<cfif NOT isJSON(ghAuthResult.fileContent)>
    <cfthrow message="Grubhub auth failed: response was not valid JSON">
</cfif>
<cfset ghAuth = deserializeJSON(ghAuthResult.fileContent)>
<!--- Fail with a clear message instead of a key-not-found error when the reply has an unexpected shape --->
<cfif NOT (isStruct(ghAuth) AND structKeyExists(ghAuth, "session_handle") AND isStruct(ghAuth.session_handle) AND structKeyExists(ghAuth.session_handle, "access_token"))>
    <cfthrow message="Grubhub auth failed: no access token in response">
</cfif>
<cfset ghToken = ghAuth.session_handle.access_token>
<cfset arrayAppend(response.steps, "Got Grubhub anonymous token")>
|
|
|
|
<!--- Step 2: Fetch restaurant with full menu data including modifiers.
      Uses the bearer token from step 1; the reply's "restaurant" object becomes ghRestaurant. --->
<cfhttp url="https://api-gtm.grubhub.com/restaurants/#ghRestaurantId#?hideChoiceCategories=false&version=4&orderType=standard&hideUnavailableMenuItems=false&hideMenuItems=false" method="GET" timeout="30" result="ghMenuResult">
    <cfhttpparam type="header" name="Authorization" value="Bearer #ghToken#">
</cfhttp>
<!--- val() reads the leading number of statusCode, so only an actual 200 passes --->
<cfif val(ghMenuResult.statusCode) NEQ 200>
    <cfthrow message="Grubhub restaurant fetch failed: #ghMenuResult.statusCode#">
</cfif>
<cfif NOT isJSON(ghMenuResult.fileContent)>
    <cfthrow message="Grubhub restaurant fetch failed: response was not valid JSON">
</cfif>
<cfset ghData = deserializeJSON(ghMenuResult.fileContent)>
<!--- Everything below reads ghRestaurant as a struct, so verify that up front --->
<cfif NOT (isStruct(ghData) AND structKeyExists(ghData, "restaurant") AND isStruct(ghData.restaurant))>
    <cfthrow message="Grubhub restaurant fetch failed: no restaurant data in response">
</cfif>
<cfset ghRestaurant = ghData.restaurant>
<cfset arrayAppend(response.steps, "Fetched Grubhub restaurant data (" & len(ghMenuResult.fileContent) & " bytes)")>
|
|
|
|
<!--- Parse business info from ghRestaurant into ghBusiness:
      name, address parts plus a composed one-line address, coordinates, phone and description.
      Every field except name is only set when the source key is present. --->
<cfset ghBusiness = structNew()>
<!--- A missing name yields an empty string rather than aborting the whole import --->
<cfset ghBusiness["name"] = structKeyExists(ghRestaurant, "name") ? ghRestaurant.name : "">

<cfif structKeyExists(ghRestaurant, "address") AND isStruct(ghRestaurant.address)>
    <cfset ghAddr = ghRestaurant.address>
    <cfif structKeyExists(ghAddr, "street_address")><cfset ghBusiness["addressLine1"] = ghAddr.street_address></cfif>
    <cfif structKeyExists(ghAddr, "locality")><cfset ghBusiness["city"] = ghAddr.locality></cfif>
    <cfif structKeyExists(ghAddr, "region")><cfset ghBusiness["state"] = ghAddr.region></cfif>
    <cfif structKeyExists(ghAddr, "zip")><cfset ghBusiness["zip"] = ghAddr.zip></cfif>

    <!--- Compose "street, city, state zip" from the parts that exist, so a missing
          part does not leave dangling separators such as ", , " in the result --->
    <cfset ghAddrParts = []>
    <cfif structKeyExists(ghBusiness, "addressLine1") AND len(ghBusiness.addressLine1)>
        <cfset arrayAppend(ghAddrParts, ghBusiness.addressLine1)>
    </cfif>
    <cfif structKeyExists(ghBusiness, "city") AND len(ghBusiness.city)>
        <cfset arrayAppend(ghAddrParts, ghBusiness.city)>
    </cfif>
    <cfset ghStateZip = trim((structKeyExists(ghBusiness, "state") ? ghBusiness.state : "") & " " & (structKeyExists(ghBusiness, "zip") ? ghBusiness.zip : ""))>
    <cfif len(ghStateZip)>
        <cfset arrayAppend(ghAddrParts, ghStateZip)>
    </cfif>
    <cfset ghBusiness["address"] = arrayToList(ghAddrParts, ", ")>
</cfif>

<cfif structKeyExists(ghRestaurant, "latitude") AND isNumeric(ghRestaurant.latitude)>
    <cfset ghBusiness["latitude"] = ghRestaurant.latitude>
</cfif>
<cfif structKeyExists(ghRestaurant, "longitude") AND isNumeric(ghRestaurant.longitude)>
    <cfset ghBusiness["longitude"] = ghRestaurant.longitude>
</cfif>

<!--- Phone: digits only --->
<cfif structKeyExists(ghRestaurant, "phone_number") AND len(ghRestaurant.phone_number)>
    <cfset ghBusiness["phone"] = reReplace(ghRestaurant.phone_number, "[^0-9]", "", "all")>
</cfif>

<!--- Description --->
<cfif structKeyExists(ghRestaurant, "description") AND len(trim(ghRestaurant.description))>
    <cfset ghBusiness["description"] = trim(ghRestaurant.description)>
</cfif>
|
|
|
|
<cfscript>
// Hours: turn ghRestaurant.restaurant_managed_hours_list_v2 into one summary string such as
// "Mon 9am-5pm, Tue 9am-5:30pm", stored in ghBusiness["hours"] when at least one entry is usable.
ghHoursParts = [];
ghDayOrder = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"];
ghDayAbbrev = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"];

// Converts "HH:mm[:ss]" into a compact 12-hour label ("9am", "5:30pm").
// Returns "" when no numeric hour can be read, so the caller can skip the entry.
// A missing minutes part counts as :00, and hour 24 is treated as midnight.
ghFormatTime = function(required string clock) {
    var parts = listToArray(arguments.clock, ":");
    if (arrayLen(parts) == 0 || !isNumeric(trim(parts[1]))) {
        return "";
    }
    var hour = int(val(parts[1])) MOD 24;
    var minute = arrayLen(parts) >= 2 ? val(parts[2]) : 0;
    var suffix = hour >= 12 ? "pm" : "am";
    if (hour > 12) {
        hour = hour - 12;
    }
    if (hour == 0) {
        hour = 12;
    }
    return hour & (minute > 0 ? ":" & numberFormat(minute, "00") : "") & suffix;
};

if (structKeyExists(ghRestaurant, "restaurant_managed_hours_list_v2") && isArray(ghRestaurant.restaurant_managed_hours_list_v2)) {
    for (ghDayHours in ghRestaurant.restaurant_managed_hours_list_v2) {
        // Only entries carrying a day plus both times can be rendered
        if (!(isStruct(ghDayHours)
                && structKeyExists(ghDayHours, "day")
                && structKeyExists(ghDayHours, "start_time")
                && structKeyExists(ghDayHours, "end_time"))) {
            continue;
        }
        if (!(isSimpleValue(ghDayHours.day) && isSimpleValue(ghDayHours.start_time) && isSimpleValue(ghDayHours.end_time))) {
            continue;
        }

        // Case-insensitive lookup so "MONDAY" and "Monday" both resolve
        ghDayIdx = arrayFindNoCase(ghDayOrder, ghDayHours.day);
        if (ghDayIdx == 0) {
            continue;
        }

        ghOpenStr = ghFormatTime(ghDayHours.start_time);
        ghCloseStr = ghFormatTime(ghDayHours.end_time);
        if (!len(ghOpenStr) || !len(ghCloseStr)) {
            continue;
        }
        arrayAppend(ghHoursParts, ghDayAbbrev[ghDayIdx] & " " & ghOpenStr & "-" & ghCloseStr);
    }
}

if (arrayLen(ghHoursParts) > 0) {
    ghBusiness["hours"] = arrayToList(ghHoursParts, ", ");
}
</cfscript>
|
|
|
|
<!--- Tax rate: copy restaurant_availability.sales_tax from the API reply when both levels are present --->
<cfif structKeyExists(ghData, "restaurant_availability")>
    <cfif structKeyExists(ghData.restaurant_availability, "sales_tax")>
        <cfset ghBusiness["taxRate"] = ghData.restaurant_availability.sales_tax>
    </cfif>
</cfif>
|
|
|
|
<cfscript>
// Walk menu_category_list -> menu_item_list -> choice_category_list and build:
//   ghCategories     [{ name, itemCount }]
//   ghItems          one struct per named item, ids "item_1", "item_2", ...
//   ghModifierGroups modifier group templates, de-duplicated by group name (first one seen wins)
//   ghImageMappings  [{ itemId, url }] for items that have an image
ghCategories = [];
ghItems = [];
ghItemId = 1;
ghModifierGroups = structNew();
ghImageMappings = [];

if (structKeyExists(ghRestaurant, "menu_category_list") && isArray(ghRestaurant.menu_category_list)) {
    for (ghCat in ghRestaurant.menu_category_list) {
        ghCatName = structKeyExists(ghCat, "name") ? trim(ghCat.name) : "Menu";
        ghCatItemCount = 0;

        if (structKeyExists(ghCat, "menu_item_list") && isArray(ghCat.menu_item_list)) {
            for (ghItem in ghCat.menu_item_list) {
                // Items without a name are ignored entirely
                ghItemName = structKeyExists(ghItem, "name") ? trim(ghItem.name) : "";
                if (!len(ghItemName)) {
                    continue;
                }

                // price.amount is divided by 100 (cents -> dollars)
                ghPrice = 0;
                if (structKeyExists(ghItem, "price") && isStruct(ghItem.price) && structKeyExists(ghItem.price, "amount")) {
                    ghPrice = val(ghItem.price.amount) / 100;
                }

                ghDesc = structKeyExists(ghItem, "description") ? trim(ghItem.description) : "";

                // Image URL is assembled from base_url, a fixed 400x400 fill transform, public_id and format
                ghImageUrl = "";
                if (structKeyExists(ghItem, "media_image") && isStruct(ghItem.media_image)) {
                    ghImg = ghItem.media_image;
                    if (structKeyExists(ghImg, "base_url") && structKeyExists(ghImg, "public_id") && structKeyExists(ghImg, "format")) {
                        ghImageUrl = ghImg.base_url & "w_400,h_400,c_fill/" & ghImg.public_id & "." & ghImg.format;
                    }
                }

                // Modifier groups attached to this item; the item stores group names only
                ghItemModifiers = [];
                if (structKeyExists(ghItem, "choice_category_list") && isArray(ghItem.choice_category_list)) {
                    for (ghChoiceCat in ghItem.choice_category_list) {
                        ghModName = structKeyExists(ghChoiceCat, "name") ? trim(ghChoiceCat.name) : "";
                        if (!len(ghModName)) {
                            continue;
                        }
                        arrayAppend(ghItemModifiers, ghModName);

                        // A template already exists for this group name: nothing more to do
                        if (structKeyExists(ghModifierGroups, ghModName)) {
                            continue;
                        }

                        ghModOptions = [];
                        if (structKeyExists(ghChoiceCat, "choice_option_list") && isArray(ghChoiceCat.choice_option_list)) {
                            for (ghOpt in ghChoiceCat.choice_option_list) {
                                // The option's display name comes from its "description" field
                                ghOptName = structKeyExists(ghOpt, "description") ? trim(ghOpt.description) : "";
                                ghOptPrice = 0;
                                if (structKeyExists(ghOpt, "price") && isStruct(ghOpt.price) && structKeyExists(ghOpt.price, "amount")) {
                                    ghOptPrice = val(ghOpt.price.amount) / 100;
                                }
                                if (len(ghOptName)) {
                                    arrayAppend(ghModOptions, { "name": ghOptName, "price": ghOptPrice });
                                }
                            }
                        }

                        ghMinSel = structKeyExists(ghChoiceCat, "min_choice_options") ? val(ghChoiceCat.min_choice_options) : 0;
                        ghMaxSel = structKeyExists(ghChoiceCat, "max_choice_options") ? val(ghChoiceCat.max_choice_options) : 0;
                        ghModifierGroups[ghModName] = {
                            "name": ghModName,
                            "required": ghMinSel GT 0,
                            "minSelections": ghMinSel,
                            "maxSelections": ghMaxSel,
                            "options": ghModOptions
                        };
                    }
                }

                ghItemObj = {
                    "id": "item_" & ghItemId,
                    "name": ghItemName,
                    "price": ghPrice,
                    "description": ghDesc,
                    "category": ghCatName,
                    "imageUrl": ghImageUrl,
                    "hasModifiers": arrayLen(ghItemModifiers) GT 0,
                    "modifiers": ghItemModifiers
                };
                arrayAppend(ghItems, ghItemObj);
                ghCatItemCount++;

                // Remember which item each image belongs to
                if (len(ghImageUrl)) {
                    arrayAppend(ghImageMappings, { "itemId": "item_" & ghItemId, "url": ghImageUrl });
                }

                ghItemId++;
            }
        }

        // The category is recorded even when it contributed no items
        arrayAppend(ghCategories, { "name": ghCatName, "itemCount": ghCatItemCount });
    }
}
</cfscript>
|
|
|
|
<cfscript>
// Flatten the name-keyed modifier templates into the array returned to the caller
ghModifiers = [];
for (ghModKey in ghModifierGroups) {
    arrayAppend(ghModifiers, ghModifierGroups[ghModKey]);
}

arrayAppend(
    response.steps,
    "Parsed " & arrayLen(ghItems) & " items in " & arrayLen(ghCategories) & " categories with " & arrayLen(ghModifiers) & " modifier groups"
);
</cfscript>
|
|
|
|
<!--- Build and return the Grubhub response, then stop the request.
      imageUrls and headerCandidateIndices are returned as empty arrays on this path. --->
<cfset menuData = structNew()>
<cfset menuData["business"] = ghBusiness>
<cfset menuData["categories"] = ghCategories>
<cfset menuData["items"] = ghItems>
<cfset menuData["modifiers"] = ghModifiers>
<cfset menuData["imageUrls"] = arrayNew(1)>
<cfset menuData["imageMappings"] = ghImageMappings>
<cfset menuData["headerCandidateIndices"] = arrayNew(1)>

<cfset response["OK"] = true>
<cfset response["DATA"] = menuData>
<cfset response["sourceUrl"] = targetUrl>
<cfset response["pagesProcessed"] = 1>
<cfset response["imagesFound"] = arrayLen(ghImageMappings)>
<cfset response["parsedVia"] = "grubhub_api">
<!--- Same content type, including charset, as declared at the top of the template --->
<cfcontent type="application/json; charset=utf-8" reset="true">
<cfoutput>#serializeJSON(response)#</cfoutput>
<cfabort>
|
|
</cfif>
|
|
<!--- ========== END GRUBHUB FAST PATH ========== --->
|
|
|
|
<!--- Check if this is a local temp file (ZIP upload) - read directly, skip Playwright --->
|
|
<cfif findNoCase("/temp/menu-import/", targetUrl)>
|
|
<!--- Map the URL onto a file below /temp/menu-import/ and read it.
      targetUrl comes from the request body, so it is validated before it reaches
      expandPath()/fileRead(): the URL path itself must start with /temp/menu-import/,
      any query string or fragment is dropped, and backslashes or ".." segments are
      rejected so the request cannot climb out of the import directory. --->
<cfset localUrlPattern = "^https?://[^/?##]+(/temp/menu-import/[^?##]*).*$">
<cfif NOT reFindNoCase(localUrlPattern, targetUrl)>
    <cfthrow message="Invalid local import URL">
</cfif>
<cfset localUrlPath = reReplaceNoCase(targetUrl, localUrlPattern, "\1")>
<cfif find("\", localUrlPath) OR reFind("(^|/)\.\.(/|$)", localUrlPath)>
    <cfthrow message="Invalid local import path">
</cfif>
<cfset localFilePath = expandPath(localUrlPath)>
<cfset arrayAppend(response.steps, "Local temp file detected: " & localFilePath)>

<cfif NOT fileExists(localFilePath)>
    <cfthrow message="Local file not found: #localFilePath#">
</cfif>

<cfset pageHtml = fileRead(localFilePath, "utf-8")>
<!--- No browser-collected images on this path --->
<cfset playwrightImages = arrayNew(1)>
<cfset arrayAppend(response.steps, "Read " & len(pageHtml) & " bytes from local file")>

<!--- Base locations for resources referenced relative to the HTML file:
      localDir is the directory on disk, basePath is the URL up to and including the last slash --->
<cfset localDir = getDirectoryFromPath(localFilePath)>
<cfset basePath = reReplaceNoCase(targetUrl, "/[^/]*$", "/")>
<cfif NOT reFindNoCase("/$", basePath)>
    <cfset basePath = basePath & "/">
</cfif>
|
|
|
|
<!--- Check for Toast menu page - extract from visible HTML for most complete data --->
|
|
<cfif findNoCase("class=""headerText""", pageHtml) AND findNoCase("toasttab", pageHtml)>
|
|
<cfset arrayAppend(response.steps, "Toast menu detected - parsing visible HTML items")>
|
|
|
|
<cftry>
|
|
<cfscript>
// Working state for the Toast HTML scrape
toastBusiness = structNew();
toastCategories = arrayNew(1);
toastItems = arrayNew(1);
categorySet = structNew();    // category names already added to toastCategories
itemNameSet = structNew();    // item names already added to toastItems
itemId = 1;                   // running counter behind the "item_N" ids
currentCategory = "Menu";

// Category headers: <h2> elements whose class attribute contains "groupHeader"
categoryMatches = reMatchNoCase('<h2[^>]*class="[^"]*groupHeader[^"]*"[^>]*>([^<]+)</h2>', pageHtml);
for (catMatch in categoryMatches) {
    // reMatchNoCase returns the whole element; reduce it to its text content
    catName = trim(reReplaceNoCase(catMatch, '.*>([^<]+)</h2>.*', '\1'));
    if (!len(catName) || structKeyExists(categorySet, catName)) {
        continue;
    }
    categorySet[catName] = true;
    arrayAppend(toastCategories, { "name": catName, "itemCount": 0 });
}
</cfscript>
|
|
|
|
<cfscript>
// Scrape one item per <li class="...item..."> block: name from span.headerText, first dollar
// amount as price, text of a "description" div, and an image whose src starts with Menu_files/.
itemBlocks = reMatchNoCase('<li[^>]*class="[^"]*item[^"]*"[^>]*>.*?</li>', pageHtml);
arrayAppend(response.steps, "Found " & arrayLen(itemBlocks) & " item blocks in HTML");

for (block in itemBlocks) {
    // A block without a headerText span is not an item
    nameMatch = reMatchNoCase('<span class="headerText">([^<]+)</span>', block);
    if (!arrayLen(nameMatch)) {
        continue;
    }
    itemName = trim(reReplaceNoCase(nameMatch[1], '.*>([^<]+)</span>.*', '\1'));

    // Skip blanks and names that were already recorded
    if (!len(itemName) || structKeyExists(itemNameSet, itemName)) {
        continue;
    }
    itemNameSet[itemName] = true;

    itemStruct = structNew();
    itemStruct["id"] = "item_" & itemId;
    itemStruct["name"] = itemName;
    itemStruct["modifiers"] = arrayNew(1);

    // Price: first "$n" or "$n.nn" in the block; stays 0 when none is found or it is not positive
    itemStruct["price"] = 0;
    priceMatch = reMatchNoCase('\$([0-9]+\.?[0-9]*)', block);
    if (arrayLen(priceMatch)) {
        // The match still carries the dollar sign, e.g. "$12.99"
        priceStr = replace(priceMatch[1], "$", "");
        if (isNumeric(priceStr) && val(priceStr) GT 0) {
            itemStruct["price"] = val(priceStr);
        }
    }

    // Description: text of the first element whose class contains "description", else ""
    descMatch = reMatchNoCase('<div[^>]*class="[^"]*description[^"]*"[^>]*>([^<]+)</div>', block);
    itemStruct["description"] = arrayLen(descMatch)
        ? trim(reReplaceNoCase(descMatch[1], '.*>([^<]+)</div>.*', '\1'))
        : "";

    // Image: a src pointing into Menu_files/, resolved against basePath
    imgMatch = reMatchNoCase('src="(Menu_files/[^"]+)"', block);
    if (arrayLen(imgMatch)) {
        imgSrc = reReplaceNoCase(imgMatch[1], '.*src="([^"]+)".*', '\1');
        itemStruct["imageUrl"] = basePath & imgSrc;
        itemStruct["imageSrc"] = basePath & imgSrc;
        itemStruct["imageFilename"] = listLast(imgSrc, "/");
    }

    // Provisional category: the first scraped category, or "Menu" when none were found
    itemStruct["category"] = arrayLen(toastCategories) ? toastCategories[1].name : "Menu";

    arrayAppend(toastItems, itemStruct);
    itemId++;
}
</cfscript>
|
|
|
|
<cfscript>
// Fallback when the <li> block scrape produced nothing: take every span.headerText on the
// page as an item name, with price 0, empty description and category "Menu".
if (arrayLen(toastItems) EQ 0) {
    nameMatches = reMatchNoCase('<span class="headerText">([^<]+)</span>', pageHtml);
    for (nameMatch in nameMatches) {
        itemName = trim(reReplaceNoCase(nameMatch, '.*>([^<]+)</span>.*', '\1'));
        if (!len(itemName) || structKeyExists(itemNameSet, itemName)) {
            continue;
        }
        itemNameSet[itemName] = true;
        itemStruct = { "id": "item_" & itemId, "name": itemName, "price": 0, "description": "", "category": "Menu", "modifiers": [] };
        arrayAppend(toastItems, itemStruct);
        itemId++;
    }
}
</cfscript>
|
|
|
|
<cfscript>
// Business name, source 1 of several: the <title> tag
titleMatch = reMatchNoCase('<title[^>]*>([^<]+)</title>', pageHtml);
arrayAppend(response.steps, "Title tag matches: " & arrayLen(titleMatch));

if (arrayLen(titleMatch)) {
    titleText = trim(reReplaceNoCase(titleMatch[1], '.*<title[^>]*>([^<]+)</title>.*', '\1'));
    arrayAppend(response.steps, "Raw title: " & left(titleText, 100));

    // Titles shaped like "Restaurant Name | Online Ordering": keep the part before the first pipe
    if (findNoCase("|", titleText)) {
        titleText = trim(listFirst(titleText, "|"));
    }

    // Drop a trailing " - Menu...", " - Order..." or " - Online..." suffix
    titleText = reReplaceNoCase(titleText, "\s*-\s*(Menu|Order|Online).*$", "");

    if (len(titleText) && !structKeyExists(toastBusiness, "name")) {
        toastBusiness["name"] = titleText;
        arrayAppend(response.steps, "Business name from title: " & titleText);
    }
}
</cfscript>
|
|
|
|
<cfscript>
// Business name, source 2: the og:site_name / og:title meta tag, used only when no
// non-empty name has been set yet.
if (!(structKeyExists(toastBusiness, "name") && len(toastBusiness.name))) {
    // Match the whole <meta> tag; property and content may appear in either order inside it
    ogMatch = reMatchNoCase('<meta[^>]*property=["'']og:(?:site_name|title)["''][^>]*>', pageHtml);
    if (arrayLen(ogMatch)) {
        // Read content="..." or content='...' using the attribute's own quote character as the
        // terminator, so a value containing the other quote (e.g. Joe's Grill) is not cut short.
        ogText = "";
        ogContent = reFindNoCase('content="([^"]+)"', ogMatch[1], 1, true);
        if (ogContent.pos[1] EQ 0) {
            ogContent = reFindNoCase("content='([^']+)'", ogMatch[1], 1, true);
        }
        if (ogContent.pos[1] GT 0) {
            ogText = mid(ogMatch[1], ogContent.pos[2], ogContent.len[2]);
        }
        ogText = trim(ogText);

        // Same "Name | Suffix" convention as the title: keep the part before the first pipe
        if (findNoCase("|", ogText)) {
            ogText = trim(listFirst(ogText, "|"));
        }
        if (len(ogText)) {
            toastBusiness["name"] = ogText;
            arrayAppend(response.steps, "Business name from og:meta: " & ogText);
        }
    }
}
</cfscript>
|
|
|
|
<cfscript>
// Business name, source 3: an <h1> or <div> whose class contains "restaurant", "location"
// or "brand". Only consulted while no non-empty name is set; text of 100+ characters is ignored.
if (!(structKeyExists(toastBusiness, "name") && len(toastBusiness.name))) {
    headerMatch = reMatchNoCase('<(?:h1|div)[^>]*class="[^"]*(?:restaurant|location|brand)[^"]*"[^>]*>([^<]+)<', pageHtml);
    if (arrayLen(headerMatch)) {
        headerText = trim(reReplaceNoCase(headerMatch[1], '.*>([^<]+)<.*', '\1'));
        if (len(headerText) && len(headerText) LT 100) {
            toastBusiness["name"] = headerText;
            arrayAppend(response.steps, "Business name from header: " & headerText);
        }
    }
}

// Business name, source 4 (last resort): the first <h1> on the page, same length limit
if (!(structKeyExists(toastBusiness, "name") && len(toastBusiness.name))) {
    h1Match = reMatchNoCase('<h1[^>]*>([^<]+)</h1>', pageHtml);
    if (arrayLen(h1Match)) {
        h1Text = trim(reReplaceNoCase(h1Match[1], '.*<h1[^>]*>([^<]+)</h1>.*', '\1'));
        if (len(h1Text) && len(h1Text) LT 100) {
            toastBusiness["name"] = h1Text;
            arrayAppend(response.steps, "Business name from h1: " & h1Text);
        }
    }
}
</cfscript>
|
|
|
|
<cfscript>
// Address: text of the first element whose class contains "address", accepted when it is
// non-empty and shorter than 200 characters. Skipped if addressLine1 is already set.
if (!structKeyExists(toastBusiness, "addressLine1")) {
    addrMatch = reMatchNoCase('<[^>]*class="[^"]*address[^"]*"[^>]*>([^<]+)</[^>]+>', pageHtml);
    if (arrayLen(addrMatch)) {
        addrText = trim(reReplaceNoCase(addrMatch[1], '.*>([^<]+)</.*', '\1'));
        if (len(addrText) && len(addrText) LT 200) {
            toastBusiness["addressLine1"] = addrText;
            // Only the first 50 characters go into the step log
            arrayAppend(response.steps, "Address from HTML: " & left(addrText, 50));
        }
    }
}
</cfscript>
|
|
|
|
<cfscript>
// Phone: a 3-3-4 digit number that follows "tel:" or a phone...">  attribute ending,
// normalised to NNN-NNN-NNNN. Skipped if a phone is already set.
if (!structKeyExists(toastBusiness, "phone")) {
    phoneMatch = reMatchNoCase('(?:tel:|phone[^"]*">)\s*\(?(\d{3})\)?[-.\s]?(\d{3})[-.\s]?(\d{4})', pageHtml);
    if (arrayLen(phoneMatch)) {
        // Re-run a looser pattern over the matched text to pull out the three digit groups
        phoneText = reReplaceNoCase(phoneMatch[1], '.*(\d{3}).*(\d{3}).*(\d{4}).*', '\1-\2-\3');
        if (len(phoneText) GTE 10) {
            toastBusiness["phone"] = phoneText;
            arrayAppend(response.steps, "Phone from HTML: " & phoneText);
        }
    }
}
</cfscript>
|
|
|
|
<!--- Check if __OO_STATE__ exists in the saved HTML --->
|
|
<cfset hasOoState = findNoCase("window.__OO_STATE__", pageHtml) GT 0>
|
|
<cfset arrayAppend(response.steps, "Has __OO_STATE__: " & hasOoState)>
|
|
|
|
<!--- Also try to extract from __OO_STATE__ for images and business info --->
|
|
<cfif hasOoState>
|
|
<cfscript>
// Pull the object literal that follows "window.__OO_STATE__" out of the page source and
// leave it in ooStateJson2 ("" when the marker or a balanced object cannot be found).
ooStateJson2 = "";
ooStart2 = findNoCase("window.__OO_STATE__", pageHtml);
if (ooStart2 > 0) {
    // First opening brace after the marker
    bs2 = find("{", pageHtml, ooStart2);
    if (bs2 > 0) {
        // Scan forward tracking brace depth. Braces inside double-quoted strings are ignored,
        // and a backslash inside a string makes the next character literal.
        // d2 = depth, inS2 = inside string, esc2 = previous char was a backslash, be2 = closing brace position
        d2 = 0; inS2 = false; esc2 = false; be2 = 0;
        totalLen2 = len(pageHtml);
        for (i2 = bs2; i2 <= totalLen2; i2++) {
            c2 = mid(pageHtml, i2, 1);
            if (esc2) { esc2 = false; continue; }
            if (c2 == chr(92) && inS2) { esc2 = true; continue; }
            if (c2 == '"') { inS2 = !inS2; continue; }
            if (!inS2) {
                if (c2 == "{") {
                    d2 = d2 + 1;
                } else if (c2 == "}") {
                    d2 = d2 - 1;
                    // Depth back at zero: this brace closes the object opened at bs2
                    if (d2 == 0) { be2 = i2; break; }
                }
            }
        }
        if (be2 > 0) ooStateJson2 = mid(pageHtml, bs2, be2 - bs2 + 1);
    }
}
if (len(ooStateJson2)) {
    // Undo HTML escaping of the extracted text. &amp; is decoded last so that text such as
    // "&amp;lt;" becomes "&lt;" rather than being decoded twice into "<".
    ooStateJson2 = replace(ooStateJson2, "&lt;", "<", "all");
    ooStateJson2 = replace(ooStateJson2, "&gt;", ">", "all");
    ooStateJson2 = replace(ooStateJson2, "&quot;", '"', "all");
    ooStateJson2 = replace(ooStateJson2, "&amp;", "&", "all");
}
</cfscript>
|
|
<cfif len(ooStateJson2)>
|
|
<cftry>
|
|
<cfset ooState = deserializeJSON(ooStateJson2)>
|
|
<!--- Debug: log all top-level keys in OO_STATE --->
|
|
<cfset ooStateKeys = structKeyList(ooState)>
|
|
<cfset arrayAppend(response.steps, "OO_STATE keys: " & left(ooStateKeys, 500))>
|
|
<!--- Build name -> image URL map, name -> category map, and name -> price map from OO_STATE --->
|
|
<cfset imageMap = structNew()>
|
|
<cfset itemCategoryMap = structNew()>
|
|
<cfset itemPriceMap = structNew()>
|
|
<cfloop collection="#ooState#" item="key">
|
|
<!--- Extract restaurant/business info --->
|
|
<cfif left(key, 11) EQ "Restaurant:">
|
|
<cfset restaurant = ooState[key]>
|
|
<cfif structKeyExists(restaurant, "name")>
|
|
<cfset toastBusiness["name"] = restaurant.name>
|
|
</cfif>
|
|
<cfif structKeyExists(restaurant, "location")>
|
|
<cfset loc = restaurant.location>
|
|
<cfif structKeyExists(loc, "address1")>
|
|
<cfset toastBusiness["addressLine1"] = loc.address1>
|
|
</cfif>
|
|
<cfif structKeyExists(loc, "city")>
|
|
<cfset toastBusiness["city"] = loc.city>
|
|
</cfif>
|
|
<cfif structKeyExists(loc, "state")>
|
|
<cfset toastBusiness["state"] = loc.state>
|
|
</cfif>
|
|
<cfif structKeyExists(loc, "zipCode")>
|
|
<cfset toastBusiness["zip"] = loc.zipCode>
|
|
</cfif>
|
|
<cfif structKeyExists(loc, "phone")>
|
|
<cfset toastBusiness["phone"] = loc.phone>
|
|
</cfif>
|
|
</cfif>
|
|
<cfif structKeyExists(restaurant, "brandColor")>
|
|
<cfset toastBusiness["brandColor"] = replace(restaurant.brandColor, "##", "")>
|
|
</cfif>
|
|
</cfif>
|
|
<!--- Extract menu items, images, and CATEGORIES --->
|
|
<cfif left(key, 5) EQ "Menu:">
|
|
<cfset menu = ooState[key]>
|
|
<cfif structKeyExists(menu, "groups") AND isArray(menu.groups)>
|
|
<cfloop array="#menu.groups#" index="group">
|
|
<!--- Extract category name from group --->
|
|
<cfset groupName = "">
|
|
<cfif structKeyExists(group, "name") AND len(trim(group.name))>
|
|
<cfset groupName = trim(group.name)>
|
|
<!--- Add to categories if not already there --->
|
|
<cfif NOT structKeyExists(categorySet, groupName)>
|
|
<cfset categorySet[groupName] = true>
|
|
<cfset arrayAppend(toastCategories, { "name": groupName, "itemCount": 0 })>
|
|
</cfif>
|
|
</cfif>
|
|
|
|
<!--- Debug: log group keys to help identify subgroup field names --->
|
|
<cfif isStruct(group) AND NOT structKeyExists(variables, "loggedGroupKeys")>
|
|
<cfset variables.loggedGroupKeys = true>
|
|
<cfset arrayAppend(response.steps, "Group keys: " & structKeyList(group))>
|
|
</cfif>
|
|
|
|
<!--- Check for subgroups (nested categories within this group) --->
|
|
<!--- Try multiple field names: subgroups, children, childGroups --->
|
|
<cfset subgroupsArr = arrayNew(1)>
|
|
<cfif structKeyExists(group, "subgroups") AND isArray(group.subgroups)>
|
|
<cfset subgroupsArr = group.subgroups>
|
|
<cfelseif structKeyExists(group, "children") AND isArray(group.children)>
|
|
<cfset subgroupsArr = group.children>
|
|
<cfelseif structKeyExists(group, "childGroups") AND isArray(group.childGroups)>
|
|
<cfset subgroupsArr = group.childGroups>
|
|
</cfif>
|
|
<cfset hasSubgroups = false>
|
|
<cfif arrayLen(subgroupsArr) GT 0>
|
|
<cfset hasSubgroups = true>
|
|
<cfset arrayAppend(response.steps, "Group '" & groupName & "' has " & arrayLen(subgroupsArr) & " subgroups")>
|
|
<cfloop array="#subgroupsArr#" index="subgroup">
|
|
<cfset subgroupName = "">
|
|
<cfif structKeyExists(subgroup, "name") AND len(trim(subgroup.name))>
|
|
<cfset subgroupName = trim(subgroup.name)>
|
|
<cfif NOT structKeyExists(categorySet, subgroupName)>
|
|
<cfset categorySet[subgroupName] = true>
|
|
<cfset arrayAppend(toastCategories, { "name": subgroupName, "parentCategoryName": groupName, "itemCount": 0 })>
|
|
</cfif>
|
|
</cfif>
|
|
<!--- Extract items from subgroup --->
|
|
<cfif structKeyExists(subgroup, "items") AND isArray(subgroup.items)>
|
|
<cfset effectiveName = len(subgroupName) ? subgroupName : groupName>
|
|
<cfloop array="#subgroup.items#" index="item">
|
|
<cfif structKeyExists(item, "name")>
|
|
<cfset itemCategoryMap[item.name] = effectiveName>
|
|
<!--- Extract price --->
|
|
<cfif structKeyExists(item, "prices") AND isArray(item.prices) AND arrayLen(item.prices) GT 0 AND isNumeric(item.prices[1])>
|
|
<cfset itemPriceMap[item.name] = val(item.prices[1])>
|
|
<cfelseif structKeyExists(item, "price") AND isNumeric(item.price)>
|
|
<cfset itemPriceMap[item.name] = val(item.price)>
|
|
<cfelseif structKeyExists(item, "unitPrice") AND isNumeric(item.unitPrice)>
|
|
<cfset itemPriceMap[item.name] = val(item.unitPrice)>
|
|
<cfelseif structKeyExists(item, "basePrice") AND isNumeric(item.basePrice)>
|
|
<cfset itemPriceMap[item.name] = val(item.basePrice)>
|
|
<cfelseif structKeyExists(item, "displayPrice")>
|
|
<cfset priceStr = reReplace(item.displayPrice, "[^0-9.]", "", "all")>
|
|
<cfif len(priceStr) AND isNumeric(priceStr)>
|
|
<cfset itemPriceMap[item.name] = val(priceStr)>
|
|
</cfif>
|
|
</cfif>
|
|
<!--- Extract image URLs --->
|
|
<cfif structKeyExists(item, "imageUrls")>
|
|
<cfset imgUrls = item.imageUrls>
|
|
<cfif structKeyExists(imgUrls, "medium")>
|
|
<cfset imageMap[item.name] = imgUrls.medium>
|
|
<cfelseif structKeyExists(imgUrls, "large")>
|
|
<cfset imageMap[item.name] = imgUrls.large>
|
|
</cfif>
|
|
</cfif>
|
|
</cfif>
|
|
</cfloop>
|
|
</cfif>
|
|
</cfloop>
|
|
</cfif>
|
|
|
|
<!---
    Extract items attached directly to the group (not nested in subgroups).
    For every struct item that has a simple "name", this records (keyed by item name):
      - itemCategoryMap: the enclosing groupName, when groupName is non-empty
      - itemPriceMap:    the first usable value among prices[1], price, unitPrice,
                         basePrice, or a displayPrice string such as "$12.99"
      - imageMap:        imageUrls.medium, falling back to imageUrls.large
--->
<cfif structKeyExists(group, "items") AND isArray(group.items)>
    <!--- Debug: log the first item's structure (once, guarded by variables.loggedItemKeys) --->
    <cfif arrayLen(group.items) GT 0 AND NOT structKeyExists(variables, "loggedItemKeys")>
        <cfset variables.loggedItemKeys = true>
        <cfset firstItem = group.items[1]>
        <cfif isStruct(firstItem)>
            <cfset arrayAppend(response.steps, "First item keys: " & structKeyList(firstItem))>
            <!--- Only simple values are logged: concatenating a struct/array would throw and abort the extraction --->
            <cfloop list="price,basePrice,displayPrice" index="debugPriceKey">
                <cfif structKeyExists(firstItem, debugPriceKey) AND isSimpleValue(firstItem[debugPriceKey])>
                    <cfset arrayAppend(response.steps, "item." & debugPriceKey & " = " & firstItem[debugPriceKey])>
                </cfif>
            </cfloop>
        </cfif>
    </cfif>
    <cfloop array="#group.items#" index="item">
        <!--- Skip entries that are not structs or whose name cannot serve as a map key --->
        <cfif isStruct(item) AND structKeyExists(item, "name") AND isSimpleValue(item.name)>
            <!--- Map item name to category --->
            <cfif len(groupName)>
                <cfset itemCategoryMap[item.name] = groupName>
            </cfif>
            <!--- Extract price - try multiple field names, first match wins --->
            <cfif structKeyExists(item, "prices") AND isArray(item.prices) AND arrayLen(item.prices) GT 0 AND isNumeric(item.prices[1])>
                <cfset itemPriceMap[item.name] = val(item.prices[1])>
            <cfelseif structKeyExists(item, "price") AND isNumeric(item.price)>
                <cfset itemPriceMap[item.name] = val(item.price)>
            <cfelseif structKeyExists(item, "unitPrice") AND isNumeric(item.unitPrice)>
                <cfset itemPriceMap[item.name] = val(item.unitPrice)>
            <cfelseif structKeyExists(item, "basePrice") AND isNumeric(item.basePrice)>
                <cfset itemPriceMap[item.name] = val(item.basePrice)>
            <cfelseif structKeyExists(item, "displayPrice") AND isSimpleValue(item.displayPrice)>
                <!--- displayPrice might be a string like "$12.99": keep only digits and dots --->
                <cfset priceStr = reReplace(item.displayPrice, "[^0-9.]", "", "all")>
                <cfif len(priceStr) AND isNumeric(priceStr)>
                    <cfset itemPriceMap[item.name] = val(priceStr)>
                </cfif>
            </cfif>
            <!--- Extract image URLs (medium preferred over large) --->
            <cfif structKeyExists(item, "imageUrls") AND isStruct(item.imageUrls)>
                <cfset imgUrls = item.imageUrls>
                <cfif structKeyExists(imgUrls, "medium")>
                    <cfset imageMap[item.name] = imgUrls.medium>
                <cfelseif structKeyExists(imgUrls, "large")>
                    <cfset imageMap[item.name] = imgUrls.large>
                </cfif>
            </cfif>
        </cfif>
    </cfloop>
</cfif>
|
|
</cfloop>
|
|
</cfif>
|
|
</cfif>
|
|
</cfloop>
|
|
<!---
    Merge the lookups collected from __OO_STATE__ into toastItems, matching on item name.
    Each counter records how many items received an image, a category or a price.
--->
<cfset imagesMatched = 0>
<cfset categoriesMatched = 0>
<cfset pricesMatched = 0>
<cfloop from="1" to="#arrayLen(toastItems)#" index="i">
    <cfset lookupName = toastItems[i].name>

    <!--- Image: the same URL is exposed under both imageUrl and imageSrc, plus its last path segment --->
    <cfif structKeyExists(imageMap, lookupName)>
        <cfset matchedImage = imageMap[lookupName]>
        <cfset toastItems[i]["imageUrl"] = matchedImage>
        <cfset toastItems[i]["imageSrc"] = matchedImage>
        <cfset toastItems[i]["imageFilename"] = listLast(matchedImage, "/")>
        <cfset imagesMatched = imagesMatched + 1>
    </cfif>

    <!--- Category from __OO_STATE__ replaces whatever category the item had --->
    <cfif structKeyExists(itemCategoryMap, lookupName)>
        <cfset toastItems[i]["category"] = itemCategoryMap[lookupName]>
        <cfset categoriesMatched = categoriesMatched + 1>
    </cfif>

    <!--- Price from __OO_STATE__ is only used when the item has no price yet or its price is 0 --->
    <cfif structKeyExists(itemPriceMap, lookupName)>
        <cfif NOT structKeyExists(toastItems[i], "price") OR toastItems[i].price EQ 0>
            <cfset toastItems[i]["price"] = itemPriceMap[lookupName]>
            <cfset pricesMatched = pricesMatched + 1>
        </cfif>
    </cfif>
</cfloop>
<cfset arrayAppend(response.steps, "Matched #imagesMatched# images, #categoriesMatched# categories, #pricesMatched# prices from __OO_STATE__")>
<cfif structCount(toastBusiness) GT 0>
    <cfset arrayAppend(response.steps, "Extracted business info: #structKeyList(toastBusiness)#")>
</cfif>
|
|
<cfcatch></cfcatch>
|
|
</cftry>
|
|
</cfif>
|
|
</cfif>
|
|
|
|
<!--- Items were found but no category was: add a single catch-all "Menu" category holding all of them --->
<cfset uncategorizedItemCount = arrayLen(toastItems)>
<cfif uncategorizedItemCount GT 0 AND NOT arrayLen(toastCategories)>
    <cfset arrayAppend(toastCategories, { "name": "Menu", "itemCount": uncategorizedItemCount })>
    <cfset arrayAppend(response.steps, "Added default 'Menu' category for #uncategorizedItemCount# items")>
</cfif>
|
|
|
|
<!---
    Scan ALL HTML files in the ZIP for business info.
    Fills gaps in toastBusiness (name, addressLine1, city, state, zip, phone, brandColor):
    a value is only written when the key is missing or empty, so data found earlier wins.
    Per-file failures are logged to response.steps and never abort the scan.
--->
<!--- Get the extraction directory from the URL (the UUID folder) --->
<cfset extractDir = reReplaceNoCase(targetUrl, "https?://[^/]+(/temp/menu-import/[a-f0-9]+/).*", "\1")>
<cfset extractDir = expandPath(extractDir)>
<cftry>
    <cfdirectory action="list" directory="#extractDir#" name="allHtmlFiles" filter="*.htm*" recurse="true" type="file">
    <cfset arrayAppend(response.steps, "Found " & allHtmlFiles.recordCount & " HTML files in ZIP")>

    <cfloop query="allHtmlFiles">
        <!--- Skip the main menu file we already processed --->
        <cfset otherFilePath = "#allHtmlFiles.directory#/#allHtmlFiles.name#">
        <cfif otherFilePath EQ localFilePath>
            <cfcontinue>
        </cfif>

        <cftry>
            <cfset otherHtml = fileRead(otherFilePath, "utf-8")>
            <cfset arrayAppend(response.steps, "Scanning " & allHtmlFiles.name & " for business info...")>

            <!--- Extract business name from title tag --->
            <cfif NOT structKeyExists(toastBusiness, "name") OR NOT len(toastBusiness.name)>
                <cfset otherTitleMatch = reMatchNoCase('<title[^>]*>([^<]+)</title>', otherHtml)>
                <cfif arrayLen(otherTitleMatch)>
                    <cfset otherTitle = reReplaceNoCase(otherTitleMatch[1], '.*<title[^>]*>([^<]+)</title>.*', '\1')>
                    <cfset otherTitle = trim(otherTitle)>
                    <!--- Keep the part before "|" and drop a trailing " - Menu/Order/Online..." suffix --->
                    <cfif findNoCase("|", otherTitle)>
                        <cfset otherTitle = trim(listFirst(otherTitle, "|"))>
                    </cfif>
                    <cfset otherTitle = trim(reReplaceNoCase(otherTitle, "\s*-\s*(Menu|Order|Online).*$", ""))>
                    <!--- Generic titles are rejected AFTER trimming, so "Menu | Joe's" is not stored as "Menu" --->
                    <cfif len(otherTitle) AND len(otherTitle) LT 100 AND NOT reFindNoCase("^(Menu|Home|About|Contact|Order|Online)$", otherTitle)>
                        <cfset toastBusiness["name"] = otherTitle>
                        <cfset arrayAppend(response.steps, "Found business name in " & allHtmlFiles.name & ": " & otherTitle)>
                    </cfif>
                </cfif>
            </cfif>

            <!--- Extract address - look for common patterns --->
            <cfif NOT structKeyExists(toastBusiness, "addressLine1") OR NOT len(toastBusiness.addressLine1)>
                <!--- Look for street address patterns (number + street name + optional unit) --->
                <cfset addrMatch = reMatchNoCase('(\d+\s+[A-Za-z0-9\s]+(?:St(?:reet)?|Ave(?:nue)?|Rd|Road|Blvd|Boulevard|Dr(?:ive)?|Ln|Lane|Way|Ct|Court|Pl(?:ace)?|Pkwy|Parkway)[.,]?\s*(?:Suite|Ste|##|Unit|Apt)?\s*[A-Za-z0-9\-]*)', otherHtml)>
                <cfif arrayLen(addrMatch)>
                    <cfset addrText = trim(addrMatch[1])>
                    <cfif len(addrText) GT 5 AND len(addrText) LT 100>
                        <cfset toastBusiness["addressLine1"] = addrText>
                        <cfset arrayAppend(response.steps, "Found address in " & allHtmlFiles.name & ": " & addrText)>
                    </cfif>
                </cfif>
            </cfif>

            <!--- Extract phone number, normalised to xxx-xxx-xxxx --->
            <cfif NOT structKeyExists(toastBusiness, "phone") OR NOT len(toastBusiness.phone)>
                <cfset phoneMatch = reMatchNoCase('\(?(\d{3})\)?[-.\s]?(\d{3})[-.\s]?(\d{4})', otherHtml)>
                <cfif arrayLen(phoneMatch)>
                    <cfset phoneText = reReplaceNoCase(phoneMatch[1], '.*\(?(\d{3})\)?[-.\s]?(\d{3})[-.\s]?(\d{4}).*', '\1-\2-\3')>
                    <cfif len(phoneText) GTE 10>
                        <cfset toastBusiness["phone"] = phoneText>
                        <cfset arrayAppend(response.steps, "Found phone in " & allHtmlFiles.name & ": " & phoneText)>
                    </cfif>
                </cfif>
            </cfif>

            <!--- Check for __OO_STATE__ in other files too (might have Restaurant info) --->
            <cfif findNoCase("window.__OO_STATE__", otherHtml)>
                <cfscript>
                // Cut out the object literal assigned to window.__OO_STATE__ by counting braces,
                // ignoring braces that sit inside double-quoted strings (backslash escapes honoured).
                otherOoJson = "";
                otherOoStart = findNoCase("window.__OO_STATE__", otherHtml);
                if (otherOoStart > 0) {
                    obs = find("{", otherHtml, otherOoStart);
                    if (obs > 0) {
                        od = 0; ois = false; oesc = false; obe = 0;
                        otherLen = len(otherHtml);
                        for (oi = obs; oi <= otherLen; oi++) {
                            oc = mid(otherHtml, oi, 1);
                            if (oesc) { oesc = false; continue; }
                            if (oc == chr(92) && ois) { oesc = true; continue; }
                            if (oc == '"') { ois = !ois; continue; }
                            if (!ois) {
                                if (oc == "{") od = od + 1;
                                else if (oc == "}") { od = od - 1; if (od == 0) { obe = oi; break; } }
                            }
                        }
                        if (obe > 0) otherOoJson = mid(otherHtml, obs, obe - obs + 1);
                    }
                }
                if (len(otherOoJson)) {
                    // Decode HTML entities left by a saved "View Source" page. Entity names are built from
                    // chr(38) so the literals survive tools that decode entities in source text.
                    // The ampersand entity is decoded LAST so double-encoded text is only decoded once.
                    otherAmp = chr(38);
                    otherOoJson = replace(otherOoJson, otherAmp & "lt;", "<", "all");
                    otherOoJson = replace(otherOoJson, otherAmp & "gt;", ">", "all");
                    otherOoJson = replace(otherOoJson, otherAmp & "quot;", '"', "all");
                    otherOoJson = replace(otherOoJson, otherAmp & "##39;", "'", "all");
                    otherOoJson = replace(otherOoJson, otherAmp & "amp;", otherAmp, "all");
                }
                </cfscript>
                <cfif len(otherOoJson)>
                    <cftry>
                        <cfset otherOoState = deserializeJSON(otherOoJson)>
                        <cfloop collection="#otherOoState#" item="otherKey">
                            <cfif left(otherKey, 11) EQ "Restaurant:" AND isStruct(otherOoState[otherKey])>
                                <cfset otherRest = otherOoState[otherKey]>
                                <cfif structKeyExists(otherRest, "name") AND isSimpleValue(otherRest.name) AND (NOT structKeyExists(toastBusiness, "name") OR NOT len(toastBusiness.name))>
                                    <cfset toastBusiness["name"] = otherRest.name>
                                    <cfset arrayAppend(response.steps, "Found business name in " & allHtmlFiles.name & " __OO_STATE__: " & otherRest.name)>
                                </cfif>
                                <cfif structKeyExists(otherRest, "location") AND isStruct(otherRest.location)>
                                    <cfset otherLoc = otherRest.location>
                                    <!--- Each pair is sourceKey:targetKey; a target is only filled when still missing/empty --->
                                    <cfloop list="address1:addressLine1,city:city,state:state,zipCode:zip,phone:phone" index="locFieldPair">
                                        <cfset locSrcKey = listFirst(locFieldPair, ":")>
                                        <cfset locDestKey = listLast(locFieldPair, ":")>
                                        <cfif structKeyExists(otherLoc, locSrcKey) AND isSimpleValue(otherLoc[locSrcKey])
                                              AND (NOT structKeyExists(toastBusiness, locDestKey) OR NOT len(toastBusiness[locDestKey]))>
                                            <cfset toastBusiness[locDestKey] = otherLoc[locSrcKey]>
                                        </cfif>
                                    </cfloop>
                                </cfif>
                                <!--- brandColor is stored without its "#" --->
                                <cfif structKeyExists(otherRest, "brandColor") AND isSimpleValue(otherRest.brandColor) AND (NOT structKeyExists(toastBusiness, "brandColor") OR NOT len(toastBusiness.brandColor))>
                                    <cfset toastBusiness["brandColor"] = replace(otherRest.brandColor, "##", "")>
                                </cfif>
                            </cfif>
                        </cfloop>
                        <cfcatch>
                            <!--- Best effort: record the failure and carry on with the next file --->
                            <cfset arrayAppend(response.steps, "Could not parse __OO_STATE__ in " & allHtmlFiles.name & ": " & cfcatch.message)>
                        </cfcatch>
                    </cftry>
                </cfif>
            </cfif>
            <cfcatch>
                <!--- Skip files that can't be read or scanned, but leave a trace --->
                <cfset arrayAppend(response.steps, "Skipped " & allHtmlFiles.name & ": " & cfcatch.message)>
            </cfcatch>
        </cftry>
    </cfloop>
    <cfcatch>
        <cfset arrayAppend(response.steps, "Could not scan other HTML files: " & cfcatch.message)>
    </cfcatch>
</cftry>
|
|
|
|
<cfset arrayAppend(response.steps, "Extracted " & arrayLen(toastItems) & " unique items from " & arrayLen(toastCategories) & " categories")>

<!---
    Scan for images in ZIP and analyze them for business info.
    Up to 3 images larger than 10000 bytes (and not inside a "_files" folder) are sent to the
    Claude messages API; any non-empty simple field in the returned JSON overwrites toastBusiness.
    Every failure is logged to response.steps and the import continues.
--->
<cftry>
    <cfdirectory action="list" directory="#extractDir#" name="zipImages" recurse="true" type="file">
    <cfset imageExtensions = "jpg,jpeg,png,gif,webp">
    <cfset zipImageFiles = []>
    <cfloop query="zipImages">
        <cfset imgExt = lCase(listLast(zipImages.name, "."))>
        <cfif listFindNoCase(imageExtensions, imgExt)>
            <!--- Skip small files (likely icons) and _files folder assets --->
            <cfif zipImages.size GT 10000 AND NOT findNoCase("_files", zipImages.directory)>
                <cfset arrayAppend(zipImageFiles, "#zipImages.directory#/#zipImages.name#")>
            </cfif>
        </cfif>
    </cfloop>

    <cfif arrayLen(zipImageFiles) GT 0>
        <cfset arrayAppend(response.steps, "Found " & arrayLen(zipImageFiles) & " images in ZIP to analyze for business info")>

        <!--- Analyze up to 3 images for business info --->
        <cfset imgLimit = min(arrayLen(zipImageFiles), 3)>
        <cfloop from="1" to="#imgLimit#" index="imgIdx">
            <cfset imgPath = zipImageFiles[imgIdx]>
            <cftry>
                <cfset imgContent = fileReadBinary(imgPath)>
                <cfset base64Img = toBase64(imgContent)>
                <!--- Detect actual media type from the base64 prefix of the content, not the extension; JPEG is the fallback --->
                <cfset mediaType = "image/jpeg">
                <cfif left(base64Img, 4) EQ "iVBO"><cfset mediaType = "image/png">
                <cfelseif left(base64Img, 6) EQ "R0lGOD"><cfset mediaType = "image/gif">
                <cfelseif left(base64Img, 5) EQ "UklGR"><cfset mediaType = "image/webp">
                </cfif>
                <cfset arrayAppend(response.steps, "Analyzing image: " & listLast(imgPath, "/\"))>

                <!--- Build Claude request for business info extraction: one image part followed by one text part --->
                <cfset imgMsgContent = []>
                <cfset arrayAppend(imgMsgContent, {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": mediaType,
                        "data": base64Img
                    }
                })>
                <cfset arrayAppend(imgMsgContent, {
                    "type": "text",
                    "text": "Extract ALL business information visible in this image. Look carefully for: 1) Business NAME (the restaurant/store name), 2) PHONE number (format: xxx-xxx-xxxx), 3) Full ADDRESS (street, city, state, zip), 4) HOURS of operation (all days shown). Return JSON: {""name"":"""",""addressLine1"":"""",""city"":"""",""state"":"""",""zip"":"""",""phone"":"""",""hours"":"""",""brandColor"":""""}. For hours, format as single string like 'Mon-Thu 7am-10pm, Fri-Sat 7am-11pm'. Return ONLY valid JSON."
                })>

                <cfset imgRequest = {
                    "model": "claude-sonnet-4-20250514",
                    "max_tokens": 1024,
                    "temperature": 0,
                    "messages": [{
                        "role": "user",
                        "content": imgMsgContent
                    }]
                }>

                <cfhttp url="https://api.anthropic.com/v1/messages" method="POST" timeout="60" result="imgHttpResult">
                    <cfhttpparam type="header" name="Content-Type" value="application/json">
                    <cfhttpparam type="header" name="x-api-key" value="#CLAUDE_API_KEY#">
                    <cfhttpparam type="header" name="anthropic-version" value="2023-06-01">
                    <cfhttpparam type="body" value="#serializeJSON(imgRequest)#">
                </cfhttp>

                <cfif findNoCase("200", imgHttpResult.statusCode)>
                    <cfset imgResponse = deserializeJSON(imgHttpResult.fileContent)>
                    <cfif structKeyExists(imgResponse, "content") AND arrayLen(imgResponse.content)>
                        <cfset imgText = imgResponse.content[1].text>
                        <!--- Clean up JSON: strip an optional Markdown code fence around the reply --->
                        <cfset imgText = trim(imgText)>
                        <cfif left(imgText, 7) EQ "```json">
                            <cfset imgText = mid(imgText, 8, len(imgText) - 7)>
                        </cfif>
                        <cfif left(imgText, 3) EQ "```">
                            <cfset imgText = mid(imgText, 4, len(imgText) - 3)>
                        </cfif>
                        <cfif right(imgText, 3) EQ "```">
                            <cfset imgText = left(imgText, len(imgText) - 3)>
                        </cfif>
                        <cfset imgText = trim(imgText)>

                        <cftry>
                            <cfset imgBizData = deserializeJSON(imgText)>
                            <!--- Image data OVERWRITES HTML-extracted data (more reliable) --->
                            <cfset bizFieldsToCheck = "name,addressLine1,city,state,zip,phone,hours,brandColor">
                            <cfloop list="#bizFieldsToCheck#" index="bizField">
                                <cfif structKeyExists(imgBizData, bizField) AND isSimpleValue(imgBizData[bizField])>
                                    <cfset bizValue = trim(imgBizData[bizField])>
                                    <!--- brandColor is stored without its "#", as in the HTML-derived value --->
                                    <cfif bizField EQ "brandColor">
                                        <cfset bizValue = trim(replace(bizValue, "##", ""))>
                                    </cfif>
                                    <cfif len(bizValue)>
                                        <cfset toastBusiness[bizField] = bizValue>
                                        <cfset arrayAppend(response.steps, "Found " & bizField & " from image: " & left(toastBusiness[bizField], 50))>
                                    </cfif>
                                </cfif>
                            </cfloop>
                            <cfcatch>
                                <cfset arrayAppend(response.steps, "Could not parse image analysis JSON")>
                            </cfcatch>
                        </cftry>
                    </cfif>
                <cfelse>
                    <!--- Surface API failures instead of silently skipping the image --->
                    <cfset arrayAppend(response.steps, "Image analysis request failed with status: " & imgHttpResult.statusCode)>
                </cfif>
                <cfcatch>
                    <cfset arrayAppend(response.steps, "Error analyzing image: " & cfcatch.message)>
                </cfcatch>
            </cftry>
        </cfloop>
    </cfif>
    <cfcatch>
        <cfset arrayAppend(response.steps, "Could not scan ZIP for images: " & cfcatch.message)>
    </cfcatch>
</cftry>
|
|
|
|
<!--- Log which business fields were collected, and the business name when there is one --->
<cfset bizKeys = structKeyList(toastBusiness)>
<cfif len(bizKeys)>
    <cfset arrayAppend(response.steps, "Business info keys: " & bizKeys)>
<cfelse>
    <cfset arrayAppend(response.steps, "Business info keys: (none)")>
</cfif>
<cfif structKeyExists(toastBusiness, "name")>
    <cfset arrayAppend(response.steps, "Business name: #toastBusiness.name#")>
</cfif>

<!---
    Return directly without Claude: the parsed business, categories and items go out as-is,
    the remaining DATA collections are empty arrays, and the request ends at cfabort.
--->
<cfset toastPayload = structNew()>
<cfset toastPayload["business"] = toastBusiness>
<cfset toastPayload["categories"] = toastCategories>
<cfset toastPayload["modifiers"] = arrayNew(1)>
<cfset toastPayload["items"] = toastItems>
<cfset toastPayload["imageUrls"] = arrayNew(1)>
<cfset toastPayload["headerCandidateIndices"] = arrayNew(1)>
<cfset toastPayload["imageMappings"] = arrayNew(1)>

<cfset response["OK"] = true>
<cfset response["DATA"] = toastPayload>
<cfset response["sourceUrl"] = targetUrl>
<cfset response["pagesProcessed"] = 1>
<cfset response["imagesFound"] = 0>
<cfset response["playwrightImagesCount"] = 0>
<cfset response["toastDirect"] = true>
<cfoutput>#serializeJSON(response)#</cfoutput>
<cfabort>
|
|
|
|
<cfcatch type="any">
|
|
<cfset arrayAppend(response.steps, "Toast HTML parse failed: " & cfcatch.message & " - falling back to Claude")>
|
|
</cfcatch>
|
|
</cftry>
|
|
</cfif>
|
|
|
|
<!---
    Extract base URL for resolving relative links.
      baseUrl  = scheme + host, used for root-relative paths ("/img/a.jpg")
      basePath = URL up to and including the last "/" of the path, used for relative paths ("img/a.jpg")
    Query strings and fragments are never part of either value.
--->
<cfset baseUrl = reReplace(targetUrl, "(https?://[^/?##]+).*", "\1")>
<cfset basePath = reReplace(targetUrl, "(https?://[^/]+/[^?]*/?).*", "\1")>
<cfif NOT reFindNoCase("/$", basePath)>
    <cfif reFind("^https?://[^/]+$", basePath)>
        <!--- Host-only URL (no path): stripping the last segment here would cut the host and leave "https://" --->
        <cfset basePath = reReplace(basePath, "[?##].*$", "") & "/">
    <cfelse>
        <!--- Drop the trailing file/segment name, keeping its directory --->
        <cfset basePath = reReplace(basePath, "/[^/]*$", "/")>
    </cfif>
</cfif>
|
|
<cfelse>
|
|
<!---
    Remote URL - use Playwright for JS-rendered content.
    Runs /opt/playwright/run.sh with the URL, parses its JSON output and sets:
      pageHtml         - the "html" field of the result
      playwrightImages - the "images" array of the result (empty array when absent)
    Throws when the URL is unsafe to quote, the script prints nothing, it reports an
    "error" key, or the result carries no HTML.
--->
<cfset arrayAppend(response.steps, "Fetching URL with Playwright: " & targetUrl)>

<!--- targetUrl is placed inside a single-quoted command-line argument below; presumably it originates
      from the request body, so reject characters that could break out of that argument --->
<cfif reFind("['""`\\\s]", trim(targetUrl))>
    <cfthrow message="URL contains characters that are not allowed (quotes, backslashes or whitespace)">
</cfif>

<cfset playwrightOutput = "">
<cfexecute name="/opt/playwright/run.sh" arguments="'#targetUrl#' 4000" timeout="90" variable="playwrightOutput" />

<cfif NOT len(trim(playwrightOutput))>
    <cfthrow message="Playwright returned empty response">
</cfif>

<cfset playwrightResult = deserializeJSON(playwrightOutput)>
<cfif NOT isStruct(playwrightResult)>
    <cfthrow message="Playwright returned an unexpected response">
</cfif>
<cfif structKeyExists(playwrightResult, "error")>
    <cfthrow message="Playwright error: #playwrightResult.error#">
</cfif>
<cfif NOT structKeyExists(playwrightResult, "html")>
    <cfthrow message="Playwright response did not contain page HTML">
</cfif>

<cfset pageHtml = playwrightResult.html>
<!--- Image URLs captured by the script; anything that is not an array is treated as "none" --->
<cfset playwrightImages = (structKeyExists(playwrightResult, "images") AND isArray(playwrightResult.images)) ? playwrightResult.images : arrayNew(1)>
<cfset arrayAppend(response.steps, "Fetched " & len(pageHtml) & " bytes via Playwright, " & arrayLen(playwrightImages) & " images captured")>
|
|
|
|
<!--- ========== WOOCOMMERCE FAST PATH ========== --->
<!---
    When the page looks like a WooCommerce shop, run /opt/playwright/run-woo-modifiers.sh against the
    shop root and, if it yields items, answer the request directly (cfabort) with
    parsedVia = "woocommerce_playwright". Any failure or an empty result is logged to
    response.steps and processing falls through to the Claude path.
--->
<cfif findNoCase("woocommerce", pageHtml) OR findNoCase("wc-add-to-cart", pageHtml) OR findNoCase("tm-extra-product-options", pageHtml)>
    <cfset arrayAppend(response.steps, "WooCommerce site detected - running modifier extraction")>

    <!--- Use the shop root URL --->
    <cfset wooUrl = reReplace(targetUrl, "(https?://[^/]+).*", "\1")>

    <cftry>
        <!--- wooUrl is placed inside a single-quoted command-line argument: refuse characters that could break out of it --->
        <cfif reFind("['""`\\\s]", wooUrl)>
            <cfthrow message="shop URL contains characters that cannot be passed safely on the command line">
        </cfif>

        <cfset wooOutput = "">
        <cfexecute name="/opt/playwright/run-woo-modifiers.sh" arguments="'#wooUrl#'" timeout="300" variable="wooOutput" />

        <cfif len(trim(wooOutput))>
            <cfset wooResult = deserializeJSON(wooOutput)>

            <cfif isStruct(wooResult) AND structKeyExists(wooResult, "items") AND isArray(wooResult.items) AND arrayLen(wooResult.items) GT 0>
                <!--- Resolve modifiers BEFORE logging: a result without a "modifiers" array must not discard the items --->
                <cfset wooModifiers = (structKeyExists(wooResult, "modifiers") AND isArray(wooResult.modifiers)) ? wooResult.modifiers : []>
                <cfset arrayAppend(response.steps, "WooCommerce extraction: " & arrayLen(wooResult.items) & " items, " & arrayLen(wooModifiers) & " modifier groups")>

                <!--- Build categories from item category fields (wooCats: category name -> item count) --->
                <cfset wooCats = {}>
                <cfset wooItems = []>
                <cfset wooImageMappings = {}>
                <cfloop from="1" to="#arrayLen(wooResult.items)#" index="wi">
                    <cfset wItem = wooResult.items[wi]>
                    <!--- An entry without a usable name cannot be keyed or displayed: skip it --->
                    <cfif NOT isStruct(wItem) OR NOT structKeyExists(wItem, "name") OR NOT isSimpleValue(wItem.name)>
                        <cfcontinue>
                    </cfif>

                    <!--- Items without a category are filed under "Menu" --->
                    <cfset catName = (structKeyExists(wItem, "category") AND len(trim(wItem.category))) ? trim(wItem.category) : "Menu">
                    <cfif NOT structKeyExists(wooCats, catName)>
                        <cfset wooCats[catName] = 0>
                    </cfif>
                    <cfset wooCats[catName] = wooCats[catName] + 1>

                    <cfset itemId = "item_" & wi>
                    <cfset itemMods = (structKeyExists(wooResult, "itemModifierMap") AND structKeyExists(wooResult.itemModifierMap, wItem.name)) ? wooResult.itemModifierMap[wItem.name] : []>
                    <cfif NOT isArray(itemMods)>
                        <cfset itemMods = []>
                    </cfif>
                    <cfset wItemImageUrl = (structKeyExists(wItem, "imageUrl") AND len(trim(wItem.imageUrl))) ? trim(wItem.imageUrl) : "">
                    <cfset arrayAppend(wooItems, {
                        "id": itemId,
                        "name": wItem.name,
                        "price": structKeyExists(wItem, "price") ? val(wItem.price) : 0,
                        "description": structKeyExists(wItem, "description") ? wItem.description : "",
                        "category": catName,
                        "modifiers": itemMods,
                        "hasModifiers": arrayLen(itemMods) GT 0,
                        "imageUrl": wItemImageUrl
                    })>

                    <!--- Item name -> image URL, same trimmed value as the item's own imageUrl --->
                    <cfif len(wItemImageUrl)>
                        <cfset wooImageMappings[wItem.name] = wItemImageUrl>
                    </cfif>
                </cfloop>

                <cfset wooCategories = []>
                <cfloop collection="#wooCats#" item="wcName">
                    <cfset arrayAppend(wooCategories, { "name": wcName, "itemCount": wooCats[wcName] })>
                </cfloop>

                <cfset wooBiz = (structKeyExists(wooResult, "business") AND isStruct(wooResult.business)) ? wooResult.business : {}>
                <cfset menuData = {
                    "business": {
                        "name": structKeyExists(wooBiz, "name") ? wooBiz.name : "",
                        "address": structKeyExists(wooBiz, "address") ? wooBiz.address : "",
                        "phone": structKeyExists(wooBiz, "phone") ? wooBiz.phone : "",
                        "hours": structKeyExists(wooBiz, "hours") ? wooBiz.hours : ""
                    },
                    "categories": wooCategories,
                    "items": wooItems,
                    "modifiers": wooModifiers,
                    "imageUrls": [],
                    "imageMappings": wooImageMappings,
                    "headerCandidateIndices": []
                }>

                <cfset response["OK"] = true>
                <cfset response["DATA"] = menuData>
                <cfset response["sourceUrl"] = targetUrl>
                <cfset response["parsedVia"] = "woocommerce_playwright">
                <cfcontent type="application/json" reset="true">
                <cfoutput>#serializeJSON(response)#</cfoutput>
                <cfabort>
            </cfif>
        </cfif>
        <cfset arrayAppend(response.steps, "WooCommerce extraction returned no items - falling through to Claude")>
        <cfcatch>
            <cfset arrayAppend(response.steps, "WooCommerce extraction failed: " & cfcatch.message & " - falling through to Claude")>
        </cfcatch>
    </cftry>
</cfif>
<!--- ========== END WOOCOMMERCE FAST PATH ========== --->
|
|
|
|
<!---
    Extract base URL for resolving relative links.
      baseUrl  = scheme + host, used for root-relative paths ("/img/a.jpg")
      basePath = URL up to and including the last "/" of the path, used for relative paths ("img/a.jpg")
    Query strings and fragments are never part of either value.
--->
<cfset baseUrl = reReplace(targetUrl, "(https?://[^/?##]+).*", "\1")>
<cfset basePath = reReplace(targetUrl, "(https?://[^/]+/[^?]*/?).*", "\1")>
<cfif NOT reFindNoCase("/$", basePath)>
    <cfif reFind("^https?://[^/]+$", basePath)>
        <!--- Host-only URL (no path): stripping the last segment here would cut the host and leave "https://" --->
        <cfset basePath = reReplace(basePath, "[?##].*$", "") & "/">
    <cfelse>
        <!--- Drop the trailing file/segment name, keeping its directory --->
        <cfset basePath = reReplace(basePath, "/[^/]*$", "/")>
    </cfif>
</cfif>
|
|
</cfif>
|
|
<cfelse>
|
|
<cfthrow message="Either 'url' or 'html' content is required">
|
|
</cfif>
|
|
|
|
<!---
    Initialize playwrightImages if not set (HTML upload case).
    The variables scope is checked explicitly: isDefined() also searches cascaded scopes such as
    URL/form, so a request parameter named "playwrightImages" could otherwise skip this default.
--->
<cfif NOT structKeyExists(variables, "playwrightImages") OR NOT isArray(variables.playwrightImages)>
    <cfset playwrightImages = arrayNew(1)>
</cfif>

<!--- Menu pages array - Playwright renders JS so we get everything in one page --->
<cfset menuPages = arrayNew(1)>
<cfset arrayAppend(menuPages, { url: isDefined("targetUrl") ? targetUrl : "uploaded", html: pageHtml })>

<!--- Extract images from all pages --->
<cfset allImages = arrayNew(1)>
<!--- imageUrls is used as a set: key = image URL, value = true --->
<cfset imageUrls = structNew()>
<cfset imageMappings = arrayNew(1)><!--- For local HTML: filename -> alt text mappings --->

<!--- Add images captured by Playwright (network requests), skipping icon-like names and SVGs --->
<cfloop array="#playwrightImages#" index="pwImg">
    <!--- Only plain strings can be URLs / struct keys; ignore anything else in the list --->
    <cfif isSimpleValue(pwImg) AND len(pwImg) AND NOT reFindNoCase("(icon|favicon|logo|sprite|pixel|tracking|badge|button|\.svg)", pwImg)>
        <cfset imageUrls[pwImg] = true>
    </cfif>
</cfloop>
|
|
|
|
<cfscript>
// Walk every <img> tag of every menu page. For each tag:
//   1. record a filename/alt/src mapping (original, unresolved src) when alt text is present,
//   2. resolve the src against baseUrl / basePath,
//   3. remember the resolved http(s) URL in the imageUrls set unless it looks like an icon.
iconNamePattern = "(icon|favicon|logo|sprite|pixel|tracking|badge|button)";

for (menuPage in menuPages) {
    // Find all img tags
    imgMatches = reMatchNoCase('<img[^>]+src=["'']([^"'']+)["''][^>]*>', menuPage.html);

    for (imgTag in imgMatches) {
        imgSrc = reReplaceNoCase(imgTag, '.*src=["'']([^"'']+)["''].*', "\1");

        // Alt text, empty when the tag has none
        imgAlt = "";
        if (reFindNoCase('alt=["'']([^"'']+)["'']', imgTag)) {
            imgAlt = reReplaceNoCase(imgTag, '.*alt=["'']([^"'']+)["''].*', "\1");
        }

        // Just the filename, for matching local uploads
        imgFilename = listLast(imgSrc, "/\");
        if (len(imgFilename) AND len(imgAlt) AND NOT reFindNoCase(iconNamePattern, imgSrc)) {
            mapping = structNew();
            mapping["filename"] = imgFilename;
            mapping["alt"] = imgAlt;
            mapping["src"] = imgSrc;
            arrayAppend(imageMappings, mapping);
        }

        // Resolve relative URLs: root-relative against the host, everything else against the page folder
        if (left(imgSrc, 1) EQ "/") {
            imgSrc = baseUrl & imgSrc;
        } else if (NOT reFindNoCase("^https?://", imgSrc) AND NOT reFindNoCase("^data:", imgSrc)) {
            imgSrc = basePath & imgSrc;
        }

        // Keep only new http(s) URLs that do not match the icon/logo name patterns
        if (reFindNoCase("^https?://", imgSrc) AND NOT structKeyExists(imageUrls, imgSrc) AND NOT reFindNoCase(iconNamePattern, imgSrc)) {
            imageUrls[imgSrc] = true;
        }
    }
}

arrayAppend(response.steps, "Found #structCount(imageUrls)# unique images");
</cfscript>
|
|
|
|
<!---
    A target URL under /temp/menu-import/ means the page came from an uploaded ZIP.
    In that case localBasePath is the on-disk folder behind that URL; otherwise it stays empty.
--->
<cfset localBasePath = "">
<cfset isLocalScan = isDefined("targetUrl") AND findNoCase("/temp/menu-import/", targetUrl)>
<cfif isLocalScan>
    <!--- Keep only the "/temp/menu-import/<folder>/" part of the URL and map it to a filesystem path --->
    <cfset localFolderUrlPath = reReplaceNoCase(targetUrl, "https?://[^/]+(/temp/menu-import/[^/]+/).*", "\1")>
    <cfset localBasePath = expandPath(localFolderUrlPath)>
    <cfset arrayAppend(response.steps, "Local scan detected, base path: #localBasePath#")>
</cfif>
|
|
|
|
<!---
    Download/read images (limit to 20).
    Each key of imageUrls is either read from disk (local ZIP scan) or fetched over HTTP.
    Content larger than 5000 bytes is base64-encoded and appended to imageDataArray as
    { type: "image", source: { type, media_type, data }, url }. Failures skip the image.
--->
<cfset imageDataArray = arrayNew(1)>
<cfset downloadedCount = 0>
<cfset localReadCount = 0>

<!--- Canonical, forward-slashed root of the extraction folder, always ending in "/".
      Local reads must resolve inside it: image paths come from untrusted HTML and may contain "../" --->
<cfset localScanRoot = "">
<cfif isLocalScan AND len(localBasePath)>
    <cfset localScanRoot = reReplace(replace(getCanonicalPath(localBasePath), "\", "/", "all"), "/+$", "") & "/">
</cfif>

<cfloop collection="#imageUrls#" item="imgUrl">
    <cfif downloadedCount GTE 20>
        <cfbreak>
    </cfif>

    <cftry>
        <cfset imgBytes = 0>
        <cfset imgContent = "">

        <!--- Check if this is a local file we can read directly --->
        <cfif isLocalScan AND findNoCase("/temp/menu-import/", imgUrl)>
            <!--- Convert URL to local path --->
            <cfset localPath = expandPath(reReplaceNoCase(imgUrl, "https?://[^/]+(/temp/menu-import/.*)", "\1"))>
            <cfset canonicalLocalPath = replace(getCanonicalPath(localPath), "\", "/", "all")>

            <!--- Read only files whose resolved path stays inside the extraction folder --->
            <cfif len(localScanRoot) AND left(canonicalLocalPath, len(localScanRoot)) EQ localScanRoot AND fileExists(localPath)>
                <cfset imgContent = fileReadBinary(localPath)>
                <cfset imgBytes = len(imgContent)>
                <cfset localReadCount = localReadCount + 1>
            </cfif>
        <cfelse>
            <!--- Fetch remote image via HTTP --->
            <cfhttp url="#imgUrl#" method="GET" timeout="10" result="imgResult" getasbinary="yes">
            </cfhttp>

            <cfif findNoCase("200", imgResult.statusCode) AND isBinary(imgResult.fileContent)>
                <cfset contentType = structKeyExists(imgResult.responseHeader, "Content-Type") ? imgResult.responseHeader["Content-Type"] : "">
                <!--- The Content-Type header only decides whether the body is accepted at all --->
                <cfif reFindNoCase("image/(jpeg|jpg|png|gif|webp)", contentType)>
                    <cfset imgContent = imgResult.fileContent>
                    <cfset imgBytes = len(imgContent)>
                </cfif>
            </cfif>
        </cfif>

        <!--- Process the image if we got valid content (anything up to 5000 bytes is treated as an icon) --->
        <cfif imgBytes GT 5000>
            <cfset base64Content = toBase64(imgContent)>
            <!--- Detect actual media type from the base64 prefix of the content, not extension/headers; JPEG is the fallback --->
            <cfset mediaType = "image/jpeg">
            <cfif left(base64Content, 4) EQ "iVBO"><cfset mediaType = "image/png">
            <cfelseif left(base64Content, 6) EQ "R0lGOD"><cfset mediaType = "image/gif">
            <cfelseif left(base64Content, 5) EQ "UklGR"><cfset mediaType = "image/webp">
            </cfif>

            <cfset imgSource = structNew()>
            <cfset imgSource["type"] = "base64">
            <cfset imgSource["media_type"] = mediaType>
            <cfset imgSource["data"] = base64Content>

            <cfset imgStruct = structNew()>
            <cfset imgStruct["type"] = "image">
            <cfset imgStruct["source"] = imgSource>
            <cfset imgStruct["url"] = imgUrl>

            <cfset arrayAppend(imageDataArray, imgStruct)>
            <cfset downloadedCount = downloadedCount + 1>
        </cfif>
        <cfcatch>
            <!--- Skip failed downloads --->
        </cfcatch>
    </cftry>
</cfloop>

<cfset arrayAppend(response.steps, "Loaded #arrayLen(imageDataArray)# valid images (#localReadCount# from local disk)")>
|
|
|
|
<!--- ============================================================ --->
|
|
<!--- TOAST FAST PATH: Parse __OO_STATE__ directly instead of Claude --->
|
|
<!--- ============================================================ --->
|
|
<cfif findNoCase("window.__OO_STATE__", pageHtml) AND findNoCase("toasttab", pageHtml)>
|
|
<cfset arrayAppend(response.steps, "Toast page detected - extracting menu data from __OO_STATE__")>
|
|
<cftry>
|
|
<!--- Extract JSON using brace-counting (no tag stripping needed - braces don't appear in HTML tags) --->
<cfscript>
// Result: ooStateJson holds the object literal assigned to window.__OO_STATE__, or "" when
// the marker, the opening brace or the matching closing brace cannot be found.
ooStateJson = "";
ooStartPos = findNoCase("window.__OO_STATE__", pageHtml);
if (ooStartPos > 0) {
    braceStart = find("{", pageHtml, ooStartPos);
    if (braceStart > 0) {
        depth = 0;       // current brace nesting level
        inStr = false;   // inside a double-quoted string
        esc = false;     // previous character was a backslash inside a string
        braceEnd = 0;
        totalLen = len(pageHtml);
        for (ci = braceStart; ci <= totalLen; ci++) {
            ch = mid(pageHtml, ci, 1);
            if (esc) { esc = false; continue; }
            if (ch == chr(92) && inStr) { esc = true; continue; }
            if (ch == '"') { inStr = !inStr; continue; }
            if (!inStr) {
                if (ch == "{") depth = depth + 1;
                else if (ch == "}") {
                    depth = depth - 1;
                    // Back at depth 0: this brace closes the object that started at braceStart
                    if (depth == 0) { braceEnd = ci; break; }
                }
            }
        }
        if (braceEnd > 0) {
            ooStateJson = mid(pageHtml, braceStart, braceEnd - braceStart + 1);
        }
    }
}
// Decode HTML entities from View Source (Chrome encodes the ampersand, angle brackets and quotes
// as named entities). Entity names are built from chr(38) so the literals survive tools that
// decode entities in source text. The ampersand entity is decoded LAST so that double-encoded
// text is only decoded once.
if (len(ooStateJson)) {
    htmlAmp = chr(38);
    ooStateJson = replace(ooStateJson, htmlAmp & "lt;", "<", "all");
    ooStateJson = replace(ooStateJson, htmlAmp & "gt;", ">", "all");
    ooStateJson = replace(ooStateJson, htmlAmp & "quot;", '"', "all");
    ooStateJson = replace(ooStateJson, htmlAmp & "##39;", "'", "all");
    ooStateJson = replace(ooStateJson, htmlAmp & "amp;", htmlAmp, "all");
}
</cfscript>
|
|
<cfif len(ooStateJson)>
|
|
<cfscript>
    // Parse the extracted state object and reset the accumulators the Toast fast path fills in.
    ooState = deserializeJSON(ooStateJson);

    toastBusiness = {};     // business details collected so far
    toastCategories = [];   // category structs in discovery order
    toastItems = [];        // flattened menu items
    categorySet = {};       // category names already added to toastCategories
    itemId = 1;             // running counter used to build "item_N" ids
    menuNames = [];         // names of the Menu: entries that have groups
</cfscript>
|
|
|
|
<cfscript>
    // Extract restaurant info from ROOT_QUERY (Apollo cache format).
    // Fills toastBusiness with name, description, address parts, phone, coordinates,
    // brandColor and a text "hours" summary when the corresponding keys are present.
    if (structKeyExists(ooState, "ROOT_QUERY")) {

        // Turns an "HH:mm" 24-hour value into a compact 12-hour label such as "7:30am" or "6pm".
        toastFormatHour = function(required string hhmm) {
            var h = val(listFirst(arguments.hhmm, ":"));
            var m = val(listLast(arguments.hhmm, ":"));
            var suffix = h GTE 12 ? "pm" : "am";
            if (h GT 12) h = h - 12;
            if (h EQ 0) h = 12;
            return h & (m GT 0 ? ":" & numberFormat(m, "00") : "") & suffix;
        };

        rootQuery = ooState["ROOT_QUERY"];
        for (rqKey in rootQuery) {
            // Only restaurantV2By... / restaurantV2(...) entries that hold a struct are of interest
            if (!(findNoCase("restaurantV2By", rqKey) || findNoCase("restaurantV2(", rqKey))) continue;
            if (!isStruct(rootQuery[rqKey])) continue;
            restaurant = rootQuery[rqKey];

            // First name found wins
            if (structKeyExists(restaurant, "name") && !structKeyExists(toastBusiness, "name")) {
                toastBusiness["name"] = restaurant.name;
            }
            if (structKeyExists(restaurant, "description") && !isNull(restaurant.description) && len(trim(toString(restaurant.description)))) {
                toastBusiness["description"] = trim(toString(restaurant.description));
            }

            if (structKeyExists(restaurant, "location") && isStruct(restaurant.location)) {
                loc = restaurant.location;
                if (structKeyExists(loc, "address1") && !isNull(loc.address1)) {
                    toastBusiness["addressLine1"] = loc.address1;
                    // "address" is the single-line form: "address1, city, state zip"
                    toastBusiness["address"] = loc.address1;
                    if (structKeyExists(loc, "city") && !isNull(loc.city)) {
                        toastBusiness["city"] = loc.city;
                        toastBusiness["address"] = toastBusiness.address & ", " & loc.city;
                    }
                    if (structKeyExists(loc, "state") && !isNull(loc.state)) {
                        toastBusiness["state"] = loc.state;
                        toastBusiness["address"] = toastBusiness.address & ", " & loc.state;
                    }
                    if (structKeyExists(loc, "zip") && !isNull(loc.zip)) {
                        toastBusiness["zip"] = loc.zip;
                        toastBusiness["address"] = toastBusiness.address & " " & loc.zip;
                    } else if (structKeyExists(loc, "zipCode") && !isNull(loc.zipCode)) {
                        toastBusiness["zip"] = loc.zipCode;
                        toastBusiness["address"] = toastBusiness.address & " " & loc.zipCode;
                    }
                }
                if (structKeyExists(loc, "phone") && !isNull(loc.phone)) {
                    toastBusiness["phone"] = loc.phone;
                }
                if (structKeyExists(loc, "latitude") && isNumeric(loc.latitude) && structKeyExists(loc, "longitude") && isNumeric(loc.longitude)) {
                    toastBusiness["latitude"] = loc.latitude;
                    toastBusiness["longitude"] = loc.longitude;
                }
            }

            if (structKeyExists(restaurant, "brandColor") && !isNull(restaurant.brandColor)) {
                // Strip the first hash character from the colour value
                toastBusiness["brandColor"] = replace(restaurant.brandColor, "##", "");
            }

            // Extract business hours from schedule.
            // Hours are optional, so a malformed date/time must not abort the whole
            // menu extraction: failures are logged to response.steps and skipped.
            if (structKeyExists(restaurant, "schedule") && isStruct(restaurant.schedule)) {
                try {
                    sched = restaurant.schedule;
                    if (structKeyExists(sched, "upcomingSchedules") && isArray(sched.upcomingSchedules) && arrayLen(sched.upcomingSchedules) GT 0) {
                        // Use the first schedule (usually TAKE_OUT)
                        upcoming = sched.upcomingSchedules[1];
                        if (structKeyExists(upcoming, "dailySchedules") && isArray(upcoming.dailySchedules)) {
                            // Build a hours map: dayOfWeek (1=Sun ... 7=Sat) -> {open, close}
                            dayHours = {};
                            for (ds in upcoming.dailySchedules) {
                                if (structKeyExists(ds, "date") && structKeyExists(ds, "servicePeriods") && isArray(ds.servicePeriods) && arrayLen(ds.servicePeriods) GT 0) {
                                    dow = dayOfWeek(parseDateTime(ds.date));
                                    // Only the first service period of each day is used
                                    sp = ds.servicePeriods[1];
                                    if (structKeyExists(sp, "startTime") && structKeyExists(sp, "endTime")) {
                                        dayHours[dow] = { "open": left(sp.startTime, 5), "close": left(sp.endTime, 5) };
                                    }
                                }
                            }

                            // Format as text: "Mon 7:30am-6:30pm, Tue 7:30am-6:30pm"
                            dayNames = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"];
                            hoursParts = [];
                            // Emit days in Mon-Sun order (2,3,4,5,6,7,1)
                            dayOrder = [2, 3, 4, 5, 6, 7, 1];
                            for (dIdx in dayOrder) {
                                if (structKeyExists(dayHours, dIdx)) {
                                    dh = dayHours[dIdx];
                                    arrayAppend(hoursParts, dayNames[dIdx] & " " & toastFormatHour(dh.open) & "-" & toastFormatHour(dh.close));
                                }
                            }
                            if (arrayLen(hoursParts) GT 0) {
                                toastBusiness["hours"] = arrayToList(hoursParts, ", ");
                            }
                        }
                    }
                } catch (any hoursErr) {
                    arrayAppend(response.steps, "Could not parse Toast business hours: " & hoursErr.message);
                }
            }
        }
    }
</cfscript>
|
|
|
|
<cfscript>
    // Builds one normalized item struct from a Toast menu item node.
    //   src          - item node from the state object (must have a non-empty "name")
    //   categoryName - category the item is filed under
    //   seq          - running number used for the "item_N" id
    // Returns a struct with id, name, category, modifiers, hasModifiers, guid,
    // itemGroupGuid, description, price, imageUrl (+ imageSrc/imageFilename when an image exists).
    buildToastItem = function(required struct src, required string categoryName, required numeric seq) {
        var st = {};
        var priceStr = "";
        st["id"] = "item_" & arguments.seq;
        st["name"] = trim(arguments.src.name);
        st["category"] = arguments.categoryName;
        st["modifiers"] = [];
        st["hasModifiers"] = structKeyExists(arguments.src, "hasModifiers") && arguments.src.hasModifiers EQ true;
        st["guid"] = structKeyExists(arguments.src, "guid") ? arguments.src.guid : "";
        st["itemGroupGuid"] = structKeyExists(arguments.src, "itemGroupGuid") ? arguments.src.itemGroupGuid : "";

        st["description"] = "";
        if (structKeyExists(arguments.src, "description") && !isNull(arguments.src.description)) {
            st["description"] = trim(toString(arguments.src.description));
        }

        // Price: first usable of prices[1], price, unitPrice, basePrice, displayPrice (digits and dots only)
        st["price"] = 0;
        if (structKeyExists(arguments.src, "prices") && isArray(arguments.src.prices) && arrayLen(arguments.src.prices) GT 0 && isNumeric(arguments.src.prices[1])) {
            st["price"] = val(arguments.src.prices[1]);
        } else if (structKeyExists(arguments.src, "price") && isNumeric(arguments.src.price)) {
            st["price"] = val(arguments.src.price);
        } else if (structKeyExists(arguments.src, "unitPrice") && isNumeric(arguments.src.unitPrice)) {
            st["price"] = val(arguments.src.unitPrice);
        } else if (structKeyExists(arguments.src, "basePrice") && isNumeric(arguments.src.basePrice)) {
            st["price"] = val(arguments.src.basePrice);
        } else if (structKeyExists(arguments.src, "displayPrice") && len(trim(toString(arguments.src.displayPrice)))) {
            priceStr = reReplace(toString(arguments.src.displayPrice), "[^0-9.]", "", "all");
            if (len(priceStr) && isNumeric(priceStr)) {
                st["price"] = val(priceStr);
            }
        }

        // Image: prefer medium, then large, then small
        st["imageUrl"] = "";
        if (structKeyExists(arguments.src, "imageUrls") && !isNull(arguments.src.imageUrls) && isStruct(arguments.src.imageUrls)) {
            if (structKeyExists(arguments.src.imageUrls, "medium")) {
                st["imageUrl"] = arguments.src.imageUrls.medium;
            } else if (structKeyExists(arguments.src.imageUrls, "large")) {
                st["imageUrl"] = arguments.src.imageUrls.large;
            } else if (structKeyExists(arguments.src.imageUrls, "small")) {
                st["imageUrl"] = arguments.src.imageUrls.small;
            }
            if (len(st.imageUrl)) {
                st["imageSrc"] = st.imageUrl;
                st["imageFilename"] = listLast(st.imageUrl, "/");
            }
        }
        return st;
    };

    for (ooKey in ooState) {
        // Also check for Restaurant: keys (older Toast format); only used while no name is known yet
        if (left(ooKey, 11) EQ "Restaurant:" && !structKeyExists(toastBusiness, "name")) {
            restaurant = ooState[ooKey];
            if (structKeyExists(restaurant, "name")) {
                toastBusiness["name"] = restaurant.name;
            }
            if (structKeyExists(restaurant, "location") && isStruct(restaurant.location)) {
                loc = restaurant.location;
                if (structKeyExists(loc, "address1")) {
                    toastBusiness["address"] = loc.address1;
                    if (structKeyExists(loc, "city")) toastBusiness["address"] = toastBusiness.address & ", " & loc.city;
                    if (structKeyExists(loc, "state")) toastBusiness["address"] = toastBusiness.address & ", " & loc.state;
                    if (structKeyExists(loc, "zipCode")) toastBusiness["address"] = toastBusiness.address & " " & loc.zipCode;
                }
                if (structKeyExists(loc, "phone")) {
                    toastBusiness["phone"] = loc.phone;
                }
            }
            if (structKeyExists(restaurant, "brandColor")) {
                toastBusiness["brandColor"] = replace(restaurant.brandColor, "##", "");
            }
        }

        // Extract menu data
        if (left(ooKey, 5) EQ "Menu:") {
            menu = ooState[ooKey];
            if (structKeyExists(menu, "groups") && isArray(menu.groups)) {
                // Menu name becomes the parent category when several menus exist
                menuName = structKeyExists(menu, "name") ? menu.name : "";
                if (len(menuName)) arrayAppend(menuNames, menuName);

                for (group in menu.groups) {
                    groupName = structKeyExists(group, "name") ? trim(group.name) : "Menu";
                    // A blank name gets the same default as a missing one, so no category is keyed ""
                    if (!len(groupName)) groupName = "Menu";

                    if (!structKeyExists(categorySet, groupName)) {
                        categorySet[groupName] = true;
                        catObj = { "name": groupName, "itemCount": 0 };
                        // Store menu name for parent category assignment later
                        catObj["menuName"] = menuName;
                        arrayAppend(toastCategories, catObj);
                    }

                    // Items directly under the group
                    if (structKeyExists(group, "items") && isArray(group.items)) {
                        for (item in group.items) {
                            if (structKeyExists(item, "name") && len(trim(item.name))) {
                                arrayAppend(toastItems, buildToastItem(item, groupName, itemId));
                                itemId++;
                            }
                        }
                    }

                    // Items under subgroups; the child array may be called subgroups, children or childGroups
                    subgroupsArr = [];
                    if (structKeyExists(group, "subgroups") && isArray(group.subgroups)) {
                        subgroupsArr = group.subgroups;
                    } else if (structKeyExists(group, "children") && isArray(group.children)) {
                        subgroupsArr = group.children;
                    } else if (structKeyExists(group, "childGroups") && isArray(group.childGroups)) {
                        subgroupsArr = group.childGroups;
                    }

                    for (subgroup in subgroupsArr) {
                        subName = structKeyExists(subgroup, "name") ? trim(subgroup.name) : groupName;
                        // Blank subgroup names fall back to the parent group, like missing ones
                        if (!len(subName)) subName = groupName;

                        if (len(subName) && !structKeyExists(categorySet, subName)) {
                            categorySet[subName] = true;
                            arrayAppend(toastCategories, { "name": subName, "parentCategoryName": groupName, "itemCount": 0 });
                        }
                        if (structKeyExists(subgroup, "items") && isArray(subgroup.items)) {
                            for (subItem in subgroup.items) {
                                if (structKeyExists(subItem, "name") && len(trim(subItem.name))) {
                                    arrayAppend(toastItems, buildToastItem(subItem, subName, itemId));
                                    itemId++;
                                }
                            }
                        }
                    }
                }
            }
        }
    }
</cfscript>
|
|
|
|
<cfscript>
    // Fallback: when the state object gave no usable business name, derive one from the page title tag.
    toastNameMissing = !(structKeyExists(toastBusiness, "name") && len(toastBusiness.name));
    if (toastNameMissing) {
        titleMatch = reMatchNoCase('<title[^>]*>([^<]+)</title>', pageHtml);
        if (arrayLen(titleMatch)) {
            // Reduce the matched tag to its inner text
            titleText = trim(reReplaceNoCase(titleMatch[1], '.*<title[^>]*>([^<]+)</title>.*', '\1'));
            // Keep only the part before the first pipe
            if (findNoCase("|", titleText)) {
                titleText = trim(listFirst(titleText, "|"));
            }
            // Drop a trailing " - Menu..." / " - Order..." / " - Online..." suffix
            titleText = reReplaceNoCase(titleText, "\s*-\s*(Menu|Order|Online).*$", "");
            if (len(titleText)) {
                toastBusiness["name"] = titleText;
            }
        }
    }
</cfscript>
|
|
|
|
<cfscript>
    // Clean business name: remove the street address if it was embedded in the name,
    // then trim any dashes or pipes left at either end.
    if (structKeyExists(toastBusiness, "name") && structKeyExists(toastBusiness, "address")) {
        cleanedBizName = toastBusiness.name;
        // First comma-separated piece of the single-line address
        bizAddr1 = listFirst(toastBusiness.address, ",");
        if (len(bizAddr1) && findNoCase(bizAddr1, cleanedBizName)) {
            cleanedBizName = trim(replaceNoCase(cleanedBizName, bizAddr1, ""));
        }
        cleanedBizName = trim(reReplace(cleanedBizName, "[\-\|]+$", ""));
        cleanedBizName = trim(reReplace(cleanedBizName, "^[\-\|]+", ""));
        toastBusiness["name"] = cleanedBizName;
    }
</cfscript>
|
|
|
|
<cfscript>
    // Build parent/child category hierarchy if multiple menus were found.
    // Each menu name becomes a parent category; group categories recorded for that menu
    // follow it (parentCategoryName = menu name), each followed by its own subgroup categories.
    // Categories that belong to no named menu are kept at the end instead of being dropped,
    // so every item still has a category entry to refer to.
    if (arrayLen(menuNames) GT 1) {
        hierarchicalCategories = [];
        placedCategories = {};   // index into toastCategories -> already copied
        seenMenuNames = {};      // guards against the same menu name being listed twice
        categoryTotal = arrayLen(toastCategories);

        for (mn in menuNames) {
            if (structKeyExists(seenMenuNames, mn)) continue;
            seenMenuNames[mn] = true;

            // Add parent category (the menu name)
            parentCat = { "name": mn, "itemCount": 0 };
            arrayAppend(hierarchicalCategories, parentCat);

            // Add the group categories recorded for this menu
            for (tcIdx = 1; tcIdx <= categoryTotal; tcIdx++) {
                tc = toastCategories[tcIdx];
                if (structKeyExists(placedCategories, tcIdx)) continue;
                if (!structKeyExists(tc, "menuName") || tc.menuName NEQ mn) continue;

                tc["parentCategoryName"] = mn;
                arrayAppend(hierarchicalCategories, tc);
                placedCategories[tcIdx] = true;

                // Subgroup categories carry no menuName; keep them right after their parent group
                for (subIdx = 1; subIdx <= categoryTotal; subIdx++) {
                    subCat = toastCategories[subIdx];
                    if (!structKeyExists(placedCategories, subIdx)
                        && !structKeyExists(subCat, "menuName")
                        && structKeyExists(subCat, "parentCategoryName")
                        && subCat.parentCategoryName EQ tc.name) {
                        arrayAppend(hierarchicalCategories, subCat);
                        placedCategories[subIdx] = true;
                    }
                }
            }
        }

        // Anything not claimed above (e.g. groups of a menu without a name) is preserved unchanged
        for (tcIdx = 1; tcIdx <= categoryTotal; tcIdx++) {
            if (!structKeyExists(placedCategories, tcIdx)) {
                arrayAppend(hierarchicalCategories, toastCategories[tcIdx]);
            }
        }

        toastCategories = hierarchicalCategories;
    }
</cfscript>
|
|
|
|
<cfscript>
    // Update category item counts: for every category, count the items filed under its name.
    for (ci = 1; ci <= arrayLen(toastCategories); ci++) {
        catName = toastCategories[ci].name;
        count = 0;
        for (ti in toastItems) {
            if (ti.category EQ catName) {
                count++;
            }
        }
        toastCategories[ci]["itemCount"] = count;
    }

    // Log the totals for this extraction
    arrayAppend(response.steps, "Extracted " & arrayLen(toastItems) & " items from " & arrayLen(toastCategories) & " categories via __OO_STATE__");
</cfscript>
|
|
|
|
<!---
    Extract Toast modifiers via Playwright if items have modifiers.
    Sets toastModifiers (array; stays empty when nothing is extracted) and, when the script
    returns an itemModifierMap, copies the mapped modifiers onto the matching toastItems by name.
    Best-effort: every failure is logged to response.steps and processing continues without modifiers.
--->
<cfset toastModifiers = arrayNew(1)>
<cfset modifierItemCount = 0>
<cfloop array="#toastItems#" index="ti">
    <cfif structKeyExists(ti, "hasModifiers") AND ti.hasModifiers>
        <cfset modifierItemCount++>
    </cfif>
</cfloop>

<cfif modifierItemCount GT 0>
    <cfset arrayAppend(response.steps, modifierItemCount & " items have modifiers - extracting via Playwright")>
    <cftry>
        <!--- Determine Toast URL for Playwright --->
        <cfset toastUrl = "">
        <cfif isDefined("targetUrl") AND reFindNoCase("toasttab\.com", targetUrl)>
            <!--- URL mode: use original URL --->
            <cfset toastUrl = targetUrl>
        <cfelse>
            <!--- Saved HTML mode: extract slug from HTML --->
            <!--- Try the "shortUrl" value embedded in the page first --->
            <cfset slugMatch = reMatchNoCase('"shortUrl"\s*:\s*"([^"]+)"', pageHtml)>
            <cfif arrayLen(slugMatch)>
                <cfset slug = reReplaceNoCase(slugMatch[1], '.*"shortUrl"\s*:\s*"([^"]+)".*', '\1')>
                <cfset toastUrl = "https://www.toasttab.com/local/order/" & slug>
            </cfif>
            <!--- Try gift card URL pattern --->
            <cfif NOT len(toastUrl)>
                <cfset giftMatch = reMatchNoCase('toasttab\.com/([a-zA-Z0-9_-]+)/giftcards', pageHtml)>
                <cfif arrayLen(giftMatch)>
                    <cfset slug = reReplaceNoCase(giftMatch[1], '.*toasttab\.com/([a-zA-Z0-9_-]+)/giftcards.*', '\1')>
                    <cfset toastUrl = "https://www.toasttab.com/local/order/" & slug>
                </cfif>
            </cfif>
        </cfif>

        <!---
            The URL comes from the request or from uploaded HTML and is placed inside a
            single-quoted argument below. Refuse values containing quotes, backticks,
            backslashes or whitespace so they cannot break out of that argument.
        --->
        <cfif len(toastUrl) AND reFind("[\s'""`\\]", toastUrl)>
            <cfset arrayAppend(response.steps, "Toast URL contains unsafe characters - skipping modifier extraction")>
            <cfset toastUrl = "">
        </cfif>

        <cfif len(toastUrl)>
            <cfset arrayAppend(response.steps, "Fetching modifiers from: " & toastUrl)>
            <cfset modOutput = "">
            <cfexecute name="/opt/playwright/run-toast-modifiers.sh" arguments="'#toastUrl#'" timeout="180" variable="modOutput" />

            <cfif len(trim(modOutput))>
                <!--- The script output is parsed as JSON --->
                <cfset modResult = deserializeJSON(modOutput)>

                <!--- Extract modifiers --->
                <cfif structKeyExists(modResult, "modifiers") AND isArray(modResult.modifiers)>
                    <cfset toastModifiers = modResult.modifiers>
                    <cfset arrayAppend(response.steps, "Extracted " & arrayLen(toastModifiers) & " unique modifier groups")>
                </cfif>

                <!--- Map modifiers to items (itemModifierMap is keyed by item name) --->
                <cfif structKeyExists(modResult, "itemModifierMap") AND isStruct(modResult.itemModifierMap)>
                    <cfset modMap = modResult.itemModifierMap>
                    <cfloop from="1" to="#arrayLen(toastItems)#" index="mi">
                        <cfif structKeyExists(modMap, toastItems[mi].name)>
                            <cfset toastItems[mi]["modifiers"] = modMap[toastItems[mi].name]>
                        </cfif>
                    </cfloop>
                    <cfset arrayAppend(response.steps, "Mapped modifiers to " & structCount(modMap) & " items")>
                </cfif>

                <!--- Log stats --->
                <cfif structKeyExists(modResult, "stats") AND isStruct(modResult.stats)>
                    <cfset arrayAppend(response.steps, "Modifier stats: " & serializeJSON(modResult.stats))>
                </cfif>
            <cfelse>
                <cfset arrayAppend(response.steps, "Playwright modifier script returned empty output")>
            </cfif>
        <cfelse>
            <cfset arrayAppend(response.steps, "Could not determine Toast URL for modifier extraction")>
        </cfif>
        <cfcatch>
            <cfset arrayAppend(response.steps, "Modifier extraction failed: " & cfcatch.message & " - continuing without modifiers")>
        </cfcatch>
    </cftry>
</cfif>
|
|
|
|
<!---
    Build and return response directly - skip Claude.
    Only taken when at least one item was extracted; otherwise execution continues
    past this block. Ends the request with cfabort after writing the JSON body.
--->
<cfif arrayLen(toastItems) GT 0>
    <cfset menuData = structNew()>
    <cfset menuData["business"] = toastBusiness>
    <cfset menuData["categories"] = toastCategories>
    <cfset menuData["items"] = toastItems>
    <cfset menuData["modifiers"] = toastModifiers>
    <cfset menuData["imageUrls"] = arrayNew(1)>
    <cfset menuData["imageMappings"] = imageMappings>
    <cfset menuData["headerCandidateIndices"] = arrayNew(1)>

    <cfset response["OK"] = true>
    <cfset response["DATA"] = menuData>
    <!--- targetUrl is initialised to an empty string earlier in this template, so test its length rather than its existence --->
    <cfset response["sourceUrl"] = (isDefined("targetUrl") AND len(targetUrl)) ? targetUrl : "uploaded">
    <cfset response["pagesProcessed"] = 1>
    <cfset response["imagesFound"] = arrayLen(imageDataArray)>
    <cfset response["playwrightImagesCount"] = arrayLen(playwrightImages)>
    <cfset response["parsedVia"] = "toast_oo_state">
    <cfcontent type="application/json" reset="true">
    <cfoutput>#serializeJSON(response)#</cfoutput>
    <cfabort>
</cfif>
|
|
</cfif>
|
|
<cfcatch>
|
|
<cfscript>
    // Summarise the failure (message, optional detail, first tagContext frame), log it as a
    // step and expose it under DEBUG_TOAST_ERROR; execution then continues after the cftry.
    toastError = "Toast __OO_STATE__ parsing failed: " & cfcatch.message;
    if (len(cfcatch.detail)) {
        toastError &= " | Detail: " & cfcatch.detail;
    }
    if (structKeyExists(cfcatch, "tagContext") && isArray(cfcatch.tagContext) && arrayLen(cfcatch.tagContext) GT 0) {
        toastErrorFrame = cfcatch.tagContext[1];
        toastError &= " | Line: " & toastErrorFrame.line & " in " & listLast(toastErrorFrame.template, "/");
    }
    arrayAppend(response.steps, toastError & " - falling back to Claude");
    response["DEBUG_TOAST_ERROR"] = toastError;
</cfscript>
|
|
</cfcatch>
|
|
</cftry>
|
|
</cfif>
|
|
|
|
<!--- ============================================================ --->
|
|
<!--- UBER EATS FAST PATH: Parse JSON-LD structured menu data --->
|
|
<!--- ============================================================ --->
|
|
<cfif findNoCase("ubereats.com", pageHtml) OR findNoCase("uber.com/store", pageHtml)>
|
|
<cfset arrayAppend(response.steps, "Uber Eats page detected - looking for JSON-LD menu data")>
|
|
<cftry>
|
|
<cfscript>
    // Extract all JSON-LD script blocks from pageHtml and keep the last node whose
    // @type is "Restaurant" and which has a hasMenu key in ueRestaurant.
    // ueRestaurant stays an empty string when no such node is found.
    jsonLdBlocks = reMatchNoCase('<script[^>]*type\s*=\s*["'']application/ld\+json["''][^>]*>([\s\S]*?)</script>', pageHtml);
    arrayAppend(response.steps, "Found " & arrayLen(jsonLdBlocks) & " JSON-LD blocks");

    ueRestaurant = "";
    for (ldBlock in jsonLdBlocks) {
        // Extract content between script tags
        ldContent = trim(reReplaceNoCase(ldBlock, '<script[^>]*>([\s\S]*?)</script>', '\1'));
        if (!len(ldContent)) continue;

        try {
            // Unescape a few unicode escapes before parsing
            ldContent = replace(ldContent, '\u002F', '/', 'all');
            ldContent = replace(ldContent, '\u0026', '&', 'all');
            // An escaped quote must stay escaped: a raw quote here would end the
            // surrounding JSON string and make the whole block unparseable.
            ldContent = replace(ldContent, '\u0022', '\"', 'all');
            ldContent = replace(ldContent, '\u0027', "'", 'all');

            ldParsed = deserializeJSON(ldContent);
            // A JSON-LD block may hold a single object or an array of objects
            ldCandidates = isArray(ldParsed) ? ldParsed : [ldParsed];
            for (ldNode in ldCandidates) {
                if (isStruct(ldNode) && structKeyExists(ldNode, "@type") && ldNode["@type"] EQ "Restaurant" && structKeyExists(ldNode, "hasMenu")) {
                    ueRestaurant = ldNode;
                }
            }
        } catch (any ldErr) {
            // skip unparseable blocks
        }
    }
</cfscript>
|
|
|
|
<!---
    Convert the Restaurant JSON-LD node into the menu response.
    When at least one item is parsed, the JSON response is written and the request ends
    with cfabort; otherwise a step is logged and execution falls through.
--->
<cfif isStruct(ueRestaurant) AND structKeyExists(ueRestaurant, "hasMenu")>
    <cfset arrayAppend(response.steps, "Found Restaurant JSON-LD with menu data")>

    <cfscript>
        // Decodes the HTML entities handled by this parser: &amp; &lt; &gt; &#39; (hash doubled for CFML) and &apos;
        ueDecodeEntities = function(required string text) {
            var out = arguments.text;
            out = replace(out, "&amp;", "&", "all");
            out = replace(out, "&lt;", "<", "all");
            out = replace(out, "&gt;", ">", "all");
            out = replace(out, "&##39;", "'", "all");
            out = replace(out, "&apos;", "'", "all");
            return out;
        };

        // Parse business info
        ueBusiness = {};
        ueBusiness["name"] = ueDecodeEntities(structKeyExists(ueRestaurant, "name") ? ueRestaurant.name : "");

        if (structKeyExists(ueRestaurant, "address") && isStruct(ueRestaurant.address)) {
            ueAddr = ueRestaurant.address;
            if (structKeyExists(ueAddr, "streetAddress")) ueBusiness["addressLine1"] = ueAddr.streetAddress;
            if (structKeyExists(ueAddr, "addressLocality")) ueBusiness["city"] = ueAddr.addressLocality;
            if (structKeyExists(ueAddr, "addressRegion")) ueBusiness["state"] = ueAddr.addressRegion;
            if (structKeyExists(ueAddr, "postalCode")) ueBusiness["zip"] = ueAddr.postalCode;
        }

        // Parse menu sections
        ueMenu = ueRestaurant.hasMenu;
        ueCategories = [];
        ueItems = [];
        ueImageMappings = {};   // item name -> image URL
        ueItemId = 1;

        if (isStruct(ueMenu) && structKeyExists(ueMenu, "hasMenuSection") && isArray(ueMenu.hasMenuSection)) {
            for (ueSection in ueMenu.hasMenuSection) {
                ueCatName = ueDecodeEntities(structKeyExists(ueSection, "name") ? trim(ueSection.name) : "Menu");
                ueSectionItemCount = 0;

                if (structKeyExists(ueSection, "hasMenuItem") && isArray(ueSection.hasMenuItem)) {
                    for (ueMenuItem in ueSection.hasMenuItem) {
                        ueItemName = structKeyExists(ueMenuItem, "name") ? trim(ueMenuItem.name) : "";
                        // Items without a name are skipped
                        if (!len(ueItemName)) continue;
                        ueItemName = ueDecodeEntities(ueItemName);

                        ueItemDesc = ueDecodeEntities(structKeyExists(ueMenuItem, "description") ? trim(ueMenuItem.description) : "");

                        // Extract price from offers
                        uePrice = 0;
                        if (structKeyExists(ueMenuItem, "offers") && isStruct(ueMenuItem.offers) && structKeyExists(ueMenuItem.offers, "price")) {
                            uePrice = val(ueMenuItem.offers.price);
                        }

                        // Extract image if available; non-string image values are ignored
                        ueItemImage = "";
                        if (structKeyExists(ueMenuItem, "image") && isSimpleValue(ueMenuItem.image) && len(trim(ueMenuItem.image))) {
                            ueItemImage = trim(ueMenuItem.image);
                            ueImageMappings[ueItemName] = ueItemImage;
                        }

                        arrayAppend(ueItems, {
                            "id": "item_" & ueItemId,
                            "name": ueItemName,
                            "price": uePrice,
                            "description": ueItemDesc,
                            "category": ueCatName,
                            "modifiers": [],
                            "imageUrl": ueItemImage
                        });
                        ueItemId = ueItemId + 1;
                        ueSectionItemCount = ueSectionItemCount + 1;
                    }
                }

                // Sections that produced no items are not listed as categories
                if (ueSectionItemCount GT 0) {
                    arrayAppend(ueCategories, { "name": ueCatName, "itemCount": ueSectionItemCount });
                }
            }
        }

        arrayAppend(response.steps, "Parsed " & arrayLen(ueItems) & " items in " & arrayLen(ueCategories) & " categories from Uber Eats JSON-LD");
    </cfscript>

    <cfif arrayLen(ueItems) GT 0>
        <!--- Try to get restaurant header image from JSON-LD image (array of URLs or a single URL) --->
        <cfset ueHeaderImage = "">
        <cfif structKeyExists(ueRestaurant, "image")>
            <cfif isArray(ueRestaurant.image) AND arrayLen(ueRestaurant.image) GT 0>
                <cfset ueHeaderImage = ueRestaurant.image[1]>
            <cfelseif isSimpleValue(ueRestaurant.image)>
                <cfset ueHeaderImage = trim(ueRestaurant.image)>
            </cfif>
        </cfif>

        <cfset menuData = {
            "business": ueBusiness,
            "categories": ueCategories,
            "items": ueItems,
            "modifiers": [],
            "imageUrls": [],
            "imageMappings": ueImageMappings,
            "headerCandidateIndices": [],
            "headerImage": ueHeaderImage
        }>

        <cfset response["OK"] = true>
        <cfset response["DATA"] = menuData>
        <cfset response["sourceUrl"] = len(targetUrl) ? targetUrl : "ubereats-upload">
        <cfset response["parsedVia"] = "ubereats_jsonld">
        <cfset response["pagesProcessed"] = 1>
        <cfset response["imagesFound"] = structCount(ueImageMappings)>
        <cfcontent type="application/json" reset="true">
        <cfoutput>#serializeJSON(response)#</cfoutput>
        <cfabort>
    </cfif>
<cfelse>
    <cfset arrayAppend(response.steps, "No Restaurant JSON-LD with menu found - falling through")>
</cfif>
|
|
<cfcatch>
|
|
<cfset ueError = "Uber Eats JSON-LD parsing failed: " & cfcatch.message>
|
|
<cfif len(cfcatch.detail)><cfset ueError = ueError & " | Detail: " & cfcatch.detail></cfif>
|
|
<cfset arrayAppend(response.steps, ueError & " - falling back to Claude")>
|
|
</cfcatch>
|
|
</cftry>
|
|
</cfif>
|
|
<!--- ========== END UBER EATS FAST PATH ========== --->
|
|
|
|
<cfscript>
    // Look for embedded JSON data in every crawled page and concatenate whatever is found
    // into embeddedJsonData, each piece preceded by a "--- LABEL ---" marker line.
    embeddedJsonData = "";
    embeddedMenuItems = [];

    for (menuPage in menuPages) {
        // __NEXT_DATA__ script tag (Next.js apps): only the first match is used
        nextDataMatch = reMatchNoCase('<script[^>]*id=["'']__NEXT_DATA__["''][^>]*>([^<]+)</script>', menuPage.html);
        if (arrayLen(nextDataMatch)) {
            scriptContent = reReplaceNoCase(nextDataMatch[1], '<script[^>]*>([^<]+)</script>', '\1');
            embeddedJsonData &= chr(10) & "--- __NEXT_DATA__ ---" & chr(10) & scriptContent;
        }

        // window.__SOMETHING__ = {...}; assignments: the whole matched statement is kept
        stateMatches = reMatchNoCase('window\.__[A-Z_]+__\s*=\s*(\{[^;]+\});', menuPage.html);
        for (stateMatch in stateMatches) {
            embeddedJsonData &= chr(10) & "--- WINDOW_STATE ---" & chr(10) & stateMatch;
        }

        // data-props / data-page / data-state attributes holding JSON
        dataPropsMatches = reMatchNoCase('data-(?:props|page|state)=["''](\{[^"'']+\})["'']', menuPage.html);
        for (propsMatch in dataPropsMatches) {
            embeddedJsonData &= chr(10) & "--- DATA_PROPS ---" & chr(10) & propsMatch;
        }

        // JSON-LD blocks, kept only when their text mentions "menu" or "MenuItem"
        jsonLdMatches = reMatchNoCase('<script[^>]*type=["'']application/ld\+json["''][^>]*>([^<]+)</script>', menuPage.html);
        for (jsonLdMatch in jsonLdMatches) {
            scriptContent = reReplaceNoCase(jsonLdMatch, '<script[^>]*>([^<]+)</script>', '\1');
            if (findNoCase("menu", scriptContent) || findNoCase("MenuItem", scriptContent)) {
                embeddedJsonData &= chr(10) & "--- JSON_LD_MENU ---" & chr(10) & scriptContent;
            }
        }
    }

    // Debug flags describing what was found (preview is capped at 2000 characters)
    response["DEBUG_EMBEDDED_JSON_FOUND"] = len(embeddedJsonData) ? true : false;
    if (len(embeddedJsonData)) {
        response["DEBUG_EMBEDDED_JSON_LENGTH"] = len(embeddedJsonData);
        response["DEBUG_EMBEDDED_JSON_PREVIEW"] = left(embeddedJsonData, 2000);
    }
</cfscript>
|
|
|
|
<!--- Concatenate every fetched page into one text block, each preceded by a "--- PAGE: url ---" marker --->
<cfset combinedHtml = "">
<!--- Markup that is removed from each page before concatenation: script elements, style elements, HTML comments (applied in this order) --->
<cfset noisePatterns = ["<script[^>]*>.*?</script>", "<style[^>]*>.*?</style>", "<!--.*?-->"]>
<cfloop array="#menuPages#" index="menuPage">
    <cfset cleanHtml = menuPage.html>
    <cfloop array="#noisePatterns#" index="noisePattern">
        <cfset cleanHtml = reReplaceNoCase(cleanHtml, noisePattern, "", "all")>
    </cfloop>
    <cfset combinedHtml &= chr(10) & "--- PAGE: " & menuPage.url & " ---" & chr(10) & cleanHtml>
</cfloop>
|
|
|
|
<!---
    Append the embedded JSON (if any) and keep the total text sent to Claude within maxPromptChars.

    The embedded JSON goes at the END of the text, so a plain left() on the combined string would
    discard it first - exactly on the large pages where it is most useful. Instead, when the total
    is too long, the HTML part is shortened to make room. The embedded section is itself capped
    only when it would not fit: it may use whatever the HTML leaves free, and is always allowed at
    least half of the budget.

    When everything fits within maxPromptChars the result is identical to simple concatenation.
--->
<cfset maxPromptChars = 100000>
<cfif len(embeddedJsonData)>
    <cfset embeddedSection = chr(10) & chr(10) & "=== EMBEDDED JSON DATA (may contain full menu) ===" & chr(10) & embeddedJsonData>
    <cfset embeddedBudget = max(int(maxPromptChars / 2), maxPromptChars - len(combinedHtml))>
    <cfif len(embeddedSection) GT embeddedBudget>
        <cfset embeddedSection = left(embeddedSection, embeddedBudget)>
    </cfif>
    <!--- htmlBudget is always at least half of maxPromptChars, so left() never receives 0 --->
    <cfset htmlBudget = maxPromptChars - len(embeddedSection)>
    <cfif len(combinedHtml) GT htmlBudget>
        <cfset combinedHtml = left(combinedHtml, htmlBudget)>
    </cfif>
    <cfset combinedHtml = combinedHtml & embeddedSection>
<cfelseif len(combinedHtml) GT maxPromptChars>
    <!--- No embedded JSON: plain size limit on the HTML --->
    <cfset combinedHtml = left(combinedHtml, maxPromptChars)>
</cfif>
|
|
|
|
<!--- Debug: collect the trimmed, non-empty text of every h3 and h4 element whose content contains no nested tags --->
<cfset headingTextsByTag = structNew()>
<cfloop list="h3,h4" index="headingTag">
    <cfset headingPattern = "<" & headingTag & "[^>]*>([^<]*)</" & headingTag & ">">
    <cfset collectedTexts = arrayNew(1)>
    <cfloop array="#reMatchNoCase(headingPattern, combinedHtml)#" index="headingMarkup">
        <!--- Reduce the matched element to its inner text via the capture group --->
        <cfset headingText = trim(reReplaceNoCase(headingMarkup, headingPattern, "\1"))>
        <cfif len(headingText)>
            <cfset arrayAppend(collectedTexts, headingText)>
        </cfif>
    </cfloop>
    <cfset headingTextsByTag[headingTag] = collectedTexts>
</cfloop>
<cfset h3Texts = headingTextsByTag["h3"]>
<cfset h4Texts = headingTextsByTag["h4"]>
<cfset response["DEBUG_H3_TAGS"] = h3Texts>
<cfset response["DEBUG_H4_TAGS"] = h4Texts>
|
|
|
|
<!--- Debug: count h2, h5 and h6 elements whose content contains no nested tags --->
<cfset response["DEBUG_H2_COUNT"] = arrayLen(reMatchNoCase("<h2[^>]*>([^<]*)</h2>", combinedHtml))>
<cfset response["DEBUG_H5_COUNT"] = arrayLen(reMatchNoCase("<h5[^>]*>([^<]*)</h5>", combinedHtml))>
<cfset response["DEBUG_H6_COUNT"] = arrayLen(reMatchNoCase("<h6[^>]*>([^<]*)</h6>", combinedHtml))>

<!--- Debug: excerpt of the text around the first occurrence of "Beverages" (from 100 characters before it up to 900 after) --->
<cfset bevPos = findNoCase("Beverages", combinedHtml)>
<cfif bevPos GT 0>
    <cfset bevStart = max(1, bevPos - 100)>
    <cfset bevLength = min(len(combinedHtml), bevPos + 900) - bevStart>
    <cfset response["DEBUG_BEVERAGES_HTML"] = mid(combinedHtml, bevStart, bevLength)>
</cfif>

<cfset arrayAppend(response.steps, "Found " & arrayLen(h3Texts) & " h3 and " & arrayLen(h4Texts) & " h4 tags")>
|
|
|
|
<!---
    Server-side heading hierarchy detection.
    Walks combinedHtml from left to right, always handling whichever of the next h2 / next h3
    opening tag comes first:
      - an h2 becomes the current parent, unless its text is empty after removing
        non-alphanumerics, equals "MENU", or contains "copyright" (then the parent is cleared);
      - an h3 is recorded as a subsection of the current parent, if there is one.
    Result: headingHierarchy[h2 text] = array of h3 texts. Scanning stops at the first heading
    that has no closing tag.
--->
<cfset headingHierarchy = structNew()>
<cfset hierarchyDesc = "">
<cfset scanPos = 1>
<cfset currentH2 = "">
<cfloop condition="scanPos LT len(combinedHtml)">
    <cfset h2At = reFindNoCase("<h2[^>]*>", combinedHtml, scanPos)>
    <cfset h3At = reFindNoCase("<h3[^>]*>", combinedHtml, scanPos)>
    <cfif h2At EQ 0 AND h3At EQ 0>
        <cfbreak>
    </cfif>

    <!--- Decide which heading level comes first from the current scan position --->
    <cfset isH2 = h2At GT 0 AND (h3At EQ 0 OR h2At LT h3At)>
    <cfset openAt = isH2 ? h2At : h3At>
    <cfset closePos = findNoCase(isH2 ? "</h2>" : "</h3>", combinedHtml, openAt)>
    <cfif closePos EQ 0>
        <cfbreak>
    </cfif>

    <!--- Heading text = the whole element (closing tag is 5 characters long) with all tags removed --->
    <cfset tagContent = mid(combinedHtml, openAt, closePos + 5 - openAt)>
    <cfset headingText = trim(reReplaceNoCase(tagContent, "<[^>]+>", "", "all"))>

    <cfif isH2>
        <!--- Strip decorative characters only for the validity test; the raw text is what gets stored --->
        <cfset h2Clean = trim(reReplace(headingText, "[^a-zA-Z0-9 ]", "", "all"))>
        <cfif len(h2Clean) AND h2Clean NEQ "MENU" AND NOT findNoCase("copyright", h2Clean)>
            <cfset currentH2 = headingText>
        <cfelse>
            <cfset currentH2 = "">
        </cfif>
    <cfelseif len(currentH2) AND len(headingText)>
        <cfif NOT structKeyExists(headingHierarchy, currentH2)>
            <cfset headingHierarchy[currentH2] = arrayNew(1)>
        </cfif>
        <cfset arrayAppend(headingHierarchy[currentH2], headingText)>
    </cfif>

    <cfset scanPos = closePos + 5>
</cfloop>
|
|
|
|
<!--- Turn the detected h2/h3 hierarchy into one bullet line per parent (used later as a prompt hint) and expose it for debugging --->
<cfif NOT structIsEmpty(headingHierarchy)>
    <cfloop collection="#headingHierarchy#" item="parentHeading">
        <cfset subsectionList = arrayToList(headingHierarchy[parentHeading], ", ")>
        <cfset hierarchyDesc &= "- """ & parentHeading & """ contains subsections: " & subsectionList & chr(10)>
    </cfloop>
    <cfset response["DEBUG_HEADING_HIERARCHY"] = headingHierarchy>
    <cfset arrayAppend(response.steps, "Detected " & structCount(headingHierarchy) & " parent categories with subcategories from h2/h3 structure")>
</cfif>
|
|
|
|
<!--- System prompt for URL analysis. It asks for a single JSON object with the keys business, categories, modifiers and items, explains how to tell a category from an item, describes the two accepted shapes of a categories entry (plain string, or object with 'name' and 'subcategories'), and tells the model to take each item's 'imageUrl' from the img src in the item's container. The literal spans two source lines, so the prompt contains one embedded line break. It is assigned unchanged to the request's "system" field below. --->
|
|
<cfset systemPrompt = "You are an expert at extracting structured menu data from restaurant website HTML. Extract ALL menu data visible in the HTML. Return valid JSON with these keys: business (object with name, address, phone, hours, brandColor), categories (array), modifiers (array), items (array with name, description, price, category, modifiers array, and imageUrl). CATEGORIES vs ITEMS (CRITICAL): A CATEGORY is a broad section heading that groups multiple items (e.g., 'Appetizers', 'Tacos', 'Drinks', 'Desserts'). An ITEM is an individual food or drink product with a name, description, and price. Do NOT create a category for each individual item. A typical restaurant has 5-15 categories and 30-150 items. If you find yourself creating more categories than items, you are wrong - those are items, not categories. Each item must have a 'category' field set to the category it belongs to. CATEGORIES FORMAT: Each entry in the categories array can be either a simple string (for flat categories) OR an object with 'name' and optional 'subcategories' array. Example: [""Appetizers"", {""name"": ""Drinks"", ""subcategories"": [""Hot Drinks"", ""Cold Drinks""]}, ""Desserts""]. SUBCATEGORY DETECTION: If a section header contains nested titled sections beneath it (sub-headers with their own items), the outer section is the PARENT and inner sections are SUBCATEGORIES. For items in subcategories, set their 'category' field to the SUBCATEGORY name (not the parent). CRITICAL FOR IMAGES: Each menu item in the HTML is typically in a container (div, li, article) that also contains an img tag. Extract the img src URL and include it as 'imageUrl' for that item. Look for img tags that are siblings or children within the same menu-item container. The image URL should be the full or relative src value from the img tag - NOT the alt text. CRITICAL: Extract EVERY menu item from ALL sources including embedded JSON (__NEXT_DATA__, window state, JSON-LD). 
For brandColor: suggest a vibrant hex (6 digits, no hash). For prices: numbers (e.g., 12.99). Return ONLY valid JSON.">
|
|
|
|
<!--- Build the user message content; image blocks go first, limited to the first 10 collected images --->
<cfset messagesContent = []>
<cfset imgLimit = min(arrayLen(imageDataArray), 10)>
<cfloop from="1" to="#imgLimit#" index="i">
    <!--- Each image block reuses the "source" struct already prepared for that image --->
    <cfset arrayAppend(messagesContent, { "type": "image", "source": imageDataArray[i].source })>
</cfloop>
|
|
|
|
<!--- Build the text block: task instruction, optional heading-hierarchy hint, then the combined HTML --->
<cfset userText = "Extract menu data from this restaurant website HTML. The images above are from the same website - identify which ones are food photos that could be used as item images, and which could be header/banner images.">
<cfif len(hierarchyDesc)>
    <!--- Only included when the server-side scan found h2 sections containing h3 sub-sections --->
    <cfset userText &= chr(10) & chr(10) & "IMPORTANT - DETECTED SECTION HIERARCHY FROM HTML HEADINGS:" & chr(10) & "The following h2 sections contain h3 sub-sections. Use these as parent-subcategory relationships in your categories output:" & chr(10) & hierarchyDesc & "For each parent above, include it in the categories array as an OBJECT with 'name' and 'subcategories' array. Items belonging to a subsection should have their 'category' field set to the SUBCATEGORY name (not the parent).">
</cfif>
<cfset userText &= chr(10) & chr(10) & "Here is the HTML content:" & chr(10) & chr(10) & combinedHtml>
<cfset textBlock = { "type": "text", "text": userText }>
<cfset arrayAppend(messagesContent, textBlock)>
|
|
|
|
<!--- Assemble the request: a single user message carrying the image and text blocks, plus model settings and the system prompt --->
<cfset userMessage = { "role": "user", "content": messagesContent }>
<cfset requestBody = {
    "model": "claude-sonnet-4-20250514",
    "max_tokens": 16384,
    "temperature": 0,
    "system": systemPrompt,
    "messages": [ userMessage ]
}>

<cfset arrayAppend(response.steps, "Sending to Claude API...")>
|
|
|
|
<!--- POST the serialized request to the Claude messages endpoint; the result is stored in httpResult --->
<cfhttp url="https://api.anthropic.com/v1/messages" method="POST" timeout="120" result="httpResult">
    <cfhttpparam type="header" name="Content-Type" value="application/json">
    <cfhttpparam type="header" name="x-api-key" value="#CLAUDE_API_KEY#">
    <cfhttpparam type="header" name="anthropic-version" value="2023-06-01">
    <cfhttpparam type="body" value="#serializeJSON(requestBody)#">
</cfhttp>

<!--- Normalize the status to a number: numeric values are used as-is, any text containing "200" counts as 200, everything else becomes 0 --->
<cfset rawStatus = httpResult.statusCode>
<cfset httpStatusCode = 0>
<cfif isNumeric(rawStatus)>
    <cfset httpStatusCode = int(rawStatus)>
<cfelseif findNoCase("200", rawStatus)>
    <cfset httpStatusCode = 200>
</cfif>

<cfif httpStatusCode NEQ 200>
    <!--- Default to the raw body; replace it with error.message when the body is JSON of that shape --->
    <cfset errorDetail = httpResult.fileContent>
    <cftry>
        <cfset errorResponse = deserializeJSON(httpResult.fileContent)>
        <cfif structKeyExists(errorResponse, "error") AND structKeyExists(errorResponse.error, "message")>
            <cfset errorDetail = errorResponse.error.message>
        </cfif>
        <cfcatch>
            <!--- Body is not usable JSON: keep the raw body as the detail --->
            <cfset errorDetail = httpResult.fileContent>
        </cfcatch>
    </cftry>
    <cfthrow message="Claude API error: #httpResult.statusCode# - #errorDetail#">
</cfif>
|
|
|
|
<!--- Decode the API response and take the text of its first content block of type "text" --->
<cfset claudeResponse = deserializeJSON(httpResult.fileContent)>
<cfset hasContent = structKeyExists(claudeResponse, "content") AND arrayLen(claudeResponse.content) GT 0>
<cfif NOT hasContent>
    <cfthrow message="Empty response from Claude">
</cfif>

<!--- Stays empty when no block of type "text" is present --->
<cfset responseText = "">
<cfloop from="1" to="#arrayLen(claudeResponse.content)#" index="blockIdx">
    <cfset contentBlock = claudeResponse.content[blockIdx]>
    <cfif structKeyExists(contentBlock, "type") AND contentBlock.type EQ "text">
        <cfset responseText = contentBlock.text>
        <cfbreak>
    </cfif>
</cfloop>
|
|
|
|
<!---
    Clean up the model output so it can be handed to deserializeJSON:
      1. trim and strip leading/trailing markdown code fences;
      2. cut the text down to the span from the first "{" to the last "}" - this drops any
         preamble AND any trailing prose or fence after the object (previously only a leading
         preamble and a trailing fence were handled, so "{...} Hope this helps" failed to parse);
      3. remove trailing commas before ] or };
      4. replace control characters with spaces.
--->
<cfset responseText = trim(responseText)>
<!--- Strip markdown code fences --->
<cfif left(responseText, 7) EQ "```json">
    <cfset responseText = mid(responseText, 8, len(responseText) - 7)>
</cfif>
<cfif left(responseText, 3) EQ "```">
    <cfset responseText = mid(responseText, 4, len(responseText) - 3)>
</cfif>
<cfif right(responseText, 3) EQ "```">
    <cfset responseText = left(responseText, len(responseText) - 3)>
</cfif>
<cfset responseText = trim(responseText)>

<!--- Isolate the JSON object: first "{" through last "}" --->
<cfset jsonStart = find("{", responseText)>
<cfif jsonStart GT 0>
    <cfset jsonEnd = len(responseText)>
    <!--- Position of the last "}" is found by searching the reversed string --->
    <cfset closingFromEnd = find("}", reverse(responseText))>
    <cfif closingFromEnd GT 0>
        <cfset lastBrace = len(responseText) - closingFromEnd + 1>
        <!--- Ignore a "}" that sits before the opening brace; keep the rest of the text in that case --->
        <cfif lastBrace GT jsonStart>
            <cfset jsonEnd = lastBrace>
        </cfif>
    </cfif>
    <cfset responseText = trim(mid(responseText, jsonStart, jsonEnd - jsonStart + 1))>
</cfif>

<!--- Remove trailing commas before ] or } --->
<cfset responseText = reReplace(responseText, ",(\s*[\]\}])", "\1", "all")>
<!--- Remove control characters that break JSON --->
<cfset responseText = reReplace(responseText, "[\x00-\x1F]", " ", "all")>
|
|
|
|
<!---
    Parse the cleaned text into menuData. On failure the request ends here with a JSON error
    payload that includes the first 2000 characters of the text for debugging.

    - A result that is valid JSON but not an object (array, string, number) is treated as a
      parse failure too; the code that follows expects a struct.
    - The payload carries OK=false and MESSAGE like every other error of this endpoint; the
      older "success" and "error" keys are kept for existing clients.
--->
<cftry>
    <cfset menuData = deserializeJSON(responseText)>
    <cfif NOT isStruct(menuData)>
        <cfthrow message="Claude response is not a JSON object">
    </cfif>
    <cfcatch type="any">
        <cfset response["OK"] = false>
        <cfset response["success"] = false>
        <cfset response["error"] = "JSON parse error: #cfcatch.message#">
        <cfset response["MESSAGE"] = response["error"]>
        <cfset response["DEBUG_RAW_RESPONSE"] = left(responseText, 2000)>
        <cfset response["DEBUG_RESPONSE_LENGTH"] = len(responseText)>
        <!--- Same content type, including charset, as declared at the top of the template --->
        <cfcontent type="application/json; charset=utf-8" reset="true">
        <cfoutput>#serializeJSON(response)#</cfoutput>
        <cfabort>
    </cfcatch>
</cftry>
|
|
|
|
<!--- Debug: expose the cleaned model output exactly as it was parsed --->
<cfset response["DEBUG_RAW_CLAUDE"] = responseText>

<!--- Collect the "url" of every image entry that has one, for the wizard --->
<cfset imageUrlList = []>
<cfloop from="1" to="#arrayLen(imageDataArray)#" index="imgIdx">
    <cfif structKeyExists(imageDataArray[imgIdx], "url")>
        <cfset arrayAppend(imageUrlList, imageDataArray[imgIdx].url)>
    </cfif>
</cfloop>
|
|
|
|
<!---
    Ensure expected structure: "business" must be a struct and "categories", "modifiers" and
    "items" must be arrays. A key that is missing OR holds a value of the wrong type (for
    example items returned as an object or a string) is replaced by an empty default, because
    the code below calls arrayLen() and loops over these values and would otherwise throw.
--->
<cfif NOT structKeyExists(menuData, "business") OR NOT isStruct(menuData.business)>
    <cfset menuData["business"] = structNew()>
</cfif>
<cfloop list="categories,modifiers,items" index="requiredArrayKey">
    <cfif NOT structKeyExists(menuData, requiredArrayKey) OR NOT isArray(menuData[requiredArrayKey])>
        <cfset menuData[requiredArrayKey] = arrayNew(1)>
    </cfif>
</cfloop>
|
|
|
|
<!---
    Flatten the categories array into a list of { name, itemCount } structs.
    - A simple value becomes a top-level category.
    - A struct with a non-empty "name" becomes a top-level category, followed by one entry per
      usable subcategory (string, or struct with "name"), each carrying "parentCategoryName".
    - Anything else is dropped.
    itemCount is initialised to 0 here.
--->
<cfset formattedCategories = arrayNew(1)>
<cfloop array="#menuData.categories#" index="cat">
    <cfif isSimpleValue(cat)>
        <cfset arrayAppend(formattedCategories, { "name": cat, "itemCount": 0 })>
        <cfcontinue>
    </cfif>
    <cfif NOT isStruct(cat) OR NOT structKeyExists(cat, "name") OR NOT len(cat.name)>
        <cfcontinue>
    </cfif>

    <cfset parentName = cat.name>
    <cfset arrayAppend(formattedCategories, { "name": parentName, "itemCount": 0 })>

    <cfif NOT structKeyExists(cat, "subcategories") OR NOT isArray(cat.subcategories)>
        <cfcontinue>
    </cfif>
    <cfloop array="#cat.subcategories#" index="subcat">
        <cfset subcatName = "">
        <cfif isSimpleValue(subcat)>
            <cfset subcatName = subcat>
        <cfelseif isStruct(subcat) AND structKeyExists(subcat, "name")>
            <cfset subcatName = subcat.name>
        </cfif>
        <cfif len(subcatName)>
            <cfset arrayAppend(formattedCategories, { "name": subcatName, "parentCategoryName": parentName, "itemCount": 0 })>
        </cfif>
    </cfloop>
</cfloop>
<cfset menuData["categories"] = formattedCategories>
|
|
|
|
<!---
    Fix the "every item is a category" pattern.

    Trigger: more than 10 categories, at least one item, and more categories than half the
    number of items. Categories are then split into those with 0 items (treated as the real
    section headings) and those with exactly 1 item. If more than 60% of all categories have
    exactly 1 item and at least one has 0, the list is collapsed: walking the categories in
    order, every category that is not a 0-item one hands its items to the most recent 0-item
    category before it (the first 0-item category for anything that precedes all of them),
    and only the 0-item categories are kept, now with real item counts.

    Items are compared through a guarded lookup: an entry that is not a struct, or has no
    simple "category" value, simply matches no category. Previously a single item without a
    "category" key raised an error here and aborted the whole analysis.
--->
<cfset totalItems = arrayLen(menuData.items)>
<cfset totalCats = arrayLen(formattedCategories)>
<cfif totalCats GT 10 AND totalItems GT 0 AND totalCats GT totalItems * 0.5>
    <!--- Classify categories by how many items reference them --->
    <cfset zeroCats = []>
    <cfset singleCats = []>
    <cfloop array="#formattedCategories#" index="fc">
        <cfset fcCount = 0>
        <cfloop array="#menuData.items#" index="fi">
            <cfif isStruct(fi) AND structKeyExists(fi, "category") AND isSimpleValue(fi.category) AND fi.category EQ fc.name>
                <cfset fcCount++>
            </cfif>
        </cfloop>
        <cfif fcCount EQ 0>
            <cfset arrayAppend(zeroCats, fc.name)>
        <cfelseif fcCount EQ 1>
            <cfset arrayAppend(singleCats, fc.name)>
        </cfif>
    </cfloop>

    <!--- If >60% of categories have exactly 1 item, this is the broken pattern --->
    <cfif arrayLen(singleCats) GT totalCats * 0.6 AND arrayLen(zeroCats) GT 0>
        <cfset arrayAppend(response.steps, "Detected 'every item is a category' pattern (" & arrayLen(singleCats) & " single-item cats, " & arrayLen(zeroCats) & " empty cats) - collapsing")>

        <!--- Reassign items to the most recent 0-item category seen while walking the list --->
        <cfset currentParent = zeroCats[1]>
        <cfloop array="#formattedCategories#" index="fc">
            <cfif arrayFind(zeroCats, fc.name) GT 0>
                <cfset currentParent = fc.name>
            <cfelse>
                <!--- Any category that has items is treated as a pseudo-category here --->
                <cfloop from="1" to="#arrayLen(menuData.items)#" index="ii">
                    <cfset candidateItem = menuData.items[ii]>
                    <cfif isStruct(candidateItem) AND structKeyExists(candidateItem, "category") AND isSimpleValue(candidateItem.category) AND candidateItem.category EQ fc.name>
                        <cfset menuData.items[ii]["category"] = currentParent>
                    </cfif>
                </cfloop>
            </cfif>
        </cfloop>

        <!--- Rebuild categories: only keep the real ones (zeroCats), with their new item counts --->
        <cfset fixedCategories = []>
        <cfloop array="#zeroCats#" index="zc">
            <cfset zcCount = 0>
            <cfloop array="#menuData.items#" index="fi">
                <cfif isStruct(fi) AND structKeyExists(fi, "category") AND isSimpleValue(fi.category) AND fi.category EQ zc>
                    <cfset zcCount++>
                </cfif>
            </cfloop>
            <cfset arrayAppend(fixedCategories, { "name": zc, "itemCount": zcCount })>
        </cfloop>
        <cfset menuData["categories"] = fixedCategories>
        <cfset formattedCategories = fixedCategories>
        <cfset arrayAppend(response.steps, "Collapsed to " & arrayLen(fixedCategories) & " categories")>
    </cfif>
</cfif>
|
|
|
|
<!---
    Server-side hierarchy enforcement from the HTML heading structure (backup if Claude
    returned flat categories).

    For every category that has no parentCategoryName and whose name matches a detected h3
    heading, look for another category matching that h3's h2 parent and link the two. The
    parent match ignores case, and also succeeds when both names are equal after removing
    a trailing "menu" (and, for the h2 text, non-alphanumeric decoration).

    A category is never linked to itself or to a same-named category: when an h3 repeats the
    text of its h2, the old lookup matched the category as its own parent and produced a
    self-referencing entry.
--->
<cfif structCount(headingHierarchy) GT 0>
    <!--- Build reverse map: lowercase h3 name -> raw h2 parent name --->
    <cfset h3ToParent = structNew()>
    <cfloop collection="#headingHierarchy#" item="hParentName">
        <cfloop array="#headingHierarchy[hParentName]#" index="hChildName">
            <cfset h3ToParent[lCase(trim(hChildName))] = hParentName>
        </cfloop>
    </cfloop>

    <cfset hierarchyApplied = 0>
    <cfloop from="1" to="#arrayLen(formattedCategories)#" index="i">
        <cfset cat = formattedCategories[i]>
        <cfif NOT structKeyExists(cat, "parentCategoryName") OR NOT len(cat.parentCategoryName)>
            <cfset catLower = lCase(trim(cat.name))>
            <cfif structKeyExists(h3ToParent, catLower)>
                <cfset rawParent = h3ToParent[catLower]>
                <!--- Normalized forms of the h2 text; computed once per child rather than once per candidate --->
                <cfset rawParentLower = lCase(rawParent)>
                <cfset parentNorm = lCase(reReplace(rawParent, "[^a-zA-Z0-9 ]", "", "all"))>
                <cfset parentNorm = trim(reReplaceNoCase(parentNorm, "\s*menu\s*$", ""))>

                <!--- Find matching parent category in the list --->
                <cfset matchedParent = "">
                <cfloop array="#formattedCategories#" index="pcat">
                    <cfset pcatLower = lCase(trim(pcat.name))>
                    <!--- Skip the child itself (and any duplicate of it) --->
                    <cfif pcatLower EQ catLower>
                        <cfcontinue>
                    </cfif>
                    <cfset pcatNorm = trim(reReplaceNoCase(pcatLower, "\s*menu\s*$", ""))>
                    <cfif pcatNorm EQ parentNorm OR pcatLower EQ rawParentLower>
                        <cfset matchedParent = pcat.name>
                        <cfbreak>
                    </cfif>
                </cfloop>

                <cfif len(matchedParent)>
                    <cfset formattedCategories[i]["parentCategoryName"] = matchedParent>
                    <cfset hierarchyApplied = hierarchyApplied + 1>
                </cfif>
            </cfif>
        </cfif>
    </cfloop>

    <cfif hierarchyApplied GT 0>
        <cfset menuData["categories"] = formattedCategories>
        <cfset arrayAppend(response.steps, "Server-side hierarchy: applied " & hierarchyApplied & " parent-child relationships")>
    </cfif>
</cfif>
|
|
|
|
<!--- Per item: a non-empty "subcategory" field overrides "category", and every item gets a positional id ("item_1", "item_2", ...) --->
<cfloop from="1" to="#arrayLen(menuData.items)#" index="i">
    <cfset item = menuData.items[i]>
    <cfif structKeyExists(item, "subcategory") AND len(item.subcategory)>
        <cfset menuData.items[i]["category"] = item.subcategory>
    </cfif>
    <cfset menuData.items[i]["id"] = "item_" & i>
</cfloop>
|
|
|
|
<!---
    Derive image fields for each item. The filename is the last segment of the URL when split
    on "/" and "\". Three input shapes are recognised, checked in this order:
      1. "images" struct (size key -> URL): sets imageFilenames for every non-empty simple
         value, plus imageSrc/imageFilename from the first key present among
         src, large, medium, small;
      2. "imageUrl" string: copied to imageSrc, filename derived from it;
      3. "imageSrc" string (legacy): only the filename is derived.
    itemsWithImages counts the items that matched any of the three.
--->
<cfset itemsWithImages = 0>
<cfloop from="1" to="#arrayLen(menuData.items)#" index="i">
    <cfset item = menuData.items[i]>

    <cfif structKeyExists(item, "images") AND isStruct(item.images)>
        <cfset imgObj = item.images>

        <!--- Filename per image size --->
        <cfset filenames = structNew()>
        <cfloop collection="#imgObj#" item="sizeKey">
            <cfset imgUrl = imgObj[sizeKey]>
            <cfif isSimpleValue(imgUrl) AND len(trim(imgUrl))>
                <cfset filenames[sizeKey] = listLast(imgUrl, "/\")>
            </cfif>
        </cfloop>
        <cfset menuData.items[i]["imageFilenames"] = filenames>

        <!--- Primary image for backwards compatibility: first key that exists, in order of preference --->
        <cfloop list="src,large,medium,small" index="preferredSize">
            <cfif structKeyExists(imgObj, preferredSize)>
                <cfset menuData.items[i]["imageSrc"] = imgObj[preferredSize]>
                <cfset menuData.items[i]["imageFilename"] = listLast(imgObj[preferredSize], "/\")>
                <cfbreak>
            </cfif>
        </cfloop>
        <cfset itemsWithImages++>
    <cfelseif structKeyExists(item, "imageUrl") AND len(trim(item.imageUrl))>
        <!--- Most common case: a single imageUrl --->
        <cfset menuData.items[i]["imageSrc"] = item.imageUrl>
        <cfset menuData.items[i]["imageFilename"] = listLast(item.imageUrl, "/\")>
        <cfset itemsWithImages++>
    <cfelseif structKeyExists(item, "imageSrc") AND len(trim(item.imageSrc))>
        <!--- Legacy: imageSrc returned directly --->
        <cfset menuData.items[i]["imageFilename"] = listLast(item.imageSrc, "/\")>
        <cfset itemsWithImages++>
    </cfif>
</cfloop>
<cfset arrayAppend(response.steps, "Found images for " & itemsWithImages & " of " & arrayLen(menuData.items) & " items")>
|
|
|
|
<!--- Attach the image data to the menu payload: all collected image URLs, an (empty) list of header candidates, and the filename -> alt text mappings used for local HTML uploads --->
<cfset menuData["imageUrls"] = imageUrlList>
<cfset menuData["headerCandidateIndices"] = arrayNew(1)>
<cfset menuData["imageMappings"] = imageMappings>

<!--- Success envelope --->
<cfset response["OK"] = true>
<cfset response["DATA"] = menuData>
<!---
    targetUrl is initialised to an empty string at the top of the template, so testing only
    isDefined() never reached the "uploaded" fallback; an empty value now counts as "no URL".
--->
<cfset response["sourceUrl"] = (isDefined("targetUrl") AND len(trim(targetUrl))) ? targetUrl : "uploaded">
<cfset response["pagesProcessed"] = arrayLen(menuPages)>
<cfset response["imagesFound"] = arrayLen(imageDataArray)>
<cfset response["playwrightImagesCount"] = arrayLen(playwrightImages)>
<cfset response["DEBUG_PLAYWRIGHT_IMAGES"] = playwrightImages>
<cfset response["DEBUG_RAW_CATEGORIES"] = menuData.categories>
|
|
|
|
<!--- Any error in the request: report message, optional detail, and the line/template of the innermost stack frame --->
<cfcatch type="any">
    <!--- Force the failure flag: an error raised after OK was already set to true must not be reported as success --->
    <cfset response["OK"] = false>
    <cfset response["MESSAGE"] = cfcatch.message>
    <cfif len(cfcatch.detail)>
        <cfset response["DETAIL"] = cfcatch.detail>
    </cfif>
    <cfif structKeyExists(cfcatch, "tagContext") AND arrayLen(cfcatch.tagContext) GT 0>
        <cfset response["DEBUG_LINE"] = cfcatch.tagContext[1].line>
        <cfset response["DEBUG_TEMPLATE"] = cfcatch.tagContext[1].template>
    </cfif>
</cfcatch>
|
|
</cftry>
|
|
|
|
<!--- Write the response struct (success or error) as the JSON body of the request --->
<cfoutput>#serializeJSON(response)#</cfoutput>
|