Improve Toast POS subcategory detection with explicit h3 search and debug output

This commit is contained in:
John Mizerek 2026-02-12 19:14:15 -08:00
parent 813628cecb
commit e372f67901

View file

@ -30,6 +30,11 @@
<cfset requestData = deserializeJSON(requestBody)> <cfset requestData = deserializeJSON(requestBody)>
<cfset response["steps"] = arrayNew(1)> <cfset response["steps"] = arrayNew(1)>
<cfscript>
    // Record which inputs the caller supplied so a failed analysis can be
    // diagnosed from the response alone.
    response["debug"] = structNew();
    response["debug"]["hasHtmlKey"] = structKeyExists(requestData, "html");
    response["debug"]["hasUrlKey"] = structKeyExists(requestData, "url");

    // Length of the posted HTML, or 0 when no html key was sent.
    if (structKeyExists(requestData, "html")) {
        response["debug"]["htmlLength"] = len(requestData.html);
    } else {
        response["debug"]["htmlLength"] = 0;
    }

    // Echo the requested URL back, or an empty string when no url key was sent.
    if (structKeyExists(requestData, "url")) {
        response["debug"]["urlValue"] = requestData.url;
    } else {
        response["debug"]["urlValue"] = "";
    }
</cfscript>
<cfset pageHtml = ""> <cfset pageHtml = "">
<cfset baseUrl = ""> <cfset baseUrl = "">
<cfset basePath = ""> <cfset basePath = "">
@ -125,6 +130,7 @@
<!--- Extract images from all pages ---> <!--- Extract images from all pages --->
<cfset allImages = arrayNew(1)> <cfset allImages = arrayNew(1)>
<cfset imageUrls = structNew()> <cfset imageUrls = structNew()>
<cfset imageMappings = arrayNew(1)><!--- For local HTML: filename -> alt text mappings --->
<cfloop array="#menuPages#" index="menuPage"> <cfloop array="#menuPages#" index="menuPage">
<!--- Find all img tags ---> <!--- Find all img tags --->
@ -133,6 +139,22 @@
<cfloop array="#imgMatches#" index="imgTag"> <cfloop array="#imgMatches#" index="imgTag">
<cfset imgSrc = reReplaceNoCase(imgTag, '.*src=["'']([^"'']+)["''].*', "\1")> <cfset imgSrc = reReplaceNoCase(imgTag, '.*src=["'']([^"'']+)["''].*', "\1")>
<!--- Extract alt text for image mapping.
      The attribute is matched separately for each quote style so that an
      apostrophe inside a double-quoted value (alt="Chef's Special"), or a
      double quote inside a single-quoted value, no longer cuts the text short. --->
<cfset imgAlt = "">
<cfif reFindNoCase('alt="[^"]+"', imgTag)>
    <cfset imgAlt = reReplaceNoCase(imgTag, '.*alt="([^"]+)".*', "\1")>
<cfelseif reFindNoCase("alt='[^']+'", imgTag)>
    <cfset imgAlt = reReplaceNoCase(imgTag, ".*alt='([^']+)'.*", "\1")>
</cfif>
<!--- Extract just the filename for matching local uploads ("/" and "\" both act as separators) --->
<cfset imgFilename = listLast(imgSrc, "/\")>
<!--- Keep a mapping only when both a filename and alt text exist and the src
      does not contain one of the excluded words (icon, logo, sprite, ...) --->
<cfif len(imgFilename) AND len(imgAlt) AND NOT reFindNoCase("(icon|favicon|logo|sprite|pixel|tracking|badge|button)", imgSrc)>
    <cfset mapping = structNew()>
    <cfset mapping["filename"] = imgFilename>
    <cfset mapping["alt"] = imgAlt>
    <!--- src is stored as found in the tag; relative URLs are resolved after this point --->
    <cfset mapping["src"] = imgSrc>
    <cfset arrayAppend(imageMappings, mapping)>
</cfif>
<!--- Resolve relative URLs ---> <!--- Resolve relative URLs --->
<cfif left(imgSrc, 1) EQ "/"> <cfif left(imgSrc, 1) EQ "/">
<cfset imgSrc = baseUrl & imgSrc> <cfset imgSrc = baseUrl & imgSrc>
@ -220,8 +242,43 @@
<cfset combinedHtml = left(combinedHtml, 100000)> <cfset combinedHtml = left(combinedHtml, 100000)>
</cfif> </cfif>
<!--- Debug: extract the text of every h3 heading from the combined HTML on the server side.
      A heading may wrap its text in inline markup (<h3><span>Cocktails</span></h3>),
      so the whole element is matched lazily up to its closing tag and any inner
      tags are stripped afterwards; matching only tag-free content would silently
      drop such headings from the debug list and from the count below. --->
<cfset h3Tags = reMatchNoCase("<h3[^>]*>.*?</h3>", combinedHtml)>
<cfset h3Texts = arrayNew(1)>
<cfloop array="#h3Tags#" index="h3Tag">
    <!--- Remove the h3 wrapper and any nested tags, leaving only the text --->
    <cfset h3Text = reReplace(h3Tag, "<[^>]+>", "", "all")>
    <!--- Collapse whitespace runs left behind by the removed markup --->
    <cfset h3Text = trim(reReplace(h3Text, "\s+", " ", "all"))>
    <cfif len(h3Text)>
        <cfset arrayAppend(h3Texts, h3Text)>
    </cfif>
</cfloop>
<cfset response["DEBUG_H3_TAGS_FOUND"] = h3Texts>
<cfset arrayAppend(response.steps, "Found " & arrayLen(h3Texts) & " h3 tags in HTML")>
<!--- System prompt for URL analysis ---> <!--- System prompt for URL analysis --->
<!--- Prompt text sent to the model. Inside a CFML string a literal pound sign must be
      written doubled; a single one starts an expression and breaks compilation. The
      brandColor instruction therefore uses the doubled form, like the section headings. --->
<cfset systemPrompt = "You are an expert at extracting structured menu data from restaurant website HTML. Extract ALL menu data visible in the HTML. Return valid JSON with these keys: business (object with name, address, phone, hours, brandColor), categories (array of category names), modifiers (array of modifier templates with name, required boolean, appliesTo, categoryName if applicable, and options array), items (array with name, description, price, category, modifiers array, and imageUrl if found). For brandColor: suggest a vibrant hex color (6 digits, no ##) based on the restaurant style. For hours: format as ""Mon-Fri 10:30am-10pm, Sat 11am-10pm, Sun 11am-9pm"". Include ALL days visible. For prices: extract as numbers (e.g., 12.99). For modifier options: use format {""name"": ""option"", ""price"": 0}. Return ONLY valid JSON, no markdown, no explanation."> <cfset systemPrompt = "Extract menu data from restaurant website HTML. Return JSON with: business, categories, modifiers, items.
##1 CATEGORIES - MOST IMPORTANT##
Search the HTML for EVERY <h3> tag. Each h3 text is a SUBCATEGORY name. Group them under parent categories.
Toast POS structure:
- Parent categories: top nav links or dropdown items (Beverages, Food, Merchandise)
- Subcategories: ALL h3 tags inside div.menuGroup or section.menuSection (e.g., 'Beer, Cider & Kombucha', 'White Wine', 'Red Wine', 'Cocktails', 'Sandwiches', 'Salads')
REQUIRED: Find and list EVERY h3 in the HTML. Output format:
{""categories"":[{""name"":""Beverages"",""subcategories"":[{""name"":""Beer, Cider & Kombucha""},{""name"":""White Wine""},{""name"":""Red Wine""}]},{""name"":""Food"",""subcategories"":[{""name"":""Sandwiches""},{""name"":""Salads""}]}]}
##2 ITEMS##
Extract from li.item elements. Include: name, description, price (number), category (parent), subcategory (from nearest h3 above).
##3 IMAGES##
For each item, extract ALL image URLs as object: {""src"":""url"",""srcset"":""urls"",""small"":""url"",""medium"":""url"",""large"":""url""}. Parse srcset attribute for multiple sizes.
##4 BUSINESS##
name, address, phone, hours (format: ""Mon-Fri 10am-9pm, Sat-Sun 11am-8pm""), brandColor (6-digit hex, no ##).
##5 MODIFIERS##
name, required (boolean), appliesTo, options array [{""name"":""opt"",""price"":0}].
Return ONLY valid JSON.">
<!--- Build message content ---> <!--- Build message content --->
<cfset messagesContent = arrayNew(1)> <cfset messagesContent = arrayNew(1)>
@ -319,6 +376,9 @@
<cfset menuData = deserializeJSON(responseText)> <cfset menuData = deserializeJSON(responseText)>
<!--- Debug: save raw Claude response before processing --->
<!--- NOTE(review): this assignment comes after the deserializeJSON(responseText) call on the
      preceding line, so it is skipped whenever that call throws. Confirm whether the raw
      text should be captured before parsing so it is available for malformed responses. --->
<cfset response["DEBUG_RAW_CLAUDE"] = responseText>
<!--- Build image URL list for the wizard to use ---> <!--- Build image URL list for the wizard to use --->
<cfset imageUrlList = arrayNew(1)> <cfset imageUrlList = arrayNew(1)>
<cfloop array="#imageDataArray#" index="imgData"> <cfloop array="#imageDataArray#" index="imgData">
@ -341,8 +401,9 @@
<cfset menuData["items"] = arrayNew(1)> <cfset menuData["items"] = arrayNew(1)>
</cfif> </cfif>
<!--- Convert categories to expected format if needed ---> <!--- Convert categories to expected format - flatten subcategories into parent --->
<cfset formattedCategories = arrayNew(1)> <cfset formattedCategories = arrayNew(1)>
<cfset subcatToParentMap = structNew()><!--- Map subcategory names to parent category names --->
<cfloop array="#menuData.categories#" index="cat"> <cfloop array="#menuData.categories#" index="cat">
<cfif isSimpleValue(cat)> <cfif isSimpleValue(cat)>
<cfset catObj = structNew()> <cfset catObj = structNew()>
@ -350,28 +411,112 @@
<cfset catObj["itemCount"] = 0> <cfset catObj["itemCount"] = 0>
<cfset arrayAppend(formattedCategories, catObj)> <cfset arrayAppend(formattedCategories, catObj)>
<cfelseif isStruct(cat)> <cfelseif isStruct(cat)>
<cfif NOT structKeyExists(cat, "itemCount")> <!--- Add only the parent category --->
<cfset cat["itemCount"] = 0> <cfset parentName = structKeyExists(cat, "name") ? cat.name : "">
<cfif len(parentName)>
<cfset catObj = structNew()>
<cfset catObj["name"] = parentName>
<cfset catObj["itemCount"] = 0>
<cfset arrayAppend(formattedCategories, catObj)>
<!--- Register every subcategory of this category in subcatToParentMap, keyed by
      its lower-cased name and pointing at the parent name, so items can later be
      moved up to the parent category --->
<cfif structKeyExists(cat, "subcategories") AND isArray(cat.subcategories)>
    <cfloop array="#cat.subcategories#" index="subcat">
        <!--- A subcategory is either a plain name or a struct carrying a name key;
              anything else yields an empty name and is ignored --->
        <cfif isSimpleValue(subcat)>
            <cfset subcatName = subcat>
        <cfelseif isStruct(subcat) AND structKeyExists(subcat, "name")>
            <cfset subcatName = subcat.name>
        <cfelse>
            <cfset subcatName = "">
        </cfif>
        <cfif len(subcatName)>
            <cfset subcatToParentMap[lcase(subcatName)] = parentName>
        </cfif>
    </cfloop>
</cfif>
</cfif> </cfif>
<cfset arrayAppend(formattedCategories, cat)>
</cfif> </cfif>
</cfloop> </cfloop>
<cfset menuData["categories"] = formattedCategories> <cfset menuData["categories"] = formattedCategories>
<!--- Reassign items in subcategories to their parent category.
      subcatToParentMap is keyed by lower-cased subcategory name. Items come from
      model output, so anything that is not a struct, or whose category/subcategory
      is not a plain value, is left untouched instead of raising an error that
      would abort the whole response. --->
<cfloop from="1" to="#arrayLen(menuData.items)#" index="i">
    <cfset item = menuData.items[i]>
    <cfif isStruct(item)>
        <!--- Check if item's category is actually a subcategory --->
        <cfif structKeyExists(item, "category") AND isSimpleValue(item.category) AND len(item.category)>
            <cfset catKey = lcase(item.category)>
            <cfif structKeyExists(subcatToParentMap, catKey)>
                <cfset menuData.items[i]["category"] = subcatToParentMap[catKey]>
            </cfif>
        </cfif>
        <!--- Also check subcategory field if present; it is evaluated last, so a
              known subcategory decides the parent even when category was already set --->
        <cfif structKeyExists(item, "subcategory") AND isSimpleValue(item.subcategory) AND len(item.subcategory)>
            <cfset subcatKey = lcase(item.subcategory)>
            <cfif structKeyExists(subcatToParentMap, subcatKey)>
                <cfset menuData.items[i]["category"] = subcatToParentMap[subcatKey]>
            </cfif>
        </cfif>
    </cfif>
</cfloop>
<!--- Add item IDs ---> <!--- Add item IDs --->
<cfloop from="1" to="#arrayLen(menuData.items)#" index="i"> <cfloop from="1" to="#arrayLen(menuData.items)#" index="i">
<cfset menuData.items[i]["id"] = "item_" & i> <cfset menuData.items[i]["id"] = "item_" & i>
</cfloop> </cfloop>
<!--- Process item images - extract filenames from the images object that Claude identified from HTML.
      For each item this sets:
        imageFilenames - struct of size key -> filename, for every non-empty plain value in item.images
        imageSrc       - primary image URL: first usable value among src, large, medium, small
        imageFilename  - filename part of the primary URL
      An empty or non-string entry no longer blocks the fallback to the next size,
      the legacy imageSrc field is used when images yields nothing usable, and an
      item is counted only when a primary image was actually found. --->
<cfset itemsWithImages = 0>
<cfloop from="1" to="#arrayLen(menuData.items)#" index="i">
    <cfset item = menuData.items[i]>
    <cfset primarySrc = "">
    <cfif isStruct(item)>
        <!--- Check if Claude found images object with URLs from HTML --->
        <cfif structKeyExists(item, "images") AND isStruct(item.images)>
            <cfset imgObj = item.images>
            <!--- Extract filenames for each image size --->
            <cfset filenames = structNew()>
            <cfloop collection="#imgObj#" item="sizeKey">
                <cfset imgUrl = imgObj[sizeKey]>
                <cfif isSimpleValue(imgUrl) AND len(trim(imgUrl))>
                    <cfset filenames[sizeKey] = listLast(imgUrl, "/\")>
                </cfif>
            </cfloop>
            <cfset menuData.items[i]["imageFilenames"] = filenames>
            <!--- Pick the primary image for backwards compatibility, in priority order --->
            <cfloop list="src,large,medium,small" index="sizeKey">
                <cfif NOT len(primarySrc) AND structKeyExists(imgObj, sizeKey) AND isSimpleValue(imgObj[sizeKey]) AND len(trim(imgObj[sizeKey]))>
                    <cfset primarySrc = trim(imgObj[sizeKey])>
                </cfif>
            </cfloop>
            <cfif len(primarySrc)>
                <cfset menuData.items[i]["imageSrc"] = primarySrc>
            </cfif>
        </cfif>
        <!--- Legacy: handle if Claude returned imageSrc directly --->
        <cfif NOT len(primarySrc) AND structKeyExists(item, "imageSrc") AND isSimpleValue(item.imageSrc) AND len(trim(item.imageSrc))>
            <cfset primarySrc = trim(item.imageSrc)>
        </cfif>
        <cfif len(primarySrc)>
            <cfset menuData.items[i]["imageFilename"] = listLast(primarySrc, "/\")>
            <cfset itemsWithImages = itemsWithImages + 1>
        </cfif>
    </cfif>
</cfloop>
<cfset arrayAppend(response.steps, "Found images for " & itemsWithImages & " of " & arrayLen(menuData.items) & " items")>
<!--- Add image URLs to response ---> <!--- Add image URLs to response --->
<cfset menuData["imageUrls"] = imageUrlList> <cfset menuData["imageUrls"] = imageUrlList>
<cfset menuData["headerCandidateIndices"] = arrayNew(1)> <cfset menuData["headerCandidateIndices"] = arrayNew(1)>
<!--- Add image mappings for local HTML uploads (filename -> alt text) --->
<cfset menuData["imageMappings"] = imageMappings>
<cfset response["OK"] = true> <cfset response["OK"] = true>
<cfset response["DATA"] = menuData> <cfset response["DATA"] = menuData>
<cfset response["sourceUrl"] = targetUrl> <cfset response["sourceUrl"] = targetUrl>
<cfset response["pagesProcessed"] = arrayLen(menuPages)> <cfset response["pagesProcessed"] = arrayLen(menuPages)>
<cfset response["imagesFound"] = arrayLen(imageDataArray)> <cfset response["imagesFound"] = arrayLen(imageDataArray)>
<!--- Debug: show subcategory mapping --->
<cfset response["DEBUG_SUBCAT_MAP"] = subcatToParentMap>
<!--- NOTE(review): menuData["categories"] was replaced with formattedCategories earlier in this
      file, so despite the key name this is the flattened parent list, not the categories
      exactly as the model returned them. --->
<cfset response["DEBUG_RAW_CATEGORIES"] = menuData.categories>
<cfcatch type="any"> <cfcatch type="any">
<cfset response["MESSAGE"] = cfcatch.message> <cfset response["MESSAGE"] = cfcatch.message>