Improve Toast POS subcategory detection with explicit h3 search and debug output
This commit is contained in:
parent
813628cecb
commit
e372f67901
1 changed files with 150 additions and 5 deletions
|
|
@ -30,6 +30,11 @@
|
|||
<!--- Parse the JSON request body --->
<cfset requestData = deserializeJSON(requestBody)>

<!--- Step messages are appended to this array as processing proceeds --->
<cfset response["steps"] = []>

<!--- Debug snapshot of what the request carried: start from defaults, then fill in real values only for keys that are present --->
<cfset response["debug"] = structNew()>
<cfset response["debug"]["hasHtmlKey"] = structKeyExists(requestData, "html")>
<cfset response["debug"]["hasUrlKey"] = structKeyExists(requestData, "url")>
<cfset response["debug"]["htmlLength"] = 0>
<cfset response["debug"]["urlValue"] = "">
<cfif response["debug"]["hasHtmlKey"]>
    <cfset response["debug"]["htmlLength"] = len(requestData.html)>
</cfif>
<cfif response["debug"]["hasUrlKey"]>
    <cfset response["debug"]["urlValue"] = requestData.url>
</cfif>

<!--- Working values, empty until populated further down --->
<cfset pageHtml = "">
<cfset baseUrl = "">
<cfset basePath = "">
|
||||
|
|
@ -125,6 +130,7 @@
|
|||
<!--- Containers used while extracting images from all pages --->
<cfset allImages = []>
<cfset imageUrls = {}>
<!--- For local HTML uploads: one entry per image, pairing its filename with its alt text --->
<cfset imageMappings = []>
|
||||
|
||||
<cfloop array="#menuPages#" index="menuPage">
|
||||
<!--- Find all img tags --->
|
||||
|
|
@ -133,6 +139,22 @@
|
|||
<cfloop array="#imgMatches#" index="imgTag">
|
||||
<cfset imgSrc = reReplaceNoCase(imgTag, '.*src=["'']([^"'']+)["''].*', "\1")>
|
||||
|
||||
<!---
    Record a filename -> alt text mapping for the current img tag (imgTag / imgSrc).

    The alt value is read with a sub-expression match instead of a whole-tag reReplace:
      - the opening quote is captured and matched again with \1, so a value may
        contain the other quote character (alt="Joe's Burger" is no longer cut at the apostrophe);
      - whitespace is required before "alt", so attributes such as data-alt= are not mistaken for it;
      - only the captured value is taken, so a tag that spans several lines cannot
        leak the rest of the tag into the alt text.
--->
<cfset imgAlt = "">
<cfset altMatch = reFindNoCase('\salt\s*=\s*(["''])(.*?)\1', imgTag, 1, true)>
<!--- pos[1] is the whole match, pos[2] the quote character, pos[3] the alt value --->
<cfif altMatch.pos[1] GT 0 AND arrayLen(altMatch.pos) GTE 3 AND altMatch.len[3] GT 0>
    <cfset imgAlt = trim(mid(imgTag, altMatch.pos[3], altMatch.len[3]))>
</cfif>

<!--- Extract just the filename for matching local uploads --->
<cfset imgFilename = listLast(imgSrc, "/\")>

<!--- Keep only images that have both a filename and alt text, and whose source does not look like UI chrome or a tracking pixel --->
<cfif len(imgFilename) AND len(imgAlt) AND NOT reFindNoCase("(icon|favicon|logo|sprite|pixel|tracking|badge|button)", imgSrc)>
    <cfset mapping = structNew()>
    <cfset mapping["filename"] = imgFilename>
    <cfset mapping["alt"] = imgAlt>
    <cfset mapping["src"] = imgSrc>
    <cfset arrayAppend(imageMappings, mapping)>
</cfif>
|
||||
|
||||
<!--- Resolve relative URLs --->
|
||||
<cfif left(imgSrc, 1) EQ "/">
|
||||
<cfset imgSrc = baseUrl & imgSrc>
|
||||
|
|
@ -220,8 +242,43 @@
|
|||
<cfset combinedHtml = left(combinedHtml, 100000)>
|
||||
</cfif>
|
||||
|
||||
<!---
    Debug: list the text of every h3 element found in combinedHtml.

    The heading body is matched lazily up to its closing tag and nested markup is
    stripped afterwards, so headings such as <h3><span>White Wine</span></h3>
    are reported too (a [^<]* body would skip any h3 that contains a child tag).
--->
<cfset h3Tags = reMatchNoCase("<h3\b[^>]*>[\s\S]*?</h3>", combinedHtml)>
<cfset h3Texts = arrayNew(1)>
<cfloop array="#h3Tags#" index="h3Tag">
    <!--- Remove the h3 wrapper and any nested tags, leaving only the text --->
    <cfset h3Text = reReplace(h3Tag, "<[^>]*>", " ", "all")>
    <!--- Collapse line breaks and runs of whitespace left behind by the removed tags --->
    <cfset h3Text = trim(reReplace(h3Text, "\s+", " ", "all"))>
    <cfif len(h3Text)>
        <cfset arrayAppend(h3Texts, h3Text)>
    </cfif>
</cfloop>
<cfset response["DEBUG_H3_TAGS_FOUND"] = h3Texts>
<cfset arrayAppend(response.steps, "Found " & arrayLen(h3Texts) & " h3 tags in HTML")>
|
||||
|
||||
<!--- System prompt for URL analysis --->
|
||||
<cfset systemPrompt = "You are an expert at extracting structured menu data from restaurant website HTML. Extract ALL menu data visible in the HTML. Return valid JSON with these keys: business (object with name, address, phone, hours, brandColor), categories (array of category names), modifiers (array of modifier templates with name, required boolean, appliesTo, categoryName if applicable, and options array), items (array with name, description, price, category, modifiers array, and imageUrl if found). For brandColor: suggest a vibrant hex color (6 digits, no ##) based on the restaurant style. For hours: format as ""Mon-Fri 10:30am-10pm, Sat 11am-10pm, Sun 11am-9pm"". Include ALL days visible. For prices: extract as numbers (e.g., 12.99). For modifier options: use format {""name"": ""option"", ""price"": 0}. Return ONLY valid JSON, no markdown, no explanation.">
|
||||
<!---
    System prompt for the menu extraction request.

    Escaping inside this string literal: a doubled double-quote is a literal quote and a
    doubled pound sign is a literal pound sign. A single pound sign would start a CFML
    expression, so every literal one below is doubled.

    The image object is explicitly named "images" because the post-processing of the
    parsed result looks for an "images" struct on each item.
--->
<cfset systemPrompt = "Extract menu data from restaurant website HTML. Return JSON with: business, categories, modifiers, items.

##1 CATEGORIES - MOST IMPORTANT##
Search the HTML for EVERY <h3> tag. Each h3 text is a SUBCATEGORY name. Group them under parent categories.
Toast POS structure:
- Parent categories: top nav links or dropdown items (Beverages, Food, Merchandise)
- Subcategories: ALL h3 tags inside div.menuGroup or section.menuSection (e.g., 'Beer, Cider & Kombucha', 'White Wine', 'Red Wine', 'Cocktails', 'Sandwiches', 'Salads')
REQUIRED: Find and list EVERY h3 in the HTML. Output format:
{""categories"":[{""name"":""Beverages"",""subcategories"":[{""name"":""Beer, Cider & Kombucha""},{""name"":""White Wine""},{""name"":""Red Wine""}]},{""name"":""Food"",""subcategories"":[{""name"":""Sandwiches""},{""name"":""Salads""}]}]}

##2 ITEMS##
Extract from li.item elements. Include: name, description, price (number), category (parent), subcategory (from nearest h3 above).

##3 IMAGES##
For each item, extract ALL image URLs into an ""images"" object: {""src"":""url"",""srcset"":""urls"",""small"":""url"",""medium"":""url"",""large"":""url""}. Parse srcset attribute for multiple sizes.

##4 BUSINESS##
name, address, phone, hours (format: ""Mon-Fri 10am-9pm, Sat-Sun 11am-8pm""), brandColor (6-digit hex, no ##).

##5 MODIFIERS##
name, required (boolean), appliesTo, options array [{""name"":""opt"",""price"":0}].

Return ONLY valid JSON.">
|
||||
|
||||
<!--- Build message content --->
|
||||
<cfset messagesContent = arrayNew(1)>
|
||||
|
|
@ -319,6 +376,9 @@
|
|||
|
||||
<!--- Debug: keep the raw Claude response. It is stored BEFORE parsing so that it is already on the response struct if deserializeJSON throws on malformed JSON, which is the case where the raw text is most needed. --->
<cfset response["DEBUG_RAW_CLAUDE"] = responseText>

<!--- Parse the response text; throws when the text is not valid JSON --->
<cfset menuData = deserializeJSON(responseText)>
|
||||
|
||||
<!--- Build image URL list for the wizard to use --->
|
||||
<cfset imageUrlList = arrayNew(1)>
|
||||
<cfloop array="#imageDataArray#" index="imgData">
|
||||
|
|
@ -341,8 +401,9 @@
|
|||
<cfset menuData["items"] = arrayNew(1)>
|
||||
</cfif>
|
||||
|
||||
<!--- Convert categories to expected format if needed --->
|
||||
<!--- Convert categories to expected format - flatten subcategories into parent --->
|
||||
<cfset formattedCategories = arrayNew(1)>
|
||||
<cfset subcatToParentMap = structNew()><!--- Map subcategory names to parent category names --->
|
||||
<cfloop array="#menuData.categories#" index="cat">
|
||||
<cfif isSimpleValue(cat)>
|
||||
<cfset catObj = structNew()>
|
||||
|
|
@ -350,28 +411,112 @@
|
|||
<cfset catObj["itemCount"] = 0>
|
||||
<cfset arrayAppend(formattedCategories, catObj)>
|
||||
<cfelseif isStruct(cat)>
|
||||
<cfif NOT structKeyExists(cat, "itemCount")>
|
||||
<cfset cat["itemCount"] = 0>
|
||||
<!--- Add only the parent category --->
|
||||
<cfset parentName = structKeyExists(cat, "name") ? cat.name : "">
|
||||
<cfif len(parentName)>
|
||||
<cfset catObj = structNew()>
|
||||
<cfset catObj["name"] = parentName>
|
||||
<cfset catObj["itemCount"] = 0>
|
||||
<cfset arrayAppend(formattedCategories, catObj)>
|
||||
<!--- Remember which parent each subcategory belongs to, keyed by the lower-cased subcategory name, so items can be reassigned to the parent later --->
<cfif structKeyExists(cat, "subcategories") AND isArray(cat.subcategories)>
    <cfloop array="#cat.subcategories#" index="subcat">
        <!--- A subcategory entry is either a bare name or a struct carrying a name; anything else yields an empty name and is skipped --->
        <cfif isSimpleValue(subcat)>
            <cfset subcatName = subcat>
        <cfelseif isStruct(subcat) AND structKeyExists(subcat, "name")>
            <cfset subcatName = subcat.name>
        <cfelse>
            <cfset subcatName = "">
        </cfif>
        <cfif len(subcatName)>
            <!--- Overwrite is allowed: a later parent replaces an earlier one for the same subcategory name --->
            <cfset structInsert(subcatToParentMap, lcase(subcatName), parentName, true)>
        </cfif>
    </cfloop>
</cfif>
|
||||
</cfif>
|
||||
<cfset arrayAppend(formattedCategories, cat)>
|
||||
</cfif>
|
||||
</cfloop>
|
||||
<cfset menuData["categories"] = formattedCategories>
|
||||
|
||||
<!---
    Reassign items that are filed under a subcategory to that subcategory's parent category.

    The item list is parsed model output, so its shape is not guaranteed: entries that
    are not structs are skipped, and category / subcategory values that are not simple
    values are ignored, instead of letting len() or lcase() throw and abort the import.
    When both fields resolve to a parent, the subcategory field is applied last and wins.
--->
<cfloop from="1" to="#arrayLen(menuData.items)#" index="i">
    <cfset item = menuData.items[i]>
    <cfif isStruct(item)>
        <!--- Check if the item's category is actually a subcategory --->
        <cfif structKeyExists(item, "category") AND isSimpleValue(item.category) AND len(item.category)>
            <cfset catKey = lcase(item.category)>
            <cfif structKeyExists(subcatToParentMap, catKey)>
                <cfset menuData.items[i]["category"] = subcatToParentMap[catKey]>
            </cfif>
        </cfif>
        <!--- Also check the subcategory field if present --->
        <cfif structKeyExists(item, "subcategory") AND isSimpleValue(item.subcategory) AND len(item.subcategory)>
            <cfset subcatKey = lcase(item.subcategory)>
            <cfif structKeyExists(subcatToParentMap, subcatKey)>
                <cfset menuData.items[i]["category"] = subcatToParentMap[subcatKey]>
            </cfif>
        </cfif>
    </cfif>
</cfloop>
|
||||
|
||||
<!--- Give every item a positional id of the form item_1, item_2, ... --->
<cfset itemTotal = arrayLen(menuData.items)>
<cfloop from="1" to="#itemTotal#" index="i">
    <cfset structInsert(menuData.items[i], "id", "item_" & i, true)>
</cfloop>
|
||||
|
||||
<!---
    Process item images: derive filenames from the image URLs Claude identified in the HTML.

    For each item this sets:
      imageFilenames - struct of size key -> filename, for every usable URL in item.images
      imageSrc / imageFilename - the primary image, taken from the first NON-EMPTY of
                                 src, large, medium, small (kept for backwards compatibility)

    Hardening over a plain key-exists chain:
      - an empty or non-string "src" no longer blocks the fallback to large/medium/small;
      - null or non-string size values are skipped instead of throwing;
      - an item whose images struct yields nothing still gets the legacy imageSrc handling;
      - an item is counted in itemsWithImages only when a usable image was actually found.
--->
<cfset itemsWithImages = 0>
<cfset primaryImageKeys = ["src", "large", "medium", "small"]>
<cfloop from="1" to="#arrayLen(menuData.items)#" index="i">
    <cfset item = menuData.items[i]>
    <cfif isStruct(item)>
        <cfset foundImage = false>
        <cfset primarySet = false>

        <!--- Check if Claude returned an images object with URLs from the HTML --->
        <cfif structKeyExists(item, "images") AND isStruct(item.images)>
            <cfset imgObj = item.images>

            <!--- Extract a filename for each image size that carries a usable URL --->
            <cfset filenames = structNew()>
            <cfloop collection="#imgObj#" item="sizeKey">
                <!--- structKeyExists is re-checked so keys holding a JSON null are skipped --->
                <cfif structKeyExists(imgObj, sizeKey) AND isSimpleValue(imgObj[sizeKey]) AND len(trim(imgObj[sizeKey]))>
                    <cfset filenames[sizeKey] = listLast(imgObj[sizeKey], "/\")>
                </cfif>
            </cfloop>
            <cfset menuData.items[i]["imageFilenames"] = filenames>
            <cfset foundImage = structCount(filenames) GT 0>

            <!--- Primary image for backwards compatibility: first non-empty of src, large, medium, small --->
            <cfloop array="#primaryImageKeys#" index="sizeKey">
                <cfif structKeyExists(imgObj, sizeKey) AND isSimpleValue(imgObj[sizeKey]) AND len(trim(imgObj[sizeKey]))>
                    <cfset menuData.items[i]["imageSrc"] = imgObj[sizeKey]>
                    <cfset menuData.items[i]["imageFilename"] = listLast(imgObj[sizeKey], "/\")>
                    <cfset primarySet = true>
                    <cfbreak>
                </cfif>
            </cfloop>
        </cfif>

        <!--- Legacy: Claude returned imageSrc directly (also used when the images object gave no primary image) --->
        <cfif NOT primarySet AND structKeyExists(item, "imageSrc") AND isSimpleValue(item.imageSrc) AND len(trim(item.imageSrc))>
            <cfset menuData.items[i]["imageFilename"] = listLast(item.imageSrc, "/\")>
            <cfset foundImage = true>
        </cfif>

        <cfif foundImage>
            <cfset itemsWithImages = itemsWithImages + 1>
        </cfif>
    </cfif>
</cfloop>
<cfset arrayAppend(response.steps, "Found images for " & itemsWithImages & " of " & arrayLen(menuData.items) & " items")>
|
||||
|
||||
<!--- Attach the image URL list built above to the menu data --->
|
||||
<cfset menuData["imageUrls"] = imageUrlList>
|
||||
<!--- Always an empty array at this point --->
<cfset menuData["headerCandidateIndices"] = arrayNew(1)>
|
||||
<!--- Add image mappings for local HTML uploads (each entry pairs a filename with its alt text and src) --->
|
||||
<cfset menuData["imageMappings"] = imageMappings>
|
||||
|
||||
<!--- Success envelope: the menu data plus summary counts --->
<cfset response["OK"] = true>
|
||||
<cfset response["DATA"] = menuData>
|
||||
<cfset response["sourceUrl"] = targetUrl>
|
||||
<cfset response["pagesProcessed"] = arrayLen(menuPages)>
|
||||
<cfset response["imagesFound"] = arrayLen(imageDataArray)>
|
||||
<!--- Debug: expose the lower-cased subcategory name -> parent category name map --->
|
||||
<cfset response["DEBUG_SUBCAT_MAP"] = subcatToParentMap>
|
||||
<!--- NOTE(review): menuData.categories appears to have been replaced by the flattened formattedCategories list earlier in this template, so despite the RAW in the key name this is presumably not what Claude returned - confirm, and capture the value before flattening if the raw list is wanted --->
<cfset response["DEBUG_RAW_CATEGORIES"] = menuData.categories>
|
||||
|
||||
<cfcatch type="any">
|
||||
<cfset response["MESSAGE"] = cfcatch.message>
|
||||
|
|
|
|||
Reference in a new issue