528 lines
25 KiB
Text
528 lines
25 KiB
Text
<!---
    Endpoint preamble: suppress debug output, emit only <cfoutput> content,
    allow up to 300 seconds for the request, and answer with JSON.
--->
<cfsetting showdebugoutput="false" enablecfoutputonly="true" requesttimeout="300">
<cfcontent type="application/json; charset=utf-8" reset="true">

<!--- Response envelope; OK stays false until the whole pipeline succeeds --->
<cfset response = {}>
<cfset response["OK"] = false>
|
|
|
|
<cftry>
|
|
<!--- Read the Claude API key from config/claude.json, two directories above this template --->
<cfset configPath = getDirectoryFromPath(getCurrentTemplatePath()) & "../../config/claude.json">
<cfset CLAUDE_API_KEY = "">

<cfif fileExists(configPath)>
    <cfset configData = deserializeJSON(fileRead(configPath))>
    <cfset CLAUDE_API_KEY = structKeyExists(configData, "apiKey") ? configData.apiKey : "">
</cfif>

<!--- Nothing below can work without a key, so fail early --->
<cfif len(CLAUDE_API_KEY) EQ 0>
    <cfthrow message="Claude API key not configured">
</cfif>
|
|
|
|
<!---
    Read and validate the request body.
    Expected: a JSON object carrying either "html" (raw page markup) or "url".
    Throws (handled by the enclosing cftry) when the body is empty, is not
    valid JSON, is not a JSON object, or when html/url are not plain strings.
--->
<cfset requestBody = toString(getHttpRequestData().content)>
<cfif NOT len(trim(requestBody))>
    <cfthrow message="No request body provided">
</cfif>
<cfif NOT isJSON(requestBody)>
    <cfthrow message="Request body is not valid JSON">
</cfif>

<cfset requestData = deserializeJSON(requestBody)>
<cfif NOT isStruct(requestData)>
    <cfthrow message="Request body must be a JSON object">
</cfif>

<!--- Reject non-string html/url up front so later len()/trim() calls cannot fail obscurely --->
<cfif structKeyExists(requestData, "html") AND NOT isSimpleValue(requestData.html)>
    <cfthrow message="'html' must be a string">
</cfif>
<cfif structKeyExists(requestData, "url") AND NOT isSimpleValue(requestData.url)>
    <cfthrow message="'url' must be a string">
</cfif>

<!--- Progress log and request diagnostics returned to the caller --->
<cfset response["steps"] = arrayNew(1)>
<cfset response["debug"] = structNew()>
<cfset response["debug"]["hasHtmlKey"] = structKeyExists(requestData, "html")>
<cfset response["debug"]["hasUrlKey"] = structKeyExists(requestData, "url")>
<cfset response["debug"]["htmlLength"] = structKeyExists(requestData, "html") ? len(requestData.html) : 0>
<cfset response["debug"]["urlValue"] = structKeyExists(requestData, "url") ? requestData.url : "">

<!--- Defaults; filled in once the input source is resolved --->
<cfset pageHtml = "">
<cfset baseUrl = "">
<cfset basePath = "">
<cfset targetUrl = "">
|
|
|
|
<!---
    Resolve the page HTML from one of two sources:
      1. "html" in the request (uploaded or pasted markup) - used as-is, no fetching
      2. "url" in the request - fetched with browser-like headers
    Sets pageHtml, targetUrl, baseUrl (scheme + host) and basePath (directory of
    the page, always ending in "/") for resolving relative links later.
    Throws when neither input is present or the fetch does not return HTTP 200.
--->
<cfif structKeyExists(requestData, "html") AND len(trim(requestData.html))>
    <cfset pageHtml = trim(requestData.html)>
    <cfset arrayAppend(response.steps, "Using provided HTML content: " & len(pageHtml) & " bytes")>
    <!--- No base URL for local content - images won't be fetched --->
    <cfset baseUrl = "">
    <cfset basePath = "">
<cfelseif structKeyExists(requestData, "url") AND len(trim(requestData.url))>
    <cfset targetUrl = trim(requestData.url)>

    <!--- Assume https when the caller omitted the scheme --->
    <cfif NOT reFindNoCase("^https?://", targetUrl)>
        <cfset targetUrl = "https://" & targetUrl>
    </cfif>

    <cfset arrayAppend(response.steps, "Fetching URL: " & targetUrl)>

    <!---
        NOTE(review): targetUrl is caller-controlled and fetched server-side, so this
        endpoint can reach internal hosts (SSRF). Confirm it is only exposed to
        trusted users, or add a host allow/deny check here.
    --->
    <cfhttp url="#targetUrl#" method="GET" timeout="30" result="mainPage" useragent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36">
        <cfhttpparam type="header" name="Accept" value="text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8">
        <cfhttpparam type="header" name="Accept-Language" value="en-US,en;q=0.9">
        <!--- "br" is deliberately not advertised: a Brotli-encoded body may not be decodable by the engine's HTTP client (verify against your engine before re-adding) --->
        <cfhttpparam type="header" name="Accept-Encoding" value="gzip, deflate">
        <cfhttpparam type="header" name="Sec-Fetch-Dest" value="document">
        <cfhttpparam type="header" name="Sec-Fetch-Mode" value="navigate">
        <cfhttpparam type="header" name="Sec-Fetch-Site" value="none">
        <cfhttpparam type="header" name="Sec-Fetch-User" value="?1">
        <cfhttpparam type="header" name="Upgrade-Insecure-Requests" value="1">
    </cfhttp>

    <!--- statusCode looks like "200 OK"; val() keeps only the leading number (0 when there is none) --->
    <cfif val(mainPage.statusCode) NEQ 200>
        <cfthrow message="Failed to fetch URL: #mainPage.statusCode#">
    </cfif>

    <cfset pageHtml = mainPage.fileContent>
    <cfset arrayAppend(response.steps, "Fetched " & len(pageHtml) & " bytes")>

    <!--- Scheme + host only; stops at the first "/", "?" or "#" --->
    <cfset baseUrl = reReplaceNoCase(targetUrl, "^(https?://[^/?##]+).*$", "\1")>

    <!---
        Directory of the page. Query string and fragment are dropped first.
        A URL with no path at all (e.g. https://example.com) must map to
        "https://example.com/" - stripping the "last segment" from it would
        otherwise remove the host and leave just "https://".
    --->
    <cfset urlWithoutQuery = reReplace(targetUrl, "[?##].*$", "")>
    <cfif len(urlWithoutQuery) GT len(baseUrl)>
        <cfset basePath = reReplace(urlWithoutQuery, "/[^/]*$", "/")>
    <cfelse>
        <cfset basePath = baseUrl & "/">
    </cfif>
<cfelse>
    <cfthrow message="Either 'url' or 'html' content is required">
</cfif>
|
|
|
|
<!---
    Collect up to 5 pages to analyse: the main page plus same-site pages whose
    href contains "menu", "food", "dishes" or "order".
    Each distinct URL is fetched at most once. Anchors, mailto/tel/javascript
    links, static assets and other hosts are skipped. Sub-page fetches are
    best-effort: a failure is logged to response.steps and otherwise ignored.
    When HTML was supplied directly (baseUrl is empty) nothing is fetched.
--->
<cfset menuPages = arrayNew(1)>
<cfset arrayAppend(menuPages, { url: targetUrl, html: pageHtml })>

<cfif len(baseUrl)>
    <cfset menuLinkPatterns = 'href=["'']([^"'']*(?:menu|food|dishes|order)[^"'']*)["'']'>
    <cfset menuLinks = reMatchNoCase(menuLinkPatterns, pageHtml)>

    <!--- File extensions that cannot be menu pages (stylesheets, scripts, images, fonts) --->
    <cfset assetPattern = "\.(css|js|json|xml|ico|svg|png|jpe?g|gif|webp|woff2?|ttf)([?]|$)">

    <!--- URLs already handled, keyed without fragment or trailing slash; seeded with the main page --->
    <cfset seenLinks = structNew()>
    <cfset seenLinks[reReplace(listFirst(targetUrl, "##"), "/+$", "")] = true>

    <cfloop array="#menuLinks#" index="linkMatch">
        <!--- Limit to 5 pages max --->
        <cfif arrayLen(menuPages) GTE 5>
            <cfbreak>
        </cfif>

        <cfset linkUrl = trim(reReplaceNoCase(linkMatch, 'href=["'']([^"'']*)["'']', "\1"))>
        <!--- hrefs in markup are HTML-escaped --->
        <cfset linkUrl = replaceNoCase(linkUrl, "&amp;", "&", "all")>

        <!--- In-page anchors and non-HTTP schemes are never fetchable pages --->
        <cfset isFetchable = len(linkUrl) GT 0
            AND left(linkUrl, 1) NEQ "##"
            AND NOT reFindNoCase("^(mailto|tel|sms|javascript|data):", linkUrl)>

        <cfif isFetchable>
            <!--- The fragment never changes which document is fetched --->
            <cfset linkUrl = listFirst(linkUrl, "##")>

            <!--- Resolve relative URLs --->
            <cfif left(linkUrl, 2) EQ "//">
                <!--- protocol-relative: reuse the scheme of the main page --->
                <cfset linkUrl = listFirst(baseUrl, ":") & ":" & linkUrl>
            <cfelseif left(linkUrl, 1) EQ "/">
                <cfset linkUrl = baseUrl & linkUrl>
            <cfelseif NOT reFindNoCase("^https?://", linkUrl)>
                <cfset linkUrl = basePath & linkUrl>
            </cfif>

            <cfset linkKey = reReplace(linkUrl, "/+$", "")>

            <!--- Same site means the URL starts with baseUrl followed by "/", "?" or nothing; a mere substring match would let other hosts through --->
            <cfset isSameSite = compareNoCase(linkKey, baseUrl) EQ 0
                OR compareNoCase(left(linkUrl, len(baseUrl) + 1), baseUrl & "/") EQ 0
                OR compareNoCase(left(linkUrl, len(baseUrl) + 1), baseUrl & "?") EQ 0>

            <cfif isSameSite
                AND len(linkKey)
                AND NOT reFindNoCase(assetPattern, linkUrl)
                AND NOT structKeyExists(seenLinks, linkKey)>

                <!--- Mark before fetching so a failing URL is not retried --->
                <cfset seenLinks[linkKey] = true>

                <cftry>
                    <cfhttp url="#linkUrl#" method="GET" timeout="15" result="subPage" useragent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36">
                        <cfhttpparam type="header" name="Accept" value="text/html,application/xhtml+xml">
                    </cfhttp>

                    <cfif val(subPage.statusCode) EQ 200>
                        <cfset arrayAppend(menuPages, { url: linkUrl, html: subPage.fileContent })>
                        <cfset arrayAppend(response.steps, "Found menu page: " & linkUrl)>
                    </cfif>
                    <cfcatch>
                        <!--- Best-effort: note the failure and carry on with the remaining links --->
                        <cfset arrayAppend(response.steps, "Skipped menu page (fetch failed): " & linkUrl)>
                    </cfcatch>
                </cftry>
            </cfif>
        </cfif>
    </cfloop>
</cfif>
|
|
|
|
<!---
    Scan every collected page for <img> tags and build:
      imageMappings - array of {filename, alt, src} for tags that carry alt text
                      (raw src as written in the markup)
      imageUrls     - struct used as a set of absolute http(s) image URLs
    Sources matching the decorative pattern (icons, logos, trackers...) are left out of both.
--->
<cfset allImages = arrayNew(1)>
<cfset imageUrls = structNew()>
<cfset imageMappings = arrayNew(1)><!--- For local HTML: filename -> alt text mappings --->

<cfloop array="#menuPages#" index="menuPage">
    <!--- Every img tag that has a quoted src attribute --->
    <cfset imgMatches = reMatchNoCase('<img[^>]+src=["'']([^"'']+)["''][^>]*>', menuPage.html)>

    <cfloop array="#imgMatches#" index="imgTag">
        <cfset imgSrc = reReplaceNoCase(imgTag, '.*src=["'']([^"'']+)["''].*', "\1")>

        <!--- Alt text, when the tag has a non-empty one --->
        <cfif reFindNoCase('alt=["'']([^"'']+)["'']', imgTag)>
            <cfset imgAlt = reReplaceNoCase(imgTag, '.*alt=["'']([^"'']+)["''].*', "\1")>
        <cfelse>
            <cfset imgAlt = "">
        </cfif>

        <!--- Last path segment, for matching against locally uploaded files --->
        <cfset imgFilename = listLast(imgSrc, "/\")>

        <!--- Record the mapping using the src exactly as it appears in the markup --->
        <cfif len(imgFilename) AND len(imgAlt) AND NOT reFindNoCase("(icon|favicon|logo|sprite|pixel|tracking|badge|button)", imgSrc)>
            <cfset mapping = {}>
            <cfset mapping["filename"] = imgFilename>
            <cfset mapping["alt"] = imgAlt>
            <cfset mapping["src"] = imgSrc>
            <cfset arrayAppend(imageMappings, mapping)>
        </cfif>

        <!--- Make the src absolute: root-relative uses baseUrl, other relative paths use basePath --->
        <cfif left(imgSrc, 1) EQ "/">
            <cfset imgSrc = baseUrl & imgSrc>
        <cfelseif NOT (reFindNoCase("^https?://", imgSrc) OR reFindNoCase("^data:", imgSrc))>
            <cfset imgSrc = basePath & imgSrc>
        </cfif>

        <!--- Keep only new absolute http(s) URLs that do not look like icons/logos/trackers --->
        <cfif reFindNoCase("^https?://", imgSrc)
            AND NOT structKeyExists(imageUrls, imgSrc)
            AND NOT reFindNoCase("(icon|favicon|logo|sprite|pixel|tracking|badge|button)", imgSrc)>
            <cfset imageUrls[imgSrc] = true>
        </cfif>
    </cfloop>
</cfloop>

<cfset arrayAppend(response.steps, "Found #structCount(imageUrls)# unique images")>
|
|
|
|
<!---
    Download candidate images and keep them as base64 "image" content blocks.
    Bounds:
      - at most 20 images are kept
      - only jpeg/png/gif/webp responses are accepted
      - images of 5000 bytes or less are skipped as likely decorative
      - images above maxImageBytes are skipped; the Claude API documents a
        per-image size limit (about 5 MB at time of writing - TODO confirm),
        and one oversized image would fail the whole request
      - downloading stops after a fixed time budget so that many slow or dead
        image URLs (10 s timeout each) cannot exhaust the 300 s request timeout
    Individual failures are best-effort and counted, not fatal.
--->
<cfset imageDataArray = arrayNew(1)>
<cfset downloadedCount = 0>
<cfset failedDownloads = 0>
<cfset maxImageBytes = 3500000><!--- raw bytes; base64 adds roughly one third --->
<cfset downloadDeadline = getTickCount() + 60000><!--- 60 s budget for all image downloads --->

<cfloop collection="#imageUrls#" item="imgUrl">
    <cfif downloadedCount GTE 20>
        <cfbreak>
    </cfif>
    <cfif getTickCount() GT downloadDeadline>
        <cfset arrayAppend(response.steps, "Image download time budget reached; remaining images skipped")>
        <cfbreak>
    </cfif>

    <cftry>
        <cfhttp url="#imgUrl#" method="GET" timeout="10" result="imgResult" getasbinary="yes">
        </cfhttp>

        <cfif val(imgResult.statusCode) EQ 200 AND isBinary(imgResult.fileContent)>
            <cfset contentType = structKeyExists(imgResult.responseHeader, "Content-Type") ? imgResult.responseHeader["Content-Type"] : "">
            <cfset imgBytes = len(imgResult.fileContent)>

            <cfif reFindNoCase("image/(jpeg|jpg|png|gif|webp)", contentType)
                AND imgBytes GT 5000
                AND imgBytes LTE maxImageBytes>

                <!--- Media type sent to the API; anything not png/gif/webp is reported as jpeg --->
                <cfset mediaType = "image/jpeg">
                <cfif findNoCase("png", contentType)><cfset mediaType = "image/png"></cfif>
                <cfif findNoCase("gif", contentType)><cfset mediaType = "image/gif"></cfif>
                <cfif findNoCase("webp", contentType)><cfset mediaType = "image/webp"></cfif>

                <cfset imgSource = structNew()>
                <cfset imgSource["type"] = "base64">
                <cfset imgSource["media_type"] = mediaType>
                <cfset imgSource["data"] = toBase64(imgResult.fileContent)>

                <!--- "url" is kept alongside the API fields so the original location can be reported back --->
                <cfset imgStruct = structNew()>
                <cfset imgStruct["type"] = "image">
                <cfset imgStruct["source"] = imgSource>
                <cfset imgStruct["url"] = imgUrl>

                <cfset arrayAppend(imageDataArray, imgStruct)>
                <cfset downloadedCount = downloadedCount + 1>
            </cfif>
        </cfif>
        <cfcatch>
            <!--- Best-effort: one broken image must not abort the run --->
            <cfset failedDownloads = failedDownloads + 1>
        </cfcatch>
    </cftry>
</cfloop>

<cfset arrayAppend(response.steps, "Downloaded #arrayLen(imageDataArray)# valid images")>
<cfif failedDownloads GT 0>
    <cfset arrayAppend(response.steps, "Skipped #failedDownloads# images that failed to download")>
</cfif>
|
|
|
|
<!---
    Concatenate the cleaned markup of every page into one text block.
    Each page is introduced by a "--- PAGE: <url> ---" marker line, and
    script elements, style elements and HTML comments are removed first.
--->
<cfset combinedHtml = "">
<cfloop array="#menuPages#" index="menuPage">
    <cfset cleanHtml = reReplaceNoCase(menuPage.html, "<script[^>]*>.*?</script>", "", "all")>
    <cfset cleanHtml = reReplaceNoCase(cleanHtml, "<style[^>]*>.*?</style>", "", "all")>
    <cfset cleanHtml = reReplaceNoCase(cleanHtml, "<!--.*?-->", "", "all")>

    <cfset pageHeader = chr(10) & "--- PAGE: " & menuPage.url & " ---" & chr(10)>
    <cfset combinedHtml = combinedHtml & pageHeader & cleanHtml>
</cfloop>

<!--- Keep only the first 100000 characters of the text sent to Claude --->
<cfif len(combinedHtml) GT 100000>
    <cfset combinedHtml = left(combinedHtml, 100000)>
</cfif>
|
|
|
|
<!--- Debug aid: list the text of every plain-text <h3> found in the combined HTML --->
<cfset h3Pattern = "<h3[^>]*>([^<]*)</h3>">
<cfset h3Tags = reMatchNoCase(h3Pattern, combinedHtml)>
<cfset h3Texts = arrayNew(1)>

<cfloop array="#h3Tags#" index="h3Tag">
    <!--- Strip the surrounding tag and keep non-blank headings only --->
    <cfset h3Text = trim(reReplaceNoCase(h3Tag, h3Pattern, "\1"))>
    <cfif len(h3Text) GT 0>
        <cfset arrayAppend(h3Texts, h3Text)>
    </cfif>
</cfloop>

<cfset response["DEBUG_H3_TAGS_FOUND"] = h3Texts>
<cfset arrayAppend(response.steps, "Found " & arrayLen(h3Texts) & " h3 tags in HTML")>
|
|
|
|
<!--- Instructions given to Claude: the JSON shape to return and the extraction rules --->
<cfset systemPrompt = "You are an expert at extracting structured menu data from restaurant website HTML. Extract ALL menu data visible in the HTML. Return valid JSON with these keys: business (object with name, address, phone, hours, brandColor), categories (array of category objects - each with name and optional subcategories array), modifiers (array of modifier templates with name, required boolean, appliesTo, categoryName if applicable, and options array), items (array with name, description, price, category, subcategory if applicable, modifiers array, and images object with all image URLs found). CRITICAL: Extract EVERY menu item visible in the HTML - do not skip any items. For categories: Look for h3 headers inside menu sections - these are subcategories. Group them under parent categories. Format: [{""name"":""Beverages"",""subcategories"":[{""name"":""Beer""},{""name"":""Wine""}]},{""name"":""Food""}]. For images: Extract ALL image URLs for each item as an object with keys like src, srcset, small, medium, large. Parse srcset attributes. For brandColor: suggest a vibrant hex color (6 digits, no hash symbol) based on the restaurant style. For hours: format as ""Mon-Fri 10:30am-10pm, Sat 11am-10pm, Sun 11am-9pm"". Include ALL days visible. For prices: extract as numbers (e.g., 12.99). For modifier options: use format {""name"": ""option"", ""price"": 0}. Return ONLY valid JSON, no markdown, no explanation.">

<!--- User message content: image blocks first, then one text block with the HTML --->
<cfset messagesContent = arrayNew(1)>

<!--- Only the first 10 downloaded images are attached; the "url" key is left out of the API payload --->
<cfset imgLimit = min(arrayLen(imageDataArray), 10)>
<cfloop from="1" to="#imgLimit#" index="imgIdx">
    <cfset imgContent = {}>
    <cfset imgContent["type"] = "image">
    <cfset imgContent["source"] = imageDataArray[imgIdx].source>
    <cfset arrayAppend(messagesContent, imgContent)>
</cfloop>

<cfset textBlock = {}>
<cfset textBlock["type"] = "text">
<cfset textBlock["text"] = "Extract menu data from this restaurant website HTML. The images above are from the same website - identify which ones are food photos that could be used as item images, and which could be header/banner images. Here is the HTML content:" & chr(10) & chr(10) & combinedHtml>
<cfset arrayAppend(messagesContent, textBlock)>

<cfset userMessage = {}>
<cfset userMessage["role"] = "user">
<cfset userMessage["content"] = messagesContent>

<!--- Messages API payload; keys are set with bracket notation so their lowercase spelling is preserved --->
<cfset requestBody = {}>
<cfset requestBody["model"] = "claude-sonnet-4-20250514">
<cfset requestBody["max_tokens"] = 8192>
<cfset requestBody["temperature"] = 0>
<cfset requestBody["system"] = systemPrompt>
<cfset requestBody["messages"] = [ userMessage ]>
|
|
|
|
<!---
    POST the request to the Claude Messages API and require HTTP 200.
    On any other status, throws "Claude API error: <status> - <detail>", where
    detail is the API's error.message when the body is a JSON error object and
    the raw body otherwise.
--->
<cfset arrayAppend(response.steps, "Sending to Claude API...")>

<cfhttp url="https://api.anthropic.com/v1/messages" method="POST" timeout="120" result="httpResult">
    <cfhttpparam type="header" name="Content-Type" value="application/json">
    <cfhttpparam type="header" name="x-api-key" value="#CLAUDE_API_KEY#">
    <cfhttpparam type="header" name="anthropic-version" value="2023-06-01">
    <cfhttpparam type="body" value="#serializeJSON(requestBody)#">
</cfhttp>

<!---
    statusCode is text such as "200 OK"; val() yields the leading number, or 0
    when there is none (e.g. a connection failure). A substring search for
    "200" would also accept unrelated statuses that merely contain those digits.
--->
<cfset httpStatusCode = val(httpResult.statusCode)>

<cfif httpStatusCode NEQ 200>
    <cfset errorDetail = "">
    <cftry>
        <cfset errorDetail = toString(httpResult.fileContent)>
        <cfif isJSON(errorDetail)>
            <cfset errorResponse = deserializeJSON(errorDetail)>
            <cfif isStruct(errorResponse)
                AND structKeyExists(errorResponse, "error")
                AND isStruct(errorResponse.error)
                AND structKeyExists(errorResponse.error, "message")>
                <cfset errorDetail = errorResponse.error.message>
            </cfif>
        </cfif>
        <cfcatch>
            <!--- Body could not be read or parsed; report whatever detail was gathered --->
        </cfcatch>
    </cftry>
    <cfthrow message="Claude API error: #httpResult.statusCode# - #errorDetail#">
</cfif>
|
|
|
|
<!---
    Turn the API reply into menuData.
      1. Take the first "text" content block.
      2. Strip a surrounding markdown code fence, trailing commas and control characters.
      3. Deserialize it as JSON.
    If deserialization fails, a JSON error payload is written and the request is
    aborted here. That payload keeps the historical "success"/"error" keys and
    also sets "MESSAGE", the key the endpoint's general error path uses, so
    clients need only one error check.
--->
<cfset claudeResponse = deserializeJSON(httpResult.fileContent)>
<cfif NOT isStruct(claudeResponse)
    OR NOT structKeyExists(claudeResponse, "content")
    OR NOT isArray(claudeResponse.content)
    OR NOT arrayLen(claudeResponse.content)>
    <cfthrow message="Empty response from Claude">
</cfif>

<!--- stop_reason "max_tokens" means the reply was cut off, the usual cause of unparseable JSON --->
<cfset responseTruncated = structKeyExists(claudeResponse, "stop_reason")
    AND isSimpleValue(claudeResponse.stop_reason)
    AND claudeResponse.stop_reason EQ "max_tokens">

<cfset responseText = "">
<cfloop array="#claudeResponse.content#" index="block">
    <cfif isStruct(block) AND structKeyExists(block, "type") AND block.type EQ "text" AND structKeyExists(block, "text")>
        <cfset responseText = block.text>
        <cfbreak>
    </cfif>
</cfloop>

<!--- Remove a leading ```json / ``` fence and a trailing ``` fence --->
<cfset responseText = trim(responseText)>
<cfif left(responseText, 7) EQ "```json">
    <cfset responseText = mid(responseText, 8, len(responseText) - 7)>
</cfif>
<cfif left(responseText, 3) EQ "```">
    <cfset responseText = mid(responseText, 4, len(responseText) - 3)>
</cfif>
<cfif right(responseText, 3) EQ "```">
    <cfset responseText = left(responseText, len(responseText) - 3)>
</cfif>
<cfset responseText = trim(responseText)>

<!--- Remove trailing commas before ] or } --->
<cfset responseText = reReplace(responseText, ",(\s*[\]\}])", "\1", "all")>
<!--- Replace control characters (including raw newlines) with spaces; they break JSON inside strings --->
<cfset responseText = reReplace(responseText, "[\x00-\x1F]", " ", "all")>

<cftry>
    <cfset menuData = deserializeJSON(responseText)>
    <cfcatch type="any">
        <!--- Parsing failed: return the start of the raw text so the problem can be diagnosed --->
        <cfset response["success"] = false>
        <cfset response["error"] = "JSON parse error: #cfcatch.message#">
        <cfif responseTruncated>
            <cfset response["MESSAGE"] = "Claude response was cut off at the max_tokens limit, so its JSON is incomplete">
        <cfelse>
            <cfset response["MESSAGE"] = response["error"]>
        </cfif>
        <cfset response["DEBUG_RAW_RESPONSE"] = left(responseText, 2000)>
        <cfset response["DEBUG_RESPONSE_LENGTH"] = len(responseText)>
        <cfcontent type="application/json; charset=utf-8" reset="true">
        <cfoutput>#serializeJSON(response)#</cfoutput>
        <cfabort>
    </cfcatch>
</cftry>
|
|
|
|
<!--- Debug: keep the cleaned Claude text exactly as it was parsed --->
<cfset response["DEBUG_RAW_CLAUDE"] = responseText>

<!--- URLs of the downloaded images, in download order, for the wizard to use --->
<cfset imageUrlList = arrayNew(1)>
<cfloop array="#imageDataArray#" index="imgData">
    <cfif structKeyExists(imgData, "url")>
        <cfset arrayAppend(imageUrlList, imgData.url)>
    </cfif>
</cfloop>

<!---
    Guarantee the shape the rest of the script relies on: menuData is an object,
    "business" is an object and "categories", "modifiers" and "items" are arrays.
    Valid JSON of the wrong shape (a top-level array, or "items" returned as an
    object) would otherwise fail later with an unrelated-looking error.
--->
<cfif NOT isStruct(menuData)>
    <cfthrow message="Claude returned JSON that is not an object">
</cfif>

<cfif NOT structKeyExists(menuData, "business") OR NOT isStruct(menuData.business)>
    <cfset menuData["business"] = structNew()>
</cfif>

<cfloop list="categories,modifiers,items" index="listKey">
    <cfif NOT structKeyExists(menuData, listKey) OR NOT isArray(menuData[listKey])>
        <cfset menuData[listKey] = arrayNew(1)>
    </cfif>
</cfloop>
|
|
|
|
<!---
    Flatten categories to a list of {name, itemCount: 0} entries containing
    parent categories only. Subcategory names are not emitted as categories;
    they are recorded in subcatToParentMap (lowercased name -> parent name).
    A category may arrive as a plain string or as an object with "name" and
    an optional "subcategories" array of strings or {name} objects.
--->
<cfset formattedCategories = arrayNew(1)>
<cfset subcatToParentMap = structNew()>

<cfloop array="#menuData.categories#" index="cat">
    <cfif isSimpleValue(cat)>
        <!--- Plain string category --->
        <cfset catObj = {}>
        <cfset catObj["name"] = cat>
        <cfset catObj["itemCount"] = 0>
        <cfset arrayAppend(formattedCategories, catObj)>
    <cfelseif isStruct(cat)>
        <cfset parentName = "">
        <cfif structKeyExists(cat, "name")>
            <cfset parentName = cat.name>
        </cfif>

        <!--- Objects without a name contribute nothing, not even their subcategories --->
        <cfif len(parentName)>
            <cfset catObj = {}>
            <cfset catObj["name"] = parentName>
            <cfset catObj["itemCount"] = 0>
            <cfset arrayAppend(formattedCategories, catObj)>

            <cfif structKeyExists(cat, "subcategories") AND isArray(cat.subcategories)>
                <cfloop array="#cat.subcategories#" index="subcat">
                    <cfif isSimpleValue(subcat)>
                        <cfset subcatName = subcat>
                    <cfelseif isStruct(subcat) AND structKeyExists(subcat, "name")>
                        <cfset subcatName = subcat.name>
                    <cfelse>
                        <cfset subcatName = "">
                    </cfif>

                    <cfif len(subcatName)>
                        <cfset subcatToParentMap[lcase(subcatName)] = parentName>
                    </cfif>
                </cfloop>
            </cfif>
        </cfif>
    </cfif>
</cfloop>

<cfset menuData["categories"] = formattedCategories>
|
|
|
|
<!---
    For every item:
      - if its "category" is really a subcategory name, replace it with the parent
      - if its "subcategory" maps to a parent, that parent wins over the above
      - assign a positional id of the form "item_<n>"
--->
<cfloop from="1" to="#arrayLen(menuData.items)#" index="i">
    <cfset item = menuData.items[i]>
    <cfset resolvedParent = "">

    <cfif structKeyExists(item, "category") AND len(item.category)>
        <cfset catKey = lcase(item.category)>
        <cfif structKeyExists(subcatToParentMap, catKey)>
            <cfset resolvedParent = subcatToParentMap[catKey]>
        </cfif>
    </cfif>

    <!--- Checked second so that it takes precedence, as with two successive assignments --->
    <cfif structKeyExists(item, "subcategory") AND len(item.subcategory)>
        <cfset subcatKey = lcase(item.subcategory)>
        <cfif structKeyExists(subcatToParentMap, subcatKey)>
            <cfset resolvedParent = subcatToParentMap[subcatKey]>
        </cfif>
    </cfif>

    <!--- Map values are never empty (only non-empty parent names are stored), so len() signals a match --->
    <cfif len(resolvedParent)>
        <cfset menuData.items[i]["category"] = resolvedParent>
    </cfif>

    <cfset menuData.items[i]["id"] = "item_" & i>
</cfloop>
|
|
|
|
<!---
    Derive image filenames for each item from the image URLs Claude reported.
      imageFilenames - {sizeKey -> filename} for every non-empty string in item.images
      imageSrc       - primary image: first non-empty of src, large, medium, small
      imageFilename  - filename of the primary image
    An item counts as "with images" only when at least one usable URL exists;
    an empty images object, or one holding only empty or non-string values,
    does not count. If item.images yields no primary image, a directly
    supplied item.imageSrc (legacy shape) is used instead.
--->
<cfset itemsWithImages = 0>
<cfset preferredImageKeys = ["src", "large", "medium", "small"]>

<cfloop from="1" to="#arrayLen(menuData.items)#" index="i">
    <cfset item = menuData.items[i]>
    <cfset hasImage = false>
    <cfset primaryFound = false>

    <cfif isStruct(item)>
        <cfif structKeyExists(item, "images") AND isStruct(item.images)>
            <cfset imgObj = item.images>

            <!--- Filename (last path segment) for each image size --->
            <cfset filenames = structNew()>
            <cfloop collection="#imgObj#" item="sizeKey">
                <!--- structKeyExists guards against JSON null values --->
                <cfif structKeyExists(imgObj, sizeKey)>
                    <cfset imgUrl = imgObj[sizeKey]>
                    <cfif isSimpleValue(imgUrl) AND len(trim(imgUrl))>
                        <cfset filenames[sizeKey] = listLast(imgUrl, "/\")>
                        <cfset hasImage = true>
                    </cfif>
                </cfif>
            </cfloop>
            <cfset menuData.items[i]["imageFilenames"] = filenames>

            <!--- Primary image for backwards compatibility: first usable key in preference order --->
            <cfloop array="#preferredImageKeys#" index="preferredKey">
                <cfif structKeyExists(imgObj, preferredKey)
                    AND isSimpleValue(imgObj[preferredKey])
                    AND len(trim(imgObj[preferredKey]))>
                    <cfset menuData.items[i]["imageSrc"] = imgObj[preferredKey]>
                    <cfset menuData.items[i]["imageFilename"] = listLast(imgObj[preferredKey], "/\")>
                    <cfset primaryFound = true>
                    <cfbreak>
                </cfif>
            </cfloop>
        </cfif>

        <!--- Legacy: Claude returned imageSrc directly on the item --->
        <cfif NOT primaryFound
            AND structKeyExists(item, "imageSrc")
            AND isSimpleValue(item.imageSrc)
            AND len(trim(item.imageSrc))>
            <cfset menuData.items[i]["imageFilename"] = listLast(item.imageSrc, "/\")>
            <cfset hasImage = true>
        </cfif>
    </cfif>

    <cfif hasImage>
        <cfset itemsWithImages = itemsWithImages + 1>
    </cfif>
</cfloop>

<cfset arrayAppend(response.steps, "Found images for " & itemsWithImages & " of " & arrayLen(menuData.items) & " items")>
|
|
|
|
<!--- Image information attached to the menu data itself --->
<cfset menuData["imageUrls"] = imageUrlList>
<cfset menuData["headerCandidateIndices"] = []>
<!--- filename -> alt text mappings, used to match images for local HTML uploads --->
<cfset menuData["imageMappings"] = imageMappings>

<!--- Success envelope --->
<cfset response["OK"] = true>
<cfset response["DATA"] = menuData>
<cfset response["sourceUrl"] = targetUrl>
<cfset response["pagesProcessed"] = arrayLen(menuPages)>
<cfset response["imagesFound"] = arrayLen(imageDataArray)>

<!--- Debug: subcategory -> parent map and the flattened category list --->
<cfset response["DEBUG_SUBCAT_MAP"] = subcatToParentMap>
<cfset response["DEBUG_RAW_CATEGORIES"] = menuData.categories>
|
|
|
|
<cfcatch type="any">
    <!--- Any failure above ends here: report the message, plus detail and the first stack frame when available --->
    <cfset response["MESSAGE"] = cfcatch.message>

    <cfif len(cfcatch.detail)>
        <cfset response["DETAIL"] = cfcatch.detail>
    </cfif>

    <cfif structKeyExists(cfcatch, "tagContext") AND arrayLen(cfcatch.tagContext) GT 0>
        <cfset firstFrame = cfcatch.tagContext[1]>
        <cfset response["DEBUG_LINE"] = firstFrame.line>
        <cfset response["DEBUG_TEMPLATE"] = firstFrame.template>
    </cfif>
</cfcatch>
|
|
</cftry>
|
|
|
|
<!--- Emit the response struct as the JSON body; reached on success and after the catch block has filled in the error fields --->
<cfoutput>#serializeJSON(response)#</cfoutput>
|