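<!--- Menu import endpoint: reads a JSON request body containing either "url" or "html",
      collects page HTML and images, asks the Claude API to extract menu data,
      and replies with a JSON envelope. --->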
<!--- Request bootstrap: no debug output, only explicit cfoutput content is emitted,
      the request may run for up to 300 seconds, and the reply is UTF-8 JSON. --->
<cfsetting showdebugoutput="false" enablecfoutputonly="true" requesttimeout="300">
<cfcontent type="application/json; charset=utf-8" reset="true">

<!--- Response envelope; "OK" is only flipped to true at the very end of the pipeline. --->
<cfset response = {}>
<cfset response["OK"] = false>
|
|
|
|
<cftry>
|
|
<!--- Resolve the Claude API key from ../../config/claude.json, relative to this template's directory. --->
<cfset configPath = getDirectoryFromPath(getCurrentTemplatePath()) & "../../config/claude.json">
<cfset CLAUDE_API_KEY = "">
<cfif fileExists(configPath)>
    <cfset configData = deserializeJSON(fileRead(configPath))>
    <cfif structKeyExists(configData, "apiKey")>
        <cfset CLAUDE_API_KEY = configData.apiKey>
    </cfif>
</cfif>

<!--- A missing file, a missing "apiKey" entry and an empty value all count as "not configured". --->
<cfif len(CLAUDE_API_KEY) EQ 0>
    <cfthrow message="Claude API key not configured">
</cfif>
|
|
|
|
<!--- Read and validate the JSON request body.
      Expected shape: a JSON object carrying either "html" (page markup) or "url". --->
<cfset requestBody = toString(getHttpRequestData().content)>
<cfif NOT len(trim(requestBody))>
    <cfthrow message="No request body provided">
</cfif>
<cfif NOT isJSON(requestBody)>
    <cfthrow message="Request body is not valid JSON">
</cfif>

<cfset requestData = deserializeJSON(requestBody)>
<!--- structKeyExists() below needs a struct; reject arrays and scalars with a clear message. --->
<cfif NOT isStruct(requestData)>
    <cfthrow message="Request body must be a JSON object">
</cfif>

<cfset response["steps"] = arrayNew(1)>
<cfset response["debug"] = structNew()>
<cfset response["debug"]["hasHtmlKey"] = structKeyExists(requestData, "html")>
<cfset response["debug"]["hasUrlKey"] = structKeyExists(requestData, "url")>

<!--- "html" and "url" must be strings. A non-string value (array/object) is dropped so it is
      treated as absent instead of breaking len()/trim() further down. --->
<cfloop list="html,url" index="fieldName">
    <cfif structKeyExists(requestData, fieldName) AND NOT isSimpleValue(requestData[fieldName])>
        <cfset structDelete(requestData, fieldName)>
    </cfif>
</cfloop>

<cfset response["debug"]["htmlLength"] = structKeyExists(requestData, "html") ? len(requestData.html) : 0>
<cfset response["debug"]["urlValue"] = structKeyExists(requestData, "url") ? requestData.url : "">

<!--- Working variables filled in by the source-selection step. --->
<cfset pageHtml = "">
<cfset baseUrl = "">
<cfset basePath = "">
<cfset targetUrl = "">
|
|
|
|
<!--- Choose the page source: HTML supplied in the request wins over a URL to fetch. --->
<cfif structKeyExists(requestData, "html") AND len(trim(requestData.html))>
    <cfset pageHtml = trim(requestData.html)>
    <cfset arrayAppend(response.steps, "Using provided HTML content: " & len(pageHtml) & " bytes")>
    <!--- No base URL for supplied content, so relative links and images cannot be resolved. --->
    <cfset baseUrl = "">
    <cfset basePath = "">
<cfelseif structKeyExists(requestData, "url") AND len(trim(requestData.url))>
    <cfset targetUrl = trim(requestData.url)>

    <!--- Default to https when the caller omitted the scheme. --->
    <cfif NOT reFindNoCase("^https?://", targetUrl)>
        <cfset targetUrl = "https://" & targetUrl>
    </cfif>

    <!--- NOTE(review): targetUrl comes straight from the request body and is fetched server-side.
          Consider rejecting loopback/private hosts to limit SSRF exposure. --->
    <cfset arrayAppend(response.steps, "Fetching URL: " & targetUrl)>

    <!--- Fetch the main page with browser-like headers. --->
    <cfhttp url="#targetUrl#" method="GET" timeout="30" result="mainPage" useragent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36">
        <cfhttpparam type="header" name="Accept" value="text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8">
        <cfhttpparam type="header" name="Accept-Language" value="en-US,en;q=0.9">
        <!--- Brotli ("br") is deliberately not advertised: a server that answers with it would
              hand back bytes cfhttp is not documented to decode, i.e. unreadable HTML. --->
        <cfhttpparam type="header" name="Accept-Encoding" value="gzip, deflate">
        <cfhttpparam type="header" name="Sec-Fetch-Dest" value="document">
        <cfhttpparam type="header" name="Sec-Fetch-Mode" value="navigate">
        <cfhttpparam type="header" name="Sec-Fetch-Site" value="none">
        <cfhttpparam type="header" name="Sec-Fetch-User" value="?1">
        <cfhttpparam type="header" name="Upgrade-Insecure-Requests" value="1">
    </cfhttp>

    <!--- Any status line containing "200" counts as success. --->
    <cfif NOT findNoCase("200", mainPage.statusCode)>
        <cfthrow message="Failed to fetch URL: #mainPage.statusCode#">
    </cfif>

    <cfset pageHtml = mainPage.fileContent>
    <cfset arrayAppend(response.steps, "Fetched " & len(pageHtml) & " bytes")>

    <!--- baseUrl = scheme + host, used for root-relative links. The host stops at the first
          "/", "?" or "##" so a URL without a path does not drag its query string along. --->
    <cfset baseUrl = reReplaceNoCase(targetUrl, "^(https?://[^/?##]+).*$", "\1")>

    <!--- basePath = directory of the fetched page, always ending in "/", used for
          document-relative links. Derived from the path only (query and fragment removed).
          A URL with no path at all (https://example.com) resolves to "<host>/"; the previous
          expression reduced that case to just "https://". --->
    <cfset pagePath = reReplaceNoCase(targetUrl, "^https?://[^/?##]+", "")>
    <cfset pagePath = reReplace(pagePath, "[?##].*$", "")>
    <cfif NOT len(pagePath)>
        <cfset pagePath = "/">
    </cfif>
    <cfset basePath = baseUrl & reReplace(pagePath, "/[^/]*$", "/")>
<cfelse>
    <cfthrow message="Either 'url' or 'html' content is required">
</cfif>
|
|
|
|
<!--- Gather the pages to analyse: the main page plus same-site pages whose href mentions
      menu/food/dishes/order, five pages in total at most. --->
<cfset menuPages = arrayNew(1)>
<cfset arrayAppend(menuPages, { url: targetUrl, html: pageHtml })>

<!--- Candidate links: any href containing one of the menu-related keywords. --->
<cfset menuLinkPatterns = 'href=["'']([^"'']*(?:menu|food|dishes|order)[^"'']*)["'']'>
<cfset menuLinks = reMatchNoCase(menuLinkPatterns, pageHtml)>

<!--- URLs already attempted, so a link repeated in nav and footer is fetched only once
      and cannot use up the page limit with duplicates. --->
<cfset seenLinks = structNew()>

<cfloop array="#menuLinks#" index="linkMatch">
    <cfset linkUrl = reReplaceNoCase(linkMatch, 'href=["'']([^"'']*)["'']', "\1")>
    <!--- href values are HTML-escaped; a fragment never changes which document is fetched. --->
    <cfset linkUrl = replaceNoCase(linkUrl, "&amp;", "&", "all")>
    <cfset linkUrl = reReplace(linkUrl, "##.*$", "")>

    <!--- Without a base URL (HTML supplied directly) nothing can be resolved or fetched. --->
    <cfset isSameSite = false>
    <cfif len(linkUrl) AND len(baseUrl)>
        <cfif left(linkUrl, 2) EQ "//">
            <!--- Protocol-relative: reuse the scheme of the fetched page. --->
            <cfset linkUrl = listFirst(baseUrl, ":") & ":" & linkUrl>
        <cfelseif left(linkUrl, 1) EQ "/">
            <cfset linkUrl = baseUrl & linkUrl>
        <cfelseif NOT reFindNoCase("^[a-z][a-z0-9+.-]*:", linkUrl)>
            <!--- No scheme at all: document-relative. Links with some other scheme
                  (mailto:, tel:, javascript:) are left alone and fail the same-site test. --->
            <cfset linkUrl = basePath & linkUrl>
        </cfif>

        <!--- Same site means the URL starts with scheme + host, not merely contains it. --->
        <cfset isSameSite = compareNoCase(linkUrl, baseUrl) EQ 0 OR compareNoCase(left(linkUrl, len(baseUrl) + 1), baseUrl & "/") EQ 0>
    </cfif>

    <cfif isSameSite AND linkUrl NEQ targetUrl AND NOT structKeyExists(seenLinks, linkUrl)>
        <cfset seenLinks[linkUrl] = true>
        <cftry>
            <cfhttp url="#linkUrl#" method="GET" timeout="15" result="subPage" useragent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36">
                <cfhttpparam type="header" name="Accept" value="text/html,application/xhtml+xml">
            </cfhttp>

            <cfif findNoCase("200", subPage.statusCode)>
                <cfset arrayAppend(menuPages, { url: linkUrl, html: subPage.fileContent })>
                <cfset arrayAppend(response.steps, "Found menu page: " & linkUrl)>
            </cfif>
            <cfcatch>
                <!--- Best effort: a failing sub-page is recorded and skipped, never fatal. --->
                <cfset arrayAppend(response.steps, "Skipped menu page (request failed): " & linkUrl)>
            </cfcatch>
        </cftry>
    </cfif>

    <!--- Limit to 5 pages max (main page included). --->
    <cfif arrayLen(menuPages) GTE 5>
        <cfbreak>
    </cfif>
</cfloop>
|
|
|
|
<!--- Scan every collected page for img tags and build:
        imageUrls     - set of absolute http(s) image URLs to download (struct used as a set)
        imageMappings - filename/alt/src triples for matching locally uploaded images --->
<cfset allImages = arrayNew(1)>
<cfset imageUrls = structNew()>
<cfset imageMappings = arrayNew(1)>

<!--- URL fragments that mark decorative assets rather than menu photos. --->
<cfset skipImagePattern = "(icon|favicon|logo|sprite|pixel|tracking|badge|button)">
<!--- Scheme applied to protocol-relative sources; https when there is no fetched page. --->
<cfset pageScheme = len(baseUrl) ? listFirst(baseUrl, ":") : "https">

<cfloop array="#menuPages#" index="menuPage">
    <!--- Find all img tags that carry a quoted src attribute. --->
    <cfset imgMatches = reMatchNoCase('<img[^>]+src=["'']([^"'']+)["''][^>]*>', menuPage.html)>

    <cfloop array="#imgMatches#" index="imgTag">
        <cfset imgSrc = reReplaceNoCase(imgTag, '.*src=["'']([^"'']+)["''].*', "\1")>
        <!--- Attribute values are HTML-escaped; decode "&amp;" so query strings stay valid. --->
        <cfset imgSrc = replaceNoCase(imgSrc, "&amp;", "&", "all")>

        <!--- Alt text, when present, is used for the filename mapping. --->
        <cfset imgAlt = "">
        <cfif reFindNoCase('alt=["'']([^"'']+)["'']', imgTag)>
            <cfset imgAlt = reReplaceNoCase(imgTag, '.*alt=["'']([^"'']+)["''].*', "\1")>
        </cfif>

        <!--- Last path segment (either slash style) for matching local uploads. --->
        <cfset imgFilename = listLast(imgSrc, "/\")>
        <cfif len(imgFilename) AND len(imgAlt) AND NOT reFindNoCase(skipImagePattern, imgSrc)>
            <cfset mapping = structNew()>
            <cfset mapping["filename"] = imgFilename>
            <cfset mapping["alt"] = imgAlt>
            <cfset mapping["src"] = imgSrc>
            <cfset arrayAppend(imageMappings, mapping)>
        </cfif>

        <!--- Resolve to an absolute URL. The protocol-relative case ("//cdn.example.com/x.jpg")
              must be checked before the root-relative one, otherwise it would be glued
              onto baseUrl and produce an invalid address. --->
        <cfif left(imgSrc, 2) EQ "//">
            <cfset imgSrc = pageScheme & ":" & imgSrc>
        <cfelseif left(imgSrc, 1) EQ "/">
            <cfset imgSrc = baseUrl & imgSrc>
        <cfelseif NOT reFindNoCase("^https?://", imgSrc) AND NOT reFindNoCase("^data:", imgSrc)>
            <cfset imgSrc = basePath & imgSrc>
        </cfif>

        <!--- Keep only absolute http(s) URLs that are new and do not look decorative.
              Data URLs and unresolved relative paths drop out here. --->
        <cfif reFindNoCase("^https?://", imgSrc) AND NOT structKeyExists(imageUrls, imgSrc) AND NOT reFindNoCase(skipImagePattern, imgSrc)>
            <cfset imageUrls[imgSrc] = true>
        </cfif>
    </cfloop>
</cfloop>

<cfset arrayAppend(response.steps, "Found #structCount(imageUrls)# unique images")>
|
|
|
|
<!--- Download candidate images and keep up to 20 usable ones as base64 image blocks.
      Only successful downloads count toward the limit of 20, so the loop is also bounded
      by a time budget: many slow or failing URLs (10 s timeout each) must not consume
      the request timeout before the Claude call is made. --->
<cfset imageDataArray = arrayNew(1)>
<cfset downloadedCount = 0>
<cfset imageBudgetMs = 60000>
<cfset imageFetchStart = getTickCount()>

<cfloop collection="#imageUrls#" item="imgUrl">
    <cfif downloadedCount GTE 20>
        <cfbreak>
    </cfif>
    <cfif getTickCount() - imageFetchStart GT imageBudgetMs>
        <cfset arrayAppend(response.steps, "Image download time budget reached; skipping remaining images")>
        <cfbreak>
    </cfif>

    <cftry>
        <cfhttp url="#imgUrl#" method="GET" timeout="10" result="imgResult" getasbinary="yes">
        </cfhttp>

        <cfif findNoCase("200", imgResult.statusCode) AND isBinary(imgResult.fileContent)>
            <!--- Only the raster formats matched below are accepted. --->
            <cfset contentType = structKeyExists(imgResult.responseHeader, "Content-Type") ? imgResult.responseHeader["Content-Type"] : "">

            <cfif reFindNoCase("image/(jpeg|jpg|png|gif|webp)", contentType)>
                <!--- Skip tiny images (5000 bytes or less). --->
                <cfset imgBytes = len(imgResult.fileContent)>

                <cfif imgBytes GT 5000>
                    <!--- Map the Content-Type onto the media type sent to the API; JPEG is the fallback. --->
                    <cfset mediaType = "image/jpeg">
                    <cfif findNoCase("png", contentType)><cfset mediaType = "image/png"></cfif>
                    <cfif findNoCase("gif", contentType)><cfset mediaType = "image/gif"></cfif>
                    <cfif findNoCase("webp", contentType)><cfset mediaType = "image/webp"></cfif>

                    <cfset imgSource = structNew()>
                    <cfset imgSource["type"] = "base64">
                    <cfset imgSource["media_type"] = mediaType>
                    <cfset imgSource["data"] = toBase64(imgResult.fileContent)>

                    <!--- "url" is kept alongside the image block so the original address
                          can be reported back to the caller later. --->
                    <cfset imgStruct = structNew()>
                    <cfset imgStruct["type"] = "image">
                    <cfset imgStruct["source"] = imgSource>
                    <cfset imgStruct["url"] = imgUrl>

                    <cfset arrayAppend(imageDataArray, imgStruct)>
                    <cfset downloadedCount = downloadedCount + 1>
                </cfif>
            </cfif>
        </cfif>
        <cfcatch>
            <!--- Best effort: a failed download is skipped on purpose. --->
        </cfcatch>
    </cftry>
</cfloop>

<cfset arrayAppend(response.steps, "Downloaded #arrayLen(imageDataArray)# valid images")>
|
|
|
|
<!--- Merge every collected page into one text block, each prefixed with a
      "--- PAGE: <url> ---" marker line. --->
<cfset combinedHtml = "">
<cfloop array="#menuPages#" index="menuPage">
    <!--- Remove script blocks first, then style blocks, then HTML comments. --->
    <cfset strippedHtml = reReplaceNoCase(menuPage.html, "<script[^>]*>.*?</script>", "", "all")>
    <cfset strippedHtml = reReplaceNoCase(strippedHtml, "<style[^>]*>.*?</style>", "", "all")>
    <cfset strippedHtml = reReplaceNoCase(strippedHtml, "<!--.*?-->", "", "all")>
    <cfset pageHeader = chr(10) & "--- PAGE: " & menuPage.url & " ---" & chr(10)>
    <cfset combinedHtml = combinedHtml & pageHeader & strippedHtml>
</cfloop>

<!--- Cap the text sent to Claude at 100000 characters. --->
<cfif len(combinedHtml) GT 100000>
    <cfset combinedHtml = left(combinedHtml, 100000)>
</cfif>
|
|
|
|
<!--- Debug aid: collect the text of every h3 and h4 element whose content is plain text
      (no nested tags) and expose them as DEBUG_H3_TAGS / DEBUG_H4_TAGS. --->
<cfset headingTexts = structNew()>
<cfloop list="h3,h4" index="headingTag">
    <cfset headingPattern = "<" & headingTag & "[^>]*>([^<]*)</" & headingTag & ">">
    <cfset headingTexts[headingTag] = arrayNew(1)>
    <cfloop array="#reMatchNoCase(headingPattern, combinedHtml)#" index="headingMatch">
        <cfset headingText = trim(reReplaceNoCase(headingMatch, headingPattern, "\1"))>
        <!--- Headings that are empty after trimming are ignored. --->
        <cfif len(headingText)>
            <cfset arrayAppend(headingTexts[headingTag], headingText)>
        </cfif>
    </cfloop>
    <cfset response["DEBUG_" & uCase(headingTag) & "_TAGS"] = headingTexts[headingTag]>
</cfloop>

<!--- Debug aid: raw HTML from up to 200 characters before the first h3 to 500 after it. --->
<cfset firstH3At = findNoCase("<h3", combinedHtml)>
<cfif firstH3At GT 0>
    <cfset snippetFrom = max(1, firstH3At - 200)>
    <cfset snippetTo = min(len(combinedHtml), firstH3At + 500)>
    <cfset response["DEBUG_HTML_SNIPPET"] = mid(combinedHtml, snippetFrom, snippetTo - snippetFrom)>
</cfif>
<cfset arrayAppend(response.steps, "Found " & arrayLen(headingTexts["h3"]) & " h3 and " & arrayLen(headingTexts["h4"]) & " h4 tags")>
|
|
|
|
<!--- Instructions given to Claude as the system prompt. --->
<cfset systemPrompt = "You are an expert at extracting structured menu data from restaurant website HTML. Extract ALL menu data visible in the HTML. Return valid JSON with these keys: business (object with name, address, phone, hours, brandColor), categories (array of category names), modifiers (array), items (array with name, description, price, category, subcategory, modifiers array, and imageUrl if found). CRITICAL: Extract EVERY menu item. SUBCATEGORY RULE: If a section header (like h3) has NO menu items directly below it, but contains NESTED sections (each with their own h3 and items), then: the outer section is the PARENT CATEGORY, the inner sections are SUBCATEGORIES. For items in subcategories, set category to the PARENT name and subcategory to the inner section name. Example: outer h3 says 'Drinks', inner h3s say 'Beer' and 'Wine' with items under them - those items should have category='Drinks' and subcategory='Beer' or 'Wine'. For brandColor: suggest a vibrant hex (6 digits, no hash). For prices: numbers (e.g., 12.99). Return ONLY valid JSON.">

<!--- User message content: image blocks first, then one text block with the HTML. --->
<cfset messagesContent = []>

<!--- At most the first 10 downloaded images are attached. --->
<cfset imgLimit = min(arrayLen(imageDataArray), 10)>
<cfloop from="1" to="#imgLimit#" index="imgIdx">
    <!--- Only "type" and "source" are forwarded; the stored "url" is left out. --->
    <cfset imgContent = {}>
    <cfset imgContent["type"] = "image">
    <cfset imgContent["source"] = imageDataArray[imgIdx].source>
    <cfset arrayAppend(messagesContent, imgContent)>
</cfloop>

<cfset textBlock = {}>
<cfset textBlock["type"] = "text">
<cfset textBlock["text"] = "Extract menu data from this restaurant website HTML. The images above are from the same website - identify which ones are food photos that could be used as item images, and which could be header/banner images. Here is the HTML content:" & chr(10) & chr(10) & combinedHtml>
<cfset arrayAppend(messagesContent, textBlock)>

<cfset userMessage = {}>
<cfset userMessage["role"] = "user">
<cfset userMessage["content"] = messagesContent>

<!--- Request payload; bracket notation keeps the lower-case key names. --->
<cfset requestBody = {}>
<cfset requestBody["model"] = "claude-sonnet-4-20250514">
<cfset requestBody["max_tokens"] = 8192>
<cfset requestBody["temperature"] = 0>
<cfset requestBody["system"] = systemPrompt>
<cfset requestBody["messages"] = [ userMessage ]>

<cfset arrayAppend(response.steps, "Sending to Claude API...")>
|
|
|
|
<!--- POST the request to the Claude Messages endpoint (120 second timeout). --->
<cfhttp url="https://api.anthropic.com/v1/messages" method="POST" timeout="120" result="httpResult">
    <cfhttpparam type="header" name="Content-Type" value="application/json">
    <cfhttpparam type="header" name="x-api-key" value="#CLAUDE_API_KEY#">
    <cfhttpparam type="header" name="anthropic-version" value="2023-06-01">
    <cfhttpparam type="body" value="#serializeJSON(requestBody)#">
</cfhttp>

<!--- Reduce the status to a number: a purely numeric status is used as is, a status line
      containing "200" becomes 200, and anything else becomes 0. --->
<cfset httpStatusCode = 0>
<cfif isNumeric(httpResult.statusCode)>
    <cfset httpStatusCode = int(httpResult.statusCode)>
<cfelseif findNoCase("200", httpResult.statusCode)>
    <cfset httpStatusCode = 200>
</cfif>

<cfif httpStatusCode NEQ 200>
    <!--- Prefer the API's own error.message; fall back to the raw body when the body
          is not JSON or has a different shape. --->
    <cfset errorDetail = httpResult.fileContent>
    <cftry>
        <cfset errorResponse = deserializeJSON(httpResult.fileContent)>
        <cfif structKeyExists(errorResponse, "error") AND structKeyExists(errorResponse.error, "message")>
            <cfset errorDetail = errorResponse.error.message>
        </cfif>
        <cfcatch>
            <cfset errorDetail = httpResult.fileContent>
        </cfcatch>
    </cftry>
    <cfthrow message="Claude API error: #httpResult.statusCode# - #errorDetail#">
</cfif>
|
|
|
|
<!--- Decode the API reply and turn the first text block into menuData.
      When the text cannot be parsed as JSON, a debug response is written and the
      request is aborted. --->
<cfset claudeResponse = deserializeJSON(httpResult.fileContent)>
<cfif NOT isStruct(claudeResponse) OR NOT structKeyExists(claudeResponse, "content") OR NOT isArray(claudeResponse.content) OR NOT arrayLen(claudeResponse.content)>
    <cfthrow message="Empty response from Claude">
</cfif>

<!--- Use the first content block of type "text". --->
<cfset responseText = "">
<cfloop array="#claudeResponse.content#" index="block">
    <cfif structKeyExists(block, "type") AND block.type EQ "text">
        <cfset responseText = block.text>
        <cfbreak>
    </cfif>
</cfloop>

<!--- stop_reason "max_tokens" means the reply was cut off at the token limit, which is
      the usual reason for unparseable (truncated) JSON. --->
<cfset responseTruncated = structKeyExists(claudeResponse, "stop_reason") AND claudeResponse.stop_reason EQ "max_tokens">

<!--- Strip a surrounding Markdown code fence, if any. --->
<cfset responseText = trim(responseText)>
<cfif left(responseText, 7) EQ "```json">
    <cfset responseText = mid(responseText, 8, len(responseText) - 7)>
</cfif>
<cfif left(responseText, 3) EQ "```">
    <cfset responseText = mid(responseText, 4, len(responseText) - 3)>
</cfif>
<cfif right(responseText, 3) EQ "```">
    <cfset responseText = left(responseText, len(responseText) - 3)>
</cfif>
<cfset responseText = trim(responseText)>
<!--- Remove trailing commas before ] or }. --->
<cfset responseText = reReplace(responseText, ",(\s*[\]\}])", "\1", "all")>
<!--- Replace control characters, which break JSON parsing, with spaces. --->
<cfset responseText = reReplace(responseText, "[\x00-\x1F]", " ", "all")>

<cftry>
    <cfset menuData = deserializeJSON(responseText)>
    <cfcatch type="any">
        <cfset parseError = "JSON parse error: #cfcatch.message#">
        <cfif responseTruncated>
            <cfset parseError = parseError & " (Claude stopped at the max_tokens limit, so the JSON is likely truncated)">
        </cfif>
        <!--- Report through the same OK/MESSAGE keys as every other failure in this template;
              "success" and "error" are kept for callers that already read them. --->
        <cfset response["OK"] = false>
        <cfset response["MESSAGE"] = parseError>
        <cfset response["success"] = false>
        <cfset response["error"] = parseError>
        <cfset response["DEBUG_RAW_RESPONSE"] = left(responseText, 2000)>
        <cfset response["DEBUG_RESPONSE_LENGTH"] = len(responseText)>
        <!--- Same content type (including charset) as the regular response. --->
        <cfcontent type="application/json; charset=utf-8" reset="true">
        <cfoutput>#serializeJSON(response)#</cfoutput>
        <cfabort>
    </cfcatch>
</cftry>
|
|
|
|
<!--- Debug: keep the cleaned-up text that was parsed into menuData. --->
<cfset response["DEBUG_RAW_CLAUDE"] = responseText>

<!--- URLs of the images that were downloaded, for the wizard to use. --->
<cfset imageUrlList = arrayNew(1)>
<cfloop array="#imageDataArray#" index="imgData">
    <cfif structKeyExists(imgData, "url")>
        <cfset arrayAppend(imageUrlList, imgData.url)>
    </cfif>
</cfloop>

<!--- Guarantee the shape the following steps rely on. Checking key presence alone is not
      enough: a top-level array, or "items"/"categories" returned as an object or string,
      would otherwise break the array loops below with an unrelated engine error. --->
<cfif NOT isStruct(menuData)>
    <cfthrow message="Claude returned JSON that is not an object">
</cfif>
<cfif NOT structKeyExists(menuData, "business") OR NOT isStruct(menuData.business)>
    <cfset menuData["business"] = structNew()>
</cfif>
<cfloop list="categories,modifiers,items" index="listKey">
    <cfif NOT structKeyExists(menuData, listKey) OR NOT isArray(menuData[listKey])>
        <cfset menuData[listKey] = arrayNew(1)>
    </cfif>
</cfloop>
|
|
|
|
<!--- Normalise menuData.categories into an array of { name, itemCount } entries.
      Entries may be plain names or structs with "name" and optional "subcategories".
      Subcategories get no entry of their own; they are recorded in subcatToParentMap
      (lower-cased subcategory name -> parent name). --->
<cfset formattedCategories = []>
<cfset subcatToParentMap = {}>

<cfloop array="#menuData.categories#" index="cat">
    <cfset parentName = "">
    <cfset keepCategory = false>
    <cfif isSimpleValue(cat)>
        <!--- Plain names are always kept as given. --->
        <cfset parentName = cat>
        <cfset keepCategory = true>
    <cfelseif isStruct(cat)>
        <!--- Struct entries are kept only when they carry a non-empty name. --->
        <cfif structKeyExists(cat, "name")>
            <cfset parentName = cat.name>
        </cfif>
        <cfset keepCategory = len(parentName) GT 0>
    </cfif>

    <cfif keepCategory>
        <cfset catObj = {}>
        <cfset catObj["name"] = parentName>
        <cfset catObj["itemCount"] = 0>
        <cfset arrayAppend(formattedCategories, catObj)>

        <cfif isStruct(cat) AND structKeyExists(cat, "subcategories") AND isArray(cat.subcategories)>
            <cfloop array="#cat.subcategories#" index="subcat">
                <!--- A subcategory is either a plain name or a struct with "name". --->
                <cfset subcatName = "">
                <cfif isStruct(subcat) AND structKeyExists(subcat, "name")>
                    <cfset subcatName = subcat.name>
                <cfelseif isSimpleValue(subcat)>
                    <cfset subcatName = subcat>
                </cfif>
                <cfif len(subcatName)>
                    <cfset subcatToParentMap[lcase(subcatName)] = parentName>
                </cfif>
            </cfloop>
        </cfif>
    </cfif>
</cfloop>
<cfset menuData["categories"] = formattedCategories>
|
|
|
|
<!--- For every item: move it to the parent category when its "category" or its
      "subcategory" names a known subcategory, then give it a sequential id
      ("item_1", "item_2", ...). --->
<cfset itemTotal = arrayLen(menuData.items)>
<cfloop from="1" to="#itemTotal#" index="i">
    <cfset item = menuData.items[i]>

    <!--- "category" is checked first, "subcategory" second; a match on "subcategory"
          therefore has the final say. --->
    <cfloop list="category,subcategory" index="fieldName">
        <cfif structKeyExists(item, fieldName) AND len(item[fieldName])>
            <cfset lookupKey = lcase(item[fieldName])>
            <cfif structKeyExists(subcatToParentMap, lookupKey)>
                <cfset menuData.items[i]["category"] = subcatToParentMap[lookupKey]>
            </cfif>
        </cfif>
    </cfloop>

    <cfset menuData.items[i]["id"] = "item_" & i>
</cfloop>
|
|
|
|
<!--- Derive image fields for each item from whatever image reference Claude returned:
        item.images   (struct of size -> URL): sets imageFilenames, imageSrc, imageFilename
        item.imageSrc (string, legacy):        sets imageFilename
        item.imageUrl (string):                sets imageSrc and imageFilename
      "imageUrl" is the field the system prompt actually asks for; it was previously
      ignored, so items returned in the requested format never got an image. --->
<cfset itemsWithImages = 0>
<cfloop from="1" to="#arrayLen(menuData.items)#" index="i">
    <cfset item = menuData.items[i]>

    <cfif structKeyExists(item, "images") AND isStruct(item.images)>
        <cfset imgObj = item.images>
        <cfset itemsWithImages = itemsWithImages + 1>

        <!--- Filename (last path segment) for every non-empty string entry. --->
        <cfset filenames = structNew()>
        <cfloop collection="#imgObj#" item="sizeKey">
            <cfset imgUrl = imgObj[sizeKey]>
            <cfif isSimpleValue(imgUrl) AND len(trim(imgUrl))>
                <cfset filenames[sizeKey] = listLast(imgUrl, "/\")>
            </cfif>
        </cfloop>
        <cfset menuData.items[i]["imageFilenames"] = filenames>

        <!--- Primary image for backwards compatibility: the first usable entry in the order
              src, large, medium, small. Empty or non-string entries are skipped so they
              neither win over a usable size nor break listLast(). --->
        <cfloop list="src,large,medium,small" index="primaryKey">
            <cfif structKeyExists(imgObj, primaryKey) AND isSimpleValue(imgObj[primaryKey]) AND len(trim(imgObj[primaryKey]))>
                <cfset menuData.items[i]["imageSrc"] = imgObj[primaryKey]>
                <cfset menuData.items[i]["imageFilename"] = listLast(imgObj[primaryKey], "/\")>
                <cfbreak>
            </cfif>
        </cfloop>
    <cfelseif structKeyExists(item, "imageSrc") AND isSimpleValue(item.imageSrc) AND len(trim(item.imageSrc))>
        <!--- Legacy: Claude returned imageSrc directly. --->
        <cfset menuData.items[i]["imageFilename"] = listLast(item.imageSrc, "/\")>
        <cfset itemsWithImages = itemsWithImages + 1>
    <cfelseif structKeyExists(item, "imageUrl") AND isSimpleValue(item.imageUrl) AND len(trim(item.imageUrl))>
        <!--- Field named in the system prompt; mirror it into the imageSrc/imageFilename pair. --->
        <cfset menuData.items[i]["imageSrc"] = item.imageUrl>
        <cfset menuData.items[i]["imageFilename"] = listLast(item.imageUrl, "/\")>
        <cfset itemsWithImages = itemsWithImages + 1>
    </cfif>
</cfloop>
<cfset arrayAppend(response.steps, "Found images for " & itemsWithImages & " of " & arrayLen(menuData.items) & " items")>
|
|
|
|
<!--- Attach image information to the menu data:
        imageUrls              - URLs of the images that were downloaded
        headerCandidateIndices - always an empty array here
        imageMappings          - filename/alt/src triples collected from the img tags --->
<cfset menuData["imageMappings"] = imageMappings>
<cfset menuData["headerCandidateIndices"] = []>
<cfset menuData["imageUrls"] = imageUrlList>

<!--- Success envelope. --->
<cfset response["DATA"] = menuData>
<cfset response["sourceUrl"] = targetUrl>
<cfset response["pagesProcessed"] = arrayLen(menuPages)>
<cfset response["imagesFound"] = arrayLen(imageDataArray)>
<cfset response["OK"] = true>

<!--- Debug: the subcategory -> parent map and the category list as it stands at this point. --->
<cfset response["DEBUG_SUBCAT_MAP"] = subcatToParentMap>
<cfset response["DEBUG_RAW_CATEGORIES"] = menuData.categories>
|
|
|
|
<cfcatch type="any">
    <!--- Any failure above lands here: report the message, the detail when there is one,
          and the line/template of the first tag-context frame. "OK" is not touched here. --->
    <cfset response["MESSAGE"] = cfcatch.message>
    <cfif len(cfcatch.detail) GT 0>
        <cfset response["DETAIL"] = cfcatch.detail>
    </cfif>
    <cfif structKeyExists(cfcatch, "tagContext") AND arrayLen(cfcatch.tagContext) GT 0>
        <cfset failingFrame = cfcatch.tagContext[1]>
        <cfset response["DEBUG_LINE"] = failingFrame.line>
        <cfset response["DEBUG_TEMPLATE"] = failingFrame.template>
    </cfif>
</cfcatch>
|
|
</cftry>
|
|
|
|
<!--- Runs after the try/catch, so it serialises both successful and caught-error envelopes. --->
<cfoutput>#serializeJSON(response)#</cfoutput>
|