867 lines
46 KiB
Text
867 lines
46 KiB
Text
<cfsetting showdebugoutput="false">
|
|
<cfsetting enablecfoutputonly="true">
|
|
<cfsetting requesttimeout="300">
|
|
<cfcontent type="application/json; charset=utf-8" reset="true">
|
|
|
|
<cfset response = structNew()>
|
|
<cfset response["OK"] = false>
|
|
|
|
<cftry>
|
|
<!---
    Load the Claude API key from config/claude.json, resolved relative to this template's directory.
    A config file that is not valid JSON is reported with an explicit message instead of a raw
    parser error. A missing file, a missing/non-string "apiKey", or a blank key all end in
    "Claude API key not configured".
--->
<cfset CLAUDE_API_KEY = "">
<cfset configPath = getDirectoryFromPath(getCurrentTemplatePath()) & "../../config/claude.json">
<cfif fileExists(configPath)>
    <cfset configRaw = fileRead(configPath)>
    <cfif NOT isJSON(configRaw)>
        <cfthrow message="Claude config file is not valid JSON">
    </cfif>
    <cfset configData = deserializeJSON(configRaw)>
    <cfif isStruct(configData) AND structKeyExists(configData, "apiKey") AND isSimpleValue(configData.apiKey)>
        <!--- Trim so stray whitespace/newlines in the file never end up in the x-api-key header --->
        <cfset CLAUDE_API_KEY = trim(configData.apiKey)>
    </cfif>
</cfif>

<cfif NOT len(CLAUDE_API_KEY)>
    <cfthrow message="Claude API key not configured">
</cfif>
|
|
|
|
<!---
    Read the raw request body and decode it as JSON into requestData.
    The body must be a JSON object: the code that follows uses structKeyExists() on it,
    which would fail with an unrelated error for arrays or scalars.
--->
<cfset requestBody = toString(getHttpRequestData().content)>
<cfif NOT len(trim(requestBody))>
    <cfthrow message="No request body provided">
</cfif>
<cfif NOT isJSON(requestBody)>
    <cfthrow message="Request body is not valid JSON">
</cfif>

<cfset requestData = deserializeJSON(requestBody)>
<cfif NOT isStruct(requestData)>
    <cfthrow message="Request body must be a JSON object">
</cfif>
|
|
|
|
<cfscript>
    // Progress log and request diagnostics that travel back to the caller inside the response.
    response["steps"] = arrayNew(1);
    response["debug"] = structNew();
    response["debug"]["hasHtmlKey"] = structKeyExists(requestData, "html");
    response["debug"]["hasUrlKey"] = structKeyExists(requestData, "url");

    // Length of the supplied HTML, or 0 when no "html" key was sent.
    if (structKeyExists(requestData, "html")) {
        response["debug"]["htmlLength"] = len(requestData.html);
    } else {
        response["debug"]["htmlLength"] = 0;
    }

    // Echo of the supplied URL, or an empty string when no "url" key was sent.
    if (structKeyExists(requestData, "url")) {
        response["debug"]["urlValue"] = requestData.url;
    } else {
        response["debug"]["urlValue"] = "";
    }

    // Working variables; all start empty.
    pageHtml = "";
    baseUrl = "";
    basePath = "";
    targetUrl = "";
</cfscript>
|
|
|
|
<!--- Check if HTML content was provided directly (uploaded file or pasted) --->
|
|
<cfif structKeyExists(requestData, "html") AND len(trim(requestData.html))>
|
|
<cfset pageHtml = trim(requestData.html)>
|
|
<cfset arrayAppend(response.steps, "Using provided HTML content: " & len(pageHtml) & " bytes")>
|
|
<!--- No base URL for local content - images won't be fetched --->
|
|
<cfset baseUrl = "">
|
|
<cfset basePath = "">
|
|
<cfelseif structKeyExists(requestData, "url") AND len(trim(requestData.url))>
|
|
<cfset targetUrl = trim(requestData.url)>
|
|
|
|
<cfscript>
    // Default to HTTPS when the caller omitted the http:// or https:// scheme.
    if (reFindNoCase("^https?://", targetUrl) EQ 0) {
        targetUrl = "https://" & targetUrl;
    }
</cfscript>
|
|
|
|
<!--- Check if this is a local temp file (ZIP upload) - read directly, skip Playwright --->
|
|
<cfif findNoCase("/temp/menu-import/", targetUrl)>
|
|
<!---
    Local temp file (ZIP upload): map the /temp/menu-import/... part of the URL to a path on
    disk and read the HTML directly instead of fetching it.

    targetUrl comes straight from the request body, so parent-directory segments and
    backslashes are rejected before the path is expanded; otherwise a URL such as
    .../temp/menu-import/../../config/claude.json would read files outside the import folder.
--->
<cfset localUrlPath = reReplaceNoCase(targetUrl, "https?://[^/]+(/temp/menu-import/.*)", "\1")>
<cfif reFind("(^|/)\.\.(/|$)", localUrlPath) OR find("\", localUrlPath)>
    <cfthrow message="Invalid local file path">
</cfif>
<cfset localFilePath = expandPath(localUrlPath)>
<cfset arrayAppend(response.steps, "Local temp file detected: " & localFilePath)>

<cfif NOT fileExists(localFilePath)>
    <cfthrow message="Local file not found: #localFilePath#">
</cfif>

<cfset pageHtml = fileRead(localFilePath, "utf-8")>
<!--- Nothing was captured from network traffic for a local file --->
<cfset playwrightImages = arrayNew(1)>
<cfset arrayAppend(response.steps, "Read " & len(pageHtml) & " bytes from local file")>

<!--- Base path for resources referenced relative to the HTML file: the URL minus its last segment --->
<cfset localDir = getDirectoryFromPath(localFilePath)>
<cfset basePath = reReplaceNoCase(targetUrl, "/[^/]*$", "/")>
<cfif NOT reFindNoCase("/$", basePath)>
    <cfset basePath = basePath & "/">
</cfif>
|
|
|
|
<!--- Check for Toast menu page - extract from visible HTML for most complete data --->
|
|
<cfif findNoCase("class=""headerText""", pageHtml) AND findNoCase("toasttab", pageHtml)>
|
|
<cfset arrayAppend(response.steps, "Toast menu detected - parsing visible HTML items")>
|
|
|
|
<cftry>
|
|
<!--- Extract visible items from rendered HTML (most complete) --->
|
|
<cfset toastBusiness = structNew()>
|
|
<cfset toastCategories = arrayNew(1)>
|
|
<cfset toastItems = arrayNew(1)>
|
|
<cfset categorySet = structNew()>
|
|
<cfset itemNameSet = structNew()>
|
|
<cfset itemId = 1>
|
|
<cfset currentCategory = "Menu">
|
|
|
|
<cfscript>
    // Collect the distinct category names from h2 elements whose class contains "groupHeader".
    categoryMatches = reMatchNoCase('<h2[^>]*class="[^"]*groupHeader[^"]*"[^>]*>([^<]+)</h2>', pageHtml);
    for (catMatch in categoryMatches) {
        // reMatch returns the whole tag; keep only the text between the tags.
        catName = trim(reReplaceNoCase(catMatch, '.*>([^<]+)</h2>.*', '\1'));
        if (len(catName) AND NOT structKeyExists(categorySet, catName)) {
            categorySet[catName] = true;
            arrayAppend(toastCategories, { "name": catName, "itemCount": 0 });
        }
    }
</cfscript>
|
|
|
|
<!--- Extract item blocks with name, price, description, image --->
|
|
<!--- Toast pattern: li.item containing headerText for name, price span, itemImage img --->
|
|
<cfset itemBlocks = reMatchNoCase('<li[^>]*class="[^"]*item[^"]*"[^>]*>.*?</li>', pageHtml)>
|
|
<cfset arrayAppend(response.steps, "Found " & arrayLen(itemBlocks) & " item blocks in HTML")>
|
|
|
|
<cfloop array="#itemBlocks#" index="block">
|
|
<!--- Extract item name --->
|
|
<cfset nameMatch = reMatchNoCase('<span class="headerText">([^<]+)</span>', block)>
|
|
<cfif arrayLen(nameMatch)>
|
|
<cfset itemName = reReplaceNoCase(nameMatch[1], '.*>([^<]+)</span>.*', '\1')>
|
|
<cfset itemName = trim(itemName)>
|
|
|
|
<!--- Skip duplicates --->
|
|
<cfif len(itemName) AND NOT structKeyExists(itemNameSet, itemName)>
|
|
<cfset itemNameSet[itemName] = true>
|
|
|
|
<cfset itemStruct = structNew()>
|
|
<cfset itemStruct["id"] = "item_" & itemId>
|
|
<cfset itemStruct["name"] = itemName>
|
|
<cfset itemStruct["modifiers"] = arrayNew(1)>
|
|
|
|
<!---
    Extract the price from <span class="price">$12.99</span> inside the item block.
    The capture pattern must use \$? (optional literal dollar sign), the same as the match
    pattern. The previous \\$? demanded a literal backslash, so the replace never matched,
    the whole tag was handed to val() and every price came out as 0.
--->
<cfset priceMatch = reMatchNoCase('<span[^>]*class="price"[^>]*>\$?([0-9.]+)</span>', block)>
<cfif arrayLen(priceMatch)>
    <cfset priceStr = reReplaceNoCase(priceMatch[1], '.*>\$?([0-9.]+)</span>.*', '\1')>
    <cfset itemStruct["price"] = val(priceStr)>
<cfelse>
    <!--- No price element in this block --->
    <cfset itemStruct["price"] = 0>
</cfif>
|
|
|
|
<cfscript>
    // Description: text of the first div whose class contains "description"; empty when absent.
    descMatch = reMatchNoCase('<div[^>]*class="[^"]*description[^"]*"[^>]*>([^<]+)</div>', block);
    itemStruct["description"] = "";
    if (arrayLen(descMatch)) {
        itemStruct["description"] = trim(reReplaceNoCase(descMatch[1], '.*>([^<]+)</div>.*', '\1'));
    }

    // Image: only src values under Menu_files/ are considered.
    imgMatch = reMatchNoCase('src="(Menu_files/[^"]+)"', block);
    if (arrayLen(imgMatch)) {
        imgSrc = reReplaceNoCase(imgMatch[1], '.*src="([^"]+)".*', '\1');
        // Prefix with basePath to turn the relative src into a full URL.
        itemStruct["imageUrl"] = basePath & imgSrc;
        itemStruct["imageSrc"] = basePath & imgSrc;
        itemStruct["imageFilename"] = listLast(imgSrc, "/");
    }
</cfscript>
|
|
|
|
<!--- Try to determine category from nearby h2 or default --->
|
|
<cfset itemStruct["category"] = arrayLen(toastCategories) ? toastCategories[1].name : "Menu">
|
|
|
|
<cfset arrayAppend(toastItems, itemStruct)>
|
|
<cfset itemId++>
|
|
</cfif>
|
|
</cfif>
|
|
</cfloop>
|
|
|
|
<cfscript>
    // Fallback: when the li-based scan produced nothing, build bare items (no price,
    // description or image) from every headerText span on the page.
    if (arrayLen(toastItems) EQ 0) {
        nameMatches = reMatchNoCase('<span class="headerText">([^<]+)</span>', pageHtml);
        for (nameMatch in nameMatches) {
            itemName = trim(reReplaceNoCase(nameMatch, '.*>([^<]+)</span>.*', '\1'));
            // Skip blanks and names that were already recorded.
            if (NOT len(itemName) OR structKeyExists(itemNameSet, itemName)) {
                continue;
            }
            itemNameSet[itemName] = true;
            itemStruct = { "id": "item_" & itemId, "name": itemName, "price": 0, "description": "", "category": "Menu", "modifiers": [] };
            arrayAppend(toastItems, itemStruct);
            itemId++;
        }
    }
</cfscript>
|
|
|
|
<cfscript>
    // Derive the business name from the page title when none has been set yet.
    titleMatch = reMatchNoCase('<title>([^<]+)</title>', pageHtml);
    arrayAppend(response.steps, "Title tag matches: " & arrayLen(titleMatch));
    if (arrayLen(titleMatch)) {
        titleText = trim(reReplaceNoCase(titleMatch[1], '.*<title>([^<]+)</title>.*', '\1'));
        arrayAppend(response.steps, "Raw title text: " & left(titleText, 100));

        // Titles are usually "Restaurant Name | Online Ordering"; keep the part before the pipe.
        if (findNoCase("|", titleText)) {
            titleText = trim(listFirst(titleText, "|"));
        }

        if (len(titleText) AND NOT structKeyExists(toastBusiness, "name")) {
            toastBusiness["name"] = titleText;
            arrayAppend(response.steps, "Extracted business name from title: " & titleText);
        }
    }
</cfscript>
|
|
|
|
<!--- Check if __OO_STATE__ exists in the saved HTML --->
|
|
<cfset hasOoState = findNoCase("window.__OO_STATE__", pageHtml) GT 0>
|
|
<cfset arrayAppend(response.steps, "Has __OO_STATE__: " & hasOoState)>
|
|
|
|
<!--- Also try to extract from __OO_STATE__ for images and business info --->
|
|
<cfif hasOoState>
|
|
<cfset ooStateMatch = reMatchNoCase("window\.__OO_STATE__\s*=\s*(\{.*?\});\s*window\.", pageHtml)>
|
|
<cfif arrayLen(ooStateMatch)>
|
|
<cfset ooStateJson = reReplaceNoCase(ooStateMatch[1], "window\.__OO_STATE__\s*=\s*", "")>
|
|
<cfset ooStateJson = reReplace(ooStateJson, ";\s*window\.$", "")>
|
|
<cftry>
|
|
<cfset ooState = deserializeJSON(ooStateJson)>
|
|
<!--- Debug: log all top-level keys in OO_STATE --->
|
|
<cfset ooStateKeys = structKeyList(ooState)>
|
|
<cfset arrayAppend(response.steps, "OO_STATE keys: " & left(ooStateKeys, 500))>
|
|
<cfscript>
    // Walk the top-level keys of the decoded state object:
    //   "Restaurant:*" entries supply business details,
    //   "Menu:*" entries supply an item-name to image-URL map.
    imageMap = structNew();
    for (key in ooState) {

        if (left(key, 11) EQ "Restaurant:") {
            restaurant = ooState[key];
            if (structKeyExists(restaurant, "name")) {
                toastBusiness["name"] = restaurant.name;
            }
            if (structKeyExists(restaurant, "location")) {
                loc = restaurant.location;
                // Source key in the location object -> key used in toastBusiness (same order).
                locSourceKeys = "address1,city,state,zipCode,phone";
                locTargetKeys = "addressLine1,city,state,zip,phone";
                for (locIdx = 1; locIdx LTE listLen(locSourceKeys); locIdx++) {
                    locSourceKey = listGetAt(locSourceKeys, locIdx);
                    if (structKeyExists(loc, locSourceKey)) {
                        toastBusiness[listGetAt(locTargetKeys, locIdx)] = loc[locSourceKey];
                    }
                }
            }
            if (structKeyExists(restaurant, "brandColor")) {
                // Stored without the leading hash character.
                toastBusiness["brandColor"] = replace(restaurant.brandColor, "##", "");
            }
        }

        if (left(key, 5) EQ "Menu:") {
            menu = ooState[key];
            if (structKeyExists(menu, "groups") AND isArray(menu.groups)) {
                for (group in menu.groups) {
                    if (structKeyExists(group, "items") AND isArray(group.items)) {
                        for (item in group.items) {
                            if (structKeyExists(item, "name") AND structKeyExists(item, "imageUrls")) {
                                imgUrls = item.imageUrls;
                                // Prefer the medium rendition, then the large one.
                                if (structKeyExists(imgUrls, "medium")) {
                                    imageMap[item.name] = imgUrls.medium;
                                } else if (structKeyExists(imgUrls, "large")) {
                                    imageMap[item.name] = imgUrls.large;
                                }
                            }
                        }
                    }
                }
            }
        }
    }
</cfscript>
|
|
<cfscript>
    // Overwrite each item's image fields when the state object has an image under the same name.
    imagesMatched = 0;
    for (i = 1; i LTE arrayLen(toastItems); i++) {
        if (structKeyExists(imageMap, toastItems[i].name)) {
            toastItems[i]["imageUrl"] = imageMap[toastItems[i].name];
            toastItems[i]["imageSrc"] = imageMap[toastItems[i].name];
            toastItems[i]["imageFilename"] = listLast(imageMap[toastItems[i].name], "/");
            imagesMatched++;
        }
    }
    arrayAppend(response.steps, "Matched " & imagesMatched & " images from __OO_STATE__");

    if (structCount(toastBusiness) GT 0) {
        arrayAppend(response.steps, "Extracted business info: " & structKeyList(toastBusiness));
    }
</cfscript>
|
|
<cfcatch type="any">
    <!--- State enrichment is best-effort: keep going, but record why it was skipped instead of hiding the failure --->
    <cfset arrayAppend(response.steps, "__OO_STATE__ parse failed: " & cfcatch.message)>
</cfcatch>
|
|
</cftry>
|
|
</cfif>
|
|
</cfif>
|
|
|
|
<cfscript>
    arrayAppend(response.steps, "Extracted " & arrayLen(toastItems) & " unique items from " & arrayLen(toastCategories) & " categories");

    // The Toast page was parsed locally: emit the result now and stop, skipping the Claude call.
    response["OK"] = true;
    response["DATA"] = {
        "business": toastBusiness,
        "categories": toastCategories,
        "modifiers": arrayNew(1),
        "items": toastItems,
        "imageUrls": arrayNew(1),
        "headerCandidateIndices": arrayNew(1),
        "imageMappings": arrayNew(1)
    };
    response["sourceUrl"] = targetUrl;
    response["pagesProcessed"] = 1;
    response["imagesFound"] = 0;
    response["playwrightImagesCount"] = 0;
    response["toastDirect"] = true;

    writeOutput(serializeJSON(response));
    abort;
</cfscript>
|
|
|
|
<cfcatch type="any">
|
|
<cfset arrayAppend(response.steps, "Toast HTML parse failed: " & cfcatch.message & " - falling back to Claude")>
|
|
</cfcatch>
|
|
</cftry>
|
|
</cfif>
|
|
|
|
<!---
    Derive the two prefixes used to resolve links found in the page:
      baseUrl  - scheme + host, for root-relative links ("/img/a.jpg")
      basePath - directory of the page, always ending in "/", for relative links ("img/a.jpg")

    The query string / fragment is removed first so it cannot leak into baseUrl, and a URL
    with no path at all (https://example.com) is handled explicitly: stripping "the last
    segment" from it would otherwise eat the host and leave just "https://".
--->
<cfset urlNoQuery = reReplace(targetUrl, "[?##].*$", "")>
<cfset baseUrl = reReplaceNoCase(urlNoQuery, "^(https?://[^/]+).*$", "\1")>
<cfif reFindNoCase("^https?://[^/]+$", urlNoQuery)>
    <cfset basePath = urlNoQuery & "/">
<cfelse>
    <!--- Drop everything after the last slash (no-op when the URL already ends in "/") --->
    <cfset basePath = reReplace(urlNoQuery, "/[^/]*$", "/")>
</cfif>
|
|
<cfelse>
|
|
<!---
    Remote URL: render the page with the Playwright helper script so JS-generated content is
    included. The script's stdout is expected to be JSON with "html" and optionally
    "images" or "error".

    targetUrl is client-supplied and is placed inside a quoted command-line argument, so
    characters that could close the quote or split the argument are percent-encoded first.
--->
<cfset arrayAppend(response.steps, "Fetching URL with Playwright: " & targetUrl)>

<cfset playwrightUrl = replace(targetUrl, "'", "%27", "all")>
<cfset playwrightUrl = replace(playwrightUrl, """", "%22", "all")>
<cfset playwrightUrl = replace(playwrightUrl, "`", "%60", "all")>
<cfset playwrightUrl = replace(playwrightUrl, "\", "%5C", "all")>
<cfset playwrightUrl = reReplace(playwrightUrl, "\s", "%20", "all")>

<cfset playwrightOutput = "">
<cfexecute name="/opt/playwright/run.sh" arguments="'#playwrightUrl#' 4000" timeout="90" variable="playwrightOutput" />

<cfif NOT len(trim(playwrightOutput))>
    <cfthrow message="Playwright returned empty response">
</cfif>
<cfif NOT isJSON(playwrightOutput)>
    <cfthrow message="Playwright returned invalid JSON">
</cfif>

<cfset playwrightResult = deserializeJSON(playwrightOutput)>
<cfif NOT isStruct(playwrightResult)>
    <cfthrow message="Playwright returned invalid JSON">
</cfif>
<cfif structKeyExists(playwrightResult, "error")>
    <cfthrow message="Playwright error: #playwrightResult.error#">
</cfif>
<cfif NOT structKeyExists(playwrightResult, "html")>
    <cfthrow message="Playwright response did not contain html">
</cfif>

<cfset pageHtml = playwrightResult.html>
<!--- Image URLs reported by the helper; treated as empty when missing or not an array --->
<cfif structKeyExists(playwrightResult, "images") AND isArray(playwrightResult.images)>
    <cfset playwrightImages = playwrightResult.images>
<cfelse>
    <cfset playwrightImages = arrayNew(1)>
</cfif>
<cfset arrayAppend(response.steps, "Fetched " & len(pageHtml) & " bytes via Playwright, " & arrayLen(playwrightImages) & " images captured")>
|
|
|
|
<!---
    Derive the two prefixes used to resolve links found in the page:
      baseUrl  - scheme + host, for root-relative links ("/img/a.jpg")
      basePath - directory of the page, always ending in "/", for relative links ("img/a.jpg")

    The query string / fragment is removed first so it cannot leak into baseUrl, and a URL
    with no path at all (https://example.com) is handled explicitly: stripping "the last
    segment" from it would otherwise eat the host and leave just "https://".
--->
<cfset urlNoQuery = reReplace(targetUrl, "[?##].*$", "")>
<cfset baseUrl = reReplaceNoCase(urlNoQuery, "^(https?://[^/]+).*$", "\1")>
<cfif reFindNoCase("^https?://[^/]+$", urlNoQuery)>
    <cfset basePath = urlNoQuery & "/">
<cfelse>
    <!--- Drop everything after the last slash (no-op when the URL already ends in "/") --->
    <cfset basePath = reReplace(urlNoQuery, "/[^/]*$", "/")>
</cfif>
|
|
</cfif>
|
|
<cfelse>
|
|
<cfthrow message="Either 'url' or 'html' content is required">
|
|
</cfif>
|
|
|
|
<!--- Initialize playwrightImages if not set (HTML upload case) --->
|
|
<cfif NOT isDefined("playwrightImages")>
|
|
<cfset playwrightImages = arrayNew(1)>
|
|
</cfif>
|
|
|
|
<!---
    Menu pages array. Playwright renders JS, so everything arrives as a single page.
    targetUrl is always defined (it is initialised to "" earlier), so the page is labelled
    "uploaded" when it is empty, i.e. when the HTML was supplied directly in the request.
--->
<cfset menuPages = arrayNew(1)>
<cfset arrayAppend(menuPages, { url: len(targetUrl) ? targetUrl : "uploaded", html: pageHtml })>
|
|
|
|
<cfscript>
    // Containers for the image scan.
    allImages = arrayNew(1);
    imageUrls = structNew();        // keys are image URLs, used as a set
    imageMappings = arrayNew(1);    // filename / alt / src triples taken from img tags

    // Seed the set with the image URLs reported by Playwright, minus obvious UI assets and SVGs.
    for (pwImg in playwrightImages) {
        if (reFindNoCase("(icon|favicon|logo|sprite|pixel|tracking|badge|button|\.svg)", pwImg) EQ 0) {
            imageUrls[pwImg] = true;
        }
    }
</cfscript>
|
|
|
|
<cfscript>
    // Scan every img tag of every page: record filename/alt mappings and add absolute
    // http(s) image URLs to the imageUrls set.
    for (menuPage in menuPages) {
        imgMatches = reMatchNoCase('<img[^>]+src=["'']([^"'']+)["''][^>]*>', menuPage.html);

        for (imgTag in imgMatches) {
            imgSrc = reReplaceNoCase(imgTag, '.*src=["'']([^"'']+)["''].*', "\1");

            // Alt text, when the tag has a non-empty alt attribute.
            imgAlt = "";
            if (reFindNoCase('alt=["'']([^"'']+)["'']', imgTag)) {
                imgAlt = reReplaceNoCase(imgTag, '.*alt=["'']([^"'']+)["''].*', "\1");
            }

            // Bare filename (either slash style) so uploads can be matched by name.
            imgFilename = listLast(imgSrc, "/\");
            if (len(imgFilename) AND len(imgAlt) AND NOT reFindNoCase("(icon|favicon|logo|sprite|pixel|tracking|badge|button)", imgSrc)) {
                mapping = structNew();
                mapping["filename"] = imgFilename;
                mapping["alt"] = imgAlt;
                mapping["src"] = imgSrc;
                arrayAppend(imageMappings, mapping);
            }

            // Make the src absolute: root-relative uses baseUrl, other relative paths use
            // basePath; absolute and data: URLs are left alone.
            if (left(imgSrc, 1) EQ "/") {
                imgSrc = baseUrl & imgSrc;
            } else if (NOT reFindNoCase("^https?://", imgSrc) AND NOT reFindNoCase("^data:", imgSrc)) {
                imgSrc = basePath & imgSrc;
            }

            // Keep only new http(s) URLs that do not look like icons or other UI assets.
            if (reFindNoCase("^https?://", imgSrc) AND NOT structKeyExists(imageUrls, imgSrc)) {
                if (NOT reFindNoCase("(icon|favicon|logo|sprite|pixel|tracking|badge|button)", imgSrc)) {
                    imageUrls[imgSrc] = true;
                }
            }
        }
    }
</cfscript>
|
|
|
|
<cfscript>
    arrayAppend(response.steps, "Found #structCount(imageUrls)# unique images");

    // A target URL under /temp/menu-import/ marks a ZIP upload whose files are on local disk.
    isLocalScan = isDefined("targetUrl") AND findNoCase("/temp/menu-import/", targetUrl);
    localBasePath = "";
    if (isLocalScan) {
        // Folder of the upload: the first path segment below /temp/menu-import/.
        localBasePath = expandPath(reReplaceNoCase(targetUrl, "https?://[^/]+(/temp/menu-import/[^/]+/).*", "\1"));
        arrayAppend(response.steps, "Local scan detected, base path: " & localBasePath);
    }
</cfscript>
|
|
|
|
<!---
    Load image bytes for up to 20 of the collected URLs and wrap each one as a base64
    "image" block (plus its source URL) in imageDataArray.

    - During a local scan, URLs under /temp/menu-import/ are read from disk; everything else
      is fetched over HTTP with a 10 second timeout.
    - Only payloads larger than 5000 bytes are kept.
    - Any failure on a single image is swallowed on purpose so the scan continues.

    Local paths are derived from src attributes inside the uploaded HTML, so paths with
    parent-directory segments or backslashes are skipped rather than read.
--->
<cfset imageDataArray = arrayNew(1)>
<cfset downloadedCount = 0>
<cfset localReadCount = 0>

<cfloop collection="#imageUrls#" item="imgUrl">
    <cfif downloadedCount GTE 20>
        <cfbreak>
    </cfif>

    <cftry>
        <cfset imgBytes = 0>
        <cfset imgContent = "">
        <cfset mediaType = "image/jpeg">

        <cfif isLocalScan AND findNoCase("/temp/menu-import/", imgUrl)>
            <!--- Convert the URL to a local path, refusing anything that could climb out of the import folder --->
            <cfset localUrlPart = reReplaceNoCase(imgUrl, "https?://[^/]+(/temp/menu-import/.*)", "\1")>
            <cfif NOT reFind("(^|/)\.\.(/|$)", localUrlPart) AND NOT find("\", localUrlPart)>
                <cfset localPath = expandPath(localUrlPart)>

                <cfif fileExists(localPath)>
                    <cfset imgContent = fileReadBinary(localPath)>
                    <cfset imgBytes = len(imgContent)>

                    <!--- Media type from the file extension; anything else stays image/jpeg --->
                    <cfset ext = lCase(listLast(localPath, "."))>
                    <cfif ext EQ "png">
                        <cfset mediaType = "image/png">
                    <cfelseif ext EQ "gif">
                        <cfset mediaType = "image/gif">
                    <cfelseif ext EQ "webp">
                        <cfset mediaType = "image/webp">
                    </cfif>
                    <cfset localReadCount = localReadCount + 1>
                </cfif>
            </cfif>
        <cfelse>
            <!--- Fetch remote image via HTTP --->
            <cfhttp url="#imgUrl#" method="GET" timeout="10" result="imgResult" getasbinary="yes">
            </cfhttp>

            <cfif findNoCase("200", imgResult.statusCode) AND isBinary(imgResult.fileContent)>
                <cfset contentType = structKeyExists(imgResult.responseHeader, "Content-Type") ? imgResult.responseHeader["Content-Type"] : "">
                <!--- Accept only the raster formats handled below --->
                <cfif reFindNoCase("image/(jpeg|jpg|png|gif|webp)", contentType)>
                    <cfset imgContent = imgResult.fileContent>
                    <cfset imgBytes = len(imgContent)>
                    <cfif findNoCase("png", contentType)><cfset mediaType = "image/png"></cfif>
                    <cfif findNoCase("gif", contentType)><cfset mediaType = "image/gif"></cfif>
                    <cfif findNoCase("webp", contentType)><cfset mediaType = "image/webp"></cfif>
                </cfif>
            </cfif>
        </cfif>

        <!--- Keep the image only if it is larger than 5000 bytes --->
        <cfif imgBytes GT 5000>
            <cfset base64Content = toBase64(imgContent)>

            <cfset imgSource = structNew()>
            <cfset imgSource["type"] = "base64">
            <cfset imgSource["media_type"] = mediaType>
            <cfset imgSource["data"] = base64Content>

            <cfset imgStruct = structNew()>
            <cfset imgStruct["type"] = "image">
            <cfset imgStruct["source"] = imgSource>
            <cfset imgStruct["url"] = imgUrl>

            <cfset arrayAppend(imageDataArray, imgStruct)>
            <cfset downloadedCount = downloadedCount + 1>
        </cfif>
        <cfcatch>
            <!--- Skip failed downloads --->
        </cfcatch>
    </cftry>
</cfloop>

<cfset arrayAppend(response.steps, "Loaded #arrayLen(imageDataArray)# valid images (#localReadCount# from local disk)")>
|
|
|
|
<cfscript>
    // Gather JSON embedded in the pages into one labelled text blob (embeddedJsonData).
    embeddedJsonData = "";
    embeddedMenuItems = arrayNew(1);

    for (menuPage in menuPages) {
        // 1. The script tag with id __NEXT_DATA__: only the first match is used.
        nextDataMatch = reMatchNoCase('<script[^>]*id=["'']__NEXT_DATA__["''][^>]*>([^<]+)</script>', menuPage.html);
        if (arrayLen(nextDataMatch)) {
            scriptContent = reReplaceNoCase(nextDataMatch[1], '<script[^>]*>([^<]+)</script>', '\1');
            embeddedJsonData = embeddedJsonData & chr(10) & "--- __NEXT_DATA__ ---" & chr(10) & scriptContent;
        }

        // 2. Assignments of the form window.__NAME__ = {...}; (the whole match is kept).
        stateMatches = reMatchNoCase('window\.__[A-Z_]+__\s*=\s*(\{[^;]+\});', menuPage.html);
        for (stateMatch in stateMatches) {
            embeddedJsonData = embeddedJsonData & chr(10) & "--- WINDOW_STATE ---" & chr(10) & stateMatch;
        }

        // 3. data-props / data-page / data-state attributes holding an object literal.
        dataPropsMatches = reMatchNoCase('data-(?:props|page|state)=["''](\{[^"'']+\})["'']', menuPage.html);
        for (propsMatch in dataPropsMatches) {
            embeddedJsonData = embeddedJsonData & chr(10) & "--- DATA_PROPS ---" & chr(10) & propsMatch;
        }

        // 4. JSON-LD script blocks, kept only when they mention a menu.
        jsonLdMatches = reMatchNoCase('<script[^>]*type=["'']application/ld\+json["''][^>]*>([^<]+)</script>', menuPage.html);
        for (jsonLdMatch in jsonLdMatches) {
            scriptContent = reReplaceNoCase(jsonLdMatch, '<script[^>]*>([^<]+)</script>', '\1');
            if (findNoCase("menu", scriptContent) OR findNoCase("MenuItem", scriptContent)) {
                embeddedJsonData = embeddedJsonData & chr(10) & "--- JSON_LD_MENU ---" & chr(10) & scriptContent;
            }
        }
    }

    // Report what was found in the response's debug fields.
    if (len(embeddedJsonData)) {
        response["DEBUG_EMBEDDED_JSON_FOUND"] = true;
        response["DEBUG_EMBEDDED_JSON_LENGTH"] = len(embeddedJsonData);
        response["DEBUG_EMBEDDED_JSON_PREVIEW"] = left(embeddedJsonData, 2000);
    } else {
        response["DEBUG_EMBEDDED_JSON_FOUND"] = false;
    }
</cfscript>
|
|
|
|
<cfscript>
    // Build the text sent to Claude: each page's HTML with scripts, styles and HTML comments
    // removed, followed by any embedded JSON, capped at maxPromptChars characters in total.
    combinedHtml = "";
    for (menuPage in menuPages) {
        cleanHtml = menuPage.html;
        cleanHtml = reReplaceNoCase(cleanHtml, "<script[^>]*>.*?</script>", "", "all");
        cleanHtml = reReplaceNoCase(cleanHtml, "<style[^>]*>.*?</style>", "", "all");
        cleanHtml = reReplaceNoCase(cleanHtml, "<!--.*?-->", "", "all");
        combinedHtml = combinedHtml & chr(10) & "--- PAGE: " & menuPage.url & " ---" & chr(10) & cleanHtml;
    }

    maxPromptChars = 100000;

    // Embedded JSON is appended to help Claude find all menu items.
    embeddedSection = "";
    if (len(embeddedJsonData)) {
        embeddedSection = chr(10) & chr(10) & "=== EMBEDDED JSON DATA (may contain full menu) ===" & chr(10) & embeddedJsonData;
    }

    if (len(combinedHtml) + len(embeddedSection) LTE maxPromptChars) {
        // Everything fits.
        combinedHtml = combinedHtml & embeddedSection;
    } else if (len(embeddedSection) EQ 0) {
        combinedHtml = left(combinedHtml, maxPromptChars);
    } else {
        // Over the limit. Cutting the concatenated string would drop the embedded JSON first
        // (it sits at the end), so split the budget: the JSON gets at least 40000 characters,
        // or whatever the HTML leaves unused, and the HTML is cut to the remainder.
        embeddedBudget = min(len(embeddedSection), max(40000, maxPromptChars - len(combinedHtml)));
        htmlBudget = maxPromptChars - embeddedBudget;
        if (htmlBudget GT 0) {
            combinedHtml = left(combinedHtml, htmlBudget) & left(embeddedSection, embeddedBudget);
        } else {
            combinedHtml = left(embeddedSection, embeddedBudget);
        }
    }
</cfscript>
|
|
|
|
<cfscript>
    // Debug aid: non-empty h3 and h4 heading texts found in the combined HTML.
    h3Tags = reMatchNoCase("<h3[^>]*>([^<]*)</h3>", combinedHtml);
    h3Texts = arrayNew(1);
    for (h3Tag in h3Tags) {
        h3Text = trim(reReplaceNoCase(h3Tag, "<h3[^>]*>([^<]*)</h3>", "\1"));
        if (len(h3Text)) {
            arrayAppend(h3Texts, h3Text);
        }
    }
    response["DEBUG_H3_TAGS"] = h3Texts;

    h4Tags = reMatchNoCase("<h4[^>]*>([^<]*)</h4>", combinedHtml);
    h4Texts = arrayNew(1);
    for (h4Tag in h4Tags) {
        h4Text = trim(reReplaceNoCase(h4Tag, "<h4[^>]*>([^<]*)</h4>", "\1"));
        if (len(h4Text)) {
            arrayAppend(h4Texts, h4Text);
        }
    }
    response["DEBUG_H4_TAGS"] = h4Texts;

    // Debug aid: how many h2 / h5 / h6 headings exist.
    h2Tags = reMatchNoCase("<h2[^>]*>([^<]*)</h2>", combinedHtml);
    h5Tags = reMatchNoCase("<h5[^>]*>([^<]*)</h5>", combinedHtml);
    h6Tags = reMatchNoCase("<h6[^>]*>([^<]*)</h6>", combinedHtml);
    response["DEBUG_H2_COUNT"] = arrayLen(h2Tags);
    response["DEBUG_H5_COUNT"] = arrayLen(h5Tags);
    response["DEBUG_H6_COUNT"] = arrayLen(h6Tags);

    // Debug aid: a window of markup around the first occurrence of "Beverages"
    // (from 100 characters before it to 900 after, clamped to the text bounds).
    bevPos = findNoCase("Beverages", combinedHtml);
    if (bevPos GT 0) {
        bevStart = max(1, bevPos - 100);
        bevEnd = min(len(combinedHtml), bevPos + 900);
        response["DEBUG_BEVERAGES_HTML"] = mid(combinedHtml, bevStart, bevEnd - bevStart);
    }

    arrayAppend(response.steps, "Found " & arrayLen(h3Texts) & " h3 and " & arrayLen(h4Texts) & " h4 tags");
</cfscript>
|
|
|
|
<!--- System prompt for URL analysis --->
|
|
<cfset systemPrompt = "You are an expert at extracting structured menu data from restaurant website HTML. Extract ALL menu data visible in the HTML. Return valid JSON with these keys: business (object with name, address, phone, hours, brandColor), categories (array of category names), modifiers (array), items (array with name, description, price, category, subcategory, modifiers array, and imageUrl). CRITICAL FOR IMAGES: Each menu item in the HTML is typically in a container (div, li, article) that also contains an img tag. Extract the img src URL and include it as 'imageUrl' for that item. Look for img tags that are siblings or children within the same menu-item container. The image URL should be the full or relative src value from the img tag - NOT the alt text. CRITICAL: Extract EVERY menu item from ALL sources including embedded JSON (__NEXT_DATA__, window state, JSON-LD). SUBCATEGORY RULE: If a section header has NO menu items directly below it but contains NESTED sections, the outer section is the PARENT CATEGORY and inner sections are SUBCATEGORIES. For items in subcategories, set category to the PARENT name and subcategory to the inner section name. For brandColor: suggest a vibrant hex (6 digits, no hash). For prices: numbers (e.g., 12.99). Return ONLY valid JSON.">
|
|
|
|
<cfscript>
    // Assemble the request payload: up to 10 images first, then one text block with the
    // instructions and the combined HTML, sent as a single user message.
    messagesContent = arrayNew(1);

    imgLimit = min(arrayLen(imageDataArray), 10);
    for (i = 1; i LTE imgLimit; i++) {
        imgData = imageDataArray[i];
        // Only type and source are forwarded; the url field stays out of the payload.
        imgContent = structNew();
        imgContent["type"] = "image";
        imgContent["source"] = imgData.source;
        arrayAppend(messagesContent, imgContent);
    }

    textBlock = structNew();
    textBlock["type"] = "text";
    textBlock["text"] = "Extract menu data from this restaurant website HTML. The images above are from the same website - identify which ones are food photos that could be used as item images, and which could be header/banner images. Here is the HTML content:" & chr(10) & chr(10) & combinedHtml;
    arrayAppend(messagesContent, textBlock);

    userMessage = structNew();
    userMessage["role"] = "user";
    userMessage["content"] = messagesContent;

    // requestBody is reused here: from this point on it holds the outgoing API payload.
    requestBody = structNew();
    requestBody["model"] = "claude-sonnet-4-20250514";
    requestBody["max_tokens"] = 8192;
    requestBody["temperature"] = 0;
    requestBody["system"] = systemPrompt;
    requestBody["messages"] = arrayNew(1);
    arrayAppend(requestBody["messages"], userMessage);

    arrayAppend(response.steps, "Sending to Claude API...");
</cfscript>
|
|
|
|
<!--- POST the payload to the Claude Messages API (120 second timeout); the result lands in httpResult --->
<cfhttp url="https://api.anthropic.com/v1/messages" method="POST" timeout="120" result="httpResult">
    <cfhttpparam type="header" name="Content-Type" value="application/json">
    <cfhttpparam type="header" name="x-api-key" value="#CLAUDE_API_KEY#">
    <cfhttpparam type="header" name="anthropic-version" value="2023-06-01">
    <cfhttpparam type="body" value="#serializeJSON(requestBody)#">
</cfhttp>

<cfscript>
    // Reduce the status to a number: numeric values as-is, any text containing "200"
    // counts as 200, anything else as 0.
    httpStatusCode = httpResult.statusCode;
    if (isNumeric(httpStatusCode)) {
        httpStatusCode = int(httpStatusCode);
    } else if (findNoCase("200", httpStatusCode)) {
        httpStatusCode = 200;
    } else {
        httpStatusCode = 0;
    }

    if (httpStatusCode NEQ 200) {
        // Prefer error.message from the response body; fall back to the raw body.
        errorDetail = "";
        try {
            errorResponse = deserializeJSON(httpResult.fileContent);
            if (structKeyExists(errorResponse, "error") AND structKeyExists(errorResponse.error, "message")) {
                errorDetail = errorResponse.error.message;
            } else {
                errorDetail = httpResult.fileContent;
            }
        } catch (any parseError) {
            errorDetail = httpResult.fileContent;
        }
        throw(message="Claude API error: #httpResult.statusCode# - #errorDetail#");
    }
</cfscript>
|
|
|
|
<cfscript>
    // Decode the API response and take the text of its first "text" content block.
    claudeResponse = deserializeJSON(httpResult.fileContent);
    if (NOT structKeyExists(claudeResponse, "content") OR NOT arrayLen(claudeResponse.content)) {
        throw(message="Empty response from Claude");
    }

    responseText = "";
    for (block in claudeResponse.content) {
        if (structKeyExists(block, "type") AND block.type EQ "text") {
            responseText = block.text;
            break;
        }
    }

    // Strip a surrounding Markdown code fence, if any.
    responseText = trim(responseText);
    if (left(responseText, 7) EQ "```json") {
        responseText = mid(responseText, 8, len(responseText) - 7);
    }
    if (left(responseText, 3) EQ "```") {
        responseText = mid(responseText, 4, len(responseText) - 3);
    }
    if (right(responseText, 3) EQ "```") {
        responseText = left(responseText, len(responseText) - 3);
    }
    responseText = trim(responseText);

    // Drop trailing commas in front of a closing bracket or brace.
    responseText = reReplace(responseText, ",(\s*[\]\}])", "\1", "all");
    // Replace control characters (which break JSON parsing) with spaces.
    responseText = reReplace(responseText, "[\x00-\x1F]", " ", "all");
</cfscript>
|
|
|
|
<!---
    Decode Claude's cleaned-up text into menuData.
    When it is not valid JSON the request ends here: the response gets the parse error plus
    the first 2000 characters and the total length of the text for debugging, and is emitted
    immediately. The content type carries charset=utf-8, matching the header set at the top
    of the template, because the raw text may contain non-ASCII characters.
--->
<cftry>
    <cfset menuData = deserializeJSON(responseText)>
    <cfcatch type="any">
        <cfset response["success"] = false>
        <cfset response["error"] = "JSON parse error: #cfcatch.message#">
        <cfset response["DEBUG_RAW_RESPONSE"] = left(responseText, 2000)>
        <cfset response["DEBUG_RESPONSE_LENGTH"] = len(responseText)>
        <cfcontent type="application/json; charset=utf-8" reset="true">
        <cfoutput>#serializeJSON(response)#</cfoutput>
        <cfabort>
    </cfcatch>
</cftry>
|
|
|
|
<!--- Debug: expose the Claude text that was parsed. Note this is the cleaned text (code fences, trailing commas and control characters already removed above), not the untouched API output --->
|
|
<cfset response["DEBUG_RAW_CLAUDE"] = responseText>
|
|
|
|
<!--- Collect the "url" of every imageDataArray entry that has one, for the wizard to use --->
<cfscript>
    imageUrlList = [];
    for (imageEntry in imageDataArray) {
        if (structKeyExists(imageEntry, "url")) {
            arrayAppend(imageUrlList, imageEntry.url);
        }
    }
</cfscript>
|
|
|
|
<!---
    Ensure menuData has the four top-level keys the rest of this template uses.
    "categories" and "items" are looped over / measured with arrayLen below, so
    they are also reset when present but not an array (e.g. the model returned
    an object or a string); otherwise the whole request would fail.
--->
<cfscript>
    if (NOT structKeyExists(menuData, "business")) {
        menuData["business"] = structNew();
    }
    if (NOT structKeyExists(menuData, "categories") OR NOT isArray(menuData.categories)) {
        menuData["categories"] = arrayNew(1);
    }
    if (NOT structKeyExists(menuData, "modifiers")) {
        menuData["modifiers"] = arrayNew(1);
    }
    if (NOT structKeyExists(menuData, "items") OR NOT isArray(menuData.items)) {
        menuData["items"] = arrayNew(1);
    }
</cfscript>
|
|
|
|
<!---
    Rewrite menuData.categories as a flat array of { name, itemCount: 0 } structs.
    Subcategories are not emitted as categories; instead their lower-cased names
    are recorded in subcatToParentMap, pointing at the parent category name.
--->
<cfscript>
    formattedCategories = arrayNew(1);
    subcatToParentMap = structNew();

    for (rawCategory in menuData.categories) {
        categoryLabel = "";
        isUsable = false;

        if (isSimpleValue(rawCategory)) {
            // Plain values are accepted as the category name without an emptiness check
            categoryLabel = rawCategory;
            isUsable = true;
        } else if (isStruct(rawCategory)) {
            if (structKeyExists(rawCategory, "name")) {
                categoryLabel = rawCategory.name;
            }
            // Struct entries only count when they carry a non-empty name
            isUsable = len(categoryLabel) GT 0;
        }

        if (NOT isUsable) {
            continue;
        }

        categoryEntry = structNew();
        categoryEntry["name"] = categoryLabel;
        categoryEntry["itemCount"] = 0;
        arrayAppend(formattedCategories, categoryEntry);

        // Only struct entries can carry a subcategories array
        if (isSimpleValue(rawCategory) OR NOT structKeyExists(rawCategory, "subcategories") OR NOT isArray(rawCategory.subcategories)) {
            continue;
        }

        for (childCategory in rawCategory.subcategories) {
            childLabel = "";
            if (isSimpleValue(childCategory)) {
                childLabel = childCategory;
            } else if (isStruct(childCategory) AND structKeyExists(childCategory, "name")) {
                childLabel = childCategory.name;
            }
            if (len(childLabel)) {
                subcatToParentMap[lcase(childLabel)] = categoryLabel;
            }
        }
    }

    menuData["categories"] = formattedCategories;
</cfscript>
|
|
|
|
<!---
    Move items that reference a subcategory onto the parent category.
    For each item the "category" field is checked first, then "subcategory";
    whenever the lower-cased value is a key of subcatToParentMap, the item's
    "category" is overwritten with the mapped parent name.
--->
<cfscript>
    categoryFields = listToArray("category,subcategory");
    itemTotal = arrayLen(menuData.items);

    for (itemIdx = 1; itemIdx LTE itemTotal; itemIdx++) {
        menuItem = menuData.items[itemIdx];
        for (fieldName in categoryFields) {
            if (structKeyExists(menuItem, fieldName) AND len(menuItem[fieldName])) {
                lookupKey = lcase(menuItem[fieldName]);
                if (structKeyExists(subcatToParentMap, lookupKey)) {
                    menuData.items[itemIdx]["category"] = subcatToParentMap[lookupKey];
                }
            }
        }
    }
</cfscript>
|
|
|
|
<!--- Give every item a positional id: item_1, item_2, ... --->
<cfscript>
    itemTotal = arrayLen(menuData.items);
    for (itemIdx = 1; itemIdx LTE itemTotal; itemIdx++) {
        menuData.items[itemIdx]["id"] = "item_" & itemIdx;
    }
</cfscript>
|
|
|
|
<!---
    Process item images.
    For each item, in priority order:
      1. an "images" struct: every non-empty string value yields a filename in
         item.imageFilenames; the primary image is the first non-empty one of
         src, large, medium, small
      2. "imageUrl" (used when step 1 produced no primary image)
      3. a pre-existing "imageSrc" (legacy): only imageFilename is derived
    item.imageSrc / item.imageFilename are set from the primary image.
    Filenames are the last "/" or "\" delimited segment of the URL.
    An item is counted in itemsWithImages only when a usable image value was
    actually found; empty or non-string values are ignored instead of blocking
    the fallbacks or throwing.
--->
<cfscript>
    itemsWithImages = 0;
    preferredSizes = listToArray("src,large,medium,small");

    for (i = 1; i LTE arrayLen(menuData.items); i++) {
        item = menuData.items[i];
        primaryUrl = "";
        hasImage = false;

        if (structKeyExists(item, "images") AND isStruct(item.images)) {
            imgObj = item.images;

            // Extract a filename for each image size that holds a usable URL
            filenames = structNew();
            for (sizeKey in imgObj) {
                // structKeyExists guards against keys whose JSON value was null
                if (structKeyExists(imgObj, sizeKey)) {
                    imgUrl = imgObj[sizeKey];
                    if (isSimpleValue(imgUrl) AND len(trim(imgUrl))) {
                        filenames[sizeKey] = listLast(imgUrl, "/\");
                    }
                }
            }
            menuData.items[i]["imageFilenames"] = filenames;
            hasImage = structCount(filenames) GT 0;

            // Primary image: first preferred size that passed the usability check above
            for (sizeName in preferredSizes) {
                if (structKeyExists(filenames, sizeName)) {
                    primaryUrl = imgObj[sizeName];
                    break;
                }
            }
        }

        // imageUrl from Claude (most common)
        if (NOT len(primaryUrl) AND structKeyExists(item, "imageUrl") AND isSimpleValue(item.imageUrl) AND len(trim(item.imageUrl))) {
            primaryUrl = item.imageUrl;
        }

        if (len(primaryUrl)) {
            menuData.items[i]["imageSrc"] = primaryUrl;
            menuData.items[i]["imageFilename"] = listLast(primaryUrl, "/\");
            hasImage = true;
        } else if (structKeyExists(item, "imageSrc") AND isSimpleValue(item.imageSrc) AND len(trim(item.imageSrc))) {
            // Legacy: Claude returned imageSrc directly
            menuData.items[i]["imageFilename"] = listLast(item.imageSrc, "/\");
            hasImage = true;
        }

        if (hasImage) {
            itemsWithImages = itemsWithImages + 1;
        }
    }

    arrayAppend(response.steps, "Found images for " & itemsWithImages & " of " & arrayLen(menuData.items) & " items");
</cfscript>
|
|
|
|
<!---
    Attach image data to menuData and assemble the success response.
    response.OK is set last, so a failure in any statement here cannot leave a
    half-built response flagged as successful.
--->
<cfscript>
    menuData["imageUrls"] = imageUrlList;
    menuData["headerCandidateIndices"] = arrayNew(1);
    // Image mappings for local HTML uploads (filename -> alt text)
    menuData["imageMappings"] = imageMappings;

    response["DATA"] = menuData;

    // targetUrl is initialised to "" at the top of the template, so test for a
    // non-empty value rather than mere existence before falling back to "uploaded"
    hasTargetUrl = isDefined("targetUrl") AND isSimpleValue(targetUrl) AND len(trim(targetUrl)) GT 0;
    response["sourceUrl"] = hasTargetUrl ? targetUrl : "uploaded";

    response["pagesProcessed"] = arrayLen(menuPages);
    response["imagesFound"] = arrayLen(imageDataArray);
    response["playwrightImagesCount"] = arrayLen(playwrightImages);

    // Debug: subcategory -> parent mapping, Playwright images, and the category
    // list (already flattened by this point, despite the key name)
    response["DEBUG_SUBCAT_MAP"] = subcatToParentMap;
    response["DEBUG_PLAYWRIGHT_IMAGES"] = playwrightImages;
    response["DEBUG_RAW_CATEGORIES"] = menuData.categories;

    response["OK"] = true;
</cfscript>
|
|
|
|
<!---
    Catch-all for the request: report the error message, its detail when
    present, and the line/template of the first tagContext frame.
--->
<cfcatch type="any">
    <!--- OK may already have been set to true before the failing statement ran; force it back --->
    <cfset response["OK"] = false>
    <cfset response["MESSAGE"] = cfcatch.message>
    <cfif len(cfcatch.detail)>
        <cfset response["DETAIL"] = cfcatch.detail>
    </cfif>
    <cfif structKeyExists(cfcatch, "tagContext") AND arrayLen(cfcatch.tagContext) GT 0>
        <cfset response["DEBUG_LINE"] = cfcatch.tagContext[1].line>
        <cfset response["DEBUG_TEMPLATE"] = cfcatch.tagContext[1].template>
    </cfif>
</cfcatch>
|
|
</cftry>
|
|
|
|
<!--- Serialize the response struct (success or error) and write it as the JSON body --->
<cfoutput>#serializeJSON(response)#</cfoutput>
|