Auto-detect ordering platform links for food photos
Playwright now follows menu sub-page links (brunch, lunch, dinner, etc.) and also detects ordering-platform links (order.online, toasttab, grubhub, etc.) on restaurant websites. It visits the ordering pages to scrape item/image pairs, then fuzzy-matches those pairs to the menu items extracted from the main site. This gives us the complete menu from the restaurant's own website plus food photos from its ordering platform — the best of both worlds.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
1d0e4ee616
commit
37caa7c9bc
1 changed file with 50 additions and 2 deletions
|
|
@ -1,6 +1,6 @@
|
||||||
<cfsetting showdebugoutput="false">
|
<cfsetting showdebugoutput="false">
|
||||||
<cfsetting enablecfoutputonly="true">
|
<cfsetting enablecfoutputonly="true">
|
||||||
<cfsetting requesttimeout="300">
|
<cfsetting requesttimeout="600">
|
||||||
<cfcontent type="application/json; charset=utf-8" reset="true">
|
<cfcontent type="application/json; charset=utf-8" reset="true">
|
||||||
|
|
||||||
<cfset response = structNew()>
|
<cfset response = structNew()>
|
||||||
|
|
@ -1044,7 +1044,7 @@
|
||||||
<cfset arrayAppend(response.steps, "Fetching URL with Playwright: " & targetUrl)>
|
<cfset arrayAppend(response.steps, "Fetching URL with Playwright: " & targetUrl)>
|
||||||
|
|
||||||
<cfset playwrightOutput = "">
|
<cfset playwrightOutput = "">
|
||||||
<cfexecute name="/opt/playwright/run.sh" arguments="'#targetUrl#' 5000" timeout="120" variable="playwrightOutput" />
|
<cfexecute name="/opt/playwright/run.sh" arguments="'#targetUrl#' 5000" timeout="240" variable="playwrightOutput" />
|
||||||
|
|
||||||
<cfif NOT len(trim(playwrightOutput))>
|
<cfif NOT len(trim(playwrightOutput))>
|
||||||
<cfthrow message="Playwright returned empty response">
|
<cfthrow message="Playwright returned empty response">
|
||||||
|
|
@ -1059,6 +1059,19 @@
|
||||||
<cfset playwrightImages = structKeyExists(playwrightResult, "images") ? playwrightResult.images : arrayNew(1)>
|
<cfset playwrightImages = structKeyExists(playwrightResult, "images") ? playwrightResult.images : arrayNew(1)>
|
||||||
<cfset arrayAppend(response.steps, "Fetched " & len(pageHtml) & " bytes via Playwright, " & arrayLen(playwrightImages) & " images captured")>
|
<cfset arrayAppend(response.steps, "Fetched " & len(pageHtml) & " bytes via Playwright, " & arrayLen(playwrightImages) & " images captured")>
|
||||||
|
|
||||||
|
<!--- Pull ordering-platform metadata out of the Playwright result and log what was found --->
<cfscript>
    // Start from an empty map so later code can rely on platformImageMap always being a struct.
    platformImageMap = structNew();

    // Item-name -> photo-URL map scraped from the ordering platform, when Playwright returned one.
    if (structKeyExists(playwrightResult, "platformImageMap") AND isStruct(playwrightResult.platformImageMap)) {
        platformImageMap = playwrightResult.platformImageMap;
        arrayAppend(response.steps, "Found " & structCount(platformImageMap) & " item images from ordering platform");
    }

    // Report the menu sub-pages that were followed, but only when there is at least one.
    if (
        structKeyExists(playwrightResult, "subPagesVisited")
        AND isArray(playwrightResult.subPagesVisited)
        AND arrayLen(playwrightResult.subPagesVisited) GT 0
    ) {
        arrayAppend(
            response.steps,
            "Visited " & arrayLen(playwrightResult.subPagesVisited) & " menu sub-pages: " & arrayToList(playwrightResult.subPagesVisited, ", ")
        );
    }

    // Report the ordering platforms that were visited for photos, again only when non-empty.
    if (
        structKeyExists(playwrightResult, "platformPagesVisited")
        AND isArray(playwrightResult.platformPagesVisited)
        AND arrayLen(playwrightResult.platformPagesVisited) GT 0
    ) {
        arrayAppend(
            response.steps,
            "Visited " & arrayLen(playwrightResult.platformPagesVisited) & " ordering platforms for photos: " & arrayToList(playwrightResult.platformPagesVisited, ", ")
        );
    }
</cfscript>
|
||||||
|
|
||||||
<!--- ========== WOOCOMMERCE FAST PATH ========== --->
|
<!--- ========== WOOCOMMERCE FAST PATH ========== --->
|
||||||
<cfif findNoCase("woocommerce", pageHtml) OR findNoCase("wc-add-to-cart", pageHtml) OR findNoCase("tm-extra-product-options", pageHtml)>
|
<cfif findNoCase("woocommerce", pageHtml) OR findNoCase("wc-add-to-cart", pageHtml) OR findNoCase("tm-extra-product-options", pageHtml)>
|
||||||
<cfset arrayAppend(response.steps, "WooCommerce site detected - running modifier extraction")>
|
<cfset arrayAppend(response.steps, "WooCommerce site detected - running modifier extraction")>
|
||||||
|
|
@ -2745,6 +2758,41 @@
|
||||||
</cfloop>
|
</cfloop>
|
||||||
<cfset arrayAppend(response.steps, "Found images for " & itemsWithImages & " of " & arrayLen(menuData.items) & " items")>
|
<cfset arrayAppend(response.steps, "Found images for " & itemsWithImages & " of " & arrayLen(menuData.items) & " items")>
|
||||||
|
|
||||||
|
<!---
    Fill missing item images from the ordering platform photo map.

    For every menu item that has no imageUrl yet, look up a photo in platformImageMap
    (platform item name -> image URL): first by exact key, then by a case-insensitive
    substring match in either direction. On a match the item's imageUrl, imageFilename
    and imageSrc are set and platformMatches is incremented.

    The map comes from external Playwright JSON, so each candidate value is validated
    (key resolves, value is a simple non-empty string) before it is used; unusable
    entries are skipped instead of throwing or writing a blank image URL.
--->
<cfif isDefined("platformImageMap") AND isStruct(platformImageMap) AND structCount(platformImageMap) GT 0>
    <cfset platformMatches = 0>
    <cfloop from="1" to="#arrayLen(menuData.items)#" index="i">
        <cfset item = menuData.items[i]>
        <!--- Skip items that already have an image --->
        <cfif structKeyExists(item, "imageUrl") AND len(trim(item.imageUrl))>
            <cfcontinue>
        </cfif>
        <cfset itemName = structKeyExists(item, "name") ? trim(item.name) : "">
        <cfif NOT len(itemName)><cfcontinue></cfif>

        <!--- Resolve which map key (if any) supplies this item's photo --->
        <cfset matchedKey = "">
        <cfif structKeyExists(platformImageMap, itemName)
              AND isSimpleValue(platformImageMap[itemName])
              AND len(trim(platformImageMap[itemName]))>
            <!--- Exact match on the item name with a usable URL --->
            <cfset matchedKey = itemName>
        <cfelse>
            <!--- Case-insensitive partial match; the first usable entry wins --->
            <cfloop collection="#platformImageMap#" item="platName">
                <!---
                    Ignore blank keys and entries whose value is null, non-string or empty
                    before comparing names, so a bad entry can never be selected.
                --->
                <cfif len(trim(platName))
                      AND structKeyExists(platformImageMap, platName)
                      AND isSimpleValue(platformImageMap[platName])
                      AND len(trim(platformImageMap[platName]))
                      AND (findNoCase(itemName, platName) OR findNoCase(platName, itemName))>
                    <cfset matchedKey = platName>
                    <cfbreak>
                </cfif>
            </cfloop>
        </cfif>

        <!--- Apply the match once, regardless of how it was found --->
        <cfif len(matchedKey)>
            <cfset matchedUrl = trim(platformImageMap[matchedKey])>
            <cfset menuData.items[i]["imageUrl"] = matchedUrl>
            <cfset menuData.items[i]["imageFilename"] = listLast(matchedUrl, "/\")>
            <cfset menuData.items[i]["imageSrc"] = matchedUrl>
            <cfset platformMatches++>
        </cfif>
    </cfloop>
    <cfif platformMatches GT 0>
        <cfset arrayAppend(response.steps, "Matched " & platformMatches & " additional item images from ordering platform")>
    </cfif>
</cfif>
|
||||||
|
|
||||||
<!--- Add image URLs to response --->
|
<!--- Add image URLs to response --->
|
||||||
<cfset menuData["imageUrls"] = imageUrlList>
|
<cfset menuData["imageUrls"] = imageUrlList>
|
||||||
<cfset menuData["headerCandidateIndices"] = arrayNew(1)>
|
<cfset menuData["headerCandidateIndices"] = arrayNew(1)>
|
||||||
|
|
|
||||||
Reference in a new issue