Add WooCommerce fast-path with Playwright modifier extraction
Detects WooCommerce sites from the Playwright-fetched HTML by looking for the markers "woocommerce", "wc-add-to-cart", or "tm-extra-product-options". Runs woo-modifiers.js, which navigates all product pages, extracts items with categories, and scrapes TMEPO/variation modifiers. Falls through to Claude if extraction fails.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
d9262e83c0
commit
fb92748784
1 changed file with 80 additions and 0 deletions
|
|
@ -1059,6 +1059,86 @@
|
|||
<!--- Images captured during the Playwright fetch; fall back to an empty array when the result carries no "images" key --->
<cfif structKeyExists(playwrightResult, "images")>
    <cfset playwrightImages = playwrightResult.images>
<cfelse>
    <cfset playwrightImages = []>
</cfif>
<!--- Record the fetch size and image count in the step log returned to the caller --->
<cfset arrayAppend(response.steps, "Fetched #len(pageHtml)# bytes via Playwright, #arrayLen(playwrightImages)# images captured")>
|
||||
|
||||
<!--- ========== WOOCOMMERCE FAST PATH ========== --->
<!---
    Purpose: when the Playwright-fetched HTML contains a WooCommerce marker, run the
    external extractor script and, if it yields usable items, emit the menu JSON and
    stop the request (cfabort). On any failure or an empty result a note is appended to
    response.steps and execution continues with the code below ("falling through to Claude").

    Reads:  pageHtml, targetUrl, response.steps
    Writes: response.steps; on success response.OK / DATA / sourceUrl / parsedVia
--->
<cfif findNoCase("woocommerce", pageHtml) OR findNoCase("wc-add-to-cart", pageHtml) OR findNoCase("tm-extra-product-options", pageHtml)>
    <cfset arrayAppend(response.steps, "WooCommerce site detected - running modifier extraction")>

    <!---
        Use the shop root URL (scheme + host[:port]).
        The pattern is anchored so text before the scheme can never survive the replace;
        when the pattern does not match, reReplaceNoCase returns its input unchanged,
        which the validation inside the cftry below then rejects.
    --->
    <cfset wooUrl = reReplaceNoCase(trim(targetUrl), "^(https?://[^/?\s]+).*$", "\1")>

    <cftry>
        <!---
            wooUrl is interpolated into the cfexecute argument string, so only allow
            plain host characters (no quotes, whitespace or shell metacharacters).
            NOTE(review): this also rejects userinfo (user@host) and bracketed IPv6
            hosts - confirm that is acceptable for the shops being imported.
        --->
        <cfif NOT reFindNoCase("^https?://[a-z0-9._:-]+$", wooUrl)>
            <cfthrow message="could not derive a safe shop root URL from the target URL">
        </cfif>

        <cfset wooOutput = "">
        <cfexecute name="/opt/playwright/run-woo-modifiers.sh" arguments="'#wooUrl#'" timeout="300" variable="wooOutput" />

        <cfif len(trim(wooOutput))>
            <cfset wooResult = deserializeJSON(wooOutput)>

            <cfif isStruct(wooResult) AND structKeyExists(wooResult, "items") AND isArray(wooResult.items) AND arrayLen(wooResult.items) GT 0>
                <!--- "modifiers" and "itemModifierMap" are optional in the script output; default them up front so nothing below dereferences a missing key --->
                <cfset wooModifiers = (structKeyExists(wooResult, "modifiers") AND isArray(wooResult.modifiers)) ? wooResult.modifiers : []>
                <cfset wooItemModMap = (structKeyExists(wooResult, "itemModifierMap") AND isStruct(wooResult.itemModifierMap)) ? wooResult.itemModifierMap : {}>

                <!--- Build categories from item category fields --->
                <cfset wooCats = {}>
                <cfset wooItems = []>
                <cfset wooImageMappings = {}>
                <cfset wooSkipped = 0>
                <cfloop from="1" to="#arrayLen(wooResult.items)#" index="wi">
                    <cfset wItem = wooResult.items[wi]>

                    <!--- An entry without a usable name cannot be keyed or displayed; skip it instead of discarding the whole extraction --->
                    <cfif NOT isStruct(wItem) OR NOT structKeyExists(wItem, "name") OR NOT isSimpleValue(wItem.name) OR NOT len(trim(wItem.name))>
                        <cfset wooSkipped = wooSkipped + 1>
                        <cfcontinue>
                    </cfif>

                    <!--- Items with no (or a blank) category are grouped under "Menu" --->
                    <cfset catName = (structKeyExists(wItem, "category") AND len(trim(wItem.category))) ? trim(wItem.category) : "Menu">
                    <cfif NOT structKeyExists(wooCats, catName)>
                        <cfset wooCats[catName] = 0>
                    </cfif>
                    <cfset wooCats[catName] = wooCats[catName] + 1>

                    <!--- The id keeps the item's position in the script output, so ids stay stable even when entries are skipped --->
                    <cfset itemId = "item_" & wi>
                    <cfset itemMods = (structKeyExists(wooItemModMap, wItem.name) AND isArray(wooItemModMap[wItem.name])) ? wooItemModMap[wItem.name] : []>
                    <cfset arrayAppend(wooItems, {
                        "id": itemId,
                        "name": wItem.name,
                        "price": structKeyExists(wItem, "price") ? val(wItem.price) : 0,
                        "description": structKeyExists(wItem, "description") ? wItem.description : "",
                        "category": catName,
                        "modifiers": itemMods,
                        "hasModifiers": arrayLen(itemMods) GT 0
                    })>

                    <!--- Image mappings are keyed by item name --->
                    <cfif structKeyExists(wItem, "imageUrl") AND len(trim(wItem.imageUrl))>
                        <cfset wooImageMappings[wItem.name] = wItem.imageUrl>
                    </cfif>
                </cfloop>

                <cfif wooSkipped GT 0>
                    <cfset arrayAppend(response.steps, "WooCommerce extraction: skipped " & wooSkipped & " entries without a usable name")>
                </cfif>

                <!--- Only answer from the fast path when at least one usable item survived --->
                <cfif arrayLen(wooItems) GT 0>
                    <cfset arrayAppend(response.steps, "WooCommerce extraction: " & arrayLen(wooItems) & " items, " & arrayLen(wooModifiers) & " modifier groups")>

                    <cfset wooCategories = []>
                    <cfloop collection="#wooCats#" item="wcName">
                        <cfset arrayAppend(wooCategories, { "name": wcName, "itemCount": wooCats[wcName] })>
                    </cfloop>

                    <!--- Business details, imageUrls and headerCandidateIndices are left empty by this path --->
                    <cfset menuData = {
                        "business": { "name": "", "address": "", "phone": "", "hours": "" },
                        "categories": wooCategories,
                        "items": wooItems,
                        "modifiers": wooModifiers,
                        "imageUrls": [],
                        "imageMappings": wooImageMappings,
                        "headerCandidateIndices": []
                    }>

                    <cfset response["OK"] = true>
                    <cfset response["DATA"] = menuData>
                    <cfset response["sourceUrl"] = targetUrl>
                    <cfset response["parsedVia"] = "woocommerce_playwright">
                    <cfcontent type="application/json" reset="true">
                    <cfoutput>#serializeJSON(response)#</cfoutput>
                    <cfabort>
                </cfif>
            </cfif>
        </cfif>

        <!--- Reached only when the success branch above did not abort the request --->
        <cfset arrayAppend(response.steps, "WooCommerce extraction returned no items - falling through to Claude")>
    <cfcatch>
        <!--- Best effort: any error (unsafe URL, script failure, invalid JSON) is logged as a step and the request continues --->
        <cfset arrayAppend(response.steps, "WooCommerce extraction failed: " & cfcatch.message & " - falling through to Claude")>
    </cfcatch>
    </cftry>
</cfif>
<!--- ========== END WOOCOMMERCE FAST PATH ========== --->
|
||||
|
||||
<!--- Derive URL prefixes from targetUrl for resolving relative links --->
<!--- basePath: for a typical http(s) URL, everything before the first "?"; the pattern needs a "/" after the host, otherwise targetUrl is kept unchanged --->
<cfset basePath = reReplace(targetUrl, "(https?://[^/]+/[^?]*/?).*", "\1")>
<!--- baseUrl: scheme plus host, i.e. everything before the first "/" that follows "://" --->
<cfset baseUrl = reReplace(targetUrl, "(https?://[^/]+).*", "\1")>
|
||||
|
|
|
|||
Reference in a new issue