Add WooCommerce fast-path with Playwright modifier extraction

Detects WooCommerce sites from Playwright HTML (woocommerce, wc-add-to-cart,
tm-extra-product-options). Runs woo-modifiers.js which navigates all product
pages, extracts items with categories, and scrapes TMEPO/variation modifiers.
Falls through to Claude if extraction fails.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
John Mizerek 2026-03-07 14:24:22 -08:00
parent d9262e83c0
commit fb92748784

View file

@ -1059,6 +1059,86 @@
<!--- Take the image list from the Playwright result when it supplies one; otherwise start with an empty array --->
<cfif structKeyExists(playwrightResult, "images")>
<cfset playwrightImages = playwrightResult.images>
<cfelse>
<cfset playwrightImages = arrayNew(1)>
</cfif>
<!--- Log the size of the fetched HTML and the number of captured images as a processing step --->
<cfset arrayAppend(response.steps, "Fetched #len(pageHtml)# bytes via Playwright, #arrayLen(playwrightImages)# images captured")>
<!--- ========== WOOCOMMERCE FAST PATH ========== --->
<!---
    When the Playwright-rendered HTML contains a WooCommerce marker, run the external
    run-woo-modifiers.sh script against the shop root URL. If it returns at least one
    item, build the menu payload, emit it as JSON and end the request (cfabort).
    An empty result or any error is recorded in response.steps and processing
    continues with the code after this section (the step log calls this
    "falling through to Claude").
--->
<cfif findNoCase("woocommerce", pageHtml) OR findNoCase("wc-add-to-cart", pageHtml) OR findNoCase("tm-extra-product-options", pageHtml)>
    <cfset arrayAppend(response.steps, "WooCommerce site detected - running modifier extraction")>
    <!--- Use the shop root URL (scheme + host). If the pattern does not match, reReplace returns targetUrl unchanged. --->
    <cfset wooUrl = reReplace(targetUrl, "(https?://[^/]+).*", "\1")>
    <cftry>
        <!---
            wooUrl is placed inside a single-quoted argument of an external command.
            Refuse anything that is not a bare scheme://host value, or that contains
            whitespace, quotes or backslashes, so it cannot break out of the quoting
            or add extra arguments. The throw is handled by the cfcatch below.
        --->
        <cfif NOT reFind("^https?://[^/\s'""\\]+$", wooUrl)>
            <cfthrow message="Unsafe or unparseable shop root URL">
        </cfif>
        <cfset wooOutput = "">
        <cfexecute name="/opt/playwright/run-woo-modifiers.sh" arguments="'#wooUrl#'" timeout="300" variable="wooOutput" />
        <cfif len(trim(wooOutput))>
            <cfset wooResult = deserializeJSON(wooOutput)>
            <cfif isStruct(wooResult) AND structKeyExists(wooResult, "items") AND isArray(wooResult.items) AND arrayLen(wooResult.items) GT 0>
                <!---
                    Resolve the modifier list BEFORE it is used in the step message:
                    a result without a "modifiers" array must not throw and discard
                    items that were extracted successfully.
                --->
                <cfset wooModifiers = (structKeyExists(wooResult, "modifiers") AND isArray(wooResult.modifiers)) ? wooResult.modifiers : []>
                <cfset arrayAppend(response.steps, "WooCommerce extraction: " & arrayLen(wooResult.items) & " items, " & arrayLen(wooModifiers) & " modifier groups")>
                <!--- Build categories from item category fields --->
                <cfset wooCats = {}>          <!--- category name -> item count --->
                <cfset wooCatOrder = []>      <!--- category names in first-seen order (struct key order is not guaranteed) --->
                <cfset wooItems = []>
                <cfset wooImageMappings = {}> <!--- item name -> image URL --->
                <cfloop from="1" to="#arrayLen(wooResult.items)#" index="wi">
                    <cfset wItem = wooResult.items[wi]>
                    <!--- Items without a usable category are grouped under "Menu" --->
                    <cfset catName = (structKeyExists(wItem, "category") AND len(trim(wItem.category))) ? trim(wItem.category) : "Menu">
                    <cfif NOT structKeyExists(wooCats, catName)>
                        <cfset wooCats[catName] = 0>
                        <cfset arrayAppend(wooCatOrder, catName)>
                    </cfif>
                    <cfset wooCats[catName] = wooCats[catName] + 1>
                    <cfset itemId = "item_" & wi>
                    <!--- Modifier groups are looked up by item name in the optional itemModifierMap --->
                    <cfset itemMods = (structKeyExists(wooResult, "itemModifierMap") AND structKeyExists(wooResult.itemModifierMap, wItem.name)) ? wooResult.itemModifierMap[wItem.name] : []>
                    <cfset arrayAppend(wooItems, {
                        "id": itemId,
                        "name": wItem.name,
                        "price": structKeyExists(wItem, "price") ? val(wItem.price) : 0,
                        "description": structKeyExists(wItem, "description") ? wItem.description : "",
                        "category": catName,
                        "modifiers": itemMods,
                        "hasModifiers": arrayLen(itemMods) GT 0
                    })>
                    <cfif structKeyExists(wItem, "imageUrl") AND len(trim(wItem.imageUrl))>
                        <cfset wooImageMappings[wItem.name] = wItem.imageUrl>
                    </cfif>
                </cfloop>
                <!--- Emit categories in the order they were first encountered --->
                <cfset wooCategories = []>
                <cfloop array="#wooCatOrder#" index="wcName">
                    <cfset arrayAppend(wooCategories, { "name": wcName, "itemCount": wooCats[wcName] })>
                </cfloop>
                <cfset menuData = {
                    "business": { "name": "", "address": "", "phone": "", "hours": "" },
                    "categories": wooCategories,
                    "items": wooItems,
                    "modifiers": wooModifiers,
                    "imageUrls": [],
                    "imageMappings": wooImageMappings,
                    "headerCandidateIndices": []
                }>
                <cfset response["OK"] = true>
                <cfset response["DATA"] = menuData>
                <cfset response["sourceUrl"] = targetUrl>
                <cfset response["parsedVia"] = "woocommerce_playwright">
                <!--- Discard any buffered output, send the JSON response and stop the request here --->
                <cfcontent type="application/json" reset="true">
                <cfoutput>#serializeJSON(response)#</cfoutput>
                <cfabort>
            </cfif>
        </cfif>
        <!--- Reached only when the script produced no output or no items --->
        <cfset arrayAppend(response.steps, "WooCommerce extraction returned no items - falling through to Claude")>
        <cfcatch>
            <cfset arrayAppend(response.steps, "WooCommerce extraction failed: " & cfcatch.message & " - falling through to Claude")>
        </cfcatch>
    </cftry>
</cfif>
<!--- ========== END WOOCOMMERCE FAST PATH ========== --->
<!--- Extract base URL for resolving relative links --->
<!---
    baseUrl:  keeps the "http(s)://host" part of targetUrl and drops everything after it.
    basePath: keeps "http(s)://host/" plus everything up to (not including) the first "?",
              i.e. the query string is removed; this pattern requires a "/" after the host.
    In both cases, when the pattern does not match, reReplace leaves targetUrl unchanged.
--->
<cfset baseUrl = reReplace(targetUrl, "(https?://[^/]+).*", "\1")>
<cfset basePath = reReplace(targetUrl, "(https?://[^/]+/[^?]*/?).*", "\1")>