Fix every-item-as-category pattern in menu import

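Post-process the Claude menu extraction to detect when more than 60% of
categories have exactly 1 item (a common misparse in which each menu item
is emitted as its own category). The check only runs when there are more
than 10 categories, the category count exceeds half the item count, and at
least one category came back with no items. Pseudo-categories are then
collapsed into the nearest preceding real category, i.e. a section header
that was returned with 0 items of its own.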

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
John Mizerek 2026-03-03 16:51:09 -08:00
parent 84985d98d8
commit a44dfd79ae

View file

@ -2061,6 +2061,61 @@
</cfloop>
<cfset menuData["categories"] = formattedCategories>
<!--- Fix "every item is a category" pattern --->
<!---
    Purpose: repair an extraction result in which individual menu items were
    emitted as categories of their own.

    Reads:  menuData.items (each item is accessed via .category),
            formattedCategories (each entry is accessed via .name).
    Writes: menuData.items[n].category, menuData.categories,
            formattedCategories, response.steps.

    The pass runs only when there are more than 10 categories, at least one
    item, and the category count exceeds half the item count. It then acts only
    if more than 60% of categories hold exactly one item AND at least one
    category holds none. Zero-item categories are treated as the real section
    headers; every single-item category is folded into the closest zero-item
    category that precedes it in formattedCategories (or the first zero-item
    category if none precedes it).

    Categories holding two or more items are kept untouched: they are genuine
    categories, not pseudo-categories, so their items are not reassigned and
    they remain in the rebuilt list in their original position.

    Name comparisons use EQ, matching the rest of this pass.
--->
<cfset totalItems = arrayLen(menuData.items)>
<cfset totalCats = arrayLen(formattedCategories)>
<cfif totalCats GT 10 AND totalItems GT 0 AND totalCats GT totalItems * 0.5>
    <!--- Tally items per category once. catCounts[i] belongs to formattedCategories[i]
          and is reused below so categories do not have to be looked up by name again. --->
    <cfset catCounts = []>
    <cfset zeroCats = []>
    <cfset singleCats = []>
    <cfloop array="#formattedCategories#" index="fc">
        <cfset fcCount = 0>
        <cfloop array="#menuData.items#" index="fi">
            <cfif fi.category EQ fc.name><cfset fcCount++></cfif>
        </cfloop>
        <cfset arrayAppend(catCounts, fcCount)>
        <cfif fcCount EQ 0>
            <!--- No items of its own: treated as a real section header --->
            <cfset arrayAppend(zeroCats, fc.name)>
        <cfelseif fcCount EQ 1>
            <!--- Exactly one item: candidate pseudo-category --->
            <cfset arrayAppend(singleCats, fc.name)>
        </cfif>
    </cfloop>
    <!--- If >60% of categories have exactly 1 item, this is the broken pattern.
          At least one zero-item category is required, otherwise there is nothing to collapse into. --->
    <cfif arrayLen(singleCats) GT totalCats * 0.6 AND arrayLen(zeroCats) GT 0>
        <cfset arrayAppend(response.steps, "Detected 'every item is a category' pattern (" & arrayLen(singleCats) & " single-item cats, " & arrayLen(zeroCats) & " empty cats) - collapsing")>
        <!--- Walk the categories in order. currentParent tracks the most recent zero-item
              category; it starts at the first one so that pseudo-categories listed before
              any header still get a parent. --->
        <cfset currentParent = zeroCats[1]>
        <cfset keptCats = []>
        <cfloop from="1" to="#totalCats#" index="ci">
            <cfset fc = formattedCategories[ci]>
            <cfif catCounts[ci] EQ 0>
                <!--- Real header: becomes the parent for the pseudo-categories that follow --->
                <cfset currentParent = fc.name>
                <cfset arrayAppend(keptCats, fc.name)>
            <cfelseif catCounts[ci] EQ 1>
                <!--- Pseudo-category: move its single item under currentParent and drop the category --->
                <cfloop from="1" to="#totalItems#" index="ii">
                    <cfif menuData.items[ii].category EQ fc.name>
                        <cfset menuData.items[ii]["category"] = currentParent>
                    </cfif>
                </cfloop>
            <cfelse>
                <!--- Two or more items: a genuine category. Keep it and leave its items alone. --->
                <cfset arrayAppend(keptCats, fc.name)>
            </cfif>
        </cfloop>
        <!--- Rebuild the category list from the kept categories, recounting items
              now that the pseudo-category items have been reassigned. --->
        <cfset fixedCategories = []>
        <cfloop array="#keptCats#" index="kc">
            <cfset kcCount = 0>
            <cfloop array="#menuData.items#" index="fi">
                <cfif fi.category EQ kc><cfset kcCount++></cfif>
            </cfloop>
            <cfset arrayAppend(fixedCategories, { "name": kc, "itemCount": kcCount })>
        </cfloop>
        <cfset menuData["categories"] = fixedCategories>
        <cfset formattedCategories = fixedCategories>
        <cfset arrayAppend(response.steps, "Collapsed to " & arrayLen(fixedCategories) & " categories")>
    </cfif>
</cfif>
<!--- Server-side hierarchy enforcement from HTML heading structure (backup if Claude returns flat) --->
<cfif structCount(headingHierarchy) GT 0>
<!--- Build reverse map: lowercase h3 name → raw h2 parent name --->