Fix every-item-as-category pattern in menu import
Post-process the Claude menu extraction to detect when more than 60% of categories have exactly 1 item (a common misparse), and collapse those pseudo-categories into the nearest preceding real (0-item) category. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
84985d98d8
commit
a44dfd79ae
1 changed files with 55 additions and 0 deletions
|
|
@ -2061,6 +2061,61 @@
|
||||||
</cfloop>
|
</cfloop>
|
||||||
<cfset menuData["categories"] = formattedCategories>
|
<cfset menuData["categories"] = formattedCategories>
|
||||||
|
|
||||||
|
<!---
    Fix the "every item is a category" misparse.

    Symptom: the extraction emits one category per menu item, so most
    categories hold exactly 1 item, while the real section headings show up as
    categories with 0 items.

    Reads:  menuData.items (each item is accessed via .category),
            formattedCategories (each entry is accessed via .name).
    Writes: menuData.items[n].category, menuData.categories,
            formattedCategories, and appends progress text to response.steps.

    Only runs when there are more than 10 categories, at least one item, and
    more categories than half the item count.
--->
<cfset totalItems = arrayLen(menuData.items)>
<cfset totalCats = arrayLen(formattedCategories)>
<cfif totalCats GT 10 AND totalItems GT 0 AND totalCats GT totalItems * 0.5>
    <!---
        Count the items of every category once and remember the count by
        position (catItemCounts[n] belongs to formattedCategories[n]).
        0 items  = real heading category, 1 item = pseudo-category candidate,
        2+ items = ordinary category that must be left alone.
    --->
    <cfset catItemCounts = []>
    <cfset zeroCats = []>
    <cfset singleCats = []>
    <cfloop array="#formattedCategories#" index="fc">
        <cfset fcCount = 0>
        <cfloop array="#menuData.items#" index="fi">
            <cfif fi.category EQ fc.name><cfset fcCount++></cfif>
        </cfloop>
        <cfset arrayAppend(catItemCounts, fcCount)>
        <cfif fcCount EQ 0>
            <cfset arrayAppend(zeroCats, fc.name)>
        <cfelseif fcCount EQ 1>
            <cfset arrayAppend(singleCats, fc.name)>
        </cfif>
    </cfloop>

    <!--- Broken pattern: >60% of categories have exactly 1 item AND at least one empty category exists to collapse into --->
    <cfif arrayLen(singleCats) GT totalCats * 0.6 AND arrayLen(zeroCats) GT 0>
        <cfset arrayAppend(response.steps, "Detected 'every item is a category' pattern (" & arrayLen(singleCats) & " single-item cats, " & arrayLen(zeroCats) & " empty cats) - collapsing")>

        <!---
            Walk the categories in their original order. The current parent is
            the most recent 0-item category; single-item categories seen
            before the first 0-item category fall back to that first one.
        --->
        <cfset currentParent = zeroCats[1]>
        <cfset keptCats = []>
        <cfloop from="1" to="#totalCats#" index="collapseIdx">
            <cfset fc = formattedCategories[collapseIdx]>
            <cfif catItemCounts[collapseIdx] EQ 0>
                <!--- Real heading category: becomes the parent for what follows --->
                <cfset currentParent = fc.name>
                <cfset arrayAppend(keptCats, fc.name)>
            <cfelseif catItemCounts[collapseIdx] EQ 1>
                <!--- Pseudo-category: move its single item under the current parent and drop the category --->
                <cfloop from="1" to="#arrayLen(menuData.items)#" index="ii">
                    <cfif menuData.items[ii].category EQ fc.name>
                        <cfset menuData.items[ii]["category"] = currentParent>
                    </cfif>
                </cfloop>
            <cfelse>
                <!---
                    Category with 2+ items: this is not the misparse pattern.
                    Keep the category and its items untouched instead of
                    folding them into the preceding heading and deleting it.
                --->
                <cfset arrayAppend(keptCats, fc.name)>
            </cfif>
        </cfloop>

        <!---
            Rebuild the category list from the kept categories, in original
            order, with item counts recomputed after the reassignment.
            NOTE(review): rebuilt entries carry only "name" and "itemCount";
            any other keys on the original category structs are not copied -
            confirm nothing downstream relies on them.
        --->
        <cfset fixedCategories = []>
        <cfloop array="#keptCats#" index="zc">
            <cfset zcCount = 0>
            <cfloop array="#menuData.items#" index="fi">
                <cfif fi.category EQ zc><cfset zcCount++></cfif>
            </cfloop>
            <cfset arrayAppend(fixedCategories, { "name": zc, "itemCount": zcCount })>
        </cfloop>
        <cfset menuData["categories"] = fixedCategories>
        <cfset formattedCategories = fixedCategories>
        <cfset arrayAppend(response.steps, "Collapsed to " & arrayLen(fixedCategories) & " categories")>
    </cfif>
</cfif>
|
||||||
|
|
||||||
<!--- Server-side hierarchy enforcement from HTML heading structure (backup if Claude returns flat) --->
|
<!--- Server-side hierarchy enforcement from HTML heading structure (backup if Claude returns flat) --->
|
||||||
<cfif structCount(headingHierarchy) GT 0>
|
<cfif structCount(headingHierarchy) GT 0>
|
||||||
<!--- Build reverse map: lowercase h3 name → raw h2 parent name --->
|
<!--- Build reverse map: lowercase h3 name → raw h2 parent name --->
|
||||||
|
|
|
||||||
Reference in a new issue