Add h4 tag debug to find subcategory tags

This commit is contained in:
John Mizerek 2026-02-12 19:40:51 -08:00
parent 3e9f07df1a
commit 436861970e

View file

@ -242,7 +242,7 @@
<cfset combinedHtml = left(combinedHtml, 100000)> <cfset combinedHtml = left(combinedHtml, 100000)>
</cfif> </cfif>
<!--- Debug: extract all h3 tags from HTML on server side ---> <!--- Debug: extract h3 and h4 tags from HTML on server side --->
<cfset h3Tags = reMatchNoCase("<h3[^>]*>([^<]*)</h3>", combinedHtml)> <cfset h3Tags = reMatchNoCase("<h3[^>]*>([^<]*)</h3>", combinedHtml)>
<cfset h3Texts = arrayNew(1)> <cfset h3Texts = arrayNew(1)>
<cfloop array="#h3Tags#" index="h3Tag"> <cfloop array="#h3Tags#" index="h3Tag">
@ -252,8 +252,19 @@
<cfset arrayAppend(h3Texts, h3Text)> <cfset arrayAppend(h3Texts, h3Text)>
</cfif> </cfif>
</cfloop> </cfloop>
<cfset response["DEBUG_H3_TAGS_FOUND"] = h3Texts> <cfset response["DEBUG_H3_TAGS"] = h3Texts>
<cfset arrayAppend(response.steps, "Found " & arrayLen(h3Texts) & " h3 tags in HTML")>
<!--- Debug: collect the trimmed text of every h4 heading found in combinedHtml.
      The capture group is [^<]*, so only headings whose content contains no "<"
      (i.e. no nested markup) are matched; this mirrors the h3 pattern. --->
<cfset h4Texts = []>
<cfset h4Tags = reMatchNoCase("<h4[^>]*>([^<]*)</h4>", combinedHtml)>
<cfloop array="#h4Tags#" index="h4Tag">
    <!--- Reduce the full match to its captured inner text and trim it in one step --->
    <cfset h4Text = trim(reReplaceNoCase(h4Tag, "<h4[^>]*>([^<]*)</h4>", "\1"))>
    <!--- Ignore headings that are empty or whitespace-only --->
    <cfif len(h4Text) GT 0>
        <cfset arrayAppend(h4Texts, h4Text)>
    </cfif>
</cfloop>
<!--- Publish the heading texts, then log a combined h3/h4 count as a processing step --->
<cfset response["DEBUG_H4_TAGS"] = h4Texts>
<cfset arrayAppend(response.steps, "Found #arrayLen(h3Texts)# h3 and #arrayLen(h4Texts)# h4 tags")>
<!--- System prompt for URL analysis ---> <!--- System prompt for URL analysis --->
<cfset systemPrompt = "You are an expert at extracting structured menu data from restaurant website HTML. Extract ALL menu data visible in the HTML. Return valid JSON with these keys: business (object with name, address, phone, hours, brandColor), categories (array of category names), modifiers (array of modifier templates with name, required boolean, appliesTo, categoryName if applicable, and options array), items (array with name, description, price, category, subcategory, modifiers array, and imageUrl if found). CRITICAL: Extract EVERY menu item visible in the HTML - do not skip any items. For items: if there is a subsection header (like an h3) above the item within its category section, include that as subcategory. For brandColor: suggest a vibrant hex color (6 digits, no hash symbol) based on the restaurant style. For hours: format as ""Mon-Fri 10:30am-10pm, Sat 11am-10pm, Sun 11am-9pm"". Include ALL days visible. For prices: extract as numbers (e.g., 12.99). For modifier options: use format {""name"": ""option"", ""price"": 0}. Return ONLY valid JSON, no markdown, no explanation."> <cfset systemPrompt = "You are an expert at extracting structured menu data from restaurant website HTML. Extract ALL menu data visible in the HTML. Return valid JSON with these keys: business (object with name, address, phone, hours, brandColor), categories (array of category names), modifiers (array of modifier templates with name, required boolean, appliesTo, categoryName if applicable, and options array), items (array with name, description, price, category, subcategory, modifiers array, and imageUrl if found). CRITICAL: Extract EVERY menu item visible in the HTML - do not skip any items. For items: if there is a subsection header (like an h3) above the item within its category section, include that as subcategory. For brandColor: suggest a vibrant hex color (6 digits, no hash symbol) based on the restaurant style. 
For hours: format as ""Mon-Fri 10:30am-10pm, Sat 11am-10pm, Sun 11am-9pm"". Include ALL days visible. For prices: extract as numbers (e.g., 12.99). For modifier options: use format {""name"": ""option"", ""price"": 0}. Return ONLY valid JSON, no markdown, no explanation.">