From b081e723477de9a92fbe23a2723274d7d36cf050 Mon Sep 17 00:00:00 2001
From: John Mizerek <mizerek@gmail.com>
Date: Fri, 13 Feb 2026 09:26:37 -0800
Subject: [PATCH] Improve business info extraction from saved Toast pages

Added multiple fallback methods to extract business name:
1. Title tag with Toast-specific parsing
2. og:title and og:site_name meta tags
3. Header elements with restaurant/location classes
4. First h1 tag as last resort

Also added address and phone extraction from visible HTML.
Added summary logging of business info keys found.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 api/setup/analyzeMenuUrl.cfm | 99 +++++++++++++++++++++++++++++++++---
 1 file changed, 93 insertions(+), 6 deletions(-)
diff --git a/api/setup/analyzeMenuUrl.cfm b/api/setup/analyzeMenuUrl.cfm
index 0041566..2701563 100644
--- a/api/setup/analyzeMenuUrl.cfm
+++ b/api/setup/analyzeMenuUrl.cfm
@@ -172,20 +172,101 @@
                         </cfloop>
                     </cfif>
 
-                    <!--- Try to extract business name from title tag --->
-                    <cfset titleMatch = reMatchNoCase('<title>([^<]+)</title>', pageHtml)>
+                    <!--- Try multiple sources for business name --->
+
+                    <!--- 1. Title tag first (attributes on the title tag are allowed). NOTE(review): the guard below only checks that "name" is absent, while steps 2-4 also replace an empty name - confirm the difference is intended --->
+                    <cfset titleMatch = reMatchNoCase('<title[^>]*>([^<]+)</title>', pageHtml)>
                     <cfset arrayAppend(response.steps, "Title tag matches: " & arrayLen(titleMatch))>
                     <cfif arrayLen(titleMatch)>
-                        <cfset titleText = reReplaceNoCase(titleMatch[1], '.*<title>([^<]+)</title>.*', '\1')>
+                        <cfset titleText = reReplaceNoCase(titleMatch[1], '.*<title[^>]*>([^<]+)</title>.*', '\1')>
                         <cfset titleText = trim(titleText)>
-                        <cfset arrayAppend(response.steps, "Raw title text: " & left(titleText, 100))>
-                        <!--- Toast titles are usually "Restaurant Name | Online Ordering" --->
+                        <cfset arrayAppend(response.steps, "Raw title: " & left(titleText, 100))>
+                        <!--- Toast titles look like "Restaurant Name | Online Ordering": keep only the first pipe-delimited segment --->
                         <cfif findNoCase("|", titleText)>
                             <cfset titleText = trim(listFirst(titleText, "|"))>
                         </cfif>
+                        <!--- Strip a trailing " - Menu...", " - Order..." or " - Online..." suffix (case-insensitive), including everything after it --->
+                        <cfset titleText = reReplaceNoCase(titleText, "\s*-\s*(Menu|Order|Online).*$", "")>
                         <cfif len(titleText) AND NOT structKeyExists(toastBusiness, "name")>
                             <cfset toastBusiness["name"] = titleText>
-                            <cfset arrayAppend(response.steps, "Extracted business name from title: " & titleText)>
+                            <cfset arrayAppend(response.steps, "Business name from title: " & titleText)>
+                        </cfif>
+                    </cfif>
+
+                    <!--- 2. Try og:title or og:site_name meta tags. The content value is matched up to its own closing quote, so a name containing the other quote character (e.g. content="Joe's Pizza") is not truncated --->
+                    <cfif NOT structKeyExists(toastBusiness, "name") OR NOT len(toastBusiness.name)>
+                        <cfset ogMatch = reMatchNoCase('<meta[^>]*property=["'']og:(site_name|title)["''][^>]*content=("[^"]+"|''[^'']+'')', pageHtml)>
+                        <cfif NOT arrayLen(ogMatch)>
+                            <!--- Try alternate attribute order (content before property) --->
+                            <cfset ogMatch = reMatchNoCase('<meta[^>]*content=("[^"]+"|''[^'']+'')[^>]*property=["'']og:(site_name|title)["'']', pageHtml)>
+                        </cfif>
+                        <cfif arrayLen(ogMatch)>
+                            <!--- Pull the value out using the quote character that actually delimits it: double-quoted form
+                                  first, then single-quoted. A pattern that does not apply leaves the text unchanged. --->
+                            <cfset ogText = reReplaceNoCase(ogMatch[1], '.*content="([^"]+)".*', '\1')>
+                            <cfset ogText = reReplaceNoCase(ogText, '.*content=''([^'']+)''.*', '\1')>
+                            <cfset ogText = trim(ogText)>
+                            <cfif findNoCase("|", ogText)>
+                                <cfset ogText = trim(listFirst(ogText, "|"))>
+                            </cfif>
+                            <cfif len(ogText)>
+                                <cfset toastBusiness["name"] = ogText>
+                                <cfset arrayAppend(response.steps, "Business name from og:meta: " & ogText)>
+                            </cfif>
+                        </cfif>
+                    </cfif>
+
+                    <!--- 3. Fallback: text of the first h1/div whose class attribute contains "restaurant", "location" or "brand"; used only when non-empty and shorter than 100 characters --->
+                    <cfif NOT structKeyExists(toastBusiness, "name") OR NOT len(toastBusiness.name)>
+                        <!--- Only double-quoted class attributes match, and the text must follow the opening tag directly (no nested markup). NOTE(review): that Toast pages use such classes is the original author's assumption - confirm against real pages --->
+                        <cfset headerMatch = reMatchNoCase('<(?:h1|div)[^>]*class="[^"]*(?:restaurant|location|brand)[^"]*"[^>]*>([^<]+)<', pageHtml)>
+                        <cfif arrayLen(headerMatch)>
+                            <cfset headerText = reReplaceNoCase(headerMatch[1], '.*>([^<]+)<.*', '\1')>
+                            <cfset headerText = trim(headerText)>
+                            <cfif len(headerText) AND len(headerText) LT 100>
+                                <cfset toastBusiness["name"] = headerText>
+                                <cfset arrayAppend(response.steps, "Business name from header: " & headerText)>
+                            </cfif>
+                        </cfif>
+                    </cfif>
+
+                    <!--- 4. Last resort: text of the first h1 whose content is plain text (no nested tags); ignored when empty or 100+ characters long --->
+                    <cfif NOT structKeyExists(toastBusiness, "name") OR NOT len(toastBusiness.name)>
+                        <cfset h1Match = reMatchNoCase('<h1[^>]*>([^<]+)</h1>', pageHtml)>
+                        <cfif arrayLen(h1Match)>
+                            <cfset h1Text = reReplaceNoCase(h1Match[1], '.*<h1[^>]*>([^<]+)</h1>.*', '\1')>
+                            <cfset h1Text = trim(h1Text)>
+                            <cfif len(h1Text) AND len(h1Text) LT 100>
+                                <cfset toastBusiness["name"] = h1Text>
+                                <cfset arrayAppend(response.steps, "Business name from h1: " & h1Text)>
+                            </cfif>
+                        </cfif>
+                    </cfif>
+
+                    <!--- Address fallback (runs only when addressLine1 is not already set): take the text of the first element whose class contains "address" --->
+                    <cfif NOT structKeyExists(toastBusiness, "addressLine1")>
+                        <!--- Requires a double-quoted class attribute and tag-free text up to the closing tag; the whole text (under 200 characters) is stored in addressLine1 without splitting out city/state/zip --->
+                        <cfset addrMatch = reMatchNoCase('<[^>]*class="[^"]*address[^"]*"[^>]*>([^<]+)</[^>]+>', pageHtml)>
+                        <cfif arrayLen(addrMatch)>
+                            <cfset addrText = reReplaceNoCase(addrMatch[1], '.*>([^<]+)</.*', '\1')>
+                            <cfset addrText = trim(addrText)>
+                            <cfif len(addrText) AND len(addrText) LT 200>
+                                <cfset toastBusiness["addressLine1"] = addrText>
+                                <cfset arrayAppend(response.steps, "Address from HTML: " & left(addrText, 50))>
+                            </cfif>
+                        </cfif>
+                    </cfif>
+
+                    <!--- Try to extract phone from visible HTML (only when not already set); stored as NNN-NNN-NNNN --->
+                    <cfif NOT structKeyExists(toastBusiness, "phone")>
+                        <!--- Match a 10-digit number after "tel:" or phone-classed markup. An optional US country code ("+1", "1-", "1 ") is skipped so tel:+15551234567 is found and tel:18005551234 yields 800-555-1234 rather than a shifted number --->
+                        <cfset phoneMatch = reMatchNoCase('(?:tel:|phone[^"]*">)\s*(?:\+?1[-.\s]?)?\(?(\d{3})\)?[-.\s]?(\d{3})[-.\s]?(\d{4})', pageHtml)>
+                        <cfif arrayLen(phoneMatch)>
+                            <cfset phoneText = reReplaceNoCase(phoneMatch[1], '.*(\d{3}).*(\d{3}).*(\d{4}).*', '\1-\2-\3')>
+                            <cfif len(phoneText) GTE 10>
+                                <cfset toastBusiness["phone"] = phoneText>
+                                <cfset arrayAppend(response.steps, "Phone from HTML: " & phoneText)>
+                            </cfif>
                         </cfif>
                     </cfif>
 
@@ -276,6 +357,12 @@
                     </cfif>
 
                     <cfset arrayAppend(response.steps, "Extracted " & arrayLen(toastItems) & " unique items from " & arrayLen(toastCategories) & " categories")>
+                    <!--- Log which toastBusiness keys were populated ("(none)" when the struct is empty), then the chosen business name if one was found --->
+                    <cfset bizKeys = structKeyList(toastBusiness)>
+                    <cfset arrayAppend(response.steps, "Business info keys: " & (len(bizKeys) ? bizKeys : "(none)"))>
+                    <cfif structKeyExists(toastBusiness, "name")>
+                        <cfset arrayAppend(response.steps, "Business name: " & toastBusiness.name)>
+                    </cfif>
 
                     <!--- Return directly without Claude --->
                     <cfset response["OK"] = true>