Improve business info extraction from saved Toast pages
Added multiple fallback methods to extract the business name:
1. Title tag with Toast-specific parsing
2. og:title and og:site_name meta tags
3. Header elements with restaurant/location/brand classes
4. First h1 tag as a last resort

Also added address and phone extraction from the visible HTML, plus summary logging of the business info keys that were found.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
eec44011f4
commit
b081e72347
1 changed files with 93 additions and 6 deletions
|
|
@ -172,20 +172,101 @@
|
|||
</cfloop>
|
||||
</cfif>
|
||||
|
||||
<!--- Try multiple sources for business name --->

<!---
    1. Title tag first.
    Toast titles usually look like "Restaurant Name | Online Ordering", so only the
    part before the first pipe is kept, and a trailing " - Menu/Order/Online..." suffix
    is stripped. Each step is recorded in response.steps for debugging.
--->
<cfset titleMatch = reMatchNoCase('<title[^>]*>([^<]+)</title>', pageHtml)>
<cfset arrayAppend(response.steps, "Title tag matches: " & arrayLen(titleMatch))>
<cfif arrayLen(titleMatch)>
    <!--- reMatchNoCase returns the whole matched tag, so strip the markup to get the text --->
    <cfset titleText = trim(reReplaceNoCase(titleMatch[1], '.*<title[^>]*>([^<]+)</title>.*', '\1'))>
    <cfset arrayAppend(response.steps, "Raw title: " & left(titleText, 100))>
    <!--- Toast titles: "Restaurant Name | Online Ordering" --->
    <cfif findNoCase("|", titleText)>
        <cfset titleText = trim(listFirst(titleText, "|"))>
    </cfif>
    <!--- Remove common suffixes --->
    <cfset titleText = reReplaceNoCase(titleText, "\s*-\s*(Menu|Order|Online).*$", "")>
    <!--- Same guard as the later fallbacks: a missing OR empty name may be filled in --->
    <cfif len(titleText) AND (NOT structKeyExists(toastBusiness, "name") OR NOT len(toastBusiness.name))>
        <cfset toastBusiness["name"] = titleText>
        <cfset arrayAppend(response.steps, "Business name from title: " & titleText)>
    </cfif>
</cfif>
|
||||
|
||||
<!---
    2. Try og:title or og:site_name meta tags.
    Runs only when no non-empty name has been found yet. The content value is
    delimited by whichever quote character opened it (captured, then re-matched with a
    backreference), so an apostrophe inside a double-quoted value such as
    content="Joe's Pizza" is kept instead of truncating the name to "Joe".
--->
<cfif NOT structKeyExists(toastBusiness, "name") OR NOT len(toastBusiness.name)>
    <!--- property="og:..." appearing before content="..." --->
    <cfset ogMatch = reMatchNoCase('<meta[^>]*property=["'']og:(?:site_name|title)["''][^>]*content=(["''])([^>]+?)\1', pageHtml)>
    <cfif NOT arrayLen(ogMatch)>
        <!--- Try alternate attribute order: content="..." before property="og:..." --->
        <cfset ogMatch = reMatchNoCase('<meta[^>]*content=(["''])([^>]+?)\1[^>]*property=["'']og:(?:site_name|title)["'']', pageHtml)>
    </cfif>
    <cfif arrayLen(ogMatch)>
        <!--- reMatchNoCase returns the whole matched fragment; pull out just the content value --->
        <cfset ogText = trim(reReplaceNoCase(ogMatch[1], '^[^>]*content=(["''])([^>]+?)\1[^>]*$', '\2'))>
        <!--- Same "Name | Online Ordering" convention as the title tag --->
        <cfif findNoCase("|", ogText)>
            <cfset ogText = trim(listFirst(ogText, "|"))>
        </cfif>
        <cfif len(ogText)>
            <cfset toastBusiness["name"] = ogText>
            <cfset arrayAppend(response.steps, "Business name from og:meta: " & ogText)>
        </cfif>
    </cfif>
</cfif>
|
||||
|
||||
<!---
    3. Try looking for restaurant name in header/nav area (Toast-specific).
    Looks at h1/div elements whose class contains "restaurant", "location" or "brand".
    Every match is examined in document order and the first one with usable text wins:
    a wrapper element followed only by whitespace before its first child tag matches
    the pattern too, and must not hide a later element that carries the actual name.
--->
<cfif NOT structKeyExists(toastBusiness, "name") OR NOT len(toastBusiness.name)>
    <cfset headerMatch = reMatchNoCase('<(?:h1|div)[^>]*class="[^"]*(?:restaurant|location|brand)[^"]*"[^>]*>([^<]+)<', pageHtml)>
    <cfloop from="1" to="#arrayLen(headerMatch)#" index="headerIdx">
        <!--- Strip the opening tag and the trailing "<" to leave only the element text --->
        <cfset headerText = trim(reReplaceNoCase(headerMatch[headerIdx], '.*>([^<]+)<.*', '\1'))>
        <!--- Ignore whitespace-only text and anything too long to plausibly be a name --->
        <cfif len(headerText) AND len(headerText) LT 100>
            <cfset toastBusiness["name"] = headerText>
            <cfset arrayAppend(response.steps, "Business name from header: " & headerText)>
            <cfbreak>
        </cfif>
    </cfloop>
</cfif>
|
||||
|
||||
<!--- 4. Last resort: use the text of the first h1 element that contains only text --->
<cfscript>
if (NOT structKeyExists(toastBusiness, "name") OR NOT len(toastBusiness.name)) {
    h1Match = reMatchNoCase('<h1[^>]*>([^<]+)</h1>', pageHtml);
    if (arrayLen(h1Match)) {
        // The match is the whole element; reduce it to its trimmed inner text
        h1Text = trim(reReplaceNoCase(h1Match[1], '.*<h1[^>]*>([^<]+)</h1>.*', '\1'));
        // Accept only non-empty text shorter than 100 characters
        if (len(h1Text) AND len(h1Text) LT 100) {
            toastBusiness["name"] = h1Text;
            arrayAppend(response.steps, "Business name from h1: " & h1Text);
        }
    }
}
</cfscript>
|
||||
|
||||
<!--- Address fallback: read it from the first element whose class contains "address" --->
<cfscript>
if (NOT structKeyExists(toastBusiness, "addressLine1")) {
    // Element must hold plain text directly followed by a closing tag
    addrMatch = reMatchNoCase('<[^>]*class="[^"]*address[^"]*"[^>]*>([^<]+)</[^>]+>', pageHtml);
    if (arrayLen(addrMatch)) {
        // Reduce the matched element to its trimmed inner text
        addrText = trim(reReplaceNoCase(addrMatch[1], '.*>([^<]+)</.*', '\1'));
        // Accept only non-empty text shorter than 200 characters
        if (len(addrText) AND len(addrText) LT 200) {
            toastBusiness["addressLine1"] = addrText;
            arrayAppend(response.steps, "Address from HTML: " & left(addrText, 50));
        }
    }
}
</cfscript>
|
||||
|
||||
<!---
    Try to extract phone from visible HTML.
    Looks for a 10-digit number right after "tel:" or after an attribute value
    containing "phone" that closes the tag (phone...">). An optional leading country
    code ("+1", "1", "1-") is accepted so tel:+15551234567 is recognised and
    tel:15551234567 is not misread as 155-512-3456. The stored value is normalised
    to NNN-NNN-NNNN.
--->
<cfif NOT structKeyExists(toastBusiness, "phone")>
    <cfset phoneMatch = reMatchNoCase('(?:tel:|phone[^"]*">)\s*(?:\+?1[-.\s]?)?\(?(\d{3})\)?[-.\s]?(\d{3})[-.\s]?(\d{4})', pageHtml)>
    <cfif arrayLen(phoneMatch)>
        <!---
            The match always ends with the number itself, so after dropping every
            non-digit the last 10 digits are the phone number, regardless of a country
            code or digits that happen to appear in the "phone..." prefix.
        --->
        <cfset phoneDigits = right(reReplace(phoneMatch[1], '[^0-9]', '', 'all'), 10)>
        <cfif len(phoneDigits) EQ 10>
            <cfset phoneText = left(phoneDigits, 3) & "-" & mid(phoneDigits, 4, 3) & "-" & right(phoneDigits, 4)>
            <cfset toastBusiness["phone"] = phoneText>
            <cfset arrayAppend(response.steps, "Phone from HTML: " & phoneText)>
        </cfif>
    </cfif>
</cfif>
|
||||
|
||||
|
|
@ -276,6 +357,12 @@
|
|||
</cfif>
|
||||
|
||||
<cfset arrayAppend(response.steps, "Extracted " & arrayLen(toastItems) & " unique items from " & arrayLen(toastCategories) & " categories")>
|
||||
<!--- Debug summary: list which business-info keys were collected, and the name if present --->
<cfscript>
bizKeys = structKeyList(toastBusiness);
if (len(bizKeys)) {
    arrayAppend(response.steps, "Business info keys: " & bizKeys);
} else {
    // Nothing was extracted at all
    arrayAppend(response.steps, "Business info keys: " & "(none)");
}
if (structKeyExists(toastBusiness, "name")) {
    arrayAppend(response.steps, "Business name: " & toastBusiness.name);
}
</cfscript>
|
||||
|
||||
<!--- Return directly without Claude --->
|
||||
<cfset response["OK"] = true>
|
||||
|
|
|
|||
Reference in a new issue