Scan all HTML files in ZIP for business info

- Extract directory and scan all .htm/.html files recursively
- Look for business name in title tags (skip generic titles)
- Extract street addresses with regex patterns
- Extract phone numbers
- Check __OO_STATE__ in other pages for Restaurant data
- Merge found info into toastBusiness (first found wins)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
John Mizerek 2026-02-13 10:13:13 -08:00
parent 90ed78fa96
commit cf34636879

View file

@@ -386,6 +386,124 @@
<cfset arrayAppend(response.steps, "Added default 'Menu' category for " & arrayLen(toastItems) & " items")> <cfset arrayAppend(response.steps, "Added default 'Menu' category for " & arrayLen(toastItems) & " items")>
</cfif> </cfif>
<!---
    Scan ALL HTML files in the ZIP for business info.

    Reads:  localFilePath (the main menu file, which is skipped here) and toastBusiness.
    Writes: toastBusiness keys name, addressLine1, city, state, zip, phone and brandColor.
            A key is only filled when it is missing or empty, so the first value found wins.
            Progress and failure messages are appended to response.steps.
    Every failure is caught and logged; this section never aborts the import.
--->
<cfset extractDir = getDirectoryFromPath(localFilePath)>
<!--- Go up one level if we're in a subfolder (e.g., Menu_files) --->
<!--- NOTE(review): this assumes the parent folder is the extraction root; with recurse="true"
      every HTML file beneath it is scanned. Confirm nothing unrelated can live there. --->
<cfif NOT findNoCase("menu-import", listLast(extractDir, "/\"))>
    <cfset extractDir = getDirectoryFromPath(left(extractDir, len(extractDir) - 1))>
</cfif>

<!--- Compare paths with one separator style: the candidate path below is built with "/",
      while localFilePath (and the directory column) may contain "\" --->
<cfset mainFilePathNormalized = replace(localFilePath, "\", "/", "all")>
<!--- Titles that name a page rather than a business --->
<cfset genericTitlePattern = "^(Menu|Home|About|Contact|Order|Online)$">

<cftry>
    <cfdirectory action="list" directory="#extractDir#" name="allHtmlFiles" filter="*.htm*" recurse="true" type="file">
    <cfset arrayAppend(response.steps, "Found " & allHtmlFiles.recordCount & " HTML files in ZIP")>

    <cfloop query="allHtmlFiles">
        <!--- Skip the main menu file we already processed --->
        <cfset otherFilePath = "#allHtmlFiles.directory#/#allHtmlFiles.name#">
        <cfif replace(otherFilePath, "\", "/", "all") EQ mainFilePathNormalized>
            <cfcontinue>
        </cfif>

        <cftry>
            <cfset otherHtml = fileRead(otherFilePath, "utf-8")>
            <cfset arrayAppend(response.steps, "Scanning " & allHtmlFiles.name & " for business info...")>

            <!--- Extract business name from title tag --->
            <cfif NOT structKeyExists(toastBusiness, "name") OR NOT len(toastBusiness.name)>
                <cfset otherTitleMatch = reMatchNoCase('<title[^>]*>([^<]+)</title>', otherHtml)>
                <cfif arrayLen(otherTitleMatch)>
                    <!--- reMatchNoCase returns the whole match, so pull the inner text out with a back-reference --->
                    <cfset otherTitle = reReplaceNoCase(otherTitleMatch[1], '.*<title[^>]*>([^<]+)</title>.*', '\1')>
                    <cfset otherTitle = trim(otherTitle)>
                    <!--- Skip generic titles --->
                    <cfif len(otherTitle) AND NOT reFindNoCase(genericTitlePattern, otherTitle)>
                        <!--- Keep only the part before the first "|" --->
                        <cfif findNoCase("|", otherTitle)>
                            <cfset otherTitle = trim(listFirst(otherTitle, "|"))>
                        </cfif>
                        <!--- Drop a trailing " - Menu", " - Order..." or " - Online..." suffix --->
                        <cfset otherTitle = reReplaceNoCase(otherTitle, "\s*-\s*(Menu|Order|Online).*$", "")>
                        <!--- Re-check for a generic title AFTER the cleanup: "Menu | Joe's Diner" passes the
                              first check but is reduced to "Menu", which must not become the business name --->
                        <cfif len(otherTitle) AND len(otherTitle) LT 100 AND NOT reFindNoCase(genericTitlePattern, otherTitle)>
                            <cfset toastBusiness["name"] = otherTitle>
                            <cfset arrayAppend(response.steps, "Found business name in " & allHtmlFiles.name & ": " & otherTitle)>
                        </cfif>
                    </cfif>
                </cfif>
            </cfif>

            <!--- Extract address - look for common patterns --->
            <cfif NOT structKeyExists(toastBusiness, "addressLine1") OR NOT len(toastBusiness.addressLine1)>
                <!--- Look for street address patterns (number + street name) --->
                <!--- NOTE(review): the street-suffix alternation has no word boundary, so text such as
                      "2 Large Drinks" matches through "Dr". Tightening it would also drop accidental hits
                      like "Plaza" (through "Pl"), so the pattern is left unchanged here. --->
                <cfset addrMatch = reMatchNoCase('(\d+\s+[A-Za-z0-9\s]+(?:St(?:reet)?|Ave(?:nue)?|Rd|Road|Blvd|Boulevard|Dr(?:ive)?|Ln|Lane|Way|Ct|Court|Pl(?:ace)?|Pkwy|Parkway)[.,]?\s*(?:Suite|Ste|##|Unit|Apt)?\s*[A-Za-z0-9\-]*)', otherHtml)>
                <cfif arrayLen(addrMatch)>
                    <cfset addrText = trim(addrMatch[1])>
                    <cfif len(addrText) GT 5 AND len(addrText) LT 100>
                        <cfset toastBusiness["addressLine1"] = addrText>
                        <cfset arrayAppend(response.steps, "Found address in " & allHtmlFiles.name & ": " & addrText)>
                    </cfif>
                </cfif>
            </cfif>

            <!--- Extract phone number --->
            <cfif NOT structKeyExists(toastBusiness, "phone") OR NOT len(toastBusiness.phone)>
                <cfset phoneMatch = reMatchNoCase('\(?(\d{3})\)?[-.\s]?(\d{3})[-.\s]?(\d{4})', otherHtml)>
                <cfif arrayLen(phoneMatch)>
                    <!--- Reformat the first match as NNN-NNN-NNNN --->
                    <cfset phoneText = reReplaceNoCase(phoneMatch[1], '.*\(?(\d{3})\)?[-.\s]?(\d{3})[-.\s]?(\d{4}).*', '\1-\2-\3')>
                    <cfif len(phoneText) GTE 10>
                        <cfset toastBusiness["phone"] = phoneText>
                        <cfset arrayAppend(response.steps, "Found phone in " & allHtmlFiles.name & ": " & phoneText)>
                    </cfif>
                </cfif>
            </cfif>

            <!--- Check for __OO_STATE__ in other files too (might have Restaurant info) --->
            <cfif findNoCase("window.__OO_STATE__", otherHtml)>
                <cfset otherOoMatch = reMatchNoCase("window\.__OO_STATE__\s*=\s*(\{.*?\});\s*window\.", otherHtml)>
                <cfif arrayLen(otherOoMatch)>
                    <!--- The match includes the assignment prefix and the following ";window." - strip both to leave the JSON --->
                    <cfset otherOoJson = reReplaceNoCase(otherOoMatch[1], "window\.__OO_STATE__\s*=\s*", "")>
                    <cfset otherOoJson = reReplace(otherOoJson, ";\s*window\.$", "")>
                    <cftry>
                        <cfset otherOoState = deserializeJSON(otherOoJson)>
                        <cfloop collection="#otherOoState#" item="otherKey">
                            <!--- Only struct-valued "Restaurant:..." entries are usable; a malformed one is
                                  skipped instead of throwing and abandoning the remaining keys --->
                            <cfif left(otherKey, 11) EQ "Restaurant:" AND isStruct(otherOoState[otherKey])>
                                <cfset otherRest = otherOoState[otherKey]>
                                <cfif structKeyExists(otherRest, "name") AND (NOT structKeyExists(toastBusiness, "name") OR NOT len(toastBusiness.name))>
                                    <cfset toastBusiness["name"] = otherRest.name>
                                    <cfset arrayAppend(response.steps, "Found business name in " & allHtmlFiles.name & " __OO_STATE__: " & otherRest.name)>
                                </cfif>
                                <cfif structKeyExists(otherRest, "location") AND isStruct(otherRest.location)>
                                    <cfset otherLoc = otherRest.location>
                                    <cfif structKeyExists(otherLoc, "address1") AND (NOT structKeyExists(toastBusiness, "addressLine1") OR NOT len(toastBusiness.addressLine1))>
                                        <cfset toastBusiness["addressLine1"] = otherLoc.address1>
                                    </cfif>
                                    <cfif structKeyExists(otherLoc, "city") AND (NOT structKeyExists(toastBusiness, "city") OR NOT len(toastBusiness.city))>
                                        <cfset toastBusiness["city"] = otherLoc.city>
                                    </cfif>
                                    <cfif structKeyExists(otherLoc, "state") AND (NOT structKeyExists(toastBusiness, "state") OR NOT len(toastBusiness.state))>
                                        <cfset toastBusiness["state"] = otherLoc.state>
                                    </cfif>
                                    <cfif structKeyExists(otherLoc, "zipCode") AND (NOT structKeyExists(toastBusiness, "zip") OR NOT len(toastBusiness.zip))>
                                        <cfset toastBusiness["zip"] = otherLoc.zipCode>
                                    </cfif>
                                    <cfif structKeyExists(otherLoc, "phone") AND (NOT structKeyExists(toastBusiness, "phone") OR NOT len(toastBusiness.phone))>
                                        <cfset toastBusiness["phone"] = otherLoc.phone>
                                    </cfif>
                                </cfif>
                                <!--- Store the brand colour without "#" characters --->
                                <cfif structKeyExists(otherRest, "brandColor") AND (NOT structKeyExists(toastBusiness, "brandColor") OR NOT len(toastBusiness.brandColor))>
                                    <cfset toastBusiness["brandColor"] = replace(otherRest.brandColor, "##", "")>
                                </cfif>
                            </cfif>
                        </cfloop>
                        <cfcatch>
                            <!--- Best effort: unusable state data is reported in the steps log, not raised --->
                            <cfset arrayAppend(response.steps, "Could not use __OO_STATE__ in " & allHtmlFiles.name & ": " & cfcatch.message)>
                        </cfcatch>
                    </cftry>
                </cfif>
            </cfif>

            <cfcatch>
                <!--- Skip files that can't be read or scanned, but leave a trace in the steps log --->
                <cfset arrayAppend(response.steps, "Skipped " & allHtmlFiles.name & ": " & cfcatch.message)>
            </cfcatch>
        </cftry>
    </cfloop>

    <cfcatch>
        <cfset arrayAppend(response.steps, "Could not scan other HTML files: " & cfcatch.message)>
    </cfcatch>
</cftry>
<cfset arrayAppend(response.steps, "Extracted " & arrayLen(toastItems) & " unique items from " & arrayLen(toastCategories) & " categories")> <cfset arrayAppend(response.steps, "Extracted " & arrayLen(toastItems) & " unique items from " & arrayLen(toastCategories) & " categories")>
<!--- Summary of business info found ---> <!--- Summary of business info found --->
<cfset bizKeys = structKeyList(toastBusiness)> <cfset bizKeys = structKeyList(toastBusiness)>