diff --git a/api/setup/analyzeMenuUrl.php b/api/setup/analyzeMenuUrl.php
index d6b0ca7..15ffb5e 100644
--- a/api/setup/analyzeMenuUrl.php
+++ b/api/setup/analyzeMenuUrl.php
@@ -188,6 +188,68 @@ try {
}
}
+    // 5. Fetch a same-origin contact/about page; phone, address and hours found there are written into $bizInfo.
+    $baseOrigin = preg_replace('#^(https?://[^/]+).*#', '$1', $discoverUrl);
+    $contactUrl = '';
+    // Anchor tags: group 1 is the href value, group 2 is the link text up to the next tag.
+    if (preg_match_all('#<a\s[^>]*href=["\']([^"\']+)["\'][^>]*>([^<]*)#i', $html, $linkMatches, PREG_SET_ORDER)) {
+        foreach ($linkMatches as $lm) {
+            $href = $lm[1];
+            $text = strtolower(trim($lm[2]));
+            if (preg_match('/\b(contact|about|location|find.?us|visit|hours)\b/i', $text) || preg_match('#/(contact|about|location|find-us|visit|hours)/?$#i', $href)) {
+                // Resolve root-relative paths only; a protocol-relative //host link names another host, not a path.
+                if (str_starts_with($href, '/') && !str_starts_with($href, '//')) $href = $baseOrigin . $href;
+                // Require a '/' right after the origin so a look-alike host such as <origin>.evil.example is rejected.
+                if ($href === $baseOrigin || str_starts_with($href, $baseOrigin . '/')) {
+                    $contactUrl = $href;
+                    break;
+                }
+            }
+        }
+    }
+    if ($contactUrl) {
+        $ch = curl_init($contactUrl);
+        curl_setopt_array($ch, [CURLOPT_RETURNTRANSFER => true, CURLOPT_TIMEOUT => 10, CURLOPT_FOLLOWLOCATION => true,
+            CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36']);
+        $contactHtml = curl_exec($ch);
+        curl_close($ch);
+        // Skip parsing when the fetch failed or the body is 100 bytes or fewer.
+        if ($contactHtml && strlen($contactHtml) > 100) {
+            // Drop <script>/<style> elements with their contents; fall back to the input if preg_replace() returns null.
+            $contactClean = preg_replace('#<script\b[^>]*>.*?</script>#is', '', $contactHtml) ?? $contactHtml;
+            $contactClean = preg_replace('#<style\b[^>]*>.*?</style>#is', '', $contactClean) ?? $contactClean;
+            $contactText = strip_tags($contactClean);
+
+            // Phone: (xxx) xxx-xxxx or xxx-xxx-xxxx or xxx.xxx.xxxx, optionally preceded by "P:" / "Phone:"
+            if (preg_match('/(?:P(?:hone)?:?\s*)?\(?\d{3}\)?[\s.-]\d{3}[\s.-]\d{4}/', $contactText, $cpm)) {
+                $bizInfo['phone'] = trim($cpm[0]);
+                // Strip the captured "P:" / "Phone:" prefix so only the number is stored
+                $bizInfo['phone'] = preg_replace('/^P(?:hone)?:?\s*/i', '', $bizInfo['phone']);
+            }
+
+            // Fallback: tel: link in the raw HTML. The hyphen sits last in the class so it is a literal, not a range.
+            if (empty($bizInfo['phone']) && preg_match('#href=["\']tel:([^"\']+)["\']#i', $contactHtml, $cpm2)) {
+                $bizInfo['phone'] = trim(preg_replace('/[^\d+()\s-]/', '', $cpm2[1]));
+            }
+
+            // Address: US street line (group 1), optionally followed by "City, ST 12345" (groups 2-4)
+            if (preg_match('/(\d+\s+[A-Z][a-zA-Z\s]+(?:Street|St|Avenue|Ave|Boulevard|Blvd|Road|Rd|Drive|Dr|Lane|Ln|Way|Place|Pl|Court|Ct)\.?)(?:\s*[,\n]?\s*([A-Z][a-zA-Z\s]+),\s*([A-Z]{2})\s*,?\s*(\d{5}))?/m', $contactText, $cam)) {
+                if (!empty($cam[1])) $bizInfo['addressLine1'] = trim($cam[1]);
+                if (!empty($cam[2])) $bizInfo['city'] = trim($cam[2]);
+                if (!empty($cam[3])) $bizInfo['state'] = strtoupper($cam[3]);
+                if (!empty($cam[4])) $bizInfo['zip'] = $cam[4];
+            }
+
+            // Hours: every "<day> <open>-<close>" occurrence, joined as "Day open-close, Day open-close"
+            $dayPattern = '(?:Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday|Mon|Tue|Wed|Thu|Fri|Sat|Sun)';
+            $timePattern = '\d{1,2}(?::\d{2})?\s*(?:am|pm)';
+            if (preg_match_all("/($dayPattern)\s*($timePattern)\s*-\s*($timePattern)/i", $contactText, $hm, PREG_SET_ORDER)) {
+                // A non-zero match count means $hm has at least one entry, so the joined string is never empty.
+                $bizInfo['hours'] = implode(', ', array_map(static fn (array $h): string => $h[1] . ' ' . $h[2] . '-' . $h[3], $hm));
+            }
+        }
+    }
+
// Extract menu names from sub-page URLs
$menuPages = [];
foreach ($subPages as $spUrl) {