// api/setup/analyzeMenuUrl.php — new-side code of the hunk at line 107
// (these statements run inside the script's enclosing try block).

// 2. Business name from <title>. The title replaces the JSON-LD name only when
//    that name is empty or looks like an address (leading digit or a comma),
//    because many sites put their address in the LD "name" field.
if (!empty($html) && is_string($html) && preg_match('#<title\b[^>]*>([^<]+)#i', $html, $tm)) {
    // Strip boilerplate suffixes such as " - Menu" or " | Order Online".
    // The (string) cast turns a PCRE failure (null) into '', which is skipped below.
    $titleName = trim((string) preg_replace(
        '#\s*[-|]+\s*(Menu|Order|Online|Home|Welcome|Restaurant).*$#i',
        '',
        trim($tm[1])
    ));
    if ($titleName !== '') {
        $ldName = $bizInfo['name'] ?? '';
        $ldLooksLikeAddress = is_string($ldName)
            && (preg_match('/^\d/', $ldName) || strpos($ldName, ',') !== false);
        if (empty($ldName) || $ldLooksLikeAddress) {
            $bizInfo['name'] = $titleName;
        }
    }
}
$siteName = $bizInfo['name'] ?? '';

/**
 * Split a one-line (or multi-line) US postal address into components.
 *
 * @param string $address Full address text, e.g. "123 Main St, Columbus, OH 43215, USA".
 * @return array Subset of the keys 'zip', 'state', 'addressLine1', 'city';
 *               a key is present only when that component was recognised.
 */
$splitUsAddress = static function (string $address): array {
    $usStateCodes = [
        'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA', 'HI', 'ID', 'IL',
        'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD', 'MA', 'MI', 'MN', 'MS', 'MO', 'MT',
        'NE', 'NV', 'NH', 'NJ', 'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI',
        'SC', 'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY',
        'DC', 'PR', 'VI', 'GU', 'AS', 'MP',
    ];
    $separators = ", \t";
    $found = [];

    // Line breaks (LF or CRLF, with or without a trailing comma) become ", ".
    $addr = (string) preg_replace('/\s*,?\s*[\r\n]+\s*/', ', ', trim($address));

    // Drop a trailing country. A comma or whitespace must precede it so that
    // city names ending in "us" (Columbus, Emmaus, ...) are left intact.
    $addr = (string) preg_replace(
        '/(?:,\s*|\s+)(?:United States|USA|US|U\.S\.(?:A\.?)?)\s*$/i',
        '',
        $addr
    );
    $addr = rtrim($addr, $separators);

    // ZIP or ZIP+4 at the very end; only the 5-digit part is kept.
    $zipFound = preg_match('/\b(\d{5})(?:-\d{4})?\s*$/', $addr, $zm) === 1;
    if ($zipFound) {
        $found['zip'] = $zm[1];
        $addr = rtrim(substr($addr, 0, -strlen($zm[0])), $separators);
    }

    // Two-letter state at the end. It must be a real USPS code, and without a
    // ZIP it must follow a comma-separated segment, so street suffixes such as
    // "St", "Dr" or "Ct" are not mistaken for a state.
    if (preg_match('/(?:^|[\s,])([A-Za-z]{2})$/', $addr, $sm)) {
        $code = strtoupper($sm[1]);
        $beforeState = substr($addr, 0, -2);
        if (in_array($code, $usStateCodes, true)
            && ($zipFound || strpos($beforeState, ',') !== false)
        ) {
            $found['state'] = $code;
            $addr = rtrim($beforeState, $separators);
        }
    }

    // Last comma-separated segment is the city; everything before it (street,
    // suite, ...) stays together as address line 1.
    $parts = array_values(array_filter(
        array_map('trim', explode(',', $addr)),
        static function ($part) {
            return $part !== '';
        }
    ));
    if (count($parts) >= 2) {
        $city = array_pop($parts);
        $found['addressLine1'] = implode(', ', $parts);
        $found['city'] = $city;
    }

    return $found;
};

// Parse address into components if only the full string is available.
if (!empty($bizInfo['address']) && is_string($bizInfo['address']) && empty($bizInfo['addressLine1'])) {
    $bizInfo = array_merge($bizInfo, $splitUsAddress($bizInfo['address']));
}

/**
 * Rewrite 24-hour clock times ("09:00", "17:30", "17:00:00") in a text as
 * compact 12-hour times ("9am", "5:30pm", "5pm").
 *
 * Text that already contains am/pm markers is returned unchanged, as are
 * out-of-range values such as "25:00" or "12:75". "24:00" is treated as midnight.
 *
 * @param string $text Opening-hours text.
 * @return string Converted text, or the input unchanged if nothing applies.
 */
$hoursTo12Hour = static function (string $text): string {
    // Already 12-hour ("9 am", "9:00PM", "9 a.m."): converting again would
    // produce strings like "11am AM".
    if (preg_match('/\d\s*[ap]\.?m\b/i', $text)) {
        return $text;
    }

    $converted = preg_replace_callback(
        // HH:MM with optional :SS, not embedded in a longer digit/colon run.
        '/(?<![\d:])(\d{1,2}):(\d{2})(?::\d{2})?(?![\d:])/',
        static function (array $m): string {
            $hour = (int) $m[1];
            $minute = $m[2];
            if ($hour > 24 || (int) $minute > 59 || ($hour === 24 && $minute !== '00')) {
                return $m[0]; // not a valid clock time
            }
            $hour %= 24; // 24:00 -> 00:00
            $suffix = $hour >= 12 ? 'pm' : 'am';
            $hour12 = $hour % 12 === 0 ? 12 : $hour % 12;

            return $hour12 . ($minute !== '00' ? ":$minute" : '') . $suffix;
        },
        $text
    );

    // preg_replace_callback() returns null on a PCRE error; keep the input then.
    return $converted === null ? $text : $converted;
};

// Convert 24h hours to 12h format (a string, or an array whose string leaves are hours text).
if (!empty($bizInfo['hours'])) {
    if (is_string($bizInfo['hours'])) {
        $bizInfo['hours'] = $hoursTo12Hour($bizInfo['hours']);
    } elseif (is_array($bizInfo['hours'])) {
        array_walk_recursive($bizInfo['hours'], static function (&$entry) use ($hoursTo12Hour) {
            if (is_string($entry)) {
                $entry = $hoursTo12Hour($entry);
            }
        });
    }
}

// 3. Phone from tel: links (fallback)