diff --git a/api/setup/analyzeMenuUrl.php b/api/setup/analyzeMenuUrl.php
index 4196d56..7f0c222 100644
--- a/api/setup/analyzeMenuUrl.php
+++ b/api/setup/analyzeMenuUrl.php
@@ -53,13 +53,89 @@ try {
$subPages = $pwResult['subPagesVisited'] ?? [];
$platformPages = $pwResult['platformPagesVisited'] ?? [];
- // Extract business name from title
+ // Extract business info from main page
$html = $pwResult['html'] ?? '';
- $siteName = '';
- if (preg_match('#
]*>([^<]+)#i', $html, $tm)) {
+ $bizInfo = [];
+
+ // 1. Try JSON-LD structured data (most reliable)
+ if (preg_match_all('##i', $html, $ldMatches)) {
+ foreach ($ldMatches[1] as $ldJson) {
+ $ld = json_decode($ldJson, true);
+ if (!is_array($ld)) continue;
+ // Handle @graph wrapper
+ $entries = isset($ld['@graph']) ? $ld['@graph'] : [$ld];
+ foreach ($entries as $entry) {
+ $types = (array) ($entry['@type'] ?? []); // JSON-LD allows @type to be a single string or a list of strings
+ if (array_intersect($types, ['Restaurant', 'FoodEstablishment', 'LocalBusiness', 'CafeOrCoffeeShop', 'BarOrPub']) !== []) {
+ if (!empty($entry['name'])) $bizInfo['name'] = $entry['name'];
+ if (!empty($entry['telephone'])) $bizInfo['phone'] = $entry['telephone'];
+ if (!empty($entry['address'])) {
+ $a = $entry['address'];
+ if (is_string($a)) {
+ $bizInfo['address'] = $a;
+ } elseif (is_array($a)) {
+ if (!empty($a['streetAddress'])) $bizInfo['addressLine1'] = $a['streetAddress'];
+ if (!empty($a['addressLocality'])) $bizInfo['city'] = $a['addressLocality'];
+ if (!empty($a['addressRegion'])) $bizInfo['state'] = $a['addressRegion'];
+ if (!empty($a['postalCode'])) $bizInfo['zip'] = $a['postalCode'];
+ $bizInfo['address'] = trim(implode(', ', array_filter([
+ $a['streetAddress'] ?? '', $a['addressLocality'] ?? '',
+ $a['addressRegion'] ?? '', $a['postalCode'] ?? ''
+ ])));
+ }
+ }
+ if (!empty($entry['openingHours'])) {
+ $bizInfo['hours'] = is_array($entry['openingHours'])
+ ? implode(', ', $entry['openingHours'])
+ : $entry['openingHours'];
+ }
+ if (!empty($entry['openingHoursSpecification']) && is_array($entry['openingHoursSpecification'])) {
+ $dayMap = ['Monday'=>'Mon','Tuesday'=>'Tue','Wednesday'=>'Wed','Thursday'=>'Thu','Friday'=>'Fri','Saturday'=>'Sat','Sunday'=>'Sun'];
+ $hParts = [];
+ foreach ($entry['openingHoursSpecification'] as $spec) {
+ $days = $spec['dayOfWeek'] ?? [];
+ if (is_string($days)) $days = [$days];
+ $open = $spec['opens'] ?? '';
+ $close = $spec['closes'] ?? '';
+ $dayAbbrs = array_map(fn($d) => $dayMap[basename($d)] ?? $d, $days);
+ if ($open && $close) $hParts[] = implode('/', $dayAbbrs) . " $open-$close";
+ }
+ if (!empty($hParts)) $bizInfo['hours'] = implode(', ', $hParts);
+ }
+ }
+ }
+ }
+ }
+
+ // 2. Business name from title (fallback)
+ $siteName = $bizInfo['name'] ?? '';
+ if (empty($siteName) && preg_match('#]*>([^<]+)#i', $html, $tm)) {
$siteName = trim($tm[1]);
$siteName = preg_replace('#\s*[-|]+\s*(Menu|Order|Online|Home|Welcome|Restaurant).*$#i', '', $siteName);
$siteName = trim($siteName);
+ $bizInfo['name'] = $siteName;
+ }
+
+ // 3. Phone from tel: links (fallback); "-" is placed last in the class so PCRE2 reads it as a literal, not a range
+ if (empty($bizInfo['phone']) && preg_match('#href=["\']tel:([^"\']+)["\']#i', $html, $pm)) {
+ $bizInfo['phone'] = trim(preg_replace('/[^\d+()\s-]/', '', $pm[1]));
+ }
+
+ // 4. Address from common patterns (fallback)
+ if (empty($bizInfo['address'])) {
+ // Look for address in meta tags
+ if (preg_match('#]+(?:property|name)=["\'](?:og:street-address|business:contact_data:street_address)["\'][^>]+content=["\']([^"\']+)["\']#i', $html, $am)) {
+ $bizInfo['addressLine1'] = trim($am[1]);
+ }
+ if (preg_match('#]+(?:property|name)=["\'](?:og:locality|business:contact_data:locality)["\'][^>]+content=["\']([^"\']+)["\']#i', $html, $cm)) {
+ $bizInfo['city'] = trim($cm[1]);
+ }
+ if (preg_match('#]+(?:property|name)=["\'](?:og:region|business:contact_data:region)["\'][^>]+content=["\']([^"\']+)["\']#i', $html, $sm)) {
+ $bizInfo['state'] = trim($sm[1]);
+ }
+ if (preg_match('#]+(?:property|name)=["\'](?:og:postal-code|business:contact_data:postal_code)["\'][^>]+content=["\']([^"\']+)["\']#i', $html, $zm)) {
+ $bizInfo['zip'] = trim($zm[1]);
+ }
}
// Extract menu names from sub-page URLs
@@ -79,6 +155,7 @@ try {
'OK' => true,
'mode' => 'discover',
'siteName' => $siteName,
+ 'businessInfo' => $bizInfo,
'mainUrl' => $discoverUrl,
'menuPages' => $menuPages,
'platformPages' => $platformPages,