diff --git a/api/setup/analyzeMenuUrl.php b/api/setup/analyzeMenuUrl.php
index 0f7c391..d60ecb0 100644
--- a/api/setup/analyzeMenuUrl.php
+++ b/api/setup/analyzeMenuUrl.php
@@ -292,6 +292,31 @@ JSEOF;
     }
     $siteName = $bizInfo['name'] ?? '';
 
+    // 3. Extract header image from og:image or JSON-LD image
+    $headerImageUrl = '';
+    // Try og:image meta tag first (most common for restaurants); accept either attribute order
+    if (preg_match('#<meta[^>]+property=["\']og:image["\'][^>]*content=["\']([^"\']+)["\']#i', $html, $ogMatch)
+        || preg_match('#<meta[^>]+content=["\']([^"\']+)["\'][^>]*property=["\']og:image["\']#i', $html, $ogMatch)) {
+        // Attribute values are HTML-escaped (e.g. &amp; inside query strings), so decode before use
+        $headerImageUrl = html_entity_decode($ogMatch[1], ENT_QUOTES | ENT_HTML5, 'UTF-8');
+    }
+    // Fall back to JSON-LD. preg_match_all is required: the loop below walks every captured script body
+    if ($headerImageUrl === '' && preg_match_all('#<script[^>]*type=["\']application/ld\+json["\'][^>]*>(.*?)</script>#is', $html, $ldImgMatches)) {
+        foreach ($ldImgMatches[1] as $ldJson) {
+            $ld = json_decode($ldJson, true);
+            if (!is_array($ld)) continue;
+            $entries = (isset($ld['@graph']) && is_array($ld['@graph'])) ? $ld['@graph'] : [$ld];
+            foreach ($entries as $entry) {
+                if (!empty($entry['image'])) {
+                    $img = $entry['image'];
+                    if (is_string($img)) { $headerImageUrl = $img; break 2; }
+                    if (is_array($img) && !empty($img['url'])) { $headerImageUrl = $img['url']; break 2; }
+                    if (is_array($img) && isset($img[0])) { $headerImageUrl = is_string($img[0]) ? $img[0] : ($img[0]['url'] ?? ''); break 2; }
+                }
+            }
+        }
+    }
+
     // Parse address into components if only full string
     if (!empty($bizInfo['address']) && empty($bizInfo['addressLine1'])) {
         $addr = $bizInfo['address'];
@@ -443,6 +468,7 @@ JSEOF;
         'platformPages' => $platformPages,
         'hasPlatform' => $hasPlatform,
         'totalPagesFound' => count($menuPages),
+        'headerImageUrl' => $headerImageUrl,
     ]);
 }
 
diff --git a/api/setup/downloadImages.php b/api/setup/downloadImages.php
index 2926840..7fdf531 100644
--- a/api/setup/downloadImages.php
+++ b/api/setup/downloadImages.php
@@ -81,12 +81,6 @@ try {
     // Download header
     if (!empty($data['headerUrl'])) {
         $headerUrl = $data['headerUrl'];
-        $ext = '.jpg';
-        if (stripos($headerUrl, '.png') !== false) $ext = '.png';
-        elseif (stripos($headerUrl, '.gif') !== false) $ext = '.gif';
-        elseif (stripos($headerUrl, '.webp') !== false) $ext = '.webp';
-
-        $headerFile = "$headersPath/$businessID$ext";
 
         try {
             $ch = curl_init($headerUrl);
@@ -94,24 +88,52 @@ try {
                 CURLOPT_RETURNTRANSFER => true,
                 CURLOPT_TIMEOUT => 30,
                 CURLOPT_FOLLOWLOCATION => true,
+                // headerUrl is caller-supplied: never let cURL speak anything but HTTP(S), even across redirects
+                CURLOPT_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
+                CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
+                CURLOPT_HTTPHEADER => [
+                    'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
+                ],
             ]);
             $content = curl_exec($ch);
             $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
+            // cURL reports null when the server sent no Content-Type; normalise so stripos() always gets a string
+            $contentType = (string) curl_getinfo($ch, CURLINFO_CONTENT_TYPE);
             curl_close($ch);
 
-            if ($httpCode === 200 && $content !== false) {
+            if ($httpCode === 200 && $content !== false && strlen($content) > 100) {
+                // Detect actual format from content-type or magic bytes
+                $ext = 'jpg';
+                if (stripos($contentType, 'png') !== false) $ext = 'png';
+                elseif (stripos($contentType, 'gif') !== false) $ext = 'gif';
+                elseif (stripos($contentType, 'webp') !== false) $ext = 'webp';
+                else {
+                    // Fallback: check magic bytes (RIFF is a generic container, so WebP also needs "WEBP" at offset 8)
+                    if (str_starts_with($content, "\x89PNG")) $ext = 'png';
+                    elseif (str_starts_with($content, 'GIF')) $ext = 'gif';
+                    elseif (str_starts_with($content, 'RIFF') && substr($content, 8, 4) === 'WEBP') $ext = 'webp';
+                }
+
+                $headerFile = "$headersPath/$businessID.$ext";
-                file_put_contents($headerFile, $content);
+                // Only record the extension once the bytes are actually on disk; the enclosing catch reports the failure
+                if (file_put_contents($headerFile, $content) === false) {
+                    throw new Exception('Failed to save header image');
+                }
+
+                // Update database
+                queryTimed("UPDATE Businesses SET HeaderImageExtension = ? WHERE ID = ?", [$ext, $businessID]);
+
                 $response['downloaded'][] = [
                     'type' => 'header',
                     'url' => $headerUrl,
-                    'savedTo' => "/uploads/headers/$businessID$ext",
+                    'savedTo' => "/uploads/headers/$businessID.$ext",
                     'size' => strlen($content),
                 ];
             } else {
                 $response['downloaded'][] = [
                     'type' => 'header',
                     'url' => $headerUrl,
-                    'error' => "HTTP $httpCode",
+                    'error' => "HTTP $httpCode (size: " . strlen($content ?: '') . ")",
                 ];
             }
         } catch (Exception $e) {