false];

try {
    // Load the Claude API key from config/claude.json, two directories above this script.
    $configPath = realpath(__DIR__ . '/../../config/claude.json');
    $CLAUDE_API_KEY = '';
    if ($configPath && file_exists($configPath)) {
        $configData = json_decode(file_get_contents($configPath), true);
        if (!empty($configData['apiKey'])) {
            $CLAUDE_API_KEY = $configData['apiKey'];
        }
    }
    if (empty($CLAUDE_API_KEY)) {
        throw new Exception('Claude API key not configured');
    }

    $data = readJsonBody();
    if (empty($data)) throw new Exception('No request body provided');

    // ============================================================
    // DISCOVERY MODE: Crawl site, detect menu sub-pages, return without Claude
    //
    // Request:  { mode: "discover", url: "..." }
    // Response: site name, business info scraped from the main page, the menu
    //           sub-pages and ordering-platform pages reported by Playwright.
    // ============================================================
    if (!empty($data['mode']) && $data['mode'] === 'discover' && !empty($data['url'])) {
        $discoverUrl = trim($data['url']);
        // Same normalisation the main URL path applies: anything that is not
        // already http(s) is treated as a bare host and prefixed, so schemes
        // such as file:// never reach the headless browser unchanged.
        if (!preg_match('#^https?://#i', $discoverUrl)) {
            $discoverUrl = 'https://' . $discoverUrl;
        }

        $pwOutput = shell_exec("/opt/playwright/run.sh " . escapeshellarg($discoverUrl) . " 8000 2>&1");
        if (trim((string)$pwOutput) === '') {
            throw new Exception("Playwright returned empty response");
        }
        $pwResult = json_decode($pwOutput, true);
        // stderr is merged into stdout (2>&1), so a crash produces non-JSON text;
        // report it instead of silently returning an empty discovery result.
        if (!is_array($pwResult)) {
            throw new Exception("Playwright returned invalid JSON: " . substr(trim($pwOutput), 0, 200));
        }
        if (isset($pwResult['error'])) {
            throw new Exception("Playwright error: " . $pwResult['error']);
        }

        $subPages = $pwResult['subPagesVisited'] ?? [];
        $platformPages = $pwResult['platformPagesVisited'] ?? [];

        // Extract business info from main page
        $html = $pwResult['html'] ?? '';
        $bizInfo = [];

        // 1. Try JSON-LD structured data (most reliable)
        // NOTE(review): the leading "<script" was missing from this pattern in the
        // reviewed text (tag-like text appears to have been stripped); restored here.
        $bizTypes = ['Restaurant', 'FoodEstablishment', 'LocalBusiness', 'CafeOrCoffeeShop', 'BarOrPub'];
        if (preg_match_all('#<script[^>]*type=["\']application/ld\+json["\'][^>]*>([^<]+)#i', $html, $ldMatches)) {
            foreach ($ldMatches[1] as $ldJson) {
                $ld = json_decode($ldJson, true);
                if (!is_array($ld)) continue;

                // Handle the @graph wrapper, a top-level list of nodes, or a single node.
                if (isset($ld['@graph']) && is_array($ld['@graph'])) {
                    $entries = $ld['@graph'];
                } elseif (isset($ld[0])) {
                    $entries = $ld;
                } else {
                    $entries = [$ld];
                }

                foreach ($entries as $entry) {
                    if (!is_array($entry)) continue;
                    // @type may be a single string or a list of strings.
                    $types = (array)($entry['@type'] ?? []);
                    if (!array_intersect($types, $bizTypes)) continue;

                    if (!empty($entry['name'])) $bizInfo['name'] = $entry['name'];
                    if (!empty($entry['telephone'])) $bizInfo['phone'] = $entry['telephone'];

                    if (!empty($entry['address'])) {
                        $a = $entry['address'];
                        if (is_string($a)) {
                            $bizInfo['address'] = $a;
                        } elseif (is_array($a)) {
                            if (!empty($a['streetAddress'])) $bizInfo['addressLine1'] = $a['streetAddress'];
                            if (!empty($a['addressLocality'])) $bizInfo['city'] = $a['addressLocality'];
                            if (!empty($a['addressRegion'])) $bizInfo['state'] = $a['addressRegion'];
                            if (!empty($a['postalCode'])) $bizInfo['zip'] = $a['postalCode'];
                            $bizInfo['address'] = trim(implode(', ', array_filter([
                                $a['streetAddress'] ?? '',
                                $a['addressLocality'] ?? '',
                                $a['addressRegion'] ?? '',
                                $a['postalCode'] ?? ''
                            ])));
                        }
                    }

                    if (!empty($entry['openingHours'])) {
                        $bizInfo['hours'] = is_array($entry['openingHours'])
                            ? implode(', ', $entry['openingHours'])
                            : $entry['openingHours'];
                    }

                    // A structured specification, when present, overrides the plain openingHours text.
                    if (!empty($entry['openingHoursSpecification']) && is_array($entry['openingHoursSpecification'])) {
                        $dayMap = ['Monday'=>'Mon','Tuesday'=>'Tue','Wednesday'=>'Wed','Thursday'=>'Thu','Friday'=>'Fri','Saturday'=>'Sat','Sunday'=>'Sun'];
                        $hParts = [];
                        foreach ($entry['openingHoursSpecification'] as $spec) {
                            $days = (array)($spec['dayOfWeek'] ?? []);
                            $open = $spec['opens'] ?? '';
                            $close = $spec['closes'] ?? '';
                            // basename() reduces values like "https://schema.org/Monday" to "Monday".
                            $dayAbbrs = array_map(fn($d) => $dayMap[basename($d)] ?? $d, $days);
                            if ($open && $close) $hParts[] = implode('/', $dayAbbrs) . " $open-$close";
                        }
                        if (!empty($hParts)) $bizInfo['hours'] = implode(', ', $hParts);
                    }
                }
            }
        }

        // 2. Business name from title (fallback)
        // NOTE(review): "<title" restored; the reviewed pattern began with "]*>" and
        // would have captured the text after the first ">" of any tag.
        $siteName = $bizInfo['name'] ?? '';
        if (empty($siteName) && preg_match('#<title[^>]*>([^<]+)#i', $html, $tm)) {
            $siteName = trim($tm[1]);
            // Drop suffixes such as " - Menu" or " | Order Online".
            $siteName = preg_replace('#\s*[-|]+\s*(Menu|Order|Online|Home|Welcome|Restaurant).*$#i', '', $siteName);
            $siteName = trim($siteName);
            $bizInfo['name'] = $siteName;
        }

        // 3. Phone from tel: links (fallback)
        if (empty($bizInfo['phone']) && preg_match('#href=["\']tel:([^"\']+)["\']#i', $html, $pm)) {
            // The hyphen must sit last in the class: "()-\s" is rejected by PCRE2
            // as an invalid range, which made preg_replace() return null.
            $bizInfo['phone'] = trim((string)preg_replace('/[^\d+()\s-]/', '', $pm[1]));
        }

        // 4. Address from common meta tags (fallback)
        // NOTE(review): "<meta" restored in the four patterns below; without it each
        // pattern required a literal "]" and could not match ordinary markup.
        if (empty($bizInfo['address'])) {
            if (preg_match('#<meta[^>]+(?:property|name)=["\'](?:og:street-address|business:contact_data:street_address)["\'][^>]+content=["\']([^"\']+)["\']#i', $html, $am)) {
                $bizInfo['addressLine1'] = trim($am[1]);
            }
            if (preg_match('#<meta[^>]+(?:property|name)=["\'](?:og:locality|business:contact_data:locality)["\'][^>]+content=["\']([^"\']+)["\']#i', $html, $cm)) {
                $bizInfo['city'] = trim($cm[1]);
            }
            if (preg_match('#<meta[^>]+(?:property|name)=["\'](?:og:region|business:contact_data:region)["\'][^>]+content=["\']([^"\']+)["\']#i', $html, $sm)) {
                $bizInfo['state'] = trim($sm[1]);
            }
            if (preg_match('#<meta[^>]+(?:property|name)=["\'](?:og:postal-code|business:contact_data:postal_code)["\'][^>]+content=["\']([^"\']+)["\']#i', $html, $zm)) {
                $bizInfo['zip'] = trim($zm[1]);
            }
        }

        // Extract menu names from sub-page URLs: last path segment, de-slugged and title-cased.
        $menuPages = [];
        foreach ($subPages as $spUrl) {
            // parse_url() yields null/false when there is no usable path.
            $path = (string)parse_url((string)$spUrl, PHP_URL_PATH);
            $slug = trim($path, '/');
            if (str_contains($slug, '/')) $slug = basename($slug);
            $menuName = ucwords(str_replace(['-', '_'], ' ', $slug));
            $menuPages[] = ['url' => $spUrl, 'name' => $menuName, 'slug' => $slug];
        }

        // Check for ordering platform links
        $hasPlatform = count($platformPages) > 0;

        jsonResponse([
            'OK' => true,
            'mode' => 'discover',
            'siteName' => $siteName,
            'businessInfo' => $bizInfo,
            'mainUrl' => $discoverUrl,
            'menuPages' => $menuPages,
            'platformPages' => $platformPages,
            'hasPlatform' => $hasPlatform,
            'totalPagesFound' =>
count($menuPages),
        ]);
    }

    // ============================================================
    // MULTI-PAGE MODE: Process specific URLs individually through Claude
    // ============================================================
    if (!empty($data['mode']) && $data['mode'] === 'extract_page' && !empty($data['url'])) {
        // Process a single menu page through Playwright + Claude
        // The frontend calls this once per confirmed menu page
        $singleUrl = trim($data['url']);
        $menuName = $data['menuName'] ?? '';

        // Render the page with the Playwright wrapper; stderr is merged into the output.
        $pwOutput = shell_exec("/opt/playwright/run.sh " . escapeshellarg($singleUrl) . " 8000 2>&1");
        if (empty(trim($pwOutput ?? ''))) {
            throw new Exception("Playwright returned empty response for $singleUrl");
        }
        $pwResult = json_decode($pwOutput, true);
        if (isset($pwResult['error'])) {
            throw new Exception("Playwright error: " . $pwResult['error']);
        }

        $singleHtml = $pwResult['html'] ?? '';
        // Strip to just the main page (no sub-page following for single-page extract)
        // NOTE(review): the next statement passes four arguments to strpos(), which
        // accepts at most three; the text looks damaged (several statements appear
        // to have been fused). Recover the original from version control.
        $marker = strpos($singleHtml, '#s', '', $singleHtml);
        // NOTE(review): the four identical patterns below have no tag name; as written
        // each one matches "]*>" followed by a lazy empty match. Presumably each
        // originally removed a different element (script/style/...) - confirm.
        $singleHtml = preg_replace('#]*>.*?#is', '', $singleHtml);
        $singleHtml = preg_replace('#]*>.*?#is', '', $singleHtml);
        $singleHtml = preg_replace('#]*>.*?#is', '', $singleHtml);
        $singleHtml = preg_replace('#]*>.*?#is', '', $singleHtml);
        // Drop presentational attributes, then collapse whitespace, to shrink the prompt.
        $singleHtml = preg_replace('#\s+(class|style|data-[a-z-]+|id|aria-[a-z-]+|role|tabindex)="[^"]*"#i', '', $singleHtml);
        $singleHtml = preg_replace('#\s{2,}#', ' ', $singleHtml);
        $singleHtml = preg_replace('#>\s+<#', '><', $singleHtml);
        // Hard cap on the HTML sent to Claude (100,000 bytes).
        if (strlen($singleHtml) > 100000) {
            $singleHtml = substr($singleHtml, 0, 100000);
        }

        // Extract images from this page
        $singleImages = [];
        $singleImageData = []; // not referenced again in the visible part of this branch
        // NOTE(review): this pattern requires at least one literal "]" before "src=";
        // presumably it began with "<img[^>" originally - confirm.
        if (preg_match_all('#]+src=["\']([^"\']+)["\'][^>]*>#i', $singleHtml, $imgMatches, PREG_SET_ORDER)) {
            foreach ($imgMatches as $imgMatch) {
                $imgSrc = $imgMatch[1];
                // Skip URLs that look like UI chrome rather than food photos.
                if (preg_match('#(icon|favicon|logo|sprite|pixel|tracking|badge|button|\.svg)#i', $imgSrc)) continue;
                // Make relative URLs absolute against the page origin (scheme + host).
                if (!preg_match('#^https?://#i', $imgSrc)) {
                    $origin = preg_replace('#^(https?://[^/]+).*#', '$1', $singleUrl);
                    $imgSrc = (str_starts_with($imgSrc, '/')) ? $origin . $imgSrc : $origin . '/' . $imgSrc;
                }
                $singleImages[] = $imgSrc;
            }
        }
        // Also add Playwright-captured images
        foreach (($pwResult['images'] ?? []) as $pwImg) {
            if (!preg_match('#(icon|favicon|logo|sprite|pixel|tracking|badge|button|\.svg)#i', $pwImg)) {
                $singleImages[] = $pwImg;
            }
        }
        $singleImages = array_values(array_unique($singleImages));

        // Download up to 5 images for this page
        $imgContent = [];
        foreach (array_slice($singleImages, 0, 5) as $imgUrl) {
            try {
                $ch = curl_init($imgUrl);
                curl_setopt_array($ch, [CURLOPT_RETURNTRANSFER => true, CURLOPT_TIMEOUT => 10, CURLOPT_FOLLOWLOCATION => true]);
                $imgData = curl_exec($ch);
                $ct = curl_getinfo($ch, CURLINFO_CONTENT_TYPE) ?: 'image/jpeg';
                curl_close($ch);
                // Keep only payloads between 1,000 and 5,000,000 bytes.
                if ($imgData && strlen($imgData) > 1000 && strlen($imgData) < 5000000) {
                    // Media type is taken from the Content-Type header; anything that is
                    // not png/webp is labelled jpeg.
                    $mt = 'image/jpeg';
                    if (stripos($ct, 'png') !== false) $mt = 'image/png';
                    elseif (stripos($ct, 'webp') !== false) $mt = 'image/webp';
                    $imgContent[] = ['source' => ['type' => 'base64', 'media_type' => $mt, 'data' => base64_encode($imgData)], 'url' => $imgUrl];
                }
            } catch (Exception $e) {} // best effort: a failed download just omits that image
        }

        // Send to Claude for this single menu page
        $singlePrompt = 'You are an expert at extracting structured menu data from restaurant website HTML. Extract ALL menu items visible in the HTML. Return valid JSON with these keys: business (object with name, address, phone, hours, brandColor), categories (array of category name strings), items (array of objects with name, description, price, category, imageUrl). CATEGORIES vs ITEMS (CRITICAL): A CATEGORY is a broad section heading that groups items. An ITEM is an individual food/drink with name, description, and price. Each item must have a \'category\' field. For prices: numbers (e.g., 12.99). For imageUrl: the full src URL of the food image from the HTML. For brandColor: suggest a vibrant hex (6 digits, no hash). 
Return ONLY valid JSON.';

        // User message: the downloaded images first, then one text part with the HTML.
        $msgContent = [];
        foreach (array_slice($imgContent, 0, 5) as $ic) {
            $msgContent[] = ['type' => 'image', 'source' => $ic['source']];
        }
        $menuNameHint = strlen($menuName) ? "\n\nThis is the \"$menuName\" menu page." : '';
        $msgContent[] = ['type' => 'text', 'text' => "Extract all menu items from this page.$menuNameHint\n\nHTML:\n\n$singleHtml"];

        $claudeReq = [
            'model' => 'claude-sonnet-4-20250514',
            'max_tokens' => 16384,
            'temperature' => 0,
            'system' => $singlePrompt,
            'messages' => [['role' => 'user', 'content' => $msgContent]],
        ];

        // Local POST helper for this branch. Returns ['code' => HTTP status,
        // 'body' => response text, or '' when curl_exec() yields a falsy result].
        $httpPost = function(string $url, string $body, array $headers = [], int $timeout = 120): array {
            $ch = curl_init($url);
            curl_setopt_array($ch, [
                CURLOPT_RETURNTRANSFER => true,
                CURLOPT_POST => true,
                CURLOPT_POSTFIELDS => $body,
                CURLOPT_HTTPHEADER => $headers,
                CURLOPT_TIMEOUT => $timeout,
            ]);
            $resp = curl_exec($ch);
            $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
            curl_close($ch);
            return ['code' => $code, 'body' => $resp ?: ''];
        };

        $claudeResult = $httpPost(
            'https://api.anthropic.com/v1/messages',
            json_encode($claudeReq),
            ['Content-Type: application/json', "x-api-key: $CLAUDE_API_KEY", 'anthropic-version: 2023-06-01'],
            120
        );

        // Any non-200 status is fatal; prefer the API's own error message when the body is JSON.
        if ($claudeResult['code'] !== 200) {
            $errData = json_decode($claudeResult['body'], true);
            $errorDetail = $errData['error']['message'] ?? substr($claudeResult['body'], 0, 500);
            throw new Exception("Claude API error: {$claudeResult['code']} - $errorDetail");
        }

        // Use the first content block of type "text" as the model's answer.
        $claudeResponse = json_decode($claudeResult['body'], true);
        $responseText = '';
        foreach (($claudeResponse['content'] ?? []) as $block) {
            if (($block['type'] ??
'') === 'text') {
                $responseText = $block['text'];
                break;
            }
        }

        // Clean Claude JSON: remove a leading ```/```json fence and a trailing ``` fence.
        $responseText = preg_replace('/^```(?:json)?\s*/i', '', trim($responseText));
        $responseText = preg_replace('/\s*```\s*$/', '', $responseText);
        $menuData = json_decode($responseText, true);
        if (!is_array($menuData)) {
            throw new Exception("Failed to parse Claude response as JSON");
        }

        // The model may return a scalar or null for these keys; normalise to arrays so
        // the foreach and count() calls below cannot raise a TypeError.
        $menuItems = is_array($menuData['items'] ?? null) ? $menuData['items'] : [];
        $menuCategories = is_array($menuData['categories'] ?? null) ? $menuData['categories'] : [];

        // Tag items with menu name
        if (strlen($menuName)) {
            foreach ($menuItems as &$item) {
                if (is_array($item)) $item['menu'] = $menuName;
            }
            unset($item); // break the reference left behind by the by-reference loop
        }

        jsonResponse([
            'OK' => true,
            'mode' => 'extract_page',
            'menuName' => $menuName,
            'url' => $singleUrl,
            'DATA' => [
                'business' => $menuData['business'] ?? [],
                'categories' => $menuCategories,
                'items' => $menuItems,
                'modifiers' => $menuData['modifiers'] ?? [],
            ],
            'itemCount' => count($menuItems),
            'categoryCount' => count($menuCategories),
        ]);
    }

    // ---- Default mode: shared state and helpers ----
    $response['steps'] = [];
    $response['debug'] = [
        'hasHtmlKey' => isset($data['html']),
        'hasUrlKey' => isset($data['url']),
        'htmlLength' => isset($data['html']) ? strlen($data['html']) : 0,
        'urlValue' => $data['url'] ?? '',
    ];

    $pageHtml = '';
    $baseUrl = '';
    $basePath = '';
    $targetUrl = '';
    $playwrightImages = [];

    // Helper: webroot path (differs between dev and production hosts)
    $webroot = isDev() ? '/opt/lucee/tomcat/webapps/ROOT' : '/var/www/biz.payfrit.com';

    // Helper: expand a URL path to a local file path.
    // Plain concatenation: $urlPath is expected to start with "/" and is not
    // normalised here, so callers must validate paths derived from user input.
    $expandPath = function(string $urlPath) use ($webroot): string {
        return $webroot . $urlPath;
    };

    // Helper: convert 24h time to 12h format string, e.g. (9, 0) => "9am",
    // (13, 30) => "1:30pm", (0, 5) => "12:05am". Minutes are omitted when zero.
    // Hours are wrapped into 0-23 first so that 24 is rendered as "12am"
    // rather than "12pm".
    $formatTime12h = function(int $h, int $m): string {
        $h = (($h % 24) + 24) % 24;
        $ampm = $h >= 12 ? 'pm' : 'am';
        $h12 = $h % 12;
        if ($h12 === 0) $h12 = 12;
        $minutes = $m > 0 ? ':' . str_pad((string)$m, 2, '0', STR_PAD_LEFT) : '';
        return $h12 . $minutes . $ampm;
    };

    // Helper: extract value from escaped JSON using backslash-quote markers.
    // Looks for \"key\":\" (case-insensitive) from $startPos and returns the text
    // up to the next \" ; returns null when the key is absent or the value is empty.
    $BQ = "\\\""; // backslash-quote as it appears in HTML
    $extractBqValue = function(string $text, string $key, int $startPos = 0) use ($BQ): ?string {
        $marker = $BQ .
$key . $BQ . ':' . $BQ;
        // stripos() throws a ValueError when the offset lies outside the string.
        if (abs($startPos) > strlen($text)) return null;
        $pos = stripos($text, $marker, $startPos);
        if ($pos === false) return null;
        $valStart = $pos + strlen($marker);
        $valEnd = strpos($text, $BQ, $valStart);
        // No closing marker, or an empty value: treat both as "not found".
        if ($valEnd === false || $valEnd <= $valStart) return null;
        return substr($text, $valStart, $valEnd - $valStart);
    };

    // Helper: extract __OO_STATE__ JSON using brace-counting.
    // Finds "window.__OO_STATE__", takes the first "{" after it and scans forward,
    // tracking string literals and backslash escapes, until the matching "}".
    // Returns the JSON text with basic HTML entities decoded, or null when the
    // marker or a balanced object cannot be found.
    $extractOoState = function(string $html): ?string {
        $ooStart = stripos($html, 'window.__OO_STATE__');
        if ($ooStart === false) return null;
        $braceStart = strpos($html, '{', $ooStart);
        if ($braceStart === false) return null;

        $depth = 0;
        $inStr = false;
        $esc = false;
        $totalLen = strlen($html);
        $braceEnd = 0; // 0 doubles as "not found": the real index is always past the marker

        for ($i = $braceStart; $i < $totalLen; $i++) {
            $ch = $html[$i];
            if ($esc) { $esc = false; continue; }              // character after a backslash
            if ($ch === '\\' && $inStr) { $esc = true; continue; }
            if ($ch === '"') { $inStr = !$inStr; continue; }
            if (!$inStr) {
                if ($ch === '{') $depth++;
                elseif ($ch === '}') {
                    $depth--;
                    if ($depth === 0) { $braceEnd = $i; break; }
                }
            }
        }
        if ($braceEnd === 0) return null;

        $json = substr($html, $braceStart, $braceEnd - $braceStart + 1);
        // Decode HTML entities from View Source.
        // The reviewed text replaced '&','<','>','"' with themselves (a no-op); the
        // entity names had evidently been lost. strtr() decodes in a single pass, so
        // "&amp;lt;" becomes "&lt;" and is not decoded a second time.
        $json = strtr($json, [
            '&amp;' => '&',
            '&lt;' => '<',
            '&gt;' => '>',
            '&quot;' => '"',
        ]);
        return $json;
    };

    // Helper: extract Toast item price from multiple possible fields.
    // Checks prices[0], price, unitPrice, basePrice, then the digits of
    // displayPrice; returns 0.0 when none is numeric.
    $extractToastPrice = function(array $item): float {
        if (!empty($item['prices']) && is_array($item['prices']) && is_numeric($item['prices'][0] ??
null)) {
            return (float)$item['prices'][0];
        }

        // Plain numeric fields, tried in priority order.
        foreach (['price', 'unitPrice', 'basePrice'] as $field) {
            if (isset($item[$field]) && is_numeric($item[$field])) {
                return (float)$item[$field];
            }
        }

        // Last resort: a formatted string such as "$12.50" - keep only digits and dots.
        if (isset($item['displayPrice']) && strlen(trim((string)$item['displayPrice']))) {
            $digits = preg_replace('/[^0-9.]/', '', (string)$item['displayPrice']);
            if (strlen($digits) && is_numeric($digits)) {
                return (float)$digits;
            }
        }

        return 0.0;
    };

    // Helper: extract Toast item image URL.
    // Prefers the medium rendition, then large, then small; '' when none is available.
    $extractToastImage = function(array $item): string {
        $renditions = $item['imageUrls'] ?? null;
        if (!is_array($renditions)) {
            return '';
        }
        return $renditions['medium'] ?? $renditions['large'] ?? $renditions['small'] ?? '';
    };

    // Helper: clean JSON from Claude response.
    // Strips markdown fences and any prose before the first "{", removes trailing
    // commas and control characters, and replaces typographic quotes, dashes and
    // ellipses with ASCII equivalents. The result is returned without being parsed.
    $cleanClaudeJson = function(string $text): string {
        $clean = trim($text);

        // Opening fence: "```json" first, then a bare "```" (checked on the result).
        foreach (['```json', '```'] as $fence) {
            if (str_starts_with($clean, $fence)) {
                $clean = substr($clean, strlen($fence));
            }
        }
        // Closing fence.
        if (str_ends_with($clean, '```')) {
            $clean = substr($clean, 0, -3);
        }
        $clean = trim($clean);

        // Leading prose: jump to the first "{" if the text does not already start with one.
        if (!str_starts_with($clean, '{')) {
            $objectStart = strpos($clean, '{');
            if ($objectStart !== false) {
                $clean = substr($clean, $objectStart);
                $trimmedTail = trim($clean);
                if (str_ends_with($trimmedTail, '```')) {
                    $clean = substr($trimmedTail, 0, -3);
                }
                $clean = trim($clean);
            }
        }

        // Trailing commas before ] or }.
        $clean = preg_replace('/,(\s*[\]\}])/', '$1', $clean);
        // Control characters (tab, LF and CR are kept).
        $clean = preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F]/', '', $clean);

        // Typographic characters -> ASCII: single quotes, en/em dash, ellipsis.
        $clean = str_replace(
            ["\xe2\x80\x98", "\xe2\x80\x99", "\xe2\x80\x93", "\xe2\x80\x94", "\xe2\x80\xa6"],
            ["'", "'", "-", "-", "..."],
            $clean
        );

        return $clean;
    };

    // Helper: detect media type from base64 prefix
    $detectMediaType
= function(string $base64): string {
        // The first base64 characters encode the file's magic bytes.
        if (str_starts_with($base64, 'iVBO')) return 'image/png';
        if (str_starts_with($base64, 'R0lGOD')) return 'image/gif';
        if (str_starts_with($base64, 'UklGR')) return 'image/webp';
        return 'image/jpeg'; // default when no known prefix matches
    };

    // Helper: HTTP GET with curl (follows redirects).
    // Returns ['body' => string, 'code' => int, 'contentType' => string].
    // On transport failure 'body' is '' and 'code' is whatever curl reports
    // (0 when no response was received), so callers can rely on string types.
    $httpGet = function(string $url, array $headers = [], int $timeout = 30): array {
        $ch = curl_init($url);
        curl_setopt_array($ch, [
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TIMEOUT => $timeout,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_HTTPHEADER => $headers,
        ]);
        $body = curl_exec($ch);
        $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $contentType = curl_getinfo($ch, CURLINFO_CONTENT_TYPE);
        curl_close($ch);
        return [
            // curl_exec() returns false on failure; never leak that to callers.
            'body' => is_string($body) ? $body : '',
            'code' => $code,
            // The header may be absent (null/false); "??" alone would let false through.
            'contentType' => is_string($contentType) ? $contentType : '',
        ];
    };

    // Helper: HTTP POST with curl.
    // Returns ['body' => string, 'code' => int]; 'body' is '' on transport failure.
    $httpPost = function(string $url, string $body, array $headers = [], int $timeout = 30): array {
        $ch = curl_init($url);
        curl_setopt_array($ch, [
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_POST => true,
            CURLOPT_POSTFIELDS => $body,
            CURLOPT_TIMEOUT => $timeout,
            CURLOPT_HTTPHEADER => $headers,
        ]);
        $result = curl_exec($ch);
        $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        curl_close($ch);
        return ['body' => is_string($result) ? $result : '', 'code' => $code];
    };

    // ============================================================
    // Parse request: HTML content or URL
    // ============================================================
    if (!empty($data['html'])) {
        $pageHtml = trim($data['html']);
        $response['steps'][] = "Using provided HTML content: " . strlen($pageHtml) . " bytes";
    } elseif (!empty($data['url'])) {
        $targetUrl = trim($data['url']);
        // Treat scheme-less input as a bare host name.
        if (!preg_match('#^https?://#i', $targetUrl)) {
            $targetUrl = 'https://' .
$targetUrl;
        }

        // ========== GRUBHUB FAST PATH ==========
        // Grubhub restaurant URLs are served from Grubhub's JSON API instead of
        // being scraped: anonymous auth -> restaurant fetch -> map to the import format.
        if (preg_match('#grubhub\.com/restaurant/#i', $targetUrl)) {
            $response['steps'][] = "Grubhub URL detected - using API";

            // Extract restaurant ID: the last all-digit path segment, optionally
            // followed by a trailing slash, query string or fragment.
            if (!preg_match('~/(\d+)/?(?:[?#]|$)~', $targetUrl, $ghIdMatch)) {
                throw new Exception('Could not extract Grubhub restaurant ID from URL');
            }
            $ghRestaurantId = $ghIdMatch[1];
            $response['steps'][] = "Grubhub restaurant ID: $ghRestaurantId";

            // Get anonymous access token
            $ghAuth = $httpPost(
                'https://api-gtm.grubhub.com/auth',
                '{"brand":"GRUBHUB","client_id":"beta_UmWlpstzQSFmocLy3h1UieYcVST","scope":"anonymous"}',
                ['Content-Type: application/json'],
                15
            );
            if ($ghAuth['code'] !== 200) throw new Exception("Grubhub auth failed: {$ghAuth['code']}");
            $ghAuthData = json_decode((string)$ghAuth['body'], true);
            $ghToken = $ghAuthData['session_handle']['access_token'] ?? '';
            // A 200 with an unexpected body must not turn into "Bearer " with no token.
            if (!is_string($ghToken) || $ghToken === '') {
                throw new Exception('Grubhub auth response did not contain an access token');
            }
            $response['steps'][] = "Got Grubhub anonymous token";

            // Fetch restaurant with full menu data
            $ghMenu = $httpGet(
                "https://api-gtm.grubhub.com/restaurants/$ghRestaurantId?hideChoiceCategories=false&version=4&orderType=standard&hideUnavailableMenuItems=false&hideMenuItems=false",
                ["Authorization: Bearer $ghToken"],
                30
            );
            if ($ghMenu['code'] !== 200) throw new Exception("Grubhub restaurant fetch failed: {$ghMenu['code']}");
            $ghBody = (string)$ghMenu['body'];
            $ghData = json_decode($ghBody, true);
            $ghRestaurant = $ghData['restaurant'] ?? null;
            if (!is_array($ghRestaurant)) {
                throw new Exception('Grubhub response did not contain restaurant data');
            }
            $response['steps'][] = "Fetched Grubhub restaurant data (" . strlen($ghBody) . " bytes)";

            // Parse business info
            $ghBusiness = ['name' => $ghRestaurant['name'] ?? ''];
            if (!empty($ghRestaurant['address']) && is_array($ghRestaurant['address'])) {
                $ghAddr = $ghRestaurant['address'];
                if (isset($ghAddr['street_address'])) $ghBusiness['addressLine1'] = $ghAddr['street_address'];
                if (isset($ghAddr['locality'])) $ghBusiness['city'] = $ghAddr['locality'];
                if (isset($ghAddr['region'])) $ghBusiness['state'] = $ghAddr['region'];
                if (isset($ghAddr['zip'])) $ghBusiness['zip'] = $ghAddr['zip'];
                // "street, city, state zip" - missing parts are skipped instead of
                // leaving dangling ", " separators.
                $ghStateZip = trim(($ghBusiness['state'] ?? '') . ' ' . ($ghBusiness['zip'] ?? ''));
                $ghBusiness['address'] = implode(', ', array_filter(
                    [$ghBusiness['addressLine1'] ?? '', $ghBusiness['city'] ?? '', $ghStateZip],
                    fn($part) => trim((string)$part) !== ''
                ));
            }
            if (isset($ghRestaurant['latitude']) && is_numeric($ghRestaurant['latitude'])) $ghBusiness['latitude'] = $ghRestaurant['latitude'];
            if (isset($ghRestaurant['longitude']) && is_numeric($ghRestaurant['longitude'])) $ghBusiness['longitude'] = $ghRestaurant['longitude'];
            // Phone is stored as digits only.
            if (!empty($ghRestaurant['phone_number'])) $ghBusiness['phone'] = preg_replace('/[^0-9]/', '', $ghRestaurant['phone_number']);
            if (!empty(trim($ghRestaurant['description'] ?? ''))) $ghBusiness['description'] = trim($ghRestaurant['description']);

            // Hours: one "Mon 9am-5pm" entry per element of the managed-hours list.
            $ghHoursParts = [];
            $ghDayOrder = ['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday'];
            $ghDayAbbrev = ['Mon','Tue','Wed','Thu','Fri','Sat','Sun'];
            if (!empty($ghRestaurant['restaurant_managed_hours_list_v2']) && is_array($ghRestaurant['restaurant_managed_hours_list_v2'])) {
                foreach ($ghRestaurant['restaurant_managed_hours_list_v2'] as $ghDayHours) {
                    if (!isset($ghDayHours['day'], $ghDayHours['start_time'], $ghDayHours['end_time'])) continue;
                    // Accept any letter case for the day name; strict search avoids loose matches.
                    $ghDayName = ucfirst(strtolower(trim((string)$ghDayHours['day'])));
                    $ghDayIdx = array_search($ghDayName, $ghDayOrder, true);
                    if ($ghDayIdx === false) continue;

                    $parts = explode(':', $ghDayHours['start_time']);
                    $openStr = $formatTime12h((int)$parts[0], (int)($parts[1] ?? 0));
                    $parts = explode(':', $ghDayHours['end_time']);
                    $closeStr = $formatTime12h((int)$parts[0], (int)($parts[1] ?? 0));
                    $ghHoursParts[] = $ghDayAbbrev[$ghDayIdx] . " $openStr-$closeStr";
                }
            }
            if (!empty($ghHoursParts)) $ghBusiness['hours'] = implode(', ', $ghHoursParts);
            if (isset($ghData['restaurant_availability']['sales_tax'])) $ghBusiness['taxRate'] = $ghData['restaurant_availability']['sales_tax'];

            // Parse categories and items
            $ghCategories = [];
            $ghItems = [];
            $ghItemId = 1;            // running counter for synthetic ids "item_N"
            $ghModifierGroups = [];   // keyed by group name; the first definition seen wins
            $ghImageMappings = [];
            if (!empty($ghRestaurant['menu_category_list']) && is_array($ghRestaurant['menu_category_list'])) {
                foreach ($ghRestaurant['menu_category_list'] as $ghCat) {
                    $ghCatName = trim($ghCat['name'] ?? 'Menu');
                    $ghCatItemCount = 0;
                    if (!empty($ghCat['menu_item_list']) && is_array($ghCat['menu_item_list'])) {
                        foreach ($ghCat['menu_item_list'] as $ghItem) {
                            $ghItemName = trim($ghItem['name'] ?? '');
                            // Compare against '' explicitly: empty() would also drop an item named "0".
                            if ($ghItemName === '') continue;

                            // Amounts are divided by 100 (presumably cents - confirm against the API).
                            $ghPrice = 0;
                            if (!empty($ghItem['price']['amount'])) $ghPrice = (float)$ghItem['price']['amount'] / 100;
                            $ghDesc = trim($ghItem['description'] ?? '');

                            // Image URL: base + fixed 400x400 fill transform + public id + extension
                            $ghImageUrl = '';
                            if (!empty($ghItem['media_image']) && is_array($ghItem['media_image'])) {
                                $gi = $ghItem['media_image'];
                                if (!empty($gi['base_url']) && !empty($gi['public_id']) && !empty($gi['format'])) {
                                    $ghImageUrl = $gi['base_url'] . 'w_400,h_400,c_fill/' . $gi['public_id'] . '.' . $gi['format'];
                                }
                            }

                            // Modifiers: the item records group names; each group is defined once.
                            $ghItemModifiers = [];
                            if (!empty($ghItem['choice_category_list']) && is_array($ghItem['choice_category_list'])) {
                                foreach ($ghItem['choice_category_list'] as $ghChoiceCat) {
                                    $ghModName = trim($ghChoiceCat['name'] ?? '');
                                    if ($ghModName === '') continue;
                                    $ghItemModifiers[] = $ghModName;
                                    if (isset($ghModifierGroups[$ghModName])) continue;

                                    $ghModOptions = [];
                                    if (!empty($ghChoiceCat['choice_option_list']) && is_array($ghChoiceCat['choice_option_list'])) {
                                        foreach ($ghChoiceCat['choice_option_list'] as $ghOpt) {
                                            $optName = trim($ghOpt['description'] ?? '');
                                            $optPrice = !empty($ghOpt['price']['amount']) ? (float)$ghOpt['price']['amount'] / 100 : 0;
                                            if (strlen($optName)) $ghModOptions[] = ['name' => $optName, 'price' => $optPrice];
                                        }
                                    }
                                    $ghMinSel = (int)($ghChoiceCat['min_choice_options'] ?? 0);
                                    $ghMaxSel = (int)($ghChoiceCat['max_choice_options'] ?? 0);
                                    $ghModifierGroups[$ghModName] = [
                                        'name' => $ghModName,
                                        'required' => $ghMinSel > 0,
                                        'minSelections' => $ghMinSel,
                                        'maxSelections' => $ghMaxSel,
                                        'options' => $ghModOptions,
                                    ];
                                }
                            }

                            $ghItems[] = [
                                'id' => 'item_' . $ghItemId,
                                'name' => $ghItemName,
                                'price' => $ghPrice,
                                'description' => $ghDesc,
                                'category' => $ghCatName,
                                'imageUrl' => $ghImageUrl,
                                'hasModifiers' => count($ghItemModifiers) > 0,
                                'modifiers' => $ghItemModifiers,
                            ];
                            if (strlen($ghImageUrl)) $ghImageMappings[] = ['itemId' => 'item_' . $ghItemId, 'url' => $ghImageUrl];
                            $ghCatItemCount++;
                            $ghItemId++;
                        }
                    }
                    $ghCategories[] = ['name' => $ghCatName, 'itemCount' => $ghCatItemCount];
                }
            }

            $ghModifiers = array_values($ghModifierGroups);
            $response['steps'][] = "Parsed " . count($ghItems) . " items in " . count($ghCategories) . " categories with " . count($ghModifiers) . " modifier groups";

            $response['OK'] = true;
            $response['DATA'] = [
                'business' => $ghBusiness,
                'categories' => $ghCategories,
                'items' => $ghItems,
                'modifiers' => $ghModifiers,
                'imageUrls' => [],
                'imageMappings' => $ghImageMappings,
                'headerCandidateIndices' => [],
            ];
            $response['sourceUrl'] = $targetUrl;
            $response['pagesProcessed'] = 1;
            $response['imagesFound'] = count($ghImageMappings);
            $response['parsedVia'] = 'grubhub_api';
            jsonResponse($response);
        }
        // ========== END GRUBHUB FAST PATH ==========

        // Check if this is a local temp file (ZIP upload) - read directly
        if (stripos($targetUrl, '/temp/menu-import/') !== false) {
            // Reduce the URL to its path below the host and map it into the webroot.
            $localUrlPath = preg_replace('#https?://[^/]+(/temp/menu-import/.*)#i', '$1', $targetUrl);
            $localFilePath = $expandPath($localUrlPath);
            $response['steps'][] = "Local temp file detected: $localFilePath";

            // The path comes from the request, so resolve it and make sure it is a
            // regular file inside the import directory ("../" segments would
            // otherwise allow reading arbitrary files on the server).
            $importRoot = realpath($expandPath('/temp/menu-import'));
            $resolvedPath = realpath($localFilePath);
            if ($resolvedPath === false || !is_file($resolvedPath)) {
                throw new Exception("Local file not found: $localFilePath");
            }
            if ($importRoot === false || !str_starts_with($resolvedPath, $importRoot . DIRECTORY_SEPARATOR)) {
                throw new Exception('Local file is outside the menu-import directory');
            }

            $pageHtml = file_get_contents($resolvedPath);
            if ($pageHtml === false) {
                throw new Exception("Could not read local file: $localFilePath");
            }
            $playwrightImages = [];
            $response['steps'][] = "Read " . strlen($pageHtml) .
" bytes from local file"; $localDir = dirname($localFilePath); $basePath = preg_replace('#/[^/]*$#', '/', $targetUrl); // Check for Toast menu page - extract from visible HTML if (stripos($pageHtml, 'class="headerText"') !== false && stripos($pageHtml, 'toasttab') !== false) { $response['steps'][] = "Toast menu detected - parsing visible HTML items"; try { $toastBusiness = []; $toastCategories = []; $toastItems = []; $categorySet = []; $itemNameSet = []; $itemId = 1; // Find category headers if (preg_match_all('#]*class="[^"]*groupHeader[^"]*"[^>]*>([^<]+)#i', $pageHtml, $catMatches)) { foreach ($catMatches[1] as $catName) { $catName = trim($catName); if (strlen($catName) && !isset($categorySet[$catName])) { $categorySet[$catName] = true; $toastCategories[] = ['name' => $catName, 'itemCount' => 0]; } } } // Extract item blocks if (preg_match_all('#]*class="[^"]*item[^"]*"[^>]*>.*?#is', $pageHtml, $blockMatches)) { $response['steps'][] = "Found " . count($blockMatches[0]) . " item blocks in HTML"; foreach ($blockMatches[0] as $block) { if (preg_match('#([^<]+)#i', $block, $nm)) { $itemName = trim($nm[1]); if (strlen($itemName) && !isset($itemNameSet[$itemName])) { $itemNameSet[$itemName] = true; $itemStruct = ['id' => 'item_' . $itemId, 'name' => $itemName, 'modifiers' => [], 'price' => 0, 'description' => '']; // Price if (preg_match('#\$([0-9]+\.?[0-9]*)#', $block, $pm)) { $p = (float)$pm[1]; if ($p > 0) $itemStruct['price'] = $p; } // Description if (preg_match('#]*class="[^"]*description[^"]*"[^>]*>([^<]+)#i', $block, $dm)) { $itemStruct['description'] = trim($dm[1]); } // Image if (preg_match('#src="(Menu_files/[^"]+)"#i', $block, $im)) { $itemStruct['imageUrl'] = $basePath . $im[1]; $itemStruct['imageSrc'] = $basePath . $im[1]; $itemStruct['imageFilename'] = basename($im[1]); } $itemStruct['category'] = !empty($toastCategories) ? 
$toastCategories[0]['name'] : 'Menu'; $toastItems[] = $itemStruct; $itemId++; } } } } // Fallback: simpler headerText extraction if (empty($toastItems)) { if (preg_match_all('#([^<]+)#i', $pageHtml, $nameMatches)) { foreach ($nameMatches[1] as $nm) { $nm = trim($nm); if (strlen($nm) && !isset($itemNameSet[$nm])) { $itemNameSet[$nm] = true; $toastItems[] = ['id' => 'item_' . $itemId, 'name' => $nm, 'price' => 0, 'description' => '', 'category' => 'Menu', 'modifiers' => []]; $itemId++; } } } } // Try business name from title if (preg_match('#]*>([^<]+)#i', $pageHtml, $tm)) { $titleText = trim($tm[1]); if (strpos($titleText, '|') !== false) $titleText = trim(explode('|', $titleText)[0]); $titleText = preg_replace('#\s*-\s*(Menu|Order|Online).*$#i', '', $titleText); if (strlen($titleText) && !isset($toastBusiness['name'])) { $toastBusiness['name'] = $titleText; } } // Try og:title/og:site_name if (empty($toastBusiness['name'])) { if (preg_match('#]*property=["\']og:(site_name|title)["\'][^>]*content=["\']([^"\']+)["\']#i', $pageHtml, $ogm)) { $ogText = trim($ogm[2]); if (strpos($ogText, '|') !== false) $ogText = trim(explode('|', $ogText)[0]); if (strlen($ogText)) $toastBusiness['name'] = $ogText; } elseif (preg_match('#]*content=["\']([^"\']+)["\'][^>]*property=["\']og:(site_name|title)["\']#i', $pageHtml, $ogm)) { $ogText = trim($ogm[1]); if (strpos($ogText, '|') !== false) $ogText = trim(explode('|', $ogText)[0]); if (strlen($ogText)) $toastBusiness['name'] = $ogText; } } // Try header element if (empty($toastBusiness['name'])) { if (preg_match('#<(?:h1|div)[^>]*class="[^"]*(?:restaurant|location|brand)[^"]*"[^>]*>([^<]+)<#i', $pageHtml, $hm)) { $ht = trim($hm[1]); if (strlen($ht) && strlen($ht) < 100) $toastBusiness['name'] = $ht; } } // Try first h1 if (empty($toastBusiness['name'])) { if (preg_match('#]*>([^<]+)#i', $pageHtml, $h1m)) { $h1t = trim($h1m[1]); if (strlen($h1t) && strlen($h1t) < 100) $toastBusiness['name'] = $h1t; } } // Try address from HTML if 
(empty($toastBusiness['addressLine1'])) { if (preg_match('#<[^>]*class="[^"]*address[^"]*"[^>]*>([^<]+)]+>#i', $pageHtml, $am)) { $at = trim($am[1]); if (strlen($at) && strlen($at) < 200) $toastBusiness['addressLine1'] = $at; } } // Try phone from HTML if (empty($toastBusiness['phone'])) { if (preg_match('#(?:tel:|phone[^"]*">)\s*\(?(\d{3})\)?[-.\s]?(\d{3})[-.\s]?(\d{4})#i', $pageHtml, $phm)) { $toastBusiness['phone'] = $phm[1] . '-' . $phm[2] . '-' . $phm[3]; } } // Check __OO_STATE__ for images, categories, prices, business info if (stripos($pageHtml, 'window.__OO_STATE__') !== false) { $ooJson = $extractOoState($pageHtml); if ($ooJson !== null) { try { $ooState = json_decode($ooJson, true); if (is_array($ooState)) { $imageMap = []; $itemCategoryMap = []; $itemPriceMap = []; foreach ($ooState as $key => $val) { // Restaurant info if (str_starts_with($key, 'Restaurant:') && is_array($val)) { if (!empty($val['name'])) $toastBusiness['name'] = $val['name']; if (!empty($val['location']) && is_array($val['location'])) { $loc = $val['location']; if (!empty($loc['address1'])) $toastBusiness['addressLine1'] = $loc['address1']; if (!empty($loc['city'])) $toastBusiness['city'] = $loc['city']; if (!empty($loc['state'])) $toastBusiness['state'] = $loc['state']; if (!empty($loc['zipCode'])) $toastBusiness['zip'] = $loc['zipCode']; if (!empty($loc['phone'])) $toastBusiness['phone'] = $loc['phone']; } if (!empty($val['brandColor'])) $toastBusiness['brandColor'] = str_replace('#', '', $val['brandColor']); } // Menu items if (str_starts_with($key, 'Menu:') && is_array($val) && !empty($val['groups']) && is_array($val['groups'])) { foreach ($val['groups'] as $group) { $groupName = trim($group['name'] ?? ''); if (strlen($groupName) && !isset($categorySet[$groupName])) { $categorySet[$groupName] = true; $toastCategories[] = ['name' => $groupName, 'itemCount' => 0]; } // Check for subgroups $subgroups = $group['subgroups'] ?? $group['children'] ?? $group['childGroups'] ?? 
[]; if (!empty($subgroups) && is_array($subgroups)) { foreach ($subgroups as $sg) { $sgName = trim($sg['name'] ?? ''); if (strlen($sgName) && !isset($categorySet[$sgName])) { $categorySet[$sgName] = true; $toastCategories[] = ['name' => $sgName, 'parentCategoryName' => $groupName, 'itemCount' => 0]; } if (!empty($sg['items']) && is_array($sg['items'])) { $effectiveName = strlen($sgName) ? $sgName : $groupName; foreach ($sg['items'] as $item) { if (!empty($item['name'])) { $itemCategoryMap[$item['name']] = $effectiveName; $p = $extractToastPrice($item); if ($p > 0) $itemPriceMap[$item['name']] = $p; $img = $extractToastImage($item); if (strlen($img)) $imageMap[$item['name']] = $img; } } } } } // Direct items if (!empty($group['items']) && is_array($group['items'])) { foreach ($group['items'] as $item) { if (!empty($item['name'])) { if (strlen($groupName)) $itemCategoryMap[$item['name']] = $groupName; $p = $extractToastPrice($item); if ($p > 0) $itemPriceMap[$item['name']] = $p; $img = $extractToastImage($item); if (strlen($img)) $imageMap[$item['name']] = $img; } } } } } } // Apply to items $imagesMatched = $categoriesMatched = $pricesMatched = 0; for ($i = 0; $i < count($toastItems); $i++) { $name = $toastItems[$i]['name']; if (isset($imageMap[$name])) { $toastItems[$i]['imageUrl'] = $imageMap[$name]; $toastItems[$i]['imageSrc'] = $imageMap[$name]; $toastItems[$i]['imageFilename'] = basename($imageMap[$name]); $imagesMatched++; } if (isset($itemCategoryMap[$name])) { $toastItems[$i]['category'] = $itemCategoryMap[$name]; $categoriesMatched++; } if (isset($itemPriceMap[$name]) && ($toastItems[$i]['price'] ?? 
0) == 0) {
                        $toastItems[$i]['price'] = $itemPriceMap[$name];
                        $pricesMatched++;
                    }
                }
                $response['steps'][] = "Matched $imagesMatched images, $categoriesMatched categories, $pricesMatched prices from __OO_STATE__";
            }
        } catch (Exception $e) {
            // OO_STATE parse failed, continue
        }
    }
}

// Default category if none
if (!empty($toastItems) && empty($toastCategories)) {
    $toastCategories[] = ['name' => 'Menu', 'itemCount' => count($toastItems)];
}

// ------------------------------------------------------------------
// Scan ALL HTML files in the extracted ZIP for business info.
// Every value found here is a fallback: it is only used when the
// corresponding $toastBusiness field is still empty.
// ------------------------------------------------------------------
$extractUrlPath = preg_replace('#https?://[^/]+(/temp/menu-import/[a-f0-9]+/).*#i', '$1', $targetUrl);
$extractDir = $expandPath($extractUrlPath);
try {
    $allHtmlFiles = [];
    $it = new RecursiveDirectoryIterator($extractDir, RecursiveDirectoryIterator::SKIP_DOTS);
    $files = new RecursiveIteratorIterator($it);
    foreach ($files as $file) {
        if (preg_match('/\.html?$/i', $file->getFilename())) {
            $allHtmlFiles[] = $file->getRealPath();
        }
    }
    $response['steps'][] = "Found " . count($allHtmlFiles) . " HTML files in ZIP";

    foreach ($allHtmlFiles as $otherFile) {
        if ($otherFile === $localFilePath) continue;
        try {
            $otherHtml = file_get_contents($otherFile);
            // file_get_contents() signals failure with false rather than an exception,
            // so unreadable files have to be skipped explicitly.
            if ($otherHtml === false) continue;

            // Business name from <title>
            if (empty($toastBusiness['name'])) {
                if (preg_match('#<title[^>]*>([^<]+)</title>#i', $otherHtml, $otm)) {
                    $ot = trim($otm[1]);
                    if (strlen($ot) && !preg_match('#^(Menu|Home|About|Contact|Order|Online)$#i', $ot)) {
                        if (strpos($ot, '|') !== false) $ot = trim(explode('|', $ot)[0]);
                        $ot = preg_replace('#\s*-\s*(Menu|Order|Online).*$#i', '', $ot);
                        if (strlen($ot) && strlen($ot) < 100) $toastBusiness['name'] = $ot;
                    }
                }
            }

            // Address from other files.
            // The pattern contains a literal '#' (unit marker), so '~' is used as the
            // delimiter; with '#' as delimiter the pattern is cut short and never compiles.
            if (empty($toastBusiness['addressLine1'])) {
                if (preg_match('~(\d+\s+[A-Za-z0-9\s]+(?:St(?:reet)?|Ave(?:nue)?|Rd|Road|Blvd|Boulevard|Dr(?:ive)?|Ln|Lane|Way|Ct|Court|Pl(?:ace)?|Pkwy|Parkway)[.,]?\s*(?:Suite|Ste|#|Unit|Apt)?\s*[A-Za-z0-9\-]*)~i', $otherHtml, $adm)) {
                    $at = trim($adm[1]);
                    if (strlen($at) > 5 && strlen($at) < 100) $toastBusiness['addressLine1'] = $at;
                }
            }

            // Phone from other files.
            // Digit look-arounds stop the pattern from matching inside longer digit
            // runs such as numeric IDs or timestamps.
            if (empty($toastBusiness['phone'])) {
                if (preg_match('#(?<!\d)\(?(\d{3})\)?[-.\s]?(\d{3})[-.\s]?(\d{4})(?!\d)#', $otherHtml, $phm)) {
                    $toastBusiness['phone'] = $phm[1] . '-' . $phm[2] . '-' . $phm[3];
                }
            }

            // Check __OO_STATE__ in other files
            if (stripos($otherHtml, 'window.__OO_STATE__') !== false) {
                $otherOoJson = $extractOoState($otherHtml);
                if ($otherOoJson !== null) {
                    try {
                        $otherOo = json_decode($otherOoJson, true);
                        if (is_array($otherOo)) {
                            foreach ($otherOo as $oKey => $oVal) {
                                if (str_starts_with((string)$oKey, 'Restaurant:') && is_array($oVal)) {
                                    if (!empty($oVal['name']) && empty($toastBusiness['name'])) $toastBusiness['name'] = $oVal['name'];
                                    if (!empty($oVal['location']) && is_array($oVal['location'])) {
                                        $ol = $oVal['location'];
                                        // Accept both zip spellings, as the main Toast fast path does.
                                        $olZip = $ol['zip'] ?? $ol['zipCode'] ?? null;
                                        if (!empty($ol['address1']) && empty($toastBusiness['addressLine1'])) $toastBusiness['addressLine1'] = $ol['address1'];
                                        if (!empty($ol['city']) && empty($toastBusiness['city'])) $toastBusiness['city'] = $ol['city'];
                                        if (!empty($ol['state']) && empty($toastBusiness['state'])) $toastBusiness['state'] = $ol['state'];
                                        if (!empty($olZip) && empty($toastBusiness['zip'])) $toastBusiness['zip'] = $olZip;
                                        if (!empty($ol['phone']) && empty($toastBusiness['phone'])) $toastBusiness['phone'] = $ol['phone'];
                                    }
                                    if (!empty($oVal['brandColor']) && empty($toastBusiness['brandColor'])) $toastBusiness['brandColor'] = str_replace('#', '', $oVal['brandColor']);
                                }
                            }
                        }
                    } catch (Exception $e) { /* skip */ }
                }
            }
        } catch (Exception $e) { /* skip unreadable files */ }
    }
} catch (Exception $e) {
    $response['steps'][] = "Could not scan other HTML files: " . $e->getMessage();
}

$response['steps'][] = "Extracted " . count($toastItems) . " unique items from " . count($toastCategories) . " categories";

// ------------------------------------------------------------------
// Scan ZIP images and analyze up to three of them for business info
// via Claude. Images under a "_files" directory and images of 10 KB
// or less are ignored.
// ------------------------------------------------------------------
try {
    $zipImageFiles = [];
    $it = new RecursiveDirectoryIterator($extractDir, RecursiveDirectoryIterator::SKIP_DOTS);
    $files = new RecursiveIteratorIterator($it);
    $imageExtensions = ['jpg','jpeg','png','gif','webp'];
    foreach ($files as $file) {
        if (!$file->isFile()) continue;
        $ext = strtolower(pathinfo($file->getFilename(), PATHINFO_EXTENSION));
        if (in_array($ext, $imageExtensions, true) && $file->getSize() > 10000 && stripos($file->getPath(), '_files') === false) {
            $zipImageFiles[] = $file->getRealPath();
        }
    }

    if (!empty($zipImageFiles)) {
        $response['steps'][] = "Found " . count($zipImageFiles) . " images in ZIP to analyze for business info";
        $imgLimit = min(count($zipImageFiles), 3);
        for ($imgIdx = 0; $imgIdx < $imgLimit; $imgIdx++) {
            try {
                $imgContent = file_get_contents($zipImageFiles[$imgIdx]);
                // Do not spend an API call on an image that could not be read.
                if ($imgContent === false || $imgContent === '') {
                    $response['steps'][] = "Error analyzing image: could not read file";
                    continue;
                }
                $base64Img = base64_encode($imgContent);
                $mediaType = $detectMediaType($base64Img);

                $imgRequest = [
                    'model' => 'claude-sonnet-4-20250514',
                    'max_tokens' => 1024,
                    'temperature' => 0,
                    'messages' => [[
                        'role' => 'user',
                        'content' => [
                            ['type' => 'image', 'source' => ['type' => 'base64', 'media_type' => $mediaType, 'data' => $base64Img]],
                            ['type' => 'text', 'text' => 'Extract ALL business information visible in this image. Look carefully for: 1) Business NAME (the restaurant/store name), 2) PHONE number (format: xxx-xxx-xxxx), 3) Full ADDRESS (street, city, state, zip), 4) HOURS of operation (all days shown). Return JSON: {"name":"","addressLine1":"","city":"","state":"","zip":"","phone":"","hours":"","brandColor":""}. For hours, format as single string like \'Mon-Thu 7am-10pm, Fri-Sat 7am-11pm\'. Return ONLY valid JSON.'],
                        ],
                    ]],
                ];

                $imgResp = $httpPost(
                    'https://api.anthropic.com/v1/messages',
                    json_encode($imgRequest),
                    ['Content-Type: application/json', "x-api-key: $CLAUDE_API_KEY", 'anthropic-version: 2023-06-01'],
                    60
                );

                if ($imgResp['code'] === 200) {
                    $imgData = json_decode($imgResp['body'], true);
                    if (!empty($imgData['content'][0]['text'])) {
                        $imgText = $cleanClaudeJson($imgData['content'][0]['text']);
                        $imgBiz = json_decode($imgText, true);
                        if (is_array($imgBiz)) {
                            foreach (['name','addressLine1','city','state','zip','phone','hours','brandColor'] as $field) {
                                $fieldValue = $imgBiz[$field] ?? null;
                                // Accept strings and numbers only: a boolean true would
                                // otherwise be stored as the string "1".
                                if (!empty($fieldValue) && (is_string($fieldValue) || is_int($fieldValue) || is_float($fieldValue))) {
                                    $toastBusiness[$field] = trim((string)$fieldValue);
                                }
                            }
                        }
                    }
                }
            } catch (Exception $e) {
                $response['steps'][] = "Error analyzing image: " . $e->getMessage();
            }
        }
    }
} catch (Exception $e) {
    $response['steps'][] = "Could not scan ZIP for images: " . $e->getMessage();
}

// Return directly
$response['OK'] = true;
$response['DATA'] = [
    'business' => $toastBusiness,
    'categories' => $toastCategories,
    'modifiers' => [],
    'items' => $toastItems,
    'imageUrls' => [],
    'headerCandidateIndices' => [],
    'imageMappings' => [],
];
$response['sourceUrl'] = $targetUrl;
$response['pagesProcessed'] = 1;
$response['imagesFound'] = 0;
$response['playwrightImagesCount'] = 0;
$response['toastDirect'] = true;
jsonResponse($response);
} catch (Exception $e) {
    $response['steps'][] = "Toast HTML parse failed: " . $e->getMessage() . " - falling back to Claude";
}
}

// Extract base URL for relative links (local temp file case)
if (preg_match('#^(https?://[^/]+)#', $targetUrl, $bm)) {
    $baseUrl = $bm[1];
}
$basePath = preg_replace('#/[^/]*$#', '/', preg_replace('#\?.*$#', '', $targetUrl));
} else {
// Remote URL - use Playwright for JS-rendered content
$response['steps'][] = "Fetching URL with Playwright: $targetUrl";
$pwOutput = shell_exec("/opt/playwright/run.sh " . escapeshellarg($targetUrl) . " 10000 2>&1");
if (empty(trim($pwOutput ??
''))) {
    throw new Exception("Playwright returned empty response");
}
$pwResult = json_decode($pwOutput, true);
// The command merges stderr into stdout (2>&1), so the output may not be JSON at
// all. Fail loudly instead of silently continuing with an empty page.
if (!is_array($pwResult)) {
    throw new Exception("Playwright returned invalid JSON");
}
if (isset($pwResult['error'])) {
    throw new Exception("Playwright error: " . $pwResult['error']);
}

$pageHtml = $pwResult['html'] ?? '';
$playwrightImages = $pwResult['images'] ?? [];
if (!is_array($playwrightImages)) $playwrightImages = [];
$response['steps'][] = "Fetched " . strlen($pageHtml) . " bytes via Playwright, " . count($playwrightImages) . " images captured";

// Capture platform image map (ordering site food photos matched to item names)
$platformImageMap = [];
if (!empty($pwResult['platformImageMap']) && is_array($pwResult['platformImageMap'])) {
    $platformImageMap = $pwResult['platformImageMap'];
    $response['steps'][] = "Found " . count($platformImageMap) . " item images from ordering platform";
}
if (!empty($pwResult['subPagesVisited']) && is_array($pwResult['subPagesVisited'])) {
    $response['steps'][] = "Visited " . count($pwResult['subPagesVisited']) . " menu sub-pages: " . implode(', ', $pwResult['subPagesVisited']);
}
if (!empty($pwResult['platformPagesVisited']) && is_array($pwResult['platformPagesVisited'])) {
    $response['steps'][] = "Visited " . count($pwResult['platformPagesVisited']) . " ordering platforms for photos: " . implode(', ', $pwResult['platformPagesVisited']);
}

// ========== WOOCOMMERCE FAST PATH ==========
// When WooCommerce markers are present, a dedicated Playwright script extracts
// items and modifiers; on success the response is returned without calling Claude.
if (stripos($pageHtml, 'woocommerce') !== false || stripos($pageHtml, 'wc-add-to-cart') !== false || stripos($pageHtml, 'tm-extra-product-options') !== false) {
    $response['steps'][] = "WooCommerce site detected - running modifier extraction";
    // The script is given the site origin only (scheme + host).
    $wooUrl = preg_replace('#(https?://[^/]+).*#', '$1', $targetUrl);
    try {
        $wooOutput = shell_exec("/opt/playwright/run-woo-modifiers.sh " . escapeshellarg($wooUrl) . " 2>&1");
        if (!empty(trim($wooOutput ?? ''))) {
            $wooResult = json_decode($wooOutput, true);
            if (!empty($wooResult['items']) && is_array($wooResult['items'])) {
                $response['steps'][] = "WooCommerce extraction: " . count($wooResult['items']) . " items, " . count($wooResult['modifiers'] ?? []) . " modifier groups";

                $wooCats = [];
                $wooItems = [];
                foreach ($wooResult['items'] as $wi => $wItem) {
                    // An entry without a name cannot be imported or mapped to modifiers.
                    if (!is_array($wItem) || empty($wItem['name'])) continue;

                    $catName = !empty($wItem['category']) ? trim($wItem['category']) : 'Menu';
                    if (!isset($wooCats[$catName])) $wooCats[$catName] = 0;
                    $wooCats[$catName]++;

                    $itemMods = $wooResult['itemModifierMap'][$wItem['name']] ?? [];
                    $wooItems[] = [
                        'id' => 'item_' . ($wi + 1),
                        'name' => $wItem['name'],
                        'price' => (float)($wItem['price'] ?? 0),
                        'description' => $wItem['description'] ?? '',
                        'category' => $catName,
                        'modifiers' => $itemMods,
                        'hasModifiers' => count($itemMods) > 0,
                        'imageUrl' => trim($wItem['imageUrl'] ?? ''),
                    ];
                }

                $wooCategories = [];
                foreach ($wooCats as $wcName => $wcCount) {
                    $wooCategories[] = ['name' => $wcName, 'itemCount' => $wcCount];
                }

                $wooBiz = $wooResult['business'] ?? [];
                $response['OK'] = true;
                $response['DATA'] = [
                    'business' => [
                        'name' => $wooBiz['name'] ?? '',
                        'address' => $wooBiz['address'] ?? '',
                        'phone' => $wooBiz['phone'] ?? '',
                        'hours' => $wooBiz['hours'] ?? '',
                    ],
                    'categories' => $wooCategories,
                    'items' => $wooItems,
                    'modifiers' => $wooResult['modifiers'] ?? [],
                    'imageUrls' => [],
                    'imageMappings' => [],
                    'headerCandidateIndices' => [],
                ];
                $response['sourceUrl'] = $targetUrl;
                $response['parsedVia'] = 'woocommerce_playwright';
                jsonResponse($response);
            }
        }
        $response['steps'][] = "WooCommerce extraction returned no items - falling through to Claude";
    } catch (Exception $e) {
        $response['steps'][] = "WooCommerce extraction failed: " . $e->getMessage() . " - falling through to Claude";
    }
}
// ========== END WOOCOMMERCE FAST PATH ==========

// ========== DOORDASH / ORDER.ONLINE FAST PATH ==========
if (stripos($pageHtml, 'MenuPageItem') !== false && stripos($pageHtml, 'MenuPageItemList') !== false) {
    $response['steps'][] = "DoorDash/order.online site detected - extracting embedded data";
    try {
        // Build image map from StorePageCarouselItem entries
        $ddImageMap = [];
        $carouselMarker = $BQ . '__typename' . $BQ . ':' .
$BQ . 'StorePageCarouselItem' . $BQ;

// Decodes the ampersand escapes that appear in the embedded data, so that names
// taken from different parts of the page compare equal.
$ddUnescape = fn($s) => str_replace(['\\u0026', '&amp;'], '&', $s);

// Requests a 600x600 rendition when the image URL carries width=/height= parameters.
$ddResizeImage = function ($url) {
    if (stripos($url, 'width=') !== false) {
        $url = preg_replace('/width=\d+/i', 'width=600', $url);
        $url = preg_replace('/height=\d+/i', 'height=600', $url);
    }
    return $url;
};

// Each carousel entry runs from its __typename marker up to the next __typename marker.
$searchPos = 0;
while (true) {
    $searchPos = stripos($pageHtml, $carouselMarker, $searchPos);
    if ($searchPos === false) break;
    $nextMarker = stripos($pageHtml, $BQ . '__typename' . $BQ, $searchPos + strlen($carouselMarker));
    if ($nextMarker === false) $nextMarker = strlen($pageHtml);
    $entryText = substr($pageHtml, $searchPos, $nextMarker - $searchPos);

    $cpName = $extractBqValue($entryText, 'name');
    if ($cpName !== null) {
        // Item names are unescaped before the lookup further down, so the map key
        // must be unescaped the same way or names containing '&' never match.
        $cpName = $ddUnescape($cpName);
        $cpImg = $extractBqValue($entryText, 'imgUrl');
        if ($cpImg !== null && $cpImg !== 'null' && stripos($cpImg, 'http') !== false) {
            $ddImageMap[$cpName] = $ddResizeImage($cpImg);
        }
    }
    $searchPos += strlen($carouselMarker);
}
$response['steps'][] = "Built image map with " . count($ddImageMap) . " entries from carousel";

// Extract menu from MenuPageItemList: every list marker opens a category section
// that extends to the next list marker; items are located inside that section.
$ddCategories = [];
$ddCatSeen = [];
$ddItems = [];
$ddItemSeen = [];
$ddItemCounter = 0;

$catMarker = $BQ . '__typename' . $BQ . ':' . $BQ . 'MenuPageItemList' . $BQ;
$itemMarker = $BQ . '__typename' . $BQ . ':' . $BQ . 'MenuPageItem' . $BQ;

$catPos = 0;
while (true) {
    $catPos = stripos($pageHtml, $catMarker, $catPos);
    if ($catPos === false) break;
    $nextCatPos = stripos($pageHtml, $catMarker, $catPos + strlen($catMarker));
    if ($nextCatPos === false) $nextCatPos = strlen($pageHtml);
    $catSection = substr($pageHtml, $catPos, $nextCatPos - $catPos);

    $catName = $extractBqValue($catSection, 'name');
    if ($catName === null) {
        $catPos += strlen($catMarker);
        continue;
    }
    $catName = $ddUnescape($catName);
    // "Most Ordered" is skipped, as are categories that were already seen.
    if ($catName === 'Most Ordered' || isset($ddCatSeen[$catName])) {
        $catPos += strlen($catMarker);
        continue;
    }
    $ddCatSeen[$catName] = true;
    $ddCategories[] = ['name' => $catName, 'parentCategoryName' => ''];

    // Items within category
    $itemPos = 0;
    while (true) {
        $itemPos = stripos($catSection, $itemMarker, $itemPos);
        if ($itemPos === false) break;
        $nextItemPos = stripos($catSection, $itemMarker, $itemPos + strlen($itemMarker));
        if ($nextItemPos === false) $nextItemPos = strlen($catSection);
        $itemEntry = substr($catSection, $itemPos, $nextItemPos - $itemPos);

        $ddItemId = $extractBqValue($itemEntry, 'id') ?? '';
        $ipName = $extractBqValue($itemEntry, 'name');
        if ($ipName === null) {
            $itemPos += strlen($itemMarker);
            continue;
        }
        $ipName = $ddUnescape($ipName);
        // Items are de-duplicated by name across all categories.
        if (isset($ddItemSeen[$ipName])) {
            $itemPos += strlen($itemMarker);
            continue;
        }
        $ddItemSeen[$ipName] = true;

        $ipDesc = $ddUnescape($extractBqValue($itemEntry, 'description') ?? '');
        $ipPriceStr = $extractBqValue($itemEntry, 'displayPrice') ?? '';
        $ipPrice = (float)preg_replace('/[^0-9.]/', '', $ipPriceStr);

        // Image from carousel map or item entry
        $ipImg = $ddImageMap[$ipName] ?? '';
        if (empty($ipImg)) {
            $ipImg = $extractBqValue($itemEntry, 'imageUrl') ?? '';
            if ($ipImg === 'null' || stripos($ipImg, 'http') === false) $ipImg = '';
            if (strlen($ipImg)) $ipImg = $ddResizeImage($ipImg);
        }

        $ddItemCounter++;
        $ddItem = [
            'name' => $ipName,
            'description' => $ipDesc,
            'price' => $ipPrice,
            'category' => $catName,
            'modifiers' => [],
            'id' => 'item_' . $ddItemCounter,
            'ddItemId' => $ddItemId,
            'imageUrl' => $ipImg,
            'imageSrc' => $ipImg,
        ];
        if (strlen($ipImg)) $ddItem['imageFilename'] = basename(parse_url($ipImg, PHP_URL_PATH) ?: $ipImg);
        $ddItems[] = $ddItem;

        $itemPos += strlen($itemMarker);
    }
    $catPos += strlen($catMarker);
}

$ddItemsWithImg = 0;
foreach ($ddItems as $ddi) {
    if (!empty($ddi['imageUrl'])) $ddItemsWithImg++;
}
$response['steps'][] = "Found " . count($ddCategories) . " categories, " . count($ddItems) . " items ($ddItemsWithImg with images)";

// Extract business info: the name is the <title> text up to the first '-' or '|'.
$ddBusiness = [];
if (preg_match('#<title[^>]*>([^<]+)</title>#i', $pageHtml, $ddTm)) {
    $ddTitle = preg_replace('#\s*[-|].*#', '', trim($ddTm[1]));
    if (strlen($ddTitle)) $ddBusiness['name'] = $ddTitle;
}

// Address from StoreHeaderAddress
$ddAddrMarker = $BQ . '__typename' . $BQ . ':' . $BQ . 'StoreHeaderAddress' . $BQ;
$ddAddrPos = stripos($pageHtml, $ddAddrMarker);
if ($ddAddrPos !== false) {
    $ddAddrEnd = stripos($pageHtml, $BQ . '__typename' . $BQ, $ddAddrPos + strlen($ddAddrMarker));
    // Without a following marker, look at no more than the next 2000 bytes.
    if ($ddAddrEnd === false) $ddAddrEnd = min($ddAddrPos + 2000, strlen($pageHtml));
    $ddAddrSection = substr($pageHtml, $ddAddrPos, $ddAddrEnd - $ddAddrPos);
    $street = $extractBqValue($ddAddrSection, 'street');
    if ($street !== null) $ddBusiness['street'] = $street;
    $displayAddr = $extractBqValue($ddAddrSection, 'displayAddress');
    if ($displayAddr !== null) $ddBusiness['address'] = $displayAddr;
}

// Phone from StoreHeaderPhoneNumber
$ddPhoneMarker = $BQ . '__typename' . $BQ . ':' . $BQ . 'StoreHeaderPhoneNumber' .
$BQ;
$ddPhonePos = stripos($pageHtml, $ddPhoneMarker);
if ($ddPhonePos !== false) {
    $ddPhoneEnd = stripos($pageHtml, $BQ . '__typename' . $BQ, $ddPhonePos + strlen($ddPhoneMarker));
    // Without a following marker, look at no more than the next 1000 bytes.
    if ($ddPhoneEnd === false) $ddPhoneEnd = min($ddPhonePos + 1000, strlen($pageHtml));
    $ddPhoneSection = substr($pageHtml, $ddPhonePos, $ddPhoneEnd - $ddPhonePos);
    $phone = $extractBqValue($ddPhoneSection, 'phoneNumber');
    if ($phone !== null) $ddBusiness['phone'] = $phone;
}

// Hours from StoreOperationHoursRange: one "dayRange: timeRange" entry per marker.
$ddHoursMarker = $BQ . '__typename' . $BQ . ':' . $BQ . 'StoreOperationHoursRange' . $BQ;
if (stripos($pageHtml, $ddHoursMarker) !== false) {
    $ddHoursArr = [];
    $hPos = 0;
    while (true) {
        $hPos = stripos($pageHtml, $ddHoursMarker, $hPos);
        if ($hPos === false) break;
        $hNext = stripos($pageHtml, $ddHoursMarker, $hPos + strlen($ddHoursMarker));
        if ($hNext === false) $hNext = min($hPos + 500, strlen($pageHtml));
        $hSection = substr($pageHtml, $hPos, $hNext - $hPos);
        $dayRange = $extractBqValue($hSection, 'dayRange');
        $timeRange = $extractBqValue($hSection, 'timeRange');
        if ($dayRange !== null && $timeRange !== null) {
            $ddHoursArr[] = "$dayRange: $timeRange";
        }
        $hPos += strlen($ddHoursMarker);
    }
    if (!empty($ddHoursArr)) $ddBusiness['hours'] = implode('; ', $ddHoursArr);
}

if (!empty($ddItems)) {
    // Playwright modifier extraction (best effort: a failure is recorded as a
    // step and the menu is still returned without modifiers).
    $ddModifiers = [];
    $ddItemModMap = [];
    try {
        $response['steps'][] = "Running stealth Playwright for modifier extraction...";
        $ddItemsForPw = [];
        foreach ($ddItems as $ddi) {
            $ddItemsForPw[] = ['id' => $ddi['ddItemId'], 'name' => $ddi['name']];
        }

        // The item list is handed to the script through a temp file, which is
        // removed again even when the write or the script invocation fails.
        $ddTempFile = '/tmp/dd-items-' . generateUUID() . '.json';
        try {
            if (file_put_contents($ddTempFile, json_encode($ddItemsForPw)) === false) {
                throw new Exception("Could not write item list to $ddTempFile");
            }
            $ddModOutput = shell_exec("/opt/playwright/run-doordash-modifiers.sh " . escapeshellarg($targetUrl) . " " . escapeshellarg($ddTempFile) . " 2>&1");
        } finally {
            if (is_file($ddTempFile)) unlink($ddTempFile);
        }

        if (!empty(trim($ddModOutput ?? ''))) {
            $ddModData = json_decode(trim($ddModOutput), true);
            if (!empty($ddModData['modifiers']) && is_array($ddModData['modifiers'])) {
                $ddModifiers = $ddModData['modifiers'];
                // A group limited to one selection is a 'select'; everything else a 'checkbox'.
                foreach ($ddModifiers as &$ddMod) {
                    $ddMod['type'] = (!empty($ddMod['maxSelections']) && $ddMod['maxSelections'] == 1) ? 'select' : 'checkbox';
                }
                unset($ddMod);
            }
            if (!empty($ddModData['itemModifierMap']) && is_array($ddModData['itemModifierMap'])) {
                $ddItemModMap = $ddModData['itemModifierMap'];
                foreach ($ddItems as &$ddItemRef) {
                    if (isset($ddItemModMap[$ddItemRef['name']])) {
                        $ddItemRef['modifiers'] = $ddItemModMap[$ddItemRef['name']];
                    }
                }
                unset($ddItemRef);
            }
            $response['steps'][] = "Modifier extraction: " . count($ddModifiers) . " groups, " . count($ddItemModMap) . " items mapped";
        }
    } catch (Exception $e) {
        $response['steps'][] = "Modifier extraction failed (non-fatal): " . $e->getMessage();
    }

    $ddImageUrls = [];
    foreach ($ddItems as $ddI) {
        if (!empty($ddI['imageUrl'])) $ddImageUrls[] = $ddI['imageUrl'];
    }

    $response['OK'] = true;
    $response['DATA'] = [
        'business' => $ddBusiness,
        'categories' => $ddCategories,
        'modifiers' => $ddModifiers,
        'items' => $ddItems,
        'imageUrls' => $ddImageUrls,
        // Present (empty) so the payload has the same keys as the other fast paths.
        'imageMappings' => [],
        'headerCandidateIndices' => [],
    ];
    $response['sourceUrl'] = $targetUrl;
    $response['parsedVia'] = 'doordash_embedded';
    $response['imagesFound'] = count($ddImageUrls);
    $response['playwrightImagesCount'] = count($playwrightImages);
    jsonResponse($response);
}
} catch (Exception $e) {
    $response['steps'][] = "DoorDash extraction failed: " . $e->getMessage() . " - falling through to Claude";
}
}
// ========== END DOORDASH FAST PATH ==========

// Extract base URL for relative links
if (preg_match('#^(https?://[^/]+)#', $targetUrl, $bm)) {
    $baseUrl = $bm[1];
}
// Base path = URL without query string, cut back to its last '/'.
$basePath = preg_replace('#\?.*$#', '', $targetUrl);
if (!preg_match('#/$#', $basePath)) {
    $basePath = preg_replace('#/[^/]*$#', '/', $basePath);
}
}
} else {
    throw new Exception("Either 'url' or 'html' content is required");
}

// Menu pages array
$menuPages = [['url' => !empty($targetUrl) ? $targetUrl : 'uploaded', 'html' => $pageHtml]];

// Extract images from all pages. $imageUrls is keyed by URL so duplicates collapse.
$imageUrls = [];
$imageMappings = [];

// Add Playwright-captured images (minus icons, logos, trackers, SVGs, ...)
foreach ($playwrightImages as $pwImg) {
    if (!preg_match('#(icon|favicon|logo|sprite|pixel|tracking|badge|button|\.svg)#i', $pwImg)) {
        $imageUrls[$pwImg] = true;
    }
}

foreach ($menuPages as $menuPage) {
    if (preg_match_all('#<img[^>]+src=["\']([^"\']+)["\'][^>]*>#i', $menuPage['html'], $imgMatches, PREG_SET_ORDER)) {
        foreach ($imgMatches as $imgMatch) {
            $imgTag = $imgMatch[0];
            $imgSrc = $imgMatch[1];

            // Extract alt text
            $imgAlt = '';
            if (preg_match('#alt=["\']([^"\']+)["\']#i', $imgTag, $altM)) {
                $imgAlt = $altM[1];
            }

            // Image mapping for local uploads
            $imgFilename = basename($imgSrc);
            if (strlen($imgFilename) && strlen($imgAlt) && !preg_match('#(icon|favicon|logo|sprite|pixel|tracking|badge|button)#i', $imgSrc)) {
                $imageMappings[] = ['filename' => $imgFilename, 'alt' => $imgAlt, 'src' => $imgSrc];
            }

            // Resolve relative URLs. Protocol-relative sources ("//cdn.example/x.jpg")
            // are checked first: they also start with '/', and prefixing the site
            // origin would produce an invalid URL.
            if (str_starts_with($imgSrc, '//')) {
                $imgSrc = 'https:' . $imgSrc;
            } elseif (str_starts_with($imgSrc, '/')) {
                $imgSrc = $baseUrl . $imgSrc;
            } elseif (!preg_match('#^https?://#i', $imgSrc) && !str_starts_with($imgSrc, 'data:')) {
                $imgSrc = $basePath . $imgSrc;
            }

            if (preg_match('#^https?://#i', $imgSrc) && !isset($imageUrls[$imgSrc])) {
                if (!preg_match('#(icon|favicon|logo|sprite|pixel|tracking|badge|button)#i', $imgSrc)) {
                    $imageUrls[$imgSrc] = true;
                }
            }
        }
    }
}
$response['steps'][] = "Found " . count($imageUrls) . " unique images";

// Check for local scan (ZIP upload)
$isLocalScan = !empty($targetUrl) && stripos($targetUrl, '/temp/menu-import/') !== false;
$localBasePath = '';
if ($isLocalScan) {
    $localUrlPath = preg_replace('#https?://[^/]+(/temp/menu-import/[^/]+/).*#i', '$1', $targetUrl);
    $localBasePath = $expandPath($localUrlPath);
    $response['steps'][] = "Local scan detected, base path: $localBasePath";
}

// Download/read images (limit to 20). Only images larger than 5000 bytes are kept.
$imageDataArray = [];
$downloadedCount = 0;
$localReadCount = 0;
foreach (array_keys($imageUrls) as $imgUrl) {
    if ($downloadedCount >= 20) break;
    try {
        $imgBytes = 0;
        $imgContent = '';

        if ($isLocalScan && stripos($imgUrl, '/temp/menu-import/') !== false) {
            // Image belongs to the uploaded ZIP: read it from disk instead of over HTTP.
            $localPath = $expandPath(preg_replace('#https?://[^/]+(/temp/menu-import/.*)#i', '$1', $imgUrl));
            if (file_exists($localPath)) {
                $localContent = file_get_contents($localPath);
                if ($localContent !== false) {
                    $imgContent = $localContent;
                    $imgBytes = strlen($imgContent);
                    $localReadCount++;
                }
            }
        } else {
            $result = $httpGet($imgUrl, [], 10);
            if ($result['code'] === 200 && !empty($result['body'])) {
                $ct = $result['contentType'] ?? '';
                // Only accept responses that declare a supported image type.
                if (preg_match('#image/(jpeg|jpg|png|gif|webp)#i', $ct)) {
                    $imgContent = $result['body'];
                    $imgBytes = strlen($imgContent);
                }
            }
        }

        if ($imgBytes > 5000) {
            $base64Content = base64_encode($imgContent);
            // The media type is always taken from $detectMediaType, so no
            // extension or header based guess is computed beforehand.
            $mediaType = $detectMediaType($base64Content);
            $imageDataArray[] = [
                'type' => 'image',
                'source' => ['type' => 'base64', 'media_type' => $mediaType, 'data' => $base64Content],
                'url' => $imgUrl,
            ];
            $downloadedCount++;
        }
    } catch (Exception $e) {
        // Skip failed downloads
    }
}
$response['steps'][] = "Loaded " .
count($imageDataArray) . " valid images ($localReadCount from local disk)";

// ============================================================
// TOAST FAST PATH: Parse __OO_STATE__ directly instead of Claude
// ============================================================
if (stripos($pageHtml, 'window.__OO_STATE__') !== false && stripos($pageHtml, 'toasttab') !== false) {
    $response['steps'][] = "Toast page detected - extracting menu data from __OO_STATE__";
    try {
        $ooJson = $extractOoState($pageHtml);
        if ($ooJson !== null) {
            $ooState = json_decode($ooJson, true);
            if (!is_array($ooState)) throw new Exception("Failed to parse __OO_STATE__ JSON");

            $toastBusiness = [];
            $toastCategories = [];
            $toastItems = [];
            $categorySet = [];
            $itemId = 1;
            $menuNames = [];

            // Builds one normalized item record for $category from a raw Toast item,
            // or returns null when the raw entry has no usable name. Consumes one
            // sequential item id per record produced.
            $buildToastItem = function ($raw, $category) use (&$itemId, $extractToastPrice, $extractToastImage) {
                if (!is_array($raw) || empty($raw['name']) || !strlen(trim($raw['name']))) return null;
                $record = [
                    'id' => 'item_' . $itemId,
                    'name' => trim($raw['name']),
                    'category' => $category,
                    'modifiers' => [],
                    'hasModifiers' => !empty($raw['hasModifiers']),
                    'guid' => $raw['guid'] ?? '',
                    'itemGroupGuid' => $raw['itemGroupGuid'] ?? '',
                    'description' => trim((string)($raw['description'] ?? '')),
                    'price' => $extractToastPrice($raw),
                    'imageUrl' => '',
                ];
                $img = $extractToastImage($raw);
                if (strlen($img)) {
                    $record['imageUrl'] = $img;
                    $record['imageSrc'] = $img;
                    $record['imageFilename'] = basename($img);
                }
                $itemId++;
                return $record;
            };

            // Copies address1/city/state/zip from a Toast location array into
            // $toastBusiness, both as separate fields and as one display string.
            $applyToastAddress = function (array $loc) use (&$toastBusiness) {
                if (empty($loc['address1'])) return;
                $toastBusiness['addressLine1'] = $loc['address1'];
                $toastBusiness['address'] = $loc['address1'];
                if (!empty($loc['city'])) {
                    $toastBusiness['city'] = $loc['city'];
                    $toastBusiness['address'] .= ', ' . $loc['city'];
                }
                if (!empty($loc['state'])) {
                    $toastBusiness['state'] = $loc['state'];
                    $toastBusiness['address'] .= ', ' . $loc['state'];
                }
                $zip = $loc['zip'] ?? $loc['zipCode'] ?? null;
                if (!empty($zip)) {
                    $toastBusiness['zip'] = $zip;
                    $toastBusiness['address'] .= ' ' . $zip;
                }
            };

            // Extract restaurant info from ROOT_QUERY
            if (!empty($ooState['ROOT_QUERY']) && is_array($ooState['ROOT_QUERY'])) {
                foreach ($ooState['ROOT_QUERY'] as $rqKey => $rqVal) {
                    if ((stripos($rqKey, 'restaurantV2By') !== false || stripos($rqKey, 'restaurantV2(') !== false) && is_array($rqVal)) {
                        if (!empty($rqVal['name']) && empty($toastBusiness['name'])) $toastBusiness['name'] = $rqVal['name'];
                        if (!empty($rqVal['description']) && strlen(trim((string)$rqVal['description']))) {
                            $toastBusiness['description'] = trim((string)$rqVal['description']);
                        }
                        if (!empty($rqVal['location']) && is_array($rqVal['location'])) {
                            $loc = $rqVal['location'];
                            $applyToastAddress($loc);
                            if (!empty($loc['phone'])) $toastBusiness['phone'] = $loc['phone'];
                            if (!empty($loc['latitude']) && is_numeric($loc['latitude']) && !empty($loc['longitude']) && is_numeric($loc['longitude'])) {
                                $toastBusiness['latitude'] = $loc['latitude'];
                                $toastBusiness['longitude'] = $loc['longitude'];
                            }
                        }
                        if (!empty($rqVal['brandColor'])) $toastBusiness['brandColor'] = str_replace('#', '', $rqVal['brandColor']);

                        // Hours from schedule: the first service period of each dated
                        // daily schedule, keyed by weekday.
                        if (!empty($rqVal['schedule']['upcomingSchedules'][0]['dailySchedules'])) {
                            $dayHours = [];
                            foreach ($rqVal['schedule']['upcomingSchedules'][0]['dailySchedules'] as $ds) {
                                $sp = $ds['servicePeriods'][0] ?? null;
                                $dsTime = !empty($ds['date']) ? strtotime($ds['date']) : false;
                                // A period without both ends, or an unparsable date, would
                                // otherwise be reported as bogus opening hours.
                                if ($dsTime === false || empty($sp['startTime']) || empty($sp['endTime'])) continue;
                                $dow = (int)date('w', $dsTime) + 1; // 1=Sun
                                $dayHours[$dow] = ['open' => substr($sp['startTime'], 0, 5), 'close' => substr($sp['endTime'], 0, 5)];
                            }
                            $dayNames = [1=>'Sun',2=>'Mon',3=>'Tue',4=>'Wed',5=>'Thu',6=>'Fri',7=>'Sat'];
                            $dayOrder = [2,3,4,5,6,7,1]; // Mon-Sun
                            $hoursParts = [];
                            foreach ($dayOrder as $dIdx) {
                                if (isset($dayHours[$dIdx])) {
                                    $dh = $dayHours[$dIdx];
                                    $op = explode(':', $dh['open']);
                                    $cp = explode(':', $dh['close']);
                                    $openStr = $formatTime12h((int)$op[0], (int)($op[1] ?? 0));
                                    $closeStr = $formatTime12h((int)$cp[0], (int)($cp[1] ?? 0));
                                    $hoursParts[] = $dayNames[$dIdx] . " $openStr-$closeStr";
                                }
                            }
                            if (!empty($hoursParts)) $toastBusiness['hours'] = implode(', ', $hoursParts);
                        }
                    }
                }
            }

            // Also check Restaurant: keys (older format)
            foreach ($ooState as $ooKey => $ooVal) {
                // Only consulted while no business name has been found yet.
                if (str_starts_with($ooKey, 'Restaurant:') && empty($toastBusiness['name']) && is_array($ooVal)) {
                    if (!empty($ooVal['name'])) $toastBusiness['name'] = $ooVal['name'];
                    if (!empty($ooVal['location']) && is_array($ooVal['location'])) {
                        $loc = $ooVal['location'];
                        // Same field set as the ROOT_QUERY branch, so that later steps
                        // reading addressLine1/city see identical data for both formats.
                        $applyToastAddress($loc);
                        if (!empty($loc['phone'])) $toastBusiness['phone'] = $loc['phone'];
                    }
                    if (!empty($ooVal['brandColor'])) $toastBusiness['brandColor'] = str_replace('#', '', $ooVal['brandColor']);
                }

                // Menu data: each group becomes a category; subgroups become child categories.
                if (str_starts_with($ooKey, 'Menu:') && is_array($ooVal) && !empty($ooVal['groups']) && is_array($ooVal['groups'])) {
                    $menuName = $ooVal['name'] ?? '';
                    if (strlen($menuName)) $menuNames[] = $menuName;

                    foreach ($ooVal['groups'] as $group) {
                        $groupName = trim($group['name'] ?? 'Menu');
                        if (!isset($categorySet[$groupName])) {
                            $categorySet[$groupName] = true;
                            $catObj = ['name' => $groupName, 'itemCount' => 0, 'menuName' => $menuName];
                            $toastCategories[] = $catObj;
                        }

                        // Items from group
                        if (!empty($group['items']) && is_array($group['items'])) {
                            foreach ($group['items'] as $item) {
                                $itemStruct = $buildToastItem($item, $groupName);
                                if ($itemStruct !== null) $toastItems[] = $itemStruct;
                            }
                        }

                        // Subgroups
                        $subgroups = $group['subgroups'] ?? $group['children'] ?? $group['childGroups'] ?? [];
                        if (!empty($subgroups) && is_array($subgroups)) {
                            foreach ($subgroups as $sg) {
                                $subName = trim($sg['name'] ?? $groupName);
                                if (strlen($subName) && !isset($categorySet[$subName])) {
                                    $categorySet[$subName] = true;
                                    $toastCategories[] = ['name' => $subName, 'parentCategoryName' => $groupName, 'itemCount' => 0];
                                }
                                if (!empty($sg['items']) && is_array($sg['items'])) {
                                    foreach ($sg['items'] as $subItem) {
                                        $itemStruct = $buildToastItem($subItem, $subName);
                                        if ($itemStruct !== null) $toastItems[] = $itemStruct;
                                    }
                                }
                            }
                        }
                    }
                }
            }

            // Fallback: business name from title
            if (empty($toastBusiness['name'])) {
                if (preg_match('#<title[^>]*>([^<]+)</title>#i', $pageHtml, $tm)) {
                    $titleText = trim($tm[1]);
                    if (strpos($titleText, '|') !== false) $titleText = trim(explode('|', $titleText)[0]);
                    $titleText = preg_replace('#\s*-\s*(Menu|Order|Online).*$#i', '', $titleText);
                    if (strlen($titleText)) $toastBusiness['name'] = $titleText;
                }
            }

            // Clean business name: drop ordering-related suffixes and any embedded address.
            if (!empty($toastBusiness['name'])) {
                $bizName = $toastBusiness['name'];
                // "(?:&|and)" is the intended alternation; a character class such as
                // [&and]+ would accept any mix of those letters.
                $bizName = preg_replace('#\s*[-|]+\s*(Order\s+(pickup|online|delivery|food)|Online\s+Order|Delivery\s*(?:&|and)\s*Takeout|Takeout\s*(?:&|and)\s*Delivery|Menu\s*(?:&|and)\s*Order).*$#i', '', $bizName);
                if (!empty($toastBusiness['addressLine1']) && stripos($bizName, $toastBusiness['addressLine1']) !== false) {
                    $bizName = trim(str_ireplace($toastBusiness['addressLine1'], '', $bizName));
                }
                if (!empty($toastBusiness['address'])) {
                    $addrFirst = trim(explode(',',
$toastBusiness['address'])[0]);
        if (strlen($addrFirst) && stripos($bizName, $addrFirst) !== false) {
            $bizName = trim(str_ireplace($addrFirst, '', $bizName));
        }
    }
    // Strip separator characters left dangling at either end.
    $bizName = trim(preg_replace('#[-|]+$#', '', trim($bizName)));
    $bizName = trim(preg_replace('#^[-|]+#', '', $bizName));
    $toastBusiness['name'] = trim($bizName);
}

// Clean city
if (!empty($toastBusiness['city']) && strpos($toastBusiness['city'], ',') !== false) {
    $toastBusiness['city'] = trim(explode(',', $toastBusiness['city'])[0]);
}

// Multi-menu hierarchy: with more than one named menu, each menu becomes a
// top-level category and its groups are re-parented under it.
if (count($menuNames) > 1) {
    // Subgroup categories carry no 'menuName'; index them by parent group so they
    // can follow that group instead of being dropped.
    $subsByParent = [];
    foreach ($toastCategories as $tc) {
        if (!isset($tc['menuName']) && isset($tc['parentCategoryName'])) {
            $subsByParent[$tc['parentCategoryName']][] = $tc;
        }
    }

    $hierarchicalCategories = [];
    $emittedNames = [];
    foreach ($menuNames as $mn) {
        $hierarchicalCategories[] = ['name' => $mn, 'itemCount' => 0];
        foreach ($toastCategories as $tc) {
            if (($tc['menuName'] ?? '') !== $mn) continue;
            $tc['parentCategoryName'] = $mn;
            $hierarchicalCategories[] = $tc;
            $emittedNames[$tc['name']] = true;
            foreach ($subsByParent[$tc['name']] ?? [] as $subCat) {
                if (isset($emittedNames[$subCat['name']])) continue;
                $hierarchicalCategories[] = $subCat;
                $emittedNames[$subCat['name']] = true;
            }
        }
    }
    // Categories not reached above (e.g. groups of a menu without a name) are
    // kept unchanged, so no item ends up referencing a missing category.
    foreach ($toastCategories as $tc) {
        if (!isset($emittedNames[$tc['name']])) {
            $hierarchicalCategories[] = $tc;
            $emittedNames[$tc['name']] = true;
        }
    }
    $toastCategories = $hierarchicalCategories;
}

// Update category item counts (single pass over the items)
$itemCountsByCategory = array_count_values(array_column($toastItems, 'category'));
foreach ($toastCategories as $ci => $tc) {
    $toastCategories[$ci]['itemCount'] = $itemCountsByCategory[$tc['name']] ?? 0;
}

$response['steps'][] = "Extracted " . count($toastItems) . " items from " . count($toastCategories) . " categories via __OO_STATE__";

// Toast modifier extraction via Playwright (only when some item has modifiers)
$toastModifiers = [];
$modifierItemCount = 0;
foreach ($toastItems as $ti) {
    if (!empty($ti['hasModifiers'])) $modifierItemCount++;
}

if ($modifierItemCount > 0) {
    $response['steps'][] = "$modifierItemCount items have modifiers - extracting via Playwright";
    try {
        // Use the requested URL when it is a toasttab.com URL; otherwise try to
        // derive the ordering URL from identifiers embedded in the page.
        $toastUrl = '';
        if (!empty($targetUrl) && preg_match('#toasttab\.com#i', $targetUrl)) {
            $toastUrl = $targetUrl;
        } else {
            // Try shortUrl from HTML
            if (preg_match('#"shortUrl"\s*:\s*"([^"]+)"#i', $pageHtml, $sm)) {
                $toastUrl = 'https://www.toasttab.com/local/order/' . $sm[1];
            }
            if (empty($toastUrl) && preg_match('#toasttab\.com/([a-zA-Z0-9_-]+)/giftcards#i', $pageHtml, $gm)) {
                $toastUrl = 'https://www.toasttab.com/local/order/' . $gm[1];
            }
        }

        if (strlen($toastUrl)) {
            $response['steps'][] = "Fetching modifiers from: $toastUrl";
            $modOutput = shell_exec("/opt/playwright/run-toast-modifiers.sh " . escapeshellarg($toastUrl) . " 2>&1");
            if (!empty(trim($modOutput ?? ''))) {
                // Trimmed before decoding, as the DoorDash path does.
                $modResult = json_decode(trim($modOutput), true);
                if (!empty($modResult['modifiers']) && is_array($modResult['modifiers'])) {
                    $toastModifiers = $modResult['modifiers'];
                    $response['steps'][] = "Extracted " . count($toastModifiers) . " unique modifier groups";
                }
                if (!empty($modResult['itemModifierMap']) && is_array($modResult['itemModifierMap'])) {
                    $modMap = $modResult['itemModifierMap'];
                    foreach ($toastItems as &$toastItemRef) {
                        if (isset($modMap[$toastItemRef['name']])) {
                            $toastItemRef['modifiers'] = $modMap[$toastItemRef['name']];
                        }
                    }
                    unset($toastItemRef);
                    $response['steps'][] = "Mapped modifiers to " . count($modMap) . " items";
                }
                if (!empty($modResult['stats'])) {
                    $response['steps'][] = "Modifier stats: " . json_encode($modResult['stats']);
                }
            } else {
                $response['steps'][] = "Playwright modifier script returned empty output";
            }
        } else {
            $response['steps'][] = "Could not determine Toast URL for modifier extraction";
        }
    } catch (Exception $e) {
        $response['steps'][] = "Modifier extraction failed: " . $e->getMessage() . " - continuing without modifiers";
    }
}

// Return directly if we have items
if (!empty($toastItems)) {
    $response['OK'] = true;
    $response['DATA'] = [
        'business' => $toastBusiness,
        'categories' => $toastCategories,
        'items' => $toastItems,
        'modifiers' => $toastModifiers,
        'imageUrls' => [],
        'imageMappings' => $imageMappings,
        'headerCandidateIndices' => [],
    ];
    $response['sourceUrl'] = !empty($targetUrl) ? $targetUrl : 'uploaded';
    $response['pagesProcessed'] = 1;
    $response['imagesFound'] = count($imageDataArray);
    $response['playwrightImagesCount'] = count($playwrightImages);
    $response['parsedVia'] = 'toast_oo_state';
    jsonResponse($response);
}
}
} catch (Exception $e) {
    $toastError = "Toast __OO_STATE__ parsing failed: " .
$e->getMessage();
            // Toast parsing is best-effort: record the failure and continue with the generic Claude path below.
            $response['steps'][] = "$toastError - falling back to Claude";
            $response['DEBUG_TOAST_ERROR'] = $toastError;
        }
    }

    // ============================================================
    // Look for embedded JSON data (__NEXT_DATA__, window state, etc.)
    // ============================================================
    // Collects raw JSON-looking payloads from every crawled page into one labelled text blob
    // ($embeddedJsonData). Nothing is decoded here; the blob is appended verbatim to the text
    // sent to Claude further down.
    // NOTE(review): the first and last patterns below start with "]*" in this view, i.e. the
    // opening tag text appears to be missing (presumably a script tag) - confirm against the
    // real file before touching them.
    $embeddedJsonData = '';
    foreach ($menuPages as $menuPage) {
        // Capture group 1 is the element body following an id="__NEXT_DATA__" attribute.
        if (preg_match_all('#]*id=["\']__NEXT_DATA__["\'][^>]*>([^<]+)#i', $menuPage['html'], $ndm)) {
            foreach ($ndm[1] as $sc) $embeddedJsonData .= "\n--- __NEXT_DATA__ ---\n$sc";
        }
        // window.__SOMETHING__ = {...}; assignments. The whole match ($stm[0]) is kept, and
        // [^;]+ means the capture stops at the first ';' inside the object.
        if (preg_match_all('#window\.__[A-Z_]+__\s*=\s*(\{[^;]+\});#', $menuPage['html'], $stm)) {
            foreach ($stm[0] as $sm) $embeddedJsonData .= "\n--- WINDOW_STATE ---\n$sm";
        }
        // data-props / data-page / data-state attributes whose value starts with '{'.
        if (preg_match_all('#data-(?:props|page|state)=["\'](\{[^"\']+\})["\']#i', $menuPage['html'], $dpm)) {
            foreach ($dpm[0] as $dp) $embeddedJsonData .= "\n--- DATA_PROPS ---\n$dp";
        }
        // JSON-LD payloads are only kept when they mention "menu" (case-insensitive).
        if (preg_match_all('#]*type=["\']application/ld\+json["\'][^>]*>([^<]+)#i', $menuPage['html'], $ldm)) {
            foreach ($ldm[1] as $sc) {
                if (stripos($sc, 'menu') !== false || stripos($sc, 'MenuItem') !== false) {
                    $embeddedJsonData .= "\n--- JSON_LD_MENU ---\n$sc";
                }
            }
        }
    }
    // Expose whether anything was found (and how much) in the debug section of the response.
    if (strlen($embeddedJsonData)) {
        $response['DEBUG_EMBEDDED_JSON_FOUND'] = true;
        $response['DEBUG_EMBEDDED_JSON_LENGTH'] = strlen($embeddedJsonData);
    } else {
        $response['DEBUG_EMBEDDED_JSON_FOUND'] = false;
    }

    // Combine HTML, strip aggressively to keep menu content
    $combinedHtml = '';
    foreach ($menuPages as $menuPage) {
        $cleanHtml = $menuPage['html'];
        // Remove non-content elements
        // NOTE(review): the eight '#]*>.*?#is' patterns and the '##s' pattern are shown here
        // without any tag text, so which elements each one removes cannot be determined from
        // this view - verify the literals in the real file.
        $cleanHtml = preg_replace('#]*>.*?#is', '', $cleanHtml);
        $cleanHtml = preg_replace('#]*>.*?#is', '', $cleanHtml);
        $cleanHtml = preg_replace('##s', '', $cleanHtml);
        $cleanHtml = preg_replace('#]*>.*?#is', '', $cleanHtml);
        $cleanHtml = preg_replace('#]*>.*?#is', '', $cleanHtml);
        $cleanHtml = preg_replace('#]*>.*?#is', '', $cleanHtml);
        $cleanHtml = preg_replace('#]*>.*?#is', '', $cleanHtml);
        $cleanHtml = preg_replace('#]*>.*?#is', '', $cleanHtml);
        $cleanHtml = preg_replace('#]*>.*?#is', '', $cleanHtml);
        // Drop form-control and head-style opening tags (the tag itself, not any enclosed text).
        $cleanHtml = preg_replace('#<(input|button|select|textarea|option|label|fieldset|legend|datalist|output)[^>]*/?>#is', '', $cleanHtml);
        $cleanHtml = preg_replace('#<(meta|link|base|source|track|wbr)[^>]*/?>#is', '', $cleanHtml);
        // Strip class/style/data/id/aria attributes to reduce size
        // (the double-quoted variant also removes onclick/onload/loading; the single-quoted one does not)
        $cleanHtml = preg_replace('#\s+(class|style|data-[a-z-]+|id|aria-[a-z-]+|role|tabindex|onclick|onload|loading)="[^"]*"#i', '', $cleanHtml);
        $cleanHtml = preg_replace("#\s+(class|style|data-[a-z-]+|id|aria-[a-z-]+|role|tabindex)='[^']*'#i", '', $cleanHtml);
        // Collapse whitespace
        $cleanHtml = preg_replace('#\s{2,}#', ' ', $cleanHtml);
        $cleanHtml = preg_replace('#>\s+<#', '><', $cleanHtml);
        // Each page is prefixed with a marker line carrying its URL.
        $combinedHtml .= "\n--- PAGE: " . $menuPage['url'] . " ---\n" . $cleanHtml;
    }
    // Embedded JSON goes after all page HTML.
    if (strlen($embeddedJsonData)) {
        $combinedHtml .= "\n\n=== EMBEDDED JSON DATA (may contain full menu) ===\n" . $embeddedJsonData;
    }
    $response['steps'][] = "Combined HTML size after stripping: " . strlen($combinedHtml) . " bytes";
    // Hard cap of 200000 bytes on what is sent to the model.
    // NOTE(review): substr() cuts on a byte offset, so the last character can be split if it
    // is multi-byte; and because the embedded JSON is appended last, it is the first thing
    // lost when the HTML alone exceeds the cap.
    if (strlen($combinedHtml) > 200000) {
        $combinedHtml = substr($combinedHtml, 0, 200000);
        $response['steps'][] = "Truncated to 200KB";
    }

    // Server-side heading hierarchy detection
    // Walks $combinedHtml from left to right, always handling whichever of the two heading
    // patterns matches first at or after $scanPos. Builds:
    //   $headingHierarchy: parent heading text => list of child heading texts
    //   $hierarchyDesc:    the same information as bullet lines for the prompt
    // NOTE(review): both preg_match patterns read '#]*>#i' and both stripos needles are ''
    // in this view. Variable names and the "+ 5" offsets suggest h2/h3 opening tags and
    // five-character closing tags - confirm against the real file.
    $headingHierarchy = [];
    $hierarchyDesc = '';
    $scanPos = 0;
    $currentH2 = '';
    while ($scanPos < strlen($combinedHtml)) {
        // Byte offset of the next match of each pattern, or false when there is none.
        $nextH2 = preg_match('#]*>#i', $combinedHtml, $m2, PREG_OFFSET_CAPTURE, $scanPos) ? $m2[0][1] : false;
        $nextH3 = preg_match('#]*>#i', $combinedHtml, $m3, PREG_OFFSET_CAPTURE, $scanPos) ? $m3[0][1] : false;
        if ($nextH2 === false && $nextH3 === false) break;
        if ($nextH2 !== false && ($nextH3 === false || $nextH2 < $nextH3)) {
            // Parent-level heading comes first.
            $closePos = stripos($combinedHtml, '', $nextH2);
            if ($closePos === false) break;
            $tagContent = substr($combinedHtml, $nextH2, $closePos + 5 - $nextH2);
            $h2Raw = trim(strip_tags($tagContent));
            // Alphanumeric-only copy, used solely to decide whether the heading is usable.
            $h2Clean = trim(preg_replace('/[^a-zA-Z0-9 ]/', '', $h2Raw));
            // A bare "MENU" heading, an empty heading or a copyright line resets the current
            // parent, so child headings that follow are not attached to anything.
            if (strlen($h2Clean) && strtoupper($h2Clean) !== 'MENU' && stripos($h2Clean, 'copyright') === false) {
                $currentH2 = $h2Raw;
            } else {
                $currentH2 = '';
            }
            $scanPos = $closePos + 5;
        } else {
            // Child-level heading comes first: attach it to the current parent, if any.
            $closePos = stripos($combinedHtml, '', $nextH3);
            if ($closePos === false) break;
            $tagContent = substr($combinedHtml, $nextH3, $closePos + 5 - $nextH3);
            $h3Text = trim(strip_tags($tagContent));
            if (strlen($currentH2) && strlen($h3Text)) {
                if (!isset($headingHierarchy[$currentH2])) $headingHierarchy[$currentH2] = [];
                $headingHierarchy[$currentH2][] = $h3Text;
            }
            $scanPos = $closePos + 5;
        }
    }
    // Render the detected hierarchy as text for the user message and log how many parents were found.
    if (!empty($headingHierarchy)) {
        foreach ($headingHierarchy as $hParent => $hChildren) {
            $hierarchyDesc .= "- \"$hParent\" contains subsections: " . implode(', ', $hChildren) . "\n";
        }
        $response['steps'][] = "Detected " . count($headingHierarchy) . " parent categories with subcategories from h2/h3 structure";
    }

    // ============================================================
    // Claude API call for generic pages
    // ============================================================
    // System prompt: defines the JSON shape the post-processing below expects (business,
    // optional menus, categories as strings or {name, subcategories}, modifiers, items).
    $systemPrompt = 'You are an expert at extracting structured menu data from restaurant website HTML. Extract ALL menu data visible in the HTML. Return valid JSON with these keys: business (object with name, address, phone, hours, brandColor), menus (array of objects — see below), categories (array), modifiers (array), items (array with name, description, price, category, menu, modifiers array, and imageUrl). 
MENUS vs CATEGORIES (CRITICAL): A MENU is a distinct time-based or themed menu that a restaurant offers separately — e.g., "Brunch", "Lunch", "Dinner", "Happy Hour", "Late Night", "Kids Menu". If a restaurant has multiple menus, return a "menus" array of objects like [{"name": "Brunch"}, {"name": "Lunch"}, {"name": "Dinner"}]. Each item should have a "menu" field set to which menu it belongs to. If the restaurant only has one menu or the sections are food-type categories (not time/theme based), omit the "menus" key entirely and treat everything as categories within a single menu. CATEGORIES vs ITEMS (CRITICAL): A CATEGORY is a broad section heading that groups multiple items (e.g., \'Appetizers\', \'Tacos\', \'Drinks\', \'Desserts\'). An ITEM is an individual food or drink product with a name, description, and price. Do NOT create a category for each individual item. A typical restaurant has 5-15 categories and 30-150 items. If you find yourself creating more categories than items, you are wrong - those are items, not categories. Each item must have a \'category\' field set to the category it belongs to. CATEGORIES FORMAT: Each entry in the categories array can be either a simple string (for flat categories) OR an object with \'name\' and optional \'subcategories\' array. Example: ["Appetizers", {"name": "Drinks", "subcategories": ["Hot Drinks", "Cold Drinks"]}, "Desserts"]. SUBCATEGORY DETECTION: If a section header contains nested titled sections beneath it (sub-headers with their own items), the outer section is the PARENT and inner sections are SUBCATEGORIES. For items in subcategories, set their \'category\' field to the SUBCATEGORY name (not the parent). CRITICAL FOR IMAGES: Each menu item in the HTML is typically in a container (div, li, article) that also contains an img tag. Extract the img src URL and include it as \'imageUrl\' for that item. Look for img tags that are siblings or children within the same menu-item container. 
The image URL should be the full or relative src value from the img tag - NOT the alt text. CRITICAL: Extract EVERY menu item from ALL sources including embedded JSON (__NEXT_DATA__, window state, JSON-LD). For brandColor: suggest a vibrant hex (6 digits, no hash). For prices: numbers (e.g., 12.99). CRITICAL: Return ONLY valid JSON. All special characters in strings must be properly escaped. Never use smart/curly quotes. Use only ASCII double quotes for JSON string delimiters and backslash-escape any literal double quotes inside values.';

    // Build message content
    // Content blocks are ordered images first, then one text block, so the user text can
    // refer to "the images above".
    $messagesContent = [];
    // Add images (up to 10)
    $imgLimit = min(count($imageDataArray), 10);
    for ($i = 0; $i < $imgLimit; $i++) {
        $messagesContent[] = ['type' => 'image', 'source' => $imageDataArray[$i]['source']];
    }
    // Add HTML text
    $userText = "Extract menu data from this restaurant website HTML. The images above are from the same website - identify which ones are food photos that could be used as item images, and which could be header/banner images.";
    // Only mention the heading hierarchy when the scan above actually found one.
    if (strlen($hierarchyDesc)) {
        $userText .= "\n\nIMPORTANT - DETECTED SECTION HIERARCHY FROM HTML HEADINGS:\n" . "The following h2 sections contain h3 sub-sections. Use these as parent-subcategory relationships in your categories output:\n" . $hierarchyDesc . "For each parent above, include it in the categories array as an OBJECT with 'name' and 'subcategories' array. Items belonging to a subsection should have their 'category' field set to the SUBCATEGORY name (not the parent).";
    }
    // The stripped, size-capped HTML (plus any embedded JSON) is the last part of the text block.
    $userText .= "\n\nHere is the HTML content:\n\n" .
$combinedHtml;
    $messagesContent[] = ['type' => 'text', 'text' => $userText];

    // Request payload for the Anthropic Messages API. Temperature 0 keeps the extraction as
    // repeatable as the model allows.
    $requestBody = [
        'model' => 'claude-sonnet-4-20250514',
        'max_tokens' => 16384,
        'temperature' => 0,
        'system' => $systemPrompt,
        'messages' => [['role' => 'user', 'content' => $messagesContent]],
    ];

    // Crawled HTML is not guaranteed to be valid UTF-8, and the byte-wise 200KB cut applied
    // earlier can split a multi-byte character. Plain json_encode() returns false in that
    // case, which would send an empty body. Substitute invalid sequences instead and fail
    // loudly if encoding still does not work.
    $requestJson = json_encode($requestBody, JSON_INVALID_UTF8_SUBSTITUTE);
    if ($requestJson === false) {
        throw new Exception('Failed to encode Claude request: ' . json_last_error_msg());
    }

    $response['steps'][] = "Sending to Claude API...";
    // $httpPost is defined elsewhere in this file; it is used here as
    // (url, body, headers, 120) => ['code' => ..., 'body' => ...].
    // NOTE(review): 120 is presumably a timeout in seconds - confirm against the helper.
    $claudeResult = $httpPost(
        'https://api.anthropic.com/v1/messages',
        $requestJson,
        ['Content-Type: application/json', "x-api-key: $CLAUDE_API_KEY", 'anthropic-version: 2023-06-01'],
        120
    );

    // Any non-200 status aborts the import. Prefer the API's structured error message and
    // fall back to the first 500 bytes of the raw body.
    if ($claudeResult['code'] !== 200) {
        $errData = json_decode($claudeResult['body'], true);
        if (!empty($errData['error']['message'])) {
            $errorDetail = $errData['error']['message'];
        } else {
            $errorDetail = substr($claudeResult['body'], 0, 500);
        }
        throw new Exception("Claude API error: {$claudeResult['code']} - $errorDetail");
    }

    $claudeResponse = json_decode($claudeResult['body'], true);
    if (empty($claudeResponse['content']) || !is_array($claudeResponse['content'])) {
        throw new Exception("Empty response from Claude");
    }

    // A response cut off by the token limit is the usual cause of the JSON parse error
    // below, so make it visible in the step log.
    if (($claudeResponse['stop_reason'] ?? '') === 'max_tokens') {
        $response['steps'][] = "Claude response hit max_tokens - output may be truncated";
    }

    // Use the first text block of the reply.
    $responseText = '';
    foreach ($claudeResponse['content'] as $block) {
        if (is_array($block) && ($block['type'] ?? '') === 'text') {
            $responseText = is_string($block['text'] ?? null) ? $block['text'] : '';
            break;
        }
    }

    // $cleanClaudeJson is defined elsewhere in this file and is applied before decoding.
    $responseText = $cleanClaudeJson($responseText);
    $response['DEBUG_RAW_CLAUDE'] = $responseText;

    $menuData = json_decode($responseText, true);
    if (!is_array($menuData)) {
        $response['OK'] = false;
        $response['MESSAGE'] = 'JSON parse error';
        $response['DEBUG_JSON_ERROR'] = json_last_error_msg();
        $response['DEBUG_RAW_RESPONSE'] = substr($responseText, 0, 3000);
        jsonResponse($response);
    }

    // Build image URL list
    $imageUrlList = [];
    foreach ($imageDataArray as $imgData) {
        if (!empty($imgData['url'])) {
            $imageUrlList[] = $imgData['url'];
        }
    }

    // Ensure expected structure. The model output is untrusted: a key that is present but
    // not an array (string, null, number) would break the foreach/count calls that follow.
    foreach (['business', 'categories', 'modifiers', 'items'] as $requiredKey) {
        if (!isset($menuData[$requiredKey]) || !is_array($menuData[$requiredKey])) {
            $menuData[$requiredKey] = [];
        }
    }

    // Server-side address parsing: split combined address into components
    // Works right-to-left on a US-style "street, city, ST 12345[, USA]" string:
    // country suffix, then ZIP, then state, then street/city split on the first comma.
    $biz = &$menuData['business'];
    if (!empty($biz['address']) && is_string($biz['address']) && empty($biz['addressLine1'])) {
        // \b keeps the "US" alternative from eating the end of a word
        // (e.g. "..., Columbus" must not become "..., Columb").
        $addr = trim(preg_replace('/,?\s*\b(United States|USA|US|U\.S\.A?\.)\s*$/i', '', $biz['address']));

        // Extract ZIP (five digits, optional +4 which is discarded)
        $zipFound = false;
        if (preg_match('/\b(\d{5})(?:-\d{4})?\s*$/', $addr, $zm)) {
            $biz['zip'] = $zm[1];
            // The match is anchored at the end, so dropping its length removes exactly it.
            $addr = trim(substr($addr, 0, -strlen($zm[0])));
            $zipFound = true;
        }

        // Extract state (2-letter code at end)
        // A bare trailing two-letter word is not enough: "123 Main St" would yield state
        // "ST". Accept the code only when it directly follows a comma, or when a ZIP was
        // found and the code is a real US state/territory abbreviation.
        $usStateCodes = [
            'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'DC', 'FL', 'GA', 'HI', 'ID', 'IL',
            'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD', 'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE',
            'NV', 'NH', 'NJ', 'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD',
            'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY', 'PR', 'GU', 'VI', 'AS', 'MP',
        ];
        if (preg_match('/(^|,|\s)\s*([A-Za-z]{2})\s*$/', $addr, $sm)) {
            $stateCode = strtoupper($sm[2]);
            $followsComma = ($sm[1] === ',');
            if ($followsComma || ($zipFound && in_array($stateCode, $usStateCodes, true))) {
                $biz['state'] = $stateCode;
                $addr = trim(substr($addr, 0, -strlen($sm[0])));
            }
        }

        // Split remaining into addressLine1 and city by comma
        $addr = rtrim($addr, ', ');
        if (strpos($addr, ',') !== false) {
            $parts = array_map('trim', explode(',', $addr));
            $biz['addressLine1'] = $parts[0];
            $biz['city'] = $parts[1] ??
'';
        }
    }

    // Clean city if it still has state/zip/country in it
    if (!empty($biz['city']) && is_string($biz['city']) && strpos($biz['city'], ',') !== false) {
        $biz['city'] = trim(explode(',', $biz['city'])[0]);
    }
    // $biz is a reference into $menuData['business']; release it so nothing below can
    // write through it by accident. The array element itself is untouched.
    unset($biz);

    // The model output is untrusted. Keep only array-shaped items and re-index them so the
    // position-based item IDs assigned below are contiguous.
    $menuData['items'] = is_array($menuData['items'] ?? null)
        ? array_values(array_filter($menuData['items'], 'is_array'))
        : [];
    if (!is_array($menuData['categories'] ?? null)) {
        $menuData['categories'] = [];
    }

    // Pass through menus array if Claude detected multiple menus
    if (!empty($menuData['menus']) && is_array($menuData['menus']) && count($menuData['menus']) > 1) {
        // Menus are requested as objects with a "name", but accept plain strings too so the
        // log line is not empty when the model returns the short form.
        $menuLabels = [];
        foreach ($menuData['menus'] as $menuEntry) {
            $label = is_array($menuEntry) ? ($menuEntry['name'] ?? '') : $menuEntry;
            if (is_string($label) && strlen($label)) {
                $menuLabels[] = $label;
            }
        }
        $response['steps'][] = "Detected " . count($menuData['menus']) . " separate menus: " . implode(', ', $menuLabels);
    }

    // Convert categories to expected format
    // Input entries are either "Name" or {name, subcategories: [...]}. Output is a flat list
    // of ['name', 'itemCount'] rows; subcategories additionally carry 'parentCategoryName'.
    $formattedCategories = [];
    foreach ($menuData['categories'] as $cat) {
        if (is_string($cat)) {
            $formattedCategories[] = ['name' => $cat, 'itemCount' => 0];
        } elseif (is_array($cat)) {
            $parentName = is_string($cat['name'] ?? null) ? $cat['name'] : '';
            if (!strlen($parentName)) {
                continue;
            }
            $formattedCategories[] = ['name' => $parentName, 'itemCount' => 0];
            if (!empty($cat['subcategories']) && is_array($cat['subcategories'])) {
                foreach ($cat['subcategories'] as $subcat) {
                    $subcatName = is_string($subcat) ? $subcat : (is_array($subcat) ? ($subcat['name'] ?? '') : '');
                    if (is_string($subcatName) && strlen($subcatName)) {
                        $formattedCategories[] = ['name' => $subcatName, 'parentCategoryName' => $parentName, 'itemCount' => 0];
                    }
                }
            }
        }
    }
    $menuData['categories'] = $formattedCategories;

    // Tallies items per category name in one pass. Items without a string 'category' are ignored.
    $countByCategory = static function (array $items): array {
        $counts = [];
        foreach ($items as $it) {
            $c = $it['category'] ?? null;
            if (is_string($c)) {
                $counts[$c] = ($counts[$c] ?? 0) + 1;
            }
        }
        return $counts;
    };

    // Fix "every item is a category" pattern
    // Only considered when there are more than 10 categories and more than one category per
    // two items. If over 60% of the categories hold exactly one item and some hold none, the
    // empty ones are taken to be the real section headings and every other category is
    // folded into the nearest empty one that precedes it in list order.
    $totalItems = count($menuData['items']);
    $totalCats = count($formattedCategories);
    if ($totalCats > 10 && $totalItems > 0 && $totalCats > $totalItems * 0.5) {
        $itemCounts = $countByCategory($menuData['items']);
        $zeroCats = [];
        $singleCats = [];
        foreach ($formattedCategories as $fc) {
            $fcCount = $itemCounts[$fc['name']] ?? 0;
            if ($fcCount === 0) {
                $zeroCats[] = $fc['name'];
            } elseif ($fcCount === 1) {
                $singleCats[] = $fc['name'];
            }
        }
        if (count($singleCats) > $totalCats * 0.6 && !empty($zeroCats)) {
            $response['steps'][] = "Detected 'every item is a category' pattern (" . count($singleCats) . " single-item cats, " . count($zeroCats) . " empty cats) - collapsing";

            // Map each non-empty category to the heading it collapses into. Categories that
            // appear before the first heading fall under that first heading. For a duplicated
            // name the first occurrence decides.
            $isHeading = array_flip($zeroCats);
            $collapseInto = [];
            $currentParent = $zeroCats[0];
            foreach ($formattedCategories as $fc) {
                if (isset($isHeading[$fc['name']])) {
                    $currentParent = $fc['name'];
                } elseif (!isset($collapseInto[$fc['name']])) {
                    $collapseInto[$fc['name']] = $currentParent;
                }
            }
            foreach ($menuData['items'] as $idx => $it) {
                $c = $it['category'] ?? null;
                if (is_string($c) && isset($collapseInto[$c])) {
                    $menuData['items'][$idx]['category'] = $collapseInto[$c];
                }
            }

            // Only the headings survive as categories, now with their real item counts.
            $collapsedCounts = $countByCategory($menuData['items']);
            $fixedCategories = [];
            foreach ($zeroCats as $zc) {
                $fixedCategories[] = ['name' => $zc, 'itemCount' => $collapsedCounts[$zc] ?? 0];
            }
            $menuData['categories'] = $fixedCategories;
            $formattedCategories = $fixedCategories;
            $response['steps'][] = "Collapsed to " . count($fixedCategories) . " categories";
        }
    }

    // Server-side hierarchy enforcement from HTML heading structure
    // For categories that have no parent yet, look up the category name among the child
    // headings detected earlier and, if the corresponding parent heading matches one of the
    // returned categories, record it as parentCategoryName.
    if (!empty($headingHierarchy)) {
        // One normaliser for both sides of the parent comparison: decode HTML entities
        // (heading text was produced by strip_tags and may still contain them), keep only
        // ASCII letters/digits/spaces, drop a trailing "menu", collapse whitespace.
        $normHeading = static function ($text): string {
            $t = html_entity_decode((string)$text, ENT_QUOTES | ENT_HTML5, 'UTF-8');
            $t = strtolower(preg_replace('/[^a-zA-Z0-9 ]/', '', $t));
            $t = preg_replace('/\s*menu\s*$/', '', $t);
            return trim(preg_replace('/\s+/', ' ', $t));
        };

        // Child heading (lower-cased) => parent heading. Both the raw and the entity-decoded
        // spelling are registered so "Hot &amp; Cold" in the HTML still matches "Hot & Cold".
        $h3ToParent = [];
        foreach ($headingHierarchy as $hParentName => $hChildren) {
            foreach ($hChildren as $hChild) {
                $h3ToParent[strtolower(trim($hChild))] = $hParentName;
                $decodedChild = html_entity_decode($hChild, ENT_QUOTES | ENT_HTML5, 'UTF-8');
                $h3ToParent[strtolower(trim($decodedChild))] = $hParentName;
            }
        }

        // Normalised category names are loop-invariant; compute them once.
        $catNorms = [];
        foreach ($formattedCategories as $p => $pcat) {
            $catNorms[$p] = $normHeading($pcat['name']);
        }

        $hierarchyApplied = 0;
        foreach ($formattedCategories as $i => $cat) {
            if (!empty($cat['parentCategoryName'])) {
                continue;
            }
            $catLower = strtolower(trim($cat['name']));
            if (!isset($h3ToParent[$catLower])) {
                continue;
            }
            // Array keys that look numeric come back as ints, hence the cast.
            $rawParent = (string)$h3ToParent[$catLower];
            $parentNorm = $normHeading($rawParent);
            $parentLower = strtolower($rawParent);

            $matchedParent = '';
            foreach ($formattedCategories as $p => $pcat) {
                // A category must never become its own parent.
                if ($pcat['name'] === $cat['name']) {
                    continue;
                }
                if (($parentNorm !== '' && $catNorms[$p] === $parentNorm) || strtolower($pcat['name']) === $parentLower) {
                    $matchedParent = $pcat['name'];
                    break;
                }
            }
            if (strlen($matchedParent)) {
                $formattedCategories[$i]['parentCategoryName'] = $matchedParent;
                $hierarchyApplied++;
            }
        }
        if ($hierarchyApplied > 0) {
            $menuData['categories'] = $formattedCategories;
            $response['steps'][] = "Server-side hierarchy: applied $hierarchyApplied parent-child relationships";
        }
    }

    // Items with subcategory field from Claude: the subcategory wins as the item's category.
    // Add item IDs: sequential, 1-based, in output order.
    foreach ($menuData['items'] as $i => $item) {
        if (!empty($item['subcategory'])) {
            $menuData['items'][$i]['category'] = $item['subcategory'];
        }
        $menuData['items'][$i]['id'] = 'item_' . ($i + 1);
    }

    // Process item images
    // Three accepted shapes, checked in this order:
    //   'images'   => map of size key => URL (filenames recorded per size, primary picked
    //                 from src/large/medium/small)
    //   'imageUrl' => single URL
    //   'imageSrc' => single URL already in the output field
    // Values that are not strings are skipped rather than passed to basename().
    $itemsWithImages = 0;
    foreach ($menuData['items'] as $i => $item) {
        if (!empty($item['images']) && is_array($item['images'])) {
            $imgObj = $item['images'];
            $itemsWithImages++;
            $filenames = [];
            foreach ($imgObj as $sizeKey => $imgUrl) {
                if (is_scalar($imgUrl) && strlen(trim((string)$imgUrl))) {
                    $filenames[$sizeKey] = basename((string)$imgUrl);
                }
            }
            $menuData['items'][$i]['imageFilenames'] = $filenames;
            $primarySrc = $imgObj['src'] ?? $imgObj['large'] ?? $imgObj['medium'] ?? $imgObj['small'] ?? null;
            if (is_string($primarySrc) && $primarySrc !== '') {
                $menuData['items'][$i]['imageSrc'] = $primarySrc;
                $menuData['items'][$i]['imageFilename'] = basename($primarySrc);
            }
        } elseif (!empty($item['imageUrl']) && is_string($item['imageUrl'])) {
            $menuData['items'][$i]['imageSrc'] = $item['imageUrl'];
            $menuData['items'][$i]['imageFilename'] = basename($item['imageUrl']);
            $itemsWithImages++;
        } elseif (!empty($item['imageSrc']) && is_string($item['imageSrc'])) {
            $menuData['items'][$i]['imageFilename'] = basename($item['imageSrc']);
            $itemsWithImages++;
        }
    }
    $response['steps'][] = "Found images for $itemsWithImages of " . count($menuData['items']) . " items";

    // Attach the image bookkeeping gathered earlier in the request and publish the result.
    $menuData['imageUrls'] = $imageUrlList;
    $menuData['headerCandidateIndices'] = [];
    $menuData['imageMappings'] = $imageMappings;

    $response['OK'] = true;
    $response['DATA'] = $menuData;
    $response['sourceUrl'] = !empty($targetUrl) ? $targetUrl : 'uploaded';
    $response['pagesProcessed'] = count($menuPages);
    $response['imagesFound'] = count($imageDataArray);
    $response['playwrightImagesCount'] = count($playwrightImages);
} catch (Throwable $e) {
    // Throwable rather than Exception: a TypeError raised by unexpectedly shaped model
    // output is an Error, and would otherwise end the request without any JSON body.
    $response['MESSAGE'] = $e->getMessage();
}
jsonResponse($response);