From dd2a50868041157028c9fd990736f5e36916afc3 Mon Sep 17 00:00:00 2001
From: John Mizerek
Date: Tue, 10 Mar 2026 11:22:34 -0700
Subject: [PATCH] Add playwright scripts to git

Previously only lived on servers at /opt/playwright/. Now tracked in repo.

The tracked copies of render.js, toast-modifiers.js and woo-modifiers.js
carry small robustness fixes over the server copies; each change is
explained by a comment next to it.

Co-Authored-By: Claude Opus 4.6
---
 playwright/package.json           |  15 +
 playwright/render.js              |  83 +++++
 playwright/run-toast-modifiers.sh |   3 +
 playwright/run-woo-modifiers.sh   |   3 +
 playwright/run.sh                 |   3 +
 playwright/toast-modifiers.js     | 311 ++++++++++++++++++
 playwright/woo-modifiers.js       | 526 ++++++++++++++++++++++++++++++
 7 files changed, 944 insertions(+)
 create mode 100644 playwright/package.json
 create mode 100644 playwright/render.js
 create mode 100644 playwright/run-toast-modifiers.sh
 create mode 100644 playwright/run-woo-modifiers.sh
 create mode 100644 playwright/run.sh
 create mode 100644 playwright/toast-modifiers.js
 create mode 100644 playwright/woo-modifiers.js

diff --git a/playwright/package.json b/playwright/package.json
new file mode 100644
index 0000000..418d5fd
--- /dev/null
+++ b/playwright/package.json
@@ -0,0 +1,15 @@
+{
+  "name": "playwright",
+  "version": "1.0.0",
+  "main": "index.js",
+  "scripts": {
+    "test": "echo \"Error: no test specified\" && exit 1"
+  },
+  "keywords": [],
+  "author": "",
+  "license": "ISC",
+  "description": "",
+  "dependencies": {
+    "playwright": "^1.58.2"
+  }
+}
diff --git a/playwright/render.js b/playwright/render.js
new file mode 100644
--- /dev/null
+++ b/playwright/render.js
@@ -0,0 +1,83 @@
+const { chromium } = require("playwright");
+
+// Usage: node render.js URL [WAIT_MS]
+//
+// Loads URL in headless Chromium, scrolls through the page to trigger
+// lazy-loaded images, and prints one JSON object on stdout:
+//   { html, images, url }  on success
+//   { error, url }         when launching or rendering fails
+(async () => {
+  const url = process.argv[2];
+  // Parse with an explicit radix and fall back to the 3000 ms default when the
+  // argument is missing, non-numeric, or negative.
+  const requestedWait = Number.parseInt(process.argv[3], 10);
+  const wait = Number.isNaN(requestedWait) || requestedWait < 0 ? 3000 : requestedWait;
+
+  if (!url) {
+    console.log(JSON.stringify({ error: "URL required" }));
+    process.exit(1);
+  }
+
+  let browser;
+  try {
+    // Launch inside the try so a failed launch is reported as JSON too.
+    browser = await chromium.launch({ headless: true });
+    const context = await browser.newContext({
+      userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
+    });
+    const page = await context.newPage();
+
+    // Track image URLs as they load
+    const images = new Set();
+    page.on("response", response => {
+      const ct = response.headers()["content-type"] || "";
+      if (ct.includes("image/")) {
+        images.add(response.url());
+      }
+    });
+
+    // Use load instead of networkidle - more reliable for sites with persistent connections
+    await page.goto(url, { waitUntil: "load", timeout: 45000 });
+
+    // Wait for initial JS rendering
+    await page.waitForTimeout(wait);
+
+    // Scroll the page to trigger lazy-loaded images (DoorDash, etc.)
+    const scrollHeight = await page.evaluate(() => document.body.scrollHeight);
+    const viewportHeight = await page.evaluate(() => window.innerHeight);
+    const scrollSteps = Math.min(Math.ceil(scrollHeight / viewportHeight), 20);
+
+    for (let i = 0; i < scrollSteps; i++) {
+      await page.evaluate((step) => {
+        window.scrollTo(0, step * window.innerHeight);
+      }, i + 1);
+      await page.waitForTimeout(300);
+    }
+
+    // Scroll back to top and wait for any final images
+    await page.evaluate(() => window.scrollTo(0, 0));
+    await page.waitForTimeout(1000);
+
+    // Extract images from DOM as well
+    const domImages = await page.evaluate(() => {
+      return Array.from(document.querySelectorAll("img"))
+        .map(img => img.src)
+        .filter(src => src && src.startsWith("http"));
+    });
+
+    domImages.forEach(img => images.add(img));
+
+    const html = await page.content();
+
+    console.log(JSON.stringify({
+      html: html,
+      images: Array.from(images),
+      url: url
+    }));
+  } catch (e) {
+    console.log(JSON.stringify({ error: e.message, url: url }));
+  } finally {
+    // Close the browser on every path; it is undefined if launch itself failed.
+    if (browser) await browser.close();
+  }
+})();
diff --git a/playwright/run-toast-modifiers.sh b/playwright/run-toast-modifiers.sh
new file mode 100644
index 0000000..c7b43ab
--- /dev/null
+++ b/playwright/run-toast-modifiers.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+export PLAYWRIGHT_BROWSERS_PATH=/opt/playwright/browsers
+exec /usr/bin/node /opt/playwright/toast-modifiers.js "$@"
diff --git a/playwright/run-woo-modifiers.sh b/playwright/run-woo-modifiers.sh
new file mode 100644
index 0000000..0f103da
--- /dev/null
+++ b/playwright/run-woo-modifiers.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+export PLAYWRIGHT_BROWSERS_PATH=/opt/playwright/browsers
+exec /usr/bin/node /opt/playwright/woo-modifiers.js "$@"
diff --git a/playwright/run.sh b/playwright/run.sh
new file mode 100644
index 0000000..85968e3
--- /dev/null
+++ b/playwright/run.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+export PLAYWRIGHT_BROWSERS_PATH=/opt/playwright/browsers
+exec /usr/bin/node /opt/playwright/render.js "$@"
diff --git a/playwright/toast-modifiers.js b/playwright/toast-modifiers.js
new file mode 100644
--- /dev/null
+++ b/playwright/toast-modifiers.js
@@ -0,0 +1,311 @@
+const { chromium } = require("playwright");
+
+// Usage: node toast-modifiers.js URL
+//
+// Reads the menu from window.__OO_STATE__ on the page, clicks one representative
+// item per itemGroupGuid to capture its menuItemDetails GraphQL response, and
+// prints one JSON object on stdout: { items, modifiers, itemModifierMap, stats }.
+// Progress is logged to stderr with a "[toast-mod] " prefix.
+(async () => {
+  const url = process.argv[2];
+  if (!url) {
+    console.log(JSON.stringify({ error: "URL required" }));
+    process.exit(1);
+  }
+
+  const log = (msg) => process.stderr.write("[toast-mod] " + msg + "\n");
+
+  let browser;
+  try {
+    browser = await chromium.launch({ headless: true });
+    const context = await browser.newContext({
+      userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
+    });
+    const page = await context.newPage();
+
+    // Set up GraphQL response interceptor BEFORE navigation to catch everything
+    let latestResponse = null;
+
+    page.on("response", async (response) => {
+      try {
+        const responseUrl = response.url();
+        if ((responseUrl.includes("graphql") || responseUrl.includes("federated-gateway"))) {
+          const ct = response.headers()["content-type"] || "";
+          if (ct.includes("json")) {
+            const rawBody = await response.json();
+            const responses = Array.isArray(rawBody) ? rawBody : [rawBody];
+            for (const body of responses) {
+              if (!body || !body.data) continue;
+              if (body.data.menuItemDetails) {
+                const details = body.data.menuItemDetails;
+                if (details.modifierGroups && Array.isArray(details.modifierGroups) && details.modifierGroups.length > 0) {
+                  latestResponse = details;
+                }
+              }
+            }
+          }
+        }
+      } catch (e) {
+        // Best-effort interception: a response whose body cannot be read or parsed is skipped.
+      }
+    });
+
+    log("Navigating to " + url);
+    await page.goto(url, { waitUntil: "load", timeout: 60000 });
+    await page.waitForTimeout(5000);
+
+    const title = await page.title();
+    log("Page title: " + title);
+
+    let hasOoState = await page.evaluate(() => !!window.__OO_STATE__);
+    if (!hasOoState) {
+      log("No __OO_STATE__ yet, waiting 10 more seconds...");
+      await page.waitForTimeout(10000);
+      hasOoState = await page.evaluate(() => !!window.__OO_STATE__);
+      if (!hasOoState) {
+        console.log(JSON.stringify({ error: "No __OO_STATE__ found", items: [], modifiers: [], itemModifierMap: {} }));
+        return; // the finally block closes the browser
+      }
+    }
+
+    // Extract items
+    const ooData = await page.evaluate(() => {
+      const state = window.__OO_STATE__ || {};
+      const items = [];
+
+      for (const key of Object.keys(state)) {
+        if (!key.startsWith("Menu:")) continue;
+        const menu = state[key];
+        if (!menu.groups || !Array.isArray(menu.groups)) continue;
+
+        for (const group of menu.groups) {
+          const groupName = group.name || "Menu";
+
+          if (group.items && Array.isArray(group.items)) {
+            for (const item of group.items) {
+              if (item.name) {
+                items.push({
+                  name: item.name.trim(),
+                  guid: item.guid || "",
+                  itemGroupGuid: item.itemGroupGuid || "",
+                  hasModifiers: !!item.hasModifiers,
+                  category: groupName
+                });
+              }
+            }
+          }
+
+          const subs = group.subgroups || group.children || group.childGroups || [];
+          for (const sub of subs) {
+            if (sub.items && Array.isArray(sub.items)) {
+              for (const item of sub.items) {
+                if (item.name) {
+                  items.push({
+                    name: item.name.trim(),
+                    guid: item.guid || "",
+                    itemGroupGuid: item.itemGroupGuid || "",
+                    hasModifiers: !!item.hasModifiers,
+                    category: sub.name || groupName
+                  });
+                }
+              }
+            }
+          }
+        }
+      }
+
+      return items;
+    });
+
+    log("Found " + ooData.length + " items, " + ooData.filter(i => i.hasModifiers).length + " with modifiers");
+
+    const modifierItems = ooData.filter(i => i.hasModifiers);
+
+    if (modifierItems.length === 0) {
+      console.log(JSON.stringify({ items: ooData, modifiers: [], itemModifierMap: {} }));
+      return; // the finally block closes the browser
+    }
+
+    // OPTIMIZATION: Deduplicate by itemGroupGuid - only click one representative per group
+    const guidToItems = new Map(); // itemGroupGuid -> [items]
+    const noGuidItems = []; // items without itemGroupGuid
+
+    for (const item of modifierItems) {
+      if (item.itemGroupGuid) {
+        if (!guidToItems.has(item.itemGroupGuid)) {
+          guidToItems.set(item.itemGroupGuid, []);
+        }
+        guidToItems.get(item.itemGroupGuid).push(item);
+      } else {
+        noGuidItems.push(item);
+      }
+    }
+
+    // Build click list: one item per unique itemGroupGuid + all items without a guid
+    const clickList = [];
+    for (const items of guidToItems.values()) {
+      clickList.push(items[0]); // representative
+    }
+    for (const item of noGuidItems) {
+      clickList.push(item);
+    }
+
+    log("Deduplicated: " + modifierItems.length + " modifier items -> " + clickList.length + " unique groups to click (" + guidToItems.size + " guids + " + noGuidItems.length + " ungrouped)");
+
+    // Click items to extract modifier data
+    const allModifierGroups = new Map();
+    const itemModifierMap = {};
+    let clickedCount = 0;
+    let failedClicks = 0;
+
+    function processModGroups(groups, prefix) {
+      if (!Array.isArray(groups)) return [];
+      const modNames = [];
+      for (const mg of groups) {
+        const fullName = prefix ? prefix + " > " + mg.name : mg.name;
+        const guid = mg.guid || fullName;
+
+        if (!allModifierGroups.has(guid)) {
+          const options = [];
+          if (mg.modifiers && Array.isArray(mg.modifiers)) {
+            for (const mod of mg.modifiers) {
+              const price = typeof mod.price === "number" ? mod.price : 0;
+              options.push({ name: mod.name || "", price: price });
+
+              if (mod.modifierGroups && Array.isArray(mod.modifierGroups) && mod.modifierGroups.length > 0) {
+                const nestedNames = processModGroups(mod.modifierGroups, fullName);
+                modNames.push(...nestedNames);
+              }
+            }
+          }
+
+          allModifierGroups.set(guid, {
+            guid: guid,
+            name: mg.name || "",
+            required: (mg.minSelections || 0) > 0,
+            minSelections: mg.minSelections || 0,
+            maxSelections: mg.maxSelections || 0,
+            options: options
+          });
+        }
+
+        modNames.push(allModifierGroups.get(guid).name);
+      }
+      return modNames;
+    }
+
+    // Only click the deduplicated clickList
+    for (const item of clickList) {
+      try {
+        latestResponse = null;
+
+        // Find and click the item
+        const headerLocator = page.locator(".headerText").filter({
+          hasText: new RegExp("^" + item.name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") + "$")
+        }).first();
+
+        if (await headerLocator.count() === 0) {
+          failedClicks++;
+          log("Not found on page: " + item.name);
+          continue;
+        }
+
+        const clickable = headerLocator.locator("xpath=ancestor::*[contains(@class,'clickable')]").first();
+        if (await clickable.count() > 0) {
+          await clickable.scrollIntoViewIfNeeded();
+          await clickable.click({ timeout: 3000 });
+        } else {
+          await headerLocator.scrollIntoViewIfNeeded();
+          await headerLocator.click({ timeout: 3000 });
+        }
+
+        clickedCount++;
+
+        // Wait for GraphQL response (up to 6s)
+        const startTime = Date.now();
+        while (!latestResponse && Date.now() - startTime < 6000) {
+          await page.waitForTimeout(200);
+        }
+
+        if (latestResponse && latestResponse.modifierGroups) {
+          const names = processModGroups(latestResponse.modifierGroups, "");
+          // Map the clicked item
+          itemModifierMap[item.name] = names;
+
+          // OPTIMIZATION: Immediately map all siblings with same itemGroupGuid
+          if (item.itemGroupGuid && guidToItems.has(item.itemGroupGuid)) {
+            for (const sibling of guidToItems.get(item.itemGroupGuid)) {
+              if (sibling.name !== item.name) {
+                itemModifierMap[sibling.name] = names;
+              }
+            }
+          }
+        }
+
+        // Close modal
+        await page.keyboard.press("Escape");
+        await page.waitForTimeout(400);
+
+      } catch (e) {
+        log("Error clicking " + item.name + ": " + e.message);
+        try { await page.keyboard.press("Escape"); } catch (e2) {}
+        await page.waitForTimeout(300);
+      }
+    }
+
+    const directMapped = Object.keys(itemModifierMap).length;
+    log("Clicked: " + clickedCount + "/" + clickList.length + ", Mapped: " + directMapped + "/" + modifierItems.length);
+
+    // Final fallback: any remaining unmapped items, try to infer from category siblings
+    let inferredCount = 0;
+    for (const item of modifierItems) {
+      if (itemModifierMap[item.name]) continue;
+      if (!item.itemGroupGuid) continue;
+
+      for (const mappedName of Object.keys(itemModifierMap)) {
+        const mappedItem = modifierItems.find(i => i.name === mappedName);
+        if (mappedItem && mappedItem.itemGroupGuid === item.itemGroupGuid) {
+          itemModifierMap[item.name] = itemModifierMap[mappedName];
+          inferredCount++;
+          break;
+        }
+      }
+    }
+
+    if (inferredCount > 0) {
+      log("Inferred modifiers for " + inferredCount + " additional items via itemGroupGuid fallback");
+    }
+
+    log("Final: " + Object.keys(itemModifierMap).length + "/" + modifierItems.length + " mapped, " + allModifierGroups.size + " unique modifier groups");
+
+    const modifiers = Array.from(allModifierGroups.values()).map(mg => ({
+      name: mg.name,
+      required: mg.required,
+      minSelections: mg.minSelections,
+      maxSelections: mg.maxSelections,
+      options: mg.options
+    }));
+
+    console.log(JSON.stringify({
+      items: ooData,
+      modifiers: modifiers,
+      itemModifierMap: itemModifierMap,
+      stats: {
+        totalItems: ooData.length,
+        itemsWithModifiers: modifierItems.length,
+        modifiersExtracted: Object.keys(itemModifierMap).length,
+        uniqueModifierGroups: modifiers.length,
+        clickedCount: clickedCount,
+        failedClicks: failedClicks,
+        uniqueGroups: guidToItems.size,
+        inferredCount: inferredCount
+      }
+    }));
+
+  } catch (e) {
+    log("Fatal error: " + e.message);
+    console.log(JSON.stringify({ error: e.message, items: [], modifiers: [], itemModifierMap: {} }));
+  } finally {
+    if (browser) await browser.close();
+  }
+})();
diff --git a/playwright/woo-modifiers.js b/playwright/woo-modifiers.js
new file mode 100644
--- /dev/null
+++ b/playwright/woo-modifiers.js
@@ -0,0 +1,526 @@
+const { chromium } = require("playwright");
+
+// Usage: node woo-modifiers.js URL
+//
+// Scrapes products from a WooCommerce storefront (category tabs, category
+// pages, or plain product links), then visits each product page to collect
+// its option groups. Prints one JSON object on stdout:
+//   { business, items, modifiers, itemModifierMap, stats }
+// or { error } with exit code 1 on a fatal failure. Progress is logged to
+// stderr with a "[woo-mod] " prefix.
+(async () => {
+  const url = process.argv[2];
+  if (!url) {
+    console.log(JSON.stringify({ error: "URL required" }));
+    process.exit(1);
+  }
+
+  const log = (msg) => process.stderr.write("[woo-mod] " + msg + "\n");
+
+  let browser;
+  try {
+    browser = await chromium.launch({ headless: true });
+    const context = await browser.newContext({
+      userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
+    });
+    const page = await context.newPage();
+
+    log("Navigating to " + url);
+    await page.goto(url, { waitUntil: "networkidle", timeout: 60000 });
+    await page.waitForTimeout(3000);
+
+    // Close any popups/modals
+    try {
+      const closeButtons = await page.$$('.close, .modal .close, [aria-label="Close"]');
+      for (const btn of closeButtons) {
+        if (await btn.isVisible()) await btn.click().catch(() => {});
+      }
+    } catch (e) {}
+
+    // Extract business info from the page
+    const businessInfo = await page.evaluate(() => {
+      const info = { name: '', address: '', phone: '', hours: '' };
+      // Try common selectors for business name
+      const nameEl = document.querySelector('.site-title, .logo-text, h1.site-title, .custom-logo-link img, title');
+      if (nameEl) {
+        info.name = nameEl.alt || nameEl.textContent || '';
+        info.name = info.name.replace(/\s*[-–|].*$/, '').trim(); // strip taglines
+      }
+      // Try page title as fallback
+      if (!info.name && document.title) {
+        info.name = document.title.replace(/\s*[-–|].*$/, '').trim();
+      }
+      // Look for address/phone in common locations
+      const bodyText = document.body.innerText;
+      // Phone
+      const phoneMatch = bodyText.match(/(?:Call|Phone|Tel)[:\s]*\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4}/i) ||
+        bodyText.match(/\(?\d{3}\)?[\s.-]\d{3}[\s.-]\d{4}/);
+      if (phoneMatch) info.phone = phoneMatch[0].replace(/^(?:Call|Phone|Tel)[:\s]*/i, '').trim();
+      // Address - look for street patterns
+      const addrMatch = bodyText.match(/\d{1,5}\s+[A-Z][a-zA-Z\s]+(?:St|Ave|Blvd|Dr|Rd|Ln|Way|Ct|Pl|Cir)[.,]?\s*(?:[A-Z][a-zA-Z\s]+,?\s*[A-Z]{2}\s*\d{5})?/);
+      if (addrMatch) info.address = addrMatch[0].trim();
+      return info;
+    });
+    log("Business: " + businessInfo.name + " | " + businessInfo.address + " | " + businessInfo.phone);
+
+    // Strategy 1: Products displayed inline on the page (custom WooCommerce themes)
+    // Collect products from all category tabs/pages
+    const allProducts = [];
+
+    // Check for category tabs (clickable, same-page) vs category links (separate pages)
+    const categoryTabs = await page.evaluate(() => {
+      const tabs = document.querySelectorAll('li.tabs, .category-tab, [data-filter]');
+      return [...tabs].map((t, i) => ({ index: i, name: t.textContent.trim(), active: t.classList.contains('active') }))
+        .filter(t => t.name.length > 0 && t.name.length < 60);
+    });
+
+    const categoryLinks = await page.evaluate(() => {
+      // Only use links if no tabs found — some themes use links that 404
+      const cats = document.querySelectorAll('.product-category a, .product_cat a');
+      return [...cats].map(a => ({ href: a.href, name: a.textContent.trim() })).filter(c => c.name.length > 0 && c.name.length < 60);
+    });
+
+    const useTabs = categoryTabs.length > 0;
+    log("Found " + categoryTabs.length + " category tabs, " + categoryLinks.length + " category links" + (useTabs ? " (using tabs)" : ""));
+
+    const visitedUrls = new Set([page.url()]);
+
+    // Scrape current page first
+    const scrapeInlineProducts = async (catOverride) => {
+      return await page.evaluate((catName) => {
+        const products = [];
+        const productEls = document.querySelectorAll('.product-con-box, li.product, .type-product, .product-item');
+        productEls.forEach(el => {
+          const nameEl = el.querySelector('.woocommerce-loop-product__title, h2, h3, .product-title');
+          if (!nameEl) return;
+          const name = nameEl.textContent.trim();
+          if (!name) return;
+
+          const descEl = el.querySelector('.woocommerce-product-details__short-description, .description, .short-description');
+          const description = descEl ? descEl.textContent.trim().substring(0, 200) : '';
+
+          let price = 0;
+          const priceEl = el.querySelector('.price .woocommerce-Price-amount, .price ins .amount, .price');
+          if (priceEl) {
+            const m = priceEl.textContent.match(/\$?([\d.]+)/);
+            if (m) price = parseFloat(m[1]) || 0;
+          }
+
+          const imgEl = el.querySelector('img');
+          const imageUrl = imgEl ? (imgEl.src || imgEl.dataset.src || '') : '';
+
+          // Try to get category from element classes
+          let category = catName || '';
+          if (!category) {
+            const classes = el.className || '';
+            const catMatch = classes.match(/product_cat-([a-z0-9-]+)/);
+            if (catMatch) {
+              category = catMatch[1].replace(/-/g, ' ').replace(/\b\w/g, c => c.toUpperCase());
+            }
+          }
+
+          // Get post ID for clicking later
+          const idMatch = (el.className || '').match(/post-(\d+)/);
+          const postId = idMatch ? idMatch[1] : '';
+
+          products.push({ name, price, description, imageUrl, category, postId });
+        });
+        return products;
+      }, catOverride);
+    };
+
+    // Scrape products — either by clicking tabs or visiting category pages
+    if (useTabs) {
+      // Click each tab (prevent navigation) and scrape products that appear
+      for (const tab of categoryTabs) {
+        try {
+          log("Clicking tab: " + tab.name);
+          const tabName = tab.name;
+          await page.evaluate((name) => {
+            const btns = document.querySelectorAll('li.tabs a, a.catabtn, .category-tab a');
+            for (const btn of btns) {
+              if (btn.textContent.trim() === name) {
+                btn.addEventListener('click', e => e.preventDefault(), { once: true });
+                btn.click();
+                break;
+              }
+            }
+          }, tabName);
+          await page.waitForTimeout(2500);
+
+          const catProducts = await scrapeInlineProducts(tab.name);
+          for (const p of catProducts) {
+            if (!allProducts.find(ep => ep.name === p.name)) {
+              allProducts.push(p);
+            }
+          }
+          log(" -> " + catProducts.length + " products");
+        } catch (e) {
+          log("Error on tab " + tab.name + ": " + e.message);
+        }
+      }
+    } else {
+      // Scrape homepage products first
+      const homeProducts = await scrapeInlineProducts(null);
+      log("Found " + homeProducts.length + " products on homepage");
+      allProducts.push(...homeProducts);
+
+      // Visit each category page
+      if (categoryLinks.length > 0) {
+        for (const cat of categoryLinks) {
+          if (visitedUrls.has(cat.href)) continue;
+          visitedUrls.add(cat.href);
+          try {
+            log("Visiting category: " + cat.name);
+            await page.goto(cat.href, { waitUntil: "networkidle", timeout: 30000 });
+            await page.waitForTimeout(2000);
+
+            let pageNum = 1;
+            while (pageNum <= 10) {
+              const catProducts = await scrapeInlineProducts(cat.name);
+              for (const p of catProducts) {
+                if (!allProducts.find(ep => ep.name === p.name)) {
+                  allProducts.push(p);
+                }
+              }
+
+              const nextUrl = await page.evaluate(() => {
+                const next = document.querySelector('.woocommerce-pagination .next, a.next.page-numbers');
+                return next ? next.href : null;
+              });
+              if (!nextUrl) break;
+              pageNum++;
+              await page.goto(nextUrl, { waitUntil: "networkidle", timeout: 30000 });
+              await page.waitForTimeout(1500);
+            }
+          } catch (e) {
+            log("Error on category " + cat.name + ": " + e.message);
+          }
+        }
+      }
+    }
+
+    log("Total unique products: " + allProducts.length);
+
+    // Real permalinks of products found through Strategy 2, keyed by product name,
+    // so the modifier pass does not have to guess their URLs.
+    const knownProductUrls = new Map();
+
+    // Strategy 2: If no inline products found, try standard product links
+    if (allProducts.length === 0) {
+      log("No inline products - trying product link approach");
+      await page.goto(url, { waitUntil: "networkidle", timeout: 30000 });
+      await page.waitForTimeout(3000);
+
+      const productLinks = await page.evaluate(() => {
+        const anchors = document.querySelectorAll('a[href*="/product/"], a.woocommerce-LoopProduct-link');
+        return [...new Set([...anchors].map(a => a.href))];
+      });
+
+      log("Found " + productLinks.length + " product links");
+
+      for (let i = 0; i < productLinks.length; i++) {
+        try {
+          await page.goto(productLinks[i], { waitUntil: "domcontentloaded", timeout: 30000 });
+          await page.waitForTimeout(1000);
+
+          const pd = await page.evaluate(() => {
+            const nameEl = document.querySelector('.product_title, h1.entry-title');
+            const name = nameEl ? nameEl.textContent.trim() : '';
+            const priceEl = document.querySelector('.summary .price .woocommerce-Price-amount');
+            let price = 0;
+            if (priceEl) { const m = priceEl.textContent.match(/\$?([\d.]+)/); if (m) price = parseFloat(m[1]) || 0; }
+            const descEl = document.querySelector('.woocommerce-product-details__short-description');
+            const desc = descEl ? descEl.textContent.trim().substring(0, 200) : '';
+            const imgEl = document.querySelector('.woocommerce-product-gallery__image img');
+            const img = imgEl ? (imgEl.src || '') : '';
+            const catEl = document.querySelector('.posted_in a');
+            const cat = catEl ? catEl.textContent.trim() : '';
+            return { name, price, description: desc, imageUrl: img, category: cat, postId: '' };
+          });
+
+          if (pd.name) {
+            allProducts.push(pd);
+            knownProductUrls.set(pd.name, productLinks[i]);
+          }
+        } catch (e) {
+          log("Error on product link: " + e.message);
+        }
+      }
+    }
+
+    // Now extract modifiers by visiting individual product pages
+    const modifierGroupsMap = {};
+    const itemModifierMap = {};
+
+    // Base for guessed permalinks: the start URL without query string or fragment
+    // and with a trailing slash, so "?p=" and "product/..." resolve cleanly.
+    const siteBase = new URL(url);
+    siteBase.search = '';
+    siteBase.hash = '';
+    if (!siteBase.pathname.endsWith('/')) siteBase.pathname += '/';
+
+    // For inline products, we need their permalink - try /product/{slug} or ?p={postId}
+    for (let i = 0; i < allProducts.length; i++) {
+      const prod = allProducts[i];
+      log(`[${i + 1}/${allProducts.length}] Extracting modifiers for: ${prod.name}`);
+
+      // Prefer a permalink already visited; otherwise build one from postId or name slug
+      let productUrl = knownProductUrls.get(prod.name);
+      if (!productUrl && prod.postId) {
+        productUrl = new URL('?p=' + prod.postId, siteBase).href;
+      } else if (!productUrl) {
+        const slug = prod.name.toLowerCase().replace(/[^a-z0-9]+/g, '-').replace(/^-|-$/g, '');
+        productUrl = new URL('product/' + slug + '/', siteBase).href;
+      }
+
+      try {
+        await page.goto(productUrl, { waitUntil: "domcontentloaded", timeout: 20000 });
+        await page.waitForTimeout(1500);
+
+        const groups = await page.evaluate(() => {
+          const results = [];
+          const seen = new Set();
+
+          // Helper: clean TMEPO value strings like "Small_0" -> "Small"
+          const cleanVal = (v) => v ? v.replace(/_\d+$/, '').trim() : '';
+
+          // Helper: parse price from data-rules JSON or data-price attr
+          const parseRulesPrice = (input) => {
+            if (!input) return 0;
+            const rules = input.getAttribute('data-rules');
+            if (rules) {
+              try {
+                const arr = JSON.parse(rules);
+                if (Array.isArray(arr) && arr.length > 0) {
+                  const v = parseFloat(arr[0]);
+                  if (!isNaN(v) && v > 0) return v;
+                }
+              } catch(e) {}
+            }
+            const dp = input.getAttribute('data-price');
+            if (dp) { const v = parseFloat(dp); if (!isNaN(v) && v > 0) return v; }
+            return 0;
+          };
+
+          // Prefer granular containers (individual field cells) over broad row containers
+          // tm-cell with cpf-type are individual modifier groups; tc-cell.tcell are TMEPO cells
+          const granular = document.querySelectorAll('.tm-cell[class*="cpf-type-"], .tc-cell.tcell');
+          const broad = document.querySelectorAll('.tc-row, .tm-row, .cpf-section');
+          // Use granular if they have labels, otherwise fall back to broad
+          const granularWithLabels = [...granular].filter(el =>
+            el.querySelector('.tm-epo-field-label label, .tm-epo-element-label, h3.tm-epo-field-label, label:first-of-type')
+          );
+          const elements = granularWithLabels.length > 0 ? granularWithLabels : (broad.length > 0 ? broad : granular);
+
+          elements.forEach(section => {
+            const labelEl = section.querySelector('.tm-epo-field-label label, .tm-epo-element-label, h3.tm-epo-field-label, label:first-of-type');
+            if (!labelEl) return;
+
+            const groupName = labelEl.textContent.trim();
+            if (!groupName || groupName.length > 80 || seen.has(groupName)) return;
+            if (/special request|sandwich name|your name|instructions|quantity/i.test(groupName)) return;
+            seen.add(groupName);
+
+            const options = [];
+            let groupType = 'select'; // default
+
+            // Radio buttons and checkboxes — get name from input.value, price from data-rules
+            const radios = section.querySelectorAll('input[type="radio"]');
+            const checkboxes = section.querySelectorAll('input[type="checkbox"]');
+            if (radios.length > 0) groupType = 'radio';
+            else if (checkboxes.length > 0) groupType = 'checkbox';
+
+            // Check if this checkbox group has both preselected and non-preselected (split into two groups)
+            const hasPreselected = checkboxes.length > 0 && [...checkboxes].some(c => c.className.includes('custom-preselected'));
+            const hasAdditions = checkboxes.length > 0 && [...checkboxes].some(c => !c.className.includes('custom-preselected'));
+            const shouldSplit = hasPreselected && hasAdditions;
+
+            const additionOptions = []; // only used if splitting
+
+            section.querySelectorAll('.tmcp-field-wrap, .tm-field-wrap, label.tm-epo-field-label-wrap').forEach(wrap => {
+              const input = wrap.querySelector('input[type="radio"], input[type="checkbox"]');
+              if (input) {
+                const optName = cleanVal(input.value);
+                if (!optName || optName.length > 80) return;
+                const optPrice = parseRulesPrice(input);
+                const selected = input.checked || wrap.classList.contains('tc-active');
+                const isPreselected = input.className.includes('custom-preselected');
+
+                // Skip disabled duplicates (size variants)
+                if (input.disabled || input.className.includes('tcdisabled')) return;
+
+                const entry = { name: optName, price: optPrice, selected };
+
+                if (shouldSplit && !isPreselected) {
+                  // Deduplicate
+                  if (!additionOptions.find(o => o.name === optName)) {
+                    additionOptions.push(entry);
+                  }
+                } else {
+                  // Deduplicate
+                  if (!options.find(o => o.name === optName)) {
+                    options.push(entry);
+                  }
+                }
+                return;
+              }
+              // Fallback: try label text
+              const lbl = wrap.querySelector('.tm-label, .tm-value, label span:not(.tm-price)');
+              if (lbl) {
+                const optName = lbl.textContent.replace(/[\n\r\t]+/g, ' ').trim();
+                if (!optName || optName.length > 80) return;
+                let optPrice = 0;
+                const priceSpan = wrap.querySelector('.tm-price, .price .amount, [class*="price"]');
+                if (priceSpan) {
+                  const m = priceSpan.textContent.match(/\+?\$?([\d.]+)/);
+                  if (m) optPrice = parseFloat(m[1]) || 0;
+                }
+                if (!options.find(o => o.name === optName)) {
+                  options.push({ name: optName, price: optPrice, selected: false });
+                }
+              }
+            });
+
+            // Select dropdowns — get name from option.value, price from data-price
+            if (options.length === 0) {
+              section.querySelectorAll('select option').forEach(opt => {
+                if (!opt.value) return;
+                let optName = cleanVal(opt.value);
+                if (!optName || optName.length > 80) return;
+                let optPrice = 0;
+                const dp = opt.getAttribute('data-price');
+                if (dp) { const v = parseFloat(dp); if (!isNaN(v) && v > 0) optPrice = v; }
+                const text = opt.textContent.trim();
+                if (text && text.length < 80 && text !== optName) {
+                  const m = text.match(/\+?\$?([\d.]+)/);
+                  if (m) optPrice = optPrice || (parseFloat(m[1]) || 0);
+                  optName = text.replace(/\s*\(\+?\$?[\d.]+\)\s*$/, '').trim() || optName;
+                }
+                options.push({ name: optName, price: optPrice, selected: opt.selected });
+              });
+            }
+
+            if (options.length > 0) {
+              const required = section.querySelector('.required, [data-required="1"]') !== null;
+              results.push({ name: groupName, type: groupType, options, required });
+            }
+
+            // The non-preselected options of a split group were collected separately
+            // above; emit them as their own optional group so they are not dropped.
+            if (additionOptions.length > 0) {
+              results.push({ name: groupName + ' (Additions)', type: 'checkbox', options: additionOptions, required: false });
+            }
+
+          });
+
+          // Standard WooCommerce variations fallback
+          if (results.length === 0) {
+            const vForm = document.querySelector('.variations_form');
+            if (vForm) {
+              vForm.querySelectorAll('.variations tr').forEach(row => {
+                const lbl = row.querySelector('th label, .label label');
+                const sel = row.querySelector('select');
+                if (lbl && sel) {
+                  const opts = [...sel.querySelectorAll('option')].filter(o => o.value).map(o => ({ name: o.textContent.trim(), price: 0 }));
+                  if (opts.length > 0) results.push({ name: lbl.textContent.trim(), type: 'select', options: opts, required: true });
+                }
+              });
+            }
+          }
+
+          return results;
+        });
+
+        // If item has no price, try to get it from the product page
+        if (prod.price === 0) {
+          const pagePrice = await page.evaluate(() => {
+            const pe = document.querySelector('.summary .price .woocommerce-Price-amount, .summary .price .amount, .product .price .amount');
+            if (pe) {
+              const m = pe.textContent.match(/\$?([\d.]+)/);
+              if (m) { const v = parseFloat(m[1]); if (v > 0) return v; }
+            }
+            const hid = document.querySelector('input.cpf-product-price');
+            if (hid && hid.value) { const v = parseFloat(hid.value); if (v > 0) return v; }
+            return 0;
+          });
+          if (pagePrice > 0) {
+            prod.price = pagePrice;
+          } else if (groups.length > 0) {
+            // Use lowest price from first modifier group that has prices
+            for (const g of groups) {
+              const prices = g.options.map(o => o.price).filter(p => p > 0);
+              if (prices.length > 0) { prod.price = Math.min(...prices); break; }
+            }
+          }
+        }
+
+        // Convert any modifier group with absolute prices to relative (subtract base price)
+        if (prod.price > 0 && groups.length > 0) {
+          for (const g of groups) {
+            const prices = g.options.map(o => o.price).filter(p => p > 0);
+            if (prices.length > 0) {
+              const minPrice = Math.min(...prices);
+              // Only convert if options have prices near or above the item base price (absolute pricing)
+              if (minPrice >= prod.price * 0.8) {
+                for (const opt of g.options) {
+                  if (opt.price > 0) {
+                    opt.price = Math.round((opt.price - prod.price) * 100) / 100;
+                    if (opt.price < 0) opt.price = 0;
+                  }
+                }
+              }
+            }
+          }
+        }
+
+        if (groups.length > 0) {
+          const itemModGroups = [];
+          for (const g of groups) {
+            if (!modifierGroupsMap[g.name]) {
+              modifierGroupsMap[g.name] = {
+                name: g.name,
+                type: g.type || 'select',
+                options: g.options,
+                required: g.required,
+                minSelections: g.required ? 1 : 0,
+                maxSelections: g.type === 'radio' || g.type === 'select' ? 1 : 0
+              };
+            }
+            itemModGroups.push(g.name);
+          }
+          itemModifierMap[prod.name] = itemModGroups;
+          log(" -> " + groups.length + " modifier groups" + (prod.price > 0 ? " ($" + prod.price + ")" : ""));
+        }
+      } catch (e) {
+        log(" -> Error: " + e.message);
+      }
+    }
+
+    const modifiers = Object.values(modifierGroupsMap);
+    const stats = {
+      totalProducts: allProducts.length,
+      itemsExtracted: allProducts.length,
+      modifierGroups: modifiers.length,
+      itemsWithModifiers: Object.keys(itemModifierMap).length
+    };
+
+    log("Done: " + stats.itemsExtracted + " items, " + stats.modifierGroups + " modifier groups, " + stats.itemsWithModifiers + " items with modifiers");
+
+    console.log(JSON.stringify({
+      business: businessInfo,
+      items: allProducts,
+      modifiers,
+      itemModifierMap,
+      stats
+    }));
+
+  } catch (err) {
+    log("Fatal: " + err.message);
+    console.log(JSON.stringify({ error: err.message }));
+    // Set the exit code instead of calling process.exit() so the finally block still runs.
+    process.exitCode = 1;
+  } finally {
+    if (browser) await browser.close();
+  }
+})();