const { chromium } = require("playwright");

/**
 * WooCommerce menu scraper.
 *
 * Usage: node <script> <url>
 *
 * Loads the given URL in headless Chromium, collects products (inline product
 * cards first, individual product links as a fallback), then visits each
 * product page to extract modifier groups (TM Extra Product Options fields or
 * standard WooCommerce variations). The result is printed to stdout as a single
 * JSON document; progress messages go to stderr.
 */

const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";

/** Write a progress message to stderr so stdout stays reserved for the JSON result. */
const log = (msg) => process.stderr.write("[woo-mod] " + msg + "\n");

// ---------------------------------------------------------------------------
// Browser-side functions.
// Each of these is passed to page.evaluate(), so it is executed inside the
// page and must be fully self-contained: it may only use its own argument and
// DOM globals, never anything from this module's scope.
// ---------------------------------------------------------------------------

/**
 * Extract the business name, phone and street address from the current page.
 * @returns {{name: string, address: string, phone: string, hours: string}}
 *   `hours` is always returned as an empty string.
 */
function extractBusinessInfo() {
  const info = { name: '', address: '', phone: '', hours: '' };

  // Try common selectors for business name
  const nameEl = document.querySelector('.site-title, .logo-text, h1.site-title, .custom-logo-link img, title');
  if (nameEl) {
    info.name = nameEl.alt || nameEl.textContent || '';
    info.name = info.name.replace(/\s*[-–|].*$/, '').trim(); // strip taglines
  }
  // Try page title as fallback
  if (!info.name && document.title) {
    info.name = document.title.replace(/\s*[-–|].*$/, '').trim();
  }

  // Look for address/phone in the visible page text
  const bodyText = document.body.innerText;

  // Phone: prefer a labelled number, otherwise the first phone-shaped number
  const phoneMatch =
    bodyText.match(/(?:Call|Phone|Tel)[:\s]*\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4}/i) ||
    bodyText.match(/\(?\d{3}\)?[\s.-]\d{3}[\s.-]\d{4}/);
  if (phoneMatch) {
    info.phone = phoneMatch[0].replace(/^(?:Call|Phone|Tel)[:\s]*/i, '').trim();
  }

  // Address - look for street patterns
  const addrMatch = bodyText.match(/\d{1,5}\s+[A-Z][a-zA-Z\s]+(?:St|Ave|Blvd|Dr|Rd|Ln|Way|Ct|Pl|Cir)[.,]?\s*(?:[A-Z][a-zA-Z\s]+,?\s*[A-Z]{2}\s*\d{5})?/);
  if (addrMatch) {
    info.address = addrMatch[0].trim();
  }
  return info;
}

/**
 * List same-page category tabs.
 * @returns {Array<{index: number, name: string, active: boolean}>}
 */
function listCategoryTabs() {
  const tabs = document.querySelectorAll('li.tabs, .category-tab, [data-filter]');
  return [...tabs]
    .map((t, i) => ({ index: i, name: t.textContent.trim(), active: t.classList.contains('active') }))
    .filter((t) => t.name.length > 0 && t.name.length < 60);
}

/**
 * List links to separate category pages.
 * @returns {Array<{href: string, name: string}>}
 */
function listCategoryLinks() {
  const cats = document.querySelectorAll('.product-category a, .product_cat a');
  return [...cats]
    .map((a) => ({ href: a.href, name: a.textContent.trim() }))
    .filter((c) => c.name.length > 0 && c.name.length < 60);
}

/**
 * Scrape product cards rendered on the current page.
 * @param {string|null} catName Category to assign to every product; when
 *   falsy the category is derived from a `product_cat-*` class if present.
 * @returns {Array<{name: string, price: number, description: string,
 *   imageUrl: string, category: string, postId: string}>}
 */
function extractInlineProducts(catName) {
  const products = [];
  const productEls = document.querySelectorAll('.product-con-box, li.product, .type-product, .product-item');
  productEls.forEach((el) => {
    const nameEl = el.querySelector('.woocommerce-loop-product__title, h2, h3, .product-title');
    if (!nameEl) return;
    const name = nameEl.textContent.trim();
    if (!name) return;

    const descEl = el.querySelector('.woocommerce-product-details__short-description, .description, .short-description');
    const description = descEl ? descEl.textContent.trim().substring(0, 200) : '';

    let price = 0;
    const priceEl = el.querySelector('.price .woocommerce-Price-amount, .price ins .amount, .price');
    if (priceEl) {
      const m = priceEl.textContent.match(/\$?([\d.]+)/);
      if (m) price = parseFloat(m[1]) || 0;
    }

    const imgEl = el.querySelector('img');
    const imageUrl = imgEl ? (imgEl.src || imgEl.dataset.src || '') : '';

    // Try to get category from element classes
    let category = catName || '';
    if (!category) {
      const catMatch = (el.className || '').match(/product_cat-([a-z0-9-]+)/);
      if (catMatch) {
        category = catMatch[1].replace(/-/g, ' ').replace(/\b\w/g, (c) => c.toUpperCase());
      }
    }

    // The post ID (from a `post-<n>` class) lets the caller build a ?p=<id> URL later
    const idMatch = (el.className || '').match(/post-(\d+)/);
    const postId = idMatch ? idMatch[1] : '';

    products.push({ name, price, description, imageUrl, category, postId });
  });
  return products;
}

/**
 * Click the category tab anchor whose text equals `name`, suppressing the
 * anchor's default navigation for that single click.
 * @param {string} name Exact (trimmed) tab text.
 */
function clickCategoryTab(name) {
  const btns = document.querySelectorAll('li.tabs a, a.catabtn, .category-tab a');
  for (const btn of btns) {
    if (btn.textContent.trim() === name) {
      btn.addEventListener('click', (e) => e.preventDefault(), { once: true });
      btn.click();
      break;
    }
  }
}

/**
 * Find the "next page" pagination link on a category page.
 * @returns {string|null} Absolute URL of the next page, or null if none.
 */
function findNextPageUrl() {
  const next = document.querySelector('.woocommerce-pagination .next, a.next.page-numbers');
  return next ? next.href : null;
}

/**
 * Collect the unique hrefs of product links on the current page.
 * @returns {string[]}
 */
function listProductLinks() {
  const anchors = document.querySelectorAll('a[href*="/product/"], a.woocommerce-LoopProduct-link');
  return [...new Set([...anchors].map((a) => a.href))];
}

/**
 * Scrape the basic details of a single product page.
 * @returns {{name: string, price: number, description: string,
 *   imageUrl: string, category: string, postId: string}}
 */
function extractProductPageDetails() {
  const nameEl = document.querySelector('.product_title, h1.entry-title');
  const name = nameEl ? nameEl.textContent.trim() : '';

  const priceEl = document.querySelector('.summary .price .woocommerce-Price-amount');
  let price = 0;
  if (priceEl) {
    const m = priceEl.textContent.match(/\$?([\d.]+)/);
    if (m) price = parseFloat(m[1]) || 0;
  }

  const descEl = document.querySelector('.woocommerce-product-details__short-description');
  const desc = descEl ? descEl.textContent.trim().substring(0, 200) : '';

  const imgEl = document.querySelector('.woocommerce-product-gallery__image img');
  const img = imgEl ? (imgEl.src || '') : '';

  const catEl = document.querySelector('.posted_in a');
  const cat = catEl ? catEl.textContent.trim() : '';

  return { name, price, description: desc, imageUrl: img, category: cat, postId: '' };
}

/**
 * Read the base price shown on a product page.
 * @returns {number} The price, or 0 when none could be found.
 */
function extractProductPagePrice() {
  const pe = document.querySelector('.summary .price .woocommerce-Price-amount, .summary .price .amount, .product .price .amount');
  if (pe) {
    const m = pe.textContent.match(/\$?([\d.]+)/);
    if (m) {
      const v = parseFloat(m[1]);
      if (v > 0) return v;
    }
  }
  const hid = document.querySelector('input.cpf-product-price');
  if (hid && hid.value) {
    const v = parseFloat(hid.value);
    if (v > 0) return v;
  }
  return 0;
}

/**
 * Extract modifier groups from a product page.
 *
 * Reads extra-product-option fields (radio / checkbox / select) first and
 * falls back to the standard WooCommerce variations form when none are found.
 *
 * @returns {Array<{name: string, type: string, required: boolean,
 *   options: Array<{name: string, price: number, selected: boolean}>}>}
 */
function extractModifierGroups() {
  const LABEL_SELECTOR = '.tm-epo-field-label label, .tm-epo-element-label, h3.tm-epo-field-label, label:first-of-type';
  const results = [];
  const seen = new Set();

  // Helper: clean TMEPO value strings like "Small_0" -> "Small"
  const cleanVal = (v) => (v ? v.replace(/_\d+$/, '').trim() : '');

  // Helper: parse price from data-rules JSON or data-price attr
  const parseRulesPrice = (input) => {
    if (!input) return 0;
    const rules = input.getAttribute('data-rules');
    if (rules) {
      try {
        const arr = JSON.parse(rules);
        if (Array.isArray(arr) && arr.length > 0) {
          const v = parseFloat(arr[0]);
          if (!isNaN(v) && v > 0) return v;
        }
      } catch (e) {
        // Malformed data-rules: fall through to data-price.
      }
    }
    const dp = input.getAttribute('data-price');
    if (dp) {
      const v = parseFloat(dp);
      if (!isNaN(v) && v > 0) return v;
    }
    return 0;
  };

  const isPreselectedInput = (input) => input.className.includes('custom-preselected');

  // Prefer granular containers (individual field cells) over broad row containers
  const granular = document.querySelectorAll('.tm-cell[class*="cpf-type-"], .tc-cell.tcell');
  const broad = document.querySelectorAll('.tc-row, .tm-row, .cpf-section');
  // Use granular if they have labels, otherwise fall back to broad
  const granularWithLabels = [...granular].filter((el) => el.querySelector(LABEL_SELECTOR));
  const elements = granularWithLabels.length > 0
    ? granularWithLabels
    : (broad.length > 0 ? broad : granular);

  elements.forEach((section) => {
    const labelEl = section.querySelector(LABEL_SELECTOR);
    if (!labelEl) return;
    const groupName = labelEl.textContent.trim();
    if (!groupName || groupName.length > 80 || seen.has(groupName)) return;
    // Free-text / quantity fields are not modifier groups
    if (/special request|sandwich name|your name|instructions|quantity/i.test(groupName)) return;
    seen.add(groupName);

    const options = [];
    let groupType = 'select'; // default

    const radios = section.querySelectorAll('input[type="radio"]');
    const checkboxes = [...section.querySelectorAll('input[type="checkbox"]')];
    if (radios.length > 0) groupType = 'radio';
    else if (checkboxes.length > 0) groupType = 'checkbox';

    // A checkbox group mixing preselected and non-preselected inputs is split
    // into two groups: the preselected options and the additions.
    const hasPreselected = checkboxes.some((c) => isPreselectedInput(c));
    const hasAdditions = checkboxes.some((c) => !isPreselectedInput(c));
    const shouldSplit = hasPreselected && hasAdditions;
    const additionOptions = []; // only used if splitting

    const pushUnique = (list, entry) => {
      if (!list.find((o) => o.name === entry.name)) list.push(entry);
    };

    section.querySelectorAll('.tmcp-field-wrap, .tm-field-wrap, label.tm-epo-field-label-wrap').forEach((wrap) => {
      // Radio buttons and checkboxes: name from input.value, price from data-rules
      const input = wrap.querySelector('input[type="radio"], input[type="checkbox"]');
      if (input) {
        const optName = cleanVal(input.value);
        if (!optName || optName.length > 80) return;
        // Skip disabled duplicates (size variants)
        if (input.disabled || input.className.includes('tcdisabled')) return;
        const entry = {
          name: optName,
          price: parseRulesPrice(input),
          selected: input.checked || wrap.classList.contains('tc-active'),
        };
        if (shouldSplit && !isPreselectedInput(input)) {
          pushUnique(additionOptions, entry);
        } else {
          pushUnique(options, entry);
        }
        return;
      }

      // Fallback: try label text
      const lbl = wrap.querySelector('.tm-label, .tm-value, label span:not(.tm-price)');
      if (lbl) {
        const optName = lbl.textContent.replace(/[\n\r\t]+/g, ' ').trim();
        if (!optName || optName.length > 80) return;
        let optPrice = 0;
        const priceSpan = wrap.querySelector('.tm-price, .price .amount, [class*="price"]');
        if (priceSpan) {
          const m = priceSpan.textContent.match(/\+?\$?([\d.]+)/);
          if (m) optPrice = parseFloat(m[1]) || 0;
        }
        pushUnique(options, { name: optName, price: optPrice, selected: false });
      }
    });

    // Select dropdowns: name from option.value/text, price from data-price/text
    if (options.length === 0) {
      section.querySelectorAll('select option').forEach((opt) => {
        if (!opt.value) return;
        let optName = cleanVal(opt.value);
        if (!optName || optName.length > 80) return;
        let optPrice = 0;
        const dp = opt.getAttribute('data-price');
        if (dp) {
          const v = parseFloat(dp);
          if (!isNaN(v) && v > 0) optPrice = v;
        }
        const text = opt.textContent.trim();
        if (text && text.length < 80 && text !== optName) {
          const m = text.match(/\+?\$?([\d.]+)/);
          if (m) optPrice = optPrice || (parseFloat(m[1]) || 0);
          optName = text.replace(/\s*\(\+?\$?[\d.]+\)\s*$/, '').trim() || optName;
        }
        options.push({ name: optName, price: optPrice, selected: opt.selected });
      });
    }

    const hasMainGroup = options.length > 0;
    if (hasMainGroup) {
      const required = section.querySelector('.required, [data-required="1"]') !== null;
      results.push({ name: groupName, type: groupType, options, required });
    }

    // Emit the split-off additions as their own optional checkbox group so the
    // non-preselected options are not lost.
    if (additionOptions.length > 0) {
      const additionsName = hasMainGroup ? groupName + ' - Additions' : groupName;
      seen.add(additionsName);
      results.push({ name: additionsName, type: 'checkbox', options: additionOptions, required: false });
    }
  });

  // Standard WooCommerce variations fallback
  if (results.length === 0) {
    const vForm = document.querySelector('.variations_form');
    if (vForm) {
      vForm.querySelectorAll('.variations tr').forEach((row) => {
        const lbl = row.querySelector('th label, .label label');
        const sel = row.querySelector('select');
        if (lbl && sel) {
          const opts = [...sel.querySelectorAll('option')]
            .filter((o) => o.value)
            .map((o) => ({ name: o.textContent.trim(), price: 0, selected: o.selected }));
          if (opts.length > 0) {
            // Variations are single-choice dropdowns, so mark them as 'select'
            // to get maxSelections = 1 downstream.
            results.push({ name: lbl.textContent.trim(), type: 'select', options: opts, required: true });
          }
        }
      });
    }
  }
  return results;
}

// ---------------------------------------------------------------------------
// Node-side helpers
// ---------------------------------------------------------------------------

/**
 * Guess a product's permalink from the site URL: `?p=<postId>` when the post
 * ID is known, otherwise `/product/<slug>/` with the slug derived from the name.
 * @param {string} baseUrl The URL the script was started with.
 * @param {{name: string, postId: string}} prod
 * @returns {string}
 */
function buildProductUrl(baseUrl, prod) {
  const root = baseUrl.replace(/\/$/, '');
  if (prod.postId) {
    return root + '/?p=' + prod.postId;
  }
  const slug = prod.name.toLowerCase().replace(/[^a-z0-9]+/g, '-').replace(/^-|-$/g, '');
  return root + '/product/' + slug + '/';
}

/** @returns {number[]} The strictly positive option prices of a group. */
function positivePrices(group) {
  return group.options.map((o) => o.price).filter((p) => p > 0);
}

/**
 * @returns {number} The lowest positive option price of the first group that
 *   has any priced option, or 0 when no group has one.
 */
function lowestGroupPrice(groups) {
  for (const g of groups) {
    const prices = positivePrices(g);
    if (prices.length > 0) return Math.min(...prices);
  }
  return 0;
}

/**
 * Convert groups that carry absolute prices into relative surcharges by
 * subtracting the item's base price (clamped at 0). A group is treated as
 * absolutely priced when its cheapest priced option is at least 80% of the
 * base price. Mutates the option objects in place.
 */
function convertAbsolutePricesToRelative(groups, basePrice) {
  for (const g of groups) {
    const prices = positivePrices(g);
    if (prices.length === 0) continue;
    if (Math.min(...prices) < basePrice * 0.8) continue;
    for (const opt of g.options) {
      if (opt.price > 0) {
        opt.price = Math.max(0, Math.round((opt.price - basePrice) * 100) / 100);
      }
    }
  }
}

/**
 * Entry point: scrape the site given as the first CLI argument and print the
 * result (or `{ error }`) as JSON on stdout. Sets a non-zero exit code on failure.
 */
async function main() {
  const url = process.argv[2];
  if (!url) {
    console.log(JSON.stringify({ error: "URL required" }));
    process.exitCode = 1;
    return;
  }

  let browser;
  try {
    browser = await chromium.launch({ headless: true });
    const context = await browser.newContext({ userAgent: USER_AGENT });
    const page = await context.newPage();

    log("Navigating to " + url);
    await page.goto(url, { waitUntil: "networkidle", timeout: 60000 });
    await page.waitForTimeout(3000);

    // Close any popups/modals (best effort: failures here must not abort the scrape)
    try {
      const closeButtons = await page.$$('.close, .modal .close, [aria-label="Close"]');
      for (const btn of closeButtons) {
        if (await btn.isVisible()) await btn.click().catch(() => {});
      }
    } catch (e) {
      log("Could not dismiss popups: " + e.message);
    }

    // Extract business info from the page
    const businessInfo = await page.evaluate(extractBusinessInfo);
    log("Business: " + businessInfo.name + " | " + businessInfo.address + " | " + businessInfo.phone);

    // Strategy 1: Products displayed inline on the page (custom WooCommerce themes)
    const allProducts = [];
    const seenProductNames = new Set();
    // Real URLs of products discovered through product links (Strategy 2)
    const knownProductUrls = new Map();

    /** Append products whose name has not been collected yet. */
    const addUniqueProducts = (products) => {
      for (const p of products) {
        if (seenProductNames.has(p.name)) continue;
        seenProductNames.add(p.name);
        allProducts.push(p);
      }
    };

    // Category tabs (clickable, same-page) take precedence over category links
    // (separate pages) because some themes use links that 404.
    const categoryTabs = await page.evaluate(listCategoryTabs);
    const categoryLinks = await page.evaluate(listCategoryLinks);
    const useTabs = categoryTabs.length > 0;
    log("Found " + categoryTabs.length + " category tabs, " + categoryLinks.length + " category links" + (useTabs ? " (using tabs)" : ""));

    const visitedUrls = new Set([page.url()]);
    const scrapeInlineProducts = (catOverride) => page.evaluate(extractInlineProducts, catOverride);

    if (useTabs) {
      // Click each tab (prevent navigation) and scrape products that appear
      for (const tab of categoryTabs) {
        try {
          log("Clicking tab: " + tab.name);
          await page.evaluate(clickCategoryTab, tab.name);
          await page.waitForTimeout(2500);
          const catProducts = await scrapeInlineProducts(tab.name);
          addUniqueProducts(catProducts);
          log("  -> " + catProducts.length + " products");
        } catch (e) {
          log("Error on tab " + tab.name + ": " + e.message);
        }
      }
    } else {
      // Scrape homepage products first (kept as-is, without de-duplication)
      const homeProducts = await scrapeInlineProducts(null);
      log("Found " + homeProducts.length + " products on homepage");
      for (const p of homeProducts) {
        allProducts.push(p);
        seenProductNames.add(p.name);
      }

      // Visit each category page, following pagination for at most 10 pages
      for (const cat of categoryLinks) {
        if (visitedUrls.has(cat.href)) continue;
        visitedUrls.add(cat.href);
        try {
          log("Visiting category: " + cat.name);
          await page.goto(cat.href, { waitUntil: "networkidle", timeout: 30000 });
          await page.waitForTimeout(2000);
          let pageNum = 1;
          while (pageNum <= 10) {
            addUniqueProducts(await scrapeInlineProducts(cat.name));
            const nextUrl = await page.evaluate(findNextPageUrl);
            if (!nextUrl) break;
            pageNum++;
            await page.goto(nextUrl, { waitUntil: "networkidle", timeout: 30000 });
            await page.waitForTimeout(1500);
          }
        } catch (e) {
          log("Error on category " + cat.name + ": " + e.message);
        }
      }
    }
    log("Total unique products: " + allProducts.length);

    // Strategy 2: If no inline products found, try standard product links
    if (allProducts.length === 0) {
      log("No inline products - trying product link approach");
      await page.goto(url, { waitUntil: "networkidle", timeout: 30000 });
      await page.waitForTimeout(3000);
      const productLinks = await page.evaluate(listProductLinks);
      log("Found " + productLinks.length + " product links");
      for (const link of productLinks) {
        try {
          await page.goto(link, { waitUntil: "domcontentloaded", timeout: 30000 });
          await page.waitForTimeout(1000);
          const pd = await page.evaluate(extractProductPageDetails);
          if (pd.name) {
            allProducts.push(pd);
            // Remember the real permalink so it does not have to be guessed later
            knownProductUrls.set(pd, link);
          }
        } catch (e) {
          log("Error on product link: " + e.message);
        }
      }
    }

    // Now extract modifiers by visiting individual product pages.
    // A Map / null-prototype object is used so that scraped names such as
    // "constructor" cannot collide with inherited Object properties.
    const modifierGroupsMap = new Map();
    const itemModifierMap = Object.create(null);

    for (let i = 0; i < allProducts.length; i++) {
      const prod = allProducts[i];
      log(`[${i + 1}/${allProducts.length}] Extracting modifiers for: ${prod.name}`);

      // Inline products have no known permalink: try ?p={postId} or /product/{slug}
      const productUrl = knownProductUrls.get(prod) || buildProductUrl(url, prod);

      try {
        await page.goto(productUrl, { waitUntil: "domcontentloaded", timeout: 20000 });
        await page.waitForTimeout(1500);
        const groups = await page.evaluate(extractModifierGroups);

        // If item has no price, try the product page, then the cheapest priced option
        if (prod.price === 0) {
          const pagePrice = await page.evaluate(extractProductPagePrice);
          if (pagePrice > 0) {
            prod.price = pagePrice;
          } else {
            const fallbackPrice = lowestGroupPrice(groups);
            if (fallbackPrice > 0) prod.price = fallbackPrice;
          }
        }

        // Convert any modifier group with absolute prices to relative (subtract base price)
        if (prod.price > 0 && groups.length > 0) {
          convertAbsolutePricesToRelative(groups, prod.price);
        }

        if (groups.length > 0) {
          const itemModGroups = [];
          for (const g of groups) {
            // Groups are shared across items by name; the first occurrence wins
            if (!modifierGroupsMap.has(g.name)) {
              modifierGroupsMap.set(g.name, {
                name: g.name,
                type: g.type || 'select',
                options: g.options,
                required: g.required,
                minSelections: g.required ? 1 : 0,
                maxSelections: g.type === 'radio' || g.type === 'select' ? 1 : 0,
              });
            }
            itemModGroups.push(g.name);
          }
          itemModifierMap[prod.name] = itemModGroups;
          log("  -> " + groups.length + " modifier groups" + (prod.price > 0 ? " ($" + prod.price + ")" : ""));
        }
      } catch (e) {
        log("  -> Error: " + e.message);
      }
    }

    const modifiers = [...modifierGroupsMap.values()];
    const stats = {
      totalProducts: allProducts.length,
      itemsExtracted: allProducts.length,
      modifierGroups: modifiers.length,
      itemsWithModifiers: Object.keys(itemModifierMap).length,
    };
    log("Done: " + stats.itemsExtracted + " items, " + stats.modifierGroups + " modifier groups, " + stats.itemsWithModifiers + " items with modifiers");
    console.log(JSON.stringify({ business: businessInfo, items: allProducts, modifiers, itemModifierMap, stats }));
  } catch (err) {
    log("Fatal: " + err.message);
    console.log(JSON.stringify({ error: err.message }));
    // Set the exit code instead of calling process.exit(): exiting here would
    // skip the finally block below and leave the browser open.
    process.exitCode = 1;
  } finally {
    if (browser) {
      try {
        await browser.close();
      } catch (e) {
        log("Error closing browser: " + e.message);
      }
    }
  }
}

main().catch((err) => {
  log("Fatal: " + err.message);
  process.exitCode = 1;
});