This repository has been archived on 2026-03-21. You can view files and clone it, but cannot push or open issues or pull requests.
payfrit-biz/playwright/woo-modifiers.js
John Mizerek dd2a508680 Add playwright scripts to git
Previously only lived on servers at /opt/playwright/. Now tracked in repo.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-10 11:22:34 -07:00

497 lines
21 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

const { chromium } = require("playwright");
(async () => {
// The storefront URL to scrape is the first CLI argument. Without it there is
// nothing to do: report the problem as a JSON object on stdout and exit non-zero.
const [, , url] = process.argv;
if (!url) {
  const payload = { error: "URL required" };
  console.log(JSON.stringify(payload));
  process.exit(1);
}
// Diagnostic logger: writes one "[woo-mod] "-prefixed line to stderr, which
// keeps stdout free for the JSON documents this script prints.
const log = (msg) => {
  const line = "[woo-mod] " + msg + "\n";
  return process.stderr.write(line);
};
// Assigned once Chromium has been launched; declared outside the try block so
// the cleanup code can still reach it.
let browser;
try {
// Start headless Chromium, open a context that presents a desktop Chrome
// user agent, and create the single page used for the whole scrape.
const launchOptions = { headless: true };
browser = await chromium.launch(launchOptions);
const contextOptions = {
  userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
};
const context = await browser.newContext(contextOptions);
const page = await context.newPage();
log(`Navigating to ${url}`);
// Load the storefront (network idle, 60 s limit), then pause a further 3 s
// before anything is read from the page.
await page.goto(url, { waitUntil: "networkidle", timeout: 60000 });
await page.waitForTimeout(3000);
// Close any popups/modals.
// This is best-effort: a failure here must never abort the scrape. Errors are
// written to the stderr log instead of being discarded, so an unexpected page
// state can still be diagnosed.
try {
  const closeButtons = await page.$$('.close, .modal .close, [aria-label="Close"]');
  for (const btn of closeButtons) {
    if (!(await btn.isVisible())) continue;
    try {
      await btn.click();
    } catch (clickErr) {
      // A single button that cannot be clicked does not stop the others.
      log("Could not click close button: " + clickErr.message);
    }
  }
} catch (e) {
  log("Popup dismissal skipped: " + e.message);
}
// Extract business info from the page.
// Runs in the browser and returns { name, address, phone, hours }, all strings.
// `hours` is part of the result shape but nothing below populates it.
const businessInfo = await page.evaluate(() => {
  const info = { name: '', address: '', phone: '', hours: '' };
  // Removes a trailing tagline: everything from a "|" onwards, or from a dash
  // that has whitespace on both sides. A hyphen inside a word ("Drive-In") is
  // part of the name and is kept.
  const stripTagline = (text) => text.replace(/\s*\|.*$|\s+[-\u2013\u2014]\s.*$/, '').trim();
  // Try the selectors one at a time, most specific first. A single combined
  // selector list would return the first match in document order, which is
  // always <title> (it lives in <head>), so the site title and logo would
  // never be consulted.
  const nameSelectors = ['.site-title', '.logo-text', '.custom-logo-link img', 'title'];
  for (const selector of nameSelectors) {
    const nameEl = document.querySelector(selector);
    if (!nameEl) continue;
    // Logo images carry the name in `alt`; everything else in its text.
    const candidate = stripTagline(nameEl.alt || nameEl.textContent || '');
    if (candidate) {
      info.name = candidate;
      break;
    }
  }
  // Try page title as fallback
  if (!info.name && document.title) {
    info.name = stripTagline(document.title);
  }
  // Look for address/phone in the rendered text of the whole page
  const bodyText = document.body.innerText;
  // Phone: prefer a number introduced by Call/Phone/Tel, otherwise the first
  // separator-delimited 3-3-4 digit pattern anywhere in the text.
  const phoneMatch = bodyText.match(/(?:Call|Phone|Tel)[:\s]*\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4}/i) ||
    bodyText.match(/\(?\d{3}\)?[\s.-]\d{3}[\s.-]\d{4}/);
  if (phoneMatch) info.phone = phoneMatch[0].replace(/^(?:Call|Phone|Tel)[:\s]*/i, '').trim();
  // Address - look for street patterns: house number, capitalised street name,
  // a street-type abbreviation, and optionally "City, ST 12345".
  const addrMatch = bodyText.match(/\d{1,5}\s+[A-Z][a-zA-Z\s]+(?:St|Ave|Blvd|Dr|Rd|Ln|Way|Ct|Pl|Cir)[.,]?\s*(?:[A-Z][a-zA-Z\s]+,?\s*[A-Z]{2}\s*\d{5})?/);
  if (addrMatch) info.address = addrMatch[0].trim();
  return info;
});
log("Business: " + businessInfo.name + " | " + businessInfo.address + " | " + businessInfo.phone);
// Strategy 1: products are rendered inline on the page (custom WooCommerce
// themes). Everything found across the category tabs/pages accumulates here.
let allProducts = [];
// Categories come in two flavours: tabs that swap content on the same page,
// and links that lead to separate category pages. Detect both.
const categoryTabs = await page.evaluate(() => {
  const found = [];
  const nodes = document.querySelectorAll('li.tabs, .category-tab, [data-filter]');
  nodes.forEach((node, position) => {
    const label = node.textContent.trim();
    // Ignore unlabeled elements and anything with 60 or more characters of text.
    if (label.length === 0 || label.length >= 60) return;
    found.push({ index: position, name: label, active: node.classList.contains('active') });
  });
  return found;
});
const categoryLinks = await page.evaluate(() => {
  // Links are only followed when no tabs were found — some themes use links that 404
  const found = [];
  for (const anchor of document.querySelectorAll('.product-category a, .product_cat a')) {
    const label = anchor.textContent.trim();
    if (label.length > 0 && label.length < 60) {
      found.push({ href: anchor.href, name: label });
    }
  }
  return found;
});
const useTabs = categoryTabs.length > 0;
log(`Found ${categoryTabs.length} category tabs, ${categoryLinks.length} category links${useTabs ? " (using tabs)" : ""}`);
// Seeded with the landing page so it is never re-visited as a category.
const visitedUrls = new Set();
visitedUrls.add(page.url());
/**
 * Collects every product card currently present in the page's DOM.
 *
 * @param {?string} catOverride - Category name stamped on every product. When
 *   falsy, the category is derived from the card's `product_cat-<slug>` class.
 * @returns {Promise<Array<{name: string, price: number, description: string,
 *   imageUrl: string, category: string, postId: string}>>} One entry per card
 *   that has a non-empty title. `price` is 0 when none could be parsed,
 *   `description` is cut to 200 characters, and `postId` is '' when the card
 *   carries no `post-<id>` class.
 */
const scrapeInlineProducts = async (catOverride) => {
  return await page.evaluate((catName) => {
    // Parses the first amount in `text`. Accepts "1,299.00"-style thousands
    // separators and never mistakes a lone "." for a number.
    const parsePrice = (text) => {
      const m = (text || '').match(/\d{1,3}(?:,\d{3})+(?:\.\d+)?|\d+(?:\.\d+)?|\.\d+/);
      return m ? parseFloat(m[0].replace(/,/g, '')) || 0 : 0;
    };
    const products = [];
    const productEls = document.querySelectorAll('.product-con-box, li.product, .type-product, .product-item');
    productEls.forEach(el => {
      const nameEl = el.querySelector('.woocommerce-loop-product__title, h2, h3, .product-title');
      if (!nameEl) return;
      const name = nameEl.textContent.trim();
      if (!name) return;
      const descEl = el.querySelector('.woocommerce-product-details__short-description, .description, .short-description');
      const description = descEl ? descEl.textContent.trim().substring(0, 200) : '';
      // Query in priority order: sale price (<ins>), then the first amount,
      // then the whole price container. One combined selector list would
      // always yield the `.price` container (an ancestor precedes its
      // descendants in document order), whose text starts with the
      // struck-through regular price for on-sale items.
      const priceEl = el.querySelector('.price ins .amount')
        || el.querySelector('.price .woocommerce-Price-amount')
        || el.querySelector('.price');
      const price = priceEl ? parsePrice(priceEl.textContent) : 0;
      const imgEl = el.querySelector('img');
      const imageUrl = imgEl ? (imgEl.src || imgEl.dataset.src || '') : '';
      // Try to get category from element classes, e.g. "product_cat-hot-subs" -> "Hot Subs"
      let category = catName || '';
      if (!category) {
        const classes = el.className || '';
        const catMatch = classes.match(/product_cat-([a-z0-9-]+)/);
        if (catMatch) {
          category = catMatch[1].replace(/-/g, ' ').replace(/\b\w/g, c => c.toUpperCase());
        }
      }
      // Get post ID (from the "post-<id>" class) so the product page can be opened later
      const idMatch = (el.className || '').match(/post-(\d+)/);
      const postId = idMatch ? idMatch[1] : '';
      products.push({ name, price, description, imageUrl, category, postId });
    });
    return products;
  }, catOverride);
};
// Scrape products — either by clicking tabs or visiting category pages.
// Products are de-duplicated by name on every path; the Set makes each
// membership check O(1) instead of re-scanning allProducts.
const seenProductNames = new Set(allProducts.map((p) => p.name));
const addUniqueProducts = (found) => {
  for (const p of found) {
    if (seenProductNames.has(p.name)) continue;
    seenProductNames.add(p.name);
    allProducts.push(p);
  }
};
if (useTabs) {
  // Click each tab (prevent navigation) and scrape products that appear
  for (const tab of categoryTabs) {
    try {
      log("Clicking tab: " + tab.name);
      const clicked = await page.evaluate(({ name, index }) => {
        // Cancel the default action once so a link-style tab cannot navigate
        // away, then fire the click so the theme's own handlers run.
        const clickInPlace = (el) => {
          el.addEventListener('click', e => e.preventDefault(), { once: true });
          el.click();
        };
        const btns = document.querySelectorAll('li.tabs a, a.catabtn, .category-tab a');
        for (const btn of btns) {
          if (btn.textContent.trim() === name) {
            clickInPlace(btn);
            return true;
          }
        }
        // No matching anchor (e.g. a [data-filter] element, or a tab without
        // an inner link): click the tab element itself. `index` is its
        // position in the same selector list used when the tabs were
        // discovered; the text comparison guards against the DOM having
        // changed since then.
        const tabEl = document.querySelectorAll('li.tabs, .category-tab, [data-filter]')[index];
        if (tabEl && tabEl.textContent.trim() === name) {
          clickInPlace(tabEl);
          return true;
        }
        return false;
      }, { name: tab.name, index: tab.index });
      if (!clicked) log("  -> no clickable element found for tab; scraping page as-is");
      await page.waitForTimeout(2500);
      const catProducts = await scrapeInlineProducts(tab.name);
      addUniqueProducts(catProducts);
      log("  -> " + catProducts.length + " products");
    } catch (e) {
      log("Error on tab " + tab.name + ": " + e.message);
    }
  }
} else {
  // Scrape homepage products first (category comes from each card's classes)
  const homeProducts = await scrapeInlineProducts(null);
  log("Found " + homeProducts.length + " products on homepage");
  addUniqueProducts(homeProducts);
  // Visit each category page
  for (const cat of categoryLinks) {
    if (visitedUrls.has(cat.href)) continue;
    visitedUrls.add(cat.href);
    try {
      log("Visiting category: " + cat.name);
      await page.goto(cat.href, { waitUntil: "networkidle", timeout: 30000 });
      await page.waitForTimeout(2000);
      // Follow "next" pagination links, at most 10 pages per category.
      let pageNum = 1;
      while (pageNum <= 10) {
        const catProducts = await scrapeInlineProducts(cat.name);
        addUniqueProducts(catProducts);
        const nextUrl = await page.evaluate(() => {
          const next = document.querySelector('.woocommerce-pagination .next, a.next.page-numbers');
          return next ? next.href : null;
        });
        if (!nextUrl) break;
        pageNum++;
        await page.goto(nextUrl, { waitUntil: "networkidle", timeout: 30000 });
        await page.waitForTimeout(1500);
      }
    } catch (e) {
      log("Error on category " + cat.name + ": " + e.message);
    }
  }
}
log("Total unique products: " + allProducts.length);
// Strategy 2: If no inline products found, try standard product links.
// Reloads the start URL, collects links to single-product pages, and reads
// name/price/description/image/category from each page.
if (allProducts.length === 0) {
  log("No inline products - trying product link approach");
  await page.goto(url, { waitUntil: "networkidle", timeout: 30000 });
  await page.waitForTimeout(3000);
  const productLinks = await page.evaluate(() => {
    const anchors = document.querySelectorAll('a[href*="/product/"], a.woocommerce-LoopProduct-link');
    // Drop the #fragment before de-duplicating so ".../foo/" and
    // ".../foo/#reviews" count as one page; skip anchors without a usable href.
    const pageUrls = [...anchors]
      .filter(a => typeof a.href === 'string')
      .map(a => a.href.split('#')[0])
      .filter(Boolean);
    return [...new Set(pageUrls)];
  });
  log("Found " + productLinks.length + " product links");
  // Same rule as Strategy 1: a product name is recorded only once.
  const seenNames = new Set();
  for (const link of productLinks) {
    try {
      await page.goto(link, { waitUntil: "domcontentloaded", timeout: 30000 });
      await page.waitForTimeout(1000);
      const pd = await page.evaluate(() => {
        // Parses the first amount in `text`, accepting "1,299.00"-style
        // thousands separators.
        const parsePrice = (text) => {
          const m = (text || '').match(/\d{1,3}(?:,\d{3})+(?:\.\d+)?|\d+(?:\.\d+)?|\.\d+/);
          return m ? parseFloat(m[0].replace(/,/g, '')) || 0 : 0;
        };
        const nameEl = document.querySelector('.product_title, h1.entry-title');
        const name = nameEl ? nameEl.textContent.trim() : '';
        // Prefer the sale price (<ins>); otherwise take the first amount, which
        // for an on-sale product would be the struck-through regular price.
        const priceEl = document.querySelector('.summary .price ins .woocommerce-Price-amount')
          || document.querySelector('.summary .price .woocommerce-Price-amount');
        const price = priceEl ? parsePrice(priceEl.textContent) : 0;
        const descEl = document.querySelector('.woocommerce-product-details__short-description');
        const desc = descEl ? descEl.textContent.trim().substring(0, 200) : '';
        const imgEl = document.querySelector('.woocommerce-product-gallery__image img');
        const img = imgEl ? (imgEl.src || '') : '';
        const catEl = document.querySelector('.posted_in a');
        const cat = catEl ? catEl.textContent.trim() : '';
        // postId is left empty on this path.
        return { name, price, description: desc, imageUrl: img, category: cat, postId: '' };
      });
      if (pd.name && !seenNames.has(pd.name)) {
        seenNames.add(pd.name);
        allProducts.push(pd);
      }
    } catch (e) {
      log("Error on product link: " + e.message);
    }
  }
}
// Now extract modifiers by visiting individual product pages.
// Both lookups are keyed by text scraped from the site, so they are created
// without a prototype: a group or product literally named "constructor" or
// "toString" must not collide with a member inherited from Object.prototype.
const modifierGroupsMap = Object.create(null); // group name -> group definition
const itemModifierMap = Object.create(null); // product name -> array of group names
// For inline products, we need their permalink - try /product/{slug} or ?p={postId}
for (let i = 0; i < allProducts.length; i++) {
const prod = allProducts[i];
log(`[${i + 1}/${allProducts.length}] Extracting modifiers for: ${prod.name}`);
// Build product URL from postId or, failing that, from a slug guessed from the name.
// NOTE(review): both forms are appended to the start URL, so this presumably
// expects the start URL to be the site root — confirm for sub-page inputs.
const siteBase = url.replace(/\/$/, '');
let productUrl = '';
if (prod.postId) {
  productUrl = siteBase + '/?p=' + prod.postId;
} else {
  // Approximate WordPress's default slug rules (sanitize_title_with_dashes):
  // accents are removed, whitespace/"."/"/"/dashes become "-", and all other
  // punctuation is dropped rather than turned into "-". So "Joe's Special"
  // becomes "joes-special", not "joe-s-special".
  const slug = prod.name
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '')
    .toLowerCase()
    .replace(/[\s./\u2013\u2014]+/g, '-')
    .replace(/[^a-z0-9_-]/g, '')
    .replace(/-+/g, '-')
    .replace(/^-+|-+$/g, '');
  productUrl = siteBase + '/product/' + slug + '/';
}
try {
// Load the product page (DOM ready is enough), then pause 1.5 s before reading
// it — presumably so option widgets have time to render.
await page.goto(productUrl, { waitUntil: "domcontentloaded", timeout: 20000 });
await page.waitForTimeout(1500);
// Runs in the browser. Returns the product's modifier groups as
//   [{ name, type: 'radio' | 'checkbox' | 'select', options: [{ name, price, selected }], required }]
// taken from extra-product-option markup when present, otherwise from the
// standard WooCommerce variations form.
const groups = await page.evaluate(() => {
  const results = [];
  const seen = new Set(); // group names already emitted for this product
  // Helper: clean TMEPO value strings like "Small_0" -> "Small"
  const cleanVal = (v) => v ? v.replace(/_\d+$/, '').trim() : '';
  // Helper: parse price from data-rules JSON or data-price attr.
  // Returns the first positive number found, otherwise 0.
  const parseRulesPrice = (input) => {
    if (!input) return 0;
    const rules = input.getAttribute('data-rules');
    if (rules) {
      try {
        const arr = JSON.parse(rules);
        if (Array.isArray(arr) && arr.length > 0) {
          const v = parseFloat(arr[0]);
          if (!isNaN(v) && v > 0) return v;
        }
      } catch(e) {
        // Unparseable data-rules: fall through to data-price.
      }
    }
    const dp = input.getAttribute('data-price');
    if (dp) { const v = parseFloat(dp); if (!isNaN(v) && v > 0) return v; }
    return 0;
  };
  // Prefer granular containers (individual field cells) over broad row containers
  // tm-cell with cpf-type are individual modifier groups; tc-cell.tcell are TMEPO cells
  const granular = document.querySelectorAll('.tm-cell[class*="cpf-type-"], .tc-cell.tcell');
  const broad = document.querySelectorAll('.tc-row, .tm-row, .cpf-section');
  // Use granular if they have labels, otherwise fall back to broad
  const granularWithLabels = [...granular].filter(el =>
    el.querySelector('.tm-epo-field-label label, .tm-epo-element-label, h3.tm-epo-field-label, label:first-of-type')
  );
  const elements = granularWithLabels.length > 0 ? granularWithLabels : (broad.length > 0 ? broad : granular);
  elements.forEach(section => {
    const labelEl = section.querySelector('.tm-epo-field-label label, .tm-epo-element-label, h3.tm-epo-field-label, label:first-of-type');
    if (!labelEl) return;
    const groupName = labelEl.textContent.trim();
    if (!groupName || groupName.length > 80 || seen.has(groupName)) return;
    // Free-text / bookkeeping fields are not modifier groups.
    if (/special request|sandwich name|your name|instructions|quantity/i.test(groupName)) return;
    seen.add(groupName);
    const options = [];
    let groupType = 'select'; // default
    // Radio buttons and checkboxes — get name from input.value, price from data-rules
    const radios = section.querySelectorAll('input[type="radio"]');
    const checkboxes = section.querySelectorAll('input[type="checkbox"]');
    if (radios.length > 0) groupType = 'radio';
    else if (checkboxes.length > 0) groupType = 'checkbox';
    // Check if this checkbox group has both preselected and non-preselected (split into two groups)
    const hasPreselected = checkboxes.length > 0 && [...checkboxes].some(c => c.className.includes('custom-preselected'));
    const hasAdditions = checkboxes.length > 0 && [...checkboxes].some(c => !c.className.includes('custom-preselected'));
    const shouldSplit = hasPreselected && hasAdditions;
    const additionOptions = []; // only used if splitting
    section.querySelectorAll('.tmcp-field-wrap, .tm-field-wrap, label.tm-epo-field-label-wrap').forEach(wrap => {
      const input = wrap.querySelector('input[type="radio"], input[type="checkbox"]');
      if (input) {
        const optName = cleanVal(input.value);
        if (!optName || optName.length > 80) return;
        const optPrice = parseRulesPrice(input);
        const selected = input.checked || wrap.classList.contains('tc-active');
        const isPreselected = input.className.includes('custom-preselected');
        // Skip disabled duplicates (size variants)
        if (input.disabled || input.className.includes('tcdisabled')) return;
        const entry = { name: optName, price: optPrice, selected };
        if (shouldSplit && !isPreselected) {
          // Deduplicate
          if (!additionOptions.find(o => o.name === optName)) {
            additionOptions.push(entry);
          }
        } else {
          // Deduplicate
          if (!options.find(o => o.name === optName)) {
            options.push(entry);
          }
        }
        return;
      }
      // Fallback: try label text
      const lbl = wrap.querySelector('.tm-label, .tm-value, label span:not(.tm-price)');
      if (lbl) {
        const optName = lbl.textContent.replace(/[\n\r\t]+/g, ' ').trim();
        if (!optName || optName.length > 80) return;
        let optPrice = 0;
        const priceSpan = wrap.querySelector('.tm-price, .price .amount, [class*="price"]');
        if (priceSpan) {
          const m = priceSpan.textContent.match(/\+?\$?([\d.]+)/);
          if (m) optPrice = parseFloat(m[1]) || 0;
        }
        if (!options.find(o => o.name === optName)) {
          options.push({ name: optName, price: optPrice, selected: false });
        }
      }
    });
    // Select dropdowns — get name from option.value, price from data-price
    if (options.length === 0) {
      section.querySelectorAll('select option').forEach(opt => {
        if (!opt.value) return;
        let optName = cleanVal(opt.value);
        if (!optName || optName.length > 80) return;
        let optPrice = 0;
        const dp = opt.getAttribute('data-price');
        if (dp) { const v = parseFloat(dp); if (!isNaN(v) && v > 0) optPrice = v; }
        // When the visible text differs from the value, use the text as the
        // name (minus a trailing "(+$1.50)"-style suffix) and take the price
        // from it if data-price gave none.
        const text = opt.textContent.trim();
        if (text && text.length < 80 && text !== optName) {
          const m = text.match(/\+?\$?([\d.]+)/);
          if (m) optPrice = optPrice || (parseFloat(m[1]) || 0);
          optName = text.replace(/\s*\(\+?\$?[\d.]+\)\s*$/, '').trim() || optName;
        }
        options.push({ name: optName, price: optPrice, selected: opt.selected });
      });
    }
    if (options.length > 0) {
      const required = section.querySelector('.required, [data-required="1"]') !== null;
      results.push({ name: groupName, type: groupType, options, required });
    }
    // Emit the non-preselected half of a split checkbox group as a group of
    // its own. Without this the options collected in additionOptions would be
    // dropped from the output entirely.
    // NOTE(review): the " - Additions" suffix is a chosen label — confirm the
    // name the downstream importer expects for this second group.
    if (shouldSplit && additionOptions.length > 0) {
      const additionsName = groupName + ' - Additions';
      if (!seen.has(additionsName)) {
        seen.add(additionsName);
        results.push({ name: additionsName, type: 'checkbox', options: additionOptions, required: false });
      }
    }
  });
  // Standard WooCommerce variations fallback
  if (results.length === 0) {
    const vForm = document.querySelector('.variations_form');
    if (vForm) {
      vForm.querySelectorAll('.variations tr').forEach(row => {
        const lbl = row.querySelector('th label, .label label');
        const sel = row.querySelector('select');
        if (lbl && sel) {
          // Same option/group shape as the groups above: every option carries
          // `selected`, and the group states its type explicitly.
          const opts = [...sel.querySelectorAll('option')].filter(o => o.value).map(o => ({ name: o.textContent.trim(), price: 0, selected: o.selected }));
          if (opts.length > 0) results.push({ name: lbl.textContent.trim(), type: 'select', options: opts, required: true });
        }
      });
    }
  }
  return results;
});
// If item has no price, try to get it from the product page:
//   1. the price shown in the product summary (sale price when on sale),
//   2. the hidden `input.cpf-product-price` field,
//   3. the lowest positive option price of the first modifier group that has one.
if (prod.price === 0) {
  const pagePrice = await page.evaluate(() => {
    // Parses the first amount in `text`, accepting "1,299.00"-style thousands
    // separators.
    const parsePrice = (text) => {
      const m = (text || '').match(/\d{1,3}(?:,\d{3})+(?:\.\d+)?|\d+(?:\.\d+)?|\.\d+/);
      return m ? parseFloat(m[0].replace(/,/g, '')) || 0 : 0;
    };
    // Look for the <ins> sale amount first: in the combined selector the first
    // match in document order is the struck-through regular price.
    const pe = document.querySelector('.summary .price ins .amount')
      || document.querySelector('.summary .price .woocommerce-Price-amount, .summary .price .amount, .product .price .amount');
    if (pe) {
      const v = parsePrice(pe.textContent);
      if (v > 0) return v;
    }
    const hid = document.querySelector('input.cpf-product-price');
    if (hid && hid.value) { const v = parseFloat(hid.value); if (v > 0) return v; }
    return 0;
  });
  if (pagePrice > 0) {
    prod.price = pagePrice;
  } else if (groups.length > 0) {
    // Use lowest price from first modifier group that has prices
    for (const g of groups) {
      const prices = g.options.map(o => o.price).filter(p => p > 0);
      if (prices.length > 0) { prod.price = Math.min(...prices); break; }
    }
  }
}
// Rewrite absolutely-priced modifier groups as surcharges over the item price.
// A group counts as absolutely priced when its cheapest positive option costs
// at least 80% of the item price. Each positive option price then becomes
// (option - item price), rounded to cents and never below zero. Options priced
// at 0 are left alone.
if (prod.price > 0 && groups.length > 0) {
  const basePrice = prod.price;
  const threshold = basePrice * 0.8;
  groups.forEach((group) => {
    const pricedOptions = group.options.filter((option) => option.price > 0);
    if (pricedOptions.length === 0) return;
    const cheapest = pricedOptions.reduce((low, option) => Math.min(low, option.price), Infinity);
    // Cheapest option well below the item price: these are already surcharges.
    if (cheapest < threshold) return;
    pricedOptions.forEach((option) => {
      const delta = Math.round((option.price - basePrice) * 100) / 100;
      option.price = delta < 0 ? 0 : delta;
    });
  });
}
// Record this product's groups. A group definition is stored once, under its
// name, the first time any product uses it; later products with a same-named
// group only reference it. The product itself maps to the list of group names.
if (groups.length > 0) {
  const itemModGroups = [];
  for (const g of groups) {
    // Own-property check: a scraped name such as "constructor" must not be
    // mistaken for an already-registered group.
    if (!Object.prototype.hasOwnProperty.call(modifierGroupsMap, g.name)) {
      // Resolve the default once so `type` and `maxSelections` always agree.
      // Previously a group without a type was stored as 'select' but got
      // maxSelections 0 instead of 1.
      const type = g.type || 'select';
      modifierGroupsMap[g.name] = {
        name: g.name,
        type,
        options: g.options,
        required: g.required,
        minSelections: g.required ? 1 : 0,
        // Single-choice types allow one selection; checkbox groups get 0
        // (presumably "no limit" — confirm with the consumer of this JSON).
        maxSelections: type === 'radio' || type === 'select' ? 1 : 0
      };
    }
    itemModGroups.push(g.name);
  }
  itemModifierMap[prod.name] = itemModGroups;
  log("  -> " + groups.length + " modifier groups" + (prod.price > 0 ? " ($" + prod.price + ")" : ""));
}
} catch (e) {
log(" -> Error: " + e.message);
}
}
// Assemble the final result and print it as a single JSON document on stdout.
const modifiers = Object.keys(modifierGroupsMap).map((groupName) => modifierGroupsMap[groupName]);
const productCount = allProducts.length;
// totalProducts and itemsExtracted both report the number of collected products.
const stats = {
  totalProducts: productCount,
  itemsExtracted: productCount,
  modifierGroups: modifiers.length,
  itemsWithModifiers: Object.keys(itemModifierMap).length
};
log(`Done: ${stats.itemsExtracted} items, ${stats.modifierGroups} modifier groups, ${stats.itemsWithModifiers} items with modifiers`);
const result = {
  business: businessInfo,
  items: allProducts,
  modifiers,
  itemModifierMap,
  stats
};
console.log(JSON.stringify(result));
} catch (err) {
log("Fatal: " + err.message);
console.log(JSON.stringify({ error: err.message }));
process.exit(1);
} finally {
if (browser) await browser.close();
}
})();