Add playwright scripts to git
Previously these scripts lived only on the servers at /opt/playwright/. They are now tracked in the repo. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
f58d567fb4
commit
dd2a508680
7 changed files with 900 additions and 0 deletions
15
playwright/package.json
Normal file
15
playwright/package.json
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
{
|
||||
"name": "playwright",
|
||||
"version": "1.0.0",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
},
|
||||
"keywords": [],
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
"description": "",
|
||||
"dependencies": {
|
||||
"playwright": "^1.58.2"
|
||||
}
|
||||
}
|
||||
71
playwright/render.js
Normal file
71
playwright/render.js
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
const { chromium } = require("playwright");
|
||||
|
||||
/**
 * Render a page in headless Chromium and print one JSON document to stdout.
 *
 * Usage: node render.js <url> [waitMs]
 *
 * On success the document is { html, images, url }, where `images` combines
 * the URLs of every response served with an image content type and the
 * absolute `src` of every <img> in the final DOM. On failure it is
 * { error, url }. A missing URL prints { error } and exits with status 1.
 */
(async () => {
  const DEFAULT_WAIT_MS = 3000;

  const url = process.argv[2];
  // Always parse with an explicit radix, and fall back to the default when the
  // argument is absent or not numeric instead of passing NaN to waitForTimeout.
  const parsedWait = Number.parseInt(process.argv[3], 10);
  const wait = Number.isNaN(parsedWait) ? DEFAULT_WAIT_MS : parsedWait;

  if (!url) {
    console.log(JSON.stringify({ error: "URL required" }));
    process.exit(1);
  }

  let browser;
  try {
    // Launching inside the try means a launch/context failure is reported as a
    // JSON error like every other failure, rather than as an unhandled rejection.
    browser = await chromium.launch({ headless: true });
    const context = await browser.newContext({
      userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    });
    const page = await context.newPage();

    // Track image URLs as they load
    const images = new Set();
    page.on("response", (response) => {
      const ct = response.headers()["content-type"] || "";
      if (ct.includes("image/")) {
        images.add(response.url());
      }
    });

    // Use load instead of networkidle - more reliable for sites with persistent connections
    await page.goto(url, { waitUntil: "load", timeout: 45000 });

    // Wait for initial JS rendering
    await page.waitForTimeout(wait);

    // Scroll the page one viewport at a time (capped at 20 steps) to trigger
    // lazy-loaded images (DoorDash, etc.)
    const { scrollHeight, viewportHeight } = await page.evaluate(() => ({
      scrollHeight: document.body.scrollHeight,
      viewportHeight: window.innerHeight
    }));
    const scrollSteps = Math.min(Math.ceil(scrollHeight / viewportHeight), 20);

    for (let i = 0; i < scrollSteps; i++) {
      await page.evaluate((step) => {
        window.scrollTo(0, step * window.innerHeight);
      }, i + 1);
      await page.waitForTimeout(300);
    }

    // Scroll back to top and wait for any final images
    await page.evaluate(() => window.scrollTo(0, 0));
    await page.waitForTimeout(1000);

    // Extract images from DOM as well
    const domImages = await page.evaluate(() => {
      return Array.from(document.querySelectorAll("img"))
        .map((img) => img.src)
        .filter((src) => src && src.startsWith("http"));
    });
    for (const src of domImages) {
      images.add(src);
    }

    const html = await page.content();

    console.log(JSON.stringify({
      html: html,
      images: Array.from(images),
      url: url
    }));
  } catch (e) {
    console.log(JSON.stringify({ error: e.message, url: url }));
  } finally {
    // Close the browser on every path so no Chromium process is left behind.
    if (browser) await browser.close();
  }
})();
|
||||
3
playwright/run-toast-modifiers.sh
Normal file
3
playwright/run-toast-modifiers.sh
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
#!/bin/bash
# Wrapper for toast-modifiers.js: tells Playwright where its browsers are
# installed, then replaces this shell with node, forwarding every argument.
readonly BROWSERS_DIR=/opt/playwright/browsers
readonly TARGET_SCRIPT=/opt/playwright/toast-modifiers.js

PLAYWRIGHT_BROWSERS_PATH="$BROWSERS_DIR"
export PLAYWRIGHT_BROWSERS_PATH

exec /usr/bin/node "$TARGET_SCRIPT" "$@"
|
||||
3
playwright/run-woo-modifiers.sh
Normal file
3
playwright/run-woo-modifiers.sh
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
#!/bin/bash
# Wrapper for woo-modifiers.js: tells Playwright where its browsers are
# installed, then replaces this shell with node, forwarding every argument.
readonly BROWSERS_DIR=/opt/playwright/browsers
readonly TARGET_SCRIPT=/opt/playwright/woo-modifiers.js

PLAYWRIGHT_BROWSERS_PATH="$BROWSERS_DIR"
export PLAYWRIGHT_BROWSERS_PATH

exec /usr/bin/node "$TARGET_SCRIPT" "$@"
|
||||
3
playwright/run.sh
Normal file
3
playwright/run.sh
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
#!/bin/bash
# Wrapper for render.js: tells Playwright where its browsers are installed,
# then replaces this shell with node, forwarding every argument.
readonly BROWSERS_DIR=/opt/playwright/browsers
readonly TARGET_SCRIPT=/opt/playwright/render.js

PLAYWRIGHT_BROWSERS_PATH="$BROWSERS_DIR"
export PLAYWRIGHT_BROWSERS_PATH

exec /usr/bin/node "$TARGET_SCRIPT" "$@"
|
||||
308
playwright/toast-modifiers.js
Normal file
308
playwright/toast-modifiers.js
Normal file
|
|
@ -0,0 +1,308 @@
|
|||
const { chromium } = require("playwright");
|
||||
|
||||
/**
 * Toast online-ordering modifier scraper.
 *
 * Usage: node toast-modifiers.js <url>
 *
 * Loads the page, reads menu items out of window.__OO_STATE__, then clicks one
 * item per itemGroupGuid (plus every item without one) and records the
 * modifier groups carried by the menuItemDetails GraphQL response that the
 * click produces. Exactly one JSON document is written to stdout:
 * { items, modifiers, itemModifierMap, stats } on success, or
 * { error, items: [], modifiers: [], itemModifierMap: {} } on failure.
 * Progress messages go to stderr. A missing URL exits with status 1.
 */
(async () => {
  const url = process.argv[2];
  if (!url) {
    console.log(JSON.stringify({ error: "URL required" }));
    process.exit(1);
  }

  const log = (msg) => process.stderr.write("[toast-mod] " + msg + "\n");

  let browser;
  try {
    browser = await chromium.launch({ headless: true });
    const context = await browser.newContext({
      userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
    });
    const page = await context.newPage();

    // Set up GraphQL response interceptor BEFORE navigation to catch everything.
    // latestResponse holds the most recent menuItemDetails payload that carried
    // at least one modifier group; the click loop resets it before each click.
    let latestResponse = null;

    page.on("response", async (response) => {
      try {
        const responseUrl = response.url();
        if (!responseUrl.includes("graphql") && !responseUrl.includes("federated-gateway")) return;

        const ct = response.headers()["content-type"] || "";
        if (!ct.includes("json")) return;

        // The body may be a single result or an array of results.
        const rawBody = await response.json();
        const responses = Array.isArray(rawBody) ? rawBody : [rawBody];
        for (const body of responses) {
          const details = body && body.data && body.data.menuItemDetails;
          if (details && Array.isArray(details.modifierGroups) && details.modifierGroups.length > 0) {
            latestResponse = details;
          }
        }
      } catch (e) {
        // Deliberate best effort: a response whose body cannot be read or
        // parsed is simply not a usable modifier payload.
      }
    });

    log("Navigating to " + url);
    await page.goto(url, { waitUntil: "load", timeout: 60000 });
    await page.waitForTimeout(5000);

    const title = await page.title();
    log("Page title: " + title);

    let hasOoState = await page.evaluate(() => !!window.__OO_STATE__);
    if (!hasOoState) {
      log("No __OO_STATE__ yet, waiting 10 more seconds...");
      await page.waitForTimeout(10000);
      hasOoState = await page.evaluate(() => !!window.__OO_STATE__);
      if (!hasOoState) {
        console.log(JSON.stringify({ error: "No __OO_STATE__ found", items: [], modifiers: [], itemModifierMap: {} }));
        // Return instead of process.exit(): the finally block closes the
        // browser and the process ends on its own once output is flushed.
        return;
      }
    }

    // Extract items from every "Menu:*" entry of the page state, including
    // items nested one level down in subgroups.
    const ooData = await page.evaluate(() => {
      const state = window.__OO_STATE__ || {};
      const items = [];

      // Append every named item of `source.items`, tagged with `category`.
      const collect = (source, category) => {
        if (!source || !Array.isArray(source.items)) return;
        for (const item of source.items) {
          if (!item.name) continue;
          items.push({
            name: item.name.trim(),
            guid: item.guid || "",
            itemGroupGuid: item.itemGroupGuid || "",
            hasModifiers: !!item.hasModifiers,
            category: category
          });
        }
      };

      for (const key of Object.keys(state)) {
        if (!key.startsWith("Menu:")) continue;
        const menu = state[key];
        if (!menu || !Array.isArray(menu.groups)) continue;

        for (const group of menu.groups) {
          const groupName = group.name || "Menu";
          collect(group, groupName);

          const subs = group.subgroups || group.children || group.childGroups || [];
          for (const sub of subs) {
            collect(sub, sub.name || groupName);
          }
        }
      }

      return items;
    });

    const modifierItems = ooData.filter((i) => i.hasModifiers);
    log("Found " + ooData.length + " items, " + modifierItems.length + " with modifiers");

    if (modifierItems.length === 0) {
      console.log(JSON.stringify({ items: ooData, modifiers: [], itemModifierMap: {} }));
      return;
    }

    // OPTIMIZATION: Deduplicate by itemGroupGuid - only click one representative per group
    const guidToItems = new Map(); // itemGroupGuid -> [items]
    const noGuidItems = []; // items without itemGroupGuid

    for (const item of modifierItems) {
      if (!item.itemGroupGuid) {
        noGuidItems.push(item);
        continue;
      }
      if (!guidToItems.has(item.itemGroupGuid)) {
        guidToItems.set(item.itemGroupGuid, []);
      }
      guidToItems.get(item.itemGroupGuid).push(item);
    }

    // Build click list: one item per unique itemGroupGuid + all items without a guid
    const clickList = [
      ...Array.from(guidToItems.values(), (items) => items[0]),
      ...noGuidItems
    ];

    log("Deduplicated: " + modifierItems.length + " modifier items -> " + clickList.length + " unique groups to click (" + guidToItems.size + " guids + " + noGuidItems.length + " ungrouped)");

    // Click items to extract modifier data
    const allModifierGroups = new Map();
    // Keys are item names scraped from the page, so use a prototype-less
    // dictionary: names such as "__proto__" or "constructor" stay ordinary keys.
    const itemModifierMap = Object.create(null);
    let clickedCount = 0;
    let failedClicks = 0;

    /**
     * Register every not-yet-seen modifier group in `groups` (recursively,
     * through modifiers that carry their own modifierGroups) and return the
     * group names that apply, nested names first and then the group's own.
     *
     * Nested groups are walked even when the parent group is already
     * registered, so the returned names do not depend on which item happened
     * to be clicked first.
     */
    const processModGroups = (groups, prefix) => {
      if (!Array.isArray(groups)) return [];
      const modNames = [];

      for (const mg of groups) {
        const fullName = prefix ? prefix + " > " + mg.name : mg.name;
        // Groups without a guid are keyed by their name path instead.
        const guid = mg.guid || fullName;
        const isNew = !allModifierGroups.has(guid);
        const options = [];

        if (mg.modifiers && Array.isArray(mg.modifiers)) {
          for (const mod of mg.modifiers) {
            if (isNew) {
              const price = typeof mod.price === "number" ? mod.price : 0;
              options.push({ name: mod.name || "", price: price });
            }

            if (mod.modifierGroups && Array.isArray(mod.modifierGroups) && mod.modifierGroups.length > 0) {
              modNames.push(...processModGroups(mod.modifierGroups, fullName));
            }
          }
        }

        if (isNew) {
          allModifierGroups.set(guid, {
            guid: guid,
            name: mg.name || "",
            required: (mg.minSelections || 0) > 0,
            minSelections: mg.minSelections || 0,
            maxSelections: mg.maxSelections || 0,
            options: options
          });
        }

        modNames.push(allModifierGroups.get(guid).name);
      }

      return modNames;
    };

    // Only click the deduplicated clickList
    for (const item of clickList) {
      try {
        latestResponse = null;

        // Find the item by an exact (regex-escaped) match on its header text
        const headerLocator = page.locator(".headerText").filter({
          hasText: new RegExp("^" + item.name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") + "$")
        }).first();

        if ((await headerLocator.count()) === 0) {
          failedClicks++;
          log("Not found on page: " + item.name);
          continue;
        }

        // Prefer an ancestor marked "clickable"; fall back to the header itself.
        const clickable = headerLocator.locator("xpath=ancestor::*[contains(@class,'clickable')]").first();
        const target = (await clickable.count()) > 0 ? clickable : headerLocator;
        await target.scrollIntoViewIfNeeded();
        await target.click({ timeout: 3000 });

        clickedCount++;

        // Wait for GraphQL response (up to 6s)
        const startTime = Date.now();
        while (!latestResponse && Date.now() - startTime < 6000) {
          await page.waitForTimeout(200);
        }

        if (latestResponse && latestResponse.modifierGroups) {
          const names = processModGroups(latestResponse.modifierGroups, "");

          // Map the clicked item
          itemModifierMap[item.name] = names;

          // OPTIMIZATION: Immediately map all siblings with same itemGroupGuid
          const siblings = item.itemGroupGuid ? guidToItems.get(item.itemGroupGuid) || [] : [];
          for (const sibling of siblings) {
            itemModifierMap[sibling.name] = names;
          }
        }

        // Close modal
        await page.keyboard.press("Escape");
        await page.waitForTimeout(400);
      } catch (e) {
        log("Error clicking " + item.name + ": " + e.message);
        try {
          await page.keyboard.press("Escape");
        } catch (e2) {
          // Best effort: a modal may not be open; keep going with the next item.
        }
        await page.waitForTimeout(300);
      }
    }

    const directMapped = Object.keys(itemModifierMap).length;
    log("Clicked: " + clickedCount + "/" + clickList.length + ", Mapped: " + directMapped + "/" + modifierItems.length);

    // Final fallback: any remaining unmapped items, try to infer from an
    // already-mapped item that shares the same itemGroupGuid.
    const firstItemByName = new Map();
    for (const candidate of modifierItems) {
      if (!firstItemByName.has(candidate.name)) {
        firstItemByName.set(candidate.name, candidate);
      }
    }

    let inferredCount = 0;
    for (const item of modifierItems) {
      if (itemModifierMap[item.name]) continue;
      if (!item.itemGroupGuid) continue;

      const donorName = Object.keys(itemModifierMap).find((mappedName) => {
        const mappedItem = firstItemByName.get(mappedName);
        return Boolean(mappedItem) && mappedItem.itemGroupGuid === item.itemGroupGuid;
      });
      if (donorName !== undefined) {
        itemModifierMap[item.name] = itemModifierMap[donorName];
        inferredCount++;
      }
    }

    if (inferredCount > 0) {
      log("Inferred modifiers for " + inferredCount + " additional items via itemGroupGuid fallback");
    }

    log("Final: " + Object.keys(itemModifierMap).length + "/" + modifierItems.length + " mapped, " + allModifierGroups.size + " unique modifier groups");

    // The internal guid key is not part of the emitted modifier records.
    const modifiers = Array.from(allModifierGroups.values()).map((mg) => ({
      name: mg.name,
      required: mg.required,
      minSelections: mg.minSelections,
      maxSelections: mg.maxSelections,
      options: mg.options
    }));

    console.log(JSON.stringify({
      items: ooData,
      modifiers: modifiers,
      itemModifierMap: itemModifierMap,
      stats: {
        totalItems: ooData.length,
        itemsWithModifiers: modifierItems.length,
        modifiersExtracted: Object.keys(itemModifierMap).length,
        uniqueModifierGroups: modifiers.length,
        clickedCount: clickedCount,
        failedClicks: failedClicks,
        uniqueGroups: guidToItems.size,
        inferredCount: inferredCount
      }
    }));
  } catch (e) {
    log("Fatal error: " + e.message);
    console.log(JSON.stringify({ error: e.message, items: [], modifiers: [], itemModifierMap: {} }));
  } finally {
    // Single cleanup point for every exit path (success, early return, error).
    if (browser) await browser.close();
  }
})();
|
||||
497
playwright/woo-modifiers.js
Normal file
497
playwright/woo-modifiers.js
Normal file
|
|
@ -0,0 +1,497 @@
|
|||
const { chromium } = require("playwright");
|
||||
|
||||
(async () => {
|
||||
const url = process.argv[2];
|
||||
if (!url) {
|
||||
console.log(JSON.stringify({ error: "URL required" }));
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const log = (msg) => process.stderr.write("[woo-mod] " + msg + "\n");
|
||||
|
||||
let browser;
|
||||
try {
|
||||
browser = await chromium.launch({ headless: true });
|
||||
const context = await browser.newContext({
|
||||
userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
|
||||
});
|
||||
const page = await context.newPage();
|
||||
|
||||
log("Navigating to " + url);
|
||||
await page.goto(url, { waitUntil: "networkidle", timeout: 60000 });
|
||||
await page.waitForTimeout(3000);
|
||||
|
||||
// Close any popups/modals
|
||||
try {
|
||||
const closeButtons = await page.$$('.close, .modal .close, [aria-label="Close"]');
|
||||
for (const btn of closeButtons) {
|
||||
if (await btn.isVisible()) await btn.click().catch(() => {});
|
||||
}
|
||||
} catch (e) {}
|
||||
|
||||
// Extract business info from the page
|
||||
const businessInfo = await page.evaluate(() => {
|
||||
const info = { name: '', address: '', phone: '', hours: '' };
|
||||
// Try common selectors for business name
|
||||
const nameEl = document.querySelector('.site-title, .logo-text, h1.site-title, .custom-logo-link img, title');
|
||||
if (nameEl) {
|
||||
info.name = nameEl.alt || nameEl.textContent || '';
|
||||
info.name = info.name.replace(/\s*[-–|].*$/, '').trim(); // strip taglines
|
||||
}
|
||||
// Try page title as fallback
|
||||
if (!info.name && document.title) {
|
||||
info.name = document.title.replace(/\s*[-–|].*$/, '').trim();
|
||||
}
|
||||
// Look for address/phone in common locations
|
||||
const bodyText = document.body.innerText;
|
||||
// Phone
|
||||
const phoneMatch = bodyText.match(/(?:Call|Phone|Tel)[:\s]*\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4}/i) ||
|
||||
bodyText.match(/\(?\d{3}\)?[\s.-]\d{3}[\s.-]\d{4}/);
|
||||
if (phoneMatch) info.phone = phoneMatch[0].replace(/^(?:Call|Phone|Tel)[:\s]*/i, '').trim();
|
||||
// Address - look for street patterns
|
||||
const addrMatch = bodyText.match(/\d{1,5}\s+[A-Z][a-zA-Z\s]+(?:St|Ave|Blvd|Dr|Rd|Ln|Way|Ct|Pl|Cir)[.,]?\s*(?:[A-Z][a-zA-Z\s]+,?\s*[A-Z]{2}\s*\d{5})?/);
|
||||
if (addrMatch) info.address = addrMatch[0].trim();
|
||||
return info;
|
||||
});
|
||||
log("Business: " + businessInfo.name + " | " + businessInfo.address + " | " + businessInfo.phone);
|
||||
|
||||
// Strategy 1: Products displayed inline on the page (custom WooCommerce themes)
|
||||
// Collect products from all category tabs/pages
|
||||
let allProducts = [];
|
||||
|
||||
// Check for category tabs (clickable, same-page) vs category links (separate pages)
|
||||
const categoryTabs = await page.evaluate(() => {
|
||||
const tabs = document.querySelectorAll('li.tabs, .category-tab, [data-filter]');
|
||||
return [...tabs].map((t, i) => ({ index: i, name: t.textContent.trim(), active: t.classList.contains('active') }))
|
||||
.filter(t => t.name.length > 0 && t.name.length < 60);
|
||||
});
|
||||
|
||||
const categoryLinks = await page.evaluate(() => {
|
||||
// Only use links if no tabs found — some themes use links that 404
|
||||
const cats = document.querySelectorAll('.product-category a, .product_cat a');
|
||||
return [...cats].map(a => ({ href: a.href, name: a.textContent.trim() })).filter(c => c.name.length > 0 && c.name.length < 60);
|
||||
});
|
||||
|
||||
const useTabs = categoryTabs.length > 0;
|
||||
log("Found " + categoryTabs.length + " category tabs, " + categoryLinks.length + " category links" + (useTabs ? " (using tabs)" : ""));
|
||||
|
||||
const visitedUrls = new Set([page.url()]);
|
||||
|
||||
// Scrape current page first
|
||||
/**
 * Scrape the product cards currently rendered on the page.
 *
 * @param {?string} catOverride Category to assign to every product; when
 *   falsy, the category is derived from a `product_cat-<slug>` class on the
 *   card (or left empty).
 * @returns {Promise<Array<{name: string, price: number, description: string,
 *   imageUrl: string, category: string, postId: string}>>} One entry per card
 *   that has a non-empty title; price is 0 when none can be parsed.
 */
const scrapeInlineProducts = async (catOverride) => {
  return await page.evaluate((catName) => {
    // querySelector() with a selector list returns the first match in document
    // order, not in list order, so a combined selector always yields the outer
    // `.price` container and a sale item reports its struck-through price.
    // Probe one selector at a time instead: sale price, then amount, then container.
    const PRICE_SELECTORS = ['.price ins .amount', '.price .woocommerce-Price-amount', '.price'];

    // Return the first parseable price on the card, or 0.
    const readPrice = (card) => {
      for (const selector of PRICE_SELECTORS) {
        const priceEl = card.querySelector(selector);
        if (!priceEl) continue;
        const m = priceEl.textContent.match(/\$?([\d.]+)/);
        const value = m ? parseFloat(m[1]) : NaN;
        if (!Number.isNaN(value)) return value;
      }
      return 0;
    };

    const products = [];
    const productEls = document.querySelectorAll('.product-con-box, li.product, .type-product, .product-item');
    productEls.forEach((el) => {
      const nameEl = el.querySelector('.woocommerce-loop-product__title, h2, h3, .product-title');
      if (!nameEl) return;
      const name = nameEl.textContent.trim();
      if (!name) return;

      const descEl = el.querySelector('.woocommerce-product-details__short-description, .description, .short-description');
      const description = descEl ? descEl.textContent.trim().substring(0, 200) : '';

      const price = readPrice(el);

      const imgEl = el.querySelector('img');
      const imageUrl = imgEl ? (imgEl.src || imgEl.dataset.src || '') : '';

      // Try to get category from element classes, e.g. "product_cat-hot-dogs" -> "Hot Dogs"
      let category = catName || '';
      if (!category) {
        const catMatch = (el.className || '').match(/product_cat-([a-z0-9-]+)/);
        if (catMatch) {
          category = catMatch[1].replace(/-/g, ' ').replace(/\b\w/g, (c) => c.toUpperCase());
        }
      }

      // Get post ID (from the "post-<id>" class) for building the product URL later
      const idMatch = (el.className || '').match(/post-(\d+)/);
      const postId = idMatch ? idMatch[1] : '';

      products.push({ name, price, description, imageUrl, category, postId });
    });
    return products;
  }, catOverride);
};
|
||||
|
||||
// Scrape products — either by clicking tabs or visiting category pages
|
||||
if (useTabs) {
|
||||
// Click each tab (prevent navigation) and scrape products that appear
|
||||
for (const tab of categoryTabs) {
|
||||
try {
|
||||
log("Clicking tab: " + tab.name);
|
||||
const tabName = tab.name;
|
||||
await page.evaluate((name) => {
|
||||
const btns = document.querySelectorAll('li.tabs a, a.catabtn, .category-tab a');
|
||||
for (const btn of btns) {
|
||||
if (btn.textContent.trim() === name) {
|
||||
btn.addEventListener('click', e => e.preventDefault(), { once: true });
|
||||
btn.click();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}, tabName);
|
||||
await page.waitForTimeout(2500);
|
||||
|
||||
const catProducts = await scrapeInlineProducts(tab.name);
|
||||
for (const p of catProducts) {
|
||||
if (!allProducts.find(ep => ep.name === p.name)) {
|
||||
allProducts.push(p);
|
||||
}
|
||||
}
|
||||
log(" -> " + catProducts.length + " products");
|
||||
} catch (e) {
|
||||
log("Error on tab " + tab.name + ": " + e.message);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Scrape homepage products first
|
||||
let homeProducts = await scrapeInlineProducts(null);
|
||||
log("Found " + homeProducts.length + " products on homepage");
|
||||
allProducts.push(...homeProducts);
|
||||
|
||||
// Visit each category page
|
||||
if (categoryLinks.length > 0) {
|
||||
for (const cat of categoryLinks) {
|
||||
if (visitedUrls.has(cat.href)) continue;
|
||||
visitedUrls.add(cat.href);
|
||||
try {
|
||||
log("Visiting category: " + cat.name);
|
||||
await page.goto(cat.href, { waitUntil: "networkidle", timeout: 30000 });
|
||||
await page.waitForTimeout(2000);
|
||||
|
||||
let pageNum = 1;
|
||||
while (pageNum <= 10) {
|
||||
const catProducts = await scrapeInlineProducts(cat.name);
|
||||
for (const p of catProducts) {
|
||||
if (!allProducts.find(ep => ep.name === p.name)) {
|
||||
allProducts.push(p);
|
||||
}
|
||||
}
|
||||
|
||||
const nextUrl = await page.evaluate(() => {
|
||||
const next = document.querySelector('.woocommerce-pagination .next, a.next.page-numbers');
|
||||
return next ? next.href : null;
|
||||
});
|
||||
if (!nextUrl) break;
|
||||
pageNum++;
|
||||
await page.goto(nextUrl, { waitUntil: "networkidle", timeout: 30000 });
|
||||
await page.waitForTimeout(1500);
|
||||
}
|
||||
} catch (e) {
|
||||
log("Error on category " + cat.name + ": " + e.message);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
log("Total unique products: " + allProducts.length);
|
||||
|
||||
// Strategy 2: If no inline products found, try standard product links
|
||||
if (allProducts.length === 0) {
|
||||
log("No inline products - trying product link approach");
|
||||
await page.goto(url, { waitUntil: "networkidle", timeout: 30000 });
|
||||
await page.waitForTimeout(3000);
|
||||
|
||||
const productLinks = await page.evaluate(() => {
|
||||
const anchors = document.querySelectorAll('a[href*="/product/"], a.woocommerce-LoopProduct-link');
|
||||
return [...new Set([...anchors].map(a => a.href))];
|
||||
});
|
||||
|
||||
log("Found " + productLinks.length + " product links");
|
||||
|
||||
for (let i = 0; i < productLinks.length; i++) {
|
||||
try {
|
||||
await page.goto(productLinks[i], { waitUntil: "domcontentloaded", timeout: 30000 });
|
||||
await page.waitForTimeout(1000);
|
||||
|
||||
const pd = await page.evaluate(() => {
|
||||
const nameEl = document.querySelector('.product_title, h1.entry-title');
|
||||
const name = nameEl ? nameEl.textContent.trim() : '';
|
||||
const priceEl = document.querySelector('.summary .price .woocommerce-Price-amount');
|
||||
let price = 0;
|
||||
if (priceEl) { const m = priceEl.textContent.match(/\$?([\d.]+)/); if (m) price = parseFloat(m[1]) || 0; }
|
||||
const descEl = document.querySelector('.woocommerce-product-details__short-description');
|
||||
const desc = descEl ? descEl.textContent.trim().substring(0, 200) : '';
|
||||
const imgEl = document.querySelector('.woocommerce-product-gallery__image img');
|
||||
const img = imgEl ? (imgEl.src || '') : '';
|
||||
const catEl = document.querySelector('.posted_in a');
|
||||
const cat = catEl ? catEl.textContent.trim() : '';
|
||||
return { name, price, description: desc, imageUrl: img, category: cat, postId: '' };
|
||||
});
|
||||
|
||||
if (pd.name) allProducts.push(pd);
|
||||
} catch (e) {
|
||||
log("Error on product link: " + e.message);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Now extract modifiers by visiting individual product pages
|
||||
const modifierGroupsMap = {};
|
||||
const itemModifierMap = {};
|
||||
|
||||
// For inline products, we need their permalink - try /product/{slug} or ?p={postId}
|
||||
for (let i = 0; i < allProducts.length; i++) {
|
||||
const prod = allProducts[i];
|
||||
log(`[${i + 1}/${allProducts.length}] Extracting modifiers for: ${prod.name}`);
|
||||
|
||||
// Build product URL from name slug or postId
|
||||
let productUrl = '';
|
||||
if (prod.postId) {
|
||||
productUrl = url.replace(/\/$/, '') + '/?p=' + prod.postId;
|
||||
} else {
|
||||
const slug = prod.name.toLowerCase().replace(/[^a-z0-9]+/g, '-').replace(/^-|-$/g, '');
|
||||
productUrl = url.replace(/\/$/, '') + '/product/' + slug + '/';
|
||||
}
|
||||
|
||||
try {
|
||||
await page.goto(productUrl, { waitUntil: "domcontentloaded", timeout: 20000 });
|
||||
await page.waitForTimeout(1500);
|
||||
|
||||
const groups = await page.evaluate(() => {
|
||||
const results = [];
|
||||
const seen = new Set();
|
||||
|
||||
// Helper: normalise a TMEPO option value by dropping a trailing "_<digits>"
// index and surrounding whitespace, e.g. "Small_0" -> "Small".
// Falsy input yields the empty string.
const cleanVal = (v) => {
  if (!v) {
    return '';
  }
  const withoutIndex = v.replace(/_\d+$/, '');
  return withoutIndex.trim();
};
|
||||
|
||||
// Helper: read an option's price from its input element.
// The first entry of the JSON array in `data-rules` wins when it parses to a
// positive number; otherwise a positive `data-price` is used; otherwise 0.
// A missing input or unparseable `data-rules` is treated as "no price".
const parseRulesPrice = (input) => {
  if (!input) return 0;

  // Parse a raw value and keep it only when it is a positive number.
  const positiveOrZero = (raw) => {
    const parsed = parseFloat(raw);
    return parsed > 0 ? parsed : 0;
  };

  const rulesAttr = input.getAttribute('data-rules');
  if (rulesAttr) {
    let ruleList = null;
    try {
      ruleList = JSON.parse(rulesAttr);
    } catch (e) {
      ruleList = null;
    }
    if (Array.isArray(ruleList) && ruleList.length > 0) {
      const fromRules = positiveOrZero(ruleList[0]);
      if (fromRules > 0) return fromRules;
    }
  }

  const priceAttr = input.getAttribute('data-price');
  return priceAttr ? positiveOrZero(priceAttr) : 0;
};
|
||||
|
||||
// Prefer granular containers (individual field cells) over broad row containers
|
||||
// tm-cell with cpf-type are individual modifier groups; tc-cell.tcell are TMEPO cells
|
||||
const granular = document.querySelectorAll('.tm-cell[class*="cpf-type-"], .tc-cell.tcell');
|
||||
const broad = document.querySelectorAll('.tc-row, .tm-row, .cpf-section');
|
||||
// Use granular if they have labels, otherwise fall back to broad
|
||||
const granularWithLabels = [...granular].filter(el =>
|
||||
el.querySelector('.tm-epo-field-label label, .tm-epo-element-label, h3.tm-epo-field-label, label:first-of-type')
|
||||
);
|
||||
const elements = granularWithLabels.length > 0 ? granularWithLabels : (broad.length > 0 ? broad : granular);
|
||||
|
||||
elements.forEach(section => {
|
||||
const labelEl = section.querySelector('.tm-epo-field-label label, .tm-epo-element-label, h3.tm-epo-field-label, label:first-of-type');
|
||||
if (!labelEl) return;
|
||||
|
||||
const groupName = labelEl.textContent.trim();
|
||||
if (!groupName || groupName.length > 80 || seen.has(groupName)) return;
|
||||
if (/special request|sandwich name|your name|instructions|quantity/i.test(groupName)) return;
|
||||
seen.add(groupName);
|
||||
|
||||
const options = [];
|
||||
let groupType = 'select'; // default
|
||||
|
||||
// Radio buttons and checkboxes — get name from input.value, price from data-rules
|
||||
const radios = section.querySelectorAll('input[type="radio"]');
|
||||
const checkboxes = section.querySelectorAll('input[type="checkbox"]');
|
||||
if (radios.length > 0) groupType = 'radio';
|
||||
else if (checkboxes.length > 0) groupType = 'checkbox';
|
||||
|
||||
// Check if this checkbox group has both preselected and non-preselected (split into two groups)
|
||||
const hasPreselected = checkboxes.length > 0 && [...checkboxes].some(c => c.className.includes('custom-preselected'));
|
||||
const hasAdditions = checkboxes.length > 0 && [...checkboxes].some(c => !c.className.includes('custom-preselected'));
|
||||
const shouldSplit = hasPreselected && hasAdditions;
|
||||
|
||||
const additionOptions = []; // only used if splitting
|
||||
|
||||
section.querySelectorAll('.tmcp-field-wrap, .tm-field-wrap, label.tm-epo-field-label-wrap').forEach(wrap => {
|
||||
const input = wrap.querySelector('input[type="radio"], input[type="checkbox"]');
|
||||
if (input) {
|
||||
const optName = cleanVal(input.value);
|
||||
if (!optName || optName.length > 80) return;
|
||||
const optPrice = parseRulesPrice(input);
|
||||
const selected = input.checked || wrap.classList.contains('tc-active');
|
||||
const isPreselected = input.className.includes('custom-preselected');
|
||||
|
||||
// Skip disabled duplicates (size variants)
|
||||
if (input.disabled || input.className.includes('tcdisabled')) return;
|
||||
|
||||
const entry = { name: optName, price: optPrice, selected };
|
||||
|
||||
if (shouldSplit && !isPreselected) {
|
||||
// Deduplicate
|
||||
if (!additionOptions.find(o => o.name === optName)) {
|
||||
additionOptions.push(entry);
|
||||
}
|
||||
} else {
|
||||
// Deduplicate
|
||||
if (!options.find(o => o.name === optName)) {
|
||||
options.push(entry);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
// Fallback: try label text
|
||||
const lbl = wrap.querySelector('.tm-label, .tm-value, label span:not(.tm-price)');
|
||||
if (lbl) {
|
||||
const optName = lbl.textContent.replace(/[\n\r\t]+/g, ' ').trim();
|
||||
if (!optName || optName.length > 80) return;
|
||||
let optPrice = 0;
|
||||
const priceSpan = wrap.querySelector('.tm-price, .price .amount, [class*="price"]');
|
||||
if (priceSpan) {
|
||||
const m = priceSpan.textContent.match(/\+?\$?([\d.]+)/);
|
||||
if (m) optPrice = parseFloat(m[1]) || 0;
|
||||
}
|
||||
if (!options.find(o => o.name === optName)) {
|
||||
options.push({ name: optName, price: optPrice, selected: false });
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Select dropdowns — get name from option.value, price from data-price
|
||||
if (options.length === 0) {
|
||||
section.querySelectorAll('select option').forEach(opt => {
|
||||
if (!opt.value) return;
|
||||
let optName = cleanVal(opt.value);
|
||||
if (!optName || optName.length > 80) return;
|
||||
let optPrice = 0;
|
||||
const dp = opt.getAttribute('data-price');
|
||||
if (dp) { const v = parseFloat(dp); if (!isNaN(v) && v > 0) optPrice = v; }
|
||||
const text = opt.textContent.trim();
|
||||
if (text && text.length < 80 && text !== optName) {
|
||||
const m = text.match(/\+?\$?([\d.]+)/);
|
||||
if (m) optPrice = optPrice || (parseFloat(m[1]) || 0);
|
||||
optName = text.replace(/\s*\(\+?\$?[\d.]+\)\s*$/, '').trim() || optName;
|
||||
}
|
||||
options.push({ name: optName, price: optPrice, selected: opt.selected });
|
||||
});
|
||||
}
|
||||
|
||||
if (options.length > 0) {
|
||||
const required = section.querySelector('.required, [data-required="1"]') !== null;
|
||||
results.push({ name: groupName, type: groupType, options, required });
|
||||
}
|
||||
|
||||
});
|
||||
|
||||
// Standard WooCommerce variations fallback — used only while results is still empty.
if (results.length === 0) {
  const vForm = document.querySelector('.variations_form');
  if (vForm) {
    for (const row of vForm.querySelectorAll('.variations tr')) {
      const labelEl = row.querySelector('th label, .label label');
      const selectEl = row.querySelector('select');
      // A row contributes a group only when it has both a label and a <select>.
      if (!labelEl || !selectEl) continue;

      // Options with an empty value are left out; every choice gets price 0.
      const choices = Array.from(selectEl.querySelectorAll('option'))
        .filter(optionEl => optionEl.value)
        .map(optionEl => ({ name: optionEl.textContent.trim(), price: 0 }));
      if (choices.length === 0) continue;

      results.push({ name: labelEl.textContent.trim(), options: choices, required: true });
    }
  }
}
|
||||
|
||||
return results;
|
||||
});
|
||||
|
||||
// If item has no price, try to get it from the product page
if (prod.price === 0) {
  const pagePrice = await page.evaluate(() => {
    // Parse a numeric string; anything that is not a positive number counts as 0.
    const positiveOrZero = raw => {
      const parsed = parseFloat(raw);
      return parsed > 0 ? parsed : 0;
    };

    // First choice: the price amount rendered on the page.
    const amountEl = document.querySelector('.summary .price .woocommerce-Price-amount, .summary .price .amount, .product .price .amount');
    const amountMatch = amountEl ? amountEl.textContent.match(/\$?([\d.]+)/) : null;
    const displayed = amountMatch ? positiveOrZero(amountMatch[1]) : 0;
    if (displayed > 0) return displayed;

    // Second choice: the value of the input.cpf-product-price element.
    const hiddenInput = document.querySelector('input.cpf-product-price');
    return hiddenInput && hiddenInput.value ? positiveOrZero(hiddenInput.value) : 0;
  });

  if (pagePrice > 0) {
    prod.price = pagePrice;
  } else if (groups.length > 0) {
    // Use lowest price from first modifier group that has prices
    const pricedGroup = groups.find(group => group.options.some(choice => choice.price > 0));
    if (pricedGroup) {
      const positivePrices = pricedGroup.options
        .map(choice => choice.price)
        .filter(price => price > 0);
      prod.price = Math.min(...positivePrices);
    }
  }
}
|
||||
|
||||
// Convert any modifier group with absolute prices to relative (subtract base price)
if (prod.price > 0 && groups.length > 0) {
  for (const group of groups) {
    const positivePrices = group.options
      .map(choice => choice.price)
      .filter(price => price > 0);
    // Groups without any priced option are left untouched.
    if (positivePrices.length === 0) continue;

    // Only convert if options have prices near or above the item base price (absolute pricing)
    const lowest = Math.min(...positivePrices);
    const looksAbsolute = lowest >= prod.price * 0.8;
    if (!looksAbsolute) continue;

    for (const choice of group.options) {
      if (!(choice.price > 0)) continue;
      // Round the difference to cents; a negative difference is clamped to 0.
      const relative = Math.round((choice.price - prod.price) * 100) / 100;
      choice.price = relative < 0 ? 0 : relative;
    }
  }
}
|
||||
|
||||
// Register this product's modifier groups: each group is stored once in
// modifierGroupsMap (keyed by group name, first occurrence wins) and the
// product is mapped to the list of its group names in itemModifierMap.
if (groups.length > 0) {
  const itemModGroups = [];
  for (const g of groups) {
    // Own-property check: group names are scraped text, so a name such as
    // "constructor" must not be mistaken for an already-registered group.
    if (!Object.prototype.hasOwnProperty.call(modifierGroupsMap, g.name)) {
      // Groups from the WooCommerce variations fallback carry no type. Resolve
      // the default once so that type and maxSelections agree (previously an
      // untyped group became type "select" with maxSelections 0).
      const type = g.type || 'select';
      modifierGroupsMap[g.name] = {
        name: g.name,
        type,
        options: g.options,
        required: g.required,
        minSelections: g.required ? 1 : 0,
        // Single-choice inputs allow exactly one selection; other types get 0
        // (presumably "no limit" — confirm against the consumer of this JSON).
        maxSelections: type === 'radio' || type === 'select' ? 1 : 0
      };
    }
    itemModGroups.push(g.name);
  }
  itemModifierMap[prod.name] = itemModGroups;
  log(" -> " + groups.length + " modifier groups" + (prod.price > 0 ? " ($" + prod.price + ")" : ""));
}
|
||||
} catch (e) {
|
||||
log(" -> Error: " + e.message);
|
||||
}
|
||||
}
|
||||
|
||||
// Summarise the run and print the full result as a single JSON document on stdout.
const modifiers = Object.values(modifierGroupsMap);
const stats = {
  totalProducts: allProducts.length,
  itemsExtracted: allProducts.length,
  modifierGroups: modifiers.length,
  itemsWithModifiers: Object.keys(itemModifierMap).length
};

log(`Done: ${stats.itemsExtracted} items, ${stats.modifierGroups} modifier groups, ${stats.itemsWithModifiers} items with modifiers`);

console.log(
  JSON.stringify({
    business: businessInfo,
    items: allProducts,
    modifiers: modifiers,
    itemModifierMap: itemModifierMap,
    stats: stats
  })
);
|
||||
|
||||
} catch (err) {
|
||||
log("Fatal: " + err.message);
|
||||
console.log(JSON.stringify({ error: err.message }));
|
||||
process.exit(1);
|
||||
} finally {
|
||||
if (browser) await browser.close();
|
||||
}
|
||||
})();
|
||||
Reference in a new issue