Add playwright scripts to git
Previously only lived on servers at /opt/playwright/. Now tracked in repo. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
f58d567fb4
commit
dd2a508680
7 changed files with 900 additions and 0 deletions
15
playwright/package.json
Normal file
15
playwright/package.json
Normal file
|
|
@ -0,0 +1,15 @@
|
||||||
|
{
|
||||||
|
"name": "playwright",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"main": "index.js",
|
||||||
|
"scripts": {
|
||||||
|
"test": "echo \"Error: no test specified\" && exit 1"
|
||||||
|
},
|
||||||
|
"keywords": [],
|
||||||
|
"author": "",
|
||||||
|
"license": "ISC",
|
||||||
|
"description": "",
|
||||||
|
"dependencies": {
|
||||||
|
"playwright": "^1.58.2"
|
||||||
|
}
|
||||||
|
}
|
||||||
71
playwright/render.js
Normal file
71
playwright/render.js
Normal file
|
|
@ -0,0 +1,71 @@
|
||||||
|
const { chromium } = require("playwright");
|
||||||
|
|
||||||
|
/**
 * Render a URL in headless Chromium and print one JSON document to stdout.
 *
 * Usage: node render.js <url> [waitMs]
 *   url    - page to load (required; prints {"error":"URL required"} and exits 1 if missing)
 *   waitMs - milliseconds to wait after the "load" event before scrolling (default 3000)
 *
 * Output on success: { html, images, url }
 * Output on failure: { error, url }
 */
(async () => {
  const DEFAULT_WAIT_MS = 3000;
  const MAX_SCROLL_STEPS = 20;

  const url = process.argv[2];

  if (!url) {
    console.log(JSON.stringify({ error: "URL required" }));
    process.exit(1);
  }

  // Explicit radix, and fall back to the default when the argument is missing,
  // non-numeric or negative instead of passing NaN to waitForTimeout.
  const parsedWait = Number.parseInt(process.argv[3], 10);
  const wait = Number.isFinite(parsedWait) && parsedWait >= 0 ? parsedWait : DEFAULT_WAIT_MS;

  let browser;
  try {
    // Launch inside the try so a launch/context failure is reported as JSON
    // (as the modifier scripts do) and the browser is always closed.
    browser = await chromium.launch({ headless: true });
    const context = await browser.newContext({
      userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    });
    const page = await context.newPage();

    // Track image URLs as they load (anything served with an image/* content type)
    const images = new Set();
    page.on("response", (response) => {
      const contentType = response.headers()["content-type"] || "";
      if (contentType.includes("image/")) {
        images.add(response.url());
      }
    });

    // Use load instead of networkidle - more reliable for sites with persistent connections
    await page.goto(url, { waitUntil: "load", timeout: 45000 });

    // Wait for initial JS rendering
    await page.waitForTimeout(wait);

    // Scroll the page one viewport at a time to trigger lazy-loaded images
    // (DoorDash, etc.), capped at MAX_SCROLL_STEPS.
    const { scrollHeight, viewportHeight } = await page.evaluate(() => ({
      scrollHeight: document.body.scrollHeight,
      viewportHeight: window.innerHeight
    }));
    const scrollSteps = Math.min(Math.ceil(scrollHeight / viewportHeight), MAX_SCROLL_STEPS);

    for (let step = 1; step <= scrollSteps; step++) {
      await page.evaluate((n) => {
        window.scrollTo(0, n * window.innerHeight);
      }, step);
      await page.waitForTimeout(300);
    }

    // Scroll back to top and wait for any final images
    await page.evaluate(() => window.scrollTo(0, 0));
    await page.waitForTimeout(1000);

    // Extract absolute image URLs from the DOM as well
    const domImages = await page.evaluate(() => {
      return Array.from(document.querySelectorAll("img"))
        .map((img) => img.src)
        .filter((src) => src && src.startsWith("http"));
    });
    for (const src of domImages) {
      images.add(src);
    }

    const html = await page.content();

    console.log(JSON.stringify({
      html: html,
      images: Array.from(images),
      url: url
    }));
  } catch (e) {
    console.log(JSON.stringify({ error: e.message, url: url }));
  } finally {
    if (browser) {
      try {
        await browser.close();
      } catch (closeError) {
        // The JSON result is already on stdout; report close problems on stderr only.
        process.stderr.write("[render] browser.close failed: " + closeError.message + "\n");
      }
    }
  }
})();
|
||||||
3
playwright/run-toast-modifiers.sh
Normal file
3
playwright/run-toast-modifiers.sh
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
#!/bin/bash
# Wrapper for toast-modifiers.js: sets PLAYWRIGHT_BROWSERS_PATH to
# /opt/playwright/browsers, then replaces this shell with node, forwarding
# all command-line arguments unchanged.
pw_root=/opt/playwright
export PLAYWRIGHT_BROWSERS_PATH="${pw_root}/browsers"
exec /usr/bin/node "${pw_root}/toast-modifiers.js" "$@"
|
||||||
3
playwright/run-woo-modifiers.sh
Normal file
3
playwright/run-woo-modifiers.sh
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
#!/bin/bash
# Wrapper for woo-modifiers.js: sets PLAYWRIGHT_BROWSERS_PATH to
# /opt/playwright/browsers, then replaces this shell with node, forwarding
# all command-line arguments unchanged.
pw_root=/opt/playwright
export PLAYWRIGHT_BROWSERS_PATH="${pw_root}/browsers"
exec /usr/bin/node "${pw_root}/woo-modifiers.js" "$@"
|
||||||
3
playwright/run.sh
Normal file
3
playwright/run.sh
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
#!/bin/bash
# Wrapper for render.js: sets PLAYWRIGHT_BROWSERS_PATH to
# /opt/playwright/browsers, then replaces this shell with node, forwarding
# all command-line arguments unchanged.
pw_root=/opt/playwright
export PLAYWRIGHT_BROWSERS_PATH="${pw_root}/browsers"
exec /usr/bin/node "${pw_root}/render.js" "$@"
|
||||||
308
playwright/toast-modifiers.js
Normal file
308
playwright/toast-modifiers.js
Normal file
|
|
@ -0,0 +1,308 @@
|
||||||
|
const { chromium } = require("playwright");
|
||||||
|
|
||||||
|
/**
 * Extract menu items and their modifier groups from a Toast online-ordering page.
 *
 * Usage: node toast-modifiers.js <url>
 *
 * Items are read from window.__OO_STATE__ ("Menu:*" entries). For every item
 * flagged hasModifiers, one representative per itemGroupGuid is clicked and the
 * resulting GraphQL `menuItemDetails` response is captured to obtain its
 * modifier groups; siblings sharing the itemGroupGuid reuse that result.
 *
 * stdout: a single JSON document
 *   success: { items, modifiers, itemModifierMap, stats }
 *   no modifier items: { items, modifiers: [], itemModifierMap: {} }
 *   failure: { error, items: [], modifiers: [], itemModifierMap: {} }
 * stderr: progress lines prefixed with "[toast-mod] ".
 * Exit code is 1 only when the URL argument is missing.
 */
(async () => {
  const url = process.argv[2];
  if (!url) {
    console.log(JSON.stringify({ error: "URL required" }));
    process.exit(1);
  }

  const log = (msg) => process.stderr.write("[toast-mod] " + msg + "\n");

  let browser;
  try {
    browser = await chromium.launch({ headless: true });
    const context = await browser.newContext({
      userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
    });
    const page = await context.newPage();

    // Set up GraphQL response interceptor BEFORE navigation to catch everything.
    // latestResponse holds the most recent menuItemDetails payload that carried
    // a non-empty modifierGroups array; the click loop resets it before each click.
    let latestResponse = null;

    page.on("response", async (response) => {
      try {
        const responseUrl = response.url();
        if (!responseUrl.includes("graphql") && !responseUrl.includes("federated-gateway")) return;

        const ct = response.headers()["content-type"] || "";
        if (!ct.includes("json")) return;

        const rawBody = await response.json();
        // A batched GraphQL reply is an array of result objects.
        const responses = Array.isArray(rawBody) ? rawBody : [rawBody];
        for (const body of responses) {
          const details = body && body.data && body.data.menuItemDetails;
          if (details && Array.isArray(details.modifierGroups) && details.modifierGroups.length > 0) {
            latestResponse = details;
          }
        }
      } catch (e) {
        // Best effort: a body that cannot be read or parsed is skipped, but say so.
        log("Ignoring unreadable GraphQL response: " + e.message);
      }
    });

    log("Navigating to " + url);
    await page.goto(url, { waitUntil: "load", timeout: 60000 });
    await page.waitForTimeout(5000);

    const title = await page.title();
    log("Page title: " + title);

    let hasOoState = await page.evaluate(() => !!window.__OO_STATE__);
    if (!hasOoState) {
      log("No __OO_STATE__ yet, waiting 10 more seconds...");
      await page.waitForTimeout(10000);
      hasOoState = await page.evaluate(() => !!window.__OO_STATE__);
      if (!hasOoState) {
        console.log(JSON.stringify({ error: "No __OO_STATE__ found", items: [], modifiers: [], itemModifierMap: {} }));
        return; // the finally block closes the browser; exit code stays 0
      }
    }

    // Extract items (runs in the page; must not reference outer variables)
    const ooData = await page.evaluate(() => {
      const state = window.__OO_STATE__ || {};
      const items = [];

      // Append every named item of `list` under the given category.
      const collect = (list, category) => {
        if (!Array.isArray(list)) return;
        for (const item of list) {
          if (!item.name) continue;
          items.push({
            name: item.name.trim(),
            guid: item.guid || "",
            itemGroupGuid: item.itemGroupGuid || "",
            hasModifiers: !!item.hasModifiers,
            category: category
          });
        }
      };

      for (const key of Object.keys(state)) {
        if (!key.startsWith("Menu:")) continue;
        const menu = state[key];
        if (!menu || !Array.isArray(menu.groups)) continue;

        for (const group of menu.groups) {
          const groupName = group.name || "Menu";
          collect(group.items, groupName);

          const subs = group.subgroups || group.children || group.childGroups || [];
          for (const sub of subs) {
            collect(sub.items, sub.name || groupName);
          }
        }
      }

      return items;
    });

    const modifierItems = ooData.filter((i) => i.hasModifiers);
    log("Found " + ooData.length + " items, " + modifierItems.length + " with modifiers");

    if (modifierItems.length === 0) {
      console.log(JSON.stringify({ items: ooData, modifiers: [], itemModifierMap: {} }));
      return;
    }

    // OPTIMIZATION: Deduplicate by itemGroupGuid - only click one representative per group
    const guidToItems = new Map(); // itemGroupGuid -> [items]
    const noGuidItems = []; // items without itemGroupGuid

    for (const item of modifierItems) {
      if (!item.itemGroupGuid) {
        noGuidItems.push(item);
        continue;
      }
      if (!guidToItems.has(item.itemGroupGuid)) {
        guidToItems.set(item.itemGroupGuid, []);
      }
      guidToItems.get(item.itemGroupGuid).push(item);
    }

    // Build click list: one item per unique itemGroupGuid + all items without a guid
    const clickList = [
      ...Array.from(guidToItems.values(), (group) => group[0]),
      ...noGuidItems
    ];

    log("Deduplicated: " + modifierItems.length + " modifier items -> " + clickList.length + " unique groups to click (" + guidToItems.size + " guids + " + noGuidItems.length + " ungrouped)");

    // Click items to extract modifier data
    const allModifierGroups = new Map(); // group guid (or full name) -> group record
    // Null prototype so item names such as "constructor" cannot collide with
    // inherited Object.prototype members. Serializes as a plain JSON object.
    const itemModifierMap = Object.create(null);
    let clickedCount = 0;
    let failedClicks = 0;

    /**
     * Register every modifier group in `groups` (recursing into groups nested
     * under individual modifiers) and return the group names that apply:
     * nested names first, then the group's own name, in input order.
     * The nested names are cached on the record so that a group already seen
     * yields the same list as on its first visit.
     */
    const processModGroups = (groups, prefix) => {
      if (!Array.isArray(groups)) return [];
      const modNames = [];

      for (const mg of groups) {
        const fullName = prefix ? prefix + " > " + mg.name : mg.name;
        const guid = mg.guid || fullName;

        let entry = allModifierGroups.get(guid);
        if (!entry) {
          const options = [];
          const nestedNames = [];

          for (const mod of Array.isArray(mg.modifiers) ? mg.modifiers : []) {
            const price = typeof mod.price === "number" ? mod.price : 0;
            options.push({ name: mod.name || "", price: price });

            if (Array.isArray(mod.modifierGroups) && mod.modifierGroups.length > 0) {
              nestedNames.push(...processModGroups(mod.modifierGroups, fullName));
            }
          }

          // Registered after recursing so nested groups keep their position
          // ahead of the parent in the output list.
          entry = {
            guid: guid,
            name: mg.name || "",
            required: (mg.minSelections || 0) > 0,
            minSelections: mg.minSelections || 0,
            maxSelections: mg.maxSelections || 0,
            options: options,
            nestedNames: nestedNames
          };
          allModifierGroups.set(guid, entry);
        }

        modNames.push(...entry.nestedNames, entry.name);
      }

      return modNames;
    };

    // Only click the deduplicated clickList
    for (const item of clickList) {
      let clicked = false;
      try {
        latestResponse = null;

        // Find the item by exact header text (name escaped for use in a RegExp)
        const headerLocator = page.locator(".headerText").filter({
          hasText: new RegExp("^" + item.name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") + "$")
        }).first();

        if (await headerLocator.count() === 0) {
          failedClicks++;
          log("Not found on page: " + item.name);
          continue;
        }

        // Prefer the enclosing element marked clickable; fall back to the header itself.
        const clickable = headerLocator.locator("xpath=ancestor::*[contains(@class,'clickable')]").first();
        const target = (await clickable.count()) > 0 ? clickable : headerLocator;
        await target.scrollIntoViewIfNeeded();
        await target.click({ timeout: 3000 });

        clicked = true;
        clickedCount++;

        // Wait for GraphQL response (up to 6s)
        const startTime = Date.now();
        while (!latestResponse && Date.now() - startTime < 6000) {
          await page.waitForTimeout(200);
        }

        if (latestResponse && latestResponse.modifierGroups) {
          const names = processModGroups(latestResponse.modifierGroups, "");

          // Map the clicked item
          itemModifierMap[item.name] = names;

          // OPTIMIZATION: Immediately map all siblings with same itemGroupGuid
          if (item.itemGroupGuid && guidToItems.has(item.itemGroupGuid)) {
            for (const sibling of guidToItems.get(item.itemGroupGuid)) {
              itemModifierMap[sibling.name] = names;
            }
          }
        }

        // Close modal
        await page.keyboard.press("Escape");
        await page.waitForTimeout(400);
      } catch (e) {
        // A click that never landed counts as failed, like a missing element.
        if (!clicked) failedClicks++;
        log("Error clicking " + item.name + ": " + e.message);
        try { await page.keyboard.press("Escape"); } catch (e2) { /* modal may not be open */ }
        await page.waitForTimeout(300);
      }
    }

    const directMapped = Object.keys(itemModifierMap).length;
    log("Clicked: " + clickedCount + "/" + clickList.length + ", Mapped: " + directMapped + "/" + modifierItems.length);

    // Final fallback: any remaining unmapped items, try to infer from an
    // already-mapped item that shares the same itemGroupGuid.
    const firstItemByName = new Map();
    for (const candidate of modifierItems) {
      if (!firstItemByName.has(candidate.name)) {
        firstItemByName.set(candidate.name, candidate);
      }
    }

    let inferredCount = 0;
    for (const item of modifierItems) {
      if (item.name in itemModifierMap) continue;
      if (!item.itemGroupGuid) continue;

      for (const mappedName of Object.keys(itemModifierMap)) {
        const mappedItem = firstItemByName.get(mappedName);
        if (mappedItem && mappedItem.itemGroupGuid === item.itemGroupGuid) {
          itemModifierMap[item.name] = itemModifierMap[mappedName];
          inferredCount++;
          break;
        }
      }
    }

    if (inferredCount > 0) {
      log("Inferred modifiers for " + inferredCount + " additional items via itemGroupGuid fallback");
    }

    log("Final: " + Object.keys(itemModifierMap).length + "/" + modifierItems.length + " mapped, " + allModifierGroups.size + " unique modifier groups");

    // Emit only the public fields of each group record (guid and the cached
    // nested names are internal bookkeeping).
    const modifiers = Array.from(allModifierGroups.values()).map((mg) => ({
      name: mg.name,
      required: mg.required,
      minSelections: mg.minSelections,
      maxSelections: mg.maxSelections,
      options: mg.options
    }));

    console.log(JSON.stringify({
      items: ooData,
      modifiers: modifiers,
      itemModifierMap: itemModifierMap,
      stats: {
        totalItems: ooData.length,
        itemsWithModifiers: modifierItems.length,
        modifiersExtracted: Object.keys(itemModifierMap).length,
        uniqueModifierGroups: modifiers.length,
        clickedCount: clickedCount,
        failedClicks: failedClicks,
        uniqueGroups: guidToItems.size,
        inferredCount: inferredCount
      }
    }));
  } catch (e) {
    log("Fatal error: " + e.message);
    console.log(JSON.stringify({ error: e.message, items: [], modifiers: [], itemModifierMap: {} }));
  } finally {
    if (browser) {
      try {
        await browser.close();
      } catch (closeError) {
        // A result is already on stdout; never emit a second JSON document.
        log("browser.close failed: " + closeError.message);
      }
    }
  }
})();
|
||||||
497
playwright/woo-modifiers.js
Normal file
497
playwright/woo-modifiers.js
Normal file
|
|
@ -0,0 +1,497 @@
|
||||||
|
const { chromium } = require("playwright");
|
||||||
|
|
||||||
|
(async () => {
|
||||||
|
const url = process.argv[2];
|
||||||
|
if (!url) {
|
||||||
|
console.log(JSON.stringify({ error: "URL required" }));
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
const log = (msg) => process.stderr.write("[woo-mod] " + msg + "\n");
|
||||||
|
|
||||||
|
let browser;
|
||||||
|
try {
|
||||||
|
browser = await chromium.launch({ headless: true });
|
||||||
|
const context = await browser.newContext({
|
||||||
|
userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
|
||||||
|
});
|
||||||
|
const page = await context.newPage();
|
||||||
|
|
||||||
|
log("Navigating to " + url);
|
||||||
|
await page.goto(url, { waitUntil: "networkidle", timeout: 60000 });
|
||||||
|
await page.waitForTimeout(3000);
|
||||||
|
|
||||||
|
// Close any popups/modals
|
||||||
|
try {
|
||||||
|
const closeButtons = await page.$$('.close, .modal .close, [aria-label="Close"]');
|
||||||
|
for (const btn of closeButtons) {
|
||||||
|
if (await btn.isVisible()) await btn.click().catch(() => {});
|
||||||
|
}
|
||||||
|
} catch (e) {}
|
||||||
|
|
||||||
|
// Extract business info from the page
|
||||||
|
const businessInfo = await page.evaluate(() => {
|
||||||
|
const info = { name: '', address: '', phone: '', hours: '' };
|
||||||
|
// Try common selectors for business name
|
||||||
|
const nameEl = document.querySelector('.site-title, .logo-text, h1.site-title, .custom-logo-link img, title');
|
||||||
|
if (nameEl) {
|
||||||
|
info.name = nameEl.alt || nameEl.textContent || '';
|
||||||
|
info.name = info.name.replace(/\s*[-–|].*$/, '').trim(); // strip taglines
|
||||||
|
}
|
||||||
|
// Try page title as fallback
|
||||||
|
if (!info.name && document.title) {
|
||||||
|
info.name = document.title.replace(/\s*[-–|].*$/, '').trim();
|
||||||
|
}
|
||||||
|
// Look for address/phone in common locations
|
||||||
|
const bodyText = document.body.innerText;
|
||||||
|
// Phone
|
||||||
|
const phoneMatch = bodyText.match(/(?:Call|Phone|Tel)[:\s]*\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4}/i) ||
|
||||||
|
bodyText.match(/\(?\d{3}\)?[\s.-]\d{3}[\s.-]\d{4}/);
|
||||||
|
if (phoneMatch) info.phone = phoneMatch[0].replace(/^(?:Call|Phone|Tel)[:\s]*/i, '').trim();
|
||||||
|
// Address - look for street patterns
|
||||||
|
const addrMatch = bodyText.match(/\d{1,5}\s+[A-Z][a-zA-Z\s]+(?:St|Ave|Blvd|Dr|Rd|Ln|Way|Ct|Pl|Cir)[.,]?\s*(?:[A-Z][a-zA-Z\s]+,?\s*[A-Z]{2}\s*\d{5})?/);
|
||||||
|
if (addrMatch) info.address = addrMatch[0].trim();
|
||||||
|
return info;
|
||||||
|
});
|
||||||
|
log("Business: " + businessInfo.name + " | " + businessInfo.address + " | " + businessInfo.phone);
|
||||||
|
|
||||||
|
// Strategy 1: Products displayed inline on the page (custom WooCommerce themes)
|
||||||
|
// Collect products from all category tabs/pages
|
||||||
|
let allProducts = [];
|
||||||
|
|
||||||
|
// Check for category tabs (clickable, same-page) vs category links (separate pages)
|
||||||
|
const categoryTabs = await page.evaluate(() => {
|
||||||
|
const tabs = document.querySelectorAll('li.tabs, .category-tab, [data-filter]');
|
||||||
|
return [...tabs].map((t, i) => ({ index: i, name: t.textContent.trim(), active: t.classList.contains('active') }))
|
||||||
|
.filter(t => t.name.length > 0 && t.name.length < 60);
|
||||||
|
});
|
||||||
|
|
||||||
|
const categoryLinks = await page.evaluate(() => {
|
||||||
|
// Only use links if no tabs found — some themes use links that 404
|
||||||
|
const cats = document.querySelectorAll('.product-category a, .product_cat a');
|
||||||
|
return [...cats].map(a => ({ href: a.href, name: a.textContent.trim() })).filter(c => c.name.length > 0 && c.name.length < 60);
|
||||||
|
});
|
||||||
|
|
||||||
|
const useTabs = categoryTabs.length > 0;
|
||||||
|
log("Found " + categoryTabs.length + " category tabs, " + categoryLinks.length + " category links" + (useTabs ? " (using tabs)" : ""));
|
||||||
|
|
||||||
|
const visitedUrls = new Set([page.url()]);
|
||||||
|
|
||||||
|
// Scrape current page first
|
||||||
|
/**
 * Scrape the product cards currently present in the page DOM.
 *
 * @param {string|null} catOverride - category name to assign to every product;
 *   when falsy, the category is derived from a `product_cat-<slug>` class on
 *   the card, if one is present.
 * @returns {Promise<Array<{name: string, price: number, description: string,
 *   imageUrl: string, category: string, postId: string}>>}
 */
const scrapeInlineProducts = async (catOverride) => {
  return await page.evaluate((catName) => {
    // Most specific first: the sale price (<ins>) must win over the
    // struck-through original, and a single amount over the whole price box.
    const PRICE_SELECTORS = [
      '.price ins .woocommerce-Price-amount',
      '.price ins .amount',
      '.price .woocommerce-Price-amount',
      '.price'
    ];

    // First numeric amount in `text`, or 0. Commas are dropped only when they
    // act as thousands separators ("1,299.00" -> 1299).
    const parsePrice = (text) => {
      const normalized = (text || '').replace(/,(?=\d{3}(?:\D|$))/g, '');
      const m = normalized.match(/\$?([\d.]+)/);
      return m ? parseFloat(m[1]) || 0 : 0;
    };

    const findPriceEl = (card) => {
      for (const selector of PRICE_SELECTORS) {
        const found = card.querySelector(selector);
        if (found) return found;
      }
      return null;
    };

    const products = [];
    const productEls = document.querySelectorAll('.product-con-box, li.product, .type-product, .product-item');

    productEls.forEach((el) => {
      const nameEl = el.querySelector('.woocommerce-loop-product__title, h2, h3, .product-title');
      if (!nameEl) return;
      const name = nameEl.textContent.trim();
      if (!name) return;

      const descEl = el.querySelector('.woocommerce-product-details__short-description, .description, .short-description');
      const description = descEl ? descEl.textContent.trim().substring(0, 200) : '';

      const priceEl = findPriceEl(el);
      const price = priceEl ? parsePrice(priceEl.textContent) : 0;

      const imgEl = el.querySelector('img');
      const imageUrl = imgEl ? (imgEl.src || imgEl.dataset.src || '') : '';

      const classes = el.className || '';

      // Try to get category from element classes
      let category = catName || '';
      if (!category) {
        const catMatch = classes.match(/product_cat-([a-z0-9-]+)/);
        if (catMatch) {
          category = catMatch[1].replace(/-/g, ' ').replace(/\b\w/g, c => c.toUpperCase());
        }
      }

      // Get post ID for clicking later
      const idMatch = classes.match(/post-(\d+)/);
      const postId = idMatch ? idMatch[1] : '';

      products.push({ name, price, description, imageUrl, category, postId });
    });

    return products;
  }, catOverride);
};
|
||||||
|
|
||||||
|
// Scrape products — either by clicking tabs or visiting category pages
|
||||||
|
if (useTabs) {
|
||||||
|
// Click each tab (prevent navigation) and scrape products that appear
|
||||||
|
for (const tab of categoryTabs) {
|
||||||
|
try {
|
||||||
|
log("Clicking tab: " + tab.name);
|
||||||
|
const tabName = tab.name;
|
||||||
|
await page.evaluate((name) => {
|
||||||
|
const btns = document.querySelectorAll('li.tabs a, a.catabtn, .category-tab a');
|
||||||
|
for (const btn of btns) {
|
||||||
|
if (btn.textContent.trim() === name) {
|
||||||
|
btn.addEventListener('click', e => e.preventDefault(), { once: true });
|
||||||
|
btn.click();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}, tabName);
|
||||||
|
await page.waitForTimeout(2500);
|
||||||
|
|
||||||
|
const catProducts = await scrapeInlineProducts(tab.name);
|
||||||
|
for (const p of catProducts) {
|
||||||
|
if (!allProducts.find(ep => ep.name === p.name)) {
|
||||||
|
allProducts.push(p);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
log(" -> " + catProducts.length + " products");
|
||||||
|
} catch (e) {
|
||||||
|
log("Error on tab " + tab.name + ": " + e.message);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Scrape homepage products first
|
||||||
|
let homeProducts = await scrapeInlineProducts(null);
|
||||||
|
log("Found " + homeProducts.length + " products on homepage");
|
||||||
|
allProducts.push(...homeProducts);
|
||||||
|
|
||||||
|
// Visit each category page
|
||||||
|
if (categoryLinks.length > 0) {
|
||||||
|
for (const cat of categoryLinks) {
|
||||||
|
if (visitedUrls.has(cat.href)) continue;
|
||||||
|
visitedUrls.add(cat.href);
|
||||||
|
try {
|
||||||
|
log("Visiting category: " + cat.name);
|
||||||
|
await page.goto(cat.href, { waitUntil: "networkidle", timeout: 30000 });
|
||||||
|
await page.waitForTimeout(2000);
|
||||||
|
|
||||||
|
let pageNum = 1;
|
||||||
|
while (pageNum <= 10) {
|
||||||
|
const catProducts = await scrapeInlineProducts(cat.name);
|
||||||
|
for (const p of catProducts) {
|
||||||
|
if (!allProducts.find(ep => ep.name === p.name)) {
|
||||||
|
allProducts.push(p);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const nextUrl = await page.evaluate(() => {
|
||||||
|
const next = document.querySelector('.woocommerce-pagination .next, a.next.page-numbers');
|
||||||
|
return next ? next.href : null;
|
||||||
|
});
|
||||||
|
if (!nextUrl) break;
|
||||||
|
pageNum++;
|
||||||
|
await page.goto(nextUrl, { waitUntil: "networkidle", timeout: 30000 });
|
||||||
|
await page.waitForTimeout(1500);
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
log("Error on category " + cat.name + ": " + e.message);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
log("Total unique products: " + allProducts.length);
|
||||||
|
|
||||||
|
// Strategy 2: If no inline products found, try standard product links
|
||||||
|
if (allProducts.length === 0) {
|
||||||
|
log("No inline products - trying product link approach");
|
||||||
|
await page.goto(url, { waitUntil: "networkidle", timeout: 30000 });
|
||||||
|
await page.waitForTimeout(3000);
|
||||||
|
|
||||||
|
const productLinks = await page.evaluate(() => {
|
||||||
|
const anchors = document.querySelectorAll('a[href*="/product/"], a.woocommerce-LoopProduct-link');
|
||||||
|
return [...new Set([...anchors].map(a => a.href))];
|
||||||
|
});
|
||||||
|
|
||||||
|
log("Found " + productLinks.length + " product links");
|
||||||
|
|
||||||
|
for (let i = 0; i < productLinks.length; i++) {
|
||||||
|
try {
|
||||||
|
await page.goto(productLinks[i], { waitUntil: "domcontentloaded", timeout: 30000 });
|
||||||
|
await page.waitForTimeout(1000);
|
||||||
|
|
||||||
|
const pd = await page.evaluate(() => {
|
||||||
|
const nameEl = document.querySelector('.product_title, h1.entry-title');
|
||||||
|
const name = nameEl ? nameEl.textContent.trim() : '';
|
||||||
|
const priceEl = document.querySelector('.summary .price .woocommerce-Price-amount');
|
||||||
|
let price = 0;
|
||||||
|
if (priceEl) { const m = priceEl.textContent.match(/\$?([\d.]+)/); if (m) price = parseFloat(m[1]) || 0; }
|
||||||
|
const descEl = document.querySelector('.woocommerce-product-details__short-description');
|
||||||
|
const desc = descEl ? descEl.textContent.trim().substring(0, 200) : '';
|
||||||
|
const imgEl = document.querySelector('.woocommerce-product-gallery__image img');
|
||||||
|
const img = imgEl ? (imgEl.src || '') : '';
|
||||||
|
const catEl = document.querySelector('.posted_in a');
|
||||||
|
const cat = catEl ? catEl.textContent.trim() : '';
|
||||||
|
return { name, price, description: desc, imageUrl: img, category: cat, postId: '' };
|
||||||
|
});
|
||||||
|
|
||||||
|
if (pd.name) allProducts.push(pd);
|
||||||
|
} catch (e) {
|
||||||
|
log("Error on product link: " + e.message);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now extract modifiers by visiting individual product pages
|
||||||
|
const modifierGroupsMap = {};
|
||||||
|
const itemModifierMap = {};
|
||||||
|
|
||||||
|
// For inline products, we need their permalink - try /product/{slug} or ?p={postId}
|
||||||
|
for (let i = 0; i < allProducts.length; i++) {
|
||||||
|
const prod = allProducts[i];
|
||||||
|
log(`[${i + 1}/${allProducts.length}] Extracting modifiers for: ${prod.name}`);
|
||||||
|
|
||||||
|
// Build product URL from name slug or postId
|
||||||
|
let productUrl = '';
|
||||||
|
if (prod.postId) {
|
||||||
|
productUrl = url.replace(/\/$/, '') + '/?p=' + prod.postId;
|
||||||
|
} else {
|
||||||
|
const slug = prod.name.toLowerCase().replace(/[^a-z0-9]+/g, '-').replace(/^-|-$/g, '');
|
||||||
|
productUrl = url.replace(/\/$/, '') + '/product/' + slug + '/';
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
await page.goto(productUrl, { waitUntil: "domcontentloaded", timeout: 20000 });
|
||||||
|
await page.waitForTimeout(1500);
|
||||||
|
|
||||||
|
const groups = await page.evaluate(() => {
|
||||||
|
const results = [];
|
||||||
|
const seen = new Set();
|
||||||
|
|
||||||
|
// Helper: clean TMEPO value strings like "Small_0" -> "Small"
|
||||||
|
const cleanVal = (v) => v ? v.replace(/_\d+$/, '').trim() : '';
|
||||||
|
|
||||||
|
// Helper: parse price from data-rules JSON or data-price attr
|
||||||
|
const parseRulesPrice = (input) => {
|
||||||
|
if (!input) return 0;
|
||||||
|
const rules = input.getAttribute('data-rules');
|
||||||
|
if (rules) {
|
||||||
|
try {
|
||||||
|
const arr = JSON.parse(rules);
|
||||||
|
if (Array.isArray(arr) && arr.length > 0) {
|
||||||
|
const v = parseFloat(arr[0]);
|
||||||
|
if (!isNaN(v) && v > 0) return v;
|
||||||
|
}
|
||||||
|
} catch(e) {}
|
||||||
|
}
|
||||||
|
const dp = input.getAttribute('data-price');
|
||||||
|
if (dp) { const v = parseFloat(dp); if (!isNaN(v) && v > 0) return v; }
|
||||||
|
return 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Prefer granular containers (individual field cells) over broad row containers
|
||||||
|
// tm-cell with cpf-type are individual modifier groups; tc-cell.tcell are TMEPO cells
|
||||||
|
const granular = document.querySelectorAll('.tm-cell[class*="cpf-type-"], .tc-cell.tcell');
|
||||||
|
const broad = document.querySelectorAll('.tc-row, .tm-row, .cpf-section');
|
||||||
|
// Use granular if they have labels, otherwise fall back to broad
|
||||||
|
const granularWithLabels = [...granular].filter(el =>
|
||||||
|
el.querySelector('.tm-epo-field-label label, .tm-epo-element-label, h3.tm-epo-field-label, label:first-of-type')
|
||||||
|
);
|
||||||
|
const elements = granularWithLabels.length > 0 ? granularWithLabels : (broad.length > 0 ? broad : granular);
|
||||||
|
|
||||||
|
// Build one modifier group per labelled container and append it to `results`.
// Options are read from radio/checkbox inputs first, then from option label text,
// and finally from <select> options when nothing else was found.
elements.forEach(section => {
  const labelEl = section.querySelector('.tm-epo-field-label label, .tm-epo-element-label, h3.tm-epo-field-label, label:first-of-type');
  if (!labelEl) return;

  const groupName = labelEl.textContent.trim();
  // Skip empty, over-long, or already-processed group names.
  if (!groupName || groupName.length > 80 || seen.has(groupName)) return;
  // Skip fields whose label matches these free-text / quantity patterns.
  if (/special request|sandwich name|your name|instructions|quantity/i.test(groupName)) return;
  seen.add(groupName);

  // Append an option unless one with the same name is already in the list.
  const addUnique = (list, entry) => {
    if (!list.some(o => o.name === entry.name)) list.push(entry);
  };

  const options = [];
  const additionOptions = []; // non-preselected checkboxes of a split group
  let groupType = 'select'; // default

  // Radio buttons and checkboxes — get name from input.value, price from data-rules
  const radios = section.querySelectorAll('input[type="radio"]');
  const checkboxes = [...section.querySelectorAll('input[type="checkbox"]')];
  if (radios.length > 0) groupType = 'radio';
  else if (checkboxes.length > 0) groupType = 'checkbox';

  // A checkbox group that mixes preselected and non-preselected inputs is split into two groups.
  const isPreselectedInput = el => el.className.includes('custom-preselected');
  const shouldSplit = checkboxes.some(isPreselectedInput) && checkboxes.some(c => !isPreselectedInput(c));

  section.querySelectorAll('.tmcp-field-wrap, .tm-field-wrap, label.tm-epo-field-label-wrap').forEach(wrap => {
    const input = wrap.querySelector('input[type="radio"], input[type="checkbox"]');
    if (input) {
      const optName = cleanVal(input.value);
      if (!optName || optName.length > 80) return;
      const optPrice = parseRulesPrice(input);
      const selected = input.checked || wrap.classList.contains('tc-active');

      // Skip disabled duplicates (size variants)
      if (input.disabled || input.className.includes('tcdisabled')) return;

      const entry = { name: optName, price: optPrice, selected };
      const target = shouldSplit && !isPreselectedInput(input) ? additionOptions : options;
      addUnique(target, entry);
      return;
    }

    // Fallback: no input in this wrapper, so take the option name from its label text.
    const lbl = wrap.querySelector('.tm-label, .tm-value, label span:not(.tm-price)');
    if (!lbl) return;
    const optName = lbl.textContent.replace(/[\n\r\t]+/g, ' ').trim();
    if (!optName || optName.length > 80) return;
    let optPrice = 0;
    const priceSpan = wrap.querySelector('.tm-price, .price .amount, [class*="price"]');
    if (priceSpan) {
      const m = priceSpan.textContent.match(/\+?\$?([\d.]+)/);
      if (m) optPrice = parseFloat(m[1]) || 0;
    }
    addUnique(options, { name: optName, price: optPrice, selected: false });
  });

  // Select dropdowns — get name from option.value, price from data-price
  if (options.length === 0) {
    section.querySelectorAll('select option').forEach(opt => {
      if (!opt.value) return;
      let optName = cleanVal(opt.value);
      if (!optName || optName.length > 80) return;
      let optPrice = 0;
      const dp = opt.getAttribute('data-price');
      if (dp) {
        const v = parseFloat(dp);
        if (!isNaN(v) && v > 0) optPrice = v;
      }
      const text = opt.textContent.trim();
      if (text && text.length < 80 && text !== optName) {
        // Visible text wins as the display name; a number in it is used as the price
        // only when data-price did not supply one.
        const m = text.match(/\+?\$?([\d.]+)/);
        if (m) optPrice = optPrice || (parseFloat(m[1]) || 0);
        optName = text.replace(/\s*\(\+?\$?[\d.]+\)\s*$/, '').trim() || optName;
      }
      options.push({ name: optName, price: optPrice, selected: opt.selected });
    });
  }

  if (options.length > 0) {
    const required = section.querySelector('.required, [data-required="1"]') !== null;
    results.push({ name: groupName, type: groupType, options, required });
  }

  // Emit the non-preselected half of a split checkbox group as its own optional group;
  // without this the collected addition options never reach `results`.
  // NOTE(review): the " - Additions" suffix and `required: false` are chosen here, not
  // dictated by the page markup — confirm they suit downstream consumers.
  if (shouldSplit && additionOptions.length > 0) {
    const additionsName = groupName + ' - Additions';
    if (!seen.has(additionsName)) {
      seen.add(additionsName);
      results.push({ name: additionsName, type: 'checkbox', options: additionOptions, required: false });
    }
  }
});
|
||||||
|
|
||||||
|
// Standard WooCommerce variations fallback: used only when no add-on groups were found.
// Each row of the variations table that has both a label and a <select> becomes one group.
if (results.length === 0) {
  const vForm = document.querySelector('.variations_form');
  if (vForm) {
    vForm.querySelectorAll('.variations tr').forEach(row => {
      const lbl = row.querySelector('th label, .label label');
      const sel = row.querySelector('select');
      if (!lbl || !sel) return;

      // Options with an empty value are skipped.
      // `selected` is included so these options have the same shape as the ones built from add-on fields.
      const opts = [...sel.querySelectorAll('option')]
        .filter(o => o.value)
        .map(o => ({ name: o.textContent.trim(), price: 0, selected: o.selected }));

      if (opts.length > 0) {
        // Tag the group explicitly as a dropdown so code that branches on `type`
        // treats it as single-choice instead of seeing `undefined`.
        results.push({ name: lbl.textContent.trim(), type: 'select', options: opts, required: true });
      }
    });
  }
}
|
||||||
|
|
||||||
|
return results;
|
||||||
|
});
|
||||||
|
|
||||||
|
// Item has a zero price: try to recover one from the product page,
// and failing that from the modifier groups.
if (prod.price === 0) {
  // The callback is evaluated inside the page, so it must be self-contained.
  const pagePrice = await page.evaluate(() => {
    // Parse a numeric string; anything that is not strictly positive counts as "no price".
    const positive = raw => {
      const value = parseFloat(raw);
      return value > 0 ? value : 0;
    };

    // 1) Price shown in the product summary.
    const priceEl = document.querySelector('.summary .price .woocommerce-Price-amount, .summary .price .amount, .product .price .amount');
    const match = priceEl ? priceEl.textContent.match(/\$?([\d.]+)/) : null;
    const shown = match ? positive(match[1]) : 0;
    if (shown > 0) return shown;

    // 2) Hidden input carrying the product price.
    const hidden = document.querySelector('input.cpf-product-price');
    return hidden && hidden.value ? positive(hidden.value) : 0;
  });

  if (pagePrice > 0) {
    prod.price = pagePrice;
  } else {
    // Use lowest price from first modifier group that has prices
    const pricedGroup = groups.find(g => g.options.some(o => o.price > 0));
    if (pricedGroup) {
      prod.price = Math.min(...pricedGroup.options.map(o => o.price).filter(p => p > 0));
    }
  }
}
|
||||||
|
|
||||||
|
// Rewrite groups that list absolute prices as surcharges relative to the item's base price.
if (prod.price > 0 && groups.length > 0) {
  const basePrice = prod.price;

  groups.forEach(group => {
    const pricedOptions = group.options.filter(o => o.price > 0);
    if (pricedOptions.length === 0) return;

    // Heuristic: a group counts as absolutely priced only when its cheapest priced
    // option is at least 80% of the base price; smaller amounts are left untouched.
    const cheapest = Math.min(...pricedOptions.map(o => o.price));
    if (cheapest < basePrice * 0.8) return;

    pricedOptions.forEach(o => {
      // Difference from the base, rounded to two decimals and clamped so it never goes negative.
      const delta = Math.round((o.price - basePrice) * 100) / 100;
      o.price = delta < 0 ? 0 : delta;
    });
  });
}
|
||||||
|
|
||||||
|
// Register this product's modifier groups in the shared registry (first definition of a
// given group name wins) and record which group names apply to the product.
if (groups.length > 0) {
  const itemModGroups = [];
  for (const g of groups) {
    if (!modifierGroupsMap[g.name]) {
      // Resolve the type once so `type` and `maxSelections` cannot disagree:
      // a group with no explicit type is stored as 'select' and must be capped at one choice too.
      const type = g.type || 'select';
      modifierGroupsMap[g.name] = {
        name: g.name,
        type,
        options: g.options,
        required: g.required,
        minSelections: g.required ? 1 : 0,
        // Radio and select groups allow one choice; other types get 0
        // (presumably "no limit" — confirm against the consumer of this JSON).
        maxSelections: type === 'radio' || type === 'select' ? 1 : 0
      };
    }
    itemModGroups.push(g.name);
  }
  itemModifierMap[prod.name] = itemModGroups;
  log(" -> " + groups.length + " modifier groups" + (prod.price > 0 ? " ($" + prod.price + ")" : ""));
}
|
||||||
|
} catch (e) {
|
||||||
|
log(" -> Error: " + e.message);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Flatten the group registry into the array that is emitted below.
const modifiers = Object.values(modifierGroupsMap);

// Summary counters; both product counters are taken from allProducts.length.
const stats = {
  totalProducts: allProducts.length,
  itemsExtracted: allProducts.length,
  modifierGroups: modifiers.length,
  itemsWithModifiers: Object.keys(itemModifierMap).length
};

log(`Done: ${stats.itemsExtracted} items, ${stats.modifierGroups} modifier groups, ${stats.itemsWithModifiers} items with modifiers`);

// Emit the whole scrape result as one JSON document via console.log.
console.log(
  JSON.stringify({
    business: businessInfo,
    items: allProducts,
    modifiers: modifiers,
    itemModifierMap: itemModifierMap,
    stats: stats
  })
);
|
||||||
|
|
||||||
|
} catch (err) {
|
||||||
|
log("Fatal: " + err.message);
|
||||||
|
console.log(JSON.stringify({ error: err.message }));
|
||||||
|
process.exit(1);
|
||||||
|
} finally {
|
||||||
|
if (browser) await browser.close();
|
||||||
|
}
|
||||||
|
})();
|
||||||
Reference in a new issue