ZIP upload: add file sanitization, direct file read, and temp cleanup

- uploadSavedPage.cfm: sanitize extracted files (whitelist safe extensions,
  delete symlinks) to protect against malicious content from infected sites
- analyzeMenuUrl.cfm: detect local temp URLs and read directly from disk,
  bypassing Playwright for faster processing of saved pages
- saveWizard.cfm: delete temp folder immediately after wizard completes
  instead of waiting for 1-hour auto-cleanup
- setup-wizard.html: track temp folder ID and pass to saveWizard for cleanup

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
John Mizerek 2026-02-13 08:02:41 -08:00
parent 336aef8685
commit 5cde8ce4fa
4 changed files with 119 additions and 25 deletions

View file

@ -55,30 +55,51 @@
<cfset targetUrl = "https://" & targetUrl> <cfset targetUrl = "https://" & targetUrl>
</cfif> </cfif>
<cfset arrayAppend(response.steps, "Fetching URL with Playwright: " & targetUrl)> <!--- Check if this is a local temp file (ZIP upload) - read directly, skip Playwright --->
<cfif findNoCase("/temp/menu-import/", targetUrl)>
<!---
  Local temp file (ZIP upload): map the URL path onto disk and read it directly.
  SECURITY: targetUrl comes from the request, so the path portion may contain
  "../" segments. Canonicalise the mapped path and require it to stay inside the
  menu-import temp root; otherwise this branch could read any file on the server.
--->
<cfset localFilePath = createObject("java", "java.io.File").init(expandPath(reReplaceNoCase(targetUrl, "https?://[^/]+(/temp/menu-import/.*)", "\1"))).getCanonicalPath()>
<cfset importRoot = createObject("java", "java.io.File").init(expandPath("/temp/menu-import/")).getCanonicalPath()>
<!--- compare() is case-sensitive; the trailing separator stops "/temp/menu-import-evil" from matching --->
<cfif compare(left(localFilePath, len(importRoot) + 1), importRoot & createObject("java", "java.io.File").separator) NEQ 0>
<cfthrow message="Invalid local temp file path">
</cfif>
<cfset arrayAppend(response.steps, "Local temp file detected: " & localFilePath)>
<!--- Use Playwright for JS-rendered content ---> <cfif NOT fileExists(localFilePath)>
<cfset playwrightOutput = ""> <cfthrow message="Local file not found: #localFilePath#">
<cfexecute name="/opt/playwright/run.sh" arguments="'#targetUrl#' 4000" timeout="90" variable="playwrightOutput" /> </cfif>
<cfif NOT len(trim(playwrightOutput))> <cfset pageHtml = fileRead(localFilePath, "utf-8")>
<cfthrow message="Playwright returned empty response"> <cfset playwrightImages = arrayNew(1)>
</cfif> <cfset arrayAppend(response.steps, "Read " & len(pageHtml) & " bytes from local file")>
<cfset playwrightResult = deserializeJSON(playwrightOutput)> <!--- Extract base URL for resolving relative links --->
<cfif structKeyExists(playwrightResult, "error")> <cfset baseUrl = reReplace(targetUrl, "(https?://[^/]+).*", "\1")>
<cfthrow message="Playwright error: #playwrightResult.error#"> <cfset basePath = reReplace(targetUrl, "(https?://[^/]+/[^?]*/?).*", "\1")>
</cfif> <cfif NOT reFindNoCase("/$", basePath)>
<cfset basePath = reReplace(basePath, "/[^/]*$", "/")>
</cfif>
<cfelse>
<!--- Remote URL - use Playwright for JS-rendered content --->
<cfset arrayAppend(response.steps, "Fetching URL with Playwright: " & targetUrl)>
<cfset pageHtml = playwrightResult.html> <cfset playwrightOutput = "">
<cfset playwrightImages = structKeyExists(playwrightResult, "images") ? playwrightResult.images : arrayNew(1)> <cfexecute name="/opt/playwright/run.sh" arguments="'#targetUrl#' 4000" timeout="90" variable="playwrightOutput" />
<cfset arrayAppend(response.steps, "Fetched " & len(pageHtml) & " bytes via Playwright, " & arrayLen(playwrightImages) & " images captured")>
<!--- Extract base URL for resolving relative links ---> <cfif NOT len(trim(playwrightOutput))>
<cfset baseUrl = reReplace(targetUrl, "(https?://[^/]+).*", "\1")> <cfthrow message="Playwright returned empty response">
<cfset basePath = reReplace(targetUrl, "(https?://[^/]+/[^?]*/?).*", "\1")> </cfif>
<cfif NOT reFindNoCase("/$", basePath)>
<cfset basePath = reReplace(basePath, "/[^/]*$", "/")> <cfset playwrightResult = deserializeJSON(playwrightOutput)>
<cfif structKeyExists(playwrightResult, "error")>
<cfthrow message="Playwright error: #playwrightResult.error#">
</cfif>
<cfset pageHtml = playwrightResult.html>
<cfset playwrightImages = structKeyExists(playwrightResult, "images") ? playwrightResult.images : arrayNew(1)>
<cfset arrayAppend(response.steps, "Fetched " & len(pageHtml) & " bytes via Playwright, " & arrayLen(playwrightImages) & " images captured")>
<!--- Extract base URL for resolving relative links --->
<cfset baseUrl = reReplace(targetUrl, "(https?://[^/]+).*", "\1")>
<cfset basePath = reReplace(targetUrl, "(https?://[^/]+/[^?]*/?).*", "\1")>
<cfif NOT reFindNoCase("/$", basePath)>
<cfset basePath = reReplace(basePath, "/[^/]*$", "/")>
</cfif>
</cfif> </cfif>
<cfelse> <cfelse>
<cfthrow message="Either 'url' or 'html' content is required"> <cfthrow message="Either 'url' or 'html' content is required">

View file

@ -744,6 +744,23 @@ try {
"itemIdMap": itemIdMap "itemIdMap": itemIdMap
}; };
// Optional post-save housekeeping: remove the ZIP-upload temp folder named by data.tempFolder.
// Anything that is missing, non-simple, or not a 32-character lowercase hex ID is ignored.
tempFolder = "";
if (structKeyExists(data, "tempFolder") && isSimpleValue(data.tempFolder)) {
tempFolder = trim(data.tempFolder);
}
// The strict hex pattern means the value cannot carry path separators or ".." segments
if (len(tempFolder) && reFind("^[a-f0-9]{32}$", tempFolder)) {
tempFolderPath = expandPath("/temp/menu-import/" & tempFolder);
if (directoryExists(tempFolderPath)) {
try {
// Recursive delete; a failure is recorded as a step and does not fail the request
directoryDelete(tempFolderPath, true);
response.steps.append("Cleaned up temp folder: " & tempFolder);
} catch (any cleanupErr) {
response.steps.append("Warning: Could not delete temp folder: " & cleanupErr.message);
}
}
}
} catch (any e) { } catch (any e) {
response.errors.append(e.message); response.errors.append(e.message);
if (len(e.detail)) { if (len(e.detail)) {

View file

@ -68,15 +68,64 @@
<!--- Extract the ZIP file ---> <!--- Extract the ZIP file --->
<cfzip action="unzip" file="#uploadedFile#" destination="#extractDir#" overwrite="true"> <cfzip action="unzip" file="#uploadedFile#" destination="#extractDir#" overwrite="true">
<!--- Make extracted files world-readable for nginx/Playwright ---> <!--- Delete the uploaded ZIP --->
<cffile action="delete" file="#uploadedFile#">
<!--- SECURITY: Sanitize extracted files --->
<!---
  Walks everything that was extracted into extractDir and removes:
    1. symbolic links (to files or directories) - the link itself, never its target;
    2. regular files whose extension is not on the whitelist below.
  Entries whose real location is outside extractDir (only reachable through a link)
  are skipped and never deleted. The number of removed entries is reported in
  response["SANITIZED_COUNT"].

  Symlinks are detected with java.nio.file.Files rather than a shell "test -L":
  cfexecute launches the program directly without a shell, so "&& echo SYMLINK"
  would be handed to "test" as extra arguments and never produce any output.
--->
<!--- Whitelist of safe file extensions for saved web pages --->
<cfset safeExtensions = "htm,html,css,js,json,txt,xml,svg,jpg,jpeg,png,gif,webp,ico,woff,woff2,ttf,eot,otf,map">
<cfset deletedCount = 0>
<cfset nioFiles = createObject("java", "java.nio.file.Files")>
<cfset extractRoot = createObject("java", "java.io.File").init(extractDir).getCanonicalPath()>
<cfset pathSeparator = createObject("java", "java.io.File").separator>
<!--- Recursively scan and remove unsafe files --->
<cfdirectory action="list" directory="#extractDir#" name="allFiles" recurse="true">
<cfloop query="allFiles">
<cfset filePath = "#allFiles.directory#/#allFiles.name#">
<cfset entryFile = createObject("java", "java.io.File").init(filePath)>
<!--- Check for symlinks BEFORE skipping directories so linked directories are removed too --->
<cfset isSymlink = false>
<cftry>
<cfset isSymlink = nioFiles.isSymbolicLink(entryFile.toPath())>
<cfcatch>
<!--- path could not be inspected, assume not symlink --->
</cfcatch>
</cftry>
<cfif isSymlink>
<cftry>
<!--- Files.deleteIfExists removes the link itself and does not follow it --->
<cfif nioFiles.deleteIfExists(entryFile.toPath())>
<cfset deletedCount++>
</cfif>
<cfcatch></cfcatch>
</cftry>
<cfcontinue>
</cfif>
<!--- Skip directories --->
<cfif allFiles.type EQ "Dir">
<cfcontinue>
</cfif>
<!--- Never touch anything whose real path escapes the extraction folder --->
<cfset insideExtractDir = false>
<cftry>
<cfset realPath = entryFile.getCanonicalPath()>
<cfset insideExtractDir = compare(left(realPath, len(extractRoot) + 1), extractRoot & pathSeparator) EQ 0>
<cfcatch>
<!--- unresolvable path, treat as outside and leave it alone --->
</cfcatch>
</cftry>
<cfif NOT insideExtractDir>
<cfcontinue>
</cfif>
<!--- Check file extension against whitelist --->
<cfset fileExt = lCase(listLast(allFiles.name, "."))>
<cfif NOT listFindNoCase(safeExtensions, fileExt)>
<cftry>
<cffile action="delete" file="#filePath#">
<cfset deletedCount++>
<cfcatch>
<!--- best effort: a file that cannot be deleted is not counted --->
</cfcatch>
</cftry>
</cfif>
</cfloop>
<cfset response["SANITIZED_COUNT"] = deletedCount>
<!--- Make extracted files world-readable for nginx --->
<cftry> <cftry>
<cfexecute name="chmod" arguments="-R o+rX #extractDir#" timeout="10" /> <cfexecute name="chmod" arguments="-R o+rX #extractDir#" timeout="10" />
<cfcatch></cfcatch> <cfcatch></cfcatch>
</cftry> </cftry>
<!--- Delete the uploaded ZIP --->
<cffile action="delete" file="#uploadedFile#">
<!--- Find the main HTML file ---> <!--- Find the main HTML file --->
<cfset htmlFile = ""> <cfset htmlFile = "">
<cfset htmlFiles = []> <cfset htmlFiles = []>

View file

@ -1056,7 +1056,8 @@
currentStep: 1, currentStep: 1,
imageObjectUrls: [], // Store object URLs for uploaded images imageObjectUrls: [], // Store object URLs for uploaded images
imageMappings: [], // For matching uploaded images to items (from HTML import) imageMappings: [], // For matching uploaded images to items (from HTML import)
itemImages: {} // item ID -> File object for matched images itemImages: {}, // item ID -> File object for matched images
tempFolder: null // Temp folder ID from ZIP upload (for cleanup after save)
}; };
// Image preview functions // Image preview functions
@ -1435,6 +1436,11 @@
console.log('ZIP uploaded, extracted URL:', uploadResult.URL); console.log('ZIP uploaded, extracted URL:', uploadResult.URL);
// Store temp folder ID for cleanup after wizard completes
if (uploadResult.FOLDER) {
config.tempFolder = uploadResult.FOLDER;
}
// Update loading message // Update loading message
document.getElementById('conversation').innerHTML = ''; document.getElementById('conversation').innerHTML = '';
addMessage('ai', ` addMessage('ai', `
@ -2817,7 +2823,8 @@
businessId: config.businessId || 0, businessId: config.businessId || 0,
menuId: config.menuId || 0, menuId: config.menuId || 0,
userId: config.userId, userId: config.userId,
data: config.extractedData data: config.extractedData,
tempFolder: config.tempFolder
}) })
}); });