ZIP upload: add file sanitization, direct file read, and temp cleanup
- uploadSavedPage.cfm: sanitize extracted files (whitelist safe extensions, delete symlinks) to protect against malicious content from infected sites
- analyzeMenuUrl.cfm: detect local temp URLs and read directly from disk, bypassing Playwright for faster processing of saved pages
- saveWizard.cfm: delete temp folder immediately after wizard completes instead of waiting for 1-hour auto-cleanup
- setup-wizard.html: track temp folder ID and pass to saveWizard for cleanup

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
336aef8685
commit
5cde8ce4fa
4 changed files with 119 additions and 25 deletions
|
|
@ -55,9 +55,29 @@
|
|||
<cfset targetUrl = "https://" & targetUrl>
|
||||
</cfif>
|
||||
|
||||
<!--- Check if this is a local temp file (ZIP upload) - read directly, skip Playwright --->
|
||||
<cfif findNoCase("/temp/menu-import/", targetUrl)>
|
||||
<!---
    Map the temp URL to a file on disk and read it.

    targetUrl is caller-supplied, so it must not be trusted as a path:
      * the regex is anchored and stops at "?" / "#" so a query string or
        fragment never becomes part of the file name;
      * the resolved path is canonicalised ("../" and symlinks collapsed) and
        must still live under the menu-import temp root, otherwise a URL such
        as /temp/menu-import/../../some/file could read arbitrary files.
--->
<cfset localUrlPath = reReplaceNoCase(targetUrl, "^https?://[^/]+(/temp/menu-import/[^?##]*).*$", "\1")>
<cfif NOT reFindNoCase("^/temp/menu-import/", localUrlPath)>
    <!--- The regex did not match, so targetUrl is not a well-formed temp URL --->
    <cfthrow message="Invalid local temp file URL">
</cfif>

<cfset javaFile = createObject("java", "java.io.File")>
<cfset tempRootPrefix = javaFile.init(expandPath("/temp/menu-import/")).getCanonicalPath() & javaFile.separator>
<cfset localFilePath = javaFile.init(expandPath(localUrlPath)).getCanonicalPath()>

<!--- compare() is case-sensitive; the EQ/NEQ operators are not --->
<cfif compare(left(localFilePath, len(tempRootPrefix)), tempRootPrefix) NEQ 0>
    <cfthrow message="Local file is outside the temp import folder">
</cfif>

<cfset arrayAppend(response.steps, "Local temp file detected: " & localFilePath)>

<cfif NOT fileExists(localFilePath)>
    <cfthrow message="Local file not found: #localFilePath#">
</cfif>

<cfset pageHtml = fileRead(localFilePath, "utf-8")>
|
||||
<cfset playwrightImages = arrayNew(1)>
|
||||
<cfset arrayAppend(response.steps, "Read " & len(pageHtml) & " bytes from local file")>
|
||||
|
||||
<!--- Extract base URL for resolving relative links --->
|
||||
<cfset baseUrl = reReplace(targetUrl, "(https?://[^/]+).*", "\1")>
|
||||
<cfset basePath = reReplace(targetUrl, "(https?://[^/]+/[^?]*/?).*", "\1")>
|
||||
<cfif NOT reFindNoCase("/$", basePath)>
|
||||
<cfset basePath = reReplace(basePath, "/[^/]*$", "/")>
|
||||
</cfif>
|
||||
<cfelse>
|
||||
<!--- Remote URL - use Playwright for JS-rendered content --->
|
||||
<cfset arrayAppend(response.steps, "Fetching URL with Playwright: " & targetUrl)>
|
||||
|
||||
<!--- Use Playwright for JS-rendered content --->
|
||||
<cfset playwrightOutput = "">
|
||||
<cfexecute name="/opt/playwright/run.sh" arguments="'#targetUrl#' 4000" timeout="90" variable="playwrightOutput" />
|
||||
|
||||
|
|
@ -80,6 +100,7 @@
|
|||
<cfif NOT reFindNoCase("/$", basePath)>
|
||||
<cfset basePath = reReplace(basePath, "/[^/]*$", "/")>
|
||||
</cfif>
|
||||
</cfif>
|
||||
<cfelse>
|
||||
<cfthrow message="Either 'url' or 'html' content is required">
|
||||
</cfif>
|
||||
|
|
|
|||
|
|
@ -744,6 +744,23 @@ try {
|
|||
"itemIdMap": itemIdMap
|
||||
};
|
||||
|
||||
// Clean up temp folder from ZIP upload if provided
|
||||
// tempFolder is optional in the request payload; anything that is missing
// or not a simple value is treated as "no folder supplied".
tempFolder = "";
if (structKeyExists(data, "tempFolder") && isSimpleValue(data.tempFolder)) {
    tempFolder = trim(data.tempFolder);
}

// Only act on a name made of exactly 32 lowercase hex characters, so the
// value can never contain path separators or ".." segments.
if (len(tempFolder) && reFind("^[a-f0-9]{32}$", tempFolder)) {
    tempFolderPath = expandPath("/temp/menu-import/" & tempFolder);

    if (directoryExists(tempFolderPath)) {
        // Best effort: a failed delete is reported as a step, not as an error.
        try {
            directoryDelete(tempFolderPath, true);
            response.steps.append("Cleaned up temp folder: " & tempFolder);
        } catch (any cleanupErr) {
            response.steps.append("Warning: Could not delete temp folder: " & cleanupErr.message);
        }
    }
}
|
||||
|
||||
} catch (any e) {
|
||||
response.errors.append(e.message);
|
||||
if (len(e.detail)) {
|
||||
|
|
|
|||
|
|
@ -68,15 +68,64 @@
|
|||
<!--- Extract the ZIP file --->
|
||||
<cfzip action="unzip" file="#uploadedFile#" destination="#extractDir#" overwrite="true">
|
||||
|
||||
<!--- Make extracted files world-readable for nginx/Playwright --->
|
||||
<!--- Delete the uploaded ZIP --->
|
||||
<cffile action="delete" file="#uploadedFile#">
|
||||
|
||||
<!--- SECURITY: Sanitize extracted files --->
|
||||
<!--- Whitelist of safe file extensions for saved web pages --->
|
||||
<cfset safeExtensions = "htm,html,css,js,json,txt,xml,svg,jpg,jpeg,png,gif,webp,ico,woff,woff2,ttf,eot,otf,map">
|
||||
<cfset deletedCount = 0>
|
||||
|
||||
<!--- Recursively scan and remove unsafe files --->
|
||||
<!---
    Sanitize the extracted tree in two passes.

    Pass 1 removes every symbolic link (files AND directories). Detection uses
    java.nio.file.Files.isSymbolicLink instead of a shell "test -L ... && echo":
    cfexecute starts the process directly, without a shell, so the "&&" part
    was passed to test as extra arguments and the check never reported a link.

    Pass 2 re-lists the tree after the links are gone and deletes every regular
    file whose extension is not in safeExtensions. Listing again means the
    whitelist pass cannot reach, or delete, files through a symlinked directory.

    Reads:  extractDir, safeExtensions
    Writes: deletedCount (incremented once per removed entry)
--->
<cfset nioFiles = createObject("java", "java.nio.file.Files")>

<!--- Pass 1: remove symlinks --->
<cfdirectory action="list" directory="#extractDir#" name="linkScan" recurse="true">
<cfloop query="linkScan">
    <cftry>
        <cfset entryPath = createObject("java", "java.io.File").init("#linkScan.directory#/#linkScan.name#").toPath()>
        <cfif nioFiles.isSymbolicLink(entryPath)>
            <!--- Files.delete removes the link itself, never the link target --->
            <cfset nioFiles.delete(entryPath)>
            <cfset deletedCount++>
        </cfif>
        <cfcatch>
            <!--- Best effort: an entry that cannot be inspected or removed is left for pass 2 --->
        </cfcatch>
    </cftry>
</cfloop>

<!--- Pass 2: enforce the extension whitelist on the remaining files --->
<cfdirectory action="list" directory="#extractDir#" name="allFiles" recurse="true">
<cfloop query="allFiles">
    <!--- Skip directories --->
    <cfif allFiles.type EQ "Dir">
        <cfcontinue>
    </cfif>

    <cfset filePath = "#allFiles.directory#/#allFiles.name#">

    <!--- A name without a dot yields the whole name here, which is not whitelisted --->
    <cfset fileExt = lCase(listLast(allFiles.name, "."))>
    <cfif NOT listFindNoCase(safeExtensions, fileExt)>
        <cftry>
            <cffile action="delete" file="#filePath#">
            <cfset deletedCount++>
            <cfcatch>
                <!--- Best effort: a file that cannot be deleted is not counted --->
            </cfcatch>
        </cftry>
    </cfif>
</cfloop>
|
||||
|
||||
<cfset response["SANITIZED_COUNT"] = deletedCount>
|
||||
|
||||
<!--- Make extracted files world-readable for nginx --->
|
||||
<cftry>
|
||||
<cfexecute name="chmod" arguments="-R o+rX #extractDir#" timeout="10" />
|
||||
<cfcatch></cfcatch>
|
||||
</cftry>
|
||||
|
||||
<!--- Delete the uploaded ZIP --->
|
||||
<cffile action="delete" file="#uploadedFile#">
|
||||
|
||||
<!--- Find the main HTML file --->
|
||||
<cfset htmlFile = "">
|
||||
<cfset htmlFiles = []>
|
||||
|
|
|
|||
|
|
@ -1056,7 +1056,8 @@
|
|||
currentStep: 1,
|
||||
imageObjectUrls: [], // Store object URLs for uploaded images
|
||||
imageMappings: [], // For matching uploaded images to items (from HTML import)
|
||||
itemImages: {} // item ID -> File object for matched images
|
||||
itemImages: {}, // item ID -> File object for matched images
|
||||
tempFolder: null // Temp folder ID from ZIP upload (for cleanup after save)
|
||||
};
|
||||
|
||||
// Image preview functions
|
||||
|
|
@ -1435,6 +1436,11 @@
|
|||
|
||||
console.log('ZIP uploaded, extracted URL:', uploadResult.URL);
|
||||
|
||||
// Store temp folder ID for cleanup after wizard completes
|
||||
if (uploadResult.FOLDER) {
|
||||
config.tempFolder = uploadResult.FOLDER;
|
||||
}
|
||||
|
||||
// Update loading message
|
||||
document.getElementById('conversation').innerHTML = '';
|
||||
addMessage('ai', `
|
||||
|
|
@ -2817,7 +2823,8 @@
|
|||
businessId: config.businessId || 0,
|
||||
menuId: config.menuId || 0,
|
||||
userId: config.userId,
|
||||
data: config.extractedData
|
||||
data: config.extractedData,
|
||||
tempFolder: config.tempFolder
|
||||
})
|
||||
});
|
||||
|
||||
|
|
|
|||
Reference in a new issue