This repository has been archived on 2026-03-21. You can view files and clone it, but cannot push or open issues or pull requests.
payfrit-biz/api/setup/uploadSavedPage.cfm
John Mizerek 5cde8ce4fa ZIP upload: add file sanitization, direct file read, and temp cleanup
- uploadSavedPage.cfm: sanitize extracted files (whitelist safe extensions,
  delete symlinks) to protect against malicious content from infected sites
- analyzeMenuUrl.cfm: detect local temp URLs and read directly from disk,
  bypassing Playwright for faster processing of saved pages
- saveWizard.cfm: delete temp folder immediately after wizard completes
  instead of waiting for 1-hour auto-cleanup
- setup-wizard.html: track temp folder ID and pass to saveWizard for cleanup

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-13 08:02:41 -08:00

225 lines
8.3 KiB
Text

<!--- JSON-only endpoint: no debug output, emit only what is inside cfoutput, never cache. --->
<cfsetting showdebugoutput="false" enablecfoutputonly="true">
<cfcontent type="application/json; charset=utf-8" reset="true">
<cfheader name="Cache-Control" value="no-store">
<!--- Default reply envelope; OK stays false until the whole import succeeds. --->
<cfset response = { "OK": false, "MESSAGE": "", "URL": "" }>
<cftry>
<!--- Working area that holds one sub-folder per extracted saved page --->
<cfset tempBaseDir = expandPath("/temp/menu-import")>
<cfif directoryExists(tempBaseDir) EQ false>
    <!--- First use: create the working area --->
    <cfdirectory action="create" directory="#tempBaseDir#" mode="755">
</cfif>
<!--- Housekeeping (best effort): drop extraction folders last modified more than an hour ago.
      Any failure here is ignored so it can never block the current upload. --->
<cftry>
    <cfset staleCutoff = dateAdd("h", -1, now())>
    <cfdirectory action="list" directory="#tempBaseDir#" name="staleCandidates" type="dir">
    <cfloop query="staleCandidates">
        <!--- Keep anything still fresh, and never touch the "." / ".." pseudo-entries --->
        <cfif NOT (staleCandidates.dateLastModified LT staleCutoff) OR staleCandidates.name EQ "." OR staleCandidates.name EQ "..">
            <cfcontinue>
        </cfif>
        <cftry>
            <cfdirectory action="delete" directory="#tempBaseDir#/#staleCandidates.name#" recurse="true">
            <cfcatch></cfcatch>
        </cftry>
    </cfloop>
    <cfcatch></cfcatch>
</cftry>
<!--- Guard: the form post must contain a non-empty zipFile field, otherwise reply with an error and stop --->
<cfset hasZipField = structKeyExists(form, "zipFile") AND form.zipFile NEQ "">
<cfif NOT hasZipField>
    <cfset response["MESSAGE"] = "No ZIP file uploaded">
    <cfoutput>#serializeJSON(response)#</cfoutput>
    <cfabort>
</cfif>
<!--- Generate unique folder name (UUID without dashes, lower-cased) for this import --->
<cfset uniqueId = lCase(replace(createUUID(), "-", "", "all"))>
<cfset extractDir = "#tempBaseDir#/#uniqueId#">
<!--- Upload the ZIP file.
      The raw upload is staged in the server's temp directory instead of tempBaseDir:
      tempBaseDir is reachable over HTTP (the success URL points into /temp/menu-import),
      so an unvalidated upload must never be written there, not even briefly. --->
<cffile action="upload"
        filefield="zipFile"
        destination="#getTempDirectory()#"
        nameconflict="makeunique"
        mode="644"
        result="uploadResult">
<!--- Validate it's a ZIP file (by the client-supplied extension) --->
<cfset uploadedFile = "#uploadResult.serverDirectory#/#uploadResult.serverFile#">
<cfset fileExt = lCase(uploadResult.clientFileExt)>
<cfif fileExt NEQ "zip">
    <cffile action="delete" file="#uploadedFile#">
    <cfset response["MESSAGE"] = "Only ZIP files are accepted">
    <cfoutput>#serializeJSON(response)#</cfoutput>
    <cfabort>
</cfif>
<cftry>
    <!--- SECURITY: refuse archives whose entry names are absolute or contain a ".." segment,
          so extraction cannot write outside extractDir (zip-slip). --->
    <cfzip action="list" file="#uploadedFile#" name="zipEntries">
    <cfloop query="zipEntries">
        <cfset entryName = replace(zipEntries.name, "\", "/", "all")>
        <cfif left(entryName, 1) EQ "/" OR listFind(entryName, "..", "/")>
            <cfthrow type="InvalidZip" message="ZIP contains an unsafe entry path">
        </cfif>
    </cfloop>
    <!--- Create extraction directory --->
    <cfdirectory action="create" directory="#extractDir#" mode="755">
    <!--- Extract the ZIP file --->
    <cfzip action="unzip" file="#uploadedFile#" destination="#extractDir#" overwrite="true">
    <cfcatch type="any">
        <!--- Do not leave a partially extracted folder behind; then let the outer handler report the error --->
        <cfif directoryExists(extractDir)>
            <cftry>
                <cfdirectory action="delete" directory="#extractDir#" recurse="true">
                <cfcatch></cfcatch>
            </cftry>
        </cfif>
        <cfrethrow>
    </cfcatch>
    <cffinally>
        <!--- Delete the uploaded ZIP whether or not extraction succeeded.
              Best effort: a failed delete of the staged file must not mask the real outcome. --->
        <cftry>
            <cfif fileExists(uploadedFile)>
                <cffile action="delete" file="#uploadedFile#">
            </cfif>
            <cfcatch></cfcatch>
        </cftry>
    </cffinally>
</cftry>
<!--- SECURITY: Sanitize extracted files.
      Removes every symbolic link and every file whose extension is not on the whitelist,
      and reports the number of removed entries in response["SANITIZED_COUNT"]. --->
<!--- Whitelist of safe file extensions for saved web pages --->
<cfset safeExtensions = "htm,html,css,js,json,txt,xml,svg,jpg,jpeg,png,gif,webp,ico,woff,woff2,ttf,eot,otf,map">
<cfset deletedCount = 0>
<!--- Symlinks are detected through java.nio instead of shelling out to `test -L ... && echo SYMLINK`:
      cfexecute starts the program directly (no shell), so "&&" and "echo" were handed to `test`
      as plain arguments and the marker was never printed, i.e. no symlink was ever detected.
      This also keeps archive-controlled file names out of any command line. --->
<cfset nioFiles = createObject("java", "java.nio.file.Files")>
<!--- Recursively scan and remove unsafe entries --->
<cfdirectory action="list" directory="#extractDir#" name="allFiles" recurse="true">
<cfloop query="allFiles">
    <cfset filePath = "#allFiles.directory#/#allFiles.name#">
    <!--- Symlink check runs before the directory skip so that links to directories are removed too --->
    <cfset isSymlink = false>
    <cftry>
        <cfset entryPath = createObject("java", "java.io.File").init(filePath).toPath()>
        <cfset isSymlink = nioFiles.isSymbolicLink(entryPath)>
        <cfcatch>
            <!--- Entry could not be inspected; treat it as a regular entry and let the whitelist decide --->
        </cfcatch>
    </cftry>
    <cfif isSymlink>
        <cftry>
            <!--- Files.deleteIfExists removes the link itself, never the link target --->
            <cfset nioFiles.deleteIfExists(entryPath)>
            <cfset deletedCount++>
            <cfcatch></cfcatch>
        </cftry>
        <cfcontinue>
    </cfif>
    <!--- Skip (real) directories --->
    <cfif allFiles.type EQ "Dir">
        <cfcontinue>
    </cfif>
    <!--- Check file extension against whitelist --->
    <cfset fileExt = lCase(listLast(allFiles.name, "."))>
    <cfif NOT listFindNoCase(safeExtensions, fileExt)>
        <cftry>
            <cffile action="delete" file="#filePath#">
            <cfset deletedCount++>
            <cfcatch></cfcatch>
        </cftry>
    </cfif>
</cfloop>
<cfset response["SANITIZED_COUNT"] = deletedCount>
<!--- Recursively add read (and directory-traverse) permission for "other" so nginx can serve the
      extracted tree. Best effort: a failing chmod is ignored. --->
<cftry>
    <cfset chmodArgs = "-R o+rX " & extractDir>
    <cfexecute name="chmod" arguments="#chmodArgs#" timeout="10" />
    <cfcatch></cfcatch>
</cftry>
<!--- Gather every candidate HTML entry point as { name, path, depth } in htmlFiles --->
<cfset htmlFile = "">
<cfset htmlFiles = []>
<!--- Depth 0: *.htm* files sitting directly in the extraction folder --->
<cfdirectory action="list" directory="#extractDir#" name="rootHtml" filter="*.htm*" type="file">
<cfloop query="rootHtml">
    <cfset candidate = { "name": rootHtml.name, "path": "#extractDir#/#rootHtml.name#", "depth": 0 }>
    <cfset arrayAppend(htmlFiles, candidate)>
</cfloop>
<!--- Depth 1: *.htm* files one folder down (archives that wrap the page and its _files folder in a directory) --->
<cfdirectory action="list" directory="#extractDir#" name="childDirs" type="dir">
<cfloop query="childDirs">
    <cfif childDirs.name EQ "." OR childDirs.name EQ "..">
        <cfcontinue>
    </cfif>
    <!--- Resolve the folder path before the inner query loop starts --->
    <cfset childPath = extractDir & "/" & childDirs.name>
    <cfdirectory action="list" directory="#childPath#" name="childHtml" filter="*.htm*" type="file">
    <cfloop query="childHtml">
        <cfset candidate = { "name": childHtml.name, "path": "#childPath#/#childHtml.name#", "depth": 1 }>
        <cfset arrayAppend(htmlFiles, candidate)>
    </cfloop>
</cfloop>
<!--- Nothing usable in the archive: remove the extraction folder and report the error --->
<cfif arrayIsEmpty(htmlFiles)>
    <cfdirectory action="delete" directory="#extractDir#" recurse="true">
    <cfset response["MESSAGE"] = "No HTML files found in ZIP">
    <cfoutput>#serializeJSON(response)#</cfoutput>
    <cfabort>
</cfif>
<!--- Pick the main page in one pass by ranking each candidate (lower rank wins, first hit wins ties):
        1 = index.html at top level
        2 = index.html anywhere else
        3 = any other top-level HTML file
        4 = anything else --->
<cfset htmlFile = {}>
<cfset bestRank = 5>
<cfloop array="#htmlFiles#" index="candidate">
    <cfset isIndexPage = (lCase(candidate.name) EQ "index.html")>
    <cfset isTopLevel = (candidate.depth EQ 0)>
    <cfif isIndexPage AND isTopLevel>
        <cfset candidateRank = 1>
    <cfelseif isIndexPage>
        <cfset candidateRank = 2>
    <cfelseif isTopLevel>
        <cfset candidateRank = 3>
    <cfelse>
        <cfset candidateRank = 4>
    </cfif>
    <!--- Strictly-better only, so the earliest candidate of a given rank is kept --->
    <cfif candidateRank LT bestRank>
        <cfset bestRank = candidateRank>
        <cfset htmlFile = candidate>
    </cfif>
    <!--- Nothing can beat a top-level index.html --->
    <cfif bestRank EQ 1>
        <cfbreak>
    </cfif>
</cfloop>
<!--- Fallback when no candidate was ranked: take the first entry --->
<cfif bestRank EQ 5>
    <cfset htmlFile = htmlFiles[1]>
</cfif>
<!--- Path of the chosen page relative to the extraction folder, with forward slashes and a leading "/" --->
<cfset relativePath = replace(replace(htmlFile.path, extractDir, ""), "\", "/", "all")>
<cfif left(relativePath, 1) NEQ "/">
    <cfset relativePath = "/" & relativePath>
</cfif>
<!--- Host name exactly as sent by the client in the Host header --->
<cfset serverHost = cgi.HTTP_HOST>
<!--- Scheme: honour X-Forwarded-Proto when present (reverse proxy), otherwise fall back to cgi.HTTPS --->
<cfset requestHeaders = getHttpRequestData().headers>
<cfset forwardedProto = "">
<cfif structKeyExists(requestHeaders, "X-Forwarded-Proto")>
    <cfset forwardedProto = requestHeaders["X-Forwarded-Proto"]>
</cfif>
<cfset protocol = "http">
<cfif forwardedProto EQ "https" OR cgi.HTTPS EQ "on">
    <cfset protocol = "https">
</cfif>
<!--- Success payload: public URL of the page plus the folder id, file name and number of HTML candidates --->
<cfset response["OK"] = true>
<cfset response["MESSAGE"] = "ZIP extracted successfully">
<cfset response["URL"] = protocol & "://" & serverHost & "/temp/menu-import/" & uniqueId & relativePath>
<cfset response["FOLDER"] = uniqueId>
<cfset response["FILE"] = htmlFile.name>
<cfset response["FILE_COUNT"] = arrayLen(htmlFiles)>
<cfoutput>#serializeJSON(response)#</cfoutput>
<cfcatch type="any">
    <!--- Any failure: reply with OK=false and the exception message, plus its detail when there is one --->
    <cfset errorText = "Error: #cfcatch.message#">
    <cfif len(cfcatch.detail)>
        <cfset errorText = errorText & " - #cfcatch.detail#">
    </cfif>
    <cfset response["OK"] = false>
    <cfset response["MESSAGE"] = errorText>
    <cfoutput>#serializeJSON(response)#</cfoutput>
</cfcatch>
</cftry>