ZIP upload: add file sanitization, direct file read, and temp cleanup

- uploadSavedPage.cfm: sanitize extracted files (whitelist safe extensions, delete symlinks) to protect against malicious content from infected sites
- analyzeMenuUrl.cfm: detect local temp URLs and read directly from disk, bypassing Playwright for faster processing of saved pages
- saveWizard.cfm: delete temp folder immediately after wizard completes instead of waiting for 1-hour auto-cleanup
- setup-wizard.html: track temp folder ID and pass to saveWizard for cleanup

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-13 08:02:41 -08:00 · 2026-02-13 08:02:41 -08:00 · 5cde8ce4fa
commit 5cde8ce4fa
parent 336aef8685
4 changed files with 119 additions and 25 deletions
--- a/api/setup/analyzeMenuUrl.cfm
+++ b/api/setup/analyzeMenuUrl.cfm
@ -55,9 +55,29 @@
            <cfset targetUrl = "https://" & targetUrl>
        </cfif>

+        <!--- Check if this is a local temp file (ZIP upload) - read directly, skip Playwright --->
+        <cfif findNoCase("/temp/menu-import/", targetUrl)>
+            <!--- Map the URL to a path under the web root: keep only the path part, dropping host, query string, and fragment --->
+            <cfset localUrlPath = reReplaceNoCase(targetUrl, "^https?://[^/]+(/temp/menu-import/[^?##]*).*$", "\1")>
+            <cfif NOT reFindNoCase("^/temp/menu-import/", localUrlPath)>
+                <!--- The marker appeared somewhere other than the start of the URL path (or the URL is malformed) --->
+                <cfthrow message="Invalid local temp file URL">
+            </cfif>
+            <cfset localFilePath = expandPath(localUrlPath)>
+
+            <!--- SECURITY: targetUrl is request input. Resolve ".." segments and symlinks, then require the result to stay inside the temp import root before anything is read from disk --->
+            <cfset tempImportRoot = createObject("java", "java.io.File").init(expandPath("/temp/menu-import/")).getCanonicalPath()>
+            <cfset resolvedFilePath = createObject("java", "java.io.File").init(localFilePath).getCanonicalPath()>
+            <cfif compare(left(resolvedFilePath, len(tempImportRoot) + 1), tempImportRoot & createObject("java", "java.io.File").separator) NEQ 0>
+                <cfthrow message="Local file is outside the temp import folder">
+            </cfif>
+            <cfset arrayAppend(response.steps, "Local temp file detected: " & localFilePath)>
+
+            <cfif NOT fileExists(localFilePath)>
+                <cfthrow message="Local file not found: #localFilePath#">
+            </cfif>
+
+            <cfset pageHtml = fileRead(localFilePath, "utf-8")>
+            <cfset playwrightImages = arrayNew(1)>
+            <cfset arrayAppend(response.steps, "Read " & len(pageHtml) & " bytes from local file")>
+
+            <!--- Extract base URL for resolving relative links --->
+            <cfset baseUrl = reReplace(targetUrl, "(https?://[^/]+).*", "\1")>
+            <cfset basePath = reReplace(targetUrl, "(https?://[^/]+/[^?]*/?).*", "\1")>
+            <cfif NOT reFindNoCase("/$", basePath)>
+                <cfset basePath = reReplace(basePath, "/[^/]*$", "/")>
+            </cfif>
+        <cfelse>
+            <!--- Remote URL - use Playwright for JS-rendered content --->
            <cfset arrayAppend(response.steps, "Fetching URL with Playwright: " & targetUrl)>

-        <!--- Use Playwright for JS-rendered content --->
            <cfset playwrightOutput = "">
            <cfexecute name="/opt/playwright/run.sh" arguments="'#targetUrl#' 4000" timeout="90" variable="playwrightOutput" />

@ -80,6 +100,7 @@
            <cfif NOT reFindNoCase("/$", basePath)>
                <cfset basePath = reReplace(basePath, "/[^/]*$", "/")>
            </cfif>
+        </cfif>
    <cfelse>
        <cfthrow message="Either 'url' or 'html' content is required">
    </cfif>
--- a/api/setup/saveWizard.cfm
+++ b/api/setup/saveWizard.cfm
@ -744,6 +744,23 @@ try {
        "itemIdMap": itemIdMap
    };

+    // Remove the ZIP-upload temp folder right away when the client supplied its ID.
+    tempFolder = "";
+    if (structKeyExists(data, "tempFolder") && isSimpleValue(data.tempFolder)) {
+        tempFolder = trim(data.tempFolder);
+    }
+    // Accept only a 32-character lowercase hex ID so the value cannot contain
+    // path separators or ".." before it is appended to the temp root.
+    if (len(tempFolder) && reFind("^[a-f0-9]{32}$", tempFolder)) {
+        tempFolderPath = expandPath("/temp/menu-import/" & tempFolder);
+        if (directoryExists(tempFolderPath)) {
+            // Best effort: a failed delete is reported as a step, not as an error.
+            try {
+                directoryDelete(tempFolderPath, true);
+                response.steps.append("Cleaned up temp folder: " & tempFolder);
+            } catch (any cleanupErr) {
+                response.steps.append("Warning: Could not delete temp folder: " & cleanupErr.message);
+            }
+        }
+    }
+
+
 } catch (any e) {
    response.errors.append(e.message);
    if (len(e.detail)) {
--- a/api/setup/uploadSavedPage.cfm
+++ b/api/setup/uploadSavedPage.cfm
@ -68,15 +68,64 @@
    <!--- Extract the ZIP file --->
    <cfzip action="unzip" file="#uploadedFile#" destination="#extractDir#" overwrite="true">

-    <!--- Make extracted files world-readable for nginx/Playwright --->
+    <!--- Delete the uploaded ZIP --->
+    <cffile action="delete" file="#uploadedFile#">
+
+    <!--- SECURITY: Sanitize extracted files --->
+    <!--- Whitelist of safe file extensions for saved web pages --->
+    <cfset safeExtensions = "htm,html,css,js,json,txt,xml,svg,jpg,jpeg,png,gif,webp,ico,woff,woff2,ttf,eot,otf,map">
+    <cfset deletedCount = 0>
+
+    <!--- Recursively scan and remove unsafe files --->
+    <cfdirectory action="list" directory="#extractDir#" name="allFiles" recurse="true">
+    <cfloop query="allFiles">
+        <cfset filePath = "#allFiles.directory#/#allFiles.name#">
+
+        <!--- Skip directories --->
+        <cfif allFiles.type EQ "Dir">
+            <cfcontinue>
+        </cfif>
+
+        <!--- Delete symlinks. Detection goes through the JVM: cfexecute starts the program directly rather than through a shell, so a "test -L ... && echo" command line never prints its marker, and interpolating the file name into a command is unsafe --->
+        <cfset isSymlink = false>
+        <cftry>
+            <cfset isSymlink = createObject("java", "java.nio.file.Files").isSymbolicLink(createObject("java", "java.io.File").init(filePath).toPath())>
+        <cfcatch>
+            <!--- Detection failed: treat the entry as a regular file so the extension whitelist below still applies --->
+        </cfcatch>
+        </cftry>
+
+        <cfif isSymlink>
+            <!--- Remove the link and move on; a failed delete is ignored (best effort) --->
+            <cftry>
+                <cffile action="delete" file="#filePath#">
+                <cfset deletedCount++>
+            <cfcatch></cfcatch>
+            </cftry>
+            <cfcontinue>
+        </cfif>
+
+        <!--- Check file extension against whitelist --->
+        <cfset fileExt = lCase(listLast(allFiles.name, "."))>
+        <cfif NOT listFindNoCase(safeExtensions, fileExt)>
+            <cftry>
+                <cffile action="delete" file="#filePath#">
+                <cfset deletedCount++>
+            <cfcatch></cfcatch>
+            </cftry>
+        </cfif>
+    </cfloop>
+
+    <cfset response["SANITIZED_COUNT"] = deletedCount>
+
+    <!--- Make extracted files world-readable for nginx --->
    <cftry>
        <cfexecute name="chmod" arguments="-R o+rX #extractDir#" timeout="10" />
    <cfcatch></cfcatch>
    </cftry>

-    <!--- Delete the uploaded ZIP --->
-    <cffile action="delete" file="#uploadedFile#">
-
    <!--- Find the main HTML file --->
    <cfset htmlFile = "">
    <cfset htmlFiles = []>
--- a/portal/setup-wizard.html
+++ b/portal/setup-wizard.html
@ -1056,7 +1056,8 @@
      currentStep: 1,
      imageObjectUrls: [], // Store object URLs for uploaded images
      imageMappings: [], // For matching uploaded images to items (from HTML import)
-      itemImages: {} // item ID -> File object for matched images
+      itemImages: {}, // item ID -> File object for matched images
+      tempFolder: null // Temp folder ID from ZIP upload (for cleanup after save)
    };

    // Image preview functions
@ -1435,6 +1436,11 @@

          console.log('ZIP uploaded, extracted URL:', uploadResult.URL);

+          // Store temp folder ID for cleanup after wizard completes
+          if (uploadResult.FOLDER) {
+            config.tempFolder = uploadResult.FOLDER;
+          }
+
          // Update loading message
          document.getElementById('conversation').innerHTML = '';
          addMessage('ai', `
@ -2817,7 +2823,8 @@
            businessId: config.businessId || 0,
            menuId: config.menuId || 0,
            userId: config.userId,
-            data: config.extractedData
+            data: config.extractedData,
+            tempFolder: config.tempFolder
          })
        });