From e7aaae58b75feda8743200cde99ec0a31c1a1f9f Mon Sep 17 00:00:00 2001 From: John Mizerek Date: Sun, 1 Mar 2026 19:18:17 -0800 Subject: [PATCH] Replace regex extraction with brace-counting for __OO_STATE__ The regex .*? (non-greedy) fails on 500K+ JSON due to Java regex backtracking limits, causing truncated data (only 3 of 6 menus extracted). Replace all 3 extraction points with cfscript brace-counting that reliably handles any JSON size. Also decode HTML entities (&amp; -> &, &lt; -> <, etc.) from Chrome View Source saves. Co-Authored-By: Claude Opus 4.6 --- api/setup/analyzeMenuUrl.cfm | 109 ++++++++++++++++++++++++++++++----- 1 file changed, 96 insertions(+), 13 deletions(-) diff --git a/api/setup/analyzeMenuUrl.cfm b/api/setup/analyzeMenuUrl.cfm index 37a167c..9eb5a55 100644 --- a/api/setup/analyzeMenuUrl.cfm +++ b/api/setup/analyzeMenuUrl.cfm @@ -283,12 +283,36 @@ ]+>", "", "all")> - - - - + + ooStateJson2 = ""; + ooStart2 = findNoCase("window.__OO_STATE__", ooExtractHtml2); + if (ooStart2 > 0) { + var bs2 = find("{", ooExtractHtml2, ooStart2); + if (bs2 > 0) { + var d2 = 0; var inS2 = false; var esc2 = false; var be2 = 0; + for (var i2 = bs2; i2 <= len(ooExtractHtml2); i2++) { + var c2 = mid(ooExtractHtml2, i2, 1); + if (esc2) { esc2 = false; continue; } + if (c2 == chr(92) && inS2) { esc2 = true; continue; } + if (c2 == '"') { inS2 = !inS2; continue; } + if (!inS2) { + if (c2 == "{") d2 = d2 + 1; + else if (c2 == "}") { d2 = d2 - 1; if (d2 == 0) { be2 = i2; break; } } + } + } + if (be2 > 0) ooStateJson2 = mid(ooExtractHtml2, bs2, be2 - bs2 + 1); + } + } + if (len(ooStateJson2)) { + ooStateJson2 = replace(ooStateJson2, "&", "&", "all"); + ooStateJson2 = replace(ooStateJson2, "<", "<", "all"); + ooStateJson2 = replace(ooStateJson2, ">", ">", "all"); + ooStateJson2 = replace(ooStateJson2, """, '"', "all"); + } + + - + @@ -567,10 +591,34 @@ ]+>", "", "all")> - - - - + + otherOoJson = ""; + var otherOoStart = findNoCase("window.__OO_STATE__", otherCleanHtml); + if 
(otherOoStart > 0) { + var obs = find("{", otherCleanHtml, otherOoStart); + if (obs > 0) { + var od = 0; var ois = false; var oesc = false; var obe = 0; + for (var oi = obs; oi <= len(otherCleanHtml); oi++) { + var oc = mid(otherCleanHtml, oi, 1); + if (oesc) { oesc = false; continue; } + if (oc == chr(92) && ois) { oesc = true; continue; } + if (oc == '"') { ois = !ois; continue; } + if (!ois) { + if (oc == "{") od = od + 1; + else if (oc == "}") { od = od - 1; if (od == 0) { obe = oi; break; } } + } + } + if (obe > 0) otherOoJson = mid(otherCleanHtml, obs, obe - obs + 1); + } + } + if (len(otherOoJson)) { + otherOoJson = replace(otherOoJson, "&", "&", "all"); + otherOoJson = replace(otherOoJson, "<", "<", "all"); + otherOoJson = replace(otherOoJson, ">", ">", "all"); + otherOoJson = replace(otherOoJson, """, '"', "all"); + } + + @@ -955,10 +1003,45 @@ ]+>", "", "all")> - - - - + + + ooStateJson = ""; + ooStartPos = findNoCase("window.__OO_STATE__", ooExtractHtml); + if (ooStartPos > 0) { + braceStart = find("{", ooExtractHtml, ooStartPos); + if (braceStart > 0) { + var depth = 0; + var inStr = false; + var esc = false; + var braceEnd = 0; + var totalLen = len(ooExtractHtml); + for (var ci = braceStart; ci <= totalLen; ci++) { + var ch = mid(ooExtractHtml, ci, 1); + if (esc) { esc = false; continue; } + if (ch == chr(92) && inStr) { esc = true; continue; } + if (ch == '"') { inStr = !inStr; continue; } + if (!inStr) { + if (ch == "{") depth = depth + 1; + else if (ch == "}") { + depth = depth - 1; + if (depth == 0) { braceEnd = ci; break; } + } + } + } + if (braceEnd > 0) { + ooStateJson = mid(ooExtractHtml, braceStart, braceEnd - braceStart + 1); + } + } + } + // Decode HTML entities from View Source (Chrome encodes & as &amp; etc.) 
+ if (len(ooStateJson)) { + ooStateJson = replace(ooStateJson, "&", "&", "all"); + ooStateJson = replace(ooStateJson, "<", "<", "all"); + ooStateJson = replace(ooStateJson, ">", ">", "all"); + ooStateJson = replace(ooStateJson, """, '"', "all"); + } + +