Fix address parsing: proper US format detection (Street, City, ST ZIP)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
John Mizerek 2026-03-14 18:11:41 -07:00
parent 1df69463a8
commit b48f20011d

View file

@@ -124,21 +124,34 @@ try {
// Parse address into components if only full string
if (!empty($bizInfo['address']) && empty($bizInfo['addressLine1'])) {
$addr = trim(preg_replace('/,?\s*(United States|USA|US|U\.S\.A?\.)\s*$/i', '', $bizInfo['address']));
$addr = $bizInfo['address'];
$addr = preg_replace('/\n+/', ', ', $addr); // newlines to commas
if (preg_match('/\b(\d{5})(?:-\d{4})?\s*$/', $addr, $zm)) {
$bizInfo['zip'] = $zm[1];
$addr = trim(substr($addr, 0, strrpos($addr, $zm[0])));
}
if (preg_match('/\b([A-Z]{2})\s*$/i', $addr, $sm)) {
$bizInfo['state'] = strtoupper($sm[1]);
$addr = trim(substr($addr, 0, strrpos($addr, $sm[0])));
}
$addr = rtrim($addr, ', ');
if (strpos($addr, ',') !== false) {
$parts = array_map('trim', explode(',', $addr));
$bizInfo['addressLine1'] = $parts[0];
$bizInfo['city'] = $parts[count($parts) - 1]; // last part before state is city
$addr = preg_replace('/,\s*,/', ',', $addr); // collapse double commas
$addr = trim(preg_replace('/,?\s*(United States|USA|US|U\.S\.A?\.)\s*$/i', '', $addr));
// Try standard US format: Street, City, ST ZIP or Street, City, ST, ZIP
if (preg_match('/^(.+?),\s*(.+?),\s*([A-Z]{2})\s*,?\s*(\d{5}(?:-\d{4})?)?$/i', $addr, $am)) {
$bizInfo['addressLine1'] = trim($am[1]);
$bizInfo['city'] = trim($am[2]);
$bizInfo['state'] = strtoupper($am[3]);
if (!empty($am[4])) $bizInfo['zip'] = $am[4];
} else {
// Fallback: strip ZIP and state from end
if (preg_match('/\b(\d{5})(?:-\d{4})?\s*$/', $addr, $zm)) {
$bizInfo['zip'] = $zm[1];
$addr = trim(substr($addr, 0, strrpos($addr, $zm[0])));
}
$addr = rtrim($addr, ', ');
if (preg_match('/,\s*([A-Z]{2})\s*$/i', $addr, $sm)) {
$bizInfo['state'] = strtoupper($sm[1]);
$addr = trim(substr($addr, 0, strrpos($addr, ',')));
}
if (strpos($addr, ',') !== false) {
$parts = array_map('trim', explode(',', $addr));
$bizInfo['addressLine1'] = $parts[0];
$bizInfo['city'] = $parts[count($parts) - 1];
} else {
$bizInfo['addressLine1'] = $addr;
}
}
}