payfrit-works/cfpayment/api/utils/csvutils.cfc

547 lines
No EOL
13 KiB
Text

<!---
$Id$
Copyright 2008 Ben Nadel / Kinky Solutions (http://www.bennadel.com)
Mark Mazelin (http://www.mkville.com/)
Licensed under the Apache License, Version 2.0 (the "License"); you
may not use this file except in compliance with the License. You may
obtain a copy of the License at:
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
--->
<cfcomponent output="false">
<!--- --------------------------------------------------------------------------------------- ----
Blog Entry:
Parsing CSV Values In To A ColdFusion Query
Code Snippet:
1
Author:
Ben Nadel / Kinky Solutions
Link:
http://www.bennadel.com/index.cfm?dax=blog:501.view
Date Posted:
Jan 30, 2007 at 3:42 PM
Modified 18-NOV-2008-MBM: to include CSVToArray() function to perform conversion to array
---- --------------------------------------------------------------------------------------- --->
<cffunction
    name="CSVToQuery"
    access="public"
    returntype="query"
    output="false"
    hint="Converts the given CSV string to a query.">

    <!---
        All arguments below are forwarded unchanged to CSVToArray() through
        argumentCollection; FirstRowColumnNames and TrimData are additionally
        used here while building the query.
    --->
    <cfargument
        name="CSV"
        type="string"
        required="true"
        hint="This is the CSV string that will be manipulated."
        />
    <cfargument
        name="Delimiter"
        type="string"
        required="false"
        default=","
        hint="This is the delimiter that will separate the fields within the CSV value."
        />
    <cfargument
        name="Qualifier"
        type="string"
        required="false"
        default=""""
        hint="This is the qualifier that will wrap around fields that have special characters embedded."
        />
    <cfargument
        name="Trim"
        type="boolean"
        required="false"
        default="false"
        hint="Forwarded to CSVToArray() as its Trim flag (strip line breaks from the end of the CSV data)."
        />
    <!--- 12-JUL-2007-MBM: ADDED 1 ARGUMENT --->
    <cfargument
        name="FirstRowColumnNames"
        type="boolean"
        required="false"
        default="false"
        hint="When true, the first CSV row supplies the query column names and is not returned as a data row."
        />
    <!--- 12-JUL-2007-MBM: ADDED 1 ARGUMENT --->
    <cfargument
        name="TrimData"
        type="boolean"
        required="false"
        default="false"
        hint="When true, every cell value is trimmed before it is stored in the query."
        />

    <!--- Define the local scope. --->
    <cfset var LOCAL = StructNew() />

    <!--- Parse the CSV into an array of row arrays. --->
    <cfset LOCAL.Rows = CSVToArray( argumentCollection = ARGUMENTS ) />
    <cfset LOCAL.RowCount = ArrayLen( LOCAL.Rows ) />

    <!---
        Index (within LOCAL.Rows) of the first row that holds data. When the
        first row carries the column names it is skipped entirely, so it
        never has to be removed from the query afterwards.
    --->
    <cfif ARGUMENTS.FirstRowColumnNames>
        <cfset LOCAL.FirstDataRow = 2 />
    <cfelse>
        <cfset LOCAL.FirstDataRow = 1 />
    </cfif>

    <!---
        One pass over the rows to find the widest row (header included) and
        to build an array holding one empty string per DATA row. That array
        is handed to QueryAddColumn() so each new column is pre-populated
        with the right number of blank records.
    --->
    <cfset LOCAL.MaxFieldCount = 0 />
    <cfset LOCAL.EmptyArray = ArrayNew( 1 ) />
    <cfloop
        index="LOCAL.RowIndex"
        from="1"
        to="#LOCAL.RowCount#"
        step="1">
        <cfset LOCAL.MaxFieldCount = Max(
            LOCAL.MaxFieldCount,
            ArrayLen( LOCAL.Rows[ LOCAL.RowIndex ] )
            ) />
        <cfif LOCAL.RowIndex GTE LOCAL.FirstDataRow>
            <cfset ArrayAppend( LOCAL.EmptyArray, "" ) />
        </cfif>
    </cfloop>

    <!--- Create the query; every column is added as VARCHAR. --->
    <cfset LOCAL.Query = QueryNew( "" ) />

    <!--- Column names by field position, plus a set of names already used. --->
    <cfset LOCAL.ColumnArray = ArrayNew( 1 ) />
    <cfset LOCAL.UsedNames = StructNew() />

    <cfloop
        index="LOCAL.FieldIndex"
        from="1"
        to="#LOCAL.MaxFieldCount#"
        step="1">

        <cfset LOCAL.CurrColumnName = "" />

        <!---
            Take the name from the header row only when the header actually
            has a value at this position; a header shorter than the widest
            data row must not cause an out-of-bounds array access.
        --->
        <cfif ARGUMENTS.FirstRowColumnNames AND (ArrayLen( LOCAL.Rows[ 1 ] ) GTE LOCAL.FieldIndex)>
            <cfset LOCAL.CurrColumnName = trim( LOCAL.Rows[ 1 ][ LOCAL.FieldIndex ] ) />
            <!--- replace spaces --->
            <cfset LOCAL.CurrColumnName = Replace( LOCAL.CurrColumnName, " ", "_", "ALL" ) />
            <!--- replace pound signs --->
            <cfset LOCAL.CurrColumnName = Replace( LOCAL.CurrColumnName, "##", "NUM", "ALL" ) />
        </cfif>

        <!--- No header in use, header too short, or blank header cell. --->
        <cfif NOT Len( LOCAL.CurrColumnName )>
            <cfset LOCAL.CurrColumnName = "COLUMN_#LOCAL.FieldIndex#" />
        </cfif>

        <!---
            Keep column names unique (struct keys are case-insensitive, as
            are query column names) by suffixing the field position until
            the name is unused.
        --->
        <cfloop condition="StructKeyExists( LOCAL.UsedNames, LOCAL.CurrColumnName )">
            <cfset LOCAL.CurrColumnName = LOCAL.CurrColumnName & "_" & LOCAL.FieldIndex />
        </cfloop>
        <cfset LOCAL.UsedNames[ LOCAL.CurrColumnName ] = true />
        <cfset ArrayAppend( LOCAL.ColumnArray, LOCAL.CurrColumnName ) />

        <!---
            QueryAddColumn() (rather than QueryAddRow()) adds all the row
            values for the column in bulk from the array of empty strings.
        --->
        <cfset QueryAddColumn(
            LOCAL.Query,
            LOCAL.CurrColumnName,
            "CF_SQL_VARCHAR",
            LOCAL.EmptyArray
            ) />
    </cfloop>

    <!---
        Copy the data rows into the query. LOCAL.QueryRow is the 1-based
        query record that corresponds to LOCAL.RowIndex once the optional
        header row has been skipped.
    --->
    <cfloop
        index="LOCAL.RowIndex"
        from="#LOCAL.FirstDataRow#"
        to="#LOCAL.RowCount#"
        step="1">

        <cfset LOCAL.QueryRow = LOCAL.RowIndex - LOCAL.FirstDataRow + 1 />

        <cfloop
            index="LOCAL.FieldIndex"
            from="1"
            to="#ArrayLen( LOCAL.Rows[ LOCAL.RowIndex ] )#"
            step="1">

            <cfset LOCAL.Value = LOCAL.Rows[ LOCAL.RowIndex ][ LOCAL.FieldIndex ] />
            <cfif ARGUMENTS.TrimData>
                <cfset LOCAL.Value = trim( LOCAL.Value ) />
            </cfif>

            <!--- Cast to a Java string so the underlying query data is typed consistently. --->
            <cfset LOCAL.Query[ LOCAL.ColumnArray[ LOCAL.FieldIndex ] ][ LOCAL.QueryRow ] = JavaCast(
                "string",
                LOCAL.Value
                ) />
        </cfloop>
    </cfloop>

    <!--- The query has been populated; return it. --->
    <cfreturn LOCAL.Query />
</cffunction>
<!--- --------------------------------------------------------------------------------------- ----
Blog Entry:
CSVToArray() ColdFusion UDF For Parsing CSV Data / Files
Author:
Ben Nadel / Kinky Solutions
Link:
http://www.bennadel.com/index.cfm?dax=blog:991.view
Date Posted:
Oct 12, 2007 at 8:59 AM
---- --------------------------------------------------------------------------------------- --->
<cffunction
    name="CSVToArray"
    access="public"
    returntype="array"
    output="false"
    hint="Takes a CSV file or CSV data value and converts it to an array of arrays based on the given field delimiter. Line delimiter is assumed to be new line / carriage return related.">

    <!--- Define arguments. --->
    <cfargument
        name="File"
        type="string"
        required="false"
        default=""
        hint="The optional file containing the CSV data."
        />
    <cfargument
        name="CSV"
        type="string"
        required="false"
        default=""
        hint="The CSV text data (if the file was not used)."
        />
    <cfargument
        name="Delimiter"
        type="string"
        required="false"
        default=","
        hint="The data field delimiter."
        />
    <cfargument
        name="Trim"
        type="boolean"
        required="false"
        default="true"
        hint="Flags whether or not to trim the END of the file for line breaks and carriage returns."
        />
    <!--- Added last so existing positional calls keep working. --->
    <cfargument
        name="Qualifier"
        type="string"
        required="false"
        default=""""
        hint="The single character that wraps field values containing delimiters, line breaks or the qualifier itself (doubled to escape it)."
        />

    <!--- Define the local scope. --->
    <cfset var LOCAL = StructNew() />

    <!---
        If a file was given, read it into the CSV argument so the rest of
        the algorithm only ever deals with ARGUMENTS.CSV.
    --->
    <cfif Len( ARGUMENTS.File )>
        <cffile
            action="read"
            file="#ARGUMENTS.File#"
            variable="ARGUMENTS.CSV"
            />
    </cfif>

    <!---
        By default, strip new lines and carriage returns from the end of
        the data (spaces and tabs are deliberately left alone).
    --->
    <cfif ARGUMENTS.Trim>
        <cfset ARGUMENTS.CSV = REReplace(
            ARGUMENTS.CSV,
            "[\r\n]+$",
            "",
            "ALL"
            ) />
    </cfif>

    <!--- The delimiter and the qualifier must each be exactly one character. --->
    <cfif (Len( ARGUMENTS.Delimiter ) NEQ 1)>
        <cfset ARGUMENTS.Delimiter = "," />
    </cfif>
    <cfif (Len( ARGUMENTS.Qualifier ) NEQ 1)>
        <cfset ARGUMENTS.Qualifier = """" />
    </cfif>

    <!---
        Build regex-safe tokens for the two characters. In a Java regular
        expression a backslash in front of an ASCII letter or digit is
        either a different construct (\t, \d, \1 ...) or an error, so
        letters and digits are used as-is; every other character is
        escaped with a backslash, which is always legal.
    --->
    <cfif REFind( "[A-Za-z0-9]", ARGUMENTS.Delimiter )>
        <cfset LOCAL.DelimiterToken = ARGUMENTS.Delimiter />
    <cfelse>
        <cfset LOCAL.DelimiterToken = "\" & ARGUMENTS.Delimiter />
    </cfif>
    <cfif REFind( "[A-Za-z0-9]", ARGUMENTS.Qualifier )>
        <cfset LOCAL.QualifierToken = ARGUMENTS.Qualifier />
    <cfelse>
        <cfset LOCAL.QualifierToken = "\" & ARGUMENTS.Qualifier />
    </cfif>

    <!---
        Assemble the token pattern:
          group 1 - what precedes the field: the delimiter, a line break,
                    or the start of the input (empty match);
          group 2 - a qualified field value (without the outer qualifiers,
                    doubled qualifiers still in place);
          group 3 - an unqualified field value.
        \G anchors each match to the end of the previous one.
    --->
    <cfset LOCAL.PatternText =
        "\G(" & LOCAL.DelimiterToken & "|\r?\n|\r|^)" &
        "(?:" & LOCAL.QualifierToken &
            "([^" & LOCAL.QualifierToken & "]*+(?>" & LOCAL.QualifierToken & LOCAL.QualifierToken & "[^" & LOCAL.QualifierToken & "]*+)*)" &
            LOCAL.QualifierToken & "|" &
        "([^" & LOCAL.QualifierToken & LOCAL.DelimiterToken & "\r\n]*+))"
        />

    <!--- Compile the pattern and get a matcher over the CSV data. --->
    <cfset LOCAL.Pattern = CreateObject(
        "java",
        "java.util.regex.Pattern"
        ).Compile(
            JavaCast( "string", LOCAL.PatternText )
            )
        />
    <cfset LOCAL.Matcher = LOCAL.Pattern.Matcher(
        JavaCast( "string", ARGUMENTS.CSV )
        ) />

    <!---
        The result is an array of arrays, one nested array per CSV row.
        Start with the array for the first row.
    --->
    <cfset LOCAL.Data = ArrayNew( 1 ) />
    <cfset ArrayAppend( LOCAL.Data, ArrayNew( 1 ) ) />

    <!--- Walk the CSV one field token at a time. --->
    <cfloop condition="LOCAL.Matcher.Find()">

        <!---
            Group 1 always participates in the match, but it is empty when
            it matched the start of the input.
        --->
        <cfset LOCAL.Delimiter = LOCAL.Matcher.Group(
            JavaCast( "int", 1 )
            ) />

        <!---
            A non-empty group 1 that is not the field delimiter is a line
            break, which starts a new row.
        --->
        <cfif (
            Len( LOCAL.Delimiter ) AND
            (LOCAL.Delimiter NEQ ARGUMENTS.Delimiter)
            )>
            <cfset ArrayAppend(
                LOCAL.Data,
                ArrayNew( 1 )
                ) />
        </cfif>

        <!---
            Group 2 is the qualified value. When the field was not
            qualified the Java call returns null, which leaves LOCAL.Value
            undefined - that is what the StructKeyExists() check detects.
        --->
        <cfset LOCAL.Value = LOCAL.Matcher.Group(
            JavaCast( "int", 2 )
            ) />

        <cfif StructKeyExists( LOCAL, "Value" )>
            <!--- Collapse each escaped (doubled) qualifier to a single one. --->
            <cfset LOCAL.Value = Replace(
                LOCAL.Value,
                ARGUMENTS.Qualifier & ARGUMENTS.Qualifier,
                ARGUMENTS.Qualifier,
                "all"
                ) />
        <cfelse>
            <!--- Not qualified: use group 3, the unqualified alternative. --->
            <cfset LOCAL.Value = LOCAL.Matcher.Group(
                JavaCast( "int", 3 )
                ) />
        </cfif>

        <!--- Add the field value to the current (last) row array. --->
        <cfset ArrayAppend(
            LOCAL.Data[ ArrayLen( LOCAL.Data ) ],
            LOCAL.Value
            ) />
    </cfloop>

    <!--- Return the parsed rows. --->
    <cfreturn LOCAL.Data />
</cffunction>
</cfcomponent>