#!/usr/bin/perl

######################################################################
# csv_parse.pl v0.2 - Scott Baker - 2005-04-28
#
# This is some code to process CSV files. They're not quite as simple
# as split(",",$string), because some data can contain a , in the
# data, not to mention that you have to double quote quotation marks.
# This isn't the fastest implementation, but it works pretty decently.
# The function will take either a simple string and return a list
# reference, or it will take a list_ref of a bunch of strings and
# return a multi-dimensional list with the processed data. This second
# method may be handy for parsing an entire CSV file as opposed to a
# single line.
#
# I've been able to get this code to process 600,000 lines of CSV data
# in approximately 1 minute. That's "good enough" for me :)
#
# Explanation:
#   $list_ref = csv_data($string_data);
#   $list_ref = csv_data($list_ref);
#   $list_ref = csv_file_data($filename);
######################################################################

use strict;
use warnings;

use Data::Dumper;

# Run the demonstration only when this file is executed directly, so the
# parsing subs can also be loaded from another script without side effects.
main() unless caller;

# Demonstrates the three entry points: a single CSV line, a list
# reference of CSV lines, and a CSV file on disk.
sub main {
    print "Simple scalar data\n";
    my $csv = 'Scott,Baker,917,87.5,"Scott Baker","Baker, Scott","""Lots""of""quotes""","Something, with, lots, of, commas,,,,"';
    print Dumper(csv_data($csv));

    print "List reference of multi-dimensional data\n";
    my @csv = ($csv) x 3;
    print Dumper(csv_data(\@csv));

    print "CSV data from a file\n";
    my $demo_file = "/tmp/foo.csv";
    if (-r $demo_file) {
        print Dumper(csv_file_data($demo_file));
    }
    else {
        # The sample file is optional; csv_file_data() itself still dies
        # when it is handed an unreadable file.
        print "Skipping: '$demo_file' is not readable\n";
    }

    return;
}

# Parse one line of CSV text into a list reference of field values.
#
#   $data - a string holding one CSV record, or an array reference of
#           such strings (which is handed off to csv_data_array()).
#
# Returns an array reference of fields for a string, or an array
# reference of such array references for an array reference. An empty
# or undefined string yields a single empty field.
sub csv_data {
    my $data = shift;

    # If they sent in a list-ref, pass it off to the array parsing one
    return csv_data_array($data) if ref($data) eq 'ARRAY';

    $data = '' unless defined $data;

    my @fields;
    my $field     = '';
    my $in_quotes = 0;

    # Walk the string character by character
    for my $char (split //, $data) {

        # Every quote character toggles the "inside a quoted field"
        # state. An escaped quote ("") toggles twice, so the state is
        # unchanged once both characters have been seen.
        $in_quotes = !$in_quotes if $char eq '"';

        if ($char eq ',' && !$in_quotes) {
            # A comma outside of quotes terminates the current field
            push @fields, process_string($field);
            $field = '';
        }
        else {
            # Just tack the character on the end until we get to that comma
            $field .= $char;
        }
    }

    # Add whatever's left of the string (there is no trailing ,)
    push @fields, process_string($field);

    return \@fields;
}

# Turn one raw CSV field into its intended value.
#
#   $str - the raw text between two separators (may be undefined).
#
# Returns the field with its trailing line ending removed, one pair of
# enclosing quotes removed, and every doubled quote ("") collapsed to a
# single quote.
sub process_string {
    my $str = shift;
    $str = '' unless defined $str;

    # Drop the line ending first (LF or CRLF); otherwise a trailing \r
    # would hide the closing quote from the pattern below.
    $str =~ s/(?:\r?\n)+\z//;

    # Remove leading and trailing quotes from the field. /s lets the
    # field content contain embedded newlines.
    $str =~ s/\A"(.*)"\z/$1/s;

    # CSV makes quotes "" so return it as intended
    $str =~ s/""/"/g;

    return $str;
}

# Used to parse an array of strings to a listref.
#
#   $array - array reference of CSV lines (an undefined value is
#            treated as an empty list).
#
# Returns an array reference with one parsed row (itself an array
# reference) per input element.
sub csv_data_array {
    my $array = shift;

    my @rows = map { csv_data($_) } @{ $array || [] };

    return \@rows;
}

# Read a CSV file directly in to a listref.
#
#   $file - path of the file to read.
#
# Returns an array reference of parsed rows, one per line of the file.
# Dies if the file is not readable or cannot be opened. The file is
# parsed line by line, so a quoted field that spans several physical
# lines is not reassembled.
sub csv_file_data {
    my $file = shift;

    if (!-r $file) {
        die("Can't open file '$file'\n");
    }

    # Three-argument open with a lexical handle: the file name can never
    # be interpreted as an open mode, and the handle closes itself if we
    # leave the sub early.
    open(my $fh, '<', $file)
        or die("Can't open file '$file': $!\n");
    my @lines = <$fh>;
    close($fh);

    return csv_data_array(\@lines);
}

# Note: I friggin' love the mod (%) operator!!!

1;