Revision 26 (by dpavlin, 2009/03/16 14:58:54) fix gender (since we don't mark string as utf-8 anymore)
#!/usr/bin/perl

use warnings;
use strict;

# cvs2dbi.pl
#
# 03/15/2009 05:06:53 PM CET Dobrica Pavlinusic <dpavlin@rot13.org>

use Data::Dump qw/dump/;
use File::Slurp;
use YAML qw/DumpFile/;
use Text::CSV;
use Encode qw/from_to/;

# Set to a true value to get verbose diagnostics on STDERR.
my $debug = 0;

# Path of the CSV export to convert; mandatory first command-line argument.
# Definedness and length are tested explicitly so that a file literally
# named "0" is not mistaken for a missing argument.
my $path = shift @ARGV;
die "usage: $0 file.csv\n" unless defined $path && length $path;

# Slurp the raw octets (no CRLF or encoding layers, which would damage
# UTF-16 data) and transcode them in place from UTF-16 to UTF-8 octets.
# from_to() returns undef on failure, so check for that explicitly.
my $csv = read_file( $path, binmode => ':raw' );
defined from_to($csv, 'utf-16', 'utf-8')
	or die "$path: can't convert from utf-16 to utf-8\n";

# Column names, taken from the first input line that yields any fields.
my @columns;

# Turn every following row into a hash keyed by column name and dump it to
# yaml/<first field>.yaml.
#
# NOTE(review): rows are split on a bare ';', so quoted fields that contain
# ';' or line breaks are not supported by this parser.
foreach my $line ( split(/\r?\n/, $csv) ) {

	warn "## $line\n";

	my @fields = split(/;/, $line);

	# Blank lines (or lines made only of separators) produce no fields;
	# without this guard they would be dumped as an undef document into
	# "yaml/.yaml".
	next unless @fields;

	if ( ! @columns ) {
		@columns = @fields;
		warn "# columns = ",dump( @columns ) if $debug;
		next;
	}

	my $hash = {};

	warn "# fields = ",dump( @fields ) if $debug;

	foreach my $i ( 0 .. $#fields ) {
		my $n = $columns[$i];
		my $v = $fields[$i];

		# A row longer than the header has no name for the extra fields;
		# skip them instead of storing them under an undefined key.
		if ( ! defined $n ) {
			warn "# field $i has no column name, ignored\n";
			next;
		}

		# drop a trailing subfield delimiter, then surrounding whitespace
		$v =~ s{\s*#\s*$}{};
		$v =~ s{^\s+}{};
		$v =~ s{\s+$}{};

		# fix tel fields: whitespace separates multiple numbers, so turn it
		# into the subfield delimiter
		$v =~ s{\s+}{#}g if $n =~ m{tel};

		# Gender column: map the UTF-8 octets of U+017D/U+017E to "F".
		# $v holds raw octets here, where /i cannot case-fold a two-byte
		# sequence, so both cases are listed explicitly.
		$v =~ s{\xC5[\xBD\xBE]}{F} if $n =~ m{spol};

		if ( $v =~ m{#} ) { # subfields delimiter in CSV data
			my @v = split(/\s*#+\s*/, $v);
			foreach my $pos ( 0 .. $#v ) {
				if ( $n =~ m{tel} ) {
					# numbers starting with 09 are stored as mobile, all
					# others as fixed; the first true value of each kind wins
					my $kind = $v[$pos] =~ m{^09} ? '_mobile' : '_fixed';
					$hash->{ $n . $kind } ||= $v[$pos];
				}
				$hash->{ $n . '_' . $pos } = $v[$pos];
			}

			$hash->{ $n } = [ @v ];
		} else {
			$hash->{ $n } = $v;
		}
	}

	warn dump( $hash ) if $debug;

	# The first field doubles as the output file name: trim it like any
	# other value and refuse ids that are empty or could leave yaml/.
	my $uuid = $fields[0];
	$uuid =~ s{^\s+}{};
	$uuid =~ s{\s+$}{};
	if ( $uuid eq '' || $uuid =~ m{[/\\\0]} ) {
		warn "# skipping row with unusable id '$uuid'\n";
		next;
	}

	DumpFile( "yaml/$uuid.yaml", $hash );
}