Contents of /trunk/lib/DBD/RAM.pm



Revision 334
Sat Nov 8 22:15:15 2008 UTC by dpavlin
File size: 78393 byte(s)
Import DBD::RAM for cleanup (and rename), patched with
http://rt.cpan.org/Public/Bug/Display.html?id=33882

We could, and probably will, use DBD::AnyData as a source for data.
However, at this stage it makes much more sense to first support SQL
queries over Frey objects, and DBD::RAM seems like a smaller codebase
to start with.
#########################################################################
#
# DBD::RAM - a DBI driver for files and data structures
#
# This module is copyright (c), 2000 by Jeff Zucker
# All rights reserved.
#
# This is free software.  You may distribute it under
# the same terms as Perl itself as specified in the
# Perl README file.
#
# WARNING: no warranty of any kind is implied.
#
# To learn more: enter "perldoc DBD::RAM" at the command prompt,
# or search in this file for =head1 and read the text below it
#
#########################################################################

package DBD::RAM;

use strict;
require DBD::File;
require SQL::Statement;
require SQL::Eval;
use IO::File;

use vars qw($VERSION $err $errstr $sqlstate $drh $ramdata);

use base qw(DBD::File);

$VERSION = '0.07';

$err      = 0;      # holds error code for DBI::err
$errstr   = "";     # holds error string for DBI::errstr
$sqlstate = "";     # holds SQL state for DBI::state
$drh      = undef;  # holds driver handle once initialized

#sub driver {
#    return $drh if $drh;   # already created - return same one
#    my($class, $attr) = @_;
#    $class .= "::dr";
#    $drh = DBI::_new_drh($class, {
#        'Name'        => 'RAM',
#        'Version'     => $VERSION,
#        'Err'         => \$DBD::RAM::err,
#        'Errstr'      => \$DBD::RAM::errstr,
#        'State'       => \$DBD::RAM::sqlstate,
#        'Attribution' => 'DBD::RAM by Jeff Zucker',
#    });
#    return $drh;
#}

package DBD::RAM::dr; # ====== DRIVER ======

$DBD::RAM::dr::imp_data_size = 0;

use base qw(DBD::File::dr);

sub connect {
    my($drh, $dbname, $user, $auth, $attr) = @_;
    my $dbh = DBI::_new_dbh($drh, {
        Name         => $dbname,
        USER         => $user,
        CURRENT_USER => $user,
    });
    # PARSE EXTRA STRINGS IN DSN HERE
    # Process attributes from the DSN; we assume ODBC syntax
    # here, that is, the DSN looks like var1=val1;...;varN=valN
    my $var;
    foreach $var (split(/;/, $dbname)) {
        if ($var =~ /(.*?)=(.*)/) {
            my $key = $1;
            my $val = $2;
            $dbh->STORE($key, $val);
        }
    }
    $dbh->STORE('f_dir','./') if !$dbh->{f_dir};
    # use Data::Dumper; die Dumper $DBD::RAM::ramdata if $DBD::RAM::ramdata;
    $dbh;
}

sub data_sources {}

sub disconnect_all { $DBD::RAM::ramdata = {}; }

sub DESTROY { $DBD::RAM::ramdata = {}; }


package DBD::RAM::db; # ====== DATABASE ======

$DBD::RAM::db::imp_data_size = 0;

use base qw(DBD::File::db);

sub disconnect { $DBD::RAM::ramdata = {}; }

# DRIVER PRIVATE METHODS

sub clear {
    my $dbh   = shift;
    my $tname = shift;
    my $r     = $DBD::RAM::ramdata;
    if ( $tname && $r->{$tname} ) {
        delete $r->{$tname};
    }
    else {
        $DBD::RAM::ramdata = {};
    }
}

sub dump {
    my $dbh = shift;
    my $sql = shift;
    my $txt;
    my $sth = $dbh->prepare($sql) or die $dbh->errstr;
    # use Data::Dumper; $Data::Dumper::Indent=0; print Dumper $sth;
    $sth->execute or die $sth->errstr;
    my @col_names = @{$sth->{NAME}};
    $txt .= "<";
    for (@col_names) {
        $txt .= "$_,";
    }
    $txt =~ s/,$//;
    $txt .= ">\n";
    while (my @row = $sth->fetchrow_array) {
        for (@row) {
            $_ ||= '';
            s/^\s*//;
            s/\s*$//;
            $txt .= "[$_] ";
        }
        $txt .= "\n";
    }
    return $txt;
}

sub get_catalog {
    my $self  = shift;
    my $tname = shift || '';
    my $catalog = $DBD::RAM::ramdata->{catalog}{$tname} || {};
    $catalog->{f_type}    ||= '';
    $catalog->{r_type}    ||= $catalog->{f_type};
    $catalog->{f_name}    ||= '';
    $catalog->{pattern}   ||= '';
    $catalog->{col_names} ||= '';
    $catalog->{eol}       ||= "\n";
    return $catalog;
}

sub catalog {
    my $dbh        = shift;
    my $table_info = shift;
    if (!$table_info) {
        my @tables = (keys %{$DBD::RAM::ramdata->{catalog}} );
        my @all_tables;
        for (@tables) {
            push @all_tables,[
                $_,
                $DBD::RAM::ramdata->{catalog}{$_}{f_type},
                $DBD::RAM::ramdata->{catalog}{$_}{f_name},
                $DBD::RAM::ramdata->{catalog}{$_}{pattern},
                $DBD::RAM::ramdata->{catalog}{$_}{sep_char},
                $DBD::RAM::ramdata->{catalog}{$_}{eol},
                $DBD::RAM::ramdata->{catalog}{$_}{col_names},
                $DBD::RAM::ramdata->{catalog}{$_}{read_sub},
                $DBD::RAM::ramdata->{catalog}{$_}{write_sub}];
        }
        return @all_tables;
    }
    for (@{$table_info}) {
        my($table_name,$f_type,$f_name,$hash);
        if (ref $_ eq 'ARRAY') {
            ($table_name,$f_type,$f_name,$hash) = @{$_};
        }
        if (ref $_ eq 'HASH') {
            $table_name = $_->{table_name}  || die "catalog() requires a table_name";
            $f_type     = $_->{data_type}   || 'CSV';
            $f_name     = $_->{file_source} || '';
            $hash       = $_;
        }
        $hash->{r_type} = $f_type;
        if ($f_type eq 'FIXED') { $f_type = 'FIX'; }
        if ($f_type eq 'PIPE') {
            $hash->{sep_char}  = '\s*\|\s*';
            $hash->{wsep_char} = '|';
            $f_type = 'CSV';
        }
        if ($f_type eq 'TAB') {
            $hash->{sep_char} = "\t";
            $f_type = 'CSV';
        }
        if ($f_type eq 'INI') {
            $hash->{sep_char} = '=';
        }
        $DBD::RAM::ramdata->{catalog}{$table_name}{f_type} = uc $f_type || '';
        $DBD::RAM::ramdata->{catalog}{$table_name}{f_name} = $f_name || '';
        if ($hash) {
            for (keys %{$hash}) {
                next if /table_name/;
                next if /data_type/;
                next if /file_source/;
                $DBD::RAM::ramdata->{catalog}{$table_name}{$_} = $hash->{$_};
            }
        }
        $DBD::RAM::ramdata->{catalog}{$table_name}{eol} ||= "\n";
    }
}

209
210 sub get_table_name {
211 my $dbh = shift;
212 my @tables = (keys %{$DBD::RAM::ramdata} );
213 if (!$tables[0]) { return 'table1'; }
214 my $next=0;
215 for my $table(@tables) {
216 if ($table =~ /^table(\d+)/ ) {
217 $next = $1 if $1 > $next;
218 }
219 }
220 $next++;
221 return("table$next");
222 }
223
224 sub export() {
225 my $dbh = shift;
226 my $args = shift || die "No arguments for export()\n";
227 my $msg = "export() requires ";
228 my $sql = $args->{data_source} || die $msg . '{data_source => $}';
229 my $f_name = $args->{data_target} || die 'export requires {data_target => $f}';
230 my $f_type = $args->{data_type} || die 'export requires {data_type => $d}';
231 if ($f_type eq 'XML') { return &export_xml($dbh,$args); }
232 my $temp_table = 'temp__';
233 $dbh->func( [[$temp_table,$f_type,$f_name,$args]],'catalog');
234 my $sth1 = $dbh->prepare($sql);
235 $sth1->execute or die $DBI::errstr;
236 my @col_names = @{$sth1->{NAME}};
237 my $sth2 = &prep_insert( $dbh, $temp_table, @col_names );
238 while (my @row = $sth1->fetchrow_array) {
239 $sth2->execute(@row);
240 }
241 delete $DBD::RAM::ramdata->{catalog}{$temp_table};
242 }
243
sub export_xml {
    my $dbh  = shift;
    my $args = shift;
    my $msg  = "Export to XML requires ";
    my $sql        = $args->{data_source} || die $msg . '{data_source => $}';
    my $f_name     = $args->{data_target} || die $msg . '{data_target => $f}';
    my $f_type     = $args->{data_type}   || die $msg . '{data_type => $d}';
    my $record_tag = $args->{record_tag}  || die $msg . '{record_tag => $r}';
    my $header     = $args->{header} || '';
    my($head,$item,$foot) = &prep_xml_export($header,$record_tag);
    $f_name = $dbh->{f_dir} . '/' . $f_name;
    $f_name =~ s#//#/#g;
    open(O,">$f_name") || die "Couldn't write to $f_name: $!\n";
    print O $head, "\n";
    my $sth = $dbh->prepare($sql);
    $sth->execute;
    my @col_names = @{$sth->{NAME}};
    while (my @row = $sth->fetchrow_array) {
        print O "<$item>\n";
        my $i = 0;
        for (@row) {
            next unless $row[$i];
            print O " <$col_names[$i]>";
            print O "$row[$i]";
            print O "</$col_names[$i]>\n";
            $i++;
        }
        print O "</$item>\n\n";
    }
    print O $foot;
    close O || die "Couldn't write to $f_name: $!\n";
}

sub prep_xml_export {
    my $header     = shift || qq{<?xml version="1.0" ?>\n};
    my $record_tag = shift;
    my @tag_starts = split ' ', $record_tag;
    my $terminal_tag = pop @tag_starts;
    my @tag_ends = map("</$_>\n",reverse @tag_starts);
    @tag_starts  = map("<$_>\n",@tag_starts);
    for (@tag_starts) { $header .= $_; }
    #print " <$terminal_tag>\n";
    my $footer;
    for (@tag_ends) { $footer .= $_; }
    return($header,$terminal_tag,$footer);
}

sub convert {
    my $dbh   = shift;
    my $specs = shift;
    my $source_type   = $specs->{source_type}   || '';
    my $source_file   = $specs->{source_file}   || '';
    my $source_params = $specs->{source_params} || '';
    my $target_type   = $specs->{target_type}   || '';
    my $target_file   = $specs->{target_file}   || '';
    my $temp_table    = 'temp__';
    my($dbh2,$sth1);
    $dbh->func( [
        ["${temp_table}2",$target_type,$target_file,$source_params],
    ],'catalog');
    if ($source_type eq 'DBI' ) {
        my @con_ary = @{$source_params->{connection_ary}};
        my $table   = $source_params->{table};
        $dbh2 = DBI->connect( @con_ary );
        $sth1 = $dbh2->prepare("SELECT * FROM $table");
    }
    else {
        $dbh->func( [
            ["${temp_table}1",$source_type,$source_file,$source_params],
        ],'catalog');
        $sth1 = $dbh->prepare("SELECT * FROM ${temp_table}1");
    }
    $sth1->execute;
    my @col_names = @{$sth1->{NAME}};
    my $sth2 = &prep_insert( $dbh, "${temp_table}2", @col_names );
    while (my @row = $sth1->fetchrow_array) {
        $sth2->execute(@row);
    }
    if ($source_type eq 'DBI' ) { $dbh2->disconnect; }
}


sub import {
    my $dbh   = shift;
    my $specs = shift;
    my $data  = shift;
    if ($specs && ! $data ) {
        if (ref $specs eq 'ARRAY' ) {
            $data = $specs; $specs = {};
        }
        else {
            $data = [];
        }
    }
    if (ref $specs ne 'HASH') {
        die 'First argument to "import" must be a hashref.';
    }
    if (ref $data ne 'ARRAY') {
        die 'Second argument to "import" must be an arrayref.';
    }
    my $data_type     = uc( $specs->{data_type} || 'CSV' );
    my $table_name    = $specs->{table_name} || $dbh->func('get_table_name');
    my $col_names     = $specs->{col_names}     || '';
    my $pattern       = $specs->{pattern}       || '';
    my $read_sub      = $specs->{read_sub}      || '';
    my $write_sub     = $specs->{write_sub}     || '';
    my $data_source   = $specs->{data_source}   || '';
    my $file_source   = $specs->{file_source}   || '';
    my $remote_source = $specs->{remote_source} || '';
    my $sep_char      = $specs->{sep_char}      || '';
    my $eol           = $specs->{eol}           || "\n";
    $DBD::RAM::ramdata->{catalog}{$table_name}->{r_type} = $data_type;
    if ($data_type eq 'FIXED'){ $data_type = 'FIX'; }
    if ($data_type eq 'PIPE') { $sep_char = '\s*\|\s*'; $data_type = 'CSV'; }
    if ($data_type eq 'TAB' ) { $sep_char = "\t";       $data_type = 'CSV'; }
    $DBD::RAM::ramdata->{catalog}{$table_name}->{sep_char}  = $sep_char  if $sep_char;
    $DBD::RAM::ramdata->{catalog}{$table_name}->{eol}       = $eol       if $eol;
    $DBD::RAM::ramdata->{catalog}{$table_name}->{pattern}   = $pattern   if $pattern;
    $DBD::RAM::ramdata->{catalog}{$table_name}->{read_sub}  = $read_sub  if $read_sub;
    $DBD::RAM::ramdata->{catalog}{$table_name}->{write_sub} = $write_sub if $write_sub;
    if ($data_type eq 'MP3' ) {
        $data_type = 'FIX';
        $col_names = 'file_name,song_name,artist,album,year,comment,genre';
        $pattern   = 'A255 A30 A30 A30 A4 A30 A50';
        $DBD::RAM::ramdata->{catalog}{$table_name}->{pattern} = $pattern;
        $data = &get_music_library( $specs );
    }
    if ($data_type eq 'XML' ) {
        $data = $dbh->func( $specs, $table_name, 'get_xml_db' );
        return 1;
    }
    ####################################################################
    # DATA SOURCE
    ####################################################################
    #
    # DATA FROM REMOTE FILE
    #
    if ($remote_source) {
        $data = $dbh->func($remote_source,'get_remote_data') or return undef;
        $data = [split("\n",$data)]; # turn string into arrayref
    }
    #
    # DATA FROM LOCAL FILE
    #
    if ($file_source) {
        $data = &get_file_data($dbh,$file_source);
        $data = [split("\n",$data)]; # turn string into arrayref
    }
    #
    # DATA FROM DATA STRUCTURE
    #
    if ($data_source) {
        $data = $data_source;
    }
    my @col_names;
    if ($data_type eq 'DBI' ) {
        @col_names = @{$data->{NAME}};
        my $sth_new = &prep_insert( $dbh, $table_name, @col_names );
        while (my @datarow = $data->fetchrow_array) {
            $sth_new->execute(@datarow);
        }
        die "No data in table $table_name!"
            unless $DBD::RAM::ramdata->{$table_name}->{DATA};
        return 1;
    }
    ####################################################################
    # GET COLUMN NAMES
    ####################################################################
    if (!ref $data) { my @tmp = split ( /$eol/m, $data ); $data = \@tmp; }
    my $first_line;
    if ($col_names eq 'first_line'
     && $data_type ne 'HASH' ) { $first_line = shift @{$data}; }
    else                       { $first_line = $data->[0]; }
    @col_names = $dbh->func(
        $table_name,$data_type,$col_names,$first_line,
        'get_column_names');
    ####################################################################
    # CREATE TABLE & PREPARE INSERT STATEMENT
    ####################################################################
    my $sth = &prep_insert( $dbh, $table_name, @col_names );

    ####################################################################
    # INSERT DATA INTO TABLE
    ####################################################################
    if ('CSV FIX INI ARRAY HASH USR' =~ /$data_type/ ) {
        for ( @{$data} ) {
            my @datarow;
            if ( $data_type eq 'HASH') {
                my %rowhash = %{$_};
                for (@col_names) {
                    my $val = $rowhash{$_} || '';
                    push @datarow, $val;
                }
            }
            else {
                @datarow = $dbh->func($_,$table_name,$data_type,'read_fields');
            }
            $sth->execute(@datarow);
        }
    }
    die "No data in table $table_name!" unless $DBD::RAM::ramdata->{$table_name}->{DATA};
    $DBD::RAM::ramdata->{$table_name}->{data_type} = $data_type;
    $DBD::RAM::ramdata->{$table_name}->{pattern}   = $pattern;
    $DBD::RAM::ramdata->{$table_name}->{read_sub}  = $read_sub;
    $DBD::RAM::ramdata->{$table_name}->{write_sub} = $write_sub;
    return 1;
}


####################################################################
# COLUMN NAMES
####################################################################
sub get_column_names {
    my($dbh,$table_name,$data_type,$col_names,$first_line) = @_;
    my $catalog  = $DBD::RAM::ramdata->{catalog}{$table_name};
    my $pattern  = $catalog->{pattern}  || '';
    my $read_sub = $catalog->{read_sub} || '';
    my($colstr,@col_names,$num_params);
    $colstr = '';
    #
    # COLUMN NAMES FROM FIRST LINE OF DATA
    #
    if ( $col_names eq 'first_line' && $data_type ne 'HASH' ) {
        @col_names = $dbh->func(
            $first_line,$table_name,$data_type,'read_fields');
        $num_params = scalar @col_names;
    }
    #
    # COLUMN NAMES FROM USER-SUPPLIED LIST
    #
    if ( $col_names && $col_names ne 'first_line' ) {
        $col_names =~ s/\s+//g;
        @col_names = split /,/,$col_names;
        $num_params = scalar @col_names;
    }
    #
    # AUTOMATICALLY ASSIGNED COLUMN NAMES
    #
    if ( $data_type eq 'HASH' && !$num_params ) {
        @col_names = keys %{$first_line};
        $num_params = scalar @col_names;
    }
    if ( !$num_params ) {
        if ( $data_type eq 'INI' ) {
            $num_params = 2;
        }
        if ( $data_type eq 'FIX' ) {
            my @x = split /\s+/,$pattern;
            $num_params = scalar @x;
        }
        if ( $data_type eq 'CSV' or $data_type eq 'USR' ) {
            my @colAry = $dbh->func(
                $first_line,$table_name,$data_type,'read_fields');
            $num_params = scalar @colAry;
        }
        $num_params = scalar @{ $first_line } if
            !$num_params && ref $first_line eq 'ARRAY';
        die "Couldn't find column names!" if !$num_params;
        for ( 1 .. $num_params ) { push(@col_names,"col$_"); }
    }
    return @col_names;
}

sub prep_insert {
    my( $dbh, $table_name, @col_names ) = @_;
    my($colstr,$num_params);
    for ( @col_names ) { $colstr .= $_ . ' TEXT,'; }
    $colstr =~ s/,$//;
    my $create_stmt = "CREATE TABLE $table_name ($colstr)";
    my $param_str   = (join ",", ("?") x @col_names);
    my $insert_stmt = "INSERT INTO $table_name VALUES ($param_str)";
    $dbh->do($create_stmt);
    my $sth = $dbh->prepare($insert_stmt);
    return $sth;
}

sub get_remote_data {
    my $dbh = shift;
    my $remote_source = shift;
    undef $@;
    eval{ require 'LWP/UserAgent.pm'; };
    die "LWP module not found! $@" if $@;
    my $ua  = new LWP::UserAgent;
    my $req = new HTTP::Request GET => $remote_source;
    my $res = $ua->request($req);
    die "[$remote_source] : " . $res->message if !$res->is_success;
    my $data = $res->content;
    return $data;
}

sub get_file_data {
    my $dbh = shift;
    my $file_source = shift;
    $file_source = $dbh->{f_dir} . '/' . $file_source;
    $file_source =~ s#//#/#g;
    open(I,$file_source) || die "[$file_source]: $!\n";
    local $/ = undef;
    my $data = <I>;
    close(I) || die "$file_source: $!\n";
    return $data;
}

sub get_xml_db {
    # Hat tip to Randal Schwartz for the XML/LWP stuff
    my($dbh,$specs,$table_name) = @_;
    my $remote_source = $specs->{remote_source} || '';
    my $file_source   = $specs->{file_source}   || '';
    my $data_source   = $specs->{data_source}   || '';
    my $record_tag    = $specs->{record_tag}    || '';
    my $col_tags      = $specs->{col_tags}      || '';
    my $fold_col      = $specs->{fold_col}      || '';
    my $col_mapping   = $specs->{col_mapping}   || '';
    my $col_names     = $specs->{col_names}     || '';
    my $read_sub      = $specs->{read_sub}      || '';
    my $attr          = $specs->{attr}          || '';
    my $data;
    my @columns;
    if (ref $col_names ne 'ARRAY') { $col_names = [split ',',$col_names]; }
    for ( @{$col_names} ) {
        if ($_ =~ /^\[(.*)\]$/ ) {
            my @newCols = split ',', $1;
            for (@newCols) { push @columns, $_; }
        }
        else {
            push @columns, $_;
        }
    }
    my $colstr;
    for ( @columns ) { $colstr .= $_ . ' TEXT,'; }
    $colstr =~ s/,$//;
    my $sql = "CREATE TABLE $table_name ($colstr)";
    $dbh->do($sql) || die $DBI::errstr . " : $sql";
    $DBD::RAM::ramdata->{$table_name}->{data_type} = 'XML';
    if ($remote_source){ $data = $dbh->func($remote_source,'get_remote_data') or die; }
    if ($file_source)  { $data = &get_file_data($dbh,$file_source); }
    if ($data_source)  { $data = $data_source; }
    die "No file or data source supplied!" unless $data;
    my $insert = $dbh->prepare("INSERT INTO $table_name (".
                 (join ", ", @columns).
                 ") VALUES (".
                 (join ",", ("?") x @columns).")");
    My_XML_Parser::doParse($data, $insert, $record_tag,
        $col_names, $col_mapping,$fold_col,$attr,$read_sub);
    #use Data::Dumper; print Dumper $DBD::RAM::ramdata; exit;
}

sub read_fields {
    my $dbh   = shift;
    my $str   = shift;
    my $tname = shift;
    my $type  = uc shift;
    my $catalog = $dbh->func($tname,'get_catalog');
    if ($type eq 'ARRAY') {
        return @{$str};
    }
    chomp $str;
    if ($type eq 'CSV') {
        my $sep_char = $catalog->{sep_char} || ',';
        #my @fields = Text::ParseWords::parse_line( $sep_char, 0, $str );
        my @fields = &csv2ary( $sep_char, $str );
        return @fields;
    }
    if ($type eq 'USR') {
        my $read_sub = $catalog->{read_sub} || die "USR Type requires read_sub routine!\n";
        return &$read_sub($str);
    }
    if ($type eq 'FIX') {
        return unpack $catalog->{pattern}, $str;
    }
    if ($type eq 'INI') {
        if ( $str =~ /^([^=]+)=(.*)/ ) {
            my @fields = ($1,$2);
            return @fields;
        }
    }
    if ($type eq 'XML') {
        my @fields;
        $str =~ s#<[^>]*>([^<]*)<[^>]*>#
            my $x = $1 || '';
            push @fields, $x;
        #ge;
        return @fields;
    }
    return ();
}

sub ary2csv {
    my($field_sep,$record_sep,@ary) = @_;
    my $field_rsep = quotemeta($field_sep);
    my $str = '';
    for (@ary) {
        $_ = '' if !defined $_;
        if ($field_sep eq ',') {
            s/"/""/g;
            s/^(.*)$/"$1"/s if /,/ or /\n/s or /"/;
        }
        $str .= $_ . $field_sep;
    }
    $str =~ s/$field_rsep$/$record_sep/;
    return $str;
}

sub csv2ary {
    my($field_sep,$str) = @_;
    # chomp $str;
    #$str =~ s/[\015\012]//g;
    $str =~ tr/\015\012//d;
    if ($field_sep ne ',') {
        #$field_sep = quotemeta($field_sep); LEFT UP TO USER TO DO
        return split($field_sep, $str);
    }
    $str =~ s/""/\\"/g;
    my @new = ();
    push(@new, $+ ) while $str =~ m{
        "([^\"\\]*(?:\\.[^\"\\]*)*)"$field_sep?
      | ([^$field_sep]+)$field_sep?
      | $field_sep
    }gx;
    push(@new, undef) if substr($str,-1,1) eq $field_sep;
    @new = map {my $x=$_; $x = '' if !defined $x; $x =~ s/\\"/"/g; $x;} @new;
    return @new;
}

sub write_fields {
    my($dbh,$fields,$tname,$type) = @_;
    my $catalog = $dbh->func($tname,'get_catalog');
    my $sep  = $catalog->{sep_char}  || ',';
    my $wsep = $catalog->{wsep_char} || $sep;
    my $fieldNum = 0;
    my $fieldStr = $catalog->{pattern} || '';
    $fieldStr =~ s/a//gi;
    my @fieldLengths = split / /, $fieldStr;
    $fieldStr = '';
    if ( $catalog->{f_type} eq 'USR' ) {
        my $write_sub = $catalog->{write_sub} || die "Requires write_sub!\n";
        my $fieldStr = &$write_sub(@{$fields});
        return $fieldStr;
    }
    if ( $catalog->{f_type} eq 'XML' ) {
        my @col_names = split ',',$catalog->{col_names};
        my $count = 0;
        for (@col_names) {
            $fieldStr .= "<$_>$fields->[$count]</$_>";
            $count++;
        }
        return $fieldStr;
    }
    for (@$fields) {
        # PAD OR TRUNCATE DATA TO FIT WITHIN FIELD LENGTHS
        if ( $catalog->{f_type} eq 'FIX' ) {
            my $oldLen = length $_;
            my $newLen = $fieldLengths[$fieldNum];
            if ($oldLen < $newLen) { $_ = sprintf "%-${newLen}s",$_; }
            if ($oldLen > $newLen) { $_ = substr $_, 0, $newLen; }
            $fieldNum++;
        }
        my $newCol = $_;
        if ( $catalog->{f_type} eq 'CSV' ) {
            if ($newCol =~ /$sep/ ) {
                $newCol =~ s/\042/\\\042/go;
                $newCol = qq{"$newCol"};
            }
            $fieldStr .= $newCol . $wsep;
        }
        else { $fieldStr .= $newCol; }
        if ( $catalog->{f_type} eq 'INI' ) { $fieldStr .= '='; }
    }
    if ( $catalog->{f_type} eq 'CSV' ) { $fieldStr =~ s/$sep$//; }
    if ( $catalog->{f_type} eq 'INI' ) { $fieldStr =~ s/=$//; }
    return $fieldStr;
}

sub get_music_library {
    my $specs = shift;
    my @dirs  = @{$specs->{dirs}};
    my @db;
    for my $dir (@dirs) {
        my @files = get_music_dir( $dir );
        for my $fname (@files) {
            push @db, &get_mp3_tag($fname);
        }
    }
    return \@db;
}

sub get_music_dir {
    my $dir = shift;
    opendir(D,$dir) || print "$dir: $!\n";
    return '' if $!;
    my @files = grep /mp3$/i, readdir D;
    @files = map ( $_ = $dir . $_, @files);
    closedir(D) || print "Couldn't read '$dir':$!";
    return @files;
}

sub get_mp3_tag {
    my($file) = shift;
    open(I,$file) || return '';
    binmode I;
    local $/ = '';
    seek I, -128, 2;
    my $str = <I> || '';
    return '' if !($str =~ /^TAG/);
    $file = sprintf("%-255s",$file);
    $str =~ s/^TAG(.*)/$file$1/;
    my $genre = $str;
    $genre =~ s/^.*(.)$/$1/g;
    $str =~ s/(.)$//g;
    $genre = unpack( 'C', $genre );
    my @genres = ("Blues", "Classic Rock", "Country", "Dance", "Disco",
        "Funk", "Grunge", "Hip-Hop", "Jazz", "Metal", "New Age", "Oldies",
        "Other", "Pop", "R&B", "Rap", "Reggae", "Rock", "Techno",
        "Industrial", "Alternative", "Ska", "Death Metal", "Pranks",
        "Soundtrack", "Eurotechno", "Ambient", "Trip-Hop", "Vocal",
        "Jazz+Funk", "Fusion", "Trance", "Classical", "Instrumental",
        "Acid", "House", "Game", "Sound Clip", "Gospel", "Noise",
        "Alternative Rock", "Bass", "Soul", "Punk", "Space", "Meditative",
        "Instrumental Pop", "Instrumental Rock", "Ethnic", "Gothic",
        "Darkwave", "Techno-Industrial", "Electronic", "Pop-Folk",
        "Eurodance", "Dream", "Southern Rock", "Comedy", "Cult", "Gangsta",
        "Top 40", "Christian Rap", "Pop/Funk", "Jungle", "Native American",
        "Cabaret", "New Wave", "Psychadelic", "Rave", "Show Tunes",
        "Trailer", "Lo-Fi", "Tribal", "Acid Punk", "Acid Jazz", "Polka",
        "Retro", "Musical", "Rock & Roll", "Hard Rock", "Folk", "Folk/Rock",
        "National Folk", "Swing", "Fast-Fusion", "Bebop", "Latin",
        "Revival", "Celtic", "Bluegrass", "Avantgarde", "Gothic Rock",
        "Progressive Rock", "Psychedelic Rock", "Symphonic Rock",
        "Slow Rock", "Big Band", "Chorus", "Easy Listening", "Acoustic",
        "Humour", "Speech", "Chanson", "Opera", "Chamber Music", "Sonata",
        "Symphony", "Booty Bass", "Primus", "Porn Groove", "Satire",
        "Slow Jam", "Club", "Tango", "Samba", "Folklore", "Ballad",
        "Power Ballad", "Rhytmic Soul", "Freestyle", "Duet", "Punk Rock",
        "Drum Solo", "Acapella", "Euro-House", "Dance Hall", "Goa",
        "Drum & Bass", "Club-House", "Hardcore", "Terror", "Indie",
        "BritPop", "Negerpunk", "Polsk Punk", "Beat",
        "Christian Gangsta Rap", "Heavy Metal", "Black Metal", "Crossover",
        "Contemporary Christian", "Christian Rock", "Unknown");
    $genre = $genres[$genre] || '';
    $str .= $genre . "\n";
    return $str;
}


# END OF DRIVER PRIVATE METHODS

sub table_info ($) {
    my($dbh) = @_;
    my @tables;
    for (keys %{$DBD::RAM::ramdata} ) {
        push(@tables, [undef, undef, $_, "TABLE", undef]);
    }
    my $names = ['TABLE_QUALIFIER', 'TABLE_OWNER', 'TABLE_NAME',
                 'TABLE_TYPE', 'REMARKS'];
    my $dbh2 = $dbh->{'csv_sponge_driver'};
    if (!$dbh2) {
        $dbh2 = $dbh->{'csv_sponge_driver'} = DBI->connect("DBI:Sponge:");
        if (!$dbh2) {
            DBI::set_err($dbh, 1, $DBI::errstr);
            return undef;
        }
    }

    # Temporary kludge: DBD::Sponge dies if @tables is empty. :-(
    return undef if !@tables;

    my $sth = $dbh2->prepare("TABLE_INFO", { 'rows'  => \@tables,
                                             'NAMES' => $names });
    if (!$sth) {
        DBI::set_err($dbh, 1, $dbh2->errstr());
    }
    $sth;
}

sub DESTROY { $DBD::RAM::ramdata = {}; }

package DBD::RAM::st; # ====== STATEMENT ======

$DBD::RAM::st::imp_data_size = 0;
@DBD::RAM::st::ISA = qw(DBD::File::st);


package DBD::RAM::Statement;

#@DBD::RAM::Statement::ISA = qw(SQL::Statement);
@DBD::RAM::Statement::ISA = qw(SQL::Statement DBD::File::Statement);
#@DBD::RAM::Statement::ISA = qw(DBD::File::Statement);

sub open_table ($$$$$) {
    my($self, $data, $tname, $createMode, $lockMode) = @_;
    my($table);
    my $dbh     = $data->{Database};
    my $catalog = $dbh->func($tname,'get_catalog');
    my $ftype   = $catalog->{f_type} || '';
    if ( !$catalog->{f_type} || $catalog->{f_type} eq 'RAM' ) {
        if ($createMode && !($DBD::RAM::ramdata->{$tname}) ) {
            if (exists($data->{$tname})) {
                die "A table $tname already exists";
            }
            $table = $data->{$tname} = { 'DATA'        => [],
                                         'CURRENT_ROW' => 0,
                                         'NAME'        => $tname,
                                       };
            bless($table, ref($self) . "::Table");
            $DBD::RAM::ramdata->{$tname} = $table;
            return $table;
        }
        else {
            $table = $DBD::RAM::ramdata->{$tname};
            die "No such table $tname" unless $table;
            $table->{'CURRENT_ROW'} = 0;
            return $table;
        }
    }
    else {
        my $file_name = $catalog->{f_name} || $tname;
        $table = $self->SUPER::open_table(
            $data, $file_name, $createMode, $lockMode
        );
        my $fh = $table->{'fh'};
        my $col_names = $catalog->{col_names} || '';
        my @col_names = ();
        if (!$createMode) {
            my $first_line = $fh->getline || '';
            #$first_line =~ s/[\015\012]//g;
            $first_line =~ tr/\015\012//d;
            @col_names = $dbh->func(
                $tname,$ftype,$col_names,$first_line,
                'get_column_names');
        }
        if ($col_names eq 'first_line' && !$createMode) {
            $table->{first_row_pos} = $fh->tell();
        }
        else {
            seek $fh,0,0;
        }
        my $count = 0;
        my %col_nums;
        for (@col_names) { next unless $_; $col_nums{$_} = $count; $count++; }
        $table->{col_names} = \@col_names;
        $table->{col_nums}  = \%col_nums;
        $table->{'CURRENT_ROW'} = 0;
        $table->{NAME} = $tname;
        $table;
    }
}

package DBD::RAM::Statement::Table;

@DBD::RAM::Statement::Table::ISA = qw(DBD::RAM::Table);

package DBD::RAM::Table;

#@DBD::RAM::Table::ISA = qw(SQL::Eval::Table);
#@DBD::RAM::Statement::Table::ISA = qw(SQL::Eval::Table DBD::File::Table);
use base qw(DBD::File::Table);

##################################
# fetch_row()
# CALLED WITH "SELECT ... FETCH"
##################################
sub fetch_row ($$$) {
    my($self, $data, $row) = @_;
    my $dbh     = $data->{Database};
    my $tname   = $self->{NAME};
    my $catalog = $dbh->func($tname,'get_catalog');
    if ( !$catalog->{f_type} || $catalog->{f_type} eq 'RAM' ) {
        my($currentRow) = $self->{'CURRENT_ROW'};
        if ($currentRow >= @{$self->{'DATA'}}) {
            return undef;
        }
        $self->{'CURRENT_ROW'} = $currentRow + 1;
        $self->{'row'} = $self->{'DATA'}->[$currentRow];
        return $self->{row};
    }
    else {
        my $fields;
        if (exists($self->{cached_row})) {
            $fields = delete($self->{cached_row});
        } else {
            local $/ = $catalog->{eol} || "\n";
            #$fields = $csv->getline($self->{'fh'});
            my $fh = $self->{'fh'};
            my $line = $fh->getline || return undef;
            chomp $line;
            @$fields = $dbh->func($line,$tname,$catalog->{f_type},'read_fields');
            # @$fields = unpack $dbh->{pattern}, $line;
            if ( $dbh->{ChopBlanks} ) {
                @$fields = map($_=&trim($_),@$fields);
            }
            if (!$fields ) {
                die "Error while reading file " . $self->{'file'} . ": $!" if $!;
                return undef;
            }
        }
        $self->{row} = (@$fields ? $fields : undef);
    }
    return $self->{row};
}

sub trim { my $x = shift; $x =~ s/^\s+//; $x =~ s/\s+$//; $x; }

##############################
# push_names()
# CALLED WITH "CREATE TABLE"
##############################
sub push_names ($$$) {
    my($self, $data, $names) = @_;
    my $dbh     = $data->{Database};
    my $tname   = $self->{NAME};
    my $catalog = $dbh->func($tname,'get_catalog');
    if ( !$catalog->{f_type} || $catalog->{f_type} eq 'RAM' ) {
        $self->{'col_names'} = $names;
        my($colNums) = {};
        for (my $i = 0; $i < @$names; $i++) {
            $colNums->{$names->[$i]} = $i;
        }
        $self->{'col_nums'} = $colNums;
    }
    elsif (!$catalog->{col_names}) {
        my $fh = $self->{'fh'};
        my $colStr = $dbh->func($names,$tname,$catalog->{f_type},'write_fields');
        $colStr .= $catalog->{eol};
        $fh->print($colStr);
    }
}

################################
# push_row()
# CALLED WITH "INSERT" & "UPDATE"
################################
sub push_row ($$$) {
    my($self, $data, $fields) = @_;
    my $dbh     = $data->{Database};
    my $tname   = $self->{NAME};
    my $catalog = $dbh->func($tname,'get_catalog');
    if ( !$catalog->{f_type} || $catalog->{f_type} eq 'RAM' ) {
        my($currentRow) = $self->{'CURRENT_ROW'};
        $self->{'CURRENT_ROW'} = $currentRow + 1;
        $self->{'DATA'}->[$currentRow] = $fields;
        return 1;
    }
    my $fh = $self->{'fh'};
    #
    # Remove undef from the right end of the fields, so that at least
    # in these cases undef is returned from FetchRow
    #
    while (@$fields && !defined($fields->[$#$fields])) {
        pop @$fields;
    }
    my $fieldStr = $dbh->func($fields,$tname,$catalog->{f_type},'write_fields');
    $fh->print($fieldStr,$catalog->{eol});
    1;
}

sub seek ($$$$) {
    my($self, $data, $pos, $whence) = @_;
    my $dbh     = $data->{Database};
    my $tname   = $self->{NAME};
    my $catalog = $dbh->func($tname,'get_catalog');
    if ( $catalog->{f_type} && $catalog->{f_type} ne 'RAM' ) {
        return DBD::File::Table::seek(
            $self, $data, $pos, $whence
        );
    }
    my($currentRow) = $self->{'CURRENT_ROW'};
    if ($whence == 0) {
        $currentRow = $pos;
    } elsif ($whence == 1) {
        $currentRow += $pos;
    } elsif ($whence == 2) {
        $currentRow = @{$self->{'DATA'}} + $pos;
    } else {
        die $self . "->seek: Illegal whence argument ($whence)";
    }
    if ($currentRow < 0) {
        die "Illegal row number: $currentRow";
    }
    $self->{'CURRENT_ROW'} = $currentRow;
}


sub drop ($$) {
    my($self, $data) = @_;
    my $dbh     = $data->{Database};
    my $tname   = $self->{NAME};
    my $catalog = $dbh->func($tname,'get_catalog');
    if ( !$catalog->{f_type} || $catalog->{f_type} eq 'RAM' ) {
        my $table_name = $self->{NAME} || return;
        delete $DBD::RAM::ramdata->{$table_name}
            if $DBD::RAM::ramdata->{$table_name};
        delete $data->{$table_name}
            if $data->{$table_name};
        return 1;
    }
    return DBD::File::Table::drop( $self, $data );
}

##################################
# truncate()
# CALLED WITH "DELETE" & "UPDATE"
##################################
sub truncate ($$) {
    my($self, $data) = @_;
    my $dbh     = $data->{Database};
    my $tname   = $self->{NAME};
    my $catalog = $dbh->func($tname,'get_catalog');
    if ( !$catalog->{f_type} || $catalog->{f_type} eq 'RAM' ) {
        $#{$self->{'DATA'}} = $self->{'CURRENT_ROW'} - 1;
        return 1;
    }
    return DBD::File::Table::truncate( $self, $data );
}

package My_XML_Parser;
my @state;
my %one_group_data;
my $insert_handle;
my $record_tag;
my @columns;
my %column_mapping;
my $multi_field_count;
my $fold_col;
my $fold;
my $fold_name;
my %folds;
my $read_sub;

sub doParse {
    my $data       = shift;
    $insert_handle = shift;
    $record_tag    = shift;
    my $col_names  = shift;
    my $col_map    = shift || '';
    $fold_col      = shift || {};
    my $attributes = shift || {};
    $read_sub      = shift || '';
    if ($read_sub eq 'latin1' && !$attributes->{ProtocolEncoding} ) {
        $read_sub = \&utf8_to_latin1;
        $attributes->{ProtocolEncoding} = 'ISO-8859-1';
    }
    @columns = @{$col_names};
    if ($col_map) { %column_mapping = %{$col_map}; }
    else          { %column_mapping = map { $_ => $_ } @columns; }
    undef $@;
    eval{ require 'XML/Parser.pm' };
    die "XML::Parser module not found! $@" if $@;
    XML::Parser->new( Style => 'Stream', %{$attributes} )->parse($data);
}

sub StartTag {
    my ($parser, $type) = @_;
    my %attrs = %_;
    push @state, $type;
    if ("@state" eq $record_tag) {
        %one_group_data = ();
        $multi_field_count = 0;
        while (my($k,$v) = each %folds) {
            $one_group_data{$k} = $v if $v;
        }
    }
    for (keys %{$fold_col}) {
        my $state = "@state";
        next unless $_ =~ /^$state\^*/;
        my $fold_tag = $fold_col->{$_} || '';
        if ( $fold_tag ) {
            $fold_name = $column_mapping{$fold_tag} if $column_mapping{$fold_tag};
            $fold_name ||= $fold_tag;
            $fold = $attrs{$fold_tag} || '';
            $folds{$fold_name} = $fold;
        }
    }
    for (keys %attrs) {
        my $place = $column_mapping{$_};
        if (defined $place) {
            $one_group_data{$place} .= " " if $one_group_data{$place};
            $one_group_data{$place} .= &check_read( $attrs{$_} );
        }
    }
}

sub EndTag {
    my ($parser, $type) = @_;
    my $tag = "@state";
    $tag =~ s/^$record_tag\s*//;
    my $column = $column_mapping{$tag};
    if (ref $column eq 'ARRAY') {
        $multi_field_count++;
    }
    if ("@state" eq $record_tag) {
        $insert_handle->execute(@one_group_data{@columns});
    }
    pop @state;
}

sub Text {
    my $tag = "@state";
    $tag =~ s/^$record_tag\s*//;
    my $column = $column_mapping{$tag};
    if (ref $column eq 'ARRAY') {
        $one_group_data{$column->[$multi_field_count]} .= &check_read($_);
        return;
    }
    if (defined $column) {
        $one_group_data{$column} .= " " if $one_group_data{$column};
        $one_group_data{$column} .= &check_read($_);
    }
}

sub check_read {
    my $x = shift;
    return $read_sub ? &$read_sub($x) : $x;
}

sub utf8_to_latin1 {
    local $_ = shift;
    s/([\xC0-\xDF])([\x80-\xBF])
     /chr(ord($1)<<6&0xC0|ord($2)&0x3F)
     /egx;
    return $_;
}

############################################################################
1;
__END__


=head1 NAME

DBD::RAM - a DBI driver for files and data structures

=head1 SYNOPSIS

 use DBI;
 my $dbh = DBI->connect('DBI:RAM:','usr','pwd',{RaiseError=>1});
 $dbh->func({
     table_name  => 'my_phrases',
     col_names   => 'id,phrase',
     data_type   => 'PIPE',
     data_source => [<DATA>],
 }, 'import' );
 print $dbh->selectcol_arrayref(qq[
     SELECT phrase FROM my_phrases WHERE id = 1
 ])->[0];
 __END__
 1 | Hello, New World
 2 | Some other Phrase

This sample creates a database table from data, uses SQL to make a
selection from the database, and prints out the results.  While this
table is in-memory only and uses pipe "delimited" formatting, many
other options are available, including local and remote file access
and many different data formats.

=head1 DESCRIPTION

DBD::RAM allows you to import almost any type of Perl data
structure into an in-memory table and then use DBI and SQL
to access and modify it.  It also allows direct access to
almost any kind of file, supporting SQL manipulation
of the file without converting the file out of its native
format.

The module allows you to prototype a database without having an RDBMS
or other database engine and can operate either with or without
creating or reading disk files.  If you do use disk files, they may,
in most cases, either be local files or any remote file accessible via
HTTP or FTP.

=head1 OVERVIEW

This module allows you to work with a variety of data formats and to
treat them like standard DBI/SQL accessible databases.  Currently
supported formats include:

 FORMATS:

   XML    Extensible Markup Language (XML)
   FIXED  fixed-width records
   INI    name=value pairs
   PIPE   pipe "delimited" text
   TAB    tab "delimited" text
   CSV    Comma Separated Values or other "delimited" text
   MP3    MP3 music binary files
   ARRAY  Perl array
   HASH   Perl associative array
   DBI    DBI database connection
   USR    user defined formats

The data you use may come from several kinds of sources:

 SOURCES

   DATA         Perl data structures: strings, arrays, hashes
   LOCAL FILE   a file stored on your local computer hard disk
   REMOTE FILE  a remote file accessible via HTTP or FTP

If you modify the data in a table, the modifications may be stored in
several ways.  The storage can be temporary, i.e. in memory only with
no disk storage.  Or several modifications can be done in memory and
then stored to disk once at the end of the processing.  Or
modifications can be stored to disk continuously, similarly to the way
other DBDs operate.

 STORAGE

   RAM         in-memory processing only, no storage
   ONE-TIME    processed in memory, stored to disk on command
   CONTINUOUS  all modifications stored to disk as they occur

Here is a summary of the SOURCES, FORMATS, and STORAGE capabilities of
DBD::RAM.  (x = currently supported, - = not supported, * = support in
progress)

                               FORMAT
                     CSV PIPE TAB FIXED INI XML MP3 ARRAY HASH DBI USR
 INPUT
  array/hash/string   x   x    x    x    x   x   -    x     x   -   x
  local file          x   x    x    x    x   x   x    -     -   x   x
  remote file         x   x    x    x    x   x   *    -     -   *   x
 OUTPUT
  ram table           x   x    x    x    x   x   x    x     x   x   x
  file (1-time)       x   x    x    x    x   x   -    -     -   *   *
  file (continuous)   x   x    x    x    x   *   -    -     -   x   *

Please note that any RAM table, regardless of its original source, can
be stored in any of the supported file output formats.  So, for
example, a table of MP3 information could be stored as a CSV file; the
"-" in the MP3 column only indicates that the information from the MP3
table can not (for obvious reasons) be written back to an MP3 file.

=head1 INSTALLATION & PREREQUISITES

This module should work on any platform that DBI works on.

You don't need an external SQL engine or a running server, or a
compiler.  All you need are Perl itself and installed versions of DBI
and SQL::Statement.  If you do not also have DBD::CSV installed you
will need to either install it, or simply copy File.pm into your DBD
directory.

You can either use the standard makefile method, or just copy RAM.pm
into your DBD directory.

Some features require installation of extra modules.  If you wish to
work with the XML format, you will need to install XML::Parser.  If
you wish to use the ability to work with remote files, you will need
to install the LWP (libnet) modules.  Other features of DBD::RAM work
fine without these additional modules.

=head1 SQL & DBI

This module, like other DBD database drivers, works with the DBI
methods listed in DBI.pm and its documentation.  Please see the DBI
documentation for details of methods such as connecting, preparing,
executing, and fetching data.  Currently only a limited subset of SQL
commands is supported.  Here is a brief synopsis; please see the
documentation for SQL::Statement for a more complete description of
these commands.  A short usage sketch follows the syntax summary below.

 CREATE TABLE $table
        ( $col1 $type1, ..., $colN $typeN,
          [ PRIMARY KEY ($col1, ... $colM) ] )

 DROP TABLE $table

 INSERT INTO $table
        [ ( $col1, ..., $colN ) ]
        VALUES ( $val1, ... $valN )

 DELETE FROM $table
        [ WHERE $wclause ]

 UPDATE $table
        SET $col1 = $val1, ... $colN = $valN
        [ WHERE $wclause ]

 SELECT [DISTINCT] $col1, ... $colN
        FROM $table
        [ WHERE $wclause ]
        [ ORDER BY $ocol1 [ASC|DESC], ... $ocolM [ASC|DESC] ]

 $wclause [NOT] $col $op $val|$col
          [ AND|OR $wclause2 ... AND|OR $wclauseN ]

 $op = | <> | < | > | <= | >=
       | IS NULL | IS NOT NULL | LIKE | CLIKE

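As a quick orientation, here is a minimal sketch of the usual DBI
prepare/execute/fetch cycle against an imported table (the table and
column names here are hypothetical):

 my $sth = $dbh->prepare("SELECT id, phrase FROM my_phrases");
 $sth->execute;
 while (my @row = $sth->fetchrow_array) {
     print "@row\n";
 }
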
=head1 WORKING WITH FILES & TABLES

This module supports working with both in-memory and disk-based
databases.  In order to allow quick testing and prototyping, the
default behavior is for tables to be created in-memory from in-memory
data, but it is easy to change this behavior so that tables can also
be created, manipulated, and stored on disk, or so that there is a
combination of in-memory and disk-based manipulation.

There are three methods unique to the DBD::RAM module that allow you
to specify which mode of operation you use for each table or
operation:

 1) import()  imports data either from memory or from a file into an
              in-memory table

 2) export()  exports data from an in-memory table to a file regardless
              of the original source of the data

 3) catalog() sets up an association between a file name and a table
              name such that all operations on the table are done
              continuously on the file

With the import() method, standard DBI/SQL commands like select,
update, delete, etc. apply only to the data that is in-memory.  If you
want to save the modifications to a file, you must explicitly call
export() after making the changes.

On the other hand, the catalog() method sets up an association between
a file and a tablename such that all DBI/SQL commands operate on the
file continuously without needing to explicitly call export().  This
method of operation is similar to other DBD drivers.

Here is a rough diagram of how the three methods operate:

    disk -> import() -> RAM

                        select
                        update
                        delete
                        insert
                        (multiple times)

    disk <- export() <- RAM

    catalog()
    disk <-> select
    disk <-> update
    disk <-> delete
    disk <-> insert

Regardless of which method is chosen, the same set of DBI and SQL
commands may be applied to all tables.

See below for details of import(), export() and catalog() and for
specifics of naming files and directories; a short round-trip sketch
follows.

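Here is a sketch of the import / modify / export round trip described
above (the file and table names are hypothetical):

 # import a file into RAM, modify it there, save it back out
 $dbh->func({ table_name  => 'phrases',
              file_source => 'phrases.csv' },'import');

 $dbh->do(qq[UPDATE phrases SET phrase = 'Hi!' WHERE id = 1]);

 $dbh->func({ data_source => 'SELECT * FROM phrases',
              data_target => 'phrases.csv',
              data_type   => 'CSV' },'export');
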
=head2 Creating in-memory tables from data and files: import()

In-memory tables may be created using standard CREATE/INSERT
statements, or using the DBD::RAM specific import method:

 $dbh->func( $args, 'import' );

The $args parameter is a hashref which can contain several keys, most
of which are optional and/or which have common defaults.

These keys can either be specified or left to default behaviour:

 table_name  string: name of the table
 col_names   string: column names for the table
 data_type   string: format of the data (e.g. XML, CSV...)

The table_name key to the import() method is either a string, or, if
it is omitted, a default table name will be automatically supplied,
starting at table1, then table2, etc.

 table_name => 'my_test_db',

 OR simply omit the table_name key

If the col_names key to the import() method is omitted, the column
names will be automatically supplied, starting at col1, then col2,
etc.  If the col_names key is the string 'first_line', the column
names will be taken from the first line of the data.  If the col_names
key is a comma separated list of column names, those will be taken (in
order) as the names of the columns.

 col_names => 'first_line',

 OR col_names => 'name,address,phone',

 OR simply omit the col_names key

If table_name or col_names are specified, they must comply with SQL
naming rules for identifiers: start with an alphabetic character;
contain nothing but alphabetic characters, numbers, and/or
underscores; be less than 128 characters long; and not be the same as
a SQL reserved keyword.  If the table refers to a file that has a
period in its name (e.g. my_data.csv), this can be handled with the
catalog() method, see below.

The table_name and col_names, if specified, *are* case sensitive, so
that "my_test_db" is not the same as "my_TEST_db".

The data_type key to the import() method specifies the format of the
data as already described above in the general description.  It must
be one of:

 data_type => 'CSV',
 data_type => 'TAB',
 data_type => 'PIPE',
 data_type => 'INI',
 data_type => 'FIXED',
 data_type => 'XML',
 data_type => 'MP3',
 data_type => 'DBI',
 data_type => 'USR',
 data_type => 'ARRAY',
 data_type => 'HASH',

 OR simply omit the data_type key

If no data_type key is supplied, the default format CSV will be used.

The import() keys must specify a source of the data for the table,
which can be any of:

 file_source    string: name of local file to get data from
 remote_source  string: URL of remote file to get data from
 data_source    string or arrayref: the actual data

The file_source key is the name of a local file.  Its location will be
taken to be relative to the f_dir specified in the database
connection, see the connect() method above.  Whether or not the file
name is case sensitive depends on the operating system the script is
running on, e.g. on Windows case is ignored and on UNIX it is not
ignored.  For maximum portability, it is safest to assume that case
matters.

 file_source => 'my_test_db.csv'

The remote_source key is a URL (Uniform Resource Locator) to a file
located on some other computer.  It may be any kind of URL that is
supported by the LWP module, including HTTP and FTP.  If a username
and password are required, they can be included in the URL.

 remote_source => 'http://myhost.com/mypath/myfile.myext'

 OR remote_source => 'ftp://user:password@myhost.com/mypath/myfile.myext'

The data_source key to the import() method contains the actual data
for the table, in cases where the data comes from the Perl script
itself rather than from a file.  The method of specifying the
data_source depends entirely on the format of the data_type.  For
example, with a data_type of XML or CSV, the data_source is a string
in XML or CSV format, but with data_type ARRAY, the data_source is a
reference to an array of arrayrefs.  See below under each data_type
for details.

The following keys to the import() method apply only to specific data
formats; see the sections on the specific formats (listed in parens)
for details:

 pattern      (FIXED only)
 sep_char     (CSV only)
 eol          (CSV only)
 read_sub     (USR and XML only)
 attr         (XML only)
 record_tag   (XML only)
 fold_col     (XML only)
 col_mapping  (XML only)
 dirs         (MP3 only)


=head2 Saving in-memory tables to disk: export()

The export() method creates a file from an in-memory table.  It takes
a very similar set of keys as does the import() method.  The data_type
key specifies the format of the file to be created (CSV, PIPE, TAB,
XML, FIXED-WIDTH, etc.).  The same set of specifiers available for the
import method for these various formats is also available
(e.g. sep_char to set the field separator for CSV files, or pattern to
set the fixed-width pattern).

The data_source key for the export() method is a SQL select statement
in relation to whatever in-memory table is chosen to export.  The
data_target key specifies the name of the file to put the results in.
Here is an example:

 $dbh->func( {
     data_source => 'SELECT * FROM table1',
     data_target => 'my_db.fix',
     data_type   => 'FIXED',
     pattern     => 'A2 A19',
 },'export' );

That statement creates a fixed-width record database file called
"my_db.fix" and puts the entire contents of table1 into the file using
the specified field widths and whatever column names already exist in
table1.

See the specific data formats below for details related to the
export() method.

=head2 Continuous file access: catalog()

The catalog() method creates an association between a specific table
name, a specific data type, and a specific file name.  You can create
those associations for several files at once.  The parameter to the
catalog() method is a reference to an array of arrayrefs.  Each of the
arrayrefs should contain a table name, a data type, and a file name,
and can optionally include other parameters specific to particular
data types.  Here is an example:

 $dbh->func([
     [ 'my_csv_table', 'CSV', 'test_db.csv' ],
 ],'catalog');

This example creates an association to a CSV file.  Once the catalog()
statement has been issued, any DBI/SQL commands relating to
"my_csv_table" will operate on the file "test_db.csv".  If the command
is a SELECT statement, the file will be opened and searched.  If the
command is an INSERT statement, the file will be opened and the new
data row(s) inserted and saved into the file.

One can also pass additional information such as column names,
fixed-width patterns, and field and record separators to the catalog()
method.  See the import() information above for the meanings of these
additional parameters.  They should be passed as a hashref:

 $dbh->func([
     [ 'table1', 'FIXED', 'test_db.fix', {pattern   => 'A2 A19'} ],
     [ 'table2', 'INI',   'test_db.ini', {col_names => 'id,phrase,name'} ],
 ],'catalog');

In future releases, users will be able to store catalogs in files for
permanent associations between files and data types.

=head2 Specifying file and directory names

All filenames are relative to a user-specified file directory: f_dir.
The f_dir parameter may be set in the connect statement:

 my $dbh = DBI->connect("dbi:RAM:f_dir=/mypath/to-files/" ...

The f_dir parameter may also be set or reset anywhere in the program
after the database connection:

 $dbh->{f_dir} = '/mypath/to-files'

If the f_dir parameter is not set explicitly, it defaults to "./",
which will be wherever your script thinks it is running from (which,
depending on server setup, may or may not be the physical location of
your script, so use this only if you know what you are doing).

All filenames are relative to the f_dir directory.  It is not possible
to use an absolute path to a file.

WARNING: no taint checking is performed on the filename or f_dir; this
is the responsibility of the programmer.  Since the filename is
relative to the f_dir directory, a filename starting with "../" will
lead to files above or outside of the f_dir directory, so you should
exclude those from filenames if the filenames come from user input.

1562
1563 If no table_name is specified, a numbered table name will be supplied
1564 (table1, or if that exists table2, etc.). The same also applies to
1565 column names (col1, col2, etc.). If no data_type is supplied, CSV
1566 will be assumed. If the entire hashref parameter to import is missing
1567 and an arrayref of data is supplied instead, then defaults for all
1568 values will be used, the source will be defaulted to data and the
1569 contents of the array will be treated as the data source. For CSV
1570 file, a field separator of comma and a record separator of newline are
1571 the default. Thus, assuming there are no already exiting in-memory
1572 tables, the two statements below have the same effect:
1573
1574 $dbh->func( [<DATA>], 'import' );
1575
1576 $dbh->func({
1577 table_name => 'table1',
1578 data_type => 'CSV',
1579 col_names => 'col1,col2',
1580 sep_char => ',',
1581 eol => "\n",
1582 data_source => [<DATA>],
1583 },'import' );
1584
1585 It is also possible to rely on some of the defaults, but not all of
1586 them. For example:
1587
1588 $dbh->func({
1589 data_type => 'PIPE',
1590 file_source => 'my_db.pipe',
1591 },'import' );
1592
1593 =head1 DATA FORMATS
1594
1595 =head2 CSV / PIPE / TAB / INI (Comma,Pipe,Tab,INI & other "delimited" formats)
1596
1597 DBD::RAM can import CSV (Comma Separated Values) from strings, from
1598 local files, or from remote files into database tables and export
1599 tables from any source to CSV files. It can also store and update CSV
1600 files continuously similarly to the way other DBD drivers operate.
1601
1602 If you wish to use remote CSV files, you also need the LWP module
1603 installed. It is available from www.activestate.com for windows, and
1604 from www.cpan.org for other platforms.
1605
1606 CSV is the format of files commonly exported by such programs as
1607 Excel, Access, and FileMakerPro. Typically newlines separate records
1608 and commas separate fields. Commas may also be included inside fields
1609 if the field itself is surrounded by quotation marks. Quotation marks
1610 may be included in fields by doubling them. Although some types of
1611 CSV formats allow newlines inside fields, DBD::RAM does not currently
1612 support that. If you need that feature, you should use DBD::CSV.
1613
1614 Here are some typical CSV fields:
1615
1616 hello,1,"testing, ""george"", 1,2,3",junk
1617
1618 Note that numbers and strings that don't contain commas do not need
1619 quotation marks around them. That line would be parsed into four
1620 fields:
1621
1622 hello
1623 1
1624 testing, "george", 1,2,3
1625 junk
1626
1627 To import that string of CSV into a DBD::RAM table:
1628
1629 $dbh->func({
1630 data_source => qq[hello,1,"testing, ""george"", 1,2,3",junk]
1631 },'import');
1632
1633 Of if one wanted to continuously update a file similarly to the way
1634 DBD::CSV works:
1635
1636 $dbh->func([ 'table1', 'CSV', 'my_test.csv' ],'catalog');
1637
1638
1639 Or if that string and others like it were in a local file called
1640 'my_test.csv':
1641
1642 $dbh->func({ file_source => 'my_test.csv' },'import');
1643
1644 Or if that string and others like it were in a remote file at a known
1645 URL:
1646
1647 $dbh->func({ remote_source => 'http://www.foo.edu/my_test.csv' },'import');
1648
1649 Note that these forms all use default behaviour since CSV is the
1650 default data_type. These methods also use the default table_name
1651 (table1, table2, etc.) and default column names (col1, col2, etc.). The
1652 same functions can specify a table_name and can either specify a list
1653 of column names or specify that the column names should be taken from
1654 the first line of data:
1655
1656 $dbh->func({
1657 file_source => 'my_test.csv',
1658 table_name => 'my_table',
1659 col_names => 'name,phone,address',
1660 },'import');
1661
1662 It is also possible to define other field separators (e.g. a
1663 semicolon) with the "sep_char" key and define other record separators
1664 with the "eol" key. For example:
1665
1666 sep_char => ';',
1667 eol => '~',
1668
1669 Adding those to the import() hash would define data that has a
1670 semicolon between every field and a tilde between every record.
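
For instance, a complete import() call using those keys might look
like this (a sketch; the table name and data are illustrative):

  $dbh->func({
      table_name  => 'prefs',
      sep_char    => ';',
      eol         => '~',
      data_source => 'red;large~blue;small~',
  },'import');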
1671
1672 For convenience shortcuts have been provided for PIPE and TAB
1673 separators. The data_type "PIPE" indicates a separator of the pipe
1674 character '|', which may optionally have blank spaces before or after
1675 it. The TAB data_type indicates fields that are separated by tabs.
1676 In both cases newlines remain the default record separator unless
1677 specifically set to something else.
1678
1679 Another shortcut is the INI data_type. This expects to see data in
1680 name=value pairs like this:
1681
1682 resolution = 640x480
1683 colors = 256
1684
1685 Currently the INI type does not support sections within the .ini file,
1686 but that will change in future releases of this module.
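
Here is a sketch of importing INI-style data (the table and column
names are illustrative; as with CSV, defaults would be supplied for
anything omitted):

  $dbh->func({
      data_type   => 'INI',
      table_name  => 'settings',
      col_names   => 'name,value',
      data_source => "resolution = 640x480\ncolors = 256\n",
  },'import');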
1687
1688 The PIPE, TAB, and INI formats all behave like the CSV format.
1689 Defaults may be used for assigning column names from the first line of
1690 data, in which case the column names should be separated by the
1691 appropriate symbol (e.g. col1|col2 for PIPE, and col1=col2 for INI,
1692 and column names separated by tabs for TAB).
1693
1694 In the examples above using data_source the data was a string with
1695 newlines separating the records. It is also possible to use a
1696 reference to an array of lines as the data_source. This makes it
1697 easy to use the DATA segment of a script or to import an array from
1698 some other part of a script:
1699
1700 $dbh->func({ data_source => [<DATA>] },'import' );
1701
1702 =head2 ARRAYS & HASHES
1703
1704 DBD::RAM can import data directly from references to arrays of
1705 arrayrefs and references to arrays of hashrefs. This allows you to
1706 easily import data from some other portion of a Perl script into a
1707 database format and either save it to disk or simply manipulate it in
1708 memory.
1709
1710 $dbh->func({
1711 data_type => 'ARRAY',
1712 data_source => [
1713 ['1','CSV:Hello New World!'],
1714 ['2','CSV:Cool!']
1715 ],
1716 },'import');
1717
1718 $dbh->func({
1719 data_type => 'HASH',
1720 data_source => [
1721 {id=>1,phrase=>'Hello new world!'},
1722 {id=>2,phrase=>'Junkity Junkity Junk'},
1723 ],
1724 },'import');
1725
1726
1727 =head2 FIXED-WIDTH RECORDS
1728
1729 Fixed-width records (also called fixed-length records) do not use
1730 character patterns to separate fields; rather, they use a preset number
1731 of characters in each field to determine where one field begins and
1732 another ends. DBD::RAM can import fixed-width records from strings,
1733 arrayrefs, local files, and remote files and can export data from any
1734 source to fixed-width record fields. The module also allows
1735 continuous disk-based updating of fixed-width format files similarly
1736 to other DBDs.
1737
1738 The fixed-width format behaves exactly like the CSV formats mentioned
1739 above with the exception that the data_type is "FIXED" and that one
1740 must supply a pattern key to describe the width of the fields. The
1741 pattern should be in Perl unpack format e.g. "A2 A7 A14" would
1742 indicate a table with three columns with widths of 2,7,14 characters.
1743 When data is inserted or updated, it will be truncated or padded to
1744 fill exactly the amount of space allotted to each field.
1745
1746 $dbh->func({
1747 table_name => 'phrases',
1748 col_names => 'id,phrase',
1749 data_type => 'FIXED',
1750 pattern => 'A1 A20',
1751 data_source => [ '1Hello new world! ',
1752 '2Junkity Junkity Junk',
1753 ],
1754 },'import' );
1755
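Once imported, the table can be queried like any other. A sketch
(assuming, per unpack's "A" field type, that trailing blanks are
stripped when records are read back):

  my $sth = $dbh->prepare("SELECT id,phrase FROM phrases");
  $sth->execute;
  while ( my($id,$phrase) = $sth->fetchrow_array ) {
      print "$id: $phrase\n";    # e.g. "1: Hello new world!"
  }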
1756
1757 =head2 XML
1758
1759 DBD::RAM can import XML (Extensible Markup Language) from strings, from
1760 local files, or from remote files into database tables and export
1761 tables from any source to XML files.
1762
1763 You must have XML::Parser installed in order to use the XML feature of
1764 DBD::RAM. If you wish to use remote XML files, you also need the LWP
1765 module installed. Both are available from www.activestate.com for
1766 Windows, and from www.cpan.org for other platforms.
1767
1768 Support is provided for information in tag attributes and tag text and
1769 for multiple levels of nested tags. There are several options on how
1770 to treat tag names that occur multiple times in a single record,
1771 including a variety of relationships between XML tags and database
1772 columns: one-to-one, one-to-many, and many-to-one. Tag attributes can
1773 be made to apply to multiple records nested within the tag. There is
1774 also support for alternate character encodings and other XML::Parser
1775 parameter attributes.
1776
1777 See below for details.
1778
1779 =over 4
1780
1781 =item XML Import
1782
1783 To start with a very simple example, consider this XML string:
1784
1785 <staff>
1786 <person name='Joe' title='bottle washer'/>
1787 <person name='Tom' title='flunky'/>
1788 <person name='Bev' title='chief cook'/>
1789 <person name='Sue' title='head honcho'/>
1790 </staff>
1791
1792 Assuming you have that XML structure in a variable $str, you can
1793 import it into a DBD::RAM table like this:
1794
1795 $dbh->func({
1796 data_source => $str,
1797 data_type => 'XML',
1798 record_tag => 'staff person',
1799 col_names => 'name,title'
1800 },'import');
1801
1802 This would produce the following SQL/DBI-accessible table:
1803
1804 name | title
1805 -----+--------------
1806 Joe | bottle washer
1807 Tom | flunky
1808 Bev | chief cook
1809 Sue | head honcho
1810
1811 If the XML data is in a local or remote file, rather than a string,
1812 simply change the "data_source" to "file_source" (for local files) or
1813 "remote_source" (for remote files) an everything else mentioned in
1814 this section works the same as if the data was imported from a string.
1815
1816 Notice that the "record_tag" is a space-separated list of all of the
1817 tags that enclose the fields you want to capture starting at the
1818 highest level with the <staff> tag. In this example there is only one
1819 level of nesting, but there could be many levels of nesting in actual
1820 practice.
1821
1822 DBD::RAM can treat both text and tag attributes as fields. So the
1823 following three records could produce the same database row:
1824
1825 <person name='Tom' title='flunky'/>
1826
1827 <person name='Tom'>
1828 <title>flunky</title>
1829 </person>
1830
1831 <person>
1832 <name>Tom</name>
1833 <title>flunky</title>
1834 </person>
1835
1836 The database column names should be specified as a comma-separated
1837 string, in the order you want them to appear in the database:
1838
1839 col_names => 'name,state,year'
1840
1841 If you want the database column names to be the same as the XML tag
1842 names you do not need to do anything further.
1843
1844 NOTE: you *must* specify the column names for XML data; you cannot
1845 rely on automatic default column names (col1,col2,etc.) or on reading
1846 the column names from the "first line" of the data as you can with
1847 most other data types.
1848
1849
1850 =item Alternate relationships between XML tags & database columns
1851
1852 If you want the database column names to be different from the XML tag
1853 names, you need to add a col_mapping parameter which should be a hash
1854 with the XML tag as the key and the database column as the value:
1855
1856 col_mapping => {
1857 name => 'Member_Name',
1858 state => 'Location',
1859 year => 'Year',
1860 }
1861
1862 ('name' is the XML tag, 'Member_Name' is the database column)
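
A complete import() call using such a mapping might look like this
(a sketch; the record_tag and file name are illustrative):

  $dbh->func({
      data_type   => 'XML',
      file_source => 'members.xml',
      record_tag  => 'members person',
      col_names   => 'Member_Name,Location,Year',
      col_mapping => {
          name  => 'Member_Name',
          state => 'Location',
          year  => 'Year',
      },
  },'import');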
1863
1864 If a given tag occurs more than once in an XML record, it can be
1865 mapped onto a single column name (in which case all of the values for
1866 it will be concatenated with spaces into the single column), or it can
1867 be mapped onto an array of column names (in which case each succeeding
1868 instance of the tag will be entered into the succeeding column in the
1869 array).
1870
1871 For example, given this XML snippet:
1872
1873 <person name='Joe' state='OR'>
1874 <year>1998</year>
1875 <year>1999</year>
1876 </person>
1877 <person name='Sally' state='WA'>
1878 <year>1998</year>
1879 <year>1999</year>
1880 <year>2000</year>
1881 </person>
1882
1883 This column mapping:
1884
1885 col_mapping => {
1886 name => 'Member_Name',
1887 state => 'Location',
1888 year => ['Year1','Year2','Year3'],
1889 }
1890
1891 Would produce this table:
1892
1893 Member_Name | Location | Year1 | Year2 | Year3
1894 ------------+----------+-------+-------+------
1895 Joe | OR | 1998 | 1999 |
1896 Sally | WA | 1998 | 1999 | 2000
1897
1898 And this column mapping:
1899
1900 col_mapping => {
1901 name => 'Member_Name',
1902 state => 'Location',
1903 year => 'Year',
1904 }
1905
1906 Would produce this table:
1907
1908 Member_Name | Location | Year
1909 ------------+----------+----------------
1910 Joe | OR | 1998 1999
1911 Sally | WA | 1998 1999 2000
1912
1913 It is also possible to map several different tags into a single column,
1914 e.g.:
1915
1916 <person name='Joe' state='OR'>
1917 <year1>1998</year1>
1918 <year2>1999</year2>
1919 </person>
1920 <person name='Sally' state='WA'>
1921 <year1>1998</year1>
1922 <year2>1999</year2>
1923 <year3>2000</year3>
1924 </person>
1925
1926 col_mapping => {
1927 name => 'Member_Name',
1928 state => 'Location',
1929 year1 => 'Year',
1930 year2 => 'Year',
1931 year3 => 'Year',
1932 }
1933
1934 Member_Name | Location | Year
1935 ------------+----------+----------------
1936 Joe | OR | 1998 1999
1937 Sally | WA | 1998 1999 2000
1938
1939 =item Nested attributes that apply to multiple records
1940
1941 It is also possible to use nested record attributes to create column
1942 values that apply to multiple records. Consider the following XML:
1943
1944 <staff>
1945 <office location='Portland'>
1946 <person name='Joe'/>
1947 <person name='Tom'/>
1948 </office>
1949 <office location='Seattle'>
1950 <person name='Bev'/>
1951 <person name='Sue'/>
1952 </office>
1953 </staff>
1954
1955 One might like to associate the office location with all of the staff
1956 members in that office. This is how that would be done:
1957
1958 record_tag => 'staff office person',
1959 col_names => 'location,name',
1960 fold_col => { 'staff office' => 'location' },
1961
1962 That fold_col specification in the import() method would "fold in"
1963 the attribute for location and apply it to all records nested within
1964 the office tag and produce the following table:
1965
1966 location | name
1967 ---------+-----
1968 Portland | Joe
1969 Portland | Tom
1970 Seattle | Bev
1971 Seattle | Sue
1972
1973 You may use several levels of folded columns, for example, to capture
1974 both the office location and title in this XML:
1975
1976 <staff>
1977 <office location='Portland'>
1978 <title id='manager'>
1979 <person name='Joe'/>
1980 </title>
1981 <title id='flunky'>
1982 <person name='Tom'/>
1983 </title>
1984 </office>
1985 <office location='Seattle'>
1986 <title id='flunky'>
1987 <person name='Bev'/>
1988 <person name='Sue'/>
1989 </title>
1990 </office>
1991 </staff>
1992
1993 You would use this fold_col key:
1994
1995 fold_col => { 'staff office' => 'location',
1996 'staff office title' => 'id',
1997 },
1998
1999 And obtain this table:
2000
2001 location | title | name
2002 ---------+---------+-----
2003 Portland | manager | Joe
2004 Portland | flunky | Tom
2005 Seattle | flunky | Bev
2006 Seattle | flunky | Sue
2007
2008 If you need to grab more than one attribute from a single tag, you
2009 need to put one or more carets (^) on the end of the fold_col key.
2010 For example:
2011
2012 <office type='branch' location='Portland' manager='Sue'> ...</office>
2013
2014 fold_col => { 'office' => 'type',
2015 'office^' => 'location',
2016 'office^^' => 'manager' },
2017
2018 =item Character Encoding and Unicode issues
2019
2020 The attr key can be used to pass extra information to XML::Parser when
2021 it imports a database. For example, if the XML file contains latin-1
2022 characters, one might like to pass the parser an encoding protocol
2023 like this:
2024
2025 attr => {ProtocolEncoding => 'ISO-8859-1'},
2026
2027 Attributes passed in this manner are handed straight to
2028 XML::Parser.
2029
2030 Since the results of XML::Parser are returned as UTF-8, one might also
2031 like to translate from UTF-8 to something else when the data is
2032 entered into the database. This can be done by passing a reference to a
2033 subroutine in the read_sub key. For example:
2034
2035 read_sub => \&utf8_to_latin1,
2036
2037 For this to work, there would need to be a subroutine utf8_to_latin1
2038 in the main module that takes a UTF-8 string as input and returns a
2039 Latin-1 string as output. Similar routines can be used to translate
2040 the UTF-8 characters into any other character encoding.
2041
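Here is one possible sketch of such a routine, using the core Encode
module (Perl 5.8 and later); neither the module nor the routine is
part of DBD::RAM itself:

  use Encode qw(decode encode);

  # take a UTF-8 octet string, return ISO-8859-1 (Latin-1) octets
  sub utf8_to_latin1 {
      my $utf8 = shift;
      return encode('iso-8859-1', decode('utf8', $utf8));
  }
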
2042 Apologies for being Euro-centric, but I have included a shortcut for
2043 Latin-1. Omit the attr key and, instead of passing a subroutine
2044 reference in the read_sub key, simply put the string "latin1" there.
2045 The module will then automatically perform ISO-8859-1 protocol
2046 encoding when reading the XML file and automatically translate from
2047 UTF-8 to Latin-1 as the values are inserted into the database; that
2048 is, it is a shortcut for the two keys mentioned above.
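
In practice this means the single key (a sketch):

  read_sub => 'latin1',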
2049
2050
2051 =item Other features of XML import
2052
2053 * Tags, attributes, and text that are not specifically referred to in
2054 the import() parameters are ignored when creating the database table.
2055
2056 * If a column name is listed that is not mapped onto a tag that occurs
2057 in the XML source, a column will be created in the database for that
2058 name and it will be given a default value of NULL for each record
2059 created.
2060
2061 =item XML Export
2062
2063 Any DBD::RAM table, regardless of its original source or its original
2064 format, can be exported to an XML file.
2065
2066 The export() parameters are the same as for other types of export() --
2067 see the above for details. Additionally there are some export
2068 parameters specific to XML files which are the same as the import()
2069 parameters for XML files mentioned above. The col_names parameter is
2070 required, as is the record_tag parameter. Additionally one may
2071 optionally pass a header and/or a footer parameter which will be
2072 material that goes above and below the records in the file. If no
2073 header is passed, a default header consisting of
2074
2075 <?xml version="1.0" ?>
2076
2077 will be created at the top of the file.
2078
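For example, to supply your own wrapper material (a sketch; both
strings are illustrative):

  header => qq[<?xml version="1.0" encoding="ISO-8859-1" ?>\n],
  footer => qq[<!-- end of export -->\n],
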
2079 Given a database like this:
2080
2081 location | name
2082 ---------+-----
2083 Portland | Joe
2084 Seattle | Sue
2085
2086 And an export() call like this:
2087
2088 $dbh->func({
2089 data_type => 'XML',
2090 data_target => 'test_db.new.xml',
2091 data_source => 'SELECT * FROM table1',
2092 record_tag => 'staff office person',
2093 col_names => 'name,location',
2094 },'export');
2095
2096 Would produce a file called 'test_db.new.xml' containing text like this:
2097
2098 <?xml version="1.0" ?>
2099 <staff>
2100 <office>
2101 <person>
2102 <name>Joe</name>
2103 <location>Portland</location>
2104 </person>
2105 <person>
2106 <name>Sue</name>
2107 <location>Seattle</location>
2108 </person>
2109 </office>
2110 </staff>
2111
2112 The module does not currently support exporting tag attributes or
2113 "folding out" nested column information, but those are planned for
2114 future releases.
2115
2116 =back
2117
2118 =head2 USER-DEFINED DATA STRUCTURES
2119
2120 DBD::RAM can be extended to handle almost any type of structured
2121 information with the USR data type. With this data type, you define a
2122 subroutine that parses your data and pass that to the import() command
2123 and the module will use that routine to create a database from your
2124 data. The routine can be as simple or as complex as you like. It
2125 must accept a string and return an array with the fields of the array
2126 in the same order as the columns in your database. Here is a simple
2127 example that works with data separated by double tildes. In reality,
2128 you could just do this with the built-in CSV type, but here is how you
2129 could recreate it with the USR type:
2130
2131 $dbh->func({
2132 data_type => 'USR',
2133 data_source => "1~~2~~3\n4~~5~~6\n",
2134 read_sub => sub { split /~~/,shift },
2135 },'import' );
2136
2137 That would build a table with two rows of three fields each. The
2138 first row would contain the values 1,2,3 and the second row would
2139 contain the values 4,5,6.
2140
2141 Here is a more complex example that handles a simplified address book.
2142 It assumes that your data is a series of addresses separated by blank
2143 lines and that the address has the name on the first line, the street
2144 on the second line and the town, state, and zipcode on the third line.
2145 (Apologies to those in countries that don't have states or zipcodes in
2146 this format). Here is an example of the kind of data it would handle:
2147
2148 Fred Bloggs
2149 123 Somewhere Lane
2150 Sometown OR 97215
2151
2152 Joe Blow
2153 567 Anywhere Street
2154 OtherTown WA 98006
2155
2156 Note that the end-of-line separator (eol) has been changed to be a
2157 blank line rather than a simple newline and that the parsing routine
2158 is more than a simple line-by-line parser; it splits the third line
2159 into three fields for town, state, and zip.
2160
2161 $dbh->func({
2162 data_type => 'USR',
2163 data_source => join('',<DATA>),
2164 col_names => 'name,street,town,state,zip',
2165 eol => '^\s*\n',
2166 read_sub => sub {
2167 my($name,$street,$stuff) = split "\n", $_[0];
2168 my @ary = split ' ',$stuff;
2169 my $zip = $ary[-1];
2170 my $state = $ary[-2];
2171 my $town = $stuff;
2172 $town =~ s/^(.*)\s+$state\s+$zip$/$1/;
2173 return($name,$street,$town,$state,$zip);
2174 },
2175 },'import');
2176
2177 Given the data above, this routine would create a table like this:
2178
2179 name | street | town | state | zip
2180 ------------+---------------------+-----------+-------+------
2181 Fred Bloggs | 123 Somewhere Lane | Sometown | OR | 97215
2182 Joe Blow | 567 Anywhere Street | OtherTown | WA | 98006
2183
2184 These are just samples; the possibilities are virtually unlimited.
2185
2186 PLEASE NOTE: If you develop generally useful parser routines that
2187 others might also be able to use, send them to me and I can
2188 incorporate them into the DBD itself (with proper credit, of course).
2189
2190 =head2 DBI DATABASE RECORDS
2191
2192 You can import information from any other DBI accessible database with
2193 the data_type set to 'DBI' in the import() method. First connect to
2194 the other database via DBI and get a database handle for it separate
2195 from the database handle for DBD::RAM. Then do a prepare and execute
2196 to get a statement handle for a SELECT statement against that database.
2197 Then pass the statement handle to the DBD::RAM import() method as the
2198 data_source. This will perform the fetch and insert the fetched
2199 fields and records into the DBD::RAM table. After the import()
2200 statement, you can then close the database connection to the other
2201 database if you are not going to be using it for anything else.
2202
2203 Here's an example using DBD::mysql --
2204
2205 use DBI;
2206 my $dbh_ram = DBI->connect('dbi:RAM:','','',{RaiseError=>1});
2207 my $dbh_mysql = DBI->connect('dbi:mysql:test','','',{RaiseError=>1});
2208 my $sth_mysql = $dbh_mysql->prepare("SELECT * FROM cars");
2209 $sth_mysql->execute;
2210 $dbh_ram->func({
2211 data_type => 'DBI',
2212 data_source => $sth_mysql,
2213 },'import' );
2214 $dbh_mysql->disconnect;
2215
2216 =head2 MP3 MUSIC FILES
2217
2218 Most mp3 (MPEG audio layer 3) music files contain a header describing the song
2219 name, artist, and other information about the music. This shortcut
2220 will collect all of the header information in all mp3 files in a group
2221 of directories and turn it into a searchable database:
2222
2223
2224 $dbh->func(
2225 { data_type => 'MP3', dirs => $dirlist }, 'import'
2226 );
2227
2228 $dirlist should be a reference to an array of absolute paths to
2229 directories containing mp3 files. Each file in those directories
2230 will become a record containing the fields: file_name, song_name,
2231 artist, album, year, comment, genre. The fields will be filled
2232 in automatically from the ID3v1x header information in the mp3 file
2233 itself, assuming, of course, that the mp3 file contains a
2234 valid ID3v1x header.
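
For example (a sketch; the directory path and the default table name
table1 are illustrative):

  my $dirlist = ['/home/me/mp3'];
  $dbh->func({ data_type => 'MP3', dirs => $dirlist }, 'import');
  my $sth = $dbh->prepare("SELECT artist,song_name FROM table1");
  $sth->execute;
  while ( my($artist,$song) = $sth->fetchrow_array ) {
      print "$artist: $song\n";
  }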
2235
2236 =head1 USING MULTIPLE TABLES
2237
2238 A single script can create as many tables as your RAM will support and
2239 you can have multiple statement handles open to the tables
2240 simultaneously. This allows you to simulate joins and multi-table
2241 operations by iterating over several statement handles at once. You
2242 can also mix and match databases of different formats, for example
2243 gathering user info from .ini and .config files in many different
2244 formats and putting them all into a single table.
2245
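Here is a sketch of simulating a simple join over two in-memory
tables by iterating over two statement handles (the table names,
columns, and data are illustrative):

  $dbh->func({ table_name => 'people',
               data_source => "1,Joe\n2,Sue\n" },'import');
  $dbh->func({ table_name => 'phones',
               data_source => "1,555-1234\n2,555-9876\n" },'import');

  my $people = $dbh->prepare("SELECT col1,col2 FROM people");
  my $phones = $dbh->prepare("SELECT col1,col2 FROM phones");
  $people->execute;
  while ( my($id,$name) = $people->fetchrow_array ) {
      $phones->execute;
      while ( my($pid,$phone) = $phones->fetchrow_array ) {
          print "$name: $phone\n" if $pid eq $id;
      }
  }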
2246
2247 =head1 TO DO
2248
2249 Lots of stuff. Allow users to specify a file where catalog
2250 information is stored so that one could record file types once and
2251 thereafter automatically open the files with the correct data type. A
2252 convert() function to go from one format to another. Support for a
2253 variety of other easily parsed formats such as Mail files, web logs,
2254 and for various DBM formats. Support for HTML files with the
2255 directory considered as a table, each HTML file considered as a record
2256 and the filename, <TITLE> tag, and <BODY> tags considered as fields.
2257 More robust SQL (coming when I update Statement.pm) including RLIKE (a
2258 regex-based LIKE), joins, alter table, typed fields?, authorization
2259 mechanisms? transactions? Allow remote exports (e.g. with LWP
2260 POST/PUT).
2261
2262 Let me know what else...
2263
2264 =head1 AUTHOR
2265
2266 Jeff Zucker <jeff@vpservices.com>
2267
2268 Copyright (c) 2000 Jeff Zucker. All rights reserved. This program is
2269 free software; you can redistribute it and/or modify it under the same
2270 terms as Perl itself as specified in the Perl README file.
2271
2272 No warranty of any kind is implied, use at your own risk.
2273
2274 =head1 SEE ALSO
2275
2276 DBI, SQL::Statement, DBD::File
2277
2278 =cut
