#########################################################################
#
# DBD::RAM - a DBI driver for files and data structures
#
# This module is copyright (c), 2000 by Jeff Zucker
# All rights reserved.
#
# This is free software. You may distribute it under
# the same terms as Perl itself as specified in the
# Perl README file.
#
# WARNING: no warranty of any kind is implied.
#
# To learn more: enter "perldoc DBD::RAM" at the command prompt,
# or search in this file for =head1 and read the text below it
#
#########################################################################
|
19 |
package DBD::RAM; |
20 |
|
21 |
use strict; |
22 |
require DBD::File; |
23 |
require SQL::Statement; |
24 |
require SQL::Eval; |
25 |
use IO::File; |
26 |
|
27 |
use vars qw($VERSION $err $errstr $sqlstate $drh $ramdata); |
28 |
|
29 |
use base qw(DBD::File); |
30 |
|
31 |
$VERSION = '0.07'; |
32 |
|
33 |
$err = 0; # holds error code for DBI::err |
34 |
$errstr = ""; # holds error string for DBI::errstr |
35 |
$sqlstate = ""; # holds SQL state for DBI::state |
36 |
$drh = undef; # holds driver handle once initialized |
37 |
|
38 |
#sub driver { |
39 |
# return $drh if $drh; # already created - return same one |
40 |
# my($class, $attr) = @_; |
41 |
# $class .= "::dr"; |
42 |
# $drh = DBI::_new_drh($class, { |
43 |
# 'Name' => 'RAM', |
44 |
# 'Version' => $VERSION, |
45 |
# 'Err' => \$DBD::RAM::err, |
46 |
# 'Errstr' => \$DBD::RAM::errstr, |
47 |
# 'State' => \$DBD::RAM::sqlstate, |
48 |
# 'Attribution' => 'DBD::RAM by Jeff Zucker', |
49 |
# }); |
50 |
# return $drh; |
51 |
#} |
52 |
|
53 |
package DBD::RAM::dr; # ====== DRIVER ====== |
54 |
|
55 |
$DBD::RAM::dr::imp_data_size = 0; |
56 |
|
57 |
use base qw(DBD::File::dr); |
58 |
|
59 |
sub connect { |
60 |
my($drh, $dbname, $user, $auth, $attr)= @_; |
61 |
my $dbh = DBI::_new_dbh($drh, { |
62 |
Name => $dbname, |
63 |
USER => $user, |
64 |
CURRENT_USER => $user, |
65 |
}); |
66 |
# PARSE EXTRA STRINGS IN DSN HERE |
67 |
# Process attributes from the DSN; we assume ODBC syntax |
68 |
# here, that is, the DSN looks like var1=val1;...;varN=valN |
69 |
my $var; |
70 |
foreach $var (split(/;/, $dbname)) { |
71 |
if ($var =~ /(.*?)=(.*)/) { |
72 |
my $key = $1; |
73 |
my $val = $2; |
74 |
$dbh->STORE($key, $val); |
75 |
} |
76 |
} |
77 |
$dbh->STORE('f_dir','./') if !$dbh->{f_dir}; |
78 |
# use Data::Dumper; die Dumper $DBD::RAM::ramdata if $DBD::RAM::ramdata; |
79 |
$dbh; |
80 |
} |
81 |
|
82 |
sub data_sources {} |
83 |
|
84 |
sub disconnect_all{ $DBD::RAM::ramdata = {};} |
85 |
|
86 |
sub DESTROY { $DBD::RAM::ramdata = {};} |
87 |
|
88 |
|
89 |
package DBD::RAM::db; # ====== DATABASE ====== |
90 |
|
91 |
$DBD::RAM::db::imp_data_size = 0; |
92 |
|
93 |
use base qw(DBD::File::db); |
94 |
|
95 |
sub disconnect{ $DBD::RAM::ramdata = {};} |
96 |
|
97 |
# DRIVER PRIVATE METHODS |
98 |
|
99 |
sub clear { |
100 |
my $dbh = shift; |
101 |
my $tname = shift; |
102 |
my $r = $DBD::RAM::ramdata; |
103 |
if ( $tname && $r->{$tname} ) { |
104 |
delete $r->{$tname};
105 |
} |
106 |
else { |
107 |
$DBD::RAM::ramdata = {}; |
108 |
} |
109 |
|
110 |
} |
111 |
|
112 |
sub dump { |
113 |
my $dbh = shift; |
114 |
my $sql = shift; |
115 |
my $txt; |
116 |
my $sth = $dbh->prepare($sql) or die $dbh->errstr; |
117 |
# use Data::Dumper; $Data::Dumper::Indent=0; print Dumper $sth; |
118 |
$sth->execute or die $sth->errstr; |
119 |
my @col_names = @{$sth->{NAME}}; |
120 |
$txt .= "<"; |
121 |
for (@col_names) { |
122 |
$txt .= "$_,"; |
123 |
} |
124 |
$txt =~ s/,$//; |
125 |
$txt .= ">\n"; |
126 |
while (my @row = $sth->fetchrow_array) { |
127 |
for (@row) { |
128 |
$_ ||= ''; |
129 |
s/^\s*//; |
130 |
s/\s*$//; |
131 |
$txt .= "[$_] "; |
132 |
} |
133 |
$txt .= "\n"; |
134 |
} |
135 |
return $txt; |
136 |
} |
137 |
|
138 |
sub get_catalog { |
139 |
my $self = shift; |
140 |
my $tname = shift || ''; |
141 |
my $catalog = $DBD::RAM::ramdata->{catalog}{$tname} || {}; |
142 |
$catalog->{f_type} ||= ''; |
143 |
$catalog->{r_type} ||= $catalog->{f_type}; |
144 |
$catalog->{f_name} ||= ''; |
145 |
$catalog->{pattern} ||= ''; |
146 |
$catalog->{col_names} ||= ''; |
147 |
$catalog->{eol} ||= "\n"; |
148 |
return $catalog; |
149 |
} |
150 |
|
151 |
sub catalog { |
152 |
my $dbh = shift; |
153 |
my $table_info = shift; |
154 |
if (!$table_info) { |
155 |
my @tables = (keys %{$DBD::RAM::ramdata->{catalog}} ); |
156 |
my @all_tables; |
157 |
for (@tables) { |
158 |
push @all_tables,[ |
159 |
$_, |
160 |
$DBD::RAM::ramdata->{catalog}{$_}{f_type}, |
161 |
$DBD::RAM::ramdata->{catalog}{$_}{f_name}, |
162 |
$DBD::RAM::ramdata->{catalog}{$_}{pattern}, |
163 |
$DBD::RAM::ramdata->{catalog}{$_}{sep_char}, |
164 |
$DBD::RAM::ramdata->{catalog}{$_}{eol}, |
165 |
$DBD::RAM::ramdata->{catalog}{$_}{col_names}, |
166 |
$DBD::RAM::ramdata->{catalog}{$_}{read_sub}, |
167 |
$DBD::RAM::ramdata->{catalog}{$_}{write_sub}]; |
168 |
} |
169 |
return @all_tables; |
170 |
} |
171 |
for (@{$table_info}) { |
172 |
my($table_name,$f_type,$f_name,$hash); |
173 |
if (ref $_ eq 'ARRAY') { |
174 |
($table_name,$f_type,$f_name,$hash) = @{$_}; |
175 |
} |
176 |
if (ref $_ eq 'HASH') { |
177 |
$table_name = $_->{table_name} || die "catalog() requires a table_name";
178 |
$f_type = $_->{data_type} || 'CSV'; |
179 |
$f_name = $_->{file_source} || ''; |
180 |
$hash = $_; |
181 |
} |
182 |
$hash->{r_type} = $f_type; |
183 |
if ($f_type eq 'FIXED') { $f_type = 'FIX'; } |
184 |
if ($f_type eq 'PIPE'){ |
185 |
$hash->{sep_char}='\s*\|\s*'; |
186 |
$hash->{wsep_char}='|'; |
187 |
$f_type = 'CSV'; |
188 |
} |
189 |
if ($f_type eq 'TAB' ){ |
190 |
$hash->{sep_char}="\t"; |
191 |
$f_type = 'CSV'; |
192 |
} |
193 |
if ($f_type eq 'INI' ){ |
194 |
$hash->{sep_char}='='; |
195 |
} |
196 |
$DBD::RAM::ramdata->{catalog}{$table_name}{f_type} = uc $f_type || ''; |
197 |
$DBD::RAM::ramdata->{catalog}{$table_name}{f_name} = $f_name || ''; |
198 |
if ($hash) { |
199 |
for(keys %{$hash}) { |
200 |
next if /table_name/; |
201 |
next if /data_type/; |
202 |
next if /file_source/; |
203 |
$DBD::RAM::ramdata->{catalog}{$table_name}{$_}=$hash->{$_}; |
204 |
} |
205 |
} |
206 |
$DBD::RAM::ramdata->{catalog}{$table_name}{eol} ||= "\n"; |
207 |
} |
208 |
} |
209 |
|
210 |
sub get_table_name { |
211 |
my $dbh = shift; |
212 |
my @tables = (keys %{$DBD::RAM::ramdata} ); |
213 |
if (!$tables[0]) { return 'table1'; } |
214 |
my $next=0; |
215 |
for my $table(@tables) { |
216 |
if ($table =~ /^table(\d+)/ ) { |
217 |
$next = $1 if $1 > $next; |
218 |
} |
219 |
} |
220 |
$next++; |
221 |
return("table$next"); |
222 |
} |
223 |
|
224 |
sub export() { |
225 |
my $dbh = shift; |
226 |
my $args = shift || die "No arguments for export()\n"; |
227 |
my $msg = "export() requires "; |
228 |
my $sql = $args->{data_source} || die $msg . '{data_source => $}'; |
229 |
my $f_name = $args->{data_target} || die 'export requires {data_target => $f}'; |
230 |
my $f_type = $args->{data_type} || die 'export requires {data_type => $d}'; |
231 |
if ($f_type eq 'XML') { return &export_xml($dbh,$args); } |
232 |
my $temp_table = 'temp__'; |
233 |
$dbh->func( [[$temp_table,$f_type,$f_name,$args]],'catalog'); |
234 |
my $sth1 = $dbh->prepare($sql); |
235 |
$sth1->execute or die $DBI::errstr; |
236 |
my @col_names = @{$sth1->{NAME}}; |
237 |
my $sth2 = &prep_insert( $dbh, $temp_table, @col_names ); |
238 |
while (my @row = $sth1->fetchrow_array) { |
239 |
$sth2->execute(@row); |
240 |
} |
241 |
delete $DBD::RAM::ramdata->{catalog}{$temp_table}; |
242 |
} |
243 |
|
244 |
sub export_xml() { |
245 |
my $dbh = shift; |
246 |
my $args = shift; |
247 |
my $msg = "Export to XML requires "; |
248 |
my $sql = $args->{data_source} || die $msg . '{data_source => $}'; |
249 |
my $f_name = $args->{data_target} || die $msg . '{data_target => $f}'; |
250 |
my $f_type = $args->{data_type} || die $msg . '{data_type => $d}'; |
251 |
my $record_tag = $args->{record_tag} || die $msg . '{record_tag => $r}'; |
252 |
my $header = $args->{header} || ''; |
253 |
my($head,$item,$foot) = &prep_xml_export($header,$record_tag); |
254 |
$f_name = $dbh->{f_dir} . '/' .$f_name; |
255 |
$f_name =~ s#//#/#g; |
256 |
open(O,">$f_name") || die "Couldn't write to $f_name: $!\n"; |
257 |
print O $head, "\n"; |
258 |
my $sth = $dbh->prepare($sql); |
259 |
$sth->execute; |
260 |
my @col_names = @{$sth->{NAME}}; |
261 |
while (my @row = $sth->fetchrow_array) { |
262 |
print O "<$item>\n"; |
263 |
my $i=0; |
264 |
for (@row) { |
265 |
next unless $row[$i]; |
266 |
print O " <$col_names[$i]>"; |
267 |
print O "$row[$i]"; |
268 |
print O "</$col_names[$i]>\n"; |
269 |
$i++; |
270 |
} |
271 |
print O "</$item>\n\n"; |
272 |
} |
273 |
print O $foot; |
274 |
close O || die "Couldn't write to $f_name: $!\n"; |
275 |
} |
276 |
|
277 |
sub prep_xml_export { |
278 |
my $header = shift || qq{<?xml version="1.0" ?>\n}; |
279 |
my $record_tag = shift; |
280 |
my @tag_starts = split ' ', $record_tag; |
281 |
my $terminal_tag = pop @tag_starts; |
282 |
my @tag_ends = map("</$_>\n",reverse @tag_starts); |
283 |
@tag_starts = map("<$_>\n",@tag_starts); |
284 |
for (@tag_starts) { $header .= $_; } |
285 |
#print " <$terminal_tag>\n"; |
286 |
my $footer; |
287 |
for (@tag_ends) { $footer .= $_; } |
288 |
return($header,$terminal_tag,$footer); |
289 |
} |
290 |
|
291 |
sub convert() { |
292 |
my $dbh = shift; |
293 |
my $specs = shift; |
294 |
my $source_type = $specs->{source_type} || ''; |
295 |
my $source_file = $specs->{source_file} || ''; |
296 |
my $source_params = $specs->{source_params} || ''; |
297 |
my $target_type = $specs->{target_type} || ''; |
298 |
my $target_file = $specs->{target_file} || ''; |
299 |
my $temp_table = 'temp__'; |
300 |
my($dbh2,$sth1); |
301 |
$dbh->func( [ |
302 |
["${temp_table}2",$target_type,$target_file,$source_params], |
303 |
],'catalog'); |
304 |
if ($source_type eq 'DBI' ) { |
305 |
my @con_ary = @{$source_params->{connection_ary}}; |
306 |
my $table = $source_params->{table}; |
307 |
$dbh2 = DBI->connect( @con_ary ); |
308 |
$sth1 = $dbh2->prepare("SELECT * FROM $table"); |
309 |
} |
310 |
else { |
311 |
$dbh->func( [ |
312 |
["${temp_table}1",$source_type,$source_file,$source_params], |
313 |
],'catalog'); |
314 |
$sth1 = $dbh->prepare("SELECT * FROM ${temp_table}1"); |
315 |
} |
316 |
$sth1->execute; |
317 |
my @col_names = @{$sth1->{NAME}}; |
318 |
my $sth2 = &prep_insert( $dbh, "${temp_table}2", @col_names ); |
319 |
while (my @row = $sth1->fetchrow_array) { |
320 |
$sth2->execute(@row); |
321 |
} |
322 |
if ($source_type eq 'DBI' ) { $dbh2->disconnect; } |
323 |
} |
324 |
|
325 |
|
326 |
sub import() { |
327 |
my $dbh = shift; |
328 |
my $specs = shift; |
329 |
my $data = shift; |
330 |
if ($specs && ! $data ) { |
331 |
if (ref $specs eq 'ARRAY' ) { |
332 |
$data = $specs; $specs = {}; |
333 |
} |
334 |
else { |
335 |
$data = []; |
336 |
} |
337 |
} |
338 |
if (ref $specs ne 'HASH') { |
339 |
die 'First argument to "import" must be a hashref.'; |
340 |
} |
341 |
if (ref $data ne 'ARRAY') { |
342 |
die 'Second argument to "import" must be an arrayref.'; |
343 |
} |
344 |
my $data_type = uc $specs->{data_type} || 'CSV'; |
345 |
my $table_name = $specs->{table_name} || $dbh->func('get_table_name'); |
346 |
my $col_names = $specs->{col_names} || ''; |
347 |
my $pattern = $specs->{pattern} || ''; |
348 |
my $read_sub = $specs->{read_sub} || ''; |
349 |
my $write_sub = $specs->{write_sub} || ''; |
350 |
my $data_source = $specs->{data_source} || ''; |
351 |
my $file_source = $specs->{file_source} || ''; |
352 |
my $remote_source = $specs->{remote_source} || ''; |
353 |
my $sep_char = $specs->{sep_char} || ''; |
354 |
my $eol = $specs->{eol} || "\n"; |
355 |
$DBD::RAM::ramdata->{catalog}{$table_name}->{r_type} = $data_type; |
356 |
if ($data_type eq 'FIXED'){ $data_type = 'FIX'; } |
357 |
if ($data_type eq 'PIPE') { $sep_char = '\s*\|\s*'; $data_type = 'CSV'; } |
358 |
if ($data_type eq 'TAB' ) { $sep_char = "\t"; $data_type = 'CSV'; } |
359 |
$DBD::RAM::ramdata->{catalog}{$table_name}->{sep_char} = $sep_char if $sep_char; |
360 |
$DBD::RAM::ramdata->{catalog}{$table_name}->{eol} = $eol if $eol; |
361 |
$DBD::RAM::ramdata->{catalog}{$table_name}->{pattern} = $pattern if $pattern; |
362 |
$DBD::RAM::ramdata->{catalog}{$table_name}->{read_sub} = $read_sub if $read_sub; |
363 |
$DBD::RAM::ramdata->{catalog}{$table_name}->{write_sub} = $write_sub if $write_sub; |
364 |
if ($data_type eq 'MP3' ) { |
365 |
$data_type = 'FIX'; |
366 |
$col_names = 'file_name,song_name,artist,album,year,comment,genre', |
367 |
$pattern = 'A255 A30 A30 A30 A4 A30 A50', |
368 |
$DBD::RAM::ramdata->{catalog}{$table_name}->{pattern} = $pattern; |
369 |
$data = &get_music_library( $specs ) |
370 |
} |
371 |
if ($data_type eq 'XML' ) { |
372 |
$data = $dbh->func( $specs, $table_name, 'get_xml_db' ); |
373 |
return 1; |
374 |
} |
375 |
#################################################################### |
376 |
# DATA SOURCE |
377 |
#################################################################### |
378 |
# |
379 |
# DATA FROM REMOTE FILE |
380 |
# |
381 |
if ($remote_source) { |
382 |
$data = $dbh->func($remote_source,'get_remote_data') or return undef; |
383 |
$data = [split("\n",$data)]; # turn string into arrayref |
384 |
} |
385 |
# |
386 |
# DATA FROM LOCAL FILE |
387 |
# |
388 |
if ($file_source) { |
389 |
$data = &get_file_data($dbh,$file_source); |
390 |
$data = [split("\n",$data)]; # turn string into arrayref |
391 |
} |
392 |
# |
393 |
# DATA FROM DATA STRUCTURE |
394 |
# |
395 |
if ($data_source) { |
396 |
$data = $data_source; |
397 |
} |
398 |
my @col_names; |
399 |
if ($data_type eq 'DBI' ) { |
400 |
@col_names = @{$data->{NAME}}; |
401 |
my $sth_new = &prep_insert( $dbh, $table_name, @col_names ); |
402 |
while (my @datarow = $data->fetchrow_array) { |
403 |
$sth_new->execute(@datarow); |
404 |
} |
405 |
die "No data in table $table_name!" |
406 |
unless $DBD::RAM::ramdata->{$table_name}->{DATA}; |
407 |
return 1; |
408 |
} |
409 |
#################################################################### |
410 |
# GET COLUMN NAMES |
411 |
#################################################################### |
412 |
if (!ref $data) { my @tmp = split ( /$eol/m, $data ); $data = \@tmp; } |
413 |
my $first_line; |
414 |
if ($col_names eq 'first_line' |
415 |
&& $data_type ne 'HASH' ) { $first_line = shift @{$data}; } |
416 |
else { $first_line = $data->[0]; }
417 |
@col_names = $dbh->func( |
418 |
$table_name,$data_type,$col_names,$first_line, |
419 |
'get_column_names'); |
420 |
#################################################################### |
421 |
# CREATE TABLE & PREPARE INSERT STATEMENT |
422 |
#################################################################### |
423 |
my $sth = &prep_insert( $dbh, $table_name, @col_names ); |
424 |
|
425 |
#################################################################### |
426 |
# INSERT DATA INTO TABLE |
427 |
#################################################################### |
428 |
if ('CSV FIX INI ARRAY HASH USR' =~ /$data_type/ ) { |
429 |
for ( @{$data} ) { |
430 |
my @datarow; |
431 |
if ( $data_type eq 'HASH') { |
432 |
my %rowhash = %{$_}; |
433 |
for (@col_names) { |
434 |
my $val = $rowhash{$_} || ''; |
435 |
push @datarow, $val; |
436 |
} |
437 |
} |
438 |
else { |
439 |
@datarow = $dbh->func($_,$table_name,$data_type,'read_fields'); |
440 |
} |
441 |
$sth->execute(@datarow); |
442 |
} |
443 |
} |
444 |
die "No data in table $table_name!" unless $DBD::RAM::ramdata->{$table_name}->{DATA}; |
445 |
$DBD::RAM::ramdata->{$table_name}->{data_type} = $data_type; |
446 |
$DBD::RAM::ramdata->{$table_name}->{pattern} = $pattern; |
447 |
$DBD::RAM::ramdata->{$table_name}->{read_sub} = $read_sub; |
448 |
$DBD::RAM::ramdata->{$table_name}->{write_sub} = $write_sub; |
449 |
return 1; |
450 |
} |
451 |
|
452 |
#################################################################### |
453 |
# COLUMN NAMES |
454 |
#################################################################### |
455 |
sub get_column_names { |
456 |
my($dbh,$table_name,$data_type,$col_names,$first_line) = @_; |
457 |
my $catalog = $DBD::RAM::ramdata->{catalog}{$table_name}; |
458 |
my $pattern = $catalog->{pattern} || ''; |
459 |
my $read_sub = $catalog->{read_sub} || ''; |
460 |
my($colstr,@col_names,$num_params); |
461 |
$colstr = ''; |
462 |
# |
463 |
# COLUMN NAMES FROM FIRST LINE OF DATA |
464 |
# |
465 |
if ( $col_names eq 'first_line' && $data_type ne 'HASH' ) { |
466 |
@col_names = $dbh->func( |
467 |
$first_line,$table_name,$data_type,'read_fields'); |
468 |
$num_params = scalar @col_names; |
469 |
} |
470 |
# |
471 |
# COLUMN NAMES FROM USER-SUPPLIED LIST |
472 |
# |
473 |
if ( $col_names && $col_names ne 'first_line' ) { |
474 |
$col_names =~ s/\s+//g; |
475 |
@col_names = split /,/,$col_names; |
476 |
$num_params = scalar @col_names; |
477 |
} |
478 |
# |
479 |
# AUTOMATICALLY ASSIGNED COLUMN NAMES |
480 |
# |
481 |
if ( $data_type eq 'HASH' && !$num_params ) { |
482 |
@col_names = keys %{$first_line}; |
483 |
$num_params = scalar @col_names; |
484 |
} |
485 |
if ( !$num_params ) { |
486 |
if ( $data_type eq 'INI' ) { |
487 |
$num_params = 2; |
488 |
} |
489 |
if ( $data_type eq 'FIX' ) { |
490 |
my @x = split /\s+/,$pattern; |
491 |
$num_params = scalar @x; |
492 |
} |
493 |
if ( $data_type eq 'CSV' or $data_type eq 'USR' ) { |
494 |
my @colAry = $dbh->func( |
495 |
$first_line,$table_name,$data_type,'read_fields'); |
496 |
$num_params = scalar @colAry; |
497 |
} |
498 |
$num_params = scalar @{ $first_line } if |
499 |
!$num_params && ref $first_line eq 'ARRAY'; |
500 |
die "Couldn't find column names!" if !$num_params; |
501 |
for ( 1 .. $num_params ) { push(@col_names,"col$_"); } |
502 |
} |
503 |
return @col_names; |
504 |
} |
505 |
|
506 |
sub prep_insert { |
507 |
my( $dbh, $table_name, @col_names ) = @_; |
508 |
my($colstr,$num_params); |
509 |
for ( @col_names ) { $colstr .= $_ . ' TEXT,'; } |
510 |
$colstr =~ s/,$//; |
511 |
my $create_stmt = "CREATE TABLE $table_name ($colstr)"; |
512 |
my $param_str = (join ",", ("?") x @col_names); |
513 |
my $insert_stmt = "INSERT INTO $table_name VALUES ($param_str)"; |
514 |
$dbh->do($create_stmt); |
515 |
my $sth = $dbh->prepare($insert_stmt); |
516 |
} |
517 |
|
518 |
|
519 |
sub get_remote_data { |
520 |
my $dbh = shift; |
521 |
my $remote_source = shift; |
522 |
undef $@; |
523 |
eval{ require 'LWP/UserAgent.pm'; }; |
524 |
die "LWP module not found! $@" if $@; |
525 |
my $ua = new LWP::UserAgent; |
526 |
my $req = new HTTP::Request GET => $remote_source; |
527 |
my $res = $ua->request($req); |
528 |
die "[$remote_source] : " . $res->message if !$res->is_success; |
529 |
my $data = $res->content; |
530 |
return $data; |
531 |
} |
532 |
|
533 |
sub get_file_data { |
534 |
my $dbh = shift; |
535 |
my $file_source = shift; |
536 |
$file_source = $dbh->{f_dir} . '/' .$file_source; |
537 |
$file_source =~ s#//#/#g; |
538 |
open(I,$file_source) || die "[$file_source]: $!\n"; |
539 |
local $/ = undef; |
540 |
my $data = <I>; |
541 |
close(I) || die "$file_source: $!\n"; |
542 |
return $data; |
543 |
} |
544 |
|
545 |
sub get_xml_db { |
546 |
# Hat tip to Randal Schwartz for the XML/LWP stuff |
547 |
my($dbh,$specs,$table_name) = @_; |
548 |
my $remote_source = $specs->{remote_source} || ''; |
549 |
my $file_source = $specs->{file_source} || ''; |
550 |
my $data_source = $specs->{data_source} || ''; |
551 |
my $record_tag = $specs->{record_tag} || ''; |
552 |
my $col_tags = $specs->{col_tags} || ''; |
553 |
my $fold_col = $specs->{fold_col} || ''; |
554 |
my $col_mapping = $specs->{col_mapping} || ''; |
555 |
my $col_names = $specs->{col_names} || ''; |
556 |
my $read_sub = $specs->{read_sub} || ''; |
557 |
my $attr = $specs->{attr} || ''; |
558 |
my $data; |
559 |
my @columns; |
560 |
if (ref $col_names ne 'ARRAY') { $col_names = [split ',',$col_names]; } |
561 |
for ( @{$col_names} ) { |
562 |
if ($_ =~ /^\[(.*)\]$/ ) { |
563 |
my @newCols = split ',', $1; |
564 |
for (@newCols) { push @columns, $_; } |
565 |
} |
566 |
else { |
567 |
push @columns, $_; |
568 |
} |
569 |
} |
570 |
my $colstr; |
571 |
for ( @columns ) { $colstr .= $_ . ' TEXT,'; } |
572 |
$colstr =~ s/,$//; |
573 |
my $sql = "CREATE TABLE $table_name ($colstr)"; |
574 |
$dbh->do($sql) || die DBI::errstr, " : $sql"; |
575 |
$DBD::RAM::ramdata->{$table_name}->{data_type} = 'XML'; |
576 |
if ($remote_source){$data = $dbh->func($remote_source,'get_remote_data') or die; } |
577 |
if ($file_source) { $data = &get_file_data($dbh,$file_source); } |
578 |
if ($data_source) { $data = $data_source; } |
579 |
die "No file or data source supplied!" unless $data; |
580 |
my $insert = $dbh->prepare("INSERT INTO $table_name (". |
581 |
(join ", ", @columns). |
582 |
") VALUES (". |
583 |
(join ",", ("?") x @columns).")"); |
584 |
My_XML_Parser::doParse($data, $insert, $record_tag, |
585 |
$col_names, $col_mapping,$fold_col,$attr,$read_sub); |
586 |
#use Data::Dumper; print Dumper $DBD::RAM::ramdata; exit; |
587 |
} |
588 |
|
589 |
sub read_fields { |
590 |
my $dbh = shift; |
591 |
my $str = shift; |
592 |
my $tname = shift; |
593 |
my $type = uc shift; |
594 |
my $catalog = $dbh->func($tname,'get_catalog'); |
595 |
if ($type eq 'ARRAY') { |
596 |
return @{$str}; |
597 |
} |
598 |
chomp $str; |
599 |
if ($type eq 'CSV') { |
600 |
my $sep_char = $catalog->{sep_char} || ','; |
601 |
#my @fields = Text::ParseWords::parse_line( $sep_char, 0, $str ); |
602 |
my @fields = &csv2ary( $sep_char, $str ); |
603 |
return @fields; |
604 |
} |
605 |
if ($type eq 'USR') { |
606 |
my $read_sub = $catalog->{read_sub} || die "USR Type requires read_sub routine!\n"; |
607 |
return &$read_sub($str); |
608 |
} |
609 |
if ($type eq 'FIX') { |
610 |
return unpack $catalog->{pattern}, $str; |
611 |
} |
612 |
if ($type eq 'INI') { |
613 |
if ( $str =~ /^([^=]+)=(.*)/ ) { |
614 |
my @fields = ($1,$2); |
615 |
return @fields; |
616 |
} |
617 |
} |
618 |
if ($type eq 'XML') { |
619 |
my @fields; |
620 |
$str =~ s#<[^>]*>([^<]*)<[^>]*># |
621 |
my $x = $1 || ''; |
622 |
push @fields, $x; |
623 |
#ge; |
624 |
return @fields; |
625 |
} |
626 |
return (); |
627 |
} |
628 |
|
629 |
sub ary2csv { |
630 |
my($field_sep,$record_sep,@ary)=@_; |
631 |
my $field_rsep = quotemeta($field_sep); |
632 |
my $str=''; |
633 |
for (@ary) { |
634 |
$_ = '' if !defined $_; |
635 |
if ($field_sep eq ',') { |
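# Standard CSV quoting: double any embedded quotes, then wrap the field
# in quotes if it contains a comma, newline, or quote.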
636 |
s/"/""/g; |
637 |
s/^(.*)$/"$1"/s if /,/ or /\n/s or /"/; |
638 |
} |
639 |
$str .= $_ . $field_sep; |
640 |
} |
641 |
$str =~ s/$field_rsep$/$record_sep/; |
642 |
return $str; |
643 |
} |
644 |
|
645 |
sub csv2ary { |
646 |
my($field_sep,$str)=@_; |
647 |
# chomp $str; |
648 |
#$str =~ s/[\015\012]//g; |
649 |
$str =~ tr/\015\012//d; |
650 |
if ($field_sep ne ',') { |
651 |
#$field_sep = quotemeta($field_sep); LEFT UP TO USER TO DO |
652 |
return split($field_sep, $str); |
653 |
} |
654 |
$str =~ s/""/\\"/g; |
655 |
my @new = (); |
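# Each pass grabs either a double-quoted field (with backslash escapes),
# an unquoted field, or an empty field marked by a bare separator.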
656 |
push(@new, $+ ) while $str =~ m{ |
657 |
"([^\"\\]*(?:\\.[^\"\\]*)*)"$field_sep? |
658 |
| ([^$field_sep]+)$field_sep? |
659 |
| $field_sep |
660 |
}gx; |
661 |
push(@new, undef) if substr($str,-1,1) eq $field_sep; |
662 |
@new = map {my $x=$_; $x = '' if !defined $x; $x =~ s/\\"/"/g; $x;} @new; |
663 |
return @new; |
664 |
} |
665 |
|
666 |
sub write_fields { |
667 |
my($dbh,$fields,$tname,$type) = @_; |
668 |
my $catalog = $dbh->func($tname,'get_catalog'); |
669 |
my $sep = $catalog->{sep_char} || ','; |
670 |
my $wsep = $catalog->{wsep_char} || $sep; |
671 |
my $fieldNum =0; |
672 |
my $fieldStr = $catalog->{pattern} || ''; |
673 |
$fieldStr =~ s/a//gi; |
674 |
my @fieldLengths = split / /, $fieldStr; |
675 |
$fieldStr = ''; |
676 |
if( $catalog->{f_type} eq 'USR' ) { |
677 |
my $write_sub = $catalog->{write_sub} || die "Requires write_sub!\n"; |
678 |
my $fieldStr = &$write_sub(@{$fields}); |
679 |
return $fieldStr; |
680 |
} |
681 |
if( $catalog->{f_type} eq 'XML' ) { |
682 |
my @col_names = split ',',$catalog->{col_names}; |
683 |
my $count =0; |
684 |
for (@col_names) { |
685 |
$fieldStr .= "<$_>$fields->[$count]</$_>"; |
686 |
$count++; |
687 |
} |
688 |
return $fieldStr; |
689 |
} |
690 |
for(@$fields) { |
691 |
# PAD OR TRUNCATE DATA TO FIT WITHIN FIELD LENGTHS |
692 |
if( $catalog->{f_type} eq 'FIX' ) { |
693 |
my $oldLen = length $_; |
694 |
my $newLen = $fieldLengths[$fieldNum]; |
695 |
if ($oldLen < $newLen) { $_ = sprintf "%-${newLen}s",$_; } |
696 |
if ($oldLen > $newLen) { $_ = substr $_, 0, $newLen; } |
697 |
$fieldNum++; |
698 |
} |
699 |
my $newCol = $_; |
700 |
if( $catalog->{f_type} eq 'CSV' ) { |
701 |
if ($newCol =~ /$sep/ ) { |
702 |
$newCol =~ s/\042/\\\042/go; |
703 |
$newCol = qq{"$newCol"}; |
704 |
} |
705 |
$fieldStr .= $newCol . $wsep; |
706 |
} |
707 |
else { $fieldStr .= $newCol; } |
708 |
if( $catalog->{f_type} eq 'INI' ) { $fieldStr .= '='; } |
709 |
} |
710 |
if( $catalog->{f_type} eq 'CSV' ) { $fieldStr =~ s/$sep$//; } |
711 |
if( $catalog->{f_type} eq 'INI' ) { $fieldStr =~ s/=$//; } |
712 |
return $fieldStr; |
713 |
} |
714 |
|
715 |
sub get_music_library { |
716 |
my $specs = shift; |
717 |
my @dirs = @{$specs->{dirs}}; |
718 |
my @db; |
719 |
for my $dir(@dirs) { |
720 |
my @files = get_music_dir( $dir ); |
721 |
for my $fname(@files) { |
722 |
push @db, &get_mp3_tag($fname) |
723 |
} |
724 |
} |
725 |
return \@db; |
726 |
} |
727 |
|
728 |
sub get_music_dir { |
729 |
my $dir = shift; |
730 |
opendir(D,$dir) || print "$dir: $!\n"; |
731 |
return '' if $!; |
732 |
my @files = grep /mp3$/i, readdir D; |
733 |
@files = map ( $_ = $dir . $_, @files); |
734 |
closedir(D) || print "Couldn't read '$dir':$!"; |
735 |
return @files; |
736 |
} |
737 |
|
738 |
sub get_mp3_tag { |
739 |
my($file) = shift; |
740 |
open(I,$file) || return ''; |
741 |
binmode I; |
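# An ID3v1 tag is the last 128 bytes of the file:
# "TAG" + title(30) + artist(30) + album(30) + year(4) + comment(30) + genre(1)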
742 |
local $/ = ''; |
743 |
seek I, -128, 2; |
744 |
my $str = <I> || ''; |
745 |
return '' if !($str =~ /^TAG/); |
746 |
$file = sprintf("%-255s",$file); |
747 |
$str =~ s/^TAG(.*)/$file$1/; |
748 |
my $genre = $str; |
749 |
$genre =~ s/^.*(.)$/$1/g; |
750 |
$str =~ s/(.)$//g; |
751 |
$genre = unpack( 'C', $genre ); |
752 |
my @genres =("Blues", "Classic Rock", "Country", "Dance", "Disco", "Funk", "Grunge", "Hip-Hop", "Jazz", "Metal", "New Age", "Oldies", "Other", "Pop", "R&B", "Rap", "Reggae", "Rock", "Techno", "Industrial", "Alternative", "Ska", "Death Metal", "Pranks", "Soundtrack", "Eurotechno", "Ambient", "Trip-Hop", "Vocal", "Jazz+Funk", "Fusion", "Trance", "Classical", "Instrumental", "Acid", "House", "Game", "Sound Clip", "Gospel", "Noise", "Alternative Rock", "Bass", "Soul", "Punk", "Space", "Meditative", "Instrumental Pop", "Instrumental Rock", "Ethnic", "Gothic", "Darkwave", "Techno-Industrial", "Electronic", "Pop-Folk", "Eurodance", "Dream", "Southern Rock", "Comedy", "Cult", "Gangsta", "Top 40", "Christian Rap", "Pop/Funk", "Jungle", "Native American", "Cabaret", "New Wave", "Psychadelic", "Rave", "Show Tunes", "Trailer", "Lo-Fi", "Tribal", "Acid Punk", "Acid Jazz", "Polka", "Retro", "Musical", "Rock & Roll", "Hard Rock", "Folk", "Folk/Rock", "National Folk", "Swing", "Fast-Fusion", "Bebop", "Latin", "Revival", "Celtic", "Bluegrass", "Avantgarde", "Gothic Rock", "Progressive Rock", "Psychedelic Rock", "Symphonic Rock", "Slow Rock", "Big Band", "Chorus", "Easy Listening", "Acoustic", "Humour", "Speech", "Chanson", "Opera", "Chamber Music", "Sonata", "Symphony", "Booty Bass", "Primus", "Porn Groove", "Satire", "Slow Jam", "Club", "Tango", "Samba", "Folklore", "Ballad", "Power Ballad", "Rhytmic Soul", "Freestyle", "Duet", "Punk Rock", "Drum Solo", "Acapella", "Euro-House", "Dance Hall", "Goa", "Drum & Bass", "Club-House", "Hardcore", "Terror", "Indie", "BritPop", "Negerpunk", "Polsk Punk", "Beat", "Christian Gangsta Rap", "Heavy Metal", "Black Metal", "Crossover", "Contemporary Christian", "Christian Rock", "Unknown"); |
753 |
$genre = $genres[$genre] || ''; |
754 |
$str .= $genre . "\n"; |
755 |
return $str; |
756 |
} |
757 |
|
758 |
|
759 |
# END OF DRIVER PRIVATE METHODS |
760 |
|
761 |
sub table_info ($) { |
762 |
my($dbh) = @_; |
763 |
my @tables; |
764 |
for (keys %{$DBD::RAM::ramdata} ) { |
765 |
push(@tables, [undef, undef, $_, "TABLE", undef]); |
766 |
} |
767 |
my $names = ['TABLE_QUALIFIER', 'TABLE_OWNER', 'TABLE_NAME', |
768 |
'TABLE_TYPE', 'REMARKS']; |
769 |
my $dbh2 = $dbh->{'csv_sponge_driver'}; |
770 |
if (!$dbh2) { |
771 |
$dbh2 = $dbh->{'csv_sponge_driver'} = DBI->connect("DBI:Sponge:"); |
772 |
if (!$dbh2) { |
773 |
DBI::set_err($dbh, 1, $DBI::errstr); |
774 |
return undef; |
775 |
} |
776 |
} |
777 |
|
778 |
# Temporary kludge: DBD::Sponge dies if @tables is empty. :-( |
779 |
return undef if !@tables; |
780 |
|
781 |
my $sth = $dbh2->prepare("TABLE_INFO", { 'rows' => \@tables, |
782 |
'NAMES' => $names }); |
783 |
if (!$sth) { |
784 |
DBI::set_err($dbh, 1, $dbh2->errstr()); |
785 |
} |
786 |
$sth; |
787 |
} |
788 |
|
789 |
sub DESTROY { $DBD::RAM::ramdata = {};} |
790 |
|
791 |
package DBD::RAM::st; # ====== STATEMENT ====== |
792 |
|
793 |
$DBD::RAM::st::imp_data_size = 0; |
794 |
@DBD::RAM::st::ISA = qw(DBD::File::st); |
795 |
|
796 |
|
797 |
package DBD::RAM::Statement; |
798 |
|
799 |
#@DBD::RAM::Statement::ISA = qw(SQL::Statement); |
800 |
@DBD::RAM::Statement::ISA = qw(SQL::Statement DBD::File::Statement); |
801 |
#@DBD::RAM::Statement::ISA = qw(DBD::File::Statement); |
802 |
|
803 |
sub open_table ($$$$$) { |
804 |
my($self, $data, $tname, $createMode, $lockMode) = @_; |
805 |
my($table); |
806 |
my $dbh = $data->{Database}; |
807 |
my $catalog = $dbh->func($tname,'get_catalog'); |
808 |
my $ftype = $catalog->{f_type} || ''; |
809 |
if( !$catalog->{f_type} || $catalog->{f_type} eq 'RAM' ) { |
810 |
if ($createMode && !($DBD::RAM::ramdata->{$tname}) ) { |
811 |
if (exists($data->{$tname})) { |
812 |
die "A table $tname already exists"; |
813 |
} |
814 |
$table = $data->{$tname} = { 'DATA' => [], |
815 |
'CURRENT_ROW' => 0, |
816 |
'NAME' => $tname, |
817 |
}; |
818 |
bless($table, ref($self) . "::Table"); |
819 |
$DBD::RAM::ramdata->{$tname} = $table; |
820 |
return $table; |
821 |
} |
822 |
else { |
823 |
$table = $DBD::RAM::ramdata->{$tname}; |
824 |
die "No such table $tname" unless $table; |
825 |
$table->{'CURRENT_ROW'} = 0; |
826 |
return $table; |
827 |
} |
828 |
} |
829 |
else { |
830 |
my $file_name = $catalog->{f_name} || $tname; |
831 |
$table = $self->SUPER::open_table( |
832 |
$data, $file_name, $createMode, $lockMode |
833 |
); |
834 |
my $fh = $table->{'fh'}; |
835 |
my $col_names = $catalog->{col_names} || ''; |
836 |
my @col_names = (); |
837 |
if (!$createMode) { |
838 |
my $first_line = $fh->getline || ''; |
839 |
#$first_line =~ s/[\015\012]//g; |
840 |
$first_line =~ tr/\015\012//d; |
841 |
@col_names = $dbh->func( |
842 |
$tname,$ftype,$col_names,$first_line, |
843 |
'get_column_names'); |
844 |
} |
845 |
if ($col_names eq 'first_line' && !$createMode) { |
846 |
$table->{first_row_pos} = $fh->tell(); |
847 |
} |
848 |
else { |
849 |
seek $fh,0,0; |
850 |
} |
851 |
my $count = 0; |
852 |
my %col_nums; |
853 |
for (@col_names) { next unless $_; $col_nums{$_} = $count; $count++; } |
854 |
$table->{col_names} = \@col_names; |
855 |
$table->{col_nums} = \%col_nums; |
856 |
$table->{'CURRENT_ROW'} = 0; |
857 |
$table->{NAME} = $tname; |
858 |
$table; |
859 |
} |
860 |
} |
861 |
|
862 |
package DBD::RAM::Statement::Table; |
863 |
|
864 |
@DBD::RAM::Statement::Table::ISA = qw(DBD::RAM::Table); |
865 |
|
866 |
package DBD::RAM::Table; |
867 |
|
868 |
#@DBD::RAM::Table::ISA = qw(SQL::Eval::Table); |
869 |
#@DBD::RAM::Statement::Table::ISA = qw(SQL::Eval::Table DBD::File::Table); |
870 |
use base qw(DBD::File::Table); |
871 |
|
872 |
################################## |
873 |
# fetch_row() |
874 |
# CALLED WITH "SELECT ... FETCH" |
875 |
################################## |
876 |
sub fetch_row ($$$) { |
877 |
my($self, $data, $row) = @_; |
878 |
my $dbh = $data->{Database}; |
879 |
my $tname = $self->{NAME}; |
880 |
my $catalog = $dbh->func($tname,'get_catalog'); |
881 |
if( !$catalog->{f_type} || $catalog->{f_type} eq 'RAM' ) { |
882 |
my($currentRow) = $self->{'CURRENT_ROW'}; |
883 |
if ($currentRow >= @{$self->{'DATA'}}) { |
884 |
return undef; |
885 |
} |
886 |
$self->{'CURRENT_ROW'} = $currentRow+1; |
887 |
$self->{'row'} = $self->{'DATA'}->[$currentRow]; |
888 |
return $self->{row}; |
889 |
} |
890 |
else { |
891 |
my $fields; |
892 |
if (exists($self->{cached_row})) { |
893 |
$fields = delete($self->{cached_row}); |
894 |
} else { |
895 |
local $/ = $catalog->{eol} || "\n"; |
896 |
#$fields = $csv->getline($self->{'fh'}); |
897 |
my $fh = $self->{'fh'} ; |
898 |
my $line = $fh->getline || return undef; |
899 |
chomp $line; |
900 |
@$fields = $dbh->func($line,$tname,$catalog->{f_type},'read_fields'); |
901 |
# @$fields = unpack $dbh->{pattern}, $line; |
902 |
if ( $dbh->{ChopBlanks} ) { |
903 |
@$fields = map($_=&trim($_),@$fields); |
904 |
} |
905 |
if (!$fields ) { |
906 |
die "Error while reading file " . $self->{'file'} . ": $!" if $!; |
907 |
return undef; |
908 |
} |
909 |
} |
910 |
$self->{row} = (@$fields ? $fields : undef); |
911 |
} |
912 |
return $self->{row}; |
913 |
} |
914 |
|
915 |
sub trim { my $x=shift; $x =~ s/^\s+//; $x =~ s/\s+$//; $x; } |
916 |
|
917 |
############################## |
918 |
# push_names() |
919 |
# CALLED WITH "CREATE TABLE" |
920 |
############################## |
921 |
sub push_names ($$$) { |
922 |
my($self, $data, $names) = @_; |
923 |
my $dbh = $data->{Database}; |
924 |
my $tname = $self->{NAME}; |
925 |
my $catalog = $dbh->func($tname,'get_catalog'); |
926 |
if( !$catalog->{f_type} || $catalog->{f_type} eq 'RAM' ) { |
927 |
$self->{'col_names'} = $names; |
928 |
my($colNums) = {}; |
929 |
for (my $i = 0; $i < @$names; $i++) { |
930 |
$colNums->{$names->[$i]} = $i; |
931 |
} |
932 |
$self->{'col_nums'} = $colNums; |
933 |
} |
934 |
elsif(!$catalog->{col_names}) { |
935 |
my $fh = $self->{'fh'} ; |
936 |
my $colStr=$dbh->func($names,$tname,$catalog->{f_type},'write_fields'); |
937 |
$colStr .= $catalog->{eol}; |
938 |
$fh->print($colStr); |
939 |
} |
940 |
} |
941 |
|
942 |
################################ |
943 |
# push_rows() |
944 |
# CALLED WITH "INSERT" & UPDATE |
945 |
################################ |
946 |
sub push_row ($$$) { |
947 |
my($self, $data, $fields) = @_; |
948 |
my $dbh = $data->{Database}; |
949 |
my $tname = $self->{NAME}; |
950 |
my $catalog = $dbh->func($tname,'get_catalog'); |
951 |
if( !$catalog->{f_type} || $catalog->{f_type} eq 'RAM' ) { |
952 |
my($currentRow) = $self->{'CURRENT_ROW'}; |
953 |
$self->{'CURRENT_ROW'} = $currentRow+1; |
954 |
$self->{'DATA'}->[$currentRow] = $fields; |
955 |
return 1; |
956 |
} |
957 |
my $fh = $self->{'fh'}; |
958 |
# |
959 |
# Remove undef from the right end of the fields, so that at least |
960 |
# in these cases undef is returned from FetchRow |
961 |
# |
962 |
while (@$fields && !defined($fields->[$#$fields])) { |
963 |
pop @$fields; |
964 |
} |
965 |
my $fieldStr=$dbh->func($fields,$tname,$catalog->{f_type},'write_fields'); |
966 |
$fh->print($fieldStr,$catalog->{eol}); |
967 |
1; |
968 |
} |
969 |
|
970 |
sub seek ($$$$) { |
971 |
my($self, $data, $pos, $whence) = @_; |
972 |
my $dbh = $data->{Database}; |
973 |
my $tname = $self->{NAME}; |
974 |
my $catalog = $dbh->func($tname,'get_catalog'); |
975 |
if( $catalog->{f_type} && $catalog->{f_type} ne 'RAM' ) { |
976 |
return DBD::File::Table::seek( |
977 |
$self, $data, $pos, $whence |
978 |
); |
979 |
} |
980 |
my($currentRow) = $self->{'CURRENT_ROW'}; |
981 |
if ($whence == 0) { |
982 |
$currentRow = $pos; |
983 |
} elsif ($whence == 1) { |
984 |
$currentRow += $pos; |
985 |
} elsif ($whence == 2) { |
986 |
$currentRow = @{$self->{'DATA'}} + $pos; |
987 |
} else { |
988 |
die $self . "->seek: Illegal whence argument ($whence)"; |
989 |
} |
990 |
if ($currentRow < 0) { |
991 |
die "Illegal row number: $currentRow"; |
992 |
} |
993 |
$self->{'CURRENT_ROW'} = $currentRow; |
994 |
} |
995 |
|
996 |
|
997 |
sub drop ($$) { |
998 |
my($self, $data) = @_; |
999 |
my $dbh = $data->{Database}; |
1000 |
my $tname = $self->{NAME}; |
1001 |
my $catalog = $dbh->func($tname,'get_catalog'); |
1002 |
if( !$catalog->{f_type} || $catalog->{f_type} eq 'RAM' ) { |
1003 |
my $table_name = $self->{NAME} || return; |
1004 |
delete $DBD::RAM::ramdata->{$table_name} |
1005 |
if $DBD::RAM::ramdata->{$table_name}; |
1006 |
delete $data->{$table_name} |
1007 |
if $data->{$table_name}; |
1008 |
return 1; |
1009 |
} |
1010 |
return DBD::File::Table::drop( $self ); |
1011 |
} |
1012 |
|
1013 |
################################## |
1014 |
# truncate() |
1015 |
# CALLED WITH "DELETE" & "UPDATE" |
1016 |
################################## |
1017 |
sub truncate ($$) { |
1018 |
my($self, $data) = @_; |
1019 |
my $dbh = $data->{Database}; |
1020 |
my $tname = $self->{NAME}; |
1021 |
my $catalog = $dbh->func($tname,'get_catalog'); |
1022 |
if( !$catalog->{f_type} || $catalog->{f_type} eq 'RAM' ) { |
1023 |
$#{$self->{'DATA'}} = $self->{'CURRENT_ROW'} - 1; |
1024 |
return 1; |
1025 |
} |
1026 |
return DBD::File::Table::truncate( $self, $data ); |
1027 |
} |
1028 |
|
1029 |
package My_XML_Parser; |
1030 |
my @state; |
1031 |
my %one_group_data; |
1032 |
my $insert_handle; |
1033 |
my $record_tag; |
1034 |
my @columns; |
1035 |
my %column_mapping; |
1036 |
my $multi_field_count; |
1037 |
my $fold_col; |
1038 |
my $fold; |
1039 |
my $fold_name; |
1040 |
my %folds; |
1041 |
my $read_sub; |
1042 |
|
1043 |
sub doParse { |
1044 |
my $data = shift; |
1045 |
$insert_handle = shift; |
1046 |
$record_tag = shift; |
1047 |
my $col_names = shift; |
1048 |
my $col_map = shift || ''; |
1049 |
$fold_col = shift || {}; |
1050 |
my $attributes = shift || {}; |
1051 |
$read_sub = shift || ''; |
1052 |
if ($read_sub eq 'latin1' && !$attributes->{ProtocolEncoding} ) { |
1053 |
$read_sub = \&utf8_to_latin1; |
1054 |
$attributes->{ProtocolEncoding} = 'ISO-8859-1'; |
1055 |
} |
1056 |
@columns = @{$col_names}; |
1057 |
if ($col_map) { %column_mapping = %{$col_map}; } |
1058 |
else {%column_mapping = map{ $_ => $_ } @columns; } |
1059 |
undef $@; |
1060 |
eval{ require 'XML/Parser.pm' }; |
1061 |
die "XML::Parser module not found! $@" if $@; |
1062 |
XML::Parser->new( Style => 'Stream', %{$attributes} )->parse($data); |
1063 |
} |
1064 |
|
1065 |
sub StartTag { |
1066 |
my ($parser, $type) = @_; |
1067 |
my %attrs = %_; |
1068 |
push @state, $type; |
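# @state holds the path of currently open tags; when the joined path
# equals $record_tag we are at the start of a new record.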
1069 |
if ("@state" eq $record_tag) { |
1070 |
%one_group_data = (); |
1071 |
$multi_field_count = 0; |
1072 |
while (my($k,$v)=each %folds) { |
1073 |
$one_group_data{$k} = $v if $v; |
1074 |
} |
1075 |
} |
1076 |
for (keys %{$fold_col}) { |
1077 |
my $state = "@state"; |
1078 |
next unless $_ =~ /^$state\^*/; |
1079 |
my $fold_tag = $fold_col->{$_} if $fold_col && $fold_col->{$_}; |
1080 |
if ( $fold_tag ) { |
1081 |
$fold_name = $column_mapping{$fold_tag} if $column_mapping{$fold_tag}; |
1082 |
$fold_name ||= $fold_tag; |
1083 |
$fold = $attrs{$fold_tag} || ''; |
1084 |
$folds{$fold_name} = $fold; |
1085 |
} |
1086 |
} |
1087 |
for (keys %attrs) { |
1088 |
my $place = $column_mapping{$_}; |
1089 |
if (defined $place) { |
1090 |
$one_group_data{$place} .= " " if $one_group_data{$place}; |
1091 |
$one_group_data{$place} .= &check_read( $attrs{$_} ); |
1092 |
} |
1093 |
} |
1094 |
} |
1095 |
|
1096 |
sub EndTag { |
1097 |
my ($parser, $type) = @_; |
1098 |
my $tag = "@state"; |
1099 |
$tag =~ s/^$record_tag\s*//; |
1100 |
my $column = $column_mapping{$tag}; |
1101 |
if (ref $column eq 'ARRAY') { |
1102 |
$multi_field_count++; |
1103 |
} |
1104 |
if ("@state" eq $record_tag) { |
1105 |
$insert_handle->execute(@one_group_data{@columns}); |
1106 |
} |
1107 |
pop @state; |
1108 |
} |
1109 |
|
1110 |
sub Text { |
1111 |
my $tag = "@state"; |
1112 |
$tag =~ s/^$record_tag\s*//; |
1113 |
my $column = $column_mapping{$tag}; |
1114 |
if (ref $column eq 'ARRAY') { |
1115 |
$one_group_data{$column->[$multi_field_count]} .= &check_read($_); |
1116 |
return; |
1117 |
} |
1118 |
if (defined $column) { |
1119 |
$one_group_data{$column} .= " " if $one_group_data{$column}; |
1120 |
$one_group_data{$column} .= &check_read($_); |
1121 |
} |
1122 |
} |
1123 |
|
1124 |
sub check_read { |
1125 |
my $x = shift; |
1126 |
$read_sub |
1127 |
? return &$read_sub($x) |
1128 |
: return $x; |
1129 |
} |
1130 |
|
1131 |
sub utf8_to_latin1 { |
1132 |
local $_ = shift; |
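# Collapse two-byte UTF-8 sequences (lead byte 0xC0-0xDF) back into
# single Latin-1 characters.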
1133 |
s/([\xC0-\xDF])([\x80-\xBF]) |
1134 |
/chr(ord($1)<<6&0xC0|ord($2)&0x3F) |
1135 |
/egx; |
1136 |
return $_; |
1137 |
} |
1138 |
|
1139 |
############################################################################ |
1140 |
1; |
1141 |
__END__ |
1142 |
|
1143 |
|
1144 |
=head1 NAME |
1145 |
|
1146 |
DBD::RAM - a DBI driver for files and data structures |
1147 |
|
1148 |
=head1 SYNOPSIS |
1149 |
|
1150 |
use DBI; |
1151 |
my $dbh = DBI->connect('DBI:RAM:','usr','pwd',{RaiseError=>1}); |
1152 |
$dbh->func({ |
1153 |
table_name => 'my_phrases', |
1154 |
col_names => 'id,phrase', |
1155 |
data_type => 'PIPE', |
1156 |
data_source => [<DATA>], |
1157 |
}, 'import' ); |
1158 |
print $dbh->selectcol_arrayref(qq[ |
1159 |
SELECT phrase FROM my_phrases WHERE id = 1 |
1160 |
])->[0]; |
1161 |
__END__ |
1162 |
1 | Hello, New World |
1163 |
2 | Some other Phrase |
1164 |
|
1165 |
This sample creates a database table from data, uses SQL to make a |
1166 |
selection from the database and prints out the results. While this |
1167 |
table is in-memory only and uses pipe "delimited" formatting, many
1168 |
other options are available including local and remote file access and |
1169 |
many different data formats. |
1170 |
|
1171 |
|
1172 |
=head1 DESCRIPTION |
1173 |
|
1174 |
DBD::RAM allows you to import almost any type of Perl data |
1175 |
structure into an in-memory table and then use DBI and SQL |
1176 |
to access and modify it. It also allows direct access to |
1177 |
almost any kind of file, supporting SQL manipulation |
1178 |
of the file without converting the file out of its native |
1179 |
format. |
1180 |
|
1181 |
The module allows you to prototype a database without having an rdbms |
1182 |
system or other database engine and can operate either with or without |
1183 |
creating or reading disk files. If you do use disk files, they may, |
1184 |
in most cases, either be local files or any remote file accessible via |
1185 |
HTTP or FTP. |
1186 |
|
1187 |
=head1 OVERVIEW |
1188 |
|
1189 |
This module allows you to work with a variety of data formats and to
1190 |
treat them like standard DBI/SQL accessible databases. Currently |
1191 |
supported formats include: |
1192 |
|
1193 |
FORMATS:

   XML     Extensible Markup Language (XML)
   FIXED   fixed-width records
   INI     name=value pairs
   PIPE    pipe "delimited" text
   TAB     tab "delimited" text
   CSV     Comma Separated Values or other "delimited" text
   MP3     MP3 music binary files
   ARRAY   Perl array
   HASH    Perl associative array
   DBI     DBI database connection
   USR     user defined formats
1206 |
|
1207 |
The data you use may come from several kinds of sources:
1208 |
|
1209 |
SOURCES

   DATA          Perl data structures: strings, arrays, hashes
   LOCAL FILE    a file stored on your local computer hard disk
   REMOTE FILE   a remote file accessible via HTTP or FTP
1214 |
|
1215 |
If you modify the data in a table, the modifications may be stored in |
1216 |
several ways. The storage can be temporary, i.e. in memory only with |
1217 |
no disk storage. Or several modifications can be done in memory and |
1218 |
then stored to disk once at the end of the processing. Or |
1219 |
modifications can be stored to disk continuously, similarly to the way |
1220 |
other DBDs operate. |
1221 |
|
1222 |
STORAGE

   RAM          in-memory processing only, no storage
   ONE-TIME     processed in memory, stored to disk on command
   CONTINUOUS   all modifications stored to disk as they occur
1227 |
|
1228 |
Here is a summary of the SOURCES, FORMATS, and STORAGE capabilities of |
1229 |
DBD::RAM. (x = currently supported, - = not supported, * = support in
1230 |
progress) |
1231 |
|
1232 |
                     FORMAT
                     CSV PIPE TAB FIXED INI XML MP3 ARRAY HASH DBI USR
  INPUT
   array/hash/string  x   x    x    x    x   x   -    x    x    -   x
   local file         x   x    x    x    x   x   x    -    -    x   x
   remote file        x   x    x    x    x   x   *    -    -    *   x
  OUTPUT
   ram table          x   x    x    x    x   x   x    x    x    x   x
   file (1-time)      x   x    x    x    x   x   -    -    -    *   *
   file (continuous)  x   x    x    x    x   *   -    -    -    x   *
1242 |
|
1243 |
Please note that any ram table, regardless of original source can be |
1244 |
stored in any of the supported file output formats. So, for example, |
1245 |
a table of MP3 information could be stored as a CSV file, the "-" in |
1246 |
the MP3 column only indicates that the information from the MP3 table |
1247 |
can not (for obvious reasons) be written back to an MP3 file. |
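
In code, that particular example might look roughly like the sketch
below; the directory is a placeholder and the MP3 and export specifics
are described in their own sections later in this document:

  $dbh->func({
      table_name => 'mp3',
      data_type  => 'MP3',
      dirs       => ['/my/music/'],
  }, 'import' );

  $dbh->func({
      data_source => 'SELECT * FROM mp3',
      data_target => 'mp3.csv',
      data_type   => 'CSV',
  }, 'export' );
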
1248 |
|
1249 |
=head1 INSTALLATION & PREREQUISITES |
1250 |
|
1251 |
This module should work on any platform that DBI works on.
1252 |
|
1253 |
You don't need an external SQL engine or a running server, or a |
1254 |
compiler. All you need are Perl itself and installed versions of DBI |
1255 |
and SQL::Statement. If you do not also have DBD::CSV installed you |
1256 |
will need to either install it, or simply copy File.pm into your DBD |
1257 |
directory. |
1258 |
|
1259 |
You can either use the standard makefile method, or just copy RAM.pm |
1260 |
into your DBD directory. |
1261 |
|
1262 |
Some features require installation of extra modules. If you wish to |
1263 |
work with the XML format, you will need to install XML::Parser. If |
1264 |
you wish to use the ability to work with remote files, you will need |
1265 |
to install the LWP (libnet) modules. Other features of DBD::RAM work |
1266 |
fine without these additional modules. |
1267 |
|
1268 |
=head1 SQL & DBI |
1269 |
|
1270 |
This module, like other DBD database drivers, works with the DBI |
1271 |
methods listed in DBI.pm and its documentation. Please see the DBI |
1272 |
documentation for details of methods such as connecting, preparing, |
1273 |
executing, and fetching data. Currently only a limited subset of SQL |
1274 |
commands are supported. Here is a brief synopsis, please see the |
1275 |
documentation for SQL::Statement for a more complete description of
1276 |
these commands. |
1277 |
|
1278 |
CREATE TABLE $table |
1279 |
( $col1 $type1, ..., $colN $typeN, |
1280 |
[ PRIMARY KEY ($col1, ... $colM) ] ) |
1281 |
|
1282 |
DROP TABLE $table |
1283 |
|
1284 |
INSERT INTO $table |
1285 |
[ ( $col1, ..., $colN ) ] |
1286 |
VALUES ( $val1, ... $valN ) |
1287 |
|
1288 |
DELETE FROM $table |
1289 |
[ WHERE $wclause ] |
1290 |
|
1291 |
UPDATE $table |
1292 |
SET $col1 = $val1, ... $colN = $valN |
1293 |
[ WHERE $wclause ] |
1294 |
|
1295 |
SELECT [DISTINCT] $col1, ... $colN |
1296 |
FROM $table |
1297 |
[ WHERE $wclause ] |
1298 |
[ ORDER BY $ocol1 [ASC|DESC], ... $ocolM [ASC|DESC] ] |
1299 |
|
1300 |
$wclause [NOT] $col $op $val|$col |
1301 |
[ AND|OR $wclause2 ... AND|OR $wclauseN ] |
1302 |
|
1303 |
$op = = | <> | < | > | <= | >=
1304 |
| IS NULL | IS NOT NULL | LIKE | CLIKE |
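
For example, a typical prepare/execute/fetch cycle against a DBD::RAM
table looks just like it does with any other DBI driver (a minimal
sketch using the my_phrases table from the SYNOPSIS above):

  my $sth = $dbh->prepare("SELECT id, phrase FROM my_phrases WHERE id = 1");
  $sth->execute;
  while ( my($id, $phrase) = $sth->fetchrow_array ) {
      print "$id: $phrase\n";
  }
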
1305 |
|
1306 |
|
1307 |
=head1 WORKING WITH FILES & TABLES: |
1308 |
|
1309 |
This module supports working with both in-memory and disk-based
databases. In order to allow quick testing and prototyping, the
default behavior is for tables to be created in-memory from in-memory
data, but it is easy to change this behavior so that tables can also
be created, manipulated, and stored on disk, or so that there is a
combination of in-memory and disk-based manipulation.
1310 |
|
1311 |
There are three methods unique to the DBD::RAM module that allow you
to specify which mode of operation you use for each table or
operation:
1312 |
|
1313 |
1) import() imports data either from memory or from a file into an |
1314 |
in-memory table |
1315 |
|
1316 |
2) export() exports data from an in-memory table to a file regardless of |
1317 |
the original source of the data |
1318 |
|
1319 |
3) catalog() sets up an association between a file name and a table name |
1320 |
such that all operations on the table are done continuously |
1321 |
on the file |
1322 |
|
1323 |
With the import() method, standard DBI/SQL commands like select, |
1324 |
update, delete, etc. apply only to the data that is in-memory. If you |
1325 |
want to save the modifications to a file, you must explicitly call
1326 |
export() after making the changes. |
1327 |
|
1328 |
On the other hand, the catalog() method sets up an association between |
1329 |
a file and a tablename such that all DBI/SQL commands operate on the |
1330 |
file continuously without needing to explicitly call export(). This |
1331 |
method of operation is similar to other DBD drivers. |
1332 |
|
1333 |
Here is a rough diagram of how the three methods operate: |
1334 |
|
1335 |
disk -> import() -> RAM |
1336 |
|
1337 |
select |
1338 |
update |
1339 |
delete |
1340 |
insert |
1341 |
(multiple times) |
1342 |
|
1343 |
disk <- export() <- RAM |
1344 |
|
1345 |
catalog()
1346 |
disk <-> select |
1347 |
disk <-> update |
1348 |
disk <-> delete |
1349 |
disk <-> insert |
1350 |
|
1351 |
Regardless of which method is chosen, the same set of DBI and SQL
commands may be applied to all tables.
1352 |
|
1353 |
See below for details of import(), export() and catalog() and for |
1354 |
specifics of naming files and directories. |
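
As a rough sketch of the import/export style (the file and table names
here are only examples), a script might read a CSV file into memory,
modify it with ordinary SQL, and then explicitly write the result back
out:

  $dbh->func({
      table_name  => 'phrases',
      data_type   => 'CSV',
      file_source => 'phrases.csv',
  }, 'import' );

  $dbh->do("UPDATE phrases SET phrase = 'Hello, New World' WHERE id = 1");
  $dbh->do("DELETE FROM phrases WHERE id = 2");

  $dbh->func({
      data_source => 'SELECT * FROM phrases',
      data_target => 'phrases.csv',
      data_type   => 'CSV',
  }, 'export' );
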
1355 |
|
1356 |
=head2 Creating in-memory tables from data and files: import() |
1357 |
|
1358 |
In-memory tables may be created using standard CREATE/INSERT |
1359 |
statements, or using the DBD::RAM specific import method: |
1360 |
|
1361 |
$dbh->func( $args, 'import' ); |
1362 |
|
1363 |
The $args parameter is a hashref which can contain several keys, most |
1364 |
of which are optional and/or which contain common defaults. |
1365 |
|
1366 |
These keys can either be specified or left to default behaviour: |
1367 |
|
1368 |
table_name string: name of the table |
1369 |
col_names string: column names for the table |
1370 |
data_type string: format of the data (e.g. XML, CSV...) |
1371 |
|
1372 |
The table_name key to the import() method is either a string, or if |
1373 |
it is omitted, a default table name will be automatically supplied, |
1374 |
starting at table1, then table2, etc. |
1375 |
|
1376 |
table_name => 'my_test_db', |
1377 |
|
1378 |
OR simply omit the table_name key
1379 |
|
1380 |
If the col_names key to the import() method is omitted, the column |
1381 |
names will be automatically supplied, starting at col1, then col2, |
1382 |
etc. If the col_names key is the string 'first_line', the column |
1383 |
names will be taken from the first line of the data. If the col_names |
1384 |
key is a comma separated list of column names, those will be taken (in |
1385 |
order) as the names of the columns. |
1386 |
|
1387 |
col_names => 'first_line', |
1388 |
|
1389 |
OR col_names => 'name,address,phone', |
1390 |
|
1391 |
OR simply omit the col_names key |
1392 |
|
1393 |
If table_name or col_names are specified, they must comply with SQL |
1394 |
naming rules for identifiers: start with an alphabetic character; |
1395 |
contain nothing but alphabetic characters, numbers, and/or |
1396 |
underscores; be less than 128 characters long; not be the same as a |
1397 |
SQL reserved keyword. If the table refers to a file that has a period |
1398 |
in its name (e.g. my_data.csv), this can be handled with the catalog() |
1399 |
method, see below. |
1400 |
|
1401 |
The table_name and col_names, if specified, *are* case sensitive, so
1402 |
that "my_test_db" is not the same as "my_TEST_db". |
1403 |
|
1404 |
The data_type key to the import() method specifies the format of the |
1405 |
data as already described above in the general description. It must |
1406 |
be one of: |
1407 |
|
1408 |
data_type => 'CSV', |
1409 |
data_type => 'TAB', |
1410 |
data_type => 'PIPE', |
1411 |
data_type => 'INI', |
1412 |
data_type => 'FIXED', |
1413 |
data_type => 'XML', |
1414 |
data_type => 'MP3', |
1415 |
data_type => 'DBI', |
1416 |
data_type => 'USR', |
1417 |
data_type => 'ARRAY', |
1418 |
data_type => 'HASH', |
1419 |
|
1420 |
OR simply omit the data_type key |
1421 |
|
1422 |
If no data_type key is supplied, the default format CSV will be used. |
1423 |
|
1424 |
The import() keys must specify a source of the data for the table, |
1425 |
which can be any of: |
1426 |
|
1427 |
file_source string: name of local file to get data from |
1428 |
remote_source string: url of remote file to get data from |
1429 |
data_source string or arrayref: the actual data |
1430 |
|
1431 |
The file_source key is the name of a local file. Its location will be
1432 |
taken to be relative to the f_dir specified in the database |
1433 |
connection, see the connect() method above. Whether or not the file |
1434 |
name is case sensitive depends on the operating system the script is |
1435 |
running on; e.g., on Windows case is ignored and on UNIX it is not
1436 |
ignored. For maximum portability, it is safest to assume that case |
1437 |
matters. |
1438 |
|
1439 |
file_source => 'my_test_db.csv' |
1440 |
|
1441 |
The remote_source key is a URL (Uniform Resource Locator) to a file |
1442 |
located on some other computer. It may be any kind of URL that is |
1443 |
supported by the LWP module including HTTP and FTP. If username and
1444 |
password are required, they can be included in the URL. |
1445 |
|
1446 |
remote_source => 'http://myhost.com/mypath/myfile.myext' |
1447 |
|
1448 |
OR remote_source => 'ftp://user:password@myhost.com/mypath/myfile.myext' |
1449 |
|
1450 |
The data_source key to the import() method contains the actual data for
the table, in cases where the data comes from the Perl script itself,
1452 |
rather than from a file. The method of specifying the data_source |
1453 |
depends entirely on the format of the data_type. For example with |
1454 |
data_type of XML or CSV, the data_source is a string in XML or CSV |
1455 |
format but with data_type ARRAY, the data_source is a reference to an |
1456 |
array of arrayrefs. See below under each data_type for details. |
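
For instance, here is a minimal sketch of the ARRAY case (the table and
column names are invented), with the rows supplied directly as a
reference to an array of arrayrefs:

  $dbh->func({
      table_name  => 'staff',
      data_type   => 'ARRAY',
      col_names   => 'id,name',
      data_source => [ [1, 'Joe'], [2, 'Sue'] ],
  }, 'import' );
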
1457 |
|
1458 |
The following keys to the import() method apply only to specific data |
1459 |
formats, see the sections on the specific formats (listed in parens) |
1460 |
for details: |
1461 |
|
1462 |
pattern (FIXED only) |
1463 |
sep_char (CSV only) |
1464 |
eol (CSV only) |
1465 |
read_sub (USR and XML only) |
1466 |
attr (XML only) |
1467 |
record_tag (XML only) |
1468 |
fold_col (XML only) |
1469 |
col_mapping (XML only) |
1470 |
dirs (MP3 only) |
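
Putting several of these keys together, an import of a remote
pipe-delimited file whose first line holds the column names might look
something like this (the URL is a placeholder, not a real data set):

  $dbh->func({
      table_name    => 'prices',
      data_type     => 'PIPE',
      col_names     => 'first_line',
      remote_source => 'http://myhost.com/mypath/prices.txt',
  }, 'import' );
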
1471 |
|
1472 |
|
1473 |
=head2 Saving in-memory tables to disk: export() |
1474 |
|
1475 |
The export() method creates a file from an in-memory table. It takes |
1476 |
a very similar set of keys as does the import() method. The data_type |
1477 |
key specifies the format of the file to be created (CSV, PIPE, TAB, |
1478 |
XML, FIXED-WIDTH, etc.). The same set of specifiers available for the |
1479 |
import method for these various formats are also available |
1480 |
(e.g. sep_char to set the field separator for CSV files, or pattern to |
1481 |
set the fixed-width pattern). |
1482 |
|
1483 |
The data_source key for the export() method is a SQL select statement |
1484 |
in relation to whatever in-memory table is chosen to export. The |
1485 |
data_target key specifies the name of the file to put the results in. |
1486 |
Here is an example: |
1487 |
|
1488 |
$dbh->func( { |
1489 |
data_source => 'SELECT * FROM table1', |
1490 |
data_target => 'my_db.fix', |
1491 |
data_type => 'FIXED', |
1492 |
pattern => 'A2 A19', |
1493 |
},'export' ); |
1494 |
|
1495 |
That statement creates a fixed-width record database file called |
1496 |
"my_db.fix" and puts the entire contents of table1 into the file using |
1497 |
the specified field widths and whatever column names already exist in
1498 |
table1. |
1499 |
|
1500 |
See specific data formats below for details related to the export() method. |
1501 |
|
1502 |
=head2 Continuous file access: catalog() |
1503 |
|
1504 |
The catalog() method creates an association between a specific table |
1505 |
name, a specific data type, and a specific file name. You can create |
1506 |
those associations for several files at once. The parameter to the |
1507 |
catalog() method is a reference to an array of arrayrefs. Each of the |
1508 |
arrayrefs should contain a table name, a data type, and a file name |
1509 |
and can optionally include other parameters specific to particular data
1510 |
types. Here is an example: |
1511 |
|
1512 |
$dbh->func([ |
1513 |
[ 'my_csv_table', 'CSV', 'test_db.csv' ], |
1514 |
],'catalog'); |
1515 |
|
1516 |
This example creates an association to a CSV file. Once the catalog() |
1517 |
statement has been issued, any DBI/SQL commands relating to |
1518 |
"my_csv_table" will operate on the file "test_db.csv". If the command |
1519 |
is a SELECT statement, the file witll be opened and searched. If the |
1520 |
command is an INSERT statement, the file will be opened and the new |
1521 |
data row(s) inserted and saved into the file. |
1522 |
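
For example, once the catalog() call above has been issued, ordinary
DBI calls routed through "my_csv_table" read and write "test_db.csv"
directly.  This is only a sketch; the column names and values below are
invented for illustration:

  $dbh->do("INSERT INTO my_csv_table VALUES (3,'bonjour')");
  my $sth = $dbh->prepare("SELECT phrase FROM my_csv_table WHERE id = 3");
  $sth->execute;
  while (my($phrase) = $sth->fetchrow_array) { print "$phrase\n"; }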
|
One can also pass additional information such as column names,
fixed-width patterns, and field and record separators to the catalog()
method.  See the import() information above for the meanings of these
additional parameters.  They should be passed as a hashref:

  $dbh->func([
      [ 'table1', 'FIXED', 'test_db.fix', {pattern   => 'A2 A19'} ],
      [ 'table2', 'INI',   'test_db.ini', {col_names => 'id,phrase,name'} ],
  ],'catalog');

In future releases, users will be able to store catalogs in files for
permanent associations between files and data types.

=head2 Specifying file and directory names |

All filenames are relative to a user-specified file directory: f_dir.
The f_dir parameter may be set in the connect statement:

  my $dbh = DBI->connect("dbi:RAM:f_dir=/mypath/to-files/" ...

The f_dir parameter may also be set or reset anywhere in the program
after the database connection:

  $dbh->{f_dir} = '/mypath/to-files';

If the f_dir parameter is not set explicitly, it defaults to "./",
which will be wherever your script thinks it is running from (which,
depending on server setup, may or may not be the physical location of
your script, so use this only if you know what you are doing).

All filenames are relative to the f_dir directory.  It is not possible
to use an absolute path to a file.

WARNING: no taint checking is performed on the filename or f_dir; this
is the responsibility of the programmer.  Since the filename is
relative to the f_dir directory, a filename starting with "../" will
lead to files above or outside of the f_dir directory, so you should
exclude such filenames if they come from user input.

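A minimal sketch of that kind of check follows; the helper name, the
rules, and the $user_input variable are hypothetical and not part of
DBD::RAM, so adapt them to your own situation:

  # Reject anything that could wander outside f_dir.
  sub safe_filename {
      my $name = shift;
      die "unsafe filename\n" if $name =~ /\.\./;           # no parent dirs
      die "unsafe filename\n" if $name =~ m{^[/\\]};        # no absolute paths
      die "unsafe filename\n" unless $name =~ /^[\w.-]+$/;  # simple whitelist
      return $name;
  }

  $dbh->func({ file_source => safe_filename($user_input) },'import');
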
=head2 Using defaults for quick testing & prototyping |

If no table_name is specified, a numbered table name will be supplied
(table1, or if that exists, table2, etc.).  The same also applies to
column names (col1, col2, etc.).  If no data_type is supplied, CSV
will be assumed.  If the entire hashref parameter to import() is missing
and an arrayref of data is supplied instead, then defaults for all
values will be used, the source will default to data, and the
contents of the array will be treated as the data source.  For CSV
files, a field separator of comma and a record separator of newline are
the default.  Thus, assuming there are no already existing in-memory
tables, the two statements below have the same effect:

  $dbh->func( [<DATA>], 'import' );

  $dbh->func({
      table_name  => 'table1',
      data_type   => 'CSV',
      col_names   => 'col1,col2',
      sep_char    => ',',
      eol         => "\n",
      data_source => [<DATA>],
  },'import' );

It is also possible to rely on some of the defaults, but not all of
them.  For example:

  $dbh->func({
      data_type   => 'PIPE',
      file_source => 'my_db.pipe',
  },'import' );

=head1 DATA FORMATS |
=head2 CSV / PIPE / TAB / INI (Comma,Pipe,Tab,INI & other "delimited" formats) |

DBD::RAM can import CSV (Comma Separated Values) data from strings,
from local files, or from remote files into database tables and export
tables from any source to CSV files.  It can also store and update CSV
files continuously, in the same way that other DBD drivers operate.

If you wish to use remote CSV files, you also need the LWP module
installed.  It is available from www.activestate.com for Windows, and
from www.cpan.org for other platforms.

CSV is the format of files commonly exported by such programs as
Excel, Access, and FileMakerPro.  Typically newlines separate records
and commas separate fields.  Commas may also be included inside fields
if the field itself is surrounded by quotation marks.  Quotation marks
may be included in fields by doubling them.  Although some types of
CSV formats allow newlines inside fields, DBD::RAM does not currently
support that.  If you need that feature, you should use DBD::CSV.

Here are some typical CSV fields:

  hello,1,"testing, ""george"", 1,2,3",junk

Note that numbers and strings that don't contain commas do not need
quotation marks around them.  That line would be parsed into four
fields:

  hello
  1
  testing, "george", 1,2,3
  junk

To import that string of CSV into a DBD::RAM table:

  $dbh->func({
      data_source => qq[hello,1,"testing, ""george"", 1,2,3",junk]
  },'import');

Or, if one wanted to continuously update a file in the same way that
DBD::CSV works:

  $dbh->func([ [ 'table1', 'CSV', 'my_test.csv' ] ],'catalog');

Or if that string and others like it were in a local file called
'my_test.csv':

  $dbh->func({ file_source => 'my_test.csv' },'import');

Or if that string and others like it were in a remote file at a known
URL:

  $dbh->func({ remote_source => 'http://www.foo.edu/my_test.csv' },'import');

Note that these forms all use default behaviour since CSV is the
default data_type.  These methods also use the default table_name
(table1, table2, etc.) and default column names (col1, col2, etc.).  The
same functions can specify a table_name and can either specify a list
of column names or specify that the column names should be taken from
the first line of data:

  $dbh->func({
      file_source => 'my_test.csv',
      table_name  => 'my_table',
      col_names   => 'name,phone,address',
  },'import');

It is also possible to define other field separators (e.g. a
semicolon) with the "sep_char" key and other record separators with
the "eol" key.  For example:

  sep_char => ';',
  eol      => '~',

Adding those to the import() hash would define data that has a
semicolon between every field and a tilde between every record.

For convenience, shortcuts have been provided for PIPE and TAB
separators.  The data_type "PIPE" indicates a separator of the pipe
character '|', which may optionally have blank spaces before or after
it.  The TAB data_type indicates fields that are separated by tabs.
In both cases newlines remain the default record separator unless
specifically set to something else.
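
For example, here is a minimal sketch of importing a short
pipe-delimited string with the PIPE shortcut; the table and column
names simply fall back to the defaults described above:

  $dbh->func({
      data_type   => 'PIPE',
      data_source => "1 | Hello new world!\n2 | Junkity Junkity Junk\n",
  },'import');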
|
Another shortcut is the INI data_type.  This expects to see data in
name=value pairs like this:

  resolution = 640x480
  colors = 256

Currently the INI type does not support sections within the .ini file,
but that will change in future releases of this module.

The PIPE, TAB, and INI formats all behave like the CSV format.
Defaults may be used for assigning column names from the first line of
data, in which case the column names should be separated by the
appropriate symbol (e.g. col1|col2 for PIPE, col1=col2 for INI, and
column names separated by tabs for TAB).
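
Here is a small sketch of the INI shortcut; the file name and column
names are invented for illustration:

  $dbh->func({
      data_type   => 'INI',
      file_source => 'my_prefs.ini',
      col_names   => 'setting,value',
  },'import');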
|
In the examples above using data_source, the data was a string with
newlines separating the records.  It is also possible to use a
reference to an array of lines as the data_source.  This makes it
easy to use the DATA segment of a script or to import an array from
some other part of a script:

  $dbh->func({ data_source => [<DATA>] },'import' );

=head2 ARRAYS & HASHES |

DBD::RAM can import data directly from references to arrays of
arrayrefs and references to arrays of hashrefs.  This allows you to
easily import data from some other portion of a Perl script into a
database format and either save it to disk or simply manipulate it in
memory.

  $dbh->func({
      data_type   => 'ARRAY',
      data_source => [
          ['1','CSV:Hello New World!'],
          ['2','CSV:Cool!']
      ],
  },'import');

  $dbh->func({
      data_type   => 'HASH',
      data_source => [
          {id=>1,phrase=>'Hello new world!'},
          {id=>2,phrase=>'Junkity Junkity Junk'},
      ],
  },'import');

=head2 FIXED-WIDTH RECORDS |

Fixed-width records (also called fixed-length records) do not use
character patterns to separate fields; rather, they use a preset number
of characters in each field to determine where one field begins and
another ends.  DBD::RAM can import fixed-width records from strings,
arrayrefs, local files, and remote files and can export data from any
source to fixed-width record files.  The module also allows
continuous disk-based updating of fixed-width format files similarly
to other DBDs.

The fixed-width format behaves exactly like the CSV formats mentioned
above, with the exceptions that the data_type is "FIXED" and that one
must supply a pattern key to describe the width of the fields.  The
pattern should be in Perl unpack format, e.g. "A2 A7 A14" would
indicate a table with three columns with widths of 2, 7, and 14
characters.  When data is inserted or updated, it will be truncated or
padded to fill exactly the amount of space allotted to each field.

  $dbh->func({
      table_name  => 'phrases',
      col_names   => 'id,phrase',
      data_type   => 'FIXED',
      pattern     => 'A1 A20',
      data_source => [ '1Hello new world!    ',
                       '2Junkity Junkity Junk',
                     ],
  },'import' );

=head2 XML |

DBD::RAM can import XML (Extensible Markup Language) from strings, from
local files, or from remote files into database tables and export
tables from any source to XML files.

You must have XML::Parser installed in order to use the XML feature of
DBD::RAM.  If you wish to use remote XML files, you also need the LWP
module installed.  Both are available from www.activestate.com for
Windows, and from www.cpan.org for other platforms.

Support is provided for information in tag attributes and tag text and
for multiple levels of nested tags.  There are several options for how
to treat tag names that occur multiple times in a single record,
including a variety of relationships between XML tags and database
columns: one-to-one, one-to-many, and many-to-one.  Tag attributes can
be made to apply to multiple records nested within the tag.  There is
also support for alternate character encodings and other XML::Parser
parameter attributes.

See below for details.

=over 4 |
=item XML Import |

To start with a very simple example, consider this XML string:

  <staff>
   <person name='Joe' title='bottle washer'/>
   <person name='Tom' title='flunky'/>
   <person name='Bev' title='chief cook'/>
   <person name='Sue' title='head honcho'/>
  </staff>

Assuming you have that XML structure in a variable $str, you can
import it into a DBD::RAM table like this:

  $dbh->func({
      data_source => $str,
      data_type   => 'XML',
      record_tag  => 'staff person',
      col_names   => 'name,title'
  },'import');

Which would produce this SQL/DBI accessible table:

  name | title
  -----+--------------
  Joe  | bottle washer
  Tom  | flunky
  Bev  | chief cook
  Sue  | head honcho

If the XML data is in a local or remote file, rather than a string,
simply change the "data_source" to "file_source" (for local files) or
"remote_source" (for remote files) and everything else mentioned in
this section works the same as if the data was imported from a string.

Notice that the "record_tag" is a space-separated list of all of the
tags that enclose the fields you want to capture, starting at the
highest level with the <staff> tag.  In this example there is only one
level of nesting, but there could be many levels of nesting in actual
practice.

DBD::RAM can treat both text and tag attributes as fields.  So the
following three records could produce the same database row:

  <person name='Tom' title='flunky'/>

  <person name='Tom'>
    <title>flunky</title>
  </person>

  <person>
    <name>Tom</name>
    <title>flunky</title>
  </person>

The database column names should be specified as a comma-separated
string, in the order you want them to appear in the database:

  col_names => 'name,state,year'

If you want the database column names to be the same as the XML tag
names, you do not need to do anything further.

NOTE: you *must* specify the column names for XML data; you cannot
rely on automatic default column names (col1, col2, etc.) or on reading
the column names from the "first line" of the data as you can with
most other data types.

=item Alternate relationships between XML tags & database columns |

If you want the database column names to be different from the XML tag
names, you need to add a col_mapping parameter which should be a hash
with the XML tag as the key and the database column as the value:

  col_mapping => {
      name  => 'Member_Name',
      state => 'Location',
      year  => 'Year',
  }

('name' is the XML tag, 'Member_Name' is the database column)

If a given tag occurs more than once in an XML record, it can be
mapped onto a single column name (in which case all of the values for
it will be concatenated with spaces into the single column), or it can
be mapped onto an array of column names (in which case each succeeding
instance of the tag will be entered into the succeeding column in the
array).

For example, given this XML snippet:

  <person name='Joe' state='OR'>
    <year>1998</year>
    <year>1999</year>
  </person>
  <person name='Sally' state='WA'>
    <year>1998</year>
    <year>1999</year>
    <year>2000</year>
  </person>

This column mapping:

  col_mapping => {
      name  => 'Member_Name',
      state => 'Location',
      year  => ['Year1','Year2','Year3'],
  }

Would produce this table:

  Member_Name | Location | Year1 | Year2 | Year3
  ------------+----------+-------+-------+------
  Joe         | OR       | 1998  | 1999  |
  Sally       | WA       | 1998  | 1999  | 2000
|
And this column mapping:

  col_mapping => {
      name  => 'Member_Name',
      state => 'Location',
      year  => 'Year',
  }

Would produce this table:

  Member_Name | Location | Year
  ------------+----------+----------------
  Joe         | OR       | 1998 1999
  Sally       | WA       | 1998 1999 2000

It is also possible to map several different tags into a single column,
e.g.:

  <person name='Joe' state='OR'>
    <year1>1998</year1>
    <year2>1999</year2>
  </person>
  <person name='Sally' state='WA'>
    <year1>1998</year1>
    <year2>1999</year2>
    <year3>2000</year3>
  </person>

  col_mapping => {
      name  => 'Member_Name',
      state => 'Location',
      year1 => 'Year',
      year2 => 'Year',
      year3 => 'Year',
  }

  Member_Name | Location | Year
  ------------+----------+----------------
  Joe         | OR       | 1998 1999
  Sally       | WA       | 1998 1999 2000

=item Nested attributes that apply to multiple records |

It is also possible to use nested record attributes to create column
values that apply to multiple records.  Consider the following XML:

  <staff>
    <office location='Portland'>
      <person name='Joe'/>
      <person name='Tom'/>
    </office>
    <office location='Seattle'>
      <person name='Bev'/>
      <person name='Sue'/>
    </office>
  </staff>

One might like to associate the office location with all of the staff
members in that office.  This is how that would be done:

  record_tag => 'staff office person',
  col_names  => 'location,name',
  fold_col   => { 'staff office' => 'location' },

That fold_col specification in the import() method would "fold in"
the attribute for location and apply it to all records nested within
the office tag and produce the following table:

  location | name
  ---------+-----
  Portland | Joe
  Portland | Tom
  Seattle  | Bev
  Seattle  | Sue
|
You may use several levels of folded columns, for example, to capture
both the office location and title in this XML:

  <staff>
    <office location='Portland'>
      <title id='manager'>
        <person name='Joe'/>
      </title>
      <title id='flunky'>
        <person name='Tom'/>
      </title>
    </office>
    <office location='Seattle'>
      <title id='flunky'>
        <person name='Bev'/>
        <person name='Sue'/>
      </title>
    </office>
  </staff>

You would use this fold_col key:

  fold_col => { 'staff office'       => 'location',
                'staff office title' => 'id',
              },

And obtain this table:

  location | title   | name
  ---------+---------+-----
  Portland | manager | Joe
  Portland | flunky  | Tom
  Seattle  | flunky  | Bev
  Seattle  | flunky  | Sue
|
If you need to grab more than one attribute from a single tag, you
need to put one or more carets (^) on the end of the fold_col key.
For example:

  <office type='branch' location='Portland' manager='Sue'> ...</office>

  fold_col => { 'office'   => 'type',
                'office^'  => 'location',
                'office^^' => 'manager',
              },

=item Character Encoding and Unicode issues |

The attr key can be used to pass extra information to XML::Parser when
it imports a database.  For example, if the XML file contains Latin-1
characters, one might like to pass the parser an encoding protocol
like this:

  attr => {ProtocolEncoding => 'ISO-8859-1'},

Attributes passed in this manner are passed straight to XML::Parser.

Since the results of XML::Parser are returned as UTF-8, one might also
like to translate from UTF-8 to something else when the data is
entered into the database.  This can be done by passing a reference to
a subroutine in the read_sub key.  For example:

  read_sub => \&utf8_to_latin1,

For this to work, there would need to be a subroutine utf8_to_latin1
in the main module that takes a UTF-8 string as input and returns a
Latin-1 string as output.  Similar routines can be used to translate
the UTF-8 characters into any other character encoding.
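
A minimal sketch of such a routine is shown below; it handles only
characters in the Latin-1 range, and on newer Perls the Encode module
could be used instead:

  # Collapse two-byte UTF-8 sequences for U+0080..U+00FF into single
  # Latin-1 bytes; other characters are left untouched.
  sub utf8_to_latin1 {
      my $str = shift;
      $str =~ s/([\xC2\xC3])([\x80-\xBF])/chr((ord($1) << 6 & 0xC0) | (ord($2) & 0x3F))/ge;
      return $str;
  }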
|
Apologies for being Euro-centric, but I have included a shortcut for
Latin-1.  If, instead of passing a reference to a subroutine in the
read_sub key, one simply puts the string "latin1" there, the module
will perform ISO-8859-1 protocol encoding when reading the XML file and
will automatically translate from UTF-8 to Latin-1 as the values are
inserted into the database; that is, it acts as a shortcut for the two
keys mentioned above, so the attr key may be omitted.

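For example, this import() sketch relies on that shortcut (the file
name here is hypothetical):

  $dbh->func({
      data_type   => 'XML',
      file_source => 'staff.xml',
      record_tag  => 'staff person',
      col_names   => 'name,title',
      read_sub    => 'latin1',
  },'import');
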
=item Other features of XML import |

* Tags, attributes, and text that are not specifically referred to in
the import() parameters are ignored when creating the database table.

* If a column name is listed that is not mapped onto a tag that occurs
in the XML source, a column will be created in the database for that
name and it will be given a default value of NULL for each record
created.

=item XML Export |

Any DBD::RAM table, regardless of its original source or its original
format, can be exported to an XML file.

The export() parameters are the same as for other types of export();
see above for details.  Additionally there are some export
parameters specific to XML files which are the same as the import()
parameters for XML files mentioned above.  The col_names parameter is
required, as is the record_tag parameter.  Additionally, one may
optionally pass a header and/or a footer parameter, which will be
material that goes above and below the records in the file.  If no
header is passed, a default header consisting of

  <?xml version="1.0" ?>

will be created at the top of the file.
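
As a rough sketch of those keys (the strings here are invented, and
treating header and footer as literal strings is an assumption, not
something verified against the module):

  header => qq[<?xml version="1.0" encoding="ISO-8859-1" ?>\n],
  footer => qq[<!-- exported by DBD::RAM -->\n],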
|
Given a database like this:

  location | name
  ---------+-----
  Portland | Joe
  Seattle  | Sue

And an export() call like this:
|
  $dbh->func({
      data_type   => 'XML',
      data_target => 'test_db.new.xml',
      data_source => 'SELECT * FROM table1',
      record_tag  => 'staff office person',
      col_names   => 'name,location',
  },'export');

Would produce a file called 'test_db.new.xml' containing text like this:

  <?xml version="1.0" ?>
  <staff>
    <office>
      <person>
        <name>Joe</name>
        <location>Portland</location>
      </person>
      <person>
        <name>Sue</name>
        <location>Seattle</location>
      </person>
    </office>
  </staff>

The module does not currently support exporting tag attributes or
"folding out" nested column information, but those are planned for
future releases.

=back

=head2 USER-DEFINED DATA STRUCTURES |

DBD::RAM can be extended to handle almost any type of structured
information with the USR data type.  With this data type, you define a
subroutine that parses your data and pass that to the import() command,
and the module will use that routine to create a database from your
data.  The routine can be as simple or as complex as you like.  It
must accept a string and return an array with the fields of the array
in the same order as the columns in your database.  Here is a simple
example that works with data separated by double tildes.  In reality,
you could just do this with the built-in CSV type, but here is how you
could recreate it with the USR type:

  $dbh->func({
      data_type   => 'USR',
      data_source => "1~~2~~3\n4~~5~~6\n",
      read_sub    => sub { split /~~/,shift },
  },'import' );

That would build a table with two rows of three fields each.  The
first row would contain the values 1,2,3 and the second row would
contain the values 4,5,6.
|
Here is a more complex example that handles a simplified address book.
It assumes that your data is a series of addresses separated by blank
lines and that each address has the name on the first line, the street
on the second line, and the town, state, and zipcode on the third line.
(Apologies to those in countries that don't have states or zipcodes in
this format.)  Here is an example of the kind of data it would handle:

  Fred Bloggs
  123 Somewhere Lane
  Sometown OR 97215

  Joe Blow
  567 Anywhere Street
  OtherTown WA 98006

Note that the end-of-line separator (eol) has been changed to be a
blank line rather than a simple newline and that the parsing routine
is more than a simple line-by-line parser: it splits the third line
into three fields for town, state, and zip.

  $dbh->func({
      data_type   => 'USR',
      data_source => join('',<DATA>),
      col_names   => 'name,street,town,state,zip',
      eol         => '^\s*\n',
      read_sub    => sub {
          my($name,$street,$stuff) = split "\n", $_[0];
          my @ary   = split ' ',$stuff;
          my $zip   = $ary[-1];
          my $state = $ary[-2];
          my $town  = $stuff;
          $town =~ s/^(.*)\s+$state\s+$zip$/$1/;
          return($name,$street,$town,$state,$zip);
      },
  },'import');

Given the data above, this routine would create a table like this:

  name        | street              | town      | state | zip
  ------------+---------------------+-----------+-------+------
  Fred Bloggs | 123 Somewhere Lane  | Sometown  | OR    | 97215
  Joe Blow    | 567 Anywhere Street | OtherTown | WA    | 98006

These are just samples; the possibilities are virtually unlimited.

PLEASE NOTE: If you develop generally useful parser routines that
others might also be able to use, send them to me and I can
incorporate them into the DBD itself (with proper credit, of course).

=head2 DBI DATABASE RECORDS |

You can import information from any other DBI-accessible database with
the data_type set to 'DBI' in the import() method.  First connect to
the other database via DBI and get a database handle for it, separate
from the database handle for DBD::RAM.  Then do a prepare and execute
to get a statement handle for a SELECT statement against that database.
Then pass the statement handle to the DBD::RAM import() method as the
data_source.  This will perform the fetch and insert the fetched
fields and records into the DBD::RAM table.  After the import()
statement, you can then close the database connection to the other
database if you are not going to be using it for anything else.

Here's an example using DBD::mysql --

  use DBI;
  my $dbh_ram   = DBI->connect('dbi:RAM:','','',{RaiseError=>1});
  my $dbh_mysql = DBI->connect('dbi:mysql:test','','',{RaiseError=>1});
  my $sth_mysql = $dbh_mysql->prepare("SELECT * FROM cars");
  $sth_mysql->execute;
  $dbh_ram->func({
      data_type   => 'DBI',
      data_source => $sth_mysql,
  },'import' );
  $dbh_mysql->disconnect;

=head2 MP3 MUSIC FILES |

Most mp3 (MPEG audio layer 3) music files contain a header describing
the song name, artist, and other information about the music.  This
shortcut will collect all of the header information in all mp3 files in
a group of directories and turn it into a searchable database:

  $dbh->func(
      { data_type => 'MP3', dirs => $dirlist }, 'import'
  );

$dirlist should be a reference to an array of absolute paths to
directories containing mp3 files.  Each file in those directories
will become a record containing the fields: file_name, song_name,
artist, album, year, comment, genre.  The fields will be filled
in automatically from the ID3v1x header information in the mp3 file
itself, assuming, of course, that the mp3 file contains a
valid ID3v1x header.

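For example, here is a sketch of building and then querying such a
table; the directory paths are invented, and the table name is simply
whatever default DBD::RAM assigns (table1 if no tables exist yet):

  my $dirlist = [ '/home/me/mp3', '/mnt/music/mp3' ];
  $dbh->func( { data_type => 'MP3', dirs => $dirlist }, 'import' );
  my $sth = $dbh->prepare("SELECT artist,song_name FROM table1 WHERE year = '1999'");
  $sth->execute;
  while (my($artist,$song) = $sth->fetchrow_array) {
      print "$artist: $song\n";
  }
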
=head1 USING MULTIPLE TABLES |

A single script can create as many tables as your RAM will support and
you can have multiple statement handles open to the tables
simultaneously.  This allows you to simulate joins and multi-table
operations by iterating over several statement handles at once.  You
can also mix and match databases of different formats, for example
gathering user info from .ini and .config files in many different
formats and putting them all into a single table.
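
For example, here is a rough sketch of simulating a join between two
in-memory tables by nesting statement handles; the table and column
names are invented for the example:

  my $orders = $dbh->prepare("SELECT cust_id, item FROM orders");
  $orders->execute;
  while (my($cust_id, $item) = $orders->fetchrow_array) {
      my $cust = $dbh->prepare(
          "SELECT name FROM customers WHERE id = " . $dbh->quote($cust_id)
      );
      $cust->execute;
      my($name) = $cust->fetchrow_array;
      print "$name ordered $item\n";
  }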
|
=head1 TO DO |

Lots of stuff.  Allow users to specify a file where catalog
information is stored, so that one could record file types once and
thereafter automatically open the files with the correct data type.  A
convert() function to go from one format to another.  Support for a
variety of other easily parsed formats such as mail files, web logs,
and various DBM formats.  Support for HTML files, with the directory
considered as a table, each HTML file considered as a record, and the
filename, <TITLE> tag, and <BODY> tags considered as fields.  More
robust SQL (coming when I update Statement.pm) including RLIKE (a
regex-based LIKE), joins, alter table, typed fields?, authorization
mechanisms?, transactions?  Allow remote exports (e.g. with LWP
POST/PUT).

Let me know what else...

=head1 AUTHOR |

Jeff Zucker <jeff@vpservices.com>

Copyright (c) 2000 Jeff Zucker.  All rights reserved.  This program is
free software; you can redistribute it and/or modify it under the same
terms as Perl itself as specified in the Perl README file.

No warranty of any kind is implied; use at your own risk.

=head1 SEE ALSO |

DBI, SQL::Statement, DBD::File

=cut |