/[pxelator]/lib/Data/Dump.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /lib/Data/Dump.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 495 - (show annotations)
Mon Jan 25 18:37:22 2010 UTC (14 years, 2 months ago) by dpavlin
File size: 16260 byte(s)
auto-vivification fix for error

Modification of non-creatable hash value attempted, subscript "File" at /usr/share/perl5/Data/Dump.pm line 272

which is triggered by MongoDB perl driver

1 package Data::Dump;
2
3 use strict;
4 use vars qw(@EXPORT @EXPORT_OK $VERSION $DEBUG);
5 use subs qq(dump);
6
7 require Exporter;
8 *import = \&Exporter::import;
9 @EXPORT = qw(dd ddx);
10 @EXPORT_OK = qw(dump pp quote);
11
12 $VERSION = "1.15";
13 $DEBUG = 0;
14
15 use overload ();
16 use vars qw(%seen %refcnt @dump @fixup %require $TRY_BASE64);
17
18 $TRY_BASE64 = 50 unless defined $TRY_BASE64;
19
20 my %is_perl_keyword = map { $_ => 1 }
21 qw( __FILE__ __LINE__ __PACKAGE__ __DATA__ __END__ AUTOLOAD BEGIN CORE
22 DESTROY END EQ GE GT INIT LE LT NE abs accept alarm and atan2 bind
23 binmode bless caller chdir chmod chomp chop chown chr chroot close
24 closedir cmp connect continue cos crypt dbmclose dbmopen defined
25 delete die do dump each else elsif endgrent endhostent endnetent
26 endprotoent endpwent endservent eof eq eval exec exists exit exp fcntl
27 fileno flock for foreach fork format formline ge getc getgrent
28 getgrgid getgrnam gethostbyaddr gethostbyname gethostent getlogin
29 getnetbyaddr getnetbyname getnetent getpeername getpgrp getppid
30 getpriority getprotobyname getprotobynumber getprotoent getpwent
31 getpwnam getpwuid getservbyname getservbyport getservent getsockname
32 getsockopt glob gmtime goto grep gt hex if index int ioctl join keys
33 kill last lc lcfirst le length link listen local localtime lock log
34 lstat lt m map mkdir msgctl msgget msgrcv msgsnd my ne next no not oct
35 open opendir or ord pack package pipe pop pos print printf prototype
36 push q qq qr quotemeta qw qx rand read readdir readline readlink
37 readpipe recv redo ref rename require reset return reverse rewinddir
38 rindex rmdir s scalar seek seekdir select semctl semget semop send
39 setgrent sethostent setnetent setpgrp setpriority setprotoent setpwent
40 setservent setsockopt shift shmctl shmget shmread shmwrite shutdown
41 sin sleep socket socketpair sort splice split sprintf sqrt srand stat
42 study sub substr symlink syscall sysopen sysread sysseek system
43 syswrite tell telldir tie tied time times tr truncate uc ucfirst umask
44 undef unless unlink unpack unshift untie until use utime values vec
45 wait waitpid wantarray warn while write x xor y);
46
47
48 sub dump
49 {
50 local %seen;
51 local %refcnt;
52 local %require;
53 local @fixup;
54
55 my $name = "a";
56 my @dump;
57
58 for my $v (@_) {
59 my $val = _dump($v, $name, [], tied($v));
60 push(@dump, [$name, $val]);
61 } continue {
62 $name++;
63 }
64
65 my $out = "";
66 if (%require) {
67 for (sort keys %require) {
68 $out .= "require $_;\n";
69 }
70 }
71 if (%refcnt) {
72 # output all those with refcounts first
73 for (@dump) {
74 my $name = $_->[0];
75 if ($refcnt{$name}) {
76 $out .= "my \$$name = $_->[1];\n";
77 undef $_->[1];
78 }
79 }
80 for (@fixup) {
81 $out .= "$_;\n";
82 }
83 }
84
85 my $paren = (@dump != 1);
86 $out .= "(" if $paren;
87 $out .= format_list($paren, undef,
88 map {defined($_->[1]) ? $_->[1] : "\$".$_->[0]}
89 @dump
90 );
91 $out .= ")" if $paren;
92
93 if (%refcnt || %require) {
94 $out .= ";\n";
95 $out =~ s/^/ /gm; # indent
96 $out = "do {\n$out}";
97 }
98
99 #use Data::Dumper; print Dumper(\%refcnt);
100 #use Data::Dumper; print Dumper(\%seen);
101
102 print STDERR "$out\n" unless defined wantarray;
103 $out;
104 }
105
106 *pp = \&dump;
107
108 sub dd {
109 print dump(@_), "\n";
110 }
111
112 sub ddx {
113 my(undef, $file, $line) = caller;
114 $file =~ s,.*[\\/],,;
115 my $out = "$file:$line: " . dump(@_) . "\n";
116 $out =~ s/^/# /gm;
117 print $out;
118 }
119
120 sub _dump
121 {
122 my $ref = ref $_[0];
123 my $rval = $ref ? $_[0] : \$_[0];
124 shift;
125
126 my($name, $idx, $dont_remember) = @_;
127
128 my($class, $type, $id);
129 if (overload::StrVal($rval) =~ /^(?:([^=]+)=)?([A-Z]+)\(0x([^\)]+)\)$/) {
130 $class = $1;
131 $type = $2;
132 $id = $3;
133 } else {
134 die "Can't parse " . overload::StrVal($rval);
135 }
136 if ($] < 5.008 && $type eq "SCALAR") {
137 $type = "REF" if $ref eq "REF";
138 }
139 warn "\$$name(@$idx) $class $type $id ($ref)" if $DEBUG;
140
141 unless ($dont_remember) {
142 if (my $s = $seen{$id}) {
143 my($sname, $sidx) = @$s;
144 $refcnt{$sname}++;
145 my $sref = fullname($sname, $sidx,
146 ($ref && $type eq "SCALAR"));
147 warn "SEEN: [\$$name(@$idx)] => [\$$sname(@$sidx)] ($ref,$sref)" if $DEBUG;
148 return $sref unless $sname eq $name;
149 $refcnt{$name}++;
150 push(@fixup, fullname($name,$idx)." = $sref");
151 return "do{my \$fix}" if @$idx && $idx->[-1] eq '$';
152 return "'fix'";
153 }
154 $seen{$id} = [$name, $idx];
155 }
156
157 my $out;
158 if ($type eq "SCALAR" || $type eq "REF" || $type eq "REGEXP") {
159 if ($ref) {
160 if ($class && $class eq "Regexp") {
161 my $v = "$rval";
162
163 my $mod = "";
164 if ($v =~ /^\(\?([msix-]+):([\x00-\xFF]*)\)\z/) {
165 $mod = $1;
166 $v = $2;
167 $mod =~ s/-.*//;
168 }
169
170 my $sep = '/';
171 my $sep_count = ($v =~ tr/\///);
172 if ($sep_count) {
173 # see if we can find a better one
174 for ('|', ',', ':', '#') {
175 my $c = eval "\$v =~ tr/\Q$_\E//";
176 #print "SEP $_ $c $sep_count\n";
177 if ($c < $sep_count) {
178 $sep = $_;
179 $sep_count = $c;
180 last if $sep_count == 0;
181 }
182 }
183 }
184 $v =~ s/\Q$sep\E/\\$sep/g;
185
186 $out = "qr$sep$v$sep$mod";
187 undef($class);
188 }
189 else {
190 delete $seen{$id} if $type eq "SCALAR"; # will be seen again shortly
191 my $val = _dump($$rval, $name, [@$idx, "\$"]);
192 $out = $class ? "do{\\(my \$o = $val)}" : "\\$val";
193 }
194 } else {
195 if (!defined $$rval) {
196 $out = "undef";
197 }
198 elsif ($$rval =~ /^-?[1-9]\d{0,9}$/ || $$rval eq "0") {
199 $out = $$rval;
200 }
201 else {
202 $out = str($$rval);
203 }
204 if ($class && !@$idx) {
205 # Top is an object, not a reference to one as perl needs
206 $refcnt{$name}++;
207 my $obj = fullname($name, $idx);
208 my $cl = quote($class);
209 push(@fixup, "bless \\$obj, $cl");
210 }
211 }
212 }
213 elsif ($type eq "GLOB") {
214 if ($ref) {
215 delete $seen{$id};
216 my $val = _dump($$rval, $name, [@$idx, "*"]);
217 $out = "\\$val";
218 if ($out =~ /^\\\*Symbol::/) {
219 $require{Symbol}++;
220 $out = "Symbol::gensym()";
221 }
222 } else {
223 my $val = "$$rval";
224 $out = "$$rval";
225
226 for my $k (qw(SCALAR ARRAY HASH)) {
227 my $gval = *$$rval{$k};
228 next unless defined $gval;
229 next if $k eq "SCALAR" && ! defined $$gval; # always there
230 my $f = scalar @fixup;
231 push(@fixup, "RESERVED"); # overwritten after _dump() below
232 $gval = _dump($gval, $name, [@$idx, "*{$k}"]);
233 $refcnt{$name}++;
234 my $gname = fullname($name, $idx);
235 $fixup[$f] = "$gname = $gval"; #XXX indent $gval
236 }
237 }
238 }
239 elsif ($type eq "ARRAY") {
240 my @vals;
241 my $tied = tied_str(tied(@$rval));
242 my $i = 0;
243 for my $v (@$rval) {
244 push(@vals, _dump($v, $name, [@$idx, "[$i]"], $tied));
245 $i++;
246 }
247 $out = "[" . format_list(1, $tied, @vals) . "]";
248 }
249 elsif ($type eq "HASH") {
250 my(@keys, @vals);
251 my $tied = tied_str(tied(%$rval));
252
253 # statistics to determine variation in key lengths
254 my $kstat_max = 0;
255 my $kstat_sum = 0;
256 my $kstat_sum2 = 0;
257
258 my @orig_keys = keys %$rval;
259 my $text_keys = 0;
260 for (@orig_keys) {
261 $text_keys++, last unless /^[-+]?(?:0|[1-9]\d*)(?:\.\d+)?\z/;
262 }
263
264 if ($text_keys) {
265 @orig_keys = sort @orig_keys;
266 }
267 else {
268 @orig_keys = sort { $a <=> $b } @orig_keys;
269 }
270
271 for my $key (@orig_keys) {
272 my $val = eval { \$rval->{$key} };
273 $key = quote($key) if $is_perl_keyword{$key} ||
274 !($key =~ /^[a-zA-Z_]\w{0,19}\z/ ||
275 $key =~ /^-?[1-9]\d{0,8}\z/
276 );
277
278 $kstat_max = length($key) if length($key) > $kstat_max;
279 $kstat_sum += length($key);
280 $kstat_sum2 += length($key)*length($key);
281
282 push(@keys, $key);
283 push(@vals, _dump($$val, $name, [@$idx, "{$key}"], $tied));
284 }
285 my $nl = "";
286 my $klen_pad = 0;
287 my $tmp = "@keys @vals";
288 if (length($tmp) > 60 || $tmp =~ /\n/ || $tied) {
289 $nl = "\n";
290
291 # Determine what padding to add
292 if ($kstat_max < 4) {
293 $klen_pad = $kstat_max;
294 }
295 elsif (@keys >= 2) {
296 my $n = @keys;
297 my $avg = $kstat_sum/$n;
298 my $stddev = sqrt(($kstat_sum2 - $n * $avg * $avg) / ($n - 1));
299
300 # I am not actually very happy with this heuristics
301 if ($stddev / $kstat_max < 0.25) {
302 $klen_pad = $kstat_max;
303 }
304 if ($DEBUG) {
305 push(@keys, "__S");
306 push(@vals, sprintf("%.2f (%d/%.1f/%.1f)",
307 $stddev / $kstat_max,
308 $kstat_max, $avg, $stddev));
309 }
310 }
311 }
312 $out = "{$nl";
313 $out .= " # $tied$nl" if $tied;
314 while (@keys) {
315 my $key = shift @keys;
316 my $val = shift @vals;
317 my $pad = " " x ($klen_pad + 6);
318 $val =~ s/\n/\n$pad/gm;
319 $key = " $key" . " " x ($klen_pad - length($key)) if $nl;
320 $out .= " $key => $val,$nl";
321 }
322 $out =~ s/,$/ / unless $nl;
323 $out .= "}";
324 }
325 elsif ($type eq "CODE") {
326 $out = 'sub { "???" }';
327 }
328 else {
329 warn "Can't handle $type data";
330 $out = "'#$type#'";
331 }
332
333 if ($class && $ref) {
334 $out = "bless($out, " . quote($class) . ")";
335 }
336 return $out;
337 }
338
339 sub tied_str {
340 my $tied = shift;
341 if ($tied) {
342 if (my $tied_ref = ref($tied)) {
343 $tied = "tied $tied_ref";
344 }
345 else {
346 $tied = "tied";
347 }
348 }
349 return $tied;
350 }
351
352 sub fullname
353 {
354 my($name, $idx, $ref) = @_;
355 substr($name, 0, 0) = "\$";
356
357 my @i = @$idx; # need copy in order to not modify @$idx
358 if ($ref && @i && $i[0] eq "\$") {
359 shift(@i); # remove one deref
360 $ref = 0;
361 }
362 while (@i && $i[0] eq "\$") {
363 shift @i;
364 $name = "\$$name";
365 }
366
367 my $last_was_index;
368 for my $i (@i) {
369 if ($i eq "*" || $i eq "\$") {
370 $last_was_index = 0;
371 $name = "$i\{$name}";
372 } elsif ($i =~ s/^\*//) {
373 $name .= $i;
374 $last_was_index++;
375 } else {
376 $name .= "->" unless $last_was_index++;
377 $name .= $i;
378 }
379 }
380 $name = "\\$name" if $ref;
381 $name;
382 }
383
384 sub format_list
385 {
386 my $paren = shift;
387 my $comment = shift;
388 my $indent_lim = $paren ? 0 : 1;
389 my $tmp = "@_";
390 if ($comment || (@_ > $indent_lim && (length($tmp) > 60 || $tmp =~ /\n/))) {
391 my @elem = @_;
392 for (@elem) { s/^/ /gm; } # indent
393 return "\n" . ($comment ? " # $comment\n" : "") .
394 join(",\n", @elem, "");
395 } else {
396 return join(", ", @_);
397 }
398 }
399
400 sub str {
401 if (length($_[0]) > 20) {
402 for ($_[0]) {
403 # Check for repeated string
404 if (/^(.)\1\1\1/s) {
405 # seems to be a repating sequence, let's check if it really is
406 # without backtracking
407 unless (/[^\Q$1\E]/) {
408 my $base = quote($1);
409 my $repeat = length;
410 return "($base x $repeat)"
411 }
412 }
413 # Length protection because the RE engine will blow the stack [RT#33520]
414 if (length($_) < 16 * 1024 && /^(.{2,5}?)\1*\z/s) {
415 my $base = quote($1);
416 my $repeat = length($_)/length($1);
417 return "($base x $repeat)";
418 }
419 }
420 }
421
422 local $_ = &quote;
423
424 if (length($_) > 40 && !/\\x\{/ && length($_) > (length($_[0]) * 2)) {
425 # too much binary data, better to represent as a hex/base64 string
426
427 # Base64 is more compact than hex when string is longer than
428 # 17 bytes (not counting any require statement needed).
429 # But on the other hand, hex is much more readable.
430 if ($TRY_BASE64 && length($_[0]) > $TRY_BASE64 &&
431 eval { require MIME::Base64 })
432 {
433 $require{"MIME::Base64"}++;
434 return "MIME::Base64::decode(\"" .
435 MIME::Base64::encode($_[0],"") .
436 "\")";
437 }
438 return "pack(\"H*\",\"" . unpack("H*", $_[0]) . "\")";
439 }
440
441 return $_;
442 }
443
444 my %esc = (
445 "\a" => "\\a",
446 "\b" => "\\b",
447 "\t" => "\\t",
448 "\n" => "\\n",
449 "\f" => "\\f",
450 "\r" => "\\r",
451 "\e" => "\\e",
452 );
453
454 # put a string value in double quotes
455 sub quote {
456 local($_) = $_[0];
457 # If there are many '"' we might want to use qq() instead
458 s/([\\\"\@\$])/\\$1/g;
459 return qq("$_") unless /[^\040-\176]/; # fast exit
460
461 s/([\a\b\t\n\f\r\e])/$esc{$1}/g;
462
463 # no need for 3 digits in escape for these
464 s/([\0-\037])(?!\d)/sprintf('\\%o',ord($1))/eg;
465
466 s/([\0-\037\177-\377])/sprintf('\\x%02X',ord($1))/eg;
467 s/([^\040-\176])/sprintf('\\x{%X}',ord($1))/eg;
468
469 return qq("$_");
470 }
471
472 1;
473
474 __END__
475
476 =head1 NAME
477
478 Data::Dump - Pretty printing of data structures
479
480 =head1 SYNOPSIS
481
482 use Data::Dump qw(dump ddx);
483
484 $str = dump(@list);
485 @copy_of_list = eval $str;
486
487 # or use it for easy debug printout
488 ddx localtime;
489
490 =head1 DESCRIPTION
491
492 This module provide functions that takes a list of values as their
493 argument and produces a string as its result. The string contains
494 Perl code that, when C<eval>ed, produces a deep copy of the original
495 arguments.
496
497 The main feature of the module is that it strives to produce output
498 that is easy to read. Example:
499
500 @a = (1, [2, 3], {4 => 5});
501 dump(@a);
502
503 Produces:
504
505 (1, [2, 3], { 4 => 5 })
506
507 If you dump just a little data, it is output on a single line. If
508 you dump data that is more complex or there is a lot of it, line breaks
509 are automatically added to keep it easy to read.
510
511 The following functions are provided (only the dd* functions are exported by default):
512
513 =over
514
515 =item dump( ... )
516
517 =item pp( ... )
518
519 Returns a string containing a Perl expression. If you pass this
520 string to Perl's built-in eval() function it should return a copy of
521 the arguments you passed to dump().
522
523 If you call the function with multiple arguments then the output will
524 be wrapped in parenthesis "( ..., ... )". If you call the function with a
525 single argument the output will not have the wrapping. If you call the function with
526 a single scalar (non-reference) argument it will just return the
527 scalar quoted if needed, but never break it into multiple lines. If you
528 pass multiple arguments or references to arrays of hashes then the
529 return value might contain line breaks to format it for easier
530 reading. The returned string will never be "\n" terminated, even if
531 contains multiple lines. This allows code like this to place the
532 semicolon in the expected place:
533
534 print '$obj = ', dump($obj), ";\n";
535
536 If dump() is called in void context, then the dump is printed on
537 STDERR and then "\n" terminated. You might find this useful for quick
538 debug printouts, but the dd*() functions might be better alternatives
539 for this.
540
541 There is no difference between dump() and pp(), except that dump()
542 shares its name with a not-so-useful perl builtin. Because of this
543 some might want to avoid using that name.
544
545 =item quote( $string )
546
547 Returns a quoted version of the provided string.
548
549 It differs from C<dump($string)> in that it will quote even numbers and
550 not try to come up with clever expressions that might shorten the
551 output.
552
553 =item dd( ... )
554
555 =item ddx( ... )
556
557 These functions will call dump() on their argument and print the
558 result to STDOUT (actually, it's the currently selected output handle, but
559 STDOUT is the default for that).
560
561 The difference between them is only that ddx() will prefix the lines
562 it prints with "# " and mark the first line with the file and line
563 number where it was called. This is meant to be useful for debug
564 printouts of state within programs.
565
566 =back
567
568
569 =head1 LIMITATIONS
570
571 Code references will be displayed as simply 'sub { "???" }' when
572 dumped. Thus, C<eval>ing them will not reproduce the original routine.
573
574 If you forget to explicitly import the C<dump> function, your code will
575 core dump. That's because you just called the builtin C<dump> function
576 by accident, which intentionally dumps core. Because of this you can
577 also import the same function as C<pp>, mnemonic for "pretty-print".
578
579 =head1 HISTORY
580
581 The C<Data::Dump> module grew out of frustration with Sarathy's
582 in-most-cases-excellent C<Data::Dumper>. Basic ideas and some code
583 are shared with Sarathy's module.
584
585 The C<Data::Dump> module provides a much simpler interface than
586 C<Data::Dumper>. No OO interface is available and there are no
587 configuration options to worry about (yet :-). The other benefit is
588 that the dump produced does not try to set any variables. It only
589 returns what is needed to produce a copy of the arguments. This means
590 that C<dump("foo")> simply returns C<"foo">, and C<dump(1..5)> simply
591 returns C<(1, 2, 3, 4, 5)>.
592
593 =head1 SEE ALSO
594
595 L<Data::Dumper>, L<Storable>
596
597 =head1 AUTHORS
598
599 The C<Data::Dump> module is written by Gisle Aas <gisle@aas.no>, based
600 on C<Data::Dumper> by Gurusamy Sarathy <gsar@umich.edu>.
601
602 Copyright 1998-2000,2003-2004,2008 Gisle Aas.
603 Copyright 1996-1998 Gurusamy Sarathy.
604
605 This library is free software; you can redistribute it and/or
606 modify it under the same terms as Perl itself.
607
608 =cut

  ViewVC Help
Powered by ViewVC 1.1.26