/[pxelator]/lib/Data/Dump.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /lib/Data/Dump.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 494 - (hide annotations)
Mon Jan 25 18:35:54 2010 UTC (12 years, 8 months ago) by dpavlin
File size: 16251 byte(s)
Data::Dump 1.15
1 dpavlin 494 package Data::Dump;
2    
3     use strict;
4     use vars qw(@EXPORT @EXPORT_OK $VERSION $DEBUG);
5     use subs qq(dump);
6    
7     require Exporter;
8     *import = \&Exporter::import;
9     @EXPORT = qw(dd ddx);
10     @EXPORT_OK = qw(dump pp quote);
11    
12     $VERSION = "1.15";
13     $DEBUG = 0;
14    
15     use overload ();
16     use vars qw(%seen %refcnt @dump @fixup %require $TRY_BASE64);
17    
18     $TRY_BASE64 = 50 unless defined $TRY_BASE64;
19    
20     my %is_perl_keyword = map { $_ => 1 }
21     qw( __FILE__ __LINE__ __PACKAGE__ __DATA__ __END__ AUTOLOAD BEGIN CORE
22     DESTROY END EQ GE GT INIT LE LT NE abs accept alarm and atan2 bind
23     binmode bless caller chdir chmod chomp chop chown chr chroot close
24     closedir cmp connect continue cos crypt dbmclose dbmopen defined
25     delete die do dump each else elsif endgrent endhostent endnetent
26     endprotoent endpwent endservent eof eq eval exec exists exit exp fcntl
27     fileno flock for foreach fork format formline ge getc getgrent
28     getgrgid getgrnam gethostbyaddr gethostbyname gethostent getlogin
29     getnetbyaddr getnetbyname getnetent getpeername getpgrp getppid
30     getpriority getprotobyname getprotobynumber getprotoent getpwent
31     getpwnam getpwuid getservbyname getservbyport getservent getsockname
32     getsockopt glob gmtime goto grep gt hex if index int ioctl join keys
33     kill last lc lcfirst le length link listen local localtime lock log
34     lstat lt m map mkdir msgctl msgget msgrcv msgsnd my ne next no not oct
35     open opendir or ord pack package pipe pop pos print printf prototype
36     push q qq qr quotemeta qw qx rand read readdir readline readlink
37     readpipe recv redo ref rename require reset return reverse rewinddir
38     rindex rmdir s scalar seek seekdir select semctl semget semop send
39     setgrent sethostent setnetent setpgrp setpriority setprotoent setpwent
40     setservent setsockopt shift shmctl shmget shmread shmwrite shutdown
41     sin sleep socket socketpair sort splice split sprintf sqrt srand stat
42     study sub substr symlink syscall sysopen sysread sysseek system
43     syswrite tell telldir tie tied time times tr truncate uc ucfirst umask
44     undef unless unlink unpack unshift untie until use utime values vec
45     wait waitpid wantarray warn while write x xor y);
46    
47    
48     sub dump
49     {
50     local %seen;
51     local %refcnt;
52     local %require;
53     local @fixup;
54    
55     my $name = "a";
56     my @dump;
57    
58     for my $v (@_) {
59     my $val = _dump($v, $name, [], tied($v));
60     push(@dump, [$name, $val]);
61     } continue {
62     $name++;
63     }
64    
65     my $out = "";
66     if (%require) {
67     for (sort keys %require) {
68     $out .= "require $_;\n";
69     }
70     }
71     if (%refcnt) {
72     # output all those with refcounts first
73     for (@dump) {
74     my $name = $_->[0];
75     if ($refcnt{$name}) {
76     $out .= "my \$$name = $_->[1];\n";
77     undef $_->[1];
78     }
79     }
80     for (@fixup) {
81     $out .= "$_;\n";
82     }
83     }
84    
85     my $paren = (@dump != 1);
86     $out .= "(" if $paren;
87     $out .= format_list($paren, undef,
88     map {defined($_->[1]) ? $_->[1] : "\$".$_->[0]}
89     @dump
90     );
91     $out .= ")" if $paren;
92    
93     if (%refcnt || %require) {
94     $out .= ";\n";
95     $out =~ s/^/ /gm; # indent
96     $out = "do {\n$out}";
97     }
98    
99     #use Data::Dumper; print Dumper(\%refcnt);
100     #use Data::Dumper; print Dumper(\%seen);
101    
102     print STDERR "$out\n" unless defined wantarray;
103     $out;
104     }
105    
106     *pp = \&dump;
107    
108     sub dd {
109     print dump(@_), "\n";
110     }
111    
112     sub ddx {
113     my(undef, $file, $line) = caller;
114     $file =~ s,.*[\\/],,;
115     my $out = "$file:$line: " . dump(@_) . "\n";
116     $out =~ s/^/# /gm;
117     print $out;
118     }
119    
120     sub _dump
121     {
122     my $ref = ref $_[0];
123     my $rval = $ref ? $_[0] : \$_[0];
124     shift;
125    
126     my($name, $idx, $dont_remember) = @_;
127    
128     my($class, $type, $id);
129     if (overload::StrVal($rval) =~ /^(?:([^=]+)=)?([A-Z]+)\(0x([^\)]+)\)$/) {
130     $class = $1;
131     $type = $2;
132     $id = $3;
133     } else {
134     die "Can't parse " . overload::StrVal($rval);
135     }
136     if ($] < 5.008 && $type eq "SCALAR") {
137     $type = "REF" if $ref eq "REF";
138     }
139     warn "\$$name(@$idx) $class $type $id ($ref)" if $DEBUG;
140    
141     unless ($dont_remember) {
142     if (my $s = $seen{$id}) {
143     my($sname, $sidx) = @$s;
144     $refcnt{$sname}++;
145     my $sref = fullname($sname, $sidx,
146     ($ref && $type eq "SCALAR"));
147     warn "SEEN: [\$$name(@$idx)] => [\$$sname(@$sidx)] ($ref,$sref)" if $DEBUG;
148     return $sref unless $sname eq $name;
149     $refcnt{$name}++;
150     push(@fixup, fullname($name,$idx)." = $sref");
151     return "do{my \$fix}" if @$idx && $idx->[-1] eq '$';
152     return "'fix'";
153     }
154     $seen{$id} = [$name, $idx];
155     }
156    
157     my $out;
158     if ($type eq "SCALAR" || $type eq "REF" || $type eq "REGEXP") {
159     if ($ref) {
160     if ($class && $class eq "Regexp") {
161     my $v = "$rval";
162    
163     my $mod = "";
164     if ($v =~ /^\(\?([msix-]+):([\x00-\xFF]*)\)\z/) {
165     $mod = $1;
166     $v = $2;
167     $mod =~ s/-.*//;
168     }
169    
170     my $sep = '/';
171     my $sep_count = ($v =~ tr/\///);
172     if ($sep_count) {
173     # see if we can find a better one
174     for ('|', ',', ':', '#') {
175     my $c = eval "\$v =~ tr/\Q$_\E//";
176     #print "SEP $_ $c $sep_count\n";
177     if ($c < $sep_count) {
178     $sep = $_;
179     $sep_count = $c;
180     last if $sep_count == 0;
181     }
182     }
183     }
184     $v =~ s/\Q$sep\E/\\$sep/g;
185    
186     $out = "qr$sep$v$sep$mod";
187     undef($class);
188     }
189     else {
190     delete $seen{$id} if $type eq "SCALAR"; # will be seen again shortly
191     my $val = _dump($$rval, $name, [@$idx, "\$"]);
192     $out = $class ? "do{\\(my \$o = $val)}" : "\\$val";
193     }
194     } else {
195     if (!defined $$rval) {
196     $out = "undef";
197     }
198     elsif ($$rval =~ /^-?[1-9]\d{0,9}$/ || $$rval eq "0") {
199     $out = $$rval;
200     }
201     else {
202     $out = str($$rval);
203     }
204     if ($class && !@$idx) {
205     # Top is an object, not a reference to one as perl needs
206     $refcnt{$name}++;
207     my $obj = fullname($name, $idx);
208     my $cl = quote($class);
209     push(@fixup, "bless \\$obj, $cl");
210     }
211     }
212     }
213     elsif ($type eq "GLOB") {
214     if ($ref) {
215     delete $seen{$id};
216     my $val = _dump($$rval, $name, [@$idx, "*"]);
217     $out = "\\$val";
218     if ($out =~ /^\\\*Symbol::/) {
219     $require{Symbol}++;
220     $out = "Symbol::gensym()";
221     }
222     } else {
223     my $val = "$$rval";
224     $out = "$$rval";
225    
226     for my $k (qw(SCALAR ARRAY HASH)) {
227     my $gval = *$$rval{$k};
228     next unless defined $gval;
229     next if $k eq "SCALAR" && ! defined $$gval; # always there
230     my $f = scalar @fixup;
231     push(@fixup, "RESERVED"); # overwritten after _dump() below
232     $gval = _dump($gval, $name, [@$idx, "*{$k}"]);
233     $refcnt{$name}++;
234     my $gname = fullname($name, $idx);
235     $fixup[$f] = "$gname = $gval"; #XXX indent $gval
236     }
237     }
238     }
239     elsif ($type eq "ARRAY") {
240     my @vals;
241     my $tied = tied_str(tied(@$rval));
242     my $i = 0;
243     for my $v (@$rval) {
244     push(@vals, _dump($v, $name, [@$idx, "[$i]"], $tied));
245     $i++;
246     }
247     $out = "[" . format_list(1, $tied, @vals) . "]";
248     }
249     elsif ($type eq "HASH") {
250     my(@keys, @vals);
251     my $tied = tied_str(tied(%$rval));
252    
253     # statistics to determine variation in key lengths
254     my $kstat_max = 0;
255     my $kstat_sum = 0;
256     my $kstat_sum2 = 0;
257    
258     my @orig_keys = keys %$rval;
259     my $text_keys = 0;
260     for (@orig_keys) {
261     $text_keys++, last unless /^[-+]?(?:0|[1-9]\d*)(?:\.\d+)?\z/;
262     }
263    
264     if ($text_keys) {
265     @orig_keys = sort @orig_keys;
266     }
267     else {
268     @orig_keys = sort { $a <=> $b } @orig_keys;
269     }
270    
271     for my $key (@orig_keys) {
272     my $val = \$rval->{$key};
273     $key = quote($key) if $is_perl_keyword{$key} ||
274     !($key =~ /^[a-zA-Z_]\w{0,19}\z/ ||
275     $key =~ /^-?[1-9]\d{0,8}\z/
276     );
277    
278     $kstat_max = length($key) if length($key) > $kstat_max;
279     $kstat_sum += length($key);
280     $kstat_sum2 += length($key)*length($key);
281    
282     push(@keys, $key);
283     push(@vals, _dump($$val, $name, [@$idx, "{$key}"], $tied));
284     }
285     my $nl = "";
286     my $klen_pad = 0;
287     my $tmp = "@keys @vals";
288     if (length($tmp) > 60 || $tmp =~ /\n/ || $tied) {
289     $nl = "\n";
290    
291     # Determine what padding to add
292     if ($kstat_max < 4) {
293     $klen_pad = $kstat_max;
294     }
295     elsif (@keys >= 2) {
296     my $n = @keys;
297     my $avg = $kstat_sum/$n;
298     my $stddev = sqrt(($kstat_sum2 - $n * $avg * $avg) / ($n - 1));
299    
300     # I am not actually very happy with this heuristics
301     if ($stddev / $kstat_max < 0.25) {
302     $klen_pad = $kstat_max;
303     }
304     if ($DEBUG) {
305     push(@keys, "__S");
306     push(@vals, sprintf("%.2f (%d/%.1f/%.1f)",
307     $stddev / $kstat_max,
308     $kstat_max, $avg, $stddev));
309     }
310     }
311     }
312     $out = "{$nl";
313     $out .= " # $tied$nl" if $tied;
314     while (@keys) {
315     my $key = shift @keys;
316     my $val = shift @vals;
317     my $pad = " " x ($klen_pad + 6);
318     $val =~ s/\n/\n$pad/gm;
319     $key = " $key" . " " x ($klen_pad - length($key)) if $nl;
320     $out .= " $key => $val,$nl";
321     }
322     $out =~ s/,$/ / unless $nl;
323     $out .= "}";
324     }
325     elsif ($type eq "CODE") {
326     $out = 'sub { "???" }';
327     }
328     else {
329     warn "Can't handle $type data";
330     $out = "'#$type#'";
331     }
332    
333     if ($class && $ref) {
334     $out = "bless($out, " . quote($class) . ")";
335     }
336     return $out;
337     }
338    
339     sub tied_str {
340     my $tied = shift;
341     if ($tied) {
342     if (my $tied_ref = ref($tied)) {
343     $tied = "tied $tied_ref";
344     }
345     else {
346     $tied = "tied";
347     }
348     }
349     return $tied;
350     }
351    
352     sub fullname
353     {
354     my($name, $idx, $ref) = @_;
355     substr($name, 0, 0) = "\$";
356    
357     my @i = @$idx; # need copy in order to not modify @$idx
358     if ($ref && @i && $i[0] eq "\$") {
359     shift(@i); # remove one deref
360     $ref = 0;
361     }
362     while (@i && $i[0] eq "\$") {
363     shift @i;
364     $name = "\$$name";
365     }
366    
367     my $last_was_index;
368     for my $i (@i) {
369     if ($i eq "*" || $i eq "\$") {
370     $last_was_index = 0;
371     $name = "$i\{$name}";
372     } elsif ($i =~ s/^\*//) {
373     $name .= $i;
374     $last_was_index++;
375     } else {
376     $name .= "->" unless $last_was_index++;
377     $name .= $i;
378     }
379     }
380     $name = "\\$name" if $ref;
381     $name;
382     }
383    
384     sub format_list
385     {
386     my $paren = shift;
387     my $comment = shift;
388     my $indent_lim = $paren ? 0 : 1;
389     my $tmp = "@_";
390     if ($comment || (@_ > $indent_lim && (length($tmp) > 60 || $tmp =~ /\n/))) {
391     my @elem = @_;
392     for (@elem) { s/^/ /gm; } # indent
393     return "\n" . ($comment ? " # $comment\n" : "") .
394     join(",\n", @elem, "");
395     } else {
396     return join(", ", @_);
397     }
398     }
399    
400     sub str {
401     if (length($_[0]) > 20) {
402     for ($_[0]) {
403     # Check for repeated string
404     if (/^(.)\1\1\1/s) {
405     # seems to be a repating sequence, let's check if it really is
406     # without backtracking
407     unless (/[^\Q$1\E]/) {
408     my $base = quote($1);
409     my $repeat = length;
410     return "($base x $repeat)"
411     }
412     }
413     # Length protection because the RE engine will blow the stack [RT#33520]
414     if (length($_) < 16 * 1024 && /^(.{2,5}?)\1*\z/s) {
415     my $base = quote($1);
416     my $repeat = length($_)/length($1);
417     return "($base x $repeat)";
418     }
419     }
420     }
421    
422     local $_ = &quote;
423    
424     if (length($_) > 40 && !/\\x\{/ && length($_) > (length($_[0]) * 2)) {
425     # too much binary data, better to represent as a hex/base64 string
426    
427     # Base64 is more compact than hex when string is longer than
428     # 17 bytes (not counting any require statement needed).
429     # But on the other hand, hex is much more readable.
430     if ($TRY_BASE64 && length($_[0]) > $TRY_BASE64 &&
431     eval { require MIME::Base64 })
432     {
433     $require{"MIME::Base64"}++;
434     return "MIME::Base64::decode(\"" .
435     MIME::Base64::encode($_[0],"") .
436     "\")";
437     }
438     return "pack(\"H*\",\"" . unpack("H*", $_[0]) . "\")";
439     }
440    
441     return $_;
442     }
443    
444     my %esc = (
445     "\a" => "\\a",
446     "\b" => "\\b",
447     "\t" => "\\t",
448     "\n" => "\\n",
449     "\f" => "\\f",
450     "\r" => "\\r",
451     "\e" => "\\e",
452     );
453    
454     # put a string value in double quotes
455     sub quote {
456     local($_) = $_[0];
457     # If there are many '"' we might want to use qq() instead
458     s/([\\\"\@\$])/\\$1/g;
459     return qq("$_") unless /[^\040-\176]/; # fast exit
460    
461     s/([\a\b\t\n\f\r\e])/$esc{$1}/g;
462    
463     # no need for 3 digits in escape for these
464     s/([\0-\037])(?!\d)/sprintf('\\%o',ord($1))/eg;
465    
466     s/([\0-\037\177-\377])/sprintf('\\x%02X',ord($1))/eg;
467     s/([^\040-\176])/sprintf('\\x{%X}',ord($1))/eg;
468    
469     return qq("$_");
470     }
471    
472     1;
473    
474     __END__
475    
476     =head1 NAME
477    
478     Data::Dump - Pretty printing of data structures
479    
480     =head1 SYNOPSIS
481    
482     use Data::Dump qw(dump ddx);
483    
484     $str = dump(@list);
485     @copy_of_list = eval $str;
486    
487     # or use it for easy debug printout
488     ddx localtime;
489    
490     =head1 DESCRIPTION
491    
492     This module provide functions that takes a list of values as their
493     argument and produces a string as its result. The string contains
494     Perl code that, when C<eval>ed, produces a deep copy of the original
495     arguments.
496    
497     The main feature of the module is that it strives to produce output
498     that is easy to read. Example:
499    
500     @a = (1, [2, 3], {4 => 5});
501     dump(@a);
502    
503     Produces:
504    
505     (1, [2, 3], { 4 => 5 })
506    
507     If you dump just a little data, it is output on a single line. If
508     you dump data that is more complex or there is a lot of it, line breaks
509     are automatically added to keep it easy to read.
510    
511     The following functions are provided (only the dd* functions are exported by default):
512    
513     =over
514    
515     =item dump( ... )
516    
517     =item pp( ... )
518    
519     Returns a string containing a Perl expression. If you pass this
520     string to Perl's built-in eval() function it should return a copy of
521     the arguments you passed to dump().
522    
523     If you call the function with multiple arguments then the output will
524     be wrapped in parenthesis "( ..., ... )". If you call the function with a
525     single argument the output will not have the wrapping. If you call the function with
526     a single scalar (non-reference) argument it will just return the
527     scalar quoted if needed, but never break it into multiple lines. If you
528     pass multiple arguments or references to arrays of hashes then the
529     return value might contain line breaks to format it for easier
530     reading. The returned string will never be "\n" terminated, even if
531     contains multiple lines. This allows code like this to place the
532     semicolon in the expected place:
533    
534     print '$obj = ', dump($obj), ";\n";
535    
536     If dump() is called in void context, then the dump is printed on
537     STDERR and then "\n" terminated. You might find this useful for quick
538     debug printouts, but the dd*() functions might be better alternatives
539     for this.
540    
541     There is no difference between dump() and pp(), except that dump()
542     shares its name with a not-so-useful perl builtin. Because of this
543     some might want to avoid using that name.
544    
545     =item quote( $string )
546    
547     Returns a quoted version of the provided string.
548    
549     It differs from C<dump($string)> in that it will quote even numbers and
550     not try to come up with clever expressions that might shorten the
551     output.
552    
553     =item dd( ... )
554    
555     =item ddx( ... )
556    
557     These functions will call dump() on their argument and print the
558     result to STDOUT (actually, it's the currently selected output handle, but
559     STDOUT is the default for that).
560    
561     The difference between them is only that ddx() will prefix the lines
562     it prints with "# " and mark the first line with the file and line
563     number where it was called. This is meant to be useful for debug
564     printouts of state within programs.
565    
566     =back
567    
568    
569     =head1 LIMITATIONS
570    
571     Code references will be displayed as simply 'sub { "???" }' when
572     dumped. Thus, C<eval>ing them will not reproduce the original routine.
573    
574     If you forget to explicitly import the C<dump> function, your code will
575     core dump. That's because you just called the builtin C<dump> function
576     by accident, which intentionally dumps core. Because of this you can
577     also import the same function as C<pp>, mnemonic for "pretty-print".
578    
579     =head1 HISTORY
580    
581     The C<Data::Dump> module grew out of frustration with Sarathy's
582     in-most-cases-excellent C<Data::Dumper>. Basic ideas and some code
583     are shared with Sarathy's module.
584    
585     The C<Data::Dump> module provides a much simpler interface than
586     C<Data::Dumper>. No OO interface is available and there are no
587     configuration options to worry about (yet :-). The other benefit is
588     that the dump produced does not try to set any variables. It only
589     returns what is needed to produce a copy of the arguments. This means
590     that C<dump("foo")> simply returns C<"foo">, and C<dump(1..5)> simply
591     returns C<(1, 2, 3, 4, 5)>.
592    
593     =head1 SEE ALSO
594    
595     L<Data::Dumper>, L<Storable>
596    
597     =head1 AUTHORS
598    
599     The C<Data::Dump> module is written by Gisle Aas <gisle@aas.no>, based
600     on C<Data::Dumper> by Gurusamy Sarathy <gsar@umich.edu>.
601    
602     Copyright 1998-2000,2003-2004,2008 Gisle Aas.
603     Copyright 1996-1998 Gurusamy Sarathy.
604    
605     This library is free software; you can redistribute it and/or
606     modify it under the same terms as Perl itself.
607    
608     =cut

  ViewVC Help
Powered by ViewVC 1.1.26