lib/Data/Dump.pm

package Data::Dump;

use strict;
use vars qw(@EXPORT @EXPORT_OK $VERSION $DEBUG);
use subs qq(dump);

require Exporter;
*import = \&Exporter::import;
@EXPORT = qw(dd ddx);
@EXPORT_OK = qw(dump pp quote);

$VERSION = "1.15";
$DEBUG = 0;

use overload ();
use vars qw(%seen %refcnt @dump @fixup %require $TRY_BASE64);

$TRY_BASE64 = 50 unless defined $TRY_BASE64;

my %is_perl_keyword = map { $_ => 1 }
qw( __FILE__ __LINE__ __PACKAGE__ __DATA__ __END__ AUTOLOAD BEGIN CORE
DESTROY END EQ GE GT INIT LE LT NE abs accept alarm and atan2 bind
binmode bless caller chdir chmod chomp chop chown chr chroot close
closedir cmp connect continue cos crypt dbmclose dbmopen defined
delete die do dump each else elsif endgrent endhostent endnetent
endprotoent endpwent endservent eof eq eval exec exists exit exp fcntl
fileno flock for foreach fork format formline ge getc getgrent
getgrgid getgrnam gethostbyaddr gethostbyname gethostent getlogin
getnetbyaddr getnetbyname getnetent getpeername getpgrp getppid
getpriority getprotobyname getprotobynumber getprotoent getpwent
getpwnam getpwuid getservbyname getservbyport getservent getsockname
getsockopt glob gmtime goto grep gt hex if index int ioctl join keys
kill last lc lcfirst le length link listen local localtime lock log
lstat lt m map mkdir msgctl msgget msgrcv msgsnd my ne next no not oct
open opendir or ord pack package pipe pop pos print printf prototype
push q qq qr quotemeta qw qx rand read readdir readline readlink
readpipe recv redo ref rename require reset return reverse rewinddir
rindex rmdir s scalar seek seekdir select semctl semget semop send
setgrent sethostent setnetent setpgrp setpriority setprotoent setpwent
setservent setsockopt shift shmctl shmget shmread shmwrite shutdown
sin sleep socket socketpair sort splice split sprintf sqrt srand stat
study sub substr symlink syscall sysopen sysread sysseek system
syswrite tell telldir tie tied time times tr truncate uc ucfirst umask
undef unless unlink unpack unshift untie until use utime values vec
wait waitpid wantarray warn while write x xor y);


sub dump
{
    local %seen;
    local %refcnt;
    local %require;
    local @fixup;

    my $name = "a";
    my @dump;

    for my $v (@_) {
        my $val = _dump($v, $name, [], tied($v));
        push(@dump, [$name, $val]);
    } continue {
        $name++;
    }

    my $out = "";
    if (%require) {
        for (sort keys %require) {
            $out .= "require $_;\n";
        }
    }
    if (%refcnt) {
        # output all those with refcounts first
        for (@dump) {
            my $name = $_->[0];
            if ($refcnt{$name}) {
                $out .= "my \$$name = $_->[1];\n";
                undef $_->[1];
            }
        }
        for (@fixup) {
            $out .= "$_;\n";
        }
    }

    my $paren = (@dump != 1);
    $out .= "(" if $paren;
    $out .= format_list($paren, undef,
                        map {defined($_->[1]) ? $_->[1] : "\$".$_->[0]}
                            @dump
                       );
    $out .= ")" if $paren;

    if (%refcnt || %require) {
        $out .= ";\n";
        $out =~ s/^/  /gm;  # indent
        $out = "do {\n$out}";
    }

    #use Data::Dumper;   print Dumper(\%refcnt);
    #use Data::Dumper;   print Dumper(\%seen);

    print STDERR "$out\n" unless defined wantarray;
    $out;
}

*pp = \&dump;

sub dd {
    print dump(@_), "\n";
}

sub ddx {
    my(undef, $file, $line) = caller;
    $file =~ s,.*[\\/],,;
    my $out = "$file:$line: " . dump(@_) . "\n";
    $out =~ s/^/# /gm;
    print $out;
}

sub _dump
{
    my $ref  = ref $_[0];
    my $rval = $ref ? $_[0] : \$_[0];
    shift;

    my($name, $idx, $dont_remember) = @_;

    my($class, $type, $id);
    if (overload::StrVal($rval) =~ /^(?:([^=]+)=)?([A-Z]+)\(0x([^\)]+)\)$/) {
        $class = $1;
        $type  = $2;
        $id    = $3;
    } else {
        die "Can't parse " . overload::StrVal($rval);
    }
    if ($] < 5.008 && $type eq "SCALAR") {
        $type = "REF" if $ref eq "REF";
    }
    warn "\$$name(@$idx) $class $type $id ($ref)" if $DEBUG;

    unless ($dont_remember) {
        if (my $s = $seen{$id}) {
            my($sname, $sidx) = @$s;
            $refcnt{$sname}++;
            my $sref = fullname($sname, $sidx,
                                ($ref && $type eq "SCALAR"));
            warn "SEEN: [\$$name(@$idx)] => [\$$sname(@$sidx)] ($ref,$sref)" if $DEBUG;
            return $sref unless $sname eq $name;
            $refcnt{$name}++;
            push(@fixup, fullname($name,$idx)." = $sref");
            return "do{my \$fix}" if @$idx && $idx->[-1] eq '$';
            return "'fix'";
        }
        $seen{$id} = [$name, $idx];
    }

    my $out;
    if ($type eq "SCALAR" || $type eq "REF" || $type eq "REGEXP") {
        if ($ref) {
            if ($class && $class eq "Regexp") {
                my $v = "$rval";

                my $mod = "";
                if ($v =~ /^\(\?([msix-]+):([\x00-\xFF]*)\)\z/) {
                    $mod = $1;
                    $v = $2;
                    $mod =~ s/-.*//;
                }

                my $sep = '/';
                my $sep_count = ($v =~ tr/\///);
                if ($sep_count) {
                    # see if we can find a better one
                    for ('|', ',', ':', '#') {
                        my $c = eval "\$v =~ tr/\Q$_\E//";
                        #print "SEP $_ $c $sep_count\n";
                        if ($c < $sep_count) {
                            $sep = $_;
                            $sep_count = $c;
                            last if $sep_count == 0;
                        }
                    }
                }
                $v =~ s/\Q$sep\E/\\$sep/g;

                $out = "qr$sep$v$sep$mod";
                undef($class);
            }
            else {
                delete $seen{$id} if $type eq "SCALAR";  # will be seen again shortly
                my $val = _dump($$rval, $name, [@$idx, "\$"]);
                $out = $class ? "do{\\(my \$o = $val)}" : "\\$val";
            }
        } else {
            if (!defined $$rval) {
                $out = "undef";
            }
            elsif ($$rval =~ /^-?[1-9]\d{0,9}$/ || $$rval eq "0") {
                $out = $$rval;
            }
            else {
                $out = str($$rval);
            }
            if ($class && !@$idx) {
                # Top is an object, not a reference to one as perl needs
                $refcnt{$name}++;
                my $obj = fullname($name, $idx);
                my $cl  = quote($class);
                push(@fixup, "bless \\$obj, $cl");
            }
        }
    }
    elsif ($type eq "GLOB") {
        if ($ref) {
            delete $seen{$id};
            my $val = _dump($$rval, $name, [@$idx, "*"]);
            $out = "\\$val";
            if ($out =~ /^\\\*Symbol::/) {
                $require{Symbol}++;
                $out = "Symbol::gensym()";
            }
        } else {
            my $val = "$$rval";
            $out = "$$rval";

            for my $k (qw(SCALAR ARRAY HASH)) {
                my $gval = *$$rval{$k};
                next unless defined $gval;
                next if $k eq "SCALAR" && ! defined $$gval;  # always there
                my $f = scalar @fixup;
                push(@fixup, "RESERVED");  # overwritten after _dump() below
                $gval = _dump($gval, $name, [@$idx, "*{$k}"]);
                $refcnt{$name}++;
                my $gname = fullname($name, $idx);
                $fixup[$f] = "$gname = $gval";  #XXX indent $gval
            }
        }
    }
    elsif ($type eq "ARRAY") {
        my @vals;
        my $tied = tied_str(tied(@$rval));
        my $i = 0;
        for my $v (@$rval) {
            push(@vals, _dump($v, $name, [@$idx, "[$i]"], $tied));
            $i++;
        }
        $out = "[" . format_list(1, $tied, @vals) . "]";
    }
    elsif ($type eq "HASH") {
        my(@keys, @vals);
        my $tied = tied_str(tied(%$rval));

        # statistics to determine variation in key lengths
        my $kstat_max = 0;
        my $kstat_sum = 0;
        my $kstat_sum2 = 0;

        my @orig_keys = keys %$rval;
        my $text_keys = 0;
        for (@orig_keys) {
            $text_keys++, last unless /^[-+]?(?:0|[1-9]\d*)(?:\.\d+)?\z/;
        }

        if ($text_keys) {
            @orig_keys = sort @orig_keys;
        }
        else {
            @orig_keys = sort { $a <=> $b } @orig_keys;
        }

        for my $key (@orig_keys) {
            my $val = eval { \$rval->{$key} };
            $key = quote($key) if $is_perl_keyword{$key} ||
                                  !($key =~ /^[a-zA-Z_]\w{0,19}\z/ ||
                                    $key =~ /^-?[1-9]\d{0,8}\z/
                                    );

            $kstat_max = length($key) if length($key) > $kstat_max;
            $kstat_sum += length($key);
            $kstat_sum2 += length($key)*length($key);

            push(@keys, $key);
            push(@vals, _dump($$val, $name, [@$idx, "{$key}"], $tied));
        }
        my $nl = "";
        my $klen_pad = 0;
        my $tmp = "@keys @vals";
        if (length($tmp) > 60 || $tmp =~ /\n/ || $tied) {
            $nl = "\n";

            # Determine what padding to add
            if ($kstat_max < 4) {
                $klen_pad = $kstat_max;
            }
            elsif (@keys >= 2) {
                my $n = @keys;
                my $avg = $kstat_sum/$n;
                my $stddev = sqrt(($kstat_sum2 - $n * $avg * $avg) / ($n - 1));

                # I am not actually very happy with this heuristics
                if ($stddev / $kstat_max < 0.25) {
                    $klen_pad = $kstat_max;
                }
                if ($DEBUG) {
                    push(@keys, "__S");
                    push(@vals, sprintf("%.2f (%d/%.1f/%.1f)",
                                        $stddev / $kstat_max,
                                        $kstat_max, $avg, $stddev));
                }
            }
        }
        $out = "{$nl";
        $out .= "  # $tied$nl" if $tied;
        while (@keys) {
            my $key = shift @keys;
            my $val = shift @vals;
            my $pad = " " x ($klen_pad + 6);
            $val =~ s/\n/\n$pad/gm;
            $key = " $key" . " " x ($klen_pad - length($key)) if $nl;
            $out .= " $key => $val,$nl";
        }
        $out =~ s/,$/ / unless $nl;
        $out .= "}";
    }
    elsif ($type eq "CODE") {
        $out = 'sub { "???" }';
    }
    else {
        warn "Can't handle $type data";
        $out = "'#$type#'";
    }

    if ($class && $ref) {
        $out = "bless($out, " . quote($class) . ")";
    }
    return $out;
}

sub tied_str {
    my $tied = shift;
    if ($tied) {
        if (my $tied_ref = ref($tied)) {
            $tied = "tied $tied_ref";
        }
        else {
            $tied = "tied";
        }
    }
    return $tied;
}

sub fullname
{
    my($name, $idx, $ref) = @_;
    substr($name, 0, 0) = "\$";

    my @i = @$idx;  # need copy in order to not modify @$idx
    if ($ref && @i && $i[0] eq "\$") {
        shift(@i);  # remove one deref
        $ref = 0;
    }
    while (@i && $i[0] eq "\$") {
        shift @i;
        $name = "\$$name";
    }

    my $last_was_index;
    for my $i (@i) {
        if ($i eq "*" || $i eq "\$") {
            $last_was_index = 0;
            $name = "$i\{$name}";
        } elsif ($i =~ s/^\*//) {
            $name .= $i;
            $last_was_index++;
        } else {
            $name .= "->" unless $last_was_index++;
            $name .= $i;
        }
    }
    $name = "\\$name" if $ref;
    $name;
}

sub format_list
{
    my $paren = shift;
    my $comment = shift;
    my $indent_lim = $paren ? 0 : 1;
    my $tmp = "@_";
    if ($comment || (@_ > $indent_lim && (length($tmp) > 60 || $tmp =~ /\n/))) {
        my @elem = @_;
        for (@elem) { s/^/  /gm; }   # indent
        return "\n" . ($comment ? "  # $comment\n" : "") .
               join(",\n", @elem, "");
    } else {
        return join(", ", @_);
    }
}

sub str {
  if (length($_[0]) > 20) {
      for ($_[0]) {
      # Check for repeated string
      if (/^(.)\1\1\1/s) {
          # seems to be a repating sequence, let's check if it really is
          # without backtracking
          unless (/[^\Q$1\E]/) {
              my $base = quote($1);
              my $repeat = length;
              return "($base x $repeat)"
          }
      }
      # Length protection because the RE engine will blow the stack [RT#33520]
      if (length($_) < 16 * 1024 && /^(.{2,5}?)\1*\z/s) {
          my $base   = quote($1);
          my $repeat = length($_)/length($1);
          return "($base x $repeat)";
      }
      }
  }

  local $_ = &quote;

  if (length($_) > 40  && !/\\x\{/ && length($_) > (length($_[0]) * 2)) {
      # too much binary data, better to represent as a hex/base64 string

      # Base64 is more compact than hex when string is longer than
      # 17 bytes (not counting any require statement needed).
      # But on the other hand, hex is much more readable.
      if ($TRY_BASE64 && length($_[0]) > $TRY_BASE64 &&
          eval { require MIME::Base64 })
      {
          $require{"MIME::Base64"}++;
          return "MIME::Base64::decode(\"" .
                     MIME::Base64::encode($_[0],"") .
                 "\")";
      }
      return "pack(\"H*\",\"" . unpack("H*", $_[0]) . "\")";
  }

  return $_;
}

my %esc = (
    "\a" => "\\a",
    "\b" => "\\b",
    "\t" => "\\t",
    "\n" => "\\n",
    "\f" => "\\f",
    "\r" => "\\r",
    "\e" => "\\e",
);

# put a string value in double quotes
sub quote {
  local($_) = $_[0];
  # If there are many '"' we might want to use qq() instead
  s/([\\\"\@\$])/\\$1/g;
  return qq("$_") unless /[^\040-\176]/;  # fast exit

  s/([\a\b\t\n\f\r\e])/$esc{$1}/g;

  # no need for 3 digits in escape for these
  s/([\0-\037])(?!\d)/sprintf('\\%o',ord($1))/eg;

  s/([\0-\037\177-\377])/sprintf('\\x%02X',ord($1))/eg;
  s/([^\040-\176])/sprintf('\\x{%X}',ord($1))/eg;

  return qq("$_");
}

1;

__END__

=head1 NAME

Data::Dump - Pretty printing of data structures

=head1 SYNOPSIS

 use Data::Dump qw(dump ddx);

 $str = dump(@list);
 @copy_of_list = eval $str;

 # or use it for easy debug printout
 ddx localtime;

=head1 DESCRIPTION

This module provide functions that takes a list of values as their
argument and produces a string as its result.  The string contains
Perl code that, when C<eval>ed, produces a deep copy of the original
arguments.

The main feature of the module is that it strives to produce output
that is easy to read.  Example:

    @a = (1, [2, 3], {4 => 5});
    dump(@a);

Produces:

    (1, [2, 3], { 4 => 5 })

If you dump just a little data, it is output on a single line. If
you dump data that is more complex or there is a lot of it, line breaks
are automatically added to keep it easy to read.

The following functions are provided (only the dd* functions are exported by default):

=over

=item dump( ... )

=item pp( ... )

Returns a string containing a Perl expression.  If you pass this
string to Perl's built-in eval() function it should return a copy of
the arguments you passed to dump().

If you call the function with multiple arguments then the output will
be wrapped in parenthesis "( ..., ... )".  If you call the function with a
single argument the output will not have the wrapping.  If you call the function with
a single scalar (non-reference) argument it will just return the
scalar quoted if needed, but never break it into multiple lines.  If you
pass multiple arguments or references to arrays of hashes then the
return value might contain line breaks to format it for easier
reading.  The returned string will never be "\n" terminated, even if
contains multiple lines.  This allows code like this to place the
semicolon in the expected place:

   print '$obj = ', dump($obj), ";\n";

If dump() is called in void context, then the dump is printed on
STDERR and then "\n" terminated.  You might find this useful for quick
debug printouts, but the dd*() functions might be better alternatives
for this.

There is no difference between dump() and pp(), except that dump()
shares its name with a not-so-useful perl builtin.  Because of this
some might want to avoid using that name.

=item quote( $string )

Returns a quoted version of the provided string.

It differs from C<dump($string)> in that it will quote even numbers and
not try to come up with clever expressions that might shorten the
output.

=item dd( ... )

=item ddx( ... )

These functions will call dump() on their argument and print the
result to STDOUT (actually, it's the currently selected output handle, but
STDOUT is the default for that).

The difference between them is only that ddx() will prefix the lines
it prints with "# " and mark the first line with the file and line
number where it was called.  This is meant to be useful for debug
printouts of state within programs.

=back


=head1 LIMITATIONS

Code references will be displayed as simply 'sub { "???" }' when
dumped. Thus, C<eval>ing them will not reproduce the original routine.

If you forget to explicitly import the C<dump> function, your code will
core dump. That's because you just called the builtin C<dump> function
by accident, which intentionally dumps core.  Because of this you can
also import the same function as C<pp>, mnemonic for "pretty-print".

=head1 HISTORY

The C<Data::Dump> module grew out of frustration with Sarathy's
in-most-cases-excellent C<Data::Dumper>.  Basic ideas and some code
are shared with Sarathy's module.

The C<Data::Dump> module provides a much simpler interface than
C<Data::Dumper>.  No OO interface is available and there are no
configuration options to worry about (yet :-).  The other benefit is
that the dump produced does not try to set any variables.  It only
returns what is needed to produce a copy of the arguments.  This means
that C<dump("foo")> simply returns C<"foo">, and C<dump(1..5)> simply
returns C<(1, 2, 3, 4, 5)>.

=head1 SEE ALSO

L<Data::Dumper>, L<Storable>

=head1 AUTHORS

The C<Data::Dump> module is written by Gisle Aas <gisle@aas.no>, based
on C<Data::Dumper> by Gurusamy Sarathy <gsar@umich.edu>.

 Copyright 1998-2000,2003-2004,2008 Gisle Aas.
 Copyright 1996-1998 Gurusamy Sarathy.

This library is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=cut
1	dpavlin	494	package Data::Dump;
2
3			use strict;
4			use vars qw(@EXPORT @EXPORT_OK $VERSION $DEBUG);
5			use subs qq(dump);
6
7			require Exporter;
8			*import = \&Exporter::import;
9			@EXPORT = qw(dd ddx);
10			@EXPORT_OK = qw(dump pp quote);
11
12			$VERSION = "1.15";
13			$DEBUG = 0;
14
15			use overload ();
16			use vars qw(%seen %refcnt @dump @fixup %require $TRY_BASE64);
17
18			$TRY_BASE64 = 50 unless defined $TRY_BASE64;
19
20			my %is_perl_keyword = map { $_ => 1 }
21			qw( __FILE__ __LINE__ __PACKAGE__ __DATA__ __END__ AUTOLOAD BEGIN CORE
22			DESTROY END EQ GE GT INIT LE LT NE abs accept alarm and atan2 bind
23			binmode bless caller chdir chmod chomp chop chown chr chroot close
24			closedir cmp connect continue cos crypt dbmclose dbmopen defined
25			delete die do dump each else elsif endgrent endhostent endnetent
26			endprotoent endpwent endservent eof eq eval exec exists exit exp fcntl
27			fileno flock for foreach fork format formline ge getc getgrent
28			getgrgid getgrnam gethostbyaddr gethostbyname gethostent getlogin
29			getnetbyaddr getnetbyname getnetent getpeername getpgrp getppid
30			getpriority getprotobyname getprotobynumber getprotoent getpwent
31			getpwnam getpwuid getservbyname getservbyport getservent getsockname
32			getsockopt glob gmtime goto grep gt hex if index int ioctl join keys
33			kill last lc lcfirst le length link listen local localtime lock log
34			lstat lt m map mkdir msgctl msgget msgrcv msgsnd my ne next no not oct
35			open opendir or ord pack package pipe pop pos print printf prototype
36			push q qq qr quotemeta qw qx rand read readdir readline readlink
37			readpipe recv redo ref rename require reset return reverse rewinddir
38			rindex rmdir s scalar seek seekdir select semctl semget semop send
39			setgrent sethostent setnetent setpgrp setpriority setprotoent setpwent
40			setservent setsockopt shift shmctl shmget shmread shmwrite shutdown
41			sin sleep socket socketpair sort splice split sprintf sqrt srand stat
42			study sub substr symlink syscall sysopen sysread sysseek system
43			syswrite tell telldir tie tied time times tr truncate uc ucfirst umask
44			undef unless unlink unpack unshift untie until use utime values vec
45			wait waitpid wantarray warn while write x xor y);
46
47
48			sub dump
49			{
50			local %seen;
51			local %refcnt;
52			local %require;
53			local @fixup;
54
55			my $name = "a";
56			my @dump;
57
58			for my $v (@_) {
59			my $val = _dump($v, $name, [], tied($v));
60			push(@dump, [$name, $val]);
61			} continue {
62			$name++;
63			}
64
65			my $out = "";
66			if (%require) {
67			for (sort keys %require) {
68			$out .= "require $_;\n";
69			}
70			}
71			if (%refcnt) {
72			# output all those with refcounts first
73			for (@dump) {
74			my $name = $_->[0];
75			if ($refcnt{$name}) {
76			$out .= "my \$$name = $_->[1];\n";
77			undef $_->[1];
78			}
79			}
80			for (@fixup) {
81			$out .= "$_;\n";
82			}
83			}
84
85			my $paren = (@dump != 1);
86			$out .= "(" if $paren;
87			$out .= format_list($paren, undef,
88			map {defined($_->[1]) ? $_->[1] : "\$".$_->[0]}
89			@dump
90			);
91			$out .= ")" if $paren;
92
93			if (%refcnt \|\| %require) {
94			$out .= ";\n";
95			$out =~ s/^/ /gm; # indent
96			$out = "do {\n$out}";
97			}
98
99			#use Data::Dumper; print Dumper(\%refcnt);
100			#use Data::Dumper; print Dumper(\%seen);
101
102			print STDERR "$out\n" unless defined wantarray;
103			$out;
104			}
105
106			*pp = \&dump;
107
108			sub dd {
109			print dump(@_), "\n";
110			}
111
112			sub ddx {
113			my(undef, $file, $line) = caller;
114			$file =~ s,.*[\\/],,;
115			my $out = "$file:$line: " . dump(@_) . "\n";
116			$out =~ s/^/# /gm;
117			print $out;
118			}
119
120			sub _dump
121			{
122			my $ref = ref $_[0];
123			my $rval = $ref ? $_[0] : \$_[0];
124			shift;
125
126			my($name, $idx, $dont_remember) = @_;
127
128			my($class, $type, $id);
129			if (overload::StrVal($rval) =~ /^(?:([^=]+)=)?([A-Z]+)\(0x([^\)]+)\)$/) {
130			$class = $1;
131			$type = $2;
132			$id = $3;
133			} else {
134			die "Can't parse " . overload::StrVal($rval);
135			}
136			if ($] < 5.008 && $type eq "SCALAR") {
137			$type = "REF" if $ref eq "REF";
138			}
139			warn "\$$name(@$idx) $class $type $id ($ref)" if $DEBUG;
140
141			unless ($dont_remember) {
142			if (my $s = $seen{$id}) {
143			my($sname, $sidx) = @$s;
144			$refcnt{$sname}++;
145			my $sref = fullname($sname, $sidx,
146			($ref && $type eq "SCALAR"));
147			warn "SEEN: [\$$name(@$idx)] => [\$$sname(@$sidx)] ($ref,$sref)" if $DEBUG;
148			return $sref unless $sname eq $name;
149			$refcnt{$name}++;
150			push(@fixup, fullname($name,$idx)." = $sref");
151			return "do{my \$fix}" if @$idx && $idx->[-1] eq '$';
152			return "'fix'";
153			}
154			$seen{$id} = [$name, $idx];
155			}
156
157			my $out;
158			if ($type eq "SCALAR" \|\| $type eq "REF" \|\| $type eq "REGEXP") {
159			if ($ref) {
160			if ($class && $class eq "Regexp") {
161			my $v = "$rval";
162
163			my $mod = "";
164			if ($v =~ /^\(\?([msix-]+):([\x00-\xFF]*)\)\z/) {
165			$mod = $1;
166			$v = $2;
167			$mod =~ s/-.*//;
168			}
169
170			my $sep = '/';
171			my $sep_count = ($v =~ tr/\///);
172			if ($sep_count) {
173			# see if we can find a better one
174			for ('\|', ',', ':', '#') {
175			my $c = eval "\$v =~ tr/\Q$_\E//";
176			#print "SEP $_ $c $sep_count\n";
177			if ($c < $sep_count) {
178			$sep = $_;
179			$sep_count = $c;
180			last if $sep_count == 0;
181			}
182			}
183			}
184			$v =~ s/\Q$sep\E/\\$sep/g;
185
186			$out = "qr$sep$v$sep$mod";
187			undef($class);
188			}
189			else {
190			delete $seen{$id} if $type eq "SCALAR"; # will be seen again shortly
191			my $val = _dump($$rval, $name, [@$idx, "\$"]);
192			$out = $class ? "do{\\(my \$o = $val)}" : "\\$val";
193			}
194			} else {
195			if (!defined $$rval) {
196			$out = "undef";
197			}
198			elsif ($$rval =~ /^-?[1-9]\d{0,9}$/ \|\| $$rval eq "0") {
199			$out = $$rval;
200			}
201			else {
202			$out = str($$rval);
203			}
204			if ($class && !@$idx) {
205			# Top is an object, not a reference to one as perl needs
206			$refcnt{$name}++;
207			my $obj = fullname($name, $idx);
208			my $cl = quote($class);
209			push(@fixup, "bless \\$obj, $cl");
210			}
211			}
212			}
213			elsif ($type eq "GLOB") {
214			if ($ref) {
215			delete $seen{$id};
216			my $val = _dump($$rval, $name, [@$idx, "*"]);
217			$out = "\\$val";
218			if ($out =~ /^\\\*Symbol::/) {
219			$require{Symbol}++;
220			$out = "Symbol::gensym()";
221			}
222			} else {
223			my $val = "$$rval";
224			$out = "$$rval";
225
226			for my $k (qw(SCALAR ARRAY HASH)) {
227			my $gval = *$$rval{$k};
228			next unless defined $gval;
229			next if $k eq "SCALAR" && ! defined $$gval; # always there
230			my $f = scalar @fixup;
231			push(@fixup, "RESERVED"); # overwritten after _dump() below
232			$gval = _dump($gval, $name, [@$idx, "*{$k}"]);
233			$refcnt{$name}++;
234			my $gname = fullname($name, $idx);
235			$fixup[$f] = "$gname = $gval"; #XXX indent $gval
236			}
237			}
238			}
239			elsif ($type eq "ARRAY") {
240			my @vals;
241			my $tied = tied_str(tied(@$rval));
242			my $i = 0;
243			for my $v (@$rval) {
244			push(@vals, _dump($v, $name, [@$idx, "[$i]"], $tied));
245			$i++;
246			}
247			$out = "[" . format_list(1, $tied, @vals) . "]";
248			}
249			elsif ($type eq "HASH") {
250			my(@keys, @vals);
251			my $tied = tied_str(tied(%$rval));
252
253			# statistics to determine variation in key lengths
254			my $kstat_max = 0;
255			my $kstat_sum = 0;
256			my $kstat_sum2 = 0;
257
258			my @orig_keys = keys %$rval;
259			my $text_keys = 0;
260			for (@orig_keys) {
261			$text_keys++, last unless /^[-+]?(?:0\|[1-9]\d*)(?:\.\d+)?\z/;
262			}
263
264			if ($text_keys) {
265			@orig_keys = sort @orig_keys;
266			}
267			else {
268			@orig_keys = sort { $a <=> $b } @orig_keys;
269			}
270
271			for my $key (@orig_keys) {
272	dpavlin	495	my $val = eval { \$rval->{$key} };
273	dpavlin	494	$key = quote($key) if $is_perl_keyword{$key} \|\|
274			!($key =~ /^[a-zA-Z_]\w{0,19}\z/ \|\|
275			$key =~ /^-?[1-9]\d{0,8}\z/
276			);
277
278			$kstat_max = length($key) if length($key) > $kstat_max;
279			$kstat_sum += length($key);
280			$kstat_sum2 += length($key)*length($key);
281
282			push(@keys, $key);
283			push(@vals, _dump($$val, $name, [@$idx, "{$key}"], $tied));
284			}
285			my $nl = "";
286			my $klen_pad = 0;
287			my $tmp = "@keys @vals";
288			if (length($tmp) > 60 \|\| $tmp =~ /\n/ \|\| $tied) {
289			$nl = "\n";
290
291			# Determine what padding to add
292			if ($kstat_max < 4) {
293			$klen_pad = $kstat_max;
294			}
295			elsif (@keys >= 2) {
296			my $n = @keys;
297			my $avg = $kstat_sum/$n;
298			my $stddev = sqrt(($kstat_sum2 - $n * $avg * $avg) / ($n - 1));
299
300			# I am not actually very happy with this heuristics
301			if ($stddev / $kstat_max < 0.25) {
302			$klen_pad = $kstat_max;
303			}
304			if ($DEBUG) {
305			push(@keys, "__S");
306			push(@vals, sprintf("%.2f (%d/%.1f/%.1f)",
307			$stddev / $kstat_max,
308			$kstat_max, $avg, $stddev));
309			}
310			}
311			}
312			$out = "{$nl";
313			$out .= " # $tied$nl" if $tied;
314			while (@keys) {
315			my $key = shift @keys;
316			my $val = shift @vals;
317			my $pad = " " x ($klen_pad + 6);
318			$val =~ s/\n/\n$pad/gm;
319			$key = " $key" . " " x ($klen_pad - length($key)) if $nl;
320			$out .= " $key => $val,$nl";
321			}
322			$out =~ s/,$/ / unless $nl;
323			$out .= "}";
324			}
325			elsif ($type eq "CODE") {
326			$out = 'sub { "???" }';
327			}
328			else {
329			warn "Can't handle $type data";
330			$out = "'#$type#'";
331			}
332
333			if ($class && $ref) {
334			$out = "bless($out, " . quote($class) . ")";
335			}
336			return $out;
337			}
338
339			sub tied_str {
340			my $tied = shift;
341			if ($tied) {
342			if (my $tied_ref = ref($tied)) {
343			$tied = "tied $tied_ref";
344			}
345			else {
346			$tied = "tied";
347			}
348			}
349			return $tied;
350			}
351
352			sub fullname
353			{
354			my($name, $idx, $ref) = @_;
355			substr($name, 0, 0) = "\$";
356
357			my @i = @$idx; # need copy in order to not modify @$idx
358			if ($ref && @i && $i[0] eq "\$") {
359			shift(@i); # remove one deref
360			$ref = 0;
361			}
362			while (@i && $i[0] eq "\$") {
363			shift @i;
364			$name = "\$$name";
365			}
366
367			my $last_was_index;
368			for my $i (@i) {
369			if ($i eq "*" \|\| $i eq "\$") {
370			$last_was_index = 0;
371			$name = "$i\{$name}";
372			} elsif ($i =~ s/^\*//) {
373			$name .= $i;
374			$last_was_index++;
375			} else {
376			$name .= "->" unless $last_was_index++;
377			$name .= $i;
378			}
379			}
380			$name = "\\$name" if $ref;
381			$name;
382			}
383
384			sub format_list
385			{
386			my $paren = shift;
387			my $comment = shift;
388			my $indent_lim = $paren ? 0 : 1;
389			my $tmp = "@_";
390			if ($comment \|\| (@_ > $indent_lim && (length($tmp) > 60 \|\| $tmp =~ /\n/))) {
391			my @elem = @_;
392			for (@elem) { s/^/ /gm; } # indent
393			return "\n" . ($comment ? " # $comment\n" : "") .
394			join(",\n", @elem, "");
395			} else {
396			return join(", ", @_);
397			}
398			}
399
400			sub str {
401			if (length($_[0]) > 20) {
402			for ($_[0]) {
403			# Check for repeated string
404			if (/^(.)\1\1\1/s) {
405			# seems to be a repating sequence, let's check if it really is
406			# without backtracking
407			unless (/[^\Q$1\E]/) {
408			my $base = quote($1);
409			my $repeat = length;
410			return "($base x $repeat)"
411			}
412			}
413			# Length protection because the RE engine will blow the stack [RT#33520]
414			if (length($_) < 16 * 1024 && /^(.{2,5}?)\1*\z/s) {
415			my $base = quote($1);
416			my $repeat = length($_)/length($1);
417			return "($base x $repeat)";
418			}
419			}
420			}
421
422			local $_ = &quote;
423
424			if (length($_) > 40 && !/\\x\{/ && length($_) > (length($_[0]) * 2)) {
425			# too much binary data, better to represent as a hex/base64 string
426
427			# Base64 is more compact than hex when string is longer than
428			# 17 bytes (not counting any require statement needed).
429			# But on the other hand, hex is much more readable.
430			if ($TRY_BASE64 && length($_[0]) > $TRY_BASE64 &&
431			eval { require MIME::Base64 })
432			{
433			$require{"MIME::Base64"}++;
434			return "MIME::Base64::decode(\"" .
435			MIME::Base64::encode($_[0],"") .
436			"\")";
437			}
438			return "pack(\"H\",\"" . unpack("H", $_[0]) . "\")";
439			}
440
441			return $_;
442			}
443
444			my %esc = (
445			"\a" => "\\a",
446			"\b" => "\\b",
447			"\t" => "\\t",
448			"\n" => "\\n",
449			"\f" => "\\f",
450			"\r" => "\\r",
451			"\e" => "\\e",
452			);
453
454			# put a string value in double quotes
455			sub quote {
456			local($_) = $_[0];
457			# If there are many '"' we might want to use qq() instead
458			s/([\\\"\@\$])/\\$1/g;
459			return qq("$_") unless /[^\040-\176]/; # fast exit
460
461			s/([\a\b\t\n\f\r\e])/$esc{$1}/g;
462
463			# no need for 3 digits in escape for these
464			s/([\0-\037])(?!\d)/sprintf('\\%o',ord($1))/eg;
465
466			s/([\0-\037\177-\377])/sprintf('\\x%02X',ord($1))/eg;
467			s/([^\040-\176])/sprintf('\\x{%X}',ord($1))/eg;
468
469			return qq("$_");
470			}
471
472			1;
473
474			__END__
475
476			=head1 NAME
477
478			Data::Dump - Pretty printing of data structures
479
480			=head1 SYNOPSIS
481
482			use Data::Dump qw(dump ddx);
483
484			$str = dump(@list);
485			@copy_of_list = eval $str;
486
487			# or use it for easy debug printout
488			ddx localtime;
489
490			=head1 DESCRIPTION
491
492			This module provide functions that takes a list of values as their
493			argument and produces a string as its result. The string contains
494			Perl code that, when C<eval>ed, produces a deep copy of the original
495			arguments.
496
497			The main feature of the module is that it strives to produce output
498			that is easy to read. Example:
499
500			@a = (1, [2, 3], {4 => 5});
501			dump(@a);
502
503			Produces:
504
505			(1, [2, 3], { 4 => 5 })
506
507			If you dump just a little data, it is output on a single line. If
508			you dump data that is more complex or there is a lot of it, line breaks
509			are automatically added to keep it easy to read.
510
511			The following functions are provided (only the dd* functions are exported by default):
512
513			=over
514
515			=item dump( ... )
516
517			=item pp( ... )
518
519			Returns a string containing a Perl expression. If you pass this
520			string to Perl's built-in eval() function it should return a copy of
521			the arguments you passed to dump().
522
523			If you call the function with multiple arguments then the output will
524			be wrapped in parenthesis "( ..., ... )". If you call the function with a
525			single argument the output will not have the wrapping. If you call the function with
526			a single scalar (non-reference) argument it will just return the
527			scalar quoted if needed, but never break it into multiple lines. If you
528			pass multiple arguments or references to arrays of hashes then the
529			return value might contain line breaks to format it for easier
530			reading. The returned string will never be "\n" terminated, even if
531			contains multiple lines. This allows code like this to place the
532			semicolon in the expected place:
533
534			print '$obj = ', dump($obj), ";\n";
535
536			If dump() is called in void context, then the dump is printed on
537			STDERR and then "\n" terminated. You might find this useful for quick
538			debug printouts, but the dd*() functions might be better alternatives
539			for this.
540
541			There is no difference between dump() and pp(), except that dump()
542			shares its name with a not-so-useful perl builtin. Because of this
543			some might want to avoid using that name.
544
545			=item quote( $string )
546
547			Returns a quoted version of the provided string.
548
549			It differs from C<dump($string)> in that it will quote even numbers and
550			not try to come up with clever expressions that might shorten the
551			output.
552
553			=item dd( ... )
554
555			=item ddx( ... )
556
557			These functions will call dump() on their argument and print the
558			result to STDOUT (actually, it's the currently selected output handle, but
559			STDOUT is the default for that).
560
561			The difference between them is only that ddx() will prefix the lines
562			it prints with "# " and mark the first line with the file and line
563			number where it was called. This is meant to be useful for debug
564			printouts of state within programs.
565
566			=back
567
568
569			=head1 LIMITATIONS
570
571			Code references will be displayed as simply 'sub { "???" }' when
572			dumped. Thus, C<eval>ing them will not reproduce the original routine.
573
574			If you forget to explicitly import the C<dump> function, your code will
575			core dump. That's because you just called the builtin C<dump> function
576			by accident, which intentionally dumps core. Because of this you can
577			also import the same function as C<pp>, mnemonic for "pretty-print".
578
579			=head1 HISTORY
580
581			The C<Data::Dump> module grew out of frustration with Sarathy's
582			in-most-cases-excellent C<Data::Dumper>. Basic ideas and some code
583			are shared with Sarathy's module.
584
585			The C<Data::Dump> module provides a much simpler interface than
586			C<Data::Dumper>. No OO interface is available and there are no
587			configuration options to worry about (yet :-). The other benefit is
588			that the dump produced does not try to set any variables. It only
589			returns what is needed to produce a copy of the arguments. This means
590			that C<dump("foo")> simply returns C<"foo">, and C<dump(1..5)> simply
591			returns C<(1, 2, 3, 4, 5)>.
592
593			=head1 SEE ALSO
594
595			L<Data::Dumper>, L<Storable>
596
597			=head1 AUTHORS
598
599			The C<Data::Dump> module is written by Gisle Aas <gisle@aas.no>, based
600			on C<Data::Dumper> by Gurusamy Sarathy <gsar@umich.edu>.
601
602			Copyright 1998-2000,2003-2004,2008 Gisle Aas.
603			Copyright 1996-1998 Gurusamy Sarathy.
604
605			This library is free software; you can redistribute it and/or
606			modify it under the same terms as Perl itself.
607
608			=cut