lib/WAIT/Table.pm

#                              -*- Mode: Cperl -*- 
# Table.pm -- 
# ITIID           : $ITI$ $Header $__Header$
# Author          : Ulrich Pfeifer
# Created On      : Thu Aug  8 13:05:10 1996
# Last Modified By: Ulrich Pfeifer
# Last Modified On: Wed Jan 23 14:15:15 2002
# Language        : CPerl
# Update Count    : 152
# Status          : Unknown, Use with caution!
#
# Copyright (c) 1996-1997, Ulrich Pfeifer
#

=head1 NAME

WAIT::Table -- Module for maintaining Tables / Relations

=head1 SYNOPSIS

  require WAIT::Table;

=head1 DESCRIPTION

=cut

package WAIT::Table;
our $VERSION = "2.000";

use WAIT::Table::Handle ();
require WAIT::Parse::Base;

use strict;
use Carp qw(cluck croak confess);
# use autouse Carp => qw( croak($) );
use BerkeleyDB;
use Fcntl;

=head2 Creating a Table.

The constructor WAIT::Table-E<gt>new is normally called via the
create_table method of a database handle. This is not enforced, but
creating a table does not make any sense unless the table is
registered by the database because the latter implements persistence
of the meta data. Registering is done automatically by letting the
database handle the creation of a table.

  my $db = WAIT::Database->create(name => 'sample');
  my $tb = $db->create_table(name     => 'test',
                             access   => $access,
                             layout   => $layout,
                             attr     => ['docid', 'headline'],
                            );

The constructor returns a handle for the table. This handle is hidden by the
table module, to prevent direct access if called via Table.

=over 10

=item C<access> => I<accessobj>

A reference to an access object for the external parts (attributes) of
tuples. As you may remember, the WAIT System does not enforce that
objects are completely stored inside the system to avoid duplication.
There is no (strong) point in storing all your HTML documents inside
the system when indexing your WWW-Server.

The access object is designed to work like a tied hash. You pass
the reference to the object, not the tied hash though. An example
implementation of an access class that works for manpages is
WAIT::Document::Nroff.

The implementation needs to take into account that WAIT will keep this
object in a Data::Dumper or Storable database and re-use it when sman
is run. So it is not good enough if we can produce the index with it
now, when we create or actively access the table, WAIT also must be
able to retrieve documents on its own, when we are in a different
context. This happens specifically in a retrieval. To get this working
seamlessly, the access-defining class must implement a close method.
This method will be called before the Data::Dumper dump takes place.
In that moment the access-defining class must get rid of all data
structures that cannot be reconstructed via the Data::Dumper dump,
such as database handles or C pointers.

=item C<path> => I<dir>

The path to database. Files for indexes will have I<path>
as prefix. I<Mandatory>, but usually taken care of by the
WAIT::Database handle when the constructor is called via
WAIT::Database::create_table().

=item C<name> => I<name>

The name of this table. I<Mandatory>

=item C<attr> => [ I<attr> ... ]

A reference to an array of attribute names. WAIT will keep the
contents of these attributes in its table. I<Mandatory>

=item C<djk> => [ I<attr> ... ]

A reference to an array of attribute names which make up the
I<disjointness key>. Don't think about it - it's of no use yet;

=item C<layout> => I<layoutobj>

A reference to an external parser object. Defaults to a new instance
of C<WAIT::Parse::Base>. For an example implementation see
WAIT::Parse::Nroff. A layout class can be implemented as a singleton
class if you so like.

=item C<keyset> => I<keyset>

The set of attributes needed to identify a record. Defaults to all
attributes.

=item C<invindex> => I<inverted index>

A reference to an anon array defining attributes of each record that
need to be indexed. See the source of smakewhatis for how to set this
up.

=back

=cut

# Construct a new table object (normally reached through the
# create_table method of a database handle).
#
# Parameters (key/value list):
#   name, attr, env, maindbfile, tablename, path
#            -- mandatory; the constructor confesses if one is missing
#               or false.
#   keyset   -- reference to a list of attribute lists, one index is
#               created per entry; defaults to one key made of all
#               attributes.
#   djk      -- optional disjointness key. If given, the object is
#               blessed into the ::Independent or ::Disjoint subclass
#               and '_weight' is prepended to the attribute list.
#   layout   -- parser object; defaults to a new WAIT::Parse::Base.
#   access   -- optional access object for external attributes.
#   invindex -- optional reference to a flat list of
#               (attribute => pipeline spec) pairs. The caller's array
#               is left untouched.
#
# Returns the blessed table object.
sub new {
  my $type = shift;
  my %parm = @_;
  my $self = {};

  # Check for mandatory attrs early
  for my $x (qw(name attr env maindbfile tablename)) {
    $self->{$x}     = $parm{$x}     or confess "No $x specified";
  }

  # Do that before we eventually add '_weight' to attributes.
  $self->{keyset}   = $parm{keyset}   || [[@{$parm{attr}}]];

  $self->{mode}     = O_CREAT | O_RDWR;

  # Determine and set up subclass
  $type = ref($type) || $type;
  if (defined $parm{djk}) {
    if (@{$parm{djk}} == @{$parm{attr}}) {
      # All attributes in DK (sloppy test here!)
      $type .= '::Independent';
      require WAIT::Table::Independent;
    } else {
      $type .= '::Disjoint';
      require WAIT::Table::Disjoint;
    }
    # Add '_weight' to attributes. $self->{attr} refers to the very same
    # array as $parm{attr}, so the object sees the new attribute too.
    my %attr;
    @attr{@{$parm{attr}}} = (1) x @{$parm{attr}};
    unshift @{$parm{attr}}, '_weight' unless $attr{'_weight'};
  }

  $self->{path}     = $parm{path}     or confess "No path specified";
  bless $self, $type;

  $self->{djk}      = $parm{djk}      if defined $parm{djk};
  $self->{layout}   = $parm{layout} || WAIT::Parse::Base->new;
  $self->{access}   = $parm{access} if defined $parm{access};
  $self->{nextk}    = 1;        # next record to insert; first record unused
  $self->{deleted}  = {};       # no deleted records yet
  $self->{indexes}  = {};

  # Checking for readers is not necessary, but let's go with the
  # generic method.

  # Call create_index() and create_inverted_index() for compatibility.
  # The implicit loop variable is kept on purpose: create_index() has
  # historically looked at $_ to find the attribute list of the key.
  for (@{$self->{keyset}||[]}) {
    #carp "Specification of indexes at table create time is deprecated";
    $self->create_index(@$_);
  }

  # Walk a private copy of the (attribute, spec) pairs so that the array
  # handed in by the caller is not consumed.
  my @invindex = @{$parm{invindex}||[]};
  while (@invindex) {
    # carp "Specification of inverted indexes at table create time is deprecated";
    my $att  = shift @invindex;
    my @spec = @{shift @invindex};
    my @opt  = ();

    if (ref($spec[0])) {
      warn "Secondary pipelines are deprecated";
      @opt = %{shift @spec};
    }
    $self->create_inverted_index(attribute => $att,
                                 pipeline  => \@spec,
                                 @opt);
  }

  $self;
  # end of backward compatibility stuff
}

# Install the read-only accessors maindbfile() and tablename() into the
# current package. Each returns the object attribute of the same name
# and confesses when that attribute is missing or false.
foreach my $field ('maindbfile', 'tablename') {
  my $reader = sub {
    my $table = shift;
    my $value = $table->{$field};
    unless ($value) {
      require Carp;
      Carp::confess("accessor $field not there");
    }
    return $value;
  };
  no strict 'refs';
  *{$field} = $reader;
}

=head2 Creating an index

  $tb->create_index('docid');

C<create_index>
must be called with a list of attributes. This must be a subset of the
attributes specified when the table was created. Currently this
method must be called before the first tuple is inserted in the
table!

=cut

# Create a WAIT::Index over the given attributes and register it in
# $self->{indexes} under the attribute names joined with '-'.
#
# Arguments: the list of attribute names that make up the key.
# Returns:   the new WAIT::Index object.
# Confesses when records have already been inserted ($self->{nextk} > 1).
sub create_index {
  my $self= shift;

  confess "Cannot create index for table aready populated"
    if $self->{nextk} > 1;

  require WAIT::Index;

  my $name = join '-', @_;
  $self->{indexes}->{$name} =
    WAIT::Index->new(
                     path => $self->path.'/'.$name,
                     subname => $name,
                     env  => $self->{env},
                     maindbfile => $self->maindbfile,
                     tablename => $self->tablename,
                     # Pass the attributes we were called with. Reading
                     # the global $_ here only worked when the caller
                     # happened to loop over the key with $_.
                     attr => [@_],
                    );
}

=head2 Creating an inverted index

  $tb->create_inverted_index
    (attribute => 'au',
     pipeline  => ['detex', 'isotr', 'isolc', 'split2', 'stop'],
     predicate => 'plain',
    );

=over 5

=item C<attribute>

The attribute to build the index on. This attribute may not be in the
set of attributes specified when the table was created.

=item C<pipeline>

A pipeline specification is a reference to an array of method names
(from package C<WAIT::Filter>) which are to be applied in sequence to
the contents of the named attribute. The attribute name may not be in
the attribute list.

=item C<predicate>

An indication which predicate the index implements. This may be
e.g. 'plain', 'stemming' or 'soundex'. The indicator will be used for
query processing. Currently there is no standard set of predicate
names. The predicate defaults to the last member of the pipeline if
omitted.

=back

Currently this method must be called before the first tuple is
inserted in the table!

=cut

# Build a WAIT::InvertedIndex for one attribute and append it to the list
# kept in $self->{inverted}->{ATTRIBUTE}.
#
# Named parameters:
#   attribute -- mandatory, the attribute to index.
#   pipeline  -- mandatory, array reference of filter names.
#   predicate -- optional, defaults to the last pipeline member; it is
#                not handed on to the index constructor.
# Any other parameter is passed through to WAIT::InvertedIndex->new.
#
# Returns the result of the push, i.e. the number of inverted indexes
# now registered for the attribute. Confesses on missing parameters or
# when the table already holds records.
sub create_inverted_index {
  my ($self, %parm) = @_;

  $parm{attribute} or confess "No attribute specified";
  $parm{pipeline}  or confess "No pipeline specified";

  $parm{predicate} ||= $parm{pipeline}->[-1];

  if ($self->{nextk} > 1) {
    confess "Cannot create index for table aready populated";
  }

  require WAIT::InvertedIndex;

  # backward compatibility stuff: everything besides the three known
  # parameters is forwarded verbatim
  my %extra = %parm;
  delete @extra{qw(attribute pipeline predicate)};

  my @pipeline = @{$parm{pipeline}};    # private copy for the index
  my $name     = join '_', $parm{attribute}, @pipeline;

  my $idx = WAIT::InvertedIndex->new(
    path       => $self->path.'/'.$name,
    subname    => $name,
    env        => $self->{env},
    maindbfile => $self->maindbfile,
    tablename  => $self->tablename,
    filter     => \@pipeline,
    name       => $name,
    attr       => $parm{attribute},
    %extra,
  );

  # We will have to use $parm{predicate} here
  push @{$self->{inverted}->{$parm{attribute}}}, $idx;
}

# Alias for path(): returns whatever $self->path returns.
sub dir {
  my ($self) = @_;
  return $self->path;
}

=head2 C<$tb-E<gt>layout>

Returns the reference to the associated parser object.

=cut

# Accessor for the parser object stored under the 'layout' key.
sub layout {
  my ($self) = @_;
  return $self->{layout};
}

=head2 C<$tb-E<gt>fields>

Returns the names of the attributes that have an inverted index.

=cut


# Returns the keys of $self->{inverted}, i.e. the names of the
# attributes an inverted index was created for.
sub fields {
  my ($self) = @_;
  return keys %{ $self->{inverted} };
}

=head2 C<$tb-E<gt>drop>

Must be called via C<WAIT::Database::drop_table>

=cut

# Drop the table's indexes. Only code compiled in package WAIT::Database
# may call this; any other caller triggers a confess. The table is
# closed first and then drop() is called on every entry of
# $self->{indexes}.
sub drop {
  my $self = shift;

  # database knows about this -- everybody else is turned away
  confess ref($self)."::drop called directly"
    unless (caller)[0] eq 'WAIT::Database';

  $self->close;               # just make sure

#    my $path = $self->path;

  for my $index (values %{$self->{indexes}}) {
    $index->drop;
  }
#    unlink "$path/records";
#    rmdir "$path/read" or warn "Could not rmdir '$path/read'";
}

# Load a module whose name is only known at run time: turns
# 'Foo::Bar' into 'Foo/Bar.pm' and hands that to require.
# Returns what require returns.
sub mrequire ($) {
  (my $file = shift) =~ s{::}{/}g;
  $file .= '.pm';
  require $file;
}

# Returns the 'path' attribute. If it is missing or false, a
# Data::Dumper dump of the object is written to STDERR and the method
# confesses.
sub path {
  my $self = shift;
  my $path = $self->{path};
  return $path if $path;

  # Debugging aid: show the whole object before giving up.
  require Data::Dumper;
  my $dumper = Data::Dumper->new([$self],[qw(self)])->Indent(1)->Useqq(1);
  print STDERR "Line " . __LINE__ . ", File: " . __FILE__ . "\n" . $dumper->Dump; # XXX
  require Carp;
  confess("NO path attr");
}

# Prepare the table for use: load the classes of the table, layout and
# access objects, hand mode/env/maindbfile down to all indexes, and tie
# $self->{db} to the "<tablename>/records" sub-database unless a handle
# is already stored in $self->{dbh}.
#
# Returns $self. Confesses when the tie fails.
sub open {
  my $self = shift;
  my $path = $self->path . '/records';

  # Make sure the classes of this object and of its helper objects are
  # loaded before they are used.
  mrequire ref($self);           # that's tricky eh?
  if (defined $self->{'layout'}) {
    mrequire ref($self->{'layout'});
  }
  if (defined $self->{'access'}) {
    mrequire ref($self->{'access'});
  }
  # Copy the table's mode, environment and file name into every index.
  if (exists $self->{indexes}) {
    require WAIT::Index;
    for my $Ind (values %{$self->{indexes}}) {
      for my $x (qw(mode env maindbfile)) {
        $Ind->{$x} = $self->{$x};
      }
    }
  }
  # Same for every inverted index of every attribute.
  if (exists $self->{inverted}) {
    my ($att, $idx);
    for $att (keys %{$self->{inverted}}) {
      for $idx (@{$self->{inverted}->{$att}}) {
        for my $x (qw(mode env maindbfile)) {
          $idx->{$x} = $self->{$x};
        }
      }
    }
    require WAIT::InvertedIndex;
  }

  # CONFUSION: WAIT knows two *modes*: read-only or read-write.
  # BerkeleyDB means file permissions when talking about Mode.
  # BerkeleyDB has the "Flags" attribute to specify
  # read/write/lock/etc subsystems.

  # Translate the Fcntl style mode of the table into BerkeleyDB flags.
  my $flags;
  if ($self->{mode} & O_RDWR) {
    $flags = DB_CREATE; # | DB_INIT_MPOOL | DB_PRIVATE | DB_INIT_CDB;
    #warn "DEBUG: Flags on table $path set to 'writing'";
  } else {
    $flags = DB_RDONLY;
    #warn "DEBUG: Flags on table $path set to 'readonly'";
  }
  # Tie only once; the object returned by tie is kept in $self->{dbh}.
  unless (defined $self->{dbh}) {
    my $subname = $self->tablename . "/records";
    $self->{dbh} =
        tie(%{$self->{db}}, 'BerkeleyDB::Btree',
            $self->{env} ? (Env => $self->{env}) : (),
            # Filename => $file,
            Filename => $self->maindbfile,
            Subname => $subname,
            Mode => 0664,
            Flags => $flags,
            $WAIT::Database::Cachesize?(Cachesize => $WAIT::Database::Cachesize):(),
            $WAIT::Database::Pagesize?(Pagesize => $WAIT::Database::Pagesize):(),
           )
            or confess "Cannot tie: $BerkeleyDB::Error\nDEBUG: Filename[$self->{maindbfile}]subname[$subname]Mode[0664]Flags[$flags]";
  }
  $self;
}

# Fetch the external part of a record through the access object.
# All arguments are passed on to the FETCH method of $self->{access};
# the class of the access object is loaded first. Nothing is fetched
# when the table has no access object.
sub fetch_extern {
  my ($self, @key) = @_;

  # print "#@_", $self->{'access'}->{Mode}, "\n"; # DEBUGGING?
  if (exists $self->{'access'}) {
    my $access = $self->{'access'};
    mrequire(ref($access));
    $access->FETCH(@key);
  }
}

# Look a record up with fetch() and pass the resulting list on to
# fetch_extern().
sub fetch_extern_by_id {
  my ($self, @id) = @_;

  my @record = $self->fetch(@id);
  return $self->fetch_extern(@record);
}

# Find an index usable for the given attribute names.
#
# An index qualifies when every attribute in its name (the names are
# joined with '-') is among the arguments. Returns the first qualifying
# index object in hash order, or undef if there is none.
sub _find_index {
  my ($self, @wanted) = @_;

  my %given = map { $_ => 1 } @wanted;

  for my $name (keys %{$self->{indexes}}) {
    my @missing = grep { !exists $given{$_} } split /-/, $name;
    return $self->{indexes}->{$name} unless @missing;
  }
  return undef;
}

# Ask the index matching the given attribute => value pairs whether it
# knows the record. Returns nothing when no index covers the attributes
# (no index-no have), otherwise whatever the index's have() returns.
# The table is opened on demand.
sub have {
  my ($self, @pairs) = @_;
  my %parm = @pairs;

  my $index = $self->_find_index(keys %parm);
  return unless $index;

  $self->open unless defined $self->{db};
  return $index->have(@pairs);
}

# Insert a record given as attribute => value pairs.
#
# The values of the table's own attributes are joined with $; and stored
# under a record number; a number from $self->{deleted} is reused when
# one is available, otherwise $self->{nextk} is consumed. The record is
# then entered into every index and, for the attributes that have them,
# into the inverted indexes.
#
# Returns the record number, or undef when an index refuses the record
# (duplicate key); in that case the number is released again and the
# entries already made in other indexes are removed.
# Confesses when the table is not open for writing.
sub insert {
  my $self  = shift;
  my %parm  = @_;

  defined $self->{db} or $self->open;

  # We should move all writing methods to a subclass to check only once
  $self->{mode} & O_RDWR or confess "Cannot insert into table opened in RD_ONLY mode";

  # Only missing attributes become empty strings; a defined but false
  # value such as 0 must be stored as it is.
  my $tuple = join($;, map { defined $parm{$_} ? $parm{$_} : '' } @{$self->{attr}});
  my $key;
  my @deleted = keys %{$self->{deleted}};
  my $gotkey = 0;

  if (@deleted) {
    $key = pop @deleted;
    delete $self->{deleted}->{$key};
    # Sanity check
    if ($key && $key>0) {
      $gotkey=1;
    } else {
      warn(sprintf("WAIT database inconsistency during insert ".
                   "key[%s]: Please rebuild index\n",
                   $key
                  ));
    }
  }
  unless ($gotkey) {
    $key = $self->{nextk}++;
  }
  $self->{db}->{$key} = $tuple;
  for my $index (values %{$self->{indexes}}) {
    next if $index->insert($key, %parm);

    # duplicate key, undo changes
    if ($key == $self->{nextk}-1) {
      $self->{nextk}--;
    } else {
      # warn "setting key[$key] deleted during insert";
      $self->{deleted}->{$key}=1;
    }
    # Take the record out of the indexes visited before the failing one.
    for my $done (values %{$self->{indexes}}) {
      last if $done eq $index;
      $done->remove($key, %parm);
    }
    return undef;
  }
  if (defined $self->{inverted}) {
    for my $att (keys %{$self->{inverted}}) {
      next unless defined $parm{$att};
      for my $inverted (@{$self->{inverted}->{$att}}) {
        $inverted->insert($key, $parm{$att});
      }
    }
  }
  $key
}

# Call sync() on every index and on every inverted index of the table.
sub sync {
  my $self = shift;

  for my $index (values %{$self->{indexes}}) {
    $index->sync;
  }
  if (defined $self->{inverted}) {
    for my $att (keys %{$self->{inverted}}) {
      $_->sync for @{$self->{inverted}->{$att}};
    }
  }
}

# Return the record stored under $key as an attribute => value list
# (see unpack()). Records marked in $self->{deleted} yield the empty
# list. The table is opened on demand.
sub fetch {
  my ($self, $key) = @_;

  if (exists $self->{deleted}->{$key}) {
    return ();
  }

  $self->open unless defined $self->{db};
  return $self->unpack($self->{db}->{$key});
}

# Delete the record with number $key.
#
# The stored tuple is removed from all indexes. For the inverted
# indexes the caller has to supply the full record as additional
# attribute => value pairs, because only those values (merged over the
# stored tuple) are known here. Finally the key is marked in
# $self->{deleted}.
#
# Returns the (incremented) deletion mark; for an already deleted key
# the existing mark is returned and nothing else happens. A false key
# only produces a warning.
sub delete_by_key {
  my ($self, $key, @record) = @_;

  unless ($key) {
    cluck "Warning: delete_by_key called without key. Looks like a bug in WAIT?";
    return;
  }

  return $self->{deleted}->{$key} if defined $self->{deleted}->{$key};

  my %tuple = $self->fetch($key);
  for my $index (values %{$self->{indexes}}) {
    $index->delete($key, %tuple);
  }
  if (defined $self->{inverted}) {
    # User *must* provide the full record for this or the entries
    # in the inverted index will not be removed
    %tuple = (%tuple, @record);
    for my $att (keys %{$self->{inverted}}) {
      next unless defined $tuple{$att};
      $_->delete($key, $tuple{$att}) for @{$self->{inverted}->{$att}};
    }
  }
  # warn "setting key[$key] deleted during delete_by_key";
  return ++$self->{deleted}->{$key};
}

# Delete the record described by the given attribute => value pairs:
# have() supplies the record number, delete_by_key() does the work.
# Yields a false value when have() does not return a defined key.
sub delete {
  my ($self, @pairs) = @_;
  my $tkey = $self->have(@pairs);
  # warn "tkey[$tkey]\@_[@_]";
  defined $tkey && $self->delete_by_key($tkey, @pairs);
}

# Split a stored tuple on $; and pair the fields with the attribute
# names in $self->{attr}. Returns the flat attribute => value list;
# attributes for which the tuple has no field get undef. An undefined
# tuple produces a warning and an empty return.
sub unpack {
  my ($self, $tuple) = @_;

  if (!defined $tuple) {
    # require Carp; # unfortunately gives us "bizarre copy...." :-(((((
    warn("Debug: somebody called unpack without argument tuple!");
    return;
  }

  my @values = split /$;/, $tuple;
  my @pairs  = map { ($_, shift @values) } @{$self->{attr}};
  @pairs;
}

# Forward set($iattr, $value) to every inverted index of the table.
# Always returns 1.
sub set {
  my ($self, $iattr, $value) = @_;

  # in the rare case that they haven't written a single record yet, we
  # make sure, the inverted inherits our $self->{mode}:
  $self->open unless defined $self->{db};

  # An explicit loop is used for every perl; the map based variant was
  # only ever selected for perl up to 5.003, which cannot compile this
  # file anyway.
  for my $att (keys %{$self->{inverted}}) {
    for my $idx (@{$self->{inverted}->{$att}}) {
      $idx->set($iattr, $value);
    }
  }

  1;
}

# Close the table: close the access object (errors ignored), all
# indexes and inverted indexes -- the latter two only if their classes
# have been loaded -- then drop the tie handle, untie the record hash
# and forget the env, db, path and maindbfile attributes.
# Always returns 1.
sub close {
  my $self = shift;

  #cluck("DEBUG: Closing A Table");

  if (exists $self->{'access'}) {
    eval { $self->{'access'}->close }; # dont bother if not opened
  }

  if ($WAIT::Index::VERSION) {
    $_->close() for values %{$self->{indexes}};
  }

  # Uli: we can avoid closing indexes: if WAIT::InvertedIndex has not
  # been loaded, they cannot have been altered so far
  if (defined $self->{inverted} && $WAIT::InvertedIndex::VERSION) {
    for my $att (keys %{$self->{inverted}}) {
      for my $idx (@{$self->{inverted}->{$att}}) {
        $idx->close;
      }
    }
  }

  # Let go of the object returned by tie before untying the hash.
  delete $self->{dbh} if $self->{dbh};
  untie %{$self->{db}};

  delete @{$self}{qw(env db path maindbfile)};

  1;
}

# Destructor: removes the 'env' entry from the object.
sub DESTROY {
  my ($self) = @_;

  delete $self->{env};

  # require Data::Dumper; print STDERR "Line " . __LINE__ . ", File: " . __FILE__ . "\n" . Data::Dumper->new([$self],[qw(self)])->Indent(1)->Useqq(1)->Dump; # XXX

}

# Create a WAIT::Scan over the table. The scan object is constructed
# with the table, the highest record number handed out so far
# ($self->{nextk} - 1) and the caller's $code. The table is opened
# first if no tie handle is present.
sub open_scan {
  my ($self, $code) = @_;

  $self->open unless $self->{dbh};
  require WAIT::Scan;

  my $last = $self->{nextk} - 1;
  return WAIT::Scan->new($self, $last, $code);
}

# Start a scan on the index built over the attributes in @$attr.
# Returns what the index's open_scan($code) returns; confesses when no
# index with that attribute combination exists.
sub open_index_scan {
  my ($self, $attr, $code) = @_;

  my $name  = join '-', @$attr;
  my $index = $self->{indexes}->{$name};

  confess "No such index '$name'" unless defined $index;
  return $index->open_scan($code);
}

# Forward-declare WAIT::Query::Raw::new. A body-less named "sub" is
# processed when the file is compiled, so the declaration takes effect
# even though it sits inside an eval block.
# NOTE(review): "defined \&NAME" tests a reference, which is always
# defined, so the eval block itself presumably never runs -- confirm
# before relying on the condition. prefix() and intervall() below bless
# their results into WAIT::Query::Raw.
eval {sub WAIT::Query::Raw::new} unless defined \&WAIT::Query::Raw::new;

# Run a prefix lookup on every inverted index of attribute $attr.
# Returns a hash reference blessed into WAIT::Query::Raw that maps the
# name of each index to its defined prefix() result. The table is
# opened on demand.
sub prefix {
  my ($self, $attr, $prefix) = @_;

  $self->open unless defined $self->{db}; # require layout

  my %hits;
  for my $idx (@{$self->{inverted}->{$attr}}) {
    my $found = $idx->prefix($prefix);
    next unless defined $found;
    $hits{$idx->name} = $found;
  }
  return bless \%hits, 'WAIT::Query::Raw';
}

# Run an interval lookup with bounds $lb and $ub on every inverted index
# of attribute $attr. Returns a hash reference blessed into
# WAIT::Query::Raw that maps the name of each index to its defined
# intervall() result. The table is opened on demand.
sub intervall {
  my ($self, $attr, $lb, $ub) = @_;

  $self->open unless defined $self->{db}; # require layout

  my %hits;
  for my $idx (@{$self->{inverted}->{$attr}}) {
    my $found = $idx->intervall($lb, $ub);
    next unless defined $found;
    $hits{$idx->name} = $found;
  }
  return bless \%hits, 'WAIT::Query::Raw';
}

# Search the inverted indexes of one attribute.
#
# Preferred call: search_ref({attr => ..., cont => ..., raw => ...}).
# The deprecated form search_ref($attr, $cont, $raw) still works but
# clucks.
#
#   raw  -- hash reference: index name => array reference of terms. The
#           weights returned by search_raw() are scaled by 1/number of
#           terms.
#   cont -- query string handed to search_ref() of each index.
#
# Returns a hash reference mapping record numbers to accumulated
# weights; records marked in $self->{deleted} are filtered out.
sub search_ref {
  my $self = shift;
  my $query;

  if (ref $_[0]) {
    $query = shift;
    # require Data::Dumper; print STDERR "Line " . __LINE__ . ", File: " . __FILE__ . "\n" . Data::Dumper->new([$query],[qw()])->Indent(1)->Useqq(1)->Dump; # XXX
  } else {
    cluck("Using three argument search interface is deprecated, use hashref interface instead");
    my ($old_attr, $old_cont, $old_raw) = splice @_, 0, 3;
    $query = {
              attr => $old_attr,
              cont => $old_cont,
              raw  => $old_raw,
             };
  }
  my ($attr, $cont, $raw) = @{$query}{qw(attr cont raw)};

  my %result;

  $self->open unless defined $self->{db}; # require layout

  if ($raw) {
    for my $idx (@{$self->{inverted}->{$attr}}) {
      my $name = $idx->name;
      next unless (exists $raw->{$name} && @{$raw->{$name}});

      my $scale = 1/scalar(@{$raw->{$name}});
      my %r = $idx->search_raw($query, @{$raw->{$name}});
      for my $key (keys %r) {
        my $weight = $r{$key}*$scale;
        if (exists $result{$key}) {
          $result{$key} += $weight;
        } else {
          $result{$key}  = $weight;
        }
      }
    }
  }
  if (defined $cont and $cont ne '') {
    for my $idx (@{$self->{inverted}->{$attr}}) {
      my $r = $idx->search_ref($query, $cont);
      for my $key (keys %$r) {
        if (exists $result{$key}) {
          $result{$key} += $r->{$key};
        } else {
          $result{$key}  = $r->{$key};
        }
      }
    }
  }
  # sanity check for deleted documents.
  # this should not be necessary !@#$
  delete @result{ grep { $self->{deleted}->{$_} } keys %result };

  \%result;
}

# Run $query through parse() of every inverted index of attribute $attr
# and return a reference to the list of distinct terms produced.
# Returns nothing for an undefined or empty query.
sub parse_query {
  my ($self, $attr, $query) = @_;
  return unless defined $query && length $query;

  my %seen;
  for my $idx (@{$self->{inverted}->{$attr}}) {
    $seen{$_}++ for $idx->parse($query);
  }
  return [keys %seen];
}

# Compute the stretches of $text that should be highlighted for one
# attribute.
#
#   $attr  -- attribute whose inverted indexes are consulted
#   $text  -- the text to inspect
#   $query -- optional query string, run through parse() of each index
#   $raw   -- optional hash reference: index name => array reference of
#             already parsed terms
#
# Every index splits $text with parse_pos(); the elements returned are
# used as [term, offset] pairs. For each term that is among the query
# terms the offset is recorded together with the length of the term; if
# several terms hit the same offset the longest one wins.
#
# Returns a hash reference mapping offsets to lengths.
sub hilight_positions {
  my ($self, $attr, $text, $query, $raw)  = @_;
  my %pos;

  # max() is not defined in this package, take the one from List::Util.
  require List::Util;

  # Record one hit, keeping the longest length seen for an offset.
  my $record = sub {
    my ($offset, $len) = @_;
    $pos{$offset} = exists $pos{$offset}
      ? List::Util::max($pos{$offset}, $len)
      : $len;
  };

  if (defined $raw) {
    for my $idx (@{$self->{inverted}->{$attr}}) { # objects of type
                                                  # WAIT::InvertedIndex for
                                                  # this index field $attr
      my $name = $idx->name;
      next unless exists $raw->{$name};

      my %qt;
      $qt{$_}++ for @{$raw->{$name}};
      for my $hit ($idx->parse_pos($text)) {
        $record->($hit->[1], length($hit->[0])) if exists $qt{$hit->[0]};
      }
    }
  }
  if (defined $query) {
    for my $idx (@{$self->{inverted}->{$attr}}) {
      my %qt;
      $qt{$_}++ for $idx->parse($query);
      for my $hit ($idx->parse_pos($text)) {
        $record->($hit->[1], length($hit->[0])) if exists $qt{$hit->[0]};
      }
    }
  }

  \%pos;
}

# Mark the parts of a document that match a query.
#
#   $buf    -- the document, handed to the tag() method of the layout
#              object; the result is consumed as alternating
#              (tag hash reference, text) pairs
#   $qplain -- hash reference: field => query string (optional)
#   $qraw   -- hash reference: field => raw terms (optional)
#
# For every text piece the highlight positions of all queried fields
# that are defined in the piece's tag set are merged (the longest match
# wins per offset). The piece is then cut into unmarked parts, which
# keep the original tag hash, and matching parts, whose tag hash has
# the additional key _qt set to 1.
#
# Returns the new flat list of (tag hash reference, text) pairs.
sub hilight {
  my ($tb, $buf, $qplain, $qraw) = @_;
  my $layout = $tb->layout();

  # max() is not defined in this package, take the one from List::Util.
  require List::Util;

  my @result;

  $qplain ||= {};
  $qraw   ||= {};
  my @ttxt = $layout->tag($buf);
  while (@ttxt) {
    no strict 'refs';
    my %tag = %{shift @ttxt};
    my $txt = shift @ttxt;

    my %hl;
    for my $fld (grep defined $tag{$_}, keys %$qplain, keys %$qraw) {
      my $hp = $tb->hilight_positions($fld, $txt,
                                      $qplain->{$fld}, $qraw->{$fld});
      for my $offset (keys %$hp) {
        $hl{$offset} = exists $hl{$offset}
          ? List::Util::max($hl{$offset}, $hp->{$offset})
          : $hp->{$offset};
      }
    }
    my $qt = {_qt => 1, %tag};
    my $pl = \%tag;
    my $last = length($txt);
    my @tmp;
    # Work backwards through the text so that $last always marks the
    # start of the part that has been emitted already.
    for my $pos (sort {$b <=> $a} keys %hl) {
      unshift @tmp, $pl, substr($txt,$pos+$hl{$pos},$last-$pos-$hl{$pos});
      unshift @tmp, $qt, substr($txt,$pos,$hl{$pos});
      $last = $pos;
    }
    push @result, $pl, substr($txt,0,$last);
    push @result, @tmp;
  }
  @result;                      # no speed necessary
}

1;