--- trunk2/lib/WebPAC.pm	2004/06/17 12:27:02	368
+++ trunk2/lib/WebPAC.pm	2004/09/21 19:08:11	459
@@ -7,9 +7,17 @@
 use Text::Iconv;
 use Config::IniFiles;
 use XML::Simple;
+use Template;
+use Log::Log4perl qw(get_logger :levels);
+use Time::HiRes qw(time);
 
 use Data::Dumper;
 
+#my $LOOKUP_REGEX = '\[[^\[\]]+\]';
+#my $LOOKUP_REGEX_SAVE = '\[([^\[\]]+)\]';
+my $LOOKUP_REGEX = 'lookup{[^\{\}]+}';
+my $LOOKUP_REGEX_SAVE = 'lookup{([^\{\}]+)}';
+
 =head1 NAME
 
 WebPAC - base class for WebPAC
@@ -22,16 +30,19 @@
 
 =head2 new
 
-This will create new instance of WebPAC using configuration specified by C<config_file>.
+Create new instance of WebPAC using configuration specified by C<config_file>.
 
  my $webpac = new WebPAC(
  	config_file => 'name.conf',
-	[code_page => 'ISO-8859-2',]
+	code_page => 'ISO-8859-2',
+	low_mem => 1,
  );
 
 Default C<code_page> is C<ISO-8859-2>.
 
-It will also read configuration files
+Default is not to use C<low_mem> options (see L<MEMORY USAGE> below).
+
+This method will also read configuration files
 C<global.conf> (used by indexer and Web font-end)
 and configuration file specified by C<config_file>
 which describes databases to be indexed.
@@ -52,6 +63,13 @@
         my $self = {@_};
         bless($self, $class);
 
+	$self->{'start_t'} = time();
+
+	my $log_file = $self->{'log'} || "log.conf";
+	Log::Log4perl->init($log_file);
+
+	my $log = $self->_get_logger();
+
 	# fill in default values
 	# output codepage
 	$self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});
@@ -59,8 +77,9 @@
 	#
 	# read global.conf
 	#
+	$log->debug("read 'global.conf'");
 
-	$self->{global_config_file} = new Config::IniFiles( -file => 'global.conf' ) || croak "can't open 'global.conf'";
+	my $config = new Config::IniFiles( -file => 'global.conf' ) || $log->logcroak("can't open 'global.conf'");
 
 	# read global config parametars
 	foreach my $var (qw(
@@ -70,17 +89,55 @@
 			dbi_passwd
 			show_progress
 			my_unac_filter
+			output_template
 		)) {
-		$self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
+		$self->{'global_config'}->{$var} = $config->val('global', $var);
 	}
 
 	#
 	# read indexer config file
 	#
 
-	$self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'";
+	$self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || $log->logcroak("can't open '",$self->{config_file},"'");
 
+	# create UTF-8 convertor for import_xml files
 	$self->{'utf2cp'} = Text::Iconv->new('UTF-8' ,$self->{'code_page'});
+
+	# create Template toolkit instance
+	$self->{'tt'} = Template->new(
+		INCLUDE_PATH => ($self->{'global_config_file'}->{'output_template'} || './output_template'),
+#		FILTERS => {
+#			'foo' => \&foo_filter,
+#		},
+		EVAL_PERL => 1,
+	);
+
+	# running with low_mem flag? well, use DBM::Deep then.
+	if ($self->{'low_mem'}) {
+		$log->info("running with low_mem which impacts performance (<32 Mb memory usage)");
+
+		my $db_file = "data.db";
+
+		if (-e $db_file) {
+			unlink $db_file or $log->logdie("can't remove '$db_file' from last run");
+			$log->debug("removed '$db_file' from last run");
+		}
+
+		require DBM::Deep;
+
+		my $db = new DBM::Deep $db_file;
+
+		$log->logdie("DBM::Deep error: $!") unless ($db);
+
+		if ($db->error()) {
+			$log->logdie("can't open '$db_file' under low_mem: ",$db->error());
+		} else {
+			$log->debug("using file '$db_file' for DBM::Deep");
+		}
+
+		$self->{'db'} = $db;
+	}
+
 	return $self;
 }
 
@@ -91,12 +148,16 @@
  $webpac->open_isis(
  	filename => '/data/ISIS/ISIS',
 	code_page => '852',
-	limit_mfn => '500',
+	limit_mfn => 500,
+	start_mfn => 6000,
 	lookup => [ ... ],
  );
 
 By default, ISIS code page is assumed to be C<852>.
 
+If optional parametar C<start_mfn> is set, this will be first MFN to read
+from database (so you can skip beginning of your database if you need to).
+
 If optional parametar C<limit_mfn> is set, it will read just 500 records
 from database in example above.
 
@@ -119,9 +180,17 @@
 	my $self = shift;
 	my $arg = {@_};
 
-	croak "need filename" if (! $arg->{'filename'});
+	my $log = $self->_get_logger();
+
+	$log->logcroak("need filename") if (! $arg->{'filename'});
 	my $code_page = $arg->{'code_page'} || '852';
 
+	$log->logdie("can't find database ",$arg->{'filename'}) unless (glob($arg->{'filename'}.'.*'));
+
+	# store data in object
+	$self->{'isis_filename'} = $arg->{'filename'};
+	$self->{'isis_code_page'} = $code_page;
+
 	use OpenIsis;
 
 	#$self->{'isis_code_page'} = $code_page;
@@ -129,18 +198,32 @@
 	# create Text::Iconv object
 	my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
 
-	print STDERR "reading ISIS database '",$arg->{'filename'},"'\n" if ($self->{'debug'});
+	$log->info("reading ISIS database '",$arg->{'filename'},"'");
+	$log->debug("isis code page: $code_page");
 
 	my $isis_db = OpenIsis::open($arg->{'filename'});
 
 	my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
+	my $startmfn = 1;
 
-	$maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});
+	if (my $s = $self->{'start_mfn'}) {
+		$log->info("skipping to MFN $s");
+		$startmfn = $s;
+	} else {
+		$self->{'start_mfn'} = $startmfn;
+	}
+
+	$maxmfn = $startmfn + $self->{limit_mfn} if ($self->{limit_mfn});
 
-	print STDERR "processing $maxmfn records...\n" if ($self->{'debug'});
+	$log->info("processing ",($maxmfn-$startmfn)." records...");
 
 	# read database
-	for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
+	for (my $mfn = $startmfn; $mfn <= $maxmfn; $mfn++) {
+
+
+		$log->debug("mfn: $mfn\n");
+
+		my $rec;
 
 		# read record
 		my $row = OpenIsis::read( $isis_db, $mfn );
@@ -159,19 +242,34 @@
 						$val = $l;
 					}
 
-					push @{$self->{'data'}->{$mfn}->{$k}}, $val;
+					push @{$rec->{$k}}, $val;
 				}
+			} else {
+				push @{$rec->{'000'}}, $mfn;
 			}
 
 		}
 
+		$log->confess("record $mfn empty?") unless ($rec);
+
+		# store
+		if ($self->{'low_mem'}) {
+			$self->{'db'}->put($mfn, $rec);
+		} else {
+			$self->{'data'}->{$mfn} = $rec;
+		}
+
 		# create lookup
-		my $rec = $self->{'data'}->{$mfn};
 		$self->create_lookup($rec, @{$arg->{'lookup'}});
 
+		$self->progress_bar($mfn,$maxmfn);
+
 	}
 
-	$self->{'current_mfn'} = 1;
+	$self->{'current_mfn'} = -1;
+	$self->{'last_pcnt'} = 0;
+
+	$log->debug("max mfn: $maxmfn");
 
 	# store max mfn and return it.
 	return $self->{'max_mfn'} = $maxmfn;
@@ -189,14 +287,117 @@
 sub fetch_rec {
 	my $self = shift;
 
-	my $mfn = $self->{'current_mfn'}++ || confess "it seems that you didn't load database!";
+	my $log = $self->_get_logger();
+
+	$log->logconfess("it seems that you didn't load database!") unless ($self->{'current_mfn'});
+
+	if ($self->{'current_mfn'} == -1) {
+		$self->{'current_mfn'} = $self->{'start_mfn'};
+	} else {
+		$self->{'current_mfn'}++;
+	}
+
+	my $mfn = $self->{'current_mfn'};
 
 	if ($mfn > $self->{'max_mfn'}) {
 		$self->{'current_mfn'} = $self->{'max_mfn'};
+		$log->debug("at EOF");
 		return;
 	}
 
-	return $self->{'data'}->{$mfn};
+	$self->progress_bar($mfn,$self->{'max_mfn'});
+
+	if ($self->{'low_mem'}) {
+		return $self->{'db'}->get($mfn);
+	} else {
+		return $self->{'data'}->{$mfn};
+	}
+}
+
+=head2 mfn
+
+Returns current record number (MFN).
+
+ print $webpac->mfn;
+
+=cut
+
+sub mfn {
+	my $self = shift;
+	return $self->{'current_mfn'};
+}
+
+=head2 progress_bar
+
+Draw progress bar on STDERR.
+
+ $webpac->progress_bar($current, $max);
+
+=cut
+
+sub progress_bar {
+	my $self = shift;
+
+	my ($curr,$max) = @_;
+
+	my $log = $self->_get_logger();
+
+	$log->logconfess("no current value!") if (! $curr);
+	$log->logconfess("no maximum value!") if (! $max);
+
+	if ($curr > $max) {
+		$max = $curr;
+		$log->debug("overflow to $curr");
+	}
+
+	$self->{'last_pcnt'} ||= 1;
+
+	my $p = int($curr * 100 / $max) || 1;
+
+	# reset on re-run
+	if ($p < $self->{'last_pcnt'}) {
+		$self->{'last_pcnt'} = $p;
+		$self->{'last_t'} = time();
+		$self->{'last_curr'} = undef;
+	}
+
+	$self->{'last_t'} ||= time();
+
+	if ($p != $self->{'last_pcnt'}) {
+
+		my $last_curr = $self->{'last_curr'} || $curr;
+		my $t = time();
+		my $rate = ($curr - $last_curr) / (($t - $self->{'last_t'} || 1));
+		my $eta = ($max-$curr) / ($rate || 1);
+		printf STDERR ("%5d [%-38s] %-5d %0.1f/s %s\r",$curr,"=" x ($p/3)."$p%>", $max, $rate, $self->fmt_time($eta));
+		$self->{'last_pcnt'} = $p;
+		$self->{'last_t'} = time();
+		$self->{'last_curr'} = $curr;
+	}
+	print STDERR "\n" if ($p == 100);
+}
+
+=head2 fmt_time
+
+Format time (in seconds) for display.
+
+ print $webpac->fmt_time(time());
+
+This method is called by L<progress_bar> to display remaining time.
+
+=cut
+
+sub fmt_time {
+	my $self = shift;
+
+	my $t = shift || 0;
+	my $out = "";
+
+	my ($ss,$mm,$hh) = gmtime($t);
+	$out .= "${hh}h" if ($hh);
+	$out .= sprintf("%02d:%02d", $mm,$ss);
+	$out .= "  " if ($hh == 0);
+	return $out;
 }
 
 =head2 open_import_xml
@@ -210,8 +411,10 @@
 sub open_import_xml {
 	my $self = shift;
 
+	my $log = $self->_get_logger();
+
 	my $arg = {@_};
-	confess "need type to load file from import_xml/" if (! $arg->{'type'});
+	$log->logconfess("need type to load file from import_xml/") if (! $arg->{'type'});
 
 	$self->{'type'} = $arg->{'type'};
 
@@ -220,19 +423,20 @@
 
 	$self->{'tag'} = $type2tag{$type_base};
 
-	print STDERR "using type ",$self->{'type'}," tag ",$self->{'tag'},"\n" if ($self->{'debug'});
+	$log->info("using type '",$self->{'type'},"' tag <",$self->{'tag'},">");
 
 	my $f = "./import_xml/".$self->{'type'}.".xml";
-	confess "import_xml file '$f' doesn't exist!" if (! -e "$f");
+	$log->logconfess("import_xml file '$f' doesn't exist!") if (! -e "$f");
+
+	$log->info("reading '$f'");
 
-	print STDERR "reading '$f'\n" if ($self->{'debug'});
+	$self->{'import_xml_file'} = $f;
 
 	$self->{'import_xml'} = XMLin($f,
 		ForceArray => [ $self->{'tag'}, 'config', 'format' ],
-		ForceContent => 1
 	);
 
-	print Dumper($self->{'import_xml'});
+	$log->debug("import xml is ",sub { Dumper($self->{'import_xml'}) });
 
 }
 
@@ -249,23 +453,29 @@
 sub create_lookup {
 	my $self = shift;
 
-	my $rec = shift || confess "need record to create lookup";
-	confess("need HASH as first argument!") if ($rec !~ /HASH/o);
+	my $log = $self->_get_logger();
+
+	my $rec = shift || $log->logconfess("need record to create lookup");
+	$log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
 
 	foreach my $i (@_) {
-		if ($i->{'eval'}) {
-			my $eval = $self->fill_in($rec,$i->{'eval'});
-			my $key = $self->fill_in($rec,$i->{'key'});
-			my @val = $self->fill_in($rec,$i->{'val'});
-			if ($key && @val && eval $eval) {
+		$log->logconfess("need key") unless defined($i->{'key'});
+		$log->logconfess("need val") unless defined($i->{'val'});
+
+		if (defined($i->{'eval'})) {
+			# eval first, so we can skip fill_in for key and val
+			my $eval = $self->fill_in($rec,$i->{'eval'}) || next;
+			if ($self->_eval($eval)) {
+				my $key = $self->fill_in($rec,$i->{'key'}) || next;
+				my @val = $self->fill_in($rec,$i->{'val'}) || next;
+				$log->debug("stored $key = ",sub { join(" | ",@val) });
 				push @{$self->{'lookup'}->{$key}}, @val;
 			}
 		} else {
-			my $key = $self->fill_in($rec,$i->{'key'});
-			my @val = $self->fill_in($rec,$i->{'val'});
-			if ($key && @val) {
-				push @{$self->{'lookup'}->{$key}}, @val;
-			}
+			my $key = $self->fill_in($rec,$i->{'key'}) || next;
+			my @val = $self->fill_in($rec,$i->{'val'}) || next;
+			$log->debug("stored $key = ",sub { join(" | ",@val) });
+			push @{$self->{'lookup'}->{$key}}, @val;
 		}
 	}
 }
@@ -296,6 +506,7 @@
 
 	if ($$rec->{$f}) {
 		return '' if (! $$rec->{$f}->[$i]);
+		no strict 'refs';
 		if ($sf && $$rec->{$f}->[$i]->{$sf}) {
 			$$found++ if (defined($$found));
 			return $$rec->{$f}->[$i]->{$sf};
@@ -336,18 +547,27 @@
 This function B<does not> perform parsing of format to inteligenty skip
 delimiters before fields which aren't used.
 
+This method will automatically decode UTF-8 string to local code page
+if needed.
+
 =cut
 
 sub fill_in {
 	my $self = shift;
 
-	my $rec = shift || confess "need data record";
-	my $format = shift || confess "need format to parse";
+	my $log = $self->_get_logger();
+
+	my $rec = shift || $log->logconfess("need data record");
+	my $format = shift || $log->logconfess("need format to parse");
 	# iteration (for repeatable fields)
 	my $i = shift || 0;
 
 	# FIXME remove for speedup?
-	confess("need HASH as first argument!") if ($rec !~ /HASH/o);
+	$log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
+
+	if (utf8::is_utf8($format)) {
+		$format = $self->_x($format);
+	}
 
 	my $found = 0;
 
@@ -356,15 +576,17 @@
 	$eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
 
 	# do actual replacement of placeholders
-	$format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;
+	$format =~ s/v(\d+)(?:\^(\w))?/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;
 
 	if ($found) {
+		$log->debug("format: $format");
 		if ($eval_code) {
 			my $eval = $self->fill_in($rec,$eval_code,$i);
-			return if (! eval $eval);
+			return if (! $self->_eval($eval));
 		}
 		# do we have lookups?
-		if ($format =~ /\[[^\[\]]+\]/o) {
+		if ($format =~ /$LOOKUP_REGEX/o) {
+			$log->debug("format '$format' has lookup");
 			return $self->lookup($format);
 		} else {
 			return $format;
@@ -387,18 +609,23 @@
 sub lookup {
 	my $self = shift;
 
-	my $tmp = shift || confess "need format";
+	my $log = $self->_get_logger();
+
+	my $tmp = shift || $log->logconfess("need format");
 
-	if ($tmp =~ /\[[^\[\]]+\]/o) {
+	if ($tmp =~ /$LOOKUP_REGEX/o) {
 		my @in = ( $tmp );
+
+		$log->debug("lookup for: ",$tmp);
+
 		my @out;
 		while (my $f = shift @in) {
-			if ($f =~ /\[([^\[\]]+)\]/) {
+			if ($f =~ /$LOOKUP_REGEX_SAVE/o) {
 				my $k = $1;
 				if ($self->{'lookup'}->{$k}) {
 					foreach my $nv (@{$self->{'lookup'}->{$k}}) {
 						my $tmp2 = $f;
-						$tmp2 =~ s/\[$k\]/$nv/g;
+						$tmp2 =~ s/lookup{$k}/$nv/g;
 						push @in, $tmp2;
 					}
 				} else {
@@ -408,6 +635,7 @@
 				push @out, $f;
 			}
 		}
+		$log->logconfess("return is array and it's not expected!") unless wantarray;
 		return @out;
 	} else {
 		return $tmp;
@@ -431,15 +659,19 @@
 
 	return if (! $format_utf8);
 
-	confess("need HASH as first argument!") if ($rec !~ /HASH/o);
-	confess("need utf2cp Text::Iconv object!") if (! $self->{'utf2cp'});
+	my $log = $self->_get_logger();
+
+	$log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
+	$log->logconfess("need utf2cp Text::Iconv object!") if (! $self->{'utf2cp'});
 
 	$i = 0 if (! $i);
 
-	my $format = $self->{'utf2cp'}->convert($format_utf8) || confess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'});
+	my $format = $self->_x($format_utf8) || $log->logconfess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'});
 
 	my @out;
 
+	$log->debug("format: $format");
+
 	my $eval_code;
 	# remove eval{...} from beginning
 	$eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
@@ -447,7 +679,7 @@
 	my $prefix;
 	my $all_found=0;
 
-	while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))*//s) {
+	while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))?//s) {
 
 		my $del = $1 || '';
 		$prefix ||= $del if ($all_found == 0);
@@ -464,14 +696,22 @@
 
 	return if (! $all_found);
 
-	my $out = join('',@out) . $format;
+	my $out = join('',@out);
+
+	if ($out) {
+		# add rest of format (suffix)
+		$out .= $format;
+
+		# add prefix if not there
+		$out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);
 
-	# add prefix if not there
-	$out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);
+		$log->debug("result: $out");
+	}
 
 	if ($eval_code) {
 		my $eval = $self->fill_in($rec,$eval_code,$i);
-		return if (! eval $eval);
+		$log->debug("about to eval{",$eval,"} format: $out");
+		return if (! $self->_eval($eval));
 	}
 
 	return $out;
@@ -490,7 +730,9 @@
 
 	my ($rec, $format_utf8) = @_;
 
-	confess("need HASH as first argument!") if ($rec !~ /HASH/o);
+	my $log = $self->_get_logger();
+
+	$log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
 	return if (! $format_utf8);
 
 	my $i = 0;
@@ -500,36 +742,92 @@
 		push @arr, $v;
 	}
 
+	$log->debug("format '$format_utf8' returned ",--$i," elements: ", sub { join(" | ",@arr) }) if (@arr);
+
 	return @arr;
 }
 
-=head2 data_structure
+=head2 fill_in_to_arr
 
-Create in-memory data structure which represents layout from C<import_xml>.
-It is used later to produce output.
+Similar to C<fill_in>, but returns array of all repeatable fields. Usable
+for fields which have lookups, so they shouldn't be parsed but rather
+C<fill_id>ed.
 
- my @ds = $webpac->data_structure($rec);
+ my @arr = $webpac->fill_in_to_arr($rec,'[v900];;[v250^a]');
 
 =cut
 
-# private method _sort_by_order
-# sort subrouting using order="" attribute
-sub _sort_by_order {
+sub fill_in_to_arr {
 	my $self = shift;
 
-	my $va = $self->{'import_xml'}->{'indexer'}->{$a}->{'order'} ||
-		$self->{'import_xml'}->{'indexer'}->{$a};
-	my $vb = $self->{'import_xml'}->{'indexer'}->{$b}->{'order'} ||
-		$self->{'import_xml'}->{'indexer'}->{$b};
+	my ($rec, $format_utf8) = @_;
 
-	return $va <=> $vb;
+	my $log = $self->_get_logger();
+
+	$log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
+	return if (! $format_utf8);
+
+	my $i = 0;
+	my @arr;
+
+	while (my @v = $self->fill_in($rec,$format_utf8,$i++)) {
+		push @arr, @v;
+	}
+
+	$log->debug("format '$format_utf8' returned ",--$i," elements: ", sub { join(" | ",@arr) }) if (@arr);
+
+	return @arr;
 }
 
+=head2 sort_arr
+
+Sort array ignoring case and html in data
+
+ my @sorted = $webpac->sort_arr(@unsorted);
+
+=cut
+
+sub sort_arr {
+	my $self = shift;
+
+	my $log = $self->_get_logger();
+
+	# FIXME add Schwartzian Transformation?
+
+	my @sorted = sort {
+		$a =~ s#<[^>]+/*>##;
+		$b =~ s#<[^>]+/*>##;
+		lc($b) cmp lc($a)
+	} @_;
+	$log->debug("sorted values: ",sub { join(", ",@sorted) });
+
+	return @sorted;
+}
+
+
+=head2 data_structure
+
+Create in-memory data structure which represents layout from C<import_xml>.
+It is used later to produce output.
+
+ my @ds = $webpac->data_structure($rec);
+
+This method will also set C<$webpac->{'currnet_filename'}> if there is
+<filename> tag in C<import_xml> and C<$webpac->{'headline'}> if there is
+<headline> tag.
+
+=cut
+
 sub data_structure {
 	my $self = shift;
 
+	my $log = $self->_get_logger();
+
 	my $rec = shift;
-	confess("need HASH as first argument!") if ($rec !~ /HASH/o);
+	$log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
+
+	undef $self->{'currnet_filename'};
+	undef $self->{'headline'};
 
 	my @sorted_tags;
 	if ($self->{tags_by_order}) {
@@ -541,6 +839,8 @@
 
 	my @ds;
 
+	$log->debug("tags: ",sub { join(", ",@sorted_tags) });
+
 	foreach my $field (@sorted_tags) {
 
 		my $row;
@@ -548,28 +848,329 @@
 #print "field $field [",$self->{'tag'},"] = ",Dumper($self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}});
 
 		foreach my $tag (@{$self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}}) {
-			my @v = $self->parse_to_arr($rec,$tag->{'content'});
+			my $format = $tag->{'value'} || $tag->{'content'};
 
-			next if (! @v);
+			$log->debug("format: $format");
 
-			# does tag have type?
-			if ($tag->{'type'}) {
-				push @{$row->{$tag->{'type'}}}, @v;
+			my @v;
+			if ($format =~ /$LOOKUP_REGEX/o) {
+				@v = $self->fill_in_to_arr($rec,$format);
 			} else {
-				push @{$row->{'display'}}, @v;
-				push @{$row->{'swish'}}, @v;
+				@v = $self->parse_to_arr($rec,$format);
+			}
+			next if (! @v);
+
+			if ($tag->{'sort'}) {
+				@v = $self->sort_arr(@v);
+				$log->warn("sort within tag is usually not what you want!");
+			}
+
+			# use format?
+			if ($tag->{'format_name'}) {
+				@v = map { $self->apply_format($tag->{'format_name'},$tag->{'format_delimiter'},$_) } @v;
 			}
+
+			if ($field eq 'filename') {
+				$self->{'current_filename'} = join('',@v);
+				$log->debug("filename: ",$self->{'current_filename'});
+			} elsif ($field eq 'headline') {
+				$self->{'headline'} .= join('',@v);
+				$log->debug("headline: ",$self->{'headline'});
+				next; # don't return headline in data_structure!
+			}
+
+			# delimiter will join repeatable fields
+			if ($tag->{'delimiter'}) {
+				@v = ( join($tag->{'delimiter'}, @v) );
+			}
+
+			# default types 
+			my @types = qw(display swish);
+			# override by type attribute
+			@types = ( $tag->{'type'} ) if ($tag->{'type'});
+
+			foreach my $type (@types) {
+				# append to previous line?
+				$log->debug("type: $type ",sub { join(" ",@v) }, $row->{'append'} || 'no append');
+				if ($tag->{'append'}) {
+
+					# I will delimit appended part with
+					# delimiter (or ,)
+					my $d = $tag->{'delimiter'};
+					# default delimiter
+					$d ||= ", ";
+
+					my $last = pop @{$row->{$type}};
+					$d = "" if (! $last);
+					$last .= $d . join($d, @v);
+					push @{$row->{$type}}, $last;
+
+				} else {
+					push @{$row->{$type}}, @v;
+				}
+			}
+
+
 		}
 
 		if ($row) {
 			$row->{'tag'} = $field;
+
+			# TODO: name_sigular, name_plural
+			my $name = $self->{'import_xml'}->{'indexer'}->{$field}->{'name'};
+			$row->{'name'} = $name ? $self->_x($name) : $field;
+
+			# post-sort all values in field
+			if ($self->{'import_xml'}->{'indexer'}->{$field}->{'sort'}) {
+				$log->warn("sort at field tag not implemented");
+			}
+
 			push @ds, $row;
+
+			$log->debug("row $field: ",sub { Dumper($row) });
 		}
 
 	}
 
-	print "data_structure => ",Dumper(\@ds);
+	return @ds;
+
+}
+
+=head2 output
+
+Create output from in-memory data structure using Template Toolkit template.
+
+my $text = $webpac->output( template => 'text.tt', data => @ds );
+
+=cut
+
+sub output {
+	my $self = shift;
+
+	my $args = {@_};
+
+	my $log = $self->_get_logger();
+
+	$log->logconfess("need template name") if (! $args->{'template'});
+	$log->logconfess("need data array") if (! $args->{'data'});
+
+	my $out;
+
+	$self->{'tt'}->process(
+		$args->{'template'},
+		$args,
+		\$out
+	) || confess $self->{'tt'}->error();
+
+	return $out;
+}
+
+=head2 output_file
+
+Create output from in-memory data structure using Template Toolkit template
+to a file.
+
+ $webpac->output_file(
+        file => 'out.txt',
+ 	template => 'text.tt',
+	data => @ds
+ );
+
+=cut
+
+sub output_file {
+	my $self = shift;
+
+	my $args = {@_};
+
+	my $log = $self->_get_logger();
+
+	my $file = $args->{'file'} || $log->logconfess("need file name");
+
+	$log->debug("creating file ",$file);
+
+	open(my $fh, ">", $file) || $log->logdie("can't open output file '$file': $!");
+	print $fh $self->output(
+		template => $args->{'template'},
+		data => $args->{'data'},
+	) || $log->logdie("print: $!");
+	close($fh) || $log->logdie("close: $!");
+}
+
+=head2 apply_format
+
+Apply format specified in tag with C<format_name="name"> and
+C<format_delimiter=";;">.
+
+ my $text = $webpac->apply_format($format_name,$format_delimiter,$data);
+
+Formats can contain C<lookup{...}> if you need them.
+
+=cut
+
+sub apply_format {
+	my $self = shift;
+
+	my ($name,$delimiter,$data) = @_;
+
+	my $log = $self->_get_logger();
+
+	if (! $self->{'import_xml'}->{'format'}->{$name}) {
+		$log->warn("<format name=\"$name\"> is not defined in ",$self->{'import_xml_file'});
+		return $data;
+	}
+
+	$log->warn("no delimiter for format $name") if (! $delimiter);
+
+	my $format = $self->_x($self->{'import_xml'}->{'format'}->{$name}->{'content'}) || $log->logdie("can't find format '$name'");
+
+	my @data = split(/\Q$delimiter\E/, $data);
+
+	my $out = sprintf($format, @data);
+	$log->debug("using format $name [$format] on $data to produce: $out");
+
+	if ($out =~ m/$LOOKUP_REGEX/o) {
+		return $self->lookup($out);
+	} else {
+		return $out;
+	}
 
 }
 
+
+#
+#
+#
+
+=head1 INTERNAL METHODS
+
+Here is a quick list of internal methods, mostly useful to turn debugging
+on them (see L<LOGGING> below for explanation).
+
+=cut
+
+=head2 _eval
+
+Internal function to eval code without C<strict 'subs'>.
+
+=cut
+
+sub _eval {
+	my $self = shift;
+
+	my $code = shift || return;
+
+	my $log = $self->_get_logger();
+
+	no strict 'subs';
+	my $ret = eval $code;
+	if ($@) {
+		$log->error("problem with eval code [$code]: $@");
+	}
+
+	$log->debug("eval: ",$code," [",$ret,"]");
+
+	return $ret || 0;
+}
+
+=head2 _sort_by_order
+
+Sort xml tags data structure accoding to C<order=""> attribute.
+
+=cut
+
+sub _sort_by_order {
+	my $self = shift;
+
+	my $va = $self->{'import_xml'}->{'indexer'}->{$a}->{'order'} ||
+		$self->{'import_xml'}->{'indexer'}->{$a};
+	my $vb = $self->{'import_xml'}->{'indexer'}->{$b}->{'order'} ||
+		$self->{'import_xml'}->{'indexer'}->{$b};
+
+	return $va <=> $vb;
+}
+
+=head2 _get_logger
+
+Get C<Log::Log4perl> object with a twist: domains are defined for each
+method
+
+ my $log = $webpac->_get_logger();
+
+=cut
+
+sub _get_logger {
+	my $self = shift;
+
+	my $name = (caller(1))[3] || caller;
+	return get_logger($name);
+}
+
+=head2 _x
+
+Convert string from UTF-8 to code page defined in C<import_xml>.
+
+ my $text = $webpac->_x('utf8 text');
+
+=cut
+
+sub _x {
+	my $self = shift;
+	my $utf8 = shift || return;
+
+	return $self->{'utf2cp'}->convert($utf8) ||
+		$self->_get_logger()->logwarn("can't convert '$utf8'");
+}
+
+#
+#
+#
+
+=head1 LOGGING
+
+Logging in WebPAC is performed by L<Log::Log4perl> with config file
+C<log.conf>.
+
+Methods defined above have different levels of logging, so
+it's descriptions will be useful to turn (mostry B<debug> logging) on
+or off to see why WabPAC isn't perforing as you expect it (it might even
+be a bug!).
+
+B<This is different from normal Log4perl behaviour>. To repeat, you can
+also use method names, and not only classes (which are just few)
+to filter logging.
+
+
+=head1 MEMORY USAGE
+
+C<low_mem> options is double-edged sword. If enabled, WebPAC
+will run on memory constraint machines (which doesn't have enough
+physical RAM to create memory structure for whole source database).
+
+If your machine has 512Mb or more of RAM and database is around 10000 records,
+memory shouldn't be an issue. If you don't have enough physical RAM, you
+might consider using virtual memory (if your operating system is handling it
+well, like on FreeBSD or Linux) instead of dropping to L<DBD::Deep> to handle
+parsed structure of ISIS database (this is what C<low_mem> option does).
+
+Hitting swap at end of reading source database is probably o.k. However,
+hitting swap before 90% will dramatically decrease performance and you will
+be better off with C<low_mem> and using rest of availble memory for
+operating system disk cache (Linux is particuallary good about this).
+However, every access to database record will require disk access, so
+generation phase will be slower 10-100 times.
+
+Parsed structures are essential - you just have option to trade RAM memory
+(which is fast) for disk space (which is slow). Be sure to have planty of
+disk space if you are using C<low_mem> and thus L<DBD::Deep>.
+
+However, when WebPAC is running on desktop machines (or laptops :-), it's
+highly undesireable for system to start swapping. Using C<low_mem> option can
+reduce WecPAC memory usage to around 64Mb for same database with lookup
+fields and sorted indexes which stay in RAM. Performance will suffer, but
+memory usage will really be minimal. It might be also more confortable to
+run WebPAC reniced on those machines.
+
+=cut
+
 1;