trunk2/lib/WebPAC.pm

package WebPAC;

use warnings;
use strict;

use Carp;
use Text::Iconv;
use Config::IniFiles;
use XML::Simple;
use Template;

use Data::Dumper;

=head1 NAME

WebPAC - base class for WebPAC

=head1 DESCRIPTION

This module implements methods used by WebPAC.

=head1 METHODS

=head2 new

This will create new instance of WebPAC using configuration specified by C<config_file>.

 my $webpac = new WebPAC(
        config_file => 'name.conf',
        [code_page => 'ISO-8859-2',]
 );

Default C<code_page> is C<ISO-8859-2>.

It will also read configuration files
C<global.conf> (used by indexer and Web font-end)
and configuration file specified by C<config_file>
which describes databases to be indexed.

=cut

# mapping between data type and tag which specify
# format in XML file
my %type2tag = (
        'isis' => 'isis',
#       'excel' => 'column',
#       'marc' => 'marc',
#       'feed' => 'feed'
);

sub new {
        my $class = shift;
        my $self = {@_};
        bless($self, $class);

        # fill in default values
        # output codepage
        $self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});

        #
        # read global.conf
        #

        my $config = new Config::IniFiles( -file => 'global.conf' ) || croak "can't open 'global.conf'";

        # read global config parametars
        foreach my $var (qw(
                        dbi_dbd
                        dbi_dsn
                        dbi_user
                        dbi_passwd
                        show_progress
                        my_unac_filter
                        output_template
                )) {
                $self->{'global_config'}->{$var} = $config->val('global', $var);
        }

        #
        # read indexer config file
        #

        $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'";

        # create UTF-8 convertor for import_xml files
        $self->{'utf2cp'} = Text::Iconv->new('UTF-8' ,$self->{'code_page'});

        # create Template toolkit instance
        $self->{'tt'} = Template->new(
                INCLUDE_PATH => ($self->{'global_config_file'}->{'output_template'} || './output_template'),
#               FILTERS => {
#                       'foo' => \&foo_filter,
#               },
                EVAL_PERL => 1,
        );

        return $self;
}

=head2 open_isis

Open CDS/ISIS database using OpenIsis module and read all records to memory.

 $webpac->open_isis(
        filename => '/data/ISIS/ISIS',
        code_page => '852',
        limit_mfn => '500',
        lookup => [ ... ],
 );

By default, ISIS code page is assumed to be C<852>.

If optional parametar C<limit_mfn> is set, it will read just 500 records
from database in example above.

C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
C<val>. Optional parametar C<eval> is perl code to evaluate before storing
value in index.

 lookup => [
  { 'key' => 'd:v900', 'val' => 'v250^a' },
  { 'eval' => '"v901^a" eq "Podruèje"',
    'key' => 'pa:v561^4:v562^4:v461^1',
    'val' => 'v900' },
 ]

Returns number of last record read into memory (size of database, really).

=cut

sub open_isis {
        my $self = shift;
        my $arg = {@_};

        croak "need filename" if (! $arg->{'filename'});
        my $code_page = $arg->{'code_page'} || '852';

        use OpenIsis;

        #$self->{'isis_code_page'} = $code_page;

        # create Text::Iconv object
        my $cp = Text::Iconv->new($code_page,$self->{'code_page'});

        print STDERR "reading ISIS database '",$arg->{'filename'},"'\n" if ($self->{'debug'});

        my $isis_db = OpenIsis::open($arg->{'filename'});

        my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;

        $maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});

        print STDERR "processing $maxmfn records...\n" if ($self->{'debug'});

        # read database
        for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {

                # read record
                my $row = OpenIsis::read( $isis_db, $mfn );
                foreach my $k (keys %{$row}) {
                        if ($k ne "mfn") {
                                foreach my $l (@{$row->{$k}}) {
                                        $l = $cp->convert($l);
                                        # has subfields?
                                        my $val;
                                        if ($l =~ m/\^/) {
                                                foreach my $t (split(/\^/,$l)) {
                                                        next if (! $t);
                                                        $val->{substr($t,0,1)} = substr($t,1);
                                                }
                                        } else {
                                                $val = $l;
                                        }

                                        push @{$self->{'data'}->{$mfn}->{$k}}, $val;
                                }
                        }

                }

                # create lookup
                my $rec = $self->{'data'}->{$mfn};
                $self->create_lookup($rec, @{$arg->{'lookup'}});

        }

        $self->{'current_mfn'} = 1;

        # store max mfn and return it.
        return $self->{'max_mfn'} = $maxmfn;
}

=head2 fetch_rec

Fetch next record from database. It will also display progress bar (once
it's implemented, that is).

 my $rec = $webpac->fetch_rec;

=cut

sub fetch_rec {
        my $self = shift;

        my $mfn = $self->{'current_mfn'}++ || confess "it seems that you didn't load database!";

        if ($mfn > $self->{'max_mfn'}) {
                $self->{'current_mfn'} = $self->{'max_mfn'};
                return;
        }

        return $self->{'data'}->{$mfn};
}

=head2 open_import_xml

Read file from C<import_xml/> directory and parse it.

 $webpac->open_import_xml(type => 'isis');

=cut

sub open_import_xml {
        my $self = shift;

        my $arg = {@_};
        confess "need type to load file from import_xml/" if (! $arg->{'type'});

        $self->{'type'} = $arg->{'type'};

        my $type_base = $arg->{'type'};
        $type_base =~ s/_.*$//g;

        $self->{'tag'} = $type2tag{$type_base};

        print STDERR "using type ",$self->{'type'}," tag ",$self->{'tag'},"\n" if ($self->{'debug'});

        my $f = "./import_xml/".$self->{'type'}.".xml";
        confess "import_xml file '$f' doesn't exist!" if (! -e "$f");

        print STDERR "reading '$f'\n" if ($self->{'debug'});

        $self->{'import_xml'} = XMLin($f,
                ForceArray => [ $self->{'tag'}, 'config', 'format' ],
                ForceContent => 1
        );

}

=head2 create_lookup

Create lookup from record using lookup definition.

 $self->create_lookup($rec, @lookups);

Called internally by C<open_*> methods.

=cut

sub create_lookup {
        my $self = shift;

        my $rec = shift || confess "need record to create lookup";
        confess("need HASH as first argument!") if ($rec !~ /HASH/o);

        foreach my $i (@_) {
                if ($i->{'eval'}) {
                        my $eval = $self->fill_in($rec,$i->{'eval'});
                        my $key = $self->fill_in($rec,$i->{'key'});
                        my @val = $self->fill_in($rec,$i->{'val'});
                        if ($key && @val && eval $eval) {
                                push @{$self->{'lookup'}->{$key}}, @val;
                        }
                } else {
                        my $key = $self->fill_in($rec,$i->{'key'});
                        my @val = $self->fill_in($rec,$i->{'val'});
                        if ($key && @val) {
                                push @{$self->{'lookup'}->{$key}}, @val;
                        }
                }
        }
}

=head2 get_data

Returns value from record.

 my $text = $self->get_data(\$rec,$f,$sf,$i,\$found);

Arguments are:
record reference C<$rec>,
field C<$f>,
optional subfiled C<$sf>,
index for repeatable values C<$i>.

Optinal variable C<$found> will be incremeted if there
is field.

Returns value or empty string.

=cut

sub get_data {
        my $self = shift;

        my ($rec,$f,$sf,$i,$found) = @_;

        if ($$rec->{$f}) {
                return '' if (! $$rec->{$f}->[$i]);
                if ($sf && $$rec->{$f}->[$i]->{$sf}) {
                        $$found++ if (defined($$found));
                        return $$rec->{$f}->[$i]->{$sf};
                } elsif ($$rec->{$f}->[$i]) {
                        $$found++ if (defined($$found));
                        # it still might have subfield, just
                        # not specified, so we'll dump all
                        if ($$rec->{$f}->[$i] =~ /HASH/o) {
                                my $out;
                                foreach my $k (keys %{$$rec->{$f}->[$i]}) {
                                        $out .= $$rec->{$f}->[$i]->{$k}." ";
                                }
                                return $out;
                        } else {
                                return $$rec->{$f}->[$i];
                        }
                }
        } else {
                return '';
        }
}

=head2 fill_in

Workhourse of all: takes record from in-memory structure of database and
strings with placeholders and returns string or array of with substituted
values from record.

 my $text = $webpac->fill_in($rec,'v250^a');

Optional argument is ordinal number for repeatable fields. By default,
it's assume to be first repeatable field (fields are perl array, so first
element is 0).
Following example will read second value from repeatable field.

 my $text = $webpac->fill_in($rec,'Title: v250^a',1);

This function B<does not> perform parsing of format to inteligenty skip
delimiters before fields which aren't used.

=cut

sub fill_in {
        my $self = shift;

        my $rec = shift || confess "need data record";
        my $format = shift || confess "need format to parse";
        # iteration (for repeatable fields)
        my $i = shift || 0;

        # FIXME remove for speedup?
        confess("need HASH as first argument!") if ($rec !~ /HASH/o);

        my $found = 0;

        my $eval_code;
        # remove eval{...} from beginning
        $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);

        # do actual replacement of placeholders
        $format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;

        if ($found) {
                if ($eval_code) {
                        my $eval = $self->fill_in($rec,$eval_code,$i);
                        return if (! eval $eval);
                }
                # do we have lookups?
                if ($format =~ /\[[^\[\]]+\]/o) {
                        return $self->lookup($format);
                } else {
                        return $format;
                }
        } else {
                return;
        }
}

=head2 lookup

Perform lookups on format supplied to it.

 my $text = $self->lookup('[v900]');

Lookups can be nested (like C<[d:[a:[v900]]]>).

=cut

sub lookup {
        my $self = shift;

        my $tmp = shift || confess "need format";

        if ($tmp =~ /\[[^\[\]]+\]/o) {
                my @in = ( $tmp );
                my @out;
                while (my $f = shift @in) {
                        if ($f =~ /\[([^\[\]]+)\]/) {
                                my $k = $1;
                                if ($self->{'lookup'}->{$k}) {
                                        foreach my $nv (@{$self->{'lookup'}->{$k}}) {
                                                my $tmp2 = $f;
                                                $tmp2 =~ s/\[$k\]/$nv/g;
                                                push @in, $tmp2;
                                        }
                                } else {
                                        undef $f;
                                }
                        } elsif ($f) {
                                push @out, $f;
                        }
                }
                return @out;
        } else {
                return $tmp;
        }
}

=head2 parse

Perform smart parsing of string, skipping delimiters for fields which aren't
defined. It can also eval code in format starting with C<eval{...}> and
return output or nothing depending on eval code.

 my $text = $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);

=cut

sub parse {
        my $self = shift;

        my ($rec, $format_utf8, $i) = @_;

        return if (! $format_utf8);

        confess("need HASH as first argument!") if ($rec !~ /HASH/o);
        confess("need utf2cp Text::Iconv object!") if (! $self->{'utf2cp'});

        $i = 0 if (! $i);

        my $format = $self->{'utf2cp'}->convert($format_utf8) || confess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'});

        my @out;

        my $eval_code;
        # remove eval{...} from beginning
        $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);

        my $prefix;
        my $all_found=0;

        while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))*//s) {

                my $del = $1 || '';
                $prefix ||= $del if ($all_found == 0);

                my $found = 0;
                my $tmp = $self->get_data(\$rec,$2,$3,$i,\$found);

                if ($found) {
                        push @out, $del;
                        push @out, $tmp;
                        $all_found += $found;
                }
        }

        return if (! $all_found);

        my $out = join('',@out) . $format;

        # add prefix if not there
        $out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);

        if ($eval_code) {
                my $eval = $self->fill_in($rec,$eval_code,$i);
                return if (! eval $eval);
        }

        return $out;
}

=head2 parse_to_arr

Similar to C<parse>, but returns array of all repeatable fields

 my @arr = $webpac->parse_to_arr($rec,'v250^a');

=cut

sub parse_to_arr {
        my $self = shift;

        my ($rec, $format_utf8) = @_;

        confess("need HASH as first argument!") if ($rec !~ /HASH/o);
        return if (! $format_utf8);

        my $i = 0;
        my @arr;

        while (my $v = $self->parse($rec,$format_utf8,$i++)) {
                push @arr, $v;
        }

        return @arr;
}

=head2 data_structure

Create in-memory data structure which represents layout from C<import_xml>.
It is used later to produce output.

 my @ds = $webpac->data_structure($rec);

=cut

# private method _sort_by_order
# sort subrouting using order="" attribute
sub _sort_by_order {
        my $self = shift;

        my $va = $self->{'import_xml'}->{'indexer'}->{$a}->{'order'} ||
                $self->{'import_xml'}->{'indexer'}->{$a};
        my $vb = $self->{'import_xml'}->{'indexer'}->{$b}->{'order'} ||
                $self->{'import_xml'}->{'indexer'}->{$b};

        return $va <=> $vb;
}

sub data_structure {
        my $self = shift;

        my $rec = shift;
        confess("need HASH as first argument!") if ($rec !~ /HASH/o);

        my @sorted_tags;
        if ($self->{tags_by_order}) {
                @sorted_tags = @{$self->{tags_by_order}};
        } else {
                @sorted_tags = sort { $self->_sort_by_order } keys %{$self->{'import_xml'}->{'indexer'}};
                $self->{tags_by_order} = \@sorted_tags;
        }

        my @ds;

        foreach my $field (@sorted_tags) {

                my $row;

#print "field $field [",$self->{'tag'},"] = ",Dumper($self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}});

                foreach my $tag (@{$self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}}) {
                        my @v = $self->parse_to_arr($rec,$tag->{'content'});

                        next if (! @v);

                        # does tag have type?
                        if ($tag->{'type'}) {
                                push @{$row->{$tag->{'type'}}}, @v;
                        } else {
                                push @{$row->{'display'}}, @v;
                                push @{$row->{'swish'}}, @v;
                        }
                }

                if ($row) {
                        $row->{'tag'} = $field;
                        push @ds, $row;
                }

        }

        return @ds;

}

=head2 output

Create output from in-memory data structure using Template Toolkit template.

my $text = $webpac->output( template => 'text.tt', data => @ds );

=cut

sub output {
        my $self = shift;

        my $args = {@_};

        confess("need template name") if (! $args->{'template'});
        confess("need data array") if (! $args->{'data'});

        my $out;

        $self->{'tt'}->process(
                $args->{'template'},
                $args,
                \$out
        ) || confess $self->{'tt'}->error();

        return $out;
}

1;
1	dpavlin	354	package WebPAC;
2	dpavlin	352
3	dpavlin	367	use warnings;
4			use strict;
5
6	dpavlin	352	use Carp;
7	dpavlin	353	use Text::Iconv;
8			use Config::IniFiles;
9	dpavlin	363	use XML::Simple;
10	dpavlin	370	use Template;
11	dpavlin	352
12	dpavlin	358	use Data::Dumper;
13
14	dpavlin	352	=head1 NAME
15
16	dpavlin	354	WebPAC - base class for WebPAC
17	dpavlin	352
18			=head1 DESCRIPTION
19
20	dpavlin	354	This module implements methods used by WebPAC.
21	dpavlin	352
22			=head1 METHODS
23
24			=head2 new
25
26	dpavlin	354	This will create new instance of WebPAC using configuration specified by C<config_file>.
27	dpavlin	352
28	dpavlin	354	my $webpac = new WebPAC(
29	dpavlin	352	config_file => 'name.conf',
30			[code_page => 'ISO-8859-2',]
31			);
32
33			Default C<code_page> is C<ISO-8859-2>.
34
35	dpavlin	353	It will also read configuration files
36			C<global.conf> (used by indexer and Web font-end)
37			and configuration file specified by C<config_file>
38			which describes databases to be indexed.
39
40	dpavlin	352	=cut
41
42	dpavlin	363	# mapping between data type and tag which specify
43			# format in XML file
44			my %type2tag = (
45			'isis' => 'isis',
46			# 'excel' => 'column',
47			# 'marc' => 'marc',
48			# 'feed' => 'feed'
49			);
50
51	dpavlin	352	sub new {
52			my $class = shift;
53			my $self = {@_};
54			bless($self, $class);
55
56			# fill in default values
57			# output codepage
58			$self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});
59
60	dpavlin	353	#
61			# read global.conf
62			#
63	dpavlin	352
64	dpavlin	370	my $config = new Config::IniFiles( -file => 'global.conf' ) \|\| croak "can't open 'global.conf'";
65	dpavlin	352
66			# read global config parametars
67			foreach my $var (qw(
68			dbi_dbd
69			dbi_dsn
70			dbi_user
71			dbi_passwd
72			show_progress
73			my_unac_filter
74	dpavlin	370	output_template
75	dpavlin	352	)) {
76	dpavlin	370	$self->{'global_config'}->{$var} = $config->val('global', $var);
77	dpavlin	352	}
78
79	dpavlin	353	#
80			# read indexer config file
81			#
82	dpavlin	352
83			$self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) \|\| croak "can't open '$self->{config_file}'";
84
85	dpavlin	370	# create UTF-8 convertor for import_xml files
86	dpavlin	366	$self->{'utf2cp'} = Text::Iconv->new('UTF-8' ,$self->{'code_page'});
87	dpavlin	370
88			# create Template toolkit instance
89			$self->{'tt'} = Template->new(
90			INCLUDE_PATH => ($self->{'global_config_file'}->{'output_template'} \|\| './output_template'),
91			# FILTERS => {
92			# 'foo' => \&foo_filter,
93			# },
94			EVAL_PERL => 1,
95			);
96
97	dpavlin	352	return $self;
98			}
99
100			=head2 open_isis
101
102			Open CDS/ISIS database using OpenIsis module and read all records to memory.
103
104			$webpac->open_isis(
105			filename => '/data/ISIS/ISIS',
106			code_page => '852',
107			limit_mfn => '500',
108			lookup => [ ... ],
109			);
110
111			By default, ISIS code page is assumed to be C<852>.
112
113	dpavlin	353	If optional parametar C<limit_mfn> is set, it will read just 500 records
114			from database in example above.
115	dpavlin	352
116			C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
117			C<val>. Optional parametar C<eval> is perl code to evaluate before storing
118			value in index.
119
120			lookup => [
121			{ 'key' => 'd:v900', 'val' => 'v250^a' },
122			{ 'eval' => '"v901^a" eq "Podruèje"',
123			'key' => 'pa:v561^4:v562^4:v461^1',
124			'val' => 'v900' },
125			]
126
127	dpavlin	367	Returns number of last record read into memory (size of database, really).
128
129	dpavlin	352	=cut
130
131			sub open_isis {
132			my $self = shift;
133			my $arg = {@_};
134
135			croak "need filename" if (! $arg->{'filename'});
136			my $code_page = $arg->{'code_page'} \|\| '852';
137
138	dpavlin	353	use OpenIsis;
139
140	dpavlin	352	#$self->{'isis_code_page'} = $code_page;
141
142			# create Text::Iconv object
143			my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
144
145	dpavlin	363	print STDERR "reading ISIS database '",$arg->{'filename'},"'\n" if ($self->{'debug'});
146
147	dpavlin	352	my $isis_db = OpenIsis::open($arg->{'filename'});
148
149			my $maxmfn = OpenIsis::maxRowid( $isis_db ) \|\| 1;
150
151	dpavlin	357	$maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});
152
153	dpavlin	363	print STDERR "processing $maxmfn records...\n" if ($self->{'debug'});
154
155	dpavlin	352	# read database
156			for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
157
158			# read record
159			my $row = OpenIsis::read( $isis_db, $mfn );
160			foreach my $k (keys %{$row}) {
161			if ($k ne "mfn") {
162			foreach my $l (@{$row->{$k}}) {
163			$l = $cp->convert($l);
164			# has subfields?
165			my $val;
166			if ($l =~ m/\^/) {
167			foreach my $t (split(/\^/,$l)) {
168			next if (! $t);
169			$val->{substr($t,0,1)} = substr($t,1);
170			}
171			} else {
172			$val = $l;
173			}
174
175			push @{$self->{'data'}->{$mfn}->{$k}}, $val;
176			}
177			}
178
179			}
180
181			# create lookup
182	dpavlin	355	my $rec = $self->{'data'}->{$mfn};
183			$self->create_lookup($rec, @{$arg->{'lookup'}});
184	dpavlin	352
185			}
186
187	dpavlin	362	$self->{'current_mfn'} = 1;
188
189	dpavlin	352	# store max mfn and return it.
190			return $self->{'max_mfn'} = $maxmfn;
191			}
192
193	dpavlin	362	=head2 fetch_rec
194
195			Fetch next record from database. It will also display progress bar (once
196			it's implemented, that is).
197
198			my $rec = $webpac->fetch_rec;
199
200			=cut
201
202			sub fetch_rec {
203			my $self = shift;
204
205			my $mfn = $self->{'current_mfn'}++ \|\| confess "it seems that you didn't load database!";
206
207			if ($mfn > $self->{'max_mfn'}) {
208			$self->{'current_mfn'} = $self->{'max_mfn'};
209			return;
210			}
211
212			return $self->{'data'}->{$mfn};
213			}
214
215	dpavlin	363	=head2 open_import_xml
216
217			Read file from C<import_xml/> directory and parse it.
218
219			$webpac->open_import_xml(type => 'isis');
220
221			=cut
222
223			sub open_import_xml {
224			my $self = shift;
225
226			my $arg = {@_};
227			confess "need type to load file from import_xml/" if (! $arg->{'type'});
228
229	dpavlin	366	$self->{'type'} = $arg->{'type'};
230	dpavlin	363
231	dpavlin	366	my $type_base = $arg->{'type'};
232	dpavlin	363	$type_base =~ s/_.*$//g;
233
234	dpavlin	366	$self->{'tag'} = $type2tag{$type_base};
235
236			print STDERR "using type ",$self->{'type'}," tag ",$self->{'tag'},"\n" if ($self->{'debug'});
237
238			my $f = "./import_xml/".$self->{'type'}.".xml";
239	dpavlin	363	confess "import_xml file '$f' doesn't exist!" if (! -e "$f");
240
241			print STDERR "reading '$f'\n" if ($self->{'debug'});
242
243			$self->{'import_xml'} = XMLin($f,
244	dpavlin	366	ForceArray => [ $self->{'tag'}, 'config', 'format' ],
245	dpavlin	363	ForceContent => 1
246			);
247
248			}
249
250	dpavlin	355	=head2 create_lookup
251
252			Create lookup from record using lookup definition.
253
254	dpavlin	367	$self->create_lookup($rec, @lookups);
255
256			Called internally by C<open_*> methods.
257
258	dpavlin	355	=cut
259
260			sub create_lookup {
261			my $self = shift;
262
263			my $rec = shift \|\| confess "need record to create lookup";
264			confess("need HASH as first argument!") if ($rec !~ /HASH/o);
265
266			foreach my $i (@_) {
267			if ($i->{'eval'}) {
268			my $eval = $self->fill_in($rec,$i->{'eval'});
269			my $key = $self->fill_in($rec,$i->{'key'});
270			my @val = $self->fill_in($rec,$i->{'val'});
271			if ($key && @val && eval $eval) {
272			push @{$self->{'lookup'}->{$key}}, @val;
273			}
274			} else {
275			my $key = $self->fill_in($rec,$i->{'key'});
276			my @val = $self->fill_in($rec,$i->{'val'});
277			if ($key && @val) {
278			push @{$self->{'lookup'}->{$key}}, @val;
279			}
280			}
281			}
282			}
283
284	dpavlin	356	=head2 get_data
285
286			Returns value from record.
287
288	dpavlin	367	my $text = $self->get_data(\$rec,$f,$sf,$i,\$found);
289	dpavlin	356
290			Arguments are:
291			record reference C<$rec>,
292			field C<$f>,
293			optional subfiled C<$sf>,
294			index for repeatable values C<$i>.
295
296	dpavlin	367	Optinal variable C<$found> will be incremeted if there
297	dpavlin	356	is field.
298
299			Returns value or empty string.
300
301			=cut
302
303			sub get_data {
304			my $self = shift;
305
306			my ($rec,$f,$sf,$i,$found) = @_;
307	dpavlin	367
308	dpavlin	356	if ($$rec->{$f}) {
309	dpavlin	367	return '' if (! $$rec->{$f}->[$i]);
310	dpavlin	356	if ($sf && $$rec->{$f}->[$i]->{$sf}) {
311			$$found++ if (defined($$found));
312			return $$rec->{$f}->[$i]->{$sf};
313			} elsif ($$rec->{$f}->[$i]) {
314			$$found++ if (defined($$found));
315	dpavlin	366	# it still might have subfield, just
316			# not specified, so we'll dump all
317			if ($$rec->{$f}->[$i] =~ /HASH/o) {
318			my $out;
319			foreach my $k (keys %{$$rec->{$f}->[$i]}) {
320			$out .= $$rec->{$f}->[$i]->{$k}." ";
321			}
322			return $out;
323			} else {
324			return $$rec->{$f}->[$i];
325			}
326	dpavlin	356	}
327			} else {
328			return '';
329			}
330			}
331
332	dpavlin	352	=head2 fill_in
333
334			Workhourse of all: takes record from in-memory structure of database and
335			strings with placeholders and returns string or array of with substituted
336			values from record.
337
338	dpavlin	367	my $text = $webpac->fill_in($rec,'v250^a');
339	dpavlin	352
340			Optional argument is ordinal number for repeatable fields. By default,
341	dpavlin	353	it's assume to be first repeatable field (fields are perl array, so first
342			element is 0).
343			Following example will read second value from repeatable field.
344	dpavlin	352
345	dpavlin	367	my $text = $webpac->fill_in($rec,'Title: v250^a',1);
346	dpavlin	353
347			This function B<does not> perform parsing of format to inteligenty skip
348			delimiters before fields which aren't used.
349
350	dpavlin	352	=cut
351
352			sub fill_in {
353			my $self = shift;
354
355			my $rec = shift \|\| confess "need data record";
356			my $format = shift \|\| confess "need format to parse";
357			# iteration (for repeatable fields)
358			my $i = shift \|\| 0;
359
360			# FIXME remove for speedup?
361	dpavlin	355	confess("need HASH as first argument!") if ($rec !~ /HASH/o);
362	dpavlin	352
363			my $found = 0;
364
365	dpavlin	359	my $eval_code;
366			# remove eval{...} from beginning
367			$eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
368
369	dpavlin	352	# do actual replacement of placeholders
370	dpavlin	356	$format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;
371	dpavlin	352
372	dpavlin	353	if ($found) {
373	dpavlin	359	if ($eval_code) {
374			my $eval = $self->fill_in($rec,$eval_code,$i);
375			return if (! eval $eval);
376			}
377	dpavlin	353	# do we have lookups?
378			if ($format =~ /\[[^\[\]]+\]/o) {
379			return $self->lookup($format);
380			} else {
381			return $format;
382			}
383	dpavlin	352	} else {
384			return;
385			}
386			}
387
388			=head2 lookup
389
390	dpavlin	355	Perform lookups on format supplied to it.
391	dpavlin	352
392	dpavlin	367	my $text = $self->lookup('[v900]');
393	dpavlin	352
394	dpavlin	355	Lookups can be nested (like C<[d:[a:[v900]]]>).
395
396	dpavlin	352	=cut
397
398			sub lookup {
399			my $self = shift;
400
401			my $tmp = shift \|\| confess "need format";
402
403	dpavlin	353	if ($tmp =~ /\[[^\[\]]+\]/o) {
404	dpavlin	352	my @in = ( $tmp );
405			my @out;
406			while (my $f = shift @in) {
407			if ($f =~ /\[([^\[\]]+)\]/) {
408			my $k = $1;
409			if ($self->{'lookup'}->{$k}) {
410			foreach my $nv (@{$self->{'lookup'}->{$k}}) {
411			my $tmp2 = $f;
412			$tmp2 =~ s/\[$k\]/$nv/g;
413			push @in, $tmp2;
414			}
415			} else {
416			undef $f;
417			}
418			} elsif ($f) {
419			push @out, $f;
420			}
421			}
422			return @out;
423			} else {
424			return $tmp;
425			}
426			}
427
428	dpavlin	356	=head2 parse
429
430			Perform smart parsing of string, skipping delimiters for fields which aren't
431			defined. It can also eval code in format starting with C<eval{...}> and
432			return output or nothing depending on eval code.
433
434	dpavlin	367	my $text = $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);
435	dpavlin	356
436			=cut
437
438			sub parse {
439			my $self = shift;
440
441	dpavlin	366	my ($rec, $format_utf8, $i) = @_;
442	dpavlin	356
443	dpavlin	366	return if (! $format_utf8);
444
445	dpavlin	358	confess("need HASH as first argument!") if ($rec !~ /HASH/o);
446	dpavlin	366	confess("need utf2cp Text::Iconv object!") if (! $self->{'utf2cp'});
447	dpavlin	358
448			$i = 0 if (! $i);
449
450	dpavlin	366	my $format = $self->{'utf2cp'}->convert($format_utf8) \|\| confess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'});
451
452	dpavlin	356	my @out;
453
454			my $eval_code;
455			# remove eval{...} from beginning
456			$eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
457
458	dpavlin	358	my $prefix;
459			my $all_found=0;
460	dpavlin	356
461	dpavlin	358	while ($format =~ s/^(.?)v(\d+)(?:\^(\w))//s) {
462	dpavlin	356
463	dpavlin	358	my $del = $1 \|\| '';
464	dpavlin	359	$prefix \|\|= $del if ($all_found == 0);
465	dpavlin	358
466			my $found = 0;
467			my $tmp = $self->get_data(\$rec,$2,$3,$i,\$found);
468
469	dpavlin	356	if ($found) {
470	dpavlin	358	push @out, $del;
471			push @out, $tmp;
472			$all_found += $found;
473	dpavlin	356	}
474			}
475
476	dpavlin	358	return if (! $all_found);
477	dpavlin	356
478	dpavlin	358	my $out = join('',@out) . $format;
479
480			# add prefix if not there
481			$out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);
482	dpavlin	367
483	dpavlin	359	if ($eval_code) {
484			my $eval = $self->fill_in($rec,$eval_code,$i);
485			return if (! eval $eval);
486			}
487
488	dpavlin	358	return $out;
489	dpavlin	356	}
490
491	dpavlin	367	=head2 parse_to_arr
492
493			Similar to C<parse>, but returns array of all repeatable fields
494
495			my @arr = $webpac->parse_to_arr($rec,'v250^a');
496
497			=cut
498
499			sub parse_to_arr {
500			my $self = shift;
501
502			my ($rec, $format_utf8) = @_;
503
504			confess("need HASH as first argument!") if ($rec !~ /HASH/o);
505			return if (! $format_utf8);
506
507			my $i = 0;
508			my @arr;
509
510			while (my $v = $self->parse($rec,$format_utf8,$i++)) {
511			push @arr, $v;
512			}
513
514			return @arr;
515			}
516
517	dpavlin	366	=head2 data_structure
518
519			Create in-memory data structure which represents layout from C<import_xml>.
520			It is used later to produce output.
521
522	dpavlin	368	my @ds = $webpac->data_structure($rec);
523	dpavlin	366
524			=cut
525
526			# private method _sort_by_order
527			# sort subrouting using order="" attribute
528			sub _sort_by_order {
529			my $self = shift;
530
531			my $va = $self->{'import_xml'}->{'indexer'}->{$a}->{'order'} \|\|
532			$self->{'import_xml'}->{'indexer'}->{$a};
533			my $vb = $self->{'import_xml'}->{'indexer'}->{$b}->{'order'} \|\|
534			$self->{'import_xml'}->{'indexer'}->{$b};
535
536			return $va <=> $vb;
537			}
538
539			sub data_structure {
540			my $self = shift;
541
542			my $rec = shift;
543			confess("need HASH as first argument!") if ($rec !~ /HASH/o);
544
545			my @sorted_tags;
546			if ($self->{tags_by_order}) {
547			@sorted_tags = @{$self->{tags_by_order}};
548			} else {
549			@sorted_tags = sort { $self->_sort_by_order } keys %{$self->{'import_xml'}->{'indexer'}};
550			$self->{tags_by_order} = \@sorted_tags;
551			}
552
553	dpavlin	368	my @ds;
554	dpavlin	366
555			foreach my $field (@sorted_tags) {
556
557			my $row;
558
559			#print "field $field [",$self->{'tag'},"] = ",Dumper($self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}});
560
561			foreach my $tag (@{$self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}}) {
562	dpavlin	367	my @v = $self->parse_to_arr($rec,$tag->{'content'});
563	dpavlin	366
564	dpavlin	367	next if (! @v);
565	dpavlin	366
566			# does tag have type?
567			if ($tag->{'type'}) {
568	dpavlin	367	push @{$row->{$tag->{'type'}}}, @v;
569	dpavlin	366	} else {
570	dpavlin	367	push @{$row->{'display'}}, @v;
571			push @{$row->{'swish'}}, @v;
572	dpavlin	366	}
573			}
574
575	dpavlin	368	if ($row) {
576			$row->{'tag'} = $field;
577			push @ds, $row;
578			}
579	dpavlin	366
580			}
581
582	dpavlin	370	return @ds;
583	dpavlin	366
584			}
585
586	dpavlin	370	=head2 output
587
588			Create output from in-memory data structure using Template Toolkit template.
589
590			my $text = $webpac->output( template => 'text.tt', data => @ds );
591
592			=cut
593
594			sub output {
595			my $self = shift;
596
597			my $args = {@_};
598
599			confess("need template name") if (! $args->{'template'});
600			confess("need data array") if (! $args->{'data'});
601
602			my $out;
603
604			$self->{'tt'}->process(
605			$args->{'template'},
606			$args,
607			\$out
608			) \|\| confess $self->{'tt'}->error();
609
610			return $out;
611			}
612
613	dpavlin	352	1;