trunk2/lib/WebPAC.pm

package WebPAC;

use warnings;
use strict;

use Carp;
use Text::Iconv;
use Config::IniFiles;
use XML::Simple;

use Data::Dumper;

=head1 NAME

WebPAC - base class for WebPAC

=head1 DESCRIPTION

This module implements methods used by WebPAC.

=head1 METHODS

=head2 new

This will create new instance of WebPAC using configuration specified by C<config_file>.

 my $webpac = new WebPAC(
        config_file => 'name.conf',
        [code_page => 'ISO-8859-2',]
 );

Default C<code_page> is C<ISO-8859-2>.

It will also read the configuration file
C<global.conf> (used by the indexer and the Web front-end)
and the configuration file specified by C<config_file>,
which describes the databases to be indexed.

=cut

# Maps a base data type (the import type with everything from the first
# underscore on removed, see open_import_xml) to the name of the XML tag
# that carries the format definitions for that type in the import_xml file.
# Only 'isis' is currently enabled; the other types are commented out.
my %type2tag = (
        'isis' => 'isis',
#       'excel' => 'column',
#       'marc' => 'marc',
#       'feed' => 'feed'
);

# Constructor. Takes name => value pairs which become the object's
# fields, reads 'global.conf' and the indexer configuration named by
# config_file, and prepares the UTF-8 -> code_page converter.
# Croaks if either configuration file cannot be opened.
sub new {
        my ($class, @options) = @_;

        my $self = { @options };
        bless $self, $class;

        # output code page falls back to ISO-8859-2 when none was given
        $self->{'code_page'} ||= 'ISO-8859-2';

        # global configuration
        $self->{global_config_file} = Config::IniFiles->new( -file => 'global.conf' )
                || croak "can't open 'global.conf'";

        # copy the settings we care about out of the [global] section
        my @global_vars = qw(
                dbi_dbd
                dbi_dsn
                dbi_user
                dbi_passwd
                show_progress
                my_unac_filter
        );
        foreach my $name (@global_vars) {
                $self->{global_config}->{$name} = $self->{global_config_file}->val('global', $name);
        }

        # indexer configuration
        $self->{indexer_config_file} = Config::IniFiles->new( -file => $self->{config_file} )
                || croak "can't open '$self->{config_file}'";

        # converter used by parse() to turn UTF-8 formats into the output code page
        $self->{'utf2cp'} = Text::Iconv->new('UTF-8' ,$self->{'code_page'});

        return $self;
}

=head2 open_isis

Open CDS/ISIS database using OpenIsis module and read all records to memory.

 $webpac->open_isis(
        filename => '/data/ISIS/ISIS',
        code_page => '852',
        limit_mfn => '500',
        lookup => [ ... ],
 );

By default, ISIS code page is assumed to be C<852>.

If the optional parameter C<limit_mfn> is set, only that many records are
read from the database (500 in the example above).

The C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
C<val>. The optional parameter C<eval> is perl code which is evaluated before the
value is stored in the index.

 lookup => [
  { 'key' => 'd:v900', 'val' => 'v250^a' },
  { 'eval' => '"v901^a" eq "Područje"',
    'key' => 'pa:v561^4:v562^4:v461^1',
    'val' => 'v900' },
 ]

Returns number of last record read into memory (size of database, really).

=cut

# Read a CDS/ISIS database into $self->{'data'}, keyed by MFN.
#
# Named arguments:
#   filename  - path of the ISIS database (required, croaks if missing)
#   code_page - code page of the ISIS data, defaults to '852'
#   limit_mfn - optional upper bound on the number of records to read;
#               for backward compatibility a 'limit_mfn' field on the
#               object is honoured when the argument is not given
#   lookup    - array ref of lookup definitions passed to create_lookup
#
# Every field value is converted from code_page to the object's output
# code page. Values containing '^' are split into a hash of
# subfield letter => text; other values are stored as plain strings.
#
# Sets current_mfn to 1, stores the number of the last record in
# max_mfn and returns it.
sub open_isis {
        my $self = shift;
        my $arg = {@_};

        croak "need filename" if (! $arg->{'filename'});
        my $code_page = $arg->{'code_page'} || '852';

        use OpenIsis;

        # converter from ISIS code page to output code page
        my $cp = Text::Iconv->new($code_page,$self->{'code_page'});

        print STDERR "reading ISIS database '",$arg->{'filename'},"'\n" if ($self->{'debug'});

        my $isis_db = OpenIsis::open($arg->{'filename'});

        my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;

        # limit_mfn is documented as an argument of this method; the
        # object field is still consulted so existing callers keep working.
        # The limit can only shrink the range: reading past the last record
        # would only produce empty records.
        my $limit_mfn = $arg->{'limit_mfn'} || $self->{'limit_mfn'};
        $maxmfn = $limit_mfn if ($limit_mfn && $limit_mfn < $maxmfn);

        print STDERR "processing $maxmfn records...\n" if ($self->{'debug'});

        my @lookups = @{ $arg->{'lookup'} || [] };

        # read database
        for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {

                # read record
                my $row = OpenIsis::read( $isis_db, $mfn );
                foreach my $k (keys %{$row}) {
                        next if ($k eq "mfn");

                        foreach my $raw (@{$row->{$k}}) {
                                my $text = $cp->convert($raw);

                                # has subfields?
                                my $val;
                                if ($text =~ m/\^/) {
                                        foreach my $t (split(/\^/,$text)) {
                                                next if (! $t);
                                                # first character is the subfield
                                                # identifier, the rest its value
                                                $val->{substr($t,0,1)} = substr($t,1);
                                        }
                                } else {
                                        $val = $text;
                                }

                                push @{$self->{'data'}->{$mfn}->{$k}}, $val;
                        }
                }

                # create lookup; a record without any fields leaves no entry
                # in 'data' and create_lookup would confess on it, so skip it
                my $rec = $self->{'data'}->{$mfn};
                $self->create_lookup($rec, @lookups) if ($rec);

        }

        $self->{'current_mfn'} = 1;

        # store max mfn and return it.
        return $self->{'max_mfn'} = $maxmfn;
}

=head2 fetch_rec

Fetch next record from database. It will also display progress bar (once
it's implemented, that is).

 my $rec = $webpac->fetch_rec;

=cut

# Return the record at the current cursor position and advance the cursor.
# Confesses when the cursor was never initialised (no database loaded).
# Returns nothing once the cursor has moved past max_mfn; at that point
# the cursor is set back to max_mfn, so a further call hands out the last
# record once more.
sub fetch_rec {
        my ($self) = @_;

        # the cursor is advanced before the sanity check, as a side effect
        my $mfn = $self->{'current_mfn'}++;
        confess "it seems that you didn't load database!" if (! $mfn);

        return $self->{'data'}->{$mfn} if ($mfn <= $self->{'max_mfn'});

        # past the end: clamp cursor and signal end of data
        $self->{'current_mfn'} = $self->{'max_mfn'};
        return;
}

=head2 open_import_xml

Read file from C<import_xml/> directory and parse it.

 $webpac->open_import_xml(type => 'isis');

=cut

# Load and parse ./import_xml/<type>.xml into $self->{'import_xml'}.
#
# Named arguments:
#   type - name of the import definition (required, confesses if missing)
#
# The part of the type before the first underscore selects, via
# %type2tag, the tag name stored in $self->{'tag'}; that tag, 'config'
# and 'format' are always parsed as arrays. Confesses if the file does
# not exist. Returns true.
sub open_import_xml {
        my $self = shift;

        my $arg = {@_};
        confess "need type to load file from import_xml/" if (! $arg->{'type'});

        $self->{'type'} = $arg->{'type'};

        # strip everything from the first underscore to get the base type
        my $type_base = $arg->{'type'};
        $type_base =~ s/_.*$//g;

        $self->{'tag'} = $type2tag{$type_base};

        print STDERR "using type ",$self->{'type'}," tag ",$self->{'tag'},"\n" if ($self->{'debug'});

        my $f = "./import_xml/".$self->{'type'}.".xml";
        confess "import_xml file '$f' doesn't exist!" if (! -e "$f");

        print STDERR "reading '$f'\n" if ($self->{'debug'});

        $self->{'import_xml'} = XMLin($f,
                ForceArray => [ $self->{'tag'}, 'config', 'format' ],
                ForceContent => 1
        );

        # diagnostic dump: only in debug mode and on STDERR, like every
        # other diagnostic in this module
        print STDERR Dumper($self->{'import_xml'}) if ($self->{'debug'});

        return 1;
}

=head2 create_lookup

Create lookup from record using lookup definition.

 $self->create_lookup($rec, @lookups);

Called internally by C<open_*> methods.

=cut

# Add entries to $self->{'lookup'} for one record.
#
# Arguments: the record (hash ref) followed by any number of lookup
# definitions, each a hash ref with 'key' and 'val' formats and an
# optional 'eval' format. All formats are expanded with fill_in.
# The values are appended under the expanded key when both key and
# values are non-empty and, if 'eval' is present, the expanded eval
# code evaluates to true.
sub create_lookup {
        my ($self, $rec, @lookups) = @_;

        confess "need record to create lookup" if (! $rec);
        confess("need HASH as first argument!") unless ($rec =~ /HASH/o);

        foreach my $lookup (@lookups) {
                my $has_condition = $lookup->{'eval'};

                # condition is expanded first, then key, then values
                my $condition;
                $condition = $self->fill_in($rec,$lookup->{'eval'}) if ($has_condition);

                my $key = $self->fill_in($rec,$lookup->{'key'});
                my @val = $self->fill_in($rec,$lookup->{'val'});

                next unless ($key && @val);

                # string eval: the condition is perl code from the lookup
                # definition and is only run once key and values exist
                next if ($has_condition && ! eval $condition);

                push @{$self->{'lookup'}->{$key}}, @val;
        }
}

=head2 get_data

Returns value from record.

 my $text = $self->get_data(\$rec,$f,$sf,$i,\$found);

Arguments are:
record reference C<$rec>,
field C<$f>,
optional subfield C<$sf>,
index for repeatable values C<$i>.

The optional variable C<$found> will be incremented if the
field exists.

Returns value or empty string.

=cut

# Fetch one value from a record.
#
# Arguments:
#   $rec   - reference to the record hash ref (it is used as $$rec)
#   $f     - field name
#   $sf    - optional subfield identifier
#   $i     - index of the repeatable value (defaults to 0)
#   $found - optional reference to a counter; when it refers to a
#            defined value it is incremented if a value exists at
#            field/index
#
# Returns '' when the field or the indexed value is missing or false.
# For a value with subfields: the requested subfield if it has a true
# value, otherwise all subfield values (in sorted subfield order), each
# followed by a space. For a plain string value the whole string is
# returned, whether or not a subfield was requested.
sub get_data {
        my $self = shift;

        my ($rec,$f,$sf,$i,$found) = @_;

        return '' if (! $$rec->{$f});

        $i ||= 0;

        my $value = $$rec->{$f}->[$i];
        return '' if (! $value);

        # $found is optional: only touch it when we really got a
        # reference to a defined counter
        $$found++ if (ref($found) && defined($$found));

        # plain string: there are no subfields to pick from, so hand back
        # the whole value instead of dereferencing a string as a hash
        my $has_subfields = (ref($value) && $value =~ /HASH/) ? 1 : 0;
        return $value if (! $has_subfields);

        # subfield identifiers may be digits, so '0' must count as given
        if (defined($sf) && length($sf) && $value->{$sf}) {
                return $value->{$sf};
        }

        # no subfield requested (or it is not present): dump all
        # subfields, sorted so the result does not depend on hash order
        return join('', map { $value->{$_}." " } sort keys %{$value});
}

=head2 fill_in

Workhorse of all: takes a record from the in-memory structure of the database and
a string with placeholders and returns a string or an array with values
from the record substituted.

 my $text = $webpac->fill_in($rec,'v250^a');

The optional argument is the ordinal number for repeatable fields. By default,
it is assumed to be the first repeatable field (fields are perl arrays, so the first
element is 0).
The following example will read the second value from a repeatable field.

 my $text = $webpac->fill_in($rec,'Title: v250^a',1);

This function B<does not> parse the format to intelligently skip
delimiters before fields which aren't used.

=cut

# Substitute vNNN and vNNN^x placeholders in a format with values from
# a record.
#
# Arguments:
#   $rec    - record hash ref (required, confesses if missing or not a hash)
#   $format - format string (required); may start with eval{...}
#   $i      - index for repeatable fields, defaults to 0
#
# Returns nothing when no placeholder produced a value, or when the
# leading eval{...} code (itself expanded with fill_in) evaluates to
# false. Otherwise returns the expanded format; if it still contains
# [...] lookups the result of lookup() is returned instead.
# NOTE(review): lookup() returns a list, so a scalar-context caller gets
# the element count for formats with lookups - confirm callers expect this.
sub fill_in {
        my $self = shift;

        my $rec = shift || confess "need data record";
        my $format = shift || confess "need format to parse";
        # iteration (for repeatable fields)
        my $i = shift || 0;

        # FIXME remove for speedup?
        confess("need HASH as first argument!") if ($rec !~ /HASH/o);

        my $found = 0;

        my $eval_code;
        # remove eval{...} from beginning; the braces are escaped because
        # an unescaped literal '{' after a letter is a fatal regex error
        # on perl 5.26 and later
        $eval_code = $1 if ($format =~ s/^eval\{([^}]+)\}//s);

        # do actual replacement of placeholders; get_data bumps $found
        # for every placeholder that has a value
        $format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;

        return if (! $found);

        if ($eval_code) {
                # string eval of perl code taken from the format definition;
                # formats must come from a trusted source
                my $eval = $self->fill_in($rec,$eval_code,$i);
                return if (! eval $eval);
        }

        # do we have lookups?
        return $self->lookup($format) if ($format =~ /\[[^\[\]]+\]/o);

        return $format;
}

=head2 lookup

Perform lookups on format supplied to it.

 my $text = $self->lookup('[v900]');

Lookups can be nested (like C<[d:[a:[v900]]]>).

=cut

# Resolve [key] lookups in a format using $self->{'lookup'}.
#
# Argument: the format string (required, confesses if missing).
#
# A format without any [key] is returned unchanged. Otherwise the
# innermost [key] of each candidate is replaced by every value stored
# for that key, producing one new candidate per value, until no
# brackets are left; nested lookups are therefore resolved inside-out.
# Candidates whose key has no lookup values are dropped.
# Returns the list of fully resolved strings.
sub lookup {
        my $self = shift;

        my $tmp = shift || confess "need format";

        return $tmp if ($tmp !~ /\[[^\[\]]+\]/);

        my @in = ( $tmp );
        my @out;

        # loop on the queue itself, not on the truth of the candidate, so
        # a candidate of "0" does not end processing early
        while (@in) {
                my $f = shift @in;
                next if (! defined($f) || $f eq '');

                if ($f =~ /\[([^\[\]]+)\]/) {
                        my $k = $1;

                        # unknown key: drop this candidate
                        my $values = $self->{'lookup'}->{$k};
                        next if (! $values);

                        foreach my $nv (@{$values}) {
                                my $tmp2 = $f;
                                # keys are built from record data and may contain
                                # regex metacharacters; without \Q the substitution
                                # could fail and requeue the same candidate forever
                                $tmp2 =~ s/\[\Q$k\E\]/$nv/g;
                                push @in, $tmp2;
                        }
                } else {
                        push @out, $f;
                }
        }

        return @out;
}

=head2 parse

Perform smart parsing of string, skipping delimiters for fields which aren't
defined. It can also eval code in format starting with C<eval{...}> and
return output or nothing depending on eval code.

 my $text = $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);

=cut

# Expand a UTF-8 format against a record, dropping the delimiter text
# in front of placeholders that have no value.
#
# Arguments:
#   $rec         - record hash ref (confesses if it is not a hash)
#   $format_utf8 - format in UTF-8; returns nothing when it is false
#   $i           - index for repeatable fields, defaults to 0
#
# The format is first converted to the output code page with the
# utf2cp converter (confesses if the converter is missing or the
# conversion fails). A leading eval{...} is removed and, once output
# was produced, expanded with fill_in and evaluated; a false result
# makes parse return nothing. Returns nothing as well when no
# placeholder produced a value.
sub parse {
        my $self = shift;

        my ($rec, $format_utf8, $i) = @_;

        return if (! $format_utf8);

        confess("need HASH as first argument!") if ($rec !~ /HASH/o);
        confess("need utf2cp Text::Iconv object!") if (! $self->{'utf2cp'});

        $i = 0 if (! $i);

        my $format = $self->{'utf2cp'}->convert($format_utf8) || confess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'});

        my @out;

        my $eval_code;
        # remove eval{...} from beginning; the braces are escaped because
        # an unescaped literal '{' after a letter is a fatal regex error
        # on perl 5.26 and later
        $eval_code = $1 if ($format =~ s/^eval\{([^}]+)\}//s);

        my $prefix;
        my $all_found=0;

        # consume the format one placeholder at a time: $1 is the text in
        # front of the placeholder, $2 the field and $3 the subfield
        while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))*//s) {

                my $del = $1 || '';
                # remember the first non-empty delimiter seen before any
                # field produced a value
                $prefix ||= $del if ($all_found == 0);

                my $found = 0;
                my $tmp = $self->get_data(\$rec,$2,$3,$i,\$found);

                # delimiter and value are kept only when the field exists
                if ($found) {
                        push @out, $del;
                        push @out, $tmp;
                        $all_found += $found;
                }
        }

        return if (! $all_found);

        # whatever is left of the format after the last placeholder is
        # appended unchanged
        my $out = join('',@out) . $format;

        # add prefix if not there
        $out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);

        if ($eval_code) {
                # string eval of perl code taken from the format definition;
                # formats must come from a trusted source
                my $eval = $self->fill_in($rec,$eval_code,$i);
                return if (! eval $eval);
        }

        return $out;
}

=head2 parse_to_arr

Similar to C<parse>, but returns array of all repeatable fields

 my @arr = $webpac->parse_to_arr($rec,'v250^a');

=cut

# Run parse() for repetition 0, 1, 2, ... of a format and collect the
# results until parse() returns a false value.
# Confesses when $rec is not a hash; returns nothing for a false format.
sub parse_to_arr {
        my ($self, $rec, $format_utf8) = @_;

        confess("need HASH as first argument!") unless ($rec =~ /HASH/o);
        return unless ($format_utf8);

        my @arr;
        for (my $i = 0; ; $i++) {
                my $v = $self->parse($rec,$format_utf8,$i);
                last if (! $v);
                push @arr, $v;
        }

        return @arr;
}

=head2 data_structure

Create in-memory data structure which represents layout from C<import_xml>.
It is used later to produce output.

 my $ds = $webpac->data_structure($rec);

=cut

# private method _sort_by_order
# Sort comparator for indexer tag names: compares numerically the
# order="" attribute of the two tags held in the package variables
# $a and $b. When a tag has no true 'order' value, the indexer entry
# itself is used as the sort value.
sub _sort_by_order {
        my ($self) = @_;

        my $order_of = sub {
                my $tag = shift;
                return $self->{'import_xml'}->{'indexer'}->{$tag}->{'order'} ||
                        $self->{'import_xml'}->{'indexer'}->{$tag};
        };

        return $order_of->($a) <=> $order_of->($b);
}

# Build the output data structure for one record from the layout in
# $self->{'import_xml'}->{'indexer'}.
#
# Argument: record hash ref (confesses if it is not a hash).
#
# Indexer fields are processed in the order given by _sort_by_order;
# the sorted list is computed once and cached in $self->{tags_by_order}.
# For each field, every format tag of the current type ($self->{'tag'})
# is expanded with parse_to_arr. Values of a tag with a 'type' attribute
# are stored under that type, all others under both 'display' and
# 'swish'.
#
# Returns a hash ref of field name => [ row ], or undef when no field
# produced any value.
sub data_structure {
        my $self = shift;

        my $rec = shift;
        confess("need HASH as first argument!") if ($rec !~ /HASH/o);

        # sort the indexer fields only once per object
        if (! $self->{tags_by_order}) {
                my @by_order = sort { $self->_sort_by_order } keys %{$self->{'import_xml'}->{'indexer'}};
                $self->{tags_by_order} = \@by_order;
        }
        my @sorted_tags = @{$self->{tags_by_order}};

        my $ds;

        foreach my $field (@sorted_tags) {

                my $row;

                foreach my $tag (@{$self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}}) {
                        my @v = $self->parse_to_arr($rec,$tag->{'content'});

                        next if (! @v);

                        # does tag have type?
                        if ($tag->{'type'}) {
                                push @{$row->{$tag->{'type'}}}, @v;
                        } else {
                                push @{$row->{'display'}}, @v;
                                push @{$row->{'swish'}}, @v;
                        }
                }

                push @{$ds->{$field}}, $row if ($row);

        }

        # diagnostic dump: only in debug mode and on STDERR, like every
        # other diagnostic in this module
        print STDERR "data_structure => ",Dumper($ds) if ($self->{'debug'});

        # hand the structure back to the caller, as the documented
        # usage expects
        return $ds;
}

1;
1	package WebPAC;
2
3	use warnings;
4	use strict;
5
6	use Carp;
7	use Text::Iconv;
8	use Config::IniFiles;
9	use XML::Simple;
10
11	use Data::Dumper;
12
13	=head1 NAME
14
15	WebPAC - base class for WebPAC
16
17	=head1 DESCRIPTION
18
19	This module implements methods used by WebPAC.
20
21	=head1 METHODS
22
23	=head2 new
24
25	This will create new instance of WebPAC using configuration specified by C<config_file>.
26
27	my $webpac = new WebPAC(
28	config_file => 'name.conf',
29	[code_page => 'ISO-8859-2',]
30	);
31
32	Default C<code_page> is C<ISO-8859-2>.
33
34	It will also read configuration files
35	C<global.conf> (used by indexer and Web font-end)
36	and configuration file specified by C<config_file>
37	which describes databases to be indexed.
38
39	=cut
40
41	# mapping between data type and tag which specify
42	# format in XML file
43	my %type2tag = (
44	'isis' => 'isis',
45	# 'excel' => 'column',
46	# 'marc' => 'marc',
47	# 'feed' => 'feed'
48	);
49
50	sub new {
51	my $class = shift;
52	my $self = {@_};
53	bless($self, $class);
54
55	# fill in default values
56	# output codepage
57	$self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});
58
59	#
60	# read global.conf
61	#
62
63	$self->{global_config_file} = new Config::IniFiles( -file => 'global.conf' ) \|\| croak "can't open 'global.conf'";
64
65	# read global config parametars
66	foreach my $var (qw(
67	dbi_dbd
68	dbi_dsn
69	dbi_user
70	dbi_passwd
71	show_progress
72	my_unac_filter
73	)) {
74	$self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
75	}
76
77	#
78	# read indexer config file
79	#
80
81	$self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) \|\| croak "can't open '$self->{config_file}'";
82
83	$self->{'utf2cp'} = Text::Iconv->new('UTF-8' ,$self->{'code_page'});
84	return $self;
85	}
86
87	=head2 open_isis
88
89	Open CDS/ISIS database using OpenIsis module and read all records to memory.
90
91	$webpac->open_isis(
92	filename => '/data/ISIS/ISIS',
93	code_page => '852',
94	limit_mfn => '500',
95	lookup => [ ... ],
96	);
97
98	By default, ISIS code page is assumed to be C<852>.
99
100	If optional parametar C<limit_mfn> is set, it will read just 500 records
101	from database in example above.
102
103	C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
104	C<val>. Optional parametar C<eval> is perl code to evaluate before storing
105	value in index.
106
107	lookup => [
108	{ 'key' => 'd:v900', 'val' => 'v250^a' },
109	{ 'eval' => '"v901^a" eq "Podruèje"',
110	'key' => 'pa:v561^4:v562^4:v461^1',
111	'val' => 'v900' },
112	]
113
114	Returns number of last record read into memory (size of database, really).
115
116	=cut
117
118	sub open_isis {
119	my $self = shift;
120	my $arg = {@_};
121
122	croak "need filename" if (! $arg->{'filename'});
123	my $code_page = $arg->{'code_page'} \|\| '852';
124
125	use OpenIsis;
126
127	#$self->{'isis_code_page'} = $code_page;
128
129	# create Text::Iconv object
130	my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
131
132	print STDERR "reading ISIS database '",$arg->{'filename'},"'\n" if ($self->{'debug'});
133
134	my $isis_db = OpenIsis::open($arg->{'filename'});
135
136	my $maxmfn = OpenIsis::maxRowid( $isis_db ) \|\| 1;
137
138	$maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});
139
140	print STDERR "processing $maxmfn records...\n" if ($self->{'debug'});
141
142	# read database
143	for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
144
145	# read record
146	my $row = OpenIsis::read( $isis_db, $mfn );
147	foreach my $k (keys %{$row}) {
148	if ($k ne "mfn") {
149	foreach my $l (@{$row->{$k}}) {
150	$l = $cp->convert($l);
151	# has subfields?
152	my $val;
153	if ($l =~ m/\^/) {
154	foreach my $t (split(/\^/,$l)) {
155	next if (! $t);
156	$val->{substr($t,0,1)} = substr($t,1);
157	}
158	} else {
159	$val = $l;
160	}
161
162	push @{$self->{'data'}->{$mfn}->{$k}}, $val;
163	}
164	}
165
166	}
167
168	# create lookup
169	my $rec = $self->{'data'}->{$mfn};
170	$self->create_lookup($rec, @{$arg->{'lookup'}});
171
172	}
173
174	$self->{'current_mfn'} = 1;
175
176	# store max mfn and return it.
177	return $self->{'max_mfn'} = $maxmfn;
178	}
179
180	=head2 fetch_rec
181
182	Fetch next record from database. It will also display progress bar (once
183	it's implemented, that is).
184
185	my $rec = $webpac->fetch_rec;
186
187	=cut
188
189	sub fetch_rec {
190	my $self = shift;
191
192	my $mfn = $self->{'current_mfn'}++ \|\| confess "it seems that you didn't load database!";
193
194	if ($mfn > $self->{'max_mfn'}) {
195	$self->{'current_mfn'} = $self->{'max_mfn'};
196	return;
197	}
198
199	return $self->{'data'}->{$mfn};
200	}
201
202	=head2 open_import_xml
203
204	Read file from C<import_xml/> directory and parse it.
205
206	$webpac->open_import_xml(type => 'isis');
207
208	=cut
209
210	sub open_import_xml {
211	my $self = shift;
212
213	my $arg = {@_};
214	confess "need type to load file from import_xml/" if (! $arg->{'type'});
215
216	$self->{'type'} = $arg->{'type'};
217
218	my $type_base = $arg->{'type'};
219	$type_base =~ s/_.*$//g;
220
221	$self->{'tag'} = $type2tag{$type_base};
222
223	print STDERR "using type ",$self->{'type'}," tag ",$self->{'tag'},"\n" if ($self->{'debug'});
224
225	my $f = "./import_xml/".$self->{'type'}.".xml";
226	confess "import_xml file '$f' doesn't exist!" if (! -e "$f");
227
228	print STDERR "reading '$f'\n" if ($self->{'debug'});
229
230	$self->{'import_xml'} = XMLin($f,
231	ForceArray => [ $self->{'tag'}, 'config', 'format' ],
232	ForceContent => 1
233	);
234
235	print Dumper($self->{'import_xml'});
236
237	}
238
239	=head2 create_lookup
240
241	Create lookup from record using lookup definition.
242
243	$self->create_lookup($rec, @lookups);
244
245	Called internally by C<open_*> methods.
246
247	=cut
248
249	sub create_lookup {
250	my $self = shift;
251
252	my $rec = shift \|\| confess "need record to create lookup";
253	confess("need HASH as first argument!") if ($rec !~ /HASH/o);
254
255	foreach my $i (@_) {
256	if ($i->{'eval'}) {
257	my $eval = $self->fill_in($rec,$i->{'eval'});
258	my $key = $self->fill_in($rec,$i->{'key'});
259	my @val = $self->fill_in($rec,$i->{'val'});
260	if ($key && @val && eval $eval) {
261	push @{$self->{'lookup'}->{$key}}, @val;
262	}
263	} else {
264	my $key = $self->fill_in($rec,$i->{'key'});
265	my @val = $self->fill_in($rec,$i->{'val'});
266	if ($key && @val) {
267	push @{$self->{'lookup'}->{$key}}, @val;
268	}
269	}
270	}
271	}
272
273	=head2 get_data
274
275	Returns value from record.
276
277	my $text = $self->get_data(\$rec,$f,$sf,$i,\$found);
278
279	Arguments are:
280	record reference C<$rec>,
281	field C<$f>,
282	optional subfiled C<$sf>,
283	index for repeatable values C<$i>.
284
285	Optinal variable C<$found> will be incremeted if there
286	is field.
287
288	Returns value or empty string.
289
290	=cut
291
292	sub get_data {
293	my $self = shift;
294
295	my ($rec,$f,$sf,$i,$found) = @_;
296
297	if ($$rec->{$f}) {
298	return '' if (! $$rec->{$f}->[$i]);
299	if ($sf && $$rec->{$f}->[$i]->{$sf}) {
300	$$found++ if (defined($$found));
301	return $$rec->{$f}->[$i]->{$sf};
302	} elsif ($$rec->{$f}->[$i]) {
303	$$found++ if (defined($$found));
304	# it still might have subfield, just
305	# not specified, so we'll dump all
306	if ($$rec->{$f}->[$i] =~ /HASH/o) {
307	my $out;
308	foreach my $k (keys %{$$rec->{$f}->[$i]}) {
309	$out .= $$rec->{$f}->[$i]->{$k}." ";
310	}
311	return $out;
312	} else {
313	return $$rec->{$f}->[$i];
314	}
315	}
316	} else {
317	return '';
318	}
319	}
320
321	=head2 fill_in
322
323	Workhourse of all: takes record from in-memory structure of database and
324	strings with placeholders and returns string or array of with substituted
325	values from record.
326
327	my $text = $webpac->fill_in($rec,'v250^a');
328
329	Optional argument is ordinal number for repeatable fields. By default,
330	it's assume to be first repeatable field (fields are perl array, so first
331	element is 0).
332	Following example will read second value from repeatable field.
333
334	my $text = $webpac->fill_in($rec,'Title: v250^a',1);
335
336	This function B<does not> perform parsing of format to inteligenty skip
337	delimiters before fields which aren't used.
338
339	=cut
340
341	sub fill_in {
342	my $self = shift;
343
344	my $rec = shift \|\| confess "need data record";
345	my $format = shift \|\| confess "need format to parse";
346	# iteration (for repeatable fields)
347	my $i = shift \|\| 0;
348
349	# FIXME remove for speedup?
350	confess("need HASH as first argument!") if ($rec !~ /HASH/o);
351
352	my $found = 0;
353
354	my $eval_code;
355	# remove eval{...} from beginning
356	$eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
357
358	# do actual replacement of placeholders
359	$format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;
360
361	if ($found) {
362	if ($eval_code) {
363	my $eval = $self->fill_in($rec,$eval_code,$i);
364	return if (! eval $eval);
365	}
366	# do we have lookups?
367	if ($format =~ /\[[^\[\]]+\]/o) {
368	return $self->lookup($format);
369	} else {
370	return $format;
371	}
372	} else {
373	return;
374	}
375	}
376
377	=head2 lookup
378
379	Perform lookups on format supplied to it.
380
381	my $text = $self->lookup('[v900]');
382
383	Lookups can be nested (like C<[d:[a:[v900]]]>).
384
385	=cut
386
387	sub lookup {
388	my $self = shift;
389
390	my $tmp = shift \|\| confess "need format";
391
392	if ($tmp =~ /\[[^\[\]]+\]/o) {
393	my @in = ( $tmp );
394	my @out;
395	while (my $f = shift @in) {
396	if ($f =~ /\[([^\[\]]+)\]/) {
397	my $k = $1;
398	if ($self->{'lookup'}->{$k}) {
399	foreach my $nv (@{$self->{'lookup'}->{$k}}) {
400	my $tmp2 = $f;
401	$tmp2 =~ s/\[$k\]/$nv/g;
402	push @in, $tmp2;
403	}
404	} else {
405	undef $f;
406	}
407	} elsif ($f) {
408	push @out, $f;
409	}
410	}
411	return @out;
412	} else {
413	return $tmp;
414	}
415	}
416
417	=head2 parse
418
419	Perform smart parsing of string, skipping delimiters for fields which aren't
420	defined. It can also eval code in format starting with C<eval{...}> and
421	return output or nothing depending on eval code.
422
423	my $text = $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);
424
425	=cut
426
427	sub parse {
428	my $self = shift;
429
430	my ($rec, $format_utf8, $i) = @_;
431
432	return if (! $format_utf8);
433
434	confess("need HASH as first argument!") if ($rec !~ /HASH/o);
435	confess("need utf2cp Text::Iconv object!") if (! $self->{'utf2cp'});
436
437	$i = 0 if (! $i);
438
439	my $format = $self->{'utf2cp'}->convert($format_utf8) \|\| confess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'});
440
441	my @out;
442
443	my $eval_code;
444	# remove eval{...} from beginning
445	$eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
446
447	my $prefix;
448	my $all_found=0;
449
450	while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))*//s) {
451
452	my $del = $1 \|\| '';
453	$prefix \|\|= $del if ($all_found == 0);
454
455	my $found = 0;
456	my $tmp = $self->get_data(\$rec,$2,$3,$i,\$found);
457
458	if ($found) {
459	push @out, $del;
460	push @out, $tmp;
461	$all_found += $found;
462	}
463	}
464
465	return if (! $all_found);
466
467	my $out = join('',@out) . $format;
468
469	# add prefix if not there
470	$out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);
471
472	if ($eval_code) {
473	my $eval = $self->fill_in($rec,$eval_code,$i);
474	return if (! eval $eval);
475	}
476
477	return $out;
478	}
479
480	=head2 parse_to_arr
481
482	Similar to C<parse>, but returns array of all repeatable fields
483
484	my @arr = $webpac->parse_to_arr($rec,'v250^a');
485
486	=cut
487
488	sub parse_to_arr {
489	my $self = shift;
490
491	my ($rec, $format_utf8) = @_;
492
493	confess("need HASH as first argument!") if ($rec !~ /HASH/o);
494	return if (! $format_utf8);
495
496	my $i = 0;
497	my @arr;
498
499	while (my $v = $self->parse($rec,$format_utf8,$i++)) {
500	push @arr, $v;
501	}
502
503	return @arr;
504	}
505
506	=head2 data_structure
507
508	Create in-memory data structure which represents layout from C<import_xml>.
509	It is used later to produce output.
510
511	my $ds = $webpac->data_structure($rec);
512
513	=cut
514
515	# private method _sort_by_order
516	# sort subrouting using order="" attribute
517	sub _sort_by_order {
518	my $self = shift;
519
520	my $va = $self->{'import_xml'}->{'indexer'}->{$a}->{'order'} \|\|
521	$self->{'import_xml'}->{'indexer'}->{$a};
522	my $vb = $self->{'import_xml'}->{'indexer'}->{$b}->{'order'} \|\|
523	$self->{'import_xml'}->{'indexer'}->{$b};
524
525	return $va <=> $vb;
526	}
527
528	sub data_structure {
529	my $self = shift;
530
531	my $rec = shift;
532	confess("need HASH as first argument!") if ($rec !~ /HASH/o);
533
534	my @sorted_tags;
535	if ($self->{tags_by_order}) {
536	@sorted_tags = @{$self->{tags_by_order}};
537	} else {
538	@sorted_tags = sort { $self->_sort_by_order } keys %{$self->{'import_xml'}->{'indexer'}};
539	$self->{tags_by_order} = \@sorted_tags;
540	}
541
542	my $ds;
543
544	foreach my $field (@sorted_tags) {
545
546	my $row;
547
548	#print "field $field [",$self->{'tag'},"] = ",Dumper($self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}});
549
550	foreach my $tag (@{$self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}}) {
551	my @v = $self->parse_to_arr($rec,$tag->{'content'});
552
553	next if (! @v);
554
555	# does tag have type?
556	if ($tag->{'type'}) {
557	push @{$row->{$tag->{'type'}}}, @v;
558	} else {
559	push @{$row->{'display'}}, @v;
560	push @{$row->{'swish'}}, @v;
561	}
562	}
563
564	push @{$ds->{$field}}, $row if ($row);
565
566	}
567
568	print "data_structure => ",Dumper($ds);
569
570	}
571
572	1;