trunk2/lib/WebPAC.pm

package WebPAC;

use Carp;
use Text::Iconv;
use Config::IniFiles;
use XML::Simple;

use Data::Dumper;

=head1 NAME

WebPAC - base class for WebPAC

=head1 DESCRIPTION

This module implements methods used by WebPAC.

=head1 METHODS

=head2 new

This will create new instance of WebPAC using configuration specified by C<config_file>.

 my $webpac = new WebPAC(
        config_file => 'name.conf',
        [code_page => 'ISO-8859-2',]
 );

Default C<code_page> is C<ISO-8859-2>.

It will also read configuration files
C<global.conf> (used by indexer and Web front-end)
and configuration file specified by C<config_file>
which describes databases to be indexed.

=cut

# mapping between data type and tag which specify
# format in XML file
my %type2tag = (
        'isis' => 'isis',
#       'excel' => 'column',
#       'marc' => 'marc',
#       'feed' => 'feed'
);

sub new {
        my $class = shift;
        my $self = {@_};
        bless($self, $class);

        # fill in default values
        # output codepage
        $self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});

        #
        # read global.conf
        #

        $self->{global_config_file} = new Config::IniFiles( -file => 'global.conf' ) || croak "can't open 'global.conf'";

        # read global config parametars
        foreach my $var (qw(
                        dbi_dbd
                        dbi_dsn
                        dbi_user
                        dbi_passwd
                        show_progress
                        my_unac_filter
                )) {
                $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
        }

        #
        # read indexer config file
        #

        $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'";

        $self->{'utf2cp'} = Text::Iconv->new('UTF-8' ,$self->{'code_page'});
        return $self;
}

=head2 open_isis

Open CDS/ISIS database using OpenIsis module and read all records to memory.

 $webpac->open_isis(
        filename => '/data/ISIS/ISIS',
        code_page => '852',
        limit_mfn => '500',
        lookup => [ ... ],
 );

By default, ISIS code page is assumed to be C<852>.

If the optional parameter C<limit_mfn> is set, only that many records are read
from the database (500 in the example above).

Returns number of last record read into memory (size of database, really).

The C<lookup> argument is an array of lookups to create. Each lookup must have
C<key> and C<val>. The optional parameter C<eval> is perl code which is evaluated
before the value is stored in the index.

 lookup => [
  { 'key' => 'd:v900', 'val' => 'v250^a' },
  { 'eval' => '"v901^a" eq "Podruèje"',
    'key' => 'pa:v561^4:v562^4:v461^1',
    'val' => 'v900' },
 ]

=cut

sub open_isis {
        my $self = shift;
        my $arg = {@_};

        croak "need filename" if (! $arg->{'filename'});
        my $code_page = $arg->{'code_page'} || '852';

        use OpenIsis;

        #$self->{'isis_code_page'} = $code_page;

        # create Text::Iconv object
        my $cp = Text::Iconv->new($code_page,$self->{'code_page'});

        print STDERR "reading ISIS database '",$arg->{'filename'},"'\n" if ($self->{'debug'});

        my $isis_db = OpenIsis::open($arg->{'filename'});

        my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;

        $maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});

        print STDERR "processing $maxmfn records...\n" if ($self->{'debug'});

        # read database
        for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {

                # read record
                my $row = OpenIsis::read( $isis_db, $mfn );
                foreach my $k (keys %{$row}) {
                        if ($k ne "mfn") {
                                foreach my $l (@{$row->{$k}}) {
                                        $l = $cp->convert($l);
                                        # has subfields?
                                        my $val;
                                        if ($l =~ m/\^/) {
                                                foreach my $t (split(/\^/,$l)) {
                                                        next if (! $t);
                                                        $val->{substr($t,0,1)} = substr($t,1);
                                                }
                                        } else {
                                                $val = $l;
                                        }

                                        push @{$self->{'data'}->{$mfn}->{$k}}, $val;
                                }
                        }

                }

                # create lookup
                my $rec = $self->{'data'}->{$mfn};
                $self->create_lookup($rec, @{$arg->{'lookup'}});

        }

        $self->{'current_mfn'} = 1;

        # store max mfn and return it.
        return $self->{'max_mfn'} = $maxmfn;
}

=head2 fetch_rec

Fetch next record from database. It will also display progress bar (once
it's implemented, that is).

 my $rec = $webpac->fetch_rec;

=cut

sub fetch_rec {
        my $self = shift;

        my $mfn = $self->{'current_mfn'}++ || confess "it seems that you didn't load database!";

        if ($mfn > $self->{'max_mfn'}) {
                $self->{'current_mfn'} = $self->{'max_mfn'};
                return;
        }

        return $self->{'data'}->{$mfn};
}

=head2 open_import_xml

Read file from C<import_xml/> directory and parse it.

 $webpac->open_import_xml(type => 'isis');

=cut

sub open_import_xml {
        my $self = shift;

        my $arg = {@_};
        confess "need type to load file from import_xml/" if (! $arg->{'type'});

        $self->{'type'} = $arg->{'type'};

        my $type_base = $arg->{'type'};
        $type_base =~ s/_.*$//g;

        $self->{'tag'} = $type2tag{$type_base};

        print STDERR "using type ",$self->{'type'}," tag ",$self->{'tag'},"\n" if ($self->{'debug'});

        my $f = "./import_xml/".$self->{'type'}.".xml";
        confess "import_xml file '$f' doesn't exist!" if (! -e "$f");

        print STDERR "reading '$f'\n" if ($self->{'debug'});

        $self->{'import_xml'} = XMLin($f,
                ForceArray => [ $self->{'tag'}, 'config', 'format' ],
                ForceContent => 1
        );

        print Dumper($self->{'import_xml'});

}

=head2 create_lookup

Create lookup from record using lookup definition.

=cut

sub create_lookup {
        my $self = shift;

        my $rec = shift || confess "need record to create lookup";
        confess("need HASH as first argument!") if ($rec !~ /HASH/o);

        foreach my $i (@_) {
                if ($i->{'eval'}) {
                        my $eval = $self->fill_in($rec,$i->{'eval'});
                        my $key = $self->fill_in($rec,$i->{'key'});
                        my @val = $self->fill_in($rec,$i->{'val'});
                        if ($key && @val && eval $eval) {
                                push @{$self->{'lookup'}->{$key}}, @val;
                        }
                } else {
                        my $key = $self->fill_in($rec,$i->{'key'});
                        my @val = $self->fill_in($rec,$i->{'val'});
                        if ($key && @val) {
                                push @{$self->{'lookup'}->{$key}}, @val;
                        }
                }
        }
}

=head2 get_data

Returns value from record.

 $self->get_data(\$rec,$f,$sf,$i,\$found);

Arguments are:
record reference C<$rec>,
field C<$f>,
optional subfield C<$sf>,
index for repeatable values C<$i>.

Optional variable C<$found> will be incremented if the
field is found.

Returns value or empty string.

=cut

sub get_data {
        my $self = shift;

        my ($rec,$f,$sf,$i,$found) = @_;
        if ($$rec->{$f}) {
                if ($sf && $$rec->{$f}->[$i]->{$sf}) {
                        $$found++ if (defined($$found));
                        return $$rec->{$f}->[$i]->{$sf};
                } elsif ($$rec->{$f}->[$i]) {
                        $$found++ if (defined($$found));
                        # it still might have subfield, just
                        # not specified, so we'll dump all
                        if ($$rec->{$f}->[$i] =~ /HASH/o) {
                                my $out;
                                foreach my $k (keys %{$$rec->{$f}->[$i]}) {
                                        $out .= $$rec->{$f}->[$i]->{$k}." ";
                                }
                                return $out;
                        } else {
                                return $$rec->{$f}->[$i];
                        }
                }
        } else {
                return '';
        }
}

=head2 fill_in

Workhorse of all: takes a record from the in-memory structure of the database and
a string with placeholders and returns a string or an array with substituted
values from the record.

 $webpac->fill_in($rec,'v250^a');

Optional argument is ordinal number for repeatable fields. By default,
it's assumed to be first repeatable field (fields are perl array, so first
element is 0).
Following example will read second value from repeatable field.

 $webpac->fill_in($rec,'Title: v250^a',1);

This function B<does not> perform parsing of format to intelligently skip
delimiters before fields which aren't used.

=cut

sub fill_in {
        my $self = shift;

        my $rec = shift || confess "need data record";
        my $format = shift || confess "need format to parse";
        # iteration (for repeatable fields)
        my $i = shift || 0;

        # FIXME remove for speedup?
        confess("need HASH as first argument!") if ($rec !~ /HASH/o);

        my $found = 0;

        my $eval_code;
        # remove eval{...} from beginning
        $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);

        # do actual replacement of placeholders
        $format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;

        if ($found) {
                if ($eval_code) {
                        my $eval = $self->fill_in($rec,$eval_code,$i);
                        return if (! eval $eval);
                }
                # do we have lookups?
                if ($format =~ /\[[^\[\]]+\]/o) {
                        return $self->lookup($format);
                } else {
                        return $format;
                }
        } else {
                return;
        }
}

=head2 lookup

Perform lookups on format supplied to it.

 my $txt = $self->lookup('[v900]');

Lookups can be nested (like C<[d:[a:[v900]]]>).

=cut

sub lookup {
        my $self = shift;

        my $tmp = shift || confess "need format";

        if ($tmp =~ /\[[^\[\]]+\]/o) {
                my @in = ( $tmp );
#print "##lookup $tmp\n";
                my @out;
                while (my $f = shift @in) {
                        if ($f =~ /\[([^\[\]]+)\]/) {
                                my $k = $1;
                                if ($self->{'lookup'}->{$k}) {
#print "## lookup key = $k\n";
                                        foreach my $nv (@{$self->{'lookup'}->{$k}}) {
                                                my $tmp2 = $f;
                                                $tmp2 =~ s/\[$k\]/$nv/g;
                                                push @in, $tmp2;
#print "## lookup in => $tmp2\n";
                                        }
                                } else {
                                        undef $f;
                                }
                        } elsif ($f) {
                                push @out, $f;
#print "## lookup out => $f\n";
                        }
                }
                return @out;
        } else {
                return $tmp;
        }
}

=head2 parse

Perform smart parsing of string, skipping delimiters for fields which aren't
defined. It can also eval code in format starting with C<eval{...}> and
return output or nothing depending on eval code.

 $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);

=cut

sub parse {
        my $self = shift;

        my ($rec, $format_utf8, $i) = @_;

        return if (! $format_utf8);

        confess("need HASH as first argument!") if ($rec !~ /HASH/o);
        confess("need utf2cp Text::Iconv object!") if (! $self->{'utf2cp'});

        $i = 0 if (! $i);

        my $format = $self->{'utf2cp'}->convert($format_utf8) || confess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'});

        my @out;

        my $eval_code;
        # remove eval{...} from beginning
        $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);

        my $prefix;
        my $all_found=0;

#print "## $format\n";
        while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))*//s) {
#print "## [ $1 | $2 | $3 ] $format\n";

                my $del = $1 || '';
                $prefix ||= $del if ($all_found == 0);

                my $found = 0;
                my $tmp = $self->get_data(\$rec,$2,$3,$i,\$found);

                if ($found) {
                        push @out, $del;
                        push @out, $tmp;
                        $all_found += $found;
                }
        }

        return if (! $all_found);

        my $out = join('',@out) . $format;

        # add prefix if not there
        $out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);
        
        if ($eval_code) {
                my $eval = $self->fill_in($rec,$eval_code,$i);
                return if (! eval $eval);
        }

        return $out;
}

=head2 data_structure

Create in-memory data structure which represents layout from C<import_xml>.
It is used later to produce output.

 my $ds = $webpac->data_structure($rec);

=cut

# private method _sort_by_order
# sort subrouting using order="" attribute
sub _sort_by_order {
        my $self = shift;

        my $va = $self->{'import_xml'}->{'indexer'}->{$a}->{'order'} ||
                $self->{'import_xml'}->{'indexer'}->{$a};
        my $vb = $self->{'import_xml'}->{'indexer'}->{$b}->{'order'} ||
                $self->{'import_xml'}->{'indexer'}->{$b};

        return $va <=> $vb;
}

sub data_structure {
        my $self = shift;

        my $rec = shift;
        confess("need HASH as first argument!") if ($rec !~ /HASH/o);

        my @sorted_tags;
        if ($self->{tags_by_order}) {
                @sorted_tags = @{$self->{tags_by_order}};
        } else {
                @sorted_tags = sort { $self->_sort_by_order } keys %{$self->{'import_xml'}->{'indexer'}};
                $self->{tags_by_order} = \@sorted_tags;
        }

        my $ds;

        foreach my $field (@sorted_tags) {

                my $row;
                my $i = 0;

#print "field $field [",$self->{'tag'},"] = ",Dumper($self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}});

                foreach my $tag (@{$self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}}) {

                        my $v = $self->parse($rec,$tag->{'content'},$i);
print "## $i:",$tag->{'content'}," = ",($v || 'null'),"\n";

                        next if (!$v || $v && $v eq '');

                        # does tag have type?
                        if ($tag->{'type'}) {
                                push @{$row->{$tag->{'type'}}}, $v;
                        } else {
                                push @{$row->{'display'}}, $v;
                                push @{$row->{'swish'}}, $v;
                        }
                }

                push @{$ds->{$field}}, $row if ($row);

        }

        print Dumper($ds);

}

1;
1	package WebPAC;
2
3	use Carp;
4	use Text::Iconv;
5	use Config::IniFiles;
6	use XML::Simple;
7
8	use Data::Dumper;
9
10	=head1 NAME
11
12	WebPAC - base class for WebPAC
13
14	=head1 DESCRIPTION
15
16	This module implements methods used by WebPAC.
17
18	=head1 METHODS
19
20	=head2 new
21
22	This will create new instance of WebPAC using configuration specified by C<config_file>.
23
24	my $webpac = new WebPAC(
25	config_file => 'name.conf',
26	[code_page => 'ISO-8859-2',]
27	);
28
29	Default C<code_page> is C<ISO-8859-2>.
30
31	It will also read configuration files
32	C<global.conf> (used by indexer and Web font-end)
33	and configuration file specified by C<config_file>
34	which describes databases to be indexed.
35
36	=cut
37
38	# mapping between data type and tag which specify
39	# format in XML file
40	my %type2tag = (
41	'isis' => 'isis',
42	# 'excel' => 'column',
43	# 'marc' => 'marc',
44	# 'feed' => 'feed'
45	);
46
47	sub new {
48	my $class = shift;
49	my $self = {@_};
50	bless($self, $class);
51
52	# fill in default values
53	# output codepage
54	$self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});
55
56	#
57	# read global.conf
58	#
59
60	$self->{global_config_file} = new Config::IniFiles( -file => 'global.conf' ) \|\| croak "can't open 'global.conf'";
61
62	# read global config parametars
63	foreach my $var (qw(
64	dbi_dbd
65	dbi_dsn
66	dbi_user
67	dbi_passwd
68	show_progress
69	my_unac_filter
70	)) {
71	$self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
72	}
73
74	#
75	# read indexer config file
76	#
77
78	$self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) \|\| croak "can't open '$self->{config_file}'";
79
80	$self->{'utf2cp'} = Text::Iconv->new('UTF-8' ,$self->{'code_page'});
81	return $self;
82	}
83
84	=head2 open_isis
85
86	Open CDS/ISIS database using OpenIsis module and read all records to memory.
87
88	$webpac->open_isis(
89	filename => '/data/ISIS/ISIS',
90	code_page => '852',
91	limit_mfn => '500',
92	lookup => [ ... ],
93	);
94
95	By default, ISIS code page is assumed to be C<852>.
96
97	If optional parametar C<limit_mfn> is set, it will read just 500 records
98	from database in example above.
99
100	Returns number of last record read into memory (size of database, really).
101
102	C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
103	C<val>. Optional parametar C<eval> is perl code to evaluate before storing
104	value in index.
105
106	lookup => [
107	{ 'key' => 'd:v900', 'val' => 'v250^a' },
108	{ 'eval' => '"v901^a" eq "Podruèje"',
109	'key' => 'pa:v561^4:v562^4:v461^1',
110	'val' => 'v900' },
111	]
112
113	=cut
114
115	sub open_isis {
116	my $self = shift;
117	my $arg = {@_};
118
119	croak "need filename" if (! $arg->{'filename'});
120	my $code_page = $arg->{'code_page'} \|\| '852';
121
122	use OpenIsis;
123
124	#$self->{'isis_code_page'} = $code_page;
125
126	# create Text::Iconv object
127	my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
128
129	print STDERR "reading ISIS database '",$arg->{'filename'},"'\n" if ($self->{'debug'});
130
131	my $isis_db = OpenIsis::open($arg->{'filename'});
132
133	my $maxmfn = OpenIsis::maxRowid( $isis_db ) \|\| 1;
134
135	$maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});
136
137	print STDERR "processing $maxmfn records...\n" if ($self->{'debug'});
138
139	# read database
140	for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
141
142	# read record
143	my $row = OpenIsis::read( $isis_db, $mfn );
144	foreach my $k (keys %{$row}) {
145	if ($k ne "mfn") {
146	foreach my $l (@{$row->{$k}}) {
147	$l = $cp->convert($l);
148	# has subfields?
149	my $val;
150	if ($l =~ m/\^/) {
151	foreach my $t (split(/\^/,$l)) {
152	next if (! $t);
153	$val->{substr($t,0,1)} = substr($t,1);
154	}
155	} else {
156	$val = $l;
157	}
158
159	push @{$self->{'data'}->{$mfn}->{$k}}, $val;
160	}
161	}
162
163	}
164
165	# create lookup
166	my $rec = $self->{'data'}->{$mfn};
167	$self->create_lookup($rec, @{$arg->{'lookup'}});
168
169	}
170
171	$self->{'current_mfn'} = 1;
172
173	# store max mfn and return it.
174	return $self->{'max_mfn'} = $maxmfn;
175	}
176
177	=head2 fetch_rec
178
179	Fetch next record from database. It will also display progress bar (once
180	it's implemented, that is).
181
182	my $rec = $webpac->fetch_rec;
183
184	=cut
185
186	sub fetch_rec {
187	my $self = shift;
188
189	my $mfn = $self->{'current_mfn'}++ \|\| confess "it seems that you didn't load database!";
190
191	if ($mfn > $self->{'max_mfn'}) {
192	$self->{'current_mfn'} = $self->{'max_mfn'};
193	return;
194	}
195
196	return $self->{'data'}->{$mfn};
197	}
198
199	=head2 open_import_xml
200
201	Read file from C<import_xml/> directory and parse it.
202
203	$webpac->open_import_xml(type => 'isis');
204
205	=cut
206
207	sub open_import_xml {
208	my $self = shift;
209
210	my $arg = {@_};
211	confess "need type to load file from import_xml/" if (! $arg->{'type'});
212
213	$self->{'type'} = $arg->{'type'};
214
215	my $type_base = $arg->{'type'};
216	$type_base =~ s/_.*$//g;
217
218	$self->{'tag'} = $type2tag{$type_base};
219
220	print STDERR "using type ",$self->{'type'}," tag ",$self->{'tag'},"\n" if ($self->{'debug'});
221
222	my $f = "./import_xml/".$self->{'type'}.".xml";
223	confess "import_xml file '$f' doesn't exist!" if (! -e "$f");
224
225	print STDERR "reading '$f'\n" if ($self->{'debug'});
226
227	$self->{'import_xml'} = XMLin($f,
228	ForceArray => [ $self->{'tag'}, 'config', 'format' ],
229	ForceContent => 1
230	);
231
232	print Dumper($self->{'import_xml'});
233
234	}
235
236	=head2 create_lookup
237
238	Create lookup from record using lookup definition.
239
240	=cut
241
242	sub create_lookup {
243	my $self = shift;
244
245	my $rec = shift \|\| confess "need record to create lookup";
246	confess("need HASH as first argument!") if ($rec !~ /HASH/o);
247
248	foreach my $i (@_) {
249	if ($i->{'eval'}) {
250	my $eval = $self->fill_in($rec,$i->{'eval'});
251	my $key = $self->fill_in($rec,$i->{'key'});
252	my @val = $self->fill_in($rec,$i->{'val'});
253	if ($key && @val && eval $eval) {
254	push @{$self->{'lookup'}->{$key}}, @val;
255	}
256	} else {
257	my $key = $self->fill_in($rec,$i->{'key'});
258	my @val = $self->fill_in($rec,$i->{'val'});
259	if ($key && @val) {
260	push @{$self->{'lookup'}->{$key}}, @val;
261	}
262	}
263	}
264	}
265
266	=head2 get_data
267
268	Returns value from record.
269
270	$self->get_data(\$rec,$f,$sf,$i,\$found);
271
272	Arguments are:
273	record reference C<$rec>,
274	field C<$f>,
275	optional subfiled C<$sf>,
276	index for repeatable values C<$i>.
277
278	Optinal variable C<$found> will be incremeted if thre
279	is field.
280
281	Returns value or empty string.
282
283	=cut
284
285	sub get_data {
286	my $self = shift;
287
288	my ($rec,$f,$sf,$i,$found) = @_;
289	if ($$rec->{$f}) {
290	if ($sf && $$rec->{$f}->[$i]->{$sf}) {
291	$$found++ if (defined($$found));
292	return $$rec->{$f}->[$i]->{$sf};
293	} elsif ($$rec->{$f}->[$i]) {
294	$$found++ if (defined($$found));
295	# it still might have subfield, just
296	# not specified, so we'll dump all
297	if ($$rec->{$f}->[$i] =~ /HASH/o) {
298	my $out;
299	foreach my $k (keys %{$$rec->{$f}->[$i]}) {
300	$out .= $$rec->{$f}->[$i]->{$k}." ";
301	}
302	return $out;
303	} else {
304	return $$rec->{$f}->[$i];
305	}
306	}
307	} else {
308	return '';
309	}
310	}
311
312	=head2 fill_in
313
314	Workhourse of all: takes record from in-memory structure of database and
315	strings with placeholders and returns string or array of with substituted
316	values from record.
317
318	$webpac->fill_in($rec,'v250^a');
319
320	Optional argument is ordinal number for repeatable fields. By default,
321	it's assume to be first repeatable field (fields are perl array, so first
322	element is 0).
323	Following example will read second value from repeatable field.
324
325	$webpac->fill_in($rec,'Title: v250^a',1);
326
327	This function B<does not> perform parsing of format to inteligenty skip
328	delimiters before fields which aren't used.
329
330	=cut
331
332	sub fill_in {
333	my $self = shift;
334
335	my $rec = shift \|\| confess "need data record";
336	my $format = shift \|\| confess "need format to parse";
337	# iteration (for repeatable fields)
338	my $i = shift \|\| 0;
339
340	# FIXME remove for speedup?
341	confess("need HASH as first argument!") if ($rec !~ /HASH/o);
342
343	my $found = 0;
344
345	my $eval_code;
346	# remove eval{...} from beginning
347	$eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
348
349	# do actual replacement of placeholders
350	$format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;
351
352	if ($found) {
353	if ($eval_code) {
354	my $eval = $self->fill_in($rec,$eval_code,$i);
355	return if (! eval $eval);
356	}
357	# do we have lookups?
358	if ($format =~ /\[[^\[\]]+\]/o) {
359	return $self->lookup($format);
360	} else {
361	return $format;
362	}
363	} else {
364	return;
365	}
366	}
367
368	=head2 lookup
369
370	Perform lookups on format supplied to it.
371
372	my $txt = $self->lookup('[v900]');
373
374	Lookups can be nested (like C<[d:[a:[v900]]]>).
375
376	=cut
377
378	sub lookup {
379	my $self = shift;
380
381	my $tmp = shift \|\| confess "need format";
382
383	if ($tmp =~ /\[[^\[\]]+\]/o) {
384	my @in = ( $tmp );
385	#print "##lookup $tmp\n";
386	my @out;
387	while (my $f = shift @in) {
388	if ($f =~ /\[([^\[\]]+)\]/) {
389	my $k = $1;
390	if ($self->{'lookup'}->{$k}) {
391	#print "## lookup key = $k\n";
392	foreach my $nv (@{$self->{'lookup'}->{$k}}) {
393	my $tmp2 = $f;
394	$tmp2 =~ s/\[$k\]/$nv/g;
395	push @in, $tmp2;
396	#print "## lookup in => $tmp2\n";
397	}
398	} else {
399	undef $f;
400	}
401	} elsif ($f) {
402	push @out, $f;
403	#print "## lookup out => $f\n";
404	}
405	}
406	return @out;
407	} else {
408	return $tmp;
409	}
410	}
411
412	=head2 parse
413
414	Perform smart parsing of string, skipping delimiters for fields which aren't
415	defined. It can also eval code in format starting with C<eval{...}> and
416	return output or nothing depending on eval code.
417
418	$webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);
419
420	=cut
421
422	sub parse {
423	my $self = shift;
424
425	my ($rec, $format_utf8, $i) = @_;
426
427	return if (! $format_utf8);
428
429	confess("need HASH as first argument!") if ($rec !~ /HASH/o);
430	confess("need utf2cp Text::Iconv object!") if (! $self->{'utf2cp'});
431
432	$i = 0 if (! $i);
433
434	my $format = $self->{'utf2cp'}->convert($format_utf8) \|\| confess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'});
435
436	my @out;
437
438	my $eval_code;
439	# remove eval{...} from beginning
440	$eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
441
442	my $prefix;
443	my $all_found=0;
444
445	#print "## $format\n";
446	while ($format =~ s/^(.?)v(\d+)(?:\^(\w))//s) {
447	#print "## [ $1 \| $2 \| $3 ] $format\n";
448
449	my $del = $1 \|\| '';
450	$prefix \|\|= $del if ($all_found == 0);
451
452	my $found = 0;
453	my $tmp = $self->get_data(\$rec,$2,$3,$i,\$found);
454
455	if ($found) {
456	push @out, $del;
457	push @out, $tmp;
458	$all_found += $found;
459	}
460	}
461
462	return if (! $all_found);
463
464	my $out = join('',@out) . $format;
465
466	# add prefix if not there
467	$out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);
468
469	if ($eval_code) {
470	my $eval = $self->fill_in($rec,$eval_code,$i);
471	return if (! eval $eval);
472	}
473
474	return $out;
475	}
476
477	=head2 data_structure
478
479	Create in-memory data structure which represents layout from C<import_xml>.
480	It is used later to produce output.
481
482	my $ds = $webpac->data_structure($rec);
483
484	=cut
485
486	# private method _sort_by_order
487	# sort subrouting using order="" attribute
488	sub _sort_by_order {
489	my $self = shift;
490
491	my $va = $self->{'import_xml'}->{'indexer'}->{$a}->{'order'} \|\|
492	$self->{'import_xml'}->{'indexer'}->{$a};
493	my $vb = $self->{'import_xml'}->{'indexer'}->{$b}->{'order'} \|\|
494	$self->{'import_xml'}->{'indexer'}->{$b};
495
496	return $va <=> $vb;
497	}
498
499	sub data_structure {
500	my $self = shift;
501
502	my $rec = shift;
503	confess("need HASH as first argument!") if ($rec !~ /HASH/o);
504
505	my @sorted_tags;
506	if ($self->{tags_by_order}) {
507	@sorted_tags = @{$self->{tags_by_order}};
508	} else {
509	@sorted_tags = sort { $self->_sort_by_order } keys %{$self->{'import_xml'}->{'indexer'}};
510	$self->{tags_by_order} = \@sorted_tags;
511	}
512
513	my $ds;
514
515	foreach my $field (@sorted_tags) {
516
517	my $row;
518	my $i = 0;
519
520	#print "field $field [",$self->{'tag'},"] = ",Dumper($self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}});
521
522	foreach my $tag (@{$self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}}) {
523
524	my $v = $self->parse($rec,$tag->{'content'},$i);
525	print "## $i:",$tag->{'content'}," = ",($v \|\| 'null'),"\n";
526
527	next if (!$v \|\| $v && $v eq '');
528
529	# does tag have type?
530	if ($tag->{'type'}) {
531	push @{$row->{$tag->{'type'}}}, $v;
532	} else {
533	push @{$row->{'display'}}, $v;
534	push @{$row->{'swish'}}, $v;
535	}
536	}
537
538	push @{$ds->{$field}}, $row if ($row);
539
540	}
541
542	print Dumper($ds);
543
544	}
545
546	1;