trunk/t/3-normalize-xml.t

#!/usr/bin/perl -w

use Test::More tests => 140;
use Test::Exception;
use Cwd qw/abs_path/;
use blib;
use strict;
use Data::Dumper;

BEGIN {
use_ok( 'WebPAC::Normalize::XML' );
}

# Work out the directory holding this test script so the XML fixture can
# be found no matter which directory the tests are started from.
ok(my $abs_path = abs_path($0), "abs_path");
$abs_path =~ s#/[^/]*$#/#;
#diag "abs_path: $abs_path";

# The constructor is expected to die with a message matching /pair/ when
# only one of lookup / lookup_regex is supplied.
# Direct method-call syntax (Class->new) is used instead of the indirect
# object form (new Class), which perl can mis-parse.
throws_ok { WebPAC::Normalize::XML->new( lookup_regex => 'foo' ) } qr/pair/, "lookup_regex without lookup";
throws_ok { WebPAC::Normalize::XML->new( lookup => 'bar' ) } qr/pair/, "lookup without lookup_regex";

# Normalizer instance shared by every test that follows.
ok(my $n = WebPAC::Normalize::XML->new(
        debug => 0,
        filter => {
                # Test-only filter: applies $regex to $val and returns the
                # (possibly modified) value.
                # NOTE(review): this is a string eval, so $regex is executed
                # as Perl code; presumably it only ever comes from the
                # trusted normalisation fixture -- confirm before reusing.
                regex => sub {
                        my ($val, $regex) = @_;
                        eval "\$val =~ $regex";
                        return $val;
                },
        },
), "new");

# open() is expected to die when 'tag' or 'xml_file' is missing, or when
# xml_file points at a file that does not exist.
throws_ok { $n->open() } qr/tag/, "open without tag";
throws_ok { $n->open( tag => 'isis' ) } qr/xml_file/, "open without xml_file";
throws_ok { $n->open( tag => 'isis', xml_file => '/foo/bar/baz' ) } qr/file.*doesn't exist/, "open with invalid xml_file";

# Finally open the real fixture that ships next to this script.
ok( $n->open(
        tag => 'isis',
        xml_file => "$abs_path/data/normalize.xml",
), "open");

# Fixture record: field number => list of occurrences.  An occurrence is
# either a plain value or a hash of subfield letter => value.
my $rec = {
        '675' => [ { 'a' => '159.9' } ],
        '210' => [ {
                'c' => 'New York University press',
                'a' => 'New York',
                'd' => 'cop. 1988',
        } ],
        '700' => [ { 'a' => 'Haynal', 'b' => 'André' } ],
        '801' => [ 'FFZG' ],
        '991' => [ '8302' ],
        '000' => [ 1 ],
        '702' => [ { 'a' => 'Holder', 'b' => 'Elizabeth' } ],
        '215' => [ {
                'c' => 'ilustr',
                'a' => 'xix, 202 str',
                'd' => '23cm',
        } ],
        '990' => [ '2140', '88', 'HAY' ],
        '200' => [ {
                'e' => 'from Freud and Ferenczi to Michael balint',
                'a' => 'Controversies in psychoanalytic method',
                'g' => 'translated by Elizabeth Holder on the basisi of a first draft by Archie Hooton ; with a preface by Daniel N. Stern',
                'f' => 'by André E. Haynal',
        } ],
        '610' => [ 'povijest psihoanalize' ],
        # subfield ^c is deliberately present but empty
        '994' => [ { 'c' => '', 'a' => 'PS', 'b' => 'MG' } ],
        '320' => [ 'Kazalo' ],
        '101' => [ 'ENG' ],
        '686' => [ '2140' ],
        '300' => [ 'Prijevod djela: ' ],
};

# Exercise get_data() for every field of the fixture record and check the
# "found" flag it reports through the scalar reference passed last.
#
# NOTE(review): the inner loop iterates over the one-element list
# ($rec->{$fld}), i.e. over the array *reference* itself, not over the
# occurrences inside it.  Every value in $rec is an array reference, so
# ref($item) is 'ARRAY', the HASH branch below never runs and $r is never
# advanced past 0.  As written, each field produces exactly four
# assertions (two in the else branch, two after the inner loop), which is
# what the fixed plan at the top of the file accounts for.  Iterating over
# @{ $rec->{$fld} } instead would change the number of assertions, so the
# plan would have to be updated in the same change.
foreach my $fld (keys %$rec) {
        # occurrence index handed to get_data(); stays 0 (see note above)
        my $r = 0;
        foreach my $item ($rec->{$fld}) {
                if (ref($item) eq 'HASH') {
                        # every existing subfield must be returned and flagged as found
                        foreach my $sf (keys %$item) {
                                my $found = 0;
                                ok($n->get_data(\$rec, $fld, $sf, $r, \$found), "v${fld}^${sf} / $r");
                                ok($found, "found");
                        }
                        # subfield ^x is absent from every field of the fixture record
                        my $found = 0;
                        ok(! $n->get_data(\$rec, $fld, 'x', $r, \$found), "no v${fld}^x / $r");
                        ok(! $found, "not found");
                } else {
                        # field requested without a subfield (undef)
                        my $found = 0;
                        ok($n->get_data(\$rec, $fld, undef, $r, \$found), "v${fld} / $r");
                        ok($found, "found");
                }
        }
        # Negative check: field 999 does not exist in the fixture record.
        # The test name interpolates $fld although the lookup uses '999'.
        my $found = 0;
        ok(! $n->get_data(\$rec, '999', '', $r, \$found), "no v${fld} / $r");
        ok(! $found, "not found");
}

# The complete fixture record must normalise into a true value.
my $ds = $n->data_structure($rec);
ok($ds, "data_structure");

#diag Dumper($rec, $ds);

# Pretend our test normalisation data was loaded for the 'isis' tag.
$n->{'tag'} = 'isis';

#diag Dumper($n->{import_xml}->{indexer});


# Replace the record with a single occurrence of field 900 carrying the
# four subfields referenced by the template below.
$rec = {
        '900' => [ { 'a' => '1', 'b' => '2', 'c' => '3', 'x' => 'yap' } ],
};

# One output tag whose template mixes literal text with all four subfields.
my $import = {
        'Tag' => {
                'isis' => [ { content => 'v900^a + v900^b = v900^c [v900^x]' } ],
        },
};

# parse_test($import, $tag, $rec, $i, $r)
#
# Installs $import as the normalizer's indexer definition, runs
# data_structure() on $rec and compares element $i of the 'display' list
# produced for $tag with the expected string $r.
# Emits two tests per call: one for data_structure() returning a true
# value and one string comparison.
sub parse_test($$$$$) {
        my ($indexer, $tag_name, $record, $idx, $expected) = @_;

        # erase internal cache (yak!) before swapping in the new definition
        delete $n->{tags_by_order};
        $n->{import_xml}{indexer} = $indexer;

        # give the record a field 000 when it does not have one yet
        unless ($record->{'000'}) {
                push @{ $record->{'000'} }, 42;
        }

        #diag "test normalisation of: ",Dumper($n->{import_xml}->{indexer}, $record);
        my $normalized = $n->data_structure($record);
        ok($normalized, "data_structure");

        #diag Dumper($normalized->{$tag_name}->{display}); # if ($idx == 0);
        my $got = $normalized->{$tag_name}{display}[$idx];
        cmp_ok($got, 'eq', $expected, "parse $tag_name/$idx - $expected");
}

# All four subfields present: the whole template is expected.
parse_test($import, 'Tag', $rec, 0, '1 + 2 = 3 [yap]');

# Without ^b the expected output no longer contains the " + " part.
delete $rec->{'900'}[0]{'b'};
parse_test($import, 'Tag', $rec, 0, '1 = 3 [yap]');

# Put ^b back and change ^c.
@{ $rec->{'900'}[0] }{ 'b', 'c' } = (5, 6);
parse_test($import, 'Tag', $rec, 0, '1 + 5 = 6 [yap]');

# Without ^c, and with a different ^x.
delete $rec->{'900'}[0]{'c'};
$rec->{'900'}[0]{'x'} = 'hmmm';
parse_test($import, 'Tag', $rec, 0, '1 + 5 [hmmm]');

# Only ^b and ^x left.
$rec->{'900'}[0]{'x'} = 'nope!';
delete $rec->{'900'}[0]{'a'};
parse_test($import, 'Tag', $rec, 0, '5 [nope!]');

# Four occurrences of field 900, each with a different subset of
# subfields; ^x names the subfields present in that occurrence.
$rec = {
        '900' => [
                { 'b' => 'b1', 'x' => 'b1' },
                { 'a' => 'a1', 'b' => 'b2', 'x' => 'a1_b2' },
                { 'b' => 'b3', 'c' => 'c1', 'x' => 'b3_c1' },
                { 'a' => 'a2', 'b' => 'b4', 'c' => 'c2', 'x' => 'a2_b4_c2' },
        ],
};

# Template with numbered literals between the subfields.
$import = {
        'Tag' => {
                'isis' => [ { content => '0 v900^a 1 v900^b 2 v900^c 3 v900^x 4' } ],
        },
};

# Expected display value per occurrence, in occurrence order.
my @per_occurrence = (
        '0 b1 3 b1 4',
        '0 a1 1 b2 3 a1_b2 4',
        '0 b3 2 c1 3 b3_c1 4',
        '0 a2 1 b4 2 c2 3 a2_b4_c2 4',
);
for my $occurrence (0 .. $#per_occurrence) {
        parse_test($import, 'Tag', $rec, $occurrence, $per_occurrence[$occurrence]);
}

# parse_test_arr($import, $tag, $rec, $arr)
#
# Calls parse_test() once per element of the array reference $arr, using
# the element's position as the display index and the element itself as
# the expected string.
sub parse_test_arr($$$$) {
        my ($indexer, $tag_name, $record, $expected_list) = @_;

        my $position = 0;
        for my $expected (@{$expected_list}) {
                parse_test($indexer, $tag_name, $record, $position++, $expected);
        }
}

# Several output tags evaluated against the same four-occurrence record:
# one per subfield, one combining two subfields, and one whose first
# template refers to a field (911) that the record does not contain.
$import = {
        'Tag_a'     => { 'isis' => [ { content => 'v900^a' } ] },
        'Tag_b'     => { 'isis' => [ { content => 'v900^b' } ] },
        'Tag_c'     => { 'isis' => [ { content => 'v900^c' } ] },
        'Tag_x'     => { 'isis' => [ { content => 'v900^x' } ] },
        'Tag_s1'    => { 'isis' => [ { content => 'v900^a = v900^c' } ] },
        'Tag_noval' => { 'isis' => [
                { content => 'v911^1' },
                { content => 'v900^c' },
        ] },
};

# [ tag name, expected display values per occurrence ], checked in order.
for my $case (
        [ 'Tag_a',     [ '','a1','','a2' ] ],
        [ 'Tag_b',     [ 'b1','b2','b3','b4' ] ],
        [ 'Tag_c',     [ '','','c1','c2' ] ],
        [ 'Tag_x',     [ 'b1','a1_b2','b3_c1','a2_b4_c2' ] ],
        [ 'Tag_s1',    [ '', 'a1', 'c1', 'a2 = c2' ] ],
        [ 'Tag_noval', [ '','','c1','c2' ] ],
) {
        my ($tag_name, $expected_list) = @{$case};
        parse_test_arr($import, $tag_name, $rec, $expected_list);
}

1	#!/usr/bin/perl -w
2
3	use Test::More tests => 140;
4	use Test::Exception;
5	use Cwd qw/abs_path/;
6	use blib;
7	use strict;
8	use Data::Dumper;
9
10	BEGIN {
11	use_ok( 'WebPAC::Normalize::XML' );
12	}
13
14	ok(my $abs_path = abs_path($0), "abs_path");
15	$abs_path =~ s#/[^/]*$#/#;
16	#diag "abs_path: $abs_path";
17
18	throws_ok { new WebPAC::Normalize::XML( lookup_regex => 'foo' ) } qr/pair/, "lookup_regex without lookup";
19	throws_ok { new WebPAC::Normalize::XML( lookup => 'bar' ) } qr/pair/, "lookup without lookup_regex";
20
21	ok(my $n = new WebPAC::Normalize::XML(
22	debug => 0,
23	filter => {
24	regex => sub {
25	my ($val, $regex) = @_;
26	eval "\$val =~ $regex";
27	return $val;
28	},
29	},
30	), "new");
31
32	throws_ok { $n->open() } qr/tag/, "open without tag";
33	throws_ok { $n->open( tag => 'isis' ) } qr/xml_file/, "open without xml_file";
34	throws_ok { $n->open( tag => 'isis', xml_file => '/foo/bar/baz' ) } qr/file.*doesn't exist/, "open with invalid xml_file";
35	ok( $n->open(
36	tag => 'isis',
37	xml_file => "$abs_path/data/normalize.xml",
38	), "open");
39
40	my $rec = {
41	'675' => [
42	{
43	'a' => '159.9'
44	}
45	],
46	'210' => [
47	{
48	'c' => 'New York University press',
49	'a' => 'New York',
50	'd' => 'cop. 1988'
51	}
52	],
53	'700' => [
54	{
55	'a' => 'Haynal',
56	'b' => 'André'
57	}
58	],
59	'801' => [
60	'FFZG'
61	],
62	'991' => [
63	'8302'
64	],
65	'000' => [
66	1
67	],
68	'702' => [
69	{
70	'a' => 'Holder',
71	'b' => 'Elizabeth'
72	}
73	],
74	'215' => [
75	{
76	'c' => 'ilustr',
77	'a' => 'xix, 202 str',
78	'd' => '23cm'
79	}
80	],
81	'990' => [
82	'2140',
83	'88',
84	'HAY'
85	],
86	'200' => [
87	{
88	'e' => 'from Freud and Ferenczi to Michael balint',
89	'a' => 'Controversies in psychoanalytic method',
90	'g' => 'translated by Elizabeth Holder on the basisi of a first draft by Archie Hooton ; with a preface by Daniel N. Stern',
91	'f' => 'by André E. Haynal'
92	}
93	],
94	'610' => [
95	'povijest psihoanalize'
96	],
97	'994' => [
98	{
99	'c' => '',
100	'a' => 'PS',
101	'b' => 'MG'
102	}
103	],
104	'320' => [
105	'Kazalo'
106	],
107	'101' => [
108	'ENG'
109	],
110	'686' => [
111	'2140'
112	],
113	'300' => [
114	'Prijevod djela: '
115	]
116	};
117
118	foreach my $fld (keys %$rec) {
119	my $r = 0;
120	foreach my $item ($rec->{$fld}) {
121	if (ref($item) eq 'HASH') {
122	foreach my $sf (keys %$item) {
123	my $found = 0;
124	ok($n->get_data(\$rec, $fld, $sf, $r, \$found), "v${fld}^${sf} / $r");
125	ok($found, "found");
126	}
127	my $found = 0;
128	ok(! $n->get_data(\$rec, $fld, 'x', $r, \$found), "no v${fld}^x / $r");
129	ok(! $found, "not found");
130	} else {
131	my $found = 0;
132	ok($n->get_data(\$rec, $fld, undef, $r, \$found), "v${fld} / $r");
133	ok($found, "found");
134	}
135	}
136	my $found = 0;
137	ok(! $n->get_data(\$rec, '999', '', $r, \$found), "no v${fld} / $r");
138	ok(! $found, "not found");
139	}
140
141	ok(my $ds = $n->data_structure( $rec ), "data_structure");
142
143	#diag Dumper($rec, $ds);
144
145	# fake load of our test normalisation data
146	$n->{tag} = 'isis';
147
148	#diag Dumper($n->{import_xml}->{indexer});
149
150
151	$rec = {
152	'900' => [ {
153	'a' => '1',
154	'b' => '2',
155	'c' => '3',
156	'x' => 'yap',
157	} ],
158	};
159
160	my $import = {
161	'Tag' => { 'isis' => [
162	{ content => 'v900^a + v900^b = v900^c [v900^x]' },
163	] },
164	};
165
166	sub parse_test($$$$$) {
167	my ($import,$tag,$rec,$i,$r) = @_;
168	$n->{import_xml}->{indexer} = $import;
169	# erase internal cache (yak!)
170	delete($n->{tags_by_order});
171	push @{$rec->{'000'}}, 42 unless ($rec->{'000'});
172	#diag "test normalisation of: ",Dumper($n->{import_xml}->{indexer}, $rec);
173	ok(my $ds = $n->data_structure( $rec ), "data_structure");
174	#diag Dumper($ds->{$tag}->{display}); # if ($i == 0);
175	cmp_ok($ds->{$tag}->{display}->[$i], 'eq', $r, "parse $tag/$i - $r");
176	}
177
178	parse_test($import, 'Tag', $rec, 0, '1 + 2 = 3 [yap]');
179
180	delete($rec->{'900'}->[0]->{'b'});
181	parse_test($import, 'Tag', $rec, 0, '1 = 3 [yap]');
182
183	$rec->{'900'}->[0]->{'b'} = 5;
184	$rec->{'900'}->[0]->{'c'} = 6;
185	parse_test($import, 'Tag', $rec, 0, '1 + 5 = 6 [yap]');
186
187	delete($rec->{'900'}->[0]->{'c'});
188	$rec->{'900'}->[0]->{'x'} = 'hmmm';
189	parse_test($import, 'Tag', $rec, 0, '1 + 5 [hmmm]');
190
191	$rec->{'900'}->[0]->{'x'} = 'nope!';
192	delete($rec->{'900'}->[0]->{'a'});
193	parse_test($import, 'Tag', $rec, 0, '5 [nope!]');
194
195	$rec = {
196	'900' => [ {
197	'b' => 'b1',
198	'x' => 'b1',
199	},{
200	'a' => 'a1',
201	'b' => 'b2',
202	'x' => 'a1_b2',
203	},{
204	'b' => 'b3',
205	'c' => 'c1',
206	'x' => 'b3_c1',
207	},{
208	'a' => 'a2',
209	'b' => 'b4',
210	'c' => 'c2',
211	'x' => 'a2_b4_c2',
212	} ],
213	};
214
215	$import = {
216	'Tag' => { 'isis' => [
217	{ content => '0 v900^a 1 v900^b 2 v900^c 3 v900^x 4' },
218	] },
219	};
220
221	parse_test($import, 'Tag', $rec, 0, '0 b1 3 b1 4');
222	parse_test($import, 'Tag', $rec, 1, '0 a1 1 b2 3 a1_b2 4');
223	parse_test($import, 'Tag', $rec, 2, '0 b3 2 c1 3 b3_c1 4');
224	parse_test($import, 'Tag', $rec, 3, '0 a2 1 b4 2 c2 3 a2_b4_c2 4');
225
226	sub parse_test_arr($$$$) {
227	my ($import, $tag, $rec, $arr) = @_;
228	my $i = 0;
229	foreach my $res (@{$arr}) {
230	parse_test($import, $tag, $rec, $i, $res);
231	$i++;
232	}
233	}
234
235	$import = {
236	'Tag_a' => { 'isis' => [
237	{ content => 'v900^a' },
238	] },
239	'Tag_b' => { 'isis' => [
240	{ content => 'v900^b' },
241	] },
242	'Tag_c' => { 'isis' => [
243	{ content => 'v900^c' },
244	] },
245	'Tag_x' => { 'isis' => [
246	{ content => 'v900^x' },
247	] },
248	'Tag_s1' => { 'isis' => [
249	{ content => 'v900^a = v900^c' },
250	] },
251	'Tag_noval' => { 'isis' => [
252	{ content => 'v911^1' },
253	{ content => 'v900^c' },
254	] },
255	};
256
257	parse_test_arr($import, 'Tag_a', $rec, [ '','a1','','a2' ] );
258	parse_test_arr($import, 'Tag_b', $rec, [ 'b1','b2','b3','b4' ] );
259	parse_test_arr($import, 'Tag_c', $rec, [ '','','c1','c2' ] );
260	parse_test_arr($import, 'Tag_x', $rec, [ 'b1','a1_b2','b3_c1','a2_b4_c2' ] );
261	parse_test_arr($import, 'Tag_s1', $rec, [ '', 'a1', 'c1', 'a2 = c2' ] );
262	parse_test_arr($import, 'Tag_noval', $rec, [ '','','c1','c2' ] );
263