1 |
#!/usr/bin/perl -w |
2 |
|
3 |
use Test::More tests => 140; |
4 |
use Test::Exception; |
5 |
use Cwd qw/abs_path/; |
6 |
use blib; |
7 |
use strict; |
8 |
use Data::Dumper; |
9 |
|
10 |
BEGIN { |
11 |
use_ok( 'WebPAC::Normalize::XML' ); |
12 |
} |
13 |
|
14 |
ok(my $abs_path = abs_path($0), "abs_path"); |
15 |
$abs_path =~ s#/[^/]*$#/#; |
16 |
#diag "abs_path: $abs_path"; |
17 |
|
18 |
throws_ok { new WebPAC::Normalize::XML( lookup_regex => 'foo' ) } qr/pair/, "lookup_regex without lookup"; |
19 |
throws_ok { new WebPAC::Normalize::XML( lookup => 'bar' ) } qr/pair/, "lookup without lookup_regex"; |
20 |
|
21 |
ok(my $n = new WebPAC::Normalize::XML( |
22 |
debug => 0, |
23 |
filter => { |
24 |
regex => sub { |
25 |
my ($val, $regex) = @_; |
26 |
eval "\$val =~ $regex"; |
27 |
return $val; |
28 |
}, |
29 |
}, |
30 |
), "new"); |
31 |
|
32 |
throws_ok { $n->open() } qr/tag/, "open without tag"; |
33 |
throws_ok { $n->open( tag => 'isis' ) } qr/xml_file/, "open without xml_file"; |
34 |
throws_ok { $n->open( tag => 'isis', xml_file => '/foo/bar/baz' ) } qr/file.*doesn't exist/, "open with invalid xml_file"; |
35 |
ok( $n->open( |
36 |
tag => 'isis', |
37 |
xml_file => "$abs_path/data/normalize.xml", |
38 |
), "open"); |
39 |
|
40 |
my $rec = { |
41 |
'675' => [ |
42 |
{ |
43 |
'a' => '159.9' |
44 |
} |
45 |
], |
46 |
'210' => [ |
47 |
{ |
48 |
'c' => 'New York University press', |
49 |
'a' => 'New York', |
50 |
'd' => 'cop. 1988' |
51 |
} |
52 |
], |
53 |
'700' => [ |
54 |
{ |
55 |
'a' => 'Haynal', |
56 |
'b' => 'André' |
57 |
} |
58 |
], |
59 |
'801' => [ |
60 |
'FFZG' |
61 |
], |
62 |
'991' => [ |
63 |
'8302' |
64 |
], |
65 |
'000' => [ |
66 |
1 |
67 |
], |
68 |
'702' => [ |
69 |
{ |
70 |
'a' => 'Holder', |
71 |
'b' => 'Elizabeth' |
72 |
} |
73 |
], |
74 |
'215' => [ |
75 |
{ |
76 |
'c' => 'ilustr', |
77 |
'a' => 'xix, 202 str', |
78 |
'd' => '23cm' |
79 |
} |
80 |
], |
81 |
'990' => [ |
82 |
'2140', |
83 |
'88', |
84 |
'HAY' |
85 |
], |
86 |
'200' => [ |
87 |
{ |
88 |
'e' => 'from Freud and Ferenczi to Michael balint', |
89 |
'a' => 'Controversies in psychoanalytic method', |
90 |
'g' => 'translated by Elizabeth Holder on the basisi of a first draft by Archie Hooton ; with a preface by Daniel N. Stern', |
91 |
'f' => 'by André E. Haynal' |
92 |
} |
93 |
], |
94 |
'610' => [ |
95 |
'povijest psihoanalize' |
96 |
], |
97 |
'994' => [ |
98 |
{ |
99 |
'c' => '', |
100 |
'a' => 'PS', |
101 |
'b' => 'MG' |
102 |
} |
103 |
], |
104 |
'320' => [ |
105 |
'Kazalo' |
106 |
], |
107 |
'101' => [ |
108 |
'ENG' |
109 |
], |
110 |
'686' => [ |
111 |
'2140' |
112 |
], |
113 |
'300' => [ |
114 |
'Prijevod djela: ' |
115 |
] |
116 |
}; |
117 |
|
118 |
foreach my $fld (keys %$rec) { |
119 |
my $r = 0; |
120 |
foreach my $item ($rec->{$fld}) { |
121 |
if (ref($item) eq 'HASH') { |
122 |
foreach my $sf (keys %$item) { |
123 |
my $found = 0; |
124 |
ok($n->get_data(\$rec, $fld, $sf, $r, \$found), "v${fld}^${sf} / $r"); |
125 |
ok($found, "found"); |
126 |
} |
127 |
my $found = 0; |
128 |
ok(! $n->get_data(\$rec, $fld, 'x', $r, \$found), "no v${fld}^x / $r"); |
129 |
ok(! $found, "not found"); |
130 |
} else { |
131 |
my $found = 0; |
132 |
ok($n->get_data(\$rec, $fld, undef, $r, \$found), "v${fld} / $r"); |
133 |
ok($found, "found"); |
134 |
} |
135 |
} |
136 |
my $found = 0; |
137 |
ok(! $n->get_data(\$rec, '999', '', $r, \$found), "no v${fld} / $r"); |
138 |
ok(! $found, "not found"); |
139 |
} |
140 |
|
141 |
ok(my $ds = $n->data_structure( $rec ), "data_structure"); |
142 |
|
143 |
#diag Dumper($rec, $ds); |
144 |
|
145 |
# fake load of our test normalisation data |
146 |
$n->{tag} = 'isis'; |
147 |
|
148 |
#diag Dumper($n->{import_xml}->{indexer}); |
149 |
|
150 |
|
151 |
$rec = { |
152 |
'900' => [ { |
153 |
'a' => '1', |
154 |
'b' => '2', |
155 |
'c' => '3', |
156 |
'x' => 'yap', |
157 |
} ], |
158 |
}; |
159 |
|
160 |
my $import = { |
161 |
'Tag' => { 'isis' => [ |
162 |
{ content => 'v900^a + v900^b = v900^c [v900^x]' }, |
163 |
] }, |
164 |
}; |
165 |
|
166 |
sub parse_test($$$$$) { |
167 |
my ($import,$tag,$rec,$i,$r) = @_; |
168 |
$n->{import_xml}->{indexer} = $import; |
169 |
# erase internal cache (yak!) |
170 |
delete($n->{tags_by_order}); |
171 |
push @{$rec->{'000'}}, 42 unless ($rec->{'000'}); |
172 |
#diag "test normalisation of: ",Dumper($n->{import_xml}->{indexer}, $rec); |
173 |
ok(my $ds = $n->data_structure( $rec ), "data_structure"); |
174 |
#diag Dumper($ds->{$tag}->{display}); # if ($i == 0); |
175 |
cmp_ok($ds->{$tag}->{display}->[$i], 'eq', $r, "parse $tag/$i - $r"); |
176 |
} |
177 |
|
178 |
parse_test($import, 'Tag', $rec, 0, '1 + 2 = 3 [yap]'); |
179 |
|
180 |
delete($rec->{'900'}->[0]->{'b'}); |
181 |
parse_test($import, 'Tag', $rec, 0, '1 = 3 [yap]'); |
182 |
|
183 |
$rec->{'900'}->[0]->{'b'} = 5; |
184 |
$rec->{'900'}->[0]->{'c'} = 6; |
185 |
parse_test($import, 'Tag', $rec, 0, '1 + 5 = 6 [yap]'); |
186 |
|
187 |
delete($rec->{'900'}->[0]->{'c'}); |
188 |
$rec->{'900'}->[0]->{'x'} = 'hmmm'; |
189 |
parse_test($import, 'Tag', $rec, 0, '1 + 5 [hmmm]'); |
190 |
|
191 |
$rec->{'900'}->[0]->{'x'} = 'nope!'; |
192 |
delete($rec->{'900'}->[0]->{'a'}); |
193 |
parse_test($import, 'Tag', $rec, 0, '5 [nope!]'); |
194 |
|
195 |
$rec = { |
196 |
'900' => [ { |
197 |
'b' => 'b1', |
198 |
'x' => 'b1', |
199 |
},{ |
200 |
'a' => 'a1', |
201 |
'b' => 'b2', |
202 |
'x' => 'a1_b2', |
203 |
},{ |
204 |
'b' => 'b3', |
205 |
'c' => 'c1', |
206 |
'x' => 'b3_c1', |
207 |
},{ |
208 |
'a' => 'a2', |
209 |
'b' => 'b4', |
210 |
'c' => 'c2', |
211 |
'x' => 'a2_b4_c2', |
212 |
} ], |
213 |
}; |
214 |
|
215 |
$import = { |
216 |
'Tag' => { 'isis' => [ |
217 |
{ content => '0 v900^a 1 v900^b 2 v900^c 3 v900^x 4' }, |
218 |
] }, |
219 |
}; |
220 |
|
221 |
parse_test($import, 'Tag', $rec, 0, '0 b1 3 b1 4'); |
222 |
parse_test($import, 'Tag', $rec, 1, '0 a1 1 b2 3 a1_b2 4'); |
223 |
parse_test($import, 'Tag', $rec, 2, '0 b3 2 c1 3 b3_c1 4'); |
224 |
parse_test($import, 'Tag', $rec, 3, '0 a2 1 b4 2 c2 3 a2_b4_c2 4'); |
225 |
|
226 |
sub parse_test_arr($$$$) { |
227 |
my ($import, $tag, $rec, $arr) = @_; |
228 |
my $i = 0; |
229 |
foreach my $res (@{$arr}) { |
230 |
parse_test($import, $tag, $rec, $i, $res); |
231 |
$i++; |
232 |
} |
233 |
} |
234 |
|
235 |
$import = { |
236 |
'Tag_a' => { 'isis' => [ |
237 |
{ content => 'v900^a' }, |
238 |
] }, |
239 |
'Tag_b' => { 'isis' => [ |
240 |
{ content => 'v900^b' }, |
241 |
] }, |
242 |
'Tag_c' => { 'isis' => [ |
243 |
{ content => 'v900^c' }, |
244 |
] }, |
245 |
'Tag_x' => { 'isis' => [ |
246 |
{ content => 'v900^x' }, |
247 |
] }, |
248 |
'Tag_s1' => { 'isis' => [ |
249 |
{ content => 'v900^a = v900^c' }, |
250 |
] }, |
251 |
'Tag_noval' => { 'isis' => [ |
252 |
{ content => 'v911^1' }, |
253 |
{ content => 'v900^c' }, |
254 |
] }, |
255 |
}; |
256 |
|
257 |
parse_test_arr($import, 'Tag_a', $rec, [ '','a1','','a2' ] ); |
258 |
parse_test_arr($import, 'Tag_b', $rec, [ 'b1','b2','b3','b4' ] ); |
259 |
parse_test_arr($import, 'Tag_c', $rec, [ '','','c1','c2' ] ); |
260 |
parse_test_arr($import, 'Tag_x', $rec, [ 'b1','a1_b2','b3_c1','a2_b4_c2' ] ); |
261 |
parse_test_arr($import, 'Tag_s1', $rec, [ '', 'a1', 'c1', 'a2 = c2' ] ); |
262 |
parse_test_arr($import, 'Tag_noval', $rec, [ '','','c1','c2' ] ); |
263 |
|