1 |
package WebPAC::Normalize; |
package WebPAC::Normalize; |
2 |
use Exporter 'import'; |
use Exporter 'import'; |
3 |
our @EXPORT = qw/ |
our @EXPORT = qw/ |
4 |
_set_rec _set_lookup |
_set_ds _set_lookup |
5 |
_set_load_row |
_set_load_row |
6 |
_get_ds _clean_ds |
_get_ds _clean_ds |
7 |
_debug |
_debug |
13 |
marc_compose marc_leader marc_fixed |
marc_compose marc_leader marc_fixed |
14 |
marc_duplicate marc_remove marc_count |
marc_duplicate marc_remove marc_count |
15 |
marc_original_order |
marc_original_order |
16 |
|
marc_template |
17 |
|
|
18 |
rec1 rec2 rec |
rec1 rec2 rec |
19 |
frec |
frec frec_eq frec_ne |
20 |
regex prefix suffix surround |
regex prefix suffix surround |
21 |
first lookup join_with |
first lookup join_with |
22 |
save_into_lookup |
save_into_lookup |
39 |
# debugging warn(s) |
# debugging warn(s) |
40 |
my $debug = 0; |
my $debug = 0; |
41 |
|
|
42 |
|
# FIXME |
43 |
use WebPAC::Normalize::ISBN; |
use WebPAC::Normalize::ISBN; |
44 |
push @EXPORT, ( 'isbn_10', 'isbn_13' ); |
push @EXPORT, ( 'isbn_10', 'isbn_13' ); |
45 |
|
|
46 |
|
use WebPAC::Normalize::MARC; |
47 |
|
push @EXPORT, ( 'marc_template' ); |
48 |
|
|
49 |
=head1 NAME |
=head1 NAME |
50 |
|
|
51 |
WebPAC::Normalize - describe normalisaton rules using sets |
WebPAC::Normalize - describe normalisaton rules using sets |
52 |
|
|
53 |
=cut |
=cut |
54 |
|
|
55 |
our $VERSION = '0.31'; |
our $VERSION = '0.35'; |
56 |
|
|
57 |
=head1 SYNOPSIS |
=head1 SYNOPSIS |
58 |
|
|
110 |
die "need row argument" unless ($arg->{row}); |
die "need row argument" unless ($arg->{row}); |
111 |
die "need normalisation argument" unless ($arg->{rules}); |
die "need normalisation argument" unless ($arg->{rules}); |
112 |
|
|
|
no strict 'subs'; |
|
113 |
_set_lookup( $arg->{lookup} ) if defined($arg->{lookup}); |
_set_lookup( $arg->{lookup} ) if defined($arg->{lookup}); |
114 |
_set_rec( $arg->{row} ); |
_set_ds( $arg->{row} ); |
115 |
_set_config( $arg->{config} ) if defined($arg->{config}); |
_set_config( $arg->{config} ) if defined($arg->{config}); |
116 |
_clean_ds( %{ $arg } ); |
_clean_ds( %{ $arg } ); |
117 |
$load_row_coderef = $arg->{load_row_coderef}; |
$load_row_coderef = $arg->{load_row_coderef}; |
118 |
|
|
119 |
eval "$arg->{rules}"; |
no strict 'subs'; |
120 |
|
no warnings 'redefine'; |
121 |
|
eval "$arg->{rules};"; |
122 |
die "error evaling $arg->{rules}: $@\n" if ($@); |
die "error evaling $arg->{rules}: $@\n" if ($@); |
123 |
|
|
124 |
return _get_ds(); |
return _get_ds(); |
125 |
} |
} |
126 |
|
|
127 |
=head2 _set_rec |
=head2 _set_ds |
128 |
|
|
129 |
Set current record hash |
Set current record hash |
130 |
|
|
131 |
_set_rec( $rec ); |
_set_ds( $rec ); |
132 |
|
|
133 |
=cut |
=cut |
134 |
|
|
135 |
my $rec; |
my $rec; |
136 |
|
|
137 |
sub _set_rec { |
sub _set_ds { |
138 |
$rec = shift or die "no record hash"; |
$rec = shift or die "no record hash"; |
139 |
} |
} |
140 |
|
|
141 |
|
=head2 |
142 |
|
|
143 |
|
my $rec = _get_rec(); |
144 |
|
|
145 |
|
=cut |
146 |
|
|
147 |
|
sub _get_rec { $rec }; |
148 |
|
|
149 |
=head2 _set_config |
=head2 _set_config |
150 |
|
|
151 |
Set current config hash |
Set current config hash |
186 |
my ($marc_record_offset, $marc_fetch_offset) = (0, 0); |
my ($marc_record_offset, $marc_fetch_offset) = (0, 0); |
187 |
|
|
188 |
sub _get_ds { |
sub _get_ds { |
189 |
|
#warn "## out = ",dump($out); |
|
warn "## out = ",dump($out); |
|
|
|
|
190 |
return $out; |
return $out; |
191 |
} |
} |
192 |
|
|
827 |
return unless defined($rec->{$from}); |
return unless defined($rec->{$from}); |
828 |
|
|
829 |
my $r = $rec->{$from}; |
my $r = $rec->{$from}; |
830 |
die "record field $from isn't array\n" unless (ref($r) eq 'ARRAY'); |
die "record field $from isn't array ",dump( $rec ) unless (ref($r) eq 'ARRAY'); |
831 |
|
|
832 |
my ($i1,$i2) = defined($marc_indicators->{$to}) ? @{ $marc_indicators->{$to} } : (' ',' '); |
my ($i1,$i2) = defined($marc_indicators->{$to}) ? @{ $marc_indicators->{$to} } : (' ',' '); |
833 |
warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1); |
warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1); |
872 |
warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1); |
warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1); |
873 |
} |
} |
874 |
|
|
875 |
|
|
876 |
=head2 marc_count |
=head2 marc_count |
877 |
|
|
878 |
Return number of MARC records created using L</marc_duplicate>. |
Return number of MARC records created using L</marc_duplicate>. |
885 |
return $#{ $marc_record }; |
return $#{ $marc_record }; |
886 |
} |
} |
887 |
|
|
888 |
|
=head2 _marc_push |
889 |
|
|
890 |
|
_marc_push( $marc ); |
891 |
|
|
892 |
|
=cut |
893 |
|
|
894 |
|
sub _marc_push { |
895 |
|
my $marc = shift || die "no marc?"; |
896 |
|
push @{ $marc_record->[ $marc_record_offset ] }, $marc; |
897 |
|
} |
898 |
|
|
899 |
|
|
900 |
=head1 Functions to extract data from input |
=head1 Functions to extract data from input |
901 |
|
|
1025 |
|
|
1026 |
=cut |
=cut |
1027 |
|
|
|
sub frec { |
|
|
my @out = rec(@_); |
|
|
warn "rec(",dump(@_),") has more than one return value, ignoring\n" if $#out > 0; |
|
|
return shift @out; |
|
|
} |
|
|
|
|
1028 |
sub rec { |
sub rec { |
1029 |
my @out; |
my @out; |
1030 |
if ($#_ == 0) { |
if ($#_ == 0) { |
1041 |
} |
} |
1042 |
} |
} |
1043 |
|
|
1044 |
|
=head2 frec |
1045 |
|
|
1046 |
|
Returns first value from field |
1047 |
|
|
1048 |
|
$v = frec('200'); |
1049 |
|
$v = frec('200','a'); |
1050 |
|
|
1051 |
|
=cut |
1052 |
|
|
1053 |
|
sub frec { |
1054 |
|
my @out = rec(@_); |
1055 |
|
warn "rec(",dump(@_),") has more than one return value, ignoring\n" if $#out > 0; |
1056 |
|
return shift @out; |
1057 |
|
} |
1058 |
|
|
1059 |
|
=head2 frec_eq |
1060 |
|
|
1061 |
|
=head2 frec_ne |
1062 |
|
|
1063 |
|
Check if first values from two fields are same or different |
1064 |
|
|
1065 |
|
if ( frec_eq( 900 => 'a', 910 => 'c' ) ) { |
1066 |
|
# values are same |
1067 |
|
} else { |
1068 |
|
# values are different |
1069 |
|
} |
1070 |
|
|
1071 |
|
Strictly speaking C<frec_eq> and C<frec_ne> wouldn't be needed if you |
1072 |
|
could write something like: |
1073 |
|
|
1074 |
|
if ( frec( '900','a' ) eq frec( '910','c' ) ) { |
1075 |
|
# yada tada |
1076 |
|
} |
1077 |
|
|
1078 |
|
but you can't since our parser L<WebPAC::Parser> will remove all whitespaces |
1079 |
|
in order to parse text and create invalid function C<eqfrec>. |
1080 |
|
|
1081 |
|
=cut |
1082 |
|
|
1083 |
|
sub frec_eq { |
1084 |
|
my ( $f1,$sf1, $f2, $sf2 ) = @_; |
1085 |
|
return (rec( $f1, $sf1 ))[0] eq (rec( $f2, $sf2 ))[0]; |
1086 |
|
} |
1087 |
|
|
1088 |
|
sub frec_ne { |
1089 |
|
return ! frec_eq( @_ ); |
1090 |
|
} |
1091 |
|
|
1092 |
=head2 regex |
=head2 regex |
1093 |
|
|
1094 |
Apply regex to some or all values |
Apply regex to some or all values |