#!/usr/bin/perl

# Shared settings and helpers for the indexing scripts.
#
# The package statement below is commented out, so the subs defined in this
# file are NOT compiled into package "common"; only the settings are
# addressed through their fully qualified $common:: names.
#package common;

use strict;
use warnings;

# Root directory; open_data_files() creates one sub-directory per database
# underneath it.
$common::install_dir = "/local/index";

# Indexer program plus its fixed options. open_data_files() appends further
# options and runs the result as one command line.
$common::mpsindex = "/local/mps-5.3/bin/mpsindex -l 9 -b";

# Presumably the location of the ISIS source data (not used in this file);
# the commented-out lines are alternative locations kept for reference.
#$common::isis_data="/var/autofs/misc/isis_data/";
$common::isis_data = "/data/isis_data/";
#$common::isis_data="/mnt/20020606/Isis/Data/";	# at home

# Header that open_data_files() writes first to the stream file and to the
# indexer. Each "F" line looks like "<source field> <number> <label>" --
# TODO confirm the exact meaning against the MPS documentation.
# NOTE(review): the non-ASCII letters in the labels look like ISO-8859-2
# text displayed as ISO-8859-1; they are runtime data and are left
# untouched here -- confirm the encoding this file is stored in.
$common::mps_header = "V 5 3
L hr-HR
F 700+ 1 Autor
F 200+ 2 Naslov
F 210+ 3 Izdavanje
F 225 4 Nakladnièka cjelina
F 300+ 5 Napomene
F 330 6 Sadr¾aj
F 464 7 Analitièki radovi
F 610 8 Kljuène rijeèi
F 675 9 UDK
F 686 10 CC
F 990 11 Signatura
F 991 12 Inventarni broj
F ISN 13 ISBN, ISSN
F MFN 14 MFN
F 215+ 15 Materijalni opis
F 101 16 Jezik
F tip 17 Tip graðe
";

# Path of the databases configuration file (not read in this file).
$common::database_cf = "/local/search/databases.cf";

#--------------------------------------------------------------------
#
# open_data_files($db_dir)
#
# Prepare the output side of an indexing run for one database.
#
# input:  $db_dir - database directory name (e.g. 'ps')
# return: path on filesystem to data dir
#         ("$common::install_dir/$db_dir/data")
#
# Side effects:
#   - creates "$common::install_dir/$db_dir" and its "data" sub-directory
#     when they are missing ($common::install_dir itself is not created)
#   - (re)opens the global filehandles
#       S   - "<data dir>/stream", truncated for writing
#       R   - "<data dir>/bib", truncated for writing
#       MPS - pipe into "$common::mpsindex -d <db dir> -autokey"
#   - writes $common::mps_header to S and MPS (not to R)
#
# Dies with a message containing $! when a directory cannot be created,
# a file cannot be opened, the indexer cannot be started or the header
# cannot be written.

sub open_data_files {
    my ($db_dir) = @_;

    my $base = "$common::install_dir/$db_dir";
    my $dir  = "$base/data";

    foreach my $path ($base, $dir) {
        next if -d $path;
        if (!mkdir($path)) {
            # Keep mkdir's own error: the -d test below overwrites $!.
            my $err = $!;
            # Somebody else may have created it in the meantime; only a
            # still-missing directory is fatal.
            die "can't create directory $path: $err" unless -d $path;
        }
    }

    # Three-argument open: the mode can no longer be influenced by the
    # interpolated path. The bareword handles are kept on purpose, they
    # are the interface the rest of the program writes to.
    open(S, '>', "$dir/stream") or die "can't open output $dir/stream: $!";
    open(R, '>', "$dir/bib") or die "can't open output $dir/bib: $!";

    # $common::mpsindex already carries options, so the command stays a
    # single string. NOTE(review): it may therefore be run through the
    # shell -- $db_dir must not come from untrusted input.
    open(MPS, '|-', "$common::mpsindex -d $base -autokey")
        or die "can't start MPS indexer $common::mpsindex: $!";
    # open(MPS,"> /tmp/mpsindex") || die "mps: $!";

    print(S $common::mps_header) or die "can't write header to $dir/stream: $!";
    print(MPS $common::mps_header) or die "can't write header to MPS indexer: $!";

    return $dir;
}

#--------------------------------------------------------------------
#
# mps_expand(nr,"space separated string", ...);
#
# Build "W <word> <nr>" lines for every string passed after nr.
#
# input:  $nr     - number appended to every line (presumably the field
#                   number from $common::mps_header -- confirm with callers)
#         @fields - any number of strings; undefined and empty ones are
#                   skipped, they no longer end the processing
# return: all lines concatenated, each terminated by "\n" ("" when
#         nothing was produced)
#
# Rules:
#   - a string without whitespace is emitted verbatim as a single word
#   - otherwise punctuation in front of whitespace is dropped, "><" is
#     treated as a separator and the string is split into words; one
#     opening character ( ' " < ( [ ) and one closing character
#     ( , ; . ! ? ' " > ) ] ) are stripped from each word, and words that
#     end up empty are skipped
#   - a word containing an umlaut is emitted twice: as it is and with
#     oe/ae/ue in place of the umlauts
#   - for nr 1, 2, 3, 6, 7 and 8 a word starting with Mc/Mac followed by
#     a capital letter is emitted as both "Mc..." and "Mac..."

sub mps_expand {
    my ($nr, @fields) = @_;

    # Mc/Mac expansion applies to these numbers only; evaluated once
    # because it does not depend on the word.
    my $expand_mac = grep { $nr == $_ } 1, 2, 3, 6, 7, 8;

    my $out = "";

    foreach my $fld (@fields) {
        next if !defined $fld || !length $fld;

        # s///g returns the number of replacements, so this is false
        # exactly when the value contains no whitespace at all.
        my $has_whitespace = ($fld =~ s/\s*[,;\.!?'"<>\[\]]*\s+/ /g);
        if (!$has_whitespace) {
            $out .= "W $fld $nr\n";
            next;
        }

        $fld =~ s/></ /g;

        # split ' ' (unlike /\s+/) produces no empty leading word.
        foreach my $w (split(' ', $fld)) {
            # TODO: this should be replaced by stemmer!
            # remove char from beginning of word
            $w =~ s/^['"<(\[]//;
            # remove char from end of word
            $w =~ s/[,;\.!?'">)\]]$//;
            # nothing left, e.g. the word was a lone bracket
            next if !length $w;

            # Umlauts o, O, a, A, u, U with diaeresis, written as \x
            # escapes (single-byte Latin-1/Latin-2 code points) so the
            # match does not depend on how this source file is saved.
            if ($w =~ m/[\xF6\xD6\xE4\xC4\xFC\xDC]/) {
                $out .= "W $w $nr\n";
                $w =~ s/[\xF6\xD6]/oe/g;
                $w =~ s/[\xE4\xC4]/ae/g;
                $w =~ s/[\xFC\xDC]/ue/g;
                $out .= "W $w $nr\n";
            }
            # Irish last names
            elsif ($expand_mac && $w =~ m/^Ma?c([A-Z].+)/) {
                $out .= "W Mc$1 $nr\n";
                $out .= "W Mac$1 $nr\n";
            }
            else {
                $out .= "W $w $nr\n";
            }
        }
    }

    return $out;
}

# A file loaded with require/do must end with a true value.
1;
