/[libdata]/trunk/admin/include/fuzzy.php
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/admin/include/fuzzy.php

Parent Directory Parent Directory | Revision Log Revision Log


Revision 72 - (show annotations)
Thu Mar 18 20:33:37 2004 UTC (20 years, 1 month ago) by dpavlin
File size: 4801 byte(s)
changes made in version 2.00

1 <?php
/**********************************************************
Function: newResPageHeader
Author: Paul Bramscher
Last Modified: 06.25.2003
***********************************************************
Writes the opening HTML of the "enter a new resource" page
as shown while checking for possible title matches: the
<HEAD> section (page title and admin stylesheet link), the
shared admin page header, an opening <center> tag and the
"Adding New Resource.." heading. The <center> tag is left
open; nothing in this function closes it.

This lives in its own function because the header must not
always be sent. Per the original author's note, when there
are no fuzzy (or exact) matches the user is redirected to
formResource instead, and a redirect cannot follow output.

Takes no parameters and returns nothing.
**********************************************************/
function newResPageHeader() {

    // Pull the configuration into local scope. $GLOBAL_ADMIN_CSS and
    // $GLOBAL_ADMIN_HEADER are expected to come from this file
    // (presumably defined there -- it is not visible from here).
    include ("global_vars.php");

    // Document head
    echo "<HTML>\n";
    echo "<HEAD>\n";
    echo "<title>LibData: Enter a New Resource</title>\n";
    echo "<link rel=\"stylesheet\" href=\"" . $GLOBAL_ADMIN_CSS . "\" type=\"text/css\">\n";
    echo "</HEAD>\n";

    // Shared admin page header, then the start of this page's content
    include($GLOBAL_ADMIN_HEADER);
    echo "<center>\n";
    echo "<h3>Adding New Resource..</h3>\n";

}
32
33
/**********************************************************
Function: resTitlePat
Original Author: Paul Bramscher <brams006@umn.edu>
Last Modified: 03.03.2004
***********************************************************
Comments:
This function takes $string (a proposed resource title) as
input and builds eight permutations of it into a single SQL
query against resource titles. These permutations are meant
as possible matches. This is not an exact science, and this
algorithm could certainly be further tweaked.

Pass definitions:

pass1 = left 50% of string, remainder wildcarded
pass2 = middle 60% (20% cut from each end), both ends wildcarded
pass3 = compacted without spaces
pass4 = a/an/the articles removed
pass5 = every other position wildcarded, starting with 0
pass6 = every other position wildcarded, starting with 1
pass7 = replacing 'ies' with 'y'
pass8 = replacing 'y' with 'ies'

Passes 2 through 8 are matched anywhere inside a title.
Only titles whose LENGTH() lies between half and twice the
length of the cleaned input are considered.

Parameters:
$string = the title to check; single quotes are removed and
          slashes stripped before it is used in the query

Returns:
The number of matching rows as reported by xx_num_rows().

Side effects:
When at least one row matches, the page header is drawn via
newResPageHeader() and a table listing the matching resource
ids and titles is printed. Nothing is printed otherwise.
**********************************************************/
function resTitlePat($string) {

    // Save an unslashed copy of what was typed, for display purposes only
    $title = stripslashes($string);

    // Clean up the string -- we must remove single quotes and slashes.
    // Every pattern in this function is a plain literal, so str_replace()
    // does the same job as the ereg_replace() calls it replaces; the
    // ereg_* functions no longer exist as of PHP 7.
    $string = stripslashes(str_replace("'", "", $string));

    // String sizes. Cast to int so the numbers placed in the SQL and passed
    // to substr() are integers rather than floats from floor()/ceil().
    $length = strlen($string);
    $left50_pos = (int) floor($length * .5);
    $left20_pos = (int) floor($length * .2);
    $max_length = (int) ceil($length * 2);

    // Permute #1: Match left 50%
    $pass1 = substr($string, 0, $left50_pos);

    // Permute #2: Cut 20% off each end and float-match what is left
    $pass2 = substr($string, $left20_pos, $length - ($left20_pos * 2));

    // Permute #3: Compacting, remove all spaces
    $pass3 = str_replace(" ", "", $string);

    // Permute #4: Pulling out a/an/the articles. The array form applies the
    // replacements one after another, in this order. The match is
    // case-sensitive and not limited to whole words.
    $pass4 = str_replace(array("the ", "a ", "an "), "", $string);

    // Permute #5 and #6: Wildcard every other position with the SQL
    // single-character wildcard; #5 takes the even positions (starting
    // with 0), #6 the odd positions (starting with 1).
    $pass5 = $string;
    $pass6 = $string;
    for ($x = 0; $x < $length; $x++) {

        if ($x % 2 == 0) {
            $pass5[$x] = "_";
        } else {
            $pass6[$x] = "_";
        }

    }

    // Permute #7: Replacing 'ies' with 'y'
    $pass7 = str_replace("ies", "y", $string);

    // Permute #8: Replacing 'y' with 'ies'
    $pass8 = str_replace("y", "ies", $string);

    // Assemble the SQL. pass1 is anchored at the start of the title, the
    // remaining passes may match anywhere within it.
    // NOTE(review): the values are still placed directly into the SQL text.
    // Single quotes have been removed above, but any % or _ typed by the
    // user acts as a LIKE wildcard. Bound parameters would be preferable if
    // the xx_* database layer offers them -- it is not visible from here.
    $floating = array($pass2, $pass3, $pass4, $pass5, $pass6, $pass7, $pass8);
    $sql = "SELECT resource_id, title FROM resource WHERE (title LIKE '"
        . $pass1
        . "%' OR title LIKE '%"
        . implode("%' OR title LIKE '%", $floating)
        . "%') AND (LENGTH(title) >= "
        . $left50_pos
        . " AND LENGTH(title) <= "
        . $max_length
        . ")";

    // Debugging
    // printf("sql was: %s", $sql);

    $rs = xx_tryquery($sql);
    $hits = xx_num_rows($rs);
    if ($hits > 0) {

        // Draw page header
        newResPageHeader();

        // Table
        echo "<table width = \"60%\" border = \"3\" cellpadding =\"4\" class=\"backLight\">\n";
        echo "<tr><td>\n";
        echo "<b>Messages:</b><br>\n";

        // The typed title is user input: escape it before it goes into HTML
        printf ("Possible duplicate resource titles found with title '%s'. Are you sure you want to continue?<br><br>\n", htmlspecialchars($title, ENT_QUOTES));
        while ($row = xx_fetch_array ($rs, xx_ASSOC)) {
            $found_title = $row["title"];
            $resource_id = $row["resource_id"];

            // Stored titles are escaped as well before being displayed
            printf("<b>Resource ID:</b> %d ", $resource_id);
            printf("<b>Title:</b> %s<BR>\n", htmlspecialchars($found_title, ENT_QUOTES));

        } // this result set

        // Close things (including the <center> tag, which this function
        // does not open itself)
        echo "<br><br>\n";
        echo "</td></tr></table>\n";
        echo "</center>\n";

    }

    return $hits;

} // end of php function
182 ?>

  ViewVC Help
Powered by ViewVC 1.1.26