/[libdata]/trunk/admin/include/fuzzy.php
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/admin/include/fuzzy.php

Parent Directory Parent Directory | Revision Log Revision Log


Revision 10 - (hide annotations)
Thu Jan 15 14:19:27 2004 UTC (20 years, 3 months ago) by dpavlin
File size: 4825 byte(s)
Initial revision

1 dpavlin 10 <?php
/**********************************************************
Function: newResPageHeader
Author: Paul Bramscher
Last Modified: 06.25.2003
***********************************************************
Emits the opening markup of the "enter a new resource" page:
the HTML head (title and admin stylesheet link), the shared
admin page header, and the "Adding New Resource.." heading
inside an opened <center> element.

The header lives in its own function so that callers can
decide whether to print it at all; resTitlePat() in this
file only calls it once it has matches to show.

Takes no parameters and returns nothing. The <center>
element opened here is left open for the caller to close.
**********************************************************/
function newResPageHeader() {

    // Bring the site-wide settings into this function's scope;
    // $GLOBAL_ADMIN_CSS and $GLOBAL_ADMIN_HEADER are read below.
    include ("global_vars.php");

    // Document head
    echo "<HTML>\n",
         "<HEAD>\n",
         "<title>LibData: Enter a New Resource</title>\n";
    printf("<link rel=\"stylesheet\" href=\"%s\" type=\"text/css\">\n", $GLOBAL_ADMIN_CSS);
    echo "</HEAD>\n";

    // Shared admin banner, then this page's own heading
    include($GLOBAL_ADMIN_HEADER);
    echo "<center>\n",
         "<h3>Adding New Resource..</h3>\n";

}
32    
33    
/**********************************************************
Function: resTitlePat
Original Author: Paul Bramscher <brams006@tc.umn.edu>
Last Modified: 03.19.2003
***********************************************************
Comments:
Takes a proposed resource title in $string and builds eight
permutations of it into a single SQL query against
resource.title. The permutations are meant as possible
(fuzzy) matches. This is not an exact science, and the
algorithm could certainly be further tweaked.

Pass definitions:

pass1 = left 50% of string, remainder wildcarded
pass2 = middle 60% (20% trimmed from each end), both ends
        wildcarded
pass3 = compacted without spaces
pass4 = a/an/the articles removed (whole words only,
        case-insensitive)
pass5 = every other character wildcarded, starting at 0
pass6 = every other character wildcarded, starting at 1
pass7 = replacing 'ies' with 'y'
pass8 = replacing 'y' with 'ies'

Candidates are further limited to titles whose LENGTH() is
between half and twice the length of the cleaned string.

Parameters:
$con    - MySQL link passed to mysql_query()
$string - title to check; slash-escaping and single quotes
          are stripped before matching

Returns:
Number of rows matched, or 0 if the query could not be run.

Side effects:
When at least one row matches, prints the page header (via
newResPageHeader) followed by a table listing the possible
duplicates. The title echoed back and the titles listed
from the database are HTML-escaped before output.
**********************************************************/
function resTitlePat($con, $string) {

    // Unescaped copy of the title as typed, for display purposes
    $title = stripslashes($string);

    // Clean up the string: drop single quotes, then undo slash-escaping
    $string = str_replace("'", "", $string);
    $string = stripslashes($string);

    // String sizes
    $length = strlen($string);
    $max_length = (int) ceil($length * 2);
    $left50_pos = (int) floor($length * .5);
    $left20_pos = (int) floor($length * .2);

    // Permute #1: Match left 50%
    $pass1 = substr($string, 0, $left50_pos);

    // Permute #2: Cut-out and float match the middle substring
    $pass2 = substr($string, $left20_pos, $length - ($left20_pos * 2));

    // Permute #3: Compacting, remove all spaces
    $pass3 = str_replace(" ", "", $string);

    // Permute #4: Pulling out a/an/the articles. The word boundary keeps
    // us from chewing up ordinary words that merely end in "a", "an" or
    // "the" (e.g. "data base" must not become "datbase").
    $pass4 = preg_replace('/\b(?:the|an|a) /i', '', $string);

    // Permute #5 and #6: Wildcard every other position, starting with
    // position 0 and position 1 respectively
    $pass5 = $string;
    $pass6 = $string;
    for ($x = 0; $x < $length; $x++) {
        if ($x % 2 == 0) {
            $pass5[$x] = "_";
        } else {
            $pass6[$x] = "_";
        }
    }

    // Permute #7: Replacing 'ies' with 'y'
    $pass7 = str_replace("ies", "y", $string);

    // Permute #8: Replacing 'y' with 'ies'
    $pass8 = str_replace("y", "ies", $string);

    // Assemble the SQL. Each pattern is escaped only after it has been
    // permuted, so the "_" substitution above can never split an escape
    // sequence. Pass 1 is anchored on the left; all others float.
    $passes = array($pass1, $pass2, $pass3, $pass4, $pass5, $pass6, $pass7, $pass8);
    $likes = array();
    foreach ($passes as $index => $pass) {
        $lead = ($index === 0) ? "" : "%";
        $likes[] = "title LIKE '"
            . $lead
            . mysql_real_escape_string((string) $pass, $con)
            . "%'";
    }

    $sql = "SELECT resource_id, title FROM resource WHERE ("
        . implode(" OR ", $likes)
        . ") AND (LENGTH(title) >= "
        . $left50_pos
        . " AND LENGTH(title) <= "
        . $max_length
        . ")";

    $rs = mysql_query($sql, $con);
    if (!$rs) {
        // Query failed: report "no matches" instead of handing a
        // non-result to mysql_num_rows()
        return 0;
    }

    $hits = mysql_num_rows($rs);
    if ($hits > 0) {

        // Draw page header
        newResPageHeader();

        // Table
        printf("<table width = \"60%%\" border = \"3\" cellpadding =\"4\" class=\"backLight\">\n");
        printf("<tr><td><br>\n");
        printf("<strong>Messages:</strong><br>\n");

        // The title comes straight from the user, so escape it for HTML
        printf ("Possible duplicate resource titles found with title '%s'. Are you sure you want to continue?<br><br>\n", htmlspecialchars($title, ENT_QUOTES));
        while ($row = mysql_fetch_array ($rs)) {
            $match_title = $row["title"];
            $resource_id = $row["resource_id"];

            printf("<b>Resource ID:</b> %d ", $resource_id);
            printf("<b>Title:</b> %s<BR>", htmlspecialchars($match_title, ENT_QUOTES));

        } // this result set

        // Close things (including the <center> opened by the page header)
        printf("<br><br>\n");
        printf("</td></tr></table>\n");
        printf("</center>\n");

    }

    return $hits;

} // end of php function
182     ?>

  ViewVC Help
Powered by ViewVC 1.1.26