/[corp_html]/mnogo/parse.inc
This is a repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /mnogo/parse.inc

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (hide annotations)
Mon Mar 5 19:57:02 2001 UTC (23 years, 1 month ago) by dpavlin
Branch: MAIN
search koji radi

1 dpavlin 1.1 <?
2    
3     // -----------------------------------------------
4     // ParseDocText($text)
5     // -----------------------------------------------
/**
 * Wrap every query word found in $text with the highlight markers.
 *
 * A word is highlighted when it occurs at the very start of the text or
 * directly after a run of whitespace/punctuation. Matching is
 * case-insensitive and only the beginning of the word is anchored, so a
 * query word also marks the head of longer words that start with it.
 *
 * Globals read:
 *   $all_words            list of query words to highlight
 *   $hlbeg, $hlend        text inserted before / after each hit
 *   $highlight_stopwords  when 'yes', the keys of $stopword_arr are
 *                         highlighted as well
 *
 * All words are matched in a single pass, so a later word can never match
 * inside highlight markup that was inserted for an earlier one.
 *
 * @param string $text Text to decorate.
 * @return string The text with highlight markers inserted.
 */
function ParseDocText($text){
    global $all_words;
    global $stopword_arr;
    global $highlight_stopwords;
    global $hlbeg, $hlend;

    $candidates = is_array($all_words) ? array_values($all_words) : array();
    if ($highlight_stopwords == 'yes' && is_array($stopword_arr)) {
        // Stopwords are stored as the array keys.
        $candidates = array_merge($candidates, array_keys($stopword_arr));
    }

    // Quote each word so regex metacharacters in it ("c++", "a.b") are
    // matched literally; using the quoted form as key removes duplicates
    // while keeping the original priority order.
    $alternatives = array();
    foreach ($candidates as $word) {
        $word = (string) $word;
        if ($word === '') {
            continue; // an empty alternative would match everywhere
        }
        $alternatives[preg_quote($word, '/')] = true;
    }
    if (!$alternatives) {
        return $text;
    }

    // Characters that may precede a word: whitespace and punctuation.
    $separators = '[\s~!@#$%^&*()\-_=+\\\\|{}\[\];:\'"<>?\/,.]+';
    $pattern = '/(^|' . $separators . ')(' . implode('|', array_keys($alternatives)) . ')/i';

    $open  = (string) $hlbeg;
    $close = (string) $hlend;

    // A callback keeps the marker text literal: it is never parsed for
    // "\1" / "$1" style back-references.
    $marked = preg_replace_callback(
        $pattern,
        function ($m) use ($open, $close) {
            return $m[1] . $open . $m[2] . $close;
        },
        (string) $text
    );

    // preg_replace_callback() yields null on a PCRE failure; fall back to
    // the undecorated text instead of returning nothing.
    return ($marked === null) ? $text : $marked;
}
29    
30     // -----------------------------------------------
31     // ParseStr($qwe)
32     // -----------------------------------------------
/**
 * Normalise a free-form search query into a compact boolean expression
 * that uses only the operators '&' (and), '|' (or), '~' (not) and
 * parentheses, with no spaces.
 *
 * Steps, in order:
 *   1. runs of punctuation are turned into '&';
 *   2. the keywords or/and/not/without (plus the Russian ones when
 *      $rus_bool_lang is 'yes') and spaced operators become single
 *      operator characters;
 *   3. remaining spaces become the default operator ('|' when
 *      $DEFAULT_QUERY_TYPE is 'or', otherwise '&');
 *   4. duplicated / dangling operators are removed;
 *   5. the default operator is inserted where two operands, a ')' and an
 *      operand, or a NOT follow each other without a binary operator.
 *
 * When $DEBUG is truthy the query is echoed before and after.
 *
 * @param string $qwe Raw query text.
 * @return string Normalised query.
 */
function ParseStr($qwe) {
    global $DEFAULT_QUERY_TYPE;
    global $DEBUG;
    global $rus_bool_lang;

    if ($DEBUG) echo "Begin ParseStr(): qwe=$qwe<BR><HR>";

    // Operator implied between two adjacent operands.
    $op = (strtolower((string) $DEFAULT_QUERY_TYPE) == 'or') ? '|' : '&';

    // Punctuation acts as a word separator.
    $qwe = preg_replace('/[!@#$%^*\-_+=\\\\{}\[\];\':"<>?\/,.]+/', '&', (string) $qwe);
    $qwe = trim($qwe);

    // query language normalizer
    $or_pat  = ' *\| *| +or +';
    $and_pat = ' *& *| +and +';
    $not_pat = ' *~ *| +not +| +without +';
    if ($rus_bool_lang == 'yes') {
        // Russian keywords given as raw bytes so they do not depend on the
        // encoding this file is saved in.
        // NOTE(review): bytes look like KOI8-R for "ili" (or), "i" (and),
        // "ne" / "bez" (not / without) -- confirm the query encoding.
        $or_pat  .= '| +\xC9\xCC\xC9 +';
        $and_pat .= '| +\xC9 +';
        $not_pat .= '| +\xCE\xC5 +| +\xC2\xC5\xDA +';
    }

    // One operand: any run of characters that is neither operator nor bracket.
    $term = '[^&~|()]+';

    // Ordered rewrite rules: array(pattern, replacement).
    $rules = array(
        array('/' . $or_pat . '/i', '|'),
        array('/' . $and_pat . '/i', '&'),
        array('/' . $not_pat . '/i', '~'),

        array('/ *\( */', '('),
        array('/ *\) */', ')'),

        // remaining spaces mean "default operator"
        array('/ +/', $op),
        array('/&\|+/', $op),
        array('/\|&+/', $op),

        // remove unnecessary boolean operators
        array('/\|+/', '|'),
        array('/&+/', '&'),
        array('/~+/', '~'),
        array('/\|&\|/', '&'),
        array('/[|&~]+\z/', ''),
        array('/^[|&]+/', ''),

        // transform "w1 ~w2" -> "w1 default_op ~w2"
        //           ") ~w"   -> ") default_op ~w"
        //           "w ~ ("  -> "w default_op ~("
        //           ") w"    -> ") default_op w"
        //           "w ("    -> "w default_op ("
        //           ")("     -> ") default_op ("
        array('/(' . $term . ')~(' . $term . ')/', '\1' . $op . '~\2'),
        array('/\)~+/', ')' . $op . '~'),
        // The operator goes BEFORE the '~'. The old 'or' branch produced
        // "~|(", which the cleanup below reduced to "|(" and so silently
        // dropped the negation.
        array('/~+\(/', $op . '~('),
        array('/\)(' . $term . ')/', ')' . $op . '\1'),
        array('/(' . $term . ')\(/', '\1' . $op . '('),
        array('/\) *\(/', ')' . $op . '('),

        // remove unnecessary boolean operators
        array('/\|+/', '|'),
        array('/&+/', '&'),

        // remove erroneous query forms: '(&', '&)', '~)', '(|', '|)', '~&', '~|'
        array('/\(&+/', '('),
        array('/&+\)/', ')'),
        array('/~+\)/', ')'),
        array('/\(\|+/', '('),
        array('/\|+\)/', ')'),
        array('/~+&+/', '&'),
        array('/~+\|+/', '|'),

        // A query that starts with "~(" has just been given a leading
        // default operator; a binary operator can never start a query.
        array('/^[|&]+/', ''),
    );

    foreach ($rules as $rule) {
        $qwe = preg_replace($rule[0], $rule[1], $qwe);
    }

    if ($DEBUG) echo "End ParseStr(): qwe=$qwe<BR><HR>";

    return($qwe);
}
116    
117     // -----------------------------------------------
118     // ParseQ($q)
119     // -----------------------------------------------
/**
 * Turn a raw user query into the space-separated token string consumed by
 * last_parse().
 *
 * The query is normalised by ParseStr(); its operators are then spelled
 * out as '&&', '||' and '!', every operator and bracket is surrounded by
 * single spaces, and the whole expression is wrapped in "( ... )".
 *
 * When $DEBUG is truthy the query is echoed before and after.
 *
 * @param string $q Raw query text.
 * @return string Token string, or '' when the query is empty or contains
 *                only spaces.
 */
function ParseQ($q){
    global $DEBUG;

    if ($DEBUG) echo "Begin ParseQ(): q=$q<BR>";

    $q = (string) $q;
    if (str_replace(' ', '', $q) === '') {
        return '';
    }

    $q = ParseStr($q);

    // strtr() substitutes in one pass, so inserted text is never rescanned.
    $q = strtr($q, array(
        '&' => ' && ',
        '|' => ' || ',
        '~' => ' ! ',
        '(' => ' ( ',
        ')' => ' ) ',
    ));
    $q = "( $q )";
    $q = preg_replace('/ +/', ' ', $q); // collapse runs of spaces

    if ($DEBUG) echo "End ParseQ(): q=$q<BR>";

    return $q;
}
143    
144     // -----------------------------------------------
145     // last_parse($q)
146     // -----------------------------------------------
/**
 * Run one statement for last_parse(): echo it when $DEBUG is truthy,
 * execute it with db_query() and report a failure through
 * print_error_local().
 *
 * @param string $query SQL text.
 * @return mixed Whatever db_query() returned.
 */
function last_parse_exec($query) {
    global $DEBUG;

    if ($DEBUG) echo "last_parse(): ",$query,"<BR><HR>";

    $res = db_query($query);
    if (!$res) print_error_local('Query error: '.$query."\n<BR>".db_error());

    return $res;
}

/**
 * CRC32 of a word as a signed 32-bit integer.
 *
 * crc32() yields negative numbers for half of its range on 32-bit PHP but
 * only non-negative ones on 64-bit PHP; folding the upper half back keeps
 * the value identical on both.
 * NOTE(review): assumes the indexer stores word_id as a signed 32-bit
 * value (the temporary table below declares it INT) -- confirm.
 *
 * @param string $word
 * @return int
 */
function last_parse_crc32($word) {
    $crc = crc32($word);
    if ($crc > 0x7FFFFFFF) {
        $crc -= 0x100000000;
    }
    return $crc;
}

/**
 * Translate the token string produced by ParseQ() into SQL.
 *
 * The tokens ("(", ")", "&&", "||", "!", words) are checked with a
 * two-state machine (state 0 expects an operand, state 1 an operator or
 * ")") and rewritten into a boolean HAVING expression in which every
 * non-stopword is replaced by an OR-list over its forms from $final_word.
 * Syntax errors and unbalanced brackets go to print_error_local().
 *
 * In 'multi' / 'crc-multi' mode the matching dictionary rows are first
 * collected per word form: into a temporary table for MySQL/PostgreSQL
 * (statements are executed here), or into a UNION ALL sub-select
 * otherwise. In the other modes a single dictionary table is queried.
 *
 * Globals written: $all_words (word forms appended), $wordsinfo,
 * $soundex_suggestions, $temp_table.
 *
 * NOTE(review): word forms are placed into SQL text without escaping; this
 * relies on the query having been cleaned (ParseStr() replaces quotes and
 * backslashes) and on $final_word holding clean data.
 *
 * @param string $q Token string as produced by ParseQ().
 * @return array array($query_url_id, $query_url, $query_count_url_id);
 *               $query_count_url_id is '' when no count query is built,
 *               and $query_url contains the placeholder %URL_IN%.
 */
function last_parse($q){
    global $all_words;
    global $dbtype;
    global $dbmode;
    global $db_format;
    global $uestr, $ulstr;
    global $tagstr, $timestr, $catstr, $langstr;
    global $temp_table;
    global $final_word;
    global $ispellmode;
    global $wordsinfo;
    global $soundex_suggestions, $soundex;

    global $last_mod_field;
    global $crc_field;

    $is_crc    = ($dbmode == 'crc') || ($dbmode == 'crc-multi');
    $is_multi  = ($dbmode == 'multi') || ($dbmode == 'crc-multi');
    $is_oracle = ($dbtype == 'oracle') || ($dbtype == 'oracle7') || ($dbtype == 'oracle8');
    $has_temp  = ($dbtype == 'pgsql') || ($dbtype == 'mysql');

    if ($is_crc) {
        $word_field = 'word_id';
        $word_table = 'ndict';
        $word_type  = 'INT';
        $str_sep    = "";
    } else {
        $word_field = 'word';
        $word_table = 'dict';
        $word_type  = 'VARCHAR(32)';
        $str_sep    = "'";
    }

    // ------------------
    // Validate the token stream and build the HAVING skeleton; each word
    // is left as a %word% placeholder for now.
    $error = "";
    $state = 0;
    $qu    = "";
    $w     = "";
    $n     = 0;       // current bracket depth
    $words = array();

    for ($t = strtok((string) $q, " "); ($t !== false) && ($t != "") && ($error == ""); $t = strtok(" ")) {
        if ($state == 0) {
            // expecting an operand, "!" or "("
            if (($t === "||") || ($t === "&&") || ($t === ")")) {
                $error = "at '$t'";
            } elseif ($t === "!") {
                $qu = "$qu NOT ";
            } elseif ($t === "(") {
                $n++;
                $qu = "$qu(";
            } else {
                $state = 1;
                $t = strtolower($t);
                if (!is_stopword($t)) {
                    normalize_word($t);
                    $words[] = $t;
                    $qu = "$qu %$t% ";
                } else {
                    // a stopword never restricts the result
                    $qu = "$qu 1=1 ";
                    $wordsinfo .= "<b>$t</b>: stopword; ";
                }
            }
        } else {
            // expecting a binary operator or ")"
            if ($t === "||") {
                $state = 0;
                $qu = "$qu OR ";
            } elseif ($t === "&&") {
                $state = 0;
                $qu = "$qu AND ";
            } elseif ($t === ")") {
                $n--;
                $qu = "$qu)";
            } else {
                $error = " at '$t'";
            }
        }
    }

    if ($error == "" && $n != 0) $error = 'Unmatched brackets';
    if ($error != "") print_error_local($error);

    $ispell = (string) $ispellmode;
    if ((stripos($ispell, 'db') !== false) || (stripos($ispell, 'text') !== false)) check_words();

    if (count($words)) $wordsinfo .= "looking for: ";

    // ------------------
    // Replace every %word% placeholder by an OR-list over its forms and
    // collect the quoted forms in $w for the IN (...) filter.
    $norm_word_crc = array();

    foreach ($words as $word) {
        $qu_norm = '';
        $forms = (isset($final_word[$word]) && is_array($final_word[$word])) ? $final_word[$word] : array();

        foreach ($forms as $norm_word) {
            $all_words[] = $norm_word;
            $wordsinfo .= "<b>$norm_word</b>; ";

            $value = $norm_word;
            if ($is_crc) {
                $value = last_parse_crc32($norm_word);
                $norm_word_crc[$norm_word] = $value;
            }
            $quoted = $str_sep . $value . $str_sep;

            switch ($dbtype) {
                case 'oracle7':
                case 'oracle8':
                case 'oracle':
                    $qu_norm .= " sum(decode($word_field,$quoted,1,0))>0 OR ";
                    break;
                case 'pgsql':
                    $qu_norm .= " sum(case $word_field when $quoted then 1 else 0 end)>0 OR ";
                    break;
                default: // mysql and anything else
                    $qu_norm .= " sum($word_field=$quoted)>0 OR ";
                    break;
            }

            $w = ($w == "") ? $quoted : "$w,$quoted";
        }

        $qu_norm = "($qu_norm)";
        $qu_norm = str_replace("OR )", ")", $qu_norm); // drop the trailing OR
        $qu = str_replace("%$word%", $qu_norm, $qu);
    }

    if ($soundex == 'yes') {
        if (is_array($all_words)) {
            // Ask for a suggestion per collected word (the old loop passed
            // the last word form on every iteration).
            foreach ($all_words as $candidate) {
                $soundex_suggestions .= suggest_soundex($candidate);
            }
        }
        if ($soundex_suggestions == '') $soundex_suggestions = '-';
    }

    $sql_small = (($dbtype == 'mysql') || ($dbtype == '')) ? 'SQL_SMALL_RESULT' : '';

    $has_filters = ($ulstr || $uestr || $tagstr || $timestr || $catstr || $langstr);
    $filters = "$catstr
        $ulstr
        $uestr
        $langstr
        $tagstr
        $timestr";

    $query_url_id = "";
    $query_count_url_id = "";

    if ($is_multi) {
        // ------------------
        // DBMode = multi OR crc-multi
        $query = "";

        if ($has_temp && count($words)) {
            $make_temp_table = 1;
            $temp_table = 't'.time().rand(1000,9999).rand(1000,9999);
        } else {
            $make_temp_table = 0;
            $temp_table = "";
        }

        $table_created = false;
        $count = 0; // rows in the temporary table so far (mysql only)

        foreach ($words as $word) {
            $forms = (isset($final_word[$word]) && is_array($final_word[$word])) ? $final_word[$word] : array();

            foreach ($forms as $norm_word) {
                $dict = get_dict_tab($norm_word);

                $value = ($dbmode == 'crc-multi') ? $norm_word_crc[$norm_word] : $norm_word;

                $select = "SELECT url_id,$word_field,intag
                    FROM $dict
                    WHERE $word_field = $str_sep$value$str_sep";

                if ($dbtype == 'mysql') {
                    // mysql: fill the temporary table one word form at a
                    // time and report how many rows each form added
                    if (!$table_created) {
                        last_parse_exec("CREATE /*!32302 TEMPORARY */ TABLE $temp_table (
                            url_id INT DEFAULT '0' NOT NULL,
                            $word_field $word_type DEFAULT '0' NOT NULL,
                            intag TINYINT DEFAULT '0' NOT NULL,
                            KEY i1$temp_table(url_id),
                            KEY i2$temp_table($word_field))");
                        $table_created = true;
                    }

                    last_parse_exec("INSERT INTO $temp_table
                        $select");

                    $res = last_parse_exec("SELECT count(*)
                        FROM $temp_table");

                    if ($res) {
                        if ($row = db_fetchrow($res)) {
                            $added = $row[0] - $count;
                            $wordsinfo .= "<b>$norm_word</b>: $added; ";
                            $count = $row[0];
                        }
                        db_freeresult($res);
                    }
                } elseif ($query == "") {
                    if ($make_temp_table == 0) {
                        $query = $select;
                    } else {
                        // not mysql
                        $query = "SELECT url_id,$word_field,intag
                            INTO TEMP $temp_table
                            FROM $dict
                            WHERE $word_field = $str_sep$value$str_sep";
                    }
                } else {
                    // the leading newline keeps UNION apart from a
                    // preceding unquoted number in crc mode
                    $query .= "
                        UNION ALL
                        $select";
                }
            }
        }

        if (($make_temp_table == 1) && ($dbtype != 'mysql')) {
            last_parse_exec($query);
        }

        if ($is_oracle || $has_temp) {
            // oracle reads the UNION as a sub-select, the others read the
            // temporary table filled above
            $source = $is_oracle ? "($query)" : $temp_table;

            if ($has_filters) {
                $from = "FROM url, $source
                    WHERE url.rec_id=url_id
                    $filters";
            } else {
                // not ul,ue nor tagstr,timestr,catstr,langstr
                $from = "FROM $source";
            }

            $query_url_id = "SELECT $sql_small
                url_id,
                sum(intag) as r
                $from
                GROUP BY url_id
                HAVING ($qu)
                ORDER BY r DESC";

            if ($is_oracle) {
                $query_count_url_id = "SELECT count(*)
                    FROM (
                    SELECT url_id
                    $from
                    GROUP BY url_id
                    HAVING ($qu)
                    )";
            }
        }

    } else {
        // ------------------
        // DBMode = single or crc

        if ($has_filters) {
            $from = "FROM url,$word_table
                WHERE url.rec_id=$word_table.url_id
                $filters
                AND $word_table.$word_field in ($w)";
        } else {
            $from = "FROM $word_table
                WHERE $word_table.$word_field in ($w)";
        }

        $query_url_id = "SELECT $sql_small
            $word_table.url_id,
            sum($word_table.intag) as r
            $from
            GROUP BY url_id
            HAVING ($qu)
            ORDER BY r DESC";

        $query_count_url_id = "SELECT count(*)
            FROM (
            SELECT $word_table.url_id
            $from
            GROUP BY $word_table.url_id
            HAVING ($qu)
            )";
    }

    $cat_field = ($db_format == '3.1') ? ',category' : '';

    $query_url = "SELECT $sql_small
        url.url,
        url.title,
        url.txt,
        url.content_type,
        url.docsize,
        $last_mod_field,
        url.keywords,
        url.description,
        $crc_field,
        url.rec_id
        $cat_field
        FROM url
        WHERE url.rec_id = %URL_IN%";

    return array($query_url_id,$query_url,$query_count_url_id);
}
556    
557     ?>
558    

  ViewVC Help
Powered by ViewVC 1.1.26