out/js/search.js

/**
 search.js searches an XML index to HTML files.

 A part of the jsfind project (http://projects.elucidsoft.net/jsfind)
 Copyright (C) 2003 Shawn Garbett

 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License
 as published by the Free Software Foundation; either version 2
 of the License, or (at your option) any later version.
 
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 
 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 
 Contact Info:
 Shawn Garbett <Shawn@eLucidSoft.net>
 http://www.elucidsoft.net
 4037 General Bate Drive
 Nashville, TN 37204
*/

// Constants

// Digit alphabet used by convert(); its length is the numeric base.
// A primitive string is used instead of a String wrapper object: the
// wrapper is an object (typeof "object"), and .length / .charAt() work
// identically on the primitive.
var conversion = "0123456789abcdefghijklmnopqrstuvwxyz";

// State variables
var query_left = "";   // part of the query that still has to be searched
var search_err = "";   // last internal error message, "" when there is none
var results    = null; // merged Result list; null until the first set is stored
var index_path = "";   // index directory including the trailing '/', set by doSearch()

// wildcard data about found keys: flat list of alternating URL and position
var wildcard_url_pos = [];

// Timer id and result callback of the load timeout armed by loadXML()
var watchdog_id = 0;
var watchdog_callback = null;

// A single search hit.
//   title - title string of the hit
//   link  - link (URL text) of the hit
//   freq  - frequency value; stored as a number via Number(), so a
//           non-numeric string becomes NaN and null becomes 0
function Result(title, link, freq) {
        var numeric_freq = Number(freq);

        this.title = title;
        this.link = link;
        this.frequency = numeric_freq;
}

// Merge (intersect) a new result set into the global `results`.
//
// When `results` is still empty/unset, `data` simply becomes the result
// set.  Otherwise only those stored results whose title also occurs in
// `data` are kept (in their stored order), and the frequency of the first
// matching entry of `data` is added to the kept result.
function intersect_results(data) {
        // Nothing stored yet: the new set is the result set
        if (! results) {
                results = data;
                return;
        }

        var kept = new Array();
        var r, d, stored;

        for (r = 0; r < results.length; r++) {
                stored = results[r];

                // Find the first entry of data carrying the same title
                d = 0;
                while (d < data.length && !(data[d].title == stored.title)) {
                        d++;
                }

                if (d < data.length) {
                        stored.frequency += data[d].frequency;
                        kept.push(stored);
                }
        }

        results = kept;
}

// Element that receives debug output; looked up or created on demand.
var debug_div = null;

// Append `msg` followed by a line break to the debug element.
// The element with id 'debug' is used when present; otherwise a new div is
// created and appended to document.body.  While document.body does not
// exist the message is dropped.
function debug(msg) {

//      return; // Disable debugging

        if (! debug_div) {
                debug_div = document.getElementById('debug');
        }

        // No 'debug' element on the page: create one
        if (! debug_div) {
                debug_div = document.createElement('div');
                if (document.body) {
                        document.body.appendChild(debug_div);
                } else {
                        debug_div = null;
                }
        }

        if (! debug_div) return;

        var text = document.createTextNode(msg);
        debug_div.appendChild(text);
        debug_div.appendChild(document.createElement("br"));
}

// Convert a non-negative number into an alphanumeric string in base
// conversion.length, using the characters of `conversion` as digits.
// Returns "0" for 0; a negative number yields "" because the digit loop
// is never entered.
function convert(num) {
        var base = conversion.length;
        var digit = 0;
        var out = "";

        if (num == 0) return "0";

        // Peel off the least significant digit and prepend it
        while (num > 0) {
                digit = num % base;
                out = conversion.charAt(digit) + out;
                num = Math.floor(num/base);
        }

        return out;
}

// Timeout handler: logs the timeout and hands an empty result list to the
// callback stored in watchdog_callback.
function watchdog() {
        var no_results = new Array();

        debug ("TIMEOUT!");
        watchdog_callback(no_results);
}

// Most recently requested XML document.  Kept as a global for backward
// compatibility; the load callbacks below use a per-call reference so a
// later loadXML() call cannot swap the document under a pending callback.
var xmldoc;

// Start loading the XML document at `url`.  When it is fully loaded,
// handler(document, url, data, result_handler) is called.  This function
// works with any XML document.
//
//   url            - URL of the XML document
//   handler        - called with (document, url, data, result_handler)
//   data           - opaque value passed through to handler
//   result_handler - final result callback; it is called with an empty
//                    array when starting the load throws, and it is what
//                    watchdog() calls if the timeout fires
//
// Returns true when a load was started, false when starting it threw.
// NOTE(review): on false, result_handler has already been called here;
// callers that call it again on a false return will report twice.
function loadXML(url, handler, data, result_handler) {
        debug("loadXML("+url+","+data+")");

        // Timeout operation in 20 seconds.  A function is passed instead of
        // a code string so nothing has to be evaluated when the timer fires.
        watchdog_callback = result_handler;
        watchdog_id = setTimeout(function() { watchdog(); }, 20000);

        //debug("setTimeout = "+watchdog_id);

        try {
                var doc;

                // Use the standard DOM Level 2 technique, if it is supported
                if (document.implementation && document.implementation.createDocument) {
                        // Create a new Document object
                        doc = xmldoc = document.implementation.createDocument("", "", null);

                        // Specify what should happen when it finishes loading
                        doc.onload = function() { handler(doc, url, data, result_handler); };

                        // And tell it what URL to load
                        doc.load(url);
                        return true;
                }

                // Otherwise use Microsoft's proprietary API for Internet Explorer
                if (window.ActiveXObject) {
                        doc = xmldoc = new ActiveXObject("Microsoft.XMLDOM"); // Create doc.
                        if (! doc) doc = xmldoc = new ActiveXObject("MSXML2.DOMDocument"); // Create doc.

                        // Specify onload
                        doc.onreadystatechange = function() {
                                if (doc.readyState == 4) handler(doc, url, data, result_handler);
                        };
                        doc.load(url); // Start loading!
                        return true;
                }

                // else fallback on usage of iframes to load xml (Opera 7.53 without Java and maybe old Mac browsers)
                debug("using iframe xml loader - experimental and slow");
                if (! window.xml_iframe) {
                        debug("creating iframe");
                        window.xml_iframe = document.createElement('div');
                        // NOTE(review): url is interpolated into markup unescaped;
                        // the callers visible in this file build it from the index path.
                        window.xml_iframe.innerHTML = '<iframe src="'+url+'" name="xml_iframe" height="0" width="0" style="display: none;"></iframe>';
                        document.body.appendChild(window.xml_iframe);
                } else {
                        debug("loading xml in existing iframe");
                        window.frames.xml_iframe.window.document.location.href = url;
                }

                // save some data for iframe_xml_loaded()
                window.xml_handler = handler;
                window.xml_url = url;
                window.xml_data = data;
                window.xml_result_handler = result_handler;

                // poll every 100 ms until the iframe document is available
                window.iframe_timeout = window.setInterval(function() { iframe_xml_loaded(); }, 100);
                return true;
        }

        catch(ex) {
                clearTimeout(watchdog_id);
                //debug("clearTimeout = "+watchdog_id);
                debug ("CAUGHT EXCEPTION!");
                result_handler(new Array());
                return false;
        }
}

// Polling callback of the iframe loader in loadXML().  Once the iframe
// named 'xml_iframe' exposes a document, polling is stopped and the handler
// saved in window.xml_handler is called with that document and the saved
// url, data and result handler.
// NOTE(review): only the existence of the document is checked, not that
// the requested URL has finished loading - confirm this is sufficient.
function iframe_xml_loaded() {
        debug("iframe_xmldoc_loaded");
        if (! window.frames['xml_iframe']) return;

        // Plain property access; evaluating a code string is not needed here
        var xml = window.frames.xml_iframe.window.document;
        if (xml) {
                // iframe_timeout is an interval id (created with setInterval)
                clearInterval(window.iframe_timeout);
                debug("calling handler with ("+window.xml_url+","+window.xml_data+",...)");
                window.xml_handler(xml, window.xml_url, window.xml_data, window.xml_result_handler);
        } else {
                debug("can't eval iframe with xml");
        }
}

// Results read from the data document currently being processed;
// filled by loadData().
var data = new Array();

// Load handler for a data document: reads the results stored at entry
// `pos`, intersects them with the results found so far and continues with
// the rest of the query.
//
//   xmldoc         - loaded data document
//   url            - URL it was loaded from (used for logging by loadData)
//   pos            - index of the entry to read
//   result_handler - final result callback
function loadData_intersect(xmldoc, url, pos, result_handler) {
        data = new Array();
        if (loadData(xmldoc, url, pos)) {
                intersect_results(data);
                search_query_left(result_handler);
        } else {
                debug("INTERNAL ERROR, Inconsistent index");
                search_err="INTERNAL ERROR, Inconsistent index";
                // Report the failure as an empty result.  Without this call
                // nothing is ever reported for the search: loadData() has
                // already cleared the watchdog timer.
                result_handler(new Array());
        }
}

// Read the results stored at entry `pos` of a loaded data document and
// append them to the global `data` array as Result objects.  Also clears
// the pending watchdog timer, since the document has arrived.
//
//   xmldoc - loaded data document; entries are its "e" elements and the
//            results of an entry are that entry's "l" elements (title in
//            attribute "t", frequency in attribute "f", link as text)
//   url    - URL the document was loaded from (logging only)
//   pos    - index of the entry to read
//
// Returns true when at least one result was appended, false otherwise
// (entry has no links, or `pos` is beyond the last entry).
function loadData(xmldoc, url, pos) {

        clearTimeout(watchdog_id);
        //debug("clearTimeout = "+watchdog_id);

        debug ("loadData("+url+","+pos+")");

        // Get all entries
        var entries = xmldoc.getElementsByTagName("e");

        if (entries.length > pos) {
                // Get the links associated with this query
                var links = entries[pos].getElementsByTagName("l");

                debug("loaded "+links.length+" links");

                // Dynamically append results to output
                var ret = false;
                // `i` is declared locally so the loop does not create (or
                // clobber) a global variable
                var i;
                for (i = 0; i < links.length; i++) {
                        data.push(new Result(
                                links[i].getAttribute("t"),
                                links[i].firstChild.data,
                                links[i].getAttribute("f"))
                        );
                        ret = true;
                }
                return ret;
        } else {
                debug("ERROR: seek to "+pos+" with only "+entries.length+" elements");
                return false;
        }
}


// Continue with the rest of the query: while query_left is not empty the
// next search is started on it; otherwise the accumulated results are
// sorted with sortResults and handed to result_handler.
function search_query_left(result_handler) {
        var terms_remaining = query_left.length > 0;

        if (terms_remaining) {
                doSearch(index_path, query_left, result_handler);
                return;
        }

        results.sort(sortResults);
        result_handler(results);
}

// Comparison function used to sort the final results: orders by
// ascending frequency.  You may override this function to sort by
// something else.
function sortResults(a, b) {
        var difference = a.frequency - b.frequency;
        return difference;
}

// Called when the tree walk cannot continue.
// For a normal query the key was not found: this is logged and
// result_handler receives an empty array.  For a wildcard query only the
// collected URL/position pairs are logged.
// NOTE(review): result_handler is not called in the wildcard case -
// confirm whether wildcard results are meant to be delivered elsewhere.
function end_traverseTree(wildcard,query,result_handler) {
        if (wildcard) {
                var pair_count = wildcard_url_pos.length / 2;
                debug("wildcard "+query+" produced "+pair_count+" results: "+wildcard_url_pos.join(" "));
                return;
        }

        debug("Unable to locate key "+query);
        result_handler(new Array());
}

// Load handler for an index document: walks its "k" (key) elements in
// order and decides where the search continues.
//
//   - first key greater than the query: load the child index document
//     for that position (url with ".xml" replaced by "/<convert(i)>.xml")
//   - key equal to the query: load the matching data document (file name
//     prefixed with "_") and let loadData_intersect read entry i
//   - no key matched: try the child document past the last key
//
// A query ending in '*' is a wildcard: keys are compared on the query
// prefix only, and matches are collected in wildcard_url_pos instead of
// being loaded.
//
//   xmldoc         - loaded index document
//   url            - URL it was loaded from
//   query          - key to look for (may end in '*')
//   result_handler - final result callback
function traverseTree(xmldoc, url, query, result_handler) {
        clearTimeout(watchdog_id);
        //debug("clearTimeout = "+watchdog_id);

        debug("traverseTree("+xmldoc+","+url+","+query+")");

        var keys = xmldoc.getElementsByTagName("k");
        var i;

        // support for wildcard
        var qlen = query.length;
        var wildcard = false;
        var query_full = query;
        if (query.charAt(qlen-1) == '*') {
                wildcard = true;
                query = query.substr(0,--qlen);
                debug("using wildcard "+query+"*");
        }

        for(i = 0; i < keys.length; i++) {
                // An empty <k/> element has no text node; treat it as an
                // empty key so the check below skips it instead of throwing
                var text_node = keys[i].firstChild;
                var key = (text_node && text_node.data != null) ? text_node.data : '';

                if (wildcard) key = key.substr(0,qlen);

                debug("? "+key+" -- "+query);

                if (key != '' && key != null) {
                        // Case where current key is greater than query, descend
                        if (key > query) {
                                if (! loadXML(url.replace(".xml","/"+convert(i)+".xml"), traverseTree, query_full, result_handler)) {
                                        end_traverseTree(wildcard, query_full, result_handler);
                                }
                                // make sure of garbage collection
                                xmldoc=null;
                                return;
                        }
                        // Found it!
                        else if (key==query) {
                                if (wildcard) {
                                        // remember data document URL and entry position
                                        wildcard_url_pos.push(url.replace(/(\w+\.xml)/, "_$1"));
                                        wildcard_url_pos.push(i);
                                        debug("+"+i+": "+keys[i].firstChild.data);
                                } else {
                                        // exact match
                                        if (! loadXML(url.replace(/(\w+\.xml)/, "_$1"), loadData_intersect, i, result_handler)) {
                                                debug("ERROR: Unable to locate data "+query_full);
                                                result_handler(new Array());
                                        }
                                        // make sure of garbage collection
                                        xmldoc=null;
                                        return;
                                }
                        } // key < query
                } // if key
        } // for

        // Look past the end...
        if (keys.length == 0 || !loadXML(url.replace(".xml","/"+convert(i)+".xml"), traverseTree, query_full, result_handler)) {
                end_traverseTree(wildcard, query_full, result_handler);
        }

        // make sure of garbage collection
        xmldoc=null;
        return;
}

// Entry point: search the index for `query`.
//
// The query is split at whitespace or '+'.  The first term is searched
// now (lower-cased); the remainder is stored in query_left and searched
// after this term's results are in.
//
//   index_name  - index directory, with or without trailing '/'; a falsy
//                 value keeps the current index_path
//   query       - one or more terms separated by whitespace or '+'
//   result_func - callback receiving the final result array
//
// NOTE(review): the global `results` is not reset here; presumably the
// page resets it before starting a new search - confirm.
function doSearch(index_name,query, result_func) {

        // This function is re-entered with index_path itself for every
        // further term, so only add the separator when it is missing;
        // otherwise the path grows by one '/' per term.
        if (index_name) {
                var base = String(index_name);
                index_path = (base.charAt(base.length-1) == '/') ? base : base+'/';
        }

        // Drop leading separators and collapse runs of separators between
        // terms, so that an empty term is never searched.
        query = query.replace(/^[\s\+]+/, "");
        var pos=query.search(/[\s\+]/);

        if (pos < 0) {
                query_left = "";
        } else {
                query_left = query.slice(pos+1).replace(/^[\s\+]+/, "");
                query = query.slice(0,pos);
        }

        if (! loadXML(index_path+"0.xml", traverseTree, query.toLowerCase(), result_func)) {
                debug("ERROR: Couldn't find main index 0.xml");
                search_err = "INTERNAL ERROR: Unable to load main index 0.xml";
        }
}
1	/**
2	search.js searches an XML index to HTML files.
3
4	A part of the jsfind project (http://projects.elucidsoft.net/jsfind)
5	Copyright (C) 2003 Shawn Garbett
6
7	This program is free software; you can redistribute it and/or
8	modify it under the terms of the GNU General Public License
9	as published by the Free Software Foundation; either version 2
10	of the License, or (at your option) any later version.
11
12	This program is distributed in the hope that it will be useful,
13	but WITHOUT ANY WARRANTY; without even the implied warranty of
14	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15	GNU General Public License for more details.
16
17	You should have received a copy of the GNU General Public License
18	along with this program; if not, write to the Free Software
19	Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20
21	Contact Info:
22	Shawn Garbett <Shawn@eLucidSoft.net>
23	http://www.elucidsoft.net
24	4037 General Bate Drive
25	Nashville, TN 37204
26	*/
27
28	// Constants
29	var conversion = new String
30	("0123456789abcdefghijklmnopqrstuvwxyz");
31
32	// State variables
33	var query_left = "";
34	var search_err = "";
35	var results = null;
36	var index_path = "";
37
38	// wildcard data about found keys URLs and position
39	var wildcard_url_pos = new Array();
40
41	var watchdog_id = 0;
42	var watchdog_callback = null;
43
44	// Object to hold search results
45	function Result(title, link, freq) {
46	this.title=title;
47	this.link=link;
48	this.frequency=Number(freq);
49	}
50
51	// Function to merge (intersect) two result sets
52	function intersect_results(data) {
53	// If there are no stored results, then these are the results
54	if (! results) {
55	results = data;
56	return;
57	}
58
59	var output=new Array();
60
61	// There are existing results, to do an intersect...
62	for (var i=0; i<results.length; i++) {
63	for (var j=0; j<data.length; j++) {
64	if (data[j].title == results[i].title) {
65	results[i].frequency += data[j].frequency;
66	output.push(results[i]);
67	break;
68	}
69	}
70	}
71
72	results = output;
73	}
74
75	var debug_div = null;
76
77	function debug(msg) {
78
79	// return; // Disable debugging
80
81	if (! debug_div) debug_div = document.getElementById('debug');
82
83	// this will create debug div if it doesn't exist.
84	if (! debug_div) {
85	debug_div = document.createElement('div');
86	if (document.body) document.body.appendChild(debug_div);
87	else debug_div = null;
88	}
89	if (debug_div) {
90	debug_div.appendChild(document.createTextNode(msg));
91	debug_div.appendChild(document.createElement("br"));
92	}
93	}
94
95	// Convert a number into a base x alphanumeric number string
96	function convert(num) {
97	var base = conversion.length;
98	var pow = 1;
99	var pos = 0;
100	var out = "";
101
102	if (num == 0) return "0";
103
104	while (num > 0) {
105	pos = num % base;
106	out = conversion.charAt(pos) + out;
107	num = Math.floor(num/base);
108	pow *= base;
109	}
110
111	return out;
112	}
113
114	function watchdog() {
115	debug ("TIMEOUT!");
116	watchdog_callback(new Array());
117	}
118
119	var xmldoc;
120
121	// This function loads the XML document from the specified URL, and when
122	// it is fully loaded, passes that document and the url to the specified
123	// handler function. This function works with any XML document
124
125	function loadXML(url, handler, data, result_handler) {
126	debug("loadXML("+url+","+data+")");
127
128	// Timeout operation in 20 seconds
129	watchdog_callback = result_handler;
130	watchdog_id=setTimeout("watchdog()", 20000);
131
132	//debug("setTimeout = "+watchdog_id);
133
134	try {
135	// Use the standard DOM Level 2 technique, if it is supported
136	if (document.implementation && document.implementation.createDocument) {
137	// Create a new Document object
138	xmldoc = document.implementation.createDocument("", "", null);
139
140	// Specify what should happen when it finishes loading
141	xmldoc.onload = function() { handler(xmldoc, url, data, result_handler); }
142
143	//xmldoc.onerror = docError;
144	//xmldoc.addEventListener("load",docError,false);
145
146	// And tell it what URL to load
147	xmldoc.load(url);
148	return true;
149	}
150	// Otherwise use Microsoft's proprietary API for Internet Explorer
151	// Something about not following standards once again
152	else if (window.ActiveXObject) {
153	xmldoc = new ActiveXObject("Microsoft.XMLDOM"); // Create doc.
154	if (! xmldoc) xmldoc = new ActiveXObject("MSXML2.DOMDocument"); // Create doc.
155	// Specify onload
156	xmldoc.onreadystatechange = function() {
157	if (xmldoc.readyState == 4) handler(xmldoc, url, data, result_handler);
158	}
159	xmldoc.load(url); // Start loading!
160	return true;
161	}
162	// else fallback on usage of iframes to load xml (Opera 7.53 without Java and maybe old Mac browsers)
163	else {
164	debug("using iframe xml loader - experimental and slow");
165	if (! window.xml_iframe) {
166	debug("creating iframe");
167	window.xml_iframe = document.createElement('div');
168	window.xml_iframe.innerHTML = '<iframe src="'+url+'" name="xml_iframe" height="0" width="0" style="display: none;"></iframe>';
169	document.body.appendChild(window.xml_iframe);
170	} else {
171	debug("loading xml in existing iframe");
172	window.frames.xml_iframe.window.document.location.href = url;
173	}
174
175	// set timeout to re-check if iframe is loaded
176	window.iframe_timeout = window.setInterval('iframe_xml_loaded();',100);
177
178	// save some data for iframe_xml_loaded()
179	window.xml_handler = handler;
180	window.xml_url = url;
181	window.xml_data = data;
182	window.xml_result_handler = result_handler;
183	return true;
184	}
185
186	clearTimeout(watchdog_id);
187	debug("Browser incompatilibity: can't request XML document by one of supported methods");
188	return false;
189	}
190
191	catch(ex) {
192	clearTimeout(watchdog_id);
193	//debug("clearTimeout = "+watchdog_id);
194	debug ("CAUGHT EXCEPTION!");
195	result_handler(new Array());
196	return false;
197	}
198
199	return true;
200	}
201
202	function iframe_xml_loaded() {
203	debug("iframe_xmldoc_loaded");
204	if (! window.frames['xml_iframe']) return;
205	var xml = eval('window.frames.xml_iframe.window.document');
206	if (xml) {
207	clearTimeout(window.iframe_timeout);
208	debug("calling handler with ("+window.xml_url+","+window.xml_data+",...)");
209	window.xml_handler(window.frames.xml_iframe.window.document, window.xml_url, window.xml_data, window.xml_result_handler);
210	} else {
211	debug("can't eval iframe with xml");
212	}
213	}
214
215	var data = new Array();
216
217	function loadData_intersect(xmldoc, url, pos, result_handler) {
218	data = new Array();
219	if (loadData(xmldoc, url, pos)) {
220	intersect_results(data);
221	search_query_left(result_handler);
222	} else {
223	debug("INTERNAL ERROR, Inconsistent index");
224	search_err="INTERNAL ERROR, Inconsistent index";
225	}
226	}
227
228	function loadData(xmldoc, url, pos) {
229
230	clearTimeout(watchdog_id);
231	//debug("clearTimeout = "+watchdog_id);
232
233	debug ("loadData("+url+","+pos+")");
234
235	// Get all entries
236	var entries = xmldoc.getElementsByTagName("e");
237
238	if (entries.length > pos) {
239	// Get the links associated with this query
240	var links = entries[pos].getElementsByTagName("l");
241
242	debug("loaded "+links.length+" links");
243
244	// Dynamically append results to output
245	var ret = false;
246	for(i=0; i<links.length; i++) {
247	data.push(new Result(
248	links[i].getAttribute("t"),
249	links[i].firstChild.data,
250	links[i].getAttribute("f"))
251	);
252	ret = true;
253	}
254	return ret;
255	} else {
256	debug("ERROR: seek to "+pos+" with only "+entries.length+" elements");
257	}
258	}
259
260
261	function search_query_left(result_handler) {
262	if (query_left.length > 0) {
263	doSearch(index_path, query_left, result_handler);
264	} else {
265	results.sort(sortResults);
266	result_handler(results);
267	}
268	}
269
270	// you may override this function to sort by something else
271	function sortResults(a, b) {
272	return a.frequency - b.frequency;
273	}
274
275	function end_traverseTree(wildcard,query,result_handler) {
276	if (! wildcard) {
277	debug("Unable to locate key "+query);
278	result_handler(new Array());
279	} else {
280	debug("wildcard "+query+" produced "+(wildcard_url_pos.length / 2)+" results: "+wildcard_url_pos.join(" "));
281	}
282
283	}
284
285	function traverseTree(xmldoc, url, query, result_handler) {
286	clearTimeout(watchdog_id);
287	//debug("clearTimeout = "+watchdog_id);
288
289	debug("traverseTree("+xmldoc+","+url+","+query+")");
290
291	var keys = xmldoc.getElementsByTagName("k");
292	var i;
293
294	// support for wildcard
295	var qlen = query.length;
296	var wildcard = false;
297	var query_full = query;
298	if (query.charAt(qlen-1) == '*') {
299	wildcard = true;
300	query = query.substr(0,--qlen);
301	debug("using wildcard "+query+"*");
302	}
303
304	for(i = 0; i < keys.length; i++) {
305	var key = keys[i].firstChild.data;
306
307	if (wildcard) key = key.substr(0,qlen);
308
309	debug("? "+key+" -- "+query);
310
311	if (key != '' && key != null) {
312	// Case where current key is greater than query, descend
313	if (key > query) {
314	if (! loadXML(url.replace(".xml","/"+convert(i)+".xml"), traverseTree, query_full, result_handler)) {
315	end_traverseTree(wildcard, query_full, result_handler);
316	}
317	// make sure of garbage collection
318	xmldoc=null;
319	return;
320	}
321	// Found it!
322	else if (key==query) {
323	if (wildcard) {
324	wildcard_url_pos.push(url.replace(/(\w+\.xml)/, "_$1"));
325	wildcard_url_pos.push(i);
326	debug("+"+i+": "+keys[i].firstChild.data);
327	} else {
328	// exact match
329	if (! loadXML(url.replace(/(\w+\.xml)/, "_$1"), loadData_intersect, i, result_handler)) {
330	debug("ERROR: Unable to locate data "+query_full);
331	result_handler(new Array());
332	}
333	// make sure of garbage collection
334	xmldoc=null;
335	return;
336	}
337	} // key < query
338	} // if key
339	} // for
340
341	// Look past the end...
342	if (keys.length == 0 || !loadXML(url.replace(".xml","/"+convert(i)+".xml"), traverseTree, query_full, result_handler)) {
343	end_traverseTree(wildcard, query_full, result_handler);
344	}
345
346	// make sure of garbage collection
347	xmldoc=null;
348	return;
349	}
350
351	function doSearch(index_name,query, result_func) {
352
353	//alert("doSearch("+index_name+","+query+")");
354	var pos=query.search(/[\s\+]/);
355	if (index_name) index_path = index_name+'/';
356
357	if (pos < 0) {
358	query_left = "";
359	} else {
360	query_left = query.slice(pos+1);
361	query = query.slice(0,pos);
362	}
363
364	if (! loadXML(index_path+"0.xml", traverseTree, query.toLowerCase(), result_func)) {
365	debug("ERROR: Couldn't find main index 0.xml");
366	search_err = "INTERNAL ERROR: Unable to load main index 0.xml";
367	}
368	}