1 |
#! /usr/local/bin/ruby -w |
2 |
#-- |
3 |
# Ruby interface of Hyper Estraier
4 |
# Copyright (C) 2004-2005 Mikio Hirabayashi |
5 |
# All rights reserved. |
6 |
# This file is part of Hyper Estraier. |
7 |
# Redistribution and use in source and binary forms, with or without modification, are |
8 |
# permitted provided that the following conditions are met: |
9 |
# |
10 |
# * Redistributions of source code must retain the above copyright notice, this list of |
11 |
# conditions and the following disclaimer. |
12 |
# * Redistributions in binary form must reproduce the above copyright notice, this list of |
13 |
# conditions and the following disclaimer in the documentation and/or other materials |
14 |
# provided with the distribution. |
15 |
# * Neither the name of Mikio Hirabayashi nor the names of its contributors may be used to |
16 |
# endorse or promote products derived from this software without specific prior written |
17 |
# permission. |
18 |
# |
19 |
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS |
20 |
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF |
21 |
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
22 |
# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
23 |
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE |
24 |
# GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
25 |
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
26 |
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED |
27 |
# OF THE POSSIBILITY OF SUCH DAMAGE. |
28 |
#++ |
29 |
#:include:overview |
30 |
|
31 |
|
32 |
require "uri" |
33 |
require "socket" |
34 |
require "stringio" |
35 |
|
36 |
|
37 |
# |
38 |
# Module for the namespace of Hyper Estraier |
39 |
# |
40 |
module Estraier |
41 |
#---------------------------------------------------------------- |
42 |
#++ Abstraction of document. |
43 |
#---------------------------------------------------------------- |
44 |
class Document
  # Create a document object.
  # `draft' specifies a string of draft data.  The draft consists of
  # `name=value' attribute lines, one empty line, and then text lines.  A text
  # line beginning with a tab is a hidden sentence.  If the draft is empty (or
  # nil), an empty document is created.
  def initialize(draft = "")
    Utility::check_types({ draft=>String }) if $DEBUG
    @id = -1
    @attrs = {}
    @dtexts = []
    @htexts = []
    return if draft.nil? || draft.empty?
    lines = draft.split(/\n/)
    # The attribute section ends at the first line that is blank after
    # whitespace normalization.
    header_end = lines.index { |raw| normalize_text(raw).empty? } || lines.length
    lines[0...header_end].each do |raw|
      name, value = normalize_text(raw).split("=", 2)
      @attrs[name] = value if value
    end
    (lines[(header_end + 1)..-1] || []).each do |raw|
      # The tab marker has to be checked on the raw line: normalization would
      # turn it into a space and strip it.
      hidden = raw.start_with?("\t")
      text = normalize_text(hidden ? raw[1..-1] : raw)
      next if text.empty?
      (hidden ? @htexts : @dtexts).push(text)
    end
  end

  # Add an attribute.
  # `name' specifies the name of an attribute.
  # `value' specifies the value of the attribute.  If it is nil, the attribute is removed.
  # Runs of whitespace in both strings are collapsed to a single space and the
  # ends are trimmed.
  def add_attr(name, value)
    Utility::check_types({ name=>String, value=>String }) if $DEBUG
    name = normalize_text(name)
    if value.nil?
      @attrs.delete(name)
    else
      @attrs[name] = normalize_text(value)
    end
  end

  # Add a sentence of text.
  # `text' specifies a sentence of text.  Whitespace is normalized before storing.
  def add_text(text)
    Utility::check_types({ text=>String }) if $DEBUG
    @dtexts.push(normalize_text(text))
  end

  # Add a hidden sentence.
  # `text' specifies a hidden sentence.  Whitespace is normalized before storing.
  def add_hidden_text(text)
    Utility::check_types({ text=>String }) if $DEBUG
    @htexts.push(normalize_text(text))
  end

  # Get the ID number.
  # The return value is the ID number of the document object.  If the object has never been
  # registered, -1 is returned.
  def id
    @id
  end

  # Get a list of attribute names of a document object.
  # The return value is a sorted list object of attribute names.
  def attr_names
    @attrs.keys.sort
  end

  # Get the value of an attribute.
  # `name' specifies the name of an attribute.
  # The return value is the value of the attribute or nil if it does not exist.
  def attr(name)
    Utility::check_types({ name=>String }) if $DEBUG
    @attrs[name]
  end

  # Get a list of sentences of the text.
  # The return value is a list object of sentences of the text.
  def texts
    @dtexts
  end

  # Concatenate sentences of the text of a document object.
  # The return value is the sentences joined by single spaces.
  def cat_texts
    @dtexts.join(" ")
  end

  # Dump draft data of a document object.
  # The return value is draft data: attributes sorted by name, an empty line,
  # the text sentences, and then the hidden sentences prefixed by a tab.
  def dump_draft
    draft = @attrs.keys.sort.map { |name| "#{name}=#{@attrs[name]}\n" }.join
    draft << "\n"
    @dtexts.each { |text| draft << text.to_s << "\n" }
    @htexts.each { |text| draft << "\t" << text.to_s << "\n" }
    draft
  end

  # Make a snippet of the body text of a document object.
  # `words' specifies a list object of words to be highlighted.
  # `wwidth' specifies whole width of the result.
  # `hwidth' specifies width of strings picked up from the beginning of the text.
  # `awidth' specifies width of strings picked up around each highlighted word.
  # The return value is a snippet string of the body text.  There are tab separated values.
  # Each line is a string to be shown.  Though most lines have only one field, some lines have
  # two fields.  If the second field exists, the first field is to be shown with highlighted,
  # and the second field means its normalized form.
  # This method is not implemented here and always raises NoMethodError.
  def make_snippet(words, wwidth, hwidth, awidth)
    raise NoMethodError.new("Not Implemented")
  end

  # Check whether the text of a document object includes every specified word.
  # `words' specifies a list object of words to be checked.
  # The return value is true if every specified word is found, else it is false.
  # This method is not implemented here and always raises NoMethodError.
  def scan_words(words)
    raise NoMethodError.new("Not Implemented")
  end

  private

  # Collapse every run of whitespace into one space and trim both ends.
  def normalize_text(text)
    text.gsub(/[ \t\r\n\v\f]+/, " ").strip
  end
end
187 |
#---------------------------------------------------------------- |
188 |
#++ Abstraction of search condition. |
189 |
#---------------------------------------------------------------- |
190 |
class Condition
  # option: check every N-gram key
  CONDSURE = 1 << 0
  # option: check N-gram keys skipping by one
  CONDUSU = 1 << 1
  # option: check N-gram keys skipping by two
  CONDFAST = 1 << 2
  # option: check N-gram keys skipping by three
  CONDAGIT = 1 << 3
  # option: without TF-IDF tuning
  CONDNOIDF = 1 << 4
  # option: with the simplified phrase
  CONDSIMPLE = 1 << 10

  # Get the search phrase.
  # The return value is the search phrase, or nil if none has been set.
  attr_reader :phrase
  # Get expressions for attributes.
  # The return value is a list object of expressions for attributes.
  attr_reader :attrs
  # Get the order expression.
  # The return value is the order expression, or nil if none has been set.
  attr_reader :order
  # Get the maximum number of retrieval.
  # The return value is the maximum number of retrieval.  It is -1 until `set_max' is called.
  attr_reader :max
  # Get options of retrieval.
  # The return value is options by bitwise or.
  attr_reader :options

  # Create a search condition object.
  def initialize
    @phrase = nil
    @attrs = []
    @order = nil
    @max = -1
    @options = 0
  end

  # Set the search phrase.
  # `phrase' specifies a search phrase.  Whitespace is normalized before storing.
  def set_phrase(phrase)
    Utility::check_types({ phrase=>String }) if $DEBUG
    @phrase = normalize_text(phrase)
  end

  # Add an expression for an attribute.
  # `expr' specifies an expression for an attribute.  Whitespace is normalized before storing.
  def add_attr(expr)
    Utility::check_types({ expr=>String }) if $DEBUG
    @attrs.push(normalize_text(expr))
  end

  # Set the order of a condition object.
  # `expr' specifies an expression for the order.  By default, the order is by score descending.
  def set_order(expr)
    Utility::check_types({ expr=>String }) if $DEBUG
    @order = normalize_text(expr)
  end

  # Set the maximum number of retrieval.
  # `max' specifies the maximum number of retrieval.  By default, the number of retrieval is
  # not limited.
  def set_max(max)
    Utility::check_types({ max=>Integer }) if $DEBUG
    @max = max
  end

  # Set options of retrieval.
  # `options' specifies options: `Condition::CONDSURE' specifies that it checks every N-gram
  # key, `Condition::CONDUSU' specifies that it checks N-gram keys with skipping one key,
  # `Condition::CONDFAST' skips two keys, `Condition::CONDAGIT' skips three keys,
  # `Condition::CONDNOIDF' specifies not to perform TF-IDF tuning, `Condition::CONDSIMPLE'
  # specifies to use simplified phrase.  Each option can be specified at the same time by
  # bitwise or.  If keys are skipped, though search speed is improved, the relevance ratio
  # grows less.
  # The given bits are added to the options already set; they are never cleared.
  def set_options(options)
    Utility::check_types({ options=>Integer }) if $DEBUG
    @options |= options
  end

  private

  # Collapse every run of whitespace into one space and trim both ends.
  def normalize_text(text)
    text.gsub(/[ \t\r\n\v\f]+/, " ").strip
  end
end
292 |
#---------------------------------------------------------------- |
293 |
#++ Abstraction of document in result set. |
294 |
#---------------------------------------------------------------- |
295 |
class ResultDocument
  # Get the URI.
  # The return value is the URI of the result document object.
  attr_reader :uri
  # Get the snippet of a result document object.
  # The return value is the snippet of the result document object.  There are tab separated
  # values.  Each line is a string to be shown.  Though most lines have only one field, some
  # lines have two fields.  If the second field exists, the first field is to be shown with
  # highlighted, and the second field means its normalized form.
  attr_reader :snippet

  # Create a result document object.
  # `uri' specifies the URI of the document.
  # `attrs' specifies a hash object of attribute names and values.
  # `snippet' specifies the snippet string.
  def initialize(uri, attrs, snippet)
    Utility::check_types({ uri=>String, attrs=>Hash, snippet=>String }) if $DEBUG
    @uri = uri
    @attrs = attrs
    @snippet = snippet
  end

  # Get a list of attribute names.
  # The return value is a sorted list object of attribute names.
  def attr_names
    @attrs.keys.sort
  end

  # Get the value of an attribute.
  # `name' specifies the name of an attribute.
  # The return value is the value of the attribute or nil if it does not exist.
  def attr(name)
    Utility::check_types({ name=>String }) if $DEBUG
    @attrs[name]
  end
end
336 |
#---------------------------------------------------------------- |
337 |
#++ Abstraction of result set from node. |
338 |
#---------------------------------------------------------------- |
339 |
class NodeResult
  # Create a node result object.
  # `docs' specifies a list object of result document objects.
  # `hints' specifies a hash object of hint keys and values.
  def initialize(docs, hints)
    Utility::check_types({ docs=>Array, hints=>Hash }) if $DEBUG
    @docs = docs
    @hints = hints
  end

  # Get the number of documents.
  # The return value is the number of documents.
  def doc_num
    @docs.length
  end

  # Get a result document.
  # `index' specifies the index of a document in the result.
  # The return value is a result document object or nil if the index is out of bounds.
  def get_doc(index)
    Utility::check_types({ index=>Integer }) if $DEBUG
    # A negative index must not wrap around to the tail of the list.
    return nil if index < 0 || index >= @docs.length
    @docs[index]
  end

  # Get the value of hint information.
  # `key' specifies the key of a hint.  "VERSION", "NODE", "HIT", "HINT#n", "DOCNUM", "WORDNUM",
  # "TIME", "LINK#n", and "VIEW" are provided for keys.
  # The return value is the hint or nil if the key does not exist.
  def hint(key)
    Utility::check_types({ key=>String }) if $DEBUG
    @hints[key]
  end
end
375 |
#---------------------------------------------------------------- |
376 |
#++ Abstraction of connection to P2P node. |
377 |
#---------------------------------------------------------------- |
378 |
class Node
  # Create a node connection object.
  def initialize
    @url = nil
    @pxhost = nil
    @pxport = -1
    @timeout = -1
    @auth = nil
    @name = nil
    @label = nil
    @dnum = -1
    @wnum = -1
    @size = -1.0
    @status = -1
  end

  # Set the URL of a node server.
  # `url' specifies the URL of a node.
  def set_url(url)
    Utility::check_types({ url=>String }) if $DEBUG
    @url = url
  end

  # Set the proxy information.
  # `host' specifies the host name of a proxy server.
  # `port' specifies the port number of the proxy server.
  def set_proxy(host, port)
    Utility::check_types({ host=>String, port=>Integer }) if $DEBUG
    @pxhost = host
    @pxport = port
  end

  # Set timeout of a connection.
  # `sec' specifies timeout of the connection in seconds.
  def set_timeout(sec)
    Utility::check_types({ sec=>Integer }) if $DEBUG
    @timeout = sec
  end

  # Set the authentication information.
  # `name' specifies the name of authentication.
  # `password' specifies the password of the authentication.
  def set_auth(name, password)
    Utility::check_types({ name=>String, password=>String }) if $DEBUG
    @auth = name + ":" + password
  end

  # Get the status code of the last request.
  # The return value is the status code of the last request.  -1 means failure of connection.
  def status
    @status
  end

  # Add a document.
  # `doc' specifies a document object.  The document object should have the URI attribute.
  # The return value is true if success, else it is false.
  def put_doc(doc)
    Utility::check_types({ doc=>Document }) if $DEBUG
    request("/put_doc", "text/x-estraier-draft") { doc.dump_draft } == 200
  end

  # Remove a document.
  # `id' specifies the ID number of a registered document.
  # The return value is true if success, else it is false.
  def out_doc(id)
    Utility::check_types({ id=>Integer }) if $DEBUG
    post_form("/out_doc") { "id=" + id.to_s } == 200
  end

  # Remove a document specified by URI.
  # `uri' specifies the URI of a registered document.
  # The return value is true if success, else it is false.
  def out_doc_by_uri(uri)
    Utility::check_types({ uri=>String }) if $DEBUG
    post_form("/out_doc") { "uri=" + escape(uri) } == 200
  end

  # Retrieve a document.
  # `id' specifies the ID number of a registered document.
  # The return value is a document object.  On error, nil is returned.
  def get_doc(id)
    Utility::check_types({ id=>Integer }) if $DEBUG
    resbody = StringIO.new
    return nil if post_form("/get_doc", resbody) { "id=" + id.to_s } != 200
    Document.new(resbody.string)
  end

  # Retrieve a document specified by URI.
  # `uri' specifies the URI of a registered document.
  # The return value is a document object.  On error, nil is returned.
  def get_doc_by_uri(uri)
    Utility::check_types({ uri=>String }) if $DEBUG
    resbody = StringIO.new
    return nil if post_form("/get_doc", resbody) { "uri=" + escape(uri) } != 200
    Document.new(resbody.string)
  end

  # Retrieve the value of an attribute of a document.
  # `id' specifies the ID number of a registered document.
  # `name' specifies the name of an attribute.
  # The return value is the value of the attribute or nil if it does not exist.
  def get_doc_attr(id, name)
    Utility::check_types({ id=>Integer, name=>String }) if $DEBUG
    resbody = StringIO.new
    rv = post_form("/get_doc_attr", resbody) { "id=" + id.to_s + "&attr=" + escape(name) }
    return nil if rv != 200
    resbody.string.chomp
  end

  # Retrieve the value of an attribute of a document specified by URI.
  # `uri' specifies the URI of a registered document.
  # `name' specifies the name of an attribute.
  # The return value is the value of the attribute or nil if it does not exist.
  def get_doc_attr_by_uri(uri, name)
    Utility::check_types({ uri=>String, name=>String }) if $DEBUG
    resbody = StringIO.new
    rv = post_form("/get_doc_attr", resbody) { "uri=" + escape(uri) + "&attr=" + escape(name) }
    return nil if rv != 200
    resbody.string.chomp
  end

  # Get the ID of a document specified by URI.
  # `uri' specifies the URI of a registered document.
  # The return value is the ID of the document as an integer.  On error, -1 is returned.
  def uri_to_id(uri)
    Utility::check_types({ uri=>String }) if $DEBUG
    resbody = StringIO.new
    return -1 if post_form("/uri_to_id", resbody) { "uri=" + escape(uri) } != 200
    resbody.string.chomp.to_i
  end

  # Get the name.
  # The return value is the name.  On error, nil is returned.
  def name
    set_info unless @name
    @name
  end

  # Get the label.
  # The return value is the label.  On error, nil is returned.
  def label
    set_info unless @label
    @label
  end

  # Get the number of documents.
  # The return value is the number of documents.  On error, -1 is returned.
  def doc_num
    set_info if @dnum < 0
    @dnum
  end

  # Get the number of unique words.
  # The return value is the number of unique words.  On error, -1 is returned.
  def word_num
    set_info if @wnum < 0
    @wnum
  end

  # Get the size of the database.
  # The return value is the size of the database.  On error, -1.0 is returned.
  def size
    set_info if @size < 0.0
    @size
  end

  # Search documents corresponding a condition.
  # `cond' specifies a condition object.
  # `depth' specifies the depth of meta search.
  # The return value is a node result object.  On error, nil is returned.
  def search(cond, depth)
    Utility::check_types({ cond=>Condition, depth=>Integer }) if $DEBUG
    resbody = StringIO.new
    rv = post_form("/search", resbody) { Utility::cond_to_query(cond, depth) }
    return nil if rv != 200
    parse_search_result(resbody.string)
  end

  # Manage a user account of a node.
  # `name' specifies the name of a user.
  # `mode' specifies the operation mode.  0 means to delete the account.  1 means to set the
  # account as an administrator.  2 means to set the account as a normal user.
  # The return value is true if success, else it is false.
  def set_user(name, mode)
    Utility::check_types({ name=>String, mode=>Integer }) if $DEBUG
    post_form("/_set_user") { "name=" + escape(name) + "&mode=" + mode.to_s } == 200
  end

  # Manage a link of a node.
  # `url' specifies the URL of the target node of a link.
  # `label' specifies the label of the link.
  # `credit' specifies the credit of the link.  If it is negative, the link is removed.
  # The return value is true if success, else it is false.
  def set_link(url, label, credit)
    Utility::check_types({ url=>String, label=>String, credit=>Integer }) if $DEBUG
    rv = post_form("/_set_link") do
      reqbody = "url=" + escape(url) + "&label=" + escape(label)
      # A negative credit is expressed by leaving the parameter out.
      reqbody += "&credit=" + credit.to_s if credit >= 0
      reqbody
    end
    rv == 200
  end

  private

  # Escape a string for use in an application/x-www-form-urlencoded body.
  def escape(str)
    URI.encode_www_form_component(str)
  end

  # Send a form-encoded POST; see `request' for the arguments and the result.
  def post_form(path, resbody = nil, &body)
    request(path, "application/x-www-form-urlencoded", resbody, &body)
  end

  # Send one request to the node and record its status code in @status.
  # `path' is appended to the node URL.
  # `ctype' specifies the Content-Type header value, or nil to send none.
  # `resbody' specifies a stream receiving the response body, or nil.
  # The block, if given, returns the request body; without a block no body is sent.
  # The block is only evaluated when a URL has been set.
  # The return value is the status code, or -1 if no URL is set or the request failed.
  def request(path, ctype, resbody = nil)
    @status = -1
    return -1 unless @url
    reqheads = []
    reqheads.push("Content-Type: " + ctype) if ctype
    reqheads.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
    reqbody = block_given? ? yield : nil
    @status = Utility::shuttle_url(@url + path, @pxhost, @pxport, @timeout,
                                   reqheads, reqbody, nil, resbody)
  end

  # Set information of the node.
  # The response is expected to start with one line of five tab separated
  # fields: name, label, number of documents, number of words, and size.
  # Anything else leaves the cached values untouched.
  def set_info
    resbody = StringIO.new
    return if request("/inform", nil, resbody) != 200
    lines = resbody.string.chomp.split(/\n/)
    return if lines.empty?
    elems = lines[0].chomp.split(/\t/)
    return if elems.length != 5
    @name = elems[0]
    @label = elems[1]
    @dnum = elems[2].to_i
    @wnum = elems[3].to_i
    @size = elems[4].to_f
  end

  # Parse the body of a search response into a node result object.
  # The first line is the border string.  Lines up to the next border line are
  # tab separated hints; each following part closed by a border line is one
  # document.  A border line followed by ":END" terminates the result.
  # The return value is nil if the body is empty or the terminator is missing.
  def parse_search_result(text)
    lines = text.split(/\n/)
    return nil if lines.empty?
    border = lines.shift
    docs = []
    hints = {}
    part = []
    in_hints = true
    finished = false
    lines.each do |line|
      unless line.start_with?(border)
        part.push(line)
        next
      end
      if in_hints
        part.each do |hline|
          key, value = hline.split("\t", 2)
          hints[key] = value if value
        end
        in_hints = false
      else
        rdoc = parse_result_document(part)
        docs.push(rdoc) if rdoc
      end
      part = []
      if line[border.length..-1] == ":END"
        finished = true
        break
      end
    end
    return nil unless finished
    NodeResult.new(docs, hints)
  end

  # Build a result document from the lines of one part of a search response.
  # `name=value' lines up to the first empty line are attributes; the rest is
  # the snippet.  The return value is nil if the part has no "@uri" attribute.
  def parse_result_document(part)
    blank = part.index { |line| line.empty? } || part.length
    rdattrs = {}
    part[0...blank].each do |line|
      key, value = line.split("=", 2)
      rdattrs[key] = value if value
    end
    rduri = rdattrs["@uri"]
    return nil unless rduri
    rdsnippet = (part[(blank + 1)..-1] || []).map { |line| line + "\n" }.join
    ResultDocument.new(rduri, rdattrs, rdsnippet)
  end
end
729 |
#:stopdoc: |
730 |
# |
731 |
# Module for utility |
732 |
# |
733 |
module Utility
  public

  # Check types of arguments.
  # `types' specifies a hash object whose keys are arguments and values are class objects.
  # A nil argument is always accepted.
  # If there is an invalid object, an ArgumentError is raised.
  def check_types(types)
    types.each_with_index do |(value, klass), idx|
      next if value.nil? || value.kind_of?(klass)
      raise ArgumentError.new("Argument#" + (idx + 1).to_s +
                              " should be a kind of " + klass.to_s)
    end
  end
  module_function :check_types

  # Perform an interaction of a URL.
  # `url' specifies a URL.  Only the "http" scheme is supported.
  # `pxhost' specifies the host name of a proxy.  If it is nil, it is not used.
  # `pxport' specifies the port number of the proxy.
  # `outsec' specifies timeout in seconds.  If it is negative (or nil), it is not used.
  # `reqheads' specifies a list object of extension headers.  If it is nil, it is not used.
  # `reqbody' specifies the entity body of request.  If it is nil, "GET" method is used;
  # otherwise "POST" is used and the query part of the URL is not sent.
  # `resheads' specifies a list object into which headers of response are stored.  If it is
  # nil, it is not used.
  # `resbody' specifies a stream object into which the entity body of response is stored.  If
  # it is nil, it is not used.
  # The return value is the status code of the response or -1 on error or timeout.
  def shuttle_url(url, pxhost, pxport, outsec, reqheads, reqbody, resheads, resbody)
    target = URI.parse(url).normalize
    return -1 if target.scheme != "http" || !target.host || target.port < 1
    path = target.path.to_s.empty? ? "/" : target.path
    if pxhost
      # A proxy is addressed with the absolute URL in the request line.
      host = pxhost
      port = pxport
      query = "http://" + target.host + ":" + target.port.to_s + path
    else
      host = target.host
      port = target.port
      query = path
    end
    query += "?" + target.query if target.query && !reqbody
    # The head is assembled as plain text and sent with `write', so that "%"
    # in the URL or in headers is never interpreted as a format directive.
    head = [ (reqbody ? "POST " : "GET ") + query + " HTTP/1.0",
             "Host: " + target.host + ":" + target.port.to_s,
             "Connection: close",
             "User-Agent: HyperEstraierForRuby/1.0.0" ]
    reqheads.each { |line| head.push(line.to_s) } if reqheads
    # Content-Length counts bytes, not characters.
    head.push("Content-Length: " + reqbody.bytesize.to_s) if reqbody
    request = head.join("\r\n") + "\r\n\r\n"
    status = -1
    # The exchange runs in its own thread so that it can be abandoned on timeout.
    worker = Thread.start do
      sock = nil
      begin
        sock = TCPSocket.open(host, port)
        sock.write(request)
        sock.write(reqbody) if reqbody
        line = sock.gets
        next unless line
        line = line.chomp
        elems = line.split(/ +/)
        next if elems.length < 3 || elems[0] !~ /^HTTP/
        code = elems[1].to_i
        resheads.push(line) if resheads
        # Headers end at the first empty line, which is stored as well.
        while (line = sock.gets)
          line = line.chomp
          resheads.push(line) if resheads
          break if line.empty?
        end
        while (chunk = sock.read(8192))
          resbody.write(chunk) if resbody
        end
        # The status is published only after the whole response was read.
        status = code
      rescue StandardError
        status = -1
      ensure
        sock.close if sock
      end
    end
    if outsec && outsec >= 0
      unless worker.join(outsec)
        worker.kill
        worker.join
        return -1
      end
    else
      worker.join
    end
    status
  rescue StandardError
    -1
  end
  module_function :shuttle_url

  # Serialize a condition object into a query string.
  # `cond' specifies a condition object.
  # `depth' specifies depth of meta search.
  # The return value is the serialized string in application/x-www-form-urlencoded form.
  # The maximum number, the options and the depth are only included when positive.
  def cond_to_query(cond, depth)
    pairs = []
    pairs.push("phrase=" + URI.encode_www_form_component(cond.phrase)) if cond.phrase
    cond.attrs.each_with_index do |expr, idx|
      pairs.push("attr" + (idx + 1).to_s + "=" + URI.encode_www_form_component(expr))
    end
    pairs.push("order=" + URI.encode_www_form_component(cond.order)) if cond.order
    pairs.push("max=" + cond.max.to_s) if cond.max > 0
    pairs.push("options=" + cond.options.to_s) if cond.options > 0
    pairs.push("depth=" + depth.to_s) if depth > 0
    pairs.join("&")
  end
  module_function :cond_to_query

  # Encode a byte sequence with Base64 encoding.
  # `data' specifies a string object.
  # The return value is the encoded string without any line break.
  def base_encode(data)
    [data].pack("m").delete(" \n")
  end
  module_function :base_encode
end
872 |
end |
873 |
|
874 |
|
875 |
|
876 |
# END OF FILE |