/[webpac]/openisis/current/php/Isis/Rec.php
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /openisis/current/php/Isis/Rec.php

Parent Directory Parent Directory | Revision Log Revision Log


Revision 237 - (show annotations)
Mon Mar 8 17:43:12 2004 UTC (20 years, 1 month ago) by dpavlin
File size: 18392 byte(s)
initial import of openisis 0.9.0 vendor drop

1 <?php
2 /*
3 OpenIsis - an open implementation of the CDS/ISIS database
4 Version 0.8.x (patchlevel see file Version)
5 Copyright (C) 2001-2003 by Erik Grziwotz, erik@openisis.org
6
7 This library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
11
12 This library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public
18 License along with this library; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
21 see README for more information
22 EOH */
23
24 // $Id: Rec.php,v 1.10 2003/06/10 11:57:16 kripke Exp $
25
/*
 * Newline handling modes for (de)serializing record values. The value of
 * each constant is the string a newline inside a field value is replaced by.
 */

/**
 * FIELD mode: newlines become tabs.
 * On deserializing, these tabs are not converted back to newlines,
 * so do not use this mode if newline information must survive.
 */
define('ISIS_REC_FIELD', "\t");

/**
 * BIN mode: newlines become a newline-tab sequence.
 * On deserializing, every newline-tab sequence is turned back into a
 * newline, which transparently retains even binary data.
 */
define('ISIS_REC_BIN', "\n\t");

/**
 * TEXT mode: newlines become vertical tabs.
 * Vertical tabs are only turned back into newlines when explicitly
 * deserializing in TEXT mode, since this is not transparent to binary data.
 */
define('ISIS_REC_TEXT', "\013");

/**
 * The basic ISIS formatting mode.
 * "Data Mode" MDL doesn't make a significant difference, and uppercase
 * is better achieved with strtoupper (given the right locale ...).
 */
define('ISIS_REC_MHL', 'MHL');

// The MHL rewrite rules live in $GLOBALS (sorry, no other way to save this):
// pattern N below is replaced by replacement N, applied in list order.
$GLOBALS['ISIS_REC_MHL_PAT'] = array(
    '/(<[^=>]*)=[^>]+>/', // dump <a=b> substitutions
    '/></',               // replace >< pairs
    '/[><]/',             // nuke other ><
    '/^\^./',             // kill initial subfield spec
    '/\^a/',              // ^a -> ;
    '/\^[b-i]/',          // ^[b-i] -> ,
    '/\^./'               // others -> .
);
$GLOBALS['ISIS_REC_MHL_REPL'] = array(
    '\1>',
    '; ',
    '',
    '',
    '; ',
    ', ',
    '. '
);
61
/**
 * An ISIS(/IIF/Z39.2/ISO2709)-style record in pure PHP.
 *
 * A record is an ordered list of fields; a field is the pair of the
 * entries stored under the same position in $tag and $val.
 * The class is only loosely connected to an Isis database:
 * most functions can be used without having a DB.
 *
 * Note: most getters work regardless of the type of the tags,
 * but the setters are strongly biased towards numeric tags.
 *
 * The code relies on the ISIS_REC_* constants and on the globals
 * $ISIS_REC_MHL_PAT / $ISIS_REC_MHL_REPL being defined before use.
 * It only uses constructs that are valid from PHP 5.3 up to PHP 8.x
 * (no each(), no {}-string offsets, no PHP 4-only constructor).
 *
 * @version $Revision: 1.10 $
 * @license LGPL
 * @package Isis
 */
class Isis_Rec
{
    /**
     * The associated database, if any (0 if none). Better be an Isis_Db.
     * Only its "fdt" property (name => int tag map) is used here.
     * @var object $db
     */
    public $db = 0;

    /**
     * The record's MFN ("master file number", a.k.a. rowid).
     * A value of 0 means the record did not yet have a number in its db;
     * a write will be handled as insert.
     * @var int $mfn
     */
    public $mfn = 0;

    /**
     * Array of tags for the record. Keys and values better be integers
     * (others might be ignored or yield unexpected results in some contexts).
     * The count of the tag array is taken as length of the record.
     * The tag array may have holes (unassigned ints) if unset is used.
     * Its internal array pointer is the iteration cursor of v() and set().
     * @var array $tag
     */
    public $tag = array();

    /**
     * Array of values for the record, parallel to $tag. Values better be
     * strings (i.e. will be forced to strings by some operations).
     * @var array $val
     */
    public $val = array();

    /**
     * Last tag accessed using function v.
     */
    public $v = null;

    /**
     * Number of holes poked into the record by rm() since the last pack().
     */
    public $holy = 0;


    // ////////////////////////////////////////////////////////////
    // static
    //

    /**
     * Format a value. Static, may be used as Isis_Rec::fmt().
     *
     * The format specification $fmt is consumed from left to right:
     * - if $fmt is null (or otherwise loosely equal to null, e.g. ''),
     *   the value is returned unchanged
     * - a leading '&' or '%' is stripped and selects htmlspecialchars or
     *   urlencode, resp., to be applied to the (each) value as last step
     * - a leading 'MHL' (the constant ISIS_REC_MHL) is stripped and selects
     *   the classical MHL ISIS formatting (applied before & or % mangling)
     * - if $fmt is (now) empty, the complete value is used
     * - else we're going for subfields:
     *   - a leading hat '^' is stripped and used as subfield delimiter
     *     (TAB otherwise)
     *   - if $fmt matches /^([^(]*)\((\d*)(\.\.(\d*))?/, the part from the
     *     first '(' on is stripped as occurrence selector "(first..last"
     *     (an optional closing ')' and additional chars are ignored)
     *   - the remaining characters are subfield names; '*' selects any
     *     subfield (including the initial unnamed one, even if it is empty),
     *     no name at all selects any without stripping subfield names
     *   - occurrences are counted from 0 per subfield name. An empty bound
     *     is 0; if a range is given (".."), an upper bound of 0 means
     *     "up to the end". By default only occurrence 0 is used.
     *
     * Examples: '^ab' gives keys 'a' and 'b'; 'b(1..' gives keys 1, 2, ...;
     * '^cab(..' gives 'c0', 'c1', ... 'a0', ...
     *
     * @param string $val a value to be formatted
     * @param string $fmt a format specification, defaults to 'MHL'
     * @return mixed a string if the whole value, or a single subfield name
     *   without a range, was requested (null if that subfield is absent).
     *   Otherwise an array: keyed by subfield name ('' for the initial one)
     *   if more than one name or '*' was given, with the occurrence index
     *   appended to the key if a range was selected.
     */
    public static function fmt($val, $fmt = ISIS_REC_MHL)
    {
        global $ISIS_REC_MHL_PAT, $ISIS_REC_MHL_REPL;

        // deliberately loose: null, '' and 0 all mean "no formatting"
        if (null == $fmt) {
            return $val;
        }
        $fmt = (string)$fmt;

        // optional escaping applied as the very last step
        $mode = '';
        $lead = substr($fmt, 0, 1);
        if ('&' === $lead || '%' === $lead) {
            $mode = ('&' === $lead) ? 'htmlspecialchars' : 'urlencode';
            $fmt = (string)substr($fmt, 1);
        }

        // optional MHL prefix; strncmp is safe on an empty $fmt (e.g. '&')
        $mhlLen = strlen(ISIS_REC_MHL);
        $mhl = (0 === strncmp($fmt, ISIS_REC_MHL, $mhlLen));
        if ($mhl) {
            $fmt = (string)substr($fmt, $mhlLen);
        }

        if ('' === $fmt) { // no subfield selection: use the complete value
            if ($mhl) {
                $val = preg_replace($ISIS_REC_MHL_PAT, $ISIS_REC_MHL_REPL, $val);
            }
            return $mode ? $mode($val) : $val;
        }

        // subfield delimiter
        $sep = "\t";
        if ('^' === $fmt[0]) {
            $sep = '^';
            $fmt = (string)substr($fmt, 1);
        }
        $sub = explode($sep, (string)$val); // this really is not for performance ...
        $n = count($sub);

        // occurrence selector; without one only occurrence 0 is used
        $fst = 0;
        $lst = 0;
        $range = false;
        if (false !== strpos($fmt, '(') // quick check
            && preg_match('/^([^(]*)\((\d*)(\.\.(\d*))?/', $fmt, $m)
        ) {
            $fmt = $m[1]; // subfield names in front of the '('
            $fst = (int)$m[2];
            $range = isset($m[3]) && '' !== $m[3];
            if (!$range) {
                $lst = $fst;
            } else {
                $lst = isset($m[4]) ? (int)$m[4] : 0;
                if (!$lst) { // open upper bound: up to the end
                    $lst = $n;
                }
            }
        }

        $strip = $l = strlen($fmt);
        if (!$strip) { // plain mode: any subfield, names not stripped
            $fmt = '*';
            $l = 1;
        }
        $single = (1 == $l && !$range);
        $ret = array();

        for ($i = 0; $i < $l; $i++) {
            $s = $fmt[$i];
            // named subfields never match the initial unnamed part $sub[0]
            $sel = ('*' === $s) ? 0 : 1;
            $o = -1; // occurrence counter for this subfield name
            for ($j = $sel; $j < $n; $j++) {
                $name = (string)substr($sub[$j], 0, 1);
                if (($sel && $s !== $name) || ++$o < $fst) {
                    continue;
                }
                if ($lst < $o) {
                    break;
                }
                // ok, it's in range
                $v = ($strip && $j) ? (string)substr($sub[$j], 1) : $sub[$j];
                if ($mhl) {
                    $v = preg_replace($ISIS_REC_MHL_PAT, $ISIS_REC_MHL_REPL, $v);
                }
                if ($mode) {
                    $v = $mode($v);
                }
                if ($single) {
                    return $v;
                }
                if ($sel) {
                    $key = (1 == $l) ? '' : $s;
                } else {
                    $key = ($strip && $j) ? $name : '';
                }
                if ($range) {
                    $key .= $o;
                }
                $ret[$key] = $v;
                if (!$range) {
                    break;
                }
            }
        }
        // a single requested subfield that does not occur yields null
        return $single ? null : $ret;
    }


    // ////////////////////////////////////////////////////////////
    // ctor
    //

    /**
     * Create an ISIS record.
     * Any arguments given are passed on to add() as one array.
     */
    public function __construct()
    {
        $this->tag = array();
        $this->val = array();
        if (func_num_args()) {
            $this->add(func_get_args());
        }
    }

    /**
     * Legacy (PHP 4 style) constructor name, kept so that existing code
     * calling it explicitly keeps working. Delegates to __construct().
     */
    public function Isis_Rec()
    {
        $args = func_get_args();
        call_user_func_array(array($this, '__construct'), $args);
    }


    // ////////////////////////////////////////////////////////////
    // internal cursor helpers (replacement for the removed each())
    //

    /**
     * Return array(position, tag) of the field under the cursor of $tag
     * and advance the cursor, or false if the cursor is past the end.
     */
    private function _eachTag()
    {
        $pos = key($this->tag);
        if (null === $pos) {
            return false;
        }
        $tag = current($this->tag);
        next($this->tag);
        return array($pos, $tag);
    }

    /**
     * Advance the cursor past the next field whose tag equals (==) $tag.
     * @return mixed the position of that field, or false if there is none
     */
    private function _seekTag($tag)
    {
        while (false !== ($field = $this->_eachTag())) {
            if ($tag == $field[1]) {
                return $field[0];
            }
        }
        return false;
    }


    // ////////////////////////////////////////////////////////////
    // getters
    //

    /**
     * @return int the number of fields
     */
    public function len()
    {
        return count($this->tag);
    }


    /**
     * Try to turn a non-int tag into an int, in place:
     * numeric values are cast, other strings are looked up in the fdt of
     * the associated db. Tags that cannot be resolved are left unchanged.
     */
    public function fdt(&$tag)
    {
        if (is_int($tag)) {
            return;
        }
        if (is_numeric($tag)) {
            $tag = (int)$tag;
            return;
        }
        if (is_string($tag)
            && is_object($this->db)
            && !empty($this->db->fdt)
            && isset($this->db->fdt[$tag])
            && is_int($this->db->fdt[$tag])
        ) {
            $tag = $this->db->fdt[$tag];
        }
    }


    /**
     * Reset tag and val array pointers (restarts iteration with v()).
     */
    public function res()
    {
        reset($this->tag);
        reset($this->val);
    }


    /**
     * Get all values for tag as array, in record order.
     * Does not move the cursor used by v().
     */
    public function get($tag)
    {
        $this->fdt($tag);
        $ret = array();
        // array_keys with a search value compares loosely, like $tag == $t
        foreach (array_keys($this->tag, $tag) as $pos) {
            $ret[] = $this->val[$pos];
        }
        return $ret;
    }


    /**
     * v is for value -- get the value of the next occurrence of tag in
     * the record (of the next field of any tag if $tag is null).
     * The position is reset by res or when using v with a different tag.
     *
     * @param mixed $tag tag by int or name, null for any
     * @param mixed $fmt format to apply
     * - if $fmt is null (or the value is null),
     *   the value is returned unmodified
     * - if $fmt is a string,
     *   the value is formatted by Isis_Rec::fmt (returns string or array)
     * - if $fmt is 0, a new Isis_Rec is used as $fmt
     * - if $fmt is an object, it is assumed to be a record, into which a
     *   subrecord (as of embed) is extracted: the value is taken as the
     *   number of following fields to copy. This also advances the loop
     *   position to after the child fields. The record is returned.
     * - other values of $fmt are reserved for future extensions
     *   and currently return null.
     * @return mixed see $fmt; null if there is no further occurrence
     */
    public function v($tag = null, $fmt = null)
    {
        if (!is_int($tag)) {
            $this->fdt($tag);
        }
        // strict comparison: null ("any tag") and tag 0 are different loops
        if ($this->v !== $tag) {
            reset($this->tag);
            $this->v = $tag;
        }

        $v = null;
        if (is_null($tag)) {
            $field = $this->_eachTag();
            if (false !== $field) {
                $v = $this->val[$field[0]];
            }
        } else {
            $pos = $this->_seekTag($tag);
            if (false !== $pos) {
                $v = $this->val[$pos];
            }
        }

        if (is_null($v) || is_null($fmt)) {
            return $v;
        }
        if (is_string($fmt)) {
            return self::fmt($v, $fmt);
        }
        if (0 === $fmt) {
            $fmt = new Isis_Rec();
        }
        if (is_object($fmt)) {
            // the value is the child field count written by embed()
            for ($i = (int)$v; $i-- > 0 && false !== ($field = $this->_eachTag()); ) {
                $fmt->tag[] = $field[1];
                $fmt->val[] = $this->val[$field[0]];
            }
            return $fmt;
        }
        return null;
    }


    /**
     * Same as v($tag,'&'.$fmt), i.e. the result is HTML-escaped.
     */
    public function h($tag = null, $fmt = '')
    {
        return $this->v($tag, '&' . $fmt);
    }


    /**
     * Same as v($tag,'&MHL').
     */
    public function mhl($tag = null)
    {
        return $this->v($tag, '&MHL');
    }


    // ////////////////////////////////////////////////////////////
    // setters
    //

    /**
     * Append a new field (tag-value-pair) to the end of the record.
     * Strings and numbers are appended as they are, arrays element by
     * element (recursively), objects are embedded as subrecord (see embed).
     * Any other value (null, booleans) is silently ignored.
     * @param int $tag tag to use in the field.
     *   It is not enforced that tag is an integer.
     * @param mixed $val the new field's value.
     * @return mixed the value passed in
     */
    public function append($tag, $val)
    {
        if (is_string($val) || is_numeric($val)) {
            $this->tag[] = $tag;
            $this->val[] = $val;
        } elseif (is_array($val)) {
            foreach ($val as $item) {
                $this->append($tag, $item);
            }
        } elseif (is_object($val)) {
            $this->embed($tag, $val);
        }
        return $val;
    }


    /**
     * Add an array to the record.
     * @param mixed $argv an array either as a single parameter
     *   or as a variable number of arguments.
     *   The items are processed in order as follows:
     *   - if an item is an int, it is used as tag and appended
     *     with the following item as value
     *   - if an item is an array, add is called recursively on this array
     *   - if an item is '-db' or '-mfn', the corresponding property is set
     *     to the following item
     *   - if we have an fdt which maps the item to an int, the following
     *     item is appended as value with the tag given by the fdt
     *   - if an item is ISIS_REC_TEXT, the following item is parsed
     *     in text mode
     *   - else the item is parsed in standard mode
     *   An item that is exactly false ends processing.
     * @return int the number of fields added to the record
     */
    public function add($argv)
    {
        if (1 < func_num_args() || !is_array($argv)) {
            $argv = func_get_args();
        }
        // standardized to having an array
        $items = array_values($argv);
        $count = count($items);
        $fdt = (is_object($this->db) && isset($this->db->fdt)) ? $this->db->fdt : null;
        $added = 0;

        for ($k = 0; $k < $count; $k++) {
            $item = $items[$k];
            if (false === $item) { // historic end marker
                break;
            }
            if (is_array($item)) {
                $added += $this->add($item);
                continue;
            }
            $isTag = is_int($item);
            $viaFdt = !$isTag && $fdt && is_string($item)
                && '-mfn' !== $item && '-db' !== $item
                && isset($fdt[$item]) && is_int($fdt[$item]);
            $takesNext = $isTag || $viaFdt
                || '-mfn' === $item || '-db' === $item || ISIS_REC_TEXT === $item;
            if (!$takesNext) {
                $added += $this->parse($item);
                continue;
            }
            // all remaining kinds consume the following item (null if missing)
            $next = ($k + 1 < $count) ? $items[++$k] : null;
            if ($isTag || $viaFdt) {
                $before = count($this->tag);
                $this->append($isTag ? $item : $fdt[$item], $next);
                $added += count($this->tag) - $before;
            } elseif ('-mfn' === $item) {
                $this->mfn = $next;
            } elseif ('-db' === $item) {
                $this->db = $next;
                $fdt = (is_object($next) && isset($next->fdt)) ? $next->fdt : null;
            } else { // ISIS_REC_TEXT
                $added += $this->parse($next, ISIS_REC_TEXT);
            }
        }
        return $added;
    }


    /**
     * For every name in the fdt of the associated db, append the global
     * variable of that name (if set) under the tag the fdt maps it to.
     */
    public function addglobals()
    {
        if (!is_object($this->db) || empty($this->db->fdt)) {
            return;
        }
        foreach ($this->db->fdt as $name => $tag) {
            if (isset($GLOBALS[$name])) {
                $this->append($tag, $GLOBALS[$name]);
            }
        }
    }


    /**
     * Recompact after unsetting: renumber the positions to 0..len-1.
     */
    public function pack()
    {
        $this->tag = array_values($this->tag);
        $this->val = array_values($this->val);
        $this->holy = 0;
    }


    /**
     * Remove a field at given pos, poking a hole in the field list
     * unless $pack is set.
     */
    public function rm($pos, $pack = false)
    {
        unset($this->tag[$pos], $this->val[$pos]);
        if ($pack) {
            $this->pack();
        } else {
            $this->holy++;
        }
    }


    /**
     * Remove all fields or all with a given tag.
     * @param mixed $tag tag by int or name, null to remove everything
     * @param bool $pack whether to recompact the record afterwards
     */
    public function del($tag = null, $pack = false)
    {
        if (is_null($tag)) {
            $this->tag = array();
            $this->val = array();
            $this->holy = 0; // an empty record has no holes
            return;
        }
        if (!is_int($tag)) {
            $this->fdt($tag);
        }
        foreach (array_keys($this->tag, $tag) as $pos) {
            $this->rm($pos);
        }
        if ($pack) {
            $this->pack();
        }
    }


    /**
     * Set fields with tag to values.
     * set( 42, 'foo', 'bar', 'baz' ) will change
     * the first three occurrences of 42 to 'foo', 'bar' and 'baz', resp.
     * - if there are less than three occurrences,
     *   the remaining values are appended
     * - if there are more than three occurrences,
     *   the remaining occurrences are deleted
     * - if a value is the integer 0,
     *   processing stops (i.e. remaining occurrences are left unchanged)
     * - if a value is a positive integer n,
     *   processing skips n occurrences (leaving them unchanged)
     * - if a value is an array,
     *   its elements are used (it is flattened out non-recursively)
     * @param mixed $tag tag by int or name
     * @param mixed values... variable number of values
     */
    public function set($tag)
    {
        if (!is_int($tag)) {
            $this->fdt($tag);
        }
        // collect the values, flattening arrays by one level
        $values = array();
        foreach (array_slice(func_get_args(), 1) as $arg) {
            if (is_array($arg)) {
                foreach ($arg as $item) {
                    $values[] = $item;
                }
            } else {
                $values[] = $arg;
            }
        }

        reset($this->tag);
        $more = true; // false once the cursor ran past the last occurrence
        foreach ($values as $arg) {
            if (is_int($arg)) {
                if (0 === $arg) {
                    return;
                }
                while ($more && 0 < $arg--) {
                    $more = (false !== $this->_seekTag($tag));
                }
                continue;
            }
            // now arg is a value to set/add
            if ($more) {
                $pos = $this->_seekTag($tag);
                if (false !== $pos) {
                    $this->val[$pos] = $arg;
                    continue;
                }
                $more = false;
            }
            $this->append($tag, $arg);
        }
        // values used up: drop the occurrences that are left over
        if ($more) {
            while (false !== ($pos = $this->_seekTag($tag))) {
                $this->rm($pos);
            }
        }
    }


    /**
     * Transparently embed a record: append a field $tag whose value is the
     * number of fields of $that, followed by copies of all fields of $that.
     * Use v($tag, 0) to extract the subrecord again.
     */
    public function embed($tag, $that)
    {
        // snapshot first, so that embedding a record into itself is safe
        $tags = array_values($that->tag);
        $vals = array_values($that->val);
        $count = count($tags);
        $this->append($tag, $count);
        for ($i = 0; $i < $count; $i++) {
            $this->tag[] = $tags[$i];
            $this->val[] = $vals[$i];
        }
    }


    // ////////////////////////////////////////////////////////////
    // conversion to/from other representations
    //

    /**
     * Serialize record to a string, one line "tag TAB value" per field.
     * After each field, including the last one, a newline is added.
     * Does not move the cursor used by v().
     * @param string $mode replacement value for newlines within values.
     *   suggested is one of the predefined constants.
     *   defaults to ISIS_REC_BIN.
     * @return string the string representation of the record
     */
    public function toString($mode = ISIS_REC_BIN)
    {
        $tags = array_values($this->tag);
        $vals = array_values($this->val);
        $s = '';
        foreach ($tags as $i => $t) {
            $s .= $t . "\t" . str_replace("\n", $mode, $vals[$i]) . "\n";
        }
        return $s;
    }


    /**
     * Parse text as record fields to add.
     * For each non-empty line, initial digits are used as tag (empty == 0),
     * an optional following tab is skipped, and the rest used as value,
     * after replacing $repl, if given, with newlines.
     * If the line starts with a tab and the record is not empty,
     * a newline and the value are appended to the last field,
     * else a new field is appended to the record.
     * @param string $text the text to parse
     * @param string $repl string to be converted back to newlines.
     *   use ISIS_REC_TEXT, if you know text is from toString(ISIS_REC_TEXT)
     * @return int number of fields added
     */
    public function parse($text, $repl = null)
    {
        // need compact array in order to reliably know last index
        if ($this->holy) {
            $this->pack();
        }
        $conv = 0;
        foreach (explode("\n", (string)$text) as $line) {
            if ('' === $line) { // blank line or trailing newline
                continue;
            }
            $dig = strspn($line, '0123456789-');
            $t = $dig ? (int)substr($line, 0, $dig) : 0;
            // substr() instead of an offset: the line may end after the tag
            $skip = ("\t" === substr($line, $dig, 1)) ? 1 : 0;
            $v = (string)substr($line, $dig + $skip);
            if ($repl) {
                $v = str_replace($repl, "\n", $v);
            }
            if ("\t" === $line[0]) { // continuation line
                $c = count($this->val);
                if ($c) {
                    $this->val[$c - 1] .= "\n" . $v;
                    continue;
                }
            }
            $this->tag[] = $t;
            $this->val[] = $v;
            $conv++;
        }
        return $conv;
    } // parse


    /**
     * Return the "data fork" of this record by mapping a function
     * to tags and values in parallel.
     * @param callable $func defaults to null, resulting in an array of
     *   fields, each an array [0] => $tag, [1] => $val.
     * @return array a new array as of array_map
     */
    public function map($func = null)
    {
        return array_map($func, $this->tag, $this->val);
    }
} // class Isis_Rec
664 ?>

  ViewVC Help
Powered by ViewVC 1.1.26