/[webpac]/openisis/current/php/Isis/Rec.php
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /openisis/current/php/Isis/Rec.php

Parent Directory Parent Directory | Revision Log Revision Log


Revision 237 - (hide annotations)
Mon Mar 8 17:43:12 2004 UTC (20 years, 2 months ago) by dpavlin
File size: 18392 byte(s)
initial import of openisis 0.9.0 vendor drop

1 dpavlin 237 <?php
2     /*
3     OpenIsis - an open implementation of the CDS/ISIS database
4     Version 0.8.x (patchlevel see file Version)
5     Copyright (C) 2001-2003 by Erik Grziwotz, erik@openisis.org
6    
7     This library is free software; you can redistribute it and/or
8     modify it under the terms of the GNU Lesser General Public
9     License as published by the Free Software Foundation; either
10     version 2.1 of the License, or (at your option) any later version.
11    
12     This library is distributed in the hope that it will be useful,
13     but WITHOUT ANY WARRANTY; without even the implied warranty of
14     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15     Lesser General Public License for more details.
16    
17     You should have received a copy of the GNU Lesser General Public
18     License along with this library; if not, write to the Free Software
19     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20    
21     see README for more information
22     EOH */
23    
24     // $Id: Rec.php,v 1.10 2003/06/10 11:57:16 kripke Exp $
25    
/**
 * FIELD mode: every newline in a value is written as a tab.
 * Deserializing does not turn those tabs back into newlines,
 * so do not use it when newline information has to survive.
 */
define('ISIS_REC_FIELD', "\t");

/**
 * BIN mode: every newline in a value is written as newline + tab.
 * Deserializing turns any newline-tab sequence back into a newline,
 * which transparently retains even binary data.
 */
define('ISIS_REC_BIN', "\n\t");

/**
 * TEXT mode: every newline in a value is written as a vertical tab.
 * Vertical tabs are only turned back into newlines when deserializing
 * explicitly in TEXT mode, since this is not transparent to binary data.
 */
define('ISIS_REC_TEXT', "\013");

/**
 * The basic ISIS formatting mode.
 * "Data Mode" MDL makes no significant difference, and uppercasing is
 * better done with strtoupper (given the right locale ...).
 */
define('ISIS_REC_MHL', 'MHL');

// The MHL rewrite tables live in $GLOBALS (no other way to save them);
// the two arrays are parallel and applied in order by preg_replace.
$GLOBALS['ISIS_REC_MHL_PAT'] = array(
    '/(<[^=>]*)=[^>]+>/',   // dump <a=b> substitutions
    '/></',                 // replace >< pairs
    '/[><]/',               // nuke other ><
    '/^\^./',               // kill initial subfield spec
    '/\^a/',                // ^a -> ;
    '/\^[b-i]/',            // ^[b-i] -> ,
    '/\^./'                 // others -> .
);
$GLOBALS['ISIS_REC_MHL_REPL'] = array(
    '\1>',
    '; ',
    '',
    '',
    '; ',
    ', ',
    '. '
);
61    
62     /**
63     An ISIS(/IIF/Z39.2/ISO2709)-style record in pure PHP.
64     First of all: Use Tcl, Java, Perl or plain C,
65     but please don't use PHP -- it's a mess.
66    
67     This is only loosely connected to an Isis Database,
68     most functions can be used without having a DB.
69    
70     Note: Most getter methods work regardless of the type of tags.
71     However, the setters are strongly biased towards numeric tags.
72    
73     @version $Revision: 1.10 $
74     @license LGPL
75     @package Isis
76     */
77     class Isis_Rec {
78     /**
79     The associated database, if any. Better be an Isis_Db.
80     @var object $db
81     */
82     var $db = 0;
83     /**
84     The records MFN ("master file number", a.k.a. rowid).
85     A value of 0 means the record does not yet have a number in its db,
86     a write will be handled as insert.
87     @var int $mfn
88     */
89     var $mfn = 0;
90     /**
91     Array of tags for the record. Keys and values better be integers
92     (others might be ignored or yield unexpected results in some contexts).
93     The count of the tag array is taken as length of the record.
94     The tag array may have holes (unassigned ints) if unset is used.
95     @var array $tag
96     */
97     var $tag;
98     /**
99     Array of values for the record. Values better be strings
100     (i.e. will be forced to strings by some operations).
101     @var array $val
102     */
103     var $val;
104     /**
105     last tag accessed using function v
106     */
107     var $v = null;
108     /**
109     number of holes in record
110     */
111     var $holy = 0;
112    
113    
114    
115     // ////////////////////////////////////////////////////////////
116     // static
117     //
118    
119     /** static function to format a value.
120     maybe used as Isis_Rec::fmt.
121     @param string $val a value to be formatted
122     @param string $fmt a format specification, defaults to 'MHL'
123     - if $fmt is null,
124     the value is returned unchanged
125     - if $fmt starts with '&' or '%',
126     that character is stripped and htmlspecialchars or urlencode, resp.,
127     is applied to the (each) value as last step
128     - if $fmt starts with 'MHL' (the constant ISIS_REC_MHL),
129     that is stripped and the classical MHL ISIS formatting applied to values
130     (before & or % mangling, rarely needed with subfields)
131     - if $fmt is (now) empty,
132     the complete value is used
133     - else we're going for subfields:
134     - if $fmt starts with a hat,
135     the hat is stripped and used as subfield delimiter (TAB otherwise)
136     - if $fmt matches /^([^(]*)\((\d*)(\.\.(\d*))?/,
137     the part from the first '(' on is stripped as occurence selector
138     (note that an optional closing ')' and additional chars are ignored)
139     - the (remaining) characters in $fmt are subfield names,
140     '*' selects any subfield
141     (including the initial unnamed, even if it's empty !),
142     '' any without stripping subfield names
143     - if there is a occurence or range selected,
144     for every character in the $fmt the specified occurences are used
145     (counted from 0). If either bound is empty, 0 is used.
146     If a range is specified (.. given), an upper bound of 0 means up to end.
147     By default, only the first occurence (0) is used.
148     @return if there is only a single character (remaining) in $fmt,
149     and only a single occurrence selected (no ..), a string is returned.
150     Else you've been asking for an array,
151     and so an array is returned even if it contains only a single value.
152     If more than one subfield name was specified or the '*',
153     the names are used ('' for the initial).
154     If a range is selected, the index is used (in addition).
155     Fields are added by first looping over subfield names,
156     then occurences. PHP may or may not loop the array in that order.
157     Example:
158     '^ab' gives keys 'a' and 'b', 'b(1..' gives keys '1','2'...m
159     '^cab(..', gives 'c0', 'c1', ... 'a0', ...
160     */
function fmt ( $val, $fmt = ISIS_REC_MHL )
{
    // Format one field value according to $fmt:
    //   [&|%] [MHL] [^] subfield-names [ '(' first [ '..' last ] ]
    // Returns a string when exactly one subfield name and one occurrence
    // are selected (null if that occurrence does not exist), otherwise an
    // array keyed by subfield name and/or occurrence index.
    global $ISIS_REC_MHL_PAT, $ISIS_REC_MHL_REPL;

    if ( null == $fmt )
        return $val;
    $fmt = (string)$fmt;

    // optional output escaping, applied as the very last step
    $mode = '';
    $lead = substr($fmt, 0, 1);
    if ( '&' === $lead || '%' === $lead ) {
        $mode = '&' === $lead ? 'htmlspecialchars' : 'urlencode';
        // the cast keeps '' instead of false on PHP < 8
        $fmt = (string)substr($fmt, 1);
    }

    // optional classical MHL formatting
    $mhl = ( ISIS_REC_MHL === substr($fmt, 0, 3) );
    if ( $mhl )
        $fmt = (string)substr($fmt, 3);

    if ( '' === $fmt ) { // no subfield selection: use the complete value
        if ( $mhl )
            $val = preg_replace($ISIS_REC_MHL_PAT, $ISIS_REC_MHL_REPL, $val);
        return $mode ? $mode($val) : $val;
    }

    // subfield delimiter: hat if requested, TAB otherwise
    $sep = "\t";
    if ( '^' === substr($fmt, 0, 1) ) {
        $sep = '^';
        $fmt = (string)substr($fmt, 1);
    }
    $sub = explode($sep, $val); // this really is not for performance ...
    $n = count($sub);

    // occurrence selector; without one only occurrence 0 is used
    $fst = 0;
    $lst = 0;
    $range = false;
    if ( false !== strpos($fmt, '(') // quick check
        && preg_match('/^([^(]*)\((\d*)(\.\.(\d*))?/', $fmt, $m)
    ) {
        $fmt = $m[1]; // subfield names in front of the '('
        $fst = (int)$m[2];
        // preg_match omits trailing groups that did not take part
        $range = isset($m[3]) && '' !== $m[3];
        if ( !$range )
            $lst = $fst;
        else // empty or 0 upper bound means "up to the end"
            $lst = ( isset($m[4]) && $m[4] ) ? (int)$m[4] : $n;
    }

    $strip = $l = strlen($fmt);
    if ( !$strip ) { // plain mode: any subfield, names not stripped
        $fmt = '*';
        $l = 1;
    }
    $single = ( 1 == $l && !$range );
    $ret = $single ? null : array();

    for ( $i = 0; $i < $l; $i++ ) {
        $s = $fmt[$i];
        $sel = ( '*' !== $s ); // a named subfield never matches the initial part
        $o = -1;               // occurrence counter for this subfield name
        for ( $j = $sel ? 1 : 0; $j < $n; $j++ ) {
            // substr instead of an offset: a subfield may be empty
            $name = (string)substr($sub[$j], 0, 1);
            if ( ($sel && $s !== $name) || ++$o < $fst )
                continue;
            if ( $lst < $o )
                break;
            // ok, it's in range
            $v = ($strip && $j) ? (string)substr($sub[$j], 1) : $sub[$j];
            if ( $mhl )
                $v = preg_replace($ISIS_REC_MHL_PAT, $ISIS_REC_MHL_REPL, $v);
            if ( $mode )
                $v = $mode($v);
            if ( $single )
                return $v;
            // explicit branches: the former unparenthesised nested ternary
            // lost the name of subfield '0' and does not compile on PHP 8
            if ( $sel )
                $key = ( 1 == $l ) ? '' : $s;
            else
                $key = ( $strip && $j ) ? $name : '';
            if ( $range )
                $key .= $o;
            $ret[$key] = $v;
            if ( !$range )
                break;
        }
    }
    return $ret;
}
235    
236    
237     // ////////////////////////////////////////////////////////////
238     // ctor
239     //
240    
241     /**
242     create an ISIS record.
243     @return object Isis_Rec a new ISIS record
244     */
function __construct ()
{
    // Unified constructor: PHP 7 prefers it and PHP 8 no longer treats a
    // method named after the class as a constructor at all.
    // Any arguments are handed to add() as one array.
    $this->tag = array();
    $this->val = array();
    // func_get_args() must not be used directly as a call argument on
    // older PHP versions, so fetch it into a variable first
    $args = func_get_args();
    if ( $args )
        $this->add( $args );
}

function Isis_Rec ()
{
    // PHP 4 constructor entry point, kept for old engines and for callers
    // that invoke it explicitly; does exactly what __construct does.
    $this->tag = array();
    $this->val = array();
    $args = func_get_args();
    if ( $args )
        $this->add( $args );
}
252    
253    
254    
255     // ////////////////////////////////////////////////////////////
256     // getters
257     //
258    
259     /**
260     @return the number of fields
261     */
function len ()
{
    // the tag array defines the record length (holes are not counted)
    $fields = count($this->tag);
    return $fields;
}
266    
267    
268     /**
269     try to look up non-numeric tags in the fdt
270     */
function fdt ( &$tag )
{
    // Normalises $tag in place: numeric values become ints, other names
    // are replaced by the int the associated db's fdt maps them to.
    // Anything that cannot be resolved is left untouched.
    if ( is_int($tag) )
        return;
    if ( is_numeric($tag) ) {
        $tag = (int)$tag;
        return;
    }
    // without a db carrying an fdt there is nothing to look up;
    // non-scalars (arrays, objects, null) are not usable as array keys
    if ( !is_object($this->db) || empty($this->db->fdt) || !is_scalar($tag) )
        return;
    $fdt = $this->db->fdt;
    // isset first: reading an unknown name would raise a warning
    if ( isset($fdt[$tag]) && is_int($fdt[$tag]) )
        $tag = $fdt[$tag];
}
280    
281    
282     /**
283     reset tag and val array pointers
284     */
function res ()
{
    // rewind the internal pointers of both parallel arrays
    reset($this->val);
    reset($this->tag);
}
290    
291    
292     /** get all values for tag as array
293     */
function get ( $tag )
{
    // Collects the values of all fields carrying $tag, in record order.
    // $tag may be an int or a name resolvable through fdt().
    // Like the each()-based original, the scan leaves the internal pointer
    // of the tag array at its end; key/current/next replace each(), which
    // no longer exists in PHP 8.
    $this->fdt($tag);
    $ret = array();
    for ( reset($this->tag); null !== ($p = key($this->tag)); next($this->tag) )
        if ( $tag == current($this->tag) )
            $ret[] = $this->val[$p];
    return $ret;
}
304    
305    
306     /** v is for value
307     -- get the value of the next occurence of tag in the record.
308     The position is reset by res or when using v with a different tag.
309     @param mixed $fmt format to apply
310     - if $fmt is null (or the value is null),
311     the value is returned unmodified
312     - if $fmt is a string,
313     the value is formatted by Isis_Rec::fmt (returns string or array).
314     - if $fmt is 0,
315     a new Isis_Rec is used as $fmt
316     - if $fmt is an object, it is assumed to be a record,
317     into which a subrecord (as of embed) is to be extracted.
318     This also advances the loop position to after the child fields.
319     The record is returned.
320     - other values of $fmt are reserved for future extensions
321     and currently return null.
322     */
function v ( $tag = null, $fmt = null )
{
    // Returns the value of the next occurrence of $tag (any field when
    // $tag is null), continuing from where the previous call stopped.
    // The result is raw, formatted, or a subrecord, depending on $fmt.
    if ( !is_int($tag) )
        $this->fdt($tag);
    // strict comparison: loosely, tag 0, null and (before PHP 8) any
    // non-numeric name compare equal, which skipped the rewind
    if ( $this->v !== $tag ) {
        reset($this->tag); // the val array is accessed by key only
        $this->v = $tag;
    }

    // Scan forward with key/current/next (each() is gone in PHP 8).
    // The pointer is advanced before testing, so after a hit it already
    // sits behind the matching field, as it did with each().
    $v = null;
    while ( null !== ($p = key($this->tag)) ) {
        $t = current($this->tag);
        next($this->tag);
        if ( is_null($tag) || $tag == $t ) {
            $v = $this->val[$p];
            break;
        }
    }

    if ( is_null($v) || is_null($fmt) )
        return $v;
    if ( is_string($fmt) )
        return Isis_Rec::fmt($v, $fmt);
    if ( 0 === $fmt )
        $fmt = new Isis_Rec();
    if ( is_object($fmt) ) {
        // the value is the number of following fields that make up the
        // embedded record; copy them and leave the pointer behind them
        $i = (int)$v;
        while ( $i-- && null !== ($p = key($this->tag)) ) {
            $fmt->tag[] = current($this->tag);
            $fmt->val[] = $this->val[$p];
            next($this->tag);
        }
        return $fmt;
    }
    return null; // other $fmt types are reserved
}
361    
362    
363     /** same as v($tag,'&'.$fmt)
364     */
function h ( $tag = null, $fmt = '' )
{
    // prefix the format with '&' so that v() HTML-escapes the result
    $escaped = '&' . $fmt;
    return $this->v($tag, $escaped);
}
369    
370    
371     /** same as v($tag,'&MHL')
372     */
function mhl ( $tag = null )
{
    // MHL formatting followed by HTML escaping
    $fmt = '&MHL';
    return $this->v($tag, $fmt);
}
377    
378    
379    
380     // ////////////////////////////////////////////////////////////
381     // setters
382     //
383    
384     /**
385     append a new field (tag-value-pair) to the end of the record.
386     @param int $tag tag to use in the field.
387     It is not enforced that tag is an integer.
388     @param string $val the new fields value.
389     The string type is not enforced here.
390     @return the new value
391     */
function append ( $tag, $val )
{
    // Dispatch on the value type; whatever was passed is returned as is.
    if ( is_array($val) ) {
        // every element becomes a field of its own under the same tag
        foreach ( $val as $item )
            $this->append($tag, $item);
        return $val;
    }
    if ( is_object($val) ) {
        // objects are taken to be records and embedded
        $this->embed($tag, $val);
        return $val;
    }
    if ( is_string($val) || is_numeric($val) ) {
        $this->tag[] = $tag;
        $this->val[] = $val;
    }
    // any other type (null, bool, ...) is silently not appended
    return $val;
}
405    
406     /**
407     add an array to the record.
408     @param mixed $argv an array either as a single parameter
409     or as a variable number of arguments.
410     The array is processed as follows:
411     - if an item is an int, it is appended, with the following item as value.
412     - if an item is an array, add is called recursively on this array
413     - if an item is '-db' or '-mfn', the corresponding properties are set
414     - if we have an fdt which maps the item to an int,
415     the following item as value is added with the tag given by the fdt.
416     - if an item is ISIS_REC_TEXT, the following item is parsed in text mode.
417     - else the item is parsed in standard mode.
418     */
function add ( $argv )
{
    // Adds the items of $argv (one array, or the argument list itself)
    // to the record and returns the number of fields added.
    // The original counted fields but never returned the count, so the
    // recursive "$added += $this->add(...)" always added null.
    $added = 0;
    if ( 1 < func_num_args() || !is_array($argv) )
        $argv = func_get_args();
    // standardized to having an array
    $fdt = is_object($this->db) ? $this->db->fdt : null;

    // Items that take a value consume the following element with next().
    // The walk ends at the first element that is literally false, which
    // is also what next() returns at the end of the array.
    for ( $i = reset($argv); $i || !is_bool($i); $i = next($argv) ) {
        if ( is_int($i) ) {
            if ( !is_null($this->append($i, next($argv))) )
                $added++;
        } elseif ( is_array($i) ) {
            $added += $this->add($i);
        } elseif ( '-mfn' === $i ) {
            // strict comparisons: loosely, true or (before PHP 8) 0.0
            // would match the option names as well
            $this->mfn = next($argv);
        } elseif ( '-db' === $i ) {
            $this->db = next($argv);
            $fdt = is_object($this->db) ? $this->db->fdt : null;
        } elseif ( $fdt && is_scalar($i) && isset($fdt[$i]) && is_int($fdt[$i]) ) {
            // isset first: names unknown to the fdt fall through to parse
            if ( !is_null($this->append($fdt[$i], next($argv))) )
                $added++;
        } elseif ( ISIS_REC_TEXT === $i ) {
            $added += $this->parse( next($argv), ISIS_REC_TEXT );
        } else {
            $added += $this->parse( $i );
        }
    }
    return $added;
}
447    
448    
/**
 * For every name in the associated db's fdt, append the global variable
 * of that name under the tag the fdt maps the name to.
 * Does nothing without a db that carries an fdt.
 */
function addglobals ()
{
    if ( !$this->db || !$this->db->fdt )
        return;
    foreach ( $this->db->fdt as $name => $tag ) {
        // names without a global are skipped: append() ignores null
        // anyway, but reading the missing key raises a warning
        if ( isset($GLOBALS[$name]) )
            $this->append($tag, $GLOBALS[$name]);
    }
}
455    
456     /** recompact after unsetting
457     */
function pack ()
{
    // renumber both parallel arrays from 0; afterwards there are no holes
    $this->holy = 0;
    $this->val = array_values( $this->val );
    $this->tag = array_values( $this->tag );
}
464    
465    
466     /** remove a field at given pos, poking a hole in the field list.
467     */
function rm ( $pos, $pack = FALSE )
{
    // drop tag and value at $pos together
    unset( $this->tag[$pos], $this->val[$pos] );
    if ( !$pack ) {
        // leave the hole and just count it
        $this->holy++;
        return;
    }
    $this->pack();
}
477    
478    
479     /** remove all fields or all with a given tag.
480     */
function del ( $tag = null, $pack = FALSE )
{
    // Without a tag the whole record is emptied; otherwise every field
    // carrying $tag (int or fdt name) is removed, optionally repacking.
    if ( is_null($tag) ) {
        $this->tag = array();
        $this->val = array();
        // fresh arrays have no holes; the counter used to stay stale
        $this->holy = 0;
        return;
    }
    if ( !is_int($tag) )
        $this->fdt($tag);
    foreach ( $this->tag as $p => $t )
        if ( $tag == $t )
            $this->rm($p);
    if ( $pack )
        $this->pack();
}
496    
497    
498     /** set fields with tag to values.
499     set( 42, 'foo', 'bar', 'baz' ) will change
500     the first three occurences of 42 to 'foo', 'bar' and 'baz', resp.
501     - if there are less than three occurences,
502     the remaining values are appended
503     - if there are more than three occurences,
504     the remaining occurences are deleted
505     - if a value is the integer 0,
506     processing stops (i.e. remaining occurences are left unchanged)
507     - if a value is a positive integer n,
508     processing skips n occurences (letting them unchanged)
509     - if a value is an array,
510     it's elements are used (it is flattened out non-recursively)
511     @param mixed $tag tag by int or name
512     @param mixed values... variable number of values
513     */
function set ( $tag )
{
    // Rewrites the occurrences of $tag from the values following it in
    // the argument list. Values replace occurrences in order; surplus
    // values are appended and surplus occurrences removed. An int 0
    // stops, a positive int n skips n occurrences, and arrays are
    // flattened one level.
    if ( !is_int($tag) )
        $this->fdt($tag);

    // flatten the values one level up front; nested arrays inside an
    // array argument stay values, exactly as before
    $values = array();
    $args = func_get_args();
    array_shift($args); // eat $tag
    foreach ( $args as $arg ) {
        if ( is_array($arg) ) {
            foreach ( $arg as $item )
                $values[] = $item;
        } else
            $values[] = $arg;
    }

    reset($this->tag);
    $more = true; // false once the existing occurrences are exhausted
    foreach ( $values as $arg ) {
        // (a leftover debug echo of every value was removed here)
        if ( is_int($arg) ) {
            if ( !$arg )
                return; // 0: leave the remaining occurrences alone
            for ( ; $more && 0 < $arg; $arg-- )
                $more = ( null !== $this->_seek($tag) );
            continue;
        }
        // now arg is a value to set/add
        if ( $more ) {
            $pos = $this->_seek($tag);
            if ( null !== $pos ) {
                $this->val[$pos] = $arg;
                continue;
            }
            $more = false;
        }
        $this->append($tag, $arg);
    }

    // values used up while occurrences may remain: remove the rest
    if ( $more )
        while ( null !== ($pos = $this->_seek($tag)) )
            $this->rm($pos);
}

function _seek ( $tag )
{
    // Private helper of set(): advance the internal pointer of the tag
    // array to just behind the next field carrying $tag and return that
    // field's position, or null when the end is reached.
    // Uses key/current/next since each() no longer exists in PHP 8.
    while ( null !== ($pos = key($this->tag)) ) {
        $t = current($this->tag);
        next($this->tag);
        if ( $tag == $t )
            return $pos;
    }
    return null;
}
564    
565    
566     /* transparently embed a record.
567     */
function embed ( $tag, $that )
{
    // The embedding field's value is the number of child fields; the
    // children follow it directly, copied in the order of $that.
    $left = $that->len();
    $this->append( $tag, $left );
    reset($that->tag);
    reset($that->val);
    while ( $left-- ) {
        $this->tag[] = current($that->tag);
        $this->val[] = current($that->val);
        next($that->tag);
        next($that->val);
    }
}
579    
580    
581     // ////////////////////////////////////////////////////////////
582     // conversion to/from other representations
583     //
584    
585     /**
586     serialize record to a string.
587     After each field, including the last one, a newline is added.
588     @param string $mode replacement value for newlines.
589     suggested is one of the predefined constants.
590     defaults to ISIS_REC_BIN.
591     @return the string representation of the record
592     */
function toString ( $mode = ISIS_REC_BIN )
{
    // One line per field: tag, TAB, value with newlines replaced by
    // $mode, newline. Both arrays are walked in parallel by position.
    $out = '';
    $left = count($this->tag);
    reset($this->tag);
    reset($this->val);
    while ( $left-- ) {
        $value = str_replace("\n", $mode, current($this->val));
        $out .= current($this->tag) . "\t" . $value . "\n";
        next($this->tag);
        next($this->val);
    }
    return $out;
}
608    
609    
610     /** parse text as record fields to add.
611     For each non-empty line, initial digits are used as tag (empty == 0),
612     an optional following tab is skipped, and the rest used as value,
613     after replacing $repl, if given, with newlines.
614     If the line starts with a tab and the record is not empty,
615     a newline and the value are appended to the last field,
616     else a new field is appended to the record.
617     @param string $repl string to be converted back to newlines.
618     use ISIS_REC_TEXT, if you know text is from toString(ISIS_REC_TEXT)
619     @return number of fields added
620     */
function parse ( $text, $repl = null )
{
    // Parses $text line by line into fields and returns the number of
    // fields added. A line is "<tag><optional TAB><value>"; a line that
    // starts with a TAB continues the value of the last field.
    // $repl, if given, is converted back to newlines inside values.
    $conv = 0;
    // need compact array in order to reliably know last index
    if ( $this->holy )
        $this->pack();
    foreach ( explode("\n", (string)$text) as $line ) {
        if ( '' === $line ) // blank line or trailing newline
            continue;
        $dig = strspn($line, '0123456789-');
        $t = $dig ? (int)substr($line, 0, $dig) : 0;
        // substr instead of an offset: for a line made of digits only,
        // $dig points past the end of the string
        $skip = ( "\t" === substr($line, $dig, 1) ) ? 1 : 0;
        // the cast keeps '' instead of false on PHP < 8
        $v = (string)substr($line, $dig + $skip);
        if ( $repl )
            $v = str_replace($repl, "\n", $v);
        if ( "\t" === $line[0] ) { // continuation line
            $c = count($this->val);
            if ( $c ) {
                $this->val[$c-1] .= "\n" . $v;
                continue;
            }
            // empty record: fall through and start a new field
        }
        $this->tag[] = $t;
        $this->val[] = $v;
        $conv++;
    }
    return $conv;
} // parse
650    
651    
652     /**
653     return the "data fork" of this record by mapping a function
654     to tags and values in parallel.
655     @param function $func defaults to null,
656     resulting in an array of fields, each an array [0] => $tag, [1] => $val.
657     @return a new array as of array_map
658     */
function map ($func = null)
{
    // array_map walks tags and values in parallel; with a null callback
    // it pairs them up as array(tag, value)
    $tags = $this->tag;
    $vals = $this->val;
    return array_map($func, $tags, $vals);
}
663     } // class Isis_Rec
664     ?>

  ViewVC Help
Powered by ViewVC 1.1.26