/[Grep]/lib/Grep/Import/ScrapBook.pm
This is a repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /lib/Grep/Import/ScrapBook.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 154 - (show annotations)
Sun Jun 10 18:41:00 2007 UTC (16 years, 10 months ago) by dpavlin
File size: 2503 byte(s)
starting to extend Grep so it can import data from the local filesystem (for now,
pages from the ScrapBook Firefox plugin) and make them searchable
1 #!/usr/bin/perl
2
3 use warnings;
4 use strict;
5
6 package Grep::Import::ScrapBook;
7
8 =head1 NAME
9
10 Grep::Import::ScrapBook - importer for local ScrapBook pages
11
12 =head1 CONFIGURATION
13
14 You can symlink your ScrapBook directory
15
16 ~/Grep/share/web/static$ ln -sf /home/dpavlin/private/ScrapBook scrapbook
17
18 or modify the C<ScrapBookDir> path in the application configuration (relative to Grep installation static root).
19
20 =head1 SEE ALSO
21
22 L<http://amb.vis.ne.jp/mozilla/scrapbook/> - ScrapBook Firefox extension
23
24 =cut
25
26 use XML::Simple;
27 use File::Slurp;
28 use Data::Dump qw/dump/;
29
# import( $search )
#
# Imports the pages listed in the ScrapBook index (scrapbook.rdf) as
# Grep::Model::Item records attached to a single 'ScrapBook' feed.
#
# Arguments:
#   $search - a Grep::Search object (required; only its class is checked here)
#
# Returns 1 both on success and when the ScrapBook directory or its index
# is missing (the latter is logged and skipped).
#
# Dies when $search is missing or is not a Grep::Search, when the index
# cannot be parsed, or when a readable page cannot be slurped.
#
# NOTE(review): Perl calls a package's import() implicitly on
# "use Grep::Import::ScrapBook;", with no $search argument, which would hit
# the "need search" die below. Confirm callers load this module with
# require() or "use Grep::Import::ScrapBook ();".
sub import {
    my $self = shift;
    my $search = shift or die "need search";
    die "search is ", ref($search), " and not Grep::Search" unless ($search->isa('Grep::Search'));

    # ScrapBook lives below the static root of the Grep installation
    my $dir =
        Jifty::Util->app_root . '/' .
        Jifty->config->framework('Web')->{'StaticRoot'} . '/' .
        Jifty->config->app('ScrapBookDir');

    my $path = $dir . '/scrapbook.rdf';
    $path =~ s!//+!/!g;    # collapse duplicate slashes from the concatenation

    if ( ! -e $dir || ! -e $path ) {
        Jifty->log->warn("Skipping ScrapBook import $path: $!");
        return 1;
    }

    # ForceArray keeps RDF:Description an array reference even when the
    # index holds a single entry (XML::Simple would otherwise return a plain
    # hash and the loop below would die). KeyAttr => [] disables folding of
    # that array into a hash.
    my $rdf = XMLin(
        $path,
        ForceArray => [ 'RDF:Description' ],
        KeyAttr    => [],
    ) || die "can't open $path: $!";

#   warn "## original rdf -> ", dump( $rdf );

    my $feed = Grep::Model::Feed->new();
    $feed->load_or_create(
        uri => 'file://' . $path,
        title => 'ScrapBook',
        #source => 'Grep::Source',
    );

    foreach my $item ( @{ $rdf->{'RDF:Description'} || [] } ) {

        warn "## item = ",dump( $item );

        # copy all non-RDF, non-empty attributes, stripping the namespace
        my $hash = {};
        foreach my $k ( keys %$item ) {
            next if $k =~ m/^RDF:/;
            my $value = $item->{$k};
            next if ! defined $value;
            next if ( ! ref $value && $value eq '' );
            my $n = $k;
            $n =~ s/^\w+://;    # strip namespace
            $hash->{$n} = $value;
        }

        warn "## hash = ", dump( $hash );

        # without a plain-string id there is neither a data directory nor a date
        my $item_id = $hash->{id};
        if ( ! defined $item_id || ref $item_id ) {
            Jifty->log->warn("skipping ScrapBook item without id");
            next;
        }

        # fetch full-text content and import it

        my $content_path = $dir . '/data/' . $item_id . '/index.html';
        if ( ! -r $content_path ) {
            Jifty->log->warn("can't import $content_path: $!");
            next;
        }
        # an empty page is valid content, so test definedness, not truth
        my $content = read_file( $content_path );
        die "can't read $content_path: $!" unless defined $content;

        # create date from id (ids look like YYYYMMDDhhmmss)

        my $dt;
        if ( $item_id =~ m/^(\d{4})(\d\d)(\d\d)(\d\d)(\d\d)(\d\d)$/ ) {
            my ( $year, $month, $day, $hour, $minute, $second ) =
                ( $1, $2, $3, $4, $5, $6 );
            # DateTime->new throws on out-of-range fields; one bad id
            # should not abort the whole import
            $dt = eval {
                require DateTime;
                DateTime->new(
                    year   => $year,
                    month  => $month,
                    day    => $day,
                    hour   => $hour,
                    minute => $minute,
                    second => $second,
                    #time_zone => 'UTC',
                );
            };
            warn "can't create date from $item_id: $@" unless $dt;
        } else {
            warn "can't parse date from ", $item_id;
        }

        # Use the parsed date for issued (the original computed $dt but then
        # passed the raw id); fall back to the raw id when parsing failed.
        # NOTE(review): assumes the Item model's issued column accepts a
        # DateTime object - confirm against Grep::Model::Item.
        my $i = Grep::Model::Item->new();
        my ($id,$msg) = $i->load_or_create(
            in_feed => $feed,
            title => $hash->{title},
            link => $hash->{source},
            content => $content,
            issued => ( $dt || $item_id ),
        );

        warn ">> item ",
            ( defined $id  ? $id  : '' ), " ",
            ( defined $msg ? $msg : '' ), "\n";

    }

    return 1;
}
123
124
125 1;

  ViewVC Help
Powered by ViewVC 1.1.26