/[Grep]/lib/Grep/Import/ScrapBook.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /lib/Grep/Import/ScrapBook.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 186 - (hide annotations)
Wed Apr 9 17:39:55 2008 UTC (16 years, 2 months ago) by dpavlin
File size: 4184 byte(s)
fix warnings
1 dpavlin 154 #!/usr/bin/perl
2    
3     use warnings;
4     use strict;
5    
6     package Grep::Import::ScrapBook;
7    
8     =head1 NAME
9    
10     Grep::Import::ScrapBook - importer for local ScrapBook pages
11    
12     =head1 CONFIGURATION
13    
14     You can symlink your ScrapBook directory
15    
16     ~/Grep/share/web/static$ ln -sf /home/dpavlin/private/ScrapBook scrapbook
17    
18     or set the C<Dir> option of the C<ScrapBook> section under C<Import> in the application configuration (path relative to Grep installation static root).
19    
20     =cut
21    
22     use XML::Simple;
23     use File::Slurp;
24 dpavlin 158 use HTML::ResolveLink;
25 dpavlin 159 use HTML::TreeBuilder;
26 dpavlin 154 use Data::Dump qw/dump/;
27    
# Import pages saved by the ScrapBook extension as items of a "ScrapBook" feed.
#
# Reads the application->Import->ScrapBook configuration (required keys:
# Dir, OwnerEmail), parses <StaticRoot>/<Dir>/scrapbook.rdf and, for every
# RDF:Description entry, loads data/<id>/index.html, rewrites its links
# relative to /static/<Dir>/... and stores the <body> as a Grep::Model::Item
# owned by the user whose e-mail is OwnerEmail. Newly created items are also
# handed to Grep::Search.
#
# Returns:
#   - nothing (bare return) when the ScrapBook configuration is missing
#   - 1 when the ScrapBook directory or scrapbook.rdf does not exist
#   - otherwise a hash ref of counters (total, failure, old, new); undef when
#     the RDF file has no RDF:Description entries
#
# Dies when the owner cannot be found, when an item id is not a
# YYYYMMDDhhmmss timestamp, or when the saved page cannot be read or parsed.
#
# NOTE(review): a sub named "import" is also invoked by Perl whenever this
# package is loaded with "use Grep::Import::ScrapBook;" -- confirm callers
# load it with "require" (or rely on that on purpose).
sub import {
    my $self = shift;

    my $config = Jifty->config->app('Import')->{'ScrapBook'};

    if ( ! $config ) {
        Jifty->log->warn("skipping ScrapBook importer, no application->Import->ScrapBook config");
        return;
    }

    # required parameters in config.yml
    # NOTE(review): confirm that the Jifty logger really provides die()
    foreach my $param ( qw/Dir OwnerEmail/ ) {
        Jifty->log->die("can't find $param in Scrapbook config") unless defined ( $config->{$param} );
    }

    my $dir =
        Jifty::Util->app_root . '/' .
        Jifty->config->framework('Web')->{'StaticRoot'} . '/' . $config->{'Dir'};

    my $path = $dir . '/scrapbook.rdf';
    $path =~ s!//+!/!g;    # collapse duplicate slashes produced by the joins above

    if ( ! -e $dir || ! -e $path ) {
        Jifty->log->warn("Skipping ScrapBook import $path: $!");
        return 1;
    }

    my $rdf = XMLin(
        $path,
#       KeyAttr => [ qw/RDF:about/ ],
    ) || die "can't open $path: $!";

#   warn "## original rdf -> ", dump( $rdf );

    my $owner = Grep::Model::User->new();
    $owner->load_by_cols( email => $config->{OwnerEmail} );
    die "can't find ScrapBookOwner ", $config->{OwnerEmail} unless ( $owner->id );

    Jifty->log->info( "Using user ", $owner->id, " from ", $owner->email, " for import" );

    my $feed = Grep::Model::Feed->new( current_user => $owner );

    $feed->load_or_create(
        uri => 'file://' . $path,
        title => 'ScrapBook',
        #source => 'Grep::Source',
        owner => $owner,
    );

    my $search = Grep::Search->new;

    my $stats;

    # XML::Simple yields an array ref for repeated elements but a plain hash
    # ref when the file contains a single RDF:Description; accept both.
    my $descriptions = $rdf->{'RDF:Description'};
    my @items =
          ref $descriptions eq 'ARRAY' ? @$descriptions
        : defined $descriptions        ? ( $descriptions )
        :                                ();

    foreach my $item ( @items ) {

        $stats->{total}++;

        #warn "## item = ",dump( $item );

        # copy non-empty, non-RDF attributes with the namespace prefix removed
        my $hash = {};
        foreach my $k ( keys %$item ) {
            next if $k =~ m/^RDF:/;
            next if ( $item->{$k} eq '' );
            my $n = $k;
            $n =~ s/^\w+://;    # strip namespace
            $hash->{$n} = $item->{$k};
        }

        #warn "## hash = ", dump( $hash );

        # without an id there is no data directory to read from
        if ( ! defined $hash->{id} ) {
            Jifty->log->warn("can't import item without id");
            $stats->{failure}++;
            next;
        }

        # fetch full-text content and import it

        my $rel_path = '/data/' . $hash->{id} . '/index.html';

        my $content_path = $dir . $rel_path;
        if ( ! -r $content_path ) {
            Jifty->log->warn("can't import $content_path: $!");
            $stats->{failure}++;
            next;
        }

        # create date from id

        my $dt;
        if ( $hash->{id} =~ m/^(\d{4})(\d\d)(\d\d)(\d\d)(\d\d)(\d\d)$/ ) {
            $dt = Jifty::DateTime->new(
                year => $1,
                month => $2,
                day => $3,
                hour => $4,
                minute => $5,
                second => $6,
                #time_zone => 'UTC',
            );
        } else {
            die "can't parse date from ", $hash->{id};
        }

        my $i = Grep::Model::Item->new( current_user => $owner );

        if ( $i->load_by_cols(
            in_feed => $feed,
            title => $hash->{title},
            link => $hash->{source},
        ) ) {
            # already imported on a previous run
            $stats->{old}++;
            Jifty->log->info("existing ", $i->id ," ", $i->link) if $i->id;
        } else {

            my $content = read_file( $content_path ) or
                die "can't read $content_path: $!";

            my $tree = HTML::TreeBuilder->new or die "can't create html tree";
            $tree->parse( $content ) or die "can't parse fetched content";
            # signal end of input so the tree is complete before it is queried
            $tree->eof;

            my $body = $tree->look_down( '_tag', 'body' );

            # make relative links in the saved page point below /static/<Dir>/data/<id>/
            my $resolver = HTML::ResolveLink->new( base => '/static/' . $config->{Dir} . $rel_path );
            $content = $resolver->resolve( $body->as_HTML );

            # release the parsed tree explicitly; one is built per imported page
            $tree->delete;

            my ($ok,$msg) = $i->create(
                in_feed => $feed,
                created_on => $dt,
                title => $hash->{title},
                link => $hash->{source},
                content => $content,
            );

#           $i->set_created_on( $dt );

            if ( ! $ok ) {
                Jifty->log->error( $msg );
                $stats->{failure}++;
                next;
            }

            # only messages that do not start with "Found" count as new items
            if ( $msg && $msg !~ m/^Found/ ) {
                $stats->{new}++;
                Jifty->log->info("created ", $i->id ," ", $i->link, " ", length( $content ), " bytes");
                $search->add( $i, $owner->id );
            } else {
                Jifty->log->info("skipped ", $i->id ," ", $i->link, " ", length( $content ), " bytes");
            }
        }
    }

    $search->finish;

    return $stats;
}
179    
180 dpavlin 155 =head1 SEE ALSO
181 dpavlin 154
182 dpavlin 155 L<http://amb.vis.ne.jp/mozilla/scrapbook/> - ScrapBook Firefox extension
183    
184     =cut
185    
186 dpavlin 154 1;

  ViewVC Help
Powered by ViewVC 1.1.26