21 |
|
|
22 |
use XML::Simple; |
use XML::Simple; |
23 |
use File::Slurp; |
use File::Slurp; |
24 |
|
use HTML::ResolveLink; |
25 |
|
use HTML::TreeBuilder; |
26 |
use Data::Dump qw/dump/; |
use Data::Dump qw/dump/; |
27 |
|
|
28 |
sub import { |
sub import { |
29 |
my $self = shift; |
my $self = shift; |
|
my $search = shift or die "need search"; |
|
|
die "search is ", ref($search), " and not Grep::Search" unless ($search->isa('Grep::Search')); |
|
30 |
|
|
31 |
my $dir = |
my $dir = |
32 |
Jifty::Util->app_root . '/' . |
Jifty::Util->app_root . '/' . |
62 |
owner => $owner, |
owner => $owner, |
63 |
); |
); |
64 |
|
|
65 |
|
my $search = Grep::Search->new; |
66 |
|
|
67 |
my $stats; |
my $stats; |
68 |
|
|
69 |
foreach my $item ( @{ $rdf->{'RDF:Description'} } ) { |
foreach my $item ( @{ $rdf->{'RDF:Description'} } ) { |
86 |
|
|
87 |
# fetch full-text content and import it |
# fetch full-text content and import it |
88 |
|
|
89 |
my $content_path = $dir . '/data/' . $hash->{id} . '/index.html'; |
my $rel_path = '/data/' . $hash->{id} . '/index.html'; |
90 |
|
|
91 |
|
my $content_path = $dir . $rel_path; |
92 |
if ( ! -r $content_path ) { |
if ( ! -r $content_path ) { |
93 |
Jifty->log->warn("can't import $content_path: $!"); |
Jifty->log->warn("can't import $content_path: $!"); |
94 |
$stats->{failure}++; |
$stats->{failure}++; |
97 |
my $content = read_file( $content_path ) or |
my $content = read_file( $content_path ) or |
98 |
die "can't read $content_path: $!"; |
die "can't read $content_path: $!"; |
99 |
|
|
100 |
|
my $tree = HTML::TreeBuilder->new or die "can't create html tree"; |
101 |
|
$tree->parse( $content ) or die "can't parse fetched content"; |
102 |
|
|
103 |
|
my $body = $tree->look_down( '_tag', 'body' ); |
104 |
|
|
105 |
|
my $resolver = HTML::ResolveLink->new( base => '/static/' . Jifty->config->app('ScrapBookDir') . $rel_path ); |
106 |
|
$content = $resolver->resolve( $body->as_HTML ); |
107 |
|
|
108 |
# create date from id |
# create date from id |
109 |
|
|
147 |
|
|
148 |
} |
} |
149 |
|
|
150 |
|
$search->finish; |
151 |
|
|
152 |
return $stats; |
return $stats; |
153 |
} |
} |
154 |
|
|