1 |
#!/usr/bin/perl |
2 |
|
3 |
use warnings; |
4 |
use strict; |
5 |
|
6 |
package Grep::Import::ScrapBook; |
7 |
|
8 |
=head1 NAME |
9 |
|
10 |
Grep::Import::ScrapBook - importer for local ScrapBook pages |
11 |
|
12 |
=head1 CONFIGURATION |
13 |
|
14 |
You can symlink your ScrapBook directory |
15 |
|
16 |
~/Grep/share/web/static$ ln -sf /home/dpavlin/private/ScrapBook scrapbook |
17 |
|
18 |
or set the C<ScrapBookDir> application config option to its path (relative to the Grep installation's static root). |
19 |
|
20 |
=head1 SEE ALSO |
21 |
|
22 |
L<http://amb.vis.ne.jp/mozilla/scrapbook/> - ScrapBook Firefox extension |
23 |
|
24 |
=cut |
25 |
|
26 |
use XML::Simple; |
27 |
use File::Slurp; |
28 |
use Data::Dump qw/dump/; |
29 |
|
30 |
# import( $search )
#
# Class method that imports the pages listed in the local ScrapBook index
# (scrapbook.rdf) as Grep::Model::Item records of a single 'ScrapBook' feed.
#
# The ScrapBook directory is located at
#   <app_root>/<Web StaticRoot>/<ScrapBookDir app config>
# and the full text of each page is read from data/<id>/index.html below it.
#
# Arguments:
#   $search - a Grep::Search object; it is only validated here.
#
# Returns 1, both after an import run and when the ScrapBook directory or
# its scrapbook.rdf does not exist (that case is logged and skipped).
#
# Dies if $search is missing or is not a Grep::Search, or if the RDF file
# or a readable page file cannot be loaded.
#
# NOTE: because this sub is named "import", Perl also calls it (without a
# search argument) for a plain "use Grep::Import::ScrapBook;", which dies
# with "need search". Load the module with "require" or "use ... ();".
sub import {
    my $self   = shift;
    my $search = shift or die "need search";

    # eval keeps an unblessed reference from dying inside ->isa itself, so
    # the caller always gets the descriptive message below.
    die "search is ", ref($search), " and not Grep::Search"
        unless eval { $search->isa('Grep::Search') };

    # Plain concatenation keeps every config accessor in scalar context.
    my $dir =
        Jifty::Util->app_root . '/' .
        Jifty->config->framework('Web')->{'StaticRoot'} . '/' .
        Jifty->config->app('ScrapBookDir');

    my $path = $dir . '/scrapbook.rdf';
    $path =~ s!//+!/!g;    # collapse duplicate slashes

    if ( ! -e $dir || ! -e $path ) {
        Jifty->log->warn("Skipping ScrapBook import $path: $!");
        return 1;
    }

    my $rdf = XMLin(
        $path,
#       KeyAttr => [ qw/RDF:about/ ],
    ) || die "can't open $path: $!";

#   warn "## original rdf -> ", Data::Dump::dump( $rdf );

    my $feed = Grep::Model::Feed->new();
    $feed->load_or_create(
        uri   => 'file://' . $path,
        title => 'ScrapBook',
        #source => 'Grep::Source',
    );

    # XML::Simple folds a lone <RDF:Description> into a hash reference
    # instead of an array reference, so normalise to a list first.
    my $descriptions = $rdf->{'RDF:Description'};
    my @items =
          ref($descriptions) eq 'ARRAY' ? @$descriptions
        : defined $descriptions         ? ($descriptions)
        :                                 ();

    foreach my $item (@items) {

        warn "## item = ", Data::Dump::dump( $item );

        # only attribute hashes can be imported
        next unless ref($item) eq 'HASH';

        # Copy the non-RDF, non-empty attributes with the namespace prefix
        # removed (e.g. "NS1:title" becomes "title").
        my $hash;
        foreach my $k ( keys %$item ) {
            next if $k =~ m/^RDF:/;
            my $value = $item->{$k};
            next if !defined $value || $value eq '';
            my $n = $k;
            $n =~ s/^\w+://;    # strip namespace
            $hash->{$n} = $value;
        }

        warn "## hash = ", Data::Dump::dump( $hash );

        # The id names the data directory of the page; without it there is
        # nothing to fetch.
        my $item_id = $hash->{id};
        unless ( defined $item_id ) {
            Jifty->log->warn("can't import ScrapBook item without id");
            next;
        }

        # fetch full-text content and import it

        my $content_path = $dir . '/data/' . $item_id . '/index.html';
        if ( ! -r $content_path ) {
            Jifty->log->warn("can't import $content_path: $!");
            next;
        }

        # read_file reports I/O errors itself; test definedness so that an
        # empty page (or one containing just "0") does not abort the import.
        my $content = read_file( $content_path );
        die "can't read $content_path: $!" unless defined $content;

        # create date from id (expected form: YYYYMMDDhhmmss)
        #
        # NOTE(review): $dt is not used below -- 'issued' receives the raw
        # id string. Confirm whether Grep::Model::Item expects the DateTime
        # object instead. DateTime is not loaded by this file; presumably it
        # is loaded elsewhere in the application -- verify.
        my $dt;
        if ( $item_id =~ m/^(\d{4})(\d\d)(\d\d)(\d\d)(\d\d)(\d\d)$/ ) {
            $dt = DateTime->new(
                year   => $1,
                month  => $2,
                day    => $3,
                hour   => $4,
                minute => $5,
                second => $6,
                #time_zone => 'UTC',
            );
        } else {
            warn "can't parse date from ", $item_id;
        }

        my $i = Grep::Model::Item->new();
        my ( $id, $msg ) = $i->load_or_create(
            in_feed => $feed,
            title   => $hash->{title},
            link    => $hash->{source},
            content => $content,
            issued  => $item_id,
        );

        # same trace line as before, without uninitialized-value warnings
        warn ">> item ",
            ( defined $id  ? $id  : '' ), " ",
            ( defined $msg ? $msg : '' ), "\n";
    }

    return 1;
}
123 |
|
124 |
|
125 |
1; |