Grep/Import/ScrapBook.pm

#!/usr/bin/perl

use warnings;
use strict;

package Grep::Import::ScrapBook;

=head1 NAME

Grep::Import::ScrapBook - importer for local ScrapBook pages

=head1 CONFIGURATION

You can symlink your ScrapBook directory

  ~/Grep/share/web/static$ ln -sf /home/dpavlin/private/ScrapBook scrapbook

or set the C<ScrapBookDir> application configuration option to another path
(relative to the static root of the Grep installation).

=cut

use XML::Simple;
use File::Slurp;
use Data::Dump qw/dump/;

=head2 import

  my $stats = Grep::Import::ScrapBook->import( $search );

Reads C<scrapbook.rdf> from the configured ScrapBook directory and, for every
C<RDF:Description> entry, loads C<data/E<lt>idE<gt>/index.html> and stores it as
a L<Grep::Model::Item> in a feed titled C<ScrapBook>. Newly created items are
also passed to C<< $search->add >>.

C<$search> is required and must be a C<Grep::Search>; otherwise the method
dies with C<need search> or C<search is ... and not Grep::Search>.

Returns C<1> when the ScrapBook directory or C<scrapbook.rdf> does not exist.
Otherwise returns a hash reference with the counters C<total>, C<failure>,
C<old> and C<new> (only counters that were incremented are present), or
C<undef> when the RDF file contains no entries.

Note that Perl calls a method named C<import> implicitly on
C<use Grep::Import::ScrapBook;>, which would die with C<need search>. Load
the module with C<require> or C<use Grep::Import::ScrapBook ();> instead.

=cut

sub import {
        my $self = shift;
        my $search = shift or die "need search";

        # eval keeps an unblessed reference from dying inside ->isa itself,
        # so the caller always gets the descriptive message below
        die "search is ", ref($search), " and not Grep::Search"
                unless eval { $search->isa('Grep::Search') };

        my $dir =
                Jifty::Util->app_root . '/' .
                Jifty->config->framework('Web')->{'StaticRoot'} . '/' .
                Jifty->config->app('ScrapBookDir');

        my $path = $dir . '/scrapbook.rdf';
        $path =~ s!//+!/!g;     # collapse repeated slashes

        # a missing ScrapBook directory is not an error, there is just nothing to import
        if ( ! -e $dir  || ! -e $path ) {
                Jifty->log->warn("Skipping ScrapBook import $path: $!");
                return 1;
        }

        my $rdf = XMLin(
                $path,
#               KeyAttr => [ qw/RDF:about/ ],
        ) || die "can't open $path: $!";

#       warn "## original rdf -> ", dump( $rdf );

        my $feed = Grep::Model::Feed->new();
        $feed->load_or_create(
                uri => 'file://' . $path,
                title => 'ScrapBook',
                #source => 'Grep::Source',
        );

        # XML::Simple returns a lone element as a hash reference and only
        # repeated elements as an array reference, so normalise both to a list
        my $descriptions = $rdf->{'RDF:Description'};
        my @items =
                ref($descriptions) eq 'ARRAY' ? @$descriptions :
                ref($descriptions) eq 'HASH'  ? ( $descriptions ) :
                                                ();

        my $stats;

        foreach my $item ( @items ) {

                $stats->{total}++;

                #warn "## item = ",dump( $item );

                # keep non-empty, non-RDF attributes, keyed without namespace prefix
                my $hash;
                foreach my $k ( keys %$item ) {
                        next if $k =~ m/^RDF:/;
                        my $v = $item->{$k};
                        next if ( ! defined $v || $v eq '' );
                        my $n = $k;
                        $n =~ s/^\w+://;        # strip namespace
                        $hash->{$n} = $v;
                }

                #warn "## hash = ", dump( $hash );

                # without an id there is no data directory to read from
                my $id = $hash->{id};
                if ( ! defined $id ) {
                        Jifty->log->warn("can't import item without id from $path");
                        $stats->{failure}++;
                        next;
                }


                # fetch full-text content and import it

                my $content_path = $dir . '/data/' . $id . '/index.html';
                if ( ! -r $content_path ) {
                        Jifty->log->warn("can't import $content_path: $!");
                        $stats->{failure}++;
                        next;
                }

                # test definedness, not truth: an empty (or "0") page is still
                # a successful read and must not abort the whole import
                my $content = read_file( $content_path );
                die "can't read $content_path: $!" unless defined $content;


                # create date from id

                # NOTE(review): $dt is built (which validates the timestamp) but
                # never read; issued below still receives the raw id string
                my $dt;
                if ( $id =~ m/^(\d{4})(\d\d)(\d\d)(\d\d)(\d\d)(\d\d)$/ ) {
                        $dt = DateTime->new(
                                year    => $1,
                                month   => $2,
                                day     => $3,
                                hour    => $4,
                                minute  => $5,
                                second  => $6,
                                #time_zone => 'UTC',
                        );
                } else {
                        warn "can't parse date from ", $id;
                }

                my $i = Grep::Model::Item->new();
                my ($ok,$msg) = $i->load_or_create(
                        in_feed => $feed,
                        title => $hash->{title},
                        link => $hash->{source},
                        content => $content,
                        issued => $id,
                );

                if ( ! $ok ) {
                        Jifty->log->error( $msg );
                        $stats->{failure}++;
                        next;
                }

                # a message starting with "Found" is counted as an existing item;
                # anything else is treated as new and handed to the search index
                if ( $msg && $msg =~ m/^Found/ ) {
                        $stats->{old}++;
                } else {
                        $stats->{new}++;
                        Jifty->log->info("imported ", $i->id ," ", $i->link, " ", length( $content ), " bytes");
                        $search->add( $i, $i->in_feed->owner->id );
                }

        }

        return $stats;
}

=head1 SEE ALSO

L<http://amb.vis.ne.jp/mozilla/scrapbook/> - ScrapBook Firefox extension

=cut

1;
1	#!/usr/bin/perl
2
3	use warnings;
4	use strict;
5
6	package Grep::Import::ScrapBook;
7
8	=head1 NAME
9
10	Grep::Import::ScrapBook - importer for local ScrapBook pages
11
12	=head1 CONFIGURATION
13
14	You can symlink your ScrapBook directory
15
16	~/Grep/share/web/static$ ln -sf /home/dpavlin/private/ScrapBook scrapbook
17
18	or modify L<ScrapBookDir> path (relative to Grep installation static root).
19
20	=cut
21
22	use XML::Simple;
23	use File::Slurp;
24	use Data::Dump qw/dump/;
25
# import( $self, $search ): import locally saved ScrapBook pages as feed items.
# Dies with "need search" when $search is false and with
# "search is ... and not Grep::Search" when the isa check fails.
# Returns 1 when the ScrapBook directory or scrapbook.rdf does not exist,
# otherwise $stats (hash reference with total/failure/old/new counters;
# it stays undef when the loop body never runs).
26	sub import {
27	my $self = shift;
28	my $search = shift or die "need search";
29	die "search is ", ref($search), " and not Grep::Search" unless ($search->isa('Grep::Search'));
30
# ScrapBook directory = application root + configured static root + ScrapBookDir
31	my $dir =
32	Jifty::Util->app_root . '/' .
33	Jifty->config->framework('Web')->{'StaticRoot'} . '/' .
34	Jifty->config->app('ScrapBookDir');
35
36	my $path = $dir . '/scrapbook.rdf';
# collapse repeated slashes in the RDF path
37	$path =~ s!//+!/!g;
38
# nothing to import when the directory or the RDF index is missing
# NOTE(review): the `\|\|` here and below looks like an escaped `||` from the listing export - confirm
39	if ( ! -e $dir \|\| ! -e $path ) {
40	Jifty->log->warn("Skipping ScrapBook import $path: $!");
41	return 1;
42	}
43
44	my $rdf = XMLin(
45	$path,
46	# KeyAttr => [ qw/RDF:about/ ],
47	) \|\| die "can't open $path: $!";
48
49	# warn "## original rdf -> ", dump( $rdf );
50
# all imported items are attached to one feed identified by the file:// URI of the RDF
51	my $feed = Grep::Model::Feed->new();
52	$feed->load_or_create(
53	uri => 'file://' . $path,
54	title => 'ScrapBook',
55	#source => 'Grep::Source',
56	);
57
58	my $stats;
59
# NOTE(review): dereferenced as an array; presumably XMLin yields an array
# reference only for repeated elements - verify the single-entry case
60	foreach my $item ( @{ $rdf->{'RDF:Description'} } ) {
61
62	$stats->{total}++;
63
64	#warn "## item = ",dump( $item );
65
# copy non-empty attributes that are not in the RDF: namespace into $hash,
# keyed by the attribute name without its namespace prefix
66	my $hash;
67	foreach my $k ( keys %$item ) {
68	next if $k =~ m/^RDF:/;
69	next if ( $item->{$k} eq '' );
70	my $n = $k;
71	$n =~ s/^\w+://; # strip namespace
72	$hash->{$n} = $item->{$k};
73	}
74
75	#warn "## hash = ", dump( $hash );
76
77
78	# fetch full-text content and import it
79
# the saved page is read from data/<id>/index.html below the ScrapBook directory;
# unreadable pages are counted as failures and skipped
80	my $content_path = $dir . '/data/' . $hash->{id} . '/index.html';
81	if ( ! -r $content_path ) {
82	Jifty->log->warn("can't import $content_path: $!");
83	$stats->{failure}++;
84	next;
85	}
# NOTE(review): `or die` tests truth, so empty content also takes the die branch
86	my $content = read_file( $content_path ) or
87	die "can't read $content_path: $!";
88
89
90	# create date from id
91
# the id is matched as a 14-digit YYYYMMDDhhmmss timestamp
# NOTE(review): $dt is assigned but never read below; issued receives the raw id
92	my $dt;
93	if ( $hash->{id} =~ m/^(\d{4})(\d\d)(\d\d)(\d\d)(\d\d)(\d\d)$/ ) {
94	$dt = DateTime->new(
95	year => $1,
96	month => $2,
97	day => $3,
98	hour => $4,
99	minute => $5,
100	second => $6,
101	#time_zone => 'UTC',
102	);
103	} else {
104	warn "can't parse date from ", $hash->{id};
105	}
106
107	my $i = Grep::Model::Item->new();
108	my ($ok,$msg) = $i->load_or_create(
109	in_feed => $feed,
110	title => $hash->{title},
111	link => $hash->{source},
112	content => $content,
113	issued => $hash->{id},
114	);
115
# a false $ok is logged with its message and counted as a failure
116	if ( ! $ok ) {
117	Jifty->log->error( $msg );
118	$stats->{failure}++;
119	next;
120	}
121
# a message starting with "Found" is counted as an already known item;
# everything else is counted as new, logged and added to the search
122	if ( $msg && $msg =~ m/^Found/ ) {
123	$stats->{old}++;
124	} else {
125	$stats->{new}++;
126	Jifty->log->info("imported ", $i->id ," ", $i->link, " ", length( $content ), " bytes");
127	$search->add( $i, $i->in_feed->owner->id );
128	}
129
130	}
131
132	return $stats;
133	}
134
135	=head1 SEE ALSO
136
137	L<http://amb.vis.ne.jp/mozilla/scrapbook/> - ScrapBook FireFox extension
138
139	=cut
140
141	1;