Grep/Import/ScrapBook.pm

#!/usr/bin/perl

use warnings;
use strict;

package Grep::Import::ScrapBook;

=head1 NAME

Grep::Import::ScrapBook - importer for local ScrapBook pages

=head1 CONFIGURATION

You can symlink your ScrapBook directory

  ~/Grep/share/web/static$ ln -sf /home/dpavlin/private/ScrapBook scrapbook

or set the C<ScrapBookDir> application config option to its path (relative to the Grep installation's static root).

=head1 SEE ALSO

L<http://amb.vis.ne.jp/mozilla/scrapbook/> - ScrapBook Firefox extension

=cut

use XML::Simple;
use File::Slurp;
use Data::Dump qw/dump/;

# import( $search )
#
# Import every page listed in the ScrapBook index (scrapbook.rdf) into the
# 'ScrapBook' feed, creating or loading one Grep::Model::Item per page whose
# data/<id>/index.html is readable.
#
# Arguments:
#   $search - object that isa Grep::Search (only validated here)
#
# Returns 1, including when the ScrapBook directory or its index is missing
# (the import is then skipped with a logged warning).
#
# Dies if $search is missing or of the wrong class, if XMLin returns a false
# value, or if read_file returns undef.
#
# NOTE(review): because this sub is named "import", a plain
# "use Grep::Import::ScrapBook;" invokes it without a search object and dies
# with "need search"; load the module with "require" or an empty import list.
sub import {
        my $self = shift;
        my $search = shift or die "need search";
        die "search is ", ref($search), " and not Grep::Search" unless ($search->isa('Grep::Search'));

        my $dir =
                Jifty::Util->app_root . '/' .
                Jifty->config->framework('Web')->{'StaticRoot'} . '/' .
                Jifty->config->app('ScrapBookDir');

        my $path = $dir . '/scrapbook.rdf';
        $path =~ s!//+!/!g;

        if ( ! -e $dir  || ! -e $path ) {
                Jifty->log->warn("Skipping ScrapBook import $path: $!");
                return 1;
        }

        # ForceArray keeps RDF:Description an array reference even when the
        # index holds a single entry; an empty KeyAttr stops XML::Simple from
        # folding that array into a hash keyed by a name/key/id attribute.
        my $rdf = XMLin(
                $path,
                ForceArray => [ 'RDF:Description' ],
                KeyAttr => [],
        ) || die "can't open $path: $!";

#       warn "## original rdf -> ", dump( $rdf );

        my $feed = Grep::Model::Feed->new();
        $feed->load_or_create(
                uri => 'file://' . $path,
                title => 'ScrapBook',
                #source => 'Grep::Source',
        );

        foreach my $item ( @{ $rdf->{'RDF:Description'} || [] } ) {

                # text-only elements are parsed as plain strings, not hashes
                next unless ref($item) eq 'HASH';

                warn "## item = ",dump( $item );

                my $hash = _strip_namespaces( $item );

                warn "## hash = ", dump( $hash );

                my $page_id = $hash->{id};
                if ( ! defined $page_id ) {
                        Jifty->log->warn("Skipping ScrapBook item without id in $path");
                        next;
                }


                # fetch full-text content and import it

                my $content_path = $dir . '/data/' . $page_id . '/index.html';
                if ( ! -r $content_path ) {
                        Jifty->log->warn("can't import $content_path: $!");
                        next;
                }
                # test definedness, not truth: an empty page is valid content
                my $content = read_file( $content_path );
                die "can't read $content_path: $!" unless defined $content;


                # create date from id

                # NOTE(review): $dt is built but never passed on; 'issued' below
                # still receives the raw id string. Confirm which form the
                # Item model expects before switching it to $dt.
                my $dt;
                if ( my @ymdhms = $page_id =~ m/^(\d{4})(\d\d)(\d\d)(\d\d)(\d\d)(\d\d)$/ ) {
                        my %part;
                        @part{ qw/year month day hour minute second/ } = @ymdhms;
                        # eval: an out-of-range field must not abort the import
                        $dt = eval { DateTime->new( %part ) };
                        warn "can't parse date from $page_id: $@" unless $dt;
                } else {
                        warn "can't parse date from ", $page_id;
                }


                my $i = Grep::Model::Item->new();
                my ($item_id,$msg) = $i->load_or_create(
                        in_feed => $feed,
                        title => $hash->{title},
                        link => $hash->{source},
                        content => $content,
                        issued => $page_id,
                );

                $item_id = '' unless defined $item_id;
                $msg     = '' unless defined $msg;

                warn ">> item $item_id $msg\n";

        }

        return 1;
}

# Return a new hash reference holding the item's attributes with the XML
# namespace prefix removed from each key; keys starting with "RDF:" and
# undefined or empty values are left out.
sub _strip_namespaces {
        my ($item) = @_;

        my %field;
        foreach my $k ( keys %$item ) {
                next if $k =~ m/^RDF:/;
                my $v = $item->{$k};
                next if ! defined $v || $v eq '';
                ( my $n = $k ) =~ s/^\w+://;    # strip namespace
                $field{$n} = $v;
        }

        return \%field;
}


1;
1	#!/usr/bin/perl
2
3	use warnings;
4	use strict;
5
6	package Grep::Import::ScrapBook;
7
8	=head1 NAME
9
10	Grep::Import::ScrapBook - importer for local ScrapBook pages
11
12	=head1 CONFIGURATION
13
14	You can symlink your ScrapBook directory
15
16	~/Grep/share/web/static$ ln -sf /home/dpavlin/private/ScrapBook scrapbook
17
18	or modify L<ScrapBookDir> path (relative to Grep installation static root).
19
20	=head1 SEE ALSO
21
22	L<http://amb.vis.ne.jp/mozilla/scrapbook/> - ScrapBook FireFox extension
23
24	=cut
25
26	use XML::Simple;
27	use File::Slurp;
28	use Data::Dump qw/dump/;
29
30	sub import {
31	my $self = shift;
32	my $search = shift or die "need search";
33	die "search is ", ref($search), " and not Grep::Search" unless ($search->isa('Grep::Search'));
34
35	my $dir =
36	Jifty::Util->app_root . '/' .
37	Jifty->config->framework('Web')->{'StaticRoot'} . '/' .
38	Jifty->config->app('ScrapBookDir');
39
40	my $path = $dir . '/scrapbook.rdf';
41	$path =~ s!//+!/!g;
42
43	if ( ! -e $dir || ! -e $path ) {
44	Jifty->log->warn("Skipping ScrapBook import $path: $!");
45	return 1;
46	}
47
48	my $rdf = XMLin(
49	$path,
50	# KeyAttr => [ qw/RDF:about/ ],
51	) || die "can't open $path: $!";
52
53	# warn "## original rdf -> ", dump( $rdf );
54
55	my $feed = Grep::Model::Feed->new();
56	$feed->load_or_create(
57	uri => 'file://' . $path,
58	title => 'ScrapBook',
59	#source => 'Grep::Source',
60	);
61
62	foreach my $item ( @{ $rdf->{'RDF:Description'} } ) {
63
64	warn "## item = ",dump( $item );
65
66	my $hash;
67	foreach my $k ( keys %$item ) {
68	next if $k =~ m/^RDF:/;
69	next if ( $item->{$k} eq '' );
70	my $n = $k;
71	$n =~ s/^\w+://; # strip namespace
72	$hash->{$n} = $item->{$k};
73	}
74
75	warn "## hash = ", dump( $hash );
76
77
78	# fetch full-text content and import it
79
80	my $content_path = $dir . '/data/' . $hash->{id} . '/index.html';
81	if ( ! -r $content_path ) {
82	Jifty->log->warn("can't import $content_path: $!");
83	next;
84	}
85	my $content = read_file( $content_path ) or
86	die "can't read $content_path: $!";
87
88
89	# create date from id
90
91	my $dt;
92	if ( $hash->{id} =~ m/^(\d{4})(\d\d)(\d\d)(\d\d)(\d\d)(\d\d)$/ ) {
93	$dt = DateTime->new(
94	year => $1,
95	month => $2,
96	day => $3,
97	hour => $4,
98	minute => $5,
99	second => $6,
100	#time_zone => 'UTC',
101	);
102	} else {
103	warn "can't parse date from ", $hash->{id};
104	}
105
106
107	my $i = Grep::Model::Item->new();
108	my ($id,$msg) = $i->load_or_create(
109	in_feed => $feed,
110	title => $hash->{title},
111	link => $hash->{source},
112	content => $content,
113	issued => $hash->{id},
114	);
115
116
117	warn ">> item $id $msg\n";
118
119	}
120
121	return 1;
122	}
123
124
125	1;