/[couchdb]/scripts/reblog2couchdb.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /scripts/reblog2couchdb.pl

Parent Directory | Revision Log


Revision 28 - (show annotations)
Sat Apr 25 00:11:51 2009 UTC (15 years ago) by dpavlin
File MIME type: text/plain
File size: 3327 byte(s)
create temporary table for published feed to greatly speed up query
1 #!/usr/bin/perl -w
2
3 use strict;
4 use DBI;
5 use CouchDB::Client;
6 use Data::Dump qw/dump/;
7
# Turn on autoflush for the currently selected output handle so progress
# messages show up immediately.
$| = 1;

# Any command-line argument at all switches the script into debug mode.
my $debug = scalar(@ARGV) ? 1 : 0;

my $database = 'reblog';

# DSN of the MySQL source database.
# XXX over ssh
my $dbi = "DBI:mysql:database=$database" . ";host=127.0.0.1;port=13306";

# RaiseError makes every failing DBI call die instead of returning false.
my $dbh = DBI->connect( $dbi, "", "", { RaiseError => 1 } );

# Materialize the ids of published items once, so that both queries below
# can join against this small temporary table.
$dbh->do(q{
	create temporary table published_items as
	select
		item_id
	from
		items_userdata
	where
		label = 'published' and
		value_numeric = 1
});

# Published items with an id above the bound parameter, together with the
# metadata of the feed they belong to; at most 1000 rows, ordered by id.
my $sql = q{
	select
		items.id as _id,
		items.*,
		feeds.url as feed_url,
		feeds.title as feed_title,
		feeds.link as feed_link,
		feeds.description as feed_description
	from items
	join published_items on items.id = item_id
	join feeds on items.feed_id = feeds.id
	where items.id > ?
	order by items.id asc
	limit 1000
};

# 'tags' userdata rows of published items with an item id above the bound
# parameter, ordered by item id (same ordering as the item query).
my $sql_tags = q{
	select
		items_userdata.item_id,
		value_long as tags,
		timestamp
	from items_userdata
	join published_items p
		on items_userdata.item_id = p.item_id and label='tags'
	where
		items_userdata.item_id > ?
	order by items_userdata.item_id asc
};
58
# Handle for the target CouchDB server.
my $c = CouchDB::Client->new( uri => 'http://192.168.1.13:5984/' );

die "The server cannot be reached" unless $c->testConnection;
print "CouchDB version " . $c->serverInfo->{version} . "\n";

# The CouchDB database has the same name as the MySQL one; create it on
# first use.
my $db = $c->newDB( $database );
if ( !$c->dbExists( $database ) ) {
    $db->create;
}

# The 'last_sync' document stores the checkpoint between runs. When it
# cannot be retrieved, it is created.
my $status = $db->newDoc( 'last_sync' );
eval { $status->retrieve };
if ( $@ ) {
    $status->create;
}

print "status ", dump( $status->{data} ), "\n";

# Resume after the last synced row id; debug mode always starts from 0.
my $last_row = $status->{data}{last_row_id} || 0;
if ( $debug ) {
    $last_row = 0;
}
74
# Store the current value of $last_row in the 'last_sync' status document
# and push that document to CouchDB. Takes no arguments; returns whatever
# $status->update returns.
sub commit_last_row {
    warn "commit_last_row $last_row\n";

    my $checkpoint = $status->{data};
    $checkpoint->{last_row_id} = $last_row;

    return $status->update;
}
80
print "Fetching items from $dbi id > $last_row\n";

# Items above the checkpoint.
my $sth = $dbh->prepare( $sql );
$sth->execute( $last_row );

# Column names of the result set, as a diagnostic.
warn dump( $sth->{NAME} );

# NOTE(review): DBI only guarantees rows() for SELECT after all rows were
# fetched; whether the count is available here depends on the driver.
my $items_found = $sth->rows;
print "found ", $items_found, " items to process...\n";

# Tags above the same checkpoint, read alongside the items.
my $sth_tags = $dbh->prepare( $sql_tags );
$sth_tags->execute( $last_row );
my $tags_found = $sth_tags->rows;
print "found ", $tags_found, " tags found...\n";

# Number of items handled so far in this run.
my $count = 0;

# Current row of the tags cursor, advanced while items are processed.
my $row_tags = $sth_tags->fetchrow_hashref();
97
# Fill a CouchDB document from one item row.
#
#   $row - hash reference with the columns of one item; it is NOT modified
#   $doc - CouchDB document object that receives the data
#
# The 'xml' and 'content' columns are stored as attachments ('item.xml' and
# 'content.html') instead of document fields. When the current row of the
# tags cursor ($row_tags) belongs to this item, its whitespace-separated
# tags are stored as a list under 'tags'.
#
# Returns $doc, so a call can be chained with ->update.
sub row2doc {
    my ( $row, $doc ) = @_;

    # Work on a shallow copy: the caller invokes this a second time for the
    # same row when a document has to be updated, and that call must still
    # see 'xml' and 'content' to refresh the attachments.
    my %data = %$row;

    my $xml = delete $data{xml};
    $doc->addAttachment( 'item.xml', 'application/xhtml+xml', $xml ) if $xml;

    my $content = delete $data{content};
    $doc->addAttachment( 'content.html', 'text/html', $content ) if $content;

    if ( $row_tags && $row_tags->{item_id} == $data{_id} ) {
        my $tags = $row_tags->{tags};
        $tags = '' unless defined $tags;

        # split ' ' ignores leading whitespace, so no empty tag is produced
        $data{tags} = [ split ' ', $tags ];
        warn "++ ", $data{_id}, dump( $data{tags} );
    }

    $doc->{data} = \%data;
    warn "## ", dump( \%data ) if $debug;

    return $doc;
}

# Copy every fetched item into CouchDB, creating the document or, when
# creation fails, updating the existing one.
while ( my $row = $sth->fetchrow_hashref() ) {
    my $_id = $row->{_id} || die "row needs _id";
    my $doc = $db->newDoc( $_id );

    # Both result sets are ordered by item id, so skip tag rows that belong
    # to items which are already behind us.
    while ( $row_tags && $row_tags->{item_id} < $_id ) {
        $row_tags = $sth_tags->fetchrow_hashref();
        warn "## got tags: ", dump( $row_tags ) if $debug;
    }

    row2doc( $row, $doc );

    eval { $doc->create };
    if ( $@ ) {
        # Creation failed (presumably because the document already exists):
        # fetch the stored revision, fill it again and update it. A failure
        # here still aborts the run, but is reported with the item id
        # instead of being judged by the stale error left over from create.
        eval {
            $doc->retrieve;
            row2doc( $row, $doc )->update;
            1;
        } or die "$count ERROR $_id $@\n";
        warn "$count updated $_id\n";
    }
    else {
        warn "$count created $_id\n";
    }

    $last_row = $row->{id};
    $count++;

    # checkpoint every 100 records
    commit_last_row() if $count % 100 == 0;
}

# Final checkpoint for the rows handled since the last multiple of 100.
commit_last_row();
141
142 __END__
143
144 $sql = qq{
145 update items_userdata
146 set value_numeric = 1
147 where label = 'read' and item_id in ($ids)
148 };
149
150 $dbh->do( $sql );
151

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26