#!/usr/bin/perl
use warnings;
use strict;
use XML::Simple;
use File::Find;
use Regexp::Common qw/balanced/;
use Socialtext::Resting;
use Encode;
use HTTP::Date;
use POSIX qw/strftime/;
use File::Slurp;
use File::MMagic::XS;
use Getopt::Long;
use Data::Dump qw/dump/;
# Command-line configuration.
#   --debug        repeatable; any non-zero value enables the "##" dumps on STDERR
#   --max=N        stop after N pages have been uploaded (default 999)
#   --attachments  also upload the attachments recorded for each page
my $debug       = 0;
my $max         = 999;
my $attachments = 0;

# A page is additionally tagged with each of these whose text occurs
# (case-insensitively) in the page's full name.
my @create_tags = qw(
    Trazi
    Nudi
    SvakodnevneDovitljivosti
    G33koSkop
);

# GetOptions returns false on unknown or malformed options; stop here instead
# of silently falling back to the defaults and importing against the server.
GetOptions(
    'debug+'      => \$debug,
    'max=i'       => \$max,
    'attachments' => \$attachments,
) or die "usage: $0 [--debug] [--max=N] [--attachments] [export-dir]\n";
# $page       - hash ref filled below: full page name => { content, original, date, attachments }
# $page_date  - declared here but not used in the code visible in this file
# @page_names - last path component of every collected page name
my $page;
my $page_date;
my @page_names;

print "Importing $max pages", ( $attachments ? " with attachments" : "" ), "...\n";

# File::Find callback. Parses one exported XML file, skips pages whose name
# starts with "TamSystem" or that carry no LastModified value, joins the
# widget bodies with a "----" separator line, and records the result in $page.
my $collect_page = sub {
    my $path = $File::Find::name;
    return unless -f $path;
    warn "+ $path\n";

    my $ref = XMLin( $path,
        KeyAttr    => {
            'attachment' => '+name',
            'meta' => 'name',
        },
        ForceArray => [ 'attachment', 'meta', 'widget' ],
    ) || die "can't open $path: $!";
    warn "## $path = ",dump( $ref ) if $debug;

    my $name = $ref->{name} || die "no name in $path";
    return if $name =~ m/^TamSystem/;

    my $date = $ref->{meta}->{LastModified}->{value};
    unless ( $date ) {
        warn "SKIP: no LastModified in $path $name";
        return;
    }

    # Concatenate all widget bodies; every widget must carry data.
    my $data;
    foreach my $widget ( @{ $ref->{widgets}->{widget} } ) {
        warn "## w = ",dump( $widget ) if $debug;
        $data .= "\n----\n" if $data;
        $data .= $widget->{data} || die "no data?";
    }

    # Attachment files are looked up next to the page file, with "pages/"
    # replaced by "attachments/" and ".<attachment name>" appended.
    my $page_attachments;
    if ( my $attached = $ref->{attachment} ) {
        foreach my $file_name ( keys %$attached ) {
            ( my $full_path = $path ) =~ s,pages/,attachments/,;
            $full_path .= ".$file_name";
            die "$full_path doesn't exist" unless -e $full_path;
            push @$page_attachments, {
                full_path => $full_path,
                name => ( $file_name || $attached->{$file_name}->{desc} || 'noname' ),
            };
        }
    }

    $page->{ $name } = {
        content => convert_markup( $data ),
        original => $data,
        date => convert_date( $date ),
        attachments => $page_attachments,
    };

    # Only the part after the last "/" is remembered for later linking.
    ( my $short_name = $name ) =~ s,^.+/([^/]+)$,$1,;
    push @page_names, $short_name;
};

# Walk the directory given as first argument (default: current directory)
# without chdir-ing, so $File::Find::name stays usable as a path.
find({
    wanted => $collect_page,
    no_chdir=>1,
}, shift @ARGV || '.');
# Full names (hash keys) of every collected page.
my @pages = ( keys %$page );
warn "found following pages: ", join(", ", @page_names),"\n";

# Pattern matching any collected short page name as a whole word; capture
# group 1 is the name. Each name is escaped with quotemeta so that regex
# metacharacters inside a page name are matched literally instead of
# corrupting (or breaking compilation of) the alternation.
my $page_link_re = '\b(' . join('|', map { quotemeta } @page_names) . ')\b';

# REST client for the target workspace.
# NOTE(review): server, workspace and credentials are hard-coded here.
my $Rester = Socialtext::Resting->new(
    username => 'tamtam',
    password => 'import',
    server => 'http://saturn.ffzg.hr/',
    workspace => 'razmjenavjestina',
);
$Rester->put_workspacetag('TamTam');
# Format an epoch timestamp as "YYYY-MM-DD HH:MM:SS +0000".
#
#   $date - seconds since the epoch
#
# Returns the formatted string, always expressed in UTC.
sub convert_date {
    my $date = shift;
    # Alternative kept from the original author: return time2str( $date );
    #
    # The broken-down time comes from gmtime(), so the offset is spelled out
    # literally: strftime's %z reports the offset of the *local* time zone,
    # which would mislabel the UTC time on any host not running in UTC.
    return strftime('%F %T +0000', gmtime( $date ));
}
# Rewrite a heading delimited by equal-length runs of "=" (for example
# "== Title ==") as a line of the form "^^ Title", surrounded by newlines,
# with one "^" per "=" of the opening run.
#
#   $h - candidate heading text
#
# Returns the rewritten heading, or the input unchanged when it does not
# have the "=... text =..." shape.
sub header {
    my ($line) = @_;

    # Guard clause: anything that is not a well-formed heading passes through.
    return $line unless $line =~ m/^(=+)\s+(.+?)\s+\1$/;

    my ( $markers, $title ) = ( $1, $2 );
    my $depth = length $markers;
    return "\n" . ( '^' x $depth ) . " $title\n";
}
# Wrap a piece of text in a delimiter.
#
#   $with - delimiter placed both before and after the text
#   $what - text to wrap
#
# Returns the concatenation delimiter . text . delimiter.
sub surround {
    my $with = shift;
    my $what = shift;
    return "$with$what$with";
}
# Turn a "{{{ ... }}}" block into a ".pre" section.
#
#   $text - the block, normally including its "{{{" / "}}}" delimiters
#
# Strips a leading "{{{" and a trailing "}}}" together with the whitespace
# next to them (when present) and returns the remainder between two ".pre"
# marker lines.
sub pre {
    my $text = shift;
    # Literal braces are escaped: unescaped "{" inside a pattern triggers
    # "Unescaped left brace in regex" diagnostics on modern Perl versions.
    $text =~ s/^\{\{\{\s*//s;
    $text =~ s/\s*\}\}\}$//s;
    return "\n.pre\n" . $text . "\n.pre\n";
}
# Rewrite the source wiki markup of one page into the target markup.
#
#   $body - page text in the source markup
#
# Returns the converted text. The substitutions run in a fixed order
# (longer quote runs before shorter ones, labelled links before bare ones,
# image attachments before generic ones); do not reorder them.
sub convert_markup {
    my $body = shift;

    # macros
    $body =~ s/\Q[[TableOfContents]]\E/{toc}/gs;
    $body =~ s/\Q[[BR]]\E/\n/gs;

    # headings: "= x =" .. "===== x =====" are handed to header()
    # NOTE(review): "===== " is listed twice in -begin; possibly a sixth
    # level was intended - left untouched.
    $body =~ s/$RE{balanced}{-begin => "= |== |=== |==== |===== |===== "}{-end => " =| ==| ===| ====| ====="}{-keep}/header($1)/gse;

    # inline quoting: four quotes -> `x`, three -> *x*, two -> _x_
    $body =~ s/''''(.+?)''''/surround('`',$1)/gse;
    $body =~ s/'''(.+?)'''/surround('*',$1)/gse;
    $body =~ s/''(.+?)''/surround('_',$1)/gse;

    # "{{{ ... }}}" blocks are handed to pre()
    $body =~ s/$RE{balanced}{-begin => "{{{"}{-end => "}}}"}{-keep}/pre($1)/gse;

    # fix bullets: drop whitespace in front of a "*" at the start of a line
    $body =~ s/^\s+([\*])/$1/gm;

    # fix links: ["Name"] -> [Name], [url label] -> "label"<url>, [url] -> url
    # (both http:// and https:// URLs are recognised)
    $body =~ s/\["([^"]+)"\]/[$1]/gs;
    $body =~ s,\[(https?://\S+)\s+([^\]]+)\],"$2"<$1>,gs;
    $body =~ s,\[(https?://[^\]]+)\],$1,gs;

    # fix hr: make sure "----" is separated by a newline from adjacent text
    $body =~ s,(\S+)----,$1\n----,gs;
    $body =~ s,----(\S+),----\n$1,gs;

    # attachments: image extensions become {image: ...}, the rest {file: ...}.
    # The "." before the extension is required so that a name that merely
    # ends in the letters "gif", "png", "jpg" or "jpeg" is not mistaken for
    # an image.
    $body =~ s,\[attachment:([^\]]+\.)(gif|png|jpg|jpeg)\],{image: $1$2},gis;
    $body =~ s,\[attachment:([^\]]+)\],{file: $1},gs;

    return $body;
}
# Upload every collected page: first the original markup, then its tags and
# (with --attachments) its attachments, and finally the converted markup,
# so the converted text ends up as the most recent PUT for the page.
my $count = 0;
# Used to determine the MIME type of each attachment file.
my $m = File::MMagic::XS->new;
foreach my $name ( keys %$page ) {
    # Stop once $max pages have been processed.
    last if $count++ == $max;
    my $p = $page->{$name};
    warn "## $name = ",dump( $p ) if $debug;
    my $body = $p->{content} || die "no content?";
    my $date = $p->{date} || die "no date?";
    my @tags = ( 'TamTam' );
    my $full_name = $name;
    # "A/B/Page" is uploaded as "Page" and tagged with "A" and "B".
    if ( $name =~ m!/! ) {
        my @page_tags = split(m!/!, $name);
        $name = pop @page_tags; # remove page name
        push @tags, @page_tags;
    }
    # link named pages
    $body =~ s,\b$page_link_re\b,[$1],gs;
    # remove empty backtick pairs left over by the markup conversion
    $body =~ s,``,,gs;
    # footer pointing back to the original page
    $body .= qq{
----
"original"<http://www.razmjenavjestina.org/$full_name> {date: $date}
};
    Encode::_utf8_off( $body );
    print "$name $date\n";
    # original markup
    $Rester->put_page( $name, { content => $p->{original}, date => $date });
    foreach my $t ( @create_tags ) {
        push @tags, $t if $full_name =~ m/$t/i;
    }
    # A path component that is also listed in @create_tags would appear
    # twice (the match above runs on the full name), so send each tag once,
    # keeping the first occurrence and the original order.
    my %seen;
    foreach my $tag ( grep { !$seen{$_}++ } @tags ) {
        $Rester->put_pagetag( $name, $tag, { date => $date } );
        print "+ tag $tag\n";
    }
    if ( $attachments ) {
        foreach my $att ( @{ $p->{attachments} } ) {
            my $type = $m->get_mime( $att->{full_path} );
            my $content = read_file( $att->{full_path} );
            print "+ attachment ", $att->{name}," $type ", length($content), " bytes\n";
            $Rester->post_attachment($name, $att->{name}, $content, $type );
        }
    }
    # converted page
    $Rester->put_page( $name, { content => $body, date => $date });
}