#!/usr/bin/perl
# Friday August 29, 2003
# I want to create a simple Perl script that will be able to access a
# MoveableType or TypePad weblog and tell me if it has been recently
# updated... that is, if I have new comments and/or entries. In short, I
# want my own version of what is (supposedly) available at www.bloglet.com -
# except I need it only for me, only for MT/TypePad. I would use Bloglet if
# it was reliable, but it's not.
#
# I found the API docs for Movable Type but I'm not sure if I should be
# looking at the XML-RPC API or the Perl API... probably the Perl API for
# more oomph?
#
# At 19:36 -0700 2003-08-29, Benjamin Trott wrote:
# >Sure, no problem. If you want it to work with TypePad, you'll need to
# >use the XML-RPC API--the Perl API is only for MT, because it requires
# >that you run the script on the server where the libraries live.
# >
# >For the XML-RPC API, take a look at SOAP::Lite... the API to get the N
# >most recent posts is pretty simple:
#
# Unfortunately, I get an unexpected error:
# 500 Can't locate object method "new" via package
# "LWP::Protocol::https::Socket" (perhaps you forgot to load
# "LWP::Protocol::https::Socket"?) at foo.pl line 69
# (the rpc->call line)
#
# At 15:54 -0700 2003-09-01, Benjamin Trott wrote:
# >The problem is that it looks like Net::SSLeay isn't installed on your
# >machine--the URL I specified as the XML-RPC endpoint was an HTTPS
# >endpoint, and you'll need Net::SSLeay for LWP to connect to it. So the
# >options are 1) install Net::SSLeay, or 2) just use
# >http://www.typepad.com/t/api as the endpoint.
#
# It appears I also needed Crypt::SSLeay
#
# Per the README.SSL file included with the LWP bundle:
# Encryption support is obtained through the use of Crypt::SSLeay or
# IO::Socket::SSL, which can both be found from CPAN. While libwww-perl
# has "plug-and-play" support for both of these modules (as of v5.45),
# the recommended module to use is Crypt::SSLeay. In addition to
# bringing SSL support to the LWP package, IO::Socket::SSL can be used
# as an object oriented interface to SSL encrypted network sockets.
#
# There is yet another SSL interface for perl called Net::SSLeay. It has
# a more complete SSL interface and can be used for web client
# programming among other things but doesn't directly support LWP.
#
# The underlying SSL support in all of these modules is based on OpenSSL
# (formerly SSLeay). For WWW-server side SSL
# support (e.g. CGI/FCGI scripts) in Apache see
# .
#
#
# metaWeblog.getRecentPosts
# Description: Returns a list of the most recent posts in the system.
#
# Parameters: String blogid, String username, String password,
# int numberOfPosts
#
# Return value:
# on success, array of structs containing
# ISO.8601 dateCreated,
# String userid, String postid, String description, String title,
# String link, String permaLink, String mt_excerpt, String mt_text_more,
# int mt_allow_comments, int mt_allow_pings,
# String mt_convert_breaks, String mt_keywords
# on failure, fault
#
# Notes: dateCreated is in the timezone of the weblog blogid;
# link and permaLink are the URL pointing to the archived post
#
# Description is the main blog entry text
#
# e.g.
# dateCreated: 2003-08-27T06:50:09Z
# userid: 2532
# postid: 229155
# description: I feel like I'm working in a Dilbert cartoon. ....
# title: Coffee Mugs and Dress Codes
# link: http://vlb.typepad.com/family_matters/2003/08/coffee_mugs_and.html
# permaLink: http://vlb.typepad.com/family_matters/2003/08/coffee_mugs_and.html
# mt_excerpt:
# mt_text_more:
# mt_allow_comments: 1
# mt_allow_pings: 0
# mt_convert_breaks: __default__
# mt_keywords:
#
#
# Configuration file format
# %weblogs = (
# myblog => {
# title => 'My Blog',
# blogid => '1',
# username => 'me',
# password => '*password*',
# server => 'https://www.typepad.com/t/api',
# numposts => 20,
# mail_to => 'my_blog@mydomain.com',
# mail_from => 'Vicki Brown ',
# authors => {
# 1 => 'Vicki',
# 2 => 'Pat',
# 3 => 'Rich',
# 4 => 'Ed',
# },
#
# },
# blog_this => {
# title => 'Blog This',
# blogid => '5',
# username => 'you',
# password => '*password*',
# server => 'http://www.domain.com/cgi-bin/mt/mt-xmlrpc.cgi',
# numposts => 10,
# mail_to => 'blog_this@mydomain.com',
# mail_from => ''Vicki Brown ',,
# authors => {
# 1 => 'Vicki',
# 3 => 'Keri',
# },
# },
# );
#
# 1;
use strict;
use warnings;
use XMLRPC::Lite;
use Mail::Mailer;
use Data::Dumper;
use Text::Wrap qw($columns &wrap);
use HTML::TreeBuilder;
use HTML::FormatText;
our (%weblogs); # our is critical for required variables
our $blogdir = '/usr/local/etc/www/weblogs';
our $logfile = "$blogdir/blogrecent.log";
our $config = "blogrecent_entries.conf";
our $excerptsz = 30;
our ($blogname, $testmode, $nomail);
# ($columns, $wrap) # from Text::Wrap
BEGIN {
while (@ARGV && ($ARGV[0] =~ /^-/)) {
if ($ARGV[0] eq '-n') {
$testmode = 1; # no mail sent; no changes to TOC
shift;
}
if ($ARGV[0] eq '-a') {
$nomail = 1; # no mail sent; append to TOC
shift;
}
}
$blogname = $ARGV[0];
die "usage: $0 blogname\n" unless $blogname;
}
main:
{
my ($blogid, $username, $password, $server, $numposts);
my ($call, $rpc, $som, $posts);
my (@posts_byid, @posts_new);
my @entries = ();
my $last_entry = "";
my ($postid, $postdate, $posttitle, $message_body);
my $today = `date`; chomp($today);
open (LOG, ">>$logfile") or die "Cannot open logfile\n";
print LOG "\n$today\n";
print LOG "$blogname\n";
if (-e "$blogdir/$config") {
require "$blogdir/$config";
} else {
print LOG "Fatal: Cannot find required $blogdir/$config\n";
die;
}
unless (defined($weblogs{$blogname})) {
print LOG "Fatal: No such weblog $blogname\n";
die;
}
$call = 'metaWeblog.getRecentPosts';
$blogid = $weblogs{$blogname}{blogid};
$username = $weblogs{$blogname}{username};
$password = $weblogs{$blogname}{password};
$server = $weblogs{$blogname}{server};
$numposts = $weblogs{$blogname}{numposts};
$rpc = XMLRPC::Lite->new;
$rpc->proxy($server);
$som = $rpc->call( $call, $blogid, $username, $password, $numposts);
if ($som->fault) {
die "Error: ", $som->faultcode, " ", $som->faultstring, "\n";
}
$posts = $som->result; # $numposts recent entries as array of hash refs
sub by_id {
$a->{postid} <=> $b->{postid}
}
@posts_byid = sort by_id @$posts;
if (-f "$blogdir/$blogname") {
# open and slurp current table of contents for blogname
open (IN, "$blogdir/$blogname");
@entries = ;
close(IN);
$last_entry = $entries[$#entries];
print LOG "last entry: $last_entry\n";
}
# Compare recent entries to table of contents
if ($testmode && $last_entry) {
unless (@posts_new) {
# nothing recent; create fake list with last entry logged for test
@posts_new = ($posts_byid[$#posts_byid]);
}
dumprecent($blogname, \@posts_new);
# format but don't send
$message_body = format4email($blogname, \@posts_new);
notify($blogname, $message_body);
} elsif ($last_entry) {
# get info for the last entry in the t.o.c
($postid, $postdate, $posttitle) = split(/\s+/, $last_entry, 3);
# set list of potential new entries
@posts_new = @posts_byid;
# print Dumper(@posts_byid);
# walk "new" entries, discarding any we've seen before
for my $postref (@posts_byid) {
last if (${$postref}{postid} > $postid);
# else (<=)
# this one is in the table of contents (not new)
shift(@posts_new);
}
# if anything is left in @posts_new, those are the new posts
if (@posts_new) {
$message_body = format4email($blogname, \@posts_new);
notify($blogname, $message_body) unless ($nomail);
append2toc($blogname, \@posts_new);
}
} else {
# create TOC with N recent entries
# for this one case, we ignore the question of
# "new since last checked"
unless ($testmode) {
append2toc($blogname, \@posts_byid);
}
}
}
END {
close(LOG);
}
# append2toc
# append N recent entries to stored Table of Contents for weblog $blogname
#
sub append2toc
{
my ($blogname, $posts) = @_;
open (OUT, ">>$blogdir/$blogname");
for my $postref (@$posts) {
# $postref is a HASH REF
# for my $key (keys %$postref) {
# print $key, ":\t", ${$postref}{$key}, "\n";
# }
my ($postdate) = split(/T/, ${$postref}{dateCreated});
printf OUT ("%-8d %s %s\n", ${$postref}{postid}, $postdate,
${$postref}{title});
}
close (OUT);
}
# format4email
# create an email message to notify readers of recent updates
#
# Recent updates to $blogname weblog:
#
#
# ${$postref}{title}
# ${$postref}{mt_excerpt}...
# Posting Date: $postdate
#
# Read more at ${$postref}{permaLink}
sub format4email
{
my ($blogname, $posts) = @_;
my $blogtitle = $weblogs{$blogname}{title};
my $message_body = "Recent updates to $blogtitle weblog:\n\n";
my ($postdate, $posted_by, $excerpt);
for my $postref (@$posts) {
($postdate) = split(/T/, ${$postref}{dateCreated});
$posted_by = ${$postref}{userid}; # numeric ID
if (defined($weblogs{$blogname}{authors}{$posted_by})) {
$posted_by = $weblogs{$blogname}{authors}{$posted_by};
}
print LOG "new entry: $postdate $posted_by ${$postref}{title}\n";
if (${$postref}{mt_excerpt}) {
$excerpt = ${$postref}{mt_excerpt};
} else {
# use first $excerptsz words of description,
# after stripping out HTML goo
my $tree = HTML::TreeBuilder->new; # empty tree
$tree->parse(${$postref}{description}); # parse description text
# $tree->dump;
my $formatter = HTML::FormatText->new;
my $description = $formatter->format($tree);
$tree = $tree->delete;
$description =~ s/\[IMAGE\]//g;
my @words = split(/\s+/, $description);
@words = splice(@words, 0, $excerptsz);
$excerpt = wrap('', '', @words);
}
chomp($excerpt); # just in case...
$message_body .= "${$postref}{title}\n";
$message_body .= "Posted on $postdate by $posted_by\n\n";
$message_body .= " $excerpt ...\n\n";
$message_body .= "Read more at ${$postref}{permaLink}\n\n";
}
return($message_body);
}
# notify
# send the email message prepared by format4email()
#
sub notify {
my ($blogname, $message_body) = @_;
my $blogtitle = $weblogs{$blogname}{title};
my $mailer;
unless ($testmode) {
$mailer = Mail::Mailer->new;
$mailer->open({ From => $weblogs{$blogname}{mail_from},
To => $weblogs{$blogname}{mail_to},
Subject => "$blogtitle Update",
})
or die "Can't open mail: $!\n";
print $mailer $message_body;
$mailer->close;
} else {
print STDOUT $message_body;
}
}
sub dumprecent
{
my ($blogname, $posts) = @_;
my $descr;
$columns = 70;
print LOG "TEST MODE\n";
for my $postref (@$posts) {
for my $key (sort(keys %$postref)) {
if ($key eq 'description') {
my @words = split(/\s+/, ${$postref}{$key});
@words = splice(@words, 0, 5);
$descr = wrap('', '', @words);
print $key, ": ", $descr, "...\n";
} else {
print $key, ": ", ${$postref}{$key}, "\n";
if ($key eq 'title') {
print LOG "new entry: ${$postref}{title}\n";
}
}
}
print "\n\n";
}
}