#!/usr/bin/perl # Friday August 29, 2003 # I want to create a simple Perl script that will be able to access a # MoveableType or TypePad weblog and tell me if it has been recently # updated... that is, if I have new comments and/or entries. In short, I # want my own version of what is (supposedly) available at www.bloglet.com - # except I need it only for me, only for MT/TypePad. I would use Bloglet if # it was reliable, but it's not. # # I found the API docs for Movable Type but I'm not sure if I should be # looking at the XML-RPC API or the Perl API... probably the Perl API for # more oomph? # # At 19:36 -0700 2003-08-29, Benjamin Trott wrote: # >Sure, no problem. If you want it to work with TypePad, you'll need to # >use the XML-RPC API--the Perl API is only for MT, because it requires # >that you run the script on the server where the libraries live. # > # >For the XML-RPC API, take a look at SOAP::Lite... the API to get the N # >most recent posts is pretty simple: # # Unfortunately, I get an unexpected error: # 500 Can't locate object method "new" via package # "LWP::Protocol::https::Socket" (perhaps you forgot to load # "LWP::Protocol::https::Socket"?) at foo.pl line 69 # (the rpc->call line) # # At 15:54 -0700 2003-09-01, Benjamin Trott wrote: # >The problem is that it looks like Net::SSLeay isn't installed on your # >machine--the URL I specified as the XML-RPC endpoint was an HTTPS # >endpoint, and you'll need Net::SSLeay for LWP to connect to it. So the # >options are 1) install Net::SSLeay, or 2) just use # >http://www.typepad.com/t/api as the endpoint. # # It appears I also needed Crypt::SSLeay # # Per the README.SSL file included with the LWP bundle: # Encryption support is obtained through the use of Crypt::SSLeay or # IO::Socket::SSL, which can both be found from CPAN. While libwww-perl # has "plug-and-play" support for both of these modules (as of v5.45), # the recommended module to use is Crypt::SSLeay. In addition to # bringing SSL support to the LWP package, IO::Socket::SSL can be used # as an object oriented interface to SSL encrypted network sockets. # # There is yet another SSL interface for perl called Net::SSLeay. It has # a more complete SSL interface and can be used for web client # programming among other things but doesn't directly support LWP. # # The underlying SSL support in all of these modules is based on OpenSSL # (formerly SSLeay). For WWW-server side SSL # support (e.g. CGI/FCGI scripts) in Apache see # . # # # metaWeblog.getRecentPosts # Description: Returns a list of the most recent posts in the system. # # Parameters: String blogid, String username, String password, # int numberOfPosts # # Return value: # on success, array of structs containing # ISO.8601 dateCreated, # String userid, String postid, String description, String title, # String link, String permaLink, String mt_excerpt, String mt_text_more, # int mt_allow_comments, int mt_allow_pings, # String mt_convert_breaks, String mt_keywords # on failure, fault # # Notes: dateCreated is in the timezone of the weblog blogid; # link and permaLink are the URL pointing to the archived post # # Description is the main blog entry text # # e.g. # dateCreated: 2003-08-27T06:50:09Z # userid: 2532 # postid: 229155 # description: I feel like I'm working in a Dilbert cartoon. .... # title: Coffee Mugs and Dress Codes # link: http://vlb.typepad.com/family_matters/2003/08/coffee_mugs_and.html # permaLink: http://vlb.typepad.com/family_matters/2003/08/coffee_mugs_and.html # mt_excerpt: # mt_text_more: # mt_allow_comments: 1 # mt_allow_pings: 0 # mt_convert_breaks: __default__ # mt_keywords: # # # Configuration file format # %weblogs = ( # myblog => { # title => 'My Blog', # blogid => '1', # username => 'me', # password => '*password*', # server => 'https://www.typepad.com/t/api', # numposts => 20, # mail_to => 'my_blog@mydomain.com', # mail_from => 'Vicki Brown ', # authors => { # 1 => 'Vicki', # 2 => 'Pat', # 3 => 'Rich', # 4 => 'Ed', # }, # # }, # blog_this => { # title => 'Blog This', # blogid => '5', # username => 'you', # password => '*password*', # server => 'http://www.domain.com/cgi-bin/mt/mt-xmlrpc.cgi', # numposts => 10, # mail_to => 'blog_this@mydomain.com', # mail_from => ''Vicki Brown ',, # authors => { # 1 => 'Vicki', # 3 => 'Keri', # }, # }, # ); # # 1; use strict; use warnings; use XMLRPC::Lite; use Mail::Mailer; use Data::Dumper; use Text::Wrap qw($columns &wrap); use HTML::TreeBuilder; use HTML::FormatText; our (%weblogs); # our is critical for required variables our $blogdir = '/usr/local/etc/www/weblogs'; our $logfile = "$blogdir/blogrecent.log"; our $config = "blogrecent_entries.conf"; our $excerptsz = 30; our ($blogname, $testmode, $nomail); # ($columns, $wrap) # from Text::Wrap BEGIN { while (@ARGV && ($ARGV[0] =~ /^-/)) { if ($ARGV[0] eq '-n') { $testmode = 1; # no mail sent; no changes to TOC shift; } if ($ARGV[0] eq '-a') { $nomail = 1; # no mail sent; append to TOC shift; } } $blogname = $ARGV[0]; die "usage: $0 blogname\n" unless $blogname; } main: { my ($blogid, $username, $password, $server, $numposts); my ($call, $rpc, $som, $posts); my (@posts_byid, @posts_new); my @entries = (); my $last_entry = ""; my ($postid, $postdate, $posttitle, $message_body); my $today = `date`; chomp($today); open (LOG, ">>$logfile") or die "Cannot open logfile\n"; print LOG "\n$today\n"; print LOG "$blogname\n"; if (-e "$blogdir/$config") { require "$blogdir/$config"; } else { print LOG "Fatal: Cannot find required $blogdir/$config\n"; die; } unless (defined($weblogs{$blogname})) { print LOG "Fatal: No such weblog $blogname\n"; die; } $call = 'metaWeblog.getRecentPosts'; $blogid = $weblogs{$blogname}{blogid}; $username = $weblogs{$blogname}{username}; $password = $weblogs{$blogname}{password}; $server = $weblogs{$blogname}{server}; $numposts = $weblogs{$blogname}{numposts}; $rpc = XMLRPC::Lite->new; $rpc->proxy($server); $som = $rpc->call( $call, $blogid, $username, $password, $numposts); if ($som->fault) { die "Error: ", $som->faultcode, " ", $som->faultstring, "\n"; } $posts = $som->result; # $numposts recent entries as array of hash refs sub by_id { $a->{postid} <=> $b->{postid} } @posts_byid = sort by_id @$posts; if (-f "$blogdir/$blogname") { # open and slurp current table of contents for blogname open (IN, "$blogdir/$blogname"); @entries = ; close(IN); $last_entry = $entries[$#entries]; print LOG "last entry: $last_entry\n"; } # Compare recent entries to table of contents if ($testmode && $last_entry) { unless (@posts_new) { # nothing recent; create fake list with last entry logged for test @posts_new = ($posts_byid[$#posts_byid]); } dumprecent($blogname, \@posts_new); # format but don't send $message_body = format4email($blogname, \@posts_new); notify($blogname, $message_body); } elsif ($last_entry) { # get info for the last entry in the t.o.c ($postid, $postdate, $posttitle) = split(/\s+/, $last_entry, 3); # set list of potential new entries @posts_new = @posts_byid; # print Dumper(@posts_byid); # walk "new" entries, discarding any we've seen before for my $postref (@posts_byid) { last if (${$postref}{postid} > $postid); # else (<=) # this one is in the table of contents (not new) shift(@posts_new); } # if anything is left in @posts_new, those are the new posts if (@posts_new) { $message_body = format4email($blogname, \@posts_new); notify($blogname, $message_body) unless ($nomail); append2toc($blogname, \@posts_new); } } else { # create TOC with N recent entries # for this one case, we ignore the question of # "new since last checked" unless ($testmode) { append2toc($blogname, \@posts_byid); } } } END { close(LOG); } # append2toc # append N recent entries to stored Table of Contents for weblog $blogname # sub append2toc { my ($blogname, $posts) = @_; open (OUT, ">>$blogdir/$blogname"); for my $postref (@$posts) { # $postref is a HASH REF # for my $key (keys %$postref) { # print $key, ":\t", ${$postref}{$key}, "\n"; # } my ($postdate) = split(/T/, ${$postref}{dateCreated}); printf OUT ("%-8d %s %s\n", ${$postref}{postid}, $postdate, ${$postref}{title}); } close (OUT); } # format4email # create an email message to notify readers of recent updates # # Recent updates to $blogname weblog: # # # ${$postref}{title} # ${$postref}{mt_excerpt}... # Posting Date: $postdate # # Read more at ${$postref}{permaLink} sub format4email { my ($blogname, $posts) = @_; my $blogtitle = $weblogs{$blogname}{title}; my $message_body = "Recent updates to $blogtitle weblog:\n\n"; my ($postdate, $posted_by, $excerpt); for my $postref (@$posts) { ($postdate) = split(/T/, ${$postref}{dateCreated}); $posted_by = ${$postref}{userid}; # numeric ID if (defined($weblogs{$blogname}{authors}{$posted_by})) { $posted_by = $weblogs{$blogname}{authors}{$posted_by}; } print LOG "new entry: $postdate $posted_by ${$postref}{title}\n"; if (${$postref}{mt_excerpt}) { $excerpt = ${$postref}{mt_excerpt}; } else { # use first $excerptsz words of description, # after stripping out HTML goo my $tree = HTML::TreeBuilder->new; # empty tree $tree->parse(${$postref}{description}); # parse description text # $tree->dump; my $formatter = HTML::FormatText->new; my $description = $formatter->format($tree); $tree = $tree->delete; $description =~ s/\[IMAGE\]//g; my @words = split(/\s+/, $description); @words = splice(@words, 0, $excerptsz); $excerpt = wrap('', '', @words); } chomp($excerpt); # just in case... $message_body .= "${$postref}{title}\n"; $message_body .= "Posted on $postdate by $posted_by\n\n"; $message_body .= " $excerpt ...\n\n"; $message_body .= "Read more at ${$postref}{permaLink}\n\n"; } return($message_body); } # notify # send the email message prepared by format4email() # sub notify { my ($blogname, $message_body) = @_; my $blogtitle = $weblogs{$blogname}{title}; my $mailer; unless ($testmode) { $mailer = Mail::Mailer->new; $mailer->open({ From => $weblogs{$blogname}{mail_from}, To => $weblogs{$blogname}{mail_to}, Subject => "$blogtitle Update", }) or die "Can't open mail: $!\n"; print $mailer $message_body; $mailer->close; } else { print STDOUT $message_body; } } sub dumprecent { my ($blogname, $posts) = @_; my $descr; $columns = 70; print LOG "TEST MODE\n"; for my $postref (@$posts) { for my $key (sort(keys %$postref)) { if ($key eq 'description') { my @words = split(/\s+/, ${$postref}{$key}); @words = splice(@words, 0, 5); $descr = wrap('', '', @words); print $key, ": ", $descr, "...\n"; } else { print $key, ": ", ${$postref}{$key}, "\n"; if ($key eq 'title') { print LOG "new entry: ${$postref}{title}\n"; } } } print "\n\n"; } }