"""
gna2atom.py

Parses Google News Alerts received via e-mail and processes them.
Includes functionality to post to an Atom-enabled blog, and create an
Atom feed suitable for syndication.

copyright 2004, Nathan R. Yergler, Creative Commons
Licensed under the GNU GPL 2.

$Id: gna2atom.py,v 1.7 2004/11/08 02:48:11 nyergler Exp $
"""

import email
import poplib
import smtplib
import urllib

try:
    # Python 2 module names, as the script was originally written.
    import urllib2
    import urlparse
    from urllib import urlencode
except ImportError:
    # Python 3: bind the equivalent modules to the same names so the rest
    # of the file does not care which interpreter it runs on.
    import urllib.parse as urlparse
    import urllib.request as urllib2
    from urllib.parse import urlencode

mail_server = 'mail'
username = 'xxx'
password = 'xxx'

# Value of the From: header that identifies an alert message; compared for
# exact equality in gnaMessages().
# NOTE(review): the value ends in a space, which suggests an "<address>" part
# was stripped from this copy of the file -- confirm against the original.
gna_from = 'Google Alerts '

# Number of trailing lines dropped from the plain-text body before it is split
# into alerts.
# NOTE(review): presumably the fixed footer Google appends -- confirm.
FOOTER_LINES = 12


class NewsAlert:
    """One news item parsed out of the text of a Google News Alert.

    Attributes (all strings, '' when the alert text does not supply them):
    title, author, summary, url.
    """

    def __init__(self, alerttext):
        self.title = self.summary = self.url = self.author = ''

        self.parseMessage(alerttext)

    def parseMessage(self, message):
        """Extract title, author, summary and url from one alert.

        message -- the text of a single alert (one blank-line-delimited
                   chunk of the e-mail body).  The first line is taken as
                   the title and the second as the author.
        """
        lines = message.split('\n')

        self.title = lines[0]
        # Short or empty chunks must not raise; leave missing fields empty.
        self.author = lines[1] if len(lines) > 1 else ''
        self.url = ''
        self.summary = ''

        # determine the line containing the article url
        # NOTE(review): this copy of the file is truncated in the middle of
        # this test.  The '<http://' literal and everything below it in this
        # method are a reconstruction: the third line from the end is used
        # when it looks like a bracketed URL, otherwise the last line is
        # assumed to hold it.  Verify against the original source.
        if (len(lines) >= 3 and len(lines[-3]) > 8
                and lines[-3][:8] == '<http://'):
            url_index = len(lines) - 3
        else:
            url_index = len(lines) - 1

        # Indexes 0 and 1 are the title and author, never the URL.
        if url_index >= 2:
            self.url = lines[url_index].strip().strip('<>')
            self.summary = ' '.join(lines[2:url_index])


# NOTE(review): this copy of the file is damaged at this point.  Everything
# between the URL test in NewsAlert.parseMessage and the POP3 helpers below
# was lost, apart from the tail of a string template that was formatted with:
#
#     (self.title, self.summary, self.author_name, self.author_url,
#      self.issued, self.created, self.link, self.content)
#
# Presumably this belonged to the Atom entry support mentioned in the module
# docstring.  It is deliberately not re-invented here; restore it from the
# original file.  Nothing else in this file refers to it.


###############################################################
#
# POP3 support functions
#

def connect(server, username, passwd):
    """Connect to the specified server using the username and password;
    return the server instance.

    server   -- host name of the POP3 server
    username -- account name sent with USER
    passwd   -- password sent with PASS

    The server's reply to each command is printed.
    """
    connection = poplib.POP3(server)
    print(connection.user(username))
    print(connection.pass_(passwd))

    return connection


def _message_from_lines(raw_lines):
    """Build an email message from the list of lines returned by retr().

    poplib hands back byte strings on Python 3 and plain strings on
    Python 2, so pick the matching parser.
    """
    if raw_lines and not isinstance(raw_lines[0], str):
        return email.message_from_bytes(b"\n".join(raw_lines))
    return email.message_from_string("\n".join(raw_lines))


def gnaMessages(server, delete=False):
    """Retrieve messages from the server; if the message is from the
    Google news service, yield a NewsAlert instance for each alert in it.

    server -- a connected POP3 instance, as returned by connect()
    delete -- if True, mark each alert message for deletion once all of
              its alerts have been yielded and control has returned to
              this function

    After all messages have been processed, or if the caller abandons the
    generator early, disconnect from the server.
    """
    try:
        num_msgs = server.stat()[0]

        # for each message on the server (POP3 numbers messages from 1)
        for number in range(1, num_msgs + 1):
            msg = _message_from_lines(server.retr(number)[1])
            if msg['From'] != gna_from:
                continue

            # A multipart message carries a list of sub-messages; a simple
            # message is its own (only) segment.
            if msg.is_multipart():
                segments = msg.get_payload()
            else:
                segments = [msg]

            for segment in segments:
                if segment.get_content_type() != 'text/plain':
                    continue

                # plain text segment; drop the trailing lines, then break
                # on blank lines
                body_lines = segment.get_payload().split("\n")
                msg_text = "\n".join(body_lines[:-FOOTER_LINES])

                for alert in msg_text.split('\n\n'):
                    if not alert.strip():
                        # runs of blank lines produce empty chunks
                        continue
                    yield NewsAlert(alert)

            if delete:
                # delete the message; done once per message, since a second
                # DELE for the same message number is an error
                server.dele(number)
    finally:
        # disconnect from the server
        server.quit()


###############################################################
#
# Google News Alert message handlers
#

def nullHandler(msg):
    """NewsAlert handler which performs no processing.

    A handler should require a single parameter, an instance of NewsAlert.
    """


def printMessage(msg):
    """NewsAlert handler which prints the alert's fields, one per line,
    followed by a blank line."""
    print(msg.title)
    print(msg.author)
    print(msg.summary)
    print(msg.url)
    print('')


def SimpleBlogHandler(msg):
    """NewsAlert handler which posts the alert as a draft blog entry and
    e-mails a notification containing the server's response.

    msg -- the NewsAlert to post; its title, summary and url are used.
    """
    POSTDRAFT_URL = 'http://apps.creativecommons.org:7080/blog/press/postdraft'
    SMTP_SERVER = 'localhost'
    SEND_TO = ('foo@bar.com',)

    # NOTE(review): this copy of the file had two placeholders but three
    # arguments here, which raises TypeError.  The arguments (summary, url,
    # url) indicate a link whose markup was stripped; the anchor below is a
    # reconstruction -- confirm the exact markup against the original.
    post_data = {
        'title': msg.title,
        'bodytext': """%s
<a href="%s">%s</a>""" % (msg.summary, msg.url, msg.url),
        'username': "Google News Import",
    }

    # urlencode() output is plain ASCII; Python 3 requires the body as bytes.
    response = urllib2.urlopen(POSTDRAFT_URL,
                               data=urlencode(post_data).encode('ascii'))
    try:
        result = response.read()
    finally:
        response.close()
    if not isinstance(result, str):
        # Python 3 returns bytes; the notification below needs text.
        result = result.decode('utf-8', 'replace')

    # send an email to the specified parties to
    # let them know a draft was created
    alert_msg = "From: gnaimport@yergler.net\r\nTo: %s\r\nSubject: Google News Alert draft blog entry created\r\n\r\nA news alert has been imported into the blog. The URL is:\r\n\r\n%s\r\n\r\n" % (", ".join(SEND_TO), result)

    server = smtplib.SMTP(SMTP_SERVER)
    try:
        server.sendmail('gnaimport@yergler.net', SEND_TO, alert_msg)
    finally:
        server.quit()

    print(result)


###############################################################
#
# main control functions
#

def main():
    """Fetch waiting alerts from the mail server and run every handler on
    each one, deleting alert messages from the server as they are
    processed."""
    # set up the message handlers (add printMessage to echo each alert)
    handlers = (nullHandler, SimpleBlogHandler)

    # connect to the mail server
    popserve = connect(mail_server, username, password)

    # retrieve waiting messages from the mail server
    for msg in gnaMessages(popserve, True):
        for handler in handlers:
            handler(msg)


if __name__ == '__main__':
    main()