commit fa1b2967c81bc57fddd64820d8ac9e6446065515 Author: Tom Meagher Date: Fri Aug 9 10:10:52 2013 -0400 first commit of port of @harrisj iron_ebooks diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..10bd2a7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*.pyc +.git \ No newline at end of file diff --git a/Procfile b/Procfile new file mode 100644 index 0000000..a3de0c5 --- /dev/null +++ b/Procfile @@ -0,0 +1 @@ +worker: python ebooks.py diff --git a/README.md b/README.md new file mode 100644 index 0000000..e0a7165 --- /dev/null +++ b/README.md @@ -0,0 +1,57 @@ +# Heroku_ebooks + +This is a basic Python port of @harrisj's [iron_ebooks](https://github.com/harrisj/iron_ebooks/) Ruby script. Using Heroku's scheduler, you can post to an _ebooks Twitter account based on the corpus of an existing Twitter account at pseudorandom intervals. Currently, it is the magic behind [@adriennelaf_ebx](http://www.twitter.com/adriennelaf_ebx) and [@stevebuttry_ebx](http://www.twitter.com/stevebuttry_ebx). + +## Setup + +1. Clone this repo +2. Create a Twitter account that you will post to. +3. Sign in to https://dev.twitter.com/apps with the same login and create an application. +4. Make a copy of the `local_settings_example.py` file and name it `local_settings.py` +5. Take the consumer key (and secret) and access token (and secret) from your Twitter application and paste them into the appropriate spots in `local_settings.py`. +6. In `local_settings.py`, be sure to add the handle of the Twitter user you want your _ebooks account to be based on. To make your tweets go live, change the `DEBUG` variable to `False`. +7. Create an account at Heroku, if you don't already have one. [Install the Heroku toolbelt](https://devcenter.heroku.com/articles/quickstart#step-2-install-the-heroku-toolbelt) and set your Heroku login on the command line. +8. Type the command `heroku create` to generate the _ebooks Python app on the platform that you can schedule. +9. 
The only Python requirement for this script is [python-twitter](https://github.com/bear/python-twitter), the `pip install` of which is handled by Heroku automatically. +10. `git commit -am 'updated the local_settings.py'` +11. `git push heroku master` +12. Test your upload by typing `heroku run worker`. You should either get a response that says "3 No, sorry, not this time." or a message with the body of your post. If you get the latter, check your _ebooks Twitter account to see if it worked. +13. Now it's time to configure the scheduler. `heroku addons:add scheduler:standard` +14. Once that runs, type `heroku addons:open scheduler`. This will open up a browser window where you can adjust the time interval for the script to run. I recommend setting it at one hour. +15. Sit back and enjoy the fruits of your labor. + + +## Configuring + +There are several parameters that control the behavior of the bot. You can adjust them by setting them in your `local_settings.py` file. + +``` +ODDS = 8 +``` + +The bot does not run on every invocation. It runs in a pseudorandom fashion. At the beginning of each time the script fires, `guess = random.choice(range(ODDS))`. If `guess == 0`, then it proceeds. If your `ODDS = 8`, it should run one out of every 8 times, more or less. You can override it to make it more or less frequent. To make it run every time, you can set it to 1 (a value of 0 leaves `range(ODDS)` empty, so `random.choice` fails). + + +By default, the bot ignores any tweets with URLs in them because those might just be headlines for articles and not text you've written. + +``` +ORDER = 2 +``` + +The Markov order (`ORDER`) is a measure of associativity in the generated Markov chains. According to @harrisj, and I'll take his word for it, 1 is generally more incoherent and 3 is more lucid. + +## Debugging + +You can also turn off the publication of tweets and skip the random number generation if you want to test the script to debug it. + +First, adjust the DEBUG variable in `local_settings.py`. 
def connect():
    """Return a ``twitter.Api`` client built from the configured credentials.

    The four ``MY_*`` values are module-level names; in this file they are
    provided by the star-import of ``local_settings``.
    """
    return twitter.Api(consumer_key=MY_CONSUMER_KEY,
                       consumer_secret=MY_CONSUMER_SECRET,
                       access_token_key=MY_ACCESS_TOKEN_KEY,
                       access_token_secret=MY_ACCESS_TOKEN_SECRET)


def filter_tweet(tweet):
    """Clean ``tweet.text`` for use as Markov source material.

    Removes, in order: everything from an ``RT``/``MT`` marker to the end of
    that line; tokens starting with ``#``, ``@``, ``h/t`` or ``http``;
    double quotes and parentheses. Line breaks become spaces and runs of
    whitespace are collapsed.

    The cleaned text is written back to ``tweet.text`` (as the original
    implementation did) and also returned.
    """
    text = tweet.text
    text = re.sub(r'\b(RT|MT) .+', '', text)  # take out anything after RT or MT
    text = re.sub(r'(\#|@|(h\/t)|(http))\S+', '', text)  # URLs, hashtags, hts, etc.
    # A newline separates words just like a space does; replacing it with ''
    # would glue the last word of one line onto the first word of the next.
    text = re.sub(r'\n', ' ', text)
    text = re.sub(r'\"|\(|\)', '', text)  # take out quotes and parentheses
    # Removed tokens leave doubled/trailing spaces behind. Generated sentences
    # are joined with single spaces, so normalise here to keep substring
    # comparisons against the source tweets meaningful.
    text = ' '.join(text.split())
    tweet.text = text
    return tweet.text


def grab_tweets(api, max_id=None, screen_name=None):
    """Fetch one page of a user's timeline and return its cleaned texts.

    api         -- object exposing ``GetUserTimeline`` (see ``connect``).
    max_id      -- passed through to ``GetUserTimeline``; ``None`` for the
                   first page.
    screen_name -- account to read. Defaults to the module-level ``user``
                   that the ``__main__`` script sets, which keeps existing
                   two-argument calls working.

    Returns ``(source_tweets, next_max_id)``. ``source_tweets`` holds the
    non-empty results of ``filter_tweet``. ``next_max_id`` is the id of the
    last tweet on the page minus one, so passing it back in requests the
    following page. When the page is empty the incoming ``max_id`` is
    returned unchanged instead of raising ``IndexError``.
    """
    if screen_name is None:
        screen_name = user
    user_tweets = api.GetUserTimeline(screen_name=screen_name, count=200,
                                      max_id=max_id, include_rts=True,
                                      trim_user=True, exclude_replies=True)
    if not user_tweets:
        # Nothing (more) to read: there is no last tweet to take an id from.
        return [], max_id

    next_max_id = user_tweets[-1].id - 1
    source_tweets = []
    for tweet in user_tweets:
        tweet.text = filter_tweet(tweet)
        if tweet.text:
            source_tweets.append(tweet.text)
    return source_tweets, next_max_id
class MarkovChainer(object):
    """Word-level Markov chain used to generate sentences from source text.

    ``order`` is the number of consecutive words that form a lookup key
    (expected to be >= 1). Feed text in with ``add_text`` and draw sentences
    with ``generate_sentence``.
    """

    def __init__(self, order):
        self.order = order
        # First ``order`` words of every learned sentence (list of lists).
        self.beginnings = []
        # Maps a tuple of ``order`` consecutive words to the list of words
        # seen right after them (duplicates kept, so frequent followers are
        # proportionally more likely to be drawn).
        self.freq = {}

    # pass a string with a terminator to the function to add it to the markov lists.
    def add_sentence(self, string, terminator):
        """Learn one sentence.

        ``string`` is the sentence without its closing punctuation and
        ``terminator`` is that punctuation; it is stored as the final token
        so generated chains know where to stop. Sentences with ``order`` or
        fewer words are ignored because they cannot form a key plus a
        follower.
        """
        data = "".join(string)
        words = data.split()
        if len(words) <= self.order:
            return
        words.append(terminator)
        self.beginnings.append(words[0:self.order])

        # Every window of ``order`` words is a key for the word after it.
        # Using the whole window (not just its first and last word) keeps the
        # stored keys identical to the ones generate_sentence looks up, for
        # any order.
        for start in range(len(words) - self.order):
            key = tuple(words[start:start + self.order])
            self.freq.setdefault(key, []).append(words[start + self.order])
        return

    def add_text(self, text):
        """Split ``text`` into sentences and learn each of them.

        Blank-line paragraph breaks count as a full stop, and sentences end
        at any of ``. ! ? ;``. Text left over after the last terminator is
        learned as a sentence ending in ``.`` rather than being dropped.
        """
        # Paragraph breaks act as sentence boundaries.
        text = re.sub(r'\n\s*\n', ".", text)
        seps = '([.!?;])'
        sentence = ""
        for piece in re.split(seps, text):
            if piece == "":
                continue
            if re.search(seps, piece):
                self.add_sentence(sentence, piece)
                sentence = ""
            else:
                sentence = piece
        # Input frequently has no closing punctuation at all; without this
        # the trailing sentence would never reach add_sentence.
        if sentence.strip():
            self.add_sentence(sentence, ".")

    # Generate the goofy sentences that become your tweet.
    def generate_sentence(self):
        """Return a randomly generated sentence, or ``None``.

        A random learned beginning is extended one word at a time until no
        follower is known for the last ``order`` words. The first word is
        capitalized, words are joined with single spaces, and the final
        token (normally the terminator) is appended without a space.

        ``None`` is returned when nothing has been learned yet, when the
        chosen beginning does not have ``order`` words, or when it cannot be
        extended at all.
        """
        if not self.beginnings:
            return None
        res = list(random.choice(self.beginnings))
        if len(res) != self.order:
            return None

        while True:
            nw = self.next_word_for(tuple(res[-self.order:]))
            if nw is None:
                break
            res.append(nw)

        if len(res) == self.order:
            # No follower was found, so there is no terminator to close with.
            return None

        words = res[:-1]
        words[0] = words[0].capitalize()
        return " ".join(words) + res[-1]

    def next_word_for(self, words):
        """Return a random follower of the key ``words``, or ``None``.

        ``words`` is a sequence of ``order`` words; it is converted to a
        tuple for the lookup. Unknown or unusable keys yield ``None``.
        """
        try:
            key = tuple(words)
        except TypeError:
            return None
        followers = self.freq.get(key)
        if not followers:
            return None
        return random.choice(followers)