first commit of port of @harrisj iron_ebooks

This commit is contained in:
Tom Meagher
2013-08-09 10:10:52 -04:00
commit fa1b2967c8
7 changed files with 246 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
*.pyc
.git

1
Procfile Normal file
View File

@@ -0,0 +1 @@
worker: python ebooks.py

57
README.md Normal file
View File

@@ -0,0 +1,57 @@
# Heroku_ebooks
This is a basic Python port of @harrisj's [iron_ebooks](https://github.com/harrisj/iron_ebooks/) Ruby script. Using Heroku's scheduler, you can post to an _ebooks Twitter account based on the corpus of an existing Twitter account at pseudorandom intervals. Currently, it is the magic behind [@adriennelaf_ebx](http://www.twitter.com/adriennelaf_ebx) and [@stevebuttry_ebx](http://www.twitter.com/stevebuttry_ebx).
## Setup
1. Clone this repo
2. Create a Twitter account that you will post to.
3. Sign into [https://dev.twitter.com/apps](https://dev.twitter.com/apps) with the same login and create an application.
4. Make a copy of the `local_settings_example.py` file and name it `local_settings.py`
5. Take the consumer key (and secret) and access token (and secret) from your Twitter application and paste them into the appropriate spots in `local_settings.py`.
6. In `local_settings.py`, be sure to add the handle of the Twitter user you want your _ebooks account to be based on. To make your tweets go live, change the `DEBUG` variable to `False`.
7. Create an account at Heroku, if you don't already have one. [Install the Heroku toolbelt](https://devcenter.heroku.com/articles/quickstart#step-2-install-the-heroku-toolbelt) and set your Heroku login on the command line.
8. Type the command `heroku create` to generate the _ebooks Python app on the platform that you can schedule.
9. The only Python requirement for this script is [python-twitter](https://github.com/bear/python-twitter), the `pip install` of which is handled by Heroku automatically.
10. `git commit -am 'updated the local_settings.py'`
11. `git push heroku master`
12. Test your upload by typing `heroku run worker`. You should either get a response like "3 No, sorry, not this time." (the number is the random guess) or a message with the body of your post. If you get the latter, check your _ebooks Twitter account to see if it worked.
13. Now it's time to configure the scheduler. `heroku addons:add scheduler:standard`
14. Once that runs, type `heroku addons:open scheduler`. This will open up a browser window where you can adjust the time interval for the script to run. I recommend setting it at one hour.
15. Sit back and enjoy the fruits of your labor.
## Configuring
There are several parameters that control the behavior of the bot. You can adjust them by setting them in your `local_settings.py` file.
```
ODDS = 8
```
The bot does not run on every invocation. It runs in a pseudorandom fashion. Each time the script fires, it starts with `guess = random.choice(range(ODDS))`. If `guess == 0`, then it proceeds. If your `ODDS = 8`, it should run one out of every 8 times, more or less. You can override it to make it more or less frequent. To make it run every time, you can set it to 1 (`range(1)` only ever yields 0).
By default, the bot ignores any tweets with URLs in them because those might just be headlines for articles and not text you've written.
```
ORDER = 2
```
`ORDER` is the Markov index, a measure of associativity in the generated Markov chains. According to @harrisj, and I'll take his word for it, 1 is generally more incoherent and 3 is more lucid.
## Debugging
You can also turn off the publication of tweets and skip the random number generation if you want to test the script to debug it.
First, adjust the DEBUG variable in `local_settings.py`.
```
DEBUG = True
```
After that, commit the change and `git push heroku master`. Then run the command `heroku run worker` on the command line and watch what happens.
## Credit
As I said, this is based almost entirely on @harrisj's [iron_ebooks](https://github.com/harrisj/iron_ebooks/). He created it in Ruby, and I wanted to port it to Python. All the credit goes to him.
As a result, all of the blame for the clunky implementation in Python falls on me. If you see ways to improve the code, please fork it and send a pull request, or file an issue for me and I'll address it.

92
ebooks.py Normal file
View File

@@ -0,0 +1,92 @@
import random
import markov
import twitter
import re
import sys
from local_settings import *
def connect():
    """Build a ``twitter.Api`` client from the keys in ``local_settings``.

    Returns:
        The ``twitter.Api`` instance constructed with the consumer key/secret
        and access token key/secret imported from ``local_settings``.
    """
    credentials = {
        'consumer_key': MY_CONSUMER_KEY,
        'consumer_secret': MY_CONSUMER_SECRET,
        'access_token_key': MY_ACCESS_TOKEN_KEY,
        'access_token_secret': MY_ACCESS_TOKEN_SECRET,
    }
    return twitter.Api(**credentials)
def filter_tweet(tweet):
    """Scrub a tweet's text so it can be fed to the Markov chainer.

    Args:
        tweet: any object with a string ``text`` attribute.

    Returns:
        The cleaned text. The same value is also written back to
        ``tweet.text``.
    """
    # Applied in order; every match is replaced with the empty string.
    scrubbers = (
        r'\b(RT|MT) .+',             # take out anything after RT or MT
        r'(\#|@|(h\/t)|(http))\S+',  # take out URLs, hashtags, hts, etc.
        r'\n',                       # take out new lines
        r'\"|\(|\)',                 # take out quotes and parentheses
    )
    cleaned = tweet.text
    for pattern in scrubbers:
        cleaned = re.sub(pattern, '', cleaned)
    tweet.text = cleaned
    return cleaned
def grab_tweets(api, max_id=None):
    """Fetch one page of the source account's timeline and clean it up.

    Reads the module-level ``user`` (set by the script entry point) as the
    screen name to fetch.

    Args:
        api: client object exposing ``GetUserTimeline``.
        max_id: forwarded to ``GetUserTimeline`` as ``max_id``; ``None`` on
            the first page.

    Returns:
        A ``(source_tweets, max_id)`` tuple. ``source_tweets`` is the list of
        non-empty filtered tweet texts from this page. ``max_id`` is one less
        than the id of the last tweet on the page, ready to be passed to the
        next call. When the page is empty, ``([], max_id)`` is returned with
        ``max_id`` unchanged.
    """
    user_tweets = api.GetUserTimeline(screen_name=user, count=200, max_id=max_id, include_rts=True, trim_user=True, exclude_replies=True)
    if not user_tweets:
        # Nothing came back (e.g. we paged past the end of the timeline).
        # Indexing the last element would raise IndexError, so report an
        # empty page and leave the cursor where it was.
        return [], max_id

    next_max_id = user_tweets[-1].id - 1
    source_tweets = []
    for tweet in user_tweets:
        tweet.text = filter_tweet(tweet)
        # Filtering can leave nothing behind (e.g. a tweet that was only a link).
        if tweet.text:
            source_tweets.append(tweet.text)
    return source_tweets, next_max_id
if __name__=="__main__":
    # Script entry point: decide whether to run this time, build a Markov
    # corpus from SOURCE_ACCOUNT's timeline, generate one candidate tweet and
    # post it (or only print it when DEBUG is on).
    order = ORDER
    if DEBUG == False and ODDS > 0:
        # Proceed only when 0 is drawn: roughly one run in every ODDS.
        guess = random.choice(range(ODDS))
    else:
        # DEBUG skips the dice roll. ODDS <= 0 is treated as "always run";
        # range(ODDS) would be empty and random.choice would raise IndexError.
        guess = 0

    if guess == 0:
        user = SOURCE_ACCOUNT  # module-level name read by grab_tweets()
        source_tweets = []
        api = connect()
        max_id = None
        # Page backwards through the timeline: 16 requests of up to 200 tweets.
        for _ in range(16):
            source_tweets_iter, max_id = grab_tweets(api, max_id)
            source_tweets += source_tweets_iter
        print("{0} tweets found".format(len(source_tweets)))
        if len(source_tweets) == 0:
            print("Error fetching tweets from Twitter. Aborting.")
            sys.exit()

        mine = markov.MarkovChainer(order)
        for tweet in source_tweets:
            mine.add_text(tweet)

        # Ten sentences are generated; only the last one is kept.
        for _ in range(0, 10):
            ebook_tweet = mine.generate_sentence()

        # randomly drop the last word, as Horse_ebooks appears to do.
        # generate_sentence() can return None, so check that before handing
        # the value to re.search (which would raise TypeError on None).
        if (ebook_tweet is not None
                and random.randint(0,4) == 0
                and re.search(r'(in|to|from|for|with|by|our|of|your|around|under|beyond)\s\w+$', ebook_tweet) is not None):
            print("Losing last word randomly")
            ebook_tweet = re.sub(r'\s\w+.$','',ebook_tweet)
            print(ebook_tweet)

        # if a tweet is very short, this will randomly add a second sentence to it.
        if ebook_tweet is not None and len(ebook_tweet) < 40 and random.randint(0,5) == 0:
            print("Short tweet. Adding another sentence randomly")
            newer_tweet = mine.generate_sentence()
            if newer_tweet is not None:
                # Append the sentence that was just checked, not a fresh
                # (unchecked, possibly None) one.
                ebook_tweet += " " + newer_tweet

        # throw out tweets that match anything from the source account.
        if ebook_tweet is not None and len(ebook_tweet) < 110:
            for tweet in source_tweets:
                # [:-1] ignores the final character (the sentence terminator).
                if ebook_tweet[:-1] in tweet:
                    print("TOO SIMILAR: " + ebook_tweet)
                    sys.exit()

            if DEBUG == False:
                status = api.PostUpdate(ebook_tweet)
                print(status.text)
            else:
                print(ebook_tweet)
        elif ebook_tweet is None:
            print("Tweet is empty, sorry.")
        else:
            print("TOO LONG: " + ebook_tweet)
    else:
        print(str(guess) + " No, sorry, not this time.")  # message if the random number fails.

10
local_settings_example.py Normal file
View File

@@ -0,0 +1,10 @@
#configuration for ebooks.py -- copy this file to local_settings.py and fill in the values.
#Credentials of the Twitter application the script connects with.
MY_CONSUMER_KEY = 'Your Twitter API Consumer Key'
MY_CONSUMER_SECRET = 'Your Consumer Secret Key'
MY_ACCESS_TOKEN_KEY = 'Your Twitter API Access Token Key'
MY_ACCESS_TOKEN_SECRET = 'Your Access Token Secret'
SOURCE_ACCOUNT = "" #The Twitter handle of the account that you'll generate tweets based on.
ODDS = 8 #How often do you want this to run? The script proceeds on roughly 1 out of every ODDS invocations (ignored while DEBUG is True).
ORDER = 2 #Order handed to the Markov chainer. How closely do you want this to hew to sensical? 1 is low and 3 is high.
DEBUG = True #True: skip the random roll and only print the tweet. Set this to False to start Tweeting live

83
markov.py Normal file
View File

@@ -0,0 +1,83 @@
import random
import re
class MarkovChainer(object):
    """Word-level Markov chain sentence generator.

    Sentences are fed in with ``add_text`` / ``add_sentence``; every run of
    ``order`` consecutive words is remembered together with the word (or
    sentence terminator) that followed it. ``generate_sentence`` then walks
    those transitions at random, starting from the opening words of one of
    the stored sentences.
    """

    def __init__(self, order):
        """Create an empty chain.

        Args:
            order: number of preceding words used to pick the next word.

        Raises:
            ValueError: if ``order`` is smaller than 1.
        """
        if order < 1:
            raise ValueError("order must be a positive integer")
        self.order = order
        # First ``order`` words of every stored sentence (list of lists).
        self.beginnings = []
        # Maps a tuple of ``order`` words to the list of words seen right
        # after it; duplicates are kept so random.choice is frequency-weighted.
        self.freq = {}

    def add_sentence(self, string, terminator):
        """Add one sentence, ending in ``terminator``, to the chain.

        Args:
            string: the sentence text without its terminator.
            terminator: the punctuation that ended the sentence; it is stored
                as the final "word" so generation knows where to stop.

        Sentences with ``order`` words or fewer are ignored, since they
        cannot produce a usable transition.
        """
        data = "".join(string)
        words = data.split()
        if len(words) <= self.order:
            return
        words.append(terminator)
        self.beginnings.append(words[0:self.order])
        # Slide a window of ``order`` words over the sentence; the key is the
        # whole window (not just two of its words), so any order works.
        for start in range(len(words) - self.order):
            key = tuple(words[start:start + self.order])
            self.freq.setdefault(key, []).append(words[start + self.order])

    def add_text(self, text):
        """Split ``text`` into sentences and add each one to the chain.

        Sentences are delimited by ``.``, ``!``, ``?`` or ``;``. A blank line
        is treated as a sentence break, and trailing text without a
        terminator is stored as if it ended with ``"."``.
        """
        # Make sure each paragraph ends with a sentence terminator.
        text = re.sub(r'\n\s*\n', ".", text)
        seps = '([.!?;])'
        sentence = ""
        # The capturing group makes re.split return the separators as well,
        # so pieces alternate between sentence text and terminator.
        for piece in re.split(seps, text):
            if piece == "":
                continue
            if re.search(seps, piece):
                self.add_sentence(sentence, piece)
                sentence = ""
            else:
                sentence = piece
        # Text that never reached a terminator (typical for tweets) would
        # otherwise be thrown away.
        if sentence.strip():
            self.add_sentence(sentence, ".")

    def generate_sentence(self):
        """Generate the goofy sentence that becomes your tweet.

        Returns:
            The generated sentence with its first word capitalized and the
            final element (the terminator) attached without a space, or
            ``None`` if nothing has been added to the chain yet.
        """
        if not self.beginnings:
            return None
        # Copy so the stored beginning is not mutated while we extend it.
        res = list(random.choice(self.beginnings))
        if len(res) != self.order:
            return None
        while True:
            nw = self.next_word_for(tuple(res[-self.order:]))
            if nw is None:
                # No recorded follower: the last element is a terminator.
                break
            res.append(nw)
        res[0] = res[0].capitalize()
        return " ".join(res[:-1]) + res[-1]

    def next_word_for(self, words):
        """Pick a random word that has followed ``words``.

        Args:
            words: sequence of ``order`` words.

        Returns:
            One of the recorded followers, or ``None`` if ``words`` was never
            seen (or cannot be used as a key).
        """
        try:
            candidates = self.freq.get(tuple(words))
        except TypeError:
            return None
        if not candidates:
            return None
        return random.choice(candidates)
if __name__ == "__main__":
    # This module is meant to be imported; running it directly only prints a hint.
    print("Try running ebooks.py first")

1
requirements.txt Normal file
View File

@@ -0,0 +1 @@
python-twitter