commit fa1b2967c81bc57fddd64820d8ac9e6446065515
Author: Tom Meagher <hello+git@tommeagher.com>
Date:   Fri Aug 9 10:10:52 2013 -0400

    first commit of port of @harrisj iron_ebooks

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..10bd2a7
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+*.pyc
+.git
\ No newline at end of file
diff --git a/Procfile b/Procfile
new file mode 100644
index 0000000..a3de0c5
--- /dev/null
+++ b/Procfile
@@ -0,0 +1 @@
+worker: python ebooks.py
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e0a7165
--- /dev/null
+++ b/README.md
@@ -0,0 +1,57 @@
+# Heroku_ebooks
+
+This is a basic Python port of @harrisj's [iron_ebooks](https://github.com/harrisj/iron_ebooks/) Ruby script. Using Heroku's scheduler, you can post to an _ebooks Twitter account at pseudorandom intervals, based on the corpus of an existing Twitter account. Currently, it is the magic behind [@adriennelaf_ebx](http://www.twitter.com/adriennelaf_ebx) and [@stevebuttry_ebx](http://www.twitter.com/stevebuttry_ebx).
+
+## Setup
+
+1. Clone this repo
+2. Create a Twitter account that you will post to.
+3. Sign in to <https://dev.twitter.com/apps> with the same login and create an application.
+4. Make a copy of the `local_settings_example.py` file and name it `local_settings.py`
+5. Take the consumer key (and secret) and access token (and secret) from your Twitter application and paste them into the appropriate spots in `local_settings.py`.
+6. In `local_settings.py`, be sure to add the handle of the Twitter user you want your _ebooks account to be based on. To make your tweets go live, change the `DEBUG` variable to `False`.
+7. Create an account at Heroku, if you don't already have one. [Install the Heroku toolbelt](https://devcenter.heroku.com/articles/quickstart#step-2-install-the-heroku-toolbelt) and set your Heroku login on the command line.
+8. Type the command `heroku create` to generate the _ebooks Python app on the platform that you can schedule.
+9. The only Python requirement for this script is [python-twitter](https://github.com/bear/python-twitter), the `pip install` of which is handled by Heroku automatically.
+10. `git commit -am 'updated the local_settings.py'`
+11. `git push heroku master`
+12. Test your upload by typing `heroku run worker`. You should either get a response like "3 No, sorry, not this time." (the number varies) or a message with the body of your post. If you get the latter, check your _ebooks Twitter account to see if it worked.
+13. Now it's time to configure the scheduler. `heroku addons:add scheduler:standard`
+14. Once that runs, type `heroku addons:open scheduler`. This will open up a browser window where you can adjust the time interval for the script to run. I recommend setting it at one hour.
+15. Sit back and enjoy the fruits of your labor.
+
+
+## Configuring
+
+There are several parameters that control the behavior of the bot. You can adjust them by setting them in your `local_settings.py` file. 
+
+```
+ODDS = 8
+```
+
+The bot does not run on every invocation. It runs in a pseudorandom fashion. Each time the script fires, it first picks `guess = random.choice(range(ODDS))`. If `guess == 0`, then it proceeds. If your `ODDS = 8`, it should run one out of every 8 times, more or less. You can override it to make it more or less frequent. To make it run every time, you can set it to 1.
+
+
+By default, the bot strips URLs, hashtags, @-mentions and anything following "RT" or "MT" out of the source tweets, because those might just be links or headlines for articles and not text you've written.
+
+```
+ORDER = 2
+```
+
+`ORDER` sets the Markov index, a measure of associativity in the generated Markov chains. According to @harrisj, and I'll take his word for it, 1 is generally more incoherent and 3 is more lucid.
+
+## Debugging
+
+You can also turn off the publication of tweets and skip the random number generation if you want to test the script to debug it.
+
+First, adjust the DEBUG variable in `local_settings.py`.
+
+```
+DEBUG = True 
+```
+
+After that, commit the change and `git push heroku master`. Then run the command `heroku run worker` on the command line and watch what happens.
+
+## Credit
+As I said, this is based almost entirely on @harrisj's [iron_ebooks](https://github.com/harrisj/iron_ebooks/). He created it in Ruby, and I wanted to port it to Python. All the credit goes to him.
+As a result, all of the blame for clunky implementation in Python falls on me. If you see ways to improve the code, please fork it and send a pull request, or file an issue for me and I'll address it.
\ No newline at end of file
diff --git a/ebooks.py b/ebooks.py
new file mode 100644
index 0000000..cd9ea3e
--- /dev/null
+++ b/ebooks.py
@@ -0,0 +1,122 @@
+import random
+import markov
+import twitter
+import re
+import sys
+from local_settings import *
+
+#How many 200-tweet pages of the source timeline to request per run.
+PAGES_TO_FETCH = 16
+#How many sentences to generate while looking for one short enough to post.
+MAX_ATTEMPTS = 10
+#Generated tweets must be shorter than this many characters to be posted.
+MAX_TWEET_LENGTH = 110
+
+def connect():
+    """Return a twitter.Api client built from the keys in local_settings."""
+    api = twitter.Api(consumer_key=MY_CONSUMER_KEY,
+                          consumer_secret=MY_CONSUMER_SECRET,
+                          access_token_key=MY_ACCESS_TOKEN_KEY,
+                          access_token_secret=MY_ACCESS_TOKEN_SECRET)
+    return api
+
+def filter_tweet(tweet):
+    """Clean tweet.text in place and return the cleaned text."""
+    tweet.text = re.sub(r'\b(RT|MT) .+','',tweet.text) #take out anything after RT or MT
+    tweet.text = re.sub(r'(\#|@|(h\/t)|(http))\S+','',tweet.text) #Take out URLs, hashtags, hts, etc.
+    tweet.text = re.sub(r'\n','', tweet.text) #take out new lines.
+    tweet.text = re.sub(r'\"|\(|\)', '', tweet.text) #take out quotes.
+    return tweet.text
+
+def grab_tweets(api, max_id=None):
+    """Fetch one page of the source timeline.
+
+    Returns (texts, next_max_id): the non-empty filtered tweet texts, and the
+    max_id to pass in for the next, older page. When the page comes back
+    empty, next_max_id is the max_id that was passed in.
+
+    The account is read from the module-level name `user`, which the
+    __main__ block sets from SOURCE_ACCOUNT.
+    """
+    source_tweets=[]
+    user_tweets = api.GetUserTimeline(screen_name=user, count=200, max_id=max_id, include_rts=True, trim_user=True, exclude_replies=True)
+    if not user_tweets:
+        #An empty page has no last tweet to take an id from.
+        return source_tweets, max_id
+    max_id = user_tweets[-1].id-1
+    for tweet in user_tweets:
+        tweet.text = filter_tweet(tweet)
+        if len(tweet.text) != 0:
+            source_tweets.append(tweet.text)
+    return source_tweets, max_id
+
+if __name__=="__main__":
+    order = ORDER
+    #Only an explicit DEBUG = False goes live; any other value stays in debug mode.
+    live = (DEBUG == False)
+    #range(ODDS) is empty when ODDS is below 1, so treat that as "always run".
+    if live and ODDS >= 1:
+        guess = random.choice(range(ODDS))
+    else:
+        guess = 0
+
+    if guess == 0:
+        user=SOURCE_ACCOUNT
+        source_tweets = []
+        api=connect()
+        max_id=None
+        for x in range(PAGES_TO_FETCH):
+            source_tweets_iter, next_max_id = grab_tweets(api,max_id)
+            source_tweets += source_tweets_iter
+            if next_max_id == max_id:
+                break #an empty page: the timeline has been read to its end.
+            max_id = next_max_id
+        print("{0} tweets found".format(len(source_tweets)))
+        if len(source_tweets) == 0:
+            print("Error fetching tweets from Twitter. Aborting.")
+            sys.exit()
+        mine = markov.MarkovChainer(order)
+        for tweet in source_tweets:
+            mine.add_text(tweet)
+
+        #keep drawing sentences until one is short enough; the last draw is kept either way.
+        ebook_tweet = None
+        if mine.beginnings: #with nothing learned there is nothing to draw from.
+            for x in range(MAX_ATTEMPTS):
+                ebook_tweet = mine.generate_sentence()
+                if ebook_tweet is not None and len(ebook_tweet) < MAX_TWEET_LENGTH:
+                    break
+
+        #randomly drop the last word, as Horse_ebooks appears to do.
+        if ebook_tweet is not None and random.randint(0,4) == 0 and re.search(r'(in|to|from|for|with|by|our|of|your|around|under|beyond)\s\w+$', ebook_tweet) is not None:
+            print("Losing last word randomly")
+            ebook_tweet = re.sub(r'\s\w+.$','',ebook_tweet)
+            print(ebook_tweet)
+
+        #if a tweet is very short, this will randomly add a second sentence to it.
+        if ebook_tweet is not None and len(ebook_tweet) < 40 and random.randint(0,5) == 0:
+            print("Short tweet. Adding another sentence randomly")
+            newer_tweet = mine.generate_sentence()
+            if newer_tweet is not None:
+                #append the sentence that was just checked rather than drawing again.
+                ebook_tweet += " " + newer_tweet
+
+        #throw out tweets that match anything from the source account.
+        if ebook_tweet is not None and len(ebook_tweet) < MAX_TWEET_LENGTH:
+            for tweet in source_tweets:
+                if ebook_tweet[:-1] in tweet:
+                    print("TOO SIMILAR: " + ebook_tweet)
+                    sys.exit()
+
+            if live:
+                status = api.PostUpdate(ebook_tweet)
+                print(status.text)
+            else:
+                print(ebook_tweet)
+
+        elif ebook_tweet is None:
+            print("Tweet is empty, sorry.")
+        else:
+            print("TOO LONG: " + ebook_tweet)
+    else:
+        print(str(guess) + " No, sorry, not this time.") #message if the random number fails.
\ No newline at end of file
diff --git a/local_settings_example.py b/local_settings_example.py
new file mode 100644
index 0000000..fa4602a
--- /dev/null
+++ b/local_settings_example.py
@@ -0,0 +1,10 @@
+#configuration
+MY_CONSUMER_KEY = 'Your Twitter API Consumer Key'
+MY_CONSUMER_SECRET = 'Your Consumer Secret Key'
+MY_ACCESS_TOKEN_KEY = 'Your Twitter API Access Token Key'
+MY_ACCESS_TOKEN_SECRET = 'Your Access Token Secret'
+
+SOURCE_ACCOUNT = "" #The Twitter handle of the account that you'll generate tweets based on.
+ODDS = 8 #How often do you want this to run? 1/8 times?
+ORDER = 2 #how closely do you want this to hew to sensical? 1 is low and 3 is high.
+DEBUG = True #Set this to False to start Tweeting live
\ No newline at end of file
diff --git a/markov.py b/markov.py
new file mode 100644
index 0000000..b16a5f5
--- /dev/null
+++ b/markov.py
@@ -0,0 +1,102 @@
+"""Minimal word-level Markov chain text generator used by ebooks.py."""
+import random
+import re
+
+
+class MarkovChainer(object):
+    """Build and sample an order-N Markov chain over words.
+
+    ``beginnings`` holds the first ``order`` words of every learned sentence;
+    ``freq`` maps each ``order``-word tuple to the list of words (or sentence
+    terminators) seen right after it. Repeats are kept in those lists so that
+    ``random.choice`` samples in proportion to how often each was seen.
+    """
+
+    def __init__(self, order):
+        self.order = order
+        self.beginnings = []
+        self.freq = {}
+
+    def add_sentence(self, string, terminator):
+        """Add one sentence, ending in ``terminator``, to the chain.
+
+        Sentences with ``order`` words or fewer are ignored: they are too
+        short to yield a single (state -> next word) transition.
+        """
+        data = "".join(string)
+        words = data.split()
+        if len(words) <= self.order:
+            return
+        words.append(terminator)
+        self.beginnings.append(words[0:self.order])
+        # Slide a window of `order` words over the sentence. The key is the
+        # whole window so that any order works; a fixed two-word key is only
+        # correct when order == 2.
+        for i in range(len(words) - self.order):
+            key = tuple(words[i:i + self.order])
+            self.freq.setdefault(key, []).append(words[i + self.order])
+
+    def add_text(self, text):
+        """Split ``text`` into sentences and add each one to the chain.
+
+        Sentences end at ``.``, ``!``, ``?`` or ``;``. Blank lines count as
+        sentence breaks, and trailing text with no terminator is kept by
+        treating it as if it ended with a period.
+        """
+        # No trailing "/m" here: Python has no /regex/flags literal, so those
+        # two characters would be matched literally and never found.
+        text = re.sub(r'\n\s*\n', ".", text)
+        seps = '([.!?;])'
+        sentence = ""
+        for piece in re.split(seps, text):
+            if piece == "":
+                continue
+            if re.search(seps, piece):
+                self.add_sentence(sentence, piece)
+                sentence = ""
+            else:
+                sentence = piece
+        # Flush what is left so text without final punctuation still reaches
+        # the chain instead of being dropped.
+        if sentence.strip():
+            self.add_sentence(sentence, ".")
+
+    def generate_sentence(self):
+        """Return one randomly generated sentence, or None if none can be made.
+
+        The first word is capitalized with ``str.capitalize`` (which also
+        lower-cases the rest of that word) and the terminator is attached to
+        the last word without a space.
+        """
+        if not self.beginnings:
+            return None
+        res = list(random.choice(self.beginnings))
+        if len(res) != self.order:
+            return None
+        while True:
+            # The state is the last `order` words, matching the keys built
+            # by add_sentence.
+            nw = self.next_word_for(tuple(res[-self.order:]))
+            if nw is None:
+                break
+            res.append(nw)
+        if len(res) < 2:
+            return None
+        body = res[:-1]
+        body[0] = body[0].capitalize()
+        return " ".join(body) + res[-1]
+
+    def next_word_for(self, words):
+        """Return a random successor of the state ``words``, or None.
+
+        ``words`` is a sequence of ``order`` words. None means nothing was
+        ever seen after that state, i.e. the sentence is complete.
+        """
+        arr = self.freq.get(tuple(words))
+        if not arr:
+            return None
+        return random.choice(arr)
+
+
+if __name__ == "__main__":
+    print("Try running ebooks.py first")
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..4658fe9
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+python-twitter
\ No newline at end of file