From c201b07a60ee1a3e669a74db942370186d0939d5 Mon Sep 17 00:00:00 2001
From: Jacob Harris
Date: Tue, 9 Jul 2013 21:54:44 -0400
Subject: [PATCH] Fixes for launch

---
 ebook.rb     | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 ebook.worker |  9 +++++++++
 markov.rb    | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 128 insertions(+)
 create mode 100644 ebook.rb
 create mode 100644 ebook.worker
 create mode 100644 markov.rb

diff --git a/ebook.rb b/ebook.rb
new file mode 100644
index 0000000..7f92980
--- /dev/null
+++ b/ebook.rb
@@ -0,0 +1,59 @@
+# Worker code can be anything you want.
+require 'rubygems'
+require 'twitter'
+
+require 'twitter_init'
+require 'markov'
+
+# Tweets with links, retweets, modified tweets or mentions are poor training
+# material. The pattern covers https links as well as http ones.
+UNUSABLE_TWEET = /(https?:\/\/)|(\bRT\b)|(\bMT\b)|@/
+
+# Timeline requests made per run; each asks for 200 tweets.
+MAX_PAGES = 26
+
+$rand_limit ||= 10
+
+# Collects the usable tweets from the account's timeline.
+# Best effort: when a request fails the error is reported and whatever was
+# collected so far is returned.
+def fetch_source_tweets(account)
+  tweets = []
+  options = { :count => 200, :trim_user => true,
+              :exclude_replies => false, :include_replies => true }
+  begin
+    MAX_PAGES.times do
+      page = Twitter.user_timeline(account, options)
+      break if page.empty? # nothing older is left
+      tweets += page.reject { |t| t.text =~ UNUSABLE_TWEET }
+      # next request: only tweets with an id below the last one of this page
+      options = options.merge(:max_id => page.last.id - 1)
+    end
+  rescue StandardError => e
+    warn "Stopped fetching tweets: #{e.class}: #{e.message}"
+  end
+  tweets
+end
+
+# randomly running only about 1 in $rand_limit times
+if rand($rand_limit) == 0
+  source_tweets = fetch_source_tweets($source_account)
+  puts "#{source_tweets.length} tweets found"
+
+  markov = MarkovChainer.new(2)
+  source_tweets.each { |twt| markov.add_text(twt.text) }
+
+  # Retry a few times to avoid posting a source tweet verbatim.
+  originals = source_tweets.map { |t| t.text }
+  tweet = nil
+  5.times do
+    tweet = markov.generate_sentence
+    break unless originals.include?(tweet)
+  end
+
+  puts "TWEET: #{tweet}"
+
+  Twitter.update(tweet) if tweet && !tweet.empty?
+else
+  puts "Not running this time"
+end
diff --git a/ebook.worker b/ebook.worker
new file mode 100644
index 0000000..33ff648
--- /dev/null
+++ b/ebook.worker
@@ -0,0 +1,9 @@
+# define the runtime language
+runtime "ruby"
+# exec is the file that will be executed:
+exec "ebook.rb"
+
+file "twitter_init.rb"
+file "markov.rb"
+
+gem 'twitter'
diff --git a/markov.rb b/markov.rb
new file mode 100644
index 0000000..f527b4a
--- /dev/null
+++ b/markov.rb
@@ -0,0 +1,60 @@
+# Word-level Markov chain text generator.
+#
+# Sentences are learned with add_text; generate_sentence then walks the chain
+# from a randomly chosen learned opening until no continuation is known.
+class MarkovChainer
+  # Characters that end a sentence. The capture group makes String#split keep
+  # each terminator as its own element.
+  SENTENCE_TERMINATORS = /([.!?;])/
+
+  attr_reader :order
+
+  # order - number of preceding words used to choose the next word.
+  def initialize(order)
+    @order = order
+    @beginnings = [] # first `order` words of every learned sentence
+    @freq = {}       # `order` consecutive words => words seen right after them
+  end
+
+  # Learns every sentence found in text. The argument is left unmodified, so
+  # frozen strings and strings the caller still needs are safe to pass.
+  def add_text(text)
+    # make sure each paragraph ends with some sentence terminator
+    normalized = text.gsub(/\n\s*\n/m, ".") + "."
+    # split yields sentence text and terminator alternately
+    normalized.split(SENTENCE_TERMINATORS).each_slice(2) do |sentence, terminator|
+      add_sentence(sentence, terminator)
+    end
+  end
+
+  # Returns a generated sentence, or nil when nothing has been learned.
+  def generate_sentence
+    return nil if @beginnings.empty?
+    # dup: appending to the stored opening would corrupt it for later calls
+    words = @beginnings[rand(@beginnings.size)].dup
+    while (following = next_word_for(words[-order, order]))
+      words << following
+    end
+    # the final element is the terminator; it attaches without a space
+    words[0..-2].join(" ") + words.last
+  end
+
+  private
+
+  # Records one sentence: its opening words and every word transition.
+  def add_sentence(str, terminator)
+    words = str.scan(/[\w'\-]+/)
+    return unless words.size > order # ignore short sentences
+    words << terminator
+    words.each_cons(order + 1) do |window|
+      (@freq[window[0..-2]] ||= []) << window[-1]
+    end
+    @beginnings << words[0, order]
+  end
+
+  # Picks a random follower of the given words, or nil when none was learned.
+  def next_word_for(words)
+    followers = @freq[words]
+    followers && followers[rand(followers.size)]
+  end
+end