# Patch against ebook.rb (unified git diff). In this copy the patch's line
# breaks are collapsed: every hunk line sits on the single line below,
# separated by spaces, with its leading "+" / "-" marker still in place.
#
# Changes made by the "+" / "-" lines:
#   * Adds filtered_tweets(tweets). For each tweet it takes t.text and deletes
#     every match of /\b(RT|MT) .+/. Unless $include_urls or
#     params["include_urls"] is truthy, it then rejects texts containing
#     "http://" or "https://". Finally it deletes every match of
#     "#", "@", "h/t" or "http" followed by one or more non-whitespace
#     characters, and returns the resulting array of strings.
#   * Both Twitter.user_timeline call sites now append
#     filtered_tweets(user_tweets) to source_tweets instead of the inline
#     reject/map chain (which always rejected texts containing a URL).
#   * After the fetch, raises "Error fetching tweets from Twitter. Aborting."
#     when source_tweets.length == 0.
#   * Removes the in-loop text.gsub!(/\#[\w\d]+/, '') hashtag strip before
#     markov.add_text; "#..." tokens are now removed inside filtered_tweets.
#   * The sentence-generation loop runs 10.times instead of 5.times.
#
# NOTE(review): filtered_tweets reads `params` inside a `def`. The definition
#   of `params` is not visible in this patch; if it is a top-level local
#   variable, it is not in scope inside the method and the call would raise
#   NameError whenever $include_urls is falsy -- confirm how it is defined.
# NOTE(review): `user_tweets.last.id` fails if a page comes back empty
#   (`last` returns nil for an empty Array -- assuming user_timeline returns an
#   Array). The surrounding bare `rescue` swallows that, and any error raised
#   inside filtered_tweets, without logging; such failures end the fetch
#   silently. If no tweets were collected before the failure, the only
#   symptom is the new "Error fetching tweets" raise -- verify this is the
#   intended best-effort behaviour.
diff --git a/ebook.rb b/ebook.rb index 68860d0..3d9d10f 100644 --- a/ebook.rb +++ b/ebook.rb @@ -18,6 +18,17 @@ end rand_key = rand($rand_limit) +def filtered_tweets(tweets) + include_urls = $include_urls || params["include_urls"] + source_tweets = tweets.map {|t| t.text.gsub(/\b(RT|MT) .+/, '') } + + if !include_urls + source_tweets = source_tweets.reject {|t| t =~ /(https?:\/\/)/ } + end + + source_tweets.map {|t| t.gsub(/(\#|@|(h\/t)|(http))\S+/, '') } +end + # randomly running only about 1 in $rand_limit times unless rand_key == 0 || params["force"] puts "Not running this time (key: #{rand_key})" @@ -26,31 +37,34 @@ else begin user_tweets = Twitter.user_timeline($source_account, :count => 200, :trim_user => true, :exclude_replies => true, :include_rts => false) max_id = user_tweets.last.id - source_tweets += user_tweets.reject {|t| t.text =~ /(https?:\/\/)/ }.map {|t| t.text.gsub(/\b(RT|MT) .+/, '') } + source_tweets += filtered_tweets(user_tweets) # Twitter only returns up to 3200 of a user timeline, includes retweets. 17.times do user_tweets = Twitter.user_timeline($source_account, :count => 200, :trim_user => true, :max_id => max_id - 1, :exclude_replies => true, :include_rts => false) puts "MAX_ID #{max_id} TWEETS: #{user_tweets.length}" max_id = user_tweets.last.id - source_tweets += user_tweets.reject {|t| t.text =~ /(https?:\/\/)/ }.map {|t| t.text.gsub(/\b(RT|MT) .+/, '') } + source_tweets += filtered_tweets(user_tweets) end rescue end puts "#{source_tweets.length} tweets found" + + if source_tweets.length == 0 + raise "Error fetching tweets from Twitter. Aborting." + end markov = MarkovChainer.new($markov_index) source_tweets.each do |twt| text = twt - text.gsub!(/\#[\w\d]+/, '') # remove hashtags markov.add_text(text) end tweet = nil - 5.times do + 10.times do tweet = markov.generate_sentence if rand(3) == 0 && tweet =~ /(in|to|from|for|with|by|our|of|your|around|under|beyond)\s\w+$/