diff --git a/Gemfile b/Gemfile index 8c89053..b9658f6 100644 --- a/Gemfile +++ b/Gemfile @@ -6,3 +6,4 @@ gem "twitter" gem "typhoeus" gem "iron_worker_ng" gem 'punkt-segmenter' +gem 'htmlentities' diff --git a/Gemfile.lock b/Gemfile.lock index 903a9ee..3de9327 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -7,6 +7,7 @@ GEM faraday (0.8.7) multipart-post (~> 1.1) ffi (1.9.0) + htmlentities (4.3.1) iron_core (0.6.2) rest (>= 2.2.0) iron_worker_ng (0.16.4) @@ -38,6 +39,7 @@ PLATFORMS ruby DEPENDENCIES + htmlentities iron_worker_ng punkt-segmenter twitter diff --git a/ebook.rb b/ebook.rb index b828b82..8c26b02 100644 --- a/ebook.rb +++ b/ebook.rb @@ -5,6 +5,7 @@ require 'twitter' require 'punkt-segmenter' require 'twitter_init' require 'markov' +require 'htmlentities' source_tweets = [] @@ -26,9 +27,10 @@ def random_closing_punctuation end def filtered_tweets(tweets) + html_decoder = HTMLEntities.new include_urls = $include_urls || params["include_urls"] include_replies = $include_replies || params["include_replies"] - source_tweets = tweets.map {|t| t.text.gsub(/\b(RT|MT) .+/, '') } + source_tweets = tweets.map {|t| html_decoder.decode(t.text).gsub(/\b(RT|MT) .+/, '') } if !include_urls source_tweets = source_tweets.reject {|t| t =~ /(https?:\/\/)/ } @@ -53,19 +55,20 @@ unless rand_key == 0 || params["force"] else # Fetch a thousand tweets begin - user_tweets = Twitter.user_timeline($source_account, :count => 200, :trim_user => true, :exclude_replies => false, :include_rts => false) + user_tweets = Twitter.user_timeline($source_account, :count => 200, :trim_user => true, :include_rts => false) max_id = user_tweets.last.id source_tweets += filtered_tweets(user_tweets) # Twitter only returns up to 3200 of a user timeline, includes retweets. 17.times do - user_tweets = Twitter.user_timeline($source_account, :count => 200, :trim_user => true, :max_id => max_id - 1, :exclude_replies => false, :include_rts => false) + user_tweets = Twitter.user_timeline($source_account, :count => 200, :trim_user => true, :max_id => max_id - 1, :include_rts => false) puts "MAX_ID #{max_id} TWEETS: #{user_tweets.length}" break if user_tweets.last.nil? max_id = user_tweets.last.id source_tweets += filtered_tweets(user_tweets) end - rescue + rescue => ex + puts ex.message end puts "#{source_tweets.length} tweets found" diff --git a/ebook.worker b/ebook.worker index e567c15..f0209eb 100644 --- a/ebook.worker +++ b/ebook.worker @@ -8,3 +8,4 @@ file "markov.rb" gem 'twitter' gem 'punkt-segmenter' +gem 'htmlentities'