mirror of
https://github.com/thewesker/iron_ebooks.git
synced 2025-12-20 04:11:12 -05:00
Filter out @usernames, #hashtags, and URLs
This commit is contained in:
22
ebook.rb
22
ebook.rb
@@ -18,6 +18,17 @@ end
|
|||||||
|
|
||||||
rand_key = rand($rand_limit)
|
rand_key = rand($rand_limit)
|
||||||
|
|
||||||
|
# Reduces raw tweets to plain text suitable for feeding the Markov chain.
#
# Processing steps, in order:
#   1. Each tweet's text is cut at the first "RT " / "MT " marker (the marker
#      and everything after it on that line are removed).
#   2. Unless URLs are explicitly allowed, any tweet still containing an
#      http:// or https:// link is dropped entirely.
#   3. #hashtags, @mentions, "h/t" attributions and any remaining tokens
#      starting with "http" are stripped from the surviving texts.
#
# tweets - Enumerable of objects responding to #text.
#
# Reads the global $include_urls and params["include_urls"]. `params` is not
# defined inside this method; it must be supplied by the surrounding
# script/runtime.
#
# Returns an Array of Strings. Entries are not trimmed and may be empty or
# whitespace-only after stripping.
def filtered_tweets(tweets)
  include_urls = $include_urls || params["include_urls"]

  # Drop the retweet / modified-tweet marker and the quoted text that follows.
  source_tweets = tweets.map { |t| t.text.gsub(/\b(RT|MT) .+/, '') }

  unless include_urls
    source_tweets = source_tweets.reject { |t| t =~ /(https?:\/\/)/ }
  end

  # "h/t" gets its own branch with \S*: sharing the trailing \S+ with the
  # other prefixes meant a hat-tip followed by a space ("h/t @user") was
  # never matched and leaked into the Markov source text.
  source_tweets.map { |t| t.gsub(/(\#|@|(http))\S+|h\/t\S*/, '') }
end
|
||||||
|
|
||||||
# randomly running only about 1 in $rand_limit times
|
# randomly running only about 1 in $rand_limit times
|
||||||
unless rand_key == 0 || params["force"]
|
unless rand_key == 0 || params["force"]
|
||||||
puts "Not running this time (key: #{rand_key})"
|
puts "Not running this time (key: #{rand_key})"
|
||||||
@@ -26,31 +37,34 @@ else
|
|||||||
begin
|
begin
|
||||||
user_tweets = Twitter.user_timeline($source_account, :count => 200, :trim_user => true, :exclude_replies => true, :include_rts => false)
|
user_tweets = Twitter.user_timeline($source_account, :count => 200, :trim_user => true, :exclude_replies => true, :include_rts => false)
|
||||||
max_id = user_tweets.last.id
|
max_id = user_tweets.last.id
|
||||||
source_tweets += user_tweets.reject {|t| t.text =~ /(https?:\/\/)/ }.map {|t| t.text.gsub(/\b(RT|MT) .+/, '') }
|
source_tweets += filtered_tweets(user_tweets)
|
||||||
|
|
||||||
# Twitter only returns up to 3200 of a user timeline, includes retweets.
|
# Twitter only returns up to 3200 of a user timeline, includes retweets.
|
||||||
17.times do
|
17.times do
|
||||||
user_tweets = Twitter.user_timeline($source_account, :count => 200, :trim_user => true, :max_id => max_id - 1, :exclude_replies => true, :include_rts => false)
|
user_tweets = Twitter.user_timeline($source_account, :count => 200, :trim_user => true, :max_id => max_id - 1, :exclude_replies => true, :include_rts => false)
|
||||||
puts "MAX_ID #{max_id} TWEETS: #{user_tweets.length}"
|
puts "MAX_ID #{max_id} TWEETS: #{user_tweets.length}"
|
||||||
max_id = user_tweets.last.id
|
max_id = user_tweets.last.id
|
||||||
source_tweets += user_tweets.reject {|t| t.text =~ /(https?:\/\/)/ }.map {|t| t.text.gsub(/\b(RT|MT) .+/, '') }
|
source_tweets += filtered_tweets(user_tweets)
|
||||||
end
|
end
|
||||||
rescue
|
rescue
|
||||||
end
|
end
|
||||||
|
|
||||||
puts "#{source_tweets.length} tweets found"
|
puts "#{source_tweets.length} tweets found"
|
||||||
|
|
||||||
|
if source_tweets.length == 0
|
||||||
|
raise "Error fetching tweets from Twitter. Aborting."
|
||||||
|
end
|
||||||
|
|
||||||
markov = MarkovChainer.new($markov_index)
|
markov = MarkovChainer.new($markov_index)
|
||||||
|
|
||||||
source_tweets.each do |twt|
|
source_tweets.each do |twt|
|
||||||
text = twt
|
text = twt
|
||||||
text.gsub!(/\#[\w\d]+/, '') # remove hashtags
|
|
||||||
markov.add_text(text)
|
markov.add_text(text)
|
||||||
end
|
end
|
||||||
|
|
||||||
tweet = nil
|
tweet = nil
|
||||||
|
|
||||||
5.times do
|
10.times do
|
||||||
tweet = markov.generate_sentence
|
tweet = markov.generate_sentence
|
||||||
|
|
||||||
if rand(3) == 0 && tweet =~ /(in|to|from|for|with|by|our|of|your|around|under|beyond)\s\w+$/
|
if rand(3) == 0 && tweet =~ /(in|to|from|for|with|by|our|of|your|around|under|beyond)\s\w+$/
|
||||||
|
|||||||
Reference in New Issue
Block a user