mirror of
https://github.com/thewesker/iron_ebooks.git
synced 2025-12-20 12:21:14 -05:00
Some more tweaks on the tokenization and generation
This commit is contained in:
17
ebook.rb
17
ebook.rb
@@ -19,6 +19,12 @@ end
|
|||||||
|
|
||||||
rand_key = rand($rand_limit)
|
rand_key = rand($rand_limit)
|
||||||
|
|
||||||
|
# Punctuation marks that may be appended to close a generated sentence.
# Frozen so the shared list cannot be mutated accidentally at runtime.
CLOSING_PUNCTUATION = ['.', ';', ':', '?', '!'].freeze

# Picks one closing punctuation mark at random.
#
# @return [String] a single element of CLOSING_PUNCTUATION
def random_closing_punctuation
  CLOSING_PUNCTUATION.sample
end
|
||||||
|
|
||||||
def filtered_tweets(tweets)
|
def filtered_tweets(tweets)
|
||||||
include_urls = $include_urls || params["include_urls"]
|
include_urls = $include_urls || params["include_urls"]
|
||||||
source_tweets = tweets.map {|t| t.text.gsub(/\b(RT|MT) .+/, '') }
|
source_tweets = tweets.map {|t| t.text.gsub(/\b(RT|MT) .+/, '') }
|
||||||
@@ -89,6 +95,9 @@ else
|
|||||||
10.times do
|
10.times do
|
||||||
tweet = markov.generate_sentence
|
tweet = markov.generate_sentence
|
||||||
|
|
||||||
|
tweet_letters = tweet.gsub(/\P{Word}/, '')
|
||||||
|
next if source_tweets.any? {|t| t.gsub(/\P{Word}/, '') =~ /#{tweet_letters}/ }
|
||||||
|
|
||||||
if rand(3) == 0 && tweet =~ /(in|to|from|for|with|by|our|of|your|around|under|beyond)\p{Space}\w+$/
|
if rand(3) == 0 && tweet =~ /(in|to|from|for|with|by|our|of|your|around|under|beyond)\p{Space}\w+$/
|
||||||
puts "Losing last word randomly"
|
puts "Losing last word randomly"
|
||||||
tweet.gsub(/\p{Space}\p{Word}+.$/, '') # randomly losing the last word sometimes like horse_ebooks
|
tweet.gsub(/\p{Space}\p{Word}+.$/, '') # randomly losing the last word sometimes like horse_ebooks
|
||||||
@@ -96,6 +105,11 @@ else
|
|||||||
|
|
||||||
if tweet.length < 40 && rand(10) == 0
|
if tweet.length < 40 && rand(10) == 0
|
||||||
puts "Short tweet. Adding another sentence randomly"
|
puts "Short tweet. Adding another sentence randomly"
|
||||||
|
next_sentence = markov.generate_sentence
|
||||||
|
tweet_letters = next_sentence.gsub(/\P{Word}/, '')
|
||||||
|
next if source_tweets.any? {|t| t.gsub(/\P{Word}/, '') =~ /#{tweet_letters}/ }
|
||||||
|
|
||||||
|
tweet += random_closing_punctuation if tweet !~ /[.;:?!]$/
|
||||||
tweet += " #{markov.generate_sentence}"
|
tweet += " #{markov.generate_sentence}"
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -103,8 +117,7 @@ else
|
|||||||
puts "MARKOV: #{tweet}"
|
puts "MARKOV: #{tweet}"
|
||||||
end
|
end
|
||||||
|
|
||||||
tweet_letters = tweet.gsub(/\P{Word}/, '')
|
break if !tweet.nil? && tweet.length < 110
|
||||||
break if !tweet.nil? && tweet.length < 110 && !source_tweets.any? {|t| t.gsub(/\P{Word}/, '') =~ /#{tweet_letters}/ }
|
|
||||||
end
|
end
|
||||||
|
|
||||||
if params["tweet"]
|
if params["tweet"]
|
||||||
|
|||||||
Reference in New Issue
Block a user