mirror of
https://github.com/thewesker/iron_ebooks.git
synced 2025-12-20 04:11:12 -05:00
Use Unicode-compatible regular expressions.
Even though English was enough for the Lord to write the Bible, it's still a smart idea to allow for Unicode characters, if only to allow horses to coöperate in a way the New Yorker would approve. See: http://www.ruby-doc.org/core-1.9.3/Regexp.html#label-Character+Properties and http://www.newyorker.com/online/blogs/culture/2012/04/the-curse-of-the-diaeresis.html
This commit is contained in:
10
ebook.rb
10
ebook.rb
@@ -60,7 +60,7 @@ else
|
||||
source_tweets.each do |twt|
|
||||
text = twt
|
||||
|
||||
if text !~ /[\.\"\'\?\!]/
|
||||
if text !~ /\p{Punct}/
|
||||
text += "."
|
||||
end
|
||||
|
||||
@@ -72,9 +72,9 @@ else
|
||||
10.times do
|
||||
tweet = markov.generate_sentence
|
||||
|
||||
if rand(3) == 0 && tweet =~ /(in|to|from|for|with|by|our|of|your|around|under|beyond)\s\w+$/
|
||||
if rand(3) == 0 && tweet =~ /(in|to|from|for|with|by|our|of|your|around|under|beyond)\p{Space}\w+$/
|
||||
puts "Losing last word randomly"
|
||||
tweet.gsub(/\s\w+.$/, '') # randomly losing the last word sometimes like horse_ebooks
|
||||
tweet.gsub(/\p{Space}\p{Word}+.$/, '') # randomly losing the last word sometimes like horse_ebooks
|
||||
end
|
||||
|
||||
if tweet.length < 40 && rand(5) == 0
|
||||
@@ -86,8 +86,8 @@ else
|
||||
puts "MARKOV: #{tweet}"
|
||||
end
|
||||
|
||||
tweet_letters = tweet.gsub(/\W/, '')
|
||||
break if !tweet.nil? && tweet.length < 110 && !source_tweets.any? {|t| t.gsub(/\W/, '') =~ /#{tweet_letters}/ }
|
||||
tweet_letters = tweet.gsub(/\P{Word}/, '')
|
||||
break if !tweet.nil? && tweet.length < 110 && !source_tweets.any? {|t| t.gsub(/\P{Word}/, '') =~ /#{tweet_letters}/ }
|
||||
end
|
||||
|
||||
if params["tweet"]
|
||||
|
||||
Reference in New Issue
Block a user