Use Unicode property classes in the tweet-filtering and Markov regexes.

Replaces the \s / \w / \W shorthands and the hand-written punctuation sets
with \p{Space}, \p{Word}, \P{Word} and \p{Punct}.

In the "losing last word" branch of ebook.rb:
  * the result of gsub is assigned back to `tweet` (String#gsub returns a new
    string, so the bare call left `tweet` unchanged);
  * the guard uses \p{Word} like the rest of the patch;
  * the removal pattern no longer requires one extra character before `$`,
    so it strips exactly the trailing word the guard matched, including a
    one-character word.

NOTE(review): `seps = /(\p{Punct})/` in markov.rb also splits on apostrophes
and hyphens, which add_sentence's word pattern keeps inside words -- confirm
that this is intended.
NOTE(review): the post-image blob id on the ebook.rb `index` line was recorded
before the gsub revision above; regenerate it if it is relied upon.

diff --git a/ebook.rb b/ebook.rb
index 9c84b3c..10c46ec 100644
--- a/ebook.rb
+++ b/ebook.rb
@@ -60,7 +60,7 @@ else
   source_tweets.each do |twt|
     text = twt
 
-    if text !~ /[\.\"\'\?\!]/
+    if text !~ /\p{Punct}/
       text += "."
     end
 
@@ -72,9 +72,9 @@ else
   10.times do
     tweet = markov.generate_sentence
 
-    if rand(3) == 0 && tweet =~ /(in|to|from|for|with|by|our|of|your|around|under|beyond)\s\w+$/
+    if rand(3) == 0 && tweet =~ /(in|to|from|for|with|by|our|of|your|around|under|beyond)\p{Space}\p{Word}+$/
       puts "Losing last word randomly"
-      tweet.gsub(/\s\w+.$/, '') # randomly losing the last word sometimes like horse_ebooks
+      tweet = tweet.gsub(/\p{Space}\p{Word}+$/, '') # randomly losing the last word sometimes like horse_ebooks
     end
 
     if tweet.length < 40 && rand(5) == 0
@@ -86,8 +86,8 @@ else
       puts "MARKOV: #{tweet}"
     end
 
-    tweet_letters = tweet.gsub(/\W/, '')
-    break if !tweet.nil? && tweet.length < 110 && !source_tweets.any? {|t| t.gsub(/\W/, '') =~ /#{tweet_letters}/ }
+    tweet_letters = tweet.gsub(/\P{Word}/, '')
+    break if !tweet.nil? && tweet.length < 110 && !source_tweets.any? {|t| t.gsub(/\P{Word}/, '') =~ /#{tweet_letters}/ }
   end
 
   if params["tweet"]
diff --git a/markov.rb b/markov.rb
index f31d933..9b0b47b 100644
--- a/markov.rb
+++ b/markov.rb
@@ -10,9 +10,9 @@ class MarkovChainer
 
   def add_text(text)
     # make sure each paragraph ends with some sentence terminator
-    text.gsub!(/\n\s*\n/m, ".")
+    text.gsub!(/[\r\n]+\p{Space}*[\r\n]+/m, ".")
     text << "."
-    seps = /([.!?;])/
+    seps = /(\p{Punct})/
     sentence = ""
     text.split(seps).each { |p|
       if seps =~ p
@@ -37,7 +37,7 @@ class MarkovChainer
   private
 
   def add_sentence(str, terminator)
-    words = str.scan(/[\w'’\-]+/)
+    words = str.scan(/[\p{Word}'’\-]+/)
     return unless words.size > order # ignore short sentences
     words << terminator
     buf = []