From 9620e24416d5ce55985e728676596837c3b70c67 Mon Sep 17 00:00:00 2001 From: Sebastian Delmont Date: Sat, 10 Aug 2013 07:45:59 -0400 Subject: [PATCH 1/2] Use UNICODE-compatible regular expressions. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Even though English was enough for the Lord to write the Bible, it's still a smart idea to allow for UNICODE characters if only to allow horses to coöperate in a way the New Yorker would approve. See: http://www.ruby-doc.org/core-1.9.3/Regexp.html#label-Character+Properties and http://www.newyorker.com/online/blogs/culture/2012/04/the-curse-of-the-diaeresis.html --- ebook.rb | 10 +++++----- markov.rb | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/ebook.rb b/ebook.rb index 9c84b3c..10c46ec 100644 --- a/ebook.rb +++ b/ebook.rb @@ -60,7 +60,7 @@ else source_tweets.each do |twt| text = twt - if text !~ /[\.\"\'\?\!]/ + if text !~ /\p{Punct}/ text += "." end @@ -72,9 +72,9 @@ else 10.times do tweet = markov.generate_sentence - if rand(3) == 0 && tweet =~ /(in|to|from|for|with|by|our|of|your|around|under|beyond)\s\w+$/ + if rand(3) == 0 && tweet =~ /(in|to|from|for|with|by|our|of|your|around|under|beyond)\p{Space}\w+$/ puts "Losing last word randomly" - tweet.gsub(/\s\w+.$/, '') # randomly losing the last word sometimes like horse_ebooks + tweet.gsub(/\p{Space}\p{Word}+.$/, '') # randomly losing the last word sometimes like horse_ebooks end if tweet.length < 40 && rand(5) == 0 @@ -86,8 +86,8 @@ else puts "MARKOV: #{tweet}" end - tweet_letters = tweet.gsub(/\W/, '') - break if !tweet.nil? && tweet.length < 110 && !source_tweets.any? 
{|t| t.gsub(/\P{Word}/, '') =~ /#{tweet_letters}/ } end if params["tweet"] diff --git a/markov.rb b/markov.rb index f31d933..9b0b47b 100644 --- a/markov.rb +++ b/markov.rb @@ -10,9 +10,9 @@ class MarkovChainer def add_text(text) # make sure each paragraph ends with some sentence terminator - text.gsub!(/\n\s*\n/m, ".") + text.gsub!(/[\r\n]+\p{Space}*[\r\n]+/m, ".") text << "." - seps = /([.!?;])/ + seps = /(\p{Punct})/ sentence = "" text.split(seps).each { |p| if seps =~ p @@ -37,7 +37,7 @@ class MarkovChainer private def add_sentence(str, terminator) - words = str.scan(/[\w'’\-]+/) + words = str.scan(/[\p{Word}'’\-]+/) return unless words.size > order # ignore short sentences words << terminator buf = [] From 53ed3b834fcaed6f9fa4424c57d3d1ce32fae367 Mon Sep 17 00:00:00 2001 From: Sebastian Delmont Date: Sat, 10 Aug 2013 07:51:58 -0400 Subject: [PATCH 2/2] Use bundler and a Gemfile to manage local gems. --- Gemfile | 7 +++++++ README.md | 7 ++++--- 2 files changed, 11 insertions(+), 3 deletions(-) create mode 100644 Gemfile diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..b0d3d90 --- /dev/null +++ b/Gemfile @@ -0,0 +1,7 @@ +source 'https://rubygems.org' + +ruby "1.9.3" + +gem "twitter" +gem "typhoeus" +gem "iron_worker_ng" diff --git a/README.md b/README.md index 176841c..1e80802 100644 --- a/README.md +++ b/README.md @@ -11,9 +11,10 @@ A simple and hackish ruby script for pseudorandomly posting to a _ebooks account 5. Sign into dev.twitter.com with the same credentials 6. Create an application for your _ebooks account (generate the credentials) 7. Create a file named twitter_init.rb in this directory with the OAuth credentials and the source account you want to use for seeding the markov process -8. Upload to iron.io with `iron_worker upload ebook` -9. Run it with `iron_worker queue ebook` a few times -10. You can schedule it now to run regularly using the scheduler. I'd suggest once every 53 minutes or so. +8. Run "bundle install" +9. 
Upload to iron.io with `bundle exec iron_worker upload ebook` +10. Run it with `bundle exec iron_worker queue ebook` a few times +11. You can schedule it now to run regularly using the scheduler. I'd suggest once every 53 minutes or so. ## Configuring