Merge pull request #1 from sd/master

Patches from Sebastian Delmont for Unicode-compliant regexps and Gemfile
2026-02-05 03:55:15 -05:00 · 2013-08-10 07:02:23 -07:00
parent 3e8bdfba47 53ed3b834f
commit 0ba450f60a
4 changed files with 19 additions and 11 deletions
--- a/Gemfile
+++ b/Gemfile
@@ -0,0 +1,7 @@
+source 'https://rubygems.org'
+ruby "1.9.3"
+gem "twitter"
+gem "typhoeus"
+gem "iron_worker_ng"
--- a/README.md
+++ b/README.md
@@ -11,9 +11,10 @@ A simple and hackish ruby script for pseudorandomly posting to a _ebooks account
 5. Sign into dev.twitter.com with the same credentials
 6. Create an application for your _ebooks account (generate the credentials)
 7. Create a file named twitter_init.rb in this directory with the OAuth credentials and the source account you want to use for seeding the markov process
-8. Upload to iron.io with `iron_worker upload ebook`
+8. Run "bundle install"
-9. Run it with `iron_worker queue ebook` a few times
+9. Upload to iron.io with `bundle exec iron_worker upload ebook`
-10. You can schedule it now to run regularly using the scheduler. I'd suggest once every 53 minutes or so.
+10. Run it with `bundle exec iron_worker queue ebook` a few times
+11. You can schedule it now to run regularly using the scheduler. I'd suggest once every 53 minutes or so.
 ## Configuring
--- a/ebook.rb
+++ b/ebook.rb
@@ -60,7 +60,7 @@ else
  source_tweets.each do |twt|
    text = twt
-    if text !~ /[\.\"\'\?\!]/
+    if text !~ /\p{Punct}/
      text += "."
    end
@@ -72,9 +72,9 @@ else
  10.times do
    tweet = markov.generate_sentence
-    if rand(3) == 0 && tweet =~ /(in|to|from|for|with|by|our|of|your|around|under|beyond)\s\w+$/ 
+    if rand(3) == 0 && tweet =~ /(in|to|from|for|with|by|our|of|your|around|under|beyond)\p{Space}\w+$/ 
      puts "Losing last word randomly"
-      tweet.gsub(/\s\w+.$/, '')   # randomly losing the last word sometimes like horse_ebooks
+      tweet.gsub(/\p{Space}\p{Word}+.$/, '')   # randomly losing the last word sometimes like horse_ebooks
    end
    if tweet.length < 40 && rand(5) == 0
@@ -86,8 +86,8 @@ else
      puts "MARKOV: #{tweet}"
    end
-    tweet_letters = tweet.gsub(/\W/, '')
+    tweet_letters = tweet.gsub(/\P{Word}/, '')
-    break if !tweet.nil? && tweet.length < 110 && !source_tweets.any? {|t| t.gsub(/\W/, '') =~ /#{tweet_letters}/ }
+    break if !tweet.nil? && tweet.length < 110 && !source_tweets.any? {|t| t.gsub(/\P{Word}/, '') =~ /#{tweet_letters}/ }
  end
  if params["tweet"]
--- a/markov.rb
+++ b/markov.rb
@@ -10,9 +10,9 @@ class MarkovChainer
   def add_text(text)
     # make sure each paragraph ends with some sentence terminator
-     text.gsub!(/\n\s*\n/m, ".")
+     text.gsub!(/[\r\n]+\p{Space}*[\r\n]+/m, ".")
     text << "."
-     seps = /([.!?;])/
+     seps = /(\p{Punct})/
     sentence = ""
     text.split(seps).each { |p|
       if seps =~ p
@@ -37,7 +37,7 @@ class MarkovChainer
 private
   def add_sentence(str, terminator)
-     words = str.scan(/[\w'’\-]+/)
+     words = str.scan(/[\p{Word}'’\-]+/)
     return unless words.size > order # ignore short sentences
     words << terminator
     buf = []