diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..b0d3d90 --- /dev/null +++ b/Gemfile @@ -0,0 +1,7 @@ +source 'https://rubygems.org' + +ruby "1.9.3" + +gem "twitter" +gem "typhoeus" +gem "iron_worker_ng" diff --git a/README.md b/README.md index 176841c..1e80802 100644 --- a/README.md +++ b/README.md @@ -11,9 +11,10 @@ A simple and hackish ruby script for pseudorandomly posting to a _ebooks account 5. Sign into dev.twitter.com with the same credentials 6. Create an application for your _ebooks account (generate the credentials) 7. Create a file named twitter_init.rb in this directory with the OAuth credentials and the source account you want to use for seeding the markov process -8. Upload to iron.io with `iron_worker upload ebook` -9. Run it with `iron_worker queue ebook` a few times -10. You can schedule it now to run regularly using the scheduler. I'd suggest once every 53 minutes or so. +8. Run "bundle install" +9. Upload to iron.io with `bundle exec iron_worker upload ebook` +10. Run it with `bundle exec iron_worker queue ebook` a few times +11. You can schedule it now to run regularly using the scheduler. I'd suggest once every 53 minutes or so. ## Configuring diff --git a/ebook.rb b/ebook.rb index 7ecaefa..1035a25 100644 --- a/ebook.rb +++ b/ebook.rb @@ -60,12 +60,12 @@ else source_tweets.each do |twt| text = twt - sentences = text.split(/[\.\"\'\?\!]/) + sentences = text.split(/\p{Punct}/) sentences.each do |sentence| next if sentence =~ /@/ - if sentence !~ /[\.\"\'\?\!]$/ + if sentence !~ /\p{Punct}$/ sentence += "." end @@ -78,9 +78,9 @@ else 10.times do tweet = markov.generate_sentence - if rand(3) == 0 && tweet =~ /(in|to|from|for|with|by|our|of|your|around|under|beyond)\s\w+$/ + if rand(3) == 0 && tweet =~ /(in|to|from|for|with|by|our|of|your|around|under|beyond)\p{Space}\w+$/ puts "Losing last word randomly" - tweet.gsub(/\s\w+.$/, '') # randomly losing the last word sometimes like horse_ebooks + tweet.gsub(/\p{Space}\p{Word}+.$/, '') # randomly losing the last word sometimes like horse_ebooks end if tweet.length < 40 && rand(10) == 0 @@ -92,8 +92,8 @@ else puts "MARKOV: #{tweet}" end - tweet_letters = tweet.gsub(/\W/, '') - break if !tweet.nil? && tweet.length < 110 && !source_tweets.any? {|t| t.gsub(/\W/, '') =~ /#{tweet_letters}/ } + tweet_letters = tweet.gsub(/\P{Word}/, '') + break if !tweet.nil? && tweet.length < 110 && !source_tweets.any? {|t| t.gsub(/\P{Word}/, '') =~ /#{tweet_letters}/ } end if params["tweet"] diff --git a/markov.rb b/markov.rb index f31d933..9b0b47b 100644 --- a/markov.rb +++ b/markov.rb @@ -10,9 +10,9 @@ class MarkovChainer def add_text(text) # make sure each paragraph ends with some sentence terminator - text.gsub!(/\n\s*\n/m, ".") + text.gsub!(/[\r\n]+\p{Space}*[\r\n]+/m, ".") text << "." - seps = /([.!?;])/ + seps = /(\p{Punct})/ sentence = "" text.split(seps).each { |p| if seps =~ p @@ -37,7 +37,7 @@ class MarkovChainer private def add_sentence(str, terminator) - words = str.scan(/[\w'’\-]+/) + words = str.scan(/[\p{Word}'’\-]+/) return unless words.size > order # ignore short sentences words << terminator buf = []