From 003b17fd44158a353ffbc89ed3c7039178da4c50 Mon Sep 17 00:00:00 2001
From: Joel McCoy
Date: Sat, 27 Dec 2014 18:38:02 -0500
Subject: [PATCH] Make banned_terms global, overwrite Model#valid_tweet?, first pass at bootstrapping

---
Review notes (text in this section is ignored by `git am`):
* Model#valid_tweet? now reads $banned_terms, the global that the bots.rb
  hunk below assigns; it previously read $banned_words, which nothing in
  this patch assigns. Array() keeps an unset (nil) global from raising.
* The required-field list is now the constant REQUIRED_FIELDS. As a
  class-body @required_fields it belonged to the class object, so the
  instance method missing_fields saw nil and failed on nil.select.
* missing_fields now selects fields that are nil or empty; the previous
  condition was inverted and returned the fields that were present.
* File.exists? is replaced by File.exist? (the former is deprecated and
  was removed in Ruby 3.2).
* Every fix replaces a line in place, so hunk line counts and the diffstat
  are unchanged.

 boodoo.rb | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 bots.rb   | 28 +++++++++++++++++++---------
 2 files changed, 73 insertions(+), 10 deletions(-)

diff --git a/boodoo.rb b/boodoo.rb
index 7eb93d3..95c76a1 100644
--- a/boodoo.rb
+++ b/boodoo.rb
@@ -22,15 +22,40 @@ module Ebooks::Boodoo
     array_splitter ||= / *[,;]+ */
     value.split(array_splitter).map(&:strip)
   end
+
+  def make_client
+    Twitter::REST::Client.new do |config|
+      config.consumer_key = @consumer_key
+      config.consumer_secret = @consumer_secret
+      config.access_token = @access_token
+      config.access_token_secret = @access_token_secret
+    end
+  end
 end
 
+## Retweet check based on Really-Existing-RT practices
 class Ebooks::TweetMeta
   def is_retweet?
-    tweet.retweeted_status? || !!tweet.text[/^[RM]T[: ]/i]
+    tweet.retweeted_status? || !!tweet.text[/[RM]T ?[@:]/i]
+  end
+end
+
+class Ebooks::Boodoo::Model < Ebooks::Model
+  def valid_tweet?(tokens, limit)
+    tweet = NLP.reconstruct(tokens)
+    found_banned = Array($banned_terms).any? do |word|
+      re = Regexp.new("\\b#{word}\\b", "i")
+      re.match tweet
+    end
+    tweet.length <= limit && !found_banned && !NLP.unmatched_enclosers?(tweet)
   end
 end
 
 class Ebooks::Boodoo::BoodooBot < Ebooks::Bot
+  REQUIRED_FIELDS = ['consumer_key', 'consumer_secret',
+                     'access_token', 'access_token_secret',
+                     'bot_name', 'original'].freeze
+
   # A rough error-catch/retry for rate limit, dupe fave, server timeouts
   def catch_twitter
     begin
@@ -83,4 +108,32 @@ class Ebooks::Boodoo::BoodooBot < Ebooks::Bot
     end
   end
 
+  def has_model?
+    File.exist? @model_path
+  end
+
+  def has_archive?
+    File.exist? @archive_path
+  end
+
+  def get_archive!
+    @archive = Archive.new(@original, @archive_path, make_client).sync
+  end
+
+  def make_model!
+    log "Updating model: #{@model_path}"
+    Ebooks::Boodoo::Model.consume(@archive_path).save(@model_path)
+    log "Loading model..."
+    @model = Ebooks::Boodoo::Model.load(@model_path)
+  end
+
+  def can_run?
+    missing_fields.empty?
+  end
+
+  def missing_fields
+    REQUIRED_FIELDS.select { |field|
+      send(field).nil? || send(field).empty?
+    }
+  end
 end
diff --git a/bots.rb b/bots.rb
index 1830165..c444a25 100644
--- a/bots.rb
+++ b/bots.rb
@@ -23,7 +23,7 @@ class UserInfo
 end
 
 class CloneBot < BoodooBot
-  attr_accessor :original, :model, :model_path, :auth_name
+  attr_accessor :original, :model, :model_path, :auth_name, :archive_path, :archive
   # alias_method :oauth_token, :access_token
   # alias_method :oauth_token_secret, :access_token_secret
   def configure
@@ -45,6 +45,7 @@ class CloneBot < BoodooBot
     # Array fields are CSV or SSV
     @blacklist = parse_array(SETTINGS['BLACKLIST'])
     @banned_terms = parse_array(SETTINGS['BANNED_TERMS'])
+    $banned_terms = @banned_terms
     @special_terms = parse_array(SETTINGS['SPECIAL_TERMS'])
 
     # Fields parsed as Fixnum, Float, or Range:
@@ -63,10 +64,20 @@ class CloneBot < BoodooBot
     @attempts = 0
     @followers = []
     @following = []
+    @archive_path = "corpus/#{@original}.json"
+    @model_path = "model/#{@original}.model"
     # @have_talked = {}
 
-    # load model file
-    load_model!
+    if can_run?
+      get_archive! unless has_archive?
+      make_model! unless has_model?
+    else
+      missing_fields.each {|missing|
+        log "Can't run without #{missing}"
+      }
+      log "Heroku will automatically try again immediately or in 10 minutes..."
+      Kernel.exit(1)
+    end
   end
 
   def top100; @top100 ||= model.keywords.take(100); end
@@ -90,10 +101,11 @@ class CloneBot < BoodooBot
       follow_parity
     end
 
-    # TODO: This throws a weird error.
-    # Probably don't need it anyway?
-    # @auth_name ||= twitter.user.screen_name
-    # log "Logged in as #{auth_name}"
+    scheduler.interval @refresh_model_interval do
+      log "Refreshing archive/model..."
+      get_archive!
+      make_model!
+    end
   end
 
   def on_direct_message(dm)
@@ -210,8 +222,6 @@ class CloneBot < BoodooBot
   end
 end
 
-
-
 CloneBot.new(SETTINGS['BOT_NAME']) do |bot|
   # CloneBot#configure does everything!
   bot