Make banned_terms global, overwrite Model#valid_tweet?, first pass at bootstrapping

This commit is contained in:
Joel McCoy
2014-12-27 18:38:02 -05:00
parent a331b24cfd
commit 003b17fd44
2 changed files with 73 additions and 10 deletions

View File

@@ -22,15 +22,40 @@ module Ebooks::Boodoo
array_splitter ||= / *[,;]+ */ array_splitter ||= / *[,;]+ */
value.split(array_splitter).map(&:strip) value.split(array_splitter).map(&:strip)
end end
# Builds a Twitter REST client configured with this object's OAuth
# credentials (@consumer_key, @consumer_secret, @access_token and
# @access_token_secret).
#
# Returns whatever Twitter::REST::Client.new returns.
def make_client
  # Collected in the order they are applied to the client configuration.
  credentials = {
    consumer_key: @consumer_key,
    consumer_secret: @consumer_secret,
    access_token: @access_token,
    access_token_secret: @access_token_secret
  }

  Twitter::REST::Client.new do |client|
    credentials.each do |setting, value|
      client.public_send("#{setting}=", value)
    end
  end
end
end end
## Retweet check based on Really-Existing-RT practices
class Ebooks::TweetMeta class Ebooks::TweetMeta
# Reports whether the tweet is a retweet.
#
# True when the tweet object itself says so (retweeted_status?), or when the
# text contains a manual "RT"/"MT" marker followed by "@" or ":" (with an
# optional space in between), anywhere in the text.
#
# The marker must start at a word boundary so that ordinary words which
# merely end in "rt"/"mt" (e.g. "start: ..." or "smart @user") are not
# mistaken for retweets.
#
# Returns true or false.
def is_retweet?
  tweet.retweeted_status? || !!tweet.text[/\b[RM]T ?[@:]/i]
end
end
class Ebooks::Boodoo::Model < Ebooks::Model
# Decides whether a generated token sequence may be tweeted.
#
# tokens - the token sequence handed to NLP.reconstruct to obtain the text.
# limit  - maximum allowed length of the reconstructed text.
#
# The text is rejected when it is longer than limit, when
# NLP.unmatched_enclosers? reports a problem, or when it contains any of the
# globally configured banned terms as a whole word (case-insensitive).
#
# Banned terms are read from $banned_terms, the global that the bot
# configuration assigns. A nil global is treated as "no banned terms".
#
# Returns true or false.
def valid_tweet?(tokens, limit)
  tweet = NLP.reconstruct(tokens)
  return false if tweet.length > limit
  return false if NLP.unmatched_enclosers?(tweet)

  Array($banned_terms).none? do |term|
    literal = term.to_s
    # An empty term would compile to /\b\b/ and ban nearly every tweet.
    next false if literal.empty?

    # Escape the term so punctuation in a setting is matched literally
    # instead of being interpreted as (possibly invalid) regexp syntax.
    tweet =~ /\b#{Regexp.escape(literal)}\b/i
  end
end
end end
class Ebooks::Boodoo::BoodooBot < Ebooks::Bot class Ebooks::Boodoo::BoodooBot < Ebooks::Bot
# Names of the settings that must be filled in before the bot can run;
# missing_fields iterates over this list and calls each name as a method.
# NOTE(review): this assignment sits directly in the class body, so it sets an
# instance variable on the class object rather than on bot instances —
# confirm that instance methods reading @required_fields actually see it.
@required_fields = ['consumer_key', 'consumer_secret',
'access_token', 'access_token_secret',
'bot_name', 'original']
# A rough error-catch/retry for rate limit, dupe fave, server timeouts # A rough error-catch/retry for rate limit, dupe fave, server timeouts
def catch_twitter def catch_twitter
begin begin
@@ -83,4 +108,32 @@ class Ebooks::Boodoo::BoodooBot < Ebooks::Bot
end end
end end
# Reports whether a file already exists at @model_path.
#
# Uses File.exist?; the File.exists? alias is deprecated and no longer
# available on current Ruby versions.
#
# Returns true or false.
def has_model?
  File.exist?(@model_path)
end
# Reports whether a file already exists at @archive_path.
#
# Uses File.exist?; the File.exists? alias is deprecated and no longer
# available on current Ruby versions.
#
# Returns true or false.
def has_archive?
  File.exist?(@archive_path)
end
# Syncs the archive for @original at @archive_path, using a client built by
# make_client.
#
# Stores the value returned by Archive#sync in @archive and returns it.
# NOTE(review): @archive holds the result of #sync, which may or may not be
# the Archive object itself — confirm against Archive#sync.
def get_archive!
  client = make_client
  archive = Archive.new(@original, @archive_path, client)
  @archive = archive.sync
end
# Rebuilds the model file from the archive and loads it.
#
# Consumes @archive_path with Ebooks::Boodoo::Model, saves the result to
# @model_path, then loads that file back into @model. Progress is reported
# through log before each step.
#
# Returns the loaded model (the value assigned to @model).
def make_model!
  log "Updating model: #{@model_path}"
  model_class = Ebooks::Boodoo::Model
  consumed = model_class.consume(@archive_path)
  consumed.save(@model_path)

  log "Loading model..."
  @model = model_class.load(@model_path)
end
# Reports whether the bot has everything it needs to start.
#
# Returns true when missing_fields comes back empty, false otherwise.
def can_run?
  outstanding = missing_fields
  outstanding.empty?
end
# Lists the required settings that have not been filled in.
#
# Each required field name is called as a method on this object; the field
# counts as missing when that call returns nil or an empty value.
#
# Returns an Array of field names (empty when nothing is missing).
def missing_fields
  # The required-field list is assigned in the class body, which stores it on
  # the class object rather than on instances, and it is not inherited by
  # subclasses. Use an instance-level list if one exists, otherwise the
  # nearest ancestor that defines one.
  fields = @required_fields ||
           self.class.ancestors
               .map { |klass| klass.instance_variable_get(:@required_fields) }
               .compact
               .first ||
           []

  fields.select do |field|
    value = send(field)
    value.nil? || (value.respond_to?(:empty?) && value.empty?)
  end
end
end end

28
bots.rb
View File

@@ -23,7 +23,7 @@ class UserInfo
end end
class CloneBot < BoodooBot class CloneBot < BoodooBot
attr_accessor :original, :model, :model_path, :auth_name attr_accessor :original, :model, :model_path, :auth_name, :archive_path, :archive
# alias_method :oauth_token, :access_token # alias_method :oauth_token, :access_token
# alias_method :oauth_token_secret, :access_token_secret # alias_method :oauth_token_secret, :access_token_secret
def configure def configure
@@ -45,6 +45,7 @@ class CloneBot < BoodooBot
# Array fields are CSV or SSV # Array fields are CSV or SSV
@blacklist = parse_array(SETTINGS['BLACKLIST']) @blacklist = parse_array(SETTINGS['BLACKLIST'])
@banned_terms = parse_array(SETTINGS['BANNED_TERMS']) @banned_terms = parse_array(SETTINGS['BANNED_TERMS'])
$banned_terms = @banned_terms
@special_terms = parse_array(SETTINGS['SPECIAL_TERMS']) @special_terms = parse_array(SETTINGS['SPECIAL_TERMS'])
# Fields parsed as Fixnum, Float, or Range: # Fields parsed as Fixnum, Float, or Range:
@@ -63,10 +64,20 @@ class CloneBot < BoodooBot
@attempts = 0 @attempts = 0
@followers = [] @followers = []
@following = [] @following = []
@archive_path = "corpus/#{@original}.json"
@model_path = "model/#{@original}.model"
# @have_talked = {} # @have_talked = {}
# load model file if can_run?
load_model! get_archive! unless has_archive?
make_model! unless has_model?
else
missing_fields.each {|missing|
log "Can't run without #{missing}"
}
log "Heroku will automatically try again immediately or in 10 minutes..."
Kernel.exit(1)
end
end end
def top100; @top100 ||= model.keywords.take(100); end def top100; @top100 ||= model.keywords.take(100); end
@@ -90,10 +101,11 @@ class CloneBot < BoodooBot
follow_parity follow_parity
end end
# TODO: This throws a weird error. scheduler.interval @refresh_model_interval do
# Probably don't need it anyway? log "Refreshing archive/model..."
# @auth_name ||= twitter.user.screen_name get_archive!
# log "Logged in as #{auth_name}" make_model!
end
end end
def on_direct_message(dm) def on_direct_message(dm)
@@ -210,8 +222,6 @@ class CloneBot < BoodooBot
end end
end end
CloneBot.new(SETTINGS['BOT_NAME']) do |bot| CloneBot.new(SETTINGS['BOT_NAME']) do |bot|
# CloneBot#configure does everything! # CloneBot#configure does everything!
bot bot