mirror of
https://github.com/thewesker/ebooks_example.git
synced 2025-12-20 04:11:13 -05:00
Make banned_terms global, overwrite Model#valid_tweet?, first pass at bootstrapping
This commit is contained in:
55
boodoo.rb
55
boodoo.rb
@@ -22,15 +22,40 @@ module Ebooks::Boodoo
|
||||
array_splitter ||= / *[,;]+ */
|
||||
value.split(array_splitter).map(&:strip)
|
||||
end
|
||||
|
||||
# Builds a Twitter REST client from the credentials stored on this object.
#
# Reads @consumer_key, @consumer_secret, @access_token and
# @access_token_secret and copies each onto the client's configuration.
#
# @return [Twitter::REST::Client] the value returned by Client.new
def make_client
  Twitter::REST::Client.new do |settings|
    settings.consumer_key        = @consumer_key
    settings.consumer_secret     = @consumer_secret
    settings.access_token        = @access_token
    settings.access_token_secret = @access_token_secret
  end
end
|
||||
end
|
||||
|
||||
## Retweet check based on Really-Existing-RT practices
|
||||
class Ebooks::TweetMeta
  # Reports whether the wrapped tweet should be treated as a retweet.
  #
  # A tweet counts as a retweet when `tweet.retweeted_status?` is truthy, or
  # when its text contains a manual "RT"/"MT" marker (any case) followed by an
  # optional space and then "@" or ":".
  #
  # @return [Boolean]
  def is_retweet?
    # \b keeps ordinary words that merely end in "rt"/"mt" ("art: ...",
    # "start @5pm") from being mistaken for a manual retweet marker.
    tweet.retweeted_status? || !!tweet.text[/\b[RM]T ?[@:]/i]
  end
end
|
||||
|
||||
# Model whose tweet validation additionally rejects any text containing a
# globally configured banned term.
class Ebooks::Boodoo::Model < Ebooks::Model
  # Decides whether a generated token sequence is acceptable as a tweet.
  #
  # The text is rebuilt with NLP.reconstruct and accepted only when it is no
  # longer than +limit+, contains none of the banned terms as a whole word
  # (case-insensitive), and NLP.unmatched_enclosers? is false for it.
  #
  # @param tokens the token sequence handed to NLP.reconstruct
  # @param limit [Integer] maximum allowed length of the rebuilt text
  # @return [Boolean] true when the rebuilt text passes every check
  def valid_tweet?(tokens, limit)
    tweet = NLP.reconstruct(tokens)

    # $banned_terms is the global assigned in bots.rb from the BANNED_TERMS
    # setting. Array() tolerates it being unset (nil) instead of raising.
    found_banned = Array($banned_terms).any? do |term|
      literal = term.to_s
      # An empty term would compile to /\b\b/ and ban every tweet.
      next false if literal.empty?

      # Escape the term so regex metacharacters in the setting are matched
      # literally rather than altering (or breaking) the pattern.
      Regexp.new("\\b#{Regexp.escape(literal)}\\b", Regexp::IGNORECASE).match(tweet)
    end

    tweet.length <= limit && !found_banned && !NLP.unmatched_enclosers?(tweet)
  end
end
|
||||
|
||||
class Ebooks::Boodoo::BoodooBot < Ebooks::Bot
|
||||
@required_fields = ['consumer_key', 'consumer_secret',
|
||||
'access_token', 'access_token_secret',
|
||||
'bot_name', 'original']
|
||||
|
||||
# A rough error-catch/retry for rate limit, dupe fave, server timeouts
|
||||
def catch_twitter
|
||||
begin
|
||||
@@ -83,4 +108,32 @@ class Ebooks::Boodoo::BoodooBot < Ebooks::Bot
|
||||
end
|
||||
end
|
||||
|
||||
# Reports whether a file already exists at @model_path.
#
# Uses File.exist?; the File.exists? alias is deprecated and was removed in
# Ruby 3.2.
#
# @return [Boolean]
def has_model?
  File.exist?(@model_path)
end
|
||||
|
||||
# Reports whether a file already exists at @archive_path.
#
# Uses File.exist?; the File.exists? alias is deprecated and was removed in
# Ruby 3.2.
#
# @return [Boolean]
def has_archive?
  File.exist?(@archive_path)
end
|
||||
|
||||
# Creates an Archive for @original at @archive_path using a client from
# make_client, calls #sync on it, and stores the result of #sync in @archive.
#
# @return the value returned by Archive#sync
def get_archive!
  archive = Archive.new(@original, @archive_path, make_client)
  @archive = archive.sync
end
|
||||
|
||||
# Rebuilds the model file from the archive and then loads it.
#
# Consumes @archive_path into an Ebooks::Boodoo::Model, saves it to
# @model_path, and assigns the model loaded back from that path to @model.
# Progress is reported through `log` before each step.
#
# @return the model returned by Ebooks::Boodoo::Model.load
def make_model!
  log "Updating model: #{@model_path}"
  built = Ebooks::Boodoo::Model.consume(@archive_path)
  built.save(@model_path)

  log "Loading model..."
  @model = Ebooks::Boodoo::Model.load(@model_path)
end
|
||||
|
||||
# Reports whether the bot may start: true exactly when missing_fields
# returns an empty collection.
#
# @return [Boolean]
def can_run?
  missing_fields.size.zero?
end
|
||||
|
||||
# Lists the required fields that have no usable value.
#
# Each name in @required_fields is called on self via `send`; a field counts
# as missing when the result is nil or empty.
#
# NOTE(review): @required_fields is read as an instance variable here, while
# the visible assignment sits in the class body — confirm instances actually
# receive it.
#
# @return [Array<String>] names of the required fields that are nil or empty
def missing_fields
  @required_fields.select do |field|
    value = send(field)
    value.nil? || value.empty?
  end
end
|
||||
end
|
||||
|
||||
28
bots.rb
28
bots.rb
@@ -23,7 +23,7 @@ class UserInfo
|
||||
end
|
||||
|
||||
class CloneBot < BoodooBot
|
||||
attr_accessor :original, :model, :model_path, :auth_name
|
||||
attr_accessor :original, :model, :model_path, :auth_name, :archive_path, :archive
|
||||
# alias_method :oauth_token, :access_token
|
||||
# alias_method :oauth_token_secret, :access_token_secret
|
||||
def configure
|
||||
@@ -45,6 +45,7 @@ class CloneBot < BoodooBot
|
||||
# Array fields are CSV or SSV
|
||||
@blacklist = parse_array(SETTINGS['BLACKLIST'])
|
||||
@banned_terms = parse_array(SETTINGS['BANNED_TERMS'])
|
||||
$banned_terms = @banned_terms
|
||||
@special_terms = parse_array(SETTINGS['SPECIAL_TERMS'])
|
||||
|
||||
# Fields parsed as Fixnum, Float, or Range:
|
||||
@@ -63,10 +64,20 @@ class CloneBot < BoodooBot
|
||||
@attempts = 0
|
||||
@followers = []
|
||||
@following = []
|
||||
@archive_path = "corpus/#{@original}.json"
|
||||
@model_path = "model/#{@original}.model"
|
||||
# @have_talked = {}
|
||||
|
||||
# load model file
|
||||
load_model!
|
||||
if can_run?
|
||||
get_archive! unless has_archive?
|
||||
make_model! unless has_model?
|
||||
else
|
||||
missing_fields.each {|missing|
|
||||
log "Can't run without #{missing}"
|
||||
}
|
||||
log "Heroku will automatically try again immediately or in 10 minutes..."
|
||||
Kernel.exit(1)
|
||||
end
|
||||
end
|
||||
|
||||
def top100; @top100 ||= model.keywords.take(100); end
|
||||
@@ -90,10 +101,11 @@ class CloneBot < BoodooBot
|
||||
follow_parity
|
||||
end
|
||||
|
||||
# TODO: This throws a weird error.
|
||||
# Probably don't need it anyway?
|
||||
# @auth_name ||= twitter.user.screen_name
|
||||
# log "Logged in as #{auth_name}"
|
||||
scheduler.interval @refresh_model_interval do
|
||||
log "Refreshing archive/model..."
|
||||
get_archive!
|
||||
make_model!
|
||||
end
|
||||
end
|
||||
|
||||
def on_direct_message(dm)
|
||||
@@ -210,8 +222,6 @@ class CloneBot < BoodooBot
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
|
||||
CloneBot.new(SETTINGS['BOT_NAME']) do |bot|
|
||||
# CloneBot#configure does everything!
|
||||
bot
|
||||
|
||||
Reference in New Issue
Block a user