Files
ebooks_example/boodoo.rb
2015-02-11 16:05:31 -05:00

353 lines
9.2 KiB
Ruby

require 'twitter_ebooks'
require 'cloudinary'
include Ebooks
## Retweet check based on Really-Existing-RT practices
class Ebooks::TweetMeta
# Decide whether the wrapped tweet is a retweet: either a native retweet or
# a manual "RT @user" / "MT: ..." style quote.
#
# The manual marker has to start at a word boundary. Without that anchor,
# ordinary words ending in "rt"/"mt" followed by a mention or colon
# (e.g. "support @user", "art: ...") were misclassified as retweets.
#
# @return [Boolean]
def is_retweet?
  tweet.retweeted_status? || !!tweet.text[/\b[RM]T ?[@:]/i]
end
end
module Ebooks::Boodoo
# Tell whether Cloudinary storage is configured, i.e. whether the
# CLOUDINARY_URL environment variable holds a non-empty value.
# @return [Boolean]
def has_cloud?
  cloud_url = ENV['CLOUDINARY_URL']
  return false if cloud_url.nil?

  !cloud_url.empty?
end
# Convert a config value into a number or Range without evaluating it as code.
#
# Accepted forms (matched per line of the stringified value):
#   "12"     -> Integer
#   "1.5"    -> Float
#   "1..5"   -> inclusive Range
#   "1...5"  -> exclusive Range
#
# Digits are always read as decimal, so "010" is 10 (the former eval-based
# version produced octal 8 and raised SyntaxError for "09"). Input with dots
# but nothing after them, such as "5.", now yields nil instead of raising.
#
# @param value [#to_s] Integer, Float, Range, String or nil
# @return [Integer, Float, Range, nil] nil when the value is not recognised
def parse_num(value)
  match = value.to_s.match(/^(\d+)(?:(\.{1,3})(\d*))?$/)
  return nil if match.nil?

  head, dots, tail = match.captures
  return head.to_i if dots.nil?
  # Dots with no trailing digits are not a complete literal.
  return nil if tail.empty?

  case dots.length
  when 1 then "#{head}.#{tail}".to_f
  when 2 then Range.new(head.to_i, tail.to_i)
  else Range.new(head.to_i, tail.to_i, true)
  end
end
# Coerce a config value into a Range.
#
# The value is first passed through #parse_num. A result that is nil or
# already responds to #to_a (e.g. a Range) is returned untouched; any other
# result n becomes the single-element range n..n.
#
# @param value [#to_s]
# @return [Range, nil]
def parse_range(value)
  parsed = parse_num(value)
  return parsed if parsed.nil? || parsed.respond_to?(:to_a)

  Range.new(parsed, parsed)
end
# Build a random string of symbols to stand in for a censored word.
# @param len [Integer] number of symbols to generate
# @return [String] len characters drawn at random from ! @ $ % ^ & *
def obscure_curse(len)
  symbols = ['!', '@', '$', '%', '^', '&', '*']
  len.times.map { symbols.sample }.join('')
end
# Replace every banned term in a tweet with random symbols.
#
# Each entry of the global $banned_terms list is matched as a whole word,
# case-insensitively. Terms are escaped before being compiled, so entries
# containing regex metacharacters (".", "(", "+", ...) are matched literally
# instead of raising RegexpError or censoring unrelated text. An unset
# $banned_terms is treated as an empty list.
#
# @param tweet [String] text to censor; modified in place
# @return [String] the same (mutated) string
def obscure_curses(tweet)
  # TODO: Ignore banned terms that are part of @-mentions
  Array($banned_terms).each do |term|
    re = Regexp.new("\\b#{Regexp.escape(term)}\\b", Regexp::IGNORECASE)
    # Block form so that every occurrence gets its own freshly built mask.
    tweet.gsub!(re) { |hit| Ebooks::Boodoo.obscure_curse(hit.size) }
  end
  tweet
end
# Split a delimited string into a list of trimmed entries.
# @param value [String] text to split
# @param array_splitter [String, Regexp, nil] separator; when omitted, runs
#   of commas/semicolons (with optional surrounding spaces) are used
# @return [Array<String>] the pieces with surrounding whitespace removed
def parse_array(value, array_splitter=nil)
  splitter = array_splitter || / *[,;]+ */
  pieces = value.split(splitter)
  pieces.map { |piece| piece.strip }
end
# Build a Twitter REST client from the OAuth credentials held in the
# @consumer_key, @consumer_secret, @access_token and @access_token_secret
# instance variables of the object this method runs on.
# @return [Twitter::REST::Client]
def make_client
  Twitter::REST::Client.new do |client|
    client.consumer_key, client.consumer_secret = @consumer_key, @consumer_secret
    client.access_token, client.access_token_secret = @access_token, @access_token_secret
  end
end
# Convert corpus files to ebooks-style JSON archives.
#
# For every path:
# * a file whose extension is "json" is skipped,
# * a "csv" file is read as a Twitter archive export, using its "tweet_id"
#   and "text" columns,
# * anything else is treated as plain text, one tweet per line.
# The result is written to "<name>.json" in the current working directory,
# where <name> is the file's basename up to its first dot.
#
# Encountering an existing JSON corpus no longer aborts the whole run: the
# remaining paths are still converted.
#
# @param paths [Array<String>] corpus files to convert
# @return [Array<String>] the paths that were given
def jsonify(paths)
  paths.each do |path|
    name = File.basename(path).split('.').first
    ext = path.split('.').last.downcase
    new_path = name + ".json"

    if ext == "json"
      log "Taking no action on JSON corpus at #{path}"
      next # was `return`, which silently dropped every later path
    end

    content = File.read(path, :encoding => 'utf-8')
    if ext == "csv" # from twitter archive
      log "Reading CSV corpus from #{path}"
      rows = CSV.parse(content)
      header = rows.shift
      text_col = header.index('text')
      id_col = header.index('tweet_id')
      lines = rows.map do |row|
        raw_id = row[id_col]
        # An unquoted empty CSV field parses as nil rather than "".
        id = (raw_id.nil? || raw_id.empty?) ? 0 : raw_id
        {id: id, text: row[text_col]}
      end
    else
      log "Reading plaintext corpus from #{path}"
      lines = content.split("\n").map { |line| {id: 0, text: line} }
    end

    # BELOW IS FOR FILE-SYSTEM; NEED TO ALTER FOR CLOUDINARY/REQUEST?
    File.open(new_path, 'w') do |f|
      log "Writing #{lines.length} lines to #{new_path}"
      f.write(JSON.pretty_generate(lines))
    end
  end
end
end
class Ebooks::Boodoo::CloudArchive < Ebooks::Archive
# Set up a tweet archive that is mirrored to Cloudinary.
#
# Without Cloudinary configuration this defers entirely to the parent
# initializer. Otherwise it resolves the local JSON path, then runs
# fetch!, parse!, sync and persist in that order and logs a summary.
#
# @param username [String] account whose tweets are archived
# @param path [String, nil] archive file, or a directory to place
#   "<username>.json" in; defaults to "corpus/<username>.json"
# @param client [Object, nil] Twitter client; Boodoo.make_client is used
#   when omitted
def initialize(username, path=nil, client=nil)
  # Just bail on everything if we aren't using Cloudinary
  return super unless has_cloud?

  # Otherwise duplicate a lot of super(), but also use ~~THE CLOUD~~
  @username = username
  @path = path || "corpus/#{username}.json"
  @path = File.join(@path, "#{username}.json") if File.directory?(@path)
  @basename = File.basename(@path)
  @client = client || Boodoo.make_client
  @url = Cloudinary::Utils.cloudinary_url(@basename, :resource_type=>:raw)

  fetch!
  parse!
  sync # per the original note, #sync saves automatically, so no save! here
  persist

  summary =
    if @tweets.empty?
      "New archive for @#{@username} at #{@url}"
    else
      "Currently #{@tweets.length} tweets for #{@username}"
    end
  log summary
end
# Replace the archive stored on Cloudinary with the local file at @path:
# the raw resource with the given id is deleted, then the file is uploaded
# under that id with :invalidate set.
# @param public_id [String, nil] Cloudinary id; defaults to @basename
# @return [Object] whatever Cloudinary::Uploader.upload returns
def persist(public_id=nil)
  target_id = public_id || @basename
  log "Deleting out-dated archive ~~~FROM THE CLOUD~~~"
  Cloudinary::Api.delete_resources(target_id, :resource_type=>:raw)
  log "Uploading JSON archive ~~TO THE CLOUD~~"
  upload_result = Cloudinary::Uploader.upload(@path, :resource_type=>:raw, :public_id=>target_id, :invalidate=>true)
  log "Upload complete!"
  upload_result
end

# Upload the archive under its default id, @basename.
# @return [Object] see #persist
def persist!
  persist(@basename)
end
# Decode a JSON archive into an array of tweets with symbol keys.
# @param content [String, nil] JSON text; falls back to @content and then
#   to an empty JSON array
# @return [Array<Hash>]
def parse(content=nil)
  json = [content, @content, '[]'].find { |candidate| candidate }
  JSON.parse(json, symbolize_names: true)
end

# Decode the archive (see #parse) and keep the result in @tweets.
# @param content [String, nil]
# @return [Array<Hash>]
def parse!(content=nil)
  @tweets = parse(content)
end
# Write @tweets to disk as pretty-printed JSON.
# @param path [String, nil] destination file; defaults to @path
# @return [Integer] result of the underlying IO#write call
def save(path=nil)
  File.open(path || @path, 'w') { |file| file.write(JSON.pretty_generate(@tweets)) }
end

# Write @tweets to the archive's own file, @path.
# @return [Integer] see #save
def save!
  save(@path)
end
# Download the JSON archive through Cloudinary::Downloader.
# @param url [String, nil] location to download; defaults to @url
# @return [String, nil] the downloaded body, or nil when it is empty
def fetch(url=nil)
  source = url || @url
  log "Fetching JSON archive ~~~FROM THE CLOUD~~~"
  body = Cloudinary::Downloader.download(source)
  unless body.empty?
    log "Download complete!"
    return body
  end
  log "WARNING: JSON archive not found ~~~IN THE CLOUD~~~"
  nil
end

# Download the archive (see #fetch) and keep the body in @content.
# @return [String, nil]
def fetch!
  @content = fetch
end
end
class Ebooks::Boodoo::CloudModel < Ebooks::Model
# Rebuild a model from marshaled content instead of from a file.
# NOTE(review): the object returned is created with Model.new, i.e. it is a
# plain Model rather than a CloudModel -- confirm callers expect that.
# NOTE(security): Marshal.load can instantiate arbitrary objects; only pass
# content obtained from a trusted source.
# @param content [String] Marshal dump of a Hash holding :tokens,
#   :sentences, :mentions and :keywords
# @return [Model]
def self.parse(content)
  model = Model.new
  props = Marshal.load(content)
  [:tokens, :sentences, :mentions, :keywords].each do |key|
    model.instance_variable_set("@#{key}", props[key])
  end
  model
end
# Create a model whose file lives at a per-user path and can be mirrored to
# Cloudinary.
#
# The parent initializer is always invoked. The previous no-cloud branch did
# `return Ebooks::Model.new`, but Ruby discards the return value of
# #initialize, so callers received a CloudModel on which neither the
# parent's setup nor @path/@basename had been established.
#
# When Cloudinary is configured, @url is derived from @basename the same way
# CloudArchive does, because #fetch falls back to @url.
#
# @param username [String] account the model belongs to
# @param path [String, nil] model file, or a directory to place
#   "<username>.model" in; defaults to "corpus/<username>.model"
def initialize(username, path=nil)
  super()
  @path = path || "corpus/#{username}.model"
  @path = File.join(@path, "#{username}.model") if File.directory?(@path)
  @basename = File.basename(@path)
  @url = Cloudinary::Utils.cloudinary_url(@basename, :resource_type=>:raw) if has_cloud?
end
# Feed the tweets of a JSON archive into the model.
# @param content [String] ebooks-style JSON twitter archive: an array of
#   objects, each with a "text" entry
# @return [Object] whatever consume_lines returns
def from_json(content)
  log "Reading json corpus with length #{content.size}"
  archive = JSON.parse(content)
  texts = archive.map { |entry| entry['text'] }
  consume_lines(texts)
end
# Replace the model stored on Cloudinary with the local file at @path: the
# raw resource with the given id is deleted, then the file is uploaded under
# that same id with :invalidate set.
#
# The delete call now targets the requested public_id. It used to always
# delete @basename, so persisting under a custom id removed the wrong
# resource and left the stale one in place.
#
# @param public_id [String, nil] Cloudinary id; defaults to @basename
# @return [Object] whatever Cloudinary::Uploader.upload returns
def persist(public_id=nil)
  public_id ||= @basename
  log "Deleting old model ~~~FROM THE CLOUD~~~"
  Cloudinary::Api.delete_resources(public_id, :resource_type=>:raw)
  log "Uploading bot model ~~TO THE CLOUD~~"
  res = Cloudinary::Uploader.upload(@path, :resource_type=>:raw, :public_id=>public_id, :invalidate=>true)
  log "Upload complete!"
  res
end

# Upload the model under its default id, @basename.
# @return [Object] see #persist
def persist!
  persist(@basename)
end
# Unmarshal model properties.
#
# When no content is given, the content stored by #fetch! (@content) is
# used, matching how the archive class resolves its default. Previously the
# nil default was handed straight to Marshal.load, which raises TypeError.
#
# NOTE(security): Marshal.load can instantiate arbitrary objects; the
# content must come from a trusted source.
#
# @param content [String, nil] Marshal dump; defaults to @content
# @return [Hash] properties keyed by :tokens, :sentences, :mentions, :keywords
# @raise [TypeError] when neither content nor @content is available
def parse(content=nil)
  Marshal.load(content || @content)
end

# Unmarshal model properties (see #parse) and install them on this model.
# @param content [String, nil] Marshal dump; defaults to @content
def parse!(content=nil)
  props = parse(content)
  @tokens = props[:tokens]
  @sentences = props[:sentences]
  @mentions = props[:mentions]
  @keywords = props[:keywords]
end
# Save the model to its own file by handing @path to #save.
# @return [Object] whatever #save returns
def save!
  destination = @path
  save(destination)
end
# Download the bot model through Cloudinary::Downloader.
# @param url [String, nil] location to download; defaults to @url
# @return [String, nil] the downloaded body, or nil when it is empty
def fetch(url=nil)
  source = url || @url
  log "Fetching bot model ~~~FROM THE CLOUD~~~"
  payload = Cloudinary::Downloader.download(source)
  if payload.empty?
    log "WARNING: bot model not found ~~~IN THE CLOUD~~~"
    return nil
  end
  log "Download complete!"
  payload
end

# Download the model (see #fetch) and keep the body in @content.
# @return [String, nil]
def fetch!
  @content = fetch
end
end
class Ebooks::Boodoo::BoodooBot < Ebooks::Bot
# Names of the settings that #missing_fields checks for a non-empty value.
$required_fields = %w[
  consumer_key consumer_secret
  access_token access_token_secret
  bot_name original
]
# Stop following a user. Per the original note, this overrides the inherited
# method in order to fix a typo in it.
# @param user [String] username or user id
# @param args extra arguments handed unchanged to the Twitter client
# @return [Object] whatever twitter.unfollow returns
def unfollow(user, *args)
  log("Unfollowing #{user}")
  api = twitter
  api.unfollow(user, *args)
end
# A rough error-catch/retry for rate limit, dupe fave, server timeouts.
#
# Runs the given block and rescues Twitter::Error. Every rescued error bumps
# @retries; once @retries exceeds @max_error_retries the error is re-raised.
# Otherwise the error is handled by kind:
# * Twitter::Error::TooManyRequests -- sleep for the rate limit's reset_in
#   seconds, then run the block again
# * Twitter::Error::Forbidden -- log a warning and return true from this
#   method (the block is not retried)
# * message containing "execution" or "capacity" -- sleep @timeout_sleep
#   seconds, then run the block again
# * anything else -- log and re-raise
#
# NOTE(review): @retries, @max_error_retries and @timeout_sleep are not
# assigned in this method; they are presumably initialised elsewhere.
# @retries is also never reset here, so it looks like it accumulates across
# calls -- confirm that is intended.
#
# @yield the Twitter call to protect
# @return [Object, true] the block's value, or true after a Forbidden error
def catch_twitter
begin
yield
rescue Twitter::Error => error
@retries += 1
# Bare raise re-raises the error currently being rescued.
raise if @retries > @max_error_retries
# Exact class comparison: subclasses of these errors fall through to the
# later branches.
if error.class == Twitter::Error::TooManyRequests
reset_in = error.rate_limit.reset_in
log "RATE: Going to sleep for ~#{reset_in / 60} minutes..."
sleep reset_in
# Re-runs the begin block from the top.
retry
elsif error.class == Twitter::Error::Forbidden
# don't count "Already faved/followed" message against attempts
@retries -= 1 if error.to_s.include?("already")
log "WARN: #{error.to_s}"
# Callers get true here instead of the block's result.
return true
# True when the error message contains either substring.
elsif ["execution", "capacity"].any?(&error.to_s.method(:include?))
log "ERR: Timeout?\n\t#{error}\nSleeping for #{@timeout_sleep} seconds..."
sleep @timeout_sleep
retry
else
log "Unhandled exception from Twitter: #{error.to_s}"
raise
end
end
end
# Override Ebooks::Bot#blacklisted? so that both sides of the comparison are
# lower-cased before matching.
# @param username [String]
# @return [Boolean] true when the name is on @blacklist, ignoring case
def blacklisted?(username)
  lowered = @blacklist.map { |entry| entry.downcase }
  lowered.include?(username.downcase)
end
# Follow new followers, unfollow lost followers.
#
# Fetches follower and following screen names (each requested with
# :count=>200), follows everyone who follows the bot but is not followed
# back, unfollows everyone the bot follows who does not follow it, and
# stores the results in @followers / @following.
#
# The summary line is now logged whenever at least one account was followed
# or unfollowed. The previous condition, !(a.empty? || b.empty?), only
# logged when both lists were non-empty, hiding the common one-sided case.
def follow_parity
  followers = catch_twitter { twitter.followers(:count=>200).map(&:screen_name) }
  following = catch_twitter { twitter.following(:count=>200).map(&:screen_name) }
  to_follow = followers - following
  to_unfollow = following - followers
  twitter.follow(to_follow) unless to_follow.empty?
  twitter.unfollow(to_unfollow) unless to_unfollow.empty?
  @followers = followers
  @following = following - to_unfollow
  unless to_follow.empty? && to_unfollow.empty?
    log "Followed #{to_follow.size}; unfollowed #{to_unfollow.size}."
  end
end
# Rebuild the model file from the archive and load it into @model.
# Consumes @archive_path with Ebooks::Model, saves the result to
# @model_path, then loads that file back.
# @return [Object] the loaded model (also stored in @model)
def make_model!
  log "Updating model: #{@model_path}"
  rebuilt = Ebooks::Model.consume(@archive_path)
  rebuilt.save(@model_path)
  log "Loading model..."
  @model = Ebooks::Model.load(@model_path)
end
# Tell whether every required setting has a value.
# @return [Boolean] true when #missing_fields finds nothing
def can_run?
  missing_fields.length.zero?
end

# List the entries of $required_fields whose same-named method on this
# object returns nil or something empty.
# @return [Array<String>]
def missing_fields
  $required_fields.reject do |field|
    # log "#{field} = #{send(field)}"
    !send(field).nil? && !send(field).empty?
  end
end
end