# Mirror of https://github.com/thewesker/ebooks_example.git
# (synced 2025-12-20 04:11:13 -05:00; 353 lines, 9.2 KiB, Ruby)
require 'twitter_ebooks'
|
|
require 'cloudinary'
|
|
|
|
include Ebooks
|
|
|
|
## Retweet check based on Really-Existing-RT practices
|
|
class Ebooks::TweetMeta
|
|
# Decide whether the wrapped tweet should be treated as a retweet.
#
# Truthy when `tweet.retweeted_status?` is truthy, or when the tweet text
# contains a manual "RT"/"MT" marker followed by an optional space and then
# "@" or ":" (matched case-insensitively).
#
# @return [Object] the value of `tweet.retweeted_status?` when truthy,
#   otherwise true/false from the manual-marker check
def is_retweet?
  native = tweet.retweeted_status?
  return native if native

  manual_marker = tweet.text[/[RM]T ?[@:]/i]
  manual_marker ? true : false
end
|
|
end
|
|
|
|
module Ebooks::Boodoo
|
|
# Report whether Cloudinary cloud storage is configured, i.e. whether the
# CLOUDINARY_URL environment variable is set to a non-empty string.
#
# @return [Boolean]
def has_cloud?
  cloudinary_url = ENV['CLOUDINARY_URL']
  !(cloudinary_url.nil? || cloudinary_url.empty?)
end
|
|
|
|
# Parse a numeric setting given as a string (or anything responding to #to_s).
#
# Recognised forms: a run of digits ("5"), a decimal ("1.5"), an inclusive
# range ("1..5") and an exclusive range ("1...5").
#
# The token is parsed explicitly instead of being handed to `eval`, so digits
# are always read as base 10 ("010" is 10, not octal 8, and "08" no longer
# raises) and a trailing dot ("1.") yields a Float instead of a SyntaxError.
#
# @param value [#to_s] raw value, e.g. a configuration string
# @return [Integer, Float, Range, nil] the parsed value, or nil when no line
#   of the input matches one of the recognised forms
def parse_num(value)
  # Same acceptance pattern as before: the first line consisting of digits
  # with an optional run of one to three dots inside or after them.
  token = value.to_s[/^\d+(?:\.{1,3})?\d*$/]
  return nil if token.nil?

  head, dots, tail = token.partition(/\.+/)
  return head.to_i if dots.empty?
  return token.to_f if dots == '.'

  # An omitted upper bound ("1..") stays an endless range, which is what the
  # literal evaluates to on Rubies that support endless ranges.
  upper = tail.empty? ? nil : tail.to_i
  Range.new(head.to_i, upper, dots == '...')
end
|
|
|
|
# Normalise a numeric setting into a Range.
#
# The value is first run through `parse_num`. A nil result, or any result
# that already responds to #to_a (such as a Range), is returned untouched;
# a single number n becomes the one-element range n..n.
#
# @param value [Object] raw value accepted by `parse_num`
# @return [Range, nil]
def parse_range(value)
  parsed = parse_num(value)
  # nil also responds to #to_a, so this guard covers both pass-through cases.
  return parsed if parsed.nil? || parsed.respond_to?(:to_a)

  Range.new(parsed, parsed)
end
|
|
|
|
# Build a string of `len` randomly sampled "grawlix" symbols to stand in for
# a censored word.
#
# @param len [Integer] number of symbols to produce
# @return [String] string of `len` characters drawn from ! @ $ % ^ & *
def obscure_curse(len)
  symbols = ['!', '@', '$', '%', '^', '&', '*']
  len.times.map { symbols.sample }.join('')
end
|
|
|
|
# Censor every banned term found in `tweet`, in place.
#
# Each entry of the global `$banned_terms` is interpolated (unescaped) into a
# case-insensitive, word-bounded regexp; every match is replaced with one
# obscured string of the term's length from `Ebooks::Boodoo.obscure_curse`.
#
# @param tweet [String] text to censor; mutated via `gsub!`
# @return [String] the same `tweet` object
def obscure_curses(tweet)
  # TODO: Ignore banned terms that are part of @-mentions
  $banned_terms.each_with_object(tweet) do |term, text|
    pattern = Regexp.new("\\b#{term}\\b", "i")
    text.gsub!(pattern, Ebooks::Boodoo.obscure_curse(term.size))
  end
end
|
|
|
|
# Split a delimited string into an array of whitespace-trimmed items.
#
# @param value [String] text to split
# @param array_splitter [String, Regexp, nil] separator handed to
#   String#split; when nil/false, runs of "," or ";" with optional
#   surrounding spaces are used
# @return [Array<String>]
def parse_array(value, array_splitter=nil)
  separator = array_splitter || / *[,;]+ */
  pieces = value.split(separator)
  pieces.map { |piece| piece.strip }
end
|
|
|
|
# Build a Twitter REST client from the four credential instance variables
# (@consumer_key, @consumer_secret, @access_token, @access_token_secret) of
# the object this method runs on.
#
# @return [Twitter::REST::Client]
def make_client
  Twitter::REST::Client.new do |settings|
    # Copy each credential onto the setter of the same name, in order.
    %w[consumer_key consumer_secret access_token access_token_secret].each do |credential|
      settings.public_send("#{credential}=", instance_variable_get("@#{credential}"))
    end
  end
end
|
|
|
|
# Convert plaintext or CSV corpora into JSON corpus files.
#
# For every path a file "<name>.json" is written to the current working
# directory, where <name> is the part of the file's basename before its
# first dot. The output is a pretty-printed JSON array of {id, text} objects:
#
# * ".csv" files are read with a header row; the "text" and "tweet_id"
#   columns supply the values (a blank id becomes 0).
# * ".json" files are already converted and are skipped.
# * anything else is treated as plaintext, one entry per line, with id 0.
#
# @param paths [Array<String>] corpus files to convert
# @return [Array<String>] the `paths` argument
def jsonify(paths)
  paths.each do |path|
    name = File.basename(path).split('.')[0]
    ext = File.extname(path).downcase
    new_path = name + ".json"

    if ext == ".json"
      log "Taking no action on JSON corpus at #{path}"
      # Skip just this file; a `return` here would silently drop every
      # remaining path in the list.
      next
    end

    content = File.read(path, :encoding => 'utf-8')

    if ext == ".csv" # from twitter archive
      log "Reading CSV corpus from #{path}"
      rows = CSV.parse(content)
      header = rows.shift
      text_col = header.index('text')
      id_col = header.index('tweet_id')
      lines = rows.map do |tweet|
        # CSV yields nil for blank unquoted cells, so normalise before testing.
        id = tweet[id_col].to_s.empty? ? 0 : tweet[id_col]
        {id: id, text: tweet[text_col]}
      end
    else
      log "Reading plaintext corpus from #{path}"
      lines = content.split("\n").map do |line|
        {id: 0, text: line}
      end
    end

    # BELOW IS FOR FILE-SYSTEM; NEED TO ALTER FOR CLOUDINARY/REQUEST?
    File.open(new_path, 'w') do |f|
      log "Writing #{lines.length} lines to #{new_path}"
      f.write(JSON.pretty_generate(lines))
    end
  end
end
|
|
end
|
|
|
|
class Ebooks::Boodoo::CloudArchive < Ebooks::Archive
|
|
# Set up an archive that is mirrored to Cloudinary.
#
# Without Cloudinary configured (`has_cloud?` false) this defers entirely to
# the superclass constructor. Otherwise it resolves the local archive path,
# builds the raw-resource URL, then fetches, parses, syncs and re-uploads
# the archive. `sync` is not defined in this class (presumably inherited).
#
# @param username [String] account whose tweets are archived
# @param path [String, nil] archive file or directory; defaults to
#   "corpus/<username>.json"
# @param client [Object, nil] Twitter client; defaults to `Boodoo.make_client`
def initialize(username, path=nil, client=nil)
  # Just bail on everything if we aren't using Cloudinary
  return super unless has_cloud?

  # Otherwise duplicate a lot of super(), but also use ~~THE CLOUD~~
  @username = username
  @path = path || "corpus/#{username}.json"
  # A directory means "put the default file name inside it".
  @path = File.join(@path, "#{username}.json") if File.directory?(@path)
  @basename = File.basename(@path)
  @client = client || Boodoo.make_client
  @url = Cloudinary::Utils.cloudinary_url(@basename, :resource_type=>:raw)

  fetch!
  parse!
  sync
  # save! # #sync automatically saves
  persist

  summary = if @tweets.empty?
              "New archive for @#{@username} at #{@url}"
            else
              "Currently #{@tweets.length} tweets for #{@username}"
            end
  log summary
end
|
|
|
|
# Replace the cloud copy of the archive with the local file at @path.
#
# Deletes the raw resource stored under the public id, then uploads @path
# under that same id with cache invalidation requested.
#
# @param public_id [String, nil] Cloudinary public id; defaults to @basename
# @return [Object] whatever `Cloudinary::Uploader.upload` returns
def persist(public_id=nil)
  target_id = public_id || @basename
  log "Deleting out-dated archive ~~~FROM THE CLOUD~~~"
  Cloudinary::Api.delete_resources(target_id, :resource_type=>:raw)
  log "Uploading JSON archive ~~TO THE CLOUD~~"
  Cloudinary::Uploader.upload(@path, :resource_type=>:raw, :public_id=>target_id, :invalidate=>true).tap do
    log "Upload complete!"
  end
end

# Upload the archive under its default public id (@basename).
#
# @return [Object] see #persist
def persist!
  persist(@basename)
end
|
|
|
|
# Decode a JSON archive into Ruby data with symbolized keys.
#
# @param content [String, nil] JSON text; falls back to @content, then to
#   an empty JSON array
# @return [Object] the parsed JSON value
def parse(content=nil)
  source = content || @content || '[]'
  JSON.parse(source, symbolize_names: true)
end

# Parse like #parse and store the result in @tweets.
#
# @param content [String, nil] see #parse
# @return [Object] the parsed value now held in @tweets
def parse!(content=nil)
  @tweets = parse(content)
end
|
|
|
|
# Write @tweets to disk as pretty-printed JSON.
#
# @param path [String, nil] destination file; defaults to @path
# @return [Integer] number of bytes written
def save(path=nil)
  destination = path || @path
  File.write(destination, JSON.pretty_generate(@tweets))
end

# Write @tweets to the archive's own @path.
#
# @return [Integer] see #save
def save!
  save(@path)
end
|
|
|
|
# Download the JSON archive from Cloudinary.
#
# @param url [String, nil] resource URL; defaults to @url
# @return [String, nil] the downloaded body, or nil when it is empty
def fetch(url=nil)
  source = url || @url
  log "Fetching JSON archive ~~~FROM THE CLOUD~~~"
  body = Cloudinary::Downloader.download(source)
  if body.empty?
    log "WARNING: JSON archive not found ~~~IN THE CLOUD~~~"
    return nil
  end

  log "Download complete!"
  body
end

# Download the archive from @url and keep the body in @content.
#
# @return [String, nil] see #fetch
def fetch!
  @content = fetch
end
|
|
end
|
|
|
|
class Ebooks::Boodoo::CloudModel < Ebooks::Model
|
|
# Read a saved model from marshaled content instead of from a file.
#
# NOTE: `Marshal.load` can instantiate arbitrary objects; only feed it
# content from a trusted source.
#
# @param content [String] Marshal dump of a Hash with :tokens, :sentences,
#   :mentions and :keywords entries
# @return [Object] a fresh `Model` instance carrying those four values
def self.parse(content)
  model = Model.new
  props = Marshal.load(content)
  %i[tokens sentences mentions keywords].each do |field|
    model.instance_variable_set("@#{field}", props[field])
  end
  model
end
|
|
|
|
# Set up a model whose saved form lives on Cloudinary.
#
# Without Cloudinary configured this only runs the superclass constructor.
# (A value returned from `initialize` is ignored by `new`, so the former
# `return Ebooks::Model.new` threw the new model away and left this
# instance without the superclass setup.)
#
# With Cloudinary it also resolves the local model path and records the
# basename and raw-resource URL that #persist and #fetch rely on.
#
# @param username [String] account the model belongs to
# @param path [String, nil] model file or directory; defaults to
#   "corpus/<username>.model"
def initialize(username, path=nil)
  unless has_cloud?
    super()
    return
  end

  @path = path || "corpus/#{username}.model"
  # A directory means "put the default file name inside it".
  @path = File.join(@path, "#{username}.model") if File.directory?(@path)
  super()
  @basename = File.basename(@path)
  # #fetch defaults to @url, so build it the same way CloudArchive does.
  @url = Cloudinary::Utils.cloudinary_url(@basename, :resource_type=>:raw)
end
|
|
|
|
# Feed the tweet texts of a JSON archive string into the model.
#
# @param content [String] JSON array of objects that each have a "text" key
# @return [Object] whatever `consume_lines` returns
def from_json(content)
  log "Reading json corpus with length #{content.size}"
  tweets = JSON.parse(content)
  texts = tweets.map { |tweet| tweet['text'] }
  consume_lines(texts)
end
|
|
|
|
# Replace the cloud copy of the model with the local file at @path.
#
# Deletes the raw resource stored under the public id, then uploads @path
# under that same id with cache invalidation requested.
#
# @param public_id [String, nil] Cloudinary public id; defaults to @basename
# @return [Object] whatever `Cloudinary::Uploader.upload` returns
def persist(public_id=nil)
  public_id ||= @basename
  log "Deleting old model ~~~FROM THE CLOUD~~~"
  # Delete the id we are about to upload to; always deleting @basename left
  # the old resource in place whenever a custom public_id was given.
  Cloudinary::Api.delete_resources(public_id, :resource_type=>:raw)
  log "Uploading bot model ~~TO THE CLOUD~~"
  res = Cloudinary::Uploader.upload(@path, :resource_type=>:raw, :public_id=>public_id, :invalidate=>true)
  log "Upload complete!"
  res
end

# Upload the model under its default public id (@basename).
#
# @return [Object] see #persist
def persist!
  persist(@basename)
end
|
|
|
|
# Decode marshaled model properties.
#
# NOTE: `Marshal.load` can instantiate arbitrary objects; only feed it
# content from a trusted source.
#
# @param content [String, nil] Marshal dump; falls back to @content (as set
#   by #fetch!) so that calling without an argument is usable
# @return [Object] the unmarshaled value (a Hash of model properties is
#   what #parse! expects)
def parse(content=nil)
  content ||= @content
  Marshal.load(content)
end

# Decode marshaled properties and install them on this model.
#
# @param content [String, nil] see #parse
# @return [Object] the :keywords value (last assignment)
def parse!(content=nil)
  props = parse(content)
  @tokens = props[:tokens]
  @sentences = props[:sentences]
  @mentions = props[:mentions]
  @keywords = props[:keywords]
end
|
|
|
|
# Save the model to its own @path by delegating to `save`.
#
# @return [Object] whatever `save` returns
def save!
  destination = @path
  save(destination)
end
|
|
|
|
# Download the saved bot model from Cloudinary.
#
# @param url [String, nil] resource URL; defaults to @url
# @return [String, nil] the downloaded body, or nil when it is empty
def fetch(url=nil)
  source = url || @url
  log "Fetching bot model ~~~FROM THE CLOUD~~~"
  body = Cloudinary::Downloader.download(source)
  if body.empty?
    log "WARNING: bot model not found ~~~IN THE CLOUD~~~"
    return nil
  end

  log "Download complete!"
  body
end

# Download the model from @url and keep the body in @content.
#
# @return [String, nil] see #fetch
def fetch!
  @content = fetch
end
|
|
end
|
|
|
|
class Ebooks::Boodoo::BoodooBot < Ebooks::Bot
|
|
# Names of the settings that must all be non-empty before the bot can run;
# read by #missing_fields.
$required_fields = %w[
  consumer_key consumer_secret
  access_token access_token_secret
  bot_name original
]
|
|
|
|
# Unfollow a user -- OVERRIDE TO FIX TYPO
#
# Logs the action, then forwards every argument to the Twitter client.
#
# @param user [String] username or user id
# @param args [Array] extra arguments passed through to `twitter.unfollow`
# @return [Object] whatever `twitter.unfollow` returns
def unfollow(user, *args)
  log("Unfollowing #{user}")
  forwarded = [user, *args]
  twitter.unfollow(*forwarded)
end
|
|
|
|
# A rough error-catch/retry wrapper for rate limits, duplicate faves and
# server timeouts.
#
# Runs the block; on any Twitter::Error it bumps @retries and re-raises once
# that exceeds @max_error_retries. Otherwise:
# * TooManyRequests (exact class): sleep until the rate limit resets, retry
# * Forbidden (exact class): log a warning and return true; "already ..."
#   messages are not counted against the retry budget
# * messages containing "execution" or "capacity": sleep @timeout_sleep
#   seconds, retry
# * anything else: log and re-raise
#
# @yield the Twitter call to protect
# @return [Object] the block's value, or true for a Forbidden error
# @raise [Twitter::Error] when retries are exhausted or the error is unhandled
def catch_twitter
  yield
rescue Twitter::Error => error
  @retries += 1
  raise if @retries > @max_error_retries

  error_class = error.class
  if error_class == Twitter::Error::TooManyRequests
    reset_in = error.rate_limit.reset_in
    log "RATE: Going to sleep for ~#{reset_in / 60} minutes..."
    sleep reset_in
    retry
  end

  if error_class == Twitter::Error::Forbidden
    # don't count "Already faved/followed" message against attempts
    @retries -= 1 if error.to_s.include?("already")
    log "WARN: #{error.to_s}"
    return true
  end

  if %w[execution capacity].any? { |needle| error.to_s.include?(needle) }
    log "ERR: Timeout?\n\t#{error}\nSleeping for #{@timeout_sleep} seconds..."
    sleep @timeout_sleep
    retry
  end

  log "Unhandled exception from Twitter: #{error.to_s}"
  raise
end
|
|
|
|
# Override Ebooks::Bot#blacklisted? to ensure a lower<=>lower comparison.
#
# @param username [String] screen name to look up
# @return [Boolean] true when @blacklist contains the name, ignoring case
def blacklisted?(username)
  lowered_blacklist = @blacklist.map(&:downcase)
  lowered_blacklist.include?(username.downcase)
end
|
|
|
|
# Follow new followers, unfollow lost followers.
#
# Compares up to 200 follower screen names with up to 200 followed screen
# names, follows accounts that follow us but are not followed back, and
# unfollows accounts that no longer follow us. The resulting lists are kept
# in @followers and @following, and a summary is logged whenever anything
# changed.
#
# NOTE(review): @following excludes the unfollowed accounts but does not add
# the newly followed ones -- confirm whether that is intended.
#
# @return [Object, nil] the result of `log`, or nil when nothing changed
def follow_parity
  followers = catch_twitter { twitter.followers(:count=>200).map(&:screen_name) }
  following = catch_twitter { twitter.following(:count=>200).map(&:screen_name) }
  to_follow = followers - following
  to_unfollow = following - followers
  twitter.follow(to_follow) unless to_follow.empty?
  twitter.unfollow(to_unfollow) unless to_unfollow.empty?
  @followers = followers
  @following = following - to_unfollow

  # Log when EITHER list is non-empty; the previous `!(a || b)` form only
  # fired when both were, hiding one-sided changes.
  return if to_follow.empty? && to_unfollow.empty?

  log "Followed #{to_follow.size}; unfollowed #{to_unfollow.size}."
end
|
|
|
|
# Rebuild the model file from the archive, then load it into @model.
#
# Consumes @archive_path with Ebooks::Model, saves the result to
# @model_path, and reloads it from that file.
#
# @return [Object] the loaded model (also stored in @model)
def make_model!
  log "Updating model: #{@model_path}"
  fresh_model = Ebooks::Model.consume(@archive_path)
  fresh_model.save(@model_path)
  log "Loading model..."
  @model = Ebooks::Model.load(@model_path)
end
|
|
|
|
# Tell whether every required setting is present.
#
# @return [Boolean] true when `missing_fields` reports nothing
def can_run?
  outstanding = missing_fields
  outstanding.empty?
end
|
|
|
|
# List the required settings that are still unset.
#
# Each name in the global `$required_fields` is called as a method on this
# object; names whose value is nil or empty are reported.
#
# @return [Array<String>] names of the missing settings
def missing_fields
  $required_fields.select do |field|
    # log "#{field} = #{send(field)}"
    current = send(field)
    current.nil? || current.empty?
  end
end
|
|
end
|