require 'twitter_ebooks' require 'cloudinary' require 'time_difference' include Ebooks ## Retweet check based on Really-Existing-RT practices class Ebooks::TweetMeta def is_retweet? tweet.retweeted_status? || !!tweet.text[/[RM]T ?[@:]/i] end end module Ebooks::Boodoo def self.make_Model(username: nil, path: nil, ignore_cloud: false) # return CloudModel unless Cloudinary is missing or instructed not to. if !ignore_cloud && has_cloud? CloudModel.new(username, path: path) else Model.new end end def self.make_Archive(username, path: nil, client: nil, content: nil, local: false, ignore_cloud: false) # return CloudArchive unless Cloudinary is missing or instructed not to. if !ignore_cloud && has_cloud? CloudArchive.new(username, path: path, client: client, content: content, local: local) else Archive.new(username, path, client) end end def age(since, now: Time.now, unit: :in_hours) since ||= Time.new(1986, 2, 8) unit = unit.to_sym TimeDifference.between(since, now).method(unit).call end def self.age(since, now: Time.now, unit: :in_hours) age(since, now, unit) end # check if we're configured to use Cloudinary for cloud storage def has_cloud? (ENV['CLOUDINARY_URL'].nil? || ENV['CLOUDINARY_URL'].empty?) ? false : true end # def self.has_cloud? # has_cloud? # end def in_cloud?(public_id, resource_type=:raw) return false if !has_cloud? begin Cloudinary::Api.resource(public_id, :resource_type=>resource_type) true rescue Cloudinary::Api::NotFound false end end # supports Ruby Range literal, Fixnum, or Float as string def parse_num(value) eval(value.to_s[/^\d+(?:\.{1,3})?\d*$/].to_s) end # Make expected/possible Range def parse_range(value) value = parse_num(value) if value.nil? value = nil elsif !value.respond_to?(:to_a) value = Range.new(value, value) end value end def obscure_curse(len) s = [] c = ['!', '@', '$', '%', '^', '&', '*'] len.times do s << c.sample end s.join('') end def obscure_curses(tweet) # TODO: Ignore banned terms that are part of @-mentions $banned_terms.each do |term| re = Regexp.new("\\b#{term}\\b", "i") tweet.gsub!(re, Ebooks::Boodoo.obscure_curse(term.size)) end tweet end def parse_array(value, array_splitter=nil) array_splitter ||= / *[,;]+ */ value.split(array_splitter).map(&:strip) end def new_client Twitter::REST::Client.new do |config| config.consumer_key = ENV['CONSUMER_KEY'] config.consumer_secret = ENV['CONSUMER_SECRET'] config.access_token = ENV['ACCESS_TOKEN'] config.access_token_secret = ENV['ACCESS_TOKEN_SECRET'] end end def minify_tweets(tweets) log "Minifying tweets..." tweets.map do |tweet| {id: tweet[:id], text: tweet[:text]} end end def jsonify(path, write_file: true, from_cloud: false, to_cloud: true, new_name: nil) basename = File.basename(path) name = basename.split('.')[0] ext = path.split('.')[-1] new_name ||= name new_path = "corpus/#{new_name}.json" lines = [] id = nil #TODO: Move this to its own method: find_corpus(basename) if from_cloud && in_cloud?(basename) log "Reading initial corpus file ~~~FROM CLOUD~~~" content = Cloudinary::Downloader.download(path, :resource_type=>:raw) else log "Reading local initial corpus file" content = File.read(path, :encoding => 'utf-8') end if ext.downcase == "json" log "Minifying JSON corpus at #{path}" lines = minify_tweets(JSON.parse(content, :symbolize_names=>true)) elsif ext.downcase == "csv" #from twitter archive log "Reading CSV corpus from #{path}" content = CSV.parse(content) header = content.shift text_col = header.index('text') id_col = header.index('tweet_id') lines = content.map do |tweet| id = tweet[id_col].empty? ? 0 : tweet[id_col] {id: id, text: tweet[text_col]} end else log "Reading plaintext corpus from #{path}" lines = content.split("\n").map do |line| {id: 0, text: line} end end File.open(new_path, 'w') do |f| log "Writing #{lines.length} lines to #{new_path}" f.write(JSON.generate(lines)) end if write_file #TODO: Save res["url"] to CloudArchive somehow? if to_cloud && has_cloud? public_id = new_path # log "Deleting JSON archive ~~~FROM THE CLOUD~~~" # Cloudinary::Api.delete_resources(public_id, :resource_type=>:raw) log "Uploading JSON archive ~~TO THE CLOUD~~" res = Cloudinary::Uploader.upload(new_path, :resource_type=>:raw, :public_id=>public_id, :invalidate=>true) log "Upload complete" {url: res["url"], lines: JSON.generate(lines)} else {url: nil, lines: JSON.generate(lines)} end end end class Ebooks::Archive def self.exist?(basename) File.exist?("corpus/#{basename}") end def parse(content=nil) content = content || @content || '[]' JSON.parse(content, symbolize_names: true) end def parse!(content=nil) @tweets = parse(content) end def minify minify_tweets(@tweets) end def minify! @tweets = minify_tweets(@tweets) end def persist(path=nil) path ||= @path log "Saving JSON archive locally..." File.open(path, 'w') do |f| f.write(JSON.pretty_generate(@tweets)) end log "Save complete!" @path end def persist! persist(@path) end def save(path=nil) persist(path) end def save! save(@path) end end class Ebooks::Boodoo::CloudArchive < Ebooks::Archive include Ebooks::Boodoo def self.exist?(username) begin Cloudinary::Api.resource("#{username}.json", :resource_type=>:raw) true rescue Cloudinary::Api::NotFound false end end def initialize(username, path: nil, client: nil, content: nil, local: false) # Otherwise duplicate a lot of super(), but also use ~~THE CLOUD~~ @username = username @path = path || "corpus/#{username}.json" if File.directory?(@path) @path = File.join(@path, "#{username}.json") end @basename = File.basename(@path) @client = client || new_client @url = Cloudinary::Utils.cloudinary_url(@basename, :resource_type=>:raw) @public_id = @basename if local || content @content = content || File.read(@path) else fetch! end parse! new_tweets = sync.class != IO persist if new_tweets if @tweets.empty? log "New archive for @#{@username} at #{@url}" else log "Currently #{@tweets.length} tweets for #{@username}" end end def persist(public_id=nil) public_id ||= @basename # log "Deleting out-dated archive ~~~FROM THE CLOUD~~~" # Cloudinary::Api.delete_resources(public_id, :resource_type=>:raw) log "Uploading JSON archive ~~TO THE CLOUD~~" res = Cloudinary::Uploader.upload(@path, :resource_type=>:raw, :public_id=>public_id, :invalidate=>true) @url = res["url"] @persisted = Time.now log "Upload complete!" res end def since_persisted Boodoo.age(@persisted, Time.now) end # Unused method? def save(path=nil, minify=true) path ||= @path output = minify ? JSON.generate(minify) : JSON.pretty_generate(@tweets) File.open(path, 'w') do |f| f.write(output) end end def fetch(url=nil) url ||= @url log "Fetching JSON archive ~~~FROM THE CLOUD~~~" content = Cloudinary::Downloader.download(url, :resource_type=>:raw) if content.empty? log "WARNING: JSON archive not found ~~~IN THE CLOUD~~~" @fetched = nil nil else log "Download complete!" @fetched = Time.now content end end def fetch! @content = fetch end def since_fetched Boodoo.age(@fetched, Time.now) end end class Ebooks::Model # add methods here to match Boodoo::CloudModel def self.parse(content) model = Model.new model.instance_eval do props = Marshal.load(content) @tokens = props[:tokens] @sentences = props[:sentences] @mentions = props[:mentions] @keywords = props[:keywords] end model end def self.from_json(content, is_path: nil) model = Model.new model.from_json(content, is_file) model end # Create a model from JSON string # @content [String/Array] Ebooks-style JSON twitter archive # @return [Ebooks::Model] def from_json(content, is_path: false) content = File.read(content, :encoding => 'utf-8') if is_path if content.respond_to?(:upcase) lines = JSON.parse(content).map do |tweet| tweet['text'] end else lines = content end log "Reading json corpus with #{lines.size} lines" consume_lines(lines) end def fetch(path=nil) path ||= @path if File.exist?(path) log "Fetching local bot model" content = File.read(@path, :encoding => 'utf-8') if !content.empty? log "local model fetched" return content end end log "WARNING: local bot model not found" return nil end def fetch! @content = fetch end def parse(content=nil) props = Marshal.load(content) end def parse!(content=nil) props = parse(content) @tokens = props[:tokens] @sentences = props[:sentences] @mentions = props[:mentions] @keywords = props[:keywords] end def save! save(@path) end def persist(path=nil) path ||= @path save(path) end def persist! persist end end class Ebooks::Boodoo::CloudModel < Ebooks::Model # Read a saved model from marshaled content instead of file # @param content [String] # @return [Ebooks::Boodoo::CloudModel] def self.parse(content) model = CloudModel.new model.instance_eval do props = Marshal.load(content) @tokens = props[:tokens] @sentences = props[:sentences] @mentions = props[:mentions] @keywords = props[:keywords] end model end def self.from_json(content, is_file) model = CloudModel.new model.from_json(content, is_file) model end def initialize(username, path: nil) @path = path || "corpus/#{username}.model" if File.directory?(@path) @path = File.join(@path, "#{username}.model") end super() @basename = File.basename(@path) @url = Cloudinary::Utils.cloudinary_url(@basename, :resource_type=>:raw) end def persist(public_id=nil) public_id ||= @basename log "Uploading bot model ~~TO THE CLOUD~~" res = Cloudinary::Uploader.upload(@path, :resource_type=>:raw, :public_id=>public_id, :invalidate=>true) @url = res["url"] log "Upload complete!" res end def persist! persist(@basename) end def fetch(url=nil) url ||= @url log "Fetching bot model ~~~FROM THE CLOUD~~~" content = Cloudinary::Downloader.download(url) if content.empty? log "WARNING: bot model not found ~~~IN THE CLOUD~~~" nil else log "Download complete!" content end end end class Ebooks::Boodoo::BoodooBot < Ebooks::Bot $required_fields = ['consumer_key', 'consumer_secret', 'access_token', 'access_token_secret', 'bot_name', 'original'] # Unfollow a user -- OVERRIDE TO FIX TYPO # @param user [String] username or user id def unfollow(user, *args) log "Unfollowing #{user}" twitter.unfollow(user, *args) end # A rough error-catch/retry for rate limit, dupe fave, server timeouts def catch_twitter begin yield rescue Twitter::Error => error @retries += 1 raise if @retries > @max_error_retries if error.class == Twitter::Error::TooManyRequests reset_in = error.rate_limit.reset_in log "RATE: Going to sleep for ~#{reset_in / 60} minutes..." sleep reset_in retry elsif error.class == Twitter::Error::Forbidden # don't count "Already faved/followed" message against attempts @retries -= 1 if error.to_s.include?("already") log "WARN: #{error.to_s}" return true elsif ["execution", "capacity"].any?(&error.to_s.method(:include?)) log "ERR: Timeout?\n\t#{error}\nSleeping for #{@timeout_sleep} seconds..." sleep @timeout_sleep retry else log "Unhandled exception from Twitter: #{error.to_s}" raise end end end # Override Ebooks::Bot#blacklisted? to ensure lower<=>lower check def blacklisted?(username) @blacklist.map(&:downcase).include?(username.downcase) end # Follow new followers, unfollow lost followers def follow_parity followers = catch_twitter { twitter.followers(:count=>200).map(&:screen_name) } following = catch_twitter { twitter.following(:count=>200).map(&:screen_name) } to_follow = followers - following to_unfollow = following - followers twitter.follow(to_follow) unless to_follow.empty? twitter.unfollow(to_unfollow) unless to_unfollow.empty? @followers = followers @following = following - to_unfollow if !(to_follow.empty? || to_unfollow.empty?) log "Followed #{to_follow.size}; unfollowed #{to_unfollow.size}." end end def make_model! log "Updating model: #{@model_path}" Ebooks::Model.consume(@archive_path).save(@model_path) log "Loading model..." @model = Ebooks::Model.load(@model_path) end def can_run? missing_fields.empty? end def missing_fields $required_fields.select { |field| # log "#{field} = #{send(field)}" send(field).nil? || send(field).empty? } end end