ebooks_example/boodoo.rb

require 'twitter_ebooks'
require 'cloudinary'
require 'time_difference'

include Ebooks

## Retweet check based on Really-Existing-RT practices
class Ebooks::TweetMeta
  def is_retweet?
    tweet.retweeted_status? || !!tweet.text[/[RM]T ?[@:]/i]
  end
end

module Ebooks::Boodoo

  def self.make_Model(username: nil, path: nil, ignore_cloud: false)
    # return CloudModel unless Cloudinary is missing or instructed not to.
    if !ignore_cloud && has_cloud?
      CloudModel.new(username, path: path)
    else
      Model.new
    end
  end

  def self.make_Archive(username, path: nil, client: nil, content: nil, local: false, ignore_cloud: false)
    # return CloudArchive unless Cloudinary is missing or instructed not to.
    if !ignore_cloud && has_cloud?
      CloudArchive.new(username, path: path, client: client, content: content, local: local)
    else
      Archive.new(username, path, client)
    end
  end

  def age(since, now: Time.now, unit: :in_hours)
    since ||= Time.new(1986, 2, 8)
    unit = unit.to_sym
    TimeDifference.between(since, now).method(unit).call
  end

  def self.age(since, now: Time.now, unit: :in_hours)
    age(since, now, unit)
  end

  # check if we're configured to use Cloudinary for cloud storage
  def has_cloud?
    (ENV['CLOUDINARY_URL'].nil? || ENV['CLOUDINARY_URL'].empty?) ? false : true
  end

  # def self.has_cloud?
  #   has_cloud?
  # end

  def in_cloud?(public_id, resource_type=:raw)
    return false if !has_cloud?
    begin
      Cloudinary::Api.resource(public_id, :resource_type=>resource_type)
      true
    rescue Cloudinary::Api::NotFound
      false
    end
  end

  # supports Ruby Range literal, Fixnum, or Float as string
  def parse_num(value)
    eval(value.to_s[/^\d+(?:\.{1,3})?\d*$/].to_s)
  end

  # Make expected/possible Range
  def parse_range(value)
    value = parse_num(value)
    if value.nil?
      value = nil
    elsif !value.respond_to?(:to_a)
      value = Range.new(value, value)
    end
    value
  end

  def obscure_curse(len)
    s = []
    c = ['!', '@', '$', '%', '^', '&', '*']
    len.times do
      s << c.sample
    end
    s.join('')
  end

  def obscure_curses(tweet)
    # TODO: Ignore banned terms that are part of @-mentions
    $banned_terms.each do |term|
      re = Regexp.new("\\b#{term}\\b", "i")
      tweet.gsub!(re, Ebooks::Boodoo.obscure_curse(term.size))
    end
    tweet
  end

  def parse_array(value, array_splitter=nil)
    array_splitter ||= / *[,;]+ */
    value.split(array_splitter).map(&:strip)
  end

  def new_client
    Twitter::REST::Client.new do |config|
      config.consumer_key = ENV['CONSUMER_KEY']
      config.consumer_secret = ENV['CONSUMER_SECRET']
      config.access_token = ENV['ACCESS_TOKEN']
      config.access_token_secret = ENV['ACCESS_TOKEN_SECRET']
    end
  end

  def minify_tweets(tweets)
    log "Minifying tweets..."
    tweets.map do |tweet|
      {id: tweet[:id], text: tweet[:text]}
    end
  end

  def jsonify(path, write_file: true, from_cloud: false, to_cloud: true, new_name: nil)
    basename = File.basename(path)
    name = basename.split('.')[0]
    ext = path.split('.')[-1]
    new_name ||= name

    new_path = "corpus/#{new_name}.json"
    lines = []
    id = nil

    #TODO: Move this to its own method: find_corpus(basename)
    if from_cloud && in_cloud?(basename)
      log "Reading initial corpus file ~~~FROM CLOUD~~~"
      content = Cloudinary::Downloader.download(path, :resource_type=>:raw)
    else
      log "Reading local initial corpus file"
      content = File.read(path, :encoding => 'utf-8')
    end

    if ext.downcase == "json"
      log "Minifying JSON corpus at #{path}"
      lines = minify_tweets(JSON.parse(content, :symbolize_names=>true))
    elsif ext.downcase == "csv" #from twitter archive
      log "Reading CSV corpus from #{path}"
      content = CSV.parse(content)
      header = content.shift
      text_col = header.index('text')
      id_col = header.index('tweet_id')
      lines = content.map do |tweet|
        id = tweet[id_col].empty? ? 0 : tweet[id_col]
        {id: id, text: tweet[text_col]}
      end
    else
      log "Reading plaintext corpus from #{path}"
      lines = content.split("\n").map do |line|
        {id: 0, text: line}
      end
    end

    File.open(new_path, 'w') do |f|
      log "Writing #{lines.length} lines to #{new_path}"
      f.write(JSON.generate(lines))
    end if write_file

    #TODO: Save res["url"] to CloudArchive somehow?
    if to_cloud && has_cloud?
      public_id = new_path
      # log "Deleting JSON archive ~~~FROM THE CLOUD~~~"
      # Cloudinary::Api.delete_resources(public_id, :resource_type=>:raw)
      log "Uploading JSON archive ~~TO THE CLOUD~~"
      res = Cloudinary::Uploader.upload(new_path, :resource_type=>:raw, :public_id=>public_id, :invalidate=>true)
      log "Upload complete"
      {url: res["url"], lines: JSON.generate(lines)}
    else
      {url: nil, lines: JSON.generate(lines)}
    end
  end
end

class Ebooks::Archive
  def self.exist?(basename)
    File.exist?("corpus/#{basename}")
  end

  def parse(content=nil)
    content = content || @content || '[]'
    JSON.parse(content, symbolize_names: true)
  end

  def parse!(content=nil)
    @tweets = parse(content)
  end

 def minify
    minify_tweets(@tweets)
  end

  def minify!
    @tweets = minify_tweets(@tweets)
  end

 def persist(path=nil)
    path ||= @path
    log "Saving JSON archive locally..."
    File.open(path, 'w') do |f|
      f.write(JSON.pretty_generate(@tweets))
    end
    log "Save complete!"
    @path
  end

  def persist!
    persist(@path)
  end

  def save(path=nil)
    persist(path)
  end

  def save!
    save(@path)
  end
end

class Ebooks::Boodoo::CloudArchive < Ebooks::Archive
  include Ebooks::Boodoo

  def self.exist?(username)
    begin
      Cloudinary::Api.resource("#{username}.json", :resource_type=>:raw)
      true
    rescue Cloudinary::Api::NotFound
      false
    end
  end

  def initialize(username, path: nil, client: nil, content: nil, local: false)
    # Otherwise duplicate a lot of super(), but also use ~~THE CLOUD~~
    @username = username
    @path = path || "corpus/#{username}.json"
    if File.directory?(@path)
      @path = File.join(@path, "#{username}.json")
    end
    @basename = File.basename(@path)
    @client = client || new_client
    @url = Cloudinary::Utils.cloudinary_url(@basename, :resource_type=>:raw)
    @public_id = @basename
    if local || content
      @content = content || File.read(@path)
    else
      fetch!
    end
    parse!
    new_tweets = sync.class != IO
    persist if new_tweets

    if @tweets.empty?
      log "New archive for @#{@username} at #{@url}"
    else
      log "Currently #{@tweets.length} tweets for #{@username}"
    end
  end

  def persist(public_id=nil)
    public_id ||= @basename
    # log "Deleting out-dated archive ~~~FROM THE CLOUD~~~"
    # Cloudinary::Api.delete_resources(public_id, :resource_type=>:raw)
    log "Uploading JSON archive ~~TO THE CLOUD~~"
    res = Cloudinary::Uploader.upload(@path, :resource_type=>:raw, :public_id=>public_id, :invalidate=>true)
    @url = res["url"]
    @persisted = Time.now
    log "Upload complete!"
    res
  end

  def since_persisted
    Boodoo.age(@persisted, Time.now)
  end

  # Unused method?
  def save(path=nil, minify=true)
    path ||= @path
    output = minify ? JSON.generate(minify) : JSON.pretty_generate(@tweets)
    File.open(path, 'w') do |f|
      f.write(output)
    end
  end

  def fetch(url=nil)
    url ||= @url
    log "Fetching JSON archive ~~~FROM THE CLOUD~~~"
    content = Cloudinary::Downloader.download(url, :resource_type=>:raw)
    if content.empty?
      log "WARNING: JSON archive not found ~~~IN THE CLOUD~~~"
      @fetched = nil
      nil
    else
      log "Download complete!"
      @fetched = Time.now
      content
    end
  end

  def fetch!
    @content = fetch
  end

  def since_fetched
    Boodoo.age(@fetched, Time.now)
  end
end

class Ebooks::Model
  # add methods here to match Boodoo::CloudModel
  def self.parse(content)
    model = Model.new
    model.instance_eval do
      props = Marshal.load(content)
      @tokens = props[:tokens]
      @sentences = props[:sentences]
      @mentions = props[:mentions]
      @keywords = props[:keywords]
    end
    model
  end

  def self.from_json(content, is_path: nil)
    model = Model.new
    model.from_json(content, is_file)
    model
  end

  # Create a model from JSON string
  # @content [String/Array] Ebooks-style JSON twitter archive
  # @return [Ebooks::Model]
  def from_json(content, is_path: false)
    content = File.read(content, :encoding => 'utf-8') if is_path
    if content.respond_to?(:upcase)
      lines = JSON.parse(content).map do |tweet|
        tweet['text']
      end
    else
      lines = content
    end
    log "Reading json corpus with #{lines.size} lines"
    consume_lines(lines)
  end

  def fetch(path=nil)
    path ||= @path
    if File.exist?(path)
      log "Fetching local bot model"
      content = File.read(@path, :encoding => 'utf-8')
      if !content.empty?
        log "local model fetched"
        return content
      end
    end
    log "WARNING: local bot model not found"
    return nil
  end

  def fetch!
    @content = fetch
  end

  def parse(content=nil)
    props = Marshal.load(content)
  end

  def parse!(content=nil)
    props = parse(content)
    @tokens = props[:tokens]
    @sentences = props[:sentences]
    @mentions = props[:mentions]
    @keywords = props[:keywords]
  end

  def save!
    save(@path)
  end

  def persist(path=nil)
    path ||= @path
    save(path)
  end

  def persist!
    persist
  end
end

class Ebooks::Boodoo::CloudModel < Ebooks::Model
  # Read a saved model from marshaled content instead of file
  # @param content [String]
  # @return [Ebooks::Boodoo::CloudModel]
  def self.parse(content)
    model = CloudModel.new
    model.instance_eval do
      props = Marshal.load(content)
      @tokens = props[:tokens]
      @sentences = props[:sentences]
      @mentions = props[:mentions]
      @keywords = props[:keywords]
    end
    model
  end

  def self.from_json(content, is_file)
    model = CloudModel.new
    model.from_json(content, is_file)
    model
  end

  def initialize(username, path: nil)
    @path = path || "corpus/#{username}.model"
    if File.directory?(@path)
      @path = File.join(@path, "#{username}.model")
    end
    super()
    @basename = File.basename(@path)
    @url = Cloudinary::Utils.cloudinary_url(@basename, :resource_type=>:raw)
  end

  def persist(public_id=nil)
    public_id ||= @basename
    log "Uploading bot model ~~TO THE CLOUD~~"
    res = Cloudinary::Uploader.upload(@path, :resource_type=>:raw, :public_id=>public_id, :invalidate=>true)
    @url = res["url"]
    log "Upload complete!"
    res
  end

  def persist!
    persist(@basename)
  end

  def fetch(url=nil)
    url ||= @url
    log "Fetching bot model ~~~FROM THE CLOUD~~~"
    content = Cloudinary::Downloader.download(url)
    if content.empty?
      log "WARNING: bot model not found ~~~IN THE CLOUD~~~"
      nil
    else
      log "Download complete!"
      content
    end
  end
end

class Ebooks::Boodoo::BoodooBot < Ebooks::Bot
  $required_fields = ['consumer_key', 'consumer_secret',
                      'access_token', 'access_token_secret',
                      'bot_name', 'original']

  # Unfollow a user -- OVERRIDE TO FIX TYPO
  # @param user [String] username or user id
  def unfollow(user, *args)
    log "Unfollowing #{user}"
    twitter.unfollow(user, *args)
  end

  # A rough error-catch/retry for rate limit, dupe fave, server timeouts
  def catch_twitter
    begin
      yield
    rescue Twitter::Error => error
      @retries += 1
      raise if @retries > @max_error_retries
      if error.class == Twitter::Error::TooManyRequests
        reset_in = error.rate_limit.reset_in
        log "RATE: Going to sleep for ~#{reset_in / 60} minutes..."
        sleep reset_in
        retry
      elsif error.class == Twitter::Error::Forbidden
        # don't count "Already faved/followed" message against attempts
        @retries -= 1 if error.to_s.include?("already")
        log "WARN: #{error.to_s}"
        return true
      elsif ["execution", "capacity"].any?(&error.to_s.method(:include?))
        log "ERR: Timeout?\n\t#{error}\nSleeping for #{@timeout_sleep} seconds..."
        sleep @timeout_sleep
        retry
      else
        log "Unhandled exception from Twitter: #{error.to_s}"
        raise
      end
    end
  end

  # Override Ebooks::Bot#blacklisted? to ensure lower<=>lower check
  def blacklisted?(username)
    @blacklist.map(&:downcase).include?(username.downcase)
  end

  # Follow new followers, unfollow lost followers
  def follow_parity
    followers = catch_twitter { twitter.followers(:count=>200).map(&:screen_name) }
    following = catch_twitter { twitter.following(:count=>200).map(&:screen_name) }
    to_follow = followers - following
    to_unfollow = following - followers
    twitter.follow(to_follow) unless to_follow.empty?
    twitter.unfollow(to_unfollow) unless to_unfollow.empty?
    @followers = followers
    @following = following - to_unfollow
    if !(to_follow.empty? || to_unfollow.empty?)
      log "Followed #{to_follow.size}; unfollowed #{to_unfollow.size}."
    end
  end

  def make_model!
    log "Updating model: #{@model_path}"
    Ebooks::Model.consume(@archive_path).save(@model_path)
    log "Loading model..."
    @model = Ebooks::Model.load(@model_path)
  end

  def can_run?
    missing_fields.empty?
  end

  def missing_fields
    $required_fields.select { |field|
      # log "#{field} = #{send(field)}"
      send(field).nil? || send(field).empty?
    }
  end
end