diff --git a/boodoo.rb b/boodoo.rb index b55c21b..14a13d2 100644 --- a/boodoo.rb +++ b/boodoo.rb @@ -1,5 +1,6 @@ require 'twitter_ebooks' require 'cloudinary' +require 'time_difference' include Ebooks @@ -11,11 +12,27 @@ class Ebooks::TweetMeta end module Ebooks::Boodoo + + def self.age(since, now: Time.now, unit: :in_hours) + unit = unit.to_sym + TimeDifference.between(since, now).method(unit).call + end + # check if we're configured to use Cloudinary for cloud storage def has_cloud? (ENV['CLOUDINARY_URL'].nil? || ENV['CLOUDINARY_URL'].empty?) ? false : true end + def in_cloud?(public_id, resource_type=:raw) + return false if !has_cloud? + begin + Cloudinary::Api.resource(public_id, :resource_type=>resource_type) + true + rescue Cloudinary::Api::NotFound + false + end + end + # supports Ruby Range literal, Fixnum, or Float as string def parse_num(value) eval(value.to_s[/^\d+(?:\.{1,3})?\d*$/].to_s) @@ -55,12 +72,12 @@ module Ebooks::Boodoo value.split(array_splitter).map(&:strip) end - def make_client + def new_client Twitter::REST::Client.new do |config| - config.consumer_key = @consumer_key - config.consumer_secret = @consumer_secret - config.access_token = @access_token - config.access_token_secret = @access_token_secret + config.consumer_key = ENV['CONSUMER_KEY'] + config.consumer_secret = ENV['CONSUMER_SECRET'] + config.access_token = ENV['ACCESS_TOKEN'] + config.access_token_secret = ENV['ACCESS_TOKEN_SECRET'] end end @@ -71,15 +88,24 @@ module Ebooks::Boodoo end end - def jsonify(path, new_name=nil) - name = File.basename(path).split('.')[0] + def jsonify(path, write_file: true, from_cloud: false, to_cloud: true, new_name: nil) + basename = File.basename(path) + name = basename.split('.')[0] ext = path.split('.')[-1] new_name ||= name - new_path = new_name + ".json" + + new_path = "corpus/#{new_name}.json" lines = [] id = nil - content = File.read(path, :encoding => 'utf-8') + #TODO: Move this to its own method: find_corpus(basename) + if from_cloud && in_cloud?(basename) + log "Reading initial corpus file ~~~FROM CLOUD~~~" + content = Cloudinary::Downloader.download(path, :resource_type=>:raw) + else + log "Reading local initial corpus file" + content = File.read(path, :encoding => 'utf-8') + end if ext.downcase == "json" log "Minifying JSON corpus at #{path}" @@ -104,37 +130,100 @@ module Ebooks::Boodoo File.open(new_path, 'w') do |f| log "Writing #{lines.length} lines to #{new_path}" f.write(JSON.generate(lines)) - end + end if write_file - if has_cloud? + #TODO: Save res["url"] to CloudArchive somehow? + if to_cloud && has_cloud? public_id = new_path # log "Deleting JSON archive ~~~FROM THE CLOUD~~~" # Cloudinary::Api.delete_resources(public_id, :resource_type=>:raw) log "Uploading JSON archive ~~TO THE CLOUD~~" res = Cloudinary::Uploader.upload(new_path, :resource_type=>:raw, :public_id=>public_id, :invalidate=>true) - log "Upload complete!" + log "Upload complete" res["url"] else - log "Can't find ~~~THE CLOUD~~~, not uploading JSON archive." - nil + JSON.generate(lines) end end end +class Ebooks::Archive + def self.exist?(basename) + File.exist?("corpus/#{basename}") + end + + def parse(content=nil) + content = content || @content || '[]' + JSON.parse(content, symbolize_names: true) + end + + def parse!(content=nil) + @tweets = parse(content) + end + + def minify + minify_tweets(@tweets) + end + + def minify! + @tweets = minify_tweets(@tweets) + end + + def persist(path=nil) + path ||= @path + log "Saving JSON archive locally..." + File.open(path, 'w') do |f| + f.write(JSON.pretty_generate(@tweets)) + end + log "Save complete!" + @path + end + + def persist! + persist(@path) + end + + def save(path=nil) + persist(path) + end + + def save! + save(@path) + end +end + class Ebooks::Boodoo::CloudArchive < Ebooks::Archive - def initialize(username, path=nil, client=nil) + include Ebooks::Boodoo + + def self.exist?(username) + begin + Cloudinary::Api.resource("#{username}.json", :resource_type=>:raw) + true + rescue Cloudinary::Api::NotFound + false + end + end + + def initialize(username, path=nil, client=nil, options={}) # Just bail on everything if we aren't using Cloudinary - return super unless has_cloud? + return super(username, path, client) unless has_cloud? # Otherwise duplicate a lot of super(), but also use ~~THE CLOUD~~ + local = options.delete(:local) || false + content = options.delete(:content) @username = username @path = path || "corpus/#{username}.json" if File.directory?(@path) @path = File.join(@path, "#{username}.json") end @basename = File.basename(@path) - @client = client || Boodoo.make_client + @client = client || new_client @url = Cloudinary::Utils.cloudinary_url(@basename, :resource_type=>:raw) - fetch! + @public_id = @basename + if local || content + @content = content || File.read(@path) + else + fetch! + end parse! new_tweets = sync.class != IO persist if new_tweets @@ -146,14 +235,6 @@ class Ebooks::Boodoo::CloudArchive < Ebooks::Archive end end - def minify! - @tweets = minify_tweets(@tweets) - end - - def minify - minify_tweets(@tweets) - end - def persist(public_id=nil) public_id ||= @basename # log "Deleting out-dated archive ~~~FROM THE CLOUD~~~" @@ -165,19 +246,7 @@ class Ebooks::Boodoo::CloudArchive < Ebooks::Archive res end - def persist! - persist(@basename) - end - - def parse(content=nil) - content = content || @content || '[]' - JSON.parse(content, symbolize_names: true) - end - - def parse!(content=nil) - @tweets = parse(content) - end - + # Unused method? def save(path=nil, minify=true) path ||= @path output = minify ? JSON.generate(minify) : JSON.pretty_generate(@tweets) @@ -186,10 +255,6 @@ class Ebooks::Boodoo::CloudArchive < Ebooks::Archive end end - def save! - save(@path) - end - def fetch(url=nil) url ||= @url log "Fetching JSON archive ~~~FROM THE CLOUD~~~" @@ -208,6 +273,86 @@ class Ebooks::Boodoo::CloudArchive < Ebooks::Archive end end +class Ebooks::Model + # add methods here to match Boodoo::CloudModel + def self.parse(content) + model = Model.new + model.instance_eval do + props = Marshal.load(content) + @tokens = props[:tokens] + @sentences = props[:sentences] + @mentions = props[:mentions] + @keywords = props[:keywords] + end + model + end + + def self.from_json(content, is_file) + model = Model.new + model.from_json(content, is_file) + model + end + + # Create a model from JSON string + # @content [String/Array] Ebooks-style JSON twitter archive + # @return [Ebooks::Boodoo::CloudModel] + def from_json(content, is_file=false) + content = File.read(content, :encoding => 'utf-8') if is_file + if content.respond_to?(:upcase) + lines = JSON.parse(content).map do |tweet| + tweet['text'] + end + else + lines = content + end + log "Reading json corpus with #{lines.size} lines" + consume_lines(lines) + end + + def fetch(path=nil) + path ||= @path + if File.exist?(path) + log "Fetching local bot model" + content = File.read(@path, :encoding => 'utf-8') + if !content.empty? + log "local model fetched" + return content + end + end + log "WARNING: local bot model not found" + return nil + end + + def fetch! + @content = fetch + end + + def parse(content=nil) + props = Marshal.load(content) + end + + def parse!(content=nil) + props = parse(content) + @tokens = props[:tokens] + @sentences = props[:sentences] + @mentions = props[:mentions] + @keywords = props[:keywords] + end + + def save! + save(@path) + end + + def persist(path=nil) + path ||= @path + save(path) + end + + def persist! + persist + end +end + class Ebooks::Boodoo::CloudModel < Ebooks::Model # Read a saved model from marshaled content instead of file # @param content [String] @@ -231,7 +376,7 @@ class Ebooks::Boodoo::CloudModel < Ebooks::Model end def initialize(username, path=nil) - return Ebooks::Model.new unless has_cloud? + return super() unless has_cloud? @path = path || "corpus/#{username}.model" if File.directory?(@path) @path = File.join(@path, "#{username}.model") @@ -241,26 +386,8 @@ class Ebooks::Boodoo::CloudModel < Ebooks::Model @url = Cloudinary::Utils.cloudinary_url(@basename, :resource_type=>:raw) end - # Create a model from JSON string - # @content [String/Array] Ebooks-style JSON twitter archive (pre-parsed) - # @return [Ebooks::Boodoo::CloudModel] - def from_json(content, is_file=false) - content = File.read(content, :encoding=>'utf-8') if is_file - if content.respond_to?(:upcase) - lines = JSON.parse(content).map do |tweet| - tweet['text'] - end - else - lines = content - end - log "Reading json corpus with #{lines.size} lines" - consume_lines(lines) - end - def persist(public_id=nil) public_id ||= @basename - # log "Deleting old model ~~~FROM THE CLOUD~~~" - # Cloudinary::Api.delete_resources(@basename, :resource_type=>:raw) log "Uploading bot model ~~TO THE CLOUD~~" res = Cloudinary::Uploader.upload(@path, :resource_type=>:raw, :public_id=>public_id, :invalidate=>true) @url = res["url"] @@ -272,22 +399,6 @@ class Ebooks::Boodoo::CloudModel < Ebooks::Model persist(@basename) end - def parse(content=nil) - props = Marshal.load(content) - end - - def parse!(content=nil) - props = parse(content) - @tokens = props[:tokens] - @sentences = props[:sentences] - @mentions = props[:mentions] - @keywords = props[:keywords] - end - - def save! - save(@path) - end - def fetch(url=nil) url ||= @url log "Fetching bot model ~~~FROM THE CLOUD~~~" @@ -300,10 +411,6 @@ class Ebooks::Boodoo::CloudModel < Ebooks::Model content end end - - def fetch! - @content = fetch - end end class Ebooks::Boodoo::BoodooBot < Ebooks::Bot diff --git a/bots.rb b/bots.rb index 373d96b..a0c4ae2 100644 --- a/bots.rb +++ b/bots.rb @@ -6,7 +6,7 @@ require 'time_difference' include Ebooks::Boodoo # Read defaults and lay env vars on top: -SETTINGS = Dotenv.load('defaults.env').merge(ENV) +SETTINGS = Dotenv.load('secrets.env').merge(ENV) # Information about a particular Twitter user we know class UserInfo @@ -71,8 +71,17 @@ class BoodooBot if can_run? log "This can run!" - @archive = CloudArchive.new(original, archive_path, twitter) - @model = CloudModel.new(@original, @model_path).from_json(@archive_path, true) + if initial_corpus_file.blank? || in_cloud?(File.basename(@archive_path)) + log "Skipping initial corpus consumption" + @archive = CloudArchive.new(original, archive_path, twitter) + @model = CloudModel.new(@original, @model_path).from_json(@archive_path, true) + else + log "Consuming initial corpus..." + archive_json = jsonify(initial_corpus_file, :from_cloud=>has_cloud?, :new_name=>original, :to_cloud=>false) + log "archive_json length: #{archive_json.size}" + @archive = CloudArchive.new(original, archive_path, twitter, :local=>true, :content=>archive_json) + # @model = CloudModel.new(original, model_path).from_json(archive_json, false) + end else missing_fields.each {|missing| log "Can't run without #{missing}"