From 2a23cf22bd04086a589a5487b8f200325ef07072 Mon Sep 17 00:00:00 2001
From: Joel McCoy
Date: Sun, 8 Mar 2015 18:12:39 -0700
Subject: [PATCH] Simple wrappers for Archive/Model creation; this whole thing
 needs to be re-written

---
Review notes (text between the three-dash line and the first file diff is
ignored when the patch is applied):

* Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch. Hunk line counts were checked against the @@ headers,
  but leading whitespace and the position of blank context lines are
  best-effort; apply with whitespace-insensitive matching if needed.
* `since |== ...` in Boodoo#age was a syntax error; it is now `since ||= ...`.
* Boodoo.age and Boodoo.has_cloud? called themselves (a singleton method
  shadows the instance method of the same name inside `def self.`), so they
  now carry their own one-line implementations.
* Callers now match the keyword signatures introduced by this same patch:
  CloudModel.new(username, path: path), Boodoo.age(x, now: Time.now),
  Boodoo.make_Archive(original, path: ..., client: ...).
* Model.from_json forwarded an undefined local `is_file`; it now forwards
  `is_path:`. That line was unchanged context before, so the stat below
  gained one insertion and one deletion.
* bots.rb read `archive_json.lines` and passed `archive_json` whole, but the
  hunk at -140 now returns a Hash with :url and :lines. This assumes that
  hunk is the tail of jsonify -- TODO confirm against the full boodoo.rb.
* The `index` blob ids below were not recomputed for the edited post-image.

 boodoo.rb | 63 ++++++++++++++++++++++++++++++++++++++++-------------
 bots.rb   | 13 +++++------
 2 files changed, 54 insertions(+), 22 deletions(-)

diff --git a/boodoo.rb b/boodoo.rb
index 14a13d2..d82ed84 100644
--- a/boodoo.rb
+++ b/boodoo.rb
@@ -13,16 +13,43 @@ end
 
 
 module Ebooks::Boodoo
-  def self.age(since, now: Time.now, unit: :in_hours)
+  def self.make_Model(username: nil, path: nil, ignore_cloud: false)
+    # return CloudModel unless Cloudinary is missing or instructed not to.
+    if !ignore_cloud && has_cloud?
+      CloudModel.new(username, path: path)
+    else
+      Model.new
+    end
+  end
+
+  def self.make_Archive(username, path: nil, client: nil, content: nil, local: false, ignore_cloud: false)
+    # return CloudArchive unless Cloudinary is missing or instructed not to.
+    if !ignore_cloud && has_cloud?
+      CloudArchive.new(username, path: path, client: client, content: content, local: local)
+    else
+      Archive.new(username, path, client)
+    end
+  end
+
+  def age(since, now: Time.now, unit: :in_hours)
+    since ||= Time.new(1986, 2, 8)
     unit = unit.to_sym
     TimeDifference.between(since, now).method(unit).call
   end
 
+  def self.age(since, now: Time.now, unit: :in_hours)
+    TimeDifference.between(since || Time.new(1986, 2, 8), now).method(unit.to_sym).call
+  end
+
   # check if we're configured to use Cloudinary for cloud storage
   def has_cloud?
     (ENV['CLOUDINARY_URL'].nil? || ENV['CLOUDINARY_URL'].empty?) ? false : true
   end
 
+  def self.has_cloud?
+    !ENV['CLOUDINARY_URL'].to_s.empty?
+  end
+
   def in_cloud?(public_id, resource_type=:raw)
     return false if !has_cloud?
     begin
@@ -140,9 +167,9 @@ module Ebooks::Boodoo
       log "Uploading JSON archive ~~TO THE CLOUD~~"
       res = Cloudinary::Uploader.upload(new_path, :resource_type=>:raw, :public_id=>public_id, :invalidate=>true)
       log "Upload complete"
-      res["url"]
+      {url: res["url"], lines: JSON.generate(lines)}
     else
-      JSON.generate(lines)
+      {url: nil, lines: JSON.generate(lines)}
     end
   end
 end
@@ -204,12 +231,8 @@ class Ebooks::Boodoo::CloudArchive < Ebooks::Archive
     end
   end
 
-  def initialize(username, path=nil, client=nil, options={})
-    # Just bail on everything if we aren't using Cloudinary
-    return super(username, path, client) unless has_cloud?
+  def initialize(username, path: nil, client: nil, content: nil, local: false)
     # Otherwise duplicate a lot of super(), but also use ~~THE CLOUD~~
-    local = options.delete(:local) || false
-    content = options.delete(:content)
     @username = username
     @path = path || "corpus/#{username}.json"
     if File.directory?(@path)
@@ -242,10 +265,15 @@ class Ebooks::Boodoo::CloudArchive < Ebooks::Archive
     log "Uploading JSON archive ~~TO THE CLOUD~~"
     res = Cloudinary::Uploader.upload(@path, :resource_type=>:raw, :public_id=>public_id, :invalidate=>true)
     @url = res["url"]
+    @persisted = Time.now
     log "Upload complete!"
     res
   end
 
+  def since_persisted
+    Boodoo.age(@persisted, now: Time.now)
+  end
+
   # Unused method?
   def save(path=nil, minify=true)
     path ||= @path
@@ -261,9 +289,11 @@ class Ebooks::Boodoo::CloudArchive < Ebooks::Archive
     content = Cloudinary::Downloader.download(url, :resource_type=>:raw)
     if content.empty?
       log "WARNING: JSON archive not found ~~~IN THE CLOUD~~~"
+      @fetched = nil
       nil
     else
       log "Download complete!"
+      @fetched = Time.now
       content
     end
   end
@@ -271,6 +301,10 @@ class Ebooks::Boodoo::CloudArchive < Ebooks::Archive
   def fetch!
     @content = fetch
   end
+
+  def since_fetched
+    Boodoo.age(@fetched, now: Time.now)
+  end
 end
 
 class Ebooks::Model
@@ -287,7 +321,7 @@ class Ebooks::Model
     model
   end
 
-  def self.from_json(content, is_file)
+  def self.from_json(content, is_path: false)
     model = Model.new
-    model.from_json(content, is_file)
+    model.from_json(content, is_path: is_path)
     model
@@ -295,9 +329,9 @@ class Ebooks::Model
 
   # Create a model from JSON string
   # @content [String/Array] Ebooks-style JSON twitter archive
-  # @return [Ebooks::Boodoo::CloudModel]
-  def from_json(content, is_file=false)
-    content = File.read(content, :encoding => 'utf-8') if is_file
+  # @return [Ebooks::Model]
+  def from_json(content, is_path: false)
+    content = File.read(content, :encoding => 'utf-8') if is_path
     if content.respond_to?(:upcase)
       lines = JSON.parse(content).map do |tweet|
         tweet['text']
@@ -375,8 +409,7 @@ class Ebooks::Boodoo::CloudModel < Ebooks::Model
     model
   end
 
-  def initialize(username, path=nil)
-    return super() unless has_cloud?
+  def initialize(username, path: nil)
     @path = path || "corpus/#{username}.model"
     if File.directory?(@path)
       @path = File.join(@path, "#{username}.model")
diff --git a/bots.rb b/bots.rb
index a0c4ae2..15a2ae7 100644
--- a/bots.rb
+++ b/bots.rb
@@ -6,7 +6,7 @@ require 'time_difference'
 include Ebooks::Boodoo
 
 # Read defaults and lay env vars on top:
-SETTINGS = Dotenv.load('secrets.env').merge(ENV)
+SETTINGS = Dotenv.load('defaults.env').merge(ENV)
 
 # Information about a particular Twitter user we know
 class UserInfo
@@ -73,14 +73,13 @@ class BoodooBot
       log "This can run!"
       if initial_corpus_file.blank? || in_cloud?(File.basename(@archive_path))
         log "Skipping initial corpus consumption"
-        @archive = CloudArchive.new(original, archive_path, twitter)
-        @model = CloudModel.new(@original, @model_path).from_json(@archive_path, true)
+        @archive = Boodoo.make_Archive(original, path: archive_path, client: twitter)
+        @model = Boodoo.make_Model(username: original, path: model_path).from_json(archive_path, is_path: true)
       else
         log "Consuming initial corpus..."
-        archive_json = jsonify(initial_corpus_file, :from_cloud=>has_cloud?, :new_name=>original, :to_cloud=>false)
-        log "archive_json length: #{archive_json.size}"
-        @archive = CloudArchive.new(original, archive_path, twitter, :local=>true, :content=>archive_json)
-        # @model = CloudModel.new(original, model_path).from_json(archive_json, false)
+        archive_json = jsonify(initial_corpus_file, from_cloud: has_cloud?, new_name: original, to_cloud: false)
+        @archive = Boodoo.make_Archive(original, path: archive_path, client: twitter, local: true, content: archive_json[:lines])
+        @model = Boodoo.make_Model(username: original, path: model_path).from_json(archive_json[:lines], is_path: false)
       end
     else
       missing_fields.each {|missing|