Working on Cloud/No-Cloud in parallel:

Ebooks::Archive and Ebooks::Model set as fallbacks for Cloud equivalents -- ROUGH COMMIT
Add Boodoo.age method ("since"), use keyword arguments in some method signatures -- ROUGH COMMIT
This commit is contained in:
Joel McCoy
2015-02-24 19:05:10 -05:00
parent b00dbab1a5
commit d369371ce0
2 changed files with 201 additions and 85 deletions

271
boodoo.rb
View File

@@ -1,5 +1,6 @@
require 'twitter_ebooks'
require 'cloudinary'
require 'time_difference'
include Ebooks
@@ -11,11 +12,27 @@ class Ebooks::TweetMeta
end
module Ebooks::Boodoo
# Measure the time elapsed between two moments.
#
# @param since the earlier moment (presumably a Time; it is handed straight
#   to TimeDifference.between)
# @param now the later moment; defaults to the current time
# @param unit [Symbol, String] name of the reader to invoke on the
#   TimeDifference result, e.g. :in_hours
# @return whatever the selected TimeDifference reader returns
def self.age(since, now: Time.now, unit: :in_hours)
  reader = unit.to_sym
  elapsed = TimeDifference.between(since, now)
  elapsed.method(reader).call
end
# Report whether Cloudinary cloud storage is configured.
#
# @return [Boolean] true only when ENV['CLOUDINARY_URL'] is set and non-empty
def has_cloud?
  cloud_url = ENV['CLOUDINARY_URL']
  return false if cloud_url.nil?

  !cloud_url.empty?
end
# Check whether a resource is already stored in Cloudinary.
#
# @param public_id Cloudinary public id to look up
# @param resource_type Cloudinary resource type to query (defaults to :raw)
# @return [Boolean] false when Cloudinary is not configured or the lookup
#   raises Cloudinary::Api::NotFound; true when the lookup succeeds
def in_cloud?(public_id, resource_type=:raw)
  return false unless has_cloud?

  # The lookup result itself is unused; only success vs. NotFound matters.
  Cloudinary::Api.resource(public_id, resource_type: resource_type)
  true
rescue Cloudinary::Api::NotFound
  false
end
# supports Ruby Range literal, Fixnum, or Float as string
def parse_num(value)
eval(value.to_s[/^\d+(?:\.{1,3})?\d*$/].to_s)
@@ -55,12 +72,12 @@ module Ebooks::Boodoo
value.split(array_splitter).map(&:strip)
end
def make_client
def new_client
Twitter::REST::Client.new do |config|
config.consumer_key = @consumer_key
config.consumer_secret = @consumer_secret
config.access_token = @access_token
config.access_token_secret = @access_token_secret
config.consumer_key = ENV['CONSUMER_KEY']
config.consumer_secret = ENV['CONSUMER_SECRET']
config.access_token = ENV['ACCESS_TOKEN']
config.access_token_secret = ENV['ACCESS_TOKEN_SECRET']
end
end
@@ -71,15 +88,24 @@ module Ebooks::Boodoo
end
end
def jsonify(path, new_name=nil)
name = File.basename(path).split('.')[0]
def jsonify(path, write_file: true, from_cloud: false, to_cloud: true, new_name: nil)
basename = File.basename(path)
name = basename.split('.')[0]
ext = path.split('.')[-1]
new_name ||= name
new_path = new_name + ".json"
new_path = "corpus/#{new_name}.json"
lines = []
id = nil
content = File.read(path, :encoding => 'utf-8')
#TODO: Move this to its own method: find_corpus(basename)
if from_cloud && in_cloud?(basename)
log "Reading initial corpus file ~~~FROM CLOUD~~~"
content = Cloudinary::Downloader.download(path, :resource_type=>:raw)
else
log "Reading local initial corpus file"
content = File.read(path, :encoding => 'utf-8')
end
if ext.downcase == "json"
log "Minifying JSON corpus at #{path}"
@@ -104,37 +130,100 @@ module Ebooks::Boodoo
File.open(new_path, 'w') do |f|
log "Writing #{lines.length} lines to #{new_path}"
f.write(JSON.generate(lines))
end
end if write_file
if has_cloud?
#TODO: Save res["url"] to CloudArchive somehow?
if to_cloud && has_cloud?
public_id = new_path
# log "Deleting JSON archive ~~~FROM THE CLOUD~~~"
# Cloudinary::Api.delete_resources(public_id, :resource_type=>:raw)
log "Uploading JSON archive ~~TO THE CLOUD~~"
res = Cloudinary::Uploader.upload(new_path, :resource_type=>:raw, :public_id=>public_id, :invalidate=>true)
log "Upload complete!"
log "Upload complete"
res["url"]
else
log "Can't find ~~~THE CLOUD~~~, not uploading JSON archive."
nil
JSON.generate(lines)
end
end
end
# Local-disk helpers reopened onto Ebooks::Archive so it exposes the same
# parse/minify/persist/save vocabulary as the Cloud-backed archive and can
# serve as its fallback when no cloud storage is configured.
class Ebooks::Archive
  # @param basename [String] file name inside the corpus/ directory
  # @return [Boolean] whether corpus/<basename> exists on local disk
  def self.exist?(basename)
    File.exist?("corpus/#{basename}")
  end

  # Parse a JSON archive into an array of tweets with symbolized keys.
  #
  # @param content [String, nil] JSON text; falls back to @content and
  #   finally to an empty JSON array
  # @return the parsed JSON structure
  def parse(content=nil)
    content = content || @content || '[]'
    JSON.parse(content, symbolize_names: true)
  end

  # Like #parse, but stores the result in @tweets.
  def parse!(content=nil)
    @tweets = parse(content)
  end

  # @return the result of minify_tweets applied to @tweets (@tweets untouched)
  def minify
    minify_tweets(@tweets)
  end

  # Replace @tweets with its minified form.
  def minify!
    @tweets = minify_tweets(@tweets)
  end

  # Write @tweets to local disk as pretty-printed JSON.
  #
  # @param path [String, nil] destination file; defaults to @path
  # @return [String] the path that was actually written
  def persist(path=nil)
    path ||= @path
    log "Saving JSON archive locally..."
    # Serialize before touching the file so a JSON failure cannot leave a
    # truncated archive behind.
    json = JSON.pretty_generate(@tweets)
    File.open(path, 'w') do |f|
      f.write(json)
    end
    log "Save complete!"
    # Report where the data went, even when an explicit path overrode @path.
    path
  end

  # Persist to the archive's own @path.
  def persist!
    persist(@path)
  end

  # Alias-style wrapper around #persist.
  def save(path=nil)
    persist(path)
  end

  # Save to the archive's own @path.
  def save!
    save(@path)
  end
end
class Ebooks::Boodoo::CloudArchive < Ebooks::Archive
def initialize(username, path=nil, client=nil)
include Ebooks::Boodoo
def self.exist?(username)
begin
Cloudinary::Api.resource("#{username}.json", :resource_type=>:raw)
true
rescue Cloudinary::Api::NotFound
false
end
end
def initialize(username, path=nil, client=nil, options={})
# Just bail on everything if we aren't using Cloudinary
return super unless has_cloud?
return super(username, path, client) unless has_cloud?
# Otherwise duplicate a lot of super(), but also use ~~THE CLOUD~~
local = options.delete(:local) || false
content = options.delete(:content)
@username = username
@path = path || "corpus/#{username}.json"
if File.directory?(@path)
@path = File.join(@path, "#{username}.json")
end
@basename = File.basename(@path)
@client = client || Boodoo.make_client
@client = client || new_client
@url = Cloudinary::Utils.cloudinary_url(@basename, :resource_type=>:raw)
fetch!
@public_id = @basename
if local || content
@content = content || File.read(@path)
else
fetch!
end
parse!
new_tweets = sync.class != IO
persist if new_tweets
@@ -146,14 +235,6 @@ class Ebooks::Boodoo::CloudArchive < Ebooks::Archive
end
end
def minify!
@tweets = minify_tweets(@tweets)
end
def minify
minify_tweets(@tweets)
end
def persist(public_id=nil)
public_id ||= @basename
# log "Deleting out-dated archive ~~~FROM THE CLOUD~~~"
@@ -165,19 +246,7 @@ class Ebooks::Boodoo::CloudArchive < Ebooks::Archive
res
end
def persist!
persist(@basename)
end
def parse(content=nil)
content = content || @content || '[]'
JSON.parse(content, symbolize_names: true)
end
def parse!(content=nil)
@tweets = parse(content)
end
# Unused method?
def save(path=nil, minify=true)
path ||= @path
output = minify ? JSON.generate(minify) : JSON.pretty_generate(@tweets)
@@ -186,10 +255,6 @@ class Ebooks::Boodoo::CloudArchive < Ebooks::Archive
end
end
def save!
save(@path)
end
def fetch(url=nil)
url ||= @url
log "Fetching JSON archive ~~~FROM THE CLOUD~~~"
@@ -208,6 +273,86 @@ class Ebooks::Boodoo::CloudArchive < Ebooks::Archive
end
end
# Local-disk helpers reopened onto Ebooks::Model so it offers the same
# fetch/parse/persist vocabulary as Boodoo::CloudModel.
class Ebooks::Model
  # Build a model from marshaled content instead of a file.
  #
  # NOTE(review): this goes through Marshal.load, which can instantiate
  # arbitrary objects -- only pass content from a trusted source.
  #
  # @param content [String] marshaled hash with :tokens, :sentences,
  #   :mentions and :keywords entries
  # @return [Ebooks::Model] a new model populated from content
  def self.parse(content)
    model = Model.new
    model.parse!(content)
    model
  end

  # Build a model from an Ebooks-style JSON archive.
  #
  # @param content [String, Array] JSON text, a list of lines, or (when
  #   is_file is true) a path to a JSON file
  # @param is_file [Boolean] treat content as a file path; defaults to false
  #   to match the instance-level #from_json
  # @return [Ebooks::Model] the new model
  def self.from_json(content, is_file=false)
    model = Model.new
    model.from_json(content, is_file)
    model
  end

  # Consume an Ebooks-style JSON twitter archive into this model.
  #
  # @param content [String, Array] JSON text (anything responding to
  #   #upcase is parsed as JSON and each entry's 'text' is used); any other
  #   object is passed through as the list of lines
  # @param is_file [Boolean] when true, content is a path read as UTF-8
  # @return whatever consume_lines returns (callers chain on it, so
  #   presumably the model itself -- confirm against consume_lines)
  def from_json(content, is_file=false)
    content = File.read(content, :encoding => 'utf-8') if is_file
    if content.respond_to?(:upcase)
      lines = JSON.parse(content).map do |tweet|
        tweet['text']
      end
    else
      lines = content
    end
    log "Reading json corpus with #{lines.size} lines"
    consume_lines(lines)
  end

  # Read the saved model file from local disk.
  #
  # @param path [String, nil] file to read; defaults to @path
  # @return [String, nil] the file content, or nil when no path is known,
  #   the file is missing, or the file is empty
  def fetch(path=nil)
    path ||= @path
    # Guard against a nil path (File.exist?(nil) raises TypeError) and read
    # the same file whose existence was just checked.
    if path && File.exist?(path)
      log "Fetching local bot model"
      content = File.read(path, :encoding => 'utf-8')
      unless content.empty?
        log "local model fetched"
        return content
      end
    end
    log "WARNING: local bot model not found"
    nil
  end

  # Fetch the model content and remember it in @content.
  def fetch!
    @content = fetch
  end

  # Unmarshal model properties.
  #
  # NOTE(review): Marshal.load is unsafe on untrusted data.
  #
  # @param content [String, nil] marshaled data; falls back to @content
  #   (as set by #fetch!) when omitted
  # @return the unmarshaled object
  def parse(content=nil)
    Marshal.load(content || @content)
  end

  # Unmarshal content and load its properties into this model.
  def parse!(content=nil)
    props = parse(content)
    @tokens = props[:tokens]
    @sentences = props[:sentences]
    @mentions = props[:mentions]
    @keywords = props[:keywords]
  end

  # Save to the model's own @path.
  def save!
    save(@path)
  end

  # Save the model locally.
  #
  # @param path [String, nil] destination; defaults to @path
  def persist(path=nil)
    path ||= @path
    save(path)
  end

  # Persist to the model's own @path.
  def persist!
    persist
  end
end
class Ebooks::Boodoo::CloudModel < Ebooks::Model
# Read a saved model from marshaled content instead of file
# @param content [String]
@@ -231,7 +376,7 @@ class Ebooks::Boodoo::CloudModel < Ebooks::Model
end
def initialize(username, path=nil)
return Ebooks::Model.new unless has_cloud?
return super() unless has_cloud?
@path = path || "corpus/#{username}.model"
if File.directory?(@path)
@path = File.join(@path, "#{username}.model")
@@ -241,26 +386,8 @@ class Ebooks::Boodoo::CloudModel < Ebooks::Model
@url = Cloudinary::Utils.cloudinary_url(@basename, :resource_type=>:raw)
end
# Create a model from JSON string
# @content [String/Array] Ebooks-style JSON twitter archive (pre-parsed)
# @return [Ebooks::Boodoo::CloudModel]
def from_json(content, is_file=false)
content = File.read(content, :encoding=>'utf-8') if is_file
if content.respond_to?(:upcase)
lines = JSON.parse(content).map do |tweet|
tweet['text']
end
else
lines = content
end
log "Reading json corpus with #{lines.size} lines"
consume_lines(lines)
end
def persist(public_id=nil)
public_id ||= @basename
# log "Deleting old model ~~~FROM THE CLOUD~~~"
# Cloudinary::Api.delete_resources(@basename, :resource_type=>:raw)
log "Uploading bot model ~~TO THE CLOUD~~"
res = Cloudinary::Uploader.upload(@path, :resource_type=>:raw, :public_id=>public_id, :invalidate=>true)
@url = res["url"]
@@ -272,22 +399,6 @@ class Ebooks::Boodoo::CloudModel < Ebooks::Model
persist(@basename)
end
def parse(content=nil)
props = Marshal.load(content)
end
def parse!(content=nil)
props = parse(content)
@tokens = props[:tokens]
@sentences = props[:sentences]
@mentions = props[:mentions]
@keywords = props[:keywords]
end
def save!
save(@path)
end
def fetch(url=nil)
url ||= @url
log "Fetching bot model ~~~FROM THE CLOUD~~~"
@@ -300,10 +411,6 @@ class Ebooks::Boodoo::CloudModel < Ebooks::Model
content
end
end
def fetch!
@content = fetch
end
end
class Ebooks::Boodoo::BoodooBot < Ebooks::Bot

15
bots.rb
View File

@@ -6,7 +6,7 @@ require 'time_difference'
include Ebooks::Boodoo
# Read defaults and lay env vars on top:
SETTINGS = Dotenv.load('defaults.env').merge(ENV)
SETTINGS = Dotenv.load('secrets.env').merge(ENV)
# Information about a particular Twitter user we know
class UserInfo
@@ -71,8 +71,17 @@ class BoodooBot
if can_run?
log "This can run!"
@archive = CloudArchive.new(original, archive_path, twitter)
@model = CloudModel.new(@original, @model_path).from_json(@archive_path, true)
if initial_corpus_file.blank? || in_cloud?(File.basename(@archive_path))
log "Skipping initial corpus consumption"
@archive = CloudArchive.new(original, archive_path, twitter)
@model = CloudModel.new(@original, @model_path).from_json(@archive_path, true)
else
log "Consuming initial corpus..."
archive_json = jsonify(initial_corpus_file, :from_cloud=>has_cloud?, :new_name=>original, :to_cloud=>false)
log "archive_json length: #{archive_json.size}"
@archive = CloudArchive.new(original, archive_path, twitter, :local=>true, :content=>archive_json)
# @model = CloudModel.new(original, model_path).from_json(archive_json, false)
end
else
missing_fields.each {|missing|
log "Can't run without #{missing}"