mirror of
https://github.com/thewesker/ebooks_example.git
synced 2025-12-20 12:21:16 -05:00
Working on Cloud/No-Cloud in parallel:
Ebooks::Archive and Ebooks::Model set as fallbacks for Cloud equivalents -- ROUGH COMMIT
Add Boodoo.age method ("since"), use keyword arguments in some method signatures -- ROUGH COMMIT
This commit is contained in:
267
boodoo.rb
267
boodoo.rb
@@ -1,5 +1,6 @@
|
|||||||
require 'twitter_ebooks'
|
require 'twitter_ebooks'
|
||||||
require 'cloudinary'
|
require 'cloudinary'
|
||||||
|
require 'time_difference'
|
||||||
|
|
||||||
include Ebooks
|
include Ebooks
|
||||||
|
|
||||||
@@ -11,11 +12,27 @@ class Ebooks::TweetMeta
|
|||||||
end
|
end
|
||||||
|
|
||||||
module Ebooks::Boodoo
|
module Ebooks::Boodoo
|
||||||
|
|
||||||
|
# Report how much time lies between two instants, in a chosen unit.
#
# @param since [Object] start of the interval; handed unchanged to
#   TimeDifference.between
# @param now [Object] end of the interval (defaults to Time.now)
# @param unit [Symbol, String] name of the reader to call on the
#   TimeDifference result (defaults to :in_hours)
# @return [Object] whatever the selected reader returns
# @raise [NoMethodError] when unit does not name a public method
def self.age(since, now: Time.now, unit: :in_hours)
  # public_send instead of method(...).call: Object#method also resolves
  # private methods, so a caller-supplied unit could reach internals.
  TimeDifference.between(since, now).public_send(unit.to_sym)
end
|
||||||
|
|
||||||
# Check if we're configured to use Cloudinary for cloud storage.
#
# @return [Boolean] true only when ENV['CLOUDINARY_URL'] is set to a
#   non-empty string
def has_cloud?
  # nil.to_s is "", so an unset variable and an empty one both yield false.
  !ENV['CLOUDINARY_URL'].to_s.empty?
end
|
||||||
|
|
||||||
|
# Check whether a resource is already stored in Cloudinary.
#
# @param public_id [String] Cloudinary public id to look up
# @param resource_type [Symbol] Cloudinary resource type (defaults to :raw)
# @return [Boolean] false when Cloudinary is not configured, when public_id
#   is blank, or when the lookup raises Cloudinary::Api::NotFound; true when
#   the lookup returns normally
def in_cloud?(public_id, resource_type=:raw)
  return false unless has_cloud?
  # A blank id cannot name a stored resource, so skip the API round trip.
  return false if public_id.to_s.empty?

  Cloudinary::Api.resource(public_id, :resource_type=>resource_type)
  true
rescue Cloudinary::Api::NotFound
  false
end
|
||||||
|
|
||||||
# supports Ruby Range literal, Fixnum, or Float as string
|
# supports Ruby Range literal, Fixnum, or Float as string
|
||||||
def parse_num(value)
|
def parse_num(value)
|
||||||
eval(value.to_s[/^\d+(?:\.{1,3})?\d*$/].to_s)
|
eval(value.to_s[/^\d+(?:\.{1,3})?\d*$/].to_s)
|
||||||
@@ -55,12 +72,12 @@ module Ebooks::Boodoo
|
|||||||
value.split(array_splitter).map(&:strip)
|
value.split(array_splitter).map(&:strip)
|
||||||
end
|
end
|
||||||
|
|
||||||
def make_client
|
def new_client
|
||||||
Twitter::REST::Client.new do |config|
|
Twitter::REST::Client.new do |config|
|
||||||
config.consumer_key = @consumer_key
|
config.consumer_key = ENV['CONSUMER_KEY']
|
||||||
config.consumer_secret = @consumer_secret
|
config.consumer_secret = ENV['CONSUMER_SECRET']
|
||||||
config.access_token = @access_token
|
config.access_token = ENV['ACCESS_TOKEN']
|
||||||
config.access_token_secret = @access_token_secret
|
config.access_token_secret = ENV['ACCESS_TOKEN_SECRET']
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -71,15 +88,24 @@ module Ebooks::Boodoo
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def jsonify(path, new_name=nil)
|
def jsonify(path, write_file: true, from_cloud: false, to_cloud: true, new_name: nil)
|
||||||
name = File.basename(path).split('.')[0]
|
basename = File.basename(path)
|
||||||
|
name = basename.split('.')[0]
|
||||||
ext = path.split('.')[-1]
|
ext = path.split('.')[-1]
|
||||||
new_name ||= name
|
new_name ||= name
|
||||||
new_path = new_name + ".json"
|
|
||||||
|
new_path = "corpus/#{new_name}.json"
|
||||||
lines = []
|
lines = []
|
||||||
id = nil
|
id = nil
|
||||||
|
|
||||||
|
#TODO: Move this to its own method: find_corpus(basename)
|
||||||
|
if from_cloud && in_cloud?(basename)
|
||||||
|
log "Reading initial corpus file ~~~FROM CLOUD~~~"
|
||||||
|
content = Cloudinary::Downloader.download(path, :resource_type=>:raw)
|
||||||
|
else
|
||||||
|
log "Reading local initial corpus file"
|
||||||
content = File.read(path, :encoding => 'utf-8')
|
content = File.read(path, :encoding => 'utf-8')
|
||||||
|
end
|
||||||
|
|
||||||
if ext.downcase == "json"
|
if ext.downcase == "json"
|
||||||
log "Minifying JSON corpus at #{path}"
|
log "Minifying JSON corpus at #{path}"
|
||||||
@@ -104,37 +130,100 @@ module Ebooks::Boodoo
|
|||||||
File.open(new_path, 'w') do |f|
|
File.open(new_path, 'w') do |f|
|
||||||
log "Writing #{lines.length} lines to #{new_path}"
|
log "Writing #{lines.length} lines to #{new_path}"
|
||||||
f.write(JSON.generate(lines))
|
f.write(JSON.generate(lines))
|
||||||
end
|
end if write_file
|
||||||
|
|
||||||
if has_cloud?
|
#TODO: Save res["url"] to CloudArchive somehow?
|
||||||
|
if to_cloud && has_cloud?
|
||||||
public_id = new_path
|
public_id = new_path
|
||||||
# log "Deleting JSON archive ~~~FROM THE CLOUD~~~"
|
# log "Deleting JSON archive ~~~FROM THE CLOUD~~~"
|
||||||
# Cloudinary::Api.delete_resources(public_id, :resource_type=>:raw)
|
# Cloudinary::Api.delete_resources(public_id, :resource_type=>:raw)
|
||||||
log "Uploading JSON archive ~~TO THE CLOUD~~"
|
log "Uploading JSON archive ~~TO THE CLOUD~~"
|
||||||
res = Cloudinary::Uploader.upload(new_path, :resource_type=>:raw, :public_id=>public_id, :invalidate=>true)
|
res = Cloudinary::Uploader.upload(new_path, :resource_type=>:raw, :public_id=>public_id, :invalidate=>true)
|
||||||
log "Upload complete!"
|
log "Upload complete"
|
||||||
res["url"]
|
res["url"]
|
||||||
else
|
else
|
||||||
log "Can't find ~~~THE CLOUD~~~, not uploading JSON archive."
|
JSON.generate(lines)
|
||||||
nil
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Local-disk additions to Ebooks::Archive. Ebooks::Boodoo::CloudArchive
# subclasses this class, so these methods act as the no-cloud fallback for
# the same method names.
#
# NOTE(review): `log` and `minify_tweets` are not defined in this block; they
# are assumed to be provided elsewhere (gem or Ebooks::Boodoo) -- confirm.
module Ebooks
  class Archive
    # @param basename [String] file name to look for under corpus/
    # @return [Boolean] whether corpus/<basename> exists on local disk
    def self.exist?(basename)
      File.exist?("corpus/#{basename}")
    end

    # Parse a JSON archive without touching instance state.
    #
    # @param content [String, nil] JSON text; falls back to @content
    # @return [Array, Hash] parsed JSON with symbolized keys; [] when neither
    #   content nor @content holds any text
    # @raise [JSON::ParserError] when the text is not valid JSON
    def parse(content=nil)
      source = content || @content
      # A missing or whitespace-only archive means "no tweets yet", not a
      # parse error (JSON.parse raises on an empty string).
      source = '[]' if source.nil? || source.to_s.strip.empty?
      JSON.parse(source, symbolize_names: true)
    end

    # Parse like #parse and store the result in @tweets.
    #
    # @param content [String, nil] JSON text; falls back to @content
    # @return [Array, Hash] the value now held in @tweets
    def parse!(content=nil)
      @tweets = parse(content)
    end

    # @return [Object] the result of minify_tweets(@tweets); @tweets itself
    #   is not reassigned
    def minify
      minify_tweets(@tweets)
    end

    # Replace @tweets with its minified form.
    #
    # @return [Object] the value now held in @tweets
    def minify!
      @tweets = minify_tweets(@tweets)
    end

    # Write @tweets to disk as pretty-printed JSON.
    #
    # @param path [String, nil] destination file; falls back to @path
    # @return [String] the path that was actually written
    def persist(path=nil)
      path ||= @path
      log "Saving JSON archive locally..."
      File.write(path, JSON.pretty_generate(@tweets))
      log "Save complete!"
      # Return the file written, which is not necessarily @path.
      path
    end

    # Persist to the archive's own @path.
    #
    # @return [String] the path that was written
    def persist!
      persist(@path)
    end

    # Alias-style wrapper around #persist.
    #
    # @param path [String, nil] destination file; falls back to @path
    # @return [String] the path that was written
    def save(path=nil)
      persist(path)
    end

    # Save to the archive's own @path.
    #
    # @return [String] the path that was written
    def save!
      save(@path)
    end
  end
end
|
||||||
|
|
||||||
class Ebooks::Boodoo::CloudArchive < Ebooks::Archive
|
class Ebooks::Boodoo::CloudArchive < Ebooks::Archive
|
||||||
def initialize(username, path=nil, client=nil)
|
include Ebooks::Boodoo
|
||||||
|
|
||||||
|
def self.exist?(username)
|
||||||
|
begin
|
||||||
|
Cloudinary::Api.resource("#{username}.json", :resource_type=>:raw)
|
||||||
|
true
|
||||||
|
rescue Cloudinary::Api::NotFound
|
||||||
|
false
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
def initialize(username, path=nil, client=nil, options={})
|
||||||
# Just bail on everything if we aren't using Cloudinary
|
# Just bail on everything if we aren't using Cloudinary
|
||||||
return super unless has_cloud?
|
return super(username, path, client) unless has_cloud?
|
||||||
# Otherwise duplicate a lot of super(), but also use ~~THE CLOUD~~
|
# Otherwise duplicate a lot of super(), but also use ~~THE CLOUD~~
|
||||||
|
local = options.delete(:local) || false
|
||||||
|
content = options.delete(:content)
|
||||||
@username = username
|
@username = username
|
||||||
@path = path || "corpus/#{username}.json"
|
@path = path || "corpus/#{username}.json"
|
||||||
if File.directory?(@path)
|
if File.directory?(@path)
|
||||||
@path = File.join(@path, "#{username}.json")
|
@path = File.join(@path, "#{username}.json")
|
||||||
end
|
end
|
||||||
@basename = File.basename(@path)
|
@basename = File.basename(@path)
|
||||||
@client = client || Boodoo.make_client
|
@client = client || new_client
|
||||||
@url = Cloudinary::Utils.cloudinary_url(@basename, :resource_type=>:raw)
|
@url = Cloudinary::Utils.cloudinary_url(@basename, :resource_type=>:raw)
|
||||||
|
@public_id = @basename
|
||||||
|
if local || content
|
||||||
|
@content = content || File.read(@path)
|
||||||
|
else
|
||||||
fetch!
|
fetch!
|
||||||
|
end
|
||||||
parse!
|
parse!
|
||||||
new_tweets = sync.class != IO
|
new_tweets = sync.class != IO
|
||||||
persist if new_tweets
|
persist if new_tweets
|
||||||
@@ -146,14 +235,6 @@ class Ebooks::Boodoo::CloudArchive < Ebooks::Archive
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def minify!
|
|
||||||
@tweets = minify_tweets(@tweets)
|
|
||||||
end
|
|
||||||
|
|
||||||
def minify
|
|
||||||
minify_tweets(@tweets)
|
|
||||||
end
|
|
||||||
|
|
||||||
def persist(public_id=nil)
|
def persist(public_id=nil)
|
||||||
public_id ||= @basename
|
public_id ||= @basename
|
||||||
# log "Deleting out-dated archive ~~~FROM THE CLOUD~~~"
|
# log "Deleting out-dated archive ~~~FROM THE CLOUD~~~"
|
||||||
@@ -165,19 +246,7 @@ class Ebooks::Boodoo::CloudArchive < Ebooks::Archive
|
|||||||
res
|
res
|
||||||
end
|
end
|
||||||
|
|
||||||
def persist!
|
# Unused method?
|
||||||
persist(@basename)
|
|
||||||
end
|
|
||||||
|
|
||||||
def parse(content=nil)
|
|
||||||
content = content || @content || '[]'
|
|
||||||
JSON.parse(content, symbolize_names: true)
|
|
||||||
end
|
|
||||||
|
|
||||||
def parse!(content=nil)
|
|
||||||
@tweets = parse(content)
|
|
||||||
end
|
|
||||||
|
|
||||||
def save(path=nil, minify=true)
|
def save(path=nil, minify=true)
|
||||||
path ||= @path
|
path ||= @path
|
||||||
output = minify ? JSON.generate(minify) : JSON.pretty_generate(@tweets)
|
output = minify ? JSON.generate(minify) : JSON.pretty_generate(@tweets)
|
||||||
@@ -186,10 +255,6 @@ class Ebooks::Boodoo::CloudArchive < Ebooks::Archive
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def save!
|
|
||||||
save(@path)
|
|
||||||
end
|
|
||||||
|
|
||||||
def fetch(url=nil)
|
def fetch(url=nil)
|
||||||
url ||= @url
|
url ||= @url
|
||||||
log "Fetching JSON archive ~~~FROM THE CLOUD~~~"
|
log "Fetching JSON archive ~~~FROM THE CLOUD~~~"
|
||||||
@@ -208,6 +273,86 @@ class Ebooks::Boodoo::CloudArchive < Ebooks::Archive
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Local-disk additions to Ebooks::Model, kept method-for-method in step with
# Ebooks::Boodoo::CloudModel (which subclasses this class).
#
# NOTE(review): `log`, `consume_lines` and `save` are not defined in this
# block; they are assumed to come from the gem or a mixin -- confirm.
module Ebooks
  class Model
    # add methods here to match Boodoo::CloudModel

    # Build a model from marshaled content.
    #
    # @param content [String] output of Marshal.dump for a hash with the keys
    #   :tokens, :sentences, :mentions and :keywords
    # @return [Ebooks::Model] a new model populated from content
    def self.parse(content)
      model = Model.new
      model.parse!(content)
      model
    end

    # Build a model from an Ebooks-style JSON archive.
    #
    # @param content [String, Array] JSON text, an array of lines, or (when
    #   is_file is true) the path of a JSON file
    # @param is_file [Boolean] treat content as a file path
    # @return [Ebooks::Model] the new model
    def self.from_json(content, is_file=false)
      model = Model.new
      model.from_json(content, is_file)
      model
    end

    # Consume an Ebooks-style JSON twitter archive into this model.
    #
    # @param content [String, Array] JSON text, an array of lines, or (when
    #   is_file is true) the path of a JSON file
    # @param is_file [Boolean] treat content as a file path
    # @return [Object] whatever consume_lines returns
    def from_json(content, is_file=false)
      content = File.read(content, :encoding => 'utf-8') if is_file
      # Duck-typed String check: JSON text is parsed and reduced to each
      # tweet's 'text'; anything else is taken as the lines themselves.
      lines =
        if content.respond_to?(:upcase)
          JSON.parse(content).map { |tweet| tweet['text'] }
        else
          content
        end
      log "Reading json corpus with #{lines.size} lines"
      consume_lines(lines)
    end

    # Read the saved model file from local disk.
    #
    # @param path [String, nil] file to read; falls back to @path
    # @return [String, nil] the file content, or nil when no path is known,
    #   the file does not exist, or it is empty
    def fetch(path=nil)
      path ||= @path
      if path && File.exist?(path)
        log "Fetching local bot model"
        # Read the path that was checked above, which may differ from @path.
        content = File.read(path, :encoding => 'utf-8')
        unless content.empty?
          log "local model fetched"
          return content
        end
      end
      log "WARNING: local bot model not found"
      nil
    end

    # Fetch from @path and remember the result in @content.
    #
    # @return [String, nil] the value now held in @content
    def fetch!
      @content = fetch
    end

    # Unmarshal model properties without touching instance state.
    #
    # NOTE(review): Marshal.load can instantiate arbitrary objects; only feed
    # it model files this bot produced itself.
    #
    # @param content [String, nil] marshaled data; falls back to @content
    # @return [Object] the unmarshaled value (a properties hash for a model
    #   file written in the layout #parse! reads)
    def parse(content=nil)
      Marshal.load(content || @content)
    end

    # Unmarshal model properties and load them into this model.
    #
    # @param content [String, nil] marshaled data; falls back to @content
    # @return [Object] the value now held in @keywords
    def parse!(content=nil)
      props = parse(content)
      @tokens = props[:tokens]
      @sentences = props[:sentences]
      @mentions = props[:mentions]
      @keywords = props[:keywords]
    end

    # Save to the model's own @path.
    #
    # @return [Object] whatever save returns
    def save!
      save(@path)
    end

    # Save the model locally.
    #
    # @param path [String, nil] destination file; falls back to @path
    # @return [Object] whatever save returns
    def persist(path=nil)
      path ||= @path
      save(path)
    end

    # Persist to the model's own @path.
    #
    # @return [Object] whatever save returns
    def persist!
      persist
    end
  end
end
|
||||||
|
|
||||||
class Ebooks::Boodoo::CloudModel < Ebooks::Model
|
class Ebooks::Boodoo::CloudModel < Ebooks::Model
|
||||||
# Read a saved model from marshaled content instead of file
|
# Read a saved model from marshaled content instead of file
|
||||||
# @param content [String]
|
# @param content [String]
|
||||||
@@ -231,7 +376,7 @@ class Ebooks::Boodoo::CloudModel < Ebooks::Model
|
|||||||
end
|
end
|
||||||
|
|
||||||
def initialize(username, path=nil)
|
def initialize(username, path=nil)
|
||||||
return Ebooks::Model.new unless has_cloud?
|
return super() unless has_cloud?
|
||||||
@path = path || "corpus/#{username}.model"
|
@path = path || "corpus/#{username}.model"
|
||||||
if File.directory?(@path)
|
if File.directory?(@path)
|
||||||
@path = File.join(@path, "#{username}.model")
|
@path = File.join(@path, "#{username}.model")
|
||||||
@@ -241,26 +386,8 @@ class Ebooks::Boodoo::CloudModel < Ebooks::Model
|
|||||||
@url = Cloudinary::Utils.cloudinary_url(@basename, :resource_type=>:raw)
|
@url = Cloudinary::Utils.cloudinary_url(@basename, :resource_type=>:raw)
|
||||||
end
|
end
|
||||||
|
|
||||||
# Create a model from JSON string
|
|
||||||
# @content [String/Array] Ebooks-style JSON twitter archive (pre-parsed)
|
|
||||||
# @return [Ebooks::Boodoo::CloudModel]
|
|
||||||
def from_json(content, is_file=false)
|
|
||||||
content = File.read(content, :encoding=>'utf-8') if is_file
|
|
||||||
if content.respond_to?(:upcase)
|
|
||||||
lines = JSON.parse(content).map do |tweet|
|
|
||||||
tweet['text']
|
|
||||||
end
|
|
||||||
else
|
|
||||||
lines = content
|
|
||||||
end
|
|
||||||
log "Reading json corpus with #{lines.size} lines"
|
|
||||||
consume_lines(lines)
|
|
||||||
end
|
|
||||||
|
|
||||||
def persist(public_id=nil)
|
def persist(public_id=nil)
|
||||||
public_id ||= @basename
|
public_id ||= @basename
|
||||||
# log "Deleting old model ~~~FROM THE CLOUD~~~"
|
|
||||||
# Cloudinary::Api.delete_resources(@basename, :resource_type=>:raw)
|
|
||||||
log "Uploading bot model ~~TO THE CLOUD~~"
|
log "Uploading bot model ~~TO THE CLOUD~~"
|
||||||
res = Cloudinary::Uploader.upload(@path, :resource_type=>:raw, :public_id=>public_id, :invalidate=>true)
|
res = Cloudinary::Uploader.upload(@path, :resource_type=>:raw, :public_id=>public_id, :invalidate=>true)
|
||||||
@url = res["url"]
|
@url = res["url"]
|
||||||
@@ -272,22 +399,6 @@ class Ebooks::Boodoo::CloudModel < Ebooks::Model
|
|||||||
persist(@basename)
|
persist(@basename)
|
||||||
end
|
end
|
||||||
|
|
||||||
def parse(content=nil)
|
|
||||||
props = Marshal.load(content)
|
|
||||||
end
|
|
||||||
|
|
||||||
def parse!(content=nil)
|
|
||||||
props = parse(content)
|
|
||||||
@tokens = props[:tokens]
|
|
||||||
@sentences = props[:sentences]
|
|
||||||
@mentions = props[:mentions]
|
|
||||||
@keywords = props[:keywords]
|
|
||||||
end
|
|
||||||
|
|
||||||
def save!
|
|
||||||
save(@path)
|
|
||||||
end
|
|
||||||
|
|
||||||
def fetch(url=nil)
|
def fetch(url=nil)
|
||||||
url ||= @url
|
url ||= @url
|
||||||
log "Fetching bot model ~~~FROM THE CLOUD~~~"
|
log "Fetching bot model ~~~FROM THE CLOUD~~~"
|
||||||
@@ -300,10 +411,6 @@ class Ebooks::Boodoo::CloudModel < Ebooks::Model
|
|||||||
content
|
content
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def fetch!
|
|
||||||
@content = fetch
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
class Ebooks::Boodoo::BoodooBot < Ebooks::Bot
|
class Ebooks::Boodoo::BoodooBot < Ebooks::Bot
|
||||||
|
|||||||
11
bots.rb
11
bots.rb
@@ -6,7 +6,7 @@ require 'time_difference'
|
|||||||
include Ebooks::Boodoo
|
include Ebooks::Boodoo
|
||||||
|
|
||||||
# Read defaults and lay env vars on top:
|
# Read defaults and lay env vars on top:
|
||||||
SETTINGS = Dotenv.load('defaults.env').merge(ENV)
|
SETTINGS = Dotenv.load('secrets.env').merge(ENV)
|
||||||
|
|
||||||
# Information about a particular Twitter user we know
|
# Information about a particular Twitter user we know
|
||||||
class UserInfo
|
class UserInfo
|
||||||
@@ -71,8 +71,17 @@ class BoodooBot
|
|||||||
|
|
||||||
if can_run?
|
if can_run?
|
||||||
log "This can run!"
|
log "This can run!"
|
||||||
|
if initial_corpus_file.blank? || in_cloud?(File.basename(@archive_path))
|
||||||
|
log "Skipping initial corpus consumption"
|
||||||
@archive = CloudArchive.new(original, archive_path, twitter)
|
@archive = CloudArchive.new(original, archive_path, twitter)
|
||||||
@model = CloudModel.new(@original, @model_path).from_json(@archive_path, true)
|
@model = CloudModel.new(@original, @model_path).from_json(@archive_path, true)
|
||||||
|
else
|
||||||
|
log "Consuming initial corpus..."
|
||||||
|
archive_json = jsonify(initial_corpus_file, :from_cloud=>has_cloud?, :new_name=>original, :to_cloud=>false)
|
||||||
|
log "archive_json length: #{archive_json.size}"
|
||||||
|
@archive = CloudArchive.new(original, archive_path, twitter, :local=>true, :content=>archive_json)
|
||||||
|
# @model = CloudModel.new(original, model_path).from_json(archive_json, false)
|
||||||
|
end
|
||||||
else
|
else
|
||||||
missing_fields.each {|missing|
|
missing_fields.each {|missing|
|
||||||
log "Can't run without #{missing}"
|
log "Can't run without #{missing}"
|
||||||
|
|||||||
Reference in New Issue
Block a user