Mirror of https://github.com/thewesker/twitter_ebooks.git (synced 2025-12-23 13:51:09 -05:00)
On second thought, we can't use a cache system
Simply because the corpuses are too darn big to keep around
@@ -4,33 +4,14 @@
 require 'json'
 require 'set'
 require 'digest/md5'
-require 'fileutils'
 require 'csv'
 
 module Ebooks
   class Model
     attr_accessor :hash, :tokens, :sentences, :mentions, :keywords
 
-    # Consume a corpus file to create a model
-    # @param corpus_path Path to a json, text or csv file to consume
-    # @param cache Optional path to a directory to store cached models
-    def self.consume(corpus_path, cache: nil)
-      if cache
-        FileUtils::mkdir_p cache
-
-        cache_path = File.join(cache, Digest::MD5.file(corpus_path).to_s)
-        if File.exists?(cache_path)
-          log "Reading model from cache at #{cache_path}"
-          return Model.load(cache_path)
-        end
-      end
-
-      model = Model.new.consume(corpus_path)
-
-      if cache
-        log "Caching model at #{cache_path}"
-        model.save(cache_path)
-      end
+    def self.consume(txtpath)
+      Model.new.consume(txtpath)
     end
 
     def self.consume_all(paths)
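For reference, the approach being removed keyed cached models on an MD5 digest of the corpus file, so an unchanged corpus would map to a previously saved model. Below is a minimal standalone Ruby sketch of that scheme; `cached_model_path` and the `model_cache` directory are illustrative names for this sketch, not identifiers from twitter_ebooks.

require 'digest/md5'
require 'fileutils'
require 'tempfile'

# Sketch of the cache-key scheme this commit removes: an unchanged corpus file
# hashes to the same MD5 digest, so a previously saved model could be reused.
# `cached_model_path` is a hypothetical helper, not part of the library.
def cached_model_path(corpus_path, cache_dir = 'model_cache')
  FileUtils.mkdir_p(cache_dir)
  File.join(cache_dir, Digest::MD5.file(corpus_path).to_s)
end

# Stand-in corpus file so the sketch runs on its own.
corpus = Tempfile.new('corpus')
corpus.write("just enough text to stand in for a corpus\n")
corpus.close

path = cached_model_path(corpus.path)
if File.exist?(path)
  puts "cache hit: would load the model from #{path}"
else
  puts "cache miss: would consume the corpus and save the model to #{path}"
end

As the commit message explains, each cached model is derived from the full corpus, so keeping one per corpus digest takes up disk space on the same order as the corpora themselves, and the code reverts to the plain Model.new.consume(txtpath) path instead.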