diff --git a/bin/ebooks b/bin/ebooks
index c564df8..9ce5a53 100755
--- a/bin/ebooks
+++ b/bin/ebooks
@@ -25,6 +25,7 @@ Usage:
      ebooks auth
      ebooks consume [corpus_path2] [...]
      ebooks consume-all [corpus_path2] [...]
+     ebooks append <model_name> <corpus_path>
      ebooks gen [input]
      ebooks archive [path]
      ebooks tweet
@@ -116,6 +117,32 @@ STR
     log "Corpuses consumed to #{outpath}"
   end
 
+  HELP.append = <<-STR
+    Usage: ebooks append <model_name> <corpus_path>
+
+    Process then append the provided corpus to the model
+    instead of overwriting.
+  STR
+
+  # Consume the corpus at +path+ and merge it into the existing
+  # model/<name>.model file under APP_PATH instead of replacing it.
+  # @param name [String] model name, without the ".model" extension
+  # @param path [String] corpus file handed to Ebooks::Model.consume
+  def self.append(name, path)
+    unless name && path
+      help :append
+      exit 1
+    end
+
+    model_path = File.join(APP_PATH, 'model', "#{name}.model")
+    # Model#append returns nil (after logging why) when there is no model
+    # file to extend, so only report success when it actually wrote one.
+    return unless Ebooks::Model.consume(path).append(model_path)
+
+    log "Corpus appended to #{name}.model"
+  end
+
+
   HELP.jsonify = <<-STR
     Usage: ebooks jsonify [tweets.csv2] [...]
 
@@ -380,6 +407,7 @@ STR
     when "new" then new(args[1])
     when "consume" then consume(args[1..-1])
     when "consume-all" then consume_all(args[1], args[2..-1])
+    when "append" then append(args[1], args[2])
     when "gen" then gen(args[1], args[2..-1].join(' '))
     when "archive" then archive(args[1], args[2])
     when "tweet" then tweet(args[1], args[2])
diff --git a/lib/twitter_ebooks/model.rb b/lib/twitter_ebooks/model.rb
index 8849118..a40b046 100644
--- a/lib/twitter_ebooks/model.rb
+++ b/lib/twitter_ebooks/model.rb
@@ -69,6 +69,41 @@ module Ebooks
       self
     end
 
+    # Merge this freshly consumed model into the model file at +path+,
+    # rewriting that file in place instead of replacing its contents.
+    # This model's own entries come first in every merged list, and the
+    # in-memory model is extended with the stored entries as a side effect.
+    #
+    # NOTE(review): the stored lists are joined by plain concatenation. If
+    # sentences reference tokens by position, the stored sentences would
+    # need re-indexing after the merge -- confirm against the model format.
+    #
+    # @param path [String] existing model file to extend
+    # @return [self, nil] self, or nil when no file exists at path
+    def append(path)
+      unless File.file?(path)
+        log "No existing model found at #{path}"
+        return
+      end
+
+      # Marshal.load can instantiate arbitrary objects; only point this at
+      # model files this tool wrote itself.
+      stored = Marshal.load(File.binread(path))
+
+      # Build and serialize the merged data before touching the file so a
+      # failure here cannot leave a truncated model behind.
+      merged = {
+        tokens: @tokens.concat(stored[:tokens]),
+        sentences: @sentences.concat(stored[:sentences]),
+        mentions: @mentions.concat(stored[:mentions]),
+        keywords: @keywords.concat(stored[:keywords])
+      }
+      File.binwrite(path, Marshal.dump(merged))
+
+      self
+    end
+
+
     def initialize
       @tokens = []
 