mirror of
https://github.com/thewesker/twitter_ebooks.git
synced 2025-12-20 12:21:11 -05:00
Merge pull request #12 from BooDoo/feature/csv
2.2.4? Consume tweets.csv from official twitter archives
This commit is contained in:
@@ -4,6 +4,7 @@
|
||||
require 'json'
|
||||
require 'set'
|
||||
require 'digest/md5'
|
||||
require 'csv'
|
||||
|
||||
module Ebooks
|
||||
class Model
|
||||
@@ -26,6 +27,11 @@ module Ebooks
|
||||
lines = JSON.parse(content, symbolize_names: true).map do |tweet|
|
||||
tweet[:text]
|
||||
end
|
||||
elsif path.split('.')[-1] == "csv"
|
||||
log "Reading CSV corpus from #{path}"
|
||||
lines = CSV.read(path).drop(1).map do |tweet|
|
||||
tweet[5]
|
||||
end
|
||||
else
|
||||
log "Reading plaintext corpus from #{path}"
|
||||
lines = content.split("\n")
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
module Ebooks
|
||||
VERSION = "2.2.3"
|
||||
VERSION = "2.2.4"
|
||||
end
|
||||
|
||||
Reference in New Issue
Block a user