mirror of
https://github.com/thewesker/twitter_ebooks.git
synced 2025-12-20 04:11:08 -05:00
Support consuming tweets.csv from official Twitter archives
This commit is contained in:
@@ -4,6 +4,7 @@
|
|||||||
require 'json'
|
require 'json'
|
||||||
require 'set'
|
require 'set'
|
||||||
require 'digest/md5'
|
require 'digest/md5'
|
||||||
|
require 'csv'
|
||||||
|
|
||||||
module Ebooks
|
module Ebooks
|
||||||
class Model
|
class Model
|
||||||
@@ -26,6 +27,11 @@ module Ebooks
|
|||||||
lines = JSON.parse(content, symbolize_names: true).map do |tweet|
|
lines = JSON.parse(content, symbolize_names: true).map do |tweet|
|
||||||
tweet[:text]
|
tweet[:text]
|
||||||
end
|
end
|
||||||
|
elsif path.split('.')[-1] == "csv"
|
||||||
|
log "Reading CSV corpus from #{path}"
|
||||||
|
lines = CSV.read(path).drop(1).map do |tweet|
|
||||||
|
tweet[5]
|
||||||
|
end
|
||||||
else
|
else
|
||||||
log "Reading plaintext corpus from #{path}"
|
log "Reading plaintext corpus from #{path}"
|
||||||
lines = content.split("\n")
|
lines = content.split("\n")
|
||||||
|
|||||||
@@ -1,3 +1,3 @@
|
|||||||
module Ebooks
|
module Ebooks
|
||||||
VERSION = "2.2.3"
|
VERSION = "2.2.4"
|
||||||
end
|
end
|
||||||
|
|||||||
Reference in New Issue
Block a user