From 31aa8425de4f7656b52ce6f359ad91967d28887e Mon Sep 17 00:00:00 2001
From: Jacob Harris
Date: Sat, 28 Dec 2013 22:26:21 -0500
Subject: [PATCH] Don't tokenize empty tweets

---
 ebook.rb | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ebook.rb b/ebook.rb
index 8c26b02..f3c3aff 100644
--- a/ebook.rb
+++ b/ebook.rb
@@ -82,6 +82,7 @@ else
     tokenizer = Punkt::SentenceTokenizer.new(source_tweets.join(" ")) # init with corpus of all sentences
     source_tweets.each do |twt|
+      next if twt.nil? || twt == ''
       sentences = tokenizer.sentences_from_text(twt, :output => :sentences_text)
       # sentences = text.split(/[.:;?!]/)