Don't tokenize empty tweets

This commit is contained in:
Jacob Harris
2013-12-28 22:26:21 -05:00
parent 3027681de3
commit 31aa8425de

View File

@@ -82,6 +82,7 @@ else
tokenizer = Punkt::SentenceTokenizer.new(source_tweets.join(" ")) # init with corpus of all sentences
source_tweets.each do |twt|
next if twt.nil? || twt == ''
sentences = tokenizer.sentences_from_text(twt, :output => :sentences_text)
# sentences = text.split(/[.:;?!]/)