From 3402506dd3a5013c3e37a3c2230818ea9e462cf3 Mon Sep 17 00:00:00 2001
From: Alina Saalfeld
Date: Tue, 29 Sep 2015 16:52:46 +0200
Subject: [PATCH 1/2] Fix utf-8 in keywords

---
 lib/twitter_ebooks/nlp.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/twitter_ebooks/nlp.rb b/lib/twitter_ebooks/nlp.rb
index b65e162..3fb9ea3 100644
--- a/lib/twitter_ebooks/nlp.rb
+++ b/lib/twitter_ebooks/nlp.rb
@@ -99,7 +99,7 @@ module Ebooks
         #set :vowels, 1 # => default: 0 = not considered
         #set :consonants, 5 # => default: 0 = not considered
         #set :ignore_case, true # => default: false
-        set :word_pattern, /(? default: /\w+/
+        set :word_pattern, /(? default: /\w+/
         #set :stemming, true # => default: false
       end
 

From 18922ee3b5c4c93738370dc3bacd883af792ff92 Mon Sep 17 00:00:00 2001
From: Alina Saalfeld
Date: Tue, 29 Sep 2015 17:17:30 +0200
Subject: [PATCH 2/2] Move stopwords.txt to be accessible by bot developer

---
 lib/twitter_ebooks/nlp.rb        | 4 ++--
 {data => skeleton}/stopwords.txt | 0
 2 files changed, 2 insertions(+), 2 deletions(-)
 rename {data => skeleton}/stopwords.txt (100%)

diff --git a/lib/twitter_ebooks/nlp.rb b/lib/twitter_ebooks/nlp.rb
index 3fb9ea3..f971f0c 100644
--- a/lib/twitter_ebooks/nlp.rb
+++ b/lib/twitter_ebooks/nlp.rb
@@ -14,10 +14,10 @@ module Ebooks
     # to be using it all of the time
 
     # Lazily loads an array of stopwords
-    # Stopwords are common English words that should often be ignored
+    # Stopwords are common words that should often be ignored
     # @return [Array]
     def self.stopwords
-      @stopwords ||= File.read(File.join(DATA_PATH, 'stopwords.txt')).split
+      @stopwords ||= File.exists?('stopwords.txt') ? File.read('stopwords.txt').split : []
     end
 
     # Lazily loads an array of known English nouns
diff --git a/data/stopwords.txt b/skeleton/stopwords.txt
similarity index 100%
rename from data/stopwords.txt
rename to skeleton/stopwords.txt