mirror of
https://github.com/thewesker/twitter_ebooks.git
synced 2025-12-20 04:11:08 -05:00
stuff I had to change to get the bot working
This commit is contained in:
@@ -80,7 +80,13 @@ module Ebooks
|
|||||||
# @param token [String]
|
# @param token [String]
|
||||||
# @return [Integer]
|
# @return [Integer]
|
||||||
def tikify(token)
|
def tikify(token)
|
||||||
@tikis[token] or (@tokens << token and @tikis[token] = @tokens.length-1)
|
if @tikis.has_key?(token) then
|
||||||
|
return @tikis[token]
|
||||||
|
else
|
||||||
|
(@tokens.length+1)%1000 == 0 and puts "#{@tokens.length+1} tokens"
|
||||||
|
@tokens << token
|
||||||
|
return @tikis[token] = @tokens.length-1
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# Convert a body of text into arrays of tikis
|
# Convert a body of text into arrays of tikis
|
||||||
@@ -143,8 +149,8 @@ module Ebooks
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
text = statements.join("\n")
|
text = statements.join("\n").encode('UTF-8', :invalid => :replace)
|
||||||
mention_text = mentions.join("\n")
|
mention_text = mentions.join("\n").encode('UTF-8', :invalid => :replace)
|
||||||
|
|
||||||
lines = nil; statements = nil; mentions = nil # Allow garbage collection
|
lines = nil; statements = nil; mentions = nil # Allow garbage collection
|
||||||
|
|
||||||
@@ -155,6 +161,7 @@ module Ebooks
|
|||||||
|
|
||||||
log "Ranking keywords"
|
log "Ranking keywords"
|
||||||
@keywords = NLP.keywords(text).top(200).map(&:to_s)
|
@keywords = NLP.keywords(text).top(200).map(&:to_s)
|
||||||
|
log "Top keywords: #{@keywords[0]} #{@keywords[1]} #{@keywords[2]}"
|
||||||
|
|
||||||
self
|
self
|
||||||
end
|
end
|
||||||
@@ -218,6 +225,7 @@ module Ebooks
|
|||||||
tweet = ""
|
tweet = ""
|
||||||
|
|
||||||
while (tikis = generator.generate(3, :bigrams)) do
|
while (tikis = generator.generate(3, :bigrams)) do
|
||||||
|
log "Attempting to produce tweet try #{retries+1}/#{retry_limit}"
|
||||||
next if tikis.length <= 3 && !responding
|
next if tikis.length <= 3 && !responding
|
||||||
break if valid_tweet?(tikis, limit)
|
break if valid_tweet?(tikis, limit)
|
||||||
|
|
||||||
@@ -226,6 +234,7 @@ module Ebooks
|
|||||||
end
|
end
|
||||||
|
|
||||||
if verbatim?(tikis) && tikis.length > 3 # We made a verbatim tweet by accident
|
if verbatim?(tikis) && tikis.length > 3 # We made a verbatim tweet by accident
|
||||||
|
log "Attempting to produce unigram tweet try #{retries+1}/#{retry_limit}"
|
||||||
while (tikis = generator.generate(3, :unigrams)) do
|
while (tikis = generator.generate(3, :unigrams)) do
|
||||||
break if valid_tweet?(tikis, limit) && !verbatim?(tikis)
|
break if valid_tweet?(tikis, limit) && !verbatim?(tikis)
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
require 'fast-stemmer'
|
require 'fast-stemmer'
|
||||||
require 'highscore'
|
require 'highscore'
|
||||||
|
require 'htmlentities'
|
||||||
|
|
||||||
module Ebooks
|
module Ebooks
|
||||||
module NLP
|
module NLP
|
||||||
@@ -42,7 +43,6 @@ module Ebooks
|
|||||||
# Lazily load HTML entity decoder
|
# Lazily load HTML entity decoder
|
||||||
# @return [HTMLEntities]
|
# @return [HTMLEntities]
|
||||||
def self.htmlentities
|
def self.htmlentities
|
||||||
require 'htmlentities'
|
|
||||||
@htmlentities ||= HTMLEntities.new
|
@htmlentities ||= HTMLEntities.new
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|||||||
@@ -19,6 +19,9 @@ module Ebooks
|
|||||||
@bigrams = {}
|
@bigrams = {}
|
||||||
|
|
||||||
@sentences.each_with_index do |tikis, i|
|
@sentences.each_with_index do |tikis, i|
|
||||||
|
if (i % 10000 == 0) then
|
||||||
|
log ("Building: sentence #{i} of #{sentences.length}")
|
||||||
|
end
|
||||||
last_tiki = INTERIM
|
last_tiki = INTERIM
|
||||||
tikis.each_with_index do |tiki, j|
|
tikis.each_with_index do |tiki, j|
|
||||||
@unigrams[last_tiki] ||= []
|
@unigrams[last_tiki] ||= []
|
||||||
|
|||||||
Reference in New Issue
Block a user