Added deep training feature; duplicate and bot-sourced messages are now removed.

This commit is contained in:
Charlie Laabs
2018-02-06 21:04:54 -06:00
parent 6bee324aa8
commit a9623c86ef
3 changed files with 125 additions and 24 deletions

View File

@@ -1,10 +1,14 @@
const Discord = require('discord.js') //https://discord.js.org/#/docs/main/stable/general/welcome
const fs = require('fs')
const Markov = require('markov-strings')
const uniqueBy = require('unique-by');
const schedule = require('node-schedule');
const client = new Discord.Client()
const ZEROWIDTH_SPACE = String.fromCharCode(parseInt('200B', 16))
const MAXMESSAGELENGTH = 2000
let guilds = []
let connected = -1
let GAME = 'GAME'
@@ -16,10 +20,8 @@ let commands = {}
let aliases = {}
let errors = []
let lastSeenMessageID = null
let fileObj = {
messages: [],
lastSeenMessageID: null
messages: []
}
let markovDB = []
@@ -27,7 +29,7 @@ let messageCache = []
const markovOpts = {
maxLength: 400,
minWords: 3,
minScore: 5
minScore: 10
}
let markov = new Markov(markovDB, markovOpts);
@@ -40,12 +42,13 @@ function regenMarkov() {
// console.log("MessageCache", messageCache)
markovDB = fileObj.messages
if (markovDB.length == 0)
markovDB.push("hello")
markovDB = markovDB.concat(messageCache)
markovDB.push({ string: 'hello', id: null })
//markovDB = uniqueArray(markovDB.concat(messageCache), 'id')
markovDB = uniqueBy(markovDB.concat(messageCache), 'id')
//markovDB = markovDB.concat(messageCache)
markov = new Markov(markovDB, markovOpts);
markov.buildCorpusSync()
fileObj.messages = markovDB
fileObj.lastSeenMessageID = lastSeenMessageID
// console.log("WRITING THE FOLLOWING DATA:")
// console.log(fileObj)
fs.writeFileSync('markovDB.json', JSON.stringify(fileObj), 'utf-8')
@@ -63,6 +66,7 @@ function loadConfig() {
GAME = cfg.game
BOTDESC = cfg.description
inviteCmd = cfg.invitecmd
//regenMarkov()
client.login(cfg.token)
}
else {
@@ -72,9 +76,7 @@ function loadConfig() {
client.on('ready', () => {
console.log('Markbot by Charlie Laabs')
try { lastSeenMessageID = JSON.parse(fs.readFileSync('markovDB.json', 'utf8')).lastSeenMessageID }
catch (err) { console.log(err) }
regenMarkov()
client.user.setActivity(GAME)
})
client.on('error', (err) => {
@@ -90,18 +92,23 @@ client.on('error', (err) => {
client.on('message', message => {
if (message.guild) {
let command = validateMessage(message)
// if (command === 'help') {
// I should probably add a help message sometime
// }
if (command === 'help') {
let richem = new Discord.RichEmbed()
.setAuthor(client.user.username, client.user.avatarURL)
.setThumbnail(client.user.avatarURL)
.setDescription('A Markov chain chatbot that speaks based on previous chat input.')
.addField('!mark', 'Generates a sentence to say based on the chat database')
.addField('!mark train', 'Fetches the maximum amount of previous messages in the current text channel, adds it to the database, and regenerates the corpus. Takes about 2 minutes.')
.addField('!mark regen', 'Manually regenerates the corpus to add recent chat info. Run this before shutting down to avoid any data loss. This automatically runs at midnight.')
.addField('!mark invite', 'Don\'t invite this bot to other servers. The database is shared between all servers and text channels.')
message.channel.send(richem)
.catch(reason => {
message.author.send(richem)
})
}
if (command === 'train') {
console.log("Training...")
message.channel.fetchMessages({ after: lastSeenMessageID, limit: 100 })
.then(messages => {
messages.forEach(value => {
messageCache.push(value.content)
})
regenMarkov()
}).catch(console.error)
fetchMessageChunk(message, null, [])
}
if (command === 'respond') {
console.log("Responding...")
@@ -121,7 +128,9 @@ client.on('message', message => {
}
if (command === null) {
console.log("Listening...")
messageCache.push(message.content)
if (!message.author.bot) {
messageCache.push({ string: message.content, id: message.id })
}
}
if (command === inviteCmd) {
let richem = new Discord.RichEmbed()
@@ -134,7 +143,6 @@ client.on('message', message => {
message.author.send(richem)
})
}
lastSeenMessageID = message.id
}
})
@@ -158,4 +166,21 @@ function validateMessage(message) {
return command
}
/**
 * Pages backwards through the history of the channel `message` was sent in,
 * 100 messages per request, collecting every message whose author is not a
 * bot as `{ string, id }`. Once a page comes back empty, or a request fails,
 * the collected entries are appended to `messageCache`, the corpus is rebuilt
 * with `regenMarkov()`, and a completion reply is sent to the requester.
 *
 * @param {Object} message - Message that triggered training; its channel is
 *   paged and it receives the final reply.
 * @param {?string} oldestMessageID - Only messages before this ID are
 *   requested; the initial call passes `null`.
 * @param {Array} historyCache - `{ string, id }` entries gathered by earlier
 *   pages; the initial call passes `[]`. This array is not mutated.
 * @returns {Promise} Settles when training has finished. Failures are logged
 *   rather than propagated.
 */
function fetchMessageChunk(message, oldestMessageID, historyCache) {
    // Stores what was gathered, rebuilds the corpus and notifies the requester.
    const finishTraining = collected => {
        console.log("Trained from " + collected.length + " past messages.")
        messageCache = messageCache.concat(collected)
        regenMarkov()
        return message.reply('Finished training from past ' + collected.length + ' messages.')
    }

    return message.channel.fetchMessages({ before: oldestMessageID, limit: 100 })
        .then(messages => {
            // An empty page is the explicit stop condition; previously the loop
            // only ended because reading `.id` of a missing last message threw.
            if (messages.size === 0) {
                return finishTraining(historyCache)
            }
            const collected = historyCache.concat(
                messages.filter(elem => !elem.author.bot).map(elem => {
                    return { string: elem.content, id: elem.id }
                })
            )
            // The cursor comes from the unfiltered page so bot messages cannot
            // stall the pagination.
            return fetchMessageChunk(message, messages.last().id, collected)
        }, err => {
            // Best effort: a failed request still keeps everything collected so far,
            // but the reason is logged instead of being silently discarded.
            console.error('Fetching message history failed; keeping what was collected so far.', err)
            return finishTraining(historyCache)
        })
        .catch(err => {
            // Errors from regenMarkov() or the reply would otherwise be unhandled rejections.
            console.error('Could not finish training.', err)
        })
}
loadConfig()
// Rebuild the corpus every day at midnight. The job callback has to be the
// function itself: writing regenMarkov() here would run it once immediately
// and hand scheduleJob its return value instead of a function to invoke.
const daily = schedule.scheduleJob('0 0 * * *', regenMarkov);

74
package-lock.json generated
View File

@@ -47,6 +47,15 @@
"resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.2.tgz",
"integrity": "sha1-tf1UIgqivFq1eqtxQMlAdUUDwac="
},
"cron-parser": {
"version": "2.4.4",
"resolved": "https://registry.npmjs.org/cron-parser/-/cron-parser-2.4.4.tgz",
"integrity": "sha512-lNWu5pGRGF7y4kl/uRXY69mC8n0qhjTIDQmc3MIfNY5eEvGyYqFPewn+2YQXybJoa2LVVOmDQ/1WTWyQzAM8uA==",
"requires": {
"is-nan": "1.2.1",
"moment-timezone": "0.5.14"
}
},
"cyclist": {
"version": "0.2.2",
"resolved": "https://registry.npmjs.org/cyclist/-/cyclist-0.2.2.tgz",
@@ -60,6 +69,15 @@
"ms": "2.0.0"
}
},
"define-properties": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/define-properties/-/define-properties-1.1.2.tgz",
"integrity": "sha1-g6c/L+pWmJj7c3GTyPhzyvbUXJQ=",
"requires": {
"foreach": "2.0.5",
"object-keys": "1.0.11"
}
},
"discord.js": {
"version": "11.3.0",
"resolved": "https://registry.npmjs.org/discord.js/-/discord.js-11.3.0.tgz",
@@ -119,6 +137,11 @@
"readable-stream": "2.3.3"
}
},
"foreach": {
"version": "2.0.5",
"resolved": "https://registry.npmjs.org/foreach/-/foreach-2.0.5.tgz",
"integrity": "sha1-C+4AUBiusmDQo6865ljdATbsG5k="
},
"from2": {
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/from2/-/from2-2.3.0.tgz",
@@ -142,6 +165,14 @@
"p-is-promise": "1.1.0"
}
},
"is-nan": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/is-nan/-/is-nan-1.2.1.tgz",
"integrity": "sha1-n69ltvttskt/XAYoR16nH5iEAeI=",
"requires": {
"define-properties": "1.1.2"
}
},
"isarray": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz",
@@ -176,6 +207,11 @@
"resolved": "https://registry.npmjs.org/long/-/long-3.2.0.tgz",
"integrity": "sha1-2CG3E4yhy1gcFymQ7xTbIAtcR0s="
},
"long-timeout": {
"version": "0.1.1",
"resolved": "https://registry.npmjs.org/long-timeout/-/long-timeout-0.1.1.tgz",
"integrity": "sha1-lyHXiLR+C8taJMLivuGg2lXatRQ="
},
"markov-strings": {
"version": "1.3.5",
"resolved": "https://registry.npmjs.org/markov-strings/-/markov-strings-1.3.5.tgz",
@@ -202,6 +238,19 @@
"through2": "2.0.3"
}
},
"moment": {
"version": "2.20.1",
"resolved": "https://registry.npmjs.org/moment/-/moment-2.20.1.tgz",
"integrity": "sha512-Yh9y73JRljxW5QxN08Fner68eFLxM5ynNOAw2LbIB1YAGeQzZT8QFSUvkAz609Zf+IHhhaUxqZK8dG3W/+HEvg=="
},
"moment-timezone": {
"version": "0.5.14",
"resolved": "https://registry.npmjs.org/moment-timezone/-/moment-timezone-0.5.14.tgz",
"integrity": "sha1-TrOP+VOLgBCLpGekWPPtQmjM/LE=",
"requires": {
"moment": "2.20.1"
}
},
"ms": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
@@ -212,6 +261,21 @@
"resolved": "https://registry.npmjs.org/nan/-/nan-2.8.0.tgz",
"integrity": "sha1-7XFfP+neArV6XmJS2QqWZ14fCFo="
},
"node-schedule": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/node-schedule/-/node-schedule-1.3.0.tgz",
"integrity": "sha512-NNwO9SUPjBwFmPh3vXiPVEhJLn4uqYmZYvJV358SRGM06BR4UoIqxJpeJwDDXB6atULsgQA97MfD1zMd5xsu+A==",
"requires": {
"cron-parser": "2.4.4",
"long-timeout": "0.1.1",
"sorted-array-functions": "1.1.0"
}
},
"object-keys": {
"version": "1.0.11",
"resolved": "https://registry.npmjs.org/object-keys/-/object-keys-1.0.11.tgz",
"integrity": "sha1-xUYBd4rVYPEULODgG8yotW0TQm0="
},
"once": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz",
@@ -299,6 +363,11 @@
"resolved": "https://registry.npmjs.org/snekfetch/-/snekfetch-3.6.1.tgz",
"integrity": "sha512-aLEvf1YR440pINb0LEo/SL2Q2s/A26+YEqPlx09A0XpGH7qWp8iqIFFolVILHn2yudWXJne9QWyQu+lzDp+ksQ=="
},
"sorted-array-functions": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/sorted-array-functions/-/sorted-array-functions-1.1.0.tgz",
"integrity": "sha512-zq6fLdGQixb9VZfT/tLgU+LzoedJyTbcf1I/TKETFeUVoWIfcs5HNr+SJSvQJLXRlEZjB1gpILTrxamxAdCcgA=="
},
"stream-each": {
"version": "1.2.2",
"resolved": "https://registry.npmjs.org/stream-each/-/stream-each-1.2.2.tgz",
@@ -350,6 +419,11 @@
"resolved": "https://registry.npmjs.org/ultron/-/ultron-1.1.1.tgz",
"integrity": "sha512-UIEXBNeYmKptWH6z8ZnqTeS8fV74zG0/eRU9VGkpzz+LIJNs8W/zM/L+7ctCkRrgbNnnR0xxw4bKOr0cW0N0Og=="
},
"unique-by": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/unique-by/-/unique-by-1.0.0.tgz",
"integrity": "sha1-UiDIa6e8Vy+3E610ZRRwy2RCEr0="
},
"util-deprecate": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",

View File

@@ -1,12 +1,12 @@
{
"name": "markbot",
"version": "0.1.0",
"version": "0.2.0",
"description": "A conversational Markov chain bot for Discord",
"main": "index.js",
"scripts": {
"start": "node index.js"
},
"repository": "",
"repository": "https://github.com/charlocharlie/markov-discord.git",
"keywords": [
"discord",
"markov",
@@ -20,6 +20,8 @@
"discord.js": "^11.3.0",
"erlpack": "github:discordapp/erlpack",
"markov-strings": "^1.3.5",
"node-schedule": "^1.3.0",
"unique-by": "^1.0.0",
"zlib-sync": "^0.1.4"
}
}