From a9623c86efa825fe9f5dc8cd46c7399b5290ca20 Mon Sep 17 00:00:00 2001
From: Charlie Laabs
Date: Tue, 6 Feb 2018 21:04:54 -0600
Subject: [PATCH] Added deep training feature, removes duplicate and bot-sourced messages.

---
 index.js          | 100 ++++++++++++++++++++++++++++++++++++----------
 package-lock.json |  74 ++++++++++++++++++++++++++++++++++
 package.json      |   6 ++-
 3 files changed, 156 insertions(+), 24 deletions(-)

diff --git a/index.js b/index.js
index 352612c..00a0265 100644
--- a/index.js
+++ b/index.js
@@ -1,10 +1,14 @@
 const Discord = require('discord.js') //https://discord.js.org/#/docs/main/stable/general/welcome
 const fs = require('fs')
 const Markov = require('markov-strings')
+const uniqueBy = require('unique-by');
+const schedule = require('node-schedule');
 const client = new Discord.Client()
 const ZEROWIDTH_SPACE = String.fromCharCode(parseInt('200B', 16))
 const MAXMESSAGELENGTH = 2000
 
+
+
 let guilds = []
 let connected = -1
 let GAME = 'GAME'
@@ -16,10 +20,8 @@ let commands = {}
 let aliases = {}
 let errors = []
 
-let lastSeenMessageID = null
 let fileObj = {
-  messages: [],
-  lastSeenMessageID: null
+  messages: []
 }
 
 let markovDB = []
@@ -27,7 +29,7 @@ let messageCache = []
 const markovOpts = {
   maxLength: 400,
   minWords: 3,
-  minScore: 5
+  minScore: 10
 }
 let markov = new Markov(markovDB, markovOpts);
 
@@ -40,12 +42,13 @@ function regenMarkov() {
   // console.log("MessageCache", messageCache)
   markovDB = fileObj.messages
   if (markovDB.length == 0)
-    markovDB.push("hello")
-  markovDB = markovDB.concat(messageCache)
+    markovDB.push({ string: 'hello', id: null })
+  //markovDB = uniqueArray(markovDB.concat(messageCache), 'id')
+  markovDB = uniqueBy(markovDB.concat(messageCache), 'id')
+  //markovDB = markovDB.concat(messageCache)
   markov = new Markov(markovDB, markovOpts);
   markov.buildCorpusSync()
   fileObj.messages = markovDB
-  fileObj.lastSeenMessageID = lastSeenMessageID
   // console.log("WRITING THE FOLLOWING DATA:")
   // console.log(fileObj)
   fs.writeFileSync('markovDB.json', JSON.stringify(fileObj), 'utf-8')
@@ -63,6 +66,7 @@ function loadConfig() {
     GAME = cfg.game
     BOTDESC = cfg.description
     inviteCmd = cfg.invitecmd
+    //regenMarkov()
     client.login(cfg.token)
   }
   else {
@@ -72,9 +76,7 @@ function loadConfig() {
 
 client.on('ready', () => {
   console.log('Markbot by Charlie Laabs')
-  try { lastSeenMessageID = JSON.parse(fs.readFileSync('markovDB.json', 'utf8')).lastSeenMessageID }
-  catch (err) { console.log(err) }
-  regenMarkov()
+  client.user.setActivity(GAME)
 })
 
 client.on('error', (err) => {
@@ -90,18 +92,23 @@ client.on('error', (err) => {
 client.on('message', message => {
   if (message.guild) {
     let command = validateMessage(message)
-    // if (command === 'help') {
-    // I should probably add a help message sometime
-    // }
+    if (command === 'help') {
+      let richem = new Discord.RichEmbed()
+        .setAuthor(client.user.username, client.user.avatarURL)
+        .setThumbnail(client.user.avatarURL)
+        .setDescription('A Markov chain chatbot that speaks based on previous chat input.')
+        .addField('!mark', 'Generates a sentence to say based on the chat database')
+        .addField('!mark train', 'Fetches the maximum amount of previous messages in the current text channel, adds it to the database, and regenerates the corpus. Takes about 2 minutes.')
+        .addField('!mark regen', 'Manually regenerates the corpus to add recent chat info. Run this before shutting down to avoid any data loss. This automatically runs at midnight.')
+        .addField('!mark invite', 'Don\'t invite this bot to other servers. The database is shared between all servers and text channels.')
+      message.channel.send(richem)
+        .catch(reason => {
+          message.author.send(richem)
+        })
+    }
     if (command === 'train') {
       console.log("Training...")
-      message.channel.fetchMessages({ after: lastSeenMessageID, limit: 100 })
-        .then(messages => {
-          messages.forEach(value => {
-            messageCache.push(value.content)
-          })
-          regenMarkov()
-        }).catch(console.error)
+      fetchMessageChunk(message, null, [])
     }
     if (command === 'respond') {
       console.log("Responding...")
@@ -121,7 +128,9 @@ client.on('message', message => {
     }
     if (command === null) {
       console.log("Listening...")
-      messageCache.push(message.content)
+      if (!message.author.bot) {
+        messageCache.push({ string: message.content, id: message.id })
+      }
     }
     if (command === inviteCmd) {
       let richem = new Discord.RichEmbed()
@@ -134,7 +143,6 @@ client.on('message', message => {
           message.author.send(richem)
         })
     }
-    lastSeenMessageID = message.id
   }
 })
 
@@ -158,4 +166,52 @@ function validateMessage(message) {
   return command
 }
 
+/**
+ * Adds the collected history to the message cache, rebuilds the corpus and
+ * tells the requesting user how many messages were used.
+ * @param message - The Discord message that requested the training run.
+ * @param historyCache - Array of { string, id } entries gathered so far.
+ */
+function finishTraining(message, historyCache) {
+  console.log("Trained from " + historyCache.length + " past messages.")
+  messageCache = messageCache.concat(historyCache)
+  regenMarkov()
+  message.reply('Finished training from past ' + historyCache.length + ' messages.')
+    .catch(console.error)
+}
+
+/**
+ * Walks backwards through the channel history 100 messages at a time,
+ * keeping every message that was not sent by a bot, and finishes the
+ * training run once a fetch comes back empty or fails.
+ * @param message - The Discord message that requested the training run.
+ * @param oldestMessageID - ID of the oldest message seen so far (null on the first call).
+ * @param historyCache - Array of { string, id } entries gathered so far.
+ */
+function fetchMessageChunk(message, oldestMessageID, historyCache) {
+  message.channel.fetchMessages({ before: oldestMessageID, limit: 100 })
+    .catch(err => {
+      // A failed fetch ends the walk early; log it and keep what was gathered.
+      console.error(err)
+      return null
+    })
+    .then(messages => {
+      // An empty chunk means there is nothing older left to fetch.
+      if (!messages || messages.size === 0) {
+        finishTraining(message, historyCache)
+        return
+      }
+      const chunk = messages.filter(elem => !elem.author.bot).map(elem => {
+        return { string: elem.content, id: elem.id }
+      })
+      fetchMessageChunk(message, messages.last().id, historyCache.concat(chunk))
+    })
+    .catch(console.error)
+}
+
 loadConfig()
+
+// Build the corpus once at startup, then rebuild it every day at midnight.
+// scheduleJob must receive the function itself, not the result of calling it.
+regenMarkov()
+const daily = schedule.scheduleJob('0 0 * * *', regenMarkov)
diff --git a/package-lock.json b/package-lock.json
index 045f931..fb37b1c 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -47,6 +47,15 @@
       "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.2.tgz",
       "integrity": "sha1-tf1UIgqivFq1eqtxQMlAdUUDwac="
     },
+    "cron-parser": {
+      "version": "2.4.4",
+      "resolved": "https://registry.npmjs.org/cron-parser/-/cron-parser-2.4.4.tgz",
+      "integrity": "sha512-lNWu5pGRGF7y4kl/uRXY69mC8n0qhjTIDQmc3MIfNY5eEvGyYqFPewn+2YQXybJoa2LVVOmDQ/1WTWyQzAM8uA==",
+      "requires": {
+        "is-nan": "1.2.1",
+        "moment-timezone": "0.5.14"
+      }
+    },
     "cyclist": {
       "version": "0.2.2",
       "resolved": "https://registry.npmjs.org/cyclist/-/cyclist-0.2.2.tgz",
@@ -60,6 +69,15 @@
         "ms": "2.0.0"
       }
     },
+    "define-properties": {
+      "version": "1.1.2",
+      "resolved": "https://registry.npmjs.org/define-properties/-/define-properties-1.1.2.tgz",
+      "integrity": "sha1-g6c/L+pWmJj7c3GTyPhzyvbUXJQ=",
+      "requires": {
+        "foreach": "2.0.5",
+        "object-keys": "1.0.11"
+      }
+    },
     "discord.js": {
       "version": "11.3.0",
       "resolved": "https://registry.npmjs.org/discord.js/-/discord.js-11.3.0.tgz",
@@ -119,6 +137,11 @@
         "readable-stream": "2.3.3"
       }
     },
+    "foreach": {
+      "version": "2.0.5",
+      "resolved": "https://registry.npmjs.org/foreach/-/foreach-2.0.5.tgz",
+      "integrity": "sha1-C+4AUBiusmDQo6865ljdATbsG5k="
+    },
     "from2": {
       "version": "2.3.0",
       "resolved": "https://registry.npmjs.org/from2/-/from2-2.3.0.tgz",
@@ -142,6 +165,14 @@
         "p-is-promise": "1.1.0"
       }
     },
+    "is-nan": {
+      "version": "1.2.1",
+      "resolved": "https://registry.npmjs.org/is-nan/-/is-nan-1.2.1.tgz",
+      "integrity": "sha1-n69ltvttskt/XAYoR16nH5iEAeI=",
+      "requires": {
+        "define-properties": "1.1.2"
+      }
+    },
     "isarray": {
       "version": "1.0.0",
       "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz",
@@ -176,6 +207,11 @@
       "resolved": "https://registry.npmjs.org/long/-/long-3.2.0.tgz",
       "integrity": "sha1-2CG3E4yhy1gcFymQ7xTbIAtcR0s="
     },
+    "long-timeout": {
+      "version": "0.1.1",
+      "resolved": "https://registry.npmjs.org/long-timeout/-/long-timeout-0.1.1.tgz",
+      "integrity": "sha1-lyHXiLR+C8taJMLivuGg2lXatRQ="
+    },
     "markov-strings": {
       "version": "1.3.5",
       "resolved": "https://registry.npmjs.org/markov-strings/-/markov-strings-1.3.5.tgz",
@@ -202,6 +238,19 @@
         "through2": "2.0.3"
       }
     },
+    "moment": {
+      "version": "2.20.1",
+      "resolved": "https://registry.npmjs.org/moment/-/moment-2.20.1.tgz",
+      "integrity": "sha512-Yh9y73JRljxW5QxN08Fner68eFLxM5ynNOAw2LbIB1YAGeQzZT8QFSUvkAz609Zf+IHhhaUxqZK8dG3W/+HEvg=="
+    },
+    "moment-timezone": {
+      "version": "0.5.14",
+      "resolved": "https://registry.npmjs.org/moment-timezone/-/moment-timezone-0.5.14.tgz",
+      "integrity": "sha1-TrOP+VOLgBCLpGekWPPtQmjM/LE=",
+      "requires": {
+        "moment": "2.20.1"
+      }
+    },
     "ms": {
       "version": "2.0.0",
       "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
@@ -212,6 +261,21 @@
       "resolved": "https://registry.npmjs.org/nan/-/nan-2.8.0.tgz",
       "integrity": "sha1-7XFfP+neArV6XmJS2QqWZ14fCFo="
     },
+    "node-schedule": {
+      "version": "1.3.0",
+      "resolved": "https://registry.npmjs.org/node-schedule/-/node-schedule-1.3.0.tgz",
+      "integrity": "sha512-NNwO9SUPjBwFmPh3vXiPVEhJLn4uqYmZYvJV358SRGM06BR4UoIqxJpeJwDDXB6atULsgQA97MfD1zMd5xsu+A==",
+      "requires": {
+        "cron-parser": "2.4.4",
+        "long-timeout": "0.1.1",
+        "sorted-array-functions": "1.1.0"
+      }
+    },
+    "object-keys": {
+      "version": "1.0.11",
+      "resolved": "https://registry.npmjs.org/object-keys/-/object-keys-1.0.11.tgz",
+      "integrity": "sha1-xUYBd4rVYPEULODgG8yotW0TQm0="
+    },
     "once": {
       "version": "1.4.0",
       "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz",
@@ -299,6 +363,11 @@
       "resolved": "https://registry.npmjs.org/snekfetch/-/snekfetch-3.6.1.tgz",
       "integrity": "sha512-aLEvf1YR440pINb0LEo/SL2Q2s/A26+YEqPlx09A0XpGH7qWp8iqIFFolVILHn2yudWXJne9QWyQu+lzDp+ksQ=="
     },
+    "sorted-array-functions": {
+      "version": "1.1.0",
+      "resolved": "https://registry.npmjs.org/sorted-array-functions/-/sorted-array-functions-1.1.0.tgz",
+      "integrity": "sha512-zq6fLdGQixb9VZfT/tLgU+LzoedJyTbcf1I/TKETFeUVoWIfcs5HNr+SJSvQJLXRlEZjB1gpILTrxamxAdCcgA=="
+    },
     "stream-each": {
       "version": "1.2.2",
       "resolved": "https://registry.npmjs.org/stream-each/-/stream-each-1.2.2.tgz",
@@ -350,6 +419,11 @@
       "resolved": "https://registry.npmjs.org/ultron/-/ultron-1.1.1.tgz",
       "integrity": "sha512-UIEXBNeYmKptWH6z8ZnqTeS8fV74zG0/eRU9VGkpzz+LIJNs8W/zM/L+7ctCkRrgbNnnR0xxw4bKOr0cW0N0Og=="
     },
+    "unique-by": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/unique-by/-/unique-by-1.0.0.tgz",
+      "integrity": "sha1-UiDIa6e8Vy+3E610ZRRwy2RCEr0="
+    },
     "util-deprecate": {
       "version": "1.0.2",
       "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
diff --git a/package.json b/package.json
index abd7a3c..f3c614e 100644
--- a/package.json
+++ b/package.json
@@ -1,12 +1,12 @@
 {
   "name": "markbot",
-  "version": "0.1.0",
+  "version": "0.2.0",
   "description": "A conversational Markov chain bot for Discord",
   "main": "index.js",
   "scripts": {
     "start": "node index.js"
   },
-  "repository": "",
+  "repository": "https://github.com/charlocharlie/markov-discord.git",
   "keywords": [
     "discord",
     "markov",
@@ -20,6 +20,8 @@
     "discord.js": "^11.3.0",
     "erlpack": "github:discordapp/erlpack",
     "markov-strings": "^1.3.5",
+    "node-schedule": "^1.3.0",
+    "unique-by": "^1.0.0",
     "zlib-sync": "^0.1.4"
   }
 }