Add train from file and non-clean training option.

Closes #31
This commit is contained in:
Charlie Laabs
2022-05-31 22:52:37 -05:00
parent c742bee965
commit 9adf741b5f
7 changed files with 220 additions and 55 deletions

View File

@@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file.
## Versions
### 2.2.0
* Add a `clean` option to the `/train` command to allow retraining without deleting existing training data
* Add the ability to train from a file of messages (#31)
### 2.1.1
* Fix TTS not working for slash commands (with a somewhat janky solution)

View File

@@ -14,6 +14,11 @@ A Markov chain bot using markov-strings.
* User: `/mark`
* Bot: ![worms are not baby snakes, by the way](img/respond.png)
### Training from a file
Using the `json` option in the `/train` command, you can import a list of messages.
An example JSON file can be seen [here](img/example-training.json).
## Setup
This bot stores your Discord server's entire message history, so a public instance to invite to your server is not available due to obvious data privacy concerns. Instead, you can host it yourself.

30
img/example-training.json Normal file
View File

@@ -0,0 +1,30 @@
[
{
"message": "Lorem ipsum dolor sit amet"
},
{
"message": "Lorem ipsum duplicate start words",
"attachments": [
"https://cdn.discordapp.com/attachments/000000000000000000/000000000000000000/1.mp3",
"https://cdn.discordapp.com/attachments/000000000000000000/000000000000000000/2.png"
]
},
{
"message": "Consectetur adipiscing elit"
},
{
"message": "Quisque tempor, erat vel lacinia imperdiet"
},
{
"message": "Justo nisi fringilla dui"
},
{
"message": "Egestas bibendum eros nisi ut lacus"
},
{
"message": "fringilla dui avait annoncé une rupture avec le erat vel: il n'en est rien…"
},
{
"message": "Fusce tincidunt tempor, erat vel lacinia vel ex pharetra pretium lacinia imperdiet"
}
]

127
package-lock.json generated
View File

@@ -1,12 +1,12 @@
{
"name": "markov-discord",
"version": "2.1.1",
"version": "2.2.0",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"name": "markov-discord",
"version": "2.1.1",
"version": "2.2.0",
"license": "MIT",
"dependencies": {
"@discordjs/builders": "^0.13.0",
@@ -24,6 +24,7 @@
"fs-extra": "^10.1.0",
"json5": "^2.2.1",
"markov-strings-db": "^4.2.0",
"node-fetch": "^2.6.7",
"pino": "^7.11.0",
"pino-pretty": "^7.6.1",
"reflect-metadata": "^0.1.13",
@@ -55,22 +56,13 @@
"node": "16"
}
},
"node_modules/@cspotcode/source-map-consumer": {
"version": "0.8.0",
"resolved": "https://registry.npmjs.org/@cspotcode/source-map-consumer/-/source-map-consumer-0.8.0.tgz",
"integrity": "sha512-41qniHzTU8yAGbCp04ohlmSrZf8bkf/iJsl3V0dRGsQN/5GFfx+LbCSsCpp2gqrqjTVg/K6O8ycoV35JIwAzAg==",
"devOptional": true,
"engines": {
"node": ">= 12"
}
},
"node_modules/@cspotcode/source-map-support": {
"version": "0.7.0",
"resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.7.0.tgz",
"integrity": "sha512-X4xqRHqN8ACt2aHVe51OxeA2HjbcL4MqFqXkrmQszJ1NOUuUu5u6Vqx/0lZSVNku7velL5FC/s5uEAj1lsBMhA==",
"version": "0.8.1",
"resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.8.1.tgz",
"integrity": "sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==",
"devOptional": true,
"dependencies": {
"@cspotcode/source-map-consumer": "0.8.0"
"@jridgewell/trace-mapping": "0.3.9"
},
"engines": {
"node": ">=12"
@@ -181,6 +173,31 @@
"integrity": "sha512-ZnQMnLV4e7hDlUvw8H+U8ASL02SS2Gn6+9Ac3wGGLIe7+je2AeAOxPY+izIPJDfFDb7eDjev0Us8MO1iFRN8hA==",
"dev": true
},
"node_modules/@jridgewell/resolve-uri": {
"version": "3.0.7",
"resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.0.7.tgz",
"integrity": "sha512-8cXDaBBHOr2pQ7j77Y6Vp5VDT2sIqWyWQ56TjEq4ih/a4iST3dItRe8Q9fp0rrIl9DoKhWQtUQz/YpOxLkXbNA==",
"devOptional": true,
"engines": {
"node": ">=6.0.0"
}
},
"node_modules/@jridgewell/sourcemap-codec": {
"version": "1.4.13",
"resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.13.tgz",
"integrity": "sha512-GryiOJmNcWbovBxTfZSF71V/mXbgcV3MewDe3kIMCLyIh5e7SKAeUZs+rMnJ8jkMolZ/4/VsdBmMrw3l+VdZ3w==",
"devOptional": true
},
"node_modules/@jridgewell/trace-mapping": {
"version": "0.3.9",
"resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.9.tgz",
"integrity": "sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ==",
"devOptional": true,
"dependencies": {
"@jridgewell/resolve-uri": "^3.0.3",
"@jridgewell/sourcemap-codec": "^1.4.10"
}
},
"node_modules/@nodelib/fs.scandir": {
"version": "2.1.5",
"resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz",
@@ -5233,12 +5250,12 @@
"integrity": "sha512-hvE+ZYXuINrx6Ei6D6hz+PTim0Uf++dYbK9FFifLNwQj+RwKquhQpn868yZsCtJYiclZF1u8l6WZxxKi+vv7Rg=="
},
"node_modules/ts-node": {
"version": "10.7.0",
"resolved": "https://registry.npmjs.org/ts-node/-/ts-node-10.7.0.tgz",
"integrity": "sha512-TbIGS4xgJoX2i3do417KSaep1uRAW/Lu+WAL2doDHC0D6ummjirVOXU5/7aiZotbQ5p1Zp9tP7U6cYhA0O7M8A==",
"version": "10.8.0",
"resolved": "https://registry.npmjs.org/ts-node/-/ts-node-10.8.0.tgz",
"integrity": "sha512-/fNd5Qh+zTt8Vt1KbYZjRHCE9sI5i7nqfD/dzBBRDeVXZXS6kToW6R7tTU6Nd4XavFs0mAVCg29Q//ML7WsZYA==",
"devOptional": true,
"dependencies": {
"@cspotcode/source-map-support": "0.7.0",
"@cspotcode/source-map-support": "^0.8.0",
"@tsconfig/node10": "^1.0.7",
"@tsconfig/node12": "^1.0.7",
"@tsconfig/node14": "^1.0.0",
@@ -5249,7 +5266,7 @@
"create-require": "^1.1.0",
"diff": "^4.0.1",
"make-error": "^1.1.1",
"v8-compile-cache-lib": "^3.0.0",
"v8-compile-cache-lib": "^3.0.1",
"yn": "3.1.1"
},
"bin": {
@@ -5567,9 +5584,9 @@
"dev": true
},
"node_modules/typescript": {
"version": "4.6.4",
"resolved": "https://registry.npmjs.org/typescript/-/typescript-4.6.4.tgz",
"integrity": "sha512-9ia/jWHIEbo49HfjrLGfKbZSuWo9iTMwXO+Ca3pRsSpbsMbc7/IU8NKdCZVRRBafVPGnoJeFL76ZOAA84I9fEg==",
"version": "4.7.2",
"resolved": "https://registry.npmjs.org/typescript/-/typescript-4.7.2.tgz",
"integrity": "sha512-Mamb1iX2FDUpcTRzltPxgWMKy3fhg0TN378ylbktPGPK/99KbDtMQ4W1hwgsbPAsG3a0xKa1vmw4VKZQbkvz5A==",
"devOptional": true,
"bin": {
"tsc": "bin/tsc",
@@ -5654,9 +5671,9 @@
"dev": true
},
"node_modules/v8-compile-cache-lib": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.0.tgz",
"integrity": "sha512-mpSYqfsFvASnSn5qMiwrr4VKfumbPyONLCOPmsR3A6pTY/r0+tSaVbgPWSAIuzbk3lCTa+FForeTiO+wBQGkjA==",
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz",
"integrity": "sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg==",
"devOptional": true
},
"node_modules/validator": {
@@ -5980,19 +5997,13 @@
}
},
"dependencies": {
"@cspotcode/source-map-consumer": {
"version": "0.8.0",
"resolved": "https://registry.npmjs.org/@cspotcode/source-map-consumer/-/source-map-consumer-0.8.0.tgz",
"integrity": "sha512-41qniHzTU8yAGbCp04ohlmSrZf8bkf/iJsl3V0dRGsQN/5GFfx+LbCSsCpp2gqrqjTVg/K6O8ycoV35JIwAzAg==",
"devOptional": true
},
"@cspotcode/source-map-support": {
"version": "0.7.0",
"resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.7.0.tgz",
"integrity": "sha512-X4xqRHqN8ACt2aHVe51OxeA2HjbcL4MqFqXkrmQszJ1NOUuUu5u6Vqx/0lZSVNku7velL5FC/s5uEAj1lsBMhA==",
"version": "0.8.1",
"resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.8.1.tgz",
"integrity": "sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==",
"devOptional": true,
"requires": {
"@cspotcode/source-map-consumer": "0.8.0"
"@jridgewell/trace-mapping": "0.3.9"
}
},
"@discordjs/builders": {
@@ -6086,6 +6097,28 @@
"integrity": "sha512-ZnQMnLV4e7hDlUvw8H+U8ASL02SS2Gn6+9Ac3wGGLIe7+je2AeAOxPY+izIPJDfFDb7eDjev0Us8MO1iFRN8hA==",
"dev": true
},
"@jridgewell/resolve-uri": {
"version": "3.0.7",
"resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.0.7.tgz",
"integrity": "sha512-8cXDaBBHOr2pQ7j77Y6Vp5VDT2sIqWyWQ56TjEq4ih/a4iST3dItRe8Q9fp0rrIl9DoKhWQtUQz/YpOxLkXbNA==",
"devOptional": true
},
"@jridgewell/sourcemap-codec": {
"version": "1.4.13",
"resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.13.tgz",
"integrity": "sha512-GryiOJmNcWbovBxTfZSF71V/mXbgcV3MewDe3kIMCLyIh5e7SKAeUZs+rMnJ8jkMolZ/4/VsdBmMrw3l+VdZ3w==",
"devOptional": true
},
"@jridgewell/trace-mapping": {
"version": "0.3.9",
"resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.9.tgz",
"integrity": "sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ==",
"devOptional": true,
"requires": {
"@jridgewell/resolve-uri": "^3.0.3",
"@jridgewell/sourcemap-codec": "^1.4.10"
}
},
"@nodelib/fs.scandir": {
"version": "2.1.5",
"resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz",
@@ -9877,12 +9910,12 @@
"integrity": "sha512-hvE+ZYXuINrx6Ei6D6hz+PTim0Uf++dYbK9FFifLNwQj+RwKquhQpn868yZsCtJYiclZF1u8l6WZxxKi+vv7Rg=="
},
"ts-node": {
"version": "10.7.0",
"resolved": "https://registry.npmjs.org/ts-node/-/ts-node-10.7.0.tgz",
"integrity": "sha512-TbIGS4xgJoX2i3do417KSaep1uRAW/Lu+WAL2doDHC0D6ummjirVOXU5/7aiZotbQ5p1Zp9tP7U6cYhA0O7M8A==",
"version": "10.8.0",
"resolved": "https://registry.npmjs.org/ts-node/-/ts-node-10.8.0.tgz",
"integrity": "sha512-/fNd5Qh+zTt8Vt1KbYZjRHCE9sI5i7nqfD/dzBBRDeVXZXS6kToW6R7tTU6Nd4XavFs0mAVCg29Q//ML7WsZYA==",
"devOptional": true,
"requires": {
"@cspotcode/source-map-support": "0.7.0",
"@cspotcode/source-map-support": "^0.8.0",
"@tsconfig/node10": "^1.0.7",
"@tsconfig/node12": "^1.0.7",
"@tsconfig/node14": "^1.0.0",
@@ -9893,7 +9926,7 @@
"create-require": "^1.1.0",
"diff": "^4.0.1",
"make-error": "^1.1.1",
"v8-compile-cache-lib": "^3.0.0",
"v8-compile-cache-lib": "^3.0.1",
"yn": "3.1.1"
}
},
@@ -10061,9 +10094,9 @@
"dev": true
},
"typescript": {
"version": "4.6.4",
"resolved": "https://registry.npmjs.org/typescript/-/typescript-4.6.4.tgz",
"integrity": "sha512-9ia/jWHIEbo49HfjrLGfKbZSuWo9iTMwXO+Ca3pRsSpbsMbc7/IU8NKdCZVRRBafVPGnoJeFL76ZOAA84I9fEg==",
"version": "4.7.2",
"resolved": "https://registry.npmjs.org/typescript/-/typescript-4.7.2.tgz",
"integrity": "sha512-Mamb1iX2FDUpcTRzltPxgWMKy3fhg0TN378ylbktPGPK/99KbDtMQ4W1hwgsbPAsG3a0xKa1vmw4VKZQbkvz5A==",
"devOptional": true
},
"unbox-primitive": {
@@ -10124,9 +10157,9 @@
"dev": true
},
"v8-compile-cache-lib": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.0.tgz",
"integrity": "sha512-mpSYqfsFvASnSn5qMiwrr4VKfumbPyONLCOPmsR3A6pTY/r0+tSaVbgPWSAIuzbk3lCTa+FForeTiO+wBQGkjA==",
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz",
"integrity": "sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg==",
"devOptional": true
},
"validator": {

View File

@@ -1,6 +1,6 @@
{
"name": "markov-discord",
"version": "2.1.1",
"version": "2.2.0",
"description": "A conversational Markov chain bot for Discord",
"main": "dist/index.js",
"scripts": {
@@ -46,6 +46,7 @@
"fs-extra": "^10.1.0",
"json5": "^2.2.1",
"markov-strings-db": "^4.2.0",
"node-fetch": "^2.6.7",
"pino": "^7.11.0",
"pino-pretty": "^7.6.1",
"reflect-metadata": "^0.1.13",

View File

@@ -85,6 +85,20 @@ export const trainCommand = new SlashCommandBuilder()
// Slash command registered under the name `train`.
.setName('train')
.setDescription(
'Train from past messages from the configured listened channels. This takes a while.'
)
// Optional boolean `clean`: when omitted, the command handler falls back to `true`.
.addBooleanOption((clean) =>
clean
.setName('clean')
.setDescription(
'Whether the database should be emptied before training. Default is true (recommended).'
)
.setRequired(false)
)
// Optional attachment `json`: when supplied, training uses the uploaded file
// instead of channel history.
.addAttachmentOption((json) =>
json
.setName('json')
.setDescription('Train from a provided JSON file rather than channel history.')
.setRequired(false)
);
const commands = [

View File

@@ -13,6 +13,7 @@ import makeEta from 'simple-eta';
import formatDistanceToNow from 'date-fns/formatDistanceToNow';
import addSeconds from 'date-fns/addSeconds';
import type { APIInteractionGuildMember, APISelectMenuComponent } from 'discord-api-types/v9';
import fetch from 'node-fetch';
import L from './logger';
import { Channel } from './entity/Channel';
import { Guild } from './entity/Guild';
@@ -253,7 +254,8 @@ function messageToData(message: Discord.Message): AddDataProps {
* Recursively gets all messages in a text channel's history.
*/
async function saveGuildMessageHistory(
interaction: Discord.Message | Discord.CommandInteraction
interaction: Discord.Message | Discord.CommandInteraction,
clean = true
): Promise<string> {
if (!isModerator(interaction.member)) return INVALID_PERMISSIONS_MESSAGE;
if (!interaction.guildId || !interaction.guild) return INVALID_GUILD_MESSAGE;
@@ -265,8 +267,12 @@ async function saveGuildMessageHistory(
return 'No channels configured to learn from. Set some with `/listen add`.';
}
L.debug('Deleting old data');
await markov.delete();
if (clean) {
L.debug('Deleting old data');
await markov.delete();
} else {
L.debug('Not deleting old data during training');
}
const channelIds = channels.map((c) => c.id);
L.debug({ channelIds }, `Training from text channels`);
@@ -440,6 +446,69 @@ async function saveGuildMessageHistory(
return `Trained from ${messagesCount} past human authored messages.`;
}
/**
 * Shape of a single entry in a JSON training file supplied to the train command.
 */
interface JSONImport {
  /** Text of the message to train on. */
  message: string;
  /** Optional list of attachment strings associated with the message. */
  attachments?: Array<string>;
}
/**
 * Train the guild's Markov chain from an attached JSON file instead of channel history.
 *
 * The file must contain a JSON array of objects. Each object needs a non-empty string
 * `message` and may carry an optional `attachments` array whose elements are all strings.
 *
 * @param attachment The uploaded file; its contents are downloaded with `fetch`.
 * @param interaction The command interaction, used for the moderator check and the guild ID.
 * @param clean Whether existing data is deleted before the import. Defaults to `true`.
 * @returns A user-facing status message: the training summary, a permission/guild error,
 * or a formatting error when the file could not be downloaded or validated.
 */
async function trainFromAttachmentJson(
  attachment: Discord.MessageAttachment,
  interaction: Discord.CommandInteraction,
  clean = true
): Promise<string> {
  if (!isModerator(interaction.member)) return INVALID_PERMISSIONS_MESSAGE;
  if (!interaction.guildId || !interaction.guild) return INVALID_GUILD_MESSAGE;
  const { guildId } = interaction;
  const markov = await getMarkovByGuildId(guildId);

  let trainingData: AddDataProps[];
  try {
    const importAttachmentUrl = attachment.attachment.toString();
    const getResp = await fetch(importAttachmentUrl);
    if (!getResp.ok) throw new Error(getResp.statusText);

    // The payload is untrusted user input: validate its shape instead of asserting it.
    const importData: unknown = await getResp.json();
    if (!Array.isArray(importData)) {
      throw new Error('The attachment must contain a JSON array of entries');
    }

    trainingData = importData.map((entry: unknown, index: number): AddDataProps => {
      if (typeof entry !== 'object' || entry === null) {
        throw new Error(`Entry at index ${index} must be an object`);
      }
      const { message, attachments } = entry as Partial<Record<keyof JSONImport, unknown>>;

      if (!message) {
        throw new Error(`Entry at index ${index} must have a "message"`);
      }
      if (typeof message !== 'string') {
        throw new Error(`Entry at index ${index} must have a "message" with a type of string`);
      }

      // A missing or null "attachments" is allowed. When present, reject the entry if ANY
      // element is not a string. `some` is required here: `every` would let mixed arrays
      // through and would reject an empty array, because `every` is vacuously true on [].
      const hasAttachments = attachments !== undefined && attachments !== null;
      if (
        hasAttachments &&
        (!Array.isArray(attachments) || attachments.some((a: unknown) => typeof a !== 'string'))
      ) {
        throw new Error(
          `Entry at index ${index} must have all "attachments" each with a type of string`
        );
      }

      // Only attach custom data when there is at least one attachment to store.
      let custom: MarkovDataCustom | undefined;
      if (Array.isArray(attachments) && attachments.length) {
        custom = { attachments };
      }

      return {
        string: message,
        custom,
        tags: [guildId],
      };
    });
  } catch (err) {
    L.error(err);
    return 'The provided attachment file has invalid formatting. See the logs for details.';
  }

  // Existing data is only deleted once the import has been fully validated, so a bad
  // file never wipes the database.
  if (clean) {
    L.debug('Deleting old data');
    await markov.delete();
  } else {
    L.debug('Not deleting old data during training');
  }

  await markov.addData(trainingData);

  L.info(`Trained from ${trainingData.length} past human authored messages.`);
  return `Trained from ${trainingData.length} past human authored messages.`;
}
interface GenerateResponse {
message?: AgnosticReplyOptions;
debug?: AgnosticReplyOptions;
@@ -846,10 +915,18 @@ client.on('interactionCreate', async (interaction) => {
}
} else if (interaction.commandName === trainCommand.name) {
await interaction.deferReply();
const reply = (await interaction.fetchReply()) as Discord.Message; // Must fetch the reply ASAP
const responseMessage = await saveGuildMessageHistory(interaction);
// Send a message in reply to the reply to avoid the 15 minute webhook token timeout
await reply.reply({ content: responseMessage });
const clean = interaction.options.getBoolean('clean') ?? true;
const trainingJSON = interaction.options.getAttachment('json');
if (trainingJSON) {
const responseMessage = await trainFromAttachmentJson(trainingJSON, interaction, clean);
await interaction.followUp(responseMessage);
} else {
const reply = (await interaction.fetchReply()) as Discord.Message; // Must fetch the reply ASAP
const responseMessage = await saveGuildMessageHistory(interaction, clean);
// Send a message in reply to the reply to avoid the 15 minute webhook token timeout
await reply.reply({ content: responseMessage });
}
}
} else if (interaction.isSelectMenu()) {
if (interaction.customId === 'listen-modify-select') {