diff --git a/CHANGELOG.md b/CHANGELOG.md index 52db5e1..83bee96 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file. ## Versions +### 2.2.0 + +* Add a `clean` option flag to the `/train` command to allow retraining without overwriting existing data +* Add the ability to train from a file of messages (#31) + ### 2.1.1 * Fix TTS not working for slash commands (with a somewhat janky solution) diff --git a/README.md b/README.md index b64d0be..52b5b57 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,11 @@ A Markov chain bot using markov-strings. * User: `/mark` * Bot: ![worms are not baby snakes, by the way](img/respond.png) +### Training from a file + +Using the `json` option in the `/train` command, you can import a list of messages. +An example JSON file can be seen [here](img/example-training.json). + ## Setup This bot stores your Discord server's entire message history, so a public instance to invite to your server is not available due to obvious data privacy concerns. Instead, you can host it yourself. 
diff --git a/img/example-training.json b/img/example-training.json new file mode 100644 index 0000000..a8f2e33 --- /dev/null +++ b/img/example-training.json @@ -0,0 +1,30 @@ +[ + { + "message": "Lorem ipsum dolor sit amet" + }, + { + "message": "Lorem ipsum duplicate start words", + "attachments": [ + "https://cdn.discordapp.com/attachments/000000000000000000/000000000000000000/1.mp3", + "https://cdn.discordapp.com/attachments/000000000000000000/000000000000000000/2.png" + ] + }, + { + "message": "Consectetur adipiscing elit" + }, + { + "message": "Quisque tempor, erat vel lacinia imperdiet" + }, + { + "message": "Justo nisi fringilla dui" + }, + { + "message": "Egestas bibendum eros nisi ut lacus" + }, + { + "message": "fringilla dui avait annoncé une rupture avec le erat vel: il n'en est rien…" + }, + { + "message": "Fusce tincidunt tempor, erat vel lacinia vel ex pharetra pretium lacinia imperdiet" + } +] diff --git a/package-lock.json b/package-lock.json index f3599dd..ab19548 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "markov-discord", - "version": "2.1.1", + "version": "2.2.0", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "markov-discord", - "version": "2.1.1", + "version": "2.2.0", "license": "MIT", "dependencies": { "@discordjs/builders": "^0.13.0", @@ -24,6 +24,7 @@ "fs-extra": "^10.1.0", "json5": "^2.2.1", "markov-strings-db": "^4.2.0", + "node-fetch": "^2.6.7", "pino": "^7.11.0", "pino-pretty": "^7.6.1", "reflect-metadata": "^0.1.13", @@ -55,22 +56,13 @@ "node": "16" } }, - "node_modules/@cspotcode/source-map-consumer": { - "version": "0.8.0", - "resolved": "https://registry.npmjs.org/@cspotcode/source-map-consumer/-/source-map-consumer-0.8.0.tgz", - "integrity": "sha512-41qniHzTU8yAGbCp04ohlmSrZf8bkf/iJsl3V0dRGsQN/5GFfx+LbCSsCpp2gqrqjTVg/K6O8ycoV35JIwAzAg==", - "devOptional": true, - "engines": { - "node": ">= 12" - } - }, "node_modules/@cspotcode/source-map-support": { - "version": "0.7.0", 
- "resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.7.0.tgz", - "integrity": "sha512-X4xqRHqN8ACt2aHVe51OxeA2HjbcL4MqFqXkrmQszJ1NOUuUu5u6Vqx/0lZSVNku7velL5FC/s5uEAj1lsBMhA==", + "version": "0.8.1", + "resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.8.1.tgz", + "integrity": "sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==", "devOptional": true, "dependencies": { - "@cspotcode/source-map-consumer": "0.8.0" + "@jridgewell/trace-mapping": "0.3.9" }, "engines": { "node": ">=12" @@ -181,6 +173,31 @@ "integrity": "sha512-ZnQMnLV4e7hDlUvw8H+U8ASL02SS2Gn6+9Ac3wGGLIe7+je2AeAOxPY+izIPJDfFDb7eDjev0Us8MO1iFRN8hA==", "dev": true }, + "node_modules/@jridgewell/resolve-uri": { + "version": "3.0.7", + "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.0.7.tgz", + "integrity": "sha512-8cXDaBBHOr2pQ7j77Y6Vp5VDT2sIqWyWQ56TjEq4ih/a4iST3dItRe8Q9fp0rrIl9DoKhWQtUQz/YpOxLkXbNA==", + "devOptional": true, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/sourcemap-codec": { + "version": "1.4.13", + "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.13.tgz", + "integrity": "sha512-GryiOJmNcWbovBxTfZSF71V/mXbgcV3MewDe3kIMCLyIh5e7SKAeUZs+rMnJ8jkMolZ/4/VsdBmMrw3l+VdZ3w==", + "devOptional": true + }, + "node_modules/@jridgewell/trace-mapping": { + "version": "0.3.9", + "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.9.tgz", + "integrity": "sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ==", + "devOptional": true, + "dependencies": { + "@jridgewell/resolve-uri": "^3.0.3", + "@jridgewell/sourcemap-codec": "^1.4.10" + } + }, "node_modules/@nodelib/fs.scandir": { "version": "2.1.5", "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", @@ -5233,12 +5250,12 @@ 
"integrity": "sha512-hvE+ZYXuINrx6Ei6D6hz+PTim0Uf++dYbK9FFifLNwQj+RwKquhQpn868yZsCtJYiclZF1u8l6WZxxKi+vv7Rg==" }, "node_modules/ts-node": { - "version": "10.7.0", - "resolved": "https://registry.npmjs.org/ts-node/-/ts-node-10.7.0.tgz", - "integrity": "sha512-TbIGS4xgJoX2i3do417KSaep1uRAW/Lu+WAL2doDHC0D6ummjirVOXU5/7aiZotbQ5p1Zp9tP7U6cYhA0O7M8A==", + "version": "10.8.0", + "resolved": "https://registry.npmjs.org/ts-node/-/ts-node-10.8.0.tgz", + "integrity": "sha512-/fNd5Qh+zTt8Vt1KbYZjRHCE9sI5i7nqfD/dzBBRDeVXZXS6kToW6R7tTU6Nd4XavFs0mAVCg29Q//ML7WsZYA==", "devOptional": true, "dependencies": { - "@cspotcode/source-map-support": "0.7.0", + "@cspotcode/source-map-support": "^0.8.0", "@tsconfig/node10": "^1.0.7", "@tsconfig/node12": "^1.0.7", "@tsconfig/node14": "^1.0.0", @@ -5249,7 +5266,7 @@ "create-require": "^1.1.0", "diff": "^4.0.1", "make-error": "^1.1.1", - "v8-compile-cache-lib": "^3.0.0", + "v8-compile-cache-lib": "^3.0.1", "yn": "3.1.1" }, "bin": { @@ -5567,9 +5584,9 @@ "dev": true }, "node_modules/typescript": { - "version": "4.6.4", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.6.4.tgz", - "integrity": "sha512-9ia/jWHIEbo49HfjrLGfKbZSuWo9iTMwXO+Ca3pRsSpbsMbc7/IU8NKdCZVRRBafVPGnoJeFL76ZOAA84I9fEg==", + "version": "4.7.2", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.7.2.tgz", + "integrity": "sha512-Mamb1iX2FDUpcTRzltPxgWMKy3fhg0TN378ylbktPGPK/99KbDtMQ4W1hwgsbPAsG3a0xKa1vmw4VKZQbkvz5A==", "devOptional": true, "bin": { "tsc": "bin/tsc", @@ -5654,9 +5671,9 @@ "dev": true }, "node_modules/v8-compile-cache-lib": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.0.tgz", - "integrity": "sha512-mpSYqfsFvASnSn5qMiwrr4VKfumbPyONLCOPmsR3A6pTY/r0+tSaVbgPWSAIuzbk3lCTa+FForeTiO+wBQGkjA==", + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz", + "integrity": 
"sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg==", "devOptional": true }, "node_modules/validator": { @@ -5980,19 +5997,13 @@ } }, "dependencies": { - "@cspotcode/source-map-consumer": { - "version": "0.8.0", - "resolved": "https://registry.npmjs.org/@cspotcode/source-map-consumer/-/source-map-consumer-0.8.0.tgz", - "integrity": "sha512-41qniHzTU8yAGbCp04ohlmSrZf8bkf/iJsl3V0dRGsQN/5GFfx+LbCSsCpp2gqrqjTVg/K6O8ycoV35JIwAzAg==", - "devOptional": true - }, "@cspotcode/source-map-support": { - "version": "0.7.0", - "resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.7.0.tgz", - "integrity": "sha512-X4xqRHqN8ACt2aHVe51OxeA2HjbcL4MqFqXkrmQszJ1NOUuUu5u6Vqx/0lZSVNku7velL5FC/s5uEAj1lsBMhA==", + "version": "0.8.1", + "resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.8.1.tgz", + "integrity": "sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==", "devOptional": true, "requires": { - "@cspotcode/source-map-consumer": "0.8.0" + "@jridgewell/trace-mapping": "0.3.9" } }, "@discordjs/builders": { @@ -6086,6 +6097,28 @@ "integrity": "sha512-ZnQMnLV4e7hDlUvw8H+U8ASL02SS2Gn6+9Ac3wGGLIe7+je2AeAOxPY+izIPJDfFDb7eDjev0Us8MO1iFRN8hA==", "dev": true }, + "@jridgewell/resolve-uri": { + "version": "3.0.7", + "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.0.7.tgz", + "integrity": "sha512-8cXDaBBHOr2pQ7j77Y6Vp5VDT2sIqWyWQ56TjEq4ih/a4iST3dItRe8Q9fp0rrIl9DoKhWQtUQz/YpOxLkXbNA==", + "devOptional": true + }, + "@jridgewell/sourcemap-codec": { + "version": "1.4.13", + "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.13.tgz", + "integrity": "sha512-GryiOJmNcWbovBxTfZSF71V/mXbgcV3MewDe3kIMCLyIh5e7SKAeUZs+rMnJ8jkMolZ/4/VsdBmMrw3l+VdZ3w==", + "devOptional": true + }, + "@jridgewell/trace-mapping": { + "version": "0.3.9", + "resolved": 
"https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.9.tgz", + "integrity": "sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ==", + "devOptional": true, + "requires": { + "@jridgewell/resolve-uri": "^3.0.3", + "@jridgewell/sourcemap-codec": "^1.4.10" + } + }, "@nodelib/fs.scandir": { "version": "2.1.5", "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", @@ -9877,12 +9910,12 @@ "integrity": "sha512-hvE+ZYXuINrx6Ei6D6hz+PTim0Uf++dYbK9FFifLNwQj+RwKquhQpn868yZsCtJYiclZF1u8l6WZxxKi+vv7Rg==" }, "ts-node": { - "version": "10.7.0", - "resolved": "https://registry.npmjs.org/ts-node/-/ts-node-10.7.0.tgz", - "integrity": "sha512-TbIGS4xgJoX2i3do417KSaep1uRAW/Lu+WAL2doDHC0D6ummjirVOXU5/7aiZotbQ5p1Zp9tP7U6cYhA0O7M8A==", + "version": "10.8.0", + "resolved": "https://registry.npmjs.org/ts-node/-/ts-node-10.8.0.tgz", + "integrity": "sha512-/fNd5Qh+zTt8Vt1KbYZjRHCE9sI5i7nqfD/dzBBRDeVXZXS6kToW6R7tTU6Nd4XavFs0mAVCg29Q//ML7WsZYA==", "devOptional": true, "requires": { - "@cspotcode/source-map-support": "0.7.0", + "@cspotcode/source-map-support": "^0.8.0", "@tsconfig/node10": "^1.0.7", "@tsconfig/node12": "^1.0.7", "@tsconfig/node14": "^1.0.0", @@ -9893,7 +9926,7 @@ "create-require": "^1.1.0", "diff": "^4.0.1", "make-error": "^1.1.1", - "v8-compile-cache-lib": "^3.0.0", + "v8-compile-cache-lib": "^3.0.1", "yn": "3.1.1" } }, @@ -10061,9 +10094,9 @@ "dev": true }, "typescript": { - "version": "4.6.4", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.6.4.tgz", - "integrity": "sha512-9ia/jWHIEbo49HfjrLGfKbZSuWo9iTMwXO+Ca3pRsSpbsMbc7/IU8NKdCZVRRBafVPGnoJeFL76ZOAA84I9fEg==", + "version": "4.7.2", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.7.2.tgz", + "integrity": "sha512-Mamb1iX2FDUpcTRzltPxgWMKy3fhg0TN378ylbktPGPK/99KbDtMQ4W1hwgsbPAsG3a0xKa1vmw4VKZQbkvz5A==", "devOptional": true }, "unbox-primitive": { @@ -10124,9 +10157,9 @@ "dev": true }, 
"v8-compile-cache-lib": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.0.tgz", - "integrity": "sha512-mpSYqfsFvASnSn5qMiwrr4VKfumbPyONLCOPmsR3A6pTY/r0+tSaVbgPWSAIuzbk3lCTa+FForeTiO+wBQGkjA==", + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz", + "integrity": "sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg==", "devOptional": true }, "validator": { diff --git a/package.json b/package.json index ed18fdf..a8364b8 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "markov-discord", - "version": "2.1.1", + "version": "2.2.0", "description": "A conversational Markov chain bot for Discord", "main": "dist/index.js", "scripts": { @@ -46,6 +46,7 @@ "fs-extra": "^10.1.0", "json5": "^2.2.1", "markov-strings-db": "^4.2.0", + "node-fetch": "^2.6.7", "pino": "^7.11.0", "pino-pretty": "^7.6.1", "reflect-metadata": "^0.1.13", diff --git a/src/deploy-commands.ts b/src/deploy-commands.ts index 5069d5f..e01e7e4 100644 --- a/src/deploy-commands.ts +++ b/src/deploy-commands.ts @@ -85,6 +85,20 @@ export const trainCommand = new SlashCommandBuilder() .setName('train') .setDescription( 'Train from past messages from the configured listened channels. This takes a while.' + ) + .addBooleanOption((clean) => + clean + .setName('clean') + .setDescription( + 'Whether the database should be emptied before training. Default is true (recommended).' 
+ ) + .setRequired(false) + ) + .addAttachmentOption((json) => + json + .setName('json') + .setDescription('Train from a provided JSON file rather than channel history.') + .setRequired(false) ); const commands = [ diff --git a/src/index.ts b/src/index.ts index d326669..40953bd 100644 --- a/src/index.ts +++ b/src/index.ts @@ -13,6 +13,7 @@ import makeEta from 'simple-eta'; import formatDistanceToNow from 'date-fns/formatDistanceToNow'; import addSeconds from 'date-fns/addSeconds'; import type { APIInteractionGuildMember, APISelectMenuComponent } from 'discord-api-types/v9'; +import fetch from 'node-fetch'; import L from './logger'; import { Channel } from './entity/Channel'; import { Guild } from './entity/Guild'; @@ -253,7 +254,8 @@ function messageToData(message: Discord.Message): AddDataProps { * Recursively gets all messages in a text channel's history. */ async function saveGuildMessageHistory( - interaction: Discord.Message | Discord.CommandInteraction + interaction: Discord.Message | Discord.CommandInteraction, + clean = true ): Promise { if (!isModerator(interaction.member)) return INVALID_PERMISSIONS_MESSAGE; if (!interaction.guildId || !interaction.guild) return INVALID_GUILD_MESSAGE; @@ -265,8 +267,12 @@ async function saveGuildMessageHistory( return 'No channels configured to learn from. 
Set some with `/listen add`.';
   }
 
-  L.debug('Deleting old data');
-  await markov.delete();
+  if (clean) {
+    L.debug('Deleting old data');
+    await markov.delete();
+  } else {
+    L.debug('Not deleting old data during training');
+  }
 
   const channelIds = channels.map((c) => c.id);
   L.debug({ channelIds }, `Training from text channels`);
@@ -440,6 +446,83 @@ async function saveGuildMessageHistory(
   return `Trained from ${messagesCount} past human authored messages.`;
 }
 
+interface JSONImport {
+  message: string;
+  attachments?: string[];
+}
+
+/**
+ * Train the guild's Markov chain from an attached JSON file rather than channel history.
+ *
+ * The file must hold an array of entries, each with a non-empty string "message" and,
+ * optionally, an "attachments" array of strings.
+ *
+ * @param attachment The uploaded JSON file; its contents are downloaded with `fetch`.
+ * @param interaction The slash command interaction that requested the training.
+ * @param clean Whether existing data is deleted before importing. Defaults to true.
+ * @returns A user-facing message describing the outcome.
+ */
+async function trainFromAttachmentJson(
+  attachment: Discord.MessageAttachment,
+  interaction: Discord.CommandInteraction,
+  clean = true
+): Promise<string> {
+  if (!isModerator(interaction.member)) return INVALID_PERMISSIONS_MESSAGE;
+  if (!interaction.guildId || !interaction.guild) return INVALID_GUILD_MESSAGE;
+  const { guildId } = interaction;
+  const markov = await getMarkovByGuildId(guildId);
+
+  let trainingData: AddDataProps[];
+  try {
+    const importAttachmentUrl = attachment.attachment.toString();
+    const getResp = await fetch(importAttachmentUrl);
+    if (!getResp.ok) throw new Error(getResp.statusText);
+    const importData = (await getResp.json()) as JSONImport[];
+    if (!Array.isArray(importData)) {
+      throw new Error('The imported JSON must be an array of entries');
+    }
+
+    trainingData = importData.map((datum, index) => {
+      if (!datum.message) {
+        throw new Error(`Entry at index ${index} must have a "message"`);
+      }
+      if (typeof datum.message !== 'string') {
+        throw new Error(`Entry at index ${index} must have a "message" with a type of string`);
+      }
+      // Use `some`, not `every`: a single non-string attachment must fail the entry, and an
+      // empty "attachments" array must stay valid (`[].every(...)` is always true).
+      if (datum.attachments?.some((a) => typeof a !== 'string')) {
+        throw new Error(
+          `Entry at index ${index} must have all "attachments" each with a type of string`
+        );
+      }
+      let custom: MarkovDataCustom | undefined;
+      if (datum.attachments?.length) custom = { attachments: datum.attachments };
+      return {
+        string: datum.message,
+        custom,
+        tags: [guildId],
+      };
+    });
+  } catch (err) {
+    // Covers download failures, malformed JSON, and the validation errors thrown above.
+    L.error(err);
+    return 'The provided attachment file has invalid formatting. See the logs for details.';
+  }
+
+  if (clean) {
+    L.debug('Deleting old data');
+    await markov.delete();
+  } else {
+    L.debug('Not deleting old data during training');
+  }
+
+  await markov.addData(trainingData);
+
+  L.info(`Trained from ${trainingData.length} past human authored messages.`);
+  return `Trained from ${trainingData.length} past human authored messages.`;
+}
+
 interface GenerateResponse {
   message?: AgnosticReplyOptions;
   debug?: AgnosticReplyOptions;
@@ -846,10 +929,18 @@ client.on('interactionCreate', async (interaction) => {
       }
     } else if (interaction.commandName === trainCommand.name) {
       await interaction.deferReply();
-      const reply = (await interaction.fetchReply()) as Discord.Message; // Must fetch the reply ASAP
-      const responseMessage = await saveGuildMessageHistory(interaction);
-      // Send a message in reply to the reply to avoid the 15 minute webhook token timeout
-      await reply.reply({ content: responseMessage });
+      const clean = interaction.options.getBoolean('clean') ?? true;
+      const trainingJSON = interaction.options.getAttachment('json');
+
+      if (trainingJSON) {
+        const responseMessage = await trainFromAttachmentJson(trainingJSON, interaction, clean);
+        await interaction.followUp(responseMessage);
+      } else {
+        const reply = (await interaction.fetchReply()) as Discord.Message; // Must fetch the reply ASAP
+        const responseMessage = await saveGuildMessageHistory(interaction, clean);
+        // Send a message in reply to the reply to avoid the 15 minute webhook token timeout
+        await reply.reply({ content: responseMessage });
+      }
     }
   } else if (interaction.isSelectMenu()) {
     if (interaction.customId === 'listen-modify-select') {