diff --git a/.dockerignore b/.dockerignore old mode 100644 new mode 100755 diff --git a/.eslintrc.js b/.eslintrc.js old mode 100644 new mode 100755 diff --git a/.gitattributes b/.gitattributes old mode 100644 new mode 100755 diff --git a/.github/workflows/build-and-push-image.yml b/.github/workflows/build-and-push-image.yml old mode 100644 new mode 100755 diff --git a/.github/workflows/dockerhub-description.yml b/.github/workflows/dockerhub-description.yml old mode 100644 new mode 100755 diff --git a/.github/workflows/typedoc.yml b/.github/workflows/typedoc.yml old mode 100644 new mode 100755 diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 diff --git a/.nvmrc b/.nvmrc old mode 100644 new mode 100755 diff --git a/.prettierrc.js b/.prettierrc.js old mode 100644 new mode 100755 diff --git a/.vscode/launch.json b/.vscode/launch.json old mode 100644 new mode 100755 diff --git a/.vscode/settings.json b/.vscode/settings.json old mode 100644 new mode 100755 diff --git a/CHANGELOG.md b/CHANGELOG.md old mode 100644 new mode 100755 diff --git a/Dockerfile b/Dockerfile old mode 100644 new mode 100755 diff --git a/README.md b/README.md old mode 100644 new mode 100755 diff --git a/example-training.json b/example-training.json old mode 100644 new mode 100755 diff --git a/img/example-training.json b/img/example-training.json old mode 100644 new mode 100755 diff --git a/img/listen.png b/img/listen.png old mode 100644 new mode 100755 diff --git a/img/respond.png b/img/respond.png old mode 100644 new mode 100755 diff --git a/img/train.png b/img/train.png old mode 100644 new mode 100755 diff --git a/imports/discord-parser.py b/imports/discord-parser.py old mode 100644 new mode 100755 diff --git a/package-lock.json b/package-lock.json old mode 100644 new mode 100755 diff --git a/package.json b/package.json old mode 100644 new mode 100755 diff --git a/src/config/classes.ts b/src/config/classes.ts old mode 100644 new mode 100755 diff --git a/src/config/index.ts b/src/config/index.ts old mode 100644 new mode 100755 diff --git a/src/config/setup.ts b/src/config/setup.ts old mode 100644 new mode 100755 diff --git a/src/deploy-commands.ts b/src/deploy-commands.ts old mode 100644 new mode 100755 diff --git a/src/entity/Channel.ts b/src/entity/Channel.ts old mode 100644 new mode 100755 diff --git a/src/entity/Guild.ts b/src/entity/Guild.ts old mode 100644 new mode 100755 diff --git a/src/index.ts b/src/index.ts old mode 100644 new mode 100755 diff --git a/src/logger.ts b/src/logger.ts old mode 100644 new mode 100755 diff --git a/src/migration/1640838214672-CreateTables.ts b/src/migration/1640838214672-CreateTables.ts old mode 100644 new mode 100755 diff --git a/src/ormconfig.ts b/src/ormconfig.ts old mode 100644 new mode 100755 diff --git a/src/subscriber/.gitkeep b/src/subscriber/.gitkeep old mode 100644 new mode 100755 diff --git a/src/train.ts b/src/train.ts old mode 100644 new mode 100755 index 35c8bc4..e5c3b93 --- a/src/train.ts +++ b/src/train.ts @@ -223,6 +223,7 @@ async function trainFromDirectory( guildId: string, dirPath: string, clean = true, + forceRetrain = false, ): Promise { L.debug({ guildId, dirPath, clean }, 'Starting directory training'); const stateManager = new TrainingStateManager(guildId, CONFIG_DIR); @@ -274,6 +275,10 @@ async function trainFromDirectory( for (let i = 0; i < jsonFiles.length; i++) { const jsonPath = path.join(absolutePath, jsonFiles[i]); const fileNumber = i + 1; + // Log progress to console + console.log(`\nProcessing file ${fileNumber}/${jsonFiles.length}: ${jsonFiles[i]}`); + console.log(`${jsonFiles.length - fileNumber} files remaining\n`); + L.debug( { file: jsonFiles[i], progress: `${fileNumber}/${jsonFiles.length}` }, 'Processing file' @@ -288,12 +293,18 @@ async function trainFromDirectory( global.gc?.(); // Optional garbage collection if --expose-gc flag is used } - // Check if we should skip this file (already processed) - if (!clean && stateManager.isChannelProcessed(jsonFiles[i])) { - L.debug({ file: jsonFiles[i] }, 'Skipping already processed file'); + // Check if file was already processed + if (!clean && !forceRetrain && stateManager.isChannelProcessed(jsonFiles[i])) { + console.log(`\nSkipping ${jsonFiles[i]} - already processed`); + console.log(`Use --force-retrain to process this file again`); + console.log(`${jsonFiles.length - fileNumber} files remaining\n`); continue; } + // Log progress to console + console.log(`\nProcessing file ${fileNumber}/${jsonFiles.length}: ${jsonFiles[i]}`); + console.log(`${jsonFiles.length - fileNumber} files remaining\n`); + const result = await trainFromJson( guildId, jsonPath, @@ -359,10 +370,11 @@ async function trainFromDirectory( async function main(): Promise { const args = process.argv.slice(2); if (args.length < 2) { - console.log('Usage: node train.js [--keep-existing] [--directory]'); + console.log('Usage: node train.js [--keep-existing] [--directory] [--force-retrain]'); console.log('Options:'); console.log(' --keep-existing Keep existing training data'); console.log(' --directory Process all JSON files in the specified directory'); + console.log(' --force-retrain Force retraining on files even if already processed'); process.exit(1); } @@ -370,6 +382,7 @@ async function main(): Promise { const inputPath = args[1]; const keepExisting = args.includes('--keep-existing'); const isDirectory = args.includes('--directory'); + const forceRetrain = args.includes('--force-retrain'); const dataSourceOptions = Markov.extendDataSourceOptions(ormconfig); const dataSource = new DataSource(dataSourceOptions); @@ -379,7 +392,7 @@ async function main(): Promise { await Guild.upsert(Guild.create({ id: guildId }), ['id']); const result = isDirectory - ? await trainFromDirectory(guildId, inputPath, !keepExisting) + ? await trainFromDirectory(guildId, inputPath, !keepExisting, forceRetrain) : await trainFromJson(guildId, inputPath, !keepExisting); console.log(result); diff --git a/src/training-state.ts b/src/training-state.ts old mode 100644 new mode 100755 diff --git a/src/types.ts b/src/types.ts old mode 100644 new mode 100755 diff --git a/src/util.ts b/src/util.ts old mode 100644 new mode 100755 diff --git a/tsconfig.json b/tsconfig.json old mode 100644 new mode 100755