Update file permissions for multiple project files and add a --force-retrain option to the training script (src/train.ts)

This commit is contained in:
pacnpal
2025-09-25 10:04:34 -04:00
parent 1fdd8005f8
commit 239ded1669
38 changed files with 18 additions and 5 deletions

0
.dockerignore Normal file → Executable file
View File

0
.eslintrc.js Normal file → Executable file
View File

0
.gitattributes vendored Normal file → Executable file
View File

0
.github/workflows/build-and-push-image.yml vendored Normal file → Executable file
View File

0
.github/workflows/dockerhub-description.yml vendored Normal file → Executable file
View File

0
.github/workflows/typedoc.yml vendored Normal file → Executable file
View File

0
.gitignore vendored Normal file → Executable file
View File

0
.nvmrc Normal file → Executable file
View File

0
.prettierrc.js Normal file → Executable file
View File

0
.vscode/launch.json vendored Normal file → Executable file
View File

0
.vscode/settings.json vendored Normal file → Executable file
View File

0
CHANGELOG.md Normal file → Executable file
View File

0
Dockerfile Normal file → Executable file
View File

0
README.md Normal file → Executable file
View File

0
example-training.json Normal file → Executable file
View File

0
img/example-training.json Normal file → Executable file
View File

0
img/listen.png Normal file → Executable file
View File

Before

Width:  |  Height:  |  Size: 46 KiB

After

Width:  |  Height:  |  Size: 46 KiB

0
img/respond.png Normal file → Executable file
View File

Before

Width:  |  Height:  |  Size: 32 KiB

After

Width:  |  Height:  |  Size: 32 KiB

0
img/train.png Normal file → Executable file
View File

Before

Width:  |  Height:  |  Size: 47 KiB

After

Width:  |  Height:  |  Size: 47 KiB

0
imports/discord-parser.py Normal file → Executable file
View File

0
package-lock.json generated Normal file → Executable file
View File

0
package.json Normal file → Executable file
View File

0
src/config/classes.ts Normal file → Executable file
View File

0
src/config/index.ts Normal file → Executable file
View File

0
src/config/setup.ts Normal file → Executable file
View File

0
src/deploy-commands.ts Normal file → Executable file
View File

0
src/entity/Channel.ts Normal file → Executable file
View File

0
src/entity/Guild.ts Normal file → Executable file
View File

0
src/index.ts Normal file → Executable file
View File

0
src/logger.ts Normal file → Executable file
View File

0
src/migration/1640838214672-CreateTables.ts Normal file → Executable file
View File

0
src/ormconfig.ts Normal file → Executable file
View File

0
src/subscriber/.gitkeep Normal file → Executable file
View File

23
src/train.ts Normal file → Executable file
View File

@@ -223,6 +223,7 @@ async function trainFromDirectory(
guildId: string,
dirPath: string,
clean = true,
forceRetrain = false,
): Promise<string> {
L.debug({ guildId, dirPath, clean }, 'Starting directory training');
const stateManager = new TrainingStateManager(guildId, CONFIG_DIR);
@@ -274,6 +275,10 @@ async function trainFromDirectory(
for (let i = 0; i < jsonFiles.length; i++) {
const jsonPath = path.join(absolutePath, jsonFiles[i]);
const fileNumber = i + 1;
// Log progress to console
console.log(`\nProcessing file ${fileNumber}/${jsonFiles.length}: ${jsonFiles[i]}`);
console.log(`${jsonFiles.length - fileNumber} files remaining\n`);
L.debug(
{ file: jsonFiles[i], progress: `${fileNumber}/${jsonFiles.length}` },
'Processing file'
@@ -288,12 +293,18 @@ async function trainFromDirectory(
global.gc?.(); // Optional garbage collection if --expose-gc flag is used
}
// Check if we should skip this file (already processed)
if (!clean && stateManager.isChannelProcessed(jsonFiles[i])) {
L.debug({ file: jsonFiles[i] }, 'Skipping already processed file');
// Check if file was already processed
if (!clean && !forceRetrain && stateManager.isChannelProcessed(jsonFiles[i])) {
console.log(`\nSkipping ${jsonFiles[i]} - already processed`);
console.log(`Use --force-retrain to process this file again`);
console.log(`${jsonFiles.length - fileNumber} files remaining\n`);
continue;
}
// Log progress to console
console.log(`\nProcessing file ${fileNumber}/${jsonFiles.length}: ${jsonFiles[i]}`);
console.log(`${jsonFiles.length - fileNumber} files remaining\n`);
const result = await trainFromJson(
guildId,
jsonPath,
@@ -359,10 +370,11 @@ async function trainFromDirectory(
async function main(): Promise<void> {
const args = process.argv.slice(2);
if (args.length < 2) {
console.log('Usage: node train.js <guildId> <path> [--keep-existing] [--directory]');
console.log('Usage: node train.js <guildId> <path> [--keep-existing] [--directory] [--force-retrain]');
console.log('Options:');
console.log(' --keep-existing Keep existing training data');
console.log(' --directory Process all JSON files in the specified directory');
console.log(' --force-retrain Force retraining on files even if already processed');
process.exit(1);
}
@@ -370,6 +382,7 @@ async function main(): Promise<void> {
const inputPath = args[1];
const keepExisting = args.includes('--keep-existing');
const isDirectory = args.includes('--directory');
const forceRetrain = args.includes('--force-retrain');
const dataSourceOptions = Markov.extendDataSourceOptions(ormconfig);
const dataSource = new DataSource(dataSourceOptions);
@@ -379,7 +392,7 @@ async function main(): Promise<void> {
await Guild.upsert(Guild.create({ id: guildId }), ['id']);
const result = isDirectory
? await trainFromDirectory(guildId, inputPath, !keepExisting)
? await trainFromDirectory(guildId, inputPath, !keepExisting, forceRetrain)
: await trainFromJson(guildId, inputPath, !keepExisting);
console.log(result);

0
src/training-state.ts Normal file → Executable file
View File

0
src/types.ts Normal file → Executable file
View File

0
src/util.ts Normal file → Executable file
View File

0
tsconfig.json Normal file → Executable file
View File