Use isbinaryfile to read the contents of folders and files more safely

This commit is contained in:
Saoud Rizwan
2024-09-18 22:32:38 -04:00
parent 554da736d4
commit b7617e5f2a
4 changed files with 43 additions and 23 deletions

View File

@@ -3,6 +3,7 @@ import * as path from "path"
import pdf from "pdf-parse/lib/pdf-parse"
import mammoth from "mammoth"
import fs from "fs/promises"
import { isBinaryFile } from "isbinaryfile"
export async function extractTextFromFile(filePath: string): Promise<string> {
try {
@@ -18,19 +19,13 @@ export async function extractTextFromFile(filePath: string): Promise<string> {
return extractTextFromDOCX(filePath)
case ".ipynb":
return extractTextFromIPYNB(filePath)
case ".jpg":
case ".jpeg":
case ".png":
case ".gif":
case ".webp":
case ".mp4":
case ".mp3":
case ".wav":
case ".avi":
case ".mov":
return "Cannot read media file."
default:
return await fs.readFile(filePath, "utf8")
const isBinary = await isBinaryFile(filePath).catch(() => false)
if (!isBinary) {
return await fs.readFile(filePath, "utf8")
} else {
throw new Error(`Cannot read text for file type: ${fileExtension}`)
}
}
}