Add support for reading .ipynb

This commit is contained in:
Saoud Rizwan
2024-09-02 04:24:07 -04:00
parent 66542f94fd
commit 5609395780

View File

@@ -17,6 +17,8 @@ export async function extractTextFromFile(filePath: string): Promise<string> {
return extractTextFromPDF(filePath)
case ".docx":
return extractTextFromDOCX(filePath)
case ".ipynb":
return extractTextFromIPYNB(filePath)
default:
const isBinary = await isBinaryFile(filePath)
if (!isBinary) {
@@ -37,3 +39,17 @@ async function extractTextFromDOCX(filePath: string): Promise<string> {
const result = await mammoth.extractRawText({ path: filePath })
return result.value
}
/**
 * Extracts the text of all markdown and code cells from a Jupyter notebook.
 *
 * The file is read as UTF-8 and parsed as JSON. Each markdown/code cell with a
 * non-empty `source` contributes its text followed by a single "\n"; cells of
 * any other type are ignored.
 *
 * @param filePath - Path to the `.ipynb` file.
 * @returns The concatenated cell sources, or an empty string when the parsed
 *   document has no `cells` array.
 * @throws If the file cannot be read or does not contain valid JSON.
 */
async function extractTextFromIPYNB(filePath: string): Promise<string> {
	const data = await fs.readFile(filePath, "utf8")
	const parsed: unknown = JSON.parse(data)

	// Valid JSON is not necessarily a notebook object (it could be null, a number, ...),
	// so narrow before touching `cells`.
	const cells: unknown =
		typeof parsed === "object" && parsed !== null ? (parsed as { cells?: unknown }).cells : undefined
	if (!Array.isArray(cells)) {
		return ""
	}

	let extractedText = ""
	for (const cell of cells) {
		const cellType: unknown = cell?.cell_type
		if (cellType !== "markdown" && cellType !== "code") {
			continue
		}

		const source: unknown = cell.source
		if (!source) {
			continue
		}

		if (typeof source === "string") {
			// nbformat allows a cell source to be stored as one plain string.
			extractedText += source + "\n"
		} else if (Array.isArray(source)) {
			// In the list form every line already carries its own trailing "\n",
			// so the pieces are joined with "" to avoid doubling line breaks.
			extractedText += source.join("") + "\n"
		}
	}
	return extractedText
}