mirror of
https://github.com/pacnpal/Roo-Code.git
synced 2025-12-23 05:41:10 -05:00
Add support for reading PDF and docx files
This commit is contained in:
@@ -27,6 +27,7 @@ import { ClaudeAskResponse } from "./shared/WebviewMessage"
|
||||
import { findLast, findLastIndex } from "./utils"
|
||||
import { truncateHalfConversation } from "./utils/context-management"
|
||||
import { regexSearchFiles } from "./utils/ripgrep"
|
||||
import { extractTextFromFile } from "./utils/extract-text"
|
||||
|
||||
const SYSTEM_PROMPT =
|
||||
() => `You are Claude Dev, a highly skilled software developer with extensive knowledge in many programming languages, frameworks, design patterns, and best practices.
|
||||
@@ -924,7 +925,7 @@ export class ClaudeDev {
|
||||
}
|
||||
try {
|
||||
const absolutePath = path.resolve(cwd, relPath)
|
||||
const content = await fs.readFile(absolutePath, "utf-8")
|
||||
const content = await extractTextFromFile(absolutePath)
|
||||
|
||||
const message = JSON.stringify({
|
||||
tool: "readFile",
|
||||
|
||||
38
src/utils/extract-text.ts
Normal file
38
src/utils/extract-text.ts
Normal file
@@ -0,0 +1,38 @@
|
||||
import * as path from "path"
|
||||
import pdf from "pdf-parse"
|
||||
import mammoth from "mammoth"
|
||||
import { isBinaryFile } from "isbinaryfile"
|
||||
import fs from "fs/promises"
|
||||
|
||||
/**
 * Returns the textual content of the file at `filePath`.
 *
 * Dispatch is based on the lower-cased file extension:
 * - `.pdf`  is handed to `extractTextFromPDF`
 * - `.docx` is handed to `extractTextFromDOCX`
 * - anything else is read as UTF-8, provided `isBinaryFile` does not
 *   classify it as binary.
 *
 * @param filePath - Path of the file to read.
 * @returns The extracted text.
 * @throws Error `File not found: <path>` when `fs.access` rejects for any reason.
 * @throws Error `Unsupported file type: <ext>` when the file is binary and is
 *         neither a `.pdf` nor a `.docx`.
 */
export async function extractTextFromFile(filePath: string): Promise<string> {
	// Probe accessibility up front; every access failure is surfaced
	// to the caller under the same "File not found" message.
	try {
		await fs.access(filePath)
	} catch {
		throw new Error(`File not found: ${filePath}`)
	}

	const fileExtension = path.extname(filePath).toLowerCase()

	if (fileExtension === ".pdf") {
		return extractTextFromPDF(filePath)
	}
	if (fileExtension === ".docx") {
		return extractTextFromDOCX(filePath)
	}

	// No dedicated extractor: only plain-text files are accepted.
	if (await isBinaryFile(filePath)) {
		throw new Error(`Unsupported file type: ${fileExtension}`)
	}
	return await fs.readFile(filePath, "utf8")
}
|
||||
|
||||
/**
 * Reads the file at `filePath` into a buffer, passes it to `pdf`
 * (the default export of "pdf-parse"), and returns the `text` field of the result.
 *
 * @param filePath - Path of the PDF file to read.
 * @returns The text reported by the PDF parser.
 */
async function extractTextFromPDF(filePath: string): Promise<string> {
	const rawBytes = await fs.readFile(filePath)
	const { text } = await pdf(rawBytes)
	return text
}
|
||||
|
||||
/**
 * Passes the file path to `mammoth.extractRawText` and returns the
 * `value` field of the result.
 *
 * @param filePath - Path of the .docx file to read.
 * @returns The raw text reported by mammoth.
 */
async function extractTextFromDOCX(filePath: string): Promise<string> {
	const { value } = await mammoth.extractRawText({ path: filePath })
	return value
}
|
||||
Reference in New Issue
Block a user