mirror of
https://github.com/pacnpal/Roo-Code.git
synced 2025-12-20 04:11:10 -05:00
126 lines
4.3 KiB
TypeScript
126 lines
4.3 KiB
TypeScript
import * as path from "path"
|
|
// @ts-ignore-next-line
|
|
import pdf from "pdf-parse/lib/pdf-parse"
|
|
import mammoth from "mammoth"
|
|
import fs from "fs/promises"
|
|
import { isBinaryFile } from "isbinaryfile"
|
|
|
|
/**
 * Reads a file and returns its text with a line-number prefix on every line.
 *
 * `.pdf`, `.docx` and `.ipynb` files are routed to their dedicated extractors
 * based on the lower-cased extension. Any other file is read as UTF-8 text,
 * provided the binary probe does not classify it as binary.
 *
 * @param filePath Path of the file to read
 * @returns The extracted text, line-numbered
 * @throws Error `File not found: …` when the path cannot be accessed
 * @throws Error `Cannot read text for file type: …` when the file is detected as binary
 */
export async function extractTextFromFile(filePath: string): Promise<string> {
	// Probe accessibility first so callers get a uniform "not found" message.
	try {
		await fs.access(filePath)
	} catch {
		throw new Error(`File not found: ${filePath}`)
	}

	const ext = path.extname(filePath).toLowerCase()
	if (ext === ".pdf") {
		return extractTextFromPDF(filePath)
	}
	if (ext === ".docx") {
		return extractTextFromDOCX(filePath)
	}
	if (ext === ".ipynb") {
		return extractTextFromIPYNB(filePath)
	}

	// A failing binary probe is treated as "not binary", so the file is still read as text.
	const looksBinary = await isBinaryFile(filePath).catch(() => false)
	if (looksBinary) {
		throw new Error(`Cannot read text for file type: ${ext}`)
	}
	const text = await fs.readFile(filePath, "utf8")
	return addLineNumbers(text)
}
|
|
|
|
/**
 * Loads a PDF file into memory, runs it through the PDF parser and returns
 * the parsed text with line numbers added.
 *
 * @param filePath Path of the PDF file
 * @returns The parser's `text` output, line-numbered
 */
async function extractTextFromPDF(filePath: string): Promise<string> {
	const parsed = await pdf(await fs.readFile(filePath))
	return addLineNumbers(parsed.text)
}
|
|
|
|
/**
 * Extracts the raw text of a DOCX file via mammoth and returns it with
 * line numbers added.
 *
 * @param filePath Path of the DOCX file
 * @returns The `value` of mammoth's raw-text result, line-numbered
 */
async function extractTextFromDOCX(filePath: string): Promise<string> {
	const { value: rawText } = await mammoth.extractRawText({ path: filePath })
	return addLineNumbers(rawText)
}
|
|
|
|
/**
 * Extracts the text of all markdown and code cells from a Jupyter notebook
 * file and returns it with line numbers added.
 *
 * The file is parsed as JSON. Cells of any other type, and cells with an
 * empty or missing `source`, are skipped. Each retained cell contributes its
 * source followed by a newline.
 *
 * @param filePath Path of the `.ipynb` file
 * @returns The concatenated cell sources, line-numbered
 * @throws SyntaxError when the file content is not valid JSON
 */
async function extractTextFromIPYNB(filePath: string): Promise<string> {
	const data = await fs.readFile(filePath, "utf8")
	const notebook = JSON.parse(data)
	let extractedText = ""

	for (const cell of notebook.cells) {
		const isTextCell = cell.cell_type === "markdown" || cell.cell_type === "code"
		if (!isTextCell || !cell.source) {
			continue
		}
		// A cell's `source` may be stored as a list of strings or as one string.
		// Calling `.join` on a plain string would throw, so handle both forms.
		const cellText = Array.isArray(cell.source) ? cell.source.join("\n") : String(cell.source)
		extractedText += cellText + "\n"
	}

	return addLineNumbers(extractedText)
}
|
|
|
|
/**
 * Prefixes every line of `content` with its line number and ` | `.
 *
 * Numbers are right-aligned with spaces to the width of the largest number,
 * so the pipe separators line up. Lines are split on `\n` only.
 *
 * @param content The text to number
 * @param startLine Number assigned to the first line (defaults to 1)
 * @returns The numbered text, joined with `\n`
 */
export function addLineNumbers(content: string, startLine: number = 1): string {
	const rows = content.split("\n")
	// Width of the last (largest) line number determines the padding.
	const width = `${startLine + rows.length - 1}`.length
	const numbered: string[] = []
	for (let offset = 0; offset < rows.length; offset++) {
		const label = `${startLine + offset}`.padStart(width, " ")
		numbered.push(label + " | " + rows[offset])
	}
	return numbered.join("\n")
}
|
|
// Returns true only when each line of the content begins with a line-number prefix,
// i.e. optional whitespace, digits, whitespace, then a pipe (e.g. "1 | content" or " 123 | content").
// The pipe must be a single one: a line number followed by "||" does not count.
export function everyLineHasLineNumbers(content: string): boolean {
	const numberedLine = /^\s*\d+\s+\|(?!\|)/
	// Splitting always yields at least one entry, so empty content fails on its single empty line.
	for (const row of content.split(/\r?\n/)) {
		if (!numberedLine.test(row)) {
			return false
		}
	}
	return true
}
|
|
|
|
// Removes a leading line-number prefix from each line, keeping the rest of the line.
// Recognised prefixes look like "1 | content", " 12 | content" or "123 | content";
// at most one whitespace character after the pipe is dropped.
// Lines without such a prefix (including ones where the number is followed by "||") are left unchanged.
export function stripLineNumbers(content: string): string {
	const prefixedLine = /^\s*\d+\s+\|(?!\|)\s?(.*)$/
	// The output uses CRLF if the input contains any CRLF, otherwise LF.
	const eol = content.includes("\r\n") ? "\r\n" : "\n"

	const stripped: string[] = []
	for (const row of content.split(/\r?\n/)) {
		const hit = prefixedLine.exec(row)
		stripped.push(hit === null ? row : hit[1])
	}
	return stripped.join(eol)
}
|
|
|
|
/**
 * Shortens multi-line text to roughly `lineLimit` lines, keeping the start and the end.
 *
 * When the content has more lines than the limit, the first 20% of the limit
 * (rounded down) is taken from the top and the rest of the limit from the
 * bottom. A marker line stating how many lines were dropped is placed between
 * the two parts, surrounded by blank lines.
 *
 * @param content The multi-line string to truncate
 * @param lineLimit Optional maximum number of lines to keep. When omitted or 0, the content is returned unchanged
 * @returns The truncated text including the omission marker, or the original content when it already fits
 *
 * @example
 * // 25 lines of content with a limit of 10:
 * // - first 2 lines are kept (20% of 10)
 * // - last 8 lines are kept (the remaining 80%)
 * // - "[...15 lines omitted...]" is inserted between them
 */
export function truncateOutput(content: string, lineLimit?: number): string {
	if (lineLimit) {
		const rows = content.split("\n")
		if (rows.length > lineLimit) {
			const headCount = Math.floor(lineLimit * 0.2) // 20% of the budget from the top
			const tailCount = lineLimit - headCount // whatever is left from the bottom
			const head = rows.slice(0, headCount)
			const tail = rows.slice(-tailCount)
			const marker = `\n[...${rows.length - lineLimit} lines omitted...]\n`
			return head.concat(marker, tail).join("\n")
		}
	}
	return content
}
|