mirror of
https://github.com/pacnpal/Roo-Code.git
synced 2025-12-23 13:51:11 -05:00
Add browser_action tool
This commit is contained in:
260
src/services/browser/BrowserSession.ts
Normal file
260
src/services/browser/BrowserSession.ts
Normal file
@@ -0,0 +1,260 @@
|
||||
import * as vscode from "vscode"
|
||||
import * as fs from "fs/promises"
|
||||
import * as path from "path"
|
||||
import { Browser, Page, ScreenshotOptions, TimeoutError, launch } from "puppeteer-core"
|
||||
// @ts-ignore
|
||||
import PCR from "puppeteer-chromium-resolver"
|
||||
import pWaitFor from "p-wait-for"
|
||||
import delay from "delay"
|
||||
import { fileExistsAtPath } from "../../utils/fs"
|
||||
import { BrowserActionResult } from "../../shared/ExtensionMessage"
|
||||
|
||||
/**
 * The subset of the value resolved by `puppeteer-chromium-resolver` (PCR)
 * that this file reads. Only the fields used below are declared.
 */
interface PCRStats {
	/** Puppeteer entry point exposed by PCR; only `launch` is used here. */
	puppeteer: { launch: typeof launch }
	/** Path of the Chromium executable handed to `launch`. */
	executablePath: string
}
|
||||
|
||||
/**
 * Owns one puppeteer-controlled Chromium browser/page pair and exposes the
 * primitive interactions (navigate, click, type, scroll) used by the
 * `browser_action` tool.
 *
 * Every interaction goes through {@link BrowserSession.doAction}, which
 * collects console output produced while the action runs and returns it
 * together with a screenshot of the 800x600 viewport.
 */
export class BrowserSession {
	/** Viewport used both when launching the browser and when clipping screenshots. */
	private static readonly viewport = { width: 800, height: 600 }

	private context: vscode.ExtensionContext
	private browser?: Browser
	private page?: Page
	/** Last clicked coordinate as an "x,y" string; undefined until the first click of a session. */
	private currentMousePosition?: string

	constructor(context: vscode.ExtensionContext) {
		this.context = context
	}

	/**
	 * Makes sure a `puppeteer` directory exists under the extension's global
	 * storage and asks `puppeteer-chromium-resolver` to resolve Chromium there.
	 *
	 * @returns The resolver stats (puppeteer entry point and executable path).
	 * @throws Error if the extension context has no global storage path.
	 */
	private async ensureChromiumExists(): Promise<PCRStats> {
		const globalStoragePath = this.context?.globalStorageUri?.fsPath
		if (!globalStoragePath) {
			throw new Error("Global storage uri is invalid")
		}

		const puppeteerDir = path.join(globalStoragePath, "puppeteer")
		const dirExists = await fileExistsAtPath(puppeteerDir)
		if (!dirExists) {
			await fs.mkdir(puppeteerDir, { recursive: true })
		}

		// if chromium doesn't exist, this will download it to path.join(puppeteerDir, ".chromium-browser-snapshots")
		// if it does exist it will return the path to existing chromium
		const stats: PCRStats = await PCR({
			downloadPath: puppeteerDir,
		})

		return stats
	}

	/**
	 * Launches a fresh browser and opens a new page in it.
	 * If a browser is already running it is closed first, since the model may
	 * launch a browser again after having used one before.
	 */
	async launchBrowser(): Promise<void> {
		console.log("launch browser called")
		if (this.browser) {
			// throw new Error("Browser already launched")
			await this.closeBrowser() // this may happen when the model launches a browser again after having used it already before
		}

		const stats = await this.ensureChromiumExists()
		this.browser = await stats.puppeteer.launch({
			args: [
				"--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36",
			],
			executablePath: stats.executablePath,
			defaultViewport: { ...BrowserSession.viewport },
			headless: false,
		})
		// (latest version of puppeteer does not add headless to user agent)
		this.page = await this.browser?.newPage()
	}

	/**
	 * Closes the browser (ignoring close errors) and clears all per-session
	 * state, including the remembered mouse position so that a later session
	 * does not report a position from this one.
	 *
	 * @returns An empty result object.
	 */
	async closeBrowser(): Promise<BrowserActionResult> {
		if (this.browser || this.page) {
			console.log("closing browser...")
			await this.browser?.close().catch(() => {})
			this.browser = undefined
			this.page = undefined
			this.currentMousePosition = undefined
		}
		return {}
	}

	/**
	 * Runs `action` against the current page while recording console messages
	 * and page errors, then waits for the console to go quiet and captures a
	 * screenshot of the viewport.
	 *
	 * Errors thrown by `action` do not propagate: puppeteer `TimeoutError`s are
	 * dropped and anything else is appended to the returned logs as `[Error] ...`.
	 *
	 * @param action Callback that performs the interaction on the page.
	 * @returns Screenshot data URL, collected logs, current URL and the last mouse position.
	 * @throws Error if no page is open, or if no screenshot could be produced.
	 */
	async doAction(action: (page: Page) => Promise<void>): Promise<BrowserActionResult> {
		const page = this.page
		if (!page) {
			throw new Error(
				"Browser is not launched. This may occur if the browser was automatically closed by a non-`browser_action` tool."
			)
		}

		const logs: string[] = []
		let lastLogTs = Date.now()

		const consoleListener = (msg: { type(): string; text(): string }) => {
			if (msg.type() === "log") {
				logs.push(msg.text())
			} else {
				logs.push(`[${msg.type()}] ${msg.text()}`)
			}
			lastLogTs = Date.now()
		}

		const errorListener = (err: Error) => {
			logs.push(`[Page Error] ${err.toString()}`)
			lastLogTs = Date.now()
		}

		// Add the listeners
		page.on("console", consoleListener)
		page.on("pageerror", errorListener)

		try {
			try {
				await action(page)
			} catch (err: unknown) {
				if (!(err instanceof TimeoutError)) {
					logs.push(`[Error] ${String(err)}`)
				}
			}

			// Wait for console inactivity, with a timeout
			await pWaitFor(() => Date.now() - lastLogTs >= 500, {
				timeout: 3_000,
				interval: 100,
			}).catch(() => {})

			const options: ScreenshotOptions = {
				encoding: "base64",
				clip: { x: 0, y: 0, ...BrowserSession.viewport },
			}

			let screenshotBase64 = await page.screenshot({
				...options,
				type: "webp",
			})
			let screenshot = `data:image/webp;base64,${screenshotBase64}`

			if (!screenshotBase64) {
				console.log("webp screenshot failed, trying png")
				screenshotBase64 = await page.screenshot({
					...options,
					type: "png",
				})
				screenshot = `data:image/png;base64,${screenshotBase64}`
			}

			if (!screenshotBase64) {
				throw new Error("Failed to take screenshot.")
			}

			return {
				screenshot,
				logs: logs.join("\n"),
				currentUrl: page.url(),
				currentMousePosition: this.currentMousePosition,
			}
		} finally {
			// Detach only our own listeners, and do so even when the screenshot throws,
			// so that repeated actions never accumulate handlers on the page.
			// page.removeAllListeners() <- causes the page to crash!
			page.off("console", consoleListener)
			page.off("pageerror", errorListener)
		}
	}

	/**
	 * Navigates the page to `url` and waits for it to settle.
	 *
	 * @param url Address to open.
	 */
	async navigateToUrl(url: string): Promise<BrowserActionResult> {
		return this.doAction(async (page) => {
			// Wait for DOMContentLoaded plus networkidle2 (bounded by a 7s timeout), then poll the
			// HTML size below because the page may still be rendering or loading more resources.
			await page.goto(url, { timeout: 7_000, waitUntil: ["domcontentloaded", "networkidle2"] })
			// await page.goto(url, { timeout: 10_000, waitUntil: "load" })
			await this.waitTillHTMLStable(page) // in case the page is loading more resources
		})
	}

	// page.goto { waitUntil: "networkidle0" } may not ever resolve, and not waiting could return page content too early before js has loaded
	// https://stackoverflow.com/questions/52497252/puppeteer-wait-until-page-is-completely-loaded/61304202#61304202
	/**
	 * Polls the serialized page HTML every 500ms until its length has been
	 * unchanged (and non-zero) for three consecutive checks, or until roughly
	 * `timeout` milliseconds worth of checks have been made.
	 *
	 * @param page Page to poll.
	 * @param timeout Upper bound, in milliseconds, used to derive the number of checks.
	 */
	private async waitTillHTMLStable(page: Page, timeout = 5_000): Promise<void> {
		const checkDurationMsecs = 500 // 1000
		const maxChecks = timeout / checkDurationMsecs
		const minStableSizeIterations = 3
		let lastHTMLSize = 0
		let checkCounts = 1
		let countStableSizeIterations = 0

		while (checkCounts++ <= maxChecks) {
			const html = await page.content()
			const currentHTMLSize = html.length

			// let bodyHTMLSize = await page.evaluate(() => document.body.innerHTML.length)
			console.log("last: ", lastHTMLSize, " <> curr: ", currentHTMLSize)

			if (lastHTMLSize !== 0 && currentHTMLSize === lastHTMLSize) {
				countStableSizeIterations++
			} else {
				countStableSizeIterations = 0 //reset the counter
			}

			if (countStableSizeIterations >= minStableSizeIterations) {
				console.log("Page rendered fully...")
				break
			}

			lastHTMLSize = currentHTMLSize
			await delay(checkDurationMsecs)
		}
	}

	/**
	 * Clicks at the given viewport coordinate. If the click triggers any
	 * network request within 100ms, also waits for navigation and for the
	 * HTML to stabilize.
	 *
	 * A malformed coordinate is reported through the returned logs (as an
	 * `[Error]` entry) rather than thrown, like any other action failure.
	 *
	 * @param coordinate Target position formatted as "x,y".
	 */
	async click(coordinate: string): Promise<BrowserActionResult> {
		const [x, y] = coordinate.split(",").map(Number)
		return this.doAction(async (page) => {
			if (x === undefined || y === undefined || !Number.isFinite(x) || !Number.isFinite(y)) {
				throw new Error(`Invalid click coordinate "${coordinate}"; expected "x,y".`)
			}

			// Set up network request monitoring
			let hasNetworkActivity = false
			const requestListener = () => {
				hasNetworkActivity = true
			}
			page.on("request", requestListener)

			try {
				// Perform the click
				await page.mouse.click(x, y)
				this.currentMousePosition = coordinate

				// Small delay to check if click triggered any network activity
				await delay(100)

				if (hasNetworkActivity) {
					// If we detected network activity, wait for navigation/loading
					await page
						.waitForNavigation({
							waitUntil: ["domcontentloaded", "networkidle2"],
							timeout: 7000,
						})
						.catch(() => {})
					await this.waitTillHTMLStable(page)
				}
			} finally {
				// Clean up listener, also when the click itself fails
				page.off("request", requestListener)
			}
		})
	}

	/**
	 * Types `text` through the page keyboard.
	 *
	 * @param text Characters to type.
	 */
	async type(text: string): Promise<BrowserActionResult> {
		return this.doAction(async (page) => {
			await page.keyboard.type(text)
		})
	}

	/** Scrolls the window down by one `window.innerHeight`. */
	async scrollDown(): Promise<BrowserActionResult> {
		return this.doAction(async (page) => {
			await page.evaluate(() => {
				window.scrollBy(0, window.innerHeight)
			})
		})
	}

	/** Scrolls the window up by one `window.innerHeight`. */
	async scrollUp(): Promise<BrowserActionResult> {
		return this.doAction(async (page) => {
			await page.evaluate(() => {
				window.scrollBy(0, -window.innerHeight)
			})
		})
	}
}
|
||||
@@ -1,13 +1,11 @@
|
||||
import * as vscode from "vscode"
|
||||
import * as fs from "fs/promises"
|
||||
import * as path from "path"
|
||||
import { Browser, Page, ScreenshotOptions, TimeoutError, launch } from "puppeteer-core"
|
||||
import { Browser, Page, launch } from "puppeteer-core"
|
||||
import * as cheerio from "cheerio"
|
||||
import TurndownService from "turndown"
|
||||
// @ts-ignore
|
||||
import PCR from "puppeteer-chromium-resolver"
|
||||
import pWaitFor from "p-wait-for"
|
||||
import delay from "delay"
|
||||
import { fileExistsAtPath } from "../../utils/fs"
|
||||
|
||||
interface PCRStats {
|
||||
@@ -29,19 +27,16 @@ export class UrlContentFetcher {
|
||||
if (!globalStoragePath) {
|
||||
throw new Error("Global storage uri is invalid")
|
||||
}
|
||||
|
||||
const puppeteerDir = path.join(globalStoragePath, "puppeteer")
|
||||
const dirExists = await fileExistsAtPath(puppeteerDir)
|
||||
if (!dirExists) {
|
||||
await fs.mkdir(puppeteerDir, { recursive: true })
|
||||
}
|
||||
|
||||
// if chromium doesn't exist, this will download it to path.join(puppeteerDir, ".chromium-browser-snapshots")
|
||||
// if it does exist it will return the path to existing chromium
|
||||
const stats: PCRStats = await PCR({
|
||||
downloadPath: puppeteerDir,
|
||||
})
|
||||
|
||||
return stats
|
||||
}
|
||||
|
||||
@@ -89,133 +84,4 @@ export class UrlContentFetcher {
|
||||
|
||||
return markdown
|
||||
}
|
||||
|
||||
/**
 * Navigates the already-open page to `url`, collects console messages and
 * page errors emitted while it loads, and captures a screenshot clipped to
 * the page width and at most 8000px of height.
 *
 * Navigation failures do not propagate: puppeteer `TimeoutError`s are dropped
 * and anything else is appended to the logs as `[Navigation Error] ...`.
 *
 * @param url Address to open.
 * @returns Screenshot as a data URL and the collected logs joined by newlines.
 * @throws Error if the browser/page is not initialized or no screenshot could be produced.
 */
async urlToScreenshotAndLogs(url: string): Promise<{ screenshot: string; logs: string }> {
	if (!this.browser || !this.page) {
		throw new Error("Browser not initialized")
	}
	const page = this.page

	const logs: string[] = []
	let lastLogTs = Date.now()

	const consoleListener = (msg: { type(): string; text(): string }) => {
		if (msg.type() === "log") {
			logs.push(msg.text())
		} else {
			logs.push(`[${msg.type()}] ${msg.text()}`)
		}
		lastLogTs = Date.now()
	}
	const errorListener = (err: Error) => {
		logs.push(`[Page Error] ${err.toString()}`)
		lastLogTs = Date.now()
	}

	page.on("console", consoleListener)
	page.on("pageerror", errorListener)

	try {
		try {
			// Wait for DOMContentLoaded plus networkidle2 (bounded by a 7s timeout), then poll the
			// HTML size because the page may still be rendering or loading more resources.
			await page.goto(url, { timeout: 7_000, waitUntil: ["domcontentloaded", "networkidle2"] })
			// await this.page.goto(url, { timeout: 10_000, waitUntil: "load" })
			await this.waitTillHTMLStable(page) // in case the page is loading more resources
		} catch (err: unknown) {
			if (!(err instanceof TimeoutError)) {
				logs.push(`[Navigation Error] ${String(err)}`)
			}
		}

		// Wait for console inactivity, with a timeout
		await pWaitFor(() => Date.now() - lastLogTs >= 500, {
			timeout: 3_000,
			interval: 100,
		}).catch(() => {})

		// image cannot exceed 8_000 pixels
		const { pageHeight, pageWidth } = await page.evaluate(() => {
			const html: HTMLElement | null = document.documentElement
			const body: HTMLElement | null = document.body
			return {
				pageHeight: html?.scrollHeight || body?.scrollHeight,
				pageWidth: html?.clientWidth || body?.clientWidth,
			}
		})
		// const defaultViewport = this.page.viewport(); // width 800 height 600 by default
		let options: ScreenshotOptions
		if (pageHeight && pageWidth) {
			options = {
				// fullPage: true, // clip and fullPage are mutually exclusive
				encoding: "base64",
				// quality: 80,
				clip: {
					x: 0,
					y: 0,
					width: pageWidth,
					height: Math.min(pageHeight, 8_000),
				},
			}
		} else {
			// if we can't get the page dimensions, fallback to full page screenshot
			options = {
				encoding: "base64",
				fullPage: true,
			}
		}

		let screenshotBase64 = await page.screenshot({
			...options,
			type: "webp",
		})
		let screenshot = `data:image/webp;base64,${screenshotBase64}`

		if (!screenshotBase64) {
			console.log("webp screenshot failed, trying png")
			screenshotBase64 = await page.screenshot({
				...options,
				type: "png",
			})
			screenshot = `data:image/png;base64,${screenshotBase64}`
		}

		if (!screenshotBase64) {
			throw new Error("Failed to take screenshot.")
		}

		return {
			screenshot,
			logs: logs.join("\n"),
		}
	} finally {
		// Detach only the two listeners registered above, on success and on failure alike.
		// removeAllListeners() would also drop handlers this method did not install.
		page.off("console", consoleListener)
		page.off("pageerror", errorListener)
	}
}
|
||||
|
||||
// page.goto { waitUntil: "networkidle0" } may not ever resolve, and not waiting could return page content too early before js has loaded
|
||||
// https://stackoverflow.com/questions/52497252/puppeteer-wait-until-page-is-completely-loaded/61304202#61304202
|
||||
/**
 * Polls the serialized page HTML at a fixed 500ms interval and returns once
 * its length has stayed the same (and non-zero) for three consecutive polls,
 * or once the number of polls derived from `timeout` has been used up.
 *
 * @param page Page whose content is polled.
 * @param timeout Budget in milliseconds; divided by the poll interval to get the poll limit.
 */
private async waitTillHTMLStable(page: Page, timeout = 5_000) {
	const pollIntervalMs = 500 // 1000
	const pollLimit = timeout / pollIntervalMs
	const requiredStablePolls = 3

	let previousSize = 0
	let stablePolls = 0

	for (let poll = 1; poll <= pollLimit; poll++) {
		const currentSize = (await page.content()).length

		// let bodyHTMLSize = await page.evaluate(() => document.body.innerHTML.length)
		console.log("last: ", previousSize, " <> curr: ", currentSize)

		// A poll only counts as stable when a previous non-empty measurement exists and matches.
		const unchanged = previousSize !== 0 && currentSize === previousSize
		stablePolls = unchanged ? stablePolls + 1 : 0

		if (stablePolls >= requiredStablePolls) {
			console.log("Page rendered fully...")
			return
		}

		previousSize = currentSize
		await delay(pollIntervalMs)
	}
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user