This commit is contained in:
Saoud Rizwan
2024-10-27 18:33:20 -04:00
parent 4ae190f70e
commit bca22a8aa1
2 changed files with 22 additions and 12 deletions

View File

@@ -100,9 +100,9 @@ Usage:
? `
## browser_action
Description: Request to interact with a Puppeteer-controlled browser. Every action except \`close\` will be responded to with a screenshot of the browser's current state, along with any new console logs. You may only perform one browser action at a time, as you should assess the screenshot and logs to determine the next action.
Description: Request to interact with a Puppeteer-controlled browser. Every action, except \`close\`, will be responded to with a screenshot of the browser's current state, along with any new console logs. You may only perform one browser action per message, and must wait for the user's response, including a screenshot and logs, to determine the next action.
- The sequence of actions **must always start with** launching the browser at a URL, and **must always end with** closing the browser. If you need to visit a new URL that is not possible to navigate to from the current webpage, you must first close the browser, then launch again at the new URL.
- While the browser is active, only the \`browser_action\` tool can be used. No other tools should be called during this time. You may proceed to use other tools only after closing the browser.
- While the browser is active, only the \`browser_action\` tool can be used. No other tools should be called during this time. You may proceed to use other tools only after closing the browser. For example, if you run into an error and need to fix a file, you must close the browser, then use other tools to make the necessary changes, then re-launch the browser to verify the result.
- The browser window has a resolution of **900x600** pixels. When performing any click actions, ensure the coordinates are within this resolution range.
- Before clicking on any elements such as icons, links, or buttons, you must consult the provided screenshot of the page to determine the coordinates of the element. The click should be targeted at the **center of the element**, not on its edges.
Parameters:

View File

@@ -74,6 +74,7 @@ export class BrowserSession {
await this.browser?.close().catch(() => {})
this.browser = undefined
this.page = undefined
this.currentMousePosition = undefined
}
return {}
}
@@ -122,12 +123,13 @@ export class BrowserSession {
let options: ScreenshotOptions = {
encoding: "base64",
clip: {
x: 0,
y: 0,
width: 900,
height: 600,
},
// clip: {
// x: 0,
// y: 0,
// width: 900,
// height: 600,
// },
}
let screenshotBase64 = await this.page.screenshot({
@@ -172,8 +174,8 @@ export class BrowserSession {
// page.goto { waitUntil: "networkidle0" } may not ever resolve, and not waiting could return page content too early before js has loaded
// https://stackoverflow.com/questions/52497252/puppeteer-wait-until-page-is-completely-loaded/61304202#61304202
private async waitTillHTMLStable(page: Page, timeout = 4_000) {
const checkDurationMsecs = 400 // 1000
private async waitTillHTMLStable(page: Page, timeout = 5_000) {
const checkDurationMsecs = 500 // 1000
const maxChecks = timeout / checkDurationMsecs
let lastHTMLSize = 0
let checkCounts = 1
@@ -245,16 +247,24 @@ export class BrowserSession {
async scrollDown(): Promise<BrowserActionResult> {
return this.doAction(async (page) => {
await page.evaluate(() => {
window.scrollBy(0, window.innerHeight)
window.scrollBy({
top: 600,
behavior: "auto",
})
})
await delay(300)
})
}
async scrollUp(): Promise<BrowserActionResult> {
return this.doAction(async (page) => {
await page.evaluate(() => {
window.scrollBy(0, -window.innerHeight)
window.scrollBy({
top: -600,
behavior: "auto",
})
})
await delay(300)
})
}
}