Toggle to switch browser size to 1280x800

This commit is contained in:
Matt Rubens
2024-12-17 12:43:06 -05:00
parent eab4a80981
commit 6a6084b7ca
11 changed files with 62 additions and 21 deletions

View File

@@ -0,0 +1,5 @@
---
"roo-cline": patch
---
Add an experimental option to use a larger browser viewport (1280x800)

View File

@@ -10,6 +10,7 @@ A fork of Cline, an autonomous coding agent, tweaked for more speed and flexibil
- `.clinerules` for project-specific instructions
- Drag and drop images into chats
- Sound effects for feedback
- Option to use a larger 1280x800 browser viewport
- Quick prompt copying from history
- OpenRouter compression support
- Support for newer Gemini models (gemini-exp-1206, gemini-2.0-flash-exp) and Meta 3, 3.1, and 3.2 models via AWS Bedrock

View File

@@ -766,7 +766,8 @@ export class Cline {
throw new Error("MCP hub not available")
}
const systemPrompt = await SYSTEM_PROMPT(cwd, this.api.getModel().info.supportsComputerUse ?? false, mcpHub, this.diffStrategy) + await addCustomInstructions(this.customInstructions ?? '', cwd)
const { browserLargeViewport } = await this.providerRef.deref()?.getState() ?? {}
const systemPrompt = await SYSTEM_PROMPT(cwd, this.api.getModel().info.supportsComputerUse ?? false, mcpHub, this.diffStrategy, browserLargeViewport) + await addCustomInstructions(this.customInstructions ?? '', cwd)
// If the previous API request's total token usage is close to the context window, truncate the conversation history to free up space for the new request
if (previousApiReqIndex >= 0) {

View File

@@ -10,7 +10,8 @@ export const SYSTEM_PROMPT = async (
cwd: string,
supportsComputerUse: boolean,
mcpHub: McpHub,
diffStrategy?: DiffStrategy
diffStrategy?: DiffStrategy,
browserLargeViewport?: boolean
) => `You are Cline, a highly skilled software engineer with extensive knowledge in many programming languages, frameworks, design patterns, and best practices.
====
@@ -111,7 +112,7 @@ Usage:
Description: Request to interact with a Puppeteer-controlled browser. Every action, except \`close\`, will be responded to with a screenshot of the browser's current state, along with any new console logs. You may only perform one browser action per message, and wait for the user's response including a screenshot and logs to determine the next action.
- The sequence of actions **must always start with** launching the browser at a URL, and **must always end with** closing the browser. If you need to visit a new URL that is not possible to navigate to from the current webpage, you must first close the browser, then launch again at the new URL.
- While the browser is active, only the \`browser_action\` tool can be used. No other tools should be called during this time. You may proceed to use other tools only after closing the browser. For example if you run into an error and need to fix a file, you must close the browser, then use other tools to make the necessary changes, then re-launch the browser to verify the result.
- The browser window has a resolution of **900x600** pixels. When performing any click actions, ensure the coordinates are within this resolution range.
- The browser window has a resolution of **${browserLargeViewport ? "1280x800" : "900x600"}** pixels. When performing any click actions, ensure the coordinates are within this resolution range.
- Before clicking on any elements such as icons, links, or buttons, you must consult the provided screenshot of the page to determine the coordinates of the element. The click should be targeted at the **center of the element**, not on its edges.
Parameters:
- action: (required) The action to perform. The available actions are:
@@ -129,7 +130,7 @@ Parameters:
- Example: \`<action>close</action>\`
- url: (optional) Use this for providing the URL for the \`launch\` action.
* Example: <url>https://example.com</url>
- coordinate: (optional) The X and Y coordinates for the \`click\` action. Coordinates should be within the **900x600** resolution.
- coordinate: (optional) The X and Y coordinates for the \`click\` action. Coordinates should be within the **${browserLargeViewport ? "1280x800" : "900x600"}** resolution.
* Example: <coordinate>450,300</coordinate>
- text: (optional) Use this for providing the text for the \`type\` action.
* Example: <text>Hello, world!</text>

View File

@@ -69,6 +69,7 @@ type GlobalStateKey =
| "soundVolume"
| "diffEnabled"
| "alwaysAllowMcp"
| "browserLargeViewport"
export const GlobalFileNames = {
apiConversationHistory: "api_conversation_history.json",
@@ -584,8 +585,6 @@ export class ClineProvider implements vscode.WebviewViewProvider {
}
break
}
// Add more switch case statements here as more webview message commands
// are created within the webview context (i.e. inside media/main.js)
case "playSound":
if (message.audioType) {
const soundPath = path.join(this.context.extensionPath, "audio", `${message.audioType}.wav`)
@@ -609,6 +608,11 @@ export class ClineProvider implements vscode.WebviewViewProvider {
await this.updateGlobalState("diffEnabled", diffEnabled)
await this.postStateToWebview()
break
case "browserLargeViewport":
const browserLargeViewport = message.bool ?? false
await this.updateGlobalState("browserLargeViewport", browserLargeViewport)
await this.postStateToWebview()
break
}
},
null,
@@ -937,6 +941,7 @@ export class ClineProvider implements vscode.WebviewViewProvider {
diffEnabled,
taskHistory,
soundVolume,
browserLargeViewport,
} = await this.getState()
const allowedCommands = vscode.workspace
@@ -962,6 +967,7 @@ export class ClineProvider implements vscode.WebviewViewProvider {
shouldShowAnnouncement: lastShownAnnouncementId !== this.latestAnnouncementId,
allowedCommands,
soundVolume: soundVolume ?? 0.5,
browserLargeViewport: browserLargeViewport ?? false,
}
}
@@ -1055,6 +1061,7 @@ export class ClineProvider implements vscode.WebviewViewProvider {
soundEnabled,
diffEnabled,
soundVolume,
browserLargeViewport,
] = await Promise.all([
this.getGlobalState("apiProvider") as Promise<ApiProvider | undefined>,
this.getGlobalState("apiModelId") as Promise<string | undefined>,
@@ -1093,6 +1100,7 @@ export class ClineProvider implements vscode.WebviewViewProvider {
this.getGlobalState("soundEnabled") as Promise<boolean | undefined>,
this.getGlobalState("diffEnabled") as Promise<boolean | undefined>,
this.getGlobalState("soundVolume") as Promise<number | undefined>,
this.getGlobalState("browserLargeViewport") as Promise<boolean | undefined>,
])
let apiProvider: ApiProvider
@@ -1149,6 +1157,7 @@ export class ClineProvider implements vscode.WebviewViewProvider {
soundEnabled: soundEnabled ?? false,
diffEnabled: diffEnabled ?? false,
soundVolume,
browserLargeViewport: browserLargeViewport ?? false,
}
}

View File

@@ -45,7 +45,7 @@ export class BrowserSession {
return stats
}
async launchBrowser() {
async launchBrowser(): Promise<void> {
console.log("launch browser called")
if (this.browser) {
// throw new Error("Browser already launched")
@@ -58,10 +58,9 @@ export class BrowserSession {
"--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36",
],
executablePath: stats.executablePath,
defaultViewport: {
width: 900,
height: 600,
},
defaultViewport: await this.context.globalState.get("browserLargeViewport")
? { width: 1280, height: 800 }
: { width: 900, height: 600 },
// headless: false,
})
// (latest version of puppeteer does not add headless to user agent)
@@ -245,25 +244,27 @@ export class BrowserSession {
}
async scrollDown(): Promise<BrowserActionResult> {
const isLargeViewport = await this.context.globalState.get("browserLargeViewport")
return this.doAction(async (page) => {
await page.evaluate(() => {
await page.evaluate((scrollHeight) => {
window.scrollBy({
top: 600,
top: scrollHeight,
behavior: "auto",
})
})
}, isLargeViewport ? 800 : 600)
await delay(300)
})
}
async scrollUp(): Promise<BrowserActionResult> {
const isLargeViewport = await this.context.globalState.get("browserLargeViewport")
return this.doAction(async (page) => {
await page.evaluate(() => {
await page.evaluate((scrollHeight) => {
window.scrollBy({
top: -600,
top: -scrollHeight,
behavior: "auto",
})
})
}, isLargeViewport ? 800 : 600)
await delay(300)
})
}

View File

@@ -53,6 +53,7 @@ export interface ExtensionState {
soundEnabled?: boolean
soundVolume?: number
diffEnabled?: boolean
browserLargeViewport?: boolean
}
export interface ClineMessage {

View File

@@ -34,6 +34,7 @@ export interface WebviewMessage {
| "soundEnabled"
| "soundVolume"
| "diffEnabled"
| "browserLargeViewport"
| "openMcpSettings"
| "restartMcpServer"
| "toggleToolAlwaysAllow"

View File

@@ -1,6 +1,7 @@
import deepEqual from "fast-deep-equal"
import React, { memo, useEffect, useMemo, useRef, useState } from "react"
import { useSize } from "react-use"
import { useExtensionState } from "../../context/ExtensionStateContext"
import {
BrowserAction,
BrowserActionResult,
@@ -219,6 +220,7 @@ const BrowserSessionRow = memo((props: BrowserSessionRowProps) => {
}, [isBrowsing, currentPage?.nextAction?.messages])
// Use latest click position while browsing, otherwise use display state
const { browserLargeViewport } = useExtensionState()
const mousePosition = isBrowsing ? latestClickPosition || displayState.mousePosition : displayState.mousePosition
const [browserSessionRow, { height }] = useSize(
@@ -277,7 +279,7 @@ const BrowserSessionRow = memo((props: BrowserSessionRowProps) => {
<div
style={{
width: "100%",
paddingBottom: "calc(200%/3)",
paddingBottom: browserLargeViewport ? "62.5%" : "66.67%", // 800/1280 = 0.625, 600/900 = 0.667
position: "relative",
backgroundColor: "var(--vscode-input-background)",
}}>
@@ -319,8 +321,8 @@ const BrowserSessionRow = memo((props: BrowserSessionRowProps) => {
<BrowserCursor
style={{
position: "absolute",
top: `${(parseInt(mousePosition.split(",")[1]) / 600) * 100}%`,
left: `${(parseInt(mousePosition.split(",")[0]) / 900) * 100}%`,
top: `${(parseInt(mousePosition.split(",")[1]) / (browserLargeViewport ? 800 : 600)) * 100}%`,
left: `${(parseInt(mousePosition.split(",")[0]) / (browserLargeViewport ? 1280 : 900)) * 100}%`,
transition: "top 0.3s ease-out, left 0.3s ease-out",
}}
/>

View File

@@ -33,6 +33,8 @@ const SettingsView = ({ onDone }: SettingsViewProps) => {
setSoundVolume,
diffEnabled,
setDiffEnabled,
browserLargeViewport = false,
setBrowserLargeViewport,
openRouterModels,
setAllowedCommands,
allowedCommands,
@@ -62,6 +64,7 @@ const SettingsView = ({ onDone }: SettingsViewProps) => {
vscode.postMessage({ type: "soundEnabled", bool: soundEnabled })
vscode.postMessage({ type: "soundVolume", value: soundVolume })
vscode.postMessage({ type: "diffEnabled", bool: diffEnabled })
vscode.postMessage({ type: "browserLargeViewport", bool: browserLargeViewport })
onDone()
}
}
@@ -317,6 +320,20 @@ const SettingsView = ({ onDone }: SettingsViewProps) => {
<div style={{ marginBottom: 5 }}>
<h4 style={{ fontWeight: 500, marginBottom: 10 }}>Experimental Features</h4>
<div style={{ marginBottom: 10 }}>
<VSCodeCheckbox checked={browserLargeViewport} onChange={(e: any) => setBrowserLargeViewport(e.target.checked)}>
<span style={{ fontWeight: "500" }}>Use larger browser viewport (1280x800)</span>
</VSCodeCheckbox>
<p
style={{
fontSize: "12px",
marginTop: "5px",
color: "var(--vscode-descriptionForeground)",
}}>
When enabled, Cline will use a larger viewport size for browser interactions.
</p>
</div>
<div style={{ marginBottom: 5 }}>
<div style={{ marginBottom: 10 }}>
<VSCodeCheckbox checked={soundEnabled} onChange={(e: any) => setSoundEnabled(e.target.checked)}>

View File

@@ -31,6 +31,7 @@ export interface ExtensionStateContextType extends ExtensionState {
setSoundEnabled: (value: boolean) => void
setSoundVolume: (value: number) => void
setDiffEnabled: (value: boolean) => void
setBrowserLargeViewport: (value: boolean) => void
}
const ExtensionStateContext = createContext<ExtensionStateContextType | undefined>(undefined)
@@ -147,6 +148,7 @@ export const ExtensionStateContextProvider: React.FC<{ children: React.ReactNode
setSoundEnabled: (value) => setState((prevState) => ({ ...prevState, soundEnabled: value })),
setSoundVolume: (value) => setState((prevState) => ({ ...prevState, soundVolume: value })),
setDiffEnabled: (value) => setState((prevState) => ({ ...prevState, diffEnabled: value })),
setBrowserLargeViewport: (value) => setState((prevState) => ({ ...prevState, browserLargeViewport: value })),
}
return <ExtensionStateContext.Provider value={contextValue}>{children}</ExtensionStateContext.Provider>