mirror of
https://github.com/pacnpal/Roo-Code.git
synced 2025-12-20 12:21:13 -05:00
Adjust browser resolution; show loading spinner when browser session has started
This commit is contained in:
@@ -1398,6 +1398,11 @@ export class Cline {
|
||||
if (!didApprove) {
|
||||
break
|
||||
}
|
||||
|
||||
// NOTE: it's okay that we call this message since the partial inspect_site is finished streaming. The only scenario we have to avoid is sending messages WHILE a partial message exists at the end of the messages array. For example the api_req_finished message would interfere with the partial message, so we needed to remove that.
|
||||
// await this.say("inspect_site_result", "") // no result, starts the loading spinner waiting for result
|
||||
await this.say("browser_action_result", "") // starts loading spinner
|
||||
|
||||
await this.browserSession.launchBrowser()
|
||||
browserActionResult = await this.browserSession.navigateToUrl(url)
|
||||
} else {
|
||||
@@ -1451,9 +1456,6 @@ export class Cline {
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE: it's okay that we call this message since the partial inspect_site is finished streaming. The only scenario we have to avoid is sending messages WHILE a partial message exists at the end of the messages array. For example the api_req_finished message would interfere with the partial message, so we needed to remove that.
|
||||
// await this.say("inspect_site_result", "") // no result, starts the loading spinner waiting for result
|
||||
|
||||
switch (action) {
|
||||
case "launch":
|
||||
case "click":
|
||||
|
||||
@@ -103,7 +103,7 @@ Usage:
|
||||
Description: Request to interact with a Puppeteer-controlled browser. Every action except \`close\` will be responded to with a screenshot of the browser's current state, along with any new console logs. You may only perform one browser action at a time, as you should assess the screenshot and logs to determine the next action.
|
||||
- The sequence of actions **must always start with** launching the browser at a URL, and **must always end with** closing the browser. If you need to visit a new URL that is not possible to navigate to from the current webpage, you must first close the browser, then launch again at the new URL.
|
||||
- While the browser is active, only the \`browser_action\` tool can be used. No other tools should be called during this time. You may proceed to use other tools only after closing the browser.
|
||||
- The browser window has a resolution of **800x600** pixels. When performing any click actions, ensure the coordinates are within this resolution range.
|
||||
- The browser window has a resolution of **900x600** pixels. When performing any click actions, ensure the coordinates are within this resolution range.
|
||||
- Before clicking on any elements such as icons, links, or buttons, you must consult the provided screenshot of the page to determine the coordinates of the element. The click should be targeted at the **center of the element**, not on its edges.
|
||||
Parameters:
|
||||
- action: (required) The action to perform. The available actions are:
|
||||
@@ -121,8 +121,8 @@ Parameters:
|
||||
- Example: \`<action>close</action>\`
|
||||
- url: (optional) Use this for providing the URL for the \`launch\` action.
|
||||
* Example: <url>https://example.com</url>
|
||||
- coordinate: (optional) The X and Y coordinates for the \`click\` action. Coordinates should be within the **800x600** resolution.
|
||||
* Example: <coordinate>400,300</coordinate>
|
||||
- coordinate: (optional) The X and Y coordinates for the \`click\` action. Coordinates should be within the **900x600** resolution.
|
||||
* Example: <coordinate>450,300</coordinate>
|
||||
- text: (optional) Use this for providing the text for the \`type\` action.
|
||||
* Example: <text>Hello, world!</text>
|
||||
Usage:
|
||||
|
||||
@@ -59,10 +59,10 @@ export class BrowserSession {
|
||||
],
|
||||
executablePath: stats.executablePath,
|
||||
defaultViewport: {
|
||||
width: 800,
|
||||
width: 900,
|
||||
height: 600,
|
||||
},
|
||||
headless: false,
|
||||
// headless: false,
|
||||
})
|
||||
// (latest version of puppeteer does not add headless to user agent)
|
||||
this.page = await this.browser?.newPage()
|
||||
@@ -125,7 +125,7 @@ export class BrowserSession {
|
||||
clip: {
|
||||
x: 0,
|
||||
y: 0,
|
||||
width: 800,
|
||||
width: 900,
|
||||
height: 600,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -32,16 +32,14 @@ interface BrowserSessionRowProps {
|
||||
*/
|
||||
|
||||
const BrowserSessionRow = memo((props: BrowserSessionRowProps) => {
|
||||
const { messages, isLast, onHeightChange, lastModifiedMessage } = props
|
||||
const { messages, isLast, onHeightChange } = props
|
||||
const prevHeightRef = useRef(0)
|
||||
const [maxActionHeight, setMaxActionHeight] = useState(0)
|
||||
const [consoleLogsExpanded, setConsoleLogsExpanded] = useState(false)
|
||||
|
||||
const isBrowsing = useMemo(() => {
|
||||
return (
|
||||
isLast && lastModifiedMessage?.ask !== "resume_task" && lastModifiedMessage?.ask !== "resume_completed_task"
|
||||
)
|
||||
}, [isLast, lastModifiedMessage])
|
||||
return isLast && messages.some((m) => m.say === "browser_action_result") // after user approves, browser_action_result with "" is sent to indicate that the session has started
|
||||
}, [isLast, messages])
|
||||
|
||||
// Organize messages into pages with current state and next action
|
||||
const pages = useMemo(() => {
|
||||
@@ -66,6 +64,10 @@ const BrowserSessionRow = memo((props: BrowserSessionRowProps) => {
|
||||
// Start first page
|
||||
currentStateMessages = [message]
|
||||
} else if (message.say === "browser_action_result") {
|
||||
if (message.text === "") {
|
||||
// first browser_action_result is an empty string that signals that session has started
|
||||
return
|
||||
}
|
||||
// Complete current state
|
||||
currentStateMessages.push(message)
|
||||
const resultData = JSON.parse(message.text || "{}") as BrowserActionResult
|
||||
@@ -156,13 +158,13 @@ const BrowserSessionRow = memo((props: BrowserSessionRowProps) => {
|
||||
const displayState = isLastPage
|
||||
? {
|
||||
url: currentPage?.currentState.url || latestState.url || initialUrl,
|
||||
mousePosition: currentPage?.currentState.mousePosition || latestState.mousePosition || "400,300",
|
||||
mousePosition: currentPage?.currentState.mousePosition || latestState.mousePosition || "700,400",
|
||||
consoleLogs: currentPage?.currentState.consoleLogs,
|
||||
screenshot: currentPage?.currentState.screenshot || latestState.screenshot,
|
||||
}
|
||||
: {
|
||||
url: currentPage?.currentState.url || initialUrl,
|
||||
mousePosition: currentPage?.currentState.mousePosition || "400,300",
|
||||
mousePosition: currentPage?.currentState.mousePosition || "700,400",
|
||||
consoleLogs: currentPage?.currentState.consoleLogs,
|
||||
screenshot: currentPage?.currentState.screenshot,
|
||||
}
|
||||
@@ -177,6 +179,9 @@ const BrowserSessionRow = memo((props: BrowserSessionRowProps) => {
|
||||
setMaxActionHeight={setMaxActionHeight}
|
||||
/>
|
||||
))}
|
||||
{!isBrowsing && messages.some((m) => m.say === "browser_action_result") && currentPageIndex === 0 && (
|
||||
<BrowserActionBox action={"launch"} text={initialUrl} />
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
|
||||
@@ -189,14 +194,29 @@ const BrowserSessionRow = memo((props: BrowserSessionRowProps) => {
|
||||
}
|
||||
}, [actionHeight, maxActionHeight])
|
||||
|
||||
useEffect(() => {
|
||||
if (!displayState.consoleLogs || displayState.consoleLogs.trim() === "") {
|
||||
setConsoleLogsExpanded(false)
|
||||
// Track latest click coordinate
|
||||
const latestClickPosition = useMemo(() => {
|
||||
if (!isBrowsing) return undefined
|
||||
|
||||
// Look through current page's next actions for the latest browser_action
|
||||
const actions = currentPage?.nextAction?.messages || []
|
||||
for (let i = actions.length - 1; i >= 0; i--) {
|
||||
const message = actions[i]
|
||||
if (message.say === "browser_action") {
|
||||
const browserAction = JSON.parse(message.text || "{}") as ClineSayBrowserAction
|
||||
if (browserAction.action === "click" && browserAction.coordinate) {
|
||||
return browserAction.coordinate
|
||||
}
|
||||
}
|
||||
}
|
||||
}, [displayState.consoleLogs])
|
||||
return undefined
|
||||
}, [isBrowsing, currentPage?.nextAction?.messages])
|
||||
|
||||
// Use latest click position while browsing, otherwise use display state
|
||||
const mousePosition = isBrowsing ? latestClickPosition || displayState.mousePosition : displayState.mousePosition
|
||||
|
||||
const [browserSessionRow, { height }] = useSize(
|
||||
<div style={{ padding: "10px 6px 10px 15px" }}>
|
||||
<div style={{ padding: "10px 6px 10px 15px", marginBottom: -10 }}>
|
||||
<div style={{ display: "flex", alignItems: "center", gap: "10px", marginBottom: "10px" }}>
|
||||
{isBrowsing ? (
|
||||
<ProgressIndicator />
|
||||
@@ -215,6 +235,7 @@ const BrowserSessionRow = memo((props: BrowserSessionRowProps) => {
|
||||
border: "1px solid var(--vscode-editorGroup-border)",
|
||||
overflow: "hidden",
|
||||
backgroundColor: CODE_BLOCK_BG_COLOR,
|
||||
marginBottom: 10,
|
||||
}}>
|
||||
{/* URL Bar */}
|
||||
<div
|
||||
@@ -229,19 +250,21 @@ const BrowserSessionRow = memo((props: BrowserSessionRowProps) => {
|
||||
display: "flex",
|
||||
alignItems: "center",
|
||||
justifyContent: "center",
|
||||
color: "var(--vscode-input-foreground)",
|
||||
color: displayState.url
|
||||
? "var(--vscode-input-foreground)"
|
||||
: "var(--vscode-descriptionForeground)",
|
||||
fontSize: "12px",
|
||||
wordBreak: "break-all",
|
||||
whiteSpace: "normal",
|
||||
}}>
|
||||
{displayState.url}
|
||||
{displayState.url || "http"}
|
||||
</div>
|
||||
|
||||
{/* Screenshot Area */}
|
||||
<div
|
||||
style={{
|
||||
width: "100%",
|
||||
paddingBottom: "75%",
|
||||
paddingBottom: "calc(200%/3)",
|
||||
position: "relative",
|
||||
backgroundColor: "var(--vscode-input-background)",
|
||||
}}>
|
||||
@@ -282,8 +305,8 @@ const BrowserSessionRow = memo((props: BrowserSessionRowProps) => {
|
||||
<BrowserCursor
|
||||
style={{
|
||||
position: "absolute",
|
||||
top: `${(parseInt(displayState.mousePosition.split(",")[1]) / 600) * 100}%`,
|
||||
left: `${(parseInt(displayState.mousePosition.split(",")[0]) / 800) * 100}%`,
|
||||
top: `${(parseInt(mousePosition.split(",")[1]) / 600) * 100}%`,
|
||||
left: `${(parseInt(mousePosition.split(",")[0]) / 900) * 100}%`,
|
||||
transition: "top 0.3s ease-out, left 0.3s ease-out",
|
||||
}}
|
||||
/>
|
||||
@@ -381,34 +404,13 @@ const BrowserSessionRowContent = ({
|
||||
marginBottom: "10px",
|
||||
}
|
||||
|
||||
// Copy all the rendering logic from ChatRowContent
|
||||
// This includes handling all message types: api_req_started, browser_action, text, etc.
|
||||
// The implementation would be identical to ChatRowContent
|
||||
|
||||
const getBrowserActionText = (action: BrowserAction, coordinate?: string, text?: string) => {
|
||||
switch (action) {
|
||||
case "click":
|
||||
return `Click (${coordinate?.replace(",", ", ")})`
|
||||
case "type":
|
||||
return `Type "${text}"`
|
||||
case "scroll_down":
|
||||
return "Scroll down"
|
||||
case "scroll_up":
|
||||
return "Scroll up"
|
||||
case "close":
|
||||
return "Close browser"
|
||||
default:
|
||||
return action
|
||||
}
|
||||
}
|
||||
|
||||
switch (message.type) {
|
||||
case "say":
|
||||
switch (message.say) {
|
||||
case "api_req_started":
|
||||
case "text":
|
||||
return (
|
||||
<div style={{ padding: "15px 0 0px 0" }}>
|
||||
<div style={{ padding: "10px 0 10px 0" }}>
|
||||
<ChatRowContent
|
||||
message={message}
|
||||
isExpanded={isExpanded(message.ts)}
|
||||
@@ -427,37 +429,11 @@ const BrowserSessionRowContent = ({
|
||||
case "browser_action":
|
||||
const browserAction = JSON.parse(message.text || "{}") as ClineSayBrowserAction
|
||||
return (
|
||||
<div style={{ padding: "15px 0 0 0" }}>
|
||||
<div
|
||||
style={{
|
||||
borderRadius: 3,
|
||||
backgroundColor: CODE_BLOCK_BG_COLOR,
|
||||
overflow: "hidden",
|
||||
border: "1px solid var(--vscode-editorGroup-border)",
|
||||
}}>
|
||||
<div
|
||||
style={{
|
||||
display: "flex",
|
||||
alignItems: "center",
|
||||
padding: "9px 10px",
|
||||
}}>
|
||||
<span
|
||||
style={{
|
||||
whiteSpace: "nowrap",
|
||||
overflow: "hidden",
|
||||
textOverflow: "ellipsis",
|
||||
marginRight: "8px",
|
||||
}}>
|
||||
<span style={{ fontWeight: 500 }}>Browse Action: </span>
|
||||
{getBrowserActionText(
|
||||
browserAction.action,
|
||||
browserAction.coordinate,
|
||||
browserAction.text
|
||||
)}
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<BrowserActionBox
|
||||
action={browserAction.action}
|
||||
coordinate={browserAction.coordinate}
|
||||
text={browserAction.text}
|
||||
/>
|
||||
)
|
||||
|
||||
default:
|
||||
@@ -490,6 +466,62 @@ const BrowserSessionRowContent = ({
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Renders a bordered box containing a one-line, human-readable summary of a
 * single browser action, prefixed with the "Browse Action: " label.
 *
 * @param action - The browser action being summarised.
 * @param coordinate - Optional "x,y" string; only used for the `click` action.
 * @param text - Optional text; used as the URL for `launch` and as the typed
 *   text for `type`.
 */
const BrowserActionBox = ({
	action,
	coordinate,
	text,
}: {
	action: BrowserAction
	coordinate?: string
	text?: string
}) => {
	/**
	 * Builds the label for an action. Optional fields that are missing are
	 * omitted from the label instead of being interpolated as the literal
	 * string "undefined".
	 */
	const getBrowserActionText = (action: BrowserAction, coordinate?: string, text?: string) => {
		switch (action) {
			case "launch":
				return text ? `Launch browser at ${text}` : "Launch browser"
			case "click":
				// Only the first comma is spaced out: "400,300" -> "400, 300".
				return coordinate ? `Click (${coordinate.replace(",", ", ")})` : "Click"
			case "type":
				return `Type "${text ?? ""}"`
			case "scroll_down":
				return "Scroll down"
			case "scroll_up":
				return "Scroll up"
			case "close":
				return "Close browser"
			default:
				// Unknown actions fall back to their raw identifier.
				return action
		}
	}

	return (
		<div style={{ padding: "15px 0 0 0" }}>
			<div
				style={{
					borderRadius: 3,
					backgroundColor: CODE_BLOCK_BG_COLOR,
					overflow: "hidden",
					border: "1px solid var(--vscode-editorGroup-border)",
				}}>
				<div
					style={{
						display: "flex",
						alignItems: "center",
						padding: "9px 10px",
					}}>
					{/* Allow wrapping so long labels (e.g. launch URLs) are not clipped */}
					<span
						style={{
							whiteSpace: "normal",
							wordBreak: "break-word",
						}}>
						<span style={{ fontWeight: 500 }}>Browse Action: </span>
						{getBrowserActionText(action, coordinate, text)}
					</span>
				</div>
			</div>
		</div>
	)
}
|
||||
|
||||
const BrowserCursor: React.FC<{ style?: React.CSSProperties }> = ({ style }) => {
|
||||
// (can't use svgs in vsc extensions)
|
||||
const cursorBase64 =
|
||||
|
||||
@@ -436,9 +436,6 @@ const ChatView = ({ isHidden, showAnnouncement, hideAnnouncement, showHistoryVie
|
||||
return false
|
||||
}
|
||||
break
|
||||
// case "inspect_site_result":
|
||||
// // don't show row for inspect site result until a screenshot is captured
|
||||
// return !!message.images
|
||||
}
|
||||
return true
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user