Add BrowserSessionRow grouping

This commit is contained in:
Saoud Rizwan
2024-10-27 04:57:31 -04:00
parent d9f9e7ba16
commit 68df8809ad
4 changed files with 296 additions and 108 deletions

View File

@@ -1356,6 +1356,7 @@ export class Cline {
// if the block is complete and we don't have a valid action this is a mistake
this.consecutiveMistakeCount++
pushToolResult(await this.sayAndCreateMissingParamError("browser_action", "action"))
await this.browserSession.closeBrowser()
}
break
}
@@ -1363,14 +1364,18 @@ export class Cline {
try {
if (block.partial) {
if (action === "launch") {
await this.ask("browser_action_launch", url, block.partial).catch(() => {})
await this.ask(
"browser_action_launch",
removeClosingTag("url", url),
block.partial
).catch(() => {})
} else {
await this.say(
"browser_action",
JSON.stringify({
action: action as BrowserAction,
coordinate,
text,
coordinate: removeClosingTag("coordinate", coordinate),
text: removeClosingTag("text", text),
} satisfies ClineSayBrowserAction),
undefined,
block.partial
@@ -1385,6 +1390,7 @@ export class Cline {
pushToolResult(
await this.sayAndCreateMissingParamError("browser_action", "url")
)
await this.browserSession.closeBrowser()
break
}
this.consecutiveMistakeCount = 0
@@ -1401,6 +1407,7 @@ export class Cline {
pushToolResult(
await this.sayAndCreateMissingParamError("browser_action", "coordinate")
)
await this.browserSession.closeBrowser()
break // can't be within an inner switch
}
}
@@ -1410,6 +1417,7 @@ export class Cline {
pushToolResult(
await this.sayAndCreateMissingParamError("browser_action", "text")
)
await this.browserSession.closeBrowser()
break
}
}
@@ -1446,13 +1454,13 @@ export class Cline {
// NOTE: it's okay that we call this message since the partial inspect_site is finished streaming. The only scenario we have to avoid is sending messages WHILE a partial message exists at the end of the messages array. For example the api_req_finished message would interfere with the partial message, so we needed to remove that.
// await this.say("inspect_site_result", "") // no result, starts the loading spinner waiting for result
await this.say("browser_action_result", JSON.stringify(browserActionResult))
switch (action) {
case "launch":
case "click":
case "type":
case "scroll_down":
case "scroll_up":
await this.say("browser_action_result", JSON.stringify(browserActionResult))
pushToolResult(
formatResponse.toolResult(
`The browser action has been executed. The console logs and screenshot have been captured for your analysis.\n\nConsole logs:\n${
@@ -1473,6 +1481,7 @@ export class Cline {
break
}
} catch (error) {
await this.browserSession.closeBrowser() // if any error occurs, the browser session is terminated
await handleError("executing browser action", error)
break
}

View File

@@ -0,0 +1,159 @@
import deepEqual from "fast-deep-equal"
import React, { memo, useEffect, useRef } from "react"
import { useSize } from "react-use"
import { BrowserActionResult, ClineMessage, ClineSayBrowserAction } from "../../../../src/shared/ExtensionMessage"
import { vscode } from "../../utils/vscode"
import CodeAccordian from "../common/CodeAccordian"
import CodeBlock, { CODE_BLOCK_BG_COLOR } from "../common/CodeBlock"
import { ChatRowContent } from "./ChatRow"
/**
 * Props for BrowserSessionRow, which renders a list of messages as one row.
 */
interface BrowserSessionRowProps {
// Messages rendered inside this row, one BrowserSessionRowContent per entry, in array order.
messages: ClineMessage[]
// Forwarded to the nested ChatRowContent and to the console-logs CodeAccordian.
isExpanded: boolean
// Forwarded alongside isExpanded to the same nested components.
onToggleExpand: () => void
// Forwarded unchanged to ChatRowContent for "api_req_started" and "text" messages.
lastModifiedMessage?: ClineMessage
// Height changes are only reported through onHeightChange while this is true.
isLast: boolean
// Called with true when the measured height increased since the previous measurement, false otherwise.
onHeightChange: (isTaller: boolean) => void
}
/**
 * Renders every message of one browser session inside a single measured container.
 *
 * The container is measured with `useSize`. While `isLast` is true, each change of the
 * measured height (ignoring 0 and Infinity readings) is recorded, and every change after
 * the first recorded one is reported through `onHeightChange` with whether the row grew.
 * The component is memoized with a deep-equality comparison of its props.
 */
const BrowserSessionRow = memo((props: BrowserSessionRowProps) => {
	// `messages` is consumed here; only the remaining props are handed to each per-message renderer.
	const { messages, ...contentProps } = props
	const { isLast, onHeightChange } = contentProps
	// Last height that was recorded; 0 means nothing has been recorded yet.
	const prevHeightRef = useRef(0)

	const [browserSessionRow, { height }] = useSize(
		<div style={{ padding: "10px 6px 10px 15px" }}>
			<h3>Browser Session Group</h3>
			{messages.map((message) => (
				<BrowserSessionRowContent key={message.ts} {...contentProps} message={message} />
			))}
			<h3>END Browser Session Group</h3>
		</div>
	)

	useEffect(() => {
		const isInitialRender = prevHeightRef.current === 0
		if (isLast && height !== 0 && height !== Infinity && height !== prevHeightRef.current) {
			// The first recorded height is a baseline, not a change worth reporting.
			if (!isInitialRender) {
				onHeightChange(height > prevHeightRef.current)
			}
			prevHeightRef.current = height
		}
	}, [height, isLast, onHeightChange])

	return browserSessionRow
}, deepEqual)
/**
 * Props for rendering a single message of a browser session: every BrowserSessionRowProps
 * field except `messages`, plus the one message to render.
 */
interface BrowserSessionRowContentProps extends Omit<BrowserSessionRowProps, "messages"> {
// The message this content element renders.
message: ClineMessage
}
/**
 * Renders one message that belongs to a browser session.
 *
 * - say "api_req_started" / "text": delegated to `ChatRowContent`.
 * - say "browser_action": the message text is parsed as JSON and the action name is shown,
 *   followed by its coordinate and text when present.
 * - say "browser_action_result": the message text is parsed as JSON and the mouse position,
 *   URL, screenshot and console logs are shown when present. Clicking the screenshot posts an
 *   "openImage" message to the extension.
 * - ask "browser_action_launch": a "Browser Session Started" header followed by the message
 *   text in a shell code block.
 *
 * Every other message renders nothing (`null`).
 */
const BrowserSessionRowContent = ({
	message,
	isExpanded,
	onToggleExpand,
	lastModifiedMessage,
	isLast,
}: BrowserSessionRowContentProps) => {
	const headerStyle: React.CSSProperties = {
		display: "flex",
		alignItems: "center",
		gap: "10px",
		marginBottom: "10px",
	}

	switch (message.type) {
		case "say":
			switch (message.say) {
				case "api_req_started":
				case "text":
					return (
						<ChatRowContent
							message={message}
							isExpanded={isExpanded}
							onToggleExpand={onToggleExpand}
							lastModifiedMessage={lastModifiedMessage}
							isLast={isLast}
						/>
					)
				// Braces give each clause its own scope so its declarations are not visible to sibling cases.
				case "browser_action": {
					// An empty/missing text is treated as an empty object.
					const browserAction = JSON.parse(message.text || "{}") as ClineSayBrowserAction
					return (
						<div style={{ marginBottom: 10 }}>
							<div style={{ fontWeight: "bold" }}>{browserAction.action}</div>
							{browserAction.coordinate && <div>{browserAction.coordinate}</div>}
							{browserAction.text && <div>{browserAction.text}</div>}
						</div>
					)
				}
				case "browser_action_result": {
					const { screenshot, logs, currentMousePosition, currentUrl } = JSON.parse(
						message.text || "{}"
					) as BrowserActionResult
					return (
						<div style={{ marginBottom: 10 }}>
							{currentMousePosition && <div>{currentMousePosition}</div>}
							{currentUrl && <div>{currentUrl}</div>}
							{screenshot && (
								<img
									src={screenshot}
									alt="Browser action screenshot"
									style={{
										width: "calc(100% - 2px)",
										height: "auto",
										objectFit: "contain",
										// Leave a gap below the image only when logs follow it.
										marginBottom: logs ? 7 : 0,
										borderRadius: 3,
										cursor: "pointer",
										marginLeft: "1px",
									}}
									onClick={() => vscode.postMessage({ type: "openImage", text: screenshot })}
								/>
							)}
							{logs && (
								<CodeAccordian
									code={logs}
									language="shell"
									isConsoleLogs={true}
									isExpanded={isExpanded}
									onToggleExpand={onToggleExpand}
								/>
							)}
						</div>
					)
				}
				default:
					return null
			}
		case "ask":
			switch (message.ask) {
				case "browser_action_launch":
					return (
						<>
							<div style={headerStyle}>
								<span style={{ fontWeight: "bold" }}>Browser Session Started</span>
							</div>
							<div
								style={{
									borderRadius: 3,
									border: "1px solid var(--vscode-editorGroup-border)",
									overflow: "hidden",
									backgroundColor: CODE_BLOCK_BG_COLOR,
								}}>
								<CodeBlock source={`${"```"}shell\n${message.text}\n${"```"}`} forceWrap={true} />
							</div>
						</>
					)
				default:
					return null
			}
		// Never fall off the end of the component: an unexpected message type renders nothing.
		default:
			return null
	}
}
export default BrowserSessionRow

View File

@@ -1,13 +1,8 @@
import { VSCodeBadge, VSCodeProgressRing } from "@vscode/webview-ui-toolkit/react"
import deepEqual from "fast-deep-equal"
import React, { memo, useEffect, useMemo, useRef } from "react"
import {
BrowserActionResult,
ClineApiReqInfo,
ClineMessage,
ClineSayBrowserAction,
ClineSayTool,
} from "../../../../src/shared/ExtensionMessage"
import { useSize } from "react-use"
import { ClineApiReqInfo, ClineMessage, ClineSayTool } from "../../../../src/shared/ExtensionMessage"
import { COMMAND_OUTPUT_STRING } from "../../../../src/shared/combineCommandSequences"
import { vscode } from "../../utils/vscode"
import CodeAccordian, { removeLeadingNonAlphanumeric } from "../common/CodeAccordian"
@@ -15,7 +10,6 @@ import CodeBlock, { CODE_BLOCK_BG_COLOR } from "../common/CodeBlock"
import MarkdownBlock from "../common/MarkdownBlock"
import Thumbnails from "../common/Thumbnails"
import { highlightMentions } from "./TaskHeader"
import { useSize } from "react-use"
interface ChatRowProps {
message: ClineMessage
@@ -66,7 +60,13 @@ const ChatRow = memo(
export default ChatRow
const ChatRowContent = ({ message, isExpanded, onToggleExpand, lastModifiedMessage, isLast }: ChatRowContentProps) => {
export const ChatRowContent = ({
message,
isExpanded,
onToggleExpand,
lastModifiedMessage,
isLast,
}: ChatRowContentProps) => {
const [cost, apiReqCancelReason, apiReqStreamingFailedMessage] = useMemo(() => {
if (message.text != null && message.say === "api_req_started") {
const info: ClineApiReqInfo = JSON.parse(message.text)
@@ -617,58 +617,6 @@ const ChatRowContent = ({ message, isExpanded, onToggleExpand, lastModifiedMessa
</div>
</>
)
case "browser_action":
const browserAction = JSON.parse(message.text || "{}") as ClineSayBrowserAction
return (
<div
style={{
marginTop: -10,
width: "100%",
}}>
<div style={{ fontWeight: "bold" }}>{browserAction.action}</div>
{browserAction.coordinate && <div>{browserAction.coordinate}</div>}
{browserAction.text && <div>{browserAction.text}</div>}
</div>
)
case "browser_action_result":
const { screenshot, logs, currentMousePosition, currentUrl } = JSON.parse(
message.text || "{}"
) as BrowserActionResult
return (
<div
style={{
marginTop: -10,
width: "100%",
}}>
{currentMousePosition && <div>{currentMousePosition}</div>}
{currentUrl && <div>{currentUrl}</div>}
{screenshot && (
<img
src={screenshot}
alt="Inspect screenshot"
style={{
width: "calc(100% - 2px)",
height: "auto",
objectFit: "contain",
marginBottom: logs ? 7 : 0,
borderRadius: 3,
cursor: "pointer",
marginLeft: "1px",
}}
onClick={() => vscode.postMessage({ type: "openImage", text: screenshot })}
/>
)}
{logs && (
<CodeAccordian
code={logs}
language="shell"
isConsoleLogs={true}
isExpanded={isExpanded}
onToggleExpand={onToggleExpand}
/>
)}
</div>
)
default:
return (
<>
@@ -800,29 +748,6 @@ const ChatRowContent = ({ message, isExpanded, onToggleExpand, lastModifiedMessa
</div>
</>
)
case "browser_action_launch":
// const isInspecting =
// isLast && lastModifiedMessage?.say === "inspect_site_result" && !lastModifiedMessage?.images
return (
<>
<div style={headerStyle}>
{/* {isInspecting ? <ProgressIndicator /> : toolIcon("inspect")} */}
<span style={{ fontWeight: "bold" }}>
<>Cline wants to use the browser:</>
</span>
</div>
<div
style={{
borderRadius: 3,
border: "1px solid var(--vscode-editorGroup-border)",
overflow: "hidden",
backgroundColor: CODE_BLOCK_BG_COLOR,
}}>
<CodeBlock source={`${"```"}shell\n${message.text}\n${"```"}`} forceWrap={true} />
</div>
</>
)
default:
return null
}

View File

@@ -4,7 +4,13 @@ import { useCallback, useEffect, useMemo, useRef, useState } from "react"
import { useDeepCompareEffect, useEvent, useMount } from "react-use"
import { Virtuoso, type VirtuosoHandle } from "react-virtuoso"
import styled from "styled-components"
import { ClineAsk, ClineSayTool, ExtensionMessage } from "../../../../src/shared/ExtensionMessage"
import {
ClineAsk,
ClineMessage,
ClineSayBrowserAction,
ClineSayTool,
ExtensionMessage,
} from "../../../../src/shared/ExtensionMessage"
import { findLast } from "../../../../src/shared/array"
import { combineApiRequests } from "../../../../src/shared/combineApiRequests"
import { combineCommandSequences } from "../../../../src/shared/combineCommandSequences"
@@ -14,6 +20,7 @@ import { vscode } from "../../utils/vscode"
import HistoryPreview from "../history/HistoryPreview"
import { normalizeApiConfiguration } from "../settings/ApiOptions"
import Announcement from "./Announcement"
import BrowserSessionRow from "./BrowserSessionRow"
import ChatRow from "./ChatRow"
import ChatTextArea from "./ChatTextArea"
import TaskHeader from "./TaskHeader"
@@ -437,6 +444,66 @@ const ChatView = ({ isHidden, showAnnouncement, hideAnnouncement, showHistoryVie
})
}, [modifiedMessages])
// Tells whether a visible message is one of the kinds that may be shown inside a browser session group:
// the "browser_action_launch" ask, or an "api_req_started" / "text" / "browser_action" / "browser_action_result" say.
const isBrowserSessionMessage = (message: ClineMessage): boolean => {
	switch (message.type) {
		case "ask":
			return message.ask === "browser_action_launch"
		case "say":
			switch (message.say) {
				case "api_req_started":
				case "text":
				case "browser_action":
				case "browser_action_result":
					return true
				default:
					return false
			}
		default:
			return false
	}
}
// Folds visibleMessages into list rows: a plain message stays a single entry, while each browser
// session (from a "browser_action_launch" ask up to a "close" action, the next launch, or the first
// message that is not a browser session message) becomes one array entry.
const groupedMessages = useMemo(() => {
	const rows: (ClineMessage | ClineMessage[])[] = []
	let session: ClineMessage[] = []
	let sessionOpen = false

	// Emits the pending session as one grouped row and resets the state; does nothing when it is empty.
	const flushSession = () => {
		if (session.length === 0) {
			return
		}
		rows.push([...session])
		session = []
		sessionOpen = false
	}

	for (const message of visibleMessages) {
		// A launch always begins a fresh session, closing whatever session was still open.
		if (message.ask === "browser_action_launch") {
			flushSession()
			sessionOpen = true
			session.push(message)
			continue
		}

		// Outside of a session every message is its own row.
		if (!sessionOpen) {
			rows.push(message)
			continue
		}

		// A message of a kind that cannot live in a session ends the session and becomes its own row.
		if (!isBrowserSessionMessage(message)) {
			flushSession()
			rows.push(message)
			continue
		}

		session.push(message)

		// A "close" action is kept in the session and is also its final message.
		if (message.say === "browser_action") {
			const { action } = JSON.parse(message.text || "{}") as ClineSayBrowserAction
			if (action === "close") {
				flushSession()
			}
		}
	}

	// A session still open at the end of the list is emitted as the last row.
	if (session.length > 0) {
		rows.push([...session])
	}

	return rows
}, [visibleMessages])
// scrolling
const scrollToBottomSmooth = useMemo(
@@ -465,10 +532,19 @@ const ChatView = ({ isHidden, showAnnouncement, hideAnnouncement, showHistoryVie
const toggleRowExpansion = useCallback(
(ts: number) => {
const isCollapsing = expandedRows[ts] ?? false
const isLast = visibleMessages.at(-1)?.ts === ts
const isSecondToLast = visibleMessages.at(-2)?.ts === ts
const lastGroup = groupedMessages.at(-1)
const isLast = Array.isArray(lastGroup) ? lastGroup[0].ts === ts : lastGroup?.ts === ts
const secondToLastGroup = groupedMessages.at(-2)
const isSecondToLast = Array.isArray(secondToLastGroup)
? secondToLastGroup[0].ts === ts
: secondToLastGroup?.ts === ts
const isLastCollapsedApiReq =
visibleMessages.at(-1)?.say === "api_req_started" && !expandedRows[visibleMessages.at(-1)?.ts ?? 0]
isLast &&
!Array.isArray(lastGroup) && // Make sure it's not a browser session group
lastGroup?.say === "api_req_started" &&
!expandedRows[lastGroup.ts]
setExpandedRows((prev) => ({
...prev,
[ts]: !prev[ts],
@@ -496,7 +572,7 @@ const ChatView = ({ isHidden, showAnnouncement, hideAnnouncement, showHistoryVie
} else {
const timer = setTimeout(() => {
virtuosoRef.current?.scrollToIndex({
index: visibleMessages.length - (isLast ? 1 : 2),
index: groupedMessages.length - (isLast ? 1 : 2),
align: "start",
})
}, 0)
@@ -504,7 +580,7 @@ const ChatView = ({ isHidden, showAnnouncement, hideAnnouncement, showHistoryVie
}
}
},
[visibleMessages, expandedRows, scrollToBottomAuto, isAtBottom]
[groupedMessages, expandedRows, scrollToBottomAuto, isAtBottom]
)
const handleRowHeightChange = useCallback(
@@ -548,18 +624,37 @@ const ChatView = ({ isHidden, showAnnouncement, hideAnnouncement, showHistoryVie
}, [task])
const itemContent = useCallback(
(index: number, message: any) => (
<ChatRow
key={message.ts}
message={message}
isExpanded={expandedRows[message.ts] || false}
onToggleExpand={() => toggleRowExpansion(message.ts)}
lastModifiedMessage={modifiedMessages.at(-1)}
isLast={index === visibleMessages.length - 1}
onHeightChange={handleRowHeightChange}
/>
),
[expandedRows, modifiedMessages, visibleMessages.length, toggleRowExpansion, handleRowHeightChange]
(index: number, messageOrGroup: ClineMessage | ClineMessage[]) => {
// browser session group
if (Array.isArray(messageOrGroup)) {
const firstMessage = messageOrGroup[0]
return (
<BrowserSessionRow
key={firstMessage.ts}
messages={messageOrGroup}
isExpanded={expandedRows[firstMessage.ts] || false}
onToggleExpand={() => toggleRowExpansion(firstMessage.ts)}
lastModifiedMessage={modifiedMessages.at(-1)}
isLast={index === groupedMessages.length - 1}
onHeightChange={handleRowHeightChange}
/>
)
}
// regular message
return (
<ChatRow
key={messageOrGroup.ts}
message={messageOrGroup}
isExpanded={expandedRows[messageOrGroup.ts] || false}
onToggleExpand={() => toggleRowExpansion(messageOrGroup.ts)}
lastModifiedMessage={modifiedMessages.at(-1)}
isLast={index === groupedMessages.length - 1}
onHeightChange={handleRowHeightChange}
/>
)
},
[expandedRows, modifiedMessages, groupedMessages.length, toggleRowExpansion, handleRowHeightChange]
)
return (
@@ -627,7 +722,7 @@ const ChatView = ({ isHidden, showAnnouncement, hideAnnouncement, showHistoryVie
}}
// increasing top by 3_000 to prevent jumping around when user collapses a row
increaseViewportBy={{ top: 3_000, bottom: Number.MAX_SAFE_INTEGER }} // hack to make sure the last message is always rendered to get truly perfect scroll to bottom animation when new messages are added (Number.MAX_SAFE_INTEGER is safe for arithmetic operations, which is all virtuoso uses this value for in src/sizeRangeSystem.ts)
data={visibleMessages} // messages is the raw format returned by extension, modifiedMessages is the manipulated structure that combines certain messages of related type, and visibleMessages is the filtered structure that removes messages that should not be rendered
data={groupedMessages} // messages is the raw format returned by extension, modifiedMessages is the manipulated structure that combines certain messages of related type, and visibleMessages is the filtered structure that removes messages that should not be rendered
itemContent={itemContent}
atBottomStateChange={(isAtBottom) => {
setIsAtBottom(isAtBottom)
@@ -637,7 +732,7 @@ const ChatView = ({ isHidden, showAnnouncement, hideAnnouncement, showHistoryVie
setShowScrollToBottom(disableAutoScrollRef.current && !isAtBottom)
}}
atBottomThreshold={10} // anything lower causes issues with followOutput
initialTopMostItemIndex={visibleMessages.length - 1}
initialTopMostItemIndex={groupedMessages.length - 1}
/>
</div>
{showScrollToBottom ? (