From bc87deca232b1424e00c0d66e0538ba95bd59dd9 Mon Sep 17 00:00:00 2001 From: xlgzsgf <51521689+hiqiancheng@users.noreply.github.com> Date: Fri, 5 Jun 2026 00:08:27 +0800 Subject: [PATCH 1/2] feat(agent-service): add native browser automation tools --- apps/desktop/src-tauri/Cargo.lock | 48 +- apps/desktop/src-tauri/Cargo.toml | 3 +- .../desktop/src-tauri/src/commands/browser.rs | 75 + apps/desktop/src-tauri/src/commands/mod.rs | 10 + .../src-tauri/src/core/browser/actions.rs | 144 ++ .../desktop/src-tauri/src/core/browser/cdp.rs | 1245 +++++++++++++++++ .../src-tauri/src/core/browser/endpoint.rs | 166 +++ .../desktop/src-tauri/src/core/browser/mod.rs | 12 + .../src-tauri/src/core/browser/process.rs | 303 ++++ .../src-tauri/src/core/browser/runtime.rs | 695 +++++++++ .../src-tauri/src/core/browser/snapshot.rs | 18 + .../src-tauri/src/core/browser/types.rs | 191 +++ .../src-tauri/src/core/browser/url_policy.rs | 23 + apps/desktop/src-tauri/src/core/mod.rs | 1 + apps/desktop/src-tauri/src/lib.rs | 1 + apps/desktop/src-tauri/src/testing/mod.rs | 46 + .../src-tauri/tests/browser_commands.rs | 590 ++++++++ .../src-tauri/tests/browser_live_smoke.rs | 434 ++++++ .../src/database/artifacts/runtime/seed.sql | 18 + .../src/database/queries/builtInTools.ts | 29 + apps/desktop/src/i18n/messages.ts | 18 + .../services/BuiltInToolService/registry.ts | 2 + .../services/BuiltInToolService/service.ts | 16 + .../tools/browser/approval.ts | 125 ++ .../tools/browser/config.ts | 67 + .../tools/browser/constants.ts | 160 +++ .../tools/browser/format.ts | 120 ++ .../BuiltInToolService/tools/browser/index.ts | 425 ++++++ .../tools/browser/operation.ts | 24 + .../tools/browser/redaction.ts | 106 ++ .../src/services/BuiltInToolService/types.ts | 5 +- .../src/services/NativeService/browser.ts | 46 + .../src/services/NativeService/index.ts | 13 + .../src/services/NativeService/types.ts | 53 + .../BuiltInTools/browserToolGroup.ts | 23 + .../BrowserAutomationToolConfig.vue | 122 ++ .../components/BuiltInToolConfig.vue | 86 +- .../components/BuiltInTools/index.vue | 71 +- .../components/BuiltInTools/types.ts | 21 + .../settingsBuiltInToolsBrowserConfig.test.ts | 256 ++++ .../settingsBuiltInToolsBrowserGroup.test.ts | 224 +++ ...tingsBuiltInToolsBrowserGroupLogic.test.ts | 50 + .../browser-tools-registration.test.ts | 24 + .../browser-service-execution.test.ts | 98 ++ .../BuiltInToolService/service-i18n.test.ts | 61 +- .../tools/browser/browserTool.test.ts | 994 +++++++++++++ .../tests/services/native-service.test.ts | 72 + apps/desktop/vitest.config.ts | 12 + 48 files changed, 7305 insertions(+), 41 deletions(-) create mode 100644 apps/desktop/src-tauri/src/commands/browser.rs create mode 100644 apps/desktop/src-tauri/src/core/browser/actions.rs create mode 100644 apps/desktop/src-tauri/src/core/browser/cdp.rs create mode 100644 apps/desktop/src-tauri/src/core/browser/endpoint.rs create mode 100644 apps/desktop/src-tauri/src/core/browser/mod.rs create mode 100644 apps/desktop/src-tauri/src/core/browser/process.rs create mode 100644 apps/desktop/src-tauri/src/core/browser/runtime.rs create mode 100644 apps/desktop/src-tauri/src/core/browser/snapshot.rs create mode 100644 apps/desktop/src-tauri/src/core/browser/types.rs create mode 100644 apps/desktop/src-tauri/src/core/browser/url_policy.rs create mode 100644 apps/desktop/src-tauri/tests/browser_commands.rs create mode 100644 apps/desktop/src-tauri/tests/browser_live_smoke.rs create mode 100644 apps/desktop/src/services/BuiltInToolService/tools/browser/approval.ts create mode 100644 apps/desktop/src/services/BuiltInToolService/tools/browser/config.ts create mode 100644 apps/desktop/src/services/BuiltInToolService/tools/browser/constants.ts create mode 100644 apps/desktop/src/services/BuiltInToolService/tools/browser/format.ts create mode 100644 apps/desktop/src/services/BuiltInToolService/tools/browser/index.ts create mode 100644 apps/desktop/src/services/BuiltInToolService/tools/browser/operation.ts create mode 100644 apps/desktop/src/services/BuiltInToolService/tools/browser/redaction.ts create mode 100644 apps/desktop/src/services/NativeService/browser.ts create mode 100644 apps/desktop/src/views/SettingsView/components/BuiltInTools/browserToolGroup.ts create mode 100644 apps/desktop/src/views/SettingsView/components/BuiltInTools/components/BrowserAutomationToolConfig.vue create mode 100644 apps/desktop/tests/SettingsView/settingsBuiltInToolsBrowserConfig.test.ts create mode 100644 apps/desktop/tests/SettingsView/settingsBuiltInToolsBrowserGroup.test.ts create mode 100644 apps/desktop/tests/SettingsView/settingsBuiltInToolsBrowserGroupLogic.test.ts create mode 100644 apps/desktop/tests/services/AgentService/browser-tools-registration.test.ts create mode 100644 apps/desktop/tests/services/BuiltInToolService/browser-service-execution.test.ts create mode 100644 apps/desktop/tests/services/BuiltInToolService/tools/browser/browserTool.test.ts diff --git a/apps/desktop/src-tauri/Cargo.lock b/apps/desktop/src-tauri/Cargo.lock index 9b8c8550..128a287b 100644 --- a/apps/desktop/src-tauri/Cargo.lock +++ b/apps/desktop/src-tauri/Cargo.lock @@ -1196,6 +1196,12 @@ dependencies = [ "syn 2.0.111", ] +[[package]] +name = "data-encoding" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4ae5f15dda3c708c0ade84bfee31ccab44a3da4f88015ed22f63732abe300c8" + [[package]] name = "data-url" version = "0.3.2" @@ -5545,6 +5551,17 @@ dependencies = [ "stable_deref_trait", ] +[[package]] +name = "sha1" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" +dependencies = [ + "cfg-if", + "cpufeatures 0.2.17", + "digest 0.10.7", +] + [[package]] name = "sha1" version = "0.11.0" @@ -6809,6 +6826,18 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-tungstenite" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f72a05e828585856dacd553fba484c242c46e391fb0e58917c942ee9202915c" +dependencies = [ + "futures-util", + "log", + "tokio", + "tungstenite", +] + [[package]] name = "tokio-util" version = "0.7.17" @@ -6989,6 +7018,7 @@ dependencies = [ "tempfile", "time", "tokio", + "tokio-tungstenite", "ureq 2.12.1", "velopack", "webview2-com", @@ -7102,6 +7132,22 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "tungstenite" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c01152af293afb9c7c2a57e4b559c5620b421f6d133261c60dd2d0cdb38e6b8" +dependencies = [ + "bytes", + "data-encoding", + "http", + "httparse", + "log", + "rand 0.9.2", + "sha1 0.10.6", + "thiserror 2.0.17", +] + [[package]] name = "typed-path" version = "0.12.3" @@ -7344,7 +7390,7 @@ dependencies = [ "semver", "serde", "serde_json", - "sha1", + "sha1 0.11.0", "sha2 0.11.0", "thiserror 2.0.17", "ureq 3.3.0", diff --git a/apps/desktop/src-tauri/Cargo.toml b/apps/desktop/src-tauri/Cargo.toml index 58bbb5ce..16b09faf 100644 --- a/apps/desktop/src-tauri/Cargo.toml +++ b/apps/desktop/src-tauri/Cargo.toml @@ -63,13 +63,14 @@ rmcp = { version = "1.7.0", features = [ ] } reqwest = { version = "0.13", features = ["json", "stream", "blocking"] } futures = "0.3" +tokio-tungstenite = "0.29" clipboard-rs = "0.3.4" html5gum = { version = "0.8.3", default-features = false } sha2 = "0.10" velopack = { version = "=0.0.1589-ga2c5a97", features = ["public-utils"] } +tempfile = "3" [dev-dependencies] -tempfile = "3" [profile.dev] # dev:本地调试用,不生成 debug symbols,最小化磁盘占用 diff --git a/apps/desktop/src-tauri/src/commands/browser.rs b/apps/desktop/src-tauri/src/commands/browser.rs new file mode 100644 index 00000000..bdf5c4b1 --- /dev/null +++ b/apps/desktop/src-tauri/src/commands/browser.rs @@ -0,0 +1,75 @@ +use tauri::State; + +use crate::core::browser::{ + types::{ + BrowserActRequest, BrowserActResult, BrowserNavigateRequest, BrowserObservation, + BrowserObserveRequest, BrowserStartRequest, BrowserStatus, BrowserTabRequest, + }, + BrowserRuntime, +}; + +#[tauri::command] +pub fn browser_status(runtime: State<'_, BrowserRuntime>) -> BrowserStatus { + runtime.status() +} + +#[tauri::command] +pub async fn browser_start( + runtime: State<'_, BrowserRuntime>, + request: BrowserStartRequest, +) -> Result { + runtime.start(request).await +} + +#[tauri::command] +pub fn browser_stop(runtime: State<'_, BrowserRuntime>) -> BrowserStatus { + runtime.stop() +} + +#[tauri::command] +pub async fn browser_navigate( + runtime: State<'_, BrowserRuntime>, + request: BrowserNavigateRequest, +) -> Result { + runtime.navigate(request).await +} + +#[tauri::command] +pub async fn browser_back( + runtime: State<'_, BrowserRuntime>, + request: BrowserTabRequest, +) -> Result { + runtime.history_action(request, "back").await +} + +#[tauri::command] +pub async fn browser_forward( + runtime: State<'_, BrowserRuntime>, + request: BrowserTabRequest, +) -> Result { + runtime.history_action(request, "forward").await +} + +#[tauri::command] +pub async fn browser_reload( + runtime: State<'_, BrowserRuntime>, + request: BrowserTabRequest, +) -> Result { + runtime.history_action(request, "reload").await +} + +#[tauri::command] +pub async fn browser_observe( + runtime: State<'_, BrowserRuntime>, + request: BrowserObserveRequest, +) -> Result { + runtime.observe(request).await +} + +#[tauri::command] +pub async fn browser_act( + runtime: State<'_, BrowserRuntime>, + request: BrowserActRequest, +) -> Result { + runtime.act(request).await +} diff --git a/apps/desktop/src-tauri/src/commands/mod.rs b/apps/desktop/src-tauri/src/commands/mod.rs index 987d6465..04680b8f 100644 --- a/apps/desktop/src-tauri/src/commands/mod.rs +++ b/apps/desktop/src-tauri/src/commands/mod.rs @@ -2,6 +2,7 @@ //! 命令入口模块。 pub mod autostart; +pub mod browser; pub mod built_in_tools; pub mod clipboard; pub mod database; @@ -76,5 +77,14 @@ pub fn invoke_handler( updater::updater_check_for_updates, updater::updater_download_update, updater::updater_install_update, + browser::browser_status, + browser::browser_start, + browser::browser_stop, + browser::browser_navigate, + browser::browser_back, + browser::browser_forward, + browser::browser_reload, + browser::browser_observe, + browser::browser_act, ] } diff --git a/apps/desktop/src-tauri/src/core/browser/actions.rs b/apps/desktop/src-tauri/src/core/browser/actions.rs new file mode 100644 index 00000000..74b6889a --- /dev/null +++ b/apps/desktop/src-tauri/src/core/browser/actions.rs @@ -0,0 +1,144 @@ +use super::{ + endpoint::validate_stale_navigation_token, + types::{BrowserActOperation, BrowserActRequest, BrowserDomRef}, +}; + +const MAX_ACTION_TEXT_BYTES: usize = 16 * 1024; +const MAX_ACTION_KEY_BYTES: usize = 64; +const MAX_FORM_FIELDS: usize = 50; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct BrowserResolvedFormField { + pub navigation_token: String, + pub selector: String, + pub value: String, +} + +#[derive(Debug, Clone)] +pub struct BrowserResolvedAction<'a> { + pub reference: Option<&'a BrowserDomRef>, + pub form_fields: Vec, + pub page_navigation_token: Option, + pub requires_current_observation: bool, +} + +pub fn action_ref_id(request: &BrowserActRequest) -> Option<&str> { + request.ref_id.as_deref().or(request.target_ref.as_deref()) +} + +pub fn resolve_ref_action<'a>( + request: &BrowserActRequest, + refs: &'a [BrowserDomRef], +) -> Result, String> { + match request.action { + BrowserActOperation::Type => { + let text = request + .text + .as_deref() + .ok_or_else(|| "type requires text".to_string())?; + validate_action_text(text)?; + } + BrowserActOperation::Fill => { + let value = request + .value + .as_deref() + .ok_or_else(|| "fill requires value".to_string())?; + validate_action_text(value)?; + } + BrowserActOperation::PressKey => { + let key = request + .key + .as_deref() + .ok_or_else(|| "press_key requires key".to_string())?; + if key.is_empty() || key.len() > MAX_ACTION_KEY_BYTES { + return Err("press_key key is invalid".to_string()); + } + } + _ => {} + } + + if matches!( + request.action, + BrowserActOperation::Click | BrowserActOperation::Type | BrowserActOperation::Fill + ) && action_ref_id(request).is_none() + { + return Err("Browser click requires an observed ref and navigationToken".to_string()); + } + + if request.action == BrowserActOperation::FillForm { + let fields = request + .fields + .as_ref() + .ok_or_else(|| "fill_form requires fields".to_string())?; + if fields.len() > MAX_FORM_FIELDS { + return Err("fill_form field count exceeds the size limit".to_string()); + } + let mut resolved_fields = Vec::with_capacity(fields.len()); + for field in fields { + validate_action_text(&field.value)?; + let reference = find_ref(refs, &field.ref_id)?; + validate_stale_navigation_token(&field.navigation_token, &reference.navigation_token)?; + if !reference.editable { + return Err("Browser target is not editable".to_string()); + } + resolved_fields.push(BrowserResolvedFormField { + navigation_token: reference.navigation_token.clone(), + selector: reference.selector.clone(), + value: field.value.clone(), + }); + } + return Ok(BrowserResolvedAction { + reference: None, + form_fields: resolved_fields, + page_navigation_token: None, + requires_current_observation: false, + }); + } + + let Some(ref_id) = action_ref_id(request) else { + let requires_current_observation = matches!( + request.action, + BrowserActOperation::PressKey | BrowserActOperation::Scroll + ); + return Ok(BrowserResolvedAction { + reference: None, + form_fields: Vec::new(), + page_navigation_token: request.navigation_token.clone(), + requires_current_observation, + }); + }; + let reference = find_ref(refs, ref_id)?; + let supplied = request + .navigation_token + .as_deref() + .ok_or_else(|| "Browser action requires navigationToken for ref targets".to_string())?; + validate_stale_navigation_token(supplied, &reference.navigation_token)?; + + if matches!( + request.action, + BrowserActOperation::Type | BrowserActOperation::Fill + ) && !reference.editable + { + return Err("Browser target is not editable".to_string()); + } + Ok(BrowserResolvedAction { + reference: Some(reference), + form_fields: Vec::new(), + page_navigation_token: None, + requires_current_observation: false, + }) +} + +fn find_ref<'a>(refs: &'a [BrowserDomRef], ref_id: &str) -> Result<&'a BrowserDomRef, String> { + refs.iter() + .find(|candidate| candidate.ref_id == ref_id) + .ok_or_else(|| format!("Browser ref '{ref_id}' was not found; observe again before acting")) +} + +fn validate_action_text(value: &str) -> Result<(), String> { + if value.len() > MAX_ACTION_TEXT_BYTES { + Err("Browser action text exceeds the size limit".to_string()) + } else { + Ok(()) + } +} diff --git a/apps/desktop/src-tauri/src/core/browser/cdp.rs b/apps/desktop/src-tauri/src/core/browser/cdp.rs new file mode 100644 index 00000000..7507ec2a --- /dev/null +++ b/apps/desktop/src-tauri/src/core/browser/cdp.rs @@ -0,0 +1,1245 @@ +use std::{ + collections::hash_map::DefaultHasher, + collections::BTreeMap, + fs, + hash::{Hash, Hasher}, + path::{Path, PathBuf}, + sync::OnceLock, + time::{Duration, Instant, SystemTime, UNIX_EPOCH}, +}; + +use base64::Engine; +use futures::{SinkExt, StreamExt}; +use serde::Deserialize; +use serde_json::{json, Value}; +use tokio_tungstenite::{ + connect_async_with_config, + tungstenite::{protocol::WebSocketConfig, Message}, +}; + +use super::{ + actions::BrowserResolvedAction, + endpoint::{validate_loopback_websocket, BrowserEndpoint}, + types::{BrowserActOperation, BrowserActResult, BrowserDomRef, BrowserTab}, +}; + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct CdpTarget { + id: String, + #[serde(default)] + title: String, + #[serde(default)] + url: String, + #[serde(default, rename = "type")] + target_type: String, + #[serde(default, rename = "webSocketDebuggerUrl")] + web_socket_debugger_url: Option, +} + +#[derive(Debug)] +pub struct PageSnapshot { + pub url: Option, + pub title: Option, + pub navigation_token: Option, + pub refs: Vec, + pub file_path: Option, + pub mime_type: Option, + pub console: Vec, + pub network: Vec, +} + +const MAX_DIAGNOSTIC_ENTRIES: usize = 30; +const MAX_DIAGNOSTIC_ENTRY_BYTES: usize = 2048; +const MAX_DIAGNOSTIC_TOTAL_BYTES: usize = 16 * 1024; +const MAX_DIAGNOSTIC_ARG_BYTES: usize = 512; +const MAX_DIAGNOSTIC_ARGS: usize = 8; +const MAX_CDP_DIAGNOSTIC_MESSAGE_BYTES: usize = 64 * 1024; +const MAX_CDP_HTTP_BODY_BYTES: usize = 256 * 1024; +const MAX_TRACKED_REQUEST_URLS: usize = 200; +const SCREENSHOT_ARTIFACT_PREFIX: &str = "browser-screenshot-"; +const MAX_SCREENSHOT_ARTIFACTS: usize = 50; +const MAX_SCREENSHOT_ARTIFACT_BYTES: usize = 5 * 1024 * 1024; +const MAX_SCREENSHOT_ARTIFACT_BASE64_BYTES: usize = ((MAX_SCREENSHOT_ARTIFACT_BYTES + 2) / 3) * 4; +const MAX_CDP_COMMAND_MESSAGE_BYTES: usize = MAX_SCREENSHOT_ARTIFACT_BASE64_BYTES + 256 * 1024; +const SCREENSHOT_ARTIFACT_TTL: Duration = Duration::from_secs(60 * 60); +const TRUNCATED_SUFFIX: &str = " ...[truncated]"; + +fn http_client() -> Result { + reqwest::Client::builder() + .no_proxy() + .timeout(Duration::from_secs(4)) + .build() + .map_err(|error| format!("Failed to build browser endpoint client: {error}")) +} + +fn navigation_token(tab: &CdpTarget) -> String { + let mut hasher = DefaultHasher::new(); + tab.id.hash(&mut hasher); + tab.url.hash(&mut hasher); + tab.title.hash(&mut hasher); + format!("nav-{:016x}", hasher.finish()) +} + +impl CdpTarget { + fn to_tab(&self, active: bool) -> BrowserTab { + BrowserTab { + id: self.id.clone(), + url: self.url.clone(), + title: self.title.clone(), + active, + navigation_token: navigation_token(self), + } + } +} + +pub async fn list_targets(endpoint: &BrowserEndpoint) -> Result, String> { + let client = reqwest::Client::builder() + .no_proxy() + .timeout(std::time::Duration::from_secs(4)) + .build() + .map_err(|error| format!("Failed to build browser endpoint client: {error}"))?; + + client + .get(endpoint.list_url()) + .send() + .await + .map_err(|error| format!("Failed to list browser tabs: {error}"))? + .error_for_status() + .map_err(|error| format!("Browser tab listing returned an error: {error}"))? + .bytes() + .await + .map_err(|error| format!("Failed to read browser tab listing: {error}")) + .and_then(|bytes| { + parse_bounded_json_bytes(&bytes, MAX_CDP_HTTP_BODY_BYTES, "browser tab listing") + }) + .map_err(|error| format!("Browser tab listing was invalid: {error}")) +} + +pub async fn list_tabs( + endpoint: &BrowserEndpoint, + active_tab_id: Option<&str>, +) -> Result, String> { + let targets = list_targets(endpoint).await?; + let page_targets: Vec = targets + .into_iter() + .filter(|target| target.target_type == "page") + .collect(); + let active = active_tab_id + .filter(|id| page_targets.iter().any(|target| target.id == *id)) + .map(str::to_string) + .or_else(|| page_targets.first().map(|target| target.id.clone())); + + Ok(page_targets + .iter() + .map(|target| target.to_tab(active.as_deref() == Some(target.id.as_str()))) + .collect()) +} + +pub async fn create_tab(endpoint: &BrowserEndpoint, url: &str) -> Result<(), String> { + let client = http_client()?; + client + .put(endpoint.new_tab_url(url)) + .send() + .await + .map_err(|error| format!("Failed to create browser tab: {error}"))? + .error_for_status() + .map_err(|error| format!("Browser tab creation returned an error: {error}"))?; + Ok(()) +} + +pub async fn observe_page( + endpoint: &BrowserEndpoint, + tab_id: Option<&str>, + include_dom: bool, + include_screenshot: bool, + include_console: bool, + include_network: bool, +) -> Result { + let target = resolve_page_target(endpoint, tab_id).await?; + let navigation_token = navigation_token(&target); + let refs = if include_dom { + let value = call_page( + endpoint, + &target, + "Runtime.evaluate", + json!({ + "expression": DOM_REF_SCRIPT, + "returnByValue": true, + "awaitPromise": true + }), + ) + .await?; + parse_dom_refs(value, &navigation_token)? + } else { + Vec::new() + }; + + let screenshot_base64 = if include_screenshot { + let value = call_page( + endpoint, + &target, + "Page.captureScreenshot", + json!({ + "format": "png", + "captureBeyondViewport": false + }), + ) + .await?; + value + .get("data") + .and_then(Value::as_str) + .map(str::to_string) + } else { + None + }; + let file_path = screenshot_base64 + .as_deref() + .map(write_screenshot_artifact) + .transpose()?; + let diagnostics = + collect_page_diagnostics(endpoint, &target, include_console, include_network).await?; + + Ok(PageSnapshot { + url: Some(target.url), + title: Some(target.title), + navigation_token: Some(navigation_token), + refs, + file_path, + mime_type: if include_screenshot { + Some("image/png".to_string()) + } else { + None + }, + console: diagnostics.console, + network: diagnostics.network, + }) +} + +pub async fn navigate_current_page( + endpoint: &BrowserEndpoint, + tab_id: Option<&str>, + url: &str, +) -> Result<(), String> { + let target = resolve_page_target(endpoint, tab_id).await?; + call_page(endpoint, &target, "Page.navigate", json!({ "url": url })).await?; + Ok(()) +} + +pub async fn history_action( + endpoint: &BrowserEndpoint, + tab_id: Option<&str>, + action: &str, +) -> Result<(), String> { + let target = resolve_page_target(endpoint, tab_id).await?; + let method = match action { + "back" => "history.back()", + "forward" => "history.forward()", + "reload" => "location.reload()", + _ => return Err(format!("Unsupported browser history action: {action}")), + }; + call_page( + endpoint, + &target, + "Runtime.evaluate", + json!({ + "expression": method, + "awaitPromise": false, + "returnByValue": true + }), + ) + .await?; + Ok(()) +} + +pub async fn dispatch_action( + endpoint: &BrowserEndpoint, + tab_id: Option<&str>, + request: &super::types::BrowserActRequest, + resolved_action: BrowserResolvedAction<'_>, +) -> Result { + let target = resolve_page_target(endpoint, tab_id).await?; + match request.action { + BrowserActOperation::Click => click(endpoint, &target, resolved_action.reference).await?, + BrowserActOperation::Type => { + type_text(endpoint, &target, request, resolved_action.reference, false).await? + } + BrowserActOperation::Fill => { + type_text(endpoint, &target, request, resolved_action.reference, true).await? + } + BrowserActOperation::FillForm => fill_form(endpoint, &target, &resolved_action).await?, + BrowserActOperation::PressKey => press_key(endpoint, &target, request).await?, + BrowserActOperation::Scroll => scroll(endpoint, &target, request).await?, + BrowserActOperation::Wait => wait(request).await?, + } + + Ok(BrowserActResult { + ok: true, + action: request.action.as_str().to_string(), + message: Some("Browser action completed".to_string()), + }) +} + +async fn resolve_page_target( + endpoint: &BrowserEndpoint, + tab_id: Option<&str>, +) -> Result { + let targets = list_targets(endpoint).await?; + let mut pages = targets + .into_iter() + .filter(|target| target.target_type == "page"); + if let Some(tab_id) = tab_id { + return pages + .find(|target| target.id == tab_id) + .ok_or_else(|| format!("Browser tab '{tab_id}' was not found")); + } + pages + .next() + .ok_or_else(|| "No browser page target is available".to_string()) +} + +async fn call_page( + endpoint: &BrowserEndpoint, + target: &CdpTarget, + method: &str, + params: Value, +) -> Result { + let ws_url = target + .web_socket_debugger_url + .as_deref() + .ok_or_else(|| "Browser page target did not expose a websocket URL".to_string())?; + validate_loopback_websocket(ws_url, endpoint)?; + let request = json!({ + "id": 1, + "method": method, + "params": params, + }); + + let (mut ws, _) = tokio::time::timeout( + Duration::from_secs(6), + connect_async_with_config( + ws_url, + Some(websocket_config(MAX_CDP_COMMAND_MESSAGE_BYTES)), + false, + ), + ) + .await + .map_err(|_| format!("Timed out connecting to browser page websocket for {method}"))? + .map_err(|error| format!("Failed to connect to browser page websocket: {error}"))?; + ws.send(Message::Text(request.to_string().into())) + .await + .map_err(|error| format!("Failed to send CDP command {method}: {error}"))?; + + loop { + let message = tokio::time::timeout(Duration::from_secs(8), ws.next()) + .await + .map_err(|_| format!("Timed out waiting for CDP command {method}"))? + .ok_or_else(|| format!("Browser websocket closed before {method} completed"))? + .map_err(|error| format!("Failed to read CDP response for {method}: {error}"))?; + + let Some(text) = cdp_message_text(message, MAX_CDP_COMMAND_MESSAGE_BYTES) else { + return Err(format!("CDP response for {method} exceeded the size limit")); + }; + let value: Value = serde_json::from_str(&text) + .map_err(|error| format!("CDP response for {method} was invalid JSON: {error}"))?; + if value.get("id").and_then(Value::as_i64) != Some(1) { + continue; + } + if let Some(error) = value.get("error") { + return Err(format!("CDP command {method} failed: {error}")); + } + return Ok(value.get("result").cloned().unwrap_or(Value::Null)); + } +} + +fn cdp_message_text(message: Message, max_bytes: usize) -> Option { + match message { + Message::Text(text) => { + if text.len() > max_bytes { + None + } else { + Some(text.to_string()) + } + } + Message::Binary(bytes) => { + if bytes.len() > max_bytes { + None + } else { + Some(String::from_utf8_lossy(&bytes).to_string()) + } + } + Message::Close(_) => None, + _ => Some(String::new()), + } +} + +fn parse_bounded_json_bytes Deserialize<'de>>( + bytes: &[u8], + max_bytes: usize, + label: &str, +) -> Result { + if bytes.len() > max_bytes { + return Err(format!("{label} exceeded the size limit")); + } + serde_json::from_slice(bytes).map_err(|error| error.to_string()) +} + +fn websocket_config(max_message_bytes: usize) -> WebSocketConfig { + WebSocketConfig::default() + .max_message_size(Some(max_message_bytes)) + .max_frame_size(Some(max_message_bytes)) +} + +async fn connect_page_websocket( + ws_url: &str, + max_message_bytes: usize, +) -> Result< + tokio_tungstenite::WebSocketStream>, + String, +> { + let (ws, _) = + connect_async_with_config(ws_url, Some(websocket_config(max_message_bytes)), false) + .await + .map_err(|error| format!("Failed to connect to browser page websocket: {error}"))?; + Ok(ws) +} + +fn diagnostic_message_text(message: Message) -> Option { + cdp_message_text(message, MAX_CDP_DIAGNOSTIC_MESSAGE_BYTES).filter(|text| !text.is_empty()) +} + +#[derive(Debug, Default)] +struct PageDiagnostics { + console: Vec, + network: Vec, + total_bytes: usize, +} + +impl PageDiagnostics { + fn push_console(&mut self, line: String) -> bool { + push_diagnostic_line(&mut self.console, &mut self.total_bytes, line) + } + + fn push_network(&mut self, line: String) -> bool { + push_diagnostic_line(&mut self.network, &mut self.total_bytes, line) + } +} + +async fn collect_page_diagnostics( + endpoint: &BrowserEndpoint, + target: &CdpTarget, + include_console: bool, + include_network: bool, +) -> Result { + if !include_console && !include_network { + return Ok(PageDiagnostics::default()); + } + + let ws_url = target + .web_socket_debugger_url + .as_deref() + .ok_or_else(|| "Browser page target did not expose a websocket URL".to_string())?; + validate_loopback_websocket(ws_url, endpoint)?; + let mut ws = tokio::time::timeout( + Duration::from_secs(6), + connect_page_websocket(ws_url, MAX_CDP_DIAGNOSTIC_MESSAGE_BYTES), + ) + .await + .map_err(|_| "Timed out connecting to browser page websocket for diagnostics".to_string())? + .map_err(|error| format!("Failed to connect to browser page websocket: {error}"))?; + + let mut next_id = 100_i64; + if include_console { + send_cdp_command(&mut ws, next_id, "Runtime.enable", json!({})).await?; + next_id += 1; + send_cdp_command(&mut ws, next_id, "Log.enable", json!({})).await?; + next_id += 1; + } + if include_network { + send_cdp_command(&mut ws, next_id, "Network.enable", json!({})).await?; + } + + let mut diagnostics = PageDiagnostics::default(); + let mut request_urls = BTreeMap::::new(); + let deadline = Instant::now() + Duration::from_millis(700); + while Instant::now() < deadline + && diagnostics.total_bytes < MAX_DIAGNOSTIC_TOTAL_BYTES + && ((include_console && diagnostics.console.len() < MAX_DIAGNOSTIC_ENTRIES) + || (include_network && diagnostics.network.len() < MAX_DIAGNOSTIC_ENTRIES)) + { + let remaining = deadline.saturating_duration_since(Instant::now()); + if remaining.is_zero() { + break; + } + let Some(message) = tokio::time::timeout(remaining, ws.next()) + .await + .ok() + .flatten() + else { + break; + }; + let message = + message.map_err(|error| format!("Failed to read CDP diagnostics event: {error}"))?; + if matches!(message, Message::Close(_)) { + break; + } + let Some(text) = diagnostic_message_text(message) else { + continue; + }; + let value: Value = serde_json::from_str(&text) + .map_err(|error| format!("CDP diagnostics event was invalid JSON: {error}"))?; + let Some(method) = value.get("method").and_then(Value::as_str) else { + continue; + }; + let params = value.get("params").cloned().unwrap_or(Value::Null); + match method { + "Runtime.consoleAPICalled" + if include_console && diagnostics.console.len() < MAX_DIAGNOSTIC_ENTRIES => + { + diagnostics.push_console(format_runtime_console_event(¶ms)); + } + "Log.entryAdded" + if include_console && diagnostics.console.len() < MAX_DIAGNOSTIC_ENTRIES => + { + diagnostics.push_console(format_log_entry_event(¶ms)); + } + "Network.requestWillBeSent" + if include_network && request_urls.len() < MAX_TRACKED_REQUEST_URLS => + { + if let (Some(request_id), Some(url)) = ( + params.get("requestId").and_then(Value::as_str), + params + .get("request") + .and_then(|request| request.get("url")) + .and_then(Value::as_str), + ) { + request_urls.insert( + truncate_text(request_id, MAX_DIAGNOSTIC_ARG_BYTES), + truncate_text(url, MAX_DIAGNOSTIC_ENTRY_BYTES), + ); + } + } + "Network.responseReceived" + if include_network && diagnostics.network.len() < MAX_DIAGNOSTIC_ENTRIES => + { + if let Some(line) = format_network_response_event(¶ms) { + diagnostics.push_network(line); + } + } + "Network.loadingFailed" + if include_network && diagnostics.network.len() < MAX_DIAGNOSTIC_ENTRIES => + { + diagnostics.push_network(format_network_failed_event(¶ms, &request_urls)); + } + _ => {} + } + } + + Ok(diagnostics) +} + +async fn send_cdp_command( + ws: &mut tokio_tungstenite::WebSocketStream< + tokio_tungstenite::MaybeTlsStream, + >, + id: i64, + method: &str, + params: Value, +) -> Result<(), String> { + ws.send(Message::Text( + json!({ + "id": id, + "method": method, + "params": params, + }) + .to_string() + .into(), + )) + .await + .map_err(|error| format!("Failed to send CDP command {method}: {error}")) +} + +fn format_runtime_console_event(params: &Value) -> String { + let level = params + .get("type") + .and_then(Value::as_str) + .unwrap_or("console"); + let text = params + .get("args") + .and_then(Value::as_array) + .map(|args| { + args.iter() + .take(MAX_DIAGNOSTIC_ARGS) + .filter_map(|arg| { + arg.get("value") + .and_then(Value::as_str) + .or_else(|| arg.get("description").and_then(Value::as_str)) + .map(|value| truncate_text(value, MAX_DIAGNOSTIC_ARG_BYTES)) + }) + .collect::>() + .join(" ") + }) + .filter(|text| !text.trim().is_empty()) + .unwrap_or_else(|| "console event".to_string()); + format!("console.{level}: {text}") +} + +fn format_log_entry_event(params: &Value) -> String { + let entry = params.get("entry").unwrap_or(params); + let level = entry.get("level").and_then(Value::as_str).unwrap_or("log"); + let text = entry + .get("text") + .and_then(Value::as_str) + .map(|text| truncate_text(text, MAX_DIAGNOSTIC_ENTRY_BYTES)) + .unwrap_or_else(|| "log entry".to_string()); + format!("log.{level}: {text}") +} + +fn format_network_response_event(params: &Value) -> Option { + let response = params.get("response")?; + let status = response.get("status").and_then(Value::as_u64)?; + if status < 400 { + return None; + } + let url = response + .get("url") + .and_then(Value::as_str) + .map(|url| truncate_text(url, MAX_DIAGNOSTIC_ENTRY_BYTES)) + .unwrap_or_else(|| "unknown URL".to_string()); + let status_text = response + .get("statusText") + .and_then(Value::as_str) + .map(|text| truncate_text(text, MAX_DIAGNOSTIC_ARG_BYTES)) + .unwrap_or_default(); + Some(format!("response {status} {status_text}: {url}")) +} + +fn format_network_failed_event(params: &Value, request_urls: &BTreeMap) -> String { + let request_id = params + .get("requestId") + .and_then(Value::as_str) + .map(|id| truncate_text(id, MAX_DIAGNOSTIC_ARG_BYTES)) + .unwrap_or_default(); + let url = request_urls + .get(&request_id) + .map(String::as_str) + .unwrap_or("unknown URL"); + let error_text = params + .get("errorText") + .and_then(Value::as_str) + .map(|text| truncate_text(text, MAX_DIAGNOSTIC_ARG_BYTES)) + .unwrap_or_else(|| "network request failed".to_string()); + format!("failed {error_text}: {url}") +} + +fn push_diagnostic_line(lines: &mut Vec, total_bytes: &mut usize, line: String) -> bool { + if *total_bytes >= MAX_DIAGNOSTIC_TOTAL_BYTES { + return false; + } + + let per_entry = truncate_text(&line, MAX_DIAGNOSTIC_ENTRY_BYTES); + let remaining = MAX_DIAGNOSTIC_TOTAL_BYTES.saturating_sub(*total_bytes); + let bounded = truncate_text(&per_entry, remaining); + *total_bytes = (*total_bytes).saturating_add(bounded.len()); + lines.push(bounded); + true +} + +fn truncate_text(value: &str, max_bytes: usize) -> String { + if value.len() <= max_bytes { + return value.to_string(); + } + + let suffix = if max_bytes >= TRUNCATED_SUFFIX.len() { + TRUNCATED_SUFFIX + } else { + "" + }; + let prefix_limit = max_bytes.saturating_sub(suffix.len()); + let mut end = 0; + for (index, character) in value.char_indices() { + let next = index + character.len_utf8(); + if next > prefix_limit { + break; + } + end = next; + } + + format!("{}{}", &value[..end], suffix) +} + +fn parse_dom_refs(value: Value, navigation_token: &str) -> Result, String> { + let items = value + .get("result") + .and_then(|result| result.get("value")) + .and_then(Value::as_array) + .ok_or_else(|| "Browser DOM snapshot did not return an array".to_string())?; + + Ok(items + .iter() + .enumerate() + .filter_map(|(index, item)| { + let description = item + .get("description") + .and_then(Value::as_str) + .unwrap_or("element") + .to_string(); + let selector = item.get("selector")?.as_str()?.to_string(); + Some(BrowserDomRef { + ref_id: format!("ref-{index}"), + navigation_token: navigation_token.to_string(), + description, + editable: item + .get("editable") + .and_then(Value::as_bool) + .unwrap_or(false), + selector, + x: item.get("x").and_then(Value::as_f64).unwrap_or(0.0), + y: item.get("y").and_then(Value::as_f64).unwrap_or(0.0), + }) + }) + .collect()) +} + +async fn click( + endpoint: &BrowserEndpoint, + target: &CdpTarget, + reference: Option<&BrowserDomRef>, +) -> Result<(), String> { + let reference = reference + .ok_or_else(|| "Browser click requires an observed ref and navigationToken".to_string())?; + let selector = css_string(&reference.selector); + call_page( + endpoint, + target, + "Runtime.evaluate", + json!({ + "expression": format!("(() => {{ const el = document.querySelector({selector}); if (!el) throw new Error('target not found'); el.scrollIntoView({{ block: 'center', inline: 'center' }}); if (typeof el.focus === 'function') el.focus({{ preventScroll: true }}); el.click(); return true; }})()"), + "awaitPromise": true, + "returnByValue": true + }), + ) + .await?; + Ok(()) +} + +async fn type_text( + endpoint: &BrowserEndpoint, + target: &CdpTarget, + request: &super::types::BrowserActRequest, + reference: Option<&BrowserDomRef>, + replace: bool, +) -> Result<(), String> { + let reference = + reference.ok_or_else(|| "Browser type/fill requires an observed ref".to_string())?; + focus_and_verify_editable(endpoint, target, reference).await?; + if replace { + call_page( + endpoint, + target, + "Input.dispatchKeyEvent", + json!({ "type": "keyDown", "modifiers": 2, "windowsVirtualKeyCode": 65, "code": "KeyA", "key": "a" }), + ) + .await?; + call_page( + endpoint, + target, + "Input.dispatchKeyEvent", + json!({ "type": "keyUp", "modifiers": 2, "windowsVirtualKeyCode": 65, "code": "KeyA", "key": "a" }), + ) + .await?; + } + let text = if replace { + request + .value + .as_deref() + .ok_or_else(|| "fill requires value".to_string())? + } else { + request + .text + .as_deref() + .ok_or_else(|| "type requires text".to_string())? + }; + call_page( + endpoint, + target, + "Input.insertText", + json!({ "text": text }), + ) + .await?; + Ok(()) +} + +async fn fill_form( + endpoint: &BrowserEndpoint, + target: &CdpTarget, + resolved_action: &BrowserResolvedAction<'_>, +) -> Result<(), String> { + if resolved_action.form_fields.is_empty() { + return Err("fill_form requires fields".to_string()); + } + for field in &resolved_action.form_fields { + let selector = css_string(&field.selector); + let value = css_string(&field.value); + call_page( + endpoint, + target, + "Runtime.evaluate", + json!({ + "expression": format!( + "(() => {{ const el = document.querySelector({selector}); if (!el) throw new Error('field not found'); el.focus(); el.value = {value}; el.dispatchEvent(new Event('input', {{ bubbles: true }})); el.dispatchEvent(new Event('change', {{ bubbles: true }})); return true; }})()" + ), + "awaitPromise": true, + "returnByValue": true + }), + ) + .await?; + } + Ok(()) +} + +async fn press_key( + endpoint: &BrowserEndpoint, + target: &CdpTarget, + request: &super::types::BrowserActRequest, +) -> Result<(), String> { + let key = request + .key + .as_deref() + .ok_or_else(|| "press_key requires key".to_string())?; + call_page( + endpoint, + target, + "Input.dispatchKeyEvent", + json!({ "type": "keyDown", "key": key }), + ) + .await?; + call_page( + endpoint, + target, + "Input.dispatchKeyEvent", + json!({ "type": "keyUp", "key": key }), + ) + .await?; + Ok(()) +} + +async fn scroll( + endpoint: &BrowserEndpoint, + target: &CdpTarget, + request: &super::types::BrowserActRequest, +) -> Result<(), String> { + let x = request.delta_x.unwrap_or(0); + let y = request.delta_y.unwrap_or(600); + call_page( + endpoint, + target, + "Runtime.evaluate", + json!({ + "expression": format!("window.scrollBy({}, {}); true", x, y), + "returnByValue": true + }), + ) + .await?; + Ok(()) +} + +async fn wait(request: &super::types::BrowserActRequest) -> Result<(), String> { + let timeout_ms = request.timeout_ms.unwrap_or(1000).clamp(100, 120_000); + tokio::time::sleep(Duration::from_millis(timeout_ms)).await; + Ok(()) +} + +async fn focus_and_verify_editable( + endpoint: &BrowserEndpoint, + target: &CdpTarget, + reference: &BrowserDomRef, +) -> Result<(), String> { + let selector = css_string(&reference.selector); + let result = call_page( + endpoint, + target, + "Runtime.evaluate", + json!({ + "expression": format!( + "(() => {{ const el = document.querySelector({selector}); if (!el) return {{ ok: false, reason: 'not_found' }}; el.focus(); const active = document.activeElement === el; const editable = !el.disabled && !el.readOnly && (el.isContentEditable || ['INPUT','TEXTAREA','SELECT'].includes(el.tagName)); return {{ ok: active && editable, active, editable, tag: el.tagName, type: el.type || '' }}; }})()" + ), + "awaitPromise": true, + "returnByValue": true + }), + ) + .await?; + let value = result + .get("result") + .and_then(|result| result.get("value")) + .cloned() + .unwrap_or(Value::Null); + if value.get("ok").and_then(Value::as_bool) == Some(true) { + Ok(()) + } else { + Err("Browser target is not active and editable".to_string()) + } +} + +fn write_screenshot_artifact(base64_png: &str) -> Result { + write_screenshot_artifact_in(base64_png, &screenshot_artifact_directory()) +} + +fn write_screenshot_artifact_in(base64_png: &str, directory: &Path) -> Result { + if base64_png.len() > MAX_SCREENSHOT_ARTIFACT_BASE64_BYTES { + return Err("Browser screenshot artifact is too large".to_string()); + } + let bytes = base64::engine::general_purpose::STANDARD + .decode(base64_png) + .map_err(|error| format!("Browser screenshot was not valid base64: {error}"))?; + if bytes.len() > MAX_SCREENSHOT_ARTIFACT_BYTES { + return Err("Browser screenshot artifact is too large".to_string()); + } + let _ = prune_screenshot_artifacts_in(directory, SystemTime::now()); + fs::create_dir_all(directory).map_err(|error| { + format!("Failed to create browser screenshot artifact directory: {error}") + })?; + harden_artifact_directory(directory)?; + let nonce = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|duration| duration.as_nanos()) + .unwrap_or_default(); + let path = directory.join(format!( + "browser-screenshot-{}-{nonce}.png", + std::process::id() + )); + fs::write(&path, bytes) + .map_err(|error| format!("Failed to write browser screenshot artifact: {error}"))?; + harden_artifact_file(&path)?; + let _ = prune_screenshot_artifacts_in(directory, SystemTime::now()); + Ok(path) +} + +pub fn prune_screenshot_artifacts() -> Result<(), String> { + prune_screenshot_artifacts_in(&screenshot_artifact_directory(), SystemTime::now()) +} + +fn screenshot_artifact_directory() -> PathBuf { + static ARTIFACT_DIR: OnceLock = OnceLock::new(); + ARTIFACT_DIR + .get_or_init(|| { + tempfile::Builder::new() + .prefix("touchai-browser-artifacts-") + .tempdir() + .map(|dir| dir.keep()) + .unwrap_or_else(|_| { + std::env::temp_dir().join(format!( + "touchai-browser-artifacts-{}-{}", + std::process::id(), + SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|duration| duration.as_nanos()) + .unwrap_or_default() + )) + }) + }) + .clone() +} + +#[cfg(unix)] +fn harden_artifact_directory(path: &Path) -> Result<(), String> { + use std::os::unix::fs::PermissionsExt; + + fs::set_permissions(path, fs::Permissions::from_mode(0o700)).map_err(|error| { + format!("Failed to restrict browser screenshot artifact directory permissions: {error}") + }) +} + +#[cfg(not(unix))] +fn harden_artifact_directory(_path: &Path) -> Result<(), String> { + Ok(()) +} + +#[cfg(unix)] +fn harden_artifact_file(path: &Path) -> Result<(), String> { + use std::os::unix::fs::PermissionsExt; + + fs::set_permissions(path, fs::Permissions::from_mode(0o600)).map_err(|error| { + format!("Failed to restrict browser screenshot artifact permissions: {error}") + }) +} + +#[cfg(not(unix))] +fn harden_artifact_file(_path: &Path) -> Result<(), String> { + Ok(()) +} + +fn prune_screenshot_artifacts_in(directory: &Path, now: SystemTime) -> Result<(), String> { + if !directory.exists() { + return Ok(()); + } + + let mut retained = Vec::new(); + let entries = fs::read_dir(directory).map_err(|error| { + format!("Failed to read browser screenshot artifact directory: {error}") + })?; + for entry in entries.flatten() { + let file_name = entry.file_name(); + let file_name = file_name.to_string_lossy(); + if !file_name.starts_with(SCREENSHOT_ARTIFACT_PREFIX) || !file_name.ends_with(".png") { + continue; + } + + let path = entry.path(); + let Ok(metadata) = entry.metadata() else { + continue; + }; + let modified = metadata.modified().unwrap_or(UNIX_EPOCH); + let expired = now + .duration_since(modified) + .map(|age| age > SCREENSHOT_ARTIFACT_TTL) + .unwrap_or(false); + if expired { + let _ = fs::remove_file(path); + } else { + retained.push((modified, path)); + } + } + + if retained.len() > MAX_SCREENSHOT_ARTIFACTS { + retained.sort_by_key(|(modified, _)| *modified); + let excess = retained.len() - MAX_SCREENSHOT_ARTIFACTS; + for (_, path) in retained.into_iter().take(excess) { + let _ = fs::remove_file(path); + } + } + + Ok(()) +} + +fn css_string(value: &str) -> String { + serde_json::to_string(value).unwrap_or_else(|_| "\"\"".to_string()) +} + +const DOM_REF_SCRIPT: &str = r#" +(() => { + const cssEscape = globalThis.CSS && CSS.escape ? CSS.escape.bind(CSS) : (value) => String(value).replace(/[^a-zA-Z0-9_-]/g, '\\$&'); + const selectorFor = (el) => { + if (el.id) return `#${cssEscape(el.id)}`; + const parts = []; + let node = el; + while (node && node.nodeType === Node.ELEMENT_NODE && parts.length < 5) { + let part = node.localName; + if (!part) break; + const testId = node.getAttribute('data-testid'); + if (testId) { + part += `[data-testid="${String(testId).replace(/"/g, '\\"')}"]`; + parts.unshift(part); + break; + } + const parent = node.parentElement; + if (parent) { + const siblings = Array.from(parent.children).filter((child) => child.localName === node.localName); + if (siblings.length > 1) part += `:nth-of-type(${siblings.indexOf(node) + 1})`; + } + parts.unshift(part); + node = parent; + } + return parts.join(' > '); + }; + const labelFor = (el) => { + const clean = (value) => String(value || '').trim().replace(/\s+/g, ' ').slice(0, 120); + const aria = clean(el.getAttribute('aria-label')); + const labelledBy = el.getAttribute('aria-labelledby'); + const labelledText = clean(labelledBy ? labelledBy.split(/\s+/).map((id) => document.getElementById(id)?.innerText || '').join(' ') : ''); + const tag = el.tagName.toLowerCase(); + const state = clean(['input','textarea','select'].includes(tag) ? el.value : (el.innerText || el.textContent || '')); + const base = aria || labelledText || clean(el.placeholder) || clean(el.title) || clean(el.name) || clean(el.id) || clean(el.tagName); + return state && state !== base ? `${base}: ${state}` : base; + }; + return Array.from(document.querySelectorAll('a[href],button,input,textarea,select,[role="button"],[contenteditable="true"],[tabindex]')) + .filter((el) => { + const style = getComputedStyle(el); + return style.visibility !== 'hidden' && style.display !== 'none'; + }) + .slice(0, 100) + .map((el) => { + const rect = el.getBoundingClientRect(); + const tag = el.tagName.toLowerCase(); + const role = el.getAttribute('role') || ''; + const editable = !el.disabled && !el.readOnly && (el.isContentEditable || ['input','textarea','select'].includes(tag)); + return { + selector: selectorFor(el), + description: `${tag}${role ? ` role=${role}` : ''}: ${labelFor(el)}`, + editable, + x: rect.left + rect.width / 2, + y: rect.top + rect.height / 2 + }; + }); +})() +"#; + +#[cfg(test)] +mod tests { + use std::{fs, time::Duration}; + + use base64::Engine; + use serde_json::json; + use tempfile::TempDir; + + use super::*; + + #[test] + fn diagnostic_lines_are_byte_bounded() { + let long_text = "a".repeat(MAX_DIAGNOSTIC_ENTRY_BYTES * 2); + let console = format_runtime_console_event(&json!({ + "type": "error", + "args": [{ "value": long_text }] + })); + let mut diagnostics = PageDiagnostics::default(); + + assert!(diagnostics.push_console(console)); + + assert_eq!(diagnostics.console.len(), 1); + assert!(diagnostics.console[0].len() <= MAX_DIAGNOSTIC_ENTRY_BYTES); + assert!(diagnostics.console[0].contains("[truncated]")); + } + + #[test] + fn oversized_diagnostic_messages_are_dropped_before_json_parse() { + let oversized = "a".repeat(MAX_CDP_DIAGNOSTIC_MESSAGE_BYTES + 1); + + assert_eq!( + diagnostic_message_text(Message::Text(oversized.into())), + None + ); + } + + #[test] + fn bounded_json_bytes_reject_oversized_http_bodies() { + let oversized = vec![b' '; MAX_CDP_HTTP_BODY_BYTES + 1]; + + let error = parse_bounded_json_bytes::>( + &oversized, + MAX_CDP_HTTP_BODY_BYTES, + "browser tab listing", + ) + .expect_err("oversized body"); + + assert_eq!(error, "browser tab listing exceeded the size limit"); + } + + #[test] + fn command_messages_are_byte_bounded_before_json_parse() { + let oversized = "a".repeat(MAX_CDP_COMMAND_MESSAGE_BYTES + 1); + + assert_eq!( + cdp_message_text( + Message::Text(oversized.into()), + MAX_CDP_COMMAND_MESSAGE_BYTES + ), + None + ); + } + + #[test] + fn diagnostic_total_bytes_are_bounded() { + let mut diagnostics = PageDiagnostics::default(); + + for index in 0..100 { + diagnostics.push_network(format!("{index}: {}", "b".repeat(2048))); + } + + assert!(diagnostics.total_bytes <= MAX_DIAGNOSTIC_TOTAL_BYTES); + assert!(diagnostics.network.len() < 100); + } + + #[test] + fn screenshot_artifact_writer_prunes_to_retention_cap() { + let temp = TempDir::new().expect("temp dir"); + let artifact_dir = temp.path().join("touchai-browser-artifacts"); + fs::create_dir_all(&artifact_dir).expect("artifact dir"); + for index in 0..(MAX_SCREENSHOT_ARTIFACTS + 5) { + fs::write( + artifact_dir.join(format!( + "{SCREENSHOT_ARTIFACT_PREFIX}{}-{index}.png", + std::process::id() + )), + [index as u8], + ) + .expect("write artifact"); + } + + prune_screenshot_artifacts_in(&artifact_dir, SystemTime::now()).expect("prune artifacts"); + + let retained = fs::read_dir(&artifact_dir) + .expect("read artifact dir") + .flatten() + .filter(|entry| { + entry + .file_name() + .to_string_lossy() + .starts_with(SCREENSHOT_ARTIFACT_PREFIX) + }) + .count(); + assert_eq!(retained, MAX_SCREENSHOT_ARTIFACTS); + } + + #[test] + fn screenshot_artifact_prune_removes_expired_generated_files_and_preserves_unrelated() { + let temp = TempDir::new().expect("temp dir"); + let artifact_dir = temp.path().join("touchai-browser-artifacts"); + fs::create_dir_all(&artifact_dir).expect("artifact dir"); + let old_path = artifact_dir.join(format!("{SCREENSHOT_ARTIFACT_PREFIX}old.png")); + let fresh_path = artifact_dir.join(format!("{SCREENSHOT_ARTIFACT_PREFIX}fresh.png")); + let unrelated_path = artifact_dir.join("unrelated.png"); + fs::write(&old_path, [1]).expect("write old"); + fs::write(&fresh_path, [2]).expect("write fresh"); + fs::write(&unrelated_path, [3]).expect("write unrelated"); + + let now = SystemTime::now() + SCREENSHOT_ARTIFACT_TTL + Duration::from_secs(1); + prune_screenshot_artifacts_in(&artifact_dir, now).expect("prune artifacts"); + + assert!(!old_path.exists()); + assert!(!fresh_path.exists()); + assert!(unrelated_path.exists()); + } + + #[test] + fn screenshot_artifact_writer_decodes_base64_to_png_file() { + let temp = TempDir::new().expect("temp dir"); + let artifact_dir = temp.path().join("touchai-browser-artifacts"); + + let payload = base64::engine::general_purpose::STANDARD.encode([137, 80, 78, 71]); + let path = write_screenshot_artifact_in(&payload, &artifact_dir).expect("write artifact"); + + assert_eq!( + fs::read(path).expect("read artifact"), + vec![137, 80, 78, 71] + ); + } + + #[test] + fn screenshot_artifact_directory_is_randomized_per_runtime() { + let first = screenshot_artifact_directory(); + let second = screenshot_artifact_directory(); + + assert_eq!(first, second); + assert!(first + .file_name() + .and_then(|name| name.to_str()) + .is_some_and(|name| name.starts_with("touchai-browser-artifacts-"))); + assert_ne!( + first, + std::env::temp_dir().join("touchai-browser-artifacts") + ); + } + + #[test] + fn screenshot_artifact_writer_rejects_oversized_payloads() { + let temp = TempDir::new().expect("temp dir"); + let artifact_dir = temp.path().join("touchai-browser-artifacts"); + let payload = "a".repeat(MAX_SCREENSHOT_ARTIFACT_BASE64_BYTES + 1); + + let error = + write_screenshot_artifact_in(&payload, &artifact_dir).expect_err("oversized payload"); + + assert_eq!(error, "Browser screenshot artifact is too large"); + assert!(!artifact_dir.exists()); + } +} diff --git a/apps/desktop/src-tauri/src/core/browser/endpoint.rs b/apps/desktop/src-tauri/src/core/browser/endpoint.rs new file mode 100644 index 00000000..d1043ecc --- /dev/null +++ b/apps/desktop/src-tauri/src/core/browser/endpoint.rs @@ -0,0 +1,166 @@ +use std::{fmt, time::Duration}; + +use reqwest::Url; + +use super::types::{BrowserEndpointSnapshot, CdpVersionResponse}; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct BrowserEndpoint { + pub host: String, + pub port: u16, +} + +impl BrowserEndpoint { + pub fn origin(&self) -> String { + let host = if self.host.contains(':') && !self.host.starts_with('[') { + format!("[{}]", self.host) + } else { + self.host.clone() + }; + format!("http://{}:{}", host, self.port) + } + + pub fn version_url(&self) -> String { + format!("{}/json/version", self.origin()) + } + + pub fn list_url(&self) -> String { + format!("{}/json/list", self.origin()) + } + + pub fn new_tab_url(&self, url: &str) -> String { + format!( + "{}/json/new?{}", + self.origin(), + percent_encode_query_value(url) + ) + } + + pub fn snapshot(&self) -> BrowserEndpointSnapshot { + BrowserEndpointSnapshot { + host: self.host.clone(), + port: self.port, + version_url: self.version_url(), + } + } +} + +impl fmt::Display for BrowserEndpoint { + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(formatter, "{}", self.origin()) + } +} + +pub fn parse_loopback_endpoint(raw: &str) -> Result { + let url = Url::parse(raw).map_err(|_| "Browser endpoint must be a valid URL".to_string())?; + + if url.scheme() != "http" { + return Err("Browser endpoint must use http".to_string()); + } + + if !url.username().is_empty() || url.password().is_some() { + return Err("Browser endpoint must not include credentials".to_string()); + } + + if url.path() != "/" || url.query().is_some() || url.fragment().is_some() { + return Err( + "Browser endpoint must be an origin without path, query, or fragment".to_string(), + ); + } + + let host = url + .host_str() + .ok_or_else(|| "Browser endpoint must include a host".to_string())?; + let host = normalize_loopback_host(host); + if !is_allowed_loopback_host(&host) { + return Err("Browser endpoint must use a loopback host".to_string()); + } + + let port = url + .port() + .ok_or_else(|| "Browser endpoint must include an explicit port".to_string())?; + + Ok(BrowserEndpoint { host, port }) +} + +pub async fn validate_cdp_version_endpoint( + endpoint: &BrowserEndpoint, +) -> Result { + let client = reqwest::Client::builder() + .no_proxy() + .timeout(Duration::from_secs(4)) + .build() + .map_err(|error| format!("Failed to build browser endpoint client: {error}"))?; + + let version = client + .get(endpoint.version_url()) + .send() + .await + .map_err(|error| format!("Failed to query browser endpoint: {error}"))? + .error_for_status() + .map_err(|error| format!("Browser endpoint returned an error: {error}"))? + .json::() + .await + .map_err(|error| format!("Browser endpoint did not return valid /json/version: {error}"))?; + + let ws_url = version + .web_socket_debugger_url + .as_deref() + .ok_or_else(|| "Browser endpoint did not expose webSocketDebuggerUrl".to_string())?; + validate_loopback_websocket(ws_url, endpoint)?; + Ok(version) +} + +pub fn validate_loopback_websocket(raw: &str, endpoint: &BrowserEndpoint) -> Result<(), String> { + let url = Url::parse(raw).map_err(|_| "CDP websocket URL is invalid".to_string())?; + if url.scheme() != "ws" { + return Err("CDP websocket URL must use ws".to_string()); + } + if !url.username().is_empty() || url.password().is_some() { + return Err("CDP websocket URL must not include credentials".to_string()); + } + let host = url + .host_str() + .ok_or_else(|| "CDP websocket URL must include a host".to_string())?; + let host = normalize_loopback_host(host); + if !is_allowed_loopback_host(&host) || url.port() != Some(endpoint.port) { + return Err( + "CDP websocket URL must use the same loopback port on a loopback host".to_string(), + ); + } + Ok(()) +} + +pub fn validate_stale_navigation_token(supplied: &str, current: &str) -> Result<(), String> { + if supplied == current { + Ok(()) + } else { + Err("Browser ref is stale; observe again before acting".to_string()) + } +} + +fn is_allowed_loopback_host(host: &str) -> bool { + matches!( + host.to_ascii_lowercase().as_str(), + "127.0.0.1" | "localhost" | "::1" + ) +} + +fn normalize_loopback_host(host: &str) -> String { + host.trim_start_matches('[') + .trim_end_matches(']') + .to_string() +} + +fn percent_encode_query_value(value: &str) -> String { + let mut output = String::new(); + for byte in value.bytes() { + match byte { + b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => { + output.push(byte as char); + } + _ => output.push_str(&format!("%{byte:02X}")), + } + } + output +} diff --git a/apps/desktop/src-tauri/src/core/browser/mod.rs b/apps/desktop/src-tauri/src/core/browser/mod.rs new file mode 100644 index 00000000..7cc8002c --- /dev/null +++ b/apps/desktop/src-tauri/src/core/browser/mod.rs @@ -0,0 +1,12 @@ +//! Native Chromium CDP browser runtime. + +pub mod actions; +pub mod cdp; +pub mod endpoint; +pub mod process; +pub mod runtime; +pub mod snapshot; +pub mod types; +pub mod url_policy; + +pub use runtime::BrowserRuntime; diff --git a/apps/desktop/src-tauri/src/core/browser/process.rs b/apps/desktop/src-tauri/src/core/browser/process.rs new file mode 100644 index 00000000..f10f3bcc --- /dev/null +++ b/apps/desktop/src-tauri/src/core/browser/process.rs @@ -0,0 +1,303 @@ +use std::{ + env, fs, + path::{Path, PathBuf}, + process::{Child, Command, Stdio}, + time::{Duration, Instant}, +}; + +use tempfile::TempDir; + +use super::{ + endpoint::BrowserEndpoint, + types::{BrowserDescriptor, BrowserStartRequest}, + url_policy::validate_browser_url, +}; + +#[derive(Debug)] +pub struct ManagedBrowserProcess { + child: Child, + profile_dir: Option, +} + +impl Drop for ManagedBrowserProcess { + fn drop(&mut self) { + let child_is_running = self + .child + .try_wait() + .map(|status| status.is_none()) + .unwrap_or(true); + if child_is_running { + kill_process_tree(self.child.id()); + let _ = self.child.kill(); + } + let _ = self.child.wait(); + if let Some(profile_dir) = self.profile_dir.take() { + remove_profile_dir_with_retry(profile_dir); + } + } +} + +impl ManagedBrowserProcess { + fn profile_path(&self) -> &Path { + self.profile_dir + .as_ref() + .expect("managed browser profile dir") + .path() + } +} + +pub fn discover_installed_browsers() -> Vec { + let mut browsers = Vec::new(); + for (id, name, path) in candidate_browser_paths() { + if path.is_file() { + browsers.push(BrowserDescriptor { + id: id.to_string(), + name: name.to_string(), + path, + }); + } + } + browsers +} + +pub fn launch_managed_browser( + request: BrowserStartRequest, +) -> Result<(BrowserEndpoint, ManagedBrowserProcess), String> { + let startup_url = request + .startup_url + .as_deref() + .map(validate_browser_url) + .transpose()? + .unwrap_or_else(|| "about:blank".to_string()); + let browsers = discover_installed_browsers(); + let browser_path = select_browser_path(&browsers, request.browser_id.as_deref())?; + + let user_data_dir = tempfile::Builder::new() + .prefix(&format!("touchai-browser-{}-", std::process::id())) + .tempdir_in(env::temp_dir()) + .map_err(|error| format!("Failed to create browser profile directory: {error}"))?; + + let mut command = Command::new(&browser_path); + command + .arg(format!("--remote-debugging-address={}", "127.0.0.1")) + .arg("--remote-debugging-port=0") + .arg(format!( + "--user-data-dir={}", + user_data_dir.path().display() + )) + .arg("--no-first-run") + .arg("--no-default-browser-check") + .arg("--disable-background-networking") + .arg("--window-size=1280,900") + .arg(startup_url) + .stdin(Stdio::null()) + .stdout(Stdio::null()) + .stderr(Stdio::null()); + configure_child_group(&mut command); + + let child = command + .spawn() + .map_err(|error| format!("Failed to launch browser: {error}"))?; + let process = ManagedBrowserProcess { + child, + profile_dir: Some(user_data_dir), + }; + let endpoint = match wait_for_devtools_active_port(process.profile_path()) { + Ok(endpoint) => endpoint, + Err(error) => { + drop(process); + return Err(error); + } + }; + + Ok((endpoint, process)) +} + +fn select_browser_path( + browsers: &[BrowserDescriptor], + browser_id: Option<&str>, +) -> Result { + match browser_id { + Some(id) => browsers + .iter() + .find(|browser| browser.id == id) + .map(|browser| browser.path.clone()) + .ok_or_else(|| format!("Supported browser '{id}' was not found")), + None => browsers + .first() + .map(|browser| browser.path.clone()) + .ok_or_else(|| "No installed Chrome or Edge browser was found".to_string()), + } +} + +fn wait_for_devtools_active_port(profile_dir: &Path) -> Result { + let deadline = Instant::now() + Duration::from_secs(8); + let mut last_error = None; + while Instant::now() < deadline { + match read_devtools_active_port(profile_dir) { + Ok(endpoint) => return Ok(endpoint), + Err(error) => { + last_error = Some(error); + std::thread::sleep(Duration::from_millis(50)); + } + } + } + Err(last_error.unwrap_or_else(|| "Browser did not publish DevToolsActivePort".to_string())) +} + +fn read_devtools_active_port(profile_dir: &Path) -> Result { + let path = profile_dir.join("DevToolsActivePort"); + let contents = fs::read_to_string(&path) + .map_err(|error| format!("Failed to read DevToolsActivePort: {error}"))?; + let port_line = contents + .lines() + .next() + .ok_or_else(|| "DevToolsActivePort did not contain a port".to_string())?; + let port = port_line + .parse::() + .map_err(|error| format!("DevToolsActivePort contained an invalid port: {error}"))?; + Ok(BrowserEndpoint { + host: "127.0.0.1".to_string(), + port, + }) +} + +fn remove_profile_dir_with_retry(profile_dir: TempDir) { + let path = profile_dir.path().to_path_buf(); + drop(profile_dir); + let deadline = Instant::now() + Duration::from_secs(8); + while path.exists() && Instant::now() < deadline { + let _ = fs::remove_dir_all(&path); + if !path.exists() { + break; + } + std::thread::sleep(Duration::from_millis(100)); + } +} + +fn candidate_browser_paths() -> Vec<(&'static str, &'static str, PathBuf)> { + let mut paths = Vec::new(); + + #[cfg(windows)] + { + for root_var in ["PROGRAMFILES", "PROGRAMFILES(X86)", "LOCALAPPDATA"] { + if let Some(root) = env::var_os(root_var) { + let root = PathBuf::from(root); + paths.push(( + "chrome", + "Google Chrome", + root.join("Google\\Chrome\\Application\\chrome.exe"), + )); + paths.push(( + "edge", + "Microsoft Edge", + root.join("Microsoft\\Edge\\Application\\msedge.exe"), + )); + } + } + } + + #[cfg(target_os = "macos")] + { + paths.push(( + "chrome", + "Google Chrome", + PathBuf::from("/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"), + )); + paths.push(( + "edge", + "Microsoft Edge", + PathBuf::from("/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge"), + )); + } + + #[cfg(target_os = "linux")] + { + for path in [ + "/usr/bin/google-chrome", + "/usr/bin/google-chrome-stable", + "/usr/bin/chromium", + "/usr/bin/chromium-browser", + ] { + paths.push(("chrome", "Google Chrome", PathBuf::from(path))); + } + paths.push(( + "edge", + "Microsoft Edge", + PathBuf::from("/usr/bin/microsoft-edge"), + )); + } + + paths +} + +#[cfg(windows)] +fn kill_process_tree(pid: u32) { + let _ = Command::new("taskkill") + .args(["/PID", &pid.to_string(), "/T", "/F"]) + .stdin(Stdio::null()) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status(); +} + +#[cfg(not(windows))] +fn kill_process_tree(pid: u32) { + let pid = pid.to_string(); + let process_group = format!("-{pid}"); + let _ = Command::new("kill") + .args(["-TERM", &process_group]) + .stdin(Stdio::null()) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status(); + let _ = Command::new("kill") + .args(["-KILL", &process_group]) + .stdin(Stdio::null()) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status(); +} + +#[cfg(windows)] +fn configure_child_group(_command: &mut Command) {} + +#[cfg(unix)] +fn configure_child_group(command: &mut Command) { + use std::os::unix::process::CommandExt; + + command.process_group(0); +} + +#[cfg(test)] +mod tests { + use std::fs; + + use tempfile::TempDir; + + use super::*; + + #[test] + fn reads_devtools_active_port_from_owned_profile() { + let profile = TempDir::new().expect("temp profile"); + fs::write( + profile.path().join("DevToolsActivePort"), + "54321\n/devtools/browser/test\n", + ) + .expect("write DevToolsActivePort"); + + let endpoint = read_devtools_active_port(profile.path()).expect("endpoint"); + + assert_eq!(endpoint.host, "127.0.0.1"); + assert_eq!(endpoint.port, 54321); + } + + #[test] + fn rejects_invalid_devtools_active_port_content() { + let profile = TempDir::new().expect("temp profile"); + fs::write(profile.path().join("DevToolsActivePort"), "not-a-port\n") + .expect("write DevToolsActivePort"); + + assert!(read_devtools_active_port(profile.path()).is_err()); + } +} diff --git a/apps/desktop/src-tauri/src/core/browser/runtime.rs b/apps/desktop/src-tauri/src/core/browser/runtime.rs new file mode 100644 index 00000000..cbce6f49 --- /dev/null +++ b/apps/desktop/src-tauri/src/core/browser/runtime.rs @@ -0,0 +1,695 @@ +use std::sync::{Arc, Mutex}; + +use super::{ + actions, cdp, + endpoint::{validate_cdp_version_endpoint, validate_stale_navigation_token, BrowserEndpoint}, + process::{self, ManagedBrowserProcess}, + snapshot, + types::{ + BrowserActRequest, BrowserActResult, BrowserDomRef, BrowserNavigateRequest, + BrowserObservation, BrowserObserveOperation, BrowserObserveRequest, BrowserStartRequest, + BrowserStatus, BrowserStatusKind, BrowserTab, BrowserTabRequest, + }, + url_policy::validate_browser_url, +}; + +#[derive(Clone, Default)] +pub struct BrowserRuntime { + inner: Arc>, +} + +#[derive(Default)] +struct BrowserState { + lifecycle_generation: u64, + status: BrowserStatusKind, + managed: bool, + endpoint: Option, + active_tab_id: Option, + tabs: Vec, + refs: Vec, + observed_tab_id: Option, + observed_page_token: Option, + observed_observation_token: Option, + observation_sequence: u64, + process: Option, + error: Option, +} + +impl BrowserRuntime { + pub fn new() -> Self { + Self::default() + } + + pub fn status(&self) -> BrowserStatus { + self.inner.lock().expect("browser runtime lock").status() + } + + pub fn stop(&self) -> BrowserStatus { + let process = { + let mut state = self.inner.lock().expect("browser runtime lock"); + state.lifecycle_generation = state.lifecycle_generation.saturating_add(1); + let process = state.process.take(); + state.status = BrowserStatusKind::Idle; + state.managed = false; + state.endpoint = None; + state.active_tab_id = None; + state.tabs.clear(); + state.refs.clear(); + state.observed_tab_id = None; + state.observed_page_token = None; + state.observed_observation_token = None; + state.error = None; + process + }; + drop(process); + let _ = cdp::prune_screenshot_artifacts(); + self.status() + } + + pub async fn start(&self, request: BrowserStartRequest) -> Result { + let generation = self.begin_start(); + match process::launch_managed_browser(request) { + Ok((endpoint, process)) => { + if !self.is_current_lifecycle_generation(generation) { + drop(process); + return Ok(self.status()); + } + if let Err(error) = wait_for_endpoint(&endpoint).await { + if self.is_current_lifecycle_generation(generation) { + self.set_error(error.clone()); + } + return Err(error); + } + if !self.is_current_lifecycle_generation(generation) { + drop(process); + return Ok(self.status()); + } + let stale_process = { + let mut state = self.inner.lock().expect("browser runtime lock"); + if state.lifecycle_generation != generation { + Some(process) + } else { + state.endpoint = Some(endpoint); + state.process = Some(process); + state.managed = true; + state.status = BrowserStatusKind::Connected; + state.error = None; + state.refs.clear(); + state.observed_tab_id = None; + state.observed_page_token = None; + state.observed_observation_token = None; + None + } + }; + if let Some(process) = stale_process { + drop(process); + return Ok(self.status()); + } + let _ = self.refresh_tabs().await; + Ok(self.status()) + } + Err(error) => { + if self.is_current_lifecycle_generation(generation) { + self.set_error(error.clone()); + } + Err(error) + } + } + } + + pub async fn refresh_tabs(&self) -> Result { + let (generation, endpoint, current_active_tab_id) = self.connected_snapshot()?; + let tabs = cdp::list_tabs(&endpoint, current_active_tab_id.as_deref()).await?; + let active_tab_id = tabs + .iter() + .find(|tab| tab.active) + .or_else(|| tabs.first()) + .map(|tab| tab.id.clone()); + { + let mut state = self.inner.lock().expect("browser runtime lock"); + if state.lifecycle_generation != generation + || state.endpoint.as_ref() != Some(&endpoint) + || state.status != BrowserStatusKind::Connected + { + return Ok(state.status()); + } + state.active_tab_id = active_tab_id; + state.tabs = tabs; + state.status = BrowserStatusKind::Connected; + state.error = None; + } + Ok(self.status()) + } + + pub async fn navigate(&self, request: BrowserNavigateRequest) -> Result { + let url = validate_browser_url(&request.url)?; + let endpoint = self.endpoint()?; + let fallback_active = self.active_tab_id(); + let tab_id = request.tab_id.as_deref().or(fallback_active.as_deref()); + if tab_id.is_some() { + cdp::navigate_current_page(&endpoint, tab_id, &url).await?; + } else { + cdp::create_tab(&endpoint, &url).await?; + } + self.clear_observed_refs(); + self.refresh_tabs().await + } + + pub async fn history_action( + &self, + request: BrowserTabRequest, + action: &str, + ) -> Result { + self.ensure_connected()?; + let endpoint = self.endpoint()?; + let fallback_active = self.active_tab_id(); + let tab_id = request.tab_id.as_deref().or(fallback_active.as_deref()); + cdp::history_action(&endpoint, tab_id, action).await?; + self.clear_observed_refs(); + self.refresh_tabs().await + } + + pub async fn observe( + &self, + request: BrowserObserveRequest, + ) -> Result { + self.ensure_connected()?; + let (generation, endpoint, fallback_active) = self.connected_snapshot()?; + let include_screenshot = request.operation == BrowserObserveOperation::Screenshot; + let include_dom = request.operation == BrowserObserveOperation::Snapshot; + let include_console = request.include_console.unwrap_or(false); + let include_network = request.include_network.unwrap_or(false); + let mut page = cdp::observe_page( + &endpoint, + request.tab_id.as_deref().or(fallback_active.as_deref()), + include_dom, + include_screenshot, + include_console, + include_network, + ) + .await?; + let status = self.refresh_tabs().await?; + if !self.is_current_connected_generation(generation, &endpoint) { + return Err("Browser is not connected".to_string()); + } + let observed_tab_id = current_action_tab( + &status.tabs, + request.tab_id.as_deref().or(fallback_active.as_deref()), + ) + .map(|tab| tab.id.clone()); + let observed_page_token = page.navigation_token.clone(); + let observation_token = self.next_observation_token(); + page.navigation_token = Some(observation_token.clone()); + for reference in &mut page.refs { + reference.navigation_token = observation_token.clone(); + } + { + let mut state = self.inner.lock().expect("browser runtime lock"); + state.refs = page.refs.clone(); + state.observed_tab_id = observed_tab_id; + state.observed_page_token = observed_page_token; + state.observed_observation_token = Some(observation_token); + } + Ok(snapshot::page_observation(status, page)) + } + + pub async fn act(&self, request: BrowserActRequest) -> Result { + self.ensure_connected()?; + let (generation, endpoint, fallback_active) = self.connected_snapshot()?; + let refs = self + .inner + .lock() + .expect("browser runtime lock") + .refs + .clone(); + let resolved_action = actions::resolve_ref_action(&request, &refs)?; + let target_tab_id = request + .tab_id + .as_deref() + .or(fallback_active.as_deref()) + .map(str::to_string); + let status = self.refresh_tabs().await?; + if !self.is_current_connected_generation(generation, &endpoint) { + return Err("Browser is not connected".to_string()); + } + validate_current_observation( + &resolved_action, + &status.tabs, + target_tab_id.as_deref().or(status.active_tab_id.as_deref()), + &self.observation_guard(), + )?; + cdp::dispatch_action( + &endpoint, + request.tab_id.as_deref().or(fallback_active.as_deref()), + &request, + resolved_action, + ) + .await + } + + fn connected_snapshot(&self) -> Result<(u64, BrowserEndpoint, Option), String> { + let state = self.inner.lock().expect("browser runtime lock"); + if state.status != BrowserStatusKind::Connected { + return Err("Browser is not connected".to_string()); + } + let endpoint = state + .endpoint + .clone() + .ok_or_else(|| "Browser is not connected".to_string())?; + Ok(( + state.lifecycle_generation, + endpoint, + state.active_tab_id.clone(), + )) + } + + fn endpoint(&self) -> Result { + let state = self.inner.lock().expect("browser runtime lock"); + if state.status != BrowserStatusKind::Connected { + return Err("Browser is not connected".to_string()); + } + + state + .endpoint + .clone() + .ok_or_else(|| "Browser is not connected".to_string()) + } + + fn ensure_connected(&self) -> Result<(), String> { + if self.inner.lock().expect("browser runtime lock").status == BrowserStatusKind::Connected { + Ok(()) + } else { + Err("Browser is not connected".to_string()) + } + } + + fn active_tab_id(&self) -> Option { + self.inner + .lock() + .expect("browser runtime lock") + .active_tab_id + .clone() + } + + fn begin_start(&self) -> u64 { + self.begin_lifecycle_transition() + } + + fn begin_lifecycle_transition(&self) -> u64 { + let mut state = self.inner.lock().expect("browser runtime lock"); + state.lifecycle_generation = state.lifecycle_generation.saturating_add(1); + state.status = BrowserStatusKind::Starting; + state.error = None; + state.endpoint = None; + state.active_tab_id = None; + state.tabs.clear(); + state.refs.clear(); + state.observed_tab_id = None; + state.observed_page_token = None; + state.observed_observation_token = None; + state.lifecycle_generation + } + + fn is_current_lifecycle_generation(&self, generation: u64) -> bool { + self.inner + .lock() + .expect("browser runtime lock") + .lifecycle_generation + == generation + } + + fn is_current_connected_generation(&self, generation: u64, endpoint: &BrowserEndpoint) -> bool { + let state = self.inner.lock().expect("browser runtime lock"); + state.lifecycle_generation == generation + && state.endpoint.as_ref() == Some(endpoint) + && state.status == BrowserStatusKind::Connected + } + + fn set_error(&self, error: String) { + let process = { + let mut state = self.inner.lock().expect("browser runtime lock"); + state.status = BrowserStatusKind::Error; + state.error = Some(redact_browser_endpoint_urls(&error)); + state.endpoint = None; + state.tabs.clear(); + state.active_tab_id = None; + let process = state.process.take(); + state.managed = false; + state.refs.clear(); + state.observed_tab_id = None; + state.observed_page_token = None; + state.observed_observation_token = None; + process + }; + drop(process); + } + + fn clear_observed_refs(&self) { + let mut state = self.inner.lock().expect("browser runtime lock"); + state.refs.clear(); + state.observed_tab_id = None; + state.observed_page_token = None; + state.observed_observation_token = None; + } + + fn next_observation_token(&self) -> String { + let mut state = self.inner.lock().expect("browser runtime lock"); + state.observation_sequence = state.observation_sequence.saturating_add(1); + format!("obs-{}", state.observation_sequence) + } + + fn observation_guard(&self) -> ObservationGuard { + let state = self.inner.lock().expect("browser runtime lock"); + ObservationGuard { + tab_id: state.observed_tab_id.clone(), + page_token: state.observed_page_token.clone(), + observation_token: state.observed_observation_token.clone(), + } + } + + #[cfg(test)] + fn begin_start_for_tests(&self) -> u64 { + self.begin_start() + } + + #[cfg(test)] + fn set_connected_endpoint_for_tests(&self, endpoint: BrowserEndpoint) { + let mut state = self.inner.lock().expect("browser runtime lock"); + state.endpoint = Some(endpoint); + state.status = BrowserStatusKind::Connected; + state.managed = false; + } + + #[cfg(test)] + fn endpoint_for_tests(&self) -> Result { + self.endpoint() + } + + #[cfg(test)] + fn is_current_lifecycle_generation_for_tests(&self, generation: u64) -> bool { + self.is_current_lifecycle_generation(generation) + } +} + +impl BrowserState { + fn status(&self) -> BrowserStatus { + let connected = self.status == BrowserStatusKind::Connected; + BrowserStatus { + status: self.status, + managed: self.managed, + active_tab_id: connected.then(|| self.active_tab_id.clone()).flatten(), + tabs: if connected { + self.tabs.clone() + } else { + Vec::new() + }, + error: self.error.clone(), + } + } +} + +async fn wait_for_endpoint(endpoint: &BrowserEndpoint) -> Result<(), String> { + let deadline = std::time::Instant::now() + std::time::Duration::from_secs(8); + let mut last_error = None; + while std::time::Instant::now() < deadline { + match validate_cdp_version_endpoint(endpoint).await { + Ok(_) => return Ok(()), + Err(error) => { + last_error = Some(error); + tokio::time::sleep(std::time::Duration::from_millis(150)).await; + } + } + } + Err(last_error.unwrap_or_else(|| "Browser endpoint did not become ready".to_string())) +} + +#[derive(Debug, Clone)] +struct ObservationGuard { + tab_id: Option, + page_token: Option, + observation_token: Option, +} + +fn validate_current_observation( + resolved_action: &actions::BrowserResolvedAction<'_>, + tabs: &[BrowserTab], + tab_id: Option<&str>, + guard: &ObservationGuard, +) -> Result<(), String> { + let Some(expected_token) = action_navigation_token(resolved_action) else { + if resolved_action.requires_current_observation { + let supplied = resolved_action + .page_navigation_token + .as_deref() + .ok_or_else(|| { + "Browser action requires navigationToken from browser_observe".to_string() + })?; + let Some(tab) = current_action_tab(tabs, tab_id) else { + return Ok(()); + }; + if guard.tab_id.as_deref() != Some(tab.id.as_str()) { + return Err("Browser ref is stale; observe again before acting".to_string()); + } + validate_stale_navigation_token( + guard.page_token.as_deref().unwrap_or_default(), + &tab.navigation_token, + )?; + return if guard.observation_token.as_deref() == Some(supplied) { + Ok(()) + } else { + Err("Browser ref is stale; observe again before acting".to_string()) + }; + } + return Ok(()); + }; + let Some(tab) = current_action_tab(tabs, tab_id) else { + return Ok(()); + }; + if guard.tab_id.as_deref() != Some(tab.id.as_str()) { + return Err("Browser ref is stale; observe again before acting".to_string()); + } + validate_stale_navigation_token( + guard.page_token.as_deref().unwrap_or_default(), + &tab.navigation_token, + )?; + if resolved_action + .reference + .is_some_and(|reference| reference.navigation_token == expected_token) + || resolved_action + .form_fields + .iter() + .any(|field| field.navigation_token == expected_token) + { + Ok(()) + } else { + Err("Browser ref is stale; observe again before acting".to_string()) + } +} + +fn action_navigation_token<'a>( + resolved_action: &'a actions::BrowserResolvedAction<'_>, +) -> Option<&'a str> { + resolved_action + .reference + .map(|reference| reference.navigation_token.as_str()) + .or_else(|| { + resolved_action + .form_fields + .first() + .map(|field| field.navigation_token.as_str()) + }) +} + +fn current_action_tab<'a>(tabs: &'a [BrowserTab], tab_id: Option<&str>) -> Option<&'a BrowserTab> { + tab_id + .and_then(|id| tabs.iter().find(|tab| tab.id == id)) + .or_else(|| tabs.iter().find(|tab| tab.active)) + .or_else(|| tabs.first()) +} + +fn redact_browser_endpoint_urls(input: &str) -> String { + let mut output = input.to_string(); + for prefix in ["http://127.0.0.1:", "http://localhost:", "http://[::1]:"] { + output = redact_urls_with_prefix(&output, prefix); + } + output +} + +fn redact_urls_with_prefix(input: &str, prefix: &str) -> String { + let mut output = String::with_capacity(input.len()); + let mut remaining = input; + while let Some(index) = remaining.find(prefix) { + output.push_str(&remaining[..index]); + output.push_str("[browser endpoint]"); + let after_prefix = &remaining[index + prefix.len()..]; + let consumed = after_prefix + .char_indices() + .find(|(_, character)| { + character.is_whitespace() || matches!(character, '"' | '\'' | ')' | ']' | '}') + }) + .map(|(offset, _)| offset) + .unwrap_or(after_prefix.len()); + remaining = &after_prefix[consumed..]; + } + output.push_str(remaining); + output +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn stop_invalidates_in_flight_start_generation() { + let runtime = BrowserRuntime::new(); + + let generation = runtime.begin_start_for_tests(); + runtime.stop(); + + assert!(!runtime.is_current_lifecycle_generation_for_tests(generation)); + assert_eq!(runtime.status().status, BrowserStatusKind::Idle); + } + + #[test] + fn endpoint_is_not_available_during_lifecycle_transition() { + let runtime = BrowserRuntime::new(); + runtime.set_connected_endpoint_for_tests(BrowserEndpoint { + host: "127.0.0.1".to_string(), + port: 9222, + }); + + runtime.begin_start_for_tests(); + + assert_eq!( + runtime + .endpoint_for_tests() + .expect_err("starting state must not expose stale endpoint"), + "Browser is not connected" + ); + } + + #[test] + fn status_does_not_expose_endpoint_or_tabs_during_lifecycle_transition() { + let runtime = BrowserRuntime::new(); + runtime.set_connected_endpoint_for_tests(BrowserEndpoint { + host: "127.0.0.1".to_string(), + port: 9222, + }); + + runtime.begin_start_for_tests(); + let status = runtime.status(); + + assert_eq!(status.status, BrowserStatusKind::Starting); + assert_eq!(status.active_tab_id, None); + assert!(status.tabs.is_empty()); + } + + #[test] + fn observe_operation_deserialization_is_fail_closed() { + let missing_operation_error = + serde_json::from_value::(serde_json::json!({})) + .expect_err("missing operation"); + assert!( + missing_operation_error + .to_string() + .contains("missing field `operation`"), + "unexpected missing operation error: {missing_operation_error}" + ); + + let unsupported_operation_error = + serde_json::from_value::(serde_json::json!({ + "operation": "console" + })) + .expect_err("unsupported operation"); + assert!( + unsupported_operation_error + .to_string() + .contains("unknown variant `console`"), + "unexpected unsupported operation error: {unsupported_operation_error}" + ); + + assert!( + serde_json::from_value::(serde_json::json!({ + "operation": "state" + })) + .is_ok() + ); + assert!( + serde_json::from_value::(serde_json::json!({ + "operation": "snapshot" + })) + .is_ok() + ); + assert!( + serde_json::from_value::(serde_json::json!({ + "operation": "screenshot" + })) + .is_ok() + ); + } + + #[test] + fn page_actions_require_current_observe_navigation_token() { + let tabs = vec![BrowserTab { + id: "tab-1".to_string(), + url: "https://example.test".to_string(), + title: "Example".to_string(), + active: true, + navigation_token: "nav-current".to_string(), + }]; + let guard = ObservationGuard { + tab_id: Some("tab-1".to_string()), + page_token: Some("nav-current".to_string()), + observation_token: Some("obs-current".to_string()), + }; + let action_without_token = actions::BrowserResolvedAction { + reference: None, + form_fields: Vec::new(), + page_navigation_token: None, + requires_current_observation: true, + }; + + assert_eq!( + validate_current_observation(&action_without_token, &tabs, Some("tab-1"), &guard) + .expect_err("missing page token"), + "Browser action requires navigationToken from browser_observe" + ); + + let stale_action = actions::BrowserResolvedAction { + reference: None, + form_fields: Vec::new(), + page_navigation_token: Some("obs-old".to_string()), + requires_current_observation: true, + }; + assert_eq!( + validate_current_observation(&stale_action, &tabs, Some("tab-1"), &guard) + .expect_err("stale page token"), + "Browser ref is stale; observe again before acting" + ); + + let valid_action = actions::BrowserResolvedAction { + reference: None, + form_fields: Vec::new(), + page_navigation_token: Some("obs-current".to_string()), + requires_current_observation: true, + }; + assert!(validate_current_observation(&valid_action, &tabs, Some("tab-1"), &guard).is_ok()); + } + + #[test] + fn error_status_redacts_loopback_browser_endpoint_urls() { + let runtime = BrowserRuntime::new(); + + runtime.set_error( + "Failed to query browser endpoint: http://127.0.0.1:50123/json/version failed" + .to_string(), + ); + + let status = runtime.status(); + let error = status.error.expect("redacted error"); + assert!(error.contains("[browser endpoint]")); + assert!(!error.contains("127.0.0.1")); + assert!(!error.contains("50123")); + assert!(!error.contains("/json/version")); + } +} diff --git a/apps/desktop/src-tauri/src/core/browser/snapshot.rs b/apps/desktop/src-tauri/src/core/browser/snapshot.rs new file mode 100644 index 00000000..08f1d33c --- /dev/null +++ b/apps/desktop/src-tauri/src/core/browser/snapshot.rs @@ -0,0 +1,18 @@ +use super::{ + cdp::PageSnapshot, + types::{BrowserObservation, BrowserStatus}, +}; + +pub fn page_observation(status: BrowserStatus, page: PageSnapshot) -> BrowserObservation { + BrowserObservation { + status, + url: page.url, + title: page.title, + navigation_token: page.navigation_token, + dom_refs: page.refs, + file_path: page.file_path, + mime_type: page.mime_type, + console: page.console, + network: page.network, + } +} diff --git a/apps/desktop/src-tauri/src/core/browser/types.rs b/apps/desktop/src-tauri/src/core/browser/types.rs new file mode 100644 index 00000000..a7a4b961 --- /dev/null +++ b/apps/desktop/src-tauri/src/core/browser/types.rs @@ -0,0 +1,191 @@ +use std::path::PathBuf; + +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum BrowserStatusKind { + Idle, + Starting, + Connected, + Error, +} + +impl Default for BrowserStatusKind { + fn default() -> Self { + Self::Idle + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct BrowserDescriptor { + pub id: String, + pub name: String, + #[serde(skip_serializing)] + pub path: PathBuf, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct BrowserTab { + pub id: String, + pub url: String, + pub title: String, + pub active: bool, + pub navigation_token: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct BrowserStatus { + pub status: BrowserStatusKind, + pub managed: bool, + pub active_tab_id: Option, + pub tabs: Vec, + pub error: Option, +} + +#[derive(Debug, Clone, Default, Deserialize)] +#[serde(rename_all = "camelCase")] +#[serde(deny_unknown_fields)] +pub struct BrowserStartRequest { + pub browser_id: Option, + pub startup_url: Option, +} + +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +#[serde(deny_unknown_fields)] +pub struct BrowserNavigateRequest { + pub tab_id: Option, + pub url: String, +} + +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +#[serde(deny_unknown_fields)] +pub struct BrowserTabRequest { + pub tab_id: Option, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum BrowserObserveOperation { + State, + Snapshot, + Screenshot, +} + +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +#[serde(deny_unknown_fields)] +pub struct BrowserObserveRequest { + pub operation: BrowserObserveOperation, + pub tab_id: Option, + pub include_console: Option, + pub include_network: Option, +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct BrowserDomRef { + pub ref_id: String, + pub navigation_token: String, + pub description: String, + pub editable: bool, + pub selector: String, + pub x: f64, + pub y: f64, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct BrowserObservation { + pub status: BrowserStatus, + pub url: Option, + pub title: Option, + pub navigation_token: Option, + pub dom_refs: Vec, + pub file_path: Option, + pub mime_type: Option, + pub console: Vec, + pub network: Vec, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum BrowserActOperation { + Click, + Type, + Fill, + FillForm, + PressKey, + Scroll, + Wait, +} + +impl BrowserActOperation { + pub fn as_str(self) -> &'static str { + match self { + Self::Click => "click", + Self::Type => "type", + Self::Fill => "fill", + Self::FillForm => "fill_form", + Self::PressKey => "press_key", + Self::Scroll => "scroll", + Self::Wait => "wait", + } + } +} + +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +#[serde(deny_unknown_fields)] +pub struct BrowserActRequest { + pub action: BrowserActOperation, + pub tab_id: Option, + pub ref_id: Option, + #[serde(alias = "ref")] + pub target_ref: Option, + pub navigation_token: Option, + pub text: Option, + pub value: Option, + pub key: Option, + pub delta_x: Option, + pub delta_y: Option, + pub timeout_ms: Option, + pub fields: Option>, +} + +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +#[serde(deny_unknown_fields)] +pub struct BrowserFormField { + pub ref_id: String, + pub navigation_token: String, + pub value: String, +} + +#[derive(Debug, Clone, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct BrowserActResult { + pub ok: bool, + pub action: String, + pub message: Option, +} + +#[derive(Debug, Clone, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct BrowserEndpointSnapshot { + pub host: String, + pub port: u16, + pub version_url: String, +} + +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct CdpVersionResponse { + #[serde(default, rename = "webSocketDebuggerUrl")] + pub web_socket_debugger_url: Option, +} diff --git a/apps/desktop/src-tauri/src/core/browser/url_policy.rs b/apps/desktop/src-tauri/src/core/browser/url_policy.rs new file mode 100644 index 00000000..33fe6c5a --- /dev/null +++ b/apps/desktop/src-tauri/src/core/browser/url_policy.rs @@ -0,0 +1,23 @@ +use reqwest::Url; + +pub fn validate_browser_url(raw: &str) -> Result { + let value = raw.trim(); + if value.is_empty() { + return Err("Browser URL must not be empty".to_string()); + } + + if value.starts_with('-') || value.chars().any(char::is_control) { + return Err("Browser URL contains unsafe command-line characters".to_string()); + } + + if value.eq_ignore_ascii_case("about:blank") { + return Ok("about:blank".to_string()); + } + + let url = Url::parse(value) + .map_err(|_| "Browser URL must be an absolute http or https URL".to_string())?; + match url.scheme() { + "http" | "https" => Ok(value.to_string()), + _ => Err("Browser URL must use http, https, or about:blank".to_string()), + } +} diff --git a/apps/desktop/src-tauri/src/core/mod.rs b/apps/desktop/src-tauri/src/core/mod.rs index 70925d9d..5a9c2707 100644 --- a/apps/desktop/src-tauri/src/core/mod.rs +++ b/apps/desktop/src-tauri/src/core/mod.rs @@ -4,6 +4,7 @@ //! //! 分为窗口域与系统能力域。 +pub mod browser; pub mod built_in_tools; pub mod database; pub mod mcp; diff --git a/apps/desktop/src-tauri/src/lib.rs b/apps/desktop/src-tauri/src/lib.rs index abc5a880..39d91031 100644 --- a/apps/desktop/src-tauri/src/lib.rs +++ b/apps/desktop/src-tauri/src/lib.rs @@ -110,6 +110,7 @@ pub fn run() { .manage(core::window::status_reminder::SessionStatusReminderNotificationRuntime::new()) .manage(core::window::tray::TrayStatusRuntime::new()) .manage(BuiltInProcessExecutionRegistry::new()) + .manage(core::browser::BrowserRuntime::new()) .manage(McpClientManager::new()) .manage(core::updater::AppUpdaterState::default()) .on_window_event(|window, event| { diff --git a/apps/desktop/src-tauri/src/testing/mod.rs b/apps/desktop/src-tauri/src/testing/mod.rs index a8bb3f28..c2246559 100644 --- a/apps/desktop/src-tauri/src/testing/mod.rs +++ b/apps/desktop/src-tauri/src/testing/mod.rs @@ -8,6 +8,11 @@ use tauri::{ use crate::{ commands, core::{ + browser::{ + actions, endpoint, + types::{BrowserActRequest, BrowserDomRef}, + url_policy, BrowserRuntime, + }, database::DatabaseRuntime, updater::AppUpdaterState, window::{ @@ -30,6 +35,7 @@ pub fn test_builder() -> Builder { .manage(SearchSurfaceRuntime::new()) .manage(SessionStatusReminderNotificationRuntime::for_tests()) .manage(TrayStatusRuntime::new()) + .manage(BrowserRuntime::new()) .manage(AppUpdaterState::default()) } @@ -76,3 +82,43 @@ pub fn session_status_reminder_clear_count(app: &App) -> usize { app.state::() .clear_count() } + +pub fn parse_browser_loopback_endpoint_for_tests(raw: &str) -> Result { + serde_json::to_value(endpoint::parse_loopback_endpoint(raw)?.snapshot()) + .map_err(|error| error.to_string()) +} + +pub fn validate_browser_navigation_token_for_tests( + supplied: &str, + current: &str, +) -> Result<(), String> { + endpoint::validate_stale_navigation_token(supplied, current) +} + +pub fn validate_browser_websocket_endpoint_for_tests( + websocket_url: &str, + endpoint_url: &str, +) -> Result<(), String> { + let endpoint = endpoint::parse_loopback_endpoint(endpoint_url)?; + endpoint::validate_loopback_websocket(websocket_url, &endpoint) +} + +pub fn validate_browser_action_for_tests( + request: serde_json::Value, + refs: Vec, +) -> Result<(), String> { + let request: BrowserActRequest = + serde_json::from_value(request).map_err(|error| error.to_string())?; + let refs: Vec = refs + .into_iter() + .map(serde_json::from_value) + .collect::>() + .map_err(|error| error.to_string())?; + actions::resolve_ref_action(&request, &refs).map(|_| ()) +} + +pub fn validate_browser_url_for_tests(raw: &str) -> Result { + url_policy::validate_browser_url(raw) +} + +pub type BrowserStartRequestForTests = crate::core::browser::types::BrowserStartRequest; diff --git a/apps/desktop/src-tauri/tests/browser_commands.rs b/apps/desktop/src-tauri/tests/browser_commands.rs new file mode 100644 index 00000000..e4945f3f --- /dev/null +++ b/apps/desktop/src-tauri/tests/browser_commands.rs @@ -0,0 +1,590 @@ +mod common; + +use common::{build_test_app, invoke_command_err, invoke_command_ok, TestAppOptions}; +use serde_json::json; +use touchai_lib::testing::{ + parse_browser_loopback_endpoint_for_tests, validate_browser_action_for_tests, + validate_browser_navigation_token_for_tests, validate_browser_url_for_tests, + validate_browser_websocket_endpoint_for_tests, +}; + +#[test] +fn endpoint_validation_accepts_loopback_http_origins_only() { + for (raw, expected_host, expected_origin) in [ + ( + "http://127.0.0.1:9222", + "127.0.0.1", + "http://127.0.0.1:9222", + ), + ( + "http://localhost:9222", + "localhost", + "http://localhost:9222", + ), + ("http://[::1]:9222", "::1", "http://[::1]:9222"), + ] { + let endpoint = parse_browser_loopback_endpoint_for_tests(raw).expect("valid endpoint"); + + assert_eq!(endpoint["host"], json!(expected_host)); + assert_eq!(endpoint["port"], json!(9222)); + assert_eq!( + endpoint["versionUrl"], + json!(format!("{expected_origin}/json/version")) + ); + } +} + +#[test] +fn endpoint_validation_rejects_non_loopback_and_url_components() { + for endpoint in [ + "https://127.0.0.1:9222", + "http://192.168.1.5:9222", + "http://127.0.0.1:9222/json/version", + "http://user:pass@127.0.0.1:9222", + "http://127.0.0.1:9222?x=1", + "http://127.0.0.1:9222#fragment", + "http://[::1]:9222/json/list", + ] { + assert!( + parse_browser_loopback_endpoint_for_tests(endpoint).is_err(), + "expected {endpoint} to be rejected" + ); + } +} + +#[test] +fn websocket_validation_requires_ws_loopback_and_same_port() { + for websocket_url in [ + "ws://127.0.0.1:9222/devtools/browser/test", + "ws://localhost:9222/devtools/browser/test", + "ws://[::1]:9222/devtools/browser/test", + ] { + assert!( + validate_browser_websocket_endpoint_for_tests(websocket_url, "http://127.0.0.1:9222",) + .is_ok(), + "expected {websocket_url} to be accepted" + ); + } + + for websocket_url in [ + "wss://127.0.0.1:9222/devtools/browser/test", + "ws://127.0.0.1:9333/devtools/browser/test", + "ws://192.168.1.5:9222/devtools/browser/test", + "ws://user:pass@127.0.0.1:9222/devtools/browser/test", + ] { + assert!( + validate_browser_websocket_endpoint_for_tests(websocket_url, "http://127.0.0.1:9222",) + .is_err(), + "expected {websocket_url} to be rejected" + ); + } +} + +#[test] +fn managed_start_request_rejects_arbitrary_paths_and_directories() { + for request in [ + json!({ + "browserPath": "Z:\\missing\\chrome.exe", + "startupUrl": "about:blank" + }), + json!({ + "userDataDir": "G:\\TouchAI\\unsafe-delete-target", + "startupUrl": "about:blank" + }), + ] { + assert!( + serde_json::from_value::(request) + .is_err(), + "browser_start must not accept caller-controlled executable/profile paths" + ); + } +} + +#[test] +fn browser_url_policy_accepts_only_web_urls_and_about_blank() { + for url in [ + "https://example.test/path?q=1", + "http://127.0.0.1:1420/", + "about:blank", + " ABOUT:blank ", + ] { + assert!( + validate_browser_url_for_tests(url).is_ok(), + "expected {url} to be accepted" + ); + } + + for url in [ + "--user-data-dir=G:\\TouchAI\\unsafe-profile", + "--remote-debugging-address=0.0.0.0", + "-https://example.test", + "file:///C:/Users/person/secret.html", + "chrome://version", + "edge://settings", + "devtools://devtools/bundled/inspector.html", + "javascript:alert(document.cookie)", + "data:text/html,", + "vbscript:msgbox(1)", + "C:\\Users\\person\\secret.html", + "https://example.test/\u{0000}", + ] { + assert!( + validate_browser_url_for_tests(url).is_err(), + "expected {url} to be rejected" + ); + } +} + +#[test] +fn browser_start_rejects_unsafe_startup_url_before_browser_discovery() { + let test_app = build_test_app(TestAppOptions::default()).expect("test app"); + + for startup_url in [ + "--user-data-dir=G:\\TouchAI\\unsafe-profile", + "--remote-debugging-address=0.0.0.0", + "file:///C:/Users/person/secret.html", + "javascript:alert(document.cookie)", + "data:text/html,", + ] { + let error = invoke_command_err( + &test_app.main_webview, + "browser_start", + json!({ + "request": { + "startupUrl": startup_url + } + }), + ); + assert!( + error + .as_str() + .is_some_and(|message| message.contains("Browser URL")), + "unexpected startup URL error for {startup_url}: {error:?}" + ); + } +} + +#[test] +fn stale_navigation_token_validation_rejects_old_refs() { + assert!(validate_browser_navigation_token_for_tests("nav-2", "nav-2").is_ok()); + assert_eq!( + validate_browser_navigation_token_for_tests("nav-1", "nav-2").expect_err("stale ref"), + "Browser ref is stale; observe again before acting" + ); +} + +#[test] +fn ref_action_validation_requires_matching_navigation_token() { + let reference = json!({ + "refId": "ref-1", + "navigationToken": "nav-current", + "description": "input: Email", + "editable": true, + "selector": "#email", + "x": 10.0, + "y": 20.0 + }); + + let request = json!({ + "action": "click", + "refId": "ref-1", + "navigationToken": "nav-old" + }); + + assert_eq!( + validate_browser_action_for_tests(request, vec![reference]).expect_err("stale ref"), + "Browser ref is stale; observe again before acting" + ); +} + +#[test] +fn element_actions_require_observed_refs_and_navigation_tokens() { + let click_without_ref = json!({ + "action": "click", + "x": 10, + "y": 20 + }); + assert!( + validate_browser_action_for_tests(click_without_ref, vec![]).is_err(), + "raw coordinates must be rejected before action dispatch" + ); + + let click_without_token = json!({ + "action": "click", + "refId": "ref-1" + }); + let reference = json!({ + "refId": "ref-1", + "navigationToken": "nav-current", + "description": "button: Submit", + "editable": false, + "selector": "#submit", + "x": 30.0, + "y": 40.0 + }); + assert_eq!( + validate_browser_action_for_tests(click_without_token, vec![reference]) + .expect_err("missing navigation token"), + "Browser action requires navigationToken for ref targets" + ); +} + +#[test] +fn fill_form_validation_rejects_stale_or_non_editable_fields() { + let editable_ref = json!({ + "refId": "ref-1", + "navigationToken": "nav-current", + "description": "input: Email", + "editable": true, + "selector": "#email", + "x": 10.0, + "y": 20.0 + }); + let non_editable_ref = json!({ + "refId": "ref-2", + "navigationToken": "nav-current", + "description": "button: Submit", + "editable": false, + "selector": "#submit", + "x": 30.0, + "y": 40.0 + }); + + let stale_request = json!({ + "action": "fill_form", + "fields": [{ + "refId": "ref-1", + "navigationToken": "nav-old", + "value": "person@example.test" + }] + }); + assert_eq!( + validate_browser_action_for_tests(stale_request, vec![editable_ref.clone()]) + .expect_err("stale fill_form field"), + "Browser ref is stale; observe again before acting" + ); + + let non_editable_request = json!({ + "action": "fill_form", + "fields": [{ + "refId": "ref-2", + "navigationToken": "nav-current", + "value": "not editable" + }] + }); + assert_eq!( + validate_browser_action_for_tests(non_editable_request, vec![non_editable_ref]) + .expect_err("non-editable fill_form field"), + "Browser target is not editable" + ); +} + +#[test] +fn browser_status_defaults_to_idle() { + let test_app = build_test_app(TestAppOptions::default()).expect("test app"); + + let response: serde_json::Value = + invoke_command_ok(&test_app.main_webview, "browser_status", json!({})); + + assert_eq!(response["status"], json!("idle")); + assert_eq!(response["managed"], json!(false)); + assert!(response.get("endpoint").is_none()); + assert_eq!(response["tabs"], json!([])); +} + +#[test] +fn browser_navigation_rejects_unsafe_url_before_connection_check() { + let test_app = build_test_app(TestAppOptions::default()).expect("test app"); + + for unsafe_url in [ + "--remote-debugging-address=0.0.0.0", + "file:///C:/Users/person/secret.html", + "javascript:alert(document.cookie)", + "data:text/html,", + ] { + let error = invoke_command_err( + &test_app.main_webview, + "browser_navigate", + json!({ + "request": { + "url": unsafe_url + } + }), + ); + + assert!( + error + .as_str() + .is_some_and(|message| message.contains("Browser URL")), + "unexpected navigation URL error for {unsafe_url}: {error:?}" + ); + } +} + +#[test] +fn browser_navigation_and_tab_requests_reject_unknown_fields() { + let test_app = build_test_app(TestAppOptions::default()).expect("test app"); + + let navigate_error = invoke_command_err( + &test_app.main_webview, + "browser_navigate", + json!({ + "request": { + "url": "https://example.test", + "rawCdp": true + } + }), + ); + assert!( + navigate_error + .as_str() + .is_some_and(|message| message.contains("unknown field `rawCdp`")), + "unexpected navigate unknown field error: {navigate_error:?}" + ); + + let back_error = invoke_command_err( + &test_app.main_webview, + "browser_back", + json!({ + "request": { + "tabId": "tab-1", + "endpoint": "http://127.0.0.1:9222" + } + }), + ); + assert!( + back_error + .as_str() + .is_some_and(|message| message.contains("unknown field `endpoint`")), + "unexpected tab unknown field error: {back_error:?}" + ); +} + +#[test] +fn browser_observe_rejects_unsupported_operations_and_unknown_fields() { + let test_app = build_test_app(TestAppOptions::default()).expect("test app"); + + let missing_operation_error = invoke_command_err( + &test_app.main_webview, + "browser_observe", + json!({ + "request": {} + }), + ); + assert!( + missing_operation_error + .as_str() + .is_some_and(|message| message.contains("missing field `operation`")), + "unexpected missing observe operation error: {missing_operation_error:?}" + ); + + let hidden_screenshot_error = invoke_command_err( + &test_app.main_webview, + "browser_observe", + json!({ + "request": { + "operation": "state", + "includeScreenshot": true + } + }), + ); + assert!( + hidden_screenshot_error + .as_str() + .is_some_and(|message| message.contains("unknown field `includeScreenshot`")), + "unexpected hidden screenshot error: {hidden_screenshot_error:?}" + ); + + let operation_error = invoke_command_err( + &test_app.main_webview, + "browser_observe", + json!({ + "request": { + "operation": "console" + } + }), + ); + assert!( + operation_error + .as_str() + .is_some_and(|message| message.contains("unknown variant `console`")), + "unexpected observe operation error: {operation_error:?}" + ); + + let field_error = invoke_command_err( + &test_app.main_webview, + "browser_observe", + json!({ + "request": { + "operation": "state", + "rawCdp": true + } + }), + ); + assert!( + field_error + .as_str() + .is_some_and(|message| message.contains("unknown field")), + "unexpected observe unknown field error: {field_error:?}" + ); +} + +#[test] +fn browser_act_rejects_missing_oversized_and_unguarded_page_actions() { + let editable_ref = json!({ + "refId": "ref-1", + "navigationToken": "obs-current", + "description": "input: Email", + "editable": true, + "selector": "#email", + "x": 10.0, + "y": 20.0 + }); + + for (request, expected) in [ + ( + json!({ + "action": "type", + "refId": "ref-1", + "navigationToken": "obs-current" + }), + "type requires text", + ), + ( + json!({ + "action": "fill", + "refId": "ref-1", + "navigationToken": "obs-current" + }), + "fill requires value", + ), + ( + json!({ + "action": "type", + "refId": "ref-1", + "navigationToken": "obs-current", + "text": "x".repeat(16 * 1024 + 1) + }), + "Browser action text exceeds the size limit", + ), + ( + json!({ + "action": "press_key", + "key": "" + }), + "press_key key is invalid", + ), + ( + json!({ + "action": "fill_form", + "fields": [{ + "refId": "ref-1", + "navigationToken": "obs-current", + "value": "x".repeat(16 * 1024 + 1) + }] + }), + "Browser action text exceeds the size limit", + ), + ( + json!({ + "action": "fill_form", + "fields": (0..51) + .map(|_| json!({ + "refId": "ref-1", + "navigationToken": "obs-current", + "value": "ok" + })) + .collect::>() + }), + "fill_form field count exceeds the size limit", + ), + ] { + let error = validate_browser_action_for_tests(request, vec![editable_ref.clone()]) + .expect_err("invalid act request must fail"); + assert!( + error.contains(expected), + "expected {expected:?}, got {error:?}" + ); + } +} + +#[test] +fn browser_act_rejects_operation_alias_and_unknown_actions_at_serde_boundary() { + let operation_alias_error = validate_browser_action_for_tests( + json!({ + "operation": "click", + "refId": "ref-1", + "navigationToken": "nav-current" + }), + vec![], + ) + .expect_err("operation alias must be rejected"); + assert!( + operation_alias_error.contains("missing field `action`") + || operation_alias_error.contains("unknown field `operation`"), + "unexpected operation alias error: {operation_alias_error}" + ); + + let unknown_action_error = validate_browser_action_for_tests( + json!({ + "action": "evaluate", + "refId": "ref-1", + "navigationToken": "nav-current" + }), + vec![], + ) + .expect_err("unknown action must be rejected"); + assert!( + unknown_action_error.contains("unknown variant `evaluate`"), + "unexpected unknown action error: {unknown_action_error}" + ); +} + +#[test] +fn browser_start_failure_sets_error_status() { + let test_app = build_test_app(TestAppOptions::default()).expect("test app"); + + let error = invoke_command_err( + &test_app.main_webview, + "browser_start", + json!({ + "request": { + "browserId": "missing-browser" + } + }), + ); + + assert!( + error.as_str().is_some_and( + |message| message.contains("Supported browser 'missing-browser' was not found") + ), + "unexpected error: {error:?}" + ); + + let status: serde_json::Value = + invoke_command_ok(&test_app.main_webview, "browser_status", json!({})); + assert_eq!(status["status"], json!("error")); +} + +#[test] +fn browser_stop_resets_runtime_to_idle() { + let test_app = build_test_app(TestAppOptions::default()).expect("test app"); + + let _ = invoke_command_err( + &test_app.main_webview, + "browser_start", + json!({ + "request": { + "browserId": "missing-browser" + } + }), + ); + + let stopped: serde_json::Value = + invoke_command_ok(&test_app.main_webview, "browser_stop", json!({})); + + assert_eq!(stopped["status"], json!("idle")); + assert_eq!(stopped["managed"], json!(false)); + assert!(stopped.get("endpoint").is_none()); + assert_eq!(stopped["tabs"], json!([])); +} diff --git a/apps/desktop/src-tauri/tests/browser_live_smoke.rs b/apps/desktop/src-tauri/tests/browser_live_smoke.rs new file mode 100644 index 00000000..ee6e3b04 --- /dev/null +++ b/apps/desktop/src-tauri/tests/browser_live_smoke.rs @@ -0,0 +1,434 @@ +mod common; + +use std::{ + fs, + io::{Read, Write}, + net::{TcpListener, TcpStream}, + path::{Path, PathBuf}, + thread, + time::{Duration, Instant}, +}; + +use common::{ + build_test_app, invoke_command_err, invoke_command_ok, invoke_command_result, TestAppOptions, +}; +use serde_json::{json, Value}; + +#[test] +#[ignore = "requires TOUCHAI_BROWSER_SMOKE_ROOT on G drive and an installed Chrome or Edge"] +fn managed_browser_live_smoke_launches_observes_acts_and_cleans_up() { + let smoke_root = std::env::var("TOUCHAI_BROWSER_SMOKE_ROOT") + .expect("TOUCHAI_BROWSER_SMOKE_ROOT must be set for live browser smoke"); + + assert!( + smoke_root.starts_with("G:\\") || smoke_root.starts_with("G:/"), + "TOUCHAI_BROWSER_SMOKE_ROOT must point at G drive for local smoke runs" + ); + + let test_root = + PathBuf::from(smoke_root).join(format!("browser-live-smoke-{}", std::process::id())); + fs::create_dir_all(&test_root).expect("create smoke root"); + std::env::set_var("TEMP", &test_root); + std::env::set_var("TMP", &test_root); + let fixture_path = write_fixture(&test_root); + let server = FixtureServer::start(fixture_path); + let test_app = build_test_app(TestAppOptions::default()).expect("test app"); + + let status: Value = invoke_command_ok( + &test_app.main_webview, + "browser_start", + json!({ + "request": { + "startupUrl": server.url() + } + }), + ); + assert_eq!(status["status"], json!("connected")); + assert_eq!(status["managed"], json!(true)); + assert!(status.get("endpoint").is_none()); + let mut stop_guard = BrowserStopGuard::new(&test_app.main_webview); + + let observation = observe_until_refs(&test_app.main_webview); + let navigation_token = observation["navigationToken"] + .as_str() + .expect("navigation token") + .to_string(); + let refs = observation["domRefs"].as_array().expect("dom refs"); + let input_ref = refs + .iter() + .find(|item| { + item["editable"].as_bool() == Some(true) + && item["description"] + .as_str() + .is_some_and(|description| description.contains("Email")) + }) + .and_then(|item| item["refId"].as_str()) + .expect("email input ref") + .to_string(); + let submit_ref = refs + .iter() + .find(|item| { + item["description"] + .as_str() + .is_some_and(|description| description.contains("Submit")) + }) + .and_then(|item| item["refId"].as_str()) + .expect("submit button ref") + .to_string(); + + let non_editable_error = invoke_command_err( + &test_app.main_webview, + "browser_act", + json!({ + "request": { + "action": "type", + "refId": submit_ref, + "navigationToken": navigation_token, + "text": "blocked" + } + }), + ); + assert!( + non_editable_error + .as_str() + .is_some_and(|message| message.contains("Browser target is not editable")), + "unexpected non-editable error: {non_editable_error:?}" + ); + + let fill_result: Value = invoke_command_ok( + &test_app.main_webview, + "browser_act", + json!({ + "request": { + "action": "fill", + "refId": input_ref, + "navigationToken": navigation_token, + "value": "person@example.test" + } + }), + ); + assert_eq!(fill_result["ok"], json!(true)); + + let click_result: Value = invoke_command_ok( + &test_app.main_webview, + "browser_act", + json!({ + "request": { + "action": "click", + "refId": submit_ref, + "navigationToken": navigation_token + } + }), + ); + assert_eq!(click_result["ok"], json!(true)); + + let press_key_result: Value = invoke_command_ok( + &test_app.main_webview, + "browser_act", + json!({ + "request": { + "action": "press_key", + "navigationToken": navigation_token, + "key": "Escape" + } + }), + ); + assert_eq!(press_key_result["ok"], json!(true)); + + let scroll_result: Value = invoke_command_ok( + &test_app.main_webview, + "browser_act", + json!({ + "request": { + "action": "scroll", + "navigationToken": navigation_token, + "deltaY": 25 + } + }), + ); + assert_eq!(scroll_result["ok"], json!(true)); + + let submitted_observation = observe_until_submitted(&test_app.main_webview); + assert!( + submitted_observation["domRefs"] + .as_array() + .is_some_and(|refs| refs + .iter() + .any(|item| item["description"].as_str().is_some_and( + |description| description.contains("Submitted: person@example.test") + ) && item["selector"].as_str() == Some("#result"))), + "submitted page state was not observed: {submitted_observation:?}" + ); + + let screenshot_observation: Value = invoke_command_ok( + &test_app.main_webview, + "browser_observe", + json!({ + "request": { + "operation": "screenshot" + } + }), + ); + let screenshot_path = PathBuf::from( + screenshot_observation["filePath"] + .as_str() + .expect("screenshot artifact path"), + ); + let screenshot = fs::read(&screenshot_path).expect("read screenshot artifact"); + assert!( + screenshot.len() > 1024, + "screenshot should contain PNG bytes" + ); + assert_eq!(screenshot_observation.get("screenshotBase64"), None); + + let diagnostics_observation = observe_until_diagnostics(&test_app.main_webview); + assert!( + diagnostics_observation["console"] + .as_array() + .is_some_and(|items| items.iter().any(|item| item + .as_str() + .is_some_and(|line| line.contains("touchai-smoke-console")))), + "console diagnostics were not observed: {diagnostics_observation:?}" + ); + assert!( + diagnostics_observation["network"] + .as_array() + .is_some_and(|items| items.iter().any(|item| item + .as_str() + .is_some_and(|line| line.contains("missing-smoke-resource")))), + "network diagnostics were not observed: {diagnostics_observation:?}" + ); + + let stopped_status: Value = + invoke_command_ok(&test_app.main_webview, "browser_stop", json!({})); + assert_eq!(stopped_status["status"], json!("idle")); + assert_eq!(stopped_status["managed"], json!(false)); + assert!(stopped_status.get("endpoint").is_none()); + stop_guard.disarm(); + + wait_for_owned_profile_cleanup(&test_root); +} + +struct BrowserStopGuard<'a> { + webview: &'a tauri::WebviewWindow, + active: bool, +} + +impl<'a> BrowserStopGuard<'a> { + fn new(webview: &'a tauri::WebviewWindow) -> Self { + Self { + webview, + active: true, + } + } + + fn disarm(&mut self) { + self.active = false; + } +} + +impl Drop for BrowserStopGuard<'_> { + fn drop(&mut self) { + if self.active { + let _ = invoke_command_result(self.webview, "browser_stop", json!({})); + } + } +} + +fn write_fixture(root: &Path) -> PathBuf { + let fixture_path = root.join("fixture.html"); + fs::write( + &fixture_path, + r#" + + TouchAI browser smoke + + + +
Waiting
+
+ + + +"#, + ) + .expect("write fixture"); + fixture_path +} + +fn observe_until_refs(webview: &tauri::WebviewWindow) -> Value { + let deadline = Instant::now() + Duration::from_secs(8); + let mut last_observation = None; + while Instant::now() < deadline { + let observation: Value = invoke_command_ok( + webview, + "browser_observe", + json!({ + "request": { + "operation": "snapshot" + } + }), + ); + if observation["domRefs"] + .as_array() + .is_some_and(|refs| refs.len() >= 2) + { + return observation; + } + last_observation = Some(observation); + thread::sleep(Duration::from_millis(200)); + } + panic!("browser refs did not become available: {last_observation:?}"); +} + +fn observe_until_submitted(webview: &tauri::WebviewWindow) -> Value { + let deadline = Instant::now() + Duration::from_secs(8); + let mut last_observation = None; + while Instant::now() < deadline { + let observation: Value = invoke_command_ok( + webview, + "browser_observe", + json!({ + "request": { + "operation": "snapshot" + } + }), + ); + if observation["domRefs"].as_array().is_some_and(|refs| { + refs.iter().any(|item| { + item["description"] + .as_str() + .is_some_and(|description| description.contains("person@example.test")) + }) + }) { + return observation; + } + last_observation = Some(observation); + thread::sleep(Duration::from_millis(200)); + } + panic!("browser submitted state did not become observable: {last_observation:?}"); +} + +fn observe_until_diagnostics(webview: &tauri::WebviewWindow) -> Value { + let deadline = Instant::now() + Duration::from_secs(8); + let mut last_observation = None; + while Instant::now() < deadline { + let observation: Value = invoke_command_ok( + webview, + "browser_observe", + json!({ + "request": { + "operation": "state", + "includeConsole": true, + "includeNetwork": true + } + }), + ); + let has_console = observation["console"].as_array().is_some_and(|items| { + items.iter().any(|item| { + item.as_str() + .is_some_and(|line| line.contains("touchai-smoke-console")) + }) + }); + let has_network = observation["network"].as_array().is_some_and(|items| { + items.iter().any(|item| { + item.as_str() + .is_some_and(|line| line.contains("missing-smoke-resource")) + }) + }); + if has_console && has_network { + return observation; + } + last_observation = Some(observation); + thread::sleep(Duration::from_millis(200)); + } + panic!("browser diagnostics did not become observable: {last_observation:?}"); +} + +fn wait_for_owned_profile_cleanup(root: &Path) { + let deadline = Instant::now() + Duration::from_secs(8); + while Instant::now() < deadline { + let active_profiles = fs::read_dir(root) + .map(|entries| { + entries + .flatten() + .filter(|entry| is_managed_browser_profile_entry(entry)) + .count() + }) + .unwrap_or(0); + if active_profiles == 0 { + return; + } + thread::sleep(Duration::from_millis(200)); + } + assert!( + fs::read_dir(root) + .map(|entries| { + entries + .flatten() + .filter(|entry| is_managed_browser_profile_entry(entry)) + .count() + }) + .unwrap_or(0) + == 0, + "managed browser profile should be removed after browser_stop" + ); +} + +fn is_managed_browser_profile_entry(entry: &fs::DirEntry) -> bool { + let name = entry.file_name(); + let name = name.to_string_lossy(); + name.starts_with("touchai-browser-") && !name.starts_with("touchai-browser-artifacts") +} + +struct FixtureServer { + url: String, +} + +impl FixtureServer { + fn start(fixture_path: PathBuf) -> Self { + let listener = TcpListener::bind(("127.0.0.1", 0)).expect("bind fixture server"); + let port = listener.local_addr().expect("fixture addr").port(); + thread::spawn(move || { + for stream in listener.incoming().flatten() { + respond_fixture(stream, &fixture_path); + } + }); + Self { + url: format!("http://127.0.0.1:{port}/fixture.html"), + } + } + + fn url(&self) -> &str { + &self.url + } +} + +fn respond_fixture(mut stream: TcpStream, fixture_path: &Path) { + let mut request_buffer = [0_u8; 1024]; + let read_bytes = stream.read(&mut request_buffer).unwrap_or(0); + let request = String::from_utf8_lossy(&request_buffer[..read_bytes]); + if request.starts_with("GET /missing-smoke-resource") { + let body = b"missing smoke resource"; + let response = format!( + "HTTP/1.1 404 Not Found\r\nContent-Type: text/plain; charset=utf-8\r\nContent-Length: {}\r\nConnection: close\r\n\r\n", + body.len() + ); + let _ = stream.write_all(response.as_bytes()); + let _ = stream.write_all(body); + return; + } + + let body = fs::read(fixture_path).expect("read fixture"); + let response = format!( + "HTTP/1.1 200 OK\r\nContent-Type: text/html; charset=utf-8\r\nContent-Length: {}\r\nConnection: close\r\n\r\n", + body.len() + ); + let _ = stream.write_all(response.as_bytes()); + let _ = stream.write_all(&body); +} diff --git a/apps/desktop/src/database/artifacts/runtime/seed.sql b/apps/desktop/src/database/artifacts/runtime/seed.sql index 3fe89d1a..78f3e62b 100644 --- a/apps/desktop/src/database/artifacts/runtime/seed.sql +++ b/apps/desktop/src/database/artifacts/runtime/seed.sql @@ -158,3 +158,21 @@ INSERT INTO built_in_tools ( ) SELECT 'ask_user_question', 'AskUserQuestion', '向用户提出结构化问题', 1, 'low', NULL WHERE NOT EXISTS (SELECT 1 FROM built_in_tools WHERE tool_id = 'ask_user_question'); + +INSERT INTO built_in_tools ( + tool_id, display_name, description, enabled, risk_level, config_json +) +SELECT 'browser_session', 'BrowserSession', '管理浏览器自动化会话与标签页', 1, 'medium', NULL +WHERE NOT EXISTS (SELECT 1 FROM built_in_tools WHERE tool_id = 'browser_session'); + +INSERT INTO built_in_tools ( + tool_id, display_name, description, enabled, risk_level, config_json +) +SELECT 'browser_observe', 'BrowserObserve', '观察浏览器页面状态、快照、截图、控制台与网络摘要', 1, 'low', NULL +WHERE NOT EXISTS (SELECT 1 FROM built_in_tools WHERE tool_id = 'browser_observe'); + +INSERT INTO built_in_tools ( + tool_id, display_name, description, enabled, risk_level, config_json +) +SELECT 'browser_act', 'BrowserAct', '通过浏览器页面引用执行点击、输入、表单、按键、滚动与等待操作', 1, 'medium', NULL +WHERE NOT EXISTS (SELECT 1 FROM built_in_tools WHERE tool_id = 'browser_act'); diff --git a/apps/desktop/src/database/queries/builtInTools.ts b/apps/desktop/src/database/queries/builtInTools.ts index 9e3b08a1..f4d1792a 100644 --- a/apps/desktop/src/database/queries/builtInTools.ts +++ b/apps/desktop/src/database/queries/builtInTools.ts @@ -73,6 +73,35 @@ export const updateBuiltInTool = async ( return updatedTool && updatedTool.id !== undefined ? updatedTool : undefined; }; +/** + * 在同一个事务中更新多条内置工具配置。 + */ +export const updateBuiltInTools = async ( + ids: number[], + data: BuiltInToolUpdateData +): Promise => { + if (ids.length === 0) { + return []; + } + + return await db.transaction(async (tx) => { + const updatedTools: BuiltInToolEntity[] = []; + for (const id of ids) { + const updatedTool = await tx + .update(builtInTools) + .set(data) + .where(eq(builtInTools.id, id)) + .returning() + .get(); + if (!updatedTool || updatedTool.id === undefined) { + throw new Error(`Built-in tool not found after update: ${id}`); + } + updatedTools.push(updatedTool); + } + return updatedTools; + }); +}; + /** * 更新内置工具最近一次使用时间。 */ diff --git a/apps/desktop/src/i18n/messages.ts b/apps/desktop/src/i18n/messages.ts index ce9698ff..1867c006 100644 --- a/apps/desktop/src/i18n/messages.ts +++ b/apps/desktop/src/i18n/messages.ts @@ -356,6 +356,7 @@ const zhCNMessages = { 'settings.builtInTools.summary.setting': '读取和修改应用设置', 'settings.builtInTools.summary.webFetch': '抓取网页并提取易读文本', 'settings.builtInTools.summary.upgradeModel': '升级当前请求模型', + 'settings.builtInTools.summary.browserAutomation': '控制和观察本地浏览器', 'settings.builtInTools.summary.showWidget': '聊天内联可交互可视化', 'settings.builtInTools.summary.visualizeReadMe': '读取 ShowWidget 规范', 'settings.builtInTools.summary.fallback': '暂无描述', @@ -439,6 +440,14 @@ const zhCNMessages = { 'settings.builtInTools.bash.compactOutput': '压缩命令输出', 'settings.builtInTools.bash.compactOutputDescription': '开启后命令输出会自动压缩,大幅降低 Token 消耗。', + 'settings.builtInTools.browser.title': '浏览器自动化', + 'settings.builtInTools.browser.mode.default': '默认浏览器', + 'settings.builtInTools.browser.mode.custom': '指定浏览器', + 'settings.builtInTools.browser.browserId': '浏览器 ID', + 'settings.builtInTools.browser.browserIdPlaceholder': 'chrome 或 edge;留空时自动检测', + 'settings.builtInTools.browser.startupUrl': '启动 URL', + 'settings.builtInTools.browser.startupUrlPlaceholder': '可选', + 'settings.builtInTools.browser.startupUrlInvalid': '请输入有效的 http 或 https URL。', 'settings.mcp.tabs.config': '配置', 'settings.mcp.tabs.tools': '工具', 'settings.mcp.tabs.logs': '日志', @@ -1118,6 +1127,7 @@ const enUSMessages: Record = { 'settings.builtInTools.summary.setting': 'Read and modify application settings', 'settings.builtInTools.summary.webFetch': 'Fetch web pages and extract readable text', 'settings.builtInTools.summary.upgradeModel': 'Upgrade the current request model', + 'settings.builtInTools.summary.browserAutomation': 'Control and observe a local browser', 'settings.builtInTools.summary.showWidget': 'Inline interactive visualization in chat', 'settings.builtInTools.summary.visualizeReadMe': 'Read the ShowWidget specification', 'settings.builtInTools.summary.fallback': 'No description', @@ -1204,6 +1214,14 @@ const enUSMessages: Record = { 'settings.builtInTools.bash.compactOutput': 'Compact command output', 'settings.builtInTools.bash.compactOutputDescription': 'Automatically compress command output to greatly reduce token usage.', + 'settings.builtInTools.browser.title': 'Browser Automation', + 'settings.builtInTools.browser.mode.default': 'Default browser', + 'settings.builtInTools.browser.mode.custom': 'Specific browser', + 'settings.builtInTools.browser.browserId': 'Browser ID', + 'settings.builtInTools.browser.browserIdPlaceholder': 'chrome or edge; auto-detects when unset', + 'settings.builtInTools.browser.startupUrl': 'Startup URL', + 'settings.builtInTools.browser.startupUrlPlaceholder': 'Optional', + 'settings.builtInTools.browser.startupUrlInvalid': 'Enter a valid http or https URL.', 'settings.mcp.tabs.config': 'Configuration', 'settings.mcp.tabs.tools': 'Tools', 'settings.mcp.tabs.logs': 'Logs', diff --git a/apps/desktop/src/services/BuiltInToolService/registry.ts b/apps/desktop/src/services/BuiltInToolService/registry.ts index efd4eb2d..e6397ab6 100644 --- a/apps/desktop/src/services/BuiltInToolService/registry.ts +++ b/apps/desktop/src/services/BuiltInToolService/registry.ts @@ -2,6 +2,7 @@ import { builtInTools as askUserTools } from './tools/askUser'; import { builtInTools as bashTools } from './tools/bash'; +import { builtInTools as browserTools } from './tools/browser'; import { builtInTools as fileSearchTools } from './tools/fileSearch'; import { builtInTools as readTools } from './tools/read'; import { builtInTools as settingTools } from './tools/setting'; @@ -55,6 +56,7 @@ export const builtInToolRegistry = new BuiltInToolRegistry(); builtInToolRegistry.register(askUserTools); builtInToolRegistry.register(bashTools); +builtInToolRegistry.register(browserTools); builtInToolRegistry.register(fileSearchTools); builtInToolRegistry.register(readTools); builtInToolRegistry.register(settingTools); diff --git a/apps/desktop/src/services/BuiltInToolService/service.ts b/apps/desktop/src/services/BuiltInToolService/service.ts index 0d0a0995..fa7144d8 100644 --- a/apps/desktop/src/services/BuiltInToolService/service.ts +++ b/apps/desktop/src/services/BuiltInToolService/service.ts @@ -33,6 +33,8 @@ import type { } from './types'; const BUILT_IN_TOOL_PREFIX = 'builtin__'; +const BROWSER_TOOL_IDS = ['browser_session', 'browser_observe', 'browser_act'] as const; +const BROWSER_TOOL_ID_SET = new Set(BROWSER_TOOL_IDS); interface BuiltInToolExecutionOptions { toolCall: AiToolCall; @@ -128,7 +130,13 @@ class BuiltInToolService { */ async getEnabledToolDefinitions(): Promise { const enabledTools = await findEnabledBuiltInTools(); + const enabledToolIds = new Set(enabledTools.map((tool) => tool.tool_id)); + const browserGroupEnabled = BROWSER_TOOL_IDS.every((toolId) => enabledToolIds.has(toolId)); return enabledTools.flatMap((tool) => { + if (BROWSER_TOOL_ID_SET.has(tool.tool_id) && !browserGroupEnabled) { + return []; + } + const descriptor = builtInToolRegistry.get(tool.tool_id); if (!descriptor) { return []; @@ -161,6 +169,14 @@ class BuiltInToolService { return null; } + if (BROWSER_TOOL_ID_SET.has(toolId)) { + const enabledTools = await findEnabledBuiltInTools(); + const enabledToolIds = new Set(enabledTools.map((tool) => tool.tool_id)); + if (!BROWSER_TOOL_IDS.every((browserToolId) => enabledToolIds.has(browserToolId))) { + return null; + } + } + const tool = builtInToolRegistry.get(toolId); if (!tool) { return null; diff --git a/apps/desktop/src/services/BuiltInToolService/tools/browser/approval.ts b/apps/desktop/src/services/BuiltInToolService/tools/browser/approval.ts new file mode 100644 index 00000000..15e3715e --- /dev/null +++ b/apps/desktop/src/services/BuiltInToolService/tools/browser/approval.ts @@ -0,0 +1,125 @@ +import { tt } from '@/i18n'; +import type { ToolApprovalRequest } from '@/services/AgentService/contracts/tooling'; +import { normalizeOptionalString, truncateText } from '@/utils/text'; + +import type { BrowserToolId } from './index'; +import { parseBrowserOperation } from './operation'; +import { redactBrowserText, redactUrl } from './redaction'; + +const APPROVAL_DELAY_MS = 450; + +function approval(command: string, reason: string, description = ''): ToolApprovalRequest { + return { + title: tt('浏览器操作确认'), + description, + command: truncateText(command, 180), + riskLabel: '', + reason, + commandLabel: '', + approveLabel: tt('批准'), + rejectLabel: tt('拒绝'), + enterHint: 'Enter', + escHint: 'Esc', + keyboardApproveDelayMs: APPROVAL_DELAY_MS, + }; +} + +function formatTarget(args: Record): string { + const ref = normalizeOptionalString(args.ref, { collapseWhitespace: true }); + if (ref) { + return ref; + } + + const selector = normalizeOptionalString(args.selector, { collapseWhitespace: true }); + return selector ? redactBrowserText(selector) : 'selected tab'; +} + +function formatFieldLabel(args: Record): string { + return ( + normalizeOptionalString(args.field, { collapseWhitespace: true }) ?? + normalizeOptionalString(args.name, { collapseWhitespace: true }) ?? + 'field' + ); +} + +export async function createBrowserApprovalRequest( + toolId: BrowserToolId, + args: Record +): Promise { + const operation = parseBrowserOperation(args); + if (!operation) { + return null; + } + + if (toolId === 'browser_observe') { + if (operation === 'screenshot') { + return approval( + `screenshot ${formatTarget(args)}`, + tt('此操作会截取当前网页内容并作为图片附件返回。') + ); + } + + return null; + } + + if (toolId === 'browser_session') { + if (operation === 'start' || operation === 'stop') { + return approval(operation, tt('此操作会启动或停止浏览器自动化会话。')); + } + + return null; + } + + if (toolId !== 'browser_act') { + return null; + } + + if (operation === 'scroll' || operation === 'wait') { + return null; + } + + if (operation === 'navigate') { + const rawUrl = normalizeOptionalString(args.url, { collapseWhitespace: true }) ?? ''; + const targetUrl = rawUrl ? redactUrl(rawUrl) : 'missing URL'; + return approval(`navigate ${targetUrl}`, tt('此操作会让浏览器打开或切换到新的网页。')); + } + + if (operation === 'back' || operation === 'forward' || operation === 'reload') { + return approval( + `${operation} ${formatTarget(args)}`, + tt('此操作会改变当前网页的浏览状态。') + ); + } + + if (operation === 'screenshot') { + return approval( + `screenshot ${formatTarget(args)}`, + tt('此操作会截取当前网页内容并作为图片附件返回。') + ); + } + + if (operation === 'fill') { + const field = formatFieldLabel(args); + return approval( + `fill ${formatTarget(args)} ${redactBrowserText(field)}=[redacted]`, + tt('此操作会向网页表单输入内容。') + ); + } + + if (operation === 'fill_form') { + return approval(`fill_form ${formatTarget(args)}`, tt('此操作会向网页表单输入一组内容。')); + } + + if (operation === 'type') { + return approval(`type ${formatTarget(args)}`, tt('此操作会在网页中输入文本。')); + } + + if (operation === 'click' || operation === 'press_key') { + return approval( + `${operation} ${formatTarget(args)}`, + tt('此操作会与网页交互,可能触发提交、导航或状态变更。') + ); + } + + return null; +} diff --git a/apps/desktop/src/services/BuiltInToolService/tools/browser/config.ts b/apps/desktop/src/services/BuiltInToolService/tools/browser/config.ts new file mode 100644 index 00000000..03e180d0 --- /dev/null +++ b/apps/desktop/src/services/BuiltInToolService/tools/browser/config.ts @@ -0,0 +1,67 @@ +import { t } from '@/i18n'; + +export type BrowserAutomationMode = 'default' | 'custom'; + +export interface BrowserAutomationToolConfig { + mode: BrowserAutomationMode; + browserId: string; + startupUrl: string; +} + +export const DEFAULT_BROWSER_AUTOMATION_TOOL_CONFIG: BrowserAutomationToolConfig = { + mode: 'default', + browserId: '', + startupUrl: '', +}; + +export function parseBrowserAutomationToolConfig( + configJson: string | null +): BrowserAutomationToolConfig { + if (!configJson) { + return { ...DEFAULT_BROWSER_AUTOMATION_TOOL_CONFIG }; + } + + try { + const parsed = JSON.parse(configJson) as Partial; + return { + ...DEFAULT_BROWSER_AUTOMATION_TOOL_CONFIG, + mode: parsed.mode === 'custom' ? 'custom' : 'default', + browserId: + typeof parsed.browserId === 'string' + ? parsed.browserId + : DEFAULT_BROWSER_AUTOMATION_TOOL_CONFIG.browserId, + startupUrl: + typeof parsed.startupUrl === 'string' + ? parsed.startupUrl + : DEFAULT_BROWSER_AUTOMATION_TOOL_CONFIG.startupUrl, + }; + } catch { + return { ...DEFAULT_BROWSER_AUTOMATION_TOOL_CONFIG }; + } +} + +export function serializeBrowserAutomationToolConfig(config: BrowserAutomationToolConfig): string { + return JSON.stringify({ + mode: config.mode, + browserId: config.browserId.trim(), + startupUrl: config.startupUrl.trim(), + }); +} + +export function getBrowserAutomationStartupUrlError(config: BrowserAutomationToolConfig): string { + const startupUrl = config.startupUrl.trim(); + if (!startupUrl) { + return ''; + } + + try { + const url = new URL(startupUrl); + if (url.protocol === 'http:' || url.protocol === 'https:') { + return ''; + } + } catch { + // handled below + } + + return t('settings.builtInTools.browser.startupUrlInvalid'); +} diff --git a/apps/desktop/src/services/BuiltInToolService/tools/browser/constants.ts b/apps/desktop/src/services/BuiltInToolService/tools/browser/constants.ts new file mode 100644 index 00000000..835a5454 --- /dev/null +++ b/apps/desktop/src/services/BuiltInToolService/tools/browser/constants.ts @@ -0,0 +1,160 @@ +import type { AiToolDefinition } from '@/services/AgentService/contracts/tooling'; + +export const BROWSER_SESSION_TOOL_ID = 'browser_session'; +export const BROWSER_OBSERVE_TOOL_ID = 'browser_observe'; +export const BROWSER_ACT_TOOL_ID = 'browser_act'; + +export const BROWSER_SESSION_OPERATIONS = ['status', 'start', 'stop'] as const; + +export const BROWSER_OBSERVE_OPERATIONS = ['current', 'tabs', 'screenshot', 'dom'] as const; + +export const BROWSER_ACT_OPERATIONS = [ + 'navigate', + 'click', + 'type', + 'fill', + 'fill_form', + 'press_key', + 'scroll', + 'wait', + 'back', + 'forward', + 'reload', +] as const; + +const TAB_ID_PROPERTY = { + type: 'string', + description: 'Optional browser tab id. Defaults to the selected tab.', +}; + +const REF_PROPERTY = { + type: 'string', + description: 'Stable element ref returned by browser_observe snapshot.', +}; + +export const BROWSER_SESSION_TOOL_DESCRIPTION = [ + 'Manage the native browser automation session.', + 'Use this for managed browser status, launch, and stop.', + 'This tool does not expose raw CDP or JavaScript evaluation.', +].join(' '); + +export const BROWSER_OBSERVE_TOOL_DESCRIPTION = [ + 'Observe the selected browser tab through the native browser runtime.', + 'Returns compact redacted current-tab state, tab list, screenshots, or DOM-like snapshots.', + 'Screenshot base64 is never returned to the model.', +].join(' '); + +export const BROWSER_ACT_TOOL_DESCRIPTION = [ + 'Interact with elements in the selected browser tab using refs from browser_observe.', + 'Supports navigation, clicks, typing, form filling, key presses, scrolling, waits, history actions, and reloads.', + 'Use browser_observe after actions to verify page state.', +].join(' '); + +export const BROWSER_SESSION_TOOL_INPUT_SCHEMA: AiToolDefinition['input_schema'] = { + type: 'object', + properties: { + operation: { + type: 'string', + enum: [...BROWSER_SESSION_OPERATIONS], + description: 'Browser session operation to perform.', + }, + browserId: { + type: 'string', + description: + 'Optional safe browser id such as chrome or edge for managed browser start.', + }, + startupUrl: { + type: 'string', + description: 'Optional startup URL for the managed browser start operation.', + }, + }, + required: ['operation'], + additionalProperties: false, +}; + +export const BROWSER_OBSERVE_TOOL_INPUT_SCHEMA: AiToolDefinition['input_schema'] = { + type: 'object', + properties: { + operation: { + type: 'string', + enum: [...BROWSER_OBSERVE_OPERATIONS], + description: 'Observation operation to perform.', + }, + tabId: TAB_ID_PROPERTY, + includeConsole: { + type: 'boolean', + description: 'Include a compact recent console summary for the selected tab.', + }, + includeNetwork: { + type: 'boolean', + description: + 'Include a compact recent failed/error network summary for the selected tab.', + }, + }, + required: ['operation'], + additionalProperties: false, +}; + +export const BROWSER_ACT_TOOL_INPUT_SCHEMA: AiToolDefinition['input_schema'] = { + type: 'object', + properties: { + operation: { + type: 'string', + enum: [...BROWSER_ACT_OPERATIONS], + description: 'Browser action operation to perform.', + }, + tabId: TAB_ID_PROPERTY, + ref: REF_PROPERTY, + navigationToken: { + type: 'string', + description: + 'Navigation token returned with observed refs. Required when acting on refs.', + }, + url: { + type: 'string', + description: 'URL for navigate operation.', + }, + text: { + type: 'string', + description: 'Text for type operation.', + }, + value: { + type: 'string', + description: 'Value for fill operation.', + }, + fields: { + type: 'array', + description: 'Fields for fill_form operation.', + items: { + type: 'object', + properties: { + ref: REF_PROPERTY, + navigationToken: { + type: 'string', + description: 'Navigation token returned with the field ref.', + }, + value: { + type: 'string', + description: 'Value to fill into the field.', + }, + }, + required: ['ref', 'navigationToken', 'value'], + additionalProperties: false, + }, + }, + key: { + type: 'string', + description: 'Keyboard key for press_key operation.', + }, + deltaX: { type: 'number' }, + deltaY: { type: 'number' }, + timeoutMs: { + type: 'integer', + minimum: 100, + maximum: 120000, + description: 'Timeout for wait operation.', + }, + }, + required: ['operation'], + additionalProperties: false, +}; diff --git a/apps/desktop/src/services/BuiltInToolService/tools/browser/format.ts b/apps/desktop/src/services/BuiltInToolService/tools/browser/format.ts new file mode 100644 index 00000000..e0852c31 --- /dev/null +++ b/apps/desktop/src/services/BuiltInToolService/tools/browser/format.ts @@ -0,0 +1,120 @@ +import type { AttachmentIndex } from '@/services/AgentService/infrastructure/attachments'; +import { truncateText } from '@/utils/text'; + +import { formatRedactedJson, redactBrowserValue } from './redaction'; + +function isRecord(value: unknown): value is Record { + return Boolean(value) && typeof value === 'object' && !Array.isArray(value); +} + +function getString(value: Record, keys: string[]): string | null { + for (const key of keys) { + const entry = value[key]; + if (typeof entry === 'string' && entry.trim()) { + return entry; + } + } + + return null; +} + +function isScreenshotPayloadKey(key: string): boolean { + return ['base64', 'dataUrl', 'data_url', 'screenshotBase64', 'screenshot_base64'].includes(key); +} + +function isImplementationDetailKey(key: string): boolean { + return key === 'endpoint'; +} + +function stripScreenshotPayloads(value: unknown): unknown { + if (Array.isArray(value)) { + return value.map(stripScreenshotPayloads); + } + + if (!isRecord(value)) { + return value; + } + + return Object.fromEntries( + Object.entries(value) + .filter(([key]) => !isScreenshotPayloadKey(key) && !isImplementationDetailKey(key)) + .map(([key, entry]) => [key, stripScreenshotPayloads(entry)]) + ); +} + +function formatScreenshotResponse(response: Record): { + result: string; + attachments?: AttachmentIndex[]; +} { + const path = getString(response, ['path', 'filePath', 'file_path']); + const mimeType = getString(response, ['mimeType', 'mime_type']) ?? 'image/png'; + const width = typeof response.width === 'number' ? response.width : null; + const height = typeof response.height === 'number' ? response.height : null; + const dimensions = width && height ? `${width}x${height}` : 'unknown dimensions'; + const hasBase64 = + typeof response.base64 === 'string' || + typeof response.screenshotBase64 === 'string' || + typeof response.screenshot_base64 === 'string' || + typeof response.dataUrl === 'string' || + typeof response.data_url === 'string'; + + if (path) { + return { + result: [ + '', + `path: ${path}`, + `mimeType: ${mimeType}`, + `dimensions: ${dimensions}`, + hasBase64 ? 'base64 suppressed from model-visible result' : null, + '', + ] + .filter(Boolean) + .join('\n'), + attachments: [ + { + id: `browser-screenshot-${Date.now()}`, + type: 'image', + path, + originPath: path, + name: path.split(/[\\/]/).pop() || 'browser-screenshot.png', + mimeType, + supportStatus: 'supported', + }, + ], + }; + } + + return { + result: [ + '', + `mimeType: ${mimeType}`, + `dimensions: ${dimensions}`, + hasBase64 + ? 'artifact: screenshot captured; base64 suppressed from model-visible result' + : 'artifact: screenshot metadata returned without local path', + '', + ].join('\n'), + }; +} + +export function formatBrowserToolResult( + operation: string, + response: unknown +): { + result: string; + attachments?: AttachmentIndex[]; +} { + if (operation === 'screenshot' && isRecord(response)) { + return formatScreenshotResponse(response); + } + + const result = formatRedactedJson(stripScreenshotPayloads(response)); + return { + result: truncateText(result, 20000), + }; +} + +export function formatBrowserToolError(error: unknown): string { + const message = error instanceof Error ? error.message : String(error); + return String(redactBrowserValue(message)); +} diff --git a/apps/desktop/src/services/BuiltInToolService/tools/browser/index.ts b/apps/desktop/src/services/BuiltInToolService/tools/browser/index.ts new file mode 100644 index 00000000..244bdcdb --- /dev/null +++ b/apps/desktop/src/services/BuiltInToolService/tools/browser/index.ts @@ -0,0 +1,425 @@ +import { native } from '@services/NativeService'; + +import type { ToolApprovalRequest } from '@/services/AgentService/contracts/tooling'; + +import { + type BaseBuiltInToolExecutionContext, + BuiltInTool, + type BuiltInToolConversationSemantic, + type BuiltInToolExecutionResult, + type BuiltInToolGroup, +} from '../../types'; +import { createBrowserApprovalRequest } from './approval'; +import { + type BrowserAutomationToolConfig, + DEFAULT_BROWSER_AUTOMATION_TOOL_CONFIG, + parseBrowserAutomationToolConfig, +} from './config'; +import { + BROWSER_ACT_OPERATIONS, + BROWSER_ACT_TOOL_DESCRIPTION, + BROWSER_ACT_TOOL_ID, + BROWSER_ACT_TOOL_INPUT_SCHEMA, + BROWSER_OBSERVE_OPERATIONS, + BROWSER_OBSERVE_TOOL_DESCRIPTION, + BROWSER_OBSERVE_TOOL_ID, + BROWSER_OBSERVE_TOOL_INPUT_SCHEMA, + BROWSER_SESSION_OPERATIONS, + BROWSER_SESSION_TOOL_DESCRIPTION, + BROWSER_SESSION_TOOL_ID, + BROWSER_SESSION_TOOL_INPUT_SCHEMA, +} from './constants'; +import { formatBrowserToolError, formatBrowserToolResult } from './format'; +import { browserOperationForSemantic, requireBrowserOperation } from './operation'; + +export type BrowserToolId = + | typeof BROWSER_SESSION_TOOL_ID + | typeof BROWSER_OBSERVE_TOOL_ID + | typeof BROWSER_ACT_TOOL_ID; + +type BrowserToolConfig = BrowserAutomationToolConfig; +type BrowserSessionOperation = (typeof BROWSER_SESSION_OPERATIONS)[number]; +type BrowserObserveOperation = (typeof BROWSER_OBSERVE_OPERATIONS)[number]; +type BrowserActOperation = (typeof BROWSER_ACT_OPERATIONS)[number]; +type NativeBrowserObserveOperation = 'state' | 'snapshot' | 'screenshot'; +type NativeBrowserActOperation = + | 'click' + | 'type' + | 'fill' + | 'fill_form' + | 'press_key' + | 'scroll' + | 'wait'; + +function isOneOf(value: string, candidates: T): value is T[number] { + return candidates.includes(value as T[number]); +} + +function requireKnownOperation( + toolId: BrowserToolId, + args: Record, + candidates: T +): T[number] { + const operation = requireBrowserOperation(toolId, args); + if (!isOneOf(operation, candidates)) { + throw new Error(`Unsupported ${toolId} operation: ${operation}`); + } + + return operation; +} + +function stringArg(args: Record, key: string): string | undefined { + const value = args[key]; + return typeof value === 'string' && value.trim() ? value : undefined; +} + +function numberArg(args: Record, key: string): number | undefined { + const value = args[key]; + return typeof value === 'number' && Number.isFinite(value) ? value : undefined; +} + +function booleanArg(args: Record, key: string): boolean | undefined { + const value = args[key]; + return typeof value === 'boolean' ? value : undefined; +} + +function rejectHiddenObserveFields(args: Record): void { + for (const key of ['includeScreenshot', 'includeDom']) { + if (Object.prototype.hasOwnProperty.call(args, key)) { + throw new Error(`browser_observe does not accept hidden field ${key}`); + } + } +} + +function stringValueArg(args: Record, key: string): string | undefined { + const value = args[key]; + return typeof value === 'string' ? value : undefined; +} + +function optionalConfigString(value: string): string | undefined { + const trimmed = value.trim(); + return trimmed ? trimmed : undefined; +} + +function normalizedFormFieldsArg( + args: Record, + key: string +): Array> | undefined { + const value = args[key]; + if (!Array.isArray(value)) { + return undefined; + } + + const normalized = value + .filter( + (field): field is Record => + Boolean(field) && typeof field === 'object' && !Array.isArray(field) + ) + .map((field) => { + const refId = stringArg(field, 'refId') ?? stringArg(field, 'ref'); + const navigationToken = stringArg(field, 'navigationToken'); + const value = stringValueArg(field, 'value'); + if (!refId || !navigationToken || value === undefined) { + return null; + } + + return { refId, navigationToken, value }; + }) + .filter((field): field is { refId: string; navigationToken: string; value: string } => + Boolean(field) + ); + + return normalized.length > 0 ? normalized : undefined; +} + +function compactRecord>(value: T): T { + return Object.fromEntries( + Object.entries(value).filter((entry): entry is [string, unknown] => entry[1] !== undefined) + ) as T; +} + +function success(operation: string, response: unknown): BuiltInToolExecutionResult { + const formatted = formatBrowserToolResult(operation, response); + return { + result: formatted.result, + attachments: formatted.attachments, + isError: false, + status: 'success', + }; +} + +function errorResult(error: unknown): BuiltInToolExecutionResult { + const message = formatBrowserToolError(error); + return { + result: `Browser tool failed: ${message}`, + isError: true, + status: 'error', + errorMessage: message, + }; +} + +function semantic(action: BuiltInToolConversationSemantic['action'], target: string) { + return { action, target }; +} + +function nativeObserveOperation(operation: BrowserObserveOperation): NativeBrowserObserveOperation { + switch (operation) { + case 'current': + case 'tabs': + return 'state'; + case 'dom': + return 'snapshot'; + case 'screenshot': + return 'screenshot'; + } +} + +function isNativeActOperation( + operation: BrowserActOperation +): operation is NativeBrowserActOperation { + return ( + operation === 'click' || + operation === 'type' || + operation === 'fill' || + operation === 'fill_form' || + operation === 'press_key' || + operation === 'scroll' || + operation === 'wait' + ); +} + +export async function executeBrowserSessionTool( + args: Record, + config: BrowserToolConfig, + _context: BaseBuiltInToolExecutionContext +): Promise { + void _context; + + try { + const operation: BrowserSessionOperation = requireKnownOperation( + 'browser_session', + args, + BROWSER_SESSION_OPERATIONS + ); + + switch (operation) { + case 'status': + return success(operation, await native.browser.status()); + case 'start': + return success( + operation, + await native.browser.start({ + browserId: + stringArg(args, 'browserId') ?? + (config.mode === 'custom' + ? optionalConfigString(config.browserId) + : undefined), + startupUrl: + stringArg(args, 'startupUrl') ?? + stringArg(args, 'url') ?? + optionalConfigString(config.startupUrl), + }) + ); + case 'stop': + return success(operation, await native.browser.stop()); + default: + throw new Error(`Unsupported browser_session operation: ${operation}`); + } + } catch (error) { + return errorResult(error); + } +} + +export async function executeBrowserObserveTool( + args: Record, + _config: BrowserToolConfig, + _context: BaseBuiltInToolExecutionContext +): Promise { + void _config; + void _context; + + try { + const operation: BrowserObserveOperation = requireKnownOperation( + 'browser_observe', + args, + BROWSER_OBSERVE_OPERATIONS + ); + + rejectHiddenObserveFields(args); + + return success( + operation, + await native.browser.observe({ + ...compactRecord({ + operation: nativeObserveOperation(operation), + tabId: stringArg(args, 'tabId'), + includeConsole: booleanArg(args, 'includeConsole'), + includeNetwork: booleanArg(args, 'includeNetwork'), + }), + }) + ); + } catch (error) { + return errorResult(error); + } +} + +export async function executeBrowserActTool( + args: Record, + _config: BrowserToolConfig, + _context: BaseBuiltInToolExecutionContext +): Promise { + void _config; + void _context; + + try { + const operation: BrowserActOperation = requireKnownOperation( + 'browser_act', + args, + BROWSER_ACT_OPERATIONS + ); + + if (operation === 'navigate') { + return success( + operation, + await native.browser.navigate({ + ...compactRecord({ + url: String(args.url ?? ''), + tabId: stringArg(args, 'tabId'), + }), + }) + ); + } + + if (operation === 'back') { + return success( + operation, + await native.browser.back({ tabId: stringArg(args, 'tabId') }) + ); + } + + if (operation === 'forward') { + return success( + operation, + await native.browser.forward({ tabId: stringArg(args, 'tabId') }) + ); + } + + if (operation === 'reload') { + return success( + operation, + await native.browser.reload({ tabId: stringArg(args, 'tabId') }) + ); + } + + if (!isNativeActOperation(operation)) { + throw new Error(`Unsupported browser_act operation: ${operation}`); + } + + return success( + operation, + await native.browser.act({ + ...compactRecord({ + action: operation, + tabId: stringArg(args, 'tabId'), + ref: stringArg(args, 'ref'), + refId: stringArg(args, 'refId'), + targetRef: stringArg(args, 'targetRef'), + navigationToken: stringArg(args, 'navigationToken'), + text: stringValueArg(args, 'text'), + value: stringValueArg(args, 'value'), + fields: normalizedFormFieldsArg(args, 'fields'), + key: stringArg(args, 'key'), + deltaX: numberArg(args, 'deltaX'), + deltaY: numberArg(args, 'deltaY'), + timeoutMs: numberArg(args, 'timeoutMs'), + }), + }) + ); + } catch (error) { + return errorResult(error); + } +} + +abstract class BrowserTool extends BuiltInTool { + readonly defaultConfig: BrowserToolConfig = DEFAULT_BROWSER_AUTOMATION_TOOL_CONFIG; + + override parseConfig(configJson: string | null): BrowserToolConfig { + return parseBrowserAutomationToolConfig(configJson); + } + + override buildApprovalRequest( + args: Record + ): Promise { + return createBrowserApprovalRequest(this.id as BrowserToolId, args); + } +} + +class BrowserSessionTool extends BrowserTool { + readonly id = BROWSER_SESSION_TOOL_ID; + readonly displayName = 'BrowserSession'; + readonly description = BROWSER_SESSION_TOOL_DESCRIPTION; + readonly inputSchema = BROWSER_SESSION_TOOL_INPUT_SCHEMA; + + override buildConversationSemantic(args: Record) { + return semantic('process', `browser ${browserOperationForSemantic(args, 'status')}`); + } + + override execute( + args: Record, + config: BrowserToolConfig, + context: BaseBuiltInToolExecutionContext + ) { + return executeBrowserSessionTool(args, config, context); + } +} + +class BrowserObserveTool extends BrowserTool { + readonly id = BROWSER_OBSERVE_TOOL_ID; + readonly displayName = 'BrowserObserve'; + readonly description = BROWSER_OBSERVE_TOOL_DESCRIPTION; + readonly inputSchema = BROWSER_OBSERVE_TOOL_INPUT_SCHEMA; + + override buildConversationSemantic(args: Record) { + return semantic('read', `browser ${browserOperationForSemantic(args, 'current')}`); + } + + override execute( + args: Record, + config: BrowserToolConfig, + context: BaseBuiltInToolExecutionContext + ) { + return executeBrowserObserveTool(args, config, context); + } +} + +class BrowserActTool extends BrowserTool { + readonly id = BROWSER_ACT_TOOL_ID; + readonly displayName = 'BrowserAct'; + readonly description = BROWSER_ACT_TOOL_DESCRIPTION; + readonly inputSchema = BROWSER_ACT_TOOL_INPUT_SCHEMA; + + override buildConversationSemantic(args: Record) { + return semantic('process', `browser ${browserOperationForSemantic(args, 'act')}`); + } + + override execute( + args: Record, + config: BrowserToolConfig, + context: BaseBuiltInToolExecutionContext + ) { + return executeBrowserActTool(args, config, context); + } +} + +export const browserSessionTool = new BrowserSessionTool(); +export const browserObserveTool = new BrowserObserveTool(); +export const browserActTool = new BrowserActTool(); +export const builtInTools: BuiltInToolGroup = [ + browserSessionTool, + browserObserveTool, + browserActTool, +]; + +export { createBrowserApprovalRequest } from './approval'; +export { formatBrowserToolResult } from './format'; +export { redactBrowserValue } from './redaction'; diff --git a/apps/desktop/src/services/BuiltInToolService/tools/browser/operation.ts b/apps/desktop/src/services/BuiltInToolService/tools/browser/operation.ts new file mode 100644 index 00000000..6eb5d996 --- /dev/null +++ b/apps/desktop/src/services/BuiltInToolService/tools/browser/operation.ts @@ -0,0 +1,24 @@ +import { normalizeOptionalString } from '@/utils/text'; + +export function parseBrowserOperation(args: Record): string | null { + return normalizeOptionalString(args.operation, { collapseWhitespace: true }) ?? null; +} + +export function requireBrowserOperation( + toolId: 'browser_session' | 'browser_observe' | 'browser_act', + args: Record +): string { + const operation = parseBrowserOperation(args); + if (!operation) { + throw new Error(`Missing required ${toolId} operation`); + } + + return operation; +} + +export function browserOperationForSemantic( + args: Record, + fallback: string +): string { + return parseBrowserOperation(args) ?? fallback; +} diff --git a/apps/desktop/src/services/BuiltInToolService/tools/browser/redaction.ts b/apps/desktop/src/services/BuiltInToolService/tools/browser/redaction.ts new file mode 100644 index 00000000..ac43398d --- /dev/null +++ b/apps/desktop/src/services/BuiltInToolService/tools/browser/redaction.ts @@ -0,0 +1,106 @@ +const REDACTED = '[redacted]'; +const EMAIL_RE = /\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b/gi; +const URL_RE = /\bhttps?:\/\/[^\s<>"'`]+/gi; +const CODE_LIKE_RE = /\b(?:[a-f0-9]{24,}|[A-Z0-9]{32,}|\d{6,8})\b/gi; +const SECRET_ASSIGNMENT_RE = + /\b(token|password|passwd|secret|api[_-]?key|authorization|bearer|otp|code)=([^\s&]+)/gi; + +function isRecord(value: unknown): value is Record { + return Boolean(value) && typeof value === 'object' && !Array.isArray(value); +} + +function isCredentialKey(key: string): boolean { + return /(password|passwd|secret|token|api[_-]?key|authorization|credential|cookie|otp|code)/i.test( + key + ); +} + +function hasCredentialFieldHint(value: Record): boolean { + return ['field', 'name', 'label', 'placeholder', 'ref', 'selector', 'type'].some((key) => { + const candidate = value[key]; + return typeof candidate === 'string' && isCredentialKey(candidate); + }); +} + +export function redactUrl(value: string): string { + try { + const parsed = new URL(value); + parsed.search = ''; + parsed.hash = ''; + return parsed.toString(); + } catch { + return value.replace(URL_RE, (match) => { + try { + const parsed = new URL(match); + parsed.search = ''; + parsed.hash = ''; + return parsed.toString(); + } catch { + return match; + } + }); + } +} + +export function redactBrowserText(value: string): string { + return value + .replace(URL_RE, (match) => redactUrl(match)) + .replace(EMAIL_RE, '[redacted-email]') + .replace(SECRET_ASSIGNMENT_RE, (_match, key) => `${key}=${REDACTED}`) + .replace(/\bBearer\s+[A-Za-z0-9._~+/=-]{16,}\b/gi, `Bearer ${REDACTED}`) + .replace(CODE_LIKE_RE, REDACTED); +} + +export function redactBrowserValue(value: unknown, keyHint = ''): unknown { + if (typeof value === 'string') { + if (isCredentialKey(keyHint)) { + return REDACTED; + } + + const redactedUrl = /(^|url|href)$/i.test(keyHint) ? redactUrl(value) : value; + return redactBrowserText(redactedUrl); + } + + if (Array.isArray(value)) { + return value.map((item) => redactBrowserValue(item, keyHint)); + } + + if (isRecord(value)) { + const credentialField = hasCredentialFieldHint(value); + const output: Record = {}; + for (const [key, entry] of Object.entries(value)) { + if ( + key === 'base64' || + key === 'dataUrl' || + key === 'data_url' || + key === 'screenshotBase64' || + key === 'screenshot_base64' + ) { + output[key] = '[suppressed]'; + continue; + } + + if ((key === 'value' || key === 'text') && credentialField) { + output[key] = REDACTED; + continue; + } + + output[key] = redactBrowserValue(entry, key); + } + return output; + } + + return value; +} + +export function formatRedactedJson(value: unknown): string { + return JSON.stringify(redactBrowserValue(value), null, 2); +} + +export function redactCredentialFieldValue(field: string | undefined, value: unknown): unknown { + if (!field || !isCredentialKey(field)) { + return redactBrowserValue(value); + } + + return REDACTED; +} diff --git a/apps/desktop/src/services/BuiltInToolService/types.ts b/apps/desktop/src/services/BuiltInToolService/types.ts index 68f1572e..5bb39e88 100644 --- a/apps/desktop/src/services/BuiltInToolService/types.ts +++ b/apps/desktop/src/services/BuiltInToolService/types.ts @@ -26,7 +26,10 @@ export type BuiltInToolId = | 'upgrade_model' | 'show_widget' | 'visualize_read_me' - | 'ask_user_question'; + | 'ask_user_question' + | 'browser_session' + | 'browser_observe' + | 'browser_act'; /** * 所有内置工具共享的最小运行时上下文。 diff --git a/apps/desktop/src/services/NativeService/browser.ts b/apps/desktop/src/services/NativeService/browser.ts new file mode 100644 index 00000000..cea65935 --- /dev/null +++ b/apps/desktop/src/services/NativeService/browser.ts @@ -0,0 +1,46 @@ +import { invoke } from '@tauri-apps/api/core'; + +import type { + BrowserActRequest, + BrowserActResponse, + BrowserNavigationRequest, + BrowserObserveRequest, + BrowserObserveResponse, + BrowserSessionResponse, + BrowserStartRequest, + BrowserStatusResponse, + BrowserTabRequest, +} from './types'; + +/** + * Native browser automation bridge. + */ +export const browser = { + status(): Promise { + return invoke('browser_status'); + }, + start(request: BrowserStartRequest): Promise { + return invoke('browser_start', { request }); + }, + stop(): Promise { + return invoke('browser_stop'); + }, + navigate(request: BrowserNavigationRequest): Promise { + return invoke('browser_navigate', { request }); + }, + back(request: BrowserTabRequest = {}): Promise { + return invoke('browser_back', { request }); + }, + forward(request: BrowserTabRequest = {}): Promise { + return invoke('browser_forward', { request }); + }, + reload(request: BrowserTabRequest = {}): Promise { + return invoke('browser_reload', { request }); + }, + observe(request: BrowserObserveRequest): Promise { + return invoke('browser_observe', { request }); + }, + act(request: BrowserActRequest): Promise { + return invoke('browser_act', { request }); + }, +} as const; diff --git a/apps/desktop/src/services/NativeService/index.ts b/apps/desktop/src/services/NativeService/index.ts index b6b6f7fb..4cc78738 100644 --- a/apps/desktop/src/services/NativeService/index.ts +++ b/apps/desktop/src/services/NativeService/index.ts @@ -1,4 +1,5 @@ import { autostart } from './autostart'; +import { browser } from './browser'; import { builtInTools } from './builtInTools'; import { clipboard } from './clipboard'; import { database } from './database'; @@ -27,6 +28,16 @@ export type { AppUpdateDownload, AppUpdateInfo, AppUpdateRequirement, + BrowserActRequest, + BrowserActResponse, + BrowserNavigationRequest, + BrowserObserveOperation, + BrowserObserveRequest, + BrowserObserveResponse, + BrowserSessionResponse, + BrowserStartRequest, + BrowserStatusResponse, + BrowserTabRequest, BuiltInBashExecutionRequest, BuiltInBashExecutionResponse, ClipboardPayload, @@ -42,6 +53,7 @@ export type { export { autostart, + browser, builtInTools, clipboard, database, @@ -59,6 +71,7 @@ export const native = { window, shortcut, autostart, + browser, clipboard, builtInTools, log, diff --git a/apps/desktop/src/services/NativeService/types.ts b/apps/desktop/src/services/NativeService/types.ts index a3a52379..8c551c3c 100644 --- a/apps/desktop/src/services/NativeService/types.ts +++ b/apps/desktop/src/services/NativeService/types.ts @@ -219,3 +219,56 @@ export interface QuickSearchResult { total_results: number; next_offset: number; } + +export interface BrowserStartRequest { + browserId?: string | null; + startupUrl?: string | null; +} + +export interface BrowserNavigationRequest { + url: string; + tabId?: string | null; +} + +export interface BrowserTabRequest { + tabId?: string | null; +} + +export type BrowserObserveOperation = 'state' | 'snapshot' | 'screenshot'; + +export interface BrowserObserveRequest { + operation: BrowserObserveOperation; + tabId?: string | null; + includeConsole?: boolean | null; + includeNetwork?: boolean | null; +} + +export type BrowserActOperation = + | 'click' + | 'type' + | 'fill' + | 'fill_form' + | 'press_key' + | 'scroll' + | 'wait'; + +export interface BrowserActRequest { + action: BrowserActOperation; + tabId?: string | null; + ref?: string | null; + refId?: string | null; + targetRef?: string | null; + navigationToken?: string | null; + text?: string | null; + value?: string | null; + fields?: Array> | null; + key?: string | null; + deltaX?: number | null; + deltaY?: number | null; + timeoutMs?: number | null; +} + +export type BrowserStatusResponse = Record; +export type BrowserSessionResponse = Record; +export type BrowserObserveResponse = Record; +export type BrowserActResponse = Record; diff --git a/apps/desktop/src/views/SettingsView/components/BuiltInTools/browserToolGroup.ts b/apps/desktop/src/views/SettingsView/components/BuiltInTools/browserToolGroup.ts new file mode 100644 index 00000000..ebdf8ff5 --- /dev/null +++ b/apps/desktop/src/views/SettingsView/components/BuiltInTools/browserToolGroup.ts @@ -0,0 +1,23 @@ +// Copyright (c) 2026. 千诚. Licensed under GPL v3 + +import type { BuiltInToolEntity } from './types'; +import { isBrowserAutomationToolId } from './types'; + +export function getBrowserAutomationTools(tools: BuiltInToolEntity[]): BuiltInToolEntity[] { + return tools.filter((tool) => isBrowserAutomationToolId(tool.tool_id)); +} + +export function getBuiltInToolUpdateTargets( + tools: BuiltInToolEntity[], + tool: BuiltInToolEntity | null | undefined +): BuiltInToolEntity[] { + if (!tool) { + return []; + } + + if (isBrowserAutomationToolId(tool.tool_id)) { + return getBrowserAutomationTools(tools); + } + + return [tool]; +} diff --git a/apps/desktop/src/views/SettingsView/components/BuiltInTools/components/BrowserAutomationToolConfig.vue b/apps/desktop/src/views/SettingsView/components/BuiltInTools/components/BrowserAutomationToolConfig.vue new file mode 100644 index 00000000..64eee9b6 --- /dev/null +++ b/apps/desktop/src/views/SettingsView/components/BuiltInTools/components/BrowserAutomationToolConfig.vue @@ -0,0 +1,122 @@ + + + + + diff --git a/apps/desktop/src/views/SettingsView/components/BuiltInTools/components/BuiltInToolConfig.vue b/apps/desktop/src/views/SettingsView/components/BuiltInTools/components/BuiltInToolConfig.vue index 71017769..457a4913 100644 --- a/apps/desktop/src/views/SettingsView/components/BuiltInTools/components/BuiltInToolConfig.vue +++ b/apps/desktop/src/views/SettingsView/components/BuiltInTools/components/BuiltInToolConfig.vue @@ -9,12 +9,18 @@ import type { BuiltInToolEntity, BuiltInToolUpdateData } from '../types'; import { type BashToolConfig as BashToolConfigValue, + type BrowserAutomationToolConfig as BrowserAutomationToolConfigValue, + getBrowserAutomationStartupUrlError, + isBrowserAutomationToolId, parseBashToolConfig, + parseBrowserAutomationToolConfig, parseUpgradeModelToolConfig, + serializeBrowserAutomationToolConfig, type UpgradeModelToolConfig as UpgradeModelToolConfigValue, usesBuiltInToolEmptyConfig, } from '../types'; import BashToolConfig from './BashToolConfig.vue'; + import BrowserAutomationToolConfig from './BrowserAutomationToolConfig.vue'; import UpgradeModelToolConfig from './UpgradeModelToolConfig.vue'; interface Props { tool: BuiltInToolEntity; @@ -29,15 +35,27 @@ const emit = defineEmits(); const bashConfig = ref(parseBashToolConfig(props.tool.config_json)); + const browserAutomationConfig = ref( + parseBrowserAutomationToolConfig(props.tool.config_json) + ); const upgradeModelConfig = ref( parseUpgradeModelToolConfig(props.tool.config_json) ); let autoSaveTimer: ReturnType | null = null; + function clearAutoSaveTimer() { + if (autoSaveTimer) { + clearTimeout(autoSaveTimer); + autoSaveTimer = null; + } + } + watch( () => props.tool, (tool) => { + clearAutoSaveTimer(); bashConfig.value = parseBashToolConfig(tool.config_json); + browserAutomationConfig.value = parseBrowserAutomationToolConfig(tool.config_json); upgradeModelConfig.value = parseUpgradeModelToolConfig(tool.config_json); }, { deep: true } @@ -47,25 +65,53 @@ () => JSON.stringify(bashConfig.value), (nextConfigJson) => { if (props.tool.tool_id !== 'bash') { - if (autoSaveTimer) { - clearTimeout(autoSaveTimer); - autoSaveTimer = null; - } + clearAutoSaveTimer(); return; } if (nextConfigJson === JSON.stringify(parseBashToolConfig(props.tool.config_json))) { - if (autoSaveTimer) { - clearTimeout(autoSaveTimer); - autoSaveTimer = null; - } + clearAutoSaveTimer(); return; } - if (autoSaveTimer) { - clearTimeout(autoSaveTimer); + clearAutoSaveTimer(); + + autoSaveTimer = setTimeout(() => { + emit('save', { + config_json: nextConfigJson, + }); + autoSaveTimer = null; + }, 450); + } + ); + + watch( + () => JSON.stringify(browserAutomationConfig.value), + () => { + if (!isBrowserAutomationToolId(props.tool.tool_id)) { + clearAutoSaveTimer(); + return; + } + + if (getBrowserAutomationStartupUrlError(browserAutomationConfig.value)) { + clearAutoSaveTimer(); + return; + } + + const currentConfigJson = serializeBrowserAutomationToolConfig( + parseBrowserAutomationToolConfig(props.tool.config_json) + ); + const nextConfigJson = serializeBrowserAutomationToolConfig( + browserAutomationConfig.value + ); + + if (nextConfigJson === currentConfigJson) { + clearAutoSaveTimer(); + return; } + clearAutoSaveTimer(); + autoSaveTimer = setTimeout(() => { emit('save', { config_json: nextConfigJson, @@ -79,6 +125,7 @@ () => JSON.stringify(upgradeModelConfig.value), () => { if (props.tool.tool_id !== 'upgrade_model') { + clearAutoSaveTimer(); return; } @@ -88,16 +135,11 @@ const nextConfigJson = serializeUpgradeModelToolConfig(upgradeModelConfig.value); if (nextConfigJson === currentConfigJson) { - if (autoSaveTimer) { - clearTimeout(autoSaveTimer); - autoSaveTimer = null; - } + clearAutoSaveTimer(); return; } - if (autoSaveTimer) { - clearTimeout(autoSaveTimer); - } + clearAutoSaveTimer(); autoSaveTimer = setTimeout(() => { emit('save', { @@ -109,16 +151,18 @@ ); onUnmounted(() => { - if (autoSaveTimer) { - clearTimeout(autoSaveTimer); - autoSaveTimer = null; - } + clearAutoSaveTimer(); });