From 8b892c054126f4ab9136bbf82b9a093a4a2cd834 Mon Sep 17 00:00:00 2001 From: Jim Pudar Date: Wed, 27 May 2026 06:15:41 -0400 Subject: [PATCH 1/3] Add Cursor CLI spy support --- CURSOR_BUGS.md | 189 ++ extensions/cursor-cli/home-manager.nix | 19 + firewall-vm.nix | 2 + flake.nix | 5 +- proxy/agent_spy.py | 247 ++- proxy/allowed-dns.txt.defaults | 2 + proxy/allowed-https.txt.defaults | 10 + proxy/mitmproxy_addon.py | 5 + proxy/test_agent_spy.py | 137 ++ proxy/test_mitmproxy_addon.py | 17 + .../extensions/pi-plannotator.test.ts | 8 +- src/rootcell/extensions/registry.ts | 13 +- src/rootcell/rootcell.test.ts | 40 +- src/spy/api-contracts.ts | 3 +- src/spy/compaction.test.ts | 47 +- src/spy/compaction.ts | 22 +- src/spy/cursor.test.ts | 347 ++++ src/spy/cursor.ts | 1792 +++++++++++++++++ src/spy/fixtures/README.md | 7 + .../fixtures/cursor-agent-composer-2.5.ndjson | 4 + src/spy/migrations.ts | 27 + src/spy/providers.ts | 11 + src/spy/schemas.test.ts | 4 +- src/spy/schemas.ts | 2 +- src/spy/service.test.ts | 84 +- src/spy/service.ts | 38 +- src/spy/store.test.ts | 203 ++ src/spy/store.ts | 167 +- src/spy/ui/src/App.tsx | 27 +- 29 files changed, 3401 insertions(+), 78 deletions(-) create mode 100644 CURSOR_BUGS.md create mode 100644 extensions/cursor-cli/home-manager.nix create mode 100644 src/spy/cursor.test.ts create mode 100644 src/spy/cursor.ts create mode 100644 src/spy/fixtures/cursor-agent-composer-2.5.ndjson diff --git a/CURSOR_BUGS.md b/CURSOR_BUGS.md new file mode 100644 index 0000000..cab25b6 --- /dev/null +++ b/CURSOR_BUGS.md @@ -0,0 +1,189 @@ +# Cursor Spy Bugs + +## 2026-05-26 22:41 EDT - Fresh Cursor UI verification + +Fresh real run: + +- VM: `jmp` +- CLI: `cursor-agent -p --trust --sandbox enabled --model composer-2.5-fast --output-format stream-json` +- Prompt marker: `CURSOR_UI_VERIFY_DELTA_2243` +- Cursor session: `b0f7c5ec-b883-4ada-81dc-0272ff7cf0f9` +- Cursor request id: `c25955cb-afa3-40d0-924e-aaa6878dc15a` +- CLI model: `Composer 2.5 Fast` +- 
CLI response: `CURSOR_UI_VERIFY_DELTA_2243` +- CLI usage: `inputTokens=7922`, `outputTokens=58`, `cacheReadTokens=2848`, `cacheWriteTokens=0` +- Spy call: `call-cursor-99a14bd7-49aa-435b-b953-f2eb6ebb12e4` + +Final post-provision run: + +- VM: `jmp` +- CLI: `cursor-agent -p --trust --sandbox enabled --model composer-2.5-fast --output-format stream-json` +- Prompt marker: `CURSOR_UI_VERIFY_FINAL_2321` +- Cursor session: `6ca1c42d-c77c-4b64-aaf4-018b00a469a1` +- Cursor request id: `1e6cbdd8-0f24-4481-9393-7198d720c4aa` +- CLI model: `Composer 2.5 Fast` +- CLI response: `CURSOR_UI_VERIFY_FINAL_2321` +- CLI usage: `inputTokens=7931`, `outputTokens=52`, `cacheReadTokens=2848`, `cacheWriteTokens=0` +- Spy call: `call-cursor-7eb22349-13bb-48ab-ad2f-b91948b1a07a` +- UI showed request system prompt, harness context, current user input, assistant response, redacted auth headers, role-labeled stream events, and Cursor-specific unavailable token records. +- Running `cursor-agent status` afterward did not add unrelated Cursor auth/status traffic to the spy timeline. + +### BUG-001 - Cursor request semantic blocks are missing + +Status: fixed in working tree and verified live at 2026-05-26 22:53 EDT. + +The spy UI shows only one request block for the fresh Cursor call: + +- `Provider Envelope`: `POST api2.cursor.sh/agent.v1.AgentService/RunSSE` + +It does not show the actual current user prompt, Cursor system prompt, tool definitions, rules, skills, MCP context, subagent definitions, or conversation context as request-side semantic blocks. + +Expected: Cursor calls should expose request composition similarly to Bedrock/Pi and Claude Code, including at least current user input and system/tool context when the captured Cursor payload contains it. 
+ +Follow-up verification: + +- Spy call: `call-cursor-182261cf-88d0-4526-b4a5-60b3751f586c` +- Prompt marker: `CURSOR_UI_VERIFY_TOOL_2300` +- Request blocks: 4 +- Request composition now shows: + - provider envelope + - Cursor system prompt + - Cursor harness context / rules / environment + - current user input +- UI Request Blocks section includes the actual prompt: `Use your file listing tool to inspect this directory, then reply with exactly one final line: CURSOR_UI_VERIFY_TOOL_2300` + +### BUG-002 - Cursor token usage is not shown + +Status: fixed in working tree and verified live at 2026-05-27 05:58 EDT. + +The Cursor CLI result reports usage for the same request: + +- `inputTokens=7922` +- `outputTokens=58` +- `cacheReadTokens=2848` +- `cacheWriteTokens=0` + +The spy UI shows `usage n/a`, `read -`, `write -`, and no usage records for the call. + +Current likely cause: Cursor's HTTP response capture does not currently expose a normalized provider usage object, even though the CLI stdout has usage. The live captured HTTP stream for `call-cursor-182261cf-88d0-4526-b4a5-60b3751f586c` has no provider usage object; the UI correctly shows `usage n/a`. + +Final post-provision evidence: `call-cursor-7eb22349-13bb-48ab-ad2f-b91948b1a07a` also has CLI usage (`inputTokens=7931`, `outputTokens=52`, `cacheReadTokens=2848`, `cacheWriteTokens=0`) but the spy UI/API show `Usage Records: No provider usage record`. The final live stream's trailing `line` payload is `{}`, with no `inputTokens`/`outputTokens` object. + +Useful Cursor fallback signals that are available now: + +- Request and response byte sizes. +- Request composition by semantic block type. +- Stream event count. +- Raw payload availability. +- Tool call and tool result blocks. 
+ +Fix: spy now tees decrypted Cursor response stream chunks, persists the raw chunk bytes even when raw payload storage is disabled, reassembles Connect frames, and exposes each raw Connect frame/protobuf payload as stream-event wire data. Provider usage is only a derived annotation from the observed Cursor final usage envelope. + +Final live verification: + +- Spy call: `call-cursor-3825fcc9-d848-4c78-8e23-d6902bd73b15` +- Prompt marker: `RCSPY_CURSOR_WIRE_002` +- CLI usage: `inputTokens=7931`, `outputTokens=61`, `cacheReadTokens=2848`, `cacheWriteTokens=0` +- Spy summary usage: `inputTokens=7931`, `outputTokens=61`, `cacheReadTokens=2848`, `cacheWriteTokens=0`, `totalTokens=10840` +- Stream event ordinal `25` shows `connect-protobuf-frame` with durable raw wire fields `frameB64`, `payloadB64`, and `payloadSha256`. +- The decoded wire tree shows Cursor final usage at `$frame[19].1.14`, with `wireInputTokens=10779`, `outputTokens=61`, `cacheReadTokens=2848`, `cacheWriteTokens=0`. Spy derives displayed input as `10779 - 2848 - 0 = 7931`, matching Cursor CLI. + +### BUG-003 - Cursor tool calls/results are not promoted to response blocks + +Status: fixed in working tree and verified live at 2026-05-26 22:53 EDT. + +The Cursor CLI can emit tool calls and tool results during a `RunSSE` request. Before the fix, those were present only in stream payload/raw payload data, while Response Blocks showed only assistant text. 
+ +Follow-up verification: + +- Spy call: `call-cursor-182261cf-88d0-4526-b4a5-60b3751f586c` +- CLI tool call: `Glob` +- UI Response Blocks now show: + - assistant output + - `Tool Call`: `Glob {"glob_pattern":"**/*","target_directory":"/tmp/rootcell-cursor-ui-tools"}` + - `Tool Result`: `Result of search in '/tmp/rootcell-cursor-ui-tools' (total 1 file): - sample.txt` + +Post-provision verification at 2026-05-26 23:15 EDT: + +- Spy call: `call-cursor-a22bbe0b-38d0-4197-b8f4-9fe6d1f9960c` +- Prompt marker: `CURSOR_UI_VERIFY_TOOL_2315` +- CLI tool call: `Glob` +- UI Response Blocks show the assistant output, `Tool Call`, and `Tool Result`. +- UI Stream Events show role labels including `assistant` and `tool`. + +### BUG-004 - Cursor stream event labels are too generic + +Status: fixed in working tree and verified live at 2026-05-26 22:53 EDT. + +Before the fix, Cursor stream events were labeled as generic `line#...` entries even when the payload contained a role. The UI now shows role-derived labels for the verified call: `system`, `user`, `assistant`, and `tool`. + +### BUG-005 - Operation filter omits Cursor operations + +Status: fixed in working tree and verified live at 2026-05-26 22:59 EDT. + +The provider filter includes Cursor, but the operation filter was still Bedrock-only and did not list `Run`, `RunSSE`, or `StreamUnifiedChat`. This made it impossible to filter live Cursor captures by operation from the UI even though the API accepts operation filters. + +Follow-up verification: + +- The operation filter now lists `Cursor Run`, `Cursor Run SSE`, and `Cursor Unified Chat`. +- Selecting `Cursor Run SSE` shows the live Cursor `RunSSE` calls. + +### BUG-006 - Cursor token-count requests leak Bedrock CountTokens errors + +Status: fixed in working tree and verified live at 2026-05-26 23:10 EDT. + +Clicking/requesting token counts for a Cursor call returned an unavailable record with a Bedrock error: `The provided model identifier is invalid`. 
Cursor provider token counts are not available from the captured HTTP stream, so the service should not call Bedrock CountTokens for Cursor calls. + +Verified behavior after the fix: Cursor request/block token-count requests and call-detail token records return `unavailable` with a Cursor-specific explanation: provider token counting is currently available only for Bedrock captures; Cursor request/block token recounting is not implemented. Provider usage from Cursor's final wire envelope is captured separately when present. + +Follow-up verification: + +- Spy call: `call-cursor-47ef4eac-dd20-4cfe-8ee5-3559367364a7` +- Prompt marker: `CURSOR_UI_VERIFY_TOKEN_2310` +- CLI model: `Composer 2.5 Fast` +- CLI usage: `inputTokens=3708`, `outputTokens=58`, `cacheReadTokens=7072`, `cacheWriteTokens=0` +- UI Request tokens now show `-` with the Cursor-specific unavailable reason, not a Bedrock model error. + +### BUG-007 - Cursor token-count unavailable events can race the detail load + +Status: fixed in working tree and verified live at 2026-05-26 23:10 EDT. + +Cursor token counting is unavailable immediately because spy should not call Bedrock for Cursor. When that unavailable result was emitted only as a background SSE event, the event could arrive before the React call-detail state was installed. The visible effect was an endless `pending` request token state even though `/api/token-count` returned the correct Cursor-specific unavailable record. + +Fix: for non-Bedrock calls, the service now returns synthetic unavailable token records directly in the call detail response and skips the background provider-token-count path. + +Follow-up verification: + +- Fresh post-provision call: `call-cursor-47ef4eac-dd20-4cfe-8ee5-3559367364a7` +- UI composition panel shows the Cursor-specific unavailable message for request tokens immediately after loading the call. +- Provider and operation filters still show the call when selecting `Cursor` and `Cursor Run SSE`. 
+ +### BUG-008 - Resumed Cursor sessions do not expose full request context in captured HTTP + +Status: open; likely Cursor server-side/session-cache behavior rather than a parser failure. + +Fresh initial Cursor calls expose rich request context in the captured response stream, including the Composer system prompt, harness context, and current user input. A resumed Cursor session did not expose the same context over HTTP; spy captured only the request envelope and the resumed current user input. + +Verification: + +- Resumed Cursor session: `56d03290-acb4-4ff4-84a5-3c237bd05c85` +- Spy call: `call-cursor-638ed1eb-fe15-4bad-9589-f0d6044d6bb3` +- Prompt marker: `CURSOR_UI_VERIFY_RESUME_2313` +- CLI usage: `inputTokens=61`, `outputTokens=56`, `cacheReadTokens=10816`, `cacheWriteTokens=0` +- UI Request Blocks: 2 blocks, request envelope plus current user input. +- UI did not show the prior prompt marker `CURSOR_UI_VERIFY_TOKEN_2310` in the resumed call detail. + +Impact: for the demo, an initial Cursor call can show the system prompt and harness context, but resumed Cursor turns may need to be explained as relying on Cursor-side session/cache state that spy cannot currently reconstruct from the captured HTTP payload alone. + +### BUG-009 - Absent request sections displayed token counts as pending + +Status: fixed in working tree and verified live at 2026-05-26 23:20 EDT. + +While testing Cursor calls with unavailable token counts, the composition table still showed `pending` in the token column for absent sections such as prior conversation history and tool definitions. Those sections have no text to count, so `pending` implied work was still in progress when no count should exist. + +Fix: absent composition sections now display `-` with a `section absent` title. Present sections still display counted, unavailable, or pending token state as appropriate. 
+ +Follow-up verification: + +- Spy call: `call-cursor-a22bbe0b-38d0-4197-b8f4-9fe6d1f9960c` +- UI composition panel shows `-` for absent sections and the Cursor-specific unavailable reason for present request-token records. diff --git a/extensions/cursor-cli/home-manager.nix b/extensions/cursor-cli/home-manager.nix new file mode 100644 index 0000000..f8ac936 --- /dev/null +++ b/extensions/cursor-cli/home-manager.nix @@ -0,0 +1,19 @@ +{ lib, pkgs, ... }: + +{ + home.packages = [ + pkgs.cursor-cli + ]; + + home.activation.cursorCliHttp1ForAgent = lib.hm.dag.entryAfter [ "writeBoundary" ] '' + config="$HOME/.cursor/cli-config.json" + tmp="$config.tmp" + mkdir -p "$HOME/.cursor" + if [ -s "$config" ] && ${pkgs.jq}/bin/jq '.network = (.network // {}) | .network.useHttp1ForAgent = true' "$config" > "$tmp"; then + mv "$tmp" "$config" + else + rm -f "$tmp" + printf '%s\n' '{"network":{"useHttp1ForAgent":true}}' > "$config" + fi + ''; +} diff --git a/firewall-vm.nix b/firewall-vm.nix index 3abeefe..89f1dd0 100644 --- a/firewall-vm.nix +++ b/firewall-vm.nix @@ -294,6 +294,7 @@ in "--listen-port 8080" "--set termlog_verbosity=warn" "--set flow_detail=0" + "--set store_streamed_bodies=true" # Defer opening the upstream TCP connection until after our addon # runs. 
Originally needed for the SNI deny path's address rewrite # to take effect (the default "eager" strategy opens the upstream @@ -334,6 +335,7 @@ in "--listen-port 8081" "--set termlog_verbosity=warn" "--set flow_detail=0" + "--set store_streamed_bodies=true" "--set connection_strategy=lazy" "--set confdir=%t/mitmproxy-transparent" "-s /etc/agent-vm/mitmproxy_addon.py" diff --git a/flake.nix b/flake.nix index e9c52ba..ec56742 100644 --- a/flake.nix +++ b/flake.nix @@ -26,7 +26,10 @@ pkgs = import nixpkgs { inherit system; - config.allowUnfreePredicate = pkg: nixpkgs.lib.getName pkg == "claude-code"; + config.allowUnfreePredicate = pkg: builtins.elem (nixpkgs.lib.getName pkg) [ + "claude-code" + "cursor-cli" + ]; }; mkVM = module: nixpkgs.lib.nixosSystem { diff --git a/proxy/agent_spy.py b/proxy/agent_spy.py index aa0f1ef..ba44ab7 100644 --- a/proxy/agent_spy.py +++ b/proxy/agent_spy.py @@ -1,8 +1,8 @@ #!/usr/bin/env python3 -"""Bedrock traffic capture shim for the firewall VM. +"""LLM provider traffic capture shim for the firewall VM. This module is intentionally stdlib-only. mitmproxy imports the capture helpers -from its own Python environment, provider-gates Bedrock Runtime traffic, and +from its own Python environment, provider-gates LLM runtime traffic, and writes bounded sanitized spool events for the TypeScript spy service. 
""" @@ -38,6 +38,35 @@ "converse-stream", } +CURSOR_API_HOSTS = { + "api.cursor.com", + "api.cursor.sh", + "api2.cursor.sh", + "agentn.global.api5.cursor.sh", +} + +CURSOR_CAPTURE_PATH_RE = re.compile( + r"/(?:aiserver|agent|chat|composer|conversation|completion|generate|stream|v\d+/)", + re.IGNORECASE, +) + +CURSOR_SKIP_PATH_RE = re.compile( + r"/(?:auth|login|logout|telemetry|analytics|metrics|update|download|extension|settings)(?:/|$)", + re.IGNORECASE, +) + +CURSOR_SKIP_OPERATION_RE = re.compile( + r"(?:AnalyticsService|DashboardService|ServerConfigService|GetUsableModels|AvailableModels|" + r"GetDefaultModelForCli|GetCliDownloadUrl|SubmitLogs|TrackEvents|BootstrapStatsig|Statsig|traces|" + r"BidiService|BidiAppend)", + re.IGNORECASE, +) + +CURSOR_CAPTURE_OPERATION_RE = re.compile( + r"^(?:Run|RunSSE|StreamUnifiedChat)$", + re.IGNORECASE, +) + SECRET_HEADER_NAMES = { "authorization", "proxy-authorization", @@ -46,6 +75,7 @@ "x-amz-signature", "x-api-key", "api-key", + "cursor-api-key", } PRESIGNED_QUERY_KEYS = { @@ -195,6 +225,128 @@ def detect_bedrock_request(host: str | None, path: str, headers: Any = None) -> } +def is_cursor_api_host(host: str | None) -> bool: + if not host: + return False + normalized = host.split(":", 1)[0].strip(".").lower() + return normalized in CURSOR_API_HOSTS or fnmatch.fnmatchcase(normalized, "*.cursor.sh") + + +def detect_cursor_request( + host: str | None, + path: str, + method: str | None = None, + body: bytes | None = None, +) -> dict[str, str] | None: + """Detect Cursor Agent API calls while excluding auth/update/download noise.""" + + if not is_cursor_api_host(host): + return None + + if method is not None and method.upper() not in {"POST", "PUT", "PATCH"}: + return None + + url_path = urllib.parse.urlsplit(path).path + if CURSOR_SKIP_PATH_RE.search(url_path): + return None + + operation = _cursor_operation_from_path(url_path) + if CURSOR_SKIP_OPERATION_RE.search(url_path) or CURSOR_SKIP_OPERATION_RE.search(operation): + 
return None + + if CURSOR_CAPTURE_OPERATION_RE.match(operation) is None: + return None + + normalized_host = host.split(":", 1)[0].strip(".").lower() if host else "" + if not normalized_host.endswith(".cursor.sh") and CURSOR_CAPTURE_PATH_RE.search(url_path) is None: + return None + + return { + "provider": "cursor", + "model_id": _cursor_model_id_from_body(body) or "cursor", + "operation": operation, + "streaming": "unknown", + } + + +def detect_provider_request( + host: str | None, + path: str, + headers: Any = None, + method: str | None = None, + body: bytes | None = None, +) -> dict[str, str] | None: + return detect_bedrock_request(host, path, headers) or detect_cursor_request(host, path, method, body) + + +def _cursor_operation_from_path(path: str) -> str: + parts = [urllib.parse.unquote(part) for part in path.split("/") if part] + if not parts: + return "agent" + for part in reversed(parts): + cleaned = re.sub(r"[^A-Za-z0-9_.-]+", "-", part).strip("-") + if cleaned: + return cleaned[:80] + return "agent" + + +def _cursor_model_id_from_body(body: bytes | None) -> str | None: + if not body: + return None + text = _decode_utf8(body) + if text is None: + return _cursor_model_id_from_binary(body) + try: + value = json.loads(text) + except Exception: + return _cursor_model_id_from_text(text) + return _first_deep_string(value, { + "model", + "model_id", + "modelname", + "modeldisplayname", + "selectedmodel", + }) or _cursor_model_id_from_text(text) + + +def _cursor_model_id_from_binary(body: bytes) -> str | None: + try: + text = body.decode("latin1", errors="ignore") + except Exception: + return None + return _cursor_model_id_from_text(text) + + +def _cursor_model_id_from_text(text: str) -> str | None: + match = re.search(r"\bcomposer-2\.5(?:-fast)?\b", text, re.IGNORECASE) + if match: + return match.group(0) + match = re.search(r"Composer\s+2\.5(?:\s+Fast)?", text, re.IGNORECASE) + if match: + return match.group(0) + match = 
re.search(r"(?:model|modelName|selectedModel)[\"'\s:=]+([A-Za-z0-9_. -]{2,80})", text) + if match: + return match.group(1).strip() + return None + + +def _first_deep_string(value: Any, keys: set[str]) -> str | None: + if isinstance(value, dict): + for key, child in value.items(): + if str(key).lower() in keys and isinstance(child, str) and child: + return child + for child in value.values(): + found = _first_deep_string(child, keys) + if found is not None: + return found + elif isinstance(value, list): + for child in value: + found = _first_deep_string(child, keys) + if found is not None: + return found + return None + + def _decode_utf8(data: bytes) -> str | None: try: return data.decode("utf-8") @@ -209,7 +361,7 @@ def _event_base(flow: Any, direction: str, info: dict[str, str]) -> dict[str, An "ts": time.time(), "direction": direction, "flow_id": str(getattr(flow, "id", "")), - "provider": "bedrock", + "provider": info["provider"], "operation": info["operation"], "model_id": info["model_id"], "host": str(getattr(request, "pretty_host", None) or getattr(request, "host", "")), @@ -235,6 +387,19 @@ def _response_body_bytes(flow: Any) -> bytes: return body +def _next_stream_chunk_index(flow: Any) -> int: + metadata = getattr(flow, "metadata", None) + if not isinstance(metadata, dict): + return 0 + current = metadata.get("agent_spy_stream_chunk_index") + try: + index = int(current) + except (TypeError, ValueError): + index = 0 + metadata["agent_spy_stream_chunk_index"] = index + 1 + return index + + def load_spy_config(path: str | None = None) -> dict[str, str]: config_path = path or SPY_ENV try: @@ -389,19 +554,21 @@ def _write_spool_event(event: dict[str, Any], config: dict[str, Any], write_drop return _atomic_write_spool_file(spool_dir, _spool_file_name(event), payload) -def _bedrock_info_for_flow(flow: Any) -> dict[str, str] | None: +def _provider_info_for_flow(flow: Any) -> dict[str, str] | None: metadata = getattr(flow, "metadata", None) info = 
metadata.get("agent_spy") if isinstance(metadata, dict) else None - if isinstance(info, dict) and info.get("provider") == "bedrock": + if isinstance(info, dict) and info.get("provider") in {"bedrock", "cursor"}: return {str(key): str(value) for key, value in info.items()} request = getattr(flow, "request", None) if request is None: return None - return detect_bedrock_request( + return detect_provider_request( getattr(request, "pretty_host", None) or getattr(request, "host", None), str(getattr(request, "path", "")), getattr(request, "headers", None), + getattr(request, "method", None), + _request_body_bytes(flow), ) @@ -439,10 +606,10 @@ def _write_shim_error(flow: Any, message: str) -> None: flow_id = _flow_id(flow) if flow_id is not None: event["flow_id"] = flow_id - info = _bedrock_info_for_flow(flow) + info = _provider_info_for_flow(flow) if info is None: return - event["provider"] = "bedrock" + event["provider"] = info["provider"] _write_spool_event(event, config) except Exception: # The spy tap must never interfere with user traffic. @@ -450,17 +617,20 @@ def _write_shim_error(flow: Any, message: str) -> None: def capture_request(flow: Any) -> None: - """mitmproxy hook helper. Capture a validated Bedrock request if enabled.""" + """mitmproxy hook helper. 
Capture a validated provider request if enabled.""" try: config = _capture_config() if config is None: return request = flow.request - info = detect_bedrock_request( + body = _request_body_bytes(flow) + info = detect_provider_request( getattr(request, "pretty_host", None) or getattr(request, "host", None), str(getattr(request, "path", "")), getattr(request, "headers", None), + getattr(request, "method", None), + body, ) if not info: return @@ -469,7 +639,6 @@ def capture_request(flow: Any) -> None: if isinstance(metadata, dict): metadata["agent_spy"] = info - body = _request_body_bytes(flow) event = _event_base(flow, "request", info) event["headers"] = redact_headers(getattr(request, "headers", None)) _attach_body(event, body) @@ -479,13 +648,13 @@ def capture_request(flow: Any) -> None: def capture_response(flow: Any) -> None: - """mitmproxy hook helper. Capture a Bedrock response if its request matched.""" + """mitmproxy hook helper. Capture a provider response if its request matched.""" try: config = _capture_config() if config is None: return - info = _bedrock_info_for_flow(flow) + info = _provider_info_for_flow(flow) if not info or getattr(flow, "response", None) is None: return @@ -498,7 +667,7 @@ def capture_response(flow: Any) -> None: body = _response_body_bytes(flow) content_type = _header_value(getattr(response, "headers", None), "content-type") - if _is_eventstream_content_type(content_type): + if info["provider"] == "bedrock" and _is_eventstream_content_type(content_type): _attach_body(event, body, force_encoding="aws-eventstream") else: _attach_body(event, body) @@ -507,14 +676,58 @@ def capture_response(flow: Any) -> None: _write_shim_error(flow, str(exc)) +def capture_stream_chunk(flow: Any, chunk: bytes) -> None: + """Capture one provider response body chunk while returning traffic unchanged.""" + + if not chunk: + return + try: + config = _capture_config() + if config is None: + return + info = _provider_info_for_flow(flow) + if info is None: + 
return + response = getattr(flow, "response", None) + if response is None: + return + event = _event_base(flow, "stream-chunk", info) + event["headers"] = redact_headers(getattr(response, "headers", None)) + event["chunk_index"] = _next_stream_chunk_index(flow) + event["body_b64"] = base64.b64encode(chunk).decode("ascii") + event["body_sha256"] = _sha256_bytes(chunk) + _write_spool_event(event, config) + except Exception as exc: # pragma: no cover - defensive for live traffic. + _write_shim_error(flow, str(exc)) + + +def prepare_response_stream(flow: Any) -> None: + """Enable pass-through streaming for provider responses that are long-lived.""" + + try: + info = _provider_info_for_flow(flow) + response = getattr(flow, "response", None) + if info is None or response is None: + return + if info.get("provider") != "cursor": + return + def tee_cursor_chunk(chunk: bytes) -> bytes: + capture_stream_chunk(flow, chunk) + return chunk + + setattr(response, "stream", tee_cursor_chunk) + except Exception: + return + + def capture_error(flow: Any) -> None: - """mitmproxy hook helper. Capture Bedrock flow errors if spy is enabled.""" + """mitmproxy hook helper. 
Capture provider flow errors if spy is enabled.""" try: config = _capture_config() if config is None: return - info = _bedrock_info_for_flow(flow) + info = _provider_info_for_flow(flow) if info is None: return flow_error = getattr(flow, "error", None) @@ -523,7 +736,7 @@ def capture_error(flow: Any) -> None: "version": 1, "ts": time.time(), "direction": "error", - "provider": "bedrock", + "provider": info["provider"], "error": str(message), } flow_id = _flow_id(flow) diff --git a/proxy/allowed-dns.txt.defaults b/proxy/allowed-dns.txt.defaults index 8be4bf4..0e2b16e 100644 --- a/proxy/allowed-dns.txt.defaults +++ b/proxy/allowed-dns.txt.defaults @@ -9,6 +9,8 @@ amazonaws.com googleapis.com openai.com chatgpt.com +cursor.com +cursor.sh github.com githubusercontent.com gitlab.com diff --git a/proxy/allowed-https.txt.defaults b/proxy/allowed-https.txt.defaults index ef896cc..b485ee9 100644 --- a/proxy/allowed-https.txt.defaults +++ b/proxy/allowed-https.txt.defaults @@ -19,6 +19,16 @@ bedrock-runtime.*.amazonaws.com # Claude Code binary source used by pinned nixpkgs if the Nix cache misses. storage.googleapis.com ^(GET|HEAD) /claude-code-dist-86c565f3-f756-42ad-8dfa-d59b1c096819/claude-code-releases/2\.1\.141/linux-arm64/claude(?:\?.*)?$ +# Cursor Agent CLI binary source used by pinned nixpkgs if the Nix cache misses. +downloads.cursor.com ^(GET|HEAD) /lab/2026\.05\.07-42ddaca/linux/arm64/agent-cli-package\.tar\.gz(?:\?.*)?$ +# Cursor Agent CLI runtime self-update source. +downloads.cursor.com ^(GET|HEAD) /lab/2026\.05\.24-dda726e/linux/arm64/agent-cli-package\.tar\.gz(?:\?.*)?$ + +# Cursor Agent CLI API. 
+api.cursor.com +api.cursor.sh +*.cursor.sh + # OpenAI Codex / ChatGPT Plus-Pro subscription auth auth.openai.com chatgpt.com diff --git a/proxy/mitmproxy_addon.py b/proxy/mitmproxy_addon.py index 43fd560..4f4abfb 100644 --- a/proxy/mitmproxy_addon.py +++ b/proxy/mitmproxy_addon.py @@ -333,6 +333,11 @@ def request(flow: http.HTTPFlow) -> None: agent_spy.capture_request(flow) +def responseheaders(flow: http.HTTPFlow) -> None: + if agent_spy is not None: + agent_spy.prepare_response_stream(flow) + + def response(flow: http.HTTPFlow) -> None: if agent_spy is not None: agent_spy.capture_response(flow) diff --git a/proxy/test_agent_spy.py b/proxy/test_agent_spy.py index 907d720..bc95304 100644 --- a/proxy/test_agent_spy.py +++ b/proxy/test_agent_spy.py @@ -97,6 +97,77 @@ def test_detects_bedrock_runtime_paths(self): ) ) + def test_detects_cursor_agent_api_paths(self): + info = agent_spy.detect_cursor_request( + "api2.cursor.sh", + "/aiserver.v1.AiService/StreamUnifiedChat", + "POST", + b'{"model":"Composer 2.5","prompt":"hello"}', + ) + self.assertIsNotNone(info) + self.assertEqual(info["provider"], "cursor") + self.assertEqual(info["operation"], "StreamUnifiedChat") + self.assertEqual(info["model_id"], "Composer 2.5") + + run_info = agent_spy.detect_cursor_request( + "api2.cursor.sh", + "/agent.v1.AgentService/RunSSE", + "POST", + b'{"model":"composer-2.5-fast","prompt":"hello"}', + ) + self.assertIsNotNone(run_info) + self.assertEqual(run_info["provider"], "cursor") + self.assertEqual(run_info["operation"], "RunSSE") + self.assertEqual(run_info["model_id"], "composer-2.5-fast") + + self.assertIsNone( + agent_spy.detect_cursor_request( + "downloads.cursor.com", + "/lab/2026.05.07-42ddaca/linux/arm64/agent-cli-package.tar.gz", + "GET", + ) + ) + self.assertIsNone( + agent_spy.detect_cursor_request( + "api.cursor.com", + "/auth/login", + "POST", + ) + ) + self.assertIsNone( + agent_spy.detect_cursor_request( + "agentn.global.api5.cursor.sh", + 
"/aiserver.v1.AnalyticsService/BootstrapStatsig", + "POST", + ) + ) + self.assertIsNone( + agent_spy.detect_cursor_request( + "api2.cursor.sh", + "/aiserver.v1.BidiService/BidiAppend", + "POST", + ) + ) + self.assertIsNone( + agent_spy.detect_cursor_request( + "api2.cursor.sh", + "/repository.v1.RepositoryService/FastRepoInitHandshakeV2", + "POST", + ) + ) + + def test_detects_wildcard_cursor_agent_hosts(self): + info = agent_spy.detect_cursor_request( + "agentn.global.api5.cursor.sh", + "/aiserver.v1.AiService/StreamUnifiedChat", + "POST", + b'{"model":"composer-2.5-fast","prompt":"hello"}', + ) + self.assertIsNotNone(info) + self.assertEqual(info["provider"], "cursor") + self.assertEqual(info["operation"], "StreamUnifiedChat") + self.assertEqual(info["model_id"], "composer-2.5-fast") + def test_redacts_auth_headers(self): headers = agent_spy.redact_headers( [ @@ -201,6 +272,72 @@ def test_non_bedrock_request_writes_nothing(self): agent_spy.capture_request(make_flow(host="api.anthropic.com")) self.assertEqual(self.read_events(), []) + def test_cursor_request_spool_event_shape_and_redaction(self): + self.write_config(enabled=True) + flow = make_flow( + host="api2.cursor.sh", + path="/aiserver.v1.AiService/StreamUnifiedChat?signature=secret&ok=1", + request_headers=[ + ("Content-Type", "application/json"), + ("Authorization", "Bearer cursor-secret"), + ("X-Cursor-Client-Version", "fixture"), + ], + request_body=b'{"model":"Composer 2.5","prompt":"RCSPY-CURSOR-ALPHA"}', + ) + + agent_spy.capture_request(flow) + + events = self.read_events() + self.assertEqual(len(events), 1) + event = events[0] + self.assertEqual(event["provider"], "cursor") + self.assertEqual(event["operation"], "StreamUnifiedChat") + self.assertEqual(event["model_id"], "Composer 2.5") + self.assertIn("ok=1", event["path"]) + self.assertNotIn("secret", event["path"]) + self.assertEqual( + [pair for pair in event["headers"] if pair[0].lower() == "authorization"], + [["Authorization", "[redacted]"]], 
+ ) + self.assertEqual(json.loads(event["body_text"])["prompt"], "RCSPY-CURSOR-ALPHA") + self.assertEqual(flow.metadata["agent_spy"]["provider"], "cursor") + + def test_cursor_response_streaming_is_enabled_for_matched_flows(self): + flow = make_flow( + host="agentn.global.api5.cursor.sh", + path="/agent.v1.AgentService/Run", + request_headers=[("Content-Type", "application/connect+proto")], + request_body=b"\x00composer-2.5-fast\x00RCSPY-CURSOR-ALPHA", + response_headers=[("Content-Type", "application/connect+proto")], + ) + + agent_spy.prepare_response_stream(flow) + + self.assertTrue(callable(flow.response.stream)) + + def test_cursor_response_stream_callback_spools_chunks_unchanged(self): + self.write_config(enabled=True) + flow = make_flow( + host="agentn.global.api5.cursor.sh", + path="/agent.v1.AgentService/RunSSE", + request_headers=[("Content-Type", "application/connect+proto")], + request_body=b"\x00composer-2.5-fast\x00RCSPY-CURSOR-ALPHA", + response_headers=[("Content-Type", "application/connect+proto")], + ) + + agent_spy.capture_request(flow) + agent_spy.prepare_response_stream(flow) + returned = flow.response.stream(b"\x00\x00\x00\x00\x05hello") + + self.assertEqual(returned, b"\x00\x00\x00\x00\x05hello") + events = self.read_events() + chunk = [event for event in events if event["direction"] == "stream-chunk"][0] + self.assertEqual(chunk["provider"], "cursor") + self.assertEqual(chunk["operation"], "RunSSE") + self.assertEqual(chunk["chunk_index"], 0) + self.assertEqual(base64.b64decode(chunk["body_b64"]), b"\x00\x00\x00\x00\x05hello") + self.assertEqual(chunk["body_sha256"], agent_spy._sha256_bytes(b"\x00\x00\x00\x00\x05hello")) + def test_response_eventstream_is_spooled_as_b64(self): self.write_config(enabled=True) stream = eventstream_message( diff --git a/proxy/test_mitmproxy_addon.py b/proxy/test_mitmproxy_addon.py index b124104..7bb2eec 100644 --- a/proxy/test_mitmproxy_addon.py +++ b/proxy/test_mitmproxy_addon.py @@ -27,11 +27,13 @@ def 
setUp(self): mitmproxy_addon.logger.disabled = True self._allow_https = mitmproxy_addon.ALLOW_HTTPS self._https_cache = mitmproxy_addon._https_cache + self._agent_spy = mitmproxy_addon.agent_spy mitmproxy_addon._https_cache = mitmproxy_addon._HttpsPolicyCache() def tearDown(self): mitmproxy_addon.ALLOW_HTTPS = self._allow_https mitmproxy_addon._https_cache = self._https_cache + mitmproxy_addon.agent_spy = self._agent_spy mitmproxy_addon.ctx.options.connection_strategy = "lazy" mitmproxy_addon.logger.disabled = False @@ -141,6 +143,21 @@ def test_request_hook_allows_scoped_host_when_regex_matches(self): finally: os.unlink(path) + def test_responseheaders_delegates_stream_decision_to_spy(self): + calls = [] + + class FakeSpy: + @staticmethod + def prepare_response_stream(flow): + calls.append(flow) + + flow = _flow("api2.cursor.sh", "api2.cursor.sh", "POST", "/aiserver.v1.AiService/StreamUnifiedChat") + mitmproxy_addon.agent_spy = FakeSpy + + mitmproxy_addon.responseheaders(flow) + + self.assertEqual(calls, [flow]) + def test_load_accepts_lazy_connection_strategy(self): mitmproxy_addon.ctx.options.connection_strategy = "lazy" mitmproxy_addon.load(None) diff --git a/src/rootcell/extensions/pi-plannotator.test.ts b/src/rootcell/extensions/pi-plannotator.test.ts index 8e22a4d..e79c7be 100644 --- a/src/rootcell/extensions/pi-plannotator.test.ts +++ b/src/rootcell/extensions/pi-plannotator.test.ts @@ -18,7 +18,7 @@ describe("pi-plannotator extension host command", () => { const logs: string[] = []; let contexts = 0; mkdirSync(instanceDir, { recursive: true }); - writeFileSync(join(instanceDir, "extensions.txt"), "claude-code=false\npi-plannotator=false\npi-subagents=false\n", "utf8"); + writeFileSync(join(instanceDir, "extensions.txt"), "claude-code=false\ncursor-cli=false\npi-plannotator=false\npi-subagents=false\n", "utf8"); const status = await runExtensionCommand({ repoDir: repo, @@ -47,7 +47,7 @@ describe("pi-plannotator extension host command", () => { const 
instanceDir = join(stateDir, "dev"); const env = { ...process.env, ROOTCELL_STATE_DIR: stateDir }; mkdirSync(instanceDir, { recursive: true }); - writeFileSync(join(instanceDir, "extensions.txt"), "claude-code=false\npi-plannotator=true\npi-subagents=false\n", "utf8"); + writeFileSync(join(instanceDir, "extensions.txt"), "claude-code=false\ncursor-cli=false\npi-plannotator=true\npi-subagents=false\n", "utf8"); expect(completeExtensionCommand({ repoDir: repo, @@ -71,7 +71,7 @@ describe("pi-plannotator extension host command", () => { current: "", })).toEqual([]); - writeFileSync(join(instanceDir, "extensions.txt"), "claude-code=false\npi-plannotator=false\npi-subagents=false\n", "utf8"); + writeFileSync(join(instanceDir, "extensions.txt"), "claude-code=false\ncursor-cli=false\npi-plannotator=false\npi-subagents=false\n", "utf8"); expect(completeExtensionCommand({ repoDir: repo, env, @@ -210,7 +210,7 @@ function testContext(input: { return { repoDir: "/repo", instanceName: "dev", - extensionConfig: parseExtensionsConfig("claude-code=false\npi-plannotator=true\npi-subagents=false\n"), + extensionConfig: parseExtensionsConfig("claude-code=false\ncursor-cli=false\npi-plannotator=true\npi-subagents=false\n"), config: {} as RootcellConfig, log: (message) => input.logs?.push(message), vmStatus: input.vmStatus ?? 
(() => Promise.resolve({ state: "running" })), diff --git a/src/rootcell/extensions/registry.ts b/src/rootcell/extensions/registry.ts index 21014ed..baad3ec 100644 --- a/src/rootcell/extensions/registry.ts +++ b/src/rootcell/extensions/registry.ts @@ -5,7 +5,7 @@ import type { RootcellConfig } from "../types.ts"; import type { ParsedExtensionsConfig } from "./config.ts"; import { PLANNOTATOR_TUNNEL_COMMAND } from "./pi-plannotator.ts"; -export const RootcellExtensionIdSchema = z.enum(["claude-code", "pi-plannotator", "pi-subagents"]); +export const RootcellExtensionIdSchema = z.enum(["claude-code", "cursor-cli", "pi-plannotator", "pi-subagents"]); export type RootcellExtensionId = z.infer; @@ -117,6 +117,17 @@ export const ROOTCELL_EXTENSIONS: readonly RootcellExtensionDefinition[] = parse }, hostCommands: NO_HOST_COMMANDS, }, + { + id: "cursor-cli", + description: "Cursor Agent CLI configured for headless coding-agent runs", + requiresProvision: true, + guestHooks: { + agentNixos: [], + firewallNixos: [], + homeManager: ["extensions/cursor-cli/home-manager.nix"], + }, + hostCommands: NO_HOST_COMMANDS, + }, { id: "pi-plannotator", description: "Pi Plannotator integration package and remote-session configuration", diff --git a/src/rootcell/rootcell.test.ts b/src/rootcell/rootcell.test.ts index 6373c14..ee83c3d 100644 --- a/src/rootcell/rootcell.test.ts +++ b/src/rootcell/rootcell.test.ts @@ -146,6 +146,15 @@ describe("rootcell extension registry", () => { expect(claudeCode?.guestHooks.firewallNixos).toEqual(expect.schemaMatching(EmptyStringArraySchema)); expect(claudeCode?.hostCommands).toEqual(expect.schemaMatching(EmptyStringArraySchema)); }); + + test("registers Cursor CLI package install hook", () => { + const cursor = ROOTCELL_EXTENSIONS.find((extension) => extension.id === "cursor-cli"); + + expect(cursor?.guestHooks.homeManager).toEqual(["extensions/cursor-cli/home-manager.nix"]); + 
expect(cursor?.guestHooks.agentNixos).toEqual(expect.schemaMatching(EmptyStringArraySchema)); + expect(cursor?.guestHooks.firewallNixos).toEqual(expect.schemaMatching(EmptyStringArraySchema)); + expect(cursor?.hostCommands).toEqual(expect.schemaMatching(EmptyStringArraySchema)); + }); }); describe("rootcell argument parsing", () => { @@ -605,6 +614,7 @@ describe("rootcell extension config", () => { "", "pi-subagents=true", "claude-code=false", + "cursor-cli=false", "pi-plannotator=false", "", ].join("\n")); @@ -616,16 +626,17 @@ describe("rootcell extension config", () => { const path = join(repo, "extensions.txt"); const seeded = ensureExtensionsConfig(path); expect(formatExtensionsList(seeded)).toContain("claude-code disabled"); + expect(formatExtensionsList(seeded)).toContain("cursor-cli disabled"); expect(formatExtensionsList(seeded)).toContain("pi-plannotator disabled"); - expect(readFileSync(path, "utf8")).toBe("claude-code=false\npi-plannotator=false\npi-subagents=false\n"); + expect(readFileSync(path, "utf8")).toBe("claude-code=false\ncursor-cli=false\npi-plannotator=false\npi-subagents=false\n"); const enabled = setExtensionEnabled(path, "pi-subagents", true); expect(enabled.changed).toBe(true); - expect(readFileSync(path, "utf8")).toBe("claude-code=false\npi-plannotator=false\npi-subagents=true\n"); + expect(readFileSync(path, "utf8")).toBe("claude-code=false\ncursor-cli=false\npi-plannotator=false\npi-subagents=true\n"); const enabledAgain = setExtensionEnabled(path, "pi-subagents", true); expect(enabledAgain.changed).toBe(false); - expect(readFileSync(path, "utf8")).toBe("claude-code=false\npi-plannotator=false\npi-subagents=true\n"); + expect(readFileSync(path, "utf8")).toBe("claude-code=false\ncursor-cli=false\npi-plannotator=false\npi-subagents=true\n"); } finally { rmSync(repo, { recursive: true, force: true }); } @@ -633,7 +644,7 @@ describe("rootcell extension config", () => { test("migrates legacy Pi extension ids in extensions.txt", () => { 
const rendered = renderExtensionsConfig(parseExtensionsConfig("plannotator=true\nsubagent=false\n")); - expect(rendered).toBe("pi-plannotator=true\npi-subagents=false\nclaude-code=false\n"); + expect(rendered).toBe("pi-plannotator=true\npi-subagents=false\nclaude-code=false\ncursor-cli=false\n"); expect(() => parseExtensionsConfig("plannotator=true\npi-plannotator=false\n")).toThrow("duplicate extension key"); }); }); @@ -652,8 +663,9 @@ describe("rootcell extension Nix hooks", () => { }); test("renders enabled Home Manager extension imports", () => { - const rendered = renderExtensionNixAggregator(parseExtensionsConfig("claude-code=true\npi-subagents=true\npi-plannotator=true\n"), "homeManager"); + const rendered = renderExtensionNixAggregator(parseExtensionsConfig("claude-code=true\ncursor-cli=true\npi-subagents=true\npi-plannotator=true\n"), "homeManager"); expect(rendered).toContain("../extensions/claude-code/home-manager.nix"); + expect(rendered).toContain("../extensions/cursor-cli/home-manager.nix"); expect(rendered).toContain("../extensions/pi-subagents/home-manager.nix"); expect(rendered).toContain("../extensions/pi-plannotator/home-manager.nix"); expect(renderExtensionNixAggregator(parseExtensionsConfig("pi-subagents=true\n"), "agentNixos")).not.toContain("../extensions/pi-subagents"); @@ -2415,7 +2427,7 @@ describe("rootcell edit command", () => { expect(status).toBe(0); expect(readFileSync(record, "utf8").trim()).toBe(join(repo, ".state", "dev", "extensions.txt")); - expect(readFileSync(join(repo, ".state", "dev", "extensions.txt"), "utf8")).toBe("claude-code=false\npi-plannotator=false\npi-subagents=false\n"); + expect(readFileSync(join(repo, ".state", "dev", "extensions.txt"), "utf8")).toBe("claude-code=false\ncursor-cli=false\npi-plannotator=false\npi-subagents=false\n"); } finally { restoreEnv("ROOTCELL_STATE_DIR", oldRootcellStateDir); restoreEnv("EDITOR", oldEditor); @@ -2433,19 +2445,19 @@ describe("rootcell extension command", () => { const list 
= runCapture("./rootcell", ["--instance", "dev", "extension", "list"], { env }); expect(list.stdout).toContain("pi-plannotator disabled"); expect(list.stdout).toContain("pi-subagents disabled"); - expect(readFileSync(join(repo, ".state", "dev", "extensions.txt"), "utf8")).toBe("claude-code=false\npi-plannotator=false\npi-subagents=false\n"); + expect(readFileSync(join(repo, ".state", "dev", "extensions.txt"), "utf8")).toBe("claude-code=false\ncursor-cli=false\npi-plannotator=false\npi-subagents=false\n"); const enable = runCapture("./rootcell", ["--instance", "dev", "extension", "enable", "pi-subagents"], { env }); expect(enable.stdout).toContain("pi-subagents enabled for instance 'dev'."); expect(enable.stdout).toContain("run ./rootcell --instance dev provision to apply VM changes."); - expect(readFileSync(join(repo, ".state", "dev", "extensions.txt"), "utf8")).toBe("claude-code=false\npi-plannotator=false\npi-subagents=true\n"); + expect(readFileSync(join(repo, ".state", "dev", "extensions.txt"), "utf8")).toBe("claude-code=false\ncursor-cli=false\npi-plannotator=false\npi-subagents=true\n"); const enableAgain = runCapture("./rootcell", ["--instance", "dev", "extension", "enable", "pi-subagents"], { env }); expect(enableAgain.stdout).toContain("pi-subagents already enabled for instance 'dev'."); const disable = runCapture("./rootcell", ["--instance", "dev", "extension", "disable", "pi-subagents"], { env }); expect(disable.stdout).toContain("pi-subagents disabled for instance 'dev'."); - expect(readFileSync(join(repo, ".state", "dev", "extensions.txt"), "utf8")).toBe("claude-code=false\npi-plannotator=false\npi-subagents=false\n"); + expect(readFileSync(join(repo, ".state", "dev", "extensions.txt"), "utf8")).toBe("claude-code=false\ncursor-cli=false\npi-plannotator=false\npi-subagents=false\n"); const invalid = runCapture("./rootcell", ["--instance", "dev", "extension", "enable", "missing"], { env, allowFailure: true }); expect(invalid.status).toBe(2); @@ -2460,7 
+2472,7 @@ describe("rootcell extension command", () => { try { const env = { ...process.env, ROOTCELL_STATE_DIR: join(repo, ".state") }; mkdirSync(join(repo, ".state", "dev"), { recursive: true }); - writeFileSync(join(repo, ".state", "dev", "extensions.txt"), "claude-code=false\npi-plannotator=true\npi-subagents=false\n", "utf8"); + writeFileSync(join(repo, ".state", "dev", "extensions.txt"), "claude-code=false\ncursor-cli=false\npi-plannotator=true\npi-subagents=false\n", "utf8"); const calls: string[] = []; const extensions = testHostCommandExtensions(async (context, args) => { calls.push(`run:${context.instanceName}:${args.join(",")}:${context.extensionConfig.enabled.has("pi-plannotator") ? "enabled" : "disabled"}`); @@ -2563,7 +2575,7 @@ describe("rootcell extension command", () => { expect(contexts).toBe(0); mkdirSync(join(repo, ".state", "dev"), { recursive: true }); - writeFileSync(join(repo, ".state", "dev", "extensions.txt"), "claude-code=false\npi-plannotator=true\npi-subagents=false\n", "utf8"); + writeFileSync(join(repo, ".state", "dev", "extensions.txt"), "claude-code=false\ncursor-cli=false\npi-plannotator=true\npi-subagents=false\n", "utf8"); logs.length = 0; expect(await runExtensionCommand({ ...input, rest: ["missing", "check"] })).toBe(2); expect(logs.join("\n")).toContain("unknown extension command or id 'missing'"); @@ -2697,7 +2709,7 @@ describe("shell completions", () => { const stateDir = join(repo, ".state"); const instanceDir = join(stateDir, "dev"); mkdirSync(instanceDir, { recursive: true }); - writeFileSync(join(instanceDir, "extensions.txt"), "claude-code=false\npi-plannotator=false\npi-subagents=true\n", "utf8"); + writeFileSync(join(instanceDir, "extensions.txt"), "claude-code=false\ncursor-cli=false\npi-plannotator=false\npi-subagents=true\n", "utf8"); const env = completionEnv("/bin/bash"); env.ROOTCELL_STATE_DIR = stateDir; writeSelectedRootcellInstance(repo, "dev", env); @@ -2757,7 +2769,7 @@ describe("shell completions", () => 
{ const env = { ...completionEnv("/bin/bash"), ROOTCELL_STATE_DIR: stateDir }; const extensions = testHostCommandExtensions(); mkdirSync(instanceDir, { recursive: true }); - writeFileSync(join(instanceDir, "extensions.txt"), "claude-code=false\npi-plannotator=true\npi-subagents=false\n", "utf8"); + writeFileSync(join(instanceDir, "extensions.txt"), "claude-code=false\ncursor-cli=false\npi-plannotator=true\npi-subagents=false\n", "utf8"); const root = completeExtensionCommand({ repoDir: repo, @@ -2790,7 +2802,7 @@ describe("shell completions", () => { }); expect(commandArgs).toEqual(["alpha"]); - writeFileSync(join(instanceDir, "extensions.txt"), "claude-code=false\npi-plannotator=false\npi-subagents=false\n", "utf8"); + writeFileSync(join(instanceDir, "extensions.txt"), "claude-code=false\ncursor-cli=false\npi-plannotator=false\npi-subagents=false\n", "utf8"); const disabledRoot = completeExtensionCommand({ repoDir: repo, env, diff --git a/src/spy/api-contracts.ts b/src/spy/api-contracts.ts index 54ee362..d49a08b 100644 --- a/src/spy/api-contracts.ts +++ b/src/spy/api-contracts.ts @@ -129,12 +129,13 @@ export const SpyTokenCountResponseSchema = z.object({ records: z.array(SpyTokenCountRecordSchema), }).strict(); -export const SpyCompactionDetectionSourceSchema = z.enum(["none", "pi_pattern", "claude_code_pattern", "heuristic", "summarization_request"]); +export const SpyCompactionDetectionSourceSchema = z.enum(["none", "pi_pattern", "claude_code_pattern", "cursor_pattern", "heuristic", "summarization_request"]); export const SpyCompactionConfidenceSchema = z.enum(["none", "low", "medium", "high"]); export const SpyCompactionReasonSchema = z.enum([ "no_previous_comparable_call", "pi_request_context_profile", "claude_code_request_context_profile", + "cursor_request_context_profile", "summarization_system_prompt", "conversation_wrapper_input", "claude_code_summary_prompt", diff --git a/src/spy/compaction.test.ts b/src/spy/compaction.test.ts index be9c74d..668fd6c 
100644 --- a/src/spy/compaction.test.ts +++ b/src/spy/compaction.test.ts @@ -127,6 +127,42 @@ describe("compaction detection", () => { expect(assessment.reasons).toContain("prior_history_block_drop"); }); + test("labels Cursor compaction candidates when captured context shrinks around a summary", () => { + const previousBlocks = [ + requestBlock("previous-system", "harness-system-context", "You are Cursor Agent.", { source: "cursor-request-json", hash: "stable-system" }), + requestBlock("previous-tool", "tool-definition", "shell Run commands", { source: "cursor-request-json", hash: "stable-tool" }), + requestBlock("previous-history-1", "prior-conversation-history", "First Cursor historical turn. ".repeat(120), { role: "user", source: "cursor-request-message" }), + requestBlock("previous-history-2", "prior-conversation-history", "Second Cursor historical turn. ".repeat(120), { role: "assistant", source: "cursor-request-message" }), + requestBlock("previous-history-3", "prior-conversation-history", "Third Cursor historical turn. ".repeat(120), { role: "user", source: "cursor-request-message" }), + requestBlock("previous-history-4", "prior-conversation-history", "Fourth Cursor historical turn. 
".repeat(120), { role: "assistant", source: "cursor-request-message" }), + requestBlock("previous-current", "current-user-input", "continue with the task", { role: "user", source: "cursor-request-message" }), + ]; + const currentBlocks = [ + requestBlock("current-system", "harness-system-context", "You are Cursor Agent.", { source: "cursor-request-json", hash: "stable-system" }), + requestBlock("current-tool", "tool-definition", "shell Run commands", { source: "cursor-request-json", hash: "stable-tool" }), + requestBlock("current-summary", "prior-conversation-history", "Summary of the conversation so far: the user asked for Cursor CLI spy support.", { role: "user", source: "cursor-request-message" }), + requestBlock("current-current", "current-user-input", "continue with the task", { role: "user", source: "cursor-request-message" }), + ]; + + const assessment = detectCompaction({ + summary: summary("current-cursor", currentBlocks, { requestByteSize: 5_000, inputTokens: 1_500, provider: "cursor", modelId: "Composer 2.5" }), + requestBlocks: currentBlocks, + previousSummary: summary("previous-cursor", previousBlocks, { requestByteSize: 22_000, inputTokens: 8_000, provider: "cursor", modelId: "Composer 2.5" }), + previousRequestBlocks: previousBlocks, + }); + + expect(assessment).toMatchObject({ + status: "candidate", + source: "cursor_pattern", + confidence: "high", + label: "Cursor compaction candidate", + }); + expect(assessment.reasons).toContain("cursor_request_context_profile"); + expect(assessment.reasons).toContain("stable_request_context"); + expect(assessment.reasons).toContain("summary_like_history_block"); + expect(assessment.reasons).toContain("input_token_drop"); + }); + test("labels Pi summarization requests as compaction events without a prior transition", () => { const currentBlocks = [ requestBlock( @@ -334,16 +370,21 @@ function summaryFromNormalizedCall(call: NormalizedProviderCall): SpyCallSummary function summary( id: string, blocks: readonly 
NormalizedBlock[], - options: { readonly requestByteSize?: number | undefined; readonly inputTokens?: number | undefined } = {}, + options: { + readonly requestByteSize?: number | undefined; + readonly inputTokens?: number | undefined; + readonly provider?: ProviderCall["provider"] | undefined; + readonly modelId?: string | undefined; + } = {}, ): SpyCallSummary { const request = requestBlocks(blocks); const response = blocks.filter((block) => block.direction === "response"); return { call: { id, - provider: "bedrock", + provider: options.provider ?? "bedrock", operation: "converse-stream", - model_id: "us.anthropic.claude-sonnet-4-6", + model_id: options.modelId ?? "us.anthropic.claude-sonnet-4-6", status: "complete", started_at: id.startsWith("previous") ? 1 : 2, completed_at: id.startsWith("previous") ? 1.5 : 2.5, diff --git a/src/spy/compaction.ts b/src/spy/compaction.ts index 6f678f1..655284b 100644 --- a/src/spy/compaction.ts +++ b/src/spy/compaction.ts @@ -40,6 +40,7 @@ export function detectCompaction(input: RequestTransitionInput): SpyCompactionAs const contextStability = stableContext(input.requestBlocks, input.previousRequestBlocks); const piProfile = isPiRequestContext(input); const claudeCodeProfile = isClaudeCodeRequestContext(input); + const cursorProfile = isCursorRequestContext(input); const summarizationRequest = detectSummarizationRequest(input.requestBlocks, { claudeCodeProfile }); const evidence = { @@ -108,6 +109,9 @@ export function detectCompaction(input: RequestTransitionInput): SpyCompactionAs if (claudeCodeProfile) { reasons.push("claude_code_request_context_profile"); } + if (cursorProfile) { + reasons.push("cursor_request_context_profile"); + } if (stable) { reasons.push("stable_request_context"); } @@ -136,12 +140,16 @@ export function detectCompaction(input: RequestTransitionInput): SpyCompactionAs (summaryLikeHistory && (historyByteDrop || requestByteDrop || inputTokenDrop)) || (historyByteDrop && (historyBlockDrop || requestByteDrop 
|| inputTokenDrop)) ); - const heuristicCandidate = !piCandidate && !claudeCodeCandidate && stable && currentInputExists && hasPriorHistory && ( + const cursorCandidate = !piCandidate && !claudeCodeCandidate && cursorProfile && stable && currentInputExists && hasPriorHistory && ( + (summaryLikeHistory && (historyByteDrop || requestByteDrop || inputTokenDrop)) + || (historyByteDrop && (historyBlockDrop || requestByteDrop || inputTokenDrop)) + ); + const heuristicCandidate = !piCandidate && !claudeCodeCandidate && !cursorCandidate && stable && currentInputExists && hasPriorHistory && ( (summaryLikeHistory && historyByteDrop) || (historyByteDrop && historyBlockDrop && (requestByteDrop || inputTokenDrop)) ); - if (!piCandidate && !claudeCodeCandidate && !heuristicCandidate) { + if (!piCandidate && !claudeCodeCandidate && !cursorCandidate && !heuristicCandidate) { return { status: "none", source: "none", @@ -154,16 +162,16 @@ export function detectCompaction(input: RequestTransitionInput): SpyCompactionAs return { status: "candidate", - source: piCandidate ? "pi_pattern" : claudeCodeCandidate ? "claude_code_pattern" : "heuristic", + source: piCandidate ? "pi_pattern" : claudeCodeCandidate ? "claude_code_pattern" : cursorCandidate ? "cursor_pattern" : "heuristic", confidence: candidateConfidence({ - piCandidate: piCandidate || claudeCodeCandidate, + piCandidate: piCandidate || claudeCodeCandidate || cursorCandidate, summaryLikeHistory, historyByteDrop, historyBlockDrop, requestByteDrop, inputTokenDrop, }), - label: piCandidate ? "Pi compaction candidate" : claudeCodeCandidate ? "Claude Code compaction candidate" : "Heuristic compaction candidate", + label: piCandidate ? "Pi compaction candidate" : claudeCodeCandidate ? "Claude Code compaction candidate" : cursorCandidate ? 
"Cursor compaction candidate" : "Heuristic compaction candidate", reasons, evidence, }; @@ -314,6 +322,10 @@ function isClaudeCodeRequestContext(input: RequestTransitionInput): boolean { }); } +function isCursorRequestContext(input: RequestTransitionInput): boolean { + return input.summary.call.provider === "cursor" || input.requestBlocks.some((block) => block.source.includes("cursor")); +} + function blockSignature(block: NormalizedBlock): string { return [ block.direction, diff --git a/src/spy/cursor.test.ts b/src/spy/cursor.test.ts new file mode 100644 index 0000000..f204694 --- /dev/null +++ b/src/spy/cursor.test.ts @@ -0,0 +1,347 @@ +import { readFileSync } from "node:fs"; +import { gzipSync } from "node:zlib"; +import { describe, expect, test } from "bun:test"; +import { + normalizeCursorRequest, + normalizeCursorResponse, +} from "./cursor.ts"; +import { + NormalizedBlockSchema, + ProviderCallSchema, + RawPayloadRecordSchema, + SpoolEventSchema, + UsageRecordSchema, + type SpoolRequestEvent, + type SpoolResponseEvent, +} from "./schemas.ts"; + +function cursorRequest(flowId: string, body: Record): SpoolRequestEvent { + return { + version: 1, + ts: 1779497200, + direction: "request", + flow_id: flowId, + provider: "cursor", + operation: "StreamUnifiedChat", + model_id: "Composer 2.5", + host: "api2.cursor.sh", + method: "POST", + path: "/aiserver.v1.AiService/StreamUnifiedChat", + headers: [["content-type", "application/json"]], + body_text: JSON.stringify(body), + }; +} + +function cursorResponse(flowId: string, body: Record): SpoolResponseEvent { + return { + version: 1, + ts: 1779497201, + direction: "response", + flow_id: flowId, + provider: "cursor", + operation: "StreamUnifiedChat", + model_id: "Composer 2.5", + host: "api2.cursor.sh", + method: "POST", + path: "/aiserver.v1.AiService/StreamUnifiedChat", + headers: [["content-type", "application/json"]], + status_code: 200, + reason: "OK", + request_headers: [["content-type", "application/json"]], + 
body_text: JSON.stringify(body), + }; +} + +function connectFrame(payload: Buffer, compressed = false): Buffer { + const body = compressed ? gzipSync(payload) : payload; + const header = Buffer.alloc(5); + header[0] = compressed ? 1 : 0; + header.writeUInt32BE(body.length, 1); + return Buffer.concat([header, body]); +} + +function protoVarint(value: number): Buffer { + const bytes: number[] = []; + let remaining = value; + while (remaining >= 0x80) { + bytes.push((remaining & 0x7f) | 0x80); + remaining = Math.floor(remaining / 0x80); + } + bytes.push(remaining); + return Buffer.from(bytes); +} + +function protoField(fieldNumber: number, wireType: number, payload: Buffer): Buffer { + return Buffer.concat([protoVarint(fieldNumber * 8 + wireType), payload]); +} + +function protoVarintField(fieldNumber: number, value: number): Buffer { + return protoField(fieldNumber, 0, protoVarint(value)); +} + +function protoMessageField(fieldNumber: number, message: Buffer): Buffer { + return protoField(fieldNumber, 2, Buffer.concat([protoVarint(message.length), message])); +} + +describe("Cursor adapter", () => { + test("normalizes Cursor request semantic blocks", () => { + const normalized = normalizeCursorRequest(cursorRequest("fixture-cursor-flow", { + model: "Composer 2.5", + system: "You are Cursor Agent.", + messages: [ + { role: "user", content: "Earlier request RCSPY-CURSOR-ALPHA" }, + { role: "assistant", content: "Earlier response" }, + { role: "user", content: "Current request RCSPY-CURSOR-BETA" }, + ], + tools: [{ name: "shell", description: "Run commands" }], + })); + + expect(() => ProviderCallSchema.parse(normalized.call)).not.toThrow(); + expect(normalized.call.provider).toBe("cursor"); + expect(normalized.call.model_id).toBe("Composer 2.5"); + for (const block of normalized.blocks) { + expect(() => NormalizedBlockSchema.parse(block)).not.toThrow(); + } + expect(normalized.blocks.find((block) => block.kind === "harness-system-context")?.text).toBe("You are Cursor 
Agent."); + expect(normalized.blocks.filter((block) => block.kind === "prior-conversation-history").map((block) => block.text).join("\n")).toContain("RCSPY-CURSOR-ALPHA"); + expect(normalized.blocks.find((block) => block.kind === "current-user-input")?.text).toContain("RCSPY-CURSOR-BETA"); + expect(normalized.blocks.find((block) => block.kind === "tool-definition")?.text).toContain("shell"); + }); + + test("normalizes Cursor response text, usage, stream lines, and raw payloads", () => { + const normalized = normalizeCursorResponse(cursorResponse("fixture-cursor-flow-response", { + type: "message", + model: "Composer 2.5", + result: { + text: "cursor-response-ok", + }, + usage: { + inputTokens: 123, + outputTokens: 7, + cachedInputTokens: 11, + }, + }), { storeRaw: true }); + + expect(normalized.call.status).toBe("complete"); + expect(normalized.blocks.find((block) => block.kind === "assistant-output")?.text).toContain("cursor-response-ok"); + expect(normalized.usage[0]).toMatchObject({ + input_tokens: 123, + output_tokens: 7, + cache_read_tokens: 11, + total_tokens: 141, + }); + for (const usage of normalized.usage) { + expect(() => UsageRecordSchema.parse(usage)).not.toThrow(); + } + expect(normalized.rawPayloads).toHaveLength(1); + expect(() => RawPayloadRecordSchema.parse(normalized.rawPayloads[0])).not.toThrow(); + expect(normalized.rawPayloads[0]?.body_text).toContain("cursor-response-ok"); + }); + + test("accepts Cursor spool schema events and extracts printable binary strings", () => { + const event = SpoolEventSchema.parse({ + version: 1, + ts: 1779497200, + direction: "request", + flow_id: "fixture-cursor-binary", + provider: "cursor", + operation: "StreamUnifiedChat", + model_id: "cursor", + host: "api2.cursor.sh", + method: "POST", + path: "/aiserver.v1.AiService/StreamUnifiedChat", + headers: [["content-type", "application/connect+proto"]], + body_b64: Buffer.from("\u0000RCSPY-CURSOR-BINARY\u0000composer-2.5-fast", "utf8").toString("base64"), + }); + if 
(event.direction !== "request") { + throw new Error("expected request event"); + } + + const normalized = normalizeCursorRequest(event); + expect(normalized.call.provider).toBe("cursor"); + expect(normalized.call.model_id).toBe("composer-2.5-fast"); + expect(JSON.stringify(normalized.blocks)).toContain("RCSPY-CURSOR-BINARY"); + }); + + test("normalizes the redacted Cursor Agent fixture captured from jmp", () => { + const fixtureUrl = new URL("./fixtures/cursor-agent-composer-2.5.ndjson", import.meta.url); + const events = readFileSync(fixtureUrl, "utf8") + .trim() + .split("\n") + .map((line) => SpoolEventSchema.parse(JSON.parse(line) as unknown)); + const requests = events.filter((event): event is SpoolRequestEvent => event.direction === "request"); + const responses = events.filter((event): event is SpoolResponseEvent => event.direction === "response"); + + expect(events.every((event) => event.provider === "cursor")).toBe(true); + expect(requests).toHaveLength(2); + expect(responses).toHaveLength(2); + const normalizedRequests = requests.map((event) => normalizeCursorRequest(event, { storeRaw: true })); + const normalizedResponses = responses.map((event) => normalizeCursorResponse(event, { storeRaw: true })); + expect(normalizedResponses.map((item) => item.call.model_id)).toEqual(["composer-2.5-fast", "composer-2.5-fast"]); + expect(JSON.stringify(normalizedResponses)).toContain("RCSPY-CURSOR-ALPHA-HTTP1"); + expect(JSON.stringify(normalizedResponses)).toContain("RCSPY-CURSOR-BETA-HTTP1"); + expect(normalizedResponses.map((item) => item.usage[0]?.input_tokens)).toEqual([7918, 61]); + expect(normalizedRequests.every((item) => item.rawPayloads.length === 1)).toBe(true); + expect(normalizedResponses.every((item) => item.rawPayloads.length === 1)).toBe(true); + }); + + test("decodes Cursor Connect-framed gzip responses without binary fallback noise", () => { + const systemFrame = connectFrame(Buffer.from([ + "\u0000", + JSON.stringify({ + role: "system", + content: 
"Cursor system prompt should become request context, not assistant output", + }), + ].join(""), "utf8"), true); + const harnessContextFrame = connectFrame(Buffer.from([ + "\u0000", + JSON.stringify({ + role: "user", + content: "\nWorkspace Path: /tmp/cursor\n\n\n\nUse safe commands.\n", + }), + ].join(""), "utf8"), true); + const userQueryFrame = connectFrame(Buffer.from([ + "\u0000", + JSON.stringify({ + role: "user", + content: [ + { + type: "text", + text: "\nReply with RCSPY-CURSOR-CONNECT-OK\n", + }, + ], + }), + ].join(""), "utf8")); + const assistantFrame = connectFrame(Buffer.from([ + "\u0000", + JSON.stringify({ + role: "assistant", + content: [ + { type: "redacted-reasoning", data: "opaque-reasoning" }, + { type: "text", text: "RCSPY-CURSOR-CONNECT-OK" }, + { type: "tool-call", toolCallId: "tool-one", toolName: "Glob", args: { glob_pattern: "**/*" } }, + ], + providerOptions: { cursor: { modelName: "composer-2.5-fast" } }, + }), + "\u0000", + JSON.stringify({ + role: "tool", + content: [ + { type: "tool-result", toolCallId: "tool-one", toolName: "Glob", result: "0 files found" }, + ], + }), + "\u0000", + JSON.stringify({ + type: "result", + usage: { inputTokens: 10, outputTokens: 2, cacheReadTokens: 3 }, + result: "RCSPY-CURSOR-CONNECT-OK", + }), + ].join(""), "utf8")); + const normalized = normalizeCursorResponse({ + ...cursorResponse("fixture-cursor-connect", {}), + model_id: "cursor", + headers: [["content-type", "text/event-stream"], ["connect-content-encoding", "gzip"]], + body_text: undefined, + body_b64: Buffer.concat([systemFrame, harnessContextFrame, userQueryFrame, assistantFrame]).toString("base64"), + }); + + expect(normalized.call.model_id).toBe("composer-2.5-fast"); + const assistantText = normalized.blocks.find((block) => block.kind === "assistant-output")?.text ?? 
""; + expect(assistantText).toContain("RCSPY-CURSOR-CONNECT-OK"); + expect(assistantText).not.toContain("Cursor system prompt"); + expect(assistantText).not.toContain("opaque-reasoning"); + const requestText = normalized.blocks.filter((block) => block.direction === "request").map((block) => block.text).join("\n"); + expect(requestText).toContain("Cursor system prompt should become request context"); + expect(requestText).toContain("Workspace Path: /tmp/cursor"); + expect(requestText).toContain("Reply with RCSPY-CURSOR-CONNECT-OK"); + expect(requestText).not.toContain(""); + expect(normalized.blocks.find((block) => block.kind === "tool-call")?.text).toContain("Glob"); + expect(normalized.blocks.find((block) => block.kind === "tool-result")?.text).toContain("0 files found"); + expect(normalized.streamEvents.map((event) => event.event_type)).toContain("assistant"); + expect(normalized.streamEvents.map((event) => event.event_type)).toContain("tool"); + expect(normalized.usage[0]).toMatchObject({ + input_tokens: 10, + output_tokens: 2, + cache_read_tokens: 3, + total_tokens: 15, + }); + }); + + test("persists raw Cursor Connect/protobuf bytes and annotates the known usage envelope", () => { + const usageMessage = Buffer.concat([ + protoVarintField(1, 10779), + protoVarintField(2, 52), + protoVarintField(3, 2848), + protoVarintField(4, 0), + ]); + const resultMessage = Buffer.concat([ + protoMessageField(1, protoMessageField(14, usageMessage)), + ]); + const normalized = normalizeCursorResponse({ + ...cursorResponse("fixture-cursor-protobuf-usage", {}), + model_id: "cursor", + headers: [["content-type", "application/connect+proto"]], + body_text: undefined, + body_b64: connectFrame(resultMessage).toString("base64"), + }); + + expect(normalized.usage).toHaveLength(1); + expect(normalized.usage[0]).toMatchObject({ + input_tokens: 7931, + output_tokens: 52, + cache_read_tokens: 2848, + cache_write_tokens: 0, + total_tokens: 10831, + }); + 
/** Options accepted by `normalizeCursorRequest` and `normalizeCursorResponse`. */
export interface CursorAdapterOptions {
  /**
   * When exactly `true`, the normalizers attach one raw payload record for the
   * captured event; otherwise the returned `rawPayloads` array is empty.
   */
  readonly storeRaw?: boolean;
}
/** One length-prefixed frame extracted from a captured body by `parseConnectEnvelope`. */
interface ParsedConnectFrame {
  /** Zero-based position of the frame within the body. */
  readonly index: number;
  /** True when bit 0 of the frame's flag byte is set. */
  readonly compressed: boolean;
  /** Byte offset of the frame's 5-byte header within the captured body. */
  readonly offset: number;
  /** Size of the frame as captured: header plus on-the-wire payload. */
  readonly frameByteLength: number;
  /** Size of the payload after `decodeConnectPayload` has run (i.e. after any gunzip). */
  readonly byteLength: number;
  /** Base64 of the frame bytes exactly as captured (header included). */
  readonly frameB64: string;
  /** Base64 of the decoded payload. */
  readonly payloadB64: string;
  /** Digest of the decoded payload as produced by `sha256Buffer`. */
  readonly payloadSha256: string;
  /** JSON values found in the payload's text by `extractJsonValues`. */
  readonly jsonValues: readonly unknown[];
  /** Text kept by `cursorSemanticTextFromDecoded`; absent when that helper returns nothing. */
  readonly text?: string | undefined;
  /** Best-effort protobuf decoding of the payload; absent when `decodeProtoMessage` gives up. */
  readonly proto?: DecodedProtoMessage | undefined;
}
/**
 * Turns one captured Cursor request event into a pending provider call.
 *
 * The call id is derived from the flow id, the model id falls back to the one
 * recorded on the event when the body does not reveal a better one, and the raw
 * payload is attached only when `options.storeRaw` is exactly `true`.
 *
 * @param request - Captured request event from the spool.
 * @param options - Adapter options; defaults to storing no raw payloads.
 * @returns The pending call, its request-side blocks, and any raw payload records.
 */
export function normalizeCursorRequest(
  request: SpoolRequestEvent,
  options: CursorAdapterOptions = {},
): NormalizedCursorRequest {
  const { flow_id: flowId, model_id: capturedModelId } = request;
  const callId = cursorCallIdForFlow(flowId);
  const body = parseCapturedBody(request);
  const resolvedModelId = cursorModelId(capturedModelId, body) ?? capturedModelId;

  const call: ProviderCall = {
    id: callId,
    provider: "cursor",
    operation: request.operation,
    model_id: resolvedModelId,
    status: "pending",
    started_at: request.ts,
    request_flow_id: flowId,
    request_content_hash: hashUnknown(capturedBodyHashMaterial(body)),
  };
  const blocks = normalizeCursorRequestBlocks(callId, request, body);
  const rawPayloads = options.storeRaw === true ? [rawPayload(callId, "request", request)] : [];

  return { call, blocks, rawPayloads };
}
"error" : "complete", + started_at: response.ts, + completed_at: response.ts, + status_code: response.status_code, + request_flow_id: response.flow_id, + response_flow_id: response.flow_id, + response_content_hash: hashUnknown(normalized.blocks.map((block) => block.content_hash)), + }, + blocks: normalized.blocks, + usage: normalized.usage, + streamEvents: normalized.streamEvents, + rawPayloads: options.storeRaw === true ? [rawPayload(callId, "response", response)] : [], + }; +} + +function normalizeCursorRequestBlocks( + callId: string, + request: SpoolRequestEvent, + body: ParsedBody, +): NormalizedBlock[] { + const blocks: NormalizedBlock[] = []; + const usedPaths = new Set(); + let ordinal = 0; + const addBlock = (input: Omit): void => { + blocks.push(createBlock({ + callId, + direction: "request", + ordinal, + ...input, + })); + ordinal += 1; + }; + + addBlock({ + kind: "provider-envelope", + source: "cursor-request", + providerPath: "$.http", + text: `${request.method} ${request.host}${request.path}`, + json: { + host: request.host, + method: request.method, + path: request.path, + operation: request.operation, + model_id: request.model_id, + }, + }); + + if (body.json !== undefined) { + normalizeRequestJson(body.json, addBlock, usedPaths); + } else if (body.jsonLines.length > 0) { + body.jsonLines.forEach((value, index) => { + normalizeRequestJson(value, addBlock, usedPaths, `$[${String(index)}]`); + }); + } + + const bodyText = body.text ?? body.binaryText; + if (blocks.length === 1 && bodyText !== undefined && bodyText.trim().length > 0) { + addBlock({ + kind: body.text === undefined ? "media-summary" : "current-user-input", + source: body.text === undefined ? 
/**
 * Emits request-side blocks for one parsed JSON value.
 *
 * Well-known top-level fields (model, system prompt, user prompt, tools, message
 * lists) are handled first. Remaining text candidates are then classified by
 * key/path heuristics. If the value is a record and nothing under `rootPath` was
 * recognised, a single `unknown` block carrying the whole JSON is emitted.
 *
 * @param value - Parsed JSON value (whole body or one JSON line).
 * @param addBlock - Sink that appends a block; call id, direction and ordinal are supplied by the caller.
 * @param usedPaths - Provider paths that already produced a block; mutated in place.
 * @param rootPath - Path prefix for this value, `$` for a whole-body document.
 */
function normalizeRequestJson(
  value: unknown,
  addBlock: (input: Omit<BlockInput, "callId" | "direction" | "ordinal">) => void,
  usedPaths: Set<string>,
  rootPath = "$",
): void {
  const top = isRecord(value) ? value : undefined;
  if (top !== undefined) {
    addKnownStringField(top, ["model", "model_id", "modelName", "modelDisplayName", "selectedModel"], addBlock, usedPaths, rootPath, "provider-envelope");
    addKnownStringField(top, ["system", "systemPrompt", "instructions", "rules", "developerInstruction"], addBlock, usedPaths, rootPath, "harness-system-context");
    addKnownStringField(top, ["prompt", "query", "input", "userInput", "currentPrompt", "currentMessage"], addBlock, usedPaths, rootPath, "current-user-input");
    addKnownValueField(top, ["tools", "toolDefinitions", "functions"], addBlock, usedPaths, rootPath, "tool-definition");
    normalizeMessagesFromKnownFields(top, addBlock, usedPaths, rootPath);
  }

  // NOTE(review): only exact path matches are skipped here. If collectTextCandidates
  // yields leaves nested under an already-used path (e.g. `$.messages[0].content`),
  // they would be emitted a second time — confirm against that helper.
  for (const candidate of collectTextCandidates(value, rootPath)) {
    if (usedPaths.has(candidate.path) || candidate.text.trim().length === 0) {
      continue;
    }
    const kind = requestKindForCandidate(candidate);
    if (kind === undefined) {
      continue;
    }
    usedPaths.add(candidate.path);
    addBlock({
      kind,
      source: "cursor-request-json",
      providerPath: candidate.path,
      text: truncateText(candidate.text, 16_000),
    });
  }

  // Fall back to one opaque block so an unrecognised record is still visible.
  if (top !== undefined && !hasSemanticRequestBlockForRoot(usedPaths, rootPath)) {
    addBlock({
      kind: "unknown",
      source: "cursor-request-json",
      providerPath: rootPath,
      text: "Cursor request JSON",
      json: value,
    });
  }
}
(!Array.isArray(value)) { + continue; + } + const records = value.filter(isRecord); + const lastUserIndex = findLastUserMessageIndex(records); + records.forEach((message, index) => { + const role = stringField(message, "role") ?? stringField(message, "speaker") ?? stringField(message, "type"); + const content = message.content ?? message.text ?? message.message ?? message.prompt; + const providerPath = `${rootPath}.${key}[${String(index)}]`; + const text = textFromUnknown(content); + const kind = role === "system" + ? "harness-system-context" + : role === "user" && index === lastUserIndex + ? "current-user-input" + : "prior-conversation-history"; + usedPaths.add(providerPath); + if (text !== undefined && text.length > 0) { + addBlock({ + kind, + source: "cursor-request-message", + providerPath, + role, + text: truncateText(text, 16_000), + ...(content !== text ? { json: content } : {}), + }); + return; + } + addBlock({ + kind: "unknown", + source: "cursor-request-message", + providerPath, + role, + json: message, + }); + }); + } +} + +function normalizeCursorResponseBody( + callId: string, + response: SpoolResponseEvent, + body: ParsedBody, +): { + readonly blocks: readonly NormalizedBlock[]; + readonly usage: readonly UsageRecord[]; + readonly streamEvents: readonly StreamEvent[]; +} { + const blocks: NormalizedBlock[] = []; + let ordinal = 0; + const addBlock = (input: Omit): void => { + blocks.push(createBlock({ + callId, + direction: "response", + ordinal, + ...input, + })); + ordinal += 1; + }; + + addBlock({ + kind: "provider-envelope", + source: "cursor-response", + providerPath: "$.http", + text: `status:${String(response.status_code)}`, + json: { + status_code: response.status_code, + reason: response.reason, + operation: response.operation, + model_id: response.model_id, + }, + }); + + const values = structuredValues(body); + const requestBlocks = normalizeCursorResponseRequestContext(callId, values); + const textParts = collectCursorResponseText(values); 
+ if (textParts.length === 0) { + const bodyText = body.text ?? body.binaryText; + if (bodyText !== undefined && isUsefulBodyText(bodyText)) { + textParts.push(bodyText); + } + } + + if (textParts.length > 0) { + addBlock({ + kind: "assistant-output", + source: "cursor-response-body", + providerPath: "$.body", + text: truncateText(joinTextParts(textParts), 32_000), + }); + } + + for (const candidate of collectCursorResponseSemanticBlocks(values)) { + addBlock({ + kind: candidate.kind, + source: "cursor-response-body", + providerPath: candidate.path, + role: candidate.role, + text: truncateText(candidate.text, 16_000), + ...(candidate.json === undefined ? {} : { json: candidate.json }), + }); + } + + if (values.length > 0) { + for (const value of values) { + const model = cursorModelId(undefined, { json: value, jsonLines: [] }); + if (model !== undefined) { + addBlock({ + kind: "provider-envelope", + source: "cursor-response-json", + providerPath: "$.model", + text: `model: ${model}`, + }); + break; + } + } + } else if (textParts.length === 0) { + addBlock({ + kind: "unknown", + source: "cursor-response-body", + providerPath: "$.body", + text: "Cursor response body", + }); + } + + return { + blocks: [...requestBlocks, ...blocks], + usage: usageRecords(callId, values, body.connectFrames ?? 
/**
 * Walks decoded response values and gathers messages that are really request
 * context (roles `system`, `user` or `human`).
 *
 * A matching record with extractable text is classified and not descended into.
 * Assistant records are skipped entirely. Every other record (including a
 * request-role record without text) has its children visited.
 *
 * @param values - Structured values decoded from the response body.
 * @returns Candidates in visit order, each tagged with its `$.body[...]` path.
 */
function collectCursorRequestContextCandidates(values: readonly unknown[]): RequestContextCandidate[] {
  const found: RequestContextCandidate[] = [];

  function walk(node: unknown, path: string): void {
    if (Array.isArray(node)) {
      node.forEach((element, position) => {
        walk(element, `${path}[${String(position)}]`);
      });
      return;
    }
    if (!isRecord(node)) {
      return;
    }

    const role = stringField(node, "role");
    if (role === "assistant") {
      return;
    }
    if (role === "system" || role === "user" || role === "human") {
      const text = messageTextFromRecord(node);
      if (text !== undefined) {
        const classified = classifyCursorRequestContext(role, text);
        if (classified !== undefined) {
          found.push({
            kind: classified.kind,
            role,
            path,
            text: classified.text,
          });
        }
        return;
      }
    }

    for (const [key, child] of Object.entries(node)) {
      walk(child, `${path}.${key}`);
    }
  }

  values.forEach((value, position) => {
    walk(value, `$.body[${String(position)}]`);
  });

  return found;
}
/**
 * Extracts the text wrapped in the first `<user_query>…</user_query>` pair.
 *
 * The match is anchored on the opening tag so that harness context preceding the
 * query in the same message is not swept into the result.
 *
 * @param text - Full text of a user message.
 * @returns The trimmed query, or `undefined` when there is no tag pair or it is blank.
 */
function extractCursorUserQuery(text: string): string | undefined {
  const match = /<user_query>\s*([\s\S]*?)\s*<\/user_query>/i.exec(text);
  const query = match?.[1]?.trim();
  return query === undefined || query.length === 0 ? undefined : query;
}
/**
 * Guesses a block kind for a text leaf from its key and path.
 *
 * The key and path are lower-cased and tested against an ordered list of
 * whole-word patterns; the first match wins.
 *
 * @param candidate - Text leaf with its key and provider path.
 * @returns The matching block kind, or `undefined` when no pattern applies.
 */
function requestKindForCandidate(candidate: TextCandidate): BlockKind | undefined {
  const haystack = `${candidate.key} ${candidate.path}`.toLowerCase();
  // Order matters: earlier rows take precedence over later ones.
  const rules: readonly (readonly [RegExp, BlockKind])[] = [
    [/\b(system|instruction|rules?|developer|policy)\b/, "harness-system-context"],
    [/\b(tool|function|schema)\b/, "tool-definition"],
    [/\b(history|conversation|transcript|previous|prior|context)\b/, "prior-conversation-history"],
    [/\b(prompt|query|input|message|content|text|user)\b/, "current-user-input"],
    [/\b(model|version|mode)\b/, "provider-envelope"],
  ];
  for (const [pattern, kind] of rules) {
    if (pattern.test(haystack)) {
      return kind;
    }
  }
  return undefined;
}
} + if (role !== undefined) { + return; + } + + const type = stringField(value, "type"); + if (type === "assistant" && isRecord(value.message)) { + collectMessageText(value.message.content).forEach(push); + push(stringField(value.message, "text")); + return; + } + + const result = value.result; + if (typeof result === "string") { + push(result); + } else if (isRecord(result)) { + push(stringField(result, "text")); + push(stringField(result, "message")); + push(stringField(result, "output")); + } + + for (const child of Object.values(value)) { + visit(child); + } + }; + + values.forEach(visit); + + return parts; +} + +function collectCursorResponseSemanticBlocks(values: readonly unknown[]): ResponseSemanticCandidate[] { + const candidates: ResponseSemanticCandidate[] = []; + const seen = new Set(); + + const push = (candidate: ResponseSemanticCandidate): void => { + const key = `${candidate.kind}\n${candidate.role ?? ""}\n${candidate.text}\n${canonicalJson(candidate.json)}`; + if (candidate.text.trim().length === 0 || seen.has(key)) { + return; + } + seen.add(key); + candidates.push(candidate); + }; + + const visit = (value: unknown, path: string): void => { + if (Array.isArray(value)) { + value.forEach((item, index) => { + visit(item, `${path}[${String(index)}]`); + }); + return; + } + if (!isRecord(value)) { + return; + } + + const role = stringField(value, "role"); + if (role === "assistant" && Array.isArray(value.content)) { + value.content.forEach((item, index) => { + if (!isRecord(item) || stringField(item, "type") !== "tool-call") { + return; + } + push({ + kind: "tool-call", + role, + path: `${path}.content[${String(index)}]`, + text: cursorToolCallText(item), + json: item, + }); + }); + return; + } + + if (role === "tool" && Array.isArray(value.content)) { + value.content.forEach((item, index) => { + if (!isRecord(item) || stringField(item, "type") !== "tool-result") { + return; + } + push({ + kind: "tool-result", + role, + path: 
/**
 * Reports whether any recorded path is `rootPath` itself or lies beneath it
 * (i.e. continues with `.` or `[`).
 *
 * @param paths - Provider paths that already produced a block.
 * @param rootPath - Path prefix of the JSON value being normalised.
 * @returns `true` when at least one path belongs to `rootPath`.
 */
function hasSemanticRequestBlockForRoot(paths: ReadonlySet<string>, rootPath: string): boolean {
  const propertyPrefix = `${rootPath}.`;
  const indexPrefix = `${rootPath}[`;
  for (const path of paths) {
    if (path === rootPath || path.startsWith(propertyPrefix) || path.startsWith(indexPrefix)) {
      return true;
    }
  }
  return false;
}
/**
 * Parses JSON without throwing.
 *
 * @param text - Candidate JSON document.
 * @returns `{ ok: true, value }` on success, `{ ok: false }` when `JSON.parse` rejects the text.
 */
function parseJson(text: string): { readonly ok: true; readonly value: unknown } | { readonly ok: false } {
  let value: unknown;
  try {
    value = JSON.parse(text);
  } catch {
    return { ok: false };
  }
  return { ok: true, value };
}
/**
 * Tries to interpret a base64 body as a sequence of length-prefixed frames.
 *
 * Each frame is a 1-byte flag field and a 4-byte big-endian payload length,
 * followed by the payload. Every payload is decoded (and gunzipped when needed),
 * then inspected as text, embedded JSON and protobuf.
 *
 * @param value - Base64-encoded body, if any.
 * @param direction - Event direction, forwarded to `cursorSemanticTextFromDecoded`.
 * @returns The parsed frames plus flattened JSON/text. Returns `undefined` when the
 *   body is absent, shorter than one header, a declared length overruns the buffer,
 *   or the frames do not consume the buffer exactly.
 */
function parseConnectEnvelope(value: string | undefined, direction: string | undefined): ParsedBody | undefined {
  if (value === undefined) {
    return undefined;
  }
  let buffer: Buffer;
  try {
    buffer = Buffer.from(value, "base64");
  } catch {
    return undefined;
  }
  // A frame header alone is 5 bytes; anything shorter cannot be framed.
  if (buffer.length < 5) {
    return undefined;
  }

  const frames: ParsedConnectFrame[] = [];
  let modelText: string | undefined;
  let offset = 0;
  let index = 0;
  while (offset + 5 <= buffer.length) {
    const frameOffset = offset;
    const flags = buffer[offset] ?? 0;
    const length = buffer.readUInt32BE(offset + 1);
    offset += 5;
    // A declared length that runs past the buffer means this is not a framed body.
    if (length < 0 || offset + length > buffer.length) {
      return undefined;
    }
    // Bit 0 of the flag byte marks a compressed payload.
    const compressed = (flags & 1) === 1;
    const payload = decodeConnectPayload(buffer.subarray(offset, offset + length), compressed);
    offset += length;
    const text = decodePayloadText(payload);
    const jsonValues = text === undefined ? [] : extractJsonValues(text);
    // Keep probing frames until one yields a model id.
    if (text !== undefined && modelText === undefined) {
      modelText = modelIdFromText(text);
    }
    const usefulText = text === undefined ? undefined : cursorSemanticTextFromDecoded(text, direction);
    const proto = decodeProtoMessage(payload, `$frame[${String(index)}]`);
    const frameEnd = offset;
    // Raw bytes of the frame as captured: header plus undecoded payload.
    const frameBytes = buffer.subarray(frameOffset, frameEnd);
    frames.push({
      index,
      compressed,
      offset: frameOffset,
      frameByteLength: frameBytes.length,
      byteLength: payload.length,
      frameB64: frameBytes.toString("base64"),
      payloadB64: payload.toString("base64"),
      payloadSha256: sha256Buffer(payload),
      jsonValues,
      ...(usefulText === undefined ? {} : { text: usefulText }),
      ...(proto === undefined ? {} : { proto }),
    });
    index += 1;
  }

  // Trailing bytes (fewer than a header) or no frames at all: treat as not framed.
  if (offset !== buffer.length || frames.length === 0) {
    return undefined;
  }

  // The JSON found in every frame is also exposed, flattened, as `jsonLines`.
  const jsonLines = frames.flatMap((frame) => frame.jsonValues);
  const binaryTextParts = uniqueStrings(frames.map((frame) => frame.text).filter((text): text is string => text !== undefined));
  return {
    jsonLines,
    ...(binaryTextParts.length === 0 ? {} : { binaryText: joinTextParts(binaryTextParts) }),
    ...(modelText === undefined ? {} : { modelText }),
    connectFrames: frames,
  };
}
/**
 * Reads one base-128 varint starting at `offset`.
 *
 * At most ten bytes are consumed. Each byte contributes its low seven bits,
 * least-significant group first; a byte without the high bit ends the value.
 *
 * @param buffer - Bytes to read from.
 * @param offset - Index of the varint's first byte.
 * @returns The decoded value and the index just past it, or `undefined` when the
 *   varint is unterminated within ten bytes or the buffer ends first.
 */
function readProtoVarint(buffer: Buffer, offset: number): VarintRead | undefined {
  const limit = Math.min(offset + 10, buffer.length);
  let accumulated = 0n;
  let cursor = offset;
  while (cursor < limit) {
    const byte = buffer[cursor];
    if (byte === undefined) {
      return undefined;
    }
    accumulated |= BigInt(byte & 0x7f) << BigInt(7 * (cursor - offset));
    cursor += 1;
    if ((byte & 0x80) === 0) {
      return { value: accumulated, nextOffset: cursor };
    }
  }
  return undefined;
}
/**
 * Interprets a length-delimited protobuf field as text when it plausibly is text.
 *
 * @param buffer - Raw field bytes.
 * @returns The text truncated to 4,000 characters, or `undefined` when the field is
 *   empty, larger than 64,000 bytes, not valid UTF-8, or has too many control characters.
 */
function protoString(buffer: Buffer): string | undefined {
  const size = buffer.length;
  if (size === 0 || size > 64_000) {
    return undefined;
  }
  const text = buffer.toString("utf8");
  // A replacement character means the bytes were not valid UTF-8.
  if (text.includes("\uFFFD")) {
    return undefined;
  }

  // Count control characters other than newline, carriage return and tab.
  let controlCount = 0;
  for (const char of Array.from(text)) {
    const isAllowedWhitespace = char === "\n" || char === "\r" || char === "\t";
    if (char.charCodeAt(0) < 32 && !isAllowedWhitespace) {
      controlCount += 1;
    }
  }
  // Tolerate one stray control character, or up to 5% of the text.
  const allowedControls = Math.max(1, text.length * 0.05);
  if (controlCount > allowedControls) {
    return undefined;
  }
  if (text.length === 0) {
    return undefined;
  }
  return truncateText(text, 4_000);
}
/**
 * Finds the index of the `}` that closes the JSON object opening at `start`.
 *
 * Braces inside double-quoted strings are ignored, and a backslash inside a
 * string escapes the character that follows it. A `}` seen while no object
 * is open is skipped.
 *
 * @param text - Text to scan.
 * @param start - Index at which scanning begins (expected to be a `{`).
 * @returns Index of the matching closing brace, or `undefined` when the
 *   text ends before the brace depth returns to zero.
 */
function jsonObjectEnd(text: string, start: number): number | undefined {
  let openBraces = 0;
  let insideString = false;
  let skipNext = false;
  let position = start;

  while (position < text.length) {
    const current = text.charAt(position);

    if (insideString) {
      if (skipNext) {
        skipNext = false;
      } else if (current === "\\") {
        skipNext = true;
      } else if (current === "\"") {
        insideString = false;
      }
    } else {
      switch (current) {
        case "\"":
          insideString = true;
          break;
        case "{":
          openBraces += 1;
          break;
        case "}":
          if (openBraces > 0) {
            openBraces -= 1;
            if (openBraces === 0) {
              return position;
            }
          }
          break;
        default:
          break;
      }
    }

    position += 1;
  }

  return undefined;
}
/**
 * Builds the stream-event records for one call.
 *
 * JSON lines come first, numbered by their position in `body.jsonLines`.
 * Connect frames follow, continuing the ordinal sequence after the last
 * JSON line.
 *
 * @param callId - Identifier of the call the events belong to.
 * @param observedAt - Timestamp copied onto every event.
 * @param body - Parsed body supplying `jsonLines` and optional `connectFrames`.
 * @returns One event per JSON line followed by one event per Connect frame.
 */
function streamEvents(callId: string, observedAt: number, body: ParsedBody): StreamEvent[] {
  const events = body.jsonLines.map((payload, index) => {
    // Extract the text once; it decides both whether payload_text exists and its value.
    const text = textFromUnknown(payload);
    return {
      id: stableId("stream", callId, String(index)),
      call_id: callId,
      ordinal: index,
      event_type: cursorStreamEventType(payload),
      headers: {},
      payload,
      observed_at: observedAt,
      ...(text === undefined ? {} : { payload_text: truncateText(text, 4_000) }),
      payload_sha256: hashUnknown(payload),
    };
  });

  const firstFrameOrdinal = events.length;
  (body.connectFrames ?? []).forEach((frame, position) => {
    const payload = connectFramePayload(frame);
    const eventType = frame.proto === undefined ? "connect-frame" : "connect-protobuf-frame";
    events.push({
      id: stableId("stream", callId, "connect-frame", String(frame.index)),
      call_id: callId,
      ordinal: firstFrameOrdinal + position,
      event_type: eventType,
      headers: {},
      payload,
      observed_at: observedAt,
      payload_text: truncateText(canonicalJson(payload), 4_000),
      payload_sha256: hashUnknown(payload),
    });
  });

  return events;
}
/**
 * Picks the event-type label for a parsed stream payload.
 *
 * The first string-valued property among `type`, `event` and `role` (checked
 * in that order) is used.
 *
 * @param payload - Parsed stream payload of unknown shape.
 * @returns The selected property value, or `"line"` when none of the three
 *   properties holds a string.
 */
function cursorStreamEventType(payload: unknown): string {
  for (const key of ["type", "event", "role"]) {
    const label = stringField(payload, key);
    if (label !== undefined) {
      return label;
    }
  }
  return "line";
}
/**
 * Walks a parsed JSON value and collects every object that carries token
 * usage fields, as decided by `hasUsageFields`.
 *
 * Arrays and nested objects are searched recursively; a parent is reported
 * before the objects nested inside it. Each matching object is reported once
 * per place it appears in the tree. The previous version had an extra check
 * on property names containing "usage" that pushed a child which the
 * recursive visit then pushed again, so every such object appeared twice.
 *
 * @param value - Parsed value to search.
 * @returns The matching objects in depth-first order.
 */
// NOTE(review): the element type is written as Record<string, unknown>;
// confirm it matches the type guarded by isRecord in this file.
function collectUsageObjects(value: unknown): Record<string, unknown>[] {
  const usage: Record<string, unknown>[] = [];
  const visit = (candidate: unknown): void => {
    if (Array.isArray(candidate)) {
      candidate.forEach(visit);
      return;
    }
    if (!isRecord(candidate)) {
      return;
    }
    if (hasUsageFields(candidate)) {
      usage.push(candidate);
    }
    for (const child of Object.values(candidate)) {
      // Only containers can hold further usage objects; scalars are skipped.
      if (Array.isArray(child) || isRecord(child)) {
        visit(child);
      }
    }
  };
  visit(value);
  return usage;
}
/**
 * Looks for a Composer 2.5 model name inside free text.
 *
 * The hyphenated id form (`composer-2.5`, optionally `-fast`) is tried
 * first, then the spaced display form (`Composer 2.5`, optionally `Fast`).
 * Both patterns are case-insensitive.
 *
 * @param text - Text to search; `undefined` yields `undefined`.
 * @returns The matched substring exactly as it appears in `text`, or
 *   `undefined` when neither form is present.
 */
function modelIdFromText(text: string | undefined): string | undefined {
  if (text === undefined) {
    return undefined;
  }
  const patterns = [
    /\bcomposer-2\.5(?:-fast)?\b/i,
    /\bComposer\s+2\.5(?:\s+Fast)?\b/i,
  ];
  for (const pattern of patterns) {
    const hit = pattern.exec(text);
    if (hit !== null) {
      return hit[0];
    }
  }
  return undefined;
}
/**
 * Decodes a base64 string and returns its bytes as text when they are valid
 * UTF-8.
 *
 * Validity is checked with a strict decoder rather than by searching the
 * decoded text for U+FFFD, so input that legitimately contains the
 * replacement character is no longer rejected. A leading byte-order mark is
 * kept in the result.
 *
 * @param value - Base64 text; `undefined` yields `undefined`.
 * @returns The decoded string, or `undefined` when the bytes are not
 *   well-formed UTF-8.
 */
function decodeBase64Utf8(value: string | undefined): string | undefined {
  if (value === undefined) {
    return undefined;
  }
  try {
    // fatal: throw on malformed sequences instead of substituting U+FFFD.
    // ignoreBOM: keep a leading BOM in the output, as Buffer#toString does.
    const decoder = new TextDecoder("utf-8", { fatal: true, ignoreBOM: true });
    return decoder.decode(Buffer.from(value, "base64"));
  } catch {
    return undefined;
  }
}
/**
 * Pulls the values of selected string-valued JSON properties out of text
 * that is not necessarily valid JSON as a whole.
 *
 * A regular expression finds `"key": "value"` pairs for the requested keys;
 * each raw value is re-quoted and run through `parseJson` so escape
 * sequences are decoded. Pairs whose value does not parse to a string are
 * dropped.
 *
 * @param text - Text to search.
 * @param keys - Property names to look for; each is escaped before being
 *   placed into the pattern.
 * @returns The decoded values in the order they occur in `text`.
 */
function extractJsonStringFields(text: string, keys: readonly string[]): string[] {
  const alternatives = keys.map((key) => escapeRegExp(key)).join("|");
  const fieldPattern = new RegExp(`"(?:${alternatives})"\\s*:\\s*"((?:\\\\.|[^"\\\\])*)"`, "g");

  return Array.from(text.matchAll(fieldPattern)).flatMap((match) => {
    const body = match[1];
    if (body === undefined) {
      return [];
    }
    const decoded = parseJson(`"${body}"`);
    return decoded.ok && typeof decoded.value === "string" ? [decoded.value] : [];
  });
}
/**
 * Removes duplicate strings while keeping the first occurrence of each.
 *
 * A `Set` iterates its members in insertion order, so the result lists the
 * distinct values in the order they first appear in `values`.
 *
 * @param values - Strings to deduplicate; the input is not modified.
 * @returns A new array containing each distinct string once.
 */
function uniqueStrings(values: readonly string[]): string[] {
  return Array.from(new Set(values));
}
/**
 * Shortens `value` so that it is at most `maxLength` UTF-16 code units long.
 *
 * Text that already fits is returned unchanged. Longer text is cut and given
 * a three-character `...` suffix that counts toward the limit.
 *
 * Two edge cases are handled so the result never exceeds the limit and never
 * ends in half a character:
 * - when `maxLength` is too small to hold the suffix, only the leading part
 *   of the suffix is returned (previously a negative slice end produced a
 *   result longer than `maxLength`);
 * - when the cut would fall between the two halves of a surrogate pair, the
 *   dangling high surrogate is dropped as well.
 *
 * @param value - Text to shorten.
 * @param maxLength - Maximum length of the result in UTF-16 code units.
 * @returns `value` itself, or a shortened copy ending in `...`.
 */
function truncateText(value: string, maxLength: number): string {
  if (value.length <= maxLength) {
    return value;
  }

  const ellipsis = "...";
  if (maxLength < ellipsis.length) {
    return ellipsis.slice(0, Math.max(0, maxLength));
  }

  let end = maxLength - ellipsis.length;
  const lastKept = value.charCodeAt(end - 1);
  // 0xD800–0xDBFF are high surrogates; keeping one without its partner
  // would leave an unpaired code unit in the output.
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
    end -= 1;
  }
  return `${value.slice(0, end)}${ellipsis}`;
}
+ +`cursor-agent-composer-2.5.ndjson` is a redacted and stabilized capture from the +real `jmp` rootcell instance using Cursor Agent CLI against the Composer 2.5 +family with Cursor's HTTP/1.1 agent compatibility mode enabled. It preserves +the Cursor `AgentService/RunSSE` Connect-proto request shape, SSE response +shape, redacted auth headers, first/resumed marker prompts, raw payload storage, +and provider usage metadata. diff --git a/src/spy/fixtures/cursor-agent-composer-2.5.ndjson b/src/spy/fixtures/cursor-agent-composer-2.5.ndjson new file mode 100644 index 0000000..3ce1c33 --- /dev/null +++ b/src/spy/fixtures/cursor-agent-composer-2.5.ndjson @@ -0,0 +1,4 @@ +{"version":1,"ts":1779848049,"direction":"request","flow_id":"fixture-cursor-alpha-http1","provider":"cursor","operation":"RunSSE","model_id":"cursor","host":"api2.cursor.sh","method":"POST","path":"/agent.v1.AgentService/RunSSE","headers":[["authorization","[redacted]"],["connect-accept-encoding","gzip,br"],["connect-protocol-version","1"],["content-type","application/connect+proto"],["user-agent","connect-es/1.6.1"],["x-cursor-client-type","cli"],["x-cursor-client-version","cli-2026.05.07-42ddaca"],["x-cursor-streaming","true"],["x-ghost-mode","true"],["x-request-id","fixture-request-alpha"],["Host","api2.cursor.sh"],["Connection","close"],["Transfer-Encoding","chunked"]],"body_text":"\u0000\u0000\u0000\u0000\u001a\n$fixture-request-alpha"} +{"version":1,"ts":1779848051,"direction":"response","flow_id":"fixture-cursor-alpha-http1","provider":"cursor","operation":"RunSSE","model_id":"cursor","host":"api2.cursor.sh","method":"POST","path":"/agent.v1.AgentService/RunSSE","headers":[["Content-Type","text/event-stream"],["connect-content-encoding","gzip"],["x-cursor-server-region","us-east-1"]],"status_code":200,"reason":"OK","request_headers":[["authorization","[redacted]"],["content-type","application/connect+proto"],["x-request-id","fixture-request-alpha"]],"body_text":"data: 
{\"type\":\"assistant\",\"message\":{\"role\":\"assistant\",\"content\":[{\"type\":\"redacted-reasoning\",\"providerOptions\":{\"cursor\":{\"modelName\":\"composer-2.5-fast\"}}},{\"type\":\"text\",\"text\":\"RCSPY-CURSOR-ALPHA-HTTP1\"}]}}\n\ndata: {\"type\":\"result\",\"usage\":{\"inputTokens\":7918,\"outputTokens\":52,\"cacheReadTokens\":2848,\"cacheWriteTokens\":0},\"result\":\"RCSPY-CURSOR-ALPHA-HTTP1\"}\n\n"} +{"version":1,"ts":1779848068,"direction":"request","flow_id":"fixture-cursor-beta-http1","provider":"cursor","operation":"RunSSE","model_id":"cursor","host":"api2.cursor.sh","method":"POST","path":"/agent.v1.AgentService/RunSSE","headers":[["authorization","[redacted]"],["connect-accept-encoding","gzip,br"],["connect-protocol-version","1"],["content-type","application/connect+proto"],["user-agent","connect-es/1.6.1"],["x-cursor-client-type","cli"],["x-cursor-client-version","cli-2026.05.07-42ddaca"],["x-cursor-streaming","true"],["x-ghost-mode","true"],["x-request-id","fixture-request-beta"],["Host","api2.cursor.sh"],["Connection","close"],["Transfer-Encoding","chunked"]],"body_text":"\u0000\u0000\u0000\u0000\u0019\n$fixture-request-beta"} +{"version":1,"ts":1779848070,"direction":"response","flow_id":"fixture-cursor-beta-http1","provider":"cursor","operation":"RunSSE","model_id":"cursor","host":"api2.cursor.sh","method":"POST","path":"/agent.v1.AgentService/RunSSE","headers":[["Content-Type","text/event-stream"],["connect-content-encoding","gzip"],["x-cursor-server-region","us-east-1"]],"status_code":200,"reason":"OK","request_headers":[["authorization","[redacted]"],["content-type","application/connect+proto"],["x-request-id","fixture-request-beta"]],"body_text":"data: {\"type\":\"assistant\",\"message\":{\"role\":\"assistant\",\"content\":[{\"type\":\"redacted-reasoning\",\"providerOptions\":{\"cursor\":{\"modelName\":\"composer-2.5-fast\"}}},{\"type\":\"text\",\"text\":\"RCSPY-CURSOR-BETA-HTTP1\"}]}}\n\ndata: 
{\"type\":\"result\",\"usage\":{\"inputTokens\":61,\"outputTokens\":44,\"cacheReadTokens\":10784,\"cacheWriteTokens\":0},\"result\":\"RCSPY-CURSOR-BETA-HTTP1\"}\n\n"} diff --git a/src/spy/migrations.ts b/src/spy/migrations.ts index e99bf1a..06099fd 100644 --- a/src/spy/migrations.ts +++ b/src/spy/migrations.ts @@ -240,6 +240,33 @@ CREATE INDEX IF NOT EXISTS token_count_call_idx ON token_count(call_id); CREATE INDEX IF NOT EXISTS token_count_subject_idx ON token_count(subject_type, call_id, direction, block_id, kind); +`, + }, + { + version: 5, + name: "provider response stream chunk captures", + sql: ` +CREATE TABLE IF NOT EXISTS stream_chunk_capture ( + id TEXT PRIMARY KEY, + call_id TEXT NOT NULL REFERENCES provider_call(id) ON DELETE CASCADE, + flow_id TEXT NOT NULL, + chunk_index INTEGER NOT NULL, + observed_at REAL NOT NULL, + host TEXT NOT NULL, + method TEXT NOT NULL, + path TEXT NOT NULL, + headers_json TEXT NOT NULL, + body_b64 TEXT NOT NULL, + body_sha256 TEXT, + body_encoding TEXT, + content_type TEXT, + UNIQUE(call_id, chunk_index) +); + +CREATE INDEX IF NOT EXISTS stream_chunk_capture_call_idx + ON stream_chunk_capture(call_id, chunk_index); +CREATE INDEX IF NOT EXISTS stream_chunk_capture_flow_idx + ON stream_chunk_capture(flow_id, chunk_index); `, }, ]; diff --git a/src/spy/providers.ts b/src/spy/providers.ts index 2fb708f..cc3192e 100644 --- a/src/spy/providers.ts +++ b/src/spy/providers.ts @@ -3,6 +3,11 @@ import { normalizeBedrockRequest, normalizeBedrockResponse, } from "./bedrock.ts"; +import { + cursorCallIdForFlow, + normalizeCursorRequest, + normalizeCursorResponse, +} from "./cursor.ts"; import type { NormalizedBlock, ProviderId, @@ -46,6 +51,12 @@ const ADAPTERS: Readonly> = { normalizeRequest: normalizeBedrockRequest, normalizeResponse: normalizeBedrockResponse, }, + cursor: { + id: "cursor", + callIdForFlow: cursorCallIdForFlow, + normalizeRequest: normalizeCursorRequest, + normalizeResponse: normalizeCursorResponse, + }, }; export 
function providerAdapterFor(provider: ProviderId): SpyProviderAdapter { diff --git a/src/spy/schemas.test.ts b/src/spy/schemas.test.ts index 50871f1..9dcf1b2 100644 --- a/src/spy/schemas.test.ts +++ b/src/spy/schemas.test.ts @@ -114,8 +114,8 @@ describe("spy sqlite migrations", () => { const db = new Database(":memory:"); try { applySpyMigrations(db); - expect(currentSpySchemaVersion()).toBe(4); - expect(db.query("SELECT MAX(version) AS version FROM schema_migration").get()).toEqual({ version: 4 }); + expect(currentSpySchemaVersion()).toBe(5); + expect(db.query("SELECT MAX(version) AS version FROM schema_migration").get()).toEqual({ version: 5 }); db.query(` INSERT INTO provider_call ( diff --git a/src/spy/schemas.ts b/src/spy/schemas.ts index c5943f2..11d64f2 100644 --- a/src/spy/schemas.ts +++ b/src/spy/schemas.ts @@ -9,7 +9,7 @@ const CapturedBodyShape = { body_encoding: z.enum(["aws-eventstream"]).optional(), } as const; -export const ProviderIdSchema = z.enum(["bedrock"]); +export const ProviderIdSchema = z.enum(["bedrock", "cursor"]); export type ProviderId = z.infer; diff --git a/src/spy/service.test.ts b/src/spy/service.test.ts index 34e6dce..874bde3 100644 --- a/src/spy/service.test.ts +++ b/src/spy/service.test.ts @@ -15,7 +15,7 @@ import { SseEventPayloadSchemas, type SpyCallDetail, } from "./api-contracts.ts"; -import { SpoolEventSchema, type SpoolEvent } from "./schemas.ts"; +import { SpoolEventSchema, SpoolRequestEventSchema, SpoolResponseEventSchema, type SpoolEvent, type SpoolRequestEvent, type SpoolResponseEvent } from "./schemas.ts"; import { spyServiceConfigFromEnv, startSpyService, type SpyServiceHandle } from "./service.ts"; import type { BedrockTokenCounter, BedrockTokenCountInput } from "./bedrock-token-count.ts"; @@ -124,6 +124,43 @@ function writeSpoolEvents(spoolDir: string, events: readonly SpoolEvent[]): void }); } +function cursorRequest(flowId: string): SpoolRequestEvent { + return SpoolRequestEventSchema.parse({ + version: 1, + ts: 
1779497300, + direction: "request", + flow_id: flowId, + provider: "cursor", + operation: "StreamUnifiedChat", + model_id: "Composer 2.5", + host: "api2.cursor.sh", + method: "POST", + path: "/aiserver.v1.AiService/StreamUnifiedChat", + headers: [["content-type", "application/json"]], + body_text: JSON.stringify({ model: "Composer 2.5", prompt: "RCSPY-CURSOR-SERVICE" }), + }); +} + +function cursorResponse(flowId: string): SpoolResponseEvent { + return SpoolResponseEventSchema.parse({ + version: 1, + ts: 1779497301, + direction: "response", + flow_id: flowId, + provider: "cursor", + operation: "StreamUnifiedChat", + model_id: "Composer 2.5", + host: "api2.cursor.sh", + method: "POST", + path: "/aiserver.v1.AiService/StreamUnifiedChat", + headers: [["content-type", "application/json"]], + status_code: 200, + reason: "OK", + request_headers: [["content-type", "application/json"]], + body_text: JSON.stringify({ result: { text: "cursor-service-ok" } }), + }); +} + async function jsonAs(response: Response, schema: ZodType): Promise { const parsed: unknown = await response.json(); return schema.parse(parsed); @@ -224,6 +261,13 @@ describe("spy web service", () => { const unknownOperation = await jsonAs(unknownOperationResponse, SpyCallSummaryPageSchema); expect(unknownOperation.items).toHaveLength(0); + handle.store.persistRequest(cursorRequest("fixture-cursor-service")); + expect(handle.store.persistResponse(cursorResponse("fixture-cursor-service"))).toBe(true); + const cursorCallsResponse = await fetch(`${handle.url}/api/calls?provider=cursor&model_id=${encodeURIComponent("Composer 2.5")}&status=complete`); + const cursorCalls = await jsonAs(cursorCallsResponse, SpyCallSummaryPageSchema); + expect(cursorCalls.items).toHaveLength(1); + expect(cursorCalls.items[0]?.call.provider).toBe("cursor"); + const filteredSearchResponse = await fetch(`${handle.url}/api/search?q=${encodeURIComponent("Fixture 
capture")}&since=1779496808&provider=bedrock&model_id=${encodeURIComponent("us.anthropic.claude-sonnet-4-6")}&operation=converse-stream&status=complete&limit=1`); const filteredSearch = await jsonAs(filteredSearchResponse, SpyCallSummaryPageSchema); expect(filteredSearch.items.map((item) => item.call.id)).toEqual(["call-fixture-flow-tool-result"]); @@ -392,6 +436,44 @@ describe("spy web service", () => { }); }); + test("returns a Cursor-specific unavailable token record without calling Bedrock CountTokens", async () => { + const counter = new FakeTokenCounter(77); + const { handle } = createTestService({ + tokenCounter: counter, + tokenCountMode: "provider", + }); + handle.store.persistRequest(cursorRequest("fixture-cursor-token-count")); + expect(handle.store.persistResponse(cursorResponse("fixture-cursor-token-count"))).toBe(true); + + const callId = "call-cursor-fixture-cursor-token-count"; + const detail = await jsonAs(await fetch(`${handle.url}/api/calls/${encodeURIComponent(callId)}`), SpyCallDetailSchema); + expect(detail.tokenCounts.some((record) => + record.subjectType === "call" + && record.direction === "request" + && record.provenance === "unavailable" + && record.error === "provider token counting is currently available only for Bedrock captures; Cursor request/block token recounting is not implemented" + )).toBe(true); + expect(detail.tokenCounts.some((record) => record.subjectType === "block" && record.provenance === "unavailable")).toBe(true); + expect(counter.inputs).toHaveLength(0); + + const response = await jsonAs(await fetch(`${handle.url}/api/token-count`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + mode: "provider", + subjects: [{ type: "call", callId, direction: "request" }], + }), + }), SpyTokenCountResponseSchema); + + expect(counter.inputs).toHaveLength(0); + expect(response.records[0]).toMatchObject({ + subjectType: "call", + provenance: "unavailable", + tokens: null, + error: 
"provider token counting is currently available only for Bedrock captures; Cursor request/block token recounting is not implemented", + }); + }); + test("returns raw payloads only when raw storage is enabled", async () => { const { handle, spoolDir } = createTestService({ storeRaw: true }); writeSpoolEvents(spoolDir, fixtureEvents()); diff --git a/src/spy/service.ts b/src/spy/service.ts index a1e1c49..aaa887b 100644 --- a/src/spy/service.ts +++ b/src/spy/service.ts @@ -25,7 +25,7 @@ import { type SpyStoreOptions, type SpyStreamEventsOptions, } from "./store.ts"; -import { ProviderCallStatusSchema } from "./schemas.ts"; +import { ProviderCallStatusSchema, ProviderIdSchema, type ProviderId } from "./schemas.ts"; import { unavailableTokenRecord } from "./tokens.ts"; const DEFAULT_BIND = "127.0.0.1"; @@ -40,6 +40,8 @@ const DEFAULT_RETENTION_INTERVAL_MS = 15 * 60 * 1000; const DEFAULT_INGEST_BATCH_LIMIT = 100; const DEFAULT_TOKEN_COUNT_MODE: SpyTokenCountMode = "provider"; const TOKEN_COUNT_CONCURRENCY = 8; +const NON_BEDROCK_TOKEN_COUNT_ERROR = + "provider token counting is currently available only for Bedrock captures; Cursor request/block token recounting is not implemented"; const ClearRequestSchema = z.object({ confirm: z.literal(true), @@ -240,8 +242,11 @@ class SpyHttpService { if (detail === null) { throw new HttpError(404, "call not found"); } - this.startBackgroundTokenCounts(detail); - return jsonResponse(detail); + if (detail.summary.call.provider === "bedrock") { + this.startBackgroundTokenCounts(detail); + return jsonResponse(detail); + } + return jsonResponse(this.withImmediateUnavailableTokenCounts(detail)); } return jsonError(404, "not found"); @@ -323,6 +328,9 @@ class SpyHttpService { return cached; } } + if (prepared.provider !== "bedrock") { + return unavailableTokenRecord(prepared.base, NON_BEDROCK_TOKEN_COUNT_ERROR); + } if (this.tokenCounter === undefined) { return unavailableTokenRecord(prepared.base, "provider token counting is not 
configured"); } @@ -375,6 +383,23 @@ class SpyHttpService { }); } + private withImmediateUnavailableTokenCounts(detail: SpyCallDetail): SpyCallDetail { + const records: SpyTokenCountRecord[] = []; + for (const subject of missingTokenCountSubjects(detail)) { + const prepared = this.store.prepareTokenCountSubject(subject); + if (prepared !== null && prepared.provider !== "bedrock") { + records.push(unavailableTokenRecord(prepared.base, NON_BEDROCK_TOKEN_COUNT_ERROR)); + } + } + if (records.length === 0) { + return detail; + } + return { + ...detail, + tokenCounts: [...detail.tokenCounts, ...records], + }; + } + private serveStatic(path: string, requestHeaders: Headers): Response { const staticDir = this.config.staticDir; if (staticDir === undefined) { @@ -570,15 +595,16 @@ function callFilters(url: URL): Pick): SpoolRequestEvent { + return SpoolRequestEventSchema.parse({ + version: 1, + ts, + direction: "request", + flow_id: flowId, + provider: "cursor", + operation: "StreamUnifiedChat", + model_id: "Composer 2.5", + host: "api2.cursor.sh", + method: "POST", + path: "/aiserver.v1.AiService/StreamUnifiedChat", + headers: [["content-type", "application/json"]], + body_text: JSON.stringify(body), + }); +} + +function syntheticCursorResponse(flowId: string, ts: number, body: Record): SpoolResponseEvent { + return SpoolResponseEventSchema.parse({ + version: 1, + ts, + direction: "response", + flow_id: flowId, + provider: "cursor", + operation: "StreamUnifiedChat", + model_id: "Composer 2.5", + host: "api2.cursor.sh", + method: "POST", + path: "/aiserver.v1.AiService/StreamUnifiedChat", + headers: [["content-type", "application/json"]], + status_code: 200, + reason: "OK", + request_headers: [["content-type", "application/json"]], + body_text: JSON.stringify(body), + }); +} + +function syntheticCursorStreamChunk(flowId: string, ts: number, chunkIndex: number, body: Buffer): SpoolStreamChunkEvent { + return SpoolStreamChunkEventSchema.parse({ + version: 1, + ts, + direction: 
"stream-chunk", + flow_id: flowId, + provider: "cursor", + operation: "StreamUnifiedChat", + model_id: "Composer 2.5", + host: "api2.cursor.sh", + method: "POST", + path: "/aiserver.v1.AiService/StreamUnifiedChat", + headers: [["content-type", "application/connect+proto"]], + chunk_index: chunkIndex, + body_b64: body.toString("base64"), + }); +} + +function connectFrame(payload: Buffer): Buffer { + const header = Buffer.alloc(5); + header.writeUInt32BE(payload.length, 1); + return Buffer.concat([header, payload]); +} + +function protoVarint(value: number): Buffer { + const bytes: number[] = []; + let remaining = value; + while (remaining >= 0x80) { + bytes.push((remaining & 0x7f) | 0x80); + remaining = Math.floor(remaining / 0x80); + } + bytes.push(remaining); + return Buffer.from(bytes); +} + +function protoVarintField(fieldNumber: number, value: number): Buffer { + return Buffer.concat([protoVarint(fieldNumber * 8), protoVarint(value)]); +} + +function protoMessageField(fieldNumber: number, message: Buffer): Buffer { + return Buffer.concat([protoVarint(fieldNumber * 8 + 2), protoVarint(message.length), message]); +} + function responseVariant( event: SpoolResponseEvent, overrides: Partial>, @@ -263,6 +346,126 @@ describe("spy SQLite store", () => { } }); + test("ingests Cursor provider request and response records", () => { + const { dbPath, store } = createTestStore({ storeRaw: true }); + try { + const request = syntheticCursorRequest("fixture-cursor-store", 3000, { + model: "Composer 2.5", + messages: [ + { role: "user", content: "RCSPY-CURSOR-ALPHA" }, + { role: "user", content: "RCSPY-CURSOR-BETA" }, + ], + }); + const response = syntheticCursorResponse("fixture-cursor-store", 3001, { + result: { text: "cursor-store-ok" }, + usage: { inputTokens: 50, outputTokens: 5 }, + }); + const placeholderRequest = { ...request, model_id: "cursor" } satisfies SpoolRequestEvent; + + store.persistRequest(placeholderRequest); + 
expect(store.persistResponse(response)).toBe(true); + + const callId = cursorCallIdForFlow(request.flow_id); + expect(statusForCall(dbPath, callId)).toBe("complete"); + const detail = requiredDetail(store, callId); + expect(detail.summary.call.provider).toBe("cursor"); + expect(detail.summary.call.model_id).toBe("Composer 2.5"); + expect(detail.blocks.map((block) => block.text ?? "").join("\n")).toContain("RCSPY-CURSOR-BETA"); + expect(detail.blocks.map((block) => block.text ?? "").join("\n")).toContain("cursor-store-ok"); + expect(detail.rawPayloads).toHaveLength(2); + expect(store.listCallSummaries({ provider: "cursor" }).items).toHaveLength(1); + expect(store.listCallSummaries({ provider: "bedrock" }).items).toHaveLength(0); + } finally { + store.close(); + } + }); + + test("backfills Cursor request context found in response streams", () => { + const { store } = createTestStore(); + try { + const request = syntheticCursorRequest("fixture-cursor-response-context", 3050, {}); + const response = syntheticCursorResponse("fixture-cursor-response-context", 3051, { + events: [ + { role: "system", content: "Cursor system prompt from response stream" }, + { role: "user", content: "\nWorkspace Path: /tmp/cursor\n" }, + { role: "user", content: "\nCursor current request from response stream\n" }, + { role: "assistant", content: [{ type: "text", text: "cursor-response-context-ok" }] }, + ], + }); + + store.persistRequest(request); + expect(store.persistResponse(response)).toBe(true); + + const detail = requiredDetail(store, cursorCallIdForFlow(request.flow_id)); + const requestBlocks = detail.blocks.filter((block) => block.direction === "request"); + expect(requestBlocks.some((block) => + block.kind === "harness-system-context" + && block.text?.includes("Cursor system prompt from response stream") === true + )).toBe(true); + expect(requestBlocks.some((block) => + block.kind === "current-user-input" + && block.text === "Cursor current request from response stream" + 
)).toBe(true); + expect(detail.requestComposition.sections.find((section) => section.kind === "current-user-input")?.present).toBe(true); + } finally { + store.close(); + } + }); + + test("reassembles Cursor stream chunks into raw wire events and derived usage when raw payload storage is off", () => { + const { dbPath, spoolDir, store } = createTestStore(); + try { + const flowId = "fixture-cursor-stream-chunks"; + const request = syntheticCursorRequest(flowId, 3060, { + model: "Composer 2.5", + prompt: "RCSPY-CURSOR-STREAM-CHUNK", + }); + const usageMessage = Buffer.concat([ + protoVarintField(1, 10779), + protoVarintField(2, 52), + protoVarintField(3, 2848), + protoVarintField(4, 0), + ]); + const responsePayload = protoMessageField(1, protoMessageField(14, usageMessage)); + const responseBytes = connectFrame(responsePayload); + const chunkOne = syntheticCursorStreamChunk(flowId, 3061, 0, responseBytes.subarray(0, 4)); + const chunkTwo = syntheticCursorStreamChunk(flowId, 3062, 1, responseBytes.subarray(4)); + const response = syntheticCursorResponse(flowId, 3063, {}); + + writeSpoolEvents(spoolDir, [request, chunkOne, chunkTwo, response]); + + expect(store.ingestSpoolBatch()).toMatchObject({ ingested: 4, deferred: 0 }); + + const detail = requiredDetail(store, cursorCallIdForFlow(flowId)); + expect(detail.summary.usage).toMatchObject({ + inputTokens: 7931, + outputTokens: 52, + cacheReadTokens: 2848, + cacheWriteTokens: 0, + totalTokens: 10831, + }); + expect(detail.usageRecords[0]?.raw).toMatchObject({ + raw_protobuf: { + path: "$frame[0].1.14", + wireInputTokens: 10779, + }, + }); + const wireEvents = store.getStreamEvents(detail.summary.call.id).items; + const wireEvent = wireEvents.find((event) => event.event_type === "connect-protobuf-frame"); + expect(wireEvent?.payload).toMatchObject({ + format: "connect", + frameB64: responseBytes.toString("base64"), + payloadB64: responsePayload.toString("base64"), + }); + 
expect(JSON.stringify(wireEvent?.payload)).toContain("\"fieldNumber\":14"); + expect(detail.rawPayloads).toHaveLength(0); + expect(countRows(dbPath, "raw_payload")).toBe(0); + expect(countRows(dbPath, "stream_chunk_capture")).toBe(2); + } finally { + store.close(); + } + }); + test("filters call summaries and normalized-text search by provider call fields", () => { const { store } = createTestStore(); try { diff --git a/src/spy/store.ts b/src/spy/store.ts index 5e15048..8a0bf4e 100644 --- a/src/spy/store.ts +++ b/src/spy/store.ts @@ -1,3 +1,4 @@ +import { createHash } from "node:crypto"; import { existsSync, mkdirSync, readdirSync, readFileSync, rmSync, statSync } from "node:fs"; import { dirname, join } from "node:path"; import { Database } from "bun:sqlite"; @@ -6,18 +7,15 @@ import type { SpyTokenCountRecord, SpyTokenCountSubject, } from "./api-contracts.ts"; -import { - bedrockCallIdForFlow, - normalizeBedrockRequest, - normalizeBedrockResponse, -} from "./bedrock.ts"; import { detectCompaction } from "./compaction.ts"; import { applySpyMigrations, currentSpySchemaVersion } from "./migrations.ts"; +import { providerAdapterFor } from "./providers.ts"; import { HttpEventRecordSchema, NormalizedBlockSchema, ProviderCallSchema, RawPayloadRecordSchema, + SpoolResponseEventSchema, SpoolEventSchema, StreamEventSchema, UsageRecordSchema, @@ -200,6 +198,7 @@ export interface SpyCallDetail { export interface SpyPreparedTokenCountSubject { readonly subject: SpyTokenCountSubject; + readonly provider: ProviderCall["provider"]; readonly base: TokenRecordBase; readonly text: string; readonly requestBodyText?: string | undefined; @@ -262,7 +261,7 @@ type PragmaRow = Readonly>; interface ProviderCallRow { readonly id: string; - readonly provider: "bedrock"; + readonly provider: ProviderCall["provider"]; readonly operation: string; readonly model_id: string; readonly status: ProviderCall["status"]; @@ -346,6 +345,22 @@ interface RawPayloadRow { readonly body_encoding: 
"aws-eventstream" | null; } +interface StreamChunkCaptureRow { + readonly id: string; + readonly call_id: string; + readonly flow_id: string; + readonly chunk_index: number; + readonly observed_at: number; + readonly host: string; + readonly method: string; + readonly path: string; + readonly headers_json: string; + readonly body_b64: string; + readonly body_sha256: string | null; + readonly body_encoding: "aws-eventstream" | null; + readonly content_type: string | null; +} + interface TokenCountRow { readonly id: string; readonly call_id: string; @@ -659,6 +674,7 @@ LIMIT ? }; return { subject, + provider: row.provider, base, text, requestBodyText: this.requestBodyTextForCall(subject.callId), @@ -682,6 +698,7 @@ LIMIT ? }; return { subject, + provider: row.provider, base, text, cacheKey: tokenCacheKey(base), @@ -706,6 +723,7 @@ LIMIT ? }; return { subject, + provider: row.provider, base, text, cacheKey: tokenCacheKey(base), @@ -723,6 +741,7 @@ LIMIT ? }; return { subject, + provider: row.provider, base, text: subject.text, cacheKey: tokenCacheKey(base), @@ -878,7 +897,9 @@ ON CONFLICT(cache_key) DO UPDATE SET } else if (event.direction === "error") { this.persistErrorEventUnlocked(event); } else { - this.persistStreamChunkEventUnlocked(event); + if (!this.persistStreamChunkEventUnlocked(event)) { + return "deferred"; + } } } catch (error) { this.recordIngestError(path, error); @@ -890,7 +911,8 @@ ON CONFLICT(cache_key) DO UPDATE SET } private persistRequestUnlocked(event: SpoolRequestEvent): void { - const normalized = normalizeBedrockRequest(event, { storeRaw: this.storeRaw }); + const adapter = providerAdapterFor(event.provider); + const normalized = adapter.normalizeRequest(event, { storeRaw: this.storeRaw }); const httpEvent = httpEventFromRequest(event, normalized.call.id); this.db.transaction(() => { this.upsertPendingCall(normalized.call); @@ -903,17 +925,26 @@ ON CONFLICT(cache_key) DO UPDATE SET } private persistResponseUnlocked(event: 
SpoolResponseEvent): boolean { - const callId = bedrockCallIdForFlow(event.flow_id); + const adapter = providerAdapterFor(event.provider); + const callId = adapter.callIdForFlow(event.flow_id); if (!this.callExists(callId)) { return false; } - const normalized = normalizeBedrockResponse(event, { storeRaw: this.storeRaw }); + const capturedEvent = this.responseEventWithCapturedStreamChunks(event, callId); + const normalized = adapter.normalizeResponse(capturedEvent, { storeRaw: this.storeRaw }); const httpEvent = httpEventFromResponse(event, normalized.call.id); + const requestBlocks = normalized.blocks.filter((block) => block.direction === "request"); + const responseBlocks = normalized.blocks.filter((block) => block.direction === "response"); this.db.transaction(() => { this.updateResponseCall(normalized.call); this.replaceHttpEvent(httpEvent); - this.replaceBlocks(normalized.call.id, "response", normalized.blocks); + if (requestBlocks.length > 0) { + const existingRequestBlocks = this.blocksForCall(normalized.call.id, "request") + .filter((block) => !isResponseDerivedRequestBlock(block)); + this.replaceBlocks(normalized.call.id, "request", [...existingRequestBlocks, ...requestBlocks]); + } + this.replaceBlocks(normalized.call.id, "response", responseBlocks); this.replaceUsageRecords(normalized.call.id, normalized.usage); this.replaceStreamEvents(normalized.call.id, normalized.streamEvents); this.replaceRawPayloads(normalized.call.id, "response", normalized.rawPayloads); @@ -938,22 +969,31 @@ ON CONFLICT(cache_key) DO UPDATE SET this.setMetadata("last_spool_error_at", String(event.ts)); this.setMetadata("last_spool_error", JSON.stringify(event)); if (event.flow_id !== undefined) { - const callId = bedrockCallIdForFlow(event.flow_id); - if (this.callExists(callId)) { - this.db.query(` + if (event.provider !== undefined) { + const callId = providerAdapterFor(event.provider).callIdForFlow(event.flow_id); + if (this.callExists(callId)) { + this.db.query(` UPDATE 
provider_call SET status = 'error', completed_at = ?, response_flow_id = ? WHERE id = ? `).run(event.ts, event.flow_id, callId); + } } } })(); } - private persistStreamChunkEventUnlocked(event: SpoolStreamChunkEvent): void { + private persistStreamChunkEventUnlocked(event: SpoolStreamChunkEvent): boolean { + const adapter = providerAdapterFor(event.provider); + const callId = adapter.callIdForFlow(event.flow_id); + if (!this.callExists(callId)) { + return false; + } + this.db.transaction(() => { + this.replaceStreamChunkCapture(event, callId); this.incrementCounter("spool_stream_chunk_events", 1); this.setMetadata("last_stream_chunk_at", String(event.ts)); this.setMetadata("last_stream_chunk_event", JSON.stringify({ @@ -962,6 +1002,7 @@ WHERE id = ? body_sha256: event.body_sha256, })); })(); + return true; } private upsertPendingCall(call: ProviderCall): void { @@ -997,6 +1038,10 @@ ON CONFLICT(id) DO UPDATE SET this.db.query(` UPDATE provider_call SET status = ?, + model_id = CASE + WHEN provider_call.model_id IN ('cursor', 'unknown') THEN ? + ELSE provider_call.model_id + END, completed_at = ?, status_code = ?, response_flow_id = ?, @@ -1004,6 +1049,7 @@ SET status = ?, WHERE id = ? `).run( call.status, + call.model_id, call.completed_at ?? null, call.status_code ?? null, call.response_flow_id ?? null, @@ -1145,6 +1191,85 @@ INSERT INTO raw_payload ( } } + private replaceStreamChunkCapture(event: SpoolStreamChunkEvent, callId: string): void { + this.db.query(` +INSERT INTO stream_chunk_capture ( + id, call_id, flow_id, chunk_index, observed_at, host, method, path, + headers_json, body_b64, body_sha256, body_encoding, content_type +) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ON CONFLICT(call_id, chunk_index) DO UPDATE SET + id = excluded.id, + flow_id = excluded.flow_id, + observed_at = excluded.observed_at, + host = excluded.host, + method = excluded.method, + path = excluded.path, + headers_json = excluded.headers_json, + body_b64 = excluded.body_b64, + body_sha256 = excluded.body_sha256, + body_encoding = excluded.body_encoding, + content_type = excluded.content_type +`).run( + streamChunkCaptureId(callId, event.chunk_index), + callId, + event.flow_id, + event.chunk_index, + event.ts, + event.host, + event.method, + event.path, + JSON.stringify(event.headers), + event.body_b64, + event.body_sha256 ?? null, + event.body_encoding ?? null, + contentType(event.headers) ?? null, + ); + } + + private responseEventWithCapturedStreamChunks(event: SpoolResponseEvent, callId: string): SpoolResponseEvent { + const chunks = this.streamChunksForCall(callId); + if (chunks.length === 0) { + return event; + } + + const buffers: Buffer[] = []; + for (const chunk of chunks) { + try { + buffers.push(Buffer.from(chunk.body_b64, "base64")); + } catch { + return event; + } + } + + const body = Buffer.concat(buffers); + if (body.length === 0) { + return event; + } + + const merged: Record = { + ...event, + body_b64: body.toString("base64"), + body_sha256: sha256Buffer(body), + }; + delete merged.body_text; + if (!chunks.every((chunk) => chunk.body_encoding !== null && chunk.body_encoding === chunks[0]?.body_encoding)) { + delete merged.body_encoding; + } else if (chunks[0]?.body_encoding !== null && chunks[0]?.body_encoding !== undefined) { + merged.body_encoding = chunks[0].body_encoding; + } + return SpoolResponseEventSchema.parse(merged); + } + + private streamChunksForCall(callId: string): StreamChunkCaptureRow[] { + return this.db.query(` +SELECT id, call_id, flow_id, chunk_index, observed_at, host, method, path, + headers_json, body_b64, body_sha256, body_encoding, content_type +FROM stream_chunk_capture +WHERE call_id = ? 
+ORDER BY chunk_index ASC, id ASC +`).all(callId) as StreamChunkCaptureRow[]; + } + private paginatedCallSummaries(rows: readonly ProviderCallRow[], limit: number): SpyPaginatedResult { const pageRows = rows.slice(0, limit); const items = pageRows.map((row) => this.callSummaryForRow(row)); @@ -1871,6 +1996,10 @@ function blockSignature(block: NormalizedBlock): string { ].join("\u001f"); } +function isResponseDerivedRequestBlock(block: NormalizedBlock): boolean { + return block.source === "cursor-response-request-context"; +} + function httpEventFromRequest(event: SpoolRequestEvent, callId: string): HttpEventRecord { return { id: `http-${callId}-request`, @@ -1907,6 +2036,14 @@ function contentType(headers: readonly (readonly [string, string])[]): string | return pair?.[1]; } +function streamChunkCaptureId(callId: string, chunkIndex: number): string { + return `stream-chunk-${callId}-${String(chunkIndex).padStart(6, "0")}`; +} + +function sha256Buffer(buffer: Buffer): string { + return createHash("sha256").update(buffer).digest("hex"); +} + function positiveNumber(value: number | undefined, fallback: number): number { return value === undefined || !Number.isFinite(value) || value <= 0 ? 
fallback : value; } diff --git a/src/spy/ui/src/App.tsx b/src/spy/ui/src/App.tsx index 7c3a7db..357ae1b 100644 --- a/src/spy/ui/src/App.tsx +++ b/src/spy/ui/src/App.tsx @@ -87,8 +87,13 @@ const BLOCK_KIND_OPTIONS: readonly NormalizedBlock["kind"][] = [ ]; const PROVIDER_OPTIONS = [ { value: "bedrock", label: "Bedrock" }, + { value: "cursor", label: "Cursor" }, ] as const; const OPERATION_OPTIONS = [ + { value: "agent", label: "Agent" }, + { value: "Run", label: "Cursor Run" }, + { value: "RunSSE", label: "Cursor Run SSE" }, + { value: "StreamUnifiedChat", label: "Cursor Unified Chat" }, { value: "invoke", label: "Invoke" }, { value: "invoke-with-response-stream", label: "Invoke Stream" }, { value: "converse", label: "Converse" }, @@ -1318,6 +1323,8 @@ function compactionReasonLabel(reason: SpyCompactionReason): string { return "Pi request profile"; case "claude_code_request_context_profile": return "Claude Code request profile"; + case "cursor_request_context_profile": + return "Cursor request profile"; case "summarization_system_prompt": return "summary system prompt"; case "conversation_wrapper_input": @@ -1417,7 +1424,9 @@ function RequestCompositionPanel(props: { Tokens {composition.sections.map((section) => { - const tokenCount = tokenCountForSection(props.tokenCounts, "request", section.kind); + const tokenCount = section.present ? tokenCountForSection(props.tokenCounts, "request", section.kind) : undefined; + const tokenText = section.present ? formatTokenRecord(tokenCount) : "-"; + const tokenTitle = section.present ? tokenProvenanceLabel(tokenCount) : "section absent"; return (
{blockKindLabel(section.kind)} @@ -1428,7 +1437,7 @@ function RequestCompositionPanel(props: { {formatNumber(section.blockCount)} {formatNumber(section.charSize)} {formatBytes(section.byteSize)} - {formatTokenRecord(tokenCount)} + {tokenText}
); })} @@ -2121,12 +2130,14 @@ function RawPayloadPanel(props: { function RawPayloadRow(props: { readonly payload: RawPayloadRecord }): React.ReactElement { const [expanded, setExpanded] = React.useState(false); const payload = props.payload; + const displayText = payload.body_text ?? payload.body_b64; + const displayLabel = payload.body_text === undefined ? "Raw provider payload (base64)" : "Raw provider payload"; return (
{payload.direction} {payload.content_type ?? payload.body_encoding ?? "payload"} · {payload.body_sha256 ?? "no hash"} - {payload.body_text === undefined ? null : ( + {displayText === undefined ? null : (
- {payload.body_text === undefined ? ( + {displayText === undefined ? (
base64 payload · {payload.body_b64 === undefined ? "not available" : `${formatNumber(payload.body_b64.length)} encoded chars`}
) : expanded ? (
) : ( -
Payload body collapsed.
+
+ {payload.body_text === undefined ? `Base64 payload collapsed · ${formatNumber(payload.body_b64?.length ?? 0)} encoded chars` : "Payload body collapsed."} +
)}
); From ffa2eb0e78b082670ab3d6f1c4072fb3bf3aa7ff Mon Sep 17 00:00:00 2001 From: Jim Pudar Date: Wed, 27 May 2026 08:11:56 -0400 Subject: [PATCH 2/3] Fix Cursor spy capture extraction --- CURSOR_BUGS.md | 175 +++++++++ proxy/agent_spy.py | 60 ++- proxy/test_agent_spy.py | 97 +++-- src/spy/cursor.test.ts | 169 ++++++++ src/spy/cursor.ts | 781 ++++++++++++++++++++++++++++++++++++- src/spy/service.test.ts | 31 +- src/spy/service.ts | 16 +- src/spy/store.test.ts | 53 +++ src/spy/store.ts | 20 +- src/spy/ui/src/App.tsx | 28 +- src/spy/ui/src/api.test.ts | 6 +- src/spy/ui/src/api.ts | 3 + src/spy/ui/src/types.ts | 4 + 13 files changed, 1353 insertions(+), 90 deletions(-) diff --git a/CURSOR_BUGS.md b/CURSOR_BUGS.md index cab25b6..dcb2c22 100644 --- a/CURSOR_BUGS.md +++ b/CURSOR_BUGS.md @@ -1,5 +1,180 @@ # Cursor Spy Bugs +## 2026-05-27 - Cursor startup/tool capability capture + +### BUG-014 - Cursor protobuf text is mojibake when frames contain UTF-8 strings + +Status: fixed in working tree and verified against the saved live raw payload. + +Live evidence: + +- Spy call: `call-cursor-f835ca09-621e-43e2-92dd-169c8e6ee917` +- The assistant output and tool result blocks showed mojibake such as + `—`, `→`, `──`, `►`, `·`, and `Cursor’s`. +- The expected text was normal UTF-8 punctuation and box drawing, for example + `Harness ──protobuf──► Server ──internal──► Composer inference`. + +Root cause: Cursor Connect/protobuf frames are binary protobuf, not whole-frame +UTF-8 text. The previous decoder tried whole-frame UTF-8 first, and when that +failed because protobuf tags/length prefixes are binary, it decoded the entire +frame as Latin-1 before searching for embedded JSON. That preserved ASCII JSON +syntax but corrupted every non-ASCII UTF-8 byte sequence inside JSON strings. +For example, UTF-8 bytes for `──` (`e2 94 80 e2 94 80`) became +`──` after Latin-1 decoding. + +Fix: Connect/protobuf frames no longer fall back to Latin-1 whole-frame text. 
+The adapter now decodes protobuf length-delimited fields first, then extracts +JSON/text from those exact UTF-8 string field bytes. Whole-frame text extraction +is used only when the entire payload is valid UTF-8. + +Verification: + +- Re-normalizing the saved raw response payload for + `call-cursor-f835ca09-621e-43e2-92dd-169c8e6ee917` now reports + `containsMojibake=false` and includes + `Harness ──protobuf──► Server ──internal──► Composer inference`. +- After `./rootcell provision`, the existing live call was re-normalized by + submitting its saved raw response through the normal spy spool ingest path. + The live detail API now reports zero blocks matching `[âÂ]`; assistant output + and tool results show `→`, `—`, `──`, `►`, `·`, and `Cursor’s` correctly. +- A direct live store scan found 17 historical Cursor calls with stale + normalized mojibake blocks. Re-submitting those saved raw responses through + spool ingest raised the response ingest counter and left zero normalized + blocks matching `[âÂ]` in the live SQLite store. +- Added a regression test that builds a protobuf frame whose JSON string + contains `──`, `►`, `—`, `’`, and `·`; the frame as a whole is invalid UTF-8 + because of protobuf length-prefix bytes, and normalization still returns the + correct Unicode text. +- `bun test src/spy/cursor.test.ts` +- `bun run typecheck` +- `bun run lint` +- `bun run build:spy` + +### BUG-013 - Cursor support traffic overwhelms the spy timeline + +Status: fixed in working tree and verified live after `./rootcell provision`. + +After widening Cursor capture to preserve every Cursor API request, the spy UI +started showing many support RPCs that are not useful as conversation +request/response pairs, including `BidiAppend`, `SubmitLogs`, `TrackEvents`, +`traces`, and privacy/config polling. + +Fix: the call list now has a `traffic` scope. The default `conversation` +scope hides Cursor support RPCs unless the user explicitly selects one of those +operations. 
The `all` scope still exposes every captured Cursor request for +raw protocol investigation. + +Verification: + +- `traffic=conversation` returned 53 Cursor calls with only `Run` and `RunSSE`. +- `traffic=all` returned 120 Cursor calls including `BidiAppend`, telemetry, + traces, privacy/config polling, and `RunSSE`. +- `traffic=conversation&operation=BidiAppend` still returned BidiAppend calls, + so explicit operation filters can inspect support traffic. +- `./rootcell provision` +- `bun run build:spy` +- `bun run typecheck` +- `bun run lint` + +### BUG-012 - Cursor BidiAppend raw protobuf request data is not promoted + +Status: fixed in working tree and verified against the saved post-restart +capture. + +Live evidence: + +- Spy call: `call-cursor-99ec2eaf-9b30-499d-b650-936259490d43` +- The raw `BidiAppend` request body was 334,642 bytes, but request composition + only showed the HTTP provider envelope. +- The outer protobuf field 1 contained an ASCII hex-encoded inner protobuf + message. Decoding that inner message exposed the current user prompt, + `composer-2.5`, and 12 Cursor skill files from + `/home/luser/.cursor/skills-cursor/.../SKILL.md`. +- A scan of the same capture did not find `ClientSideToolV2` enum capability + IDs, but reverse-engineering evidence indicates older/alternate Cursor agent + requests may send supported tools as enum IDs instead of JSON schemas. + +Fix: Cursor request normalization now decodes raw protobuf bodies, recurses into +hex-encoded BidiAppend data fields, promotes BidiAppend envelope metadata, +current-user protobuf messages, Cursor skill file contents, model markers, and +known `ClientSideToolV2` enum capability lists when present. + +Verification: + +- Saved live `BidiAppend` request re-normalizes to 16 request blocks and + 80,327 request bytes: HTTP envelope, decoded BidiAppend envelope, current + user input, 12 Cursor skill blocks, and model marker. 
+- `bun test src/spy/cursor.test.ts src/spy/store.test.ts` +- `bun run typecheck` +- `bun run lint` + +### BUG-011 - Cursor setup RPCs may be hidden by capture gating + +Status: diagnostic capture widened in working tree and hot-deployed to the live +firewall. + +Live evidence: + +- Spy call: `call-cursor-e623f954-fcca-4aec-b7ac-7fb78c1cbd7f` +- This call was made after restarting Cursor CLI, but the captured `RunSSE` + request body was still only 43 bytes. +- The `RunSSE` response metadata reported `Tool definitions` as 24,509 bytes, + but the exact schema text was not present in the captured RunSSE protobuf + frames. +- No separate startup/setup call appeared in the store because the proxy only + captured Cursor `Run`, `RunSSE`, and `StreamUnifiedChat` operations. + +Diagnostic fix: Cursor detection now captures every request to known Cursor API +hosts and wildcard `*.cursor.sh` hosts, including startup, auth, analytics, +config/model, repository, and bidi operations. Cursor request/response bodies +are stored as base64 plus sha256 so raw protobuf bytes are preserved even when +they happen to decode as UTF-8. + +Live deployment: + +- Installed updated `/etc/agent-vm/agent_spy.py` on the `jmp` firewall. +- Restarted `mitmproxy-explicit.service` and `mitmproxy-transparent.service`. +- Verified both mitmproxy services are active and the deployed shim checksum + matches the local file. + +## 2026-05-27 - Cursor protobuf context section metadata + +### BUG-010 - Cursor request composition omits protobuf context-section metadata + +Status: fixed in working tree and verified against the captured live call payload. 
+ +Live evidence: + +- Spy call: `call-cursor-70693ea3-5926-41d0-bb0d-a4c778d80e94` +- Before the fix, the detail API/UI showed only 4 request blocks: + - provider envelope + - Cursor system prompt + - one large harness/rules/skills block + - current user input +- The Cursor response stream also carried protobuf section metadata for hidden + or cached request-context sections, including `tools`, `subagents`, and + `conversation`, but the normalizer only used JSON-like role messages. +- Re-normalizing the live raw response with the fixed adapter adds metadata + request blocks for: + - `Tool definitions`: 24,509 bytes + - `Subagent definitions`: 714 bytes + - `Conversation`: 3,580 bytes +- After hot-deploying the rebuilt spy service and backfilling that call, the + live detail API/UI reports 7 request blocks and 47 KiB of request context. + +Fix: Cursor response normalization now walks decoded protobuf frames for +request-context section metadata and promotes sections that are not otherwise +represented by exact captured text. The store also treats these metadata blocks +as response-derived request blocks so repeated response persistence remains +idempotent. 
+ +Verification: + +- `bun test src/spy/cursor.test.ts src/spy/store.test.ts` +- `bun run typecheck` +- `bun run lint` +- `bun run test:spy` with localhost listener permissions + ## 2026-05-26 22:41 EDT - Fresh Cursor UI verification Fresh real run: diff --git a/proxy/agent_spy.py b/proxy/agent_spy.py index ba44ab7..12424aa 100644 --- a/proxy/agent_spy.py +++ b/proxy/agent_spy.py @@ -45,24 +45,7 @@ "agentn.global.api5.cursor.sh", } -CURSOR_CAPTURE_PATH_RE = re.compile( - r"/(?:aiserver|agent|chat|composer|conversation|completion|generate|stream|v\d+/)", - re.IGNORECASE, -) - -CURSOR_SKIP_PATH_RE = re.compile( - r"/(?:auth|login|logout|telemetry|analytics|metrics|update|download|extension|settings)(?:/|$)", - re.IGNORECASE, -) - -CURSOR_SKIP_OPERATION_RE = re.compile( - r"(?:AnalyticsService|DashboardService|ServerConfigService|GetUsableModels|AvailableModels|" - r"GetDefaultModelForCli|GetCliDownloadUrl|SubmitLogs|TrackEvents|BootstrapStatsig|Statsig|traces|" - r"BidiService|BidiAppend)", - re.IGNORECASE, -) - -CURSOR_CAPTURE_OPERATION_RE = re.compile( +CURSOR_STREAM_OPERATION_RE = re.compile( r"^(?:Run|RunSSE|StreamUnifiedChat)$", re.IGNORECASE, ) @@ -238,29 +221,13 @@ def detect_cursor_request( method: str | None = None, body: bytes | None = None, ) -> dict[str, str] | None: - """Detect Cursor Agent API calls while excluding auth/update/download noise.""" + """Detect all Cursor API calls so startup/tool capability traffic is visible.""" if not is_cursor_api_host(host): return None - if method is not None and method.upper() not in {"POST", "PUT", "PATCH"}: - return None - url_path = urllib.parse.urlsplit(path).path - if CURSOR_SKIP_PATH_RE.search(url_path): - return None - operation = _cursor_operation_from_path(url_path) - if CURSOR_SKIP_OPERATION_RE.search(url_path) or CURSOR_SKIP_OPERATION_RE.search(operation): - return None - - if CURSOR_CAPTURE_OPERATION_RE.match(operation) is None: - return None - - normalized_host = host.split(":", 1)[0].strip(".").lower() 
if host else "" - if not normalized_host.endswith(".cursor.sh") and CURSOR_CAPTURE_PATH_RE.search(url_path) is None: - return None - return { "provider": "cursor", "model_id": _cursor_model_id_from_body(body) or "cursor", @@ -577,13 +544,24 @@ def _flow_id(flow: Any) -> str | None: return str(value) if value is not None else None -def _attach_body(event: dict[str, Any], body: bytes, *, force_encoding: str | None = None) -> None: +def _attach_body( + event: dict[str, Any], + body: bytes, + *, + force_encoding: str | None = None, + force_base64: bool = False, +) -> None: if force_encoding == "aws-eventstream": event["body_b64"] = base64.b64encode(body).decode("ascii") event["body_sha256"] = _sha256_bytes(body) event["body_encoding"] = "aws-eventstream" return + if force_base64: + event["body_b64"] = base64.b64encode(body).decode("ascii") + event["body_sha256"] = _sha256_bytes(body) + return + text = _decode_utf8(body) if text is None: event["body_b64"] = base64.b64encode(body).decode("ascii") @@ -641,7 +619,7 @@ def capture_request(flow: Any) -> None: event = _event_base(flow, "request", info) event["headers"] = redact_headers(getattr(request, "headers", None)) - _attach_body(event, body) + _attach_body(event, body, force_base64=info["provider"] == "cursor") _write_spool_event(event, config) except Exception as exc: # pragma: no cover - defensive for live traffic. _write_shim_error(flow, str(exc)) @@ -670,7 +648,7 @@ def capture_response(flow: Any) -> None: if info["provider"] == "bedrock" and _is_eventstream_content_type(content_type): _attach_body(event, body, force_encoding="aws-eventstream") else: - _attach_body(event, body) + _attach_body(event, body, force_base64=info["provider"] == "cursor") _write_spool_event(event, config) except Exception as exc: # pragma: no cover - defensive for live traffic. 
_write_shim_error(flow, str(exc)) @@ -711,6 +689,12 @@ def prepare_response_stream(flow: Any) -> None: return if info.get("provider") != "cursor": return + content_type = _header_value(getattr(response, "headers", None), "content-type") + if ( + CURSOR_STREAM_OPERATION_RE.match(info.get("operation", "")) is None + and _content_type_base(content_type) != "text/event-stream" + ): + return def tee_cursor_chunk(chunk: bytes) -> bytes: capture_stream_chunk(flow, chunk) return chunk diff --git a/proxy/test_agent_spy.py b/proxy/test_agent_spy.py index bc95304..437d167 100644 --- a/proxy/test_agent_spy.py +++ b/proxy/test_agent_spy.py @@ -32,6 +32,7 @@ def eventstream_message(headers, payload): def make_flow( flow_id="flow-1", host="bedrock-runtime.us-east-1.amazonaws.com", + method="POST", path="/model/anthropic.claude/converse-stream", request_headers=None, request_body=b'{"messages":[]}', @@ -42,7 +43,7 @@ def make_flow( request = types.SimpleNamespace( pretty_host=host, host=host, - method="POST", + method=method, path=path, headers=request_headers or [ @@ -127,34 +128,41 @@ def test_detects_cursor_agent_api_paths(self): "GET", ) ) - self.assertIsNone( - agent_spy.detect_cursor_request( - "api.cursor.com", - "/auth/login", - "POST", - ) + login_info = agent_spy.detect_cursor_request("api.cursor.com", "/auth/login", "POST") + self.assertIsNotNone(login_info) + self.assertEqual(login_info["operation"], "login") + + analytics_info = agent_spy.detect_cursor_request( + "agentn.global.api5.cursor.sh", + "/aiserver.v1.AnalyticsService/BootstrapStatsig", + "POST", ) - self.assertIsNone( - agent_spy.detect_cursor_request( - "agentn.global.api5.cursor.sh", - "/aiserver.v1.AnalyticsService/BootstrapStatsig", - "POST", - ) + self.assertIsNotNone(analytics_info) + self.assertEqual(analytics_info["operation"], "BootstrapStatsig") + + bidi_info = agent_spy.detect_cursor_request( + "api2.cursor.sh", + "/aiserver.v1.BidiService/BidiAppend", + "POST", ) - self.assertIsNone( - 
agent_spy.detect_cursor_request( - "api2.cursor.sh", - "/aiserver.v1.BidiService/BidiAppend", - "POST", - ) + self.assertIsNotNone(bidi_info) + self.assertEqual(bidi_info["operation"], "BidiAppend") + + repo_info = agent_spy.detect_cursor_request( + "api2.cursor.sh", + "/repository.v1.RepositoryService/FastRepoInitHandshakeV2", + "POST", ) - self.assertIsNone( - agent_spy.detect_cursor_request( - "api2.cursor.sh", - "/repository.v1.RepositoryService/FastRepoInitHandshakeV2", - "POST", - ) + self.assertIsNotNone(repo_info) + self.assertEqual(repo_info["operation"], "FastRepoInitHandshakeV2") + + get_info = agent_spy.detect_cursor_request( + "api2.cursor.sh", + "/aiserver.v1.ServerConfigService/GetUsableModels", + "GET", ) + self.assertIsNotNone(get_info) + self.assertEqual(get_info["operation"], "GetUsableModels") def test_detects_wildcard_cursor_agent_hosts(self): info = agent_spy.detect_cursor_request( @@ -299,9 +307,33 @@ def test_cursor_request_spool_event_shape_and_redaction(self): [pair for pair in event["headers"] if pair[0].lower() == "authorization"], [["Authorization", "[redacted]"]], ) - self.assertEqual(json.loads(event["body_text"])["prompt"], "RCSPY-CURSOR-ALPHA") + self.assertNotIn("body_text", event) + self.assertEqual( + json.loads(base64.b64decode(event["body_b64"]).decode("utf-8"))["prompt"], + "RCSPY-CURSOR-ALPHA", + ) + self.assertEqual(event["body_sha256"], agent_spy._sha256_bytes(flow.request.raw_content)) self.assertEqual(flow.metadata["agent_spy"]["provider"], "cursor") + def test_cursor_get_request_spools_empty_raw_body(self): + self.write_config(enabled=True) + flow = make_flow( + host="api2.cursor.sh", + method="GET", + path="/aiserver.v1.ServerConfigService/GetUsableModels", + request_body=b"", + ) + + agent_spy.capture_request(flow) + + events = self.read_events() + self.assertEqual(len(events), 1) + event = events[0] + self.assertEqual(event["provider"], "cursor") + self.assertEqual(event["operation"], "GetUsableModels") + 
self.assertEqual(event["body_b64"], "") + self.assertEqual(event["body_sha256"], agent_spy._sha256_bytes(b"")) + def test_cursor_response_streaming_is_enabled_for_matched_flows(self): flow = make_flow( host="agentn.global.api5.cursor.sh", @@ -315,6 +347,19 @@ def test_cursor_response_streaming_is_enabled_for_matched_flows(self): self.assertTrue(callable(flow.response.stream)) + def test_cursor_non_stream_operation_does_not_force_response_streaming(self): + flow = make_flow( + host="api2.cursor.sh", + path="/aiserver.v1.ServerConfigService/GetUsableModels", + request_headers=[("Content-Type", "application/connect+proto")], + request_body=b"", + response_headers=[("Content-Type", "application/json")], + ) + + agent_spy.prepare_response_stream(flow) + + self.assertFalse(hasattr(flow.response, "stream")) + def test_cursor_response_stream_callback_spools_chunks_unchanged(self): self.write_config(enabled=True) flow = make_flow( diff --git a/src/spy/cursor.test.ts b/src/spy/cursor.test.ts index f204694..f12ce41 100644 --- a/src/spy/cursor.test.ts +++ b/src/spy/cursor.test.ts @@ -79,10 +79,27 @@ function protoVarintField(fieldNumber: number, value: number): Buffer { return protoField(fieldNumber, 0, protoVarint(value)); } +function protoPackedVarintsField(fieldNumber: number, values: readonly number[]): Buffer { + return protoMessageField(fieldNumber, Buffer.concat(values.map((value) => protoVarint(value)))); +} + function protoMessageField(fieldNumber: number, message: Buffer): Buffer { return protoField(fieldNumber, 2, Buffer.concat([protoVarint(message.length), message])); } +function protoStringField(fieldNumber: number, value: string): Buffer { + return protoMessageField(fieldNumber, Buffer.from(value, "utf8")); +} + +function cursorContextSectionMetadata(key: string, label: string, startOffset: number, size: number): Buffer { + return Buffer.concat([ + protoStringField(1, key), + protoStringField(2, label), + protoVarintField(3, startOffset), + protoVarintField(4, 
size), + ]); +} + describe("Cursor adapter", () => { test("normalizes Cursor request semantic blocks", () => { const normalized = normalizeCursorRequest(cursorRequest("fixture-cursor-flow", { @@ -344,4 +361,156 @@ describe("Cursor adapter", () => { }], }); }); + + test("decodes embedded protobuf JSON as UTF-8 instead of Latin-1 mojibake", () => { + const assistantText = [ + "Harness \u2500\u2500protobuf\u2500\u2500\u25ba Server \u2014 ok", + "Apostrophe: Cursor\u2019s agent. Bullet: \u00b7.", + "Padding so the protobuf length prefix uses a multi-byte varint and the whole frame is not valid UTF-8.", + ].join("\n"); + const assistantJson = JSON.stringify({ + role: "assistant", + content: [{ type: "text", text: assistantText }], + }); + expect(Buffer.byteLength(assistantJson, "utf8")).toBeGreaterThan(127); + const protoPayload = protoStringField(1, assistantJson); + expect(protoPayload.toString("utf8")).toContain("\uFFFD"); + + const normalized = normalizeCursorResponse({ + ...cursorResponse("fixture-cursor-protobuf-utf8-json", {}), + headers: [["content-type", "application/connect+proto"]], + body_text: undefined, + body_b64: connectFrame(protoPayload).toString("base64"), + }); + + const normalizedText = normalized.blocks.find((block) => block.kind === "assistant-output")?.text ?? 
""; + expect(normalizedText).toContain("Harness \u2500\u2500protobuf\u2500\u2500\u25ba Server \u2014 ok"); + expect(normalizedText).toContain("Cursor\u2019s agent"); + expect(normalizedText).toContain("Bullet: \u00b7"); + expect(normalizedText).not.toContain("â"); + expect(normalizedText).not.toContain("Â"); + expect(normalized.streamEvents.some((event) => JSON.stringify(event.payload).includes("Harness \u2500\u2500protobuf\u2500\u2500\u25ba Server"))).toBe(true); + }); + + test("promotes Cursor protobuf request-context section metadata", () => { + const sectionEnvelope = Buffer.concat([ + protoMessageField(3, cursorContextSectionMetadata("tools", "Tool definitions", 5_884, 24_509)), + protoMessageField(3, cursorContextSectionMetadata("conversation", "Conversation", 1_029, 3_083)), + ]); + const normalized = normalizeCursorResponse({ + ...cursorResponse("fixture-cursor-protobuf-context-sections", {}), + headers: [["content-type", "application/connect+proto"]], + body_text: undefined, + body_b64: connectFrame(sectionEnvelope).toString("base64"), + }); + + const requestBlocks = normalized.blocks.filter((block) => block.direction === "request"); + const toolMetadata = requestBlocks.find((block) => block.kind === "tool-definition"); + expect(toolMetadata).toMatchObject({ + source: "cursor-response-context-metadata", + provider_path: "$frame[0].3", + char_size: 24_509, + byte_size: 24_509, + }); + expect(toolMetadata?.text).toContain("Tool definitions"); + expect(toolMetadata?.json).toMatchObject({ + sectionKey: "tools", + reportedByteSize: 24_509, + }); + + const conversationMetadata = requestBlocks.find((block) => block.kind === "prior-conversation-history"); + expect(conversationMetadata).toMatchObject({ + source: "cursor-response-context-metadata", + char_size: 3_083, + byte_size: 3_083, + }); + expect(conversationMetadata?.text).toContain("Conversation"); + }); + + test("extracts Cursor BidiAppend hex protobuf request context", () => { + const userMessage = 
Buffer.concat([ + protoStringField(1, "Please inspect the repo and reply with RCSPY-BIDI-OK"), + protoStringField(2, "a3e38f7d-f57f-4e25-8c71-fe4bba7353f0"), + protoStringField(3, ""), + protoVarintField(4, 1), + ]); + const skillEntry = Buffer.concat([ + protoStringField(1, "/home/luser/.cursor/skills-cursor/sample/SKILL.md"), + protoStringField(2, "---\nname: sample\ndescription: Test skill.\n---\n# Sample Skill\nUse this skill for tests."), + protoStringField(3, "Test skill."), + ]); + const requestContext = Buffer.concat([ + protoMessageField(1, userMessage), + protoMessageField(2, protoMessageField(2, skillEntry)), + ]); + const innerRequest = protoMessageField(1, Buffer.concat([ + protoStringField(1, ""), + protoMessageField(2, protoMessageField(1, requestContext)), + protoMessageField(9, protoStringField(1, "composer-2.5")), + ])); + const bidiAppend = Buffer.concat([ + protoStringField(1, innerRequest.toString("hex")), + protoMessageField(2, protoStringField(1, "e9cf8f00-34dc-4361-b214-be52cc52f310")), + protoVarintField(3, 2), + ]); + + const normalized = normalizeCursorRequest({ + ...cursorRequest("fixture-cursor-bidi-append", {}), + operation: "BidiAppend", + model_id: "cursor", + path: "/aiserver.v1.BidiService/BidiAppend", + headers: [["content-type", "application/proto"]], + body_text: undefined, + body_b64: bidiAppend.toString("base64"), + }, { storeRaw: true }); + + expect(normalized.call.model_id).toBe("composer-2.5"); + expect(normalized.rawPayloads).toHaveLength(1); + const envelope = normalized.blocks.find((block) => block.source === "cursor-request-protobuf-envelope"); + expect(envelope?.text).toContain("requestId=e9cf8f00-34dc-4361-b214-be52cc52f310"); + expect(envelope?.json).toMatchObject({ + appendSeqno: 2, + dataDecodedByteLength: innerRequest.length, + }); + const requestText = normalized.blocks.filter((block) => block.direction === "request").map((block) => block.text).join("\n"); + expect(requestText).toContain("RCSPY-BIDI-OK"); + 
expect(requestText).toContain("/home/luser/.cursor/skills-cursor/sample/SKILL.md"); + expect(requestText).toContain("# Sample Skill"); + expect(requestText).toContain("model: composer-2.5"); + }); + + test("surfaces Cursor ClientSideToolV2 enum capabilities when protobuf carries them", () => { + const requestProto = Buffer.concat([ + protoVarintField(29, 5), + protoVarintField(29, 6), + protoVarintField(29, 15), + protoPackedVarintsField(29, [41, 42]), + ]); + + const normalized = normalizeCursorRequest({ + ...cursorRequest("fixture-cursor-tool-enums", {}), + model_id: "cursor", + headers: [["content-type", "application/proto"]], + body_text: undefined, + body_b64: requestProto.toString("base64"), + }); + + const toolBlock = normalized.blocks.find((block) => block.source === "cursor-request-protobuf-tool-enums"); + expect(toolBlock?.kind).toBe("tool-definition"); + expect(toolBlock?.text).toContain("READ_FILE (5)"); + expect(toolBlock?.text).toContain("LIST_DIR (6)"); + expect(toolBlock?.text).toContain("RUN_TERMINAL_COMMAND_V2 (15)"); + expect(toolBlock?.text).toContain("RIPGREP_RAW_SEARCH (41)"); + expect(toolBlock?.text).toContain("GLOB_FILE_SEARCH (42)"); + expect(toolBlock?.json).toMatchObject({ + enum: "ClientSideToolV2", + tools: [ + { id: 5, name: "READ_FILE" }, + { id: 6, name: "LIST_DIR" }, + { id: 15, name: "RUN_TERMINAL_COMMAND_V2" }, + { id: 41, name: "RIPGREP_RAW_SEARCH" }, + { id: 42, name: "GLOB_FILE_SEARCH" }, + ], + }); + }); }); diff --git a/src/spy/cursor.ts b/src/spy/cursor.ts index 6b4dace..55ee942 100644 --- a/src/spy/cursor.ts +++ b/src/spy/cursor.ts @@ -41,6 +41,8 @@ interface BlockInput { readonly providerPath?: string | undefined; readonly text?: string | undefined; readonly json?: unknown; + readonly charSize?: number | undefined; + readonly byteSize?: number | undefined; } interface TextCandidate { @@ -70,6 +72,7 @@ interface ParsedBody { readonly modelText?: string | undefined; readonly json?: unknown; readonly jsonLines: readonly 
unknown[]; + readonly proto?: DecodedProtoMessage | undefined; readonly connectFrames?: readonly ParsedConnectFrame[] | undefined; } @@ -101,6 +104,9 @@ interface DecodedProtoField { readonly text?: string | undefined; readonly packedVarints?: readonly number[] | undefined; readonly nested?: DecodedProtoMessage | undefined; + readonly hexDecoded?: DecodedProtoMessage | undefined; + readonly hexByteLength?: number | undefined; + readonly hexSha256?: string | undefined; readonly byteLength?: number | undefined; } @@ -118,6 +124,109 @@ interface CursorWireUsageCandidate { readonly wireInputTokens: number; } +interface RequestContextSectionMetadata { + readonly key: string; + readonly label: string; + readonly kind: BlockKind; + readonly path: string; + readonly frameIndex: number; + readonly startOffset?: number | undefined; + readonly reportedSize: number; +} + +interface ProtoSource { + readonly source: "raw-protobuf" | "connect-frame"; + readonly message: DecodedProtoMessage; + readonly frameIndex?: number | undefined; +} + +interface CursorBidiAppendEnvelope { + readonly path: string; + readonly requestId?: string | undefined; + readonly appendSeqno?: number | undefined; + readonly dataHexByteLength: number; + readonly dataDecodedByteLength: number; + readonly dataSha256: string; +} + +interface CursorSkillEntry { + readonly path: string; + readonly filePath: string; + readonly content: string; + readonly description?: string | undefined; +} + +interface CursorProtoUserMessage { + readonly path: string; + readonly text: string; + readonly messageId?: string | undefined; +} + +interface CursorToolCapability { + readonly id: number; + readonly name: string; + readonly path: string; + readonly encoding: "varint" | "packed-varint"; +} + +const CURSOR_CONTEXT_SECTION_ORDER = [ + "system_prompt", + "rules", + "skills", + "mcp", + "subagents", + "tools", + "summarized_conversation", + "conversation", +] as const; + +const CURSOR_CLIENT_SIDE_TOOL_V2_NAMES = new Map([ + 
[1, "READ_SEMSEARCH_FILES"], + [3, "RIPGREP_SEARCH"], + [5, "READ_FILE"], + [6, "LIST_DIR"], + [7, "EDIT_FILE"], + [8, "FILE_SEARCH"], + [9, "SEMANTIC_SEARCH_FULL"], + [11, "DELETE_FILE"], + [12, "REAPPLY"], + [15, "RUN_TERMINAL_COMMAND_V2"], + [16, "FETCH_RULES"], + [18, "WEB_SEARCH"], + [19, "MCP"], + [23, "SEARCH_SYMBOLS"], + [24, "BACKGROUND_COMPOSER_FOLLOWUP"], + [25, "KNOWLEDGE_BASE"], + [26, "FETCH_PULL_REQUEST"], + [27, "DEEP_SEARCH"], + [28, "CREATE_DIAGRAM"], + [29, "FIX_LINTS"], + [30, "READ_LINTS"], + [31, "GO_TO_DEFINITION"], + [32, "TASK"], + [33, "AWAIT_TASK"], + [34, "TODO_READ"], + [35, "TODO_WRITE"], + [38, "EDIT_FILE_V2"], + [39, "LIST_DIR_V2"], + [40, "READ_FILE_V2"], + [41, "RIPGREP_RAW_SEARCH"], + [42, "GLOB_FILE_SEARCH"], + [43, "CREATE_PLAN"], + [44, "LIST_MCP_RESOURCES"], + [45, "READ_MCP_RESOURCE"], + [46, "READ_PROJECT"], + [47, "UPDATE_PROJECT"], + [48, "TASK_V2"], + [49, "CALL_MCP_TOOL"], + [50, "APPLY_AGENT_DIFF"], + [51, "ASK_QUESTION"], + [52, "SWITCH_MODE"], + [53, "GENERATE_IMAGE"], + [54, "COMPUTER_USE"], + [55, "WRITE_SHELL_STDIN"], +]); + export function cursorCallIdForFlow(flowId: string): string { return stableId("call", "cursor", flowId); } @@ -214,6 +323,8 @@ function normalizeCursorRequestBlocks( }); } + normalizeRequestProtobuf(body, addBlock, usedPaths); + const bodyText = body.text ?? 
body.binaryText; if (blocks.length === 1 && bodyText !== undefined && bodyText.trim().length > 0) { addBlock({ @@ -227,6 +338,317 @@ function normalizeCursorRequestBlocks( return blocks; } +function normalizeRequestProtobuf( + body: ParsedBody, + addBlock: (input: Omit) => void, + usedPaths: Set, +): void { + const sources = cursorProtoSources(body); + if (sources.length === 0) { + return; + } + + const seen = new Set(); + const addUnique = (key: string, input: Omit): void => { + if (seen.has(key)) { + return; + } + seen.add(key); + if (input.providerPath !== undefined) { + usedPaths.add(input.providerPath); + } + addBlock(input); + }; + + for (const source of sources) { + const envelope = cursorBidiAppendEnvelope(source.message); + if (envelope !== undefined) { + addUnique(`bidi-envelope:${envelope.requestId ?? ""}:${String(envelope.appendSeqno ?? "")}:${envelope.dataSha256}`, { + kind: "provider-envelope", + source: "cursor-request-protobuf-envelope", + providerPath: envelope.path, + text: [ + "Cursor BidiAppend protobuf envelope", + envelope.requestId === undefined ? undefined : `requestId=${envelope.requestId}`, + envelope.appendSeqno === undefined ? undefined : `appendSeqno=${String(envelope.appendSeqno)}`, + `decodedData=${String(envelope.dataDecodedByteLength)} bytes`, + ].filter((part): part is string => part !== undefined).join(" "), + json: { + requestId: envelope.requestId, + appendSeqno: envelope.appendSeqno, + dataHexByteLength: envelope.dataHexByteLength, + dataDecodedByteLength: envelope.dataDecodedByteLength, + dataSha256: envelope.dataSha256, + }, + }); + } + } + + for (const message of collectCursorProtoUserMessages(sources)) { + addUnique(`user-message:${message.messageId ?? ""}:${message.text}`, { + kind: "current-user-input", + source: "cursor-request-protobuf-message", + providerPath: message.path, + role: "user", + text: message.text, + json: { + ...(message.messageId === undefined ? 
{} : { messageId: message.messageId }), + }, + }); + } + + for (const skill of collectCursorProtoSkills(sources)) { + addUnique(`skill:${skill.filePath}:${sha256(skill.content)}`, { + kind: "harness-system-context", + source: "cursor-request-protobuf-skill", + providerPath: skill.path, + role: "skill", + text: `${skill.filePath}\n\n${skill.content}`, + json: { + filePath: skill.filePath, + ...(skill.description === undefined ? {} : { description: skill.description }), + }, + }); + } + + for (const model of collectCursorProtoModels(sources)) { + addUnique(`model:${model.model}`, { + kind: "provider-envelope", + source: "cursor-request-protobuf-model", + providerPath: model.path, + text: `model: ${model.model}`, + }); + } + + const toolCapabilities = collectCursorToolCapabilities(sources); + if (toolCapabilities.length > 0) { + const tools = toolCapabilities.map((tool) => ({ + id: tool.id, + name: tool.name, + path: tool.path, + encoding: tool.encoding, + })); + addUnique(`tool-capabilities:${tools.map((tool) => `${String(tool.id)}:${tool.path}`).join(",")}`, { + kind: "tool-definition", + source: "cursor-request-protobuf-tool-enums", + providerPath: "$.protobuf.ClientSideToolV2", + text: [ + "Cursor ClientSideToolV2 capabilities:", + ...tools.map((tool) => `- ${tool.name} (${String(tool.id)}) at ${tool.path}`), + ].join("\n"), + json: { + enum: "ClientSideToolV2", + tools, + }, + }); + } +} + +function cursorProtoSources(body: ParsedBody): ProtoSource[] { + const sources: ProtoSource[] = []; + if (body.proto !== undefined) { + sources.push({ source: "raw-protobuf", message: body.proto }); + } + for (const frame of body.connectFrames ?? 
[]) { + if (frame.proto !== undefined) { + sources.push({ source: "connect-frame", message: frame.proto, frameIndex: frame.index }); + } + } + return sources; +} + +function cursorBidiAppendEnvelope(message: DecodedProtoMessage): CursorBidiAppendEnvelope | undefined { + const dataField = message.fields.find((field) => + field.fieldNumber === 1 + && field.wireType === 2 + && field.hexDecoded !== undefined + && field.hexByteLength !== undefined + && field.hexSha256 !== undefined + ); + if (dataField?.hexDecoded === undefined || dataField.hexByteLength === undefined || dataField.hexSha256 === undefined) { + return undefined; + } + + const requestIdContainer = message.fields.find((field) => field.fieldNumber === 2 && field.nested !== undefined)?.nested; + const requestId = requestIdContainer === undefined ? undefined : directProtoTextField(requestIdContainer, 1); + const appendSeqno = directProtoNumberField(message, 3); + return { + path: message.path, + requestId, + appendSeqno, + dataHexByteLength: dataField.byteLength ?? 0, + dataDecodedByteLength: dataField.hexByteLength, + dataSha256: dataField.hexSha256, + }; +} + +function collectCursorProtoUserMessages(sources: readonly ProtoSource[]): CursorProtoUserMessage[] { + const messages: CursorProtoUserMessage[] = []; + const seen = new Set(); + + const visit = (message: DecodedProtoMessage): void => { + const text = directProtoTextField(message, 1); + const messageId = directProtoTextField(message, 2); + const role = directProtoNumberField(message, 4); + if ( + text !== undefined + && text.trim().length > 0 + && role === 1 + && (messageId === undefined || isUuidLike(messageId)) + && !looksLikeCursorSkillPath(text) + && !looksLikeCursorSkillContent(text) + ) { + const key = `${messageId ?? ""}:${text}`; + if (!seen.has(key)) { + seen.add(key); + messages.push({ + path: message.path, + text, + ...(messageId === undefined ? 
{} : { messageId }), + }); + } + } + + for (const child of nestedProtoMessages(message)) { + visit(child); + } + }; + + for (const source of sources) { + visit(source.message); + } + return messages; +} + +function collectCursorProtoSkills(sources: readonly ProtoSource[]): CursorSkillEntry[] { + const skills: CursorSkillEntry[] = []; + const seen = new Set(); + + const visit = (message: DecodedProtoMessage): void => { + const filePath = directProtoTextField(message, 1); + const content = directProtoTextField(message, 2); + if ( + filePath !== undefined + && content !== undefined + && looksLikeCursorSkillPath(filePath) + && looksLikeCursorSkillContent(content) + ) { + const description = directProtoTextField(message, 3); + const key = `${filePath}:${sha256(content)}`; + if (!seen.has(key)) { + seen.add(key); + skills.push({ + path: message.path, + filePath, + content, + ...(description === undefined || description.trim().length === 0 ? {} : { description }), + }); + } + } + + for (const child of nestedProtoMessages(message)) { + visit(child); + } + }; + + for (const source of sources) { + visit(source.message); + } + return skills; +} + +function collectCursorProtoModels(sources: readonly ProtoSource[]): { readonly path: string; readonly model: string }[] { + const models: { readonly path: string; readonly model: string }[] = []; + const seen = new Set(); + + const visit = (message: DecodedProtoMessage): void => { + for (const field of message.fields) { + const model = modelIdFromText(field.text); + if (model !== undefined && !seen.has(model)) { + seen.add(model); + models.push({ path: field.path, model }); + } + } + for (const child of nestedProtoMessages(message)) { + visit(child); + } + }; + + for (const source of sources) { + visit(source.message); + } + return models; +} + +function collectCursorToolCapabilities(sources: readonly ProtoSource[]): CursorToolCapability[] { + const capabilities: CursorToolCapability[] = []; + const seen = new Set(); + + const push 
= (id: number, path: string, encoding: CursorToolCapability["encoding"]): void => { + const name = CURSOR_CLIENT_SIDE_TOOL_V2_NAMES.get(id); + if (name === undefined || seen.has(id)) { + return; + } + seen.add(id); + capabilities.push({ id, name, path, encoding }); + }; + + const visit = (message: DecodedProtoMessage): void => { + for (const field of message.fields) { + if (field.fieldNumber === 29 && field.wireType === 0 && typeof field.value === "number") { + push(field.value, field.path, "varint"); + } + if (field.fieldNumber === 29 && field.wireType === 2 && plausiblePackedToolIds(field.packedVarints)) { + for (const id of field.packedVarints) { + push(id, field.path, "packed-varint"); + } + } + } + for (const child of nestedProtoMessages(message)) { + visit(child); + } + }; + + for (const source of sources) { + visit(source.message); + } + return capabilities.sort((left, right) => left.id - right.id); +} + +function plausiblePackedToolIds(values: readonly number[] | undefined): values is readonly number[] { + return values !== undefined + && values.length > 0 + && values.length <= CURSOR_CLIENT_SIDE_TOOL_V2_NAMES.size + && values.every((value) => CURSOR_CLIENT_SIDE_TOOL_V2_NAMES.has(value)); +} + +function nestedProtoMessages(message: DecodedProtoMessage): DecodedProtoMessage[] { + const nested: DecodedProtoMessage[] = []; + for (const field of message.fields) { + nested.push(...nestedProtoMessagesFromField(field)); + } + return nested; +} + +function nestedProtoMessagesFromField(field: DecodedProtoField): DecodedProtoMessage[] { + return [ + ...(field.nested === undefined ? [] : [field.nested]), + ...(field.hexDecoded === undefined ? 
[] : [field.hexDecoded]), + ]; +} + +function looksLikeCursorSkillPath(value: string): boolean { + return /(?:^|\/)\.cursor\/skills(?:-[^/]+)?\/[^/]+\/SKILL\.md$/i.test(value); +} + +function looksLikeCursorSkillContent(value: string): boolean { + return /^---\s*\nname:\s*/.test(value) || /^#\s+\S/.test(value); +} + +function isUuidLike(value: string): boolean { + return /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i.test(value); +} + function normalizeRequestJson( value: unknown, addBlock: (input: Omit) => void, @@ -351,7 +773,7 @@ function normalizeCursorResponseBody( }); const values = structuredValues(body); - const requestBlocks = normalizeCursorResponseRequestContext(callId, values); + const requestBlocks = normalizeCursorResponseRequestContext(callId, values, body.connectFrames ?? []); const textParts = collectCursorResponseText(values); if (textParts.length === 0) { const bodyText = body.text ?? body.binaryText; @@ -412,6 +834,7 @@ function normalizeCursorResponseBody( function normalizeCursorResponseRequestContext( callId: string, values: readonly unknown[], + frames: readonly ParsedConnectFrame[], ): NormalizedBlock[] { const candidates = collectCursorRequestContextCandidates(values); const blocks: NormalizedBlock[] = []; @@ -438,9 +861,170 @@ function normalizeCursorResponseRequestContext( ordinal += 1; } + for (const metadata of collectCursorRequestContextSectionMetadata(frames)) { + if (requestContextMetadataCovered(blocks, metadata)) { + continue; + } + const key = `metadata\n${metadata.key}\n${String(metadata.reportedSize)}`; + if (seen.has(key)) { + continue; + } + seen.add(key); + blocks.push(createBlock({ + callId, + direction: "request", + ordinal, + kind: metadata.kind, + source: "cursor-response-context-metadata", + providerPath: metadata.path, + text: `Cursor reported ${metadata.label} in request context (${String(metadata.reportedSize)} bytes). 
Exact section text was not exposed in captured Cursor HTTP payload.`, + json: { + sectionKey: metadata.key, + sectionLabel: metadata.label, + reportedCharSize: metadata.reportedSize, + reportedByteSize: metadata.reportedSize, + frameIndex: metadata.frameIndex, + ...(metadata.startOffset === undefined ? {} : { startOffset: metadata.startOffset }), + }, + charSize: metadata.reportedSize, + byteSize: metadata.reportedSize, + })); + ordinal += 1; + } + return blocks; } +function collectCursorRequestContextSectionMetadata( + frames: readonly ParsedConnectFrame[], +): RequestContextSectionMetadata[] { + const byKey = new Map(); + + const visit = (message: DecodedProtoMessage, frameIndex: number): void => { + const metadata = cursorRequestContextSectionMetadata(message, frameIndex); + if (metadata !== undefined) { + const existing = byKey.get(metadata.key); + if ( + existing === undefined + || metadata.reportedSize > existing.reportedSize + || (metadata.reportedSize === existing.reportedSize && metadata.frameIndex > existing.frameIndex) + ) { + byKey.set(metadata.key, metadata); + } + } + for (const field of message.fields) { + for (const child of nestedProtoMessagesFromField(field)) { + visit(child, frameIndex); + } + } + }; + + for (const frame of frames) { + if (frame.proto !== undefined) { + visit(frame.proto, frame.index); + } + } + + return [...byKey.values()].sort((left, right) => + cursorContextSectionOrder(left.key) - cursorContextSectionOrder(right.key) + ); +} + +function cursorRequestContextSectionMetadata( + message: DecodedProtoMessage, + frameIndex: number, +): RequestContextSectionMetadata | undefined { + const key = directProtoTextField(message, 1); + const label = directProtoTextField(message, 2); + const reportedSize = directProtoNumberField(message, 4); + if (key === undefined || label === undefined || reportedSize === undefined || reportedSize <= 0) { + return undefined; + } + const kind = cursorContextSectionKind(key); + if (kind === undefined) { + 
return undefined; + } + const startOffset = directProtoNumberField(message, 3); + return { + key, + label, + kind, + path: message.path, + frameIndex, + ...(startOffset === undefined ? {} : { startOffset }), + reportedSize, + }; +} + +function cursorContextSectionKind(key: string): BlockKind | undefined { + if (key === "tools") { + return "tool-definition"; + } + if (key === "conversation" || key === "summarized_conversation") { + return "prior-conversation-history"; + } + if (key === "system_prompt" || key === "rules" || key === "skills" || key === "mcp" || key === "subagents") { + return "harness-system-context"; + } + return undefined; +} + +function cursorContextSectionOrder(key: string): number { + const index = CURSOR_CONTEXT_SECTION_ORDER.indexOf(key as (typeof CURSOR_CONTEXT_SECTION_ORDER)[number]); + return index === -1 ? CURSOR_CONTEXT_SECTION_ORDER.length : index; +} + +function requestContextMetadataCovered( + blocks: readonly NormalizedBlock[], + metadata: RequestContextSectionMetadata, +): boolean { + if (metadata.key === "tools") { + return blocks.some((block) => block.kind === "tool-definition"); + } + if (metadata.key === "conversation" || metadata.key === "summarized_conversation") { + return blocks.some((block) => block.kind === "prior-conversation-history"); + } + if (metadata.key === "system_prompt") { + return blocks.some((block) => block.kind === "harness-system-context" && block.role === "system"); + } + + const pattern = cursorContextSectionCoveragePattern(metadata.key); + if (pattern !== undefined) { + return blocks.some((block) => + block.kind === "harness-system-context" + && block.text !== undefined + && pattern.test(block.text) + ); + } + + return blocks.some((block) => block.kind === metadata.kind); +} + +function cursorContextSectionCoveragePattern(key: string): RegExp | undefined { + if (key === "rules") { + return /<(?:rules|user_rules)\b/i; + } + if (key === "skills") { + return /<(?:agent_skills|available_skills|agent_skill)\b/i; 
+ } + if (key === "mcp") { + return / field.fieldNumber === fieldNumber && typeof field.text === "string")?.text; +} + +function directProtoNumberField(message: DecodedProtoMessage, fieldNumber: number): number | undefined { + const value = message.fields.find((field) => field.fieldNumber === fieldNumber && typeof field.value === "number")?.value; + return typeof value === "number" && Number.isFinite(value) ? value : undefined; +} + function collectCursorRequestContextCandidates(values: readonly unknown[]): RequestContextCandidate[] { const candidates: RequestContextCandidate[] = []; @@ -807,6 +1391,10 @@ function parseCapturedBody(event: { if (framed !== undefined) { return framed; } + const proto = parseRawProtoBody(event.body_b64); + if (proto !== undefined) { + return proto; + } return { jsonLines: [] }; } return { text, jsonLines }; @@ -816,6 +1404,10 @@ function parseCapturedBody(event: { if (framed !== undefined) { return framed; } + const proto = parseRawProtoBody(event.body_b64); + if (proto !== undefined) { + return proto; + } const binaryText = extractUsefulPrintableStrings(event.body_b64); return { binaryText, jsonLines: [] }; } @@ -876,13 +1468,21 @@ function parseConnectEnvelope(value: string | undefined, direction: string | und const compressed = (flags & 1) === 1; const payload = decodeConnectPayload(buffer.subarray(offset, offset + length), compressed); offset += length; + const proto = decodeProtoMessage(payload, `$frame[${String(index)}]`); const text = decodePayloadText(payload); - const jsonValues = text === undefined ? [] : extractJsonValues(text); + const textJsonValues = text === undefined ? [] : extractJsonValues(text); + const protoJsonValues = proto === undefined ? [] : collectProtoJsonValues(proto); + const jsonValues = uniqueJsonValues([...textJsonValues, ...protoJsonValues]); if (text !== undefined && modelText === undefined) { modelText = modelIdFromText(text); } - const usefulText = text === undefined ? 
undefined : cursorSemanticTextFromDecoded(text, direction); - const proto = decodeProtoMessage(payload, `$frame[${String(index)}]`); + if (proto !== undefined && modelText === undefined) { + modelText = collectProtoTextFields(proto).find((field) => modelIdFromText(field.text) !== undefined)?.text; + } + const usefulText = joinOptionalText([ + text === undefined ? undefined : cursorSemanticTextFromDecoded(text, direction), + proto === undefined ? undefined : cursorSemanticTextFromProto(proto, direction), + ]); const frameEnd = offset; const frameBytes = buffer.subarray(frameOffset, frameEnd); frames.push({ @@ -915,6 +1515,68 @@ function parseConnectEnvelope(value: string | undefined, direction: string | und }; } +function parseRawProtoBody(value: string | undefined): ParsedBody | undefined { + if (value === undefined) { + return undefined; + } + let buffer: Buffer; + try { + buffer = Buffer.from(value, "base64"); + } catch { + return undefined; + } + const proto = decodeProtoMessage(buffer, "$protobuf"); + if (proto === undefined) { + return undefined; + } + const jsonLines = collectProtoJsonValues(proto); + const modelText = collectProtoTextFields(proto).find((field) => modelIdFromText(field.text) !== undefined)?.text; + const binaryTextParts = uniqueStrings(collectProtoTextFields(proto) + .map((field) => field.text) + .filter(isUsefulCursorSemanticText)); + return { + jsonLines, + ...(binaryTextParts.length === 0 ? {} : { binaryText: joinTextParts(binaryTextParts) }), + ...(modelText === undefined ? {} : { modelText }), + proto, + }; +} + +function collectProtoJsonValues(message: DecodedProtoMessage): unknown[] { + const values: unknown[] = []; + for (const field of collectProtoTextFields(message)) { + const trimmed = field.text.trim(); + const parsed = trimmed.startsWith("{") ? 
parseJson(trimmed) : { ok: false as const }; + if (parsed.ok) { + values.push(parsed.value); + continue; + } + if (/^data:\s*[{[]/m.test(trimmed)) { + values.push(...parseJsonLinesOrSse(trimmed)); + } + } + return values; +} + +function collectProtoTextFields(message: DecodedProtoMessage): { readonly path: string; readonly text: string }[] { + const fields: { readonly path: string; readonly text: string }[] = []; + const visit = (candidate: DecodedProtoMessage): void => { + for (const field of candidate.fields) { + if (field.text !== undefined) { + fields.push({ path: field.path, text: field.text }); + } + if (field.nested !== undefined) { + visit(field.nested); + } + if (field.hexDecoded !== undefined) { + visit(field.hexDecoded); + } + } + }; + visit(message); + return fields; +} + function decodeProtoMessage(buffer: Buffer, path: string, depth = 0): DecodedProtoMessage | undefined { if (buffer.length === 0 || depth > 8) { return undefined; @@ -975,6 +1637,8 @@ function decodeProtoMessage(buffer: Buffer, path: string, depth = 0): DecodedPro offset += byteLength; const text = protoString(bytes); const nested = decodeProtoMessage(bytes, fieldPath, depth + 1); + const hexBytes = protoHexBytes(bytes); + const hexDecoded = hexBytes === undefined ? undefined : decodeProtoMessage(hexBytes, `${fieldPath}[hex]`, depth + 1); const packedVarints = decodePackedProtoVarints(bytes); fields.push({ path: fieldPath, @@ -984,6 +1648,11 @@ function decodeProtoMessage(buffer: Buffer, path: string, depth = 0): DecodedPro ...(text === undefined ? {} : { text }), ...(packedVarints === undefined ? {} : { packedVarints }), ...(nested === undefined ? {} : { nested }), + ...(hexDecoded === undefined || hexBytes === undefined ? 
{} : { + hexDecoded, + hexByteLength: hexBytes.length, + hexSha256: sha256Buffer(hexBytes), + }), }); } else if (wireType === 5) { if (offset + 4 > buffer.length) { @@ -1039,7 +1708,7 @@ function protoNumber(value: bigint): number | string { } function protoString(buffer: Buffer): string | undefined { - if (buffer.length === 0 || buffer.length > 64_000) { + if (buffer.length === 0 || buffer.length > 128_000) { return undefined; } const text = buffer.toString("utf8"); @@ -1053,7 +1722,22 @@ function protoString(buffer: Buffer): string | undefined { if (controlCount > Math.max(1, text.length * 0.05)) { return undefined; } - return text.length === 0 ? undefined : truncateText(text, 4_000); + return text.length === 0 ? undefined : text; +} + +function protoHexBytes(buffer: Buffer): Buffer | undefined { + if (buffer.length < 16 || buffer.length % 2 !== 0) { + return undefined; + } + for (const byte of buffer) { + const isDigit = byte >= 0x30 && byte <= 0x39; + const isLowerHex = byte >= 0x61 && byte <= 0x66; + const isUpperHex = byte >= 0x41 && byte <= 0x46; + if (!isDigit && !isLowerHex && !isUpperHex) { + return undefined; + } + } + return Buffer.from(buffer.toString("ascii"), "hex"); } function decodePackedProtoVarints(buffer: Buffer): number[] | undefined { @@ -1093,7 +1777,21 @@ function decodePayloadText(payload: Buffer): string | undefined { if (!text.includes("\uFFFD")) { return text; } - return payload.toString("latin1"); + return undefined; +} + +function uniqueJsonValues(values: readonly unknown[]): unknown[] { + const seen = new Set(); + const unique: unknown[] = []; + for (const value of values) { + const key = canonicalJson(value); + if (seen.has(key)) { + continue; + } + seen.add(key); + unique.push(value); + } + return unique; } function extractJsonValues(text: string): unknown[] { @@ -1196,6 +1894,7 @@ function streamEvents(callId: string, observedAt: number, body: ParsedBody): Str } function connectFramePayload(frame: ParsedConnectFrame): unknown { + 
const protoSources = frame.proto === undefined ? [] : [{ source: "connect-frame" as const, message: frame.proto, frameIndex: frame.index }]; return { format: "connect", frameIndex: frame.index, @@ -1210,6 +1909,7 @@ function connectFramePayload(frame: ParsedConnectFrame): unknown { ...(frame.jsonValues.length === 0 ? {} : { jsonValues: frame.jsonValues }), ...(frame.proto === undefined ? {} : { protobuf: protoPayload(frame.proto) }), ...(frame.proto === undefined ? {} : optionalCursorWireUsage(frame.proto)), + ...(protoSources.length === 0 ? {} : optionalCursorToolCapabilities(protoSources)), }; } @@ -1218,6 +1918,11 @@ function optionalCursorWireUsage(message: DecodedProtoMessage): { readonly curso return usage.length === 0 ? {} : { cursorUsage: usage }; } +function optionalCursorToolCapabilities(sources: readonly ProtoSource[]): { readonly cursorToolCapabilities: readonly CursorToolCapability[] } | Record { + const capabilities = collectCursorToolCapabilities(sources); + return capabilities.length === 0 ? {} : { cursorToolCapabilities: capabilities }; +} + function protoPayload(message: DecodedProtoMessage): unknown { return { format: "protobuf", @@ -1232,6 +1937,11 @@ function protoPayload(message: DecodedProtoMessage): unknown { ...(field.packedVarints === undefined ? {} : { packedVarints: field.packedVarints }), ...(field.byteLength === undefined ? {} : { byteLength: field.byteLength }), ...(field.nested === undefined ? {} : { nested: protoPayload(field.nested) }), + ...(field.hexDecoded === undefined ? 
{} : { + hexDecoded: protoPayload(field.hexDecoded), + hexByteLength: field.hexByteLength, + hexSha256: field.hexSha256, + }), })), }; } @@ -1316,8 +2026,8 @@ function collectCursorWireUsageCandidates(message: DecodedProtoMessage): CursorW candidates.push(direct); } for (const field of message.fields) { - if (field.nested !== undefined) { - candidates.push(...collectCursorWireUsageCandidates(field.nested)); + for (const child of nestedProtoMessagesFromField(field)) { + candidates.push(...collectCursorWireUsageCandidates(child)); } } return candidates; @@ -1439,7 +2149,7 @@ function usageRecordFromUsage(callId: string, index: number, usage: Record & Partial>): string | undefined { +function cursorModelId(existing: string | undefined, body: Pick & Partial>): string | undefined { if (existing !== undefined && existing !== "cursor" && existing !== "unknown") { return existing; } @@ -1449,6 +2159,12 @@ function cursorModelId(existing: string | undefined, body: Pick 0) { return body.jsonLines; } + if (body.proto !== undefined) { + return protoPayload(body.proto); + } + if (body.connectFrames !== undefined) { + return body.connectFrames.map((frame) => ({ + index: frame.index, + payloadSha256: frame.payloadSha256, + })); + } return body.text ?? body.binaryText ?? ""; } @@ -1591,6 +2316,34 @@ function cursorSemanticTextFromDecoded(text: string, direction: string | undefin return usefulPrintableTextFromDecoded(text); } +function cursorSemanticTextFromProto(message: DecodedProtoMessage, direction: string | undefined): string | undefined { + const exactFieldText = collectProtoTextFields(message) + .flatMap((field) => { + const trimmed = field.text.trim(); + const direct = trimmed.startsWith("{") || /^data:\s*[{[]/m.test(trimmed) + ? 
[] + : [field.text].filter(isUsefulCursorSemanticText); + const jsonFields = extractJsonStringFields(field.text, ["text", "result"]) + .filter(isUsefulCursorSemanticText); + return [...direct, ...jsonFields]; + }); + if (exactFieldText.length > 0) { + return uniqueStrings(exactFieldText).join("\n"); + } + if (direction === "response") { + return undefined; + } + return undefined; +} + +function joinOptionalText(values: readonly (string | undefined)[]): string | undefined { + const parts = uniqueStrings(values + .filter((value): value is string => value !== undefined) + .map((value) => value.trim()) + .filter((value) => value.length > 0)); + return parts.length === 0 ? undefined : parts.join("\n"); +} + function extractJsonStringFields(text: string, keys: readonly string[]): string[] { const keyPattern = keys.map(escapeRegExp).join("|"); const pattern = new RegExp(`"(?:${keyPattern})"\\s*:\\s*"((?:\\\\.|[^"\\\\])*)"`, "g"); @@ -1657,7 +2410,9 @@ function uniqueStrings(values: readonly string[]): string[] { } function createBlock(input: BlockInput): NormalizedBlock { - const material = input.json === undefined ? input.text ?? "" : canonicalJson(input.json); + const material = input.text ?? (input.json === undefined ? "" : canonicalJson(input.json)); + const charSize = input.charSize ?? material.length; + const byteSize = input.byteSize ?? Buffer.byteLength(material, "utf8"); return { id: stableId("block", input.callId, input.direction, String(input.ordinal)), call_id: input.callId, @@ -1665,8 +2420,8 @@ function createBlock(input: BlockInput): NormalizedBlock { ordinal: input.ordinal, kind: input.kind, source: input.source, - char_size: material.length, - byte_size: Buffer.byteLength(material, "utf8"), + char_size: charSize, + byte_size: byteSize, content_hash: sha256(material), cache_marker: false, ...(input.role === undefined ? 
{} : { role: input.role }), diff --git a/src/spy/service.test.ts b/src/spy/service.test.ts index 874bde3..11313fb 100644 --- a/src/spy/service.test.ts +++ b/src/spy/service.test.ts @@ -124,35 +124,35 @@ function writeSpoolEvents(spoolDir: string, events: readonly SpoolEvent[]): void }); } -function cursorRequest(flowId: string): SpoolRequestEvent { +function cursorRequest(flowId: string, operation = "StreamUnifiedChat"): SpoolRequestEvent { return SpoolRequestEventSchema.parse({ version: 1, ts: 1779497300, direction: "request", flow_id: flowId, provider: "cursor", - operation: "StreamUnifiedChat", + operation, model_id: "Composer 2.5", host: "api2.cursor.sh", method: "POST", - path: "/aiserver.v1.AiService/StreamUnifiedChat", + path: `/aiserver.v1.AiService/${operation}`, headers: [["content-type", "application/json"]], body_text: JSON.stringify({ model: "Composer 2.5", prompt: "RCSPY-CURSOR-SERVICE" }), }); } -function cursorResponse(flowId: string): SpoolResponseEvent { +function cursorResponse(flowId: string, operation = "StreamUnifiedChat"): SpoolResponseEvent { return SpoolResponseEventSchema.parse({ version: 1, ts: 1779497301, direction: "response", flow_id: flowId, provider: "cursor", - operation: "StreamUnifiedChat", + operation, model_id: "Composer 2.5", host: "api2.cursor.sh", method: "POST", - path: "/aiserver.v1.AiService/StreamUnifiedChat", + path: `/aiserver.v1.AiService/${operation}`, headers: [["content-type", "application/json"]], status_code: 200, reason: "OK", @@ -263,10 +263,27 @@ describe("spy web service", () => { handle.store.persistRequest(cursorRequest("fixture-cursor-service")); expect(handle.store.persistResponse(cursorResponse("fixture-cursor-service"))).toBe(true); + handle.store.persistRequest(cursorRequest("fixture-cursor-support", "BidiAppend")); + expect(handle.store.persistResponse(cursorResponse("fixture-cursor-support", "BidiAppend"))).toBe(true); const cursorCallsResponse = await 
fetch(`${handle.url}/api/calls?provider=cursor&model_id=${encodeURIComponent("Composer 2.5")}&status=complete`); const cursorCalls = await jsonAs(cursorCallsResponse, SpyCallSummaryPageSchema); - expect(cursorCalls.items).toHaveLength(1); + expect(cursorCalls.items).toHaveLength(2); expect(cursorCalls.items[0]?.call.provider).toBe("cursor"); + const conversationCursorCalls = await jsonAs( + await fetch(`${handle.url}/api/calls?provider=cursor&traffic=conversation`), + SpyCallSummaryPageSchema, + ); + expect(conversationCursorCalls.items.map((item) => item.call.operation)).toEqual(["StreamUnifiedChat"]); + const allCursorCalls = await jsonAs( + await fetch(`${handle.url}/api/calls?provider=cursor&traffic=all`), + SpyCallSummaryPageSchema, + ); + expect(allCursorCalls.items.map((item) => item.call.operation)).toContain("BidiAppend"); + const explicitSupportOperation = await jsonAs( + await fetch(`${handle.url}/api/calls?provider=cursor&traffic=conversation&operation=BidiAppend`), + SpyCallSummaryPageSchema, + ); + expect(explicitSupportOperation.items.map((item) => item.call.operation)).toEqual(["BidiAppend"]); const filteredSearchResponse = await fetch(`${handle.url}/api/search?q=${encodeURIComponent("Fixture capture")}&since=1779496808&provider=bedrock&model_id=${encodeURIComponent("us.anthropic.claude-sonnet-4-6")}&operation=converse-stream&status=complete&limit=1`); const filteredSearch = await jsonAs(filteredSearchResponse, SpyCallSummaryPageSchema); diff --git a/src/spy/service.ts b/src/spy/service.ts index aaa887b..d845f03 100644 --- a/src/spy/service.ts +++ b/src/spy/service.ts @@ -24,6 +24,7 @@ import { type SpyStore, type SpyStoreOptions, type SpyStreamEventsOptions, + type SpyTrafficScope, } from "./store.ts"; import { ProviderCallStatusSchema, ProviderIdSchema, type ProviderId } from "./schemas.ts"; import { unavailableTokenRecord } from "./tokens.ts"; @@ -582,16 +583,18 @@ function searchOptions(url: URL): SpySearchCallsOptions { }; } -function 
callFilters(url: URL): Pick { +function callFilters(url: URL): Pick { const provider = providerParam(url); const modelId = stringParam(url, "model_id"); const operation = stringParam(url, "operation"); const status = statusParam(url); + const traffic = trafficParam(url); return { ...(provider === undefined ? {} : { provider }), ...(modelId === undefined ? {} : { modelId }), ...(operation === undefined ? {} : { operation }), ...(status === undefined ? {} : { status }), + ...(traffic === undefined ? {} : { traffic }), }; } @@ -619,6 +622,17 @@ function statusParam(url: URL): SpyListCallsOptions["status"] { return parsed.data; } +function trafficParam(url: URL): SpyTrafficScope | undefined { + const value = stringParam(url, "traffic"); + if (value === undefined) { + return undefined; + } + if (value === "conversation" || value === "all") { + return value; + } + throw new HttpError(400, "invalid traffic scope"); +} + function streamOptions(url: URL): SpyStreamEventsOptions { return { ...(stringParam(url, "cursor") === undefined ? 
{} : { cursor: stringParam(url, "cursor") }), diff --git a/src/spy/store.test.ts b/src/spy/store.test.ts index aa12fe7..981e84a 100644 --- a/src/spy/store.test.ts +++ b/src/spy/store.test.ts @@ -241,6 +241,19 @@ function protoMessageField(fieldNumber: number, message: Buffer): Buffer { return Buffer.concat([protoVarint(fieldNumber * 8 + 2), protoVarint(message.length), message]); } +function protoStringField(fieldNumber: number, value: string): Buffer { + return protoMessageField(fieldNumber, Buffer.from(value, "utf8")); +} + +function cursorContextSectionMetadata(key: string, label: string, startOffset: number, size: number): Buffer { + return Buffer.concat([ + protoStringField(1, key), + protoStringField(2, label), + protoVarintField(3, startOffset), + protoVarintField(4, size), + ]); +} + function responseVariant( event: SpoolResponseEvent, overrides: Partial>, @@ -412,6 +425,46 @@ describe("spy SQLite store", () => { } }); + test("includes Cursor protobuf context section metadata in request composition", () => { + const { store } = createTestStore(); + try { + const flowId = "fixture-cursor-context-section-metadata"; + const request = syntheticCursorRequest(flowId, 3060, { + model: "Composer 2.5", + prompt: "Cursor context metadata prompt", + }); + const sectionEnvelope = Buffer.concat([ + protoMessageField(3, cursorContextSectionMetadata("tools", "Tool definitions", 5_884, 24_509)), + protoMessageField(3, cursorContextSectionMetadata("conversation", "Conversation", 1_029, 3_083)), + ]); + const response = SpoolResponseEventSchema.parse({ + ...syntheticCursorResponse(flowId, 3061, {}), + headers: [["content-type", "application/connect+proto"]], + body_text: undefined, + body_b64: connectFrame(sectionEnvelope).toString("base64"), + }); + + store.persistRequest(request); + expect(store.persistResponse(response)).toBe(true); + + const detail = requiredDetail(store, cursorCallIdForFlow(flowId)); + expect(compositionSection(detail, 
"tool-definition")).toMatchObject({ + present: true, + blockCount: 1, + byteSize: 24_509, + }); + expect(compositionSection(detail, "prior-conversation-history")).toMatchObject({ + present: true, + blockCount: 1, + byteSize: 3_083, + }); + expect(detail.summary.requestByteSize).toBeGreaterThan(24_509 + 3_083); + expect(detail.blocks.find((block) => block.kind === "tool-definition")?.source).toBe("cursor-response-context-metadata"); + } finally { + store.close(); + } + }); + test("reassembles Cursor stream chunks into raw wire events and derived usage when raw payload storage is off", () => { const { dbPath, spoolDir, store } = createTestStore(); try { diff --git a/src/spy/store.ts b/src/spy/store.ts index 8a0bf4e..1ee9338 100644 --- a/src/spy/store.ts +++ b/src/spy/store.ts @@ -45,6 +45,12 @@ const DEFAULT_RETENTION_DAYS = 7; const DEFAULT_MAX_BYTES = 6 * 1024 * 1024 * 1024; const DEFAULT_QUERY_LIMIT = 100; const MAX_QUERY_LIMIT = 500; +const CURSOR_CONVERSATION_OPERATIONS = [ + "Run", + "RunSSE", + "StreamUnifiedChat", + "StreamUnifiedChatWithTools", +] as const; const REQUEST_COMPOSITION_SECTION_ORDER: readonly NormalizedBlock["kind"][] = [ "provider-envelope", "harness-system-context", @@ -167,6 +173,7 @@ export interface SpyProviderCallFilters { readonly modelId?: string | undefined; readonly operation?: string | undefined; readonly status?: ProviderCall["status"] | undefined; + readonly traffic?: SpyTrafficScope | undefined; } export interface SpyListCallsOptions extends SpyProviderCallFilters { @@ -174,6 +181,8 @@ export interface SpyListCallsOptions extends SpyProviderCallFilters { readonly limit?: number | undefined; } +export type SpyTrafficScope = "conversation" | "all"; + export interface SpySearchCallsOptions extends SpyProviderCallFilters { readonly query: string; readonly cursor?: string | undefined; @@ -1983,6 +1992,14 @@ function appendProviderCallFilters( conditions.push(`${column("status")} = ?`); params.push(options.status); } + if 
(options.traffic === "conversation" && options.operation === undefined) { + conditions.push(`(${column("provider")} != ? OR ${column("operation")} IN (${placeholders(CURSOR_CONVERSATION_OPERATIONS.length)}))`); + params.push("cursor", ...CURSOR_CONVERSATION_OPERATIONS); + } +} + +function placeholders(count: number): string { + return Array.from({ length: count }, () => "?").join(", "); } function blockSignature(block: NormalizedBlock): string { @@ -1997,7 +2014,8 @@ function blockSignature(block: NormalizedBlock): string { } function isResponseDerivedRequestBlock(block: NormalizedBlock): boolean { - return block.source === "cursor-response-request-context"; + return block.source === "cursor-response-request-context" + || block.source === "cursor-response-context-metadata"; } function httpEventFromRequest(event: SpoolRequestEvent, callId: string): HttpEventRecord { diff --git a/src/spy/ui/src/App.tsx b/src/spy/ui/src/App.tsx index 357ae1b..8cdccb2 100644 --- a/src/spy/ui/src/App.tsx +++ b/src/spy/ui/src/App.tsx @@ -60,6 +60,7 @@ import type { const api = new SpyApiClient(); const CALL_LIMIT = 100; const ALL_FILTER = "all"; +const DEFAULT_TRAFFIC_SCOPE = "conversation"; const TIMELINE_ROW_ESTIMATE = 138; const BLOCK_LIST_VIRTUALIZE_MIN_ITEMS = 24; const BLOCK_ROW_ESTIMATE = 230; @@ -94,6 +95,15 @@ const OPERATION_OPTIONS = [ { value: "Run", label: "Cursor Run" }, { value: "RunSSE", label: "Cursor Run SSE" }, { value: "StreamUnifiedChat", label: "Cursor Unified Chat" }, + { value: "StreamUnifiedChatWithTools", label: "Cursor Chat Tools" }, + { value: "BidiAppend", label: "Cursor Bidi Append" }, + { value: "SubmitLogs", label: "Cursor Logs" }, + { value: "TrackEvents", label: "Cursor Events" }, + { value: "traces", label: "Cursor Traces" }, + { value: "GetUserPrivacyMode", label: "Cursor Privacy" }, + { value: "GetServerConfig", label: "Cursor Config" }, + { value: "GetManagedSkills", label: "Cursor Skills" }, + { value: "AvailableModels", label: "Cursor Models" }, { 
value: "invoke", label: "Invoke" }, { value: "invoke-with-response-stream", label: "Invoke Stream" }, { value: "converse", label: "Converse" }, @@ -164,6 +174,7 @@ export function App(): React.ReactElement { operation: ALL_FILTER, status: ALL_FILTER, blockKind: ALL_FILTER, + traffic: DEFAULT_TRAFFIC_SCOPE, }); const [calls, setCalls] = React.useState([]); const [nextCursor, setNextCursor] = React.useState(); @@ -187,7 +198,8 @@ export function App(): React.ReactElement { filters.model, filters.operation, filters.status, - ].join("|"), [filters.model, filters.operation, filters.provider, filters.status, search, since]); + filters.traffic, + ].join("|"), [filters.model, filters.operation, filters.provider, filters.status, filters.traffic, search, since]); const previousTimelineContextKey = React.useRef(null); const previousSelectedCallId = React.useRef(undefined); @@ -203,6 +215,7 @@ export function App(): React.ReactElement { modelId: filterQueryValue(filters.model), operation: filterQueryValue(filters.operation), status: filterQueryValue(filters.status), + traffic: filters.traffic, ...(options.cursor === undefined ? {} : { cursor: options.cursor }), }); setCalls((current) => options.append === true ? [...current, ...page.items] : page.items); @@ -221,7 +234,7 @@ export function App(): React.ReactElement { setCallState("error"); setCallError(error instanceof Error ? error.message : "failed to load calls"); } - }, [filters.model, filters.operation, filters.provider, filters.status, preset, search, since]); + }, [filters.model, filters.operation, filters.provider, filters.status, filters.traffic, preset, search, since]); React.useEffect(() => { void loadCalls(); @@ -793,6 +806,16 @@ function TimelineControls(props: { +