From 14202a9bb9d0588bb3c82e78ce04d95a3b2b511a Mon Sep 17 00:00:00 2001 From: Ben Date: Thu, 7 May 2026 14:38:08 +0100 Subject: [PATCH 01/10] feat: route default deepclaude through the model proxy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit launch_claude now starts proxy/start-proxy.js and points ANTHROPIC_BASE_URL at the local proxy. Previously only --remote did this. Without it, plain deepclaude pointed Claude Code straight at the backend URL, bypassing the proxy entirely (which means /_proxy/cost always reported zero and any proxy-side feature couldn't fire). start_proxy is a shared helper that sets PROXY_PID/PROXY_PORT/PROXY_LOG as script globals; must be called WITHOUT command substitution because the EXIT trap depends on PROXY_PID reaching the parent shell. SCRIPT_DIR is symlink-resolved so deepclaude works when installed via a ~/.local/bin symlink. The exec on `claude` is dropped so the EXIT trap fires and the node child is cleaned up. ANTHROPIC_AUTH_TOKEN is left untouched — whatever the user has in their environment flows through. start-proxy.js legacy mode accepts an optional [defaultMode] third arg so state.mode resolves to e.g. `deepseek` rather than `_single` and MODEL_REMAP[state.mode] fires. Co-Authored-By: Claude Opus 4.7 (1M context) --- deepclaude.sh | 144 +++++++++++++++++++++++++++++++++++++------ proxy/start-proxy.js | 5 +- 2 files changed, 127 insertions(+), 22 deletions(-) diff --git a/deepclaude.sh b/deepclaude.sh index 5f59e3a..7962b0b 100644 --- a/deepclaude.sh +++ b/deepclaude.sh @@ -4,7 +4,17 @@ set -euo pipefail -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# Resolve SCRIPT_DIR through any symlink chain (e.g. /usr/local/bin/deepclaude +# -> /path/to/repo/deepclaude.sh) so $SCRIPT_DIR/proxy/... works regardless of +# how the script was invoked. 
+_source="${BASH_SOURCE[0]}" +while [ -L "$_source" ]; do + _dir="$(cd "$(dirname "$_source")" && pwd)" + _source="$(readlink "$_source")" + [[ "$_source" != /* ]] && _source="$_dir/$_source" +done +SCRIPT_DIR="$(cd "$(dirname "$_source")" && pwd)" +unset _source _dir # --- Config --- DEEPSEEK_URL="https://api.deepseek.com/anthropic" @@ -19,14 +29,15 @@ PROXY_PID="" # --- Parse args --- while [[ $# -gt 0 ]]; do case "$1" in - --backend|-b) BACKEND="$2"; shift 2 ;; - --switch|-s) ACTION="switch"; SWITCH_BACKEND="$2"; shift 2 ;; - --remote|-r) ACTION="remote"; shift ;; - --status) ACTION="status"; shift ;; - --cost) ACTION="cost"; shift ;; - --benchmark) ACTION="benchmark"; shift ;; - --help|-h) ACTION="help"; shift ;; - *) break ;; + --backend|-b) BACKEND="$2"; shift 2 ;; + --switch|-s) ACTION="switch"; SWITCH_BACKEND="$2"; shift 2 ;; + --remote|-r) ACTION="remote"; shift ;; + --status) ACTION="status"; shift ;; + --cost) ACTION="cost"; shift ;; + --benchmark) ACTION="benchmark"; shift ;; + --install-statusline) ACTION="install-statusline"; shift ;; + --help|-h) ACTION="help"; shift ;; + *) break ;; esac done @@ -85,6 +96,60 @@ set_model_env() { export CLAUDE_CODE_EFFORT_LEVEL="max" } +backend_long_name() { + case "$1" in + ds|deepseek) echo "deepseek" ;; + or|openrouter) echo "openrouter" ;; + fw|fireworks) echo "fireworks" ;; + anthropic) echo "anthropic" ;; + *) echo "ERROR: Unknown backend '$1'. Use: ds, or, fw, anthropic" >&2; return 1 ;; + esac +} + +# Sets PROXY_PID, PROXY_PORT, PROXY_LOG as script globals so the EXIT trap +# can clean up the node child. Must be called WITHOUT command substitution +# — $(start_proxy) runs in a subshell and globals never reach the parent. +# Requires: RESOLVED_URL, RESOLVED_KEY, BACKEND already set. 
+start_proxy() { + local backend_long + backend_long=$(backend_long_name "$BACKEND") || exit 1 + + PROXY_LOG="${PROXY_LOG:-/tmp/deepclaude-proxy.$$.log}" + : > "$PROXY_LOG" + node "$SCRIPT_DIR/proxy/start-proxy.js" "$RESOLVED_URL" "$RESOLVED_KEY" "$backend_long" >> "$PROXY_LOG" 2>&1 & + PROXY_PID=$! + + # The proxy emits a banner line, then a bare-numeric port line on a + # successful bind. Match the bare integer to skip the banner; do not + # introduce other numeric-only stdout in proxy startup. + local proxy_port="" + local tries=0 + while [[ -z "$proxy_port" ]] && [[ $tries -lt 30 ]]; do + if kill -0 "$PROXY_PID" 2>/dev/null; then + # `|| true`: with `set -o pipefail`, grep no-match (exit 1) + # would otherwise exit the script; we expect zero matches on + # early iterations before the proxy has emitted its port. + proxy_port=$(grep -E '^[0-9]+$' "$PROXY_LOG" 2>/dev/null | head -1 || true) + else + echo "ERROR: Proxy process died during startup" >&2 + echo " Log: $PROXY_LOG" >&2 + tail -20 "$PROXY_LOG" >&2 2>/dev/null + exit 1 + fi + [[ -z "$proxy_port" ]] && sleep 0.2 + tries=$((tries + 1)) + done + + if [[ -z "$proxy_port" ]]; then + echo "ERROR: Proxy failed to report a port within 6s" >&2 + echo " Log: $PROXY_LOG" >&2 + tail -20 "$PROXY_LOG" >&2 2>/dev/null + exit 1 + fi + + PROXY_PORT="$proxy_port" +} + show_status() { echo "" echo " deepclaude — Backend Status" @@ -142,6 +207,8 @@ show_help() { echo " --cost Pricing comparison" echo " --benchmark Latency test" echo " -s, --switch Switch proxy mid-session" + echo " --install-statusline Add Claude Code statusLine showing" + echo " routing + cumulative cost (requires jq)" echo " -h, --help This help" echo "" echo "Environment variables:" @@ -151,6 +218,39 @@ show_help() { echo " CHEAPCLAUDE_DEFAULT_BACKEND Default backend (default: ds)" } +do_install_statusline() { + if ! 
command -v jq >/dev/null 2>&1; then + echo "ERROR: jq is required to merge ~/.claude/settings.json" >&2 + echo " Install with: brew install jq (or your platform equivalent)" >&2 + exit 1 + fi + + local script_path="$SCRIPT_DIR/bin/deepclaude-statusline" + if [[ ! -x "$script_path" ]]; then + echo "ERROR: $script_path not found or not executable" >&2 + exit 1 + fi + + local settings_dir="$HOME/.claude" + local settings_file="$settings_dir/settings.json" + mkdir -p "$settings_dir" + if [[ ! -f "$settings_file" ]]; then + echo '{}' > "$settings_file" + fi + + local tmp + tmp=$(mktemp) + # Merge into existing settings.json (preserves any other keys the user + # has set, e.g. permissions, hooks). + jq --arg cmd "$script_path" \ + '. + {statusLine: {type: "command", command: $cmd}}' \ + "$settings_file" > "$tmp" + mv "$tmp" "$settings_file" + + echo " Installed statusLine: $script_path" + echo " ~/.claude/settings.json updated. Restart Claude Code to see the new line." +} + do_switch() { local backend="$SWITCH_BACKEND" case "$backend" in @@ -207,17 +307,20 @@ launch_claude() { resolve_backend + echo " Starting model proxy for $BACKEND..." + start_proxy + echo " Proxy log: $PROXY_LOG" + echo " Launching Claude Code via $BACKEND..." - echo " Endpoint: $RESOLVED_URL" + echo " Proxy on :$PROXY_PORT -> $RESOLVED_URL" echo " Model: $RESOLVED_OPUS (main) + $RESOLVED_HAIKU (subagents)" echo "" - export ANTHROPIC_BASE_URL="$RESOLVED_URL" - export ANTHROPIC_AUTH_TOKEN="$RESOLVED_KEY" + export ANTHROPIC_BASE_URL="http://127.0.0.1:$PROXY_PORT" set_model_env - unset ANTHROPIC_API_KEY - exec claude "$@" + # Don't `exec` — the EXIT trap needs to fire to stop the proxy. 
+ claude "$@" } launch_remote() { @@ -268,11 +371,12 @@ launch_remote() { # --- Main --- case "$ACTION" in - status) show_status ;; - cost) show_cost ;; - benchmark) run_benchmark ;; - help) show_help ;; - switch) do_switch ;; - remote) launch_remote "$@" ;; + status) show_status ;; + cost) show_cost ;; + benchmark) run_benchmark ;; + help) show_help ;; + switch) do_switch ;; + install-statusline) do_install_statusline ;; + remote) launch_remote "$@" ;; launch) launch_claude "$@" ;; esac diff --git a/proxy/start-proxy.js b/proxy/start-proxy.js index 5847076..cb57f29 100644 --- a/proxy/start-proxy.js +++ b/proxy/start-proxy.js @@ -7,9 +7,10 @@ const BACKEND_DEFS = { fireworks: { url: 'https://api.fireworks.ai/inference/v1', keyEnv: 'FIREWORKS_API_KEY' }, }; -// Legacy mode: start-proxy.js (used by deepclaude.sh/ps1) +// Legacy mode: start-proxy.js [defaultMode] (used by deepclaude.sh/ps1) const targetUrl = process.argv[2] || process.env.CHEAPCLAUDE_TARGET_URL; const apiKey = process.argv[3] || process.env.CHEAPCLAUDE_API_KEY; +const legacyDefaultMode = process.argv[4] || process.env.CHEAPCLAUDE_DEFAULT_MODE; if (targetUrl && apiKey) { // Legacy single-backend mode @@ -24,7 +25,7 @@ if (targetUrl && apiKey) { targetUrl, apiKey, backends: hasBackends ? backends : undefined, - defaultMode: hasBackends ? 
undefined : undefined, + defaultMode: legacyDefaultMode || undefined, }); console.log(port); } else { From a4a1e1923564fa1689b782c556c54156466fa78f Mon Sep 17 00:00:00 2001 From: Ben Date: Thu, 7 May 2026 14:38:42 +0100 Subject: [PATCH 02/10] feat: cost-statusline showing routing + cumulative cost MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a Claude Code statusLine integration that surfaces the actual backend routing and accumulated cost in the bottom bar — closes the loop on the TUI welcome chip lying about the model under --auto, and gives default mode a live token/cost readout that previously only showed via `curl /_proxy/cost`. Output looks like: [claude-opus-4-7 → deepseek-v4-pro on api.deepseek.com] · 12.3K tokens · $0.04 When the env var name and the wire-side name match (default mode, no --auto), the arrow is dropped: [deepseek-v4-pro on api.deepseek.com] · 12.3K tokens · $0.04 Components: - proxy/model-proxy.js tracks state.lastRequest = { client_model, wire_model, destination, timestamp } after each /v1/messages remap and exposes it via /_proxy/status alongside backend_host. The status line script polls this once per render. - bin/deepclaude-statusline reads Claude Code's status JSON from stdin, curls the proxy for status + cost, formats the line, prints. Graceful fallback when the proxy isn't reachable. Requires jq. - `deepclaude --install-statusline` merges the statusLine entry into ~/.claude/settings.json idempotently (uses jq's '. + {}' so existing keys like permissions or hooks are preserved). Documented in --help. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- bin/deepclaude-statusline | 78 +++++++++++++++++++++++++++++++++++++++ proxy/model-proxy.js | 35 ++++++++++++++---- 2 files changed, 106 insertions(+), 7 deletions(-) create mode 100755 bin/deepclaude-statusline diff --git a/bin/deepclaude-statusline b/bin/deepclaude-statusline new file mode 100755 index 0000000..3e52436 --- /dev/null +++ b/bin/deepclaude-statusline @@ -0,0 +1,78 @@ +#!/usr/bin/env bash +# Claude Code statusLine for deepclaude. Reads Claude Code's status JSON +# from stdin, polls the local proxy for routing + cumulative cost, and +# emits a one-line summary like: +# +# [claude-opus-4-7 → deepseek-v4-pro on api.deepseek.com] · 12.3K tokens · $0.04 +# +# Install via `deepclaude --install-statusline` or by adding to +# ~/.claude/settings.json: +# +# { "statusLine": { "type": "command", "command": "/abs/path/to/deepclaude-statusline" } } +# +# Requires jq. + +set -uo pipefail + +PROXY_PORT="${DEEPCLAUDE_PROXY_PORT:-3200}" +TIMEOUT="${DEEPCLAUDE_STATUSLINE_TIMEOUT:-1}" + +# Claude Code passes JSON on stdin. Best-effort read; falls through if +# stdin is empty or not JSON. +input="" +if [[ -t 0 ]]; then + input="{}" +else + input=$(cat 2>/dev/null || echo "{}") +fi + +if ! 
command -v jq >/dev/null 2>&1; then + echo "[deepclaude statusline: jq not installed]" + exit 0 +fi + +claude_model=$(echo "$input" | jq -r '.model.id // .model.display_name // empty' 2>/dev/null) + +status=$(curl -s --max-time "$TIMEOUT" "http://127.0.0.1:${PROXY_PORT}/_proxy/status" 2>/dev/null || echo "{}") +cost=$(curl -s --max-time "$TIMEOUT" "http://127.0.0.1:${PROXY_PORT}/_proxy/cost" 2>/dev/null || echo "{}") + +if [[ "$status" == "{}" ]]; then + echo "[deepclaude proxy not reachable on :${PROXY_PORT}]" + exit 0 +fi + +backend_host=$(echo "$status" | jq -r '.backend_host // "?"') +last_client=$(echo "$status" | jq -r '.last_request.client_model // empty') +last_wire=$(echo "$status" | jq -r '.last_request.wire_model // empty') +last_dest=$(echo "$status" | jq -r '.last_request.destination // empty') + +# Sum tokens across all backend buckets; fall back to 0 if cost endpoint +# returned nothing. +total_tokens=$(echo "$cost" | jq '[.backends // {} | to_entries[] | (.value.input_tokens + .value.output_tokens)] | add // 0' 2>/dev/null) +total_cost=$(echo "$cost" | jq -r '.total_cost // 0' 2>/dev/null) + +# 12345 → 12.3K, 1234567 → 1.2M +fmt_tokens() { + awk -v t="$1" 'BEGIN { + if (t >= 1000000) printf "%.1fM", t/1000000; + else if (t >= 1000) printf "%.1fK", t/1000; + else printf "%d", t; + }' +} +tokens_fmt=$(fmt_tokens "$total_tokens") +cost_fmt=$(awk -v c="$total_cost" 'BEGIN { printf "$%.2f", c }') + +# Compose the model display. Prefer last_request data (most accurate +# client → wire). Fall back to claude_model from stdin if proxy hasn't +# seen a request yet. 
+display_model="${last_client:-$claude_model}" +display_wire="${last_wire:-$display_model}" +display_dest="${last_dest:-$backend_host}" + +if [[ -n "$display_model" && "$display_model" != "$display_wire" ]]; then + echo "[$display_model → $display_wire on $display_dest] · $tokens_fmt tokens · $cost_fmt" +elif [[ -n "$display_model" ]]; then + echo "[$display_model on $display_dest] · $tokens_fmt tokens · $cost_fmt" +else + echo "[deepclaude on $display_dest] · $tokens_fmt tokens · $cost_fmt" +fi diff --git a/proxy/model-proxy.js b/proxy/model-proxy.js index 85a9295..9049922 100644 --- a/proxy/model-proxy.js +++ b/proxy/model-proxy.js @@ -146,6 +146,9 @@ export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, apiKey: startBackend ? startBackend.apiKey : apiKey, useBearer: startBackend ? startBackend.useBearer : initialBearer, hadNonAnthropicSession: !!startBackend, + // Last /v1/messages we forwarded; surfaced via /_proxy/status so a + // statusLine integration can show client → wire mapping live. + lastRequest: null, }; let reqCount = 0; @@ -216,8 +219,10 @@ export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, clientRes.writeHead(200, { 'content-type': 'application/json' }); clientRes.end(JSON.stringify({ mode: state.mode, + backend_host: state.target.hostname, uptime: Math.round((Date.now() - t0Global) / 1000), requests: reqCount, + last_request: state.lastRequest, })); return; } @@ -315,17 +320,33 @@ export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, clientReq.on('end', () => { let body = Buffer.concat(chunks); - // Remap Anthropic model names to backend-specific names - if (isModelCall && MODEL_REMAP[state.mode]) { + // Remap Anthropic model names to backend-specific names, and + // capture client → wire mapping for /_proxy/status. 
+ let clientModel = null; + let wireModel = null; + if (MODEL_PATHS.includes(urlPath)) { try { const parsed = JSON.parse(body); - const mapped = MODEL_REMAP[state.mode][parsed.model]; - if (mapped) { - console.log(`[MODEL-PROXY] #${reqId} model remap: ${parsed.model} → ${mapped}`); - parsed.model = mapped; - body = Buffer.from(JSON.stringify(parsed)); + clientModel = parsed.model || null; + wireModel = clientModel; + if (isModelCall && MODEL_REMAP[state.mode]) { + const mapped = MODEL_REMAP[state.mode][parsed.model]; + if (mapped) { + console.log(`[MODEL-PROXY] #${reqId} model remap: ${parsed.model} → ${mapped}`); + parsed.model = mapped; + wireModel = mapped; + body = Buffer.from(JSON.stringify(parsed)); + } } } catch { /* not JSON or parse error, pass through */ } + if (clientModel) { + state.lastRequest = { + client_model: clientModel, + wire_model: wireModel, + destination: dest.hostname, + timestamp: Date.now(), + }; + } } // Strip thinking blocks before forwarding. From 9339459706da9cd8b3d114b355d17c8a8424d3ff Mon Sep 17 00:00:00 2001 From: Ben Date: Thu, 7 May 2026 14:38:42 +0100 Subject: [PATCH 03/10] chore: mark deepclaude.sh executable Co-Authored-By: Claude Opus 4.7 (1M context) --- deepclaude.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 deepclaude.sh diff --git a/deepclaude.sh b/deepclaude.sh old mode 100644 new mode 100755 From 23ad761c911e7c2d9bbb1e2f0a3b2acec118e0c9 Mon Sep 17 00:00:00 2001 From: Ben Date: Thu, 7 May 2026 14:44:53 +0100 Subject: [PATCH 04/10] feat: auto-install statusLine on launch instead of opt-in flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previous design required `deepclaude --install-statusline` before the status line would appear. Forgetting that step (or not knowing about it) left the bottom of the TUI empty — failing the cost-savings premise on its main UX surface. 
Now `launch_claude` and `launch_remote` call ensure_statusline_installed synchronously before `claude` starts. Behaviour: - If ~/.claude/settings.json has no statusLine: idempotently merge ours in (preserves all other keys), print a one-line install notice. - If statusLine is already configured (ours or someone else's): no-op. - If jq is not on PATH: silent skip — deepclaude still launches, just without the status line. Removes the explicit `--install-statusline` flag/action. There's no "opt-out" — if the user wants their own statusLine, they configure it themselves and ours respects their choice. Co-Authored-By: Claude Opus 4.7 (1M context) --- deepclaude.sh | 81 +++++++++++++++++++++++++-------------------------- 1 file changed, 39 insertions(+), 42 deletions(-) diff --git a/deepclaude.sh b/deepclaude.sh index 7962b0b..7f8c04b 100755 --- a/deepclaude.sh +++ b/deepclaude.sh @@ -29,15 +29,14 @@ PROXY_PID="" # --- Parse args --- while [[ $# -gt 0 ]]; do case "$1" in - --backend|-b) BACKEND="$2"; shift 2 ;; - --switch|-s) ACTION="switch"; SWITCH_BACKEND="$2"; shift 2 ;; - --remote|-r) ACTION="remote"; shift ;; - --status) ACTION="status"; shift ;; - --cost) ACTION="cost"; shift ;; - --benchmark) ACTION="benchmark"; shift ;; - --install-statusline) ACTION="install-statusline"; shift ;; - --help|-h) ACTION="help"; shift ;; - *) break ;; + --backend|-b) BACKEND="$2"; shift 2 ;; + --switch|-s) ACTION="switch"; SWITCH_BACKEND="$2"; shift 2 ;; + --remote|-r) ACTION="remote"; shift ;; + --status) ACTION="status"; shift ;; + --cost) ACTION="cost"; shift ;; + --benchmark) ACTION="benchmark"; shift ;; + --help|-h) ACTION="help"; shift ;; + *) break ;; esac done @@ -207,8 +206,6 @@ show_help() { echo " --cost Pricing comparison" echo " --benchmark Latency test" echo " -s, --switch Switch proxy mid-session" - echo " --install-statusline Add Claude Code statusLine showing" - echo " routing + cumulative cost (requires jq)" echo " -h, --help This help" echo "" echo 
"Environment variables:" @@ -218,37 +215,36 @@ show_help() { echo " CHEAPCLAUDE_DEFAULT_BACKEND Default backend (default: ds)" } -do_install_statusline() { - if ! command -v jq >/dev/null 2>&1; then - echo "ERROR: jq is required to merge ~/.claude/settings.json" >&2 - echo " Install with: brew install jq (or your platform equivalent)" >&2 - exit 1 - fi +# Auto-installs the deepclaude statusLine into ~/.claude/settings.json on +# every launch. No-op if the user already has a statusLine configured +# (either ours or their own custom command). Silent skip if jq isn't on +# PATH so deepclaude still launches without it. +ensure_statusline_installed() { + command -v jq >/dev/null 2>&1 || return 0 local script_path="$SCRIPT_DIR/bin/deepclaude-statusline" - if [[ ! -x "$script_path" ]]; then - echo "ERROR: $script_path not found or not executable" >&2 - exit 1 - fi + [[ -x "$script_path" ]] || return 0 local settings_dir="$HOME/.claude" local settings_file="$settings_dir/settings.json" mkdir -p "$settings_dir" - if [[ ! -f "$settings_file" ]]; then - echo '{}' > "$settings_file" + [[ -f "$settings_file" ]] || echo '{}' > "$settings_file" + + local existing + existing=$(jq -r '.statusLine.command // empty' "$settings_file" 2>/dev/null || echo "") + + if [[ -z "$existing" ]]; then + local tmp + tmp=$(mktemp) + if jq --arg cmd "$script_path" \ + '. + {statusLine: {type: "command", command: $cmd}}' \ + "$settings_file" > "$tmp"; then + mv "$tmp" "$settings_file" + echo " Installed deepclaude statusLine in $settings_file" + else + rm -f "$tmp" + fi fi - - local tmp - tmp=$(mktemp) - # Merge into existing settings.json (preserves any other keys the user - # has set, e.g. permissions, hooks). - jq --arg cmd "$script_path" \ - '. + {statusLine: {type: "command", command: $cmd}}' \ - "$settings_file" > "$tmp" - mv "$tmp" "$settings_file" - - echo " Installed statusLine: $script_path" - echo " ~/.claude/settings.json updated. Restart Claude Code to see the new line." 
} do_switch() { @@ -306,6 +302,7 @@ launch_claude() { fi resolve_backend + ensure_statusline_installed echo " Starting model proxy for $BACKEND..." start_proxy @@ -334,6 +331,7 @@ launch_remote() { fi resolve_backend + ensure_statusline_installed echo " Starting model proxy for $BACKEND..." @@ -371,12 +369,11 @@ launch_remote() { # --- Main --- case "$ACTION" in - status) show_status ;; - cost) show_cost ;; - benchmark) run_benchmark ;; - help) show_help ;; - switch) do_switch ;; - install-statusline) do_install_statusline ;; - remote) launch_remote "$@" ;; + status) show_status ;; + cost) show_cost ;; + benchmark) run_benchmark ;; + help) show_help ;; + switch) do_switch ;; + remote) launch_remote "$@" ;; launch) launch_claude "$@" ;; esac From cc2414281a88397678d17e3f82b796ea510e5870 Mon Sep 17 00:00:00 2001 From: Ben Date: Thu, 7 May 2026 14:49:51 +0100 Subject: [PATCH 05/10] feat: split input/output tokens and show savings vs Anthropic in statusline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous status line collapsed all tokens into one number and showed only the actual cost — burying the headline value of deepclaude (cost saved versus running through Anthropic directly). New format: [claude-opus-4-7 → deepseek-v4-pro on api.deepseek.com] · ↑5.2K ↓1.1K · $0.04 (saved $0.13) `↑` is input tokens, `↓` is output tokens. The savings tail only appears when savings would round to >= $0.01 (no "saved $0.00" noise on a fresh session or in pure-Anthropic mode). Implementation: - `getCostSummary()` now exposes `total_input_tokens` and `total_output_tokens` at the top level so the script doesn't have to fold across backend buckets with jq. `savings` was already top-level (anthropic_equivalent - total_cost), now surfaced. - bin/deepclaude-statusline reads the new fields, formats input/output separately, conditionally appends the savings tail. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- bin/deepclaude-statusline | 34 +++++++++++++++++++--------------- proxy/model-proxy.js | 6 ++++++ 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/bin/deepclaude-statusline b/bin/deepclaude-statusline index 3e52436..2824966 100755 --- a/bin/deepclaude-statusline +++ b/bin/deepclaude-statusline @@ -3,13 +3,9 @@ # from stdin, polls the local proxy for routing + cumulative cost, and # emits a one-line summary like: # -# [claude-opus-4-7 → deepseek-v4-pro on api.deepseek.com] · 12.3K tokens · $0.04 -# -# Install via `deepclaude --install-statusline` or by adding to -# ~/.claude/settings.json: -# -# { "statusLine": { "type": "command", "command": "/abs/path/to/deepclaude-statusline" } } +# [claude-opus-4-7 → deepseek-v4-pro on api.deepseek.com] · ↑5.2K ↓1.1K · $0.04 (saved $0.13) # +# Auto-installed by deepclaude on launch via ensure_statusline_installed. # Requires jq. set -uo pipefail @@ -17,8 +13,6 @@ set -uo pipefail PROXY_PORT="${DEEPCLAUDE_PROXY_PORT:-3200}" TIMEOUT="${DEEPCLAUDE_STATUSLINE_TIMEOUT:-1}" -# Claude Code passes JSON on stdin. Best-effort read; falls through if -# stdin is empty or not JSON. input="" if [[ -t 0 ]]; then input="{}" @@ -46,10 +40,10 @@ last_client=$(echo "$status" | jq -r '.last_request.client_model // empty') last_wire=$(echo "$status" | jq -r '.last_request.wire_model // empty') last_dest=$(echo "$status" | jq -r '.last_request.destination // empty') -# Sum tokens across all backend buckets; fall back to 0 if cost endpoint -# returned nothing. 
-total_tokens=$(echo "$cost" | jq '[.backends // {} | to_entries[] | (.value.input_tokens + .value.output_tokens)] | add // 0' 2>/dev/null) +input_tokens=$(echo "$cost" | jq -r '.total_input_tokens // 0' 2>/dev/null) +output_tokens=$(echo "$cost" | jq -r '.total_output_tokens // 0' 2>/dev/null) total_cost=$(echo "$cost" | jq -r '.total_cost // 0' 2>/dev/null) +savings=$(echo "$cost" | jq -r '.savings // 0' 2>/dev/null) # 12345 → 12.3K, 1234567 → 1.2M fmt_tokens() { @@ -59,9 +53,17 @@ fmt_tokens() { else printf "%d", t; }' } -tokens_fmt=$(fmt_tokens "$total_tokens") +input_fmt=$(fmt_tokens "$input_tokens") +output_fmt=$(fmt_tokens "$output_tokens") cost_fmt=$(awk -v c="$total_cost" 'BEGIN { printf "$%.2f", c }') +# Show savings tail only when we'd round to >= $0.01. +savings_part="" +if awk -v s="$savings" 'BEGIN { exit !(s >= 0.005) }'; then + savings_fmt=$(awk -v s="$savings" 'BEGIN { printf "$%.2f", s }') + savings_part=" (saved ${savings_fmt})" +fi + # Compose the model display. Prefer last_request data (most accurate # client → wire). Fall back to claude_model from stdin if proxy hasn't # seen a request yet. 
@@ -70,9 +72,11 @@ display_wire="${last_wire:-$display_model}" display_dest="${last_dest:-$backend_host}" if [[ -n "$display_model" && "$display_model" != "$display_wire" ]]; then - echo "[$display_model → $display_wire on $display_dest] · $tokens_fmt tokens · $cost_fmt" + model_part="[$display_model → $display_wire on $display_dest]" elif [[ -n "$display_model" ]]; then - echo "[$display_model on $display_dest] · $tokens_fmt tokens · $cost_fmt" + model_part="[$display_model on $display_dest]" else - echo "[deepclaude on $display_dest] · $tokens_fmt tokens · $cost_fmt" + model_part="[deepclaude on $display_dest]" fi + +echo "${model_part} · ↑${input_fmt} ↓${output_fmt} · ${cost_fmt}${savings_part}" diff --git a/proxy/model-proxy.js b/proxy/model-proxy.js index 9049922..db1b275 100644 --- a/proxy/model-proxy.js +++ b/proxy/model-proxy.js @@ -166,6 +166,8 @@ export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, const summary = {}; let totalActual = 0; let totalAnthropic = 0; + let totalInput = 0; + let totalOutput = 0; for (const [backend, tokens] of Object.entries(costs)) { const p = PRICING_PER_M[backend] || PRICING_PER_M._single; const ap = PRICING_PER_M.anthropic; @@ -173,6 +175,8 @@ export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, const anthropicEq = (tokens.input * ap.input + tokens.output * ap.output) / 1_000_000; totalActual += actual; totalAnthropic += anthropicEq; + totalInput += tokens.input; + totalOutput += tokens.output; summary[backend] = { input_tokens: tokens.input, output_tokens: tokens.output, @@ -183,6 +187,8 @@ export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, } return { backends: summary, + total_input_tokens: totalInput, + total_output_tokens: totalOutput, total_cost: +totalActual.toFixed(4), anthropic_equivalent: +totalAnthropic.toFixed(4), savings: +((totalAnthropic - totalActual).toFixed(4)), From 8f576a25e96eeec79e9703ba8b113addb65ea2fa Mon Sep 17 
00:00:00 2001 From: Ben Date: Thu, 7 May 2026 14:50:33 +0100 Subject: [PATCH 06/10] feat: add percent-saved to statusline savings tail MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit · $0.04 (saved $0.13, 76%) Computed in the script as `savings / anthropic_equivalent * 100` (both already top-level fields on /_proxy/cost). Reports 0% if anthropic_equivalent is 0 (which would mean no requests yet, in which case the savings tail itself is suppressed anyway). Co-Authored-By: Claude Opus 4.7 (1M context) --- bin/deepclaude-statusline | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/bin/deepclaude-statusline b/bin/deepclaude-statusline index 2824966..34d9534 100755 --- a/bin/deepclaude-statusline +++ b/bin/deepclaude-statusline @@ -44,6 +44,7 @@ input_tokens=$(echo "$cost" | jq -r '.total_input_tokens // 0' 2>/dev/null) output_tokens=$(echo "$cost" | jq -r '.total_output_tokens // 0' 2>/dev/null) total_cost=$(echo "$cost" | jq -r '.total_cost // 0' 2>/dev/null) savings=$(echo "$cost" | jq -r '.savings // 0' 2>/dev/null) +anthropic_eq=$(echo "$cost" | jq -r '.anthropic_equivalent // 0' 2>/dev/null) # 12345 → 12.3K, 1234567 → 1.2M fmt_tokens() { @@ -57,11 +58,14 @@ input_fmt=$(fmt_tokens "$input_tokens") output_fmt=$(fmt_tokens "$output_tokens") cost_fmt=$(awk -v c="$total_cost" 'BEGIN { printf "$%.2f", c }') -# Show savings tail only when we'd round to >= $0.01. +# Show savings tail only when we'd round to >= $0.01. Percent is savings +# as a fraction of what Anthropic would have charged. savings_part="" if awk -v s="$savings" 'BEGIN { exit !(s >= 0.005) }'; then - savings_fmt=$(awk -v s="$savings" 'BEGIN { printf "$%.2f", s }') - savings_part=" (saved ${savings_fmt})" + savings_part=$(awk -v s="$savings" -v a="$anthropic_eq" 'BEGIN { + pct = (a > 0) ? (s / a) * 100 : 0; + printf " (saved $%.2f, %.0f%%)", s, pct; + }') fi # Compose the model display. 
Prefer last_request data (most accurate From 66308d1aafd56db3d5780182dad2972666393577 Mon Sep 17 00:00:00 2001 From: Ben Date: Thu, 7 May 2026 14:52:40 +0100 Subject: [PATCH 07/10] feat: trailing blank line in statusline (visual breathing room) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A second \n after the content gives a one-row gap below the status line in Claude Code's bottom bar. Closest a shell statusLine command can get to CSS-style bottom padding — terminals can't render sub-row vertical space. Co-Authored-By: Claude Opus 4.7 (1M context) --- bin/deepclaude-statusline | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bin/deepclaude-statusline b/bin/deepclaude-statusline index 34d9534..f390ab2 100755 --- a/bin/deepclaude-statusline +++ b/bin/deepclaude-statusline @@ -83,4 +83,6 @@ else model_part="[deepclaude on $display_dest]" fi -echo "${model_part} · ↑${input_fmt} ↓${output_fmt} · ${cost_fmt}${savings_part}" +# Trailing blank line gives a one-row gap below the status line. Closest a +# shell statusLine command can get to bottom padding. +printf '%s\n\n' "${model_part} · ↑${input_fmt} ↓${output_fmt} · ${cost_fmt}${savings_part}" From 9a971224a2c0f53ce51e25536c431cdb0d826055 Mon Sep 17 00:00:00 2001 From: Ben Date: Thu, 7 May 2026 14:55:16 +0100 Subject: [PATCH 08/10] fix: statusline shows main model, not last subagent call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Claude Code makes background subagent calls (haiku → deepseek-v4-flash) for things like topic detection during startup. Tracking those as 'last_request' caused the status line to flicker to flash even when the user is actively conversing with the main opus/pro model. Two changes: - /_proxy/status now also exposes `model_remap` (the MODEL_REMAP table for the current state.mode). Lets the shell look up wire-side mapping for any client model without duplicating the table. 
- bin/deepclaude-statusline reads Claude Code's `model.id` from stdin (which is the *main* conversation model, stable across subagent activity) and looks the wire side up in model_remap. Falls back to last_request.client_model only if stdin doesn't carry a model field. Result: status line displays the model the user is actually talking to, not whatever transient call most recently went through the proxy. Co-Authored-By: Claude Opus 4.7 (1M context) --- bin/deepclaude-statusline | 32 ++++++++++++++++++++++---------- proxy/model-proxy.js | 4 ++++ 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/bin/deepclaude-statusline b/bin/deepclaude-statusline index f390ab2..a6adbf2 100755 --- a/bin/deepclaude-statusline +++ b/bin/deepclaude-statusline @@ -36,9 +36,28 @@ if [[ "$status" == "{}" ]]; then fi backend_host=$(echo "$status" | jq -r '.backend_host // "?"') -last_client=$(echo "$status" | jq -r '.last_request.client_model // empty') -last_wire=$(echo "$status" | jq -r '.last_request.wire_model // empty') -last_dest=$(echo "$status" | jq -r '.last_request.destination // empty') + +# Prefer the model Claude Code passes on stdin — that's the *main* +# conversation model, stable across the haiku/flash subagent calls +# Claude Code makes for things like topic detection. Falling back to +# last_request would make the statusline flicker to the most recent +# subagent call. +display_model="${claude_model}" +if [[ -z "$display_model" ]]; then + display_model=$(echo "$status" | jq -r '.last_request.client_model // empty') +fi + +# Wire-side mapping for whatever the display model is, looked up from +# the proxy's MODEL_REMAP[state.mode]. Empty if no mapping (default +# mode where env=wire, so display_model is already the backend name). 
+display_wire="" +if [[ -n "$display_model" ]]; then + display_wire=$(echo "$status" | jq -r --arg m "$display_model" '.model_remap[$m] // empty') +fi +[[ -z "$display_wire" ]] && display_wire="$display_model" + +display_dest=$(echo "$status" | jq -r '.last_request.destination // empty') +[[ -z "$display_dest" ]] && display_dest="$backend_host" input_tokens=$(echo "$cost" | jq -r '.total_input_tokens // 0' 2>/dev/null) output_tokens=$(echo "$cost" | jq -r '.total_output_tokens // 0' 2>/dev/null) @@ -68,13 +87,6 @@ if awk -v s="$savings" 'BEGIN { exit !(s >= 0.005) }'; then }') fi -# Compose the model display. Prefer last_request data (most accurate -# client → wire). Fall back to claude_model from stdin if proxy hasn't -# seen a request yet. -display_model="${last_client:-$claude_model}" -display_wire="${last_wire:-$display_model}" -display_dest="${last_dest:-$backend_host}" - if [[ -n "$display_model" && "$display_model" != "$display_wire" ]]; then model_part="[$display_model → $display_wire on $display_dest]" elif [[ -n "$display_model" ]]; then diff --git a/proxy/model-proxy.js b/proxy/model-proxy.js index db1b275..0217bd4 100644 --- a/proxy/model-proxy.js +++ b/proxy/model-proxy.js @@ -229,6 +229,10 @@ export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, uptime: Math.round((Date.now() - t0Global) / 1000), requests: reqCount, last_request: state.lastRequest, + // Statusline looks up the wire-side mapping for whatever + // model Claude Code says it's using (via stdin), without + // having to duplicate the table in shell. 
+ model_remap: MODEL_REMAP[state.mode] || {}, })); return; } From 3d8fe79aae82a1a6cf25a587b260d924a3e03dfa Mon Sep 17 00:00:00 2001 From: Ben Date: Thu, 7 May 2026 14:59:16 +0100 Subject: [PATCH 09/10] fix: drop top-level thinking/context_management on non-Anthropic routes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DeepSeek's anthropic-compat endpoint 400s with: The `content[].thinking` in the thinking mode must be passed back to the API. …when the request body has `thinking: { type: "enabled", ... }` at the top level but the messages don't carry thinking content blocks. Background: foreign-backend thinking blocks are invalid against Anthropic's signing key, so the proxy strips them from messages on isModelCall. But it left the top-level `thinking` config in place, creating the contradictory state DeepSeek rejects. Fix: drop both `thinking` and `context_management` for isModelCall routes (mirrors what the image-fallback path on PR #21 already does on forceAnthropicForImage). Backends like DeepSeek don't honor Anthropic's extended-thinking config anyway, so dropping it costs nothing and fixes the 400. Co-Authored-By: Claude Opus 4.7 (1M context) --- proxy/model-proxy.js | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/proxy/model-proxy.js b/proxy/model-proxy.js index 0217bd4..2c7398a 100644 --- a/proxy/model-proxy.js +++ b/proxy/model-proxy.js @@ -380,6 +380,13 @@ export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, try { const parsed = JSON.parse(body); stripAllThinkingBlocks(parsed); + // Top-level thinking/context_management have to go too. + // DeepSeek 400s with "content[].thinking in the thinking + // mode must be passed back" when the body advertises + // thinking but messages don't carry the blocks (which + // we just stripped, since foreign blocks are invalid).
+ delete parsed.thinking; + delete parsed.context_management; body = Buffer.from(JSON.stringify(parsed)); } catch { /* pass through */ } } From 9ee1fa8b9736d4d65c53b9e2ff8089218e8c619a Mon Sep 17 00:00:00 2001 From: Ben Date: Thu, 7 May 2026 15:02:16 +0100 Subject: [PATCH 10/10] =?UTF-8?q?fix:=20don't=20strip=20thinking=20blocks?= =?UTF-8?q?=20on=20isModelCall=20=E2=80=94=20DeepSeek=20needs=20continuity?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previous attempt dropped only the top-level `thinking` config; the 400 still fires because DeepSeek's check is on `content[].thinking` inside messages — it expects its own prior thinking blocks to be passed back verbatim for conversation continuity. The original strip was added to clean up foreign-backend blocks on backend switches (commit 70518b6), but it also removes DeepSeek's own blocks in pure-DeepSeek sessions, breaking continuity. For now: leave thinking blocks in place on isModelCall so DeepSeek can see its own history. We continue to drop the top-level thinking config since non-Anthropic backends don't honor Anthropic's extended-thinking spec consistently. Backend-switch case (DeepSeek session → Anthropic) is still handled by the Anthropic-side strip (`hadNonAnthropicSession ? stripAllThinkingBlocks : stripUnsignedThinkingBlocks`), which shouldn't regress. If a future user reports a foreign-block 400 going INTO DeepSeek (e.g. switching mid-session from openrouter to deepseek), we'll need a finer-grained strip that distinguishes block origin.
Co-Authored-By: Claude Opus 4.7 (1M context) --- proxy/model-proxy.js | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/proxy/model-proxy.js b/proxy/model-proxy.js index 2c7398a..fd2a4e6 100644 --- a/proxy/model-proxy.js +++ b/proxy/model-proxy.js @@ -379,12 +379,14 @@ export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, if (isModelCall) { try { const parsed = JSON.parse(body); - stripAllThinkingBlocks(parsed); - // Top-level thinking/context_management have to go too. - // DeepSeek 400s with "content[].thinking in the thinking - // mode must be passed back" when the body advertises - // thinking but messages don't carry the blocks (which - // we just stripped, since foreign blocks are invalid). + // DeepSeek's anthropic-compat endpoint expects its own + // thinking blocks passed back verbatim for continuity + // ("content[].thinking ... must be passed back"), so + // we don't strip thinking blocks here. Top-level + // thinking/context_management still go — non-Anthropic + // backends don't honor Anthropic's extended-thinking + // spec consistently, and a stale config field is a + // noisier error than no config at all. delete parsed.thinking; delete parsed.context_management; body = Buffer.from(JSON.stringify(parsed));