diff --git a/README.md b/README.md index a90821d..6019eea 100644 --- a/README.md +++ b/README.md @@ -89,6 +89,7 @@ Claude Code reads these environment variables to determine where to send API cal | **DeepSeek** (default) | `--backend ds` | $0.44 | $0.87 | China | Auto context caching (120x cheaper on repeat turns) | | **OpenRouter** | `--backend or` | $0.44 | $0.87 | US | Cheapest, lowest latency from US/EU | | **Fireworks AI** | `--backend fw` | $1.74 | $3.48 | US | Fastest inference | +| **sference** | `--backend sf -w 1h` | (varies) | (varies) | EU | Async background inference, up to 75% off | | **Anthropic** | `--backend anthropic` | $3.00 | $15.00 | US | Original Claude Opus (for hard problems) | ### Setup per backend @@ -111,7 +112,51 @@ setx FIREWORKS_API_KEY "fw_..." # Windows export FIREWORKS_API_KEY="fw_..." # macOS/Linux ``` -## Cost comparison +**sference** (async background — lower cost, higher latency per turn): +```bash +export SFERENCE_API_KEY="sk_..." # macOS/Linux +export SFERENCE_MODEL="moonshotai/Kimi-K2.6" # optional, this is the default +export SFERENCE_COMPLETION_WINDOW="1h" # 1h | 24h (sference); others passed to custom providers +deepclaude -b sf -w 1h +``` + +**Custom background provider** (any Responses API with `background: true`): +```bash +export BG_PROVIDER_URL="https://api.example.com" +export BG_PROVIDER_API_KEY="sk_..." +export BG_PROVIDER_MODEL="your-model" +export BG_PROVIDER_WINDOW="1h" +deepclaude -b bg +``` + +## Background providers (`responses-bg`) + +Some inference platforms expose an async **Responses API**: submit with `background: true`, poll until complete. 
Claude Code only speaks Anthropic `/v1/messages`, so deepclaude's proxy translates each turn: + +``` +Claude Code → POST /v1/messages (Anthropic format, streaming) +Proxy → POST /v1/responses (background: true, completion_window) +Proxy → poll GET /v1/responses/{id} +Proxy → synthesize Anthropic SSE back to Claude Code +``` + +**Trade-off:** each agent turn waits in the provider's queue (minutes to hours depending on window). Best for overnight headless runs (`claude -p`) or when cost matters more than latency. Sync backends (DeepSeek, OpenRouter) remain the default for interactive coding. + +Control endpoints while the proxy runs: +```bash +curl -s http://127.0.0.1:3200/_proxy/status # mode, window, tool_mode +curl -sX POST http://127.0.0.1:3200/_proxy/window -d "window=24h" +curl -s http://127.0.0.1:3200/_proxy/cost +``` + +Add a slash command in `~/.claude/commands/sference.md`: +``` +Switch to sference background mode: +curl -sX POST http://127.0.0.1:3200/_proxy/mode -d "backend=sference" +If successful, say: "Switched to sference (background)." 
+``` + +## Cost comparison | Usage level | Anthropic Max | deepclaude (DeepSeek) | Savings | |---|---|---|---| diff --git a/deepclaude.ps1 b/deepclaude.ps1 index 26c35a6..13db93c 100644 --- a/deepclaude.ps1 +++ b/deepclaude.ps1 @@ -6,9 +6,9 @@ deepclaude # DeepSeek V4 Pro (default) deepclaude --backend or # OpenRouter (cheapest) deepclaude --backend fw # Fireworks AI (fastest) + deepclaude --backend sf -w 1h # sference background (Kimi-K2.6) deepclaude --backend anthropic # Normal Claude Code deepclaude --remote # Remote control + DeepSeek (browser URL) - deepclaude --remote -b or # Remote control + OpenRouter deepclaude --status # Show keys and backends deepclaude --cost # Pricing comparison deepclaude --benchmark # Latency test @@ -17,8 +17,12 @@ param( [Alias("b")] [string]$Backend, + [Alias("w")] + [string]$Window, [Alias("r")] [switch]$Remote, + [Alias("s")] + [string]$Switch, [switch]$Status, [switch]$Cost, [switch]$Benchmark, @@ -27,10 +31,18 @@ param( $ErrorActionPreference = "Stop" -if (-not $Backend -and -not $Status -and -not $Cost -and -not $Benchmark -and -not $Help) { +if (-not $Backend -and -not $Status -and -not $Cost -and -not $Benchmark -and -not $Help -and -not $Switch) { $Backend = if ($env:CHEAPCLAUDE_DEFAULT_BACKEND) { $env:CHEAPCLAUDE_DEFAULT_BACKEND } else { "ds" } } +if (-not $Window) { + $Window = if ($env:SFERENCE_COMPLETION_WINDOW) { $env:SFERENCE_COMPLETION_WINDOW } else { "1h" } +} + +$ScriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path +$SferenceModel = if ($env:SFERENCE_MODEL) { $env:SFERENCE_MODEL } else { "moonshotai/Kimi-K2.6" } +$SferenceUrl = if ($env:SFERENCE_BASE_URL) { $env:SFERENCE_BASE_URL } else { "https://api.sference.com" } + # --- Config --- $DeepSeekKey = if ($env:DEEPSEEK_API_KEY) { $env:DEEPSEEK_API_KEY } else { [Environment]::GetEnvironmentVariable("DEEPSEEK_API_KEY", "User") @@ -41,12 +53,20 @@ $OpenRouterKey = if ($env:OPENROUTER_API_KEY) { $env:OPENROUTER_API_KEY } else { $FireworksKey = if ($env:FIREWORKS_API_KEY) { 
$env:FIREWORKS_API_KEY } else { [Environment]::GetEnvironmentVariable("FIREWORKS_API_KEY", "User") } +$SferenceKey = if ($env:SFERENCE_API_KEY) { $env:SFERENCE_API_KEY } else { + [Environment]::GetEnvironmentVariable("SFERENCE_API_KEY", "User") +} +$BgProviderKey = if ($env:BG_PROVIDER_API_KEY) { $env:BG_PROVIDER_API_KEY } else { + [Environment]::GetEnvironmentVariable("BG_PROVIDER_API_KEY", "User") +} $Providers = @{ ds = @{ name = "DeepSeek (direct)" url = "https://api.deepseek.com/anthropic" key = $DeepSeekKey; keyName = "DEEPSEEK_API_KEY" + backendName = "deepseek" + type = "anthropic-compatible" opus = "deepseek-v4-pro"; sonnet = "deepseek-v4-pro" haiku = "deepseek-v4-flash"; subagent = "deepseek-v4-flash" } @@ -54,6 +74,8 @@ $Providers = @{ name = "OpenRouter" url = "https://openrouter.ai/api" key = $OpenRouterKey; keyName = "OPENROUTER_API_KEY" + backendName = "openrouter" + type = "anthropic-compatible" opus = "deepseek/deepseek-v4-pro"; sonnet = "deepseek/deepseek-v4-pro" haiku = "deepseek/deepseek-v4-pro"; subagent = "deepseek/deepseek-v4-pro" } @@ -61,11 +83,35 @@ $Providers = @{ name = "Fireworks AI" url = "https://api.fireworks.ai/inference" key = $FireworksKey; keyName = "FIREWORKS_API_KEY" + backendName = "fireworks" + type = "anthropic-compatible" opus = "accounts/fireworks/models/deepseek-v4-pro" sonnet = "accounts/fireworks/models/deepseek-v4-pro" haiku = "accounts/fireworks/models/deepseek-v4-pro" subagent = "accounts/fireworks/models/deepseek-v4-pro" } + sf = @{ + name = "sference (background)" + url = $SferenceUrl + key = $SferenceKey; keyName = "SFERENCE_API_KEY" + backendName = "sference" + type = "responses-bg" + opus = $SferenceModel; sonnet = $SferenceModel + haiku = $SferenceModel; subagent = $SferenceModel + } +} +$Providers["sference"] = $Providers["sf"] + +if ($BgProviderKey -and $env:BG_PROVIDER_URL -and $env:BG_PROVIDER_MODEL) { + $Providers["bg"] = @{ + name = "Custom background provider" + url = $env:BG_PROVIDER_URL + key = 
$BgProviderKey; keyName = "BG_PROVIDER_API_KEY" + backendName = "bg" + type = "responses-bg" + opus = $env:BG_PROVIDER_MODEL; sonnet = $env:BG_PROVIDER_MODEL + haiku = $env:BG_PROVIDER_MODEL; subagent = $env:BG_PROVIDER_MODEL + } } function Get-KeyDisplay($k) { @@ -73,6 +119,63 @@ function Get-KeyDisplay($k) { return "set (****" + $k.Substring($k.Length - [Math]::Min(4, $k.Length)) + ")" } +function Start-DeepClaudeProxy($Provider) { + Write-Host "`n Starting model proxy for $($Provider.name)..." -ForegroundColor Cyan + + $proxyScript = Join-Path $ScriptDir "proxy\start-proxy.js" + $portFile = Join-Path $env:TEMP "deepclaude-proxy-port.txt" + if (Test-Path $portFile) { Remove-Item $portFile -Force } + + $env:SFERENCE_COMPLETION_WINDOW = $Window + $env:CHEAPCLAUDE_DEFAULT_MODE = $Provider.backendName + + $proxyProc = Start-Process -FilePath "node" ` + -ArgumentList @($proxyScript, $Provider.url, $Provider.key, $Provider.backendName, "--window", $Window) ` + -PassThru -WindowStyle Hidden -RedirectStandardOutput $portFile + + $tries = 0 + while ($tries -lt 30) { + Start-Sleep -Milliseconds 200 + $tries++ + if (Test-Path $portFile) { + $content = Get-Content $portFile -ErrorAction SilentlyContinue + if ($content) { break } + } + } + + $proxyPort = (Get-Content $portFile -ErrorAction SilentlyContinue | Select-Object -First 1) + Remove-Item $portFile -ErrorAction SilentlyContinue + + if (-not $proxyPort) { + Write-Host "ERROR: Proxy failed to start" -ForegroundColor Red + if ($proxyProc -and -not $proxyProc.HasExited) { Stop-Process -Id $proxyProc.Id -Force } + exit 1 + } + + Write-Host " Proxy on :$proxyPort -> $($Provider.url)" -ForegroundColor DarkGray + if ($Provider.type -eq "responses-bg") { + Write-Host " Background mode: window=$Window model=$($Provider.opus)" -ForegroundColor DarkGray + } + + return @{ Port = $proxyPort; Process = $proxyProc } +} + +function Set-ModelEnv($Provider) { + $env:ANTHROPIC_DEFAULT_OPUS_MODEL = $Provider.opus + 
$env:ANTHROPIC_DEFAULT_SONNET_MODEL = $Provider.sonnet + $env:ANTHROPIC_DEFAULT_HAIKU_MODEL = $Provider.haiku + $env:CLAUDE_CODE_SUBAGENT_MODEL = $Provider.subagent + $env:CLAUDE_CODE_EFFORT_LEVEL = "max" +} + +function Clear-AnthropicEnv { + foreach ($v in @("ANTHROPIC_BASE_URL","ANTHROPIC_AUTH_TOKEN","ANTHROPIC_DEFAULT_OPUS_MODEL", + "ANTHROPIC_DEFAULT_SONNET_MODEL","ANTHROPIC_DEFAULT_HAIKU_MODEL", + "CLAUDE_CODE_SUBAGENT_MODEL","CLAUDE_CODE_EFFORT_LEVEL","ANTHROPIC_API_KEY")) { + Remove-Item "Env:$v" -ErrorAction SilentlyContinue + } +} + # --- Status --- if ($Status) { Write-Host "`n deepclaude - Backend Status" -ForegroundColor Cyan @@ -81,10 +184,13 @@ if ($Status) { Write-Host " DEEPSEEK_API_KEY: $(Get-KeyDisplay $DeepSeekKey)" Write-Host " OPENROUTER_API_KEY: $(Get-KeyDisplay $OpenRouterKey)" Write-Host " FIREWORKS_API_KEY: $(Get-KeyDisplay $FireworksKey)" + Write-Host " SFERENCE_API_KEY: $(Get-KeyDisplay $SferenceKey)" + Write-Host " BG_PROVIDER_API_KEY: $(Get-KeyDisplay $BgProviderKey)" Write-Host "`n Backends:" -ForegroundColor Yellow Write-Host " deepclaude # DeepSeek V4 Pro (default)" - Write-Host " deepclaude -b or # OpenRouter (cheapest)" - Write-Host " deepclaude -b fw # Fireworks AI (fastest)" + Write-Host " deepclaude -b or # OpenRouter" + Write-Host " deepclaude -b fw # Fireworks AI" + Write-Host " deepclaude -b sf -w 1h # sference background" Write-Host " deepclaude -b anthropic # Normal Claude Code" Write-Host "" exit 0 @@ -92,17 +198,16 @@ if ($Status) { # --- Cost --- if ($Cost) { - Write-Host "`n DeepSeek V4 Pro Pricing" -ForegroundColor Cyan - Write-Host " =======================" -ForegroundColor DarkGray + Write-Host "`n Provider Pricing" -ForegroundColor Cyan + Write-Host " ================" -ForegroundColor DarkGray Write-Host "" - Write-Host " Provider Input/M Output/M Cache Hit/M" -ForegroundColor Yellow - Write-Host " ---------- -------- -------- -----------" - Write-Host " DeepSeek `$0.44 `$0.87 `$0.004" -ForegroundColor Green - 
Write-Host " OpenRouter `$0.44 `$0.87 (provider)" - Write-Host " Fireworks `$1.74 `$3.48 (provider)" - Write-Host " Anthropic `$3.00 `$15.00 `$0.30" - Write-Host "" - Write-Host " Monthly estimate (heavy use): `$30-80 vs `$200 Anthropic" -ForegroundColor Green + Write-Host " Provider Input/M Output/M Notes" -ForegroundColor Yellow + Write-Host " ---------- -------- -------- -----" + Write-Host " DeepSeek `$0.44 `$0.87 sync + cache" -ForegroundColor Green + Write-Host " OpenRouter `$0.44 `$0.87 sync" + Write-Host " Fireworks `$1.74 `$3.48 sync" + Write-Host " sference (varies) (varies) async bg" + Write-Host " Anthropic `$3.00 `$15.00 sync" Write-Host "" exit 0 } @@ -111,35 +216,69 @@ if ($Cost) { if ($Help) { Write-Host "deepclaude - Claude Code with cheap backends" Write-Host "" - Write-Host "Usage: deepclaude [-b backend] [--status] [--cost] [--benchmark]" + Write-Host "Usage: deepclaude [-b backend] [-w window] [--status] [--cost] [--benchmark]" Write-Host "" - Write-Host " -b, --backend ds (default), or, fw, anthropic" + Write-Host " -b, --backend ds (default), or, fw, sf, bg, anthropic" + Write-Host " -w, --window 15m|1h|6h|24h|48h (background providers)" Write-Host " --status Show keys and backends" Write-Host " --cost Pricing comparison" Write-Host " --benchmark Latency test" exit 0 } +# --- Switch --- +if ($Switch) { + $map = @{ ds="deepseek"; or="openrouter"; fw="fireworks"; sf="sference"; sference="sference"; bg="bg"; anthropic="anthropic" } + $backend = $map[$Switch] + if (-not $backend) { Write-Host "ERROR: Unknown backend '$Switch'" -ForegroundColor Red; exit 1 } + try { + $resp = Invoke-RestMethod -Uri "http://127.0.0.1:3200/_proxy/mode" -Method Post -Body "backend=$backend" + Write-Host " $($resp | ConvertTo-Json -Compress)" + if ($backend -in @("sference","bg")) { + $wresp = Invoke-RestMethod -Uri "http://127.0.0.1:3200/_proxy/window" -Method Post -Body "window=$Window" + Write-Host " $($wresp | ConvertTo-Json -Compress)" + } + } catch { + 
Write-Host " Proxy not running. Start with: deepclaude" -ForegroundColor Red + exit 1 + } + exit 0 +} + # --- Benchmark --- if ($Benchmark) { Write-Host "`n Latency Benchmark" -ForegroundColor Cyan - Write-Host " ==================" -ForegroundColor DarkGray - foreach ($id in @("ds","or","fw")) { + Write-Host " =================" -ForegroundColor DarkGray + foreach ($id in @("ds","or","fw","sf")) { $p = $Providers[$id] + if (-not $p) { continue } Write-Host " $($p.name)..." -NoNewline if (-not $p.key) { Write-Host " SKIP (no key)" -ForegroundColor DarkGray; continue } - $useBearer = $id -in @("or","fw") - $headers = if ($useBearer) { - @{ "Authorization" = "Bearer $($p.key)"; "content-type" = "application/json"; "anthropic-version" = "2023-06-01" } - } else { - @{ "x-api-key" = $p.key; "content-type" = "application/json"; "anthropic-version" = "2023-06-01" } - } - $body = @{ model = $p.opus; max_tokens = 32; messages = @(@{ role = "user"; content = "Reply: ok" }) } | ConvertTo-Json -Depth 5 $sw = [System.Diagnostics.Stopwatch]::StartNew() try { - $null = Invoke-RestMethod -Uri "$($p.url)/v1/messages" -Method POST -Headers $headers -Body $body -TimeoutSec 30 - $sw.Stop() - Write-Host " OK ($($sw.ElapsedMilliseconds)ms)" -ForegroundColor Green + if ($p.type -eq "responses-bg") { + $body = @{ + model = $p.opus + input = "Reply: ok" + background = $true + metadata = @{ completion_window = "1h" } + } | ConvertTo-Json -Depth 5 + $headers = @{ "X-API-Key" = $p.key; "content-type" = "application/json" } + $null = Invoke-RestMethod -Uri "$($p.url)/v1/responses" -Method POST -Headers $headers -Body $body -TimeoutSec 30 + $sw.Stop() + Write-Host " submit OK ($($sw.ElapsedMilliseconds)ms, async)" -ForegroundColor Green + } else { + $useBearer = $id -in @("or","fw") + $headers = if ($useBearer) { + @{ "Authorization" = "Bearer $($p.key)"; "content-type" = "application/json"; "anthropic-version" = "2023-06-01" } + } else { + @{ "x-api-key" = $p.key; "content-type" = 
"application/json"; "anthropic-version" = "2023-06-01" } + } + $body = @{ model = $p.opus; max_tokens = 32; messages = @(@{ role = "user"; content = "Reply: ok" }) } | ConvertTo-Json -Depth 5 + $null = Invoke-RestMethod -Uri "$($p.url)/v1/messages" -Method POST -Headers $headers -Body $body -TimeoutSec 30 + $sw.Stop() + Write-Host " OK ($($sw.ElapsedMilliseconds)ms)" -ForegroundColor Green + } } catch { $sw.Stop() $code = if ($_.Exception.Response) { $_.Exception.Response.StatusCode.value__ } else { "timeout" } @@ -152,15 +291,9 @@ if ($Benchmark) { # --- Remote --- if ($Remote) { - $ScriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path if ($Backend -eq "anthropic") { Write-Host "`n Launching remote control (Anthropic)...`n" -ForegroundColor Cyan - foreach ($v in @("ANTHROPIC_BASE_URL","ANTHROPIC_AUTH_TOKEN","ANTHROPIC_DEFAULT_OPUS_MODEL", - "ANTHROPIC_DEFAULT_SONNET_MODEL","ANTHROPIC_DEFAULT_HAIKU_MODEL", - "CLAUDE_CODE_SUBAGENT_MODEL","CLAUDE_CODE_EFFORT_LEVEL")) { - Remove-Item "Env:$v" -ErrorAction SilentlyContinue - } - Remove-Item Env:ANTHROPIC_API_KEY -ErrorAction SilentlyContinue + Clear-AnthropicEnv & claude remote-control @Args exit 0 } @@ -169,47 +302,19 @@ if ($Remote) { if (-not $p) { Write-Host "ERROR: Unknown backend '$Backend'" -ForegroundColor Red; exit 1 } if (-not $p.key) { Write-Host "ERROR: $($p.keyName) not set" -ForegroundColor Red; exit 1 } - Write-Host "`n Starting model proxy for $($p.name)..." 
-ForegroundColor Cyan - - $proxyScript = Join-Path $ScriptDir "proxy\start-proxy.js" - $proxyProc = Start-Process -FilePath "node" -ArgumentList $proxyScript,$p.url,$p.key -PassThru -WindowStyle Hidden -RedirectStandardOutput "$env:TEMP\deepclaude-proxy-port.txt" - - $tries = 0 - while ($tries -lt 30) { - Start-Sleep -Milliseconds 200 - $tries++ - if (Test-Path "$env:TEMP\deepclaude-proxy-port.txt") { - $content = Get-Content "$env:TEMP\deepclaude-proxy-port.txt" -ErrorAction SilentlyContinue - if ($content) { break } - } - } - - $proxyPort = (Get-Content "$env:TEMP\deepclaude-proxy-port.txt" -ErrorAction SilentlyContinue | Select-Object -First 1) - Remove-Item "$env:TEMP\deepclaude-proxy-port.txt" -ErrorAction SilentlyContinue - - if (-not $proxyPort) { - Write-Host "ERROR: Proxy failed to start" -ForegroundColor Red - if ($proxyProc -and -not $proxyProc.HasExited) { Stop-Process -Id $proxyProc.Id -Force } - exit 1 - } - - Write-Host " Proxy on :$proxyPort -> $($p.url)" -ForegroundColor DarkGray + $proxy = Start-DeepClaudeProxy $p Write-Host " Launching remote control via $($p.name)...`n" -ForegroundColor Cyan - $env:ANTHROPIC_BASE_URL = "http://127.0.0.1:$proxyPort" - $env:ANTHROPIC_DEFAULT_OPUS_MODEL = $p.opus - $env:ANTHROPIC_DEFAULT_SONNET_MODEL = $p.sonnet - $env:ANTHROPIC_DEFAULT_HAIKU_MODEL = $p.haiku - $env:CLAUDE_CODE_SUBAGENT_MODEL = $p.subagent - $env:CLAUDE_CODE_EFFORT_LEVEL = "max" + $env:ANTHROPIC_BASE_URL = "http://127.0.0.1:$($proxy.Port)" + Set-ModelEnv $p Remove-Item Env:ANTHROPIC_API_KEY -ErrorAction SilentlyContinue Remove-Item Env:ANTHROPIC_AUTH_TOKEN -ErrorAction SilentlyContinue try { & claude remote-control @Args } finally { - if ($proxyProc -and -not $proxyProc.HasExited) { - Stop-Process -Id $proxyProc.Id -Force -ErrorAction SilentlyContinue + if ($proxy.Process -and -not $proxy.Process.HasExited) { + Stop-Process -Id $proxy.Process.Id -Force -ErrorAction SilentlyContinue Write-Host " Proxy stopped." 
-ForegroundColor DarkGray } } @@ -218,20 +323,34 @@ if ($Remote) { # --- Launch --- if ($Backend -eq "anthropic") { - foreach ($v in @("ANTHROPIC_BASE_URL","ANTHROPIC_AUTH_TOKEN","ANTHROPIC_DEFAULT_OPUS_MODEL", - "ANTHROPIC_DEFAULT_SONNET_MODEL","ANTHROPIC_DEFAULT_HAIKU_MODEL", - "CLAUDE_CODE_SUBAGENT_MODEL","CLAUDE_CODE_EFFORT_LEVEL")) { - Remove-Item "Env:$v" -ErrorAction SilentlyContinue - } + Clear-AnthropicEnv Write-Host "`n Launching Claude Code (normal Anthropic)...`n" -ForegroundColor Cyan & claude @Args exit 0 } $p = $Providers[$Backend] -if (-not $p) { Write-Host "ERROR: Unknown backend '$Backend'. Use: ds, or, fw, anthropic" -ForegroundColor Red; exit 1 } +if (-not $p) { Write-Host "ERROR: Unknown backend '$Backend'. Use: ds, or, fw, sf, bg, anthropic" -ForegroundColor Red; exit 1 } if (-not $p.key) { Write-Host "ERROR: $($p.keyName) not set" -ForegroundColor Red; exit 1 } +if ($p.type -eq "responses-bg") { + $proxy = Start-DeepClaudeProxy $p + Write-Host " Launching Claude Code via $($p.name)...`n" -ForegroundColor Cyan + $env:ANTHROPIC_BASE_URL = "http://127.0.0.1:$($proxy.Port)" + Set-ModelEnv $p + Remove-Item Env:ANTHROPIC_API_KEY -ErrorAction SilentlyContinue + Remove-Item Env:ANTHROPIC_AUTH_TOKEN -ErrorAction SilentlyContinue + try { + & claude @Args + } finally { + if ($proxy.Process -and -not $proxy.Process.HasExited) { + Stop-Process -Id $proxy.Process.Id -Force -ErrorAction SilentlyContinue + Write-Host " Proxy stopped." -ForegroundColor DarkGray + } + } + exit 0 +} + Write-Host "`n Launching Claude Code via $($p.name)..." 
-ForegroundColor Cyan Write-Host " Endpoint: $($p.url)" -ForegroundColor DarkGray Write-Host " Model: $($p.opus) (main) + $($p.haiku) (subagents)" -ForegroundColor DarkGray @@ -239,18 +358,9 @@ Write-Host "" $env:ANTHROPIC_BASE_URL = $p.url $env:ANTHROPIC_AUTH_TOKEN = $p.key -$env:ANTHROPIC_MODEL = $p.opus -$env:ANTHROPIC_DEFAULT_OPUS_MODEL = $p.opus -$env:ANTHROPIC_DEFAULT_SONNET_MODEL = $p.sonnet -$env:ANTHROPIC_DEFAULT_HAIKU_MODEL = $p.haiku -$env:CLAUDE_CODE_SUBAGENT_MODEL = $p.subagent -$env:CLAUDE_CODE_EFFORT_LEVEL = "max" +Set-ModelEnv $p Remove-Item Env:ANTHROPIC_API_KEY -ErrorAction SilentlyContinue & claude @Args -foreach ($v in @("ANTHROPIC_BASE_URL","ANTHROPIC_AUTH_TOKEN","ANTHROPIC_MODEL", - "ANTHROPIC_DEFAULT_OPUS_MODEL","ANTHROPIC_DEFAULT_SONNET_MODEL", - "ANTHROPIC_DEFAULT_HAIKU_MODEL","CLAUDE_CODE_SUBAGENT_MODEL","CLAUDE_CODE_EFFORT_LEVEL")) { - Remove-Item "Env:$v" -ErrorAction SilentlyContinue -} +Clear-AnthropicEnv diff --git a/deepclaude.sh b/deepclaude.sh index 5f59e3a..a3cd0d7 100644 --- a/deepclaude.sh +++ b/deepclaude.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # deepclaude — Use Claude Code with DeepSeek V4 Pro or other cheap backends -# Usage: deepclaude [--backend ds|or|fw|anthropic] [--remote] [--status] [--cost] [--benchmark] +# Usage: deepclaude [--backend ds|or|fw|sf|anthropic] [--window 1h] [--remote] [--status] [--cost] [--benchmark] set -euo pipefail @@ -10,16 +10,22 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" DEEPSEEK_URL="https://api.deepseek.com/anthropic" OPENROUTER_URL="https://openrouter.ai/api" FIREWORKS_URL="https://api.fireworks.ai/inference" +SFERENCE_URL="${SFERENCE_BASE_URL:-https://api.sference.com}" +SFERENCE_MODEL="${SFERENCE_MODEL:-moonshotai/Kimi-K2.6}" BACKEND="${CHEAPCLAUDE_DEFAULT_BACKEND:-ds}" ACTION="launch" SWITCH_BACKEND="" +COMPLETION_WINDOW="${SFERENCE_COMPLETION_WINDOW:-1h}" PROXY_PID="" +RESOLVED_TYPE="anthropic-compatible" +RESOLVED_BACKEND_NAME="" # --- Parse args --- while [[ $# -gt 0 
]]; do case "$1" in --backend|-b) BACKEND="$2"; shift 2 ;; + --window|-w) COMPLETION_WINDOW="$2"; shift 2 ;; --switch|-s) ACTION="switch"; SWITCH_BACKEND="$2"; shift 2 ;; --remote|-r) ACTION="remote"; shift ;; --status) ACTION="status"; shift ;; @@ -44,37 +50,54 @@ mask_key() { } resolve_backend() { - local url="" key="" opus="" sonnet="" haiku="" subagent="" + local url="" key="" opus="" sonnet="" haiku="" subagent="" btype="anthropic-compatible" bname="" case "$BACKEND" in ds|deepseek) key="${DEEPSEEK_API_KEY:-}" [[ -z "$key" ]] && { echo "ERROR: DEEPSEEK_API_KEY not set" >&2; exit 1; } - url="$DEEPSEEK_URL" + url="$DEEPSEEK_URL"; bname="deepseek" opus="deepseek-v4-pro"; sonnet="deepseek-v4-pro" haiku="deepseek-v4-flash"; subagent="deepseek-v4-flash" ;; or|openrouter) key="${OPENROUTER_API_KEY:-}" [[ -z "$key" ]] && { echo "ERROR: OPENROUTER_API_KEY not set" >&2; exit 1; } - url="$OPENROUTER_URL" + url="$OPENROUTER_URL"; bname="openrouter" opus="deepseek/deepseek-v4-pro"; sonnet="deepseek/deepseek-v4-pro" haiku="deepseek/deepseek-v4-pro"; subagent="deepseek/deepseek-v4-pro" ;; fw|fireworks) key="${FIREWORKS_API_KEY:-}" [[ -z "$key" ]] && { echo "ERROR: FIREWORKS_API_KEY not set" >&2; exit 1; } - url="$FIREWORKS_URL" + url="$FIREWORKS_URL"; bname="fireworks" opus="accounts/fireworks/models/deepseek-v4-pro" sonnet="accounts/fireworks/models/deepseek-v4-pro" haiku="accounts/fireworks/models/deepseek-v4-pro" subagent="accounts/fireworks/models/deepseek-v4-pro" ;; + sf|sference) + key="${SFERENCE_API_KEY:-}" + [[ -z "$key" ]] && { echo "ERROR: SFERENCE_API_KEY not set" >&2; exit 1; } + url="$SFERENCE_URL"; bname="sference"; btype="responses-bg" + opus="$SFERENCE_MODEL"; sonnet="$SFERENCE_MODEL" + haiku="$SFERENCE_MODEL"; subagent="$SFERENCE_MODEL" + ;; + bg) + key="${BG_PROVIDER_API_KEY:-}" + [[ -z "$key" ]] && { echo "ERROR: BG_PROVIDER_API_KEY not set" >&2; exit 1; } + url="${BG_PROVIDER_URL:-}"; bname="bg"; btype="responses-bg" + [[ -z "$url" ]] && { echo "ERROR: 
BG_PROVIDER_URL not set" >&2; exit 1; } + opus="${BG_PROVIDER_MODEL:-}"; sonnet="$opus"; haiku="$opus"; subagent="$opus" + [[ -z "$opus" ]] && { echo "ERROR: BG_PROVIDER_MODEL not set" >&2; exit 1; } + COMPLETION_WINDOW="${BG_PROVIDER_WINDOW:-$COMPLETION_WINDOW}" + ;; anthropic) ;; - *) echo "ERROR: Unknown backend '$BACKEND'. Use: ds, or, fw, anthropic" >&2; exit 1 ;; + *) echo "ERROR: Unknown backend '$BACKEND'. Use: ds, or, fw, sf, bg, anthropic" >&2; exit 1 ;; esac RESOLVED_URL="$url"; RESOLVED_KEY="$key" RESOLVED_OPUS="$opus"; RESOLVED_SONNET="$sonnet" RESOLVED_HAIKU="$haiku"; RESOLVED_SUBAGENT="$subagent" + RESOLVED_TYPE="$btype"; RESOLVED_BACKEND_NAME="$bname" } set_model_env() { @@ -85,6 +108,37 @@ set_model_env() { export CLAUDE_CODE_EFFORT_LEVEL="max" } +start_proxy() { + echo " Starting model proxy for $BACKEND..." + + local port_file + port_file=$(mktemp) + SFERENCE_COMPLETION_WINDOW="$COMPLETION_WINDOW" \ + CHEAPCLAUDE_DEFAULT_MODE="$RESOLVED_BACKEND_NAME" \ + node "$SCRIPT_DIR/proxy/start-proxy.js" "$RESOLVED_URL" "$RESOLVED_KEY" "$RESOLVED_BACKEND_NAME" --window "$COMPLETION_WINDOW" > "$port_file" & + PROXY_PID=$! + + local tries=0 + while [[ ! -s "$port_file" ]] && [[ $tries -lt 30 ]]; do + sleep 0.2 + tries=$((tries + 1)) + done + + if [[ ! 
-s "$port_file" ]]; then + echo "ERROR: Proxy failed to start" >&2 + rm -f "$port_file" + exit 1 + fi + + PROXY_PORT=$(head -1 "$port_file") + rm -f "$port_file" + + echo " Proxy on :$PROXY_PORT -> $RESOLVED_URL" + if [[ "$RESOLVED_TYPE" == "responses-bg" ]]; then + echo " Background mode: window=$COMPLETION_WINDOW model=$RESOLVED_OPUS" + fi +} + show_status() { echo "" echo " deepclaude — Backend Status" @@ -94,14 +148,17 @@ show_status() { echo " DEEPSEEK_API_KEY: $(mask_key "${DEEPSEEK_API_KEY:-}")" echo " OPENROUTER_API_KEY: $(mask_key "${OPENROUTER_API_KEY:-}")" echo " FIREWORKS_API_KEY: $(mask_key "${FIREWORKS_API_KEY:-}")" + echo " SFERENCE_API_KEY: $(mask_key "${SFERENCE_API_KEY:-}")" + echo " BG_PROVIDER_API_KEY: $(mask_key "${BG_PROVIDER_API_KEY:-}")" echo "" echo " Backends:" echo " deepclaude # DeepSeek V4 Pro (default)" echo " deepclaude -b or # OpenRouter (cheapest)" echo " deepclaude -b fw # Fireworks AI (fastest)" + echo " deepclaude -b sf -w 1h # sference background (Kimi-K2.6)" + echo " deepclaude -b bg # Custom background provider" echo " deepclaude -b anthropic # Normal Claude Code" echo " deepclaude --remote # Remote control + DeepSeek" - echo " deepclaude --remote -b or # Remote control + OpenRouter" echo "" local proxy_status proxy_status=$(curl -s http://127.0.0.1:3200/_proxy/status 2>/dev/null) || proxy_status="" @@ -116,17 +173,16 @@ show_status() { show_cost() { echo "" - echo " DeepSeek V4 Pro Pricing" - echo " =======================" + echo " Provider Pricing" + echo " ================" echo "" - echo " Provider Input/M Output/M Cache Hit/M" - echo " ---------- -------- -------- -----------" - echo " DeepSeek \$0.44 \$0.87 \$0.004" - echo " OpenRouter \$0.44 \$0.87 (provider)" - echo " Fireworks \$1.74 \$3.48 (provider)" - echo " Anthropic \$3.00 \$15.00 \$0.30" - echo "" - echo " Monthly estimate (heavy use, 25 days): \$30-80" + echo " Provider Input/M Output/M Notes" + echo " ---------- -------- -------- -----" + echo " DeepSeek 
\$0.44 \$0.87 sync + cache" + echo " OpenRouter \$0.44 \$0.87 sync" + echo " Fireworks \$1.74 \$3.48 sync" + echo " sference (varies) (varies) async bg, up to 75% off" + echo " Anthropic \$3.00 \$15.00 sync" echo "" } @@ -136,19 +192,24 @@ show_help() { echo "Usage: deepclaude [options] [-- claude-args...]" echo "" echo "Options:" - echo " -b, --backend Backend (default: ds)" - echo " -r, --remote Remote control mode (browser URL)" - echo " --status Show keys and backends" - echo " --cost Pricing comparison" - echo " --benchmark Latency test" - echo " -s, --switch Switch proxy mid-session" - echo " -h, --help This help" + echo " -b, --backend Backend (default: ds)" + echo " -w, --window <15m|1h|6h|24h|48h> Completion window (bg providers)" + echo " -r, --remote Remote control mode" + echo " --status Show keys and backends" + echo " --cost Pricing comparison" + echo " --benchmark Latency test" + echo " -s, --switch Switch proxy mid-session" + echo " -h, --help This help" echo "" echo "Environment variables:" - echo " DEEPSEEK_API_KEY DeepSeek API key (required for ds)" - echo " OPENROUTER_API_KEY OpenRouter API key (required for or)" - echo " FIREWORKS_API_KEY Fireworks API key (required for fw)" - echo " CHEAPCLAUDE_DEFAULT_BACKEND Default backend (default: ds)" + echo " DEEPSEEK_API_KEY DeepSeek API key" + echo " OPENROUTER_API_KEY OpenRouter API key" + echo " FIREWORKS_API_KEY Fireworks API key" + echo " SFERENCE_API_KEY sference API key" + echo " SFERENCE_MODEL Model (default: moonshotai/Kimi-K2.6)" + echo " SFERENCE_COMPLETION_WINDOW Window (default: 1h)" + echo " BG_PROVIDER_URL/API_KEY/MODEL Custom background provider" + echo " CHEAPCLAUDE_DEFAULT_BACKEND Default backend (default: ds)" } do_switch() { @@ -157,39 +218,63 @@ do_switch() { ds|deepseek) backend="deepseek" ;; or|openrouter) backend="openrouter" ;; fw|fireworks) backend="fireworks" ;; + sf|sference) backend="sference" ;; + bg) backend="bg" ;; anthropic) backend="anthropic" ;; - *) echo "ERROR: 
Unknown backend '$backend'. Use: ds, or, fw, anthropic" >&2; exit 1 ;; + *) echo "ERROR: Unknown backend '$backend'" >&2; exit 1 ;; esac local resp resp=$(curl -sX POST http://127.0.0.1:3200/_proxy/mode -d "backend=$backend" 2>/dev/null) || { echo " Proxy not running. Start with: deepclaude" >&2; exit 1 } echo " $resp" + if [[ -n "$COMPLETION_WINDOW" && ( "$backend" == "sference" || "$backend" == "bg" ) ]]; then + local wresp + wresp=$(curl -sX POST http://127.0.0.1:3200/_proxy/window -d "window=$COMPLETION_WINDOW" 2>/dev/null) || true + [[ -n "$wresp" ]] && echo " $wresp" + fi } run_benchmark() { + now_ms() { python3 -c 'import time; print(int(time.time()*1000))'; } + echo "" - echo " Latency Benchmark (1 request each)" - echo " ===================================" - for name in deepseek openrouter fireworks; do - local url="" key="" model="" + echo " Latency Benchmark" + echo " =================" + for name in deepseek openrouter fireworks sference; do + local url="" key="" model="" btype="anthropic-compatible" case "$name" in deepseek) url="$DEEPSEEK_URL"; key="${DEEPSEEK_API_KEY:-}"; model="deepseek-v4-pro" ;; openrouter) url="$OPENROUTER_URL"; key="${OPENROUTER_API_KEY:-}"; model="deepseek/deepseek-v4-pro" ;; fireworks) url="$FIREWORKS_URL"; key="${FIREWORKS_API_KEY:-}"; model="accounts/fireworks/models/deepseek-v4-pro" ;; + sference) url="$SFERENCE_URL"; key="${SFERENCE_API_KEY:-}"; model="$SFERENCE_MODEL"; btype="responses-bg" ;; esac if [[ -z "$key" ]]; then echo " $name: SKIP (no key)"; continue; fi - local start_ms=$(date +%s%3N 2>/dev/null || python3 -c 'import time;print(int(time.time()*1000))') - local status=$(curl -s -o /dev/null -w "%{http_code}" -X POST "$url/v1/messages" \ - -H "x-api-key: $key" -H "content-type: application/json" -H "anthropic-version: 2023-06-01" \ - -d "{\"model\":\"$model\",\"max_tokens\":32,\"messages\":[{\"role\":\"user\",\"content\":\"Reply: ok\"}]}" \ - --max-time 30 2>/dev/null || echo "timeout") - local end_ms=$(date 
+%s%3N 2>/dev/null || python3 -c 'import time;print(int(time.time()*1000))') - local elapsed=$((end_ms - start_ms)) - if [[ "$status" == "200" ]]; then - echo " $name: OK (${elapsed}ms)" + local start_ms=$(now_ms) + if [[ "$btype" == "responses-bg" ]]; then + local status=$(curl -s -o /dev/null -w "%{http_code}" -X POST "$url/v1/responses" \ + -H "X-API-Key: $key" -H "content-type: application/json" \ + -d "{\"model\":\"$model\",\"input\":\"Reply: ok\",\"background\":true,\"metadata\":{\"completion_window\":\"1h\"}}" \ + --max-time 30 2>/dev/null || echo "timeout") + local end_ms=$(now_ms) + local elapsed=$((end_ms - start_ms)) + if [[ "$status" == "200" || "$status" == "201" || "$status" == "202" ]]; then + echo " $name: submit OK (${elapsed}ms, async — completion time varies)" + else + echo " $name: FAIL ($status, ${elapsed}ms)" + fi else - echo " $name: FAIL ($status, ${elapsed}ms)" + local status=$(curl -s -o /dev/null -w "%{http_code}" -X POST "$url/v1/messages" \ + -H "x-api-key: $key" -H "content-type: application/json" -H "anthropic-version: 2023-06-01" \ + -d "{\"model\":\"$model\",\"max_tokens\":32,\"messages\":[{\"role\":\"user\",\"content\":\"Reply: ok\"}]}" \ + --max-time 30 2>/dev/null || echo "timeout") + local end_ms=$(now_ms) + local elapsed=$((end_ms - start_ms)) + if [[ "$status" == "200" ]]; then + echo " $name: OK (${elapsed}ms)" + else + echo " $name: FAIL ($status, ${elapsed}ms)" + fi fi done echo "" @@ -207,6 +292,18 @@ launch_claude() { resolve_backend + if [[ "$RESOLVED_TYPE" == "responses-bg" ]]; then + start_proxy + echo " Launching Claude Code via $BACKEND (background)..." + echo "" + export ANTHROPIC_BASE_URL="http://127.0.0.1:$PROXY_PORT" + set_model_env + unset ANTHROPIC_API_KEY ANTHROPIC_AUTH_TOKEN + # No exec: the EXIT trap must stay alive to stop the proxy on exit. + claude "$@" + return + fi + echo " Launching Claude Code via $BACKEND..." 
echo " Endpoint: $RESOLVED_URL" echo " Model: $RESOLVED_OPUS (main) + $RESOLVED_HAIKU (subagents)" @@ -231,35 +328,12 @@ launch_remote() { fi resolve_backend + start_proxy - echo " Starting model proxy for $BACKEND..." - - local port_file - port_file=$(mktemp) - node "$SCRIPT_DIR/proxy/start-proxy.js" "$RESOLVED_URL" "$RESOLVED_KEY" > "$port_file" & - PROXY_PID=$! - - local tries=0 - while [[ ! -s "$port_file" ]] && [[ $tries -lt 30 ]]; do - sleep 0.2 - tries=$((tries + 1)) - done - - if [[ ! -s "$port_file" ]]; then - echo "ERROR: Proxy failed to start" >&2 - rm -f "$port_file" - exit 1 - fi - - local proxy_port - proxy_port=$(head -1 "$port_file") - rm -f "$port_file" - - echo " Proxy on :$proxy_port -> $RESOLVED_URL" echo " Launching remote control via $BACKEND..." echo "" - export ANTHROPIC_BASE_URL="http://127.0.0.1:$proxy_port" + export ANTHROPIC_BASE_URL="http://127.0.0.1:$PROXY_PORT" set_model_env unset ANTHROPIC_API_KEY ANTHROPIC_AUTH_TOKEN diff --git a/proxy/README.md b/proxy/README.md index dc95e37..9cfcda0 100644 --- a/proxy/README.md +++ b/proxy/README.md @@ -1,42 +1,85 @@ -# Model Proxy for Remote Control +# Model Proxy -When using `claude remote-control`, the bridge authentication must go to Anthropic while model API calls go to DeepSeek. This proxy handles the split. +The proxy sits between Claude Code and inference backends. It handles two backend types: -## How it works +| Type | Behavior | +|---|---| +| `anthropic-compatible` | Passthrough `/v1/messages` to DeepSeek, OpenRouter, Fireworks, etc. 
| +| `responses-bg` | Translate Anthropic `/v1/messages` → async `POST /v1/responses` with `background: true`, poll, synthesize SSE | + +## Remote control split + +When using `claude remote-control`, the bridge authentication must go to Anthropic while model API calls go to the configured backend: ``` claude remote-control ├── Bridge WebSocket → wss://bridge.claudeusercontent.com (Anthropic, hardcoded) └── Model API calls → http://localhost:3200 (this proxy) - ├── /v1/messages → api.deepseek.com (with DeepSeek key) + ├── /v1/messages → active backend └── everything else → api.anthropic.com (passthrough) ``` +## Background provider flow (`responses-bg`) + +Used by sference and any provider implementing the OpenAI Responses API with `background: true`: + +``` +Claude Code → POST /v1/messages (Anthropic, stream=true) +Proxy → POST /v1/responses { background: true, metadata: { completion_window: "1h" } } +Proxy ← { id, status: "in_progress" } +Proxy → SSE pings every 25s (keeps Claude Code connection alive) +Proxy → poll GET /v1/responses/{id} until terminal +Proxy ← { status: "completed", output[], usage } +Proxy → synthesize Anthropic SSE (message_start … message_stop) +Client abort → DELETE /v1/responses/{id} +``` + +Tool calling uses auto mode: native OpenAI function tools are tried first; if the provider rejects them with a 400 that indicates tools are unsupported, the proxy retries the request with prompt-based tool emulation and keeps using emulation for that backend until the proxy restarts. 
+ +## Control endpoints + +| Endpoint | Method | Purpose | +|---|---|---| +| `/_proxy/status` | GET | Current mode, backend type, window, tool_mode, model, uptime, request count | +| `/_proxy/mode` | POST | Switch backend: `backend=deepseek\|openrouter\|fireworks\|sference\|bg\|anthropic` (non-Anthropic backends must be registered at startup with an API key) | +| `/_proxy/window` | POST | Set completion window: `window=15m\|1h\|6h\|24h\|48h` | +| `/_proxy/cost` | GET | Token usage and cost tracking | + ## Usage ```javascript import { startModelProxy } from './model-proxy.js'; const proxy = await startModelProxy({ - targetUrl: 'https://api.deepseek.com/anthropic', - apiKey: process.env.DEEPSEEK_API_KEY, + targetUrl: 'https://api.sference.com', + apiKey: process.env.SFERENCE_API_KEY, + defaultMode: 'sference', + initialBackend: { + url: 'https://api.sference.com', + apiKey: process.env.SFERENCE_API_KEY, + type: 'responses-bg', + model: 'moonshotai/Kimi-K2.6', + window: '1h', + authStyle: 'x-api-key', + }, + backends: { /* all registered backends */ }, }); -console.log(`Proxy on port ${proxy.port}`); - -// Set env vars for claude remote-control: +// Claude Code env: // ANTHROPIC_BASE_URL=http://127.0.0.1:${proxy.port} -// ANTHROPIC_DEFAULT_OPUS_MODEL=deepseek-v4-pro -// (do NOT set ANTHROPIC_AUTH_TOKEN — OAuth handles bridge auth) +// (do NOT set ANTHROPIC_AUTH_TOKEN for remote control) +``` + +Or via CLI: -// When done: -proxy.close(); +```bash +SFERENCE_API_KEY=sk_... node proxy/start-proxy.js https://api.sference.com sk_... sference --window 1h ``` ## Why a proxy? Claude Code's remote control uses two separate channels: -1. **Bridge** (WebSocket to `wss://bridge.claudeusercontent.com`) — hardcoded, needs Anthropic OAuth +1. **Bridge** (WebSocket) — hardcoded Anthropic OAuth 2. **Model API** (HTTP to `ANTHROPIC_BASE_URL`) — configurable -Setting `ANTHROPIC_AUTH_TOKEN` to a DeepSeek key breaks the bridge. The proxy lets you keep Anthropic OAuth for the bridge while routing model calls to DeepSeek. 
+Background providers add a third concern: Claude Code can't submit async jobs or poll. The proxy hides the entire Responses API lifecycle behind familiar Anthropic streaming. diff --git a/proxy/model-proxy.js b/proxy/model-proxy.js index 85a9295..3690119 100644 --- a/proxy/model-proxy.js +++ b/proxy/model-proxy.js @@ -1,11 +1,21 @@ -import { createServer } from 'http'; +import { createServer, request as httpRequest } from 'http'; import { request as httpsRequest } from 'https'; import { URL } from 'url'; import { Transform } from 'stream'; +import { + buildResponsesRequest, + parseResult, + synthesizeSSE, + ssePing, + isTerminalStatus, + pollIntervalForWindow, + toolsErrorIndicatesUnsupported, +} from './responses-adapter.js'; const ANTHROPIC_FALLBACK = 'https://api.anthropic.com'; const MODEL_PATHS = ['/v1/messages']; -const REQUEST_TIMEOUT_MS = 5 * 60 * 1000; // 5 min per request +const REQUEST_TIMEOUT_MS = 5 * 60 * 1000; +const PING_INTERVAL_MS = 25_000; const MODEL_REMAP = { deepseek: { @@ -24,19 +34,22 @@ const MODEL_REMAP = { }, }; -const PRICING_PER_M = { - deepseek: { input: 0.44, output: 0.87 }, - openrouter: { input: 0.44, output: 0.87 }, - fireworks: { input: 1.74, output: 3.48 }, - anthropic: { input: 3.00, output: 15.00 }, - _single: { input: 0.44, output: 0.87 }, -}; +function loadPricing() { + const sIn = parseFloat(process.env.SFERENCE_INPUT_PRICE || '0'); + const sOut = parseFloat(process.env.SFERENCE_OUTPUT_PRICE || '0'); + const bgIn = parseFloat(process.env.BG_PROVIDER_INPUT_PRICE || '0'); + const bgOut = parseFloat(process.env.BG_PROVIDER_OUTPUT_PRICE || '0'); + return { + deepseek: { input: 0.44, output: 0.87 }, + openrouter: { input: 0.44, output: 0.87 }, + fireworks: { input: 1.74, output: 3.48 }, + anthropic: { input: 3.00, output: 15.00 }, + sference: { input: sIn, output: sOut }, + bg: { input: bgIn, output: bgOut }, + _single: { input: 0.44, output: 0.87 }, + }; +} -/** - * Transform stream that intercepts SSE events and injects missing 
`usage` - * fields. DeepSeek/OpenRouter may omit `usage` in message_start or - * message_delta, which crashes Claude Code ("$.input_tokens" is undefined). - */ class UsageNormalizer extends Transform { constructor(onUsage) { super(); @@ -79,7 +92,7 @@ class UsageNormalizer extends Transform { } } if (changed) return event.replace(m[1], () => JSON.stringify(d)); - } catch { /* not JSON, pass through */ } + } catch { /* not JSON */ } return event; } @@ -90,9 +103,6 @@ class UsageNormalizer extends Transform { } } -/** - * For non-streaming JSON responses, ensure `usage` exists. - */ function normalizeJsonBody(buf) { try { const obj = JSON.parse(buf); @@ -122,36 +132,141 @@ function stripUnsignedThinkingBlocks(body) { } } -export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, defaultMode }) { +function normalizeBackendConfig(name, cfg) { + const url = cfg.url || cfg.target; + const useBearer = cfg.authStyle === 'bearer' + || (cfg.authStyle !== 'x-api-key' && (url.includes('openrouter') || url.includes('fireworks'))); + return { + name, + target: new URL(url), + apiKey: cfg.apiKey, + type: cfg.type || 'anthropic-compatible', + useBearer, + model: cfg.model || null, + window: cfg.window || null, + pricing: cfg.pricing || null, + }; +} + +function authHeaders(backend) { + const h = { 'content-type': 'application/json' }; + if (!backend.apiKey) return h; + if (backend.useBearer) { + h.authorization = `Bearer ${backend.apiKey}`; + } else { + h['x-api-key'] = backend.apiKey; + } + return h; +} + +function responsesPath(baseUrl) { + const base = baseUrl.pathname.replace(/\/$/, ''); + return base.endsWith('/v1') ? 
`${base}/responses` : `${base}/v1/responses`; +} + +function httpsJson(method, url, headers, body) { return new Promise((resolve, reject) => { - const initialTarget = new URL(targetUrl); - const initialBearer = targetUrl.includes('openrouter') || targetUrl.includes('fireworks'); + const u = new URL(url); + const isHttp = u.protocol === 'http:'; + const requestFn = isHttp ? httpRequest : httpsRequest; + const payload = body != null ? JSON.stringify(body) : undefined; + const opts = { + hostname: u.hostname, + port: u.port || (isHttp ? 80 : 443), + path: u.pathname + u.search, + method, + headers: { + ...headers, + ...(payload ? { 'content-length': Buffer.byteLength(payload) } : {}), + }, + timeout: REQUEST_TIMEOUT_MS, + }; + + const req = requestFn(opts, (res) => { + const chunks = []; + res.on('data', c => chunks.push(c)); + res.on('end', () => { + const raw = Buffer.concat(chunks).toString(); + let json = null; + try { json = raw ? JSON.parse(raw) : null; } catch { json = { _raw: raw }; } + resolve({ status: res.statusCode, json, raw }); + }); + }); + req.on('timeout', () => req.destroy(new Error('Request timeout'))); + req.on('error', reject); + if (payload) req.end(payload); + else req.end(); + }); +} + +function applyBackendToState(state, backend) { + state.mode = backend.name; + state.target = backend.target; + state.apiKey = backend.apiKey; + state.useBearer = backend.useBearer; + state.backendType = backend.type; + state.model = backend.model; + state.window = backend.window; + if (backend.type !== 'anthropic-compatible') state.hadNonAnthropicSession = true; +} + +export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, defaultMode, initialBackend }) { + return new Promise((resolve, reject) => { + const PRICING_PER_M = loadPricing(); const allBackends = {}; if (backends) { for (const [name, cfg] of Object.entries(backends)) { - allBackends[name] = { - target: new URL(cfg.url), - apiKey: cfg.apiKey, - useBearer: 
cfg.url.includes('openrouter') || cfg.url.includes('fireworks'), - }; + if (cfg?.url || cfg?.target) { + allBackends[name] = normalizeBackendConfig(name, cfg); + } } } - const initialName = defaultMode || (backends ? 'anthropic' : null); + + if (defaultMode && initialBackend && !allBackends[defaultMode]) { + allBackends[defaultMode] = normalizeBackendConfig(defaultMode, initialBackend); + } + + let singleBackend = null; + if (initialBackend) { + singleBackend = normalizeBackendConfig('_single', initialBackend); + } else if (targetUrl && apiKey) { + singleBackend = normalizeBackendConfig('_single', { + url: targetUrl, + apiKey, + type: 'anthropic-compatible', + }); + } + + const initialName = defaultMode || (Object.keys(allBackends).length ? 'anthropic' : null); const startBackend = initialName && initialName !== 'anthropic' && allBackends[initialName]; const state = { mode: initialName || '_single', - target: startBackend ? startBackend.target : initialTarget, - apiKey: startBackend ? startBackend.apiKey : apiKey, - useBearer: startBackend ? startBackend.useBearer : initialBearer, - hadNonAnthropicSession: !!startBackend, + target: startBackend ? startBackend.target : (singleBackend?.target || new URL(targetUrl || ANTHROPIC_FALLBACK)), + apiKey: startBackend ? startBackend.apiKey : (singleBackend?.apiKey || apiKey), + useBearer: startBackend ? startBackend.useBearer : (singleBackend?.useBearer || false), + backendType: startBackend ? 
startBackend.type : (singleBackend?.type || 'anthropic-compatible'), + model: startBackend?.model || singleBackend?.model || null, + window: startBackend?.window || singleBackend?.window || null, + hadNonAnthropicSession: !!(startBackend || (singleBackend?.type === 'responses-bg')), }; + const toolModes = {}; let reqCount = 0; const t0Global = Date.now(); const costs = {}; + function activeBackend() { + if (state.mode === 'anthropic') return null; + if (state.mode === '_single') return singleBackend; + return allBackends[state.mode] || null; + } + + function getToolMode(name) { + return toolModes[name] || 'native'; + } + function recordUsage(backend, inputTokens, outputTokens) { if (!costs[backend]) costs[backend] = { input: 0, output: 0, requests: 0 }; costs[backend].input += inputTokens || 0; @@ -164,7 +279,8 @@ export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, let totalActual = 0; let totalAnthropic = 0; for (const [backend, tokens] of Object.entries(costs)) { - const p = PRICING_PER_M[backend] || PRICING_PER_M._single; + const b = allBackends[backend]; + const p = b?.pricing || PRICING_PER_M[backend] || PRICING_PER_M._single; const ap = PRICING_PER_M.anthropic; const actual = (tokens.input * p.input + tokens.output * p.output) / 1_000_000; const anthropicEq = (tokens.input * ap.input + tokens.output * ap.output) / 1_000_000; @@ -193,29 +309,228 @@ export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, state.target = new URL(ANTHROPIC_FALLBACK); state.apiKey = null; state.useBearer = false; + state.backendType = 'anthropic-compatible'; + state.model = null; return { mode: 'anthropic', previous: prev }; } const b = allBackends[name]; if (!b) return { error: `Unknown backend: ${name}. 
Valid: anthropic, ${Object.keys(allBackends).join(', ')}` }; if (!b.apiKey) return { error: `API key not set for ${name}` }; const prev = state.mode; - state.mode = name; - state.target = b.target; - state.apiKey = b.apiKey; - state.useBearer = b.useBearer; - state.hadNonAnthropicSession = true; - return { mode: name, previous: prev }; + applyBackendToState(state, b); + return { mode: name, previous: prev, type: b.type, window: b.window }; + } + + function setWindow(window) { + const prev = state.window; + state.window = window; + const b = activeBackend(); + if (b) b.window = window; + return { window, previous: prev }; + } + + async function submitBackgroundRequest(backend, anthropicBody, toolMode, reqId) { + const base = `${backend.target.protocol}//${backend.target.host}`; + const createUrl = `${base}${responsesPath(backend.target)}`; + const headers = authHeaders(backend); + + const body = buildResponsesRequest(anthropicBody, { + model: backend.model || anthropicBody.model, + window: backend.window || state.window, + toolMode, + }); + + console.error(`[MODEL-PROXY] #${reqId} bg submit (${toolMode}) → ${createUrl}`); + let res = await httpsJson('POST', createUrl, headers, body); + + if (res.status === 400 && toolMode === 'native' && anthropicBody.tools?.length + && toolsErrorIndicatesUnsupported(res.raw || res.json)) { + console.error(`[MODEL-PROXY] #${reqId} tools rejected, falling back to emulate`); + toolModes[backend.name || state.mode] = 'emulate'; + const emulated = buildResponsesRequest(anthropicBody, { + model: backend.model || anthropicBody.model, + window: backend.window || state.window, + toolMode: 'emulate', + }); + res = await httpsJson('POST', createUrl, headers, emulated); + } else if (res.status >= 200 && res.status < 300 && toolMode === 'native') { + toolModes[backend.name || state.mode] = 'native'; + } + + return res; + } + + async function pollBackgroundResponse(backend, responseId, reqId, onPing, signal) { + const base = 
`${backend.target.protocol}//${backend.target.host}`; + const pollUrl = `${base}${responsesPath(backend.target)}/${responseId}`; + const headers = authHeaders(backend); + const interval = pollIntervalForWindow(backend.window || state.window || '1h'); + + while (!signal.aborted) { + const res = await httpsJson('GET', pollUrl, headers); + if (res.status !== 200) { + throw new Error(`Poll failed (${res.status}): ${res.raw?.slice(0, 200)}`); + } + const status = res.json?.status; + console.error(`[MODEL-PROXY] #${reqId} poll status=${status}`); + if (isTerminalStatus(status)) return res.json; + await new Promise(r => setTimeout(r, interval)); + if (onPing) onPing(); + } + throw new Error('Poll aborted'); + } + + async function cancelBackgroundResponse(backend, responseId) { + const base = `${backend.target.protocol}//${backend.target.host}`; + const url = `${base}${responsesPath(backend.target)}/${responseId}`; + try { + await httpsJson('DELETE', url, authHeaders(backend)); + console.error(`[MODEL-PROXY] cancelled bg response ${responseId}`); + } catch (e) { + console.error(`[MODEL-PROXY] cancel failed: ${e.message}`); + } + } + + async function handleResponsesBg(clientReq, clientRes, bodyBuf, reqId, t0) { + const backend = activeBackend(); + if (!backend) { + clientRes.writeHead(502, { 'content-type': 'application/json' }); + clientRes.end(JSON.stringify({ error: { message: 'No active background backend' } })); + return; + } + + let anthropicBody; + try { + anthropicBody = JSON.parse(bodyBuf); + stripAllThinkingBlocks(anthropicBody); + } catch { + clientRes.writeHead(400, { 'content-type': 'application/json' }); + clientRes.end(JSON.stringify({ error: { message: 'Invalid JSON body' } })); + return; + } + + // Anthropic semantics: stream only when explicitly requested. 
+ const wantsStream = anthropicBody.stream === true; + const usageKey = state.mode; + const toolMode = getToolMode(backend.name || state.mode); + const abort = { aborted: false }; + let responseId = null; + let pingTimer = null; + + const canWrite = () => !abort.aborted && !clientRes.writableEnded && !clientRes.destroyed; + + clientRes.on('close', () => { + if (!clientRes.writableEnded) { + abort.aborted = true; + if (pingTimer) clearInterval(pingTimer); + if (responseId) cancelBackgroundResponse(backend, responseId); + } + }); + + try { + const submit = await submitBackgroundRequest(backend, anthropicBody, toolMode, reqId); + if (submit.status < 200 || submit.status >= 300) { + clientRes.writeHead(submit.status, { 'content-type': 'application/json' }); + clientRes.end(submit.raw || JSON.stringify(submit.json)); + return; + } + + responseId = submit.json?.id; + if (!responseId) { + clientRes.writeHead(502, { 'content-type': 'application/json' }); + clientRes.end(JSON.stringify({ error: { message: 'No response id from provider' } })); + return; + } + + if (wantsStream) { + clientRes.writeHead(200, { + 'content-type': 'text/event-stream', + 'cache-control': 'no-cache', + connection: 'keep-alive', + }); + clientRes.write(ssePing()); + pingTimer = setInterval(() => { + if (canWrite()) clientRes.write(ssePing()); + }, PING_INTERVAL_MS); + } + + const result = await pollBackgroundResponse(backend, responseId, reqId, () => { + if (wantsStream && canWrite()) clientRes.write(ssePing()); + }, abort); + + if (abort.aborted) return; + if (pingTimer) clearInterval(pingTimer); + + const mode = getToolMode(backend.name || state.mode); + const parsed = parseResult(result, mode); + recordUsage(usageKey, parsed.usage?.input_tokens, parsed.usage?.output_tokens); + + if (wantsStream) { + clientRes.end(synthesizeSSE(parsed)); + } else if (parsed.error) { + clientRes.writeHead(502, { 'content-type': 'application/json' }); + clientRes.end(JSON.stringify({ type: 'error', error: { type: 
'api_error', message: parsed.message } })); + } else { + clientRes.writeHead(200, { 'content-type': 'application/json' }); + clientRes.end(JSON.stringify({ + id: parsed.id, + type: 'message', + role: 'assistant', + model: parsed.model, + content: parsed.content, + stop_reason: parsed.stop_reason, + stop_sequence: null, + usage: parsed.usage, + })); + } + + console.error(`[MODEL-PROXY] #${reqId} bg done in ${((Date.now() - t0) / 1000).toFixed(1)}s`); + } catch (err) { + if (pingTimer) clearInterval(pingTimer); + console.error(`[MODEL-PROXY] #${reqId} bg ERROR: ${err.message}`); + if (abort.aborted || clientRes.destroyed) return; + if (clientRes.headersSent) { + // Headers already sent as SSE — emit an Anthropic error event. + if (!clientRes.writableEnded) { + clientRes.end(`event: error\ndata: ${JSON.stringify({ type: 'error', error: { type: 'api_error', message: err.message } })}\n\n`); + } + } else { + clientRes.writeHead(502, { 'content-type': 'application/json' }); + clientRes.end(JSON.stringify({ type: 'error', error: { type: 'api_error', message: err.message } })); + } + } + } + + // CSRF guard: browser pages must not be able to flip the backend. 
+ function isForbiddenOrigin(clientReq) { + const origin = clientReq.headers['origin'] || ''; + return !!origin && !origin.startsWith('http://127.0.0.1') && !origin.startsWith('http://localhost'); + } + + function readControlBody(clientReq, maxSize, onDone) { + const chunks = []; + let bodySize = 0; + clientReq.on('data', c => { + bodySize += c.length; + if (bodySize > maxSize) { clientReq.destroy(); return; } + chunks.push(c); + }); + clientReq.on('end', () => onDone(Buffer.concat(chunks).toString())); } const server = createServer((clientReq, clientRes) => { const urlPath = clientReq.url.split('?')[0]; - // Control endpoints — /_proxy/* (never collides with /v1/*) if (urlPath.startsWith('/_proxy/')) { if (urlPath === '/_proxy/status') { + const b = activeBackend(); clientRes.writeHead(200, { 'content-type': 'application/json' }); clientRes.end(JSON.stringify({ mode: state.mode, + backend_type: state.backendType, + window: state.window, + tool_mode: getToolMode(state.mode), + model: state.model || b?.model, uptime: Math.round((Date.now() - t0Global) / 1000), requests: reqCount, })); @@ -227,21 +542,12 @@ export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, return; } if (urlPath === '/_proxy/mode' && clientReq.method === 'POST') { - const origin = clientReq.headers['origin'] || ''; - if (origin && !origin.startsWith('http://127.0.0.1') && !origin.startsWith('http://localhost')) { + if (isForbiddenOrigin(clientReq)) { clientRes.writeHead(403, { 'content-type': 'application/json' }); clientRes.end(JSON.stringify({ error: 'Forbidden' })); return; } - const chunks = []; - let bodySize = 0; - clientReq.on('data', c => { - bodySize += c.length; - if (bodySize > 1024) { clientReq.destroy(); return; } - chunks.push(c); - }); - clientReq.on('end', () => { - const body = Buffer.concat(chunks).toString(); + readControlBody(clientReq, 1024, (body) => { const m = body.match(/backend=([a-z]+)/); if (!m) { clientRes.writeHead(400, { 'content-type': 
'application/json' }); @@ -254,13 +560,33 @@ export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, clientRes.end(JSON.stringify(result)); return; } - console.log(`[MODEL-PROXY] Mode switched: ${result.previous} → ${result.mode}`); + console.error(`[MODEL-PROXY] Mode switched: ${result.previous} → ${result.mode}`); clientRes.writeHead(200, { 'content-type': 'application/json' }); clientRes.end(JSON.stringify(result)); }); return; } - if (urlPath === '/_proxy/mode' && clientReq.method !== 'POST') { + if (urlPath === '/_proxy/window' && clientReq.method === 'POST') { + if (isForbiddenOrigin(clientReq)) { + clientRes.writeHead(403, { 'content-type': 'application/json' }); + clientRes.end(JSON.stringify({ error: 'Forbidden' })); + return; + } + readControlBody(clientReq, 1024, (body) => { + const m = body.match(/window=([0-9a-z]+)/); + if (!m) { + clientRes.writeHead(400, { 'content-type': 'application/json' }); + clientRes.end(JSON.stringify({ error: 'Missing window= in body (15m|1h|6h|24h|48h)' })); + return; + } + const result = setWindow(m[1]); + console.error(`[MODEL-PROXY] Window: ${result.previous} → ${result.window}`); + clientRes.writeHead(200, { 'content-type': 'application/json' }); + clientRes.end(JSON.stringify(result)); + }); + return; + } + if ((urlPath === '/_proxy/mode' || urlPath === '/_proxy/window') && clientReq.method !== 'POST') { clientRes.writeHead(405, { 'content-type': 'application/json' }); clientRes.end(JSON.stringify({ error: 'Use POST' })); return; @@ -270,16 +596,13 @@ export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, return; } - // In anthropic mode, everything passes through transparently const isAnthropicMode = state.mode === 'anthropic'; const isModelCall = !isAnthropicMode && MODEL_PATHS.includes(urlPath); - const dest = isModelCall ? 
state.target : new URL(ANTHROPIC_FALLBACK); + const isBgBackend = state.backendType === 'responses-bg'; + const dest = isModelCall && !isBgBackend ? state.target : new URL(ANTHROPIC_FALLBACK); - // Build upstream path. target.pathname may overlap with - // clientReq.url (e.g. OpenRouter /api/v1 + /v1/messages). - // Strip the shared prefix to avoid /api/v1/v1/messages. let fullPath; - if (isModelCall) { + if (isModelCall && !isBgBackend) { const base = state.target.pathname.replace(/\/$/, ''); let overlap = ''; for (let i = 1; i <= Math.min(base.length, urlPath.length); i++) { @@ -293,63 +616,62 @@ export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, const reqId = ++reqCount; const t0 = Date.now(); - if (isModelCall) { - console.log(`[MODEL-PROXY] #${reqId} → ${dest.hostname}${fullPath}`); - } + const chunks = []; + clientReq.on('data', c => chunks.push(c)); + clientReq.on('end', () => { + const body = Buffer.concat(chunks); - const headers = { ...clientReq.headers, host: dest.host }; - delete headers['content-length']; + if (isBgBackend && isModelCall && clientReq.method === 'POST') { + handleResponsesBg(clientReq, clientRes, body, reqId, t0); + return; + } - if (isModelCall) { - delete headers['authorization']; - delete headers['x-api-key']; - if (state.useBearer) { - headers['authorization'] = `Bearer ${state.apiKey}`; - } else { - headers['x-api-key'] = state.apiKey; + let bodyMut = body; + if (isModelCall) { + console.error(`[MODEL-PROXY] #${reqId} → ${dest.hostname}${fullPath}`); } - } - const chunks = []; - clientReq.on('data', c => chunks.push(c)); - clientReq.on('end', () => { - let body = Buffer.concat(chunks); + const headers = { ...clientReq.headers, host: dest.host }; + delete headers['content-length']; + + if (isModelCall) { + delete headers['authorization']; + delete headers['x-api-key']; + if (state.useBearer) { + headers['authorization'] = `Bearer ${state.apiKey}`; + } else { + headers['x-api-key'] = state.apiKey; + } + 
} - // Remap Anthropic model names to backend-specific names if (isModelCall && MODEL_REMAP[state.mode]) { try { - const parsed = JSON.parse(body); + const parsed = JSON.parse(bodyMut); const mapped = MODEL_REMAP[state.mode][parsed.model]; if (mapped) { - console.log(`[MODEL-PROXY] #${reqId} model remap: ${parsed.model} → ${mapped}`); + console.error(`[MODEL-PROXY] #${reqId} model remap: ${parsed.model} → ${mapped}`); parsed.model = mapped; - body = Buffer.from(JSON.stringify(parsed)); + bodyMut = Buffer.from(JSON.stringify(parsed)); } - } catch { /* not JSON or parse error, pass through */ } + } catch { /* pass through */ } } - // Strip thinking blocks before forwarding. - // Non-Anthropic: strip ALL blocks — backends reject thinking blocks - // they didn't generate, even unsigned ones. - // Anthropic after a non-Anthropic session: also strip ALL, because - // foreign backends generate signed-but-invalid thinking blocks that - // stripUnsignedThinkingBlocks passes through, causing Anthropic 400s. 
if (isAnthropicMode && MODEL_PATHS.includes(urlPath)) { try { - const parsed = JSON.parse(body); + const parsed = JSON.parse(bodyMut); if (state.hadNonAnthropicSession) { stripAllThinkingBlocks(parsed); } else { stripUnsignedThinkingBlocks(parsed); } - body = Buffer.from(JSON.stringify(parsed)); + bodyMut = Buffer.from(JSON.stringify(parsed)); } catch { /* pass through */ } } - if (isModelCall) { + if (isModelCall && !isBgBackend) { try { - const parsed = JSON.parse(body); + const parsed = JSON.parse(bodyMut); stripAllThinkingBlocks(parsed); - body = Buffer.from(JSON.stringify(parsed)); + bodyMut = Buffer.from(JSON.stringify(parsed)); } catch { /* pass through */ } } @@ -358,14 +680,14 @@ export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, port: dest.port || 443, path: fullPath, method: clientReq.method, - headers: { ...headers, 'content-length': body.length }, + headers: { ...headers, 'content-length': bodyMut.length }, timeout: REQUEST_TIMEOUT_MS, }; const proxyReq = httpsRequest(opts, (proxyRes) => { if (isModelCall) { const ttfb = Date.now() - t0; - console.log(`[MODEL-PROXY] #${reqId} TTFB ${ttfb}ms (status ${proxyRes.statusCode})`); + console.error(`[MODEL-PROXY] #${reqId} TTFB ${ttfb}ms (status ${proxyRes.statusCode})`); } const ct = proxyRes.headers['content-type'] || ''; @@ -376,7 +698,7 @@ export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, const norm = new UsageNormalizer((inp, out) => recordUsage(state.mode, inp, out)); proxyRes.pipe(norm).pipe(clientRes); proxyRes.on('end', () => { - console.log(`[MODEL-PROXY] #${reqId} done in ${((Date.now() - t0) / 1000).toFixed(1)}s (${norm._inputTokens}in/${norm._outputTokens}out)`); + console.error(`[MODEL-PROXY] #${reqId} done in ${((Date.now() - t0) / 1000).toFixed(1)}s (${norm._inputTokens}in/${norm._outputTokens}out)`); }); } else if (isModelCall && ct.includes('application/json')) { const respChunks = []; @@ -391,15 +713,14 @@ export function 
startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, const outHeaders = { ...proxyRes.headers, 'content-length': fixed.length }; clientRes.writeHead(proxyRes.statusCode, outHeaders); clientRes.end(fixed); - console.log(`[MODEL-PROXY] #${reqId} done in ${((Date.now() - t0) / 1000).toFixed(1)}s (json, ${fixed.length}b)`); + console.error(`[MODEL-PROXY] #${reqId} done in ${((Date.now() - t0) / 1000).toFixed(1)}s (json, ${fixed.length}b)`); }); } else { - // Non-model or unknown content-type: pass through clientRes.writeHead(proxyRes.statusCode, proxyRes.headers); proxyRes.pipe(clientRes); if (isModelCall) { proxyRes.on('end', () => { - console.log(`[MODEL-PROXY] #${reqId} done in ${((Date.now() - t0) / 1000).toFixed(1)}s`); + console.error(`[MODEL-PROXY] #${reqId} done in ${((Date.now() - t0) / 1000).toFixed(1)}s`); }); } } @@ -419,7 +740,7 @@ export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, clientRes.end(JSON.stringify({ error: { message: 'Upstream connection error' } })); }); - proxyReq.end(body); + proxyReq.end(bodyMut); }); }); @@ -433,7 +754,7 @@ export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, }); server.listen(port, '127.0.0.1', () => { const actualPort = server.address().port; - console.log(`[MODEL-PROXY] Listening on 127.0.0.1:${actualPort} → ${targetUrl} (mode: ${state.mode})`); + console.error(`[MODEL-PROXY] Listening on 127.0.0.1:${actualPort} (mode: ${state.mode}, type: ${state.backendType})`); resolve({ port: actualPort, close: () => server.close(), switchMode }); }); } diff --git a/proxy/responses-adapter.js b/proxy/responses-adapter.js new file mode 100644 index 0000000..4affb8b --- /dev/null +++ b/proxy/responses-adapter.js @@ -0,0 +1,312 @@ +import { randomUUID } from 'crypto'; + +const TOOL_CALL_RE = /```tool_call\s*\n([\s\S]*?)\n```/g; +const TERMINAL_STATUSES = new Set(['completed', 'failed', 'cancelled', 'expired', 'incomplete']); + +/** + * Convert Anthropic Messages API 
/**
 * Convert an Anthropic Messages API request body into a Responses API request
 * body that is submitted in background mode.
 *
 * Every message is flattened to plain text: tool calls, tool results and
 * thinking blocks are rendered as bracketed tags. Messages that flatten to
 * whitespace only are dropped. When exactly one user message remains, `input`
 * is sent as a bare string; otherwise it is an array of `{ role, content }`.
 *
 * @param {object} anthropicBody - Parsed `/v1/messages` request body.
 * @param {{ model: string, window?: string, toolMode: 'native'|'emulate'|'none' }} opts
 *   `model` overrides the model named in the body; `window` is forwarded as
 *   `metadata.completion_window`; `toolMode` selects native function tools,
 *   prompt-emulated tools, or no tools at all.
 * @returns {object} Request body for `POST /v1/responses`.
 */
export function buildResponsesRequest(anthropicBody, { model, window, toolMode = 'native' }) {
  const req = {
    model: model || anthropicBody.model,
    background: true,
    max_output_tokens: anthropicBody.max_tokens,
  };

  if (anthropicBody.temperature != null) req.temperature = anthropicBody.temperature;
  if (anthropicBody.top_p != null) req.top_p = anthropicBody.top_p;

  if (window) {
    req.metadata = { completion_window: window };
  }

  let instructions = anthropicBody.system ? flattenSystem(anthropicBody.system) : '';

  if (anthropicBody.thinking?.type === 'enabled') {
    req.enable_thinking = true;
    req.include_reasoning = true;
  }

  const input = [];
  for (const msg of anthropicBody.messages || []) {
    const role = msg.role === 'assistant' ? 'assistant' : 'user';
    const text = flattenMessageContent(msg.content);
    if (!text.trim()) continue;
    input.push({ role, content: text });
  }
  req.input = input.length === 1 && input[0].role === 'user' ? input[0].content : input;

  const hasTools = Boolean(anthropicBody.tools?.length);
  if (toolMode === 'native' && hasTools) {
    req.tools = anthropicBody.tools.map(t => ({
      type: 'function',
      name: t.name,
      description: t.description || '',
      parameters: t.input_schema || { type: 'object', properties: {} },
    }));
  } else if (toolMode === 'emulate' && hasTools) {
    instructions = appendToolEmulationPrompt(instructions, anthropicBody.tools);
  }

  if (instructions) req.instructions = instructions;
  return req;
}

/**
 * Flatten an Anthropic `system` field (a string or an array of blocks) into one
 * string. Only `text` blocks contribute; any other shape yields ''.
 */
function flattenSystem(system) {
  if (typeof system === 'string') return system;
  if (!Array.isArray(system)) return '';
  return system
    .filter(b => b.type === 'text')
    .map(b => b.text)
    .join('\n\n');
}

/**
 * Flatten one message's content (a string or an array of content blocks) into
 * plain text, with blocks separated by blank lines.
 */
function flattenMessageContent(content) {
  if (typeof content === 'string') return content;
  if (!Array.isArray(content)) return '';

  const parts = [];
  for (const block of content) {
    switch (block.type) {
      case 'text':
        parts.push(block.text);
        break;
      case 'thinking':
        if (block.thinking) parts.push(`[thinking]\n${block.thinking}`);
        break;
      case 'tool_use':
        // A call without arguments is rendered as `{}` rather than the text "undefined".
        parts.push(`[tool_use name=${block.name} id=${block.id}]\n${JSON.stringify(block.input ?? {})}`);
        break;
      case 'tool_result': {
        // Keep the failure flag: without it the model cannot tell an error
        // message apart from ordinary tool output.
        const errorTag = block.is_error ? ' is_error=true' : '';
        parts.push(`[tool_result tool_use_id=${block.tool_use_id}${errorTag}]\n${formatToolResult(block.content)}`);
        break;
      }
      case 'image':
        parts.push('[image omitted — background providers do not support vision]');
        break;
      default:
        if (block.text) parts.push(block.text);
    }
  }
  return parts.join('\n\n');
}

/**
 * Render the `content` of a tool_result block as text. Missing content becomes
 * '', strings pass through, arrays are joined line by line (non-text entries as
 * JSON), and anything else is JSON-encoded.
 */
function formatToolResult(content) {
  if (content == null) return '';
  if (typeof content === 'string') return content;
  if (!Array.isArray(content)) return JSON.stringify(content);
  return content
    .map(c => (c?.type === 'text' ? c.text : JSON.stringify(c)))
    .join('\n');
}

/**
 * Append a prompt section that teaches the model to request tools through a
 * fenced `tool_call` block.
 *
 * The example uses explicit placeholders so the model fills in a real tool name
 * and a distinct id instead of copying an empty name and a constant id.
 */
function appendToolEmulationPrompt(instructions, tools) {
  const schemas = tools.map(t =>
    `- ${t.name}: ${t.description || ''}\n  schema: ${JSON.stringify(t.input_schema || {})}`
  ).join('\n');

  const emulation = [
    '',
    '## Tool calling (emulated)',
    'You have access to these tools:',
    schemas,
    '',
    'When you need to call a tool, respond with ONLY a fenced block:',
    '```tool_call',
    '{"name": "<tool_name>", "input": {<arguments matching the schema>}, "id": "toolu_<unique_id>"}',
    '```',
    'Do not include other text when calling a tool.',
  ].join('\n');

  return instructions ? `${instructions}\n${emulation}` : emulation;
}
/**
 * Parse a finished Responses API result into an Anthropic message.
 *
 * @param {object} response - Response object returned by `GET /v1/responses/{id}`.
 * @param {'native'|'emulate'|'none'} [toolMode='native'] - In `native` and
 *   `emulate` modes, fenced `tool_call` blocks in output text are converted
 *   into `tool_use` content blocks.
 * @returns {object} Either `{ error: true, message, usage }` for a failed,
 *   cancelled or expired response, or an Anthropic message
 *   `{ id, type, role, model, content, stop_reason, usage }`.
 */
export function parseResult(response, toolMode = 'native') {
  const usage = {
    input_tokens: response.usage?.input_tokens || 0,
    output_tokens: response.usage?.output_tokens || 0,
  };

  // 'incomplete' (e.g. max_output_tokens hit) may still carry partial output,
  // so it is deliberately not treated as a failure here.
  const failedStatuses = ['failed', 'cancelled', 'expired'];
  if (response.error || failedStatuses.includes(response.status)) {
    return {
      error: true,
      message: response.error?.message || `Response ${response.status}`,
      usage,
    };
  }

  const content = [];
  let hasToolUse = false;

  for (const item of response.output || []) {
    if (item.type === 'reasoning') {
      const text = reasoningText(item);
      if (text) content.push({ type: 'thinking', thinking: text });
      continue;
    }
    if (item.type === 'message') {
      for (const block of item.content || []) {
        if (block.type !== 'output_text' || !block.text) continue;
        const parsed = extractToolCallsFromText(block.text, toolMode);
        if (parsed.toolUses.length) {
          content.push(...parsed.toolUses);
          hasToolUse = true;
          if (parsed.remainingText) {
            content.push({ type: 'text', text: parsed.remainingText });
          }
        } else {
          content.push({ type: 'text', text: block.text });
        }
      }
      continue;
    }
    if (item.type === 'function_call' || item.type === 'tool_call') {
      hasToolUse = true;
      content.push({
        type: 'tool_use',
        id: item.call_id || item.id || `toolu_${randomUUID().replace(/-/g, '').slice(0, 24)}`,
        name: item.name,
        input: parseToolArguments(item),
      });
    }
  }

  // A response cut off by the output-token budget must not be reported as a
  // normal end of turn: the client needs to know the text was truncated.
  const truncated = response.status === 'incomplete'
    && response.incomplete_details?.reason === 'max_output_tokens';
  let stopReason = 'end_turn';
  if (hasToolUse) stopReason = 'tool_use';
  else if (truncated) stopReason = 'max_tokens';

  return {
    id: response.id || `msg_${randomUUID().replace(/-/g, '').slice(0, 24)}`,
    type: 'message',
    role: 'assistant',
    model: response.model || 'unknown',
    content: content.length ? content : [{ type: 'text', text: '' }],
    stop_reason: stopReason,
    usage,
  };
}

/** Join the `text` of a list of parts (or plain strings) with newlines. */
function joinTextParts(parts) {
  return parts
    .map(p => (typeof p === 'string' ? p : p?.text))
    .filter(Boolean)
    .join('\n');
}

/**
 * Extract the reasoning text of a `reasoning` output item as a string.
 * The summary is preferred; `content` is the fallback and may be either a
 * string or an array of text parts.
 */
function reasoningText(item) {
  let summary = '';
  if (typeof item.summary === 'string') summary = item.summary;
  else if (Array.isArray(item.summary)) summary = joinTextParts(item.summary);
  if (summary) return summary;

  if (typeof item.content === 'string') return item.content;
  if (Array.isArray(item.content)) return joinTextParts(item.content);
  return '';
}

/**
 * Resolve the arguments of a function/tool call item.
 * String `arguments` take precedence and are JSON-decoded. An empty string
 * means "no arguments" and yields `{}`. Text that does not decode to an object
 * is preserved under `_raw` so nothing is lost.
 */
function parseToolArguments(item) {
  const raw = typeof item.arguments === 'string'
    ? item.arguments
    : (item.input || item.arguments || {});
  if (typeof raw !== 'string') return raw;
  if (!raw.trim()) {
    return item.input && typeof item.input === 'object' ? item.input : {};
  }
  try {
    const parsed = JSON.parse(raw);
    if (parsed && typeof parsed === 'object') return parsed;
  } catch { /* not JSON: fall through and keep the raw text */ }
  return { _raw: raw };
}

/**
 * Pull fenced `tool_call` blocks out of model text.
 *
 * Blocks that parse to an object with a non-empty string `name` become
 * `tool_use` entries and are removed from the text. Blocks that fail to parse,
 * or that have no usable name, are left in the text verbatim.
 *
 * @returns {{ toolUses: object[], remainingText: string }}
 */
function extractToolCallsFromText(text, toolMode) {
  const toolUses = [];
  if (toolMode !== 'emulate' && toolMode !== 'native') {
    return { toolUses, remainingText: text };
  }

  const remainingText = text.replace(TOOL_CALL_RE, (match, jsonStr) => {
    try {
      const obj = JSON.parse(jsonStr.trim());
      if (!obj || typeof obj.name !== 'string' || !obj.name) return match;
      toolUses.push({
        type: 'tool_use',
        id: obj.id || `toolu_${randomUUID().replace(/-/g, '').slice(0, 24)}`,
        name: obj.name,
        input: obj.input || {},
      });
      return '';
    } catch {
      return match; // keep the malformed block in the text
    }
  }).trim();

  return { toolUses, remainingText };
}

/** True when `status` is one after which polling should stop. */
export function isTerminalStatus(status) {
  return TERMINAL_STATUSES.has(status);
}

/**
 * Polling interval in milliseconds for a completion window: 5 s for '15m',
 * 15 s for '1h', 60 s for '6h'/'24h'/'48h', and 15 s for anything else.
 */
export function pollIntervalForWindow(window) {
  switch (window) {
    case '15m': return 5_000;
    case '1h': return 15_000;
    case '6h':
    case '24h':
    case '48h': return 60_000;
    default: return 15_000;
  }
}
/**
 * Synthesize a complete Anthropic SSE event stream from a parsed message.
 *
 * An error message (`{ error: true, message }`) yields a single `error` event.
 * Any other message yields `message_start`, then one start/delta/stop triple
 * per thinking, text or tool_use block, then `message_delta` and
 * `message_stop`. Blocks of other types are skipped.
 *
 * @param {object} message - Parsed message, or an error object.
 * @returns {string} full SSE body
 */
export function synthesizeSSE(message) {
  if (message.error) {
    return sseEvent({ type: 'error', error: { type: 'api_error', message: message.message } });
  }

  const usage = message.usage || {};
  // A message without a content array produces an empty but well-formed stream.
  const blocks = Array.isArray(message.content) ? message.content : [];

  const events = [
    sseEvent({
      type: 'message_start',
      message: {
        id: message.id,
        type: 'message',
        role: 'assistant',
        model: message.model,
        content: [],
        stop_reason: null,
        usage: { input_tokens: usage.input_tokens ?? 0, output_tokens: 0 },
      },
    }),
  ];

  let blockIndex = 0;
  for (const block of blocks) {
    const pair = blockStartAndDelta(block);
    if (!pair) continue; // unknown block type: skipped, index not consumed
    events.push(sseEvent({ type: 'content_block_start', index: blockIndex, content_block: pair.start }));
    events.push(sseEvent({ type: 'content_block_delta', index: blockIndex, delta: pair.delta }));
    events.push(sseEvent({ type: 'content_block_stop', index: blockIndex }));
    blockIndex++;
  }

  events.push(sseEvent({
    type: 'message_delta',
    delta: { stop_reason: message.stop_reason, stop_sequence: null },
    usage: { output_tokens: usage.output_tokens ?? 0 },
  }));
  events.push(sseEvent({ type: 'message_stop' }));

  return events.join('');
}

/**
 * Build the `content_block` payload and the single delta that carries the whole
 * body of one content block. Returns null for block types that are not streamed.
 */
function blockStartAndDelta(block) {
  switch (block?.type) {
    case 'thinking':
      return {
        start: { type: 'thinking', thinking: '' },
        delta: { type: 'thinking_delta', thinking: block.thinking },
      };
    case 'text':
      return {
        start: { type: 'text', text: '' },
        delta: { type: 'text_delta', text: block.text },
      };
    case 'tool_use':
      return {
        start: { type: 'tool_use', id: block.id, name: block.name, input: {} },
        // Always send a JSON document: a missing input would otherwise drop
        // `partial_json` from the serialized delta.
        delta: { type: 'input_json_delta', partial_json: JSON.stringify(block.input ?? {}) },
      };
    default:
      return null;
  }
}

/** SSE keep-alive frame. */
export function ssePing() {
  return 'event: ping\ndata: {"type":"ping"}\n\n';
}

/** Serialize one event object as an SSE frame named after its `type`. */
function sseEvent(obj) {
  return `event: ${obj.type}\ndata: ${JSON.stringify(obj)}\n\n`;
}

/**
 * Heuristic: does an upstream error body say that tools are not accepted?
 * True when the text mentions "tool" together with one of unsupported /
 * invalid / unknown / not allowed / not supported (case-insensitive).
 *
 * @param {string|object} errBody - Raw error text or a parsed error body.
 * @returns {boolean}
 */
export function toolsErrorIndicatesUnsupported(errBody) {
  // JSON.stringify(undefined) is undefined; fall back to '' so the literal
  // word "undefined" is never matched.
  const s = typeof errBody === 'string' ? errBody : (JSON.stringify(errBody) ?? '');
  return /tool/i.test(s) && (/unsupported|invalid|unknown|not allowed|not supported/i.test(s));
}
// Register an optional user-defined background provider as the `bg` backend.
// URL, API key and model must all be set; window and auth style fall back to
// '1h' and 'x-api-key'.
if (process.env.BG_PROVIDER_URL && process.env.BG_PROVIDER_API_KEY && process.env.BG_PROVIDER_MODEL) {
  BACKEND_DEFS.bg = {
    url: process.env.BG_PROVIDER_URL,
    keyEnv: 'BG_PROVIDER_API_KEY',
    type: 'responses-bg',
    model: process.env.BG_PROVIDER_MODEL,
    window: process.env.BG_PROVIDER_WINDOW || '1h',
    authStyle: process.env.BG_PROVIDER_AUTH_STYLE || 'x-api-key',
  };
}

/**
 * Build the runtime entry for one backend definition.
 *
 * @param {string} name - Backend name (not used in the entry itself).
 * @param {object} def - Definition with `url`, `keyEnv` and optional `type`,
 *   `model`, `window`, `authStyle`.
 * @param {string} [keyOverride] - API key to use instead of `process.env[def.keyEnv]`.
 * @returns {object|null} The entry, or null for a `responses-bg` backend that
 *   has no API key. A missing `type` defaults to 'anthropic-compatible'.
 */
function buildBackendEntry(name, def, keyOverride) {
  const key = keyOverride ?? process.env[def.keyEnv];
  if (!key && def.type === 'responses-bg') return null;
  return {
    url: def.url,
    apiKey: key || null,
    type: def.type || 'anthropic-compatible',
    model: def.model,
    window: def.window,
    authStyle: def.authStyle,
  };
}

/**
 * Build entries for every backend in BACKEND_DEFS. Anthropic-compatible
 * backends are always included (with `apiKey: null` when no key is set);
 * other backends are included only when they have a key.
 *
 * @returns {Object<string, object>} Map of backend name to entry.
 */
function buildAllBackends() {
  const backends = {};
  for (const [name, def] of Object.entries(BACKEND_DEFS)) {
    const entry = buildBackendEntry(name, def);
    // Test the normalized entry type, not the raw definition, so a definition
    // without an explicit `type` is treated the same way buildBackendEntry
    // treats it.
    if (entry && (entry.apiKey || entry.type === 'anthropic-compatible')) {
      backends[name] = entry;
    }
  }
  return backends;
}

/**
 * Parse command-line arguments.
 *
 * Flags: `--window <w>`, `--mode <m>`, `--port <n>`, each also accepted as
 * `--flag=value`. A flag with no value, or a non-numeric port, is ignored.
 * Positionals: the first token starting with "http" is the target URL; the
 * next non-flag token after a URL is the API key; the next remaining non-flag
 * token is the backend name.
 *
 * @param {string[]} argv - Arguments without the node/script prefix.
 * @returns {{ window: string, mode?: string, port?: number, targetUrl?: string,
 *   apiKey?: string, backendName?: string }} `window` defaults to
 *   SFERENCE_COMPLETION_WINDOW or '1h'.
 */
function parseArgs(argv) {
  const args = { window: process.env.SFERENCE_COMPLETION_WINDOW || '1h' };
  const valueFlags = new Set(['--window', '--mode', '--port']);

  for (let i = 0; i < argv.length; i++) {
    const token = argv[i];

    // Split `--flag=value` into its two halves.
    let flag = token;
    let value;
    const eq = token.startsWith('--') ? token.indexOf('=') : -1;
    if (eq > 0) {
      flag = token.slice(0, eq);
      value = token.slice(eq + 1);
    }

    if (valueFlags.has(flag)) {
      if (value === undefined && argv[i + 1]) value = argv[++i];
      if (!value) continue; // flag given without a value: ignore it
      if (flag === '--window') {
        args.window = value;
      } else if (flag === '--mode') {
        args.mode = value;
      } else {
        const port = parseInt(value, 10);
        if (!Number.isNaN(port)) args.port = port;
      }
      continue;
    }

    if (!args.targetUrl && token.startsWith('http')) {
      args.targetUrl = token;
    } else if (!args.apiKey && args.targetUrl && !token.startsWith('-')) {
      args.apiKey = token;
    } else if (!args.backendName && !token.startsWith('-')) {
      args.backendName = token;
    }
  }
  return args;
}
parseArgs(process.argv.slice(2)); +const targetUrl = cli.targetUrl || process.env.CHEAPCLAUDE_TARGET_URL; +const apiKey = cli.apiKey || process.env.CHEAPCLAUDE_API_KEY; +const backendName = cli.backendName || process.env.CHEAPCLAUDE_DEFAULT_MODE; + +if (targetUrl && apiKey) { + const backends = buildAllBackends(); + const mode = backendName || process.env.CHEAPCLAUDE_DEFAULT_BACKEND; + + let initialBackend = null; + if (mode && BACKEND_DEFS[mode]) { + const def = BACKEND_DEFS[mode]; + initialBackend = buildBackendEntry(mode, def, apiKey); + if (initialBackend && cli.window) initialBackend.window = cli.window; + if (initialBackend?.apiKey) backends[mode] = initialBackend; + } else if (backendName === 'responses-bg' || process.env.CHEAPCLAUDE_BACKEND_TYPE === 'responses-bg') { + initialBackend = { + url: targetUrl, + apiKey, + type: 'responses-bg', + model: process.env.SFERENCE_MODEL || process.env.BG_PROVIDER_MODEL, + window: cli.window, + authStyle: 'x-api-key', + }; } + const hasBackends = Object.keys(backends).length > 0; + const defaultMode = mode && backends[mode] ? mode : (initialBackend?.type === 'responses-bg' ? mode : undefined); const { port } = await startModelProxy({ targetUrl, apiKey, backends: hasBackends ? backends : undefined, - defaultMode: hasBackends ? 
undefined : undefined, + defaultMode, + initialBackend: initialBackend || undefined, }); console.log(port); } else { - // Standalone mode with live toggle - const backends = {}; + const backends = buildAllBackends(); for (const [name, def] of Object.entries(BACKEND_DEFS)) { - const key = process.env[def.keyEnv]; - backends[name] = { url: def.url, apiKey: key || null }; + if (!backends[name]) { + backends[name] = buildBackendEntry(name, def) || { url: def.url, apiKey: null, type: def.type }; + } } - const fallbackUrl = backends.deepseek?.url || 'https://api.deepseek.com/anthropic'; - const fallbackKey = backends.deepseek?.apiKey || 'unused'; + const defaultMode = cli.mode || 'anthropic'; + const port = cli.port || 3200; + + if (cli.window && backends.sference) backends.sference.window = cli.window; - const args = process.argv.slice(2); - const modeFlag = args.indexOf('--mode'); - const defaultMode = modeFlag >= 0 ? args[modeFlag + 1] : 'anthropic'; - const portFlag = args.indexOf('--port'); - const port = portFlag >= 0 ? parseInt(args[portFlag + 1], 10) : 3200; + const fallback = backends.sference?.apiKey ? backends.sference : backends.deepseek; + const fallbackUrl = fallback?.url || 'https://api.deepseek.com/anthropic'; + const fallbackKey = fallback?.apiKey || 'unused'; const proxy = await startModelProxy({ targetUrl: fallbackUrl, @@ -53,6 +149,7 @@ if (targetUrl && apiKey) { }); console.log(`Proxy on :${proxy.port} (mode: ${defaultMode})`); - console.log(`Switch: curl -sX POST http://127.0.0.1:${proxy.port}/_proxy/mode -d backend=deepseek`); + console.log(`Switch: curl -sX POST http://127.0.0.1:${proxy.port}/_proxy/mode -d backend=sference`); + console.log(`Window: curl -sX POST http://127.0.0.1:${proxy.port}/_proxy/window -d window=1h`); console.log(`Status: curl -s http://127.0.0.1:${proxy.port}/_proxy/status`); }