diff --git a/src/mcp/bin.ts b/src/mcp/bin.ts index ea482300..fd101133 100644 --- a/src/mcp/bin.ts +++ b/src/mcp/bin.ts @@ -27,6 +27,15 @@ * MCP_CODER_FANOUT_HARNESSES comma-separated harness ids to use for variants > 1 * MCP_DISABLE_CODER set to `1` to omit `delegate_code` * MCP_DISABLE_RESEARCHER set to `1` to omit `delegate_research` even when peer is present + * MCP_RESEARCHER_HARNESS researcher worker harness (default `opencode`) + * MCP_RESEARCHER_MODEL researcher worker model id (falls back to + * MCP_WORKER_MODEL, then WORKER_MODEL, then a default) + * MCP_RESEARCHER_FANOUT_HARNESSES comma-separated harnesses for researcher variants > 1 + * MCP_RESEARCHER_FANOUT_MODELS comma-separated per-harness models, index-aligned + * MCP_RESEARCHER_ROUTER_KEY OpenAI-compatible router key for the in-box agent + * (defaults to TANGLE_API_KEY) + * MCP_RESEARCHER_ROUTER_BASE_URL router base for the in-box agent (defaults to the repo's + * resolveRouterBaseUrl; `/v1` is appended unless it already ends in `/vN`) * AGENT_RUNTIME_DELEGATION_STATE_FILE * optional — absolute path of a JSON state * file. 
When set, delegation records persist @@ -66,6 +75,7 @@ import { type ResearcherDelegate, settleDetachedCoderTurn, } from './delegates' +import { DEFAULT_SANDBOX_BASE_URL } from './delegation-profile' import { FileDelegationStore } from './delegation-store' import { composeLoopTraceEmitters } from './delegation-trace' import { @@ -78,6 +88,11 @@ import { runDetachedTurn, } from './detached-turn' import type { DelegationExecutor } from './executor' +import { + applyRouterEnv, + type ProvisionableSpec, + resolveResearcherProvisioning, +} from './researcher-provisioning' import { createMcpServer } from './server' import { type DelegationResumeDriver, DelegationTaskQueue } from './task-queue' import { @@ -352,11 +367,11 @@ async function loadSandboxClient(apiKey: string | undefined): Promise undefined) if (!mod) return undefined - type SingleFactory = (opts: { task: unknown }) => ResearcherProfilePreset - type FanoutFactory = (opts: { task: unknown }) => ResearcherFanoutPreset + type SingleFactory = (opts: { + task: unknown + harness?: string + model?: string + }) => ResearcherProfilePreset + type FanoutFactory = (opts: { + task: unknown + harnesses?: string[] + models?: (string | undefined)[] + }) => ResearcherFanoutPreset const fanoutFactory = (mod as { multiHarnessResearcherFanout?: FanoutFactory }) .multiHarnessResearcherFanout const singleFactory = (mod as { researcherProfile?: SingleFactory }).researcherProfile if (!fanoutFactory || !singleFactory) return undefined + // Worker harness + model + provider auth. Two reasons a researcher run otherwise makes + // zero LLM calls and "produces no winner" on a successful box: (1) the profile's default + // harness (opencode/zai-coding-plan/glm-5.1) is not broadly provisionable; (2) the + // sandbox SDK does not wire backend.model.apiKey into the in-box agent's OpenAI-compatible + // provider. 
resolveResearcherProvisioning picks a provisionable harness + model and the + // router creds (all env-overridable); applyRouterEnv injects them as box env. Applied to + // BOTH the single-variant path and every fanout agent-run so variants > 1 work too. + const { + harness, + model, + routerKey, + routerBaseUrl, + fanoutHarnesses: cfgFanoutHarnesses, + fanoutModels, + } = resolveResearcherProvisioning() + const buildPreset = (task: unknown): ResearcherProfilePreset => { + const preset = singleFactory({ task, harness, model }) + applyRouterEnv(preset.agentRunSpec as ProvisionableSpec, routerKey, routerBaseUrl) + return preset + } + const settleSingle = async ( turn: DetachedTurn, args: DelegateResearchArgs, @@ -405,7 +449,7 @@ async function loadResearcherSupport( signal: AbortSignal, ): Promise => { const task = buildResearchTask(args) - const preset = singleFactory({ task }) + const preset = buildPreset(task) if (!preset.validator) { throw new Error('agent-runtime-mcp: researcher preset exposes no validator; cannot settle') } @@ -423,7 +467,7 @@ async function loadResearcherSupport( const loopEmitter = composeLoopTraceEmitters(traceEmitter, ctx.traceEmitter) ctx.report({ iteration: 0, phase: 'starting' }) if (variants <= 1) { - const preset = singleFactory({ task }) + const preset = buildPreset(task) // Detached dispatch — same contract as the coder delegate: one session // on one box, driveTurn ticks, resume key bound to the sandbox id. if (ctx.detachedSessionRef !== undefined && ctx.updateDetachedSessionRef) { @@ -472,10 +516,26 @@ async function loadResearcherSupport( ctx.report({ iteration: 1, phase: 'completed' }) return output as ResearchOutputShape } - const fanout = fanoutFactory({ task }) + // Match the single-variant fix: use a provisionable harness/model and inject router + // creds into every fanout agent-run, else variants > 1 makes zero LLM calls. 
Default to + // `variants` copies of the working harness; MCP_RESEARCHER_FANOUT_HARNESSES overrides for + // diversity (with optional per-harness MCP_RESEARCHER_FANOUT_MODELS). + const fanoutHarnesses = cfgFanoutHarnesses ?? Array.from({ length: variants }, () => harness) + const fanout = fanoutFactory({ + task, + harnesses: fanoutHarnesses, + models: fanoutHarnesses.map((_, i) => fanoutModels?.[i] ?? model), + }) + for (const spec of fanout.agentRuns) { + applyRouterEnv(spec as ProvisionableSpec, routerKey, routerBaseUrl) + } + // The harness list may be shorter than `variants` (misconfig) — never claim more + // iterations than there are runs. + const runs = fanout.agentRuns.slice(0, variants) + const effectiveVariants = Math.max(1, runs.length) const result = await runLoop({ driver: fanout.driver, - agentRuns: fanout.agentRuns.slice(0, variants), + agentRuns: runs, output: fanout.output, validator: fanout.validator, task, @@ -484,8 +544,8 @@ async function loadResearcherSupport( signal: ctx.signal, ...(loopEmitter ? { traceEmitter: loopEmitter } : {}), }, - maxIterations: variants, - maxConcurrency: Math.min(maxConcurrency, variants), + maxIterations: effectiveVariants, + maxConcurrency: Math.min(maxConcurrency, effectiveVariants), }) const output = result.winner?.output if (!output) throw new Error('researcher delegate fanout produced no winner') @@ -498,7 +558,8 @@ async function loadResearcherSupport( resume: { message(args) { const task = buildResearchTask(args) - const spec = singleFactory({ task }).agentRunSpec as AgentRunSpec + // Use the same preset construction as dispatch so the displayed prompt can't drift. 
/**
 * Researcher delegate provisioning — resolves the worker harness, model, and router
 * credentials for `delegate_research`, and injects the OpenAI-compatible router creds
 * into a sandbox agent-run spec.
 *
 * Why this exists: the agent-knowledge researcher profile defaults to a harness
 * (`opencode/zai-coding-plan/glm-5.1`) that isn't broadly provisionable, and the sandbox
 * SDK does not wire `backend.model.apiKey` into the in-box agent's OpenAI-compatible
 * provider. So the MCP server picks a provisionable harness + model and passes the router
 * creds as box env. Everything is env-overridable and reuses the repo's router resolution.
 *
 * `resolveRouterBaseUrl` and the `RouterEnv` type are supplied by this module's
 * `../model-resolution.js` import (a file-level dependency, not redefined here).
 */

/** Settings produced by {@link resolveResearcherProvisioning}. */
export interface ResearcherProvisioning {
  /** Worker harness id (MCP_RESEARCHER_HARNESS, default `opencode`). */
  harness: string
  /** Worker model id (router-served). */
  model: string
  /** OpenAI-compatible router key for the in-box provider; undefined disables injection. */
  routerKey?: string
  /** OpenAI-compatible router base, always ending in a `/vN` segment and never in `/`. */
  routerBaseUrl: string
  /** Explicit fanout harness list (MCP_RESEARCHER_FANOUT_HARNESSES); undefined ⇒ caller defaults. */
  fanoutHarnesses?: string[]
  /**
   * Per-harness fanout model overrides (MCP_RESEARCHER_FANOUT_MODELS), index-aligned with
   * the harness list. A blank slot is kept as `undefined` so the entries after it stay on
   * their own index; consumers are expected to fall back to `model` for such slots.
   */
  fanoutModels?: (string | undefined)[]
}

/** A sandbox agent-run spec whose box env can be overridden. */
export interface ProvisionableSpec {
  // NOTE(review): the type arguments were stripped from the reviewed text; they are
  // reconstructed here from how applyRouterEnv uses the field (string env values plus
  // arbitrary sibling keys) — confirm against the committed file.
  sandboxOverrides?: { env?: Record<string, string> } & Record<string, unknown>
}

const DEFAULT_HARNESS = 'opencode'
const DEFAULT_MODEL = 'moonshotai/kimi-k2.6'

/** Trim `value`; empty and whitespace-only strings count as unset. */
function trimmed(value: string | undefined): string | undefined {
  const t = value?.trim()
  return t ? t : undefined
}

/** Parse a comma-separated list, dropping blank entries; undefined when nothing remains. */
function csv(value: string | undefined): string[] | undefined {
  const list = value
    ?.split(',')
    .map((v) => v.trim())
    .filter(Boolean)
  return list && list.length > 0 ? list : undefined
}

/**
 * Parse a comma-separated list whose positions are meaningful. Unlike {@link csv}, a blank
 * entry keeps its slot (as `undefined`) instead of shifting later entries to a lower index.
 * Trailing blank slots carry no positional information and are dropped, so a list with no
 * real entries yields `undefined`.
 */
function alignedCsv(value: string | undefined): (string | undefined)[] | undefined {
  if (value === undefined) return undefined
  const slots = value.split(',').map((v) => trimmed(v))
  while (slots.length > 0 && slots[slots.length - 1] === undefined) slots.pop()
  return slots.length > 0 ? slots : undefined
}

/**
 * Resolve harness/model/router settings from env.
 *
 * - Harness: `MCP_RESEARCHER_HARNESS`, else `opencode`.
 * - Model: `MCP_RESEARCHER_MODEL` → `MCP_WORKER_MODEL` → `WORKER_MODEL` → built-in default.
 * - Router key: `MCP_RESEARCHER_ROUTER_KEY` → `TANGLE_API_KEY`; omitted from the result
 *   when neither is set.
 * - Router base: `MCP_RESEARCHER_ROUTER_BASE_URL`, else `resolveRouterBaseUrl(env)`.
 *   Trailing slashes are removed; a base that already ends in a `/vN` segment is kept
 *   as-is, otherwise `/v1` is appended.
 * - Fanout lists: `MCP_RESEARCHER_FANOUT_HARNESSES` (blank entries dropped) and
 *   `MCP_RESEARCHER_FANOUT_MODELS` (index-aligned; blank slots preserved as `undefined`).
 *
 * Empty or whitespace-only env values are treated as unset.
 *
 * @param env Environment to read; defaults to `process.env`.
 * @returns The resolved provisioning; optional fields are absent rather than undefined.
 */
export function resolveResearcherProvisioning(
  env: NodeJS.ProcessEnv = process.env,
): ResearcherProvisioning {
  const harness = trimmed(env.MCP_RESEARCHER_HARNESS) ?? DEFAULT_HARNESS
  const model =
    trimmed(env.MCP_RESEARCHER_MODEL) ??
    trimmed(env.MCP_WORKER_MODEL) ??
    trimmed(env.WORKER_MODEL) ??
    DEFAULT_MODEL
  const routerKey = trimmed(env.MCP_RESEARCHER_ROUTER_KEY) ?? trimmed(env.TANGLE_API_KEY)
  // Strip every trailing slash before looking for the version segment, so inputs such as
  // `…/v1//` normalize to `…/v1` instead of gaining a second `/v1`.
  const base = (
    trimmed(env.MCP_RESEARCHER_ROUTER_BASE_URL) ?? resolveRouterBaseUrl(env as RouterEnv)
  ).replace(/\/+$/, '')
  const routerBaseUrl = /\/v\d+$/.test(base) ? base : `${base}/v1`
  const fanoutHarnesses = csv(env.MCP_RESEARCHER_FANOUT_HARNESSES)
  // Models are matched to harnesses by index, so blank slots must not collapse.
  const fanoutModels = alignedCsv(env.MCP_RESEARCHER_FANOUT_MODELS)
  return {
    harness,
    model,
    ...(routerKey ? { routerKey } : {}),
    routerBaseUrl,
    ...(fanoutHarnesses ? { fanoutHarnesses } : {}),
    ...(fanoutModels ? { fanoutModels } : {}),
  }
}

/**
 * Overlay the router creds onto a spec's box env (in place): preserve every env var the
 * preset already supplied and set OPENAI_API_KEY / OPENAI_BASE_URL on top (these two are
 * intentionally authoritative — they point the in-box provider at the router). No-op when
 * there is no router key.
 *
 * @param spec Agent-run spec to mutate; `sandboxOverrides` is created when absent.
 * @param routerKey Router key; `undefined` or an empty string leaves `spec` untouched.
 * @param routerBaseUrl Router base URL written to `OPENAI_BASE_URL`.
 */
export function applyRouterEnv(
  spec: ProvisionableSpec,
  routerKey: string | undefined,
  routerBaseUrl: string,
): void {
  if (!routerKey) return
  spec.sandboxOverrides = {
    ...(spec.sandboxOverrides ?? {}),
    env: {
      ...(spec.sandboxOverrides?.env ?? {}),
      OPENAI_API_KEY: routerKey,
      OPENAI_BASE_URL: routerBaseUrl,
    },
  }
}
it('falls back through MCP_WORKER_MODEL then WORKER_MODEL', () => { + expect( + resolveResearcherProvisioning({ WORKER_MODEL: 'wm' } as unknown as NodeJS.ProcessEnv).model, + ).toBe('wm') + expect( + resolveResearcherProvisioning({ + MCP_WORKER_MODEL: 'mwm', + WORKER_MODEL: 'wm', + } as unknown as NodeJS.ProcessEnv).model, + ).toBe('mwm') + }) + + it('uses TANGLE_API_KEY as the router key, overridable by MCP_RESEARCHER_ROUTER_KEY', () => { + expect( + resolveResearcherProvisioning({ TANGLE_API_KEY: 'tk' } as unknown as NodeJS.ProcessEnv) + .routerKey, + ).toBe('tk') + expect( + resolveResearcherProvisioning({ + TANGLE_API_KEY: 'tk', + MCP_RESEARCHER_ROUTER_KEY: 'override', + } as unknown as NodeJS.ProcessEnv).routerKey, + ).toBe('override') + }) + + it('reuses the repo router base (TANGLE_ROUTER_*) and normalizes to /v1', () => { + expect( + resolveResearcherProvisioning({ + TANGLE_ROUTER_BASE_URL: 'https://r.example.com', + } as unknown as NodeJS.ProcessEnv).routerBaseUrl, + ).toBe('https://r.example.com/v1') + }) + + it('prefers TANGLE_ROUTER_URL over TANGLE_ROUTER_BASE_URL', () => { + expect( + resolveResearcherProvisioning({ + TANGLE_ROUTER_URL: 'https://primary.example.com', + TANGLE_ROUTER_BASE_URL: 'https://secondary.example.com', + } as unknown as NodeJS.ProcessEnv).routerBaseUrl, + ).toBe('https://primary.example.com/v1') + }) + + it('leaves an already-versioned MCP_RESEARCHER_ROUTER_BASE_URL intact (no double /v1)', () => { + // Exercises the researcher regex directly: this env bypasses resolveRouterBaseUrl. 
+ expect( + resolveResearcherProvisioning({ + MCP_RESEARCHER_ROUTER_BASE_URL: 'https://r.example.com/v1/', + } as unknown as NodeJS.ProcessEnv).routerBaseUrl, + ).toBe('https://r.example.com/v1') + }) + + it('parses fanout harnesses + per-harness models from csv env', () => { + const p = resolveResearcherProvisioning({ + MCP_RESEARCHER_FANOUT_HARNESSES: 'opencode, codex', + MCP_RESEARCHER_FANOUT_MODELS: 'kimi, deepseek', + } as unknown as NodeJS.ProcessEnv) + expect(p.fanoutHarnesses).toEqual(['opencode', 'codex']) + expect(p.fanoutModels).toEqual(['kimi', 'deepseek']) + }) + + it('parses fanout models even without fanout harnesses set', () => { + const p = resolveResearcherProvisioning({ + MCP_RESEARCHER_FANOUT_MODELS: 'kimi, deepseek', + } as unknown as NodeJS.ProcessEnv) + expect(p.fanoutHarnesses).toBeUndefined() + expect(p.fanoutModels).toEqual(['kimi', 'deepseek']) + }) + + it('appends /v1 to a non-versioned MCP_RESEARCHER_ROUTER_BASE_URL', () => { + expect( + resolveResearcherProvisioning({ + MCP_RESEARCHER_ROUTER_BASE_URL: 'https://r.example.com', + } as unknown as NodeJS.ProcessEnv).routerBaseUrl, + ).toBe('https://r.example.com/v1') + }) + + it('treats empty/whitespace env values as unset', () => { + const p = resolveResearcherProvisioning({ + MCP_RESEARCHER_HARNESS: ' ', + TANGLE_API_KEY: '', + MCP_RESEARCHER_FANOUT_HARNESSES: ' , ', + } as unknown as NodeJS.ProcessEnv) + expect(p.harness).toBe('opencode') + expect(p.routerKey).toBeUndefined() + expect(p.fanoutHarnesses).toBeUndefined() + }) +}) + +describe('applyRouterEnv', () => { + it('merges router creds into existing box env instead of replacing it', () => { + const spec: ProvisionableSpec = { sandboxOverrides: { env: { KEEP_ME: '1' }, name: 'box' } } + applyRouterEnv(spec, 'rk', 'https://router/v1') + expect(spec.sandboxOverrides).toEqual({ + name: 'box', + env: { KEEP_ME: '1', OPENAI_API_KEY: 'rk', OPENAI_BASE_URL: 'https://router/v1' }, + }) + }) + + it('initializes sandboxOverrides when absent', 
() => { + const spec: ProvisionableSpec = {} + applyRouterEnv(spec, 'rk', 'https://router/v1') + expect(spec.sandboxOverrides?.env).toEqual({ + OPENAI_API_KEY: 'rk', + OPENAI_BASE_URL: 'https://router/v1', + }) + }) + + it('is a no-op without a router key (undefined or empty string)', () => { + for (const key of [undefined, '']) { + const spec: ProvisionableSpec = { sandboxOverrides: { env: { KEEP_ME: '1' } } } + applyRouterEnv(spec, key, 'https://router/v1') + expect(spec.sandboxOverrides?.env).toEqual({ KEEP_ME: '1' }) + } + }) +}) + +describe('resolve → apply on a profile-shaped spec', () => { + // The two public functions bin.ts buildPreset composes (resolveResearcherProvisioning + + // applyRouterEnv). buildPreset additionally calls singleFactory(harness, model) — that + // peer call is not exercised here; this asserts the env-overlay half of the composition. + it('overlays resolved router creds onto a profile-shaped spec, preserving preset env', () => { + const provisioning = resolveResearcherProvisioning({ + TANGLE_API_KEY: 'tk', + MCP_RESEARCHER_MODEL: 'moonshotai/kimi-k2.6', + } as unknown as NodeJS.ProcessEnv) + // A spec shaped like researcherProfile().agentRunSpec — profile carries harness/model; + // a preset may already ship box env the in-box agent needs. + const spec: ProvisionableSpec = { + sandboxOverrides: { env: { NS_TOKEN: 'abc' }, name: 'researcher' }, + } + applyRouterEnv(spec, provisioning.routerKey, provisioning.routerBaseUrl) + expect(spec.sandboxOverrides).toEqual({ + name: 'researcher', + env: { + NS_TOKEN: 'abc', + OPENAI_API_KEY: 'tk', + OPENAI_BASE_URL: 'https://router.tangle.tools/v1', + }, + }) + }) +})