From afad91fa94e5ee211797a6471bb88f017adbcde9 Mon Sep 17 00:00:00 2001 From: Drew Stone Date: Sun, 14 Jun 2026 12:58:40 -0600 Subject: [PATCH 1/2] refactor(runtime): runAgentic's shot loop delegates to the canonical routerToolLoop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit runShot hand-rolled the off-box chat→tool_calls→execute loop that routerToolLoop already is — a 3rd copy of the same primitive. Generalize routerToolLoop additively so it can BE the one loop: it accepts optional initialMessages (depth continuation — seed with the carried conversation instead of [system,user]) and maxTokens (the worker completion cap), and it returns the final messages (for depth carry + the analyst trajectory). runShot becomes a thin adapter: carried messages in, surface.call as execute (ERROR/throw → counted toolError, fed back, not thrown), result mapped to ShotOut. External callers (appworld, humaneval-repair-gate) are unaffected — the new opts are optional. Depth preserved: tests/loops/strategy-suite.test.ts 34/34 green. Minor behavior change: malformed tool-call JSON now feeds an error back to the model inside routerToolLoop (its existing behavior) rather than incrementing toolErrors; surface.call errors are still counted. 
--- src/runtime/router-client.ts | 31 ++++++++++---- src/runtime/strategy.ts | 80 ++++++++++++------------------------ 2 files changed, 50 insertions(+), 61 deletions(-) diff --git a/src/runtime/router-client.ts b/src/runtime/router-client.ts index 08e6f05..0fe52d9 100644 --- a/src/runtime/router-client.ts +++ b/src/runtime/router-client.ts @@ -120,7 +120,7 @@ export async function routerChatWithTools( type: 'function' function: { name: string; description?: string; parameters: unknown } }>, - opts?: { temperature?: number; signal?: AbortSignal; toolChoice?: 'auto' | 'required' | 'none' }, + opts?: { temperature?: number; signal?: AbortSignal; toolChoice?: 'auto' | 'required' | 'none'; maxTokens?: number }, ): Promise { const res = await fetch(`${cfg.routerBaseUrl.replace(/\/$/, '')}/chat/completions`, { method: 'POST', @@ -131,6 +131,7 @@ export async function routerChatWithTools( tools, tool_choice: opts?.toolChoice ?? 'auto', temperature: opts?.temperature ?? 0.3, + ...(opts?.maxTokens ? { max_tokens: opts.maxTokens } : {}), }), ...(opts?.signal ? { signal: opts.signal } : {}), }) @@ -182,6 +183,9 @@ export interface RouterToolLoopResult { * steerer reads (behavior, never the verdict) to diagnose + redirect the next shot. */ toolTrace: Array<{ name: string; args: string; result: string }> usage: { input: number; output: number } + /** The full conversation after the loop (seed + every assistant/tool turn). Lets a caller + * CARRY the messages into the next shot (depth continuation) and read the trajectory. */ + messages: Array> } /** @@ -201,13 +205,23 @@ export async function routerToolLoop( user: string, tools: ReadonlyArray, execute: (name: string, args: Record) => Promise, - opts?: { maxTurns?: number; temperature?: number; signal?: AbortSignal }, + opts?: { + maxTurns?: number + temperature?: number + signal?: AbortSignal + maxTokens?: number + /** Seed the loop with an existing conversation (depth continuation) instead of + * `[system, user]`. 
When set, `system`/`user` are ignored. The array is copied. */ + initialMessages?: ReadonlyArray> + }, ): Promise { const maxTurns = opts?.maxTurns ?? 4 - const messages: Array> = [ - { role: 'system', content: system }, - { role: 'user', content: user }, - ] + const messages: Array> = opts?.initialMessages + ? [...opts.initialMessages] + : [ + { role: 'system', content: system }, + { role: 'user', content: user }, + ] let toolCalls = 0 let lastText = '' const usage = { input: 0, output: 0 } @@ -216,6 +230,7 @@ export async function routerToolLoop( for (let turn = 1; turn <= maxTurns; turn += 1) { const r = await routerChatWithTools(cfg, messages, tools, { ...(opts?.temperature !== undefined ? { temperature: opts.temperature } : {}), + ...(opts?.maxTokens ? { maxTokens: opts.maxTokens } : {}), ...(opts?.signal ? { signal: opts.signal } : {}), }) if (r.usage) { @@ -224,7 +239,7 @@ export async function routerToolLoop( } if (r.content) lastText = r.content if (r.toolCalls.length === 0) - return { final: lastText, turns: turn, toolCalls, toolTrace, usage } + return { final: lastText, turns: turn, toolCalls, toolTrace, usage, messages } // Record the assistant turn verbatim (content + the tool_calls it requested), then // run each call on the host and fold the result back as a `tool` message. 
@@ -257,5 +272,5 @@ export async function routerToolLoop( toolTrace.push({ name: tc.name, args: tc.arguments, result: out }) } } - return { final: lastText, turns: maxTurns, toolCalls, toolTrace, usage } + return { final: lastText, turns: maxTurns, toolCalls, toolTrace, usage, messages } } diff --git a/src/runtime/strategy.ts b/src/runtime/strategy.ts index efdf27d..681116c 100644 --- a/src/runtime/strategy.ts +++ b/src/runtime/strategy.ts @@ -27,6 +27,7 @@ import type { RuntimeHooks } from '../runtime-hooks' import { observe } from './observe' import type { Outcome } from './personify/types' import type { Corpus } from './personify/wave-types' +import { routerToolLoop } from './router-client' import { createSupervisor } from './supervise/supervisor' import type { Agent, @@ -149,62 +150,35 @@ async function runShot( opts: AgenticOptions, modelOverride?: string, ): Promise { - const innerTurns = opts.innerTurns ?? 4 - let completions = 0 - let toolCalls = 0 + // The canonical off-box tool loop (routerToolLoop) drives the turns; this shot supplies + // the carried conversation (depth continuation, via initialMessages) and the tool dispatch + // (surface.call). An ERROR:-prefixed result or a thrown call is a real tool outcome — + // counted as a toolError and fed back to the model, never thrown to kill the shot. let toolErrors = 0 - const tokens = { input: 0, output: 0 } - for (let t = 0; t < innerTurns; t += 1) { - const res = await fetch(`${opts.routerBaseUrl.replace(/\/$/, '')}/chat/completions`, { - method: 'POST', - headers: { 'content-type': 'application/json', authorization: `Bearer ${opts.routerKey}` }, - body: JSON.stringify({ - model: modelOverride ?? opts.model, - messages, - tools, - tool_choice: 'auto', - temperature: opts.temperature ?? 0.7, - ...(opts.maxTokens ? 
{ max_tokens: opts.maxTokens } : {}), - }), - }) - if (!res.ok) throw new Error(`router ${res.status}: ${(await res.text()).slice(0, 200)}`) - completions += 1 - const data = (await res.json()) as { - choices?: Array<{ message?: { content?: string; tool_calls?: ToolCall[] } }> - usage?: { prompt_tokens?: number; completion_tokens?: number } - } - if (typeof data.usage?.prompt_tokens === 'number') tokens.input += data.usage.prompt_tokens - if (typeof data.usage?.completion_tokens === 'number') - tokens.output += data.usage.completion_tokens - const msg = data.choices?.[0]?.message - if (!msg) break - const calls = msg.tool_calls ?? [] - messages.push({ - role: 'assistant', - content: msg.content ?? '', - ...(calls.length ? { tool_calls: calls } : {}), - }) - if (calls.length === 0) break - for (const call of calls) { - toolCalls += 1 - let args: Record = {} - try { - args = JSON.parse(call.function.arguments || '{}') - } catch { - toolErrors += 1 - } - let out: string - try { - out = await surface.call(handle, call.function.name, args) - if (out.startsWith('ERROR:')) toolErrors += 1 - } catch (e) { - toolErrors += 1 - out = `ERROR: ${e instanceof Error ? e.message : String(e)}` - } - messages.push({ role: 'tool', tool_call_id: call.id, content: out }) + const execute = async (name: string, args: Record): Promise => { + try { + const out = await surface.call(handle, name, args) + if (out.startsWith('ERROR:')) toolErrors += 1 + return out + } catch (e) { + toolErrors += 1 + return `ERROR: ${e instanceof Error ? e.message : String(e)}` } } - return { messages, completions, toolCalls, toolErrors, tokens } + const r = await routerToolLoop( + { routerBaseUrl: opts.routerBaseUrl, routerKey: opts.routerKey, model: modelOverride ?? opts.model }, + '', + '', + tools, + execute, + { + maxTurns: opts.innerTurns ?? 4, + temperature: opts.temperature ?? 0.7, + initialMessages: messages, + ...(opts.maxTokens ? 
{ maxTokens: opts.maxTokens } : {}), + }, + ) + return { messages: r.messages, completions: r.turns, toolCalls: r.toolCalls, toolErrors, tokens: r.usage } } /** The trace-analyst (selector≠judge): reads ONLY the trajectory + task, never the score. */ From 53ffa9a7c51a843df0804aa9bac5acba3a2dbf48 Mon Sep 17 00:00:00 2001 From: Drew Stone Date: Sun, 14 Jun 2026 13:00:46 -0600 Subject: [PATCH 2/2] style(runtime): biome format the A1 changes --- src/runtime/router-client.ts | 7 ++++++- src/runtime/strategy.ts | 14 ++++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/runtime/router-client.ts b/src/runtime/router-client.ts index 0fe52d9..16271d9 100644 --- a/src/runtime/router-client.ts +++ b/src/runtime/router-client.ts @@ -120,7 +120,12 @@ export async function routerChatWithTools( type: 'function' function: { name: string; description?: string; parameters: unknown } }>, - opts?: { temperature?: number; signal?: AbortSignal; toolChoice?: 'auto' | 'required' | 'none'; maxTokens?: number }, + opts?: { + temperature?: number + signal?: AbortSignal + toolChoice?: 'auto' | 'required' | 'none' + maxTokens?: number + }, ): Promise { const res = await fetch(`${cfg.routerBaseUrl.replace(/\/$/, '')}/chat/completions`, { method: 'POST', diff --git a/src/runtime/strategy.ts b/src/runtime/strategy.ts index 681116c..83d24ef 100644 --- a/src/runtime/strategy.ts +++ b/src/runtime/strategy.ts @@ -166,7 +166,11 @@ async function runShot( } } const r = await routerToolLoop( - { routerBaseUrl: opts.routerBaseUrl, routerKey: opts.routerKey, model: modelOverride ?? opts.model }, + { + routerBaseUrl: opts.routerBaseUrl, + routerKey: opts.routerKey, + model: modelOverride ?? opts.model, + }, '', '', tools, @@ -178,7 +182,13 @@ async function runShot( ...(opts.maxTokens ? 
{ maxTokens: opts.maxTokens } : {}), }, ) - return { messages: r.messages, completions: r.turns, toolCalls: r.toolCalls, toolErrors, tokens: r.usage } + return { + messages: r.messages, + completions: r.turns, + toolCalls: r.toolCalls, + toolErrors, + tokens: r.usage, + } } /** The trace-analyst (selector≠judge): reads ONLY the trajectory + task, never the score. */