From afad91fa94e5ee211797a6471bb88f017adbcde9 Mon Sep 17 00:00:00 2001
From: Drew Stone <drewstone329@gmail.com>
Date: Sun, 14 Jun 2026 12:58:40 -0600
Subject: [PATCH 1/2] refactor(runtime): runAgentic's shot loop delegates to
 the canonical routerToolLoop
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

runShot hand-rolled the same off-box chat→tool_calls→execute loop that routerToolLoop already
implements — a 3rd copy of one primitive. Generalize routerToolLoop additively so it can be
the single loop: it gains an optional initialMessages (depth continuation — seed with the
carried conversation instead of [system,user]) and an optional maxTokens (the worker
completion cap), and it now returns the final messages (for depth carry + the analyst
trajectory). runShot becomes a thin adapter: carried messages in, surface.call as execute (an
ERROR: result or a throw is counted as a toolError and fed back to the model, not thrown), and
the result mapped to ShotOut. External callers (appworld, humaneval-repair-gate) are
unaffected — the new opts are optional. Depth preserved: tests/loops/strategy-suite.test.ts
34/34 green.

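Minor behavior changes: malformed tool-call JSON is now handled inside routerToolLoop, which
feeds an error back to the model (its existing behavior), rather than incrementing toolErrors;
surface.call errors are still counted. The carried messages array is no longer mutated in
place (routerToolLoop copies initialMessages), so callers must use the returned messages. The
terminal assistant turn (the one with no tool_calls) is returned as `final` but is not
appended to the returned messages, because routerToolLoop returns before recording it; the
old runShot pushed that turn before breaking.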
---
 src/runtime/router-client.ts | 31 ++++++++++----
 src/runtime/strategy.ts      | 80 ++++++++++++------------------------
 2 files changed, 50 insertions(+), 61 deletions(-)
diff --git a/src/runtime/router-client.ts b/src/runtime/router-client.ts
index 08e6f05..0fe52d9 100644
--- a/src/runtime/router-client.ts
+++ b/src/runtime/router-client.ts
@@ -120,7 +120,7 @@ export async function routerChatWithTools(
     type: 'function'
     function: { name: string; description?: string; parameters: unknown }
   }>,
-  opts?: { temperature?: number; signal?: AbortSignal; toolChoice?: 'auto' | 'required' | 'none' },
+  opts?: { temperature?: number; signal?: AbortSignal; toolChoice?: 'auto' | 'required' | 'none'; maxTokens?: number },
 ): Promise<RouterChatToolsResult> {
   const res = await fetch(`${cfg.routerBaseUrl.replace(/\/$/, '')}/chat/completions`, {
     method: 'POST',
@@ -131,6 +131,7 @@ export async function routerChatWithTools(
       tools,
       tool_choice: opts?.toolChoice ?? 'auto',
       temperature: opts?.temperature ?? 0.3,
+      ...(opts?.maxTokens ? { max_tokens: opts.maxTokens } : {}),
     }),
     ...(opts?.signal ? { signal: opts.signal } : {}),
   })
@@ -182,6 +183,9 @@ export interface RouterToolLoopResult {
    *  steerer reads (behavior, never the verdict) to diagnose + redirect the next shot. */
   toolTrace: Array<{ name: string; args: string; result: string }>
   usage: { input: number; output: number }
+  /** The full conversation after the loop (seed + every assistant/tool turn). Lets a caller
+   *  CARRY the messages into the next shot (depth continuation) and read the trajectory. */
+  messages: Array<Record<string, unknown>>
 }
 
 /**
@@ -201,13 +205,23 @@ export async function routerToolLoop(
   user: string,
   tools: ReadonlyArray<ToolSpec>,
   execute: (name: string, args: Record<string, unknown>) => Promise<string>,
-  opts?: { maxTurns?: number; temperature?: number; signal?: AbortSignal },
+  opts?: {
+    maxTurns?: number
+    temperature?: number
+    signal?: AbortSignal
+    maxTokens?: number
+    /** Seed the loop with an existing conversation (depth continuation) instead of
+     *  `[system, user]`. When set, `system`/`user` are ignored. The array is copied. */
+    initialMessages?: ReadonlyArray<Record<string, unknown>>
+  },
 ): Promise<RouterToolLoopResult> {
   const maxTurns = opts?.maxTurns ?? 4
-  const messages: Array<Record<string, unknown>> = [
-    { role: 'system', content: system },
-    { role: 'user', content: user },
-  ]
+  const messages: Array<Record<string, unknown>> = opts?.initialMessages
+    ? [...opts.initialMessages]
+    : [
+        { role: 'system', content: system },
+        { role: 'user', content: user },
+      ]
   let toolCalls = 0
   let lastText = ''
   const usage = { input: 0, output: 0 }
@@ -216,6 +230,7 @@ export async function routerToolLoop(
   for (let turn = 1; turn <= maxTurns; turn += 1) {
     const r = await routerChatWithTools(cfg, messages, tools, {
       ...(opts?.temperature !== undefined ? { temperature: opts.temperature } : {}),
+      ...(opts?.maxTokens ? { maxTokens: opts.maxTokens } : {}),
       ...(opts?.signal ? { signal: opts.signal } : {}),
     })
     if (r.usage) {
@@ -224,7 +239,7 @@ export async function routerToolLoop(
     }
     if (r.content) lastText = r.content
     if (r.toolCalls.length === 0)
-      return { final: lastText, turns: turn, toolCalls, toolTrace, usage }
+      return { final: lastText, turns: turn, toolCalls, toolTrace, usage, messages }
 
     // Record the assistant turn verbatim (content + the tool_calls it requested), then
     // run each call on the host and fold the result back as a `tool` message.
@@ -257,5 +272,5 @@ export async function routerToolLoop(
       toolTrace.push({ name: tc.name, args: tc.arguments, result: out })
     }
   }
-  return { final: lastText, turns: maxTurns, toolCalls, toolTrace, usage }
+  return { final: lastText, turns: maxTurns, toolCalls, toolTrace, usage, messages }
 }
diff --git a/src/runtime/strategy.ts b/src/runtime/strategy.ts
index efdf27d..681116c 100644
--- a/src/runtime/strategy.ts
+++ b/src/runtime/strategy.ts
@@ -27,6 +27,7 @@ import type { RuntimeHooks } from '../runtime-hooks'
 import { observe } from './observe'
 import type { Outcome } from './personify/types'
 import type { Corpus } from './personify/wave-types'
+import { routerToolLoop } from './router-client'
 import { createSupervisor } from './supervise/supervisor'
 import type {
   Agent,
@@ -149,62 +150,35 @@ async function runShot(
   opts: AgenticOptions,
   modelOverride?: string,
 ): Promise<ShotOut> {
-  const innerTurns = opts.innerTurns ?? 4
-  let completions = 0
-  let toolCalls = 0
+  // The canonical off-box tool loop (routerToolLoop) drives the turns; this shot supplies
+  // the carried conversation (depth continuation, via initialMessages) and the tool dispatch
+  // (surface.call). An ERROR:-prefixed result or a thrown call is a real tool outcome —
+  // counted as a toolError and fed back to the model, never thrown to kill the shot.
   let toolErrors = 0
-  const tokens = { input: 0, output: 0 }
-  for (let t = 0; t < innerTurns; t += 1) {
-    const res = await fetch(`${opts.routerBaseUrl.replace(/\/$/, '')}/chat/completions`, {
-      method: 'POST',
-      headers: { 'content-type': 'application/json', authorization: `Bearer ${opts.routerKey}` },
-      body: JSON.stringify({
-        model: modelOverride ?? opts.model,
-        messages,
-        tools,
-        tool_choice: 'auto',
-        temperature: opts.temperature ?? 0.7,
-        ...(opts.maxTokens ? { max_tokens: opts.maxTokens } : {}),
-      }),
-    })
-    if (!res.ok) throw new Error(`router ${res.status}: ${(await res.text()).slice(0, 200)}`)
-    completions += 1
-    const data = (await res.json()) as {
-      choices?: Array<{ message?: { content?: string; tool_calls?: ToolCall[] } }>
-      usage?: { prompt_tokens?: number; completion_tokens?: number }
-    }
-    if (typeof data.usage?.prompt_tokens === 'number') tokens.input += data.usage.prompt_tokens
-    if (typeof data.usage?.completion_tokens === 'number')
-      tokens.output += data.usage.completion_tokens
-    const msg = data.choices?.[0]?.message
-    if (!msg) break
-    const calls = msg.tool_calls ?? []
-    messages.push({
-      role: 'assistant',
-      content: msg.content ?? '',
-      ...(calls.length ? { tool_calls: calls } : {}),
-    })
-    if (calls.length === 0) break
-    for (const call of calls) {
-      toolCalls += 1
-      let args: Record<string, unknown> = {}
-      try {
-        args = JSON.parse(call.function.arguments || '{}')
-      } catch {
-        toolErrors += 1
-      }
-      let out: string
-      try {
-        out = await surface.call(handle, call.function.name, args)
-        if (out.startsWith('ERROR:')) toolErrors += 1
-      } catch (e) {
-        toolErrors += 1
-        out = `ERROR: ${e instanceof Error ? e.message : String(e)}`
-      }
-      messages.push({ role: 'tool', tool_call_id: call.id, content: out })
+  const execute = async (name: string, args: Record<string, unknown>): Promise<string> => {
+    try {
+      const out = await surface.call(handle, name, args)
+      if (out.startsWith('ERROR:')) toolErrors += 1
+      return out
+    } catch (e) {
+      toolErrors += 1
+      return `ERROR: ${e instanceof Error ? e.message : String(e)}`
     }
   }
-  return { messages, completions, toolCalls, toolErrors, tokens }
+  const r = await routerToolLoop(
+    { routerBaseUrl: opts.routerBaseUrl, routerKey: opts.routerKey, model: modelOverride ?? opts.model },
+    '',
+    '',
+    tools,
+    execute,
+    {
+      maxTurns: opts.innerTurns ?? 4,
+      temperature: opts.temperature ?? 0.7,
+      initialMessages: messages,
+      ...(opts.maxTokens ? { maxTokens: opts.maxTokens } : {}),
+    },
+  )
+  return { messages: r.messages, completions: r.turns, toolCalls: r.toolCalls, toolErrors, tokens: r.usage }
 }
 
 /** The trace-analyst (selector≠judge): reads ONLY the trajectory + task, never the score. */

From 53ffa9a7c51a843df0804aa9bac5acba3a2dbf48 Mon Sep 17 00:00:00 2001
From: Drew Stone <drewstone329@gmail.com>
Date: Sun, 14 Jun 2026 13:00:46 -0600
Subject: [PATCH 2/2] style(runtime): biome format the A1 changes

---
 src/runtime/router-client.ts |  7 ++++++-
 src/runtime/strategy.ts      | 14 ++++++++++++--
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/src/runtime/router-client.ts b/src/runtime/router-client.ts
index 0fe52d9..16271d9 100644
--- a/src/runtime/router-client.ts
+++ b/src/runtime/router-client.ts
@@ -120,7 +120,12 @@ export async function routerChatWithTools(
     type: 'function'
     function: { name: string; description?: string; parameters: unknown }
   }>,
-  opts?: { temperature?: number; signal?: AbortSignal; toolChoice?: 'auto' | 'required' | 'none'; maxTokens?: number },
+  opts?: {
+    temperature?: number
+    signal?: AbortSignal
+    toolChoice?: 'auto' | 'required' | 'none'
+    maxTokens?: number
+  },
 ): Promise<RouterChatToolsResult> {
   const res = await fetch(`${cfg.routerBaseUrl.replace(/\/$/, '')}/chat/completions`, {
     method: 'POST',
diff --git a/src/runtime/strategy.ts b/src/runtime/strategy.ts
index 681116c..83d24ef 100644
--- a/src/runtime/strategy.ts
+++ b/src/runtime/strategy.ts
@@ -166,7 +166,11 @@ async function runShot(
     }
   }
   const r = await routerToolLoop(
-    { routerBaseUrl: opts.routerBaseUrl, routerKey: opts.routerKey, model: modelOverride ?? opts.model },
+    {
+      routerBaseUrl: opts.routerBaseUrl,
+      routerKey: opts.routerKey,
+      model: modelOverride ?? opts.model,
+    },
     '',
     '',
     tools,
@@ -178,7 +182,13 @@ async function runShot(
       ...(opts.maxTokens ? { maxTokens: opts.maxTokens } : {}),
     },
   )
-  return { messages: r.messages, completions: r.turns, toolCalls: r.toolCalls, toolErrors, tokens: r.usage }
+  return {
+    messages: r.messages,
+    completions: r.turns,
+    toolCalls: r.toolCalls,
+    toolErrors,
+    tokens: r.usage,
+  }
 }
 
 /** The trace-analyst (selector≠judge): reads ONLY the trajectory + task, never the score. */