diff --git a/packages/core/execution/src/description.test.ts b/packages/core/execution/src/description.test.ts index fd3d5334a..435e48d88 100644 --- a/packages/core/execution/src/description.test.ts +++ b/packages/core/execution/src/description.test.ts @@ -96,12 +96,16 @@ describe("buildExecuteDescription", () => { // Stable anchor from the workflow preamble. expect(description).toContain("Execute TypeScript in a sandboxed runtime"); expect(description).toContain("Use `emit(value)` to append user-visible output"); - expect(description).toContain("Emit any attachment with `emit(result.data)`"); - expect(description).toContain("pass an MCP content block to `emit(...)`"); + expect(description).toContain("Emit attachments with `emit(result.data)`"); + expect(description).toContain("also accepts MCP content blocks"); expect(description).toContain("`emit(ToolFile)` is MIME-based"); expect(description).toContain( - "Returning a `ToolFile`, a `ToolResult`, an MCP content block, or a bare base64 string does not emit content", + "Returning a `ToolFile`, `ToolResult`, MCP content block, or bare base64 string does not emit content", ); + expect(description).toContain( + "use the separate `execute_cell` and `wait_cell` tools instead of `execute`", + ); + expect(description).toContain("Do not use `resume` for execution cells"); expect(description).toContain("## Available connection prefixes"); expect(description).toContain("- `github.org.prod`"); expect(description).toContain("- `github.user.personal`"); diff --git a/packages/core/execution/src/description.ts b/packages/core/execution/src/description.ts index 37232a446..1571499d9 100644 --- a/packages/core/execution/src/description.ts +++ b/packages/core/execution/src/description.ts @@ -104,32 +104,41 @@ const formatDescription = (connectionEntries: readonly ConnectionInventoryEntry[ '1. `const { items: matches } = await tools.search({ query: "", limit: 12 });`', '2. `const path = matches[0]?.path; if (!path) return "No matching tools found.";`', "3. `const details = await tools.describe.tool({ path });`", - "4. Use `details.inputTypeScript` / `details.outputTypeScript` and `details.typeScriptDefinitions` for compact shapes.", - "5. Use `tools.executor.coreTools.connections.list({})` when you need live saved-connection inventory.", - "6. Call the tool: `const result = await tools.(input);`", + "4. Use `details.inputTypeScript`, `details.outputTypeScript`, and `details.typeScriptDefinitions` for compact shapes.", + "5. Call the exact discovered path: `const result = await tools[path](input);`", + "6. Branch on `result.ok`; `emit(...)` user-visible output, or `return` structured data you need to read.", "", - "## Rules", + "## Tool Discovery", "", "- `tools.search()` returns paginated, ranked matches: `{ items, total, hasMore, nextOffset }`. Best-first. Use short intent phrases like `github issues`, `repo details`, or `create calendar event`.", '- When you already know the namespace, narrow with `tools.search({ namespace: "github", query: "issues" })`.', + "- If `tools.search()` returns `hasMore: true` and you didn't find what you need, fetch the next page: `tools.search({ query, offset: nextOffset, limit })`.", + "- `tools.describe.tool()` returns compact TypeScript shapes. If the path does not resolve, use its `error.suggestions` instead of retrying the same path.", + "- Always call the full address returned as `path`: `tools....(args)`. Do not guess path segments.", + "- The `tools` object is lazy. `Object.keys(tools)` returns bounded built-ins (`search`, `describe`, `executor`), not the full API catalog.", "- `tools.executor.coreTools.connections.list({})` returns saved connections with `{ address, integration, owner, name, ... }`. The `address` field includes the leading `tools.` root.", + '- Pass an object to system tools, e.g. `tools.search({ query: "..." })`, `tools.describe.tool({ path })`, and `tools.executor.coreTools.connections.list({})`.', + "", + "## Tool Results", + "", "- Tool calls return a value union: `{ ok: true, data }` for success or `{ ok: false, error: { code, message, status?, details?, retryable? } }` for expected tool/domain failures. Branch on `result.ok`.", "- `data` is the upstream payload itself. HTTP-backed tools (OpenAPI) also set `http: { status, headers }` beside `data` — read `result.http?.headers` for pagination (Link) or rate-limit headers.", - "- Use `emit(value)` to append user-visible output and return `undefined`. Plain values become MCP text content. MCP content blocks are forwarded as-is. `ToolFile` values are rendered by MIME. Emitted output goes to the user, not back to you; the result envelope reports an `emitted` count so you can confirm it landed, but to read a value yourself, `return` it.", - "- Prefer explicit output helpers when the content kind is known: `text(value)`, `image(value, detail?)`, `audio(block)`, `file(toolFile)`, `resource(block)`, and `notify(value)`. `image` accepts a base64 data URI string, `{ image_url, detail }`, or a raw MCP image block; `detail` may be `auto`, `low`, `high`, or `original`.", - '- File-returning tools may return `ToolFile` values: `{ _tag: "ToolFile", name?, mimeType, encoding: "base64", data, byteLength }`. Emit any attachment with `emit(result.data)`.', - '- To emit MCP-native content directly, pass an MCP content block to `emit(...)`, such as `{ type: "image", data, mimeType }`, `{ type: "audio", data, mimeType }`, `{ type: "text", text }`, `{ type: "resource", resource }`, or `{ type: "resource_link", uri, name, ... }`.', - "- `emit(ToolFile)` is MIME-based: `image/*` becomes MCP image content, `audio/*` becomes MCP audio content, text-like files become decoded text, and other binary files become embedded MCP resources.", - "- `return` is only for ordinary structured data. Returning a `ToolFile`, a `ToolResult`, an MCP content block, or a bare base64 string does not emit content to the MCP client.", - "- Some providers, including Gmail, return attachment bytes without a public URL. To send that attachment to another API from code, decode `ToolFile.data` from base64 and pass the bytes to that API's upload/file input.", - "- If `tools.search()` returns `hasMore: true` and you didn't find what you need, fetch the next page: `tools.search({ query, offset: nextOffset, limit })`.", - "- Always use the full address when calling tools: `tools....(args)`. The `path` returned by `tools.search()` / `tools.describe.tool()` is already the exact path under `tools` — call `tools[path]` rather than guessing segments.", - "- The `tools` object is a lazy proxy. `Object.keys(tools)` returns only bounded built-in discovery keys (`search`, `describe`, `executor`), not the full API catalog. Use `tools.search()` or `tools.executor.coreTools.connections.list({})` for configured tools.", - '- Pass an object to system tools, e.g. `tools.search({ query: "..." })`, `tools.executor.coreTools.connections.list({})`, and `tools.describe.tool({ path })`.', - '- `tools.describe.tool()` returns compact TypeScript shapes. Use `inputTypeScript`, `outputTypeScript`, and `typeScriptDefinitions`. If the path doesn\'t resolve, the result carries `error: { code: "tool_not_found", suggestions }` — use a suggestion instead of retrying the same path.', "- For tools that return large collections (e.g. `getStates`, `getAll`), filter results in code rather than calling per-item tools.", "- Do not use `fetch` — all API calls go through `tools.*`.", - "- If execution pauses for interaction, resume it with the returned `resumePayload`.", + "", + "## Output", + "", + "- Use `emit(value)` to append user-visible output. Emitted output goes to the user, not back to you; to read a value yourself, `return` it.", + "- Prefer explicit output helpers when the content kind is known: `text(value)`, `image(value, detail?)`, `audio(block)`, `file(toolFile)`, `resource(block)`, and `notify(value)`. `image` accepts a base64 data URI string, `{ image_url, detail }`, or a raw MCP image block; `detail` may be `auto`, `low`, `high`, or `original`.", + '- File-returning tools may return `ToolFile` values: `{ _tag: "ToolFile", name?, mimeType, encoding: "base64", data, byteLength }`. Emit attachments with `emit(result.data)` or `file(result.data)`.', + '- `emit(...)` also accepts MCP content blocks such as `{ type: "image", data, mimeType }`, `{ type: "text", text }`, `{ type: "resource", resource }`, and `{ type: "resource_link", uri, name, ... }`.', + "- `emit(ToolFile)` is MIME-based: images, audio, text-like files, and other binary files are rendered as the corresponding MCP content.", + "- `return` is only for ordinary structured data. Returning a `ToolFile`, `ToolResult`, MCP content block, or bare base64 string does not emit content to the MCP client.", + "", + "## Runtime Notes", + "", + "- For work that yields progress, runs longer than one request, or needs incremental observation, use the separate `execute_cell` and `wait_cell` tools instead of `execute`.", + "- If `execute` pauses for user interaction, resume it with the returned `resumePayload`. Do not use `resume` for execution cells.", "- TypeScript type syntax (`: T`, `as T`, generics, interfaces, type aliases) is stripped before execution — feel free to write idiomatic TypeScript using the shapes from `tools.describe.tool()`. Decorators and `enum` are not supported.", ]; diff --git a/packages/hosts/mcp/src/tool-server.test.ts b/packages/hosts/mcp/src/tool-server.test.ts index db618512e..39a8c364f 100644 --- a/packages/hosts/mcp/src/tool-server.test.ts +++ b/packages/hosts/mcp/src/tool-server.test.ts @@ -163,6 +163,27 @@ describe("MCP host server — native elicitation mode", () => { }); }); + it("advertises the persistent cell lifecycle in MCP tool descriptions", async () => { + await withNativeClient(makeStubEngine({}), ELICITATION_CAPS, async (client) => { + const { tools } = await client.listTools(); + const descriptionFor = (name: string): string => + tools.find((tool) => tool.name === name)?.description ?? ""; + + expect(descriptionFor("execute")).toContain("test executor"); + + const executeCellDescription = descriptionFor("execute_cell"); + expect(executeCellDescription).toContain("persistent TypeScript execution cell"); + expect(executeCellDescription).toContain("wait_cell({ cellId, after: cursor })"); + expect(executeCellDescription).toContain("`resume` is not for cells"); + + const waitCellDescription = descriptionFor("wait_cell"); + expect(waitCellDescription).toContain("Keep waiting while status is `running` or `yielded`"); + expect(waitCellDescription).toContain("Do not call `resume` for cells"); + + expect(descriptionFor("terminate_cell")).toContain("Use this only for cancellation"); + }); + }); + it("execute tool renders emitted file image output as MCP images", async () => { const engine = makeStubEngine({ execute: () => diff --git a/packages/hosts/mcp/src/tool-server.ts b/packages/hosts/mcp/src/tool-server.ts index 0b37e0905..380a50b66 100644 --- a/packages/hosts/mcp/src/tool-server.ts +++ b/packages/hosts/mcp/src/tool-server.ts @@ -855,9 +855,10 @@ export const createExecutorMcpServer = ( "execute_cell", { description: [ - "Start a persistent execution cell. Use this for code that yields, runs longer than one request, or needs incremental observation.", - "The code can call `await yield_control()` or `await yieldControl()` to hand back currently emitted output and continue after the next wait.", - "Call `wait_cell` with the returned cellId and cursor to observe new events. Call `terminate_cell` to stop it.", + "Start a persistent TypeScript execution cell. Use this when code yields progress, runs longer than one MCP request, or needs incremental observation.", + "Inside the cell, call `emit(...)` or output helpers for user-visible output. Call `await yield_control()` or `await yieldControl()` to hand back currently emitted output and continue after the next wait.", + "The response includes `cellId`, `cursor`, `status`, and `events`. If status is `running` or `yielded`, call `wait_cell({ cellId, after: cursor })` and repeat with the newest cursor until status is `completed`, `failed`, or `terminated`.", + "`resume` is not for cells; use `wait_cell` to continue observing, and `terminate_cell` only to cancel.", ].join("\n"), inputSchema: { code: z.string().trim().min(1), @@ -876,8 +877,11 @@ export const createExecutorMcpServer = ( server.registerTool( "wait_cell", { - description: - "Wait for new events from a persistent execution cell. Pass the cursor returned by execute_cell or the previous wait_cell call.", + description: [ + "Wait for new events from a persistent execution cell. Pass `cellId` and the `after` cursor returned by `execute_cell` or the previous `wait_cell` call.", + "Use the returned `cursor` for the next wait. Keep waiting while status is `running` or `yielded`; stop when status is `completed`, `failed`, or `terminated`.", + "Do not call `resume` for cells. Set `timeoutMs` when you need to bound how long this wait blocks.", + ].join("\n"), inputSchema: { cellId: z.string(), after: z.number().optional(), @@ -896,7 +900,8 @@ export const createExecutorMcpServer = ( server.registerTool( "terminate_cell", { - description: "Terminate a persistent execution cell.", + description: + "Terminate a persistent execution cell by `cellId`. Use this only for cancellation or cleanup; normal cell progress is observed with `wait_cell`.", inputSchema: { cellId: z.string(), },