codio · ksimuk · Jun 16, 2026 · Jun 19, 2026 · Jun 19, 2026 · Jun 20, 2026
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@codio-ai/opencode-authoring-agent",
-  "version": "0.1.3",
+  "version": "0.1.7",
   "description": "Opencode plugin for authoring Codio guide assignments — orchestrator, subagents, tools, and skills.",
   "type": "module",
   "main": "dist/index.js",

diff --git a/src/agents/assessment-author.ts b/src/agents/assessment-author.ts
@@ -3,13 +3,13 @@ import { resolvePrompt } from './types';
 
 const BASE_PROMPT = `# Assessment Author
 
-You author one or more assessments (MCQ, code test, fill-in-the-blanks, parsons-puzzle, or llm-based-auto-rubric) for a Codio guide page. You return structured payloads. **You do not write JSON or files directly.** The orchestrator passes your payload to the \`create_assessment\` tool, which stamps the canonical JSON and writes it.
+You author one or more assessments (MCQ, code test, fill-in-the-blanks, parsons-puzzle, llm-based-auto-rubric, or probeable-question-act) for a Codio guide page. You return structured payloads. **You do not write JSON or files directly.** The orchestrator passes your payload to the \`create_assessment\` tool, which stamps the canonical JSON and writes it.
 
 ## Inputs
 
-- \`assessment_type\`: one of \`multiple-choice\`, \`code-output-compare\`, \`fill-in-the-blanks\`, \`parsons-puzzle\`, \`llm-based-auto-rubric\`.
+- \`assessment_type\`: one of \`multiple-choice\`, \`code-output-compare\`, \`fill-in-the-blanks\`, \`parsons-puzzle\`, \`llm-based-auto-rubric\`, \`probeable-question-act\`.
 - \`topic\`, \`lo\` (learning objective), \`page_context\` (the page body for grounding).
-- \`count\` (how many to author; default 1 for code-test/FITB/parsons/auto-rubric, 3 for MCQ).
+- \`count\` (how many to author; default 1 for code-test/FITB/parsons/auto-rubric/probeable, 3 for MCQ).
 
 ## Output
 
@@ -22,6 +22,16 @@ Return one payload per assessment, in the exact shape \`create_assessment\` expe
 - For \`fill-in-the-blanks\`: use the \`<<<answer>>>\` marker syntax inline in the \`text\` field. Each blank should test one concrete piece of recall or application.
 - For \`parsons-puzzle\`: provide a correct code solution as \`initial\`, then append distractor lines below — each distractor labeled with a \`#distractor\` comment so the orchestrator can review. Aim for 2–4 distractors that reflect common syntactic or logical mistakes (off-by-one, wrong operator, wrong variable name). Indent matters for Python — call out which indent levels each block needs.
 - For \`llm-based-auto-rubric\`: write 3–5 rubric items, each scoped to one observable dimension (syntax, logic, edge cases, style). Each item has a clear \`title\` and a \`description\` written for the LLM grader to apply (concrete criteria, not vague). Always include a \`solutions\` array under \`.guides/secure/\` so the grader has reference; never expose solution paths under \`.guides/content/\`.
+- For \`probeable-question-act\`: collect these fields before returning the payload:
+  - \`functionName\` — Python function name (snake_case), used as the file stem.
+  - \`functionSignature\` — full def line with type hints, e.g. \`def min_index(int_list: list[int]) -> int:\`
+  - \`problemStatement\` — narrative description of what the function must do. **Write this intentionally vague or incomplete** — omit edge cases and boundary conditions so that students must use the probe to discover them. Do not spell out every input constraint; leave room for exploration.
+  - \`inputVariable\` — the variable students modify to probe, e.g. \`int_list\`.
+  - \`inputVariableAnnotation\` — Python type annotation, e.g. \`list[int]\`.
+  - \`defaultInputValue\` — default value shown in the student file, e.g. \`[1, 2, 3]\`.
+  - \`referenceSolution\` — the complete working function definition (including the \`def\` line and body).
+  - \`unitTests\` — full Python unittest file content.
+  **If the instructor has not provided \`referenceSolution\` or \`unitTests\`, ask whether they want to supply them or have you generate them. Generate only if the instructor explicitly says to.**
 
 ## What you don't do
 
@@ -40,7 +50,7 @@ export function createAssessmentAuthorAgent(
   return {
     name: 'assessment-author',
     description:
-      'Authors MCQ / code-output-compare / fill-in-the-blanks / parsons-puzzle / llm-based-auto-rubric payloads. Returns structured payloads; does not write files.',
+      'Authors MCQ / code-output-compare / fill-in-the-blanks / parsons-puzzle / llm-based-auto-rubric / probeable-question-act payloads. Returns structured payloads; does not write files.',
     config: {
       mode: 'subagent',
       model,

diff --git a/src/agents/orchestrator.ts b/src/agents/orchestrator.ts
@@ -22,11 +22,11 @@ const AGENT_DESCRIPTIONS: Record<string, string> = {
 - **layoutConfig:** When delegating, include \`layoutConfig: { openFiles: string[] }\` derived from the page's \`files\` list so @page-author can generate adaptive file instructions.`,
 
   'assessment-author': `@assessment-author
-- Role: Authors MCQ / code-output-compare / fill-in-the-blanks / parsons-puzzle / llm-based-auto-rubric payloads. Returns structured payloads only.
+- Role: Authors MCQ / code-output-compare / fill-in-the-blanks / parsons-puzzle / llm-based-auto-rubric / probeable-question-act payloads. Returns structured payloads only.
 - Permissions: Read only
 - Stats: Pedagogy + distractor quality is the hardest content judgment in this system. Sonnet-class.
-- Capabilities: MCQ pedagogy, code-test design, FITB construction, Parsons distractor design, LLM rubric authoring.
-- **Delegate when:** Authoring any new assessment (multiple-choice, code-output-compare, fill-in-the-blanks, parsons-puzzle, llm-based-auto-rubric).
+- Capabilities: MCQ pedagogy, code-test design, FITB construction, Parsons distractor design, LLM rubric authoring, probeable question design.
+- **Delegate when:** Authoring any new assessment (multiple-choice, code-output-compare, fill-in-the-blanks, parsons-puzzle, llm-based-auto-rubric, probeable-question-act).
 - **Don't delegate when:** Re-running create_assessment with the same payload (no LLM work needed).
 - **Rule of thumb:** New question content → @assessment-author. Re-stamp existing payload → tool only.`,
 
@@ -82,7 +82,7 @@ ${enabledAgents}
 
 ### Authoring tools
 - \`create_page({workspace, stem, title, type, layout, learningObjectives, files?, markdownBody, chapterFolder?})\` — stamp a page JSON with a fresh v4 UUID and write the JSON+MD pair. Use after \`@page-author\` returns a body+metadata in draft mode. Validates freeze directive syntax before writing — will throw if FREEZE CODE BEGIN/END are unmatched or use wrong comment prefix for the language.
-- \`create_assessment({type, workspace, payload})\` — stamp an assessment JSON with a fresh taskId and write to \`.guides/assessments/\`. Returns \`{ taskId, jsonPath, embedLine }\`. Use after \`@assessment-author\` returns a payload. The returned \`embedLine\` is what you splice into the page markdown.
+- \`create_assessment({type, workspace, payload})\` — stamp an assessment JSON with a fresh taskId and write to \`.guides/assessments/\`. Returns \`{ taskId, jsonPath, embedLine }\` for standard types. For \`probeable-question-act\` it also returns \`{ studentFilePath, probeScriptPath, unitTestPath, probeButtonCommand, runButtonCommand, suggestedPageMarkdown }\` and writes three additional files (student file, probe script, unit test file). Use \`suggestedPageMarkdown\` as the body_brief for \`@page-author\` when creating the guide page for a probeable question.
 - \`validate_guide({workspace})\` — deterministic structural validation (UUIDs, order arrays, embed references, taskId/filename consistency). Run automatically at the end of new-assignment, import-source, and reorder workflows. Run manually on user request (\`/validate-guide\`).
 
 </Tools>
@@ -107,7 +107,8 @@ Only fan out where work is independent — page drafting (Phase 4) and assessmen
 
 - New page JSON+MD pair → \`@page-author\` (draft mode) returns body+metadata → you call \`create_page\`. **Do not** hand-write the JSON.
 - Existing page surgical edit → \`@page-author\` (revise mode) edits the .md directly. No tool call.
-- New assessment → \`@assessment-author\` returns payload → you call \`create_assessment\` → you splice the returned \`embedLine\` into the page.
+- New standard assessment → \`@assessment-author\` returns payload → you call \`create_assessment\` → you splice the returned \`embedLine\` into the page.
+- New probeable question → \`@assessment-author\` returns payload → you call \`create_assessment\` (type: probeable-question-act) → use \`suggestedPageMarkdown\` as body_brief for \`@page-author\` → call \`create_page\`.
 - \`order\` array updates → you edit the index.json yourself with the edit tool. Small contextual JSON; no tool needed.
 - Stem generation (kebab-slug + 4-hex) → you generate inline. No tool.
 </Workflow>

diff --git a/src/index.ts b/src/index.ts
@@ -93,8 +93,10 @@ const CodioAuthoringPlugin = (async (ctx: { directory: string }) => {
         typeof orchestratorDef?.config?.prompt === 'string'
           ? orchestratorDef.config.prompt
           : buildOrchestratorPrompt(disabledAgents);
+      const workspaceBlock = `<Workspace>\nWorkspace root: ${ctx.directory}\nAlways pass this exact path as the \`workspace\` argument to all tools.\n</Workspace>`;
       output.system[0] =
         orchestratorPrompt +
+        `\n\n${workspaceBlock}` +
         (output.system[0] ? `\n\n${output.system[0]}` : '');
     },
   };

diff --git a/src/skills/reference-assessment-types/SKILL.md b/src/skills/reference-assessment-types/SKILL.md
@@ -1,6 +1,6 @@
 ---
 name: reference-assessment-types
-description: Payload field reference for multiple-choice, code-output-compare (Standard Code Test), fill-in-the-blanks, parsons-puzzle, and llm-based-auto-rubric assessments. Use when building any assessment payload.
+description: Payload field reference for multiple-choice, code-output-compare, fill-in-the-blanks, parsons-puzzle, llm-based-auto-rubric, and probeable-question-act assessments. Use when building any assessment payload.
 ---
 
 # Assessment types
@@ -307,3 +307,127 @@ Note: auto-rubric uses `panelNumber` (not `panel`) in the opened entries.
 - All solutions: `.guides/secure/<filename>` — hidden from students.
 - Never: `.guides/content/<anything>` — student-visible.
 - The tool writes the file only if it does not already exist (safe to re-run).
+
+---
+
+## `probeable-question-act` — Probeable Question (Advanced Code Test)
+
+A probeable question combines a guide page with custom buttons and an Advanced Code Test assessment. Students first "probe" the reference solution with their own inputs to discover edge cases, then implement the function themselves.
+
+### What the tool creates
+
+Calling `create_assessment` with `type: 'probeable-question-act'` writes four files and returns an extended result:
+
+| File | Location | Visible to students? |
+|---|---|---|
+| Assessment JSON | `.guides/assessments/test-XXXXXXXXXX.json` | No |
+| Student file | `code/functions/probeable_{functionName}.py` | Yes |
+| Probe script | `.guides/probeable_questions/probe_{functionName}.py` | Yes (by design) |
+| Unit test file | `.guides/secure/unit_tests/probeable_questions/{functionName}_test.py` | No |
+
+The probe script is intentionally student-visible — students click a button to run it with their probe inputs.
+
+### Payload fields
+
+```
+functionName             string   Python function name (snake_case), used as the file stem
+functionSignature        string   Full def line with type hints, e.g. "def min_index(int_list: list[int]) -> int:"
+problemStatement         string   Narrative description for the guide page. Must be intentionally vague — omit edge cases and boundary conditions so students must use the probe to discover them.
+inputVariable            string   Variable students modify to probe, e.g. "int_list"
+inputVariableAnnotation  string   Python type annotation, e.g. "list[int]"
+defaultInputValue        string   Default value shown in student file, e.g. "[1, 2, 3]"
+referenceSolution        string   Complete working function definition (def line + body)
+unitTests                string   Full Python unittest file content
+name?                    string   Display name (defaults to functionName)
+points?                  number   Default 20
+maxAttemptsCount?        number   Default 0 (unlimited)
+timeoutSeconds?          number   Default 40
+guidance?                string   Default ""
+lo?                      string   Learning objective
+blooms?                  string   Bloom's level
+```
+
+### Assessment JSON fixed fields
+
+The tool writes these fields with fixed values — do not override:
+
+```
+"instructions": "**Submit your work for evaluation**"
+"command": "python3 .guides/secure/unit_tests/probeable_questions/{functionName}_test.py"
+```
+
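+The `command` runs the unit test file directly with `python3` (not `pytest`, and not `python3 -m unittest`). Because the file is executed as a plain script, it must call `unittest.main()` itself (typically under `if __name__ == '__main__':`) — otherwise no tests run and the command exits successfully. The overall runner still wraps execution through `python /usr/share/codio/assessments/assessments.py`; the `command` here is the command string passed to that runner via `codeEnvConfig`.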
+
+### Extended return value
+
+```
+taskId                 string   e.g. "test-1234567890"
+jsonPath               string   Absolute path to written assessment JSON
+embedLine              string   '{Check It!|assessment}(test-XXXXXXXXXX)'
+studentFilePath        string   'code/functions/probeable_{functionName}.py'
+probeScriptPath        string   '.guides/probeable_questions/probe_{functionName}.py'
+unitTestPath           string   '.guides/secure/unit_tests/probeable_questions/{functionName}_test.py'
+probeButtonCommand     string   'python3 .guides/probeable_questions/probe_{functionName}.py'
+runButtonCommand       string   'python3 code/functions/probeable_{functionName}.py'
+suggestedPageMarkdown  string   Complete guide page markdown — pass to @page-author as body_brief
+```
+
+### Orchestrator workflow
+
+```
+@assessment-author → payload
+  ↓
+create_assessment(type: probeable-question-act) → { taskId, embedLine, suggestedPageMarkdown, ... }
+  ↓
+@page-author (draft mode, body_brief = suggestedPageMarkdown) → markdownBody
+  ↓
+create_page (layout: 2-panels-tree, files: [studentFilePath]) → page JSON + MD
+```
+
+### Guide page structure
+
+The `suggestedPageMarkdown` returned by the tool follows this template — pass it as-is or refine via `@page-author`:
+
+```
+{problemStatement}
+
+#### Probing the solution to find edge cases
+
+Modify {inputVariable} values to check the expected return value and discover edge cases.
+
+|||warning
+## `{functionName}` code
+Probing will run the code in the editor. Syntax errors in your file will prevent probing.
+Pro-tip: Leave only `pass` in the `{functionName}` function while probing.
+|||
+
+{Probe Solution}(python3 .guides/probeable_questions/probe_{functionName}.py)
+
+
+## Complete the function
+
+{Run it !}(python3 code/functions/probeable_{functionName}.py)
+
+{Check It!|assessment}(test-XXXXXXXXXX)
+```
+
+### Unit Test Import Rule
+
+**Never use `from code.functions.probeable_{functionName} import ...`** in the `unitTests` string. Python resolves `code` to its standard-library `code` module (a plain module, not a package) rather than to the workspace `code/` directory, causing a `ModuleNotFoundError: No module named 'code.functions'` at runtime.
+
+Always use a `sys.path` injection instead:
+
+```python
+import sys
+import unittest
+sys.path.insert(0, '/home/codio/workspace/code/functions')
+from probeable_{functionName} import {functionName}
+```
+
+This pattern must appear at the top of every `unitTests` string, before any test class definition.
+
+
+### File naming
+
+Files are named by `functionName` (no sequential exercise numbers). If a student or probe file with the same `functionName` already exists, the tool throws a collision error — choose a unique function name per question.
+