diff --git a/package.json b/package.json index 8c69fe1..05387d6 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@codio-ai/opencode-authoring-agent", - "version": "0.1.3", + "version": "0.1.7", "description": "Opencode plugin for authoring Codio guide assignments — orchestrator, subagents, tools, and skills.", "type": "module", "main": "dist/index.js", diff --git a/src/agents/assessment-author.ts b/src/agents/assessment-author.ts index 004a2b6..02fff0a 100644 --- a/src/agents/assessment-author.ts +++ b/src/agents/assessment-author.ts @@ -3,13 +3,13 @@ import { resolvePrompt } from './types'; const BASE_PROMPT = `# Assessment Author -You author one or more assessments (MCQ, code test, fill-in-the-blanks, parsons-puzzle, or llm-based-auto-rubric) for a Codio guide page. You return structured payloads. **You do not write JSON or files directly.** The orchestrator passes your payload to the \`create_assessment\` tool, which stamps the canonical JSON and writes it. +You author one or more assessments (MCQ, code test, fill-in-the-blanks, parsons-puzzle, llm-based-auto-rubric, or probeable-question-act) for a Codio guide page. You return structured payloads. **You do not write JSON or files directly.** The orchestrator passes your payload to the \`create_assessment\` tool, which stamps the canonical JSON and writes it. ## Inputs -- \`assessment_type\`: one of \`multiple-choice\`, \`code-output-compare\`, \`fill-in-the-blanks\`, \`parsons-puzzle\`, \`llm-based-auto-rubric\`. +- \`assessment_type\`: one of \`multiple-choice\`, \`code-output-compare\`, \`fill-in-the-blanks\`, \`parsons-puzzle\`, \`llm-based-auto-rubric\`, \`probeable-question-act\`. - \`topic\`, \`lo\` (learning objective), \`page_context\` (the page body for grounding). -- \`count\` (how many to author; default 1 for code-test/FITB/parsons/auto-rubric, 3 for MCQ). +- \`count\` (how many to author; default 1 for code-test/FITB/parsons/auto-rubric/probeable, 3 for MCQ). 
## Output @@ -22,6 +22,16 @@ Return one payload per assessment, in the exact shape \`create_assessment\` expe - For \`fill-in-the-blanks\`: use the \`<<>>\` marker syntax inline in the \`text\` field. Each blank should test one concrete piece of recall or application. - For \`parsons-puzzle\`: provide a correct code solution as \`initial\`, then append distractor lines below — each distractor labeled with a \`#distractor\` comment so the orchestrator can review. Aim for 2–4 distractors that reflect common syntactic or logical mistakes (off-by-one, wrong operator, wrong variable name). Indent matters for Python — call out which indent levels each block needs. - For \`llm-based-auto-rubric\`: write 3–5 rubric items, each scoped to one observable dimension (syntax, logic, edge cases, style). Each item has a clear \`title\` and a \`description\` written for the LLM grader to apply (concrete criteria, not vague). Always include a \`solutions\` array under \`.guides/secure/\` so the grader has reference; never expose solution paths under \`.guides/content/\`. +- For \`probeable-question-act\`: collect these fields before returning the payload: + - \`functionName\` — Python function name (snake_case), used as the file stem. + - \`functionSignature\` — full def line with type hints, e.g. \`def min_index(int_list: list[int]) -> int:\` + - \`problemStatement\` — narrative description of what the function must do. **Write this intentionally vague or incomplete** — omit edge cases and boundary conditions so that students must use the probe to discover them. Do not spell out every input constraint; leave room for exploration. + - \`inputVariable\` — the variable students modify to probe, e.g. \`int_list\`. + - \`inputVariableAnnotation\` — Python type annotation, e.g. \`list[int]\`. + - \`defaultInputValue\` — default value shown in the student file, e.g. \`[1, 2, 3]\`. + - \`referenceSolution\` — the complete working function definition (including the \`def\` line and body). 
+ - \`unitTests\` — full Python unittest file content. + **If the instructor has not provided \`referenceSolution\` or \`unitTests\`, ask whether they want to supply them or have you generate them. Generate only if the instructor explicitly says to.** ## What you don't do @@ -40,7 +50,7 @@ export function createAssessmentAuthorAgent( return { name: 'assessment-author', description: - 'Authors MCQ / code-output-compare / fill-in-the-blanks / parsons-puzzle / llm-based-auto-rubric payloads. Returns structured payloads; does not write files.', + 'Authors MCQ / code-output-compare / fill-in-the-blanks / parsons-puzzle / llm-based-auto-rubric / probeable-question-act payloads. Returns structured payloads; does not write files.', config: { mode: 'subagent', model, diff --git a/src/agents/orchestrator.ts b/src/agents/orchestrator.ts index 4b58a26..e4148b7 100644 --- a/src/agents/orchestrator.ts +++ b/src/agents/orchestrator.ts @@ -22,11 +22,11 @@ const AGENT_DESCRIPTIONS: Record = { - **layoutConfig:** When delegating, include \`layoutConfig: { openFiles: string[] }\` derived from the page's \`files\` list so @page-author can generate adaptive file instructions.`, 'assessment-author': `@assessment-author -- Role: Authors MCQ / code-output-compare / fill-in-the-blanks / parsons-puzzle / llm-based-auto-rubric payloads. Returns structured payloads only. +- Role: Authors MCQ / code-output-compare / fill-in-the-blanks / parsons-puzzle / llm-based-auto-rubric / probeable-question-act payloads. Returns structured payloads only. - Permissions: Read only - Stats: Pedagogy + distractor quality is the hardest content judgment in this system. Sonnet-class. -- Capabilities: MCQ pedagogy, code-test design, FITB construction, Parsons distractor design, LLM rubric authoring. -- **Delegate when:** Authoring any new assessment (multiple-choice, code-output-compare, fill-in-the-blanks, parsons-puzzle, llm-based-auto-rubric). 
+- Capabilities: MCQ pedagogy, code-test design, FITB construction, Parsons distractor design, LLM rubric authoring, probeable question design. +- **Delegate when:** Authoring any new assessment (multiple-choice, code-output-compare, fill-in-the-blanks, parsons-puzzle, llm-based-auto-rubric, probeable-question-act). - **Don't delegate when:** Re-running create_assessment with the same payload (no LLM work needed). - **Rule of thumb:** New question content → @assessment-author. Re-stamp existing payload → tool only.`, @@ -82,7 +82,7 @@ ${enabledAgents} ### Authoring tools - \`create_page({workspace, stem, title, type, layout, learningObjectives, files?, markdownBody, chapterFolder?})\` — stamp a page JSON with a fresh v4 UUID and write the JSON+MD pair. Use after \`@page-author\` returns a body+metadata in draft mode. Validates freeze directive syntax before writing — will throw if FREEZE CODE BEGIN/END are unmatched or use wrong comment prefix for the language. -- \`create_assessment({type, workspace, payload})\` — stamp an assessment JSON with a fresh taskId and write to \`.guides/assessments/\`. Returns \`{ taskId, jsonPath, embedLine }\`. Use after \`@assessment-author\` returns a payload. The returned \`embedLine\` is what you splice into the page markdown. +- \`create_assessment({type, workspace, payload})\` — stamp an assessment JSON with a fresh taskId and write to \`.guides/assessments/\`. Returns \`{ taskId, jsonPath, embedLine }\` for standard types. For \`probeable-question-act\` also returns \`{ studentFilePath, probeScriptPath, probeButtonCommand, runButtonCommand, suggestedPageMarkdown }\` and writes 3 additional files (student file, probe script, unit test file). Use \`suggestedPageMarkdown\` as the body_brief for \`@page-author\` when creating the guide page for a probeable question. - \`validate_guide({workspace})\` — deterministic structural validation (UUIDs, order arrays, embed references, taskId/filename consistency). 
Run automatically at the end of new-assignment, import-source, and reorder workflows. Run manually on user request (\`/validate-guide\`). @@ -107,7 +107,8 @@ Only fan out where work is independent — page drafting (Phase 4) and assessmen - New page JSON+MD pair → \`@page-author\` (draft mode) returns body+metadata → you call \`create_page\`. **Do not** hand-write the JSON. - Existing page surgical edit → \`@page-author\` (revise mode) edits the .md directly. No tool call. -- New assessment → \`@assessment-author\` returns payload → you call \`create_assessment\` → you splice the returned \`embedLine\` into the page. +- New standard assessment → \`@assessment-author\` returns payload → you call \`create_assessment\` → you splice the returned \`embedLine\` into the page. +- New probeable question → \`@assessment-author\` returns payload → you call \`create_assessment\` (type: probeable-question-act) → use \`suggestedPageMarkdown\` as body_brief for \`@page-author\` → call \`create_page\`. - \`order\` array updates → you edit the index.json yourself with the edit tool. Small contextual JSON; no tool needed. - Stem generation (kebab-slug + 4-hex) → you generate inline. No tool. diff --git a/src/index.ts b/src/index.ts index a58e830..57d865d 100644 --- a/src/index.ts +++ b/src/index.ts @@ -93,8 +93,10 @@ const CodioAuthoringPlugin = (async (ctx: { directory: string }) => { typeof orchestratorDef?.config?.prompt === 'string' ? orchestratorDef.config.prompt : buildOrchestratorPrompt(disabledAgents); + const workspaceBlock = `\nWorkspace root: ${ctx.directory}\nAlways pass this exact path as the \`workspace\` argument to all tools.\n`; output.system[0] = orchestratorPrompt + + `\n\n${workspaceBlock}` + (output.system[0] ? 
`\n\n${output.system[0]}` : ''); }, }; diff --git a/src/skills/reference-assessment-types/SKILL.md b/src/skills/reference-assessment-types/SKILL.md index 9ce79a0..57e78ce 100644 --- a/src/skills/reference-assessment-types/SKILL.md +++ b/src/skills/reference-assessment-types/SKILL.md @@ -1,6 +1,6 @@ --- name: reference-assessment-types -description: Payload field reference for multiple-choice, code-output-compare (Standard Code Test), fill-in-the-blanks, parsons-puzzle, and llm-based-auto-rubric assessments. Use when building any assessment payload. +description: Payload field reference for multiple-choice, code-output-compare, fill-in-the-blanks, parsons-puzzle, llm-based-auto-rubric, and probeable-question-act assessments. Use when building any assessment payload. --- # Assessment types @@ -307,3 +307,127 @@ Note: auto-rubric uses `panelNumber` (not `panel`) in the opened entries. - All solutions: `.guides/secure/` — hidden from students. - Never: `.guides/content/` — student-visible. - The tool writes the file only if it does not already exist (safe to re-run). + +--- + +## `probeable-question-act` — Probeable Question (Advanced Code Test) + +A probeable question combines a guide page with custom buttons and an Advanced Code Test assessment. Students first "probe" the reference solution with their own inputs to discover edge cases, then implement the function themselves. + +### What the tool creates + +Calling `create_assessment` with `type: 'probeable-question-act'` writes four files and returns an extended result: + +| File | Location | Visible to students? 
| +|---|---|---| +| Assessment JSON | `.guides/assessments/test-XXXXXXXXXX.json` | No | +| Student file | `code/functions/probeable_{functionName}.py` | Yes | +| Probe script | `.guides/probeable_questions/probe_{functionName}.py` | Yes (by design) | +| Unit test file | `.guides/secure/unit_tests/probeable_questions/{functionName}_test.py` | No | + +The probe script is intentionally student-visible — students click a button to run it with their probe inputs. + +### Payload fields + +``` +functionName string Python function name (snake_case), used as the file stem +functionSignature string Full def line with type hints, e.g. "def min_index(int_list: list[int]) -> int:" +problemStatement string Narrative description for the guide page. Must be intentionally vague — omit edge cases and boundary conditions so students must use the probe to discover them. +inputVariable string Variable students modify to probe, e.g. "int_list" +inputVariableAnnotation string Python type annotation, e.g. "list[int]" +defaultInputValue string Default value shown in student file, e.g. "[1, 2, 3]" +referenceSolution string Complete working function definition (def line + body) +unitTests string Full Python unittest file content +name? string Display name (defaults to functionName) +points? number Default 20 +maxAttemptsCount? number Default 0 (unlimited) +timeoutSeconds? number Default 40 +guidance? string Default "" +lo? string Learning objective +blooms? string Bloom's level +``` + +### Assessment JSON fixed fields + +The tool writes these fields with fixed values — do not override: + +``` +"instructions": "**Submit your work for evaluation**" +"command": "python3 .guides/secure/unit_tests/probeable_questions/{functionName}_test.py" +``` + +The `command` runs the unit test file directly via `python3` (not `pytest`, not `-m unittest`). 
In the written assessment JSON, however, the `command` field itself is always the Codio runner — `python /usr/share/codio/assessments/assessments.py` — and the unit test path shown above is handed to that runner through `codeEnvConfig` (`files: [unitTestPath]`, `executable: "python3"`, `subtype: "unittest"`). + +### Extended return value + +``` +taskId string e.g. "test-1234567890" +jsonPath string Absolute path to written assessment JSON +embedLine string '{Check It!|assessment}(test-XXXXXXXXXX)' +studentFilePath string 'code/functions/probeable_{functionName}.py' +probeScriptPath string '.guides/probeable_questions/probe_{functionName}.py' +unitTestPath string '.guides/secure/unit_tests/probeable_questions/{functionName}_test.py' +probeButtonCommand string 'python3 .guides/probeable_questions/probe_{functionName}.py' +runButtonCommand string 'python3 code/functions/probeable_{functionName}.py' +suggestedPageMarkdown string Complete guide page markdown — pass to @page-author as body_brief +``` + +### Orchestrator workflow + +``` +@assessment-author → payload + ↓ +create_assessment(type: probeable-question-act) → { taskId, embedLine, suggestedPageMarkdown, ... } + ↓ +@page-author (draft mode, body_brief = suggestedPageMarkdown) → markdownBody + ↓ +create_page (layout: 2-panels-tree, files: [studentFilePath]) → page JSON + MD +``` + +### Guide page structure + +The `suggestedPageMarkdown` returned by the tool follows this template — pass it as-is or refine via `@page-author`: + +``` +{problemStatement} + +#### Probing the solution to find edge cases + +Modify {inputVariable} values to check the expected return value and discover edge cases. + +|||warning +## `{functionName}` code +Probing will run the code in the editor. Syntax errors in your file will prevent probing. +Pro-tip: Leave only `pass` in the `{functionName}` function while probing. 
+||| + +{Probe Solution}(python3 .guides/probeable_questions/probe_{functionName}.py) + + +## Complete the function + +{Run it !}(python3 code/functions/probeable_{functionName}.py) + +{Check It!|assessment}(test-XXXXXXXXXX) +``` + +### Unit Test Import Rule + +**Never use `from code.functions.probeable_{functionName} import ...`** in the `unitTests` string. Python's built-in `code` module shadows the `code/` directory, causing a `ModuleNotFoundError: No module named 'code.functions'` at runtime. + +Always use a `sys.path` injection instead: + +```python +import sys +import unittest +sys.path.insert(0, '/home/codio/workspace/code/functions') +from probeable_{functionName} import {functionName} +``` + +This pattern must appear at the top of every `unitTests` string, before any test class definition. + + +### File naming + +Files are named by `functionName` (no sequential exercise numbers). If a student or probe file with the same `functionName` already exists, the tool throws a collision error — choose a unique function name per question. 
+ diff --git a/src/tools/create-assessment.test.ts b/src/tools/create-assessment.test.ts index e7a1b0c..7e2f984 100644 --- a/src/tools/create-assessment.test.ts +++ b/src/tools/create-assessment.test.ts @@ -10,6 +10,7 @@ import { import { tmpdir } from 'node:os'; import { join } from 'node:path'; import { isTaskId } from '../utils/taskid'; +import type { ProbeableCreateResult } from './create-assessment'; import { buildMcqAssessment, createAssessmentHandler, @@ -397,3 +398,104 @@ describe('createAssessmentHandler — llm-based-auto-rubric', () => { } }); }); + +// ── probeable-question-act ──────────────────────────────────────────────────── + +function makeProbWs(): string { + const dir = mkdtempSync(join(tmpdir(), 'codio-probeable-')); + mkdirSync(join(dir, '.guides', 'assessments'), { recursive: true }); + return dir; +} + +const baseProbeablePayload = { + functionName: 'min_index', + functionSignature: 'def min_index(int_list: list[int]) -> int:', + problemStatement: 'Find the index of the minimum integer in a list.', + inputVariable: 'int_list', + inputVariableAnnotation: 'list[int]', + defaultInputValue: '[1, 2, 3]', + referenceSolution: `def min_index(int_list: list[int]) -> int:\n if not int_list: return -1\n return 0`, + unitTests: `import unittest\nfrom probeable_min_index import min_index\nclass T(unittest.TestCase):\n def test_empty(self):\n self.assertEqual(min_index([]), -1)\n`, +}; + +describe('createAssessmentHandler — probeable-question-act', () => { + test('taskId starts with test- and assessment JSON type is "test"', async () => { + const ws = makeProbWs(); + try { + const result = await createAssessmentHandler({ type: 'probeable-question-act', workspace: ws, payload: baseProbeablePayload }) as ProbeableCreateResult; + expect(result.taskId).toMatch(/^test-[1-9][0-9]{9}$/); + const json = JSON.parse(readFileSync(result.jsonPath, 'utf8')); + expect(json.type).toBe('test'); + expect(json.taskId).toBe(result.taskId); + } finally { rmSync(ws, { 
recursive: true, force: true }); } + }); + + test('codeEnvConfig is valid JSON with expected fields', async () => { + const ws = makeProbWs(); + try { + const result = await createAssessmentHandler({ type: 'probeable-question-act', workspace: ws, payload: baseProbeablePayload }) as ProbeableCreateResult; + const cfg = JSON.parse(JSON.parse(readFileSync(result.jsonPath, 'utf8')).source.codeEnvConfig); + expect(cfg.type).toBe('python'); + expect(cfg.subtype).toBe('unittest'); + expect(cfg.pythonwd).toBe('.guides/secure/unit_tests/probeable_questions'); + } finally { rmSync(ws, { recursive: true, force: true }); } + }); + + test('student file contains freeze block and default input', async () => { + const ws = makeProbWs(); + try { + const result = await createAssessmentHandler({ type: 'probeable-question-act', workspace: ws, payload: baseProbeablePayload }) as ProbeableCreateResult; + const content = readFileSync(join(ws, result.studentFilePath), 'utf8'); + expect(content).toContain('# FREEZE CODE BEGIN'); + expect(content).toContain('# FREEZE CODE END'); + expect(content).toContain('int_list: list[int] = [1, 2, 3]'); + } finally { rmSync(ws, { recursive: true, force: true }); } + }); + + test('probe script contains reference solution and MissingVariableError', async () => { + const ws = makeProbWs(); + try { + const result = await createAssessmentHandler({ type: 'probeable-question-act', workspace: ws, payload: baseProbeablePayload }) as ProbeableCreateResult; + const content = readFileSync(join(ws, result.probeScriptPath), 'utf8'); + expect(content).toContain('MissingVariableError'); + expect(content).toContain('min_index'); + } finally { rmSync(ws, { recursive: true, force: true }); } + }); + + test('unit test file written to .guides/secure/unit_tests/probeable_questions/', async () => { + const ws = makeProbWs(); + try { + const result = await createAssessmentHandler({ type: 'probeable-question-act', workspace: ws, payload: baseProbeablePayload }) as 
ProbeableCreateResult; + expect(result.unitTestPath).toContain('.guides/secure/unit_tests/probeable_questions/'); + expect(existsSync(join(ws, result.unitTestPath))).toBe(true); + } finally { rmSync(ws, { recursive: true, force: true }); } + }); + + test('embedLine uses Check It! label', async () => { + const ws = makeProbWs(); + try { + const result = await createAssessmentHandler({ type: 'probeable-question-act', workspace: ws, payload: baseProbeablePayload }) as ProbeableCreateResult; + expect(result.embedLine).toMatch(/^\{Check It!\|assessment\}\(test-[1-9][0-9]{9}\)$/); + } finally { rmSync(ws, { recursive: true, force: true }); } + }); + + test('suggestedPageMarkdown contains probe button, run button, and embed', async () => { + const ws = makeProbWs(); + try { + const result = await createAssessmentHandler({ type: 'probeable-question-act', workspace: ws, payload: baseProbeablePayload }) as ProbeableCreateResult; + expect(result.suggestedPageMarkdown).toContain('{Probe Solution}('); + expect(result.suggestedPageMarkdown).toContain('{Run it !}('); + expect(result.suggestedPageMarkdown).toContain(result.embedLine); + } finally { rmSync(ws, { recursive: true, force: true }); } + }); + + test('rejects duplicate functionName', async () => { + const ws = makeProbWs(); + try { + await createAssessmentHandler({ type: 'probeable-question-act', workspace: ws, payload: baseProbeablePayload }); + await expect( + createAssessmentHandler({ type: 'probeable-question-act', workspace: ws, payload: baseProbeablePayload }), + ).rejects.toThrow(); + } finally { rmSync(ws, { recursive: true, force: true }); } + }); +}); diff --git a/src/tools/create-assessment.ts b/src/tools/create-assessment.ts index dee1446..3691414 100644 --- a/src/tools/create-assessment.ts +++ b/src/tools/create-assessment.ts @@ -11,6 +11,7 @@ const ASSESSMENT_TYPE_TAG: Record = { 'fill-in-the-blanks': 'Fill in the Blanks', 'parsons-puzzle': 'Parsons Puzzle', 'llm-based-auto-rubric': null, + 
'probeable-question-act': 'Advanced Code Test', }; interface MetadataTag { @@ -504,6 +505,204 @@ export function buildLlmAutoRubricAssessment( }; } +// ── Probeable Question (Advanced Code Test) ────────────────────────────────── + +export const probeablePayloadSchema = z.object({ + functionName: z.string().min(1), + functionSignature: z.string().min(1), + problemStatement: z.string().min(1), + inputVariable: z.string().min(1), + inputVariableAnnotation: z.string().min(1), + defaultInputValue: z.string().min(1), + referenceSolution: z.string().min(1), + unitTests: z.string().min(1), + name: z.string().optional(), + points: z.number().int().nonnegative().default(20), + maxAttemptsCount: z.number().int().nonnegative().default(0), + timeoutSeconds: z.number().int().positive().default(40), + guidance: z.string().default(''), + lo: z.string().default(''), + blooms: z.string().default(''), +}); + +export type ProbeablePayload = z.infer; + +export interface ProbeableCreateResult { + taskId: string; + jsonPath: string; + embedLine: string; + studentFilePath: string; + probeScriptPath: string; + unitTestPath: string; + probeButtonCommand: string; + runButtonCommand: string; + suggestedPageMarkdown: string; +} + +function indentLines(text: string, spaces: number): string { + const pad = ' '.repeat(spaces); + return text + .split('\n') + .map((line) => (line.trim() ? 
pad + line : line)) + .join('\n'); +} + +function buildProbeableAssessment( + raw: unknown, + taskId: string, + workspace: string, +): ProbeableCreateResult & { json: object; studentFileContent: string; probeScriptContent: string; unitTestContent: string } { + const p = probeablePayloadSchema.parse(raw); + const fn = p.functionName; + + // Relative paths (workspace-relative) + const studentFilePath = `code/functions/probeable_${fn}.py`; + const probeScriptPath = `.guides/probeable_questions/probe_${fn}.py`; + const unitTestPath = `.guides/secure/unit_tests/probeable_questions/${fn}_test.py`; + + // Student file — has the input variable and function stub with freeze block + const studentFileContent = [ + `# Probe ${fn}: Modify only the ${p.inputVariable} values to test edge cases`, + ``, + `${p.inputVariable}: ${p.inputVariableAnnotation} = ${p.defaultInputValue}`, + ``, + `${p.functionSignature}`, + ` # Replace 'pass' with your solution`, + ` pass`, + ``, + ``, + ``, + `# FREEZE CODE BEGIN`, + `if __name__=="__main__":`, + ` print(f"${p.inputVariable} = {${p.inputVariable}} , result = {${fn}(${p.inputVariable})}")`, + `# FREEZE CODE END`, + ].join('\n'); + + // Probe script — embeds the reference solution inside check_import_and_probe + const indentedSolution = indentLines(p.referenceSolution.trimEnd(), 4); + const moduleName = `probeable_${fn}`; + const logFile = `${workspace}/code/functions/probeable_${fn}_io_logs.txt`; + const probeScriptContent = [ + `import sys`, + `sys.path.append("${workspace}/code/functions/")`, + ``, + `class MissingVariableError(Exception):`, + ` def __str__(self):`, + ` return "Error: The required '${p.inputVariable}' variable is missing. 
Please declare '${p.inputVariable}' as a ${p.inputVariableAnnotation}."`, + ``, + ``, + `def check_import_and_probe():`, + ` try:`, + ` from ${moduleName} import ${p.inputVariable}`, + ``, + `${indentedSolution}`, + ``, + ` if __name__ == "__main__":`, + ` log_file = "${logFile}"`, + ` result = ${fn}(${p.inputVariable})`, + ` with open(log_file, "a") as f:`, + ` f.write(f"${p.inputVariable} = {${p.inputVariable}} , result = {result}\\n")`, + ` with open(log_file, "r") as f:`, + ` for line in f.readlines():`, + ` print(line)`, + ``, + ` except ImportError:`, + ` raise MissingVariableError()`, + `try:`, + ` check_import_and_probe()`, + `except MissingVariableError as e:`, + ` print(e)`, + ` sys.exit(1)`, + ].join('\n'); + + // Assessment JSON + const codeEnvConfig = JSON.stringify({ + type: 'python', + subtype: 'unittest', + files: [unitTestPath], + maxPoints: p.points, + timeout: p.timeoutSeconds, + executable: 'python3', + pythonwd: '.guides/secure/unit_tests/probeable_questions', + partialPoints: false, + }); + + const assessmentJson = { + type: 'test', + taskId, + source: { + name: p.name ?? 
fn, + showName: false, + instructions: '**Submit your work for evaluation**', + command: 'python /usr/share/codio/assessments/assessments.py', + codeEnvConfig, + pythonPath: join(workspace, 'code', 'functions'), + timeoutSeconds: p.timeoutSeconds, + guidance: p.guidance, + showGuidanceAfterResponseOption: { type: 'Never' }, + maxAttemptsCount: p.maxAttemptsCount, + points: p.points, + arePartialPointsAllowed: false, + useMaximumScore: false, + metadata: { + tags: [{ name: 'Assessment Type', value: 'Advanced Code Test' }], + files: [studentFilePath], + opened: [{ type: 'file', panelNumber: 0, content: studentFilePath }], + }, + bloomsObjectiveLevel: p.blooms, + learningObjectives: p.lo, + }, + }; + + const probeButtonCommand = `python3 ${probeScriptPath}`; + const runButtonCommand = `python3 ${studentFilePath}`; + const embedLine = `{Check It!|assessment}(${taskId})`; + + // Suggested guide page markdown for the orchestrator to pass to @page-author + const suggestedPageMarkdown = [ + `${p.problemStatement}`, + ``, + `#### Probing the solution to find edge cases`, + ``, + `You have the ability to 'probe' the solution. Modify the values in \`${p.inputVariable}\` to check the *expected* return value and discover edge cases. You will see the results of all your inputs.`, + ``, + `|||warning`, + `## \`${fn}\` code`, + ``, + `Probing the solution will run the code in the editor. 
If the code in your file has syntax errors, probing will not work.`, + ``, + `Pro-tip: Leave only the \`pass\` keyword in the \`${fn}\` function while probing.`, + `|||`, + ``, + `{Probe Solution}(${probeButtonCommand})`, + ``, + ``, + `## Complete the function`, + ``, + `Now that you have probed the solution, write your code and complete the \`${fn}\` function.`, + ``, + `{Run it !}(${runButtonCommand})`, + ``, + `${embedLine}`, + ].join('\n'); + + return { + taskId, + jsonPath: '', // replaced by caller after writing + embedLine, + studentFilePath, + probeScriptPath, + unitTestPath, + probeButtonCommand, + runButtonCommand, + suggestedPageMarkdown, + json: assessmentJson, + studentFileContent, + probeScriptContent, + unitTestContent: p.unitTests, + }; +} + // ── Dispatcher ────────────────────────────────────────────────────────────── export const createAssessmentInputSchema = z.discriminatedUnion('type', [ @@ -532,18 +731,27 @@ export const createAssessmentInputSchema = z.discriminatedUnion('type', [ workspace: z.string().min(1), payload: z.unknown(), }), + z.object({ + type: z.literal('probeable-question-act'), + workspace: z.string().min(1), + payload: z.unknown(), + }), ]); -export interface CreateAssessmentResult { - taskId: string; - jsonPath: string; - embedLine: string; -} +export type CreateAssessmentResult = + | { taskId: string; jsonPath: string; embedLine: string } + | ProbeableCreateResult; export async function createAssessmentHandler( raw: unknown, ): Promise { - const input = createAssessmentInputSchema.parse(raw); + const input = createAssessmentInputSchema.parse( + typeof raw === 'string' ? JSON.parse(raw) : raw, + ); + // LLMs sometimes JSON-stringify the payload field — normalise it here so + // every branch can safely pass input.payload to its schema parser. + const payload: unknown = + typeof input.payload === 'string' ? 
JSON.parse(input.payload) : input.payload; const { assessments, workspace } = resolveCodioPaths({ directory: input.workspace, }); @@ -560,13 +768,13 @@ export async function createAssessmentHandler( if (input.type === 'multiple-choice') { const taskId = generateTaskId('multiple-choice'); - json = buildMcqAssessment(input.payload, taskId); + json = buildMcqAssessment(payload, taskId); name = json.source.name || taskId; } else if (input.type === 'code-output-compare') { const taskId = generateTaskId('code-output-compare'); - json = buildCodeTestAssessment(input.payload, taskId); + json = buildCodeTestAssessment(payload, taskId); name = (json.source.name as string) || taskId; - const parsed = codeTestPayloadSchema.parse(input.payload); + const parsed = codeTestPayloadSchema.parse(payload); if (parsed.starter_file_path && parsed.starter_file_content !== undefined) { codeStarter = { path: parsed.starter_file_path, @@ -575,18 +783,42 @@ export async function createAssessmentHandler( } } else if (input.type === 'fill-in-the-blanks') { const taskId = generateTaskId('fill-in-the-blanks'); - json = buildFitbAssessment(input.payload, taskId); + json = buildFitbAssessment(payload, taskId); name = (json.source.name as string) || taskId; } else if (input.type === 'parsons-puzzle') { const taskId = generateTaskId('parsons-puzzle'); - json = buildParsonsAssessment(input.payload, taskId); + json = buildParsonsAssessment(payload, taskId); name = json.source.name || taskId; } else if (input.type === 'llm-based-auto-rubric') { const taskId = generateTaskId('llm-based-auto-rubric'); - json = buildLlmAutoRubricAssessment(input.payload, taskId); + json = buildLlmAutoRubricAssessment(payload, taskId); name = json.source.name || taskId; - const parsed = llmAutoRubricPayloadSchema.parse(input.payload); + const parsed = llmAutoRubricPayloadSchema.parse(payload); llmSolutions = parsed.solutions; + } else if (input.type === 'probeable-question-act') { + const taskId = 
generateTaskId('probeable-question-act'); + const built = buildProbeableAssessment(payload, taskId, workspace); + const jsonPath = join(assessments, `${taskId}.json`); + // Atomic write of assessment JSON + try { + writeFileSync(jsonPath, `${JSON.stringify(built.json, null, 2)}\n`, { flag: 'wx' }); + } catch (err) { + if ((err as NodeJS.ErrnoException).code === 'EEXIST') throw new Error(`taskId collision at ${jsonPath}`); + throw err; + } + // Student file (atomic — fails if functionName already used) + const studentAbs = join(workspace, built.studentFilePath); + mkdirSync(dirname(studentAbs), { recursive: true }); + writeFileSync(studentAbs, built.studentFileContent, { flag: 'wx' }); + // Probe script + const probeAbs = join(workspace, built.probeScriptPath); + mkdirSync(dirname(probeAbs), { recursive: true }); + writeFileSync(probeAbs, built.probeScriptContent, { flag: 'wx' }); + // Unit test file (in .guides/secure/ — hidden from students) + const unitTestAbs = join(workspace, built.unitTestPath); + mkdirSync(dirname(unitTestAbs), { recursive: true }); + writeFileSync(unitTestAbs, built.unitTestContent, { flag: 'wx' }); + return { ...built, jsonPath }; } else { throw new Error( `unsupported assessment type: ${(input as { type: string }).type}`, diff --git a/src/tools/create-page.ts b/src/tools/create-page.ts index 3565034..3ba8aa0 100644 --- a/src/tools/create-page.ts +++ b/src/tools/create-page.ts @@ -62,7 +62,17 @@ export const createPageInputSchema = z type: z.enum(PAGE_TYPES), layout: z.enum(PAGE_LAYOUTS), learningObjectives: z.array(z.string()).default([]), - files: z.array(z.string()).optional(), + // Accept either plain path strings or file-entry objects {path,...} + // so the LLM can pass either format without errors. + files: z.preprocess( + (v) => + Array.isArray(v) + ? v.map((f) => + typeof f === 'string' ? f : typeof f === 'object' && f !== null && 'path' in f ? 
String((f as { path: unknown }).path) : f, + ) + : v, + z.array(z.string()).optional(), + ), markdownBody: z.string(), chapterFolder: z .string() diff --git a/src/tools/index.ts b/src/tools/index.ts index 3a5c23c..1b52fbd 100644 --- a/src/tools/index.ts +++ b/src/tools/index.ts @@ -24,10 +24,10 @@ export const create_page = tool({ export const create_assessment = tool({ description: - 'Create a Codio assessment JSON of type multiple-choice, code-output-compare, or fill-in-the-blanks. Generates the taskId, writes to .guides/assessments/, returns { taskId, jsonPath, embedLine }.', + 'Create a Codio assessment. Type options: multiple-choice, code-output-compare, fill-in-the-blanks, parsons-puzzle, llm-based-auto-rubric, probeable-question-act. Returns { taskId, jsonPath, embedLine } for standard types. For probeable-question-act also returns { studentFilePath, probeScriptPath, probeButtonCommand, runButtonCommand, suggestedPageMarkdown } and writes student file, probe script, and unit test file.', args: { type: tool.schema - .enum(['multiple-choice', 'code-output-compare', 'fill-in-the-blanks']) + .enum(['multiple-choice', 'code-output-compare', 'fill-in-the-blanks', 'parsons-puzzle', 'llm-based-auto-rubric', 'probeable-question-act']) .describe('Assessment type'), workspace: tool.schema .string() diff --git a/src/utils/load-env.ts b/src/utils/load-env.ts new file mode 100644 index 0000000..37b9408 --- /dev/null +++ b/src/utils/load-env.ts @@ -0,0 +1,29 @@ +import { existsSync, readFileSync } from 'node:fs'; +import { dirname, join } from 'node:path'; + +/** + * Walk up from startDir looking for a .env file. + * When found, load it with override semantics — .env values always win + * over whatever is already in process.env. 
+ */
+export function loadEnvWalkUp(startDir: string): void {
+  let dir = startDir.replace(/(?!^)\/+$/, ''); // strip trailing slashes, but keep a lone '/' so the root is not collapsed to '' (which join() would treat as cwd-relative)
+  while (true) {
+    const candidate = join(dir, '.env');
+    if (existsSync(candidate)) {
+      for (const line of readFileSync(candidate, 'utf8').split('\n')) {
+        const t = line.trim();
+        if (!t || t.startsWith('#')) continue; // skip blank lines and '#' comments
+        const eq = t.indexOf('=');
+        if (eq < 1) continue; // no '=' at all, or nothing before it — not a KEY=value line
+        const key = t.slice(0, eq).trim();
+        const val = t.slice(eq + 1).trim().replace(/^(['"])(.*)\1$/, '$2'); // drop one pair of matching surrounding quotes
+        process.env[key] = val; // unconditional assignment: the .env value replaces any existing one
+      }
+      return; // only the nearest .env is loaded; the walk stops here
+    }
+    const parent = dirname(dir);
+    if (parent === dir) return; // reached filesystem root — no .env found
+    dir = parent;
+  }
+}
diff --git a/src/utils/taskid.test.ts b/src/utils/taskid.test.ts
index 772dcbc..b8c4f46 100644
--- a/src/utils/taskid.test.ts
+++ b/src/utils/taskid.test.ts
@@ -16,6 +16,11 @@ describe('generateTaskId', () => {
     expect(id).toMatch(new RegExp(`^${type}-[1-9][0-9]{9}$`));
   });

+  test('probeable-question-act produces test- prefix', () => {
+    const id = generateTaskId('probeable-question-act');
+    expect(id).toMatch(/^test-[1-9][0-9]{9}$/);
+  });
+
   test('produces unique values', () => {
     const ids = Array.from({ length: 100 }, () =>
       generateTaskId('multiple-choice'),
@@ -29,6 +34,7 @@ describe('isTaskId', () => {
     expect(isTaskId('multiple-choice-1234567890')).toBe(true);
     expect(isTaskId('code-output-compare-9876543210')).toBe(true);
     expect(isTaskId('fill-in-the-blanks-1029384756')).toBe(true);
+    expect(isTaskId('test-2438580832')).toBe(true);
   });

   test('rejects leading-zero first digit', () => {
@@ -47,15 +53,13 @@ describe('isTaskId', () => {

 describe('parseTaskIdType', () => {
   test('extracts the assessment type from a valid id', () => {
-    expect(parseTaskIdType('multiple-choice-1234567890')).toBe(
-      'multiple-choice',
-    );
-    expect(parseTaskIdType('code-output-compare-1234567890')).toBe(
-      'code-output-compare',
-    );
-    expect(parseTaskIdType('fill-in-the-blanks-1234567890')).toBe(
-      'fill-in-the-blanks',
-    );
+
expect(parseTaskIdType('multiple-choice-1234567890')).toBe('multiple-choice');
+    expect(parseTaskIdType('code-output-compare-1234567890')).toBe('code-output-compare');
+    expect(parseTaskIdType('fill-in-the-blanks-1234567890')).toBe('fill-in-the-blanks');
+  });
+
+  test('reverse-maps test- prefix to probeable-question-act', () => {
+    expect(parseTaskIdType('test-2438580832')).toBe('probeable-question-act');
   });

   test('returns null on malformed input', () => {
diff --git a/src/utils/taskid.ts b/src/utils/taskid.ts
index 2b728d3..550b6bc 100644
--- a/src/utils/taskid.ts
+++ b/src/utils/taskid.ts
@@ -5,7 +5,8 @@ export type AssessmentType =
   | 'code-output-compare'
   | 'fill-in-the-blanks'
   | 'parsons-puzzle'
-  | 'llm-based-auto-rubric';
+  | 'llm-based-auto-rubric'
+  | 'probeable-question-act';

 export const ASSESSMENT_TYPES: AssessmentType[] = [
   'multiple-choice',
@@ -13,21 +14,36 @@ export const ASSESSMENT_TYPES: AssessmentType[] = [
   'fill-in-the-blanks',
   'parsons-puzzle',
   'llm-based-auto-rubric',
+  'probeable-question-act',
 ];

-const TASKID_RE = new RegExp(`^(${ASSESSMENT_TYPES.join('|')})-[1-9][0-9]{9}$`);
+// Types whose taskId prefix differs from the type name.
+// probeable-question-act uses Codio's native 'test' prefix.
+const TASKID_PREFIX: Partial<Record<AssessmentType, string>> = {
+  'probeable-question-act': 'test',
+};

-// Type-agnostic regex used by validator to detect *any* type prefix.
-// Updating ASSESSMENT_TYPES extends both generators and detection.
+// Reverse map: taskId prefix → AssessmentType (for types with custom prefixes).
+const PREFIX_TO_TYPE: Partial<Record<string, AssessmentType>> = {
+  test: 'probeable-question-act',
+};
+
+// All unique taskId prefixes (type name or custom override).
+const ALL_PREFIXES = [...new Set(ASSESSMENT_TYPES.map((t) => TASKID_PREFIX[t] ?? t))];
+
+const TASKID_RE = new RegExp(`^(${ALL_PREFIXES.join('|')})-[1-9][0-9]{9}$`);
+
+// Type-agnostic regex used by the validator to detect any assessment embed in markdown.
export const ANY_TASKID_RE = new RegExp(
-  `(?:${ASSESSMENT_TYPES.join('|')})-[1-9][0-9]{9}`,
+  `(?:${ALL_PREFIXES.join('|')})-[1-9][0-9]{9}`,
   'g',
 );

 export function generateTaskId(type: AssessmentType): string {
+  const prefix = TASKID_PREFIX[type] ?? type; // custom prefix override if one exists, else the type name itself
   const first = randomInt(1, 10);
   const rest = randomInt(0, 1_000_000_000).toString().padStart(9, '0');
-  return `${type}-${first}${rest}`;
+  return `${prefix}-${first}${rest}`;
 }

 export function isTaskId(value: string): boolean {
@@ -37,6 +53,15 @@ export function isTaskId(value: string): boolean {
 export function parseTaskIdType(value: string): AssessmentType | null {
   const m = value.match(/^([a-z-]+)-[1-9][0-9]{9}$/);
   if (!m) return null;
-  const candidate = m[1] as AssessmentType;
+  const prefix = m[1] ?? '';
+  // Custom prefixes (e.g. 'test') are reverse-mapped first. The lookup is
+  // restricted to own properties: a bare PREFIX_TO_TYPE[prefix] would also
+  // resolve inherited Object.prototype members such as 'constructor', which
+  // the [a-z-]+ pattern above lets through.
+  const mapped = Object.prototype.hasOwnProperty.call(PREFIX_TO_TYPE, prefix)
+    ? PREFIX_TO_TYPE[prefix]
+    : undefined;
+  if (mapped) return mapped;
+  const candidate = prefix as AssessmentType;
   return ASSESSMENT_TYPES.includes(candidate) ? candidate : null;
 }