From 259e9d006a8064c3fcd448a7c4b17de3c0367640 Mon Sep 17 00:00:00 2001 From: Rhys Sullivan <39114868+RhysSullivan@users.noreply.github.com> Date: Sun, 28 Jun 2026 15:47:15 -0700 Subject: [PATCH] e2e: cross-OS packaged-desktop targets (macOS/Linux/Windows), filmed Run the real electron-builder desktop bundle inside a guest VM on macOS, Linux, and Windows, driven over a CDP tunnel and filmed. One shared scenario (desktop-vm/console-renders.test.ts) and driver (src/vm/desktop.ts); each target lands test.ts + session.mp4 + step screenshots in runs/<target>/. Only launch and capture differ per OS: macOS: autologin Aqua session, launchctl asuser, screencapture linux: Xvfb + openbox, xdotool window resize, ffmpeg x11grab windows: dockur (QEMU) interactive session, QEMU screendump macOS and Linux auto-provision a tart guest and build the bundle locally (the executor binary cross-compiles via BUN_TARGET); Windows attaches to a dockur host via E2E_DESKTOP_WIN_* env. Not in the default test chain; skips honestly without a guest, like desktop-packaged skips without a display. Also: - Force tart SSH to password-only (PubkeyAuthentication=no, IdentitiesOnly=yes) so a loaded SSH agent does not exhaust the guest's MaxAuthTries, an intermittent failure the existing cli-{os} lanes also hit. - build-sidecar keys the executable-bit chmod on the build target, not the host, so a windows-target cross-build no longer ENOENTs on a unix executor binary.
--- apps/desktop/scripts/build-sidecar.ts | 6 +- e2e/AGENTS.md | 42 +++ e2e/desktop-vm/console-renders.test.ts | 75 ++++++ e2e/setup/desktop-linux.globalsetup.ts | 125 +++++++++ e2e/setup/desktop-macos.globalsetup.ts | 108 ++++++++ e2e/setup/desktop-vm.ts | 91 +++++++ e2e/setup/desktop-windows.globalsetup.ts | 106 ++++++++ e2e/src/vm/desktop.ts | 327 +++++++++++++++++++++++ e2e/src/vm/tart.ts | 18 +- e2e/targets/desktop.ts | 5 +- e2e/targets/registry.ts | 6 + e2e/vitest.config.ts | 20 ++ 12 files changed, 926 insertions(+), 3 deletions(-) create mode 100644 e2e/desktop-vm/console-renders.test.ts create mode 100644 e2e/setup/desktop-linux.globalsetup.ts create mode 100644 e2e/setup/desktop-macos.globalsetup.ts create mode 100644 e2e/setup/desktop-vm.ts create mode 100644 e2e/setup/desktop-windows.globalsetup.ts create mode 100644 e2e/src/vm/desktop.ts diff --git a/apps/desktop/scripts/build-sidecar.ts b/apps/desktop/scripts/build-sidecar.ts index cf248e910..74b56f9e8 100644 --- a/apps/desktop/scripts/build-sidecar.ts +++ b/apps/desktop/scripts/build-sidecar.ts @@ -45,7 +45,11 @@ await rm(EXECUTOR_OUT_DIR, { recursive: true, force: true }); await mkdir(EXECUTOR_OUT_DIR, { recursive: true }); await cp(sourceBinDir, EXECUTOR_OUT_DIR, { recursive: true }); -if (process.platform !== "win32") { +// Restore the unix executable bit — keyed on the TARGET, not the host. A +// windows-target cross-build (BUN_TARGET=bun-windows-x64 on macOS/linux) stages +// `executor.exe`, which needs no bit; chmod'ing a non-existent `executor` there +// would ENOENT. 
+if (!targetPackage.includes("windows")) { await chmod(join(EXECUTOR_OUT_DIR, "executor"), 0o755); } diff --git a/e2e/AGENTS.md b/e2e/AGENTS.md index a77694871..38555bd19 100644 --- a/e2e/AGENTS.md +++ b/e2e/AGENTS.md @@ -130,6 +130,48 @@ When handing results to the user, follow the evidence contract in the root [AGENTS.md](../AGENTS.md) (direct run links + a live instance + what to try); [RUNNING.md](../RUNNING.md) has the current sharing/demo mechanics. +## Desktop targets (the app on real OSes, filmed) + +The packaged desktop app runs as its own targets, each landing in its own +`runs/<target>/` bucket with a video. One shared scenario (`desktop-vm/`) and the +shared driver (`src/vm/desktop.ts`) + setup plumbing (`setup/desktop-vm.ts`); one +project + globalsetup per guest OS. + +- **`desktop-packaged`** — the real electron-builder bundle on THIS machine's + display (the supervised-daemon attach path). Needs a logged-in GUI session. +- **`desktop-macos` / `desktop-linux`** — the same bundle inside a guest VM, + driven over CDP from the host and filmed. The globalsetup boots the guest + (tart), builds + pushes the bundle, brings the app up with + `--remote-debugging-port`, forwards it, and the scenario connects + drives + + records. Provisioned automatically — or attach to a running guest with + `E2E_DESKTOP_VM_IP=<ip>`: + + ```sh + vitest run --project desktop-macos # or desktop-linux + ``` + +The guests run tart `--no-graphics` (no host window, never steals focus) but +still have a usable display: + +- **macOS**: the base image's autologin reaches a real Aqua session + (WindowServer/Dock/Finder). Launch the app INTO it with `sudo launchctl asuser <uid> + …` (a plain SSH spawn lands in a non-GUI session); the unsigned arm64 + bundle is ad-hoc `codesign`'d in the guest; `screencapture` films it.
+- **linux**: no window server, so the app renders into an `Xvfb` display with a + minimal WM (`openbox` — without it the electron window never maps); the window + maps tiny (10x10) so the globalsetup `xdotool`-resizes it to fill, and ffmpeg + `x11grab` films it. `--no-sandbox` (the chrome-sandbox needs setuid root). + +Base images (`admin`/`admin`): `executor-macos-base` (cirruslabs sequoia, autologin) +and `executor-linux-base` (cirruslabs ubuntu + Xvfb/ffmpeg/openbox/xdotool + +electron runtime libs). The bundle's `executor` binary is cross-compiled for the +guest (`BUN_TARGET`), and electron-builder's `dir` target assembles the unpacked +app on macOS — so both bundles build on this Mac. + +Note: `desktop-packaged`'s `guiAvailable()` probe (`launchctl managername`) reads +"Background" over SSH even when Aqua is up, so it's host-only; the VM targets gate +on a CDP page target instead. + ## Discovering endpoints - The full OpenAPI spec: `curl http://127.0.0.1:<port>/api/openapi.json` diff --git a/e2e/desktop-vm/console-renders.test.ts b/e2e/desktop-vm/console-renders.test.ts new file mode 100644 index 000000000..c24658784 --- /dev/null +++ b/e2e/desktop-vm/console-renders.test.ts @@ -0,0 +1,75 @@ +// The PACKAGED desktop app, on camera, inside a GUI guest — driven over CDP from +// the host. ONE scenario shared by every desktop-<os> project (desktop-macos, +// desktop-linux): the same bundle and CDP driver, proving it renders on a guest +// OS and filming the actual console. The desktop-<os> globalsetup boots the +// guest, launches the app, forwards its --remote-debugging-port (E2E_DESKTOP_CDP_PORT) +// and publishes the guest IP; this scenario connects, drives, and records. The +// run lands in runs/<target>/ (its own per-OS bucket). Without a guest it skips +// honestly, like desktop-packaged without a display.
+import { writeFileSync } from "node:fs"; +import { join } from "node:path"; + +import { expect, it } from "@effect/vitest"; +import { Effect } from "effect"; + +import { scenario } from "../src/scenario"; +import { RunDir } from "../src/services"; +import { CdpPage, pageWsUrl, recordGuestScreen } from "../src/vm/desktop"; + +const NAME = "Desktop (packaged, in a VM) · the bundle renders its console"; +const cdpPort = process.env.E2E_DESKTOP_CDP_PORT; +const guestIp = process.env.E2E_DESKTOP_VM_IP; +const recSeconds = Number(process.env.E2E_DESKTOP_REC_SECONDS ?? "12"); +const os: "macos" | "linux" | "windows" = + process.env.E2E_TARGET === "desktop-windows" + ? "windows" + : process.env.E2E_TARGET === "desktop-linux" + ? "linux" + : "macos"; + +const run = async (runDir: string) => { + const cdp = await CdpPage.connect(await pageWsUrl(Number(cdpPort))); + try { + await cdp.command("Runtime.enable"); + await cdp.command("Page.enable"); + + // Film the console while we drive it (OS-aware capture lands a playable mp4). + const recording = recordGuestScreen( + guestIp as string, + recSeconds, + join(runDir, "session.mp4"), + os, + ); + + // Reaching the nav proves the packaged bundle booted and connected to its + // daemon on this OS. + await cdp.waitForText("Integrations", 60_000).catch(() => cdp.waitForText("Settings", 60_000)); + writeFileSync(join(runDir, "01-console-rendered.png"), await cdp.screenshot()); + + const body = await cdp.command<{ result?: { value?: string } }>("Runtime.evaluate", { + expression: "document.body.innerText", + returnByValue: true, + }); + expect(body.result?.value ?? "", "the packaged console rendered its nav").toContain( + "Integrations", + ); + + await recording; + } finally { + cdp.close(); + } +}; + +if (!cdpPort || !guestIp) { + it.skip(`${NAME} (needs a desktop guest — set E2E_DESKTOP_VM_IP or run the desktop- project)`, () => {}); +} else { + // Literal name (not NAME) so the run's test.ts review artifact captures it. 
+ scenario( + "Desktop (packaged, in a VM) · the bundle renders its console", + { timeout: 180_000 }, + Effect.gen(function* () { + const runDir = yield* RunDir; + yield* Effect.promise(() => run(runDir)); + }), + ); +} diff --git a/e2e/setup/desktop-linux.globalsetup.ts b/e2e/setup/desktop-linux.globalsetup.ts new file mode 100644 index 000000000..46d535a1c --- /dev/null +++ b/e2e/setup/desktop-linux.globalsetup.ts @@ -0,0 +1,125 @@ +// desktop-linux: bring the PACKAGED app up inside a Linux guest and forward its +// CDP port (the shared attach/forward lives in ./desktop-vm). No window server, +// so the app renders into an Xvfb virtual display; ffmpeg x11grab (in the +// scenario's recorder) films that display. Simpler than macOS: no Aqua, no +// codesign, no launchctl — just background processes with DISPLAY set and +// --no-sandbox (the chrome-sandbox needs setuid root, pointless on a throwaway +// guest). The base image (executor-linux-base) carries Xvfb + ffmpeg + the +// electron runtime libs. +import { execFileSync } from "node:child_process"; +import { existsSync } from "node:fs"; +import { fileURLToPath } from "node:url"; +import { basename, join } from "node:path"; + +import { pushDirAsTar } from "../src/vm/desktop"; +import { tartVm } from "../src/vm/tart"; +import { + attachOrProvision, + CDP_GUEST_PORT, + waitGuestHttp, + waitGuestPageTarget, + type ProvisionedGuest, +} from "./desktop-vm"; + +const DAEMON_PORT = 4789; +const GUEST_DIR = "/home/admin/exe"; +const GUEST_HOME = "/home/admin/exe-home"; +const DISPLAY = ":99"; + +const appDir = fileURLToPath(new URL("../../apps/desktop/", import.meta.url)); +const hostBundle = () => { + // electron-builder names the dir `linux--unpacked` for non-x64. + const dir = join(appDir, "dist", "linux-arm64-unpacked"); + return { + dir, + exe: join(dir, "executor-desktop"), + executor: join(dir, "resources/executor/executor"), + }; +}; + +/** Build the packaged linux-arm64 bundle if it isn't on disk. 
The `executor` + * binary is cross-compiled here via BUN_TARGET (same as the cli-linux lane); + * electron-builder's `dir` target assembles the unpacked app on macOS without + * Docker. */ +const ensureBundle = (): void => { + if (existsSync(hostBundle().dir)) return; + // `env` holds per-step overrides layered on top of the inherited environment. + const run = (cmd: string, args: string[], env: Record<string, string> = {}) => + execFileSync(cmd, args, { cwd: appDir, stdio: "inherit", env: { ...process.env, ...env } }); + run("bun", ["./scripts/build-sidecar.ts"], { BUN_TARGET: "bun-linux-arm64" }); + run("bunx", ["--bun", "electron-vite", "build"]); + run( + "bunx", + [ + "--bun", + "electron-builder", + "--config", + "electron-builder.e2e.config.ts", + "--linux", + "--arm64", + ], + { CSC_IDENTITY_AUTO_DISCOVERY: "false" }, + ); +}; + +const provisionLinux = async (): Promise<ProvisionedGuest> => { + ensureBundle(); + const { dir } = hostBundle(); + const vm = await tartVm("linux", "arm64").provision(); + try { + await vm.ssh(`rm -rf ${GUEST_DIR} ${GUEST_HOME}; mkdir -p ${GUEST_HOME}/.executor`); + await pushDirAsTar(vm.host, dir, GUEST_DIR); + + const guestApp = `${GUEST_DIR}/${basename(dir)}`; + const guestExe = `${guestApp}/executor-desktop`; + const guestExecutor = `${guestApp}/resources/executor/executor`; + await vm.ssh(`chmod +x '${guestExe}' '${guestExecutor}' 2>/dev/null || true`); + const env = `HOME=${GUEST_HOME} EXECUTOR_DATA_DIR=${GUEST_HOME}/.executor`; + + // A virtual display + a minimal WM (openbox) — without a window manager the + // electron window doesn't map onto the framebuffer that x11grab records. + await vm.ssh( + `pkill Xvfb 2>/dev/null; pkill openbox 2>/dev/null; ` + + `nohup Xvfb ${DISPLAY} -screen 0 1280x800x24 >/tmp/xvfb.log 2>&1 & sleep 2; ` + + `DISPLAY=${DISPLAY} nohup openbox >/tmp/openbox.log 2>&1 & sleep 1; echo up`, + ); + + // 1) the bundled daemon, supervised — the app attaches to this.
+ await vm.ssh( + `nohup env ${env} EXECUTOR_SUPERVISED=1 EXECUTOR_AUTH_TOKEN=desktop-linux-e2e EXECUTOR_CLIENT=desktop ` + + `'${guestExecutor}' daemon run --foreground --port ${DAEMON_PORT} --hostname 127.0.0.1 ` + + `>/tmp/executor-daemon.log 2>&1 &`, + ); + if (!(await waitGuestHttp(vm, `http://127.0.0.1:${DAEMON_PORT}/`))) { + throw new Error( + "supervised daemon never came up in the guest (see /tmp/executor-daemon.log)", + ); + } + + // 2) the packaged app on the virtual display, with CDP enabled. + await vm.ssh( + `nohup env ${env} DISPLAY=${DISPLAY} '${guestExe}' --no-sandbox ` + + `--remote-debugging-port=${CDP_GUEST_PORT} --remote-allow-origins='*' ` + + `>/tmp/executor-app.log 2>&1 &`, + ); + if (!(await waitGuestPageTarget(vm, CDP_GUEST_PORT))) { + const log = (await vm.ssh("tail -40 /tmp/executor-app.log 2>/dev/null").catch(() => null)) + ?.stdout; + throw new Error(`the app's CDP page target never appeared:\n${log ?? "(no app log)"}`); + } + + // The electron window maps tiny (10x10) under Xvfb; size it to the screen so + // the x11grab recording captures the full console (CDP screenshots the + // renderer surface regardless, but the film grabs the X framebuffer). 
+ await vm.ssh( + `WID=$(DISPLAY=${DISPLAY} xdotool search --name executor-desktop | head -1); ` + + `[ -n "$WID" ] && DISPLAY=${DISPLAY} xdotool windowsize "$WID" 1280 800 windowmove "$WID" 0 0 || true`, + ); + + return { ip: vm.host, teardown: async () => void (await vm.discard()) }; + } catch (error) { + await vm.discard(); + throw error; + } +}; + +export default (): Promise<(() => Promise<void>) | void> => attachOrProvision(provisionLinux); diff --git a/e2e/setup/desktop-macos.globalsetup.ts b/e2e/setup/desktop-macos.globalsetup.ts new file mode 100644 index 000000000..b26707ca2 --- /dev/null +++ b/e2e/setup/desktop-macos.globalsetup.ts @@ -0,0 +1,108 @@ +// desktop-macos: bring the PACKAGED app up inside a macOS GUI guest and forward +// its CDP port (the shared attach/forward lives in ./desktop-vm). The guest runs +// tart `--no-graphics` (no host window) but the base image's autologin still +// reaches a real Aqua session, so the GUI renders and `screencapture` films it. +// We come up the SAME way desktop-packaged does — start the bundled daemon, then +// launch the app so it ATTACHES (no sidecar spawn → no first-run consent modal). +// The app must be launched INTO the Aqua session (`launchctl asuser`); a plain +// SSH spawn lands in a non-GUI session.
+import { execFileSync } from "node:child_process"; +import { existsSync } from "node:fs"; +import { fileURLToPath } from "node:url"; +import { join } from "node:path"; + +import { pushDirAsTar } from "../src/vm/desktop"; +import { tartVm } from "../src/vm/tart"; +import { + attachOrProvision, + CDP_GUEST_PORT, + waitGuestHttp, + waitGuestPageTarget, + type ProvisionedGuest, +} from "./desktop-vm"; + +const DAEMON_PORT = 4789; +const GUEST_DIR = "/Users/admin/exe"; +const GUEST_HOME = "/Users/admin/exe-home"; + +const appDir = fileURLToPath(new URL("../../apps/desktop/", import.meta.url)); +const hostBundle = () => { + const app = join(appDir, "dist", "mac-arm64", "Executor.app"); + return { + app, + exe: join(app, "Contents/MacOS/Executor"), + executor: join(app, "Contents/Resources/executor/executor"), + }; +}; + +/** Build the packaged mac bundle if it isn't on disk (slow; reuse an existing + * dist/ while iterating). Mirrors desktop-packaged.globalsetup. */ +const ensureBundle = (): void => { + if (existsSync(hostBundle().app)) return; + const run = (cmd: string, args: string[]) => + execFileSync(cmd, args, { cwd: appDir, stdio: "inherit", env: { ...process.env } }); + run("bun", ["./scripts/build-sidecar.ts"]); + run("bunx", ["--bun", "electron-vite", "build"]); + execFileSync( + "bunx", + ["--bun", "electron-builder", "--config", "electron-builder.e2e.config.ts", "--mac"], + { + cwd: appDir, + stdio: "inherit", + env: { ...process.env, CSC_IDENTITY_AUTO_DISCOVERY: "false" }, + }, + ); +}; + +const provisionMac = async (): Promise<ProvisionedGuest> => { + ensureBundle(); + const { exe, executor } = hostBundle(); + const vm = await tartVm("macos", "arm64").provision(); + try { + // Push the bundle (tar-stream, robust over the just-booted link) and clear + // the scp quarantine so it can run.
+ await vm.ssh(`rm -rf ${GUEST_DIR} ${GUEST_HOME} && mkdir -p ${GUEST_HOME}/.executor`); + await pushDirAsTar(vm.host, hostBundle().app, GUEST_DIR); + await vm.ssh(`xattr -dr com.apple.quarantine ${GUEST_DIR} 2>/dev/null || true`); + // The e2e build is unsigned; an arm64 app needs at least an ad-hoc signature + // to execute, and the host build's signature isn't trusted on another Mac. + await vm.ssh( + `codesign --force --deep --sign - ${GUEST_DIR}/Executor.app 2>&1 | tail -2 || true`, + ); + + const guestExe = `${GUEST_DIR}/Executor.app/${exe.split("/Executor.app/")[1]}`; + const guestExecutor = `${GUEST_DIR}/Executor.app/${executor.split("/Executor.app/")[1]}`; + const env = `HOME=${GUEST_HOME} EXECUTOR_DATA_DIR=${GUEST_HOME}/.executor`; + + // 1) the bundled daemon, supervised — the app attaches to this. + await vm.ssh( + `nohup env ${env} EXECUTOR_SUPERVISED=1 EXECUTOR_AUTH_TOKEN=desktop-macos-e2e EXECUTOR_CLIENT=desktop ` + + `'${guestExecutor}' daemon run --foreground --port ${DAEMON_PORT} --hostname 127.0.0.1 ` + + `>/tmp/executor-daemon.log 2>&1 &`, + ); + if (!(await waitGuestHttp(vm, `http://127.0.0.1:${DAEMON_PORT}/`))) { + throw new Error( + "supervised daemon never came up in the guest (see /tmp/executor-daemon.log)", + ); + } + + // 2) the packaged app, launched INTO the Aqua session with CDP enabled. + await vm.ssh( + `U=$(id -u); sudo launchctl asuser $U bash -lc ` + + `'nohup env HOME=${GUEST_HOME} "${guestExe}" --remote-debugging-port=${CDP_GUEST_PORT} --remote-allow-origins="*" ` + + `>/tmp/executor-app.log 2>&1 &'`, + ); + if (!(await waitGuestPageTarget(vm, CDP_GUEST_PORT))) { + const log = (await vm.ssh("tail -40 /tmp/executor-app.log 2>/dev/null").catch(() => null)) + ?.stdout; + throw new Error(`the app's CDP page target never appeared:\n${log ??
"(no app log)"}`); + } + + return { ip: vm.host, teardown: async () => void (await vm.discard()) }; + } catch (error) { + await vm.discard(); + throw error; + } +}; + +export default (): Promise<(() => Promise<void>) | void> => attachOrProvision(provisionMac); diff --git a/e2e/setup/desktop-vm.ts b/e2e/setup/desktop-vm.ts new file mode 100644 index 000000000..c15f15864 --- /dev/null +++ b/e2e/setup/desktop-vm.ts @@ -0,0 +1,91 @@ +// Shared plumbing for the desktop-<os> globalsetups. Each OS setup supplies a +// `provision` that boots its guest and brings the packaged app up with +// --remote-debugging-port; this module handles the rest the same everywhere: +// attach to an already-running guest (E2E_DESKTOP_VM_IP) or provision a fresh +// one, then forward the guest's CDP port and publish it for the scenario. +import { guestTunnel } from "../src/vm/desktop"; +import type { VmHandle } from "../src/vm/types"; + +export const CDP_GUEST_PORT = 9222; + +const sleep = (ms: number): Promise<void> => new Promise((r) => setTimeout(r, ms)); + +/** Poll until an HTTP endpoint inside the guest answers (any status — a 401 from + * the bearer-gated daemon still means "up"). HTTP, not lsof: the app may be + * owned by root (launchctl asuser), whose listening socket an unprivileged lsof + * can't see — a loopback HTTP probe works regardless of owner. */ +export const waitGuestHttp = async (vm: VmHandle, url: string, attempts = 60): Promise<boolean> => { + for (let i = 0; i < attempts; i++) { + const r = await vm.ssh( + `curl -s -o /dev/null -w '%{http_code}' --max-time 5 ${url} 2>/dev/null || echo 000`, + ); + const code = r.stdout.trim().slice(-3); + if (code !== "000" && code !== "") return true; + await sleep(2000); + } + return false; +}; + +/** Poll until CDP advertises a real PAGE target — i.e. the app's window/renderer + * is up, not just the browser endpoint. On a cold guest the page appears a good + * bit after the port opens, so gating on this makes the scenario deterministic.
*/ +export const waitGuestPageTarget = async ( + vm: VmHandle, + port: number, + attempts = 60, +): Promise<boolean> => { + for (let i = 0; i < attempts; i++) { + const r = await vm.ssh( + `curl -s --max-time 5 http://127.0.0.1:${port}/json/list 2>/dev/null | grep -c '"type": "page"' || echo 0`, + ); + // `grep -c` prints 0 AND exits non-zero on no match, so stdout is "0\n0" then; + // read only the leading integer rather than relying on Number() yielding NaN. + if (Number.parseInt(r.stdout, 10) > 0) return true; + await sleep(2000); + } + return false; +}; + +export interface ProvisionedGuest { + readonly ip: string; + readonly teardown: () => Promise<void>; +} + +/** + * The body every desktop-<os>.globalsetup returns: attach to E2E_DESKTOP_VM_IP + * if set, else provision a fresh guest; then forward the guest's CDP port and + * publish it (+ the guest IP, for filming) for the worker. A provision/forward + * failure never fails the run — the scenario skips honestly, like + * desktop-packaged without a display. + */ +export const attachOrProvision = async ( + provision: () => Promise<ProvisionedGuest>, +): Promise<(() => Promise<void>) | void> => { + let ip = process.env.E2E_DESKTOP_VM_IP; + let teardownVm: (() => Promise<void>) | undefined; + + if (!ip) { + // oxlint-disable-next-line executor/no-try-catch-or-throw -- boundary: VM/host setup may fail; degrade to a skip + try { + const result = await provision(); + ip = result.ip; + teardownVm = result.teardown; + } catch (error) { + console.warn(`[desktop] provision failed, scenario will skip: ${String(error)}`); + return; + } + } + + // oxlint-disable-next-line executor/no-try-catch-or-throw -- boundary: forwarding may fail; degrade to a skip + try { + const forward = await guestTunnel(ip, CDP_GUEST_PORT); + process.env.E2E_DESKTOP_CDP_PORT = String(forward.localPort); + process.env.E2E_DESKTOP_VM_IP = ip; + return async () => { + forward.close(); + await teardownVm?.(); + }; + } catch (error) { + console.warn(`[desktop] could not forward CDP from ${ip}: ${String(error)}`); + await teardownVm?.(); + return; + } +}; diff --git a/e2e/setup/desktop-windows.globalsetup.ts b/e2e/setup/desktop-windows.globalsetup.ts new file
mode 100644 index 000000000..814df0871 --- /dev/null +++ b/e2e/setup/desktop-windows.globalsetup.ts @@ -0,0 +1,106 @@ +// desktop-windows: drive the PACKAGED app running in a Windows guest over CDP. +// Windows-in-a-VM works best with dockur (QEMU on a Linux/KVM host): autologin +// gives a real interactive session the app renders into, and QEMU `screendump` +// films the framebuffer directly — sidestepping the session-0 problem that +// defeats SSH-driven screenshots (the prior proof of this path). +// +// Unlike the tart targets this ATTACHES to a long-lived Windows host (the dockur +// guest stays up between runs, like a shared selfhost): it forwards the guest's +// --remote-debugging-port to the host over an SSH jump and publishes it. The +// shared scenario drives; the windows recorder (src/vm/desktop.ts) films via +// screendump. Without a reachable app it skips honestly. All connection details +// come from env (no baked-in host): +// E2E_DESKTOP_WIN_HOST (ssh alias of the docker/KVM host to jump through), +// _SSH_PORT (the guest's mapped OpenSSH port), _KEY, _USER; the recorder also +// reads _CONTAINER and _STORAGE. +import { spawn } from "node:child_process"; +import net from "node:net"; + +const SSH_PORT = process.env.E2E_DESKTOP_WIN_SSH_PORT ?? "2222"; +const KEY = process.env.E2E_DESKTOP_WIN_KEY ?? "/tmp/winkey"; +const USER = process.env.E2E_DESKTOP_WIN_USER ?? "Administrator"; +const CDP_GUEST_PORT = 9222; + +const sleep = (ms: number): Promise<void> => new Promise((r) => setTimeout(r, ms)); + +// Ask the OS for an ephemeral loopback port, then release it for the tunnel to bind. +const freePort = (): Promise<number> => + new Promise<number>((resolve, reject) => { + const srv = net.createServer(); + srv.on("error", reject); + srv.listen(0, "127.0.0.1", () => { + const port = (srv.address() as net.AddressInfo).port; + srv.close(() => resolve(port)); + }); + }); + +interface CdpTarget { + readonly type: string; + readonly webSocketDebuggerUrl?: string; +} + +/** Poll the forwarded port until the app advertises a CDP page target.
*/ +const pageReady = async (port: number, attempts = 30): Promise<boolean> => { + for (let i = 0; i < attempts; i++) { + // Bound every probe: a half-open forward can accept the connection and then + // never answer, which would otherwise hang the whole globalsetup. + const body: unknown = await fetch(`http://127.0.0.1:${port}/json/list`, { + signal: AbortSignal.timeout(5_000), + }) + .then((r) => (r.ok ? r.json() : [])) + .catch(() => []); + // Anything other than a JSON array (error object, proxy page) counts as "no targets yet". + const targets: ReadonlyArray<CdpTarget> = Array.isArray(body) ? body : []; + if (targets.some((t) => t.type === "page" && t.webSocketDebuggerUrl)) return true; + await sleep(2000); + } + return false; +}; + +export default async function setup(): Promise<(() => Promise<void>) | void> { + const host = process.env.E2E_DESKTOP_WIN_HOST; + if (!host) { + console.warn( + "[desktop-windows] E2E_DESKTOP_WIN_HOST not set; scenario will skip. Point it at the ssh " + + "alias of a dockur/KVM Windows host running the packaged app with --remote-debugging-port.", + ); + return; + } + const localPort = await freePort(); + // mac:localPort → (jump host) → guest:9222. -p is the guest's mapped OpenSSH + // port on the host; the final hop into Windows carries the -L forward. + const tunnel = spawn( + "ssh", + [ + "-o", + "StrictHostKeyChecking=no", + "-o", + "UserKnownHostsFile=/dev/null", + "-o", + "ConnectTimeout=12", + "-o", + "ServerAliveInterval=15", + "-J", + host, + "-p", + SSH_PORT, + "-i", + KEY, + "-L", + `${localPort}:127.0.0.1:${CDP_GUEST_PORT}`, + "-N", + `${USER}@127.0.0.1`, + ], + { stdio: "ignore" }, + ); + + if (!(await pageReady(localPort))) { + tunnel.kill(); + console.warn( + `[desktop-windows] no app/CDP reachable on the Windows host (${host}); scenario will skip. ` + + `Bring up the packaged app with --remote-debugging-port=${CDP_GUEST_PORT} in the dockur guest.`, + ); + return; + } + + process.env.E2E_DESKTOP_CDP_PORT = String(localPort); + // Non-empty so the scenario runs; the windows recorder uses E2E_DESKTOP_WIN_*.
+ process.env.E2E_DESKTOP_VM_IP = host; + + return async () => { + tunnel.kill(); + }; +} diff --git a/e2e/src/vm/desktop.ts b/e2e/src/vm/desktop.ts new file mode 100644 index 000000000..e04866b0a --- /dev/null +++ b/e2e/src/vm/desktop.ts @@ -0,0 +1,327 @@ +// Driving the PACKAGED desktop app inside a GUI guest, from the host. This is +// the shared substrate for the cross-OS desktop targets (Gap A): SSH plumbing, +// an SSH local-forward, a minimal CDP page client, and screen recording — the +// pieces proven against a tart macOS guest. The desktop- globalsetup boots +// the guest and launches the app; a scenario connects over CDP and records. +// +// Why these mechanics (macOS): a tart `--no-graphics` guest opens no host window +// (no focus stealing) yet, with the base image's autologin, still reaches a real +// Aqua session (WindowServer/Dock/Finder) the app can render into. A GUI app must +// be launched INTO that session (`sudo launchctl asuser …`); a plain SSH +// spawn lands in a non-GUI session. The app's --remote-debugging-port is then +// reachable over an SSH forward, and `screencapture` films the console. +import { execFile, spawn } from "node:child_process"; +import net from "node:net"; +import { basename, dirname } from "node:path"; +import { promisify } from "node:util"; + +const execFileP = promisify(execFile); + +const SSHPASS = process.env.E2E_SSHPASS_BIN ?? "/opt/homebrew/bin/sshpass"; +const GUEST_PASS = process.env.E2E_DESKTOP_VM_PASS ?? "admin"; +const GUEST_USER = process.env.E2E_DESKTOP_VM_USER ?? "admin"; +const SSH_OPTS = [ + "-o", + "StrictHostKeyChecking=no", + "-o", + "UserKnownHostsFile=/dev/null", + "-o", + "ConnectTimeout=8", + "-o", + "LogLevel=ERROR", + // Password auth only (sshpass): a loaded SSH agent's keys would otherwise + // exhaust the guest's MaxAuthTries before the password is tried. 
+ "-o", + "PubkeyAuthentication=no", + "-o", + "IdentitiesOnly=yes", +]; + +export const sleep = (ms: number): Promise => + new Promise((resolve) => setTimeout(resolve, ms)); + +export const guestSsh = ( + ip: string, + command: string, +): Promise<{ stdout: string; stderr: string }> => + execFileP(SSHPASS, ["-p", GUEST_PASS, "ssh", ...SSH_OPTS, `${GUEST_USER}@${ip}`, command], { + maxBuffer: 64 * 1024 * 1024, + }); + +export const guestScpFrom = (ip: string, remote: string, local: string): Promise => + execFileP(SSHPASS, [ + "-p", + GUEST_PASS, + "scp", + ...SSH_OPTS, + `${GUEST_USER}@${ip}:${remote}`, + local, + ]); + +/** + * Push a directory into the guest by streaming a tar over ssh: one connection, + * no per-file round-trips, and the flowing data keeps the link alive — far more + * robust than `scp -r` of a big app bundle (thousands of files + symlinks), + * which drops mid-transfer on a freshly-booted guest. Retries once. The dir + * lands at `${remoteParent}/${basename(localDir)}`. 
+ */ +export const pushDirAsTar = async ( + ip: string, + localDir: string, + remoteParent: string, +): Promise<void> => { + const parent = dirname(localDir); + const base = basename(localDir); + const remote = `${SSHPASS} -p ${GUEST_PASS} ssh ${SSH_OPTS.join(" ")} ${GUEST_USER}@${ip} ${JSON.stringify( + `mkdir -p ${remoteParent} && tar xf - -C ${remoteParent}`, + )}`; + const pipeline = `tar cf - -C ${JSON.stringify(parent)} ${JSON.stringify(base)} | ${remote}`; + // oxlint-disable-next-line executor/no-try-catch-or-throw -- boundary: one retry over a flaky just-booted guest link + try { + await execFileP("sh", ["-c", pipeline], { maxBuffer: 16 * 1024 * 1024 }); + } catch { + await sleep(3000); + await execFileP("sh", ["-c", pipeline], { maxBuffer: 16 * 1024 * 1024 }); + } +}; + +// Ask the OS for an ephemeral loopback port, then release it for the forward to bind. +const freePort = (): Promise<number> => + new Promise<number>((resolve, reject) => { + const srv = net.createServer(); + srv.on("error", reject); + srv.listen(0, "127.0.0.1", () => { + const port = (srv.address() as net.AddressInfo).port; + srv.close(() => resolve(port)); + }); + }); + +export interface Forward { + readonly localPort: number; + close(): void; +} + +/** SSH local-forward host:localPort → guest:guestPort; resolves once it binds.
*/ +export const guestTunnel = async (ip: string, guestPort: number): Promise<Forward> => { + const localPort = await freePort(); + const child = spawn( + SSHPASS, + [ + "-p", + GUEST_PASS, + "ssh", + ...SSH_OPTS, + "-N", + "-L", + `${localPort}:127.0.0.1:${guestPort}`, + `${GUEST_USER}@${ip}`, + ], + { stdio: "ignore" }, + ); + // Track ssh dying early (spawn failure, bad password, unreachable guest): the + // forward can then never bind, so polling on is pointless. The "error" listener + // also keeps a failed spawn from surfacing as an unhandled event. + let exited = false; + child.once("error", () => { + exited = true; + }); + child.once("exit", () => { + exited = true; + }); + for (let i = 0; i < 40 && !exited; i++) { + const ok = await new Promise<boolean>((resolve) => { + const sock = net.connect({ host: "127.0.0.1", port: localPort }, () => { + sock.destroy(); + resolve(true); + }); + sock.on("error", () => resolve(false)); + sock.setTimeout(1000, () => { + sock.destroy(); + resolve(false); + }); + }); + if (ok) return { localPort, close: () => child.kill() }; + await sleep(500); + } + // Never bound: reap the child and reject, so callers degrade to a skip rather + // than publishing a port nothing is listening on. + child.kill(); + // oxlint-disable-next-line executor/no-try-catch-or-throw, executor/no-error-constructor -- boundary: SSH forward never came up + throw new Error(`SSH forward to ${ip}:${guestPort} never bound on 127.0.0.1:${localPort}`); +}; + +const guestFileSize = (ip: string, remote: string): Promise<number> => + guestSsh(ip, `stat -f%z ${remote} 2>/dev/null || stat -c%s ${remote} 2>/dev/null || echo 0`) + .then((r) => Number(r.stdout.trim() || "0")) + .catch(() => 0); + +/** + * Film the guest's screen for `seconds` and land it on the host as `localMp4` + * (mp4, plays everywhere). OS-aware capture: + * • macOS — `screencapture -V` to a .mov, then host-side ffmpeg to mp4. The + * first capture after a cold display can silently no-op, so warm it with a + * throwaway still and verify+retry. + * • linux — ffmpeg `x11grab` of the Xvfb display straight to mp4. + * Best-effort: failures never throw — "every run is watchable" wants the video, + * but a missing one shouldn't fail the run. Run it concurrently with the drive. + */ +export const recordGuestScreen = async ( + ip: string, + seconds: number, + localMp4: string, + os: "macos" | "linux" | "windows", +): Promise<void> => { + if (os === "windows") { + // Windows can't screenshot the interactive desktop from an SSH session, so + // we film the VM framebuffer directly via QEMU's `screendump` (the dockur + // host runs the loop + ffmpeg; we pull the mp4).
Host/container/storage come + // from env (no baked-in host); best-effort, so skip filming if unconfigured. + const host = process.env.E2E_DESKTOP_WIN_HOST; + const storage = process.env.E2E_DESKTOP_WIN_STORAGE; + if (!host || !storage) return; + const container = process.env.E2E_DESKTOP_WIN_CONTAINER ?? "exec-win"; + const frames = Math.max(8, seconds * 4); + const py = `import socket,time +s=socket.socket(socket.AF_UNIX); s.connect("/run/shm/monitor.sock"); time.sleep(0.2); s.recv(65536) +for i in range(${frames}): + s.sendall(("screendump /storage/frames/f%03d.ppm\\n"%i).encode()); time.sleep(0.2) + try: s.recv(65536) + except Exception: pass`; + const b64 = Buffer.from(py).toString("base64"); + const remote = + `S=${storage}; rm -rf "$S/frames"; mkdir -p "$S/frames"; ` + + `docker exec ${container} python3 -c "import base64;exec(base64.b64decode('${b64}'))"; ` + + `ffmpeg -y -framerate 4 -i "$S/frames/f%03d.ppm" -pix_fmt yuv420p -movflags +faststart "$S/win.mp4" >/dev/null 2>&1`; + await execFileP("ssh", ["-o", "ConnectTimeout=10", host, remote], { + maxBuffer: 16 * 1024 * 1024, + }).catch(() => undefined); + await execFileP("scp", [ + "-o", + "ConnectTimeout=10", + `${host}:${storage}/win.mp4`, + localMp4, + ]).catch(() => undefined); + return; + } + + if (os === "linux") { + const remote = "/tmp/executor-desktop-vm.mp4"; + await guestSsh( + ip, + `rm -f ${remote}; DISPLAY=:99 ffmpeg -y -f x11grab -video_size 1280x800 -framerate 15 ` + + `-i :99 -t ${seconds} -pix_fmt yuv420p ${remote} >/tmp/ffmpeg.log 2>&1`, + ).catch(() => undefined); + // The mostly-flat console compresses small under x264 — a real capture is + // ~30-60KB, a blank/failed one only a few KB. + if ((await guestFileSize(ip, remote)) > 12_000) { + await guestScpFrom(ip, remote, localMp4).catch(() => undefined); + } + return; + } + + const remoteMov = "/tmp/executor-desktop-vm.mov"; + // Warm the capture subsystem — the first screencapture after the display comes + // up can produce nothing. 
+ await guestSsh(ip, "screencapture -x /tmp/.warm.png 2>/dev/null; rm -f /tmp/.warm.png").catch( + () => undefined, + ); + for (let attempt = 0; attempt < 2; attempt++) { + await guestSsh(ip, `rm -f ${remoteMov}; screencapture -V ${seconds} -x ${remoteMov}`).catch( + () => undefined, + ); + if ((await guestFileSize(ip, remoteMov)) > 100_000) { + const localMov = `${localMp4}.mov`; + await guestScpFrom(ip, remoteMov, localMov).catch(() => undefined); + await execFileP("ffmpeg", [ + "-y", + "-i", + localMov, + "-c:v", + "libx264", + "-pix_fmt", + "yuv420p", + "-movflags", + "+faststart", + localMp4, + ]) + .then(() => execFileP("rm", ["-f", localMov])) + .catch(() => undefined); + return; + } + } +}; + +// --- a minimal CDP page client (same protocol as desktop-packaged's driver) -- + +interface CdpTarget { + readonly type: string; + readonly webSocketDebuggerUrl?: string; +} + +export class CdpPage { + private nextId = 1; + private readonly pending = new Map void>(); + + private constructor(private readonly socket: WebSocket) { + socket.addEventListener("message", (event) => { + if (typeof event.data !== "string") return; + const message = JSON.parse(event.data) as { id?: number; result?: unknown }; + if (message.id && this.pending.has(message.id)) { + this.pending.get(message.id)!(message.result); + this.pending.delete(message.id); + } + }); + } + + static connect = (url: string): Promise => + new Promise((resolve, reject) => { + const socket = new WebSocket(url); + const timer = setTimeout( + // oxlint-disable-next-line executor/no-promise-reject, executor/no-error-constructor -- boundary: WebSocket connection promise adapter + () => reject(new Error(`CDP connect timeout: ${url}`)), + 30_000, + ); + socket.addEventListener("open", () => { + clearTimeout(timer); + resolve(new CdpPage(socket)); + }); + socket.addEventListener("error", () => { + clearTimeout(timer); + // oxlint-disable-next-line executor/no-promise-reject, executor/no-error-constructor -- boundary: 
WebSocket connection promise adapter + reject(new Error(`CDP connect failed: ${url}`)); + }); + }); + + command = (method: string, params: Record = {}): Promise => { + const id = this.nextId++; + const result = new Promise((resolve) => + this.pending.set(id, (value) => resolve(value as T)), + ); + this.socket.send(JSON.stringify({ id, method, params })); + return result; + }; + + waitForText = async (text: string, timeoutMs: number): Promise => { + const deadline = Date.now() + timeoutMs; + const expression = `document.body?.innerText.includes(${JSON.stringify(text)}) ?? false`; + for (;;) { + const r = await this.command<{ result?: { value?: boolean } }>("Runtime.evaluate", { + expression, + returnByValue: true, + }); + if (r.result?.value) return; + // oxlint-disable-next-line executor/no-error-constructor -- boundary: a wait timeout is a plain failure here + if (Date.now() >= deadline) throw new Error(`timed out waiting for text: ${text}`); + await sleep(250); + } + }; + + screenshot = async (): Promise => { + const r = await this.command<{ data: string }>("Page.captureScreenshot", { format: "png" }); + return Buffer.from(r.data, "base64"); + }; + + close = (): void => this.socket.close(); +} + +/** The first drivable page target's WebSocket URL, fetched through the forward + * (so the returned ws URL already points at the local port). */ +export const pageWsUrl = async (localPort: number): Promise => { + const deadline = Date.now() + 60_000; + for (;;) { + const targets = (await fetch(`http://127.0.0.1:${localPort}/json/list`) + .then((r) => (r.ok ? 
r.json() : [])) + .catch(() => [])) as ReadonlyArray; + const page = targets.find((t) => t.type === "page" && t.webSocketDebuggerUrl); + if (page?.webSocketDebuggerUrl) return page.webSocketDebuggerUrl; + // oxlint-disable-next-line executor/no-error-constructor -- boundary: setup failure surfaced to the caller + if (Date.now() >= deadline) + throw new Error("no CDP page target (app not running with --remote-debugging-port?)"); + await sleep(500); + } +}; diff --git a/e2e/src/vm/tart.ts b/e2e/src/vm/tart.ts index 5ca696420..6b496dca5 100644 --- a/e2e/src/vm/tart.ts +++ b/e2e/src/vm/tart.ts @@ -30,6 +30,14 @@ const SSH_OPTS = [ "ServerAliveInterval=5", "-o", "LogLevel=ERROR", + // We authenticate with sshpass (password). A loaded SSH agent would otherwise + // offer its keys first and exhaust the guest's MaxAuthTries ("Too many + // authentication failures") before the password is tried — intermittently, + // depending on how many keys the agent holds. Force password-only. + "-o", + "PubkeyAuthentication=no", + "-o", + "IdentitiesOnly=yes", ]; const GUEST_USER = "admin"; const GUEST_PASS = "admin"; @@ -92,7 +100,15 @@ export const tartVm = (os: "macos" | "linux", arch: VmArch = "arm64"): VmProvide provision: async () => { const name = `executor-e2e-${os}-${process.pid}-${Math.floor(performance.now())}`; await execFileP(TART, ["clone", baseImage(os), name]); - const runProc = spawn(TART, ["run", name, "--no-graphics"], { stdio: "ignore" }); + // `--no-graphics` opens NO host window (never steals focus) yet the guest + // still has a virtual display: with the base image's autologin it reaches a + // real Aqua session (WindowServer/Dock/Finder), so even the packaged GUI app + // renders and `screencapture` records it. No windowed/VNC mode is needed. 
+ const runProc = spawn(TART, ["run", name, "--no-graphics"], { + stdio: "ignore", + detached: true, + }); + runProc.unref(); const tunnelClosers: Array<() => void> = []; let ip = ""; diff --git a/e2e/targets/desktop.ts b/e2e/targets/desktop.ts index 722ca08ac..5cded8a1a 100644 --- a/e2e/targets/desktop.ts +++ b/e2e/targets/desktop.ts @@ -9,7 +9,10 @@ import { Effect } from "effect"; import type { Target } from "../src/target"; export const desktopTarget = (): Target => ({ - name: "desktop", + // The project name (desktop / desktop-packaged / desktop-macos) so each lands + // in its own runs// bucket and viewer column — they're the same app + // in different harnesses (dev electron / packaged / packaged-in-a-VM). + name: process.env.E2E_TARGET ?? "desktop", baseUrl: "", mcpUrl: "", capabilities: new Set(), diff --git a/e2e/targets/registry.ts b/e2e/targets/registry.ts index 94e966746..c3d6aa351 100644 --- a/e2e/targets/registry.ts +++ b/e2e/targets/registry.ts @@ -19,6 +19,12 @@ const factories: Record Target> = { // The packaged desktop bundle launches its own app per scenario, same as // `desktop` — no standard surfaces to carry. See desktop-packaged.globalsetup. "desktop-packaged": desktopTarget, + // The packaged bundle inside a GUI guest (one per OS), driven over CDP from + // the host. Carries no surfaces (the scenario drives CDP itself). See + // desktop-.globalsetup. + "desktop-macos": desktopTarget, + "desktop-linux": desktopTarget, + "desktop-windows": desktopTarget, local: localTarget, // The supervised CLI daemon inside a VM, one project per guest OS — restart() // is a real reboot. See setup/cli.globalsetup.ts. 
diff --git a/e2e/vitest.config.ts b/e2e/vitest.config.ts index 74c45288f..408716954 100644 --- a/e2e/vitest.config.ts +++ b/e2e/vitest.config.ts @@ -77,6 +77,26 @@ export default defineConfig({ testTimeout: 360_000, hookTimeout: 600_000, }), + // The packaged desktop app inside a GUI guest, driven over CDP from the + // host and filmed (the cross-OS counterpart of desktop-packaged) — one + // shared scenario (desktop-vm/), one project per guest OS. The globalsetup + // provisions the guest, launches the bundle with --remote-debugging-port, + // and forwards it; the scenario connects, drives, and records the console. + // Each lands in runs//. Not in the default `npm run test` chain — + // run with `vitest run --project desktop-macos` (or desktop-linux). The VM + // is provisioned automatically; set E2E_DESKTOP_VM_IP to attach to an + // already-running guest instead. + // macos/linux provision a tart guest and build+push the ~450MB bundle; + // windows ATTACHES to a long-lived dockur guest over an SSH jump (no + // provision), so it needs no build but the same generous hooks. + ...(["macos", "linux", "windows"] as const).map((os) => + project(`desktop-${os}`, { + include: ["desktop-vm/**/*.test.ts"], + fileParallelism: false, + testTimeout: 300_000, + hookTimeout: 900_000, + }), + ), // The single-user local app. Each scenario launches its OWN `executor // web` via the CLI on a throwaway data dir + an OS-assigned port, so // there is no shared instance and scenarios are independent. Files run