diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f49ed44..fe8a12c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,8 +11,127 @@ concurrency: cancel-in-progress: true jobs: + # ─── Tier 0: fast feedback gate ───────────────────────────────────── + # + # Catches formatting, lint, dependency and semver issues in ~30s. + # Blocks all downstream jobs — no expensive runners are wasted on + # broken code. + # + lint: + name: Lint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + - uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 # stable + with: + components: rustfmt, clippy + - uses: Swatinem/rust-cache@e18b497796c12c097a38f9edb9d0641fb99eee32 # v2 + + - name: Formatting + run: cargo fmt --check + + - name: Clippy + run: cargo clippy --workspace -- -D warnings + + - name: Cargo Deny + uses: EmbarkStudios/cargo-deny-action@6c8f9facfa5047ec02d8485b6bf52b587b7777d1 # v2 + + - name: SemVer Check + uses: obi1kenobi/cargo-semver-checks-action@6b69fcf40e9b5fb17adeb57e4b6ecd020649a239 # v2 + with: + package: evalbox + + # ─── Tier 1: build check (all targets) ────────────────────────────── + # + # Validates compilation for every supported target. Cross-compile + # targets use cargo check only (no tests). fail-fast: true — if it + # doesn't compile for one target, stop early. 
+ # + build-check: + name: Check (${{ matrix.target }}) + needs: lint + runs-on: ${{ matrix.runner }} + strategy: + fail-fast: true + matrix: + include: + - target: x86_64-unknown-linux-gnu + runner: ubuntu-latest + scope: "--workspace" + install: "" + - target: aarch64-unknown-linux-gnu + runner: ubuntu-latest + scope: "--workspace" + install: "sudo apt-get update && sudo apt-get install -y gcc-aarch64-linux-gnu" + - target: x86_64-pc-windows-msvc + runner: windows-latest + scope: "-p evalbox-sandbox" + install: "" + - target: aarch64-pc-windows-msvc + runner: windows-latest + scope: "-p evalbox-sandbox" + install: "" + - target: x86_64-unknown-linux-musl + runner: ubuntu-latest + scope: "--workspace" + install: "sudo apt-get update && sudo apt-get install -y musl-tools" + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + - uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 # stable + with: + targets: ${{ matrix.target }} + - uses: Swatinem/rust-cache@e18b497796c12c097a38f9edb9d0641fb99eee32 # v2 + with: + key: ${{ matrix.target }} + + - name: Install tools + if: matrix.install != '' + run: ${{ matrix.install }} + + - name: cargo check + run: cargo check ${{ matrix.scope }} --target ${{ matrix.target }} + + # ─── Tier 2: tests (native runners only) ──────────────────────────── + # + # Runs the full test suite on targets with native hardware. + # fail-fast: false — see all platform failures at once. + # + # Windows tests only evalbox-sandbox (platform-agnostic crate); + # evalbox-sys depends on Linux syscalls. 
+ # + test: + name: Test (${{ matrix.name }}) + needs: build-check + runs-on: ${{ matrix.runner }} + strategy: + fail-fast: false + matrix: + include: + - name: Linux x86_64 + runner: ubuntu-latest + scope: "--workspace" + - name: Linux ARM64 + runner: ubuntu-24.04-arm + scope: "--workspace" + - name: Windows x86_64 + runner: windows-latest + scope: "-p evalbox-sandbox" + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + - uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 # stable + - uses: Swatinem/rust-cache@e18b497796c12c097a38f9edb9d0641fb99eee32 # v2 + + - name: cargo test + run: cargo test ${{ matrix.scope }} + + # ─── Tier 3: heavy checks ─────────────────────────────────────────── + # + # Nix flake check (doc build, integration, reproducibility) and + # E2E security tests. Only run after all tests pass. + # nix-checks: name: Nix Checks + needs: test runs-on: ubuntu-latest permissions: id-token: write @@ -21,51 +140,49 @@ jobs: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - uses: DeterminateSystems/determinate-nix-action@bafaa638b9d5ec0e7e3ac1a7fc80453ef1fd265f # v3 - uses: DeterminateSystems/magic-nix-cache-action@908b263ff629f4cc17666315b7fd3ec127c6244d # main - - name: Run checks (clippy, fmt, test, doc) + - name: nix flake check run: nix flake check -L - # E2E security tests require kernel 6.12+ (Landlock ABI v5). - # GHA ubuntu-latest currently ships 6.11; image 20260209 has 6.14 but hasn't propagated yet. - # Uncomment when runners get kernel 6.12+. 
+ # ─── E2E security tests (require kernel 6.12+ for Landlock ABI v5) ─ # - # e2e: - # name: E2E (${{ matrix.distro }}) - # runs-on: ubuntu-latest - # needs: nix-checks - # permissions: - # id-token: write - # contents: read - # strategy: - # fail-fast: false - # matrix: - # distro: [ubuntu:24.04, fedora:41, alpine:3.21] - # steps: - # - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - # - uses: DeterminateSystems/determinate-nix-action@bafaa638b9d5ec0e7e3ac1a7fc80453ef1fd265f # v3 - # - uses: DeterminateSystems/magic-nix-cache-action@908b263ff629f4cc17666315b7fd3ec127c6244d # main - # - name: Build security test binary - # run: nix build -L .#security-test-bin - # - name: Run security tests in ${{ matrix.distro }} - # run: | - # TEST_BIN=$(realpath result/bin/security_tests-*) - # docker run --rm --privileged \ - # -v /nix/store:/nix/store:ro \ - # ${{ matrix.distro }} \ - # "$TEST_BIN" --ignored --test-threads=1 - - cargo-deny: - name: Cargo Deny + # Tests ACTUAL sandbox isolation: seccomp blocks, Landlock filesystem + # rules, network blocking, resource limits, CVE payloads. + # + # GHA ubuntu-latest ships kernel 6.11 — enable when runners get 6.12+. 
+  #
+  e2e:
+    name: E2E (${{ matrix.distro }})
+    if: false # TODO: enable when GHA runners ship kernel 6.12+
+    needs: test
     runs-on: ubuntu-latest
+    permissions:
+      id-token: write
+      contents: read
+    strategy:
+      fail-fast: false
+      matrix:
+        distro: [ubuntu:24.04, fedora:41, alpine:3.21]
     steps:
       - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
-      - uses: EmbarkStudios/cargo-deny-action@6c8f9facfa5047ec02d8485b6bf52b587b7777d1 # v2
+      - uses: DeterminateSystems/determinate-nix-action@bafaa638b9d5ec0e7e3ac1a7fc80453ef1fd265f # v3
+      - uses: DeterminateSystems/magic-nix-cache-action@908b263ff629f4cc17666315b7fd3ec127c6244d # main
 
-  semver-check:
-    name: SemVer Check
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
-      - uses: Swatinem/rust-cache@e18b497796c12c097a38f9edb9d0641fb99eee32 # v2
-      - uses: obi1kenobi/cargo-semver-checks-action@6b69fcf40e9b5fb17adeb57e4b6ecd020649a239 # v2
-        with:
-          package: evalbox
+      - name: Check kernel version
+        run: |
+          KVER=$(uname -r | cut -d. -f1-2)
+          echo "Kernel: $KVER"
+          if [ "$(printf '%s\n' 6.12 "$KVER" | sort -V | head -n1)" != "6.12" ]; then # version sort: 6.9 < 6.12
+            echo "::error::Kernel $KVER < 6.12, Landlock ABI v5 not available"
+            exit 1
+          fi
+
+      - name: Build security test binary
+        run: nix build -L .#security-test-bin
+
+      - name: Run security tests in ${{ matrix.distro }}
+        run: |
+          TEST_BIN=$(realpath result/bin/security_tests-*)
+          docker run --rm --privileged \
+            -v /nix/store:/nix/store:ro \
+            ${{ matrix.distro }} \
+            "$TEST_BIN" --ignored --test-threads=1
diff --git a/Cargo.lock b/Cargo.lock
index 613876d..3bb97d6 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -83,7 +83,7 @@ dependencies = [
 
 [[package]]
 name = "evalbox"
-version = "0.1.1"
+version = "0.2.0"
 dependencies = [
  "dashmap",
  "evalbox-sandbox",
@@ -100,7 +100,7 @@ dependencies = [
 
 [[package]]
 name = "evalbox-sandbox"
-version = "0.1.1"
+version = "0.2.0"
 dependencies = [
  "cc",
  "evalbox-sys",
@@ -114,7 +114,7 @@ dependencies = [
 
 [[package]]
 name = "evalbox-sys"
-version = "0.1.1"
+version = "0.2.0"
 dependencies = [
  "libc",
  "rustix",
diff --git a/Cargo.toml b/Cargo.toml
index 32fe416..c95f537 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -10,7 +10,7 @@ members = [
 name = "evalbox"
 
 [workspace.package]
-version = "0.1.1"
+version = "0.2.0"
 edition = "2024"
 rust-version = "1.85"
 license = "MIT OR Apache-2.0"
@@ -22,9 +22,9 @@ keywords = ["sandbox", "isolation", "security", "landlock", "seccomp"]
 categories = ["os::linux-apis", "development-tools"]
 
 [workspace.dependencies]
-evalbox = { version = "0.1.1", path = "crates/evalbox" }
-evalbox-sys = { version = "0.1.1", path = "crates/evalbox-sys" }
-evalbox-sandbox = { version = "0.1.1", path = "crates/evalbox-sandbox" }
+evalbox = { version = "0.2.0", path = "crates/evalbox" }
+evalbox-sys = { version = "0.2.0", path = "crates/evalbox-sys" }
+evalbox-sandbox = { version = "0.2.0", path = "crates/evalbox-sandbox" }
 libc = "0.2"
 serde = { version = "1", features = ["derive"] }
 
diff --git a/TODO.md b/TODO.md
new file mode 100644
index
0000000..9732eb6 --- /dev/null +++ b/TODO.md @@ -0,0 +1,79 @@ +# TODO + +## Upstream: rust-lang/libc + +### PR: add missing `SYS_sendfile` and `SYS_fadvise64` on aarch64 + +**Repo**: https://github.com/rust-lang/libc +**Type**: PR (not issue — small enough to just submit directly) +**Precedent**: PR #1435 (Firecracker team, merged in 1 hour) + +The `libc` crate does not export `SYS_sendfile` (71) or `SYS_fadvise64` (223) for aarch64. The kernel defines them via `__NR3264_*` macros in `asm-generic/unistd.h` — the indirection likely caused them to be skipped when the aarch64 table was originally added. Other architectures using the same generic table (riscv64, loongarch64) already have them. + +**Files to change in rust-lang/libc:** + +``` +src/unix/linux_like/linux/gnu/b64/aarch64/mod.rs + + pub const SYS_sendfile: c_long = 71; + + pub const SYS_fadvise64: c_long = 223; + +src/unix/linux_like/linux/musl/b64/aarch64/mod.rs + + pub const SYS_sendfile: c_long = 71; + + pub const SYS_fadvise64: c_long = 223; + +libc-test/semver/linux-aarch64.txt + + SYS_fadvise64 + + SYS_sendfile +``` + +**PR title**: `aarch64: add missing SYS_sendfile and SYS_fadvise64 constants` + +**PR body**: + +> The aarch64 syscall table is missing `SYS_sendfile` (71) and `SYS_fadvise64` (223). +> +> These are defined in the kernel via `__NR3264_*` macros in `include/uapi/asm-generic/unistd.h` +> and resolve to `__NR_sendfile = 71` and `__NR_fadvise64 = 223` on 64-bit architectures. +> They are also present in glibc's `sysdeps/unix/sysv/linux/aarch64/arch-syscall.h`. +> +> Other architectures sharing the same generic syscall table (riscv64, loongarch64) already +> export these constants. The aarch64 module skips numbers 71 and 223 entirely. +> +> This affects projects using seccomp-BPF on aarch64 that need to whitelist these syscalls +> (e.g. sandbox implementations). 
+ +**Notes**: +- Target `libc-0.2` branch (current stable, gets published to crates.io) +- Constants are manually maintained, not auto-generated +- Run `cd libc-test && cargo test` and `./ci/style.py` before submitting +- The repo has a v1.0 migration happening but `libc-0.2` still receives releases + +**Workaround in evalbox** (until upstream merges): +```rust +// crates/evalbox-sys/src/seccomp.rs +#[cfg(target_arch = "aarch64")] +mod nr { + pub const SYS_SENDFILE: i64 = 71; + pub const SYS_FADVISE64: i64 = 223; +} +``` + +**Evidence**: +- Checked libc 0.2.182, 0.2.185, 0.2.186 and `main` branch — all missing +- Kernel: https://github.com/torvalds/linux/blob/master/include/uapi/asm-generic/unistd.h +- glibc: `sysdeps/unix/sysv/linux/aarch64/arch-syscall.h` defines both +- Related: #1348 (similar musl gap, fixed in #1435 by Firecracker team) + +--- + +## CI blockers + +### E2E security tests — kernel 6.12+ + +GHA `ubuntu-latest` ships kernel 6.11. E2E security tests need Landlock ABI v5 (signal/IPC scoping) which requires 6.12+. Disabled with `if: false`. + +**Action**: Monitor GHA runner updates. Remove `if: false` when 6.12+ lands. + +### Windows ARM64 native runner + +`windows-11-arm` is in GHA public preview (unstable). Currently cross-compiling `aarch64-pc-windows-msvc` from x86_64. When runner stabilizes, add native job with tests. 
diff --git a/crates/evalbox-sandbox/Cargo.toml b/crates/evalbox-sandbox/Cargo.toml index 7b3224e..efc8c8f 100644 --- a/crates/evalbox-sandbox/Cargo.toml +++ b/crates/evalbox-sandbox/Cargo.toml @@ -8,16 +8,27 @@ repository.workspace = true description = "Sandbox orchestration for evalbox" [dependencies] +thiserror.workspace = true +which.workspace = true +tempfile.workspace = true + +# Linux-only dependencies (seccomp, landlock, pidfd, mio epoll) +[target.'cfg(target_os = "linux")'.dependencies] evalbox-sys.workspace = true libc.workspace = true rustix.workspace = true -tempfile.workspace = true mio.workspace = true -thiserror.workspace = true -which.workspace = true -[build-dependencies] +# Windows-only dependencies (future: windows-sys for Job Objects, AppContainer) +# [target.'cfg(target_os = "windows")'.dependencies] +# windows-sys = { version = "0.59", features = [...] } + +[target.'cfg(target_os = "linux")'.build-dependencies] cc = "1.2" +[package.metadata.docs.rs] +all-features = true +rustdoc-args = ["--cfg", "docsrs"] + [lints] workspace = true diff --git a/crates/evalbox-sandbox/build.rs b/crates/evalbox-sandbox/build.rs index 5c598c4..a874b92 100644 --- a/crates/evalbox-sandbox/build.rs +++ b/crates/evalbox-sandbox/build.rs @@ -1,78 +1,80 @@ //! Build script for evalbox-sandbox. //! -//! Compiles C security test payloads into static binaries. -//! These payloads are used in integration tests to verify sandbox isolation. - -use std::env; -use std::fs; -use std::path::PathBuf; -use std::process::Command; +//! On Linux: compiles C security test payloads into static binaries. +//! On other platforms: no-op. 
fn main() { - println!("cargo:rerun-if-changed=tests/payloads"); + #[cfg(target_os = "linux")] + linux::compile_payloads(); +} - let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap()); - let payload_dir = out_dir.join("payloads"); - fs::create_dir_all(&payload_dir).unwrap(); +#[cfg(target_os = "linux")] +mod linux { + use std::env; + use std::fs; + use std::path::PathBuf; + use std::process::Command; - let payloads_src = PathBuf::from("tests/payloads"); - if !payloads_src.exists() { - // No payloads directory - skip compilation - return; - } + pub fn compile_payloads() { + println!("cargo:rerun-if-changed=tests/payloads"); + + let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap()); + let payload_dir = out_dir.join("payloads"); + fs::create_dir_all(&payload_dir).unwrap(); - // Find all .c files in tests/payloads/ - let entries = match fs::read_dir(&payloads_src) { - Ok(e) => e, - Err(_) => return, - }; + let payloads_src = PathBuf::from("tests/payloads"); + if !payloads_src.exists() { + return; + } - for entry in entries.flatten() { - let path = entry.path(); - if path.extension().map(|e| e == "c").unwrap_or(false) { - let stem = path.file_stem().unwrap().to_string_lossy(); - let output = payload_dir.join(stem.as_ref()); + let entries = match fs::read_dir(&payloads_src) { + Ok(e) => e, + Err(_) => return, + }; - compile_payload(&path, &output); + for entry in entries.flatten() { + let path = entry.path(); + if path.extension().map(|e| e == "c").unwrap_or(false) { + let stem = path.file_stem().unwrap().to_string_lossy(); + let output = payload_dir.join(stem.as_ref()); + compile_payload(&path, &output); + } } } -} -fn compile_payload(source: &PathBuf, output: &PathBuf) { - let name = source.file_stem().unwrap().to_string_lossy(); + fn compile_payload(source: &PathBuf, output: &PathBuf) { + let name = source.file_stem().unwrap().to_string_lossy(); + + let compilers = ["musl-gcc", "gcc", "cc"]; + for compiler in compilers { + let status = Command::new(compiler) + 
.args(["-static", "-O2", "-Wall", "-Wextra", "-o"]) + .arg(output) + .arg(source) + .status(); - // Try to compile with musl for static binary, fall back to glibc - let compilers = ["musl-gcc", "gcc", "cc"]; + match status { + Ok(s) if s.success() => { + println!("cargo:warning=Compiled payload: {name}"); + return; + } + _ => continue, + } + } - for compiler in compilers { - let status = Command::new(compiler) - .args(["-static", "-O2", "-Wall", "-Wextra", "-o"]) + let status = Command::new("gcc") + .args(["-O2", "-Wall", "-Wextra", "-o"]) .arg(output) .arg(source) .status(); match status { Ok(s) if s.success() => { - println!("cargo:warning=Compiled payload: {name}"); - return; + println!("cargo:warning=Compiled payload (dynamic): {name}"); + } + _ => { + println!("cargo:warning=Failed to compile payload: {name}"); } - _ => continue, - } - } - - // If static compilation fails, try without -static - let status = Command::new("gcc") - .args(["-O2", "-Wall", "-Wextra", "-o"]) - .arg(output) - .arg(source) - .status(); - - match status { - Ok(s) if s.success() => { - println!("cargo:warning=Compiled payload (dynamic): {name}"); - } - _ => { - println!("cargo:warning=Failed to compile payload: {name}"); } } } diff --git a/crates/evalbox-sandbox/src/isolation/mod.rs b/crates/evalbox-sandbox/src/isolation/mod.rs deleted file mode 100644 index 4e69f46..0000000 --- a/crates/evalbox-sandbox/src/isolation/mod.rs +++ /dev/null @@ -1,11 +0,0 @@ -//! Isolation mechanisms for sandboxed processes. -//! -//! This module contains all the security isolation layers: -//! -//! - **lockdown** - Security restrictions (Landlock v5, seccomp, securebits, capabilities) -//! 
- **rlimits** - Resource limits (memory, CPU, files, processes) - -mod lockdown; -pub mod rlimits; - -pub use lockdown::{LockdownError, close_extra_fds, lockdown}; diff --git a/crates/evalbox-sandbox/src/isolation/rlimits.rs b/crates/evalbox-sandbox/src/isolation/rlimits.rs deleted file mode 100644 index 67d97b8..0000000 --- a/crates/evalbox-sandbox/src/isolation/rlimits.rs +++ /dev/null @@ -1,71 +0,0 @@ -//! Resource limits for sandboxed processes. -//! -//! Sets kernel-enforced resource limits to prevent denial-of-service. -//! -//! ## Limits Applied -//! -//! | Limit | Purpose | Default | -//! |-------|---------|---------| -//! | `RLIMIT_DATA` | Memory usage | 256 MiB | -//! | `RLIMIT_CPU` | CPU time | timeout * 2 + 60s | -//! | `RLIMIT_FSIZE` | Output file size | 16 MiB | -//! | `RLIMIT_NOFILE` | Open file descriptors | 256 | -//! | `RLIMIT_NPROC` | Max processes | 64 | -//! | `RLIMIT_CORE` | Core dump size | 0 (disabled) | -//! | `RLIMIT_STACK` | Stack size | 8 MiB | -//! -//! ## Note on `RLIMIT_AS` -//! -//! We intentionally do NOT set `RLIMIT_AS` (virtual address space). -//! Modern runtimes like Go, Java, and V8 pre-allocate large virtual address -//! ranges but only commit small portions. `RLIMIT_AS` would break these -//! runtimes. `RLIMIT_DATA` limits actual memory and is more appropriate. - -use evalbox_sys::last_errno; -use rustix::io::Errno; - -use crate::plan::Plan; - -/// Apply resource limits based on the sandbox plan. 
-pub fn apply_rlimits(plan: &Plan) -> Result<(), Errno> { - let cpu_secs = plan.timeout.as_secs().saturating_mul(2).saturating_add(60); - - set_rlimit(libc::RLIMIT_DATA, plan.memory_limit)?; - set_rlimit(libc::RLIMIT_CPU, cpu_secs)?; - set_rlimit(libc::RLIMIT_FSIZE, plan.max_output)?; - set_rlimit(libc::RLIMIT_NOFILE, 256)?; - set_rlimit(libc::RLIMIT_NPROC, u64::from(plan.max_pids))?; - set_rlimit(libc::RLIMIT_CORE, 0)?; - set_rlimit(libc::RLIMIT_STACK, 8 * 1024 * 1024)?; - Ok(()) -} - -#[inline] -fn set_rlimit(resource: libc::__rlimit_resource_t, limit: u64) -> Result<(), Errno> { - let rlim = libc::rlimit { - rlim_cur: limit, - rlim_max: limit, - }; - // SAFETY: rlim is valid, resource is a valid constant. - if unsafe { libc::setrlimit(resource, &rlim) } != 0 { - Err(last_errno()) - } else { - Ok(()) - } -} - -#[cfg(test)] -mod tests { - #[test] - fn get_current_nofile() { - let mut rlim = libc::rlimit { - rlim_cur: 0, - rlim_max: 0, - }; - assert_eq!( - unsafe { libc::getrlimit(libc::RLIMIT_NOFILE, &mut rlim) }, - 0 - ); - assert!(rlim.rlim_cur > 0); - } -} diff --git a/crates/evalbox-sandbox/src/lib.rs b/crates/evalbox-sandbox/src/lib.rs index 7442490..40696f5 100644 --- a/crates/evalbox-sandbox/src/lib.rs +++ b/crates/evalbox-sandbox/src/lib.rs @@ -26,17 +26,20 @@ //! - Linux kernel 6.12+ (for Landlock ABI 5) //! 
- Seccomp enabled in kernel
 
-pub mod executor;
-pub mod isolation;
-pub mod monitor;
-pub mod notify;
+#[macro_use]
+mod macros;
+
+// Platform-agnostic modules
 pub mod plan;
 pub mod resolve;
-pub mod sysinfo;
 pub mod validate;
-pub mod workspace;
+pub mod virtual_fs;
+
+// Platform-specific (dispatched via sys)
+pub mod sys;
 
-pub use executor::{Event, Executor, ExecutorError, SandboxId};
-pub use monitor::{Output, Status};
+// Public re-exports (kept alphabetical so `cargo fmt --check` passes)
 pub use plan::{Landlock, Mount, NotifyMode, Plan, Syscalls, UserFile};
 pub use resolve::{ResolveError, ResolvedBinary, resolve_binary};
+pub use sys::{Event, Executor, ExecutorError, SandboxId};
+pub use sys::{Output, Status};
diff --git a/crates/evalbox-sandbox/src/macros.rs b/crates/evalbox-sandbox/src/macros.rs
new file mode 100644
index 0000000..77e09c4
--- /dev/null
+++ b/crates/evalbox-sandbox/src/macros.rs
@@ -0,0 +1,13 @@
+/// Wraps items with `#[cfg(target_os = "linux")]` and docs.rs annotation.
+///
+/// Items annotated with this macro will only be compiled on Linux and will
+/// show the platform gate in generated documentation on docs.rs.
+macro_rules! cfg_linux {
+    ($($item:item)*) => {
+        $(
+            #[cfg(target_os = "linux")]
+            #[cfg_attr(docsrs, doc(cfg(target_os = "linux")))]
+            $item
+        )*
+    }
+}
diff --git a/crates/evalbox-sandbox/src/notify/mod.rs b/crates/evalbox-sandbox/src/notify/mod.rs
deleted file mode 100644
index d4912f2..0000000
--- a/crates/evalbox-sandbox/src/notify/mod.rs
+++ /dev/null
@@ -1,17 +0,0 @@
-//! Seccomp user notification support.
-//!
-//! This module provides the supervisor side of seccomp user notification,
-//! enabling syscall interception without Linux user namespaces.
-//!
-//! ## Modules
-//!
-//! - **supervisor** - Main notification loop that handles intercepted syscalls
-//! - **`virtual_fs`** - Path translation for filesystem virtualization
-//!
- **`scm_rights`** - Unix socket fd passing (child → parent listener fd transfer) - -pub mod scm_rights; -pub mod supervisor; -pub mod virtual_fs; - -pub use supervisor::{NotifyEvent, Supervisor}; -pub use virtual_fs::VirtualFs; diff --git a/crates/evalbox-sandbox/src/resolve.rs b/crates/evalbox-sandbox/src/resolve.rs index 7fd934d..23a691e 100644 --- a/crates/evalbox-sandbox/src/resolve.rs +++ b/crates/evalbox-sandbox/src/resolve.rs @@ -5,7 +5,7 @@ use std::path::{Path, PathBuf}; use thiserror::Error; use crate::plan::Mount; -use crate::sysinfo::{SYSTEM_PATHS, SystemPaths, SystemType}; +use crate::sys::sysinfo::{SYSTEM_PATHS, SystemPaths, SystemType}; #[derive(Debug, Clone)] pub struct ResolvedBinary { diff --git a/crates/evalbox-sandbox/src/executor.rs b/crates/evalbox-sandbox/src/sys/linux/executor.rs similarity index 97% rename from crates/evalbox-sandbox/src/executor.rs rename to crates/evalbox-sandbox/src/sys/linux/executor.rs index 0b60f1c..775e3c2 100644 --- a/crates/evalbox-sandbox/src/executor.rs +++ b/crates/evalbox-sandbox/src/sys/linux/executor.rs @@ -35,7 +35,7 @@ //! } //! 
``` -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; use std::ffi::CString; use std::io::{self, Write as _}; use std::os::fd::{AsRawFd, OwnedFd, RawFd}; @@ -49,21 +49,22 @@ use rustix::process::{Pid, PidfdFlags, Signal, pidfd_open, pidfd_send_signal}; use thiserror::Error; use evalbox_sys::seccomp::{ - SockFprog, build_notify_filter, build_whitelist_filter, default_whitelist, notify_fs_syscalls, + SockFprog, build_notify_filter, build_whitelist_filter, notify_fs_syscalls, }; use evalbox_sys::seccomp_notify::seccomp_set_mode_filter_listener; use evalbox_sys::{check, last_errno, seccomp::seccomp_set_mode_filter}; -use crate::isolation::{LockdownError, close_extra_fds, lockdown}; -use crate::monitor::{Output, Status, monitor, set_nonblocking, wait_for_exit, write_stdin}; -use crate::notify::scm_rights; +use super::lockdown::{LockdownError, close_extra_fds, lockdown}; +use super::monitor::{Output, Status, monitor, set_nonblocking, wait_for_exit, write_stdin}; +use super::notify::scm_rights; +use super::workspace::Workspace; use crate::plan::{Mount, NotifyMode, Plan}; use crate::resolve::{ResolvedBinary, resolve_binary}; use crate::validate::validate_cmd; -use crate::workspace::Workspace; /// Error during sandbox execution. #[derive(Debug, Error)] +#[non_exhaustive] pub enum ExecutorError { #[error("system check: {0}")] SystemCheck(String), @@ -113,6 +114,7 @@ impl std::fmt::Display for SandboxId { /// Events emitted by the Executor. #[derive(Debug)] +#[non_exhaustive] pub enum Event { /// Sandbox completed execution. Completed { id: SandboxId, output: Output }, @@ -901,21 +903,9 @@ fn setup_stdio(workspace: &Workspace) -> Result<(), ExecutorError> { // Cast is safe: filter length fits in u16 (max whitelist is 200 + ~20 overhead). 
#[allow(clippy::cast_possible_truncation)] fn apply_seccomp(plan: &Plan) -> Result<(), ExecutorError> { - let base = default_whitelist(); - let whitelist: Vec = if let Some(ref syscalls) = plan.syscalls { - let mut wl_set: HashSet = base.into_iter().collect(); - for s in &syscalls.denied { - wl_set.remove(s); - } - for s in &syscalls.allowed { - wl_set.insert(*s); - } - wl_set.into_iter().collect() - } else { - base - }; + let config = super::policy::compile_seccomp(plan); - let filter = build_whitelist_filter(&whitelist); + let filter = build_whitelist_filter(&config.whitelist); let fprog = SockFprog { len: filter.len() as u16, filter: filter.as_ptr(), diff --git a/crates/evalbox-sandbox/src/isolation/lockdown.rs b/crates/evalbox-sandbox/src/sys/linux/lockdown.rs similarity index 82% rename from crates/evalbox-sandbox/src/isolation/lockdown.rs rename to crates/evalbox-sandbox/src/sys/linux/lockdown.rs index 888aaa4..04c7107 100644 --- a/crates/evalbox-sandbox/src/isolation/lockdown.rs +++ b/crates/evalbox-sandbox/src/sys/linux/lockdown.rs @@ -37,7 +37,6 @@ use evalbox_sys::last_errno; use rustix::io::Errno; use thiserror::Error; -use super::rlimits::apply_rlimits; use crate::plan::Plan; /// Error during security lockdown. @@ -275,6 +274,52 @@ pub fn close_extra_fds() { } } +// --- Resource limits (inlined from isolation/rlimits.rs) --- + +/// Apply resource limits based on the sandbox plan. +/// +/// ## Limits Applied +/// +/// | Limit | Purpose | Default | +/// |-------|---------|---------| +/// | `RLIMIT_DATA` | Memory usage | 256 MiB | +/// | `RLIMIT_CPU` | CPU time | timeout * 2 + 60s | +/// | `RLIMIT_FSIZE` | Output file size | 16 MiB | +/// | `RLIMIT_NOFILE` | Open file descriptors | 256 | +/// | `RLIMIT_NPROC` | Max processes | 64 | +/// | `RLIMIT_CORE` | Core dump size | 0 (disabled) | +/// | `RLIMIT_STACK` | Stack size | 8 MiB | +/// +/// Note: `RLIMIT_AS` is intentionally NOT set. 
Modern runtimes like Go, Java, +/// and V8 pre-allocate large virtual address ranges but only commit small +/// portions. `RLIMIT_DATA` limits actual memory and is more appropriate. +fn apply_rlimits(plan: &Plan) -> Result<(), Errno> { + let cpu_secs = plan.timeout.as_secs().saturating_mul(2).saturating_add(60); + + set_rlimit(libc::RLIMIT_DATA, plan.memory_limit)?; + set_rlimit(libc::RLIMIT_CPU, cpu_secs)?; + set_rlimit(libc::RLIMIT_FSIZE, plan.max_output)?; + set_rlimit(libc::RLIMIT_NOFILE, 256)?; + set_rlimit(libc::RLIMIT_NPROC, u64::from(plan.max_pids))?; + set_rlimit(libc::RLIMIT_CORE, 0)?; + set_rlimit(libc::RLIMIT_STACK, 8 * 1024 * 1024)?; + Ok(()) +} + +#[inline] +fn set_rlimit(resource: libc::__rlimit_resource_t, limit: u64) -> Result<(), Errno> { + let rlim = libc::rlimit { + rlim_cur: limit, + rlim_max: limit, + }; + // SAFETY: rlim is valid, resource is a valid constant. + if unsafe { libc::setrlimit(resource, &rlim) } != 0 { + Err(last_errno()) + } else { + Ok(()) + } +} + #[cfg(test)] mod tests { use super::*; @@ -283,4 +328,17 @@ mod tests { fn open_path_valid() { assert!(open_path("/tmp").is_ok()); } + + #[test] + fn get_current_nofile() { + let mut rlim = libc::rlimit { + rlim_cur: 0, + rlim_max: 0, + }; + assert_eq!( + unsafe { libc::getrlimit(libc::RLIMIT_NOFILE, &mut rlim) }, + 0 + ); + assert!(rlim.rlim_cur > 0); + } } diff --git a/crates/evalbox-sandbox/src/sys/linux/mod.rs b/crates/evalbox-sandbox/src/sys/linux/mod.rs new file mode 100644 index 0000000..d499b9f --- /dev/null +++ b/crates/evalbox-sandbox/src/sys/linux/mod.rs @@ -0,0 +1,10 @@ +pub mod policy; +pub mod executor; +pub mod lockdown; +pub mod monitor; +pub mod notify; +pub mod sysinfo; +pub mod workspace; + +pub use executor::{Event, Executor, ExecutorError, SandboxId}; +pub use monitor::{Output, Status}; diff --git a/crates/evalbox-sandbox/src/monitor.rs b/crates/evalbox-sandbox/src/sys/linux/monitor.rs similarity index 99% rename from crates/evalbox-sandbox/src/monitor.rs rename 
to crates/evalbox-sandbox/src/sys/linux/monitor.rs index e31ee08..c168400 100644 --- a/crates/evalbox-sandbox/src/monitor.rs +++ b/crates/evalbox-sandbox/src/sys/linux/monitor.rs @@ -27,8 +27,8 @@ use std::time::{Duration, Instant}; use rustix::process::{Signal, pidfd_send_signal}; +use super::workspace::Workspace; use crate::plan::Plan; -use crate::workspace::Workspace; /// Output from a sandboxed execution. #[must_use] @@ -61,6 +61,7 @@ impl Output { /// Status of the sandboxed execution. #[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[non_exhaustive] pub enum Status { Exited, Signaled, diff --git a/crates/evalbox-sandbox/src/sys/linux/notify/mod.rs b/crates/evalbox-sandbox/src/sys/linux/notify/mod.rs new file mode 100644 index 0000000..39bdc0e --- /dev/null +++ b/crates/evalbox-sandbox/src/sys/linux/notify/mod.rs @@ -0,0 +1,4 @@ +pub mod scm_rights; +pub mod supervisor; + +pub use supervisor::{NotifyEvent, Supervisor}; diff --git a/crates/evalbox-sandbox/src/notify/scm_rights.rs b/crates/evalbox-sandbox/src/sys/linux/notify/scm_rights.rs similarity index 100% rename from crates/evalbox-sandbox/src/notify/scm_rights.rs rename to crates/evalbox-sandbox/src/sys/linux/notify/scm_rights.rs diff --git a/crates/evalbox-sandbox/src/notify/supervisor.rs b/crates/evalbox-sandbox/src/sys/linux/notify/supervisor.rs similarity index 99% rename from crates/evalbox-sandbox/src/notify/supervisor.rs rename to crates/evalbox-sandbox/src/sys/linux/notify/supervisor.rs index 69fc8fc..6a8d8b5 100644 --- a/crates/evalbox-sandbox/src/notify/supervisor.rs +++ b/crates/evalbox-sandbox/src/sys/linux/notify/supervisor.rs @@ -18,7 +18,7 @@ use evalbox_sys::seccomp_notify::{ SeccompNotifResp, notif_addfd, notif_id_valid, notif_recv, notif_send, }; -use super::virtual_fs::VirtualFs; +use crate::virtual_fs::VirtualFs; use crate::plan::NotifyMode; /// Events emitted by the supervisor for future user-facing notifications. 
diff --git a/crates/evalbox-sandbox/src/sys/linux/policy.rs b/crates/evalbox-sandbox/src/sys/linux/policy.rs new file mode 100644 index 0000000..573b6d6 --- /dev/null +++ b/crates/evalbox-sandbox/src/sys/linux/policy.rs @@ -0,0 +1,178 @@ +//! Security policy: Plan → Linux isolation primitives. +//! +//! Translates the high-level [`Plan`] into concrete Linux security primitives +//! (Landlock rules, seccomp whitelist, rlimits). Pure computation, no side effects. + +use std::collections::HashSet; +use std::path::{Path, PathBuf}; + +use evalbox_sys::seccomp::default_whitelist; + +use crate::plan::{NotifyMode, Plan}; + +/// Compiled Landlock ruleset (paths + access masks). +pub struct LandlockRuleset { + /// Read-only mount paths from the plan. + pub readonly_mounts: Vec, + /// Extra read-only paths (e.g., resolved binary mounts). + pub extra_readonly_paths: Vec, + /// Writable workspace paths. + pub write_paths: Vec, + /// Whether to block network access. + pub network_blocked: bool, +} + +/// A mount with its landlock access configuration. +pub struct LandlockMount { + pub path: PathBuf, + pub executable: bool, +} + +/// Compiled seccomp configuration. +pub struct SeccompConfig { + /// Final syscall whitelist (base + allowed - denied). + pub whitelist: Vec, +} + +/// Compiled resource limits. +pub struct RlimitConfig { + pub memory: u64, + pub cpu_seconds: u64, + pub max_fsize: u64, + pub max_nofile: u64, + pub max_nproc: u32, +} + +/// Full compilation result. +pub struct CompiledPlan { + pub landlock: LandlockRuleset, + pub seccomp: SeccompConfig, + pub rlimits: RlimitConfig, + pub notify_mode: NotifyMode, +} + +/// Compile a Plan into Linux isolation primitives. 
+pub fn compile(plan: &Plan, workspace_root: &Path, extra_readonly_paths: &[&str]) -> CompiledPlan {
+    CompiledPlan {
+        landlock: compile_landlock(plan, workspace_root, extra_readonly_paths),
+        seccomp: compile_seccomp(plan),
+        rlimits: compile_rlimits(plan),
+        notify_mode: plan.notify_mode,
+    }
+}
+
+/// Compile seccomp whitelist from Plan.
+///
+/// Computes `(default whitelist + allowed) - denied`: denials are applied last, so deny wins.
+pub fn compile_seccomp(plan: &Plan) -> SeccompConfig {
+    let base = default_whitelist();
+    let whitelist = if let Some(ref syscalls) = plan.syscalls {
+        let mut wl_set: HashSet<_> = base.into_iter().collect();
+        for s in &syscalls.allowed {
+            wl_set.insert(*s);
+        }
+        for s in &syscalls.denied {
+            wl_set.remove(s);
+        }
+        wl_set.into_iter().collect()
+    } else {
+        base
+    };
+
+    SeccompConfig { whitelist }
+}
+
+/// Compile Landlock ruleset from Plan: read-only mounts plus writable `work`/`tmp`/`home` dirs.
+fn compile_landlock(
+    plan: &Plan,
+    workspace_root: &Path,
+    extra_readonly_paths: &[&str],
+) -> LandlockRuleset {
+    let readonly_mounts = plan
+        .mounts
+        .iter()
+        .filter(|m| !m.writable)
+        .map(|m| LandlockMount {
+            path: m.source.clone(),
+            executable: m.executable,
+        })
+        .collect();
+
+    let extra_readonly = extra_readonly_paths
+        .iter()
+        .map(PathBuf::from)
+        .collect();
+
+    let write_paths = vec![
+        workspace_root.join("work"),
+        workspace_root.join("tmp"),
+        workspace_root.join("home"),
+    ];
+
+    LandlockRuleset {
+        readonly_mounts,
+        extra_readonly_paths: extra_readonly,
+        write_paths,
+        network_blocked: plan.network_blocked,
+    }
+}
+
+/// Compile rlimits from Plan.
+fn compile_rlimits(plan: &Plan) -> RlimitConfig { + RlimitConfig { + memory: plan.memory_limit, + cpu_seconds: plan.timeout.as_secs().saturating_mul(2).saturating_add(60), + max_fsize: plan.max_output, + max_nofile: 256, + max_nproc: plan.max_pids, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::plan::Syscalls; + + #[test] + fn compile_seccomp_default() { + let plan = Plan::new(["echo", "hello"]); + let config = compile_seccomp(&plan); + assert!(!config.whitelist.is_empty()); + } + + #[test] + fn compile_seccomp_custom() { + let plan = Plan::new(["echo"]).syscalls(Syscalls::default().allow(9999).deny(0)); + let config = compile_seccomp(&plan); + assert!(config.whitelist.contains(&9999)); + assert!(!config.whitelist.contains(&0)); + } + + #[test] + fn compile_rlimits_defaults() { + let plan = Plan::new(["echo"]); + let rlimits = compile_rlimits(&plan); + assert_eq!(rlimits.memory, 256 * 1024 * 1024); + assert_eq!(rlimits.max_nproc, 64); + assert_eq!(rlimits.max_nofile, 256); + } + + #[test] + fn compile_landlock_workspace_paths() { + let plan = Plan::new(["echo"]); + let ruleset = compile_landlock(&plan, Path::new("/tmp/ws"), &[]); + assert_eq!(ruleset.write_paths.len(), 3); + assert!(ruleset.write_paths.contains(&PathBuf::from("/tmp/ws/work"))); + assert!(ruleset.write_paths.contains(&PathBuf::from("/tmp/ws/tmp"))); + assert!(ruleset.write_paths.contains(&PathBuf::from("/tmp/ws/home"))); + } + + #[test] + fn compile_full() { + let plan = Plan::new(["echo"]); + let compiled = compile(&plan, Path::new("/tmp/ws"), &["/usr"]); + assert!(!compiled.seccomp.whitelist.is_empty()); + assert_eq!(compiled.landlock.extra_readonly_paths.len(), 1); + assert_eq!(compiled.notify_mode, NotifyMode::Disabled); + } +} diff --git a/crates/evalbox-sandbox/src/sysinfo.rs b/crates/evalbox-sandbox/src/sys/linux/sysinfo.rs similarity index 100% rename from crates/evalbox-sandbox/src/sysinfo.rs rename to crates/evalbox-sandbox/src/sys/linux/sysinfo.rs diff --git 
a/crates/evalbox-sandbox/src/workspace.rs b/crates/evalbox-sandbox/src/sys/linux/workspace.rs similarity index 100% rename from crates/evalbox-sandbox/src/workspace.rs rename to crates/evalbox-sandbox/src/sys/linux/workspace.rs diff --git a/crates/evalbox-sandbox/src/sys/mod.rs b/crates/evalbox-sandbox/src/sys/mod.rs new file mode 100644 index 0000000..02f57db --- /dev/null +++ b/crates/evalbox-sandbox/src/sys/mod.rs @@ -0,0 +1,16 @@ +cfg_linux! { + pub mod linux; +} + +#[cfg(target_os = "linux")] +pub use linux::*; + +#[cfg(target_os = "windows")] +#[cfg_attr(docsrs, doc(cfg(target_os = "windows")))] +pub mod windows; + +#[cfg(target_os = "windows")] +pub use windows::*; + +#[cfg(not(any(target_os = "linux", target_os = "windows")))] +compile_error!("evalbox-sandbox supports only Linux and Windows."); diff --git a/crates/evalbox-sandbox/src/sys/windows/executor.rs b/crates/evalbox-sandbox/src/sys/windows/executor.rs new file mode 100644 index 0000000..0067dc4 --- /dev/null +++ b/crates/evalbox-sandbox/src/sys/windows/executor.rs @@ -0,0 +1,128 @@ +//! Sandbox executor for Windows. +//! +//! ## Process Creation Flow +//! +//! Unlike Linux (fork → lockdown in child → exec), Windows applies all security +//! attributes atomically at process creation: +//! +//! 1. Create Job Object with resource limits +//! 2. Create AppContainer profile +//! 3. Create Restricted Token with Low Integrity +//! 4. Create isolated WindowStation + Desktop +//! 5. Build `STARTUPINFOEX` with: +//! - `PROC_THREAD_ATTRIBUTE_JOB_LIST` → Job Object +//! - `PROC_THREAD_ATTRIBUTE_SECURITY_CAPABILITIES` → AppContainer +//! 6. `CreateProcessW` with restricted token + extended startup info +//! 7. Monitor via `WaitForSingleObject` + pipe reading +//! +//! ## Linux ↔ Windows Mapping +//! +//! | Linux `executor` | Windows `executor` | +//! |------------------|-------------------| +//! | `fork()` | `CreateProcessW(CREATE_SUSPENDED)` | +//! 
| `pidfd_open()` | Process `HANDLE` (returned by `CreateProcessW`) | +//! | `pidfd_send_signal(SIGKILL)` | `TerminateProcess` | +//! | `waitid(P_PIDFD)` | `WaitForSingleObject` + `GetExitCodeProcess` | +//! | `pipe()` for stdio | `CreatePipe` for stdio | +//! | `poll()` for mux | `WaitForMultipleObjects` | +//! | `mio::Poll` | `IOCP` (I/O Completion Ports) | +//! +//! **Status**: Stub — not yet implemented. + +use std::io; +use std::time::Duration; + +use thiserror::Error; + +use super::lockdown::LockdownError; +use super::monitor::Output; +use crate::plan::Plan; + +/// Error during sandbox execution. +#[derive(Debug, Error)] +#[non_exhaustive] +pub enum ExecutorError { + #[error("not implemented: Windows sandbox support is not yet available")] + NotImplemented, + + #[error("validation: {0}")] + Validation(#[from] crate::validate::ValidationError), + + #[error("workspace: {0}")] + Workspace(io::Error), + + #[error("process creation: {0}")] + CreateProcess(io::Error), + + #[error("lockdown: {0}")] + Lockdown(#[from] LockdownError), + + #[error("monitor: {0}")] + Monitor(io::Error), + + #[error("command not found: {0}")] + CommandNotFound(String), + + #[error("io: {0}")] + Io(#[from] io::Error), +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct SandboxId(pub usize); + +impl std::fmt::Display for SandboxId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Sandbox({})", self.0) + } +} + +/// Events emitted by the Executor. 
+#[derive(Debug)] +#[non_exhaustive] +pub enum Event { + Completed { id: SandboxId, output: Output }, + Timeout { id: SandboxId, output: Output }, + Stdout { id: SandboxId, data: Vec }, + Stderr { id: SandboxId, data: Vec }, +} + +pub struct Executor; + +impl Executor { + pub fn new() -> io::Result { + Ok(Self) + } + + pub fn run(_plan: Plan) -> Result { + Err(ExecutorError::NotImplemented) + } + + pub fn spawn(&mut self, _plan: Plan) -> Result { + Err(ExecutorError::NotImplemented) + } + + pub fn poll( + &mut self, + events: &mut Vec, + _timeout: Option, + ) -> io::Result<()> { + events.clear(); + Ok(()) + } + + pub fn active_count(&self) -> usize { + 0 + } + + pub fn kill(&mut self, _id: SandboxId) -> io::Result<()> { + Ok(()) + } + + pub fn write_stdin(&mut self, _id: SandboxId, _data: &[u8]) -> io::Result { + Err(io::Error::new(io::ErrorKind::Unsupported, "not implemented")) + } + + pub fn close_stdin(&mut self, _id: SandboxId) -> io::Result<()> { + Ok(()) + } +} diff --git a/crates/evalbox-sandbox/src/sys/windows/lockdown.rs b/crates/evalbox-sandbox/src/sys/windows/lockdown.rs new file mode 100644 index 0000000..9be2eef --- /dev/null +++ b/crates/evalbox-sandbox/src/sys/windows/lockdown.rs @@ -0,0 +1,65 @@ +//! Security lockdown for sandboxed processes on Windows. +//! +//! Applies all security restrictions at process creation time via +//! `CreateProcessW` + `STARTUPINFOEX`. Unlike Linux where lockdown happens +//! in the child after fork, Windows configures everything before launch. +//! +//! ## Isolation Layers (applied atomically) +//! +//! 1. **Job Object** — Resource limits enforced by kernel +//! - `CreateJobObjectW` + `SetInformationJobObject` +//! - `JOBOBJECT_EXTENDED_LIMIT_INFORMATION`: memory, CPU time, process count +//! - `JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE`: kill all on handle close +//! +//! 2. **AppContainer** — Capability-based filesystem/network isolation +//! - `CreateAppContainerProfile` + `DeriveAppContainerSidFromAppContainerName` +//! 
- `PROC_THREAD_ATTRIBUTE_SECURITY_CAPABILITIES` at launch +//! - Only granted capabilities are accessible (deny-by-default) +//! +//! 3. **Restricted Token** — Strip privileges and SIDs +//! - `CreateRestrictedToken` with `DISABLE_MAX_PRIVILEGE` +//! - Removes all privileges except `SeChangeNotifyPrivilege` +//! - Restricts group SIDs (deny-only) +//! +//! 4. **Integrity Level** — Mandatory access control +//! - `SetTokenInformation(TokenIntegrityLevel)` +//! - `SECURITY_MANDATORY_LOW_RID` (S-1-16-4096) or +//! - `SECURITY_MANDATORY_UNTRUSTED_RID` (S-1-16-0) +//! - Prevents writing to higher-integrity objects +//! +//! 5. **Desktop Isolation** — Separate WindowStation +//! - `CreateWindowStationW` + `CreateDesktopW` +//! - Prevents clipboard access, window message injection +//! - `STARTUPINFO.lpDesktop = "SandboxWinSta\\SandboxDesktop"` +//! +//! ## Linux ↔ Windows Mapping +//! +//! | Linux | Windows | +//! |-------|---------| +//! | `PR_SET_NO_NEW_PRIVS` | Restricted Token + Low Integrity | +//! | Landlock v5 (FS/net) | AppContainer | +//! | Rlimits | Job Object limits | +//! | Securebits | Token restriction flags | +//! | Drop capabilities | `AdjustTokenPrivileges` disable all | +//! | Seccomp BPF | (no equivalent — not needed with AppContainer) | +//! +//! **Status**: Stub — not yet implemented. + +/// Error during security lockdown.
+#[derive(Debug, thiserror::Error)] +pub enum LockdownError { + #[error("job object: {0}")] + JobObject(std::io::Error), + + #[error("app container: {0}")] + AppContainer(std::io::Error), + + #[error("restricted token: {0}")] + RestrictedToken(std::io::Error), + + #[error("integrity level: {0}")] + IntegrityLevel(std::io::Error), + + #[error("desktop isolation: {0}")] + DesktopIsolation(std::io::Error), +} diff --git a/crates/evalbox-sandbox/src/sys/windows/mod.rs b/crates/evalbox-sandbox/src/sys/windows/mod.rs new file mode 100644 index 0000000..cb842fa --- /dev/null +++ b/crates/evalbox-sandbox/src/sys/windows/mod.rs @@ -0,0 +1,28 @@ +//! Windows sandbox implementation. +//! +//! Defense-in-depth isolation using Win32 security primitives: +//! +//! - **Job Objects** — Resource limits (memory, CPU, process count) +//! - **AppContainer** — Filesystem and network isolation (capability-based) +//! - **Restricted Tokens** — Privilege reduction (disable SIDs, strip privileges) +//! - **Integrity Levels** — Mandatory access control (Low/Untrusted) +//! - **Desktop Isolation** — Separate WindowStation to prevent UI interaction +//! +//! ## Architecture +//! +//! Unlike Linux (fork + lockdown in child), Windows applies all security +//! attributes atomically at process creation via `CreateProcessW` with +//! `STARTUPINFOEX` and `PROC_THREAD_ATTRIBUTE_*`. +//! +//! **Status**: Stub — not yet implemented. + +mod executor; +pub mod lockdown; +mod monitor; +pub mod notify; +pub mod policy; +pub mod sysinfo; +mod workspace; + +pub use executor::{Event, Executor, ExecutorError, SandboxId}; +pub use monitor::{Output, Status}; diff --git a/crates/evalbox-sandbox/src/sys/windows/monitor.rs b/crates/evalbox-sandbox/src/sys/windows/monitor.rs new file mode 100644 index 0000000..5152948 --- /dev/null +++ b/crates/evalbox-sandbox/src/sys/windows/monitor.rs @@ -0,0 +1,43 @@ +//! Process monitoring for Windows. +//! +//! ## Monitoring Strategy +//! +//! 
- **Process handle** from `CreateProcessW` → `WaitForSingleObject` +//! - **Stdio pipes** from `CreatePipe` → `ReadFile` (async via IOCP) +//! - **Timeout** → `WaitForMultipleObjects` with timeout parameter +//! - **Output limit** → Track bytes read, `TerminateProcess` if exceeded +//! +//! ## Linux ↔ Windows Mapping +//! +//! | Linux `monitor` | Windows `monitor` | +//! |-----------------|-------------------| +//! | `poll(pidfd, stdout, stderr)` | `WaitForMultipleObjects(process, stdout, stderr)` | +//! | `waitid(P_PIDFD, WEXITED)` | `GetExitCodeProcess` | +//! | `SIGKILL` on timeout | `TerminateProcess` on timeout | +//! | `O_NONBLOCK` + `read()` | Overlapped I/O + `ReadFile` | +//! +//! **Status**: Stub — not yet implemented. + +use std::time::Duration; + +/// Output from a sandboxed execution. +#[must_use] +#[derive(Debug, Clone)] +pub struct Output { + pub stdout: Vec, + pub stderr: Vec, + pub status: Status, + pub duration: Duration, + pub exit_code: Option, + pub signal: Option, +} + +/// Status of the sandboxed execution. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[non_exhaustive] +pub enum Status { + Exited, + Signaled, + Timeout, + OutputLimitExceeded, +} diff --git a/crates/evalbox-sandbox/src/sys/windows/notify/etw.rs b/crates/evalbox-sandbox/src/sys/windows/notify/etw.rs new file mode 100644 index 0000000..2120b55 --- /dev/null +++ b/crates/evalbox-sandbox/src/sys/windows/notify/etw.rs @@ -0,0 +1,83 @@ +//! ETW (Event Tracing for Windows) consumer for syscall monitoring. +//! +//! Equivalent of Linux `supervisor.rs` (seccomp user notify handler). +//! +//! ## How It Works +//! +//! 1. `StartTraceW` — Create a real-time ETW session +//! 2. `EnableTraceEx2` — Enable providers: +//! - `Microsoft-Windows-Kernel-File` — File I/O events +//! - `Microsoft-Windows-Kernel-Process` — Process/thread events +//! - `Microsoft-Windows-Kernel-Network` — Network events +//! 3. `OpenTrace` + `ProcessTrace` — Consume events in a callback +//! 4. 
Event callback emits `NotifyEvent` for each observed operation +//! +//! ## Modes +//! +//! - **Monitor** (`NotifyMode::Monitor`): +//! Log syscall-equivalent events and emit `NotifyEvent`. Cannot block. +//! ETW is asynchronous — events arrive after the operation completes. +//! +//! - **Virtualize** (`NotifyMode::Virtualize`): +//! Not supported via ETW. On Windows, filesystem virtualization is handled +//! natively by AppContainer (redirects writes to per-app storage). +//! +//! ## Linux ↔ Windows Mapping +//! +//! | Linux `supervisor` | Windows `etw` | +//! |-------------------|---------------| +//! | `notif_recv` (seccomp) | ETW event callback | +//! | `SECCOMP_USER_NOTIF_FLAG_CONTINUE` | N/A (events are post-hoc) | +//! | `SECCOMP_IOCTL_NOTIF_ADDFD` | N/A (AppContainer handles FS) | +//! | `Supervisor::handle_event()` | `EtwConsumer::process_event()` | +//! | `/proc/pid/mem` read | N/A (no memory inspection needed) | +//! +//! ## Key Win32 Functions +//! +//! - `StartTraceW` — Start tracing session +//! - `EnableTraceEx2` — Enable provider with filter +//! - `OpenTraceW` — Open trace for consumption +//! - `ProcessTrace` — Blocking event loop (runs in dedicated thread) +//! - `StopTraceW` — Stop tracing session +//! - `CloseTrace` — Close trace handle +//! +//! **Status**: Stub — not yet implemented. + +use crate::plan::NotifyMode; + +/// Events emitted by the ETW consumer. +/// +/// Equivalent of Linux `NotifyEvent` from `supervisor.rs`. +#[derive(Debug)] +pub struct NotifyEvent { + /// Process ID that triggered the event. + pub pid: u32, + /// Thread ID. + pub tid: u32, + /// Event type (file open, process create, network connect, etc.). + pub event_type: EventType, +} + +/// Types of monitored events. +#[derive(Debug, Clone, Copy)] +pub enum EventType { + /// File operation (open, read, write, delete). + FileIo, + /// Process creation or termination. + Process, + /// Network connection attempt. + Network, +} + +/// ETW event consumer. 
+/// +/// Equivalent of Linux `Supervisor` from `supervisor.rs`. +pub struct EtwConsumer { + _mode: NotifyMode, +} + +impl EtwConsumer { + pub fn new(mode: NotifyMode) -> Self { + Self { _mode: mode } + } +} diff --git a/crates/evalbox-sandbox/src/sys/windows/notify/handle.rs b/crates/evalbox-sandbox/src/sys/windows/notify/handle.rs new file mode 100644 index 0000000..ece3075 --- /dev/null +++ b/crates/evalbox-sandbox/src/sys/windows/notify/handle.rs @@ -0,0 +1,31 @@ +//! Handle passing between processes on Windows. +//! +//! Equivalent of Linux `scm_rights.rs` (SCM_RIGHTS fd passing over unix socket). +//! +//! On Linux, file descriptors are passed between parent and child via +//! `SCM_RIGHTS` over an `AF_UNIX` socketpair. On Windows, handles are +//! duplicated between processes via `DuplicateHandle`. +//! +//! ## Linux ↔ Windows Mapping +//! +//! | Linux `scm_rights` | Windows `handle` | +//! |-------------------|-----------------| +//! | `socketpair(AF_UNIX)` | Inheritable handle via `SECURITY_ATTRIBUTES` | +//! | `send_fd(socket, fd)` | `DuplicateHandle(src_process, src_handle, dst_process, ...)` | +//! | `recv_fd(socket)` | Handle already in target (via inheritance or `DuplicateHandle`) | +//! | `SCM_RIGHTS` cmsg | `DUPLICATE_SAME_ACCESS` flag | +//! +//! ## Key Win32 Functions +//! +//! - `DuplicateHandle` — Copy handle from one process to another +//! - `SECURITY_ATTRIBUTES.bInheritHandle` — Allow child to inherit handle +//! - `SetHandleInformation(HANDLE_FLAG_INHERIT)` — Toggle inheritability +//! +//! ## Notes +//! +//! Windows handle inheritance is simpler than Linux fd passing: +//! - Inheritable handles are automatically available in child after `CreateProcessW` +//! - `PROC_THREAD_ATTRIBUTE_HANDLE_LIST` restricts which handles are inherited +//! - No socket/cmsg dance needed — just mark handles as inheritable +//! +//! **Status**: Stub — not yet implemented. 
diff --git a/crates/evalbox-sandbox/src/sys/windows/notify/mod.rs b/crates/evalbox-sandbox/src/sys/windows/notify/mod.rs new file mode 100644 index 0000000..3125db2 --- /dev/null +++ b/crates/evalbox-sandbox/src/sys/windows/notify/mod.rs @@ -0,0 +1,32 @@ +//! Process monitoring and event tracing for Windows. +//! +//! On Linux, seccomp user notify allows the parent to intercept and handle +//! child syscalls synchronously (Monitor + Virtualize modes). Windows has +//! no unprivileged equivalent for syscall interception. +//! +//! ## Available Mechanisms +//! +//! - **ETW (Event Tracing for Windows)** — Asynchronous event monitoring +//! - `StartTraceW` + `EnableTraceEx2` + `ProcessTrace` +//! - Can observe file operations, process creation, network activity +//! - **Cannot block or modify syscalls** (observation only) +//! - Maps to `NotifyMode::Monitor` +//! +//! - **Minifilter Drivers** — Synchronous FS interception (kernel mode) +//! - Requires signed driver, not usable from userspace +//! - Would map to `NotifyMode::Virtualize` but **not feasible unprivileged** +//! +//! ## Linux ↔ Windows Mapping +//! +//! | Linux `notify` | Windows `notify` | +//! |----------------|------------------| +//! | `NotifyMode::Disabled` | No ETW session | +//! | `NotifyMode::Monitor` | ETW session with file/process providers | +//! | `NotifyMode::Virtualize` | AppContainer handles FS isolation natively | +//! | `scm_rights` (fd passing) | `handle` (DuplicateHandle) | +//! | `supervisor.rs` | `etw.rs` (event consumer) | + +pub mod etw; +pub mod handle; + +pub use etw::{EtwConsumer, NotifyEvent}; diff --git a/crates/evalbox-sandbox/src/sys/windows/policy.rs b/crates/evalbox-sandbox/src/sys/windows/policy.rs new file mode 100644 index 0000000..eff84dd --- /dev/null +++ b/crates/evalbox-sandbox/src/sys/windows/policy.rs @@ -0,0 +1,143 @@ +//! Security policy: Plan → Win32 isolation primitives. +//! +//! Translates the high-level [`Plan`] into concrete Windows security primitives +//! 
(Job Object limits, AppContainer capabilities, token restrictions). +//! Pure computation, no side effects. +//! +//! ## Output Structures +//! +//! - [`JobObjectConfig`] — Memory, CPU, process limits for `SetInformationJobObject` +//! - [`AppContainerConfig`] — Capability SIDs for `SECURITY_CAPABILITIES` +//! - [`TokenConfig`] — Integrity level and privilege restrictions +//! - [`CompiledPlan`] — All of the above combined +//! +//! **Status**: Stub — not yet implemented. + +use std::path::{Path, PathBuf}; + +use crate::plan::{NotifyMode, Plan}; + +/// Compiled Job Object limits. +/// +/// Maps to `JOBOBJECT_EXTENDED_LIMIT_INFORMATION`: +/// - `ProcessMemoryLimit` ← `memory` +/// - `PerProcessUserTimeLimit` ← `cpu_seconds * 10_000_000` (100ns units) +/// - `ActiveProcessLimit` ← `max_pids` +pub struct JobObjectConfig { + pub memory: u64, + pub cpu_seconds: u64, + pub max_pids: u32, + pub max_output: u64, +} + +/// Compiled AppContainer configuration. +/// +/// Maps to `SECURITY_CAPABILITIES` passed via +/// `PROC_THREAD_ATTRIBUTE_SECURITY_CAPABILITIES`. +pub struct AppContainerConfig { + /// Unique profile name for `CreateAppContainerProfile`. + pub profile_name: String, + /// Readable paths (granted via capability SIDs or object ACLs). + pub read_paths: Vec, + /// Writable paths. + pub write_paths: Vec, + /// Allow network access (grants `internetClient` capability). + pub network_allowed: bool, +} + +/// Compiled token restriction configuration. +/// +/// Applied via `CreateRestrictedToken` + `SetTokenInformation(TokenIntegrityLevel)`. +pub struct TokenConfig { + /// Use `SECURITY_MANDATORY_UNTRUSTED_RID` (0) for maximum restriction, + /// or `SECURITY_MANDATORY_LOW_RID` (4096) for compatibility. + pub untrusted: bool, + /// Strip all privileges from the token. + pub disable_all_privileges: bool, +} + +/// Full compilation result for Windows. 
+pub struct CompiledPlan {
+    pub job_object: JobObjectConfig,
+    pub app_container: AppContainerConfig,
+    pub token: TokenConfig,
+    pub notify_mode: NotifyMode,
+}
+
+/// Compile a Plan into Win32 isolation primitives.
+pub fn compile(plan: &Plan, workspace_root: &Path) -> CompiledPlan {
+    CompiledPlan {
+        job_object: compile_job_object(plan),
+        app_container: compile_app_container(plan, workspace_root),
+        token: compile_token(plan),
+        notify_mode: plan.notify_mode,
+    }
+}
+
+fn compile_job_object(plan: &Plan) -> JobObjectConfig {
+    JobObjectConfig {
+        memory: plan.memory_limit,
+        cpu_seconds: plan.timeout.as_secs().saturating_mul(2).saturating_add(60),
+        max_pids: plan.max_pids,
+        max_output: plan.max_output,
+    }
+}
+
+fn compile_app_container(plan: &Plan, workspace_root: &Path) -> AppContainerConfig {
+    use std::sync::atomic::{AtomicU64, Ordering};
+    // PIDs repeat across sandboxes of one process; a sequence number keeps names unique.
+    static NEXT_PROFILE: AtomicU64 = AtomicU64::new(0);
+    let seq = NEXT_PROFILE.fetch_add(1, Ordering::Relaxed);
+
+    let read_paths = plan
+        .mounts
+        .iter()
+        .filter(|m| !m.writable)
+        .map(|m| m.source.clone())
+        .collect();
+    let write_paths = Vec::from(["work", "tmp", "home"].map(|dir| workspace_root.join(dir)));
+
+    AppContainerConfig {
+        profile_name: format!("evalbox-{}-{seq}", std::process::id()),
+        read_paths,
+        write_paths,
+        network_allowed: !plan.network_blocked,
+    }
+}
+
+fn compile_token(_plan: &Plan) -> TokenConfig {
+    TokenConfig {
+        untrusted: true,
+        disable_all_privileges: true,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn compile_job_object_defaults() {
+        let plan = Plan::new(["cmd.exe", "/c", "echo hello"]);
+        let config = compile_job_object(&plan);
+        assert_eq!(config.memory, 256 * 1024 * 1024);
+        assert_eq!(config.max_pids, 64);
+    }
+
+    #[test]
+    fn compile_app_container_paths() {
+        let plan = Plan::new(["cmd.exe"]);
+        let config = compile_app_container(&plan, Path::new("C:\\Temp\\ws"));
+        assert_eq!(config.write_paths.len(), 3);
+        assert!(!config.network_allowed);
+    }
+
+    #[test]
+    fn compile_full() {
+        let plan = Plan::new(["cmd.exe"]);
+        let compiled = compile(&plan, Path::new("C:\\Temp\\ws"));
assert!(compiled.token.untrusted); + assert!(compiled.token.disable_all_privileges); + assert_eq!(compiled.notify_mode, NotifyMode::Disabled); + } +} diff --git a/crates/evalbox-sandbox/src/sys/windows/sysinfo.rs b/crates/evalbox-sandbox/src/sys/windows/sysinfo.rs new file mode 100644 index 0000000..4a3fe30 --- /dev/null +++ b/crates/evalbox-sandbox/src/sys/windows/sysinfo.rs @@ -0,0 +1,144 @@ +//! System detection for Windows. +//! +//! Detects the Windows environment and provides system paths for +//! sandbox configuration. Mirrors the Linux `sysinfo` API so that +//! platform-agnostic code (`resolve.rs`) can use the same types. +//! +//! ## Detection +//! +//! - **Windows version** — Feature availability depends on OS build: +//! - AppContainer: Windows 8+ / Server 2012+ +//! - Process mitigation policies: Windows 10+ +//! - Job Object CPU rate control: Windows 8+ +//! +//! - **Environment** — Detect WSL, MSYS2, standard: +//! - `WSL_DISTRO_NAME` env var → running under WSL +//! - `MSYSTEM` env var → running under MSYS2/Git Bash +//! +//! ## Linux <-> Windows Mapping +//! +//! | Linux `sysinfo` | Windows `sysinfo` | +//! |-----------------|-------------------| +//! | `SystemType::NixOS` / `Fhs` / `Guix` | `SystemType::Standard` / `Msys2` / `Wsl` | +//! | `SYSTEM_PATHS` (LazyLock) | `SYSTEM_PATHS` (LazyLock) | +//! | `/usr`, `/bin`, `/lib` | `System32`, `ProgramFiles` | +//! | `readonly_mounts` | `readonly_mounts` (read-only system dirs) | +//! +//! **Status**: Stub — not yet implemented. + +use std::path::{Path, PathBuf}; +use std::sync::LazyLock; + +pub static SYSTEM_PATHS: LazyLock = LazyLock::new(SystemPaths::detect); + +/// Detected Windows system type. +/// +/// Windows doesn't have FHS/NixOS/Guix, but `resolve.rs` checks for +/// `SystemType::Fhs`. We include it as a variant that is never returned +/// on Windows, so the shared code compiles without `cfg` blocks. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum SystemType { + /// Standard Windows installation. + Standard, + /// Running under MSYS2/Git Bash. + Msys2, + /// Running under WSL (Windows Subsystem for Linux). + Wsl, + /// FHS (Linux only) — never returned on Windows, exists for API compatibility + /// with `resolve.rs`. + Fhs, +} + +/// Detected system paths relevant to sandbox configuration. +/// +/// Mirrors the Linux `SystemPaths` struct so `resolve.rs` can use the +/// same field names on both platforms. +#[derive(Debug, Clone)] +pub struct SystemPaths { + pub system_type: SystemType, + /// Read-only system directories (equivalent to Linux `/usr`, `/bin`, etc.). + /// On Windows: `System32`, `ProgramFiles`. + pub readonly_mounts: Vec, + /// Default PATH for sandboxed processes. + pub default_path: String, +} + +impl SystemType { + pub fn detect() -> Self { + if std::env::var("WSL_DISTRO_NAME").is_ok() { + SystemType::Wsl + } else if std::env::var("MSYSTEM").is_ok() { + SystemType::Msys2 + } else { + SystemType::Standard + } + } +} + +impl SystemPaths { + pub fn detect() -> Self { + let system_type = SystemType::detect(); + + let system32 = std::env::var("SystemRoot") + .map(|r| PathBuf::from(r).join("System32")) + .unwrap_or_else(|_| PathBuf::from(r"C:\Windows\System32")); + + let program_files = std::env::var("ProgramFiles") + .map(PathBuf::from) + .unwrap_or_else(|_| PathBuf::from(r"C:\Program Files")); + + let mut readonly_mounts = Vec::new(); + if system32.exists() { + readonly_mounts.push(system32); + } + if program_files.exists() { + readonly_mounts.push(program_files); + } + + let default_path = std::env::var("PATH").unwrap_or_default(); + + Self { + system_type, + readonly_mounts, + default_path, + } + } +} + +pub fn is_nix_store_path(_path: &Path) -> bool { + false +} + +pub fn is_guix_store_path(_path: &Path) -> bool { + false +} + +pub fn get_store_path(_path: &Path) -> Option { + None +} + +#[cfg(test)] +mod tests { + use super::*; + + 
#[test] + fn test_system_type_detect() { + let system_type = SystemType::detect(); + assert!(matches!( + system_type, + SystemType::Standard | SystemType::Msys2 | SystemType::Wsl + )); + } + + #[test] + fn test_system_paths_detect() { + let paths = SystemPaths::detect(); + assert!(!paths.default_path.is_empty()); + } + + #[test] + fn test_nix_paths_always_false() { + assert!(!is_nix_store_path(Path::new(r"C:\Windows\System32"))); + assert!(get_store_path(Path::new(r"C:\Windows")).is_none()); + } +} diff --git a/crates/evalbox-sandbox/src/sys/windows/workspace.rs b/crates/evalbox-sandbox/src/sys/windows/workspace.rs new file mode 100644 index 0000000..2ebf744 --- /dev/null +++ b/crates/evalbox-sandbox/src/sys/windows/workspace.rs @@ -0,0 +1,14 @@ +//! Workspace management for Windows. +//! +//! Creates temporary directories and stdio pipes for sandboxed processes. +//! +//! ## Linux ↔ Windows Mapping +//! +//! | Linux `workspace` | Windows `workspace` | +//! |-------------------|---------------------| +//! | `pipe2(O_CLOEXEC)` | `CreatePipe` with `SECURITY_ATTRIBUTES` | +//! | `eventfd` for sync | `CreateEventW` for sync | +//! | `tempdir` in `/tmp` | `tempdir` in `%TEMP%` | +//! | `dup2` for stdio | `SetStdHandle` / `STARTUPINFO.hStd*` | +//! +//! **Status**: Stub — not yet implemented. diff --git a/crates/evalbox-sandbox/src/notify/virtual_fs.rs b/crates/evalbox-sandbox/src/virtual_fs.rs similarity index 100% rename from crates/evalbox-sandbox/src/notify/virtual_fs.rs rename to crates/evalbox-sandbox/src/virtual_fs.rs diff --git a/crates/evalbox-sys/src/seccomp.rs b/crates/evalbox-sys/src/seccomp.rs index ef16ffd..7f10e71 100644 --- a/crates/evalbox-sys/src/seccomp.rs +++ b/crates/evalbox-sys/src/seccomp.rs @@ -93,6 +93,18 @@ const AUDIT_ARCH: u32 = 0xc000003e; // AUDIT_ARCH #[cfg(target_arch = "aarch64")] const AUDIT_ARCH: u32 = 0xc00000b7; // AUDIT_ARCH_AARCH64 +// Syscall numbers missing from the `libc` crate on aarch64. 
+// +// The kernel defines these via __NR3264_* macros in asm-generic/unistd.h +// and they resolve to __NR_sendfile (71) and __NR_fadvise64 (223) on +// 64-bit architectures. The Rust `libc` crate skips them for aarch64-gnu. +// See: https://github.com/torvalds/linux/blob/master/include/uapi/asm-generic/unistd.h +#[cfg(target_arch = "aarch64")] +mod nr { + pub const SYS_SENDFILE: i64 = 71; + pub const SYS_FADVISE64: i64 = 223; +} + // seccomp_data offsets (same layout on x86_64 and aarch64) const OFFSET_SYSCALL_NR: u32 = 0; const OFFSET_ARCH: u32 = 4; @@ -219,12 +231,18 @@ const BASE_WHITELIST: &[i64] = &[ libc::SYS_fsync, libc::SYS_fdatasync, libc::SYS_ftruncate, + #[cfg(target_arch = "x86_64")] libc::SYS_fadvise64, + #[cfg(target_arch = "aarch64")] + nr::SYS_FADVISE64, libc::SYS_pipe2, libc::SYS_ppoll, libc::SYS_pselect6, // Efficient file operations (Python/Node use these) + #[cfg(target_arch = "x86_64")] libc::SYS_sendfile, + #[cfg(target_arch = "aarch64")] + nr::SYS_SENDFILE, libc::SYS_copy_file_range, libc::SYS_splice, libc::SYS_tee, @@ -786,7 +804,10 @@ mod tests { fn safe_syscalls_present() { let wl = default_whitelist(); assert!(wl.contains(&libc::SYS_execve)); + #[cfg(target_arch = "x86_64")] assert!(wl.contains(&libc::SYS_sendfile)); + #[cfg(target_arch = "aarch64")] + assert!(wl.contains(&nr::SYS_SENDFILE)); assert!(wl.contains(&libc::SYS_close_range)); #[cfg(target_arch = "x86_64")] { diff --git a/crates/evalbox/Cargo.toml b/crates/evalbox/Cargo.toml index 4a30f53..9dcbb95 100644 --- a/crates/evalbox/Cargo.toml +++ b/crates/evalbox/Cargo.toml @@ -29,9 +29,11 @@ default = ["python", "go", "shell"] shell = [] # Shell is always available (no extra deps) python = ["dep:goblin", "dep:memmap2", "dep:walkdir", "dep:dashmap", "dep:serde", "dep:serde_json"] go = ["dep:tempfile", "dep:regex"] -node = ["dep:serde", "dep:serde_json", "dep:tempfile"] -rust-lang = ["dep:tempfile", "dep:serde", "dep:serde_json"] testing = [] # Exposes clear_cache() for test 
isolation +[package.metadata.docs.rs] +all-features = true +rustdoc-args = ["--cfg", "docsrs"] + [lints] workspace = true diff --git a/crates/evalbox/src/lib.rs b/crates/evalbox/src/lib.rs index a6f6d6f..5ab4789 100644 --- a/crates/evalbox/src/lib.rs +++ b/crates/evalbox/src/lib.rs @@ -77,12 +77,15 @@ mod probe_cache; // Runtime implementations #[cfg(feature = "python")] +#[cfg_attr(docsrs, doc(cfg(feature = "python")))] pub mod python; #[cfg(feature = "go")] +#[cfg_attr(docsrs, doc(cfg(feature = "go")))] pub mod go; #[cfg(feature = "shell")] +#[cfg_attr(docsrs, doc(cfg(feature = "shell")))] pub mod shell; // Public API - Core types diff --git a/deny.toml b/deny.toml index 0bf7a56..7c4c1ae 100644 --- a/deny.toml +++ b/deny.toml @@ -1,5 +1,10 @@ [graph] -targets = [{ triple = "x86_64-unknown-linux-gnu" }] +targets = [ + { triple = "x86_64-unknown-linux-gnu" }, + { triple = "aarch64-unknown-linux-gnu" }, + { triple = "x86_64-pc-windows-msvc" }, + { triple = "aarch64-pc-windows-msvc" }, +] all-features = true [advisories] diff --git a/docs/refactor-multiplatform.md b/docs/refactor-multiplatform.md new file mode 100644 index 0000000..7f0fcab --- /dev/null +++ b/docs/refactor-multiplatform.md @@ -0,0 +1,2158 @@ +# evalbox Multi-Platform Refactor Study + +## References Studied + +| Project | Pattern | Relevance | +|---------|---------|-----------| +| **mio** (tokio) | `sys/unix/selector/{epoll,kqueue}.rs` + `sys/windows/` | Event loop abstraction (epoll vs IOCP) | +| **wgpu** | HAL crate with feature-gated backends (`vulkan/`, `metal/`, `dx12/`) | Multiple backend implementations behind traits | +| **crosvm** | `sys.rs` re-export + `sys/{linux,windows}/` modules | Gold standard for cfg organization | +| **alacritty** | `tty/{mod,unix,windows}.rs` | PTY abstraction per platform | +| **cap-std** | `cap-primitives` crate as abstraction layer | Layered sandbox primitives | +| **nix** | Module-per-header, cfg aliases in `build.rs` | Unix syscall bindings organization | +| 
**windows-rs** | `windows-sys` (raw FFI) vs `windows` (safe wrappers) | Windows API access patterns | +| **stdbr** | `core/` + `bindings/{python,nodejs,wasm,ffi-c}/` | Language bindings structure | + +--- + +## Recommended Pattern: crosvm `sys/` model + +The crosvm pattern is the cleanest fit for evalbox because: +1. evalbox has exactly 2 platform targets (Linux, Windows) -- same as crosvm +2. The platform code is concentrated in specific modules (executor, isolation, monitor) +3. It avoids trait boilerplate when the abstraction is simple type aliasing + +### How it works + +```rust +// src/sys.rs -- the re-export hub +cfg_if::cfg_if! { + if #[cfg(target_os = "linux")] { + mod linux; + pub use linux::*; + } else if #[cfg(target_os = "windows")] { + mod windows; + pub use windows::*; + } +} +``` + +Each platform module exports the **same public types/functions** with the same signatures, so the rest of the crate uses them transparently. + +--- + +## Proposed Folder Structure + +``` +crates/ +├── evalbox/ # Public API (unchanged) +│ src/ +│ ├── lib.rs # python/go/shell modules, re-exports +│ ├── session.rs # Session wraps Executor (unchanged) +│ ├── output.rs +│ ├── error.rs +│ ├── detect.rs +│ ├── probe.rs +│ ├── python/ +│ ├── go/ +│ └── shell/ +│ +├── evalbox-sandbox/ # Orchestration +│ src/ +│ ├── lib.rs # pub mod declarations +│ ├── plan.rs # Platform-agnostic Plan builder +│ ├── validate.rs # Platform-agnostic validation +│ ├── resolve.rs # Binary resolution (cross-platform) +│ ├── sysinfo.rs # System info (cross-platform) +│ ├── sys.rs # <<< RE-EXPORT HUB (crosvm pattern) +│ ├── sys/ +│ │ ├── linux/ +│ │ │ ├── mod.rs # pub use of all Linux types +│ │ │ ├── executor.rs # fork/exec, pidfd, mio event loop +│ │ │ ├── monitor.rs # pipe read, waitpid, output collection +│ │ │ ├── workspace.rs # Unix pipes, eventfd, SyncPair, Workspace +│ │ │ └── lockdown.rs # Landlock + seccomp + rlimits + caps +│ │ └── windows/ +│ │ ├── mod.rs # pub use of all Windows types +│ │ 
├── executor.rs # CreateProcess, IOCP event loop +│ │ ├── monitor.rs # Named pipe read, WaitForSingleObject +│ │ ├── workspace.rs # Named pipes, Event objects, temp dir + DACLs +│ │ └── lockdown.rs # Job Objects + Restricted Tokens + Integrity Levels +│ └── isolation/ # REMOVED (logic moved to sys/linux/) +│ +├── evalbox-sys/ # Linux syscall wrappers (unchanged) +│ src/ +│ ├── lib.rs +│ ├── seccomp.rs # BPF filter generation (826 lines) +│ ├── seccomp_notify.rs +│ ├── landlock.rs # Landlock v5 bindings +│ └── check.rs +│ +├── evalbox-win32/ # NEW: Windows API wrappers +│ src/ +│ ├── lib.rs +│ ├── job_object.rs # Create, set limits, assign, completion port +│ ├── token.rs # Restricted tokens, integrity levels, SIDs +│ ├── process.rs # CreateProcessAsUserW, mitigation policies +│ └── acl.rs # DACL/SACL for workspace directory isolation +│ +├── evalbox-mods/ # NEW: Mod provisioning system +│ src/ +│ ├── lib.rs # Mod trait + registry +│ ├── manifest.rs # Parse/write manifest.toml +│ ├── download.rs # HTTP fetch + sha256 verify +│ ├── extract.rs # tar.gz / tar.zst / zip +│ ├── python.rs # Python mod (python-build-standalone) +│ └── sqlite.rs # SQLite mod (static binary) +│ +└── bindings/ + ├── python/ # PyO3 + │ ├── Cargo.toml + │ ├── pyproject.toml + │ └── src/ + │ ├── lib.rs # #[pymodule] fn evalbox(...) + │ ├── session.rs # PySession + │ └── output.rs # PyOutput + ├── nodejs/ # napi-rs + │ ├── Cargo.toml + │ ├── build.rs + │ ├── npm/ # Prebuilt platform binaries + │ │ ├── linux-x64-gnu/ + │ │ ├── linux-arm64-gnu/ + │ │ └── win32-x64-msvc/ + │ └── src/ + │ ├── lib.rs + │ ├── session.rs + │ └── output.rs + └── ffi-c/ # cbindgen + ├── Cargo.toml + ├── cbindgen.toml + └── src/ + ├── lib.rs # extern "C" functions + └── types.rs # repr(C) output structs +``` + +--- + +## Key Design Decisions + +### 1. Why `sys/` folder instead of traits + +Traits add indirection and boilerplate. 
Since evalbox compiles for exactly one platform at a time (you can't sandbox Linux from Windows), the crosvm `sys.rs` re-export pattern is simpler: + +```rust +// crates/evalbox-sandbox/src/sys.rs +cfg_if::cfg_if! { + if #[cfg(target_os = "linux")] { + #[path = "sys/linux/mod.rs"] + mod platform; + } else if #[cfg(target_os = "windows")] { + #[path = "sys/windows/mod.rs"] + mod platform; + } else { + compile_error!("evalbox supports only Linux and Windows"); + } +} + +pub use platform::*; +``` + +Both `sys/linux/mod.rs` and `sys/windows/mod.rs` export: +```rust +pub struct Executor { ... } +pub struct Workspace { ... } +pub struct Output { ... } + +impl Executor { + pub fn new() -> io::Result<Self>; + pub fn spawn(&mut self, plan: Plan) -> Result<SandboxId, ExecutorError>; + pub fn run(plan: Plan) -> Result<Output, ExecutorError>; + pub fn poll(&mut self, events: &mut Vec<Event>, timeout: Option<Duration>) -> io::Result<()>; + pub fn kill(&mut self, id: SandboxId) -> io::Result<()>; + pub fn write_stdin(&mut self, id: SandboxId, data: &[u8]) -> io::Result<usize>; + pub fn close_stdin(&mut self, id: SandboxId) -> io::Result<()>; + pub fn active_count(&self) -> usize; +} +``` + +The public API is **structural** -- same function signatures, same types returned. No trait needed. + +### 2. Why separate `evalbox-win32` crate instead of extending `evalbox-sys` + +| Criterion | evalbox-sys | evalbox-win32 | +|-----------|-------------|---------------| +| Target OS | Linux only | Windows only | +| Content | Landlock, seccomp, BPF (826 lines) | Job Objects, Tokens, CreateProcess | +| Dependencies | libc, rustix | windows-sys | +| Compiled | Only on Linux | Only on Windows | + +Mixing them would create a crate that never fully compiles on any platform. Separate crates = clean `target` gates in workspace Cargo.toml. + +### 3. 
`notify/` -- Linux vs Windows filesystem interception + +#### O que notify/ faz (Linux) + +`notify/` implementa **seccomp user notification** -- interceptacao de syscalls do child pelo parent: + +``` +Parent (broker) Child (sandboxed) +═══════════════ ═════════════════ +create_socketpair() +fork() + Instala BPF filter com SECCOMP_FILTER_FLAG_NEW_LISTENER + send_fd(listener_fd) via SCM_RIGHTS +recv_fd(listener_fd) + child chama openat("/work/file.txt") + → kernel PAUSA o child +notif_recv(listener_fd) ← kernel notifica parent +read /proc/pid/mem → extrai path +notif_id_valid() → TOCTOU check +vfs.translate("/work/file.txt") → "/tmp/sandbox/work/file.txt" +open("/tmp/sandbox/work/file.txt") +notif_addfd() → injeta fd no child + → child retoma com fd valido +``` + +**Arquivos:** +- `scm_rights.rs` (5.1 KB) -- passa fd via Unix socket (`sendmsg`/`recvmsg` + `SCM_RIGHTS`) +- `supervisor.rs` (10.1 KB) -- loop de notificacao (ioctls: `NOTIF_RECV`, `NOTIF_SEND`, `NOTIF_ADDFD`, `NOTIF_ID_VALID`) +- `virtual_fs.rs` (4.5 KB) -- traducao de paths (virtual → real) + +**Syscalls interceptados:** +- Modernos (ARM64 + x86_64): `openat`, `faccessat`, `faccessat2`, `newfstatat`, `statx`, `readlinkat` +- Legacy (x86_64 only): `open`, `creat`, `access`, `stat`, `lstat`, `readlink` + +**Status atual:** Infraestrutura completa mas **NAO integrada** no event loop (notify_fd e armazenado mas nao polled). + +**APIs Linux requeridas:** +- Seccomp User Notify (kernel 5.0+) +- `/proc/pid/mem` (leitura de memoria do child) +- Unix sockets com SCM_RIGHTS +- `SECCOMP_IOCTL_NOTIF_*` ioctls + +#### Windows: Abordagens Reais + +**NAO existe equivalente direto** de seccomp notify no Windows. 
Porem existem mecanismos producao-proven: + +##### Chromium Broker Pattern (RECOMENDADO) + +O que Chrome/Edge/Firefox fazem com bilhoes de instalacoes: + +``` +Parent/Broker Child (sandboxed) +═══════════════ ═════════════════ +CreateRestrictedToken() + → remove privileges, deny SIDs + → integrity level: Untrusted +CreateProcessAsUser(restricted_token) + → child spawna com token restrito +WriteProcessMemory() + → injeta hooks em ntdll.dll do child + ntdll!NtCreateFile("file.txt") + → hook desvia pra IPC + SharedMemory → envia request pro broker +Broker recebe via shared memory +Avalia policy +NtCreateFile() no contexto do broker +DuplicateHandle() → injeta handle no child + Child recebe handle, continua execucao +``` + +| Aspecto | Detalhe | +|---------|---------| +| Requer kernel driver? | **Nao** | +| Requer admin? | **Nao** | +| Robusto? | Sim -- seguranca vem do restricted token (kernel-enforced), nao dos hooks | +| Producao? | Chrome, Edge, Firefox, Adobe Reader | +| Bypassavel? | Hooks sim, mas token bloqueia acesso direto de qualquer forma | +| Rust? | `windows-rs` + `retour` crate (inline hooking) | + +**Por que funciona:** Mesmo que o child bypasse os hooks e chame `NtCreateFile` diretamente, o kernel nega porque o token restrito nao tem permissao. Os hooks sao para **compatibilidade** (redirecionamento), nao seguranca. + +##### ProjFS (Projected File System) + +Minifilter driver **built into Windows** (desde 1809): + +```rust +// Provider registra diretorio virtual +PrjStartVirtualizing("C:\\sandbox\\root", &callbacks)?; + +// Qualquer processo que acessa C:\sandbox\root\* dispara callbacks +fn get_file_data_cb(path: &str) -> Vec { + // Retorna conteudo do arquivo virtualizado +} +``` + +| Aspecto | Detalhe | +|---------|---------| +| Requer kernel driver? | **Nao** (PrjFlt.sys ja vem no Windows) | +| Requer admin? | Sim para habilitar feature (one-time). Nao para usar | +| Robusto? | Sim -- interceptacao no nivel kernel (minifilter) | +| Producao? 
| VFS for Git (Microsoft), backup tools | +| Limitacao | So funciona para diretorio especifico, nao intercepta globalmente | + +**Uso no evalbox:** Combinar com restricted token que so permite acesso ao root ProjFS. Child ve filesystem virtual, nao pode sair. + +##### Sandboxie (referencia, requer driver) + +Kernel driver (`SbieDrv.sys`) + DLL injection (`SbieDll.dll`): +- Copy-on-write semantics: reads veem merged view, writes vao pra sandbox local +- 20+ anos de producao, open source GPLv3 +- **NAO viavel** pra library (requer instalacao de driver) + +#### Decisao de Arquitetura + +``` +┌────────────────────────────────────────────────────────┐ +│ evalbox-sandbox │ +├────────────────────────────────────────────────────────┤ +│ │ +│ Linux (notify/) Windows (broker/) │ +│ ════════════════ ══════════════════ │ +│ seccomp BPF filter Restricted Token │ +│ + SECCOMP_NOTIFY listener + Untrusted IL │ +│ + /proc/pid/mem read + ntdll inline hooks │ +│ + fd injection (ADDFD) + shared memory IPC │ +│ + VirtualFs paths + DuplicateHandle() │ +│ + VirtualFs paths │ +│ Seguranca: seccomp Seguranca: token │ +│ Virtualizacao: notify Virtualizacao: hooks+IPC │ +│ │ +└────────────────────────────────────────────────────────┘ +``` + +**Mapeamento funcional:** + +| Linux notify/ | Windows broker/ | Funcao | +|---------------|-----------------|--------| +| `scm_rights.rs` | N/A (nao precisa) | Passa fd entre processos | +| `supervisor.rs` | `broker.rs` | Loop de interceptacao | +| `virtual_fs.rs` | `virtual_fs.rs` (compartilhado!) 
| Traducao de paths | +| `NOTIF_RECV` ioctl | Shared memory read | Recebe request do child | +| `NOTIF_ADDFD` ioctl | `DuplicateHandle()` | Injeta handle no child | +| `/proc/pid/mem` | `ReadProcessMemory()` | Le args do child | +| `NOTIF_ID_VALID` | N/A | TOCTOU check (Linux-only) | + +**O que pode ser compartilhado cross-platform:** +- `VirtualFs` (path translation) -- puro Rust, zero syscalls +- Policy evaluation logic +- `NotifyMode` enum e tipos publicos + +**O que e 100% platform-specific:** +- IPC mechanism (Unix socket vs shared memory) +- Interceptacao (seccomp notify vs ntdll hooks) +- Handle injection (ADDFD vs DuplicateHandle) + +#### Folder structure final + +``` +sys/ +├── linux/ +│ └── notify/ +│ ├── mod.rs +│ ├── scm_rights.rs # Unix socket fd passing +│ ├── supervisor.rs # seccomp notify loop +│ └── virtual_fs.rs # → usa shared virtual_fs +└── windows/ + └── broker/ + ├── mod.rs + ├── hooks.rs # ntdll inline hooking (retour) + ├── ipc.rs # Shared memory IPC + ├── broker.rs # Request dispatch loop + └── virtual_fs.rs # → usa shared virtual_fs + +# Compartilhado (em src/ root): +src/ +├── virtual_fs.rs # Path translation (platform-agnostic) +└── ... +``` + +#### Rust crates necessarias (Windows) + +```toml +[target.'cfg(windows)'.dependencies] +windows-sys = { version = "0.59", features = [ + "Win32_Security", + "Win32_System_Threading", + "Win32_System_Memory", + "Win32_Foundation", +] } +retour = "0.4" # Inline function hooking (ntdll patching) +``` + +#### Gate + +```rust +// crates/evalbox-sandbox/src/sys.rs +cfg_if::cfg_if! { + if #[cfg(target_os = "linux")] { + #[path = "sys/linux/mod.rs"] + mod platform; + } else if #[cfg(target_os = "windows")] { + #[path = "sys/windows/mod.rs"] + mod platform; + } +} +pub use platform::*; +``` + +`notify/` no Linux e `broker/` no Windows -- nomes diferentes porque sao mecanismos fundamentalmente diferentes, mas servem o mesmo proposito: **filesystem virtualization for sandboxed processes**. + +### 4. 
Plan Unificado -- Intent Over Mechanism + +**Zero `#[cfg]` no Plan.** O usuario expressa INTENCAO, o backend traduz pra primitivas da plataforma. + +Inspirado em: Deno permissions, WASI capabilities, Flatpak permissions, Android permission groups, macOS Seatbelt operations. + +#### Principios + +1. **Deny by default** (Flatpak, macOS, WASI) -- tudo bloqueado, usuario concede explicitamente +2. **Intent, nao mecanismo** (Android, Deno) -- `network: None`, nao "block socket()" +3. **Capability grants** (WASI) -- filesystem = handles explicitos pra diretorios +4. **Deny vence allow** (Deno) -- conflitos sempre restringem +5. **Limites sao universais** (OCI, Job Objects) -- memory/time/pids funciona em todo OS +6. **Escape hatch existe** (OCI linux{}) -- platform-specific possivel mas separado + +#### Filesystem: Capability Grants + +```rust +/// Nivel de acesso a um path (inspirado em Flatpak :ro/:create, Deno --allow-read/write) +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum FsAccess { + Read, // le arquivos, lista dirs + ReadWrite, // le, escreve, cria + ReadExecute, // le e executa binarios (/usr/bin) + Full, // tudo +} + +/// Grant de filesystem (inspirado em WASI --dir=host::guest, Docker volumes) +#[derive(Debug, Clone)] +pub struct FsGrant { + pub host_path: Option, // None = guest_path e o mesmo + pub guest_path: PathBuf, // path visivel dentro do sandbox + pub access: FsAccess, +} + +impl FsGrant { + pub fn read(path: impl Into) -> Self { /* ... */ } + pub fn read_write(path: impl Into) -> Self { /* ... */ } + pub fn read_execute(path: impl Into) -> Self { /* ... */ } + /// Remap: host path aparece em guest path diferente + pub fn bind(host: impl Into, guest: impl Into, access: FsAccess) -> Self { /* ... 
*/ } +} +``` + +**Mapping pra plataforma:** + +| FsGrant | Linux | Windows | +|---------|-------|---------| +| `Read` | Landlock `REFER+READ_FILE+READ_DIR` + bind-mount RO | ACL `GENERIC_READ` no path | +| `ReadWrite` | Landlock full + bind-mount RW | ACL `GENERIC_READ\|GENERIC_WRITE` | +| `ReadExecute` | Landlock + bind-mount RO + exec bit | ACL `GENERIC_READ\|GENERIC_EXECUTE` | +| `bind(host, guest)` | bind-mount host→guest | Junction/symlink + ACL | + +#### Network Policy + +```rust +/// Inspirado em: Deno --allow-net=host:port, Android INTERNET permission +#[derive(Debug, Clone, PartialEq, Eq, Default)] +pub enum NetworkPolicy { + #[default] + None, // Nada (default seguro) + Full, // Tudo liberado + AllowList(Vec), // So destinos especificos +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct NetTarget { + pub host: String, // "example.com", "*.api.com", "10.0.0.1" + pub port: Option, // None = todas portas + pub protocol: Option, +} +``` + +**Mapping:** + +| NetworkPolicy | Linux | Windows | +|---------------|-------|---------| +| `None` | seccomp block `socket()` + Landlock deny network | Restricted token sem network SID | +| `Full` | Nenhum filtro de rede | Token normal | +| `AllowList` | seccomp allow socket + eBPF/cgroup filter | AppContainer + network rules | + +#### Process Policy + +```rust +/// Inspirado em: Deno --allow-run, Chromium job objects +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum ProcessPolicy { + None, // Nao pode spawnar nada + Allow { max_pids: u32 }, // Pode, com limite + AllowList { executables: Vec, max_pids: u32 }, // So estes binarios +} +``` + +**Mapping:** + +| ProcessPolicy | Linux | Windows | +|---------------|-------|---------| +| `None` | seccomp block `clone`/`fork`/`execve` | Job `JOB_OBJECT_LIMIT_ACTIVE_PROCESS=1` | +| `Allow { max_pids: 64 }` | rlimit NPROC=64 | Job `ACTIVE_PROCESS=64` | +| `AllowList` | seccomp notify + whitelist check | Broker intercept + whitelist | + +#### Resource Limits (universal) + 
+```rust +/// Mapeia limpo em TODAS plataformas: +/// Linux: rlimits + cgroups v2 +/// Windows: Job Object limits +/// macOS: rlimits + Seatbelt +#[derive(Debug, Clone)] +pub struct ResourceLimits { + pub memory: u64, // bytes (default: 256 MiB) + pub timeout: Duration, // wall-clock (default: 30s) + pub max_output: u64, // stdout+stderr cap (default: 16 MiB) + pub max_disk_write: u64, // max bytes escritos (default: 64 MiB) + pub cpu_time: Option, // CPU time limit (None = same as timeout) +} +``` + +#### Syscall Categories (abstract) + +```rust +/// Categorias abstratas ao inves de numeros crus. +/// Inspirado em: macOS operations (file-read*, network*), Android permission groups. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum SyscallCategory { + Core, // Sempre permitido: brk, mmap(ANON), clock_gettime + FileIo, // open, read, write, stat -- escopo via FsGrant + Networking, // socket, connect, send, recv -- escopo via NetworkPolicy + ProcessMgmt, // fork, exec, wait -- escopo via ProcessPolicy + Signals, // sigaction, sigprocmask, kill(self) + Threading, // clone(THREAD), futex + Timers, // nanosleep, timer_create + MemoryMapping, // mmap(file), mprotect + SystemInfo, // uname, sysinfo (read-only) + Ipc, // pipes, unix sockets, shm +} + +#[derive(Debug, Clone)] +pub struct SyscallPolicy { + pub allowed: Vec, + /// Escape hatch: platform-specific. Quebra portabilidade. + pub platform_extras: PlatformSyscalls, +} + +/// Separado pra nao ter #[cfg] no Plan +#[derive(Debug, Clone, Default)] +pub struct PlatformSyscalls { + pub linux_allow: Vec, // libc::SYS_* (ignorado em outros OS) + pub linux_deny: Vec, +} +``` + +**Como o backend traduz categorias → syscalls:** + +```rust +// crates/evalbox-sandbox/src/sys/linux/compile.rs +fn category_to_syscalls(cat: SyscallCategory) -> &'static [i64] { + match cat { + SyscallCategory::Core => &[ + SYS_brk, SYS_mmap, SYS_munmap, SYS_mprotect, + SYS_clock_gettime, SYS_gettid, SYS_getpid, ... 
+ ], + SyscallCategory::FileIo => &[ + SYS_openat, SYS_read, SYS_write, SYS_close, + SYS_fstat, SYS_lseek, SYS_getdents64, ... + ], + SyscallCategory::Networking => &[ + SYS_socket, SYS_connect, SYS_sendto, SYS_recvfrom, + SYS_bind, SYS_listen, SYS_accept4, ... + ], + // ... + } +} +``` + +#### Environment + +```rust +/// Inspirado em: Deno --allow-env, WASI explicit env passing +#[derive(Debug, Clone, Default)] +pub enum EnvPolicy { + /// So vars explicitamente setadas (seguro, default) + #[default] + Explicit(HashMap), + /// Herda vars especificas do host + overrides + Inherit { inherit: Vec, overrides: HashMap }, +} +``` + +#### Observation Mode + +```rust +/// Debug/virtualizacao. Inspirado em: Chromium tracing, seccomp TRACE +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum ObserveMode { + #[default] + Disabled, // Zero overhead (producao) + LogDenials, // Loga operacoes negadas (debug) + Audit, // Trail completo (dev, overhead alto) + VirtualizeFs, // Intercepta FS ops pra virtualizacao +} +``` + +**Mapping:** + +| ObserveMode | Linux | Windows | +|-------------|-------|---------| +| `Disabled` | No notify filter | No hooks, token-only | +| `LogDenials` | seccomp SECCOMP_RET_LOG | ETW tracing | +| `Audit` | seccomp SECCOMP_RET_TRACE + ptrace | ETW + API hooks | +| `VirtualizeFs` | seccomp notify + supervisor | Broker hooks + IPC | + +#### Plan Completo + +```rust +#[must_use] +#[derive(Debug, Clone)] +pub struct Plan { + // O que rodar + pub cmd: Vec, + pub cwd: PathBuf, + pub stdin: Option>, + pub user_files: Vec, + + // Capability grants (deny-by-default) + pub fs: Vec, + pub network: NetworkPolicy, + pub processes: ProcessPolicy, + pub ipc: IpcPolicy, + pub env: EnvPolicy, + + // Limites universais + pub limits: ResourceLimits, + + // Syscall control + pub syscalls: SyscallPolicy, + + // Observacao + pub observe: ObserveMode, +} +``` + +**Zero `#[cfg]`. Zero platform leak. 
O backend faz o trabalho.** + +#### Crate Separation + +``` +evalbox-plan/ ← ZERO deps de plataforma, pure Rust, serializable + src/plan.rs ← Plan + todos os tipos acima + +evalbox-sandbox/ + src/sys/linux/ + compile.rs ← Plan → seccomp BPF + Landlock ruleset + rlimits + clone flags + src/sys/windows/ + compile.rs ← Plan → restricted token + job object + integrity level + hooks +``` + +O "compile" step traduz `SyscallCategory::FileIo` → lista de `openat, read, write, stat...` no Linux, ou `NetworkPolicy::None` → restricted token sem network SID no Windows. + +#### Exemplo de Uso (mesmo codigo, roda nos dois OS) + +```rust +use evalbox::{Plan, FsGrant, NetworkPolicy, ResourceLimits}; +use std::time::Duration; + +let plan = Plan::new(["python3", "-c", "print('hello')"]) + .fs(FsGrant::read("/usr")) + .fs(FsGrant::read_write("/work")) + .network(NetworkPolicy::None) + .limits(ResourceLimits { + memory: 512 * 1024 * 1024, + timeout: Duration::from_secs(10), + ..Default::default() + }); + +// Roda em Linux (seccomp+landlock) ou Windows (token+job) sem mudar nada +let output = evalbox::run(plan)?; +``` + +### 5. Bindings depend only on `evalbox` (public API crate) + +```toml +# bindings/python/Cargo.toml +[dependencies] +evalbox = { path = "../../crates/evalbox" } +pyo3 = { version = "0.24", features = ["extension-module", "abi3-py39"] } +``` + +Never import `evalbox-sandbox` or `evalbox-sys` from bindings. The public API crate provides the stable surface. 
+ +--- + +## Cargo.toml Changes + +### Workspace root + +```toml +[workspace] +resolver = "3" +members = [ + "crates/evalbox", + "crates/evalbox-sys", + "crates/evalbox-sandbox", + "crates/evalbox-win32", + "bindings/python", + "bindings/nodejs", + "bindings/ffi-c", +] + +[workspace.dependencies] +evalbox = { version = "0.1.1", path = "crates/evalbox" } +evalbox-sys = { version = "0.1.1", path = "crates/evalbox-sys" } +evalbox-sandbox = { version = "0.1.1", path = "crates/evalbox-sandbox" } +evalbox-win32 = { version = "0.1.1", path = "crates/evalbox-win32" } +``` + +### evalbox-sandbox/Cargo.toml + +```toml +[dependencies] +cfg-if = "1" +thiserror = { workspace = true } +tempfile = { workspace = true } + +[target.'cfg(target_os = "linux")'.dependencies] +evalbox-sys = { workspace = true } +libc = { workspace = true } +rustix = { workspace = true } +mio = { workspace = true } + +[target.'cfg(target_os = "windows")'.dependencies] +evalbox-win32 = { workspace = true } +windows-sys = { version = "0.61", features = [ + "Win32_Foundation", + "Win32_Security", + "Win32_System_JobObjects", + "Win32_System_Threading", + "Win32_System_IO", + "Win32_System_Pipes", +] } +``` + +### evalbox-win32/Cargo.toml + +```toml +[package] +name = "evalbox-win32" +version.workspace = true +edition.workspace = true +# Only builds on Windows +[target.'cfg(not(target_os = "windows"))'.dependencies] +# Empty -- this crate is Windows-only + +[dependencies] +windows-sys = { version = "0.61", features = [ + "Win32_Foundation", + "Win32_Security", + "Win32_Security_Authorization", + "Win32_System_JobObjects", + "Win32_System_Threading", + "Win32_System_IO", + "Win32_System_Pipes", + "Win32_Storage_FileSystem", +] } +thiserror = { workspace = true } +``` + +--- + +## Refactor Deep Analysis + +### Dependency Graph (what imports what) + +``` +plan.rs ←──── executor.rs ────→ monitor.rs + ↑ │ │ + │ ↓ ↓ + ├────── lockdown.rs workspace.rs (LEAF - no crate imports) + │ │ + └────── rlimits.rs + + 
executor.rs also imports: + → notify/scm_rights (Unix socket pairs) + → resolve.rs (binary detection) + → validate.rs (input validation) +``` + +### File Inventory + +| File | Lines | Linux syscalls | Moves to sys/linux/ | Risk | +|------|-------|---------------|---------------------|------| +| executor.rs | 980 | 40+ (fork, pidfd, poll, execve, dup2, pipe2) | YES | HIGH | +| monitor.rs | 318 | 20+ (pidfd, poll, waitid, read, write) | YES | HIGH | +| workspace.rs | 225 | 5 (pipe2, eventfd, close) | YES | LOW | +| isolation/lockdown.rs | 287 | 10 (Landlock, prctl, caps) | YES | MEDIUM | +| isolation/rlimits.rs | 72 | 2 (setrlimit, getrlimit) | YES (merge into lockdown) | LOW | +| notify/ | ~200 | Unix sockets (scm_rights) | YES (all Linux-only) | MEDIUM | +| plan.rs | 530 | 0 | NO (platform-agnostic) | -- | +| resolve.rs | 116 | 0 | NO (platform-agnostic) | -- | +| sysinfo.rs | 227 | 0 | NO (platform-agnostic) | -- | +| validate.rs | 120 | 0 | NO (platform-agnostic) | -- | + +### Critical Risks + +**RISK 1: LockdownError in ExecutorError** + +`ExecutorError` contains `LockdownError` as a variant: +```rust +pub enum ExecutorError { + #[error("lockdown: {0}")] + Lockdown(#[from] LockdownError), + // ... +} +``` +Both types move to sys/linux/ together, so this is safe. But `ExecutorError` is part of the public API (re-exported in lib.rs). The re-export chain must be: +``` +lib.rs → sys/linux/mod.rs → executor.rs → ExecutorError (contains LockdownError) +``` + +**RISK 2: Workspace types used across executor + monitor** + +`Workspace`, `Pipe`, `SyncPair` are NOT in the public API but heavily used internally: +- executor.rs stores Workspace in SpawnedSandbox +- monitor.rs takes `&Workspace` as parameter +- Both files move to sys/linux/, so internal access is preserved via `super::workspace` + +**RISK 3: notify/scm_rights coupling** + +executor.rs calls `scm_rights::create_socketpair()`, `recv_fd()`, `send_fd()`. 
The entire notify/ module is Linux-only (seccomp_notify, Unix domain sockets). It must move with executor.rs. + +**RISK 4: Mount type is heavily used in public API** + +`Mount` is defined in plan.rs (stays put) and used by ALL builders in evalbox crate: +- `Mount::ro()`, `Mount::bind()`, `Mount::rw()`, `.writable()` +- Used in python/builder.rs, go/builder.rs, shell/builder.rs, probe.rs +- Since plan.rs stays platform-agnostic, `Mount` is safe. No risk. + +**RISK 5: Plan fields are directly accessed in tests** + +Shell builder tests access `plan.cmd`, `plan.timeout`, `plan.network_blocked` directly. Plan stays in place, so no risk. + +### What evalbox (public API crate) imports from evalbox-sandbox + +``` +Re-exported types: Event, Executor, ExecutorError, Mount, Plan, SandboxId, + Landlock, Syscalls, UserFile, Output, Status + +Used in builders: Plan::new(), .cwd(), .timeout(), .memory(), .max_pids(), + .max_output(), .network(), .mounts(), .mount(), .file(), + .env(), .stdin(), .executable() + Mount::ro(), Mount::bind(), Mount::rw(), .writable() + Executor::run() + +evalbox does NOT import from evalbox-sys directly. +``` + +All these types come from either plan.rs (stays) or executor.rs/monitor.rs (moves but stays re-exported through sys/linux/mod.rs → lib.rs). The public API surface is unchanged. + +--- + +## Refactor Steps (Phase 1 -- Linux code reorganization) + +Zero functionality change. All 128 tests must pass. 
+ +### Step 1: Add `cfg-if` dependency + +```toml +# crates/evalbox-sandbox/Cargo.toml +cfg-if = "1" +``` + +### Step 2: Create directory structure + +``` +mkdir -p crates/evalbox-sandbox/src/sys/linux +``` + +### Step 3: Move files + +| From | To | Notes | +|------|----|-------| +| `src/executor.rs` | `src/sys/linux/executor.rs` | 980 lines, 40+ syscalls | +| `src/monitor.rs` | `src/sys/linux/monitor.rs` | 318 lines, 20+ syscalls | +| `src/workspace.rs` | `src/sys/linux/workspace.rs` | 225 lines, leaf module | +| `src/isolation/lockdown.rs` + `rlimits.rs` | `src/sys/linux/lockdown.rs` | Merge (71 lines into 287) | +| `src/notify/` (entire dir) | `src/sys/linux/notify/` | All Linux-only | + +### Step 4: Create `sys/linux/mod.rs` + +```rust +pub mod executor; +pub mod lockdown; +pub mod monitor; +pub mod notify; +pub mod workspace; + +// Public API re-exports (must match what lib.rs expects) +pub use executor::{Event, Executor, ExecutorError, SandboxId}; +pub use monitor::{Output, Status}; +``` + +### Step 5: Create `src/sys.rs` + +```rust +cfg_if::cfg_if! { + if #[cfg(target_os = "linux")] { + #[path = "sys/linux/mod.rs"] + mod platform; + } else { + compile_error!("evalbox currently supports only Linux. Windows support coming soon."); + } +} + +pub use platform::*; +``` + +### Step 6: Update `src/lib.rs` + +```rust +pub mod plan; +pub mod resolve; +pub mod sysinfo; +pub mod validate; + +mod sys; + +// Re-export platform types (Executor, Event, etc.) 
+pub use sys::{Event, Executor, ExecutorError, SandboxId, Output, Status}; + +// Re-export platform-agnostic plan types +pub use plan::{Landlock, Mount, NotifyMode, Plan, Syscalls, UserFile}; +pub use resolve::{ResolveError, ResolvedBinary, resolve_binary}; +``` + +### Step 7: Fix imports in moved files + +Key changes needed: +```rust +// In sys/linux/executor.rs: +// OLD: use crate::isolation::{LockdownError, close_extra_fds, lockdown}; +// NEW: use super::lockdown::{LockdownError, close_extra_fds, lockdown}; + +// OLD: use crate::monitor::{...}; +// NEW: use super::monitor::{...}; + +// OLD: use crate::workspace::Workspace; +// NEW: use super::workspace::Workspace; + +// OLD: use crate::notify::scm_rights; +// NEW: use super::notify::scm_rights; + +// These stay the same (plan/resolve/validate didn't move): +// use crate::plan::{...}; +// use crate::resolve::{...}; +// use crate::validate::validate_cmd; +``` + +```rust +// In sys/linux/monitor.rs: +// OLD: use crate::workspace::Workspace; +// NEW: use super::workspace::Workspace; +// use crate::plan::Plan; // stays (plan didn't move) +``` + +```rust +// In sys/linux/lockdown.rs: +// use crate::plan::Plan; // stays (plan didn't move) +// Merge rlimits: inline apply_rlimits() and set_rlimit() +``` + +### Step 8: Delete old directories + +```bash +rm -rf crates/evalbox-sandbox/src/isolation/ +# executor.rs, monitor.rs, workspace.rs already moved +``` + +### Step 9: Verify + +```bash +nix flake check # clippy + fmt + doc + 128 tests +``` + +--- + +## Windows Sandbox Security Model + +For reference when implementing Phase 2+3: + +### Lockdown Sequence (mirrors Linux order) + +``` +Linux Windows +───── ─────── +1. NO_NEW_PRIVS 1. Create restricted token (removes privileges) +2. Landlock v5 2. Set DACLs on workspace dir +3. Rlimits 3. Create Job Object with limits +4. Securebits 4. Set integrity level (Untrusted/Low) +5. Drop capabilities 5. Apply process mitigation policies +6. Seccomp filter 6. 
(no equivalent -- token restrictions suffice) +7. exec 7. CreateProcessAsUser (CREATE_SUSPENDED) + 8. Assign to Job Object + 9. ResumeThread +``` + +### Minimum Windows Version + +Windows 10 1703+ (Creators Update) for: +- `PROC_THREAD_ATTRIBUTE_MITIGATION_POLICY` v2 +- Process mitigation policies (win32k lockdown) +- Full Job Object completion port support + +--- + +## Parity Testing + +Following stdbr's `tools/parity_gen/`: + +``` +tests/ +└── parity/ + ├── vectors.json # Golden test vectors + └── runner/ + └── src/main.rs # Runs vectors through all bindings + +# Example vector: +{ + "name": "echo_hello", + "cmd": ["echo", "hello"], + "expected": { + "exit_code": 0, + "stdout": "hello\n", + "stderr": "" + } +} +``` + +Each binding (Rust, Python, Node.js, C) must produce identical `Output` for the same vector. + +--- + +## Shell: Not a Mod -- Part of the Filesystem + +Shell is **always available** as part of the OS. Not something to install or manage. + +### Linux + +- `sh` resolved via `which` (handles dash, bash, busybox, Nix) +- evalbox grants Landlock read+execute on `/usr`, `/bin`, `/lib`, `/lib64` +- Coreutils (ls, cat, grep) are separate binaries in those paths -- automatically available +- Current code: `Plan::new(["sh", "-c", &script])` + +### Windows + +- `cmd.exe` always at `C:\Windows\System32\cmd.exe` +- System32 DLLs always readable even under restricted tokens +- No installation needed, no provisioning +- Equivalent: `Plan::new(["cmd.exe", "/c", &script])` + +### Cross-Platform API + +```rust +// shell::run() uses the platform's native shell +shell::run("echo hello") // sh -c on Linux, cmd /c on Windows + +// No normalization between platforms. +// "ls -la" works on Linux, fails on Windows. Correct. +// "dir" works on Windows, fails on Linux. Correct. +``` + +Shell is a **zero-dep feature** (`shell = []` in Cargo.toml). No probe, no download, no runtime management. 
+ +--- + +## Mods: Self-Contained Capability Packs + +Mods are installable capability packs. Each mod bundles binaries + deps + security policy + API. The user (or agent) declares what mods it needs, evalbox provisions everything. + +**Initial mods: `python` and `sqlite` only.** + +### Concept + +```rust +let sb = Sandbox::new() + .with_mod(python::latest()) // auto-downloads 80MB on first use + .with_mod(sqlite::latest()) // auto-downloads 1MB on first use + .build()?; + +// Python code using SQLite -- all inside one sandbox +sb.python(r#" + import sqlite3 + conn = sqlite3.connect('/work/data.db') + conn.execute('CREATE TABLE users(id INTEGER, name TEXT)') + conn.execute("INSERT INTO users VALUES (1, 'Alice')") + print(conn.execute('SELECT * FROM users').fetchall()) +"#).exec()?; + +// Or use SQLite CLI directly +sb.exec(["sqlite3", "/work/data.db", ".tables"])?; +``` + +First call downloads the mod, caches in `~/.evalbox/mods/`. Subsequent calls instant. + +### Why Mods (Not Just Cargo Features) + +| Concern | Cargo Feature | Mod | +|---------|--------------|-----| +| What | Compile-time code inclusion | Runtime binary provisioning | +| When | Build time | First use (lazy download) | +| Where | In the evalbox binary | In `~/.evalbox/mods/` on disk | +| Size | Part of binary | Separate download | +| Example | `#[cfg(feature = "python")]` compiles probe code | `python::latest()` ensures python3 binary exists | + +Both needed: feature enables code path, mod provides binary. 
+ +### Mod Trait + +```rust +pub trait Mod: Send + Sync { + fn name(&self) -> &str; + fn provision(&self) -> Result<ModPaths>; + fn security_policy(&self) -> SecurityPolicy; + fn mounts(&self) -> Vec<Mount>; + fn env(&self) -> HashMap<String, String>; +} + +pub struct ModPaths { + pub root: PathBuf, // ~/.evalbox/mods/python/3.12.8/ + pub binary: PathBuf, // ~/.evalbox/mods/python/3.12.8/bin/python3 +} + +pub struct SecurityPolicy { + pub needs_network: bool, + pub max_memory: u64, + pub max_pids: u32, + pub extra_syscalls: Vec<i64>, +} +``` + +### Mod: Python + +**Source**: python-build-standalone (Astral) -- 70M+ downloads, used by uv/rye/mise/hatch. + +``` +~/.evalbox/mods/python/3.12.8/ + bin/python3 # Relocatable, full CPython + lib/python3.12/ # Full stdlib (includes sqlite3 module!) + mod.toml +``` + +- **Size**: ~80 MB stripped +- **Security**: R/W workspace, RO stdlib, no network, 256MB memory, 32 pids +- `import sqlite3` works out of the box (python-build-standalone includes libsqlite3) + +### Mod: SQLite + +**Source**: Static sqlite3 binary from sqlite.org. + +``` +~/.evalbox/mods/sqlite/3.50.0/ + bin/sqlite3 # Static binary + mod.toml +``` + +- **Size**: ~1 MB +- **Security**: R/W workspace only, no network, 128MB memory, 4 pids +- Use cases: CLI SQL execution, pre-seeded DBs for teaching, agent state persistence + +### Provisioning + +``` +~/.evalbox/ + mods/ + python/3.12.8/ # Extracted python-build-standalone + sqlite/3.50.0/ # Static sqlite3 binary + manifest.toml # Tracks installed mods + versions + checksums +``` + +**manifest.toml:** +```toml +[python] +version = "3.12.8" +sha256 = "abc123..." +url = "https://github.com/astral-sh/python-build-standalone/releases/..." + +[sqlite] +version = "3.50.0" +sha256 = "def456..."
+url = "https://sqlite.org/2025/sqlite-tools-linux-x64-3500000.zip" +``` + +### CLI + +```bash +evalbox mod install python # Download latest Python +evalbox mod install python@3.11 # Specific version +evalbox mod install sqlite # Download SQLite CLI +evalbox mod list # Show installed +evalbox mod remove python@3.11 # Remove +``` + +### Resolution Chain + +```rust +fn resolve_mod(name: &str, version: &str) -> Result<ModPaths> { + // 1. Env var override (EVALBOX_PYTHON, EVALBOX_SQLITE) + // 2. Cached in ~/.evalbox/mods/ + // 3. Auto-download (opt-out, enabled by default) +} +``` + +### Crate: `evalbox-mods` + +``` +crates/evalbox-mods/ + src/ + lib.rs # Mod trait + registry + manifest.rs # Parse/write manifest.toml + download.rs # HTTP fetch + sha256 verify + extract.rs # tar.gz / tar.zst / zip + python.rs # Python mod + sqlite.rs # SQLite mod +``` + +```toml +[package] +name = "evalbox-mods" + +[dependencies] +evalbox-sandbox = { workspace = true } +reqwest = { version = "0.12", features = ["rustls-tls"], optional = true } +sha2 = "0.10" +flate2 = "1" +tar = "0.4" +toml = "0.8" +dirs = "6" + +[features] +default = ["download", "python", "sqlite"] +download = ["reqwest"] +python = [] +sqlite = [] +``` + +### Distribution + +| Channel | How mods are available | +|---------|----------------------| +| **crates.io** | `evalbox mod install` or auto-download | +| **pip/npm** | Auto-download on first use | +| **Nix flake** | Nixpkgs paths via env vars, zero download | +| **Docker** | Pre-installed, offline mode | + +### Nix Integration + +```nix +postInstall = '' + wrapProgram $out/bin/evalbox \ + --set EVALBOX_PYTHON ${pkgs.python312}/bin/python3 \ + --set EVALBOX_SQLITE ${pkgs.sqlite}/bin/sqlite3 +''; +``` + +### Future Mods (not now) + +| Mod | Size | Use Case | +|-----|------|----------| +| node | ~60 MB | JavaScript/TypeScript | +| go | ~500 MB | Go compilation + execution | +| ffmpeg | ~70 MB | Media processing | +| redis | ~8 MB | Key-value store, agent memory | +| deno | ~100 MB |
Modern JS/TS | + +### Size Budget + +| Component | Size | +|-----------|------| +| evalbox binary | ~5 MB | +| Python mod | ~80 MB | +| SQLite mod | ~1 MB | +| **Total** | **~86 MB** | +| **Initial (no mods)** | **~5 MB** | + +--- + +## Bazel Build System + +With multi-platform + bindings, Bazel becomes worth it. Following stdbr's production setup with Bzlmod + rules_rust + crate_universe. + +### Why Bazel Now + +| Without Bazel | With Bazel | +|---------------|------------| +| `cargo build` per platform manually | `bazel build --platforms=//platforms:windows` | +| Separate maturin/napi-cli/cbindgen invocations | One `bazel build //...` builds everything | +| CI scripts glue together builds | Hermetic, reproducible, cached | +| Cross-compilation is manual toolchain setup | `extra_target_triples` in MODULE.bazel | + +### Root Files + +**MODULE.bazel:** +```python +module(name = "evalbox", version = "0.1.1") + +bazel_dep(name = "rules_rust", version = "0.70.0") +bazel_dep(name = "rules_python", version = "1.6.3") +bazel_dep(name = "bazel_skylib", version = "1.8.2") +bazel_dep(name = "platforms", version = "1.0.0") + +# Rust toolchain with cross-compilation targets +rust = use_extension("@rules_rust//rust:extensions.bzl", "rust") +rust.toolchain( + edition = "2024", + versions = ["1.87.0"], + extra_target_triples = [ + "x86_64-pc-windows-gnu", + "aarch64-unknown-linux-gnu", + ], +) +use_repo(rust, "rust_toolchains") +register_toolchains("@rust_toolchains//:all") + +# crate_universe reads Cargo.toml workspace +crate = use_extension("@rules_rust//crate_universe:extensions.bzl", "crate") +crate.from_cargo( + name = "crates", + cargo_lockfile = "//:Cargo.lock", + manifests = [ + "//:Cargo.toml", + "//crates/evalbox:Cargo.toml", + "//crates/evalbox-sys:Cargo.toml", + "//crates/evalbox-sandbox:Cargo.toml", + "//crates/evalbox-win32:Cargo.toml", + "//bindings/python:Cargo.toml", + "//bindings/nodejs:Cargo.toml", + "//bindings/ffi-c:Cargo.toml", + ], +) +use_repo(crate, 
"crates") +``` + +**BUILD.bazel (root):** +```python +exports_files(["Cargo.toml", "Cargo.lock"]) +``` + +**.bazelrc:** +``` +common --enable_bzlmod +test --test_output=errors +build --action_env=CC +build --action_env=PATH + +# Platform configs +build:linux --platforms=//platforms:linux_x86_64 +build:windows --platforms=//platforms:windows_x86_64 + +# Release optimization +build:release -c opt +build:release --@rules_rust//:extra_rustc_flags=-Cstrip=symbols,-Cpanic=abort,-Ccodegen-units=1,-Copt-level=3,-Clto=thin +``` + +### Platform Definitions + +**platforms/BUILD.bazel:** +```python +platform( + name = "linux_x86_64", + constraint_values = [ + "@platforms//os:linux", + "@platforms//cpu:x86_64", + ], +) + +platform( + name = "linux_aarch64", + constraint_values = [ + "@platforms//os:linux", + "@platforms//cpu:aarch64", + ], +) + +platform( + name = "windows_x86_64", + constraint_values = [ + "@platforms//os:windows", + "@platforms//cpu:x86_64", + ], +) +``` + +### Core Crates BUILD Files + +**crates/evalbox-sys/BUILD.bazel:** +```python +load("@rules_rust//rust:defs.bzl", "rust_library", "rust_test") + +rust_library( + name = "evalbox-sys", + srcs = glob(["src/**/*.rs"]), + crate_root = "src/lib.rs", + # Linux-only crate + target_compatible_with = ["@platforms//os:linux"], + deps = [ + "@crates//:libc", + "@crates//:rustix", + ], + visibility = ["//visibility:public"], +) + +rust_test( + name = "evalbox-sys-test", + crate = ":evalbox-sys", +) +``` + +**crates/evalbox-win32/BUILD.bazel:** +```python +load("@rules_rust//rust:defs.bzl", "rust_library", "rust_test") + +rust_library( + name = "evalbox-win32", + srcs = glob(["src/**/*.rs"]), + crate_root = "src/lib.rs", + # Windows-only crate + target_compatible_with = ["@platforms//os:windows"], + deps = [ + "@crates//:windows-sys", + "@crates//:thiserror", + ], + visibility = ["//visibility:public"], +) +``` + +**crates/evalbox-sandbox/BUILD.bazel:** +```python +load("@rules_rust//rust:defs.bzl", "rust_library", 
"rust_test") + +rust_library( + name = "evalbox-sandbox", + srcs = glob(["src/**/*.rs"]), + crate_root = "src/lib.rs", + deps = [ + "@crates//:cfg-if", + "@crates//:thiserror", + "@crates//:tempfile", + ] + select({ + "@platforms//os:linux": [ + "//crates/evalbox-sys", + "@crates//:libc", + "@crates//:rustix", + "@crates//:mio", + ], + "@platforms//os:windows": [ + "//crates/evalbox-win32", + "@crates//:windows-sys", + ], + }), + crate_features = select({ + "@platforms//os:linux": ["linux"], + "@platforms//os:windows": ["windows"], + }), + visibility = ["//visibility:public"], +) + +rust_test( + name = "evalbox-sandbox-test", + crate = ":evalbox-sandbox", +) +``` + +**crates/evalbox/BUILD.bazel:** +```python +load("@rules_rust//rust:defs.bzl", "rust_library", "rust_test") + +rust_library( + name = "evalbox", + srcs = glob(["src/**/*.rs"]), + crate_root = "src/lib.rs", + crate_features = ["python", "go", "shell"], + deps = [ + "//crates/evalbox-sandbox", + "@crates//:serde", + "@crates//:serde_json", + "@crates//:thiserror", + "@crates//:which", + ], + visibility = ["//visibility:public"], +) + +rust_test( + name = "evalbox-test", + crate = ":evalbox", +) +``` + +### Bindings BUILD Files + +**bindings/python/BUILD.bazel** (stdbr pattern): +```python +load("@rules_rust//rust:defs.bzl", "rust_shared_library") + +exports_files(["Cargo.toml"] + glob(["src/**/*.rs"])) + +rust_shared_library( + name = "evalbox-python", + srcs = glob(["src/**/*.rs"]), + crate_root = "src/lib.rs", + deps = [ + "//crates/evalbox", + "@crates//:pyo3", + ], + visibility = ["//visibility:public"], +) + +# Rename to Python-importable module +genrule( + name = "evalbox-pymodule", + srcs = [":evalbox-python"], + outs = ["evalbox.abi3.so"], + cmd = "cp $< $@", + visibility = ["//visibility:public"], +) +``` + +**bindings/nodejs/BUILD.bazel** (stdbr pattern): +```python +load("@rules_rust//rust:defs.bzl", "rust_shared_library") +load("@rules_rust//cargo:defs.bzl", "cargo_build_script") + 
+exports_files(["Cargo.toml"] + glob(["src/**/*.rs"])) + +cargo_build_script( + name = "napi_build_script", + srcs = ["build.rs"], + deps = ["@crates//:napi-build"], +) + +rust_shared_library( + name = "evalbox-napi", + srcs = glob(["src/**/*.rs"]), + crate_root = "src/lib.rs", + deps = [ + "//crates/evalbox", + "@crates//:napi", + ":napi_build_script", + ], + proc_macro_deps = ["@crates//:napi-derive"], + visibility = ["//visibility:public"], +) + +# Rename to Node-loadable module +genrule( + name = "evalbox-node", + srcs = [":evalbox-napi"], + outs = ["evalbox.node"], + cmd = "cp $< $@", + visibility = ["//visibility:public"], +) +``` + +**bindings/ffi-c/BUILD.bazel** (stdbr pattern): +```python +load("@rules_rust//rust:defs.bzl", "rust_shared_library", "rust_static_library") +load("//tools/rules_rust_extras:cbindgen.bzl", "cbindgen") + +exports_files(["Cargo.toml"] + glob(["src/**/*.rs"])) + +rust_shared_library( + name = "evalbox-ffi-shared", + srcs = glob(["src/**/*.rs"]), + crate_root = "src/lib.rs", + deps = ["//crates/evalbox"], + visibility = ["//visibility:public"], +) + +rust_static_library( + name = "evalbox-ffi-static", + srcs = glob(["src/**/*.rs"]), + crate_root = "src/lib.rs", + deps = ["//crates/evalbox"], + visibility = ["//visibility:public"], +) + +cbindgen( + name = "evalbox-header", + srcs = glob(["src/**/*.rs"]), + config = "cbindgen.toml", + crate_name = "evalbox-ffi", + header_name = "evalbox.h", + lockfile = "//:Cargo.lock", + manifest = "//:Cargo.toml", + workspace_srcs = [ + "//crates/evalbox:Cargo.toml", + "//crates/evalbox-sys:Cargo.toml", + "//crates/evalbox-sandbox:Cargo.toml", + ":Cargo.toml", + "//bindings/python:Cargo.toml", + "//bindings/nodejs:Cargo.toml", + ], + visibility = ["//visibility:public"], +) +``` + +### Custom cbindgen Rule + +**tools/rules_rust_extras/cbindgen.bzl** (copied from stdbr): +```python +def _cbindgen_impl(ctx): + output = ctx.actions.declare_file(ctx.attr.header_name) + + cmd = " ".join([ + "cbindgen", + 
"--config", ctx.file.config.path, + "--lockfile", ctx.file.lockfile.path, + "--crate", ctx.attr.crate_name, + "--output", output.path, + ctx.file.manifest.dirname, + ]) + + all_inputs = ( + ctx.files.srcs + + ctx.files.workspace_srcs + + [ctx.file.config, ctx.file.lockfile, ctx.file.manifest] + ) + + ctx.actions.run_shell( + inputs = all_inputs, + outputs = [output], + command = cmd, + use_default_shell_env = True, + mnemonic = "Cbindgen", + progress_message = "Generating C header %{output}", + ) + + return [ + DefaultInfo(files = depset([output])), + CcInfo( + compilation_context = cc_common.create_compilation_context( + headers = depset([output]), + system_includes = depset([output.dirname]), + ), + ), + ] + +cbindgen = rule( + implementation = _cbindgen_impl, + attrs = { + "srcs": attr.label_list(allow_files = [".rs"]), + "config": attr.label(allow_single_file = [".toml"]), + "manifest": attr.label(allow_single_file = ["Cargo.toml"]), + "lockfile": attr.label(allow_single_file = ["Cargo.lock"]), + "workspace_srcs": attr.label_list(allow_files = True), + "crate_name": attr.string(), + "header_name": attr.string(default = "evalbox.h"), + }, +) +``` + +### Parity Tests BUILD + +**tests/parity/BUILD.bazel:** +```python +load("@rules_rust//rust:defs.bzl", "rust_test") + +# Generate golden vectors +genrule( + name = "generate_vectors", + srcs = [], + outs = ["vectors.json"], + cmd = "$(location //tools/parity_gen) > $@", + tools = ["//tools/parity_gen"], +) + +# Rust parity test +rust_test( + name = "parity_rust", + srcs = ["parity_rust.rs"], + data = [":vectors.json"], + deps = [ + "//crates/evalbox", + "@crates//:serde_json", + ], +) + +# Python parity test +sh_test( + name = "parity_python", + srcs = ["run_python_parity.sh"], + data = [ + ":vectors.json", + "//bindings/python:evalbox-pymodule", + "test_parity.py", + ], +) + +# Node.js parity test +sh_test( + name = "parity_nodejs", + srcs = ["run_nodejs_parity.sh"], + data = [ + ":vectors.json", + 
"//bindings/nodejs:evalbox-node", + "test_parity.js", + ], +) + +# C parity test +cc_test( + name = "parity_c", + srcs = ["test_parity.c"], + data = [":vectors.json"], + deps = [ + "//bindings/ffi-c:evalbox-ffi-static", + "//bindings/ffi-c:evalbox-header", + ], +) +``` + +### Usage + +```bash +# Build everything for current platform +bazel build //... + +# Cross-compile for Windows +bazel build //crates/evalbox --config=windows + +# Build only Python binding +bazel build //bindings/python:evalbox-pymodule + +# Run all tests +bazel test //... + +# Run parity tests only +bazel test //tests/parity/... + +# Release build +bazel build //... --config=release +``` + +### Coexistence with Cargo + Nix + +Bazel and Cargo coexist. `crate.from_cargo()` reads the existing `Cargo.toml`/`Cargo.lock` so there's no duplication of dependency versions. Nix provides the dev environment (Rust toolchain, system deps), Bazel provides the build system for multi-platform + bindings. Developers can still use `cargo test` for fast iteration on Linux. + +``` +# Dev workflow: nix develop → cargo test +# CI/release workflow: nix develop → bazel build //... --config=release +# Cross-compile: nix develop → bazel build //... --config=windows +``` + +--- + +## Rust Library Patterns + +Patterns de crates top-tier (reqwest, tokio, bevy, mio, serde) que devem guiar o evalbox. + +### Builder Pattern + +Dois estilos: **consuming** (`self`) e **borrowing** (`&mut self`). + +**Consuming** (reqwest, tonic) -- cada metodo consome e retorna o builder. Ideal pra one-shot: + +```rust +// reqwest style +pub struct ClientBuilder { config: Config } + +impl ClientBuilder { + pub fn new() -> Self { /* ... */ } + pub fn timeout(mut self, timeout: Duration) -> Self { /* ... */ } + pub fn user_agent(mut self, agent: &str) -> Self { /* ... 
*/ } + pub fn build(self) -> Result<Client, Error> { /* valida e constroi */ } +} + +// uso: +let client = Client::builder() + .timeout(Duration::from_secs(30)) + .user_agent("evalbox/0.1") + .build()?; +``` + +**Borrowing** (`std::process::Command`) -- metodos retornam `&mut Self`, builder reutilizavel: + +```rust +let mut cmd = Command::new("cargo"); +cmd.arg("build"); +if release { cmd.arg("--release"); } +cmd.spawn()?; +``` + +**evalbox hoje**: Usa consuming corretamente nos builders (Python, Go, Shell). Manter. + +### Error Handling + +**Flat enum + thiserror** (padrao para crates pequenos/medios): + +```rust +#[derive(Debug, Error)] +#[non_exhaustive] // CRITICO: permite adicionar variants sem breaking change +pub enum Error { + #[error("I/O error: {0}")] + Io(#[from] std::io::Error), + + #[error("timeout after {timeout_ms}ms")] + Timeout { timeout_ms: u64 }, +} +``` + +**Opaque error struct** (reqwest, para crates grandes): + +```rust +pub struct Error { + inner: Box<Inner>, // tamanho pequeno no stack +} + +struct Inner { + kind: Kind, // Kind e PRIVADO + source: Option<Box<dyn std::error::Error + Send + Sync>>, +} + +impl Error { + pub fn is_timeout(&self) -> bool { /* ... */ } + pub fn is_connect(&self) -> bool { /* ... */ } +} +``` + +**Regra**: `#[non_exhaustive]` em TODA enum publica que pode crescer. Sem excecao. + +### Feature Flags (tokio pattern) + +```toml +[features] +default = [] + +# Meta-feature: tudo estavel +full = ["fs", "net", "time", "sync", "macros"] + +# Granulares +fs = [] +net = ["mio/os-poll", "mio/net"] +time = [] +sync = [] +macros = ["tokio-macros"] +``` + +Regras: +1. **Features sao aditivas** -- habilitar nunca remove API +2. **`--all-features` deve compilar** -- features nao podem conflitar +3.
**`full` = tudo estavel** -- instavel fica atras de `cfg` flag, nao feature + +### Re-Export Strategy (Facade Crate) + +**bevy**: facade crate re-exporta tudo: + +```rust +// bevy/src/lib.rs (facade) +pub use bevy_internal::*; + +// bevy_internal/src/lib.rs (agregacao) +pub use bevy_app as app; +pub use bevy_ecs as ecs; +pub mod prelude { + pub use bevy_app::prelude::*; + pub use bevy_ecs::prelude::*; +} +``` + +**tokio**: crate unico com modulos feature-gated: + +```rust +#[cfg(feature = "net")] +#[cfg_attr(docsrs, doc(cfg(feature = "net")))] +pub mod net; +``` + +**evalbox hoje**: Ja usa facade pattern (evalbox re-exporta de evalbox-sandbox). Correto. + +### cfg-Gated Modules (tokio wrapper macro) + +Evita repetir `#[cfg(...)]` + `#[cfg_attr(docsrs, ...)]` em cada item: + +```rust +// Macro interna +macro_rules! cfg_linux { + ($($item:item)*) => { + $( + #[cfg(target_os = "linux")] + #[cfg_attr(docsrs, doc(cfg(target_os = "linux")))] + $item + )* + } +} + +// Uso: +cfg_linux! { + pub mod lockdown; + pub mod monitor; +} +``` + +**Aplicacao no evalbox**: Criar `cfg_linux!` e `cfg_windows!` macros em `evalbox-sandbox/src/macros.rs`. + +### Platform Abstraction (mio pattern) + +**Sealed trait** -- trait que externos podem USAR mas nao IMPLEMENTAR: + +```rust +mod private { pub trait Sealed {} } + +pub trait Source: private::Sealed { + fn register(&mut self, ...) -> io::Result<()>; +} + +// Apenas tipos internos implementam +impl private::Sealed for TcpListener {} +impl Source for TcpListener { /* ... */ } +``` + +**mio platform dispatch**: + +```rust +// src/sys/mod.rs +#[cfg(unix)] +mod unix; +#[cfg(unix)] +pub use self::unix::*; + +#[cfg(windows)] +mod windows; +#[cfg(windows)] +pub use self::windows::*; +``` + +Cada modulo de plataforma exporta os mesmos tipos. API uniforme sem `dyn` dispatch. 
+ +### Public API Minimization + +```rust +// Modulos PRIVADOS, re-export seletivo +mod config; // privado +mod client; // privado +mod error; // privado + +pub use config::Config; +pub use client::Client; +pub use error::Error; +// Usuarios veem mycrate::Client, nao mycrate::client::Client +``` + +Ferramentas: +- `pub(crate)` -- visivel dentro do crate, invisivel pros usuarios +- `pub(super)` -- visivel so pro modulo pai +- `#[doc(hidden)]` -- publico (pra macros) mas escondido da docs +- Sealed traits -- previne impl externo + +### Prelude + +Usar **apenas** quando >80% dos usuarios precisam dos mesmos imports em todo arquivo: + +```rust +pub mod prelude { + // Traits com `as _` -- importa metodos sem poluir namespace + pub use crate::future::{FutureExt as _, TryFutureExt as _}; + pub use crate::stream::StreamExt as _; +} +``` + +**evalbox**: API pequena, NAO precisa de prelude. `use evalbox::{python, Session}` e suficiente. + +### Type State (compile-time guarantees) + +```rust +pub struct NoUrl; +pub struct HasUrl; + +pub struct RequestBuilder<State> { + url: Option<String>, + _state: PhantomData<State>, +} + +// send() so existe quando URL esta setada +impl RequestBuilder<HasUrl> { + pub fn send(self) -> Response { /* ... */ } +} +``` + +**evalbox**: Nao precisa. Builders sao simples o suficiente com validacao em `exec()`. + +--- + +## Audit: evalbox API Atual + +### O Que Esta Bom + +| Aspecto | Status | Detalhe | +|---------|--------|---------| +| Builder pattern | Excelente | Consuming, consistente, `#[must_use]` | +| API tiers | Claro | Tier 1-4 documentado | +| Probe framework | Bom | Trait extensivel, cache com mtime | +| Module exports | Bom | Estratificacao clara | +| Naming conventions | Correto | Segue Rust API Guidelines | + +### O Que Precisa Corrigir + +#### 1. Errors NAO sao `#[non_exhaustive]` (CRITICO) + +```rust +// HOJE (quebra se adicionar variant): +#[derive(Debug, Error)] +pub enum Error { + Sandbox(String), + Validation(String), + // ...
+} + +// CORRETO: +#[derive(Debug, Error)] +#[non_exhaustive] +pub enum Error { + // ... +} +``` + +Afeta: `Error`, `ProbeError`, `ExecutorError` + +#### 2. Error handling inconsistente no Session + +```rust +// Session mistura io::Result e custom Result: +pub fn spawn(&mut self, plan: Plan) -> Result<SandboxId> // custom +pub fn poll(&mut self) -> io::Result<Vec<Event>> // io +pub fn kill(&mut self, id: SandboxId) -> io::Result<()> // io +``` + +Deveria usar `Result<T>` (custom) em tudo, com `Io(#[from] io::Error)` no enum. + +#### 3. ExecutorError perde info na conversao + +```rust +// HOJE: +impl From<evalbox_sandbox::ExecutorError> for Error { + fn from(e: evalbox_sandbox::ExecutorError) -> Self { + Self::Sandbox(e.to_string()) // PERDE o tipo original + } +} + +// CORRETO: +#[non_exhaustive] +pub enum Error { + #[error(transparent)] + Executor(#[from] evalbox_sandbox::ExecutorError), + // ... +} +``` + +#### 4. String-based error variants + +```rust +// HOJE: +Sandbox(String), // generico demais +Validation(String), // sem estrutura + +// MELHOR: +#[error("sandbox execution failed")] +Sandbox(#[source] ExecutorError), + +#[error("validation failed: {field}")] +Validation { field: &'static str, reason: String }, +``` + +#### 5. Features declaradas mas nao implementadas + +```toml +# Cargo.toml do evalbox: +node = ["dep:serde", "dep:serde_json", "dep:tempfile"] # SEM CODIGO +rust-lang = ["dep:tempfile", "dep:serde", "dep:serde_json"] # SEM CODIGO +``` + +Remover ou marcar como `# TODO: not yet implemented`. + +#### 6. Global state nos probe caches + +```rust +// Python e Go tem LazyLock global: +static PROBE_CACHE: LazyLock<ProbeCache> = LazyLock::new(ProbeCache::new); +``` + +Funciona, mas nao e configuravel. Aceitavel por agora -- resolver quando adicionar Mods. + +#### 7. Plan fields publicos demais + +```rust +// Testes acessam campos diretamente: +assert_eq!(plan.cmd, vec!["sh", "-c", "echo hello"]); +assert_eq!(plan.network_blocked, true); +``` + +Plan deveria ter getters, campos `pub(crate)`.
Mas e breaking change -- resolver no refactor. + +#### 8. docsrs annotations ausentes + +```rust +// HOJE: +#[cfg(feature = "python")] +pub mod python; + +// CORRETO (mostra no docs.rs qual feature ativa): +#[cfg(feature = "python")] +#[cfg_attr(docsrs, doc(cfg(feature = "python")))] +pub mod python; +``` + +### Checklist de Correcoes + +Ordenado por prioridade (fazer ANTES do refactor multi-platform): + +| # | Correcao | Arquivos | Breaking? | +|---|----------|----------|-----------| +| 1 | `#[non_exhaustive]` em todas enums publicas | `error.rs`, `evalbox-sandbox/src/executor.rs` | Sim (minor bump) | +| 2 | Unificar error handling no Session | `session.rs` | Sim | +| 3 | `#[from] ExecutorError` ao inves de `String` | `error.rs` | Sim | +| 4 | Remover features `node`/`rust-lang` nao implementadas | `Cargo.toml` | Sim (remover feature e breaking) | +| 5 | Adicionar `#[cfg_attr(docsrs, ...)]` | `lib.rs`, modulos feature-gated | Nao | +| 6 | Criar `cfg_linux!` macro wrapper | novo `macros.rs` | Nao | +| 7 | Plan fields → `pub(crate)` + getters | `plan.rs` | Sim | + +Correcoes 1-4 e 7 sao breaking changes → fazer juntas num bump de versao 0.2.0 ANTES do refactor sys/.
+ +--- + +## Summary + +| Decision | Choice | Rationale | +|----------|--------|-----------| +| Platform abstraction | crosvm `sys/` re-export | Simpler than traits for 2-platform case | +| Windows syscalls | Separate `evalbox-win32` crate | Clean compilation, no cfg spaghetti | +| Bindings location | `bindings/{python,nodejs,ffi-c}/` | stdbr proven pattern | +| Binding dependency | Only `evalbox` (public API) | Stable surface, no internal leaks | +| `notify/` module | `cfg(target_os = "linux")` gated | No Windows equivalent | +| Plan type | Unificado, zero `#[cfg]`, intent-based | Backend compila pra primitivas de cada OS | +| Event loop | mio (Linux) / IOCP (Windows) | Each platform's native model | +| Shell | Part of filesystem, not a mod | Zero deps, always available on both OS | +| Runtime provisioning | Mods (self-contained packs) | Auto-download, cached, per-mod security | +| Initial mods | Python + SQLite only | Highest value, smallest scope | +| Mod source (Python) | python-build-standalone (Astral) | 70M+ downloads, used by uv/rye/mise | +| Mod source (SQLite) | Static binary from sqlite.org | 1 MB, trivial, huge utility | +| Build system | Bazel (Bzlmod + rules_rust) | Multi-platform, bindings, caching | +| Dependency sync | `crate.from_cargo()` | Single source of truth in Cargo.toml | +| Cross-compilation | `extra_target_triples` + `--platforms` | Hermetic, declarative | +| cbindgen | Custom Starlark rule (from stdbr) | Returns CcInfo for downstream cc_test | +| Builder pattern | Consuming (`self`), manter atual | Ja consistente nos 3 runtimes | +| Error enums | `#[non_exhaustive]` em todas | Permite crescer sem breaking change | +| Error handling | Unificar pra custom `Result` | Session mistura `io::Result` hoje | +| Feature flags | Remover `node`/`rust-lang` nao usadas | Limpar antes do refactor | +| docsrs | `#[cfg_attr(docsrs, ...)]` em tudo gated | docs.rs mostra qual feature ativa | +| cfg macros | `cfg_linux!`/`cfg_windows!` wrappers | Reduz 
boilerplate de `#[cfg]` | +| API surface | `pub(crate)` + getters no Plan | Esconder fields internos | +| Pre-refactor bump | 0.2.0 com breaking fixes | Errors + Session + Plan antes do sys/ |