From 4d9701571c79698ac75aa324821e6a3c6450d22c Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Mon, 27 Apr 2026 23:59:55 -0400 Subject: [PATCH 01/44] Build and Link on 3DS --- build.zig | 305 ++++++++++++++++++++++++++++++-- src/platform/3ds/3ds_audio.zig | 34 ++++ src/platform/3ds/3ds_gfx.zig | 63 +++++++ src/platform/3ds/3ds_thread.zig | 25 +++ src/platform/3ds/input.zig | 24 +++ src/platform/3ds/services.zig | 30 ++++ src/platform/3ds/surface.zig | 29 +++ src/platform/audio.zig | 2 + src/platform/gfx.zig | 7 +- src/platform/input.zig | 2 + src/platform/thread.zig | 2 + src/root.zig | 8 + test/main.zig | 33 +++- 13 files changed, 539 insertions(+), 25 deletions(-) create mode 100644 src/platform/3ds/3ds_audio.zig create mode 100644 src/platform/3ds/3ds_gfx.zig create mode 100644 src/platform/3ds/3ds_thread.zig create mode 100644 src/platform/3ds/input.zig create mode 100644 src/platform/3ds/services.zig create mode 100644 src/platform/3ds/surface.zig diff --git a/build.zig b/build.zig index 03b5eb5..d72d621 100644 --- a/build.zig +++ b/build.zig @@ -6,6 +6,10 @@ pub const Platform = enum { linux, macos, psp, + /// Nintendo 3DS. Builtin os tag is `.@"3ds"`, but the Zig options + /// serializer can't emit `.@"3ds"` as an enum value literal, so the + /// internal Aether tag uses a leading-letter form. + nintendo_3ds, }; pub const Gfx = enum { @@ -59,6 +63,7 @@ pub const Config = struct { .macos => .macos, .linux => .linux, .psp => .psp, + .@"3ds" => .nintendo_3ds, else => |t| { std.debug.panic("Unsupported OS! 
{}\n", .{t}); }, @@ -141,6 +146,15 @@ fn macosGlfwPath(b: *std.Build) []const u8 { return p; } +var devkitpro_path_cached: ?[]const u8 = null; +fn devkitProPath(b: *std.Build) []const u8 { + if (devkitpro_path_cached) |p| return p; + const opt = b.option([]const u8, "devkitpro-path", "3DS: devkitPro install root (default: $DEVKITPRO or /opt/devkitpro)"); + const p = opt orelse b.graph.environ_map.get("DEVKITPRO") orelse "/opt/devkitpro"; + devkitpro_path_cached = p; + return p; +} + /// Creates an executable with the Aether engine module and all platform /// dependencies wired up. Returns the compile step so the caller can /// further customize it (install, add run steps, etc.). @@ -157,13 +171,23 @@ fn macosGlfwPath(b: *std.Build) []const u8 { pub fn addGame(owner: *std.Build, b: *std.Build, opts: GameOptions) *std.Build.Step.Compile { const config = Config.resolve(opts.target, opts.overrides); + // 3DS forces ofmt=c — there's no Zig-native backend for the Horizon + // ARM target yet, so we emit C and let an external 3DS toolchain + // (devkitARM/libctru) compile the result. Stub backends keep the + // engine compiling end-to-end until a real SDK is wired in. 
+ const target = if (config.platform == .nintendo_3ds) blk: { + var q = opts.target.query; + q.ofmt = .c; + break :blk b.resolveTargetQuery(q); + } else opts.target; + const options = b.addOptions(); options.addOption(Config, "config", config); const options_module = options.createModule(); const mod = b.addModule("Aether", .{ .root_source_file = owner.path("src/root.zig"), - .target = opts.target, + .target = target, .imports = &.{ .{ .name = "options", .module = options_module }, }, @@ -171,20 +195,23 @@ pub fn addGame(owner: *std.Build, b: *std.Build, opts: GameOptions) *std.Build.S // --- platform-specific engine dependencies --- const psp_dep = if (config.platform == .psp) owner.dependency("pspsdk", .{ - .target = opts.target, + .target = target, .optimize = opts.optimize, }) else null; if (psp_dep) |pd| { mod.addImport("pspsdk", pd.module("pspsdk")); + } else if (config.platform == .nintendo_3ds) { + // No 3DS SDK is wired in yet; stubs satisfy every backend + // contract so addImport calls are unnecessary here. } else { const zglfw = owner.dependency("zglfw", .{ - .target = opts.target, + .target = target, .optimize = opts.optimize, }); const glfw = owner.dependency("glfw_zig", .{ - .target = opts.target, + .target = target, .optimize = opts.optimize, }); @@ -204,14 +231,14 @@ pub fn addGame(owner: *std.Build, b: *std.Build, opts: GameOptions) *std.Build.S if (config.audio != .none) { const zaudio_dep = owner.dependency("zaudio", .{ - .target = opts.target, + .target = target, .optimize = opts.optimize, }); mod.addImport("zaudio", zaudio_dep.module("root")); mod.linkLibrary(zaudio_dep.artifact("miniaudio")); } - if (opts.target.result.os.tag == .macos) { + if (target.result.os.tag == .macos) { // Link MoltenVK directly as the Vulkan ICD -- no loader. 
Feeds // its vkGetInstanceProcAddr into GLFW via glfwInitVulkanLoader // in platform/glfw/surface.zig so GLFW doesn't dlopen libvulkan @@ -241,7 +268,7 @@ pub fn addGame(owner: *std.Build, b: *std.Build, opts: GameOptions) *std.Build.S .name = opts.name, .root_module = b.createModule(.{ .root_source_file = opts.root_source_file, - .target = opts.target, + .target = target, .optimize = opts.optimize, .strip = if (config.platform == .psp) false else null, .imports = &.{ @@ -267,6 +294,14 @@ pub fn addGame(owner: *std.Build, b: *std.Build, opts: GameOptions) *std.Build.S exe.subsystem = .windows; } + if (config.platform == .nintendo_3ds) { + // std/start.zig opts `.@"3ds"` out of exporting a default entry + // symbol, so without an explicit entry the linker DCEs `main` + // and the emitted C is constants-only. Force `main` to keep + // the whole call graph alive for the external 3DS toolchain. + exe.entry = .{ .symbol_name = "main" }; + } + return exe; } @@ -286,20 +321,27 @@ pub fn addHeadless(owner: *std.Build, b: *std.Build, opts: HeadlessOptions) *std config.gfx = .headless; config.audio = .none; + // 3DS forces ofmt=c (see addGame for details). 
+ const target = if (config.platform == .nintendo_3ds) blk: { + var q = opts.target.query; + q.ofmt = .c; + break :blk b.resolveTargetQuery(q); + } else opts.target; + const options = b.addOptions(); options.addOption(Config, "config", config); const options_module = options.createModule(); const mod = b.addModule("Aether", .{ .root_source_file = owner.path("src/root.zig"), - .target = opts.target, + .target = target, .imports = &.{ .{ .name = "options", .module = options_module }, }, }); const psp_dep = if (config.platform == .psp) owner.dependency("pspsdk", .{ - .target = opts.target, + .target = target, .optimize = opts.optimize, }) else null; @@ -311,7 +353,7 @@ pub fn addHeadless(owner: *std.Build, b: *std.Build, opts: HeadlessOptions) *std .name = opts.name, .root_module = b.createModule(.{ .root_source_file = opts.root_source_file, - .target = opts.target, + .target = target, .optimize = opts.optimize, .strip = if (config.platform == .psp) false else null, .imports = &.{ @@ -391,8 +433,18 @@ pub const ExportOptions = struct { icon_png: ?std.Build.LazyPath = null, /// Files to install into the app bundle. On macOS they land under /// `Contents/Resources/`. On desktop non-macOS they are copied - /// alongside the exe in `zig-out/bin/`. Ignored on PSP. + /// alongside the exe in `zig-out/bin/`. Ignored on PSP and 3DS. resources: []const Resource = &.{}, + /// 3DS: SMDH long description (the second line shown in the HOME + /// menu detail panel). Falls back to "Built with Aether" when empty. + smdh_long_description: []const u8 = "", + /// 3DS: SMDH author string. Empty leaves the field blank. + smdh_author: []const u8 = "", + /// 3DS: 48x48 PNG icon embedded in the SMDH. When null, libctru's + /// `default_icon.png` is used. + smdh_icon: ?std.Build.LazyPath = null, + /// 3DS: directory (or pre-built `.romfs`) embedded into the 3DSX. + romfs: ?std.Build.LazyPath = null, pub const Resource = struct { /// Source file to copy. 
@@ -416,6 +468,8 @@ pub fn exportArtifact(owner: *std.Build, b: *std.Build, exe: *std.Build.Step.Com // register on the downstream project's builder. const psp_dep = owner.dependency("pspsdk", .{}); _ = pspEbootPipeline(b, exe, psp_dep, opts); + } else if (config.platform == .nintendo_3ds) { + threedsxPipeline(b, exe, opts); } else if (config.platform == .macos) { macosAppBundle(b, exe, opts); } else { @@ -660,6 +714,188 @@ fn pspEbootPipeline(b: *std.Build, exe: *std.Build.Step.Compile, psp_dep: *std.B return result; } +/// Compiles the zig-emitted C with devkitARM, links against libctru, and +/// packages the ELF (plus an SMDH and optional RomFS) into a `.3dsx` +/// homebrew bundle. Mirrors `pspEbootPipeline` for the PSP toolchain. +fn threedsxPipeline(b: *std.Build, exe: *std.Build.Step.Compile, opts: ExportOptions) void { + // Derive a sibling target for compiler_rt: same cpu/abi/endianness + // as the game (so the calling conventions and float ABI match + // libctru), but os=freestanding (sidesteps the 3DS-specific posix + // dependencies in std) and the default object format (so this + // module compiles natively to an ELF object the gcc driver can + // consume, rather than .c). devkitARM's libgcc.a doesn't ship the + // 128-bit-int compiler-rt entry points (`__multi3`/`__divti3`/etc.), + // so we provide them ourselves from zig's compiler_rt. + const game_target = exe.root_module.resolved_target.?; + var crt_query = game_target.query; + crt_query.os_tag = .freestanding; + crt_query.ofmt = null; + // Explicitly pin the cpu model to whatever the game target + // resolved to (arm.mpcore for the 3DS). Without this, swapping + // os_tag to .freestanding loses the os-derived cpu choice and + // zig falls back to a generic baseline that emits ARMv6T2+ + // instructions (e.g. `mls`) the ARMv6K MPCore doesn't decode — + // crashes show up as "undefined instruction" in compiler_rt + // helpers like `__udivmodsi4`. 
+ crt_query.cpu_model = .{ .explicit = game_target.result.cpu.model }; + const crt_target = b.resolveTargetQuery(crt_query); + + const compiler_rt_path = b.pathJoin(&.{ + b.graph.zig_lib_directory.path orelse ".", + "compiler_rt.zig", + }); + const crt_obj = b.addObject(.{ + .name = "aether_3ds_compiler_rt", + .root_module = b.createModule(.{ + .root_source_file = .{ .cwd_relative = compiler_rt_path }, + .target = crt_target, + .optimize = .ReleaseSmall, + .strip = true, + }), + }); + + const dkp = devkitProPath(b); + + // Localize the libc-overlap symbols in the compiler_rt object. + // zig's compiler_rt re-exports `memset`/`memcpy`/`memmove` and + // their `__aeabi_*` shims as WEAK; the `__aeabi_memset` and + // `memset` versions form a recursive `bl` cycle that blows the + // stack on 32-bit ARM. Newlib has real implementations, but the + // linker won't reach for them while compiler_rt's weak version + // already resolves the reference. `--localize-symbol` turns the + // exports into local symbols that other objects cannot bind to, so + // the linker pulls newlib's strong implementations from libc.a. 
+ const strip_libc = b.addSystemCommand(&.{ + b.pathJoin(&.{ dkp, "devkitARM/bin/arm-none-eabi-objcopy" }), + "--localize-symbol=memset", + "--localize-symbol=memcpy", + "--localize-symbol=memmove", + "--localize-symbol=memcmp", + "--localize-symbol=__memset", + "--localize-symbol=__memcpy", + "--localize-symbol=__memmove", + "--localize-symbol=__memcpy_chk", + "--localize-symbol=__aeabi_memset", + "--localize-symbol=__aeabi_memset4", + "--localize-symbol=__aeabi_memset8", + "--localize-symbol=__aeabi_memcpy", + "--localize-symbol=__aeabi_memcpy4", + "--localize-symbol=__aeabi_memcpy8", + "--localize-symbol=__aeabi_memmove", + "--localize-symbol=__aeabi_memmove4", + "--localize-symbol=__aeabi_memmove8", + "--localize-symbol=strlen", + "--localize-symbol=bcmp", + }); + strip_libc.addArtifactArg(crt_obj); + const crt_clean = strip_libc.addOutputFileArg("aether_3ds_compiler_rt.o"); + const gcc = b.pathJoin(&.{ dkp, "devkitARM/bin/arm-none-eabi-gcc" }); + const tool_3dsx = b.pathJoin(&.{ dkp, "tools/bin/3dsxtool" }); + const tool_smdh = b.pathJoin(&.{ dkp, "tools/bin/smdhtool" }); + const ctru_inc = b.pathJoin(&.{ dkp, "libctru/include" }); + const ctru_lib = b.pathJoin(&.{ dkp, "libctru/lib" }); + const default_icon = b.pathJoin(&.{ dkp, "libctru/default_icon.png" }); + const zig_h_src = b.pathJoin(&.{ b.graph.zig_lib_directory.path orelse ".", "zig.h" }); + + // zig.h hardcodes `zig_align(16)` for its `zig_i128`/`zig_u128` + // struct fallback (used when `__int128` isn't supported by the C + // compiler — gcc on 32-bit ARM is one such target). But zig's own + // layout pass uses 8-byte alignment for i128 on 32-bit ARM, so + // the `_Static_assert(_Alignof(...) == 8)` baked into the + // generated C fails against gcc's 16-byte view. Patch the four + // sites down to `zig_align(8)`. The patched copy lands in a + // dedicated include dir we point gcc at first. 
+ const patch = b.addSystemCommand(&.{"sed"}); + patch.addArg("s/zig_align(16)/zig_align(8)/g"); + patch.addFileArg(.{ .cwd_relative = zig_h_src }); + const patched_zig_h = patch.captureStdOut(.{ .basename = "zig.h" }); + + const include_wf = b.addWriteFiles(); + _ = include_wf.addCopyFile(patched_zig_h, "zig.h"); + + // Standard 3DS arch flags from devkitPro's template Makefile. + const arch = [_][]const u8{ + "-march=armv6k", "-mtune=mpcore", "-mfloat-abi=hard", "-mtp=soft", + }; + + // Single-shot compile + link via the gcc driver. 3dsx.specs pulls + // in `_3dsx_crt0` (which calls our exported `main`) and the 3DSX + // linker script. + const link = b.addSystemCommand(&.{gcc}); + link.addArgs(&arch); + link.addArgs(&.{ + "-mword-relocations", "-ffunction-sections", + "-D__3DS__", "-DARM11", + "-O2", "-g", + "-specs=3dsx.specs", + // Pin the C standard to C11. zig.h picks `[[noreturn]]` under + // C23 but emits it in attribute-list position that gcc rejects; + // C11's `_Noreturn` is what zig's emitter actually targets. + "-std=gnu11", + // zig's -ofmt=c emitter treats `uintptr_t` and `uint32_t` as + // interchangeable on 32-bit ARM (they ARE the same width) but + // gcc 14+ promotes the resulting pointer-type mismatch from a + // warning to an error. Demote it and a couple of related + // chatters; we don't author this C and there's nothing + // actionable in the warnings. + "-Wno-incompatible-pointer-types", "-Wno-int-conversion", + "-Wno-builtin-declaration-mismatch", + }); + link.addArg(b.fmt("-I{s}", .{ctru_inc})); + link.addPrefixedDirectoryArg("-I", include_wf.getDirectory()); + link.addArg("-x"); + link.addArg("c"); + link.addArtifactArg(exe); + // Reset language so gcc treats subsequent inputs by extension; the + // compiler_rt object is ELF arm and `-x c` would mis-parse it. 
+ link.addArg("-x"); + link.addArg("none"); + link.addFileArg(crt_clean); + link.addArg(b.fmt("-L{s}", .{ctru_lib})); + link.addArgs(&.{ "-lctru", "-lm" }); + link.addArg("-o"); + const elf = link.addOutputFileArg(b.fmt("{s}.elf", .{exe.name})); + + // SMDH metadata (HOME-menu name, description, author, icon). + const smdh_run = b.addSystemCommand(&.{ tool_smdh, "--create" }); + smdh_run.addArg(if (opts.title.len > 0) opts.title else exe.name); + smdh_run.addArg(if (opts.smdh_long_description.len > 0) + opts.smdh_long_description + else + "Built with Aether"); + smdh_run.addArg(opts.smdh_author); + if (opts.smdh_icon) |icon| + smdh_run.addFileArg(icon) + else + smdh_run.addArg(default_icon); + const smdh = smdh_run.addOutputFileArg(b.fmt("{s}.smdh", .{exe.name})); + + // ELF -> 3DSX. The smdh and (optional) romfs ride in via the + // `--smdh=` / `--romfs=` flag-form args. + const pack = b.addSystemCommand(&.{tool_3dsx}); + pack.addFileArg(elf); + const threedsx = pack.addOutputFileArg(b.fmt("{s}.3dsx", .{exe.name})); + pack.addPrefixedFileArg("--smdh=", smdh); + if (opts.romfs) |r| pack.addPrefixedDirectoryArg("--romfs=", r); + + if (opts.output_dir) |dir| { + const alloc = b.allocator; + b.getInstallStep().dependOn(&b.addInstallBinFile( + threedsx, + std.mem.concat(alloc, u8, &.{ dir, "/", exe.name, ".3dsx" }) catch @panic("OOM"), + ).step); + b.getInstallStep().dependOn(&b.addInstallBinFile( + elf, + std.mem.concat(alloc, u8, &.{ dir, "/", exe.name, ".elf" }) catch @panic("OOM"), + ).step); + } else { + b.getInstallStep().dependOn(&b.addInstallBinFile( + threedsx, + b.fmt("{s}.3dsx", .{exe.name}), + ).step); + } +} + /// Registers a shader pair for the game executable. Slang sources are /// compiled to SPIR-V (Vulkan) or GLSL (OpenGL) via slangc. On /// shaderless platforms (PSP), empty stubs are provided. 
@@ -745,20 +981,51 @@ pub fn build(b: *std.Build) void { exportArtifact(b, b, exe, config, .{ .title = "Aether", - .output_dir = "Aether-PSP", + .output_dir = switch (config.platform) { + .psp => "Aether-PSP", + .nintendo_3ds => "Aether-3DS", + else => null, + }, + .smdh_long_description = "Aether engine test app", + .smdh_author = "Aether", }); const run_step = b.step("run", "Run the app"); - const run_cmd = b.addRunArtifact(exe); - run_step.dependOn(&run_cmd.step); + if (config.platform == .nintendo_3ds) { + // 3DS can't run natively on the host. The 3DS-side homebrew + // launcher listens for incoming .3dsx pushes on port 17491; + // `3dslink` finds it via a UDP broadcast or accepts an explicit IP. + const dkp = devkitProPath(b); + const link_cmd = b.addSystemCommand(&.{b.pathJoin(&.{ dkp, "tools/bin/3dslink" })}); + if (b.option([]const u8, "3dslink-address", "3DS: target IP for 3dslink push (default: mDNS auto-discover)")) |ip| { + link_cmd.addArgs(&.{ "-a", ip }); + } + if (b.option(u32, "3dslink-retries", "3DS: 3dslink retry count (default: 10)")) |n| { + link_cmd.addArgs(&.{ "-r", b.fmt("{d}", .{n}) }); + } + if (b.option(bool, "3dslink-server", "3DS: pass -s so 3dslink stays listening after the upload (useful for some Rosalina versions and for stdout relay)") orelse false) { + link_cmd.addArg("-s"); + } + link_cmd.addArg(b.getInstallPath(.bin, "Aether-3DS/Aether.3dsx")); + link_cmd.step.dependOn(b.getInstallStep()); + if (b.args) |args| link_cmd.addArgs(args); - run_cmd.step.dependOn(b.getInstallStep()); - if (b.args) |args| { - run_cmd.addArgs(args); + const link_step = b.step("3dslink", "Push the 3dsx to a networked 3DS via 3dslink"); + link_step.dependOn(&link_cmd.step); + + // `zig build run` aliases to 3dslink for 3DS so the same + // command works across host/PSP/3DS workflows. 
+ run_step.dependOn(&link_cmd.step); + } else { + const run_cmd = b.addRunArtifact(exe); + run_cmd.step.dependOn(b.getInstallStep()); + if (b.args) |args| run_cmd.addArgs(args); + run_step.dependOn(&run_cmd.step); } - // Engine unit tests (desktop only) - if (config.platform != .psp) { + // Engine unit tests (desktop only — PSP/3DS pull in symbols that + // can't be linked or analyzed under the test runner) + if (config.platform != .psp and config.platform != .nintendo_3ds) { const mod_tests = b.addTest(.{ .root_module = exe.root_module.import_table.get("aether").?, }); diff --git a/src/platform/3ds/3ds_audio.zig b/src/platform/3ds/3ds_audio.zig new file mode 100644 index 0000000..37410ff --- /dev/null +++ b/src/platform/3ds/3ds_audio.zig @@ -0,0 +1,34 @@ +//! 3DS audio backend stub. +//! +//! Empty slot-based PCM output that satisfies `audio_api.Interface`. +//! Real bring-up will route through libctru's NDSP (or CSND on older +//! firmwares). 24 voices is what NDSP exposes on retail hardware. + +const std = @import("std"); +const Stream = @import("../../audio/stream.zig").Stream; + +var audio_alloc: std.mem.Allocator = undefined; +var audio_io: std.Io = undefined; + +pub fn setup(alloc: std.mem.Allocator, io: std.Io) void { + audio_alloc = alloc; + audio_io = io; +} + +pub fn init() anyerror!void {} +pub fn deinit() void {} +pub fn update() void {} + +pub fn max_voices() u32 { + return 24; +} + +pub fn play_slot(_: u8, _: Stream) anyerror!void {} + +pub fn stop_slot(_: u8) void {} + +pub fn set_slot_gain_pan(_: u8, _: f32, _: f32) void {} + +pub fn is_slot_active(_: u8) bool { + return false; +} diff --git a/src/platform/3ds/3ds_gfx.zig b/src/platform/3ds/3ds_gfx.zig new file mode 100644 index 0000000..30a7a1a --- /dev/null +++ b/src/platform/3ds/3ds_gfx.zig @@ -0,0 +1,63 @@ +//! 3DS GPU backend stub. +//! +//! Empty template: every entry point satisfies `gfx_api.Interface` so the +//! engine compiles end-to-end on `-Dtarget=...-3ds`. Real bring-up will +//! 
plug libctru's GPU (citro3d / GPU command lists) in here. + +const std = @import("std"); +const Mat4 = @import("../../math/math.zig").Mat4; +const Rendering = @import("../../rendering/rendering.zig"); +const Pipeline = Rendering.Pipeline; +const Mesh = Rendering.mesh; +const Texture = Rendering.Texture; + +var render_alloc: std.mem.Allocator = undefined; +var render_io: std.Io = undefined; + +pub fn setup(alloc: std.mem.Allocator, io: std.Io) void { + render_alloc = alloc; + render_io = io; +} + +pub fn init() anyerror!void {} +pub fn deinit() void {} + +pub fn set_clear_color(_: f32, _: f32, _: f32, _: f32) void {} +pub fn set_alpha_blend(_: bool) void {} +pub fn set_depth_write(_: bool) void {} +pub fn set_fog(_: bool, _: f32, _: f32, _: f32, _: f32, _: f32) void {} +pub fn set_clip_planes(_: bool) void {} +pub fn set_proj_matrix(_: *const Mat4) void {} +pub fn set_view_matrix(_: *const Mat4) void {} + +pub fn start_frame() bool { + return false; +} + +pub fn end_frame() void {} +pub fn clear_depth() void {} +pub fn set_vsync(_: bool) void {} + +pub fn create_pipeline(_: Pipeline.VertexLayout, _: ?[:0]align(4) const u8, _: ?[:0]align(4) const u8) anyerror!Pipeline.Handle { + return 0; +} + +pub fn destroy_pipeline(_: Pipeline.Handle) void {} +pub fn bind_pipeline(_: Pipeline.Handle) void {} + +pub fn create_mesh(_: Pipeline.Handle) anyerror!Mesh.Handle { + return 0; +} + +pub fn destroy_mesh(_: Mesh.Handle) void {} +pub fn update_mesh(_: Mesh.Handle, _: []const u8) void {} +pub fn draw_mesh(_: Mesh.Handle, _: *const Mat4, _: usize, _: Mesh.Primitive) void {} + +pub fn create_texture(_: u32, _: u32, _: []align(16) u8) anyerror!Texture.Handle { + return 0; +} + +pub fn update_texture(_: Texture.Handle, _: []align(16) u8) void {} +pub fn bind_texture(_: Texture.Handle) void {} +pub fn destroy_texture(_: Texture.Handle) void {} +pub fn force_texture_resident(_: Texture.Handle) void {} diff --git a/src/platform/3ds/3ds_thread.zig b/src/platform/3ds/3ds_thread.zig new 
file mode 100644 index 0000000..3582702 --- /dev/null +++ b/src/platform/3ds/3ds_thread.zig @@ -0,0 +1,25 @@ +//! 3DS thread backend stub. +//! +//! Real implementation will wrap libctru's `threadCreate`/`threadJoin`. +//! Until then `spawn` returns `error.Unsupported` so callers fail fast +//! instead of silently returning a bogus handle. + +const std = @import("std"); +const api = @import("../thread_api.zig"); + +pub const Handle = u32; + +pub fn spawn(cfg: api.Config, comptime func: anytype, args: anytype) !Handle { + _ = cfg; + _ = func; + _ = args; + return error.Unsupported; +} + +pub fn join(_: Handle) void {} + +pub fn set_priority(_: Handle, _: api.Priority) anyerror!void {} + +pub fn current_priority() api.Priority { + return .normal; +} diff --git a/src/platform/3ds/input.zig b/src/platform/3ds/input.zig new file mode 100644 index 0000000..7ad25db --- /dev/null +++ b/src/platform/3ds/input.zig @@ -0,0 +1,24 @@ +//! 3DS input backend stub. +//! +//! Wires up to libctru's `hid` (button state, circle pad, touch) once an +//! SDK is available. For now every reading reads as neutral so the action +//! system is silent on 3DS. + +const std = @import("std"); +const core = @import("../../core/input/input.zig"); + +pub fn setup(_: std.mem.Allocator, _: std.Io) void {} + +pub fn init() anyerror!void {} + +pub fn deinit() void {} + +pub fn pump() void { + core.signal_frame_boundary(); +} + +pub fn apply_cursor_mode(_: core.CursorMode) void {} + +pub fn begin_text_input_session(_: core.TextInputTarget, _: core.TextInputOptions) anyerror!void {} + +pub fn end_text_input_session() void {} diff --git a/src/platform/3ds/services.zig b/src/platform/3ds/services.zig new file mode 100644 index 0000000..2b2c89e --- /dev/null +++ b/src/platform/3ds/services.zig @@ -0,0 +1,30 @@ +//! 3DS system services / entry shim. +//! +//! Exports a C-callable `main` that hands control to the user's Zig +//! `main`. `Init` is currently `undefined` — invoking the engine on +//! 
hardware will crash on the first real allocation. The shim exists +//! so the engine call graph is reachable from `-ofmt=c` codegen and +//! so libctru integration has a clear landing pad: when libctru is +//! wired in, build a real `std.process.Init` here (libctru-backed +//! `ArenaAllocator`, an `Io` implementation talking to the FS / SOC +//! services, an `Environ.Map`) and pass it to `root.main`. +//! +//! Stack default: libctru's 32 KB is far too small for any std-using +//! Zig code path. We override `__stacksize__` (a `WEAK` symbol in +//! libctru) with a strong export. 1 MB is comfortable; bump if engine +//! frames grow. + +const std = @import("std"); + +comptime { + @export(&entry, .{ .name = "main" }); + @export(&stack_size, .{ .name = "__stacksize__" }); +} + +var stack_size: u32 = 1 * 1024 * 1024; + +fn entry() callconv(.c) c_int { + const init: std.process.Init = undefined; + @import("root").main(init) catch return 1; + return 0; +} diff --git a/src/platform/3ds/surface.zig b/src/platform/3ds/surface.zig new file mode 100644 index 0000000..b1a12fb --- /dev/null +++ b/src/platform/3ds/surface.zig @@ -0,0 +1,29 @@ +//! 3DS surface stub. +//! +//! Top screen of an O3DS is 400x240; bottom touch screen is 320x240. The +//! real backend will likely advertise the top screen here and expose the +//! bottom one separately. For now `update` returns true so the engine +//! loop keeps ticking even though nothing is drawn. 
+ +const std = @import("std"); +const Self = @This(); + +alloc: std.mem.Allocator, + +pub fn init(_: *Self, _: u32, _: u32, _: [:0]const u8, _: bool, _: bool, _: bool) anyerror!void {} + +pub fn deinit(_: *Self) void {} + +pub fn update(_: *Self) bool { + return true; +} + +pub fn draw(_: *Self) void {} + +pub fn get_width(_: *Self) u32 { + return 400; +} + +pub fn get_height(_: *Self) u32 { + return 240; +} diff --git a/src/platform/audio.zig b/src/platform/audio.zig index 6fc0568..9e0e82e 100644 --- a/src/platform/audio.zig +++ b/src/platform/audio.zig @@ -12,6 +12,8 @@ pub const Api = if (options.config.audio == .none) @import("headless/headless_audio.zig") else if (builtin.os.tag == .psp) @import("psp/psp_audio.zig") +else if (builtin.os.tag == .@"3ds") + @import("3ds/3ds_audio.zig") else @import("glfw/audio.zig"); diff --git a/src/platform/gfx.zig b/src/platform/gfx.zig index 411938b..e583dda 100644 --- a/src/platform/gfx.zig +++ b/src/platform/gfx.zig @@ -10,7 +10,10 @@ const surface_iface = @import("surface.zig"); /// `gfx.api.start_frame()` resolve to direct function calls with no /// indirection. 
pub const Api = switch (options.config.gfx) { - .default => @import("psp/psp_gfx_ge.zig"), + .default => if (builtin.os.tag == .@"3ds") + @import("3ds/3ds_gfx.zig") + else + @import("psp/psp_gfx_ge.zig"), .opengl => @import("glfw/opengl/opengl_gfx.zig"), .vulkan => @import("glfw/vulkan/vulkan_gfx.zig"), .headless => @import("headless/headless_gfx.zig"), @@ -22,6 +25,8 @@ pub const Surface = if (options.config.gfx == .headless) @import("headless/surface.zig") else if (builtin.os.tag == .psp) @import("psp/surface.zig") +else if (builtin.os.tag == .@"3ds") + @import("3ds/surface.zig") else @import("glfw/surface.zig"); diff --git a/src/platform/input.zig b/src/platform/input.zig index 4fe0232..53b620d 100644 --- a/src/platform/input.zig +++ b/src/platform/input.zig @@ -11,6 +11,8 @@ pub const Api = if (options.config.gfx == .headless) @import("headless/input.zig") else if (builtin.os.tag == .psp) @import("psp/input.zig") +else if (builtin.os.tag == .@"3ds") + @import("3ds/input.zig") else @import("glfw/input.zig"); diff --git a/src/platform/thread.zig b/src/platform/thread.zig index d043677..6567b25 100644 --- a/src/platform/thread.zig +++ b/src/platform/thread.zig @@ -5,6 +5,8 @@ const thread_api = @import("thread_api.zig"); pub const Api = if (builtin.os.tag == .psp) @import("psp/psp_thread.zig") +else if (builtin.os.tag == .@"3ds") + @import("3ds/3ds_thread.zig") else @import("std_thread.zig"); diff --git a/src/root.zig b/src/root.zig index 4b07796..bdbed2f 100644 --- a/src/root.zig +++ b/src/root.zig @@ -13,6 +13,14 @@ pub const ctx_to_self = Util.ctx_to_self; /// Only available when `platform == .psp`; evaluates to `void` otherwise. pub const Psp = if (platform == .psp) @import("platform/psp/psp_dialogs.zig") else void; +// Pull in the 3DS entry shim on 3DS builds. Its comptime block +// `@export`s a C-callable `main` so `-ofmt=c` emits the full engine. 
+comptime { + if (platform == .nintendo_3ds) { + _ = @import("platform/3ds/services.zig"); + } +} + /// Comptime-known platform and graphics backend, resolved from build options. /// User code can switch on these for per-platform configuration without /// importing the build options module directly. diff --git a/test/main.zig b/test/main.zig index faf8d18..4c4bf59 100644 --- a/test/main.zig +++ b/test/main.zig @@ -18,14 +18,37 @@ comptime { pub const psp_stack_size: u32 = 256 * 1024; -// PSP: override panic/IO handlers that would otherwise pull in posix symbols. -pub const panic = if (ae.platform == .psp) sdk.extra.debug.panic else std.debug.FullPanic(std.debug.defaultPanic); -pub const std_options_debug_threaded_io = if (ae.platform == .psp) null else std.Io.Threaded.global_single_threaded; -pub const std_options_debug_io = if (ae.platform == .psp) sdk.extra.Io.psp_io else std.Io.Threaded.global_single_threaded.io(); -pub const std_options_cwd = if (ae.platform == .psp) psp_cwd else null; +// PSP and 3DS override panic/IO handlers that would otherwise pull in +// posix symbols (Io.Threaded references std.posix decls that don't exist +// for these targets). 3DS doesn't have an SDK wired up yet, so its +// debug_io is `undefined` for now — invoking it before libctru lands is +// UB, which matches the placeholder entry in `platform/3ds/services.zig`. +const is_freestanding_console = ae.platform == .psp or ae.platform == .nintendo_3ds; +// 3DS uses `no_panic` while libctru integration is stubbed: `std_options_debug_io` +// is `undefined` on 3DS, so `defaultPanic` would deref garbage when formatting, +// re-panic, and recurse until the stack blows. `no_panic` keeps the first fault +// as the only fault — a single ARM exception you can read off the screen rather +// than an unbounded loop through `memset`. 
+pub const panic = if (ae.platform == .psp) sdk.extra.debug.panic + else if (ae.platform == .nintendo_3ds) std.debug.no_panic + else std.debug.FullPanic(std.debug.defaultPanic); +pub const std_options_debug_threaded_io = if (is_freestanding_console) null else std.Io.Threaded.global_single_threaded; +pub const std_options_debug_io: std.Io = + if (ae.platform == .psp) sdk.extra.Io.psp_io + else if (ae.platform == .nintendo_3ds) undefined + else std.Io.Threaded.global_single_threaded.io(); +pub const std_options_cwd = + if (ae.platform == .psp) psp_cwd + else if (ae.platform == .nintendo_3ds) stub_cwd + else null; fn psp_cwd() std.Io.Dir { return .{ .handle = -1 }; } +fn stub_cwd() std.Io.Dir { + // Dir.Handle resolves to `void` on 3DS (posix.fd_t falls back to + // void in the empty system struct). Init with `{}` to match. + return .{ .handle = {} }; +} const Vertex = extern struct { uv: [2]i16, From 5c860ea5c6f71a3966fbd6bc6774cc47b5f732a7 Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Tue, 28 Apr 2026 00:49:28 -0400 Subject: [PATCH 02/44] Nintendo Switch build --- build.zig | 261 +++++++++++++++++++++++--- src/platform/audio.zig | 2 + src/platform/gfx.zig | 4 + src/platform/input.zig | 2 + src/platform/switch/input.zig | 24 +++ src/platform/switch/services.zig | 28 +++ src/platform/switch/surface.zig | 30 +++ src/platform/switch/switch_audio.zig | 35 ++++ src/platform/switch/switch_gfx.zig | 64 +++++++ src/platform/switch/switch_thread.zig | 25 +++ src/platform/thread.zig | 3 + src/root.zig | 8 +- test/main.zig | 41 ++-- 13 files changed, 476 insertions(+), 51 deletions(-) create mode 100644 src/platform/switch/input.zig create mode 100644 src/platform/switch/services.zig create mode 100644 src/platform/switch/surface.zig create mode 100644 src/platform/switch/switch_audio.zig create mode 100644 src/platform/switch/switch_gfx.zig create mode 100644 src/platform/switch/switch_thread.zig diff --git a/build.zig b/build.zig index d72d621..100e6ae 100644 --- 
a/build.zig +++ b/build.zig @@ -10,6 +10,12 @@ pub const Platform = enum { /// serializer can't emit `.@"3ds"` as an enum value literal, so the /// internal Aether tag uses a leading-letter form. nintendo_3ds, + /// Nintendo Switch. Zig 0.16 has no `switch`/`horizon` OS tag, so the + /// canonical target is `aarch64-freestanding-none` and we can't infer + /// the platform from `target.os.tag` alone. Opt in with + /// `-Dnintendo-switch=true`; `Config.resolve` then promotes a + /// freestanding aarch64 target to this variant. + nintendo_switch, }; pub const Gfx = enum { @@ -58,15 +64,26 @@ pub const Config = struct { use_cwd: bool = false, pub fn resolve(target: std.Build.ResolvedTarget, overrides: Overrides) Config { - const plat: Platform = switch (target.result.os.tag) { - .windows => .windows, - .macos => .macos, - .linux => .linux, - .psp => .psp, - .@"3ds" => .nintendo_3ds, - else => |t| { - std.debug.panic("Unsupported OS! {}\n", .{t}); - }, + const plat: Platform = blk: { + if (overrides.nintendo_switch == true) { + if (target.result.cpu.arch != .aarch64 or target.result.os.tag != .freestanding) { + std.debug.panic( + "-Dnintendo-switch=true requires -Dtarget=aarch64-freestanding-none (got {s}-{s})\n", + .{ @tagName(target.result.cpu.arch), @tagName(target.result.os.tag) }, + ); + } + break :blk .nintendo_switch; + } + break :blk switch (target.result.os.tag) { + .windows => .windows, + .macos => .macos, + .linux => .linux, + .psp => .psp, + .@"3ds" => .nintendo_3ds, + else => |t| { + std.debug.panic("Unsupported OS! {}\n", .{t}); + }, + }; }; const default_gfx: Gfx = switch (target.result.os.tag) { @@ -100,6 +117,9 @@ pub const Config = struct { psp_display_mode: ?PspDisplayMode = null, psp_mipmaps: ?bool = null, use_cwd: ?bool = null, + /// Promotes an `aarch64-freestanding-none` target to the + /// `nintendo_switch` platform. No effect when null/false. 
+ nintendo_switch: ?bool = null, }; }; @@ -171,11 +191,12 @@ fn devkitProPath(b: *std.Build) []const u8 { pub fn addGame(owner: *std.Build, b: *std.Build, opts: GameOptions) *std.Build.Step.Compile { const config = Config.resolve(opts.target, opts.overrides); - // 3DS forces ofmt=c — there's no Zig-native backend for the Horizon - // ARM target yet, so we emit C and let an external 3DS toolchain - // (devkitARM/libctru) compile the result. Stub backends keep the - // engine compiling end-to-end until a real SDK is wired in. - const target = if (config.platform == .nintendo_3ds) blk: { + // 3DS and Switch force ofmt=c — there's no Zig-native backend for + // either Horizon target yet, so we emit C and let an external + // toolchain (devkitARM/libctru on 3DS, devkitA64/libnx on Switch) + // compile the result. Stub backends keep the engine compiling + // end-to-end until a real SDK is wired in. + const target = if (config.platform == .nintendo_3ds or config.platform == .nintendo_switch) blk: { var q = opts.target.query; q.ofmt = .c; break :blk b.resolveTargetQuery(q); @@ -201,9 +222,9 @@ pub fn addGame(owner: *std.Build, b: *std.Build, opts: GameOptions) *std.Build.S if (psp_dep) |pd| { mod.addImport("pspsdk", pd.module("pspsdk")); - } else if (config.platform == .nintendo_3ds) { - // No 3DS SDK is wired in yet; stubs satisfy every backend - // contract so addImport calls are unnecessary here. + } else if (config.platform == .nintendo_3ds or config.platform == .nintendo_switch) { + // No 3DS/Switch SDK is wired in yet; stubs satisfy every + // backend contract so addImport calls are unnecessary here. 
} else { const zglfw = owner.dependency("zglfw", .{ .target = target, @@ -294,11 +315,13 @@ pub fn addGame(owner: *std.Build, b: *std.Build, opts: GameOptions) *std.Build.S exe.subsystem = .windows; } - if (config.platform == .nintendo_3ds) { - // std/start.zig opts `.@"3ds"` out of exporting a default entry - // symbol, so without an explicit entry the linker DCEs `main` - // and the emitted C is constants-only. Force `main` to keep - // the whole call graph alive for the external 3DS toolchain. + if (config.platform == .nintendo_3ds or config.platform == .nintendo_switch) { + // std/start.zig opts `.@"3ds"` and freestanding out of + // exporting a default entry symbol, so without an explicit + // entry the linker DCEs `main` and the emitted C is + // constants-only. Force `main` to keep the whole call graph + // alive for the external toolchain. (libnx's switch.specs + // also `--require-defined=main` at link time.) exe.entry = .{ .symbol_name = "main" }; } @@ -321,8 +344,8 @@ pub fn addHeadless(owner: *std.Build, b: *std.Build, opts: HeadlessOptions) *std config.gfx = .headless; config.audio = .none; - // 3DS forces ofmt=c (see addGame for details). - const target = if (config.platform == .nintendo_3ds) blk: { + // 3DS and Switch force ofmt=c (see addGame for details). + const target = if (config.platform == .nintendo_3ds or config.platform == .nintendo_switch) blk: { var q = opts.target.query; q.ofmt = .c; break :blk b.resolveTargetQuery(q); @@ -445,6 +468,18 @@ pub const ExportOptions = struct { smdh_icon: ?std.Build.LazyPath = null, /// 3DS: directory (or pre-built `.romfs`) embedded into the 3DSX. romfs: ?std.Build.LazyPath = null, + /// Switch: NACP author string (shows under the title in the HOME + /// menu). Empty falls back to "Aether". + switch_author: []const u8 = "", + /// Switch: NACP version string (e.g. "1.0.0"). Empty falls back to + /// "1.0.0". + switch_version: []const u8 = "", + /// Switch: 256x256 JPEG icon embedded in the NRO. 
When null, libnx's + /// `default_icon.jpg` is used. + switch_icon: ?std.Build.LazyPath = null, + /// Switch: directory embedded into the NRO as RomFS. When null, no + /// RomFS is attached. + switch_romfs: ?std.Build.LazyPath = null, pub const Resource = struct { /// Source file to copy. @@ -470,6 +505,8 @@ pub fn exportArtifact(owner: *std.Build, b: *std.Build, exe: *std.Build.Step.Com _ = pspEbootPipeline(b, exe, psp_dep, opts); } else if (config.platform == .nintendo_3ds) { threedsxPipeline(b, exe, opts); + } else if (config.platform == .nintendo_switch) { + switchNroPipeline(b, exe, opts); } else if (config.platform == .macos) { macosAppBundle(b, exe, opts); } else { @@ -896,6 +933,145 @@ fn threedsxPipeline(b: *std.Build, exe: *std.Build.Step.Compile, opts: ExportOpt } } +/// Compiles the zig-emitted C with devkitA64, links against libnx, and +/// packages the ELF (plus a NACP and optional RomFS) into a `.nro` +/// homebrew bundle. Mirrors `threedsxPipeline` for the Switch toolchain. +fn switchNroPipeline(b: *std.Build, exe: *std.Build.Step.Compile, opts: ExportOptions) void { + // aarch64 GCC supports __int128 natively, so we don't need the + // `zig.h` align(16) -> align(8) patch the 3DS pipeline applies. + // + // We do still need a compiler_rt object: zig.h calls helpers like + // `__floatunsisf` / `__floatundidf` / `__floatdisf` unconditionally, + // but devkitA64's libgcc doesn't ship them — gcc on aarch64 with + // hardware FP inlines these casts as `ucvtf`/`scvtf`, so the + // helpers are dead code in normal compilations. Zig's emitted C + // takes the slow path, so we drop in zig's own compiler_rt to + // satisfy the references. Like the 3DS pipeline we localize + // symbols that overlap newlib (memset/memcpy/...) so the linker + // pulls newlib's strong implementations. 
+ const game_target = exe.root_module.resolved_target.?; + var crt_query = game_target.query; + crt_query.os_tag = .freestanding; + crt_query.ofmt = null; + crt_query.cpu_model = .{ .explicit = game_target.result.cpu.model }; + const crt_target = b.resolveTargetQuery(crt_query); + + const compiler_rt_path = b.pathJoin(&.{ + b.graph.zig_lib_directory.path orelse ".", + "compiler_rt.zig", + }); + const crt_obj = b.addObject(.{ + .name = "aether_switch_compiler_rt", + .root_module = b.createModule(.{ + .root_source_file = .{ .cwd_relative = compiler_rt_path }, + .target = crt_target, + .optimize = .ReleaseSmall, + .strip = true, + // Switch homebrew uses libnx's switch.specs which links + // with `-z text`. PIC is mandatory for any object that + // ends up in the read-only .text segment, otherwise the + // linker rejects the dynamic absolute relocations. + .pic = true, + }), + }); + + const dkp = devkitProPath(b); + + const strip_libc = b.addSystemCommand(&.{ + b.pathJoin(&.{ dkp, "devkitA64/bin/aarch64-none-elf-objcopy" }), + "--localize-symbol=memset", + "--localize-symbol=memcpy", + "--localize-symbol=memmove", + "--localize-symbol=memcmp", + "--localize-symbol=strlen", + "--localize-symbol=bcmp", + }); + strip_libc.addArtifactArg(crt_obj); + const crt_clean = strip_libc.addOutputFileArg("aether_switch_compiler_rt.o"); + const gcc = b.pathJoin(&.{ dkp, "devkitA64/bin/aarch64-none-elf-gcc" }); + const tool_elf2nro = b.pathJoin(&.{ dkp, "tools/bin/elf2nro" }); + const tool_nacp = b.pathJoin(&.{ dkp, "tools/bin/nacptool" }); + const libnx_inc = b.pathJoin(&.{ dkp, "libnx/include" }); + const libnx_lib = b.pathJoin(&.{ dkp, "libnx/lib" }); + const libnx_specs = b.pathJoin(&.{ dkp, "libnx/switch.specs" }); + const default_icon = b.pathJoin(&.{ dkp, "libnx/default_icon.jpg" }); + + // Standard Switch arch flags from devkitPro's switch_rules / + // example Makefiles. 
`-mtp=soft` matches what libnx is built + // against; mismatching the TLS access mode crashes on the first + // thread-local read. + const arch = [_][]const u8{ + "-march=armv8-a+crc+crypto", "-mtune=cortex-a57", "-mtp=soft", "-fPIE", + }; + + const link = b.addSystemCommand(&.{gcc}); + link.addArgs(&arch); + link.addArgs(&.{ + "-ffunction-sections", "-fdata-sections", + "-D__SWITCH__", "-O2", + "-g", b.fmt("-specs={s}", .{libnx_specs}), + // Pin the C standard to C11 (zig.h targets `_Noreturn`, not + // C23's `[[noreturn]]`). + "-std=gnu11", + // zig's -ofmt=c emitter has known pointer/int-conversion + // mismatches that gcc 14+ promotes to errors. We don't author + // the C, so demote them. + "-Wno-incompatible-pointer-types", + "-Wno-int-conversion", "-Wno-builtin-declaration-mismatch", + }); + link.addArg(b.fmt("-I{s}", .{libnx_inc})); + // zig's emitted C `#include "zig.h"`. The header lives in zig's + // own lib directory; point gcc at it. aarch64 GCC's __int128 + // alignment matches zig's, so no patching is needed (unlike 3DS). + link.addArg(b.fmt("-I{s}", .{b.graph.zig_lib_directory.path orelse "."})); + link.addArg("-x"); + link.addArg("c"); + link.addArtifactArg(exe); + link.addArg("-x"); + link.addArg("none"); + link.addFileArg(crt_clean); + link.addArg(b.fmt("-L{s}", .{libnx_lib})); + link.addArgs(&.{ "-lnx", "-lm" }); + link.addArg("-o"); + const elf = link.addOutputFileArg(b.fmt("{s}.elf", .{exe.name})); + + // NACP metadata (HOME-menu title, author, version). + const nacp_run = b.addSystemCommand(&.{ tool_nacp, "--create" }); + nacp_run.addArg(if (opts.title.len > 0) opts.title else exe.name); + nacp_run.addArg(if (opts.switch_author.len > 0) opts.switch_author else "Aether"); + nacp_run.addArg(if (opts.switch_version.len > 0) opts.switch_version else "1.0.0"); + const nacp = nacp_run.addOutputFileArg(b.fmt("{s}.nacp", .{exe.name})); + + // ELF -> NRO. The icon, NACP, and (optional) romfs ride in via + // flag-form args. 
+ const pack = b.addSystemCommand(&.{tool_elf2nro}); + pack.addFileArg(elf); + const nro = pack.addOutputFileArg(b.fmt("{s}.nro", .{exe.name})); + if (opts.switch_icon) |icon| + pack.addPrefixedFileArg("--icon=", icon) + else + pack.addArg(b.fmt("--icon={s}", .{default_icon})); + pack.addPrefixedFileArg("--nacp=", nacp); + if (opts.switch_romfs) |r| pack.addPrefixedDirectoryArg("--romfsdir=", r); + + if (opts.output_dir) |dir| { + const alloc = b.allocator; + b.getInstallStep().dependOn(&b.addInstallBinFile( + nro, + std.mem.concat(alloc, u8, &.{ dir, "/", exe.name, ".nro" }) catch @panic("OOM"), + ).step); + b.getInstallStep().dependOn(&b.addInstallBinFile( + elf, + std.mem.concat(alloc, u8, &.{ dir, "/", exe.name, ".elf" }) catch @panic("OOM"), + ).step); + } else { + b.getInstallStep().dependOn(&b.addInstallBinFile( + nro, + b.fmt("{s}.nro", .{exe.name}), + ).step); + } +} + /// Registers a shader pair for the game executable. Slang sources are /// compiled to SPIR-V (Vulkan) or GLSL (OpenGL) via slangc. On /// shaderless platforms (PSP), empty stubs are provided. 
@@ -963,6 +1139,7 @@ pub fn build(b: *std.Build) void { .psp_display_mode = b.option(PspDisplayMode, "psp-display", "PSP display mode: rgba8888 (32-bit, default) or rgb565 (16-bit)"), .psp_mipmaps = b.option(bool, "psp-mipmaps", "PSP: generate mip levels for VRAM-resident textures (default: false)"), .use_cwd = b.option(bool, "use-cwd", "Force resources+data dirs to CWD (debug/CI convenience; default: false)"), + .nintendo_switch = b.option(bool, "nintendo-switch", "Build for Nintendo Switch (requires -Dtarget=aarch64-freestanding-none and devkitA64/libnx)"), }; const config = Config.resolve(target, overrides); @@ -984,6 +1161,7 @@ pub fn build(b: *std.Build) void { .output_dir = switch (config.platform) { .psp => "Aether-PSP", .nintendo_3ds => "Aether-3DS", + .nintendo_switch => "Aether-Switch", else => null, }, .smdh_long_description = "Aether engine test app", @@ -1016,6 +1194,35 @@ pub fn build(b: *std.Build) void { // `zig build run` aliases to 3dslink for 3DS so the same // command works across host/PSP/3DS workflows. run_step.dependOn(&link_cmd.step); + } else if (config.platform == .nintendo_switch) { + // Switch can't run natively on the host. nxlink pushes the + // .nro to nx-hbloader on a networked Switch (mDNS by default, + // explicit IP via -a). 
+ const dkp = devkitProPath(b); + const link_cmd = b.addSystemCommand(&.{b.pathJoin(&.{ dkp, "tools/bin/nxlink" })}); + if (b.option([]const u8, "nxlink-address", "Switch: target IP for nxlink push (default: mDNS auto-discover)")) |ip| { + link_cmd.addArgs(&.{ "-a", ip }); + } + if (b.option(u32, "nxlink-retries", "Switch: nxlink retry count (default: 10)")) |n| { + link_cmd.addArgs(&.{ "-r", b.fmt("{d}", .{n}) }); + } + if (b.option(bool, "nxlink-server", "Switch: pass -s so nxlink stays listening after upload (relays stdout/stderr from nro)") orelse false) { + link_cmd.addArg("-s"); + } + link_cmd.addArg(b.getInstallPath(.bin, "Aether-Switch/Aether.nro")); + link_cmd.step.dependOn(b.getInstallStep()); + if (b.args) |args| { + // nxlink takes nro args after a `--args` separator. + link_cmd.addArg("--args"); + link_cmd.addArgs(args); + } + + const link_step = b.step("nxlink", "Push the nro to a networked Switch via nxlink"); + link_step.dependOn(&link_cmd.step); + + // `zig build run` aliases to nxlink for Switch so the same + // command works across host/PSP/3DS/Switch workflows. 
+ run_step.dependOn(&link_cmd.step); } else { const run_cmd = b.addRunArtifact(exe); run_cmd.step.dependOn(b.getInstallStep()); @@ -1023,9 +1230,9 @@ pub fn build(b: *std.Build) void { run_step.dependOn(&run_cmd.step); } - // Engine unit tests (desktop only — PSP/3DS pull in symbols that - // can't be linked or analyzed under the test runner) - if (config.platform != .psp and config.platform != .nintendo_3ds) { + // Engine unit tests (desktop only — PSP/3DS/Switch pull in symbols + // that can't be linked or analyzed under the test runner) + if (config.platform != .psp and config.platform != .nintendo_3ds and config.platform != .nintendo_switch) { const mod_tests = b.addTest(.{ .root_module = exe.root_module.import_table.get("aether").?, }); diff --git a/src/platform/audio.zig b/src/platform/audio.zig index 9e0e82e..bf7ba8a 100644 --- a/src/platform/audio.zig +++ b/src/platform/audio.zig @@ -14,6 +14,8 @@ else if (builtin.os.tag == .psp) @import("psp/psp_audio.zig") else if (builtin.os.tag == .@"3ds") @import("3ds/3ds_audio.zig") +else if (options.config.platform == .nintendo_switch) + @import("switch/switch_audio.zig") else @import("glfw/audio.zig"); diff --git a/src/platform/gfx.zig b/src/platform/gfx.zig index e583dda..36252e8 100644 --- a/src/platform/gfx.zig +++ b/src/platform/gfx.zig @@ -12,6 +12,8 @@ const surface_iface = @import("surface.zig"); pub const Api = switch (options.config.gfx) { .default => if (builtin.os.tag == .@"3ds") @import("3ds/3ds_gfx.zig") + else if (options.config.platform == .nintendo_switch) + @import("switch/switch_gfx.zig") else @import("psp/psp_gfx_ge.zig"), .opengl => @import("glfw/opengl/opengl_gfx.zig"), @@ -27,6 +29,8 @@ else if (builtin.os.tag == .psp) @import("psp/surface.zig") else if (builtin.os.tag == .@"3ds") @import("3ds/surface.zig") +else if (options.config.platform == .nintendo_switch) + @import("switch/surface.zig") else @import("glfw/surface.zig"); diff --git a/src/platform/input.zig b/src/platform/input.zig index 
53b620d..0b134b7 100644 --- a/src/platform/input.zig +++ b/src/platform/input.zig @@ -13,6 +13,8 @@ else if (builtin.os.tag == .psp) @import("psp/input.zig") else if (builtin.os.tag == .@"3ds") @import("3ds/input.zig") +else if (options.config.platform == .nintendo_switch) + @import("switch/input.zig") else @import("glfw/input.zig"); diff --git a/src/platform/switch/input.zig b/src/platform/switch/input.zig new file mode 100644 index 0000000..f5edce2 --- /dev/null +++ b/src/platform/switch/input.zig @@ -0,0 +1,24 @@ +//! Switch input backend stub. +//! +//! Wires up to libnx's `hid` (pads, touch, motion) once an SDK is +//! available. For now every reading reads as neutral so the action +//! system is silent on Switch. + +const std = @import("std"); +const core = @import("../../core/input/input.zig"); + +pub fn setup(_: std.mem.Allocator, _: std.Io) void {} + +pub fn init() anyerror!void {} + +pub fn deinit() void {} + +pub fn pump() void { + core.signal_frame_boundary(); +} + +pub fn apply_cursor_mode(_: core.CursorMode) void {} + +pub fn begin_text_input_session(_: core.TextInputTarget, _: core.TextInputOptions) anyerror!void {} + +pub fn end_text_input_session() void {} diff --git a/src/platform/switch/services.zig b/src/platform/switch/services.zig new file mode 100644 index 0000000..5067f50 --- /dev/null +++ b/src/platform/switch/services.zig @@ -0,0 +1,28 @@ +//! Switch system services / entry shim. +//! +//! Exports a C-callable `main` that hands control to the user's Zig +//! `main`. `Init` is currently `undefined` — invoking the engine on +//! hardware will crash on the first real allocation. The shim exists +//! so the engine call graph is reachable from `-ofmt=c` codegen and +//! so libnx integration has a clear landing pad: when libnx is wired +//! in, build a real `std.process.Init` here (libnx-backed allocator, +//! an `Io` implementation talking to fs:srv / sockets, an +//! `Environ.Map`) and pass it to `root.main`. +//! +//! 
libnx's switch.specs links with `--require-defined=main`, which +//! pulls a strong `main` from libnx's crt0 by default. We shadow it +//! with this Zig export — same name, weakness doesn't matter since +//! ld picks the first definition seen — to route the entry through +//! Aether instead of libnx's nnMain wrapper. + +const std = @import("std"); + +comptime { + @export(&entry, .{ .name = "main" }); +} + +fn entry(_: c_int, _: [*c][*c]u8) callconv(.c) c_int { + const init: std.process.Init = undefined; + @import("root").main(init) catch return 1; + return 0; +} diff --git a/src/platform/switch/surface.zig b/src/platform/switch/surface.zig new file mode 100644 index 0000000..ea4d539 --- /dev/null +++ b/src/platform/switch/surface.zig @@ -0,0 +1,30 @@ +//! Switch surface stub. +//! +//! Switch's framebuffer is 1280x720 in handheld mode and 1920x1080 +//! docked. We advertise 1280x720 so the engine has a sane default; +//! a real backend will query `appletGetOperationMode` and resize on +//! dock transitions. `update` returns true so the engine loop keeps +//! ticking even though nothing is drawn. + +const std = @import("std"); +const Self = @This(); + +alloc: std.mem.Allocator, + +pub fn init(_: *Self, _: u32, _: u32, _: [:0]const u8, _: bool, _: bool, _: bool) anyerror!void {} + +pub fn deinit(_: *Self) void {} + +pub fn update(_: *Self) bool { + return true; +} + +pub fn draw(_: *Self) void {} + +pub fn get_width(_: *Self) u32 { + return 1280; +} + +pub fn get_height(_: *Self) u32 { + return 720; +} diff --git a/src/platform/switch/switch_audio.zig b/src/platform/switch/switch_audio.zig new file mode 100644 index 0000000..1a68e43 --- /dev/null +++ b/src/platform/switch/switch_audio.zig @@ -0,0 +1,35 @@ +//! Switch audio backend stub. +//! +//! Empty slot-based PCM output that satisfies `audio_api.Interface`. +//! Real bring-up will route through libnx's `audren` (the high-level +//! audio renderer) or `audout` for raw PCM. 24 voices matches what +//! 
audren exposes by default — bump if a downstream game needs more. + +const std = @import("std"); +const Stream = @import("../../audio/stream.zig").Stream; + +var audio_alloc: std.mem.Allocator = undefined; +var audio_io: std.Io = undefined; + +pub fn setup(alloc: std.mem.Allocator, io: std.Io) void { + audio_alloc = alloc; + audio_io = io; +} + +pub fn init() anyerror!void {} +pub fn deinit() void {} +pub fn update() void {} + +pub fn max_voices() u32 { + return 24; +} + +pub fn play_slot(_: u8, _: Stream) anyerror!void {} + +pub fn stop_slot(_: u8) void {} + +pub fn set_slot_gain_pan(_: u8, _: f32, _: f32) void {} + +pub fn is_slot_active(_: u8) bool { + return false; +} diff --git a/src/platform/switch/switch_gfx.zig b/src/platform/switch/switch_gfx.zig new file mode 100644 index 0000000..b549513 --- /dev/null +++ b/src/platform/switch/switch_gfx.zig @@ -0,0 +1,64 @@ +//! Switch GPU backend stub. +//! +//! Empty template: every entry point satisfies `gfx_api.Interface` so the +//! engine compiles end-to-end on Switch (`-Dnintendo-switch=true`). Real +//! bring-up will plug libnx's deko3d (or nvgpu via the nv:* services) +//! in here. 
+ +const std = @import("std"); +const Mat4 = @import("../../math/math.zig").Mat4; +const Rendering = @import("../../rendering/rendering.zig"); +const Pipeline = Rendering.Pipeline; +const Mesh = Rendering.mesh; +const Texture = Rendering.Texture; + +var render_alloc: std.mem.Allocator = undefined; +var render_io: std.Io = undefined; + +pub fn setup(alloc: std.mem.Allocator, io: std.Io) void { + render_alloc = alloc; + render_io = io; +} + +pub fn init() anyerror!void {} +pub fn deinit() void {} + +pub fn set_clear_color(_: f32, _: f32, _: f32, _: f32) void {} +pub fn set_alpha_blend(_: bool) void {} +pub fn set_depth_write(_: bool) void {} +pub fn set_fog(_: bool, _: f32, _: f32, _: f32, _: f32, _: f32) void {} +pub fn set_clip_planes(_: bool) void {} +pub fn set_proj_matrix(_: *const Mat4) void {} +pub fn set_view_matrix(_: *const Mat4) void {} + +pub fn start_frame() bool { + return false; +} + +pub fn end_frame() void {} +pub fn clear_depth() void {} +pub fn set_vsync(_: bool) void {} + +pub fn create_pipeline(_: Pipeline.VertexLayout, _: ?[:0]align(4) const u8, _: ?[:0]align(4) const u8) anyerror!Pipeline.Handle { + return 0; +} + +pub fn destroy_pipeline(_: Pipeline.Handle) void {} +pub fn bind_pipeline(_: Pipeline.Handle) void {} + +pub fn create_mesh(_: Pipeline.Handle) anyerror!Mesh.Handle { + return 0; +} + +pub fn destroy_mesh(_: Mesh.Handle) void {} +pub fn update_mesh(_: Mesh.Handle, _: []const u8) void {} +pub fn draw_mesh(_: Mesh.Handle, _: *const Mat4, _: usize, _: Mesh.Primitive) void {} + +pub fn create_texture(_: u32, _: u32, _: []align(16) u8) anyerror!Texture.Handle { + return 0; +} + +pub fn update_texture(_: Texture.Handle, _: []align(16) u8) void {} +pub fn bind_texture(_: Texture.Handle) void {} +pub fn destroy_texture(_: Texture.Handle) void {} +pub fn force_texture_resident(_: Texture.Handle) void {} diff --git a/src/platform/switch/switch_thread.zig b/src/platform/switch/switch_thread.zig new file mode 100644 index 0000000..b133be7 --- 
/dev/null +++ b/src/platform/switch/switch_thread.zig @@ -0,0 +1,25 @@ +//! Switch thread backend stub. +//! +//! Real implementation will wrap libnx's `threadCreate`/`threadStart`/ +//! `threadWaitForExit`. Until then `spawn` returns `error.Unsupported` +//! so callers fail fast instead of silently returning a bogus handle. + +const std = @import("std"); +const api = @import("../thread_api.zig"); + +pub const Handle = u32; + +pub fn spawn(cfg: api.Config, comptime func: anytype, args: anytype) !Handle { + _ = cfg; + _ = func; + _ = args; + return error.Unsupported; +} + +pub fn join(_: Handle) void {} + +pub fn set_priority(_: Handle, _: api.Priority) anyerror!void {} + +pub fn current_priority() api.Priority { + return .normal; +} diff --git a/src/platform/thread.zig b/src/platform/thread.zig index 6567b25..4475792 100644 --- a/src/platform/thread.zig +++ b/src/platform/thread.zig @@ -1,12 +1,15 @@ //! Comptime-selected thread backend. const builtin = @import("builtin"); +const options = @import("options"); const thread_api = @import("thread_api.zig"); pub const Api = if (builtin.os.tag == .psp) @import("psp/psp_thread.zig") else if (builtin.os.tag == .@"3ds") @import("3ds/3ds_thread.zig") +else if (options.config.platform == .nintendo_switch) + @import("switch/switch_thread.zig") else @import("std_thread.zig"); diff --git a/src/root.zig b/src/root.zig index bdbed2f..c8528f7 100644 --- a/src/root.zig +++ b/src/root.zig @@ -13,12 +13,16 @@ pub const ctx_to_self = Util.ctx_to_self; /// Only available when `platform == .psp`; evaluates to `void` otherwise. pub const Psp = if (platform == .psp) @import("platform/psp/psp_dialogs.zig") else void; -// Pull in the 3DS entry shim on 3DS builds. Its comptime block -// `@export`s a C-callable `main` so `-ofmt=c` emits the full engine. +// Pull in the 3DS / Switch entry shim on those targets. 
Each shim's +// comptime block `@export`s a C-callable `main` so `-ofmt=c` emits +// the full engine call graph rather than constants-only output. comptime { if (platform == .nintendo_3ds) { _ = @import("platform/3ds/services.zig"); } + if (platform == .nintendo_switch) { + _ = @import("platform/switch/services.zig"); + } } /// Comptime-known platform and graphics backend, resolved from build options. diff --git a/test/main.zig b/test/main.zig index 4c4bf59..9a1de4e 100644 --- a/test/main.zig +++ b/test/main.zig @@ -18,35 +18,32 @@ comptime { pub const psp_stack_size: u32 = 256 * 1024; -// PSP and 3DS override panic/IO handlers that would otherwise pull in -// posix symbols (Io.Threaded references std.posix decls that don't exist -// for these targets). 3DS doesn't have an SDK wired up yet, so its -// debug_io is `undefined` for now — invoking it before libctru lands is -// UB, which matches the placeholder entry in `platform/3ds/services.zig`. -const is_freestanding_console = ae.platform == .psp or ae.platform == .nintendo_3ds; -// 3DS uses `no_panic` while libctru integration is stubbed: `std_options_debug_io` -// is `undefined` on 3DS, so `defaultPanic` would deref garbage when formatting, -// re-panic, and recurse until the stack blows. `no_panic` keeps the first fault -// as the only fault — a single ARM exception you can read off the screen rather -// than an unbounded loop through `memset`. -pub const panic = if (ae.platform == .psp) sdk.extra.debug.panic - else if (ae.platform == .nintendo_3ds) std.debug.no_panic - else std.debug.FullPanic(std.debug.defaultPanic); +// PSP, 3DS, and Switch override panic/IO handlers that would otherwise +// pull in posix symbols (Io.Threaded references std.posix decls that +// don't exist for these targets). 3DS and Switch don't have SDKs wired +// up yet, so their debug_io is `undefined` for now — invoking it before +// libctru/libnx lands is UB, which matches the placeholder entries in +// each platform's `services.zig`. 
+const is_freestanding_console = ae.platform == .psp or ae.platform == .nintendo_3ds or ae.platform == .nintendo_switch; +// 3DS and Switch use `no_panic` while their SDK integration is stubbed: +// `std_options_debug_io` is `undefined` on those targets, so +// `defaultPanic` would deref garbage when formatting, re-panic, and +// recurse until the stack blows. `no_panic` keeps the first fault as +// the only fault — a single CPU exception you can read off the screen +// rather than an unbounded loop through `memset`. +pub const panic = if (ae.platform == .psp) sdk.extra.debug.panic else if (ae.platform == .nintendo_3ds or ae.platform == .nintendo_switch) std.debug.no_panic else std.debug.FullPanic(std.debug.defaultPanic); pub const std_options_debug_threaded_io = if (is_freestanding_console) null else std.Io.Threaded.global_single_threaded; pub const std_options_debug_io: std.Io = - if (ae.platform == .psp) sdk.extra.Io.psp_io - else if (ae.platform == .nintendo_3ds) undefined - else std.Io.Threaded.global_single_threaded.io(); + if (ae.platform == .psp) sdk.extra.Io.psp_io else if (ae.platform == .nintendo_3ds or ae.platform == .nintendo_switch) undefined else std.Io.Threaded.global_single_threaded.io(); pub const std_options_cwd = - if (ae.platform == .psp) psp_cwd - else if (ae.platform == .nintendo_3ds) stub_cwd - else null; + if (ae.platform == .psp) psp_cwd else if (ae.platform == .nintendo_3ds or ae.platform == .nintendo_switch) stub_cwd else null; fn psp_cwd() std.Io.Dir { return .{ .handle = -1 }; } fn stub_cwd() std.Io.Dir { - // Dir.Handle resolves to `void` on 3DS (posix.fd_t falls back to - // void in the empty system struct). Init with `{}` to match. + // Dir.Handle resolves to `void` on 3DS/Switch freestanding targets + // (posix.fd_t falls back to void in the empty system struct). Init + // with `{}` to match. 
return .{ .handle = {} }; } From 3c9a0f3ad57658ddd98d15d56c253ed53ed97c73 Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Tue, 26 May 2026 17:44:03 -0400 Subject: [PATCH 03/44] Update .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 25062cc..8089f12 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ zig-pkg .claude CLAUDE.md *.log +*.wav From 10163c0ed37d5227d64904c1138798abf9fda92a Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Tue, 26 May 2026 18:21:41 -0400 Subject: [PATCH 04/44] Get closer to working sample --- src/platform/3ds/3ds_gfx.zig | 2 + src/platform/3ds/services.zig | 15 +- src/platform/3ds/time.zig | 43 +++ src/platform/c_io.zig | 557 +++++++++++++++++++++++++++++ src/platform/c_process_init.zig | 88 +++++ src/platform/switch/services.zig | 13 +- src/platform/switch/switch_gfx.zig | 2 + src/platform/switch/time.zig | 37 ++ src/root.zig | 1 + src/util/util.zig | 2 + test/main.zig | 18 +- 11 files changed, 749 insertions(+), 29 deletions(-) create mode 100644 src/platform/3ds/time.zig create mode 100644 src/platform/c_io.zig create mode 100644 src/platform/c_process_init.zig create mode 100644 src/platform/switch/time.zig diff --git a/src/platform/3ds/3ds_gfx.zig b/src/platform/3ds/3ds_gfx.zig index 30a7a1a..8565df4 100644 --- a/src/platform/3ds/3ds_gfx.zig +++ b/src/platform/3ds/3ds_gfx.zig @@ -27,6 +27,8 @@ pub fn set_alpha_blend(_: bool) void {} pub fn set_depth_write(_: bool) void {} pub fn set_fog(_: bool, _: f32, _: f32, _: f32, _: f32, _: f32) void {} pub fn set_clip_planes(_: bool) void {} +pub fn set_culling(_: bool) void {} +pub fn set_uv_offset(_: f32, _: f32) void {} pub fn set_proj_matrix(_: *const Mat4) void {} pub fn set_view_matrix(_: *const Mat4) void {} diff --git a/src/platform/3ds/services.zig b/src/platform/3ds/services.zig index 2b2c89e..9ba0043 100644 --- a/src/platform/3ds/services.zig +++ b/src/platform/3ds/services.zig @@ -1,20 +1,17 @@ //! 
3DS system services / entry shim. //! //! Exports a C-callable `main` that hands control to the user's Zig -//! `main`. `Init` is currently `undefined` — invoking the engine on -//! hardware will crash on the first real allocation. The shim exists -//! so the engine call graph is reachable from `-ofmt=c` codegen and -//! so libctru integration has a clear landing pad: when libctru is -//! wired in, build a real `std.process.Init` here (libctru-backed -//! `ArenaAllocator`, an `Io` implementation talking to the FS / SOC -//! services, an `Environ.Map`) and pass it to `root.main`. +//! `main`. The allocator and baseline `std.Io` pieces of `std.process.Init` +//! are wired through newlib; deeper platform services remain TODO. //! //! Stack default: libctru's 32 KB is far too small for any std-using //! Zig code path. We override `__stacksize__` (a `WEAK` symbol in //! libctru) with a strong export. 1 MB is comfortable; bump if engine //! frames grow. -const std = @import("std"); +const process_init = @import("../c_process_init.zig"); + +const argv = [_][*:0]const u8{"Aether"}; comptime { @export(&entry, .{ .name = "main" }); @@ -24,7 +21,7 @@ comptime { var stack_size: u32 = 1 * 1024 * 1024; fn entry() callconv(.c) c_int { - const init: std.process.Init = undefined; + const init = process_init.makeInit(.{ .vector = &argv }); @import("root").main(init) catch return 1; return 0; } diff --git a/src/platform/3ds/time.zig b/src/platform/3ds/time.zig new file mode 100644 index 0000000..3c6775a --- /dev/null +++ b/src/platform/3ds/time.zig @@ -0,0 +1,43 @@ +const std = @import("std"); + +extern fn osGetTime() u64; +extern fn svcGetSystemTick() u64; +extern fn svcSleepThread(ns: i64) void; + +const arm11_hz: u128 = 16_756_991 * 2 * 4 * 2; +const unix_epoch_from_1900_ms: i96 = 2_208_988_800 * std.time.ms_per_s; + +pub fn now(clock: std.Io.Clock) std.Io.Timestamp { + return switch (clock) { + .real => .fromNanoseconds((@as(i96, @intCast(osGetTime())) - unix_epoch_from_1900_ms) 
* std.time.ns_per_ms), + .awake, .boot => .fromNanoseconds(@intCast((@as(u128, svcGetSystemTick()) * std.time.ns_per_s) / arm11_hz)), + else => std.debug.panic("3ds std.Io clock {s} is not implemented", .{@tagName(clock)}), + }; +} + +pub fn clockResolution(clock: std.Io.Clock) std.Io.Clock.ResolutionError!std.Io.Duration { + return switch (clock) { + .real => .fromMilliseconds(1), + .awake, .boot => .fromNanoseconds(4), + else => error.ClockUnavailable, + }; +} + +pub fn sleep(timeout: std.Io.Timeout) std.Io.Cancelable!void { + const ns = timeoutNanoseconds(timeout); + if (ns <= 0) return; + svcSleepThread(clampNs(ns)); +} + +fn timeoutNanoseconds(timeout: std.Io.Timeout) i96 { + return switch (timeout) { + .none => 0, + .duration => |duration| duration.raw.nanoseconds, + .deadline => |deadline| deadline.raw.nanoseconds - now(deadline.clock).nanoseconds, + }; +} + +fn clampNs(ns: i96) i64 { + if (ns > std.math.maxInt(i64)) return std.math.maxInt(i64); + return @intCast(ns); +} diff --git a/src/platform/c_io.zig b/src/platform/c_io.zig new file mode 100644 index 0000000..700f3bf --- /dev/null +++ b/src/platform/c_io.zig @@ -0,0 +1,557 @@ +const std = @import("std"); +const options = @import("options"); + +const Io = std.Io; +const Dir = Io.Dir; +const File = Io.File; + +const platform_time = switch (options.config.platform) { + .nintendo_3ds => @import("3ds/time.zig"), + .nintendo_switch => @import("switch/time.zig"), + else => @compileError("platform/c_io.zig is only wired for Nintendo targets"), +}; + +const c = struct { + extern fn open(path: [*:0]const u8, flags: c_int, ...) 
c_int; + extern fn close(fd: c_int) c_int; + extern fn read(fd: c_int, buf: [*]u8, count: usize) isize; + extern fn write(fd: c_int, buf: [*]const u8, count: usize) isize; + extern fn lseek(fd: c_int, offset: c_long, whence: c_int) c_long; + extern fn fsync(fd: c_int) c_int; + extern fn ftruncate(fd: c_int, length: c_long) c_int; + extern fn getcwd(buf: [*]u8, size: usize) ?[*:0]u8; + extern fn chdir(path: [*:0]const u8) c_int; + extern fn __errno() *c_int; +}; + +const max_path_bytes = 1024; + +const O_RDONLY: c_int = 0; +const O_WRONLY: c_int = 1; +const O_RDWR: c_int = 2; +const O_CREAT: c_int = 0x0200; +const O_TRUNC: c_int = 0x0400; +const O_EXCL: c_int = 0x0800; + +const SEEK_SET: c_int = 0; +const SEEK_CUR: c_int = 1; +const SEEK_END: c_int = 2; + +var read_fd: c_int = -1; +var write_fd: c_int = -1; +var stderr_writer: File.Writer = undefined; +var stderr_writer_initialized = false; +var empty_stderr_buffer: [0]u8 = .{}; + +const vtable: Io.VTable = blk: { + var v = Io.failing.vtable.*; + v.crashHandler = crashHandler; + v.async = Io.noAsync; + v.groupAsync = Io.noGroupAsync; + v.recancel = recancel; + v.swapCancelProtection = swapCancelProtection; + v.checkCancel = checkCancel; + v.operate = operate; + v.dirCreateFile = dirCreateFile; + v.dirOpenFile = dirOpenFile; + v.fileStat = fileStat; + v.fileLength = fileLength; + v.fileClose = fileClose; + v.fileWritePositional = fileWritePositional; + v.fileReadPositional = fileReadPositional; + v.fileSeekBy = fileSeekBy; + v.fileSeekTo = fileSeekTo; + v.fileSync = fileSync; + v.fileIsTty = fileIsTty; + v.fileEnableAnsiEscapeCodes = fileEnableAnsiEscapeCodes; + v.fileSupportsAnsiEscapeCodes = fileSupportsAnsiEscapeCodes; + v.fileSetLength = fileSetLength; + v.lockStderr = lockStderr; + v.tryLockStderr = tryLockStderr; + v.unlockStderr = unlockStderr; + v.processCurrentPath = processCurrentPath; + v.processSetCurrentPath = processSetCurrentPath; + v.now = now; + v.clockResolution = clockResolution; + v.sleep = sleep; 
+ v.random = random; + break :blk v; +}; + +pub fn io() Io { + return .{ .userdata = null, .vtable = &vtable }; +} + +fn crashHandler(_: ?*anyopaque) void {} + +fn recancel(_: ?*anyopaque) void {} + +fn swapCancelProtection(_: ?*anyopaque, new: Io.CancelProtection) Io.CancelProtection { + _ = new; + return .unblocked; +} + +fn checkCancel(_: ?*anyopaque) Io.Cancelable!void {} + +fn operate(_: ?*anyopaque, operation: Io.Operation) Io.Cancelable!Io.Operation.Result { + return switch (operation) { + .file_read_streaming => |op| .{ .file_read_streaming = fileReadStreaming(op.file, op.data) }, + .file_write_streaming => |op| .{ .file_write_streaming = fileWriteStreaming(op.file, op.header, op.data, op.splat) }, + .device_io_control => unsupported("device_io_control"), + .net_receive => .{ .net_receive = .{ error.NetworkDown, 0 } }, + }; +} + +fn dirOpenFile(_: ?*anyopaque, _: Dir, sub_path: []const u8, flags: Dir.OpenFileOptions) File.OpenError!File { + if (flags.lock != .none or flags.path_only or flags.allow_ctty or flags.resolve_beneath) + unsupported("dirOpenFile option"); + if (!flags.allow_directory or !flags.follow_symlinks) + unsupported("dirOpenFile path policy"); + + var path_buffer: [max_path_bytes:0]u8 = undefined; + const path = try zPath(&path_buffer, sub_path); + const open_flags: c_int = switch (flags.mode) { + .read_only => O_RDONLY, + .write_only => O_WRONLY, + .read_write => O_RDWR, + }; + const fd = c.open(path.ptr, open_flags, @as(c_int, 0)); + if (fd < 0) return openError(); + errdefer _ = c.close(fd); + return registerFile(fd, .read); +} + +fn dirCreateFile(_: ?*anyopaque, _: Dir, sub_path: []const u8, flags: Dir.CreateFileOptions) File.OpenError!File { + if (flags.lock != .none or flags.resolve_beneath) unsupported("dirCreateFile option"); + + var path_buffer: [max_path_bytes:0]u8 = undefined; + const path = try zPath(&path_buffer, sub_path); + var open_flags: c_int = if (flags.read) O_RDWR else O_WRONLY; + open_flags |= O_CREAT; + if 
(flags.truncate) open_flags |= O_TRUNC; + if (flags.exclusive) open_flags |= O_EXCL; + + const mode: c_int = if (@bitSizeOf(File.Permissions) == 0) + 0o666 + else + @intCast(@intFromEnum(flags.permissions)); + const fd = c.open(path.ptr, open_flags, mode); + if (fd < 0) return openError(); + errdefer _ = c.close(fd); + return registerFile(fd, .write); +} + +fn fileStat(_: ?*anyopaque, file: File) File.StatError!File.Stat { + return .{ + .inode = zero(File.INode), + .nlink = zero(File.NLink), + .size = try fileLength(null, file), + .permissions = .default_file, + .kind = .file, + .atime = null, + .mtime = now(null, .real), + .ctime = now(null, .real), + .block_size = 1, + }; +} + +fn fileLength(_: ?*anyopaque, file: File) File.LengthError!u64 { + const fd = fdForRegular(file); + const current = c.lseek(fd, 0, SEEK_CUR); + if (current < 0) return seekToLengthError(); + const end = c.lseek(fd, 0, SEEK_END); + if (end < 0) return seekToLengthError(); + _ = c.lseek(fd, current, SEEK_SET); + return @intCast(end); +} + +fn fileClose(_: ?*anyopaque, files: []const File) void { + for (files) |file| { + if (isStderrFile(file)) continue; + if (@sizeOf(File.Handle) != 0) { + const fd = fdFromFileHandle(file); + if (fd > 2) _ = c.close(fd); + continue; + } + if (read_fd >= 0) { + _ = c.close(read_fd); + read_fd = -1; + } else if (write_fd >= 0) { + _ = c.close(write_fd); + write_fd = -1; + } + } +} + +fn fileReadPositional( + _: ?*anyopaque, + file: File, + data: []const []u8, + offset: u64, +) File.ReadPositionalError!usize { + const fd = fdForRead(file); + try seekToOffset(fd, offset); + + var total: usize = 0; + for (data) |buf| { + var remaining = buf; + while (remaining.len > 0) { + const n = c.read(fd, remaining.ptr, remaining.len); + if (n < 0) return readError(); + if (n == 0) return total; + const amt: usize = @intCast(n); + total += amt; + remaining = remaining[amt..]; + if (amt == 0) return total; + } + } + return total; +} + +fn fileReadStreaming(file: File, data: 
[]const []u8) Io.Operation.FileReadStreaming.Result { + const fd = fdForRead(file); + var total: usize = 0; + for (data) |buf| { + var remaining = buf; + while (remaining.len > 0) { + const n = c.read(fd, remaining.ptr, remaining.len); + if (n < 0) return readStreamingError(); + if (n == 0) return if (total == 0) error.EndOfStream else total; + const amt: usize = @intCast(n); + total += amt; + remaining = remaining[amt..]; + } + } + return total; +} + +fn fileWritePositional( + _: ?*anyopaque, + file: File, + header: []const u8, + data: []const []const u8, + splat: usize, + offset: u64, +) File.WritePositionalError!usize { + const fd = fdForWrite(file); + try seekToOffset(fd, offset); + return writeVectors(fd, header, data, splat); +} + +fn fileWriteStreaming(file: File, header: []const u8, data: []const []const u8, splat: usize) Io.Operation.FileWriteStreaming.Result { + return writeVectors(fdForWrite(file), header, data, splat) catch |err| switch (err) { + error.Canceled => unreachable, + error.Unseekable => error.InputOutput, + else => |e| @errorCast(e), + }; +} + +fn fileSeekBy(_: ?*anyopaque, file: File, relative_offset: i64) File.SeekError!void { + if (c.lseek(fdForRegular(file), @intCast(relative_offset), SEEK_CUR) < 0) return seekError(); +} + +fn fileSeekTo(_: ?*anyopaque, file: File, absolute_offset: u64) File.SeekError!void { + try seekToOffset(fdForRegular(file), absolute_offset); +} + +fn fileSync(_: ?*anyopaque, file: File) File.SyncError!void { + if (isStderrFile(file)) return; + if (c.fsync(fdForRegular(file)) < 0) return syncError(); +} + +fn fileIsTty(_: ?*anyopaque, _: File) Io.Cancelable!bool { + return false; +} + +fn fileEnableAnsiEscapeCodes(_: ?*anyopaque, _: File) File.EnableAnsiEscapeCodesError!void { + return error.NotTerminalDevice; +} + +fn fileSupportsAnsiEscapeCodes(_: ?*anyopaque, _: File) Io.Cancelable!bool { + return false; +} + +fn fileSetLength(_: ?*anyopaque, file: File, length: u64) File.SetLengthError!void { + if (length > 
std.math.maxInt(c_long)) return error.FileTooBig; + if (c.ftruncate(fdForRegular(file), @intCast(length)) < 0) return setLengthError(); +} + +fn lockStderr(_: ?*anyopaque, terminal_mode: ?Io.Terminal.Mode) Io.Cancelable!Io.LockedStderr { + if (!stderr_writer_initialized) { + var stderr_file: File = .{ + .handle = if (@sizeOf(File.Handle) == 0) {} else @as(File.Handle, @intCast(2)), + .flags = .{ .nonblocking = true }, + }; + stderr_file.flags.nonblocking = true; + stderr_writer = stderr_file.writerStreaming(io(), &empty_stderr_buffer); + stderr_writer_initialized = true; + } + return .{ + .file_writer = &stderr_writer, + .terminal_mode = terminal_mode orelse .no_color, + }; +} + +fn tryLockStderr(userdata: ?*anyopaque, terminal_mode: ?Io.Terminal.Mode) Io.Cancelable!?Io.LockedStderr { + return try lockStderr(userdata, terminal_mode); +} + +fn unlockStderr(_: ?*anyopaque) void { + if (stderr_writer_initialized) stderr_writer.interface.flush() catch {}; +} + +fn processCurrentPath(_: ?*anyopaque, buffer: []u8) std.process.CurrentPathError!usize { + if (buffer.len == 0) return error.NameTooLong; + const ptr = c.getcwd(buffer.ptr, buffer.len) orelse return currentPathError(); + const path = std.mem.span(ptr); + if (path.len >= buffer.len) return error.NameTooLong; + return path.len; +} + +fn processSetCurrentPath(_: ?*anyopaque, path: []const u8) std.process.SetCurrentPathError!void { + var path_buffer: [max_path_bytes:0]u8 = undefined; + const z = zPath(&path_buffer, path) catch |err| switch (err) { + error.NameTooLong => return error.NameTooLong, + error.BadPathName => return error.BadPathName, + }; + if (c.chdir(z.ptr) < 0) return setCurrentPathError(); +} + +fn now(_: ?*anyopaque, clock: Io.Clock) Io.Timestamp { + return platform_time.now(clock); +} + +fn clockResolution(_: ?*anyopaque, clock: Io.Clock) Io.Clock.ResolutionError!Io.Duration { + return platform_time.clockResolution(clock); +} + +fn sleep(_: ?*anyopaque, timeout: Io.Timeout) Io.Cancelable!void { + 
return platform_time.sleep(timeout); +} + +fn random(_: ?*anyopaque, buffer: []u8) void { + @memset(buffer, 0); +} + +const FileRole = enum { read, write }; + +fn registerFile(fd: c_int, role: FileRole) File { + if (@sizeOf(File.Handle) == 0) { + switch (role) { + .read => { + if (read_fd >= 0) unsupported("more than one regular read file"); + read_fd = fd; + }, + .write => { + if (write_fd >= 0) unsupported("more than one regular write file"); + write_fd = fd; + }, + } + return .{ .handle = {}, .flags = .{ .nonblocking = false } }; + } + return .{ + .handle = @intCast(fd), + .flags = .{ .nonblocking = false }, + }; +} + +fn fdForRead(file: File) c_int { + if (@sizeOf(File.Handle) != 0) return fdFromFileHandle(file); + if (read_fd < 0) unsupported("read from unopened regular file"); + return read_fd; +} + +fn fdForWrite(file: File) c_int { + if (isStderrFile(file)) return 2; + if (@sizeOf(File.Handle) != 0) return fdFromFileHandle(file); + if (write_fd < 0) unsupported("write to unopened regular file"); + return write_fd; +} + +fn fdForRegular(file: File) c_int { + if (isStderrFile(file)) unsupported("regular file operation on stderr"); + if (@sizeOf(File.Handle) != 0) return fdFromFileHandle(file); + if (read_fd >= 0) return read_fd; + if (write_fd >= 0) return write_fd; + unsupported("regular file operation with no open file"); +} + +fn fdFromFileHandle(file: File) c_int { + return @intCast(file.handle); +} + +fn isStderrFile(file: File) bool { + return file.flags.nonblocking; +} + +fn seekToOffset(fd: c_int, offset: u64) File.SeekError!void { + if (offset > std.math.maxInt(c_long)) return error.Unseekable; + if (c.lseek(fd, @intCast(offset), SEEK_SET) < 0) return seekError(); +} + +/// Writes `header`, then every `data` buffer except the last once, then the last buffer `splat` times (the `std.Io` vectored-write convention); returns the bytes written. +fn writeVectors(fd: c_int, header: []const u8, data: []const []const u8, splat: usize) File.WritePositionalError!usize { + var total: usize = try writeOne(fd, header); + if (data.len == 0) return total; + for (data[0 .. data.len - 1]) |buf| total += try writeOne(fd, buf); + // Only the final buffer is the splat pattern; `splat` may be 0, in which case it is skipped entirely. + const pattern = data[data.len - 1]; + for (0..splat) |_| total += try writeOne(fd, pattern); + return total; 
+} + +fn writeOne(fd: c_int, bytes: []const u8) File.WritePositionalError!usize { + var remaining = bytes; + var total: usize = 0; + while (remaining.len > 0) { + const n = c.write(fd, remaining.ptr, remaining.len); + if (n < 0) return writeError(); + if (n == 0) return total; + const amt: usize = @intCast(n); + total += amt; + remaining = remaining[amt..]; + } + return total; +} + +fn zPath(buf: *[max_path_bytes:0]u8, path: []const u8) error{ NameTooLong, BadPathName }![:0]const u8 { + if (path.len >= max_path_bytes) return error.NameTooLong; + if (std.mem.indexOfScalar(u8, path, 0) != null) return error.BadPathName; + @memcpy(buf[0..path.len], path); + buf[path.len] = 0; + return buf[0..path.len :0]; +} + +fn errno() c_int { + return c.__errno().*; +} + +fn openError() File.OpenError { + return switch (errno()) { + 1 => error.PermissionDenied, + 2 => error.FileNotFound, + 6 => error.NoDevice, + 12 => error.SystemResources, + 13 => error.AccessDenied, + 16 => error.DeviceBusy, + 17 => error.PathAlreadyExists, + 20 => error.NotDir, + 21 => error.IsDir, + 23 => error.SystemFdQuotaExceeded, + 24 => error.ProcessFdQuotaExceeded, + 26 => error.FileBusy, + 27 => error.FileTooBig, + 28 => error.NoSpaceLeft, + 30 => error.ReadOnlyFileSystem, + 91 => error.NameTooLong, + 92 => error.SymLinkLoop, + else => error.Unexpected, + }; +} + +fn readError() File.ReadPositionalError { + return switch (errno()) { + 5 => error.InputOutput, + 11 => error.WouldBlock, + 12 => error.SystemResources, + 13 => error.AccessDenied, + 21 => error.IsDir, + 29 => error.Unseekable, + else => error.Unexpected, + }; +} + +fn readStreamingError() Io.Operation.FileReadStreaming.Error { + return switch (errno()) { + 5 => error.InputOutput, + 11 => error.WouldBlock, + 12 => error.SystemResources, + 13 => error.AccessDenied, + 21 => error.IsDir, + else => error.Unexpected, + }; +} + +fn writeError() File.WritePositionalError { + return switch (errno()) { + 5 => error.InputOutput, + 6 => error.NoDevice, + 
11 => error.WouldBlock, + 12 => error.SystemResources, + 13 => error.AccessDenied, + 27 => error.FileTooBig, + 28 => error.NoSpaceLeft, + 29 => error.Unseekable, + 32 => error.BrokenPipe, + else => error.Unexpected, + }; +} + +fn seekError() File.SeekError { + return switch (errno()) { + 13 => error.AccessDenied, + 29 => error.Unseekable, + else => error.Unexpected, + }; +} + +fn seekToLengthError() File.LengthError { + return switch (errno()) { + 5 => error.Unexpected, + 13 => error.AccessDenied, + 29 => error.Streaming, + else => error.Unexpected, + }; +} + +fn syncError() File.SyncError { + return switch (errno()) { + 5 => error.InputOutput, + 12 => error.Unexpected, + 13 => error.AccessDenied, + 28 => error.NoSpaceLeft, + 132 => error.DiskQuota, + else => error.Unexpected, + }; +} + +fn setLengthError() File.SetLengthError { + return switch (errno()) { + 5 => error.InputOutput, + 13 => error.AccessDenied, + 16 => error.FileBusy, + 27 => error.FileTooBig, + 29 => error.NonResizable, + else => error.Unexpected, + }; +} + +fn currentPathError() std.process.CurrentPathError { + return switch (errno()) { + 12 => error.Unexpected, + 91 => error.NameTooLong, + else => error.CurrentDirUnlinked, + }; +} + +fn setCurrentPathError() std.process.SetCurrentPathError { + return switch (errno()) { + 2 => error.FileNotFound, + 13 => error.AccessDenied, + 20 => error.NotDir, + 91 => error.NameTooLong, + else => error.Unexpected, + }; +} + +fn zero(comptime T: type) T { + return switch (@typeInfo(T)) { + .void => {}, + .int, .comptime_int => 0, + else => @as(T, @intCast(0)), + }; +} + +fn unsupported(comptime name: []const u8) noreturn { + std.debug.panic("c std.Io baseline does not implement {s}", .{name}); +} diff --git a/src/platform/c_process_init.zig b/src/platform/c_process_init.zig new file mode 100644 index 0000000..e0f08bf --- /dev/null +++ b/src/platform/c_process_init.zig @@ -0,0 +1,88 @@ +const std = @import("std"); +const c_io = @import("c_io.zig"); + +extern fn 
memalign(alignment: usize, size: usize) ?*anyopaque; +extern fn free(ptr: ?*anyopaque) void; + +var arena_state: std.heap.ArenaAllocator = undefined; +var environ_map_state: std.process.Environ.Map = undefined; + +const allocator_vtable: std.mem.Allocator.VTable = .{ + .alloc = alloc, + .resize = resize, + .remap = remap, + .free = dealloc, +}; + +pub fn makeInit(args: std.process.Args) std.process.Init { + const gpa = allocator(); + arena_state = std.heap.ArenaAllocator.init(gpa); + environ_map_state = std.process.Environ.Map.init(gpa); + + return .{ + .minimal = .{ + .environ = std.process.Environ.empty, + .args = args, + }, + .arena = &arena_state, + .gpa = gpa, + .io = c_io.io(), + .environ_map = &environ_map_state, + .preopens = std.process.Preopens.empty, + }; +} + +fn allocator() std.mem.Allocator { + return .{ + .ptr = undefined, + .vtable = &allocator_vtable, + }; +} + +fn alloc( + _: *anyopaque, + len: usize, + alignment: std.mem.Alignment, + _: usize, +) ?[*]u8 { + std.debug.assert(len > 0); + + const effective_alignment = @max(alignment.toByteUnits(), @sizeOf(usize)); + const ptr = memalign(effective_alignment, len) orelse return null; + std.debug.assert(alignment.check(@intFromPtr(ptr))); + return @ptrCast(ptr); +} + +fn resize( + _: *anyopaque, + memory: []u8, + _: std.mem.Alignment, + new_len: usize, + _: usize, +) bool { + std.debug.assert(memory.len > 0); + std.debug.assert(new_len > 0); + return new_len <= memory.len; +} + +fn remap( + _: *anyopaque, + memory: []u8, + _: std.mem.Alignment, + new_len: usize, + _: usize, +) ?[*]u8 { + std.debug.assert(memory.len > 0); + std.debug.assert(new_len > 0); + return if (new_len <= memory.len) memory.ptr else null; +} + +fn dealloc( + _: *anyopaque, + memory: []u8, + _: std.mem.Alignment, + _: usize, +) void { + std.debug.assert(memory.len > 0); + free(memory.ptr); +} diff --git a/src/platform/switch/services.zig b/src/platform/switch/services.zig index 5067f50..ec43ed9 100644 --- 
a/src/platform/switch/services.zig +++ b/src/platform/switch/services.zig @@ -1,13 +1,8 @@ //! Switch system services / entry shim. //! //! Exports a C-callable `main` that hands control to the user's Zig -//! `main`. `Init` is currently `undefined` — invoking the engine on -//! hardware will crash on the first real allocation. The shim exists -//! so the engine call graph is reachable from `-ofmt=c` codegen and -//! so libnx integration has a clear landing pad: when libnx is wired -//! in, build a real `std.process.Init` here (libnx-backed allocator, -//! an `Io` implementation talking to fs:srv / sockets, an -//! `Environ.Map`) and pass it to `root.main`. +//! `main`. The allocator and baseline `std.Io` pieces of `std.process.Init` +//! are wired through newlib; deeper platform services remain TODO. //! //! libnx's switch.specs links with `--require-defined=main`, which //! pulls a strong `main` from libnx's crt0 by default. We shadow it @@ -15,14 +10,14 @@ //! ld picks the first definition seen — to route the entry through //! Aether instead of libnx's nnMain wrapper. 
-const std = @import("std"); +const process_init = @import("../c_process_init.zig"); comptime { @export(&entry, .{ .name = "main" }); } fn entry(_: c_int, _: [*c][*c]u8) callconv(.c) c_int { - const init: std.process.Init = undefined; + const init = process_init.makeInit(.{ .vector = {} }); @import("root").main(init) catch return 1; return 0; } diff --git a/src/platform/switch/switch_gfx.zig b/src/platform/switch/switch_gfx.zig index b549513..ca7309d 100644 --- a/src/platform/switch/switch_gfx.zig +++ b/src/platform/switch/switch_gfx.zig @@ -28,6 +28,8 @@ pub fn set_alpha_blend(_: bool) void {} pub fn set_depth_write(_: bool) void {} pub fn set_fog(_: bool, _: f32, _: f32, _: f32, _: f32, _: f32) void {} pub fn set_clip_planes(_: bool) void {} +pub fn set_culling(_: bool) void {} +pub fn set_uv_offset(_: f32, _: f32) void {} pub fn set_proj_matrix(_: *const Mat4) void {} pub fn set_view_matrix(_: *const Mat4) void {} diff --git a/src/platform/switch/time.zig b/src/platform/switch/time.zig new file mode 100644 index 0000000..35ea360 --- /dev/null +++ b/src/platform/switch/time.zig @@ -0,0 +1,37 @@ +const std = @import("std"); + +extern fn svcGetSystemTick() u64; +extern fn svcSleepThread(ns: i64) void; + +pub fn now(clock: std.Io.Clock) std.Io.Timestamp { + return switch (clock) { + .real, .awake, .boot => .fromNanoseconds(@intCast((@as(u128, svcGetSystemTick()) * 625) / 12)), + else => std.debug.panic("switch std.Io clock {s} is not implemented", .{@tagName(clock)}), + }; +} + +pub fn clockResolution(clock: std.Io.Clock) std.Io.Clock.ResolutionError!std.Io.Duration { + return switch (clock) { + .real, .awake, .boot => .fromNanoseconds(53), + else => error.ClockUnavailable, + }; +} + +pub fn sleep(timeout: std.Io.Timeout) std.Io.Cancelable!void { + const ns = timeoutNanoseconds(timeout); + if (ns <= 0) return; + svcSleepThread(clampNs(ns)); +} + +fn timeoutNanoseconds(timeout: std.Io.Timeout) i96 { + return switch (timeout) { + .none => 0, + .duration => |duration| 
duration.raw.nanoseconds, + .deadline => |deadline| deadline.raw.nanoseconds - now(deadline.clock).nanoseconds, + }; +} + +fn clampNs(ns: i96) i64 { + if (ns > std.math.maxInt(i64)) return std.math.maxInt(i64); + return @intCast(ns); +} diff --git a/src/root.zig b/src/root.zig index c8528f7..38a5ce8 100644 --- a/src/root.zig +++ b/src/root.zig @@ -12,6 +12,7 @@ pub const ctx_to_self = Util.ctx_to_self; /// PSP-exclusive system utility dialogs (OSK, network configuration). /// Only available when `platform == .psp`; evaluates to `void` otherwise. pub const Psp = if (platform == .psp) @import("platform/psp/psp_dialogs.zig") else void; +pub const Cio = if (platform == .nintendo_3ds or platform == .nintendo_switch) @import("platform/c_io.zig") else void; // Pull in the 3DS / Switch entry shim on those targets. Each shim's // comptime block `@export`s a C-callable `main` so `-ofmt=c` emits diff --git a/src/util/util.zig b/src/util/util.zig index 700c857..183e934 100644 --- a/src/util/util.zig +++ b/src/util/util.zig @@ -21,6 +21,8 @@ comptime { pub const std_options: std.Options = .{ .log_level = if (builtin.mode == .Debug) .debug else .info, .logFn = logger.aether_log_fn, + .page_size_min = if (builtin.os.tag == .@"3ds" or builtin.os.tag == .freestanding) 4096 else null, + .page_size_max = if (builtin.os.tag == .@"3ds" or builtin.os.tag == .freestanding) 4096 else null, }; pub const engine_logger = std.log.scoped(.engine); diff --git a/test/main.zig b/test/main.zig index 9a1de4e..61e27d1 100644 --- a/test/main.zig +++ b/test/main.zig @@ -20,21 +20,17 @@ pub const psp_stack_size: u32 = 256 * 1024; // PSP, 3DS, and Switch override panic/IO handlers that would otherwise // pull in posix symbols (Io.Threaded references std.posix decls that -// don't exist for these targets). 
3DS and Switch don't have SDKs wired -// up yet, so their debug_io is `undefined` for now — invoking it before -// libctru/libnx lands is UB, which matches the placeholder entries in -// each platform's `services.zig`. +// don't exist for these targets). 3DS and Switch use Aether's newlib-backed +// baseline so debug prints go through stderr instead of dereferencing an +// undefined Io implementation. const is_freestanding_console = ae.platform == .psp or ae.platform == .nintendo_3ds or ae.platform == .nintendo_switch; -// 3DS and Switch use `no_panic` while their SDK integration is stubbed: -// `std_options_debug_io` is `undefined` on those targets, so -// `defaultPanic` would deref garbage when formatting, re-panic, and -// recurse until the stack blows. `no_panic` keeps the first fault as -// the only fault — a single CPU exception you can read off the screen -// rather than an unbounded loop through `memset`. +// 3DS and Switch keep `no_panic` while the debug IO baseline is intentionally +// small. Missing operations should fail at the original call site instead of +// recursing through stack-trace formatting on early bring-up builds. 
pub const panic = if (ae.platform == .psp) sdk.extra.debug.panic else if (ae.platform == .nintendo_3ds or ae.platform == .nintendo_switch) std.debug.no_panic else std.debug.FullPanic(std.debug.defaultPanic); pub const std_options_debug_threaded_io = if (is_freestanding_console) null else std.Io.Threaded.global_single_threaded; pub const std_options_debug_io: std.Io = - if (ae.platform == .psp) sdk.extra.Io.psp_io else if (ae.platform == .nintendo_3ds or ae.platform == .nintendo_switch) undefined else std.Io.Threaded.global_single_threaded.io(); + if (ae.platform == .psp) sdk.extra.Io.psp_io else if (ae.platform == .nintendo_3ds or ae.platform == .nintendo_switch) ae.Cio.io() else std.Io.Threaded.global_single_threaded.io(); pub const std_options_cwd = if (ae.platform == .psp) psp_cwd else if (ae.platform == .nintendo_3ds or ae.platform == .nintendo_switch) stub_cwd else null; fn psp_cwd() std.Io.Dir { From 1a020d8dbc82ea832da8a8210cd2c0c58ac19f21 Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Tue, 26 May 2026 18:32:29 -0400 Subject: [PATCH 05/44] Improve path finding --- build.zig | 7 ++ src/core/paths.zig | 14 ++- src/platform/3ds/paths.zig | 11 +++ src/platform/3ds/surface.zig | 7 +- src/platform/c_io.zig | 154 ++++++++++++++++++++++++++++++-- src/platform/switch/paths.zig | 13 +++ src/platform/switch/surface.zig | 7 +- test/main.zig | 2 +- 8 files changed, 197 insertions(+), 18 deletions(-) create mode 100644 src/platform/3ds/paths.zig create mode 100644 src/platform/switch/paths.zig diff --git a/build.zig b/build.zig index 100e6ae..06ba226 100644 --- a/build.zig +++ b/build.zig @@ -1156,6 +1156,11 @@ pub fn build(b: *std.Build) void { .slang = b.path("test/shaders/basic.slang"), }); + const nintendo_romfs = b.addWriteFiles(); + _ = nintendo_romfs.addCopyFile(b.path("test/test.png"), "test.png"); + _ = nintendo_romfs.addCopyFile(b.path("test/calm1.wav"), "calm1.wav"); + _ = nintendo_romfs.addCopyFile(b.path("test/grass1.wav"), "grass1.wav"); + 
exportArtifact(b, b, exe, config, .{ .title = "Aether", .output_dir = switch (config.platform) { @@ -1166,6 +1171,8 @@ pub fn build(b: *std.Build) void { }, .smdh_long_description = "Aether engine test app", .smdh_author = "Aether", + .romfs = if (config.platform == .nintendo_3ds) nintendo_romfs.getDirectory() else null, + .switch_romfs = if (config.platform == .nintendo_switch) nintendo_romfs.getDirectory() else null, }); const run_step = b.step("run", "Run the app"); diff --git a/src/core/paths.zig b/src/core/paths.zig index 79c488d..377a8bf 100644 --- a/src/core/paths.zig +++ b/src/core/paths.zig @@ -24,10 +24,13 @@ //! per project style guide they go through `std.Io` / `std.process`. const std = @import("std"); -const builtin = @import("builtin"); const options = @import("options"); const Io = std.Io; +const NintendoIo = if (options.config.platform == .nintendo_3ds or options.config.platform == .nintendo_switch) + @import("../platform/c_io.zig") +else + void; /// Engine-owned directory handles. Cleared via `close()` at engine shutdown. pub const Dirs = struct { @@ -82,13 +85,15 @@ pub fn resolve( // and debug/CI builds where state co-located with the binary is a // feature, not a bug. if (options.config.use_cwd) { + if (NintendoIo != void) NintendoIo.useCwdDirs(); return .{ .resources = Io.Dir.cwd(), .data = Io.Dir.cwd() }; } - return switch (builtin.os.tag) { + return switch (options.config.platform) { .macos => resolve_macos(io, environ_map, app_name), .windows => resolve_windows(io, environ_map, app_name), .linux => resolve_linux(io, environ_map, app_name), + .nintendo_3ds, .nintendo_switch => resolve_nintendo(app_name), // PSP: both dirs collapse to CWD. The EBOOT and its siblings all // live under `ms0:/PSP/GAME//`; the runtime sets CWD there // before main. No separation to enforce. 
@@ -100,6 +105,11 @@ pub fn resolve( }; } +fn resolve_nintendo(app_name: []const u8) Error!Dirs { + try NintendoIo.initAppDirs(app_name); + return .{ .resources = Io.Dir.cwd(), .data = Io.Dir.cwd() }; +} + // -- macOS -------------------------------------------------------------------- fn resolve_macos( diff --git a/src/platform/3ds/paths.zig b/src/platform/3ds/paths.zig new file mode 100644 index 0000000..fed61a9 --- /dev/null +++ b/src/platform/3ds/paths.zig @@ -0,0 +1,11 @@ +const std = @import("std"); + +extern fn romfsMountSelf(name: [*:0]const u8) u32; + +pub fn mountResources() bool { + return romfsMountSelf("romfs") == 0; +} + +pub fn dataRoot(buffer: []u8, app_name: []const u8) error{NameTooLong}![]const u8 { + return std.fmt.bufPrint(buffer, "sdmc:/3ds/{s}", .{app_name}) catch error.NameTooLong; +} diff --git a/src/platform/3ds/surface.zig b/src/platform/3ds/surface.zig index b1a12fb..f7bb7df 100644 --- a/src/platform/3ds/surface.zig +++ b/src/platform/3ds/surface.zig @@ -2,12 +2,13 @@ //! //! Top screen of an O3DS is 400x240; bottom touch screen is 320x240. The //! real backend will likely advertise the top screen here and expose the -//! bottom one separately. For now `update` returns true so the engine -//! loop keeps ticking even though nothing is drawn. +//! bottom one separately. 
const std = @import("std"); const Self = @This(); +extern fn aptMainLoop() bool; + alloc: std.mem.Allocator, pub fn init(_: *Self, _: u32, _: u32, _: [:0]const u8, _: bool, _: bool, _: bool) anyerror!void {} @@ -15,7 +16,7 @@ pub fn init(_: *Self, _: u32, _: u32, _: [:0]const u8, _: bool, _: bool, _: bool pub fn deinit(_: *Self) void {} pub fn update(_: *Self) bool { - return true; + return aptMainLoop(); } pub fn draw(_: *Self) void {} diff --git a/src/platform/c_io.zig b/src/platform/c_io.zig index 700f3bf..d389f5c 100644 --- a/src/platform/c_io.zig +++ b/src/platform/c_io.zig @@ -10,6 +10,11 @@ const platform_time = switch (options.config.platform) { .nintendo_switch => @import("switch/time.zig"), else => @compileError("platform/c_io.zig is only wired for Nintendo targets"), }; +const platform_paths = switch (options.config.platform) { + .nintendo_3ds => @import("3ds/paths.zig"), + .nintendo_switch => @import("switch/paths.zig"), + else => unreachable, +}; const c = struct { extern fn open(path: [*:0]const u8, flags: c_int, ...) 
c_int; @@ -21,6 +26,7 @@ const c = struct { extern fn ftruncate(fd: c_int, length: c_long) c_int; extern fn getcwd(buf: [*]u8, size: usize) ?[*:0]u8; extern fn chdir(path: [*:0]const u8) c_int; + extern fn mkdir(path: [*:0]const u8, mode: c_int) c_int; extern fn __errno() *c_int; }; @@ -42,6 +48,10 @@ var write_fd: c_int = -1; var stderr_writer: File.Writer = undefined; var stderr_writer_initialized = false; var empty_stderr_buffer: [0]u8 = .{}; +var resource_root_buffer: [max_path_bytes:0]u8 = @splat(0); +var data_root_buffer: [max_path_bytes:0]u8 = @splat(0); +var resource_root_len: usize = 0; +var data_root_len: usize = 0; const vtable: Io.VTable = blk: { var v = Io.failing.vtable.*; @@ -82,6 +92,24 @@ pub fn io() Io { return .{ .userdata = null, .vtable = &vtable }; } +pub fn initAppDirs(app_name: []const u8) Dir.CreateDirPathOpenError!void { + if (platform_paths.mountResources()) { + setResourceRoot("romfs:/") catch return error.NameTooLong; + } else { + setResourceRoot("") catch unreachable; + } + + var data_buffer: [max_path_bytes]u8 = undefined; + const data_root = platform_paths.dataRoot(&data_buffer, app_name) catch return error.NameTooLong; + try setDataRoot(data_root); + try ensureDirPath(data_root); +} + +pub fn useCwdDirs() void { + setResourceRoot("") catch unreachable; + setDataRoot("") catch unreachable; +} + fn crashHandler(_: ?*anyopaque) void {} fn recancel(_: ?*anyopaque) void {} @@ -107,26 +135,38 @@ fn dirOpenFile(_: ?*anyopaque, _: Dir, sub_path: []const u8, flags: Dir.OpenFile unsupported("dirOpenFile option"); if (!flags.allow_directory or !flags.follow_symlinks) unsupported("dirOpenFile path policy"); + const role: FileRole = switch (flags.mode) { + .read_only => .read, + .write_only => .write, + .read_write => unsupported("read-write file open on handle-less Nintendo target"), + }; var path_buffer: [max_path_bytes:0]u8 = undefined; - const path = try zPath(&path_buffer, sub_path); const open_flags: c_int = switch (flags.mode) { 
.read_only => O_RDONLY, .write_only => O_WRONLY, .read_write => O_RDWR, }; - const fd = c.open(path.ptr, open_flags, @as(c_int, 0)); - if (fd < 0) return openError(); + const path = try rootedPath(&path_buffer, sub_path, if (flags.mode == .read_only) resourceRoot() else dataRoot()); + var fd = c.open(path.ptr, open_flags, @as(c_int, 0)); + if (fd < 0 and flags.mode == .read_only and data_root_len > 0) { + const first_errno = errno(); + if (first_errno != 2) return openError(first_errno); + const fallback_path = try rootedPath(&path_buffer, sub_path, dataRoot()); + fd = c.open(fallback_path.ptr, open_flags, @as(c_int, 0)); + } + if (fd < 0) return openError(errno()); errdefer _ = c.close(fd); - return registerFile(fd, .read); + return registerFile(fd, role); } fn dirCreateFile(_: ?*anyopaque, _: Dir, sub_path: []const u8, flags: Dir.CreateFileOptions) File.OpenError!File { if (flags.lock != .none or flags.resolve_beneath) unsupported("dirCreateFile option"); + if (flags.read) unsupported("readable created file on handle-less Nintendo target"); var path_buffer: [max_path_bytes:0]u8 = undefined; - const path = try zPath(&path_buffer, sub_path); - var open_flags: c_int = if (flags.read) O_RDWR else O_WRONLY; + const path = try rootedPath(&path_buffer, sub_path, dataRoot()); + var open_flags: c_int = O_WRONLY; open_flags |= O_CREAT; if (flags.truncate) open_flags |= O_TRUNC; if (flags.exclusive) open_flags |= O_EXCL; @@ -136,7 +176,7 @@ fn dirCreateFile(_: ?*anyopaque, _: Dir, sub_path: []const u8, flags: Dir.Create else @intCast(@intFromEnum(flags.permissions)); const fd = c.open(path.ptr, open_flags, mode); - if (fd < 0) return openError(); + if (fd < 0) return openError(errno()); errdefer _ = c.close(fd); return registerFile(fd, .write); } @@ -335,6 +375,29 @@ fn random(_: ?*anyopaque, buffer: []u8) void { const FileRole = enum { read, write }; +fn resourceRoot() []const u8 { + return resource_root_buffer[0..resource_root_len]; +} + +fn dataRoot() []const u8 { + 
return data_root_buffer[0..data_root_len]; +} + +fn setResourceRoot(root: []const u8) error{NameTooLong}!void { + try setRoot(&resource_root_buffer, &resource_root_len, root); +} + +fn setDataRoot(root: []const u8) error{NameTooLong}!void { + try setRoot(&data_root_buffer, &data_root_len, root); +} + +fn setRoot(buffer: *[max_path_bytes:0]u8, len: *usize, root: []const u8) error{NameTooLong}!void { + if (root.len >= max_path_bytes) return error.NameTooLong; + @memcpy(buffer[0..root.len], root); + buffer[root.len] = 0; + len.* = root.len; +} + fn registerFile(fd: c_int, role: FileRole) File { if (@sizeOf(File.Handle) == 0) { switch (role) { @@ -422,12 +485,85 @@ fn zPath(buf: *[max_path_bytes:0]u8, path: []const u8) error{ NameTooLong, BadPa return buf[0..path.len :0]; } +fn rootedPath(buf: *[max_path_bytes:0]u8, path: []const u8, root: []const u8) error{ NameTooLong, BadPathName }![:0]const u8 { + if (isAbsoluteOrDevicePath(path) or root.len == 0) return zPath(buf, path); + if (std.mem.indexOfScalar(u8, path, 0) != null) return error.BadPathName; + + const needs_sep = root.len > 0 and !std.mem.endsWith(u8, root, "/") and !std.mem.startsWith(u8, path, "/"); + const len = root.len + @intFromBool(needs_sep) + path.len; + if (len >= max_path_bytes) return error.NameTooLong; + + var i: usize = 0; + @memcpy(buf[i..][0..root.len], root); + i += root.len; + if (needs_sep) { + buf[i] = '/'; + i += 1; + } + @memcpy(buf[i..][0..path.len], path); + buf[len] = 0; + return buf[0..len :0]; +} + +fn isAbsoluteOrDevicePath(path: []const u8) bool { + if (path.len == 0) return false; + if (path[0] == '/') return true; + const colon = std.mem.indexOfScalar(u8, path, ':') orelse return false; + const slash = std.mem.indexOfAny(u8, path, "/\\") orelse path.len; + return colon < slash; +} + +fn ensureDirPath(path: []const u8) Dir.CreateDirPathOpenError!void { + if (path.len == 0) return error.BadPathName; + var path_buffer: [max_path_bytes:0]u8 = undefined; + const full = 
zPath(&path_buffer, path) catch |err| return err; + const full_len = full.len; + const full_ptr = path_buffer[0..].ptr; + + const start = pathRootEnd(path); + var i = start; + while (i < full_len) : (i += 1) { + if (path_buffer[i] != '/') continue; + if (i == start) continue; + path_buffer[i] = 0; + try createDir(full_ptr); + path_buffer[i] = '/'; + } + try createDir(full_ptr); +} + +fn pathRootEnd(path: []const u8) usize { + if (std.mem.indexOfScalar(u8, path, ':')) |colon| { + if (colon + 1 < path.len and path[colon + 1] == '/') return colon + 2; + return colon + 1; + } + return if (path.len > 0 and path[0] == '/') 1 else 0; +} + +fn createDir(path: [*:0]const u8) Dir.CreateDirPathOpenError!void { + if (c.mkdir(path, 0o777) == 0) return; + switch (errno()) { + 17 => return, + 1 => return error.PermissionDenied, + 2 => return error.FileNotFound, + 6 => return error.NoDevice, + 12 => return error.SystemResources, + 13 => return error.AccessDenied, + 20 => return error.NotDir, + 28 => return error.NoSpaceLeft, + 30 => return error.ReadOnlyFileSystem, + 91 => return error.NameTooLong, + 92 => return error.SymLinkLoop, + else => return error.Unexpected, + } +} + fn errno() c_int { return c.__errno().*; } -fn openError() File.OpenError { - return switch (errno()) { +fn openError(code: c_int) File.OpenError { + return switch (code) { 1 => error.PermissionDenied, 2 => error.FileNotFound, 6 => error.NoDevice, diff --git a/src/platform/switch/paths.zig b/src/platform/switch/paths.zig new file mode 100644 index 0000000..69e4ec3 --- /dev/null +++ b/src/platform/switch/paths.zig @@ -0,0 +1,13 @@ +const std = @import("std"); + +extern fn fsdevMountSdmc() u32; +extern fn romfsMountSelf(name: [*:0]const u8) u32; + +pub fn mountResources() bool { + _ = fsdevMountSdmc(); + return romfsMountSelf("romfs") == 0; +} + +pub fn dataRoot(buffer: []u8, app_name: []const u8) error{NameTooLong}![]const u8 { + return std.fmt.bufPrint(buffer, "sdmc:/switch/{s}", .{app_name}) catch 
error.NameTooLong; +} diff --git a/src/platform/switch/surface.zig b/src/platform/switch/surface.zig index ea4d539..8d49220 100644 --- a/src/platform/switch/surface.zig +++ b/src/platform/switch/surface.zig @@ -3,12 +3,13 @@ //! Switch's framebuffer is 1280x720 in handheld mode and 1920x1080 //! docked. We advertise 1280x720 so the engine has a sane default; //! a real backend will query `appletGetOperationMode` and resize on -//! dock transitions. `update` returns true so the engine loop keeps -//! ticking even though nothing is drawn. +//! dock transitions. const std = @import("std"); const Self = @This(); +extern fn appletMainLoop() bool; + alloc: std.mem.Allocator, pub fn init(_: *Self, _: u32, _: u32, _: [:0]const u8, _: bool, _: bool, _: bool) anyerror!void {} @@ -16,7 +17,7 @@ pub fn init(_: *Self, _: u32, _: u32, _: [:0]const u8, _: bool, _: bool, _: bool pub fn deinit(_: *Self) void {} pub fn update(_: *Self) bool { - return true; + return appletMainLoop(); } pub fn draw(_: *Self) void {} diff --git a/test/main.zig b/test/main.zig index 61e27d1..2dbf04c 100644 --- a/test/main.zig +++ b/test/main.zig @@ -74,7 +74,7 @@ const MyState = struct { grass_spawn: u32, fn load_wav(engine: *ae.Engine, path: []const u8) ![]u8 { - var file = try std.Io.Dir.cwd().openFile(engine.io, path, .{}); + var file = try engine.dirs.resources.openFile(engine.io, path, .{}); defer file.close(engine.io); var tmp: [4096]u8 = undefined; From 3303a4a748f9f057e21f1df147503adee1c32f25 Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Tue, 26 May 2026 18:41:50 -0400 Subject: [PATCH 06/44] Audio --- src/platform/3ds/3ds_audio.zig | 248 ++++++++++++++++++++++++-- src/platform/switch/switch_audio.zig | 253 +++++++++++++++++++++++++-- 2 files changed, 474 insertions(+), 27 deletions(-) diff --git a/src/platform/3ds/3ds_audio.zig b/src/platform/3ds/3ds_audio.zig index 37410ff..239ece8 100644 --- a/src/platform/3ds/3ds_audio.zig +++ b/src/platform/3ds/3ds_audio.zig @@ -1,34 +1,256 @@ -//! 
3DS audio backend stub. +//! 3DS audio backend -- NDSP hardware voices. //! -//! Empty slot-based PCM output that satisfies `audio_api.Interface`. -//! Real bring-up will route through libctru's NDSP (or CSND on older -//! firmwares). 24 voices is what NDSP exposes on retail hardware. +//! Each Aether mixer slot maps to one NDSP channel. The game thread refills +//! double-buffered linear-memory wave buffers from the Stream reader in +//! `update`; NDSP handles sample-rate conversion and channel mixing. const std = @import("std"); const Stream = @import("../../audio/stream.zig").Stream; +const PcmFormat = @import("../../audio/stream.zig").PcmFormat; +const NUM_SLOTS: usize = 24; +const BUFFERS_PER_SLOT: usize = 2; +const SAMPLES_PER_BUF: usize = 4096; +const MAX_CHANNELS: usize = 2; +const MAX_BYTES_PER_SAMPLE: usize = 2; +const MAX_BYTES_PER_BUF: usize = SAMPLES_PER_BUF * MAX_CHANNELS * MAX_BYTES_PER_SAMPLE; +const TOTAL_AUDIO_BYTES: usize = NUM_SLOTS * BUFFERS_PER_SLOT * MAX_BYTES_PER_BUF; + +const NDSP_OUTPUT_STEREO: c_int = 1; +const NDSP_INTERP_LINEAR: c_int = 1; +const NDSP_FORMAT_MONO_PCM16: u16 = 5; +const NDSP_FORMAT_STEREO_PCM16: u16 = 6; +const NDSP_WBUF_DONE: u8 = 3; + +const Result = c_int; + +const NdspAdpcmData = extern struct { + index: u16, + history0: i16, + history1: i16, +}; + +const NdspWaveBuf = extern struct { + data_vaddr: ?*const anyopaque, + nsamples: u32, + adpcm_data: ?*NdspAdpcmData, + offset: u32, + looping: bool, + status: u8, + sequence_id: u16, + next: ?*NdspWaveBuf, +}; + +extern fn ndspInit() Result; +extern fn ndspExit() void; +extern fn ndspSetOutputMode(mode: c_int) void; +extern fn ndspChnReset(id: c_int) void; +extern fn ndspChnSetInterp(id: c_int, interp: c_int) void; +extern fn ndspChnSetRate(id: c_int, rate: f32) void; +extern fn ndspChnSetFormat(id: c_int, format: u16) void; +extern fn ndspChnSetMix(id: c_int, mix: *[12]f32) void; +extern fn ndspChnWaveBufClear(id: c_int) void; +extern fn ndspChnWaveBufAdd(id: c_int, buf: 
*NdspWaveBuf) void; +extern fn DSP_FlushDataCache(address: *const anyopaque, size: u32) Result; +extern fn linearAlloc(size: usize) ?*anyopaque; +extern fn linearFree(mem: ?*anyopaque) void; + +const SlotState = enum(u8) { + inactive = 0, + pending = 1, + active = 2, + finished = 3, +}; + +const Slot = struct { + state: SlotState = .inactive, + gain: f32 = 0, + pan: f32 = 0, + stream: Stream = undefined, + format: PcmFormat = .{ .sample_rate = 44_100, .channels = 1, .bit_depth = 16 }, + wave_bufs: [BUFFERS_PER_SLOT]NdspWaveBuf = undefined, +}; + +var slots: [NUM_SLOTS]Slot = init_slots(); var audio_alloc: std.mem.Allocator = undefined; var audio_io: std.Io = undefined; +var audio_data: ?[*]u8 = null; + +fn init_slots() [NUM_SLOTS]Slot { + var s: [NUM_SLOTS]Slot = undefined; + for (&s) |*slot| { + slot.* = .{}; + } + return s; +} pub fn setup(alloc: std.mem.Allocator, io: std.Io) void { audio_alloc = alloc; audio_io = io; } -pub fn init() anyerror!void {} -pub fn deinit() void {} -pub fn update() void {} +pub fn init() anyerror!void { + _ = audio_alloc; + _ = audio_io; + + audio_data = @ptrCast(linearAlloc(TOTAL_AUDIO_BYTES) orelse return error.AudioInitFailed); + + if (ndspInit() != 0) { + linearFree(audio_data); + audio_data = null; + return error.AudioInitFailed; + } + + ndspSetOutputMode(NDSP_OUTPUT_STEREO); + + for (0..NUM_SLOTS) |i| { + ndspChnReset(@intCast(i)); + slots[i] = .{}; + init_wave_bufs(i); + } +} + +pub fn deinit() void { + for (0..NUM_SLOTS) |i| { + ndspChnWaveBufClear(@intCast(i)); + ndspChnReset(@intCast(i)); + slots[i].state = .inactive; + } + + ndspExit(); + + if (audio_data) |data| { + linearFree(data); + audio_data = null; + } +} + +pub fn update() void { + if (audio_data == null) return; + + for (&slots, 0..) 
|*slot, i| { + switch (slot.state) { + .inactive, .finished => {}, + .pending => start_slot(slot, i) catch { + slot.state = .finished; + }, + .active => refill_done_buffers(slot, i), + } + } +} pub fn max_voices() u32 { - return 24; + return NUM_SLOTS; +} + +pub fn play_slot(slot: u8, stream: Stream) anyerror!void { + if (slot >= NUM_SLOTS) return error.InvalidArgs; + if (!format_supported(stream.format)) return error.UnsupportedFormat; + + const i: usize = slot; + ndspChnWaveBufClear(slot); + slots[i].stream = stream; + slots[i].format = stream.format; + slots[i].state = .pending; +} + +pub fn stop_slot(slot: u8) void { + if (slot >= NUM_SLOTS) return; + ndspChnWaveBufClear(slot); + slots[slot].state = .inactive; +} + +pub fn set_slot_gain_pan(slot: u8, gain: f32, pan: f32) void { + if (slot >= NUM_SLOTS) return; + slots[slot].gain = gain; + slots[slot].pan = pan; + if (slots[slot].state == .active) apply_mix(slot, &slots[slot]); } -pub fn play_slot(_: u8, _: Stream) anyerror!void {} +pub fn is_slot_active(slot: u8) bool { + if (slot >= NUM_SLOTS) return false; + return slots[slot].state != .inactive and slots[slot].state != .finished; +} + +fn init_wave_bufs(slot_index: usize) void { + const base = audio_data.?; + const slot_base = slot_index * BUFFERS_PER_SLOT * MAX_BYTES_PER_BUF; + + for (&slots[slot_index].wave_bufs, 0..) 
|*buf, b| { + buf.* = .{ + .data_vaddr = @ptrCast(base + slot_base + b * MAX_BYTES_PER_BUF), + .nsamples = SAMPLES_PER_BUF, + .adpcm_data = null, + .offset = 0, + .looping = false, + .status = NDSP_WBUF_DONE, + .sequence_id = 0, + .next = null, + }; + } +} + +fn start_slot(slot: *Slot, slot_index: usize) !void { + const id: c_int = @intCast(slot_index); + + ndspChnWaveBufClear(id); + ndspChnReset(id); + ndspChnSetInterp(id, NDSP_INTERP_LINEAR); + ndspChnSetRate(id, @floatFromInt(slot.format.sample_rate)); + ndspChnSetFormat(id, if (slot.format.channels == 1) NDSP_FORMAT_MONO_PCM16 else NDSP_FORMAT_STEREO_PCM16); + apply_mix(@intCast(slot_index), slot); -pub fn stop_slot(_: u8) void {} + var queued: bool = false; + for (&slot.wave_bufs) |*buf| { + if (fill_wave_buf(slot, buf)) { + ndspChnWaveBufAdd(id, buf); + queued = true; + } else break; + } -pub fn set_slot_gain_pan(_: u8, _: f32, _: f32) void {} + slot.state = if (queued) .active else .finished; +} + +fn refill_done_buffers(slot: *Slot, slot_index: usize) void { + const id: c_int = @intCast(slot_index); + + for (&slot.wave_bufs) |*buf| { + if (buf.status != NDSP_WBUF_DONE) continue; + if (!fill_wave_buf(slot, buf)) { + slot.state = .finished; + return; + } + ndspChnWaveBufAdd(id, buf); + } +} + +fn fill_wave_buf(slot: *Slot, buf: *NdspWaveBuf) bool { + const byte_count = SAMPLES_PER_BUF * slot.format.frame_size(); + if (byte_count > MAX_BYTES_PER_BUF) return false; + + const raw: [*]u8 = @ptrCast(@constCast(buf.data_vaddr.?)); + const dst = raw[0..byte_count]; + + slot.stream.reader.readSliceAll(dst) catch return false; + _ = DSP_FlushDataCache(buf.data_vaddr.?, @intCast(byte_count)); + + buf.nsamples = SAMPLES_PER_BUF; + buf.offset = 0; + buf.looping = false; + buf.status = NDSP_WBUF_DONE; + buf.next = null; + return true; +} + +fn apply_mix(slot: u8, s: *const Slot) void { + const left = s.gain * std.math.clamp(1.0 - s.pan, 0.0, 1.0); + const right = s.gain * std.math.clamp(1.0 + s.pan, 0.0, 1.0); + var mix: 
[12]f32 = @splat(0); + mix[0] = std.math.clamp(left, 0.0, 1.0); + mix[1] = std.math.clamp(right, 0.0, 1.0); + ndspChnSetMix(slot, &mix); +} -pub fn is_slot_active(_: u8) bool { - return false; +fn format_supported(fmt: PcmFormat) bool { + return fmt.bit_depth == 16 and (fmt.channels == 1 or fmt.channels == 2); } diff --git a/src/platform/switch/switch_audio.zig b/src/platform/switch/switch_audio.zig index 1a68e43..35ab166 100644 --- a/src/platform/switch/switch_audio.zig +++ b/src/platform/switch/switch_audio.zig @@ -1,35 +1,260 @@ -//! Switch audio backend stub. +//! Switch audio backend -- audout with software mixing. //! -//! Empty slot-based PCM output that satisfies `audio_api.Interface`. -//! Real bring-up will route through libnx's `audren` (the high-level -//! audio renderer) or `audout` for raw PCM. 24 voices matches what -//! audren exposes by default — bump if a downstream game needs more. +//! audout exposes one 48 kHz stereo i16 output stream, so this backend mixes +//! Aether's slots into a ring of audout buffers. A small nearest-neighbor +//! resampler keeps the existing 44.1 kHz test WAVs playable. 
const std = @import("std"); const Stream = @import("../../audio/stream.zig").Stream; +const PcmFormat = @import("../../audio/stream.zig").PcmFormat; +const DEVICE_SAMPLE_RATE: u32 = 48_000; +const DEVICE_CHANNELS: usize = 2; +const NUM_SLOTS: usize = 24; +const BUFFER_COUNT: usize = 3; +const SAMPLES_PER_BUF: usize = 2048; +const OUTPUT_BYTES: usize = SAMPLES_PER_BUF * DEVICE_CHANNELS * @sizeOf(i16); +const OUTPUT_BUFFER_BYTES: usize = std.mem.alignForward(usize, OUTPUT_BYTES, 0x1000); +const TOTAL_OUTPUT_BYTES: usize = BUFFER_COUNT * OUTPUT_BUFFER_BYTES; +const FP_ONE: u64 = 1 << 32; + +const Result = u32; + +const AudioOutBuffer = extern struct { + next: ?*AudioOutBuffer, + buffer: ?*anyopaque, + buffer_size: u64, + data_size: u64, + data_offset: u64, +}; + +extern fn audoutInitialize() Result; +extern fn audoutExit() void; +extern fn audoutStartAudioOut() Result; +extern fn audoutStopAudioOut() Result; +extern fn audoutAppendAudioOutBuffer(buffer: *AudioOutBuffer) Result; +extern fn audoutGetReleasedAudioOutBuffer(buffer: *?*AudioOutBuffer, released_count: *u32) Result; +extern fn memalign(alignment: usize, size: usize) ?*anyopaque; +extern fn free(ptr: ?*anyopaque) void; + +const SlotState = enum(u8) { + inactive = 0, + pending = 1, + active = 2, + finished = 3, +}; + +const Slot = struct { + state: SlotState = .inactive, + gain: f32 = 0, + pan: f32 = 0, + stream: Stream = undefined, + format: PcmFormat = .{ .sample_rate = 44_100, .channels = 1, .bit_depth = 16 }, + step_fp: u64 = FP_ONE, + phase_fp: u64 = 0, + current_left: i16 = 0, + current_right: i16 = 0, +}; + +var slots: [NUM_SLOTS]Slot = init_slots(); var audio_alloc: std.mem.Allocator = undefined; var audio_io: std.Io = undefined; +var output_data: ?[*]u8 = null; +var buffers: [BUFFER_COUNT]AudioOutBuffer = undefined; +var initialized: bool = false; + +fn init_slots() [NUM_SLOTS]Slot { + var s: [NUM_SLOTS]Slot = undefined; + for (&s) |*slot| { + slot.* = .{}; + } + return s; +} pub fn setup(alloc: 
std.mem.Allocator, io: std.Io) void { audio_alloc = alloc; audio_io = io; } -pub fn init() anyerror!void {} -pub fn deinit() void {} -pub fn update() void {} +pub fn init() anyerror!void { + _ = audio_alloc; + _ = audio_io; + + output_data = @ptrCast(memalign(0x1000, TOTAL_OUTPUT_BYTES) orelse return error.AudioInitFailed); + @memset(output_data.?[0..TOTAL_OUTPUT_BYTES], 0); + + if (audoutInitialize() != 0) { + free_output(); + return error.AudioInitFailed; + } + + if (audoutStartAudioOut() != 0) { + audoutExit(); + free_output(); + return error.AudioInitFailed; + } + + initialized = true; + + for (&buffers, 0..) |*buf, i| { + buf.* = .{ + .next = null, + .buffer = @ptrCast(output_data.? + i * OUTPUT_BUFFER_BYTES), + .buffer_size = OUTPUT_BUFFER_BYTES, + .data_size = OUTPUT_BYTES, + .data_offset = 0, + }; + if (audoutAppendAudioOutBuffer(buf) != 0) { + _ = audoutStopAudioOut(); + audoutExit(); + initialized = false; + free_output(); + return error.AudioInitFailed; + } + } +} + +pub fn deinit() void { + if (initialized) { + _ = audoutStopAudioOut(); + audoutExit(); + initialized = false; + } + + free_output(); + + for (&slots) |*slot| { + slot.state = .inactive; + } +} + +pub fn update() void { + if (!initialized) return; + + while (true) { + var released: ?*AudioOutBuffer = null; + var released_count: u32 = 0; + if (audoutGetReleasedAudioOutBuffer(&released, &released_count) != 0) return; + if (released_count == 0 or released == null) return; + + const buf = released.?; + fill_output_buffer(buf); + _ = audoutAppendAudioOutBuffer(buf); + } +} pub fn max_voices() u32 { - return 24; + return NUM_SLOTS; } -pub fn play_slot(_: u8, _: Stream) anyerror!void {} +pub fn play_slot(slot: u8, stream: Stream) anyerror!void { + if (slot >= NUM_SLOTS) return error.InvalidArgs; + if (!format_supported(stream.format)) return error.UnsupportedFormat; -pub fn stop_slot(_: u8) void {} + const i: usize = slot; + slots[i].stream = stream; + slots[i].format = stream.format; + 
slots[i].step_fp = (@as(u64, stream.format.sample_rate) << 32) / DEVICE_SAMPLE_RATE; + slots[i].phase_fp = 0; + slots[i].current_left = 0; + slots[i].current_right = 0; + slots[i].state = .pending; +} + +pub fn stop_slot(slot: u8) void { + if (slot >= NUM_SLOTS) return; + slots[slot].state = .inactive; +} -pub fn set_slot_gain_pan(_: u8, _: f32, _: f32) void {} +pub fn set_slot_gain_pan(slot: u8, gain: f32, pan: f32) void { + if (slot >= NUM_SLOTS) return; + slots[slot].gain = gain; + slots[slot].pan = pan; +} + +pub fn is_slot_active(slot: u8) bool { + if (slot >= NUM_SLOTS) return false; + return slots[slot].state != .inactive and slots[slot].state != .finished; +} + +fn fill_output_buffer(buf: *AudioOutBuffer) void { + const out: [*]i16 = @ptrCast(@alignCast(buf.buffer.?)); + + for (0..SAMPLES_PER_BUF) |frame| { + var left_acc: i32 = 0; + var right_acc: i32 = 0; + + for (&slots) |*slot| { + if (slot.state == .pending) { + if (read_next_sample(slot)) { + slot.state = .active; + } else { + slot.state = .finished; + } + } + + if (slot.state != .active) continue; + + const left_gain = slot.gain * std.math.clamp(1.0 - slot.pan, 0.0, 1.0); + const right_gain = slot.gain * std.math.clamp(1.0 + slot.pan, 0.0, 1.0); + const left_vol: i32 = @intFromFloat(std.math.clamp(left_gain, 0.0, 1.0) * 32768.0); + const right_vol: i32 = @intFromFloat(std.math.clamp(right_gain, 0.0, 1.0) * 32768.0); + + left_acc += (@as(i32, slot.current_left) * left_vol) >> 15; + right_acc += (@as(i32, slot.current_right) * right_vol) >> 15; + + advance_sample(slot); + } + + out[frame * 2] = clamp_i16(left_acc); + out[frame * 2 + 1] = clamp_i16(right_acc); + } + + buf.data_size = OUTPUT_BYTES; + buf.data_offset = 0; +} + +fn advance_sample(slot: *Slot) void { + slot.phase_fp +%= slot.step_fp; + while (slot.phase_fp >= FP_ONE) { + slot.phase_fp -= FP_ONE; + if (!read_next_sample(slot)) { + slot.state = .finished; + return; + } + } +} + +fn read_next_sample(slot: *Slot) bool { + var tmp: [4]u8 = 
undefined; + const frame_size = slot.format.frame_size(); + if (frame_size > tmp.len) return false; + + slot.stream.reader.readSliceAll(tmp[0..frame_size]) catch return false; + + if (slot.format.channels == 1) { + const s = std.mem.readInt(i16, tmp[0..2], .little); + slot.current_left = s; + slot.current_right = s; + } else { + slot.current_left = std.mem.readInt(i16, tmp[0..2], .little); + slot.current_right = std.mem.readInt(i16, tmp[2..4], .little); + } + + return true; +} + +fn clamp_i16(v: i32) i16 { + return @intCast(std.math.clamp(v, std.math.minInt(i16), std.math.maxInt(i16))); +} + +fn free_output() void { + if (output_data) |data| { + free(data); + output_data = null; + } +} -pub fn is_slot_active(_: u8) bool { - return false; +fn format_supported(fmt: PcmFormat) bool { + return fmt.bit_depth == 16 and (fmt.channels == 1 or fmt.channels == 2); } From 3a93ddbd8158cb89e1dac37fdbdf7f6ffa786b90 Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Tue, 26 May 2026 18:57:19 -0400 Subject: [PATCH 07/44] Input --- src/platform/3ds/input.zig | 242 ++++++++++++++++++++++++++++-- src/platform/switch/input.zig | 274 ++++++++++++++++++++++++++++++++-- 2 files changed, 498 insertions(+), 18 deletions(-) diff --git a/src/platform/3ds/input.zig b/src/platform/3ds/input.zig index 7ad25db..6e7d8d6 100644 --- a/src/platform/3ds/input.zig +++ b/src/platform/3ds/input.zig @@ -1,24 +1,248 @@ -//! 3DS input backend stub. -//! -//! Wires up to libctru's `hid` (button state, circle pad, touch) once an -//! SDK is available. For now every reading reads as neutral so the action -//! system is silent on 3DS. +//! 3DS input backend. Polls libctru HID once per engine update and +//! translates button, circle-pad, C-stick, trigger, touch, and software +//! keyboard state into Aether's core input events. 
const std = @import("std"); const core = @import("../../core/input/input.zig"); -pub fn setup(_: std.mem.Allocator, _: std.Io) void {} +const Result = c_int; -pub fn init() anyerror!void {} +const TouchPosition = extern struct { + px: u16, + py: u16, +}; -pub fn deinit() void {} +const CirclePosition = extern struct { + dx: i16, + dy: i16, +}; + +extern fn hidInit() Result; +extern fn hidExit() void; +extern fn hidScanInput() void; +extern fn hidKeysHeld() u32; +extern fn hidTouchRead(pos: *TouchPosition) void; +extern fn hidCircleRead(pos: *CirclePosition) void; + +extern fn swkbdInit(swkbd: *anyopaque, typ: c_int, num_buttons: c_int, max_text_length: c_int) void; +extern fn swkbdSetFeatures(swkbd: *anyopaque, features: u32) void; +extern fn swkbdSetHintText(swkbd: *anyopaque, text: [*:0]const u8) void; +extern fn swkbdSetButton(swkbd: *anyopaque, button: c_int, text: [*:0]const u8, submit: bool) void; +extern fn swkbdSetInitialText(swkbd: *anyopaque, text: [*:0]const u8) void; +extern fn swkbdInputText(swkbd: *anyopaque, buf: [*]u8, bufsize: usize) c_int; + +const KEY_A: u32 = 1 << 0; +const KEY_B: u32 = 1 << 1; +const KEY_SELECT: u32 = 1 << 2; +const KEY_START: u32 = 1 << 3; +const KEY_DRIGHT: u32 = 1 << 4; +const KEY_DLEFT: u32 = 1 << 5; +const KEY_DUP: u32 = 1 << 6; +const KEY_DDOWN: u32 = 1 << 7; +const KEY_R: u32 = 1 << 8; +const KEY_L: u32 = 1 << 9; +const KEY_X: u32 = 1 << 10; +const KEY_Y: u32 = 1 << 11; +const KEY_ZL: u32 = 1 << 14; +const KEY_ZR: u32 = 1 << 15; +const KEY_TOUCH: u32 = 1 << 20; +const KEY_CSTICK_RIGHT: u32 = 1 << 24; +const KEY_CSTICK_LEFT: u32 = 1 << 25; +const KEY_CSTICK_UP: u32 = 1 << 26; +const KEY_CSTICK_DOWN: u32 = 1 << 27; + +const CIRCLE_PAD_MAX: f32 = 156.0; +const MAX_TEXT_BYTES: usize = 1024; +const SWKBD_STATE_BYTES: usize = 0x1000; +const SWKBD_TYPE_NORMAL: c_int = 0; +const SWKBD_BUTTON_LEFT: c_int = 0; +const SWKBD_BUTTON_RIGHT: c_int = 2; +const SWKBD_DARKEN_TOP_SCREEN: u32 = 1 << 1; +const SWKBD_MULTILINE: u32 = 1 << 3; 
+const SWKBD_DEFAULT_QWERTY: u32 = 1 << 9; + +const axis_count = @typeInfo(core.Axis).@"enum".fields.len; + +var initialized: bool = false; +var prev_keys: u32 = 0; +var prev_axes: [axis_count]f32 = @splat(0.0); +var prev_touch_down: bool = false; +var prev_touch_pos: core.Vec2 = .{}; + +pub fn setup(_: std.mem.Allocator, _: std.Io) void { + initialized = false; + prev_keys = 0; + prev_axes = @splat(0.0); + prev_touch_down = false; + prev_touch_pos = .{}; +} + +pub fn init() anyerror!void { + if (hidInit() != 0) return error.InputInitFailed; + initialized = true; +} + +pub fn deinit() void { + if (!initialized) return; + hidExit(); + initialized = false; +} pub fn pump() void { + hidScanInput(); + const keys = hidKeysHeld(); + + diff_buttons(keys); + pump_axes(keys); + pump_touch(keys); + + prev_keys = keys; core.signal_frame_boundary(); } pub fn apply_cursor_mode(_: core.CursorMode) void {} -pub fn begin_text_input_session(_: core.TextInputTarget, _: core.TextInputOptions) anyerror!void {} +pub fn begin_text_input_session(target: core.TextInputTarget, options: core.TextInputOptions) anyerror!void { + var state_buf: [SWKBD_STATE_BYTES]u8 align(8) = @splat(0); + const state: *anyopaque = @ptrCast(&state_buf); + + var initial_buf: [MAX_TEXT_BYTES:0]u8 = @splat(0); + const initial_len = copy_current_text(&initial_buf); + const initial = initial_buf[0..initial_len :0]; + + var hint_buf: [128:0]u8 = @splat(0); + const hint = copy_z(&hint_buf, target.id); + + const max_text_len = text_limit_c_int(options.max_bytes); + swkbdInit(state, SWKBD_TYPE_NORMAL, 2, max_text_len); + swkbdSetInitialText(state, initial.ptr); + swkbdSetHintText(state, hint.ptr); + swkbdSetButton(state, SWKBD_BUTTON_LEFT, "Cancel", false); + swkbdSetButton(state, SWKBD_BUTTON_RIGHT, "OK", true); + + var features = SWKBD_DARKEN_TOP_SCREEN | SWKBD_DEFAULT_QWERTY; + if (options.multiline) features |= SWKBD_MULTILINE; + swkbdSetFeatures(state, features); + + var out_buf: [MAX_TEXT_BYTES:0]u8 = @splat(0); 
+ const out_size = output_buffer_size(options.max_bytes); + const button = swkbdInputText(state, out_buf[0..].ptr, out_size); + if (button == SWKBD_BUTTON_RIGHT) { + const len = bounded_z_len(out_buf[0..out_size]); + core.write_text_session_buffer(out_buf[0..len], .submitted); + } else { + core.write_text_session_buffer(initial_buf[0..initial_len], .cancelled); + } +} pub fn end_text_input_session() void {} + +fn diff_buttons(keys: u32) void { + const Pair = struct { mask: u32, button: core.Button }; + const map = [_]Pair{ + .{ .mask = KEY_A, .button = .A }, + .{ .mask = KEY_B, .button = .B }, + .{ .mask = KEY_X, .button = .X }, + .{ .mask = KEY_Y, .button = .Y }, + .{ .mask = KEY_L, .button = .LButton }, + .{ .mask = KEY_R, .button = .RButton }, + .{ .mask = KEY_SELECT, .button = .Back }, + .{ .mask = KEY_START, .button = .Start }, + .{ .mask = KEY_DUP, .button = .DpadUp }, + .{ .mask = KEY_DRIGHT, .button = .DpadRight }, + .{ .mask = KEY_DDOWN, .button = .DpadDown }, + .{ .mask = KEY_DLEFT, .button = .DpadLeft }, + }; + + inline for (map) |entry| { + const now = keys & entry.mask != 0; + const prev = prev_keys & entry.mask != 0; + if (now != prev) { + core.deliver_gamepad_button(entry.button, if (now) .pressed else .released); + } + } +} + +fn pump_axes(keys: u32) void { + var circle: CirclePosition = .{ .dx = 0, .dy = 0 }; + hidCircleRead(&circle); + + deliver_axis(.LeftX, normalize_signed(circle.dx, CIRCLE_PAD_MAX)); + deliver_axis(.LeftY, -normalize_signed(circle.dy, CIRCLE_PAD_MAX)); + deliver_axis(.RightX, digital_axis(keys, KEY_CSTICK_RIGHT, KEY_CSTICK_LEFT)); + deliver_axis(.RightY, digital_axis(keys, KEY_CSTICK_DOWN, KEY_CSTICK_UP)); + deliver_axis(.LeftTrigger, if (keys & KEY_ZL != 0) 1.0 else 0.0); + deliver_axis(.RightTrigger, if (keys & KEY_ZR != 0) 1.0 else 0.0); +} + +fn pump_touch(keys: u32) void { + const touch_down = keys & KEY_TOUCH != 0; + if (touch_down) { + var touch: TouchPosition = .{ .px = 0, .py = 0 }; + hidTouchRead(&touch); + const pos: 
core.Vec2 = .{ + .x = @floatFromInt(touch.px), + .y = @floatFromInt(touch.py), + }; + const delta: core.Vec2 = if (prev_touch_down) + .{ .x = pos.x - prev_touch_pos.x, .y = pos.y - prev_touch_pos.y } + else + .{}; + + core.deliver_mouse_move(pos, delta); + if (!prev_touch_down) core.deliver_mouse_button(.Left, .pressed, pos); + prev_touch_pos = pos; + } else if (prev_touch_down) { + core.deliver_mouse_button(.Left, .released, prev_touch_pos); + } + + prev_touch_down = touch_down; +} + +fn deliver_axis(axis: core.Axis, value: f32) void { + const idx = @intFromEnum(axis); + const prev = prev_axes[idx]; + if (value != 0.0 or prev != 0.0) core.deliver_gamepad_axis(axis, value); + prev_axes[idx] = value; +} + +fn normalize_signed(raw: anytype, max_value: f32) f32 { + const value = @as(f32, @floatFromInt(raw)) / max_value; + return std.math.clamp(value, -1.0, 1.0); +} + +fn digital_axis(keys: u32, positive_mask: u32, negative_mask: u32) f32 { + var value: f32 = 0.0; + if (keys & positive_mask != 0) value += 1.0; + if (keys & negative_mask != 0) value -= 1.0; + return value; +} + +fn output_buffer_size(limit: ?usize) usize { + const max = @min(limit orelse (MAX_TEXT_BYTES - 1), MAX_TEXT_BYTES - 1); + return max + 1; +} + +fn text_limit_c_int(limit: ?usize) c_int { + const max = @min(limit orelse (MAX_TEXT_BYTES - 1), MAX_TEXT_BYTES - 1); + return @intCast(@max(max, 1)); +} + +fn copy_current_text(dst: []u8) usize { + const s = core.current_text_session() orelse return 0; + const n = @min(dst.len - 1, s.buffer.items.len); + @memcpy(dst[0..n], s.buffer.items[0..n]); + dst[n] = 0; + return n; +} + +fn copy_z(dst: []u8, text: []const u8) [:0]const u8 { + const n = @min(dst.len - 1, text.len); + @memcpy(dst[0..n], text[0..n]); + dst[n] = 0; + return dst[0..n :0]; +} + +fn bounded_z_len(buf: []const u8) usize { + return std.mem.indexOfScalar(u8, buf, 0) orelse buf.len; +} diff --git a/src/platform/switch/input.zig b/src/platform/switch/input.zig index f5edce2..397d024 100644 
--- a/src/platform/switch/input.zig +++ b/src/platform/switch/input.zig @@ -1,24 +1,280 @@ -//! Switch input backend stub. -//! -//! Wires up to libnx's `hid` (pads, touch, motion) once an SDK is -//! available. For now every reading reads as neutral so the action -//! system is silent on Switch. +//! Switch input backend. Polls libnx's pad and touchscreen helpers once per +//! engine update and translates them into Aether core input events. const std = @import("std"); const core = @import("../../core/input/input.zig"); -pub fn setup(_: std.mem.Allocator, _: std.Io) void {} +const Result = u32; -pub fn init() anyerror!void {} +const HidAnalogStickState = extern struct { + x: i32, + y: i32, +}; -pub fn deinit() void {} +const HidTouchState = extern struct { + delta_time: u64, + attributes: u32, + finger_id: u32, + x: u32, + y: u32, + diameter_x: u32, + diameter_y: u32, + rotation_angle: u32, + reserved: u32, +}; + +const HidTouchScreenState = extern struct { + sampling_number: u64, + count: i32, + reserved: u32, + touches: [16]HidTouchState, +}; + +const PadState = extern struct { + id_mask: u8, + active_id_mask: u8, + read_handheld: bool, + active_handheld: bool, + style_set: u32, + attributes: u32, + buttons_cur: u64, + buttons_old: u64, + sticks: [2]HidAnalogStickState, + gc_triggers: [2]u32, +}; + +extern fn hidInitialize() Result; +extern fn hidExit() void; +extern fn hidInitializeTouchScreen() void; +extern fn hidGetTouchScreenStates(states: [*]HidTouchScreenState, count: usize) usize; + +extern fn padConfigureInput(max_players: u32, style_set: u32) void; +extern fn padInitializeWithMask(pad: *PadState, mask: u64) void; +extern fn padUpdate(pad: *PadState) void; + +extern fn swkbdCreate(config: *anyopaque, max_dictwords: i32) Result; +extern fn swkbdClose(config: *anyopaque) void; +extern fn swkbdConfigMakePresetDefault(config: *anyopaque) void; +extern fn swkbdConfigSetOkButtonText(config: *anyopaque, text: [*:0]const u8) void; +extern fn 
swkbdConfigSetHeaderText(config: *anyopaque, text: [*:0]const u8) void; +extern fn swkbdConfigSetGuideText(config: *anyopaque, text: [*:0]const u8) void; +extern fn swkbdConfigSetInitialText(config: *anyopaque, text: [*:0]const u8) void; +extern fn swkbdShow(config: *anyopaque, out_string: [*]u8, out_string_size: usize) Result; + +const HID_NPAD_STYLE_FULL_KEY: u32 = 1 << 0; +const HID_NPAD_STYLE_HANDHELD: u32 = 1 << 1; +const HID_NPAD_STYLE_JOY_DUAL: u32 = 1 << 2; +const HID_NPAD_STYLE_JOY_LEFT: u32 = 1 << 3; +const HID_NPAD_STYLE_JOY_RIGHT: u32 = 1 << 4; +const HID_NPAD_STYLE_STANDARD: u32 = HID_NPAD_STYLE_FULL_KEY | HID_NPAD_STYLE_HANDHELD | HID_NPAD_STYLE_JOY_DUAL | HID_NPAD_STYLE_JOY_LEFT | HID_NPAD_STYLE_JOY_RIGHT; + +const HID_NPAD_ID_NO1: u64 = 1 << 0; +const HID_NPAD_ID_HANDHELD: u64 = 1 << 32; +const DEFAULT_PAD_MASK: u64 = HID_NPAD_ID_NO1 | HID_NPAD_ID_HANDHELD; + +const BUTTON_A: u64 = 1 << 0; +const BUTTON_B: u64 = 1 << 1; +const BUTTON_X: u64 = 1 << 2; +const BUTTON_Y: u64 = 1 << 3; +const BUTTON_STICK_L: u64 = 1 << 4; +const BUTTON_STICK_R: u64 = 1 << 5; +const BUTTON_L: u64 = 1 << 6; +const BUTTON_R: u64 = 1 << 7; +const BUTTON_ZL: u64 = 1 << 8; +const BUTTON_ZR: u64 = 1 << 9; +const BUTTON_PLUS: u64 = 1 << 10; +const BUTTON_MINUS: u64 = 1 << 11; +const BUTTON_LEFT: u64 = 1 << 12; +const BUTTON_UP: u64 = 1 << 13; +const BUTTON_RIGHT: u64 = 1 << 14; +const BUTTON_DOWN: u64 = 1 << 15; +const BUTTON_LEFT_SL: u64 = 1 << 24; +const BUTTON_LEFT_SR: u64 = 1 << 25; +const BUTTON_RIGHT_SL: u64 = 1 << 26; +const BUTTON_RIGHT_SR: u64 = 1 << 27; + +const JOYSTICK_MAX: f32 = 32767.0; +const MAX_TEXT_BYTES: usize = 1024; +const SWKBD_CONFIG_BYTES: usize = 0x600; + +const axis_count = @typeInfo(core.Axis).@"enum".fields.len; + +var initialized: bool = false; +var pad: PadState = undefined; +var prev_buttons: u64 = 0; +var prev_axes: [axis_count]f32 = @splat(0.0); +var prev_touch_down: bool = false; +var prev_touch_pos: core.Vec2 = .{}; + +pub fn setup(_: 
std.mem.Allocator, _: std.Io) void { + initialized = false; + pad = std.mem.zeroes(PadState); + prev_buttons = 0; + prev_axes = @splat(0.0); + prev_touch_down = false; + prev_touch_pos = .{}; +} + +pub fn init() anyerror!void { + if (hidInitialize() != 0) return error.InputInitFailed; + hidInitializeTouchScreen(); + padConfigureInput(1, HID_NPAD_STYLE_STANDARD); + padInitializeWithMask(&pad, DEFAULT_PAD_MASK); + initialized = true; +} + +pub fn deinit() void { + if (!initialized) return; + hidExit(); + initialized = false; +} pub fn pump() void { + padUpdate(&pad); + + diff_buttons(pad.buttons_cur); + pump_axes(pad.buttons_cur); + pump_touch(); + + prev_buttons = pad.buttons_cur; core.signal_frame_boundary(); } pub fn apply_cursor_mode(_: core.CursorMode) void {} -pub fn begin_text_input_session(_: core.TextInputTarget, _: core.TextInputOptions) anyerror!void {} +pub fn begin_text_input_session(target: core.TextInputTarget, options: core.TextInputOptions) anyerror!void { + var config_buf: [SWKBD_CONFIG_BYTES]u8 align(8) = @splat(0); + const config: *anyopaque = @ptrCast(&config_buf); + + var initial_buf: [MAX_TEXT_BYTES:0]u8 = @splat(0); + const initial_len = copy_current_text(&initial_buf); + const initial = initial_buf[0..initial_len :0]; + + var target_buf: [128:0]u8 = @splat(0); + const target_text = copy_z(&target_buf, target.id); + + if (swkbdCreate(config, 0) != 0) { + core.write_text_session_buffer(initial_buf[0..initial_len], .cancelled); + return; + } + defer swkbdClose(config); + + swkbdConfigMakePresetDefault(config); + swkbdConfigSetOkButtonText(config, "OK"); + swkbdConfigSetHeaderText(config, target_text.ptr); + swkbdConfigSetGuideText(config, target_text.ptr); + swkbdConfigSetInitialText(config, initial.ptr); + + var out_buf: [MAX_TEXT_BYTES:0]u8 = @splat(0); + const out_size = output_buffer_size(options.max_bytes); + if (swkbdShow(config, out_buf[0..].ptr, out_size) == 0) { + const len = bounded_z_len(out_buf[0..out_size]); + 
core.write_text_session_buffer(out_buf[0..len], .submitted); + } else { + core.write_text_session_buffer(initial_buf[0..initial_len], .cancelled); + } +} pub fn end_text_input_session() void {} + +fn diff_buttons(buttons: u64) void { + const Pair = struct { mask: u64, button: core.Button }; + const map = [_]Pair{ + .{ .mask = BUTTON_A, .button = .A }, + .{ .mask = BUTTON_B, .button = .B }, + .{ .mask = BUTTON_X, .button = .X }, + .{ .mask = BUTTON_Y, .button = .Y }, + .{ .mask = BUTTON_L | BUTTON_LEFT_SL | BUTTON_RIGHT_SL, .button = .LButton }, + .{ .mask = BUTTON_R | BUTTON_LEFT_SR | BUTTON_RIGHT_SR, .button = .RButton }, + .{ .mask = BUTTON_MINUS, .button = .Back }, + .{ .mask = BUTTON_PLUS, .button = .Start }, + .{ .mask = BUTTON_STICK_L, .button = .LeftThumb }, + .{ .mask = BUTTON_STICK_R, .button = .RightThumb }, + .{ .mask = BUTTON_UP, .button = .DpadUp }, + .{ .mask = BUTTON_RIGHT, .button = .DpadRight }, + .{ .mask = BUTTON_DOWN, .button = .DpadDown }, + .{ .mask = BUTTON_LEFT, .button = .DpadLeft }, + }; + + inline for (map) |entry| { + const now = buttons & entry.mask != 0; + const prev = prev_buttons & entry.mask != 0; + if (now != prev) { + core.deliver_gamepad_button(entry.button, if (now) .pressed else .released); + } + } +} + +fn pump_axes(buttons: u64) void { + const left = pad.sticks[0]; + const right = pad.sticks[1]; + + deliver_axis(.LeftX, normalize_stick(left.x)); + deliver_axis(.LeftY, -normalize_stick(left.y)); + deliver_axis(.RightX, normalize_stick(right.x)); + deliver_axis(.RightY, -normalize_stick(right.y)); + deliver_axis(.LeftTrigger, if (buttons & BUTTON_ZL != 0) 1.0 else 0.0); + deliver_axis(.RightTrigger, if (buttons & BUTTON_ZR != 0) 1.0 else 0.0); +} + +fn pump_touch() void { + var states: [1]HidTouchScreenState = undefined; + const state_count = hidGetTouchScreenStates(&states, states.len); + const touch_down = state_count > 0 and states[0].count > 0; + + if (touch_down) { + const touch = states[0].touches[0]; + const pos: 
core.Vec2 = .{ + .x = @floatFromInt(touch.x), + .y = @floatFromInt(touch.y), + }; + const delta: core.Vec2 = if (prev_touch_down) + .{ .x = pos.x - prev_touch_pos.x, .y = pos.y - prev_touch_pos.y } + else + .{}; + + core.deliver_mouse_move(pos, delta); + if (!prev_touch_down) core.deliver_mouse_button(.Left, .pressed, pos); + prev_touch_pos = pos; + } else if (prev_touch_down) { + core.deliver_mouse_button(.Left, .released, prev_touch_pos); + } + + prev_touch_down = touch_down; +} + +fn deliver_axis(axis: core.Axis, value: f32) void { + const idx = @intFromEnum(axis); + const prev = prev_axes[idx]; + if (value != 0.0 or prev != 0.0) core.deliver_gamepad_axis(axis, value); + prev_axes[idx] = value; +} + +fn normalize_stick(raw: i32) f32 { + const value = @as(f32, @floatFromInt(raw)) / JOYSTICK_MAX; + return std.math.clamp(value, -1.0, 1.0); +} + +fn output_buffer_size(limit: ?usize) usize { + const max = @min(limit orelse (MAX_TEXT_BYTES - 1), MAX_TEXT_BYTES - 1); + return max + 1; +} + +fn copy_current_text(dst: []u8) usize { + const s = core.current_text_session() orelse return 0; + const n = @min(dst.len - 1, s.buffer.items.len); + @memcpy(dst[0..n], s.buffer.items[0..n]); + dst[n] = 0; + return n; +} + +fn copy_z(dst: []u8, text: []const u8) [:0]const u8 { + const n = @min(dst.len - 1, text.len); + @memcpy(dst[0..n], text[0..n]); + dst[n] = 0; + return dst[0..n :0]; +} + +fn bounded_z_len(buf: []const u8) usize { + return std.mem.indexOfScalar(u8, buf, 0) orelse buf.len; +} From 7b5345f754c1ec45854d7b1cf36d56da36f93136 Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Tue, 26 May 2026 19:08:28 -0400 Subject: [PATCH 08/44] Switch graphics --- build.zig | 47 +- src/platform/switch/switch_gfx.zig | 746 ++++++++++++++++++++++++++++- 2 files changed, 772 insertions(+), 21 deletions(-) diff --git a/build.zig b/build.zig index 06ba226..ddb9f11 100644 --- a/build.zig +++ b/build.zig @@ -432,6 +432,13 @@ fn addSlangStep(b: *std.Build, slangc: ?std.Build.LazyPath, 
args: []const []cons return output; } +fn addUamStep(b: *std.Build, uam: []const u8, stage: []const u8, comptime output_name: []const u8, input: std.Build.LazyPath) std.Build.LazyPath { + const run = b.addSystemCommand(&.{ uam, "-s", stage, "-o" }); + const output = run.addOutputFileArg(output_name); + run.addFileArg(input); + return output; +} + pub const ExportOptions = struct { /// PSP/macOS: human-readable name shown to the OS (XMB title on PSP, /// CFBundleName on macOS). Ignored elsewhere. @@ -1031,7 +1038,7 @@ fn switchNroPipeline(b: *std.Build, exe: *std.Build.Step.Compile, opts: ExportOp link.addArg("none"); link.addFileArg(crt_clean); link.addArg(b.fmt("-L{s}", .{libnx_lib})); - link.addArgs(&.{ "-lnx", "-lm" }); + link.addArgs(&.{ "-ldeko3d", "-lnx", "-lm" }); link.addArg("-o"); const elf = link.addOutputFileArg(b.fmt("{s}.elf", .{exe.name})); @@ -1082,6 +1089,44 @@ fn switchNroPipeline(b: *std.Build, exe: *std.Build.Step.Compile, opts: ExportOp /// Aether.addShader(ae_dep.builder, b, exe, config, "basic", .{ ... 
}); /// pub fn addShader(owner: *std.Build, b: *std.Build, exe: *std.Build.Step.Compile, config: Config, comptime name: []const u8, paths: ShaderPaths) void { + if (config.platform == .nintendo_switch and config.gfx == .default) { + const uam = b.pathJoin(&.{ devkitProPath(b), "tools/bin/uam" }); + const sources = b.addWriteFiles(); + const vert_src = sources.add(name ++ "_switch.vert.glsl", + \\#version 460 + \\ + \\layout (location = 0) in vec3 inPosition; + \\layout (location = 1) in vec4 inColor; + \\ + \\layout (location = 0) out vec4 outColor; + \\ + \\void main() + \\{ + \\ gl_Position = vec4(inPosition, 1.0); + \\ outColor = inColor; + \\} + \\ + ); + const frag_src = sources.add(name ++ "_switch.frag.glsl", + \\#version 460 + \\ + \\layout (location = 0) in vec4 inColor; + \\layout (location = 0) out vec4 outColor; + \\ + \\void main() + \\{ + \\ outColor = inColor; + \\} + \\ + ); + + const vert = addUamStep(b, uam, "vert", name ++ ".vert.dksh", vert_src); + const frag = addUamStep(b, uam, "frag", name ++ ".frag.dksh", frag_src); + exe.root_module.addAnonymousImport(name ++ "_vert", .{ .root_source_file = vert }); + exe.root_module.addAnonymousImport(name ++ "_frag", .{ .root_source_file = frag }); + return; + } + switch (config.gfx) { .vulkan => { const slangc = slangcPath(owner); diff --git a/src/platform/switch/switch_gfx.zig b/src/platform/switch/switch_gfx.zig index ca7309d..5bbbe46 100644 --- a/src/platform/switch/switch_gfx.zig +++ b/src/platform/switch/switch_gfx.zig @@ -1,29 +1,375 @@ -//! Switch GPU backend stub. +//! Minimal Nintendo Switch deko3d backend. //! -//! Empty template: every entry point satisfies `gfx_api.Interface` so the -//! engine compiles end-to-end on Switch (`-Dnintendo-switch=true`). Real -//! bring-up will plug libnx's deko3d (or nvgpu via the nv:* services) -//! in here. +//! This is the first bring-up milestone: render Aether's current colored +//! demo mesh through deko3d and present it. 
Textures, matrices, and richer +//! render state are intentionally left as no-ops until the full backend pass. const std = @import("std"); +const Util = @import("../../util/util.zig"); const Mat4 = @import("../../math/math.zig").Mat4; const Rendering = @import("../../rendering/rendering.zig"); const Pipeline = Rendering.Pipeline; const Mesh = Rendering.mesh; const Texture = Rendering.Texture; +const gfx = @import("../gfx.zig"); + +const DkDevice_T = opaque {}; +const DkMemBlock_T = opaque {}; +const DkCmdBuf_T = opaque {}; +const DkQueue_T = opaque {}; +const DkSwapchain_T = opaque {}; + +const DkDevice = ?*DkDevice_T; +const DkMemBlock = ?*DkMemBlock_T; +const DkCmdBuf = ?*DkCmdBuf_T; +const DkQueue = ?*DkQueue_T; +const DkSwapchain = ?*DkSwapchain_T; +const DkGpuAddr = u64; +const DkCmdList = usize; + +const DkDeviceMaker = extern struct { + userData: ?*anyopaque, + cbDebug: ?*const anyopaque, + cbAlloc: ?*const anyopaque, + cbFree: ?*const anyopaque, + flags: u32, +}; + +const DkMemBlockMaker = extern struct { + device: DkDevice, + size: u32, + flags: u32, + storage: ?*anyopaque, +}; + +const DkCmdBufMaker = extern struct { + device: DkDevice, + userData: ?*anyopaque, + cbAddMem: ?*const anyopaque, +}; + +const DkQueueMaker = extern struct { + device: DkDevice, + flags: u32, + commandMemorySize: u32, + flushThreshold: u32, + perWarpScratchMemorySize: u32, + maxConcurrentComputeJobs: u32, +}; + +const DkShaderMaker = extern struct { + codeMem: DkMemBlock, + control: ?*const anyopaque, + codeOffset: u32, + programId: u32, +}; + +const DkImageLayoutMaker = extern struct { + device: DkDevice, + type: u32, + flags: u32, + format: u32, + msMode: u32, + dimensions: [3]u32, + mipLevels: u32, + pitchStride: u32, +}; + +const DkSwapchainMaker = extern struct { + device: DkDevice, + nativeWindow: ?*anyopaque, + pImages: [*]const *const DkImage, + numImages: u32, +}; + +const DkShader = extern struct { + storage: [16]u64, +}; + +const DkImageLayout = extern struct { + 
storage: [16]u64, +}; + +const DkImage = extern struct { + storage: [16]u64, +}; + +const DkImageView = extern struct { + pImage: *const DkImage, + type: u32, + format: u32, + swizzle: [4]u32, + dsSource: u32, + layerOffset: u16, + layerCount: u16, + mipLevelOffset: u8, + mipLevelCount: u8, +}; + +const DkViewport = extern struct { + x: f32, + y: f32, + width: f32, + height: f32, + near: f32, + far: f32, +}; + +const DkScissor = extern struct { + x: u32, + y: u32, + width: u32, + height: u32, +}; + +const DkRasterizerState = extern struct { + bits: u32, +}; + +const DkColorState = extern struct { + bits: u32, +}; + +const DkColorWriteState = extern struct { + masks: u32, +}; + +const DkDepthStencilState = extern struct { + bits0: u32, + bits1: u32, +}; + +const DkVtxAttribState = extern struct { + bits: u32, +}; + +const DkVtxBufferState = extern struct { + stride: u32, + divisor: u32, +}; + +const DkBufExtents = extern struct { + addr: DkGpuAddr, + size: u32, +}; + +extern fn nwindowGetDefault() ?*anyopaque; + +extern fn dkDeviceCreate(maker: *const DkDeviceMaker) DkDevice; +extern fn dkDeviceDestroy(obj: DkDevice) void; + +extern fn dkMemBlockCreate(maker: *const DkMemBlockMaker) DkMemBlock; +extern fn dkMemBlockDestroy(obj: DkMemBlock) void; +extern fn dkMemBlockGetCpuAddr(obj: DkMemBlock) ?*anyopaque; +extern fn dkMemBlockGetGpuAddr(obj: DkMemBlock) DkGpuAddr; +extern fn dkMemBlockGetSize(obj: DkMemBlock) u32; +extern fn dkMemBlockFlushCpuCache(obj: DkMemBlock, offset: u32, size: u32) u32; + +extern fn dkCmdBufCreate(maker: *const DkCmdBufMaker) DkCmdBuf; +extern fn dkCmdBufDestroy(obj: DkCmdBuf) void; +extern fn dkCmdBufAddMemory(obj: DkCmdBuf, mem: DkMemBlock, offset: u32, size: u32) void; +extern fn dkCmdBufFinishList(obj: DkCmdBuf) DkCmdList; +extern fn dkCmdBufClear(obj: DkCmdBuf) void; +extern fn dkCmdBufBindShaders(obj: DkCmdBuf, stageMask: u32, shaders: [*]const *const DkShader, numShaders: u32) void; +extern fn dkCmdBufBindRenderTargets(obj: DkCmdBuf, 
colorTargets: [*]const *const DkImageView, numColorTargets: u32, depthTarget: ?*const DkImageView) void; +extern fn dkCmdBufBindRasterizerState(obj: DkCmdBuf, state: *const DkRasterizerState) void; +extern fn dkCmdBufBindColorState(obj: DkCmdBuf, state: *const DkColorState) void; +extern fn dkCmdBufBindColorWriteState(obj: DkCmdBuf, state: *const DkColorWriteState) void; +extern fn dkCmdBufBindDepthStencilState(obj: DkCmdBuf, state: *const DkDepthStencilState) void; +extern fn dkCmdBufBindVtxAttribState(obj: DkCmdBuf, attribs: [*]const DkVtxAttribState, numAttribs: u32) void; +extern fn dkCmdBufBindVtxBufferState(obj: DkCmdBuf, buffers: [*]const DkVtxBufferState, numBuffers: u32) void; +extern fn dkCmdBufBindVtxBuffers(obj: DkCmdBuf, firstId: u32, buffers: [*]const DkBufExtents, numBuffers: u32) void; +extern fn dkCmdBufSetViewports(obj: DkCmdBuf, firstId: u32, viewports: [*]const DkViewport, numViewports: u32) void; +extern fn dkCmdBufSetScissors(obj: DkCmdBuf, firstId: u32, scissors: [*]const DkScissor, numScissors: u32) void; +extern fn dkCmdBufClearColor(obj: DkCmdBuf, targetId: u32, clearMask: u32, clearData: *const anyopaque) void; +extern fn dkCmdBufDraw(obj: DkCmdBuf, prim: u32, vertexCount: u32, instanceCount: u32, firstVertex: u32, firstInstance: u32) void; + +extern fn dkQueueCreate(maker: *const DkQueueMaker) DkQueue; +extern fn dkQueueDestroy(obj: DkQueue) void; +extern fn dkQueueWaitIdle(obj: DkQueue) void; +extern fn dkQueueSubmitCommands(obj: DkQueue, cmds: DkCmdList) void; +extern fn dkQueueAcquireImage(obj: DkQueue, swapchain: DkSwapchain) c_int; +extern fn dkQueuePresentImage(obj: DkQueue, swapchain: DkSwapchain, imageSlot: c_int) void; + +extern fn dkShaderInitialize(obj: *DkShader, maker: *const DkShaderMaker) void; +extern fn dkShaderIsValid(obj: *const DkShader) bool; + +extern fn dkImageLayoutInitialize(obj: *DkImageLayout, maker: *const DkImageLayoutMaker) void; +extern fn dkImageLayoutGetSize(obj: *const DkImageLayout) u64; +extern fn 
dkImageLayoutGetAlignment(obj: *const DkImageLayout) u32; +extern fn dkImageInitialize(obj: *DkImage, layout: *const DkImageLayout, memBlock: DkMemBlock, offset: u32) void; + +extern fn dkSwapchainCreate(maker: *const DkSwapchainMaker) DkSwapchain; +extern fn dkSwapchainDestroy(obj: DkSwapchain) void; +extern fn dkSwapchainSetSwapInterval(obj: DkSwapchain, interval: u32) void; + +const FB_COUNT = 2; +const FB_WIDTH = 1280; +const FB_HEIGHT = 720; +const CODE_MEM_SIZE = 512 * 1024; +const CMD_MEM_SIZE = 64 * 1024; +const MAX_VERTEX_ATTRIBS = 32; +const MAX_VERTEX_BUFFERS = 16; + +const DK_MEMBLOCK_ALIGNMENT = 0x1000; +const DK_SHADER_CODE_ALIGNMENT = 0x100; + +const DK_MEM_CPU_UNCACHED = 1 << 0; +const DK_MEM_GPU_CACHED = 2 << 2; +const DK_MEM_CODE = 1 << 4; +const DK_MEM_IMAGE = 1 << 5; + +const DK_QUEUE_GRAPHICS = 1 << 0; +const DK_QUEUE_MEDIUM_PRIO = 0 << 2; +const DK_QUEUE_ENABLE_ZCULL = 0 << 4; +const DK_QUEUE_MIN_CMDMEM_SIZE = 0x10000; +const DK_PER_WARP_SCRATCH_MEM_ALIGNMENT = 0x200; +const DK_DEFAULT_MAX_COMPUTE_CONCURRENT_JOBS = 128; + +const DK_IMAGE_TYPE_NONE = 0; +const DK_IMAGE_TYPE_2D = 2; +const DK_IMAGE_RGBA8_UNORM = 28; +const DK_IMAGE_USAGE_RENDER = 1 << 8; +const DK_IMAGE_USAGE_PRESENT = 1 << 10; +const DK_IMAGE_HW_COMPRESSION = 1 << 2; + +const DK_STAGE_GRAPHICS_MASK = (1 << 5) - 1; +const DK_COLOR_MASK_RGBA = 0xF; + +const DK_PRIMITIVE_LINES = 1; +const DK_PRIMITIVE_TRIANGLES = 4; + +const DK_ATTR_SIZE_2X32 = 0x04; +const DK_ATTR_SIZE_3X32 = 0x02; +const DK_ATTR_SIZE_2X16 = 0x0f; +const DK_ATTR_SIZE_3X16 = 0x05; +const DK_ATTR_SIZE_2X8 = 0x18; +const DK_ATTR_SIZE_4X8 = 0x0a; + +const DK_ATTR_TYPE_SNORM = 1; +const DK_ATTR_TYPE_UNORM = 2; +const DK_ATTR_TYPE_FLOAT = 7; + +const DK_SWIZZLE_RED = 2; +const DK_SWIZZLE_GREEN = 3; +const DK_SWIZZLE_BLUE = 4; +const DK_SWIZZLE_ALPHA = 5; +const DK_DS_SOURCE_DEPTH = 0; + +const PipelineData = struct { + vertex_shader: DkShader, + fragment_shader: DkShader, + attribs: 
[MAX_VERTEX_ATTRIBS]DkVtxAttribState, + attrib_count: u32, + vtx_buffers: [MAX_VERTEX_BUFFERS]DkVtxBufferState, + vtx_buffer_count: u32, +}; + +const MeshData = struct { + pipeline: Pipeline.Handle, + mem_block: DkMemBlock = null, + gpu_addr: DkGpuAddr = 0, + capacity: u32 = 0, + size: u32 = 0, +}; var render_alloc: std.mem.Allocator = undefined; var render_io: std.Io = undefined; +var device: DkDevice = null; +var render_queue: DkQueue = null; +var swapchain: DkSwapchain = null; +var framebuffer_mem: DkMemBlock = null; +var framebuffers: [FB_COUNT]DkImage = undefined; +var command_mem: DkMemBlock = null; +var command_buffer: DkCmdBuf = null; +var code_mem: DkMemBlock = null; +var code_offset: u32 = 0; + +var pipelines = Util.CircularBuffer(PipelineData, 16).init(); +var meshes = Util.CircularBuffer(MeshData, 8192).init(); + +var current_pipeline: Pipeline.Handle = 0; +var current_slot: c_int = -1; +var initialized: bool = false; +var clear_color: [4]f32 = .{ 0.0, 0.0, 0.0, 1.0 }; +var vsync_enabled: bool = true; + pub fn setup(alloc: std.mem.Allocator, io: std.Io) void { render_alloc = alloc; render_io = io; } -pub fn init() anyerror!void {} -pub fn deinit() void {} +pub fn init() anyerror!void { + _ = render_alloc; + _ = render_io; + + var device_maker = DkDeviceMaker{ + .userData = null, + .cbDebug = null, + .cbAlloc = null, + .cbFree = null, + .flags = 0, + }; + device = dkDeviceCreate(&device_maker); + if (device == null) return error.GfxInitFailed; + errdefer { + dkDeviceDestroy(device); + device = null; + } + + try create_framebuffers(); + errdefer destroy_framebuffers(); + + try create_code_memory(); + errdefer destroy_code_memory(); + + try create_command_buffer(); + errdefer destroy_command_buffer(); + + var queue_maker = DkQueueMaker{ + .device = device, + .flags = DK_QUEUE_GRAPHICS | DK_QUEUE_MEDIUM_PRIO | DK_QUEUE_ENABLE_ZCULL, + .commandMemorySize = DK_QUEUE_MIN_CMDMEM_SIZE, + .flushThreshold = DK_QUEUE_MIN_CMDMEM_SIZE / 8, + .perWarpScratchMemorySize 
= 4 * DK_PER_WARP_SCRATCH_MEM_ALIGNMENT, + .maxConcurrentComputeJobs = DK_DEFAULT_MAX_COMPUTE_CONCURRENT_JOBS, + }; + render_queue = dkQueueCreate(&queue_maker); + if (render_queue == null) return error.GfxInitFailed; + + initialized = true; + set_vsync(vsync_enabled); +} + +pub fn deinit() void { + if (render_queue) |_| dkQueueWaitIdle(render_queue); + + destroy_all_meshes(); + pipelines.clear(); + current_pipeline = 0; + + if (render_queue) |_| { + dkQueueDestroy(render_queue); + render_queue = null; + } + + destroy_command_buffer(); + destroy_code_memory(); + destroy_framebuffers(); + + if (device) |_| { + dkDeviceDestroy(device); + device = null; + } + + initialized = false; +} + +pub fn set_clear_color(r: f32, g: f32, b: f32, a: f32) void { + clear_color = .{ r, g, b, a }; +} -pub fn set_clear_color(_: f32, _: f32, _: f32, _: f32) void {} pub fn set_alpha_blend(_: bool) void {} pub fn set_depth_write(_: bool) void {} pub fn set_fog(_: bool, _: f32, _: f32, _: f32, _: f32, _: f32) void {} @@ -34,27 +380,165 @@ pub fn set_proj_matrix(_: *const Mat4) void {} pub fn set_view_matrix(_: *const Mat4) void {} pub fn start_frame() bool { - return false; + if (!initialized or render_queue == null or swapchain == null or command_buffer == null) return false; + + // Single command-memory arena for the bring-up path. Wait before reuse so + // the GPU cannot still be reading last frame's command list. 
+ dkQueueWaitIdle(render_queue); + + const slot = dkQueueAcquireImage(render_queue, swapchain); + if (slot < 0 or slot >= FB_COUNT) return false; + current_slot = slot; + + dkCmdBufClear(command_buffer); + dkCmdBufAddMemory(command_buffer, command_mem, 0, CMD_MEM_SIZE); + + var color_view = imageView(&framebuffers[@intCast(slot)]); + const color_targets = [_]*const DkImageView{&color_view}; + dkCmdBufBindRenderTargets(command_buffer, color_targets[0..].ptr, 1, null); + + const width = gfx.surface.get_width(); + const height = gfx.surface.get_height(); + if (width == 0 or height == 0) return false; + + var viewport = DkViewport{ + .x = 0.0, + .y = 0.0, + .width = @floatFromInt(width), + .height = @floatFromInt(height), + .near = 0.0, + .far = 1.0, + }; + var scissor = DkScissor{ + .x = 0, + .y = 0, + .width = width, + .height = height, + }; + dkCmdBufSetViewports(command_buffer, 0, @ptrCast(&viewport), 1); + dkCmdBufSetScissors(command_buffer, 0, @ptrCast(&scissor), 1); + dkCmdBufClearColor(command_buffer, 0, DK_COLOR_MASK_RGBA, &clear_color); + + bind_fixed_state(); + return true; +} + +pub fn end_frame() void { + if (!initialized or render_queue == null or swapchain == null or command_buffer == null or current_slot < 0) return; + + const list = dkCmdBufFinishList(command_buffer); + dkQueueSubmitCommands(render_queue, list); + dkQueuePresentImage(render_queue, swapchain, current_slot); + current_slot = -1; } -pub fn end_frame() void {} pub fn clear_depth() void {} -pub fn set_vsync(_: bool) void {} -pub fn create_pipeline(_: Pipeline.VertexLayout, _: ?[:0]align(4) const u8, _: ?[:0]align(4) const u8) anyerror!Pipeline.Handle { - return 0; +pub fn set_vsync(v: bool) void { + vsync_enabled = v; + if (swapchain) |_| dkSwapchainSetSwapInterval(swapchain, @intFromBool(v)); } -pub fn destroy_pipeline(_: Pipeline.Handle) void {} -pub fn bind_pipeline(_: Pipeline.Handle) void {} +pub fn create_pipeline(layout: Pipeline.VertexLayout, v_shader: ?[:0]align(4) const u8, 
f_shader: ?[:0]align(4) const u8) anyerror!Pipeline.Handle { + const vertex_code = v_shader orelse return error.InvalidShader; + const fragment_code = f_shader orelse return error.InvalidShader; -pub fn create_mesh(_: Pipeline.Handle) anyerror!Mesh.Handle { - return 0; + var data = PipelineData{ + .vertex_shader = undefined, + .fragment_shader = undefined, + .attribs = @splat(.{ .bits = 0 }), + .attrib_count = 0, + .vtx_buffers = @splat(.{ .stride = 0, .divisor = 0 }), + .vtx_buffer_count = 0, + }; + + try init_layout(&data, layout); + try load_shader(&data.vertex_shader, vertex_code); + try load_shader(&data.fragment_shader, fragment_code); + + const pipeline = pipelines.add_element(data) orelse return error.OutOfPipelines; + return @intCast(pipeline); +} + +pub fn destroy_pipeline(pipeline: Pipeline.Handle) void { + _ = pipelines.remove_element(pipeline); + if (current_pipeline == pipeline) current_pipeline = 0; +} + +pub fn bind_pipeline(pipeline: Pipeline.Handle) void { + current_pipeline = pipeline; +} + +pub fn create_mesh(pipeline: Pipeline.Handle) anyerror!Mesh.Handle { + _ = pipelines.get_element(pipeline) orelse return error.InvalidPipeline; + const mesh = meshes.add_element(.{ .pipeline = pipeline }) orelse return error.OutOfMeshes; + return @intCast(mesh); +} + +pub fn destroy_mesh(handle: Mesh.Handle) void { + const mesh = meshes.get_element(handle) orelse return; + if (mesh.mem_block) |_| dkMemBlockDestroy(mesh.mem_block); + _ = meshes.remove_element(handle); } -pub fn destroy_mesh(_: Mesh.Handle) void {} -pub fn update_mesh(_: Mesh.Handle, _: []const u8) void {} -pub fn draw_mesh(_: Mesh.Handle, _: *const Mat4, _: usize, _: Mesh.Primitive) void {} +pub fn update_mesh(handle: Mesh.Handle, data: []const u8) void { + var mesh = meshes.get_element(handle) orelse return; + + if (data.len == 0) { + mesh.size = 0; + meshes.update_element(handle, mesh); + return; + } + + const needed: u32 = @intCast(data.len); + if (mesh.mem_block == null or mesh.capacity < 
needed) { + if (mesh.mem_block) |_| dkMemBlockDestroy(mesh.mem_block); + + const alloc_size = alignForward(needed, DK_MEMBLOCK_ALIGNMENT); + var maker = memBlockMaker(alloc_size, DK_MEM_CPU_UNCACHED | DK_MEM_GPU_CACHED); + mesh.mem_block = dkMemBlockCreate(&maker); + if (mesh.mem_block == null) { + mesh.capacity = 0; + mesh.size = 0; + meshes.update_element(handle, mesh); + return; + } + mesh.capacity = dkMemBlockGetSize(mesh.mem_block); + mesh.gpu_addr = dkMemBlockGetGpuAddr(mesh.mem_block); + } + + const dst: [*]u8 = @ptrCast(dkMemBlockGetCpuAddr(mesh.mem_block) orelse return); + @memcpy(dst[0..data.len], data); + _ = dkMemBlockFlushCpuCache(mesh.mem_block, 0, needed); + + mesh.size = needed; + meshes.update_element(handle, mesh); +} + +pub fn draw_mesh(handle: Mesh.Handle, _: *const Mat4, count: usize, primitive: Mesh.Primitive) void { + if (!initialized or command_buffer == null) return; + const mesh = meshes.get_element(handle) orelse return; + if (mesh.mem_block == null or mesh.size == 0 or count == 0) return; + + const pipeline_handle = if (current_pipeline != 0) current_pipeline else mesh.pipeline; + const pl = pipelines.get_element(pipeline_handle) orelse return; + + const shaders = [_]*const DkShader{ &pl.vertex_shader, &pl.fragment_shader }; + dkCmdBufBindShaders(command_buffer, DK_STAGE_GRAPHICS_MASK, shaders[0..].ptr, shaders.len); + dkCmdBufBindVtxAttribState(command_buffer, pl.attribs[0..].ptr, pl.attrib_count); + dkCmdBufBindVtxBufferState(command_buffer, pl.vtx_buffers[0..].ptr, pl.vtx_buffer_count); + + var extents: [MAX_VERTEX_BUFFERS]DkBufExtents = undefined; + for (extents[0..pl.vtx_buffer_count]) |*extent| { + extent.* = .{ .addr = mesh.gpu_addr, .size = mesh.size }; + } + dkCmdBufBindVtxBuffers(command_buffer, 0, extents[0..].ptr, pl.vtx_buffer_count); + + dkCmdBufDraw(command_buffer, switch (primitive) { + .triangles => DK_PRIMITIVE_TRIANGLES, + .lines => DK_PRIMITIVE_LINES, + }, @intCast(count), 1, 0, 0); +} pub fn create_texture(_: u32, _: 
u32, _: []align(16) u8) anyerror!Texture.Handle { return 0; @@ -64,3 +548,225 @@ pub fn update_texture(_: Texture.Handle, _: []align(16) u8) void {} pub fn bind_texture(_: Texture.Handle) void {} pub fn destroy_texture(_: Texture.Handle) void {} pub fn force_texture_resident(_: Texture.Handle) void {} + +fn create_framebuffers() !void { + var layout_maker = DkImageLayoutMaker{ + .device = device, + .type = DK_IMAGE_TYPE_2D, + .flags = DK_IMAGE_USAGE_RENDER | DK_IMAGE_USAGE_PRESENT | DK_IMAGE_HW_COMPRESSION, + .format = DK_IMAGE_RGBA8_UNORM, + .msMode = 0, + .dimensions = .{ FB_WIDTH, FB_HEIGHT, 0 }, + .mipLevels = 1, + .pitchStride = 0, + }; + + var framebuffer_layout: DkImageLayout = undefined; + dkImageLayoutInitialize(&framebuffer_layout, &layout_maker); + + const fb_align = dkImageLayoutGetAlignment(&framebuffer_layout); + const fb_size = alignForward(@intCast(dkImageLayoutGetSize(&framebuffer_layout)), fb_align); + var mem_maker = memBlockMaker(FB_COUNT * fb_size, DK_MEM_GPU_CACHED | DK_MEM_IMAGE); + framebuffer_mem = dkMemBlockCreate(&mem_maker); + if (framebuffer_mem == null) return error.GfxInitFailed; + errdefer { + dkMemBlockDestroy(framebuffer_mem); + framebuffer_mem = null; + } + + var swapchain_images: [FB_COUNT]*const DkImage = undefined; + for (&framebuffers, 0..) 
|*fb, i| { + dkImageInitialize(fb, &framebuffer_layout, framebuffer_mem, @intCast(i * fb_size)); + swapchain_images[i] = fb; + } + + var swapchain_maker = DkSwapchainMaker{ + .device = device, + .nativeWindow = nwindowGetDefault(), + .pImages = swapchain_images[0..].ptr, + .numImages = FB_COUNT, + }; + swapchain = dkSwapchainCreate(&swapchain_maker); + if (swapchain == null) return error.GfxInitFailed; +} + +fn destroy_framebuffers() void { + if (swapchain) |_| { + dkSwapchainDestroy(swapchain); + swapchain = null; + } + if (framebuffer_mem) |_| { + dkMemBlockDestroy(framebuffer_mem); + framebuffer_mem = null; + } +} + +fn create_code_memory() !void { + var maker = memBlockMaker(CODE_MEM_SIZE, DK_MEM_CPU_UNCACHED | DK_MEM_GPU_CACHED | DK_MEM_CODE); + code_mem = dkMemBlockCreate(&maker); + if (code_mem == null) return error.GfxInitFailed; + code_offset = 0; +} + +fn destroy_code_memory() void { + if (code_mem) |_| { + dkMemBlockDestroy(code_mem); + code_mem = null; + } + code_offset = 0; +} + +fn create_command_buffer() !void { + var mem_maker = memBlockMaker(CMD_MEM_SIZE, DK_MEM_CPU_UNCACHED | DK_MEM_GPU_CACHED); + command_mem = dkMemBlockCreate(&mem_maker); + if (command_mem == null) return error.GfxInitFailed; + errdefer { + dkMemBlockDestroy(command_mem); + command_mem = null; + } + + var cmd_maker = DkCmdBufMaker{ + .device = device, + .userData = null, + .cbAddMem = null, + }; + command_buffer = dkCmdBufCreate(&cmd_maker); + if (command_buffer == null) return error.GfxInitFailed; +} + +fn destroy_command_buffer() void { + if (command_buffer) |_| { + dkCmdBufDestroy(command_buffer); + command_buffer = null; + } + if (command_mem) |_| { + dkMemBlockDestroy(command_mem); + command_mem = null; + } +} + +fn destroy_all_meshes() void { + for (&meshes.buffer) |*slot| { + if (slot.*) |mesh| { + if (mesh.mem_block) |_| dkMemBlockDestroy(mesh.mem_block); + slot.* = null; + } + } + meshes.clear(); +} + +fn memBlockMaker(size: u32, flags: u32) DkMemBlockMaker { + return 
.{ + .device = device, + .size = alignForward(size, DK_MEMBLOCK_ALIGNMENT), + .flags = flags, + .storage = null, + }; +} + +fn load_shader(shader: *DkShader, code: []const u8) !void { + if (code_mem == null) return error.GfxInitFailed; + + const offset = alignForward(code_offset, DK_SHADER_CODE_ALIGNMENT); + const end = offset + alignForward(@intCast(code.len), DK_SHADER_CODE_ALIGNMENT); + if (end > CODE_MEM_SIZE) return error.OutOfShaderMemory; + + const base: [*]u8 = @ptrCast(dkMemBlockGetCpuAddr(code_mem) orelse return error.GfxInitFailed); + @memcpy(base[offset..][0..code.len], code); + + var maker = DkShaderMaker{ + .codeMem = code_mem, + .control = null, + .codeOffset = offset, + .programId = 0, + }; + dkShaderInitialize(shader, &maker); + if (!dkShaderIsValid(shader)) return error.InvalidShader; + + code_offset = end; +} + +fn init_layout(data: *PipelineData, layout: Pipeline.VertexLayout) !void { + var max_location: u32 = 0; + var max_binding: u32 = 0; + + for (layout.attributes) |attr| { + if (attr.location >= MAX_VERTEX_ATTRIBS or attr.binding >= MAX_VERTEX_BUFFERS) { + return error.UnsupportedVertexLayout; + } + + const loc: usize = attr.location; + data.attribs[loc] = vtxAttrib(attr); + max_location = @max(max_location, attr.location + 1); + max_binding = @max(max_binding, attr.binding + 1); + } + + for (data.vtx_buffers[0..max_binding]) |*buf| { + buf.* = .{ .stride = @intCast(layout.stride), .divisor = 0 }; + } + + data.attrib_count = max_location; + data.vtx_buffer_count = @max(max_binding, 1); +} + +fn vtxAttrib(attr: Pipeline.Attribute) DkVtxAttribState { + const Format = struct { + size: u32, + kind: u32, + }; + const fmt: Format = switch (attr.format) { + .f32x2 => .{ .size = DK_ATTR_SIZE_2X32, .kind = DK_ATTR_TYPE_FLOAT }, + .f32x3 => .{ .size = DK_ATTR_SIZE_3X32, .kind = DK_ATTR_TYPE_FLOAT }, + .unorm8x2 => .{ .size = DK_ATTR_SIZE_2X8, .kind = DK_ATTR_TYPE_UNORM }, + .unorm8x4 => .{ .size = DK_ATTR_SIZE_4X8, .kind = DK_ATTR_TYPE_UNORM }, + 
.unorm16x2 => .{ .size = DK_ATTR_SIZE_2X16, .kind = DK_ATTR_TYPE_UNORM }, + .unorm16x3 => .{ .size = DK_ATTR_SIZE_3X16, .kind = DK_ATTR_TYPE_UNORM }, + .snorm16x2 => .{ .size = DK_ATTR_SIZE_2X16, .kind = DK_ATTR_TYPE_SNORM }, + .snorm16x3 => .{ .size = DK_ATTR_SIZE_3X16, .kind = DK_ATTR_TYPE_SNORM }, + }; + + return .{ .bits = (@as(u32, attr.binding) & 0x1F) | + ((@as(u32, @intCast(attr.offset)) & 0x3FFF) << 7) | + ((fmt.size & 0x3F) << 21) | + ((fmt.kind & 0x7) << 27) }; +} + +fn bind_fixed_state() void { + const rasterizer = DkRasterizerState{ + // rasterizer on, fill both faces, no culling, CCW front face. + .bits = 1 | (2 << 3) | (2 << 5) | (1 << 9) | (1 << 10), + }; + const color = DkColorState{ + // logicOp=Copy, alphaCompare=Always, blending disabled. + .bits = (3 << 8) | (8 << 16), + }; + const color_write = DkColorWriteState{ .masks = 0xFFFF_FFFF }; + const depth = DkDepthStencilState{ + // No depth attachment in this milestone, so keep depth/stencil off. + .bits0 = 8 << 4, + .bits1 = 0, + }; + + dkCmdBufBindRasterizerState(command_buffer, &rasterizer); + dkCmdBufBindColorState(command_buffer, &color); + dkCmdBufBindColorWriteState(command_buffer, &color_write); + dkCmdBufBindDepthStencilState(command_buffer, &depth); +} + +fn imageView(image: *const DkImage) DkImageView { + return .{ + .pImage = image, + .type = DK_IMAGE_TYPE_NONE, + .format = 0, + .swizzle = .{ DK_SWIZZLE_RED, DK_SWIZZLE_GREEN, DK_SWIZZLE_BLUE, DK_SWIZZLE_ALPHA }, + .dsSource = DK_DS_SOURCE_DEPTH, + .layerOffset = 0, + .layerCount = 0, + .mipLevelOffset = 0, + .mipLevelCount = 0, + }; +} + +fn alignForward(value: u32, alignment: u32) u32 { + return std.mem.alignForward(u32, value, alignment); +} From e88eca4adb4b95d4c66cd162e39f40f01fbbf6f9 Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Tue, 26 May 2026 21:46:48 -0400 Subject: [PATCH 09/44] First triangle --- build.zig | 84 ++++- src/audio/audio.zig | 2 + src/engine.zig | 5 +- src/platform/3ds/3ds_gfx.zig | 591 
++++++++++++++++++++++++++++++++-- src/platform/3ds/services.zig | 159 ++++++++- src/platform/3ds/time.zig | 56 +++- src/root.zig | 1 + test/main.zig | 30 +- 8 files changed, 867 insertions(+), 61 deletions(-) diff --git a/build.zig b/build.zig index ddb9f11..752dc17 100644 --- a/build.zig +++ b/build.zig @@ -96,8 +96,8 @@ pub const Config = struct { // macOS default is `.none` because the current miniaudio build is // bugged there. Flip back to `.default` with `-Daudio=default` once // that's fixed. - const default_audio: Audio = switch (target.result.os.tag) { - .macos => .none, + const default_audio: Audio = switch (plat) { + .macos, .nintendo_3ds => .none, else => .default, }; @@ -194,8 +194,7 @@ pub fn addGame(owner: *std.Build, b: *std.Build, opts: GameOptions) *std.Build.S // 3DS and Switch force ofmt=c — there's no Zig-native backend for // either Horizon target yet, so we emit C and let an external // toolchain (devkitARM/libctru on 3DS, devkitA64/libnx on Switch) - // compile the result. Stub backends keep the engine compiling - // end-to-end until a real SDK is wired in. + // compile the result. const target = if (config.platform == .nintendo_3ds or config.platform == .nintendo_switch) blk: { var q = opts.target.query; q.ofmt = .c; @@ -223,8 +222,8 @@ pub fn addGame(owner: *std.Build, b: *std.Build, opts: GameOptions) *std.Build.S if (psp_dep) |pd| { mod.addImport("pspsdk", pd.module("pspsdk")); } else if (config.platform == .nintendo_3ds or config.platform == .nintendo_switch) { - // No 3DS/Switch SDK is wired in yet; stubs satisfy every - // backend contract so addImport calls are unnecessary here. + // Console SDK symbols are declared as backend-local externs and + // resolved by the export pipeline's devkitPro link step. 
} else { const zglfw = owner.dependency("zglfw", .{ .target = target, @@ -439,6 +438,13 @@ fn addUamStep(b: *std.Build, uam: []const u8, stage: []const u8, comptime output return output; } +fn addPicassoStep(b: *std.Build, picasso: []const u8, comptime output_name: []const u8, input: std.Build.LazyPath) std.Build.LazyPath { + const run = b.addSystemCommand(&.{ picasso, "-o" }); + const output = run.addOutputFileArg(output_name); + run.addFileArg(input); + return output; +} + pub const ExportOptions = struct { /// PSP/macOS: human-readable name shown to the OS (XMB title on PSP, /// CFBundleName on macOS). Ignored elsewhere. @@ -857,6 +863,18 @@ fn threedsxPipeline(b: *std.Build, exe: *std.Build.Step.Compile, opts: ExportOpt const include_wf = b.addWriteFiles(); _ = include_wf.addCopyFile(patched_zig_h, "zig.h"); + const shim_wf = b.addWriteFiles(); + const exception_shim = shim_wf.add("aether_3ds_exception.c", + \\#include <3ds.h> + \\ + \\extern void aether3dsExceptionHandler(ERRF_ExceptionInfo *excep, CpuRegisters *regs); + \\ + \\void aether3dsInstallExceptionHandler(void *stack_top) { + \\ threadOnException(aether3dsExceptionHandler, stack_top, WRITE_DATA_TO_HANDLER_STACK); + \\} + \\ + ); + // Standard 3DS arch flags from devkitPro's template Makefile. const arch = [_][]const u8{ "-march=armv6k", "-mtune=mpcore", "-mfloat-abi=hard", "-mtp=soft", @@ -868,35 +886,36 @@ fn threedsxPipeline(b: *std.Build, exe: *std.Build.Step.Compile, opts: ExportOpt const link = b.addSystemCommand(&.{gcc}); link.addArgs(&arch); link.addArgs(&.{ - "-mword-relocations", "-ffunction-sections", - "-D__3DS__", "-DARM11", - "-O2", "-g", - "-specs=3dsx.specs", + "-mword-relocations", "-ffunction-sections", + "-D__3DS__", "-DARM11", + "-O2", "-g", + "-specs=3dsx.specs", "-Wl,--wrap=threadCreate", // Pin the C standard to C11. zig.h picks `[[noreturn]]` under // C23 but emits it in attribute-list position that gcc rejects; // C11's `_Noreturn` is what zig's emitter actually targets. 
- "-std=gnu11", + "-std=gnu11", // zig's -ofmt=c emitter treats `uintptr_t` and `uint32_t` as // interchangeable on 32-bit ARM (they ARE the same width) but // gcc 14+ promotes the resulting pointer-type mismatch from a // warning to an error. Demote it and a couple of related // chatters; we don't author this C and there's nothing // actionable in the warnings. - "-Wno-incompatible-pointer-types", "-Wno-int-conversion", - "-Wno-builtin-declaration-mismatch", + "-Wno-incompatible-pointer-types", + "-Wno-int-conversion", "-Wno-builtin-declaration-mismatch", }); link.addArg(b.fmt("-I{s}", .{ctru_inc})); link.addPrefixedDirectoryArg("-I", include_wf.getDirectory()); link.addArg("-x"); link.addArg("c"); link.addArtifactArg(exe); + link.addFileArg(exception_shim); // Reset language so gcc treats subsequent inputs by extension; the // compiler_rt object is ELF arm and `-x c` would mis-parse it. link.addArg("-x"); link.addArg("none"); link.addFileArg(crt_clean); link.addArg(b.fmt("-L{s}", .{ctru_lib})); - link.addArgs(&.{ "-lctru", "-lm" }); + link.addArgs(&.{ "-lcitro3d", "-lctru", "-lm" }); link.addArg("-o"); const elf = link.addOutputFileArg(b.fmt("{s}.elf", .{exe.name})); @@ -1089,6 +1108,43 @@ fn switchNroPipeline(b: *std.Build, exe: *std.Build.Step.Compile, opts: ExportOp /// Aether.addShader(ae_dep.builder, b, exe, config, "basic", .{ ... 
}); /// pub fn addShader(owner: *std.Build, b: *std.Build, exe: *std.Build.Step.Compile, config: Config, comptime name: []const u8, paths: ShaderPaths) void { + if (config.platform == .nintendo_3ds and config.gfx == .default) { + const picasso = b.pathJoin(&.{ devkitProPath(b), "tools/bin/picasso" }); + const sources = b.addWriteFiles(); + const vert_src = sources.add(name ++ "_3ds.v.pica", + \\.fvec projection[4] + \\ + \\.constf myconst(0.0, 1.0, 0.0, 0.0) + \\.alias ones myconst.yyyy + \\ + \\.out outpos position + \\.out outclr color + \\ + \\.alias inpos v0 + \\.alias inclr v1 + \\ + \\.proc main + \\ mov r0.xyz, inpos + \\ mov r0.w, ones + \\ + \\ dp4 outpos.x, projection[0], r0 + \\ dp4 outpos.y, projection[1], r0 + \\ dp4 outpos.z, projection[2], r0 + \\ dp4 outpos.w, projection[3], r0 + \\ + \\ mov outclr, inclr + \\ end + \\.end + \\ + ); + const vert = addPicassoStep(b, picasso, name ++ ".shbin", vert_src); + const empty = b.addWriteFiles(); + const frag = empty.add(name ++ "_3ds_frag_stub", ""); + exe.root_module.addAnonymousImport(name ++ "_vert", .{ .root_source_file = vert }); + exe.root_module.addAnonymousImport(name ++ "_frag", .{ .root_source_file = frag }); + return; + } + if (config.platform == .nintendo_switch and config.gfx == .default) { const uam = b.pathJoin(&.{ devkitProPath(b), "tools/bin/uam" }); const sources = b.addWriteFiles(); diff --git a/src/audio/audio.zig b/src/audio/audio.zig index 98cd9c2..8892733 100644 --- a/src/audio/audio.zig +++ b/src/audio/audio.zig @@ -1,6 +1,7 @@ const std = @import("std"); const Vec3 = @import("../math/math.zig").Vec3; const platform_audio = @import("../platform/audio.zig"); +const options = @import("options"); // -- types ------------------------------------------------------------------- @@ -13,6 +14,7 @@ pub const mixer_mod = @import("mixer.zig"); pub const SoundHandle = mixer_mod.SoundHandle; pub const PlayOptions = mixer_mod.PlayOptions; pub const Priority = mixer_mod.Priority; +pub const enabled 
= options.config.audio != .none; // -- forwarding to the instantiated mixer ------------------------------------ diff --git a/src/engine.zig b/src/engine.zig index c94645c..df8083d 100644 --- a/src/engine.zig +++ b/src/engine.zig @@ -326,10 +326,11 @@ pub const Engine = struct { const next_tick = @as(i64, @intCast(TICK_US)) - tick_accum; const sleep_us = @max(0, @min(next_update, next_tick)); if (sleep_us > 0) { - try std.Io.sleep(self.io, .fromMicroseconds(sleep_us), clock); + const sleep_ns = sleep_us * NS_PER_US; + try std.Io.sleep(self.io, .fromNanoseconds(@intCast(sleep_ns)), clock); } } else if (options.config.platform != .psp) { - try std.Io.sleep(self.io, .fromMilliseconds(50), clock); + try std.Io.sleep(self.io, .fromNanoseconds(50 * std.time.ns_per_ms), clock); } } diff --git a/src/platform/3ds/3ds_gfx.zig b/src/platform/3ds/3ds_gfx.zig index 8565df4..e7ef606 100644 --- a/src/platform/3ds/3ds_gfx.zig +++ b/src/platform/3ds/3ds_gfx.zig @@ -1,59 +1,440 @@ -//! 3DS GPU backend stub. +//! Minimal Nintendo 3DS Citro3D backend. //! -//! Empty template: every entry point satisfies `gfx_api.Interface` so the -//! engine compiles end-to-end on `-Dtarget=...-3ds`. Real bring-up will -//! plug libctru's GPU (citro3d / GPU command lists) in here. +//! First bring-up milestone: initialize the top-screen render target and draw +//! Aether's current colored triangle path. Textures and richer render state are +//! still no-ops until the next backend pass. 
const std = @import("std"); +const Util = @import("../../util/util.zig"); const Mat4 = @import("../../math/math.zig").Mat4; const Rendering = @import("../../rendering/rendering.zig"); const Pipeline = Rendering.Pipeline; const Mesh = Rendering.mesh; const Texture = Rendering.Texture; +const C3D_AttrInfo = opaque {}; +const C3D_RenderTarget = extern struct { + next: ?*C3D_RenderTarget, + prev: ?*C3D_RenderTarget, + frameBuf: C3D_FrameBuf, + used: bool, + ownsColor: bool, + ownsDepth: bool, + linked: bool, + screen: c_int, + side: c_int, + transferFlags: u32, +}; +const C3D_FrameBuf = extern struct { + colorBuf: ?*anyopaque, + depthBuf: ?*anyopaque, + width: u16, + height: u16, + colorFmt: c_int, + depthFmt: c_int, + block32: bool, + masks: u8, +}; +const C3D_TexEnv = extern struct { + srcRgb: u16, + srcAlpha: u16, + opAll: u32, + funcRgb: u16, + funcAlpha: u16, + color: u32, + scaleRgb: u16, + scaleAlpha: u16, +}; +const C3D_FVec = extern struct { + w: f32, + z: f32, + y: f32, + x: f32, +}; +const C3D_Mtx = extern struct { + r: [4]C3D_FVec, +}; + +const DVLP = extern struct { + codeSize: u32, + codeData: [*]u32, + opdescSize: u32, + opcdescData: [*]u32, +}; +const DVLEConstEntry = extern struct { + typ: u16, + id: u16, + data: [4]u32, +}; +const DVLEOutEntry = extern struct { + typ: u16, + regID: u16, + mask: u8, + unk: [3]u8, +}; +const DVLEUniformEntry = extern struct { + symbolOffset: u32, + startReg: u16, + endReg: u16, +}; +const DVLE = extern struct { + typ: c_int, + mergeOutmaps: bool, + gshMode: c_int, + gshFixedVtxStart: u8, + gshVariableVtxNum: u8, + gshFixedVtxNum: u8, + dvlp: *DVLP, + mainOffset: u32, + endmainOffset: u32, + constTableSize: u32, + constTableData: [*]DVLEConstEntry, + outTableSize: u32, + outTableData: [*]DVLEOutEntry, + uniformTableSize: u32, + uniformTableData: [*]DVLEUniformEntry, + symbolTableData: [*]u8, + outmapMask: u8, + outmapData: [8]u32, + outmapMode: u32, + outmapClock: u32, +}; +const DVLB = extern struct { + numDVLE: u32, + 
DVLP: DVLP, + DVLE: [*]DVLE, +}; +const ShaderInstance = opaque {}; +const ShaderProgram = extern struct { + vertexShader: ?*ShaderInstance, + geometryShader: ?*ShaderInstance, + geoShaderInputPermutation: [2]u32, + geoShaderInputStride: u8, +}; + +extern fn gfxInitDefault() void; +extern fn gfxExit() void; + +extern fn C3D_Init(cmdBufSize: usize) bool; +extern fn C3D_Fini() void; +extern fn C3D_FrameBegin(flags: u8) bool; +extern fn C3D_FrameDrawOn(target: *C3D_RenderTarget) bool; +extern fn C3D_FrameEnd(flags: u8) void; +extern fn C3D_RenderTargetCreate(width: c_int, height: c_int, colorFmt: c_int, depthFmt: c_int) ?*C3D_RenderTarget; +extern fn C3D_RenderTargetDelete(target: *C3D_RenderTarget) void; +extern fn C3D_RenderTargetSetOutput(target: ?*C3D_RenderTarget, screen: c_int, side: c_int, transferFlags: u32) void; +extern fn C3D_FrameBufClear(fb: *C3D_FrameBuf, clearBits: c_int, clearColor: u32, clearDepth: u32) void; +extern fn C3D_BindProgram(program: *ShaderProgram) void; +extern fn C3D_GetAttrInfo() *C3D_AttrInfo; +extern fn AttrInfo_Init(info: *C3D_AttrInfo) void; +extern fn AttrInfo_AddLoader(info: *C3D_AttrInfo, regId: c_int, format: c_int, count: c_int) c_int; +extern fn C3D_GetTexEnv(id: c_int) *C3D_TexEnv; +extern fn C3D_DirtyTexEnv(env: *C3D_TexEnv) void; +extern fn C3D_CullFace(mode: c_int) void; +extern fn C3D_DepthTest(enable: bool, function: c_int, writemask: c_int) void; +extern fn C3D_AlphaBlend(colorEq: c_int, alphaEq: c_int, srcClr: c_int, dstClr: c_int, srcAlpha: c_int, dstAlpha: c_int) void; +extern fn C3D_ImmDrawBegin(primitive: c_int) void; +extern fn C3D_ImmSendAttrib(x: f32, y: f32, z: f32, w: f32) void; +extern fn C3D_ImmDrawEnd() void; +extern fn Mtx_OrthoTilt(mtx: *C3D_Mtx, left: f32, right: f32, bottom: f32, top: f32, near: f32, far: f32, isLeftHanded: bool) void; + +extern fn DVLB_ParseFile(shbinData: [*]u32, shbinSize: u32) ?*DVLB; +extern fn DVLB_Free(dvlb: *DVLB) void; +extern fn shaderProgramInit(sp: *ShaderProgram) c_int; 
+extern fn shaderProgramFree(sp: *ShaderProgram) c_int; +extern fn shaderProgramSetVsh(sp: *ShaderProgram, dvle: *DVLE) c_int; +extern fn shaderInstanceGetUniformLocation(si: *ShaderInstance, name: [*:0]const u8) i8; + +extern var C3D_FVUnif: [2][C3D_FVUNIF_COUNT]C3D_FVec; +extern var C3D_FVUnifDirty: [2][C3D_FVUNIF_COUNT]bool; + +const C3D_DEFAULT_CMDBUF_SIZE = 0x40000; +const C3D_FRAME_SYNCDRAW = 1 << 0; +const C3D_CLEAR_COLOR = 1 << 0; +const C3D_CLEAR_DEPTH = 1 << 1; +const C3D_CLEAR_ALL = C3D_CLEAR_COLOR | C3D_CLEAR_DEPTH; +const C3D_FVUNIF_COUNT = 96; + +const GPU_VERTEX_SHADER = 0; +const GPU_FLOAT = 3; +const GPU_RB_RGBA8 = 0; +const GPU_RB_DEPTH24_STENCIL8 = 3; +const GPU_ALWAYS = 1; +const GPU_WRITE_COLOR = 0x0F; +const GPU_CULL_NONE = 0; +const GPU_BLEND_ADD = 0; +const GPU_ZERO = 0; +const GPU_ONE = 1; +const GPU_SRC_ALPHA = 6; +const GPU_ONE_MINUS_SRC_ALPHA = 7; +const GPU_PRIMARY_COLOR = 0x00; +const GPU_REPLACE = 0x00; +const GPU_TEVSCALE_1 = 0x0; +const GPU_TRIANGLES = 0x0000; + +const GFX_TOP = 0; +const GFX_LEFT = 0; +const GX_TRANSFER_FMT_RGB8 = 1; +const DISPLAY_TRANSFER_FLAGS = GX_TRANSFER_FMT_RGB8 << 12; +const TOP_SCREEN_WIDTH: f32 = 400.0; +const TOP_SCREEN_HEIGHT: f32 = 240.0; + +const ConvertedVertex = struct { + pos: [4]f32, + color: [4]f32, +}; + +const PipelineData = struct { + program: ShaderProgram, + dvlb: *DVLB, + stride: usize, + position_attr: Pipeline.Attribute, + color_attr: ?Pipeline.Attribute, + projection_loc: i8, +}; + +const MeshData = struct { + pipeline: Pipeline.Handle, + ptr: ?[*]ConvertedVertex = null, + len: usize = 0, + capacity: usize = 0, +}; + var render_alloc: std.mem.Allocator = undefined; var render_io: std.Io = undefined; +var initialized: bool = false; +var target: ?*C3D_RenderTarget = null; +var clear_color: u32 = 0x000000FF; +var vsync_enabled: bool = true; +var current_pipeline: Pipeline.Handle = 0; +var current_proj: Mat4 = Mat4.identity(); +var current_view: Mat4 = Mat4.identity(); +var 
screen_projection: C3D_Mtx = undefined; + +var pipelines = Util.CircularBuffer(PipelineData, 16).init(); +var meshes = Util.CircularBuffer(MeshData, 2048).init(); + pub fn setup(alloc: std.mem.Allocator, io: std.Io) void { render_alloc = alloc; render_io = io; } -pub fn init() anyerror!void {} -pub fn deinit() void {} +pub fn init() anyerror!void { + _ = render_io; + + gfxInitDefault(); + errdefer gfxExit(); + + if (!C3D_Init(C3D_DEFAULT_CMDBUF_SIZE)) return error.GfxInitFailed; + errdefer C3D_Fini(); + + target = C3D_RenderTargetCreate(240, 400, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8); + if (target == null) return error.GfxInitFailed; + errdefer { + C3D_RenderTargetDelete(target.?); + target = null; + } + + C3D_RenderTargetSetOutput(target, GFX_TOP, GFX_LEFT, DISPLAY_TRANSFER_FLAGS); + Mtx_OrthoTilt(&screen_projection, 0.0, TOP_SCREEN_WIDTH, 0.0, TOP_SCREEN_HEIGHT, 0.0, 1.0, true); + + configure_fixed_attributes(); + configure_color_texenv(); + C3D_CullFace(GPU_CULL_NONE); + C3D_DepthTest(false, GPU_ALWAYS, GPU_WRITE_COLOR); + set_alpha_blend(true); + + initialized = true; +} + +pub fn deinit() void { + destroy_all_meshes(); + destroy_all_pipelines(); + current_pipeline = 0; + + if (target) |t| { + C3D_RenderTargetDelete(t); + target = null; + } + + if (initialized) { + C3D_Fini(); + gfxExit(); + initialized = false; + } +} + +pub fn set_clear_color(r: f32, g: f32, b: f32, a: f32) void { + clear_color = (@as(u32, floatByte(r)) << 24) | + (@as(u32, floatByte(g)) << 16) | + (@as(u32, floatByte(b)) << 8) | + @as(u32, floatByte(a)); +} + +pub fn set_alpha_blend(enabled: bool) void { + if (enabled) { + C3D_AlphaBlend(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA); + } else { + C3D_AlphaBlend(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_ONE, GPU_ZERO, GPU_ONE, GPU_ZERO); + } +} + +pub fn set_depth_write(enabled: bool) void { + C3D_DepthTest(enabled, GPU_ALWAYS, if (enabled) GPU_WRITE_COLOR | 0x10 else GPU_WRITE_COLOR); 
+} -pub fn set_clear_color(_: f32, _: f32, _: f32, _: f32) void {} -pub fn set_alpha_blend(_: bool) void {} -pub fn set_depth_write(_: bool) void {} pub fn set_fog(_: bool, _: f32, _: f32, _: f32, _: f32, _: f32) void {} pub fn set_clip_planes(_: bool) void {} -pub fn set_culling(_: bool) void {} +pub fn set_culling(enabled: bool) void { + C3D_CullFace(if (enabled) 2 else GPU_CULL_NONE); +} pub fn set_uv_offset(_: f32, _: f32) void {} -pub fn set_proj_matrix(_: *const Mat4) void {} -pub fn set_view_matrix(_: *const Mat4) void {} + +pub fn set_proj_matrix(mat: *const Mat4) void { + current_proj = mat.*; +} + +pub fn set_view_matrix(mat: *const Mat4) void { + current_view = mat.*; +} pub fn start_frame() bool { - return false; + const t = target orelse return false; + if (!initialized) return false; + + const flags: u8 = if (vsync_enabled) C3D_FRAME_SYNCDRAW else 0; + if (!C3D_FrameBegin(flags)) return false; + + render_target_clear(t, C3D_CLEAR_ALL, clear_color, 0); + if (!C3D_FrameDrawOn(t)) { + C3D_FrameEnd(0); + return false; + } + + return true; } -pub fn end_frame() void {} -pub fn clear_depth() void {} -pub fn set_vsync(_: bool) void {} +pub fn end_frame() void { + if (!initialized) return; + C3D_FrameEnd(0); +} -pub fn create_pipeline(_: Pipeline.VertexLayout, _: ?[:0]align(4) const u8, _: ?[:0]align(4) const u8) anyerror!Pipeline.Handle { - return 0; +pub fn clear_depth() void { + if (target) |t| render_target_clear(t, C3D_CLEAR_DEPTH, clear_color, 0); } -pub fn destroy_pipeline(_: Pipeline.Handle) void {} -pub fn bind_pipeline(_: Pipeline.Handle) void {} +pub fn set_vsync(v: bool) void { + vsync_enabled = v; +} -pub fn create_mesh(_: Pipeline.Handle) anyerror!Mesh.Handle { - return 0; +pub fn create_pipeline(layout: Pipeline.VertexLayout, v_shader: ?[:0]align(4) const u8, _: ?[:0]align(4) const u8) anyerror!Pipeline.Handle { + const code = v_shader orelse return error.InvalidShader; + if (code.len == 0) return error.InvalidShader; + + const dvlb = 
DVLB_ParseFile(@ptrCast(@constCast(code.ptr)), @intCast(code.len)) orelse return error.InvalidShader; + errdefer DVLB_Free(dvlb); + if (dvlb.numDVLE == 0) return error.InvalidShader; + + var program: ShaderProgram = undefined; + if (shaderProgramInit(&program) != 0) return error.InvalidShader; + errdefer _ = shaderProgramFree(&program); + if (shaderProgramSetVsh(&program, &dvlb.DVLE[0]) != 0) return error.InvalidShader; + + const vertex_shader = program.vertexShader orelse return error.InvalidShader; + const projection_loc = shaderInstanceGetUniformLocation(vertex_shader, "projection"); + if (projection_loc < 0) return error.InvalidShader; + + const position_attr = find_attr(layout, .position) orelse return error.UnsupportedVertexLayout; + const data = PipelineData{ + .program = program, + .dvlb = dvlb, + .stride = layout.stride, + .position_attr = position_attr, + .color_attr = find_attr(layout, .color), + .projection_loc = projection_loc, + }; + + const handle = pipelines.add_element(data) orelse return error.OutOfPipelines; + return @intCast(handle); +} + +pub fn destroy_pipeline(handle: Pipeline.Handle) void { + var pl = pipelines.get_element(handle) orelse return; + _ = shaderProgramFree(&pl.program); + DVLB_Free(pl.dvlb); + _ = pipelines.remove_element(handle); + if (current_pipeline == handle) current_pipeline = 0; +} + +pub fn bind_pipeline(handle: Pipeline.Handle) void { + current_pipeline = handle; } -pub fn destroy_mesh(_: Mesh.Handle) void {} -pub fn update_mesh(_: Mesh.Handle, _: []const u8) void {} -pub fn draw_mesh(_: Mesh.Handle, _: *const Mat4, _: usize, _: Mesh.Primitive) void {} +pub fn create_mesh(pipeline: Pipeline.Handle) anyerror!Mesh.Handle { + _ = pipelines.get_element(pipeline) orelse return error.InvalidPipeline; + const handle = meshes.add_element(.{ .pipeline = pipeline }) orelse return error.OutOfMeshes; + return @intCast(handle); +} + +pub fn destroy_mesh(handle: Mesh.Handle) void { + var mesh = meshes.get_element(handle) orelse 
return; + free_mesh_vertices(&mesh); + _ = meshes.remove_element(handle); +} + +pub fn update_mesh(handle: Mesh.Handle, data: []const u8) void { + var mesh = meshes.get_element(handle) orelse return; + const pl = pipelines.get_element(mesh.pipeline) orelse return; + + const vertex_count = if (pl.stride == 0) 0 else data.len / pl.stride; + if (vertex_count > mesh.capacity) { + free_mesh_vertices(&mesh); + const verts = render_alloc.alloc(ConvertedVertex, vertex_count) catch { + mesh.len = 0; + mesh.capacity = 0; + meshes.update_element(handle, mesh); + return; + }; + mesh.ptr = verts.ptr; + mesh.capacity = verts.len; + } + + if (mesh.ptr) |ptr| { + const dst = ptr[0..mesh.capacity]; + for (0..vertex_count) |i| { + const src = data[i * pl.stride ..][0..pl.stride]; + dst[i] = convert_vertex(src, pl); + } + } + mesh.len = vertex_count; + meshes.update_element(handle, mesh); +} + +pub fn draw_mesh(handle: Mesh.Handle, model: *const Mat4, count: usize, primitive: Mesh.Primitive) void { + if (!initialized or primitive != .triangles) return; + + const mesh = meshes.get_element(handle) orelse return; + const ptr = mesh.ptr orelse return; + const pipeline_handle = if (current_pipeline != 0) current_pipeline else mesh.pipeline; + var pl = pipelines.get_element(pipeline_handle) orelse return; + const draw_count = @min(count, mesh.len); + if (draw_count == 0) return; + + const view_proj = Mat4.mul(current_view, current_proj); + const mvp = Mat4.mul(model.*, view_proj); + + C3D_BindProgram(&pl.program); + upload_matrix_uniform(pl.projection_loc, &screen_projection); + + C3D_ImmDrawBegin(GPU_TRIANGLES); + for (ptr[0..draw_count]) |vertex| { + const pos = clip_to_screen(transform_pos(vertex.pos, &mvp)); + C3D_ImmSendAttrib(pos[0], pos[1], pos[2], pos[3]); + C3D_ImmSendAttrib(vertex.color[0], vertex.color[1], vertex.color[2], vertex.color[3]); + } + C3D_ImmDrawEnd(); +} pub fn create_texture(_: u32, _: u32, _: []align(16) u8) anyerror!Texture.Handle { return 0; @@ -63,3 +444,163 @@ 
pub fn update_texture(_: Texture.Handle, _: []align(16) u8) void {} pub fn bind_texture(_: Texture.Handle) void {} pub fn destroy_texture(_: Texture.Handle) void {} pub fn force_texture_resident(_: Texture.Handle) void {} + +fn render_target_clear(t: *C3D_RenderTarget, bits: c_int, color: u32, depth: u32) void { + C3D_FrameBufClear(&t.frameBuf, bits, color, depth); +} + +fn configure_fixed_attributes() void { + const attr = C3D_GetAttrInfo(); + AttrInfo_Init(attr); + _ = AttrInfo_AddLoader(attr, 0, GPU_FLOAT, 4); + _ = AttrInfo_AddLoader(attr, 1, GPU_FLOAT, 4); +} + +fn configure_color_texenv() void { + const env = C3D_GetTexEnv(0); + env.* = .{ + .srcRgb = GPU_PRIMARY_COLOR, + .srcAlpha = GPU_PRIMARY_COLOR, + .opAll = 0, + .funcRgb = GPU_REPLACE, + .funcAlpha = GPU_REPLACE, + .color = 0xFFFFFFFF, + .scaleRgb = GPU_TEVSCALE_1, + .scaleAlpha = GPU_TEVSCALE_1, + }; + C3D_DirtyTexEnv(env); +} + +fn upload_matrix_uniform(loc: i8, mat: *const C3D_Mtx) void { + if (loc < 0) return; + + const base: usize = @intCast(loc); + if (base + 4 > C3D_FVUNIF_COUNT) return; + + inline for (0..4) |i| { + C3D_FVUnif[GPU_VERTEX_SHADER][base + i] = mat.r[i]; + C3D_FVUnifDirty[GPU_VERTEX_SHADER][base + i] = true; + } +} + +fn destroy_all_pipelines() void { + for (&pipelines.buffer) |*slot| { + if (slot.*) |*pl| { + _ = shaderProgramFree(&pl.program); + DVLB_Free(pl.dvlb); + slot.* = null; + } + } + pipelines.clear(); +} + +fn destroy_all_meshes() void { + for (&meshes.buffer) |*slot| { + if (slot.*) |*mesh| { + free_mesh_vertices(mesh); + slot.* = null; + } + } + meshes.clear(); +} + +fn free_mesh_vertices(mesh: *MeshData) void { + if (mesh.ptr) |ptr| { + render_alloc.free(ptr[0..mesh.capacity]); + mesh.ptr = null; + } + mesh.len = 0; + mesh.capacity = 0; +} + +fn find_attr(layout: Pipeline.VertexLayout, usage: Pipeline.AttributeUsage) ?Pipeline.Attribute { + for (layout.attributes) |attr| { + if (attr.usage == usage) return attr; + } + return null; +} + +fn convert_vertex(src: []const 
u8, pl: PipelineData) ConvertedVertex { + return .{ + .pos = decode_vec4(src, pl.position_attr, .{ 0.0, 0.0, 0.0, 1.0 }), + .color = if (pl.color_attr) |attr| decode_color(src, attr) else .{ 1.0, 1.0, 1.0, 1.0 }, + }; +} + +fn decode_vec4(src: []const u8, attr: Pipeline.Attribute, default: [4]f32) [4]f32 { + const off = attr.offset; + return switch (attr.format) { + .f32x2 => .{ read_f32(src, off, default[0]), read_f32(src, off + 4, default[1]), default[2], default[3] }, + .f32x3 => .{ read_f32(src, off, default[0]), read_f32(src, off + 4, default[1]), read_f32(src, off + 8, default[2]), default[3] }, + .unorm8x2 => .{ read_u8_norm(src, off, default[0]), read_u8_norm(src, off + 1, default[1]), default[2], default[3] }, + .unorm8x4 => .{ read_u8_norm(src, off, default[0]), read_u8_norm(src, off + 1, default[1]), read_u8_norm(src, off + 2, default[2]), read_u8_norm(src, off + 3, default[3]) }, + .unorm16x2 => .{ read_u16_norm(src, off, default[0]), read_u16_norm(src, off + 2, default[1]), default[2], default[3] }, + .unorm16x3 => .{ read_u16_norm(src, off, default[0]), read_u16_norm(src, off + 2, default[1]), read_u16_norm(src, off + 4, default[2]), default[3] }, + .snorm16x2 => .{ read_i16_norm(src, off, default[0]), read_i16_norm(src, off + 2, default[1]), default[2], default[3] }, + .snorm16x3 => .{ read_i16_norm(src, off, default[0]), read_i16_norm(src, off + 2, default[1]), read_i16_norm(src, off + 4, default[2]), default[3] }, + }; +} + +fn decode_color(src: []const u8, attr: Pipeline.Attribute) [4]f32 { + return switch (attr.format) { + .unorm8x4 => decode_vec4(src, attr, .{ 1.0, 1.0, 1.0, 1.0 }), + .f32x3 => .{ + read_f32(src, attr.offset, 1.0), + read_f32(src, attr.offset + 4, 1.0), + read_f32(src, attr.offset + 8, 1.0), + 1.0, + }, + .f32x2, .unorm8x2, .unorm16x2, .unorm16x3, .snorm16x2, .snorm16x3 => decode_vec4(src, attr, .{ 1.0, 1.0, 1.0, 1.0 }), + }; +} + +fn transform_pos(pos: [4]f32, mat: *const Mat4) [4]f32 { + return .{ + pos[0] * mat.data[0][0] + 
pos[1] * mat.data[1][0] + pos[2] * mat.data[2][0] + pos[3] * mat.data[3][0], + pos[0] * mat.data[0][1] + pos[1] * mat.data[1][1] + pos[2] * mat.data[2][1] + pos[3] * mat.data[3][1], + pos[0] * mat.data[0][2] + pos[1] * mat.data[1][2] + pos[2] * mat.data[2][2] + pos[3] * mat.data[3][2], + pos[0] * mat.data[0][3] + pos[1] * mat.data[1][3] + pos[2] * mat.data[2][3] + pos[3] * mat.data[3][3], + }; +} + +fn clip_to_screen(pos: [4]f32) [4]f32 { + const inv_w: f32 = if (@abs(pos[3]) > 0.000001) 1.0 / pos[3] else 1.0; + const ndc_x = pos[0] * inv_w; + const ndc_y = pos[1] * inv_w; + const ndc_z = pos[2] * inv_w; + + return .{ + (ndc_x * 0.5 + 0.5) * TOP_SCREEN_WIDTH, + (ndc_y * 0.5 + 0.5) * TOP_SCREEN_HEIGHT, + @max(0.0, @min(1.0, ndc_z)), + 1.0, + }; +} + +fn read_f32(src: []const u8, offset: usize, default: f32) f32 { + if (offset + 4 > src.len) return default; + const bits = std.mem.readInt(u32, src[offset..][0..4], .little); + return @bitCast(bits); +} + +fn read_u8_norm(src: []const u8, offset: usize, default: f32) f32 { + if (offset >= src.len) return default; + return @as(f32, @floatFromInt(src[offset])) / 255.0; +} + +fn read_u16_norm(src: []const u8, offset: usize, default: f32) f32 { + if (offset + 2 > src.len) return default; + const value = std.mem.readInt(u16, src[offset..][0..2], .little); + return @as(f32, @floatFromInt(value)) / 65535.0; +} + +fn read_i16_norm(src: []const u8, offset: usize, default: f32) f32 { + if (offset + 2 > src.len) return default; + const bits = std.mem.readInt(u16, src[offset..][0..2], .little); + const value: i16 = @bitCast(bits); + return @max(-1.0, @as(f32, @floatFromInt(value)) / 32767.0); +} + +fn floatByte(v: f32) u8 { + return @intFromFloat(@max(0.0, @min(1.0, v)) * 255.0); +} diff --git a/src/platform/3ds/services.zig b/src/platform/3ds/services.zig index 9ba0043..e3572e5 100644 --- a/src/platform/3ds/services.zig +++ b/src/platform/3ds/services.zig @@ -8,20 +8,177 @@ //! Zig code path. 
We override `__stacksize__` (a `WEAK` symbol in //! libctru) with a strong export. 1 MB is comfortable; bump if engine //! frames grow. +//! +//! libctru also creates service threads internally. NDSP currently asks for +//! a 4 KB stack, which can underflow in its sound-frame worker before Aether +//! code is on the stack. The 3DS link step wraps `threadCreate`, and the +//! wrapper below raises tiny service-thread stacks to a conservative floor. const process_init = @import("../c_process_init.zig"); +const std = @import("std"); const argv = [_][*:0]const u8{"Aether"}; +const min_service_thread_stack = 128 * 1024; +const exception_stack_size = 16 * 1024; +const fatal_result: c_int = -1; +const USERBREAK_PANIC = 0; + +const Thread = ?*anyopaque; +const ThreadFunc = *const fn (?*anyopaque) callconv(.c) void; +const ExceptionInfo = extern struct { + typ: c_int, + reserved: [3]u8, + fsr: u32, + far: u32, + fpexc: u32, + fpinst: u32, + fpinst2: u32, +}; +const CpuRegisters = extern struct { + r: [13]u32, + sp: u32, + lr: u32, + pc: u32, + cpsr: u32, +}; + +extern fn __real_threadCreate( + entrypoint: ThreadFunc, + arg: ?*anyopaque, + stack_size: usize, + prio: c_int, + core_id: c_int, + detached: bool, +) Thread; + +extern fn aether3dsInstallExceptionHandler(stack_top: ?*anyopaque) void; +extern fn errfInit() c_int; +extern fn ERRF_SetUserString(user_string: [*:0]const u8) c_int; +extern fn ERRF_ThrowResultWithMessage(failure: c_int, message: [*:0]const u8) c_int; +extern fn ERRF_ExceptionHandler(excep: *ExceptionInfo, regs: *CpuRegisters) noreturn; +extern fn svcBreak(break_reason: c_int) void; +extern fn svcOutputDebugString(str: [*]const u8, length: i32) c_int; comptime { @export(&entry, .{ .name = "main" }); @export(&stack_size, .{ .name = "__stacksize__" }); + @export(&threadCreateWrap, .{ .name = "__wrap_threadCreate" }); + @export(&exceptionHandler, .{ .name = "aether3dsExceptionHandler" }); } var stack_size: u32 = 1 * 1024 * 1024; +var exception_stack: 
[exception_stack_size]u8 align(8) = undefined; +var panic_stage: u8 = 0; + +fn threadCreateWrap( + entrypoint: ThreadFunc, + arg: ?*anyopaque, + requested_stack_size: usize, + prio: c_int, + core_id: c_int, + detached: bool, +) callconv(.c) Thread { + return __real_threadCreate( + entrypoint, + arg, + @max(requested_stack_size, min_service_thread_stack), + prio, + core_id, + detached, + ); +} fn entry() callconv(.c) c_int { + installCrashHandlers(); + const init = process_init.makeInit(.{ .vector = &argv }); - @import("root").main(init) catch return 1; + @import("root").main(init) catch |err| { + fatal("Aether main returned error.{s} at 0x{x}", .{ @errorName(err), @returnAddress() }); + }; return 0; } + +pub fn panic(msg: []const u8, _: ?*std.builtin.StackTrace, first_trace_addr: ?usize) noreturn { + @branchHint(.cold); + + if (panic_stage != 0) { + fatalDisplay("Aether recursive panic"); + } + panic_stage = 1; + + fatal("Aether panic at 0x{x}: {s}", .{ first_trace_addr orelse @returnAddress(), msg }); +} + +fn installCrashHandlers() void { + const top: ?*anyopaque = @ptrFromInt(@intFromPtr(&exception_stack) + exception_stack.len); + aether3dsInstallExceptionHandler(top); +} + +fn exceptionHandler(excep: *ExceptionInfo, regs: *CpuRegisters) callconv(.c) noreturn { + @branchHint(.cold); + + var buf: [256:0]u8 = @splat(0); + const msg = std.fmt.bufPrintZ(&buf, + \\Aether {s} + \\PC=0x{x:0>8} LR=0x{x:0>8} SP=0x{x:0>8} + \\FAR=0x{x:0>8} FSR=0x{x:0>8} CPSR=0x{x:0>8} + \\R0=0x{x:0>8} R1=0x{x:0>8} R2=0x{x:0>8} R3=0x{x:0>8} + , .{ + exceptionName(excep.typ), + regs.pc, + regs.lr, + regs.sp, + excep.far, + excep.fsr, + regs.cpsr, + regs.r[0], + regs.r[1], + regs.r[2], + regs.r[3], + }) catch fallback: { + @memcpy(buf[0.."Aether CPU exception".len], "Aether CPU exception"); + break :fallback buf[0.."Aether CPU exception".len :0]; + }; + + debugString(msg); + debugString("\n"); + _ = errfInit(); + _ = ERRF_SetUserString(msg.ptr); + ERRF_ExceptionHandler(excep, regs); +} + +fn 
exceptionName(typ: c_int) []const u8 { + return switch (typ) { + 0 => "Prefetch Abort", + 1 => "Data Abort", + 2 => "Undefined Instruction", + 3 => "VFP Exception", + else => "CPU Exception", + }; +} + +fn fatal(comptime fmt: []const u8, args: anytype) noreturn { + var buf: [256:0]u8 = @splat(0); + const msg = std.fmt.bufPrintZ(&buf, fmt, args) catch fallback: { + @memcpy(buf[0.."Aether fatal error".len], "Aether fatal error"); + break :fallback buf[0.."Aether fatal error".len :0]; + }; + fatalDisplay(msg); +} + +fn fatalDisplay(message: [:0]const u8) noreturn { + debugString(message); + debugString("\n"); + + _ = errfInit(); + _ = ERRF_SetUserString(message.ptr); + _ = ERRF_ThrowResultWithMessage(fatal_result, message.ptr); + + svcBreak(USERBREAK_PANIC); + while (true) {} +} + +fn debugString(message: []const u8) void { + if (message.len == 0) return; + _ = svcOutputDebugString(message.ptr, @intCast(@min(message.len, std.math.maxInt(i32)))); +} diff --git a/src/platform/3ds/time.zig b/src/platform/3ds/time.zig index 3c6775a..1153cfa 100644 --- a/src/platform/3ds/time.zig +++ b/src/platform/3ds/time.zig @@ -4,20 +4,23 @@ extern fn osGetTime() u64; extern fn svcGetSystemTick() u64; extern fn svcSleepThread(ns: i64) void; -const arm11_hz: u128 = 16_756_991 * 2 * 4 * 2; -const unix_epoch_from_1900_ms: i96 = 2_208_988_800 * std.time.ms_per_s; +const ns_per_ms: u64 = std.time.ns_per_ms; +const ns_per_s: u64 = std.time.ns_per_s; +const arm11_hz: u64 = 16_756_991 * 2 * 4 * 2; +const unix_epoch_from_1900_ms: u64 = 2_208_988_800 * std.time.ms_per_s; +const max_i64_ns: u64 = @intCast(std.math.maxInt(i64)); pub fn now(clock: std.Io.Clock) std.Io.Timestamp { return switch (clock) { - .real => .fromNanoseconds((@as(i96, @intCast(osGetTime())) - unix_epoch_from_1900_ms) * std.time.ns_per_ms), - .awake, .boot => .fromNanoseconds(@intCast((@as(u128, svcGetSystemTick()) * std.time.ns_per_s) / arm11_hz)), + .real => .fromNanoseconds(@intCast(realNanoseconds())), + .awake, .boot => 
.fromNanoseconds(@intCast(tickNanoseconds(svcGetSystemTick()))), else => std.debug.panic("3ds std.Io clock {s} is not implemented", .{@tagName(clock)}), }; } pub fn clockResolution(clock: std.Io.Clock) std.Io.Clock.ResolutionError!std.Io.Duration { return switch (clock) { - .real => .fromMilliseconds(1), + .real => .fromNanoseconds(std.time.ns_per_ms), .awake, .boot => .fromNanoseconds(4), else => error.ClockUnavailable, }; @@ -26,18 +29,53 @@ pub fn clockResolution(clock: std.Io.Clock) std.Io.Clock.ResolutionError!std.Io. pub fn sleep(timeout: std.Io.Timeout) std.Io.Cancelable!void { const ns = timeoutNanoseconds(timeout); if (ns <= 0) return; - svcSleepThread(clampNs(ns)); + svcSleepThread(ns); } -fn timeoutNanoseconds(timeout: std.Io.Timeout) i96 { +fn realNanoseconds() u64 { + const ms_since_1900 = osGetTime(); + if (ms_since_1900 <= unix_epoch_from_1900_ms) return 0; + return millisecondsToNanoseconds(ms_since_1900 - unix_epoch_from_1900_ms); +} + +fn tickNanoseconds(ticks: u64) u64 { + const seconds = ticks / arm11_hz; + const remainder = ticks % arm11_hz; + const whole_ns = secondsToNanoseconds(seconds); + if (whole_ns == max_i64_ns) return max_i64_ns; + + const fractional_ns = (remainder *% ns_per_s) / arm11_hz; + if (fractional_ns > max_i64_ns - whole_ns) return max_i64_ns; + return whole_ns + fractional_ns; +} + +fn millisecondsToNanoseconds(ms: u64) u64 { + if (ms >= max_i64_ns / ns_per_ms) return max_i64_ns; + return ms *% ns_per_ms; +} + +fn secondsToNanoseconds(seconds: u64) u64 { + if (seconds >= max_i64_ns / ns_per_s) return max_i64_ns; + return seconds *% ns_per_s; +} + +fn timeoutNanoseconds(timeout: std.Io.Timeout) i64 { return switch (timeout) { .none => 0, - .duration => |duration| duration.raw.nanoseconds, - .deadline => |deadline| deadline.raw.nanoseconds - now(deadline.clock).nanoseconds, + .duration => |duration| clampNs(duration.raw.nanoseconds), + .deadline => |deadline| deadlineNanoseconds(deadline), }; } fn clampNs(ns: i96) i64 { if (ns 
> std.math.maxInt(i64)) return std.math.maxInt(i64); + if (ns < std.math.minInt(i64)) return std.math.minInt(i64); return @intCast(ns); } + +fn deadlineNanoseconds(deadline: std.Io.Clock.Timestamp) i64 { + const target = clampNs(deadline.raw.nanoseconds); + const current = clampNs(now(deadline.clock).nanoseconds); + if (target <= current) return 0; + return target - current; +} diff --git a/src/root.zig b/src/root.zig index 38a5ce8..b1b85cc 100644 --- a/src/root.zig +++ b/src/root.zig @@ -13,6 +13,7 @@ pub const ctx_to_self = Util.ctx_to_self; /// Only available when `platform == .psp`; evaluates to `void` otherwise. pub const Psp = if (platform == .psp) @import("platform/psp/psp_dialogs.zig") else void; pub const Cio = if (platform == .nintendo_3ds or platform == .nintendo_switch) @import("platform/c_io.zig") else void; +pub const ThreeDS = if (platform == .nintendo_3ds) @import("platform/3ds/services.zig") else void; // Pull in the 3DS / Switch entry shim on those targets. Each shim's // comptime block `@export`s a C-callable `main` so `-ofmt=c` emits diff --git a/test/main.zig b/test/main.zig index 2dbf04c..fc025d7 100644 --- a/test/main.zig +++ b/test/main.zig @@ -24,10 +24,9 @@ pub const psp_stack_size: u32 = 256 * 1024; // baseline so debug prints go through stderr instead of dereferencing an // undefined Io implementation. const is_freestanding_console = ae.platform == .psp or ae.platform == .nintendo_3ds or ae.platform == .nintendo_switch; -// 3DS and Switch keep `no_panic` while the debug IO baseline is intentionally -// small. Missing operations should fail at the original call site instead of -// recursing through stack-trace formatting on early bring-up builds. 
-pub const panic = if (ae.platform == .psp) sdk.extra.debug.panic else if (ae.platform == .nintendo_3ds or ae.platform == .nintendo_switch) std.debug.no_panic else std.debug.FullPanic(std.debug.defaultPanic); +// 3DS routes panics through err:f; Switch keeps `no_panic` while the debug IO +// baseline is intentionally small. +pub const panic = if (ae.platform == .psp) sdk.extra.debug.panic else if (ae.platform == .nintendo_3ds) ae.ThreeDS.panic else if (ae.platform == .nintendo_switch) std.debug.no_panic else std.debug.FullPanic(std.debug.defaultPanic); pub const std_options_debug_threaded_io = if (is_freestanding_console) null else std.Io.Threaded.global_single_threaded; pub const std_options_debug_io: std.Io = if (ae.platform == .psp) sdk.extra.Io.psp_io else if (ae.platform == .nintendo_3ds or ae.platform == .nintendo_switch) ae.Cio.io() else std.Io.Threaded.global_single_threaded.io(); @@ -66,9 +65,9 @@ const MyState = struct { mesh: MyMesh, transform: Rendering.Transform, texture: Rendering.Texture, - music_data: []u8, + music_data: []const u8, music_reader: std.Io.Reader, - grass_data: []u8, + grass_data: []const u8, grass_readers: [MAX_GRASS_VOICES]std.Io.Reader, grass_tick: u32, grass_spawn: u32, @@ -101,7 +100,10 @@ const MyState = struct { self.mesh = try MyMesh.new(render, pipeline); self.transform = Rendering.Transform.new(); - self.texture = try Rendering.Texture.load(engine.io, engine.dirs.resources, render, "test.png"); + const texture_bytes = @embedFile("test.png"); + var texture_reader: std.Io.Reader = .fixed(texture_bytes); + self.texture = try Rendering.Texture.load_from_reader(render, &texture_reader); + try self.mesh.append(render, &.{ Vertex{ .pos = .{ -16383, -16383, 0 }, .color = 0xFF0000FF, .uv = .{ 0, 32767 } }, Vertex{ .pos = .{ 16383, -16383, 0 }, .color = 0xFF00FF00, .uv = .{ 32767, 32767 } }, @@ -109,6 +111,15 @@ const MyState = struct { }); self.mesh.update(); + self.music_data = &.{}; + self.music_reader = .fixed(&.{}); + 
self.grass_data = &.{}; + self.grass_readers = @splat(.fixed(&.{})); + self.grass_tick = 0; + self.grass_spawn = 0; + + if (!Audio.enabled) return; + // -- background music -- self.music_data = try load_wav(engine, "calm1.wav"); self.music_reader = .fixed(self.music_data); @@ -117,9 +128,6 @@ const MyState = struct { // -- spatial SFX data -- self.grass_data = try load_wav(engine, "grass1.wav"); - self.grass_readers = @splat(.fixed(&.{})); - self.grass_tick = 0; - self.grass_spawn = 0; // Listener at origin, facing -Z Audio.set_listener(Vec3.zero(), Vec3.new(0, 0, -1), Vec3.new(0, 1, 0)); @@ -136,6 +144,8 @@ const MyState = struct { } fn tick(ctx: *anyopaque, _: *ae.Engine) anyerror!void { + if (!Audio.enabled) return; + var self = ae.ctx_to_self(MyState, ctx); self.grass_tick += 1; From c8b1470686b075578e038d0cb054ac78eb97e7f0 Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Wed, 27 May 2026 02:24:10 -0400 Subject: [PATCH 10/44] Fix 3DS Time --- build.zig | 22 ++++++++--- src/engine.zig | 47 +++++++++++++++------- src/platform/3ds/3ds_gfx.zig | 1 + src/platform/3ds/time.zig | 76 ++++++++++++++++++------------------ src/util/estimator.zig | 4 +- 5 files changed, 88 insertions(+), 62 deletions(-) diff --git a/build.zig b/build.zig index 752dc17..ad31e06 100644 --- a/build.zig +++ b/build.zig @@ -886,22 +886,32 @@ fn threedsxPipeline(b: *std.Build, exe: *std.Build.Step.Compile, opts: ExportOpt const link = b.addSystemCommand(&.{gcc}); link.addArgs(&arch); link.addArgs(&.{ - "-mword-relocations", "-ffunction-sections", - "-D__3DS__", "-DARM11", - "-O2", "-g", - "-specs=3dsx.specs", "-Wl,--wrap=threadCreate", + "-mword-relocations", + "-ffunction-sections", + "-D__3DS__", + "-DARM11", + if (exe.root_module.optimize != .Debug or exe.root_module.optimize == .ReleaseSmall) "-O2" else if (exe.root_module.optimize == .ReleaseSmall) "-Os" else "-O0", + if (exe.root_module.optimize == .Debug or exe.root_module.optimize == .ReleaseSafe) "-g" else "-g0", + 
"-specs=3dsx.specs", + "-Wl,--wrap=threadCreate", + "-Wl,--no-warn-execstack", + }); + link.addArgs(&.{ // Pin the C standard to C11. zig.h picks `[[noreturn]]` under // C23 but emits it in attribute-list position that gcc rejects; // C11's `_Noreturn` is what zig's emitter actually targets. "-std=gnu11", + }); + link.addArgs(&.{ // zig's -ofmt=c emitter treats `uintptr_t` and `uint32_t` as // interchangeable on 32-bit ARM (they ARE the same width) but // gcc 14+ promotes the resulting pointer-type mismatch from a // warning to an error. Demote it and a couple of related // chatters; we don't author this C and there's nothing // actionable in the warnings. - "-Wno-incompatible-pointer-types", - "-Wno-int-conversion", "-Wno-builtin-declaration-mismatch", + "-Wno-incompatible-pointer-types", + "-Wno-int-conversion", + "-Wno-builtin-declaration-mismatch", }); link.addArg(b.fmt("-I{s}", .{ctru_inc})); link.addPrefixedDirectoryArg("-I", include_wf.getDirectory()); diff --git a/src/engine.zig b/src/engine.zig index df8083d..7927b97 100644 --- a/src/engine.zig +++ b/src/engine.zig @@ -87,7 +87,7 @@ pub const Engine = struct { trackers: [TRACKER_COUNT]CategoryTracker, running: bool, vsync: bool, - state: *const Core.State, + state: Core.State, dirs: Core.paths.Dirs, pub const Config = struct { @@ -127,7 +127,7 @@ pub const Engine = struct { self.io = sys_io; self.running = true; self.vsync = config.vsync; - self.state = state; + self.state = state.*; self.pool = memory.PoolAlloc.init(mem, "main"); const inner = self.pool.allocator(); @@ -150,7 +150,7 @@ pub const Engine = struct { try Platform.init(self, config.width, config.height, config.title, config.fullscreen, config.vsync, config.resizable); try Rendering.Texture.init_defaults(self.allocator(.render)); - try Core.state_machine.init(self, state); + try Core.state_machine.init(self, &self.state); } pub fn deinit(self: *Engine) void { @@ -236,11 +236,12 @@ pub const Engine = struct { const update_budget_ns: i64 = 
@as(i64, @intCast(UPDATE_US)) * NS_PER_US; - var clock = std.Io.Clock.real; + var clock = std.Io.Clock.boot; var last_us: i64 = @truncate(@divTrunc(clock.now(self.io).toNanoseconds(), 1000)); var update_accum: i64 = 0; var tick_accum: i64 = 0; + var stale_time_frames: u32 = 0; const report_fps = options.config.gfx != .headless; var fps_count: u32 = 0; @@ -249,13 +250,35 @@ pub const Engine = struct { while (self.running) { const now_us: i64 = @truncate(@divTrunc(clock.now(self.io).toNanoseconds(), 1000)); var frame_dt_us: i64 = now_us - last_us; - last_us = now_us; + var synthetic_frame_dt = false; + + if (frame_dt_us <= 0) { + frame_dt_us = 0; + if (options.config.platform == .nintendo_3ds and self.vsync) { + stale_time_frames +|= 1; + if (stale_time_frames >= 2) { + frame_dt_us = @intCast(US_PER_S / 60); + synthetic_frame_dt = true; + } + } + } else { + stale_time_frames = 0; + } if (frame_dt_us > 500_000) frame_dt_us = 500_000; + last_us = if (synthetic_frame_dt) last_us + frame_dt_us else now_us; update_accum += frame_dt_us; tick_accum += frame_dt_us; + const platform_start_ns: i64 = @truncate(clock.now(self.io).toNanoseconds()); + Platform.input.update(); + Platform.update(self); + Core.input.update(); + const platform_done_ns: i64 = @truncate(clock.now(self.io).toNanoseconds()); + var pre_update_elapsed_ns = platform_done_ns - platform_start_ns; + if (!self.running) break; + // ---- fixed-rate TICK steps (e.g., 20 Hz logic) ---- var is_tick_frame = false; var tick_cost_ns: i64 = 0; @@ -270,28 +293,22 @@ pub const Engine = struct { tick_accum -= tick_us; } - // ---- fixed-rate UPDATE steps (input update & interpolation) ---- + // ---- fixed-rate UPDATE steps (simulation & interpolation) ---- const UPDATE_DT_S: f32 = @as(f32, @floatFromInt(UPDATE_US)) / @as(f32, US_PER_S); while (update_accum >= UPDATE_US) { @branchHint(.unpredictable); - const step_start_ns: i64 = @truncate(clock.now(self.io).toNanoseconds()); - Platform.input.update(); - 
Platform.update(self); - Core.input.update(); - const engine_done_ns: i64 = @truncate(clock.now(self.io).toNanoseconds()); - const engine_elapsed_ns = engine_done_ns - step_start_ns; - const budget = Util.BudgetContext{ .phase_budget_ns = update_budget_ns, - .engine_elapsed_ns = engine_elapsed_ns, - .remaining_ns = update_budget_ns - engine_elapsed_ns, + .engine_elapsed_ns = pre_update_elapsed_ns, + .remaining_ns = update_budget_ns - pre_update_elapsed_ns, .is_tick_frame = is_tick_frame, .tick_cost_ns = tick_cost_ns, .safety_margin_ns = Util.BudgetContext.DEFAULT_SAFETY_MARGIN_NS, }; try Core.state_machine.update(self, UPDATE_DT_S, &budget); + pre_update_elapsed_ns = 0; update_accum -= UPDATE_US; } diff --git a/src/platform/3ds/3ds_gfx.zig b/src/platform/3ds/3ds_gfx.zig index e7ef606..33b6631 100644 --- a/src/platform/3ds/3ds_gfx.zig +++ b/src/platform/3ds/3ds_gfx.zig @@ -425,6 +425,7 @@ pub fn draw_mesh(handle: Mesh.Handle, model: *const Mat4, count: usize, primitiv const mvp = Mat4.mul(model.*, view_proj); C3D_BindProgram(&pl.program); + configure_fixed_attributes(); upload_matrix_uniform(pl.projection_loc, &screen_projection); C3D_ImmDrawBegin(GPU_TRIANGLES); diff --git a/src/platform/3ds/time.zig b/src/platform/3ds/time.zig index 1153cfa..8e39b9c 100644 --- a/src/platform/3ds/time.zig +++ b/src/platform/3ds/time.zig @@ -1,62 +1,60 @@ const std = @import("std"); -extern fn osGetTime() u64; -extern fn svcGetSystemTick() u64; +const Timespec = extern struct { + tv_sec: i64, + tv_nsec: c_long, +}; + +extern fn clock_gettime(clock_id: c_int, tp: *Timespec) c_int; +extern fn clock_getres(clock_id: c_int, res: *Timespec) c_int; extern fn svcSleepThread(ns: i64) void; -const ns_per_ms: u64 = std.time.ns_per_ms; const ns_per_s: u64 = std.time.ns_per_s; -const arm11_hz: u64 = 16_756_991 * 2 * 4 * 2; -const unix_epoch_from_1900_ms: u64 = 2_208_988_800 * std.time.ms_per_s; -const max_i64_ns: u64 = @intCast(std.math.maxInt(i64)); +const max_i64_ns: i64 = 
std.math.maxInt(i64); +const CLOCK_REALTIME: c_int = 1; +const CLOCK_MONOTONIC: c_int = 4; pub fn now(clock: std.Io.Clock) std.Io.Timestamp { - return switch (clock) { - .real => .fromNanoseconds(@intCast(realNanoseconds())), - .awake, .boot => .fromNanoseconds(@intCast(tickNanoseconds(svcGetSystemTick()))), - else => std.debug.panic("3ds std.Io clock {s} is not implemented", .{@tagName(clock)}), - }; + const id = clockId(clock) orelse std.debug.panic("3ds std.Io clock {s} is not implemented", .{@tagName(clock)}); + var ts: Timespec = undefined; + if (clock_gettime(id, &ts) != 0) { + std.debug.panic("3ds clock_gettime failed for std.Io clock {s}", .{@tagName(clock)}); + } + return .fromNanoseconds(timespecNanoseconds(ts)); } pub fn clockResolution(clock: std.Io.Clock) std.Io.Clock.ResolutionError!std.Io.Duration { - return switch (clock) { - .real => .fromNanoseconds(std.time.ns_per_ms), - .awake, .boot => .fromNanoseconds(4), - else => error.ClockUnavailable, - }; + const id = clockId(clock) orelse return error.ClockUnavailable; + var ts: Timespec = undefined; + if (clock_getres(id, &ts) != 0) return error.ClockUnavailable; + return .fromNanoseconds(timespecNanoseconds(ts)); } -pub fn sleep(timeout: std.Io.Timeout) std.Io.Cancelable!void { - const ns = timeoutNanoseconds(timeout); - if (ns <= 0) return; - svcSleepThread(ns); +fn clockId(clock: std.Io.Clock) ?c_int { + return switch (clock) { + .real => CLOCK_REALTIME, + // libctru's POSIX shim supports CLOCK_MONOTONIC for svcGetSystemTick. + // CLOCK_BOOTTIME is declared by newlib but not implemented by libctru. 
+ .awake, .boot => CLOCK_MONOTONIC, + else => null, + }; } -fn realNanoseconds() u64 { - const ms_since_1900 = osGetTime(); - if (ms_since_1900 <= unix_epoch_from_1900_ms) return 0; - return millisecondsToNanoseconds(ms_since_1900 - unix_epoch_from_1900_ms); -} +fn timespecNanoseconds(ts: Timespec) i64 { + if (ts.tv_sec <= 0) return @max(0, @as(i64, @intCast(ts.tv_nsec))); -fn tickNanoseconds(ticks: u64) u64 { - const seconds = ticks / arm11_hz; - const remainder = ticks % arm11_hz; - const whole_ns = secondsToNanoseconds(seconds); - if (whole_ns == max_i64_ns) return max_i64_ns; + const sec: i64 = @intCast(@min(ts.tv_sec, @divTrunc(max_i64_ns, @as(i64, @intCast(ns_per_s))))); + const whole_ns = sec * @as(i64, @intCast(ns_per_s)); + const fractional_ns: i64 = @max(0, @as(i64, @intCast(ts.tv_nsec))); - const fractional_ns = (remainder *% ns_per_s) / arm11_hz; if (fractional_ns > max_i64_ns - whole_ns) return max_i64_ns; return whole_ns + fractional_ns; } -fn millisecondsToNanoseconds(ms: u64) u64 { - if (ms >= max_i64_ns / ns_per_ms) return max_i64_ns; - return ms *% ns_per_ms; -} - -fn secondsToNanoseconds(seconds: u64) u64 { - if (seconds >= max_i64_ns / ns_per_s) return max_i64_ns; - return seconds *% ns_per_s; +pub fn sleep(timeout: std.Io.Timeout) std.Io.Cancelable!void { + const ns = timeoutNanoseconds(timeout); + if (ns <= 0) return; + svcSleepThread(ns); } fn timeoutNanoseconds(timeout: std.Io.Timeout) i64 { diff --git a/src/util/estimator.zig b/src/util/estimator.zig index f13d32b..fbbff92 100644 --- a/src/util/estimator.zig +++ b/src/util/estimator.zig @@ -30,12 +30,12 @@ pub const Estimator = struct { } pub fn begin(self: *Estimator, io: std.Io) void { - var clock = std.Io.Clock.real; + var clock = std.Io.Clock.boot; self.start_ns = @truncate(clock.now(io).toNanoseconds()); } pub fn end(self: *Estimator, io: std.Io) void { - var clock = std.Io.Clock.real; + var clock = std.Io.Clock.boot; const now_ns: i64 = @truncate(clock.now(io).toNanoseconds()); const 
elapsed = now_ns - self.start_ns; self.start_ns = 0; From d0493bb15e6795a27e524ed7ae6399ea1dc94974 Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Fri, 29 May 2026 21:11:58 -0400 Subject: [PATCH 11/44] Textures working --- build.zig | 5 +- src/engine.zig | 62 ++-- src/platform/3ds/3ds_gfx.zig | 534 ++++++++++++++++++++++++++++++++--- src/platform/3ds/time.zig | 17 ++ test/main.zig | 22 +- 5 files changed, 565 insertions(+), 75 deletions(-) diff --git a/build.zig b/build.zig index ad31e06..d314f33 100644 --- a/build.zig +++ b/build.zig @@ -1128,10 +1128,12 @@ pub fn addShader(owner: *std.Build, b: *std.Build, exe: *std.Build.Step.Compile, \\.alias ones myconst.yyyy \\ \\.out outpos position + \\.out outtc0 texcoord0 \\.out outclr color \\ \\.alias inpos v0 - \\.alias inclr v1 + \\.alias inuv v1 + \\.alias inclr v2 \\ \\.proc main \\ mov r0.xyz, inpos @@ -1142,6 +1144,7 @@ pub fn addShader(owner: *std.Build, b: *std.Build, exe: *std.Build.Step.Compile, \\ dp4 outpos.z, projection[2], r0 \\ dp4 outpos.w, projection[3], r0 \\ + \\ mov outtc0, inuv \\ mov outclr, inclr \\ end \\.end diff --git a/src/engine.zig b/src/engine.zig index 7927b97..f41eff0 100644 --- a/src/engine.zig +++ b/src/engine.zig @@ -228,8 +228,8 @@ pub const Engine = struct { const US_PER_S: u64 = std.time.us_per_s; const NS_PER_US: i64 = 1000; - // Fixed-step rates -- PSP targets 60 Hz display - const UPDATES_HZ: u32 = if (options.config.platform == .psp) 60 else 144; + // Fixed-step rates -- handheld backends target 60 Hz displays. 
+ const UPDATES_HZ: u32 = if (options.config.platform == .psp or options.config.platform == .nintendo_3ds) 60 else 144; const TICKS_HZ: u32 = 20; const UPDATE_US: u64 = US_PER_S / UPDATES_HZ; const TICK_US: u64 = US_PER_S / TICKS_HZ; @@ -237,19 +237,21 @@ pub const Engine = struct { const update_budget_ns: i64 = @as(i64, @intCast(UPDATE_US)) * NS_PER_US; var clock = std.Io.Clock.boot; + const run_start_ns = clock.now(self.io).toNanoseconds(); + const fps_window_us: i64 = @intCast(US_PER_S); - var last_us: i64 = @truncate(@divTrunc(clock.now(self.io).toNanoseconds(), 1000)); + var last_us: i64 = 0; var update_accum: i64 = 0; var tick_accum: i64 = 0; var stale_time_frames: u32 = 0; const report_fps = options.config.gfx != .headless; var fps_count: u32 = 0; - var fps_window_end: i64 = last_us + US_PER_S; + var fps_window_end: i64 = saturatingAddI64(last_us, fps_window_us); while (self.running) { - const now_us: i64 = @truncate(@divTrunc(clock.now(self.io).toNanoseconds(), 1000)); - var frame_dt_us: i64 = now_us - last_us; + const now_us = elapsedUsSince(run_start_ns, clock.now(self.io).toNanoseconds()); + var frame_dt_us = saturatingSubI64(now_us, last_us); var synthetic_frame_dt = false; if (frame_dt_us <= 0) { @@ -266,17 +268,17 @@ pub const Engine = struct { } if (frame_dt_us > 500_000) frame_dt_us = 500_000; - last_us = if (synthetic_frame_dt) last_us + frame_dt_us else now_us; + last_us = if (synthetic_frame_dt) saturatingAddI64(last_us, frame_dt_us) else now_us; - update_accum += frame_dt_us; - tick_accum += frame_dt_us; + update_accum = saturatingAddI64(update_accum, frame_dt_us); + tick_accum = saturatingAddI64(tick_accum, frame_dt_us); - const platform_start_ns: i64 = @truncate(clock.now(self.io).toNanoseconds()); + const platform_start_ns = clock.now(self.io).toNanoseconds(); Platform.input.update(); Platform.update(self); Core.input.update(); - const platform_done_ns: i64 = @truncate(clock.now(self.io).toNanoseconds()); - var pre_update_elapsed_ns = 
platform_done_ns - platform_start_ns; + const platform_done_ns = clock.now(self.io).toNanoseconds(); + var pre_update_elapsed_ns = elapsedNsBetween(platform_start_ns, platform_done_ns); if (!self.running) break; // ---- fixed-rate TICK steps (e.g., 20 Hz logic) ---- @@ -286,10 +288,10 @@ pub const Engine = struct { while (tick_accum >= tick_us) { @branchHint(.unpredictable); is_tick_frame = true; - const tick_start_ns: i64 = @truncate(clock.now(self.io).toNanoseconds()); + const tick_start_ns = clock.now(self.io).toNanoseconds(); try Core.state_machine.tick(self); - const tick_end_ns: i64 = @truncate(clock.now(self.io).toNanoseconds()); - tick_cost_ns += tick_end_ns - tick_start_ns; + const tick_end_ns = clock.now(self.io).toNanoseconds(); + tick_cost_ns = saturatingAddI64(tick_cost_ns, elapsedNsBetween(tick_start_ns, tick_end_ns)); tick_accum -= tick_us; } @@ -316,7 +318,7 @@ pub const Engine = struct { const frame_dt_s: f32 = @as(f32, @floatFromInt(frame_dt_us)) / @as(f32, US_PER_S); const drew_frame = Platform.gfx.api.start_frame(); if (drew_frame) { - const draw_start_ns: i64 = @truncate(clock.now(self.io).toNanoseconds()); + const draw_start_ns = clock.now(self.io).toNanoseconds(); // Time until next update step is due const slack_us: i64 = @as(i64, @intCast(UPDATE_US)) - @max(0, update_accum); const draw_budget_ns: i64 = if (self.vsync) @@ -354,13 +356,37 @@ pub const Engine = struct { // ---- FPS counting ---- if (report_fps) { if (drew_frame) fps_count += 1; - const end_us: i64 = @truncate(@divTrunc(clock.now(self.io).toNanoseconds(), 1000)); + const end_us = elapsedUsSince(run_start_ns, clock.now(self.io).toNanoseconds()); if (end_us >= fps_window_end) { Util.engine_logger.info("FPS: {}", .{fps_count}); fps_count = 0; - fps_window_end = end_us + US_PER_S; + fps_window_end = saturatingAddI64(end_us, fps_window_us); } } } } }; + +fn elapsedNsBetween(start_ns: i96, end_ns: i96) i64 { + return clampI96ToI64(end_ns - start_ns); +} + +fn elapsedUsSince(start_ns: 
i96, end_ns: i96) i64 { + return clampI96ToI64(@divTrunc(end_ns - start_ns, std.time.ns_per_us)); +} + +fn saturatingAddI64(a: i64, b: i64) i64 { + return clampI96ToI64(@as(i96, a) + @as(i96, b)); +} + +fn saturatingSubI64(a: i64, b: i64) i64 { + return clampI96ToI64(@as(i96, a) - @as(i96, b)); +} + +fn clampI96ToI64(value: i96) i64 { + const max: i96 = std.math.maxInt(i64); + const min: i96 = std.math.minInt(i64); + if (value > max) return std.math.maxInt(i64); + if (value < min) return std.math.minInt(i64); + return @intCast(value); +} diff --git a/src/platform/3ds/3ds_gfx.zig b/src/platform/3ds/3ds_gfx.zig index 33b6631..7cf5e14 100644 --- a/src/platform/3ds/3ds_gfx.zig +++ b/src/platform/3ds/3ds_gfx.zig @@ -1,8 +1,9 @@ -//! Minimal Nintendo 3DS Citro3D backend. +//! Nintendo 3DS Citro3D backend. //! -//! First bring-up milestone: initialize the top-screen render target and draw -//! Aether's current colored triangle path. Textures and richer render state are -//! still no-ops until the next backend pass. +//! The top screen render target is physically 240x400 and displayed rotated. +//! This backend keeps Aether's normal landscape projection contract by +//! transforming vertices to top-screen coordinates on the CPU, then using +//! Citro3D's tilted orthographic projection for the final hardware transform. 
const std = @import("std"); const Util = @import("../../util/util.zig"); @@ -13,6 +14,7 @@ const Mesh = Rendering.mesh; const Texture = Rendering.Texture; const C3D_AttrInfo = opaque {}; +const C3D_BufInfo = opaque {}; const C3D_RenderTarget = extern struct { next: ?*C3D_RenderTarget, prev: ?*C3D_RenderTarget, @@ -54,6 +56,17 @@ const C3D_FVec = extern struct { const C3D_Mtx = extern struct { r: [4]C3D_FVec, }; +const C3D_Tex = extern struct { + data: ?*anyopaque, + fmt_size: u32, + dim: u32, + param: u32, + border: u32, + lod_param: u32, +}; +const C3D_FogLut = extern struct { + data: [128]u32, +}; const DVLP = extern struct { codeSize: u32, @@ -128,15 +141,27 @@ extern fn C3D_BindProgram(program: *ShaderProgram) void; extern fn C3D_GetAttrInfo() *C3D_AttrInfo; extern fn AttrInfo_Init(info: *C3D_AttrInfo) void; extern fn AttrInfo_AddLoader(info: *C3D_AttrInfo, regId: c_int, format: c_int, count: c_int) c_int; +extern fn C3D_GetBufInfo() *C3D_BufInfo; +extern fn BufInfo_Init(info: *C3D_BufInfo) void; +extern fn BufInfo_Add(info: *C3D_BufInfo, data: ?*const anyopaque, stride: isize, attribCount: c_int, permutation: u64) c_int; extern fn C3D_GetTexEnv(id: c_int) *C3D_TexEnv; extern fn C3D_DirtyTexEnv(env: *C3D_TexEnv) void; extern fn C3D_CullFace(mode: c_int) void; extern fn C3D_DepthTest(enable: bool, function: c_int, writemask: c_int) void; extern fn C3D_AlphaBlend(colorEq: c_int, alphaEq: c_int, srcClr: c_int, dstClr: c_int, srcAlpha: c_int, dstAlpha: c_int) void; -extern fn C3D_ImmDrawBegin(primitive: c_int) void; -extern fn C3D_ImmSendAttrib(x: f32, y: f32, z: f32, w: f32) void; -extern fn C3D_ImmDrawEnd() void; +extern fn C3D_DrawArrays(primitive: c_int, first: c_int, size: c_int) void; +extern fn C3D_TexInitWithParams(tex: *C3D_Tex, cube: ?*anyopaque, params: u64) bool; +extern fn C3D_TexLoadImage(tex: *C3D_Tex, data: ?*const anyopaque, face: c_int, level: c_int) void; +extern fn C3D_TexBind(unitId: c_int, tex: *C3D_Tex) void; +extern fn C3D_TexDelete(tex: 
*C3D_Tex) void; +extern fn C3D_FogGasMode(fogMode: c_int, gasMode: c_int, zFlip: bool) void; +extern fn C3D_FogColor(color: u32) void; +extern fn C3D_FogLutBind(lut: *C3D_FogLut) void; +extern fn FogLut_FromArray(lut: *C3D_FogLut, data: *const [256]f32) void; +extern fn GSPGPU_FlushDataCache(adr: ?*const anyopaque, size: u32) c_int; extern fn Mtx_OrthoTilt(mtx: *C3D_Mtx, left: f32, right: f32, bottom: f32, top: f32, near: f32, far: f32, isLeftHanded: bool) void; +extern fn linearAlloc(size: usize) ?*anyopaque; +extern fn linearFree(mem: ?*anyopaque) void; extern fn DVLB_ParseFile(shbinData: [*]u32, shbinSize: u32) ?*DVLB; extern fn DVLB_Free(dvlb: *DVLB) void; @@ -160,17 +185,31 @@ const GPU_FLOAT = 3; const GPU_RB_RGBA8 = 0; const GPU_RB_DEPTH24_STENCIL8 = 3; const GPU_ALWAYS = 1; +const GPU_GEQUAL = 7; const GPU_WRITE_COLOR = 0x0F; +const GPU_WRITE_DEPTH = 0x10; const GPU_CULL_NONE = 0; +const GPU_CULL_BACK_CCW = 2; const GPU_BLEND_ADD = 0; const GPU_ZERO = 0; const GPU_ONE = 1; const GPU_SRC_ALPHA = 6; const GPU_ONE_MINUS_SRC_ALPHA = 7; const GPU_PRIMARY_COLOR = 0x00; +const GPU_TEXTURE0 = 0x03; const GPU_REPLACE = 0x00; +const GPU_MODULATE = 0x01; const GPU_TEVSCALE_1 = 0x0; const GPU_TRIANGLES = 0x0000; +const GPU_NO_FOG = 0; +const GPU_FOG = 5; +const GPU_PLAIN_DENSITY = 0; +const GPU_TEX_2D = 0; +const GPU_TEXFACE_2D = 0; +const GPU_RGBA8 = 0; +const GPU_NEAREST = 0; +const GPU_LINEAR = 1; +const GPU_REPEAT = 2; const GFX_TOP = 0; const GFX_LEFT = 0; @@ -178,10 +217,24 @@ const GX_TRANSFER_FMT_RGB8 = 1; const DISPLAY_TRANSFER_FLAGS = GX_TRANSFER_FMT_RGB8 << 12; const TOP_SCREEN_WIDTH: f32 = 400.0; const TOP_SCREEN_HEIGHT: f32 = 240.0; +const TEXTURE_BPP: usize = 4; +const MIN_TEXTURE_SIZE: u32 = 8; +const SMALL_TEXTURE_EXPAND_SIZE: u32 = 32; +const MAX_TEXTURE_SIZE: u32 = 1024; +const LINE_WIDTH: f32 = 1.5; +const DEBUG_UV_AS_COLOR = false; +const DEBUG_TEXTURE_ONLY = false; const ConvertedVertex = struct { pos: [4]f32, color: [4]f32, + uv: [2]f32, +}; + 
+const GpuVertex = extern struct { + pos: [4]f32, + uv: [2]f32, + color: [4]f32, }; const PipelineData = struct { @@ -190,16 +243,27 @@ const PipelineData = struct { stride: usize, position_attr: Pipeline.Attribute, color_attr: ?Pipeline.Attribute, + uv_attr: ?Pipeline.Attribute, projection_loc: i8, }; const MeshData = struct { pipeline: Pipeline.Handle, - ptr: ?[*]ConvertedVertex = null, + ptr: ?[*]u8 = null, len: usize = 0, capacity: usize = 0, }; +const TextureData = struct { + width: u32, + height: u32, + tex_width: u16, + tex_height: u16, + uv_scale: [2]f32, + upload_data: []align(16) u8, + tex: C3D_Tex, +}; + var render_alloc: std.mem.Allocator = undefined; var render_io: std.Io = undefined; @@ -210,10 +274,20 @@ var vsync_enabled: bool = true; var current_pipeline: Pipeline.Handle = 0; var current_proj: Mat4 = Mat4.identity(); var current_view: Mat4 = Mat4.identity(); +var uv_offset: [2]f32 = .{ 0.0, 0.0 }; +var depth_write_enabled: bool = true; var screen_projection: C3D_Mtx = undefined; +var fog_lut: C3D_FogLut = undefined; +var white_texture: C3D_Tex = undefined; +var white_texture_ready: bool = false; +var bound_texture: Texture.Handle = 0; +var draw_vbo_raw: ?*anyopaque = null; +var draw_vbo: ?[*]GpuVertex = null; +var draw_vbo_capacity: usize = 0; var pipelines = Util.CircularBuffer(PipelineData, 16).init(); var meshes = Util.CircularBuffer(MeshData, 2048).init(); +var textures = Util.CircularBuffer(TextureData, 64).init(); pub fn setup(alloc: std.mem.Allocator, io: std.Io) void { render_alloc = alloc; @@ -240,9 +314,11 @@ pub fn init() anyerror!void { Mtx_OrthoTilt(&screen_projection, 0.0, TOP_SCREEN_WIDTH, 0.0, TOP_SCREEN_HEIGHT, 0.0, 1.0, true); configure_fixed_attributes(); - configure_color_texenv(); + configure_texture_texenv(); + try init_white_texture(); C3D_CullFace(GPU_CULL_NONE); - C3D_DepthTest(false, GPU_ALWAYS, GPU_WRITE_COLOR); + apply_depth_state(); + C3D_FogGasMode(GPU_NO_FOG, GPU_PLAIN_DENSITY, false); set_alpha_blend(true); 
initialized = true; @@ -251,7 +327,15 @@ pub fn init() anyerror!void { pub fn deinit() void { destroy_all_meshes(); destroy_all_pipelines(); + destroy_all_textures(); + free_draw_vbo(); current_pipeline = 0; + bound_texture = 0; + + if (white_texture_ready) { + C3D_TexDelete(&white_texture); + white_texture_ready = false; + } if (target) |t| { C3D_RenderTargetDelete(t); @@ -281,15 +365,39 @@ pub fn set_alpha_blend(enabled: bool) void { } pub fn set_depth_write(enabled: bool) void { - C3D_DepthTest(enabled, GPU_ALWAYS, if (enabled) GPU_WRITE_COLOR | 0x10 else GPU_WRITE_COLOR); + depth_write_enabled = enabled; + apply_depth_state(); +} + +pub fn set_fog(enabled: bool, start: f32, end: f32, r: f32, g: f32, b: f32) void { + if (!enabled or end <= start) { + C3D_FogGasMode(GPU_NO_FOG, GPU_PLAIN_DENSITY, false); + return; + } + + var data: [256]f32 = undefined; + for (&data, 0..) |*v, i| { + const z = @as(f32, @floatFromInt(i)) / 255.0; + v.* = @max(0.0, @min(1.0, (z - start) / (end - start))); + } + + FogLut_FromArray(&fog_lut, &data); + C3D_FogColor((@as(u32, floatByte(r)) << 16) | + (@as(u32, floatByte(g)) << 8) | + @as(u32, floatByte(b))); + C3D_FogGasMode(GPU_FOG, GPU_PLAIN_DENSITY, false); + C3D_FogLutBind(&fog_lut); } -pub fn set_fog(_: bool, _: f32, _: f32, _: f32, _: f32, _: f32) void {} pub fn set_clip_planes(_: bool) void {} + pub fn set_culling(enabled: bool) void { - C3D_CullFace(if (enabled) 2 else GPU_CULL_NONE); + C3D_CullFace(if (enabled) GPU_CULL_BACK_CCW else GPU_CULL_NONE); +} + +pub fn set_uv_offset(u: f32, v: f32) void { + uv_offset = .{ u, v }; } -pub fn set_uv_offset(_: f32, _: f32) void {} pub fn set_proj_matrix(mat: *const Mat4) void { current_proj = mat.*; @@ -352,6 +460,7 @@ pub fn create_pipeline(layout: Pipeline.VertexLayout, v_shader: ?[:0]align(4) co .stride = layout.stride, .position_attr = position_attr, .color_attr = find_attr(layout, .color), + .uv_attr = find_attr(layout, .uv), .projection_loc = projection_loc, }; @@ -385,40 +494,35 
@@ pub fn destroy_mesh(handle: Mesh.Handle) void { pub fn update_mesh(handle: Mesh.Handle, data: []const u8) void { var mesh = meshes.get_element(handle) orelse return; - const pl = pipelines.get_element(mesh.pipeline) orelse return; - const vertex_count = if (pl.stride == 0) 0 else data.len / pl.stride; - if (vertex_count > mesh.capacity) { + if (data.len > mesh.capacity) { free_mesh_vertices(&mesh); - const verts = render_alloc.alloc(ConvertedVertex, vertex_count) catch { + const bytes = render_alloc.alloc(u8, data.len) catch { mesh.len = 0; mesh.capacity = 0; meshes.update_element(handle, mesh); return; }; - mesh.ptr = verts.ptr; - mesh.capacity = verts.len; + mesh.ptr = bytes.ptr; + mesh.capacity = bytes.len; } if (mesh.ptr) |ptr| { - const dst = ptr[0..mesh.capacity]; - for (0..vertex_count) |i| { - const src = data[i * pl.stride ..][0..pl.stride]; - dst[i] = convert_vertex(src, pl); - } + @memcpy(ptr[0..data.len], data); } - mesh.len = vertex_count; + mesh.len = data.len; meshes.update_element(handle, mesh); } pub fn draw_mesh(handle: Mesh.Handle, model: *const Mat4, count: usize, primitive: Mesh.Primitive) void { - if (!initialized or primitive != .triangles) return; + if (!initialized) return; const mesh = meshes.get_element(handle) orelse return; const ptr = mesh.ptr orelse return; const pipeline_handle = if (current_pipeline != 0) current_pipeline else mesh.pipeline; var pl = pipelines.get_element(pipeline_handle) orelse return; - const draw_count = @min(count, mesh.len); + const available_count = if (pl.stride == 0) 0 else mesh.len / pl.stride; + const draw_count = @min(count, available_count); if (draw_count == 0) return; const view_proj = Mat4.mul(current_view, current_proj); @@ -426,45 +530,138 @@ pub fn draw_mesh(handle: Mesh.Handle, model: *const Mat4, count: usize, primitiv C3D_BindProgram(&pl.program); configure_fixed_attributes(); + configure_texture_texenv(); + bind_current_texture_for_draw(); upload_matrix_uniform(pl.projection_loc, 
&screen_projection); - C3D_ImmDrawBegin(GPU_TRIANGLES); - for (ptr[0..draw_count]) |vertex| { - const pos = clip_to_screen(transform_pos(vertex.pos, &mvp)); - C3D_ImmSendAttrib(pos[0], pos[1], pos[2], pos[3]); - C3D_ImmSendAttrib(vertex.color[0], vertex.color[1], vertex.color[2], vertex.color[3]); + const vbo_count = switch (primitive) { + .triangles => draw_count, + .lines => (draw_count / 2) * 6, + }; + if (vbo_count == 0) return; + const out = prepare_draw_vbo(vbo_count) orelse return; + + var written_count: usize = 0; + switch (primitive) { + .triangles => { + for (0..draw_count) |i| { + const vertex = decode_mesh_vertex(ptr, i, pl); + out[i] = to_gpu_vertex(to_screen_vertex(vertex, &mvp)); + } + written_count = draw_count; + }, + .lines => { + var src_i: usize = 0; + var dst_i: usize = 0; + while (src_i + 1 < draw_count) : (src_i += 2) { + const a = decode_mesh_vertex(ptr, src_i, pl); + const b = decode_mesh_vertex(ptr, src_i + 1, pl); + dst_i = write_line_segment(out, dst_i, a, b, &mvp); + } + written_count = dst_i; + }, } - C3D_ImmDrawEnd(); + if (written_count == 0) return; + + const draw_vertices = out[0..written_count]; + flush_draw_vbo(draw_vertices); + configure_draw_buffer(out.ptr); + C3D_DrawArrays(GPU_TRIANGLES, 0, @intCast(draw_vertices.len)); } -pub fn create_texture(_: u32, _: u32, _: []align(16) u8) anyerror!Texture.Handle { - return 0; +pub fn create_texture(width: u32, height: u32, data: []align(16) u8) anyerror!Texture.Handle { + const expand_small = width < MIN_TEXTURE_SIZE or height < MIN_TEXTURE_SIZE; + const tex_width: u16 = if (expand_small) @intCast(SMALL_TEXTURE_EXPAND_SIZE) else try texture_dim(width); + const tex_height: u16 = if (expand_small) @intCast(SMALL_TEXTURE_EXPAND_SIZE) else try texture_dim(height); + const upload_len = @as(usize, tex_width) * @as(usize, tex_height) * TEXTURE_BPP; + const upload_data = try render_alloc.alignedAlloc(u8, .fromByteUnits(16), upload_len); + errdefer render_alloc.free(upload_data); + + var tex: 
C3D_Tex = undefined; + if (!tex_init(&tex, tex_width, tex_height, false)) return error.TextureCreateFailed; + errdefer C3D_TexDelete(&tex); + tex_set_default_params(&tex); + + convert_texture_data(upload_data, data, width, height, tex_width, tex_height, expand_small); + tex_upload(&tex, upload_data); + + const handle = textures.add_element(.{ + .width = width, + .height = height, + .tex_width = tex_width, + .tex_height = tex_height, + .uv_scale = if (expand_small) .{ 1.0, 1.0 } else .{ + @as(f32, @floatFromInt(width)) / @as(f32, @floatFromInt(tex_width)), + @as(f32, @floatFromInt(height)) / @as(f32, @floatFromInt(tex_height)), + }, + .upload_data = upload_data, + .tex = tex, + }) orelse return error.OutOfTextures; + + return @intCast(handle); +} + +pub fn update_texture(handle: Texture.Handle, data: []align(16) u8) void { + const tex = get_texture_ptr(handle) orelse return; + const expand_small = tex.width < MIN_TEXTURE_SIZE or tex.height < MIN_TEXTURE_SIZE; + convert_texture_data(tex.upload_data, data, tex.width, tex.height, tex.tex_width, tex.tex_height, expand_small); + tex_upload(&tex.tex, tex.upload_data); +} + +pub fn bind_texture(handle: Texture.Handle) void { + bound_texture = if (get_texture_ptr(handle) != null) handle else 0; +} + +pub fn destroy_texture(handle: Texture.Handle) void { + const tex = get_texture_ptr(handle) orelse return; + C3D_TexDelete(&tex.tex); + render_alloc.free(tex.upload_data); + _ = textures.remove_element(handle); + if (bound_texture == handle) bound_texture = 0; } -pub fn update_texture(_: Texture.Handle, _: []align(16) u8) void {} -pub fn bind_texture(_: Texture.Handle) void {} -pub fn destroy_texture(_: Texture.Handle) void {} pub fn force_texture_resident(_: Texture.Handle) void {} fn render_target_clear(t: *C3D_RenderTarget, bits: c_int, color: u32, depth: u32) void { C3D_FrameBufClear(&t.frameBuf, bits, color, depth); } +fn apply_depth_state() void { + const depth_mask: c_int = if (depth_write_enabled) GPU_WRITE_DEPTH else 
0; + const mask: c_int = GPU_WRITE_COLOR | depth_mask; + C3D_DepthTest(true, GPU_GEQUAL, mask); +} + fn configure_fixed_attributes() void { const attr = C3D_GetAttrInfo(); AttrInfo_Init(attr); _ = AttrInfo_AddLoader(attr, 0, GPU_FLOAT, 4); - _ = AttrInfo_AddLoader(attr, 1, GPU_FLOAT, 4); + _ = AttrInfo_AddLoader(attr, 1, GPU_FLOAT, 2); + _ = AttrInfo_AddLoader(attr, 2, GPU_FLOAT, 4); +} + +fn configure_draw_buffer(ptr: [*]GpuVertex) void { + const buf = C3D_GetBufInfo(); + BufInfo_Init(buf); + _ = BufInfo_Add(buf, @ptrCast(&ptr[0]), @intCast(@sizeOf(GpuVertex)), 3, 0x210); } -fn configure_color_texenv() void { +fn configure_texture_texenv() void { const env = C3D_GetTexEnv(0); + const src = if (DEBUG_UV_AS_COLOR) + tev_sources(GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR) + else if (DEBUG_TEXTURE_ONLY) + tev_sources(GPU_TEXTURE0, GPU_TEXTURE0, GPU_TEXTURE0) + else + tev_sources(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR); + const func = if (DEBUG_UV_AS_COLOR or DEBUG_TEXTURE_ONLY) GPU_REPLACE else GPU_MODULATE; + env.* = .{ - .srcRgb = GPU_PRIMARY_COLOR, - .srcAlpha = GPU_PRIMARY_COLOR, + .srcRgb = src, + .srcAlpha = src, .opAll = 0, - .funcRgb = GPU_REPLACE, - .funcAlpha = GPU_REPLACE, + .funcRgb = func, + .funcAlpha = func, .color = 0xFFFFFFFF, .scaleRgb = GPU_TEVSCALE_1, .scaleAlpha = GPU_TEVSCALE_1, @@ -472,6 +669,21 @@ fn configure_color_texenv() void { C3D_DirtyTexEnv(env); } +fn tev_sources(a: u16, b: u16, c: u16) u16 { + return a | (b << 4) | (c << 8); +} + +fn bind_current_texture_for_draw() void { + if (get_texture_ptr(bound_texture)) |tex| { + C3D_TexBind(0, &tex.tex); + return; + } + + if (white_texture_ready) { + C3D_TexBind(0, &white_texture); + } +} + fn upload_matrix_uniform(loc: i8, mat: *const C3D_Mtx) void { if (loc < 0) return; @@ -505,6 +717,47 @@ fn destroy_all_meshes() void { meshes.clear(); } +fn destroy_all_textures() void { + for (&textures.buffer) |*slot| { + if (slot.*) |*tex| { + C3D_TexDelete(&tex.tex); + 
render_alloc.free(tex.upload_data); + slot.* = null; + } + } + textures.clear(); +} + +fn prepare_draw_vbo(count: usize) ?[]GpuVertex { + if (count > draw_vbo_capacity) { + free_draw_vbo(); + + const bytes = count * @sizeOf(GpuVertex); + const mem = linearAlloc(bytes) orelse return null; + const aligned: *align(@alignOf(GpuVertex)) anyopaque = @alignCast(mem); + const ptr: [*]GpuVertex = @ptrCast(aligned); + draw_vbo_raw = mem; + draw_vbo = ptr; + draw_vbo_capacity = count; + } + + const ptr = draw_vbo orelse return null; + return ptr[0..count]; +} + +fn flush_draw_vbo(vertices: []GpuVertex) void { + _ = GSPGPU_FlushDataCache(@ptrCast(&vertices[0]), @intCast(vertices.len * @sizeOf(GpuVertex))); +} + +fn free_draw_vbo() void { + if (draw_vbo_raw) |mem| { + linearFree(mem); + } + draw_vbo_raw = null; + draw_vbo = null; + draw_vbo_capacity = 0; +} + fn free_mesh_vertices(mesh: *MeshData) void { if (mesh.ptr) |ptr| { render_alloc.free(ptr[0..mesh.capacity]); @@ -521,10 +774,26 @@ fn find_attr(layout: Pipeline.VertexLayout, usage: Pipeline.AttributeUsage) ?Pip return null; } +fn decode_mesh_vertex(ptr: [*]const u8, index: usize, pl: PipelineData) ConvertedVertex { + const src = ptr[index * pl.stride ..][0..pl.stride]; + return convert_vertex(src, pl); +} + fn convert_vertex(src: []const u8, pl: PipelineData) ConvertedVertex { return .{ .pos = decode_vec4(src, pl.position_attr, .{ 0.0, 0.0, 0.0, 1.0 }), .color = if (pl.color_attr) |attr| decode_color(src, attr) else .{ 1.0, 1.0, 1.0, 1.0 }, + .uv = if (pl.uv_attr) |attr| decode_vec2(src, attr, .{ 0.0, 0.0 }) else .{ 0.0, 0.0 }, + }; +} + +fn decode_vec2(src: []const u8, attr: Pipeline.Attribute, default: [2]f32) [2]f32 { + const off = attr.offset; + return switch (attr.format) { + .f32x2, .f32x3 => .{ read_f32(src, off, default[0]), read_f32(src, off + 4, default[1]) }, + .unorm8x2, .unorm8x4 => .{ read_u8_norm(src, off, default[0]), read_u8_norm(src, off + 1, default[1]) }, + .unorm16x2, .unorm16x3 => .{ 
read_u16_norm(src, off, default[0]), read_u16_norm(src, off + 2, default[1]) }, + .snorm16x2, .snorm16x3 => .{ read_i16_norm(src, off, default[0]), read_i16_norm(src, off + 2, default[1]) }, }; } @@ -555,6 +824,69 @@ fn decode_color(src: []const u8, attr: Pipeline.Attribute) [4]f32 { }; } +const ScreenVertex = struct { + pos: [4]f32, + color: [4]f32, + uv: [2]f32, +}; + +fn to_screen_vertex(vertex: ConvertedVertex, mvp: *const Mat4) ScreenVertex { + return .{ + .pos = clip_to_screen(transform_pos(vertex.pos, mvp)), + .color = vertex.color, + .uv = transform_uv(vertex.uv), + }; +} + +fn transform_uv(uv: [2]f32) [2]f32 { + const texture_scale = if (get_texture_ptr(bound_texture)) |tex| tex.uv_scale else .{ 1.0, 1.0 }; + return .{ + (uv[0] + uv_offset[0]) * texture_scale[0], + (uv[1] + uv_offset[1]) * texture_scale[1], + }; +} + +fn to_gpu_vertex(vertex: ScreenVertex) GpuVertex { + return .{ + .pos = vertex.pos, + .uv = vertex.uv, + .color = vertex.color, + }; +} + +fn write_line_segment(dst: []GpuVertex, index: usize, a: ConvertedVertex, b: ConvertedVertex, mvp: *const Mat4) usize { + const av = to_screen_vertex(a, mvp); + const bv = to_screen_vertex(b, mvp); + const dx = bv.pos[0] - av.pos[0]; + const dy = bv.pos[1] - av.pos[1]; + const len_sq = dx * dx + dy * dy; + if (len_sq <= 0.000001) return index; + + const inv_len = 1.0 / @sqrt(len_sq); + const nx = -dy * inv_len * (LINE_WIDTH * 0.5); + const ny = dx * inv_len * (LINE_WIDTH * 0.5); + + const a0 = offset_screen_vertex(av, nx, ny); + const a1 = offset_screen_vertex(av, -nx, -ny); + const b0 = offset_screen_vertex(bv, nx, ny); + const b1 = offset_screen_vertex(bv, -nx, -ny); + + dst[index + 0] = to_gpu_vertex(a0); + dst[index + 1] = to_gpu_vertex(a1); + dst[index + 2] = to_gpu_vertex(b0); + dst[index + 3] = to_gpu_vertex(b0); + dst[index + 4] = to_gpu_vertex(a1); + dst[index + 5] = to_gpu_vertex(b1); + return index + 6; +} + +fn offset_screen_vertex(vertex: ScreenVertex, dx: f32, dy: f32) ScreenVertex { + var 
out = vertex; + out.pos[0] += dx; + out.pos[1] += dy; + return out; +} + fn transform_pos(pos: [4]f32, mat: *const Mat4) [4]f32 { return .{ pos[0] * mat.data[0][0] + pos[1] * mat.data[1][0] + pos[2] * mat.data[2][0] + pos[3] * mat.data[3][0], @@ -578,6 +910,118 @@ fn clip_to_screen(pos: [4]f32) [4]f32 { }; } +fn init_white_texture() !void { + if (white_texture_ready) return; + + var data align(16) = [_]u8{0xFF} ** (MIN_TEXTURE_SIZE * MIN_TEXTURE_SIZE * TEXTURE_BPP); + if (!tex_init(&white_texture, MIN_TEXTURE_SIZE, MIN_TEXTURE_SIZE, false)) { + return error.TextureCreateFailed; + } + errdefer C3D_TexDelete(&white_texture); + + tex_set_default_params(&white_texture); + tex_upload(&white_texture, data[0..]); + white_texture_ready = true; +} + +fn tex_init(tex: *C3D_Tex, width: u16, height: u16, vram: bool) bool { + return C3D_TexInitWithParams(tex, null, tex_init_params(width, height, 0, GPU_RGBA8, GPU_TEX_2D, vram)); +} + +fn tex_upload(tex: *C3D_Tex, data: []align(16) const u8) void { + _ = GSPGPU_FlushDataCache(data.ptr, @intCast(data.len)); + C3D_TexLoadImage(tex, data.ptr, GPU_TEXFACE_2D, 0); +} + +fn tex_set_default_params(tex: *C3D_Tex) void { + tex.param &= ~(gpu_texture_mag_filter(GPU_LINEAR) | gpu_texture_min_filter(GPU_LINEAR)); + tex.param |= gpu_texture_mag_filter(GPU_NEAREST) | gpu_texture_min_filter(GPU_NEAREST); + tex.param &= ~(gpu_texture_wrap_s(3) | gpu_texture_wrap_t(3)); + tex.param |= gpu_texture_wrap_s(GPU_REPEAT) | gpu_texture_wrap_t(GPU_REPEAT); +} + +fn tex_init_params(width: u16, height: u16, max_level: u8, format: u8, tex_type: u8, vram: bool) u64 { + const flags0: u8 = (max_level & 0x0F) | ((format & 0x0F) << 4); + const flags1: u8 = (tex_type & 0x07) | (@as(u8, @intFromBool(vram)) << 3); + return @as(u64, width) | + (@as(u64, height) << 16) | + (@as(u64, flags0) << 32) | + (@as(u64, flags1) << 40); +} + +fn gpu_texture_mag_filter(v: u32) u32 { + return (v & 0x1) << 1; +} + +fn gpu_texture_min_filter(v: u32) u32 { + return (v & 0x1) << 2; 
+} + +fn gpu_texture_wrap_s(v: u32) u32 { + return (v & 0x3) << 12; +} + +fn gpu_texture_wrap_t(v: u32) u32 { + return (v & 0x3) << 8; +} + +fn get_texture_ptr(handle: Texture.Handle) ?*TextureData { + if (handle == 0 or handle >= textures.buffer.len) return null; + if (textures.buffer[handle]) |*tex| return tex; + return null; +} + +fn texture_dim(value: u32) !u16 { + if (value == 0 or value > MAX_TEXTURE_SIZE) return error.InvalidTextureSize; + + var out: u32 = MIN_TEXTURE_SIZE; + while (out < value) : (out <<= 1) {} + if (out > MAX_TEXTURE_SIZE) return error.InvalidTextureSize; + return @intCast(out); +} + +fn convert_texture_data(dst: []align(16) u8, src: []const u8, width: u32, height: u32, tex_width: u16, tex_height: u16, expand_small: bool) void { + const source_len = @as(usize, width) * @as(usize, height) * TEXTURE_BPP; + if (src.len < source_len) return; + + const tw: u32 = tex_width; + const th: u32 = tex_height; + for (0..th) |y| { + const sy = if (expand_small) + @min((@as(u32, @intCast(y)) * height) / th, height - 1) + else + @min(@as(u32, @intCast(y)), height - 1); + for (0..tw) |x| { + const sx = if (expand_small) + @min((@as(u32, @intCast(x)) * width) / tw, width - 1) + else + @min(@as(u32, @intCast(x)), width - 1); + const src_off = (@as(usize, sy) * width + sx) * TEXTURE_BPP; + const dst_off = tiled_pixel_offset(@intCast(x), @intCast(y), tw) * TEXTURE_BPP; + dst[dst_off + 0] = src[src_off + 3]; + dst[dst_off + 1] = src[src_off + 2]; + dst[dst_off + 2] = src[src_off + 1]; + dst[dst_off + 3] = src[src_off + 0]; + } + } +} + +fn tiled_pixel_offset(x: u32, y: u32, width: u32) usize { + const tile_x = x & ~@as(u32, 7); + const tile_y = y & ~@as(u32, 7); + const tile_base = tile_y * width + tile_x * 8; + return @intCast(tile_base + morton8(x & 7, y & 7)); +} + +fn morton8(x: u32, y: u32) u32 { + return (x & 1) | + ((y & 1) << 1) | + ((x & 2) << 1) | + ((y & 2) << 2) | + ((x & 4) << 2) | + ((y & 4) << 3); +} + fn read_f32(src: []const u8, offset: usize, 
default: f32) f32 { if (offset + 4 > src.len) return default; const bits = std.mem.readInt(u32, src[offset..][0..4], .little); diff --git a/src/platform/3ds/time.zig b/src/platform/3ds/time.zig index 8e39b9c..681986e 100644 --- a/src/platform/3ds/time.zig +++ b/src/platform/3ds/time.zig @@ -8,13 +8,20 @@ const Timespec = extern struct { extern fn clock_gettime(clock_id: c_int, tp: *Timespec) c_int; extern fn clock_getres(clock_id: c_int, res: *Timespec) c_int; extern fn svcSleepThread(ns: i64) void; +extern fn svcGetSystemTick() u64; const ns_per_s: u64 = std.time.ns_per_s; const max_i64_ns: i64 = std.math.maxInt(i64); const CLOCK_REALTIME: c_int = 1; const CLOCK_MONOTONIC: c_int = 4; +const SYSCLOCK_ARM11: u128 = 268_111_856; pub fn now(clock: std.Io.Clock) std.Io.Timestamp { + switch (clock) { + .awake, .boot => return .fromNanoseconds(systemTickNanoseconds(svcGetSystemTick())), + else => {}, + } + const id = clockId(clock) orelse std.debug.panic("3ds std.Io clock {s} is not implemented", .{@tagName(clock)}); var ts: Timespec = undefined; if (clock_gettime(id, &ts) != 0) { @@ -24,6 +31,11 @@ pub fn now(clock: std.Io.Clock) std.Io.Timestamp { } pub fn clockResolution(clock: std.Io.Clock) std.Io.Clock.ResolutionError!std.Io.Duration { + switch (clock) { + .awake, .boot => return .fromNanoseconds(@intCast((@as(u128, ns_per_s) + SYSCLOCK_ARM11 - 1) / SYSCLOCK_ARM11)), + else => {}, + } + const id = clockId(clock) orelse return error.ClockUnavailable; var ts: Timespec = undefined; if (clock_getres(id, &ts) != 0) return error.ClockUnavailable; @@ -40,6 +52,11 @@ fn clockId(clock: std.Io.Clock) ?c_int { }; } +fn systemTickNanoseconds(ticks: u64) i64 { + const ns = (@as(u128, ticks) * @as(u128, ns_per_s)) / SYSCLOCK_ARM11; + return @intCast(@min(ns, @as(u128, @intCast(max_i64_ns)))); +} + fn timespecNanoseconds(ts: Timespec) i64 { if (ts.tv_sec <= 0) return @max(0, @as(i64, @intCast(ts.tv_nsec))); diff --git a/test/main.zig b/test/main.zig index fc025d7..d830fd5 100644 
--- a/test/main.zig +++ b/test/main.zig @@ -121,13 +121,13 @@ const MyState = struct { if (!Audio.enabled) return; // -- background music -- - self.music_data = try load_wav(engine, "calm1.wav"); - self.music_reader = .fixed(self.music_data); - const music_stream = try Audio.wav.open(&self.music_reader); - _ = try Audio.play(music_stream, .{ .priority = .critical }); + // self.music_data = try load_wav(engine, "calm1.wav"); + // self.music_reader = .fixed(self.music_data); + // const music_stream = try Audio.wav.open(&self.music_reader); + // _ = try Audio.play(music_stream, .{ .priority = .critical }); - // -- spatial SFX data -- - self.grass_data = try load_wav(engine, "grass1.wav"); + // // -- spatial SFX data -- + // self.grass_data = try load_wav(engine, "grass1.wav"); // Listener at origin, facing -Z Audio.set_listener(Vec3.zero(), Vec3.new(0, 0, -1), Vec3.new(0, 1, 0)); @@ -163,11 +163,11 @@ const MyState = struct { const pos = Vec3.new(@cos(angle) * dist, 0, @sin(angle) * dist); self.grass_readers[i] = .fixed(self.grass_data); - const stream = Audio.wav.open(&self.grass_readers[i]) catch return; - _ = Audio.play_at(stream, pos, .{ - .ref_distance = 1.0, - .max_distance = 25.0, - }) catch return; + // const stream = Audio.wav.open(&self.grass_readers[i]) catch return; + // _ = Audio.play_at(stream, pos, .{ + // .ref_distance = 1.0, + // .max_distance = 25.0, + // }) catch return; Util.game_logger.info("grass at ({d:.1}, 0, {d:.1}) dist={d:.1}", .{ pos.x, pos.z, dist }); } From 124596fe07b665013c9a292e0f5fc78a13d3550d Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Sat, 30 May 2026 21:00:04 -0400 Subject: [PATCH 12/44] Fix file IO --- build.zig | 19 ++- src/core/paths.zig | 15 ++- src/platform/3ds/paths.zig | 15 +++ src/platform/3ds/services.zig | 18 ++- src/platform/c_io.zig | 226 ++++++++++++++++++++++++++++++---- src/platform/switch/paths.zig | 15 ++- src/rendering/texture.zig | 8 +- src/util/logger.zig | 2 +- test/main.zig | 43 +++---- 9 files 
changed, 302 insertions(+), 59 deletions(-) diff --git a/build.zig b/build.zig index d314f33..2dad327 100644 --- a/build.zig +++ b/build.zig @@ -97,7 +97,7 @@ pub const Config = struct { // bugged there. Flip back to `.default` with `-Daudio=default` once // that's fixed. const default_audio: Audio = switch (plat) { - .macos, .nintendo_3ds => .none, + .macos => .none, else => .default, }; @@ -190,12 +190,13 @@ fn devkitProPath(b: *std.Build) []const u8 { /// the actual build steps and executable. pub fn addGame(owner: *std.Build, b: *std.Build, opts: GameOptions) *std.Build.Step.Compile { const config = Config.resolve(opts.target, opts.overrides); + const uses_nintendo_c_io = config.platform == .nintendo_3ds or config.platform == .nintendo_switch; // 3DS and Switch force ofmt=c — there's no Zig-native backend for // either Horizon target yet, so we emit C and let an external // toolchain (devkitARM/libctru on 3DS, devkitA64/libnx on Switch) // compile the result. - const target = if (config.platform == .nintendo_3ds or config.platform == .nintendo_switch) blk: { + const target = if (uses_nintendo_c_io) blk: { var q = opts.target.query; q.ofmt = .c; break :blk b.resolveTargetQuery(q); @@ -314,7 +315,7 @@ pub fn addGame(owner: *std.Build, b: *std.Build, opts: GameOptions) *std.Build.S exe.subsystem = .windows; } - if (config.platform == .nintendo_3ds or config.platform == .nintendo_switch) { + if (uses_nintendo_c_io) { // std/start.zig opts `.@"3ds"` and freestanding out of // exporting a default entry symbol, so without an explicit // entry the linker DCEs `main` and the emitted C is @@ -342,9 +343,10 @@ pub fn addHeadless(owner: *std.Build, b: *std.Build, opts: HeadlessOptions) *std var config = Config.resolve(opts.target, opts.overrides); config.gfx = .headless; config.audio = .none; + const uses_nintendo_c_io = config.platform == .nintendo_3ds or config.platform == .nintendo_switch; // 3DS and Switch force ofmt=c (see addGame for details). 
- const target = if (config.platform == .nintendo_3ds or config.platform == .nintendo_switch) blk: { + const target = if (uses_nintendo_c_io) blk: { var q = opts.target.query; q.ofmt = .c; break :blk b.resolveTargetQuery(q); @@ -394,6 +396,10 @@ pub fn addHeadless(owner: *std.Build, b: *std.Build, opts: HeadlessOptions) *std exe.setLinkerScript(pd.path("tools/linkfile.ld")); } + if (uses_nintendo_c_io) { + exe.entry = .{ .symbol_name = "main" }; + } + return exe; } @@ -1285,6 +1291,11 @@ pub fn build(b: *std.Build) void { }, .smdh_long_description = "Aether engine test app", .smdh_author = "Aether", + .resources = &.{ + .{ .path = b.path("test/test.png"), .name = "test.png" }, + .{ .path = b.path("test/calm1.wav"), .name = "calm1.wav" }, + .{ .path = b.path("test/grass1.wav"), .name = "grass1.wav" }, + }, .romfs = if (config.platform == .nintendo_3ds) nintendo_romfs.getDirectory() else null, .switch_romfs = if (config.platform == .nintendo_switch) nintendo_romfs.getDirectory() else null, }); diff --git a/src/core/paths.zig b/src/core/paths.zig index 377a8bf..b64905a 100644 --- a/src/core/paths.zig +++ b/src/core/paths.zig @@ -31,17 +31,25 @@ const NintendoIo = if (options.config.platform == .nintendo_3ds or options.confi @import("../platform/c_io.zig") else void; +const PathDir = if (NintendoIo != void) NintendoIo.AppDir else Io.Dir; /// Engine-owned directory handles. Cleared via `close()` at engine shutdown. pub const Dirs = struct { /// Read-only assets shipped with the app. On platforms where the /// concept doesn't apply, points at CWD. - resources: Io.Dir, + resources: PathDir, /// User-writable persistent state. On platforms where the concept /// doesn't apply, points at CWD (same handle as `resources`). 
- data: Io.Dir, + data: PathDir, pub fn close(self: *Dirs, io: Io) void { + if (NintendoIo != void) { + self.resources.close(io); + if (!self.data.eql(self.resources)) self.data.close(io); + NintendoIo.deinitAppDirs(); + return; + } + // std.Io.Dir.cwd() docs: "Closing the returned Dir is checked // illegal behavior." On CWD-fallback platforms (use_cwd, PSP, // unsupported OS) either handle may be the cwd sentinel, so skip @@ -86,6 +94,7 @@ pub fn resolve( // feature, not a bug. if (options.config.use_cwd) { if (NintendoIo != void) NintendoIo.useCwdDirs(); + if (NintendoIo != void) return .{ .resources = NintendoIo.cwdDir(), .data = NintendoIo.cwdDir() }; return .{ .resources = Io.Dir.cwd(), .data = Io.Dir.cwd() }; } @@ -107,7 +116,7 @@ pub fn resolve( fn resolve_nintendo(app_name: []const u8) Error!Dirs { try NintendoIo.initAppDirs(app_name); - return .{ .resources = Io.Dir.cwd(), .data = Io.Dir.cwd() }; + return .{ .resources = NintendoIo.resourcesDir(), .data = NintendoIo.dataDir() }; } // -- macOS -------------------------------------------------------------------- diff --git a/src/platform/3ds/paths.zig b/src/platform/3ds/paths.zig index fed61a9..38d1cb2 100644 --- a/src/platform/3ds/paths.zig +++ b/src/platform/3ds/paths.zig @@ -1,11 +1,26 @@ const std = @import("std"); +extern fn archiveMountSdmc() u32; +extern fn archiveUnmount(name: [*:0]const u8) u32; extern fn romfsMountSelf(name: [*:0]const u8) u32; +extern fn romfsUnmount(name: [*:0]const u8) u32; + +pub fn mountData() bool { + return archiveMountSdmc() == 0; +} + +pub fn unmountData() void { + _ = archiveUnmount("sdmc"); +} pub fn mountResources() bool { return romfsMountSelf("romfs") == 0; } +pub fn unmountResources() void { + _ = romfsUnmount("romfs"); +} + pub fn dataRoot(buffer: []u8, app_name: []const u8) error{NameTooLong}![]const u8 { return std.fmt.bufPrint(buffer, "sdmc:/3ds/{s}", .{app_name}) catch error.NameTooLong; } diff --git a/src/platform/3ds/services.zig 
b/src/platform/3ds/services.zig index e3572e5..2ba70f7 100644 --- a/src/platform/3ds/services.zig +++ b/src/platform/3ds/services.zig @@ -93,11 +93,27 @@ fn entry() callconv(.c) c_int { const init = process_init.makeInit(.{ .vector = &argv }); @import("root").main(init) catch |err| { - fatal("Aether main returned error.{s} at 0x{x}", .{ @errorName(err), @returnAddress() }); + fatalMainError(err, @errorReturnTrace(), @returnAddress()); }; return 0; } +fn fatalMainError(err: anyerror, maybe_trace: ?*std.builtin.StackTrace, fallback_addr: usize) noreturn { + if (maybe_trace) |trace| { + const len = @min(trace.instruction_addresses.len, trace.index); + const addrs = trace.instruction_addresses[0..@min(len, 4)]; + switch (addrs.len) { + 0 => {}, + 1 => fatal("Aether main returned error.{s} at 0x{x}", .{ @errorName(err), addrs[0] }), + 2 => fatal("Aether main returned error.{s} at 0x{x} 0x{x}", .{ @errorName(err), addrs[0], addrs[1] }), + 3 => fatal("Aether main returned error.{s} at 0x{x} 0x{x} 0x{x}", .{ @errorName(err), addrs[0], addrs[1], addrs[2] }), + else => fatal("Aether main returned error.{s} at 0x{x} 0x{x} 0x{x} 0x{x}", .{ @errorName(err), addrs[0], addrs[1], addrs[2], addrs[3] }), + } + } + + fatal("Aether main returned error.{s} at 0x{x}", .{ @errorName(err), fallback_addr }); +} + pub fn panic(msg: []const u8, _: ?*std.builtin.StackTrace, first_trace_addr: ?usize) noreturn { @branchHint(.cold); diff --git a/src/platform/c_io.zig b/src/platform/c_io.zig index d389f5c..855b137 100644 --- a/src/platform/c_io.zig +++ b/src/platform/c_io.zig @@ -38,6 +38,7 @@ const O_RDWR: c_int = 2; const O_CREAT: c_int = 0x0200; const O_TRUNC: c_int = 0x0400; const O_EXCL: c_int = 0x0800; +const O_BINARY: c_int = 0x10000; const SEEK_SET: c_int = 0; const SEEK_CUR: c_int = 1; @@ -52,6 +53,54 @@ var resource_root_buffer: [max_path_bytes:0]u8 = @splat(0); var data_root_buffer: [max_path_bytes:0]u8 = @splat(0); var resource_root_len: usize = 0; var data_root_len: usize = 0; +var 
resources_mounted = false; +var data_mounted = false; + +const max_dynamic_dirs = 16; + +const DirSlot = struct { + used: bool = false, + read_only: bool = false, + path: [max_path_bytes:0]u8 = @splat(0), + len: usize = 0, +}; +var dir_slots: [max_dynamic_dirs]DirSlot = [_]DirSlot{.{}} ** max_dynamic_dirs; + +const AppDirKind = enum { cwd, resources, data, dynamic }; + +/// Engine-facing directory token. `std.Io.Dir.Handle` is `void` on the +/// no-libc Nintendo targets, so resource/data identity has to live outside +/// the std dir handle. +pub const AppDir = struct { + kind: AppDirKind, + slot: usize = 0, + + pub fn eql(self: AppDir, other: AppDir) bool { + return self.kind == other.kind and (self.kind != .dynamic or self.slot == other.slot); + } + + pub fn openFile(self: AppDir, io_arg: Io, sub_path: []const u8, flags: std.Io.Dir.OpenFileOptions) File.OpenError!File { + _ = io_arg; + return appDirOpenFile(self, sub_path, flags); + } + + pub fn createFile(self: AppDir, io_arg: Io, sub_path: []const u8, flags: std.Io.Dir.CreateFileOptions) File.OpenError!File { + _ = io_arg; + return appDirCreateFile(self, sub_path, flags); + } + + pub fn createDirPathOpen(self: AppDir, io_arg: Io, sub_path: []const u8, create_options: std.Io.Dir.CreateDirPathOpenOptions) std.Io.Dir.CreateDirPathOpenError!AppDir { + _ = io_arg; + return appDirCreateDirPathOpen(self, sub_path, create_options.open_options); + } + + pub fn close(self: AppDir, io_arg: Io) void { + _ = io_arg; + if (self.kind == .dynamic and self.slot < dir_slots.len) { + dir_slots[self.slot].used = false; + } + } +}; const vtable: Io.VTable = blk: { var v = Io.failing.vtable.*; @@ -62,8 +111,10 @@ const vtable: Io.VTable = blk: { v.swapCancelProtection = swapCancelProtection; v.checkCancel = checkCancel; v.operate = operate; + v.dirCreateDirPathOpen = dirCreateDirPathOpen; v.dirCreateFile = dirCreateFile; v.dirOpenFile = dirOpenFile; + v.dirClose = dirClose; v.fileStat = fileStat; v.fileLength = fileLength; 
v.fileClose = fileClose; @@ -92,12 +143,28 @@ pub fn io() Io { return .{ .userdata = null, .vtable = &vtable }; } +pub fn cwd() Dir { + return .{ .handle = if (@sizeOf(Dir.Handle) == 0) {} else @as(Dir.Handle, @intCast(-1)) }; +} + +pub fn cwdDir() AppDir { + return .{ .kind = .cwd }; +} + +pub fn resourcesDir() AppDir { + return .{ .kind = .resources }; +} + +pub fn dataDir() AppDir { + return .{ .kind = .data }; +} + pub fn initAppDirs(app_name: []const u8) Dir.CreateDirPathOpenError!void { - if (platform_paths.mountResources()) { - setResourceRoot("romfs:/") catch return error.NameTooLong; - } else { - setResourceRoot("") catch unreachable; - } + data_mounted = platform_paths.mountData(); + + resources_mounted = platform_paths.mountResources(); + errdefer deinitAppDirs(); + setResourceRoot("romfs:/") catch return error.NameTooLong; var data_buffer: [max_path_bytes]u8 = undefined; const data_root = platform_paths.dataRoot(&data_buffer, app_name) catch return error.NameTooLong; @@ -105,7 +172,23 @@ pub fn initAppDirs(app_name: []const u8) Dir.CreateDirPathOpenError!void { try ensureDirPath(data_root); } +pub fn deinitAppDirs() void { + for (&dir_slots) |*slot| slot.used = false; + setResourceRoot("") catch unreachable; + setDataRoot("") catch unreachable; + + if (resources_mounted) { + platform_paths.unmountResources(); + resources_mounted = false; + } + if (data_mounted) { + platform_paths.unmountData(); + data_mounted = false; + } +} + pub fn useCwdDirs() void { + deinitAppDirs(); setResourceRoot("") catch unreachable; setDataRoot("") catch unreachable; } @@ -130,7 +213,46 @@ fn operate(_: ?*anyopaque, operation: Io.Operation) Io.Cancelable!Io.Operation.R }; } -fn dirOpenFile(_: ?*anyopaque, _: Dir, sub_path: []const u8, flags: Dir.OpenFileOptions) File.OpenError!File { +fn dirCreateDirPathOpen( + _: ?*anyopaque, + dir: Dir, + sub_path: []const u8, + _: Dir.Permissions, + open_options: Dir.OpenOptions, +) Dir.CreateDirPathOpenError!Dir { + _ = dir; + _ = try 
appDirCreateDirPathOpen(cwdDir(), sub_path, open_options); + return cwd(); +} + +fn dirOpenFile(_: ?*anyopaque, dir: Dir, sub_path: []const u8, flags: Dir.OpenFileOptions) File.OpenError!File { + _ = dir; + return appDirOpenFile(cwdDir(), sub_path, flags); +} + +fn dirCreateFile(_: ?*anyopaque, dir: Dir, sub_path: []const u8, flags: Dir.CreateFileOptions) File.OpenError!File { + _ = dir; + return appDirCreateFile(cwdDir(), sub_path, flags); +} + +fn dirClose(_: ?*anyopaque, _: []const Dir) void {} + +fn appDirCreateDirPathOpen( + dir: AppDir, + sub_path: []const u8, + open_options: Dir.OpenOptions, +) Dir.CreateDirPathOpenError!AppDir { + if (!open_options.access_sub_paths or open_options.iterate or !open_options.follow_symlinks) + unsupported("dirCreateDirPathOpen option"); + if (writeDenied(dir, sub_path)) return error.ReadOnlyFileSystem; + + var path_buffer: [max_path_bytes:0]u8 = undefined; + const path = try rootedPathForDir(&path_buffer, dir, sub_path); + try ensureDirPath(path); + return registerDir(path, false); +} + +fn appDirOpenFile(dir: AppDir, sub_path: []const u8, flags: Dir.OpenFileOptions) File.OpenError!File { if (flags.lock != .none or flags.path_only or flags.allow_ctty or flags.resolve_beneath) unsupported("dirOpenFile option"); if (!flags.allow_directory or !flags.follow_symlinks) @@ -138,35 +260,30 @@ fn dirOpenFile(_: ?*anyopaque, _: Dir, sub_path: []const u8, flags: Dir.OpenFile const role: FileRole = switch (flags.mode) { .read_only => .read, .write_only => .write, - .read_write => unsupported("read-write file open on handle-less Nintendo target"), + .read_write => .read_write, }; + if (flags.mode != .read_only and writeDenied(dir, sub_path)) return error.ReadOnlyFileSystem; var path_buffer: [max_path_bytes:0]u8 = undefined; - const open_flags: c_int = switch (flags.mode) { + const open_flags: c_int = O_BINARY | switch (flags.mode) { .read_only => O_RDONLY, .write_only => O_WRONLY, .read_write => O_RDWR, }; - const path = try 
rootedPath(&path_buffer, sub_path, if (flags.mode == .read_only) resourceRoot() else dataRoot()); - var fd = c.open(path.ptr, open_flags, @as(c_int, 0)); - if (fd < 0 and flags.mode == .read_only and data_root_len > 0) { - const first_errno = errno(); - if (first_errno != 2) return openError(first_errno); - const fallback_path = try rootedPath(&path_buffer, sub_path, dataRoot()); - fd = c.open(fallback_path.ptr, open_flags, @as(c_int, 0)); - } + const path = try rootedPathForDir(&path_buffer, dir, sub_path); + const fd = c.open(path.ptr, open_flags, @as(c_int, 0)); if (fd < 0) return openError(errno()); errdefer _ = c.close(fd); return registerFile(fd, role); } -fn dirCreateFile(_: ?*anyopaque, _: Dir, sub_path: []const u8, flags: Dir.CreateFileOptions) File.OpenError!File { +fn appDirCreateFile(dir: AppDir, sub_path: []const u8, flags: Dir.CreateFileOptions) File.OpenError!File { if (flags.lock != .none or flags.resolve_beneath) unsupported("dirCreateFile option"); - if (flags.read) unsupported("readable created file on handle-less Nintendo target"); + if (writeDenied(dir, sub_path)) return error.ReadOnlyFileSystem; var path_buffer: [max_path_bytes:0]u8 = undefined; - const path = try rootedPath(&path_buffer, sub_path, dataRoot()); - var open_flags: c_int = O_WRONLY; + const path = try rootedPathForDir(&path_buffer, dir, sub_path); + var open_flags: c_int = O_BINARY | if (flags.read) O_RDWR else O_WRONLY; open_flags |= O_CREAT; if (flags.truncate) open_flags |= O_TRUNC; if (flags.exclusive) open_flags |= O_EXCL; @@ -178,7 +295,7 @@ fn dirCreateFile(_: ?*anyopaque, _: Dir, sub_path: []const u8, flags: Dir.Create const fd = c.open(path.ptr, open_flags, mode); if (fd < 0) return openError(errno()); errdefer _ = c.close(fd); - return registerFile(fd, .write); + return registerFile(fd, if (flags.read) .read_write else .write); } fn fileStat(_: ?*anyopaque, file: File) File.StatError!File.Stat { @@ -213,7 +330,11 @@ fn fileClose(_: ?*anyopaque, files: []const File) void 
{ if (fd > 2) _ = c.close(fd); continue; } - if (read_fd >= 0) { + if (read_fd >= 0 and read_fd == write_fd) { + _ = c.close(read_fd); + read_fd = -1; + write_fd = -1; + } else if (read_fd >= 0) { _ = c.close(read_fd); read_fd = -1; } else if (write_fd >= 0) { @@ -373,7 +494,7 @@ fn random(_: ?*anyopaque, buffer: []u8) void { @memset(buffer, 0); } -const FileRole = enum { read, write }; +const FileRole = enum { read, write, read_write }; fn resourceRoot() []const u8 { return resource_root_buffer[0..resource_root_len]; @@ -398,6 +519,50 @@ fn setRoot(buffer: *[max_path_bytes:0]u8, len: *usize, root: []const u8) error{N len.* = root.len; } +fn dirRoot(dir: AppDir) []const u8 { + return switch (dir.kind) { + .cwd => "", + .resources => resourceRoot(), + .data => dataRoot(), + .dynamic => { + if (dir.slot >= dir_slots.len or !dir_slots[dir.slot].used) + unsupported("closed Nintendo dir handle"); + return dir_slots[dir.slot].path[0..dir_slots[dir.slot].len]; + }, + }; +} + +fn dirReadOnly(dir: AppDir) bool { + return switch (dir.kind) { + .resources => true, + .cwd, .data => false, + .dynamic => { + if (dir.slot >= dir_slots.len or !dir_slots[dir.slot].used) + unsupported("closed Nintendo dir handle"); + return dir_slots[dir.slot].read_only; + }, + }; +} + +fn writeDenied(dir: AppDir, sub_path: []const u8) bool { + if (isRomfsPath(sub_path)) return true; + return !isAbsoluteOrDevicePath(sub_path) and dirReadOnly(dir); +} + +fn registerDir(path: []const u8, read_only: bool) Dir.CreateDirPathOpenError!AppDir { + for (&dir_slots, 0..) 
|*slot, i| { + if (slot.used) continue; + if (path.len >= max_path_bytes) return error.NameTooLong; + @memcpy(slot.path[0..path.len], path); + slot.path[path.len] = 0; + slot.len = path.len; + slot.read_only = read_only; + slot.used = true; + return .{ .kind = .dynamic, .slot = i }; + } + return error.SystemResources; +} + fn registerFile(fd: c_int, role: FileRole) File { if (@sizeOf(File.Handle) == 0) { switch (role) { @@ -409,6 +574,12 @@ fn registerFile(fd: c_int, role: FileRole) File { if (write_fd >= 0) unsupported("more than one regular write file"); write_fd = fd; }, + .read_write => { + if (read_fd >= 0) unsupported("more than one regular read file"); + if (write_fd >= 0) unsupported("more than one regular write file"); + read_fd = fd; + write_fd = fd; + }, } return .{ .handle = {}, .flags = .{ .nonblocking = false } }; } @@ -505,6 +676,10 @@ fn rootedPath(buf: *[max_path_bytes:0]u8, path: []const u8, root: []const u8) er return buf[0..len :0]; } +fn rootedPathForDir(buf: *[max_path_bytes:0]u8, dir: AppDir, path: []const u8) error{ NameTooLong, BadPathName }![:0]const u8 { + return rootedPath(buf, path, dirRoot(dir)); +} + fn isAbsoluteOrDevicePath(path: []const u8) bool { if (path.len == 0) return false; if (path[0] == '/') return true; @@ -513,6 +688,11 @@ fn isAbsoluteOrDevicePath(path: []const u8) bool { return colon < slash; } +fn isRomfsPath(path: []const u8) bool { + if (path.len < "romfs:".len) return false; + return std.ascii.eqlIgnoreCase(path[0.."romfs:".len], "romfs:"); +} + fn ensureDirPath(path: []const u8) Dir.CreateDirPathOpenError!void { if (path.len == 0) return error.BadPathName; var path_buffer: [max_path_bytes:0]u8 = undefined; diff --git a/src/platform/switch/paths.zig b/src/platform/switch/paths.zig index 69e4ec3..77b14fe 100644 --- a/src/platform/switch/paths.zig +++ b/src/platform/switch/paths.zig @@ -1,13 +1,26 @@ const std = @import("std"); extern fn fsdevMountSdmc() u32; +extern fn fsdevUnmountDevice(name: [*:0]const u8) c_int; 
extern fn romfsMountSelf(name: [*:0]const u8) u32; +extern fn romfsUnmount(name: [*:0]const u8) u32; + +pub fn mountData() bool { + return fsdevMountSdmc() == 0; +} + +pub fn unmountData() void { + _ = fsdevUnmountDevice("sdmc"); +} pub fn mountResources() bool { - _ = fsdevMountSdmc(); return romfsMountSelf("romfs") == 0; } +pub fn unmountResources() void { + _ = romfsUnmount("romfs"); +} + pub fn dataRoot(buffer: []u8, app_name: []const u8) error{NameTooLong}![]const u8 { return std.fmt.bufPrint(buffer, "sdmc:/switch/{s}", .{app_name}) catch error.NameTooLong; } diff --git a/src/rendering/texture.zig b/src/rendering/texture.zig index d237d8c..7517c5d 100644 --- a/src/rendering/texture.zig +++ b/src/rendering/texture.zig @@ -6,6 +6,7 @@ const Platform = @import("../platform/platform.zig"); const gfx = Platform.gfx; const options = @import("options"); const psp_gfx = if (builtin.os.tag == .psp) @import("../platform/psp/psp_gfx_ge.zig") else struct {}; +const use_streaming_file_reader = options.config.platform == .nintendo_3ds or options.config.platform == .nintendo_switch; pub const Handle = u32; @@ -68,12 +69,15 @@ pub fn init_defaults(alloc: std.mem.Allocator) !void { /// `engine.dirs.data` for user-provided ones. Do not use /// `std.Io.Dir.cwd()` -- CWD is not guaranteed to be the app root /// (Finder-launched `.app` bundles give CWD = `/`). 
-pub fn load(io: std.Io, dir: std.Io.Dir, alloc: std.mem.Allocator, path: []const u8) !Texture { +pub fn load(io: std.Io, dir: anytype, alloc: std.mem.Allocator, path: []const u8) !Texture { var file = try dir.openFile(io, path, .{}); defer file.close(io); var temp: [4096]u8 = undefined; - var reader = file.reader(io, &temp); + var reader = if (use_streaming_file_reader) + file.readerStreaming(io, &temp) + else + file.reader(io, &temp); return load_from_reader(alloc, &reader.interface); } diff --git a/src/util/logger.zig b/src/util/logger.zig index 5fd6357..7e2fc19 100644 --- a/src/util/logger.zig +++ b/src/util/logger.zig @@ -11,7 +11,7 @@ var writer: *std.Io.Writer = undefined; /// other platform routes through the engine-resolved data dir so /// Finder-launched `.app` bundles don't try to write into read-only /// bundle internals. -pub fn init(io: std.Io, data_dir: std.Io.Dir) !void { +pub fn init(io: std.Io, data_dir: anytype) !void { if (builtin.os.tag == .psp) { file_log = try std.Io.Dir.cwd().createFile(io, "ms0:/aether.log", .{ .truncate = true }); } else { diff --git a/test/main.zig b/test/main.zig index d830fd5..a460e9e 100644 --- a/test/main.zig +++ b/test/main.zig @@ -21,8 +21,8 @@ pub const psp_stack_size: u32 = 256 * 1024; // PSP, 3DS, and Switch override panic/IO handlers that would otherwise // pull in posix symbols (Io.Threaded references std.posix decls that // don't exist for these targets). 3DS and Switch use Aether's newlib-backed -// baseline so debug prints go through stderr instead of dereferencing an -// undefined Io implementation. +// baseline so debug prints and file IO go through the backend instead of +// dereferencing an undefined Io implementation. const is_freestanding_console = ae.platform == .psp or ae.platform == .nintendo_3ds or ae.platform == .nintendo_switch; // 3DS routes panics through err:f; Switch keeps `no_panic` while the debug IO // baseline is intentionally small. 
@@ -31,16 +31,10 @@ pub const std_options_debug_threaded_io = if (is_freestanding_console) null else pub const std_options_debug_io: std.Io = if (ae.platform == .psp) sdk.extra.Io.psp_io else if (ae.platform == .nintendo_3ds or ae.platform == .nintendo_switch) ae.Cio.io() else std.Io.Threaded.global_single_threaded.io(); pub const std_options_cwd = - if (ae.platform == .psp) psp_cwd else if (ae.platform == .nintendo_3ds or ae.platform == .nintendo_switch) stub_cwd else null; + if (ae.platform == .psp) psp_cwd else if (ae.platform == .nintendo_3ds or ae.platform == .nintendo_switch) ae.Cio.cwd else null; fn psp_cwd() std.Io.Dir { return .{ .handle = -1 }; } -fn stub_cwd() std.Io.Dir { - // Dir.Handle resolves to `void` on 3DS/Switch freestanding targets - // (posix.fd_t falls back to void in the empty system struct). Init - // with `{}` to match. - return .{ .handle = {} }; -} const Vertex = extern struct { uv: [2]i16, @@ -77,7 +71,10 @@ const MyState = struct { defer file.close(engine.io); var tmp: [4096]u8 = undefined; - var rdr = file.reader(engine.io, &tmp); + var rdr = if (ae.platform == .nintendo_3ds or ae.platform == .nintendo_switch) + file.readerStreaming(engine.io, &tmp) + else + file.reader(engine.io, &tmp); var riff_hdr: [8]u8 = undefined; try rdr.interface.readSliceAll(&riff_hdr); @@ -100,9 +97,7 @@ const MyState = struct { self.mesh = try MyMesh.new(render, pipeline); self.transform = Rendering.Transform.new(); - const texture_bytes = @embedFile("test.png"); - var texture_reader: std.Io.Reader = .fixed(texture_bytes); - self.texture = try Rendering.Texture.load_from_reader(render, &texture_reader); + self.texture = try Rendering.Texture.load(engine.io, engine.dirs.resources, render, "test.png"); try self.mesh.append(render, &.{ Vertex{ .pos = .{ -16383, -16383, 0 }, .color = 0xFF0000FF, .uv = .{ 0, 32767 } }, @@ -121,13 +116,13 @@ const MyState = struct { if (!Audio.enabled) return; // -- background music -- - // self.music_data = try load_wav(engine, 
"calm1.wav"); - // self.music_reader = .fixed(self.music_data); - // const music_stream = try Audio.wav.open(&self.music_reader); - // _ = try Audio.play(music_stream, .{ .priority = .critical }); + self.music_data = try load_wav(engine, "calm1.wav"); + self.music_reader = .fixed(self.music_data); + const music_stream = try Audio.wav.open(&self.music_reader); + _ = try Audio.play(music_stream, .{ .priority = .critical }); - // // -- spatial SFX data -- - // self.grass_data = try load_wav(engine, "grass1.wav"); + // -- spatial SFX data -- + self.grass_data = try load_wav(engine, "grass1.wav"); // Listener at origin, facing -Z Audio.set_listener(Vec3.zero(), Vec3.new(0, 0, -1), Vec3.new(0, 1, 0)); @@ -163,11 +158,11 @@ const MyState = struct { const pos = Vec3.new(@cos(angle) * dist, 0, @sin(angle) * dist); self.grass_readers[i] = .fixed(self.grass_data); - // const stream = Audio.wav.open(&self.grass_readers[i]) catch return; - // _ = Audio.play_at(stream, pos, .{ - // .ref_distance = 1.0, - // .max_distance = 25.0, - // }) catch return; + const stream = Audio.wav.open(&self.grass_readers[i]) catch return; + _ = Audio.play_at(stream, pos, .{ + .ref_distance = 1.0, + .max_distance = 25.0, + }) catch return; Util.game_logger.info("grass at ({d:.1}, 0, {d:.1}) dist={d:.1}", .{ pos.x, pos.z, dist }); } From 3b5c78e4b15d34b16bd791c5c5fc916be41fb438 Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Sat, 30 May 2026 21:50:06 -0400 Subject: [PATCH 13/44] Destroy Miniaudio, use SDL3 --- build.zig | 36 +++++++---- build.zig.zon | 9 +-- src/platform/audio.zig | 4 +- src/platform/glfw/audio.zig | 117 ++++++++++++++++++++++-------------- 4 files changed, 102 insertions(+), 64 deletions(-) diff --git a/build.zig b/build.zig index 2dad327..fb45afc 100644 --- a/build.zig +++ b/build.zig @@ -93,13 +93,7 @@ pub const Config = struct { else => .default, }; - // macOS default is `.none` because the current miniaudio build is - // bugged there. 
Flip back to `.default` with `-Daudio=default` once - // that's fixed. - const default_audio: Audio = switch (plat) { - .macos => .none, - else => .default, - }; + const default_audio: Audio = .default; return .{ .platform = plat, @@ -251,12 +245,30 @@ pub fn addGame(owner: *std.Build, b: *std.Build, opts: GameOptions) *std.Build.S mod.addImport("vulkan", vulkan); if (config.audio != .none) { - const zaudio_dep = owner.dependency("zaudio", .{ + const sdl3_dep = owner.lazyDependency("sdl3", .{ .target = target, .optimize = opts.optimize, - }); - mod.addImport("zaudio", zaudio_dep.module("root")); - mod.linkLibrary(zaudio_dep.artifact("miniaudio")); + .main = false, + .ext_image = false, + .ext_net = false, + .ext_ttf = false, + // Static SDL3 and static GLFW both embed generated Wayland + // protocol objects on Linux. Keep SDL dynamic there since + // Aether only uses its audio subsystem. + .c_sdl_preferred_linkage = @as( + std.builtin.LinkMode, + if (target.result.os.tag == .linux) .dynamic else .static, + ), + }) orelse @panic("sdl3 dependency is required when desktop audio is enabled"); + mod.addImport("sdl3", sdl3_dep.module("sdl3")); + + if (target.result.os.tag == .linux) { + mod.addRPathSpecial("$ORIGIN"); + const install_sdl3 = b.addInstallArtifact(sdl3_dep.artifact("SDL3"), .{ + .dest_dir = .{ .override = .bin }, + }); + b.getInstallStep().dependOn(&install_sdl3.step); + } } if (target.result.os.tag == .macos) { @@ -1255,7 +1267,7 @@ pub fn build(b: *std.Build) void { const overrides: Config.Overrides = .{ .gfx = b.option(Gfx, "gfx", "Graphics backend override (default: auto-detect from target)"), - .audio = b.option(Audio, "audio", "Audio backend override (default: .none on macOS, .default elsewhere)"), + .audio = b.option(Audio, "audio", "Audio backend override (default: platform default)"), .psp_display_mode = b.option(PspDisplayMode, "psp-display", "PSP display mode: rgba8888 (32-bit, default) or rgb565 (16-bit)"), .psp_mipmaps = b.option(bool, 
"psp-mipmaps", "PSP: generate mip levels for VRAM-resident textures (default: false)"), .use_cwd = b.option(bool, "use-cwd", "Force resources+data dirs to CWD (debug/CI convenience; default: false)"), diff --git a/build.zig.zon b/build.zig.zon index c6b279c..74faae2 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -76,15 +76,16 @@ .url = "git+https://github.com/Snektron/vulkan-zig#3adbeefbc833c12656791d304b37a6315b357745", .hash = "vulkan-0.0.0-r7YtxyBsAwD35FvkFhT2xruTNdCByD3TtGw44XWtfun8", }, - .zaudio = .{ - .url = "git+https://github.com/IridescentRose/zaudio?ref=update_zig_16#54e45e002005448623bff0ba06baa919bd036e03", - .hash = "zaudio-0.11.0-dev-_M-91l8yQQDTAb3Hk8V-lyP88DGgLzS9QNUkmpbCN_PK", - }, .system_sdk = .{ .url = "https://github.com/zig-gamedev/system_sdk/archive/c0dbf11cdc17da5904ea8a17eadc54dee26567ec.tar.gz", .hash = "system_sdk-0.3.0-dev-alwUNnYaaAJAtIdE2fg4NQfDqEKs7QCXy_qYukAOBfmF", .lazy = true, }, + .sdl3 = .{ + .url = "git+https://codeberg.org/7Games/zig-sdl3?ref=master#40c2e4b579aa556db37a502c936426aa1c8b5c95", + .hash = "sdl3-0.2.0-NmT1Q0mFJwBi9kZmArzh2rfJ_mFshydV0zPGULVlpACc", + .lazy = true, + }, }, .paths = .{ "build.zig", diff --git a/src/platform/audio.zig b/src/platform/audio.zig index bf7ba8a..29c5dcd 100644 --- a/src/platform/audio.zig +++ b/src/platform/audio.zig @@ -6,8 +6,8 @@ const audio_api = @import("audio_api.zig"); const mixer_mod = @import("../audio/mixer.zig"); /// Comptime-selected audio backend module (slot-based PCM output). -/// `.none` routes to the silent backend -- used by headless builds and by -/// the macOS default while miniaudio is bugged there. +/// `.none` routes to the silent backend -- used by headless builds and +/// explicit `-Daudio=none` builds. 
pub const Api = if (options.config.audio == .none) @import("headless/headless_audio.zig") else if (builtin.os.tag == .psp) diff --git a/src/platform/glfw/audio.zig b/src/platform/glfw/audio.zig index f0bbe09..cb75750 100644 --- a/src/platform/glfw/audio.zig +++ b/src/platform/glfw/audio.zig @@ -1,17 +1,19 @@ -//! Desktop audio backend -- uses zaudio (miniaudio) with a low-level device +//! Desktop audio backend -- uses SDL3 audio with an on-demand stream //! callback. The audio thread pulls PCM from each slot's Stream reader, -//! converts to float32 stereo, applies gain/pan from the mixer, and writes -//! to the output device. +//! converts to float32 stereo, applies gain/pan from the mixer, and queues +//! mixed frames to SDL. const std = @import("std"); -const zaudio = @import("zaudio"); +const sdl3 = @import("sdl3"); const Stream = @import("../../audio/stream.zig").Stream; const PcmFormat = @import("../../audio/stream.zig").PcmFormat; -const DEVICE_SAMPLE_RATE: u32 = 44_100; -const DEVICE_CHANNELS: u32 = 2; +const SDL_AUDIO_FLAGS = sdl3.InitFlags{ .audio = true }; +const DEVICE_SAMPLE_RATE: usize = 44_100; +const DEVICE_CHANNELS: usize = 2; const NUM_SLOTS: usize = 32; -/// Maximum frames the device callback will request per invocation. +const OUTPUT_FRAME_BYTES: usize = DEVICE_CHANNELS * @sizeOf(f32); +/// Maximum frames mixed per callback chunk. const MAX_PERIOD_FRAMES: usize = 1024; /// Per-slot scratch buffer: room for MAX_PERIOD_FRAMES of stereo 32-bit PCM. 
const READ_BUF_SIZE: usize = MAX_PERIOD_FRAMES * 2 * 4; @@ -48,35 +50,46 @@ fn init_slots() [NUM_SLOTS]Slot { // -- device ------------------------------------------------------------------ -var device: ?*zaudio.Device = null; -var audio_alloc: std.mem.Allocator = undefined; -var audio_io: std.Io = undefined; +var device_stream: ?sdl3.audio.Stream = null; +var sdl_audio_initialized = false; +var output_buf: [MAX_PERIOD_FRAMES * DEVICE_CHANNELS]f32 = undefined; -pub fn setup(alloc: std.mem.Allocator, io: std.Io) void { - audio_alloc = alloc; - audio_io = io; -} +pub fn setup(_: std.mem.Allocator, _: std.Io) void {} pub fn init() anyerror!void { - zaudio.init(audio_alloc); + try sdl3.init(SDL_AUDIO_FLAGS); + sdl_audio_initialized = true; + errdefer { + sdl3.quit(SDL_AUDIO_FLAGS); + sdl_audio_initialized = false; + } - var config = zaudio.Device.Config.init(.playback); - config.playback.format = .float32; - config.playback.channels = DEVICE_CHANNELS; - config.sample_rate = DEVICE_SAMPLE_RATE; - config.data_callback = data_callback; + const spec = sdl3.audio.Spec{ + .format = .floating_32_bit, + .num_channels = DEVICE_CHANNELS, + .sample_rate = DEVICE_SAMPLE_RATE, + }; + + const stream = try sdl3.audio.Device.default_playback.openStream(spec, anyopaque, data_callback, null); + device_stream = stream; + errdefer { + stream.deinit(); + device_stream = null; + } - device = try zaudio.Device.create(null, config); - try device.?.start(); + try stream.resumeDevice(); } pub fn deinit() void { - if (device) |d| { - d.stop() catch {}; - d.destroy(); - device = null; + if (device_stream) |stream| { + stream.pauseDevice() catch {}; + stream.deinit(); + device_stream = null; + } + if (sdl_audio_initialized) { + sdl3.quit(SDL_AUDIO_FLAGS); + sdl_audio_initialized = false; } - zaudio.deinit(); } pub fn update() void {} @@ -109,19 +122,33 @@ pub fn is_slot_active(slot: u8) bool { return state != .inactive and state != .finished; } -// -- audio thread callback 
--------------------------------------------------- +// -- audio stream callback --------------------------------------------------- fn data_callback( - _: *zaudio.Device, - raw_output: ?*anyopaque, - _: ?*const anyopaque, - frame_count: u32, -) callconv(.c) void { - const out: [*]f32 = @ptrCast(@alignCast(raw_output orelse return)); - const total_samples: usize = @as(usize, frame_count) * DEVICE_CHANNELS; + _: ?*anyopaque, + stream: sdl3.audio.Stream, + additional_amount: usize, + _: usize, +) void { + var bytes_remaining = additional_amount; + while (bytes_remaining > 0) { + const frames = @min( + MAX_PERIOD_FRAMES, + (bytes_remaining + OUTPUT_FRAME_BYTES - 1) / OUTPUT_FRAME_BYTES, + ); + const out = output_buf[0 .. frames * DEVICE_CHANNELS]; + fill_output(out, frames); + + const bytes = std.mem.sliceAsBytes(out); + stream.putData(bytes) catch return; + + if (bytes_remaining <= bytes.len) break; + bytes_remaining -= bytes.len; + } +} - // Start with silence. - @memset(out[0..total_samples], 0); +fn fill_output(out: []f32, frame_count: usize) void { + @memset(out, 0); for (&slots) |*slot| { const raw_state = slot.state.load(.acquire); @@ -141,7 +168,7 @@ fn data_callback( const right_gain = gain * std.math.clamp(1.0 + pan, 0.0, 1.0); const fmt = slot.stream.format; - const bytes_needed: usize = @as(usize, frame_count) * fmt.frame_size(); + const bytes_needed: usize = frame_count * @as(usize, fmt.frame_size()); if (bytes_needed > READ_BUF_SIZE) { slot.state.store(@intFromEnum(SlotState.finished), .release); @@ -171,24 +198,22 @@ fn read_f32(buf: []const u8, index: usize) f32 { } fn mix_into( - out: [*]f32, + out: []f32, buf: []const u8, fmt: PcmFormat, - frame_count: u32, + frame_count: usize, left_gain: f32, right_gain: f32, ) void { - const frames: usize = frame_count; - if (fmt.bit_depth == 16) { if (fmt.channels == 1) { - for (0..frames) |f| { + for (0..frame_count) |f| { const s = read_i16(buf, f); out[f * 2] += s * left_gain; out[f * 2 + 1] += s * right_gain; 
} } else { - for (0..frames) |f| { + for (0..frame_count) |f| { const l = read_i16(buf, f * 2); const r = read_i16(buf, f * 2 + 1); out[f * 2] += l * left_gain; @@ -197,13 +222,13 @@ fn mix_into( } } else if (fmt.bit_depth == 32) { if (fmt.channels == 1) { - for (0..frames) |f| { + for (0..frame_count) |f| { const s = read_f32(buf, f); out[f * 2] += s * left_gain; out[f * 2 + 1] += s * right_gain; } } else { - for (0..frames) |f| { + for (0..frame_count) |f| { out[f * 2] += read_f32(buf, f * 2) * left_gain; out[f * 2 + 1] += read_f32(buf, f * 2 + 1) * right_gain; } From 543c839c5ebe8a804aebbaa5384bbadaa6fd24fc Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Tue, 2 Jun 2026 06:54:47 -0400 Subject: [PATCH 14/44] Dir iter, C patch. --- build.zig | 91 +++++++++++++++++------- src/platform/c_io.zig | 157 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 225 insertions(+), 23 deletions(-) diff --git a/build.zig b/build.zig index fb45afc..029a9e9 100644 --- a/build.zig +++ b/build.zig @@ -169,6 +169,27 @@ fn devkitProPath(b: *std.Build) []const u8 { return p; } +/// Creates a `3dslink` command for pushing an installed `.3dsx` to a +/// networked 3DS. Reuses Aether's devkitPro option/cache so downstream +/// builds do not need to redeclare `-Ddevkitpro-path`. 
+pub fn add3dslink(b: *std.Build, threedsx_path: []const u8) *std.Build.Step.Run { + const dkp = devkitProPath(b); + const link_cmd = b.addSystemCommand(&.{b.pathJoin(&.{ dkp, "tools/bin/3dslink" })}); + if (b.option([]const u8, "3dslink-address", "3DS: target IP for 3dslink push (default: mDNS auto-discover)")) |ip| { + link_cmd.addArgs(&.{ "-a", ip }); + } + if (b.option(u32, "3dslink-retries", "3DS: 3dslink retry count (default: 10)")) |n| { + link_cmd.addArgs(&.{ "-r", b.fmt("{d}", .{n}) }); + } + if (b.option(bool, "3dslink-server", "3DS: pass -s so 3dslink stays listening after the upload (useful for some Rosalina versions and for stdout relay)") orelse false) { + link_cmd.addArg("-s"); + } + link_cmd.addArg(threedsx_path); + link_cmd.step.dependOn(b.getInstallStep()); + if (b.args) |args| link_cmd.addArgs(args); + return link_cmd; +} + /// Creates an executable with the Aether engine module and all platform /// dependencies wired up. Returns the compile step so the caller can /// further customize it (install, add run steps, etc.). 
@@ -782,6 +803,36 @@ fn pspEbootPipeline(b: *std.Build, exe: *std.Build.Step.Compile, psp_dep: *std.B return result; } +fn patch3dsGeneratedC(b: *std.Build, exe: *std.Build.Step.Compile) std.Build.LazyPath { + const patch = b.addSystemCommand(&.{ + "perl", "-e", + \\local $/; + \\my $src = <>; + \\my %align16 = (); + \\while ($src =~ /zig_static_assert\(_Alignof \(struct ([A-Za-z0-9_]+)\) == 16,/g) { + \\ $align16{$1} = 1; + \\} + \\my $pending = ""; + \\for my $line (split /(?<=\n)/, $src) { + \\ if ($pending ne "") { + \\ if ($line =~ s/^};/} __attribute__((aligned(16)));/) { + \\ $pending = ""; + \\ } + \\ } elsif ($line =~ /^struct\s+([A-Za-z0-9_]+)\s*\{/) { + \\ my $name = $1; + \\ if ($align16{$name}) { + \\ if ($line !~ s/\};/} __attribute__((aligned(16)));/) { + \\ $pending = $name; + \\ } + \\ } + \\ } + \\ print $line; + \\} + }); + patch.addArtifactArg(exe); + return patch.captureStdOut(.{ .basename = b.fmt("{s}.3ds.c", .{exe.name}) }); +} + /// Compiles the zig-emitted C with devkitARM, links against libctru, and /// packages the ELF (plus an SMDH and optional RomFS) into a `.3dsx` /// homebrew bundle. Mirrors `pspEbootPipeline` for the PSP toolchain. @@ -867,14 +918,21 @@ fn threedsxPipeline(b: *std.Build, exe: *std.Build.Step.Compile, opts: ExportOpt // zig.h hardcodes `zig_align(16)` for its `zig_i128`/`zig_u128` // struct fallback (used when `__int128` isn't supported by the C - // compiler — gcc on 32-bit ARM is one such target). But zig's own - // layout pass uses 8-byte alignment for i128 on 32-bit ARM, so - // the `_Static_assert(_Alignof(...) == 8)` baked into the - // generated C fails against gcc's 16-byte view. Patch the four - // sites down to `zig_align(8)`. The patched copy lands in a - // dedicated include dir we point gcc at first. - const patch = b.addSystemCommand(&.{"sed"}); - patch.addArg("s/zig_align(16)/zig_align(8)/g"); + // compiler -- gcc on 32-bit ARM is one such target). 
Zig's ARM + // layout uses 8-byte alignment for those integer types, while f128 + // still needs 16-byte alignment. Patch only the integer fallback + // typedefs, then route unsupported ARM f128 through zig.h's vector + // fallback with explicit 16-byte alignment. + const patch = b.addSystemCommand(&.{"perl"}); + patch.addArgs(&.{ + "-0pe", + \\s/typedef struct \{ zig_align\(16\) uint64_t lo; uint64_t hi; \} zig_u128;/typedef struct { zig_align(8) uint64_t lo; uint64_t hi; } zig_u128;/g; + \\s/typedef struct \{ zig_align\(16\) uint64_t lo; int64_t hi; \} zig_i128;/typedef struct { zig_align(8) uint64_t lo; int64_t hi; } zig_i128;/g; + \\s/typedef struct \{ zig_align\(16\) uint64_t hi; uint64_t lo; \} zig_u128;/typedef struct { zig_align(8) uint64_t hi; uint64_t lo; } zig_u128;/g; + \\s/typedef struct \{ zig_align\(16\) int64_t hi; uint64_t lo; \} zig_i128;/typedef struct { zig_align(8) int64_t hi; uint64_t lo; } zig_i128;/g; + \\s/#if defined\(zig_darwin\) \|\| defined\(zig_aarch64\)/#if defined(zig_darwin) || defined(zig_aarch64) || defined(zig_arm)/; + \\s/typedef __attribute__\(\(__vector_size__\(2 \* sizeof\(uint64_t\)\)\)\) uint64_t zig_v2u64;/typedef __attribute__((__vector_size__(2 * sizeof(uint64_t)), aligned(16))) uint64_t zig_v2u64;/; + }); patch.addFileArg(.{ .cwd_relative = zig_h_src }); const patched_zig_h = patch.captureStdOut(.{ .basename = "zig.h" }); @@ -935,7 +993,7 @@ fn threedsxPipeline(b: *std.Build, exe: *std.Build.Step.Compile, opts: ExportOpt link.addPrefixedDirectoryArg("-I", include_wf.getDirectory()); link.addArg("-x"); link.addArg("c"); - link.addArtifactArg(exe); + link.addFileArg(patch3dsGeneratedC(b, exe)); link.addFileArg(exception_shim); // Reset language so gcc treats subsequent inputs by extension; the // compiler_rt object is ELF arm and `-x c` would mis-parse it. @@ -1317,20 +1375,7 @@ pub fn build(b: *std.Build) void { // 3DS can't run natively on the host. 
The 3DS-side homebrew // launcher listens for incoming .3dsx pushes on port 17491; // `3dslink` finds it via mDNS or accepts an explicit IP. - const dkp = devkitProPath(b); - const link_cmd = b.addSystemCommand(&.{b.pathJoin(&.{ dkp, "tools/bin/3dslink" })}); - if (b.option([]const u8, "3dslink-address", "3DS: target IP for 3dslink push (default: mDNS auto-discover)")) |ip| { - link_cmd.addArgs(&.{ "-a", ip }); - } - if (b.option(u32, "3dslink-retries", "3DS: 3dslink retry count (default: 10)")) |n| { - link_cmd.addArgs(&.{ "-r", b.fmt("{d}", .{n}) }); - } - if (b.option(bool, "3dslink-server", "3DS: pass -s so 3dslink stays listening after the upload (useful for some Rosalina versions and for stdout relay)") orelse false) { - link_cmd.addArg("-s"); - } - link_cmd.addArg(b.getInstallPath(.bin, "Aether-3DS/Aether.3dsx")); - link_cmd.step.dependOn(b.getInstallStep()); - if (b.args) |args| link_cmd.addArgs(args); + const link_cmd = add3dslink(b, b.getInstallPath(.bin, "Aether-3DS/Aether.3dsx")); const link_step = b.step("3dslink", "Push the 3dsx to a networked 3DS via 3dslink"); link_step.dependOn(&link_cmd.step); diff --git a/src/platform/c_io.zig b/src/platform/c_io.zig index 855b137..35adc90 100644 --- a/src/platform/c_io.zig +++ b/src/platform/c_io.zig @@ -22,6 +22,8 @@ const c = struct { extern fn read(fd: c_int, buf: [*]u8, count: usize) isize; extern fn write(fd: c_int, buf: [*]const u8, count: usize) isize; extern fn lseek(fd: c_int, offset: c_long, whence: c_int) c_long; + extern fn rename(old: [*:0]const u8, new: [*:0]const u8) c_int; + extern fn unlink(path: [*:0]const u8) c_int; extern fn fsync(fd: c_int) c_int; extern fn ftruncate(fd: c_int, length: c_long) c_int; extern fn getcwd(buf: [*]u8, size: usize) ?[*:0]u8; @@ -94,12 +96,52 @@ pub const AppDir = struct { return appDirCreateDirPathOpen(self, sub_path, create_options.open_options); } + pub fn access(self: AppDir, io_arg: Io, sub_path: []const u8, opts: Dir.AccessOptions) Dir.AccessError!void { + _ = 
io_arg; + return appDirAccess(self, sub_path, opts); + } + + pub fn createDir(self: AppDir, io_arg: Io, sub_path: []const u8, permissions: Dir.Permissions) Dir.CreateDirError!void { + _ = io_arg; + _ = permissions; + return appDirCreateDir(self, sub_path); + } + + pub fn openDir(self: AppDir, io_arg: Io, sub_path: []const u8, opts: Dir.OpenOptions) Dir.OpenError!AppDir { + _ = io_arg; + return appDirOpenDir(self, sub_path, opts); + } + + pub fn deleteFile(self: AppDir, io_arg: Io, sub_path: []const u8) Dir.DeleteFileError!void { + _ = io_arg; + return appDirDeleteFile(self, sub_path); + } + + pub fn rename(self: AppDir, old_sub_path: []const u8, new_dir: AppDir, new_sub_path: []const u8, io_arg: Io) Dir.RenameError!void { + _ = io_arg; + return appDirRename(self, old_sub_path, new_dir, new_sub_path); + } + + pub fn iterate(self: AppDir) Iterator { + return .{ .dir = self }; + } + pub fn close(self: AppDir, io_arg: Io) void { _ = io_arg; if (self.kind == .dynamic and self.slot < dir_slots.len) { dir_slots[self.slot].used = false; } } + + pub const Iterator = struct { + dir: AppDir, + + pub fn next(self: *Iterator, io_arg: Io) anyerror!?Dir.Entry { + _ = self; + _ = io_arg; + unsupported("dir iteration"); + } + }; }; const vtable: Io.VTable = blk: { @@ -148,6 +190,7 @@ pub fn cwd() Dir { } pub fn cwdDir() AppDir { + if (data_root_len != 0) return dataDir(); return .{ .kind = .cwd }; } @@ -252,6 +295,32 @@ fn appDirCreateDirPathOpen( return registerDir(path, false); } +fn appDirAccess(dir: AppDir, sub_path: []const u8, opts: Dir.AccessOptions) Dir.AccessError!void { + if (!opts.follow_symlinks) unsupported("dirAccess option"); + if (opts.write and writeDenied(dir, sub_path)) return error.ReadOnlyFileSystem; + + var path_buffer: [max_path_bytes:0]u8 = undefined; + const path = try rootedPathForDir(&path_buffer, dir, sub_path); + const fd = c.open(path.ptr, O_BINARY | O_RDONLY, @as(c_int, 0)); + if (fd < 0) return accessError(errno()); + _ = c.close(fd); +} + +fn 
appDirCreateDir(dir: AppDir, sub_path: []const u8) Dir.CreateDirError!void { + if (writeDenied(dir, sub_path)) return error.ReadOnlyFileSystem; + + var path_buffer: [max_path_bytes:0]u8 = undefined; + const path = try rootedPathForDir(&path_buffer, dir, sub_path); + try createSingleDir(path.ptr); +} + +fn appDirOpenDir(dir: AppDir, sub_path: []const u8, opts: Dir.OpenOptions) Dir.OpenError!AppDir { + _ = dir; + _ = sub_path; + _ = opts; + unsupported("dirOpenDir"); +} + fn appDirOpenFile(dir: AppDir, sub_path: []const u8, flags: Dir.OpenFileOptions) File.OpenError!File { if (flags.lock != .none or flags.path_only or flags.allow_ctty or flags.resolve_beneath) unsupported("dirOpenFile option"); @@ -298,6 +367,24 @@ fn appDirCreateFile(dir: AppDir, sub_path: []const u8, flags: Dir.CreateFileOpti return registerFile(fd, if (flags.read) .read_write else .write); } +fn appDirDeleteFile(dir: AppDir, sub_path: []const u8) Dir.DeleteFileError!void { + if (writeDenied(dir, sub_path)) return error.ReadOnlyFileSystem; + + var path_buffer: [max_path_bytes:0]u8 = undefined; + const path = try rootedPathForDir(&path_buffer, dir, sub_path); + if (c.unlink(path.ptr) != 0) return deleteFileError(errno()); +} + +fn appDirRename(old_dir: AppDir, old_sub_path: []const u8, new_dir: AppDir, new_sub_path: []const u8) Dir.RenameError!void { + if (writeDenied(old_dir, old_sub_path) or writeDenied(new_dir, new_sub_path)) return error.ReadOnlyFileSystem; + + var old_path_buffer: [max_path_bytes:0]u8 = undefined; + var new_path_buffer: [max_path_bytes:0]u8 = undefined; + const old_path = try rootedPathForDir(&old_path_buffer, old_dir, old_sub_path); + const new_path = try rootedPathForDir(&new_path_buffer, new_dir, new_sub_path); + if (c.rename(old_path.ptr, new_path.ptr) != 0) return renameError(errno()); +} + fn fileStat(_: ?*anyopaque, file: File) File.StatError!File.Stat { return .{ .inode = zero(File.INode), @@ -738,10 +825,43 @@ fn createDir(path: [*:0]const u8) 
Dir.CreateDirPathOpenError!void { } } +fn createSingleDir(path: [*:0]const u8) Dir.CreateDirError!void { + if (c.mkdir(path, 0o777) == 0) return; + switch (errno()) { + 1 => return error.PermissionDenied, + 2 => return error.FileNotFound, + 6 => return error.NoDevice, + 12 => return error.SystemResources, + 13 => return error.AccessDenied, + 17 => return error.PathAlreadyExists, + 20 => return error.NotDir, + 28 => return error.NoSpaceLeft, + 30 => return error.ReadOnlyFileSystem, + 91 => return error.NameTooLong, + 92 => return error.SymLinkLoop, + else => return error.Unexpected, + } +} + fn errno() c_int { return c.__errno().*; } +fn accessError(code: c_int) Dir.AccessError { + return switch (code) { + 1 => error.PermissionDenied, + 2 => error.FileNotFound, + 5 => error.InputOutput, + 12 => error.SystemResources, + 13 => error.AccessDenied, + 16 => error.FileBusy, + 30 => error.ReadOnlyFileSystem, + 91 => error.NameTooLong, + 92 => error.SymLinkLoop, + else => error.Unexpected, + }; +} + fn openError(code: c_int) File.OpenError { return switch (code) { 1 => error.PermissionDenied, @@ -765,6 +885,43 @@ fn openError(code: c_int) File.OpenError { }; } +fn deleteFileError(code: c_int) Dir.DeleteFileError { + return switch (code) { + 1 => error.PermissionDenied, + 2 => error.FileNotFound, + 12 => error.SystemResources, + 13 => error.AccessDenied, + 16 => error.FileBusy, + 20 => error.NotDir, + 21 => error.IsDir, + 30 => error.ReadOnlyFileSystem, + 91 => error.NameTooLong, + 92 => error.SymLinkLoop, + else => error.Unexpected, + }; +} + +fn renameError(code: c_int) Dir.RenameError { + return switch (code) { + 1 => error.PermissionDenied, + 2 => error.FileNotFound, + 5 => error.HardwareFailure, + 6 => error.NoDevice, + 12 => error.SystemResources, + 13 => error.AccessDenied, + 16 => error.FileBusy, + 18 => error.CrossDevice, + 20 => error.NotDir, + 21 => error.IsDir, + 28 => error.NoSpaceLeft, + 30 => error.ReadOnlyFileSystem, + 39 => error.DirNotEmpty, + 91 => 
error.NameTooLong, + 92 => error.SymLinkLoop, + else => error.Unexpected, + }; +} + fn readError() File.ReadPositionalError { return switch (errno()) { 5 => error.InputOutput, From 2bbbd5da3735712a1a1715cd5b6a76c20a08fb0b Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Tue, 2 Jun 2026 07:44:28 -0400 Subject: [PATCH 15/44] link_libc --- build.zig | 126 +++-- src/core/paths.zig | 35 +- src/engine.zig | 13 +- src/platform/3ds/services.zig | 20 +- src/platform/c_io.zig | 777 +++++++++++++++++-------------- src/platform/switch/services.zig | 22 +- src/root.zig | 17 +- src/util/logger.zig | 6 +- 8 files changed, 594 insertions(+), 422 deletions(-) diff --git a/build.zig b/build.zig index 029a9e9..b8676ca 100644 --- a/build.zig +++ b/build.zig @@ -137,6 +137,12 @@ pub const ShaderPaths = struct { slang: std.Build.LazyPath, }; +const user_root_import_name = "aether_user_root"; + +pub fn userRootModule(exe: *std.Build.Step.Compile) *std.Build.Module { + return exe.root_module.import_table.get(user_root_import_name) orelse exe.root_module; +} + // Cached per-build user options. b.option panics on second declaration, so // these getters declare once and memoize. Accessed from both addGame (for // linking) and exportArtifact (for bundle packaging). 
Module-level mutable @@ -224,6 +230,7 @@ pub fn addGame(owner: *std.Build, b: *std.Build, opts: GameOptions) *std.Build.S const mod = b.addModule("Aether", .{ .root_source_file = owner.path("src/root.zig"), .target = target, + .link_libc = if (uses_nintendo_c_io) true else null, .imports = &.{ .{ .name = "options", .module = options_module }, }, @@ -318,25 +325,44 @@ pub fn addGame(owner: *std.Build, b: *std.Build, opts: GameOptions) *std.Build.S } // --- user executable --- + const user_mod = b.createModule(.{ + .root_source_file = opts.root_source_file, + .target = target, + .optimize = opts.optimize, + .strip = if (config.platform == .psp) false else null, + .link_libc = if (uses_nintendo_c_io) true else null, + .imports = &.{ + .{ .name = "aether", .module = mod }, + }, + }); + + const root_mod = if (uses_nintendo_c_io) b.createModule(.{ + .root_source_file = owner.path(switch (config.platform) { + .nintendo_3ds => "src/platform/3ds/services.zig", + .nintendo_switch => "src/platform/switch/services.zig", + else => unreachable, + }), + .target = target, + .optimize = opts.optimize, + .link_libc = true, + .imports = &.{ + .{ .name = "aether", .module = mod }, + .{ .name = user_root_import_name, .module = user_mod }, + .{ .name = "options", .module = options_module }, + }, + }) else user_mod; + const exe = b.addExecutable(.{ .name = opts.name, - .root_module = b.createModule(.{ - .root_source_file = opts.root_source_file, - .target = target, - .optimize = opts.optimize, - .strip = if (config.platform == .psp) false else null, - .imports = &.{ - .{ .name = "aether", .module = mod }, - }, - }), + .root_module = root_mod, }); if (psp_dep) |pd| { // Inline PSP config -- pspsdk.configurePspExecutable uses // dependencyFromBuildZig on exe.step.owner which fails when // the exe is owned by a downstream builder. 
- if (exe.root_module.import_table.get("pspsdk") == null) { - exe.root_module.addImport("pspsdk", mod.import_table.get("pspsdk").?); + if (userRootModule(exe).import_table.get("pspsdk") == null) { + userRootModule(exe).addImport("pspsdk", mod.import_table.get("pspsdk").?); } exe.link_eh_frame_hdr = true; exe.link_emit_relocs = true; @@ -349,13 +375,11 @@ pub fn addGame(owner: *std.Build, b: *std.Build, opts: GameOptions) *std.Build.S } if (uses_nintendo_c_io) { - // std/start.zig opts `.@"3ds"` and freestanding out of - // exporting a default entry symbol, so without an explicit - // entry the linker DCEs `main` and the emitted C is - // constants-only. Force `main` to keep the whole call graph - // alive for the external toolchain. (libnx's switch.specs - // also `--require-defined=main` at link time.) - exe.entry = .{ .symbol_name = "main" }; + // The platform shim exports C `main` itself. Keeping std/start's + // libc main wrapper disabled avoids pulling in unsupported + // freestanding libc/thread startup paths while still preserving the + // exported shim in the emitted C. 
+ exe.entry = .disabled; } return exe; @@ -392,6 +416,7 @@ pub fn addHeadless(owner: *std.Build, b: *std.Build, opts: HeadlessOptions) *std const mod = b.addModule("Aether", .{ .root_source_file = owner.path("src/root.zig"), .target = target, + .link_libc = if (uses_nintendo_c_io) true else null, .imports = &.{ .{ .name = "options", .module = options_module }, }, @@ -406,22 +431,41 @@ pub fn addHeadless(owner: *std.Build, b: *std.Build, opts: HeadlessOptions) *std mod.addImport("pspsdk", pd.module("pspsdk")); } + const user_mod = b.createModule(.{ + .root_source_file = opts.root_source_file, + .target = target, + .optimize = opts.optimize, + .strip = if (config.platform == .psp) false else null, + .link_libc = if (uses_nintendo_c_io) true else null, + .imports = &.{ + .{ .name = "aether", .module = mod }, + }, + }); + + const root_mod = if (uses_nintendo_c_io) b.createModule(.{ + .root_source_file = owner.path(switch (config.platform) { + .nintendo_3ds => "src/platform/3ds/services.zig", + .nintendo_switch => "src/platform/switch/services.zig", + else => unreachable, + }), + .target = target, + .optimize = opts.optimize, + .link_libc = true, + .imports = &.{ + .{ .name = "aether", .module = mod }, + .{ .name = user_root_import_name, .module = user_mod }, + .{ .name = "options", .module = options_module }, + }, + }) else user_mod; + const exe = b.addExecutable(.{ .name = opts.name, - .root_module = b.createModule(.{ - .root_source_file = opts.root_source_file, - .target = target, - .optimize = opts.optimize, - .strip = if (config.platform == .psp) false else null, - .imports = &.{ - .{ .name = "aether", .module = mod }, - }, - }), + .root_module = root_mod, }); if (psp_dep) |pd| { - if (exe.root_module.import_table.get("pspsdk") == null) { - exe.root_module.addImport("pspsdk", mod.import_table.get("pspsdk").?); + if (userRootModule(exe).import_table.get("pspsdk") == null) { + userRootModule(exe).addImport("pspsdk", mod.import_table.get("pspsdk").?); } 
exe.link_eh_frame_hdr = true; exe.link_emit_relocs = true; @@ -430,7 +474,7 @@ pub fn addHeadless(owner: *std.Build, b: *std.Build, opts: HeadlessOptions) *std } if (uses_nintendo_c_io) { - exe.entry = .{ .symbol_name = "main" }; + exe.entry = .disabled; } return exe; @@ -1194,6 +1238,8 @@ fn switchNroPipeline(b: *std.Build, exe: *std.Build.Step.Compile, opts: ExportOp /// Aether.addShader(ae_dep.builder, b, exe, config, "basic", .{ ... }); /// pub fn addShader(owner: *std.Build, b: *std.Build, exe: *std.Build.Step.Compile, config: Config, comptime name: []const u8, paths: ShaderPaths) void { + const root_module = userRootModule(exe); + if (config.platform == .nintendo_3ds and config.gfx == .default) { const picasso = b.pathJoin(&.{ devkitProPath(b), "tools/bin/picasso" }); const sources = b.addWriteFiles(); @@ -1229,8 +1275,8 @@ pub fn addShader(owner: *std.Build, b: *std.Build, exe: *std.Build.Step.Compile, const vert = addPicassoStep(b, picasso, name ++ ".shbin", vert_src); const empty = b.addWriteFiles(); const frag = empty.add(name ++ "_3ds_frag_stub", ""); - exe.root_module.addAnonymousImport(name ++ "_vert", .{ .root_source_file = vert }); - exe.root_module.addAnonymousImport(name ++ "_frag", .{ .root_source_file = frag }); + root_module.addAnonymousImport(name ++ "_vert", .{ .root_source_file = vert }); + root_module.addAnonymousImport(name ++ "_frag", .{ .root_source_file = frag }); return; } @@ -1267,8 +1313,8 @@ pub fn addShader(owner: *std.Build, b: *std.Build, exe: *std.Build.Step.Compile, const vert = addUamStep(b, uam, "vert", name ++ ".vert.dksh", vert_src); const frag = addUamStep(b, uam, "frag", name ++ ".frag.dksh", frag_src); - exe.root_module.addAnonymousImport(name ++ "_vert", .{ .root_source_file = vert }); - exe.root_module.addAnonymousImport(name ++ "_frag", .{ .root_source_file = frag }); + root_module.addAnonymousImport(name ++ "_vert", .{ .root_source_file = vert }); + root_module.addAnonymousImport(name ++ "_frag", .{ .root_source_file 
= frag }); return; } @@ -1285,8 +1331,8 @@ pub fn addShader(owner: *std.Build, b: *std.Build, exe: *std.Build.Step.Compile, "-DVULKAN", "-entry", "fragmentMain", "-stage", "fragment", }, name ++ ".frag.spv", paths.slang); - if (vert) |v| exe.root_module.addAnonymousImport(name ++ "_vert", .{ .root_source_file = v }); - if (frag) |f| exe.root_module.addAnonymousImport(name ++ "_frag", .{ .root_source_file = f }); + if (vert) |v| root_module.addAnonymousImport(name ++ "_vert", .{ .root_source_file = v }); + if (frag) |f| root_module.addAnonymousImport(name ++ "_frag", .{ .root_source_file = f }); }, .opengl => { const slangc = slangcPath(owner); @@ -1300,17 +1346,17 @@ pub fn addShader(owner: *std.Build, b: *std.Build, exe: *std.Build.Step.Compile, "-profile", "glsl_450", "-entry", "fragmentMain", "-stage", "fragment", }, name ++ ".frag.glsl", paths.slang); - if (vert) |v| exe.root_module.addAnonymousImport(name ++ "_vert", .{ .root_source_file = v }); - if (frag) |f| exe.root_module.addAnonymousImport(name ++ "_frag", .{ .root_source_file = f }); + if (vert) |v| root_module.addAnonymousImport(name ++ "_vert", .{ .root_source_file = v }); + if (frag) |f| root_module.addAnonymousImport(name ++ "_frag", .{ .root_source_file = f }); }, .default, .headless => { // Provide empty stubs so @embedFile(name ++ "_vert") still compiles. 
const empty = b.addWriteFiles(); const stub = empty.add(name ++ "_stub", ""); - exe.root_module.addAnonymousImport(name ++ "_vert", .{ + root_module.addAnonymousImport(name ++ "_vert", .{ .root_source_file = stub, }); - exe.root_module.addAnonymousImport(name ++ "_frag", .{ + root_module.addAnonymousImport(name ++ "_frag", .{ .root_source_file = stub, }); }, diff --git a/src/core/paths.zig b/src/core/paths.zig index b64905a..da9a5b9 100644 --- a/src/core/paths.zig +++ b/src/core/paths.zig @@ -31,25 +31,17 @@ const NintendoIo = if (options.config.platform == .nintendo_3ds or options.confi @import("../platform/c_io.zig") else void; -const PathDir = if (NintendoIo != void) NintendoIo.AppDir else Io.Dir; /// Engine-owned directory handles. Cleared via `close()` at engine shutdown. pub const Dirs = struct { /// Read-only assets shipped with the app. On platforms where the /// concept doesn't apply, points at CWD. - resources: PathDir, + resources: Io.Dir, /// User-writable persistent state. On platforms where the concept /// doesn't apply, points at CWD (same handle as `resources`). - data: PathDir, + data: Io.Dir, pub fn close(self: *Dirs, io: Io) void { - if (NintendoIo != void) { - self.resources.close(io); - if (!self.data.eql(self.resources)) self.data.close(io); - NintendoIo.deinitAppDirs(); - return; - } - // std.Io.Dir.cwd() docs: "Closing the returned Dir is checked // illegal behavior." On CWD-fallback platforms (use_cwd, PSP, // unsupported OS) either handle may be the cwd sentinel, so skip @@ -59,6 +51,8 @@ pub const Dirs = struct { if (self.resources.handle != cwd_handle) self.resources.close(io); if (self.data.handle != cwd_handle and self.data.handle != self.resources.handle) self.data.close(io); + + if (NintendoIo != void) NintendoIo.deinitAppDirs(); } }; @@ -94,7 +88,6 @@ pub fn resolve( // feature, not a bug. 
if (options.config.use_cwd) { if (NintendoIo != void) NintendoIo.useCwdDirs(); - if (NintendoIo != void) return .{ .resources = NintendoIo.cwdDir(), .data = NintendoIo.cwdDir() }; return .{ .resources = Io.Dir.cwd(), .data = Io.Dir.cwd() }; } @@ -102,7 +95,7 @@ pub fn resolve( .macos => resolve_macos(io, environ_map, app_name), .windows => resolve_windows(io, environ_map, app_name), .linux => resolve_linux(io, environ_map, app_name), - .nintendo_3ds, .nintendo_switch => resolve_nintendo(app_name), + .nintendo_3ds, .nintendo_switch => resolve_nintendo(io, app_name), // PSP: both dirs collapse to CWD. The EBOOT and its siblings all // live under `ms0:/PSP/GAME//`; the runtime sets CWD there // before main. No separation to enforce. @@ -114,9 +107,21 @@ pub fn resolve( }; } -fn resolve_nintendo(app_name: []const u8) Error!Dirs { - try NintendoIo.initAppDirs(app_name); - return .{ .resources = NintendoIo.resourcesDir(), .data = NintendoIo.dataDir() }; +fn resolve_nintendo(io: Io, app_name: []const u8) Error!Dirs { + NintendoIo.mountData(); + errdefer NintendoIo.deinitAppDirs(); + + var data_buf: [Io.Dir.max_path_bytes]u8 = undefined; + const data_path = NintendoIo.dataRoot(&data_buf, app_name) catch return error.PathTooLong; + const data = try Io.Dir.cwd().createDirPathOpen(io, data_path, .{ .open_options = .{ .iterate = true } }); + errdefer data.close(io); + + const resources = if (NintendoIo.mountResources()) + Io.Dir.openDirAbsolute(io, "romfs:/", .{}) catch data + else + data; + + return .{ .resources = resources, .data = data }; } // -- macOS -------------------------------------------------------------------- diff --git a/src/engine.zig b/src/engine.zig index f41eff0..1d127d0 100644 --- a/src/engine.zig +++ b/src/engine.zig @@ -199,27 +199,26 @@ pub const Engine = struct { } pub fn report(self: *const Engine) void { - const mib = 1024.0 * 1024.0; Util.engine_logger.info("--- memory pools ---", .{}); inline for (std.meta.fields(Pool)) |f| { const p: Pool = 
@enumFromInt(f.value); const used = self.pool_used(p); const budget = self.pool_budget(p); const remaining = self.pool_remaining(p); - Util.engine_logger.info(" {s}: {}/{} bytes ({d:.3}/{d:.3} MiB, {} remaining)", .{ + Util.engine_logger.info(" {s}: {}/{} bytes ({}/{} KiB, {} remaining)", .{ f.name, used, budget, - @as(f64, @floatFromInt(used)) / mib, - @as(f64, @floatFromInt(budget)) / mib, + used / 1024, + budget / 1024, remaining, }); } - Util.engine_logger.info(" total: {}/{} bytes ({d:.3}/{d:.3} MiB)", .{ + Util.engine_logger.info(" total: {}/{} bytes ({}/{} KiB)", .{ self.pool.used, self.pool.budget, - @as(f64, @floatFromInt(self.pool.used)) / mib, - @as(f64, @floatFromInt(self.pool.budget)) / mib, + self.pool.used / 1024, + self.pool.budget / 1024, }); Util.engine_logger.info("--------------------", .{}); } diff --git a/src/platform/3ds/services.zig b/src/platform/3ds/services.zig index 2ba70f7..2509218 100644 --- a/src/platform/3ds/services.zig +++ b/src/platform/3ds/services.zig @@ -14,9 +14,25 @@ //! code is on the stack. The 3DS link step wraps `threadCreate`, and the //! wrapper below raises tiny service-thread stacks to a conservative floor. 
-const process_init = @import("../c_process_init.zig"); +const process_init = @import("aether").CProcessInit; const std = @import("std"); +pub const os = struct { + pub const PATH_MAX = 1024; + pub const NAME_MAX = 255; +}; + +fn AppRoot() type { + const root = @import("root"); + return if (@hasDecl(root, "main")) root else @import("aether_user_root"); +} + +pub const std_options = if (@hasDecl(AppRoot(), "std_options")) AppRoot().std_options else std.Options{}; +pub const std_options_debug_threaded_io = if (@hasDecl(AppRoot(), "std_options_debug_threaded_io")) AppRoot().std_options_debug_threaded_io else null; +pub const std_options_debug_io = if (@hasDecl(AppRoot(), "std_options_debug_io")) AppRoot().std_options_debug_io else std.Io.failing; +const app_std_options_cwd: ?fn () std.Io.Dir = if (@hasDecl(AppRoot(), "std_options_cwd")) AppRoot().std_options_cwd else null; +pub const std_options_cwd = app_std_options_cwd orelse @import("aether").Cio.cwd; + const argv = [_][*:0]const u8{"Aether"}; const min_service_thread_stack = 128 * 1024; const exception_stack_size = 16 * 1024; @@ -92,7 +108,7 @@ fn entry() callconv(.c) c_int { installCrashHandlers(); const init = process_init.makeInit(.{ .vector = &argv }); - @import("root").main(init) catch |err| { + AppRoot().main(init) catch |err| { fatalMainError(err, @errorReturnTrace(), @returnAddress()); }; return 0; diff --git a/src/platform/c_io.zig b/src/platform/c_io.zig index 35adc90..abaa6bd 100644 --- a/src/platform/c_io.zig +++ b/src/platform/c_io.zig @@ -16,24 +16,25 @@ const platform_paths = switch (options.config.platform) { else => unreachable, }; -const c = struct { - extern fn open(path: [*:0]const u8, flags: c_int, ...) 
c_int; - extern fn close(fd: c_int) c_int; - extern fn read(fd: c_int, buf: [*]u8, count: usize) isize; - extern fn write(fd: c_int, buf: [*]const u8, count: usize) isize; - extern fn lseek(fd: c_int, offset: c_long, whence: c_int) c_long; - extern fn rename(old: [*:0]const u8, new: [*:0]const u8) c_int; - extern fn unlink(path: [*:0]const u8) c_int; - extern fn fsync(fd: c_int) c_int; - extern fn ftruncate(fd: c_int, length: c_long) c_int; - extern fn getcwd(buf: [*]u8, size: usize) ?[*:0]u8; - extern fn chdir(path: [*:0]const u8) c_int; - extern fn mkdir(path: [*:0]const u8, mode: c_int) c_int; - extern fn __errno() *c_int; +const c = std.c; + +const CDirent = extern struct { + d_ino: c_int, + d_type: u8, + d_name: [256:0]u8, }; +const devkit = struct { + extern "c" fn open(path: [*:0]const u8, flags: c_int, ...) c_int; + extern "c" fn mkdir(path: [*:0]const u8, mode: c_int) c_int; + extern "c" fn readdir(dirp: *c.DIR) ?*CDirent; + extern "c" fn lseek(fd: c_int, offset: c_long, whence: c_int) c_long; + extern "c" fn ftruncate(fd: c_int, length: c_long) c_int; + extern "c" fn __errno() *c_int; +}; const max_path_bytes = 1024; +const AT_FDCWD: c_int = -2; const O_RDONLY: c_int = 0; const O_WRONLY: c_int = 1; const O_RDWR: c_int = 2; @@ -41,109 +42,38 @@ const O_CREAT: c_int = 0x0200; const O_TRUNC: c_int = 0x0400; const O_EXCL: c_int = 0x0800; const O_BINARY: c_int = 0x10000; - +const O_CLOEXEC: c_int = 0x40000; +const O_NOFOLLOW: c_int = 0x100000; const SEEK_SET: c_int = 0; const SEEK_CUR: c_int = 1; const SEEK_END: c_int = 2; +const DT_FIFO: u8 = 1; +const DT_CHR: u8 = 2; +const DT_DIR: u8 = 4; +const DT_BLK: u8 = 6; +const DT_REG: u8 = 8; +const DT_LNK: u8 = 10; +const DT_SOCK: u8 = 12; +const DT_WHT: u8 = 14; + var read_fd: c_int = -1; var write_fd: c_int = -1; var stderr_writer: File.Writer = undefined; var stderr_writer_initialized = false; var empty_stderr_buffer: [0]u8 = .{}; -var resource_root_buffer: [max_path_bytes:0]u8 = @splat(0); -var 
data_root_buffer: [max_path_bytes:0]u8 = @splat(0); -var resource_root_len: usize = 0; -var data_root_len: usize = 0; var resources_mounted = false; var data_mounted = false; +var atomic_counter: u64 = 0x6165_7468_6572_0000; -const max_dynamic_dirs = 16; - +const max_dynamic_dirs = 32; const DirSlot = struct { used: bool = false, - read_only: bool = false, path: [max_path_bytes:0]u8 = @splat(0), len: usize = 0, }; var dir_slots: [max_dynamic_dirs]DirSlot = [_]DirSlot{.{}} ** max_dynamic_dirs; -const AppDirKind = enum { cwd, resources, data, dynamic }; - -/// Engine-facing directory token. `std.Io.Dir.Handle` is `void` on the -/// no-libc Nintendo targets, so resource/data identity has to live outside -/// the std dir handle. -pub const AppDir = struct { - kind: AppDirKind, - slot: usize = 0, - - pub fn eql(self: AppDir, other: AppDir) bool { - return self.kind == other.kind and (self.kind != .dynamic or self.slot == other.slot); - } - - pub fn openFile(self: AppDir, io_arg: Io, sub_path: []const u8, flags: std.Io.Dir.OpenFileOptions) File.OpenError!File { - _ = io_arg; - return appDirOpenFile(self, sub_path, flags); - } - - pub fn createFile(self: AppDir, io_arg: Io, sub_path: []const u8, flags: std.Io.Dir.CreateFileOptions) File.OpenError!File { - _ = io_arg; - return appDirCreateFile(self, sub_path, flags); - } - - pub fn createDirPathOpen(self: AppDir, io_arg: Io, sub_path: []const u8, create_options: std.Io.Dir.CreateDirPathOpenOptions) std.Io.Dir.CreateDirPathOpenError!AppDir { - _ = io_arg; - return appDirCreateDirPathOpen(self, sub_path, create_options.open_options); - } - - pub fn access(self: AppDir, io_arg: Io, sub_path: []const u8, opts: Dir.AccessOptions) Dir.AccessError!void { - _ = io_arg; - return appDirAccess(self, sub_path, opts); - } - - pub fn createDir(self: AppDir, io_arg: Io, sub_path: []const u8, permissions: Dir.Permissions) Dir.CreateDirError!void { - _ = io_arg; - _ = permissions; - return appDirCreateDir(self, sub_path); - } - - pub fn 
openDir(self: AppDir, io_arg: Io, sub_path: []const u8, opts: Dir.OpenOptions) Dir.OpenError!AppDir { - _ = io_arg; - return appDirOpenDir(self, sub_path, opts); - } - - pub fn deleteFile(self: AppDir, io_arg: Io, sub_path: []const u8) Dir.DeleteFileError!void { - _ = io_arg; - return appDirDeleteFile(self, sub_path); - } - - pub fn rename(self: AppDir, old_sub_path: []const u8, new_dir: AppDir, new_sub_path: []const u8, io_arg: Io) Dir.RenameError!void { - _ = io_arg; - return appDirRename(self, old_sub_path, new_dir, new_sub_path); - } - - pub fn iterate(self: AppDir) Iterator { - return .{ .dir = self }; - } - - pub fn close(self: AppDir, io_arg: Io) void { - _ = io_arg; - if (self.kind == .dynamic and self.slot < dir_slots.len) { - dir_slots[self.slot].used = false; - } - } - - pub const Iterator = struct { - dir: AppDir, - - pub fn next(self: *Iterator, io_arg: Io) anyerror!?Dir.Entry { - _ = self; - _ = io_arg; - unsupported("dir iteration"); - } - }; -}; - const vtable: Io.VTable = blk: { var v = Io.failing.vtable.*; v.crashHandler = crashHandler; @@ -153,10 +83,20 @@ const vtable: Io.VTable = blk: { v.swapCancelProtection = swapCancelProtection; v.checkCancel = checkCancel; v.operate = operate; + v.dirCreateDir = dirCreateDir; + v.dirCreateDirPath = dirCreateDirPath; v.dirCreateDirPathOpen = dirCreateDirPathOpen; + v.dirOpenDir = dirOpenDir; + v.dirAccess = dirAccess; v.dirCreateFile = dirCreateFile; + v.dirCreateFileAtomic = dirCreateFileAtomic; v.dirOpenFile = dirOpenFile; v.dirClose = dirClose; + v.dirRead = dirRead; + v.dirDeleteFile = dirDeleteFile; + v.dirDeleteDir = dirDeleteDir; + v.dirRename = dirRename; + v.dirRenamePreserve = dirRenamePreserve; v.fileStat = fileStat; v.fileLength = fileLength; v.fileClose = fileClose; @@ -186,40 +126,24 @@ pub fn io() Io { } pub fn cwd() Dir { - return .{ .handle = if (@sizeOf(Dir.Handle) == 0) {} else @as(Dir.Handle, @intCast(-1)) }; + return .{ .handle = AT_FDCWD }; } -pub fn cwdDir() AppDir { - if 
(data_root_len != 0) return dataDir(); - return .{ .kind = .cwd }; -} - -pub fn resourcesDir() AppDir { - return .{ .kind = .resources }; -} - -pub fn dataDir() AppDir { - return .{ .kind = .data }; -} - -pub fn initAppDirs(app_name: []const u8) Dir.CreateDirPathOpenError!void { +pub fn mountData() void { data_mounted = platform_paths.mountData(); +} +pub fn mountResources() bool { resources_mounted = platform_paths.mountResources(); - errdefer deinitAppDirs(); - setResourceRoot("romfs:/") catch return error.NameTooLong; + return resources_mounted; +} - var data_buffer: [max_path_bytes]u8 = undefined; - const data_root = platform_paths.dataRoot(&data_buffer, app_name) catch return error.NameTooLong; - try setDataRoot(data_root); - try ensureDirPath(data_root); +pub fn dataRoot(buffer: []u8, app_name: []const u8) error{NameTooLong}![]const u8 { + return platform_paths.dataRoot(buffer, app_name); } pub fn deinitAppDirs() void { for (&dir_slots) |*slot| slot.used = false; - setResourceRoot("") catch unreachable; - setDataRoot("") catch unreachable; - if (resources_mounted) { platform_paths.unmountResources(); resources_mounted = false; @@ -232,8 +156,6 @@ pub fn deinitAppDirs() void { pub fn useCwdDirs() void { deinitAppDirs(); - setResourceRoot("") catch unreachable; - setDataRoot("") catch unreachable; } fn crashHandler(_: ?*anyopaque) void {} @@ -256,133 +178,275 @@ fn operate(_: ?*anyopaque, operation: Io.Operation) Io.Cancelable!Io.Operation.R }; } -fn dirCreateDirPathOpen( +fn dirCreateDir( _: ?*anyopaque, dir: Dir, sub_path: []const u8, - _: Dir.Permissions, - open_options: Dir.OpenOptions, -) Dir.CreateDirPathOpenError!Dir { - _ = dir; - _ = try appDirCreateDirPathOpen(cwdDir(), sub_path, open_options); - return cwd(); -} - -fn dirOpenFile(_: ?*anyopaque, dir: Dir, sub_path: []const u8, flags: Dir.OpenFileOptions) File.OpenError!File { - _ = dir; - return appDirOpenFile(cwdDir(), sub_path, flags); + permissions: Dir.Permissions, +) Dir.CreateDirError!void { + 
var path_buffer: [max_path_bytes:0]u8 = undefined; + const path = try rootedPathForDir(&path_buffer, dir, sub_path); + const mode = permissionsMode(permissions, 0o777); + if (devkit.mkdir(path.ptr, mode) == 0) return; + return createDirError(errno()); } -fn dirCreateFile(_: ?*anyopaque, dir: Dir, sub_path: []const u8, flags: Dir.CreateFileOptions) File.OpenError!File { - _ = dir; - return appDirCreateFile(cwdDir(), sub_path, flags); +fn dirCreateDirPath( + _: ?*anyopaque, + dir: Dir, + sub_path: []const u8, + permissions: Dir.Permissions, +) Dir.CreateDirPathError!Dir.CreatePathStatus { + return createDirPathAt(dir, sub_path, permissions); } -fn dirClose(_: ?*anyopaque, _: []const Dir) void {} - -fn appDirCreateDirPathOpen( - dir: AppDir, +fn dirCreateDirPathOpen( + userdata: ?*anyopaque, + dir: Dir, sub_path: []const u8, + permissions: Dir.Permissions, open_options: Dir.OpenOptions, -) Dir.CreateDirPathOpenError!AppDir { - if (!open_options.access_sub_paths or open_options.iterate or !open_options.follow_symlinks) - unsupported("dirCreateDirPathOpen option"); - if (writeDenied(dir, sub_path)) return error.ReadOnlyFileSystem; - - var path_buffer: [max_path_bytes:0]u8 = undefined; - const path = try rootedPathForDir(&path_buffer, dir, sub_path); - try ensureDirPath(path); - return registerDir(path, false); +) Dir.CreateDirPathOpenError!Dir { + _ = try dirCreateDirPath(userdata, dir, sub_path, permissions); + return dirOpenDir(userdata, dir, sub_path, open_options); } -fn appDirAccess(dir: AppDir, sub_path: []const u8, opts: Dir.AccessOptions) Dir.AccessError!void { - if (!opts.follow_symlinks) unsupported("dirAccess option"); - if (opts.write and writeDenied(dir, sub_path)) return error.ReadOnlyFileSystem; +fn dirOpenDir(_: ?*anyopaque, dir: Dir, sub_path: []const u8, options_arg: Dir.OpenOptions) Dir.OpenError!Dir { + if (!options_arg.access_sub_paths) unsupported("dirOpenDir without sub-path access"); var path_buffer: [max_path_bytes:0]u8 = undefined; const path = 
try rootedPathForDir(&path_buffer, dir, sub_path); - const fd = c.open(path.ptr, O_BINARY | O_RDONLY, @as(c_int, 0)); - if (fd < 0) return accessError(errno()); - _ = c.close(fd); + const stream = c.opendir(path.ptr) orelse return dirOpenError(errno()); + _ = c.closedir(stream); + + return registerDir(path); } -fn appDirCreateDir(dir: AppDir, sub_path: []const u8) Dir.CreateDirError!void { - if (writeDenied(dir, sub_path)) return error.ReadOnlyFileSystem; +fn dirAccess(_: ?*anyopaque, dir: Dir, sub_path: []const u8, opts: Dir.AccessOptions) Dir.AccessError!void { + if (!opts.follow_symlinks) unsupported("dirAccess without symlink following"); var path_buffer: [max_path_bytes:0]u8 = undefined; const path = try rootedPathForDir(&path_buffer, dir, sub_path); - try createSingleDir(path.ptr); + const fd = devkit.open(path.ptr, O_BINARY | O_RDONLY | O_CLOEXEC, @as(c_int, 0)); + if (fd < 0) return accessError(errno()); + _ = c.close(fd); } -fn appDirOpenDir(dir: AppDir, sub_path: []const u8, opts: Dir.OpenOptions) Dir.OpenError!AppDir { - _ = dir; - _ = sub_path; - _ = opts; - unsupported("dirOpenDir"); -} +fn dirOpenFile(_: ?*anyopaque, dir: Dir, sub_path: []const u8, flags: Dir.OpenFileOptions) File.OpenError!File { + if (flags.lock != .none) return error.FileLocksUnsupported; + if (flags.path_only) unsupported("path-only file open"); + if (!flags.allow_ctty) {} -fn appDirOpenFile(dir: AppDir, sub_path: []const u8, flags: Dir.OpenFileOptions) File.OpenError!File { - if (flags.lock != .none or flags.path_only or flags.allow_ctty or flags.resolve_beneath) - unsupported("dirOpenFile option"); - if (!flags.allow_directory or !flags.follow_symlinks) - unsupported("dirOpenFile path policy"); const role: FileRole = switch (flags.mode) { .read_only => .read, .write_only => .write, .read_write => .read_write, }; - if (flags.mode != .read_only and writeDenied(dir, sub_path)) return error.ReadOnlyFileSystem; var path_buffer: [max_path_bytes:0]u8 = undefined; - const open_flags: 
c_int = O_BINARY | switch (flags.mode) { + var open_flags: c_int = O_BINARY | O_CLOEXEC | switch (flags.mode) { .read_only => O_RDONLY, .write_only => O_WRONLY, .read_write => O_RDWR, }; + if (!flags.follow_symlinks) open_flags |= O_NOFOLLOW; + const path = try rootedPathForDir(&path_buffer, dir, sub_path); - const fd = c.open(path.ptr, open_flags, @as(c_int, 0)); + const fd = devkit.open(path.ptr, open_flags, @as(c_int, 0)); if (fd < 0) return openError(errno()); errdefer _ = c.close(fd); return registerFile(fd, role); } -fn appDirCreateFile(dir: AppDir, sub_path: []const u8, flags: Dir.CreateFileOptions) File.OpenError!File { - if (flags.lock != .none or flags.resolve_beneath) unsupported("dirCreateFile option"); - if (writeDenied(dir, sub_path)) return error.ReadOnlyFileSystem; +fn dirCreateFile(_: ?*anyopaque, dir: Dir, sub_path: []const u8, flags: Dir.CreateFileOptions) File.OpenError!File { + if (flags.lock != .none) return error.FileLocksUnsupported; var path_buffer: [max_path_bytes:0]u8 = undefined; - const path = try rootedPathForDir(&path_buffer, dir, sub_path); - var open_flags: c_int = O_BINARY | if (flags.read) O_RDWR else O_WRONLY; + var open_flags: c_int = O_BINARY | O_CLOEXEC | if (flags.read) O_RDWR else O_WRONLY; open_flags |= O_CREAT; if (flags.truncate) open_flags |= O_TRUNC; if (flags.exclusive) open_flags |= O_EXCL; - const mode: c_int = if (@bitSizeOf(File.Permissions) == 0) - 0o666 - else - @intCast(@intFromEnum(flags.permissions)); - const fd = c.open(path.ptr, open_flags, mode); + const mode = permissionsMode(flags.permissions, 0o666); + const path = try rootedPathForDir(&path_buffer, dir, sub_path); + const fd = devkit.open(path.ptr, open_flags, mode); if (fd < 0) return openError(errno()); errdefer _ = c.close(fd); return registerFile(fd, if (flags.read) .read_write else .write); } -fn appDirDeleteFile(dir: AppDir, sub_path: []const u8) Dir.DeleteFileError!void { - if (writeDenied(dir, sub_path)) return error.ReadOnlyFileSystem; +fn 
dirCreateFileAtomic( + userdata: ?*anyopaque, + dir: Dir, + sub_path: []const u8, + opts: Dir.CreateFileAtomicOptions, +) Dir.CreateFileAtomicError!File.Atomic { + var target_dir = dir; + var close_target_dir = false; + var dest_sub_path = sub_path; + errdefer if (close_target_dir) target_dir.close(io()); + + if (std.fs.path.dirname(sub_path)) |parent| { + target_dir = if (opts.make_path) + dirCreateDirPathOpen(userdata, dir, parent, .default_dir, .{}) catch |err| return createFileAtomicDirError(err) + else + dirOpenDir(userdata, dir, parent, .{}) catch |err| return createFileAtomicDirError(err); + close_target_dir = true; + dest_sub_path = std.fs.path.basename(sub_path); + } else if (opts.make_path) { + _ = opts.make_path; + } + + var attempts: u8 = 0; + while (attempts < 16) : (attempts += 1) { + atomic_counter +%= 1; + const basename_hex = atomic_counter; + const tmp_sub_path = std.fmt.hex(basename_hex); + const file = dirCreateFile(userdata, target_dir, &tmp_sub_path, .{ + .read = true, + .exclusive = true, + .permissions = opts.permissions, + }) catch |err| switch (err) { + error.PathAlreadyExists => continue, + error.FileTooBig, error.IsDir, error.DeviceBusy, error.FileLocksUnsupported, error.PipeBusy => return error.Unexpected, + else => |e| return @errorCast(e), + }; + errdefer file.close(io()); + + const result: File.Atomic = .{ + .file = file, + .file_basename_hex = basename_hex, + .file_open = true, + .file_exists = true, + .dir = target_dir, + .close_dir_on_deinit = close_target_dir, + .dest_sub_path = dest_sub_path, + }; + close_target_dir = false; + return result; + } + + return error.SystemResources; +} + +fn dirClose(_: ?*anyopaque, dirs: []const Dir) void { + for (dirs) |dir| { + if (dirSlotIndex(dir)) |i| dir_slots[i].used = false; + } +} + +fn dirRead(_: ?*anyopaque, reader: *Dir.Reader, out: []Dir.Entry) Dir.Reader.Error!usize { + const Header = extern struct { + pos: c_long, + }; + const header_end = @sizeOf(Header); + if (reader.index < 
header_end) { + reader.index = header_end; + reader.end = header_end; + const header: *Header = @ptrCast(@alignCast(reader.buffer.ptr)); + header.* = .{ .pos = 0 }; + } + + const header: *Header = @ptrCast(@alignCast(reader.buffer.ptr)); + if (reader.state == .reset) { + header.pos = 0; + reader.state = .reading; + } + if (reader.state == .finished) return 0; + + var path_buffer: [max_path_bytes:0]u8 = undefined; + const root = dirRoot(reader.dir); + const path = zPath(&path_buffer, if (root.len == 0) "." else root) catch return error.Unexpected; + const stream = c.opendir(path.ptr) orelse return dirReadError(errno()); + var stream_open = true; + defer if (stream_open) { + _ = c.closedir(stream); + }; + + c.seekdir(stream, header.pos); + + var count: usize = 0; + var name_end = reader.buffer.len; + while (count < out.len) { + devkit.__errno().* = 0; + const entry = devkit.readdir(stream) orelse { + if (errno() != 0) return dirReadError(errno()); + reader.state = .finished; + return count; + }; + header.pos = c.telldir(stream); + + const name = std.mem.span(@as([*:0]const u8, @ptrCast(&entry.d_name))); + if (std.mem.eql(u8, name, ".") or std.mem.eql(u8, name, "..")) continue; + if (name.len + 1 > name_end - header_end) { + if (count == 0) return error.Unexpected; + break; + } + + name_end -= name.len + 1; + @memcpy(reader.buffer[name_end..][0..name.len], name); + reader.buffer[name_end + name.len] = 0; + out[count] = .{ + .name = reader.buffer[name_end .. 
name_end + name.len], + .kind = direntKind(entry.d_type), + .inode = @intCast(entry.d_ino), + }; + count += 1; + } + + stream_open = false; + _ = c.closedir(stream); + return count; +} +fn dirDeleteFile(_: ?*anyopaque, dir: Dir, sub_path: []const u8) Dir.DeleteFileError!void { var path_buffer: [max_path_bytes:0]u8 = undefined; const path = try rootedPathForDir(&path_buffer, dir, sub_path); - if (c.unlink(path.ptr) != 0) return deleteFileError(errno()); + if (c.unlink(path.ptr) == 0) return; + return deleteFileError(errno()); } -fn appDirRename(old_dir: AppDir, old_sub_path: []const u8, new_dir: AppDir, new_sub_path: []const u8) Dir.RenameError!void { - if (writeDenied(old_dir, old_sub_path) or writeDenied(new_dir, new_sub_path)) return error.ReadOnlyFileSystem; +fn dirDeleteDir(_: ?*anyopaque, dir: Dir, sub_path: []const u8) Dir.DeleteDirError!void { + var path_buffer: [max_path_bytes:0]u8 = undefined; + const path = try rootedPathForDir(&path_buffer, dir, sub_path); + if (c.rmdir(path.ptr) == 0) return; + return deleteDirError(errno()); +} +fn dirRename( + _: ?*anyopaque, + old_dir: Dir, + old_sub_path: []const u8, + new_dir: Dir, + new_sub_path: []const u8, +) Dir.RenameError!void { var old_path_buffer: [max_path_bytes:0]u8 = undefined; var new_path_buffer: [max_path_bytes:0]u8 = undefined; const old_path = try rootedPathForDir(&old_path_buffer, old_dir, old_sub_path); const new_path = try rootedPathForDir(&new_path_buffer, new_dir, new_sub_path); - if (c.rename(old_path.ptr, new_path.ptr) != 0) return renameError(errno()); + if (c.rename(old_path.ptr, new_path.ptr) == 0) return; + return renameError(errno()); +} + +fn dirRenamePreserve( + userdata: ?*anyopaque, + old_dir: Dir, + old_sub_path: []const u8, + new_dir: Dir, + new_sub_path: []const u8, +) Dir.RenamePreserveError!void { + dirAccess(userdata, new_dir, new_sub_path, .{}) catch |err| switch (err) { + error.FileNotFound => {}, + else => |e| return @errorCast(e), + }; + if (dirAccess(userdata, new_dir, 
new_sub_path, .{})) |_| return error.PathAlreadyExists else |err| switch (err) { + error.FileNotFound => {}, + else => |e| return @errorCast(e), + } + return dirRename(userdata, old_dir, old_sub_path, new_dir, new_sub_path) catch |err| switch (err) { + error.DiskQuota, error.IsDir, error.LinkQuotaExceeded, error.NoDevice, error.PipeBusy, error.AntivirusInterference, error.HardwareFailure => return error.Unexpected, + else => |e| return @errorCast(e), + }; } fn fileStat(_: ?*anyopaque, file: File) File.StatError!File.Stat { @@ -401,11 +465,11 @@ fn fileStat(_: ?*anyopaque, file: File) File.StatError!File.Stat { fn fileLength(_: ?*anyopaque, file: File) File.LengthError!u64 { const fd = fdForRegular(file); - const current = c.lseek(fd, 0, SEEK_CUR); + const current = devkit.lseek(fd, 0, SEEK_CUR); if (current < 0) return seekToLengthError(); - const end = c.lseek(fd, 0, SEEK_END); + const end = devkit.lseek(fd, 0, SEEK_END); if (end < 0) return seekToLengthError(); - _ = c.lseek(fd, current, SEEK_SET); + _ = devkit.lseek(fd, current, SEEK_SET); return @intCast(end); } @@ -495,7 +559,7 @@ fn fileWriteStreaming(file: File, header: []const u8, data: []const []const u8, } fn fileSeekBy(_: ?*anyopaque, file: File, relative_offset: i64) File.SeekError!void { - if (c.lseek(fdForRegular(file), @intCast(relative_offset), SEEK_CUR) < 0) return seekError(); + if (devkit.lseek(fdForRegular(file), @intCast(relative_offset), SEEK_CUR) < 0) return seekError(); } fn fileSeekTo(_: ?*anyopaque, file: File, absolute_offset: u64) File.SeekError!void { @@ -521,7 +585,7 @@ fn fileSupportsAnsiEscapeCodes(_: ?*anyopaque, _: File) Io.Cancelable!bool { fn fileSetLength(_: ?*anyopaque, file: File, length: u64) File.SetLengthError!void { if (length > std.math.maxInt(c_long)) return error.FileTooBig; - if (c.ftruncate(fdForRegular(file), @intCast(length)) < 0) return setLengthError(); + if (devkit.ftruncate(fdForRegular(file), @intCast(length)) < 0) return setLengthError(); } fn lockStderr(_: 
?*anyopaque, terminal_mode: ?Io.Terminal.Mode) Io.Cancelable!Io.LockedStderr { @@ -551,9 +615,8 @@ fn unlockStderr(_: ?*anyopaque) void { fn processCurrentPath(_: ?*anyopaque, buffer: []u8) std.process.CurrentPathError!usize { if (buffer.len == 0) return error.NameTooLong; const ptr = c.getcwd(buffer.ptr, buffer.len) orelse return currentPathError(); - const path = std.mem.span(ptr); - if (path.len >= buffer.len) return error.NameTooLong; - return path.len; + _ = ptr; + return std.mem.indexOfScalar(u8, buffer, 0) orelse error.NameTooLong; } fn processSetCurrentPath(_: ?*anyopaque, path: []const u8) std.process.SetCurrentPathError!void { @@ -583,71 +646,45 @@ fn random(_: ?*anyopaque, buffer: []u8) void { const FileRole = enum { read, write, read_write }; -fn resourceRoot() []const u8 { - return resource_root_buffer[0..resource_root_len]; -} - -fn dataRoot() []const u8 { - return data_root_buffer[0..data_root_len]; -} - -fn setResourceRoot(root: []const u8) error{NameTooLong}!void { - try setRoot(&resource_root_buffer, &resource_root_len, root); -} - -fn setDataRoot(root: []const u8) error{NameTooLong}!void { - try setRoot(&data_root_buffer, &data_root_len, root); -} +fn createDirPathAt(dir: Dir, sub_path: []const u8, permissions: Dir.Permissions) Dir.CreateDirPathError!Dir.CreatePathStatus { + if (sub_path.len == 0) return error.BadPathName; -fn setRoot(buffer: *[max_path_bytes:0]u8, len: *usize, root: []const u8) error{NameTooLong}!void { - if (root.len >= max_path_bytes) return error.NameTooLong; - @memcpy(buffer[0..root.len], root); - buffer[root.len] = 0; - len.* = root.len; -} - -fn dirRoot(dir: AppDir) []const u8 { - return switch (dir.kind) { - .cwd => "", - .resources => resourceRoot(), - .data => dataRoot(), - .dynamic => { - if (dir.slot >= dir_slots.len or !dir_slots[dir.slot].used) - unsupported("closed Nintendo dir handle"); - return dir_slots[dir.slot].path[0..dir_slots[dir.slot].len]; - }, - }; -} - -fn dirReadOnly(dir: AppDir) bool { - return switch 
(dir.kind) { - .resources => true, - .cwd, .data => false, - .dynamic => { - if (dir.slot >= dir_slots.len or !dir_slots[dir.slot].used) - unsupported("closed Nintendo dir handle"); - return dir_slots[dir.slot].read_only; - }, - }; -} + var path_buffer: [max_path_bytes:0]u8 = undefined; + const full = rootedPathForDir(&path_buffer, dir, sub_path) catch |err| return err; + const full_len = full.len; + const full_ptr = path_buffer[0..].ptr; + const mode = permissionsMode(permissions, 0o777); -fn writeDenied(dir: AppDir, sub_path: []const u8) bool { - if (isRomfsPath(sub_path)) return true; - return !isAbsoluteOrDevicePath(sub_path) and dirReadOnly(dir); + var status: Dir.CreatePathStatus = .existed; + const start = pathRootEnd(full); + var i = start; + while (i < full_len) : (i += 1) { + if (path_buffer[i] != '/') continue; + if (i == start) continue; + path_buffer[i] = 0; + if (try createSingleDirPath(full_ptr, mode) == .created) status = .created; + path_buffer[i] = '/'; + } + if (try createSingleDirPath(full_ptr, mode) == .created) status = .created; + return status; } -fn registerDir(path: []const u8, read_only: bool) Dir.CreateDirPathOpenError!AppDir { - for (&dir_slots, 0..) 
|*slot, i| { - if (slot.used) continue; - if (path.len >= max_path_bytes) return error.NameTooLong; - @memcpy(slot.path[0..path.len], path); - slot.path[path.len] = 0; - slot.len = path.len; - slot.read_only = read_only; - slot.used = true; - return .{ .kind = .dynamic, .slot = i }; +fn createSingleDirPath(path: [*:0]const u8, mode: c_int) Dir.CreateDirPathError!Dir.CreatePathStatus { + if (devkit.mkdir(path, mode) == 0) return .created; + switch (errno()) { + 17 => return .existed, + 1 => return error.PermissionDenied, + 2 => return error.FileNotFound, + 6 => return error.NoDevice, + 12 => return error.SystemResources, + 13 => return error.AccessDenied, + 20 => return error.NotDir, + 28 => return error.NoSpaceLeft, + 30 => return error.ReadOnlyFileSystem, + 91 => return error.NameTooLong, + 92 => return error.SymLinkLoop, + else => return error.Unexpected, } - return error.SystemResources; } fn registerFile(fd: c_int, role: FileRole) File { @@ -701,13 +738,87 @@ fn fdFromFileHandle(file: File) c_int { return @intCast(file.handle); } +fn fdFromDirHandle(dir: Dir) c_int { + return @intCast(dir.handle); +} + +fn permissionsMode(permissions: File.Permissions, default: c_int) c_int { + if (@bitSizeOf(File.Permissions) == 0) return default; + return @intCast(@intFromEnum(permissions)); +} + fn isStderrFile(file: File) bool { return file.flags.nonblocking; } +fn registerDir(path: []const u8) Dir.OpenError!Dir { + for (&dir_slots, 0..) 
|*slot, i| { + if (slot.used) continue; + if (path.len >= max_path_bytes) return error.NameTooLong; + @memcpy(slot.path[0..path.len], path); + slot.path[path.len] = 0; + slot.len = path.len; + slot.used = true; + return .{ .handle = @intCast(i + 3) }; + } + return error.SystemResources; +} + +fn dirSlotIndex(dir: Dir) ?usize { + const handle = fdFromDirHandle(dir); + if (handle < 3) return null; + const index: usize = @intCast(handle - 3); + if (index >= dir_slots.len or !dir_slots[index].used) return null; + return index; +} + +fn dirRoot(dir: Dir) []const u8 { + if (fdFromDirHandle(dir) == AT_FDCWD) return ""; + const index = dirSlotIndex(dir) orelse unsupported("closed Nintendo dir handle"); + return dir_slots[index].path[0..dir_slots[index].len]; +} + +fn rootedPath(buf: *[max_path_bytes:0]u8, path: []const u8, root: []const u8) error{ NameTooLong, BadPathName }![:0]const u8 { + if (isAbsoluteOrDevicePath(path) or root.len == 0) return zPath(buf, path); + if (std.mem.indexOfScalar(u8, path, 0) != null) return error.BadPathName; + + const needs_sep = !std.mem.endsWith(u8, root, "/") and !std.mem.startsWith(u8, path, "/"); + const len = root.len + @intFromBool(needs_sep) + path.len; + if (len >= max_path_bytes) return error.NameTooLong; + + var i: usize = 0; + @memcpy(buf[i..][0..root.len], root); + i += root.len; + if (needs_sep) { + buf[i] = '/'; + i += 1; + } + @memcpy(buf[i..][0..path.len], path); + buf[len] = 0; + return buf[0..len :0]; +} + +fn rootedPathForDir(buf: *[max_path_bytes:0]u8, dir: Dir, path: []const u8) error{ NameTooLong, BadPathName }![:0]const u8 { + return rootedPath(buf, path, dirRoot(dir)); +} + +fn direntKind(kind: u8) File.Kind { + return switch (kind) { + DT_BLK => .block_device, + DT_CHR => .character_device, + DT_DIR => .directory, + DT_FIFO => .named_pipe, + DT_LNK => .sym_link, + DT_REG => .file, + DT_SOCK => .unix_domain_socket, + DT_WHT => .whiteout, + else => .unknown, + }; +} + fn seekToOffset(fd: c_int, offset: u64) 
File.SeekError!void { if (offset > std.math.maxInt(c_long)) return error.Unseekable; - if (c.lseek(fd, @intCast(offset), SEEK_SET) < 0) return seekError(); + if (devkit.lseek(fd, @intCast(offset), SEEK_SET) < 0) return seekError(); } fn writeVectors(fd: c_int, header: []const u8, data: []const []const u8, splat: usize) File.WritePositionalError!usize { @@ -743,30 +854,6 @@ fn zPath(buf: *[max_path_bytes:0]u8, path: []const u8) error{ NameTooLong, BadPa return buf[0..path.len :0]; } -fn rootedPath(buf: *[max_path_bytes:0]u8, path: []const u8, root: []const u8) error{ NameTooLong, BadPathName }![:0]const u8 { - if (isAbsoluteOrDevicePath(path) or root.len == 0) return zPath(buf, path); - if (std.mem.indexOfScalar(u8, path, 0) != null) return error.BadPathName; - - const needs_sep = root.len > 0 and !std.mem.endsWith(u8, root, "/") and !std.mem.startsWith(u8, path, "/"); - const len = root.len + @intFromBool(needs_sep) + path.len; - if (len >= max_path_bytes) return error.NameTooLong; - - var i: usize = 0; - @memcpy(buf[i..][0..root.len], root); - i += root.len; - if (needs_sep) { - buf[i] = '/'; - i += 1; - } - @memcpy(buf[i..][0..path.len], path); - buf[len] = 0; - return buf[0..len :0]; -} - -fn rootedPathForDir(buf: *[max_path_bytes:0]u8, dir: AppDir, path: []const u8) error{ NameTooLong, BadPathName }![:0]const u8 { - return rootedPath(buf, path, dirRoot(dir)); -} - fn isAbsoluteOrDevicePath(path: []const u8) bool { if (path.len == 0) return false; if (path[0] == '/') return true; @@ -775,30 +862,6 @@ fn isAbsoluteOrDevicePath(path: []const u8) bool { return colon < slash; } -fn isRomfsPath(path: []const u8) bool { - if (path.len < "romfs:".len) return false; - return std.ascii.eqlIgnoreCase(path[0.."romfs:".len], "romfs:"); -} - -fn ensureDirPath(path: []const u8) Dir.CreateDirPathOpenError!void { - if (path.len == 0) return error.BadPathName; - var path_buffer: [max_path_bytes:0]u8 = undefined; - const full = zPath(&path_buffer, path) catch |err| return err; - 
const full_len = full.len; - const full_ptr = path_buffer[0..].ptr; - - const start = pathRootEnd(path); - var i = start; - while (i < full_len) : (i += 1) { - if (path_buffer[i] != '/') continue; - if (i == start) continue; - path_buffer[i] = 0; - try createDir(full_ptr); - path_buffer[i] = '/'; - } - try createDir(full_ptr); -} - fn pathRootEnd(path: []const u8) usize { if (std.mem.indexOfScalar(u8, path, ':')) |colon| { if (colon + 1 < path.len and path[colon + 1] == '/') return colon + 2; @@ -807,44 +870,33 @@ fn pathRootEnd(path: []const u8) usize { return if (path.len > 0 and path[0] == '/') 1 else 0; } -fn createDir(path: [*:0]const u8) Dir.CreateDirPathOpenError!void { - if (c.mkdir(path, 0o777) == 0) return; - switch (errno()) { - 17 => return, - 1 => return error.PermissionDenied, - 2 => return error.FileNotFound, - 6 => return error.NoDevice, - 12 => return error.SystemResources, - 13 => return error.AccessDenied, - 20 => return error.NotDir, - 28 => return error.NoSpaceLeft, - 30 => return error.ReadOnlyFileSystem, - 91 => return error.NameTooLong, - 92 => return error.SymLinkLoop, - else => return error.Unexpected, - } +fn createFileAtomicDirError(err: anyerror) Dir.CreateFileAtomicError { + return switch (err) { + error.PathAlreadyExists, error.NotDir => error.NotDir, + error.FileTooBig, error.IsDir, error.DeviceBusy, error.FileLocksUnsupported => error.Unexpected, + else => @errorCast(err), + }; } -fn createSingleDir(path: [*:0]const u8) Dir.CreateDirError!void { - if (c.mkdir(path, 0o777) == 0) return; - switch (errno()) { - 1 => return error.PermissionDenied, - 2 => return error.FileNotFound, - 6 => return error.NoDevice, - 12 => return error.SystemResources, - 13 => return error.AccessDenied, - 17 => return error.PathAlreadyExists, - 20 => return error.NotDir, - 28 => return error.NoSpaceLeft, - 30 => return error.ReadOnlyFileSystem, - 91 => return error.NameTooLong, - 92 => return error.SymLinkLoop, - else => return error.Unexpected, - } +fn 
errno() c_int { + return devkit.__errno().*; } -fn errno() c_int { - return c.__errno().*; +fn createDirError(code: c_int) Dir.CreateDirError { + return switch (code) { + 1 => error.PermissionDenied, + 2 => error.FileNotFound, + 6 => error.NoDevice, + 12 => error.SystemResources, + 13 => error.AccessDenied, + 17 => error.PathAlreadyExists, + 20 => error.NotDir, + 28 => error.NoSpaceLeft, + 30 => error.ReadOnlyFileSystem, + 91 => error.NameTooLong, + 92 => error.SymLinkLoop, + else => error.Unexpected, + }; } fn accessError(code: c_int) Dir.AccessError { @@ -862,6 +914,22 @@ fn accessError(code: c_int) Dir.AccessError { }; } +fn dirOpenError(code: c_int) Dir.OpenError { + return switch (code) { + 1 => error.PermissionDenied, + 2 => error.FileNotFound, + 6 => error.NoDevice, + 12 => error.SystemResources, + 13 => error.AccessDenied, + 20 => error.NotDir, + 23 => error.ProcessFdQuotaExceeded, + 24 => error.SystemFdQuotaExceeded, + 91 => error.NameTooLong, + 92 => error.SymLinkLoop, + else => error.Unexpected, + }; +} + fn openError(code: c_int) File.OpenError { return switch (code) { 1 => error.PermissionDenied, @@ -901,6 +969,22 @@ fn deleteFileError(code: c_int) Dir.DeleteFileError { }; } +fn deleteDirError(code: c_int) Dir.DeleteDirError { + return switch (code) { + 1 => error.PermissionDenied, + 2 => error.FileNotFound, + 12 => error.SystemResources, + 13 => error.AccessDenied, + 16 => error.FileBusy, + 20 => error.NotDir, + 30 => error.ReadOnlyFileSystem, + 39 => error.DirNotEmpty, + 91 => error.NameTooLong, + 92 => error.SymLinkLoop, + else => error.Unexpected, + }; +} + fn renameError(code: c_int) Dir.RenameError { return switch (code) { 1 => error.PermissionDenied, @@ -945,6 +1029,15 @@ fn readStreamingError() Io.Operation.FileReadStreaming.Error { }; } +fn dirReadError(code: c_int) Dir.Reader.Error { + return switch (code) { + 1 => error.PermissionDenied, + 12 => error.SystemResources, + 13 => error.AccessDenied, + else => error.Unexpected, + }; +} + fn 
writeError() File.WritePositionalError { return switch (errno()) { 5 => error.InputOutput, diff --git a/src/platform/switch/services.zig b/src/platform/switch/services.zig index ec43ed9..09c05fd 100644 --- a/src/platform/switch/services.zig +++ b/src/platform/switch/services.zig @@ -10,7 +10,25 @@ //! ld picks the first definition seen — to route the entry through //! Aether instead of libnx's nnMain wrapper. -const process_init = @import("../c_process_init.zig"); +const process_init = @import("aether").CProcessInit; +const std = @import("std"); + +pub const os = struct { + pub const PATH_MAX = 1024; + pub const NAME_MAX = 255; +}; + +fn AppRoot() type { + const root = @import("root"); + return if (@hasDecl(root, "main")) root else @import("aether_user_root"); +} + +pub const std_options = if (@hasDecl(AppRoot(), "std_options")) AppRoot().std_options else std.Options{}; +pub const panic = if (@hasDecl(AppRoot(), "panic")) AppRoot().panic else std.debug.no_panic; +pub const std_options_debug_threaded_io = if (@hasDecl(AppRoot(), "std_options_debug_threaded_io")) AppRoot().std_options_debug_threaded_io else null; +pub const std_options_debug_io = if (@hasDecl(AppRoot(), "std_options_debug_io")) AppRoot().std_options_debug_io else std.Io.failing; +const app_std_options_cwd: ?fn () std.Io.Dir = if (@hasDecl(AppRoot(), "std_options_cwd")) AppRoot().std_options_cwd else null; +pub const std_options_cwd = app_std_options_cwd orelse @import("aether").Cio.cwd; comptime { @export(&entry, .{ .name = "main" }); @@ -18,6 +36,6 @@ comptime { fn entry(_: c_int, _: [*c][*c]u8) callconv(.c) c_int { const init = process_init.makeInit(.{ .vector = {} }); - @import("root").main(init) catch return 1; + AppRoot().main(init) catch return 1; return 0; } diff --git a/src/root.zig b/src/root.zig index b1b85cc..0f81917 100644 --- a/src/root.zig +++ b/src/root.zig @@ -13,19 +13,10 @@ pub const ctx_to_self = Util.ctx_to_self; /// Only available when `platform == .psp`; evaluates to `void` 
otherwise. pub const Psp = if (platform == .psp) @import("platform/psp/psp_dialogs.zig") else void; pub const Cio = if (platform == .nintendo_3ds or platform == .nintendo_switch) @import("platform/c_io.zig") else void; -pub const ThreeDS = if (platform == .nintendo_3ds) @import("platform/3ds/services.zig") else void; - -// Pull in the 3DS / Switch entry shim on those targets. Each shim's -// comptime block `@export`s a C-callable `main` so `-ofmt=c` emits -// the full engine call graph rather than constants-only output. -comptime { - if (platform == .nintendo_3ds) { - _ = @import("platform/3ds/services.zig"); - } - if (platform == .nintendo_switch) { - _ = @import("platform/switch/services.zig"); - } -} +pub const CProcessInit = if (platform == .nintendo_3ds or platform == .nintendo_switch) @import("platform/c_process_init.zig") else void; +pub const ThreeDS = if (platform == .nintendo_3ds) struct { + pub const panic = @import("root").panic; +} else void; /// Comptime-known platform and graphics backend, resolved from build options. /// User code can switch on these for per-platform configuration without diff --git a/src/util/logger.zig b/src/util/logger.zig index 7e2fc19..2e1b7e9 100644 --- a/src/util/logger.zig +++ b/src/util/logger.zig @@ -5,6 +5,7 @@ var log_buffer: [4096]u8 = @splat(0); var file_log: std.Io.File = undefined; var file_writer: std.Io.File.Writer = undefined; var writer: *std.Io.Writer = undefined; +var file_logging = false; /// PSP has no per-user data dir concept; the log sits at CWD (which is /// where the EBOOT lives) regardless of what `data_dir` points at. 
Every @@ -19,11 +20,14 @@ pub fn init(io: std.Io, data_dir: anytype) !void { } file_writer = file_log.writer(io, &log_buffer); writer = &file_writer.interface; + file_logging = true; } pub fn deinit(io: std.Io) void { + if (!file_logging) return; writer.flush() catch {}; file_log.close(io); + file_logging = false; } pub fn aether_log_fn( @@ -36,6 +40,6 @@ pub fn aether_log_fn( const prefix = scope_prefix ++ "[" ++ comptime level.asText() ++ "]: "; - writer.print(prefix ++ format ++ "\n", args) catch {}; + if (file_logging) writer.print(prefix ++ format ++ "\n", args) catch {}; std.debug.print(prefix ++ format ++ "\n", args); } From b7e0ab7a3dff15c054469380b223e16df6ee4af6 Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Tue, 2 Jun 2026 08:05:50 -0400 Subject: [PATCH 16/44] Fix seeking --- src/platform/c_io.zig | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/platform/c_io.zig b/src/platform/c_io.zig index abaa6bd..f31ce75 100644 --- a/src/platform/c_io.zig +++ b/src/platform/c_io.zig @@ -28,8 +28,6 @@ const devkit = struct { extern "c" fn open(path: [*:0]const u8, flags: c_int, ...) 
c_int; extern "c" fn mkdir(path: [*:0]const u8, mode: c_int) c_int; extern "c" fn readdir(dirp: *c.DIR) ?*CDirent; - extern "c" fn lseek(fd: c_int, offset: c_long, whence: c_int) c_long; - extern "c" fn ftruncate(fd: c_int, length: c_long) c_int; extern "c" fn __errno() *c_int; }; const max_path_bytes = 1024; @@ -465,11 +463,11 @@ fn fileStat(_: ?*anyopaque, file: File) File.StatError!File.Stat { fn fileLength(_: ?*anyopaque, file: File) File.LengthError!u64 { const fd = fdForRegular(file); - const current = devkit.lseek(fd, 0, SEEK_CUR); + const current = c.lseek(fd, 0, SEEK_CUR); if (current < 0) return seekToLengthError(); - const end = devkit.lseek(fd, 0, SEEK_END); + const end = c.lseek(fd, 0, SEEK_END); if (end < 0) return seekToLengthError(); - _ = devkit.lseek(fd, current, SEEK_SET); + _ = c.lseek(fd, current, SEEK_SET); return @intCast(end); } @@ -559,7 +557,7 @@ fn fileWriteStreaming(file: File, header: []const u8, data: []const []const u8, } fn fileSeekBy(_: ?*anyopaque, file: File, relative_offset: i64) File.SeekError!void { - if (devkit.lseek(fdForRegular(file), @intCast(relative_offset), SEEK_CUR) < 0) return seekError(); + if (c.lseek(fdForRegular(file), @intCast(relative_offset), SEEK_CUR) < 0) return seekError(); } fn fileSeekTo(_: ?*anyopaque, file: File, absolute_offset: u64) File.SeekError!void { @@ -584,8 +582,8 @@ fn fileSupportsAnsiEscapeCodes(_: ?*anyopaque, _: File) Io.Cancelable!bool { } fn fileSetLength(_: ?*anyopaque, file: File, length: u64) File.SetLengthError!void { - if (length > std.math.maxInt(c_long)) return error.FileTooBig; - if (devkit.ftruncate(fdForRegular(file), @intCast(length)) < 0) return setLengthError(); + if (length > std.math.maxInt(c.off_t)) return error.FileTooBig; + if (c.ftruncate(fdForRegular(file), @intCast(length)) < 0) return setLengthError(); } fn lockStderr(_: ?*anyopaque, terminal_mode: ?Io.Terminal.Mode) Io.Cancelable!Io.LockedStderr { @@ -817,8 +815,8 @@ fn direntKind(kind: u8) File.Kind { } fn 
seekToOffset(fd: c_int, offset: u64) File.SeekError!void { - if (offset > std.math.maxInt(c_long)) return error.Unseekable; - if (devkit.lseek(fd, @intCast(offset), SEEK_SET) < 0) return seekError(); + if (offset > std.math.maxInt(c.off_t)) return error.Unseekable; + if (c.lseek(fd, @intCast(offset), SEEK_SET) < 0) return seekError(); } fn writeVectors(fd: c_int, header: []const u8, data: []const []const u8, splat: usize) File.WritePositionalError!usize { From 0d1d96c3465a7af855786d4c9990ceaca101bb32 Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Tue, 2 Jun 2026 08:21:31 -0400 Subject: [PATCH 17/44] Fix crashes --- src/core/input/input.zig | 16 +++++++++++++--- src/platform/3ds/3ds_gfx.zig | 36 +++++++++++++++++++++++------------- 2 files changed, 36 insertions(+), 16 deletions(-) diff --git a/src/core/input/input.zig b/src/core/input/input.zig index 9d23303..978655a 100644 --- a/src/core/input/input.zig +++ b/src/core/input/input.zig @@ -277,7 +277,7 @@ pub fn register_action_set(name: []const u8) !ActionSetHandle { pub fn add_action(set: ActionSetHandle, name: []const u8, kind: ActionKind) !void { const s = set_ptr(set) orelse return error.UnknownActionSet; - if (s.actions.contains(name)) return error.ActionAlreadyExists; + if (action_ptr(s, name) != null) return error.ActionAlreadyExists; try s.actions.put(alloc, name, .{ .kind = kind, .bindings = .empty, @@ -288,7 +288,7 @@ pub fn add_action(set: ActionSetHandle, name: []const u8, kind: ActionKind) !voi pub fn bind_action(set: ActionSetHandle, action_name: []const u8, b: Binding) !void { const s = set_ptr(set) orelse return error.UnknownActionSet; - const a = s.actions.getPtr(action_name) orelse return error.ActionNotFound; + const a = action_ptr(s, action_name) orelse return error.ActionNotFound; if (a.kind == .vector2 and b.component == .none) return error.Vector2BindingNeedsComponent; try a.bindings.append(alloc, b); } @@ -310,7 +310,7 @@ pub fn get_action(name: []const u8) ?ActionValue { const top = 
stack.top() orelse return null; const s = set_ptr(top.actions) orelse return null; if (!s.installed) return null; - const a = s.actions.getPtr(name) orelse return null; + const a = action_ptr(s, name) orelse return null; return a.current_value; } @@ -479,6 +479,16 @@ fn set_ptr_or_null(handle: ActionSetHandle) ?*ActionSet { return set_ptr(handle); } +fn action_ptr(set: *ActionSet, name: []const u8) ?*Action { + if (set.actions.getPtr(name)) |action| return action; + + var it = set.actions.iterator(); + while (it.next()) |entry| { + if (std.mem.eql(u8, entry.key_ptr.*, name)) return entry.value_ptr; + } + return null; +} + fn route_text_to_session(text: []const u8) void { const top = stack.top() orelse return; if (!top.consumes_text) return; diff --git a/src/platform/3ds/3ds_gfx.zig b/src/platform/3ds/3ds_gfx.zig index 7cf5e14..f928d85 100644 --- a/src/platform/3ds/3ds_gfx.zig +++ b/src/platform/3ds/3ds_gfx.zig @@ -469,7 +469,7 @@ pub fn create_pipeline(layout: Pipeline.VertexLayout, v_shader: ?[:0]align(4) co } pub fn destroy_pipeline(handle: Pipeline.Handle) void { - var pl = pipelines.get_element(handle) orelse return; + const pl = get_pipeline_ptr(handle) orelse return; _ = shaderProgramFree(&pl.program); DVLB_Free(pl.dvlb); _ = pipelines.remove_element(handle); @@ -481,26 +481,25 @@ pub fn bind_pipeline(handle: Pipeline.Handle) void { } pub fn create_mesh(pipeline: Pipeline.Handle) anyerror!Mesh.Handle { - _ = pipelines.get_element(pipeline) orelse return error.InvalidPipeline; + _ = get_pipeline_ptr(pipeline) orelse return error.InvalidPipeline; const handle = meshes.add_element(.{ .pipeline = pipeline }) orelse return error.OutOfMeshes; return @intCast(handle); } pub fn destroy_mesh(handle: Mesh.Handle) void { - var mesh = meshes.get_element(handle) orelse return; - free_mesh_vertices(&mesh); + const mesh = get_mesh_ptr(handle) orelse return; + free_mesh_vertices(mesh); _ = meshes.remove_element(handle); } pub fn update_mesh(handle: Mesh.Handle, data: []const 
u8) void { - var mesh = meshes.get_element(handle) orelse return; + const mesh = get_mesh_ptr(handle) orelse return; if (data.len > mesh.capacity) { - free_mesh_vertices(&mesh); + free_mesh_vertices(mesh); const bytes = render_alloc.alloc(u8, data.len) catch { mesh.len = 0; mesh.capacity = 0; - meshes.update_element(handle, mesh); return; }; mesh.ptr = bytes.ptr; @@ -511,16 +510,15 @@ pub fn update_mesh(handle: Mesh.Handle, data: []const u8) void { @memcpy(ptr[0..data.len], data); } mesh.len = data.len; - meshes.update_element(handle, mesh); } pub fn draw_mesh(handle: Mesh.Handle, model: *const Mat4, count: usize, primitive: Mesh.Primitive) void { if (!initialized) return; - const mesh = meshes.get_element(handle) orelse return; + const mesh = get_mesh_ptr(handle) orelse return; const ptr = mesh.ptr orelse return; const pipeline_handle = if (current_pipeline != 0) current_pipeline else mesh.pipeline; - var pl = pipelines.get_element(pipeline_handle) orelse return; + const pl = get_pipeline_ptr(pipeline_handle) orelse return; const available_count = if (pl.stride == 0) 0 else mesh.len / pl.stride; const draw_count = @min(count, available_count); if (draw_count == 0) return; @@ -545,7 +543,7 @@ pub fn draw_mesh(handle: Mesh.Handle, model: *const Mat4, count: usize, primitiv switch (primitive) { .triangles => { for (0..draw_count) |i| { - const vertex = decode_mesh_vertex(ptr, i, pl); + const vertex = decode_mesh_vertex(ptr, i, pl.*); out[i] = to_gpu_vertex(to_screen_vertex(vertex, &mvp)); } written_count = draw_count; @@ -554,8 +552,8 @@ pub fn draw_mesh(handle: Mesh.Handle, model: *const Mat4, count: usize, primitiv var src_i: usize = 0; var dst_i: usize = 0; while (src_i + 1 < draw_count) : (src_i += 2) { - const a = decode_mesh_vertex(ptr, src_i, pl); - const b = decode_mesh_vertex(ptr, src_i + 1, pl); + const a = decode_mesh_vertex(ptr, src_i, pl.*); + const b = decode_mesh_vertex(ptr, src_i + 1, pl.*); dst_i = write_line_segment(out, dst_i, a, b, &mvp); } 
written_count = dst_i; @@ -971,6 +969,18 @@ fn get_texture_ptr(handle: Texture.Handle) ?*TextureData { return null; } +fn get_pipeline_ptr(handle: Pipeline.Handle) ?*PipelineData { + if (handle == 0 or handle >= pipelines.buffer.len) return null; + if (pipelines.buffer[handle]) |*pl| return pl; + return null; +} + +fn get_mesh_ptr(handle: Mesh.Handle) ?*MeshData { + if (handle == 0 or handle >= meshes.buffer.len) return null; + if (meshes.buffer[handle]) |*mesh| return mesh; + return null; +} + fn texture_dim(value: u32) !u16 { if (value == 0 or value > MAX_TEXTURE_SIZE) return error.InvalidTextureSize; From b580843ebf524347ccbfe2782a39236d5bb914dd Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Wed, 3 Jun 2026 01:04:33 -0400 Subject: [PATCH 18/44] Fix high FPS input --- src/engine.zig | 13 +++++++++---- src/platform/glfw/input.zig | 7 +++++-- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/engine.zig b/src/engine.zig index 1d127d0..340f627 100644 --- a/src/engine.zig +++ b/src/engine.zig @@ -273,9 +273,7 @@ pub const Engine = struct { tick_accum = saturatingAddI64(tick_accum, frame_dt_us); const platform_start_ns = clock.now(self.io).toNanoseconds(); - Platform.input.update(); Platform.update(self); - Core.input.update(); const platform_done_ns = clock.now(self.io).toNanoseconds(); var pre_update_elapsed_ns = elapsedNsBetween(platform_start_ns, platform_done_ns); if (!self.running) break; @@ -299,10 +297,17 @@ pub const Engine = struct { while (update_accum >= UPDATE_US) { @branchHint(.unpredictable); + const input_start_ns = clock.now(self.io).toNanoseconds(); + Platform.input.update(); + Core.input.update(); + const input_done_ns = clock.now(self.io).toNanoseconds(); + const engine_elapsed_ns = saturatingAddI64(pre_update_elapsed_ns, elapsedNsBetween(input_start_ns, input_done_ns)); + if (!self.running) break; + const budget = Util.BudgetContext{ .phase_budget_ns = update_budget_ns, - .engine_elapsed_ns = pre_update_elapsed_ns, - 
.remaining_ns = update_budget_ns - pre_update_elapsed_ns, + .engine_elapsed_ns = engine_elapsed_ns, + .remaining_ns = update_budget_ns - engine_elapsed_ns, .is_tick_frame = is_tick_frame, .tick_cost_ns = tick_cost_ns, .safety_margin_ns = Util.BudgetContext.DEFAULT_SAFETY_MARGIN_NS, diff --git a/src/platform/glfw/input.zig b/src/platform/glfw/input.zig index 0241174..ad332c7 100644 --- a/src/platform/glfw/input.zig +++ b/src/platform/glfw/input.zig @@ -148,7 +148,10 @@ export fn cursor_pos_callback(window: *glfw.Window, xpos: f64, ypos: f64) callco prev_cursor_x = xpos; prev_cursor_y = ypos; have_prev_cursor = true; - // GLFW gives window coords; rest of the engine works in framebuffer pixels. + // GLFW positions are window coordinates, while rendering/UI picking uses + // framebuffer pixels. Keep the absolute position scaled, but leave the + // relative delta in GLFW cursor-motion units so mouse-look is not warped + // or quantized by DPI/content-scale changes. var win_w: c_int = 0; var win_h: c_int = 0; var fb_w: c_int = 0; @@ -159,7 +162,7 @@ export fn cursor_pos_callback(window: *glfw.Window, xpos: f64, ypos: f64) callco const sy = @as(f64, @floatFromInt(fb_h)) / @as(f64, @floatFromInt(win_h)); core.deliver_mouse_move( .{ .x = @floatCast(xpos * sx), .y = @floatCast(ypos * sy) }, - .{ .x = @floatCast(dx * sx), .y = @floatCast(dy * sy) }, + .{ .x = @floatCast(dx), .y = @floatCast(dy) }, ); } From 7556d5feb8e70af15b60de6727328e53f2d17d85 Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Wed, 3 Jun 2026 05:41:48 -0400 Subject: [PATCH 19/44] move matmul to shader --- build.zig | 43 +++- src/platform/3ds/3ds_gfx.zig | 470 +++++++++++++++++------------------ 2 files changed, 256 insertions(+), 257 deletions(-) diff --git a/build.zig b/build.zig index b8676ca..1805b83 100644 --- a/build.zig +++ b/build.zig @@ -1244,10 +1244,15 @@ pub fn addShader(owner: *std.Build, b: *std.Build, exe: *std.Build.Step.Compile, const picasso = b.pathJoin(&.{ devkitProPath(b), 
"tools/bin/picasso" }); const sources = b.addWriteFiles(); const vert_src = sources.add(name ++ "_3ds.v.pica", - \\.fvec projection[4] + \\.fvec projection[4], modelView[4], screenProjection[4] + \\.fvec posScale, uvScaleOffset, colorScale \\ - \\.constf myconst(0.0, 1.0, 0.0, 0.0) + \\.constf myconst(0.0, 1.0, -1.0, 0.0) + \\.constf viewport(200.0, 120.0, 0.0, 0.0) + \\.alias zeros myconst.xxxx \\.alias ones myconst.yyyy + \\.alias negOnes myconst.zzzz + \\.alias halfViewport viewport.xyyy \\ \\.out outpos position \\.out outtc0 texcoord0 @@ -1258,16 +1263,36 @@ pub fn addShader(owner: *std.Build, b: *std.Build, exe: *std.Build.Step.Compile, \\.alias inclr v2 \\ \\.proc main - \\ mov r0.xyz, inpos + \\ mul r0.xyz, posScale, inpos \\ mov r0.w, ones \\ - \\ dp4 outpos.x, projection[0], r0 - \\ dp4 outpos.y, projection[1], r0 - \\ dp4 outpos.z, projection[2], r0 - \\ dp4 outpos.w, projection[3], r0 + \\ dp4 r1.x, modelView[0], r0 + \\ dp4 r1.y, modelView[1], r0 + \\ dp4 r1.z, modelView[2], r0 + \\ dp4 r1.w, modelView[3], r0 \\ - \\ mov outtc0, inuv - \\ mov outclr, inclr + \\ dp4 r2.x, projection[0], r1 + \\ dp4 r2.y, projection[1], r1 + \\ dp4 r2.z, projection[2], r1 + \\ dp4 r2.w, projection[3], r1 + \\ + \\ add r3.x, r2.x, r2.w + \\ mul r3.x, halfViewport.x, r3.x + \\ add r3.y, r2.y, r2.w + \\ mul r3.y, halfViewport.y, r3.y + \\ mul r3.z, negOnes.z, r2.z + \\ add r3.z, r2.w, r3.z + \\ mov r3.w, r2.w + \\ + \\ dp4 outpos.x, screenProjection[0], r3 + \\ dp4 outpos.y, screenProjection[1], r3 + \\ dp4 outpos.z, screenProjection[2], r3 + \\ dp4 outpos.w, screenProjection[3], r3 + \\ + \\ mul outtc0.xy, uvScaleOffset.xy, inuv.xy + \\ add outtc0.xy, uvScaleOffset.zw, outtc0.xy + \\ mov outtc0.zw, zeros + \\ mul outclr, colorScale, inclr \\ end \\.end \\ diff --git a/src/platform/3ds/3ds_gfx.zig b/src/platform/3ds/3ds_gfx.zig index f928d85..ebf43bc 100644 --- a/src/platform/3ds/3ds_gfx.zig +++ b/src/platform/3ds/3ds_gfx.zig @@ -148,6 +148,7 @@ extern fn C3D_GetTexEnv(id: 
c_int) *C3D_TexEnv; extern fn C3D_DirtyTexEnv(env: *C3D_TexEnv) void; extern fn C3D_CullFace(mode: c_int) void; extern fn C3D_DepthTest(enable: bool, function: c_int, writemask: c_int) void; +extern fn C3D_AlphaTest(enable: bool, function: c_int, ref: c_int) void; extern fn C3D_AlphaBlend(colorEq: c_int, alphaEq: c_int, srcClr: c_int, dstClr: c_int, srcAlpha: c_int, dstAlpha: c_int) void; extern fn C3D_DrawArrays(primitive: c_int, first: c_int, size: c_int) void; extern fn C3D_TexInitWithParams(tex: *C3D_Tex, cube: ?*anyopaque, params: u64) bool; @@ -181,10 +182,14 @@ const C3D_CLEAR_ALL = C3D_CLEAR_COLOR | C3D_CLEAR_DEPTH; const C3D_FVUNIF_COUNT = 96; const GPU_VERTEX_SHADER = 0; +const GPU_BYTE = 0; +const GPU_UNSIGNED_BYTE = 1; +const GPU_SHORT = 2; const GPU_FLOAT = 3; const GPU_RB_RGBA8 = 0; const GPU_RB_DEPTH24_STENCIL8 = 3; const GPU_ALWAYS = 1; +const GPU_GREATER = 6; const GPU_GEQUAL = 7; const GPU_WRITE_COLOR = 0x0F; const GPU_WRITE_DEPTH = 0x10; @@ -197,6 +202,7 @@ const GPU_SRC_ALPHA = 6; const GPU_ONE_MINUS_SRC_ALPHA = 7; const GPU_PRIMARY_COLOR = 0x00; const GPU_TEXTURE0 = 0x03; +const GPU_PREVIOUS = 0x0F; const GPU_REPLACE = 0x00; const GPU_MODULATE = 0x01; const GPU_TEVSCALE_1 = 0x0; @@ -218,33 +224,30 @@ const DISPLAY_TRANSFER_FLAGS = GX_TRANSFER_FMT_RGB8 << 12; const TOP_SCREEN_WIDTH: f32 = 400.0; const TOP_SCREEN_HEIGHT: f32 = 240.0; const TEXTURE_BPP: usize = 4; +const DEPTH_CLEAR: u32 = 0; +const ALPHA_REF: c_int = 26; +const LINEAR_MESH_MIN_CAPACITY: usize = 256; const MIN_TEXTURE_SIZE: u32 = 8; const SMALL_TEXTURE_EXPAND_SIZE: u32 = 32; const MAX_TEXTURE_SIZE: u32 = 1024; -const LINE_WIDTH: f32 = 1.5; -const DEBUG_UV_AS_COLOR = false; const DEBUG_TEXTURE_ONLY = false; -const ConvertedVertex = struct { - pos: [4]f32, - color: [4]f32, - uv: [2]f32, -}; - -const GpuVertex = extern struct { - pos: [4]f32, - uv: [2]f32, - color: [4]f32, -}; - const PipelineData = struct { program: ShaderProgram, dvlb: *DVLB, stride: usize, position_attr: 
Pipeline.Attribute, - color_attr: ?Pipeline.Attribute, - uv_attr: ?Pipeline.Attribute, + uv_attr: Pipeline.Attribute, + color_attr: Pipeline.Attribute, projection_loc: i8, + model_view_loc: i8, + screen_projection_loc: i8, + pos_scale_loc: i8, + uv_scale_offset_loc: i8, + color_scale_loc: i8, + pos_scale: [3]f32, + uv_attr_scale: [2]f32, + color_scale: [4]f32, }; const MeshData = struct { @@ -264,7 +267,6 @@ const TextureData = struct { tex: C3D_Tex, }; -var render_alloc: std.mem.Allocator = undefined; var render_io: std.Io = undefined; var initialized: bool = false; @@ -281,16 +283,13 @@ var fog_lut: C3D_FogLut = undefined; var white_texture: C3D_Tex = undefined; var white_texture_ready: bool = false; var bound_texture: Texture.Handle = 0; -var draw_vbo_raw: ?*anyopaque = null; -var draw_vbo: ?[*]GpuVertex = null; -var draw_vbo_capacity: usize = 0; var pipelines = Util.CircularBuffer(PipelineData, 16).init(); var meshes = Util.CircularBuffer(MeshData, 2048).init(); var textures = Util.CircularBuffer(TextureData, 64).init(); pub fn setup(alloc: std.mem.Allocator, io: std.Io) void { - render_alloc = alloc; + _ = alloc; render_io = io; } @@ -313,7 +312,6 @@ pub fn init() anyerror!void { C3D_RenderTargetSetOutput(target, GFX_TOP, GFX_LEFT, DISPLAY_TRANSFER_FLAGS); Mtx_OrthoTilt(&screen_projection, 0.0, TOP_SCREEN_WIDTH, 0.0, TOP_SCREEN_HEIGHT, 0.0, 1.0, true); - configure_fixed_attributes(); configure_texture_texenv(); try init_white_texture(); C3D_CullFace(GPU_CULL_NONE); @@ -328,7 +326,6 @@ pub fn deinit() void { destroy_all_meshes(); destroy_all_pipelines(); destroy_all_textures(); - free_draw_vbo(); current_pipeline = 0; bound_texture = 0; @@ -359,8 +356,10 @@ pub fn set_clear_color(r: f32, g: f32, b: f32, a: f32) void { pub fn set_alpha_blend(enabled: bool) void { if (enabled) { C3D_AlphaBlend(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA); + C3D_AlphaTest(true, GPU_GREATER, ALPHA_REF); } else { 
C3D_AlphaBlend(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_ONE, GPU_ZERO, GPU_ONE, GPU_ZERO); + C3D_AlphaTest(false, GPU_ALWAYS, 0); } } @@ -414,7 +413,7 @@ pub fn start_frame() bool { const flags: u8 = if (vsync_enabled) C3D_FRAME_SYNCDRAW else 0; if (!C3D_FrameBegin(flags)) return false; - render_target_clear(t, C3D_CLEAR_ALL, clear_color, 0); + render_target_clear(t, C3D_CLEAR_ALL, clear_color, DEPTH_CLEAR); if (!C3D_FrameDrawOn(t)) { C3D_FrameEnd(0); return false; @@ -429,7 +428,7 @@ pub fn end_frame() void { } pub fn clear_depth() void { - if (target) |t| render_target_clear(t, C3D_CLEAR_DEPTH, clear_color, 0); + if (target) |t| render_target_clear(t, C3D_CLEAR_DEPTH, clear_color, DEPTH_CLEAR); } pub fn set_vsync(v: bool) void { @@ -451,17 +450,44 @@ pub fn create_pipeline(layout: Pipeline.VertexLayout, v_shader: ?[:0]align(4) co const vertex_shader = program.vertexShader orelse return error.InvalidShader; const projection_loc = shaderInstanceGetUniformLocation(vertex_shader, "projection"); - if (projection_loc < 0) return error.InvalidShader; + const model_view_loc = shaderInstanceGetUniformLocation(vertex_shader, "modelView"); + const screen_projection_loc = shaderInstanceGetUniformLocation(vertex_shader, "screenProjection"); + const pos_scale_loc = shaderInstanceGetUniformLocation(vertex_shader, "posScale"); + const uv_scale_offset_loc = shaderInstanceGetUniformLocation(vertex_shader, "uvScaleOffset"); + const color_scale_loc = shaderInstanceGetUniformLocation(vertex_shader, "colorScale"); + if (projection_loc < 0 or + model_view_loc < 0 or + screen_projection_loc < 0 or + pos_scale_loc < 0 or + uv_scale_offset_loc < 0 or + color_scale_loc < 0) + { + return error.InvalidShader; + } const position_attr = find_attr(layout, .position) orelse return error.UnsupportedVertexLayout; + const uv_attr = find_attr(layout, .uv) orelse return error.UnsupportedVertexLayout; + const color_attr = find_attr(layout, .color) orelse return error.UnsupportedVertexLayout; + const 
pos_scale = position_scale(position_attr) orelse return error.UnsupportedVertexLayout; + const uv_attr_scale = uv_scale(uv_attr) orelse return error.UnsupportedVertexLayout; + const color_attr_scale = color_scale(color_attr) orelse return error.UnsupportedVertexLayout; + if (!direct_layout_supported(layout.stride, position_attr, uv_attr, color_attr)) return error.UnsupportedVertexLayout; const data = PipelineData{ .program = program, .dvlb = dvlb, .stride = layout.stride, .position_attr = position_attr, - .color_attr = find_attr(layout, .color), - .uv_attr = find_attr(layout, .uv), + .uv_attr = uv_attr, + .color_attr = color_attr, .projection_loc = projection_loc, + .model_view_loc = model_view_loc, + .screen_projection_loc = screen_projection_loc, + .pos_scale_loc = pos_scale_loc, + .uv_scale_offset_loc = uv_scale_offset_loc, + .color_scale_loc = color_scale_loc, + .pos_scale = pos_scale, + .uv_attr_scale = uv_attr_scale, + .color_scale = color_attr_scale, }; const handle = pipelines.add_element(data) orelse return error.OutOfPipelines; @@ -497,7 +523,8 @@ pub fn update_mesh(handle: Mesh.Handle, data: []const u8) void { if (data.len > mesh.capacity) { free_mesh_vertices(mesh); - const bytes = render_alloc.alloc(u8, data.len) catch { + const new_capacity = linear_mesh_capacity(data.len); + const bytes = alloc_linear_bytes(new_capacity) catch { mesh.len = 0; mesh.capacity = 0; return; @@ -508,12 +535,16 @@ pub fn update_mesh(handle: Mesh.Handle, data: []const u8) void { if (mesh.ptr) |ptr| { @memcpy(ptr[0..data.len], data); + if (data.len > 0) { + _ = GSPGPU_FlushDataCache(@ptrCast(&ptr[0]), @intCast(data.len)); + } } mesh.len = data.len; } pub fn draw_mesh(handle: Mesh.Handle, model: *const Mat4, count: usize, primitive: Mesh.Primitive) void { if (!initialized) return; + if (primitive == .lines) return; const mesh = get_mesh_ptr(handle) orelse return; const ptr = mesh.ptr orelse return; @@ -523,48 +554,22 @@ pub fn draw_mesh(handle: Mesh.Handle, model: *const Mat4, 
count: usize, primitiv const draw_count = @min(count, available_count); if (draw_count == 0) return; - const view_proj = Mat4.mul(current_view, current_proj); - const mvp = Mat4.mul(model.*, view_proj); + const model_view = Mat4.mul(model.*, current_view); C3D_BindProgram(&pl.program); - configure_fixed_attributes(); + configure_fixed_attributes(pl.*); configure_texture_texenv(); bind_current_texture_for_draw(); - upload_matrix_uniform(pl.projection_loc, &screen_projection); - - const vbo_count = switch (primitive) { - .triangles => draw_count, - .lines => (draw_count / 2) * 6, - }; - if (vbo_count == 0) return; - const out = prepare_draw_vbo(vbo_count) orelse return; - - var written_count: usize = 0; - switch (primitive) { - .triangles => { - for (0..draw_count) |i| { - const vertex = decode_mesh_vertex(ptr, i, pl.*); - out[i] = to_gpu_vertex(to_screen_vertex(vertex, &mvp)); - } - written_count = draw_count; - }, - .lines => { - var src_i: usize = 0; - var dst_i: usize = 0; - while (src_i + 1 < draw_count) : (src_i += 2) { - const a = decode_mesh_vertex(ptr, src_i, pl.*); - const b = decode_mesh_vertex(ptr, src_i + 1, pl.*); - dst_i = write_line_segment(out, dst_i, a, b, &mvp); - } - written_count = dst_i; - }, - } - if (written_count == 0) return; + apply_depth_state(); + upload_aether_matrix_uniform(pl.projection_loc, ¤t_proj); + upload_aether_matrix_uniform(pl.model_view_loc, &model_view); + upload_c3d_matrix_uniform(pl.screen_projection_loc, &screen_projection); + upload_vec_uniform(pl.pos_scale_loc, .{ pl.pos_scale[0], pl.pos_scale[1], pl.pos_scale[2], 1.0 }); + upload_uv_uniform(pl.*); + upload_vec_uniform(pl.color_scale_loc, pl.color_scale); - const draw_vertices = out[0..written_count]; - flush_draw_vbo(draw_vertices); - configure_draw_buffer(out.ptr); - C3D_DrawArrays(GPU_TRIANGLES, 0, @intCast(draw_vertices.len)); + if (!configure_draw_buffer(ptr, pl.*)) return; + C3D_DrawArrays(GPU_TRIANGLES, 0, @intCast(draw_count)); } pub fn create_texture(width: u32, 
height: u32, data: []align(16) u8) anyerror!Texture.Handle { @@ -572,8 +577,8 @@ pub fn create_texture(width: u32, height: u32, data: []align(16) u8) anyerror!Te const tex_width: u16 = if (expand_small) @intCast(SMALL_TEXTURE_EXPAND_SIZE) else try texture_dim(width); const tex_height: u16 = if (expand_small) @intCast(SMALL_TEXTURE_EXPAND_SIZE) else try texture_dim(height); const upload_len = @as(usize, tex_width) * @as(usize, tex_height) * TEXTURE_BPP; - const upload_data = try render_alloc.alignedAlloc(u8, .fromByteUnits(16), upload_len); - errdefer render_alloc.free(upload_data); + const upload_data = try alloc_linear_bytes(upload_len); + errdefer free_linear_bytes(upload_data); var tex: C3D_Tex = undefined; if (!tex_init(&tex, tex_width, tex_height, false)) return error.TextureCreateFailed; @@ -613,7 +618,7 @@ pub fn bind_texture(handle: Texture.Handle) void { pub fn destroy_texture(handle: Texture.Handle) void { const tex = get_texture_ptr(handle) orelse return; C3D_TexDelete(&tex.tex); - render_alloc.free(tex.upload_data); + free_linear_bytes(tex.upload_data); _ = textures.remove_element(handle); if (bound_texture == handle) bound_texture = 0; } @@ -630,29 +635,44 @@ fn apply_depth_state() void { C3D_DepthTest(true, GPU_GEQUAL, mask); } -fn configure_fixed_attributes() void { +fn configure_fixed_attributes(pl: PipelineData) void { const attr = C3D_GetAttrInfo(); AttrInfo_Init(attr); - _ = AttrInfo_AddLoader(attr, 0, GPU_FLOAT, 4); - _ = AttrInfo_AddLoader(attr, 1, GPU_FLOAT, 2); - _ = AttrInfo_AddLoader(attr, 2, GPU_FLOAT, 4); + add_attr_loader(attr, 0, pl.position_attr); + add_attr_loader(attr, 1, pl.uv_attr); + add_attr_loader(attr, 2, pl.color_attr); +} + +fn add_attr_loader(info: *C3D_AttrInfo, reg_id: c_int, attr: Pipeline.Attribute) void { + const fmt = gpu_attribute_format(attr.format); + _ = AttrInfo_AddLoader(info, reg_id, fmt, @intCast(attr.size)); } -fn configure_draw_buffer(ptr: [*]GpuVertex) void { +fn configure_draw_buffer(ptr: [*]u8, pl: 
PipelineData) bool { const buf = C3D_GetBufInfo(); BufInfo_Init(buf); - _ = BufInfo_Add(buf, @ptrCast(&ptr[0]), @intCast(@sizeOf(GpuVertex)), 3, 0x210); + if (!add_attr_buffer(buf, ptr, pl.stride, pl.position_attr, 0)) return false; + if (!add_attr_buffer(buf, ptr, pl.stride, pl.uv_attr, 1)) return false; + if (!add_attr_buffer(buf, ptr, pl.stride, pl.color_attr, 2)) return false; + return true; +} + +fn add_attr_buffer(buf: *C3D_BufInfo, ptr: [*]u8, stride: usize, attr: Pipeline.Attribute, reg_id: u64) bool { + const result = BufInfo_Add(buf, @ptrCast(&ptr[attr.offset]), @intCast(stride), 1, reg_id); + if (result < 0) { + BufInfo_Init(buf); + return false; + } + return true; } fn configure_texture_texenv() void { const env = C3D_GetTexEnv(0); - const src = if (DEBUG_UV_AS_COLOR) - tev_sources(GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR) - else if (DEBUG_TEXTURE_ONLY) + const src = if (DEBUG_TEXTURE_ONLY) tev_sources(GPU_TEXTURE0, GPU_TEXTURE0, GPU_TEXTURE0) else tev_sources(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR); - const func = if (DEBUG_UV_AS_COLOR or DEBUG_TEXTURE_ONLY) GPU_REPLACE else GPU_MODULATE; + const func = if (DEBUG_TEXTURE_ONLY) GPU_REPLACE else GPU_MODULATE; env.* = .{ .srcRgb = src, @@ -665,6 +685,26 @@ fn configure_texture_texenv() void { .scaleAlpha = GPU_TEVSCALE_1, }; C3D_DirtyTexEnv(env); + + var stage: c_int = 1; + while (stage < 6) : (stage += 1) { + configure_passthrough_texenv(stage); + } +} + +fn configure_passthrough_texenv(stage: c_int) void { + const env = C3D_GetTexEnv(stage); + env.* = .{ + .srcRgb = tev_sources(GPU_PREVIOUS, 0, 0), + .srcAlpha = tev_sources(GPU_PREVIOUS, 0, 0), + .opAll = 0, + .funcRgb = GPU_REPLACE, + .funcAlpha = GPU_REPLACE, + .color = 0xFFFFFFFF, + .scaleRgb = GPU_TEVSCALE_1, + .scaleAlpha = GPU_TEVSCALE_1, + }; + C3D_DirtyTexEnv(env); } fn tev_sources(a: u16, b: u16, c: u16) u16 { @@ -682,7 +722,12 @@ fn bind_current_texture_for_draw() void { } } -fn upload_matrix_uniform(loc: i8, mat: 
*const C3D_Mtx) void { +fn upload_aether_matrix_uniform(loc: i8, mat: *const Mat4) void { + const c3d = mat4_to_c3d(mat); + upload_c3d_matrix_uniform(loc, &c3d); +} + +fn upload_c3d_matrix_uniform(loc: i8, mat: *const C3D_Mtx) void { if (loc < 0) return; const base: usize = @intCast(loc); @@ -694,6 +739,39 @@ fn upload_matrix_uniform(loc: i8, mat: *const C3D_Mtx) void { } } +fn upload_vec_uniform(loc: i8, v: [4]f32) void { + if (loc < 0) return; + + const base: usize = @intCast(loc); + if (base >= C3D_FVUNIF_COUNT) return; + + C3D_FVUnif[GPU_VERTEX_SHADER][base] = fvec(v[0], v[1], v[2], v[3]); + C3D_FVUnifDirty[GPU_VERTEX_SHADER][base] = true; +} + +fn upload_uv_uniform(pl: PipelineData) void { + const texture_scale = if (get_texture_ptr(bound_texture)) |tex| tex.uv_scale else .{ 1.0, 1.0 }; + upload_vec_uniform(pl.uv_scale_offset_loc, .{ + pl.uv_attr_scale[0] * texture_scale[0], + pl.uv_attr_scale[1] * texture_scale[1], + uv_offset[0] * texture_scale[0], + uv_offset[1] * texture_scale[1], + }); +} + +fn mat4_to_c3d(mat: *const Mat4) C3D_Mtx { + return .{ .r = .{ + fvec(mat.data[0][0], mat.data[1][0], mat.data[2][0], mat.data[3][0]), + fvec(mat.data[0][1], mat.data[1][1], mat.data[2][1], mat.data[3][1]), + fvec(mat.data[0][2], mat.data[1][2], mat.data[2][2], mat.data[3][2]), + fvec(mat.data[0][3], mat.data[1][3], mat.data[2][3], mat.data[3][3]), + } }; +} + +fn fvec(x: f32, y: f32, z: f32, w: f32) C3D_FVec { + return .{ .x = x, .y = y, .z = z, .w = w }; +} + fn destroy_all_pipelines() void { for (&pipelines.buffer) |*slot| { if (slot.*) |*pl| { @@ -719,46 +797,16 @@ fn destroy_all_textures() void { for (&textures.buffer) |*slot| { if (slot.*) |*tex| { C3D_TexDelete(&tex.tex); - render_alloc.free(tex.upload_data); + free_linear_bytes(tex.upload_data); slot.* = null; } } textures.clear(); } -fn prepare_draw_vbo(count: usize) ?[]GpuVertex { - if (count > draw_vbo_capacity) { - free_draw_vbo(); - - const bytes = count * @sizeOf(GpuVertex); - const mem = 
linearAlloc(bytes) orelse return null; - const aligned: *align(@alignOf(GpuVertex)) anyopaque = @alignCast(mem); - const ptr: [*]GpuVertex = @ptrCast(aligned); - draw_vbo_raw = mem; - draw_vbo = ptr; - draw_vbo_capacity = count; - } - - const ptr = draw_vbo orelse return null; - return ptr[0..count]; -} - -fn flush_draw_vbo(vertices: []GpuVertex) void { - _ = GSPGPU_FlushDataCache(@ptrCast(&vertices[0]), @intCast(vertices.len * @sizeOf(GpuVertex))); -} - -fn free_draw_vbo() void { - if (draw_vbo_raw) |mem| { - linearFree(mem); - } - draw_vbo_raw = null; - draw_vbo = null; - draw_vbo_capacity = 0; -} - fn free_mesh_vertices(mesh: *MeshData) void { if (mesh.ptr) |ptr| { - render_alloc.free(ptr[0..mesh.capacity]); + linearFree(ptr); mesh.ptr = null; } mesh.len = 0; @@ -772,146 +820,84 @@ fn find_attr(layout: Pipeline.VertexLayout, usage: Pipeline.AttributeUsage) ?Pip return null; } -fn decode_mesh_vertex(ptr: [*]const u8, index: usize, pl: PipelineData) ConvertedVertex { - const src = ptr[index * pl.stride ..][0..pl.stride]; - return convert_vertex(src, pl); +fn direct_layout_supported(stride: usize, position_attr: Pipeline.Attribute, uv_attr: Pipeline.Attribute, color_attr: Pipeline.Attribute) bool { + return attr_fits(stride, position_attr) and + attr_fits(stride, uv_attr) and + attr_fits(stride, color_attr); } -fn convert_vertex(src: []const u8, pl: PipelineData) ConvertedVertex { - return .{ - .pos = decode_vec4(src, pl.position_attr, .{ 0.0, 0.0, 0.0, 1.0 }), - .color = if (pl.color_attr) |attr| decode_color(src, attr) else .{ 1.0, 1.0, 1.0, 1.0 }, - .uv = if (pl.uv_attr) |attr| decode_vec2(src, attr, .{ 0.0, 0.0 }) else .{ 0.0, 0.0 }, - }; +fn attr_fits(stride: usize, attr: Pipeline.Attribute) bool { + const size = attribute_size_bytes(attr.format); + return attr.offset <= stride and size <= stride - attr.offset; } -fn decode_vec2(src: []const u8, attr: Pipeline.Attribute, default: [2]f32) [2]f32 { - const off = attr.offset; - return switch (attr.format) { - 
.f32x2, .f32x3 => .{ read_f32(src, off, default[0]), read_f32(src, off + 4, default[1]) }, - .unorm8x2, .unorm8x4 => .{ read_u8_norm(src, off, default[0]), read_u8_norm(src, off + 1, default[1]) }, - .unorm16x2, .unorm16x3 => .{ read_u16_norm(src, off, default[0]), read_u16_norm(src, off + 2, default[1]) }, - .snorm16x2, .snorm16x3 => .{ read_i16_norm(src, off, default[0]), read_i16_norm(src, off + 2, default[1]) }, +fn attribute_size_bytes(format: Pipeline.AttributeFormat) usize { + return switch (format) { + .f32x2 => 8, + .f32x3 => 12, + .unorm8x2 => 2, + .unorm8x4 => 4, + .unorm16x2, .snorm16x2 => 4, + .unorm16x3, .snorm16x3 => 6, }; } -fn decode_vec4(src: []const u8, attr: Pipeline.Attribute, default: [4]f32) [4]f32 { - const off = attr.offset; - return switch (attr.format) { - .f32x2 => .{ read_f32(src, off, default[0]), read_f32(src, off + 4, default[1]), default[2], default[3] }, - .f32x3 => .{ read_f32(src, off, default[0]), read_f32(src, off + 4, default[1]), read_f32(src, off + 8, default[2]), default[3] }, - .unorm8x2 => .{ read_u8_norm(src, off, default[0]), read_u8_norm(src, off + 1, default[1]), default[2], default[3] }, - .unorm8x4 => .{ read_u8_norm(src, off, default[0]), read_u8_norm(src, off + 1, default[1]), read_u8_norm(src, off + 2, default[2]), read_u8_norm(src, off + 3, default[3]) }, - .unorm16x2 => .{ read_u16_norm(src, off, default[0]), read_u16_norm(src, off + 2, default[1]), default[2], default[3] }, - .unorm16x3 => .{ read_u16_norm(src, off, default[0]), read_u16_norm(src, off + 2, default[1]), read_u16_norm(src, off + 4, default[2]), default[3] }, - .snorm16x2 => .{ read_i16_norm(src, off, default[0]), read_i16_norm(src, off + 2, default[1]), default[2], default[3] }, - .snorm16x3 => .{ read_i16_norm(src, off, default[0]), read_i16_norm(src, off + 2, default[1]), read_i16_norm(src, off + 4, default[2]), default[3] }, +fn gpu_attribute_format(format: Pipeline.AttributeFormat) c_int { + return switch (format) { + .f32x2, .f32x3 => 
GPU_FLOAT, + .unorm8x2, .unorm8x4 => GPU_UNSIGNED_BYTE, + .unorm16x2, .unorm16x3, .snorm16x2, .snorm16x3 => GPU_SHORT, }; } -fn decode_color(src: []const u8, attr: Pipeline.Attribute) [4]f32 { +fn position_scale(attr: Pipeline.Attribute) ?[3]f32 { + if (attr.size != 3) return null; return switch (attr.format) { - .unorm8x4 => decode_vec4(src, attr, .{ 1.0, 1.0, 1.0, 1.0 }), - .f32x3 => .{ - read_f32(src, attr.offset, 1.0), - read_f32(src, attr.offset + 4, 1.0), - read_f32(src, attr.offset + 8, 1.0), - 1.0, - }, - .f32x2, .unorm8x2, .unorm16x2, .unorm16x3, .snorm16x2, .snorm16x3 => decode_vec4(src, attr, .{ 1.0, 1.0, 1.0, 1.0 }), + .f32x3 => .{ 1.0, 1.0, 1.0 }, + .snorm16x3 => .{ snorm16_scale(), snorm16_scale(), snorm16_scale() }, + else => null, }; } -const ScreenVertex = struct { - pos: [4]f32, - color: [4]f32, - uv: [2]f32, -}; - -fn to_screen_vertex(vertex: ConvertedVertex, mvp: *const Mat4) ScreenVertex { - return .{ - .pos = clip_to_screen(transform_pos(vertex.pos, mvp)), - .color = vertex.color, - .uv = transform_uv(vertex.uv), +fn uv_scale(attr: Pipeline.Attribute) ?[2]f32 { + if (attr.size != 2) return null; + return switch (attr.format) { + .f32x2 => .{ 1.0, 1.0 }, + .unorm8x2 => .{ unorm8_scale(), unorm8_scale() }, + .snorm16x2 => .{ snorm16_scale(), snorm16_scale() }, + else => null, }; } -fn transform_uv(uv: [2]f32) [2]f32 { - const texture_scale = if (get_texture_ptr(bound_texture)) |tex| tex.uv_scale else .{ 1.0, 1.0 }; - return .{ - (uv[0] + uv_offset[0]) * texture_scale[0], - (uv[1] + uv_offset[1]) * texture_scale[1], +fn color_scale(attr: Pipeline.Attribute) ?[4]f32 { + if (attr.size != 4) return null; + return switch (attr.format) { + .unorm8x4 => .{ unorm8_scale(), unorm8_scale(), unorm8_scale(), unorm8_scale() }, + else => null, }; } -fn to_gpu_vertex(vertex: ScreenVertex) GpuVertex { - return .{ - .pos = vertex.pos, - .uv = vertex.uv, - .color = vertex.color, - }; +fn snorm16_scale() f32 { + return 1.0 / 32767.0; } -fn write_line_segment(dst: 
[]GpuVertex, index: usize, a: ConvertedVertex, b: ConvertedVertex, mvp: *const Mat4) usize { - const av = to_screen_vertex(a, mvp); - const bv = to_screen_vertex(b, mvp); - const dx = bv.pos[0] - av.pos[0]; - const dy = bv.pos[1] - av.pos[1]; - const len_sq = dx * dx + dy * dy; - if (len_sq <= 0.000001) return index; - - const inv_len = 1.0 / @sqrt(len_sq); - const nx = -dy * inv_len * (LINE_WIDTH * 0.5); - const ny = dx * inv_len * (LINE_WIDTH * 0.5); - - const a0 = offset_screen_vertex(av, nx, ny); - const a1 = offset_screen_vertex(av, -nx, -ny); - const b0 = offset_screen_vertex(bv, nx, ny); - const b1 = offset_screen_vertex(bv, -nx, -ny); - - dst[index + 0] = to_gpu_vertex(a0); - dst[index + 1] = to_gpu_vertex(a1); - dst[index + 2] = to_gpu_vertex(b0); - dst[index + 3] = to_gpu_vertex(b0); - dst[index + 4] = to_gpu_vertex(a1); - dst[index + 5] = to_gpu_vertex(b1); - return index + 6; -} - -fn offset_screen_vertex(vertex: ScreenVertex, dx: f32, dy: f32) ScreenVertex { - var out = vertex; - out.pos[0] += dx; - out.pos[1] += dy; - return out; -} - -fn transform_pos(pos: [4]f32, mat: *const Mat4) [4]f32 { - return .{ - pos[0] * mat.data[0][0] + pos[1] * mat.data[1][0] + pos[2] * mat.data[2][0] + pos[3] * mat.data[3][0], - pos[0] * mat.data[0][1] + pos[1] * mat.data[1][1] + pos[2] * mat.data[2][1] + pos[3] * mat.data[3][1], - pos[0] * mat.data[0][2] + pos[1] * mat.data[1][2] + pos[2] * mat.data[2][2] + pos[3] * mat.data[3][2], - pos[0] * mat.data[0][3] + pos[1] * mat.data[1][3] + pos[2] * mat.data[2][3] + pos[3] * mat.data[3][3], - }; +fn unorm8_scale() f32 { + return 1.0 / 255.0; } -fn clip_to_screen(pos: [4]f32) [4]f32 { - const inv_w: f32 = if (@abs(pos[3]) > 0.000001) 1.0 / pos[3] else 1.0; - const ndc_x = pos[0] * inv_w; - const ndc_y = pos[1] * inv_w; - const ndc_z = pos[2] * inv_w; - - return .{ - (ndc_x * 0.5 + 0.5) * TOP_SCREEN_WIDTH, - (ndc_y * 0.5 + 0.5) * TOP_SCREEN_HEIGHT, - @max(0.0, @min(1.0, ndc_z)), - 1.0, - }; +fn linear_mesh_capacity(required: 
usize) usize { + var capacity: usize = LINEAR_MESH_MIN_CAPACITY; + while (capacity < required) : (capacity *= 2) {} + return capacity; } fn init_white_texture() !void { if (white_texture_ready) return; - var data align(16) = [_]u8{0xFF} ** (MIN_TEXTURE_SIZE * MIN_TEXTURE_SIZE * TEXTURE_BPP); + const data = try alloc_linear_bytes(MIN_TEXTURE_SIZE * MIN_TEXTURE_SIZE * TEXTURE_BPP); + defer free_linear_bytes(data); + @memset(data, 0xFF); + if (!tex_init(&white_texture, MIN_TEXTURE_SIZE, MIN_TEXTURE_SIZE, false)) { return error.TextureCreateFailed; } @@ -990,6 +976,17 @@ fn texture_dim(value: u32) !u16 { return @intCast(out); } +fn alloc_linear_bytes(len: usize) ![]align(16) u8 { + const mem = linearAlloc(len) orelse return error.OutOfMemory; + const aligned: *align(16) anyopaque = @alignCast(mem); + const ptr: [*]align(16) u8 = @ptrCast(aligned); + return ptr[0..len]; +} + +fn free_linear_bytes(bytes: []align(16) u8) void { + linearFree(bytes.ptr); +} + fn convert_texture_data(dst: []align(16) u8, src: []const u8, width: u32, height: u32, tex_width: u16, tex_height: u16, expand_small: bool) void { const source_len = @as(usize, width) * @as(usize, height) * TEXTURE_BPP; if (src.len < source_len) return; @@ -997,10 +994,11 @@ fn convert_texture_data(dst: []align(16) u8, src: []const u8, width: u32, height const tw: u32 = tex_width; const th: u32 = tex_height; for (0..th) |y| { - const sy = if (expand_small) + const source_y = if (expand_small) @min((@as(u32, @intCast(y)) * height) / th, height - 1) else @min(@as(u32, @intCast(y)), height - 1); + const sy = height - 1 - source_y; for (0..tw) |x| { const sx = if (expand_small) @min((@as(u32, @intCast(x)) * width) / tw, width - 1) @@ -1032,30 +1030,6 @@ fn morton8(x: u32, y: u32) u32 { ((y & 4) << 3); } -fn read_f32(src: []const u8, offset: usize, default: f32) f32 { - if (offset + 4 > src.len) return default; - const bits = std.mem.readInt(u32, src[offset..][0..4], .little); - return @bitCast(bits); -} - -fn 
read_u8_norm(src: []const u8, offset: usize, default: f32) f32 { - if (offset >= src.len) return default; - return @as(f32, @floatFromInt(src[offset])) / 255.0; -} - -fn read_u16_norm(src: []const u8, offset: usize, default: f32) f32 { - if (offset + 2 > src.len) return default; - const value = std.mem.readInt(u16, src[offset..][0..2], .little); - return @as(f32, @floatFromInt(value)) / 65535.0; -} - -fn read_i16_norm(src: []const u8, offset: usize, default: f32) f32 { - if (offset + 2 > src.len) return default; - const bits = std.mem.readInt(u16, src[offset..][0..2], .little); - const value: i16 = @bitCast(bits); - return @max(-1.0, @as(f32, @floatFromInt(value)) / 32767.0); -} - fn floatByte(v: f32) u8 { return @intFromFloat(@max(0.0, @min(1.0, v)) * 255.0); } From d077fdce6a2ab1f2940ac648e38e8d8ee6311cc9 Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Thu, 4 Jun 2026 03:19:52 -0400 Subject: [PATCH 20/44] resolve some gpu issues --- src/platform/3ds/3ds_gfx.zig | 190 +++++++++++++++++++++++++++++------ 1 file changed, 158 insertions(+), 32 deletions(-) diff --git a/src/platform/3ds/3ds_gfx.zig b/src/platform/3ds/3ds_gfx.zig index ebf43bc..1171409 100644 --- a/src/platform/3ds/3ds_gfx.zig +++ b/src/platform/3ds/3ds_gfx.zig @@ -215,6 +215,7 @@ const GPU_TEXFACE_2D = 0; const GPU_RGBA8 = 0; const GPU_NEAREST = 0; const GPU_LINEAR = 1; +const GPU_CLAMP_TO_EDGE = 0; const GPU_REPEAT = 2; const GFX_TOP = 0; @@ -224,6 +225,7 @@ const DISPLAY_TRANSFER_FLAGS = GX_TRANSFER_FMT_RGB8 << 12; const TOP_SCREEN_WIDTH: f32 = 400.0; const TOP_SCREEN_HEIGHT: f32 = 240.0; const TEXTURE_BPP: usize = 4; +const DATA_CACHE_LINE_SIZE: usize = 32; const DEPTH_CLEAR: u32 = 0; const ALPHA_REF: c_int = 26; const LINEAR_MESH_MIN_CAPACITY: usize = 256; @@ -231,6 +233,8 @@ const MIN_TEXTURE_SIZE: u32 = 8; const SMALL_TEXTURE_EXPAND_SIZE: u32 = 32; const MAX_TEXTURE_SIZE: u32 = 1024; const DEBUG_TEXTURE_ONLY = false; +const DEBUG_COLOR_ONLY = false; +const DEBUG_DRAW_QUAD_CHUNKS = false; 
const PipelineData = struct { program: ShaderProgram, @@ -239,6 +243,12 @@ const PipelineData = struct { position_attr: Pipeline.Attribute, uv_attr: Pipeline.Attribute, color_attr: Pipeline.Attribute, + position_loader_size: u8, + uv_loader_size: u8, + color_loader_size: u8, + buffer_base_offset: usize, + buffer_attribute_count: u8, + buffer_permutation: u64, projection_loc: i8, model_view_loc: i8, screen_projection_loc: i8, @@ -356,7 +366,7 @@ pub fn set_clear_color(r: f32, g: f32, b: f32, a: f32) void { pub fn set_alpha_blend(enabled: bool) void { if (enabled) { C3D_AlphaBlend(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA); - C3D_AlphaTest(true, GPU_GREATER, ALPHA_REF); + C3D_AlphaTest(false, GPU_ALWAYS, 0); } else { C3D_AlphaBlend(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_ONE, GPU_ZERO, GPU_ONE, GPU_ZERO); C3D_AlphaTest(false, GPU_ALWAYS, 0); @@ -410,8 +420,8 @@ pub fn start_frame() bool { const t = target orelse return false; if (!initialized) return false; - const flags: u8 = if (vsync_enabled) C3D_FRAME_SYNCDRAW else 0; - if (!C3D_FrameBegin(flags)) return false; + _ = vsync_enabled; + if (!C3D_FrameBegin(C3D_FRAME_SYNCDRAW)) return false; render_target_clear(t, C3D_CLEAR_ALL, clear_color, DEPTH_CLEAR); if (!C3D_FrameDrawOn(t)) { @@ -471,7 +481,7 @@ pub fn create_pipeline(layout: Pipeline.VertexLayout, v_shader: ?[:0]align(4) co const pos_scale = position_scale(position_attr) orelse return error.UnsupportedVertexLayout; const uv_attr_scale = uv_scale(uv_attr) orelse return error.UnsupportedVertexLayout; const color_attr_scale = color_scale(color_attr) orelse return error.UnsupportedVertexLayout; - if (!direct_layout_supported(layout.stride, position_attr, uv_attr, color_attr)) return error.UnsupportedVertexLayout; + const buffer_layout = buffer_layout_from_attrs(layout.stride, position_attr, uv_attr, color_attr) orelse return error.UnsupportedVertexLayout; const data = PipelineData{ .program = program, 
.dvlb = dvlb, @@ -479,6 +489,12 @@ pub fn create_pipeline(layout: Pipeline.VertexLayout, v_shader: ?[:0]align(4) co .position_attr = position_attr, .uv_attr = uv_attr, .color_attr = color_attr, + .position_loader_size = buffer_layout.position_loader_size, + .uv_loader_size = buffer_layout.uv_loader_size, + .color_loader_size = buffer_layout.color_loader_size, + .buffer_base_offset = buffer_layout.base_offset, + .buffer_attribute_count = buffer_layout.attribute_count, + .buffer_permutation = buffer_layout.permutation, .projection_loc = projection_loc, .model_view_loc = model_view_loc, .screen_projection_loc = screen_projection_loc, @@ -535,9 +551,7 @@ pub fn update_mesh(handle: Mesh.Handle, data: []const u8) void { if (mesh.ptr) |ptr| { @memcpy(ptr[0..data.len], data); - if (data.len > 0) { - _ = GSPGPU_FlushDataCache(@ptrCast(&ptr[0]), @intCast(data.len)); - } + flush_data_cache_range(@ptrCast(&ptr[0]), data.len); } mesh.len = data.len; } @@ -569,7 +583,15 @@ pub fn draw_mesh(handle: Mesh.Handle, model: *const Mat4, count: usize, primitiv upload_vec_uniform(pl.color_scale_loc, pl.color_scale); if (!configure_draw_buffer(ptr, pl.*)) return; - C3D_DrawArrays(GPU_TRIANGLES, 0, @intCast(draw_count)); + if (DEBUG_DRAW_QUAD_CHUNKS) { + var first: usize = 0; + while (first < draw_count) : (first += 6) { + const chunk_count = @min(@as(usize, 6), draw_count - first); + C3D_DrawArrays(GPU_TRIANGLES, @intCast(first), @intCast(chunk_count)); + } + } else { + C3D_DrawArrays(GPU_TRIANGLES, 0, @intCast(draw_count)); + } } pub fn create_texture(width: u32, height: u32, data: []align(16) u8) anyerror!Texture.Handle { @@ -630,35 +652,33 @@ fn render_target_clear(t: *C3D_RenderTarget, bits: c_int, color: u32, depth: u32 } fn apply_depth_state() void { - const depth_mask: c_int = if (depth_write_enabled) GPU_WRITE_DEPTH else 0; - const mask: c_int = GPU_WRITE_COLOR | depth_mask; - C3D_DepthTest(true, GPU_GEQUAL, mask); + _ = depth_write_enabled; + C3D_DepthTest(false, GPU_ALWAYS, 
GPU_WRITE_COLOR); } fn configure_fixed_attributes(pl: PipelineData) void { const attr = C3D_GetAttrInfo(); AttrInfo_Init(attr); - add_attr_loader(attr, 0, pl.position_attr); - add_attr_loader(attr, 1, pl.uv_attr); - add_attr_loader(attr, 2, pl.color_attr); + add_attr_loader(attr, 0, pl.position_attr, pl.position_loader_size); + add_attr_loader(attr, 1, pl.uv_attr, pl.uv_loader_size); + add_attr_loader(attr, 2, pl.color_attr, pl.color_loader_size); } -fn add_attr_loader(info: *C3D_AttrInfo, reg_id: c_int, attr: Pipeline.Attribute) void { +fn add_attr_loader(info: *C3D_AttrInfo, reg_id: c_int, attr: Pipeline.Attribute, loader_size: u8) void { const fmt = gpu_attribute_format(attr.format); - _ = AttrInfo_AddLoader(info, reg_id, fmt, @intCast(attr.size)); + _ = AttrInfo_AddLoader(info, reg_id, fmt, loader_size); } fn configure_draw_buffer(ptr: [*]u8, pl: PipelineData) bool { const buf = C3D_GetBufInfo(); BufInfo_Init(buf); - if (!add_attr_buffer(buf, ptr, pl.stride, pl.position_attr, 0)) return false; - if (!add_attr_buffer(buf, ptr, pl.stride, pl.uv_attr, 1)) return false; - if (!add_attr_buffer(buf, ptr, pl.stride, pl.color_attr, 2)) return false; - return true; -} - -fn add_attr_buffer(buf: *C3D_BufInfo, ptr: [*]u8, stride: usize, attr: Pipeline.Attribute, reg_id: u64) bool { - const result = BufInfo_Add(buf, @ptrCast(&ptr[attr.offset]), @intCast(stride), 1, reg_id); + const result = BufInfo_Add( + buf, + @ptrCast(&ptr[pl.buffer_base_offset]), + @intCast(pl.stride), + @intCast(pl.buffer_attribute_count), + pl.buffer_permutation, + ); if (result < 0) { BufInfo_Init(buf); return false; @@ -668,11 +688,13 @@ fn add_attr_buffer(buf: *C3D_BufInfo, ptr: [*]u8, stride: usize, attr: Pipeline. 
fn configure_texture_texenv() void { const env = C3D_GetTexEnv(0); - const src = if (DEBUG_TEXTURE_ONLY) + const src = if (DEBUG_COLOR_ONLY) + tev_sources(GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR) + else if (DEBUG_TEXTURE_ONLY) tev_sources(GPU_TEXTURE0, GPU_TEXTURE0, GPU_TEXTURE0) else tev_sources(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR); - const func = if (DEBUG_TEXTURE_ONLY) GPU_REPLACE else GPU_MODULATE; + const func = if (DEBUG_COLOR_ONLY or DEBUG_TEXTURE_ONLY) GPU_REPLACE else GPU_MODULATE; env.* = .{ .srcRgb = src, @@ -820,10 +842,82 @@ fn find_attr(layout: Pipeline.VertexLayout, usage: Pipeline.AttributeUsage) ?Pip return null; } -fn direct_layout_supported(stride: usize, position_attr: Pipeline.Attribute, uv_attr: Pipeline.Attribute, color_attr: Pipeline.Attribute) bool { - return attr_fits(stride, position_attr) and - attr_fits(stride, uv_attr) and - attr_fits(stride, color_attr); +const BufferLayout = struct { + base_offset: usize, + attribute_count: u8, + permutation: u64, + position_loader_size: u8, + uv_loader_size: u8, + color_loader_size: u8, +}; + +fn buffer_layout_from_attrs(stride: usize, position_attr: Pipeline.Attribute, uv_attr: Pipeline.Attribute, color_attr: Pipeline.Attribute) ?BufferLayout { + var attrs = [_]Pipeline.Attribute{ position_attr, uv_attr, color_attr }; + sort_attrs_by_offset(&attrs); + + const base_offset = attrs[0].offset; + var current_rel: usize = 0; + var attribute_count: usize = 0; + var permutation: u64 = 0; + var position_loader_size: u8 = 0; + var uv_loader_size: u8 = 0; + var color_loader_size: u8 = 0; + + for (attrs, 0..) 
|attr, i| { + if (!attr_fits(stride, attr)) return null; + if (attr.offset < base_offset) return null; + const rel_offset = attr.offset - base_offset; + if (rel_offset != current_rel) return null; + const next_offset = if (i + 1 < attrs.len) attrs[i + 1].offset else stride; + if (next_offset < attr.offset) return null; + const loader_size = attribute_loader_size(attr, next_offset - attr.offset) orelse return null; + const loaded_bytes = attribute_size_bytes_with_count(attr.format, loader_size) orelse return null; + if (loaded_bytes > next_offset - attr.offset) return null; + if (i + 1 < attrs.len and loaded_bytes != next_offset - attr.offset) return null; + const shift: u6 = @intCast(attribute_count * 4); + permutation |= attribute_loader_id(attr.usage) << shift; + attribute_count += 1; + switch (attr.usage) { + .position => position_loader_size = loader_size, + .uv => uv_loader_size = loader_size, + .color => color_loader_size = loader_size, + .normal => unreachable, + } + current_rel = rel_offset + loaded_bytes; + } + + if (stride < current_rel) return null; + if (position_loader_size == 0 or uv_loader_size == 0 or color_loader_size == 0) return null; + + return .{ + .base_offset = base_offset, + .attribute_count = @intCast(attribute_count), + .permutation = permutation, + .position_loader_size = position_loader_size, + .uv_loader_size = uv_loader_size, + .color_loader_size = color_loader_size, + }; +} + +fn sort_attrs_by_offset(attrs: *[3]Pipeline.Attribute) void { + var i: usize = 1; + while (i < attrs.len) : (i += 1) { + var j = i; + while (j > 0 and attrs[j - 1].offset > attrs[j].offset) : (j -= 1) { + const tmp = attrs[j - 1]; + attrs[j - 1] = attrs[j]; + attrs[j] = tmp; + } + } +} + +fn attribute_loader_id(usage: Pipeline.AttributeUsage) u64 { + return switch (usage) { + .position => 0, + .uv => 1, + .color => 2, + .normal => unreachable, + }; } fn attr_fits(stride: usize, attr: Pipeline.Attribute) bool { @@ -831,6 +925,14 @@ fn attr_fits(stride: usize, 
attr: Pipeline.Attribute) bool { return attr.offset <= stride and size <= stride - attr.offset; } +fn attribute_loader_size(attr: Pipeline.Attribute, available_bytes: usize) ?u8 { + if (attr.usage == .position and attr.size == 3 and attribute_component_size_bytes(attr.format) == 2 and available_bytes >= 8) { + return 4; + } + if (attr.size > 4) return null; + return @intCast(attr.size); +} + fn attribute_size_bytes(format: Pipeline.AttributeFormat) usize { return switch (format) { .f32x2 => 8, @@ -842,6 +944,19 @@ fn attribute_size_bytes(format: Pipeline.AttributeFormat) usize { }; } +fn attribute_size_bytes_with_count(format: Pipeline.AttributeFormat, count: u8) ?usize { + if (count == 0 or count > 4) return null; + return attribute_component_size_bytes(format) * @as(usize, count); +} + +fn attribute_component_size_bytes(format: Pipeline.AttributeFormat) usize { + return switch (format) { + .f32x2, .f32x3 => 4, + .unorm8x2, .unorm8x4 => 1, + .unorm16x2, .unorm16x3, .snorm16x2, .snorm16x3 => 2, + }; +} + fn gpu_attribute_format(format: Pipeline.AttributeFormat) c_int { return switch (format) { .f32x2, .f32x3 => GPU_FLOAT, @@ -913,15 +1028,26 @@ fn tex_init(tex: *C3D_Tex, width: u16, height: u16, vram: bool) bool { } fn tex_upload(tex: *C3D_Tex, data: []align(16) const u8) void { - _ = GSPGPU_FlushDataCache(data.ptr, @intCast(data.len)); + flush_data_cache_range(data.ptr, data.len); C3D_TexLoadImage(tex, data.ptr, GPU_TEXFACE_2D, 0); } +fn flush_data_cache_range(ptr: *const anyopaque, len: usize) void { + if (len == 0) return; + + const start = @intFromPtr(ptr); + const aligned_start = std.mem.alignBackward(usize, start, DATA_CACHE_LINE_SIZE); + const aligned_end = std.mem.alignForward(usize, start + len, DATA_CACHE_LINE_SIZE); + const aligned_len = aligned_end - aligned_start; + const aligned_ptr: *const anyopaque = @ptrFromInt(aligned_start); + _ = GSPGPU_FlushDataCache(aligned_ptr, @intCast(aligned_len)); +} + fn tex_set_default_params(tex: *C3D_Tex) void { 
tex.param &= ~(gpu_texture_mag_filter(GPU_LINEAR) | gpu_texture_min_filter(GPU_LINEAR)); tex.param |= gpu_texture_mag_filter(GPU_NEAREST) | gpu_texture_min_filter(GPU_NEAREST); tex.param &= ~(gpu_texture_wrap_s(3) | gpu_texture_wrap_t(3)); - tex.param |= gpu_texture_wrap_s(GPU_REPEAT) | gpu_texture_wrap_t(GPU_REPEAT); + tex.param |= gpu_texture_wrap_s(GPU_CLAMP_TO_EDGE) | gpu_texture_wrap_t(GPU_CLAMP_TO_EDGE); } fn tex_init_params(width: u16, height: u16, max_level: u8, format: u8, tex_type: u8, vram: bool) u64 { From a220e0f185aba80053ed6c386ed7208612033846 Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Thu, 4 Jun 2026 05:18:04 -0400 Subject: [PATCH 21/44] guarantee linear ram --- build.zig | 4 +- src/engine.zig | 80 ++++++++++++++++++++++++++++++++---- src/platform/3ds/3ds_gfx.zig | 79 +++++++++++++++++++++++++++++------ 3 files changed, 140 insertions(+), 23 deletions(-) diff --git a/build.zig b/build.zig index 1805b83..03819f6 100644 --- a/build.zig +++ b/build.zig @@ -1259,8 +1259,8 @@ pub fn addShader(owner: *std.Build, b: *std.Build, exe: *std.Build.Step.Compile, \\.out outclr color \\ \\.alias inpos v0 - \\.alias inuv v1 - \\.alias inclr v2 + \\.alias inclr v1 + \\.alias inuv v2 \\ \\.proc main \\ mul r0.xyz, posScale, inpos diff --git a/src/engine.zig b/src/engine.zig index 340f627..9de923f 100644 --- a/src/engine.zig +++ b/src/engine.zig @@ -13,6 +13,12 @@ const options = @import("options"); pub const Pool = memory.Pool; pub const MemoryConfig = memory.MemoryConfig; +extern fn linearAlloc(size: usize) ?*anyopaque; +extern fn linearFree(mem: ?*anyopaque) void; +extern fn linearSpaceFree() u32; + +const LINEAR_RENDER_RESERVE_BYTES: usize = 2 * 1024 * 1024; + // -- category tracker (wrapper allocator with per-category accounting) -------- pub const CategoryTracker = struct { @@ -79,11 +85,20 @@ pub const CategoryTracker = struct { const TRACKER_COUNT = @typeInfo(Pool).@"enum".fields.len; +fn alloc_linear_bytes(len: usize) ![]align(16) u8 { + const mem 
= linearAlloc(len) orelse return error.OutOfMemory; + const aligned: *align(16) anyopaque = @alignCast(mem); + const ptr: [*]align(16) u8 = @ptrCast(aligned); + return ptr[0..len]; +} + // -- engine ------------------------------------------------------------------- pub const Engine = struct { io: std.Io, pool: memory.PoolAlloc, + linear_render_pool: memory.PoolAlloc, + linear_render_mem: ?[]align(16) u8, trackers: [TRACKER_COUNT]CategoryTracker, running: bool, vsync: bool, @@ -103,6 +118,9 @@ pub const Engine = struct { /// `title` so single-word titles just work; override when the /// title contains characters you don't want in a filesystem path. app_name: ?[]const u8 = null, + /// On 3DS, provide this to back the render category with linear + /// FCRAM so vertex buffers can be used as direct PICA sources. + render_capacity: ?usize = null, }; /// Initializes the engine in place. `self` must live at a stable address @@ -122,12 +140,18 @@ pub const Engine = struct { config: Config, state: *const Core.State, ) !void { - assert(config.memory.total() <= mem.len); + const external_render = use_external_render_pool(config); + const main_required = if (external_render) + config.memory.total() - config.memory.render + else + config.memory.total(); + assert(main_required <= mem.len); self.io = sys_io; self.running = true; self.vsync = config.vsync; self.state = state.*; + self.linear_render_mem = null; self.pool = memory.PoolAlloc.init(mem, "main"); const inner = self.pool.allocator(); @@ -149,6 +173,9 @@ pub const Engine = struct { try logger.init(sys_io, self.dirs.data); try Platform.init(self, config.width, config.height, config.title, config.fullscreen, config.vsync, config.resizable); + if (external_render) try self.init_linear_render_pool(config); + errdefer self.deinit_linear_render_pool(); + try Rendering.Texture.init_defaults(self.allocator(.render)); try Core.state_machine.init(self, &self.state); } @@ -159,6 +186,36 @@ pub const Engine = struct { 
Platform.deinit(); logger.deinit(self.io); self.dirs.close(self.io); + self.deinit_linear_render_pool(); + } + + fn use_external_render_pool(config: Config) bool { + return options.config.platform == .nintendo_3ds and config.render_capacity != null; + } + + fn init_linear_render_pool(self: *Engine, config: Config) !void { + assert(config.render_capacity.? >= config.memory.render); + + const available: usize = linearSpaceFree(); + if (available <= LINEAR_RENDER_RESERVE_BYTES) return error.OutOfMemory; + + const capacity = @min(config.render_capacity.?, available - LINEAR_RENDER_RESERVE_BYTES); + if (capacity < config.memory.render) return error.OutOfMemory; + + const linear_mem = try alloc_linear_bytes(capacity); + self.linear_render_mem = linear_mem; + self.linear_render_pool = memory.PoolAlloc.init(linear_mem, "render-linear"); + + const render_idx = @intFromEnum(Pool.render); + self.trackers[render_idx].inner = self.linear_render_pool.allocator(); + self.trackers[render_idx].budget = @min(self.trackers[render_idx].budget, capacity); + } + + fn deinit_linear_render_pool(self: *Engine) void { + if (self.linear_render_mem) |mem| { + linearFree(mem.ptr); + self.linear_render_mem = null; + } } pub fn allocator(self: *Engine, p: Pool) std.mem.Allocator { @@ -187,15 +244,22 @@ pub const Engine = struct { } pub fn set_budget(self: *Engine, p: Pool, new_budget: usize) void { - self.trackers[@intFromEnum(p)].budget = new_budget; + self.trackers[@intFromEnum(p)].budget = if (p == .render) blk: { + const mem = self.linear_render_mem orelse break :blk new_budget; + break :blk @min(new_budget, mem.len); + } else new_budget; } pub fn total_used(self: *const Engine) usize { - return self.pool.used; + var total: usize = 0; + for (self.trackers) |tracker| total += tracker.used; + return total; } pub fn total_budget(self: *const Engine) usize { - return self.pool.budget; + var total: usize = 0; + for (self.trackers) |tracker| total += tracker.budget; + return total; } pub fn 
report(self: *const Engine) void { @@ -215,10 +279,10 @@ pub const Engine = struct { }); } Util.engine_logger.info(" total: {}/{} bytes ({}/{} KiB)", .{ - self.pool.used, - self.pool.budget, - self.pool.used / 1024, - self.pool.budget / 1024, + self.total_used(), + self.total_budget(), + self.total_used() / 1024, + self.total_budget() / 1024, }); Util.engine_logger.info("--------------------", .{}); } diff --git a/src/platform/3ds/3ds_gfx.zig b/src/platform/3ds/3ds_gfx.zig index 1171409..92fddc2 100644 --- a/src/platform/3ds/3ds_gfx.zig +++ b/src/platform/3ds/3ds_gfx.zig @@ -229,6 +229,12 @@ const DATA_CACHE_LINE_SIZE: usize = 32; const DEPTH_CLEAR: u32 = 0; const ALPHA_REF: c_int = 26; const LINEAR_MESH_MIN_CAPACITY: usize = 256; +const OS_OLD_FCRAM_VADDR: usize = 0x14000000; +const OS_OLD_FCRAM_SIZE: usize = 0x08000000; +const OS_FCRAM_VADDR: usize = 0x30000000; +const OS_FCRAM_SIZE: usize = 0x10000000; +const OS_VRAM_VADDR: usize = 0x1F000000; +const OS_VRAM_SIZE: usize = 0x00600000; const MIN_TEXTURE_SIZE: u32 = 8; const SMALL_TEXTURE_EXPAND_SIZE: u32 = 32; const MAX_TEXTURE_SIZE: u32 = 1024; @@ -262,7 +268,8 @@ const PipelineData = struct { const MeshData = struct { pipeline: Pipeline.Handle, - ptr: ?[*]u8 = null, + ptr: ?[*]const u8 = null, + owned_ptr: ?[*]u8 = null, len: usize = 0, capacity: usize = 0, }; @@ -481,6 +488,12 @@ pub fn create_pipeline(layout: Pipeline.VertexLayout, v_shader: ?[:0]align(4) co const pos_scale = position_scale(position_attr) orelse return error.UnsupportedVertexLayout; const uv_attr_scale = uv_scale(uv_attr) orelse return error.UnsupportedVertexLayout; const color_attr_scale = color_scale(color_attr) orelse return error.UnsupportedVertexLayout; + if (!supported_attr_location(position_attr) or + !supported_attr_location(uv_attr) or + !supported_attr_location(color_attr)) + { + return error.UnsupportedVertexLayout; + } const buffer_layout = buffer_layout_from_attrs(layout.stride, position_attr, uv_attr, color_attr) orelse return 
error.UnsupportedVertexLayout; const data = PipelineData{ .program = program, @@ -537,21 +550,37 @@ pub fn destroy_mesh(handle: Mesh.Handle) void { pub fn update_mesh(handle: Mesh.Handle, data: []const u8) void { const mesh = get_mesh_ptr(handle) orelse return; + if (data.len == 0) { + mesh.ptr = null; + mesh.len = 0; + return; + } + + if (is_linear_gpu_memory(data.ptr, data.len)) { + if (mesh.owned_ptr != null) free_owned_mesh_vertices(mesh); + mesh.ptr = data.ptr; + mesh.len = data.len; + flush_data_cache_range(data.ptr, data.len); + return; + } + if (data.len > mesh.capacity) { - free_mesh_vertices(mesh); + free_owned_mesh_vertices(mesh); const new_capacity = linear_mesh_capacity(data.len); const bytes = alloc_linear_bytes(new_capacity) catch { + mesh.ptr = null; mesh.len = 0; mesh.capacity = 0; return; }; - mesh.ptr = bytes.ptr; + mesh.owned_ptr = bytes.ptr; mesh.capacity = bytes.len; } - if (mesh.ptr) |ptr| { + if (mesh.owned_ptr) |ptr| { @memcpy(ptr[0..data.len], data); flush_data_cache_range(@ptrCast(&ptr[0]), data.len); + mesh.ptr = ptr; } mesh.len = data.len; } @@ -659,17 +688,17 @@ fn apply_depth_state() void { fn configure_fixed_attributes(pl: PipelineData) void { const attr = C3D_GetAttrInfo(); AttrInfo_Init(attr); - add_attr_loader(attr, 0, pl.position_attr, pl.position_loader_size); - add_attr_loader(attr, 1, pl.uv_attr, pl.uv_loader_size); - add_attr_loader(attr, 2, pl.color_attr, pl.color_loader_size); + add_attr_loader(attr, pl.position_attr, pl.position_loader_size); + add_attr_loader(attr, pl.uv_attr, pl.uv_loader_size); + add_attr_loader(attr, pl.color_attr, pl.color_loader_size); } -fn add_attr_loader(info: *C3D_AttrInfo, reg_id: c_int, attr: Pipeline.Attribute, loader_size: u8) void { +fn add_attr_loader(info: *C3D_AttrInfo, attr: Pipeline.Attribute, loader_size: u8) void { const fmt = gpu_attribute_format(attr.format); - _ = AttrInfo_AddLoader(info, reg_id, fmt, loader_size); + _ = AttrInfo_AddLoader(info, @intCast(attr.location), fmt, 
loader_size); } -fn configure_draw_buffer(ptr: [*]u8, pl: PipelineData) bool { +fn configure_draw_buffer(ptr: [*]const u8, pl: PipelineData) bool { const buf = C3D_GetBufInfo(); BufInfo_Init(buf); const result = BufInfo_Add( @@ -827,11 +856,16 @@ fn destroy_all_textures() void { } fn free_mesh_vertices(mesh: *MeshData) void { - if (mesh.ptr) |ptr| { + free_owned_mesh_vertices(mesh); + mesh.ptr = null; + mesh.len = 0; +} + +fn free_owned_mesh_vertices(mesh: *MeshData) void { + if (mesh.owned_ptr) |ptr| { linearFree(ptr); - mesh.ptr = null; + mesh.owned_ptr = null; } - mesh.len = 0; mesh.capacity = 0; } @@ -842,6 +876,10 @@ fn find_attr(layout: Pipeline.VertexLayout, usage: Pipeline.AttributeUsage) ?Pip return null; } +fn supported_attr_location(attr: Pipeline.Attribute) bool { + return attr.location < 12; +} + const BufferLayout = struct { base_offset: usize, attribute_count: u8, @@ -1043,6 +1081,21 @@ fn flush_data_cache_range(ptr: *const anyopaque, len: usize) void { _ = GSPGPU_FlushDataCache(aligned_ptr, @intCast(aligned_len)); } +fn is_linear_gpu_memory(ptr: *const anyopaque, len: usize) bool { + if (len == 0) return true; + + const start = @intFromPtr(ptr); + return range_in_region(start, len, OS_FCRAM_VADDR, OS_FCRAM_SIZE) or + range_in_region(start, len, OS_OLD_FCRAM_VADDR, OS_OLD_FCRAM_SIZE) or + range_in_region(start, len, OS_VRAM_VADDR, OS_VRAM_SIZE); +} + +fn range_in_region(start: usize, len: usize, base: usize, size: usize) bool { + if (start < base) return false; + const rel = start - base; + return rel <= size and len <= size - rel; +} + fn tex_set_default_params(tex: *C3D_Tex) void { tex.param &= ~(gpu_texture_mag_filter(GPU_LINEAR) | gpu_texture_min_filter(GPU_LINEAR)); tex.param |= gpu_texture_mag_filter(GPU_NEAREST) | gpu_texture_min_filter(GPU_NEAREST); From 73272727457b5313578f9622cde26b484baa064d Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Thu, 4 Jun 2026 05:39:10 -0400 Subject: [PATCH 22/44] Slow working diag --- 
src/platform/3ds/3ds_gfx.zig | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/platform/3ds/3ds_gfx.zig b/src/platform/3ds/3ds_gfx.zig index 92fddc2..366773c 100644 --- a/src/platform/3ds/3ds_gfx.zig +++ b/src/platform/3ds/3ds_gfx.zig @@ -240,7 +240,8 @@ const SMALL_TEXTURE_EXPAND_SIZE: u32 = 32; const MAX_TEXTURE_SIZE: u32 = 1024; const DEBUG_TEXTURE_ONLY = false; const DEBUG_COLOR_ONLY = false; -const DEBUG_DRAW_QUAD_CHUNKS = false; +const DEBUG_DRAW_QUAD_CHUNKS = true; +const DEBUG_DRAW_CHUNK_VERTICES: usize = 3; const PipelineData = struct { program: ShaderProgram, @@ -611,14 +612,16 @@ pub fn draw_mesh(handle: Mesh.Handle, model: *const Mat4, count: usize, primitiv upload_uv_uniform(pl.*); upload_vec_uniform(pl.color_scale_loc, pl.color_scale); - if (!configure_draw_buffer(ptr, pl.*)) return; if (DEBUG_DRAW_QUAD_CHUNKS) { var first: usize = 0; - while (first < draw_count) : (first += 6) { - const chunk_count = @min(@as(usize, 6), draw_count - first); - C3D_DrawArrays(GPU_TRIANGLES, @intCast(first), @intCast(chunk_count)); + while (first < draw_count) : (first += DEBUG_DRAW_CHUNK_VERTICES) { + const chunk_count = @min(DEBUG_DRAW_CHUNK_VERTICES, draw_count - first); + const chunk_ptr = ptr + first * pl.stride; + if (!configure_draw_buffer(chunk_ptr, pl.*)) return; + C3D_DrawArrays(GPU_TRIANGLES, 0, @intCast(chunk_count)); } } else { + if (!configure_draw_buffer(ptr, pl.*)) return; C3D_DrawArrays(GPU_TRIANGLES, 0, @intCast(draw_count)); } } From 4a993a864b20ff8c84848f5ea824634a6992247e Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Sat, 6 Jun 2026 04:50:40 -0400 Subject: [PATCH 23/44] Better 3DS Panic --- build.zig | 25 ++- src/platform/3ds/services.zig | 334 +++++++++++++++++++++++++++++++++- 2 files changed, 348 insertions(+), 11 deletions(-) diff --git a/build.zig b/build.zig index 03819f6..e31b280 100644 --- a/build.zig +++ b/build.zig @@ -995,14 +995,33 @@ fn threedsxPipeline(b: *std.Build, exe: 
*std.Build.Step.Compile, opts: ExportOpt \\ ); + // Small linker script fragment providing accurate .text bounds as + // link-time constants. This lets the panic unwinder in services.zig + // use real section start/end (via ADDR/SIZEOF) instead of any + // hardcoded/sketchy ranges for isLikelyReturnAddress text checks. + const syms_wf = b.addWriteFiles(); + const text_syms_ld = syms_wf.add("aether_3ds_text_syms.ld", + \\/* Zig C backend (for 3DS ofmt=c) mangles extern names with zig_e_ prefix. */ + \\/* Provide both for the isLikelyReturnAddress range checks + any debug. */ + \\zig_e___text_start = ADDR(.text); + \\zig_e___text_end = ADDR(.text) + SIZEOF(.text); + \\__text_start = zig_e___text_start; + \\__text_end = zig_e___text_end; + ); + // Standard 3DS arch flags from devkitPro's template Makefile. const arch = [_][]const u8{ "-march=armv6k", "-mtune=mpcore", "-mfloat-abi=hard", "-mtp=soft", + // Keep frame pointers so manual r11-based stack walk in panic handler + // can produce useful unwind (otherwise gcc -O* uses r11 as temp and + // chains are absent or clobbered, leading to data aborts in the walker). + "-fno-omit-frame-pointer", }; // Single-shot compile + link via the gcc driver. 3dsx.specs pulls // in `_3dsx_crt0` (which calls our exported `main`) and the 3DSX - // linker script. + // linker script. We also supply a tiny -T fragment for accurate + // __text_* symbols (see text_syms_ld above). 
const link = b.addSystemCommand(&.{gcc}); link.addArgs(&arch); link.addArgs(&.{ @@ -1013,6 +1032,10 @@ fn threedsxPipeline(b: *std.Build, exe: *std.Build.Step.Compile, opts: ExportOpt if (exe.root_module.optimize != .Debug or exe.root_module.optimize == .ReleaseSmall) "-O2" else if (exe.root_module.optimize == .ReleaseSmall) "-Os" else "-O0", if (exe.root_module.optimize == .Debug or exe.root_module.optimize == .ReleaseSafe) "-g" else "-g0", "-specs=3dsx.specs", + "-T", + }); + link.addFileArg(text_syms_ld); + link.addArgs(&.{ "-Wl,--wrap=threadCreate", "-Wl,--no-warn-execstack", }); diff --git a/src/platform/3ds/services.zig b/src/platform/3ds/services.zig index 2509218..c7066ce 100644 --- a/src/platform/3ds/services.zig +++ b/src/platform/3ds/services.zig @@ -75,6 +75,14 @@ extern fn ERRF_ExceptionHandler(excep: *ExceptionInfo, regs: *CpuRegisters) nore extern fn svcBreak(break_reason: c_int) void; extern fn svcOutputDebugString(str: [*]const u8, length: i32) c_int; +// .text bounds provided by the linker script fragment in build.zig +// (using ADDR(.text) + SIZEOF(.text)) so the panic unwinder can +// precisely know what is code vs data/rodata/string literals when +// doing the low-confidence stack scan (and BL-prev check). This +// replaces any sketchy hardcoded ranges for "is this value in .text?". +extern const __text_start: u8; +extern const __text_end: u8; + comptime { @export(&entry, .{ .name = "main" }); @export(&stack_size, .{ .name = "__stacksize__" }); @@ -85,6 +93,9 @@ comptime { var stack_size: u32 = 1 * 1024 * 1024; var exception_stack: [exception_stack_size]u8 align(8) = undefined; var panic_stage: u8 = 0; +/// Captured very early in entry() so that the panic walker can scan the full +/// used stack up to the initial top (instead of an arbitrary small window). 
+var program_stack_top: usize = 0; fn threadCreateWrap( entrypoint: ThreadFunc, @@ -105,6 +116,11 @@ fn threadCreateWrap( } fn entry() callconv(.c) c_int { + if (program_stack_top == 0) { + // Capture the SP as set by the 3DS crt0 / loader, before we push anything. + // This is the "stack top" (high address); the base is roughly top - stack_size. + program_stack_top = asm volatile ("mov %[top], sp" : [top] "=r" (-> usize)); + } installCrashHandlers(); const init = process_init.makeInit(.{ .vector = &argv }); @@ -114,31 +130,329 @@ fn entry() callconv(.c) c_int { return 0; } +fn getFramePointer() usize { + return asm volatile ("mov %[fp], r11" : [fp] "=r" (-> usize)); +} + +/// Returns true if `addr` looks like a plausible ARM return address (i.e. the +/// address immediately after a `bl` / `blx` instruction in .text). +/// Used to filter the low-confidence stack scan so we don't pollute the trace +/// with string constants, literal pools, or random data that happen to be in +/// the code address range. +fn isLikelyReturnAddress(addr: usize) bool { + @setRuntimeSafety(false); + const ts = @intFromPtr(&__text_start); + const te = @intFromPtr(&__text_end); + if ((addr & 3) != 0 or addr < ts or addr >= te) return false; + const prev = addr -% 4; + if (prev < ts) return false; + const inst = @as(*const u32, @ptrFromInt(prev)).*; + // ARM branch with link (BL) encoding: bits [27:24] == 0b1011 (0xb) + // Covers unconditional and conditional BL. + const op = (inst >> 24) & 0xf; + if (op == 0xb) return true; + return false; +} + +/// Manual frame-pointer walk to collect return addresses for panic reporting. +/// Limited depth, best-effort (unsafe during crash is acceptable). +/// On 3DS (C-emitted code via gcc, often with omitted frame pointers), r11 +/// may not be a valid FP; we defensively reject low/implausible values to +/// avoid data aborts inside the panic handler itself (which would turn a +/// nice panic into a CPU exception). 
+/// +/// start_fp / start_lr (if provided) should be captured *very early* in the +/// panic handler (before the function's own frame setup or calls) so that +/// they reflect the caller's frame (the code that hit the panic/unreachable). +fn collectStackAddresses(first_addr: usize, out: []usize, start_fp: ?usize, start_lr: ?usize, start_sp: ?usize) usize { + @setRuntimeSafety(false); // best-effort only; raw ptr walks can fault (caught by exception handler). Prevents safety checks here from causing recursive zig panic(). + const min_valid_fp: usize = 0x100000; // below this is not plausible for user stack frames on 3DS + var count: usize = 0; + if (out.len > 0) { + out[0] = first_addr; + count = 1; + } + // Prefer the provided start_lr (often the return addr from the panicking bl site) + if (start_lr) |lr| { + if (lr > 1 and lr != first_addr and count < out.len and isLikelyReturnAddress(lr)) { + out[count] = lr; + count += 1; + } + } + var fp = start_fp orelse getFramePointer(); + var guard: usize = 0; + while (count < out.len and guard < 64) : (guard += 1) { // allow deeper for panic + if (fp < min_valid_fp or (fp & 3) != 0) break; + const saved_fp: *const u32 = @ptrFromInt(fp); + const saved_lr: *const u32 = @ptrFromInt(fp + 4); + const lr = saved_lr.*; + const next_fp = saved_fp.*; + if (lr > 1 and count < out.len and isLikelyReturnAddress(lr)) { + out[count] = lr; + count += 1; + } + // Downward-growing stack: outer (caller) frames have *higher* fp values than inner. + // Walk while next_fp > fp. Stop on invalid, zero, misaligned, or non-progress (next <= fp would be cycle or wrong dir). + if (next_fp < min_valid_fp or next_fp == 0 or next_fp <= fp or (next_fp & 3) != 0) break; + fp = next_fp; + } + + // Low-confidence heuristic stack scan for more entries (even if FP chain is broken, + // which is common on 3DS C-backend). Scan for values that look like code addresses + // (aligned, in plausible .text range). Useful for deeper traces on null unwraps etc. 
+ // We dedup and accept some noise because "crashing anyway". Larger window and + // looser filter to get more candidates as requested. + if (start_sp) |sp| { + var top = if (program_stack_top != 0) program_stack_top else sp +% (1024 * 1024); + // Extra sanity on top/sp to avoid absurdly large scans or bad windows if + // program_stack_top got clobbered or capture was off. Cap total scan to 2MB. + if (top <= sp or top -% sp > 2 * 1024 * 1024 or sp < 0x1000 or top > 0x30000000) { + top = sp +% (64 * 1024); // small safe fallback window + } + // Scan the entire used stack (from current SP up to the initial top). + // With isLikelyReturnAddress() this is safe and will only pick real RAs + // that were saved by bl instructions, even across the full stack depth. + // We still dedup and bound the number added. + var scan: usize = sp & ~@as(usize, 3); + const max_scan: usize = 2 * 1024 * 1024; + var scanned: usize = 0; + while (scan < top and count < out.len and scanned < max_scan) : ({ + scan += 4; + scanned += 4; + }) { + if (scan < 0x1000 or scan > 0x30000000) break; + const val = @as(*const u32, @ptrFromInt(scan)).*; + if (isLikelyReturnAddress(val)) { + var have = false; + for (out[0..count]) |prev| if (prev == val) { have = true; break; }; + if (!have) { + out[count] = val; + count += 1; + } + } + } + } + + return count; +} + fn fatalMainError(err: anyerror, maybe_trace: ?*std.builtin.StackTrace, fallback_addr: usize) noreturn { + @branchHint(.cold); + @setRuntimeSafety(false); // best-effort crash reporting only; disables safety checks that could re-invoke panic() and produce "Aether recursive..." instead of the original info. 
+ + if (panic_stage != 0) { + fatalDisplay("Aether recursive error in main"); + } + panic_stage = 1; + + const main_pc = if (maybe_trace) |trace| blk: { + const nn = @min(trace.index, trace.instruction_addresses.len); + if (nn > 0) break :blk trace.instruction_addresses[0]; + break :blk fallback_addr; + } else fallback_addr; + + // Capture fp/lr/sp early for better starting point for walk (see panic() for details). + const entry_fp = asm volatile ("mov %[fp], r11" : [fp] "=r" (-> usize)); + const entry_lr = asm volatile ("mov %[lr], lr" : [lr] "=r" (-> usize)); + const entry_sp = asm volatile ("mov %[sp], sp" : [sp] "=r" (-> usize)); + + var addrs: [32]usize = undefined; + const n = collectStackAddresses(main_pc, &addrs, entry_fp, entry_lr, entry_sp); + + var trace_buf: [512]u8 = undefined; + var fixed: std.Io.Writer = .fixed(&trace_buf); + + fixed.print("Aether main returned error.{s} at 0x{x}\n", .{@errorName(err), main_pc}) catch {}; + fixed.print("entry_fp=0x{x} entry_sp=0x{x}\n", .{entry_fp, entry_sp}) catch {}; + + if (maybe_trace) |trace| { + const nerr = @min(trace.index, trace.instruction_addresses.len); + if (nerr > 0) { + fixed.writeAll("error return trace:\n") catch {}; + for (trace.instruction_addresses[0..nerr], 0..) |a, i| { + fixed.print("{d: >2}: 0x{x:0>8}\n", .{ i, a }) catch {}; + } + } + } + + // Current stack walk for more context (seeded from entry + heuristic) + fixed.writeAll("stack trace:\n") catch {}; + const show = @min(n, 20); + for (addrs[0..show], 0..) |a, i| { + fixed.print("{d: >2}: 0x{x:0>8}\n", .{ i, a }) catch {}; + } + + fixed.writeAll("\n") catch {}; + const text = fixed.buffered(); + + debugString(text); + debugString("\n"); + + // For ERRF_SetUserString (max ~256 bytes), compact version with PC + stack addrs. 
+ var user_buf: [256:0]u8 = @splat(0); + var uw: std.Io.Writer = .fixed(&user_buf); + var hbuf: [96]u8 = undefined; + const h = std.fmt.bufPrint(&hbuf, "Aether main err at 0x{x}: {s}\n", .{main_pc, @errorName(err)}) catch "Aether main err\n"; + uw.writeAll(h[0..@min(h.len, 70)]) catch {}; if (maybe_trace) |trace| { - const len = @min(trace.instruction_addresses.len, trace.index); - const addrs = trace.instruction_addresses[0..@min(len, 4)]; - switch (addrs.len) { - 0 => {}, - 1 => fatal("Aether main returned error.{s} at 0x{x}", .{ @errorName(err), addrs[0] }), - 2 => fatal("Aether main returned error.{s} at 0x{x} 0x{x}", .{ @errorName(err), addrs[0], addrs[1] }), - 3 => fatal("Aether main returned error.{s} at 0x{x} 0x{x} 0x{x}", .{ @errorName(err), addrs[0], addrs[1], addrs[2] }), - else => fatal("Aether main returned error.{s} at 0x{x} 0x{x} 0x{x} 0x{x}", .{ @errorName(err), addrs[0], addrs[1], addrs[2], addrs[3] }), + const nerr = @min(trace.index, trace.instruction_addresses.len); + if (nerr > 0) { + uw.writeAll("err:") catch {}; + for (trace.instruction_addresses[0..@min(nerr,3)], 0..) |a, i| { + uw.print(" {d}:0x{x}", .{i, a}) catch {}; + } + uw.writeAll("\n") catch {}; } } + uw.writeAll("stack:") catch {}; + const nprint = @min(n, 24); + for (addrs[0..nprint], 0..) |a, i| { + uw.print(" {d}:0x{x}", .{i, a}) catch {}; + } + uw.writeAll("\n") catch {}; + const end = @min(uw.end, 255); + const user_str = user_buf[0..end :0]; + + _ = errfInit(); + _ = ERRF_SetUserString(user_str.ptr); - fatal("Aether main returned error.{s} at 0x{x}", .{ @errorName(err), fallback_addr }); + // Compact message for the visible "Reason" (0x60 limit): include the PC + some stack. + var throw_buf: [0x60:0]u8 = @splat(0); + var w: std.Io.Writer = .fixed(&throw_buf); + w.print("Aether main err at 0x{x}: {s}", .{main_pc, @errorName(err)}) catch {}; + if (n > 0) { + w.print(" [", .{}) catch {}; + const max_short = 4; + for (addrs[0..@min(n, max_short)], 0..) 
|a, i| { + if (i > 0) w.print(" ", .{}) catch {}; + w.print("{d}:0x{x}", .{i, a}) catch {}; + } + if (n > max_short) w.print("..", .{}) catch {}; + w.print("]", .{}) catch {}; + } + _ = w.flush() catch {}; + _ = ERRF_ThrowResultWithMessage(fatal_result, &throw_buf); + + svcBreak(USERBREAK_PANIC); + while (true) {} } pub fn panic(msg: []const u8, _: ?*std.builtin.StackTrace, first_trace_addr: ?usize) noreturn { @branchHint(.cold); + @setRuntimeSafety(false); // best-effort crash reporting only; disables safety checks that could re-invoke panic() and produce "Aether recursive..." instead of the original info. if (panic_stage != 0) { fatalDisplay("Aether recursive panic"); } panic_stage = 1; - fatal("Aether panic at 0x{x}: {s}", .{ first_trace_addr orelse @returnAddress(), msg }); + const first = first_trace_addr orelse @returnAddress(); + + // Capture fp/lr/sp *immediately* at entry (before any local stack alloc or calls + // that might clobber r11 in the generated prologue). This gives us the fp of + // the frame that called the panic handler (i.e. the function containing the + // unreachable/panic site). sp is used for low-conf heuristic scan. + const entry_fp = asm volatile ("mov %[fp], r11" : [fp] "=r" (-> usize)); + const entry_lr = asm volatile ("mov %[lr], lr" : [lr] "=r" (-> usize)); + const entry_sp = asm volatile ("mov %[sp], sp" : [sp] "=r" (-> usize)); + + // Collect early, passing the captured entry context so the walker starts + // from the caller's frame (not inside the panic handler). Larger array + sp + // scan for more (even low-confidence) frames. + var addrs: [64]usize = undefined; + const n = collectStackAddresses(first, &addrs, entry_fp, entry_lr, entry_sp); + + var trace_buf: [512]u8 = undefined; + var fixed: std.Io.Writer = .fixed(&trace_buf); + + fixed.print("Aether panic at 0x{x}: {s}\n", .{first, msg}) catch {}; + // entry_fp / entry_sp are printed here for the svc debug output (3dslink etc.) 
+ // They are omitted from the short user_str to keep the error screen clean. + fixed.print("entry_fp=0x{x} entry_sp=0x{x}\n", .{entry_fp, entry_sp}) catch {}; + + // error return trace (if any) + if (@errorReturnTrace()) |t| { + const nerr = @min(t.index, t.instruction_addresses.len); + if (nerr > 0) { + fixed.writeAll("error return trace:\n") catch {}; + for (t.instruction_addresses[0..nerr], 0..) |a, i| { + fixed.print("{d: >2}: 0x{x:0>8}\n", .{ i, a }) catch {}; + } + } + } + + // current stack via manual fp walk (seeded from entry + walked from caller's fp) + // + heuristic scan (filtered to likely RAs). Skip internal safety panic + // handler frames (they are just noise in the trace). + fixed.writeAll("stack trace:\n") catch {}; + const show = @min(n, 32); + for (addrs[0..show], 0..) |a, i| { + fixed.print("{d: >2}: 0x{x:0>8}\n", .{ i, a }) catch {}; + } + if (n == 0) { + fixed.print("0: 0x{x:0>8}\n", .{first}) catch {}; + } + + const text = fixed.buffered(); + + debugString(text); + debugString("\n"); + + // For ERRF_SetUserString (max ~256 bytes per libctru), build a compact version + // that prioritizes the PC + as many stack addresses as will fit. This is what + // appears in CFW error screens / exception logs when the full Reason is limited. + // The pretty multi-line version above still goes to debugString (visible via 3dslink etc.). + var user_buf: [256:0]u8 = @splat(0); + var uw: std.Io.Writer = .fixed(&user_buf); + // short header to leave room for addresses + var hbuf: [96]u8 = undefined; + const h = std.fmt.bufPrint(&hbuf, "Aether panic at 0x{x}: {s}\n", .{first, msg}) catch "Aether panic\n"; + uw.writeAll(h[0..@min(h.len, 70)]) catch {}; + if (@errorReturnTrace()) |t| { + const nerr = @min(t.index, t.instruction_addresses.len); + if (nerr > 0) { + uw.writeAll("err:") catch {}; + for (t.instruction_addresses[0..@min(nerr,3)], 0..) 
|a, i| { + uw.print(" {d}:0x{x}", .{i, a}) catch {}; + } + uw.writeAll("\n") catch {}; + } + } + uw.writeAll("stack:") catch {}; + const nprint = @min(n, 30); + for (addrs[0..nprint], 0..) |a, i| { + uw.print(" {d}:0x{x}", .{i, a}) catch {}; + } + if (nprint == 0) { + uw.print(" 0:0x{x}", .{first}) catch {}; + } + uw.writeAll("\n") catch {}; + const end = @min(uw.end, 255); + const user_str = user_buf[0..end :0]; + + _ = errfInit(); + _ = ERRF_SetUserString(user_str.ptr); + + // The failure message shown as "Reason" on the error screen is limited (~0x60 bytes). + // Use Writer.fixed to cleanly format PC + as many stack addrs as fit. + var throw_buf: [0x60:0]u8 = @splat(0); + var w: std.Io.Writer = .fixed(&throw_buf); + w.print("Aether at 0x{x}: {s}", .{first, msg}) catch {}; + if (n > 0) { + w.print(" [", .{}) catch {}; + const max_short = 4; + for (addrs[0..@min(n, max_short)], 0..) |a, i| { + if (i > 0) w.print(" ", .{}) catch {}; + w.print("{d}:0x{x}", .{i, a}) catch {}; + } + if (n > max_short) w.print("..", .{}) catch {}; + w.print("]", .{}) catch {}; + } + _ = w.flush() catch {}; + _ = ERRF_ThrowResultWithMessage(fatal_result, &throw_buf); + + svcBreak(USERBREAK_PANIC); + while (true) {} } fn installCrashHandlers() void { From 49953a9c563c69bee27ff30eb360711fbae71f32 Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Sat, 6 Jun 2026 09:51:54 -0400 Subject: [PATCH 24/44] Delete 3ds_gfx.zig --- src/platform/3ds/3ds_gfx.zig | 1217 ---------------------------------- 1 file changed, 1217 deletions(-) delete mode 100644 src/platform/3ds/3ds_gfx.zig diff --git a/src/platform/3ds/3ds_gfx.zig b/src/platform/3ds/3ds_gfx.zig deleted file mode 100644 index 366773c..0000000 --- a/src/platform/3ds/3ds_gfx.zig +++ /dev/null @@ -1,1217 +0,0 @@ -//! Nintendo 3DS Citro3D backend. -//! -//! The top screen render target is physically 240x400 and displayed rotated. -//! This backend keeps Aether's normal landscape projection contract by -//! 
transforming vertices to top-screen coordinates on the CPU, then using -//! Citro3D's tilted orthographic projection for the final hardware transform. - -const std = @import("std"); -const Util = @import("../../util/util.zig"); -const Mat4 = @import("../../math/math.zig").Mat4; -const Rendering = @import("../../rendering/rendering.zig"); -const Pipeline = Rendering.Pipeline; -const Mesh = Rendering.mesh; -const Texture = Rendering.Texture; - -const C3D_AttrInfo = opaque {}; -const C3D_BufInfo = opaque {}; -const C3D_RenderTarget = extern struct { - next: ?*C3D_RenderTarget, - prev: ?*C3D_RenderTarget, - frameBuf: C3D_FrameBuf, - used: bool, - ownsColor: bool, - ownsDepth: bool, - linked: bool, - screen: c_int, - side: c_int, - transferFlags: u32, -}; -const C3D_FrameBuf = extern struct { - colorBuf: ?*anyopaque, - depthBuf: ?*anyopaque, - width: u16, - height: u16, - colorFmt: c_int, - depthFmt: c_int, - block32: bool, - masks: u8, -}; -const C3D_TexEnv = extern struct { - srcRgb: u16, - srcAlpha: u16, - opAll: u32, - funcRgb: u16, - funcAlpha: u16, - color: u32, - scaleRgb: u16, - scaleAlpha: u16, -}; -const C3D_FVec = extern struct { - w: f32, - z: f32, - y: f32, - x: f32, -}; -const C3D_Mtx = extern struct { - r: [4]C3D_FVec, -}; -const C3D_Tex = extern struct { - data: ?*anyopaque, - fmt_size: u32, - dim: u32, - param: u32, - border: u32, - lod_param: u32, -}; -const C3D_FogLut = extern struct { - data: [128]u32, -}; - -const DVLP = extern struct { - codeSize: u32, - codeData: [*]u32, - opdescSize: u32, - opcdescData: [*]u32, -}; -const DVLEConstEntry = extern struct { - typ: u16, - id: u16, - data: [4]u32, -}; -const DVLEOutEntry = extern struct { - typ: u16, - regID: u16, - mask: u8, - unk: [3]u8, -}; -const DVLEUniformEntry = extern struct { - symbolOffset: u32, - startReg: u16, - endReg: u16, -}; -const DVLE = extern struct { - typ: c_int, - mergeOutmaps: bool, - gshMode: c_int, - gshFixedVtxStart: u8, - gshVariableVtxNum: u8, - gshFixedVtxNum: u8, - dvlp: 
*DVLP, - mainOffset: u32, - endmainOffset: u32, - constTableSize: u32, - constTableData: [*]DVLEConstEntry, - outTableSize: u32, - outTableData: [*]DVLEOutEntry, - uniformTableSize: u32, - uniformTableData: [*]DVLEUniformEntry, - symbolTableData: [*]u8, - outmapMask: u8, - outmapData: [8]u32, - outmapMode: u32, - outmapClock: u32, -}; -const DVLB = extern struct { - numDVLE: u32, - DVLP: DVLP, - DVLE: [*]DVLE, -}; -const ShaderInstance = opaque {}; -const ShaderProgram = extern struct { - vertexShader: ?*ShaderInstance, - geometryShader: ?*ShaderInstance, - geoShaderInputPermutation: [2]u32, - geoShaderInputStride: u8, -}; - -extern fn gfxInitDefault() void; -extern fn gfxExit() void; - -extern fn C3D_Init(cmdBufSize: usize) bool; -extern fn C3D_Fini() void; -extern fn C3D_FrameBegin(flags: u8) bool; -extern fn C3D_FrameDrawOn(target: *C3D_RenderTarget) bool; -extern fn C3D_FrameEnd(flags: u8) void; -extern fn C3D_RenderTargetCreate(width: c_int, height: c_int, colorFmt: c_int, depthFmt: c_int) ?*C3D_RenderTarget; -extern fn C3D_RenderTargetDelete(target: *C3D_RenderTarget) void; -extern fn C3D_RenderTargetSetOutput(target: ?*C3D_RenderTarget, screen: c_int, side: c_int, transferFlags: u32) void; -extern fn C3D_FrameBufClear(fb: *C3D_FrameBuf, clearBits: c_int, clearColor: u32, clearDepth: u32) void; -extern fn C3D_BindProgram(program: *ShaderProgram) void; -extern fn C3D_GetAttrInfo() *C3D_AttrInfo; -extern fn AttrInfo_Init(info: *C3D_AttrInfo) void; -extern fn AttrInfo_AddLoader(info: *C3D_AttrInfo, regId: c_int, format: c_int, count: c_int) c_int; -extern fn C3D_GetBufInfo() *C3D_BufInfo; -extern fn BufInfo_Init(info: *C3D_BufInfo) void; -extern fn BufInfo_Add(info: *C3D_BufInfo, data: ?*const anyopaque, stride: isize, attribCount: c_int, permutation: u64) c_int; -extern fn C3D_GetTexEnv(id: c_int) *C3D_TexEnv; -extern fn C3D_DirtyTexEnv(env: *C3D_TexEnv) void; -extern fn C3D_CullFace(mode: c_int) void; -extern fn C3D_DepthTest(enable: bool, function: c_int, 
writemask: c_int) void; -extern fn C3D_AlphaTest(enable: bool, function: c_int, ref: c_int) void; -extern fn C3D_AlphaBlend(colorEq: c_int, alphaEq: c_int, srcClr: c_int, dstClr: c_int, srcAlpha: c_int, dstAlpha: c_int) void; -extern fn C3D_DrawArrays(primitive: c_int, first: c_int, size: c_int) void; -extern fn C3D_TexInitWithParams(tex: *C3D_Tex, cube: ?*anyopaque, params: u64) bool; -extern fn C3D_TexLoadImage(tex: *C3D_Tex, data: ?*const anyopaque, face: c_int, level: c_int) void; -extern fn C3D_TexBind(unitId: c_int, tex: *C3D_Tex) void; -extern fn C3D_TexDelete(tex: *C3D_Tex) void; -extern fn C3D_FogGasMode(fogMode: c_int, gasMode: c_int, zFlip: bool) void; -extern fn C3D_FogColor(color: u32) void; -extern fn C3D_FogLutBind(lut: *C3D_FogLut) void; -extern fn FogLut_FromArray(lut: *C3D_FogLut, data: *const [256]f32) void; -extern fn GSPGPU_FlushDataCache(adr: ?*const anyopaque, size: u32) c_int; -extern fn Mtx_OrthoTilt(mtx: *C3D_Mtx, left: f32, right: f32, bottom: f32, top: f32, near: f32, far: f32, isLeftHanded: bool) void; -extern fn linearAlloc(size: usize) ?*anyopaque; -extern fn linearFree(mem: ?*anyopaque) void; - -extern fn DVLB_ParseFile(shbinData: [*]u32, shbinSize: u32) ?*DVLB; -extern fn DVLB_Free(dvlb: *DVLB) void; -extern fn shaderProgramInit(sp: *ShaderProgram) c_int; -extern fn shaderProgramFree(sp: *ShaderProgram) c_int; -extern fn shaderProgramSetVsh(sp: *ShaderProgram, dvle: *DVLE) c_int; -extern fn shaderInstanceGetUniformLocation(si: *ShaderInstance, name: [*:0]const u8) i8; - -extern var C3D_FVUnif: [2][C3D_FVUNIF_COUNT]C3D_FVec; -extern var C3D_FVUnifDirty: [2][C3D_FVUNIF_COUNT]bool; - -const C3D_DEFAULT_CMDBUF_SIZE = 0x40000; -const C3D_FRAME_SYNCDRAW = 1 << 0; -const C3D_CLEAR_COLOR = 1 << 0; -const C3D_CLEAR_DEPTH = 1 << 1; -const C3D_CLEAR_ALL = C3D_CLEAR_COLOR | C3D_CLEAR_DEPTH; -const C3D_FVUNIF_COUNT = 96; - -const GPU_VERTEX_SHADER = 0; -const GPU_BYTE = 0; -const GPU_UNSIGNED_BYTE = 1; -const GPU_SHORT = 2; -const GPU_FLOAT = 3; 
-const GPU_RB_RGBA8 = 0; -const GPU_RB_DEPTH24_STENCIL8 = 3; -const GPU_ALWAYS = 1; -const GPU_GREATER = 6; -const GPU_GEQUAL = 7; -const GPU_WRITE_COLOR = 0x0F; -const GPU_WRITE_DEPTH = 0x10; -const GPU_CULL_NONE = 0; -const GPU_CULL_BACK_CCW = 2; -const GPU_BLEND_ADD = 0; -const GPU_ZERO = 0; -const GPU_ONE = 1; -const GPU_SRC_ALPHA = 6; -const GPU_ONE_MINUS_SRC_ALPHA = 7; -const GPU_PRIMARY_COLOR = 0x00; -const GPU_TEXTURE0 = 0x03; -const GPU_PREVIOUS = 0x0F; -const GPU_REPLACE = 0x00; -const GPU_MODULATE = 0x01; -const GPU_TEVSCALE_1 = 0x0; -const GPU_TRIANGLES = 0x0000; -const GPU_NO_FOG = 0; -const GPU_FOG = 5; -const GPU_PLAIN_DENSITY = 0; -const GPU_TEX_2D = 0; -const GPU_TEXFACE_2D = 0; -const GPU_RGBA8 = 0; -const GPU_NEAREST = 0; -const GPU_LINEAR = 1; -const GPU_CLAMP_TO_EDGE = 0; -const GPU_REPEAT = 2; - -const GFX_TOP = 0; -const GFX_LEFT = 0; -const GX_TRANSFER_FMT_RGB8 = 1; -const DISPLAY_TRANSFER_FLAGS = GX_TRANSFER_FMT_RGB8 << 12; -const TOP_SCREEN_WIDTH: f32 = 400.0; -const TOP_SCREEN_HEIGHT: f32 = 240.0; -const TEXTURE_BPP: usize = 4; -const DATA_CACHE_LINE_SIZE: usize = 32; -const DEPTH_CLEAR: u32 = 0; -const ALPHA_REF: c_int = 26; -const LINEAR_MESH_MIN_CAPACITY: usize = 256; -const OS_OLD_FCRAM_VADDR: usize = 0x14000000; -const OS_OLD_FCRAM_SIZE: usize = 0x08000000; -const OS_FCRAM_VADDR: usize = 0x30000000; -const OS_FCRAM_SIZE: usize = 0x10000000; -const OS_VRAM_VADDR: usize = 0x1F000000; -const OS_VRAM_SIZE: usize = 0x00600000; -const MIN_TEXTURE_SIZE: u32 = 8; -const SMALL_TEXTURE_EXPAND_SIZE: u32 = 32; -const MAX_TEXTURE_SIZE: u32 = 1024; -const DEBUG_TEXTURE_ONLY = false; -const DEBUG_COLOR_ONLY = false; -const DEBUG_DRAW_QUAD_CHUNKS = true; -const DEBUG_DRAW_CHUNK_VERTICES: usize = 3; - -const PipelineData = struct { - program: ShaderProgram, - dvlb: *DVLB, - stride: usize, - position_attr: Pipeline.Attribute, - uv_attr: Pipeline.Attribute, - color_attr: Pipeline.Attribute, - position_loader_size: u8, - uv_loader_size: u8, - 
color_loader_size: u8, - buffer_base_offset: usize, - buffer_attribute_count: u8, - buffer_permutation: u64, - projection_loc: i8, - model_view_loc: i8, - screen_projection_loc: i8, - pos_scale_loc: i8, - uv_scale_offset_loc: i8, - color_scale_loc: i8, - pos_scale: [3]f32, - uv_attr_scale: [2]f32, - color_scale: [4]f32, -}; - -const MeshData = struct { - pipeline: Pipeline.Handle, - ptr: ?[*]const u8 = null, - owned_ptr: ?[*]u8 = null, - len: usize = 0, - capacity: usize = 0, -}; - -const TextureData = struct { - width: u32, - height: u32, - tex_width: u16, - tex_height: u16, - uv_scale: [2]f32, - upload_data: []align(16) u8, - tex: C3D_Tex, -}; - -var render_io: std.Io = undefined; - -var initialized: bool = false; -var target: ?*C3D_RenderTarget = null; -var clear_color: u32 = 0x000000FF; -var vsync_enabled: bool = true; -var current_pipeline: Pipeline.Handle = 0; -var current_proj: Mat4 = Mat4.identity(); -var current_view: Mat4 = Mat4.identity(); -var uv_offset: [2]f32 = .{ 0.0, 0.0 }; -var depth_write_enabled: bool = true; -var screen_projection: C3D_Mtx = undefined; -var fog_lut: C3D_FogLut = undefined; -var white_texture: C3D_Tex = undefined; -var white_texture_ready: bool = false; -var bound_texture: Texture.Handle = 0; - -var pipelines = Util.CircularBuffer(PipelineData, 16).init(); -var meshes = Util.CircularBuffer(MeshData, 2048).init(); -var textures = Util.CircularBuffer(TextureData, 64).init(); - -pub fn setup(alloc: std.mem.Allocator, io: std.Io) void { - _ = alloc; - render_io = io; -} - -pub fn init() anyerror!void { - _ = render_io; - - gfxInitDefault(); - errdefer gfxExit(); - - if (!C3D_Init(C3D_DEFAULT_CMDBUF_SIZE)) return error.GfxInitFailed; - errdefer C3D_Fini(); - - target = C3D_RenderTargetCreate(240, 400, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8); - if (target == null) return error.GfxInitFailed; - errdefer { - C3D_RenderTargetDelete(target.?); - target = null; - } - - C3D_RenderTargetSetOutput(target, GFX_TOP, GFX_LEFT, 
DISPLAY_TRANSFER_FLAGS); - Mtx_OrthoTilt(&screen_projection, 0.0, TOP_SCREEN_WIDTH, 0.0, TOP_SCREEN_HEIGHT, 0.0, 1.0, true); - - configure_texture_texenv(); - try init_white_texture(); - C3D_CullFace(GPU_CULL_NONE); - apply_depth_state(); - C3D_FogGasMode(GPU_NO_FOG, GPU_PLAIN_DENSITY, false); - set_alpha_blend(true); - - initialized = true; -} - -pub fn deinit() void { - destroy_all_meshes(); - destroy_all_pipelines(); - destroy_all_textures(); - current_pipeline = 0; - bound_texture = 0; - - if (white_texture_ready) { - C3D_TexDelete(&white_texture); - white_texture_ready = false; - } - - if (target) |t| { - C3D_RenderTargetDelete(t); - target = null; - } - - if (initialized) { - C3D_Fini(); - gfxExit(); - initialized = false; - } -} - -pub fn set_clear_color(r: f32, g: f32, b: f32, a: f32) void { - clear_color = (@as(u32, floatByte(r)) << 24) | - (@as(u32, floatByte(g)) << 16) | - (@as(u32, floatByte(b)) << 8) | - @as(u32, floatByte(a)); -} - -pub fn set_alpha_blend(enabled: bool) void { - if (enabled) { - C3D_AlphaBlend(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA); - C3D_AlphaTest(false, GPU_ALWAYS, 0); - } else { - C3D_AlphaBlend(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_ONE, GPU_ZERO, GPU_ONE, GPU_ZERO); - C3D_AlphaTest(false, GPU_ALWAYS, 0); - } -} - -pub fn set_depth_write(enabled: bool) void { - depth_write_enabled = enabled; - apply_depth_state(); -} - -pub fn set_fog(enabled: bool, start: f32, end: f32, r: f32, g: f32, b: f32) void { - if (!enabled or end <= start) { - C3D_FogGasMode(GPU_NO_FOG, GPU_PLAIN_DENSITY, false); - return; - } - - var data: [256]f32 = undefined; - for (&data, 0..) 
|*v, i| { - const z = @as(f32, @floatFromInt(i)) / 255.0; - v.* = @max(0.0, @min(1.0, (z - start) / (end - start))); - } - - FogLut_FromArray(&fog_lut, &data); - C3D_FogColor((@as(u32, floatByte(r)) << 16) | - (@as(u32, floatByte(g)) << 8) | - @as(u32, floatByte(b))); - C3D_FogGasMode(GPU_FOG, GPU_PLAIN_DENSITY, false); - C3D_FogLutBind(&fog_lut); -} - -pub fn set_clip_planes(_: bool) void {} - -pub fn set_culling(enabled: bool) void { - C3D_CullFace(if (enabled) GPU_CULL_BACK_CCW else GPU_CULL_NONE); -} - -pub fn set_uv_offset(u: f32, v: f32) void { - uv_offset = .{ u, v }; -} - -pub fn set_proj_matrix(mat: *const Mat4) void { - current_proj = mat.*; -} - -pub fn set_view_matrix(mat: *const Mat4) void { - current_view = mat.*; -} - -pub fn start_frame() bool { - const t = target orelse return false; - if (!initialized) return false; - - _ = vsync_enabled; - if (!C3D_FrameBegin(C3D_FRAME_SYNCDRAW)) return false; - - render_target_clear(t, C3D_CLEAR_ALL, clear_color, DEPTH_CLEAR); - if (!C3D_FrameDrawOn(t)) { - C3D_FrameEnd(0); - return false; - } - - return true; -} - -pub fn end_frame() void { - if (!initialized) return; - C3D_FrameEnd(0); -} - -pub fn clear_depth() void { - if (target) |t| render_target_clear(t, C3D_CLEAR_DEPTH, clear_color, DEPTH_CLEAR); -} - -pub fn set_vsync(v: bool) void { - vsync_enabled = v; -} - -pub fn create_pipeline(layout: Pipeline.VertexLayout, v_shader: ?[:0]align(4) const u8, _: ?[:0]align(4) const u8) anyerror!Pipeline.Handle { - const code = v_shader orelse return error.InvalidShader; - if (code.len == 0) return error.InvalidShader; - - const dvlb = DVLB_ParseFile(@ptrCast(@constCast(code.ptr)), @intCast(code.len)) orelse return error.InvalidShader; - errdefer DVLB_Free(dvlb); - if (dvlb.numDVLE == 0) return error.InvalidShader; - - var program: ShaderProgram = undefined; - if (shaderProgramInit(&program) != 0) return error.InvalidShader; - errdefer _ = shaderProgramFree(&program); - if (shaderProgramSetVsh(&program, 
&dvlb.DVLE[0]) != 0) return error.InvalidShader; - - const vertex_shader = program.vertexShader orelse return error.InvalidShader; - const projection_loc = shaderInstanceGetUniformLocation(vertex_shader, "projection"); - const model_view_loc = shaderInstanceGetUniformLocation(vertex_shader, "modelView"); - const screen_projection_loc = shaderInstanceGetUniformLocation(vertex_shader, "screenProjection"); - const pos_scale_loc = shaderInstanceGetUniformLocation(vertex_shader, "posScale"); - const uv_scale_offset_loc = shaderInstanceGetUniformLocation(vertex_shader, "uvScaleOffset"); - const color_scale_loc = shaderInstanceGetUniformLocation(vertex_shader, "colorScale"); - if (projection_loc < 0 or - model_view_loc < 0 or - screen_projection_loc < 0 or - pos_scale_loc < 0 or - uv_scale_offset_loc < 0 or - color_scale_loc < 0) - { - return error.InvalidShader; - } - - const position_attr = find_attr(layout, .position) orelse return error.UnsupportedVertexLayout; - const uv_attr = find_attr(layout, .uv) orelse return error.UnsupportedVertexLayout; - const color_attr = find_attr(layout, .color) orelse return error.UnsupportedVertexLayout; - const pos_scale = position_scale(position_attr) orelse return error.UnsupportedVertexLayout; - const uv_attr_scale = uv_scale(uv_attr) orelse return error.UnsupportedVertexLayout; - const color_attr_scale = color_scale(color_attr) orelse return error.UnsupportedVertexLayout; - if (!supported_attr_location(position_attr) or - !supported_attr_location(uv_attr) or - !supported_attr_location(color_attr)) - { - return error.UnsupportedVertexLayout; - } - const buffer_layout = buffer_layout_from_attrs(layout.stride, position_attr, uv_attr, color_attr) orelse return error.UnsupportedVertexLayout; - const data = PipelineData{ - .program = program, - .dvlb = dvlb, - .stride = layout.stride, - .position_attr = position_attr, - .uv_attr = uv_attr, - .color_attr = color_attr, - .position_loader_size = buffer_layout.position_loader_size, - 
.uv_loader_size = buffer_layout.uv_loader_size, - .color_loader_size = buffer_layout.color_loader_size, - .buffer_base_offset = buffer_layout.base_offset, - .buffer_attribute_count = buffer_layout.attribute_count, - .buffer_permutation = buffer_layout.permutation, - .projection_loc = projection_loc, - .model_view_loc = model_view_loc, - .screen_projection_loc = screen_projection_loc, - .pos_scale_loc = pos_scale_loc, - .uv_scale_offset_loc = uv_scale_offset_loc, - .color_scale_loc = color_scale_loc, - .pos_scale = pos_scale, - .uv_attr_scale = uv_attr_scale, - .color_scale = color_attr_scale, - }; - - const handle = pipelines.add_element(data) orelse return error.OutOfPipelines; - return @intCast(handle); -} - -pub fn destroy_pipeline(handle: Pipeline.Handle) void { - const pl = get_pipeline_ptr(handle) orelse return; - _ = shaderProgramFree(&pl.program); - DVLB_Free(pl.dvlb); - _ = pipelines.remove_element(handle); - if (current_pipeline == handle) current_pipeline = 0; -} - -pub fn bind_pipeline(handle: Pipeline.Handle) void { - current_pipeline = handle; -} - -pub fn create_mesh(pipeline: Pipeline.Handle) anyerror!Mesh.Handle { - _ = get_pipeline_ptr(pipeline) orelse return error.InvalidPipeline; - const handle = meshes.add_element(.{ .pipeline = pipeline }) orelse return error.OutOfMeshes; - return @intCast(handle); -} - -pub fn destroy_mesh(handle: Mesh.Handle) void { - const mesh = get_mesh_ptr(handle) orelse return; - free_mesh_vertices(mesh); - _ = meshes.remove_element(handle); -} - -pub fn update_mesh(handle: Mesh.Handle, data: []const u8) void { - const mesh = get_mesh_ptr(handle) orelse return; - - if (data.len == 0) { - mesh.ptr = null; - mesh.len = 0; - return; - } - - if (is_linear_gpu_memory(data.ptr, data.len)) { - if (mesh.owned_ptr != null) free_owned_mesh_vertices(mesh); - mesh.ptr = data.ptr; - mesh.len = data.len; - flush_data_cache_range(data.ptr, data.len); - return; - } - - if (data.len > mesh.capacity) { - free_owned_mesh_vertices(mesh); - 
const new_capacity = linear_mesh_capacity(data.len); - const bytes = alloc_linear_bytes(new_capacity) catch { - mesh.ptr = null; - mesh.len = 0; - mesh.capacity = 0; - return; - }; - mesh.owned_ptr = bytes.ptr; - mesh.capacity = bytes.len; - } - - if (mesh.owned_ptr) |ptr| { - @memcpy(ptr[0..data.len], data); - flush_data_cache_range(@ptrCast(&ptr[0]), data.len); - mesh.ptr = ptr; - } - mesh.len = data.len; -} - -pub fn draw_mesh(handle: Mesh.Handle, model: *const Mat4, count: usize, primitive: Mesh.Primitive) void { - if (!initialized) return; - if (primitive == .lines) return; - - const mesh = get_mesh_ptr(handle) orelse return; - const ptr = mesh.ptr orelse return; - const pipeline_handle = if (current_pipeline != 0) current_pipeline else mesh.pipeline; - const pl = get_pipeline_ptr(pipeline_handle) orelse return; - const available_count = if (pl.stride == 0) 0 else mesh.len / pl.stride; - const draw_count = @min(count, available_count); - if (draw_count == 0) return; - - const model_view = Mat4.mul(model.*, current_view); - - C3D_BindProgram(&pl.program); - configure_fixed_attributes(pl.*); - configure_texture_texenv(); - bind_current_texture_for_draw(); - apply_depth_state(); - upload_aether_matrix_uniform(pl.projection_loc, ¤t_proj); - upload_aether_matrix_uniform(pl.model_view_loc, &model_view); - upload_c3d_matrix_uniform(pl.screen_projection_loc, &screen_projection); - upload_vec_uniform(pl.pos_scale_loc, .{ pl.pos_scale[0], pl.pos_scale[1], pl.pos_scale[2], 1.0 }); - upload_uv_uniform(pl.*); - upload_vec_uniform(pl.color_scale_loc, pl.color_scale); - - if (DEBUG_DRAW_QUAD_CHUNKS) { - var first: usize = 0; - while (first < draw_count) : (first += DEBUG_DRAW_CHUNK_VERTICES) { - const chunk_count = @min(DEBUG_DRAW_CHUNK_VERTICES, draw_count - first); - const chunk_ptr = ptr + first * pl.stride; - if (!configure_draw_buffer(chunk_ptr, pl.*)) return; - C3D_DrawArrays(GPU_TRIANGLES, 0, @intCast(chunk_count)); - } - } else { - if (!configure_draw_buffer(ptr, 
pl.*)) return; - C3D_DrawArrays(GPU_TRIANGLES, 0, @intCast(draw_count)); - } -} - -pub fn create_texture(width: u32, height: u32, data: []align(16) u8) anyerror!Texture.Handle { - const expand_small = width < MIN_TEXTURE_SIZE or height < MIN_TEXTURE_SIZE; - const tex_width: u16 = if (expand_small) @intCast(SMALL_TEXTURE_EXPAND_SIZE) else try texture_dim(width); - const tex_height: u16 = if (expand_small) @intCast(SMALL_TEXTURE_EXPAND_SIZE) else try texture_dim(height); - const upload_len = @as(usize, tex_width) * @as(usize, tex_height) * TEXTURE_BPP; - const upload_data = try alloc_linear_bytes(upload_len); - errdefer free_linear_bytes(upload_data); - - var tex: C3D_Tex = undefined; - if (!tex_init(&tex, tex_width, tex_height, false)) return error.TextureCreateFailed; - errdefer C3D_TexDelete(&tex); - tex_set_default_params(&tex); - - convert_texture_data(upload_data, data, width, height, tex_width, tex_height, expand_small); - tex_upload(&tex, upload_data); - - const handle = textures.add_element(.{ - .width = width, - .height = height, - .tex_width = tex_width, - .tex_height = tex_height, - .uv_scale = if (expand_small) .{ 1.0, 1.0 } else .{ - @as(f32, @floatFromInt(width)) / @as(f32, @floatFromInt(tex_width)), - @as(f32, @floatFromInt(height)) / @as(f32, @floatFromInt(tex_height)), - }, - .upload_data = upload_data, - .tex = tex, - }) orelse return error.OutOfTextures; - - return @intCast(handle); -} - -pub fn update_texture(handle: Texture.Handle, data: []align(16) u8) void { - const tex = get_texture_ptr(handle) orelse return; - const expand_small = tex.width < MIN_TEXTURE_SIZE or tex.height < MIN_TEXTURE_SIZE; - convert_texture_data(tex.upload_data, data, tex.width, tex.height, tex.tex_width, tex.tex_height, expand_small); - tex_upload(&tex.tex, tex.upload_data); -} - -pub fn bind_texture(handle: Texture.Handle) void { - bound_texture = if (get_texture_ptr(handle) != null) handle else 0; -} - -pub fn destroy_texture(handle: Texture.Handle) void { - const tex 
= get_texture_ptr(handle) orelse return; - C3D_TexDelete(&tex.tex); - free_linear_bytes(tex.upload_data); - _ = textures.remove_element(handle); - if (bound_texture == handle) bound_texture = 0; -} - -pub fn force_texture_resident(_: Texture.Handle) void {} - -fn render_target_clear(t: *C3D_RenderTarget, bits: c_int, color: u32, depth: u32) void { - C3D_FrameBufClear(&t.frameBuf, bits, color, depth); -} - -fn apply_depth_state() void { - _ = depth_write_enabled; - C3D_DepthTest(false, GPU_ALWAYS, GPU_WRITE_COLOR); -} - -fn configure_fixed_attributes(pl: PipelineData) void { - const attr = C3D_GetAttrInfo(); - AttrInfo_Init(attr); - add_attr_loader(attr, pl.position_attr, pl.position_loader_size); - add_attr_loader(attr, pl.uv_attr, pl.uv_loader_size); - add_attr_loader(attr, pl.color_attr, pl.color_loader_size); -} - -fn add_attr_loader(info: *C3D_AttrInfo, attr: Pipeline.Attribute, loader_size: u8) void { - const fmt = gpu_attribute_format(attr.format); - _ = AttrInfo_AddLoader(info, @intCast(attr.location), fmt, loader_size); -} - -fn configure_draw_buffer(ptr: [*]const u8, pl: PipelineData) bool { - const buf = C3D_GetBufInfo(); - BufInfo_Init(buf); - const result = BufInfo_Add( - buf, - @ptrCast(&ptr[pl.buffer_base_offset]), - @intCast(pl.stride), - @intCast(pl.buffer_attribute_count), - pl.buffer_permutation, - ); - if (result < 0) { - BufInfo_Init(buf); - return false; - } - return true; -} - -fn configure_texture_texenv() void { - const env = C3D_GetTexEnv(0); - const src = if (DEBUG_COLOR_ONLY) - tev_sources(GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR) - else if (DEBUG_TEXTURE_ONLY) - tev_sources(GPU_TEXTURE0, GPU_TEXTURE0, GPU_TEXTURE0) - else - tev_sources(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR); - const func = if (DEBUG_COLOR_ONLY or DEBUG_TEXTURE_ONLY) GPU_REPLACE else GPU_MODULATE; - - env.* = .{ - .srcRgb = src, - .srcAlpha = src, - .opAll = 0, - .funcRgb = func, - .funcAlpha = func, - .color = 0xFFFFFFFF, - .scaleRgb = 
GPU_TEVSCALE_1, - .scaleAlpha = GPU_TEVSCALE_1, - }; - C3D_DirtyTexEnv(env); - - var stage: c_int = 1; - while (stage < 6) : (stage += 1) { - configure_passthrough_texenv(stage); - } -} - -fn configure_passthrough_texenv(stage: c_int) void { - const env = C3D_GetTexEnv(stage); - env.* = .{ - .srcRgb = tev_sources(GPU_PREVIOUS, 0, 0), - .srcAlpha = tev_sources(GPU_PREVIOUS, 0, 0), - .opAll = 0, - .funcRgb = GPU_REPLACE, - .funcAlpha = GPU_REPLACE, - .color = 0xFFFFFFFF, - .scaleRgb = GPU_TEVSCALE_1, - .scaleAlpha = GPU_TEVSCALE_1, - }; - C3D_DirtyTexEnv(env); -} - -fn tev_sources(a: u16, b: u16, c: u16) u16 { - return a | (b << 4) | (c << 8); -} - -fn bind_current_texture_for_draw() void { - if (get_texture_ptr(bound_texture)) |tex| { - C3D_TexBind(0, &tex.tex); - return; - } - - if (white_texture_ready) { - C3D_TexBind(0, &white_texture); - } -} - -fn upload_aether_matrix_uniform(loc: i8, mat: *const Mat4) void { - const c3d = mat4_to_c3d(mat); - upload_c3d_matrix_uniform(loc, &c3d); -} - -fn upload_c3d_matrix_uniform(loc: i8, mat: *const C3D_Mtx) void { - if (loc < 0) return; - - const base: usize = @intCast(loc); - if (base + 4 > C3D_FVUNIF_COUNT) return; - - inline for (0..4) |i| { - C3D_FVUnif[GPU_VERTEX_SHADER][base + i] = mat.r[i]; - C3D_FVUnifDirty[GPU_VERTEX_SHADER][base + i] = true; - } -} - -fn upload_vec_uniform(loc: i8, v: [4]f32) void { - if (loc < 0) return; - - const base: usize = @intCast(loc); - if (base >= C3D_FVUNIF_COUNT) return; - - C3D_FVUnif[GPU_VERTEX_SHADER][base] = fvec(v[0], v[1], v[2], v[3]); - C3D_FVUnifDirty[GPU_VERTEX_SHADER][base] = true; -} - -fn upload_uv_uniform(pl: PipelineData) void { - const texture_scale = if (get_texture_ptr(bound_texture)) |tex| tex.uv_scale else .{ 1.0, 1.0 }; - upload_vec_uniform(pl.uv_scale_offset_loc, .{ - pl.uv_attr_scale[0] * texture_scale[0], - pl.uv_attr_scale[1] * texture_scale[1], - uv_offset[0] * texture_scale[0], - uv_offset[1] * texture_scale[1], - }); -} - -fn mat4_to_c3d(mat: *const Mat4) 
C3D_Mtx { - return .{ .r = .{ - fvec(mat.data[0][0], mat.data[1][0], mat.data[2][0], mat.data[3][0]), - fvec(mat.data[0][1], mat.data[1][1], mat.data[2][1], mat.data[3][1]), - fvec(mat.data[0][2], mat.data[1][2], mat.data[2][2], mat.data[3][2]), - fvec(mat.data[0][3], mat.data[1][3], mat.data[2][3], mat.data[3][3]), - } }; -} - -fn fvec(x: f32, y: f32, z: f32, w: f32) C3D_FVec { - return .{ .x = x, .y = y, .z = z, .w = w }; -} - -fn destroy_all_pipelines() void { - for (&pipelines.buffer) |*slot| { - if (slot.*) |*pl| { - _ = shaderProgramFree(&pl.program); - DVLB_Free(pl.dvlb); - slot.* = null; - } - } - pipelines.clear(); -} - -fn destroy_all_meshes() void { - for (&meshes.buffer) |*slot| { - if (slot.*) |*mesh| { - free_mesh_vertices(mesh); - slot.* = null; - } - } - meshes.clear(); -} - -fn destroy_all_textures() void { - for (&textures.buffer) |*slot| { - if (slot.*) |*tex| { - C3D_TexDelete(&tex.tex); - free_linear_bytes(tex.upload_data); - slot.* = null; - } - } - textures.clear(); -} - -fn free_mesh_vertices(mesh: *MeshData) void { - free_owned_mesh_vertices(mesh); - mesh.ptr = null; - mesh.len = 0; -} - -fn free_owned_mesh_vertices(mesh: *MeshData) void { - if (mesh.owned_ptr) |ptr| { - linearFree(ptr); - mesh.owned_ptr = null; - } - mesh.capacity = 0; -} - -fn find_attr(layout: Pipeline.VertexLayout, usage: Pipeline.AttributeUsage) ?Pipeline.Attribute { - for (layout.attributes) |attr| { - if (attr.usage == usage) return attr; - } - return null; -} - -fn supported_attr_location(attr: Pipeline.Attribute) bool { - return attr.location < 12; -} - -const BufferLayout = struct { - base_offset: usize, - attribute_count: u8, - permutation: u64, - position_loader_size: u8, - uv_loader_size: u8, - color_loader_size: u8, -}; - -fn buffer_layout_from_attrs(stride: usize, position_attr: Pipeline.Attribute, uv_attr: Pipeline.Attribute, color_attr: Pipeline.Attribute) ?BufferLayout { - var attrs = [_]Pipeline.Attribute{ position_attr, uv_attr, color_attr }; - 
sort_attrs_by_offset(&attrs); - - const base_offset = attrs[0].offset; - var current_rel: usize = 0; - var attribute_count: usize = 0; - var permutation: u64 = 0; - var position_loader_size: u8 = 0; - var uv_loader_size: u8 = 0; - var color_loader_size: u8 = 0; - - for (attrs, 0..) |attr, i| { - if (!attr_fits(stride, attr)) return null; - if (attr.offset < base_offset) return null; - const rel_offset = attr.offset - base_offset; - if (rel_offset != current_rel) return null; - const next_offset = if (i + 1 < attrs.len) attrs[i + 1].offset else stride; - if (next_offset < attr.offset) return null; - const loader_size = attribute_loader_size(attr, next_offset - attr.offset) orelse return null; - const loaded_bytes = attribute_size_bytes_with_count(attr.format, loader_size) orelse return null; - if (loaded_bytes > next_offset - attr.offset) return null; - if (i + 1 < attrs.len and loaded_bytes != next_offset - attr.offset) return null; - const shift: u6 = @intCast(attribute_count * 4); - permutation |= attribute_loader_id(attr.usage) << shift; - attribute_count += 1; - switch (attr.usage) { - .position => position_loader_size = loader_size, - .uv => uv_loader_size = loader_size, - .color => color_loader_size = loader_size, - .normal => unreachable, - } - current_rel = rel_offset + loaded_bytes; - } - - if (stride < current_rel) return null; - if (position_loader_size == 0 or uv_loader_size == 0 or color_loader_size == 0) return null; - - return .{ - .base_offset = base_offset, - .attribute_count = @intCast(attribute_count), - .permutation = permutation, - .position_loader_size = position_loader_size, - .uv_loader_size = uv_loader_size, - .color_loader_size = color_loader_size, - }; -} - -fn sort_attrs_by_offset(attrs: *[3]Pipeline.Attribute) void { - var i: usize = 1; - while (i < attrs.len) : (i += 1) { - var j = i; - while (j > 0 and attrs[j - 1].offset > attrs[j].offset) : (j -= 1) { - const tmp = attrs[j - 1]; - attrs[j - 1] = attrs[j]; - attrs[j] = tmp; - } - } 
-} - -fn attribute_loader_id(usage: Pipeline.AttributeUsage) u64 { - return switch (usage) { - .position => 0, - .uv => 1, - .color => 2, - .normal => unreachable, - }; -} - -fn attr_fits(stride: usize, attr: Pipeline.Attribute) bool { - const size = attribute_size_bytes(attr.format); - return attr.offset <= stride and size <= stride - attr.offset; -} - -fn attribute_loader_size(attr: Pipeline.Attribute, available_bytes: usize) ?u8 { - if (attr.usage == .position and attr.size == 3 and attribute_component_size_bytes(attr.format) == 2 and available_bytes >= 8) { - return 4; - } - if (attr.size > 4) return null; - return @intCast(attr.size); -} - -fn attribute_size_bytes(format: Pipeline.AttributeFormat) usize { - return switch (format) { - .f32x2 => 8, - .f32x3 => 12, - .unorm8x2 => 2, - .unorm8x4 => 4, - .unorm16x2, .snorm16x2 => 4, - .unorm16x3, .snorm16x3 => 6, - }; -} - -fn attribute_size_bytes_with_count(format: Pipeline.AttributeFormat, count: u8) ?usize { - if (count == 0 or count > 4) return null; - return attribute_component_size_bytes(format) * @as(usize, count); -} - -fn attribute_component_size_bytes(format: Pipeline.AttributeFormat) usize { - return switch (format) { - .f32x2, .f32x3 => 4, - .unorm8x2, .unorm8x4 => 1, - .unorm16x2, .unorm16x3, .snorm16x2, .snorm16x3 => 2, - }; -} - -fn gpu_attribute_format(format: Pipeline.AttributeFormat) c_int { - return switch (format) { - .f32x2, .f32x3 => GPU_FLOAT, - .unorm8x2, .unorm8x4 => GPU_UNSIGNED_BYTE, - .unorm16x2, .unorm16x3, .snorm16x2, .snorm16x3 => GPU_SHORT, - }; -} - -fn position_scale(attr: Pipeline.Attribute) ?[3]f32 { - if (attr.size != 3) return null; - return switch (attr.format) { - .f32x3 => .{ 1.0, 1.0, 1.0 }, - .snorm16x3 => .{ snorm16_scale(), snorm16_scale(), snorm16_scale() }, - else => null, - }; -} - -fn uv_scale(attr: Pipeline.Attribute) ?[2]f32 { - if (attr.size != 2) return null; - return switch (attr.format) { - .f32x2 => .{ 1.0, 1.0 }, - .unorm8x2 => .{ unorm8_scale(), 
unorm8_scale() }, - .snorm16x2 => .{ snorm16_scale(), snorm16_scale() }, - else => null, - }; -} - -fn color_scale(attr: Pipeline.Attribute) ?[4]f32 { - if (attr.size != 4) return null; - return switch (attr.format) { - .unorm8x4 => .{ unorm8_scale(), unorm8_scale(), unorm8_scale(), unorm8_scale() }, - else => null, - }; -} - -fn snorm16_scale() f32 { - return 1.0 / 32767.0; -} - -fn unorm8_scale() f32 { - return 1.0 / 255.0; -} - -fn linear_mesh_capacity(required: usize) usize { - var capacity: usize = LINEAR_MESH_MIN_CAPACITY; - while (capacity < required) : (capacity *= 2) {} - return capacity; -} - -fn init_white_texture() !void { - if (white_texture_ready) return; - - const data = try alloc_linear_bytes(MIN_TEXTURE_SIZE * MIN_TEXTURE_SIZE * TEXTURE_BPP); - defer free_linear_bytes(data); - @memset(data, 0xFF); - - if (!tex_init(&white_texture, MIN_TEXTURE_SIZE, MIN_TEXTURE_SIZE, false)) { - return error.TextureCreateFailed; - } - errdefer C3D_TexDelete(&white_texture); - - tex_set_default_params(&white_texture); - tex_upload(&white_texture, data[0..]); - white_texture_ready = true; -} - -fn tex_init(tex: *C3D_Tex, width: u16, height: u16, vram: bool) bool { - return C3D_TexInitWithParams(tex, null, tex_init_params(width, height, 0, GPU_RGBA8, GPU_TEX_2D, vram)); -} - -fn tex_upload(tex: *C3D_Tex, data: []align(16) const u8) void { - flush_data_cache_range(data.ptr, data.len); - C3D_TexLoadImage(tex, data.ptr, GPU_TEXFACE_2D, 0); -} - -fn flush_data_cache_range(ptr: *const anyopaque, len: usize) void { - if (len == 0) return; - - const start = @intFromPtr(ptr); - const aligned_start = std.mem.alignBackward(usize, start, DATA_CACHE_LINE_SIZE); - const aligned_end = std.mem.alignForward(usize, start + len, DATA_CACHE_LINE_SIZE); - const aligned_len = aligned_end - aligned_start; - const aligned_ptr: *const anyopaque = @ptrFromInt(aligned_start); - _ = GSPGPU_FlushDataCache(aligned_ptr, @intCast(aligned_len)); -} - -fn is_linear_gpu_memory(ptr: *const anyopaque, 
len: usize) bool { - if (len == 0) return true; - - const start = @intFromPtr(ptr); - return range_in_region(start, len, OS_FCRAM_VADDR, OS_FCRAM_SIZE) or - range_in_region(start, len, OS_OLD_FCRAM_VADDR, OS_OLD_FCRAM_SIZE) or - range_in_region(start, len, OS_VRAM_VADDR, OS_VRAM_SIZE); -} - -fn range_in_region(start: usize, len: usize, base: usize, size: usize) bool { - if (start < base) return false; - const rel = start - base; - return rel <= size and len <= size - rel; -} - -fn tex_set_default_params(tex: *C3D_Tex) void { - tex.param &= ~(gpu_texture_mag_filter(GPU_LINEAR) | gpu_texture_min_filter(GPU_LINEAR)); - tex.param |= gpu_texture_mag_filter(GPU_NEAREST) | gpu_texture_min_filter(GPU_NEAREST); - tex.param &= ~(gpu_texture_wrap_s(3) | gpu_texture_wrap_t(3)); - tex.param |= gpu_texture_wrap_s(GPU_CLAMP_TO_EDGE) | gpu_texture_wrap_t(GPU_CLAMP_TO_EDGE); -} - -fn tex_init_params(width: u16, height: u16, max_level: u8, format: u8, tex_type: u8, vram: bool) u64 { - const flags0: u8 = (max_level & 0x0F) | ((format & 0x0F) << 4); - const flags1: u8 = (tex_type & 0x07) | (@as(u8, @intFromBool(vram)) << 3); - return @as(u64, width) | - (@as(u64, height) << 16) | - (@as(u64, flags0) << 32) | - (@as(u64, flags1) << 40); -} - -fn gpu_texture_mag_filter(v: u32) u32 { - return (v & 0x1) << 1; -} - -fn gpu_texture_min_filter(v: u32) u32 { - return (v & 0x1) << 2; -} - -fn gpu_texture_wrap_s(v: u32) u32 { - return (v & 0x3) << 12; -} - -fn gpu_texture_wrap_t(v: u32) u32 { - return (v & 0x3) << 8; -} - -fn get_texture_ptr(handle: Texture.Handle) ?*TextureData { - if (handle == 0 or handle >= textures.buffer.len) return null; - if (textures.buffer[handle]) |*tex| return tex; - return null; -} - -fn get_pipeline_ptr(handle: Pipeline.Handle) ?*PipelineData { - if (handle == 0 or handle >= pipelines.buffer.len) return null; - if (pipelines.buffer[handle]) |*pl| return pl; - return null; -} - -fn get_mesh_ptr(handle: Mesh.Handle) ?*MeshData { - if (handle == 0 or handle >= 
meshes.buffer.len) return null; - if (meshes.buffer[handle]) |*mesh| return mesh; - return null; -} - -fn texture_dim(value: u32) !u16 { - if (value == 0 or value > MAX_TEXTURE_SIZE) return error.InvalidTextureSize; - - var out: u32 = MIN_TEXTURE_SIZE; - while (out < value) : (out <<= 1) {} - if (out > MAX_TEXTURE_SIZE) return error.InvalidTextureSize; - return @intCast(out); -} - -fn alloc_linear_bytes(len: usize) ![]align(16) u8 { - const mem = linearAlloc(len) orelse return error.OutOfMemory; - const aligned: *align(16) anyopaque = @alignCast(mem); - const ptr: [*]align(16) u8 = @ptrCast(aligned); - return ptr[0..len]; -} - -fn free_linear_bytes(bytes: []align(16) u8) void { - linearFree(bytes.ptr); -} - -fn convert_texture_data(dst: []align(16) u8, src: []const u8, width: u32, height: u32, tex_width: u16, tex_height: u16, expand_small: bool) void { - const source_len = @as(usize, width) * @as(usize, height) * TEXTURE_BPP; - if (src.len < source_len) return; - - const tw: u32 = tex_width; - const th: u32 = tex_height; - for (0..th) |y| { - const source_y = if (expand_small) - @min((@as(u32, @intCast(y)) * height) / th, height - 1) - else - @min(@as(u32, @intCast(y)), height - 1); - const sy = height - 1 - source_y; - for (0..tw) |x| { - const sx = if (expand_small) - @min((@as(u32, @intCast(x)) * width) / tw, width - 1) - else - @min(@as(u32, @intCast(x)), width - 1); - const src_off = (@as(usize, sy) * width + sx) * TEXTURE_BPP; - const dst_off = tiled_pixel_offset(@intCast(x), @intCast(y), tw) * TEXTURE_BPP; - dst[dst_off + 0] = src[src_off + 3]; - dst[dst_off + 1] = src[src_off + 2]; - dst[dst_off + 2] = src[src_off + 1]; - dst[dst_off + 3] = src[src_off + 0]; - } - } -} - -fn tiled_pixel_offset(x: u32, y: u32, width: u32) usize { - const tile_x = x & ~@as(u32, 7); - const tile_y = y & ~@as(u32, 7); - const tile_base = tile_y * width + tile_x * 8; - return @intCast(tile_base + morton8(x & 7, y & 7)); -} - -fn morton8(x: u32, y: u32) u32 { - return (x & 1) | - 
((y & 1) << 1) | - ((x & 2) << 1) | - ((y & 2) << 2) | - ((x & 4) << 2) | - ((y & 4) << 3); -} - -fn floatByte(v: f32) u8 { - return @intFromFloat(@max(0.0, @min(1.0, v)) * 255.0); -} From a88e1ceafb9a3f9c034e83f0f8a073e53f574ae3 Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Sat, 6 Jun 2026 10:00:25 -0400 Subject: [PATCH 25/44] Fix assert crash --- src/core/paths.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/paths.zig b/src/core/paths.zig index da9a5b9..96538a1 100644 --- a/src/core/paths.zig +++ b/src/core/paths.zig @@ -117,7 +117,7 @@ fn resolve_nintendo(io: Io, app_name: []const u8) Error!Dirs { errdefer data.close(io); const resources = if (NintendoIo.mountResources()) - Io.Dir.openDirAbsolute(io, "romfs:/", .{}) catch data + Io.Dir.cwd().openDir(io, "romfs:/", .{}) catch data else data; From b6dc17482aa5d08ddca97ac16636acc388f027ac Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Sat, 6 Jun 2026 10:05:30 -0400 Subject: [PATCH 26/44] Create 3ds_gfx.zig --- src/platform/3ds/3ds_gfx.zig | 65 ++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 src/platform/3ds/3ds_gfx.zig diff --git a/src/platform/3ds/3ds_gfx.zig b/src/platform/3ds/3ds_gfx.zig new file mode 100644 index 0000000..d06fd52 --- /dev/null +++ b/src/platform/3ds/3ds_gfx.zig @@ -0,0 +1,65 @@ +//! 3DS graphics stub. +//! +//! This backend intentionally satisfies the graphics interface without +//! touching the 3DS GPU. It lets the rest of the platform path compile and +//! run while the real Citro3D/PICA200 renderer is still absent. 
+ +const std = @import("std"); +const Mat4 = @import("../../math/math.zig").Mat4; +const Rendering = @import("../../rendering/rendering.zig"); +const Pipeline = Rendering.Pipeline; +const Mesh = Rendering.mesh; +const Texture = Rendering.Texture; + +var render_alloc: std.mem.Allocator = undefined; +var render_io: std.Io = undefined; + +pub fn setup(alloc: std.mem.Allocator, io: std.Io) void { + render_alloc = alloc; + render_io = io; +} + +pub fn init() anyerror!void {} +pub fn deinit() void {} + +pub fn set_clear_color(_: f32, _: f32, _: f32, _: f32) void {} +pub fn set_alpha_blend(_: bool) void {} +pub fn set_depth_write(_: bool) void {} +pub fn set_fog(_: bool, _: f32, _: f32, _: f32, _: f32, _: f32) void {} +pub fn set_clip_planes(_: bool) void {} +pub fn set_culling(_: bool) void {} +pub fn set_uv_offset(_: f32, _: f32) void {} +pub fn set_proj_matrix(_: *const Mat4) void {} +pub fn set_view_matrix(_: *const Mat4) void {} + +pub fn start_frame() bool { + return false; +} + +pub fn end_frame() void {} +pub fn clear_depth() void {} +pub fn set_vsync(_: bool) void {} + +pub fn create_pipeline(_: Pipeline.VertexLayout, _: ?[:0]align(4) const u8, _: ?[:0]align(4) const u8) anyerror!Pipeline.Handle { + return 0; +} + +pub fn destroy_pipeline(_: Pipeline.Handle) void {} +pub fn bind_pipeline(_: Pipeline.Handle) void {} + +pub fn create_mesh(_: Pipeline.Handle) anyerror!Mesh.Handle { + return 0; +} + +pub fn destroy_mesh(_: Mesh.Handle) void {} +pub fn update_mesh(_: Mesh.Handle, _: []const u8) void {} +pub fn draw_mesh(_: Mesh.Handle, _: *const Mat4, _: usize, _: Mesh.Primitive) void {} + +pub fn create_texture(_: u32, _: u32, _: []align(16) u8) anyerror!Texture.Handle { + return 0; +} + +pub fn update_texture(_: Texture.Handle, _: []align(16) u8) void {} +pub fn bind_texture(_: Texture.Handle) void {} +pub fn destroy_texture(_: Texture.Handle) void {} +pub fn force_texture_resident(_: Texture.Handle) void {} From 39881f4a8915adb52427212a54d0b91276618184 Mon Sep 17 
00:00:00 2001 From: Nathan Bourgeois Date: Sat, 6 Jun 2026 11:15:54 -0400 Subject: [PATCH 27/44] 3DS GFX Rewrite --- build.zig | 50 +- src/engine.zig | 42 +- src/platform/3ds/3ds_gfx.zig | 1007 ++++++++++++++++++++++- src/platform/gfx_api.zig | 2 +- src/platform/glfw/opengl/opengl_gfx.zig | 9 +- src/platform/glfw/vulkan/vulkan_gfx.zig | 7 +- src/platform/headless/headless_gfx.zig | 2 +- src/platform/psp/psp_gfx_ge.zig | 7 +- src/platform/switch/switch_gfx.zig | 7 +- src/rendering/mesh.zig | 16 +- src/rendering/texture.zig | 6 +- test/main.zig | 1 + test/test.png | Bin 83 -> 342 bytes 13 files changed, 1054 insertions(+), 102 deletions(-) diff --git a/build.zig b/build.zig index e31b280..166a558 100644 --- a/build.zig +++ b/build.zig @@ -175,6 +175,14 @@ fn devkitProPath(b: *std.Build) []const u8 { return p; } +fn add3dsCImportPaths(mod: *std.Build.Module, dkp: []const u8) void { + const b = mod.owner; + // Keep newlib before libctru so libctru's include_next sys wrappers + // resolve during Zig's C translation of Citro3D/libctru headers. + mod.addIncludePath(.{ .cwd_relative = b.pathJoin(&.{ dkp, "devkitARM/arm-none-eabi/include" }) }); + mod.addIncludePath(.{ .cwd_relative = b.pathJoin(&.{ dkp, "libctru/include" }) }); +} + /// Creates a `3dslink` command for pushing an installed `.3dsx` to a /// networked 3DS. Reuses Aether's devkitPro option/cache so downstream /// builds do not need to redeclare `-Ddevkitpro-path`. 
@@ -324,6 +332,10 @@ pub fn addGame(owner: *std.Build, b: *std.Build, opts: GameOptions) *std.Build.S } } + if (config.platform == .nintendo_3ds) { + add3dsCImportPaths(mod, devkitProPath(b)); + } + // --- user executable --- const user_mod = b.createModule(.{ .root_source_file = opts.root_source_file, @@ -431,6 +443,10 @@ pub fn addHeadless(owner: *std.Build, b: *std.Build, opts: HeadlessOptions) *std mod.addImport("pspsdk", pd.module("pspsdk")); } + if (config.platform == .nintendo_3ds) { + add3dsCImportPaths(mod, devkitProPath(b)); + } + const user_mod = b.createModule(.{ .root_source_file = opts.root_source_file, .target = target, @@ -1011,7 +1027,7 @@ fn threedsxPipeline(b: *std.Build, exe: *std.Build.Step.Compile, opts: ExportOpt // Standard 3DS arch flags from devkitPro's template Makefile. const arch = [_][]const u8{ - "-march=armv6k", "-mtune=mpcore", "-mfloat-abi=hard", "-mtp=soft", + "-march=armv6k", "-mtune=mpcore", "-mfloat-abi=hard", "-mtp=soft", // Keep frame pointers so manual r11-based stack walk in panic handler // can produce useful unwind (otherwise gcc -O* uses r11 as temp and // chains are absent or clobbered, leading to data aborts in the walker). 
@@ -1267,23 +1283,20 @@ pub fn addShader(owner: *std.Build, b: *std.Build, exe: *std.Build.Step.Compile, const picasso = b.pathJoin(&.{ devkitProPath(b), "tools/bin/picasso" }); const sources = b.addWriteFiles(); const vert_src = sources.add(name ++ "_3ds.v.pica", - \\.fvec projection[4], modelView[4], screenProjection[4] + \\.fvec projection[4], modelView[4] \\.fvec posScale, uvScaleOffset, colorScale \\ - \\.constf myconst(0.0, 1.0, -1.0, 0.0) - \\.constf viewport(200.0, 120.0, 0.0, 0.0) + \\.constf myconst(0.0, 1.0, 0.0, 0.0) \\.alias zeros myconst.xxxx \\.alias ones myconst.yyyy - \\.alias negOnes myconst.zzzz - \\.alias halfViewport viewport.xyyy \\ \\.out outpos position \\.out outtc0 texcoord0 \\.out outclr color \\ \\.alias inpos v0 - \\.alias inclr v1 - \\.alias inuv v2 + \\.alias inuv v1 + \\.alias inclr v2 \\ \\.proc main \\ mul r0.xyz, posScale, inpos @@ -1294,23 +1307,10 @@ pub fn addShader(owner: *std.Build, b: *std.Build, exe: *std.Build.Step.Compile, \\ dp4 r1.z, modelView[2], r0 \\ dp4 r1.w, modelView[3], r0 \\ - \\ dp4 r2.x, projection[0], r1 - \\ dp4 r2.y, projection[1], r1 - \\ dp4 r2.z, projection[2], r1 - \\ dp4 r2.w, projection[3], r1 - \\ - \\ add r3.x, r2.x, r2.w - \\ mul r3.x, halfViewport.x, r3.x - \\ add r3.y, r2.y, r2.w - \\ mul r3.y, halfViewport.y, r3.y - \\ mul r3.z, negOnes.z, r2.z - \\ add r3.z, r2.w, r3.z - \\ mov r3.w, r2.w - \\ - \\ dp4 outpos.x, screenProjection[0], r3 - \\ dp4 outpos.y, screenProjection[1], r3 - \\ dp4 outpos.z, screenProjection[2], r3 - \\ dp4 outpos.w, screenProjection[3], r3 + \\ dp4 outpos.x, projection[0], r1 + \\ dp4 outpos.y, projection[1], r1 + \\ dp4 outpos.z, projection[2], r1 + \\ dp4 outpos.w, projection[3], r1 \\ \\ mul outtc0.xy, uvScaleOffset.xy, inuv.xy \\ add outtc0.xy, uvScaleOffset.zw, outtc0.xy diff --git a/src/engine.zig b/src/engine.zig index 9de923f..baf9e42 100644 --- a/src/engine.zig +++ b/src/engine.zig @@ -13,9 +13,36 @@ const options = @import("options"); pub const Pool = 
memory.Pool; pub const MemoryConfig = memory.MemoryConfig; -extern fn linearAlloc(size: usize) ?*anyopaque; -extern fn linearFree(mem: ?*anyopaque) void; -extern fn linearSpaceFree() u32; +const LinearRenderMemory = if (options.config.platform == .nintendo_3ds) struct { + extern fn linearAlloc(size: usize) ?*anyopaque; + extern fn linearFree(mem: ?*anyopaque) void; + extern fn linearSpaceFree() u32; + + fn alloc(size: usize) ?*anyopaque { + return linearAlloc(size); + } + + fn free(mem: ?*anyopaque) void { + linearFree(mem); + } + + fn spaceFree() u32 { + return linearSpaceFree(); + } +} else struct { + fn alloc(size: usize) ?*anyopaque { + _ = size; + return null; + } + + fn free(mem: ?*anyopaque) void { + _ = mem; + } + + fn spaceFree() u32 { + return 0; + } +}; const LINEAR_RENDER_RESERVE_BYTES: usize = 2 * 1024 * 1024; @@ -86,7 +113,7 @@ pub const CategoryTracker = struct { const TRACKER_COUNT = @typeInfo(Pool).@"enum".fields.len; fn alloc_linear_bytes(len: usize) ![]align(16) u8 { - const mem = linearAlloc(len) orelse return error.OutOfMemory; + const mem = LinearRenderMemory.alloc(len) orelse return error.OutOfMemory; const aligned: *align(16) anyopaque = @alignCast(mem); const ptr: [*]align(16) u8 = @ptrCast(aligned); return ptr[0..len]; @@ -190,13 +217,14 @@ pub const Engine = struct { } fn use_external_render_pool(config: Config) bool { - return options.config.platform == .nintendo_3ds and config.render_capacity != null; + if (options.config.platform != .nintendo_3ds) return false; + return config.render_capacity != null; } fn init_linear_render_pool(self: *Engine, config: Config) !void { assert(config.render_capacity.? 
>= config.memory.render); - const available: usize = linearSpaceFree(); + const available: usize = LinearRenderMemory.spaceFree(); if (available <= LINEAR_RENDER_RESERVE_BYTES) return error.OutOfMemory; const capacity = @min(config.render_capacity.?, available - LINEAR_RENDER_RESERVE_BYTES); @@ -213,7 +241,7 @@ pub const Engine = struct { fn deinit_linear_render_pool(self: *Engine) void { if (self.linear_render_mem) |mem| { - linearFree(mem.ptr); + LinearRenderMemory.free(mem.ptr); self.linear_render_mem = null; } } diff --git a/src/platform/3ds/3ds_gfx.zig b/src/platform/3ds/3ds_gfx.zig index d06fd52..e61c6d6 100644 --- a/src/platform/3ds/3ds_gfx.zig +++ b/src/platform/3ds/3ds_gfx.zig @@ -1,16 +1,40 @@ -//! 3DS graphics stub. -//! -//! This backend intentionally satisfies the graphics interface without -//! touching the 3DS GPU. It lets the rest of the platform path compile and -//! run while the real Citro3D/PICA200 renderer is still absent. +//! Citro3D graphics backend for Nintendo 3DS. 
const std = @import("std"); +const Util = @import("../../util/util.zig"); const Mat4 = @import("../../math/math.zig").Mat4; const Rendering = @import("../../rendering/rendering.zig"); const Pipeline = Rendering.Pipeline; const Mesh = Rendering.mesh; const Texture = Rendering.Texture; +const c = @cImport({ + @cDefine("wint_t", "unsigned int"); + @cInclude("3ds/types.h"); + @cInclude("3ds/gpu/enums.h"); + @cInclude("3ds/gpu/gpu.h"); + @cInclude("3ds/gpu/gx.h"); + @cInclude("3ds/services/gspgpu.h"); + @cInclude("3ds/gfx.h"); + @cInclude("3ds/allocator/vram.h"); + @cInclude("3ds/gpu/shbin.h"); + @cInclude("3ds/gpu/shaderProgram.h"); + @cUndef("__3DS__"); + @cUndef("_3DS"); + @cInclude("c3d/types.h"); + @cInclude("c3d/maths.h"); + @cInclude("c3d/uniforms.h"); + @cInclude("c3d/attribs.h"); + @cInclude("c3d/buffers.h"); + @cInclude("c3d/base.h"); + @cInclude("c3d/texenv.h"); + @cInclude("c3d/effect.h"); + @cInclude("c3d/texture.h"); + @cInclude("c3d/fog.h"); + @cInclude("c3d/framebuffer.h"); + @cInclude("c3d/renderqueue.h"); +}); + var render_alloc: std.mem.Allocator = undefined; var render_io: std.Io = undefined; @@ -19,47 +43,964 @@ pub fn setup(alloc: std.mem.Allocator, io: std.Io) void { render_io = io; } -pub fn init() anyerror!void {} -pub fn deinit() void {} +const SCREEN_WIDTH: u32 = 400; +const SCREEN_HEIGHT: u32 = 240; +const TARGET_WIDTH: c_int = 240; +const TARGET_HEIGHT: c_int = 400; +const MAX_VERTEX_ATTRS: usize = 12; +const MAX_TEXTURE_SIZE: u32 = 1024; +const MIN_TEXTURE_SIZE: u32 = 8; +const TEX_BPP: usize = 4; +const OS_FCRAM_VADDR: usize = 0x30000000; +const OS_FCRAM_SIZE: usize = 0x10000000; +const OS_OLD_FCRAM_VADDR: usize = 0x14000000; +const OS_OLD_FCRAM_SIZE: usize = 0x08000000; + +const DISPLAY_TRANSFER_FLAGS: u32 = @intCast( + c.GX_TRANSFER_FLIP_VERT(0) | + c.GX_TRANSFER_OUT_TILED(0) | + c.GX_TRANSFER_RAW_COPY(0) | + c.GX_TRANSFER_IN_FORMAT(c.GX_TRANSFER_FMT_RGBA8) | + c.GX_TRANSFER_OUT_FORMAT(c.GX_TRANSFER_FMT_RGB8) | + 
c.GX_TRANSFER_SCALING(c.GX_TRANSFER_SCALE_NO), +); + +const ShaderType = c.GPU_SHADER_TYPE; +const VERTEX_SHADER: ShaderType = @intCast(c.GPU_VERTEX_SHADER); +const VERTEX_SHADER_INDEX: usize = 0; + +const BufferBinding = struct { + offset: usize, + attrib_count: c_int, + permutation: u64, +}; + +const PipelineData = struct { + dvlb: [*c]c.DVLB_s, + program: c.shaderProgram_s, + attr_info: c.C3D_AttrInfo, + stride: usize, + buffer: BufferBinding, + pos_scale: [4]f32, + uv_scale: [2]f32, + color_scale: [4]f32, + u_projection: c_int, + u_model_view: c_int, + u_pos_scale: c_int, + u_uv_scale_offset: c_int, + u_color_scale: c_int, +}; + +const MeshData = struct { + pipeline: Pipeline.Handle, + data: ?[*]const u8, + len: usize, +}; + +const TexMirror = extern struct { + data: ?*anyopaque, + fmt_size: u32, + dim: u32, + param: u32, + border: u32, + lod_param: u32, +}; + +comptime { + std.debug.assert(@sizeOf(TexMirror) == 24); +} + +const TextureData = struct { + width: u32, + height: u32, + tex: TexMirror, +}; + +const TexEnvMirror = extern struct { + src_rgb: u16, + src_alpha: u16, + op_all: u32, + func_rgb: u16, + func_alpha: u16, + color: u32, + scale_rgb: u16, + scale_alpha: u16, +}; + +comptime { + std.debug.assert(@sizeOf(TexEnvMirror) == 20); +} + +const FrameBufMirror = extern struct { + color_buf: ?*anyopaque, + depth_buf: ?*anyopaque, + width: u16, + height: u16, + color_fmt: c.GPU_COLORBUF, + depth_fmt: c.GPU_DEPTHBUF, + block32: bool, + masks: u8, +}; + +const RenderTargetMirror = extern struct { + next: ?*c.C3D_RenderTarget, + prev: ?*c.C3D_RenderTarget, + frame_buf: FrameBufMirror, +}; + +var pipelines = Util.CircularBuffer(PipelineData, 16).init(); +var meshes = Util.CircularBuffer(MeshData, 2048).init(); +var textures = Util.CircularBuffer(TextureData, 64).init(); + +var target: ?*c.C3D_RenderTarget = null; +var projection_transform: c.C3D_Mtx = undefined; +var fog_lut: c.C3D_FogLut = undefined; +var initialized = false; +var frame_started = false; +var 
vsync_enabled = true; +var clear_color: u32 = 0x000000ff; +var alpha_blend_enabled = true; +var depth_write_enabled = true; +var cull_face_enabled = true; +var fog_enabled = false; +var uv_offset: [2]f32 = .{ 0.0, 0.0 }; +var proj_matrix: Mat4 = Mat4.identity(); +var view_matrix: Mat4 = Mat4.identity(); +var bound_pipeline: Pipeline.Handle = 0; +var bound_texture: Texture.Handle = 0; + +pub fn init() anyerror!void { + _ = render_alloc; + _ = render_io; + + c.gfxInitDefault(); + if (!c.C3D_Init(c.C3D_DEFAULT_CMDBUF_SIZE)) { + c.gfxExit(); + return error.GfxInitFailed; + } + errdefer { + c.C3D_Fini(); + c.gfxExit(); + } + + target = c.C3D_RenderTargetCreate( + TARGET_WIDTH, + TARGET_HEIGHT, + c.GPU_RB_RGBA8, + c.C3D_DEPTHTYPE{ .__e = c.GPU_RB_DEPTH24_STENCIL8 }, + ) orelse return error.GfxInitFailed; + errdefer { + c.C3D_RenderTargetDelete(target); + target = null; + } + + c.C3D_RenderTargetSetOutput(target, c.GFX_TOP, c.GFX_LEFT, DISPLAY_TRANSFER_FLAGS); + init_projection_transform(); + + initialized = true; + frame_started = false; + apply_render_state(); + init_texenvs(); +} + +pub fn deinit() void { + frame_started = false; + + for (1..textures.buffer.len) |i| { + if (textures.buffer[i]) |*tex| { + c.C3D_TexDelete(tex_ptr(tex)); + } + } + textures.clear(); + + for (1..pipelines.buffer.len) |i| { + if (pipelines.buffer[i]) |*pl| { + _ = c.shaderProgramFree(&pl.program); + c.DVLB_Free(pl.dvlb); + } + } + pipelines.clear(); + meshes.clear(); + + if (target) |t| { + c.C3D_RenderTargetDelete(t); + target = null; + } + + if (initialized) { + c.C3D_Fini(); + c.gfxExit(); + initialized = false; + } +} + +pub fn set_clear_color(r: f32, g: f32, b: f32, a: f32) void { + clear_color = pack_color_rgba(r, g, b, a); +} + +pub fn set_alpha_blend(enabled: bool) void { + alpha_blend_enabled = enabled; + if (!initialized) return; + + if (enabled) { + c.C3D_AlphaBlend( + c.GPU_BLEND_ADD, + c.GPU_BLEND_ADD, + c.GPU_SRC_ALPHA, + c.GPU_ONE_MINUS_SRC_ALPHA, + c.GPU_ONE, + 
c.GPU_ONE_MINUS_SRC_ALPHA, + ); + } else { + c.C3D_AlphaBlend( + c.GPU_BLEND_ADD, + c.GPU_BLEND_ADD, + c.GPU_ONE, + c.GPU_ZERO, + c.GPU_ONE, + c.GPU_ZERO, + ); + } +} + +pub fn set_depth_write(enabled: bool) void { + depth_write_enabled = enabled; + if (!initialized) return; + c.C3D_DepthTest(true, c.GPU_GEQUAL, if (enabled) c.GPU_WRITE_ALL else c.GPU_WRITE_COLOR); +} + +pub fn set_fog(enabled: bool, start: f32, end: f32, r: f32, g: f32, b: f32) void { + fog_enabled = enabled; + if (!initialized) return; + + if (!enabled) { + c.C3D_FogGasMode(c.GPU_NO_FOG, c.GPU_PLAIN_DENSITY, false); + return; + } + + const safe_end = if (end <= start) start + 0.001 else end; + const density = 1.0 / @max(0.001, safe_end - start); + c.FogLut_Exp(&fog_lut, density, 1.0, start, safe_end); + c.C3D_FogGasMode(c.GPU_FOG, c.GPU_PLAIN_DENSITY, false); + c.C3D_FogColor(pack_color_rgba(r, g, b, 1.0)); + c.C3D_FogLutBind(&fog_lut); +} -pub fn set_clear_color(_: f32, _: f32, _: f32, _: f32) void {} -pub fn set_alpha_blend(_: bool) void {} -pub fn set_depth_write(_: bool) void {} -pub fn set_fog(_: bool, _: f32, _: f32, _: f32, _: f32, _: f32) void {} pub fn set_clip_planes(_: bool) void {} -pub fn set_culling(_: bool) void {} -pub fn set_uv_offset(_: f32, _: f32) void {} -pub fn set_proj_matrix(_: *const Mat4) void {} -pub fn set_view_matrix(_: *const Mat4) void {} + +pub fn set_culling(enabled: bool) void { + cull_face_enabled = enabled; + if (!initialized) return; + c.C3D_CullFace(if (enabled) c.GPU_CULL_BACK_CCW else c.GPU_CULL_NONE); +} + +pub fn set_uv_offset(u: f32, v: f32) void { + uv_offset = .{ u, v }; +} + +pub fn set_proj_matrix(mat: *const Mat4) void { + proj_matrix = mat.*; +} + +pub fn set_view_matrix(mat: *const Mat4) void { + view_matrix = mat.*; +} pub fn start_frame() bool { - return false; + const t = target orelse return false; + const flags: u8 = @intCast(if (vsync_enabled) c.C3D_FRAME_SYNCDRAW else c.C3D_FRAME_NONBLOCK); + + if (!c.C3D_FrameBegin(flags)) return false; + 
c.C3D_FrameBufClear(target_frame_buf(t), c.C3D_CLEAR_ALL, clear_color, 0); + + if (!c.C3D_FrameDrawOn(t)) { + c.C3D_FrameEnd(0); + return false; + } + + frame_started = true; + c.C3D_SetViewport(0, 0, TARGET_WIDTH, TARGET_HEIGHT); + apply_render_state(); + init_texenvs(); + rebind_texture(); + return true; +} + +pub fn end_frame() void { + if (!frame_started) return; + c.C3D_FrameEnd(0); + frame_started = false; +} + +pub fn clear_depth() void { + if (!frame_started) return; + const t = target orelse return; + c.C3D_FrameBufClear(target_frame_buf(t), c.C3D_CLEAR_DEPTH, clear_color, 0); +} + +pub fn set_vsync(v: bool) void { + vsync_enabled = v; +} + +pub fn create_pipeline(layout: Pipeline.VertexLayout, v_shader: ?[:0]align(4) const u8, f_shader: ?[:0]align(4) const u8) anyerror!Pipeline.Handle { + _ = f_shader; + + const code = v_shader orelse return error.InvalidShader; + if (layout.stride == 0 or layout.attributes.len > MAX_VERTEX_ATTRS) return error.UnsupportedVertexLayout; + + const dvlb = c.DVLB_ParseFile(@ptrCast(@constCast(code.ptr)), @intCast(code.len)); + if (dvlb == null or dvlb[0].numDVLE == 0) return error.InvalidShader; + errdefer c.DVLB_Free(dvlb); + + var program: c.shaderProgram_s = undefined; + if (c.shaderProgramInit(&program) != 0) return error.InvalidShader; + errdefer _ = c.shaderProgramFree(&program); + + if (c.shaderProgramSetVsh(&program, &dvlb[0].DVLE[0]) != 0) return error.InvalidShader; + + for (layout.attributes) |attr| { + if (attr.binding != 0) return error.UnsupportedVertexLayout; + } + + const position_attr = find_attr(layout, .position) orelse return error.UnsupportedVertexLayout; + const uv_attr = find_attr(layout, .uv) orelse return error.UnsupportedVertexLayout; + const color_attr = find_attr(layout, .color) orelse return error.UnsupportedVertexLayout; + const pos_scale = position_scale(position_attr) orelse return error.UnsupportedVertexLayout; + const uv_attr_scale = uv_scale(uv_attr) orelse return 
error.UnsupportedVertexLayout; + const color_attr_scale = color_scale(color_attr) orelse return error.UnsupportedVertexLayout; + const buffer_layout = buffer_layout_from_attrs(layout.stride, position_attr, uv_attr, color_attr) orelse return error.UnsupportedVertexLayout; + + var attr_info: c.C3D_AttrInfo = undefined; + c.AttrInfo_Init(&attr_info); + if (add_attr_loader(&attr_info, 0, position_attr, buffer_layout.position_loader_size) < 0) return error.UnsupportedVertexLayout; + if (add_attr_loader(&attr_info, 1, uv_attr, buffer_layout.uv_loader_size) < 0) return error.UnsupportedVertexLayout; + if (add_attr_loader(&attr_info, 2, color_attr, buffer_layout.color_loader_size) < 0) return error.UnsupportedVertexLayout; + + const handle = pipelines.add_element(.{ + .dvlb = dvlb, + .program = program, + .attr_info = attr_info, + .stride = layout.stride, + .buffer = .{ + .offset = buffer_layout.base_offset, + .attrib_count = buffer_layout.attribute_count, + .permutation = buffer_layout.permutation, + }, + .pos_scale = pos_scale, + .uv_scale = uv_attr_scale, + .color_scale = color_attr_scale, + .u_projection = c.shaderInstanceGetUniformLocation(program.vertexShader, "projection"), + .u_model_view = c.shaderInstanceGetUniformLocation(program.vertexShader, "modelView"), + .u_pos_scale = c.shaderInstanceGetUniformLocation(program.vertexShader, "posScale"), + .u_uv_scale_offset = c.shaderInstanceGetUniformLocation(program.vertexShader, "uvScaleOffset"), + .u_color_scale = c.shaderInstanceGetUniformLocation(program.vertexShader, "colorScale"), + }) orelse return error.OutOfPipelines; + + return @intCast(handle); +} + +pub fn destroy_pipeline(handle: Pipeline.Handle) void { + if (pipeline_slot(handle)) |pl| { + _ = c.shaderProgramFree(&pl.program); + c.DVLB_Free(pl.dvlb); + } + if (bound_pipeline == handle) bound_pipeline = 0; + _ = pipelines.remove_element(handle); +} + +pub fn bind_pipeline(handle: Pipeline.Handle) void { + bound_pipeline = handle; + if (pipeline_slot(handle)) 
|pl| bind_pipeline_data(pl); +} + +pub fn create_mesh(pipeline: Pipeline.Handle) anyerror!Mesh.Handle { + if (pipeline_slot(pipeline) == null) return error.InvalidPipeline; + const handle = meshes.add_element(.{ + .pipeline = pipeline, + .data = null, + .len = 0, + }) orelse return error.OutOfMeshes; + + return @intCast(handle); } -pub fn end_frame() void {} -pub fn clear_depth() void {} -pub fn set_vsync(_: bool) void {} +pub fn destroy_mesh(handle: Mesh.Handle) void { + _ = meshes.remove_element(handle); +} + +pub fn update_mesh(handle: Mesh.Handle, data: []const u8) void { + const mesh = mesh_slot(handle) orelse return; + if (data.len == 0) { + mesh.data = null; + mesh.len = 0; + return; + } -pub fn create_pipeline(_: Pipeline.VertexLayout, _: ?[:0]align(4) const u8, _: ?[:0]align(4) const u8) anyerror!Pipeline.Handle { - return 0; + if (!is_linear_fcram(data.ptr, data.len)) { + std.debug.panic("3ds_gfx: mesh vertex data must be allocated in linear FCRAM", .{}); + } + if (data.len > std.math.maxInt(u32)) { + std.debug.panic("3ds_gfx: mesh vertex data is too large to flush", .{}); + } + + _ = c.GSPGPU_FlushDataCache(data.ptr, @intCast(data.len)); + mesh.data = data.ptr; + mesh.len = data.len; } -pub fn destroy_pipeline(_: Pipeline.Handle) void {} -pub fn bind_pipeline(_: Pipeline.Handle) void {} +pub fn draw_mesh(handle: Mesh.Handle, model: *const Mat4, count: usize) void { + const mesh = mesh_slot(handle) orelse return; + const pl = pipeline_slot(mesh.pipeline) orelse return; + const data = mesh.data orelse return; + if (count == 0 or mesh.len == 0) return; + + bind_pipeline_data(pl); + upload_draw_uniforms(pl, model); + rebind_texture(); -pub fn create_mesh(_: Pipeline.Handle) anyerror!Mesh.Handle { - return 0; + const buf = c.C3D_GetBufInfo() orelse return; + c.BufInfo_Init(buf); + const ptr = data + pl.buffer.offset; + const added = c.BufInfo_Add(buf, ptr, @intCast(pl.stride), pl.buffer.attrib_count, pl.buffer.permutation); + if (added < 0) { + 
c.BufInfo_Init(buf); + return; + } + c.C3D_DrawArrays(c.GPU_TRIANGLES, 0, @intCast(count)); } -pub fn destroy_mesh(_: Mesh.Handle) void {} -pub fn update_mesh(_: Mesh.Handle, _: []const u8) void {} -pub fn draw_mesh(_: Mesh.Handle, _: *const Mat4, _: usize, _: Mesh.Primitive) void {} +pub fn create_texture(width: u32, height: u32, data: []align(16) u8) anyerror!Texture.Handle { + try validate_texture(width, height, data); + + const size = texture_size(width, height); + if (c.vramSpaceFree() < size) return error.OutOfTextureMemory; + + const mem = c.vramAlloc(size) orelse return error.OutOfTextureMemory; + errdefer c.vramFree(mem); + + var tex = TextureData{ + .width = width, + .height = height, + .tex = init_tex_mirror(width, height, mem, size), + }; + + try upload_texture_data(&tex, data[0..size]); + + const handle = textures.add_element(tex) orelse return error.OutOfTextures; + return @intCast(handle); +} + +pub fn update_texture(handle: Texture.Handle, data: []align(16) u8) void { + const tex = texture_slot(handle) orelse return; + const size = texture_size(tex.width, tex.height); + if (data.len < size) return; + if (!is_linear_fcram(data.ptr, size)) { + std.debug.panic("3ds_gfx: texture upload data must be allocated in linear FCRAM", .{}); + } + + upload_texture_data(tex, data[0..size]) catch return; +} -pub fn create_texture(_: u32, _: u32, _: []align(16) u8) anyerror!Texture.Handle { - return 0; +pub fn bind_texture(handle: Texture.Handle) void { + bound_texture = handle; + rebind_texture(); +} + +pub fn destroy_texture(handle: Texture.Handle) void { + if (texture_slot(handle)) |tex| { + c.C3D_TexDelete(tex_ptr(tex)); + } + if (bound_texture == handle) { + bound_texture = 0; + if (initialized) c.C3D_TexBind(0, null); + } + _ = textures.remove_element(handle); } -pub fn update_texture(_: Texture.Handle, _: []align(16) u8) void {} -pub fn bind_texture(_: Texture.Handle) void {} -pub fn destroy_texture(_: Texture.Handle) void {} pub fn force_texture_resident(_: 
Texture.Handle) void {} + +fn apply_render_state() void { + set_alpha_blend(alpha_blend_enabled); + set_depth_write(depth_write_enabled); + set_culling(cull_face_enabled); + if (!fog_enabled) { + c.C3D_FogGasMode(c.GPU_NO_FOG, c.GPU_PLAIN_DENSITY, false); + } +} + +fn init_texenvs() void { + texenv_modulate(0); + var i: c_int = 1; + while (i < 6) : (i += 1) texenv_replace_previous(i); +} + +fn texenv_modulate(id: c_int) void { + const env = c.C3D_GetTexEnv(id) orelse return; + const mirror: *TexEnvMirror = @ptrCast(@alignCast(env)); + const sources: u16 = @intCast(c.GPU_TEVSOURCES(c.GPU_TEXTURE0, c.GPU_PRIMARY_COLOR, 0)); + mirror.* = .{ + .src_rgb = sources, + .src_alpha = sources, + .op_all = 0, + .func_rgb = @intCast(c.GPU_MODULATE), + .func_alpha = @intCast(c.GPU_MODULATE), + .color = 0xffffffff, + .scale_rgb = @intCast(c.GPU_TEVSCALE_1), + .scale_alpha = @intCast(c.GPU_TEVSCALE_1), + }; + c.C3D_DirtyTexEnv(env); +} + +fn texenv_replace_previous(id: c_int) void { + const env = c.C3D_GetTexEnv(id) orelse return; + const mirror: *TexEnvMirror = @ptrCast(@alignCast(env)); + const sources: u16 = @intCast(c.GPU_TEVSOURCES(c.GPU_PREVIOUS, 0, 0)); + mirror.* = .{ + .src_rgb = sources, + .src_alpha = sources, + .op_all = 0, + .func_rgb = @intCast(c.GPU_REPLACE), + .func_alpha = @intCast(c.GPU_REPLACE), + .color = 0xffffffff, + .scale_rgb = @intCast(c.GPU_TEVSCALE_1), + .scale_alpha = @intCast(c.GPU_TEVSCALE_1), + }; + c.C3D_DirtyTexEnv(env); +} + +fn bind_pipeline_data(pl: *PipelineData) void { + c.C3D_BindProgram(&pl.program); + c.C3D_SetAttrInfo(&pl.attr_info); +} + +fn init_projection_transform() void { + var screen: c.C3D_Mtx = undefined; + c.Mtx_OrthoTilt(&screen, 0.0, @floatFromInt(SCREEN_WIDTH), 0.0, @floatFromInt(SCREEN_HEIGHT), 0.0, 1.0, true); + var viewport = logical_viewport_transform(); + projection_transform = c3d_mtx_mul(&screen, &viewport); +} + +fn logical_viewport_transform() c.C3D_Mtx { + var out: c.C3D_Mtx = undefined; + set_fvec(&out.r[0], @as(f32, 
@floatFromInt(SCREEN_WIDTH)) * 0.5, 0.0, 0.0, @as(f32, @floatFromInt(SCREEN_WIDTH)) * 0.5); + set_fvec(&out.r[1], 0.0, @as(f32, @floatFromInt(SCREEN_HEIGHT)) * 0.5, 0.0, @as(f32, @floatFromInt(SCREEN_HEIGHT)) * 0.5); + set_fvec(&out.r[2], 0.0, 0.0, -1.0, 1.0); + set_fvec(&out.r[3], 0.0, 0.0, 0.0, 1.0); + return out; +} + +fn upload_draw_uniforms(pl: *PipelineData, model: *const Mat4) void { + var aether_projection = mat4_to_c3d_transposed(proj_matrix); + var projection = c3d_mtx_mul(&projection_transform, &aether_projection); + const model_view = Mat4.mul(model.*, view_matrix); + var model_view_c3d = mat4_to_c3d_transposed(model_view); + + upload_matrix(pl.u_projection, &projection); + upload_matrix(pl.u_model_view, &model_view_c3d); + upload_vec4(pl.u_pos_scale, pl.pos_scale); + upload_vec4(pl.u_uv_scale_offset, .{ + pl.uv_scale[0], + pl.uv_scale[1], + uv_offset[0], + uv_offset[1], + }); + upload_vec4(pl.u_color_scale, pl.color_scale); +} + +fn upload_matrix(location: c_int, matrix: *const c.C3D_Mtx) void { + const idx = uniform_location(location, 4) orelse return; + inline for (0..4) |i| { + c.C3D_FVUnif[VERTEX_SHADER_INDEX][idx + i] = matrix.r[i]; + c.C3D_FVUnifDirty[VERTEX_SHADER_INDEX][idx + i] = true; + } +} + +fn upload_vec4(location: c_int, values: [4]f32) void { + const idx = uniform_location(location, 1) orelse return; + set_fvec(&c.C3D_FVUnif[VERTEX_SHADER_INDEX][idx], values[0], values[1], values[2], values[3]); + c.C3D_FVUnifDirty[VERTEX_SHADER_INDEX][idx] = true; +} + +fn uniform_location(location: c_int, count: usize) ?usize { + if (location < 0) return null; + const idx: usize = @intCast(location); + if (idx + count > c.C3D_FVUNIF_COUNT) return null; + return idx; +} + +fn mat4_to_c3d_transposed(mat: Mat4) c.C3D_Mtx { + var out: c.C3D_Mtx = undefined; + inline for (0..4) |row| { + set_fvec(&out.r[row], mat.data[0][row], mat.data[1][row], mat.data[2][row], mat.data[3][row]); + } + return out; +} + +fn c3d_mtx_mul(a: *const c.C3D_Mtx, b: *const 
c.C3D_Mtx) c.C3D_Mtx { + var out: c.C3D_Mtx = undefined; + inline for (0..4) |row| { + var values: [4]f32 = undefined; + inline for (0..4) |col| { + var sum: f32 = 0.0; + inline for (0..4) |k| { + sum += fvec_component(&a.r[row], k) * fvec_component(&b.r[k], col); + } + values[col] = sum; + } + set_fvec(&out.r[row], values[0], values[1], values[2], values[3]); + } + return out; +} + +fn fvec_component(v: *const c.C3D_FVec, index: usize) f32 { + return switch (index) { + 0 => v.unnamed_0.x, + 1 => v.unnamed_0.y, + 2 => v.unnamed_0.z, + 3 => v.unnamed_0.w, + else => unreachable, + }; +} + +fn set_fvec(v: *c.C3D_FVec, x: f32, y: f32, z: f32, w: f32) void { + v.unnamed_0.x = x; + v.unnamed_0.y = y; + v.unnamed_0.z = z; + v.unnamed_0.w = w; +} + +fn rebind_texture() void { + if (!initialized or bound_texture == 0) return; + const tex = texture_slot(bound_texture) orelse return; + c.C3D_TexBind(0, tex_ptr(tex)); +} + +fn validate_texture(width: u32, height: u32, data: []align(16) u8) !void { + if (width < MIN_TEXTURE_SIZE or height < MIN_TEXTURE_SIZE) { + Util.engine_logger.err("3ds_gfx: texture {d}x{d} is too small; Citro3D requires at least {d}x{d}", .{ + width, + height, + MIN_TEXTURE_SIZE, + MIN_TEXTURE_SIZE, + }); + return error.TextureTooSmall; + } + if (width > MAX_TEXTURE_SIZE or height > MAX_TEXTURE_SIZE) { + Util.engine_logger.err("3ds_gfx: texture {d}x{d} is too large; Citro3D limit is {d}x{d}", .{ + width, + height, + MAX_TEXTURE_SIZE, + MAX_TEXTURE_SIZE, + }); + return error.TextureTooLarge; + } + if (!std.math.isPowerOfTwo(width) or !std.math.isPowerOfTwo(height)) { + Util.engine_logger.err("3ds_gfx: texture {d}x{d} is unsupported; Citro3D requires power-of-two dimensions", .{ width, height }); + return error.UnsupportedTextureSize; + } + + const size = texture_size(width, height); + if (data.len < size) return error.InsufficientData; + if (!is_linear_fcram(data.ptr, size)) { + Util.engine_logger.err("3ds_gfx: texture upload data must be allocated in linear 
FCRAM", .{}); + return error.TextureDataNotLinear; + } + if (size > std.math.maxInt(u32)) return error.TextureTooLarge; +} + +fn init_tex_mirror(width: u32, height: u32, data: ?*anyopaque, size: u32) TexMirror { + return .{ + .data = data, + .fmt_size = (size << 4) | @as(u32, @intCast(c.GPU_RGBA8)), + .dim = (width << 16) | height, + .param = texture_param(), + .border = 0, + .lod_param = 0, + }; +} + +fn texture_param() u32 { + return @intCast( + c.GPU_TEXTURE_MODE(c.GPU_TEX_2D) | + c.GPU_TEXTURE_MAG_FILTER(c.GPU_NEAREST) | + c.GPU_TEXTURE_MIN_FILTER(c.GPU_NEAREST) | + c.GPU_TEXTURE_WRAP_S(c.GPU_REPEAT) | + c.GPU_TEXTURE_WRAP_T(c.GPU_REPEAT), + ); +} + +fn upload_texture_data(tex: *TextureData, data: []align(16) const u8) !void { + const size = texture_size(tex.width, tex.height); + const upload = try render_alloc.alignedAlloc(u8, .fromByteUnits(16), size); + defer render_alloc.free(upload); + + convert_texture_data(upload, data, tex.width, tex.height); + flush_texture_source(upload); + c.C3D_TexLoadImage(tex_ptr(tex), upload.ptr, c.GPU_TEXFACE_2D, 0); + c.C3D_TexFlush(tex_ptr(tex)); +} + +fn convert_texture_data(dst: []align(16) u8, src: []align(16) const u8, width: u32, height: u32) void { + for (0..height) |y| { + const sy = height - 1 - @as(u32, @intCast(y)); + for (0..width) |x| { + const xu: u32 = @intCast(x); + const src_off = (@as(usize, sy) * width + xu) * TEX_BPP; + const dst_off = tiled_pixel_offset(xu, @intCast(y), width) * TEX_BPP; + dst[dst_off + 0] = src[src_off + 3]; + dst[dst_off + 1] = src[src_off + 2]; + dst[dst_off + 2] = src[src_off + 1]; + dst[dst_off + 3] = src[src_off + 0]; + } + } +} + +fn tiled_pixel_offset(x: u32, y: u32, width: u32) usize { + const tile_x = x & ~@as(u32, 7); + const tile_y = y & ~@as(u32, 7); + const tile_base = tile_y * width + tile_x * 8; + return @intCast(tile_base + morton8(x & 7, y & 7)); +} + +fn morton8(x: u32, y: u32) u32 { + return (x & 1) | + ((y & 1) << 1) | + ((x & 2) << 1) | + ((y & 2) << 2) | + ((x & 4) << 
2) | + ((y & 4) << 3); +} + +fn flush_texture_source(data: []align(16) u8) void { + _ = c.GSPGPU_FlushDataCache(data.ptr, @intCast(data.len)); +} + +fn texture_size(width: u32, height: u32) u32 { + return @intCast(@as(usize, width) * height * TEX_BPP); +} + +fn tex_ptr(tex: *TextureData) *c.C3D_Tex { + return @ptrCast(&tex.tex); +} + +fn target_frame_buf(t: *c.C3D_RenderTarget) *c.C3D_FrameBuf { + const mirror: *RenderTargetMirror = @ptrCast(@alignCast(t)); + return @ptrCast(&mirror.frame_buf); +} + +fn pipeline_slot(handle: Pipeline.Handle) ?*PipelineData { + const idx: usize = handle; + if (idx == 0 or idx >= pipelines.buffer.len) return null; + if (pipelines.buffer[idx]) |*pl| return pl; + return null; +} + +fn mesh_slot(handle: Mesh.Handle) ?*MeshData { + const idx: usize = handle; + if (idx == 0 or idx >= meshes.buffer.len) return null; + if (meshes.buffer[idx]) |*mesh| return mesh; + return null; +} + +fn texture_slot(handle: Texture.Handle) ?*TextureData { + const idx: usize = handle; + if (idx == 0 or idx >= textures.buffer.len) return null; + if (textures.buffer[idx]) |*tex| return tex; + return null; +} + +fn find_attr(layout: Pipeline.VertexLayout, usage: Pipeline.AttributeUsage) ?Pipeline.Attribute { + for (layout.attributes) |attr| { + if (attr.usage == usage) return attr; + } + return null; +} + +const BufferLayout = struct { + base_offset: usize, + attribute_count: c_int, + permutation: u64, + position_loader_size: u8, + uv_loader_size: u8, + color_loader_size: u8, +}; + +fn buffer_layout_from_attrs(stride: usize, position_attr: Pipeline.Attribute, uv_attr: Pipeline.Attribute, color_attr: Pipeline.Attribute) ?BufferLayout { + var attrs = [_]Pipeline.Attribute{ position_attr, uv_attr, color_attr }; + sort_attrs_by_offset(&attrs); + + const base_offset = attrs[0].offset; + var current_rel: usize = 0; + var attribute_count: usize = 0; + var permutation: u64 = 0; + var position_loader_size: u8 = 0; + var uv_loader_size: u8 = 0; + var color_loader_size: 
u8 = 0; + + for (attrs, 0..) |attr, i| { + if (!attr_fits(stride, attr)) return null; + if (attr.offset < base_offset) return null; + const rel_offset = attr.offset - base_offset; + if (rel_offset != current_rel) return null; + + const next_offset = if (i + 1 < attrs.len) attrs[i + 1].offset else stride; + if (next_offset < attr.offset) return null; + const available_bytes = next_offset - attr.offset; + const loader_size = attribute_loader_size(attr, available_bytes) orelse return null; + const loaded_bytes = attribute_size_bytes_with_count(attr.format, loader_size) orelse return null; + if (loaded_bytes > available_bytes) return null; + if (i + 1 < attrs.len and loaded_bytes != available_bytes) return null; + + permutation |= attribute_loader_id(attr.usage) << @as(u6, @intCast(attribute_count * 4)); + attribute_count += 1; + switch (attr.usage) { + .position => position_loader_size = loader_size, + .uv => uv_loader_size = loader_size, + .color => color_loader_size = loader_size, + .normal => return null, + } + current_rel = rel_offset + loaded_bytes; + } + + if (stride < current_rel) return null; + if (position_loader_size == 0 or uv_loader_size == 0 or color_loader_size == 0) return null; + + return .{ + .base_offset = base_offset, + .attribute_count = @intCast(attribute_count), + .permutation = permutation, + .position_loader_size = position_loader_size, + .uv_loader_size = uv_loader_size, + .color_loader_size = color_loader_size, + }; +} + +fn sort_attrs_by_offset(attrs: *[3]Pipeline.Attribute) void { + var i: usize = 1; + while (i < attrs.len) : (i += 1) { + var j = i; + while (j > 0 and attrs[j - 1].offset > attrs[j].offset) : (j -= 1) { + const tmp = attrs[j - 1]; + attrs[j - 1] = attrs[j]; + attrs[j] = tmp; + } + } +} + +fn attribute_loader_id(usage: Pipeline.AttributeUsage) u64 { + return switch (usage) { + .position => 0, + .uv => 1, + .color => 2, + .normal => unreachable, + }; +} + +fn attr_fits(stride: usize, attr: Pipeline.Attribute) bool { + const 
size = attribute_size_bytes(attr.format); + return attr.offset <= stride and size <= stride - attr.offset; +} + +fn attribute_loader_size(attr: Pipeline.Attribute, available_bytes: usize) ?u8 { + if (attr.usage == .position and attr.size == 3 and attribute_component_size_bytes(attr.format) == 2 and available_bytes >= 8) { + return 4; + } + if (attr.size == 0 or attr.size > 4) return null; + return @intCast(attr.size); +} + +fn attribute_size_bytes(format: Pipeline.AttributeFormat) usize { + return switch (format) { + .f32x2 => 8, + .f32x3 => 12, + .unorm8x2 => 2, + .unorm8x4 => 4, + .unorm16x2, .snorm16x2 => 4, + .unorm16x3, .snorm16x3 => 6, + }; +} + +fn attribute_size_bytes_with_count(format: Pipeline.AttributeFormat, count: u8) ?usize { + if (count == 0 or count > 4) return null; + return attribute_component_size_bytes(format) * @as(usize, count); +} + +fn attribute_component_size_bytes(format: Pipeline.AttributeFormat) usize { + return switch (format) { + .f32x2, .f32x3 => 4, + .unorm8x2, .unorm8x4 => 1, + .unorm16x2, .unorm16x3, .snorm16x2, .snorm16x3 => 2, + }; +} + +fn gpu_format(format: Pipeline.AttributeFormat) c.GPU_FORMATS { + return switch (format) { + .f32x2, .f32x3 => c.GPU_FLOAT, + .unorm8x2, .unorm8x4 => c.GPU_UNSIGNED_BYTE, + .unorm16x2, .unorm16x3, .snorm16x2, .snorm16x3 => c.GPU_SHORT, + }; +} + +fn add_attr_loader(info: *c.C3D_AttrInfo, reg_id: c_int, attr: Pipeline.Attribute, loader_size: u8) c_int { + return c.AttrInfo_AddLoader(info, reg_id, gpu_format(attr.format), loader_size); +} + +fn position_scale(attr: Pipeline.Attribute) ?[4]f32 { + if (attr.size != 3) return null; + return switch (attr.format) { + .f32x3 => .{ 1.0, 1.0, 1.0, 1.0 }, + .snorm16x3 => .{ snorm16_scale(), snorm16_scale(), snorm16_scale(), 1.0 }, + else => null, + }; +} + +fn uv_scale(attr: Pipeline.Attribute) ?[2]f32 { + if (attr.size != 2) return null; + return switch (attr.format) { + .f32x2 => .{ 1.0, 1.0 }, + .unorm8x2 => .{ unorm8_scale(), unorm8_scale() }, + 
.snorm16x2 => .{ snorm16_scale(), snorm16_scale() }, + else => null, + }; +} + +fn color_scale(attr: Pipeline.Attribute) ?[4]f32 { + if (attr.size != 4) return null; + return switch (attr.format) { + .unorm8x4 => .{ unorm8_scale(), unorm8_scale(), unorm8_scale(), unorm8_scale() }, + .f32x3 => .{ 1.0, 1.0, 1.0, 1.0 }, + else => null, + }; +} + +fn unorm8_scale() f32 { + return 1.0 / 255.0; +} + +fn snorm16_scale() f32 { + return 1.0 / 32767.0; +} + +fn is_linear_fcram(ptr: [*]const u8, len: usize) bool { + const start = @intFromPtr(ptr); + return in_range(start, len, OS_FCRAM_VADDR, OS_FCRAM_SIZE) or + in_range(start, len, OS_OLD_FCRAM_VADDR, OS_OLD_FCRAM_SIZE); +} + +fn in_range(start: usize, len: usize, base: usize, size: usize) bool { + if (start < base) return false; + const offset = start - base; + return offset <= size and len <= size - offset; +} + +fn float_to_u8(v: f32) u8 { + return @intFromFloat(@max(0.0, @min(1.0, v)) * 255.0); +} + +fn pack_color_rgba(r: f32, g: f32, b: f32, a: f32) u32 { + const ri: u32 = float_to_u8(r); + const gi: u32 = float_to_u8(g); + const bi: u32 = float_to_u8(b); + const ai: u32 = float_to_u8(a); + return (ri << 24) | (gi << 16) | (bi << 8) | ai; +} diff --git a/src/platform/gfx_api.zig b/src/platform/gfx_api.zig index 0758e0c..acfd2c0 100644 --- a/src/platform/gfx_api.zig +++ b/src/platform/gfx_api.zig @@ -38,7 +38,7 @@ pub const Interface = struct { create_mesh: fn (Pipeline.Handle) anyerror!Mesh.Handle, destroy_mesh: fn (Mesh.Handle) void, update_mesh: fn (Mesh.Handle, []const u8) void, - draw_mesh: fn (Mesh.Handle, *const Mat4, usize, Mesh.Primitive) void, + draw_mesh: fn (Mesh.Handle, *const Mat4, usize) void, create_texture: fn (u32, u32, []align(16) u8) anyerror!Texture.Handle, update_texture: fn (Texture.Handle, []align(16) u8) void, diff --git a/src/platform/glfw/opengl/opengl_gfx.zig b/src/platform/glfw/opengl/opengl_gfx.zig index 2d8fcc1..e726bab 100644 --- a/src/platform/glfw/opengl/opengl_gfx.zig +++ 
b/src/platform/glfw/opengl/opengl_gfx.zig @@ -63,11 +63,9 @@ pub fn init() anyerror!void { shader.state.proj = Mat4.identity(); shader.state.view = Mat4.identity(); shader.update_ubo(); - } pub fn deinit() void { - shader.deinit(); gl.makeProcTableCurrent(null); @@ -240,16 +238,13 @@ pub fn update_mesh(handle: Mesh.Handle, data: []const u8) void { gl.NamedBufferSubData(mesh.vbo, 0, @intCast(data.len), data.ptr); } -pub fn draw_mesh(handle: Mesh.Handle, model: *const Mat4, count: usize, primitive: Mesh.Primitive) void { +pub fn draw_mesh(handle: Mesh.Handle, model: *const Mat4, count: usize) void { const mesh = meshes.get_element(handle) orelse return; const pl = pipelines.get_element(mesh.pipeline) orelse return; shader.update_per_object(model); gl.VertexArrayVertexBuffer(pl.vao, 0, mesh.vbo, 0, @intCast(pl.layout.stride)); - gl.DrawArrays(switch (primitive) { - .triangles => gl.TRIANGLES, - .lines => gl.LINES, - }, 0, @intCast(count)); + gl.DrawArrays(gl.TRIANGLES, 0, @intCast(count)); } pub fn create_texture(width: u32, height: u32, data: []align(16) u8) anyerror!Texture.Handle { diff --git a/src/platform/glfw/vulkan/vulkan_gfx.zig b/src/platform/glfw/vulkan/vulkan_gfx.zig index 4107ac2..4af4f81 100644 --- a/src/platform/glfw/vulkan/vulkan_gfx.zig +++ b/src/platform/glfw/vulkan/vulkan_gfx.zig @@ -1056,7 +1056,7 @@ pub fn update_mesh(handle: Mesh.Handle, data: []const u8) void { meshes.update_element(handle, m_data); } -pub fn draw_mesh(handle: Mesh.Handle, model: *const Mat4, count: usize, primitive: Mesh.Primitive) void { +pub fn draw_mesh(handle: Mesh.Handle, model: *const Mat4, count: usize) void { draw_state.mat = model.*; const m_data = meshes.get_element(handle) orelse return; @@ -1071,10 +1071,7 @@ pub fn draw_mesh(handle: Mesh.Handle, model: *const Mat4, count: usize, primitiv const dyn_offsets = [_]u32{current_camera_slot * camera_rings[swapchain.image_index].slot_stride}; command_buffer.bindDescriptorSets(.graphics, p_data.layout, 0, &sets, 
&dyn_offsets); - command_buffer.setPrimitiveTopology(switch (primitive) { - .triangles => .triangle_list, - .lines => .line_list, - }); + command_buffer.setPrimitiveTopology(.triangle_list); const offset = [_]vk.DeviceSize{0}; const frame_buf = m_data.buffers[swapchain.image_index]; diff --git a/src/platform/headless/headless_gfx.zig b/src/platform/headless/headless_gfx.zig index 1e3d447..2699531 100644 --- a/src/platform/headless/headless_gfx.zig +++ b/src/platform/headless/headless_gfx.zig @@ -47,7 +47,7 @@ pub fn create_mesh(_: Pipeline.Handle) anyerror!Mesh.Handle { pub fn destroy_mesh(_: Mesh.Handle) void {} pub fn update_mesh(_: Mesh.Handle, _: []const u8) void {} -pub fn draw_mesh(_: Mesh.Handle, _: *const Mat4, _: usize, _: Mesh.Primitive) void {} +pub fn draw_mesh(_: Mesh.Handle, _: *const Mat4, _: usize) void {} pub fn create_texture(_: u32, _: u32, _: []align(16) u8) anyerror!Texture.Handle { return 0; diff --git a/src/platform/psp/psp_gfx_ge.zig b/src/platform/psp/psp_gfx_ge.zig index c174a55..2446083 100644 --- a/src/platform/psp/psp_gfx_ge.zig +++ b/src/platform/psp/psp_gfx_ge.zig @@ -946,7 +946,7 @@ pub fn update_mesh(handle: Mesh.Handle, data: []const u8) void { meshes.update_element(handle, mesh); } -pub fn draw_mesh(handle: Mesh.Handle, model: *const Mat4, count: usize, primitive: Mesh.Primitive) void { +pub fn draw_mesh(handle: Mesh.Handle, model: *const Mat4, count: usize) void { const mesh = meshes.get_element(handle) orelse return; const pl = pipelines.get_element(mesh.pipeline) orelse return; const data = mesh.data orelse return; @@ -963,10 +963,7 @@ pub fn draw_mesh(handle: Mesh.Handle, model: *const Mat4, count: usize, primitiv must(cmd.vertex_type(@as(u24, @bitCast(pl.vertex_type)))); must(cmd.vertex_address(@intFromPtr(data))); - must(cmd.primitive(switch (primitive) { - .triangles => .triangles, - .lines => .lines, - }, @intCast(count))); + must(cmd.primitive(.triangles, @intCast(count))); advance_stall(); } diff --git 
a/src/platform/switch/switch_gfx.zig b/src/platform/switch/switch_gfx.zig index 5bbbe46..6c52949 100644 --- a/src/platform/switch/switch_gfx.zig +++ b/src/platform/switch/switch_gfx.zig @@ -515,7 +515,7 @@ pub fn update_mesh(handle: Mesh.Handle, data: []const u8) void { meshes.update_element(handle, mesh); } -pub fn draw_mesh(handle: Mesh.Handle, _: *const Mat4, count: usize, primitive: Mesh.Primitive) void { +pub fn draw_mesh(handle: Mesh.Handle, _: *const Mat4, count: usize) void { if (!initialized or command_buffer == null) return; const mesh = meshes.get_element(handle) orelse return; if (mesh.mem_block == null or mesh.size == 0 or count == 0) return; @@ -534,10 +534,7 @@ pub fn draw_mesh(handle: Mesh.Handle, _: *const Mat4, count: usize, primitive: M } dkCmdBufBindVtxBuffers(command_buffer, 0, extents[0..].ptr, pl.vtx_buffer_count); - dkCmdBufDraw(command_buffer, switch (primitive) { - .triangles => DK_PRIMITIVE_TRIANGLES, - .lines => DK_PRIMITIVE_LINES, - }, @intCast(count), 1, 0, 0); + dkCmdBufDraw(command_buffer, DK_PRIMITIVE_TRIANGLES, @intCast(count), 1, 0, 0); } pub fn create_texture(_: u32, _: u32, _: []align(16) u8) anyerror!Texture.Handle { diff --git a/src/rendering/mesh.zig b/src/rendering/mesh.zig index 1a2a10a..3bad056 100644 --- a/src/rendering/mesh.zig +++ b/src/rendering/mesh.zig @@ -7,11 +7,6 @@ const gfx = Platform.gfx; pub const Handle = u32; -pub const Primitive = enum { - triangles, - lines, -}; - /// A generic mesh parameterised by vertex type `V`. 
/// /// Vertex data is stored in `vertices` (an unmanaged ArrayList backed by the @@ -24,13 +19,12 @@ pub fn Mesh(comptime V: type) type { pub const Vertex = V; - handle: Handle, - vertices: std.ArrayList(Vertex), - primitive: Primitive = .triangles, + handle: Handle, + vertices: std.ArrayList(Vertex), pub fn new(alloc: std.mem.Allocator, pipeline: Pipeline.Handle) !Self { return .{ - .handle = try gfx.api.create_mesh(pipeline), + .handle = try gfx.api.create_mesh(pipeline), .vertices = try std.ArrayList(V).initCapacity(alloc, 32), }; } @@ -57,7 +51,9 @@ pub fn Mesh(comptime V: type) type { pub fn draw(self: *Self, mat: *const Mat4) void { gfx.api.draw_mesh( - self.handle, mat, self.vertices.items.len, self.primitive, + self.handle, + mat, + self.vertices.items.len, ); } }; diff --git a/src/rendering/texture.zig b/src/rendering/texture.zig index 7517c5d..e31a4e7 100644 --- a/src/rendering/texture.zig +++ b/src/rendering/texture.zig @@ -50,16 +50,16 @@ pub fn load_from_data(alloc: std.mem.Allocator, width: u32, height: u32, pixels: }; } -/// 4x4 solid white default texture, initialized by `init_defaults`. +/// 8x8 solid white default texture, initialized by `init_defaults`. pub var Default: Texture = undefined; pub fn init_defaults(alloc: std.mem.Allocator) !void { const pixels = comptime blk: { - var data: [4 * 4 * 4]u8 = undefined; + var data: [8 * 8 * 4]u8 = undefined; @memset(&data, 0xFF); break :blk data; }; - Default = try load_from_data(alloc, 4, 4, &pixels); + Default = try load_from_data(alloc, 8, 8, &pixels); } /// Loads a PNG from `path` (resolved against `dir`) into GPU memory. 
diff --git a/test/main.zig b/test/main.zig index a460e9e..5c29d64 100644 --- a/test/main.zig +++ b/test/main.zig @@ -213,6 +213,7 @@ pub fn main(init: std.process.Init) !void { .game = 2 * 1024 * 1024, .user = 8 * 1024 * 1024, }, + .render_capacity = if (ae.platform == .nintendo_3ds) 12 * 1024 * 1024 else null, .resizable = true, }, &state.state()); defer engine.deinit(); diff --git a/test/test.png b/test/test.png index c3cd43aff7e37de05a7fcfc1b544072ecc7391c3..f3af608229d6f76154c16d33a6c53ca240d92745 100644 GIT binary patch literal 342 zcmeAS@N?(olHy`uVBq!ia0vp^93afW1|*O0@9PFq3dtTpz6=aiY77hwEes65fI|H(?D8gRi>Fdh=ij9p=!(@55 zzb;TnwZt`|BqgyV)hf9t6-Y4{85o)A8h}uUp^24=ft9g=wt<0_fx+bi3t3P!Gwj6~<#8I~yO9Fw=Rv0cL}E*8t*3p9bj M)78&qol`;+0EmxXL;wH) literal 83 zcmeAS@N?(olHy`uVBq!ia0vp^EFjFm1|(O0oL2{=q&!_5Ln>}1|2Ti(gFKJqKW4^` gHlqhZETIex8&4_U6&Bn66{w2A)78&qol`;+04^gJB>(^b From fbf42841a60a383b125aa2360dd845042d6d27b4 Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Sat, 6 Jun 2026 11:22:07 -0400 Subject: [PATCH 28/44] Improved demo --- test/main.zig | 234 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 221 insertions(+), 13 deletions(-) diff --git a/test/main.zig b/test/main.zig index 5c29d64..dcf6d1d 100644 --- a/test/main.zig +++ b/test/main.zig @@ -52,12 +52,205 @@ const Vertex = extern struct { const MyMesh = Rendering.Mesh(Vertex); +const BATCH_A_TRIANGLES = 61; +const BATCH_B_TRIANGLES = 78; const MAX_GRASS_VOICES = 4; const Vec3 = Math.Vec3; +fn rgba(r: u8, g: u8, b: u8) u32 { + return @as(u32, r) | + (@as(u32, g) << 8) | + (@as(u32, b) << 16) | + (@as(u32, 0xFF) << 24); +} + +const BatchAColors = [_]u32{ + rgba(255, 62, 62), + rgba(255, 170, 54), + rgba(245, 235, 80), + rgba(76, 210, 130), + rgba(58, 190, 235), + rgba(145, 105, 255), +}; + +const BatchBColors = [_]u32{ + rgba(48, 110, 255), + rgba(56, 205, 190), + rgba(225, 88, 180), + rgba(240, 150, 70), + rgba(210, 230, 80), + rgba(255, 255, 255), +}; + +fn snorm16(v: f32) i16 { + return 
@intFromFloat(std.math.clamp(v, -1.0, 1.0) * 32767.0); +} + +fn vertex(x: f32, y: f32, color: u32, u: f32, v: f32) Vertex { + return .{ + .pos = .{ snorm16(x), snorm16(y), 0 }, + .color = color, + .uv = .{ snorm16(u), snorm16(v) }, + }; +} + +fn appendTriangle( + alloc: std.mem.Allocator, + mesh: *MyMesh, + a: [2]f32, + b: [2]f32, + c: [2]f32, + ca: u32, + cb: u32, + cc: u32, +) !void { + try mesh.append(alloc, &.{ + vertex(a[0], a[1], ca, 0.5, 0.0), + vertex(b[0], b[1], cb, 0.0, 1.0), + vertex(c[0], c[1], cc, 1.0, 1.0), + }); +} + +fn orientedPoint(cx: f32, cy: f32, lx: f32, ly: f32, angle: f32) [2]f32 { + const c = @cos(angle); + const s = @sin(angle); + return .{ + cx + lx * c - ly * s, + cy + lx * s + ly * c, + }; +} + +fn appendOrientedTriangle( + alloc: std.mem.Allocator, + mesh: *MyMesh, + cx: f32, + cy: f32, + sx: f32, + sy: f32, + angle: f32, + c0: u32, + c1: u32, + c2: u32, +) !void { + try appendTriangle( + alloc, + mesh, + orientedPoint(cx, cy, 0.0, sy, angle), + orientedPoint(cx, cy, -sx, -sy, angle), + orientedPoint(cx, cy, sx, -sy, angle), + c0, + c1, + c2, + ); +} + +fn buildBatchA(alloc: std.mem.Allocator, mesh: *MyMesh) !void { + try mesh.vertices.ensureTotalCapacity(alloc, BATCH_A_TRIANGLES * 3); + + try appendTriangle(alloc, mesh, .{ -0.44, -0.40 }, .{ 0.44, -0.40 }, .{ 0.0, 0.52 }, BatchAColors[0], BatchAColors[3], BatchAColors[5]); + + const spoke_count = 36; + for (0..spoke_count) |i| { + const t = @as(f32, @floatFromInt(i)) / @as(f32, @floatFromInt(spoke_count)); + const angle = t * std.math.pi * 2.0; + const tip_radius = 0.78 + @sin(angle * 3.0) * 0.04; + const base_radius = 0.31 + @cos(angle * 2.0) * 0.035; + const half_width = 0.075 + @sin(angle * 5.0) * 0.012; + const tip = [2]f32{ @cos(angle) * tip_radius, @sin(angle) * tip_radius }; + const left = [2]f32{ @cos(angle - half_width) * base_radius, @sin(angle - half_width) * base_radius }; + const right = [2]f32{ @cos(angle + half_width) * base_radius, @sin(angle + half_width) * 
base_radius }; + + try appendTriangle( + alloc, + mesh, + tip, + left, + right, + BatchAColors[i % BatchAColors.len], + BatchAColors[(i + 2) % BatchAColors.len], + BatchAColors[(i + 4) % BatchAColors.len], + ); + } + + const marker_count = 24; + for (0..marker_count) |i| { + const t = @as(f32, @floatFromInt(i)) / @as(f32, @floatFromInt(marker_count)); + const angle = t * std.math.pi * 2.0; + const radius = 0.56 + if (i % 2 == 0) @as(f32, 0.045) else -0.025; + const size = 0.032 + @as(f32, @floatFromInt(i % 4)) * 0.006; + try appendOrientedTriangle( + alloc, + mesh, + @cos(angle) * radius, + @sin(angle) * radius, + size * 0.75, + size, + angle + std.math.pi * 0.5, + BatchAColors[(i + 1) % BatchAColors.len], + BatchAColors[(i + 3) % BatchAColors.len], + BatchAColors[(i + 5) % BatchAColors.len], + ); + } +} + +fn buildBatchB(alloc: std.mem.Allocator, mesh: *MyMesh) !void { + try mesh.vertices.ensureTotalCapacity(alloc, BATCH_B_TRIANGLES * 3); + + const cols = 9; + const rows = 6; + for (0..rows) |row| { + for (0..cols) |col| { + const idx = row * cols + col; + const fx = @as(f32, @floatFromInt(col)) / @as(f32, @floatFromInt(cols - 1)); + const fy = @as(f32, @floatFromInt(row)) / @as(f32, @floatFromInt(rows - 1)); + const x = -0.88 + fx * 1.76; + const y = -0.67 + fy * 1.34; + const size = 0.04 + @as(f32, @floatFromInt((idx + row) % 5)) * 0.008; + const angle = @as(f32, @floatFromInt(idx)) * 0.43 + @sin(fy * std.math.pi) * 0.35; + + try appendOrientedTriangle( + alloc, + mesh, + x, + y, + size * (0.72 + fx * 0.35), + size, + angle, + BatchBColors[idx % BatchBColors.len], + BatchBColors[(idx + 2) % BatchBColors.len], + BatchBColors[(idx + 4) % BatchBColors.len], + ); + } + } + + const wave_count = 24; + for (0..wave_count) |i| { + const t = @as(f32, @floatFromInt(i)) / @as(f32, @floatFromInt(wave_count - 1)); + const x = -0.94 + t * 1.88; + const y = 0.78 + @sin(t * std.math.pi * 6.0) * 0.095; + const size = 0.035 + @as(f32, @floatFromInt(i % 3)) * 0.007; + const angle 
= t * std.math.pi * 4.0; + + try appendOrientedTriangle( + alloc, + mesh, + x, + y, + size, + size * 1.6, + angle, + BatchBColors[(i + 5) % BatchBColors.len], + BatchBColors[(i + 1) % BatchBColors.len], + BatchBColors[(i + 3) % BatchBColors.len], + ); + } +} + const MyState = struct { - mesh: MyMesh, - transform: Rendering.Transform, + batch_a: MyMesh, + batch_b: MyMesh, + batch_a_transform: Rendering.Transform, + batch_b_transform: Rendering.Transform, texture: Rendering.Texture, music_data: []const u8, music_reader: std.Io.Reader, @@ -65,6 +258,7 @@ const MyState = struct { grass_readers: [MAX_GRASS_VOICES]std.Io.Reader, grass_tick: u32, grass_spawn: u32, + time: f32, fn load_wav(engine: *ae.Engine, path: []const u8) ![]u8 { var file = try engine.dirs.resources.openFile(engine.io, path, .{}); @@ -94,17 +288,17 @@ const MyState = struct { const render = engine.allocator(.render); - self.mesh = try MyMesh.new(render, pipeline); - self.transform = Rendering.Transform.new(); + self.batch_a = try MyMesh.new(render, pipeline); + self.batch_b = try MyMesh.new(render, pipeline); + self.batch_a_transform = Rendering.Transform.new(); + self.batch_b_transform = Rendering.Transform.new(); self.texture = try Rendering.Texture.load(engine.io, engine.dirs.resources, render, "test.png"); - try self.mesh.append(render, &.{ - Vertex{ .pos = .{ -16383, -16383, 0 }, .color = 0xFF0000FF, .uv = .{ 0, 32767 } }, - Vertex{ .pos = .{ 16383, -16383, 0 }, .color = 0xFF00FF00, .uv = .{ 32767, 32767 } }, - Vertex{ .pos = .{ 0, 16383, 0 }, .color = 0xFFFF0000, .uv = .{ 16383, 0 } }, - }); - self.mesh.update(); + try buildBatchA(render, &self.batch_a); + try buildBatchB(render, &self.batch_b); + self.batch_a.update(); + self.batch_b.update(); self.music_data = &.{}; self.music_reader = .fixed(&.{}); @@ -112,6 +306,7 @@ const MyState = struct { self.grass_readers = @splat(.fixed(&.{})); self.grass_tick = 0; self.grass_spawn = 0; + self.time = 0.0; if (!Audio.enabled) return; @@ -134,7 +329,8 @@ 
const MyState = struct { var self = ae.ctx_to_self(MyState, ctx); const render = engine.allocator(.render); self.texture.deinit(render); - self.mesh.deinit(render); + self.batch_b.deinit(render); + self.batch_a.deinit(render); Rendering.Pipeline.deinit(pipeline); } @@ -170,7 +366,18 @@ const MyState = struct { fn update(ctx: *anyopaque, _: *ae.Engine, dt: f32, _: *const Util.BudgetContext) anyerror!void { var self = ae.ctx_to_self(MyState, ctx); - self.transform.rot.z += 60.0 * dt; + self.time += dt; + + self.batch_a_transform.rot.z += 76.0 * dt; + self.batch_b_transform.rot.z -= 18.0 * dt; + + const batch_a_pulse = 1.0 + @sin(self.time * 1.8) * 0.06; + const batch_b_x = 1.0 + @cos(self.time * 0.9) * 0.035; + const batch_b_y = 1.0 + @sin(self.time * 1.1) * 0.035; + + self.batch_a_transform.scale = Vec3.new(batch_a_pulse, batch_a_pulse, 1.0); + self.batch_b_transform.scale = Vec3.new(batch_b_x, batch_b_y, 1.0); + self.batch_b_transform.pos = Vec3.new(@sin(self.time * 0.7) * 0.075, @cos(self.time * 0.5) * 0.04, 0.0); } fn draw(ctx: *anyopaque, _: *ae.Engine, _: f32, _: *const Util.BudgetContext) anyerror!void { @@ -185,7 +392,8 @@ const MyState = struct { Rendering.Pipeline.bind(pipeline); self.texture.bind(); - self.mesh.draw(&self.transform.get_matrix()); + self.batch_b.draw(&self.batch_b_transform.get_matrix()); + self.batch_a.draw(&self.batch_a_transform.get_matrix()); } pub fn state(self: *MyState) State { From 373768cae0134b58a9bd2a440bb68a351a861012 Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Sat, 6 Jun 2026 12:52:18 -0400 Subject: [PATCH 29/44] Add 3DS memory changes --- build.zig | 15 +++ src/engine.zig | 54 ++++++-- src/platform/3ds/3ds_audio.zig | 4 +- src/platform/3ds/3ds_gfx.zig | 217 ++++++++++++++++++++++++++++++--- src/platform/3ds/services.zig | 21 +++- src/platform/gfx.zig | 7 ++ src/platform/platform.zig | 15 ++- src/root.zig | 2 + test/main.zig | 33 ++++- 9 files changed, 325 insertions(+), 43 deletions(-) diff --git a/build.zig 
b/build.zig index 166a558..26387bd 100644 --- a/build.zig +++ b/build.zig @@ -1,6 +1,9 @@ const std = @import("std"); const pspsdk = @import("pspsdk"); +const DEFAULT_3DS_HEAP_SIZE: u32 = 48 * 1024 * 1024; +const DEFAULT_3DS_LINEAR_HEAP_SIZE: u32 = 32 * 1024 * 1024; + pub const Platform = enum { windows, linux, @@ -49,6 +52,10 @@ pub const Config = struct { /// levels for the texture. Off by default since the extra VRAM cost /// only pays off for textures sampled at a wide range of distances. psp_mipmaps: bool = false, + /// 3DS: newlib/application heap reserved by libctru's startup shim. + nintendo_3ds_heap_size: u32 = DEFAULT_3DS_HEAP_SIZE, + /// 3DS: linear heap reserved by libctru's startup shim. + nintendo_3ds_linear_heap_size: u32 = DEFAULT_3DS_LINEAR_HEAP_SIZE, /// When true, `Core.paths.resolve` returns CWD for both resources /// and data, bypassing the platform-specific layout (.app Resources /// on mac, APPDATA on Windows, XDG on Linux). @@ -101,6 +108,8 @@ pub const Config = struct { .audio = overrides.audio orelse default_audio, .psp_display_mode = overrides.psp_display_mode orelse .rgba8888, .psp_mipmaps = overrides.psp_mipmaps orelse false, + .nintendo_3ds_heap_size = overrides.nintendo_3ds_heap_size orelse DEFAULT_3DS_HEAP_SIZE, + .nintendo_3ds_linear_heap_size = overrides.nintendo_3ds_linear_heap_size orelse DEFAULT_3DS_LINEAR_HEAP_SIZE, .use_cwd = overrides.use_cwd orelse false, }; } @@ -110,6 +119,8 @@ pub const Config = struct { audio: ?Audio = null, psp_display_mode: ?PspDisplayMode = null, psp_mipmaps: ?bool = null, + nintendo_3ds_heap_size: ?u32 = null, + nintendo_3ds_linear_heap_size: ?u32 = null, use_cwd: ?bool = null, /// Promotes an `aarch64-freestanding-none` target to the /// `nintendo_switch` platform. No effect when null/false. 
@@ -1416,12 +1427,16 @@ pub fn addShader(owner: *std.Build, b: *std.Build, exe: *std.Build.Step.Compile, pub fn build(b: *std.Build) void { const target = b.standardTargetOptions(.{}); const optimize = b.standardOptimizeOption(.{}); + const threeds_heap_mib = b.option(u32, "3ds-heap-mib", "3DS: application heap size in MiB (default: 48)"); + const threeds_linear_heap_mib = b.option(u32, "3ds-linear-heap-mib", "3DS: linear heap size in MiB (default: 32)"); const overrides: Config.Overrides = .{ .gfx = b.option(Gfx, "gfx", "Graphics backend override (default: auto-detect from target)"), .audio = b.option(Audio, "audio", "Audio backend override (default: platform default)"), .psp_display_mode = b.option(PspDisplayMode, "psp-display", "PSP display mode: rgba8888 (32-bit, default) or rgb565 (16-bit)"), .psp_mipmaps = b.option(bool, "psp-mipmaps", "PSP: generate mip levels for VRAM-resident textures (default: false)"), + .nintendo_3ds_heap_size = if (threeds_heap_mib) |mib| mib * 1024 * 1024 else null, + .nintendo_3ds_linear_heap_size = if (threeds_linear_heap_mib) |mib| mib * 1024 * 1024 else null, .use_cwd = b.option(bool, "use-cwd", "Force resources+data dirs to CWD (debug/CI convenience; default: false)"), .nintendo_switch = b.option(bool, "nintendo-switch", "Build for Nintendo Switch (requires -Dtarget=aarch64-freestanding-none and devkitA64/libnx)"), }; diff --git a/src/engine.zig b/src/engine.zig index baf9e42..d9cbf93 100644 --- a/src/engine.zig +++ b/src/engine.zig @@ -44,7 +44,10 @@ const LinearRenderMemory = if (options.config.platform == .nintendo_3ds) struct } }; -const LINEAR_RENDER_RESERVE_BYTES: usize = 2 * 1024 * 1024; +const LINEAR_RENDER_RESERVE_BYTES: usize = if (options.config.platform == .nintendo_3ds) + 4 * 1024 * 1024 +else + 2 * 1024 * 1024; // -- category tracker (wrapper allocator with per-category accounting) -------- @@ -199,12 +202,26 @@ pub const Engine = struct { try logger.init(sys_io, self.dirs.data); - try Platform.init(self, 
config.width, config.height, config.title, config.fullscreen, config.vsync, config.resizable); - if (external_render) try self.init_linear_render_pool(config); + Platform.init(self, config.width, config.height, config.title, config.fullscreen, config.vsync, config.resizable) catch |err| switch (err) { + error.OutOfMemory => return error.PlatformInitOutOfMemory, + else => return err, + }; + errdefer Platform.deinit(); + + if (external_render) { + try self.init_linear_render_pool(config); + Platform.gfx.rebind_backend_allocator(self.allocator(.render), self.io); + } errdefer self.deinit_linear_render_pool(); - try Rendering.Texture.init_defaults(self.allocator(.render)); - try Core.state_machine.init(self, &self.state); + Rendering.Texture.init_defaults(self.allocator(.render)) catch |err| switch (err) { + error.OutOfMemory => return error.DefaultTexturesOutOfMemory, + else => return err, + }; + Core.state_machine.init(self, &self.state) catch |err| switch (err) { + error.OutOfMemory => return error.StateInitOutOfMemory, + else => return err, + }; } pub fn deinit(self: *Engine) void { @@ -223,14 +240,35 @@ pub const Engine = struct { fn init_linear_render_pool(self: *Engine, config: Config) !void { assert(config.render_capacity.? >= config.memory.render); + const mib = 1024 * 1024; const available: usize = LinearRenderMemory.spaceFree(); - if (available <= LINEAR_RENDER_RESERVE_BYTES) return error.OutOfMemory; + if (available <= LINEAR_RENDER_RESERVE_BYTES) { + std.debug.panic( + "LinLowMiB a={} q={} h={} l={}", + .{ + available / mib, + config.render_capacity.? 
/ mib, + options.config.nintendo_3ds_heap_size / mib, + options.config.nintendo_3ds_linear_heap_size / mib, + }, + ); + } const capacity = @min(config.render_capacity.?, available - LINEAR_RENDER_RESERVE_BYTES); - if (capacity < config.memory.render) return error.OutOfMemory; - const linear_mem = try alloc_linear_bytes(capacity); + const linear_mem = alloc_linear_bytes(capacity) catch |err| switch (err) { + error.OutOfMemory => std.debug.panic( + "LinAllocMiB a={} c={} q={} h={} l={}", + .{ + available / mib, + capacity / mib, + config.render_capacity.? / mib, + options.config.nintendo_3ds_heap_size / mib, + options.config.nintendo_3ds_linear_heap_size / mib, + }, + ), + }; self.linear_render_mem = linear_mem; self.linear_render_pool = memory.PoolAlloc.init(linear_mem, "render-linear"); diff --git a/src/platform/3ds/3ds_audio.zig b/src/platform/3ds/3ds_audio.zig index 239ece8..750a338 100644 --- a/src/platform/3ds/3ds_audio.zig +++ b/src/platform/3ds/3ds_audio.zig @@ -93,12 +93,12 @@ pub fn init() anyerror!void { _ = audio_alloc; _ = audio_io; - audio_data = @ptrCast(linearAlloc(TOTAL_AUDIO_BYTES) orelse return error.AudioInitFailed); + audio_data = @ptrCast(linearAlloc(TOTAL_AUDIO_BYTES) orelse return error.AudioLinearAllocFailed); if (ndspInit() != 0) { linearFree(audio_data); audio_data = null; - return error.AudioInitFailed; + return error.NdspInitFailed; } ndspSetOutputMode(NDSP_OUTPUT_STEREO); diff --git a/src/platform/3ds/3ds_gfx.zig b/src/platform/3ds/3ds_gfx.zig index e61c6d6..1204eb5 100644 --- a/src/platform/3ds/3ds_gfx.zig +++ b/src/platform/3ds/3ds_gfx.zig @@ -48,6 +48,9 @@ const SCREEN_HEIGHT: u32 = 240; const TARGET_WIDTH: c_int = 240; const TARGET_HEIGHT: c_int = 400; const MAX_VERTEX_ATTRS: usize = 12; +const MESH_SLOT_COUNT: usize = 2; +const MAX_DEFERRED_MESH_FREES: usize = 4096; +const C3D_CMD_BUFFER_SIZE: usize = 1024 * 1024; const MAX_TEXTURE_SIZE: u32 = 1024; const MIN_TEXTURE_SIZE: u32 = 8; const TEX_BPP: usize = 4; @@ -71,6 +74,7 @@ const 
VERTEX_SHADER_INDEX: usize = 0; const BufferBinding = struct { offset: usize, + vertex_span: usize, attrib_count: c_int, permutation: u64, }; @@ -93,10 +97,22 @@ const PipelineData = struct { const MeshData = struct { pipeline: Pipeline.Handle, - data: ?[*]const u8, + slots: [MESH_SLOT_COUNT]MeshSlot = .{ .{}, .{} }, + latest_slot: ?usize = null, len: usize, }; +const MeshSlot = struct { + data: ?[]align(16) u8 = null, + len: usize = 0, + in_flight: bool = false, + used_this_frame: bool = false, +}; + +const DeferredMeshFree = struct { + data: []align(16) u8, +}; + const TexMirror = extern struct { data: ?*anyopaque, fmt_size: u32, @@ -114,6 +130,7 @@ const TextureData = struct { width: u32, height: u32, tex: TexMirror, + staging: ?[]align(16) u8 = null, }; const TexEnvMirror = extern struct { @@ -150,6 +167,7 @@ const RenderTargetMirror = extern struct { var pipelines = Util.CircularBuffer(PipelineData, 16).init(); var meshes = Util.CircularBuffer(MeshData, 2048).init(); +var deferred_mesh_frees = Util.CircularBuffer(DeferredMeshFree, MAX_DEFERRED_MESH_FREES + 1).init(); var textures = Util.CircularBuffer(TextureData, 64).init(); var target: ?*c.C3D_RenderTarget = null; @@ -174,9 +192,9 @@ pub fn init() anyerror!void { _ = render_io; c.gfxInitDefault(); - if (!c.C3D_Init(c.C3D_DEFAULT_CMDBUF_SIZE)) { + if (!c.C3D_Init(C3D_CMD_BUFFER_SIZE)) { c.gfxExit(); - return error.GfxInitFailed; + return error.C3DInitFailed; } errdefer { c.C3D_Fini(); @@ -188,7 +206,7 @@ pub fn init() anyerror!void { TARGET_HEIGHT, c.GPU_RB_RGBA8, c.C3D_DEPTHTYPE{ .__e = c.GPU_RB_DEPTH24_STENCIL8 }, - ) orelse return error.GfxInitFailed; + ) orelse return error.C3DRenderTargetCreateFailed; errdefer { c.C3D_RenderTargetDelete(target); target = null; @@ -205,10 +223,14 @@ pub fn init() anyerror!void { pub fn deinit() void { frame_started = false; + if (initialized) c.C3D_FrameSync(); + release_completed_mesh_slots(); + free_deferred_mesh_slots(); for (1..textures.buffer.len) |i| { if 
(textures.buffer[i]) |*tex| { c.C3D_TexDelete(tex_ptr(tex)); + free_texture_staging(tex); } } textures.clear(); @@ -220,6 +242,12 @@ pub fn deinit() void { } } pipelines.clear(); + + for (1..meshes.buffer.len) |i| { + if (meshes.buffer[i]) |*mesh| { + free_mesh_slots(mesh); + } + } meshes.clear(); if (target) |t| { @@ -311,6 +339,9 @@ pub fn start_frame() bool { const flags: u8 = @intCast(if (vsync_enabled) c.C3D_FRAME_SYNCDRAW else c.C3D_FRAME_NONBLOCK); if (!c.C3D_FrameBegin(flags)) return false; + release_completed_mesh_slots(); + free_deferred_mesh_slots(); + c.C3D_FrameBufClear(target_frame_buf(t), c.C3D_CLEAR_ALL, clear_color, 0); if (!c.C3D_FrameDrawOn(t)) { @@ -328,6 +359,7 @@ pub fn start_frame() bool { pub fn end_frame() void { if (!frame_started) return; + mark_current_frame_mesh_slots_in_flight(); c.C3D_FrameEnd(0); frame_started = false; } @@ -383,6 +415,7 @@ pub fn create_pipeline(layout: Pipeline.VertexLayout, v_shader: ?[:0]align(4) co .stride = layout.stride, .buffer = .{ .offset = buffer_layout.base_offset, + .vertex_span = buffer_layout.vertex_span, .attrib_count = buffer_layout.attribute_count, .permutation = buffer_layout.permutation, }, @@ -417,7 +450,6 @@ pub fn create_mesh(pipeline: Pipeline.Handle) anyerror!Mesh.Handle { if (pipeline_slot(pipeline) == null) return error.InvalidPipeline; const handle = meshes.add_element(.{ .pipeline = pipeline, - .data = null, .len = 0, }) orelse return error.OutOfMeshes; @@ -425,34 +457,49 @@ pub fn create_mesh(pipeline: Pipeline.Handle) anyerror!Mesh.Handle { } pub fn destroy_mesh(handle: Mesh.Handle) void { + if (mesh_slot(handle)) |mesh| { + free_mesh_slots(mesh); + } _ = meshes.remove_element(handle); } pub fn update_mesh(handle: Mesh.Handle, data: []const u8) void { const mesh = mesh_slot(handle) orelse return; + if (data.len > std.math.maxInt(u32)) { + std.debug.panic("3ds_gfx: mesh vertex data is too large to flush", .{}); + } + if (data.len == 0) { - mesh.data = null; + mesh.latest_slot = null; 
mesh.len = 0; return; } - if (!is_linear_fcram(data.ptr, data.len)) { - std.debug.panic("3ds_gfx: mesh vertex data must be allocated in linear FCRAM", .{}); - } - if (data.len > std.math.maxInt(u32)) { - std.debug.panic("3ds_gfx: mesh vertex data is too large to flush", .{}); - } + const slot_idx = select_upload_slot(mesh) orelse + std.debug.panic("3ds_gfx: update_mesh called while both 3DS mesh upload slots are in use", .{}); + const slot = &mesh.slots[slot_idx]; + ensure_mesh_slot_capacity(slot, data.len) catch + std.debug.panic("3ds_gfx: out of linear memory for mesh upload", .{}); - _ = c.GSPGPU_FlushDataCache(data.ptr, @intCast(data.len)); - mesh.data = data.ptr; + const dst = slot.data.?; + @memcpy(dst[0..data.len], data); + slot.len = data.len; + _ = c.GSPGPU_FlushDataCache(dst.ptr, @intCast(data.len)); + + mesh.latest_slot = slot_idx; mesh.len = data.len; } pub fn draw_mesh(handle: Mesh.Handle, model: *const Mat4, count: usize) void { const mesh = mesh_slot(handle) orelse return; const pl = pipeline_slot(mesh.pipeline) orelse return; - const data = mesh.data orelse return; - if (count == 0 or mesh.len == 0) return; + const slot_idx = mesh.latest_slot orelse return; + const slot = &mesh.slots[slot_idx]; + const data = slot.data orelse return; + if (count == 0 or slot.len == 0) return; + + const needed = mesh_draw_bytes_needed(pl, count) orelse return; + if (needed > slot.len) return; bind_pipeline_data(pl); upload_draw_uniforms(pl, model); @@ -460,12 +507,14 @@ pub fn draw_mesh(handle: Mesh.Handle, model: *const Mat4, count: usize) void { const buf = c.C3D_GetBufInfo() orelse return; c.BufInfo_Init(buf); - const ptr = data + pl.buffer.offset; + const ptr = @as([*]const u8, data.ptr) + pl.buffer.offset; const added = c.BufInfo_Add(buf, ptr, @intCast(pl.stride), pl.buffer.attrib_count, pl.buffer.permutation); if (added < 0) { c.BufInfo_Init(buf); return; } + + slot.used_this_frame = true; c.C3D_DrawArrays(c.GPU_TRIANGLES, 0, @intCast(count)); } @@ -483,6 
+532,7 @@ pub fn create_texture(width: u32, height: u32, data: []align(16) u8) anyerror!Te .height = height, .tex = init_tex_mirror(width, height, mem, size), }; + errdefer free_texture_staging(&tex); try upload_texture_data(&tex, data[0..size]); @@ -509,6 +559,7 @@ pub fn bind_texture(handle: Texture.Handle) void { pub fn destroy_texture(handle: Texture.Handle) void { if (texture_slot(handle)) |tex| { c.C3D_TexDelete(tex_ptr(tex)); + free_texture_staging(tex); } if (bound_texture == handle) { bound_texture = 0; @@ -519,6 +570,109 @@ pub fn destroy_texture(handle: Texture.Handle) void { pub fn force_texture_resident(_: Texture.Handle) void {} +fn select_upload_slot(mesh: *const MeshData) ?usize { + if (mesh.latest_slot) |idx| { + const slot = mesh.slots[idx]; + if (!slot.in_flight and !slot.used_this_frame) return idx; + } + + for (0..MESH_SLOT_COUNT) |idx| { + const slot = mesh.slots[idx]; + if (!slot.in_flight and !slot.used_this_frame) return idx; + } + + return null; +} + +fn ensure_mesh_slot_capacity(slot: *MeshSlot, len: usize) !void { + if (slot.data) |buf| { + if (buf.len >= len) return; + } + + const cap = mesh_slot_capacity(len); + const new_data = try render_alloc.alignedAlloc(u8, .fromByteUnits(16), cap); + + if (!is_linear_fcram(new_data.ptr, new_data.len)) { + render_alloc.free(new_data); + std.debug.panic("3ds_gfx: mesh upload slots must be allocated in linear FCRAM", .{}); + } + + if (slot.data) |old| render_alloc.free(old); + slot.data = new_data; + slot.len = 0; +} + +fn mesh_slot_capacity(len: usize) usize { + return @max(len, 256); +} + +fn free_mesh_slots(mesh: *MeshData) void { + for (&mesh.slots) |*slot| free_mesh_slot(slot); + mesh.latest_slot = null; + mesh.len = 0; +} + +fn free_mesh_slot(slot: *MeshSlot) void { + if (slot.data) |data| { + if (slot.in_flight or slot.used_this_frame) { + defer_mesh_free(data); + } else { + render_alloc.free(data); + } + } + slot.* = .{}; +} + +fn defer_mesh_free(data: []align(16) u8) void { + if 
(deferred_mesh_frees.add_element(.{ .data = data }) != null) return; + std.debug.panic("3ds_gfx: deferred mesh free queue exhausted", .{}); +} + +fn free_deferred_mesh_slots() void { + for (1..deferred_mesh_frees.buffer.len) |i| { + if (deferred_mesh_frees.buffer[i]) |free| { + render_alloc.free(free.data); + } + } + deferred_mesh_frees.clear(); +} + +fn release_completed_mesh_slots() void { + for (1..meshes.buffer.len) |i| { + if (meshes.buffer[i]) |*mesh| { + for (&mesh.slots) |*slot| { + slot.in_flight = false; + slot.used_this_frame = false; + } + } + } +} + +fn mark_current_frame_mesh_slots_in_flight() void { + for (1..meshes.buffer.len) |i| { + if (meshes.buffer[i]) |*mesh| { + for (&mesh.slots) |*slot| { + if (slot.used_this_frame) { + slot.in_flight = true; + slot.used_this_frame = false; + } + } + } + } +} + +fn mesh_draw_bytes_needed(pl: *const PipelineData, count: usize) ?usize { + if (count == 0) return 0; + + const tail_count = count - 1; + const max = std.math.maxInt(usize); + if (pl.stride != 0 and tail_count > (max - pl.buffer.vertex_span) / pl.stride) return null; + + const rel_end = tail_count * pl.stride + pl.buffer.vertex_span; + if (pl.buffer.offset > max - rel_end) return null; + return pl.buffer.offset + rel_end; +} + fn apply_render_state() void { set_alpha_blend(alpha_blend_enabled); set_depth_write(depth_write_enabled); @@ -731,8 +885,7 @@ fn texture_param() u32 { fn upload_texture_data(tex: *TextureData, data: []align(16) const u8) !void { const size = texture_size(tex.width, tex.height); - const upload = try render_alloc.alignedAlloc(u8, .fromByteUnits(16), size); - defer render_alloc.free(upload); + const upload = try ensure_texture_staging(tex, size); convert_texture_data(upload, data, tex.width, tex.height); flush_texture_source(upload); @@ -740,6 +893,30 @@ fn upload_texture_data(tex: *TextureData, data: []align(16) const u8) !void { c.C3D_TexFlush(tex_ptr(tex)); } +fn ensure_texture_staging(tex: *TextureData, size: usize) 
![]align(16) u8 { + if (tex.staging) |buf| { + if (buf.len >= size) return buf[0..size]; + render_alloc.free(buf); + tex.staging = null; + } + + const staging = try render_alloc.alignedAlloc(u8, .fromByteUnits(16), size); + if (!is_linear_fcram(staging.ptr, staging.len)) { + render_alloc.free(staging); + std.debug.panic("3ds_gfx: texture staging must be allocated in linear FCRAM", .{}); + } + + tex.staging = staging; + return staging; +} + +fn free_texture_staging(tex: *TextureData) void { + if (tex.staging) |staging| { + render_alloc.free(staging); + tex.staging = null; + } +} + fn convert_texture_data(dst: []align(16) u8, src: []align(16) const u8, width: u32, height: u32) void { for (0..height) |y| { const sy = height - 1 - @as(u32, @intCast(y)); @@ -818,6 +995,7 @@ fn find_attr(layout: Pipeline.VertexLayout, usage: Pipeline.AttributeUsage) ?Pip const BufferLayout = struct { base_offset: usize, + vertex_span: usize, attribute_count: c_int, permutation: u64, position_loader_size: u8, @@ -867,6 +1045,7 @@ fn buffer_layout_from_attrs(stride: usize, position_attr: Pipeline.Attribute, uv return .{ .base_offset = base_offset, + .vertex_span = current_rel, .attribute_count = @intCast(attribute_count), .permutation = permutation, .position_loader_size = position_loader_size, diff --git a/src/platform/3ds/services.zig b/src/platform/3ds/services.zig index c7066ce..c582e71 100644 --- a/src/platform/3ds/services.zig +++ b/src/platform/3ds/services.zig @@ -9,6 +9,11 @@ //! libctru) with a strong export. 1 MB is comfortable; bump if engine //! frames grow. //! +//! libctru also exposes weak `__ctru_heap_size` and +//! `__ctru_linear_heap_size` symbols. Leaving them at zero asks libctru to +//! compute a conservative split, which is too small for Aether's explicit +//! linear render pool. Export strong values from build config instead. +//! //! libctru also creates service threads internally. NDSP currently asks for //! 
a 4 KB stack, which can underflow in its sound-frame worker before Aether //! code is on the stack. The 3DS link step wraps `threadCreate`, and the @@ -16,6 +21,7 @@ const process_init = @import("aether").CProcessInit; const std = @import("std"); +const options = @import("options"); pub const os = struct { pub const PATH_MAX = 1024; @@ -86,11 +92,15 @@ extern const __text_end: u8; comptime { @export(&entry, .{ .name = "main" }); @export(&stack_size, .{ .name = "__stacksize__" }); + @export(&heap_size, .{ .name = "__ctru_heap_size" }); + @export(&linear_heap_size, .{ .name = "__ctru_linear_heap_size" }); @export(&threadCreateWrap, .{ .name = "__wrap_threadCreate" }); @export(&exceptionHandler, .{ .name = "aether3dsExceptionHandler" }); } var stack_size: u32 = 1 * 1024 * 1024; +var heap_size: u32 = options.config.nintendo_3ds_heap_size; +var linear_heap_size: u32 = options.config.nintendo_3ds_linear_heap_size; var exception_stack: [exception_stack_size]u8 align(8) = undefined; var panic_stage: u8 = 0; /// Captured very early in entry() so that the panic walker can scan the full @@ -404,9 +414,10 @@ pub fn panic(msg: []const u8, _: ?*std.builtin.StackTrace, first_trace_addr: ?us // The pretty multi-line version above still goes to debugString (visible via 3dslink etc.). var user_buf: [256:0]u8 = @splat(0); var uw: std.Io.Writer = .fixed(&user_buf); - // short header to leave room for addresses + // Put the panic message first: ERRF surfaces have very little room, and + // diagnostics often carry the useful measurement in `msg`. 
var hbuf: [96]u8 = undefined; - const h = std.fmt.bufPrint(&hbuf, "Aether panic at 0x{x}: {s}\n", .{first, msg}) catch "Aether panic\n"; + const h = std.fmt.bufPrint(&hbuf, "{s}\npc=0x{x}\n", .{msg, first}) catch "Aether panic\n"; uw.writeAll(h[0..@min(h.len, 70)]) catch {}; if (@errorReturnTrace()) |t| { const nerr = @min(t.index, t.instruction_addresses.len); @@ -433,11 +444,11 @@ pub fn panic(msg: []const u8, _: ?*std.builtin.StackTrace, first_trace_addr: ?us _ = errfInit(); _ = ERRF_SetUserString(user_str.ptr); - // The failure message shown as "Reason" on the error screen is limited (~0x60 bytes). - // Use Writer.fixed to cleanly format PC + as many stack addrs as fit. + // The failure message shown as "Reason" on the error screen is limited + // (~0x60 bytes), so prioritize the measurement text over the PC/stack. var throw_buf: [0x60:0]u8 = @splat(0); var w: std.Io.Writer = .fixed(&throw_buf); - w.print("Aether at 0x{x}: {s}", .{first, msg}) catch {}; + w.print("{s} pc=0x{x}", .{msg, first}) catch {}; if (n > 0) { w.print(" [", .{}) catch {}; const max_short = 4; diff --git a/src/platform/gfx.zig b/src/platform/gfx.zig index 36252e8..f4748c6 100644 --- a/src/platform/gfx.zig +++ b/src/platform/gfx.zig @@ -63,6 +63,13 @@ pub fn init( try Api.init(); } +/// Rebinds the backend-side allocator after graphics initialization. +/// Used by backends that need platform services online before choosing +/// their final render-memory arena. +pub fn rebind_backend_allocator(alloc: std.mem.Allocator, io: std.Io) void { + Api.setup(alloc, io); +} + /// Deinitializes the graphics subsystem and frees all associated resources. pub fn deinit() void { Api.deinit(); diff --git a/src/platform/platform.zig b/src/platform/platform.zig index 4390ab6..f10d1aa 100644 --- a/src/platform/platform.zig +++ b/src/platform/platform.zig @@ -11,9 +11,18 @@ pub const GraphicsAPI = @import("options").@"build.Gfx"; /// Initializes the platform subsystems: graphics, audio, then input. 
/// Order matters: input subscribes to surface callbacks created by gfx. pub fn init(engine: *Engine, width: u32, height: u32, title: [:0]const u8, fullscreen: bool, sync: bool, resizable: bool) !void { - try gfx.init(engine.allocator(.render), engine.io, width, height, title, fullscreen, sync, resizable); - try audio.init(engine.allocator(.audio), engine.io); - try input.init(engine.allocator(.game), engine.io); + gfx.init(engine.allocator(.render), engine.io, width, height, title, fullscreen, sync, resizable) catch |err| switch (err) { + error.OutOfMemory => return error.GfxInitOutOfMemory, + else => return err, + }; + audio.init(engine.allocator(.audio), engine.io) catch |err| switch (err) { + error.OutOfMemory => return error.AudioInitOutOfMemory, + else => return err, + }; + input.init(engine.allocator(.game), engine.io) catch |err| switch (err) { + error.OutOfMemory => return error.InputInitOutOfMemory, + else => return err, + }; } /// Updates the platform subsystems. Must be called once per frame. 
diff --git a/src/root.zig b/src/root.zig index 0f81917..b01f21b 100644 --- a/src/root.zig +++ b/src/root.zig @@ -25,6 +25,8 @@ pub const Platform = options.@"build.Platform"; pub const Gfx = options.@"build.Gfx"; pub const platform: Platform = options.config.platform; pub const gfx: Gfx = options.config.gfx; +pub const nintendo_3ds_heap_size: u32 = options.config.nintendo_3ds_heap_size; +pub const nintendo_3ds_linear_heap_size: u32 = options.config.nintendo_3ds_linear_heap_size; comptime { std.testing.refAllDecls(@This()); diff --git a/test/main.zig b/test/main.zig index dcf6d1d..79d77bc 100644 --- a/test/main.zig +++ b/test/main.zig @@ -391,9 +391,11 @@ const MyState = struct { )); Rendering.Pipeline.bind(pipeline); + Rendering.gfx.api.set_depth_write(false); self.texture.bind(); self.batch_b.draw(&self.batch_b_transform.get_matrix()); self.batch_a.draw(&self.batch_a_transform.get_matrix()); + Rendering.gfx.api.set_depth_write(true); } pub fn state(self: *MyState) State { @@ -410,20 +412,39 @@ const MyState = struct { var pipeline: Rendering.Pipeline.Handle = undefined; pub fn main(init: std.process.Init) !void { - const memory = try init.arena.allocator().alloc(u8, 32 * 1024 * 1024); + const mib = 1024 * 1024; + const render_memory_bytes = if (ae.platform == .nintendo_3ds) 28 * 1024 * 1024 else 12 * 1024 * 1024; + const main_memory_bytes = if (ae.platform == .nintendo_3ds) 24 * 1024 * 1024 else 32 * 1024 * 1024; + const memory = init.arena.allocator().alloc(u8, main_memory_bytes) catch |err| switch (err) { + error.OutOfMemory => std.debug.panic( + "MainOOMMiB m={} r={} h={} l={}", + .{ + main_memory_bytes / mib, + render_memory_bytes / mib, + ae.nintendo_3ds_heap_size / mib, + ae.nintendo_3ds_linear_heap_size / mib, + }, + ), + }; var state: MyState = undefined; var engine: ae.Engine = undefined; - try engine.init(init.io, init.environ_map, memory, .{ + engine.init(init.io, init.environ_map, memory, .{ .memory = .{ - .render = 12 * 1024 * 1024, + .render = 
render_memory_bytes, .audio = 10 * 1024 * 1024, .game = 2 * 1024 * 1024, .user = 8 * 1024 * 1024, }, - .render_capacity = if (ae.platform == .nintendo_3ds) 12 * 1024 * 1024 else null, + .render_capacity = if (ae.platform == .nintendo_3ds) render_memory_bytes else null, .resizable = true, - }, &state.state()); + }, &state.state()) catch |err| switch (err) { + error.OutOfMemory => return error.EngineInitOutOfMemory, + else => return err, + }; defer engine.deinit(); - try engine.run(); + engine.run() catch |err| switch (err) { + error.OutOfMemory => return error.EngineRunOutOfMemory, + else => return err, + }; } From 2070d8dd50d855a83ed199c3fa9b0243d2a0d78e Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Sat, 6 Jun 2026 13:04:10 -0400 Subject: [PATCH 30/44] Simplify memory --- README.md | 12 +++- build.zig | 12 ++-- src/engine.zig | 120 +------------------------------- src/platform/3ds/services.zig | 6 +- src/platform/c_process_init.zig | 26 +++++-- src/platform/gfx.zig | 7 -- test/main.zig | 36 ++++++---- 7 files changed, 67 insertions(+), 152 deletions(-) diff --git a/README.md b/README.md index 6d491e4..b0612c2 100644 --- a/README.md +++ b/README.md @@ -117,11 +117,12 @@ const MyState = struct { }; pub fn main(init: std.process.Init) !void { - const memory = try init.arena.allocator().alloc(u8, 32 * 1024 * 1024); + const memory = try init.gpa.alignedAlloc(u8, .fromByteUnits(16), 32 * 1024 * 1024); + defer init.gpa.free(memory); var my_state: MyState = undefined; var engine: ae.Engine = undefined; - try engine.init(init.io, memory, .{ + try engine.init(init.io, init.environ_map, memory, .{ .memory = .{ .render = 8 * 1024 * 1024, .audio = 2 * 1024 * 1024, @@ -159,10 +160,17 @@ zig build run -Dgfx=opengl # Build for PSP zig build -Dtarget=mipsel-psp +# Build for 3DS +zig build -Dtarget=arm-3ds-eabihf + # Build in release mode zig build -Doptimize=ReleaseFast ``` +3DS builds default to a 4 MiB regular libctru/newlib heap and a 60 MiB +linear heap. 
Aether's process allocator uses linear memory on 3DS, so engine +pool allocations and GPU upload buffers come from the same memory class. + ## Input System Actions are registered by name and bound to one or more input sources: diff --git a/build.zig b/build.zig index 26387bd..994712e 100644 --- a/build.zig +++ b/build.zig @@ -1,8 +1,8 @@ const std = @import("std"); const pspsdk = @import("pspsdk"); -const DEFAULT_3DS_HEAP_SIZE: u32 = 48 * 1024 * 1024; -const DEFAULT_3DS_LINEAR_HEAP_SIZE: u32 = 32 * 1024 * 1024; +const DEFAULT_3DS_HEAP_SIZE: u32 = 4 * 1024 * 1024; +const DEFAULT_3DS_LINEAR_HEAP_SIZE: u32 = 60 * 1024 * 1024; pub const Platform = enum { windows, @@ -52,9 +52,9 @@ pub const Config = struct { /// levels for the texture. Off by default since the extra VRAM cost /// only pays off for textures sampled at a wide range of distances. psp_mipmaps: bool = false, - /// 3DS: newlib/application heap reserved by libctru's startup shim. + /// 3DS: small regular heap reserved for libctru/newlib internals. nintendo_3ds_heap_size: u32 = DEFAULT_3DS_HEAP_SIZE, - /// 3DS: linear heap reserved by libctru's startup shim. + /// 3DS: linear heap used by Aether's process allocator and GPU uploads. 
nintendo_3ds_linear_heap_size: u32 = DEFAULT_3DS_LINEAR_HEAP_SIZE, /// When true, `Core.paths.resolve` returns CWD for both resources /// and data, bypassing the platform-specific layout (.app Resources @@ -1427,8 +1427,8 @@ pub fn addShader(owner: *std.Build, b: *std.Build, exe: *std.Build.Step.Compile, pub fn build(b: *std.Build) void { const target = b.standardTargetOptions(.{}); const optimize = b.standardOptimizeOption(.{}); - const threeds_heap_mib = b.option(u32, "3ds-heap-mib", "3DS: application heap size in MiB (default: 48)"); - const threeds_linear_heap_mib = b.option(u32, "3ds-linear-heap-mib", "3DS: linear heap size in MiB (default: 32)"); + const threeds_heap_mib = b.option(u32, "3ds-heap-mib", "3DS: regular libctru/newlib heap size in MiB (default: 4)"); + const threeds_linear_heap_mib = b.option(u32, "3ds-linear-heap-mib", "3DS: linear heap size in MiB (default: 60)"); const overrides: Config.Overrides = .{ .gfx = b.option(Gfx, "gfx", "Graphics backend override (default: auto-detect from target)"), diff --git a/src/engine.zig b/src/engine.zig index d9cbf93..4f9627a 100644 --- a/src/engine.zig +++ b/src/engine.zig @@ -1,5 +1,4 @@ const std = @import("std"); -const builtin = @import("builtin"); const assert = std.debug.assert; const Util = @import("util/util.zig"); @@ -13,42 +12,6 @@ const options = @import("options"); pub const Pool = memory.Pool; pub const MemoryConfig = memory.MemoryConfig; -const LinearRenderMemory = if (options.config.platform == .nintendo_3ds) struct { - extern fn linearAlloc(size: usize) ?*anyopaque; - extern fn linearFree(mem: ?*anyopaque) void; - extern fn linearSpaceFree() u32; - - fn alloc(size: usize) ?*anyopaque { - return linearAlloc(size); - } - - fn free(mem: ?*anyopaque) void { - linearFree(mem); - } - - fn spaceFree() u32 { - return linearSpaceFree(); - } -} else struct { - fn alloc(size: usize) ?*anyopaque { - _ = size; - return null; - } - - fn free(mem: ?*anyopaque) void { - _ = mem; - } - - fn spaceFree() u32 { - 
return 0; - } -}; - -const LINEAR_RENDER_RESERVE_BYTES: usize = if (options.config.platform == .nintendo_3ds) - 4 * 1024 * 1024 -else - 2 * 1024 * 1024; - // -- category tracker (wrapper allocator with per-category accounting) -------- pub const CategoryTracker = struct { @@ -115,20 +78,11 @@ pub const CategoryTracker = struct { const TRACKER_COUNT = @typeInfo(Pool).@"enum".fields.len; -fn alloc_linear_bytes(len: usize) ![]align(16) u8 { - const mem = LinearRenderMemory.alloc(len) orelse return error.OutOfMemory; - const aligned: *align(16) anyopaque = @alignCast(mem); - const ptr: [*]align(16) u8 = @ptrCast(aligned); - return ptr[0..len]; -} - // -- engine ------------------------------------------------------------------- pub const Engine = struct { io: std.Io, pool: memory.PoolAlloc, - linear_render_pool: memory.PoolAlloc, - linear_render_mem: ?[]align(16) u8, trackers: [TRACKER_COUNT]CategoryTracker, running: bool, vsync: bool, @@ -148,9 +102,6 @@ pub const Engine = struct { /// `title` so single-word titles just work; override when the /// title contains characters you don't want in a filesystem path. app_name: ?[]const u8 = null, - /// On 3DS, provide this to back the render category with linear - /// FCRAM so vertex buffers can be used as direct PICA sources. - render_capacity: ?usize = null, }; /// Initializes the engine in place. 
`self` must live at a stable address @@ -170,18 +121,12 @@ pub const Engine = struct { config: Config, state: *const Core.State, ) !void { - const external_render = use_external_render_pool(config); - const main_required = if (external_render) - config.memory.total() - config.memory.render - else - config.memory.total(); - assert(main_required <= mem.len); + assert(config.memory.total() <= mem.len); self.io = sys_io; self.running = true; self.vsync = config.vsync; self.state = state.*; - self.linear_render_mem = null; self.pool = memory.PoolAlloc.init(mem, "main"); const inner = self.pool.allocator(); @@ -208,12 +153,6 @@ pub const Engine = struct { }; errdefer Platform.deinit(); - if (external_render) { - try self.init_linear_render_pool(config); - Platform.gfx.rebind_backend_allocator(self.allocator(.render), self.io); - } - errdefer self.deinit_linear_render_pool(); - Rendering.Texture.init_defaults(self.allocator(.render)) catch |err| switch (err) { error.OutOfMemory => return error.DefaultTexturesOutOfMemory, else => return err, @@ -230,58 +169,6 @@ pub const Engine = struct { Platform.deinit(); logger.deinit(self.io); self.dirs.close(self.io); - self.deinit_linear_render_pool(); - } - - fn use_external_render_pool(config: Config) bool { - if (options.config.platform != .nintendo_3ds) return false; - return config.render_capacity != null; - } - - fn init_linear_render_pool(self: *Engine, config: Config) !void { - assert(config.render_capacity.? >= config.memory.render); - const mib = 1024 * 1024; - - const available: usize = LinearRenderMemory.spaceFree(); - if (available <= LINEAR_RENDER_RESERVE_BYTES) { - std.debug.panic( - "LinLowMiB a={} q={} h={} l={}", - .{ - available / mib, - config.render_capacity.? 
/ mib, - options.config.nintendo_3ds_heap_size / mib, - options.config.nintendo_3ds_linear_heap_size / mib, - }, - ); - } - - const capacity = @min(config.render_capacity.?, available - LINEAR_RENDER_RESERVE_BYTES); - - const linear_mem = alloc_linear_bytes(capacity) catch |err| switch (err) { - error.OutOfMemory => std.debug.panic( - "LinAllocMiB a={} c={} q={} h={} l={}", - .{ - available / mib, - capacity / mib, - config.render_capacity.? / mib, - options.config.nintendo_3ds_heap_size / mib, - options.config.nintendo_3ds_linear_heap_size / mib, - }, - ), - }; - self.linear_render_mem = linear_mem; - self.linear_render_pool = memory.PoolAlloc.init(linear_mem, "render-linear"); - - const render_idx = @intFromEnum(Pool.render); - self.trackers[render_idx].inner = self.linear_render_pool.allocator(); - self.trackers[render_idx].budget = @min(self.trackers[render_idx].budget, capacity); - } - - fn deinit_linear_render_pool(self: *Engine) void { - if (self.linear_render_mem) |mem| { - LinearRenderMemory.free(mem.ptr); - self.linear_render_mem = null; - } } pub fn allocator(self: *Engine, p: Pool) std.mem.Allocator { @@ -310,10 +197,7 @@ pub const Engine = struct { } pub fn set_budget(self: *Engine, p: Pool, new_budget: usize) void { - self.trackers[@intFromEnum(p)].budget = if (p == .render) blk: { - const mem = self.linear_render_mem orelse break :blk new_budget; - break :blk @min(new_budget, mem.len); - } else new_budget; + self.trackers[@intFromEnum(p)].budget = new_budget; } pub fn total_used(self: *const Engine) usize { diff --git a/src/platform/3ds/services.zig b/src/platform/3ds/services.zig index c582e71..0684dce 100644 --- a/src/platform/3ds/services.zig +++ b/src/platform/3ds/services.zig @@ -10,9 +10,9 @@ //! frames grow. //! //! libctru also exposes weak `__ctru_heap_size` and -//! `__ctru_linear_heap_size` symbols. Leaving them at zero asks libctru to -//! compute a conservative split, which is too small for Aether's explicit -//! linear render pool. 
Export strong values from build config instead. +//! `__ctru_linear_heap_size` symbols. Aether keeps the regular heap small +//! and routes its process allocator through linear memory, so export strong +//! values from build config instead of asking libctru for its default split. //! //! libctru also creates service threads internally. NDSP currently asks for //! a 4 KB stack, which can underflow in its sound-frame worker before Aether diff --git a/src/platform/c_process_init.zig b/src/platform/c_process_init.zig index e0f08bf..48343dd 100644 --- a/src/platform/c_process_init.zig +++ b/src/platform/c_process_init.zig @@ -1,8 +1,26 @@ const std = @import("std"); const c_io = @import("c_io.zig"); +const options = @import("options"); -extern fn memalign(alignment: usize, size: usize) ?*anyopaque; -extern fn free(ptr: ?*anyopaque) void; +const ProcessHeap = if (options.config.platform == .nintendo_3ds) struct { + extern fn linearMemAlign(size: usize, alignment: usize) ?*anyopaque; + extern fn linearFree(ptr: ?*anyopaque) void; + + fn alloc(alignment: usize, size: usize) ?*anyopaque { + return linearMemAlign(size, alignment); + } + + fn free(ptr: ?*anyopaque) void { + linearFree(ptr); + } +} else struct { + extern fn memalign(alignment: usize, size: usize) ?*anyopaque; + extern fn free(ptr: ?*anyopaque) void; + + fn alloc(alignment: usize, size: usize) ?*anyopaque { + return memalign(alignment, size); + } +}; var arena_state: std.heap.ArenaAllocator = undefined; var environ_map_state: std.process.Environ.Map = undefined; @@ -48,7 +66,7 @@ fn alloc( std.debug.assert(len > 0); const effective_alignment = @max(alignment.toByteUnits(), @sizeOf(usize)); - const ptr = memalign(effective_alignment, len) orelse return null; + const ptr = ProcessHeap.alloc(effective_alignment, len) orelse return null; std.debug.assert(alignment.check(@intFromPtr(ptr))); return @ptrCast(ptr); } @@ -84,5 +102,5 @@ fn dealloc( _: usize, ) void { std.debug.assert(memory.len > 0); - free(memory.ptr); 
+ ProcessHeap.free(memory.ptr); } diff --git a/src/platform/gfx.zig b/src/platform/gfx.zig index f4748c6..36252e8 100644 --- a/src/platform/gfx.zig +++ b/src/platform/gfx.zig @@ -63,13 +63,6 @@ pub fn init( try Api.init(); } -/// Rebinds the backend-side allocator after graphics initialization. -/// Used by backends that need platform services online before choosing -/// their final render-memory arena. -pub fn rebind_backend_allocator(alloc: std.mem.Allocator, io: std.Io) void { - Api.setup(alloc, io); -} - /// Deinitializes the graphics subsystem and frees all associated resources. pub fn deinit() void { Api.deinit(); diff --git a/test/main.zig b/test/main.zig index 79d77bc..0333e09 100644 --- a/test/main.zig +++ b/test/main.zig @@ -18,6 +18,18 @@ comptime { pub const psp_stack_size: u32 = 256 * 1024; +const ThreeDSLinearHeap = if (ae.platform == .nintendo_3ds) struct { + extern fn linearSpaceFree() u32; + + fn spaceFree() usize { + return linearSpaceFree(); + } +} else struct { + fn spaceFree() usize { + return 0; + } +}; + // PSP, 3DS, and Switch override panic/IO handlers that would otherwise // pull in posix symbols (Io.Threaded references std.posix decls that // don't exist for these targets). 
3DS and Switch use Aether's newlib-backed @@ -413,30 +425,30 @@ var pipeline: Rendering.Pipeline.Handle = undefined; pub fn main(init: std.process.Init) !void { const mib = 1024 * 1024; - const render_memory_bytes = if (ae.platform == .nintendo_3ds) 28 * 1024 * 1024 else 12 * 1024 * 1024; - const main_memory_bytes = if (ae.platform == .nintendo_3ds) 24 * 1024 * 1024 else 32 * 1024 * 1024; - const memory = init.arena.allocator().alloc(u8, main_memory_bytes) catch |err| switch (err) { + const memory_config: ae.Util.MemoryConfig = .{ + .render = if (ae.platform == .nintendo_3ds) 28 * 1024 * 1024 else 12 * 1024 * 1024, + .audio = 10 * 1024 * 1024, + .game = 2 * 1024 * 1024, + .user = 8 * 1024 * 1024, + }; + const main_memory_bytes = memory_config.total(); + const memory = init.gpa.alignedAlloc(u8, .fromByteUnits(16), main_memory_bytes) catch |err| switch (err) { error.OutOfMemory => std.debug.panic( - "MainOOMMiB m={} r={} h={} l={}", + "MainOOMMiB m={} f={} h={} l={}", .{ main_memory_bytes / mib, - render_memory_bytes / mib, + ThreeDSLinearHeap.spaceFree() / mib, ae.nintendo_3ds_heap_size / mib, ae.nintendo_3ds_linear_heap_size / mib, }, ), }; + defer init.gpa.free(memory); var state: MyState = undefined; var engine: ae.Engine = undefined; engine.init(init.io, init.environ_map, memory, .{ - .memory = .{ - .render = render_memory_bytes, - .audio = 10 * 1024 * 1024, - .game = 2 * 1024 * 1024, - .user = 8 * 1024 * 1024, - }, - .render_capacity = if (ae.platform == .nintendo_3ds) render_memory_bytes else null, + .memory = memory_config, .resizable = true, }, &state.state()) catch |err| switch (err) { error.OutOfMemory => return error.EngineInitOutOfMemory, From 670df624ce35b0d7568ed38909ccc417bb4f6b8c Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Sat, 6 Jun 2026 13:53:21 -0400 Subject: [PATCH 31/44] Simplify shader model --- README.md | 22 +- build.zig | 252 ++++++++------------ src/platform/3ds/3ds_gfx.zig | 7 +- src/platform/3ds/shaders/basic.v.pica | 35 +++ 
src/platform/gfx_api.zig | 2 +- src/platform/glfw/opengl/opengl_gfx.zig | 11 +- src/platform/glfw/vulkan/vulkan_gfx.zig | 14 +- src/platform/headless/headless_gfx.zig | 2 +- src/platform/psp/psp_gfx_ge.zig | 2 +- src/platform/switch/shaders/basic.frag.glsl | 9 + src/platform/switch/shaders/basic.vert.glsl | 12 + src/platform/switch/switch_gfx.zig | 7 +- src/rendering/pipeline.zig | 4 +- {test => src/rendering}/shaders/basic.slang | 14 +- test/main.zig | 4 +- 15 files changed, 193 insertions(+), 204 deletions(-) create mode 100644 src/platform/3ds/shaders/basic.v.pica create mode 100644 src/platform/switch/shaders/basic.frag.glsl create mode 100644 src/platform/switch/shaders/basic.vert.glsl rename {test => src/rendering}/shaders/basic.slang (90%) diff --git a/README.md b/README.md index b0612c2..946f511 100644 --- a/README.md +++ b/README.md @@ -67,11 +67,6 @@ pub fn build(b: *std.Build) void { .overrides = overrides, }); - // Compile a Slang shader for the selected backend and embed it at compile time - Aether.addShader(ae_dep.builder, b, exe, config, "basic", .{ - .slang = b.path("shaders/basic.slang"), - }); - // Export the artifact (produces EBOOT.PBP for PSP, install artifact otherwise) Aether.exportArtifact(ae_dep.builder, b, exe, config, .{ .title = "My Game", @@ -83,7 +78,7 @@ pub fn build(b: *std.Build) void { } ``` -The first argument to `addGame`, `addShader`, and `exportArtifact` is the +The first argument to `addGame` and `exportArtifact` is the dependency's builder (`ae_dep.builder`), and the second is your project's builder (`b`). 
This lets Aether resolve its own internal dependencies (GLFW, Vulkan, Slang, pspsdk) from its `build.zig.zon` while building artifacts that @@ -198,11 +193,9 @@ const Vertex = struct { }); pub const Layout = Rendering.Pipeline.layout_from_struct(@This(), &Attributes); }; - -const MyMesh = Rendering.Mesh(Vertex); ``` -Shaders are written in [Slang](https://shader-slang.com/) (`.slang` files), compiled at build time via `addShader`, and embedded into the binary. Vulkan consumes SPIR-V; OpenGL consumes GLSL 4.50. PSP targets ignore shaders entirely (fixed-function pipeline); the build system generates empty stubs. +Aether owns its built-in pipeline shaders as backend internals. Downstream games create pipelines from vertex layouts; the selected backend compiles and embeds the shader code it needs. ## Build API Reference @@ -218,15 +211,6 @@ Creates a game executable with the engine module and platform dependencies wired | `optimize` | `OptimizeMode` | Optimization level (default: `.Debug`) | | `overrides` | `Config.Overrides` | Graphics/display mode overrides (default: `.{}`) | -### `Aether.addShader(owner, b, exe, config, name, paths)` - -Compiles a Slang shader for the selected backend and embeds it into the executable. Vulkan gets SPIR-V, OpenGL gets GLSL 4.50, and PSP targets get empty stubs. - -| Option | Type | Description | -|--------|------|-------------| -| `name` | `[]const u8` | Shader name (used for `@embedFile` lookup) | -| `paths.slang` | `LazyPath` | Path to the `.slang` source file | - ### `Aether.exportArtifact(owner, b, exe, config, opts)` Exports the build artifact. For PSP targets, produces an `EBOOT.PBP`. For desktop, installs the artifact normally. @@ -241,7 +225,7 @@ Exports the build artifact. For PSP targets, produces an `EBOOT.PBP`. For deskto ### `Aether.Config.resolve(target, overrides) -> Config` -Resolves the full engine configuration (platform, graphics backend, audio, input) from the build target and any user overrides. 
Pass the result to `addShader` and `exportArtifact`. +Resolves the full engine configuration (platform, graphics backend, audio, input) from the build target and any user overrides. Pass the result to `exportArtifact`. ### `Aether.Config.Overrides` diff --git a/build.zig b/build.zig index 994712e..6f7f8cb 100644 --- a/build.zig +++ b/build.zig @@ -144,8 +144,9 @@ pub const HeadlessOptions = struct { overrides: Config.Overrides = .{}, }; -pub const ShaderPaths = struct { - slang: std.Build.LazyPath, +const ShaderStagePaths = struct { + vert: std.Build.LazyPath, + frag: std.Build.LazyPath, }; const user_root_import_name = "aether_user_root"; @@ -347,6 +348,8 @@ pub fn addGame(owner: *std.Build, b: *std.Build, opts: GameOptions) *std.Build.S add3dsCImportPaths(mod, devkitProPath(b)); } + addInternalShaderModule(owner, b, mod, config); + // --- user executable --- const user_mod = b.createModule(.{ .root_source_file = opts.root_source_file, @@ -530,10 +533,13 @@ fn slangcPath(owner: *std.Build) ?std.Build.LazyPath { return dep.path(exe_name); } -fn addSlangStep(b: *std.Build, slangc: ?std.Build.LazyPath, args: []const []const u8, comptime output_name: []const u8, input: std.Build.LazyPath) ?std.Build.LazyPath { - const sc = slangc orelse return null; +fn requireSlangcPath(owner: *std.Build) std.Build.LazyPath { + return slangcPath(owner) orelse @panic("slangc dependency unavailable; run zig build --fetch"); +} + +fn addSlangStep(b: *std.Build, slangc: std.Build.LazyPath, args: []const []const u8, comptime output_name: []const u8, input: std.Build.LazyPath) std.Build.LazyPath { const run = std.Build.Step.Run.create(b, "slangc " ++ output_name); - run.addFileArg(sc); + run.addFileArg(slangc); run.addArgs(args); run.addArg("-o"); const output = run.addOutputFileArg(output_name); @@ -555,6 +561,94 @@ fn addPicassoStep(b: *std.Build, picasso: []const u8, comptime output_name: []co return output; } +fn addInternalShaderModule(owner: *std.Build, b: *std.Build, mod: 
*std.Build.Module, config: Config) void { + const stages = internalShaderStages(owner, b, config) orelse return; + + const files = b.addWriteFiles(); + _ = files.addCopyFile(stages.vert, "basic.vert"); + _ = files.addCopyFile(stages.frag, "basic.frag"); + const root = files.add("aether_shaders.zig", + \\pub const basic_vert align(@alignOf(u32)) = @embedFile("basic.vert").*; + \\pub const basic_frag align(@alignOf(u32)) = @embedFile("basic.frag").*; + \\ + ); + + mod.addImport("aether_shaders", b.createModule(.{ + .root_source_file = root, + })); +} + +fn internalShaderStages(owner: *std.Build, b: *std.Build, config: Config) ?ShaderStagePaths { + if (config.platform == .nintendo_3ds and config.gfx == .default) { + const picasso = b.pathJoin(&.{ devkitProPath(b), "tools/bin/picasso" }); + const vert = addPicassoStep( + b, + picasso, + "basic.shbin", + owner.path("src/platform/3ds/shaders/basic.v.pica"), + ); + const files = b.addWriteFiles(); + const frag = files.add("basic.frag.stub", ""); + return .{ .vert = vert, .frag = frag }; + } + + if (config.platform == .nintendo_switch and config.gfx == .default) { + const uam = b.pathJoin(&.{ devkitProPath(b), "tools/bin/uam" }); + return .{ + .vert = addUamStep( + b, + uam, + "vert", + "basic.vert.dksh", + owner.path("src/platform/switch/shaders/basic.vert.glsl"), + ), + .frag = addUamStep( + b, + uam, + "frag", + "basic.frag.dksh", + owner.path("src/platform/switch/shaders/basic.frag.glsl"), + ), + }; + } + + switch (config.gfx) { + .vulkan => { + const slangc = requireSlangcPath(owner); + const source = owner.path("src/rendering/shaders/basic.slang"); + return .{ + .vert = addSlangStep(b, slangc, &.{ + "-target", "spirv", "-emit-spirv-directly", "-matrix-layout-column-major", + "-DVULKAN", "-entry", "vertexMain", "-stage", + "vertex", + }, "basic.vert.spv", source), + .frag = addSlangStep(b, slangc, &.{ + "-target", "spirv", "-emit-spirv-directly", "-matrix-layout-column-major", + "-DVULKAN", "-entry", "fragmentMain", 
"-stage", + "fragment", + }, "basic.frag.spv", source), + }; + }, + .opengl => { + const slangc = requireSlangcPath(owner); + const source = owner.path("src/rendering/shaders/basic.slang"); + return .{ + .vert = addSlangStep(b, slangc, &.{ + "-target", "glsl", "-matrix-layout-column-major", + "-profile", "glsl_450", "-entry", + "vertexMain", "-stage", "vertex", + }, "basic.vert.glsl", source), + .frag = addSlangStep(b, slangc, &.{ + "-target", "glsl", "-matrix-layout-column-major", + "-profile", "glsl_450", "-entry", + "fragmentMain", "-stage", "fragment", + }, "basic.frag.glsl", source), + }; + }, + .default, .headless => return null, + } +} + pub const ExportOptions = struct { /// PSP/macOS: human-readable name shown to the OS (XMB title on PSP, /// CFBundleName on macOS). Ignored elsewhere. @@ -1278,150 +1372,6 @@ fn switchNroPipeline(b: *std.Build, exe: *std.Build.Step.Compile, opts: ExportOp } } -/// Registers a shader pair for the game executable. Slang sources are -/// compiled to SPIR-V (Vulkan) or GLSL (OpenGL) via slangc. On -/// shaderless platforms (PSP), empty stubs are provided. -/// -/// When called from Aether's own build, use `addShader(b, b, ...)`. -/// From a downstream project: -/// -/// Aether.addShader(ae_dep.builder, b, exe, config, "basic", .{ ... 
}); -/// -pub fn addShader(owner: *std.Build, b: *std.Build, exe: *std.Build.Step.Compile, config: Config, comptime name: []const u8, paths: ShaderPaths) void { - const root_module = userRootModule(exe); - - if (config.platform == .nintendo_3ds and config.gfx == .default) { - const picasso = b.pathJoin(&.{ devkitProPath(b), "tools/bin/picasso" }); - const sources = b.addWriteFiles(); - const vert_src = sources.add(name ++ "_3ds.v.pica", - \\.fvec projection[4], modelView[4] - \\.fvec posScale, uvScaleOffset, colorScale - \\ - \\.constf myconst(0.0, 1.0, 0.0, 0.0) - \\.alias zeros myconst.xxxx - \\.alias ones myconst.yyyy - \\ - \\.out outpos position - \\.out outtc0 texcoord0 - \\.out outclr color - \\ - \\.alias inpos v0 - \\.alias inuv v1 - \\.alias inclr v2 - \\ - \\.proc main - \\ mul r0.xyz, posScale, inpos - \\ mov r0.w, ones - \\ - \\ dp4 r1.x, modelView[0], r0 - \\ dp4 r1.y, modelView[1], r0 - \\ dp4 r1.z, modelView[2], r0 - \\ dp4 r1.w, modelView[3], r0 - \\ - \\ dp4 outpos.x, projection[0], r1 - \\ dp4 outpos.y, projection[1], r1 - \\ dp4 outpos.z, projection[2], r1 - \\ dp4 outpos.w, projection[3], r1 - \\ - \\ mul outtc0.xy, uvScaleOffset.xy, inuv.xy - \\ add outtc0.xy, uvScaleOffset.zw, outtc0.xy - \\ mov outtc0.zw, zeros - \\ mul outclr, colorScale, inclr - \\ end - \\.end - \\ - ); - const vert = addPicassoStep(b, picasso, name ++ ".shbin", vert_src); - const empty = b.addWriteFiles(); - const frag = empty.add(name ++ "_3ds_frag_stub", ""); - root_module.addAnonymousImport(name ++ "_vert", .{ .root_source_file = vert }); - root_module.addAnonymousImport(name ++ "_frag", .{ .root_source_file = frag }); - return; - } - - if (config.platform == .nintendo_switch and config.gfx == .default) { - const uam = b.pathJoin(&.{ devkitProPath(b), "tools/bin/uam" }); - const sources = b.addWriteFiles(); - const vert_src = sources.add(name ++ "_switch.vert.glsl", - \\#version 460 - \\ - \\layout (location = 0) in vec3 inPosition; - \\layout (location = 1) in vec4 
inColor; - \\ - \\layout (location = 0) out vec4 outColor; - \\ - \\void main() - \\{ - \\ gl_Position = vec4(inPosition, 1.0); - \\ outColor = inColor; - \\} - \\ - ); - const frag_src = sources.add(name ++ "_switch.frag.glsl", - \\#version 460 - \\ - \\layout (location = 0) in vec4 inColor; - \\layout (location = 0) out vec4 outColor; - \\ - \\void main() - \\{ - \\ outColor = inColor; - \\} - \\ - ); - - const vert = addUamStep(b, uam, "vert", name ++ ".vert.dksh", vert_src); - const frag = addUamStep(b, uam, "frag", name ++ ".frag.dksh", frag_src); - root_module.addAnonymousImport(name ++ "_vert", .{ .root_source_file = vert }); - root_module.addAnonymousImport(name ++ "_frag", .{ .root_source_file = frag }); - return; - } - - switch (config.gfx) { - .vulkan => { - const slangc = slangcPath(owner); - const vert = addSlangStep(b, slangc, &.{ - "-target", "spirv", "-emit-spirv-directly", "-matrix-layout-column-major", - "-DVULKAN", "-entry", "vertexMain", "-stage", - "vertex", - }, name ++ ".vert.spv", paths.slang); - const frag = addSlangStep(b, slangc, &.{ - "-target", "spirv", "-emit-spirv-directly", "-matrix-layout-column-major", - "-DVULKAN", "-entry", "fragmentMain", "-stage", - "fragment", - }, name ++ ".frag.spv", paths.slang); - if (vert) |v| root_module.addAnonymousImport(name ++ "_vert", .{ .root_source_file = v }); - if (frag) |f| root_module.addAnonymousImport(name ++ "_frag", .{ .root_source_file = f }); - }, - .opengl => { - const slangc = slangcPath(owner); - const vert = addSlangStep(b, slangc, &.{ - "-target", "glsl", "-matrix-layout-column-major", - "-profile", "glsl_450", "-entry", - "vertexMain", "-stage", "vertex", - }, name ++ ".vert.glsl", paths.slang); - const frag = addSlangStep(b, slangc, &.{ - "-target", "glsl", "-matrix-layout-column-major", - "-profile", "glsl_450", "-entry", - "fragmentMain", "-stage", "fragment", - }, name ++ ".frag.glsl", paths.slang); - if (vert) |v| root_module.addAnonymousImport(name ++ "_vert", .{ 
.root_source_file = v }); - if (frag) |f| root_module.addAnonymousImport(name ++ "_frag", .{ .root_source_file = f }); - }, - .default, .headless => { - // Provide empty stubs so @embedFile(name ++ "_vert") still compiles. - const empty = b.addWriteFiles(); - const stub = empty.add(name ++ "_stub", ""); - root_module.addAnonymousImport(name ++ "_vert", .{ - .root_source_file = stub, - }); - root_module.addAnonymousImport(name ++ "_frag", .{ - .root_source_file = stub, - }); - }, - } -} - // --- Aether's own build (test app + engine tests) --- pub fn build(b: *std.Build) void { @@ -1451,10 +1401,6 @@ pub fn build(b: *std.Build) void { .overrides = overrides, }); - addShader(b, b, exe, config, "basic", .{ - .slang = b.path("test/shaders/basic.slang"), - }); - const nintendo_romfs = b.addWriteFiles(); _ = nintendo_romfs.addCopyFile(b.path("test/test.png"), "test.png"); _ = nintendo_romfs.addCopyFile(b.path("test/calm1.wav"), "calm1.wav"); diff --git a/src/platform/3ds/3ds_gfx.zig b/src/platform/3ds/3ds_gfx.zig index 1204eb5..76971dc 100644 --- a/src/platform/3ds/3ds_gfx.zig +++ b/src/platform/3ds/3ds_gfx.zig @@ -7,6 +7,7 @@ const Rendering = @import("../../rendering/rendering.zig"); const Pipeline = Rendering.Pipeline; const Mesh = Rendering.mesh; const Texture = Rendering.Texture; +const shaders = @import("aether_shaders"); const c = @cImport({ @cDefine("wint_t", "unsigned int"); @@ -374,10 +375,8 @@ pub fn set_vsync(v: bool) void { vsync_enabled = v; } -pub fn create_pipeline(layout: Pipeline.VertexLayout, v_shader: ?[:0]align(4) const u8, f_shader: ?[:0]align(4) const u8) anyerror!Pipeline.Handle { - _ = f_shader; - - const code = v_shader orelse return error.InvalidShader; +pub fn create_pipeline(layout: Pipeline.VertexLayout) anyerror!Pipeline.Handle { + const code: [:0]align(4) const u8 = &shaders.basic_vert; if (layout.stride == 0 or layout.attributes.len > MAX_VERTEX_ATTRS) return error.UnsupportedVertexLayout; const dvlb = 
c.DVLB_ParseFile(@ptrCast(@constCast(code.ptr)), @intCast(code.len)); diff --git a/src/platform/3ds/shaders/basic.v.pica b/src/platform/3ds/shaders/basic.v.pica new file mode 100644 index 0000000..f10c5d8 --- /dev/null +++ b/src/platform/3ds/shaders/basic.v.pica @@ -0,0 +1,35 @@ +.fvec projection[4], modelView[4] +.fvec posScale, uvScaleOffset, colorScale + +.constf myconst(0.0, 1.0, 0.0, 0.0) +.alias zeros myconst.xxxx +.alias ones myconst.yyyy + +.out outpos position +.out outtc0 texcoord0 +.out outclr color + +.alias inpos v0 +.alias inuv v1 +.alias inclr v2 + +.proc main + mul r0.xyz, posScale, inpos + mov r0.w, ones + + dp4 r1.x, modelView[0], r0 + dp4 r1.y, modelView[1], r0 + dp4 r1.z, modelView[2], r0 + dp4 r1.w, modelView[3], r0 + + dp4 outpos.x, projection[0], r1 + dp4 outpos.y, projection[1], r1 + dp4 outpos.z, projection[2], r1 + dp4 outpos.w, projection[3], r1 + + mul outtc0.xy, uvScaleOffset.xy, inuv.xy + add outtc0.xy, uvScaleOffset.zw, outtc0.xy + mov outtc0.zw, zeros + mul outclr, colorScale, inclr + end +.end diff --git a/src/platform/gfx_api.zig b/src/platform/gfx_api.zig index acfd2c0..b059d4b 100644 --- a/src/platform/gfx_api.zig +++ b/src/platform/gfx_api.zig @@ -31,7 +31,7 @@ pub const Interface = struct { set_vsync: fn (bool) void, - create_pipeline: fn (Pipeline.VertexLayout, ?[:0]align(4) const u8, ?[:0]align(4) const u8) anyerror!Pipeline.Handle, + create_pipeline: fn (Pipeline.VertexLayout) anyerror!Pipeline.Handle, destroy_pipeline: fn (Pipeline.Handle) void, bind_pipeline: fn (Pipeline.Handle) void, diff --git a/src/platform/glfw/opengl/opengl_gfx.zig b/src/platform/glfw/opengl/opengl_gfx.zig index e726bab..319ea25 100644 --- a/src/platform/glfw/opengl/opengl_gfx.zig +++ b/src/platform/glfw/opengl/opengl_gfx.zig @@ -12,6 +12,7 @@ const Mesh = Rendering.mesh; const Pipeline = Rendering.Pipeline; const Texture = Rendering.Texture; const GLFWSurface = @import("../surface.zig"); +const shaders = @import("aether_shaders"); var render_alloc: 
std.mem.Allocator = undefined; var render_io: std.Io = undefined; @@ -160,11 +161,7 @@ pub fn set_view_matrix(mat: *const Mat4) void { shader.update_ubo(); } -pub fn create_pipeline(layout: Pipeline.VertexLayout, v_shader: ?[:0]align(4) const u8, f_shader: ?[:0]align(4) const u8) anyerror!Pipeline.Handle { - if (v_shader == null or f_shader == null) { - return error.InvalidShader; - } - +pub fn create_pipeline(layout: Pipeline.VertexLayout) anyerror!Pipeline.Handle { var vao: gl.uint = 0; gl.CreateVertexArrays(1, @ptrCast(&vao)); for (layout.attributes) |a| { @@ -182,7 +179,9 @@ pub fn create_pipeline(layout: Pipeline.VertexLayout, v_shader: ?[:0]align(4) co gl.VertexArrayAttribBinding(vao, a.location, a.binding); } - const program = try shader.Shader.init(v_shader.?, f_shader.?); + const v_shader: [:0]align(4) const u8 = &shaders.basic_vert; + const f_shader: [:0]align(4) const u8 = &shaders.basic_frag; + const program = try shader.Shader.init(v_shader, f_shader); const pipeline = pipelines.add_element(.{ .layout = layout, diff --git a/src/platform/glfw/vulkan/vulkan_gfx.zig b/src/platform/glfw/vulkan/vulkan_gfx.zig index 4af4f81..740edbb 100644 --- a/src/platform/glfw/vulkan/vulkan_gfx.zig +++ b/src/platform/glfw/vulkan/vulkan_gfx.zig @@ -18,6 +18,7 @@ const Rendering = @import("../../../rendering/rendering.zig"); const Pipeline = Rendering.Pipeline; const Mesh = Rendering.mesh; const Texture = Rendering.Texture; +const shaders = @import("aether_shaders"); const Context = @import("context.zig"); const Swapchain = @import("swapchain.zig"); @@ -751,8 +752,9 @@ fn flush_camera_if_dirty() void { camera_dirty = false; } -pub fn create_pipeline(layout: Pipeline.VertexLayout, vs: ?[:0]align(4) const u8, fs: ?[:0]align(4) const u8) anyerror!Pipeline.Handle { - if (vs == null or fs == null) return error.InvalidShader; +pub fn create_pipeline(layout: Pipeline.VertexLayout) anyerror!Pipeline.Handle { + const vs: [:0]align(4) const u8 = &shaders.basic_vert; + const fs: 
[:0]align(4) const u8 = &shaders.basic_frag; const set_layouts = [_]vk.DescriptorSetLayout{ descriptor_set_layout, tex_set_layout }; @@ -770,13 +772,13 @@ pub fn create_pipeline(layout: Pipeline.VertexLayout, vs: ?[:0]align(4) const u8 }, null); const vert = try context.logical_device.createShaderModule(&.{ - .code_size = vs.?.len, - .p_code = @ptrCast(@alignCast(vs.?.ptr)), + .code_size = vs.len, + .p_code = @ptrCast(@alignCast(vs.ptr)), }, null); const frag = try context.logical_device.createShaderModule(&.{ - .code_size = fs.?.len, - .p_code = @ptrCast(@alignCast(fs.?.ptr)), + .code_size = fs.len, + .p_code = @ptrCast(@alignCast(fs.ptr)), }, null); const pipeline_shade_stage_create_info = [_]vk.PipelineShaderStageCreateInfo{ diff --git a/src/platform/headless/headless_gfx.zig b/src/platform/headless/headless_gfx.zig index 2699531..d1dcf56 100644 --- a/src/platform/headless/headless_gfx.zig +++ b/src/platform/headless/headless_gfx.zig @@ -34,7 +34,7 @@ pub fn end_frame() void {} pub fn clear_depth() void {} pub fn set_vsync(_: bool) void {} -pub fn create_pipeline(_: Pipeline.VertexLayout, _: ?[:0]align(4) const u8, _: ?[:0]align(4) const u8) anyerror!Pipeline.Handle { +pub fn create_pipeline(_: Pipeline.VertexLayout) anyerror!Pipeline.Handle { return 0; } diff --git a/src/platform/psp/psp_gfx_ge.zig b/src/platform/psp/psp_gfx_ge.zig index 2446083..b26888c 100644 --- a/src/platform/psp/psp_gfx_ge.zig +++ b/src/platform/psp/psp_gfx_ge.zig @@ -857,7 +857,7 @@ pub fn set_vsync(v: bool) void { // ---- pipelines ------------------------------------------------------------- -pub fn create_pipeline(layout: Pipeline.VertexLayout, _: ?[:0]align(4) const u8, _: ?[:0]align(4) const u8) anyerror!Pipeline.Handle { +pub fn create_pipeline(layout: Pipeline.VertexLayout) anyerror!Pipeline.Handle { var vtype = VertexType{ .vertex = .Vertex32Bitf, // default, overridden by position attribute .transform = .Transform3D, diff --git a/src/platform/switch/shaders/basic.frag.glsl 
b/src/platform/switch/shaders/basic.frag.glsl new file mode 100644 index 0000000..dd08075 --- /dev/null +++ b/src/platform/switch/shaders/basic.frag.glsl @@ -0,0 +1,9 @@ +#version 460 + +layout (location = 0) in vec4 inColor; +layout (location = 0) out vec4 outColor; + +void main() +{ + outColor = inColor; +} diff --git a/src/platform/switch/shaders/basic.vert.glsl b/src/platform/switch/shaders/basic.vert.glsl new file mode 100644 index 0000000..d0644e8 --- /dev/null +++ b/src/platform/switch/shaders/basic.vert.glsl @@ -0,0 +1,12 @@ +#version 460 + +layout (location = 0) in vec3 inPosition; +layout (location = 1) in vec4 inColor; + +layout (location = 0) out vec4 outColor; + +void main() +{ + gl_Position = vec4(inPosition, 1.0); + outColor = inColor; +} diff --git a/src/platform/switch/switch_gfx.zig b/src/platform/switch/switch_gfx.zig index 6c52949..a8ba78e 100644 --- a/src/platform/switch/switch_gfx.zig +++ b/src/platform/switch/switch_gfx.zig @@ -12,6 +12,7 @@ const Pipeline = Rendering.Pipeline; const Mesh = Rendering.mesh; const Texture = Rendering.Texture; const gfx = @import("../gfx.zig"); +const shader_data = @import("aether_shaders"); const DkDevice_T = opaque {}; const DkMemBlock_T = opaque {}; @@ -439,9 +440,9 @@ pub fn set_vsync(v: bool) void { if (swapchain) |_| dkSwapchainSetSwapInterval(swapchain, @intFromBool(v)); } -pub fn create_pipeline(layout: Pipeline.VertexLayout, v_shader: ?[:0]align(4) const u8, f_shader: ?[:0]align(4) const u8) anyerror!Pipeline.Handle { - const vertex_code = v_shader orelse return error.InvalidShader; - const fragment_code = f_shader orelse return error.InvalidShader; +pub fn create_pipeline(layout: Pipeline.VertexLayout) anyerror!Pipeline.Handle { + const vertex_code: [:0]align(4) const u8 = &shader_data.basic_vert; + const fragment_code: [:0]align(4) const u8 = &shader_data.basic_frag; var data = PipelineData{ .vertex_shader = undefined, diff --git a/src/rendering/pipeline.zig b/src/rendering/pipeline.zig index 
f700e16..49f2e3b 100644 --- a/src/rendering/pipeline.zig +++ b/src/rendering/pipeline.zig @@ -88,8 +88,8 @@ pub fn layout_from_struct(comptime V: type, comptime attrs: []const Attribute) V handle: Handle, -pub fn new(layout: VertexLayout, vs: ?[:0]align(4) const u8, fs: ?[:0]align(4) const u8) !Handle { - return gfx.api.create_pipeline(layout, vs, fs); +pub fn new(layout: VertexLayout) !Handle { + return gfx.api.create_pipeline(layout); } pub fn deinit(handle: Handle) void { diff --git a/test/shaders/basic.slang b/src/rendering/shaders/basic.slang similarity index 90% rename from test/shaders/basic.slang rename to src/rendering/shaders/basic.slang index 71538cc..ef62bf8 100644 --- a/test/shaders/basic.slang +++ b/src/rendering/shaders/basic.slang @@ -51,6 +51,7 @@ SamplerState g_sampler; Texture2D g_textures[]; float4x4 getModel() { return pc.model; } +float2 getUvOffset() { return pc.uvOffset; } #else @@ -68,6 +69,7 @@ ConstantBuffer perObject; Sampler2D u_combinedTexture; float4x4 getModel() { return perObject.model; } +float2 getUvOffset() { return camera.uvOffset; } #endif @@ -77,11 +79,7 @@ VertexOutput vertexMain(VertexInput input) { float4 viewPos = mul(camera.view, mul(getModel(), float4(input.position, 1.0))); output.position = mul(camera.proj, viewPos); output.viewDepth = -viewPos.z; -#ifdef VULKAN - output.uv = input.uv + pc.uvOffset; -#else - output.uv = input.uv + camera.uvOffset; -#endif + output.uv = input.uv + getUvOffset(); output.color = input.color; return output; } @@ -106,6 +104,9 @@ float4 fragmentMain(VertexOutput input) : SV_Target { } if (pc.alphaBlendEnabled == 0) { color.a = 1.0; + } else { + if (color.a <= 0.1) + discard; } #else if (camera.fogEnabled != 0) { @@ -114,6 +115,9 @@ float4 fragmentMain(VertexOutput input) : SV_Target { } if (camera.alphaBlendEnabled == 0) { color.a = 1.0; + } else { + if (color.a <= 0.1) + discard; } #endif return color; diff --git a/test/main.zig b/test/main.zig index 0333e09..1bdf3b5 100644 --- 
a/test/main.zig +++ b/test/main.zig @@ -294,9 +294,7 @@ const MyState = struct { fn init(ctx: *anyopaque, engine: *ae.Engine) anyerror!void { var self = ae.ctx_to_self(MyState, ctx); - const vert align(@alignOf(u32)) = @embedFile("basic_vert").*; - const frag align(@alignOf(u32)) = @embedFile("basic_frag").*; - pipeline = try Rendering.Pipeline.new(Vertex.Layout, &vert, &frag); + pipeline = try Rendering.Pipeline.new(Vertex.Layout); const render = engine.allocator(.render); From 354d709b4260e95f5dcf63c21563b2f410ab7e08 Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Sat, 6 Jun 2026 14:15:39 -0400 Subject: [PATCH 32/44] Delete Pipeline API --- src/platform/3ds/3ds_gfx.zig | 89 +++++++---------- src/platform/gfx_api.zig | 7 +- src/platform/glfw/opengl/opengl_gfx.zig | 48 +++++---- src/platform/glfw/vulkan/vulkan_gfx.zig | 59 +++++------ src/platform/headless/headless_gfx.zig | 10 +- src/platform/psp/psp_gfx_ge.zig | 34 ++----- src/platform/switch/switch_gfx.zig | 41 +++----- src/rendering/Vertex.zig | 127 ++++++++++++++++++++++++ src/rendering/mesh.zig | 5 +- src/rendering/pipeline.zig | 101 ------------------- src/rendering/rendering.zig | 3 +- test/main.zig | 24 +---- 12 files changed, 243 insertions(+), 305 deletions(-) create mode 100644 src/rendering/Vertex.zig delete mode 100644 src/rendering/pipeline.zig diff --git a/src/platform/3ds/3ds_gfx.zig b/src/platform/3ds/3ds_gfx.zig index 76971dc..e1707e3 100644 --- a/src/platform/3ds/3ds_gfx.zig +++ b/src/platform/3ds/3ds_gfx.zig @@ -4,7 +4,7 @@ const std = @import("std"); const Util = @import("../../util/util.zig"); const Mat4 = @import("../../math/math.zig").Mat4; const Rendering = @import("../../rendering/rendering.zig"); -const Pipeline = Rendering.Pipeline; +const vertex = Rendering.vertex; const Mesh = Rendering.mesh; const Texture = Rendering.Texture; const shaders = @import("aether_shaders"); @@ -97,7 +97,6 @@ const PipelineData = struct { }; const MeshData = struct { - pipeline: Pipeline.Handle, 
slots: [MESH_SLOT_COUNT]MeshSlot = .{ .{}, .{} }, latest_slot: ?usize = null, len: usize, @@ -166,10 +165,11 @@ const RenderTargetMirror = extern struct { frame_buf: FrameBufMirror, }; -var pipelines = Util.CircularBuffer(PipelineData, 16).init(); var meshes = Util.CircularBuffer(MeshData, 2048).init(); var deferred_mesh_frees = Util.CircularBuffer(DeferredMeshFree, MAX_DEFERRED_MESH_FREES + 1).init(); var textures = Util.CircularBuffer(TextureData, 64).init(); +var render_pipeline: PipelineData = undefined; +var render_pipeline_initialized = false; var target: ?*c.C3D_RenderTarget = null; var projection_transform: c.C3D_Mtx = undefined; @@ -185,7 +185,6 @@ var fog_enabled = false; var uv_offset: [2]f32 = .{ 0.0, 0.0 }; var proj_matrix: Mat4 = Mat4.identity(); var view_matrix: Mat4 = Mat4.identity(); -var bound_pipeline: Pipeline.Handle = 0; var bound_texture: Texture.Handle = 0; pub fn init() anyerror!void { @@ -215,6 +214,8 @@ pub fn init() anyerror!void { c.C3D_RenderTargetSetOutput(target, c.GFX_TOP, c.GFX_LEFT, DISPLAY_TRANSFER_FLAGS); init_projection_transform(); + render_pipeline = try init_pipeline(vertex.Layout); + render_pipeline_initialized = true; initialized = true; frame_started = false; @@ -236,13 +237,10 @@ pub fn deinit() void { } textures.clear(); - for (1..pipelines.buffer.len) |i| { - if (pipelines.buffer[i]) |*pl| { - _ = c.shaderProgramFree(&pl.program); - c.DVLB_Free(pl.dvlb); - } + if (render_pipeline_initialized) { + deinit_pipeline(&render_pipeline); + render_pipeline_initialized = false; } - pipelines.clear(); for (1..meshes.buffer.len) |i| { if (meshes.buffer[i]) |*mesh| { @@ -375,7 +373,7 @@ pub fn set_vsync(v: bool) void { vsync_enabled = v; } -pub fn create_pipeline(layout: Pipeline.VertexLayout) anyerror!Pipeline.Handle { +fn init_pipeline(layout: vertex.VertexLayout) !PipelineData { const code: [:0]align(4) const u8 = &shaders.basic_vert; if (layout.stride == 0 or layout.attributes.len > MAX_VERTEX_ATTRS) return 
error.UnsupportedVertexLayout; @@ -407,7 +405,7 @@ pub fn create_pipeline(layout: Pipeline.VertexLayout) anyerror!Pipeline.Handle { if (add_attr_loader(&attr_info, 1, uv_attr, buffer_layout.uv_loader_size) < 0) return error.UnsupportedVertexLayout; if (add_attr_loader(&attr_info, 2, color_attr, buffer_layout.color_loader_size) < 0) return error.UnsupportedVertexLayout; - const handle = pipelines.add_element(.{ + return .{ .dvlb = dvlb, .program = program, .attr_info = attr_info, @@ -426,29 +424,16 @@ pub fn create_pipeline(layout: Pipeline.VertexLayout) anyerror!Pipeline.Handle { .u_pos_scale = c.shaderInstanceGetUniformLocation(program.vertexShader, "posScale"), .u_uv_scale_offset = c.shaderInstanceGetUniformLocation(program.vertexShader, "uvScaleOffset"), .u_color_scale = c.shaderInstanceGetUniformLocation(program.vertexShader, "colorScale"), - }) orelse return error.OutOfPipelines; - - return @intCast(handle); -} - -pub fn destroy_pipeline(handle: Pipeline.Handle) void { - if (pipeline_slot(handle)) |pl| { - _ = c.shaderProgramFree(&pl.program); - c.DVLB_Free(pl.dvlb); - } - if (bound_pipeline == handle) bound_pipeline = 0; - _ = pipelines.remove_element(handle); + }; } -pub fn bind_pipeline(handle: Pipeline.Handle) void { - bound_pipeline = handle; - if (pipeline_slot(handle)) |pl| bind_pipeline_data(pl); +fn deinit_pipeline(pl: *PipelineData) void { + _ = c.shaderProgramFree(&pl.program); + c.DVLB_Free(pl.dvlb); } -pub fn create_mesh(pipeline: Pipeline.Handle) anyerror!Mesh.Handle { - if (pipeline_slot(pipeline) == null) return error.InvalidPipeline; +pub fn create_mesh() anyerror!Mesh.Handle { const handle = meshes.add_element(.{ - .pipeline = pipeline, .len = 0, }) orelse return error.OutOfMeshes; @@ -490,8 +475,9 @@ pub fn update_mesh(handle: Mesh.Handle, data: []const u8) void { } pub fn draw_mesh(handle: Mesh.Handle, model: *const Mat4, count: usize) void { + if (!render_pipeline_initialized) return; const mesh = mesh_slot(handle) orelse return; - const 
pl = pipeline_slot(mesh.pipeline) orelse return; + const pl = &render_pipeline; const slot_idx = mesh.latest_slot orelse return; const slot = &mesh.slots[slot_idx]; const data = slot.data orelse return; @@ -500,7 +486,7 @@ pub fn draw_mesh(handle: Mesh.Handle, model: *const Mat4, count: usize) void { const needed = mesh_draw_bytes_needed(pl, count) orelse return; if (needed > slot.len) return; - bind_pipeline_data(pl); + bind_vertex_state(pl); upload_draw_uniforms(pl, model); rebind_texture(); @@ -721,7 +707,7 @@ fn texenv_replace_previous(id: c_int) void { c.C3D_DirtyTexEnv(env); } -fn bind_pipeline_data(pl: *PipelineData) void { +fn bind_vertex_state(pl: *PipelineData) void { c.C3D_BindProgram(&pl.program); c.C3D_SetAttrInfo(&pl.attr_info); } @@ -964,13 +950,6 @@ fn target_frame_buf(t: *c.C3D_RenderTarget) *c.C3D_FrameBuf { return @ptrCast(&mirror.frame_buf); } -fn pipeline_slot(handle: Pipeline.Handle) ?*PipelineData { - const idx: usize = handle; - if (idx == 0 or idx >= pipelines.buffer.len) return null; - if (pipelines.buffer[idx]) |*pl| return pl; - return null; -} - fn mesh_slot(handle: Mesh.Handle) ?*MeshData { const idx: usize = handle; if (idx == 0 or idx >= meshes.buffer.len) return null; @@ -985,7 +964,7 @@ fn texture_slot(handle: Texture.Handle) ?*TextureData { return null; } -fn find_attr(layout: Pipeline.VertexLayout, usage: Pipeline.AttributeUsage) ?Pipeline.Attribute { +fn find_attr(layout: vertex.VertexLayout, usage: vertex.AttributeUsage) ?vertex.Attribute { for (layout.attributes) |attr| { if (attr.usage == usage) return attr; } @@ -1002,8 +981,8 @@ const BufferLayout = struct { color_loader_size: u8, }; -fn buffer_layout_from_attrs(stride: usize, position_attr: Pipeline.Attribute, uv_attr: Pipeline.Attribute, color_attr: Pipeline.Attribute) ?BufferLayout { - var attrs = [_]Pipeline.Attribute{ position_attr, uv_attr, color_attr }; +fn buffer_layout_from_attrs(stride: usize, position_attr: vertex.Attribute, uv_attr: vertex.Attribute, color_attr: 
vertex.Attribute) ?BufferLayout { + var attrs = [_]vertex.Attribute{ position_attr, uv_attr, color_attr }; sort_attrs_by_offset(&attrs); const base_offset = attrs[0].offset; @@ -1053,7 +1032,7 @@ fn buffer_layout_from_attrs(stride: usize, position_attr: Pipeline.Attribute, uv }; } -fn sort_attrs_by_offset(attrs: *[3]Pipeline.Attribute) void { +fn sort_attrs_by_offset(attrs: *[3]vertex.Attribute) void { var i: usize = 1; while (i < attrs.len) : (i += 1) { var j = i; @@ -1065,7 +1044,7 @@ fn sort_attrs_by_offset(attrs: *[3]Pipeline.Attribute) void { } } -fn attribute_loader_id(usage: Pipeline.AttributeUsage) u64 { +fn attribute_loader_id(usage: vertex.AttributeUsage) u64 { return switch (usage) { .position => 0, .uv => 1, @@ -1074,12 +1053,12 @@ fn attribute_loader_id(usage: Pipeline.AttributeUsage) u64 { }; } -fn attr_fits(stride: usize, attr: Pipeline.Attribute) bool { +fn attr_fits(stride: usize, attr: vertex.Attribute) bool { const size = attribute_size_bytes(attr.format); return attr.offset <= stride and size <= stride - attr.offset; } -fn attribute_loader_size(attr: Pipeline.Attribute, available_bytes: usize) ?u8 { +fn attribute_loader_size(attr: vertex.Attribute, available_bytes: usize) ?u8 { if (attr.usage == .position and attr.size == 3 and attribute_component_size_bytes(attr.format) == 2 and available_bytes >= 8) { return 4; } @@ -1087,7 +1066,7 @@ fn attribute_loader_size(attr: Pipeline.Attribute, available_bytes: usize) ?u8 { return @intCast(attr.size); } -fn attribute_size_bytes(format: Pipeline.AttributeFormat) usize { +fn attribute_size_bytes(format: vertex.AttributeFormat) usize { return switch (format) { .f32x2 => 8, .f32x3 => 12, @@ -1098,12 +1077,12 @@ fn attribute_size_bytes(format: Pipeline.AttributeFormat) usize { }; } -fn attribute_size_bytes_with_count(format: Pipeline.AttributeFormat, count: u8) ?usize { +fn attribute_size_bytes_with_count(format: vertex.AttributeFormat, count: u8) ?usize { if (count == 0 or count > 4) return null; return 
attribute_component_size_bytes(format) * @as(usize, count); } -fn attribute_component_size_bytes(format: Pipeline.AttributeFormat) usize { +fn attribute_component_size_bytes(format: vertex.AttributeFormat) usize { return switch (format) { .f32x2, .f32x3 => 4, .unorm8x2, .unorm8x4 => 1, @@ -1111,7 +1090,7 @@ fn attribute_component_size_bytes(format: Pipeline.AttributeFormat) usize { }; } -fn gpu_format(format: Pipeline.AttributeFormat) c.GPU_FORMATS { +fn gpu_format(format: vertex.AttributeFormat) c.GPU_FORMATS { return switch (format) { .f32x2, .f32x3 => c.GPU_FLOAT, .unorm8x2, .unorm8x4 => c.GPU_UNSIGNED_BYTE, @@ -1119,11 +1098,11 @@ fn gpu_format(format: Pipeline.AttributeFormat) c.GPU_FORMATS { }; } -fn add_attr_loader(info: *c.C3D_AttrInfo, reg_id: c_int, attr: Pipeline.Attribute, loader_size: u8) c_int { +fn add_attr_loader(info: *c.C3D_AttrInfo, reg_id: c_int, attr: vertex.Attribute, loader_size: u8) c_int { return c.AttrInfo_AddLoader(info, reg_id, gpu_format(attr.format), loader_size); } -fn position_scale(attr: Pipeline.Attribute) ?[4]f32 { +fn position_scale(attr: vertex.Attribute) ?[4]f32 { if (attr.size != 3) return null; return switch (attr.format) { .f32x3 => .{ 1.0, 1.0, 1.0, 1.0 }, @@ -1132,7 +1111,7 @@ fn position_scale(attr: Pipeline.Attribute) ?[4]f32 { }; } -fn uv_scale(attr: Pipeline.Attribute) ?[2]f32 { +fn uv_scale(attr: vertex.Attribute) ?[2]f32 { if (attr.size != 2) return null; return switch (attr.format) { .f32x2 => .{ 1.0, 1.0 }, @@ -1142,7 +1121,7 @@ fn uv_scale(attr: Pipeline.Attribute) ?[2]f32 { }; } -fn color_scale(attr: Pipeline.Attribute) ?[4]f32 { +fn color_scale(attr: vertex.Attribute) ?[4]f32 { if (attr.size != 4) return null; return switch (attr.format) { .unorm8x4 => .{ unorm8_scale(), unorm8_scale(), unorm8_scale(), unorm8_scale() }, diff --git a/src/platform/gfx_api.zig b/src/platform/gfx_api.zig index b059d4b..0f84f5c 100644 --- a/src/platform/gfx_api.zig +++ b/src/platform/gfx_api.zig @@ -1,7 +1,6 @@ const std = 
@import("std"); const Mat4 = @import("../math/math.zig").Mat4; const Rendering = @import("../rendering/rendering.zig"); -const Pipeline = Rendering.Pipeline; const Mesh = Rendering.mesh; const Texture = Rendering.Texture; @@ -31,11 +30,7 @@ pub const Interface = struct { set_vsync: fn (bool) void, - create_pipeline: fn (Pipeline.VertexLayout) anyerror!Pipeline.Handle, - destroy_pipeline: fn (Pipeline.Handle) void, - bind_pipeline: fn (Pipeline.Handle) void, - - create_mesh: fn (Pipeline.Handle) anyerror!Mesh.Handle, + create_mesh: fn () anyerror!Mesh.Handle, destroy_mesh: fn (Mesh.Handle) void, update_mesh: fn (Mesh.Handle, []const u8) void, draw_mesh: fn (Mesh.Handle, *const Mat4, usize) void, diff --git a/src/platform/glfw/opengl/opengl_gfx.zig b/src/platform/glfw/opengl/opengl_gfx.zig index 319ea25..5a4405d 100644 --- a/src/platform/glfw/opengl/opengl_gfx.zig +++ b/src/platform/glfw/opengl/opengl_gfx.zig @@ -1,15 +1,15 @@ const std = @import("std"); -const Util = @import("../../../util/util.zig"); const glfw = @import("glfw"); const gl = @import("gl"); const Mat4 = @import("../../../math/math.zig").Mat4; +const Util = @import("../../../util/util.zig"); const shader = @import("shader.zig"); const gfx = @import("../../gfx.zig"); const Rendering = @import("../../../rendering/rendering.zig"); const Mesh = Rendering.mesh; -const Pipeline = Rendering.Pipeline; +const vertex = Rendering.vertex; const Texture = Rendering.Texture; const GLFWSurface = @import("../surface.zig"); const shaders = @import("aether_shaders"); @@ -25,19 +25,19 @@ pub fn setup(alloc: std.mem.Allocator, io: std.Io) void { var procs: gl.ProcTable = undefined; var last_width: u32 = 0; var last_height: u32 = 0; -var pipelines = Util.CircularBuffer(PipelineData, 16).init(); var meshes = Util.CircularBuffer(MeshInternal, 8192).init(); var alpha_blend_enabled: bool = true; var cull_face_enabled: bool = true; +var pipeline: PipelineData = undefined; +var pipeline_initialized: bool = false; const 
PipelineData = struct { - layout: Pipeline.VertexLayout, + layout: vertex.VertexLayout, vao: gl.uint, program: shader.Shader, }; const MeshInternal = struct { - pipeline: Pipeline.Handle, vbo: gl.uint, }; @@ -61,12 +61,21 @@ pub fn init() anyerror!void { gl.LineWidth(5.0); try shader.init(); + errdefer shader.deinit(); + shader.state.proj = Mat4.identity(); shader.state.view = Mat4.identity(); shader.update_ubo(); + + pipeline = try init_pipeline(vertex.Layout); + pipeline_initialized = true; } pub fn deinit() void { + if (pipeline_initialized) { + deinit_pipeline(&pipeline); + pipeline_initialized = false; + } shader.deinit(); gl.makeProcTableCurrent(null); @@ -161,7 +170,7 @@ pub fn set_view_matrix(mat: *const Mat4) void { shader.update_ubo(); } -pub fn create_pipeline(layout: Pipeline.VertexLayout) anyerror!Pipeline.Handle { +fn init_pipeline(layout: vertex.VertexLayout) !PipelineData { var vao: gl.uint = 0; gl.CreateVertexArrays(1, @ptrCast(&vao)); for (layout.attributes) |a| { @@ -183,39 +192,25 @@ pub fn create_pipeline(layout: Pipeline.VertexLayout) anyerror!Pipeline.Handle { const f_shader: [:0]align(4) const u8 = &shaders.basic_frag; const program = try shader.Shader.init(v_shader, f_shader); - const pipeline = pipelines.add_element(.{ + return .{ .layout = layout, .vao = vao, .program = program, - }) orelse return error.OutOfPipelines; - - return @intCast(pipeline); + }; } -pub fn bind_pipeline(pipeline: Pipeline.Handle) void { - const pl = pipelines.get_element(pipeline) orelse return; - gl.BindVertexArray(pl.vao); - gl.UseProgram(pl.program.shader_program); -} - -pub fn destroy_pipeline(pipeline: Pipeline.Handle) void { - var pl = pipelines.get_element(pipeline) orelse return; +fn deinit_pipeline(pl: *PipelineData) void { gl.DeleteVertexArrays(1, @ptrCast(&pl.vao)); pl.vao = 0; pl.program.deinit(); - - _ = pipelines.remove_element(pipeline); } -pub fn create_mesh(pipeline: Pipeline.Handle) anyerror!Mesh.Handle { - const pl = 
pipelines.get_element(pipeline).?; +pub fn create_mesh() anyerror!Mesh.Handle { var vbo: gl.uint = 0; gl.CreateBuffers(1, @ptrCast(&vbo)); gl.NamedBufferData(vbo, 0, null, gl.STATIC_DRAW); - gl.VertexArrayVertexBuffer(pl.vao, 0, vbo, 0, @intCast(pl.layout.stride)); const mesh_idx = meshes.add_element(.{ - .pipeline = pipeline, .vbo = vbo, }) orelse return error.OutOfMeshes; @@ -238,10 +233,13 @@ pub fn update_mesh(handle: Mesh.Handle, data: []const u8) void { } pub fn draw_mesh(handle: Mesh.Handle, model: *const Mat4, count: usize) void { + if (!pipeline_initialized) return; const mesh = meshes.get_element(handle) orelse return; - const pl = pipelines.get_element(mesh.pipeline) orelse return; + const pl = &pipeline; shader.update_per_object(model); + gl.BindVertexArray(pl.vao); + gl.UseProgram(pl.program.shader_program); gl.VertexArrayVertexBuffer(pl.vao, 0, mesh.vbo, 0, @intCast(pl.layout.stride)); gl.DrawArrays(gl.TRIANGLES, 0, @intCast(count)); } diff --git a/src/platform/glfw/vulkan/vulkan_gfx.zig b/src/platform/glfw/vulkan/vulkan_gfx.zig index 740edbb..083c859 100644 --- a/src/platform/glfw/vulkan/vulkan_gfx.zig +++ b/src/platform/glfw/vulkan/vulkan_gfx.zig @@ -15,7 +15,7 @@ const Mat4 = @import("../../../math/math.zig").Mat4; const vk = @import("vulkan"); const gfx = @import("../../gfx.zig"); const Rendering = @import("../../../rendering/rendering.zig"); -const Pipeline = Rendering.Pipeline; +const vertex = Rendering.vertex; const Mesh = Rendering.mesh; const Texture = Rendering.Texture; const shaders = @import("aether_shaders"); @@ -34,9 +34,8 @@ pub fn setup(alloc: std.mem.Allocator, io: std.Io) void { } const PipelineData = struct { - layout: vk.PipelineLayout, - vert_layout: Pipeline.VertexLayout, - pipeline: vk.Pipeline, + layout: vk.PipelineLayout = .null_handle, + pipeline: vk.Pipeline = .null_handle, }; const MAX_FRAMES = 3; @@ -46,7 +45,6 @@ const MeshData = struct { memories: [MAX_FRAMES]vk.DeviceMemory = @splat(.null_handle), mapped: 
[MAX_FRAMES]?[*]u8 = @splat(null), capacity: usize = 0, - pipeline: Pipeline.Handle = 0, built: bool = false, }; @@ -116,8 +114,8 @@ var tex_sampler: vk.Sampler = .null_handle; const TextureRec = struct { image: vk.Image, memory: vk.DeviceMemory, view: vk.ImageView, width: u32, height: u32 }; var textures = Util.CircularBuffer(TextureRec, TEXTURE_CAP).init(); -var pipelines = Util.CircularBuffer(PipelineData, 16).init(); var meshes = Util.CircularBuffer(MeshData, 8192).init(); +var render_pipeline: PipelineData = .{}; var swap_state: Swapchain.PresentState = .optimal; var alpha_blend_enabled: bool = true; @@ -438,6 +436,7 @@ pub fn init() anyerror!void { try create_texture_set_layout(); try create_texture_descriptor_pool_and_set(4096); try create_texture_sampler(); + render_pipeline = try init_pipeline(vertex.Layout); GLFWSurface.on_resize = resize_flag; } @@ -450,6 +449,7 @@ pub fn deinit() void { GLFWSurface.on_resize = null; context.logical_device.deviceWaitIdle() catch {}; + deinit_pipeline(&render_pipeline); destroy_texture_sampler(); destroy_texture_descriptor_pool_and_set(); destroy_texture_set_layout(); @@ -752,7 +752,7 @@ fn flush_camera_if_dirty() void { camera_dirty = false; } -pub fn create_pipeline(layout: Pipeline.VertexLayout) anyerror!Pipeline.Handle { +fn init_pipeline(layout: vertex.VertexLayout) !PipelineData { const vs: [:0]align(4) const u8 = &shaders.basic_vert; const fs: [:0]align(4) const u8 = &shaders.basic_frag; @@ -770,16 +770,19 @@ pub fn create_pipeline(layout: Pipeline.VertexLayout) anyerror!Pipeline.Handle { .push_constant_range_count = 1, .p_push_constant_ranges = @ptrCast(&range), }, null); + errdefer context.logical_device.destroyPipelineLayout(pl, null); const vert = try context.logical_device.createShaderModule(&.{ .code_size = vs.len, .p_code = @ptrCast(@alignCast(vs.ptr)), }, null); + defer context.logical_device.destroyShaderModule(vert, null); const frag = try context.logical_device.createShaderModule(&.{ .code_size = fs.len, 
.p_code = @ptrCast(@alignCast(fs.ptr)), }, null); + defer context.logical_device.destroyShaderModule(frag, null); const pipeline_shade_stage_create_info = [_]vk.PipelineShaderStageCreateInfo{ .{ @@ -956,38 +959,24 @@ pub fn create_pipeline(layout: Pipeline.VertexLayout) anyerror!Pipeline.Handle { return error.PipelineCreationFailed; } - const p_handle = pipelines.add_element(.{ + return .{ .layout = pl, - .vert_layout = layout, .pipeline = pipeline, - }) orelse return error.OutOfPipelines; - - return @intCast(p_handle); -} - -pub fn destroy_pipeline(handle: Pipeline.Handle) void { - context.logical_device.deviceWaitIdle() catch {}; - const pd = pipelines.get_element(handle) orelse return; - - context.logical_device.destroyPipeline(pd.pipeline, null); - context.logical_device.destroyPipelineLayout(pd.layout, null); - - _ = pipelines.remove_element(handle); + }; } -pub fn bind_pipeline(handle: Pipeline.Handle) void { - const pd = pipelines.get_element(handle) orelse return; - - command_buffer.bindPipeline(.graphics, pd.pipeline); - // Descriptor sets are (re)bound per draw in draw_mesh because the - // dynamic UBO offset can change as the user swaps projection/view - // matrices mid-frame. 
+fn deinit_pipeline(pd: *PipelineData) void { + if (pd.pipeline != .null_handle) { + context.logical_device.destroyPipeline(pd.pipeline, null); + } + if (pd.layout != .null_handle) { + context.logical_device.destroyPipelineLayout(pd.layout, null); + } + pd.* = .{}; } -pub fn create_mesh(pipeline: Pipeline.Handle) anyerror!Mesh.Handle { - const m_handle = meshes.add_element(.{ - .pipeline = pipeline, - }) orelse return error.OutOfMeshes; +pub fn create_mesh() anyerror!Mesh.Handle { + const m_handle = meshes.add_element(.{}) orelse return error.OutOfMeshes; return @intCast(m_handle); } @@ -1062,9 +1051,11 @@ pub fn draw_mesh(handle: Mesh.Handle, model: *const Mat4, count: usize) void { draw_state.mat = model.*; const m_data = meshes.get_element(handle) orelse return; - const p_data = pipelines.get_element(m_data.pipeline) orelse return; + if (render_pipeline.pipeline == .null_handle) return; + const p_data = &render_pipeline; flush_camera_if_dirty(); + command_buffer.bindPipeline(.graphics, p_data.pipeline); const sets = [_]vk.DescriptorSet{ descriptor_sets[swapchain.image_index], diff --git a/src/platform/headless/headless_gfx.zig b/src/platform/headless/headless_gfx.zig index d1dcf56..f976bf0 100644 --- a/src/platform/headless/headless_gfx.zig +++ b/src/platform/headless/headless_gfx.zig @@ -1,7 +1,6 @@ const std = @import("std"); const Mat4 = @import("../../math/math.zig").Mat4; const Rendering = @import("../../rendering/rendering.zig"); -const Pipeline = Rendering.Pipeline; const Mesh = Rendering.mesh; const Texture = Rendering.Texture; @@ -34,14 +33,7 @@ pub fn end_frame() void {} pub fn clear_depth() void {} pub fn set_vsync(_: bool) void {} -pub fn create_pipeline(_: Pipeline.VertexLayout) anyerror!Pipeline.Handle { - return 0; -} - -pub fn destroy_pipeline(_: Pipeline.Handle) void {} -pub fn bind_pipeline(_: Pipeline.Handle) void {} - -pub fn create_mesh(_: Pipeline.Handle) anyerror!Mesh.Handle { +pub fn create_mesh() anyerror!Mesh.Handle { return 0; } diff 
--git a/src/platform/psp/psp_gfx_ge.zig b/src/platform/psp/psp_gfx_ge.zig index b26888c..784b055 100644 --- a/src/platform/psp/psp_gfx_ge.zig +++ b/src/platform/psp/psp_gfx_ge.zig @@ -9,7 +9,7 @@ const std = @import("std"); const Util = @import("../../util/util.zig"); const Mat4 = @import("../../math/math.zig").Mat4; const Rendering = @import("../../rendering/rendering.zig"); -const Pipeline = Rendering.Pipeline; +const vertex = Rendering.vertex; const Mesh = Rendering.mesh; const Texture = Rendering.Texture; @@ -113,7 +113,7 @@ const frame_bpp: u32 = switch (options.config.psp_display_mode) { const VertexType = sdk.VertexType; -// ---- pipeline cache -------------------------------------------------------- +// ---- vertex pipeline ------------------------------------------------------- const PipelineData = struct { vertex_type: VertexType, @@ -124,8 +124,7 @@ const PipelineData = struct { uv_unorm8: bool, }; -var pipelines = Util.CircularBuffer(PipelineData, 16).init(); -var bound_pipeline: Pipeline.Handle = 0; +var render_pipeline: PipelineData = undefined; var alpha_blend_enabled: bool = true; var clip_planes_enabled: bool = false; var fog_enabled: bool = false; @@ -545,6 +544,7 @@ fn emit_depth_range(near_value: u16, far_value: u16) void { // ---- engine state ---------------------------------------------------------- pub fn init() anyerror!void { + render_pipeline = init_pipeline(vertex.Layout); clear_color = 0x000000; swapchain.init(); @@ -855,9 +855,9 @@ pub fn set_vsync(v: bool) void { gfx.surface.sync = v; } -// ---- pipelines ------------------------------------------------------------- +// ---- meshes --------------------------------------------------------------- -pub fn create_pipeline(layout: Pipeline.VertexLayout) anyerror!Pipeline.Handle { +fn init_pipeline(layout: vertex.VertexLayout) PipelineData { var vtype = VertexType{ .vertex = .Vertex32Bitf, // default, overridden by position attribute .transform = .Transform3D, @@ -895,36 +895,22 @@ pub fn 
create_pipeline(layout: Pipeline.VertexLayout) anyerror!Pipeline.Handle { } } - const handle = pipelines.add_element(.{ + return .{ .vertex_type = vtype, .stride = layout.stride, .uv_unorm8 = uv_unorm8, - }) orelse return error.OutOfPipelines; - - return @intCast(handle); -} - -pub fn destroy_pipeline(handle: Pipeline.Handle) void { - _ = pipelines.remove_element(handle); -} - -pub fn bind_pipeline(handle: Pipeline.Handle) void { - bound_pipeline = handle; + }; } -// ---- meshes --------------------------------------------------------------- - const MeshData = struct { - pipeline: Pipeline.Handle, data: ?[*]const u8, len: usize, }; var meshes = Util.CircularBuffer(MeshData, 2048).init(); -pub fn create_mesh(pipeline: Pipeline.Handle) anyerror!Mesh.Handle { +pub fn create_mesh() anyerror!Mesh.Handle { const handle = meshes.add_element(.{ - .pipeline = pipeline, .data = null, .len = 0, }) orelse return error.OutOfMeshes; @@ -948,7 +934,7 @@ pub fn update_mesh(handle: Mesh.Handle, data: []const u8) void { pub fn draw_mesh(handle: Mesh.Handle, model: *const Mat4, count: usize) void { const mesh = meshes.get_element(handle) orelse return; - const pl = pipelines.get_element(mesh.pipeline) orelse return; + const pl = &render_pipeline; const data = mesh.data orelse return; must(cmd.world_matrix(mat4_as_floats(model))); diff --git a/src/platform/switch/switch_gfx.zig b/src/platform/switch/switch_gfx.zig index a8ba78e..c6c2341 100644 --- a/src/platform/switch/switch_gfx.zig +++ b/src/platform/switch/switch_gfx.zig @@ -8,7 +8,7 @@ const std = @import("std"); const Util = @import("../../util/util.zig"); const Mat4 = @import("../../math/math.zig").Mat4; const Rendering = @import("../../rendering/rendering.zig"); -const Pipeline = Rendering.Pipeline; +const vertex = Rendering.vertex; const Mesh = Rendering.mesh; const Texture = Rendering.Texture; const gfx = @import("../gfx.zig"); @@ -267,7 +267,6 @@ const PipelineData = struct { }; const MeshData = struct { - pipeline: 
Pipeline.Handle, mem_block: DkMemBlock = null, gpu_addr: DkGpuAddr = 0, capacity: u32 = 0, @@ -287,10 +286,10 @@ var command_buffer: DkCmdBuf = null; var code_mem: DkMemBlock = null; var code_offset: u32 = 0; -var pipelines = Util.CircularBuffer(PipelineData, 16).init(); var meshes = Util.CircularBuffer(MeshData, 8192).init(); +var render_pipeline: PipelineData = undefined; -var current_pipeline: Pipeline.Handle = 0; +var render_pipeline_initialized = false; var current_slot: c_int = -1; var initialized: bool = false; var clear_color: [4]f32 = .{ 0.0, 0.0, 0.0, 1.0 }; @@ -325,6 +324,9 @@ pub fn init() anyerror!void { try create_code_memory(); errdefer destroy_code_memory(); + render_pipeline = try init_pipeline(vertex.Layout); + render_pipeline_initialized = true; + try create_command_buffer(); errdefer destroy_command_buffer(); @@ -347,8 +349,7 @@ pub fn deinit() void { if (render_queue) |_| dkQueueWaitIdle(render_queue); destroy_all_meshes(); - pipelines.clear(); - current_pipeline = 0; + render_pipeline_initialized = false; if (render_queue) |_| { dkQueueDestroy(render_queue); @@ -440,7 +441,7 @@ pub fn set_vsync(v: bool) void { if (swapchain) |_| dkSwapchainSetSwapInterval(swapchain, @intFromBool(v)); } -pub fn create_pipeline(layout: Pipeline.VertexLayout) anyerror!Pipeline.Handle { +fn init_pipeline(layout: vertex.VertexLayout) !PipelineData { const vertex_code: [:0]align(4) const u8 = &shader_data.basic_vert; const fragment_code: [:0]align(4) const u8 = &shader_data.basic_frag; @@ -457,22 +458,11 @@ pub fn create_pipeline(layout: Pipeline.VertexLayout) anyerror!Pipeline.Handle { try load_shader(&data.vertex_shader, vertex_code); try load_shader(&data.fragment_shader, fragment_code); - const pipeline = pipelines.add_element(data) orelse return error.OutOfPipelines; - return @intCast(pipeline); -} - -pub fn destroy_pipeline(pipeline: Pipeline.Handle) void { - _ = pipelines.remove_element(pipeline); - if (current_pipeline == pipeline) current_pipeline = 0; -} - 
-pub fn bind_pipeline(pipeline: Pipeline.Handle) void { - current_pipeline = pipeline; + return data; } -pub fn create_mesh(pipeline: Pipeline.Handle) anyerror!Mesh.Handle { - _ = pipelines.get_element(pipeline) orelse return error.InvalidPipeline; - const mesh = meshes.add_element(.{ .pipeline = pipeline }) orelse return error.OutOfMeshes; +pub fn create_mesh() anyerror!Mesh.Handle { + const mesh = meshes.add_element(.{}) orelse return error.OutOfMeshes; return @intCast(mesh); } @@ -517,12 +507,11 @@ pub fn update_mesh(handle: Mesh.Handle, data: []const u8) void { } pub fn draw_mesh(handle: Mesh.Handle, _: *const Mat4, count: usize) void { - if (!initialized or command_buffer == null) return; + if (!initialized or !render_pipeline_initialized or command_buffer == null) return; const mesh = meshes.get_element(handle) orelse return; if (mesh.mem_block == null or mesh.size == 0 or count == 0) return; - const pipeline_handle = if (current_pipeline != 0) current_pipeline else mesh.pipeline; - const pl = pipelines.get_element(pipeline_handle) orelse return; + const pl = &render_pipeline; const shaders = [_]*const DkShader{ &pl.vertex_shader, &pl.fragment_shader }; dkCmdBufBindShaders(command_buffer, DK_STAGE_GRAPHICS_MASK, shaders[0..].ptr, shaders.len); @@ -684,7 +673,7 @@ fn load_shader(shader: *DkShader, code: []const u8) !void { code_offset = end; } -fn init_layout(data: *PipelineData, layout: Pipeline.VertexLayout) !void { +fn init_layout(data: *PipelineData, layout: vertex.VertexLayout) !void { var max_location: u32 = 0; var max_binding: u32 = 0; @@ -707,7 +696,7 @@ fn init_layout(data: *PipelineData, layout: Pipeline.VertexLayout) !void { data.vtx_buffer_count = @max(max_binding, 1); } -fn vtxAttrib(attr: Pipeline.Attribute) DkVtxAttribState { +fn vtxAttrib(attr: vertex.Attribute) DkVtxAttribState { const Format = struct { size: u32, kind: u32, diff --git a/src/rendering/Vertex.zig b/src/rendering/Vertex.zig new file mode 100644 index 0000000..3555547 --- 
/dev/null +++ b/src/rendering/Vertex.zig @@ -0,0 +1,127 @@ +const options = @import("options"); + +pub const Vertex = if (options.config.platform == .nintendo_3ds) Vertex3DS else VertexBaseline; + +const Vertex3DS = extern struct { + pos: [3]i16, + uv: [2]i16, + color: u32 align(2), +}; + +const VertexBaseline = extern struct { + uv: [2]i16, + color: u32, + pos: [3]i16, + _pad: i16 = 0, +}; + +pub const AttributeUsage = enum { + position, + uv, + color, + normal, +}; + +pub const AttributeFormat = enum(u8) { + f32x2, + f32x3, + unorm8x2, + unorm8x4, + unorm16x2, + unorm16x3, + snorm16x2, + snorm16x3, + + fn infer(comptime T: type) AttributeFormat { + return switch (T) { + [2]f32 => .f32x2, + [3]f32 => .f32x3, + [2]u8 => .unorm8x2, + [4]u8, u32 => .unorm8x4, + [2]u16 => .unorm16x2, + [3]u16 => .unorm16x3, + [2]i16 => .snorm16x2, + [3]i16 => .snorm16x3, + else => @compileError("Unsupported vertex attribute field type: " ++ @typeName(T)), + }; + } + + pub fn count(self: AttributeFormat) usize { + return switch (self) { + .f32x2, .unorm8x2, .unorm16x2, .snorm16x2 => 2, + .f32x3, .unorm16x3, .snorm16x3 => 3, + .unorm8x4 => 4, + }; + } +}; + +pub const Attribute = struct { + location: u8, + binding: u8 = 0, + offset: usize, + size: usize, + format: AttributeFormat, + usage: AttributeUsage, +}; + +pub const VertexLayout = struct { + stride: usize, + attributes: []const Attribute, +}; + +pub const Attributes = attributesFromVertex(Vertex); +pub const Layout = VertexLayout{ + .stride = @sizeOf(Vertex), + .attributes = &Attributes, +}; + +fn attributesFromVertex(comptime V: type) [3]Attribute { + return .{ + makeAttribute(V, "pos", 0, .position, 3), + makeAttribute(V, "color", 1, .color, 4), + makeAttribute(V, "uv", 2, .uv, 2), + }; +} + +fn makeAttribute( + comptime V: type, + comptime field_name: []const u8, + comptime location: u8, + comptime usage: AttributeUsage, + comptime expected_count: usize, +) Attribute { + if (!hasField(V, field_name)) { + 
@compileError("Rendering.Vertex is missing required field '" ++ field_name ++ "'"); + } + + const format = AttributeFormat.infer(@FieldType(V, field_name)); + if (format.count() != expected_count) { + @compileError("Rendering.Vertex field '" ++ field_name ++ "' has the wrong component count"); + } + + return .{ + .location = location, + .offset = @offsetOf(V, field_name), + .size = format.count(), + .format = format, + .usage = usage, + }; +} + +fn hasField(comptime T: type, comptime field_name: []const u8) bool { + const info = @typeInfo(T); + if (info != .@"struct") return false; + + inline for (info.@"struct".fields) |field| { + if (comptime eql(field.name, field_name)) return true; + } + return false; +} + +fn eql(comptime a: []const u8, comptime b: []const u8) bool { + if (a.len != b.len) return false; + inline for (a, 0..) |ch, i| { + if (ch != b[i]) return false; + } + return true; +} diff --git a/src/rendering/mesh.zig b/src/rendering/mesh.zig index 3bad056..d8b60cf 100644 --- a/src/rendering/mesh.zig +++ b/src/rendering/mesh.zig @@ -1,6 +1,5 @@ const std = @import("std"); const Mat4 = @import("../math/math.zig").Mat4; -const Pipeline = @import("pipeline.zig"); const Util = @import("../util/util.zig"); const Platform = @import("../platform/platform.zig"); const gfx = Platform.gfx; @@ -22,9 +21,9 @@ pub fn Mesh(comptime V: type) type { handle: Handle, vertices: std.ArrayList(Vertex), - pub fn new(alloc: std.mem.Allocator, pipeline: Pipeline.Handle) !Self { + pub fn new(alloc: std.mem.Allocator) !Self { return .{ - .handle = try gfx.api.create_mesh(pipeline), + .handle = try gfx.api.create_mesh(), .vertices = try std.ArrayList(V).initCapacity(alloc, 32), }; } diff --git a/src/rendering/pipeline.zig b/src/rendering/pipeline.zig deleted file mode 100644 index 49f2e3b..0000000 --- a/src/rendering/pipeline.zig +++ /dev/null @@ -1,101 +0,0 @@ -const Platform = @import("../platform/platform.zig"); -const gfx = Platform.gfx; - -pub const Handle = u32; - -pub const 
AttributeUsage = enum { - position, - uv, - color, - normal, -}; - -pub const AttributeFormat = enum(u8) { - f32x2, - f32x3, - unorm8x2, - unorm8x4, - unorm16x2, - unorm16x3, - snorm16x2, - snorm16x3, - - fn infer(comptime T: type) AttributeFormat { - return switch (T) { - [2]f32 => .f32x2, - [3]f32 => .f32x3, - [2]u8 => .unorm8x2, - [4]u8, u32 => .unorm8x4, - [2]u16 => .unorm16x2, - [3]u16 => .unorm16x3, - [2]i16 => .snorm16x2, - [3]i16 => .snorm16x3, - else => @compileError("Unsupported attribute field type"), - }; - } - - pub fn count(self: AttributeFormat) usize { - return switch (self) { - .f32x2, .unorm8x2, .unorm16x2, .snorm16x2 => 2, - .f32x3, .unorm16x3, .snorm16x3 => 3, - .unorm8x4 => 4, - }; - } -}; - -pub const Attribute = struct { - location: u8, - binding: u8 = 0, - offset: usize, - size: usize, - format: AttributeFormat, - usage: AttributeUsage, -}; - -pub const VertexLayout = struct { - stride: usize, - attributes: []const Attribute, -}; - -pub const AttributeSpec = struct { - field: []const u8, - location: u8, - binding: u8 = 0, - usage: AttributeUsage, -}; - -pub fn attributes_from_struct(comptime V: type, comptime specs: []const AttributeSpec) [specs.len]Attribute { - comptime var attrs: [specs.len]Attribute = undefined; - - inline for (specs, 0..) 
|s, i| { - const format = AttributeFormat.infer(@FieldType(V, s.field)); - attrs[i] = .{ - .location = s.location, - .binding = s.binding, - .size = format.count(), - .offset = @offsetOf(V, s.field), - .format = format, - .usage = s.usage, - }; - } - - return attrs; -} - -pub fn layout_from_struct(comptime V: type, comptime attrs: []const Attribute) VertexLayout { - return .{ .stride = @sizeOf(V), .attributes = attrs }; -} - -handle: Handle, - -pub fn new(layout: VertexLayout) !Handle { - return gfx.api.create_pipeline(layout); -} - -pub fn deinit(handle: Handle) void { - gfx.api.destroy_pipeline(handle); -} - -pub fn bind(handle: Handle) void { - gfx.api.bind_pipeline(handle); -} diff --git a/src/rendering/rendering.zig b/src/rendering/rendering.zig index e173d0e..319f58e 100644 --- a/src/rendering/rendering.zig +++ b/src/rendering/rendering.zig @@ -1,8 +1,9 @@ pub const mesh = @import("mesh.zig"); pub const Mesh = mesh.Mesh; +pub const vertex = @import("Vertex.zig"); +pub const Vertex = vertex.Vertex; pub const Transform = @import("transform.zig"); pub const Camera = @import("camera.zig"); pub const Texture = @import("texture.zig"); -pub const Pipeline = @import("pipeline.zig"); pub const gfx = @import("../platform/platform.zig").gfx; diff --git a/test/main.zig b/test/main.zig index 1bdf3b5..178e70e 100644 --- a/test/main.zig +++ b/test/main.zig @@ -48,20 +48,7 @@ fn psp_cwd() std.Io.Dir { return .{ .handle = -1 }; } -const Vertex = extern struct { - uv: [2]i16, - color: u32, - pos: [3]i16, - _pad: i16 = 0, - - pub const Attributes = Rendering.Pipeline.attributes_from_struct(@This(), &[_]Rendering.Pipeline.AttributeSpec{ - .{ .field = "pos", .location = 0, .usage = .position }, - .{ .field = "color", .location = 1, .usage = .color }, - .{ .field = "uv", .location = 2, .usage = .uv }, - }); - pub const Layout = Rendering.Pipeline.layout_from_struct(@This(), &Attributes); -}; - +const Vertex = Rendering.Vertex; const MyMesh = Rendering.Mesh(Vertex); const 
BATCH_A_TRIANGLES = 61; @@ -294,12 +281,11 @@ const MyState = struct { fn init(ctx: *anyopaque, engine: *ae.Engine) anyerror!void { var self = ae.ctx_to_self(MyState, ctx); - pipeline = try Rendering.Pipeline.new(Vertex.Layout); const render = engine.allocator(.render); - self.batch_a = try MyMesh.new(render, pipeline); - self.batch_b = try MyMesh.new(render, pipeline); + self.batch_a = try MyMesh.new(render); + self.batch_b = try MyMesh.new(render); self.batch_a_transform = Rendering.Transform.new(); self.batch_b_transform = Rendering.Transform.new(); @@ -341,7 +327,6 @@ const MyState = struct { self.texture.deinit(render); self.batch_b.deinit(render); self.batch_a.deinit(render); - Rendering.Pipeline.deinit(pipeline); } fn tick(ctx: *anyopaque, _: *ae.Engine) anyerror!void { @@ -400,7 +385,6 @@ const MyState = struct { 1, )); - Rendering.Pipeline.bind(pipeline); Rendering.gfx.api.set_depth_write(false); self.texture.bind(); self.batch_b.draw(&self.batch_b_transform.get_matrix()); @@ -419,8 +403,6 @@ const MyState = struct { } }; -var pipeline: Rendering.Pipeline.Handle = undefined; - pub fn main(init: std.process.Init) !void { const mib = 1024 * 1024; const memory_config: ae.Util.MemoryConfig = .{ From c854237db05984d28bcd924c96100e2c190894da Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Sat, 6 Jun 2026 14:35:27 -0400 Subject: [PATCH 33/44] Cleanup C Libs --- build.zig | 28 ++-- src/platform/3ds/paths.zig | 14 +- src/platform/c_io.zig | 83 +++++------ src/platform/c_process_init.zig | 17 ++- src/platform/nintendo_c.zig | 41 ++++++ src/platform/switch/aether_switch_import.h | 118 +++++++++++++++ src/platform/switch/input.zig | 160 +++++++-------------- src/platform/switch/paths.zig | 14 +- src/platform/switch/surface.zig | 5 +- src/platform/switch/switch_audio.zig | 50 +++---- src/platform/switch/time.zig | 8 +- 11 files changed, 303 insertions(+), 235 deletions(-) create mode 100644 src/platform/nintendo_c.zig create mode 100644 
src/platform/switch/aether_switch_import.h diff --git a/build.zig b/build.zig index 6f7f8cb..1be496e 100644 --- a/build.zig +++ b/build.zig @@ -187,12 +187,22 @@ fn devkitProPath(b: *std.Build) []const u8 { return p; } -fn add3dsCImportPaths(mod: *std.Build.Module, dkp: []const u8) void { +fn addNintendoCImportPaths(owner: *std.Build, mod: *std.Build.Module, config: Config, dkp: []const u8) void { const b = mod.owner; - // Keep newlib before libctru so libctru's include_next sys wrappers - // resolve during Zig's C translation of Citro3D/libctru headers. - mod.addIncludePath(.{ .cwd_relative = b.pathJoin(&.{ dkp, "devkitARM/arm-none-eabi/include" }) }); - mod.addIncludePath(.{ .cwd_relative = b.pathJoin(&.{ dkp, "libctru/include" }) }); + mod.addIncludePath(owner.path("src/platform")); + switch (config.platform) { + .nintendo_3ds => { + // Keep newlib before libctru so libctru's include_next sys wrappers + // resolve during Zig's C translation of Citro3D/libctru headers. + mod.addIncludePath(.{ .cwd_relative = b.pathJoin(&.{ dkp, "devkitARM/arm-none-eabi/include" }) }); + mod.addIncludePath(.{ .cwd_relative = b.pathJoin(&.{ dkp, "libctru/include" }) }); + }, + .nintendo_switch => { + mod.addIncludePath(.{ .cwd_relative = b.pathJoin(&.{ dkp, "devkitA64/aarch64-none-elf/include" }) }); + mod.addIncludePath(.{ .cwd_relative = b.pathJoin(&.{ dkp, "libnx/include" }) }); + }, + else => {}, + } } /// Creates a `3dslink` command for pushing an installed `.3dsx` to a @@ -344,8 +354,8 @@ pub fn addGame(owner: *std.Build, b: *std.Build, opts: GameOptions) *std.Build.S } } - if (config.platform == .nintendo_3ds) { - add3dsCImportPaths(mod, devkitProPath(b)); + if (uses_nintendo_c_io) { + addNintendoCImportPaths(owner, mod, config, devkitProPath(b)); } addInternalShaderModule(owner, b, mod, config); @@ -457,8 +467,8 @@ pub fn addHeadless(owner: *std.Build, b: *std.Build, opts: HeadlessOptions) *std mod.addImport("pspsdk", pd.module("pspsdk")); } - if (config.platform == 
.nintendo_3ds) { - add3dsCImportPaths(mod, devkitProPath(b)); + if (uses_nintendo_c_io) { + addNintendoCImportPaths(owner, mod, config, devkitProPath(b)); } const user_mod = b.createModule(.{ diff --git a/src/platform/3ds/paths.zig b/src/platform/3ds/paths.zig index 38d1cb2..f5bbc68 100644 --- a/src/platform/3ds/paths.zig +++ b/src/platform/3ds/paths.zig @@ -1,24 +1,20 @@ const std = @import("std"); - -extern fn archiveMountSdmc() u32; -extern fn archiveUnmount(name: [*:0]const u8) u32; -extern fn romfsMountSelf(name: [*:0]const u8) u32; -extern fn romfsUnmount(name: [*:0]const u8) u32; +const c = @import("../nintendo_c.zig").c; pub fn mountData() bool { - return archiveMountSdmc() == 0; + return c.archiveMountSdmc() == 0; } pub fn unmountData() void { - _ = archiveUnmount("sdmc"); + _ = c.archiveUnmount("sdmc"); } pub fn mountResources() bool { - return romfsMountSelf("romfs") == 0; + return c.romfsMountSelf("romfs") == 0; } pub fn unmountResources() void { - _ = romfsUnmount("romfs"); + _ = c.romfsUnmount("romfs"); } pub fn dataRoot(buffer: []u8, app_name: []const u8) error{NameTooLong}![]const u8 { diff --git a/src/platform/c_io.zig b/src/platform/c_io.zig index f31ce75..64a8ffd 100644 --- a/src/platform/c_io.zig +++ b/src/platform/c_io.zig @@ -16,44 +16,31 @@ const platform_paths = switch (options.config.platform) { else => unreachable, }; -const c = std.c; - -const CDirent = extern struct { - d_ino: c_int, - d_type: u8, - d_name: [256:0]u8, -}; - -const devkit = struct { - extern "c" fn open(path: [*:0]const u8, flags: c_int, ...) 
c_int; - extern "c" fn mkdir(path: [*:0]const u8, mode: c_int) c_int; - extern "c" fn readdir(dirp: *c.DIR) ?*CDirent; - extern "c" fn __errno() *c_int; -}; +const c = @import("nintendo_c.zig").c; const max_path_bytes = 1024; const AT_FDCWD: c_int = -2; -const O_RDONLY: c_int = 0; -const O_WRONLY: c_int = 1; -const O_RDWR: c_int = 2; -const O_CREAT: c_int = 0x0200; -const O_TRUNC: c_int = 0x0400; -const O_EXCL: c_int = 0x0800; -const O_BINARY: c_int = 0x10000; -const O_CLOEXEC: c_int = 0x40000; -const O_NOFOLLOW: c_int = 0x100000; -const SEEK_SET: c_int = 0; -const SEEK_CUR: c_int = 1; -const SEEK_END: c_int = 2; - -const DT_FIFO: u8 = 1; -const DT_CHR: u8 = 2; -const DT_DIR: u8 = 4; -const DT_BLK: u8 = 6; -const DT_REG: u8 = 8; -const DT_LNK: u8 = 10; -const DT_SOCK: u8 = 12; -const DT_WHT: u8 = 14; +const O_RDONLY: c_int = c.O_RDONLY; +const O_WRONLY: c_int = c.O_WRONLY; +const O_RDWR: c_int = c.O_RDWR; +const O_CREAT: c_int = c.O_CREAT; +const O_TRUNC: c_int = c.O_TRUNC; +const O_EXCL: c_int = c.O_EXCL; +const O_BINARY: c_int = c.O_BINARY; +const O_CLOEXEC: c_int = c.O_CLOEXEC; +const O_NOFOLLOW: c_int = c.O_NOFOLLOW; +const SEEK_SET: c_int = c.SEEK_SET; +const SEEK_CUR: c_int = c.SEEK_CUR; +const SEEK_END: c_int = c.SEEK_END; + +const DT_FIFO: u8 = c.DT_FIFO; +const DT_CHR: u8 = c.DT_CHR; +const DT_DIR: u8 = c.DT_DIR; +const DT_BLK: u8 = c.DT_BLK; +const DT_REG: u8 = c.DT_REG; +const DT_LNK: u8 = c.DT_LNK; +const DT_SOCK: u8 = c.DT_SOCK; +const DT_WHT: u8 = c.DT_WHT; var read_fd: c_int = -1; var write_fd: c_int = -1; @@ -185,7 +172,7 @@ fn dirCreateDir( var path_buffer: [max_path_bytes:0]u8 = undefined; const path = try rootedPathForDir(&path_buffer, dir, sub_path); const mode = permissionsMode(permissions, 0o777); - if (devkit.mkdir(path.ptr, mode) == 0) return; + if (c.mkdir(path.ptr, mode) == 0) return; return createDirError(errno()); } @@ -225,7 +212,7 @@ fn dirAccess(_: ?*anyopaque, dir: Dir, sub_path: []const u8, opts: Dir.AccessOpt var path_buffer: 
[max_path_bytes:0]u8 = undefined; const path = try rootedPathForDir(&path_buffer, dir, sub_path); - const fd = devkit.open(path.ptr, O_BINARY | O_RDONLY | O_CLOEXEC, @as(c_int, 0)); + const fd = c.open(path.ptr, O_BINARY | O_RDONLY | O_CLOEXEC, @as(c.mode_t, 0)); if (fd < 0) return accessError(errno()); _ = c.close(fd); } @@ -250,7 +237,7 @@ fn dirOpenFile(_: ?*anyopaque, dir: Dir, sub_path: []const u8, flags: Dir.OpenFi if (!flags.follow_symlinks) open_flags |= O_NOFOLLOW; const path = try rootedPathForDir(&path_buffer, dir, sub_path); - const fd = devkit.open(path.ptr, open_flags, @as(c_int, 0)); + const fd = c.open(path.ptr, open_flags, @as(c.mode_t, 0)); if (fd < 0) return openError(errno()); errdefer _ = c.close(fd); return registerFile(fd, role); @@ -267,7 +254,7 @@ fn dirCreateFile(_: ?*anyopaque, dir: Dir, sub_path: []const u8, flags: Dir.Crea const mode = permissionsMode(flags.permissions, 0o666); const path = try rootedPathForDir(&path_buffer, dir, sub_path); - const fd = devkit.open(path.ptr, open_flags, mode); + const fd = c.open(path.ptr, open_flags, mode); if (fd < 0) return openError(errno()); errdefer _ = c.close(fd); return registerFile(fd, if (flags.read) .read_write else .write); @@ -366,15 +353,15 @@ fn dirRead(_: ?*anyopaque, reader: *Dir.Reader, out: []Dir.Entry) Dir.Reader.Err var count: usize = 0; var name_end = reader.buffer.len; while (count < out.len) { - devkit.__errno().* = 0; - const entry = devkit.readdir(stream) orelse { + c.__errno().* = 0; + const entry = c.readdir(stream) orelse { if (errno() != 0) return dirReadError(errno()); reader.state = .finished; return count; }; header.pos = c.telldir(stream); - const name = std.mem.span(@as([*:0]const u8, @ptrCast(&entry.d_name))); + const name = std.mem.span(@as([*:0]const u8, @ptrCast(&entry.*.d_name))); if (std.mem.eql(u8, name, ".") or std.mem.eql(u8, name, "..")) continue; if (name.len + 1 > name_end - header_end) { if (count == 0) return error.Unexpected; @@ -386,8 +373,8 @@ fn 
dirRead(_: ?*anyopaque, reader: *Dir.Reader, out: []Dir.Entry) Dir.Reader.Err reader.buffer[name_end + name.len] = 0; out[count] = .{ .name = reader.buffer[name_end .. name_end + name.len], - .kind = direntKind(entry.d_type), - .inode = @intCast(entry.d_ino), + .kind = direntKind(entry.*.d_type), + .inode = @intCast(entry.*.d_ino), }; count += 1; } @@ -667,8 +654,8 @@ fn createDirPathAt(dir: Dir, sub_path: []const u8, permissions: Dir.Permissions) return status; } -fn createSingleDirPath(path: [*:0]const u8, mode: c_int) Dir.CreateDirPathError!Dir.CreatePathStatus { - if (devkit.mkdir(path, mode) == 0) return .created; +fn createSingleDirPath(path: [*:0]const u8, mode: c.mode_t) Dir.CreateDirPathError!Dir.CreatePathStatus { + if (c.mkdir(path, mode) == 0) return .created; switch (errno()) { 17 => return .existed, 1 => return error.PermissionDenied, @@ -740,7 +727,7 @@ fn fdFromDirHandle(dir: Dir) c_int { return @intCast(dir.handle); } -fn permissionsMode(permissions: File.Permissions, default: c_int) c_int { +fn permissionsMode(permissions: File.Permissions, default: c.mode_t) c.mode_t { if (@bitSizeOf(File.Permissions) == 0) return default; return @intCast(@intFromEnum(permissions)); } @@ -877,7 +864,7 @@ fn createFileAtomicDirError(err: anyerror) Dir.CreateFileAtomicError { } fn errno() c_int { - return devkit.__errno().*; + return c.__errno().*; } fn createDirError(code: c_int) Dir.CreateDirError { diff --git a/src/platform/c_process_init.zig b/src/platform/c_process_init.zig index 48343dd..9c1378f 100644 --- a/src/platform/c_process_init.zig +++ b/src/platform/c_process_init.zig @@ -1,24 +1,23 @@ const std = @import("std"); const c_io = @import("c_io.zig"); +const c = @import("nintendo_c.zig").c; const options = @import("options"); const ProcessHeap = if (options.config.platform == .nintendo_3ds) struct { - extern fn linearMemAlign(size: usize, alignment: usize) ?*anyopaque; - extern fn linearFree(ptr: ?*anyopaque) void; - fn alloc(alignment: usize, size: 
usize) ?*anyopaque { - return linearMemAlign(size, alignment); + return c.linearMemAlign(size, alignment); } fn free(ptr: ?*anyopaque) void { - linearFree(ptr); + c.linearFree(ptr); } } else struct { - extern fn memalign(alignment: usize, size: usize) ?*anyopaque; - extern fn free(ptr: ?*anyopaque) void; - fn alloc(alignment: usize, size: usize) ?*anyopaque { - return memalign(alignment, size); + return c.memalign(alignment, size); + } + + fn free(ptr: ?*anyopaque) void { + c.free(ptr); } }; diff --git a/src/platform/nintendo_c.zig b/src/platform/nintendo_c.zig new file mode 100644 index 0000000..e5efa1d --- /dev/null +++ b/src/platform/nintendo_c.zig @@ -0,0 +1,41 @@ +const options = @import("options"); + +pub const c = @cImport({ + @cUndef("_GNU_SOURCE"); + @cUndef("_DEFAULT_SOURCE"); + @cDefine("_POSIX_C_SOURCE", "200809L"); + @cDefine("wint_t", "__WINT_TYPE__"); + + switch (options.config.platform) { + .nintendo_3ds => { + @cDefine("__3DS__", "1"); + @cDefine("ARM11", "1"); + }, + .nintendo_switch => { + @cDefine("__SWITCH__", "1"); + }, + else => @compileError("platform/nintendo_c.zig is only wired for Nintendo targets"), + } + + @cInclude("errno.h"); + @cInclude("fcntl.h"); + @cInclude("dirent.h"); + @cInclude("sys/stat.h"); + @cInclude("unistd.h"); + @cInclude("malloc.h"); + @cInclude("stdio.h"); + + switch (options.config.platform) { + .nintendo_3ds => { + @cInclude("3ds/types.h"); + @cInclude("3ds/allocator/linear.h"); + @cInclude("3ds/archive.h"); + @cInclude("3ds/romfs.h"); + @cInclude("3ds/svc.h"); + }, + .nintendo_switch => { + @cInclude("switch/aether_switch_import.h"); + }, + else => unreachable, + } +}); diff --git a/src/platform/switch/aether_switch_import.h b/src/platform/switch/aether_switch_import.h new file mode 100644 index 0000000..6e9393d --- /dev/null +++ b/src/platform/switch/aether_switch_import.h @@ -0,0 +1,118 @@ +#pragma once + +#include +#include +#include + +Result fsdevMountSdmc(void); +int fsdevUnmountDevice(const char *name); 
+Result romfsMountSelf(const char *name); +Result romfsUnmount(const char *name); + +u64 svcGetSystemTick(void); +void svcSleepThread(s64 nano); + +bool appletMainLoop(void); + +typedef struct HidAnalogStickState { + s32 x; + s32 y; +} HidAnalogStickState; + +typedef struct HidTouchState { + u64 delta_time; + u32 attributes; + u32 finger_id; + u32 x; + u32 y; + u32 diameter_x; + u32 diameter_y; + u32 rotation_angle; + u32 reserved; +} HidTouchState; + +typedef struct HidTouchScreenState { + u64 sampling_number; + s32 count; + u32 reserved; + HidTouchState touches[16]; +} HidTouchScreenState; + +typedef struct PadState { + u8 id_mask; + u8 active_id_mask; + bool read_handheld; + bool active_handheld; + u32 style_set; + u32 attributes; + u64 buttons_cur; + u64 buttons_old; + HidAnalogStickState sticks[2]; + u32 gc_triggers[2]; +} PadState; + +#define HidNpadStyleTag_NpadFullKey BIT(0) +#define HidNpadStyleTag_NpadHandheld BIT(1) +#define HidNpadStyleTag_NpadJoyDual BIT(2) +#define HidNpadStyleTag_NpadJoyLeft BIT(3) +#define HidNpadStyleTag_NpadJoyRight BIT(4) + +#define HidNpadIdType_No1 0 +#define HidNpadIdType_Handheld 0x20 + +#define HidNpadButton_A BITL(0) +#define HidNpadButton_B BITL(1) +#define HidNpadButton_X BITL(2) +#define HidNpadButton_Y BITL(3) +#define HidNpadButton_StickL BITL(4) +#define HidNpadButton_StickR BITL(5) +#define HidNpadButton_L BITL(6) +#define HidNpadButton_R BITL(7) +#define HidNpadButton_ZL BITL(8) +#define HidNpadButton_ZR BITL(9) +#define HidNpadButton_Plus BITL(10) +#define HidNpadButton_Minus BITL(11) +#define HidNpadButton_Left BITL(12) +#define HidNpadButton_Up BITL(13) +#define HidNpadButton_Right BITL(14) +#define HidNpadButton_Down BITL(15) +#define HidNpadButton_LeftSL BITL(24) +#define HidNpadButton_LeftSR BITL(25) +#define HidNpadButton_RightSL BITL(26) +#define HidNpadButton_RightSR BITL(27) + +#define JOYSTICK_MAX 0x7FFF + +Result hidInitialize(void); +void hidExit(void); +void hidInitializeTouchScreen(void); +size_t 
hidGetTouchScreenStates(HidTouchScreenState *states, size_t count); + +void padConfigureInput(u32 max_players, u32 style_set); +void padInitializeWithMask(PadState *pad, u64 mask); +void padUpdate(PadState *pad); + +Result swkbdCreate(void *c, s32 max_dictwords); +void swkbdClose(void *c); +void swkbdConfigMakePresetDefault(void *c); +void swkbdConfigSetOkButtonText(void *c, const char *str); +void swkbdConfigSetHeaderText(void *c, const char *str); +void swkbdConfigSetGuideText(void *c, const char *str); +void swkbdConfigSetInitialText(void *c, const char *str); +Result swkbdShow(void *c, char *out_string, size_t out_string_size); + +typedef struct AudioOutBuffer AudioOutBuffer; +struct AudioOutBuffer { + AudioOutBuffer *next; + void *buffer; + u64 buffer_size; + u64 data_size; + u64 data_offset; +}; + +Result audoutInitialize(void); +void audoutExit(void); +Result audoutStartAudioOut(void); +Result audoutStopAudioOut(void); +Result audoutAppendAudioOutBuffer(AudioOutBuffer *buffer); +Result audoutGetReleasedAudioOutBuffer(AudioOutBuffer **buffer, u32 *released_count); diff --git a/src/platform/switch/input.zig b/src/platform/switch/input.zig index 397d024..0922bb4 100644 --- a/src/platform/switch/input.zig +++ b/src/platform/switch/input.zig @@ -3,104 +3,46 @@ const std = @import("std"); const core = @import("../../core/input/input.zig"); - -const Result = u32; - -const HidAnalogStickState = extern struct { - x: i32, - y: i32, -}; - -const HidTouchState = extern struct { - delta_time: u64, - attributes: u32, - finger_id: u32, - x: u32, - y: u32, - diameter_x: u32, - diameter_y: u32, - rotation_angle: u32, - reserved: u32, -}; - -const HidTouchScreenState = extern struct { - sampling_number: u64, - count: i32, - reserved: u32, - touches: [16]HidTouchState, -}; - -const PadState = extern struct { - id_mask: u8, - active_id_mask: u8, - read_handheld: bool, - active_handheld: bool, - style_set: u32, - attributes: u32, - buttons_cur: u64, - buttons_old: u64, - sticks: 
[2]HidAnalogStickState, - gc_triggers: [2]u32, -}; - -extern fn hidInitialize() Result; -extern fn hidExit() void; -extern fn hidInitializeTouchScreen() void; -extern fn hidGetTouchScreenStates(states: [*]HidTouchScreenState, count: usize) usize; - -extern fn padConfigureInput(max_players: u32, style_set: u32) void; -extern fn padInitializeWithMask(pad: *PadState, mask: u64) void; -extern fn padUpdate(pad: *PadState) void; - -extern fn swkbdCreate(config: *anyopaque, max_dictwords: i32) Result; -extern fn swkbdClose(config: *anyopaque) void; -extern fn swkbdConfigMakePresetDefault(config: *anyopaque) void; -extern fn swkbdConfigSetOkButtonText(config: *anyopaque, text: [*:0]const u8) void; -extern fn swkbdConfigSetHeaderText(config: *anyopaque, text: [*:0]const u8) void; -extern fn swkbdConfigSetGuideText(config: *anyopaque, text: [*:0]const u8) void; -extern fn swkbdConfigSetInitialText(config: *anyopaque, text: [*:0]const u8) void; -extern fn swkbdShow(config: *anyopaque, out_string: [*]u8, out_string_size: usize) Result; - -const HID_NPAD_STYLE_FULL_KEY: u32 = 1 << 0; -const HID_NPAD_STYLE_HANDHELD: u32 = 1 << 1; -const HID_NPAD_STYLE_JOY_DUAL: u32 = 1 << 2; -const HID_NPAD_STYLE_JOY_LEFT: u32 = 1 << 3; -const HID_NPAD_STYLE_JOY_RIGHT: u32 = 1 << 4; -const HID_NPAD_STYLE_STANDARD: u32 = HID_NPAD_STYLE_FULL_KEY | HID_NPAD_STYLE_HANDHELD | HID_NPAD_STYLE_JOY_DUAL | HID_NPAD_STYLE_JOY_LEFT | HID_NPAD_STYLE_JOY_RIGHT; - -const HID_NPAD_ID_NO1: u64 = 1 << 0; -const HID_NPAD_ID_HANDHELD: u64 = 1 << 32; -const DEFAULT_PAD_MASK: u64 = HID_NPAD_ID_NO1 | HID_NPAD_ID_HANDHELD; - -const BUTTON_A: u64 = 1 << 0; -const BUTTON_B: u64 = 1 << 1; -const BUTTON_X: u64 = 1 << 2; -const BUTTON_Y: u64 = 1 << 3; -const BUTTON_STICK_L: u64 = 1 << 4; -const BUTTON_STICK_R: u64 = 1 << 5; -const BUTTON_L: u64 = 1 << 6; -const BUTTON_R: u64 = 1 << 7; -const BUTTON_ZL: u64 = 1 << 8; -const BUTTON_ZR: u64 = 1 << 9; -const BUTTON_PLUS: u64 = 1 << 10; -const BUTTON_MINUS: u64 = 1 << 11; -const 
BUTTON_LEFT: u64 = 1 << 12; -const BUTTON_UP: u64 = 1 << 13; -const BUTTON_RIGHT: u64 = 1 << 14; -const BUTTON_DOWN: u64 = 1 << 15; -const BUTTON_LEFT_SL: u64 = 1 << 24; -const BUTTON_LEFT_SR: u64 = 1 << 25; -const BUTTON_RIGHT_SL: u64 = 1 << 26; -const BUTTON_RIGHT_SR: u64 = 1 << 27; - -const JOYSTICK_MAX: f32 = 32767.0; +const c = @import("../nintendo_c.zig").c; + +const HID_NPAD_STYLE_STANDARD: u32 = c.HidNpadStyleTag_NpadFullKey | + c.HidNpadStyleTag_NpadHandheld | + c.HidNpadStyleTag_NpadJoyDual | + c.HidNpadStyleTag_NpadJoyLeft | + c.HidNpadStyleTag_NpadJoyRight; + +const DEFAULT_PAD_MASK: u64 = (@as(u64, 1) << c.HidNpadIdType_No1) | + (@as(u64, 1) << c.HidNpadIdType_Handheld); + +const BUTTON_A: u64 = c.HidNpadButton_A; +const BUTTON_B: u64 = c.HidNpadButton_B; +const BUTTON_X: u64 = c.HidNpadButton_X; +const BUTTON_Y: u64 = c.HidNpadButton_Y; +const BUTTON_STICK_L: u64 = c.HidNpadButton_StickL; +const BUTTON_STICK_R: u64 = c.HidNpadButton_StickR; +const BUTTON_L: u64 = c.HidNpadButton_L; +const BUTTON_R: u64 = c.HidNpadButton_R; +const BUTTON_ZL: u64 = c.HidNpadButton_ZL; +const BUTTON_ZR: u64 = c.HidNpadButton_ZR; +const BUTTON_PLUS: u64 = c.HidNpadButton_Plus; +const BUTTON_MINUS: u64 = c.HidNpadButton_Minus; +const BUTTON_LEFT: u64 = c.HidNpadButton_Left; +const BUTTON_UP: u64 = c.HidNpadButton_Up; +const BUTTON_RIGHT: u64 = c.HidNpadButton_Right; +const BUTTON_DOWN: u64 = c.HidNpadButton_Down; +const BUTTON_LEFT_SL: u64 = c.HidNpadButton_LeftSL; +const BUTTON_LEFT_SR: u64 = c.HidNpadButton_LeftSR; +const BUTTON_RIGHT_SL: u64 = c.HidNpadButton_RightSL; +const BUTTON_RIGHT_SR: u64 = c.HidNpadButton_RightSR; + +const JOYSTICK_MAX: f32 = @floatFromInt(c.JOYSTICK_MAX); const MAX_TEXT_BYTES: usize = 1024; const SWKBD_CONFIG_BYTES: usize = 0x600; const axis_count = @typeInfo(core.Axis).@"enum".fields.len; var initialized: bool = false; -var pad: PadState = undefined; +var pad: c.PadState = undefined; var prev_buttons: u64 = 0; var prev_axes: [axis_count]f32 = 
@splat(0.0); var prev_touch_down: bool = false; @@ -108,7 +50,7 @@ var prev_touch_pos: core.Vec2 = .{}; pub fn setup(_: std.mem.Allocator, _: std.Io) void { initialized = false; - pad = std.mem.zeroes(PadState); + pad = std.mem.zeroes(c.PadState); prev_buttons = 0; prev_axes = @splat(0.0); prev_touch_down = false; @@ -116,21 +58,21 @@ pub fn setup(_: std.mem.Allocator, _: std.Io) void { } pub fn init() anyerror!void { - if (hidInitialize() != 0) return error.InputInitFailed; - hidInitializeTouchScreen(); - padConfigureInput(1, HID_NPAD_STYLE_STANDARD); - padInitializeWithMask(&pad, DEFAULT_PAD_MASK); + if (c.hidInitialize() != 0) return error.InputInitFailed; + c.hidInitializeTouchScreen(); + c.padConfigureInput(1, HID_NPAD_STYLE_STANDARD); + c.padInitializeWithMask(&pad, DEFAULT_PAD_MASK); initialized = true; } pub fn deinit() void { if (!initialized) return; - hidExit(); + c.hidExit(); initialized = false; } pub fn pump() void { - padUpdate(&pad); + c.padUpdate(&pad); diff_buttons(pad.buttons_cur); pump_axes(pad.buttons_cur); @@ -153,21 +95,21 @@ pub fn begin_text_input_session(target: core.TextInputTarget, options: core.Text var target_buf: [128:0]u8 = @splat(0); const target_text = copy_z(&target_buf, target.id); - if (swkbdCreate(config, 0) != 0) { + if (c.swkbdCreate(config, 0) != 0) { core.write_text_session_buffer(initial_buf[0..initial_len], .cancelled); return; } - defer swkbdClose(config); + defer c.swkbdClose(config); - swkbdConfigMakePresetDefault(config); - swkbdConfigSetOkButtonText(config, "OK"); - swkbdConfigSetHeaderText(config, target_text.ptr); - swkbdConfigSetGuideText(config, target_text.ptr); - swkbdConfigSetInitialText(config, initial.ptr); + c.swkbdConfigMakePresetDefault(config); + c.swkbdConfigSetOkButtonText(config, "OK"); + c.swkbdConfigSetHeaderText(config, target_text.ptr); + c.swkbdConfigSetGuideText(config, target_text.ptr); + c.swkbdConfigSetInitialText(config, initial.ptr); var out_buf: [MAX_TEXT_BYTES:0]u8 = @splat(0); const 
out_size = output_buffer_size(options.max_bytes); - if (swkbdShow(config, out_buf[0..].ptr, out_size) == 0) { + if (c.swkbdShow(config, out_buf[0..].ptr, out_size) == 0) { const len = bounded_z_len(out_buf[0..out_size]); core.write_text_session_buffer(out_buf[0..len], .submitted); } else { @@ -218,8 +160,8 @@ fn pump_axes(buttons: u64) void { } fn pump_touch() void { - var states: [1]HidTouchScreenState = undefined; - const state_count = hidGetTouchScreenStates(&states, states.len); + var states: [1]c.HidTouchScreenState = undefined; + const state_count = c.hidGetTouchScreenStates(&states, states.len); const touch_down = state_count > 0 and states[0].count > 0; if (touch_down) { diff --git a/src/platform/switch/paths.zig b/src/platform/switch/paths.zig index 77b14fe..2267f6d 100644 --- a/src/platform/switch/paths.zig +++ b/src/platform/switch/paths.zig @@ -1,24 +1,20 @@ const std = @import("std"); - -extern fn fsdevMountSdmc() u32; -extern fn fsdevUnmountDevice(name: [*:0]const u8) c_int; -extern fn romfsMountSelf(name: [*:0]const u8) u32; -extern fn romfsUnmount(name: [*:0]const u8) u32; +const c = @import("../nintendo_c.zig").c; pub fn mountData() bool { - return fsdevMountSdmc() == 0; + return c.fsdevMountSdmc() == 0; } pub fn unmountData() void { - _ = fsdevUnmountDevice("sdmc"); + _ = c.fsdevUnmountDevice("sdmc"); } pub fn mountResources() bool { - return romfsMountSelf("romfs") == 0; + return c.romfsMountSelf("romfs") == 0; } pub fn unmountResources() void { - _ = romfsUnmount("romfs"); + _ = c.romfsUnmount("romfs"); } pub fn dataRoot(buffer: []u8, app_name: []const u8) error{NameTooLong}![]const u8 { diff --git a/src/platform/switch/surface.zig b/src/platform/switch/surface.zig index 8d49220..80f8f10 100644 --- a/src/platform/switch/surface.zig +++ b/src/platform/switch/surface.zig @@ -7,8 +7,7 @@ const std = @import("std"); const Self = @This(); - -extern fn appletMainLoop() bool; +const c = @import("../nintendo_c.zig").c; alloc: std.mem.Allocator, @@ -17,7 
+16,7 @@ pub fn init(_: *Self, _: u32, _: u32, _: [:0]const u8, _: bool, _: bool, _: bool pub fn deinit(_: *Self) void {} pub fn update(_: *Self) bool { - return appletMainLoop(); + return c.appletMainLoop(); } pub fn draw(_: *Self) void {} diff --git a/src/platform/switch/switch_audio.zig b/src/platform/switch/switch_audio.zig index 35ab166..358dd11 100644 --- a/src/platform/switch/switch_audio.zig +++ b/src/platform/switch/switch_audio.zig @@ -7,6 +7,7 @@ const std = @import("std"); const Stream = @import("../../audio/stream.zig").Stream; const PcmFormat = @import("../../audio/stream.zig").PcmFormat; +const c = @import("../nintendo_c.zig").c; const DEVICE_SAMPLE_RATE: u32 = 48_000; const DEVICE_CHANNELS: usize = 2; @@ -18,25 +19,6 @@ const OUTPUT_BUFFER_BYTES: usize = std.mem.alignForward(usize, OUTPUT_BYTES, 0x1 const TOTAL_OUTPUT_BYTES: usize = BUFFER_COUNT * OUTPUT_BUFFER_BYTES; const FP_ONE: u64 = 1 << 32; -const Result = u32; - -const AudioOutBuffer = extern struct { - next: ?*AudioOutBuffer, - buffer: ?*anyopaque, - buffer_size: u64, - data_size: u64, - data_offset: u64, -}; - -extern fn audoutInitialize() Result; -extern fn audoutExit() void; -extern fn audoutStartAudioOut() Result; -extern fn audoutStopAudioOut() Result; -extern fn audoutAppendAudioOutBuffer(buffer: *AudioOutBuffer) Result; -extern fn audoutGetReleasedAudioOutBuffer(buffer: *?*AudioOutBuffer, released_count: *u32) Result; -extern fn memalign(alignment: usize, size: usize) ?*anyopaque; -extern fn free(ptr: ?*anyopaque) void; - const SlotState = enum(u8) { inactive = 0, pending = 1, @@ -60,7 +42,7 @@ var slots: [NUM_SLOTS]Slot = init_slots(); var audio_alloc: std.mem.Allocator = undefined; var audio_io: std.Io = undefined; var output_data: ?[*]u8 = null; -var buffers: [BUFFER_COUNT]AudioOutBuffer = undefined; +var buffers: [BUFFER_COUNT]c.AudioOutBuffer = undefined; var initialized: bool = false; fn init_slots() [NUM_SLOTS]Slot { @@ -80,16 +62,16 @@ pub fn init() anyerror!void { _ = 
audio_alloc; _ = audio_io; - output_data = @ptrCast(memalign(0x1000, TOTAL_OUTPUT_BYTES) orelse return error.AudioInitFailed); + output_data = @ptrCast(c.memalign(0x1000, TOTAL_OUTPUT_BYTES) orelse return error.AudioInitFailed); @memset(output_data.?[0..TOTAL_OUTPUT_BYTES], 0); - if (audoutInitialize() != 0) { + if (c.audoutInitialize() != 0) { free_output(); return error.AudioInitFailed; } - if (audoutStartAudioOut() != 0) { - audoutExit(); + if (c.audoutStartAudioOut() != 0) { + c.audoutExit(); free_output(); return error.AudioInitFailed; } @@ -104,9 +86,9 @@ pub fn init() anyerror!void { .data_size = OUTPUT_BYTES, .data_offset = 0, }; - if (audoutAppendAudioOutBuffer(buf) != 0) { - _ = audoutStopAudioOut(); - audoutExit(); + if (c.audoutAppendAudioOutBuffer(buf) != 0) { + _ = c.audoutStopAudioOut(); + c.audoutExit(); initialized = false; free_output(); return error.AudioInitFailed; @@ -116,8 +98,8 @@ pub fn init() anyerror!void { pub fn deinit() void { if (initialized) { - _ = audoutStopAudioOut(); - audoutExit(); + _ = c.audoutStopAudioOut(); + c.audoutExit(); initialized = false; } @@ -132,14 +114,14 @@ pub fn update() void { if (!initialized) return; while (true) { - var released: ?*AudioOutBuffer = null; + var released: ?*c.AudioOutBuffer = null; var released_count: u32 = 0; - if (audoutGetReleasedAudioOutBuffer(&released, &released_count) != 0) return; + if (c.audoutGetReleasedAudioOutBuffer(&released, &released_count) != 0) return; if (released_count == 0 or released == null) return; const buf = released.?; fill_output_buffer(buf); - _ = audoutAppendAudioOutBuffer(buf); + _ = c.audoutAppendAudioOutBuffer(buf); } } @@ -177,7 +159,7 @@ pub fn is_slot_active(slot: u8) bool { return slots[slot].state != .inactive and slots[slot].state != .finished; } -fn fill_output_buffer(buf: *AudioOutBuffer) void { +fn fill_output_buffer(buf: *c.AudioOutBuffer) void { const out: [*]i16 = @ptrCast(@alignCast(buf.buffer.?)); for (0..SAMPLES_PER_BUF) |frame| { @@ -250,7 +232,7 
@@ fn clamp_i16(v: i32) i16 { fn free_output() void { if (output_data) |data| { - free(data); + c.free(data); output_data = null; } } diff --git a/src/platform/switch/time.zig b/src/platform/switch/time.zig index 35ea360..9ab8691 100644 --- a/src/platform/switch/time.zig +++ b/src/platform/switch/time.zig @@ -1,11 +1,9 @@ const std = @import("std"); - -extern fn svcGetSystemTick() u64; -extern fn svcSleepThread(ns: i64) void; +const c = @import("../nintendo_c.zig").c; pub fn now(clock: std.Io.Clock) std.Io.Timestamp { return switch (clock) { - .real, .awake, .boot => .fromNanoseconds(@intCast((@as(u128, svcGetSystemTick()) * 625) / 12)), + .real, .awake, .boot => .fromNanoseconds(@intCast((@as(u128, c.svcGetSystemTick()) * 625) / 12)), else => std.debug.panic("switch std.Io clock {s} is not implemented", .{@tagName(clock)}), }; } @@ -20,7 +18,7 @@ pub fn clockResolution(clock: std.Io.Clock) std.Io.Clock.ResolutionError!std.Io. pub fn sleep(timeout: std.Io.Timeout) std.Io.Cancelable!void { const ns = timeoutNanoseconds(timeout); if (ns <= 0) return; - svcSleepThread(clampNs(ns)); + c.svcSleepThread(clampNs(ns)); } fn timeoutNanoseconds(timeout: std.Io.Timeout) i96 { From 00864b9423fc84cff6ccbdcf697b46b0e6969674 Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Sat, 6 Jun 2026 14:45:27 -0400 Subject: [PATCH 34/44] Fix 3DS Hang --- build.zig | 6 +++++ src/platform/3ds/3ds_audio.zig | 7 ++++++ src/platform/3ds/3ds_gfx.zig | 42 ++++++++++++++++++++++++++++++++++ src/platform/3ds/input.zig | 5 ++++ src/platform/3ds/surface.zig | 20 ++++++++++++++-- src/platform/c_io.zig | 13 +++++++++++ src/platform/platform.zig | 1 + 7 files changed, 92 insertions(+), 2 deletions(-) diff --git a/build.zig b/build.zig index 1be496e..86383b3 100644 --- a/build.zig +++ b/build.zig @@ -194,6 +194,11 @@ fn addNintendoCImportPaths(owner: *std.Build, mod: *std.Build.Module, config: Co .nintendo_3ds => { // Keep newlib before libctru so libctru's include_next sys wrappers // resolve during 
Zig's C translation of Citro3D/libctru headers. + // + // Zig's 3DS C import can otherwise see newlib's fortified unistd + // wrappers and emit references to __ssp_real_* symbols. devkitARM is + // built without libssp, so keep fortify off for translated SDK calls. + mod.addCMacro("_FORTIFY_SOURCE", "0"); mod.addIncludePath(.{ .cwd_relative = b.pathJoin(&.{ dkp, "devkitARM/arm-none-eabi/include" }) }); mod.addIncludePath(.{ .cwd_relative = b.pathJoin(&.{ dkp, "libctru/include" }) }); }, @@ -1158,6 +1163,7 @@ fn threedsxPipeline(b: *std.Build, exe: *std.Build.Step.Compile, opts: ExportOpt link.addArgs(&.{ "-mword-relocations", "-ffunction-sections", + "-D_FORTIFY_SOURCE=0", "-D__3DS__", "-DARM11", if (exe.root_module.optimize != .Debug or exe.root_module.optimize == .ReleaseSmall) "-O2" else if (exe.root_module.optimize == .ReleaseSmall) "-Os" else "-O0", diff --git a/src/platform/3ds/3ds_audio.zig b/src/platform/3ds/3ds_audio.zig index 750a338..15d736e 100644 --- a/src/platform/3ds/3ds_audio.zig +++ b/src/platform/3ds/3ds_audio.zig @@ -5,6 +5,7 @@ //! `update`; NDSP handles sample-rate conversion and channel mixing. 
const std = @import("std"); +const surface = @import("surface.zig"); const Stream = @import("../../audio/stream.zig").Stream; const PcmFormat = @import("../../audio/stream.zig").PcmFormat; @@ -111,6 +112,12 @@ pub fn init() anyerror!void { } pub fn deinit() void { + if (surface.is_system_closing()) { + slots = init_slots(); + audio_data = null; + return; + } + for (0..NUM_SLOTS) |i| { ndspChnWaveBufClear(@intCast(i)); ndspChnReset(@intCast(i)); diff --git a/src/platform/3ds/3ds_gfx.zig b/src/platform/3ds/3ds_gfx.zig index e1707e3..e97e35f 100644 --- a/src/platform/3ds/3ds_gfx.zig +++ b/src/platform/3ds/3ds_gfx.zig @@ -7,6 +7,7 @@ const Rendering = @import("../../rendering/rendering.zig"); const vertex = Rendering.vertex; const Mesh = Rendering.mesh; const Texture = Rendering.Texture; +const surface = @import("surface.zig"); const shaders = @import("aether_shaders"); const c = @cImport({ @@ -224,6 +225,11 @@ pub fn init() anyerror!void { } pub fn deinit() void { + if (surface.is_system_closing()) { + abandon_service_resources(); + return; + } + frame_started = false; if (initialized) c.C3D_FrameSync(); release_completed_mesh_slots(); @@ -334,6 +340,8 @@ pub fn set_view_matrix(mat: *const Mat4) void { } pub fn start_frame() bool { + if (surface.is_system_closing()) return false; + const t = target orelse return false; const flags: u8 = @intCast(if (vsync_enabled) c.C3D_FRAME_SYNCDRAW else c.C3D_FRAME_NONBLOCK); @@ -358,6 +366,10 @@ pub fn start_frame() bool { pub fn end_frame() void { if (!frame_started) return; + if (surface.is_system_closing()) { + frame_started = false; + return; + } mark_current_frame_mesh_slots_in_flight(); c.C3D_FrameEnd(0); frame_started = false; @@ -441,6 +453,11 @@ pub fn create_mesh() anyerror!Mesh.Handle { } pub fn destroy_mesh(handle: Mesh.Handle) void { + if (surface.is_system_closing()) { + _ = meshes.remove_element(handle); + return; + } + if (mesh_slot(handle)) |mesh| { free_mesh_slots(mesh); } @@ -448,6 +465,8 @@ pub fn 
destroy_mesh(handle: Mesh.Handle) void { } pub fn update_mesh(handle: Mesh.Handle, data: []const u8) void { + if (surface.is_system_closing()) return; + const mesh = mesh_slot(handle) orelse return; if (data.len > std.math.maxInt(u32)) { std.debug.panic("3ds_gfx: mesh vertex data is too large to flush", .{}); @@ -475,6 +494,8 @@ pub fn update_mesh(handle: Mesh.Handle, data: []const u8) void { } pub fn draw_mesh(handle: Mesh.Handle, model: *const Mat4, count: usize) void { + if (surface.is_system_closing()) return; + if (!render_pipeline_initialized) return; const mesh = mesh_slot(handle) orelse return; const pl = &render_pipeline; @@ -526,6 +547,8 @@ pub fn create_texture(width: u32, height: u32, data: []align(16) u8) anyerror!Te } pub fn update_texture(handle: Texture.Handle, data: []align(16) u8) void { + if (surface.is_system_closing()) return; + const tex = texture_slot(handle) orelse return; const size = texture_size(tex.width, tex.height); if (data.len < size) return; @@ -537,11 +560,19 @@ pub fn update_texture(handle: Texture.Handle, data: []align(16) u8) void { } pub fn bind_texture(handle: Texture.Handle) void { + if (surface.is_system_closing()) return; + bound_texture = handle; rebind_texture(); } pub fn destroy_texture(handle: Texture.Handle) void { + if (surface.is_system_closing()) { + if (bound_texture == handle) bound_texture = 0; + _ = textures.remove_element(handle); + return; + } + if (texture_slot(handle)) |tex| { c.C3D_TexDelete(tex_ptr(tex)); free_texture_staging(tex); @@ -633,6 +664,17 @@ fn release_completed_mesh_slots() void { } } +fn abandon_service_resources() void { + frame_started = false; + initialized = false; + render_pipeline_initialized = false; + target = null; + bound_texture = 0; + meshes.clear(); + textures.clear(); + deferred_mesh_frees.clear(); +} + fn mark_current_frame_mesh_slots_in_flight() void { for (1..meshes.buffer.len) |i| { if (meshes.buffer[i]) |*mesh| { diff --git a/src/platform/3ds/input.zig 
b/src/platform/3ds/input.zig index 6e7d8d6..a93abe7 100644 --- a/src/platform/3ds/input.zig +++ b/src/platform/3ds/input.zig @@ -4,6 +4,7 @@ const std = @import("std"); const core = @import("../../core/input/input.zig"); +const surface = @import("surface.zig"); const Result = c_int; @@ -84,6 +85,10 @@ pub fn init() anyerror!void { pub fn deinit() void { if (!initialized) return; + if (surface.is_system_closing()) { + initialized = false; + return; + } hidExit(); initialized = false; } diff --git a/src/platform/3ds/surface.zig b/src/platform/3ds/surface.zig index f7bb7df..c166a84 100644 --- a/src/platform/3ds/surface.zig +++ b/src/platform/3ds/surface.zig @@ -8,19 +8,35 @@ const std = @import("std"); const Self = @This(); extern fn aptMainLoop() bool; +extern fn aptShouldClose() bool; + +var system_closing = false; alloc: std.mem.Allocator, -pub fn init(_: *Self, _: u32, _: u32, _: [:0]const u8, _: bool, _: bool, _: bool) anyerror!void {} +pub fn init(_: *Self, _: u32, _: u32, _: [:0]const u8, _: bool, _: bool, _: bool) anyerror!void { + system_closing = false; +} pub fn deinit(_: *Self) void {} pub fn update(_: *Self) bool { - return aptMainLoop(); + if (system_closing) return false; + + const keep_running = aptMainLoop(); + if (!keep_running or aptShouldClose()) { + system_closing = true; + return false; + } + return true; } pub fn draw(_: *Self) void {} +pub fn is_system_closing() bool { + return system_closing; +} + pub fn get_width(_: *Self) u32 { return 400; } diff --git a/src/platform/c_io.zig b/src/platform/c_io.zig index 64a8ffd..3adc5e7 100644 --- a/src/platform/c_io.zig +++ b/src/platform/c_io.zig @@ -15,6 +15,14 @@ const platform_paths = switch (options.config.platform) { .nintendo_switch => @import("switch/paths.zig"), else => unreachable, }; +const platform_lifecycle = switch (options.config.platform) { + .nintendo_3ds => @import("3ds/surface.zig"), + else => struct { + pub fn is_system_closing() bool { + return false; + } + }, +}; const c = 
@import("nintendo_c.zig").c; const max_path_bytes = 1024; @@ -129,6 +137,11 @@ pub fn dataRoot(buffer: []u8, app_name: []const u8) error{NameTooLong}![]const u pub fn deinitAppDirs() void { for (&dir_slots) |*slot| slot.used = false; + if (platform_lifecycle.is_system_closing()) { + resources_mounted = false; + data_mounted = false; + return; + } if (resources_mounted) { platform_paths.unmountResources(); resources_mounted = false; diff --git a/src/platform/platform.zig b/src/platform/platform.zig index f10d1aa..4acd083 100644 --- a/src/platform/platform.zig +++ b/src/platform/platform.zig @@ -30,6 +30,7 @@ pub fn update(engine: *Engine) void { if (!gfx.surface.update()) { // Window should close engine.running = false; + return; } audio.update(); } From 5b3b5cbfbd9e29090e6c4561baa0e743e7bc4a44 Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Sat, 6 Jun 2026 15:10:21 -0400 Subject: [PATCH 35/44] Simplify 3DS backend --- src/platform/3ds/3ds_gfx.zig | 253 ++++------------------------------- src/rendering/Vertex.zig | 21 ++- 2 files changed, 38 insertions(+), 236 deletions(-) diff --git a/src/platform/3ds/3ds_gfx.zig b/src/platform/3ds/3ds_gfx.zig index e97e35f..40fc172 100644 --- a/src/platform/3ds/3ds_gfx.zig +++ b/src/platform/3ds/3ds_gfx.zig @@ -4,7 +4,6 @@ const std = @import("std"); const Util = @import("../../util/util.zig"); const Mat4 = @import("../../math/math.zig").Mat4; const Rendering = @import("../../rendering/rendering.zig"); -const vertex = Rendering.vertex; const Mesh = Rendering.mesh; const Texture = Rendering.Texture; const surface = @import("surface.zig"); @@ -49,7 +48,6 @@ const SCREEN_WIDTH: u32 = 400; const SCREEN_HEIGHT: u32 = 240; const TARGET_WIDTH: c_int = 240; const TARGET_HEIGHT: c_int = 400; -const MAX_VERTEX_ATTRS: usize = 12; const MESH_SLOT_COUNT: usize = 2; const MAX_DEFERRED_MESH_FREES: usize = 4096; const C3D_CMD_BUFFER_SIZE: usize = 1024 * 1024; @@ -70,26 +68,28 @@ const DISPLAY_TRANSFER_FLAGS: u32 = @intCast( 
c.GX_TRANSFER_SCALING(c.GX_TRANSFER_SCALE_NO), ); -const ShaderType = c.GPU_SHADER_TYPE; -const VERTEX_SHADER: ShaderType = @intCast(c.GPU_VERTEX_SHADER); const VERTEX_SHADER_INDEX: usize = 0; +const VERTEX_STRIDE: usize = @sizeOf(Rendering.Vertex); +const VERTEX_ATTR_COUNT: c_int = 3; +const VERTEX_BUFFER_PERMUTATION: u64 = 0x210; // buffer order pos,uv,color -> shader v0,v1,v2 +const VERTEX_POSITION_REG: c_int = 0; +const VERTEX_UV_REG: c_int = 1; +const VERTEX_COLOR_REG: c_int = 2; +const POS_SCALE: [4]f32 = .{ snorm16_scale(), snorm16_scale(), snorm16_scale(), 1.0 }; +const UV_SCALE: [2]f32 = .{ snorm16_scale(), snorm16_scale() }; +const COLOR_SCALE: [4]f32 = .{ unorm8_scale(), unorm8_scale(), unorm8_scale(), unorm8_scale() }; -const BufferBinding = struct { - offset: usize, - vertex_span: usize, - attrib_count: c_int, - permutation: u64, -}; +comptime { + std.debug.assert(VERTEX_STRIDE == 16); + std.debug.assert(@offsetOf(Rendering.Vertex, "pos") == 0); + std.debug.assert(@offsetOf(Rendering.Vertex, "uv") == 8); + std.debug.assert(@offsetOf(Rendering.Vertex, "color") == 12); +} const PipelineData = struct { dvlb: [*c]c.DVLB_s, program: c.shaderProgram_s, attr_info: c.C3D_AttrInfo, - stride: usize, - buffer: BufferBinding, - pos_scale: [4]f32, - uv_scale: [2]f32, - color_scale: [4]f32, u_projection: c_int, u_model_view: c_int, u_pos_scale: c_int, @@ -215,7 +215,7 @@ pub fn init() anyerror!void { c.C3D_RenderTargetSetOutput(target, c.GFX_TOP, c.GFX_LEFT, DISPLAY_TRANSFER_FLAGS); init_projection_transform(); - render_pipeline = try init_pipeline(vertex.Layout); + render_pipeline = try init_pipeline(); render_pipeline_initialized = true; initialized = true; @@ -385,9 +385,8 @@ pub fn set_vsync(v: bool) void { vsync_enabled = v; } -fn init_pipeline(layout: vertex.VertexLayout) !PipelineData { +fn init_pipeline() !PipelineData { const code: [:0]align(4) const u8 = &shaders.basic_vert; - if (layout.stride == 0 or layout.attributes.len > MAX_VERTEX_ATTRS) return 
error.UnsupportedVertexLayout; const dvlb = c.DVLB_ParseFile(@ptrCast(@constCast(code.ptr)), @intCast(code.len)); if (dvlb == null or dvlb[0].numDVLE == 0) return error.InvalidShader; @@ -399,38 +398,16 @@ fn init_pipeline(layout: vertex.VertexLayout) !PipelineData { if (c.shaderProgramSetVsh(&program, &dvlb[0].DVLE[0]) != 0) return error.InvalidShader; - for (layout.attributes) |attr| { - if (attr.binding != 0) return error.UnsupportedVertexLayout; - } - - const position_attr = find_attr(layout, .position) orelse return error.UnsupportedVertexLayout; - const uv_attr = find_attr(layout, .uv) orelse return error.UnsupportedVertexLayout; - const color_attr = find_attr(layout, .color) orelse return error.UnsupportedVertexLayout; - const pos_scale = position_scale(position_attr) orelse return error.UnsupportedVertexLayout; - const uv_attr_scale = uv_scale(uv_attr) orelse return error.UnsupportedVertexLayout; - const color_attr_scale = color_scale(color_attr) orelse return error.UnsupportedVertexLayout; - const buffer_layout = buffer_layout_from_attrs(layout.stride, position_attr, uv_attr, color_attr) orelse return error.UnsupportedVertexLayout; - var attr_info: c.C3D_AttrInfo = undefined; c.AttrInfo_Init(&attr_info); - if (add_attr_loader(&attr_info, 0, position_attr, buffer_layout.position_loader_size) < 0) return error.UnsupportedVertexLayout; - if (add_attr_loader(&attr_info, 1, uv_attr, buffer_layout.uv_loader_size) < 0) return error.UnsupportedVertexLayout; - if (add_attr_loader(&attr_info, 2, color_attr, buffer_layout.color_loader_size) < 0) return error.UnsupportedVertexLayout; + if (c.AttrInfo_AddLoader(&attr_info, VERTEX_POSITION_REG, c.GPU_SHORT, 4) < 0) return error.UnsupportedVertexLayout; + if (c.AttrInfo_AddLoader(&attr_info, VERTEX_UV_REG, c.GPU_SHORT, 2) < 0) return error.UnsupportedVertexLayout; + if (c.AttrInfo_AddLoader(&attr_info, VERTEX_COLOR_REG, c.GPU_UNSIGNED_BYTE, 4) < 0) return error.UnsupportedVertexLayout; return .{ .dvlb = dvlb, .program = 
program, .attr_info = attr_info, - .stride = layout.stride, - .buffer = .{ - .offset = buffer_layout.base_offset, - .vertex_span = buffer_layout.vertex_span, - .attrib_count = buffer_layout.attribute_count, - .permutation = buffer_layout.permutation, - }, - .pos_scale = pos_scale, - .uv_scale = uv_attr_scale, - .color_scale = color_attr_scale, .u_projection = c.shaderInstanceGetUniformLocation(program.vertexShader, "projection"), .u_model_view = c.shaderInstanceGetUniformLocation(program.vertexShader, "modelView"), .u_pos_scale = c.shaderInstanceGetUniformLocation(program.vertexShader, "posScale"), @@ -504,7 +481,7 @@ pub fn draw_mesh(handle: Mesh.Handle, model: *const Mat4, count: usize) void { const data = slot.data orelse return; if (count == 0 or slot.len == 0) return; - const needed = mesh_draw_bytes_needed(pl, count) orelse return; + const needed = mesh_draw_bytes_needed(count) orelse return; if (needed > slot.len) return; bind_vertex_state(pl); @@ -513,8 +490,7 @@ pub fn draw_mesh(handle: Mesh.Handle, model: *const Mat4, count: usize) void { const buf = c.C3D_GetBufInfo() orelse return; c.BufInfo_Init(buf); - const ptr = @as([*]const u8, data.ptr) + pl.buffer.offset; - const added = c.BufInfo_Add(buf, ptr, @intCast(pl.stride), pl.buffer.attrib_count, pl.buffer.permutation); + const added = c.BufInfo_Add(buf, data.ptr, @intCast(VERTEX_STRIDE), VERTEX_ATTR_COUNT, VERTEX_BUFFER_PERMUTATION); if (added < 0) { c.BufInfo_Init(buf); return; @@ -688,16 +664,11 @@ fn mark_current_frame_mesh_slots_in_flight() void { } } -fn mesh_draw_bytes_needed(pl: *const PipelineData, count: usize) ?usize { +fn mesh_draw_bytes_needed(count: usize) ?usize { if (count == 0) return 0; - - const tail_count = count - 1; const max = std.math.maxInt(usize); - if (pl.stride != 0 and tail_count > (max - pl.buffer.vertex_span) / pl.stride) return null; - - const rel_end = tail_count * pl.stride + pl.buffer.vertex_span; - if (pl.buffer.offset > max - rel_end) return null; - return 
pl.buffer.offset + rel_end; + if (count > max / VERTEX_STRIDE) return null; + return count * VERTEX_STRIDE; } fn apply_render_state() void { @@ -778,14 +749,14 @@ fn upload_draw_uniforms(pl: *PipelineData, model: *const Mat4) void { upload_matrix(pl.u_projection, &projection); upload_matrix(pl.u_model_view, &model_view_c3d); - upload_vec4(pl.u_pos_scale, pl.pos_scale); + upload_vec4(pl.u_pos_scale, POS_SCALE); upload_vec4(pl.u_uv_scale_offset, .{ - pl.uv_scale[0], - pl.uv_scale[1], + UV_SCALE[0], + UV_SCALE[1], uv_offset[0], uv_offset[1], }); - upload_vec4(pl.u_color_scale, pl.color_scale); + upload_vec4(pl.u_color_scale, COLOR_SCALE); } fn upload_matrix(location: c_int, matrix: *const c.C3D_Mtx) void { @@ -1006,172 +977,6 @@ fn texture_slot(handle: Texture.Handle) ?*TextureData { return null; } -fn find_attr(layout: vertex.VertexLayout, usage: vertex.AttributeUsage) ?vertex.Attribute { - for (layout.attributes) |attr| { - if (attr.usage == usage) return attr; - } - return null; -} - -const BufferLayout = struct { - base_offset: usize, - vertex_span: usize, - attribute_count: c_int, - permutation: u64, - position_loader_size: u8, - uv_loader_size: u8, - color_loader_size: u8, -}; - -fn buffer_layout_from_attrs(stride: usize, position_attr: vertex.Attribute, uv_attr: vertex.Attribute, color_attr: vertex.Attribute) ?BufferLayout { - var attrs = [_]vertex.Attribute{ position_attr, uv_attr, color_attr }; - sort_attrs_by_offset(&attrs); - - const base_offset = attrs[0].offset; - var current_rel: usize = 0; - var attribute_count: usize = 0; - var permutation: u64 = 0; - var position_loader_size: u8 = 0; - var uv_loader_size: u8 = 0; - var color_loader_size: u8 = 0; - - for (attrs, 0..) 
|attr, i| { - if (!attr_fits(stride, attr)) return null; - if (attr.offset < base_offset) return null; - const rel_offset = attr.offset - base_offset; - if (rel_offset != current_rel) return null; - - const next_offset = if (i + 1 < attrs.len) attrs[i + 1].offset else stride; - if (next_offset < attr.offset) return null; - const available_bytes = next_offset - attr.offset; - const loader_size = attribute_loader_size(attr, available_bytes) orelse return null; - const loaded_bytes = attribute_size_bytes_with_count(attr.format, loader_size) orelse return null; - if (loaded_bytes > available_bytes) return null; - if (i + 1 < attrs.len and loaded_bytes != available_bytes) return null; - - permutation |= attribute_loader_id(attr.usage) << @as(u6, @intCast(attribute_count * 4)); - attribute_count += 1; - switch (attr.usage) { - .position => position_loader_size = loader_size, - .uv => uv_loader_size = loader_size, - .color => color_loader_size = loader_size, - .normal => return null, - } - current_rel = rel_offset + loaded_bytes; - } - - if (stride < current_rel) return null; - if (position_loader_size == 0 or uv_loader_size == 0 or color_loader_size == 0) return null; - - return .{ - .base_offset = base_offset, - .vertex_span = current_rel, - .attribute_count = @intCast(attribute_count), - .permutation = permutation, - .position_loader_size = position_loader_size, - .uv_loader_size = uv_loader_size, - .color_loader_size = color_loader_size, - }; -} - -fn sort_attrs_by_offset(attrs: *[3]vertex.Attribute) void { - var i: usize = 1; - while (i < attrs.len) : (i += 1) { - var j = i; - while (j > 0 and attrs[j - 1].offset > attrs[j].offset) : (j -= 1) { - const tmp = attrs[j - 1]; - attrs[j - 1] = attrs[j]; - attrs[j] = tmp; - } - } -} - -fn attribute_loader_id(usage: vertex.AttributeUsage) u64 { - return switch (usage) { - .position => 0, - .uv => 1, - .color => 2, - .normal => unreachable, - }; -} - -fn attr_fits(stride: usize, attr: vertex.Attribute) bool { - const size = 
attribute_size_bytes(attr.format); - return attr.offset <= stride and size <= stride - attr.offset; -} - -fn attribute_loader_size(attr: vertex.Attribute, available_bytes: usize) ?u8 { - if (attr.usage == .position and attr.size == 3 and attribute_component_size_bytes(attr.format) == 2 and available_bytes >= 8) { - return 4; - } - if (attr.size == 0 or attr.size > 4) return null; - return @intCast(attr.size); -} - -fn attribute_size_bytes(format: vertex.AttributeFormat) usize { - return switch (format) { - .f32x2 => 8, - .f32x3 => 12, - .unorm8x2 => 2, - .unorm8x4 => 4, - .unorm16x2, .snorm16x2 => 4, - .unorm16x3, .snorm16x3 => 6, - }; -} - -fn attribute_size_bytes_with_count(format: vertex.AttributeFormat, count: u8) ?usize { - if (count == 0 or count > 4) return null; - return attribute_component_size_bytes(format) * @as(usize, count); -} - -fn attribute_component_size_bytes(format: vertex.AttributeFormat) usize { - return switch (format) { - .f32x2, .f32x3 => 4, - .unorm8x2, .unorm8x4 => 1, - .unorm16x2, .unorm16x3, .snorm16x2, .snorm16x3 => 2, - }; -} - -fn gpu_format(format: vertex.AttributeFormat) c.GPU_FORMATS { - return switch (format) { - .f32x2, .f32x3 => c.GPU_FLOAT, - .unorm8x2, .unorm8x4 => c.GPU_UNSIGNED_BYTE, - .unorm16x2, .unorm16x3, .snorm16x2, .snorm16x3 => c.GPU_SHORT, - }; -} - -fn add_attr_loader(info: *c.C3D_AttrInfo, reg_id: c_int, attr: vertex.Attribute, loader_size: u8) c_int { - return c.AttrInfo_AddLoader(info, reg_id, gpu_format(attr.format), loader_size); -} - -fn position_scale(attr: vertex.Attribute) ?[4]f32 { - if (attr.size != 3) return null; - return switch (attr.format) { - .f32x3 => .{ 1.0, 1.0, 1.0, 1.0 }, - .snorm16x3 => .{ snorm16_scale(), snorm16_scale(), snorm16_scale(), 1.0 }, - else => null, - }; -} - -fn uv_scale(attr: vertex.Attribute) ?[2]f32 { - if (attr.size != 2) return null; - return switch (attr.format) { - .f32x2 => .{ 1.0, 1.0 }, - .unorm8x2 => .{ unorm8_scale(), unorm8_scale() }, - .snorm16x2 => .{ 
snorm16_scale(), snorm16_scale() }, - else => null, - }; -} - -fn color_scale(attr: vertex.Attribute) ?[4]f32 { - if (attr.size != 4) return null; - return switch (attr.format) { - .unorm8x4 => .{ unorm8_scale(), unorm8_scale(), unorm8_scale(), unorm8_scale() }, - .f32x3 => .{ 1.0, 1.0, 1.0, 1.0 }, - else => null, - }; -} - fn unorm8_scale() f32 { return 1.0 / 255.0; } diff --git a/src/rendering/Vertex.zig b/src/rendering/Vertex.zig index 3555547..17f57d8 100644 --- a/src/rendering/Vertex.zig +++ b/src/rendering/Vertex.zig @@ -1,20 +1,17 @@ -const options = @import("options"); - -pub const Vertex = if (options.config.platform == .nintendo_3ds) Vertex3DS else VertexBaseline; - -const Vertex3DS = extern struct { +pub const Vertex = extern struct { pos: [3]i16, - uv: [2]i16, - color: u32 align(2), -}; - -const VertexBaseline = extern struct { + _pad: i16 = 0, uv: [2]i16, color: u32, - pos: [3]i16, - _pad: i16 = 0, }; +comptime { + if (@sizeOf(Vertex) != 16) @compileError("Rendering.Vertex must stay 16 bytes"); + if (@offsetOf(Vertex, "pos") != 0) @compileError("Rendering.Vertex.pos must stay at byte offset 0"); + if (@offsetOf(Vertex, "uv") != 8) @compileError("Rendering.Vertex.uv must stay at byte offset 8"); + if (@offsetOf(Vertex, "color") != 12) @compileError("Rendering.Vertex.color must stay at byte offset 12"); +} + pub const AttributeUsage = enum { position, uv, From 7ab318a6a1ec970ead19d0c72943f7465a4f0b0b Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Sat, 6 Jun 2026 16:00:47 -0400 Subject: [PATCH 36/44] Lessen corruption --- src/platform/3ds/3ds_gfx.zig | 383 +++++++++++++++++--------- src/platform/3ds/shaders/basic.v.pica | 4 +- src/rendering/Vertex.zig | 6 +- 3 files changed, 257 insertions(+), 136 deletions(-) diff --git a/src/platform/3ds/3ds_gfx.zig b/src/platform/3ds/3ds_gfx.zig index 40fc172..e490d81 100644 --- a/src/platform/3ds/3ds_gfx.zig +++ b/src/platform/3ds/3ds_gfx.zig @@ -15,6 +15,7 @@ const c = @cImport({ @cInclude("3ds/gpu/enums.h"); 
@cInclude("3ds/gpu/gpu.h"); @cInclude("3ds/gpu/gx.h"); + @cInclude("3ds/os.h"); @cInclude("3ds/services/gspgpu.h"); @cInclude("3ds/gfx.h"); @cInclude("3ds/allocator/vram.h"); @@ -54,10 +55,14 @@ const C3D_CMD_BUFFER_SIZE: usize = 1024 * 1024; const MAX_TEXTURE_SIZE: u32 = 1024; const MIN_TEXTURE_SIZE: u32 = 8; const TEX_BPP: usize = 4; +const CACHE_LINE_SIZE: usize = 32; const OS_FCRAM_VADDR: usize = 0x30000000; const OS_FCRAM_SIZE: usize = 0x10000000; const OS_OLD_FCRAM_VADDR: usize = 0x14000000; const OS_OLD_FCRAM_SIZE: usize = 0x08000000; +const BUFFER_BASE_PADDR: u32 = 0x18000000; +const SH_MODE_VSH: u32 = 0xA0000000; +const FLOAT_UNIFORM_UPLOAD_F32: u32 = 0x80000000; const DISPLAY_TRANSFER_FLAGS: u32 = @intCast( c.GX_TRANSFER_FLIP_VERT(0) | @@ -71,19 +76,21 @@ const DISPLAY_TRANSFER_FLAGS: u32 = @intCast( const VERTEX_SHADER_INDEX: usize = 0; const VERTEX_STRIDE: usize = @sizeOf(Rendering.Vertex); const VERTEX_ATTR_COUNT: c_int = 3; -const VERTEX_BUFFER_PERMUTATION: u64 = 0x210; // buffer order pos,uv,color -> shader v0,v1,v2 +const VERTEX_BUFFER_PERMUTATION: u64 = 0x210; // buffer order pos,color,uv -> shader v0,v1,v2 const VERTEX_POSITION_REG: c_int = 0; -const VERTEX_UV_REG: c_int = 1; -const VERTEX_COLOR_REG: c_int = 2; +const VERTEX_COLOR_REG: c_int = 1; +const VERTEX_UV_REG: c_int = 2; const POS_SCALE: [4]f32 = .{ snorm16_scale(), snorm16_scale(), snorm16_scale(), 1.0 }; const UV_SCALE: [2]f32 = .{ snorm16_scale(), snorm16_scale() }; const COLOR_SCALE: [4]f32 = .{ unorm8_scale(), unorm8_scale(), unorm8_scale(), unorm8_scale() }; +extern fn C3Di_UpdateContext() void; + comptime { std.debug.assert(VERTEX_STRIDE == 16); std.debug.assert(@offsetOf(Rendering.Vertex, "pos") == 0); - std.debug.assert(@offsetOf(Rendering.Vertex, "uv") == 8); - std.debug.assert(@offsetOf(Rendering.Vertex, "color") == 12); + std.debug.assert(@offsetOf(Rendering.Vertex, "color") == 8); + std.debug.assert(@offsetOf(Rendering.Vertex, "uv") == 12); } const PipelineData = struct { @@ 
-131,44 +138,13 @@ const TextureData = struct { width: u32, height: u32, tex: TexMirror, - staging: ?[]align(16) u8 = null, -}; - -const TexEnvMirror = extern struct { - src_rgb: u16, - src_alpha: u16, - op_all: u32, - func_rgb: u16, - func_alpha: u16, - color: u32, - scale_rgb: u16, - scale_alpha: u16, -}; - -comptime { - std.debug.assert(@sizeOf(TexEnvMirror) == 20); -} - -const FrameBufMirror = extern struct { - color_buf: ?*anyopaque, - depth_buf: ?*anyopaque, - width: u16, - height: u16, - color_fmt: c.GPU_COLORBUF, - depth_fmt: c.GPU_DEPTHBUF, - block32: bool, - masks: u8, -}; - -const RenderTargetMirror = extern struct { - next: ?*c.C3D_RenderTarget, - prev: ?*c.C3D_RenderTarget, - frame_buf: FrameBufMirror, + staging: ?[]align(16) u32 = null, }; var meshes = Util.CircularBuffer(MeshData, 2048).init(); var deferred_mesh_frees = Util.CircularBuffer(DeferredMeshFree, MAX_DEFERRED_MESH_FREES + 1).init(); var textures = Util.CircularBuffer(TextureData, 64).init(); +var sequential_indices: ?[]align(CACHE_LINE_SIZE) u16 = null; var render_pipeline: PipelineData = undefined; var render_pipeline_initialized = false; @@ -221,7 +197,6 @@ pub fn init() anyerror!void { initialized = true; frame_started = false; apply_render_state(); - init_texenvs(); } pub fn deinit() void { @@ -254,6 +229,7 @@ pub fn deinit() void { } } meshes.clear(); + free_index_buffer(); if (target) |t| { c.C3D_RenderTargetDelete(t); @@ -349,17 +325,15 @@ pub fn start_frame() bool { release_completed_mesh_slots(); free_deferred_mesh_slots(); - c.C3D_FrameBufClear(target_frame_buf(t), c.C3D_CLEAR_ALL, clear_color, 0); - if (!c.C3D_FrameDrawOn(t)) { c.C3D_FrameEnd(0); return false; } frame_started = true; + clear_current_framebuffer(c.C3D_CLEAR_ALL); c.C3D_SetViewport(0, 0, TARGET_WIDTH, TARGET_HEIGHT); apply_render_state(); - init_texenvs(); rebind_texture(); return true; } @@ -371,14 +345,14 @@ pub fn end_frame() void { return; } mark_current_frame_mesh_slots_in_flight(); + finish_frame_direct(); 
c.C3D_FrameEnd(0); frame_started = false; } pub fn clear_depth() void { if (!frame_started) return; - const t = target orelse return; - c.C3D_FrameBufClear(target_frame_buf(t), c.C3D_CLEAR_DEPTH, clear_color, 0); + clear_current_framebuffer(c.C3D_CLEAR_DEPTH); } pub fn set_vsync(v: bool) void { @@ -401,8 +375,8 @@ fn init_pipeline() !PipelineData { var attr_info: c.C3D_AttrInfo = undefined; c.AttrInfo_Init(&attr_info); if (c.AttrInfo_AddLoader(&attr_info, VERTEX_POSITION_REG, c.GPU_SHORT, 4) < 0) return error.UnsupportedVertexLayout; - if (c.AttrInfo_AddLoader(&attr_info, VERTEX_UV_REG, c.GPU_SHORT, 2) < 0) return error.UnsupportedVertexLayout; if (c.AttrInfo_AddLoader(&attr_info, VERTEX_COLOR_REG, c.GPU_UNSIGNED_BYTE, 4) < 0) return error.UnsupportedVertexLayout; + if (c.AttrInfo_AddLoader(&attr_info, VERTEX_UV_REG, c.GPU_SHORT, 2) < 0) return error.UnsupportedVertexLayout; return .{ .dvlb = dvlb, @@ -464,7 +438,7 @@ pub fn update_mesh(handle: Mesh.Handle, data: []const u8) void { const dst = slot.data.?; @memcpy(dst[0..data.len], data); slot.len = data.len; - _ = c.GSPGPU_FlushDataCache(dst.ptr, @intCast(data.len)); + flush_data_cache(dst.ptr, data.len); mesh.latest_slot = slot_idx; mesh.len = data.len; @@ -484,20 +458,19 @@ pub fn draw_mesh(handle: Mesh.Handle, model: *const Mat4, count: usize) void { const needed = mesh_draw_bytes_needed(count) orelse return; if (needed > slot.len) return; - bind_vertex_state(pl); - upload_draw_uniforms(pl, model); + bind_program(pl); rebind_texture(); - const buf = c.C3D_GetBufInfo() orelse return; - c.BufInfo_Init(buf); - const added = c.BufInfo_Add(buf, data.ptr, @intCast(VERTEX_STRIDE), VERTEX_ATTR_COUNT, VERTEX_BUFFER_PERMUTATION); - if (added < 0) { - c.BufInfo_Init(buf); - return; - } + C3Di_UpdateContext(); + upload_draw_uniforms(pl, model); + bind_texenv_direct(); + bind_vertex_layout_direct(pl); + if (!bind_vertex_buffer_direct(data.ptr)) return; slot.used_this_frame = true; - c.C3D_DrawArrays(c.GPU_TRIANGLES, 0, 
@intCast(count)); + if (!draw_elements_direct(count)) { + draw_arrays_direct(0, @intCast(count)); + } } pub fn create_texture(width: u32, height: u32, data: []align(16) u8) anyerror!Texture.Handle { @@ -582,7 +555,7 @@ fn ensure_mesh_slot_capacity(slot: *MeshSlot, len: usize) !void { } const cap = mesh_slot_capacity(len); - const new_data = try render_alloc.alignedAlloc(u8, .fromByteUnits(16), cap); + const new_data = try render_alloc.alignedAlloc(u8, .fromByteUnits(CACHE_LINE_SIZE), cap); if (!is_linear_fcram(new_data.ptr, new_data.len)) { render_alloc.free(new_data); @@ -648,6 +621,7 @@ fn abandon_service_resources() void { bound_texture = 0; meshes.clear(); textures.clear(); + sequential_indices = null; deferred_mesh_frees.clear(); } @@ -671,6 +645,11 @@ fn mesh_draw_bytes_needed(count: usize) ?usize { return count * VERTEX_STRIDE; } +fn clear_current_framebuffer(bits: c.C3D_ClearBits) void { + const fb = c.C3D_GetFrameBuf() orelse return; + c.C3D_FrameBufClear(fb, bits, clear_color, 0); +} + fn apply_render_state() void { set_alpha_blend(alpha_blend_enabled); set_depth_write(depth_write_enabled); @@ -680,49 +659,169 @@ fn apply_render_state() void { } } -fn init_texenvs() void { - texenv_modulate(0); - var i: c_int = 1; - while (i < 6) : (i += 1) texenv_replace_previous(i); +fn bind_program(pl: *PipelineData) void { + c.C3D_BindProgram(&pl.program); } -fn texenv_modulate(id: c_int) void { - const env = c.C3D_GetTexEnv(id) orelse return; - const mirror: *TexEnvMirror = @ptrCast(@alignCast(env)); - const sources: u16 = @intCast(c.GPU_TEVSOURCES(c.GPU_TEXTURE0, c.GPU_PRIMARY_COLOR, 0)); - mirror.* = .{ - .src_rgb = sources, - .src_alpha = sources, - .op_all = 0, - .func_rgb = @intCast(c.GPU_MODULATE), - .func_alpha = @intCast(c.GPU_MODULATE), - .color = 0xffffffff, - .scale_rgb = @intCast(c.GPU_TEVSCALE_1), - .scale_alpha = @intCast(c.GPU_TEVSCALE_1), +fn bind_texenv_direct() void { + if (bound_texture == 0) { + bind_texenv_stage_direct(0, 
c.GPU_TEVSOURCES(c.GPU_PRIMARY_COLOR, 0, 0), c.GPU_REPLACE); + } else { + bind_texenv_stage_direct(0, c.GPU_TEVSOURCES(c.GPU_TEXTURE0, c.GPU_PRIMARY_COLOR, 0), c.GPU_MODULATE); + } + + bind_texenv_stage_direct(1, c.GPU_TEVSOURCES(c.GPU_PREVIOUS, 0, 0), c.GPU_REPLACE); + bind_texenv_stage_direct(2, c.GPU_TEVSOURCES(c.GPU_PREVIOUS, 0, 0), c.GPU_REPLACE); + bind_texenv_stage_direct(3, c.GPU_TEVSOURCES(c.GPU_PREVIOUS, 0, 0), c.GPU_REPLACE); + bind_texenv_stage_direct(4, c.GPU_TEVSOURCES(c.GPU_PREVIOUS, 0, 0), c.GPU_REPLACE); + bind_texenv_stage_direct(5, c.GPU_TEVSOURCES(c.GPU_PREVIOUS, 0, 0), c.GPU_REPLACE); +} + +fn bind_texenv_stage_direct(stage: c_int, source: u32, combiner: u32) void { + const source_both = source | (source << 16); + const combiner_both = combiner | (combiner << 16); + const regs = [_]u32{ + source_both, + 0, + combiner_both, + 0xffffffff, + @as(u32, @intCast(c.GPU_TEVSCALE_1)) | (@as(u32, @intCast(c.GPU_TEVSCALE_1)) << 16), }; - c.C3D_DirtyTexEnv(env); -} - -fn texenv_replace_previous(id: c_int) void { - const env = c.C3D_GetTexEnv(id) orelse return; - const mirror: *TexEnvMirror = @ptrCast(@alignCast(env)); - const sources: u16 = @intCast(c.GPU_TEVSOURCES(c.GPU_PREVIOUS, 0, 0)); - mirror.* = .{ - .src_rgb = sources, - .src_alpha = sources, - .op_all = 0, - .func_rgb = @intCast(c.GPU_REPLACE), - .func_alpha = @intCast(c.GPU_REPLACE), - .color = 0xffffffff, - .scale_rgb = @intCast(c.GPU_TEVSCALE_1), - .scale_alpha = @intCast(c.GPU_TEVSCALE_1), + gpu_cmd_add_incremental_writes(texenv_source_reg(stage), regs[0..]); +} + +fn texenv_source_reg(stage: c_int) c_int { + return switch (stage) { + 0 => c.GPUREG_TEXENV0_SOURCE, + 1 => c.GPUREG_TEXENV1_SOURCE, + 2 => c.GPUREG_TEXENV2_SOURCE, + 3 => c.GPUREG_TEXENV3_SOURCE, + 4 => c.GPUREG_TEXENV4_SOURCE, + 5 => c.GPUREG_TEXENV5_SOURCE, + else => unreachable, }; - c.C3D_DirtyTexEnv(env); } -fn bind_vertex_state(pl: *PipelineData) void { - c.C3D_BindProgram(&pl.program); - c.C3D_SetAttrInfo(&pl.attr_info); +fn 
bind_vertex_layout_direct(pl: *const PipelineData) void { + gpu_cmd_add_write(c.GPUREG_ATTRIBBUFFERS_LOC, BUFFER_BASE_PADDR >> 3); + gpu_cmd_add_incremental_writes(c.GPUREG_ATTRIBBUFFERS_FORMAT_LOW, pl.attr_info.flags[0..]); + gpu_cmd_add_write(c.GPUREG_VERTEX_OFFSET, 0); + gpu_cmd_add_write(c.GPUREG_ATTRIBBUFFER0_CONFIG1, @intCast(VERTEX_BUFFER_PERMUTATION)); + gpu_cmd_add_write(c.GPUREG_ATTRIBBUFFER0_CONFIG2, vertex_buffer_format()); + gpu_cmd_add_write(c.GPUREG_VSH_ATTRIBUTES_PERMUTATION_LOW, @intCast(VERTEX_BUFFER_PERMUTATION)); + gpu_cmd_add_write(c.GPUREG_VSH_ATTRIBUTES_PERMUTATION_HIGH, 0); + set_vsh_input_count_direct(VERTEX_ATTR_COUNT); +} + +fn bind_vertex_buffer_direct(data: [*]align(16) const u8) bool { + const phys = c.osConvertVirtToPhys(data); + if (phys < BUFFER_BASE_PADDR) return false; + gpu_cmd_add_write(c.GPUREG_ATTRIBBUFFER0_OFFSET, phys - BUFFER_BASE_PADDR); + return true; +} + +fn draw_elements_direct(count: usize) bool { + const indices = ensure_index_buffer(count) catch return false; + const phys = c.osConvertVirtToPhys(indices.ptr); + if (phys < BUFFER_BASE_PADDR) return false; + + gpu_cmd_add_write(c.GPUREG_INDEXBUFFER_CONFIG, (phys - BUFFER_BASE_PADDR) | (1 << 31)); + gpu_cmd_add_masked_write(c.GPUREG_PRIMITIVE_CONFIG, 2, @intCast(c.GPU_GEOMETRY_PRIM)); + gpu_cmd_add_write(c.GPUREG_RESTART_PRIMITIVE, 1); + gpu_cmd_add_write(c.GPUREG_NUMVERTICES, @intCast(count)); + gpu_cmd_add_write(c.GPUREG_VERTEX_OFFSET, 0); + gpu_cmd_add_masked_write(c.GPUREG_GEOSTAGE_CONFIG, 2, 0x100); + gpu_cmd_add_masked_write(c.GPUREG_GEOSTAGE_CONFIG2, 2, 0x100); + gpu_cmd_add_masked_write(c.GPUREG_START_DRAW_FUNC0, 1, 0); + gpu_cmd_add_write(c.GPUREG_DRAWELEMENTS, 1); + gpu_cmd_add_masked_write(c.GPUREG_START_DRAW_FUNC0, 1, 1); + gpu_cmd_add_masked_write(c.GPUREG_GEOSTAGE_CONFIG, 2, 0); + gpu_cmd_add_masked_write(c.GPUREG_GEOSTAGE_CONFIG2, 2, 0); + gpu_cmd_add_write(c.GPUREG_VTX_FUNC, 1); + gpu_cmd_add_masked_write(c.GPUREG_PRIMITIVE_CONFIG, 0x8, 0); + 
gpu_cmd_add_masked_write(c.GPUREG_PRIMITIVE_CONFIG, 0x8, 0); + return true; +} + +fn draw_arrays_direct(first: u32, count: u32) void { + gpu_cmd_add_masked_write(c.GPUREG_PRIMITIVE_CONFIG, 2, @intCast(c.GPU_TRIANGLES)); + gpu_cmd_add_write(c.GPUREG_RESTART_PRIMITIVE, 1); + gpu_cmd_add_write(c.GPUREG_INDEXBUFFER_CONFIG, 0x80000000); + gpu_cmd_add_write(c.GPUREG_NUMVERTICES, count); + gpu_cmd_add_write(c.GPUREG_VERTEX_OFFSET, first); + gpu_cmd_add_masked_write(c.GPUREG_GEOSTAGE_CONFIG2, 1, 1); + gpu_cmd_add_masked_write(c.GPUREG_START_DRAW_FUNC0, 1, 0); + gpu_cmd_add_write(c.GPUREG_DRAWARRAYS, 1); + gpu_cmd_add_masked_write(c.GPUREG_START_DRAW_FUNC0, 1, 1); + gpu_cmd_add_masked_write(c.GPUREG_GEOSTAGE_CONFIG2, 1, 0); + gpu_cmd_add_write(c.GPUREG_VTX_FUNC, 1); +} + +fn ensure_index_buffer(count: usize) ![]align(CACHE_LINE_SIZE) u16 { + if (count == 0 or count > std.math.maxInt(u16) + 1) return error.UnsupportedIndexCount; + if (sequential_indices) |indices| { + if (indices.len >= count) return indices[0..count]; + render_alloc.free(indices); + sequential_indices = null; + } + + const indices = try render_alloc.alignedAlloc(u16, .fromByteUnits(CACHE_LINE_SIZE), count); + const bytes: [*]const u8 = @ptrCast(indices.ptr); + if (!is_linear_fcram(bytes, indices.len * @sizeOf(u16))) { + render_alloc.free(indices); + return error.IndexBufferNotLinear; + } + + for (indices, 0..) 
|*idx, i| idx.* = @intCast(i); + flush_data_cache(bytes, indices.len * @sizeOf(u16)); + sequential_indices = indices; + return indices; +} + +fn free_index_buffer() void { + if (sequential_indices) |indices| { + render_alloc.free(indices); + sequential_indices = null; + } +} + +fn finish_frame_direct() void { + gpu_cmd_add_write(c.GPUREG_FRAMEBUFFER_FLUSH, 1); + gpu_cmd_add_write(c.GPUREG_FRAMEBUFFER_INVALIDATE, 1); + gpu_cmd_add_write(c.GPUREG_EARLYDEPTH_CLEAR, 1); +} + +fn vertex_buffer_format() u32 { + return (@as(u32, @intCast(VERTEX_STRIDE)) << 16) | + (@as(u32, @intCast(VERTEX_ATTR_COUNT)) << 28); +} + +fn set_vsh_input_count_direct(count: c_int) void { + const value = @as(u32, @intCast(count - 1)); + gpu_cmd_add_masked_write(c.GPUREG_VSH_INPUTBUFFER_CONFIG, 0xB, SH_MODE_VSH | value); + gpu_cmd_add_write(c.GPUREG_VSH_NUM_ATTR, value); +} + +fn gpu_cmd_add_write(reg: c_int, value: u32) void { + gpu_cmd_add_masked_write(reg, 0xF, value); +} + +fn gpu_cmd_add_masked_write(reg: c_int, mask: u32, value: u32) void { + var param = value; + c.GPUCMD_Add(gpu_cmd_header(false, mask, reg), &param, 1); +} + +fn gpu_cmd_add_writes(reg: c_int, values: []const u32) void { + c.GPUCMD_Add(gpu_cmd_header(false, 0xF, reg), values.ptr, @intCast(values.len)); +} + +fn gpu_cmd_add_incremental_writes(reg: c_int, values: []const u32) void { + c.GPUCMD_Add(gpu_cmd_header(true, 0xF, reg), values.ptr, @intCast(values.len)); +} + +fn gpu_cmd_header(incremental: bool, mask: u32, reg: c_int) u32 { + const inc: u32 = if (incremental) 1 else 0; + return (inc << 31) | ((mask & 0xF) << 16) | (@as(u32, @intCast(reg)) & 0x3FF); } fn init_projection_transform() void { @@ -761,16 +860,18 @@ fn upload_draw_uniforms(pl: *PipelineData, model: *const Mat4) void { fn upload_matrix(location: c_int, matrix: *const c.C3D_Mtx) void { const idx = uniform_location(location, 4) orelse return; - inline for (0..4) |i| { - c.C3D_FVUnif[VERTEX_SHADER_INDEX][idx + i] = matrix.r[i]; - 
c.C3D_FVUnifDirty[VERTEX_SHADER_INDEX][idx + i] = true; - } + const words: [*]const u32 = @ptrCast(matrix); + gpu_cmd_add_write(c.GPUREG_VSH_FLOATUNIFORM_CONFIG, @as(u32, @intCast(idx)) | FLOAT_UNIFORM_UPLOAD_F32); + gpu_cmd_add_writes(c.GPUREG_VSH_FLOATUNIFORM_DATA, words[0..16]); } fn upload_vec4(location: c_int, values: [4]f32) void { const idx = uniform_location(location, 1) orelse return; - set_fvec(&c.C3D_FVUnif[VERTEX_SHADER_INDEX][idx], values[0], values[1], values[2], values[3]); - c.C3D_FVUnifDirty[VERTEX_SHADER_INDEX][idx] = true; + var vec: c.C3D_FVec = undefined; + set_fvec(&vec, values[0], values[1], values[2], values[3]); + const words: [*]const u32 = @ptrCast(&vec); + gpu_cmd_add_write(c.GPUREG_VSH_FLOATUNIFORM_CONFIG, @as(u32, @intCast(idx)) | FLOAT_UNIFORM_UPLOAD_F32); + gpu_cmd_add_writes(c.GPUREG_VSH_FLOATUNIFORM_DATA, words[0..4]); } fn uniform_location(location: c_int, count: usize) ?usize { @@ -883,7 +984,7 @@ fn texture_param() u32 { fn upload_texture_data(tex: *TextureData, data: []align(16) const u8) !void { const size = texture_size(tex.width, tex.height); - const upload = try ensure_texture_staging(tex, size); + const upload = try ensure_texture_staging(tex, size / TEX_BPP); convert_texture_data(upload, data, tex.width, tex.height); flush_texture_source(upload); @@ -891,15 +992,16 @@ fn upload_texture_data(tex: *TextureData, data: []align(16) const u8) !void { c.C3D_TexFlush(tex_ptr(tex)); } -fn ensure_texture_staging(tex: *TextureData, size: usize) ![]align(16) u8 { +fn ensure_texture_staging(tex: *TextureData, len: usize) ![]align(16) u32 { if (tex.staging) |buf| { - if (buf.len >= size) return buf[0..size]; + if (buf.len >= len) return buf[0..len]; render_alloc.free(buf); tex.staging = null; } - const staging = try render_alloc.alignedAlloc(u8, .fromByteUnits(16), size); - if (!is_linear_fcram(staging.ptr, staging.len)) { + const staging = try render_alloc.alignedAlloc(u32, .fromByteUnits(CACHE_LINE_SIZE), len); + const bytes: 
[*]const u8 = @ptrCast(staging.ptr); + if (!is_linear_fcram(bytes, len * TEX_BPP)) { render_alloc.free(staging); std.debug.panic("3ds_gfx: texture staging must be allocated in linear FCRAM", .{}); } @@ -915,39 +1017,54 @@ fn free_texture_staging(tex: *TextureData) void { } } -fn convert_texture_data(dst: []align(16) u8, src: []align(16) const u8, width: u32, height: u32) void { +fn convert_texture_data(dst: []align(16) u32, src: []align(16) const u8, width: u32, height: u32) void { + const factors = twiddle_factors(width, height); + var y_bits: u32 = 0; for (0..height) |y| { - const sy = height - 1 - @as(u32, @intCast(y)); + const yu: u32 = @intCast(y); + var x_bits: u32 = 0; for (0..width) |x| { const xu: u32 = @intCast(x); - const src_off = (@as(usize, sy) * width + xu) * TEX_BPP; - const dst_off = tiled_pixel_offset(xu, @intCast(y), width) * TEX_BPP; - dst[dst_off + 0] = src[src_off + 3]; - dst[dst_off + 1] = src[src_off + 2]; - dst[dst_off + 2] = src[src_off + 1]; - dst[dst_off + 3] = src[src_off + 0]; + const src_off = (@as(usize, yu) * width + xu) * TEX_BPP; + const dst_pixel = x_bits | (factors.mask_y - y_bits); + dst[@intCast(dst_pixel)] = + (@as(u32, src[src_off + 0]) << 24) | + (@as(u32, src[src_off + 1]) << 16) | + (@as(u32, src[src_off + 2]) << 8) | + @as(u32, src[src_off + 3]); + x_bits = (x_bits -% factors.mask_x) & factors.mask_x; } + y_bits = (y_bits -% factors.mask_y) & factors.mask_y; } } -fn tiled_pixel_offset(x: u32, y: u32, width: u32) usize { - const tile_x = x & ~@as(u32, 7); - const tile_y = y & ~@as(u32, 7); - const tile_base = tile_y * width + tile_x * 8; - return @intCast(tile_base + morton8(x & 7, y & 7)); -} +const TwiddleFactors = struct { + mask_x: u32, + mask_y: u32, +}; + +fn twiddle_factors(width: u32, height: u32) TwiddleFactors { + var mask_x: u32 = 0b010101; + var mask_y: u32 = 0b101010; + var w = width >> 4; + var h = height >> 4; + var shift: u5 = 6; -fn morton8(x: u32, y: u32) u32 { - return (x & 1) | - ((y & 1) << 1) | - ((x 
& 2) << 1) | - ((y & 2) << 2) | - ((x & 4) << 2) | - ((y & 4) << 3); + while (w > 0) : (w >>= 1) { + mask_x += @as(u32, 1) << shift; + shift += 1; + } + while (h > 0) : (h >>= 1) { + mask_y += @as(u32, 1) << shift; + shift += 1; + } + + return .{ .mask_x = mask_x, .mask_y = mask_y }; } -fn flush_texture_source(data: []align(16) u8) void { - _ = c.GSPGPU_FlushDataCache(data.ptr, @intCast(data.len)); +fn flush_texture_source(data: []align(16) u32) void { + const bytes: [*]const u8 = @ptrCast(data.ptr); + flush_data_cache(bytes, data.len * TEX_BPP); } fn texture_size(width: u32, height: u32) u32 { @@ -958,9 +1075,13 @@ fn tex_ptr(tex: *TextureData) *c.C3D_Tex { return @ptrCast(&tex.tex); } -fn target_frame_buf(t: *c.C3D_RenderTarget) *c.C3D_FrameBuf { - const mirror: *RenderTargetMirror = @ptrCast(@alignCast(t)); - return @ptrCast(&mirror.frame_buf); +fn flush_data_cache(ptr: [*]const u8, len: usize) void { + if (len == 0) return; + + const start = @intFromPtr(ptr) & ~(CACHE_LINE_SIZE - 1); + const end = std.mem.alignForward(usize, @intFromPtr(ptr) + len, CACHE_LINE_SIZE); + const flush_ptr: *const anyopaque = @ptrFromInt(start); + _ = c.GSPGPU_FlushDataCache(flush_ptr, @intCast(end - start)); } fn mesh_slot(handle: Mesh.Handle) ?*MeshData { diff --git a/src/platform/3ds/shaders/basic.v.pica b/src/platform/3ds/shaders/basic.v.pica index f10c5d8..98e2204 100644 --- a/src/platform/3ds/shaders/basic.v.pica +++ b/src/platform/3ds/shaders/basic.v.pica @@ -10,8 +10,8 @@ .out outclr color .alias inpos v0 -.alias inuv v1 -.alias inclr v2 +.alias inclr v1 +.alias inuv v2 .proc main mul r0.xyz, posScale, inpos diff --git a/src/rendering/Vertex.zig b/src/rendering/Vertex.zig index 17f57d8..f4adc5d 100644 --- a/src/rendering/Vertex.zig +++ b/src/rendering/Vertex.zig @@ -1,15 +1,15 @@ pub const Vertex = extern struct { pos: [3]i16, _pad: i16 = 0, - uv: [2]i16, color: u32, + uv: [2]i16, }; comptime { if (@sizeOf(Vertex) != 16) @compileError("Rendering.Vertex must stay 16 bytes"); 
if (@offsetOf(Vertex, "pos") != 0) @compileError("Rendering.Vertex.pos must stay at byte offset 0"); - if (@offsetOf(Vertex, "uv") != 8) @compileError("Rendering.Vertex.uv must stay at byte offset 8"); - if (@offsetOf(Vertex, "color") != 12) @compileError("Rendering.Vertex.color must stay at byte offset 12"); + if (@offsetOf(Vertex, "color") != 8) @compileError("Rendering.Vertex.color must stay at byte offset 8"); + if (@offsetOf(Vertex, "uv") != 12) @compileError("Rendering.Vertex.uv must stay at byte offset 12"); } pub const AttributeUsage = enum { From f77f8513d521d5c6a4ac3c70165df05752963bfd Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Sun, 7 Jun 2026 01:30:06 -0400 Subject: [PATCH 37/44] Mango 3DS GFX --- LICENSE.zitrus | 9 + build.zig | 65 +- build.zig.zon | 4 + src/platform/3ds/3ds_gfx.zig | 1227 ++++--- src/platform/3ds/mango/fmt.zig | 2 + src/platform/3ds/mango/fmt/zpsh.zig | 430 +++ src/platform/3ds/mango/hardware.zig | 201 ++ src/platform/3ds/mango/hardware/cpu.zig | 194 ++ src/platform/3ds/mango/hardware/cpu/arm11.zig | 324 ++ src/platform/3ds/mango/hardware/cpu/arm9.zig | 83 + src/platform/3ds/mango/hardware/csnd.zig | 116 + src/platform/3ds/mango/hardware/dsp.zig | 90 + src/platform/3ds/mango/hardware/hid.zig | 54 + src/platform/3ds/mango/hardware/i2c.zig | 45 + src/platform/3ds/mango/hardware/lcd.zig | 92 + src/platform/3ds/mango/hardware/lgy.zig | 138 + src/platform/3ds/mango/hardware/pica.zig | 2940 +++++++++++++++++ .../3ds/mango/hardware/pica/command.zig | 544 +++ .../3ds/mango/hardware/pica/shader.zig | 70 + .../mango/hardware/pica/shader/Encoder.zig | 428 +++ .../3ds/mango/hardware/pica/shader/as.zig | 13 + .../hardware/pica/shader/as/Assembler.zig | 1468 ++++++++ .../hardware/pica/shader/as/tokenizer.zig | 414 +++ .../mango/hardware/pica/shader/encoding.zig | 915 +++++ .../mango/hardware/pica/shader/register.zig | 248 ++ .../3ds/mango/hardware/pica/shader/spirv.zig | 119 + .../hardware/pica/shader/spirv/Reader.zig | 169 + 
src/platform/3ds/mango/hardware/pxi.zig | 57 + src/platform/3ds/mango/mango.zig | 1664 ++++++++++ src/platform/3ds/mango/mango/Buffer.zig | 49 + .../3ds/mango/mango/CommandBuffer.zig | 1093 ++++++ src/platform/3ds/mango/mango/CommandPool.zig | 238 ++ src/platform/3ds/mango/mango/Device.zig | 677 ++++ src/platform/3ds/mango/mango/DeviceMemory.zig | 104 + .../3ds/mango/mango/GraphicsState.zig | 994 ++++++ src/platform/3ds/mango/mango/Image.zig | 141 + src/platform/3ds/mango/mango/ImageView.zig | 104 + .../3ds/mango/mango/LightLookupTable.zig | 46 + src/platform/3ds/mango/mango/QueryPool.zig | 122 + src/platform/3ds/mango/mango/Queue.zig | 620 ++++ .../3ds/mango/mango/RenderingState.zig | 790 +++++ src/platform/3ds/mango/mango/Sampler.zig | 67 + src/platform/3ds/mango/mango/Semaphore.zig | 40 + src/platform/3ds/mango/mango/Shader.zig | 116 + src/platform/3ds/mango/mango/Surface.zig | 11 + src/platform/3ds/mango/mango/Swapchain.zig | 29 + .../3ds/mango/mango/TextureCombinerState.zig | 44 + .../3ds/mango/mango/VertexInputLayout.zig | 335 ++ src/platform/3ds/mango/mango/backend.zig | 230 ++ .../3ds/mango/mango/backend/AetherCtru.zig | 483 +++ src/platform/3ds/mango/mango/debug.zig | 9 + src/platform/3ds/mango/mango/validation.zig | 160 + src/platform/3ds/mango/memory.zig | 46 + src/platform/3ds/mango/tools/psm_to_zpsh.zig | 158 + src/platform/3ds/mango/zitrus.zig | 5 + src/platform/3ds/mango/zitrus_tools.zig | 2 + src/platform/3ds/shaders/basic.psm | 47 + src/util/pool_alloc.zig | 58 +- 58 files changed, 18303 insertions(+), 638 deletions(-) create mode 100644 LICENSE.zitrus create mode 100644 src/platform/3ds/mango/fmt.zig create mode 100644 src/platform/3ds/mango/fmt/zpsh.zig create mode 100644 src/platform/3ds/mango/hardware.zig create mode 100644 src/platform/3ds/mango/hardware/cpu.zig create mode 100644 src/platform/3ds/mango/hardware/cpu/arm11.zig create mode 100644 src/platform/3ds/mango/hardware/cpu/arm9.zig create mode 100644 
src/platform/3ds/mango/hardware/csnd.zig create mode 100644 src/platform/3ds/mango/hardware/dsp.zig create mode 100644 src/platform/3ds/mango/hardware/hid.zig create mode 100644 src/platform/3ds/mango/hardware/i2c.zig create mode 100644 src/platform/3ds/mango/hardware/lcd.zig create mode 100644 src/platform/3ds/mango/hardware/lgy.zig create mode 100644 src/platform/3ds/mango/hardware/pica.zig create mode 100644 src/platform/3ds/mango/hardware/pica/command.zig create mode 100644 src/platform/3ds/mango/hardware/pica/shader.zig create mode 100644 src/platform/3ds/mango/hardware/pica/shader/Encoder.zig create mode 100644 src/platform/3ds/mango/hardware/pica/shader/as.zig create mode 100644 src/platform/3ds/mango/hardware/pica/shader/as/Assembler.zig create mode 100644 src/platform/3ds/mango/hardware/pica/shader/as/tokenizer.zig create mode 100644 src/platform/3ds/mango/hardware/pica/shader/encoding.zig create mode 100644 src/platform/3ds/mango/hardware/pica/shader/register.zig create mode 100644 src/platform/3ds/mango/hardware/pica/shader/spirv.zig create mode 100644 src/platform/3ds/mango/hardware/pica/shader/spirv/Reader.zig create mode 100644 src/platform/3ds/mango/hardware/pxi.zig create mode 100644 src/platform/3ds/mango/mango.zig create mode 100644 src/platform/3ds/mango/mango/Buffer.zig create mode 100644 src/platform/3ds/mango/mango/CommandBuffer.zig create mode 100644 src/platform/3ds/mango/mango/CommandPool.zig create mode 100644 src/platform/3ds/mango/mango/Device.zig create mode 100644 src/platform/3ds/mango/mango/DeviceMemory.zig create mode 100644 src/platform/3ds/mango/mango/GraphicsState.zig create mode 100644 src/platform/3ds/mango/mango/Image.zig create mode 100644 src/platform/3ds/mango/mango/ImageView.zig create mode 100644 src/platform/3ds/mango/mango/LightLookupTable.zig create mode 100644 src/platform/3ds/mango/mango/QueryPool.zig create mode 100644 src/platform/3ds/mango/mango/Queue.zig create mode 100644 
src/platform/3ds/mango/mango/RenderingState.zig create mode 100644 src/platform/3ds/mango/mango/Sampler.zig create mode 100644 src/platform/3ds/mango/mango/Semaphore.zig create mode 100644 src/platform/3ds/mango/mango/Shader.zig create mode 100644 src/platform/3ds/mango/mango/Surface.zig create mode 100644 src/platform/3ds/mango/mango/Swapchain.zig create mode 100644 src/platform/3ds/mango/mango/TextureCombinerState.zig create mode 100644 src/platform/3ds/mango/mango/VertexInputLayout.zig create mode 100644 src/platform/3ds/mango/mango/backend.zig create mode 100644 src/platform/3ds/mango/mango/backend/AetherCtru.zig create mode 100644 src/platform/3ds/mango/mango/debug.zig create mode 100644 src/platform/3ds/mango/mango/validation.zig create mode 100644 src/platform/3ds/mango/memory.zig create mode 100644 src/platform/3ds/mango/tools/psm_to_zpsh.zig create mode 100644 src/platform/3ds/mango/zitrus.zig create mode 100644 src/platform/3ds/mango/zitrus_tools.zig create mode 100644 src/platform/3ds/shaders/basic.psm diff --git a/LICENSE.zitrus b/LICENSE.zitrus new file mode 100644 index 0000000..f18b392 --- /dev/null +++ b/LICENSE.zitrus @@ -0,0 +1,9 @@ +This project's 3DS backend uses components from Zitrus: + +Copyright © 2025 GasInfinity + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/build.zig b/build.zig index 86383b3..49b4cb6 100644 --- a/build.zig +++ b/build.zig @@ -193,7 +193,7 @@ fn addNintendoCImportPaths(owner: *std.Build, mod: *std.Build.Module, config: Co switch (config.platform) { .nintendo_3ds => { // Keep newlib before libctru so libctru's include_next sys wrappers - // resolve during Zig's C translation of Citro3D/libctru headers. + // resolve during Zig's C translation of SDK headers. // // Zig's 3DS C import can otherwise see newlib's fortified unistd // wrappers and emit references to __ssp_real_* symbols. 
devkitARM is @@ -362,6 +362,9 @@ pub fn addGame(owner: *std.Build, b: *std.Build, opts: GameOptions) *std.Build.S if (uses_nintendo_c_io) { addNintendoCImportPaths(owner, mod, config, devkitProPath(b)); } + if (config.platform == .nintendo_3ds and config.gfx == .default) { + add3dsMangoImport(owner, b, mod, target, opts.optimize); + } addInternalShaderModule(owner, b, mod, config); @@ -576,6 +579,56 @@ fn addPicassoStep(b: *std.Build, picasso: []const u8, comptime output_name: []co return output; } +fn add3dsMangoImport(owner: *std.Build, b: *std.Build, mod: *std.Build.Module, target: std.Build.ResolvedTarget, optimize: std.builtin.OptimizeMode) void { + const zsflt = owner.dependency("zsflt", .{}).module("zsflt"); + const dkp = devkitProPath(owner); + const zitrus_mod = b.createModule(.{ + .root_source_file = owner.path("src/platform/3ds/mango/zitrus.zig"), + .target = target, + .optimize = optimize, + .imports = &.{ + .{ .name = "zsflt", .module = zsflt }, + }, + }); + zitrus_mod.addImport("zitrus", zitrus_mod); + zitrus_mod.addIncludePath(owner.path("src/platform")); + zitrus_mod.addCMacro("_FORTIFY_SOURCE", "0"); + zitrus_mod.addIncludePath(.{ .cwd_relative = b.pathJoin(&.{ dkp, "devkitARM/arm-none-eabi/include" }) }); + zitrus_mod.addIncludePath(.{ .cwd_relative = b.pathJoin(&.{ dkp, "libctru/include" }) }); + mod.addImport("zitrus", zitrus_mod); +} + +fn addMangoPsmStep(owner: *std.Build, b: *std.Build, comptime output_name: []const u8, input: std.Build.LazyPath) std.Build.LazyPath { + const zsflt = owner.dependency("zsflt", .{}).module("zsflt"); + const host_target = b.resolveTargetQuery(.{}); + const zitrus_mod = b.createModule(.{ + .root_source_file = owner.path("src/platform/3ds/mango/zitrus_tools.zig"), + .target = host_target, + .optimize = .Debug, + .imports = &.{ + .{ .name = "zsflt", .module = zsflt }, + }, + }); + zitrus_mod.addImport("zitrus", zitrus_mod); + + const tool = b.addExecutable(.{ + .name = "aether-psm-to-zpsh", + .root_module = 
b.createModule(.{ + .root_source_file = owner.path("src/platform/3ds/mango/tools/psm_to_zpsh.zig"), + .target = host_target, + .optimize = .Debug, + .imports = &.{ + .{ .name = "zitrus", .module = zitrus_mod }, + }, + }), + }); + + const run = b.addRunArtifact(tool); + run.setName("assemble 3ds mango shader"); + run.addFileArg(input); + return run.addOutputFileArg(output_name); +} + fn addInternalShaderModule(owner: *std.Build, b: *std.Build, mod: *std.Build.Module, config: Config) void { const stages = internalShaderStages(owner, b, config) orelse return; @@ -595,13 +648,7 @@ fn addInternalShaderModule(owner: *std.Build, b: *std.Build, mod: *std.Build.Mod fn internalShaderStages(owner: *std.Build, b: *std.Build, config: Config) ?ShaderStagePaths { if (config.platform == .nintendo_3ds and config.gfx == .default) { - const picasso = b.pathJoin(&.{ devkitProPath(b), "tools/bin/picasso" }); - const vert = addPicassoStep( - b, - picasso, - "basic.shbin", - owner.path("src/platform/3ds/shaders/basic.v.pica"), - ); + const vert = addMangoPsmStep(owner, b, "basic.psh", owner.path("src/platform/3ds/shaders/basic.psm")); const files = b.addWriteFiles(); const frag = files.add("basic.frag.stub", ""); return .{ .vert = vert, .frag = frag }; @@ -1205,7 +1252,7 @@ fn threedsxPipeline(b: *std.Build, exe: *std.Build.Step.Compile, opts: ExportOpt link.addArg("none"); link.addFileArg(crt_clean); link.addArg(b.fmt("-L{s}", .{ctru_lib})); - link.addArgs(&.{ "-lcitro3d", "-lctru", "-lm" }); + link.addArgs(&.{ "-lctru", "-lm" }); link.addArg("-o"); const elf = link.addOutputFileArg(b.fmt("{s}.elf", .{exe.name})); diff --git a/build.zig.zon b/build.zig.zon index 74faae2..5c1b89c 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -86,6 +86,10 @@ .hash = "sdl3-0.2.0-NmT1Q0mFJwBi9kZmArzh2rfJ_mFshydV0zPGULVlpACc", .lazy = true, }, + .zsflt = .{ + .url = "git+https://github.com/GasInfinity/zsflt.git#1929cbae9d41c7e1178e494e42aa7b349a68cd8a", + .hash = 
"zsflt-0.0.1-_pCibQ4wAABnfZskVOk2Y14F056VNJtuqVpAEzUNBGfl", + }, }, .paths = .{ "build.zig", diff --git a/src/platform/3ds/3ds_gfx.zig b/src/platform/3ds/3ds_gfx.zig index e490d81..fe2fb8e 100644 --- a/src/platform/3ds/3ds_gfx.zig +++ b/src/platform/3ds/3ds_gfx.zig @@ -1,4 +1,4 @@ -//! Citro3D graphics backend for Nintendo 3DS. +//! Mango/libctru graphics backend for Nintendo 3DS. const std = @import("std"); const Util = @import("../../util/util.zig"); @@ -8,6 +8,9 @@ const Mesh = Rendering.mesh; const Texture = Rendering.Texture; const surface = @import("surface.zig"); const shaders = @import("aether_shaders"); +const zitrus = @import("zitrus"); +const mango = zitrus.mango; +const pica = zitrus.hardware.pica; const c = @cImport({ @cDefine("wint_t", "unsigned int"); @@ -19,22 +22,6 @@ const c = @cImport({ @cInclude("3ds/services/gspgpu.h"); @cInclude("3ds/gfx.h"); @cInclude("3ds/allocator/vram.h"); - @cInclude("3ds/gpu/shbin.h"); - @cInclude("3ds/gpu/shaderProgram.h"); - @cUndef("__3DS__"); - @cUndef("_3DS"); - @cInclude("c3d/types.h"); - @cInclude("c3d/maths.h"); - @cInclude("c3d/uniforms.h"); - @cInclude("c3d/attribs.h"); - @cInclude("c3d/buffers.h"); - @cInclude("c3d/base.h"); - @cInclude("c3d/texenv.h"); - @cInclude("c3d/effect.h"); - @cInclude("c3d/texture.h"); - @cInclude("c3d/fog.h"); - @cInclude("c3d/framebuffer.h"); - @cInclude("c3d/renderqueue.h"); }); var render_alloc: std.mem.Allocator = undefined; @@ -47,11 +34,10 @@ pub fn setup(alloc: std.mem.Allocator, io: std.Io) void { const SCREEN_WIDTH: u32 = 400; const SCREEN_HEIGHT: u32 = 240; -const TARGET_WIDTH: c_int = 240; -const TARGET_HEIGHT: c_int = 400; +const TARGET_WIDTH: u16 = 240; +const TARGET_HEIGHT: u16 = 400; const MESH_SLOT_COUNT: usize = 2; const MAX_DEFERRED_MESH_FREES: usize = 4096; -const C3D_CMD_BUFFER_SIZE: usize = 1024 * 1024; const MAX_TEXTURE_SIZE: u32 = 1024; const MIN_TEXTURE_SIZE: u32 = 8; const TEX_BPP: usize = 4; @@ -60,9 +46,6 @@ const OS_FCRAM_VADDR: usize = 0x30000000; const 
OS_FCRAM_SIZE: usize = 0x10000000; const OS_OLD_FCRAM_VADDR: usize = 0x14000000; const OS_OLD_FCRAM_SIZE: usize = 0x08000000; -const BUFFER_BASE_PADDR: u32 = 0x18000000; -const SH_MODE_VSH: u32 = 0xA0000000; -const FLOAT_UNIFORM_UPLOAD_F32: u32 = 0x80000000; const DISPLAY_TRANSFER_FLAGS: u32 = @intCast( c.GX_TRANSFER_FLIP_VERT(0) | @@ -73,19 +56,15 @@ const DISPLAY_TRANSFER_FLAGS: u32 = @intCast( c.GX_TRANSFER_SCALING(c.GX_TRANSFER_SCALE_NO), ); -const VERTEX_SHADER_INDEX: usize = 0; +fn gx_buffer_dim(width: u16, height: u16) u32 { + return (@as(u32, height) << 16) | @as(u32, width); +} + const VERTEX_STRIDE: usize = @sizeOf(Rendering.Vertex); -const VERTEX_ATTR_COUNT: c_int = 3; -const VERTEX_BUFFER_PERMUTATION: u64 = 0x210; // buffer order pos,color,uv -> shader v0,v1,v2 -const VERTEX_POSITION_REG: c_int = 0; -const VERTEX_COLOR_REG: c_int = 1; -const VERTEX_UV_REG: c_int = 2; const POS_SCALE: [4]f32 = .{ snorm16_scale(), snorm16_scale(), snorm16_scale(), 1.0 }; const UV_SCALE: [2]f32 = .{ snorm16_scale(), snorm16_scale() }; const COLOR_SCALE: [4]f32 = .{ unorm8_scale(), unorm8_scale(), unorm8_scale(), unorm8_scale() }; -extern fn C3Di_UpdateContext() void; - comptime { std.debug.assert(VERTEX_STRIDE == 16); std.debug.assert(@offsetOf(Rendering.Vertex, "pos") == 0); @@ -94,14 +73,19 @@ comptime { } const PipelineData = struct { - dvlb: [*c]c.DVLB_s, - program: c.shaderProgram_s, - attr_info: c.C3D_AttrInfo, - u_projection: c_int, - u_model_view: c_int, - u_pos_scale: c_int, - u_uv_scale_offset: c_int, - u_color_scale: c_int, + shader: mango.Shader, + vertex_input: mango.VertexInputLayout, + sampler: mango.Sampler, +}; + +const RenderTargetData = struct { + color_memory: mango.DeviceMemory, + depth_memory: mango.DeviceMemory, + color_image: mango.Image, + depth_image: mango.Image, + color_view: mango.ImageView, + depth_view: mango.ImageView, + color_pixels: []u8, }; const MeshData = struct { @@ -111,92 +95,97 @@ const MeshData = struct { }; const MeshSlot = struct 
{ - data: ?[]align(16) u8 = null, + memory: mango.DeviceMemory = .null, + buffer: mango.Buffer = .null, + mapped: []u8 = &.{}, len: usize = 0, + capacity: usize = 0, in_flight: bool = false, used_this_frame: bool = false, }; const DeferredMeshFree = struct { - data: []align(16) u8, + memory: mango.DeviceMemory, + buffer: mango.Buffer, }; -const TexMirror = extern struct { - data: ?*anyopaque, - fmt_size: u32, - dim: u32, - param: u32, - border: u32, - lod_param: u32, -}; - -comptime { - std.debug.assert(@sizeOf(TexMirror) == 24); -} - const TextureData = struct { width: u32, height: u32, - tex: TexMirror, - staging: ?[]align(16) u32 = null, + memory: mango.DeviceMemory, + image: mango.Image, + view: mango.ImageView, +}; + +const FogState = struct { + enabled: bool = false, + start: f32 = 0.0, + end: f32 = 1.0, + color: [4]u8 = .{ 0, 0, 0, 255 }, + table: [128]u32 = @splat(0), }; var meshes = Util.CircularBuffer(MeshData, 2048).init(); var deferred_mesh_frees = Util.CircularBuffer(DeferredMeshFree, MAX_DEFERRED_MESH_FREES + 1).init(); var textures = Util.CircularBuffer(TextureData, 64).init(); -var sequential_indices: ?[]align(CACHE_LINE_SIZE) u16 = null; + +var device: mango.Device = .null; +var submit_queue: mango.Queue = .null; +var fill_queue: mango.Queue = .null; +var command_pool: mango.CommandPool = .null; +var command_buffer: mango.CommandBuffer = .null; var render_pipeline: PipelineData = undefined; +var render_target: RenderTargetData = undefined; var render_pipeline_initialized = false; +var render_target_initialized = false; +var command_resources_initialized = false; -var target: ?*c.C3D_RenderTarget = null; -var projection_transform: c.C3D_Mtx = undefined; -var fog_lut: c.C3D_FogLut = undefined; +var projection_transform: Mat4 = Mat4.identity(); var initialized = false; var frame_started = false; var vsync_enabled = true; -var clear_color: u32 = 0x000000ff; +var clear_color: [4]u8 = .{ 0, 0, 0, 255 }; var alpha_blend_enabled = true; var 
depth_write_enabled = true; var cull_face_enabled = true; -var fog_enabled = false; +var fog_state: FogState = .{}; var uv_offset: [2]f32 = .{ 0.0, 0.0 }; var proj_matrix: Mat4 = Mat4.identity(); var view_matrix: Mat4 = Mat4.identity(); +var default_texture: Texture.Handle = 0; var bound_texture: Texture.Handle = 0; pub fn init() anyerror!void { - _ = render_alloc; _ = render_io; c.gfxInitDefault(); - if (!c.C3D_Init(C3D_CMD_BUFFER_SIZE)) { - c.gfxExit(); - return error.C3DInitFailed; - } - errdefer { - c.C3D_Fini(); - c.gfxExit(); - } + errdefer c.gfxExit(); - target = c.C3D_RenderTargetCreate( - TARGET_WIDTH, - TARGET_HEIGHT, - c.GPU_RB_RGBA8, - c.C3D_DEPTHTYPE{ .__e = c.GPU_RB_DEPTH24_STENCIL8 }, - ) orelse return error.C3DRenderTargetCreateFailed; + device = try mango.createAetherCtruBackedDevice(.{ .linear_gpa = render_alloc }, render_alloc); errdefer { - c.C3D_RenderTargetDelete(target); - target = null; + device.destroy(); + device = .null; } - c.C3D_RenderTargetSetOutput(target, c.GFX_TOP, c.GFX_LEFT, DISPLAY_TRANSFER_FLAGS); - init_projection_transform(); + submit_queue = device.getQueue(.submit); + fill_queue = device.getQueue(.fill); + + try init_command_resources(); + errdefer deinit_command_resources(); + + try init_render_target(); + errdefer deinit_render_target(); + render_pipeline = try init_pipeline(); render_pipeline_initialized = true; + errdefer { + deinit_pipeline(&render_pipeline); + render_pipeline_initialized = false; + } + init_projection_transform(); initialized = true; frame_started = false; - apply_render_state(); } pub fn deinit() void { @@ -206,15 +195,12 @@ pub fn deinit() void { } frame_started = false; - if (initialized) c.C3D_FrameSync(); + if (initialized and device != .null) device.waitIdle(); release_completed_mesh_slots(); free_deferred_mesh_slots(); for (1..textures.buffer.len) |i| { - if (textures.buffer[i]) |*tex| { - c.C3D_TexDelete(tex_ptr(tex)); - free_texture_staging(tex); - } + if (textures.buffer[i]) |*tex| 
free_texture(tex); } textures.clear(); @@ -224,83 +210,56 @@ pub fn deinit() void { } for (1..meshes.buffer.len) |i| { - if (meshes.buffer[i]) |*mesh| { - free_mesh_slots(mesh); - } + if (meshes.buffer[i]) |*mesh| free_mesh_slots(mesh); } meshes.clear(); - free_index_buffer(); - if (target) |t| { - c.C3D_RenderTargetDelete(t); - target = null; + deinit_render_target(); + deinit_command_resources(); + + if (device != .null) { + device.destroy(); + device = .null; } if (initialized) { - c.C3D_Fini(); c.gfxExit(); initialized = false; } } pub fn set_clear_color(r: f32, g: f32, b: f32, a: f32) void { - clear_color = pack_color_rgba(r, g, b, a); + clear_color = .{ + float_to_u8(r), + float_to_u8(g), + float_to_u8(b), + float_to_u8(a), + }; } pub fn set_alpha_blend(enabled: bool) void { alpha_blend_enabled = enabled; - if (!initialized) return; - - if (enabled) { - c.C3D_AlphaBlend( - c.GPU_BLEND_ADD, - c.GPU_BLEND_ADD, - c.GPU_SRC_ALPHA, - c.GPU_ONE_MINUS_SRC_ALPHA, - c.GPU_ONE, - c.GPU_ONE_MINUS_SRC_ALPHA, - ); - } else { - c.C3D_AlphaBlend( - c.GPU_BLEND_ADD, - c.GPU_BLEND_ADD, - c.GPU_ONE, - c.GPU_ZERO, - c.GPU_ONE, - c.GPU_ZERO, - ); - } + if (frame_started) apply_dynamic_state(); } pub fn set_depth_write(enabled: bool) void { depth_write_enabled = enabled; - if (!initialized) return; - c.C3D_DepthTest(true, c.GPU_GEQUAL, if (enabled) c.GPU_WRITE_ALL else c.GPU_WRITE_COLOR); + if (frame_started) apply_dynamic_state(); } pub fn set_fog(enabled: bool, start: f32, end: f32, r: f32, g: f32, b: f32) void { - fog_enabled = enabled; - if (!initialized) return; - - if (!enabled) { - c.C3D_FogGasMode(c.GPU_NO_FOG, c.GPU_PLAIN_DENSITY, false); - return; - } - - const safe_end = if (end <= start) start + 0.001 else end; - const density = 1.0 / @max(0.001, safe_end - start); - c.FogLut_Exp(&fog_lut, density, 1.0, start, safe_end); - c.C3D_FogGasMode(c.GPU_FOG, c.GPU_PLAIN_DENSITY, false); - c.C3D_FogColor(pack_color_rgba(r, g, b, 1.0)); - c.C3D_FogLutBind(&fog_lut); + 
fog_state.enabled = enabled; + fog_state.start = start; + fog_state.end = end; + fog_state.color = .{ float_to_u8(r), float_to_u8(g), float_to_u8(b), 255 }; + rebuild_fog_table(); } pub fn set_clip_planes(_: bool) void {} pub fn set_culling(enabled: bool) void { cull_face_enabled = enabled; - if (!initialized) return; - c.C3D_CullFace(if (enabled) c.GPU_CULL_BACK_CCW else c.GPU_CULL_NONE); + if (frame_started) apply_dynamic_state(); } pub fn set_uv_offset(u: f32, v: f32) void { @@ -317,24 +276,20 @@ pub fn set_view_matrix(mat: *const Mat4) void { pub fn start_frame() bool { if (surface.is_system_closing()) return false; + if (!initialized or frame_started) return false; - const t = target orelse return false; - const flags: u8 = @intCast(if (vsync_enabled) c.C3D_FRAME_SYNCDRAW else c.C3D_FRAME_NONBLOCK); - - if (!c.C3D_FrameBegin(flags)) return false; release_completed_mesh_slots(); free_deferred_mesh_slots(); - if (!c.C3D_FrameDrawOn(t)) { - c.C3D_FrameEnd(0); - return false; - } - frame_started = true; - clear_current_framebuffer(c.C3D_CLEAR_ALL); - c.C3D_SetViewport(0, 0, TARGET_WIDTH, TARGET_HEIGHT); - apply_render_state(); - rebind_texture(); + clear_frame_targets() catch { + frame_started = false; + return false; + }; + begin_command_buffer() catch { + frame_started = false; + return false; + }; return true; } @@ -344,55 +299,35 @@ pub fn end_frame() void { frame_started = false; return; } + mark_current_frame_mesh_slots_in_flight(); - finish_frame_direct(); - c.C3D_FrameEnd(0); + finish_command_buffer() catch { + frame_started = false; + return; + }; + present_render_target(); frame_started = false; } pub fn clear_depth() void { if (!frame_started) return; - clear_current_framebuffer(c.C3D_CLEAR_DEPTH); -} - -pub fn set_vsync(v: bool) void { - vsync_enabled = v; -} - -fn init_pipeline() !PipelineData { - const code: [:0]align(4) const u8 = &shaders.basic_vert; - - const dvlb = c.DVLB_ParseFile(@ptrCast(@constCast(code.ptr)), @intCast(code.len)); - if (dvlb 
== null or dvlb[0].numDVLE == 0) return error.InvalidShader; - errdefer c.DVLB_Free(dvlb); - - var program: c.shaderProgram_s = undefined; - if (c.shaderProgramInit(&program) != 0) return error.InvalidShader; - errdefer _ = c.shaderProgramFree(&program); - if (c.shaderProgramSetVsh(&program, &dvlb[0].DVLE[0]) != 0) return error.InvalidShader; - - var attr_info: c.C3D_AttrInfo = undefined; - c.AttrInfo_Init(&attr_info); - if (c.AttrInfo_AddLoader(&attr_info, VERTEX_POSITION_REG, c.GPU_SHORT, 4) < 0) return error.UnsupportedVertexLayout; - if (c.AttrInfo_AddLoader(&attr_info, VERTEX_COLOR_REG, c.GPU_UNSIGNED_BYTE, 4) < 0) return error.UnsupportedVertexLayout; - if (c.AttrInfo_AddLoader(&attr_info, VERTEX_UV_REG, c.GPU_SHORT, 2) < 0) return error.UnsupportedVertexLayout; - - return .{ - .dvlb = dvlb, - .program = program, - .attr_info = attr_info, - .u_projection = c.shaderInstanceGetUniformLocation(program.vertexShader, "projection"), - .u_model_view = c.shaderInstanceGetUniformLocation(program.vertexShader, "modelView"), - .u_pos_scale = c.shaderInstanceGetUniformLocation(program.vertexShader, "posScale"), - .u_uv_scale_offset = c.shaderInstanceGetUniformLocation(program.vertexShader, "uvScaleOffset"), - .u_color_scale = c.shaderInstanceGetUniformLocation(program.vertexShader, "colorScale"), + finish_command_buffer() catch { + frame_started = false; + return; + }; + clear_depth_target() catch { + frame_started = false; + return; + }; + begin_command_buffer() catch { + frame_started = false; + return; }; } -fn deinit_pipeline(pl: *PipelineData) void { - _ = c.shaderProgramFree(&pl.program); - c.DVLB_Free(pl.dvlb); +pub fn set_vsync(v: bool) void { + vsync_enabled = v; } pub fn create_mesh() anyerror!Mesh.Handle { @@ -435,10 +370,9 @@ pub fn update_mesh(handle: Mesh.Handle, data: []const u8) void { ensure_mesh_slot_capacity(slot, data.len) catch std.debug.panic("3ds_gfx: out of linear memory for mesh upload", .{}); - const dst = slot.data.?; - 
@memcpy(dst[0..data.len], data); + @memcpy(slot.mapped[0..data.len], data); slot.len = data.len; - flush_data_cache(dst.ptr, data.len); + flush_memory(slot.memory, data.len) catch {}; mesh.latest_slot = slot_idx; mesh.len = data.len; @@ -446,53 +380,36 @@ pub fn update_mesh(handle: Mesh.Handle, data: []const u8) void { pub fn draw_mesh(handle: Mesh.Handle, model: *const Mat4, count: usize) void { if (surface.is_system_closing()) return; + if (!frame_started or !render_pipeline_initialized) return; - if (!render_pipeline_initialized) return; const mesh = mesh_slot(handle) orelse return; - const pl = &render_pipeline; const slot_idx = mesh.latest_slot orelse return; const slot = &mesh.slots[slot_idx]; - const data = slot.data orelse return; - if (count == 0 or slot.len == 0) return; + if (count == 0 or slot.len == 0 or slot.buffer == .null) return; const needed = mesh_draw_bytes_needed(count) orelse return; if (needed > slot.len) return; - bind_program(pl); - rebind_texture(); - - C3Di_UpdateContext(); - upload_draw_uniforms(pl, model); - bind_texenv_direct(); - bind_vertex_layout_direct(pl); - if (!bind_vertex_buffer_direct(data.ptr)) return; + upload_draw_uniforms(model); + bind_current_texture(); + command_buffer.setAetherFog(fog_state.enabled, fog_state.color, if (fog_state.enabled) &fog_state.table else &.{}); + command_buffer.bindVertexBuffersSlice(0, &.{slot.buffer}, &.{0}); slot.used_this_frame = true; - if (!draw_elements_direct(count)) { - draw_arrays_direct(0, @intCast(count)); - } + command_buffer.draw(@intCast(@min(count, std.math.maxInt(u32))), 0); } pub fn create_texture(width: u32, height: u32, data: []align(16) u8) anyerror!Texture.Handle { try validate_texture(width, height, data); - const size = texture_size(width, height); - if (c.vramSpaceFree() < size) return error.OutOfTextureMemory; + var tex = try create_texture_resources(width, height); + errdefer free_texture(&tex); - const mem = c.vramAlloc(size) orelse return error.OutOfTextureMemory; - 
errdefer c.vramFree(mem); + try upload_texture_data(&tex, data[0..texture_size(width, height)]); - var tex = TextureData{ - .width = width, - .height = height, - .tex = init_tex_mirror(width, height, mem, size), - }; - errdefer free_texture_staging(&tex); - - try upload_texture_data(&tex, data[0..size]); - - const handle = textures.add_element(tex) orelse return error.OutOfTextures; - return @intCast(handle); + const handle: Texture.Handle = @intCast(textures.add_element(tex) orelse return error.OutOfTextures); + if (default_texture == 0) default_texture = handle; + return handle; } pub fn update_texture(handle: Texture.Handle, data: []align(16) u8) void { @@ -510,31 +427,368 @@ pub fn update_texture(handle: Texture.Handle, data: []align(16) u8) void { pub fn bind_texture(handle: Texture.Handle) void { if (surface.is_system_closing()) return; - bound_texture = handle; - rebind_texture(); + if (frame_started) bind_current_texture(); } pub fn destroy_texture(handle: Texture.Handle) void { if (surface.is_system_closing()) { if (bound_texture == handle) bound_texture = 0; + if (default_texture == handle) default_texture = 0; _ = textures.remove_element(handle); return; } - if (texture_slot(handle)) |tex| { - c.C3D_TexDelete(tex_ptr(tex)); - free_texture_staging(tex); - } - if (bound_texture == handle) { - bound_texture = 0; - if (initialized) c.C3D_TexBind(0, null); - } + if (texture_slot(handle)) |tex| free_texture(tex); + if (bound_texture == handle) bound_texture = 0; + if (default_texture == handle) default_texture = 0; _ = textures.remove_element(handle); } pub fn force_texture_resident(_: Texture.Handle) void {} +fn init_command_resources() !void { + command_pool = try device.createCommandPool(.no_preheat, null); + errdefer { + device.destroyCommandPool(command_pool, null); + command_pool = .null; + } + + var buffers: [1]mango.CommandBuffer = undefined; + try device.allocateCommandBuffers(.{ + .pool = command_pool, + .command_buffer_count = buffers.len, + }, 
&buffers); + command_buffer = buffers[0]; + command_resources_initialized = true; +} + +fn deinit_command_resources() void { + if (!command_resources_initialized or device == .null) return; + if (command_buffer != .null) { + device.freeCommandBuffers(command_pool, &.{command_buffer}); + command_buffer = .null; + } + if (command_pool != .null) { + device.destroyCommandPool(command_pool, null); + command_pool = .null; + } + command_resources_initialized = false; +} + +fn init_render_target() !void { + const color_size = mango.Format.a8b8g8r8_unorm.scale(@as(usize, TARGET_WIDTH) * TARGET_HEIGHT); + const depth_size = mango.Format.d24_unorm_s8_uint.scale(@as(usize, TARGET_WIDTH) * TARGET_HEIGHT); + + const color_memory = try device.allocateMemory(.{ + .memory_type = .vram_a, + .allocation_size = .size(@intCast(color_size)), + }, null); + errdefer device.freeMemory(color_memory, null); + + const depth_memory = try device.allocateMemory(.{ + .memory_type = .vram_b, + .allocation_size = .size(@intCast(depth_size)), + }, null); + errdefer device.freeMemory(depth_memory, null); + + const color_image = try device.createImage(.{ + .flags = .{}, + .type = .@"2d", + .tiling = .optimal, + .usage = .{ .transfer_src = true, .color_attachment = true }, + .extent = .{ .width = TARGET_WIDTH, .height = TARGET_HEIGHT }, + .format = .a8b8g8r8_unorm, + .mip_levels = .@"1", + .array_layers = .@"1", + }, null); + errdefer device.destroyImage(color_image, null); + try device.bindImageMemory(color_image, color_memory, .size(0)); + + const depth_image = try device.createImage(.{ + .flags = .{}, + .type = .@"2d", + .tiling = .optimal, + .usage = .{ .depth_stencil_attachment = true }, + .extent = .{ .width = TARGET_WIDTH, .height = TARGET_HEIGHT }, + .format = .d24_unorm_s8_uint, + .mip_levels = .@"1", + .array_layers = .@"1", + }, null); + errdefer device.destroyImage(depth_image, null); + try device.bindImageMemory(depth_image, depth_memory, .size(0)); + + const color_view = try 
device.createImageView(.{ + .type = .@"2d", + .format = .a8b8g8r8_unorm, + .image = color_image, + .subresource_range = .full, + }, null); + errdefer device.destroyImageView(color_view, null); + + const depth_view = try device.createImageView(.{ + .type = .@"2d", + .format = .d24_unorm_s8_uint, + .image = depth_image, + .subresource_range = .full, + }, null); + errdefer device.destroyImageView(depth_view, null); + + const color_pixels = try device.mapMemory(color_memory, .size(0), .whole); + + render_target = .{ + .color_memory = color_memory, + .depth_memory = depth_memory, + .color_image = color_image, + .depth_image = depth_image, + .color_view = color_view, + .depth_view = depth_view, + .color_pixels = color_pixels, + }; + render_target_initialized = true; +} + +fn deinit_render_target() void { + if (!render_target_initialized or device == .null) return; + device.unmapMemory(render_target.color_memory); + device.destroyImageView(render_target.depth_view, null); + device.destroyImageView(render_target.color_view, null); + device.destroyImage(render_target.depth_image, null); + device.destroyImage(render_target.color_image, null); + device.freeMemory(render_target.depth_memory, null); + device.freeMemory(render_target.color_memory, null); + render_target_initialized = false; +} + +fn init_pipeline() !PipelineData { + const code: []const u8 = &shaders.basic_vert; + const shader = try device.createShader(.init(.psh, code, "main"), null); + errdefer device.destroyShader(shader, null); + + const bindings = [_]mango.VertexInputBindingDescription{ + .{ .stride = VERTEX_STRIDE }, + }; + const attributes = [_]mango.VertexInputAttributeDescription{ + .{ .location = .v0, .binding = .@"0", .format = .r16g16b16a16_sscaled, .offset = 0 }, + .{ .location = .v1, .binding = .@"0", .format = .r8g8b8a8_uscaled, .offset = 8 }, + .{ .location = .v2, .binding = .@"0", .format = .r16g16_sscaled, .offset = 12 }, + }; + const vertex_input = try 
device.createVertexInputLayout(.init(&bindings, &attributes, &.{}), null); + errdefer device.destroyVertexInputLayout(vertex_input, null); + + const sampler = try device.createSampler(.{ + .mag_filter = .nearest, + .min_filter = .nearest, + .mip_filter = .nearest, + .address_mode_u = .repeat, + .address_mode_v = .repeat, + .lod_bias = 0.0, + .min_lod = 0, + .max_lod = 0, + .border_color = .{ 0, 0, 0, 0 }, + }, null); + + return .{ + .shader = shader, + .vertex_input = vertex_input, + .sampler = sampler, + }; +} + +fn deinit_pipeline(pl: *PipelineData) void { + device.destroySampler(pl.sampler, null); + device.destroyVertexInputLayout(pl.vertex_input, null); + device.destroyShader(pl.shader, null); +} + +fn clear_frame_targets() !void { + try fill_queue.clearColorImage(.{ + .image = render_target.color_image, + .color = clear_color, + .subresource_range = .full, + }); + try clear_depth_target(); +} + +fn clear_depth_target() !void { + try fill_queue.clearDepthStencilImage(.{ + .image = render_target.depth_image, + .depth = 0.0, + .stencil = 0, + .subresource_range = .full, + }); +} + +fn begin_command_buffer() !void { + try command_buffer.begin(); + command_buffer.bindShaders(&.{.vertex}, &.{render_pipeline.shader}); + command_buffer.setVertexInput(render_pipeline.vertex_input); + command_buffer.setLightingEnable(false); + command_buffer.setLightEnvironmentEnable(.{}); + command_buffer.setLogicOpEnable(false); + command_buffer.setAlphaTestEnable(false); + command_buffer.setStencilTestEnable(false); + command_buffer.setDepthTestEnable(true); + command_buffer.setDepthMode(.z_buffer); + command_buffer.setDepthCompareOp(.ge); + command_buffer.setPrimitiveTopology(.triangle_list); + command_buffer.setFrontFace(.ccw); + command_buffer.setColorWriteMask(.rgba); + command_buffer.setViewport(.{ + .rect = .{ .offset = .{ .x = 0, .y = 0 }, .extent = .{ .width = TARGET_WIDTH, .height = TARGET_HEIGHT } }, + .min_depth = 0.0, + .max_depth = 1.0, + }); + 
command_buffer.setScissor(.inside(.{ + .offset = .{ .x = 0, .y = 0 }, + .extent = .{ .width = TARGET_WIDTH, .height = TARGET_HEIGHT }, + })); + apply_dynamic_state(); + bind_current_texture(); + command_buffer.beginRendering(.{ + .color_attachment = render_target.color_view, + .depth_stencil_attachment = render_target.depth_view, + }); +} + +fn finish_command_buffer() !void { + command_buffer.endRendering(); + try command_buffer.end(); + try submit_queue.submit(.{ .command_buffer = command_buffer }); + device.waitIdle(); +} + +fn present_render_target() void { + var fb_width: u16 = 0; + var fb_height: u16 = 0; + const framebuffer = c.gfxGetFramebuffer(c.GFX_TOP, c.GFX_LEFT, &fb_width, &fb_height) orelse return; + + _ = c.GX_DisplayTransfer( + @ptrCast(@alignCast(render_target.color_pixels.ptr)), + gx_buffer_dim(TARGET_WIDTH, TARGET_HEIGHT), + @ptrCast(@alignCast(framebuffer)), + gx_buffer_dim(TARGET_WIDTH, TARGET_HEIGHT), + DISPLAY_TRANSFER_FLAGS, + ); + c.gspWaitForEvent(c.GSPGPU_EVENT_PPF, false); + if (vsync_enabled) c.gspWaitForEvent(c.GSPGPU_EVENT_VBlank0, true); + c.gfxSwapBuffers(); +} + +fn apply_dynamic_state() void { + command_buffer.setDepthWriteEnable(depth_write_enabled); + command_buffer.setCullMode(if (cull_face_enabled) .back else .none); + command_buffer.setBlendEquation(if (alpha_blend_enabled) .{ + .src_color_factor = .src_alpha, + .dst_color_factor = .one_minus_src_alpha, + .color_op = .add, + .src_alpha_factor = .one, + .dst_alpha_factor = .one_minus_src_alpha, + .alpha_op = .add, + } else .{ + .src_color_factor = .one, + .dst_color_factor = .zero, + .color_op = .add, + .src_alpha_factor = .one, + .dst_alpha_factor = .zero, + .alpha_op = .add, + }); +} + +fn bind_current_texture() void { + if (!frame_started) return; + + const effective_texture = if (bound_texture != 0) bound_texture else default_texture; + if (effective_texture == 0) { + command_buffer.bindCombinedImageSamplers(0, &.{mango.CombinedImageSampler.none}); + bind_texenv(false); + 
return; + } + + const tex = texture_slot(effective_texture) orelse { + command_buffer.bindCombinedImageSamplers(0, &.{mango.CombinedImageSampler.none}); + bind_texenv(false); + return; + }; + + command_buffer.bindCombinedImageSamplers(0, &.{.{ + .image = tex.view, + .sampler = render_pipeline.sampler, + }}); + bind_texenv(true); +} + +fn bind_texenv(textured: bool) void { + const first: mango.TextureCombinerUnit = if (textured) .{ + .color_src = .{ .texture_0, .primary_color, .primary_color }, + .alpha_src = .{ .texture_0, .primary_color, .primary_color }, + .color_factor = @splat(.src_color), + .alpha_factor = @splat(.src_alpha), + .color_op = .modulate, + .alpha_op = .modulate, + .color_scale = .@"1x", + .alpha_scale = .@"1x", + .constant = @splat(0xFF), + } else .{ + .color_src = @splat(.primary_color), + .alpha_src = @splat(.primary_color), + .color_factor = @splat(.src_color), + .alpha_factor = @splat(.src_alpha), + .color_op = .replace, + .alpha_op = .replace, + .color_scale = .@"1x", + .alpha_scale = .@"1x", + .constant = @splat(0xFF), + }; + + command_buffer.setTextureCombiners(&.{first}, &.{}); +} + +fn upload_draw_uniforms(model: *const Mat4) void { + var projection_rows = mat4_to_uniform_rows(Mat4.mul(proj_matrix, projection_transform)); + var model_view_rows = mat4_to_uniform_rows(Mat4.mul(model.*, view_matrix)); + var uniforms: [11][4]f32 = undefined; + @memcpy(uniforms[0..4], projection_rows[0..4]); + @memcpy(uniforms[4..8], model_view_rows[0..4]); + uniforms[8] = POS_SCALE; + uniforms[9] = .{ UV_SCALE[0], UV_SCALE[1], uv_offset[0], uv_offset[1] }; + uniforms[10] = COLOR_SCALE; + command_buffer.bindFloatUniforms(.vertex, 0, &uniforms); +} + +fn mat4_to_uniform_rows(mat: Mat4) [4][4]f32 { + var out: [4][4]f32 = undefined; + inline for (0..4) |row| { + out[row] = .{ mat.data[0][row], mat.data[1][row], mat.data[2][row], mat.data[3][row] }; + } + return out; +} + +fn init_projection_transform() void { + projection_transform = Mat4.mul(ortho_tilt(0.0, 
@floatFromInt(SCREEN_WIDTH), 0.0, @floatFromInt(SCREEN_HEIGHT), 0.0, 1.0), logical_viewport_transform()); +} + +fn logical_viewport_transform() Mat4 { + return .{ .data = .{ + .{ @as(f32, @floatFromInt(SCREEN_WIDTH)) * 0.5, 0.0, 0.0, @as(f32, @floatFromInt(SCREEN_WIDTH)) * 0.5 }, + .{ 0.0, @as(f32, @floatFromInt(SCREEN_HEIGHT)) * 0.5, 0.0, @as(f32, @floatFromInt(SCREEN_HEIGHT)) * 0.5 }, + .{ 0.0, 0.0, -1.0, 1.0 }, + .{ 0.0, 0.0, 0.0, 1.0 }, + } }; +} + +fn ortho_tilt(left: f32, right: f32, bottom: f32, top: f32, near: f32, far: f32) Mat4 { + const rl = right - left; + const tb = top - bottom; + const fnv = far - near; + return .{ .data = .{ + .{ 0.0, 2.0 / tb, 0.0, -((top + bottom) / tb) }, + .{ -2.0 / rl, 0.0, 0.0, (right + left) / rl }, + .{ 0.0, 0.0, 1.0 / fnv, 0.5 * ((near + far) / (near - far)) - 0.5 }, + .{ 0.0, 0.0, 0.0, 1.0 }, + } }; +} + fn select_upload_slot(mesh: *const MeshData) ?usize { if (mesh.latest_slot) |idx| { const slot = mesh.slots[idx]; @@ -550,25 +804,36 @@ fn select_upload_slot(mesh: *const MeshData) ?usize { } fn ensure_mesh_slot_capacity(slot: *MeshSlot, len: usize) !void { - if (slot.data) |buf| { - if (buf.len >= len) return; - } - - const cap = mesh_slot_capacity(len); - const new_data = try render_alloc.alignedAlloc(u8, .fromByteUnits(CACHE_LINE_SIZE), cap); - - if (!is_linear_fcram(new_data.ptr, new_data.len)) { - render_alloc.free(new_data); + if (slot.capacity >= len and slot.buffer != .null) return; + + free_mesh_slot(slot); + + const cap = std.mem.alignForward(usize, @max(len, 256), CACHE_LINE_SIZE); + const memory = try device.allocateMemory(.{ + .memory_type = .fcram_cached, + .allocation_size = .size(@intCast(cap)), + }, null); + errdefer device.freeMemory(memory, null); + + const buffer = try device.createBuffer(.{ + .size = .size(@intCast(cap)), + .usage = .{ .vertex_buffer = true }, + }, null); + errdefer device.destroyBuffer(buffer, null); + + try device.bindBufferMemory(buffer, memory, .size(0)); + const mapped = try 
device.mapMemory(memory, .size(0), .whole); + if (!is_linear_fcram(mapped.ptr, mapped.len)) { std.debug.panic("3ds_gfx: mesh upload slots must be allocated in linear FCRAM", .{}); } - if (slot.data) |old| render_alloc.free(old); - slot.data = new_data; - slot.len = 0; -} - -fn mesh_slot_capacity(len: usize) usize { - return @max(len, 256); + slot.* = .{ + .memory = memory, + .buffer = buffer, + .mapped = mapped, + .capacity = cap, + .len = 0, + }; } fn free_mesh_slots(mesh: *MeshData) void { @@ -578,25 +843,33 @@ fn free_mesh_slots(mesh: *MeshData) void { } fn free_mesh_slot(slot: *MeshSlot) void { - if (slot.data) |data| { + if (slot.buffer != .null or slot.memory != .null) { if (slot.in_flight or slot.used_this_frame) { - defer_mesh_free(data); + defer_mesh_free(.{ .memory = slot.memory, .buffer = slot.buffer }); } else { - render_alloc.free(data); + destroy_mesh_resources(slot.memory, slot.buffer); } } slot.* = .{}; } -fn defer_mesh_free(data: []align(16) u8) void { - if (deferred_mesh_frees.add_element(.{ .data = data }) != null) return; +fn destroy_mesh_resources(memory: mango.DeviceMemory, buffer: mango.Buffer) void { + if (buffer != .null) device.destroyBuffer(buffer, null); + if (memory != .null) { + device.unmapMemory(memory); + device.freeMemory(memory, null); + } +} + +fn defer_mesh_free(free: DeferredMeshFree) void { + if (deferred_mesh_frees.add_element(free) != null) return; std.debug.panic("3ds_gfx: deferred mesh free queue exhausted", .{}); } fn free_deferred_mesh_slots() void { for (1..deferred_mesh_frees.buffer.len) |i| { if (deferred_mesh_frees.buffer[i]) |free| { - render_alloc.free(free.data); + destroy_mesh_resources(free.memory, free.buffer); } } deferred_mesh_frees.clear(); @@ -617,11 +890,15 @@ fn abandon_service_resources() void { frame_started = false; initialized = false; render_pipeline_initialized = false; - target = null; + render_target_initialized = false; + command_resources_initialized = false; + device = .null; + submit_queue = 
.null; + fill_queue = .null; + default_texture = 0; bound_texture = 0; meshes.clear(); textures.clear(); - sequential_indices = null; deferred_mesh_frees.clear(); } @@ -640,297 +917,58 @@ fn mark_current_frame_mesh_slots_in_flight() void { fn mesh_draw_bytes_needed(count: usize) ?usize { if (count == 0) return 0; - const max = std.math.maxInt(usize); - if (count > max / VERTEX_STRIDE) return null; + if (count > std.math.maxInt(usize) / VERTEX_STRIDE) return null; return count * VERTEX_STRIDE; } -fn clear_current_framebuffer(bits: c.C3D_ClearBits) void { - const fb = c.C3D_GetFrameBuf() orelse return; - c.C3D_FrameBufClear(fb, bits, clear_color, 0); -} - -fn apply_render_state() void { - set_alpha_blend(alpha_blend_enabled); - set_depth_write(depth_write_enabled); - set_culling(cull_face_enabled); - if (!fog_enabled) { - c.C3D_FogGasMode(c.GPU_NO_FOG, c.GPU_PLAIN_DENSITY, false); - } -} - -fn bind_program(pl: *PipelineData) void { - c.C3D_BindProgram(&pl.program); -} - -fn bind_texenv_direct() void { - if (bound_texture == 0) { - bind_texenv_stage_direct(0, c.GPU_TEVSOURCES(c.GPU_PRIMARY_COLOR, 0, 0), c.GPU_REPLACE); - } else { - bind_texenv_stage_direct(0, c.GPU_TEVSOURCES(c.GPU_TEXTURE0, c.GPU_PRIMARY_COLOR, 0), c.GPU_MODULATE); - } - - bind_texenv_stage_direct(1, c.GPU_TEVSOURCES(c.GPU_PREVIOUS, 0, 0), c.GPU_REPLACE); - bind_texenv_stage_direct(2, c.GPU_TEVSOURCES(c.GPU_PREVIOUS, 0, 0), c.GPU_REPLACE); - bind_texenv_stage_direct(3, c.GPU_TEVSOURCES(c.GPU_PREVIOUS, 0, 0), c.GPU_REPLACE); - bind_texenv_stage_direct(4, c.GPU_TEVSOURCES(c.GPU_PREVIOUS, 0, 0), c.GPU_REPLACE); - bind_texenv_stage_direct(5, c.GPU_TEVSOURCES(c.GPU_PREVIOUS, 0, 0), c.GPU_REPLACE); -} - -fn bind_texenv_stage_direct(stage: c_int, source: u32, combiner: u32) void { - const source_both = source | (source << 16); - const combiner_both = combiner | (combiner << 16); - const regs = [_]u32{ - source_both, - 0, - combiner_both, - 0xffffffff, - @as(u32, @intCast(c.GPU_TEVSCALE_1)) | (@as(u32, 
@intCast(c.GPU_TEVSCALE_1)) << 16), - }; - gpu_cmd_add_incremental_writes(texenv_source_reg(stage), regs[0..]); -} - -fn texenv_source_reg(stage: c_int) c_int { - return switch (stage) { - 0 => c.GPUREG_TEXENV0_SOURCE, - 1 => c.GPUREG_TEXENV1_SOURCE, - 2 => c.GPUREG_TEXENV2_SOURCE, - 3 => c.GPUREG_TEXENV3_SOURCE, - 4 => c.GPUREG_TEXENV4_SOURCE, - 5 => c.GPUREG_TEXENV5_SOURCE, - else => unreachable, - }; -} - -fn bind_vertex_layout_direct(pl: *const PipelineData) void { - gpu_cmd_add_write(c.GPUREG_ATTRIBBUFFERS_LOC, BUFFER_BASE_PADDR >> 3); - gpu_cmd_add_incremental_writes(c.GPUREG_ATTRIBBUFFERS_FORMAT_LOW, pl.attr_info.flags[0..]); - gpu_cmd_add_write(c.GPUREG_VERTEX_OFFSET, 0); - gpu_cmd_add_write(c.GPUREG_ATTRIBBUFFER0_CONFIG1, @intCast(VERTEX_BUFFER_PERMUTATION)); - gpu_cmd_add_write(c.GPUREG_ATTRIBBUFFER0_CONFIG2, vertex_buffer_format()); - gpu_cmd_add_write(c.GPUREG_VSH_ATTRIBUTES_PERMUTATION_LOW, @intCast(VERTEX_BUFFER_PERMUTATION)); - gpu_cmd_add_write(c.GPUREG_VSH_ATTRIBUTES_PERMUTATION_HIGH, 0); - set_vsh_input_count_direct(VERTEX_ATTR_COUNT); -} - -fn bind_vertex_buffer_direct(data: [*]align(16) const u8) bool { - const phys = c.osConvertVirtToPhys(data); - if (phys < BUFFER_BASE_PADDR) return false; - gpu_cmd_add_write(c.GPUREG_ATTRIBBUFFER0_OFFSET, phys - BUFFER_BASE_PADDR); - return true; -} - -fn draw_elements_direct(count: usize) bool { - const indices = ensure_index_buffer(count) catch return false; - const phys = c.osConvertVirtToPhys(indices.ptr); - if (phys < BUFFER_BASE_PADDR) return false; - - gpu_cmd_add_write(c.GPUREG_INDEXBUFFER_CONFIG, (phys - BUFFER_BASE_PADDR) | (1 << 31)); - gpu_cmd_add_masked_write(c.GPUREG_PRIMITIVE_CONFIG, 2, @intCast(c.GPU_GEOMETRY_PRIM)); - gpu_cmd_add_write(c.GPUREG_RESTART_PRIMITIVE, 1); - gpu_cmd_add_write(c.GPUREG_NUMVERTICES, @intCast(count)); - gpu_cmd_add_write(c.GPUREG_VERTEX_OFFSET, 0); - gpu_cmd_add_masked_write(c.GPUREG_GEOSTAGE_CONFIG, 2, 0x100); - gpu_cmd_add_masked_write(c.GPUREG_GEOSTAGE_CONFIG2, 2, 
0x100); - gpu_cmd_add_masked_write(c.GPUREG_START_DRAW_FUNC0, 1, 0); - gpu_cmd_add_write(c.GPUREG_DRAWELEMENTS, 1); - gpu_cmd_add_masked_write(c.GPUREG_START_DRAW_FUNC0, 1, 1); - gpu_cmd_add_masked_write(c.GPUREG_GEOSTAGE_CONFIG, 2, 0); - gpu_cmd_add_masked_write(c.GPUREG_GEOSTAGE_CONFIG2, 2, 0); - gpu_cmd_add_write(c.GPUREG_VTX_FUNC, 1); - gpu_cmd_add_masked_write(c.GPUREG_PRIMITIVE_CONFIG, 0x8, 0); - gpu_cmd_add_masked_write(c.GPUREG_PRIMITIVE_CONFIG, 0x8, 0); - return true; -} - -fn draw_arrays_direct(first: u32, count: u32) void { - gpu_cmd_add_masked_write(c.GPUREG_PRIMITIVE_CONFIG, 2, @intCast(c.GPU_TRIANGLES)); - gpu_cmd_add_write(c.GPUREG_RESTART_PRIMITIVE, 1); - gpu_cmd_add_write(c.GPUREG_INDEXBUFFER_CONFIG, 0x80000000); - gpu_cmd_add_write(c.GPUREG_NUMVERTICES, count); - gpu_cmd_add_write(c.GPUREG_VERTEX_OFFSET, first); - gpu_cmd_add_masked_write(c.GPUREG_GEOSTAGE_CONFIG2, 1, 1); - gpu_cmd_add_masked_write(c.GPUREG_START_DRAW_FUNC0, 1, 0); - gpu_cmd_add_write(c.GPUREG_DRAWARRAYS, 1); - gpu_cmd_add_masked_write(c.GPUREG_START_DRAW_FUNC0, 1, 1); - gpu_cmd_add_masked_write(c.GPUREG_GEOSTAGE_CONFIG2, 1, 0); - gpu_cmd_add_write(c.GPUREG_VTX_FUNC, 1); -} - -fn ensure_index_buffer(count: usize) ![]align(CACHE_LINE_SIZE) u16 { - if (count == 0 or count > std.math.maxInt(u16) + 1) return error.UnsupportedIndexCount; - if (sequential_indices) |indices| { - if (indices.len >= count) return indices[0..count]; - render_alloc.free(indices); - sequential_indices = null; - } - - const indices = try render_alloc.alignedAlloc(u16, .fromByteUnits(CACHE_LINE_SIZE), count); - const bytes: [*]const u8 = @ptrCast(indices.ptr); - if (!is_linear_fcram(bytes, indices.len * @sizeOf(u16))) { - render_alloc.free(indices); - return error.IndexBufferNotLinear; - } - - for (indices, 0..) 
|*idx, i| idx.* = @intCast(i); - flush_data_cache(bytes, indices.len * @sizeOf(u16)); - sequential_indices = indices; - return indices; -} - -fn free_index_buffer() void { - if (sequential_indices) |indices| { - render_alloc.free(indices); - sequential_indices = null; - } -} - -fn finish_frame_direct() void { - gpu_cmd_add_write(c.GPUREG_FRAMEBUFFER_FLUSH, 1); - gpu_cmd_add_write(c.GPUREG_FRAMEBUFFER_INVALIDATE, 1); - gpu_cmd_add_write(c.GPUREG_EARLYDEPTH_CLEAR, 1); -} - -fn vertex_buffer_format() u32 { - return (@as(u32, @intCast(VERTEX_STRIDE)) << 16) | - (@as(u32, @intCast(VERTEX_ATTR_COUNT)) << 28); -} - -fn set_vsh_input_count_direct(count: c_int) void { - const value = @as(u32, @intCast(count - 1)); - gpu_cmd_add_masked_write(c.GPUREG_VSH_INPUTBUFFER_CONFIG, 0xB, SH_MODE_VSH | value); - gpu_cmd_add_write(c.GPUREG_VSH_NUM_ATTR, value); -} - -fn gpu_cmd_add_write(reg: c_int, value: u32) void { - gpu_cmd_add_masked_write(reg, 0xF, value); -} - -fn gpu_cmd_add_masked_write(reg: c_int, mask: u32, value: u32) void { - var param = value; - c.GPUCMD_Add(gpu_cmd_header(false, mask, reg), ¶m, 1); -} - -fn gpu_cmd_add_writes(reg: c_int, values: []const u32) void { - c.GPUCMD_Add(gpu_cmd_header(false, 0xF, reg), values.ptr, @intCast(values.len)); -} - -fn gpu_cmd_add_incremental_writes(reg: c_int, values: []const u32) void { - c.GPUCMD_Add(gpu_cmd_header(true, 0xF, reg), values.ptr, @intCast(values.len)); -} - -fn gpu_cmd_header(incremental: bool, mask: u32, reg: c_int) u32 { - const inc: u32 = if (incremental) 1 else 0; - return (inc << 31) | ((mask & 0xF) << 16) | (@as(u32, @intCast(reg)) & 0x3FF); -} - -fn init_projection_transform() void { - var screen: c.C3D_Mtx = undefined; - c.Mtx_OrthoTilt(&screen, 0.0, @floatFromInt(SCREEN_WIDTH), 0.0, @floatFromInt(SCREEN_HEIGHT), 0.0, 1.0, true); - var viewport = logical_viewport_transform(); - projection_transform = c3d_mtx_mul(&screen, &viewport); -} - -fn logical_viewport_transform() c.C3D_Mtx { - var out: c.C3D_Mtx = 
undefined; - set_fvec(&out.r[0], @as(f32, @floatFromInt(SCREEN_WIDTH)) * 0.5, 0.0, 0.0, @as(f32, @floatFromInt(SCREEN_WIDTH)) * 0.5); - set_fvec(&out.r[1], 0.0, @as(f32, @floatFromInt(SCREEN_HEIGHT)) * 0.5, 0.0, @as(f32, @floatFromInt(SCREEN_HEIGHT)) * 0.5); - set_fvec(&out.r[2], 0.0, 0.0, -1.0, 1.0); - set_fvec(&out.r[3], 0.0, 0.0, 0.0, 1.0); - return out; -} - -fn upload_draw_uniforms(pl: *PipelineData, model: *const Mat4) void { - var aether_projection = mat4_to_c3d_transposed(proj_matrix); - var projection = c3d_mtx_mul(&projection_transform, &aether_projection); - const model_view = Mat4.mul(model.*, view_matrix); - var model_view_c3d = mat4_to_c3d_transposed(model_view); - - upload_matrix(pl.u_projection, &projection); - upload_matrix(pl.u_model_view, &model_view_c3d); - upload_vec4(pl.u_pos_scale, POS_SCALE); - upload_vec4(pl.u_uv_scale_offset, .{ - UV_SCALE[0], - UV_SCALE[1], - uv_offset[0], - uv_offset[1], - }); - upload_vec4(pl.u_color_scale, COLOR_SCALE); -} - -fn upload_matrix(location: c_int, matrix: *const c.C3D_Mtx) void { - const idx = uniform_location(location, 4) orelse return; - const words: [*]const u32 = @ptrCast(matrix); - gpu_cmd_add_write(c.GPUREG_VSH_FLOATUNIFORM_CONFIG, @as(u32, @intCast(idx)) | FLOAT_UNIFORM_UPLOAD_F32); - gpu_cmd_add_writes(c.GPUREG_VSH_FLOATUNIFORM_DATA, words[0..16]); -} - -fn upload_vec4(location: c_int, values: [4]f32) void { - const idx = uniform_location(location, 1) orelse return; - var vec: c.C3D_FVec = undefined; - set_fvec(&vec, values[0], values[1], values[2], values[3]); - const words: [*]const u32 = @ptrCast(&vec); - gpu_cmd_add_write(c.GPUREG_VSH_FLOATUNIFORM_CONFIG, @as(u32, @intCast(idx)) | FLOAT_UNIFORM_UPLOAD_F32); - gpu_cmd_add_writes(c.GPUREG_VSH_FLOATUNIFORM_DATA, words[0..4]); -} - -fn uniform_location(location: c_int, count: usize) ?usize { - if (location < 0) return null; - const idx: usize = @intCast(location); - if (idx + count > c.C3D_FVUNIF_COUNT) return null; - return idx; -} - -fn 
mat4_to_c3d_transposed(mat: Mat4) c.C3D_Mtx { - var out: c.C3D_Mtx = undefined; - inline for (0..4) |row| { - set_fvec(&out.r[row], mat.data[0][row], mat.data[1][row], mat.data[2][row], mat.data[3][row]); - } - return out; -} - -fn c3d_mtx_mul(a: *const c.C3D_Mtx, b: *const c.C3D_Mtx) c.C3D_Mtx { - var out: c.C3D_Mtx = undefined; - inline for (0..4) |row| { - var values: [4]f32 = undefined; - inline for (0..4) |col| { - var sum: f32 = 0.0; - inline for (0..4) |k| { - sum += fvec_component(&a.r[row], k) * fvec_component(&b.r[k], col); - } - values[col] = sum; - } - set_fvec(&out.r[row], values[0], values[1], values[2], values[3]); - } - return out; -} +fn create_texture_resources(width: u32, height: u32) !TextureData { + const size = texture_size(width, height); + // 3DSX launchers can map VRAM read-only for the CPU. Keep CPU-authored + // texture storage in linear FCRAM, which PICA can still sample by physical address. + const memory = try device.allocateMemory(.{ + .memory_type = .fcram_cached, + .allocation_size = .size(size), + }, null); + errdefer device.freeMemory(memory, null); + + const image = try device.createImage(.{ + .flags = .{}, + .type = .@"2d", + .tiling = .optimal, + .usage = .{ .sampled = true, .transfer_dst = true }, + .extent = .{ .width = @intCast(width), .height = @intCast(height) }, + .format = .a8b8g8r8_unorm, + .mip_levels = .@"1", + .array_layers = .@"1", + }, null); + errdefer device.destroyImage(image, null); + try device.bindImageMemory(image, memory, .size(0)); + + const view = try device.createImageView(.{ + .type = .@"2d", + .format = .a8b8g8r8_unorm, + .image = image, + .subresource_range = .full, + }, null); -fn fvec_component(v: *const c.C3D_FVec, index: usize) f32 { - return switch (index) { - 0 => v.unnamed_0.x, - 1 => v.unnamed_0.y, - 2 => v.unnamed_0.z, - 3 => v.unnamed_0.w, - else => unreachable, + return .{ + .width = width, + .height = height, + .memory = memory, + .image = image, + .view = view, }; } -fn set_fvec(v: 
*c.C3D_FVec, x: f32, y: f32, z: f32, w: f32) void { - v.unnamed_0.x = x; - v.unnamed_0.y = y; - v.unnamed_0.z = z; - v.unnamed_0.w = w; -} - -fn rebind_texture() void { - if (!initialized or bound_texture == 0) return; - const tex = texture_slot(bound_texture) orelse return; - c.C3D_TexBind(0, tex_ptr(tex)); +fn free_texture(tex: *TextureData) void { + device.destroyImageView(tex.view, null); + device.destroyImage(tex.image, null); + device.freeMemory(tex.memory, null); } fn validate_texture(width: u32, height: u32, data: []align(16) u8) !void { if (width < MIN_TEXTURE_SIZE or height < MIN_TEXTURE_SIZE) { - Util.engine_logger.err("3ds_gfx: texture {d}x{d} is too small; Citro3D requires at least {d}x{d}", .{ + Util.engine_logger.err("3ds_gfx: texture {d}x{d} is too small; 3DS requires at least {d}x{d}", .{ width, height, MIN_TEXTURE_SIZE, @@ -939,7 +977,7 @@ fn validate_texture(width: u32, height: u32, data: []align(16) u8) !void { return error.TextureTooSmall; } if (width > MAX_TEXTURE_SIZE or height > MAX_TEXTURE_SIZE) { - Util.engine_logger.err("3ds_gfx: texture {d}x{d} is too large; Citro3D limit is {d}x{d}", .{ + Util.engine_logger.err("3ds_gfx: texture {d}x{d} is too large; 3DS limit is {d}x{d}", .{ width, height, MAX_TEXTURE_SIZE, @@ -948,7 +986,7 @@ fn validate_texture(width: u32, height: u32, data: []align(16) u8) !void { return error.TextureTooLarge; } if (!std.math.isPowerOfTwo(width) or !std.math.isPowerOfTwo(height)) { - Util.engine_logger.err("3ds_gfx: texture {d}x{d} is unsupported; Citro3D requires power-of-two dimensions", .{ width, height }); + Util.engine_logger.err("3ds_gfx: texture {d}x{d} is unsupported; 3DS textures require power-of-two dimensions", .{ width, height }); return error.UnsupportedTextureSize; } @@ -958,66 +996,18 @@ fn validate_texture(width: u32, height: u32, data: []align(16) u8) !void { Util.engine_logger.err("3ds_gfx: texture upload data must be allocated in linear FCRAM", .{}); return error.TextureDataNotLinear; } - if (size 
> std.math.maxInt(u32)) return error.TextureTooLarge; -} - -fn init_tex_mirror(width: u32, height: u32, data: ?*anyopaque, size: u32) TexMirror { - return .{ - .data = data, - .fmt_size = (size << 4) | @as(u32, @intCast(c.GPU_RGBA8)), - .dim = (width << 16) | height, - .param = texture_param(), - .border = 0, - .lod_param = 0, - }; -} - -fn texture_param() u32 { - return @intCast( - c.GPU_TEXTURE_MODE(c.GPU_TEX_2D) | - c.GPU_TEXTURE_MAG_FILTER(c.GPU_NEAREST) | - c.GPU_TEXTURE_MIN_FILTER(c.GPU_NEAREST) | - c.GPU_TEXTURE_WRAP_S(c.GPU_REPEAT) | - c.GPU_TEXTURE_WRAP_T(c.GPU_REPEAT), - ); } fn upload_texture_data(tex: *TextureData, data: []align(16) const u8) !void { - const size = texture_size(tex.width, tex.height); - const upload = try ensure_texture_staging(tex, size / TEX_BPP); - - convert_texture_data(upload, data, tex.width, tex.height); - flush_texture_source(upload); - c.C3D_TexLoadImage(tex_ptr(tex), upload.ptr, c.GPU_TEXFACE_2D, 0); - c.C3D_TexFlush(tex_ptr(tex)); -} - -fn ensure_texture_staging(tex: *TextureData, len: usize) ![]align(16) u32 { - if (tex.staging) |buf| { - if (buf.len >= len) return buf[0..len]; - render_alloc.free(buf); - tex.staging = null; - } - - const staging = try render_alloc.alignedAlloc(u32, .fromByteUnits(CACHE_LINE_SIZE), len); - const bytes: [*]const u8 = @ptrCast(staging.ptr); - if (!is_linear_fcram(bytes, len * TEX_BPP)) { - render_alloc.free(staging); - std.debug.panic("3ds_gfx: texture staging must be allocated in linear FCRAM", .{}); - } + const mapped = try device.mapMemory(tex.memory, .size(0), .size(texture_size(tex.width, tex.height))); + defer device.unmapMemory(tex.memory); - tex.staging = staging; - return staging; + convert_texture_data(mapped, data, tex.width, tex.height); + try flush_memory(tex.memory, mapped.len); } -fn free_texture_staging(tex: *TextureData) void { - if (tex.staging) |staging| { - render_alloc.free(staging); - tex.staging = null; - } -} - -fn convert_texture_data(dst: []align(16) u32, src: 
[]align(16) const u8, width: u32, height: u32) void { +fn convert_texture_data(dst: []u8, src: []align(16) const u8, width: u32, height: u32) void { + const words: []align(4) u32 = @ptrCast(@alignCast(dst)); const factors = twiddle_factors(width, height); var y_bits: u32 = 0; for (0..height) |y| { @@ -1027,7 +1017,7 @@ fn convert_texture_data(dst: []align(16) u32, src: []align(16) const u8, width: const xu: u32 = @intCast(x); const src_off = (@as(usize, yu) * width + xu) * TEX_BPP; const dst_pixel = x_bits | (factors.mask_y - y_bits); - dst[@intCast(dst_pixel)] = + words[@intCast(dst_pixel)] = (@as(u32, src[src_off + 0]) << 24) | (@as(u32, src[src_off + 1]) << 16) | (@as(u32, src[src_off + 2]) << 8) | @@ -1062,26 +1052,37 @@ fn twiddle_factors(width: u32, height: u32) TwiddleFactors { return .{ .mask_x = mask_x, .mask_y = mask_y }; } -fn flush_texture_source(data: []align(16) u32) void { - const bytes: [*]const u8 = @ptrCast(data.ptr); - flush_data_cache(bytes, data.len * TEX_BPP); -} - fn texture_size(width: u32, height: u32) u32 { return @intCast(@as(usize, width) * height * TEX_BPP); } -fn tex_ptr(tex: *TextureData) *c.C3D_Tex { - return @ptrCast(&tex.tex); -} +fn rebuild_fog_table() void { + const safe_end = if (fog_state.end <= fog_state.start) fog_state.start + 0.001 else fog_state.end; + var values: [129]f32 = undefined; + for (&values, 0..) |*value, i| { + const t = @as(f32, @floatFromInt(i)) / 128.0; + const distance = fog_state.start + (safe_end - fog_state.start) * t; + const fog = std.math.clamp((distance - fog_state.start) / (safe_end - fog_state.start), 0.0, 1.0); + value.* = fog; + } -fn flush_data_cache(ptr: [*]const u8, len: usize) void { - if (len == 0) return; + for (&fog_state.table, 0..) 
|*raw, i| { + const current = values[i]; + const next = values[i + 1]; + const lut_value = pica.Graphics.TextureCombiners.FogLutValue{ + .value = .ofSaturating(current), + .next_difference = .ofSaturating(next - current), + }; + raw.* = @bitCast(zitrus.hardware.LsbRegister(pica.Graphics.TextureCombiners.FogLutValue).init(lut_value)); + } +} - const start = @intFromPtr(ptr) & ~(CACHE_LINE_SIZE - 1); - const end = std.mem.alignForward(usize, @intFromPtr(ptr) + len, CACHE_LINE_SIZE); - const flush_ptr: *const anyopaque = @ptrFromInt(start); - _ = c.GSPGPU_FlushDataCache(flush_ptr, @intCast(end - start)); +fn flush_memory(memory: mango.DeviceMemory, len: usize) !void { + try device.flushMappedMemoryRanges(&.{.{ + .memory = memory, + .offset = .size(0), + .size = .size(@intCast(len)), + }}); } fn mesh_slot(handle: Mesh.Handle) ?*MeshData { @@ -1106,6 +1107,10 @@ fn snorm16_scale() f32 { return 1.0 / 32767.0; } +fn float_to_u8(v: f32) u8 { + return @intFromFloat(std.math.clamp(v, 0.0, 1.0) * 255.0); +} + fn is_linear_fcram(ptr: [*]const u8, len: usize) bool { const start = @intFromPtr(ptr); return in_range(start, len, OS_FCRAM_VADDR, OS_FCRAM_SIZE) or @@ -1117,15 +1122,3 @@ fn in_range(start: usize, len: usize, base: usize, size: usize) bool { const offset = start - base; return offset <= size and len <= size - offset; } - -fn float_to_u8(v: f32) u8 { - return @intFromFloat(@max(0.0, @min(1.0, v)) * 255.0); -} - -fn pack_color_rgba(r: f32, g: f32, b: f32, a: f32) u32 { - const ri: u32 = float_to_u8(r); - const gi: u32 = float_to_u8(g); - const bi: u32 = float_to_u8(b); - const ai: u32 = float_to_u8(a); - return (ri << 24) | (gi << 16) | (bi << 8) | ai; -} diff --git a/src/platform/3ds/mango/fmt.zig b/src/platform/3ds/mango/fmt.zig new file mode 100644 index 0000000..5dfd04f --- /dev/null +++ b/src/platform/3ds/mango/fmt.zig @@ -0,0 +1,2 @@ +pub const zpsh = @import("fmt/zpsh.zig"); + diff --git a/src/platform/3ds/mango/fmt/zpsh.zig b/src/platform/3ds/mango/fmt/zpsh.zig 
new file mode 100644 index 0000000..0b41066 --- /dev/null +++ b/src/platform/3ds/mango/fmt/zpsh.zig @@ -0,0 +1,430 @@ +//! Zitrus PICA200 shader +//! +//! A simple shader format which omits the need for positional reads and has an overall simpler structure. +//! It omits numerous things that are not used or cannot be used by zitrus. +//! +//! Even if things are tightly packed, all sections are aligned to 32-bits. + +pub const magic = "ZPSH"; + +pub const Header = extern struct { + pub const Shader = packed struct(u32) { + entrypoints: u12, + instructions_minus_one: u12, + descriptors: u8, + + pub fn init(entrypoints: usize, instructions_size: usize, descriptors: usize) Shader { + return .{ + .entrypoints = @intCast(entrypoints), + .instructions_minus_one = @intCast(instructions_size - 1), + .descriptors = @intCast(descriptors), + }; + } + + pub fn instructions(size: Shader) usize { + return @as(usize, size.instructions_minus_one) + 1; + } + }; + + pub const Flags = packed struct(u8) { _: u8 = 0 }; + + magic: [magic.len]u8 = magic.*, + shader: Shader, + /// In `u32`s + entry_string_table_size: u16, + flags: Flags = .{}, + /// In `u32`s + header_size: u8 = @divExact(@sizeOf(Header), @sizeOf(u32)), + /// A xxHash32 hash of instructions and operand descriptors, in the described order. 
+ /// Seed is 67 + code_hash: u32, + + pub const CheckError = error{ NotZpsh, InvalidZpsh }; + + pub fn check(hdr: Header) CheckError!void { + if (!std.mem.eql(u8, &hdr.magic, magic)) return error.NotZpsh; + } +}; + +pub const EntrypointHeader = extern struct { + pub const Flags = packed struct(u16) { + _: u16 = 0, + }; + + pub const ShaderInfo = packed struct(u16) { + pub const vertex: ShaderInfo = .{ .type = .vertex }; + pub const Type = enum(u2) { vertex, geometry_point, geometry_variable, geometry_fixed }; + + pub const Geometry = packed union(u14) { + pub const empty: Geometry = .{ .point = std.mem.zeroes(Geometry.Point) }; + + pub const Point = packed struct(u14) { + inputs_minus_one: u4, + _: u10 = 0, + }; + + pub const Variable = packed struct(u14) { + full_vertices: u5, + _: u9 = 0, + }; + + pub const Fixed = packed struct(u14) { + vertices_minus_one: u4, + uniform_start: FloatingRegister, + _: u3 = 0, + }; + + point: Point, + fixed: Fixed, + variable: Variable, + + pub fn initPoint(inputs: u5) Geometry { + return .{ .point = .{ .inputs_minus_one = @intCast(inputs - 1) } }; + } + + pub fn initVariable(full_vertices: u5) Geometry { + return .{ .variable = .{ .full_vertices = full_vertices } }; + } + + pub fn initFixed(vertices: u5, uniform_start: FloatingRegister) Geometry { + return .{ .fixed = .{ .vertices_minus_one = @intCast(vertices - 1), .uniform_start = uniform_start } }; + } + }; + + type: Type, + geometry: Geometry = .empty, + }; + + pub const BooleanConstantMask = packed struct(u16) { + // zig fmt: off + b0: bool, b1: bool, b2: bool, b3: bool, b4: bool, b5: bool, b6: bool, b7: bool, + b8: bool, b9: bool, b10: bool, b11: bool, b12: bool, b13: bool, b14: bool, b15: bool, + // zig fmt: on + + pub fn fromSet(set: std.EnumSet(BooleanRegister)) BooleanConstantMask { + var mask: BooleanConstantMask = std.mem.zeroes(BooleanConstantMask); + + for (std.enums.values(BooleanRegister)) |b| { + std.mem.writePackedInt(u1, std.mem.asBytes(&mask), @intFromEnum(b), 
@intFromBool(set.contains(b)), .little); + } + + return mask; + } + + pub fn toSet(mask: BooleanConstantMask) std.EnumSet(BooleanRegister) { + var set: std.EnumSet(BooleanRegister) = .initEmpty(); + + for (std.enums.values(BooleanRegister)) |b| { + set.setPresent(b, std.mem.readPackedInt(u1, std.mem.asBytes(&mask), @intFromEnum(b), .little) != 0); + } + + return set; + } + }; + + pub const IntegerConstantMask = packed struct(u16) { + // zig fmt: off + i0: bool, i1: bool, + i2: bool, i3: bool, + // zig fmt: on + _: u12, + + pub fn fromSet(set: std.EnumSet(IntegerRegister)) IntegerConstantMask { + var mask: IntegerConstantMask = std.mem.zeroes(IntegerConstantMask); + + for (std.enums.values(IntegerRegister)) |i| { + std.mem.writePackedInt(u1, std.mem.asBytes(&mask), @intFromEnum(i), @intFromBool(set.contains(i)), .little); + } + + return mask; + } + + pub fn toSet(mask: IntegerConstantMask) std.EnumSet(IntegerRegister) { + var set: std.EnumSet(IntegerRegister) = .initEmpty(); + + for (std.enums.values(IntegerRegister)) |i| { + set.setPresent(i, std.mem.readPackedInt(u1, std.mem.asBytes(&mask), @intFromEnum(i), .little) != 0); + } + + return set; + } + }; + + pub const FloatingConstantMask = extern struct { + // zig fmt: off + pub const Low = packed struct(u32) { + f0: bool, f1: bool, f2: bool, f3: bool, f4: bool, f5: bool, f6: bool, f7: bool, + f8: bool, f9: bool, f10: bool, f11: bool, f12: bool, f13: bool, f14: bool, f15: bool, + f16: bool, f17: bool, f18: bool, f19: bool, f20: bool, f21: bool, f22: bool, f23: bool, + f24: bool, f25: bool, f26: bool, f27: bool, f28: bool, f29: bool, f30: bool, f31: bool, + }; + + pub const Mid = packed struct(u32) { + f32: bool, f33: bool, f34: bool, f35: bool, f36: bool, f37: bool, f38: bool, f39: bool, + f40: bool, f41: bool, f42: bool, f43: bool, f44: bool, f45: bool, f46: bool, f47: bool, + f48: bool, f49: bool, f50: bool, f51: bool, f52: bool, f53: bool, f54: bool, f55: bool, + f56: bool, f57: bool, f58: bool, f59: bool, f60: 
bool, f61: bool, f62: bool, f63: bool, + }; + + pub const High = packed struct(u32) { + f64: bool, f65: bool, f66: bool, f67: bool, f68: bool, f69: bool, f70: bool, f71: bool, + f72: bool, f73: bool, f74: bool, f75: bool, f76: bool, f77: bool, f78: bool, f79: bool, + f80: bool, f81: bool, f82: bool, f83: bool, f84: bool, f85: bool, f86: bool, f87: bool, + f88: bool, f89: bool, f90: bool, f91: bool, f92: bool, f93: bool, f94: bool, f95: bool, + }; + // zig fmt: on + + low: Low, + mid: Mid, + high: High, + + pub fn fromSet(set: std.EnumSet(FloatingRegister)) FloatingConstantMask { + var mask: FloatingConstantMask = std.mem.zeroes(FloatingConstantMask); + + for (std.enums.values(FloatingRegister)) |f| { + std.mem.writePackedInt(u1, std.mem.asBytes(&mask), @intFromEnum(f), @intFromBool(set.contains(f)), .little); + } + + return mask; + } + + pub fn toSet(mask: FloatingConstantMask) std.EnumSet(FloatingRegister) { + var set: std.EnumSet(FloatingRegister) = .initEmpty(); + + for (std.enums.values(FloatingRegister)) |f| { + set.setPresent(f, std.mem.readPackedInt(u1, std.mem.asBytes(&mask), @intFromEnum(f), .little) != 0); + } + + return set; + } + }; + + pub const OutputMask = packed struct(u32) { + // zig fmt: off + o0: bool, o1: bool, o2: bool, o3: bool, o4: bool, o5: bool, o6: bool, o7: bool, + o8: bool, o9: bool, o10: bool, o11: bool, o12: bool, o13: bool, o14: bool, o15: bool, + _: u16 = 0, + // zig fmt: on + + pub fn fromSet(set: std.EnumSet(OutputRegister)) OutputMask { + var mask: OutputMask = std.mem.zeroes(OutputMask); + + for (std.enums.values(OutputRegister)) |o| { + std.mem.writePackedInt(u1, std.mem.asBytes(&mask), @intFromEnum(o), @intFromBool(set.contains(o)), .little); + } + + return mask; + } + + pub fn toSet(mask: OutputMask) std.EnumSet(OutputRegister) { + var set: std.EnumSet(OutputRegister) = .initEmpty(); + + for (std.enums.values(OutputRegister)) |o| { + set.setPresent(o, std.mem.readPackedInt(u1, std.mem.asBytes(&mask), @intFromEnum(o), .little) 
!= 0); + } + + return set; + } + }; + + name_string_offset: u32, + instruction_offset: u16, + info: ShaderInfo, + flags: Flags, + header_size: u16 = @divExact(@sizeOf(EntrypointHeader), @sizeOf(u32)), + + // NOTE: Constants are sorted, that is, e.g: f0 = true, f1 = false, f2 = true then in memory there will be two floating constant entries that correspond to f0 and f2. Same for integers and same for outputs. + boolean_constant_mask: BooleanConstantMask, + integer_constant_mask: IntegerConstantMask, + floating_constant_mask: FloatingConstantMask, + output_mask: OutputMask, +}; + +pub const Parsed = struct { + code_hash: u32, + instructions: []const shader.encoding.Instruction, + operand_descriptors: []const shader.encoding.OperandDescriptor, + string_table: []const u8, + entrypoint_offsets: []const u8, + entrypoint_data: []const u8, + entrypoints: usize, + + pub fn initBuffer(buffer: []const u8) Header.CheckError!Parsed { + const header = try checkedSlice(buffer, 0, @sizeOf(Header)); + if (!std.mem.eql(u8, header[0..magic.len], magic)) return error.NotZpsh; + + const shader_word = try readLittle(u32, header, 4); + const entrypoints: usize = @intCast(shader_word & 0xfff); + const instructions_minus_one: u16 = @intCast((shader_word >> 12) & 0xfff); + const descriptors: u8 = @intCast(shader_word >> 24); + const instructions_count = @as(usize, instructions_minus_one) + 1; + const entry_string_table_size = try readLittle(u16, header, 8); + const header_size_words = try readLittle(u8, header, 11); + const code_hash = try readLittle(u32, header, 12); + + const header_size = try checkedMul(@as(usize, header_size_words), @sizeOf(u32)); + if (header_size < @sizeOf(Header)) return error.InvalidZpsh; + const entrypoint_offsets_start = header_size; + const entrypoint_offsets_size = try checkedMul(@as(usize, entrypoints), @sizeOf(u32)); + const code_start = try checkedAdd(entrypoint_offsets_start, entrypoint_offsets_size); + const code_size = try 
checkedMul(@sizeOf(shader.encoding.Instruction), instructions_count); + const operands_start = try checkedAdd(code_start, code_size); + const operands_size = try checkedMul(@sizeOf(shader.encoding.OperandDescriptor), @as(usize, descriptors)); + const string_table_start = try checkedAdd(operands_start, operands_size); + const string_table_size = try checkedMul(@as(usize, entry_string_table_size), @sizeOf(u32)); + const entrypoints_start = try checkedAdd(string_table_start, string_table_size); + + if (entrypoints_start > buffer.len) return error.InvalidZpsh; + + return .{ + .code_hash = code_hash, + .instructions = @alignCast(std.mem.bytesAsSlice(pica.shader.encoding.Instruction, try checkedSlice(buffer, code_start, code_size))), + .operand_descriptors = @alignCast(std.mem.bytesAsSlice(pica.shader.encoding.OperandDescriptor, try checkedSlice(buffer, operands_start, operands_size))), + .string_table = try checkedSlice(buffer, string_table_start, string_table_size), + .entrypoint_offsets = try checkedSlice(buffer, entrypoint_offsets_start, entrypoint_offsets_size), + .entrypoint_data = buffer[entrypoints_start..], + .entrypoints = entrypoints, + }; + } + + pub fn iterator(parsed: *const Parsed) EntrypointIterator { + return .{ + .parsed = parsed, + .offset_cursor = 0, + }; + } + + // TODO: This assumes a proper ZPSH (as we're the only ones who currently use them we're allowed to not care :p) + pub const EntrypointIterator = struct { + pub const Entry = struct { + info: EntrypointHeader.ShaderInfo, + offset: u16, + + name: [:0]const u8, + boolean_constant_set: std.enums.EnumSet(BooleanRegister), + integer_constant_set: std.enums.EnumSet(IntegerRegister), + floating_constant_set: std.enums.EnumSet(FloatingRegister), + output_set: std.enums.EnumSet(OutputRegister), + + integer_constants: []const [4]u8, + floating_constants: []const pica.F7_16x4, + output_map: []const pica.OutputMap, + }; + + parsed: *const Parsed, + offset_cursor: usize, + + pub fn next(it: 
*EntrypointIterator) Header.CheckError!?Entry { + if (it.offset_cursor >= it.parsed.entrypoint_offsets.len) return null; + + const offset = try readLittle(u32, it.parsed.entrypoint_offsets, it.offset_cursor); + it.offset_cursor +%= @sizeOf(u32); + + const entry_offset: usize = @intCast(offset); + if (entry_offset > it.parsed.entrypoint_data.len) return error.InvalidZpsh; + const entry_start = it.parsed.entrypoint_data[entry_offset..]; + _ = try checkedSlice(entry_start, 0, @sizeOf(EntrypointHeader)); + + const name_string_offset = try readLittle(u32, entry_start, 0); + const instruction_offset = try readLittle(u16, entry_start, 4); + const info: EntrypointHeader.ShaderInfo = @bitCast(try readLittle(u16, entry_start, 6)); + const boolean_constant_set = enumSetFromMask(BooleanRegister, u16, try readLittle(u16, entry_start, 12)); + const integer_constant_set = enumSetFromMask(IntegerRegister, u16, try readLittle(u16, entry_start, 14)); + const floating_constant_set = floatingSetFromMask( + try readLittle(u32, entry_start, 16), + try readLittle(u32, entry_start, 20), + try readLittle(u32, entry_start, 24), + ); + const output_map_set = enumSetFromMask(OutputRegister, u32, try readLittle(u32, entry_start, 28)); + + const integer_constants_byte_size = try checkedMul(integer_constant_set.count(), @sizeOf([4]u8)); + const floating_constants_byte_size = try checkedMul(floating_constant_set.count(), @sizeOf(pica.F7_16x4)); + const output_map_byte_size = try checkedMul(output_map_set.count(), @sizeOf(pica.OutputMap)); + const integer_constants_start = @sizeOf(EntrypointHeader); + const floating_constants_start = try checkedAdd(integer_constants_start, integer_constants_byte_size); + const output_map_start = try checkedAdd(floating_constants_start, floating_constants_byte_size); + + if (instruction_offset > std.math.maxInt(u12)) return error.InvalidZpsh; + if (name_string_offset > it.parsed.string_table.len) return error.InvalidZpsh; + const name_tail = 
it.parsed.string_table[name_string_offset..]; + const name_end = std.mem.indexOfScalar(u8, name_tail, 0) orelse return error.InvalidZpsh; + + return .{ + .info = info, + .offset = @intCast(instruction_offset), + + .name = name_tail[0..name_end :0], + .boolean_constant_set = boolean_constant_set, + .integer_constant_set = integer_constant_set, + .floating_constant_set = floating_constant_set, + .output_set = output_map_set, + + .integer_constants = @alignCast(std.mem.bytesAsSlice([4]u8, try checkedSlice(entry_start, integer_constants_start, integer_constants_byte_size))), + .floating_constants = @alignCast(std.mem.bytesAsSlice(pica.F7_16x4, try checkedSlice(entry_start, floating_constants_start, floating_constants_byte_size))), + .output_map = @alignCast(std.mem.bytesAsSlice(pica.OutputMap, try checkedSlice(entry_start, output_map_start, output_map_byte_size))), + }; + } + }; +}; + +fn checkedAdd(a: usize, b: usize) Header.CheckError!usize { + if (a > std.math.maxInt(usize) - b) return error.InvalidZpsh; + return a + b; +} + +fn checkedMul(a: usize, b: usize) Header.CheckError!usize { + if (b != 0 and a > std.math.maxInt(usize) / b) return error.InvalidZpsh; + return a * b; +} + +fn checkedSlice(buffer: []const u8, start: usize, len: usize) Header.CheckError![]const u8 { + const end = try checkedAdd(start, len); + if (end > buffer.len) return error.InvalidZpsh; + return buffer[start..end]; +} + +fn readLittle(comptime T: type, buffer: []const u8, offset: usize) Header.CheckError!T { + const bytes = try checkedSlice(buffer, offset, @sizeOf(T)); + return std.mem.readInt(T, bytes[0..@sizeOf(T)], .little); +} + +fn enumSetFromMask(comptime E: type, comptime T: type, mask: T) std.EnumSet(E) { + var set: std.EnumSet(E) = .initEmpty(); + for (std.enums.values(E)) |value| { + const bit: std.math.Log2Int(T) = @intCast(@intFromEnum(value)); + set.setPresent(value, ((mask >> bit) & 1) != 0); + } + return set; +} + +fn floatingSetFromMask(low: u32, mid: u32, high: u32) 
std.EnumSet(FloatingRegister) { + var set: std.EnumSet(FloatingRegister) = .initEmpty(); + for (std.enums.values(FloatingRegister)) |value| { + const index = @intFromEnum(value); + const word = switch (index / 32) { + 0 => low, + 1 => mid, + 2 => high, + else => unreachable, + }; + const bit: u5 = @intCast(index & 31); + set.setPresent(value, ((word >> bit) & 1) != 0); + } + return set; +} + +comptime { + std.debug.assert(std.mem.isAligned(@sizeOf(Header), @sizeOf(u32))); + std.debug.assert(std.mem.isAligned(@sizeOf(EntrypointHeader), @sizeOf(u32))); +} + +const builtin = @import("builtin"); +const std = @import("std"); +const zitrus = @import("zitrus"); +const pica = zitrus.hardware.pica; +const shader = pica.shader; + +const BooleanRegister = shader.register.Integral.Boolean; +const IntegerRegister = shader.register.Integral.Integer; +const FloatingRegister = shader.register.Source.Constant; +const OutputRegister = shader.register.Destination.Output; diff --git a/src/platform/3ds/mango/hardware.zig b/src/platform/3ds/mango/hardware.zig new file mode 100644 index 0000000..9613ef4 --- /dev/null +++ b/src/platform/3ds/mango/hardware.zig @@ -0,0 +1,201 @@ +//! Definitions for 3DS hardware + +pub const cpu = @import("hardware/cpu.zig"); +pub const pica = @import("hardware/pica.zig"); +pub const csnd = @import("hardware/csnd.zig"); +pub const pxi = @import("hardware/pxi.zig"); +pub const dsp = @import("hardware/dsp.zig"); +pub const hid = @import("hardware/hid.zig"); +pub const lgy = @import("hardware/lgy.zig"); +pub const i2c = @import("hardware/i2c.zig"); +pub const lcd = @import("hardware/lcd.zig"); + +/// Represents a register which is triggered by writing a value to it. +pub const Trigger = enum(u1) { trigger = 1 }; + +/// Represents an `AlignedPhysicalAddress` with no alignment. 
+pub const PhysicalAddress = AlignedPhysicalAddress(.@"1", .@"1"); + +/// Represents a `PhysicalAddress` which is aligned to `address_alignment` and stored with `address_shift` +pub fn AlignedPhysicalAddress(comptime address_alignment: std.mem.Alignment, comptime address_shift: std.mem.Alignment) type { + std.debug.assert(address_alignment.order(address_shift) != .lt); + + return enum(u32) { + zero = 0x00, + _, + + const AlignedPhysAddr = @This(); + pub const alignment = address_alignment; + pub const shift = address_shift; + + pub inline fn fromAddress(address: u32) AlignedPhysAddr { + return .fromPhysical(@as(PhysicalAddress, @enumFromInt(address))); + } + + pub fn fromPhysical(aligned_address: anytype) AlignedPhysAddr { + const OtherAlignedPhysAddr = @TypeOf(aligned_address); + + if (@typeInfo(OtherAlignedPhysAddr) != .@"enum" or !@hasDecl(OtherAlignedPhysAddr, "alignment") or !@hasDecl(OtherAlignedPhysAddr, "shift")) + @compileError("please provide a valid AlignedPhysicalAddress to .fromPhysical()"); + + const other_alignment = @field(OtherAlignedPhysAddr, "alignment"); + const other_shift = @field(OtherAlignedPhysAddr, "shift"); + + if (@TypeOf(other_alignment) != std.mem.Alignment or @TypeOf(other_shift) != std.mem.Alignment or OtherAlignedPhysAddr != AlignedPhysicalAddress(other_alignment, other_shift)) + @compileError("please provide a valid AlignedPhysicalAddress to .fromPhysical()"); + + const address = @intFromEnum(aligned_address) << @intCast(std.math.log2(other_shift.toByteUnits())); + + if (alignment.order(other_alignment) != .lt) { + std.debug.assert(alignment.check(address)); + } + + return @enumFromInt(address >> @intCast(std.math.log2(shift.toByteUnits()))); + } + }; +} + +/// Represents a register which only spans the LSb of a word, leaving the others unused.
+pub fn LsbRegister(comptime T: type) type { + std.debug.assert(@bitSizeOf(T) < @bitSizeOf(u32)); + + return packed struct(u32) { + const Lsb = @This(); + + value: T, + _: std.meta.Int(.unsigned, @bitSizeOf(u32) - @bitSizeOf(T)) = 0, + + pub inline fn init(value: T) Lsb { + return .{ .value = value }; + } + + pub fn format(lsb: Lsb, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print(if (std.meta.hasFn(T, "format")) "{f}" else "{any}", .{lsb.value}); + } + }; +} + +/// Represents a register which only spans the MSb of a word, leaving the others unused. +pub fn MsbRegister(comptime T: type) type { + std.debug.assert(@bitSizeOf(T) < @bitSizeOf(u32)); + + return packed struct(u32) { + const Msb = @This(); + + _: std.meta.Int(.unsigned, @bitSizeOf(u32) - @bitSizeOf(T)) = 0, + value: T, + + pub inline fn init(value: T) Msb { + return .{ .value = value }; + } + + pub fn format(msb: Msb, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print(if (std.meta.hasFn(T, "format")) "{f}" else "{any}", .{msb.value}); + } + }; +} + +/// Represents a bitpacked array of `n` elements of `T`. +/// Stored in native endian. +/// +/// A `BitpackedArray` is stored from LSb (0) to MSb (n - 1). +pub fn BitpackedArray(comptime T: type, comptime n: usize) type { + const total_bit_size = @bitSizeOf(T) * n; + const ArrayInt = @Int(.unsigned, total_bit_size); + const ElementInt = @Int(.unsigned, @bitSizeOf(T)); + + return packed struct(ArrayInt) { + pub const Int = ArrayInt; + + raw: ArrayInt, + + pub inline fn init(value: [n]T) Self { + // NOTE: Cannot be `undefined`, any `undefined` bits make the entire value `undefined`. + var bt: Self = std.mem.zeroes(Self); + inline for (0..n) |i| bt.set(i, value[i]); + return bt; + } + + pub inline fn splat(value: T) Self { + // NOTE: Cannot be `undefined`, any `undefined` bits make the entire value `undefined`.
+ var bt: Self = std.mem.zeroes(Self); + inline for (0..n) |i| bt.set(i, value); + return bt; + } + + pub inline fn slice(bt: Self, index: usize, comptime len: usize) BitpackedArray(T, len) { + std.debug.assert(index + len <= n); + + const NewBitpacked = BitpackedArray(T, len); + const bt_int: Int = @bitCast(bt); + const new_bt_int: NewBitpacked.Int = @truncate(bt_int >> (index * @bitSizeOf(T))); + return @bitCast(new_bt_int); + } + + pub inline fn get(bt: Self, index: usize) T { + const value = std.mem.readPackedInt(ElementInt, @ptrCast(&bt.raw), index * @bitSizeOf(ElementInt), .native); + + return switch (@typeInfo(T)) { + .@"enum" => @enumFromInt(value), + else => @bitCast(value), + }; + } + + pub inline fn copyWith(bt: Self, comptime index: usize, value: T) Self { + var new_bt: Self = bt; + new_bt.set(index, value); + return new_bt; + } + + pub inline fn set(bt: *Self, index: usize, value: T) void { + std.mem.writePackedInt(ElementInt, @ptrCast(&bt.raw), index * @bitSizeOf(ElementInt), switch (@typeInfo(T)) { + .@"enum" => @intFromEnum(value), + else => @bitCast(value), + }, .native); + } + + pub fn format(bt: Self, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.writeAll("{ "); + for (0..n) |i| { + try w.print(if (std.meta.hasFn(T, "format")) "{f}" else "{any}", .{bt.get(i)}); + if (i != n - 1) try w.writeAll(", "); + } + try w.writeAll(" }"); + } + + const Self = @This(); + }; +} + +comptime { + _ = pica; + _ = csnd; + _ = pxi; + _ = dsp; + _ = lgy; + _ = i2c; +} + +const testing = std.testing; + +test BitpackedArray { + const Thing = enum(u1) { foo, bar }; + const ThingArray = BitpackedArray(Thing, 4); + + var bt: ThingArray = .splat(.foo); + + bt.set(3, .bar); + + try testing.expect(bt.get(3) == .bar); + + bt.set(3, .foo); + + try testing.expect(bt.get(3) == .foo); + + bt.set(0, .bar); + + try testing.expect(bt.get(0) == .bar); +} + +const builtin = @import("builtin"); +const std = @import("std"); diff --git a/src/platform/3ds/mango/hardware/cpu.zig 
b/src/platform/3ds/mango/hardware/cpu.zig new file mode 100644 index 0000000..32ea21a --- /dev/null +++ b/src/platform/3ds/mango/hardware/cpu.zig @@ -0,0 +1,194 @@ +//! Definitions for ARM instructions and MMIO registers +//! which are common to both CPUs. +//! +//! See `arm9` and `arm11` for cpu-specific things. +//! +//! Based on the technical reference manuals of both. + +// TODO: Not tested + +pub const arm9 = @import("cpu/arm9.zig"); +pub const arm11 = @import("cpu/arm11.zig"); + +pub const Register = enum(u4) { + pub const sp: Register = .r13; + pub const lr: Register = .r14; + pub const pc: Register = .r15; + + r0, + r1, + r2, + r3, + r4, + r5, + r6, + r7, + r8, + r9, + r10, + r11, + r12, + r13, + r14, + r15, +}; + +// CP15 c0 c0 0 -> ID +// CP15 c0 c0 1 -> Cache Type + +pub inline fn waitForInterrupt() void { + asm volatile ("mcr p15, 0, %[sbz], c7, c0, 4" + : + : [sbz] "r" (0), + ); +} + +pub const cache = struct { + pub const SetWay = packed struct(u32) { + _reserved0: u4 = 0, + /// Depends on cache size + set: u13, + _reserved1: u13, + way: u2, + + pub inline fn invalidateInstruction(set_way: SetWay) void { + asm volatile ("mcr p15, 0, %[sw], c7, c5, 2" + : + : [sw] "r" (set_way), + ); + } + + pub inline fn invalidateData(set_way: SetWay) void { + asm volatile ("mcr p15, 0, %[sw], c7, c6, 2" + : + : [sw] "r" (set_way), + ); + } + + pub inline fn cleanData(set_way: SetWay) void { + asm volatile ("mcr p15, 0, %[sw], c7, c10, 2" + : + : [sw] "r" (set_way), + ); + } + + pub inline fn flushData(set_way: SetWay) void { + asm volatile ("mcr p15, 0, %[sw], c7, c14, 2" + : + : [sw] "r" (set_way), + ); + } + }; + + pub const Address = packed struct(u32) { + virtual: u32, + + pub inline fn invalidateInstruction(address: Address) void { + asm volatile ("mcr p15, 0, %[addr], c7, c5, 1" + : + : [addr] "r" (address), + ); + } + + pub inline fn invalidateData(address: Address) void { + asm volatile ("mcr p15, 0, %[addr], c7, c6, 1" + : + : [addr] "r" (address), + ); + } 
+ + pub inline fn cleanData(address: Address) void { + asm volatile ("mcr p15, 0, %[addr], c7, c10, 1" + : + : [addr] "r" (address), + ); + } + + pub inline fn flushData(address: Address) void { + asm volatile ("mcr p15, 0, %[addr], c7, c14, 1" + : + : [addr] "r" (address), + ); + } + + pub inline fn flushBranchPredictor(address: Address) void { + asm volatile ("mcr p15, 0, %[addr], c7, c5, 7" + : + : [addr] "r" (address), + ); + } + }; + + pub inline fn flushPrefetchBuffer() void { + asm volatile ("mcr p15, 0, %[sbz], c7, c5, 4" + : + : [sbz] "r" (0), + ); + } + + pub inline fn dataSynchronizationBarrier() void { + asm volatile ("mcr p15, 0, %[sbz], c7, c10, 4" + : + : [sbz] "r" (0), + ); + } + + pub inline fn dataMemoryBarrier() void { + asm volatile ("mcr p15, 0, %[sbz], c7, c10, 5" + : + : [sbz] "r" (0), + ); + } + + pub inline fn flushBranchPredictor() void { + asm volatile ("mcr p15, 0, %[sbz], c7, c5, 6" + : + : [sbz] "r" (0), + ); + } + + /// Also flushes branch predictor cache + pub inline fn invalidateInstruction() void { + asm volatile ("mcr p15, 0, %[sbz], c7, c5, 0" + : + : [sbz] "r" (0), + ); + } + + pub inline fn invalidateData() void { + asm volatile ("mcr p15, 0, %[sbz], c7, c6, 0" + : + : [sbz] "r" (0), + ); + } + + /// Also flushes branch predictor cache + pub inline fn invalidate() void { + asm volatile ("mcr p15, 0, %[sbz], c7, c7, 0" + : + : [sbz] "r" (0), + ); + } + + pub inline fn cleanData() void { + asm volatile ("mcr p15, 0, %[sbz], c7, c10, 0" + : + : [sbz] "r" (0), + ); + } + + pub inline fn flushData() void { + asm volatile ("mcr p15, 0, %[sbz], c7, c14, 0" + : + : [sbz] "r" (0), + ); + } +}; + +comptime { + _ = arm11; + _ = arm9; +} + +const std = @import("std"); +const zitrus = @import("zitrus"); diff --git a/src/platform/3ds/mango/hardware/cpu/arm11.zig b/src/platform/3ds/mango/hardware/cpu/arm11.zig new file mode 100644 index 0000000..4393460 --- /dev/null +++ b/src/platform/3ds/mango/hardware/cpu/arm11.zig @@ -0,0 +1,324 @@ +//! 
Based on the ARM11 MPCore r2p0 Technical Reference Manual + +// CP15 c0 c0 3 -> TLB Type +// CP15 c0 c0 5 -> CPUID +// CP15 c0 c1 -> Feature Registers +// CP15 c0 c2 -> ISA Attributes Registers + +pub const Control = packed struct(u32) { + pub const Auxiliary = packed struct(u32) { + pub const Multiprocessing = enum(u1) { amp, smp }; + + return_stack: bool = false, + dynamic_branch_prediction: bool = false, + static_branch_prediction: bool = false, + instruction_folding: bool = false, + exclusive_caches: bool = false, + multiprocessing: Multiprocessing, + l1_parity_errors: bool = false, + _reserved0: u25 = 0, + + pub inline fn read() Auxiliary { + return asm volatile ("mrc p15, 0, %[cnt], c1, c0, 1" + : [cnt] "=r" (-> Auxiliary), + ); + } + + pub inline fn write(cnt: Auxiliary) void { + return asm volatile ("mcr p15, 0, %[cnt], c1, c0, 1" + : + : [cnt] "r" (cnt), + ); + } + }; + + mmu: bool = false, + /// Data abort on unaligned loads/stores + strict_alignment: bool = false, + l1_data_cache: bool = false, + _reserved0: u4 = std.math.maxInt(u4), + _reserved1: u1 = 0, + /// Deprecated + system_protection: bool = false, + /// Deprecated + rom_protection: bool = false, + _reserved2: u1 = 0, + branch_prediction: bool = false, + l1_instruction_cache: bool = false, + alternate_exception_vectors: bool = false, + _reserved3: u1 = 1, + disable_thumb_by_pc_loads: bool = false, + _unused0: u6 = 0, + unaligned_access: bool = false, + subpage_access_permissions: bool = false, + _reserved4: u1 = 0, + set_cpsr_e_on_exception: bool = false, + _reserved5: u1 = 0, + non_maskable_fast_irq: bool = false, + tex_remap: bool = false, + force_access_permissions: bool = false, + _reserved6: u2 = 0, + + pub inline fn read() Control { + return asm volatile ("mrc p15, 0, %[cnt], c1, c0, 0" + : [cnt] "=r" (-> Control), + ); + } + + pub inline fn write(cnt: Control) void { + return asm volatile ("mcr p15, 0, %[cnt], c1, c0, 0" + : + : [cnt] "r" (cnt), + ); + } +}; + +pub const CoprocessorAccess 
= packed struct(u32) { + pub const Mode = enum(u2) { denied, supervisor, full = 3 }; + + _reserved0: u20 = 0, + @"10": Mode = .denied, + @"11": Mode = .denied, + _reserved1: u8 = 0, + + pub inline fn read() CoprocessorAccess { + return asm volatile ("mrc p15, 0, %[acc], c1, c0, 2" + : [acc] "=r" (-> CoprocessorAccess), + ); + } + + pub inline fn write(acc: CoprocessorAccess) void { + return asm volatile ("mcr p15, 0, %[acc], c1, c0, 2" + : + : [acc] "r" (acc), + ); + } +}; + +pub const TranslationTable = extern struct { + pub const Cachable = enum(u2) { + none, + write_back_allocate, + write_through, + write_back, + }; + + pub const Base = packed struct(u32) { + _reserved0: u1 = 0, + shared: bool = false, + _reserved1: u1 = 0, + region: Cachable = .none, + /// TTBL 0 base depends on `Control.separate_table_boundary` and TTBL 1 is restricted to 16KB pages + base: u27, + + pub inline fn read(comptime table: u1) Base { + return asm volatile ("mrc p15, 0, %[base], c2, c0, %[reg]" + : [base] "=r" (-> Base), + : [reg] "i" (table), + ); + } + + pub inline fn write(base: Base, comptime table: u1) void { + return asm volatile ("mcr p15, 0, %[base], c2, c0, %[reg]" + : + : [base] "r" (base), + [reg] "i" (table), + ); + } + }; + + pub const Control = packed struct(u32) { + pub const Boundary = enum(u3) { + @"16KB", + @"8KB", + @"4KB", + @"2KB", + @"1KB", + @"512B", + @"256B", + @"128B", + }; + + separate_table_boundary: Boundary, + _reserved0: u29 = 0, + + pub inline fn read() TranslationTable.Control { + return asm volatile ("mrc p15, 0, %[cnt], c2, c0, 2" + : [cnt] "=r" (-> TranslationTable.Control), + ); + } + + pub inline fn write(cnt: TranslationTable.Control) void { + return asm volatile ("mcr p15, 0, %[cnt], c2, c0, 2" + : + : [cnt] "r" (cnt), + ); + } + }; +}; + +pub const DomainAccess = packed struct(u32) { + pub const Mode = enum(u2) { none, client, manager = 3 }; + + access: BitpackedArray(Mode, 16), + + pub inline fn read() DomainAccess { + return asm volatile 
("mrc p15, 0, %[acc], c3, c0, 0" + : [acc] "=r" (-> DomainAccess), + ); + } + + pub inline fn write(acc: DomainAccess) void { + return asm volatile ("mcr p15, 0, %[acc], c3, c0, 0" + : + : [acc] "r" (acc), + ); + } +}; + +pub const Fault = packed struct(u32) { + pub const Kind = enum(u1) { data, instruction }; + pub const Operation = enum(u1) { read, write }; + pub const Status = enum(u5) { + alignment = 0b00001, + instruction_cache_maintenance = 0b00100, + first_level_external_abort = 0b01100, + second_level_external_abort = 0b01110, + section_translation = 0b00101, + page_translation = 0b00111, + section_access = 0b00011, + page_access = 0b00110, + section_domain = 0b01001, + page_domain = 0b01011, + section_permission = 0b01101, + page_permission = 0b01111, + precise_external_abort = 0b01000, + imprecise_external_abort = 0b10110, + debug = 0b00010, + _, + }; + + status_lo: u4, + domain: u4, + _reserved0: u2 = 0, + status_hi: u1, + operation: Operation, + external_abort: bool, + _reserved1: u19, + + pub fn status(fault: Fault) Status { + return @enumFromInt(fault.status_lo | (@as(u5, fault.status_hi) << 4)); + } + + pub inline fn read(comptime kind: Kind) Fault { + return asm volatile ("mrc p15, 0, %[st], c5, c0, %[kind]" + : [st] "=r" (-> Fault), + : [kind] "i" (@intFromEnum(kind)), + ); + } + + pub inline fn write(st: Fault, comptime kind: Kind) void { + return asm volatile ("mcr p15, 0, %[st], c5, c0, %[kind]" + : + : [st] "r" (st), + [kind] "i" (@intFromEnum(kind)), + ); + } + + pub const Address = packed struct(u32) { + pub const Kind = enum(u1) { default, watchpoint }; + + virtual: u32, + + pub inline fn read(comptime kind: Address.Kind) u32 { + return asm volatile ("mrc p15, 0, %[addr], c6, c0, %[kind]" + : [addr] "=r" (-> u32), + : [kind] "i" (@intFromEnum(kind)), + ); + } + + pub inline fn write(addr: Address, comptime kind: Address.Kind) void { + return asm volatile ("mcr p15, 0, %[addr], c6, c0, %[kind]" + : + : [addr] "r" (addr), + [kind]
"i" (@intFromEnum(kind)), + ); + } + }; +}; + +pub fn Monitor(comptime T: type) type { + return extern struct { + raw: T, + + pub fn init(value: T) MonitorSelf { + return .{ .raw = value }; + } + + /// Performs a load, putting the monitor into a exclusive access state. + pub fn load(mon: *const MonitorSelf) T { + return switch (@bitSizeOf(T)) { + 8 => @bitCast(asm volatile ("ldrexb %[to], %[ptr]" + : [to] "=r" (-> u8), + : [ptr] "p" (&mon.raw), + )), + 16 => @bitCast(asm volatile ("ldrexh %[to], %[ptr]" + : [to] "=r" (-> u16), + : [ptr] "p" (&mon.raw), + )), + 32 => @bitCast(asm volatile ("ldrex %[to], %[ptr]" + : [to] "=r" (-> u32), + : [ptr] "p" (&mon.raw), + )), + 64 => asm volatile ("ldrexd %[to:Q], %[to:R], %[ptr]" + : [to] "=r" (-> u64), + : [ptr] "p" (&mon.raw), + ), + else => @compileError("Unsupported Monitor(" ++ @typeName(T) ++ ")"), + }; + } + + /// Tries to perform a store. If the monitor is still in exclusive access after a + /// `load`, the store succeeds and returns `false` putting the monitor into an + /// open state again. + /// + /// Spurious changes to an open state may happen. + pub fn store(mon: *MonitorSelf, value: T) bool { + return switch (@bitSizeOf(T)) { + 8 => asm volatile ("strexb %[fail], %[value], %[ptr]" + : [fail] "=&r" (-> bool), + : [ptr] "p" (&mon.raw), + [value] "r" (value), + : .{ .memory = true }), + 16 => asm volatile ("strexh %[fail], %[value], %[ptr]" + : [fail] "=&r" (-> bool), + : [ptr] "p" (&mon.raw), + [value] "r" (value), + : .{ .memory = true }), + 32 => asm volatile ("strex %[fail], %[value], %[ptr]" + : [fail] "=&r" (-> bool), + : [ptr] "p" (&mon.raw), + [value] "r" (value), + : .{ .memory = true }), + 64 => asm volatile ("strexd %[fail], %[value:Q], %[value:R], %[ptr]" + : [fail] "=&r" (-> bool), + : [ptr] "p" (&mon.raw), + [value] "r" (value), + : .{ .memory = true }), + else => @compileError("Unsupported Monitor(" ++ @typeName(T) ++ ")"), + }; + } + + /// Puts the monitor into an open state. 
+ pub fn clear(_: *MonitorSelf) void { + asm volatile ("clrex"); + } + + const MonitorSelf = @This(); + }; +} + +const std = @import("std"); +const zitrus = @import("zitrus"); +const hardware = zitrus.hardware; +const BitpackedArray = hardware.BitpackedArray; diff --git a/src/platform/3ds/mango/hardware/cpu/arm9.zig b/src/platform/3ds/mango/hardware/cpu/arm9.zig new file mode 100644 index 0000000..7d93d74 --- /dev/null +++ b/src/platform/3ds/mango/hardware/cpu/arm9.zig @@ -0,0 +1,83 @@ +pub const Control = packed struct(u32) { + mmu: bool = false, + _reserved0: u1 = 0, + data_cache: bool = false, + _reserved1: u4 = std.math.maxInt(u4), + big_endian: bool = false, + _reserved2: u4 = 0, + instruction_cache: bool = false, + alternate_exception_vectors: bool = false, + cache_round_robin_replacement: bool = false, + disable_thumb_by_pc_loads: bool = false, + data_tcm: bool = false, + data_tcm_load_mode: bool = false, + instruction_tcm: bool = false, + instruction_tcm_load_mode: bool = false, + _reserved3: u12 = 0, + + pub inline fn read() Control { + return asm volatile ("mrc p15, 0, %[cnt], c1, c0, 0" + : [cnt] "=r" (-> Control), + ); + } + + pub inline fn write(cnt: Control) void { + return asm volatile ("mcr p15, 0, %[cnt], c1, c0, 0" + : + : [cnt] "r" (cnt), + ); + } +}; + +pub const Interrupt = packed struct(u32) { + pub const Registers = extern struct { + enable: Interrupt, + flags: Interrupt, + }; + + pub const Pxi = packed struct(u3) { + sync: bool, + send_emoty: bool, + receive_full: bool, + }; + + pub const Sdio = packed struct(u2) { + controller: bool, + async: bool, + }; + + pub const Debug = packed struct(u2) { + receive: bool, + send: bool, + }; + + pub const Gamecard = packed struct(u2) { + power_off: bool, + insert: bool, + }; + + pub const Xdma = packed struct(u2) { + event: bool, + fault: bool, + }; + + ndma: BitpackedArray(bool, 8), + timer: BitpackedArray(bool, 4), + pxi: Pxi, + aes: bool, + sdio: BitpackedArray(Sdio, 2), + debug: Debug, + rsa: bool, 
+ ctr_card: BitpackedArray(bool, 2), + gamecard: Gamecard, + ntr_card: bool, + xdma: Xdma, + _unused0: u2 = 0, +}; + +// CP15 c0 c0 2 -> TCM size + +const std = @import("std"); +const zitrus = @import("zitrus"); +const hardware = zitrus.hardware; +const BitpackedArray = hardware.BitpackedArray; diff --git a/src/platform/3ds/mango/hardware/csnd.zig b/src/platform/3ds/mango/hardware/csnd.zig new file mode 100644 index 0000000..fa59fe9 --- /dev/null +++ b/src/platform/3ds/mango/hardware/csnd.zig @@ -0,0 +1,116 @@ +//! Definitions for MMIO `CSND` registers. +//! +//! Based on the documentation found in GBATEK and 3dbrew: +//! - https://problemkaputt.de/gbatek.htm#3dssoundandmicrophone +//! - https://www.3dbrew.org/wiki/CSND_Registers + +pub const Volume = enum(u16) { + pub const min: Volume = .volume(0); + pub const max: Volume = .volume(0x8000); + + _, + + pub fn volume(value: u16) Volume { + return @enumFromInt(@min(value, 0x8000)); + } +}; + +pub const SampleRate = enum(u16) { + pub const min: SampleRate = .rate(0); + pub const max: SampleRate = .rate(0xFFBE); + + _, + + pub fn rate(value: u16) SampleRate { + std.debug.assert(value <= 0xFFBE); + return @enumFromInt(value); + } +}; + +pub const MasterControl = packed struct(u32) { + volume: Volume, + mute: bool, + _unused0: u13, + dissonant_disable: bool, + /// When this is not true, some registers won't be written.
+ read_write: bool, +}; + +pub const Channel = extern struct { + pub const WaveDuty = enum(u3) { _ }; + pub const Format = enum(u2) { pcm8, pcm16, ima_adpcm, psg }; + pub const Repeat = enum(u2) { manual, loop, one_shot, loop_constant }; + + pub const Control = packed struct(u32) { + wave_duty: WaveDuty, + _unused0: u3 = 0, + interpolate_linearly: bool, + hold_last: bool, + _unused1: u2 = 0, + repeat: Repeat, + format: Format, + pause_disable: bool, + busy: bool, + sample_rate: SampleRate, + }; + + pub const ImaAdPcm = packed struct(u32) { + value: i16, + index_value: u7, + _unused0: u8, + reload_second_buffer_state: bool, + }; + + pub const Volume = packed struct(u32) { + right: csnd.Volume, + left: csnd.Volume, + }; + + control: Control, + output_volume: Channel.Volume, + capture_volume: Channel.Volume, + start_address: PhysicalAddress, + total_size: hardware.LsbRegister(u27), + // So you can start with some sound and then loop with another? If true cool. + // XXX: 3dbrew says this is the other channel? When this is 0x0 then mono audio is played.
Name is not accurate + loop_restart_address: PhysicalAddress, + start_ima_state: ImaAdPcm, + restart_ima_state: ImaAdPcm, +}; + +pub const Capture = extern struct { + pub const Format = enum(u1) { pcm16, pcm8 }; + + pub const Control = packed struct(u32) { + one_shot: bool, + format: Format, + _unknown0: u1, + _unused0: u12 = 0, + busy: bool, + _unused1: u16 = 0, + }; + + control: Control, + sample_rate: LsbRegister(SampleRate), + length: LsbRegister(u24), + address: PhysicalAddress, +}; + +pub const Registers = extern struct { + master: MasterControl, + _unused0: [0x3FC]u8, + channels: [32]Channel, + captures: [2]Capture, +}; + +const csnd = @This(); + +const std = @import("std"); + +const zitrus = @import("zitrus"); +const hardware = zitrus.hardware; + +const LsbRegister = hardware.LsbRegister; +const MsbRegister = hardware.MsbRegister; + +const PhysicalAddress = hardware.PhysicalAddress; diff --git a/src/platform/3ds/mango/hardware/dsp.zig b/src/platform/3ds/mango/hardware/dsp.zig new file mode 100644 index 0000000..15c9578 --- /dev/null +++ b/src/platform/3ds/mango/hardware/dsp.zig @@ -0,0 +1,90 @@ +//! Definitions for MMIO `DSP` registers. +//! +//! Used for **D**igital **S**ignal **P**rocessing, a.k.a: sound. +//! Its an independent processor named *TeakLite II* from XpertTeak. +//! +//! +//! Based on the documentation found in GBATEK and 3dbrew: +//! 
- https://problemkaputt.de/gbatek.htm#3dssoundandmicrophone + +pub const Address = enum(u16) { + _, + + pub fn init(address: u16) Address { + return @enumFromInt(address); + } +}; + +pub const Configuration = packed struct(u16) { + pub const Region = enum(u2) { data, mmio, code, ahbm }; + pub const Length = enum(u2) { @"1", @"8", @"16", free }; + + reset: bool, + auto_increment_transfer_address: bool, + read_length: Length, + read_start: bool, + irq_read_fifo_full: bool, + irq_read_fifo_not_empty: bool, + irq_write_fifo_full: bool, + irq_write_fifo_empty: bool, + irq_reply_register: BitpackedArray(bool, 3), + transfer_region: Region, +}; + +pub const Status = packed struct(u16) { + reading: bool, + writing: bool, + resetting: bool, + _unused0: u2, + read_fifo_full: bool, + read_fifo_not_empty: bool, + write_fifo_full: bool, + write_fifo_empty: bool, + semaphore_irq: bool, + reply_register_unwritten: BitpackedArray(bool, 3), + command_register_unread: BitpackedArray(bool, 3), +}; + +pub const Semaphore = extern struct { + send: BitpackedArray(bool, 16), + _unused0: [2]u8, + irq_disable: BitpackedArray(bool, 16), + _unused1: [2]u8, + send_clear: BitpackedArray(bool, 16), + _unused2: [2]u8, + receive: BitpackedArray(bool, 16), + _unused3: [2]u8, +}; + +pub const Registers = extern struct { + fifo: u16, + _unused0: [2]u8, + transfer_address: Address, + _unused1: [2]u8, + config: Configuration, + _unused2: [2]u8, + status: Status, + _unused3: [2]u8, + semaphore: Semaphore, + command0: u16, + _unused4: [2]u8, + reply0: u16, + _unused5: [2]u8, + command1: u16, + _unused6: [2]u8, + reply1: u16, + _unused7: [2]u8, + command2: u16, + _unused8: [2]u8, + reply2: u16, + _unused9: [2]u8, +}; + +const dsp = @This(); + +const std = @import("std"); + +const zitrus = @import("zitrus"); +const hardware = zitrus.hardware; + +const BitpackedArray = hardware.BitpackedArray; diff --git a/src/platform/3ds/mango/hardware/hid.zig b/src/platform/3ds/mango/hardware/hid.zig new file mode 
100644 index 0000000..45adc62 --- /dev/null +++ b/src/platform/3ds/mango/hardware/hid.zig @@ -0,0 +1,54 @@ +//! Definitions for MMIO `HID` registers. +//! +//! Used only for main PAD buttons (Circle Pad + New3DS buttons not included) +//! +//! Based on the documentation found in GBATEK: https://problemkaputt.de/gbatek.htm#3dsmiscregisters + +pub const State = packed struct(u16) { + a: bool, + b: bool, + select: bool, + start: bool, + right: bool, + left: bool, + up: bool, + down: bool, + r: bool, + l: bool, + x: bool, + y: bool, + _unused0: u4 = 0, +}; + +pub const Interrupt = packed struct(u16) { + pub const Condition = enum(u1) { @"or", @"and" }; + + pub const Source = packed struct(u12) { + a: bool, + b: bool, + select: bool, + start: bool, + right: bool, + left: bool, + up: bool, + down: bool, + r: bool, + l: bool, + x: bool, + y: bool, + }; + + source: Source, + _unused0: u2 = 0, + enable: bool, + condition: Condition, +}; + +pub const Registers = extern struct { + released: State, + irq: Interrupt, +}; + +const hid = @This(); + +const zitrus = @import("zitrus"); diff --git a/src/platform/3ds/mango/hardware/i2c.zig b/src/platform/3ds/mango/hardware/i2c.zig new file mode 100644 index 0000000..0a4e128 --- /dev/null +++ b/src/platform/3ds/mango/hardware/i2c.zig @@ -0,0 +1,45 @@ +//! Definitions for MMIO `I2C` registers. +//! +//! Used for numerous things. +//! +//!
Based on the documentation found in GBATEK: https://problemkaputt.de/gbatek.htm#3dsi2cregisters + +pub const Direction = enum(u1) { write, read }; + +pub const Control = packed struct(u8) { + stop: bool, + start: bool, + pause: bool, + _unknown0: u1 = 0, + ack: bool, + direction: Direction, + irq_enable: bool, + busy: bool, +}; + +pub const ControlExtended = packed struct(u16) { + clock: bool, + wait_if_clock_low: bool, + _unused0: u13 = 0, + _unknown1: u1 = 0, +}; + +pub const Speed = enum(u6) { + _, +}; + +pub const Bus = extern struct { + pub const Clock = packed struct(u16) { + low: Speed, + _unused0: u2 = 0, + high: Speed, + _unused1: u2 = 0, + }; + + data: u8, + control: Control, + control_extended: ControlExtended, + clock: Clock, +}; + +const std = @import("std"); diff --git a/src/platform/3ds/mango/hardware/lcd.zig b/src/platform/3ds/mango/hardware/lcd.zig new file mode 100644 index 0000000..2629c48 --- /dev/null +++ b/src/platform/3ds/mango/hardware/lcd.zig @@ -0,0 +1,92 @@ +//! Definitions for MMIO `LCD` registers. +//! +//! 
Based on the documentation found in GBATEK: https://problemkaputt.de/gbatek.htm#3dsvideolcdregisters + +pub const Parallax = extern struct { + pub const Control = packed struct(u32) { + pub const Enable = enum(u2) { off, enable, _ }; + + tp27_enable: Enable, + tp27_invert_output: bool, + _unused0: u13 = 0, + tp29_enable: Enable, + tp29_invert_output: bool, + _unused1: u13 = 0, + }; + + pub const Duty = packed struct(u32) { + /// (off + 1) * 0.9us + off: u16, + /// (on + 1) * 0.9us + on: u16, + }; + + control: Control, + /// Controls the TP27 parallax PWM + duty: Duty, +}; + +pub const Screen = extern struct { + pub const Flags = packed struct(u32) { + abl_enable: bool, + _unused0: u7 = 0, + dither_related: BitpackedArray(bool, 2), + _unused1: u22 = 0, + }; + + pub const Fill = packed struct(u32) { + r: u8, + g: u8, + b: u8, + enable: bool, + _unused0: u7 = 0, + }; + + pub const AdaptiveBacklight = extern struct { + // TODO: lazy + _todo: [0x5F8]u8, + }; + + flags: Flags, + fill: Fill, + adaptive_backlight: AdaptiveBacklight, + + comptime { + std.debug.assert(@sizeOf(Screen) == 0x600); + } +}; + +pub const Clock = packed struct(u32) { + top_disable: bool = false, + _unused0: u15 = 0, + bottom_disable: bool = false, + _unused1: u15 = 0, +}; + +pub const Status = packed struct(u32) { + _: u32 = 0, +}; + +pub const Reset = enum(u1) { reset, enable }; + +pub const Registers = extern struct { + parallax: Parallax, + status: Status, + clock: Clock, + _unknown0: u32, + reset: LsbRegister(Reset), + _unused0: [122]u32, + top: Screen, + _unused1: [128]u32, + bottom: Screen, +}; + +const lcd = @This(); + +const std = @import("std"); + +const zitrus = @import("zitrus"); +const hardware = zitrus.hardware; + +const LsbRegister = hardware.LsbRegister; +const BitpackedArray = hardware.BitpackedArray; diff --git a/src/platform/3ds/mango/hardware/lgy.zig b/src/platform/3ds/mango/hardware/lgy.zig new file mode 100644 index 0000000..f3def32 --- /dev/null +++ 
b/src/platform/3ds/mango/hardware/lgy.zig
@@ -0,0 +1,148 @@
+//! Definitions for MMIO `LGY` registers.
+//!
+//! Used for **L**e**g**ac**y** framebuffer conversion, NDS/GBA -> 3DS.
+//!
+//! Based on the documentation found in GBATEK: https://problemkaputt.de/gbatek.htm#3dsvideolgyregisterslegacygbandsvideotoframebuffer
+
+/// Register block of one legacy framebuffer converter; its size is asserted to be 0x400 bytes.
+pub const Framebuffer = extern struct {
+    pub const Format = enum(u2) { abgr8888, bgr888, rgb5551, rgb565 };
+    pub const Rotate = enum(u2) { none, @"90", @"180", @"270" };
+
+    pub const Control = packed struct(u32) {
+        start: bool,
+        enable_vertical_scaling: bool,
+        enable_horizontal_scaling: bool,
+        _unused0: u1 = 0,
+        brightness_dither_enable: bool,
+        _brigness_dither_enable_too: bool, // NOTE(review): "brigness" is a typo of "brightness"; renaming it would change the public field name.
+        _unused1: u2 = 0,
+        format: Format,
+        /// Clockwise rotation
+        rotate: Rotate,
+        swizzle: bool,
+        _unused2: u2 = 0,
+        dma: bool,
+        _unused3: u16 = 0,
+    };
+
+    /// Image size register; each dimension is stored minus one.
+    pub const Dimensions = packed struct(u32) {
+        width_minus_one: u9,
+        _unused0: u7 = 0,
+        height_minus_one: u9,
+        _unused1: u7 = 0,
+
+        /// Builds the register value from 1-based dimensions; the padding fields take their `0` defaults.
+        /// `width` and `height` must be non-zero, otherwise the `- 1` underflows the `u9`.
+        pub fn init(width: u9, height: u9) Dimensions {
+            return .{ .width_minus_one = width - 1, .height_minus_one = height - 1 };
+        }
+    };
+
+    pub const InterruptStatus = packed struct(u32) {
+        first_block: bool,
+        next_block: bool,
+        last_line: bool,
+        _unused0: u13 = 0,
+        current_block_line: u8,
+        _unused1: u8 = 0,
+    };
+
+    pub const InterruptEnable = packed struct(u32) {
+        first_block: bool,
+        next_block: bool,
+        last_line: bool,
+        _unused0: u29 = 0,
+    };
+
+    pub const Scaling = extern struct {
+        /// Scale according to `length` output pixels.
+        ///
+        /// `bits` tell which input pixels get used, effectively making the ratio `length` / (number of set `bits`).
+        ///
+        /// NOTE(review): by that rule `@"1.16x"` works out to 7/5 = 1.4 and `@"1.66x"` to 5/4 = 1.25; verify both patterns against GBATEK.
+        /// NOTE(review): `@"2.66x"` passes a length of 8, which a `u3` cannot represent; confirm whether the register expects `length - 1`.
+        pub const Pattern = extern struct {
+            pub const @"1x": Pattern = .init(1, .splat(1));
+            pub const @"1.16x": Pattern = .init(7, .init(.{ 1, 1, 0, 1, 1, 0, 1, 0 }));
+            pub const @"1.2x": Pattern = .init(6, .init(.{ 1, 1, 1, 0, 1, 1, 0, 0 }));
+            pub const @"1.25x": Pattern = .init(5, .init(.{ 1, 1, 0, 1, 1, 0, 0, 0 }));
+            pub const @"1.33x": Pattern = .init(4, .init(.{ 1, 1, 1, 0, 0, 0, 0, 0 }));
+            pub const @"1.4x": Pattern = .init(7, .init(.{ 1, 1, 1, 1, 1, 0, 0, 0 }));
+            pub const @"1.5x": Pattern = .init(3, .init(.{ 1, 1, 0, 0, 0, 0, 0, 0 }));
+            pub const @"1.66x": Pattern = .init(5, .init(.{ 1, 1, 1, 0, 1, 0, 0, 0 }));
+            pub const @"1.75x": Pattern = .init(7, .init(.{ 1, 0, 1, 0, 1, 0, 1, 0 }));
+            pub const @"2x": Pattern = .init(2, .init(.{ 1, 0, 0, 0, 0, 0, 0, 0 }));
+            pub const @"2.33x": Pattern = .init(7, .init(.{ 1, 0, 0, 1, 0, 0, 1, 0 }));
+            pub const @"2.5x": Pattern = .init(5, .init(.{ 1, 0, 1, 0, 0, 0, 0, 0 }));
+            pub const @"2.66x": Pattern = .init(8, .init(.{ 1, 0, 0, 1, 0, 0, 0, 1 }));
+            pub const @"3x": Pattern = .init(3, .init(.{ 1, 0, 0, 0, 0, 0, 0, 0 }));
+            pub const @"3.5x": Pattern = .init(7, .init(.{ 1, 0, 0, 1, 0, 0, 0, 0 }));
+
+            length: LsbRegister(u3),
+            bits: LsbRegister(BitpackedArray(u1, 8)),
+
+            /// NOTE(review): assigns the raw values to `LsbRegister` fields; confirm `LsbRegister` accepts its payload type directly.
+            pub fn init(length: u3, bits: BitpackedArray(u1, 8)) Pattern {
+                return .{ .length = length, .bits = bits };
+            }
+        };
+
+        pub const Brightness = enum(u16) { _ };
+
+        pattern: Pattern,
+        _unused0: [0x38]u8,
+        brightness: [6][8]LsbRegister(Brightness),
+
+        comptime {
+            std.debug.assert(@sizeOf(Scaling) == 0x100);
+        }
+    };
+
+    control: Control,
+    size: Dimensions,
+    irq_status: InterruptStatus,
+    irq_enable: InterruptEnable,
+    _unused0: [0x10]u8,
+    alpha: LsbRegister(u8),
+    _unused1: [0xCC]u8,
+    prefetch: LsbRegister(u4),
+    _unused2: [0x0C]u8,
+    dither: [4]u64,
+    _unused3: [0xE0]u8,
+    vertical_scaling: Scaling,
+    horizontal_scaling: Scaling,
+
+    comptime {
+        std.debug.assert(@sizeOf(Framebuffer) == 0x400);
+    }
+};
+
+/// Both converter register blocks: `bottom` first, then `top` 0xC00 bytes after the end of `bottom`.
+pub const Config = extern struct {
+    bottom: Framebuffer,
+    _unused0: [0xC00]u8,
+    top: Framebuffer,
+};
+
+/// Two consecutive 0x1000-byte windows, `bottom` followed by `top`.
+pub const Fifo = extern struct {
+    bottom: [0x1000]u8,
+    top: [0x1000]u8,
+};
+
+comptime { // reference both layouts so their size assertions are evaluated even when nothing else uses them
+    _ = Config;
+    _ = Fifo;
+}
+
+const lgy = @This();
+
+const std = @import("std");
+
+const zitrus = @import("zitrus");
+const hardware = zitrus.hardware;
+
+const LsbRegister = hardware.LsbRegister;
+const BitpackedArray = hardware.BitpackedArray;
diff --git a/src/platform/3ds/mango/hardware/pica.zig b/src/platform/3ds/mango/hardware/pica.zig
new file mode 100644
index 0000000..9fa5859
--- /dev/null
+++ b/src/platform/3ds/mango/hardware/pica.zig
@@ -0,0 +1,2940 @@
+//! Definitions for MMIO `PICA200` registers.
+//!
+//! - LCD screens are physically rotated 90º CCW from how the devices are held (i.e: bottom is not 320x240, is 240x320)
+//! - NDC clipping volume:
+//!     - X: [-W, W]
+//!     - Y: [-W, W]
+//!     - Z: [0, -W]
+//! - Framebuffer origin can be changed so `-1` in NDC could mean bottom-left (GL) or top-left (D3D, Metal, VK)
+//! - Texture Origin is Bottom-Left, they are sampled from bottom (0, 0) to top (1, 1).
+//!
+//! WARNING: zitrus has opinionated register naming that suit better their usage!
+//!
+//! Based on the documentation found in GBATEK and 3dbrew:
+//! - https://problemkaputt.de/gbatek.htm#3dsgpuinternalregisteroverview
+//! - https://www.3dbrew.org/wiki/GPU/External_Registers
+//!
- https://www.3dbrew.org/wiki/GPU/Internal_Registers
+
+pub const command = @import("pica/command.zig");
+pub const shader = @import("pica/shader.zig");
+
+pub const UQ0_11 = zsflt.Fixed(.unsigned, 0, 11);
+pub const UQ0_12 = zsflt.Fixed(.unsigned, 0, 12);
+pub const UQ0_23 = zsflt.Fixed(.unsigned, 0, 23);
+pub const Q4_8 = zsflt.Fixed(.signed, 4, 8);
+pub const Q0_11 = zsflt.Fixed(.signed, 0, 11);
+pub const Q1_11 = zsflt.Fixed(.signed, 1, 11);
+
+pub const F5_10 = zsflt.Float(5, 10);
+pub const F3_12 = zsflt.Float(3, 12);
+pub const F7_12 = zsflt.Float(7, 12);
+pub const F7_16 = zsflt.Float(7, 16);
+pub const F7_23 = zsflt.Float(7, 23);
+pub const F8_23 = zsflt.Float(8, 23);
+
+pub const Q1_11x2 = packed struct(u32) {
+    x: Q1_11,
+    _unused0: u3 = 0,
+    y: Q1_11,
+    _unused1: u3 = 0,
+
+    pub fn init(x: Q1_11, y: Q1_11) Q1_11x2 {
+        return .{ .x = x, .y = y };
+    }
+};
+
+pub const F5_10x2 = packed struct(u32) {
+    x: F5_10,
+    y: F5_10,
+
+    pub fn init(x: F5_10, y: F5_10) F5_10x2 {
+        return .{ .x = x, .y = y };
+    }
+};
+
+pub const F7_16x4 = extern struct {
+    pub const Unpacked = struct { x: F7_16, y: F7_16, z: F7_16, w: F7_16 };
+
+    data: [@divExact(@bitSizeOf(F7_16) * 4, @bitSizeOf(u32))]u32,
+
+    pub fn pack(x: F7_16, y: F7_16, z: F7_16, w: F7_16) F7_16x4 {
+        var vec: F7_16x4 = undefined;
+        const vec_bytes = std.mem.asBytes(&vec.data);
+
+        std.mem.writePackedInt(u24, vec_bytes, 0, @bitCast(x), .little);
+        std.mem.writePackedInt(u24, vec_bytes, @bitSizeOf(F7_16), @bitCast(y), .little);
+        std.mem.writePackedInt(u24, vec_bytes, @bitSizeOf(F7_16) * 2, @bitCast(z), .little);
+        std.mem.writePackedInt(u24, vec_bytes, @bitSizeOf(F7_16) * 3, @bitCast(w), .little);
+        std.mem.swap(u32, &vec.data[0], &vec.data[2]);
+
+        return vec;
+    }
+
+    pub fn unpack(value: F7_16x4) [4]F7_16 {
+        var unpacked: [3]u32 = value.data;
+        std.mem.swap(u32, &unpacked[0], &unpacked[2]);
+
+        return .{
+            @bitCast(std.mem.readPackedInt(u24, @ptrCast(&unpacked), 0, .little)),
+            @bitCast(std.mem.readPackedInt(u24, @ptrCast(&unpacked), @bitSizeOf(F7_16), .little)),
+            @bitCast(std.mem.readPackedInt(u24, @ptrCast(&unpacked), @bitSizeOf(F7_16) * 2, .little)),
+            @bitCast(std.mem.readPackedInt(u24, @ptrCast(&unpacked), @bitSizeOf(F7_16) * 3, .little)),
+        };
+    }
+};
+
+pub const morton = struct {
+    /// Splits the morton/z-order index `value` into `dimensions` coordinates; bit `i` of `value` becomes bit `i / dimensions` of coordinate `i % dimensions`.
+    pub fn toDimensions(comptime T: type, comptime dimensions: usize, value: T) [dimensions]std.meta.Int(.unsigned, @divExact(@bitSizeOf(T), dimensions)) {
+        std.debug.assert(@typeInfo(T) == .int);
+        const DecomposedInt = std.meta.Int(.unsigned, @divExact(@bitSizeOf(T), dimensions));
+
+        // Basically bits are interleaved
+        // 2-dimensional 8-bits example: yxyxyxyx
+        var values: [dimensions]DecomposedInt = @splat(0);
+        var current_value = value;
+        inline for (0..@bitSizeOf(T)) |i| {
+            const shift = i / dimensions;
+            const set = &values[i % dimensions];
+
+            set.* |= @intCast((current_value & 0b1) << shift);
+            current_value >>= 1;
+        }
+
+        return values;
+    }
+
+    test toDimensions {
+        try testing.expectEqual([2]u3{ 0b001, 0b001 }, toDimensions(u6, 2, 0b000011));
+        try testing.expectEqual([2]u3{ 0b010, 0b010 }, toDimensions(u6, 2, 0b001100));
+        try testing.expectEqual([2]u3{ 0b011, 0b011 }, toDimensions(u6, 2, 0b001111));
+        try testing.expectEqual([2]u3{ 0b111, 0b111 }, toDimensions(u6, 2, 0b111111));
+    }
+
+    /// Interleaves the bits of the coordinates into a morton linear index; `value[0]` supplies the least significant bit.
+    pub fn toIndex(comptime T: type, comptime dimensions: usize, value: [dimensions]T) std.meta.Int(.unsigned, dimensions * @bitSizeOf(T)) {
+        std.debug.assert(@typeInfo(T) == .int);
+
+        const IndexType = std.meta.Int(.unsigned, dimensions * @bitSizeOf(T));
+        const max_index = dimensions * @bitSizeOf(T);
+        var index: IndexType = 0;
+
+        inline for (0..max_index) |i| {
+            const dimension = i % dimensions;
+            const dimension_bit = i / dimensions;
+
+            index |= (@as(IndexType, value[dimension]) << (i - dimension_bit)) & (@as(IndexType, 1) << i);
+        }
+
+        return index;
+    }
+
+    test toIndex {
+        try testing.expectEqual(0b000011, toIndex(u3, 2, .{ 1, 1 }));
+        try testing.expectEqual(0b001100, toIndex(u3, 2, .{ 2, 2 }));
+        try testing.expectEqual(0b001111, toIndex(u3, 2, .{ 3, 3 }));
+        try testing.expectEqual(0b110000, toIndex(u3, 2, .{ 4, 4 }));
+        try testing.expectEqual(0b111111, toIndex(u3, 2, .{ 7, 7 }));
+
+        try testing.expectEqual(0b101011, toIndex(u3, 2, .{ 1, 7 }));
+        try testing.expectEqual(0b011101, toIndex(u3, 2, .{ 7, 2 }));
+    }
+
+    // TODO: We could fuzz-test (zig 0.16.0) value -> toDimensions -> toIndex -> value, as it must always round-trip to the same value
+
+    pub const Strategy = enum {
+        /// Linear -> Morton
+        tile,
+        /// Morton -> Linear
+        untile,
+    };
+
+    pub const ConversionOptions = struct {
+        input_x: usize,
+        input_y: usize,
+        input_stride: usize,
+
+        output_x: usize,
+        output_y: usize,
+        output_stride: usize,
+
+        width: usize,
+        height: usize,
+
+        pixel_size: usize,
+
+        pub fn full(width: usize, height: usize, pixel_size: usize) ConversionOptions {
+            return .{
+                .input_x = 0,
+                .input_y = 0,
+                .input_stride = width * pixel_size,
+
+                .output_x = 0,
+                .output_y = 0,
+                .output_stride = width * pixel_size,
+
+                .width = width,
+                .height = height,
+
+                .pixel_size = pixel_size,
+            };
+        }
+    };
+
+    /// Copies the `opts.width` x `opts.height` rectangle of the linear `src_pixels` into the Morton-tiled `dst_pixels`; asserts that the tiled image width (`output_stride / pixel_size`) is divisible by `tile_size`. `.untile` is not implemented yet.
+    pub fn convert2(comptime strategy: Strategy, comptime tile_size: usize, dst_pixels: []u8, src_pixels: []const u8, opts: ConversionOptions) void {
+        comptime std.debug.assert(std.math.isPowerOfTwo(tile_size)); // We depend on this and the PICA only supports 2x2 (ETC), 8x8 and 32x32 tile sizes.
+
+        const tile_pixels = (tile_size * tile_size);
+        const subtile_mask = (tile_size - 1);
+        const tile_shift = comptime std.math.log2(tile_size);
+        const SubtileInt = std.math.IntFittingRange(0, tile_size - 1); // in-tile coordinate type, sized from `tile_size` (u3 for 8x8, u5 for 32x32); a fixed u3 only fits tiles up to 8x8
+        const output_real_width = opts.output_stride / opts.pixel_size;
+
+        for (0..opts.height) |current_y| {
+            const input_y = current_y + opts.input_y;
+            const output_y = current_y + opts.output_y;
+
+            for (0..opts.width) |current_x| {
+                const input_x = current_x + opts.input_x;
+                const output_x = current_x + opts.output_x;
+
+                const src_pixel, const dst_pixel = pxl: switch (strategy) {
+                    .tile => {
+                        const src_index = input_y * opts.input_stride + input_x * opts.pixel_size;
+
+                        std.debug.assert((output_real_width & subtile_mask) == 0);
+                        const dst_tile_pixels_per_line = (output_real_width >> tile_shift) * tile_pixels;
+
+                        const dst_tile_y = output_y >> tile_shift;
+                        const dst_tile_x = output_x >> tile_shift;
+
+                        const dst_subtile_y: SubtileInt = @intCast(output_y & subtile_mask);
+                        const dst_subtile_x: SubtileInt = @intCast(output_x & subtile_mask);
+                        const dst_subtile_morton = toIndex(SubtileInt, 2, .{ dst_subtile_x, dst_subtile_y });
+
+                        const dst_pixel_start = (dst_tile_y * dst_tile_pixels_per_line) + (dst_tile_x * tile_pixels);
+                        const dst_index = (dst_pixel_start + dst_subtile_morton) * opts.pixel_size;
+
+                        break :pxl .{ src_pixels[src_index..][0..opts.pixel_size], dst_pixels[dst_index..][0..opts.pixel_size] };
+                    },
+                    .untile => {
+                        comptime unreachable; // TODO
+                    },
+                };
+
+                @memcpy(dst_pixel, src_pixel);
+            }
+        }
+    }
+
+    pub fn convert(comptime strategy: Strategy, comptime tile_size: usize, width: usize, pixel_size: usize, dst_pixels: []u8, src_pixels: []const u8) void {
+        std.debug.assert(dst_pixels.len == src_pixels.len);
+        const max_tile_subindex = (tile_size * tile_size);
+        const SubindexInt = std.math.IntFittingRange(0, max_tile_subindex - 1);
+        // NOTE: despite the names, `linear_index` walks the tiled buffer sequentially and `morton_index` is the row-major byte offset of pixel (x, y).
+        const height = @divExact(@divExact(src_pixels.len, pixel_size), width);
+        const width_tiles = @divExact(width, tile_size);
+        const height_tiles = @divExact(height, tile_size);
+        const stride = width * pixel_size;
+
+        var i: usize = 0;
+        for (0..height_tiles) |y_tile| {
+            const y_start = y_tile * tile_size;
+
+            for (0..width_tiles) |x_tile| {
+                const x_start = x_tile * tile_size;
+
+                for (0..max_tile_subindex) |tile| {
+                    const x, const y = toDimensions(SubindexInt, 2, @intCast(tile));
+
+                    const linear_index = i;
+                    const morton_index = (y_start + y) * stride + (x_start + x) * pixel_size;
+
+                    const src_pixel, const dst_pixel = switch (strategy) {
+                        .tile => .{ src_pixels[morton_index..][0..pixel_size], dst_pixels[linear_index..][0..pixel_size] },
+                        .untile => .{ src_pixels[linear_index..][0..pixel_size], dst_pixels[morton_index..][0..pixel_size] },
+                    };
+
+                    @memcpy(dst_pixel, src_pixel);
+                    i += pixel_size;
+                }
+            }
+        }
+    }
+
+    pub fn convertNibbles(comptime strategy: Strategy, comptime tile_size: usize, width: usize, dst_pixels: []u8, src_pixels: []const u8) void {
+        std.debug.assert(dst_pixels.len == src_pixels.len);
+        const max_tile_subindex = (tile_size * tile_size);
+        const SubindexInt = std.math.IntFittingRange(0, max_tile_subindex - 1);
+        // Same traversal as `convert`, but with two 4-bit pixels per byte: `i` counts pixels and the low nibble holds the even pixel.
+        const height = @divExact(src_pixels.len << 1, width);
+        const width_tiles = @divExact(width, tile_size);
+        const height_tiles = @divExact(height, tile_size);
+        const stride = @divExact(width, 2);
+
+        var i: usize = 0;
+        for (0..height_tiles) |y_tile| {
+            const y_start = y_tile * tile_size;
+
+            for (0..width_tiles) |x_tile| {
+                const x_start = x_tile * tile_size;
+
+                for (0..max_tile_subindex) |tile| {
+                    const x, const y = toDimensions(SubindexInt, 2, @intCast(tile));
+
+                    const linear_index = i >> 1;
+                    const second_linear_nibble = (i & 1) != 0;
+
+                    const morton_x = (x_start + x);
+                    const morton_index = (y_start + y) * stride + (morton_x >> 1);
+                    const second_morton_nibble = (morton_x & 1) != 0;
+
+                    const src_pixel, const dst_pixel, const second_src_nibble, const second_dst_nibble = switch (strategy) {
+                        .tile => .{ &src_pixels[morton_index], &dst_pixels[linear_index], second_morton_nibble, second_linear_nibble },
+                        .untile => .{ &src_pixels[linear_index], &dst_pixels[morton_index], second_linear_nibble, second_morton_nibble },
+                    };
+
+                    const src_nibble = if (second_src_nibble) (src_pixel.* >> 4) else (src_pixel.* & 0xF);
+                    const last_dst_pixel = dst_pixel.*;
+
+                    dst_pixel.* = if (second_dst_nibble) (last_dst_pixel & 0xF) | (src_nibble << 4) else (last_dst_pixel & 0xF0) | src_nibble;
+                    i += 1;
+                }
+            }
+        }
+    }
+};
+
+pub const Screen = enum(u1) {
+    pub const width_po2 = 256;
+
+    top,
+    bottom,
+
+    pub fn other(screen: Screen) Screen {
+        return switch (screen) {
+            .top => .bottom,
+            .bottom => .top,
+        };
+    }
+
+    pub inline fn width(_: Screen) usize {
+        return 240;
+    }
+
+    pub inline fn height(screen: Screen) usize {
+        return switch (screen) {
+            .top => 400,
+            .bottom => 320,
+        };
+    }
+};
+
+/// Deprecated: use DisplayController.Framebuffer.Pixel.Size
+pub const PixelSize = DisplayController.Framebuffer.Pixel.Size;
+
+/// Deprecated: use DisplayController.Framebuffer.Pixel
+pub const ColorFormat = DisplayController.Framebuffer.Pixel;
+
+/// Depth values are stored as normalized integers.
+pub const DepthStencilFormat = enum(u2) {
+    /// 2 bytes for depth, `0xDDDD`.
+    d16,
+    /// 3 bytes for depth, `0xDDDDDD`.
+    d24 = 2,
+    /// 3 bytes for depth and 1 byte for stencil `0xSSDDDDDD`.
+    d24_s8,
+
+    pub fn bytesPerPixel(format: DepthStencilFormat) usize {
+        return switch (format) {
+            .d16 => @sizeOf(u16),
+            .d24 => 3,
+            .d24_s8 => @sizeOf(u32),
+        };
+    }
+};
+
+/// Deprecated: use DisplayController.Framebuffer.Interlacing
+pub const FramebufferInterlacingMode = DisplayController.Framebuffer.Interlacing;
+
+/// Deprecated: use DisplayController.Framebuffer.Dma
+pub const DmaSize = DisplayController.Framebuffer.Dma;
+
+pub const TextureUnit = enum(u2) {
+    pub const main: TextureUnit = .@"0";
+    pub const procedural: TextureUnit = .@"3";
+
+    @"0",
+    @"1",
+    @"2",
+    @"3",
+};
+
+/// The front face is always counter-clockwise and cannot be changed.
+pub const CullMode = enum(u2) {
+    /// No triangles are discarded.
+    none,
+    /// Triangles with a counter-clockwise winding order are culled.
+    ccw,
+    /// Triangles with a clockwise winding order are culled.
+    cw,
+};
+
+pub const ScissorMode = enum(u2) {
+    /// No pixels will be discarded.
+    disable,
+    /// The pixels outside the scissor area will be rendered.
+    outside,
+    /// The pixels inside the scissor area will be rendered.
+    inside = 3,
+};
+
+pub const EarlyDepthCompareOperation = enum(u2) {
+    ge,
+    gt,
+    le,
+    lt,
+};
+
+pub const OutputMap = packed struct(u32) {
+    pub const Semantic = enum(u5) {
+        position_x,
+        position_y,
+        position_z,
+        position_w,
+
+        normal_quaternion_x,
+        normal_quaternion_y,
+        normal_quaternion_z,
+        normal_quaternion_w,
+
+        color_r,
+        color_g,
+        color_b,
+        color_a,
+
+        texture_coordinates_0_u,
+        texture_coordinates_0_v,
+        texture_coordinates_1_u,
+        texture_coordinates_1_v,
+        texture_coordinates_0_w,
+
+        view_x = 0x12,
+        view_y,
+        view_z,
+
+        texture_coordinates_2_u,
+        texture_coordinates_2_v,
+
+        unused = 0x1F,
+        _,
+
+        pub fn isNormalQuaternion(semantic: Semantic) bool {
+            return switch (semantic) {
+                .normal_quaternion_x, .normal_quaternion_y, .normal_quaternion_z, .normal_quaternion_w => true,
+                else => false,
+            };
+        }
+
+        pub fn isColor(semantic: Semantic) bool {
+            return switch (semantic) {
+                .color_r, .color_g, .color_b, .color_a => true,
+                else => false,
+            };
+        }
+
+        pub fn isTextureCoordinates0(semantic: Semantic) bool {
+            return switch (semantic) {
+                .texture_coordinates_0_u, .texture_coordinates_0_v, .texture_coordinates_0_w => true,
+                else => false,
+            };
+        }
+
+        pub fn isTextureCoordinates1(semantic: Semantic) bool {
+            return switch (semantic) {
+                .texture_coordinates_1_u, .texture_coordinates_1_v => true,
+                else => false,
+            };
+        }
+
+        pub fn isTextureCoordinates2(semantic: Semantic) bool {
+            return switch (semantic) {
+                .texture_coordinates_2_u, .texture_coordinates_2_v => true,
+                else => false,
+            };
+        }
+
+        pub fn isView(semantic: Semantic) bool {
+            return switch (semantic) {
+                .view_x, .view_y, .view_z => true,
+                else => false,
+            };
+        }
+    };
+
+    x: Semantic,
+    _unusd0: u3 = 0,
+    y: Semantic,
+    _unusd1: u3 = 0,
+    z: Semantic,
+    _unusd2: u3 = 0,
+    w: Semantic,
+    _unusd3: u3 = 0,
+
+    pub fn format(map: OutputMap, w: *std.Io.Writer) std.Io.Writer.Error!void {
+        try w.print("({}, {}, {}, {})", .{ map.x, map.y, map.z, map.w });
+    }
+};
+
+pub const
BlendOperation = enum(u3) { + /// `src_factor * src + dst_factor * dst` + add, + /// `src_factor * src - dst_factor * dst` + sub, + /// `dst_factor * dst - src_factor * src` + reverse_sub, + /// `min(src_factor * src, dst_factor * dst)` + min, + /// `max(src_factor * src, dst_factor * dst)` + max, +}; + +pub const BlendFactor = enum(u4) { + zero, + one, + src_color, + one_minus_src_color, + dst_color, + one_minus_dst_color, + src_alpha, + one_minus_src_alpha, + dst_alpha, + one_minus_dst_alpha, + constant_color, + one_minus_constant_color, + constant_alpha, + one_minus_constant_alpha, + src_alpha_saturate, +}; + +pub const LogicOperation = enum(u4) { + clear, + @"and", + reverse_and, + copy, + set, + copy_inverted, + nop, + invert, + nand, + @"or", + nor, + xor, + equivalent, + and_inverted, + or_reverse, + or_inverted, +}; + +pub const CompareOperation = enum(u3) { + never, + always, + eq, + neq, + lt, + le, + gt, + ge, +}; + +pub const StencilOperation = enum(u3) { + /// Keep the current value. + keep, + /// Sets the value to `0`. + zero, + /// Sets the value to `reference`. + replace, + /// Increments the current value and clamps to the maximum representable unsigned value. + increment, + /// Decrements the current value and clamps to `0`. + decrement, + /// Bitwise-inverts the current value. + invert, + /// Increments the current value and clamps to `0` when the maximum value would have exceeded. + increment_wrap, + /// Decrements the current value and clamps to the maximum possible value when the value would go below `0`. + decrement_wrap, +}; + +/// The PICA200 supports only triangle-based primitive topologies. +pub const PrimitiveTopology = enum(u2) { + /// Specifies a series of separate triangle primitives. + /// The number of primitives generated is `(vertexCount / 3)` + triangle_list, + /// Specifies a series of connected triangle primitives with consecutive triangles sharing an edge. 
+ /// The number of primitives generated is `max(0, vertexCount - 2)` + triangle_strip, + /// Specifies a series of connected triangle primitives with all triangles sharing a common vertex. + /// The number of primitives generated is `max(0, vertexCount - 2)` + triangle_fan, + /// Specifies a series of triangle primitives which are to be defined by the geometry shader. + /// The number of primitives generated depends on the shader implementation. + geometry, + + /// Another PICA200 classic. For `drawIndexed` (`drawElements` as GL people call it) you set + /// the primitive topology to `geometry`. + /// + /// Why? Ask the DMP engineers + pub fn indexedTopology(topology: PrimitiveTopology) PrimitiveTopology { + return switch (topology) { + .triangle_list => .geometry, + else => |topo| topo, + }; + } +}; + +pub const IndexFormat = enum(u1) { + /// Specifies that indices are unsigned 8-bit numbers. + u8, + /// Specifies that indices are unsigned 16-bit numbers. + u16, +}; + +pub const TextureUnitFilter = enum(u1) { + nearest, + linear, +}; + +pub const TextureUnitAddressMode = enum(u3) { + clamp_to_edge, + clamp_to_border, + repeat, + mirrored_repeat, +}; + +pub const TextureUnitType = enum(u3) { + @"2d", + cube_map, + shadow_2d, + projection, + shadow_cube, + disabled, +}; + +pub const TextureUnitFormat = enum(u4) { + pub const Hilo88 = extern struct { g: u8, r: u8 }; + pub const I8 = packed struct(u8) { i: u8 }; + pub const A8 = packed struct(u8) { a: u8 }; + pub const Ia88 = packed struct(u16) { i: u8, a: u8 }; + pub const I4 = packed struct(u8) { i: u8 }; + pub const A4 = packed struct(u8) { i: u8 }; + pub const Ia44 = packed struct(u8) { i: u4, a: u4 }; + + abgr8888, + bgr888, + rgba5551, + rgb565, + rgba4444, + ia88, + hilo88, + i8, + a8, + ia44, + i4, + a4, + etc1, + etc1a4, + + pub fn scale(format: TextureUnitFormat, size: usize) usize { + return switch (format) { + .abgr8888 => size << 2, + .bgr888 => size * 3, + .rgba5551, .rgb565, .rgba4444, .ia88, .hilo88 
=> size << 1, + .i8, .a8, .ia44, .etc1a4 => size, + .i4, .a4, .etc1 => size >> 1, + }; + } +}; + +pub const TextureUnitTexture2Coordinates = enum(u1) { + @"2", + @"1", +}; + +pub const TextureUnitTexture3Coordinates = enum(u2) { + @"0", + @"1", + @"2", +}; + +pub const MemoryFill = extern struct { + pub const Control = packed struct(u32) { + pub const none: Control = .{ .busy = false, .width = .@"16" }; + + busy: bool, + finished: bool = false, + _unused0: u6 = 0, + width: PixelSize, + _unused1: u6 = 0, + _unknown0: u5 = 0, + _unused2: u11 = 0, + + pub fn init(width: PixelSize) Control { + return .{ .busy = true, .width = width }; + } + }; + + start: AlignedPhysicalAddress(.@"16", .@"8"), + end: AlignedPhysicalAddress(.@"16", .@"8"), + value: u32, + control: Control, +}; + +pub const PictureFormatter = extern struct { + pub const Dimensions = packed struct(u32) { width: u16, height: u16 }; + + pub const Copy = extern struct { + pub const Line = packed struct(u32) { width: u16, gap: u16 }; + + size: u32, + src: Line, + dst: Line, + }; + + pub const Flags = packed struct(u32) { + pub const Downscale = enum(u2) { none, @"2x1", @"2x2" }; + + flip_v: bool, + linear_tiled: bool, + output_width_less_than_input: bool, + copy: bool, + _unwritable0: u1 = 0, + tiled_tiled: bool, + _unwritable1: u2 = 0, + src_format: ColorFormat, + _unwritable2: u1 = 0, + dst_format: ColorFormat, + _unwritable3: u1 = 0, + use_32x32_tiles: bool, + _unwritable4: u7 = 0, + downscale: Downscale, + _unwritable5: u6 = 0, + }; + + pub const Control = packed struct(u32) { + start: bool, + _unused0: u7 = 0, + finished: bool, + _unused1: u23 = 0, + }; + + src: AlignedPhysicalAddress(.@"16", .@"8"), + dst: AlignedPhysicalAddress(.@"16", .@"8"), + dst_dimensions: Dimensions, + src_dimensions: Dimensions, + flags: Flags, + write_0_before_display_transfer: u32, + control: Control, + _unknown0: u32 = 0, + copy: Copy, +}; + +pub const Graphics = extern struct { + pub const AttributeIndex = enum(u4) { @"0", 
@"1", @"2", @"3", @"4", @"5", @"6", @"7", @"8", @"9", @"10", @"11" }; + pub const ArrayComponentIndex = enum(u4) { @"0", @"1", @"2", @"3", @"4", @"5", @"6", @"7", @"8", @"9", @"10", @"11" }; + + pub const Interrupt = extern struct { + pub const Mask = extern struct { + disabled_low: BitpackedArray(bool, 32), + disabled_high: BitpackedArray(bool, 32), + }; + + pub const Stat = extern struct { + match_low: BitpackedArray(bool, 32), + match_high: BitpackedArray(bool, 32), + }; + + /// 0x000 + ack: [64]u8, + /// 0x040 + req: [64]u8, + /// 0x080 + cmp: [64]u8, + /// 0x0C0 + mask: Mask, + /// 0x0C8 + stat: Stat, + /// 0x0D0 + // XXX: Does this really work? Specifiying an invalid size + // (a.k.a bigger than it is will hang the GPU), even adding multiple + // req/finalize commands WILL hang the GPU (or GSP?)! This either means: + // - This doesn't work + // - Somehow something else has to be set? + autostop: LsbRegister(bool), + /// 0x0D4 + fixed_0x00010002: u32, + + comptime { + std.debug.assert(@sizeOf(Interrupt) == 0xD8); + } + }; + + pub const Rasterizer = extern struct { + pub const Mode = enum(u2) { normal, interlace, wireframe, normal_2 }; + pub const ClippingPlane = extern struct { + /// Enable the clipping plane + /// 0x11C + enable: LsbRegister(bool), + /// Coefficients of the clipping plane. + /// 0x120 + coefficients: [4]LsbRegister(F7_16), + }; + + pub const Statistics = extern struct { + /// 0x168 + vertices_received: u32, + /// 0x16C + triangles_received: u32, + /// 0x170 + triangles_displayed: u32, + }; + + pub const Scissor = extern struct { + /// 0x194 + mode: LsbRegister(ScissorMode), + /// The start of the scissor region, origin bottom-left. + /// 0x198 + start: [2]u16, + /// The end of the scissor region (inclusive), origin bottom-left. + /// 0x19C + end: [2]u16, + }; + + pub const InputMode = packed struct(u32) { + /// XXX: Textures still work when setting this to false...? 
+ use_texture_coordinates: bool = false, + _: u31 = 0, + }; + + pub const Clock = packed struct(u32) { + position_z: bool = false, + color: bool = false, + _unused0: u6 = 0, + texture_coordinates: BitpackedArray(bool, 3) = .splat(false), + _unused1: u5 = 0, + texture_coordinates_0_w: bool = false, + _unused2: u7 = 0, + normal_or_view: bool = false, + _unused3: u7 = 0, + }; + + pub const DepthMap = extern struct { + pub const Mode = enum(u1) { + /// Precision is evenly distributed. + w, + /// Precision is higher close to the near plane. + z, + }; + + /// Scale to map depth from [0, -1] to [0, 1]. + /// 0x134 + scale: LsbRegister(F7_16), + /// Bias to map depth from [0, -1] to [0, 1]. + /// 0x138 + bias: LsbRegister(F7_16), + }; + + pub const UndocumentedConfig0 = packed struct(u32) { + _unknown0: bool = false, + _unused0: u7 = 0, + /// Sometimes interlaces, sometimes skips pixels. + /// Depends on how the GPU feels that day. + dirty_interlace_skip: bool = false, + /// Weird, it "enables" some sort of wireframe in the rasterizer, + /// it does NOT convert triangles to lines as clipped triangles will + /// be correctly splitted, is purely a rasterizer thing. + dirty_wireframe: bool, + _unused1: u22 = 0, + }; + + /// 0x100 + cull_config: LsbRegister(CullMode), + /// `Width / 2.0`, used for scaling vertex coordinates. + /// 0x104 + viewport_h_scale: LsbRegister(F7_16), + /// `2.0 / Width`, supposedly used for stepping colors and texture coordinates. + /// 0x108 + viewport_h_step: MsbRegister(F7_23), + /// `Height / 2.0`, used for scaling vertex coordinates. + /// 0x10C + viewport_v_scale: LsbRegister(F7_16), + /// `2.0 / Height`, supposedly used for stepping colors and texture coordinates. + /// 0x110 + viewport_v_step: MsbRegister(F7_23), + /// 0x114 + _unknown0: [2]u32, + /// Extra user-defined clipping plane. + /// 0x11C + extra_clipping_plane: ClippingPlane, + /// 0x130 + _unknown1: [1]u32, + /// Maps depth from NDC [0, -1] to framebuffer [0, 1]. 
+ /// 0x134 + depth_map: DepthMap, + /// 0x13C + num_inputs: LsbRegister(u3), + /// 0x140 + inputs: [7]OutputMap, + /// 0x15C + _unknown2: u32, + /// 0x160 + shader_output_map_qualifiers: u32, // According to GBATEK this allows you to use the flat qualifier in output colors. + /// 0x164 + _unknown3: u32, + /// 0x168 + statistics: Statistics, + /// 0x174 + _unknown4: [3]u32, + /// 0x180 + config: UndocumentedConfig0, + /// So early depth somehow has a separate internal buffer that must be cleared. + /// From tests it looks like it has MUCH LESS precision and literally breaks with anything, + /// 32x32 is needed. Overall, is it really needed? + /// + /// I don't know what engineers were smoking but it has too many false fails, discarding lots of fragments. + /// XXX: No more can be said, this is vaulted until new info comes out. + /// 0x184 + early_depth_function: LsbRegister(EarlyDepthCompareOperation), + /// 0x188 + early_depth_test_enable: LsbRegister(bool), + /// 0x18C + early_depth_clear: LsbRegister(Trigger), + /// 0x190 + input_mode: InputMode, + /// 0x194 + scissor: Scissor, + /// Viewport origin, origin is bottom-left. + /// 0x1A0 + viewport_xy: [2]u16, + /// 0x1A4 + _unknown8: u32, + /// 0x1A8 + early_depth_data: LsbRegister(u24), + /// 0x1AC + _unknown9: [2]u32, + /// 0x1B4 + depth_map_mode: LsbRegister(DepthMap.Mode), + /// Does not seem to have an effect but it's still documented like this + /// 0x1B8 + _unused_render_buffer_dimensions: u32, // XXX: Why would the rasterizer need output dimensions? + /// The clock driving inputs to the rasterizer from the shader. 
+ /// + /// If a shader outputs a value which the rasterizer doesn't clock, + /// the rasterizer reads a default value; e.g color will read (1, 1, 1, 1) + /// 0x1BC + input_clock: Clock, + }; + + pub const TextureUnits = extern struct { + pub const Config = packed struct(u32) { + texture_enabled: BitpackedArray(bool, 3), + _unused0: u5 = 0, + texture_3_coordinates: TextureUnitTexture3Coordinates, + texture_3_enabled: bool, + _unused1: u1 = 0, + _unused2: u1 = 1, + texture_2_coordinates: TextureUnitTexture2Coordinates, + _unused3: u2 = 0, + clear_texture_cache: bool, + _unused4: u15 = 0, + + pub fn format(cfg: Config, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("Enable: {f}, Enable procedural: {} | T2 source: {}, T3 source: {} | Clear cache: {}", .{ + cfg.texture_enabled, + cfg.texture_3_enabled, + cfg.texture_2_coordinates, + cfg.texture_3_coordinates, + cfg.clear_texture_cache, + }); + } + }; + + pub const Parameters = packed struct(u32) { + pub const Etc1Flag = enum(u2) { none, etc1 = 2 }; + + _unknown0: u1 = 0, + mag_filter: TextureUnitFilter, + min_filter: TextureUnitFilter, + _unknown1: u1 = 0, + etc1: Etc1Flag, + _unknown2: u2 = 0, + address_mode_v: TextureUnitAddressMode, + _unknown3: u1 = 0, + address_mode_u: TextureUnitAddressMode, + _unknown4: u5 = 0, + is_shadow: bool, + _unknown5: u3 = 0, + mip_filter: TextureUnitFilter, + _unused0: u3 = 0, + type: TextureUnitType, + _unused1: u1 = 0, + + pub fn format(params: Parameters, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("Type: {} | Min: {}, Mag: {}, Mip: {} | U: {}, V: {}", .{ + params.type, + params.min_filter, + params.mag_filter, + params.mip_filter, + params.address_mode_u, + params.address_mode_v, + }); + } + }; + + pub const LevelOfDetail = packed struct(u32) { + bias: Q4_8, + _unknown0: u3 = 0, + max_level_of_detail: u4, + _unknown1: u4 = 0, + min_level_of_detail: u4, + _unused0: u4 = 0, + }; + + pub const Shadow = packed struct(u32) { + orthogonal: bool, + // XXX: 
Documented as "the higher 23-bits of an UQ0.24": Bro, thats just an UQ0.23? + z_bias: UQ0_23, + _unknown0: u8 = 0, + }; + + pub const Primary = extern struct { + border_color: [4]u8, + /// Height and WIdth, NOT Width and Height! + dimensions: [2]u16, + parameters: Parameters, + lod: LevelOfDetail, + address: [6]AlignedPhysicalAddress(.@"8", .@"8"), + shadow: Shadow, + _unknown0: u32, + _unknown1: u32, + format: LsbRegister(TextureUnitFormat), + }; + + pub const Secondary = extern struct { + border_color: [4]u8, + /// Height and WIdth, NOT Width and Height! + dimensions: [2]u16, + /// WARNING: Type is ignored in secondary texture units, they're always 2d according to 3dbrew. + parameters: Parameters, + lod: LevelOfDetail, + address: AlignedPhysicalAddress(.@"8", .@"8"), + format: LsbRegister(TextureUnitFormat), + }; + + config: Config, + @"0": Primary, + lighting_enable: LsbRegister(bool), + _unknown0: u32, + @"1": Secondary, + _unknown1: [2]u32, + @"2": Secondary, + }; + + pub const ProceduralTextureUnit = extern struct { + pub const Main = extern struct { + procedural_texture: [5]u32, + procedural_texture_5_low: u32, + procedural_texture_5_high: u32, + }; + + @"3": Main, + lut_index: u32, + lut_data: [8]u32, + }; + + pub const TextureCombiners = extern struct { + pub const FogMode = enum(u3) { disabled, fog = 5, gas = 7 }; + pub const ShadingDensity = enum(u1) { plain, depth }; + pub const BufferSource = enum(u1) { previous_buffer, previous }; + pub const Multiplier = enum(u2) { @"1x", @"2x", @"4x" }; + pub const Source = enum(u4) { + primary_color, + fragment_primary_color, + fragment_secondary_color, + texture_0, + texture_1, + texture_2, + texture_3, + previous_buffer = 0xD, + constant, + previous, + }; + + pub const ColorFactor = enum(u4) { + src_color, + one_minus_src_color, + src_alpha, + one_minus_src_alpha, + src_red, + one_minus_src_red, + src_green = 8, + one_minus_src_green, + src_blue = 12, + one_minus_src_blue, + }; + + pub const AlphaFactor = 
enum(u3) { + src_alpha, + one_minus_src_alpha, + src_red, + one_minus_src_red, + src_green, + one_minus_src_green, + src_blue, + one_minus_src_blue, + }; + + pub const Operation = enum(u4) { + /// `src0` + replace, + /// `src0 * src1` + modulate, + /// `src0 + src1` + add, + /// `src0 + src1 - 0.5` + add_signed, + /// `src0 * src2 + src1 * (1 - src2)` + interpolate, + /// `src0 - src1` + subtract, + /// `4 * ((src0r − 0.5) * (src1r − 0.5) + (src0g − 0.5) * (src1g − 0.5) + (src0b − 0.5) * (src1b − 0.5))` + dot3_rgb, + /// `4 * ((src0r − 0.5) * (src1r − 0.5) + (src0g − 0.5) * (src1g − 0.5) + (src0b − 0.5) * (src1b − 0.5))` + dot3_rgba, + /// `src0 * src1 + src2` (?) + multiply_add, + /// `src0 + src1 * src2` (?) + add_multiply, + }; + + pub const Config = packed struct(u32) { + fog_mode: FogMode, + shading_density_source: ShadingDensity, + _unused0: u4 = 0, + combiner_color_buffer_src: BitpackedArray(BufferSource, 4), + combiner_alpha_buffer_src: BitpackedArray(BufferSource, 4), + z_flip: bool, + _unused1: u7 = 0, + _unknown0: u2 = 0, + _unused2: u6 = 0, + + pub const BufferIndex = enum(u3) { @"1", @"2", @"3", @"4" }; + + pub fn setColorBufferSource(update_buffer: *Config, index: BufferIndex, buffer_src: BufferSource) void { + std.mem.writePackedIntNative(u1, std.mem.asBytes(update_buffer), @as(usize, @bitOffsetOf(Config, "combiner_color_buffer_src")) + @intFromEnum(index), @intFromEnum(buffer_src)); + } + + pub fn setAlphaBufferSource(update_buffer: *Config, index: BufferIndex, buffer_src: BufferSource) void { + std.mem.writePackedIntNative(u1, std.mem.asBytes(update_buffer), @as(usize, @bitOffsetOf(Config, "combiner_alpha_buffer_src")) + @intFromEnum(index), @intFromEnum(buffer_src)); + } + + pub fn format(cfg: Config, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("Fog Mode: {}, Shading Density: {}, Flip Z: {} | Color Buffer Src: {}, Alpha Buffer Src: {}", .{ + cfg.fog_mode, + cfg.shading_density_source, + cfg.z_flip, + cfg.combiner_color_buffer_src, 
+ cfg.combiner_alpha_buffer_src, + }); + } + }; + + pub const Unit = extern struct { + pub const Sources = packed struct(u32) { + color_src: BitpackedArray(Source, 3), + _unused0: u4 = 0, + alpha_src: BitpackedArray(Source, 3), + _unused1: u4 = 0, + }; + + pub const Factors = packed struct(u32) { + color_factor: BitpackedArray(ColorFactor, 3), + alpha_factor: BitpackedArray(AlphaFactor, 3), + _unused0: u11 = 0, + }; + + pub const Operations = packed struct(u32) { + color: Operation, + _unused0: u12 = 0, + alpha: Operation, + _unused1: u12 = 0, + + pub fn format(operations: Operations, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("Color: {}, Alpha: {}", .{ + operations.color, + operations.alpha, + }); + } + }; + + pub const Scales = packed struct(u32) { + color: Multiplier, + _unused0: u14 = 0, + alpha: Multiplier, + _unused1: u14 = 0, + + pub fn format(scales: Scales, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("Color: {}, Alpha: {}", .{ + scales.color, + scales.alpha, + }); + } + }; + + sources: Sources, + factors: Factors, + operations: Operations, + color: [4]u8, + scales: Scales, + }; + + pub const FogLutValue = packed struct(u24) { + next_difference: Q1_11, + value: UQ0_11, + }; + + /// 0x200 + @"0": Unit, + _unknown0: [3]u32, + @"1": Unit, + _unknown1: [3]u32, + @"2": Unit, + _unknown2: [3]u32, + @"3": Unit, + _unknown3: [3]u32, + config: Config, + fog_color: [4]u8, + _unknown4: [2]u32, + gas_attenuation: LsbRegister(F5_10), + gas_accumulation_max: LsbRegister(F5_10), + fog_lut_index: LsbRegister(u16), + _unknown5: u32, + fog_lut_data: [8]LsbRegister(FogLutValue), + @"4": Unit, + _unknown6: [3]u32, + @"5": Unit, + buffer_color: [4]u8, + }; + + pub const OutputMerger = extern struct { + pub const Pixel = DisplayController.Framebuffer.Pixel; + pub const Blend = enum(u1) { logic, blend }; + pub const Mode = enum(u2) { default, gas, shadow = 3 }; + pub const Interlace = enum(u1) { disable, even }; + pub const BlockSize = enum(u1) { 
@"8x8", @"32x32" }; + + pub const Config = packed struct(u32) { + mode: Mode, + _unused0: u6 = 0, + + blend: Blend, + _unused1: u7 = 0, + _unknown0: u8 = 0xE4, // (?) Not setting this doesn't have an effect visually (?) + /// Isn't this similar to interlace? I mean rendering each 2nd line is literally that. There should be something for odd lines maybe? + interlace: Interlace = .disable, + /// Can this be some sort of primitive discard or is literally render nothing? + disable_rendering: bool = false, + _unused2: u6 = 0, + }; + + pub const BlendConfig = packed struct(u32) { + color_op: BlendOperation, + _unused0: u5 = 0, + alpha_op: BlendOperation, + _unused1: u5 = 0, + src_color_factor: BlendFactor, + dst_color_factor: BlendFactor, + src_alpha_factor: BlendFactor, + dst_alpha_factor: BlendFactor, + }; + + pub const AlphaTest = packed struct(u32) { + enable: bool, + _unused0: u3 = 0, + op: CompareOperation, + _unused1: u1 = 0, + reference: u8, + _unused3: u16 = 0, + + pub fn format(cfg: AlphaTest, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("Enable: {}, Operation: {}, Reference: {}", .{ + cfg.enable, + cfg.op, + cfg.reference, + }); + } + }; + + pub const StencilTest = extern struct { + pub const Config = packed struct(u32) { + enable: bool, + _unused0: u3 = 0, + op: CompareOperation, + _unused1: u1 = 0, + compare_mask: u8, + reference: u8, + write_mask: u8, + + pub fn format(cfg: StencilTest.Config, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("Enable: {}, Operation: {}, Reference: {}, Compare Mask: 0x{X:0>2}, Write Mask: 0x{X:0>2}", .{ + cfg.enable, + cfg.op, + cfg.reference, + cfg.compare_mask, + cfg.write_mask, + }); + } + }; + + pub const Operation = packed struct(u32) { + fail: StencilOperation, + _unused0: u1 = 0, + depth_fail: StencilOperation, + _unused1: u1 = 0, + pass: StencilOperation, + _unused2: u21 = 0, + + pub fn format(op: StencilTest.Operation, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("Fail: {}, 
Depth Fail: {}, Pass: {}", .{ + op.fail, + op.depth_fail, + op.pass, + }); + } + }; + + config: StencilTest.Config, + operation: StencilTest.Operation, + }; + + pub const DepthTestColorConfig = packed struct(u32) { + enable_depth_test: bool, + _unused0: u3 = 0, + depth_op: CompareOperation, + _unused1: u1 = 0, + enable_r_write: bool, + enable_g_write: bool, + enable_b_write: bool, + enable_a_write: bool, + enable_depth_write: bool, + _unused2: u19 = 0, + + pub fn format(cfg: DepthTestColorConfig, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("Depth [Test: {}, Operation: {}, Write: {}] | Color {c}{c}{c}{c}", .{ + cfg.enable_depth_test, + cfg.depth_op, + cfg.enable_depth_write, + @as(u8, if (cfg.enable_r_write) 'r' else '_'), + @as(u8, if (cfg.enable_g_write) 'g' else '_'), + @as(u8, if (cfg.enable_b_write) 'b' else '_'), + @as(u8, if (cfg.enable_a_write) 'a' else '_'), + }); + } + }; + + pub const ColorAccess = enum(u4) { disable, all = 0xF }; + pub const DepthStencilAccess = enum(u2) { disable, stencil, depth, all }; + + pub const RenderBufferDimensions = packed struct(u32) { + width: u11, + _unused0: u1 = 0, + height_end: u10, + _unused1: u2 = 0, + flip_vertically: bool = false, + _unused2: u7 = 0, + + pub fn init(width: u11, height: u10, flip_vertically: bool) RenderBufferDimensions { + return .{ .width = width, .height_end = height - 1, .flip_vertically = flip_vertically }; + } + + pub fn format(dim: RenderBufferDimensions, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("{}x{} (flip: {})", .{ dim.width, @as(u32, dim.height_end) + 1, dim.flip_vertically }); + } + }; + + pub const ColorBufferFormat = packed struct(u32) { + pixel_size: Pixel.Size, + _unused0: u14 = 0, + pixel_format: Pixel, + _unused1: u13 = 0, + + pub fn init(pixel_format: Pixel) ColorBufferFormat { + return .{ + .pixel_size = pixel_format.pixelSize(), + .pixel_format = pixel_format, + }; + } + + pub fn format(fmt: ColorBufferFormat, w: *std.Io.Writer) 
std.Io.Writer.Error!void { + try w.print("{} ({} bits)", .{ fmt.pixel_format, fmt.pixel_size }); + } + }; + + config: Config, + blend_config: BlendConfig, + logic_config: LsbRegister(LogicOperation), + blend_color: [4]u8, + alpha_test: AlphaTest, + stencil_test: StencilTest, + depth_color_config: DepthTestColorConfig, + _unknown0: [8]u32, + invalidate: LsbRegister(Trigger), + /// Flushing without issuing a drawcall before-hand in the same submitted command queue + /// will hang the GPU + /// + /// Don't ask me why + flush: LsbRegister(Trigger), + color_read: LsbRegister(ColorAccess), + color_write: LsbRegister(ColorAccess), + depth_read: LsbRegister(DepthStencilAccess), + depth_write: LsbRegister(DepthStencilAccess), + depth_format: LsbRegister(DepthStencilFormat), + color_format: ColorBufferFormat, + early_depth_test_enable: LsbRegister(bool), + _unknown1: [2]u32, + block_size: LsbRegister(BlockSize), + depth_location: AlignedPhysicalAddress(.@"64", .@"8"), + color_location: AlignedPhysicalAddress(.@"64", .@"8"), + dimensions: RenderBufferDimensions, + _unknown2: u32, + gas_light_xy: u32, + gas_light_z: u32, + gas_light_z_color: u32, + gas_lut_index: u32, + gas_lut_data: u32, + _unknown3: u32, + gas_delta_z_depth: u32, + _unknown4: [9]u32, + fragment_operation_shadow: u32, + _unknown5: [15]u32, + }; + + /// Fragment lighting in the PICA200 is done primarily through 1D lookup tables and quaternion interpolation. + /// + /// The vertex shader (or geometry if used) must output a Quaternion representing the rotation from the z-axis + /// to the normal. This can be done in different ways, with the standard RotationFromTo(.{0, 0, 1}, Normal) or + /// the approach in the 'Shading by Quaternion Interpolation' paper. + /// + /// It must also output a View position that is optionally used for positional lights to calculate the + /// light vector, as directional lights are not affected by it. 
+ /// + /// There are 22 `LookupTable`s available: + /// - 2 distribution tables for specular: D0 and D1 + /// - 1 fresnel table: Fr + /// - 3 reflection tables for each color channel for reflection (D1): Rr, Rg and Rb + /// - 8 spotlight tables: Sp0 to Sp7 + /// - 8 distance attenuation tables: Da0 to Da7 + /// + /// The relevant lighting formulas are these (sources below): + /// Cp -> primary color, also called diffuse / Cs -> secondary color, also called specular + /// + /// Cp = ambient + foreach light ( Da*i*(*sd*) * Sp*i*(*in*) * H * (ambient*i* + diffuse*i* * f(L * N)) ) + /// + /// Cs = foreach light ( Da*i*(*sd*) * Sp*i*(*in*) * H * (specular*i*0**x** * D0(*in*) * G + specular*i*1**x** * R**x**(*in*) * D1(*in*) * G) ) + /// + /// where: + /// - H -> shadow attenuation factor + /// - *i* -> For light *i* + /// - **x** -> Color channel (r, g or b) + /// - *sd* -> Scaled distance, clip(`bias`*i* + `scale`*i* * distance, 0, 1) + /// - *in* -> One of the `LookupTable.Input`s + /// - G -> Geometric factor, when enabled is `(L * N) / lengthSqr(L + N)`, `1.0` otherwise + /// + /// Lookup tables (except Da) can have an input domain of [-1.0, 1.0] or [0.0, 1.0] depending on the `LookupTable.Absolute` flags. + /// Da always has an input domain of [0.0, 1.0]. The mapping of input to index is: + /// - [0.0, 1.0] -> [0, 255] + /// - [-1.0, 1.0] -> [0.0, 1.0] is [0, 127] and [-1.0, 0.0] is [128, 255] + /// + /// + /// With all of that, the PICA200 can do both PBR and NPBR, for example a Blinn-Phong shading model can be done with: + /// - D0 enabled (absolute) with input N * H where each entry is `(N * H)^s` and `s` is the *shininess* of the surface. + /// + /// Sources: + /// - 3dbrew + /// - 'Primitive Processing and Advanced Shading Architecture for Embedded Space' by Max Kazakov & Eisaku Ohbuchi. + /// - Both slides and paper are useful! + /// - 'A Real-Time Configurable Shader Based on Lookup Tables' by Eisaku Ohbuchi & Hiroshi Unno. 
+ /// - Warning: Paywalled, you must pay or access it through an institution (e.g: university) + /// - 'Shading by Quaternion Interpolation' by Anders Hast. + pub const FragmentLighting = extern struct { + pub const Color = packed struct(u32) { + b: u8, + _unused0: u2 = 0, + g: u8, + _unused1: u2 = 0, + r: u8, + _unused2: u4 = 0, + + pub fn init(r: u8, g: u8, b: u8) Color { + return .{ .r = r, .g = g, .b = b }; + } + + pub fn initBuffer(rgb: [3]u8) Color { + return .init(rgb[0], rgb[1], rgb[2]); + } + + pub fn splat(v: u8) Color { + return .init(v, v, v); + } + + pub fn format(color: Color, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("({}, {}, {})", .{ + color.r, + color.g, + color.b, + }); + } + }; + + pub const FresnelSelector = enum(u2) { none, primary, secondary, both }; + pub const LookupTable = enum(u5) { + pub const Enabled = enum(u4) { + d0_rr_sp_da, + fr_rr_sp_da, + d0_d1_rr_da, + d0_d1_fr_da, + d0_d1_rx_sp_da, + d0_fr_rx_sp_da, + d0_d1_fr_rr_sp_da, + all = 8, + }; + + pub const Index = packed struct(u32) { + index: u8, + table: LookupTable, + _unused0: u19 = 0, + + pub fn init(table: LookupTable, index: u8) Index { + return .{ .table = table, .index = index }; + } + + pub fn format(idx: Index, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("{}[{d}]", .{ + idx.table, + idx.index, + }); + } + }; + + pub const Absolute = packed struct(u32) { + _unused0: u1 = 0, + disable_d0: bool = false, + _unused1: u3 = 0, + disable_d1: bool = false, + _unused2: u3 = 0, + disable_sp: bool = false, + _unused3: u3 = 0, + disable_fr: bool = false, + _unused4: u3 = 0, + disable_rb: bool = false, + _unused5: u3 = 0, + disable_rg: bool = false, + _unused6: u3 = 0, + disable_rr: bool = false, + _unused7: u6 = 0, + + pub fn format(absolute: Absolute, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("D0: {}, D1: {}, SP: {}, FR: {}, RR: {}, RG: {}, RB: {}", .{ + !absolute.disable_d0, + !absolute.disable_d1, + !absolute.disable_sp, + 
!absolute.disable_fr,
+                        !absolute.disable_rr,
+                        !absolute.disable_rg,
+                        !absolute.disable_rb,
+                    });
+                }
+            };
+
+            pub const Input = enum(u3) { @"N * H", @"V * H", @"N * V", @"L * N", @"-L * P", @"cos(phi)" };
+            /// Selects, per lookup table, which `Input` term indexes it. Every table defaults to `N * H`.
+            pub const Select = packed struct(u32) {
+                d0: Input = .@"N * H",
+                _unused0: u1 = 0,
+                d1: Input = .@"N * H",
+                _unused1: u1 = 0,
+                sp: Input = .@"N * H",
+                _unused2: u1 = 0,
+                fr: Input = .@"N * H",
+                _unused3: u1 = 0,
+                rb: Input = .@"N * H",
+                _unused4: u1 = 0,
+                rg: Input = .@"N * H",
+                _unused5: u1 = 0,
+                rr: Input = .@"N * H",
+                _unused6: u5 = 0,
+
+                pub fn format(select: Select, w: *std.Io.Writer) std.Io.Writer.Error!void {
+                    try w.print("D0: {}, D1: {}, SP: {}, FR: {}, RR: {}, RG: {}, RB: {}", .{
+                        select.d0,
+                        select.d1,
+                        select.sp,
+                        select.fr,
+                        select.rr,
+                        select.rg,
+                        select.rb,
+                    });
+                }
+            };
+
+            pub const Multiplier = enum(u3) { @"1x", @"2x", @"4x", @"8x", @"0.25x" = 6, @"0.5x" };
+            /// Per lookup table output multiplier. Every table defaults to `1x`.
+            pub const Scale = packed struct(u32) {
+                d0: Multiplier = .@"1x",
+                _unused0: u1 = 0,
+                d1: Multiplier = .@"1x",
+                _unused1: u1 = 0,
+                sp: Multiplier = .@"1x",
+                _unused2: u1 = 0,
+                fr: Multiplier = .@"1x",
+                _unused3: u1 = 0,
+                rb: Multiplier = .@"1x",
+                _unused4: u1 = 0,
+                rg: Multiplier = .@"1x",
+                _unused5: u1 = 0,
+                rr: Multiplier = .@"1x",
+                _unused6: u5 = 0,
+
+                pub fn format(scale: Scale, w: *std.Io.Writer) std.Io.Writer.Error!void {
+                    try w.print("D0: {}, D1: {}, SP: {}, FR: {}, RR: {}, RG: {}, RB: {}", .{
+                        scale.d0,
+                        scale.d1,
+                        scale.sp,
+                        scale.fr,
+                        scale.rr,
+                        scale.rg,
+                        scale.rb,
+                    });
+                }
+            };
+
+            /// One lookup table entry: the sampled value plus the delta towards the next sample.
+            pub const Data = packed struct(u32) {
+                entry: UQ0_12,
+                next_absolute_difference: Q0_11,
+                _unused0: u8 = 0,
+
+                // TODO: initBuffer
+
+                /// Builds a full 256-entry table by sampling `context.value(input)`.
+                ///
+                /// `context` must expose `value(f32) f32`.
+                ///
+                /// When `absolute` is true, index `i` samples input `i / 256`.
+                /// Otherwise indices 0..127 sample `i / 128` and indices 128..255 sample
+                /// `(i - 256) / 128`, i.e. the negative half of the domain.
+                ///
+                /// Each entry stores the sample at its own input and the difference to the
+                /// sample at the next input *in input order*: entry 255 targets `value(1.0)`
+                /// (absolute) or `value(0.0)` (signed), and in signed mode entry 127 targets
+                /// `value(1.0)`.
+                pub fn initContext(
+                    context: anytype,
+                    absolute: bool,
+                ) [256]Data {
+                    var lut: [256]Data = undefined;
+
+                    const absolute_unit: f32 = @floatFromInt(@intFromBool(absolute));
+                    const negated_unit = 1 - absolute_unit;
+
+                    // Weight of index bit 7: +128 steps when absolute, -128 steps when signed.
+                    const msb_multiplier: f32 = (absolute_unit * 2 - 1) * 128;
+                    // Number of steps per unit of input: 256 when absolute, 128 when signed.
+                    const max = 256.0 - (negated_unit * 128.0);
+
+                    var last: f32 = context.value(0.0);
+                    for (1..lut.len) |i| {
+                        const input = (@as(f32, @floatFromInt(i & 0x7F)) + @as(f32, @floatFromInt((i >> 7) & 0b1)) * msb_multiplier) / max;
+
+                        const current: f32 = context.value(input);
+                        defer last = current;
+
+                        // In signed mode index 128 samples -1.0, which is NOT the input that follows
+                        // index 127 (127/128); the upper end of that interval is 1.0. Without this,
+                        // entry 127 would interpolate towards the value at the opposite end of the domain.
+                        const next: f32 = if (!absolute and i == 128) context.value(1.0) else current;
+
+                        // NOTE(review): the difference is stored as a magnitude here but signed for
+                        // entry 255 below; confirm which form the hardware expects.
+                        lut[i - 1] = .{
+                            .entry = .ofSaturating(last),
+                            .next_absolute_difference = .ofSaturating(@abs(next - last)),
+                        };
+                    }
+
+                    lut[255] = .{ .entry = .ofSaturating(last), .next_absolute_difference = .ofSaturating(context.value(absolute_unit) - last) };
+                    return lut;
+                }
+
+                pub fn format(data: Data, w: *std.Io.Writer) std.Io.Writer.Error!void {
+                    try w.print("Entry: {}, Diff: {}", .{ data.entry, data.next_absolute_difference });
+                }
+            };
+
+            // zig fmt: off
+            d0, d1,
+            fr = 3,
+            rb, rg, rr,
+            sp0 = 8, sp1, sp2, sp3, sp4, sp5, sp6, sp7,
+            da0, da1, da2, da3, da4, da5, da6, da7,
+            // zig fmt: on
+        };
+
+        pub const BumpMode = enum(u2) { none, bump, tangent };
+
+        pub const Control = extern struct {
+            pub const Environment = packed struct(u32) {
+                enable_shadow_factor: bool,
+                _unused0: u1 = 0,
+                fresnel: FresnelSelector,
+                enabled_lookup_tables: LookupTable.Enabled,
+                _unknown0: u4 = 0x4,
+                _unused1: u4 = 0,
+                apply_shadow_attenuation_to_primary_color: bool,
+                apply_shadow_attenuation_to_secondary_color: bool,
+                invert_shadow_attenuation: bool,
+                apply_shadow_attenuation_to_alpha: bool,
+                _unused2: u2 = 0,
+                bump_map_unit: TextureUnit,
+                /// BIG BRAIN TIME, lets configure the shadow map unit........
+                /// Only unit 0 supports them? 
+ shadow_map_unit: TextureUnit, + _unused3: u1 = 0, + clamp_highlights: bool, + bump_mode: BumpMode, + disable_bump_recalculation: bool, + _unknown1: u1 = 0x1, + + pub fn format(env: Environment, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("Enabled Tables: {}, Fresnel: {}, Shadow: {} | Shadow to primary: {}, Shadow to secondary: {}, Shadow to alpha: {}, Invert Shadow: {} | Bump Unit: {}, Shadow Unit: {} | Clamp Highlights: {}, Bump Mode: {}, Bump Recalculation: {}", .{ + env.enabled_lookup_tables, + env.fresnel, + env.enable_shadow_factor, + env.apply_shadow_attenuation_to_primary_color, + env.apply_shadow_attenuation_to_secondary_color, + env.apply_shadow_attenuation_to_alpha, + env.invert_shadow_attenuation, + env.bump_map_unit, + env.shadow_map_unit, + env.clamp_highlights, + env.bump_mode, + !env.disable_bump_recalculation, + }); + } + }; + + pub const Lights = packed struct(u32) { + shadows_disabled: BitpackedArray(bool, 8), + spotlight_disabled: BitpackedArray(bool, 8), + disable_d0: bool, + disable_d1: bool, + _unknown0: u1 = 0x1, + disable_fr: bool, + disable_rb: bool, + disable_rg: bool, + disable_rr: bool, + _unknown1: u1 = 0x1, + distance_attenuation_disabled: BitpackedArray(bool, 8), + + pub fn format(lights: Lights, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("D0: {}, D1: {}, FR: {}, RR: {}, RG: {}, RB: {} | Shadow Disabled: {f}, Spotlight Disabled: {f}, Distance Attenuation Disabled: {f}", .{ + !lights.disable_d0, + !lights.disable_d1, + !lights.disable_fr, + !lights.disable_rr, + !lights.disable_rg, + !lights.disable_rb, + lights.shadows_disabled, + lights.spotlight_disabled, + lights.distance_attenuation_disabled, + }); + } + }; + + environment: Environment, + lights: Lights, + }; + + pub const Light = extern struct { + pub const Id = enum(u4) { + _, + + pub fn init(value: u3) Id { + return @enumFromInt(value); + } + }; + + pub const Type = enum(u1) { positional, directional }; + pub const DiffuseSides = enum(u1) { 
one, both }; + + pub const Factors = extern struct { + specular: [2]Color, + diffuse: Color, + ambient: Color, + }; + + pub const Parameters = extern struct { + /// Its `xy` position if positional, otherwise its `xy` direction (unitary). + /// + /// If it is a directional light, the direction vector is Object -> Light, + xy: F5_10x2, + /// Its `z` position if positional, otherwise its `z` direction (unitary). + z: LsbRegister(F5_10), + /// Its `xy` spot (for spotlights) direction (unitary). + spot_xy: Q1_11x2, + /// Its `z` spot (for spotlights) direction (unitary). + spot_z: LsbRegister(Q1_11), + }; + + pub const Config = packed struct(u32) { + type: Type, + diffuse_sides: DiffuseSides, + geometric_factor_enable: BitpackedArray(bool, 2), + _unused0: u28 = 0, + }; + + pub const Attenuation = extern struct { + bias: LsbRegister(F7_12), + scale: LsbRegister(F7_12), + }; + + /// Color factors for primary and secondary colors. + factors: Factors, + parameters: Parameters, + _unknown0: u32, + config: Config, + /// Distance attenuation coefficients for the lookup input: + /// + /// `DA(clamp(distance * scale + bias, 0.0, 1.0))` + attenuation: Attenuation, + }; + + light: [8]Light, + _unknown0: [32]u32, + /// Scene/Global ambient color. + ambient: Color, + _unknown1: u32, + /// Number of active lights minus one. + num_lights_min_one: LsbRegister(u3), + control: Control, + lut_index: LookupTable.Index, + disable: LsbRegister(bool), + _unknown2: u32, + lut_data: [8]LookupTable.Data, + lut_input_absolute: LookupTable.Absolute, + lut_input_select: LookupTable.Select, + lut_input_scale: LookupTable.Scale, + _unknown3: [6]u32, + /// Maps enabled light index to its configuration. e.g: you can have 3 lights enabled but have those 3 lights be 0, 4 and 7 for example. 
+        light_permutation: BitpackedArray(Light.Id, 8),
+    };
+
+    pub const PrimitiveEngine = extern struct {
+        pub const Mode = enum(u1) { drawing, config };
+
+        /// Primitive topology plus the number of vertex shader outputs, packed in one register.
+        pub const PrimitiveConfig = packed struct(u32) {
+            total_vertex_outputs: u4,
+            _unused0: u4 = 0,
+            topology: PrimitiveTopology,
+            _unused1: u6 = 0,
+            _unknown0: u1 = 0,
+            _unused2: u15 = 0,
+
+            pub fn format(cfg: PrimitiveConfig, w: *std.Io.Writer) std.Io.Writer.Error!void {
+                try w.print("Topology: {} | Vertex Outputs: {}", .{
+                    cfg.topology,
+                    cfg.total_vertex_outputs,
+                });
+            }
+        };
+
+        /// Pipeline-wide switches; every field defaults to its disabled/zero state.
+        pub const PipelineConfig = packed struct(u32) {
+            pub const GeometryUsage = enum(u2) { disabled, enabled = 2 };
+
+            geometry_shader_usage: GeometryUsage = .disabled,
+            _unused0: u6 = 0,
+            drawing_triangles: bool = false,
+            _unknown0: u1 = 0,
+            _unused1: u6 = 0,
+            _unknown1: u4 = 0,
+            _unused2: u11 = 0,
+            variable_geometry_inputs: bool = false,
+        };
+
+        pub const State = packed struct(u32) {
+            inputting_vertices_or_draw_arrays: bool = false,
+            _unused0: u7 = 0,
+            drawing_triangles: bool = false,
+            _unused1: u23 = 0,
+        };
+
+        pub const GeometryShader = packed struct(u32) {
+            pub const Mode = enum(u2) {
+                /// Vertex shader outputs begin filling geometry shader inputs (`point_inputs_minus_one`) until all slots are filled,
+                /// in which case a geometry shader invocation is performed.
+                point,
+                /// Vertex shader outputs are buffered into uniform registers starting at `f1`,
+                /// `f0` stores the number of vertices in the batch.
+                ///
+                /// All drawcalls must be indexed while using this geometry shader mode.
+                ///
+                /// The first index signifies how many vertices to process, two kinds of vertices
+                /// are batched: main and secondary vertices; main vertices passthrough all outputs
+                /// while secondary ones only the first.
+                variable,
+                /// Vertex shader outputs are buffered into geometry shader uniform registers (up to `fixed_vertices_minus_one`) starting
+                /// at `uniform_start`.
+ fixed, + }; + + mode: GeometryShader.Mode, + _unused0: u6 = 0, + fixed_vertices_minus_one: u4, + point_inputs_minus_one: u4, + uniform_start: shader.register.Source.Constant, + _unused1: u1 = 0, + /// Unknown, but it is said that must be set when mode is fixed in both 3dbrew and GBATEK. + fixed: bool, + _unused2: u7 = 0, + }; + + pub const Attribute = extern struct { + pub const Format = packed struct(u4) { + pub const i8x1: Format = .{ .type = .i8, .size = .x }; + pub const i8x2: Format = .{ .type = .i8, .size = .xy }; + pub const i8x3: Format = .{ .type = .i8, .size = .xyz }; + pub const i8x4: Format = .{ .type = .i8, .size = .xyzw }; + + pub const u8x1: Format = .{ .type = .u8, .size = .x }; + pub const u8x2: Format = .{ .type = .u8, .size = .xy }; + pub const u8x3: Format = .{ .type = .u8, .size = .xyz }; + pub const u8x4: Format = .{ .type = .u8, .size = .xyzw }; + + pub const i16x1: Format = .{ .type = .i16, .size = .x }; + pub const i16x2: Format = .{ .type = .i16, .size = .xy }; + pub const i16x3: Format = .{ .type = .i16, .size = .xyz }; + pub const i16x4: Format = .{ .type = .i16, .size = .xyzw }; + + pub const f32x1: Format = .{ .type = .f32, .size = .x }; + pub const f32x2: Format = .{ .type = .f32, .size = .xy }; + pub const f32x3: Format = .{ .type = .f32, .size = .xyz }; + pub const f32x4: Format = .{ .type = .f32, .size = .xyzw }; + + pub const Type = enum(u2) { + i8, + u8, + i16, + f32, + + pub fn byteSize(typ: Type) usize { + return switch (typ) { + .i8, .u8 => @sizeOf(u8), + .i16 => @sizeOf(i16), + .f32 => @sizeOf(f32), + }; + } + }; + + pub const Size = enum(u2) { x, xy, xyz, xyzw }; + + type: Type = .i8, + size: Size = .x, + + pub fn byteSize(fmt: Format) usize { + return fmt.type.byteSize() * (@as(usize, @intFromEnum(fmt.size)) + 1); + } + }; + + pub const Config = extern struct { + pub const Flags = enum(u1) { array, fixed }; + + pub const Low = packed struct(u32) { attributes: BitpackedArray(Format, 8) = .splat(.{}) }; + pub const High = 
packed struct(u32) { + remaining_attributes: BitpackedArray(Format, 4) = .splat(.{}), + flags: BitpackedArray(Flags, 12) = .splat(.array), + attributes_end: u4, + }; + + low: Low, + high: High, + + pub fn setAttribute(config: *Config, index: AttributeIndex, value: Format) void { + std.mem.writePackedInt(u4, std.mem.asBytes(config), @as(usize, @intFromEnum(index)) * @bitSizeOf(Format), @bitCast(value), .little); + } + + pub fn getAttribute(config: *const Config, index: AttributeIndex) Format { + return @bitCast(std.mem.readPackedInt(u4, std.mem.asBytes(config), @as(usize, @intFromEnum(index)) * @bitSizeOf(Format), .little)); + } + + pub fn setFlag(config: *Config, index: AttributeIndex, value: Flags) void { + std.mem.writePackedInt(u1, std.mem.asBytes(config), (@as(usize, 12) * @bitSizeOf(Format)) + @intFromEnum(index) * @bitSizeOf(Flags), @intFromEnum(value), .little); + } + + pub fn getFlag(config: *const Config, index: AttributeIndex) Flags { + return @enumFromInt(std.mem.readPackedInt(u1, std.mem.asBytes(config), (@as(usize, 12) * @bitSizeOf(Format)) + @intFromEnum(index) * @bitSizeOf(Flags), .little)); + } + }; + + pub const VertexBuffer = extern struct { + pub const Config = extern struct { + pub const ArrayComponent = enum(u4) { + attribute_0, + attribute_1, + attribute_2, + attribute_3, + attribute_4, + attribute_5, + attribute_6, + attribute_7, + attribute_8, + attribute_9, + attribute_10, + attribute_11, + + padding_4, + padding_8, + padding_12, + padding_16, + }; + + pub const Low = packed struct(u32) { + components: BitpackedArray(ArrayComponent, 8) = .init(.{ + .attribute_0, + .attribute_1, + .attribute_2, + .attribute_3, + .attribute_4, + .attribute_5, + .attribute_6, + .attribute_7, + }), + }; + + pub const High = packed struct(u32) { + components: BitpackedArray(ArrayComponent, 4) = .init(.{ + .attribute_8, + .attribute_9, + .attribute_10, + .attribute_11, + }), + + bytes_per_vertex: u8, + _unused0: u4 = 0, + num_components: u4, + }; + + low: Low, + 
high: High, + + pub fn setComponent(config: *VertexBuffer.Config, index: ArrayComponentIndex, value: ArrayComponent) void { + std.mem.writePackedInt(u4, std.mem.asBytes(config), @as(usize, @intFromEnum(index)) * @bitSizeOf(ArrayComponent), @intFromEnum(value), .little); + } + + pub fn getComponent(config: *const VertexBuffer.Config, index: ArrayComponentIndex) ArrayComponent { + return @enumFromInt(std.mem.readPackedInt(u4, std.mem.asBytes(config), @as(usize, @intFromEnum(index)) * @bitSizeOf(ArrayComponent), .little)); + } + }; + + offset: LsbRegister(u28), + config: VertexBuffer.Config, + }; + + pub const IndexBuffer = packed struct(u32) { + offset: u28, + _unused0: u3 = 0, + fmt: IndexFormat, + + pub fn init(base_offset: u28, fmt: IndexFormat) IndexBuffer { + return .{ .offset = base_offset, .fmt = fmt }; + } + + pub fn format(idx: IndexBuffer, w: *std.Io.Writer) std.Io.Writer.Error!void { + try w.print("0x{X} ({t})", .{ + idx.offset, + idx.fmt, + }); + } + }; + + base: AlignedPhysicalAddress(.@"16", .@"8"), + config: Config, + vertex_buffers: [12]VertexBuffer, + index_buffer: IndexBuffer, + }; + + pub const FixedAttribute = extern struct { + pub const Index = packed struct(u32) { + /// Begin immediate submission of vertex attributes. + pub const immediate: Index = .{ .index = 0xF }; + + index: u4, + _: u28 = 0, + + pub fn register(input: u4) Index { + std.debug.assert(input < 12); + return .{ .index = input }; + } + }; + + /// If `Index.immediate` the written `value`s will begin filling shader inputs + /// and drawing primitives. Otherwise it is an index representing the attribute + /// whose `value` will be set. + index: Index, + + /// The value to write to a shader input or attribute. + value: F7_16x4, + }; + + pub const CommandBuffer = extern struct { + /// Shifted to the left by 3. 
+ size: [2]LsbRegister(u22), + address: [2]AlignedPhysicalAddress(.@"16", .@"8"), + jump: [2]LsbRegister(Trigger), + }; + + /// Attribute info used when issuing drawcalls via `draw` or `draw_indexed`. + attributes: Attribute, + /// The amount of vertices that will be processed by a drawcall. + draw_vertex_count: u32, + config: PipelineConfig, + /// The first index used by drawcalls. Only used in `draw`, ignored by `draw_indexed`. + draw_first_index: u32, + _unknown0: [2]u32, + post_vertex_cache: LsbRegister(u8), + /// Triggers a non-indexed drawcall, will begin reading from `draw_first_index` + /// until `draw_vertex_count` vertices are processed. + draw: LsbRegister(Trigger), + /// Triggers an indexed drawcall, + draw_indexed: LsbRegister(Trigger), + _unknown1: u32, + clear_post_vertex_cache: LsbRegister(Trigger), + fixed_attribute: FixedAttribute, + _unknown2: [2]u32, + command_buffer: CommandBuffer, + _unknown3: [4]u32, + vertex_shader_input_attributes: LsbRegister(u4), + _unknown4: u32, + /// updates to the vertex shader unit will not be propagated to the geometry shader if true. + exclusive_shader_configuration: LsbRegister(bool), + mode: LsbRegister(Mode), + _unknown5: [4]u32, + vertex_shader_output_map_total_2: LsbRegister(u4), + _unknown6: [6]u32, + vertex_shader_output_map_total_1: LsbRegister(u4), + geometry_shader: GeometryShader, + state: State, + geometry_shader_full_vertices_minus_one: LsbRegister(u5), + _unknown7: u32, + _unknown8: [8]u32, + primitive_config: PrimitiveConfig, + restart_primitive: LsbRegister(Trigger), + }; + + pub const Shader = extern struct { + pub const Entry = packed struct(u32) { + entry: u16, + _: u16 = 0x7FFF, + + pub fn initEntry(entry: u16) Entry { + return .{ .entry = entry }; + } + }; + + pub const Input = packed struct(u32) { + /// Amount of input registers + inputs: u4, + _unused0: u4 = 0, + /// When true, inputs will fill uniform registers instead of input ones. + /// Used for variable and fixed geometry shader modes. 
+            uniform: bool = false,
+            _unused1: u18 = 0,
+            enabled_for_geometry_0: bool = false,
+            _unknown0: u1 = 0,
+            enabled_for_vertex_0: bool = false,
+            _unused2: u1 = 0,
+            enabled_for_vertex_1: bool = false,
+
+            pub fn format(input: Input, w: *std.Io.Writer) std.Io.Writer.Error!void {
+                try w.print("Inputs: {d} | Uniform: {}", .{
+                    input.inputs,
+                    input.uniform,
+                });
+            }
+        };
+
+        /// Selects the float uniform register to write next and the encoding of the data that follows.
+        pub const FloatUniformConfig = packed struct(u32) {
+            pub const Mode = enum(u1) { f7_16, f8_23 };
+
+            index: FloatConstantRegister,
+            _unused0: u24 = 0,
+            mode: Mode,
+
+            pub fn format(cfg: FloatUniformConfig, w: *std.Io.Writer) std.Io.Writer.Error!void {
+                try w.print("{d} ({t})", .{
+                    cfg.index,
+                    cfg.mode,
+                });
+            }
+        };
+
+        /// Maps attribute slots to shader input registers, 4 bits per slot.
+        ///
+        /// Slots 0-7 live in `low` and slots 8-15 in `high`; the default is the identity
+        /// mapping (`v0`..`v15`).
+        pub const AttributePermutation = extern struct {
+            pub const Low = packed struct(u32) {
+                attributes: BitpackedArray(InputRegister, 8) = .init(.{ .v0, .v1, .v2, .v3, .v4, .v5, .v6, .v7 }),
+            };
+
+            pub const High = packed struct(u32) {
+                remaining_attribute: BitpackedArray(InputRegister, 8) = .init(.{ .v8, .v9, .v10, .v11, .v12, .v13, .v14, .v15 }),
+            };
+
+            low: Low = .{},
+            high: High = .{},
+
+            /// Stores `value` in slot `index`.
+            pub fn setAttribute(config: *AttributePermutation, index: AttributeIndex, value: InputRegister) void {
+                // Widen before multiplying: `@intFromEnum` yields the enum's tag type, and with a
+                // narrow tag `index * 4` would overflow for the higher slots.
+                const bit_offset = @as(usize, @intFromEnum(index)) * @bitSizeOf(InputRegister);
+                std.mem.writePackedInt(u4, std.mem.asBytes(config), bit_offset, @intFromEnum(value), .little);
+            }
+
+            /// Returns the input register currently mapped to slot `index`. Does not modify `config`.
+            pub fn getAttribute(config: *const AttributePermutation, index: AttributeIndex) InputRegister {
+                // Same widening as in `setAttribute`.
+                const bit_offset = @as(usize, @intFromEnum(index)) * @bitSizeOf(InputRegister);
+                return @enumFromInt(std.mem.readPackedInt(u4, std.mem.asBytes(config), bit_offset, .little));
+            }
+        };
+
+        /// 16 boolean uniforms in the low half; the high half defaults to the constant `0x7FFF`.
+        pub const BooleanUniformMask = packed struct(u32) {
+            mask: BitpackedArray(bool, 16),
+            _unused0: u16 = 0x7FFF,
+
+            pub fn init(mask: BitpackedArray(bool, 16)) BooleanUniformMask {
+                return .{ .mask = mask };
+            }
+        };
+
+        bool_uniforms: BooleanUniformMask,
+        int_uniforms: [4][4]u8,
+        _unused0: [4]u32,
+        input: Input,
+        entrypoint: Entry,
+        attribute_permutation: AttributePermutation,
+        output_map_mask: 
LsbRegister(BitpackedArray(bool, 16)), + _unused1: u32, + code_transfer_end: LsbRegister(Trigger), + float_uniform_index: FloatUniformConfig, + float_uniform_data: [8]u32, + _unused2: [2]u32, + code_transfer_index: LsbRegister(u12), + code_transfer_data: [8]Instruction, + _unused3: u32, + operand_descriptors_index: LsbRegister(u7), + operand_descriptors_data: [8]OperandDescriptor, + }; + + /// 0x000 + irq: Interrupt, + /// 0x0D8 + _unused0: [40]u8, + /// 0x100 + rasterizer: Rasterizer, + /// 0x1C0 + _unused1: [64]u8, + /// 0x200 + texture_units: TextureUnits, + _unused2: [36]u8, + /// 0x2A0 + procedural_texture_unit: ProceduralTextureUnit, + _unused3: [32]u8, + /// 0x300 + texture_combiners: TextureCombiners, + _unused4: [8]u8, + /// 0x400 + output_merger: OutputMerger, + /// 0x500 + fragment_lighting: FragmentLighting, + _unused5: [152]u8, + /// 0x800 + primitive_engine: PrimitiveEngine, + _unused6: [128]u8, + /// 0xA00 + geometry_shader: Shader, + _unused7: [8]u8, + /// 0xAC0 + vertex_shader: Shader, + + comptime { + std.debug.assert(@offsetOf(Graphics, "irq") == 0x000); + std.debug.assert(@offsetOf(Graphics, "rasterizer") == 0x100); + std.debug.assert(@offsetOf(Graphics, "texture_units") == 0x200); + std.debug.assert(@offsetOf(Graphics, "procedural_texture_unit") == 0x2A0); + std.debug.assert(@offsetOf(Graphics, "texture_combiners") == 0x300); + std.debug.assert(@offsetOf(Graphics, "output_merger") == 0x400); + std.debug.assert(@offsetOf(Graphics, "fragment_lighting") == 0x500); + std.debug.assert(@offsetOf(Graphics, "primitive_engine") == 0x800); + std.debug.assert(@offsetOf(Graphics, "geometry_shader") == 0xA00); + std.debug.assert(@offsetOf(Graphics, "vertex_shader") == 0xAC0); + } +}; + +/// There are 3 main points where LCDs are configured: +/// * I2C (TODO: I2C in general) +/// * LCD registers (`zitrus.hardware.lcd`) +/// * These +/// +/// The GPU `DisplayController` is what drives the LCD, pushing pixels and is in charge +/// of the timing of each display 
(along with the IRQs). +/// +/// With these, looks like it's possible to: +/// * Change the Hz of the display itself +/// * Change the size of the displayed area, display it anywhere and set a +/// configurable border color in the non-displayed area. +/// +/// It also looks like the pixel clock is the main clock divided by 24, i.e `268111856 / 24` +/// +/// All of this has been synthetized from 3dbrew and GBATEK, both sources have wildly different +/// register naming but as everywhere else, the naming is different and reflects what I've seen +/// and think. +/// +/// WARNING: Modifying these registers CAN damage hardware in some LCDs: o3DS (burn-in) and IPS (new ones). +/// Thanks to @sono3 in the godmode9 discord who told me this. +/// Use the decls in `Preset` as those values are taken from what OFW use. +pub const DisplayController = extern struct { + pub const Color = packed struct(u32) { + r: u8, + g: u8, + b: u8, + _unused0: u8 = 0, + }; + + pub const Framebuffer = extern struct { + pub const Control = packed struct(u32) { + enable: bool, + _unused0: u7 = 0, + disable_horizontal_sync_irq: bool, + disable_vertical_sync_irq: bool, + disable_error_irq: bool, + _unused1: u5 = 0, + maybe_output_enable: bool = true, + _unused2: u15 = 0, + }; + + pub const Select = packed struct(u32) { + select: u1, + _unused0: u3 = 0, + current: u1 = 0, + _unused1: u3 = 0, + reset_fifo: bool = false, + _unused2: u7 = 0, + horizontal_ack: bool = false, + vertical_ack: bool = false, + error_ack: bool = false, + _unused3: u13 = 0, + }; + + pub const Status = packed struct(u32) { + horizontal_irq: bool, + vertical_irq: bool, + _unused0: u2 = 0, + bit: bool, + _unused1: u3 = 0, + horizontal_sync: bool, + horizontal_blank: bool, + horizontal_drawing: bool, + _unused2: u1 = 0, + vertical_sync: bool, + vertical_blank: bool, + vertical_drawing: bool, + _unknown0: bool, + _unused3: u16 = 0, + }; + + pub const Pixel = enum(u3) { + pub const Size = enum(u2) { + @"16", + @"24", + @"32", + _, + 
}; + + pub const Abgr8888 = extern struct { a: u8, b: u8, g: u8, r: u8 }; + pub const Bgr888 = extern struct { b: u8, g: u8, r: u8 }; + pub const Rgb565 = packed struct(u16) { b: u5, g: u6, r: u5 }; + pub const Rgba5551 = packed struct(u16) { a: u1, b: u5, g: u5, r: u5 }; + pub const Rgba4444 = packed struct(u16) { a: u4, b: u4, g: u4, r: u4 }; + + /// 4 bytes, `A B G R`. + abgr8888, + /// 3 bytes, `B G R`. + bgr888, + /// Packed, 2 bytes, `RRRRRGGGGGGBBBBB`. + rgb565, + /// Packed, 2 bytes, `RRRRRGGGGGBBBBBA`. + rgba5551, + /// Packed, 2 bytes, `RRRRGGGGBBBBAAAA`. + rgba4444, + + pub inline fn Data(comptime format: Pixel) type { + return switch (format) { + .abgr8888 => Abgr8888, + .bgr888 => Bgr888, + .rgb565 => Rgb565, + .rgba5551 => Rgba5551, + .rgba4444 => Rgba4444, + }; + } + + pub fn pixelSize(format: Pixel) Size { + return switch (format.bytesPerPixel()) { + 2 => .@"16", + 3 => .@"24", + 4 => .@"32", + else => unreachable, + }; + } + + pub fn bytesPerPixel(format: Pixel) usize { + return switch (format) { + inline else => |f| @sizeOf(f.Data()), + }; + } + + pub fn components(format: Pixel) usize { + return switch (format) { + inline else => |f| @typeInfo(f.Data()).@"struct".fields.len, + }; + } + }; + + pub const Interlacing = enum(u2) { + none, + scanline_doubling, + enable, + enable_inverted, + }; + + pub const Dma = enum(u2) { + @"32", + @"64", + @"128", + vram, + }; + + pub const Format = packed struct(u32) { + pixel_format: Pixel, + _unused0: u1 = 0, + interlacing: Interlacing, + /// Should only be used on the top screen. + /// + /// Makes the display controller reuse the same fetched pixel twice. + /// + /// Halves the pixel rate (?) 
+ /// + /// NOTE: this is just synthetized from testing and above docs, still needs more testing + half_rate: bool, + _unused1: bool = false, + dma_size: Dma, + _unused2: u6 = 0, + unknown0: u16 = 8, + }; + + left_address: [2]AlignedPhysicalAddress(.@"16", .@"1"), + format: Format, + control: Control, + select: Select, + status: Status, + color_lookup_index: LsbRegister(u8), + color_lookup_data: Color, + _unused0: [2]u32, + stride: u32, + right_address: [2]AlignedPhysicalAddress(.@"16", .@"1"), + }; + + pub const SynchronizationPolarity = packed struct(u32) { + horizontal_active_high: bool, + _unused0: u3 = 0, + vertical_active_high: bool, + _unused1: u27 = 0, + }; + + /// Total = Back Porch Start -> Back Porch Mid (Left/Upper Border Start) -> Display Start/Back Porch End + /// -> Front Porch Start (Right/Lower Border End) -> Front Porch Mid (Supposedly Bugged) -> Front Porch End -> IRQ + /// + /// The LCDs are sensitive, look at the comment in `DisplayController`. + pub const Timing = extern struct { + pub const Display = packed struct(u32) { + back_porch_mid: u12, + _unused0: u4 = 0, + front_porch_start: u12, + _unused1: u4 = 0, + }; + + pub const Range = packed struct(u32) { + start: u12, + _unused0: u4 = 0, + end: u12, + _unused1: u4 = 0, + }; + + /// 0x00 + total: LsbRegister(u12), + /// 0x04 + back_porch_end: LsbRegister(u12), + /// 0x08 + front_porch_mid: LsbRegister(u12), + /// 0x0C + front_porch_end: LsbRegister(u12), + /// 0x10 + sync_start: LsbRegister(u12), + /// 0x14 + sync_end: LsbRegister(u12), + /// 0x18 + back_porch_start: LsbRegister(u12), + /// 0x1C + interrupt: Range, + /// 0x20 + unknown: u32, + + comptime { + std.debug.assert(@sizeOf(Timing) == 0x24); + } + }; + + pub const DisplaySize = packed struct(u32) { + width: u12, + _unused0: u4 = 0, + height: u12, + _unused1: u4 = 0, + }; + + pub const LatchingPoint = packed struct(u32) { + horizontal: u12, + _unused0: u4 = 0, + vertical: u12, + _unused1: u4 = 0, + }; + + pub const Preset = struct { + 
/// Top with half rate + pub const @"top_240x400@60Hz": Preset = .{ + .display_size = .{ .width = 240, .height = 400 }, + .horizontal_timing = .{ + .total = .init(450), + .back_porch_end = .init(209), + .front_porch_mid = .init(449), + .front_porch_end = .init(449), + .sync_start = .init(0), + .sync_end = .init(207), + .back_porch_start = .init(209), + .interrupt = .{ + .start = 449, + .end = 453, + }, + .unknown = 0x10000, + }, + .horizontal_display_timing = .{ + .back_porch_mid = 209, + .front_porch_start = 449, + }, + .vertical_timing = .{ + .total = .init(413), + .back_porch_end = .init(2), + .front_porch_mid = .init(402), + .front_porch_end = .init(402), + .sync_start = .init(402), + .sync_end = .init(1), + .back_porch_start = .init(2), + .interrupt = .{ + .start = 402, + .end = 406, + }, + .unknown = 0, + }, + .vertical_display_timing = .{ + .back_porch_mid = 2, + .front_porch_start = 402, + }, + }; + + /// Top with interlace enabled + pub const @"top_2x240x400@60Hz": Preset = .{ + .display_size = .{ .width = 240, .height = 400 }, + .horizontal_timing = .{ + .total = .init(450), + .back_porch_end = .init(209), + .front_porch_mid = .init(449), + .front_porch_end = .init(449), + .sync_start = .init(0), + .sync_end = .init(207), + .back_porch_start = .init(209), + .interrupt = .{ + .start = 449, + .end = 453, + }, + .unknown = 0x10000, + }, + .horizontal_display_timing = .{ + .back_porch_mid = 209, + .front_porch_start = 449, + }, + .vertical_timing = .{ + .total = .init(827), + .back_porch_end = .init(2), + .front_porch_mid = .init(802), + .front_porch_end = .init(802), + .sync_start = .init(802), + .sync_end = .init(1), + .back_porch_start = .init(2), + .interrupt = .{ + .start = 802, + .end = 806, + }, + .unknown = 0, + }, + .vertical_display_timing = .{ + .back_porch_mid = 2, + .front_porch_start = 802, + }, + }; + + /// Top + pub const @"top_240x800@60Hz": Preset = .{ + .display_size = .{ .width = 240, .height = 800 }, + .horizontal_timing = .{ + .total = 
.init(450), + .back_porch_end = .init(209), + .front_porch_mid = .init(449), + .front_porch_end = .init(449), + .sync_start = .init(0), + .sync_end = .init(207), + .back_porch_start = .init(209), + .interrupt = .{ + .start = 449, + .end = 453, + }, + .unknown = 0x10000, + }, + .horizontal_display_timing = .{ + .back_porch_mid = 209, + .front_porch_start = 449, + }, + .vertical_timing = .{ + .total = .init(827), + .back_porch_end = .init(2), + .front_porch_mid = .init(802), + .front_porch_end = .init(802), + .sync_start = .init(802), + .sync_end = .init(1), + .back_porch_start = .init(2), + .interrupt = .{ + .start = 802, + .end = 806, + }, + .unknown = 0, + }, + .vertical_display_timing = .{ + .back_porch_mid = 2, + .front_porch_start = 802, + }, + }; + + /// Bottom + pub const @"bottom_240x320@60Hz": Preset = .{ + .display_size = .{ .width = 240, .height = 320 }, + .horizontal_timing = .{ + .total = .init(450), + .back_porch_end = .init(209), + .front_porch_mid = .init(449), + .front_porch_end = .init(449), + .sync_start = .init(205), + .sync_end = .init(207), + .back_porch_start = .init(209), + .interrupt = .{ + .start = 449, + .end = 453, + }, + .unknown = 0x10000, + }, + .horizontal_display_timing = .{ + .back_porch_mid = 209, + .front_porch_start = 449, + }, + .vertical_timing = .{ + .total = .init(413), + .back_porch_end = .init(82), + .front_porch_mid = .init(402), + .front_porch_end = .init(402), + .sync_start = .init(79), + .sync_end = .init(80), + .back_porch_start = .init(82), + .interrupt = .{ + .start = 403, + .end = 407, + }, + .unknown = 0x00, + }, + .vertical_display_timing = .{ + .back_porch_mid = 82, + .front_porch_start = 402, + }, + }; + + display_size: DisplaySize, + horizontal_timing: Timing, + horizontal_display_timing: Timing.Display, + vertical_timing: Timing, + vertical_display_timing: Timing.Display, + + /// Tries to compute timing parameters for the specified LCD configuration, returning `null` if no + /// configuration exists. 
+ /// + /// WARNING: THIS HAS ONLY BEEN TESTED ON A o2DS! + pub fn init(screen: Screen, half_rate: bool, x: u12, y: u12, width: u12, height: u12, refresh: f32) ?Preset { + std.debug.assert(x + width <= screen.width() and y + height <= screen.height()); + + // WARNING: changing this has (unsafe) implications, read above + const h_total = 450; + + const multiplier = if (half_rate) 1 else 2; + const remaining: f32 = @floatFromInt((zitrus.time.arm11_ticks_per_s / 24) * multiplier / (h_total + 1)); + const unbounded_v_total: u32 = @ceil(remaining / refresh) - 1; + + // We won't modify h_total so we're cooked. + if (unbounded_v_total > std.math.maxInt(u12) or unbounded_v_total < screen.height() + 12) return null; + + const v_total: u12 = @intCast(unbounded_v_total); + const v_unused_total = v_total - screen.height() - 12; + + // - First Border is Mid->End + // - Second Border is Start->Mid + const v_sync_end = v_unused_total + 2; + const v_back_porch_start = v_sync_end + 2; + const v_back_porch_mid = v_back_porch_start; + const v_back_porch_end = v_back_porch_mid + y; + + const v_front_porch_start = v_back_porch_end + height; + const v_front_porch_mid = v_back_porch_end + (screen.height() - y); + const v_front_porch_end = v_front_porch_mid; + const v_sync_start = switch (screen) { + // Yes... totally makes sense! 
+ .top => v_front_porch_end, + // When Sync Start -> End takes more than 1 tick, + // the screen directly desyncs/fades lmao (o2DS) + .bottom => v_sync_end - 1, + }; + + const v_irq_start = v_front_porch_end + 1; + const v_irq_end = v_irq_start + 4; + + return .{ + .display_size = .{ .width = @intCast(width), .height = @intCast(height) }, + // NOTE: as HTotal is hardcoded, we don't have to calculate things (unlike with V) + .horizontal_timing = .{ + .total = .init(450), + .back_porch_end = .init(209 + x), + .front_porch_mid = .init(449), + .front_porch_end = .init(449), + .sync_start = .init(205), + .sync_end = .init(207), + .back_porch_start = .init(209), + .interrupt = .{ + .start = 449, + .end = 453, + }, + .unknown = 0x10000, + }, + .horizontal_display_timing = .{ + .back_porch_mid = 209, + .front_porch_start = 209 + x + width, + }, + .vertical_timing = .{ + .total = .init(v_total), + .back_porch_end = .init(v_back_porch_end), + .front_porch_mid = .init(v_front_porch_mid), + .front_porch_end = .init(v_front_porch_end), + .sync_start = .init(v_sync_start), + .sync_end = .init(v_sync_end), + .back_porch_start = .init(v_back_porch_start), + .interrupt = .{ + .start = v_irq_start, + .end = v_irq_end, + }, + .unknown = 0x00, + }, + .vertical_display_timing = .{ + .back_porch_mid = v_back_porch_mid, + .front_porch_start = v_front_porch_start, + }, + }; + } + }; + + /// 0x00 + horizontal_timing: Timing, + /// 0x24 + vertical_timing: Timing, + /// 0x48 + synchronization_polarity: SynchronizationPolarity, + /// 0x4C + border_color: Color, + /// 0x50 + horizontal_position: LsbRegister(u12), + /// 0x54 + vertical_position: LsbRegister(u12), + /// 0x58 + _unused0: u32, + /// 0x5C + display_size: DisplaySize, + /// 0x60 + horizontal_display_timing: Timing.Display, + /// 0x64 + vertical_display_timing: Timing.Display, + /// 0x68 + framebuffer: Framebuffer, + /// 0x9C + latching_point: LatchingPoint, + /// 0xA0 + _unused2: [24]u32, + + comptime { + 
std.debug.assert(@sizeOf(DisplayController) == 0x100); + } +}; + +// TODO: Properly finish this +pub const Registers = extern struct { + pub const VRamPower = packed struct(u32) { + _unknown0: u8 = std.math.maxInt(u8), + power_off_a_low: bool, + power_off_a_high: bool, + power_off_b_low: bool, + power_off_b_high: bool, + _unknown1: u20 = std.math.maxInt(u20), + }; + + pub const InterruptFlags = packed struct(u32) { + _unknown0: u1 = 0, + _unknown1: u1 = 0, + _unused0: u24 = 0, + psc0: bool, + psc1: bool, + pdc0: bool, + pdc1: bool, + ppf: bool, + p3d: bool, + }; + + pub const Busy = packed struct(u32) { + // NOTE: THESE CHANGE ON P3D. TESTTESTTEST + // WHEN THE GPU HANGS SOME BITS STAY ON (AND THEY'RE ALMOST ALWAYS THE SAME ONES!!!!!) + _unknown0: bool, + _unknown1: bool, + _unknown2: bool, + _unknown3: bool, + _unknown4: bool, + _unknown5: bool, + _unknown6: bool, + _unknown7: bool, + _unknown8: bool, + _unknown9: bool, + _unknown10: bool, + _unknown11: bool, + _unknown12: bool, + _unknown13: bool, + _unknown14: bool, + _unknown15: bool, + _unknown_vram_power_0: bool, + _unknown_vram_power_1: bool, + memory_fill_busy: bool, + memory_copy_busy: bool, + _unused2: u12, + }; + + pub const TrafficStatistics = extern struct { + non_vram_reads: u32, + non_vram_writes: u32, + vram_a_reads: u32, + vram_a_writes: u32, + vram_b_reads: u32, + vram_b_writes: u32, + input_assembly_reads: u32, + sampled_texture_reads: u32, + depth_buffer_reads: u32, + depth_buffer_writes: u32, + color_buffer_reads: u32, + color_buffer_writes: u32, + top_lcd_reads: u32, + bottom_lcd_reads: u32, + memory_copy_reads: u32, + memory_copy_writes: u32, + memory_fill_writes: [2]u32, + cpu_vram_reads: u32, + cpu_vram_writes: u32, + }; + + hardware_id: u32, + clock: u32, + _unknown0: u32, + _unused0: u32, + psc: [2]MemoryFill, + vram_power: VRamPower, + irq: InterruptFlags, + _something: u32, + _make_something: u32, + _backlight_or_so_0: u32, + /// 0x044 + _unknown1: u32, + /// 0x048 + _unknown2: u32, + 
/// 0x04C + _unused1: u32, + /// 0x050 + timing_control: [2]u32, + /// 0x058 + busy: Busy, + /// 0x05C + _unknown3: u32, + /// 0x060 + _unknown4: u32, + /// 0x064 + _unknown5: u32, + /// 0x068 + _unknown6: u32, + _unused2: u32, + traffic_statistics: TrafficStatistics, + _backlight_or_so_1: u32, + vram_a_base_address: [*]u8, + vram_b_base_address: [*]u8, + _backlight_or_so_2: u32, + _unknown7: u32, + _unused3: [0x2C]u8, + _unused4: [0x300]u8, + pdc: [2]DisplayController, + _unused5: [0x600]u8 = @splat(0), + ppf: PictureFormatter, + _unknown8: [0xF5]u32 = @splat(0), + p3d: Graphics, + + comptime { + if (builtin.cpu.arch.isArm()) { + if (@offsetOf(Registers, "timing_control") != 0x50) @compileError(std.fmt.comptimePrint("found 0x{X}", .{@offsetOf(Registers, "timing_control")})); + if (@offsetOf(Registers, "traffic_statistics") != 0x70) @compileError(std.fmt.comptimePrint("found 0x{X}", .{@offsetOf(Registers, "traffic_statistics")})); + if (@offsetOf(Registers, "pdc") != 0x400) @compileError(std.fmt.comptimePrint("found 0x{X}", .{@offsetOf(Registers, "pdc")})); + if (@offsetOf(Registers, "ppf") != 0xC00) @compileError(std.fmt.comptimePrint("found 0x{X}", .{@offsetOf(Registers, "ppf")})); + if (@offsetOf(Registers, "p3d") != 0x1000) @compileError(std.fmt.comptimePrint("found 0x{X}", .{@offsetOf(Registers, "p3d")})); + } + } +}; + +comptime { + if (@sizeOf(MemoryFill) != 0x10) + @compileError(std.fmt.comptimePrint("(@sizeOf(MemoryFill) == 0x{X}) and 0x{X} != 0x10!", .{ @sizeOf(MemoryFill), @sizeOf(MemoryFill) })); + + if (@sizeOf(PictureFormatter) != 0x2C) + @compileError(std.fmt.comptimePrint("(@sizeOf(MemoryCopy) == 0x{X}) and 0x{X} != 0x2C!", .{ @sizeOf(PictureFormatter), @sizeOf(PictureFormatter) })); + + _ = morton; + _ = shader; +} + +const testing = std.testing; + +const builtin = @import("builtin"); + +const std = @import("std"); +const zsflt = @import("zsflt"); +const zitrus = @import("zitrus"); +const hardware = zitrus.hardware; + +const Trigger = 
hardware.Trigger; +const LsbRegister = hardware.LsbRegister; +const MsbRegister = hardware.MsbRegister; +const BitpackedArray = hardware.BitpackedArray; +const AlignedPhysicalAddress = hardware.AlignedPhysicalAddress; +const PhysicalAddress = hardware.PhysicalAddress; + +const OperandDescriptor = shader.encoding.OperandDescriptor; +const Instruction = shader.encoding.Instruction; +const FloatConstantRegister = shader.register.Source.Constant; +const InputRegister = shader.register.Source.Input; diff --git a/src/platform/3ds/mango/hardware/pica/command.zig b/src/platform/3ds/mango/hardware/pica/command.zig new file mode 100644 index 0000000..f52fcfa --- /dev/null +++ b/src/platform/3ds/mango/hardware/pica/command.zig @@ -0,0 +1,544 @@ +//! Type-safe PICA200 `pica.Graphics` command wrappers and types. +//! +//! Address and Size of command queues/buffers/lists must be aligned to 16 bytes +//! Commands are aligned to 8 bytes + +pub const Header = packed struct(u32) { + pub const Mode = enum(u1) { consecutive, incremental }; + + id: Id, + mask: u4, + extra: u8, + _unused0: u3 = 0, + mode: Mode, +}; + +pub const Id = enum(u16) { + _, + + pub fn fromRegister(comptime base: *volatile pica.Graphics, register: *volatile anyopaque) Id { + std.debug.assert(@intFromPtr(register) >= @intFromPtr(base) and @intFromPtr(register) < (@intFromPtr(base) + @sizeOf(pica.Graphics))); // invalid internal register, pointer is not within the valid range + + const offset = @intFromPtr(register) - @intFromPtr(base); + + std.debug.assert((offset % @alignOf(u32)) == 0); // invalid internal register, it must be aligned to 4 bytes + + return @enumFromInt(@divExact(offset, @alignOf(u32))); + } +}; + +/// WARNING: using this will bloat your binary! 
+pub const Dump = struct { + pub const Iterator = struct { + words: []const u32, + current: u32, + + pub fn init(words: []const u32) Iterator { + return .{ .words = words, .current = 0 }; + } + + pub fn next(it: *Iterator) ?Dump { + if (it.current + 1 >= it.words.len) return null; + const hdr: Header = @bitCast(it.words[it.current + 1]); + const full_len = 2 + @as(u32, hdr.extra); + defer it.current += std.mem.alignForward(u32, full_len, 2); + + return .{ .words = it.words[it.current..][0..@min(full_len, it.words.len - it.current)] }; + } + }; + + pub const Single = struct { + // XXX: This is quite bad, a rewrite would be good + pub const Info = struct { + /// Fully qualified name + name: []const u8, + type: type, + + pub fn findName(id: Id) []const u8 { + return switch (@intFromEnum(id)) { + (@sizeOf(pica.Graphics) / @sizeOf(u32))...0xFFFF => "", + inline else => |word_offset| find(word_offset * @sizeOf(u32)).name, + }; + } + + pub fn find(comptime offset: u32) Info { + @setEvalBranchQuota(200000); + + var current = Search.find(pica.Graphics, offset); + var current_offset = offset - @offsetOf(pica.Graphics, current.name); + var fully_qualified_name = current.name; + while (@sizeOf(current.type) > @sizeOf(u32)) switch (@typeInfo(current.type)) { + .@"struct" => |st| switch (st.layout) { + .auto => unreachable, + .@"packed" => unreachable, // Hitting this means you have an invalid packed struct in there. + .@"extern" => { + const next = Search.find(current.type, current_offset); + + current_offset -= @offsetOf(current.type, next.name); + current = next; + + fully_qualified_name = fully_qualified_name ++ "." 
++ next.name; + }, + }, + .array => |array| switch (std.math.order(@sizeOf(array.child), @sizeOf(u32))) { + .lt => current.type = [@divExact(@sizeOf(u32), @sizeOf(array.child))]array.child, + .eq, .gt => { + fully_qualified_name = fully_qualified_name ++ std.fmt.comptimePrint("[{d}]", .{current_offset / @sizeOf(array.child)}); + current_offset %= @sizeOf(array.child); + current.type = array.child; + }, + }, + else => @compileError("TODO"), + }; + + return .{ .name = fully_qualified_name, .type = current.type }; + } + + const Search = struct { + parent: type, + offset: u32, + + pub fn find(comptime T: type, comptime offset: u32) Info { + const fields = @typeInfo(T).@"struct".fields; + const ctx: Search = .{ .parent = T, .offset = offset }; + const index = std.sort.binarySearch(std.builtin.Type.StructField, fields, ctx, Search.compare) orelse unreachable; + return .{ .name = fields[index].name, .type = fields[index].type }; + } + + pub fn compare(ctx: Search, field: std.builtin.Type.StructField) std.math.Order { + const field_offset = @offsetOf(ctx.parent, field.name); + if (ctx.offset < field_offset) return .lt; + if (ctx.offset >= field_offset + @sizeOf(field.type)) return .gt; + return .eq; + } + }; + }; + + mode: Header.Mode, + id: Id, + raw: u32, + + pub fn format(single: Single, w: *std.Io.Writer) std.Io.Writer.Error!void { + switch (@intFromEnum(single.id)) { + (@sizeOf(pica.Graphics) / @sizeOf(u32))...0xFFFF => try w.print("{X:0>8}", .{single.raw}), + inline else => |word_offset| { + const info: Info = .find(word_offset * @sizeOf(u32)); + + switch (single.mode) { + .incremental => { + try w.print("{s} ({X:0>3}) -> ", .{ info.name, single.id }); + try printValue(info.type, single.raw, w); + }, + .consecutive => try printValue(info.type, single.raw, w), + } + }, + } + } + + pub fn printValue(comptime T: type, raw: u32, w: *std.Io.Writer) std.Io.Writer.Error!void { + const typed: T = switch (@typeInfo(T)) { + .@"enum" => @enumFromInt(raw), + else => 
@bitCast(raw), + }; + + try w.print(if (std.meta.hasFn(T, "format")) "{f}" else if (T == u32) "{X:0>8}" else "{any}", .{typed}); + } + }; + + words: []const u32, + + pub fn format(dump: Dump, w: *std.Io.Writer) std.Io.Writer.Error!void { + const hdr: Header = @bitCast(dump.words[1]); + + switch (hdr.mode) { + .incremental => try w.print("{t} ({b:0>4})", .{ hdr.mode, hdr.mask }), + .consecutive => try w.print("{t}: {s} ({X:0>3}, {b:0>4})", .{ hdr.mode, Single.Info.findName(hdr.id), hdr.id, hdr.mask }), + } + + if (hdr.extra > 0) try w.writeByte('\n'); + + var single: Single = .{ + .mode = hdr.mode, + .id = hdr.id, + .raw = dump.words[0], + }; + + var i: u32 = 0; + while (true) { + try w.print("... {f}", .{single}); + + if (i >= hdr.extra) break; + try w.writeByte('\n'); + single = .{ + .mode = hdr.mode, + .raw = dump.words[2 + i], + .id = switch (hdr.mode) { + .consecutive => single.id, + .incremental => @enumFromInt(@intFromEnum(single.id) + 1), + }, + }; + i += 1; + } + } +}; + +pub const Queue = struct { + pub const empty: Queue = .{ .buffer = .empty, .end = 0 }; + + buffer: []align(16) u32, + end: u32, + + pub fn initBuffer(buffer: []align(16) u32) Queue { + return .{ + .buffer = buffer, + .end = 0, + }; + } + + pub fn slice(queue: Queue) []align(16) u32 { + return queue.buffer[0..queue.end]; + } + + pub fn unusedCapacitySlice(queue: Queue) []align(8) u32 { + return @alignCast(queue.buffer[queue.end..]); + } + + pub fn reset(queue: *Queue) void { + queue.end = 0; + } + + pub inline fn add(queue: *Queue, comptime base: *volatile pica.Graphics, register: anytype, value: std.meta.Child(@TypeOf(register))) void { + return queue.addMasked(base, register, value, 0xF); + } + + pub fn addMasked(queue: *Queue, comptime base: *volatile pica.Graphics, register: anytype, value: std.meta.Child(@TypeOf(register)), mask: u4) void { + comptime std.debug.assert(@typeInfo(@TypeOf(register)) == .pointer); + + const Child = std.meta.Child(@TypeOf(register)); + const child_info = 
@typeInfo(Child); + + const id: Id = .fromRegister(base, register); + + switch (comptime std.math.order(@bitSizeOf(Child), @bitSizeOf(u32))) { + .eq => queue.addMaskedBuffer(id, &.{switch (child_info) { + .@"enum" => @intFromEnum(value), + else => @bitCast(value), + }}, mask, .consecutive), + .gt => { + const as_u32_array = switch (child_info) { + .array => |a| if (@bitSizeOf(a.child) != @bitSizeOf(u32)) + @compileError("only arrays of 32-bit types are supported for incremental writes") + else + @as([a.len]u32, @bitCast(value)), + .@"struct" => |s| if (s.layout == .auto or (@bitSizeOf(Child) % @bitSizeOf(u32)) != 0) + @compileError("only non-auto structs with a bitSize multiple of 32 are supported") + else + @as([@divExact(@bitSizeOf(Child), @bitSizeOf(u32))]u32, @bitCast(value)), + else => @compileError("unsupported type for incremental write"), + }; + + queue.addMaskedBuffer(id, &as_u32_array, mask, .incremental); + }, + .lt => @compileError("commands only support writing full 32-bit values (which you can mask!)"), + } + } + + fn IncrementalWritesTuple(comptime base: *volatile pica.Graphics, comptime registers: anytype) type { + const RegistersType = @TypeOf(registers); + + comptime std.debug.assert(@typeInfo(RegistersType) == .@"struct"); + const st_ty = @typeInfo(RegistersType).@"struct"; + + comptime std.debug.assert(st_ty.is_tuple); + + var needed_field_types: [st_ty.fields.len]type = undefined; + + @setEvalBranchQuota(st_ty.fields.len * 2000); + for (st_ty.fields, 0..) 
|field, i| { + std.debug.assert(@typeInfo(field.type) == .pointer); + + const f_ty = @typeInfo(field.type).pointer; + const current = registers[i]; + const current_id: Id = .fromRegister(base, current); + + // A register that is not exactly 32 bits wide is a hard error, so report it with a real diagnostic. + if (@bitSizeOf(f_ty.child) != @bitSizeOf(u32)) @compileError("only values with a @bitSizeOf(u32) are supported."); + + if (i > 0) { + const last_id: Id = .fromRegister(base, registers[i - 1]); + + comptime std.debug.assert(std.math.order(@intFromEnum(current_id), @intFromEnum(last_id)) == .gt); + comptime std.debug.assert((@intFromEnum(current_id) - @intFromEnum(last_id)) == 1); + } + + needed_field_types[i] = f_ty.child; + } + + return @Tuple(&needed_field_types); + } + + /// Same as `addIncrementalMasked` with all four mask bits set. + pub inline fn addIncremental(queue: *Queue, comptime base: *volatile pica.Graphics, comptime registers: anytype, values: IncrementalWritesTuple(base, registers)) void { + return queue.addIncrementalMasked(base, registers, values, 0b1111); + } + + pub fn addIncrementalMasked(queue: *Queue, comptime base: *volatile pica.Graphics, comptime registers: anytype, values: IncrementalWritesTuple(base, registers), mask: u4) void { + if (registers.len == 0) return; + + comptime std.debug.assert(values.len <= 256); + const first_id: Id = .fromRegister(base, registers[0]); + + var u32_values: [values.len]u32 = undefined; + inline for (&u32_values, 0..) 
|*v, i| v.* = switch (@typeInfo(@TypeOf(values[i]))) { + .@"enum" => @intFromEnum(values[i]), + else => @bitCast(values[i]), + }; + + return queue.addMaskedBuffer(first_id, &u32_values, mask, .incremental); + } + + pub inline fn addConsecutive(queue: *Queue, comptime base: *volatile pica.Graphics, register: anytype, values: []const std.meta.Child(@TypeOf(register))) void { + return queue.addConsecutiveMasked(base, register, values, 0b1111); + } + + pub fn addConsecutiveMasked(queue: *Queue, comptime base: *volatile pica.Graphics, register: anytype, values: []const std.meta.Child(@TypeOf(register)), mask: u4) void { + comptime std.debug.assert(@typeInfo(@TypeOf(register)) == .pointer); + + const Child = std.meta.Child(@TypeOf(register)); + const id: Id = .fromRegister(base, register); + + comptime std.debug.assert(@bitSizeOf(Child) == @bitSizeOf(u32)); + + return queue.addMaskedBuffer(id, @ptrCast(values), mask, .consecutive); + } + + /// Appends `values` as one or more commands targeting `id`, splitting them into chunks of at most 256 words. + /// In `.incremental` mode every following chunk starts at the register right after the last one written by the previous chunk. + pub fn addMaskedBuffer(queue: *Queue, id: Id, values: []const u32, mask: u4, mode: Header.Mode) void { + if (values.len == 0) return; + + var current_id: Id = id; + var current: usize = 0; + var remaining: usize = values.len; + + while (remaining > 0) { + const len = @min(remaining, 256); + defer { + current += len; + remaining -= len; + } + + const remaining_slice = values[current..][0..len]; + + queue.buffer[queue.end] = remaining_slice[0]; + queue.buffer[queue.end + 1] = @bitCast(Header{ + // each chunk must target its own starting register, not the very first one + .id = current_id, + .mask = mask, + .extra = @intCast(len - 1), + .mode = mode, + }); + queue.end += 2; + + @memcpy(queue.buffer[queue.end..][0..(len - 1)], remaining_slice[1..len]); + queue.end += std.mem.alignForward(usize, len - 1, 2); // commands must be aligned to 8 bytes + if (mode == .incremental) current_id = @enumFromInt(@intFromEnum(current_id) + len); + } + } + + pub fn chain(queue: *Queue, address: zitrus.hardware.AlignedPhysicalAddress(.@"16", .@"8")) *zitrus.hardware.LsbRegister(u22) { + const p3d = &zitrus.memory.arm11.pica.p3d; + + const 
size = &queue.buffer[queue.end]; + queue.add(p3d, &p3d.primitive_engine.command_buffer.size[0], .init(0)); + queue.add(p3d, &p3d.primitive_engine.command_buffer.address[0], address); + queue.add(p3d, &p3d.primitive_engine.command_buffer.jump[0], .init(.trigger)); + + if (!std.mem.isAligned(queue.end, 4)) { + queue.add(p3d, &p3d.primitive_engine.command_buffer.jump[0], .init(.trigger)); + } + + return @ptrCast(size); + } + + pub fn finalize(queue: *Queue) void { + const p3d = &zitrus.memory.arm11.pica.p3d; + + queue.add(p3d, &p3d.irq.req[0..4].*, @bitCast(@as(u32, 0x12345678))); + + if (!std.mem.isAligned(queue.end, 4)) { + queue.add(p3d, &p3d.irq.req[0..4].*, @bitCast(@as(u32, 0x12345678))); + } + } +}; + +/// Represents a growable command stream (multiple chained command queues) +pub const stream = struct { + pub const StreamResetMode = enum { free_all, retain_largest }; + pub const Segment = struct { + queue: Queue, + node: std.SinglyLinkedList.Node, + + comptime { + std.debug.assert(@sizeOf(Segment) == 16); + } + + pub fn data(segment: *Segment) []align(16) u32 { + return @as([*]align(16) u32, @ptrCast(@alignCast(segment)))[0 .. @divExact(@sizeOf(Segment), @sizeOf(u32)) + segment.queue.buffer.len]; + } + }; + + /// Context must have a field called `use_jumps` which toggles whether the stream + /// is a single command queue or multiple chained ones (when growing it) + /// + /// If `use_jumps` is not comptime-known or is `true`, it must also implement + /// `fn virtualToPhysical(ctx, virtual: *align(4096) const anyopaque) zitrus.hardware.PhysicalAddress`. 
+ pub fn Custom(comptime Context: type) type { + return struct { + pub const empty: Stream = .{ .list = .{}, .last_chain_size = null, .initial_chunk = &.{}, .start = 0 }; + + list: std.SinglyLinkedList, + last_chain_size: ?*zitrus.hardware.LsbRegister(u22), + initial_chunk: []align(16) const u32, + /// This is intended to be modified directly, must be aligned to 4 words (16 bytes) + /// + /// Changes when finalizing or chaining queues (e.g when growing) + start: u32, + + pub fn deinit(strm: *Stream, gpa: std.mem.Allocator) void { + strm.reset(gpa, .free_all); + strm.* = undefined; + } + + pub fn first(strm: *Stream) ?*Queue { + const head = strm.list.first orelse return null; + const segment: *Segment = @fieldParentPtr("node", head); + return &segment.queue; + } + + /// Grows the stream exponentially, i.e 4096->8192->16384; starting from `min_len` + pub fn grow( + strm: *Stream, + gpa: std.mem.Allocator, + /// Length of the first queue *in `u32`*s + min_len: u32, + ctx: Context, + ) !void { + std.debug.assert(min_len >= @sizeOf(Segment)); // You're crazy, please bump the len A LOT. + std.debug.assert(std.mem.isAligned(strm.start, 4)); + + const segment = if (strm.list.first) |node| blk: { + const first_segment: *Segment = @alignCast(@fieldParentPtr("node", node)); + const first_que: *Queue = &first_segment.queue; + const first_data = first_segment.data(); + const next_len = first_data.len << 1; + + if (!ctx.use_jumps) { + std.debug.assert(first_segment.node.next == null); + + const new_len = first_data.len + next_len; + const new = if (gpa.remap(first_data, new_len)) |remapped| remapped else remapped: { + const new = try gpa.alignedAlloc(u32, .@"16", new_len); + defer gpa.free(first_data); + + const copying = first_data[0 .. 
@divExact(@sizeOf(Segment), @sizeOf(u32)) + first_segment.queue.end]; + @memcpy(new[0..copying.len], copying); + break :remapped new; + }; + + const new_segment: *Segment = @ptrCast(new); + // NOTE: we copied all commands above + new_segment.queue.buffer = new[@divExact(@sizeOf(Segment), @sizeOf(u32))..]; + strm.list.first = &new_segment.node; + return; + } + + const new_segment = try allocSegment(gpa, next_len); + const had_last_chain = strm.last_chain_size != null; + + if (strm.last_chain_size) |last_size| { + const len = (first_que.end - strm.start); + last_size.* = .init(@intCast((len * @sizeOf(u32)) >> 3)); + } + + strm.last_chain_size = first_que.chain(.fromPhysical(ctx.virtualToPhysical(new_segment.queue.buffer.ptr))); + + if (!had_last_chain) { + strm.initial_chunk = @alignCast(first_que.buffer[strm.start..first_que.end]); + } + + strm.start = 0; + break :blk new_segment; + } else try allocSegment(gpa, min_len); + + strm.list.prepend(&segment.node); + } + + /// Finalizes and returns the initial chunk of the stream or null if none. 
+        pub fn finalize(strm: *Stream) ?[]align(16) const u32 {
+            std.debug.assert(std.mem.isAligned(strm.start, 4));
+
+            const que = strm.first() orelse return null;
+
+            // Nothing to finalize
+            if (strm.start == que.end and strm.last_chain_size == null) return null;
+            que.finalize();
+
+            const initial_chunk: []align(16) const u32 = if (strm.last_chain_size) |last_size| blk: {
+                // A chain is pending: patch its size with the length of this last chunk (bytes / 8).
+                last_size.* = .init(@intCast(((que.end - strm.start) * @sizeOf(u32)) >> 3));
+                break :blk strm.initial_chunk;
+            } else @alignCast(que.buffer[strm.start..que.end]);
+
+            strm.last_chain_size = null;
+            strm.start = que.end;
+            return initial_chunk;
+        }
+
+        /// Rewinds the stream so it can be recorded again.
+        ///
+        /// * `.free_all` frees every segment and leaves the stream without segments.
+        /// * `.retain_largest` keeps only the head segment with its queue emptied and frees
+        ///   all the others (`grow` prepends each new, larger segment, so the head is the largest).
+        ///
+        /// `gpa` must be the allocator the segments were allocated with.
+        pub fn reset(strm: *Stream, gpa: std.mem.Allocator, mode: StreamResetMode) void {
+            const first_node = strm.list.first orelse return;
+            strm.last_chain_size = null;
+            strm.initial_chunk = &.{};
+            strm.start = 0;
+
+            var freeing = switch (mode) {
+                .free_all => blk: {
+                    strm.list.first = null;
+                    break :blk first_node;
+                },
+                .retain_largest => blk: {
+                    // Capture the tail *before* detaching it: reading `first_node.next` after
+                    // clearing it always yields null, which would leak every other segment.
+                    const rest = first_node.next;
+                    first_node.next = null;
+
+                    const first_segment: *Segment = @fieldParentPtr("node", first_node);
+                    first_segment.queue.end = 0;
+                    break :blk rest;
+                },
+            };
+
+            while (freeing) |node| {
+                // Read `next` first, the node lives inside the allocation freed below.
+                freeing = node.next;
+
+                const segment: *Segment = @alignCast(@fieldParentPtr("node", node));
+                const segment_data = segment.data();
+                gpa.free(segment_data);
+            }
+        }
+
+        /// Allocates a 16-byte aligned buffer of `len` `u32`s and places the `Segment` header
+        /// at its start; the queue's buffer is whatever remains after the header.
+        fn allocSegment(gpa: std.mem.Allocator, len: u32) !*Segment {
+            const data = try gpa.alignedAlloc(u32, .@"16", len);
+            const segment: *Segment = @ptrCast(data);
+
+            segment.* = .{
+                .queue = .{
+                    .buffer = data[@divExact(@sizeOf(Segment), @sizeOf(u32))..],
+                    .end = 0,
+                },
+                .node = .{},
+            };
+
+            return segment;
+        }
+
+        const Stream = @This();
+    };
+    }
+};
+
+const std = @import("std");
+
+const zitrus = @import("zitrus");
+const pica = zitrus.hardware.pica;
diff --git a/src/platform/3ds/mango/hardware/pica/shader.zig b/src/platform/3ds/mango/hardware/pica/shader.zig
new file mode 100644
index 0000000..beafe10
--- /dev/null
+++ 
b/src/platform/3ds/mango/hardware/pica/shader.zig @@ -0,0 +1,70 @@ +//! PICA200 shader ISA encoder, assembler and (TODO) disassembler. +//! +//! * `as` - zitrus PICA200 shader assembler / disassembler. +//! * `register` - register enums for everything shader related +//! * `encoding` - single instruction encoding +//! * `spirv` - ?? :) (TODO) +//! +//! * `Encoder` - Type-safe PICA200 shader ISA encoder + +pub const Type = enum(u1) { + vertex, + geometry, +}; + +pub const Geometry = union(Kind) { + pub const Kind = enum { + point, + variable, + fixed, + }; + + pub const Point = struct { + inputs: u5, + }; + + pub const Variable = struct { + full_vertices: u5, + }; + + pub const Fixed = struct { + vertices: u5, + uniform_start: register.Source.Constant, + }; + + point: Point, + variable: Variable, + fixed: Fixed, + + pub fn initPoint(inputs: u5) Geometry { + return .{ .point = .{ .inputs = inputs } }; + } + + pub fn initVariable(full_vertices: u5) Geometry { + return .{ .variable = .{ .full_vertices = full_vertices } }; + } + + pub fn initFixed(vertices: u5, uniform_start: register.Source.Constant) Geometry { + return .{ .fixed = .{ .vertices = vertices, .uniform_start = uniform_start } }; + } +}; + +pub const as = @import("shader/as.zig"); +pub const Encoder = @import("shader/Encoder.zig"); + +pub const register = @import("shader/register.zig"); +pub const encoding = @import("shader/encoding.zig"); + +pub const spirv = @import("shader/spirv.zig"); + +comptime { + _ = as; + _ = Encoder; + + _ = register; + _ = encoding; + + _ = spirv; +} + +const std = @import("std"); diff --git a/src/platform/3ds/mango/hardware/pica/shader/Encoder.zig b/src/platform/3ds/mango/hardware/pica/shader/Encoder.zig new file mode 100644 index 0000000..8a0c35f --- /dev/null +++ b/src/platform/3ds/mango/hardware/pica/shader/Encoder.zig @@ -0,0 +1,428 @@ +//! 
Type-safe PICA200 shader ISA encoder
+
+pub const OperandDescriptorAllocationError = error{OutOfDescriptors};
+pub const InstructionEncodingError = error{InvalidSourceRegisterCombination};
+
+const max_descriptors = std.math.maxInt(u7);
+
+instructions: std.ArrayList(Instruction),
+descriptors: [max_descriptors]OperandDescriptor,
+masks: [max_descriptors]OperandDescriptor.Mask,
+allocated_descriptors: u8,
+
+pub const init: Encoder = .{
+    .instructions = .empty,
+    .descriptors = undefined,
+    .masks = undefined,
+    .allocated_descriptors = 0,
+};
+
+/// Hands the encoder's current state to the caller and leaves `encoder` reset to `init`.
+pub fn move(encoder: *Encoder) Encoder {
+    defer encoder.* = .init;
+    return encoder.*;
+}
+
+/// Frees the instruction list and invalidates `encoder`.
+pub fn deinit(encoder: *Encoder, allocator: Allocator) void {
+    encoder.instructions.deinit(allocator);
+    encoder.* = undefined;
+}
+
+/// Read-only view of the operand descriptors allocated so far.
+pub fn constDescriptorSlice(encoder: *const Encoder) []const OperandDescriptor {
+    return encoder.descriptors[0..encoder.allocated_descriptors];
+}
+
+/// Mutable view of the operand descriptors allocated so far.
+pub fn descriptorSlice(encoder: *Encoder) []OperandDescriptor {
+    return encoder.descriptors[0..encoder.allocated_descriptors];
+}
+
+/// Returns the index of an operand descriptor that matches `operand_descriptor` under
+/// `descriptor_mask`, reusing (and widening, when needed) an existing descriptor before
+/// allocating a new slot.
+///
+/// `T` is the integer type of the returned descriptor id (`u5` or `u7`); only descriptors
+/// whose index is representable in `T` are considered for reuse.
+///
+/// Returns `error.OutOfDescriptors` when no compatible descriptor exists and no slot
+/// addressable by `T` is left.
+pub fn getOrAllocateOperandDescriptor(encoder: *Encoder, comptime T: type, comptime descriptor_mask: OperandDescriptor.Mask, operand_descriptor: OperandDescriptor) OperandDescriptorAllocationError!T {
+    std.debug.assert(T == u5 or T == u7);
+
+    // Descriptors past `maxInt(T)` cannot be referenced through a `T`; searching them would
+    // make the `@intCast` below overflow (e.g. a `u5` request matching descriptor 32+).
+    const searchable = @min(encoder.allocated_descriptors, std.math.maxInt(T) + 1);
+
+    for (encoder.descriptors[0..searchable], encoder.masks[0..searchable], 0..) |*descriptor, *mask, i| {
+        if (mask.*.contains(descriptor_mask) and operand_descriptor.equalsMasked(descriptor_mask, descriptor.*)) {
+            // Reuse the descriptor
+            return @intCast(i);
+        }
+
+        if (descriptor_mask.contains(mask.*) and operand_descriptor.equalsMasked(mask.*, descriptor.*)) {
+            // Reuse and expand the descriptor
+            descriptor.* = operand_descriptor;
+            mask.* = descriptor_mask;
+            return @intCast(i);
+        }
+    }
+
+    if (encoder.descriptors.len == encoder.allocated_descriptors) {
+        return error.OutOfDescriptors;
+    }
+
+    if (encoder.allocated_descriptors < std.math.maxInt(T)) {
+        encoder.descriptors[encoder.allocated_descriptors] = operand_descriptor;
+        encoder.masks[encoder.allocated_descriptors] = descriptor_mask;
+        encoder.allocated_descriptors += 1;
+        return @intCast(encoder.allocated_descriptors - 1);
+    }
+
+    // TODO:
+    // Swap a non-reduced descriptor or return error
+    return error.OutOfDescriptors;
+}
+
+/// Appends an already built instruction to the instruction list.
+pub fn addInstruction(encoder: *Encoder, allocator: Allocator, instruction: Instruction) !void {
+    try encoder.instructions.append(allocator, instruction);
+}
+
+/// Appends an instruction that carries an opcode only.
+pub fn unparametized(encoder: *Encoder, alloc: Allocator, opcode: Instruction.Opcode) !void {
+    return encoder.addInstruction(alloc, .{ .unparametized = .{ .opcode = opcode } });
+}
+
+/// Encodes a single-source instruction in the `register` format; `src2` is always `.v0`.
+pub fn unary(encoder: *Encoder, alloc: Allocator, opcode: Instruction.Opcode, dest: DestinationRegister, dst_mask: Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src_rel: RelativeComponent) !void {
+    const descriptor_id = try encoder.getOrAllocateOperandDescriptor(u7, .unary, .{
+        .dst_mask = dst_mask,
+        .src1_neg = src1_neg,
+        .src1_selector = src1_selector,
+    });
+
+    return encoder.addInstruction(alloc, .{ .register = .{
+        .operand_descriptor_id = descriptor_id,
+        .src1 = src1,
+        .src2 = .v0,
+        .address_component = src_rel,
+        .dst = dest,
+        .opcode = opcode,
+    } });
+}
+
+pub fn binary(encoder: *Encoder, alloc: Allocator, opcode: Instruction.Opcode, dest: DestinationRegister, dst_mask: 
Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src2_neg: Negation, src2: SourceRegister, src2_selector: Selector, src_rel: RelativeComponent) !void { + if (!src1.isLimited() and !src2.isLimited()) { + return error.InvalidSourceRegisterCombination; + } + + if (src1.isLimited() != src2.isLimited() and !src2.isLimited()) { + if (!opcode.isCommutative()) { + if (opcode.invert()) |opcode_i| { + const descriptor_id = try encoder.getOrAllocateOperandDescriptor(u7, .binary, .{ + .dst_mask = dst_mask, + .src1_neg = src1_neg, + .src1_selector = src1_selector, + .src2_neg = src2_neg, + .src2_selector = src2_selector, + }); + + return encoder.addInstruction(alloc, .{ .register_inverted = .{ .operand_descriptor_id = descriptor_id, .src2 = src2, .src1 = src1.toLimited().?, .address_component = src_rel, .dst = dest, .opcode = opcode_i } }); + } + + return error.InvalidSourceRegisterCombination; + } + + const descriptor_id = try encoder.getOrAllocateOperandDescriptor(u7, .binary, .{ + .dst_mask = dst_mask, + .src1_neg = src2_neg, + .src1_selector = src2_selector, + .src2_neg = src1_neg, + .src2_selector = src1_selector, + }); + + return encoder.addInstruction(alloc, .{ .register = .{ .operand_descriptor_id = descriptor_id, .src2 = src1.toLimited().?, .src1 = src2, .address_component = src_rel, .dst = dest, .opcode = opcode } }); + } + + // TODO: If commutative we could search and reuse an operand descriptor with swapped src1 <=> src2 + const descriptor_id = try encoder.getOrAllocateOperandDescriptor(u7, .binary, .{ + .dst_mask = dst_mask, + .src1_neg = src1_neg, + .src1_selector = src1_selector, + .src2_neg = src2_neg, + .src2_selector = src2_selector, + }); + + return encoder.addInstruction(alloc, .{ .register = .{ .operand_descriptor_id = descriptor_id, .src2 = src2.toLimited().?, .src1 = src1, .address_component = src_rel, .dst = dest, .opcode = opcode } }); +} + +pub fn flow(encoder: *Encoder, alloc: Allocator, opcode: Instruction.Opcode, num: u8, 
dest: u12, condition: Condition, x: bool, y: bool) !void { + return encoder.addInstruction(alloc, .{ .control_flow = .{ + .num = num, + .dst = dest, + .condition = condition, + .ref_x = x, + .ref_y = y, + .opcode = opcode, + } }); +} + +pub fn flowConstant(encoder: *Encoder, alloc: Allocator, opcode: Instruction.Opcode, num: u8, dest: u12, constant: IntegralRegister) !void { + return encoder.addInstruction(alloc, .{ .constant_control_flow = .{ + .num = num, + .dst = dest, + .src = constant, + .opcode = opcode, + } }); +} + +pub fn add(encoder: *Encoder, alloc: Allocator, dest: DestinationRegister, dst_mask: Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src2_neg: Negation, src2: SourceRegister, src2_selector: Selector, src_rel: RelativeComponent) !void { + return encoder.binary(alloc, .add, dest, dst_mask, src1_neg, src1, src1_selector, src2_neg, src2, src2_selector, src_rel); +} + +pub fn dp3(encoder: *Encoder, alloc: Allocator, dest: DestinationRegister, dst_mask: Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src2_neg: Negation, src2: SourceRegister, src2_selector: Selector, src_rel: RelativeComponent) !void { + return encoder.binary(alloc, .dp3, dest, dst_mask, src1_neg, src1, src1_selector, src2_neg, src2, src2_selector, src_rel); +} + +pub fn dp4(encoder: *Encoder, alloc: Allocator, dest: DestinationRegister, dst_mask: Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src2_neg: Negation, src2: SourceRegister, src2_selector: Selector, src_rel: RelativeComponent) !void { + return encoder.binary(alloc, .dp4, dest, dst_mask, src1_neg, src1, src1_selector, src2_neg, src2, src2_selector, src_rel); +} + +pub fn dph(encoder: *Encoder, alloc: Allocator, dest: DestinationRegister, dst_mask: Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src2_neg: Negation, src2: SourceRegister, src2_selector: Selector, src_rel: RelativeComponent) !void { + return encoder.binary(alloc, 
.dph, dest, dst_mask, src1_neg, src1, src1_selector, src2_neg, src2, src2_selector, src_rel); +} + +pub fn dst(encoder: *Encoder, alloc: Allocator, dest: DestinationRegister, dst_mask: Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src2_neg: Negation, src2: SourceRegister, src2_selector: Selector, src_rel: RelativeComponent) !void { + return encoder.binary(alloc, .dst, dest, dst_mask, src1_neg, src1, src1_selector, src2_neg, src2, src2_selector, src_rel); +} + +pub fn ex2(encoder: *Encoder, alloc: Allocator, dest: DestinationRegister, dst_mask: Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src_rel: RelativeComponent) !void { + return encoder.unary(alloc, .ex2, dest, dst_mask, src1_neg, src1, src1_selector, src_rel); +} + +pub fn lg2(encoder: *Encoder, alloc: Allocator, dest: DestinationRegister, dst_mask: Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src_rel: RelativeComponent) !void { + return encoder.unary(alloc, .lg2, dest, dst_mask, src1_neg, src1, src1_selector, src_rel); +} + +pub fn litp(encoder: *Encoder, alloc: Allocator, dest: DestinationRegister, dst_mask: Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src_rel: RelativeComponent) !void { + return encoder.unary(alloc, .litp, dest, dst_mask, src1_neg, src1, src1_selector, src_rel); +} + +pub fn mul(encoder: *Encoder, alloc: Allocator, dest: DestinationRegister, dst_mask: Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src2_neg: Negation, src2: SourceRegister, src2_selector: Selector, src_rel: RelativeComponent) !void { + return encoder.binary(alloc, .mul, dest, dst_mask, src1_neg, src1, src1_selector, src2_neg, src2, src2_selector, src_rel); +} + +pub fn sge(encoder: *Encoder, alloc: Allocator, dest: DestinationRegister, dst_mask: Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src2_neg: Negation, src2: SourceRegister, src2_selector: Selector, src_rel: 
RelativeComponent) !void { + return encoder.binary(alloc, .sge, dest, dst_mask, src1_neg, src1, src1_selector, src2_neg, src2, src2_selector, src_rel); +} + +pub fn slt(encoder: *Encoder, alloc: Allocator, dest: DestinationRegister, dst_mask: Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src2_neg: Negation, src2: SourceRegister, src2_selector: Selector, src_rel: RelativeComponent) !void { + return encoder.binary(alloc, .slt, dest, dst_mask, src1_neg, src1, src1_selector, src2_neg, src2, src2_selector, src_rel); +} + +pub fn flr(encoder: *Encoder, alloc: Allocator, dest: DestinationRegister, dst_mask: Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src_rel: RelativeComponent) !void { + return encoder.unary(alloc, .flr, dest, dst_mask, src1_neg, src1, src1_selector, src_rel); +} + +pub fn max(encoder: *Encoder, alloc: Allocator, dest: DestinationRegister, dst_mask: Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src2_neg: Negation, src2: SourceRegister, src2_selector: Selector, src_rel: RelativeComponent) !void { + return encoder.binary(alloc, .max, dest, dst_mask, src1_neg, src1, src1_selector, src2_neg, src2, src2_selector, src_rel); +} + +pub fn min(encoder: *Encoder, alloc: Allocator, dest: DestinationRegister, dst_mask: Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src2_neg: Negation, src2: SourceRegister, src2_selector: Selector, src_rel: RelativeComponent) !void { + return encoder.binary(alloc, .min, dest, dst_mask, src1_neg, src1, src1_selector, src2_neg, src2, src2_selector, src_rel); +} + +pub fn rcp(encoder: *Encoder, alloc: Allocator, dest: DestinationRegister, dst_mask: Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src_rel: RelativeComponent) !void { + return encoder.unary(alloc, .rcp, dest, dst_mask, src1_neg, src1, src1_selector, src_rel); +} + +pub fn rsq(encoder: *Encoder, alloc: Allocator, dest: DestinationRegister, 
dst_mask: Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src_rel: RelativeComponent) !void { + return encoder.unary(alloc, .rsq, dest, dst_mask, src1_neg, src1, src1_selector, src_rel); +} + +pub fn mova(encoder: *Encoder, alloc: Allocator, a_mask: register.AddressComponent.Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src_rel: RelativeComponent) !void { + return encoder.unary(alloc, .mova, .o0, .{ .enable_x = a_mask.enable_x, .enable_y = a_mask.enable_y }, src1_neg, src1, src1_selector, src_rel); +} + +pub fn mov(encoder: *Encoder, alloc: Allocator, dest: DestinationRegister, dst_mask: Mask, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, src_rel: RelativeComponent) !void { + return encoder.unary(alloc, .mov, dest, dst_mask, src1_neg, src1, src1_selector, src_rel); +} + +// dphi handled by dph +// dsti handled by dst +// sgei handled by sge +// slti handled by slt + +pub fn @"break"(encoder: *Encoder, alloc: Allocator) !void { + return encoder.unparametized(alloc, .@"break"); +} + +pub fn nop(encoder: *Encoder, alloc: Allocator) !void { + return encoder.unparametized(alloc, .nop); +} + +pub fn end(encoder: *Encoder, alloc: Allocator) !void { + return encoder.unparametized(alloc, .end); +} + +pub fn breakc(encoder: *Encoder, alloc: Allocator, condition: Condition, x: bool, y: bool) !void { + return encoder.flow(alloc, .breakc, 0, 0, condition, x, y); +} + +pub fn call(encoder: *Encoder, alloc: Allocator, dest: u12, num: u8) !void { + return encoder.flow(alloc, .call, num, dest, .@"and", false, false); +} + +pub fn callc(encoder: *Encoder, alloc: Allocator, condition: Condition, x: bool, y: bool, dest: u12, num: u8) !void { + return encoder.flow(alloc, .callc, num, dest, condition, x, y); +} + +pub fn callu(encoder: *Encoder, alloc: Allocator, b: BooleanRegister, dest: u12, num: u8) !void { + return encoder.flowConstant(alloc, .callu, num, dest, .{ .bool = b }); +} + +pub fn ifu(encoder: 
*Encoder, alloc: Allocator, b: BooleanRegister, dest: u12, num: u8) !void { + return encoder.flowConstant(alloc, .ifu, num, dest, .{ .bool = b }); +} + +pub fn ifc(encoder: *Encoder, alloc: Allocator, condition: Condition, x: bool, y: bool, dest: u12, num: u8) !void { + return encoder.flow(alloc, .ifc, num, dest, condition, x, y); +} + +pub fn loop(encoder: *Encoder, alloc: Allocator, i: IntegerRegister, dest: u12) !void { + return encoder.flowConstant(alloc, .loop, 0, dest, .{ .int = .{ .used = i } }); +} + +pub fn setemit(encoder: *Encoder, alloc: Allocator, vertex_id: u2, primitive: Primitive, winding: Winding) !void { + return encoder.addInstruction(alloc, .{ .set_emit = .{ + .winding = winding, + .primitive_emit = primitive, + .vertex_id = vertex_id, + .opcode = .setemit, + } }); +} + +pub fn emit(encoder: *Encoder, alloc: Allocator) !void { + return encoder.unparametized(alloc, .emit); +} + +pub fn jmpc(encoder: *Encoder, alloc: Allocator, condition: Condition, x: bool, y: bool, dest: u12) !void { + return encoder.flow(alloc, .jmpc, 0, dest, condition, x, y); +} + +pub fn jmpu(encoder: *Encoder, alloc: Allocator, b: BooleanRegister, if_true: bool, dest: u12) !void { + return encoder.flowConstant(alloc, .jmpu, @intFromBool(!if_true), dest, .{ .bool = b }); +} + +pub fn cmp(encoder: *Encoder, alloc: Allocator, src1_neg: Negation, src1: SourceRegister, src1_selector: Selector, x: Comparison, y: Comparison, src2_neg: Negation, src2: SourceRegister, src2_selector: Selector, src_rel: RelativeComponent) !void { + if (!src1.isLimited() and !src2.isLimited()) { + return error.InvalidSourceRegisterCombination; + } + + const descriptor_id, const i_src1, const i_src2, const x_cmp, const y_cmp = if (!src2.isLimited()) + .{ try encoder.getOrAllocateOperandDescriptor(u7, .comparison, .{ + .src1_neg = src2_neg, + .src1_selector = src2_selector, + .src2_neg = src1_neg, + .src2_selector = src1_selector, + }), src2, src1.toLimited().?, x.invert(), y.invert() } + else + .{ try 
encoder.getOrAllocateOperandDescriptor(u7, .comparison, .{ + .src1_neg = src1_neg, + .src1_selector = src1_selector, + .src2_neg = src2_neg, + .src2_selector = src2_selector, + }), src1, src2.toLimited().?, x, y }; + + return encoder.addInstruction(alloc, .{ .comparison = .{ + .operand_descriptor_id = descriptor_id, + .src2 = i_src2, + .src1 = i_src1, + .address_component = src_rel, + .x_operation = x_cmp, + .y_operation = y_cmp, + .opcode = Instruction.Opcode.cmp0.toComparison().?, + } }); +} + +// madi handled by mad + +pub fn mad(encoder: *Encoder, alloc: Allocator, dest: DestinationRegister, dst_mask: Mask, src1_neg: Negation, src1: SourceRegister.Limited, src1_selector: Selector, src2_neg: Negation, src2: SourceRegister, src2_selector: Selector, src3_neg: Negation, src3: SourceRegister, src3_selector: Selector, src_rel: RelativeComponent) !void { + if (!src2.isLimited() and !src3.isLimited()) { + return error.InvalidSourceRegisterCombination; + } + + const descriptor_id = try encoder.getOrAllocateOperandDescriptor(u5, .full, .{ + .dst_mask = dst_mask, + .src1_neg = src1_neg, + .src1_selector = src1_selector, + .src2_neg = src2_neg, + .src2_selector = src2_selector, + .src3_neg = src3_neg, + .src3_selector = src3_selector, + }); + + if (src2.isLimited() != src3.isLimited() and src2.isLimited()) { + return try encoder.addInstruction(alloc, .{ .mad_inverted = .{ + .operand_descriptor_id = descriptor_id, + .src1 = src1, + .src2 = src2.toLimited().?, + .src3 = src3, + .address_component = src_rel, + .dst = dest, + .opcode = Instruction.Opcode.madi0.toMad().?, + } }); + } + + return try encoder.addInstruction(alloc, .{ .mad = .{ + .operand_descriptor_id = descriptor_id, + .src1 = src1, + .src2 = src2, + .src3 = src3.toLimited().?, + .address_component = src_rel, + .dst = dest, + .opcode = Instruction.Opcode.mad0.toMad().?, + } }); +} + +test "test?" 
{ + var fixed: [256]u8 = undefined; + var fba: std.heap.FixedBufferAllocator = .init(&fixed); + const alloc = fba.allocator(); + + const expected_output: []const u32 = &.{ + 0b000000_10000_00_0000000_00001_0000000, + 0b001011_10001_00_0010000_00000_0000000, + 0b001000_10001_00_0000011_01000_0000001, + }; + + var encoder: Encoder = .init; + defer encoder.deinit(alloc); + + try encoder.add(alloc, .r0, .x, .@"+", .v0, .xyzw, .@"+", .v1, .xyzw, .none); + + // Must have same descriptor as the previous instruction + try encoder.flr(alloc, .r1, .x, .@"+", .r0, .xyzw, .none); + + // Should create a new descriptor + try encoder.mul(alloc, .r1, .x, .@"+", .v3, .wyxz, .@"+", .v8, .xxxx, .none); + + // FIXME: Regression, cannot use this on the 3DS test runner. + // try testing.expectEqualSlices(u32, expected_output, std.mem.bytesAsSlice(u32, std.mem.sliceAsBytes(encoder.instructions.items))); + for (expected_output, encoder.instructions.items) |expected, output| { + try testing.expect(expected == @as(u32, @bitCast(output))); + } +} + +const Encoder = @This(); + +const std = @import("std"); +const testing = std.testing; + +const Allocator = std.mem.Allocator; + +const zitrus = @import("zitrus"); +const shader = zitrus.hardware.pica.shader; + +const encoding = shader.encoding; +const Instruction = encoding.Instruction; +const OperandDescriptor = encoding.OperandDescriptor; +const Negation = OperandDescriptor.Negation; +const Condition = encoding.Condition; +const Comparison = encoding.ComparisonOperation; +const Winding = encoding.Winding; +const Primitive = encoding.Primitive; + +const Mask = encoding.Component.Mask; +const Selector = encoding.Component.Selector; + +const register = shader.register; +const RelativeComponent = register.AddressComponent; +const SourceRegister = register.Source; +const DestinationRegister = register.Destination; + +const IntegralRegister = register.Integral; +const BooleanRegister = IntegralRegister.Boolean; +const IntegerRegister = 
IntegralRegister.Integer; diff --git a/src/platform/3ds/mango/hardware/pica/shader/as.zig b/src/platform/3ds/mango/hardware/pica/shader/as.zig new file mode 100644 index 0000000..f926eda --- /dev/null +++ b/src/platform/3ds/mango/hardware/pica/shader/as.zig @@ -0,0 +1,13 @@ +//! Zitrus PICA200 shader assembler / disassembler. + +pub const Tokenizer = tokenizer.Tokenizer; +pub const Token = tokenizer.Token; +pub const Assembler = @import("as/Assembler.zig"); + +comptime { + _ = Tokenizer; + _ = Token; + _ = Assembler; +} + +const tokenizer = @import("as/tokenizer.zig"); diff --git a/src/platform/3ds/mango/hardware/pica/shader/as/Assembler.zig b/src/platform/3ds/mango/hardware/pica/shader/as/Assembler.zig new file mode 100644 index 0000000..dd5e2ea --- /dev/null +++ b/src/platform/3ds/mango/hardware/pica/shader/as/Assembler.zig @@ -0,0 +1,1468 @@ +//! Zitrus PICA200 shader assembler. + +// TODO: Last one, we're missing a proper diagnostic for using two floating constant registers (one must be limited) but that is easy! 
+ +pub const TokenList = std.MultiArrayList(struct { + tag: Token.Tag, + start: u32, +}); + +pub const LabelMap = std.StringArrayHashMapUnmanaged(u12); +pub const Outputs = std.EnumMap(register.Destination.Output, pica.OutputMap); + +pub const FloatingConstants = std.EnumMap(register.Source.Constant, pica.F7_16x4); +pub const IntegerConstants = std.EnumMap(register.Integral.Integer, [4]u8); +pub const BooleanConstants = std.EnumSet(register.Integral.Boolean); + +pub const Assembled = struct { + pub const Entrypoint = struct { + pub const Map = std.StringArrayHashMapUnmanaged(Assembled.Entrypoint); + pub const Info = union(pica.shader.Type) { + vertex, + geometry: pica.shader.Geometry, + }; + + pub const Constants = struct { + pub const empty: Constants = .{ + .float = .init(.{}), + .int = .init(.{}), + .bool = .init(.{}), + }; + + float: FloatingConstants, + int: IntegerConstants, + bool: BooleanConstants, + }; + + info: Info, + constants: Constants, + outputs: Outputs, + offset: u16, + }; + + source: [:0]const u8, + tokens: TokenList.Slice, + entrypoints: Assembled.Entrypoint.Map, + encoded: Encoder, + + errors: []const Error, + + pub fn deinit(assembled: *Assembled, gpa: std.mem.Allocator) void { + assembled.tokens.deinit(gpa); + assembled.entrypoints.deinit(gpa); + assembled.encoded.deinit(gpa); + gpa.free(assembled.errors); + assembled.* = undefined; + } + + pub fn tokenTag(a: Assembled, tok_index: usize) Token.Tag { + return a.tokens.items(.tag)[tok_index]; + } + + pub fn tokenStart(a: Assembled, tok_index: usize) u32 { + return a.tokens.items(.start)[tok_index]; + } + + pub fn tokenSlice(a: Assembled, tok_index: usize) []const u8 { + const tok_tag = a.tokenTag(tok_index); + + if (tok_tag.lexeme()) |lexeme| { + return lexeme; + } + + const tok_start = a.tokenStart(tok_index); + var tokenizer: shader.as.Tokenizer = .{ + .buffer = a.source, + .index = tok_start, + }; + + const tok = tokenizer.next(); + std.debug.assert(tok.tag == tok_tag); + return 
a.source[tok.loc.start..tok.loc.end]; + } + + pub fn assemble(gpa: std.mem.Allocator, source: [:0]const u8) !Assembled { + var tokens = TokenList{}; + defer tokens.deinit(gpa); + + { + var tokenizer: shader.as.Tokenizer = .init(source); + + while (true) { + const tok = tokenizer.next(); + + try tokens.append(gpa, .{ + .tag = tok.tag, + .start = @intCast(tok.loc.start), + }); + + if (tok.tag == .eof) { + break; + } + } + } + + var assembler: Assembler = .{ + .gpa = gpa, + .aliases = .empty, + .errors = .empty, + .source = source, + .tokens = tokens.toOwnedSlice(), + .encoder = .init, + .labels = .empty, + .entrypoints = .empty, + .tok_i = 0, + .inst_i = 0, + }; + defer assembler.deinit(gpa); + + assembler.passRoot() catch |e| switch (e) { + error.ParseError => {}, + else => return e, + }; + + var entrypoints: Assembled.Entrypoint.Map = .empty; + errdefer entrypoints.deinit(gpa); + + if (assembler.errors.items.len == 0) assemble: { + assembler.passAssemble() catch |e| switch (e) { + error.ParseError => break :assemble, + else => return e, + }; + + var it = assembler.entrypoints.iterator(); + while (it.next()) |entry| { + const label_offset = assembler.labels.get(entry.key_ptr.*) orelse { + try assembler.warnMsg(.{ + .tag = .undefined_label, + .tok_i = entry.value_ptr.*.tok_i, + }); + + continue; + }; + + try entrypoints.put(gpa, entry.key_ptr.*, .{ + .info = entry.value_ptr.info, + .constants = entry.value_ptr.constants, + .outputs = entry.value_ptr.outputs, + .offset = label_offset, + }); + } + } + + return .{ + .source = assembler.source, + .tokens = assembler.tokens, + .encoded = assembler.encoder.move(), + .entrypoints = entrypoints, + .errors = try assembler.errors.toOwnedSlice(gpa), + }; + } +}; + +pub const Error = struct { + tag: Tag, + tok_i: u32, + expected_tok: Token.Tag = .invalid, + + pub const Tag = enum { + unknown_directive, + invalid_register, + expected_address_register, + invalid_address_register_mask, + expected_address_component, + 
cannot_address_relative, + + expected_condition_register, + invalid_condition_register_mask, + + expected_src_register, + expected_limited_src_register, + expected_dst_register, + expected_bool_register, + expected_int_register, + expected_float_register, + expected_output_register, + expected_uniform_register, + + invalid_mask, + swizzled_mask, + invalid_swizzle, + cannot_swizzle, + + expected_number, + number_too_small, + number_too_big, + + expected_semantic, + invalid_semantic_component, + output_has_semantic, + + expected_primitive, + expected_winding, + expected_comparison, + expected_condition, + expected_boolean, + expected_shader_type, + expected_geometry_kind, + + redefined_label, + undefined_label, + label_range_too_big, + + redefined_entry, + undefined_entry, + + expected_directive_or_label_or_mnemonic, + expected_token, + }; +}; + +const Entrypoint = struct { + pub const Map = std.StringArrayHashMapUnmanaged(Entrypoint); + + info: Assembled.Entrypoint.Info, + constants: Assembled.Entrypoint.Constants, + outputs: Outputs, + tok_i: u32, +}; + +const Directive = enum { + /// .entry