From 2c81a777737ca607f199d04fd7509c5df15f9147 Mon Sep 17 00:00:00 2001
From: Cathal Mullan
Date: Wed, 10 Jun 2026 19:39:10 +0100
Subject: [PATCH] Implement remaining aarch64 SHA-3 LLVM intrinsics

---
 example/neon.rs                | 235 +++++++++++++++++++++++++++++++++
 src/intrinsics/llvm_aarch64.rs |  82 ++++++++++++
 2 files changed, 317 insertions(+)

diff --git a/example/neon.rs b/example/neon.rs
index 6b024de7bb..ba63333daa 100644
--- a/example/neon.rs
+++ b/example/neon.rs
@@ -470,6 +470,220 @@ unsafe fn test_vsha512su1q_u64() {
     assert_eq!(r, e);
 }
 
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "sha3")]
+unsafe fn test_veor3q_s8() {
+    // AArch64 llvm intrinsic: llvm.aarch64.crypto.eor3s.v16i8
+    let a = i8x16::from([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
+    let b = i8x16::from([16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]);
+    let c = i8x16::from([32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]);
+    let e = i8x16::from([48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]);
+    let r: i8x16 = unsafe { transmute(veor3q_s8(transmute(a), transmute(b), transmute(c))) };
+    assert_eq!(r, e);
+}
+
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "sha3")]
+unsafe fn test_veor3q_s16() {
+    // AArch64 llvm intrinsic: llvm.aarch64.crypto.eor3s.v8i16
+    let a = i16x8::from([0, 1, 2, 3, 4, 5, 6, 7]);
+    let b = i16x8::from([8, 9, 10, 11, 12, 13, 14, 15]);
+    let c = i16x8::from([16, 17, 18, 19, 20, 21, 22, 23]);
+    let e = i16x8::from([24, 25, 26, 27, 28, 29, 30, 31]);
+    let r: i16x8 = unsafe { transmute(veor3q_s16(transmute(a), transmute(b), transmute(c))) };
+    assert_eq!(r, e);
+}
+
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "sha3")]
+unsafe fn test_veor3q_s32() {
+    // AArch64 llvm intrinsic: llvm.aarch64.crypto.eor3s.v4i32
+    let a = i32x4::from([0, 1, 2, 3]);
+    let b = i32x4::from([4, 5, 6, 7]);
+    let c = i32x4::from([8, 9, 10, 11]);
+    let e = i32x4::from([12, 13, 14, 15]);
+    let r: i32x4 = unsafe { transmute(veor3q_s32(transmute(a), transmute(b), transmute(c))) };
+    assert_eq!(r, e);
+}
+
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "sha3")]
+unsafe fn test_veor3q_s64() {
+    // AArch64 llvm intrinsic: llvm.aarch64.crypto.eor3s.v2i64
+    let a = i64x2::from([0, 1]);
+    let b = i64x2::from([2, 3]);
+    let c = i64x2::from([4, 5]);
+    let e = i64x2::from([6, 7]);
+    let r: i64x2 = unsafe { transmute(veor3q_s64(transmute(a), transmute(b), transmute(c))) };
+    assert_eq!(r, e);
+}
+
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "sha3")]
+unsafe fn test_veor3q_u8() {
+    // AArch64 llvm intrinsic: llvm.aarch64.crypto.eor3u.v16i8
+    let a = u8x16::from([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
+    let b = u8x16::from([16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]);
+    let c = u8x16::from([32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]);
+    let e = u8x16::from([48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]);
+    let r: u8x16 = unsafe { transmute(veor3q_u8(transmute(a), transmute(b), transmute(c))) };
+    assert_eq!(r, e);
+}
+
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "sha3")]
+unsafe fn test_veor3q_u16() {
+    // AArch64 llvm intrinsic: llvm.aarch64.crypto.eor3u.v8i16
+    let a = u16x8::from([0, 1, 2, 3, 4, 5, 6, 7]);
+    let b = u16x8::from([8, 9, 10, 11, 12, 13, 14, 15]);
+    let c = u16x8::from([16, 17, 18, 19, 20, 21, 22, 23]);
+    let e = u16x8::from([24, 25, 26, 27, 28, 29, 30, 31]);
+    let r: u16x8 = unsafe { transmute(veor3q_u16(transmute(a), transmute(b), transmute(c))) };
+    assert_eq!(r, e);
+}
+
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "sha3")]
+unsafe fn test_veor3q_u32() {
+    // AArch64 llvm intrinsic: llvm.aarch64.crypto.eor3u.v4i32
+    let a = u32x4::from([0, 1, 2, 3]);
+    let b = u32x4::from([4, 5, 6, 7]);
+    let c = u32x4::from([8, 9, 10, 11]);
+    let e = u32x4::from([12, 13, 14, 15]);
+    let r: u32x4 = unsafe { transmute(veor3q_u32(transmute(a), transmute(b), transmute(c))) };
+    assert_eq!(r, e);
+}
+
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "sha3")]
+unsafe fn test_veor3q_u64() {
+    // AArch64 llvm intrinsic: llvm.aarch64.crypto.eor3u.v2i64
+    let a = u64x2::from([0, 1]);
+    let b = u64x2::from([2, 3]);
+    let c = u64x2::from([4, 5]);
+    let e = u64x2::from([6, 7]);
+    let r: u64x2 = unsafe { transmute(veor3q_u64(transmute(a), transmute(b), transmute(c))) };
+    assert_eq!(r, e);
+}
+
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "sha3")]
+unsafe fn test_vbcaxq_s8() {
+    // AArch64 llvm intrinsic: llvm.aarch64.crypto.bcaxs.v16i8
+    let a = i8x16::from([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
+    let b = i8x16::from([16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]);
+    let c = i8x16::from([32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]);
+    let e = i8x16::from([16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]);
+    let r: i8x16 = unsafe { transmute(vbcaxq_s8(transmute(a), transmute(b), transmute(c))) };
+    assert_eq!(r, e);
+}
+
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "sha3")]
+unsafe fn test_vbcaxq_s16() {
+    // AArch64 llvm intrinsic: llvm.aarch64.crypto.bcaxs.v8i16
+    let a = i16x8::from([0, 1, 2, 3, 4, 5, 6, 7]);
+    let b = i16x8::from([8, 9, 10, 11, 12, 13, 14, 15]);
+    let c = i16x8::from([16, 17, 18, 19, 20, 21, 22, 23]);
+    let e = i16x8::from([8, 9, 10, 11, 12, 13, 14, 15]);
+    let r: i16x8 = unsafe { transmute(vbcaxq_s16(transmute(a), transmute(b), transmute(c))) };
+    assert_eq!(r, e);
+}
+
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "sha3")]
+unsafe fn test_vbcaxq_s32() {
+    // AArch64 llvm intrinsic: llvm.aarch64.crypto.bcaxs.v4i32
+    let a = i32x4::from([0, 1, 2, 3]);
+    let b = i32x4::from([4, 5, 6, 7]);
+    let c = i32x4::from([8, 9, 10, 11]);
+    let e = i32x4::from([4, 5, 6, 7]);
+    let r: i32x4 = unsafe { transmute(vbcaxq_s32(transmute(a), transmute(b), transmute(c))) };
+    assert_eq!(r, e);
+}
+
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "sha3")]
+unsafe fn test_vbcaxq_s64() {
+    // AArch64 llvm intrinsic: llvm.aarch64.crypto.bcaxs.v2i64
+    let a = i64x2::from([0, 1]);
+    let b = i64x2::from([2, 3]);
+    let c = i64x2::from([4, 5]);
+    let e = i64x2::from([2, 3]);
+    let r: i64x2 = unsafe { transmute(vbcaxq_s64(transmute(a), transmute(b), transmute(c))) };
+    assert_eq!(r, e);
+}
+
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "sha3")]
+unsafe fn test_vbcaxq_u8() {
+    // AArch64 llvm intrinsic: llvm.aarch64.crypto.bcaxu.v16i8
+    let a = u8x16::from([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
+    let b = u8x16::from([16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]);
+    let c = u8x16::from([32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]);
+    let e = u8x16::from([16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]);
+    let r: u8x16 = unsafe { transmute(vbcaxq_u8(transmute(a), transmute(b), transmute(c))) };
+    assert_eq!(r, e);
+}
+
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "sha3")]
+unsafe fn test_vbcaxq_u16() {
+    // AArch64 llvm intrinsic: llvm.aarch64.crypto.bcaxu.v8i16
+    let a = u16x8::from([0, 1, 2, 3, 4, 5, 6, 7]);
+    let b = u16x8::from([8, 9, 10, 11, 12, 13, 14, 15]);
+    let c = u16x8::from([16, 17, 18, 19, 20, 21, 22, 23]);
+    let e = u16x8::from([8, 9, 10, 11, 12, 13, 14, 15]);
+    let r: u16x8 = unsafe { transmute(vbcaxq_u16(transmute(a), transmute(b), transmute(c))) };
+    assert_eq!(r, e);
+}
+
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "sha3")]
+unsafe fn test_vbcaxq_u32() {
+    // AArch64 llvm intrinsic: llvm.aarch64.crypto.bcaxu.v4i32
+    let a = u32x4::from([0, 1, 2, 3]);
+    let b = u32x4::from([4, 5, 6, 7]);
+    let c = u32x4::from([8, 9, 10, 11]);
+    let e = u32x4::from([4, 5, 6, 7]);
+    let r: u32x4 = unsafe { transmute(vbcaxq_u32(transmute(a), transmute(b), transmute(c))) };
+    assert_eq!(r, e);
+}
+
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "sha3")]
+unsafe fn test_vbcaxq_u64() {
+    // AArch64 llvm intrinsic: llvm.aarch64.crypto.bcaxu.v2i64
+    let a = u64x2::from([0, 1]);
+    let b = u64x2::from([2, 3]);
+    let c = u64x2::from([4, 5]);
+    let e = u64x2::from([2, 3]);
+    let r: u64x2 = unsafe { transmute(vbcaxq_u64(transmute(a), transmute(b), transmute(c))) };
+    assert_eq!(r, e);
+}
+
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "sha3")]
+unsafe fn test_vrax1q_u64() {
+    // AArch64 llvm intrinsic: llvm.aarch64.crypto.rax1
+    let a = u64x2::from([0, 1]);
+    let b = u64x2::from([2, 3]);
+    let e = u64x2::from([4, 7]);
+    let r: u64x2 = unsafe { transmute(vrax1q_u64(transmute(a), transmute(b))) };
+    assert_eq!(r, e);
+}
+
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "sha3")]
+unsafe fn test_vxarq_u64() {
+    // AArch64 llvm intrinsic: llvm.aarch64.crypto.xar
+    let a = u64x2::from([0, 1]);
+    let b = u64x2::from([2, 3]);
+    let e = u64x2::from([4, 4]);
+    let r: u64x2 = unsafe { transmute(vxarq_u64::<63>(transmute(a), transmute(b))) };
+    assert_eq!(r, e);
+}
+
 #[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "aes")]
 fn test_vmull_p64() {
@@ -698,6 +912,27 @@ fn main() {
         test_vsha512h2q_u64();
         test_vsha512su0q_u64();
         test_vsha512su1q_u64();
+
+        test_veor3q_s8();
+        test_veor3q_s16();
+        test_veor3q_s32();
+        test_veor3q_s64();
+        test_veor3q_u8();
+        test_veor3q_u16();
+        test_veor3q_u32();
+        test_veor3q_u64();
+
+        test_vbcaxq_s8();
+        test_vbcaxq_s16();
+        test_vbcaxq_s32();
+        test_vbcaxq_s64();
+        test_vbcaxq_u8();
+        test_vbcaxq_u16();
+        test_vbcaxq_u32();
+        test_vbcaxq_u64();
+
+        test_vrax1q_u64();
+        test_vxarq_u64();
     }
 
     test_vmull_p64();
diff --git a/src/intrinsics/llvm_aarch64.rs b/src/intrinsics/llvm_aarch64.rs
index c322859fec..147bfafba4 100644
--- a/src/intrinsics/llvm_aarch64.rs
+++ b/src/intrinsics/llvm_aarch64.rs
@@ -977,6 +977,88 @@ pub(super) fn codegen_aarch64_llvm_intrinsic_call<'tcx>(
             );
         }
 
+        "llvm.aarch64.crypto.eor3s.v2i64"
+        | "llvm.aarch64.crypto.eor3s.v4i32"
+        | "llvm.aarch64.crypto.eor3s.v8i16"
+        | "llvm.aarch64.crypto.eor3s.v16i8"
+        | "llvm.aarch64.crypto.eor3u.v2i64"
+        | "llvm.aarch64.crypto.eor3u.v4i32"
+        | "llvm.aarch64.crypto.eor3u.v8i16"
+        | "llvm.aarch64.crypto.eor3u.v16i8" => {
+            // https://developer.arm.com/documentation/ddi0602/2026-03/SIMD-FP-Instructions/EOR3--Three-way-exclusive-OR-
+            intrinsic_args!(fx, args => (a, b, c); intrinsic);
+
+            simd_trio_for_each_lane(
+                fx,
+                a,
+                b,
+                c,
+                ret,
+                &|fx, _lane_ty, _res_lane_ty, a_lane, b_lane, c_lane| {
+                    let xor = fx.bcx.ins().bxor(a_lane, b_lane);
+                    fx.bcx.ins().bxor(xor, c_lane)
+                },
+            );
+        }
+
+        "llvm.aarch64.crypto.bcaxs.v2i64"
+        | "llvm.aarch64.crypto.bcaxs.v4i32"
+        | "llvm.aarch64.crypto.bcaxs.v8i16"
+        | "llvm.aarch64.crypto.bcaxs.v16i8"
+        | "llvm.aarch64.crypto.bcaxu.v2i64"
+        | "llvm.aarch64.crypto.bcaxu.v4i32"
+        | "llvm.aarch64.crypto.bcaxu.v8i16"
+        | "llvm.aarch64.crypto.bcaxu.v16i8" => {
+            // https://developer.arm.com/documentation/ddi0602/2026-03/SIMD-FP-Instructions/BCAX--Bit-clear-and-exclusive-OR-
+            intrinsic_args!(fx, args => (a, b, c); intrinsic);
+
+            simd_trio_for_each_lane(
+                fx,
+                a,
+                b,
+                c,
+                ret,
+                &|fx, _lane_ty, _res_lane_ty, a_lane, b_lane, c_lane| {
+                    let band_not = fx.bcx.ins().band_not(b_lane, c_lane);
+                    fx.bcx.ins().bxor(a_lane, band_not)
+                },
+            );
+        }
+
+        "llvm.aarch64.crypto.rax1" => {
+            // https://developer.arm.com/documentation/ddi0602/2026-03/SIMD-FP-Instructions/RAX1--Rotate-and-exclusive-OR-
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            simd_pair_for_each_lane(
+                fx,
+                a,
+                b,
+                ret,
+                &|fx, _lane_ty, _res_lane_ty, a_lane, b_lane| {
+                    let rot = fx.bcx.ins().rotl_imm(b_lane, 1);
+                    fx.bcx.ins().bxor(a_lane, rot)
+                },
+            );
+        }
+
+        "llvm.aarch64.crypto.xar" => {
+            // https://developer.arm.com/documentation/ddi0602/2026-03/SIMD-FP-Instructions/XAR--Exclusive-OR-and-rotate-
+            intrinsic_args!(fx, args => (a, b, c); intrinsic);
+
+            let c = c.load_scalar(fx);
+
+            simd_pair_for_each_lane(
+                fx,
+                a,
+                b,
+                ret,
+                &|fx, _lane_ty, _res_lane_ty, a_lane, b_lane| {
+                    let xor = fx.bcx.ins().bxor(a_lane, b_lane);
+                    fx.bcx.ins().rotr(xor, c)
+                },
+            );
+        }
+
         "llvm.aarch64.neon.pmull64" => {
             intrinsic_args!(fx, args => (a, b); intrinsic);
 