Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
235 changes: 235 additions & 0 deletions example/neon.rs
Original file line number Diff line number Diff line change
Expand Up @@ -470,6 +470,220 @@ unsafe fn test_vsha512su1q_u64() {
assert_eq!(r, e);
}

/// Runs `veor3q_s8` on three fixed `i8x16` vectors and compares the result with a
/// fixed expected vector.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "sha3")]
unsafe fn test_veor3q_s8() {
    // AArch64 llvm intrinsic: llvm.aarch64.crypto.eor3s.v16i8
    let lhs = i8x16::from([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
    let mid = i8x16::from([16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]);
    let rhs = i8x16::from([32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]);
    // Lane-wise `lhs ^ mid ^ rhs`.
    let expected = i8x16::from([48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]);

    // The lane vectors are reinterpreted as the intrinsic's argument types, and the
    // intrinsic's result is reinterpreted back before the comparison.
    let actual: i8x16 = unsafe {
        let raw = veor3q_s8(transmute(lhs), transmute(mid), transmute(rhs));
        transmute(raw)
    };
    assert_eq!(actual, expected);
}

/// Runs `veor3q_s16` on three fixed `i16x8` vectors and compares the result with a
/// fixed expected vector.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "sha3")]
unsafe fn test_veor3q_s16() {
    // AArch64 llvm intrinsic: llvm.aarch64.crypto.eor3s.v8i16
    let lhs = i16x8::from([0, 1, 2, 3, 4, 5, 6, 7]);
    let mid = i16x8::from([8, 9, 10, 11, 12, 13, 14, 15]);
    let rhs = i16x8::from([16, 17, 18, 19, 20, 21, 22, 23]);
    // Lane-wise `lhs ^ mid ^ rhs`.
    let expected = i16x8::from([24, 25, 26, 27, 28, 29, 30, 31]);

    // Reinterpret the inputs for the intrinsic, then reinterpret its result back.
    let actual: i16x8 = unsafe {
        let raw = veor3q_s16(transmute(lhs), transmute(mid), transmute(rhs));
        transmute(raw)
    };
    assert_eq!(actual, expected);
}

/// Runs `veor3q_s32` on three fixed `i32x4` vectors and compares the result with a
/// fixed expected vector.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "sha3")]
unsafe fn test_veor3q_s32() {
    // AArch64 llvm intrinsic: llvm.aarch64.crypto.eor3s.v4i32
    let lhs = i32x4::from([0, 1, 2, 3]);
    let mid = i32x4::from([4, 5, 6, 7]);
    let rhs = i32x4::from([8, 9, 10, 11]);
    // Lane-wise `lhs ^ mid ^ rhs`.
    let expected = i32x4::from([12, 13, 14, 15]);

    // Reinterpret the inputs for the intrinsic, then reinterpret its result back.
    let actual: i32x4 = unsafe {
        let raw = veor3q_s32(transmute(lhs), transmute(mid), transmute(rhs));
        transmute(raw)
    };
    assert_eq!(actual, expected);
}

/// Runs `veor3q_s64` on three fixed `i64x2` vectors and compares the result with a
/// fixed expected vector.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "sha3")]
unsafe fn test_veor3q_s64() {
    // AArch64 llvm intrinsic: llvm.aarch64.crypto.eor3s.v2i64
    let lhs = i64x2::from([0, 1]);
    let mid = i64x2::from([2, 3]);
    let rhs = i64x2::from([4, 5]);
    // Lane-wise `lhs ^ mid ^ rhs`.
    let expected = i64x2::from([6, 7]);

    // Reinterpret the inputs for the intrinsic, then reinterpret its result back.
    let actual: i64x2 = unsafe {
        let raw = veor3q_s64(transmute(lhs), transmute(mid), transmute(rhs));
        transmute(raw)
    };
    assert_eq!(actual, expected);
}

/// Runs `veor3q_u8` on three fixed `u8x16` vectors and compares the result with a
/// fixed expected vector.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "sha3")]
unsafe fn test_veor3q_u8() {
    // AArch64 llvm intrinsic: llvm.aarch64.crypto.eor3u.v16i8
    let lhs = u8x16::from([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
    let mid = u8x16::from([16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]);
    let rhs = u8x16::from([32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]);
    // Lane-wise `lhs ^ mid ^ rhs`.
    let expected = u8x16::from([48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]);

    // Reinterpret the inputs for the intrinsic, then reinterpret its result back.
    let actual: u8x16 = unsafe {
        let raw = veor3q_u8(transmute(lhs), transmute(mid), transmute(rhs));
        transmute(raw)
    };
    assert_eq!(actual, expected);
}

/// Runs `veor3q_u16` on three fixed `u16x8` vectors and compares the result with a
/// fixed expected vector.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "sha3")]
unsafe fn test_veor3q_u16() {
    // AArch64 llvm intrinsic: llvm.aarch64.crypto.eor3u.v8i16
    let lhs = u16x8::from([0, 1, 2, 3, 4, 5, 6, 7]);
    let mid = u16x8::from([8, 9, 10, 11, 12, 13, 14, 15]);
    let rhs = u16x8::from([16, 17, 18, 19, 20, 21, 22, 23]);
    // Lane-wise `lhs ^ mid ^ rhs`.
    let expected = u16x8::from([24, 25, 26, 27, 28, 29, 30, 31]);

    // Reinterpret the inputs for the intrinsic, then reinterpret its result back.
    let actual: u16x8 = unsafe {
        let raw = veor3q_u16(transmute(lhs), transmute(mid), transmute(rhs));
        transmute(raw)
    };
    assert_eq!(actual, expected);
}

/// Runs `veor3q_u32` on three fixed `u32x4` vectors and compares the result with a
/// fixed expected vector.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "sha3")]
unsafe fn test_veor3q_u32() {
    // AArch64 llvm intrinsic: llvm.aarch64.crypto.eor3u.v4i32
    let lhs = u32x4::from([0, 1, 2, 3]);
    let mid = u32x4::from([4, 5, 6, 7]);
    let rhs = u32x4::from([8, 9, 10, 11]);
    // Lane-wise `lhs ^ mid ^ rhs`.
    let expected = u32x4::from([12, 13, 14, 15]);

    // Reinterpret the inputs for the intrinsic, then reinterpret its result back.
    let actual: u32x4 = unsafe {
        let raw = veor3q_u32(transmute(lhs), transmute(mid), transmute(rhs));
        transmute(raw)
    };
    assert_eq!(actual, expected);
}

/// Runs `veor3q_u64` on three fixed `u64x2` vectors and compares the result with a
/// fixed expected vector.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "sha3")]
unsafe fn test_veor3q_u64() {
    // AArch64 llvm intrinsic: llvm.aarch64.crypto.eor3u.v2i64
    let lhs = u64x2::from([0, 1]);
    let mid = u64x2::from([2, 3]);
    let rhs = u64x2::from([4, 5]);
    // Lane-wise `lhs ^ mid ^ rhs`.
    let expected = u64x2::from([6, 7]);

    // Reinterpret the inputs for the intrinsic, then reinterpret its result back.
    let actual: u64x2 = unsafe {
        let raw = veor3q_u64(transmute(lhs), transmute(mid), transmute(rhs));
        transmute(raw)
    };
    assert_eq!(actual, expected);
}

/// Runs `vbcaxq_s8` on three fixed `i8x16` vectors and compares the result with a
/// fixed expected vector.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "sha3")]
unsafe fn test_vbcaxq_s8() {
    // AArch64 llvm intrinsic: llvm.aarch64.crypto.bcaxs.v16i8
    let base = i8x16::from([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
    let bits = i8x16::from([16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]);
    let clear = i8x16::from([32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]);
    // Lane-wise `base ^ (bits & !clear)`; for these inputs that equals `bits`.
    let expected = i8x16::from([16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]);

    // Reinterpret the inputs for the intrinsic, then reinterpret its result back.
    let actual: i8x16 = unsafe {
        let raw = vbcaxq_s8(transmute(base), transmute(bits), transmute(clear));
        transmute(raw)
    };
    assert_eq!(actual, expected);
}

/// Runs `vbcaxq_s16` on three fixed `i16x8` vectors and compares the result with a
/// fixed expected vector.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "sha3")]
unsafe fn test_vbcaxq_s16() {
    // AArch64 llvm intrinsic: llvm.aarch64.crypto.bcaxs.v8i16
    let base = i16x8::from([0, 1, 2, 3, 4, 5, 6, 7]);
    let bits = i16x8::from([8, 9, 10, 11, 12, 13, 14, 15]);
    let clear = i16x8::from([16, 17, 18, 19, 20, 21, 22, 23]);
    // Lane-wise `base ^ (bits & !clear)`; for these inputs that equals `bits`.
    let expected = i16x8::from([8, 9, 10, 11, 12, 13, 14, 15]);

    // Reinterpret the inputs for the intrinsic, then reinterpret its result back.
    let actual: i16x8 = unsafe {
        let raw = vbcaxq_s16(transmute(base), transmute(bits), transmute(clear));
        transmute(raw)
    };
    assert_eq!(actual, expected);
}

/// Runs `vbcaxq_s32` on three fixed `i32x4` vectors and compares the result with a
/// fixed expected vector.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "sha3")]
unsafe fn test_vbcaxq_s32() {
    // AArch64 llvm intrinsic: llvm.aarch64.crypto.bcaxs.v4i32
    let base = i32x4::from([0, 1, 2, 3]);
    let bits = i32x4::from([4, 5, 6, 7]);
    let clear = i32x4::from([8, 9, 10, 11]);
    // Lane-wise `base ^ (bits & !clear)`; for these inputs that equals `bits`.
    let expected = i32x4::from([4, 5, 6, 7]);

    // Reinterpret the inputs for the intrinsic, then reinterpret its result back.
    let actual: i32x4 = unsafe {
        let raw = vbcaxq_s32(transmute(base), transmute(bits), transmute(clear));
        transmute(raw)
    };
    assert_eq!(actual, expected);
}

/// Runs `vbcaxq_s64` on three fixed `i64x2` vectors and compares the result with a
/// fixed expected vector.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "sha3")]
unsafe fn test_vbcaxq_s64() {
    // AArch64 llvm intrinsic: llvm.aarch64.crypto.bcaxs.v2i64
    let base = i64x2::from([0, 1]);
    let bits = i64x2::from([2, 3]);
    let clear = i64x2::from([4, 5]);
    // Lane-wise `base ^ (bits & !clear)`; for these inputs that equals `bits`.
    let expected = i64x2::from([2, 3]);

    // Reinterpret the inputs for the intrinsic, then reinterpret its result back.
    let actual: i64x2 = unsafe {
        let raw = vbcaxq_s64(transmute(base), transmute(bits), transmute(clear));
        transmute(raw)
    };
    assert_eq!(actual, expected);
}

/// Runs `vbcaxq_u8` on three fixed `u8x16` vectors and compares the result with a
/// fixed expected vector.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "sha3")]
unsafe fn test_vbcaxq_u8() {
    // AArch64 llvm intrinsic: llvm.aarch64.crypto.bcaxu.v16i8
    let base = u8x16::from([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
    let bits = u8x16::from([16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]);
    let clear = u8x16::from([32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]);
    // Lane-wise `base ^ (bits & !clear)`; for these inputs that equals `bits`.
    let expected = u8x16::from([16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]);

    // Reinterpret the inputs for the intrinsic, then reinterpret its result back.
    let actual: u8x16 = unsafe {
        let raw = vbcaxq_u8(transmute(base), transmute(bits), transmute(clear));
        transmute(raw)
    };
    assert_eq!(actual, expected);
}

/// Runs `vbcaxq_u16` on three fixed `u16x8` vectors and compares the result with a
/// fixed expected vector.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "sha3")]
unsafe fn test_vbcaxq_u16() {
    // AArch64 llvm intrinsic: llvm.aarch64.crypto.bcaxu.v8i16
    let base = u16x8::from([0, 1, 2, 3, 4, 5, 6, 7]);
    let bits = u16x8::from([8, 9, 10, 11, 12, 13, 14, 15]);
    let clear = u16x8::from([16, 17, 18, 19, 20, 21, 22, 23]);
    // Lane-wise `base ^ (bits & !clear)`; for these inputs that equals `bits`.
    let expected = u16x8::from([8, 9, 10, 11, 12, 13, 14, 15]);

    // Reinterpret the inputs for the intrinsic, then reinterpret its result back.
    let actual: u16x8 = unsafe {
        let raw = vbcaxq_u16(transmute(base), transmute(bits), transmute(clear));
        transmute(raw)
    };
    assert_eq!(actual, expected);
}

/// Runs `vbcaxq_u32` on three fixed `u32x4` vectors and compares the result with a
/// fixed expected vector.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "sha3")]
unsafe fn test_vbcaxq_u32() {
    // AArch64 llvm intrinsic: llvm.aarch64.crypto.bcaxu.v4i32
    let base = u32x4::from([0, 1, 2, 3]);
    let bits = u32x4::from([4, 5, 6, 7]);
    let clear = u32x4::from([8, 9, 10, 11]);
    // Lane-wise `base ^ (bits & !clear)`; for these inputs that equals `bits`.
    let expected = u32x4::from([4, 5, 6, 7]);

    // Reinterpret the inputs for the intrinsic, then reinterpret its result back.
    let actual: u32x4 = unsafe {
        let raw = vbcaxq_u32(transmute(base), transmute(bits), transmute(clear));
        transmute(raw)
    };
    assert_eq!(actual, expected);
}

/// Runs `vbcaxq_u64` on three fixed `u64x2` vectors and compares the result with a
/// fixed expected vector.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "sha3")]
unsafe fn test_vbcaxq_u64() {
    // AArch64 llvm intrinsic: llvm.aarch64.crypto.bcaxu.v2i64
    let base = u64x2::from([0, 1]);
    let bits = u64x2::from([2, 3]);
    let clear = u64x2::from([4, 5]);
    // Lane-wise `base ^ (bits & !clear)`; for these inputs that equals `bits`.
    let expected = u64x2::from([2, 3]);

    // Reinterpret the inputs for the intrinsic, then reinterpret its result back.
    let actual: u64x2 = unsafe {
        let raw = vbcaxq_u64(transmute(base), transmute(bits), transmute(clear));
        transmute(raw)
    };
    assert_eq!(actual, expected);
}

/// Runs `vrax1q_u64` on two fixed `u64x2` vectors and compares the result with a
/// fixed expected vector.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "sha3")]
unsafe fn test_vrax1q_u64() {
    // AArch64 llvm intrinsic: llvm.aarch64.crypto.rax1
    let base = u64x2::from([0, 1]);
    let rot_src = u64x2::from([2, 3]);
    // Lane-wise `base ^ rot_src.rotate_left(1)`: 0 ^ 4 = 4 and 1 ^ 6 = 7.
    let expected = u64x2::from([4, 7]);

    // Reinterpret the inputs for the intrinsic, then reinterpret its result back.
    let actual: u64x2 = unsafe {
        let raw = vrax1q_u64(transmute(base), transmute(rot_src));
        transmute(raw)
    };
    assert_eq!(actual, expected);
}

/// Runs `vxarq_u64` with a rotate amount of 63 on two fixed `u64x2` vectors and
/// compares the result with a fixed expected vector.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "sha3")]
unsafe fn test_vxarq_u64() {
    // AArch64 llvm intrinsic: llvm.aarch64.crypto.xar
    let lhs = u64x2::from([0, 1]);
    let rhs = u64x2::from([2, 3]);
    // Lane-wise `(lhs ^ rhs).rotate_right(63)`: both lanes XOR to 2, which rotates to 4.
    let expected = u64x2::from([4, 4]);

    // Reinterpret the inputs for the intrinsic, then reinterpret its result back.
    let actual: u64x2 = unsafe {
        let raw = vxarq_u64::<63>(transmute(lhs), transmute(rhs));
        transmute(raw)
    };
    assert_eq!(actual, expected);
}

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "aes")]
fn test_vmull_p64() {
Expand Down Expand Up @@ -698,6 +912,27 @@ fn main() {
test_vsha512h2q_u64();
test_vsha512su0q_u64();
test_vsha512su1q_u64();

test_veor3q_s8();
test_veor3q_s16();
test_veor3q_s32();
test_veor3q_s64();
test_veor3q_u8();
test_veor3q_u16();
test_veor3q_u32();
test_veor3q_u64();

test_vbcaxq_s8();
test_vbcaxq_s16();
test_vbcaxq_s32();
test_vbcaxq_s64();
test_vbcaxq_u8();
test_vbcaxq_u16();
test_vbcaxq_u32();
test_vbcaxq_u64();

test_vrax1q_u64();
test_vxarq_u64();
}

test_vmull_p64();
Expand Down
82 changes: 82 additions & 0 deletions src/intrinsics/llvm_aarch64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -977,6 +977,88 @@ pub(super) fn codegen_aarch64_llvm_intrinsic_call<'tcx>(
);
}

// EOR3 (three-way exclusive OR): each result lane is `a ^ b ^ c`.
// The signed (`eor3s`) and unsigned (`eor3u`) names of every listed vector shape
// share this arm; the closure below ignores the lane type it is handed.
"llvm.aarch64.crypto.eor3s.v2i64"
| "llvm.aarch64.crypto.eor3s.v4i32"
| "llvm.aarch64.crypto.eor3s.v8i16"
| "llvm.aarch64.crypto.eor3s.v16i8"
| "llvm.aarch64.crypto.eor3u.v2i64"
| "llvm.aarch64.crypto.eor3u.v4i32"
| "llvm.aarch64.crypto.eor3u.v8i16"
| "llvm.aarch64.crypto.eor3u.v16i8" => {
// https://developer.arm.com/documentation/ddi0602/2026-03/SIMD-FP-Instructions/EOR3--Three-way-exclusive-OR-
intrinsic_args!(fx, args => (a, b, c); intrinsic);

// NOTE(review): presumably invokes the closure once per lane of `a`/`b`/`c` and
// stores each returned value into the matching lane of `ret` — confirm against
// the helper's definition.
simd_trio_for_each_lane(
fx,
a,
b,
c,
ret,
&|fx, _lane_ty, _res_lane_ty, a_lane, b_lane, c_lane| {
// Two chained XORs: `(a ^ b) ^ c`.
let xor = fx.bcx.ins().bxor(a_lane, b_lane);
fx.bcx.ins().bxor(xor, c_lane)
},
);
}

// BCAX (bit clear and exclusive OR): each result lane is `a ^ (b & !c)`.
// The signed (`bcaxs`) and unsigned (`bcaxu`) names of every listed vector shape
// share this arm; the closure below ignores the lane type it is handed.
"llvm.aarch64.crypto.bcaxs.v2i64"
| "llvm.aarch64.crypto.bcaxs.v4i32"
| "llvm.aarch64.crypto.bcaxs.v8i16"
| "llvm.aarch64.crypto.bcaxs.v16i8"
| "llvm.aarch64.crypto.bcaxu.v2i64"
| "llvm.aarch64.crypto.bcaxu.v4i32"
| "llvm.aarch64.crypto.bcaxu.v8i16"
| "llvm.aarch64.crypto.bcaxu.v16i8" => {
// https://developer.arm.com/documentation/ddi0602/2026-03/SIMD-FP-Instructions/BCAX--Bit-clear-and-exclusive-OR-
intrinsic_args!(fx, args => (a, b, c); intrinsic);

// NOTE(review): presumably invokes the closure once per lane of `a`/`b`/`c` and
// stores each returned value into the matching lane of `ret` — confirm against
// the helper's definition.
simd_trio_for_each_lane(
fx,
a,
b,
c,
ret,
&|fx, _lane_ty, _res_lane_ty, a_lane, b_lane, c_lane| {
// Cranelift's `band_not(x, y)` is `x & !y`, so operand order matters here:
// the bits set in `c` are cleared from `b` before the XOR with `a`.
let band_not = fx.bcx.ins().band_not(b_lane, c_lane);
fx.bcx.ins().bxor(a_lane, band_not)
},
);
}

// RAX1 (rotate and exclusive OR): each result lane is `a ^ rotate_left(b, 1)`.
"llvm.aarch64.crypto.rax1" => {
// https://developer.arm.com/documentation/ddi0602/2026-03/SIMD-FP-Instructions/RAX1--Rotate-and-exclusive-OR-
intrinsic_args!(fx, args => (a, b); intrinsic);

// NOTE(review): presumably invokes the closure once per lane of `a`/`b` and stores
// each returned value into the matching lane of `ret` — confirm against the
// helper's definition.
simd_pair_for_each_lane(
fx,
a,
b,
ret,
&|fx, _lane_ty, _res_lane_ty, a_lane, b_lane| {
// Only the second operand is rotated (left by one bit); the first is XORed in as-is.
let rot = fx.bcx.ins().rotl_imm(b_lane, 1);
fx.bcx.ins().bxor(a_lane, rot)
},
);
}

// XAR (exclusive OR and rotate): each result lane is `rotate_right(a ^ b, c)`,
// where `c` is the intrinsic's third, scalar argument.
"llvm.aarch64.crypto.xar" => {
// https://developer.arm.com/documentation/ddi0602/2026-03/SIMD-FP-Instructions/XAR--Exclusive-OR-and-rotate-
intrinsic_args!(fx, args => (a, b, c); intrinsic);

// The rotate amount is loaded once here, outside the per-lane closure, and the
// same value is reused for every lane.
let c = c.load_scalar(fx);

// NOTE(review): presumably invokes the closure once per lane of `a`/`b` and stores
// each returned value into the matching lane of `ret` — confirm against the
// helper's definition.
simd_pair_for_each_lane(
fx,
a,
b,
ret,
&|fx, _lane_ty, _res_lane_ty, a_lane, b_lane| {
// XOR first, then rotate the combined value right by `c`.
let xor = fx.bcx.ins().bxor(a_lane, b_lane);
fx.bcx.ins().rotr(xor, c)
},
);
}

"llvm.aarch64.neon.pmull64" => {
intrinsic_args!(fx, args => (a, b); intrinsic);

Expand Down
Loading