From 5b1fda15f148337f62d5c3b68b369f50a9184f91 Mon Sep 17 00:00:00 2001 From: reucru01 Date: Fri, 24 Oct 2025 10:15:01 +0100 Subject: [PATCH 1/5] Fixes generator, neon intrinics now build in debug This fixes build issues associated with failing LLVM const param assertions --- .../src/arm_shared/neon/generated.rs | 1128 ++++++----------- .../spec/neon/arm_shared.spec.yml | 335 ++--- crates/stdarch-gen-arm/src/expression.rs | 17 + crates/stdarch-gen-arm/src/intrinsic.rs | 3 +- 4 files changed, 565 insertions(+), 918 deletions(-) diff --git a/crates/core_arch/src/arm_shared/neon/generated.rs b/crates/core_arch/src/arm_shared/neon/generated.rs index b5ba792b18..c1bd70175c 100644 --- a/crates/core_arch/src/arm_shared/neon/generated.rs +++ b/crates/core_arch/src/arm_shared/neon/generated.rs @@ -15137,268 +15137,104 @@ pub unsafe fn vld1q_f16_x4(a: *const f16) -> float16x8x4_t { #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t { - transmute(vld1_v2f32( - ptr as *const i8, - crate::mem::align_of::() as i32, - )) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] -#[inline] -#[cfg(target_endian = "big")] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] -pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t { - let ret_val: float32x2_t = transmute(vld1_v2f32( - ptr as *const i8, - crate::mem::align_of::() as i32, - )); - simd_shuffle!(ret_val, ret_val, [1, 0]) + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1_v2f32::(ptr as *const i8)) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t { - transmute(vld1q_v4f32( - ptr as *const i8, - crate::mem::align_of::() as i32, - )) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] -#[inline] -#[cfg(target_endian = "big")] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] -pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t { - let ret_val: float32x4_t = transmute(vld1q_v4f32( - ptr as *const i8, - crate::mem::align_of::() as i32, - )); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] -#[inline] -#[cfg(target_endian = "little")] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] -pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t { - transmute(vld1_v8i8( - ptr as *const i8, - crate::mem::align_of::() as i32, - )) + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1q_v4f32::(ptr as *const i8)) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t { - let ret_val: uint8x8_t = transmute(vld1_v8i8( - ptr as *const i8, - crate::mem::align_of::() as i32, - )); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1_v8i8::(ptr as *const i8)) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t { - transmute(vld1q_v16i8( - ptr as *const i8, - crate::mem::align_of::() as i32, - )) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] -#[inline] -#[cfg(target_endian = "big")] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] -pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t { - let ret_val: uint8x16_t = transmute(vld1q_v16i8( - ptr as *const i8, - crate::mem::align_of::() as i32, - )); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] -#[inline] -#[cfg(target_endian = "little")] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] -pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t { - transmute(vld1_v4i16( - ptr as *const i8, - crate::mem::align_of::() as i32, - )) + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1q_v16i8::(ptr as *const i8)) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t { - let ret_val: uint16x4_t = transmute(vld1_v4i16( - ptr as *const i8, - crate::mem::align_of::() as i32, - )); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1_v4i16::(ptr as *const i8)) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t { - transmute(vld1q_v8i16( - ptr as *const i8, - crate::mem::align_of::() as i32, - )) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] -#[inline] -#[cfg(target_endian = "big")] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] -pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t { - let ret_val: uint16x8_t = transmute(vld1q_v8i16( - ptr as *const i8, - crate::mem::align_of::() as i32, - )); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] -#[inline] -#[cfg(target_endian = "little")] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] -pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t { - transmute(vld1_v2i32( - ptr as *const i8, - crate::mem::align_of::() as i32, - )) + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1q_v8i16::(ptr as *const i8)) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t { - let ret_val: uint32x2_t = transmute(vld1_v2i32( - ptr as *const i8, - crate::mem::align_of::() as i32, - )); - simd_shuffle!(ret_val, ret_val, [1, 0]) + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1_v2i32::(ptr as *const i8)) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t { - transmute(vld1q_v4i32( - ptr as *const i8, - crate::mem::align_of::() as i32, - )) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] -#[inline] -#[cfg(target_endian = "big")] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] -pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t { - let ret_val: uint32x4_t = transmute(vld1q_v4i32( - ptr as *const i8, - crate::mem::align_of::() as i32, - )); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1q_v4i32::(ptr as *const i8)) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64)"] @@ -15410,212 +15246,86 @@ pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] pub unsafe fn vld1_u64(ptr: *const u64) -> uint64x1_t { - transmute(vld1_v1i64( - ptr as *const i8, - crate::mem::align_of::() as i32, - )) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] -#[inline] -#[cfg(target_endian = "little")] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))] -pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t { - transmute(vld1q_v2i64( - ptr as *const i8, - crate::mem::align_of::() as i32, - )) + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1_v1i64::(ptr as *const i8)) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))] pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t { - let ret_val: uint64x2_t = transmute(vld1q_v2i64( - ptr as *const i8, - crate::mem::align_of::() as i32, - )); - simd_shuffle!(ret_val, ret_val, [1, 0]) + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1q_v2i64::(ptr as *const i8)) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t { - transmute(vld1_v8i8( - ptr as *const i8, - crate::mem::align_of::() as i32, - )) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] -#[inline] -#[cfg(target_endian = "big")] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] -pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t { - let ret_val: poly8x8_t = transmute(vld1_v8i8( - ptr as *const i8, - crate::mem::align_of::() as i32, - )); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] -#[inline] -#[cfg(target_endian = "little")] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] -pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t { - transmute(vld1q_v16i8( - ptr as *const i8, - crate::mem::align_of::() as i32, - )) + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1_v8i8::(ptr as *const i8)) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t { - let ret_val: poly8x16_t = transmute(vld1q_v16i8( - ptr as *const i8, - crate::mem::align_of::() as i32, - )); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1q_v16i8::(ptr as *const i8)) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t { - transmute(vld1_v4i16( - ptr as *const i8, - crate::mem::align_of::() as i32, - )) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] -#[inline] -#[cfg(target_endian = "big")] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] -pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t { - let ret_val: poly16x4_t = transmute(vld1_v4i16( - ptr as *const i8, - crate::mem::align_of::() as i32, - )); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] -#[inline] -#[cfg(target_endian = "little")] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] -pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t { - transmute(vld1q_v8i16( - ptr as *const i8, - crate::mem::align_of::() as i32, - )) + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1_v4i16::(ptr as *const i8)) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t { - let ret_val: poly16x8_t = transmute(vld1q_v8i16( - ptr as *const i8, - crate::mem::align_of::() as i32, - )); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1q_v8i16::(ptr as *const i8)) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,aes")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))] pub unsafe fn vld1q_p64(ptr: *const p64) -> poly64x2_t { - transmute(vld1q_v2i64( - ptr as *const i8, - crate::mem::align_of::() as i32, - )) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] -#[inline] -#[cfg(target_endian = "big")] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,aes")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))] -pub unsafe fn vld1q_p64(ptr: *const p64) -> poly64x2_t { - let ret_val: poly64x2_t = transmute(vld1q_v2i64( - ptr as *const i8, - crate::mem::align_of::() as i32, - )); - simd_shuffle!(ret_val, ret_val, [1, 0]) + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1q_v2i64::(ptr as *const i8)) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x2)"] @@ -16694,7 +16404,8 @@ pub unsafe fn vld1q_p64_x4(a: *const p64) -> poly64x2x4_t { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t { - vld1_v8i8(ptr as *const i8, crate::mem::align_of::() as i32) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vld1_v8i8::(ptr as *const i8) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8)"] @@ -16706,7 +16417,8 @@ pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t { - vld1q_v16i8(ptr as *const i8, crate::mem::align_of::() as i32) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vld1q_v16i8::(ptr as *const i8) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16)"] @@ -16718,7 +16430,8 @@ pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t { - vld1_v4i16(ptr as *const i8, crate::mem::align_of::() as i32) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vld1_v4i16::(ptr as *const i8) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16)"] @@ -16730,7 +16443,8 @@ pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t { - vld1q_v8i16(ptr as *const i8, crate::mem::align_of::() as i32) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vld1q_v8i16::(ptr as *const i8) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32)"] @@ -16742,7 +16456,8 @@ pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t { - vld1_v2i32(ptr as *const i8, crate::mem::align_of::() as i32) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vld1_v2i32::(ptr as *const i8) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32)"] @@ -16754,7 +16469,8 @@ pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t { - vld1q_v4i32(ptr as *const i8, crate::mem::align_of::() as i32) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vld1q_v4i32::(ptr as *const i8) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64)"] @@ -16766,7 +16482,8 @@ pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t { - vld1_v1i64(ptr as *const i8, crate::mem::align_of::() as i32) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vld1_v1i64::(ptr as *const i8) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64)"] @@ -16778,7 +16495,8 @@ pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))] pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t { - vld1q_v2i64(ptr as *const i8, crate::mem::align_of::() as i32) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vld1q_v2i64::(ptr as *const i8) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8_x2)"] @@ -19417,160 +19135,136 @@ pub unsafe fn vld1q_p16_x4(a: *const p16) -> poly16x8x4_t { ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]) }; ret_val } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_v1i64)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] +#[rustc_legacy_const_generics(1)] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1_v1i64(a: *const i8, b: i32) -> int64x1_t { +unsafe fn vld1_v1i64(a: *const i8) -> int64x1_t { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v1i64")] fn _vld1_v1i64(a: *const i8, b: i32) -> int64x1_t; } - _vld1_v1i64(a, b) + _vld1_v1i64(a, ALIGN) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_v2f32)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] +#[rustc_legacy_const_generics(1)] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1_v2f32(a: *const i8, b: i32) -> float32x2_t { +unsafe fn vld1_v2f32(a: *const i8) -> float32x2_t { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v2f32")] fn _vld1_v2f32(a: *const i8, b: i32) -> float32x2_t; } - _vld1_v2f32(a, b) + _vld1_v2f32(a, ALIGN) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_v2i32)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] +#[rustc_legacy_const_generics(1)] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1_v2i32(a: *const i8, b: i32) -> int32x2_t { +unsafe fn vld1_v2i32(a: *const i8) -> int32x2_t { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v2i32")] fn _vld1_v2i32(a: *const i8, b: i32) -> int32x2_t; } - _vld1_v2i32(a, b) + _vld1_v2i32(a, ALIGN) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_v4i16)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] +#[rustc_legacy_const_generics(1)] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1_v4i16(a: *const i8, b: i32) -> int16x4_t { +unsafe fn vld1_v4i16(a: *const i8) -> int16x4_t { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4i16")] fn _vld1_v4i16(a: *const i8, b: i32) -> int16x4_t; } - _vld1_v4i16(a, b) + _vld1_v4i16(a, ALIGN) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_v8i8)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] +#[rustc_legacy_const_generics(1)] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1_v8i8(a: *const i8, b: i32) -> int8x8_t { +unsafe fn vld1_v8i8(a: *const i8) -> int8x8_t { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v8i8")] fn _vld1_v8i8(a: *const i8, b: i32) -> int8x8_t; } - _vld1_v8i8(a, b) + _vld1_v8i8(a, ALIGN) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_v16i8)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] +#[rustc_legacy_const_generics(1)] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1q_v16i8(a: *const i8, b: i32) -> int8x16_t { +unsafe fn vld1q_v16i8(a: *const i8) -> int8x16_t { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v16i8")] fn _vld1q_v16i8(a: *const i8, b: i32) -> int8x16_t; } - _vld1q_v16i8(a, b) + _vld1q_v16i8(a, ALIGN) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_v2i64)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] +#[rustc_legacy_const_generics(1)] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1q_v2i64(a: *const i8, b: i32) -> int64x2_t { +unsafe fn vld1q_v2i64(a: *const i8) -> int64x2_t { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v2i64")] fn _vld1q_v2i64(a: *const i8, b: i32) -> int64x2_t; } - _vld1q_v2i64(a, b) + _vld1q_v2i64(a, ALIGN) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_v4f32)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] +#[rustc_legacy_const_generics(1)] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1q_v4f32(a: *const i8, b: i32) -> float32x4_t { +unsafe fn vld1q_v4f32(a: *const i8) -> float32x4_t { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4f32")] fn _vld1q_v4f32(a: *const i8, b: i32) -> float32x4_t; } - _vld1q_v4f32(a, b) + _vld1q_v4f32(a, ALIGN) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_v4i32)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] +#[rustc_legacy_const_generics(1)] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1q_v4i32(a: *const i8, b: i32) -> int32x4_t { +unsafe fn vld1q_v4i32(a: *const i8) -> int32x4_t { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4i32")] fn _vld1q_v4i32(a: *const i8, b: i32) -> int32x4_t; } - _vld1q_v4i32(a, b) + _vld1q_v4i32(a, ALIGN) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_v8i16)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] +#[rustc_legacy_const_generics(1)] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1q_v8i16(a: *const i8, b: i32) -> int16x8_t { +unsafe fn vld1q_v8i16(a: *const i8) -> int16x8_t { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v8i16")] fn _vld1q_v8i16(a: *const i8, b: i32) -> int16x8_t; } - _vld1q_v8i16(a, b) + _vld1q_v8i16(a, ALIGN) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_v4f16)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] @@ -19585,10 +19279,6 @@ unsafe fn vld1_v4f16(a: *const i8, b: i32) -> float16x4_t { } _vld1_v4f16(a, b) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_v8f16)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] @@ -61060,117 +60750,229 @@ pub fn vsha256su1q_u32(tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t) } unsafe { _vsha256su1q_u32(tw0_3, w8_11, w12_15) } } +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftlins_v16i8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v16i8")] + fn _vshiftlins_v16i8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t; + } + unsafe { _vshiftlins_v16i8(a, b, const { int8x16_t([N as i8; 16]) }) } +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftlins_v1i64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v1i64")] + fn _vshiftlins_v1i64(a: int64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t; + } + unsafe { _vshiftlins_v1i64(a, b, const { int64x1_t([N as i64; 1]) }) } +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftlins_v2i32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i32")] + fn _vshiftlins_v2i32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t; + } + unsafe { _vshiftlins_v2i32(a, b, const { int32x2_t([N; 2]) }) } +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftlins_v2i64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i64")] + fn _vshiftlins_v2i64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t; + } + unsafe { _vshiftlins_v2i64(a, b, const { int64x2_t([N as i64; 2]) }) } +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftlins_v4i16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i16")] + fn _vshiftlins_v4i16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t; + } + unsafe { _vshiftlins_v4i16(a, b, const { int16x4_t([N as i16; 4]) }) } +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftlins_v4i32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i32")] + fn _vshiftlins_v4i32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t; + } + unsafe { _vshiftlins_v4i32(a, b, const { int32x4_t([N; 4]) }) } +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftlins_v8i16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i16")] + fn _vshiftlins_v8i16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t; + } + unsafe { _vshiftlins_v8i16(a, b, const { int16x8_t([N as i16; 8]) }) } +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftlins_v8i8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i8")] + fn _vshiftlins_v8i8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; + } + unsafe { _vshiftlins_v8i8(a, b, const { int8x8_t([N as i8; 8]) }) } +} #[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v16i8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v16i8)"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -fn vshiftins_v16i8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t { +#[rustc_legacy_const_generics(2)] +fn vshiftrins_v16i8(a: int8x16_t, b: int8x16_t) -> int8x16_t { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v16i8")] - fn _vshiftins_v16i8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t; + fn _vshiftrins_v16i8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t; } - unsafe { _vshiftins_v16i8(a, b, c) } + unsafe { _vshiftrins_v16i8(a, b, const { int8x16_t([-N as i8; 16]) }) } } #[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v1i64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v1i64)"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -fn vshiftins_v1i64(a: int64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t { +#[rustc_legacy_const_generics(2)] +fn vshiftrins_v1i64(a: int64x1_t, b: int64x1_t) -> int64x1_t { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v1i64")] - fn _vshiftins_v1i64(a: int64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t; + fn _vshiftrins_v1i64(a: int64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t; } - unsafe { _vshiftins_v1i64(a, b, c) } + unsafe { _vshiftrins_v1i64(a, b, const { int64x1_t([-N as i64; 1]) }) } } #[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v2i32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v2i32)"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -fn vshiftins_v2i32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { +#[rustc_legacy_const_generics(2)] +fn vshiftrins_v2i32(a: int32x2_t, b: int32x2_t) -> int32x2_t { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i32")] - fn _vshiftins_v2i32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t; + fn _vshiftrins_v2i32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t; } - unsafe { _vshiftins_v2i32(a, b, c) } + unsafe { _vshiftrins_v2i32(a, b, const { int32x2_t([-N; 2]) }) } } #[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v2i64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v2i64)"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -fn vshiftins_v2i64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t { +#[rustc_legacy_const_generics(2)] +fn vshiftrins_v2i64(a: int64x2_t, b: int64x2_t) -> int64x2_t { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i64")] - fn _vshiftins_v2i64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t; + fn _vshiftrins_v2i64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t; } - unsafe { _vshiftins_v2i64(a, b, c) } + unsafe { _vshiftrins_v2i64(a, b, const { int64x2_t([-N as i64; 2]) }) } } #[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v4i16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v4i16)"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -fn vshiftins_v4i16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { +#[rustc_legacy_const_generics(2)] +fn vshiftrins_v4i16(a: int16x4_t, b: int16x4_t) -> int16x4_t { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i16")] - fn _vshiftins_v4i16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t; + fn _vshiftrins_v4i16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t; } - unsafe { _vshiftins_v4i16(a, b, c) } + unsafe { _vshiftrins_v4i16(a, b, const { int16x4_t([-N as i16; 4]) }) } } #[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v4i32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v4i32)"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -fn vshiftins_v4i32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { +#[rustc_legacy_const_generics(2)] +fn vshiftrins_v4i32(a: int32x4_t, b: int32x4_t) -> int32x4_t { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i32")] - fn _vshiftins_v4i32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t; + fn _vshiftrins_v4i32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t; } - unsafe { _vshiftins_v4i32(a, b, c) } + unsafe { _vshiftrins_v4i32(a, b, const { int32x4_t([-N; 4]) }) } } #[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v8i16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v8i16)"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -fn vshiftins_v8i16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { +#[rustc_legacy_const_generics(2)] +fn vshiftrins_v8i16(a: int16x8_t, b: int16x8_t) -> int16x8_t { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i16")] - fn _vshiftins_v8i16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t; + fn _vshiftrins_v8i16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t; } - unsafe { _vshiftins_v8i16(a, b, c) } + unsafe { _vshiftrins_v8i16(a, b, const { int16x8_t([-N as i16; 8]) }) } } #[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v8i8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v8i8)"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -fn vshiftins_v8i8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { +#[rustc_legacy_const_generics(2)] +fn vshiftrins_v8i8(a: int8x8_t, b: int8x8_t) -> int8x8_t { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i8")] - fn _vshiftins_v8i8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; + fn _vshiftrins_v8i8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; } - unsafe { _vshiftins_v8i8(a, b, c) } + unsafe { _vshiftrins_v8i8(a, b, const { int8x8_t([-N as i8; 8]) }) } } #[doc = "Shift left"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s8)"] @@ -62706,7 +62508,7 @@ pub fn vshrn_n_u64(a: uint64x2_t) -> uint32x2_t { #[rustc_legacy_const_generics(2)] pub fn vsli_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { static_assert_uimm_bits!(N, 3); - vshiftins_v8i8(a, b, int8x8_t::splat(N as i8)) + vshiftlins_v8i8::(a, b) } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s8)"] @@ -62718,7 +62520,7 @@ pub fn vsli_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { #[rustc_legacy_const_generics(2)] pub fn vsliq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { static_assert_uimm_bits!(N, 3); - vshiftins_v16i8(a, b, int8x16_t::splat(N as i8)) + vshiftlins_v16i8::(a, b) } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s16)"] @@ -62730,7 +62532,7 @@ pub fn vsliq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { #[rustc_legacy_const_generics(2)] pub fn vsli_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { static_assert_uimm_bits!(N, 4); - vshiftins_v4i16(a, b, int16x4_t::splat(N as i16)) + vshiftlins_v4i16::(a, b) } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s16)"] @@ -62742,7 +62544,7 @@ pub fn vsli_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { #[rustc_legacy_const_generics(2)] pub fn vsliq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { static_assert_uimm_bits!(N, 4); - vshiftins_v8i16(a, b, int16x8_t::splat(N as i16)) + vshiftlins_v8i16::(a, b) } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s32)"] @@ -62754,7 +62556,7 @@ pub fn vsliq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { #[rustc_legacy_const_generics(2)] pub fn vsli_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { static_assert!(N >= 0 && N <= 31); - vshiftins_v2i32(a, b, int32x2_t::splat(N)) + vshiftlins_v2i32::(a, b) } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s32)"] @@ -62766,7 +62568,7 @@ pub fn vsli_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { #[rustc_legacy_const_generics(2)] pub fn vsliq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { static_assert!(N >= 0 && N <= 31); - vshiftins_v4i32(a, b, int32x4_t::splat(N)) + vshiftlins_v4i32::(a, b) } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s64)"] @@ -62778,7 +62580,7 @@ pub fn vsliq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { #[rustc_legacy_const_generics(2)] pub fn vsli_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { static_assert!(N >= 0 && N <= 63); - vshiftins_v1i64(a, b, int64x1_t::splat(N as i64)) + vshiftlins_v1i64::(a, b) } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s64)"] @@ -62790,7 +62592,7 @@ pub fn vsli_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { #[rustc_legacy_const_generics(2)] pub fn vsliq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { static_assert!(N >= 0 && N <= 63); - vshiftins_v2i64(a, b, int64x2_t::splat(N as i64)) + vshiftlins_v2i64::(a, b) } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u8)"] @@ -62802,13 +62604,7 @@ pub fn vsliq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { #[rustc_legacy_const_generics(2)] pub fn vsli_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { static_assert_uimm_bits!(N, 3); - unsafe { - transmute(vshiftins_v8i8( - transmute(a), - transmute(b), - int8x8_t::splat(N as i8), - )) - } + unsafe { transmute(vshiftlins_v8i8::(transmute(a), transmute(b))) } } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u8)"] @@ -62820,13 +62616,7 @@ pub fn vsli_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { #[rustc_legacy_const_generics(2)] pub fn vsliq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { static_assert_uimm_bits!(N, 3); - unsafe { - transmute(vshiftins_v16i8( - transmute(a), - transmute(b), - int8x16_t::splat(N as i8), - )) - } + unsafe { transmute(vshiftlins_v16i8::(transmute(a), transmute(b))) } } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u16)"] @@ -62838,13 +62628,7 @@ pub fn vsliq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { #[rustc_legacy_const_generics(2)] pub fn vsli_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { static_assert_uimm_bits!(N, 4); - unsafe { - transmute(vshiftins_v4i16( - transmute(a), - transmute(b), - int16x4_t::splat(N as i16), - )) - } + unsafe { transmute(vshiftlins_v4i16::(transmute(a), transmute(b))) } } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u16)"] @@ -62856,13 +62640,7 @@ pub fn vsli_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { #[rustc_legacy_const_generics(2)] pub fn vsliq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { static_assert_uimm_bits!(N, 4); - unsafe { - transmute(vshiftins_v8i16( - transmute(a), - transmute(b), - int16x8_t::splat(N as i16), - )) - } + unsafe { transmute(vshiftlins_v8i16::(transmute(a), transmute(b))) } } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u32)"] @@ -62874,13 +62652,7 @@ pub fn vsliq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { #[rustc_legacy_const_generics(2)] pub fn vsli_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { static_assert!(N >= 0 && N <= 31); - unsafe { - transmute(vshiftins_v2i32( - transmute(a), - transmute(b), - int32x2_t::splat(N), - )) - } + unsafe { transmute(vshiftlins_v2i32::(transmute(a), transmute(b))) } } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u32)"] @@ -62892,13 +62664,7 @@ pub fn vsli_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { #[rustc_legacy_const_generics(2)] pub fn vsliq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { static_assert!(N >= 0 && N <= 31); - unsafe { - transmute(vshiftins_v4i32( - transmute(a), - transmute(b), - int32x4_t::splat(N), - )) - } + unsafe { transmute(vshiftlins_v4i32::(transmute(a), transmute(b))) } } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u64)"] @@ -62910,13 +62676,7 @@ pub fn vsliq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { #[rustc_legacy_const_generics(2)] pub fn vsli_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { static_assert!(N >= 0 && N <= 63); - unsafe { - transmute(vshiftins_v1i64( - transmute(a), - transmute(b), - int64x1_t::splat(N as i64), - )) - } + unsafe { transmute(vshiftlins_v1i64::(transmute(a), transmute(b))) } } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u64)"] @@ -62928,13 +62688,7 @@ pub fn vsli_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { #[rustc_legacy_const_generics(2)] pub fn vsliq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { static_assert!(N >= 0 && N <= 63); - unsafe { - transmute(vshiftins_v2i64( - transmute(a), - transmute(b), - int64x2_t::splat(N as i64), - )) - } + unsafe { transmute(vshiftlins_v2i64::(transmute(a), transmute(b))) } } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p8)"] @@ -62946,13 +62700,7 @@ pub fn vsliq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { #[rustc_legacy_const_generics(2)] pub fn vsli_n_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { static_assert_uimm_bits!(N, 3); - unsafe { - transmute(vshiftins_v8i8( - transmute(a), - transmute(b), - int8x8_t::splat(N as i8), - )) - } + unsafe { transmute(vshiftlins_v8i8::(transmute(a), transmute(b))) } } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p8)"] @@ -62964,13 +62712,7 @@ pub fn vsli_n_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { #[rustc_legacy_const_generics(2)] pub fn vsliq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { static_assert_uimm_bits!(N, 3); - unsafe { - transmute(vshiftins_v16i8( - transmute(a), - transmute(b), - int8x16_t::splat(N as i8), - )) - } + unsafe { transmute(vshiftlins_v16i8::(transmute(a), transmute(b))) } } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p16)"] @@ -62982,13 +62724,7 @@ pub fn vsliq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { #[rustc_legacy_const_generics(2)] pub fn vsli_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { static_assert_uimm_bits!(N, 4); - unsafe { - transmute(vshiftins_v4i16( - transmute(a), - transmute(b), - int16x4_t::splat(N as i16), - )) - } + unsafe { transmute(vshiftlins_v4i16::(transmute(a), transmute(b))) } } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p16)"] @@ -63000,13 +62736,7 @@ pub fn vsli_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { #[rustc_legacy_const_generics(2)] pub fn vsliq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { static_assert_uimm_bits!(N, 4); - unsafe { - transmute(vshiftins_v8i16( - transmute(a), - transmute(b), - int16x8_t::splat(N as i16), - )) - } + unsafe { transmute(vshiftlins_v8i16::(transmute(a), transmute(b))) } } #[doc = "Signed shift right and accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s8)"] @@ -63386,7 +63116,7 @@ pub fn vsraq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { #[rustc_legacy_const_generics(2)] pub fn vsri_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { static_assert!(1 <= N && N <= 8); - vshiftins_v8i8(a, b, int8x8_t::splat(-N as i8)) + vshiftrins_v8i8::(a, b) } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s8)"] @@ -63398,7 +63128,7 @@ pub fn vsri_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { #[rustc_legacy_const_generics(2)] pub fn vsriq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { static_assert!(1 <= N && N <= 8); - vshiftins_v16i8(a, b, int8x16_t::splat(-N as i8)) + vshiftrins_v16i8::(a, b) } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s16)"] @@ -63410,7 +63140,7 @@ pub fn vsriq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { #[rustc_legacy_const_generics(2)] pub fn vsri_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { static_assert!(1 <= N && N <= 16); - vshiftins_v4i16(a, b, int16x4_t::splat(-N as i16)) + vshiftrins_v4i16::(a, b) } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s16)"] @@ -63422,7 +63152,7 @@ pub fn vsri_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { #[rustc_legacy_const_generics(2)] pub fn vsriq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { static_assert!(1 <= N && N <= 16); - vshiftins_v8i16(a, b, int16x8_t::splat(-N as i16)) + vshiftrins_v8i16::(a, b) } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s32)"] @@ -63434,7 +63164,7 @@ pub fn vsriq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { #[rustc_legacy_const_generics(2)] pub fn vsri_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { static_assert!(1 <= N && N <= 32); - vshiftins_v2i32(a, b, int32x2_t::splat(-N)) + vshiftrins_v2i32::(a, b) } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s32)"] @@ -63446,7 +63176,7 @@ pub fn vsri_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { #[rustc_legacy_const_generics(2)] pub fn vsriq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { static_assert!(1 <= N && N <= 32); - vshiftins_v4i32(a, b, int32x4_t::splat(-N)) + vshiftrins_v4i32::(a, b) } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s64)"] @@ -63458,7 +63188,7 @@ pub fn vsriq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { #[rustc_legacy_const_generics(2)] pub fn vsri_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { static_assert!(1 <= N && N <= 64); - vshiftins_v1i64(a, b, int64x1_t::splat(-N as i64)) + vshiftrins_v1i64::(a, b) } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s64)"] @@ -63470,7 +63200,7 @@ pub fn vsri_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { #[rustc_legacy_const_generics(2)] pub fn vsriq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { static_assert!(1 <= N && N <= 64); - vshiftins_v2i64(a, b, int64x2_t::splat(-N as i64)) + vshiftrins_v2i64::(a, b) } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u8)"] @@ -63482,13 +63212,7 @@ pub fn vsriq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { #[rustc_legacy_const_generics(2)] pub fn vsri_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { static_assert!(1 <= N && N <= 8); - unsafe { - transmute(vshiftins_v8i8( - transmute(a), - transmute(b), - int8x8_t::splat(-N as i8), - )) - } + unsafe { transmute(vshiftrins_v8i8::(transmute(a), transmute(b))) } } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u8)"] @@ -63500,13 +63224,7 @@ pub fn vsri_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { #[rustc_legacy_const_generics(2)] pub fn vsriq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { static_assert!(1 <= N && N <= 8); - unsafe { - transmute(vshiftins_v16i8( - transmute(a), - transmute(b), - int8x16_t::splat(-N as i8), - )) - } + unsafe { transmute(vshiftrins_v16i8::(transmute(a), transmute(b))) } } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u16)"] @@ -63518,13 +63236,7 @@ pub fn vsriq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { #[rustc_legacy_const_generics(2)] pub fn vsri_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { static_assert!(1 <= N && N <= 16); - unsafe { - transmute(vshiftins_v4i16( - transmute(a), - transmute(b), - int16x4_t::splat(-N as i16), - )) - } + unsafe { transmute(vshiftrins_v4i16::(transmute(a), transmute(b))) } } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u16)"] @@ -63536,13 +63248,7 @@ pub fn vsri_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { #[rustc_legacy_const_generics(2)] pub fn vsriq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { static_assert!(1 <= N && N <= 16); - unsafe { - transmute(vshiftins_v8i16( - transmute(a), - transmute(b), - int16x8_t::splat(-N as i16), - )) - } + unsafe { transmute(vshiftrins_v8i16::(transmute(a), transmute(b))) } } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u32)"] @@ -63554,13 +63260,7 @@ pub fn vsriq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { #[rustc_legacy_const_generics(2)] pub fn vsri_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { static_assert!(1 <= N && N <= 32); - unsafe { - transmute(vshiftins_v2i32( - transmute(a), - transmute(b), - int32x2_t::splat(-N), - )) - } + unsafe { transmute(vshiftrins_v2i32::(transmute(a), transmute(b))) } } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u32)"] @@ -63572,13 +63272,7 @@ pub fn vsri_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { #[rustc_legacy_const_generics(2)] pub fn vsriq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { static_assert!(1 <= N && N <= 32); - unsafe { - transmute(vshiftins_v4i32( - transmute(a), - transmute(b), - int32x4_t::splat(-N), - )) - } + unsafe { transmute(vshiftrins_v4i32::(transmute(a), transmute(b))) } } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u64)"] @@ -63590,13 +63284,7 @@ pub fn vsriq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { #[rustc_legacy_const_generics(2)] pub fn vsri_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { static_assert!(1 <= N && N <= 64); - unsafe { - transmute(vshiftins_v1i64( - transmute(a), - transmute(b), - int64x1_t::splat(-N as i64), - )) - } + unsafe { transmute(vshiftrins_v1i64::(transmute(a), transmute(b))) } } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u64)"] @@ -63608,13 +63296,7 @@ pub fn vsri_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { #[rustc_legacy_const_generics(2)] pub fn vsriq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { static_assert!(1 <= N && N <= 64); - unsafe { - transmute(vshiftins_v2i64( - transmute(a), - transmute(b), - int64x2_t::splat(-N as i64), - )) - } + unsafe { transmute(vshiftrins_v2i64::(transmute(a), transmute(b))) } } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p8)"] @@ -63626,13 +63308,7 @@ pub fn vsriq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { #[rustc_legacy_const_generics(2)] pub fn vsri_n_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { static_assert!(1 <= N && N <= 8); - unsafe { - transmute(vshiftins_v8i8( - transmute(a), - transmute(b), - int8x8_t::splat(-N as i8), - )) - } + unsafe { transmute(vshiftrins_v8i8::(transmute(a), transmute(b))) } } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p8)"] @@ -63644,13 +63320,7 @@ pub fn vsri_n_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { #[rustc_legacy_const_generics(2)] pub fn vsriq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { static_assert!(1 <= N && N <= 8); - unsafe { - transmute(vshiftins_v16i8( - transmute(a), - transmute(b), - int8x16_t::splat(-N as i8), - )) - } + unsafe { transmute(vshiftrins_v16i8::(transmute(a), transmute(b))) } } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p16)"] @@ -63662,13 +63332,7 @@ pub fn vsriq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { #[rustc_legacy_const_generics(2)] pub fn vsri_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { static_assert!(1 <= N && N <= 16); - unsafe { - transmute(vshiftins_v4i16( - transmute(a), - transmute(b), - int16x4_t::splat(-N as i16), - )) - } + unsafe { transmute(vshiftrins_v4i16::(transmute(a), transmute(b))) } } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p16)"] @@ -63680,13 +63344,7 @@ pub fn vsri_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { #[rustc_legacy_const_generics(2)] pub fn vsriq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { static_assert!(1 <= N && N <= 16); - unsafe { - transmute(vshiftins_v8i16( - transmute(a), - transmute(b), - int16x8_t::splat(-N as i16), - )) - } + unsafe { transmute(vshiftrins_v8i16::(transmute(a), transmute(b))) } } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16)"] @@ -63987,11 +63645,8 @@ pub unsafe fn vst1q_f16_x4(a: *mut f16, b: float16x8x4_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] pub unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t) { - vst1_v2f32( - ptr as *const i8, - transmute(a), - crate::mem::align_of::() as i32, - ) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v2f32::(ptr as *const i8, transmute(a)) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32)"] @@ -64004,11 +63659,8 @@ pub unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] pub unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t) { - vst1q_v4f32( - ptr as *const i8, - transmute(a), - crate::mem::align_of::() as i32, - ) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v4f32::(ptr as *const i8, transmute(a)) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8)"] @@ -64021,7 +63673,8 @@ pub unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] pub unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t) { - vst1_v8i8(ptr as *const i8, a, crate::mem::align_of::() as i32) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v8i8::(ptr as *const i8, a) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8)"] @@ -64034,7 +63687,8 @@ pub unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] pub unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t) { - vst1q_v16i8(ptr as *const i8, a, crate::mem::align_of::() as i32) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v16i8::(ptr as *const i8, a) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16)"] @@ -64047,7 +63701,8 @@ pub unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] pub unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t) { - vst1_v4i16(ptr as *const i8, a, crate::mem::align_of::() as i32) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v4i16::(ptr as *const i8, a) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16)"] @@ -64060,7 +63715,8 @@ pub unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] pub unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t) { - vst1q_v8i16(ptr as *const i8, a, crate::mem::align_of::() as i32) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v8i16::(ptr as *const i8, a) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32)"] @@ -64073,7 +63729,8 @@ pub unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] pub unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t) { - vst1_v2i32(ptr as *const i8, a, crate::mem::align_of::() as i32) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v2i32::(ptr as *const i8, a) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32)"] @@ -64086,7 +63743,8 @@ pub unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] pub unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t) { - vst1q_v4i32(ptr as *const i8, a, crate::mem::align_of::() as i32) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v4i32::(ptr as *const i8, a) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64)"] @@ -64099,7 +63757,8 @@ pub unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] pub unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t) { - vst1_v1i64(ptr as *const i8, a, crate::mem::align_of::() as i32) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v1i64::(ptr as *const i8, a) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64)"] @@ -64112,7 +63771,8 @@ pub unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] pub unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t) { - vst1q_v2i64(ptr as *const i8, a, crate::mem::align_of::() as i32) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v2i64::(ptr as *const i8, a) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8)"] @@ -64125,11 +63785,8 @@ pub unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] pub unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t) { - vst1_v8i8( - ptr as *const i8, - transmute(a), - crate::mem::align_of::() as i32, - ) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v8i8::(ptr as *const i8, transmute(a)) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8)"] @@ -64142,11 +63799,8 @@ pub unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] pub unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t) { - vst1q_v16i8( - ptr as *const i8, - transmute(a), - crate::mem::align_of::() as i32, - ) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v16i8::(ptr as *const i8, transmute(a)) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16)"] @@ -64159,11 +63813,8 @@ pub unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] pub unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t) { - vst1_v4i16( - ptr as *const i8, - transmute(a), - crate::mem::align_of::() as i32, - ) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v4i16::(ptr as *const i8, transmute(a)) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16)"] @@ -64176,11 +63827,8 @@ pub unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] pub unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t) { - vst1q_v8i16( - ptr as *const i8, - transmute(a), - crate::mem::align_of::() as i32, - ) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v8i16::(ptr as *const i8, transmute(a)) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32)"] @@ -64193,11 +63841,8 @@ pub unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] pub unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t) { - vst1_v2i32( - ptr as *const i8, - transmute(a), - crate::mem::align_of::() as i32, - ) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v2i32::(ptr as *const i8, transmute(a)) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32)"] @@ -64210,11 +63855,8 @@ pub unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] pub unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t) { - vst1q_v4i32( - ptr as *const i8, - transmute(a), - crate::mem::align_of::() as i32, - ) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v4i32::(ptr as *const i8, transmute(a)) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64)"] @@ -64227,11 +63869,8 @@ pub unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] pub unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t) { - vst1_v1i64( - ptr as *const i8, - transmute(a), - crate::mem::align_of::() as i32, - ) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v1i64::(ptr as *const i8, transmute(a)) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64)"] @@ -64244,11 +63883,8 @@ pub unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] pub unsafe fn vst1q_u64(ptr: *mut u64, a: uint64x2_t) { - vst1q_v2i64( - ptr as *const i8, - transmute(a), - crate::mem::align_of::() as i32, - ) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v2i64::(ptr as *const i8, transmute(a)) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8)"] @@ -64261,11 +63897,8 @@ pub unsafe fn vst1q_u64(ptr: *mut u64, a: uint64x2_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] pub unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t) { - vst1_v8i8( - ptr as *const i8, - transmute(a), - crate::mem::align_of::() as i32, - ) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v8i8::(ptr as *const i8, transmute(a)) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8)"] @@ -64278,11 +63911,8 @@ pub unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] pub unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t) { - vst1q_v16i8( - ptr as *const i8, - transmute(a), - crate::mem::align_of::() as i32, - ) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v16i8::(ptr as *const i8, transmute(a)) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16)"] @@ -64295,11 +63925,8 @@ pub unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] pub unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t) { - vst1_v4i16( - ptr as *const i8, - transmute(a), - crate::mem::align_of::() as i32, - ) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v4i16::(ptr as *const i8, transmute(a)) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16)"] @@ -64312,11 +63939,8 @@ pub unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) { - vst1q_v8i16( - ptr as *const i8, - transmute(a), - crate::mem::align_of::() as i32, - ) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v8i16::(ptr as *const i8, transmute(a)) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64)"] @@ -64329,11 +63953,8 @@ pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) { - vst1_v1i64( - ptr as *const i8, - transmute(a), - crate::mem::align_of::() as i32, - ) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v1i64::(ptr as *const i8, transmute(a)) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64)"] @@ -64346,11 +63967,8 @@ pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) { - vst1q_v2i64( - ptr as *const i8, - transmute(a), - crate::mem::align_of::() as i32, - ) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v2i64::(ptr as *const i8, transmute(a)) } #[doc = "Store multiple single-element structures to one, two, three, or four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x2)"] @@ -66973,175 +66591,145 @@ pub unsafe fn vst1q_p16_x3(a: *mut p16, b: poly16x8x3_t) { pub unsafe fn vst1q_p16_x4(a: *mut p16, b: poly16x8x4_t) { vst1q_s16_x4(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_v1i64)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] -unsafe fn vst1_v1i64(addr: *const i8, val: int64x1_t, align: i32) { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1_v1i64(addr: *const i8, val: int64x1_t) { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v1i64.p0")] fn _vst1_v1i64(addr: *const i8, val: int64x1_t, align: i32); } - _vst1_v1i64(addr, val, align) + _vst1_v1i64(addr, val, ALIGN) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_v2f32)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] -unsafe fn vst1_v2f32(addr: *const i8, val: float32x2_t, align: i32) { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1_v2f32(addr: *const i8, val: float32x2_t) { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v2f32.p0")] fn _vst1_v2f32(addr: *const i8, val: float32x2_t, align: i32); } - _vst1_v2f32(addr, val, align) + _vst1_v2f32(addr, val, ALIGN) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_v2i32)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] -unsafe fn vst1_v2i32(addr: *const i8, val: int32x2_t, align: i32) { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1_v2i32(addr: *const i8, val: int32x2_t) { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v2i32.p0")] fn _vst1_v2i32(addr: *const i8, val: int32x2_t, align: i32); } - _vst1_v2i32(addr, val, align) + _vst1_v2i32(addr, val, ALIGN) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_v4i16)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] -unsafe fn vst1_v4i16(addr: *const i8, val: int16x4_t, align: i32) { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1_v4i16(addr: *const i8, val: int16x4_t) { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4i16.p0")] fn _vst1_v4i16(addr: *const i8, val: int16x4_t, align: i32); } - _vst1_v4i16(addr, val, align) + _vst1_v4i16(addr, val, ALIGN) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_v8i8)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] -unsafe fn vst1_v8i8(addr: *const i8, val: int8x8_t, align: i32) { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1_v8i8(addr: *const i8, val: int8x8_t) { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v8i8.p0")] fn _vst1_v8i8(addr: *const i8, val: int8x8_t, align: i32); } - _vst1_v8i8(addr, val, align) + _vst1_v8i8(addr, val, ALIGN) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_v16i8)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] -unsafe fn vst1q_v16i8(addr: *const i8, val: int8x16_t, align: i32) { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1q_v16i8(addr: *const i8, val: int8x16_t) { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v16i8.p0")] fn _vst1q_v16i8(addr: *const i8, val: int8x16_t, align: i32); } - _vst1q_v16i8(addr, val, align) + _vst1q_v16i8(addr, val, ALIGN) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_v2i64)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] -unsafe fn vst1q_v2i64(addr: *const i8, val: int64x2_t, align: i32) { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1q_v2i64(addr: *const i8, val: int64x2_t) { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v2i64.p0")] fn _vst1q_v2i64(addr: *const i8, val: int64x2_t, align: i32); } - _vst1q_v2i64(addr, val, align) + _vst1q_v2i64(addr, val, ALIGN) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_v4f32)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] -unsafe fn vst1q_v4f32(addr: *const i8, val: float32x4_t, align: i32) { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1q_v4f32(addr: *const i8, val: float32x4_t) { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4f32.p0")] fn _vst1q_v4f32(addr: *const i8, val: float32x4_t, align: i32); } - _vst1q_v4f32(addr, val, align) + _vst1q_v4f32(addr, val, ALIGN) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_v4i32)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] -unsafe fn vst1q_v4i32(addr: *const i8, val: int32x4_t, align: i32) { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1q_v4i32(addr: *const i8, val: int32x4_t) { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4i32.p0")] fn _vst1q_v4i32(addr: *const i8, val: int32x4_t, align: i32); } - _vst1q_v4i32(addr, val, align) + _vst1q_v4i32(addr, val, ALIGN) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_v8i16)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] -unsafe fn vst1q_v8i16(addr: *const i8, val: int16x8_t, align: i32) { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1q_v8i16(addr: *const i8, val: int16x8_t) { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v8i16.p0")] fn _vst1q_v8i16(addr: *const i8, val: int16x8_t, align: i32); } - _vst1q_v8i16(addr, val, align) + _vst1q_v8i16(addr, val, ALIGN) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_v4f16)"] diff --git a/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml index 43dd3b9031..4816d17bd6 100644 --- a/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml +++ b/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml @@ -11787,39 +11787,39 @@ intrinsics: - name: "vld1{type[0]}" visibility: private - doc: "Load multiple single-element structures to one, two, three, or four registers" - arguments: ["a: {type[1]}", "b: {type[2]}"] - return_type: "{neon_type[3]}" + arguments: ["a: {type[1]}"] + static_defs: ["const ALIGN: i32"] + return_type: "{neon_type[2]}" attr: + - FnCall: [rustc_legacy_const_generics, ['1']] - *target-is-arm - *enable-v7 - # - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld1]]}]] + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vld1.8"', "ALIGN = 0"]]}]] - *neon-arm-unstable safety: unsafe: [neon] types: - - ["_v8i8", "*const i8", "i32", "int8x8_t"] - - ["q_v16i8", "*const i8", "i32", "int8x16_t"] - - ["_v4i16", "*const i8", "i32", "int16x4_t"] - - ["q_v8i16", "*const i8", "i32", "int16x8_t"] - - ["_v2i32", "*const i8", "i32", "int32x2_t"] - - ["q_v4i32", "*const i8", "i32", "int32x4_t"] - - ["_v1i64", "*const i8", "i32", "int64x1_t"] - - ["q_v2i64", "*const i8", "i32", "int64x2_t"] - - ["_v2f32", "*const i8", "i32", "float32x2_t"] - - ["q_v4f32", "*const i8", "i32", "float32x4_t"] + - ["_v8i8", "*const i8", "int8x8_t" ] + - ["q_v16i8", "*const i8", "int8x16_t" ] + - ["_v4i16", "*const i8", "int16x4_t" ] + - ["q_v8i16", "*const i8", "int16x8_t" ] + - ["_v2i32", "*const i8", "int32x2_t" ] + - ["q_v4i32", "*const i8", "int32x4_t" ] + - ["_v1i64", "*const i8", "int64x1_t" ] + - ["q_v2i64", "*const i8", "int64x2_t" ] + - ["_v2f32", "*const i8", "float32x2_t"] + - ["q_v4f32", "*const i8", "float32x4_t"] compose: - LLVMLink: name: "vld1.{type[0]}" + arguments: ["a: {type[1]}", "b: i32"] links: - - link: "llvm.arm.neon.vld1.{neon_type[3]}" + - link: "llvm.arm.neon.vld1.{neon_type[2]}" arch: arm - - FnCall: ["_vld1{type[0]}", [a, b]] - + - FnCall: ["_vld1{type[0]}", [a, ALIGN]] - name: "vld1{type[0]}" visibility: private - doc: "Load multiple single-element structures to one, two, three, or four registers" arguments: ["a: {type[1]}", "b: {type[2]}"] return_type: "{neon_type[3]}" attr: @@ -11855,19 +11855,23 @@ intrinsics: - *neon-arm-unstable - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['{type[2]}']]}]] types: - - ['*const i8', int8x8_t, '"vld1.8"', 'crate::mem::align_of::() as i32', '_v8i8'] - - ['*const i8', int8x16_t, '"vld1.8"', 'crate::mem::align_of::() as i32', 'q_v16i8'] - - ['*const i16', int16x4_t, '"vld1.16"', 'crate::mem::align_of::() as i32', '_v4i16'] - - ['*const i16', int16x8_t, '"vld1.16"', 'crate::mem::align_of::() as i32', 'q_v8i16'] - - ['*const i32', int32x2_t, 'vldr', 'crate::mem::align_of::() as i32', '_v2i32'] - - ['*const i32', int32x4_t, '"vld1.32"', 'crate::mem::align_of::() as i32', 'q_v4i32'] - - ['*const i64', int64x1_t, 'vldr', 'crate::mem::align_of::() as i32', '_v1i64'] - - ['*const i64', int64x2_t, '"vld1.64"', 'crate::mem::align_of::() as i32', 'q_v2i64'] + - ['*const i8', int8x8_t, '"vld1.8"', 'crate::mem::align_of::()', '_v8i8' ] + - ['*const i8', int8x16_t, '"vld1.8"', 'crate::mem::align_of::()', 'q_v16i8'] + - ['*const i16', int16x4_t, '"vld1.16"', 'crate::mem::align_of::()', '_v4i16' ] + - ['*const i16', int16x8_t, '"vld1.16"', 'crate::mem::align_of::()', 'q_v8i16'] + - ['*const i32', int32x2_t, 'vldr', 'crate::mem::align_of::()', '_v2i32' ] + - ['*const i32', int32x4_t, '"vld1.32"', 'crate::mem::align_of::()', 'q_v4i32'] + - ['*const i64', int64x1_t, 'vldr', 'crate::mem::align_of::()', '_v1i64' ] + - ['*const i64', int64x2_t, '"vld1.64"', 'crate::mem::align_of::()', 'q_v2i64'] compose: + - Const: + - ALIGN + - "i32" + - "{type[3]} as i32" - FnCall: - "vld1{type[4]}" - - - 'ptr as *const i8' - - '{type[3]}' + - ['ptr as *const i8'] + - ['ALIGN'] - name: "vld1{neon_type[1].no}" doc: "Load multiple single-element structures to one, two, three, or four registers." @@ -11881,28 +11885,32 @@ intrinsics: - *neon-arm-unstable - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['{type[2]}']]}]] types: - - ['*const u8', uint8x8_t, '"vld1.8"', 'neon,v7', 'crate::mem::align_of::() as i32', '_v8i8'] - - ['*const u8', uint8x16_t, '"vld1.8"', 'neon,v7', 'crate::mem::align_of::() as i32', 'q_v16i8'] - - ['*const u16', uint16x4_t, '"vld1.16"', 'neon,v7', 'crate::mem::align_of::() as i32', '_v4i16'] - - ['*const u16', uint16x8_t, '"vld1.16"', 'neon,v7', 'crate::mem::align_of::() as i32', 'q_v8i16'] - - ['*const u32', uint32x2_t, 'vldr', 'neon,v7', 'crate::mem::align_of::() as i32', '_v2i32'] - - ['*const u32', uint32x4_t, '"vld1.32"', 'neon,v7', 'crate::mem::align_of::() as i32', 'q_v4i32'] - - ['*const u64', uint64x1_t, 'vldr', 'neon,v7', 'crate::mem::align_of::() as i32', '_v1i64'] - - ['*const u64', uint64x2_t, '"vld1.64"', 'neon,v7', 'crate::mem::align_of::() as i32', 'q_v2i64'] - - ['*const p8', poly8x8_t, '"vld1.8"', 'neon,v7', 'crate::mem::align_of::() as i32', '_v8i8'] - - ['*const p8', poly8x16_t, '"vld1.8"', 'neon,v7', 'crate::mem::align_of::() as i32', 'q_v16i8'] - - ['*const p16', poly16x4_t, '"vld1.16"', 'neon,v7', 'crate::mem::align_of::() as i32', '_v4i16'] - - ['*const p16', poly16x8_t, '"vld1.16"', 'neon,v7', 'crate::mem::align_of::() as i32', 'q_v8i16'] - - ['*const p64', poly64x2_t, '"vld1.64"', 'neon,aes', 'crate::mem::align_of::() as i32', 'q_v2i64'] - - ['*const f32', float32x2_t, 'vldr', 'neon,v7', 'crate::mem::align_of::() as i32', '_v2f32'] - - ['*const f32', float32x4_t, '"vld1.32"', 'neon,v7', 'crate::mem::align_of::() as i32', 'q_v4f32'] - compose: + - ['*const u8', uint8x8_t, '"vld1.8"', 'neon,v7', 'crate::mem::align_of::()', '_v8i8' ] + - ['*const u8', uint8x16_t, '"vld1.8"', 'neon,v7', 'crate::mem::align_of::()', 'q_v16i8'] + - ['*const u16', uint16x4_t, '"vld1.16"', 'neon,v7', 'crate::mem::align_of::()', '_v4i16' ] + - ['*const u16', uint16x8_t, '"vld1.16"', 'neon,v7', 'crate::mem::align_of::()', 'q_v8i16'] + - ['*const u32', uint32x2_t, 'vldr', 'neon,v7', 'crate::mem::align_of::()', '_v2i32' ] + - ['*const u32', uint32x4_t, '"vld1.32"', 'neon,v7', 'crate::mem::align_of::()', 'q_v4i32'] + - ['*const u64', uint64x1_t, 'vldr', 'neon,v7', 'crate::mem::align_of::()', '_v1i64' ] + - ['*const u64', uint64x2_t, '"vld1.64"', 'neon,v7', 'crate::mem::align_of::()', 'q_v2i64'] + - ['*const p8', poly8x8_t, '"vld1.8"', 'neon,v7', 'crate::mem::align_of::()', '_v8i8' ] + - ['*const p8', poly8x16_t, '"vld1.8"', 'neon,v7', 'crate::mem::align_of::()', 'q_v16i8'] + - ['*const p16', poly16x4_t, '"vld1.16"', 'neon,v7', 'crate::mem::align_of::()', '_v4i16' ] + - ['*const p16', poly16x8_t, '"vld1.16"', 'neon,v7', 'crate::mem::align_of::()', 'q_v8i16'] + - ['*const p64', poly64x2_t, '"vld1.64"', 'neon,aes', 'crate::mem::align_of::()', 'q_v2i64'] + - ['*const f32', float32x2_t, 'vldr', 'neon,v7', 'crate::mem::align_of::()', '_v2f32' ] + - ['*const f32', float32x4_t, '"vld1.32"', 'neon,v7', 'crate::mem::align_of::()', 'q_v4f32'] + compose: + - Const: + - ALIGN + - "i32" + - "{type[4]} as i32" - FnCall: - transmute - - FnCall: - "vld1{type[5]}" - - - 'ptr as *const i8' - - '{type[4]}' + - ['ptr as *const i8'] + - ['ALIGN'] - name: "vld1{neon_type[1].no}" doc: "Load multiple single-element structures to one, two, three, or four registers." @@ -13724,32 +13732,35 @@ intrinsics: - name: "vst1{type[0]}" visibility: private - doc: "Store multiple single-element structures from one, two, three, or four registers." - arguments: ["addr: {type[1]}", "val: {neon_type[2]}", "align: {type[3]}"] + arguments: ["addr: {type[1]}", "val: {neon_type[2]}"] + static_defs: ["const ALIGN: i32"] safety: unsafe: [neon] attr: - *target-is-arm - *neon-v7 - *neon-arm-unstable - - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vst1.{type[4]}"']]}]] + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vst1.{type[3]}"', "ALIGN = 0"]]}]] + - FnCall: ["rustc_legacy_const_generics", ['2']] types: - - ['_v8i8', '* const i8', int8x8_t, i32, '8'] - - ['q_v16i8', '* const i8', int8x16_t, i32, '8'] - - ['_v4i16', '* const i8', int16x4_t, i32, '16'] - - ['q_v8i16', '* const i8', int16x8_t, i32, '16'] - - ['_v2i32', '* const i8', int32x2_t, i32, '32'] - - ['q_v4i32', '* const i8', int32x4_t, i32, '32'] - - ['_v1i64', '* const i8', int64x1_t, i32, '64'] - - ['q_v2i64', '* const i8', int64x2_t, i32, '64'] - - ['_v2f32', '* const i8', float32x2_t, i32, '32'] - - ['q_v4f32', '* const i8', float32x4_t, i32, '32'] + - ['_v8i8', '* const i8', int8x8_t, '8' ] + - ['q_v16i8', '* const i8', int8x16_t, '8' ] + - ['_v4i16', '* const i8', int16x4_t, '16'] + - ['q_v8i16', '* const i8', int16x8_t, '16'] + - ['_v2i32', '* const i8', int32x2_t, '32'] + - ['q_v4i32', '* const i8', int32x4_t, '32'] + - ['_v1i64', '* const i8', int64x1_t, '64'] + - ['q_v2i64', '* const i8', int64x2_t, '64'] + - ['_v2f32', '* const i8', float32x2_t, '32'] + - ['q_v4f32', '* const i8', float32x4_t, '32'] compose: - LLVMLink: name: "_vst1{type[0]}" + arguments: ["addr: {type[1]}", "val: {neon_type[2]}", "align: i32"] links: - link: "llvm.arm.neon.vst1.{neon_type[2]}.p0" arch: arm + - FnCall: ["_vst1{type[0]}",[addr, val, ALIGN]] - name: "vst1{type[0]}" visibility: private @@ -13785,37 +13796,39 @@ intrinsics: - *neon-arm-unstable - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vst1.{type[2]}"']]}]] types: - - ['*mut i8', int8x8_t, '8', 'a', 'crate::mem::align_of::() as i32', '_v8i8'] - - ['*mut i8', int8x16_t, '8', 'a', 'crate::mem::align_of::() as i32', 'q_v16i8'] - - ['*mut i16', int16x4_t, '16', 'a', 'crate::mem::align_of::() as i32', '_v4i16'] - - ['*mut i16', int16x8_t, '16', 'a', 'crate::mem::align_of::() as i32', 'q_v8i16'] - - ['*mut i32', int32x2_t, '32', 'a', 'crate::mem::align_of::() as i32', '_v2i32'] - - ['*mut i32', int32x4_t, '32', 'a', 'crate::mem::align_of::() as i32', 'q_v4i32'] - - ['*mut i64', int64x1_t, '64', 'a', 'crate::mem::align_of::() as i32', '_v1i64'] - - ['*mut i64', int64x2_t, '64', 'a', 'crate::mem::align_of::() as i32', 'q_v2i64'] - - ['*mut u8', uint8x8_t, '8', 'transmute(a)', 'crate::mem::align_of::() as i32', '_v8i8'] - - ['*mut u8', uint8x16_t, '8', 'transmute(a)', 'crate::mem::align_of::() as i32', 'q_v16i8'] - - ['*mut u16', uint16x4_t, '16', 'transmute(a)', 'crate::mem::align_of::() as i32', '_v4i16'] - - ['*mut u16', uint16x8_t, '16', 'transmute(a)', 'crate::mem::align_of::() as i32', 'q_v8i16'] - - ['*mut u32', uint32x2_t, '32', 'transmute(a)', 'crate::mem::align_of::() as i32', '_v2i32'] - - ['*mut u32', uint32x4_t, '32', 'transmute(a)', 'crate::mem::align_of::() as i32', 'q_v4i32'] - - ['*mut u64', uint64x1_t, '64', 'transmute(a)', 'crate::mem::align_of::() as i32', '_v1i64'] - - ['*mut u64', uint64x2_t, '64', 'transmute(a)', 'crate::mem::align_of::() as i32', 'q_v2i64'] - - ['*mut p8', poly8x8_t, '8', 'transmute(a)', 'crate::mem::align_of::() as i32', '_v8i8'] - - ['*mut p8', poly8x16_t, '8', 'transmute(a)', 'crate::mem::align_of::() as i32', 'q_v16i8'] - - ['*mut p16', poly16x4_t, '16', 'transmute(a)', 'crate::mem::align_of::() as i32', '_v4i16'] - - ['*mut p16', poly16x8_t, '16', 'transmute(a)', 'crate::mem::align_of::() as i32', 'q_v8i16'] - - ['*mut p64', poly64x1_t, '64', 'transmute(a)', 'crate::mem::align_of::() as i32', '_v1i64'] - - ['*mut p64', poly64x2_t, '64', 'transmute(a)', 'crate::mem::align_of::() as i32', 'q_v2i64'] - - ['*mut f32', float32x2_t, '32', 'transmute(a)', 'crate::mem::align_of::() as i32', '_v2f32'] - - ['*mut f32', float32x4_t, '32', 'transmute(a)', 'crate::mem::align_of::() as i32', 'q_v4f32'] - compose: + - ['*mut i8', int8x8_t, '8', 'a', 'crate::mem::align_of::()', '_v8i8' ] + - ['*mut i8', int8x16_t, '8', 'a', 'crate::mem::align_of::()', 'q_v16i8'] + - ['*mut i16', int16x4_t, '16', 'a', 'crate::mem::align_of::()', '_v4i16' ] + - ['*mut i16', int16x8_t, '16', 'a', 'crate::mem::align_of::()', 'q_v8i16'] + - ['*mut i32', int32x2_t, '32', 'a', 'crate::mem::align_of::()', '_v2i32' ] + - ['*mut i32', int32x4_t, '32', 'a', 'crate::mem::align_of::()', 'q_v4i32'] + - ['*mut i64', int64x1_t, '64', 'a', 'crate::mem::align_of::()', '_v1i64' ] + - ['*mut i64', int64x2_t, '64', 'a', 'crate::mem::align_of::()', 'q_v2i64'] + - ['*mut u8', uint8x8_t, '8', 'transmute(a)', 'crate::mem::align_of::()', '_v8i8' ] + - ['*mut u8', uint8x16_t, '8', 'transmute(a)', 'crate::mem::align_of::()', 'q_v16i8'] + - ['*mut u16', uint16x4_t, '16', 'transmute(a)', 'crate::mem::align_of::()', '_v4i16' ] + - ['*mut u16', uint16x8_t, '16', 'transmute(a)', 'crate::mem::align_of::()', 'q_v8i16'] + - ['*mut u32', uint32x2_t, '32', 'transmute(a)', 'crate::mem::align_of::()', '_v2i32' ] + - ['*mut u32', uint32x4_t, '32', 'transmute(a)', 'crate::mem::align_of::()', 'q_v4i32'] + - ['*mut u64', uint64x1_t, '64', 'transmute(a)', 'crate::mem::align_of::()', '_v1i64' ] + - ['*mut u64', uint64x2_t, '64', 'transmute(a)', 'crate::mem::align_of::()', 'q_v2i64'] + - ['*mut p8', poly8x8_t, '8', 'transmute(a)', 'crate::mem::align_of::()', '_v8i8' ] + - ['*mut p8', poly8x16_t, '8', 'transmute(a)', 'crate::mem::align_of::()', 'q_v16i8'] + - ['*mut p16', poly16x4_t, '16', 'transmute(a)', 'crate::mem::align_of::()', '_v4i16' ] + - ['*mut p16', poly16x8_t, '16', 'transmute(a)', 'crate::mem::align_of::()', 'q_v8i16'] + - ['*mut p64', poly64x1_t, '64', 'transmute(a)', 'crate::mem::align_of::()', '_v1i64' ] + - ['*mut p64', poly64x2_t, '64', 'transmute(a)', 'crate::mem::align_of::()', 'q_v2i64'] + - ['*mut f32', float32x2_t, '32', 'transmute(a)', 'crate::mem::align_of::()', '_v2f32' ] + - ['*mut f32', float32x4_t, '32', 'transmute(a)', 'crate::mem::align_of::()', 'q_v4f32'] + compose: + - Const: + - ALIGN + - "i32" + - "{type[4]} as i32" - FnCall: - "vst1{type[5]}" - - - 'ptr as *const i8' - - '{type[3]}' - - '{type[4]}' - + - ['ptr as *const i8','{type[3]}'] + - ['ALIGN'] - name: "vst1{neon_type[1].no}" doc: "Store multiple single-element structures from one, two, three, or four registers." @@ -13839,32 +13852,64 @@ intrinsics: - '{type[3]}' - '{type[4]}' - - - name: "vshiftins{type[0]}" + - name: "vshiftlins{type[0]}" visibility: private + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + safety: safe + attr: + - *target-is-arm + - *neon-v7 + - *neon-arm-unstable + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ['const N: i32'] + types: + - ['_v8i8', 'int8x8_t', '8', 'int8x8_t([N as i8; 8 ])' ] + - ['_v16i8', 'int8x16_t', '8', 'int8x16_t([N as i8; 16])'] + - ['_v4i16', 'int16x4_t', '16', 'int16x4_t([N as i16; 4])'] + - ['_v8i16', 'int16x8_t', '16', 'int16x8_t([N as i16; 8])'] + - ['_v2i32', 'int32x2_t', '32', 'int32x2_t([N; 2])' ] + - ['_v4i32', 'int32x4_t', '32', 'int32x4_t([N; 4])' ] + - ['_v1i64', 'int64x1_t', '64', 'int64x1_t([N as i64; 1])'] + - ['_v2i64', 'int64x2_t', '64', 'int64x2_t([N as i64; 2])'] + compose: + - LLVMLink: + name: "_vshiftins{type[0]}" + arguments: ["a: {type[1]}", "b: {type[1]}", "c: {type[1]}"] + links: + - link: "llvm.arm.neon.vshiftins.{neon_type[1]}" + arch: arm + - FnCall: ["_vshiftlins{type[0]}", [a,b, "const {{ {type[3]} }}"], [], true] + + - name: "vshiftrins{type[0]}" doc: "Shift Right and Insert (immediate)" - arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + visibility: private + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"] return_type: "{neon_type[1]}" safety: safe attr: - *target-is-arm - *neon-v7 - *neon-arm-unstable + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ['const N: i32'] types: - - ['_v8i8', "int8x8_t", '8'] - - ['_v16i8', 'int8x16_t', '8'] - - ['_v4i16', 'int16x4_t', '16'] - - ['_v8i16', 'int16x8_t', '16'] - - ['_v2i32', 'int32x2_t', '32'] - - ['_v4i32', 'int32x4_t', '32'] - - ['_v1i64', 'int64x1_t', '64'] - - ['_v2i64', 'int64x2_t', '64'] + - ['_v8i8', 'int8x8_t', '8', 'int8x8_t([-N as i8; 8 ])' ] + - ['_v16i8', 'int8x16_t', '8', 'int8x16_t([-N as i8; 16])'] + - ['_v4i16', 'int16x4_t', '16', 'int16x4_t([-N as i16; 4])'] + - ['_v8i16', 'int16x8_t', '16', 'int16x8_t([-N as i16; 8])'] + - ['_v2i32', 'int32x2_t', '32', 'int32x2_t([-N; 2])' ] + - ['_v4i32', 'int32x4_t', '32', 'int32x4_t([-N; 4])' ] + - ['_v1i64', 'int64x1_t', '64', 'int64x1_t([-N as i64; 1])'] + - ['_v2i64', 'int64x2_t', '64', 'int64x2_t([-N as i64; 2])'] compose: - LLVMLink: name: "_vshiftins{type[0]}" + arguments: ["a: {type[1]}", "b: {type[1]}", "c: {type[1]}"] links: - link: "llvm.arm.neon.vshiftins.{neon_type[1]}" arch: arm + - FnCall: ["_vshiftrins{type[0]}", [a,b, "const {{ {type[3]} }}"], [], true] - name: "vsri{neon_type[0].N}" doc: "Shift Right and Insert (immediate)" @@ -13879,18 +13924,18 @@ intrinsics: - FnCall: [rustc_legacy_const_generics, ['2']] static_defs: ['const N: i32'] types: - - [uint8x8_t, "neon,v7", '8', '1 <= N && N <= 8', 'v8i8', 'int8x8_t::splat', '-N as i8'] - - [uint8x16_t, "neon,v7", '8', '1 <= N && N <= 8', 'v16i8', 'int8x16_t::splat', '-N as i8'] - - [uint16x4_t, "neon,v7", '16', '1 <= N && N <= 16', 'v4i16', 'int16x4_t::splat', '-N as i16'] - - [uint16x8_t, "neon,v7", '16', '1 <= N && N <= 16', 'v8i16', 'int16x8_t::splat', '-N as i16'] - - [uint32x2_t, "neon,v7", '32', '1 <= N && N <= 32', 'v2i32', 'int32x2_t::splat', '-N'] - - [uint32x4_t, "neon,v7", '32', '1 <= N && N <= 32', 'v4i32', 'int32x4_t::splat', '-N'] - - [uint64x1_t, "neon,v7", '64', '1 <= N && N <= 64', 'v1i64', 'int64x1_t::splat', '-N as i64'] - - [uint64x2_t, "neon,v7", '64', '1 <= N && N <= 64', 'v2i64', 'int64x2_t::splat', '-N as i64'] - - [poly8x8_t, "neon,v7", '8', '1 <= N && N <= 8', 'v8i8', 'int8x8_t::splat', '-N as i8'] - - [poly8x16_t, "neon,v7", '8', '1 <= N && N <= 8', 'v16i8', 'int8x16_t::splat', '-N as i8'] - - [poly16x4_t, "neon,v7", '16', '1 <= N && N <= 16', 'v4i16', 'int16x4_t::splat', '-N as i16'] - - [poly16x8_t, "neon,v7", '16', '1 <= N && N <= 16', 'v8i16', 'int16x8_t::splat', '-N as i16'] + - [uint8x8_t, "neon,v7", '8', '1 <= N && N <= 8', 'v8i8' ] + - [uint8x16_t, "neon,v7", '8', '1 <= N && N <= 8', 'v16i8'] + - [uint16x4_t, "neon,v7", '16', '1 <= N && N <= 16', 'v4i16'] + - [uint16x8_t, "neon,v7", '16', '1 <= N && N <= 16', 'v8i16'] + - [uint32x2_t, "neon,v7", '32', '1 <= N && N <= 32', 'v2i32'] + - [uint32x4_t, "neon,v7", '32', '1 <= N && N <= 32', 'v4i32'] + - [uint64x1_t, "neon,v7", '64', '1 <= N && N <= 64', 'v1i64'] + - [uint64x2_t, "neon,v7", '64', '1 <= N && N <= 64', 'v2i64'] + - [poly8x8_t, "neon,v7", '8', '1 <= N && N <= 8', 'v8i8' ] + - [poly8x16_t, "neon,v7", '8', '1 <= N && N <= 8', 'v16i8'] + - [poly16x4_t, "neon,v7", '16', '1 <= N && N <= 16', 'v4i16'] + - [poly16x8_t, "neon,v7", '16', '1 <= N && N <= 16', 'v8i16'] ## These live in ./crates/core_arch/src/arm/neon.rs #- [poly64x1_t, "neon,v7,aes", '64', '1 <= N && N <= 64', 'v1i64', 'int64x1_t::splat', '-N as i64'] #- [poly64x2_t, "neon,v7,aes", '64', '1 <= N && N <= 64', 'v2i64', 'int64x2_t::splat', '-N as i64'] @@ -13899,10 +13944,9 @@ intrinsics: - FnCall: - 'transmute' - - FnCall: - - "vshiftins_{type[4]}" + - "vshiftrins_{type[4]}::" - - FnCall: [transmute, [a]] - FnCall: [transmute, [b]] - - FnCall: ["{type[5]}", ["{type[6]}"]] - name: "vsri{neon_type[0].N}" doc: "Shift Right and Insert (immediate)" @@ -13917,21 +13961,20 @@ intrinsics: - FnCall: [rustc_legacy_const_generics, ['2']] safety: safe types: - - [int8x8_t, '8', '1 <= N && N <= 8', 'v8i8', 'int8x8_t::splat', '-N as i8'] - - [int8x16_t, '8', '1 <= N && N <= 8', 'v16i8', 'int8x16_t::splat', '-N as i8'] - - [int16x4_t, '16', '1 <= N && N <= 16', 'v4i16', 'int16x4_t::splat', '-N as i16'] - - [int16x8_t, '16', '1 <= N && N <= 16', 'v8i16', 'int16x8_t::splat', '-N as i16'] - - [int32x2_t, '32', '1 <= N && N <= 32', 'v2i32', 'int32x2_t::splat', '-N'] - - [int32x4_t, '32', '1 <= N && N <= 32', 'v4i32', 'int32x4_t::splat', '-N'] - - [int64x1_t, '64', '1 <= N && N <= 64', 'v1i64', 'int64x1_t::splat', '-N as i64'] - - [int64x2_t, '64', '1 <= N && N <= 64', 'v2i64', 'int64x2_t::splat', '-N as i64'] + - [int8x8_t, '8', '1 <= N && N <= 8', 'v8i8' ] + - [int8x16_t, '8', '1 <= N && N <= 8', 'v16i8'] + - [int16x4_t, '16', '1 <= N && N <= 16', 'v4i16'] + - [int16x8_t, '16', '1 <= N && N <= 16', 'v8i16'] + - [int32x2_t, '32', '1 <= N && N <= 32', 'v2i32'] + - [int32x4_t, '32', '1 <= N && N <= 32', 'v4i32'] + - [int64x1_t, '64', '1 <= N && N <= 64', 'v1i64'] + - [int64x2_t, '64', '1 <= N && N <= 64', 'v2i64'] compose: - FnCall: ["static_assert!", ['{type[2]}']] - FnCall: - - "vshiftins_{type[3]}" + - "vshiftrins_{type[3]}::" - - a - b - - FnCall: ["{type[4]}", ["{type[5]}"]] - name: "vsli{neon_type[0].N}" doc: "Shift Left and Insert (immediate)" @@ -13946,18 +13989,18 @@ intrinsics: - FnCall: [rustc_legacy_const_generics, ['2']] static_defs: ['const N: i32'] types: - - [uint8x8_t, "neon,v7", '8', 'static_assert_uimm_bits!', 'N, 3', 'v8i8', 'int8x8_t::splat', 'N as i8'] - - [uint8x16_t, "neon,v7", '8', 'static_assert_uimm_bits!', 'N, 3', 'v16i8', 'int8x16_t::splat', 'N as i8'] - - [uint16x4_t, "neon,v7", '16', 'static_assert_uimm_bits!', 'N, 4', 'v4i16', 'int16x4_t::splat', 'N as i16'] - - [uint16x8_t, "neon,v7", '16', 'static_assert_uimm_bits!', 'N, 4', 'v8i16', 'int16x8_t::splat', 'N as i16'] - - [uint32x2_t, "neon,v7", '32', 'static_assert!', 'N >= 0 && N <= 31', 'v2i32', 'int32x2_t::splat', 'N'] - - [uint32x4_t, "neon,v7", '32', 'static_assert!', 'N >= 0 && N <= 31', 'v4i32', 'int32x4_t::splat', 'N'] - - [uint64x1_t, "neon,v7", '64', 'static_assert!', 'N >= 0 && N <= 63', 'v1i64', 'int64x1_t::splat', 'N as i64'] - - [uint64x2_t, "neon,v7", '64', 'static_assert!', 'N >= 0 && N <= 63', 'v2i64', 'int64x2_t::splat', 'N as i64'] - - [poly8x8_t, "neon,v7", '8', 'static_assert_uimm_bits!', 'N, 3', 'v8i8', 'int8x8_t::splat', 'N as i8'] - - [poly8x16_t, "neon,v7", '8', 'static_assert_uimm_bits!', 'N, 3', 'v16i8', 'int8x16_t::splat', 'N as i8'] - - [poly16x4_t, "neon,v7", '16', 'static_assert_uimm_bits!', 'N, 4', 'v4i16', 'int16x4_t::splat', 'N as i16'] - - [poly16x8_t, "neon,v7", '16', 'static_assert_uimm_bits!', 'N, 4', 'v8i16', 'int16x8_t::splat', 'N as i16'] + - [uint8x8_t, "neon,v7", '8', 'static_assert_uimm_bits!', 'N, 3', 'v8i8'] + - [uint8x16_t, "neon,v7", '8', 'static_assert_uimm_bits!', 'N, 3', 'v16i8'] + - [uint16x4_t, "neon,v7", '16', 'static_assert_uimm_bits!', 'N, 4', 'v4i16'] + - [uint16x8_t, "neon,v7", '16', 'static_assert_uimm_bits!', 'N, 4', 'v8i16'] + - [uint32x2_t, "neon,v7", '32', 'static_assert!', 'N >= 0 && N <= 31', 'v2i32'] + - [uint32x4_t, "neon,v7", '32', 'static_assert!', 'N >= 0 && N <= 31', 'v4i32'] + - [uint64x1_t, "neon,v7", '64', 'static_assert!', 'N >= 0 && N <= 63', 'v1i64'] + - [uint64x2_t, "neon,v7", '64', 'static_assert!', 'N >= 0 && N <= 63', 'v2i64'] + - [poly8x8_t, "neon,v7", '8', 'static_assert_uimm_bits!', 'N, 3', 'v8i8'] + - [poly8x16_t, "neon,v7", '8', 'static_assert_uimm_bits!', 'N, 3', 'v16i8'] + - [poly16x4_t, "neon,v7", '16', 'static_assert_uimm_bits!', 'N, 4', 'v4i16'] + - [poly16x8_t, "neon,v7", '16', 'static_assert_uimm_bits!', 'N, 4', 'v8i16'] ## These live in ./crates/core_arch/src/arm/neon.rs #- [poly64x1_t, "neon,v7,aes", '"vsli.64"', 'static_assert!', '0 <= N && N <= 63', 'v1i64', 'int64x1_t::splat', 'N as i64'] #- [poly64x2_t, "neon,v7,aes", '"vsli.64"', 'static_assert!', '0 <= N && N <= 63', 'v2i64', 'int64x2_t::splat', 'N as i64'] @@ -13966,10 +14009,9 @@ intrinsics: - FnCall: - 'transmute' - - FnCall: - - "vshiftins_{type[5]}" + - "vshiftlins_{type[5]}::" - - FnCall: [transmute, [a]] - FnCall: [transmute, [b]] - - FnCall: ["{type[6]}", ["{type[7]}"]] - name: "vsli{neon_type[0].N}" doc: "Shift Left and Insert (immediate)" @@ -13984,21 +14026,20 @@ intrinsics: - FnCall: [rustc_legacy_const_generics, ['2']] static_defs: ['const N: i32'] types: - - [int8x8_t, '8', 'static_assert_uimm_bits!', 'N, 3', 'v8i8', 'int8x8_t::splat', 'N as i8'] - - [int8x16_t, '8', 'static_assert_uimm_bits!', 'N, 3', 'v16i8', 'int8x16_t::splat', 'N as i8'] - - [int16x4_t, '16', 'static_assert_uimm_bits!', 'N, 4', 'v4i16', 'int16x4_t::splat', 'N as i16'] - - [int16x8_t, '16', 'static_assert_uimm_bits!', 'N, 4', 'v8i16', 'int16x8_t::splat', 'N as i16'] - - [int32x2_t, '32', 'static_assert!', 'N >= 0 && N <= 31', 'v2i32', 'int32x2_t::splat', 'N'] - - [int32x4_t, '32', 'static_assert!', 'N >= 0 && N <= 31', 'v4i32', 'int32x4_t::splat', 'N'] - - [int64x1_t, '64', 'static_assert!', 'N >= 0 && N <= 63', 'v1i64', 'int64x1_t::splat', 'N as i64'] - - [int64x2_t, '64', 'static_assert!', 'N >= 0 && N <= 63', 'v2i64', 'int64x2_t::splat', 'N as i64'] + - [int8x8_t, '8', 'static_assert_uimm_bits!', 'N, 3', 'v8i8' ] + - [int8x16_t, '8', 'static_assert_uimm_bits!', 'N, 3', 'v16i8'] + - [int16x4_t, '16', 'static_assert_uimm_bits!', 'N, 4', 'v4i16'] + - [int16x8_t, '16', 'static_assert_uimm_bits!', 'N, 4', 'v8i16'] + - [int32x2_t, '32', 'static_assert!', 'N >= 0 && N <= 31', 'v2i32'] + - [int32x4_t, '32', 'static_assert!', 'N >= 0 && N <= 31', 'v4i32'] + - [int64x1_t, '64', 'static_assert!', 'N >= 0 && N <= 63', 'v1i64'] + - [int64x2_t, '64', 'static_assert!', 'N >= 0 && N <= 63', 'v2i64'] compose: - FnCall: ["{type[2]}", ['{type[3]}']] - FnCall: - - "vshiftins_{type[4]}" + - "vshiftlins_{type[4]}::" - - a - b - - FnCall: ["{type[5]}", ["{type[6]}"]] - name: "vcombine{neon_type[0].no}" doc: Join two smaller vectors into a single larger vector diff --git a/crates/stdarch-gen-arm/src/expression.rs b/crates/stdarch-gen-arm/src/expression.rs index d5644ef27d..bf48f0dab7 100644 --- a/crates/stdarch-gen-arm/src/expression.rs +++ b/crates/stdarch-gen-arm/src/expression.rs @@ -108,6 +108,8 @@ impl ToTokens for FnCall { pub enum Expression { /// (Re)Defines a variable Let(LetVariant), + /// Defines a const + Const(WildString, TypeKind, Box), /// Performs a variable assignment operation Assign(String, Box), /// Performs a macro call @@ -169,6 +171,7 @@ impl Expression { | LetVariant::WithType(_, _, ex) | LetVariant::MutWithType(_, _, ex), ) => ex.pre_build(ctx), + Self::Const(_, _, ex) => ex.pre_build(ctx), Self::CastAs(ex, _) => ex.pre_build(ctx), Self::Multiply(lhs, rhs) | Self::Xor(lhs, rhs) => { lhs.pre_build(ctx)?; @@ -245,6 +248,15 @@ impl Expression { ); ex.build(intrinsic, ctx) } + Self::Const(var_name, ty, ex) => { + var_name.build_acle(ctx.local)?; + ctx.local.variables.insert( + var_name.to_string(), + (ty.to_owned(), VariableType::Internal), + ); + ex.build(intrinsic, ctx) + } + Self::CastAs(ex, _) => ex.build(intrinsic, ctx), Self::Multiply(lhs, rhs) | Self::Xor(lhs, rhs) => { lhs.build(intrinsic, ctx)?; @@ -303,6 +315,7 @@ impl Expression { | LetVariant::WithType(_, _, exp) | LetVariant::MutWithType(_, _, exp), ) => exp.requires_unsafe_wrapper(ctx_fn), + Self::Const(_, _, exp) => exp.requires_unsafe_wrapper(ctx_fn), Self::Array(exps) => exps.iter().any(|exp| exp.requires_unsafe_wrapper(ctx_fn)), Self::Multiply(lhs, rhs) | Self::Xor(lhs, rhs) => { lhs.requires_unsafe_wrapper(ctx_fn) || rhs.requires_unsafe_wrapper(ctx_fn) @@ -462,6 +475,10 @@ impl ToTokens for Expression { let var_ident = format_ident!("{}", var_name.to_string()); tokens.append_all(quote! { let mut #var_ident: #ty = #exp }) } + Self::Const(var_name, ty, exp) => { + let var_ident = format_ident!("{}", var_name.to_string()); + tokens.append_all(quote! { const #var_ident: #ty = #exp }) + } Self::Assign(var_name, exp) => { /* If we are dereferencing a variable to assign a value \ * the 'format_ident!' macro does not like the asterix */ diff --git a/crates/stdarch-gen-arm/src/intrinsic.rs b/crates/stdarch-gen-arm/src/intrinsic.rs index efaa9e1418..0cdff6ff6c 100644 --- a/crates/stdarch-gen-arm/src/intrinsic.rs +++ b/crates/stdarch-gen-arm/src/intrinsic.rs @@ -1730,7 +1730,8 @@ fn create_tokens(intrinsic: &Intrinsic, endianness: Endianness, tokens: &mut Tok "{fn_name} needs to be private, or to have documentation." ); assert!( - !safety.has_doc_comments(), + !safety.has_doc_comments() + || matches!(intrinsic.visibility, FunctionVisibility::Private), "{fn_name} needs a documentation section for its safety comments." ); } From 926e84a402947b3b96bfc7211e584fac81f40b98 Mon Sep 17 00:00:00 2001 From: reucru01 Date: Fri, 24 Oct 2025 10:15:30 +0100 Subject: [PATCH 2/5] Modifies CI to pass profile to test tool --- .github/workflows/main.yml | 4 +-- ci/intrinsic-test-docker.sh | 1 + ci/intrinsic-test.sh | 21 ++++++++------ ci/run-docker.sh | 1 + ci/run.sh | 30 ++++++++++---------- crates/intrinsic-test/src/common/cli.rs | 7 +++++ crates/intrinsic-test/src/common/compare.rs | 14 +++++++-- crates/intrinsic-test/src/common/gen_rust.rs | 9 ++++-- crates/intrinsic-test/src/common/mod.rs | 4 ++- 9 files changed, 60 insertions(+), 31 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 28c15cf473..92083443e4 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -220,7 +220,7 @@ jobs: if: matrix.build_std != '' # Configure some env vars based on matrix configuration - - run: echo "PROFILE=--profile=${{matrix.profile}}" >> $GITHUB_ENV + - run: echo "PROFILE=${{matrix.profile}}" >> $GITHUB_ENV shell: bash - run: echo "NORUN=1" >> $GITHUB_ENV shell: bash @@ -280,7 +280,7 @@ jobs: if: ${{ matrix.build_std }} # Configure some env vars based on matrix configuration - - run: echo "PROFILE=--profile=${{ matrix.profile }}" >> $GITHUB_ENV + - run: echo "PROFILE=${{ matrix.profile }}" >> $GITHUB_ENV - run: ./ci/intrinsic-test-docker.sh ${{ matrix.target }} if: ${{ !startsWith(matrix.target, 'thumb') }} env: diff --git a/ci/intrinsic-test-docker.sh b/ci/intrinsic-test-docker.sh index f62d7e484f..beeff42c76 100755 --- a/ci/intrinsic-test-docker.sh +++ b/ci/intrinsic-test-docker.sh @@ -30,6 +30,7 @@ run() { --env CARGO_HOME=/cargo \ --env CARGO_TARGET_DIR=/checkout/target \ --env TARGET="${1}" \ + --env PROFILE \ --env "${HOST_LINKER}"="cc" \ --env STDARCH_DISABLE_ASSERT_INSTR \ --env NOSTD \ diff --git a/ci/intrinsic-test.sh b/ci/intrinsic-test.sh index be63f0c0c6..6e52d86f49 100755 --- a/ci/intrinsic-test.sh +++ b/ci/intrinsic-test.sh @@ -6,7 +6,7 @@ set -ex export RUSTFLAGS="${RUSTFLAGS} -D warnings -Z merge-functions=disabled -Z verify-llvm-ir" export HOST_RUSTFLAGS="${RUSTFLAGS}" -export PROFILE="${PROFILE:="--profile=release"}" +export PROFILE="${PROFILE:="release"}" case ${TARGET} in # On 32-bit use a static relocation model which avoids some extra @@ -85,27 +85,29 @@ esac # Arm specific case "${TARGET}" in aarch64-unknown-linux-gnu*|armv7-unknown-linux-gnueabihf*) - CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=info \ - cargo run "${INTRINSIC_TEST}" "${PROFILE}" \ + CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=warn \ + cargo run "${INTRINSIC_TEST}" --release \ --bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \ --runner "${TEST_RUNNER}" \ --cppcompiler "${TEST_CXX_COMPILER}" \ --skip "${TEST_SKIP_INTRINSICS}" \ --target "${TARGET}" \ + --profile "${PROFILE}" \ --sample-percentage "${TEST_SAMPLE_INTRINSICS_PERCENTAGE}" ;; aarch64_be-unknown-linux-gnu*) - CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=info \ - cargo run "${INTRINSIC_TEST}" "${PROFILE}" \ + CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=warn \ + cargo run "${INTRINSIC_TEST}" --release \ --bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \ --runner "${TEST_RUNNER}" \ --cppcompiler "${TEST_CXX_COMPILER}" \ --skip "${TEST_SKIP_INTRINSICS}" \ --target "${TARGET}" \ - --sample-percentage "${TEST_SAMPLE_INTRINSICS_PERCENTAGE}" \ + --profile "${PROFILE}" \ --linker "${CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_LINKER}" \ - --cxx-toolchain-dir "${AARCH64_BE_TOOLCHAIN}" + --cxx-toolchain-dir "${AARCH64_BE_TOOLCHAIN}" \ + --sample-percentage "${TEST_SAMPLE_INTRINSICS_PERCENTAGE}" ;; x86_64-unknown-linux-gnu*) @@ -114,13 +116,14 @@ case "${TARGET}" in # Hence the use of `env -u`. env -u CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER \ CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" \ - RUST_LOG=info RUST_BACKTRACE=1 \ - cargo run "${INTRINSIC_TEST}" "${PROFILE}" \ + RUST_LOG=warn RUST_BACKTRACE=1 \ + cargo run "${INTRINSIC_TEST}" --release \ --bin intrinsic-test -- intrinsics_data/x86-intel.xml \ --runner "${TEST_RUNNER}" \ --skip "${TEST_SKIP_INTRINSICS}" \ --cppcompiler "${TEST_CXX_COMPILER}" \ --target "${TARGET}" \ + --profile "${PROFILE}" \ --sample-percentage "${TEST_SAMPLE_INTRINSICS_PERCENTAGE}" ;; *) diff --git a/ci/run-docker.sh b/ci/run-docker.sh index d7aa50a8c9..28dfd5a24a 100755 --- a/ci/run-docker.sh +++ b/ci/run-docker.sh @@ -37,6 +37,7 @@ run() { --env NORUN \ --env RUSTFLAGS \ --env CARGO_UNSTABLE_BUILD_STD \ + --env PROFILE \ --volume "${HOME}/.cargo":/cargo \ --volume "$(rustc --print sysroot)":/rust:ro \ --volume "$(pwd)":/checkout:ro \ diff --git a/ci/run.sh b/ci/run.sh index 2bb77bae25..a8925c470a 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -12,7 +12,7 @@ set -ex export RUSTFLAGS="${RUSTFLAGS} -D warnings -Z merge-functions=disabled -Z verify-llvm-ir" export HOST_RUSTFLAGS="${RUSTFLAGS}" -export PROFILE="${PROFILE:="--profile=release"}" +export PROFILE="${PROFILE:="release"}" case ${TARGET} in # On Windows the linker performs identical COMDAT folding (ICF) by default @@ -63,7 +63,7 @@ cargo_test() { if [ "$NORUN" = "1" ]; then export subcmd="build" fi - cmd="$cmd ${subcmd} --target=$TARGET $1" + cmd="$cmd ${subcmd} --target=$TARGET --profile=$PROFILE $1" cmd="$cmd -- $2" case ${TARGET} in @@ -80,10 +80,10 @@ cargo_test() { CORE_ARCH="--manifest-path=crates/core_arch/Cargo.toml" STDARCH_EXAMPLES="--manifest-path=examples/Cargo.toml" -cargo_test "${CORE_ARCH} ${PROFILE}" +cargo_test "${CORE_ARCH}" if [ "$NOSTD" != "1" ]; then - cargo_test "${STDARCH_EXAMPLES} ${PROFILE}" + cargo_test "${STDARCH_EXAMPLES}" fi @@ -93,41 +93,41 @@ case ${TARGET} in export STDARCH_DISABLE_ASSERT_INSTR=1 export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+avx" - cargo_test "${PROFILE}" + cargo_test export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+avx512f" - cargo_test "${PROFILE}" + cargo_test ;; x86_64* | i686*) export STDARCH_DISABLE_ASSERT_INSTR=1 export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+avx" - cargo_test "${PROFILE}" + cargo_test ;; # FIXME: don't build anymore #mips-*gnu* | mipsel-*gnu*) # export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+msa,+fp64,+mips32r5" - # cargo_test "${PROFILE}" + # cargo_test # ;; mips64*) export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+msa" - cargo_test "${PROFILE}" + cargo_test ;; s390x*) export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+vector-enhancements-1" - cargo_test "${PROFILE}" + cargo_test ;; powerpc64*) export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+altivec" - cargo_test "${PROFILE}" + cargo_test export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+vsx" - cargo_test "${PROFILE}" + cargo_test ;; powerpc*) # qemu has a bug in PPC32 which leads to a crash when compiled with `vsx` export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+altivec" - cargo_test "${PROFILE}" + cargo_test ;; *) ;; @@ -138,7 +138,7 @@ if [ "$NORUN" != "1" ] && [ "$NOSTD" != 1 ]; then # Test examples ( cd examples - cargo test --target "$TARGET" "${PROFILE}" - echo test | cargo run --target "$TARGET" "${PROFILE}" hex + cargo test --target "${TARGET}" --profile "${PROFILE}" + echo test | cargo run --target "${TARGET}" --profile "${PROFILE}" hex ) fi diff --git a/crates/intrinsic-test/src/common/cli.rs b/crates/intrinsic-test/src/common/cli.rs index 461ab542ea..032abdf653 100644 --- a/crates/intrinsic-test/src/common/cli.rs +++ b/crates/intrinsic-test/src/common/cli.rs @@ -47,6 +47,10 @@ pub struct Cli { #[arg(long, default_value_t = String::from("armv7-unknown-linux-gnueabihf"))] pub target: String, + /// Pass a profile (release, dev) + #[arg(long, default_value_t = String::from("release"))] + pub profile: String, + /// Set the linker #[arg(long)] pub linker: Option, @@ -65,6 +69,7 @@ pub struct ProcessedCli { pub cpp_compiler: Option, pub runner: String, pub target: String, + pub profile: String, pub linker: Option, pub cxx_toolchain_dir: Option, pub skip: Vec, @@ -76,6 +81,7 @@ impl ProcessedCli { let filename = cli_options.input; let runner = cli_options.runner.unwrap_or_default(); let target = cli_options.target; + let profile = cli_options.profile; let linker = cli_options.linker; let cxx_toolchain_dir = cli_options.cxx_toolchain_dir; let sample_percentage = cli_options.sample_percentage; @@ -109,6 +115,7 @@ impl ProcessedCli { cpp_compiler, runner, target, + profile, linker, cxx_toolchain_dir, skip, diff --git a/crates/intrinsic-test/src/common/compare.rs b/crates/intrinsic-test/src/common/compare.rs index c0459b743a..1c1b062762 100644 --- a/crates/intrinsic-test/src/common/compare.rs +++ b/crates/intrinsic-test/src/common/compare.rs @@ -10,7 +10,12 @@ fn runner_command(runner: &str) -> Command { cmd } -pub fn compare_outputs(intrinsic_name_list: &Vec, runner: &str, target: &str) -> bool { +pub fn compare_outputs( + intrinsic_name_list: &Vec, + runner: &str, + target: &str, + profile: &str, +) -> bool { let intrinsics = intrinsic_name_list .par_iter() .filter_map(|intrinsic_name| { @@ -20,8 +25,13 @@ pub fn compare_outputs(intrinsic_name_list: &Vec, runner: &str, target: .current_dir("c_programs") .output(); + let profile_dir = match profile { + "dev" => "debug", + _ => "release", + }; + let rust = runner_command(runner) - .arg(format!("./target/{target}/release/intrinsic-test-programs")) + .arg(format!("./target/{target}/{profile_dir}/intrinsic-test-programs")) .arg(intrinsic_name) .current_dir("rust_programs") .output(); diff --git a/crates/intrinsic-test/src/common/gen_rust.rs b/crates/intrinsic-test/src/common/gen_rust.rs index c8d815e46e..48550d93f2 100644 --- a/crates/intrinsic-test/src/common/gen_rust.rs +++ b/crates/intrinsic-test/src/common/gen_rust.rs @@ -146,7 +146,12 @@ pub fn write_lib_rs( Ok(()) } -pub fn compile_rust_programs(toolchain: Option<&str>, target: &str, linker: Option<&str>) -> bool { +pub fn compile_rust_programs( + toolchain: Option<&str>, + target: &str, + profile: &str, + linker: Option<&str>, +) -> bool { /* If there has been a linker explicitly set from the command line then * we want to set it via setting it in the RUSTFLAGS*/ @@ -167,7 +172,7 @@ pub fn compile_rust_programs(toolchain: Option<&str>, target: &str, linker: Opti if toolchain.is_some_and(|val| !val.is_empty()) { cargo_command.arg(toolchain.unwrap()); } - cargo_command.args(["build", "--target", target, "--release"]); + cargo_command.args(["build", "--target", target, "--profile", profile]); let mut rust_flags = "-Cdebuginfo=0".to_string(); if let Some(linker) = linker { diff --git a/crates/intrinsic-test/src/common/mod.rs b/crates/intrinsic-test/src/common/mod.rs index 8b6bd943a7..a1062b3a87 100644 --- a/crates/intrinsic-test/src/common/mod.rs +++ b/crates/intrinsic-test/src/common/mod.rs @@ -146,6 +146,7 @@ pub trait SupportedArchitectureTest { .unwrap(); let target = &self.cli_options().target; + let profile = &self.cli_options().profile; let toolchain = self.cli_options().toolchain.as_deref(); let linker = self.cli_options().linker.as_deref(); @@ -178,7 +179,7 @@ pub trait SupportedArchitectureTest { .collect::>() .unwrap(); - compile_rust_programs(toolchain, target, linker) + compile_rust_programs(toolchain, target, profile, linker) } fn compare_outputs(&self) -> bool { @@ -193,6 +194,7 @@ pub trait SupportedArchitectureTest { &intrinsics_name_list, &self.cli_options().runner, &self.cli_options().target, + &self.cli_options().profile, ) } else { true From e36ca30a356793ded81761c31be1a624c00a7ab5 Mon Sep 17 00:00:00 2001 From: reucru01 Date: Tue, 28 Oct 2025 12:19:12 +0000 Subject: [PATCH 3/5] Fixes hardcoded path for WASM runner --- ci/docker/wasm32-wasip1/Dockerfile | 2 -- ci/run.sh | 6 ++++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/ci/docker/wasm32-wasip1/Dockerfile b/ci/docker/wasm32-wasip1/Dockerfile index 0527c0df17..cb4a9b2948 100644 --- a/ci/docker/wasm32-wasip1/Dockerfile +++ b/ci/docker/wasm32-wasip1/Dockerfile @@ -11,5 +11,3 @@ ENV VERSION=v38.0.3 RUN curl -L https://github.com/bytecodealliance/wasmtime/releases/download/${VERSION}/wasmtime-${VERSION}-x86_64-linux.tar.xz | tar xJf - ENV PATH=$PATH:/wasmtime-${VERSION}-x86_64-linux - -ENV CARGO_TARGET_WASM32_WASIP1_RUNNER="wasmtime -Wexceptions --dir /checkout/target/wasm32-wasip1/release/deps::." diff --git a/ci/run.sh b/ci/run.sh index a8925c470a..f40a305117 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -71,6 +71,12 @@ cargo_test() { # harness isn't trying to capture output, otherwise we won't get any useful # output. wasm32*) + if [ "$PROFILE" = "release" ]; then + dir="release" + else + dir="debug" + fi + export CARGO_TARGET_WASM32_WASIP1_RUNNER="wasmtime -Wexceptions --dir /checkout/target/wasm32-wasip1/$dir/deps::." cmd="$cmd --nocapture" ;; esac From fb760c17ad7466c9fb69fcc55b86e19687c75f39 Mon Sep 17 00:00:00 2001 From: reucru01 Date: Thu, 6 Nov 2025 10:01:41 +0000 Subject: [PATCH 4/5] Fixes differences found between clang & rust The intrinsics test was flagging differences in aarch64_be between rust in debug and clang in O2. It was found that rust was correct in debug, but incorrect in release, and in both cases were being compared against clang in O2 which was also incorrect. The vdot intrinsics were fixed and are now correct in rust for both release and debug. However the vcmla ones could not be as the issue lies with LLVM. Both the vdot and vcmla intrinsics were added to the skiplist as clang is still incorrect for both. LLVM issue: https://github.com/llvm/llvm-project/issues/166190 --- ci/intrinsic-test.sh | 2 +- .../core_arch/src/aarch64/neon/generated.rs | 24 ++-- .../src/arm_shared/neon/generated.rs | 32 +++--- crates/intrinsic-test/missing_aarch64_be.txt | 105 ++++++++++++++++++ .../spec/neon/aarch64.spec.yml | 30 ++--- .../spec/neon/arm_shared.spec.yml | 36 +++--- 6 files changed, 171 insertions(+), 58 deletions(-) create mode 100644 crates/intrinsic-test/missing_aarch64_be.txt diff --git a/ci/intrinsic-test.sh b/ci/intrinsic-test.sh index 6e52d86f49..b9caa272c0 100755 --- a/ci/intrinsic-test.sh +++ b/ci/intrinsic-test.sh @@ -56,7 +56,7 @@ case ${TARGET} in aarch64_be-unknown-linux-gnu*) TEST_CPPFLAGS="-fuse-ld=lld" - TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt + TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64_be.txt TEST_CXX_COMPILER="clang++" TEST_RUNNER="${CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_RUNNER}" : "${TEST_SAMPLE_INTRINSICS_PERCENTAGE:=100}" diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs index 09cf381804..ce864cc7d9 100644 --- a/crates/core_arch/src/aarch64/neon/generated.rs +++ b/crates/core_arch/src/aarch64/neon/generated.rs @@ -9500,10 +9500,10 @@ pub fn vdivh_f16(a: f16, b: f16) -> f16 { #[unstable(feature = "stdarch_neon_dotprod", issue = "117224")] pub fn vdot_laneq_s32(a: int32x2_t, b: int8x8_t, c: int8x16_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 2); + let c: int32x4_t = vreinterpretq_s32_s8(c); unsafe { - let c: int32x4_t = transmute(c); let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vdot_s32(a, b, transmute(c)) + vdot_s32(a, b, vreinterpret_s8_s32(c)) } } #[doc = "Dot product arithmetic (indexed)"] @@ -9515,11 +9515,11 @@ pub fn vdot_laneq_s32(a: int32x2_t, b: int8x8_t, c: int8x16_t) #[unstable(feature = "stdarch_neon_dotprod", issue = "117224")] pub fn vdotq_laneq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 2); + let c: int32x4_t = vreinterpretq_s32_s8(c); unsafe { - let c: int32x4_t = transmute(c); let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vdotq_s32(a, b, transmute(c)) + vdotq_s32(a, b, vreinterpretq_s8_s32(c)) } } #[doc = "Dot product arithmetic (indexed)"] @@ -9531,10 +9531,10 @@ pub fn vdotq_laneq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t #[unstable(feature = "stdarch_neon_dotprod", issue = "117224")] pub fn vdot_laneq_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x16_t) -> uint32x2_t { static_assert_uimm_bits!(LANE, 2); + let c: uint32x4_t = vreinterpretq_u32_u8(c); unsafe { - let c: uint32x4_t = transmute(c); let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vdot_u32(a, b, transmute(c)) + vdot_u32(a, b, vreinterpret_u8_u32(c)) } } #[doc = "Dot product arithmetic (indexed)"] @@ -9546,11 +9546,11 @@ pub fn vdot_laneq_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x16_ #[unstable(feature = "stdarch_neon_dotprod", issue = "117224")] pub fn vdotq_laneq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t { static_assert_uimm_bits!(LANE, 2); + let c: uint32x4_t = vreinterpretq_u32_u8(c); unsafe { - let c: uint32x4_t = transmute(c); let c: uint32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vdotq_u32(a, b, transmute(c)) + vdotq_u32(a, b, vreinterpretq_u8_u32(c)) } } #[doc = "Set all vector lanes to the same value"] @@ -28283,10 +28283,10 @@ pub fn vuqadds_s32(a: i32, b: u32) -> i32 { #[unstable(feature = "stdarch_neon_i8mm", issue = "117223")] pub fn vusdot_laneq_s32(a: int32x2_t, b: uint8x8_t, c: int8x16_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 2); + let c: int32x4_t = vreinterpretq_s32_s8(c); unsafe { - let c: int32x4_t = transmute(c); let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vusdot_s32(a, b, transmute(c)) + vusdot_s32(a, b, vreinterpret_s8_s32(c)) } } #[doc = "Dot product index form with unsigned and signed integers"] @@ -28298,11 +28298,11 @@ pub fn vusdot_laneq_s32(a: int32x2_t, b: uint8x8_t, c: int8x16_ #[unstable(feature = "stdarch_neon_i8mm", issue = "117223")] pub fn vusdotq_laneq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 2); + let c: int32x4_t = vreinterpretq_s32_s8(c); unsafe { - let c: int32x4_t = transmute(c); let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vusdotq_s32(a, b, transmute(c)) + vusdotq_s32(a, b, vreinterpretq_s8_s32(c)) } } #[doc = "Unzip vectors"] diff --git a/crates/core_arch/src/arm_shared/neon/generated.rs b/crates/core_arch/src/arm_shared/neon/generated.rs index c1bd70175c..7229f33f61 100644 --- a/crates/core_arch/src/arm_shared/neon/generated.rs +++ b/crates/core_arch/src/arm_shared/neon/generated.rs @@ -9201,10 +9201,10 @@ pub fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t { )] pub fn vdot_lane_s32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 1); + let c: int32x2_t = vreinterpret_s32_s8(c); unsafe { - let c: int32x2_t = transmute(c); let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vdot_s32(a, b, transmute(c)) + vdot_s32(a, b, vreinterpret_s8_s32(c)) } } #[doc = "Dot product arithmetic (indexed)"] @@ -9228,11 +9228,11 @@ pub fn vdot_lane_s32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> )] pub fn vdotq_lane_s32(a: int32x4_t, b: int8x16_t, c: int8x8_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 1); + let c: int32x2_t = vreinterpret_s32_s8(c); unsafe { - let c: int32x2_t = transmute(c); let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vdotq_s32(a, b, transmute(c)) + vdotq_s32(a, b, vreinterpretq_s8_s32(c)) } } #[doc = "Dot product arithmetic (indexed)"] @@ -9256,10 +9256,10 @@ pub fn vdotq_lane_s32(a: int32x4_t, b: int8x16_t, c: int8x8_t) )] pub fn vdot_lane_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t { static_assert_uimm_bits!(LANE, 1); + let c: uint32x2_t = vreinterpret_u32_u8(c); unsafe { - let c: uint32x2_t = transmute(c); let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vdot_u32(a, b, transmute(c)) + vdot_u32(a, b, vreinterpret_u8_u32(c)) } } #[doc = "Dot product arithmetic (indexed)"] @@ -9283,11 +9283,11 @@ pub fn vdot_lane_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) )] pub fn vdotq_lane_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x8_t) -> uint32x4_t { static_assert_uimm_bits!(LANE, 1); + let c: uint32x2_t = vreinterpret_u32_u8(c); unsafe { - let c: uint32x2_t = transmute(c); let c: uint32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vdotq_u32(a, b, transmute(c)) + vdotq_u32(a, b, vreinterpretq_u8_u32(c)) } } #[doc = "Dot product arithmetic (vector)"] @@ -71710,10 +71710,10 @@ pub fn vsubw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t { )] pub fn vsudot_lane_s32(a: int32x2_t, b: int8x8_t, c: uint8x8_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 1); + let c: uint32x2_t = vreinterpret_u32_u8(c); unsafe { - let c: uint32x2_t = transmute(c); let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vusdot_s32(a, transmute(c), b) + vusdot_s32(a, vreinterpret_u8_u32(c), b) } } #[doc = "Dot product index form with signed and unsigned integers"] @@ -71737,11 +71737,11 @@ pub fn vsudot_lane_s32(a: int32x2_t, b: int8x8_t, c: uint8x8_t) )] pub fn vsudotq_lane_s32(a: int32x4_t, b: int8x16_t, c: uint8x8_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 1); + let c: uint32x2_t = vreinterpret_u32_u8(c); unsafe { - let c: uint32x2_t = transmute(c); let c: uint32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vusdotq_s32(a, transmute(c), b) + vusdotq_s32(a, vreinterpretq_u8_u32(c), b) } } #[doc = "Table look-up"] @@ -73630,10 +73630,10 @@ pub fn vtstq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { )] pub fn vusdot_lane_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 1); + let c: int32x2_t = vreinterpret_s32_s8(c); unsafe { - let c: int32x2_t = transmute(c); let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vusdot_s32(a, b, transmute(c)) + vusdot_s32(a, b, vreinterpret_s8_s32(c)) } } #[doc = "Dot product index form with unsigned and signed integers"] @@ -73657,11 +73657,11 @@ pub fn vusdot_lane_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) )] pub fn vusdotq_lane_s32(a: int32x4_t, b: uint8x16_t, c: int8x8_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 1); + let c: int32x2_t = vreinterpret_s32_s8(c); unsafe { - let c: int32x2_t = transmute(c); let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vusdotq_s32(a, b, transmute(c)) + vusdotq_s32(a, b, vreinterpretq_s8_s32(c)) } } #[doc = "Dot product vector form with unsigned and signed integers"] diff --git a/crates/intrinsic-test/missing_aarch64_be.txt b/crates/intrinsic-test/missing_aarch64_be.txt new file mode 100644 index 0000000000..28e1d9203e --- /dev/null +++ b/crates/intrinsic-test/missing_aarch64_be.txt @@ -0,0 +1,105 @@ +# Bad LLVM codegen for BE in O2 in clang, and release in rust (https://github.com/llvm/llvm-project/issues/166190) +vcmla_lane_f16 +vcmla_laneq_f16 +vcmla_rot180_lane_f16 +vcmla_rot180_laneq_f16 +vcmla_rot270_lane_f16 +vcmla_rot270_laneq_f16 +vcmla_rot90_lane_f16 +vcmla_rot90_laneq_f16 +vcmlaq_lane_f16 +vcmlaq_laneq_f16 +vcmlaq_laneq_f32 +vcmlaq_rot180_lane_f16 +vcmlaq_rot180_laneq_f16 +vcmlaq_rot180_laneq_f32 +vcmlaq_rot270_lane_f16 +vcmlaq_rot270_laneq_f16 +vcmlaq_rot270_laneq_f32 +vcmlaq_rot90_lane_f16 +vcmlaq_rot90_laneq_f16 +vcmlaq_rot90_laneq_f32 +# Bad codegen for BE in O2 in clang, correct in rust. Same cause as above issue. +vdot_lane_s32 +vdot_lane_u32 +vdot_laneq_s32 +vdot_laneq_u32 +vdotq_lane_s32 +vdotq_lane_u32 +vdotq_laneq_s32 +vdotq_laneq_u32 +vsudot_lane_s32 +vsudot_laneq_s32 +vsudotq_lane_s32 +vsudotq_laneq_s32 +vusdot_lane_s32 +vusdot_laneq_s32 +vusdotq_lane_s32 +vusdotq_laneq_s32 + +# Below are in common to missing_aarch64.txt + +# Not implemented in stdarch yet +vbfdot_f32 +vbfdot_lane_f32 +vbfdot_laneq_f32 +vbfdotq_f32 +vbfdotq_lane_f32 +vbfdotq_laneq_f32 +vbfmlalbq_f32 +vbfmlalbq_lane_f32 +vbfmlalbq_laneq_f32 +vbfmlaltq_f32 +vbfmlaltq_lane_f32 +vbfmlaltq_laneq_f32 +vbfmmlaq_f32 + + +# Implemented in stdarch, but missing in Clang. +vrnd32xq_f64 +vrnd32zq_f64 +vrnd64xq_f64 +vrnd64zq_f64 +vamin_f32 +vaminq_f32 +vaminq_f64 +vamax_f32 +vamaxq_f32 +vamaxq_f64 +# LLVM select error, and missing in Clang. +vrnd32x_f64 +vrnd32z_f64 +vrnd64x_f64 +vrnd64z_f64 +vluti2_lane_p16 +vluti2_lane_p8 +vluti2_lane_s16 +vluti2_lane_s8 +vluti2_lane_u16 +vluti2_lane_u8 +vluti2q_lane_p16 +vluti2q_lane_p8 +vluti2q_lane_s16 +vluti2q_lane_s8 +vluti2q_lane_u16 +vluti2q_lane_u8 +vluti4q_lane_f16_x2 +vluti4q_lane_p16_x2 +vluti4q_lane_p8 +vluti4q_lane_s16_x2 +vluti4q_lane_s8 +vluti4q_lane_u16_x2 +vluti4q_lane_u8 +vluti4q_laneq_f16_x2 +vluti4q_laneq_p16_x2 +vluti4q_laneq_p8 +vluti4q_laneq_s16_x2 +vluti4q_laneq_s8 +vluti4q_laneq_u16_x2 +vluti4q_laneq_u8 + +# Broken in Clang +vcvth_s16_f16 +# FIXME: Broken output due to missing f16 printing support in Rust +vmulh_lane_f16 +vmulh_laneq_f16 diff --git a/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml index fe64f9d786..48c12779a8 100644 --- a/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml +++ b/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml @@ -5091,13 +5091,13 @@ intrinsics: static_defs: ["const LANE: i32"] safety: safe types: - - [int32x2_t, uint8x8_t, int8x16_t, '[LANE as u32, LANE as u32]'] - - [int32x4_t, uint8x16_t, int8x16_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int32x2_t, uint8x8_t, int8x16_t, '[LANE as u32, LANE as u32]',''] + - [int32x4_t, uint8x16_t, int8x16_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '2']] - - Let: [c, int32x4_t, {FnCall: [transmute, [c]]}] + - Let: [c, int32x4_t, {FnCall: ['vreinterpretq_s32_s8', [c]]}] - Let: [c, "{neon_type[0]}", {FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]}] - - FnCall: ["vusdot{neon_type[0].no}", [a, b, {FnCall: [transmute, [c]]}]] + - FnCall: ["vusdot{neon_type[0].no}", [a, b, {FnCall: ['vreinterpret{type[4]}_s8_s32', [c]]}]] - name: "vsudot{neon_type[0].laneq_nox}" doc: "Dot product index form with signed and unsigned integers" @@ -5123,7 +5123,11 @@ intrinsics: - c - "{type[4]}" - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]] - - FnCall: ["vusdot{neon_type[0].no}", [a, {FnCall: [transmute, [c]]}, b]] + - FnCall: + - "vusdot{neon_type[0].no}" + - - a + - FnCall: [transmute, [c]] + - b - name: "vmul{neon_type.no}" doc: Multiply @@ -6580,14 +6584,14 @@ intrinsics: - FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']] safety: safe types: - - [int32x2_t, int8x8_t, int8x16_t, int32x4_t, '[LANE as u32, LANE as u32]'] - - [int32x4_t, int8x16_t, int8x16_t, int32x4_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int32x2_t, int8x8_t, int8x16_t, int32x4_t, '[LANE as u32, LANE as u32]', ''] + - [int32x4_t, int8x16_t, int8x16_t, int32x4_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '2']] - Let: - c - "{neon_type[3]}" - - FnCall: [transmute, [c]] + - FnCall: ['vreinterpretq_{neon_type[0]}_{neon_type[1]}', [c]] - Let: - c - "{neon_type[0]}" @@ -6596,7 +6600,7 @@ intrinsics: - "vdot{neon_type[0].no}" - - a - b - - FnCall: [transmute, [c]] + - FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]] - name: "vdot{neon_type[0].laneq_nox}" doc: Dot product arithmetic (indexed) @@ -6610,14 +6614,14 @@ intrinsics: - FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']] safety: safe types: - - [uint32x2_t, uint8x8_t, uint8x16_t, uint32x4_t, '[LANE as u32, LANE as u32]'] - - [uint32x4_t, uint8x16_t, uint8x16_t, uint32x4_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint32x2_t, uint8x8_t, uint8x16_t, uint32x4_t, '[LANE as u32, LANE as u32]',''] + - [uint32x4_t, uint8x16_t, uint8x16_t, uint32x4_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '2']] - Let: - c - "{neon_type[3]}" - - FnCall: [transmute, [c]] + - FnCall: ['vreinterpretq_{neon_type[0]}_{neon_type[1]}', [c]] - Let: - c - "{neon_type[0]}" @@ -6626,7 +6630,7 @@ intrinsics: - "vdot{neon_type[0].no}" - - a - b - - FnCall: [transmute, [c]] + - FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]] - name: "vmax{neon_type.no}" doc: Maximum (vector) diff --git a/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml index 4816d17bd6..bb44aab66b 100644 --- a/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml +++ b/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml @@ -6261,19 +6261,19 @@ intrinsics: static_defs: ["const LANE: i32"] safety: safe types: - - ['_lane_s32', int32x2_t, uint8x8_t, '[LANE as u32, LANE as u32]'] - - ['q_lane_s32', int32x4_t, uint8x16_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - ['_lane_s32', int32x2_t, uint8x8_t, '[LANE as u32, LANE as u32]',''] + - ['q_lane_s32', int32x4_t, uint8x16_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '1']] - Let: - c - int32x2_t - - FnCall: [transmute, [c]] + - FnCall: ['vreinterpret_s32_s8', [c]] - Let: - c - "{type[1]}" - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]] - - FnCall: ["vusdot{neon_type[1].no}", [a, b, {FnCall: [transmute, [c]]}]] + - FnCall: ["vusdot{neon_type[1].no}", [a, b, {FnCall: ['vreinterpret{type[4]}_s8_s32', [c]]}]] - name: "vsudot{neon_type[0].lane_nox}" doc: "Dot product index form with signed and unsigned integers" @@ -6290,19 +6290,23 @@ intrinsics: static_defs: ["const LANE: i32"] safety: safe types: - - [int32x2_t, int8x8_t, uint8x8_t, '[LANE as u32, LANE as u32]', uint32x2_t] - - [int32x4_t, int8x16_t, uint8x8_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]', uint32x4_t] + - [int32x2_t, int8x8_t, uint8x8_t, '[LANE as u32, LANE as u32]', uint32x2_t,''] + - [int32x4_t, int8x16_t, uint8x8_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]', uint32x4_t,'q'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '1']] - Let: - c - uint32x2_t - - FnCall: [transmute, [c]] + - FnCall: ['vreinterpret_u32_u8', [c]] - Let: - c - "{type[4]}" - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]] - - FnCall: ["vusdot{neon_type[0].no}", [a, {FnCall: [transmute, [c]]}, b]] + - FnCall: + - "vusdot{neon_type[0].no}" + - - a + - FnCall: ['vreinterpret{type[5]}_u8_u32', [c]] + - b - name: "vmul{neon_type[1].no}" doc: Multiply @@ -7113,14 +7117,14 @@ intrinsics: - *neon-cfg-arm-unstable safety: safe types: - - [int32x2_t, int8x8_t, int8x8_t, int32x2_t, '[LANE as u32, LANE as u32]'] - - [int32x4_t, int8x16_t, int8x8_t, int32x2_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int32x2_t, int8x8_t, int8x8_t, int32x2_t, '[LANE as u32, LANE as u32]',''] + - [int32x4_t, int8x16_t, int8x8_t, int32x2_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '1']] - Let: - c - "{neon_type[3]}" - - FnCall: [transmute, [c]] + - FnCall: ['vreinterpret_{neon_type[0]}_{neon_type[1]}', [c]] - Let: - c - "{neon_type[0]}" @@ -7129,7 +7133,7 @@ intrinsics: - "vdot{neon_type[0].no}" - - a - b - - FnCall: [transmute, [c]] + - FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]] - name: "vdot{neon_type[0].lane_nox}" doc: Dot product arithmetic (indexed) @@ -7146,14 +7150,14 @@ intrinsics: - *neon-cfg-arm-unstable safety: safe types: - - [uint32x2_t, uint8x8_t, uint8x8_t, uint32x2_t, '[LANE as u32, LANE as u32]'] - - [uint32x4_t, uint8x16_t, uint8x8_t, uint32x2_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint32x2_t, uint8x8_t, uint8x8_t, uint32x2_t, '[LANE as u32, LANE as u32]',''] + - [uint32x4_t, uint8x16_t, uint8x8_t, uint32x2_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '1']] - Let: - c - "{neon_type[3]}" - - FnCall: [transmute, [c]] + - FnCall: ['vreinterpret_{neon_type[0]}_{neon_type[1]}', [c]] - Let: - c - "{neon_type[0]}" @@ -7162,7 +7166,7 @@ intrinsics: - "vdot{neon_type[0].no}" - - a - b - - FnCall: [transmute, [c]] + - FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]] - name: "vmax{neon_type.no}" doc: Maximum (vector) From 2f3cde7b5b7d8a850d9ff4c5b5cfe657952633e4 Mon Sep 17 00:00:00 2001 From: reucru01 Date: Mon, 24 Nov 2025 14:34:03 +0000 Subject: [PATCH 5/5] Provides work-around for vreinterpret inline fail --- .../src/arm_shared/neon/generated.rs | 304 +++++++++++++++++- .../spec/neon/arm_shared.spec.yml | 20 +- 2 files changed, 300 insertions(+), 24 deletions(-) diff --git a/crates/core_arch/src/arm_shared/neon/generated.rs b/crates/core_arch/src/arm_shared/neon/generated.rs index 7229f33f61..d45454c696 100644 --- a/crates/core_arch/src/arm_shared/neon/generated.rs +++ b/crates/core_arch/src/arm_shared/neon/generated.rs @@ -9183,6 +9183,7 @@ pub fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t { #[doc = "Dot product arithmetic (indexed)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_s32)"] #[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[target_feature(enable = "neon,dotprod")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))] @@ -9201,15 +9202,77 @@ pub fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t { )] pub fn vdot_lane_s32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 1); - let c: int32x2_t = vreinterpret_s32_s8(c); unsafe { + let c: int32x2_t = transmute(c); let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vdot_s32(a, b, vreinterpret_s8_s32(c)) + vdot_s32(a, b, transmute(c)) + } +} +#[doc = "Dot product arithmetic (indexed)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,dotprod")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sdot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_dotprod", issue = "117224") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdot_lane_s32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); + let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + let b: int8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) }; + let c: int8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let c: int32x2_t = transmute(c); + let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); + let ret_val: int32x2_t = vdot_s32(a, b, transmute(c)); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Dot product arithmetic (indexed)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,dotprod")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sdot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_dotprod", issue = "117224") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdotq_lane_s32(a: int32x4_t, b: int8x16_t, c: int8x8_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { + let c: int32x2_t = transmute(c); + let c: int32x4_t = + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + vdotq_s32(a, b, transmute(c)) } } #[doc = "Dot product arithmetic (indexed)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_s32)"] #[inline] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[target_feature(enable = "neon,dotprod")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))] @@ -9228,16 +9291,22 @@ pub fn vdot_lane_s32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> )] pub fn vdotq_lane_s32(a: int32x4_t, b: int8x16_t, c: int8x8_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 1); - let c: int32x2_t = vreinterpret_s32_s8(c); + let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + let b: int8x16_t = + unsafe { simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + let c: int8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) }; unsafe { + let c: int32x2_t = transmute(c); let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vdotq_s32(a, b, vreinterpretq_s8_s32(c)) + let ret_val: int32x4_t = vdotq_s32(a, b, transmute(c)); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } #[doc = "Dot product arithmetic (indexed)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_u32)"] #[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[target_feature(enable = "neon,dotprod")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))] @@ -9256,15 +9325,48 @@ pub fn vdotq_lane_s32(a: int32x4_t, b: int8x16_t, c: int8x8_t) )] pub fn vdot_lane_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t { static_assert_uimm_bits!(LANE, 1); - let c: uint32x2_t = vreinterpret_u32_u8(c); unsafe { + let c: uint32x2_t = transmute(c); let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vdot_u32(a, b, vreinterpret_u8_u32(c)) + vdot_u32(a, b, transmute(c)) + } +} +#[doc = "Dot product arithmetic (indexed)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,dotprod")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(udot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_dotprod", issue = "117224") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdot_lane_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t { + static_assert_uimm_bits!(LANE, 1); + let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) }; + let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let c: uint32x2_t = transmute(c); + let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); + let ret_val: uint32x2_t = vdot_u32(a, b, transmute(c)); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } #[doc = "Dot product arithmetic (indexed)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_u32)"] #[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[target_feature(enable = "neon,dotprod")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))] @@ -9283,11 +9385,45 @@ pub fn vdot_lane_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) )] pub fn vdotq_lane_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x8_t) -> uint32x4_t { static_assert_uimm_bits!(LANE, 1); - let c: uint32x2_t = vreinterpret_u32_u8(c); unsafe { + let c: uint32x2_t = transmute(c); let c: uint32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vdotq_u32(a, b, vreinterpretq_u8_u32(c)) + vdotq_u32(a, b, transmute(c)) + } +} +#[doc = "Dot product arithmetic (indexed)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,dotprod")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(udot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_dotprod", issue = "117224") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdotq_lane_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x8_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 1); + let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + let b: uint8x16_t = + unsafe { simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let c: uint32x2_t = transmute(c); + let c: uint32x4_t = + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + let ret_val: uint32x4_t = vdotq_u32(a, b, transmute(c)); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } #[doc = "Dot product arithmetic (vector)"] @@ -71692,6 +71828,7 @@ pub fn vsubw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t { #[doc = "Dot product index form with signed and unsigned integers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudot_lane_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,i8mm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))] @@ -71710,15 +71847,77 @@ pub fn vsubw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t { )] pub fn vsudot_lane_s32(a: int32x2_t, b: int8x8_t, c: uint8x8_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 1); - let c: uint32x2_t = vreinterpret_u32_u8(c); unsafe { + let c: uint32x2_t = transmute(c); let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vusdot_s32(a, vreinterpret_u8_u32(c), b) + vusdot_s32(a, transmute(c), b) + } +} +#[doc = "Dot product index form with signed and unsigned integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudot_lane_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sudot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_i8mm", issue = "117223") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsudot_lane_s32(a: int32x2_t, b: int8x8_t, c: uint8x8_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); + let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + let b: int8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) }; + let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let c: uint32x2_t = transmute(c); + let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); + let ret_val: int32x2_t = vusdot_s32(a, transmute(c), b); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Dot product index form with signed and unsigned integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_lane_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sudot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_i8mm", issue = "117223") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsudotq_lane_s32(a: int32x4_t, b: int8x16_t, c: uint8x8_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { + let c: uint32x2_t = transmute(c); + let c: uint32x4_t = + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + vusdotq_s32(a, transmute(c), b) } } #[doc = "Dot product index form with signed and unsigned integers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_lane_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,i8mm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))] @@ -71737,11 +71936,16 @@ pub fn vsudot_lane_s32(a: int32x2_t, b: int8x8_t, c: uint8x8_t) )] pub fn vsudotq_lane_s32(a: int32x4_t, b: int8x16_t, c: uint8x8_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 1); - let c: uint32x2_t = vreinterpret_u32_u8(c); + let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + let b: int8x16_t = + unsafe { simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) }; unsafe { + let c: uint32x2_t = transmute(c); let c: uint32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vusdotq_s32(a, vreinterpretq_u8_u32(c), b) + let ret_val: int32x4_t = vusdotq_s32(a, transmute(c), b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } #[doc = "Table look-up"] @@ -73612,6 +73816,7 @@ pub fn vtstq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { #[doc = "Dot product index form with unsigned and signed integers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_lane_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,i8mm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))] @@ -73630,15 +73835,48 @@ pub fn vtstq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { )] pub fn vusdot_lane_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 1); - let c: int32x2_t = vreinterpret_s32_s8(c); unsafe { + let c: int32x2_t = transmute(c); let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vusdot_s32(a, b, vreinterpret_s8_s32(c)) + vusdot_s32(a, b, transmute(c)) + } +} +#[doc = "Dot product index form with unsigned and signed integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_lane_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usdot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_i8mm", issue = "117223") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vusdot_lane_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); + let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) }; + let c: int8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let c: int32x2_t = transmute(c); + let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); + let ret_val: int32x2_t = vusdot_s32(a, b, transmute(c)); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } #[doc = "Dot product index form with unsigned and signed integers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_lane_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,i8mm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))] @@ -73657,11 +73895,45 @@ pub fn vusdot_lane_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) )] pub fn vusdotq_lane_s32(a: int32x4_t, b: uint8x16_t, c: int8x8_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 1); - let c: int32x2_t = vreinterpret_s32_s8(c); unsafe { + let c: int32x2_t = transmute(c); let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vusdotq_s32(a, b, vreinterpretq_s8_s32(c)) + vusdotq_s32(a, b, transmute(c)) + } +} +#[doc = "Dot product index form with unsigned and signed integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_lane_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usdot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_i8mm", issue = "117223") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vusdotq_lane_s32(a: int32x4_t, b: uint8x16_t, c: int8x8_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 1); + let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + let b: uint8x16_t = + unsafe { simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + let c: int8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let c: int32x2_t = transmute(c); + let c: int32x4_t = + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + let ret_val: int32x4_t = vusdotq_s32(a, b, transmute(c)); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } #[doc = "Dot product vector form with unsigned and signed integers"] diff --git a/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml index bb44aab66b..9ebdc4334c 100644 --- a/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml +++ b/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml @@ -6259,6 +6259,7 @@ intrinsics: - *neon-unstable-i8mm - *neon-cfg-arm-unstable static_defs: ["const LANE: i32"] + big_endian_inverse: true # TODO: Remove this attribute, and replace transmute with vreinterpret when https://github.com/llvm/llvm-project/pull/169337 is merged, LLVM inlining issue causing assertion failure. safety: safe types: - ['_lane_s32', int32x2_t, uint8x8_t, '[LANE as u32, LANE as u32]',''] @@ -6268,12 +6269,12 @@ intrinsics: - Let: - c - int32x2_t - - FnCall: ['vreinterpret_s32_s8', [c]] + - FnCall: [transmute, [c]] #- FnCall: ['vreinterpret_s32_s8', [c]] - Let: - c - "{type[1]}" - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]] - - FnCall: ["vusdot{neon_type[1].no}", [a, b, {FnCall: ['vreinterpret{type[4]}_s8_s32', [c]]}]] + - FnCall: ["vusdot{neon_type[1].no}", [a, b, {FnCall: [transmute, [c]]}]] #'vreinterpret{type[4]}_s8_s32' - name: "vsudot{neon_type[0].lane_nox}" doc: "Dot product index form with signed and unsigned integers" @@ -6288,6 +6289,7 @@ intrinsics: - *neon-unstable-i8mm - *neon-cfg-arm-unstable static_defs: ["const LANE: i32"] + big_endian_inverse: true # TODO: Remove this attribute, and replace transmute with vreinterpret when https://github.com/llvm/llvm-project/pull/169337 is merged, LLVM inlining issue causing assertion failure. safety: safe types: - [int32x2_t, int8x8_t, uint8x8_t, '[LANE as u32, LANE as u32]', uint32x2_t,''] @@ -6297,7 +6299,7 @@ intrinsics: - Let: - c - uint32x2_t - - FnCall: ['vreinterpret_u32_u8', [c]] + - FnCall: [transmute, [c]] #- FnCall: ['vreinterpret_u32_u8', [c]] - Let: - c - "{type[4]}" @@ -6305,7 +6307,7 @@ intrinsics: - FnCall: - "vusdot{neon_type[0].no}" - - a - - FnCall: ['vreinterpret{type[5]}_u8_u32', [c]] + - FnCall: [transmute, [c]] #- FnCall: ['vreinterpret{type[5]}_u8_u32', [c]] - b - name: "vmul{neon_type[1].no}" @@ -7115,6 +7117,7 @@ intrinsics: - FnCall: [rustc_legacy_const_generics, ['3']] - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]}]] - *neon-cfg-arm-unstable + big_endian_inverse: true # TODO: Remove this attribute, and replace transmute with vreinterpret when https://github.com/llvm/llvm-project/pull/169337 is merged, LLVM inlining issue causing assertion failure. safety: safe types: - [int32x2_t, int8x8_t, int8x8_t, int32x2_t, '[LANE as u32, LANE as u32]',''] @@ -7124,7 +7127,7 @@ intrinsics: - Let: - c - "{neon_type[3]}" - - FnCall: ['vreinterpret_{neon_type[0]}_{neon_type[1]}', [c]] + - FnCall: [transmute, [c]] - Let: - c - "{neon_type[0]}" @@ -7133,7 +7136,7 @@ intrinsics: - "vdot{neon_type[0].no}" - - a - b - - FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]] + - FnCall: [transmute, [c]] - name: "vdot{neon_type[0].lane_nox}" doc: Dot product arithmetic (indexed) @@ -7149,6 +7152,7 @@ intrinsics: - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]}]] - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true # TODO: Remove this attribute, and replace transmute with vreinterpret when https://github.com/llvm/llvm-project/pull/169337 is merged, LLVM inlining issue causing assertion failure. types: - [uint32x2_t, uint8x8_t, uint8x8_t, uint32x2_t, '[LANE as u32, LANE as u32]',''] - [uint32x4_t, uint8x16_t, uint8x8_t, uint32x2_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q'] @@ -7157,7 +7161,7 @@ intrinsics: - Let: - c - "{neon_type[3]}" - - FnCall: ['vreinterpret_{neon_type[0]}_{neon_type[1]}', [c]] + - FnCall: [transmute, [c]] #- FnCall: ['vreinterpret_{neon_type[0]}_{neon_type[1]}', [c]] - Let: - c - "{neon_type[0]}" @@ -7166,7 +7170,7 @@ intrinsics: - "vdot{neon_type[0].no}" - - a - b - - FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]] + - FnCall: [transmute, [c]] #- FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]] - name: "vmax{neon_type.no}" doc: Maximum (vector)