From f0e32856547feb7ac53e93e8bd50043d7c19c688 Mon Sep 17 00:00:00 2001 From: Adam Gemmell Date: Thu, 2 Apr 2026 17:32:29 +0100 Subject: [PATCH 1/2] Add round trip tests for v{ld,st}{2,3,4} & v{ld,st}{2,3,4} lane intrinsics --- .github/workflows/main.yml | 1 + .../core_arch/src/aarch64/neon/generated.rs | 4 +- crates/core_arch/src/aarch64/neon/mod.rs | 205 +++++++++++------- .../src/arm_shared/neon/generated.rs | 2 +- .../src/arm_shared/neon/load_tests.rs | 18 ++ crates/core_arch/src/arm_shared/neon/mod.rs | 129 ++++++++++- .../src/arm_shared/neon/store_tests.rs | 38 ++++ .../spec/neon/aarch64.spec.yml | 4 +- .../spec/neon/arm_shared.spec.yml | 2 +- 9 files changed, 317 insertions(+), 86 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1f598f6e20..0bbd772b0e 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -348,6 +348,7 @@ jobs: - name: Run miri tests env: TARGET: "aarch64-unknown-linux-gnu" + RUSTFLAGS: "-Ctarget-cpu=neoverse-v3" run: | # read filters and join them with a space. FILTERS=$(cat aarch64-miri-tests.txt | tr '\n' ' ') diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs index c9ce7a69a6..d749ccbe66 100644 --- a/crates/core_arch/src/aarch64/neon/generated.rs +++ b/crates/core_arch/src/aarch64/neon/generated.rs @@ -12189,7 +12189,7 @@ pub unsafe fn vld3q_lane_p64(a: *const p64, b: poly64x2x3_t) -> #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vld3q_lane_s8(a: *const i8, b: int8x16x3_t) -> int8x16x3_t { - static_assert_uimm_bits!(LANE, 3); + static_assert_uimm_bits!(LANE, 4); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), @@ -12571,7 +12571,7 @@ pub unsafe fn vld4q_lane_f64(a: *const f64, b: float64x2x4_t) - #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vld4q_lane_s8(a: *const i8, b: int8x16x4_t) -> int8x16x4_t { - static_assert_uimm_bits!(LANE, 3); + static_assert_uimm_bits!(LANE, 4); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), diff --git a/crates/core_arch/src/aarch64/neon/mod.rs b/crates/core_arch/src/aarch64/neon/mod.rs index 29a278b80d..cab36b9b4b 100644 --- a/crates/core_arch/src/aarch64/neon/mod.rs +++ b/crates/core_arch/src/aarch64/neon/mod.rs @@ -1037,6 +1037,7 @@ mod tests { macro_rules! wide_store_load_roundtrip_fp16 { ($( $name:ident $args:tt);* $(;)?) 
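// The `wide_store_load_roundtrip` helper these macros wrap is defined earlier
// in this file and is not part of this hunk. As a reading aid, a hypothetical,
// self-contained equivalent of what a generated test such as test_vld2_f16
// checks (shape assumed from the invocations below, illustration only):
//
//     unsafe fn roundtrip_vld2_f16() {
//         let vals: [f16; 8] = core::array::from_fn(|i| i as f16);
//         let mut tmp = [0 as f16; 8];
//         // vld2 de-interleaves and vst2 re-interleaves, so the pair cancels
//         // out and the buffer round-trips unchanged.
//         vst2_f16(tmp.as_mut_ptr(), vld2_f16(vals.as_ptr()));
//         assert_eq!(tmp, vals);
//     }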
=> { $( + #[cfg_attr(miri, ignore)] #[simd_test(enable = "neon,fp16")] #[cfg(not(target_arch = "arm64ec"))] unsafe fn $name() { @@ -1055,13 +1056,13 @@ mod tests { test_vld1q_f16_x3(f16, 24, float16x8x3_t, vst1q_f16_x3, vld1q_f16_x3); test_vld1q_f16_x4(f16, 32, float16x8x4_t, vst1q_f16_x4, vld1q_f16_x4); - test_vld2_f16_x2(f16, 8, float16x4x2_t, vst2_f16, vld2_f16); - test_vld2_f16_x3(f16, 12, float16x4x3_t, vst3_f16, vld3_f16); - test_vld2_f16_x4(f16, 16, float16x4x4_t, vst4_f16, vld4_f16); + test_vld2_f16(f16, 8, float16x4x2_t, vst2_f16, vld2_f16); + test_vld3_f16(f16, 12, float16x4x3_t, vst3_f16, vld3_f16); + test_vld4_f16(f16, 16, float16x4x4_t, vst4_f16, vld4_f16); - test_vld2q_f16_x2(f16, 16, float16x8x2_t, vst2q_f16, vld2q_f16); - test_vld3q_f16_x3(f16, 24, float16x8x3_t, vst3q_f16, vld3q_f16); - test_vld4q_f16_x4(f16, 32, float16x8x4_t, vst4q_f16, vld4q_f16); + test_vld2q_f16(f16, 16, float16x8x2_t, vst2q_f16, vld2q_f16); + test_vld3q_f16(f16, 24, float16x8x3_t, vst3q_f16, vld3q_f16); + test_vld4q_f16(f16, 32, float16x8x4_t, vst4q_f16, vld4q_f16); } macro_rules! wide_store_load_roundtrip_aes { @@ -1195,101 +1196,149 @@ mod tests { } wide_store_load_roundtrip_neon! { - test_vld2_f32_x2(f32, 4, float32x2x2_t, vst2_f32, vld2_f32); - test_vld2_f32_x3(f32, 6, float32x2x3_t, vst3_f32, vld3_f32); - test_vld2_f32_x4(f32, 8, float32x2x4_t, vst4_f32, vld4_f32); + test_vld2_f32(f32, 4, float32x2x2_t, vst2_f32, vld2_f32); + test_vld3_f32(f32, 6, float32x2x3_t, vst3_f32, vld3_f32); + test_vld4_f32(f32, 8, float32x2x4_t, vst4_f32, vld4_f32); - test_vld2q_f32_x2(f32, 8, float32x4x2_t, vst2q_f32, vld2q_f32); - test_vld3q_f32_x3(f32, 12, float32x4x3_t, vst3q_f32, vld3q_f32); - test_vld4q_f32_x4(f32, 16, float32x4x4_t, vst4q_f32, vld4q_f32); + test_vld2q_f32(f32, 8, float32x4x2_t, vst2q_f32, vld2q_f32); + test_vld3q_f32(f32, 12, float32x4x3_t, vst3q_f32, vld3q_f32); + test_vld4q_f32(f32, 16, float32x4x4_t, vst4q_f32, vld4q_f32); - test_vld2_f64_x2(f64, 2, float64x1x2_t, vst2_f64, vld2_f64); - test_vld2_f64_x3(f64, 3, float64x1x3_t, vst3_f64, vld3_f64); - test_vld2_f64_x4(f64, 4, float64x1x4_t, vst4_f64, vld4_f64); + test_vld2_f64(f64, 2, float64x1x2_t, vst2_f64, vld2_f64); + test_vld3_f64(f64, 3, float64x1x3_t, vst3_f64, vld3_f64); + test_vld4_f64(f64, 4, float64x1x4_t, vst4_f64, vld4_f64); - test_vld2q_f64_x2(f64, 4, float64x2x2_t, vst2q_f64, vld2q_f64); - test_vld3q_f64_x3(f64, 6, float64x2x3_t, vst3q_f64, vld3q_f64); - test_vld4q_f64_x4(f64, 8, float64x2x4_t, vst4q_f64, vld4q_f64); + test_vld2q_f64(f64, 4, float64x2x2_t, vst2q_f64, vld2q_f64); + test_vld3q_f64(f64, 6, float64x2x3_t, vst3q_f64, vld3q_f64); + test_vld4q_f64(f64, 8, float64x2x4_t, vst4q_f64, vld4q_f64); - test_vld2_s8_x2(i8, 16, int8x8x2_t, vst2_s8, vld2_s8); - test_vld2_s8_x3(i8, 24, int8x8x3_t, vst3_s8, vld3_s8); - test_vld2_s8_x4(i8, 32, int8x8x4_t, vst4_s8, vld4_s8); + test_vld2_s8(i8, 16, int8x8x2_t, vst2_s8, vld2_s8); + test_vld3_s8(i8, 24, int8x8x3_t, vst3_s8, vld3_s8); + test_vld4_s8(i8, 32, int8x8x4_t, vst4_s8, vld4_s8); - test_vld2q_s8_x2(i8, 32, int8x16x2_t, vst2q_s8, vld2q_s8); - test_vld3q_s8_x3(i8, 48, int8x16x3_t, vst3q_s8, vld3q_s8); - test_vld4q_s8_x4(i8, 64, int8x16x4_t, vst4q_s8, vld4q_s8); + test_vld2q_s8(i8, 32, int8x16x2_t, vst2q_s8, vld2q_s8); + test_vld3q_s8(i8, 48, int8x16x3_t, vst3q_s8, vld3q_s8); + test_vld4q_s8(i8, 64, int8x16x4_t, vst4q_s8, vld4q_s8); - test_vld2_s16_x2(i16, 8, int16x4x2_t, vst2_s16, vld2_s16); - test_vld2_s16_x3(i16, 12, int16x4x3_t, vst3_s16, vld3_s16); - test_vld2_s16_x4(i16, 16, 
int16x4x4_t, vst4_s16, vld4_s16); + test_vld2_s16(i16, 8, int16x4x2_t, vst2_s16, vld2_s16); + test_vld3_s16(i16, 12, int16x4x3_t, vst3_s16, vld3_s16); + test_vld4_s16(i16, 16, int16x4x4_t, vst4_s16, vld4_s16); - test_vld2q_s16_x2(i16, 16, int16x8x2_t, vst2q_s16, vld2q_s16); - test_vld3q_s16_x3(i16, 24, int16x8x3_t, vst3q_s16, vld3q_s16); - test_vld4q_s16_x4(i16, 32, int16x8x4_t, vst4q_s16, vld4q_s16); + test_vld2q_s16(i16, 16, int16x8x2_t, vst2q_s16, vld2q_s16); + test_vld3q_s16(i16, 24, int16x8x3_t, vst3q_s16, vld3q_s16); + test_vld4q_s16(i16, 32, int16x8x4_t, vst4q_s16, vld4q_s16); - test_vld2_s32_x2(i32, 4, int32x2x2_t, vst2_s32, vld2_s32); - test_vld2_s32_x3(i32, 6, int32x2x3_t, vst3_s32, vld3_s32); - test_vld2_s32_x4(i32, 8, int32x2x4_t, vst4_s32, vld4_s32); + test_vld2_s32(i32, 4, int32x2x2_t, vst2_s32, vld2_s32); + test_vld3_s32(i32, 6, int32x2x3_t, vst3_s32, vld3_s32); + test_vld4_s32(i32, 8, int32x2x4_t, vst4_s32, vld4_s32); - test_vld2q_s32_x2(i32, 8, int32x4x2_t, vst2q_s32, vld2q_s32); - test_vld3q_s32_x3(i32, 12, int32x4x3_t, vst3q_s32, vld3q_s32); - test_vld4q_s32_x4(i32, 16, int32x4x4_t, vst4q_s32, vld4q_s32); + test_vld2q_s32(i32, 8, int32x4x2_t, vst2q_s32, vld2q_s32); + test_vld3q_s32(i32, 12, int32x4x3_t, vst3q_s32, vld3q_s32); + test_vld4q_s32(i32, 16, int32x4x4_t, vst4q_s32, vld4q_s32); - test_vld2_s64_x2(i64, 2, int64x1x2_t, vst2_s64, vld2_s64); - test_vld2_s64_x3(i64, 3, int64x1x3_t, vst3_s64, vld3_s64); - test_vld2_s64_x4(i64, 4, int64x1x4_t, vst4_s64, vld4_s64); + test_vld2_s64(i64, 2, int64x1x2_t, vst2_s64, vld2_s64); + test_vld3_s64(i64, 3, int64x1x3_t, vst3_s64, vld3_s64); + test_vld4_s64(i64, 4, int64x1x4_t, vst4_s64, vld4_s64); - test_vld2q_s64_x2(i64, 4, int64x2x2_t, vst2q_s64, vld2q_s64); - test_vld3q_s64_x3(i64, 6, int64x2x3_t, vst3q_s64, vld3q_s64); - test_vld4q_s64_x4(i64, 8, int64x2x4_t, vst4q_s64, vld4q_s64); + test_vld2q_s64(i64, 4, int64x2x2_t, vst2q_s64, vld2q_s64); + test_vld3q_s64(i64, 6, int64x2x3_t, vst3q_s64, vld3q_s64); + test_vld4q_s64(i64, 8, int64x2x4_t, vst4q_s64, vld4q_s64); - test_vld2_u8_x2(u8, 16, uint8x8x2_t, vst2_u8, vld2_u8); - test_vld2_u8_x3(u8, 24, uint8x8x3_t, vst3_u8, vld3_u8); - test_vld2_u8_x4(u8, 32, uint8x8x4_t, vst4_u8, vld4_u8); + test_vld2_u8(u8, 16, uint8x8x2_t, vst2_u8, vld2_u8); + test_vld3_u8(u8, 24, uint8x8x3_t, vst3_u8, vld3_u8); + test_vld4_u8(u8, 32, uint8x8x4_t, vst4_u8, vld4_u8); - test_vld2q_u8_x2(u8, 32, uint8x16x2_t, vst2q_u8, vld2q_u8); - test_vld3q_u8_x3(u8, 48, uint8x16x3_t, vst3q_u8, vld3q_u8); - test_vld4q_u8_x4(u8, 64, uint8x16x4_t, vst4q_u8, vld4q_u8); + test_vld2q_u8(u8, 32, uint8x16x2_t, vst2q_u8, vld2q_u8); + test_vld3q_u8(u8, 48, uint8x16x3_t, vst3q_u8, vld3q_u8); + test_vld4q_u8(u8, 64, uint8x16x4_t, vst4q_u8, vld4q_u8); - test_vld2_u16_x2(u16, 8, uint16x4x2_t, vst2_u16, vld2_u16); - test_vld2_u16_x3(u16, 12, uint16x4x3_t, vst3_u16, vld3_u16); - test_vld2_u16_x4(u16, 16, uint16x4x4_t, vst4_u16, vld4_u16); + test_vld2_u16(u16, 8, uint16x4x2_t, vst2_u16, vld2_u16); + test_vld3_u16(u16, 12, uint16x4x3_t, vst3_u16, vld3_u16); + test_vld4_u16(u16, 16, uint16x4x4_t, vst4_u16, vld4_u16); - test_vld2q_u16_x2(u16, 16, uint16x8x2_t, vst2q_u16, vld2q_u16); - test_vld3q_u16_x3(u16, 24, uint16x8x3_t, vst3q_u16, vld3q_u16); - test_vld4q_u16_x4(u16, 32, uint16x8x4_t, vst4q_u16, vld4q_u16); + test_vld2q_u16(u16, 16, uint16x8x2_t, vst2q_u16, vld2q_u16); + test_vld3q_u16(u16, 24, uint16x8x3_t, vst3q_u16, vld3q_u16); + test_vld4q_u16(u16, 32, uint16x8x4_t, vst4q_u16, vld4q_u16); - test_vld2_u32_x2(u32, 4, 
uint32x2x2_t, vst2_u32, vld2_u32); - test_vld2_u32_x3(u32, 6, uint32x2x3_t, vst3_u32, vld3_u32); - test_vld2_u32_x4(u32, 8, uint32x2x4_t, vst4_u32, vld4_u32); + test_vld2_u32(u32, 4, uint32x2x2_t, vst2_u32, vld2_u32); + test_vld3_u32(u32, 6, uint32x2x3_t, vst3_u32, vld3_u32); + test_vld4_u32(u32, 8, uint32x2x4_t, vst4_u32, vld4_u32); - test_vld2q_u32_x2(u32, 8, uint32x4x2_t, vst2q_u32, vld2q_u32); - test_vld3q_u32_x3(u32, 12, uint32x4x3_t, vst3q_u32, vld3q_u32); - test_vld4q_u32_x4(u32, 16, uint32x4x4_t, vst4q_u32, vld4q_u32); + test_vld2q_u32(u32, 8, uint32x4x2_t, vst2q_u32, vld2q_u32); + test_vld3q_u32(u32, 12, uint32x4x3_t, vst3q_u32, vld3q_u32); + test_vld4q_u32(u32, 16, uint32x4x4_t, vst4q_u32, vld4q_u32); - test_vld2_u64_x2(u64, 2, uint64x1x2_t, vst2_u64, vld2_u64); - test_vld2_u64_x3(u64, 3, uint64x1x3_t, vst3_u64, vld3_u64); - test_vld2_u64_x4(u64, 4, uint64x1x4_t, vst4_u64, vld4_u64); + test_vld2_u64(u64, 2, uint64x1x2_t, vst2_u64, vld2_u64); + test_vld3_u64(u64, 3, uint64x1x3_t, vst3_u64, vld3_u64); + test_vld4_u64(u64, 4, uint64x1x4_t, vst4_u64, vld4_u64); - test_vld2q_u64_x2(u64, 4, uint64x2x2_t, vst2q_u64, vld2q_u64); - test_vld3q_u64_x3(u64, 6, uint64x2x3_t, vst3q_u64, vld3q_u64); - test_vld4q_u64_x4(u64, 8, uint64x2x4_t, vst4q_u64, vld4q_u64); + test_vld2q_u64(u64, 4, uint64x2x2_t, vst2q_u64, vld2q_u64); + test_vld3q_u64(u64, 6, uint64x2x3_t, vst3q_u64, vld3q_u64); + test_vld4q_u64(u64, 8, uint64x2x4_t, vst4q_u64, vld4q_u64); - test_vld2_p8_x2(p8, 16, poly8x8x2_t, vst2_p8, vld2_p8); - test_vld2_p8_x3(p8, 24, poly8x8x3_t, vst3_p8, vld3_p8); - test_vld2_p8_x4(p8, 32, poly8x8x4_t, vst4_p8, vld4_p8); + test_vld2_p8(p8, 16, poly8x8x2_t, vst2_p8, vld2_p8); + test_vld3_p8(p8, 24, poly8x8x3_t, vst3_p8, vld3_p8); + test_vld4_p8(p8, 32, poly8x8x4_t, vst4_p8, vld4_p8); - test_vld2q_p8_x2(p8, 32, poly8x16x2_t, vst2q_p8, vld2q_p8); - test_vld3q_p8_x3(p8, 48, poly8x16x3_t, vst3q_p8, vld3q_p8); - test_vld4q_p8_x4(p8, 64, poly8x16x4_t, vst4q_p8, vld4q_p8); + test_vld2q_p8(p8, 32, poly8x16x2_t, vst2q_p8, vld2q_p8); + test_vld3q_p8(p8, 48, poly8x16x3_t, vst3q_p8, vld3q_p8); + test_vld4q_p8(p8, 64, poly8x16x4_t, vst4q_p8, vld4q_p8); - test_vld2_p16_x2(p16, 8, poly16x4x2_t, vst2_p16, vld2_p16); - test_vld2_p16_x3(p16, 12, poly16x4x3_t, vst3_p16, vld3_p16); - test_vld2_p16_x4(p16, 16, poly16x4x4_t, vst4_p16, vld4_p16); + test_vld2_p16(p16, 8, poly16x4x2_t, vst2_p16, vld2_p16); + test_vld3_p16(p16, 12, poly16x4x3_t, vst3_p16, vld3_p16); + test_vld4_p16(p16, 16, poly16x4x4_t, vst4_p16, vld4_p16); - test_vld2q_p16_x2(p16, 16, poly16x8x2_t, vst2q_p16, vld2q_p16); - test_vld3q_p16_x3(p16, 24, poly16x8x3_t, vst3q_p16, vld3q_p16); - test_vld4q_p16_x4(p16, 32, poly16x8x4_t, vst4q_p16, vld4q_p16); + test_vld2q_p16(p16, 16, poly16x8x2_t, vst2q_p16, vld2q_p16); + test_vld3q_p16(p16, 24, poly16x8x3_t, vst3q_p16, vld3q_p16); + test_vld4q_p16(p16, 32, poly16x8x4_t, vst4q_p16, vld4q_p16); + } + + macro_rules! lane_wide_store_load_roundtrip { + ($elem_ty:ty, $len:expr, $idx:expr, $vec_ty:ty, $store:ident, $load:ident) => { + let vals: [$elem_ty; $len] = crate::array::from_fn(|i| i as $elem_ty); + let a: $vec_ty = transmute(vals); + let mut tmp = [0 as $elem_ty; 4]; + $store::<$idx>(tmp.as_mut_ptr().cast(), a); + let r: $vec_ty = $load::<$idx>(tmp.as_ptr().cast(), a); + let out: [$elem_ty; $len] = transmute(r); + assert_eq!(out, vals); + }; + } + + macro_rules! lane_wide_store_load_roundtrip_neon { + ($( $name:ident $args:tt);* $(;)?) 
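// Hand-expanded sketch of one lane test, using the arguments passed to
// test_vld2q_lane_s8 below (hypothetical expansion, for illustration only):
//
//     let vals: [i8; 32] = core::array::from_fn(|i| i as i8);
//     let a: int8x16x2_t = transmute(vals);
//     let mut tmp = [0i8; 4];
//     vst2q_lane_s8::<15>(tmp.as_mut_ptr(), a); // writes lane 15 of each register
//     let r = vld2q_lane_s8::<15>(tmp.as_ptr(), a); // reads it back into lane 15
//     let out: [i8; 32] = transmute(r);
//     assert_eq!(out, vals); // lane 15 was overwritten with its own value
//
// Lane 15 is the highest valid index for a 16-lane vector; it only passes
// because the static_assert_uimm_bits!(LANE, 4) fix above widens the old
// 3-bit bound, which rejected lanes 8..=15.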
=> { + $( + #[cfg_attr(miri, ignore)] + #[simd_test(enable = "neon")] + unsafe fn $name() { + lane_wide_store_load_roundtrip! $args; + } + )* + }; + } + + lane_wide_store_load_roundtrip_neon! { + test_vld2q_lane_s8(i8, 32, 15, int8x16x2_t, vst2q_lane_s8, vld2q_lane_s8); + test_vld3q_lane_s8(i8, 48, 15, int8x16x3_t, vst3q_lane_s8, vld3q_lane_s8); + test_vld4q_lane_s8(i8, 64, 15, int8x16x4_t, vst4q_lane_s8, vld4q_lane_s8); + + test_vld2q_lane_u8(u8, 32, 15, uint8x16x2_t, vst2q_lane_u8, vld2q_lane_u8); + test_vld3q_lane_u8(u8, 48, 15, uint8x16x3_t, vst3q_lane_u8, vld3q_lane_u8); + test_vld4q_lane_u8(u8, 64, 15, uint8x16x4_t, vst4q_lane_u8, vld4q_lane_u8); + + test_vld2_lane_s64(i64, 2, 0, int64x1x2_t, vst2_lane_s64, vld2_lane_s64); + test_vld3_lane_s64(i64, 3, 0, int64x1x3_t, vst3_lane_s64, vld3_lane_s64); + test_vld4_lane_s64(i64, 4, 0, int64x1x4_t, vst4_lane_s64, vld4_lane_s64); + test_vld2q_lane_s64(i64, 4, 1, int64x2x2_t, vst2q_lane_s64, vld2q_lane_s64); + test_vld3q_lane_s64(i64, 6, 1, int64x2x3_t, vst3q_lane_s64, vld3q_lane_s64); + test_vld4q_lane_s64(i64, 8, 1, int64x2x4_t, vst4q_lane_s64, vld4q_lane_s64); + + test_vld2_lane_u64(u64, 2, 0, uint64x1x2_t, vst2_lane_u64, vld2_lane_u64); + test_vld3_lane_u64(u64, 3, 0, uint64x1x3_t, vst3_lane_u64, vld3_lane_u64); + test_vld4_lane_u64(u64, 4, 0, uint64x1x4_t, vst4_lane_u64, vld4_lane_u64); + test_vld2q_lane_u64(u64, 4, 1, uint64x2x2_t, vst2q_lane_u64, vld2q_lane_u64); + test_vld3q_lane_u64(u64, 6, 1, uint64x2x3_t, vst3q_lane_u64, vld3q_lane_u64); + test_vld4q_lane_u64(u64, 8, 1, uint64x2x4_t, vst4q_lane_u64, vld4q_lane_u64); } } diff --git a/crates/core_arch/src/arm_shared/neon/generated.rs b/crates/core_arch/src/arm_shared/neon/generated.rs index cf4d10162e..663cba151e 100644 --- a/crates/core_arch/src/arm_shared/neon/generated.rs +++ b/crates/core_arch/src/arm_shared/neon/generated.rs @@ -66192,7 +66192,7 @@ pub unsafe fn vst2_lane_f16(a: *mut f16, b: float16x4x2_t) { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub unsafe fn vst2q_lane_f16(a: *mut f16, b: float16x8x2_t) { - static_assert_uimm_bits!(LANE, 1); + static_assert_uimm_bits!(LANE, 3); unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0.v8f16")] fn _vst2q_lane_f16(ptr: *mut i8, a: float16x8_t, b: float16x8_t, n: i32, size: i32); diff --git a/crates/core_arch/src/arm_shared/neon/load_tests.rs b/crates/core_arch/src/arm_shared/neon/load_tests.rs index cc821b4af2..70a37f7c05 100644 --- a/crates/core_arch/src/arm_shared/neon/load_tests.rs +++ b/crates/core_arch/src/arm_shared/neon/load_tests.rs @@ -190,6 +190,24 @@ fn test_vld1q_p64() { assert_eq!(r, e) } +#[cfg(not(target_arch = "arm64ec"))] +#[simd_test(enable = "neon,fp16")] +fn test_vld1_f16() { + let a: [f16; 5] = [0., 1., 2., 3., 4.]; + let e = f16x4::new(1., 2., 3., 4.); + let r = unsafe { f16x4::from(vld1_f16(a[1..].as_ptr())) }; + assert_eq!(r, e) +} + +#[cfg(not(target_arch = "arm64ec"))] +#[simd_test(enable = "neon,fp16")] +fn test_vld1q_f16() { + let a: [f16; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.]; + let e = f16x8::new(1., 2., 3., 4., 5., 6., 7., 8.); + let r = unsafe { f16x8::from(vld1q_f16(a[1..].as_ptr())) }; + assert_eq!(r, e) +} + #[simd_test(enable = "neon")] fn test_vld1_f32() { let a: [f32; 3] = [0., 1., 2.]; diff --git a/crates/core_arch/src/arm_shared/neon/mod.rs b/crates/core_arch/src/arm_shared/neon/mod.rs index 8a4a6e9228..ed65de2b89 100644 --- a/crates/core_arch/src/arm_shared/neon/mod.rs +++ 
b/crates/core_arch/src/arm_shared/neon/mod.rs @@ -5793,8 +5793,7 @@ mod tests { #[cfg(not(target_arch = "arm64ec"))] mod fp16 { use super::*; - #[cfg_attr(target_arch = "arm", simd_test(enable = "neon,fp16"))] - #[cfg_attr(not(target_arch = "arm"), simd_test(enable = "neon"))] + #[simd_test(enable = "neon,fp16")] fn test_vcombine_f16() { let a = f16x4::from_array([3_f16, 4., 5., 6.]); let b = f16x4::from_array([13_f16, 14., 15., 16.]); @@ -5802,6 +5801,40 @@ mod tests { let c = f16x8::from(vcombine_f16(a.into(), b.into())); assert_eq!(c, e); } + + #[simd_test(enable = "neon,fp16")] + fn test_vld1_lane_f16() { + let a = f16x4::new(0., 1., 2., 3.); + let elem: f16 = 42.; + let e = f16x4::new(0., 1., 2., 42.); + let r = unsafe { f16x4::from(vld1_lane_f16::<3>(&elem, a.into())) }; + assert_eq!(r, e) + } + + #[simd_test(enable = "neon,fp16")] + fn test_vld1q_lane_f16() { + let a = f16x8::new(0., 1., 2., 3., 4., 5., 6., 7.); + let elem: f16 = 42.; + let e = f16x8::new(0., 1., 2., 3., 4., 5., 6., 42.); + let r = unsafe { f16x8::from(vld1q_lane_f16::<7>(&elem, a.into())) }; + assert_eq!(r, e) + } + + #[simd_test(enable = "neon,fp16")] + fn test_vld1_dup_f16() { + let elem: f16 = 42.; + let e = f16x4::new(42., 42., 42., 42.); + let r = unsafe { f16x4::from(vld1_dup_f16(&elem)) }; + assert_eq!(r, e) + } + + #[simd_test(enable = "neon,fp16")] + fn test_vld1q_dup_f16() { + let elem: f16 = 42.; + let e = f16x8::new(42., 42., 42., 42., 42., 42., 42., 42.); + let r = unsafe { f16x8::from(vld1q_dup_f16(&elem)) }; + assert_eq!(r, e) + } } test_vcombine!(test_vcombine_s32 => vcombine_s32([3_i32, -4], [13_i32, -14])); @@ -5814,6 +5847,98 @@ mod tests { test_vcombine!(test_vcombine_p64 => vcombine_p64([3_u64], [13_u64])); #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] test_vcombine!(test_vcombine_f64 => vcombine_f64([-3_f64], [13_f64])); + + macro_rules! lane_wide_store_load_roundtrip { + ($elem_ty:ty, $len:expr, $idx:expr, $vec_ty:ty, $store:ident, $load:ident) => { + let vals: [$elem_ty; $len] = crate::array::from_fn(|i| i as $elem_ty); + let a: $vec_ty = transmute(vals); + let mut tmp = [0 as $elem_ty; 4]; + $store::<$idx>(tmp.as_mut_ptr().cast(), a); + let r: $vec_ty = $load::<$idx>(tmp.as_ptr().cast(), a); + let out: [$elem_ty; $len] = transmute(r); + assert_eq!(out, vals); + }; + } + + // Most of these are implemented with builtins, which miri can't handle + macro_rules! lane_wide_store_load_roundtrip_neon { + ($( $name:ident $args:tt);* $(;)?) => { + $( + #[cfg_attr(miri, ignore)] + #[simd_test(enable = "neon")] + unsafe fn $name() { + lane_wide_store_load_roundtrip! $args; + } + )* + }; + } + + macro_rules! lane_wide_store_load_roundtrip_fp16 { + ($( $name:ident $args:tt);* $(;)?) => { + $( + #[cfg_attr(miri, ignore)] + #[simd_test(enable = "neon,fp16")] + #[cfg(not(target_arch = "arm64ec"))] + unsafe fn $name() { + lane_wide_store_load_roundtrip! $args; + } + )* + }; + } + + lane_wide_store_load_roundtrip_neon! 
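// The fixed four-element scratch buffer in this macro is deliberate: a
// vstN_lane stores exactly one element per register of the tuple, so even the
// x4 forms touch at most four elements, and the matching vldN_lane reads the
// same elements back into the same lane. A minimal sketch of the invariant
// (hypothetical, illustration only):
//
//     let a: int8x8x4_t = transmute([0i8; 32]);
//     let mut tmp = [0i8; 4];
//     vst4_lane_s8::<7>(tmp.as_mut_ptr(), a); // one byte per register
//     let r = vld4_lane_s8::<7>(tmp.as_ptr(), a); // back into lane 7
//     // r == a element-wise, because lane 7 was rewritten with its own value.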
{ + test_vld2_lane_s8(i8, 16, 7, int8x8x2_t, vst2_lane_s8, vld2_lane_s8); + test_vld3_lane_s8(i8, 24, 7, int8x8x3_t, vst3_lane_s8, vld3_lane_s8); + test_vld4_lane_s8(i8, 32, 7, int8x8x4_t, vst4_lane_s8, vld4_lane_s8); + + test_vld2_lane_u8(u8, 16, 7, uint8x8x2_t, vst2_lane_u8, vld2_lane_u8); + test_vld3_lane_u8(u8, 24, 7, uint8x8x3_t, vst3_lane_u8, vld3_lane_u8); + test_vld4_lane_u8(u8, 32, 7, uint8x8x4_t, vst4_lane_u8, vld4_lane_u8); + + test_vld2_lane_s16(i16, 8, 3, int16x4x2_t, vst2_lane_s16, vld2_lane_s16); + test_vld3_lane_s16(i16, 12, 3, int16x4x3_t, vst3_lane_s16, vld3_lane_s16); + test_vld4_lane_s16(i16, 16, 3, int16x4x4_t, vst4_lane_s16, vld4_lane_s16); + test_vld2q_lane_s16(i16, 16, 7, int16x8x2_t, vst2q_lane_s16, vld2q_lane_s16); + test_vld3q_lane_s16(i16, 24, 7, int16x8x3_t, vst3q_lane_s16, vld3q_lane_s16); + test_vld4q_lane_s16(i16, 32, 7, int16x8x4_t, vst4q_lane_s16, vld4q_lane_s16); + + test_vld2_lane_u16(u16, 8, 3, uint16x4x2_t, vst2_lane_u16, vld2_lane_u16); + test_vld3_lane_u16(u16, 12, 3, uint16x4x3_t, vst3_lane_u16, vld3_lane_u16); + test_vld4_lane_u16(u16, 16, 3, uint16x4x4_t, vst4_lane_u16, vld4_lane_u16); + test_vld2q_lane_u16(u16, 16, 7, uint16x8x2_t, vst2q_lane_u16, vld2q_lane_u16); + test_vld3q_lane_u16(u16, 24, 7, uint16x8x3_t, vst3q_lane_u16, vld3q_lane_u16); + test_vld4q_lane_u16(u16, 32, 7, uint16x8x4_t, vst4q_lane_u16, vld4q_lane_u16); + + test_vld2_lane_s32(i32, 4, 1, int32x2x2_t, vst2_lane_s32, vld2_lane_s32); + test_vld3_lane_s32(i32, 6, 1, int32x2x3_t, vst3_lane_s32, vld3_lane_s32); + test_vld4_lane_s32(i32, 8, 1, int32x2x4_t, vst4_lane_s32, vld4_lane_s32); + test_vld2q_lane_s32(i32, 8, 3, int32x4x2_t, vst2q_lane_s32, vld2q_lane_s32); + test_vld3q_lane_s32(i32, 12, 3, int32x4x3_t, vst3q_lane_s32, vld3q_lane_s32); + test_vld4q_lane_s32(i32, 16, 3, int32x4x4_t, vst4q_lane_s32, vld4q_lane_s32); + + test_vld2_lane_u32(u32, 4, 1, uint32x2x2_t, vst2_lane_u32, vld2_lane_u32); + test_vld3_lane_u32(u32, 6, 1, uint32x2x3_t, vst3_lane_u32, vld3_lane_u32); + test_vld4_lane_u32(u32, 8, 1, uint32x2x4_t, vst4_lane_u32, vld4_lane_u32); + test_vld2q_lane_u32(u32, 8, 3, uint32x4x2_t, vst2q_lane_u32, vld2q_lane_u32); + test_vld3q_lane_u32(u32, 12, 3, uint32x4x3_t, vst3q_lane_u32, vld3q_lane_u32); + test_vld4q_lane_u32(u32, 16, 3, uint32x4x4_t, vst4q_lane_u32, vld4q_lane_u32); + + test_vld2_lane_f32(f32, 4, 1, float32x2x2_t, vst2_lane_f32, vld2_lane_f32); + test_vld3_lane_f32(f32, 6, 1, float32x2x3_t, vst3_lane_f32, vld3_lane_f32); + test_vld4_lane_f32(f32, 8, 1, float32x2x4_t, vst4_lane_f32, vld4_lane_f32); + test_vld2q_lane_f32(f32, 8, 3, float32x4x2_t, vst2q_lane_f32, vld2q_lane_f32); + test_vld3q_lane_f32(f32, 12, 3, float32x4x3_t, vst3q_lane_f32, vld3q_lane_f32); + test_vld4q_lane_f32(f32, 16, 3, float32x4x4_t, vst4q_lane_f32, vld4q_lane_f32); + } + + lane_wide_store_load_roundtrip_fp16! 
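// The f16 lane tests use the maximum valid lane for each shape: 3 for the
// 4-lane d-form and 7 for the 8-lane q-form. Lane 7 on float16x8x2_t needs a
// 3-bit immediate, which is exactly what the vst2q_lane_f16 fix above now
// permits; the old bound of static_assert_uimm_bits!(LANE, 1) rejected every
// lane above 1. Sketch of the bound (illustration only):
//
//     static_assert_uimm_bits!(LANE, 3); // accepts 0..=7 for an 8-lane vector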
{ + test_vld2_lane_f16(f16, 8, 3, float16x4x2_t, vst2_lane_f16, vld2_lane_f16); + test_vld3_lane_f16(f16, 12, 3, float16x4x3_t, vst3_lane_f16, vld3_lane_f16); + test_vld4_lane_f16(f16, 16, 3, float16x4x4_t, vst4_lane_f16, vld4_lane_f16); + test_vld2q_lane_f16(f16, 16, 7, float16x8x2_t, vst2q_lane_f16, vld2q_lane_f16); + test_vld3q_lane_f16(f16, 24, 7, float16x8x3_t, vst3q_lane_f16, vld3q_lane_f16); + test_vld4q_lane_f16(f16, 32, 7, float16x8x4_t, vst4q_lane_f16, vld4q_lane_f16); + } } #[cfg(all(test, target_arch = "arm"))] diff --git a/crates/core_arch/src/arm_shared/neon/store_tests.rs b/crates/core_arch/src/arm_shared/neon/store_tests.rs index 2b10b38f2d..6eb60e4c78 100644 --- a/crates/core_arch/src/arm_shared/neon/store_tests.rs +++ b/crates/core_arch/src/arm_shared/neon/store_tests.rs @@ -406,6 +406,44 @@ fn test_vst1q_p64() { assert_eq!(vals[2], 2); } +#[cfg(not(target_arch = "arm64ec"))] +#[simd_test(enable = "neon,fp16")] +fn test_vst1_f16() { + let mut vals = [0_f16; 5]; + let a = f16x4::new(1., 2., 3., 4.); + + unsafe { + vst1_f16(vals[1..].as_mut_ptr(), a.into()); + } + + assert_eq!(vals[0], 0.); + assert_eq!(vals[1], 1.); + assert_eq!(vals[2], 2.); + assert_eq!(vals[3], 3.); + assert_eq!(vals[4], 4.); +} + +#[cfg(not(target_arch = "arm64ec"))] +#[simd_test(enable = "neon,fp16")] +fn test_vst1q_f16() { + let mut vals = [0_f16; 9]; + let a = f16x8::new(1., 2., 3., 4., 5., 6., 7., 8.); + + unsafe { + vst1q_f16(vals[1..].as_mut_ptr(), a.into()); + } + + assert_eq!(vals[0], 0.); + assert_eq!(vals[1], 1.); + assert_eq!(vals[2], 2.); + assert_eq!(vals[3], 3.); + assert_eq!(vals[4], 4.); + assert_eq!(vals[5], 5.); + assert_eq!(vals[6], 6.); + assert_eq!(vals[7], 7.); + assert_eq!(vals[8], 8.); +} + #[simd_test(enable = "neon")] fn test_vst1_f32() { let mut vals = [0_f32; 3]; diff --git a/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml index a769d35264..990fd7d498 100644 --- a/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml +++ b/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml @@ -3822,7 +3822,7 @@ intrinsics: safety: unsafe: [neon] types: - - ['*const i8', int8x16x3_t, int8x16_t, i8, '3'] + - ['*const i8', int8x16x3_t, int8x16_t, i8, '4'] - ['*const i64', int64x2x3_t, int64x2_t, i64, '1'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '{type[4]}']] @@ -4246,7 +4246,7 @@ intrinsics: safety: unsafe: [neon] types: - - ['*const i8', int8x16x4_t, int8x16_t, i8, '3'] + - ['*const i8', int8x16x4_t, int8x16_t, i8, '4'] - ['*const i64', int64x2x4_t, int64x2_t, i64, '1'] - ['*const f64', float64x2x4_t, float64x2_t, f64, '1'] compose: diff --git a/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml index f6ef7f17d7..2d29d95f0a 100644 --- a/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml +++ b/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml @@ -5368,7 +5368,7 @@ intrinsics: unsafe: [neon] types: - [f16, float16x4x2_t, '2', float16x4_t, '2'] - - [f16, float16x8x2_t, '1', float16x8_t, '2'] + - [f16, float16x8x2_t, '3', float16x8_t, '2'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] - LLVMLink: From 2d49c7b69aaaf2647558348a2d74ff2c894c3420 Mon Sep 17 00:00:00 2001 From: Adam Gemmell Date: Wed, 15 Apr 2026 16:52:13 +0100 Subject: [PATCH 2/2] Generate some svldff1 tests now that the qemu bug has been fixed --- .../src/aarch64/sve/ld_st_tests_aarch64.rs | 2348 +++++++++++++++-- .../stdarch-gen-arm/src/load_store_tests.rs | 7 - 2 files changed, 2119 
insertions(+), 236 deletions(-) diff --git a/crates/core_arch/src/aarch64/sve/ld_st_tests_aarch64.rs b/crates/core_arch/src/aarch64/sve/ld_st_tests_aarch64.rs index 973b7e9fa3..3007ba4ee6 100644 --- a/crates/core_arch/src/aarch64/sve/ld_st_tests_aarch64.rs +++ b/crates/core_arch/src/aarch64/sve/ld_st_tests_aarch64.rs @@ -7067,304 +7067,606 @@ unsafe fn test_svldff1_u64() { ); } #[simd_test(enable = "sve")] -unsafe fn test_svldff1_vnum_f32() { +unsafe fn test_svldff1_gather_s32index_f32() { + let indices = svindex_s32(0, 1); svsetffr(); - let _ = svld1_vnum_f32(svptrue_b32(), F32_DATA.as_ptr(), 1); - let loaded = svldff1_vnum_f32(svptrue_b32(), F32_DATA.as_ptr(), 1); - let len = svcntw() as usize; + let _ = svld1_gather_s32index_f32(svptrue_b32(), F32_DATA.as_ptr(), indices); + let loaded = svldff1_gather_s32index_f32(svptrue_b32(), F32_DATA.as_ptr(), indices); assert_vector_matches_f32( loaded, svcvt_f32_s32_x( svptrue_b32(), - svindex_s32( - (len + 0usize).try_into().unwrap(), - 1usize.try_into().unwrap(), - ), + svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), ), ); } #[simd_test(enable = "sve")] -unsafe fn test_svldff1_vnum_f64() { +unsafe fn test_svldff1_gather_s32index_s32() { + let indices = svindex_s32(0, 1); svsetffr(); - let _ = svld1_vnum_f64(svptrue_b64(), F64_DATA.as_ptr(), 1); - let loaded = svldff1_vnum_f64(svptrue_b64(), F64_DATA.as_ptr(), 1); - let len = svcntd() as usize; + let _ = svld1_gather_s32index_s32(svptrue_b32(), I32_DATA.as_ptr(), indices); + let loaded = svldff1_gather_s32index_s32(svptrue_b32(), I32_DATA.as_ptr(), indices); + assert_vector_matches_i32( + loaded, + svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1_gather_s32index_u32() { + let indices = svindex_s32(0, 1); + svsetffr(); + let _ = svld1_gather_s32index_u32(svptrue_b32(), U32_DATA.as_ptr(), indices); + let loaded = svldff1_gather_s32index_u32(svptrue_b32(), U32_DATA.as_ptr(), indices); + assert_vector_matches_u32( + loaded, + svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1_gather_s64index_f64() { + let indices = svindex_s64(0, 1); + svsetffr(); + let _ = svld1_gather_s64index_f64(svptrue_b64(), F64_DATA.as_ptr(), indices); + let loaded = svldff1_gather_s64index_f64(svptrue_b64(), F64_DATA.as_ptr(), indices); assert_vector_matches_f64( loaded, svcvt_f64_s64_x( svptrue_b64(), - svindex_s64( - (len + 0usize).try_into().unwrap(), - 1usize.try_into().unwrap(), - ), + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), ), ); } #[simd_test(enable = "sve")] -unsafe fn test_svldff1_vnum_s8() { +unsafe fn test_svldff1_gather_s64index_s64() { + let indices = svindex_s64(0, 1); svsetffr(); - let _ = svld1_vnum_s8(svptrue_b8(), I8_DATA.as_ptr(), 1); - let loaded = svldff1_vnum_s8(svptrue_b8(), I8_DATA.as_ptr(), 1); - let len = svcntb() as usize; - assert_vector_matches_i8( + let _ = svld1_gather_s64index_s64(svptrue_b64(), I64_DATA.as_ptr(), indices); + let loaded = svldff1_gather_s64index_s64(svptrue_b64(), I64_DATA.as_ptr(), indices); + assert_vector_matches_i64( loaded, - svindex_s8( - (len + 0usize).try_into().unwrap(), - 1usize.try_into().unwrap(), - ), + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), ); } #[simd_test(enable = "sve")] -unsafe fn test_svldff1_vnum_s16() { +unsafe fn test_svldff1_gather_s64index_u64() { + let indices = svindex_s64(0, 1); svsetffr(); - let _ = 
svld1_vnum_s16(svptrue_b16(), I16_DATA.as_ptr(), 1); - let loaded = svldff1_vnum_s16(svptrue_b16(), I16_DATA.as_ptr(), 1); - let len = svcnth() as usize; - assert_vector_matches_i16( + let _ = svld1_gather_s64index_u64(svptrue_b64(), U64_DATA.as_ptr(), indices); + let loaded = svldff1_gather_s64index_u64(svptrue_b64(), U64_DATA.as_ptr(), indices); + assert_vector_matches_u64( loaded, - svindex_s16( - (len + 0usize).try_into().unwrap(), - 1usize.try_into().unwrap(), - ), + svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), ); } #[simd_test(enable = "sve")] -unsafe fn test_svldff1_vnum_s32() { +unsafe fn test_svldff1_gather_u32index_f32() { + let indices = svindex_u32(0, 1); svsetffr(); - let _ = svld1_vnum_s32(svptrue_b32(), I32_DATA.as_ptr(), 1); - let loaded = svldff1_vnum_s32(svptrue_b32(), I32_DATA.as_ptr(), 1); - let len = svcntw() as usize; - assert_vector_matches_i32( + let _ = svld1_gather_u32index_f32(svptrue_b32(), F32_DATA.as_ptr(), indices); + let loaded = svldff1_gather_u32index_f32(svptrue_b32(), F32_DATA.as_ptr(), indices); + assert_vector_matches_f32( loaded, - svindex_s32( - (len + 0usize).try_into().unwrap(), - 1usize.try_into().unwrap(), + svcvt_f32_s32_x( + svptrue_b32(), + svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), ), ); } #[simd_test(enable = "sve")] -unsafe fn test_svldff1_vnum_s64() { +unsafe fn test_svldff1_gather_u32index_s32() { + let indices = svindex_u32(0, 1); svsetffr(); - let _ = svld1_vnum_s64(svptrue_b64(), I64_DATA.as_ptr(), 1); - let loaded = svldff1_vnum_s64(svptrue_b64(), I64_DATA.as_ptr(), 1); - let len = svcntd() as usize; - assert_vector_matches_i64( + let _ = svld1_gather_u32index_s32(svptrue_b32(), I32_DATA.as_ptr(), indices); + let loaded = svldff1_gather_u32index_s32(svptrue_b32(), I32_DATA.as_ptr(), indices); + assert_vector_matches_i32( loaded, - svindex_s64( - (len + 0usize).try_into().unwrap(), - 1usize.try_into().unwrap(), - ), + svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), ); } #[simd_test(enable = "sve")] -unsafe fn test_svldff1_vnum_u8() { +unsafe fn test_svldff1_gather_u32index_u32() { + let indices = svindex_u32(0, 1); svsetffr(); - let _ = svld1_vnum_u8(svptrue_b8(), U8_DATA.as_ptr(), 1); - let loaded = svldff1_vnum_u8(svptrue_b8(), U8_DATA.as_ptr(), 1); - let len = svcntb() as usize; - assert_vector_matches_u8( + let _ = svld1_gather_u32index_u32(svptrue_b32(), U32_DATA.as_ptr(), indices); + let loaded = svldff1_gather_u32index_u32(svptrue_b32(), U32_DATA.as_ptr(), indices); + assert_vector_matches_u32( loaded, - svindex_u8( - (len + 0usize).try_into().unwrap(), - 1usize.try_into().unwrap(), - ), + svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), ); } #[simd_test(enable = "sve")] -unsafe fn test_svldff1_vnum_u16() { +unsafe fn test_svldff1_gather_u64index_f64() { + let indices = svindex_u64(0, 1); svsetffr(); - let _ = svld1_vnum_u16(svptrue_b16(), U16_DATA.as_ptr(), 1); - let loaded = svldff1_vnum_u16(svptrue_b16(), U16_DATA.as_ptr(), 1); - let len = svcnth() as usize; - assert_vector_matches_u16( + let _ = svld1_gather_u64index_f64(svptrue_b64(), F64_DATA.as_ptr(), indices); + let loaded = svldff1_gather_u64index_f64(svptrue_b64(), F64_DATA.as_ptr(), indices); + assert_vector_matches_f64( loaded, - svindex_u16( - (len + 0usize).try_into().unwrap(), - 1usize.try_into().unwrap(), + svcvt_f64_s64_x( + svptrue_b64(), + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), ), ); } #[simd_test(enable = "sve")] -unsafe fn 
test_svldff1_vnum_u32() { +unsafe fn test_svldff1_gather_u64index_s64() { + let indices = svindex_u64(0, 1); svsetffr(); - let _ = svld1_vnum_u32(svptrue_b32(), U32_DATA.as_ptr(), 1); - let loaded = svldff1_vnum_u32(svptrue_b32(), U32_DATA.as_ptr(), 1); - let len = svcntw() as usize; - assert_vector_matches_u32( + let _ = svld1_gather_u64index_s64(svptrue_b64(), I64_DATA.as_ptr(), indices); + let loaded = svldff1_gather_u64index_s64(svptrue_b64(), I64_DATA.as_ptr(), indices); + assert_vector_matches_i64( loaded, - svindex_u32( - (len + 0usize).try_into().unwrap(), - 1usize.try_into().unwrap(), - ), + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), ); } #[simd_test(enable = "sve")] -unsafe fn test_svldff1_vnum_u64() { +unsafe fn test_svldff1_gather_u64index_u64() { + let indices = svindex_u64(0, 1); svsetffr(); - let _ = svld1_vnum_u64(svptrue_b64(), U64_DATA.as_ptr(), 1); - let loaded = svldff1_vnum_u64(svptrue_b64(), U64_DATA.as_ptr(), 1); - let len = svcntd() as usize; + let _ = svld1_gather_u64index_u64(svptrue_b64(), U64_DATA.as_ptr(), indices); + let loaded = svldff1_gather_u64index_u64(svptrue_b64(), U64_DATA.as_ptr(), indices); assert_vector_matches_u64( loaded, - svindex_u64( - (len + 0usize).try_into().unwrap(), - 1usize.try_into().unwrap(), - ), + svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), ); } #[simd_test(enable = "sve")] -unsafe fn test_svldff1sb_s16() { +unsafe fn test_svldff1_gather_s32offset_f32() { + let offsets = svindex_s32(0, 4u32.try_into().unwrap()); svsetffr(); - let _ = svld1sb_s16(svptrue_b8(), I8_DATA.as_ptr()); - let loaded = svldff1sb_s16(svptrue_b8(), I8_DATA.as_ptr()); - assert_vector_matches_i16( + let _ = svld1_gather_s32offset_f32(svptrue_b32(), F32_DATA.as_ptr(), offsets); + let loaded = svldff1_gather_s32offset_f32(svptrue_b32(), F32_DATA.as_ptr(), offsets); + assert_vector_matches_f32( loaded, - svindex_s16((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + svcvt_f32_s32_x( + svptrue_b32(), + svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ), ); } #[simd_test(enable = "sve")] -unsafe fn test_svldff1sb_s32() { +unsafe fn test_svldff1_gather_s32offset_s32() { + let offsets = svindex_s32(0, 4u32.try_into().unwrap()); svsetffr(); - let _ = svld1sb_s32(svptrue_b8(), I8_DATA.as_ptr()); - let loaded = svldff1sb_s32(svptrue_b8(), I8_DATA.as_ptr()); + let _ = svld1_gather_s32offset_s32(svptrue_b32(), I32_DATA.as_ptr(), offsets); + let loaded = svldff1_gather_s32offset_s32(svptrue_b32(), I32_DATA.as_ptr(), offsets); assert_vector_matches_i32( loaded, svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), ); } #[simd_test(enable = "sve")] -unsafe fn test_svldff1sh_s32() { +unsafe fn test_svldff1_gather_s32offset_u32() { + let offsets = svindex_s32(0, 4u32.try_into().unwrap()); svsetffr(); - let _ = svld1sh_s32(svptrue_b16(), I16_DATA.as_ptr()); - let loaded = svldff1sh_s32(svptrue_b16(), I16_DATA.as_ptr()); - assert_vector_matches_i32( + let _ = svld1_gather_s32offset_u32(svptrue_b32(), U32_DATA.as_ptr(), offsets); + let loaded = svldff1_gather_s32offset_u32(svptrue_b32(), U32_DATA.as_ptr(), offsets); + assert_vector_matches_u32( loaded, - svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), ); } #[simd_test(enable = "sve")] -unsafe fn test_svldff1sb_s64() { +unsafe fn test_svldff1_gather_s64offset_f64() { + let offsets = svindex_s64(0, 8u32.try_into().unwrap()); svsetffr(); - 
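// Every first-faulting test below follows the same protocol: svsetffr() sets
// every lane of the first-fault register to active, a plain svld1 of the same
// data runs first (apparently so the first-faulting load is not the first
// access to the backing data), and the svldff1 result is then checked against
// an svindex pattern. A minimal sketch of the protocol (hypothetical,
// illustration only):
//
//     svsetffr(); // reset the FFR to all-true
//     let _ = svld1_s32(svptrue_b32(), I32_DATA.as_ptr());
//     let loaded = svldff1_s32(svptrue_b32(), I32_DATA.as_ptr());
//     // with no faults, the FFR stays all-true and `loaded` matches svld1.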
let _ = svld1sb_s64(svptrue_b8(), I8_DATA.as_ptr()); - let loaded = svldff1sb_s64(svptrue_b8(), I8_DATA.as_ptr()); - assert_vector_matches_i64( + let _ = svld1_gather_s64offset_f64(svptrue_b64(), F64_DATA.as_ptr(), offsets); + let loaded = svldff1_gather_s64offset_f64(svptrue_b64(), F64_DATA.as_ptr(), offsets); + assert_vector_matches_f64( loaded, - svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + svcvt_f64_s64_x( + svptrue_b64(), + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ), ); } #[simd_test(enable = "sve")] -unsafe fn test_svldff1sh_s64() { +unsafe fn test_svldff1_gather_s64offset_s64() { + let offsets = svindex_s64(0, 8u32.try_into().unwrap()); svsetffr(); - let _ = svld1sh_s64(svptrue_b16(), I16_DATA.as_ptr()); - let loaded = svldff1sh_s64(svptrue_b16(), I16_DATA.as_ptr()); + let _ = svld1_gather_s64offset_s64(svptrue_b64(), I64_DATA.as_ptr(), offsets); + let loaded = svldff1_gather_s64offset_s64(svptrue_b64(), I64_DATA.as_ptr(), offsets); assert_vector_matches_i64( loaded, svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), ); } #[simd_test(enable = "sve")] -unsafe fn test_svldff1sw_s64() { +unsafe fn test_svldff1_gather_s64offset_u64() { + let offsets = svindex_s64(0, 8u32.try_into().unwrap()); svsetffr(); - let _ = svld1sw_s64(svptrue_b32(), I32_DATA.as_ptr()); - let loaded = svldff1sw_s64(svptrue_b32(), I32_DATA.as_ptr()); - assert_vector_matches_i64( + let _ = svld1_gather_s64offset_u64(svptrue_b64(), U64_DATA.as_ptr(), offsets); + let loaded = svldff1_gather_s64offset_u64(svptrue_b64(), U64_DATA.as_ptr(), offsets); + assert_vector_matches_u64( loaded, - svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), ); } #[simd_test(enable = "sve")] -unsafe fn test_svldff1sb_u16() { +unsafe fn test_svldff1_gather_u32offset_f32() { + let offsets = svindex_u32(0, 4u32.try_into().unwrap()); svsetffr(); - let _ = svld1sb_u16(svptrue_b8(), I8_DATA.as_ptr()); - let loaded = svldff1sb_u16(svptrue_b8(), I8_DATA.as_ptr()); - assert_vector_matches_u16( + let _ = svld1_gather_u32offset_f32(svptrue_b32(), F32_DATA.as_ptr(), offsets); + let loaded = svldff1_gather_u32offset_f32(svptrue_b32(), F32_DATA.as_ptr(), offsets); + assert_vector_matches_f32( loaded, - svindex_u16((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + svcvt_f32_s32_x( + svptrue_b32(), + svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ), ); } #[simd_test(enable = "sve")] -unsafe fn test_svldff1sb_u32() { +unsafe fn test_svldff1_gather_u32offset_s32() { + let offsets = svindex_u32(0, 4u32.try_into().unwrap()); svsetffr(); - let _ = svld1sb_u32(svptrue_b8(), I8_DATA.as_ptr()); - let loaded = svldff1sb_u32(svptrue_b8(), I8_DATA.as_ptr()); - assert_vector_matches_u32( + let _ = svld1_gather_u32offset_s32(svptrue_b32(), I32_DATA.as_ptr(), offsets); + let loaded = svldff1_gather_u32offset_s32(svptrue_b32(), I32_DATA.as_ptr(), offsets); + assert_vector_matches_i32( loaded, - svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), ); } #[simd_test(enable = "sve")] -unsafe fn test_svldff1sh_u32() { +unsafe fn test_svldff1_gather_u32offset_u32() { + let offsets = svindex_u32(0, 4u32.try_into().unwrap()); svsetffr(); - let _ = svld1sh_u32(svptrue_b16(), I16_DATA.as_ptr()); - let loaded = svldff1sh_u32(svptrue_b16(), I16_DATA.as_ptr()); + let _ = 
svld1_gather_u32offset_u32(svptrue_b32(), U32_DATA.as_ptr(), offsets); + let loaded = svldff1_gather_u32offset_u32(svptrue_b32(), U32_DATA.as_ptr(), offsets); assert_vector_matches_u32( loaded, svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), ); } #[simd_test(enable = "sve")] -unsafe fn test_svldff1sb_u64() { +unsafe fn test_svldff1_gather_u64offset_f64() { + let offsets = svindex_u64(0, 8u32.try_into().unwrap()); svsetffr(); - let _ = svld1sb_u64(svptrue_b8(), I8_DATA.as_ptr()); - let loaded = svldff1sb_u64(svptrue_b8(), I8_DATA.as_ptr()); - assert_vector_matches_u64( + let _ = svld1_gather_u64offset_f64(svptrue_b64(), F64_DATA.as_ptr(), offsets); + let loaded = svldff1_gather_u64offset_f64(svptrue_b64(), F64_DATA.as_ptr(), offsets); + assert_vector_matches_f64( loaded, - svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + svcvt_f64_s64_x( + svptrue_b64(), + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ), ); } #[simd_test(enable = "sve")] -unsafe fn test_svldff1sh_u64() { +unsafe fn test_svldff1_gather_u64offset_s64() { + let offsets = svindex_u64(0, 8u32.try_into().unwrap()); svsetffr(); - let _ = svld1sh_u64(svptrue_b16(), I16_DATA.as_ptr()); - let loaded = svldff1sh_u64(svptrue_b16(), I16_DATA.as_ptr()); - assert_vector_matches_u64( + let _ = svld1_gather_u64offset_s64(svptrue_b64(), I64_DATA.as_ptr(), offsets); + let loaded = svldff1_gather_u64offset_s64(svptrue_b64(), I64_DATA.as_ptr(), offsets); + assert_vector_matches_i64( loaded, - svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), ); } #[simd_test(enable = "sve")] -unsafe fn test_svldff1sw_u64() { +unsafe fn test_svldff1_gather_u64offset_u64() { + let offsets = svindex_u64(0, 8u32.try_into().unwrap()); svsetffr(); - let _ = svld1sw_u64(svptrue_b32(), I32_DATA.as_ptr()); - let loaded = svldff1sw_u64(svptrue_b32(), I32_DATA.as_ptr()); + let _ = svld1_gather_u64offset_u64(svptrue_b64(), U64_DATA.as_ptr(), offsets); + let loaded = svldff1_gather_u64offset_u64(svptrue_b64(), U64_DATA.as_ptr(), offsets); assert_vector_matches_u64( loaded, svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), ); } #[simd_test(enable = "sve")] -unsafe fn test_svldff1sb_vnum_s16() { +unsafe fn test_svldff1_gather_u64base_f64() { + let bases = svdup_n_u64(F64_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 8u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b64(), bases, offsets); svsetffr(); - let _ = svld1sb_vnum_s16(svptrue_b8(), I8_DATA.as_ptr(), 1); - let loaded = svldff1sb_vnum_s16(svptrue_b8(), I8_DATA.as_ptr(), 1); - let len = svcnth() as usize; - assert_vector_matches_i16( + let _ = svld1_gather_u64base_f64(svptrue_b64(), bases); + let loaded = svldff1_gather_u64base_f64(svptrue_b64(), bases); + assert_vector_matches_f64( loaded, - svindex_s16( - (len + 0usize).try_into().unwrap(), - 1usize.try_into().unwrap(), + svcvt_f64_s64_x( + svptrue_b64(), + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), ), ); } #[simd_test(enable = "sve")] -unsafe fn test_svldff1sb_vnum_s32() { +unsafe fn test_svldff1_gather_u64base_s64() { + let bases = svdup_n_u64(I64_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 8u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b64(), bases, offsets); svsetffr(); - let _ = svld1sb_vnum_s32(svptrue_b8(), I8_DATA.as_ptr(), 1); - let loaded = svldff1sb_vnum_s32(svptrue_b8(), I8_DATA.as_ptr(), 1); - let 
len = svcntw() as usize; - assert_vector_matches_i32( + let _ = svld1_gather_u64base_s64(svptrue_b64(), bases); + let loaded = svldff1_gather_u64base_s64(svptrue_b64(), bases); + assert_vector_matches_i64( loaded, - svindex_s32( - (len + 0usize).try_into().unwrap(), - 1usize.try_into().unwrap(), - ), + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), ); } #[simd_test(enable = "sve")] -unsafe fn test_svldff1sh_vnum_s32() { +unsafe fn test_svldff1_gather_u64base_u64() { + let bases = svdup_n_u64(U64_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 8u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b64(), bases, offsets); svsetffr(); - let _ = svld1sh_vnum_s32(svptrue_b16(), I16_DATA.as_ptr(), 1); - let loaded = svldff1sh_vnum_s32(svptrue_b16(), I16_DATA.as_ptr(), 1); + let _ = svld1_gather_u64base_u64(svptrue_b64(), bases); + let loaded = svldff1_gather_u64base_u64(svptrue_b64(), bases); + assert_vector_matches_u64( + loaded, + svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1_gather_u32base_index_f32() { + let bases = svindex_u32(0, 4u32.try_into().unwrap()); + svsetffr(); + let _ = svld1_gather_u32base_index_f32( + svptrue_b32(), + bases, + F32_DATA.as_ptr() as i64 / (4u32 as i64) + 1, + ); + let loaded = svldff1_gather_u32base_index_f32( + svptrue_b32(), + bases, + F32_DATA.as_ptr() as i64 / (4u32 as i64) + 1, + ); + assert_vector_matches_f32( + loaded, + svcvt_f32_s32_x( + svptrue_b32(), + svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1_gather_u32base_index_s32() { + let bases = svindex_u32(0, 4u32.try_into().unwrap()); + svsetffr(); + let _ = svld1_gather_u32base_index_s32( + svptrue_b32(), + bases, + I32_DATA.as_ptr() as i64 / (4u32 as i64) + 1, + ); + let loaded = svldff1_gather_u32base_index_s32( + svptrue_b32(), + bases, + I32_DATA.as_ptr() as i64 / (4u32 as i64) + 1, + ); + assert_vector_matches_i32( + loaded, + svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1_gather_u32base_index_u32() { + let bases = svindex_u32(0, 4u32.try_into().unwrap()); + svsetffr(); + let _ = svld1_gather_u32base_index_u32( + svptrue_b32(), + bases, + U32_DATA.as_ptr() as i64 / (4u32 as i64) + 1, + ); + let loaded = svldff1_gather_u32base_index_u32( + svptrue_b32(), + bases, + U32_DATA.as_ptr() as i64 / (4u32 as i64) + 1, + ); + assert_vector_matches_u32( + loaded, + svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1_gather_u64base_index_f64() { + let bases = svdup_n_u64(F64_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 8u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b64(), bases, offsets); + svsetffr(); + let _ = svld1_gather_u64base_index_f64(svptrue_b64(), bases, 1.try_into().unwrap()); + let loaded = svldff1_gather_u64base_index_f64(svptrue_b64(), bases, 1.try_into().unwrap()); + assert_vector_matches_f64( + loaded, + svcvt_f64_s64_x( + svptrue_b64(), + svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1_gather_u64base_index_s64() { + let bases = svdup_n_u64(I64_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 8u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b64(), bases, offsets); + svsetffr(); + let _ = 
svld1_gather_u64base_index_s64(svptrue_b64(), bases, 1.try_into().unwrap()); + let loaded = svldff1_gather_u64base_index_s64(svptrue_b64(), bases, 1.try_into().unwrap()); + assert_vector_matches_i64( + loaded, + svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1_gather_u64base_index_u64() { + let bases = svdup_n_u64(U64_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 8u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b64(), bases, offsets); + svsetffr(); + let _ = svld1_gather_u64base_index_u64(svptrue_b64(), bases, 1.try_into().unwrap()); + let loaded = svldff1_gather_u64base_index_u64(svptrue_b64(), bases, 1.try_into().unwrap()); + assert_vector_matches_u64( + loaded, + svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1_gather_u32base_offset_f32() { + let bases = svindex_u32(0, 4u32.try_into().unwrap()); + svsetffr(); + let _ = svld1_gather_u32base_offset_f32( + svptrue_b32(), + bases, + F32_DATA.as_ptr() as i64 + 4u32 as i64, + ); + let loaded = svldff1_gather_u32base_offset_f32( + svptrue_b32(), + bases, + F32_DATA.as_ptr() as i64 + 4u32 as i64, + ); + assert_vector_matches_f32( + loaded, + svcvt_f32_s32_x( + svptrue_b32(), + svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1_gather_u32base_offset_s32() { + let bases = svindex_u32(0, 4u32.try_into().unwrap()); + svsetffr(); + let _ = svld1_gather_u32base_offset_s32( + svptrue_b32(), + bases, + I32_DATA.as_ptr() as i64 + 4u32 as i64, + ); + let loaded = svldff1_gather_u32base_offset_s32( + svptrue_b32(), + bases, + I32_DATA.as_ptr() as i64 + 4u32 as i64, + ); + assert_vector_matches_i32( + loaded, + svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1_gather_u32base_offset_u32() { + let bases = svindex_u32(0, 4u32.try_into().unwrap()); + svsetffr(); + let _ = svld1_gather_u32base_offset_u32( + svptrue_b32(), + bases, + U32_DATA.as_ptr() as i64 + 4u32 as i64, + ); + let loaded = svldff1_gather_u32base_offset_u32( + svptrue_b32(), + bases, + U32_DATA.as_ptr() as i64 + 4u32 as i64, + ); + assert_vector_matches_u32( + loaded, + svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1_gather_u64base_offset_f64() { + let bases = svdup_n_u64(F64_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 8u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b64(), bases, offsets); + svsetffr(); + let _ = svld1_gather_u64base_offset_f64(svptrue_b64(), bases, 8u32.try_into().unwrap()); + let loaded = svldff1_gather_u64base_offset_f64(svptrue_b64(), bases, 8u32.try_into().unwrap()); + assert_vector_matches_f64( + loaded, + svcvt_f64_s64_x( + svptrue_b64(), + svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1_gather_u64base_offset_s64() { + let bases = svdup_n_u64(I64_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 8u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b64(), bases, offsets); + svsetffr(); + let _ = svld1_gather_u64base_offset_s64(svptrue_b64(), bases, 8u32.try_into().unwrap()); + let loaded = svldff1_gather_u64base_offset_s64(svptrue_b64(), bases, 8u32.try_into().unwrap()); + assert_vector_matches_i64( + loaded, + 
svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1_gather_u64base_offset_u64() { + let bases = svdup_n_u64(U64_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 8u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b64(), bases, offsets); + svsetffr(); + let _ = svld1_gather_u64base_offset_u64(svptrue_b64(), bases, 8u32.try_into().unwrap()); + let loaded = svldff1_gather_u64base_offset_u64(svptrue_b64(), bases, 8u32.try_into().unwrap()); + assert_vector_matches_u64( + loaded, + svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1_vnum_f32() { + svsetffr(); + let _ = svld1_vnum_f32(svptrue_b32(), F32_DATA.as_ptr(), 1); + let loaded = svldff1_vnum_f32(svptrue_b32(), F32_DATA.as_ptr(), 1); + let len = svcntw() as usize; + assert_vector_matches_f32( + loaded, + svcvt_f32_s32_x( + svptrue_b32(), + svindex_s32( + (len + 0usize).try_into().unwrap(), + 1usize.try_into().unwrap(), + ), + ), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1_vnum_f64() { + svsetffr(); + let _ = svld1_vnum_f64(svptrue_b64(), F64_DATA.as_ptr(), 1); + let loaded = svldff1_vnum_f64(svptrue_b64(), F64_DATA.as_ptr(), 1); + let len = svcntd() as usize; + assert_vector_matches_f64( + loaded, + svcvt_f64_s64_x( + svptrue_b64(), + svindex_s64( + (len + 0usize).try_into().unwrap(), + 1usize.try_into().unwrap(), + ), + ), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1_vnum_s8() { + svsetffr(); + let _ = svld1_vnum_s8(svptrue_b8(), I8_DATA.as_ptr(), 1); + let loaded = svldff1_vnum_s8(svptrue_b8(), I8_DATA.as_ptr(), 1); + let len = svcntb() as usize; + assert_vector_matches_i8( + loaded, + svindex_s8( + (len + 0usize).try_into().unwrap(), + 1usize.try_into().unwrap(), + ), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1_vnum_s16() { + svsetffr(); + let _ = svld1_vnum_s16(svptrue_b16(), I16_DATA.as_ptr(), 1); + let loaded = svldff1_vnum_s16(svptrue_b16(), I16_DATA.as_ptr(), 1); + let len = svcnth() as usize; + assert_vector_matches_i16( + loaded, + svindex_s16( + (len + 0usize).try_into().unwrap(), + 1usize.try_into().unwrap(), + ), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1_vnum_s32() { + svsetffr(); + let _ = svld1_vnum_s32(svptrue_b32(), I32_DATA.as_ptr(), 1); + let loaded = svldff1_vnum_s32(svptrue_b32(), I32_DATA.as_ptr(), 1); let len = svcntw() as usize; assert_vector_matches_i32( loaded, @@ -7375,10 +7677,10 @@ unsafe fn test_svldff1sh_vnum_s32() { ); } #[simd_test(enable = "sve")] -unsafe fn test_svldff1sb_vnum_s64() { +unsafe fn test_svldff1_vnum_s64() { svsetffr(); - let _ = svld1sb_vnum_s64(svptrue_b8(), I8_DATA.as_ptr(), 1); - let loaded = svldff1sb_vnum_s64(svptrue_b8(), I8_DATA.as_ptr(), 1); + let _ = svld1_vnum_s64(svptrue_b64(), I64_DATA.as_ptr(), 1); + let loaded = svldff1_vnum_s64(svptrue_b64(), I64_DATA.as_ptr(), 1); let len = svcntd() as usize; assert_vector_matches_i64( loaded, @@ -7389,115 +7691,1481 @@ unsafe fn test_svldff1sb_vnum_s64() { ); } #[simd_test(enable = "sve")] -unsafe fn test_svldff1sh_vnum_s64() { +unsafe fn test_svldff1_vnum_u8() { svsetffr(); - let _ = svld1sh_vnum_s64(svptrue_b16(), I16_DATA.as_ptr(), 1); - let loaded = svldff1sh_vnum_s64(svptrue_b16(), I16_DATA.as_ptr(), 1); - let len = svcntd() as usize; - assert_vector_matches_i64( + let _ = svld1_vnum_u8(svptrue_b8(), U8_DATA.as_ptr(), 1); + let loaded = svldff1_vnum_u8(svptrue_b8(), U8_DATA.as_ptr(), 1); + 
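// vnum = 1 starts the load one whole vector past the base pointer, so the
// expected lane values begin at the vector length rather than at zero; that
// is why these assertions seed svindex with `len + 0usize`. Sketch of the
// address arithmetic for 8-bit data (illustration only):
//
//     // effective address = base + vnum * svcntb() elements
//     // expected lanes: [len, len + 1, len + 2, ...] == svindex_u8(len, 1)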
let len = svcntb() as usize; + assert_vector_matches_u8( + loaded, + svindex_u8( + (len + 0usize).try_into().unwrap(), + 1usize.try_into().unwrap(), + ), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1_vnum_u16() { + svsetffr(); + let _ = svld1_vnum_u16(svptrue_b16(), U16_DATA.as_ptr(), 1); + let loaded = svldff1_vnum_u16(svptrue_b16(), U16_DATA.as_ptr(), 1); + let len = svcnth() as usize; + assert_vector_matches_u16( + loaded, + svindex_u16( + (len + 0usize).try_into().unwrap(), + 1usize.try_into().unwrap(), + ), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1_vnum_u32() { + svsetffr(); + let _ = svld1_vnum_u32(svptrue_b32(), U32_DATA.as_ptr(), 1); + let loaded = svldff1_vnum_u32(svptrue_b32(), U32_DATA.as_ptr(), 1); + let len = svcntw() as usize; + assert_vector_matches_u32( + loaded, + svindex_u32( + (len + 0usize).try_into().unwrap(), + 1usize.try_into().unwrap(), + ), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1_vnum_u64() { + svsetffr(); + let _ = svld1_vnum_u64(svptrue_b64(), U64_DATA.as_ptr(), 1); + let loaded = svldff1_vnum_u64(svptrue_b64(), U64_DATA.as_ptr(), 1); + let len = svcntd() as usize; + assert_vector_matches_u64( + loaded, + svindex_u64( + (len + 0usize).try_into().unwrap(), + 1usize.try_into().unwrap(), + ), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sb_gather_s32offset_s32() { + let offsets = svindex_s32(0, 1u32.try_into().unwrap()); + svsetffr(); + let _ = svld1sb_gather_s32offset_s32(svptrue_b8(), I8_DATA.as_ptr(), offsets); + let loaded = svldff1sb_gather_s32offset_s32(svptrue_b8(), I8_DATA.as_ptr(), offsets); + assert_vector_matches_i32( + loaded, + svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sh_gather_s32offset_s32() { + let offsets = svindex_s32(0, 2u32.try_into().unwrap()); + svsetffr(); + let _ = svld1sh_gather_s32offset_s32(svptrue_b16(), I16_DATA.as_ptr(), offsets); + let loaded = svldff1sh_gather_s32offset_s32(svptrue_b16(), I16_DATA.as_ptr(), offsets); + assert_vector_matches_i32( + loaded, + svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sb_gather_s32offset_u32() { + let offsets = svindex_s32(0, 1u32.try_into().unwrap()); + svsetffr(); + let _ = svld1sb_gather_s32offset_u32(svptrue_b8(), I8_DATA.as_ptr(), offsets); + let loaded = svldff1sb_gather_s32offset_u32(svptrue_b8(), I8_DATA.as_ptr(), offsets); + assert_vector_matches_u32( + loaded, + svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sh_gather_s32offset_u32() { + let offsets = svindex_s32(0, 2u32.try_into().unwrap()); + svsetffr(); + let _ = svld1sh_gather_s32offset_u32(svptrue_b16(), I16_DATA.as_ptr(), offsets); + let loaded = svldff1sh_gather_s32offset_u32(svptrue_b16(), I16_DATA.as_ptr(), offsets); + assert_vector_matches_u32( + loaded, + svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sb_gather_s64offset_s64() { + let offsets = svindex_s64(0, 1u32.try_into().unwrap()); + svsetffr(); + let _ = svld1sb_gather_s64offset_s64(svptrue_b8(), I8_DATA.as_ptr(), offsets); + let loaded = svldff1sb_gather_s64offset_s64(svptrue_b8(), I8_DATA.as_ptr(), offsets); + assert_vector_matches_i64( + loaded, + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] 
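// The svldff1sb/sh/sw gathers load narrow memory elements and sign-extend
// them to the register element width, so the byte offsets below are scaled by
// the *memory* element size (1, 2 or 4 bytes), not the register element size.
// A minimal sketch for the i16 -> i64 case (hypothetical, illustration only):
//
//     let offsets = svindex_s64(0, 2); // i16 elements are 2 bytes apart
//     let loaded =
//         svldff1sh_gather_s64offset_s64(svptrue_b16(), I16_DATA.as_ptr(), offsets);
//     // each i16 is sign-extended to i64; the result matches svindex_s64(0, 1)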
+unsafe fn test_svldff1sh_gather_s64offset_s64() { + let offsets = svindex_s64(0, 2u32.try_into().unwrap()); + svsetffr(); + let _ = svld1sh_gather_s64offset_s64(svptrue_b16(), I16_DATA.as_ptr(), offsets); + let loaded = svldff1sh_gather_s64offset_s64(svptrue_b16(), I16_DATA.as_ptr(), offsets); + assert_vector_matches_i64( + loaded, + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sw_gather_s64offset_s64() { + let offsets = svindex_s64(0, 4u32.try_into().unwrap()); + svsetffr(); + let _ = svld1sw_gather_s64offset_s64(svptrue_b32(), I32_DATA.as_ptr(), offsets); + let loaded = svldff1sw_gather_s64offset_s64(svptrue_b32(), I32_DATA.as_ptr(), offsets); + assert_vector_matches_i64( + loaded, + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sb_gather_s64offset_u64() { + let offsets = svindex_s64(0, 1u32.try_into().unwrap()); + svsetffr(); + let _ = svld1sb_gather_s64offset_u64(svptrue_b8(), I8_DATA.as_ptr(), offsets); + let loaded = svldff1sb_gather_s64offset_u64(svptrue_b8(), I8_DATA.as_ptr(), offsets); + assert_vector_matches_u64( + loaded, + svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sh_gather_s64offset_u64() { + let offsets = svindex_s64(0, 2u32.try_into().unwrap()); + svsetffr(); + let _ = svld1sh_gather_s64offset_u64(svptrue_b16(), I16_DATA.as_ptr(), offsets); + let loaded = svldff1sh_gather_s64offset_u64(svptrue_b16(), I16_DATA.as_ptr(), offsets); + assert_vector_matches_u64( + loaded, + svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sw_gather_s64offset_u64() { + let offsets = svindex_s64(0, 4u32.try_into().unwrap()); + svsetffr(); + let _ = svld1sw_gather_s64offset_u64(svptrue_b32(), I32_DATA.as_ptr(), offsets); + let loaded = svldff1sw_gather_s64offset_u64(svptrue_b32(), I32_DATA.as_ptr(), offsets); + assert_vector_matches_u64( + loaded, + svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sb_gather_u32offset_s32() { + let offsets = svindex_u32(0, 1u32.try_into().unwrap()); + svsetffr(); + let _ = svld1sb_gather_u32offset_s32(svptrue_b8(), I8_DATA.as_ptr(), offsets); + let loaded = svldff1sb_gather_u32offset_s32(svptrue_b8(), I8_DATA.as_ptr(), offsets); + assert_vector_matches_i32( + loaded, + svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sh_gather_u32offset_s32() { + let offsets = svindex_u32(0, 2u32.try_into().unwrap()); + svsetffr(); + let _ = svld1sh_gather_u32offset_s32(svptrue_b16(), I16_DATA.as_ptr(), offsets); + let loaded = svldff1sh_gather_u32offset_s32(svptrue_b16(), I16_DATA.as_ptr(), offsets); + assert_vector_matches_i32( + loaded, + svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sb_gather_u32offset_u32() { + let offsets = svindex_u32(0, 1u32.try_into().unwrap()); + svsetffr(); + let _ = svld1sb_gather_u32offset_u32(svptrue_b8(), I8_DATA.as_ptr(), offsets); + let loaded = svldff1sb_gather_u32offset_u32(svptrue_b8(), I8_DATA.as_ptr(), offsets); + assert_vector_matches_u32( + loaded, + svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = 
"sve")] +unsafe fn test_svldff1sh_gather_u32offset_u32() { + let offsets = svindex_u32(0, 2u32.try_into().unwrap()); + svsetffr(); + let _ = svld1sh_gather_u32offset_u32(svptrue_b16(), I16_DATA.as_ptr(), offsets); + let loaded = svldff1sh_gather_u32offset_u32(svptrue_b16(), I16_DATA.as_ptr(), offsets); + assert_vector_matches_u32( + loaded, + svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sb_gather_u64offset_s64() { + let offsets = svindex_u64(0, 1u32.try_into().unwrap()); + svsetffr(); + let _ = svld1sb_gather_u64offset_s64(svptrue_b8(), I8_DATA.as_ptr(), offsets); + let loaded = svldff1sb_gather_u64offset_s64(svptrue_b8(), I8_DATA.as_ptr(), offsets); + assert_vector_matches_i64( + loaded, + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sh_gather_u64offset_s64() { + let offsets = svindex_u64(0, 2u32.try_into().unwrap()); + svsetffr(); + let _ = svld1sh_gather_u64offset_s64(svptrue_b16(), I16_DATA.as_ptr(), offsets); + let loaded = svldff1sh_gather_u64offset_s64(svptrue_b16(), I16_DATA.as_ptr(), offsets); + assert_vector_matches_i64( + loaded, + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sw_gather_u64offset_s64() { + let offsets = svindex_u64(0, 4u32.try_into().unwrap()); + svsetffr(); + let _ = svld1sw_gather_u64offset_s64(svptrue_b32(), I32_DATA.as_ptr(), offsets); + let loaded = svldff1sw_gather_u64offset_s64(svptrue_b32(), I32_DATA.as_ptr(), offsets); + assert_vector_matches_i64( + loaded, + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sb_gather_u64offset_u64() { + let offsets = svindex_u64(0, 1u32.try_into().unwrap()); + svsetffr(); + let _ = svld1sb_gather_u64offset_u64(svptrue_b8(), I8_DATA.as_ptr(), offsets); + let loaded = svldff1sb_gather_u64offset_u64(svptrue_b8(), I8_DATA.as_ptr(), offsets); + assert_vector_matches_u64( + loaded, + svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sh_gather_u64offset_u64() { + let offsets = svindex_u64(0, 2u32.try_into().unwrap()); + svsetffr(); + let _ = svld1sh_gather_u64offset_u64(svptrue_b16(), I16_DATA.as_ptr(), offsets); + let loaded = svldff1sh_gather_u64offset_u64(svptrue_b16(), I16_DATA.as_ptr(), offsets); + assert_vector_matches_u64( + loaded, + svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sw_gather_u64offset_u64() { + let offsets = svindex_u64(0, 4u32.try_into().unwrap()); + svsetffr(); + let _ = svld1sw_gather_u64offset_u64(svptrue_b32(), I32_DATA.as_ptr(), offsets); + let loaded = svldff1sw_gather_u64offset_u64(svptrue_b32(), I32_DATA.as_ptr(), offsets); + assert_vector_matches_u64( + loaded, + svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sb_gather_u32base_offset_s32() { + let bases = svindex_u32(0, 1u32.try_into().unwrap()); + svsetffr(); + let _ = svld1sb_gather_u32base_offset_s32( + svptrue_b8(), + bases, + I8_DATA.as_ptr() as i64 + 1u32 as i64, + ); + let loaded = svldff1sb_gather_u32base_offset_s32( + svptrue_b8(), + bases, + I8_DATA.as_ptr() as i64 + 1u32 as i64, + ); + assert_vector_matches_i32( + loaded, + 
svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sh_gather_u32base_offset_s32() { + let bases = svindex_u32(0, 2u32.try_into().unwrap()); + svsetffr(); + let _ = svld1sh_gather_u32base_offset_s32( + svptrue_b16(), + bases, + I16_DATA.as_ptr() as i64 + 2u32 as i64, + ); + let loaded = svldff1sh_gather_u32base_offset_s32( + svptrue_b16(), + bases, + I16_DATA.as_ptr() as i64 + 2u32 as i64, + ); + assert_vector_matches_i32( + loaded, + svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sb_gather_u32base_offset_u32() { + let bases = svindex_u32(0, 1u32.try_into().unwrap()); + svsetffr(); + let _ = svld1sb_gather_u32base_offset_u32( + svptrue_b8(), + bases, + I8_DATA.as_ptr() as i64 + 1u32 as i64, + ); + let loaded = svldff1sb_gather_u32base_offset_u32( + svptrue_b8(), + bases, + I8_DATA.as_ptr() as i64 + 1u32 as i64, + ); + assert_vector_matches_u32( + loaded, + svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sh_gather_u32base_offset_u32() { + let bases = svindex_u32(0, 2u32.try_into().unwrap()); + svsetffr(); + let _ = svld1sh_gather_u32base_offset_u32( + svptrue_b16(), + bases, + I16_DATA.as_ptr() as i64 + 2u32 as i64, + ); + let loaded = svldff1sh_gather_u32base_offset_u32( + svptrue_b16(), + bases, + I16_DATA.as_ptr() as i64 + 2u32 as i64, + ); + assert_vector_matches_u32( + loaded, + svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sb_gather_u64base_offset_s64() { + let bases = svdup_n_u64(I8_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 1u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b8(), bases, offsets); + svsetffr(); + let _ = svld1sb_gather_u64base_offset_s64(svptrue_b8(), bases, 1u32.try_into().unwrap()); + let loaded = svldff1sb_gather_u64base_offset_s64(svptrue_b8(), bases, 1u32.try_into().unwrap()); + assert_vector_matches_i64( + loaded, + svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sh_gather_u64base_offset_s64() { + let bases = svdup_n_u64(I16_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 2u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b16(), bases, offsets); + svsetffr(); + let _ = svld1sh_gather_u64base_offset_s64(svptrue_b16(), bases, 2u32.try_into().unwrap()); + let loaded = + svldff1sh_gather_u64base_offset_s64(svptrue_b16(), bases, 2u32.try_into().unwrap()); + assert_vector_matches_i64( + loaded, + svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sw_gather_u64base_offset_s64() { + let bases = svdup_n_u64(I32_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 4u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b32(), bases, offsets); + svsetffr(); + let _ = svld1sw_gather_u64base_offset_s64(svptrue_b32(), bases, 4u32.try_into().unwrap()); + let loaded = + svldff1sw_gather_u64base_offset_s64(svptrue_b32(), bases, 4u32.try_into().unwrap()); + assert_vector_matches_i64( + loaded, + svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sb_gather_u64base_offset_u64() { + let bases = svdup_n_u64(I8_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 
1u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b8(), bases, offsets); + svsetffr(); + let _ = svld1sb_gather_u64base_offset_u64(svptrue_b8(), bases, 1u32.try_into().unwrap()); + let loaded = svldff1sb_gather_u64base_offset_u64(svptrue_b8(), bases, 1u32.try_into().unwrap()); + assert_vector_matches_u64( + loaded, + svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sh_gather_u64base_offset_u64() { + let bases = svdup_n_u64(I16_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 2u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b16(), bases, offsets); + svsetffr(); + let _ = svld1sh_gather_u64base_offset_u64(svptrue_b16(), bases, 2u32.try_into().unwrap()); + let loaded = + svldff1sh_gather_u64base_offset_u64(svptrue_b16(), bases, 2u32.try_into().unwrap()); + assert_vector_matches_u64( + loaded, + svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sw_gather_u64base_offset_u64() { + let bases = svdup_n_u64(I32_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 4u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b32(), bases, offsets); + svsetffr(); + let _ = svld1sw_gather_u64base_offset_u64(svptrue_b32(), bases, 4u32.try_into().unwrap()); + let loaded = + svldff1sw_gather_u64base_offset_u64(svptrue_b32(), bases, 4u32.try_into().unwrap()); + assert_vector_matches_u64( + loaded, + svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sb_gather_u64base_s64() { + let bases = svdup_n_u64(I8_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 1u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b8(), bases, offsets); + svsetffr(); + let _ = svld1sb_gather_u64base_s64(svptrue_b8(), bases); + let loaded = svldff1sb_gather_u64base_s64(svptrue_b8(), bases); + assert_vector_matches_i64( + loaded, + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sh_gather_u64base_s64() { + let bases = svdup_n_u64(I16_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 2u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b16(), bases, offsets); + svsetffr(); + let _ = svld1sh_gather_u64base_s64(svptrue_b16(), bases); + let loaded = svldff1sh_gather_u64base_s64(svptrue_b16(), bases); + assert_vector_matches_i64( + loaded, + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sw_gather_u64base_s64() { + let bases = svdup_n_u64(I32_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 4u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b32(), bases, offsets); + svsetffr(); + let _ = svld1sw_gather_u64base_s64(svptrue_b32(), bases); + let loaded = svldff1sw_gather_u64base_s64(svptrue_b32(), bases); + assert_vector_matches_i64( + loaded, + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sb_gather_u64base_u64() { + let bases = svdup_n_u64(I8_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 1u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b8(), bases, offsets); + svsetffr(); + let _ = svld1sb_gather_u64base_u64(svptrue_b8(), bases); + let loaded = svldff1sb_gather_u64base_u64(svptrue_b8(), bases); + assert_vector_matches_u64( + loaded, + 
svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sh_gather_u64base_u64() { + let bases = svdup_n_u64(I16_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 2u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b16(), bases, offsets); + svsetffr(); + let _ = svld1sh_gather_u64base_u64(svptrue_b16(), bases); + let loaded = svldff1sh_gather_u64base_u64(svptrue_b16(), bases); + assert_vector_matches_u64( + loaded, + svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sw_gather_u64base_u64() { + let bases = svdup_n_u64(I32_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 4u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b32(), bases, offsets); + svsetffr(); + let _ = svld1sw_gather_u64base_u64(svptrue_b32(), bases); + let loaded = svldff1sw_gather_u64base_u64(svptrue_b32(), bases); + assert_vector_matches_u64( + loaded, + svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sb_s16() { + svsetffr(); + let _ = svld1sb_s16(svptrue_b8(), I8_DATA.as_ptr()); + let loaded = svldff1sb_s16(svptrue_b8(), I8_DATA.as_ptr()); + assert_vector_matches_i16( + loaded, + svindex_s16((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sb_s32() { + svsetffr(); + let _ = svld1sb_s32(svptrue_b8(), I8_DATA.as_ptr()); + let loaded = svldff1sb_s32(svptrue_b8(), I8_DATA.as_ptr()); + assert_vector_matches_i32( + loaded, + svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sh_s32() { + svsetffr(); + let _ = svld1sh_s32(svptrue_b16(), I16_DATA.as_ptr()); + let loaded = svldff1sh_s32(svptrue_b16(), I16_DATA.as_ptr()); + assert_vector_matches_i32( + loaded, + svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sb_s64() { + svsetffr(); + let _ = svld1sb_s64(svptrue_b8(), I8_DATA.as_ptr()); + let loaded = svldff1sb_s64(svptrue_b8(), I8_DATA.as_ptr()); + assert_vector_matches_i64( + loaded, + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sh_s64() { + svsetffr(); + let _ = svld1sh_s64(svptrue_b16(), I16_DATA.as_ptr()); + let loaded = svldff1sh_s64(svptrue_b16(), I16_DATA.as_ptr()); + assert_vector_matches_i64( + loaded, + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sw_s64() { + svsetffr(); + let _ = svld1sw_s64(svptrue_b32(), I32_DATA.as_ptr()); + let loaded = svldff1sw_s64(svptrue_b32(), I32_DATA.as_ptr()); + assert_vector_matches_i64( + loaded, + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sb_u16() { + svsetffr(); + let _ = svld1sb_u16(svptrue_b8(), I8_DATA.as_ptr()); + let loaded = svldff1sb_u16(svptrue_b8(), I8_DATA.as_ptr()); + assert_vector_matches_u16( + loaded, + svindex_u16((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sb_u32() { + svsetffr(); + let _ = svld1sb_u32(svptrue_b8(), I8_DATA.as_ptr()); + let loaded = svldff1sb_u32(svptrue_b8(), I8_DATA.as_ptr()); + assert_vector_matches_u32( + loaded, + 
svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sh_u32() { + svsetffr(); + let _ = svld1sh_u32(svptrue_b16(), I16_DATA.as_ptr()); + let loaded = svldff1sh_u32(svptrue_b16(), I16_DATA.as_ptr()); + assert_vector_matches_u32( + loaded, + svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sb_u64() { + svsetffr(); + let _ = svld1sb_u64(svptrue_b8(), I8_DATA.as_ptr()); + let loaded = svldff1sb_u64(svptrue_b8(), I8_DATA.as_ptr()); + assert_vector_matches_u64( + loaded, + svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sh_u64() { + svsetffr(); + let _ = svld1sh_u64(svptrue_b16(), I16_DATA.as_ptr()); + let loaded = svldff1sh_u64(svptrue_b16(), I16_DATA.as_ptr()); + assert_vector_matches_u64( + loaded, + svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sw_u64() { + svsetffr(); + let _ = svld1sw_u64(svptrue_b32(), I32_DATA.as_ptr()); + let loaded = svldff1sw_u64(svptrue_b32(), I32_DATA.as_ptr()); + assert_vector_matches_u64( + loaded, + svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sb_vnum_s16() { + svsetffr(); + let _ = svld1sb_vnum_s16(svptrue_b8(), I8_DATA.as_ptr(), 1); + let loaded = svldff1sb_vnum_s16(svptrue_b8(), I8_DATA.as_ptr(), 1); + let len = svcnth() as usize; + assert_vector_matches_i16( + loaded, + svindex_s16( + (len + 0usize).try_into().unwrap(), + 1usize.try_into().unwrap(), + ), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sb_vnum_s32() { + svsetffr(); + let _ = svld1sb_vnum_s32(svptrue_b8(), I8_DATA.as_ptr(), 1); + let loaded = svldff1sb_vnum_s32(svptrue_b8(), I8_DATA.as_ptr(), 1); + let len = svcntw() as usize; + assert_vector_matches_i32( + loaded, + svindex_s32( + (len + 0usize).try_into().unwrap(), + 1usize.try_into().unwrap(), + ), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sh_vnum_s32() { + svsetffr(); + let _ = svld1sh_vnum_s32(svptrue_b16(), I16_DATA.as_ptr(), 1); + let loaded = svldff1sh_vnum_s32(svptrue_b16(), I16_DATA.as_ptr(), 1); + let len = svcntw() as usize; + assert_vector_matches_i32( + loaded, + svindex_s32( + (len + 0usize).try_into().unwrap(), + 1usize.try_into().unwrap(), + ), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sb_vnum_s64() { + svsetffr(); + let _ = svld1sb_vnum_s64(svptrue_b8(), I8_DATA.as_ptr(), 1); + let loaded = svldff1sb_vnum_s64(svptrue_b8(), I8_DATA.as_ptr(), 1); + let len = svcntd() as usize; + assert_vector_matches_i64( + loaded, + svindex_s64( + (len + 0usize).try_into().unwrap(), + 1usize.try_into().unwrap(), + ), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sh_vnum_s64() { + svsetffr(); + let _ = svld1sh_vnum_s64(svptrue_b16(), I16_DATA.as_ptr(), 1); + let loaded = svldff1sh_vnum_s64(svptrue_b16(), I16_DATA.as_ptr(), 1); + let len = svcntd() as usize; + assert_vector_matches_i64( + loaded, + svindex_s64( + (len + 0usize).try_into().unwrap(), + 1usize.try_into().unwrap(), + ), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sw_vnum_s64() { + svsetffr(); + let _ = svld1sw_vnum_s64(svptrue_b32(), I32_DATA.as_ptr(), 1); + let loaded = svldff1sw_vnum_s64(svptrue_b32(), I32_DATA.as_ptr(), 1); + let len = svcntd() as usize; + assert_vector_matches_i64( + 
loaded, + svindex_s64( + (len + 0usize).try_into().unwrap(), + 1usize.try_into().unwrap(), + ), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sb_vnum_u16() { + svsetffr(); + let _ = svld1sb_vnum_u16(svptrue_b8(), I8_DATA.as_ptr(), 1); + let loaded = svldff1sb_vnum_u16(svptrue_b8(), I8_DATA.as_ptr(), 1); + let len = svcnth() as usize; + assert_vector_matches_u16( + loaded, + svindex_u16( + (len + 0usize).try_into().unwrap(), + 1usize.try_into().unwrap(), + ), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sb_vnum_u32() { + svsetffr(); + let _ = svld1sb_vnum_u32(svptrue_b8(), I8_DATA.as_ptr(), 1); + let loaded = svldff1sb_vnum_u32(svptrue_b8(), I8_DATA.as_ptr(), 1); + let len = svcntw() as usize; + assert_vector_matches_u32( + loaded, + svindex_u32( + (len + 0usize).try_into().unwrap(), + 1usize.try_into().unwrap(), + ), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sh_vnum_u32() { + svsetffr(); + let _ = svld1sh_vnum_u32(svptrue_b16(), I16_DATA.as_ptr(), 1); + let loaded = svldff1sh_vnum_u32(svptrue_b16(), I16_DATA.as_ptr(), 1); + let len = svcntw() as usize; + assert_vector_matches_u32( + loaded, + svindex_u32( + (len + 0usize).try_into().unwrap(), + 1usize.try_into().unwrap(), + ), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sb_vnum_u64() { + svsetffr(); + let _ = svld1sb_vnum_u64(svptrue_b8(), I8_DATA.as_ptr(), 1); + let loaded = svldff1sb_vnum_u64(svptrue_b8(), I8_DATA.as_ptr(), 1); + let len = svcntd() as usize; + assert_vector_matches_u64( + loaded, + svindex_u64( + (len + 0usize).try_into().unwrap(), + 1usize.try_into().unwrap(), + ), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sh_vnum_u64() { + svsetffr(); + let _ = svld1sh_vnum_u64(svptrue_b16(), I16_DATA.as_ptr(), 1); + let loaded = svldff1sh_vnum_u64(svptrue_b16(), I16_DATA.as_ptr(), 1); + let len = svcntd() as usize; + assert_vector_matches_u64( + loaded, + svindex_u64( + (len + 0usize).try_into().unwrap(), + 1usize.try_into().unwrap(), + ), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sw_vnum_u64() { + svsetffr(); + let _ = svld1sw_vnum_u64(svptrue_b32(), I32_DATA.as_ptr(), 1); + let loaded = svldff1sw_vnum_u64(svptrue_b32(), I32_DATA.as_ptr(), 1); + let len = svcntd() as usize; + assert_vector_matches_u64( + loaded, + svindex_u64( + (len + 0usize).try_into().unwrap(), + 1usize.try_into().unwrap(), + ), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sh_gather_s32index_s32() { + let indices = svindex_s32(0, 1); + svsetffr(); + let _ = svld1sh_gather_s32index_s32(svptrue_b16(), I16_DATA.as_ptr(), indices); + let loaded = svldff1sh_gather_s32index_s32(svptrue_b16(), I16_DATA.as_ptr(), indices); + assert_vector_matches_i32( + loaded, + svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sh_gather_s32index_u32() { + let indices = svindex_s32(0, 1); + svsetffr(); + let _ = svld1sh_gather_s32index_u32(svptrue_b16(), I16_DATA.as_ptr(), indices); + let loaded = svldff1sh_gather_s32index_u32(svptrue_b16(), I16_DATA.as_ptr(), indices); + assert_vector_matches_u32( + loaded, + svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sh_gather_s64index_s64() { + let indices = svindex_s64(0, 1); + svsetffr(); + let _ = svld1sh_gather_s64index_s64(svptrue_b16(), I16_DATA.as_ptr(), indices); + let loaded = svldff1sh_gather_s64index_s64(svptrue_b16(), I16_DATA.as_ptr(), indices); + 
assert_vector_matches_i64( + loaded, + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sw_gather_s64index_s64() { + let indices = svindex_s64(0, 1); + svsetffr(); + let _ = svld1sw_gather_s64index_s64(svptrue_b32(), I32_DATA.as_ptr(), indices); + let loaded = svldff1sw_gather_s64index_s64(svptrue_b32(), I32_DATA.as_ptr(), indices); + assert_vector_matches_i64( + loaded, + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sh_gather_s64index_u64() { + let indices = svindex_s64(0, 1); + svsetffr(); + let _ = svld1sh_gather_s64index_u64(svptrue_b16(), I16_DATA.as_ptr(), indices); + let loaded = svldff1sh_gather_s64index_u64(svptrue_b16(), I16_DATA.as_ptr(), indices); + assert_vector_matches_u64( + loaded, + svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sw_gather_s64index_u64() { + let indices = svindex_s64(0, 1); + svsetffr(); + let _ = svld1sw_gather_s64index_u64(svptrue_b32(), I32_DATA.as_ptr(), indices); + let loaded = svldff1sw_gather_s64index_u64(svptrue_b32(), I32_DATA.as_ptr(), indices); + assert_vector_matches_u64( + loaded, + svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sh_gather_u32index_s32() { + let indices = svindex_u32(0, 1); + svsetffr(); + let _ = svld1sh_gather_u32index_s32(svptrue_b16(), I16_DATA.as_ptr(), indices); + let loaded = svldff1sh_gather_u32index_s32(svptrue_b16(), I16_DATA.as_ptr(), indices); + assert_vector_matches_i32( + loaded, + svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sh_gather_u32index_u32() { + let indices = svindex_u32(0, 1); + svsetffr(); + let _ = svld1sh_gather_u32index_u32(svptrue_b16(), I16_DATA.as_ptr(), indices); + let loaded = svldff1sh_gather_u32index_u32(svptrue_b16(), I16_DATA.as_ptr(), indices); + assert_vector_matches_u32( + loaded, + svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sh_gather_u64index_s64() { + let indices = svindex_u64(0, 1); + svsetffr(); + let _ = svld1sh_gather_u64index_s64(svptrue_b16(), I16_DATA.as_ptr(), indices); + let loaded = svldff1sh_gather_u64index_s64(svptrue_b16(), I16_DATA.as_ptr(), indices); + assert_vector_matches_i64( + loaded, + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sw_gather_u64index_s64() { + let indices = svindex_u64(0, 1); + svsetffr(); + let _ = svld1sw_gather_u64index_s64(svptrue_b32(), I32_DATA.as_ptr(), indices); + let loaded = svldff1sw_gather_u64index_s64(svptrue_b32(), I32_DATA.as_ptr(), indices); + assert_vector_matches_i64( + loaded, + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sh_gather_u64index_u64() { + let indices = svindex_u64(0, 1); + svsetffr(); + let _ = svld1sh_gather_u64index_u64(svptrue_b16(), I16_DATA.as_ptr(), indices); + let loaded = svldff1sh_gather_u64index_u64(svptrue_b16(), I16_DATA.as_ptr(), indices); + assert_vector_matches_u64( + loaded, + svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn 
test_svldff1sw_gather_u64index_u64() { + let indices = svindex_u64(0, 1); + svsetffr(); + let _ = svld1sw_gather_u64index_u64(svptrue_b32(), I32_DATA.as_ptr(), indices); + let loaded = svldff1sw_gather_u64index_u64(svptrue_b32(), I32_DATA.as_ptr(), indices); + assert_vector_matches_u64( + loaded, + svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sh_gather_u32base_index_s32() { + let bases = svindex_u32(0, 2u32.try_into().unwrap()); + svsetffr(); + let _ = svld1sh_gather_u32base_index_s32( + svptrue_b16(), + bases, + I16_DATA.as_ptr() as i64 / (2u32 as i64) + 1, + ); + let loaded = svldff1sh_gather_u32base_index_s32( + svptrue_b16(), + bases, + I16_DATA.as_ptr() as i64 / (2u32 as i64) + 1, + ); + assert_vector_matches_i32( + loaded, + svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sh_gather_u32base_index_u32() { + let bases = svindex_u32(0, 2u32.try_into().unwrap()); + svsetffr(); + let _ = svld1sh_gather_u32base_index_u32( + svptrue_b16(), + bases, + I16_DATA.as_ptr() as i64 / (2u32 as i64) + 1, + ); + let loaded = svldff1sh_gather_u32base_index_u32( + svptrue_b16(), + bases, + I16_DATA.as_ptr() as i64 / (2u32 as i64) + 1, + ); + assert_vector_matches_u32( + loaded, + svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sh_gather_u64base_index_s64() { + let bases = svdup_n_u64(I16_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 2u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b16(), bases, offsets); + svsetffr(); + let _ = svld1sh_gather_u64base_index_s64(svptrue_b16(), bases, 1.try_into().unwrap()); + let loaded = svldff1sh_gather_u64base_index_s64(svptrue_b16(), bases, 1.try_into().unwrap()); + assert_vector_matches_i64( + loaded, + svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sw_gather_u64base_index_s64() { + let bases = svdup_n_u64(I32_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 4u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b32(), bases, offsets); + svsetffr(); + let _ = svld1sw_gather_u64base_index_s64(svptrue_b32(), bases, 1.try_into().unwrap()); + let loaded = svldff1sw_gather_u64base_index_s64(svptrue_b32(), bases, 1.try_into().unwrap()); + assert_vector_matches_i64( + loaded, + svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sh_gather_u64base_index_u64() { + let bases = svdup_n_u64(I16_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 2u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b16(), bases, offsets); + svsetffr(); + let _ = svld1sh_gather_u64base_index_u64(svptrue_b16(), bases, 1.try_into().unwrap()); + let loaded = svldff1sh_gather_u64base_index_u64(svptrue_b16(), bases, 1.try_into().unwrap()); + assert_vector_matches_u64( + loaded, + svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1sw_gather_u64base_index_u64() { + let bases = svdup_n_u64(I32_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 4u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b32(), bases, offsets); + svsetffr(); + let _ = svld1sw_gather_u64base_index_u64(svptrue_b32(), bases, 1.try_into().unwrap()); + let loaded = 
svldff1sw_gather_u64base_index_u64(svptrue_b32(), bases, 1.try_into().unwrap()); + assert_vector_matches_u64( + loaded, + svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1ub_gather_s32offset_s32() { + let offsets = svindex_s32(0, 1u32.try_into().unwrap()); + svsetffr(); + let _ = svld1ub_gather_s32offset_s32(svptrue_b8(), U8_DATA.as_ptr(), offsets); + let loaded = svldff1ub_gather_s32offset_s32(svptrue_b8(), U8_DATA.as_ptr(), offsets); + assert_vector_matches_i32( + loaded, + svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1uh_gather_s32offset_s32() { + let offsets = svindex_s32(0, 2u32.try_into().unwrap()); + svsetffr(); + let _ = svld1uh_gather_s32offset_s32(svptrue_b16(), U16_DATA.as_ptr(), offsets); + let loaded = svldff1uh_gather_s32offset_s32(svptrue_b16(), U16_DATA.as_ptr(), offsets); + assert_vector_matches_i32( + loaded, + svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1ub_gather_s32offset_u32() { + let offsets = svindex_s32(0, 1u32.try_into().unwrap()); + svsetffr(); + let _ = svld1ub_gather_s32offset_u32(svptrue_b8(), U8_DATA.as_ptr(), offsets); + let loaded = svldff1ub_gather_s32offset_u32(svptrue_b8(), U8_DATA.as_ptr(), offsets); + assert_vector_matches_u32( + loaded, + svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1uh_gather_s32offset_u32() { + let offsets = svindex_s32(0, 2u32.try_into().unwrap()); + svsetffr(); + let _ = svld1uh_gather_s32offset_u32(svptrue_b16(), U16_DATA.as_ptr(), offsets); + let loaded = svldff1uh_gather_s32offset_u32(svptrue_b16(), U16_DATA.as_ptr(), offsets); + assert_vector_matches_u32( + loaded, + svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1ub_gather_s64offset_s64() { + let offsets = svindex_s64(0, 1u32.try_into().unwrap()); + svsetffr(); + let _ = svld1ub_gather_s64offset_s64(svptrue_b8(), U8_DATA.as_ptr(), offsets); + let loaded = svldff1ub_gather_s64offset_s64(svptrue_b8(), U8_DATA.as_ptr(), offsets); + assert_vector_matches_i64( + loaded, + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1uh_gather_s64offset_s64() { + let offsets = svindex_s64(0, 2u32.try_into().unwrap()); + svsetffr(); + let _ = svld1uh_gather_s64offset_s64(svptrue_b16(), U16_DATA.as_ptr(), offsets); + let loaded = svldff1uh_gather_s64offset_s64(svptrue_b16(), U16_DATA.as_ptr(), offsets); + assert_vector_matches_i64( + loaded, + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1uw_gather_s64offset_s64() { + let offsets = svindex_s64(0, 4u32.try_into().unwrap()); + svsetffr(); + let _ = svld1uw_gather_s64offset_s64(svptrue_b32(), U32_DATA.as_ptr(), offsets); + let loaded = svldff1uw_gather_s64offset_s64(svptrue_b32(), U32_DATA.as_ptr(), offsets); + assert_vector_matches_i64( + loaded, + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1ub_gather_s64offset_u64() { + let offsets = svindex_s64(0, 1u32.try_into().unwrap()); + svsetffr(); + let _ = svld1ub_gather_s64offset_u64(svptrue_b8(), U8_DATA.as_ptr(), offsets); + let 
loaded = svldff1ub_gather_s64offset_u64(svptrue_b8(), U8_DATA.as_ptr(), offsets); + assert_vector_matches_u64( + loaded, + svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1uh_gather_s64offset_u64() { + let offsets = svindex_s64(0, 2u32.try_into().unwrap()); + svsetffr(); + let _ = svld1uh_gather_s64offset_u64(svptrue_b16(), U16_DATA.as_ptr(), offsets); + let loaded = svldff1uh_gather_s64offset_u64(svptrue_b16(), U16_DATA.as_ptr(), offsets); + assert_vector_matches_u64( + loaded, + svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1uw_gather_s64offset_u64() { + let offsets = svindex_s64(0, 4u32.try_into().unwrap()); + svsetffr(); + let _ = svld1uw_gather_s64offset_u64(svptrue_b32(), U32_DATA.as_ptr(), offsets); + let loaded = svldff1uw_gather_s64offset_u64(svptrue_b32(), U32_DATA.as_ptr(), offsets); + assert_vector_matches_u64( + loaded, + svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1ub_gather_u32offset_s32() { + let offsets = svindex_u32(0, 1u32.try_into().unwrap()); + svsetffr(); + let _ = svld1ub_gather_u32offset_s32(svptrue_b8(), U8_DATA.as_ptr(), offsets); + let loaded = svldff1ub_gather_u32offset_s32(svptrue_b8(), U8_DATA.as_ptr(), offsets); + assert_vector_matches_i32( + loaded, + svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1uh_gather_u32offset_s32() { + let offsets = svindex_u32(0, 2u32.try_into().unwrap()); + svsetffr(); + let _ = svld1uh_gather_u32offset_s32(svptrue_b16(), U16_DATA.as_ptr(), offsets); + let loaded = svldff1uh_gather_u32offset_s32(svptrue_b16(), U16_DATA.as_ptr(), offsets); + assert_vector_matches_i32( + loaded, + svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1ub_gather_u32offset_u32() { + let offsets = svindex_u32(0, 1u32.try_into().unwrap()); + svsetffr(); + let _ = svld1ub_gather_u32offset_u32(svptrue_b8(), U8_DATA.as_ptr(), offsets); + let loaded = svldff1ub_gather_u32offset_u32(svptrue_b8(), U8_DATA.as_ptr(), offsets); + assert_vector_matches_u32( + loaded, + svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1uh_gather_u32offset_u32() { + let offsets = svindex_u32(0, 2u32.try_into().unwrap()); + svsetffr(); + let _ = svld1uh_gather_u32offset_u32(svptrue_b16(), U16_DATA.as_ptr(), offsets); + let loaded = svldff1uh_gather_u32offset_u32(svptrue_b16(), U16_DATA.as_ptr(), offsets); + assert_vector_matches_u32( + loaded, + svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1ub_gather_u64offset_s64() { + let offsets = svindex_u64(0, 1u32.try_into().unwrap()); + svsetffr(); + let _ = svld1ub_gather_u64offset_s64(svptrue_b8(), U8_DATA.as_ptr(), offsets); + let loaded = svldff1ub_gather_u64offset_s64(svptrue_b8(), U8_DATA.as_ptr(), offsets); + assert_vector_matches_i64( + loaded, + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1uh_gather_u64offset_s64() { + let offsets = svindex_u64(0, 2u32.try_into().unwrap()); + svsetffr(); + let _ = svld1uh_gather_u64offset_s64(svptrue_b16(), U16_DATA.as_ptr(), offsets); + let 
loaded = svldff1uh_gather_u64offset_s64(svptrue_b16(), U16_DATA.as_ptr(), offsets); + assert_vector_matches_i64( + loaded, + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1uw_gather_u64offset_s64() { + let offsets = svindex_u64(0, 4u32.try_into().unwrap()); + svsetffr(); + let _ = svld1uw_gather_u64offset_s64(svptrue_b32(), U32_DATA.as_ptr(), offsets); + let loaded = svldff1uw_gather_u64offset_s64(svptrue_b32(), U32_DATA.as_ptr(), offsets); + assert_vector_matches_i64( + loaded, + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1ub_gather_u64offset_u64() { + let offsets = svindex_u64(0, 1u32.try_into().unwrap()); + svsetffr(); + let _ = svld1ub_gather_u64offset_u64(svptrue_b8(), U8_DATA.as_ptr(), offsets); + let loaded = svldff1ub_gather_u64offset_u64(svptrue_b8(), U8_DATA.as_ptr(), offsets); + assert_vector_matches_u64( + loaded, + svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1uh_gather_u64offset_u64() { + let offsets = svindex_u64(0, 2u32.try_into().unwrap()); + svsetffr(); + let _ = svld1uh_gather_u64offset_u64(svptrue_b16(), U16_DATA.as_ptr(), offsets); + let loaded = svldff1uh_gather_u64offset_u64(svptrue_b16(), U16_DATA.as_ptr(), offsets); + assert_vector_matches_u64( + loaded, + svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1uw_gather_u64offset_u64() { + let offsets = svindex_u64(0, 4u32.try_into().unwrap()); + svsetffr(); + let _ = svld1uw_gather_u64offset_u64(svptrue_b32(), U32_DATA.as_ptr(), offsets); + let loaded = svldff1uw_gather_u64offset_u64(svptrue_b32(), U32_DATA.as_ptr(), offsets); + assert_vector_matches_u64( + loaded, + svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1ub_gather_u32base_offset_s32() { + let bases = svindex_u32(0, 1u32.try_into().unwrap()); + svsetffr(); + let _ = svld1ub_gather_u32base_offset_s32( + svptrue_b8(), + bases, + U8_DATA.as_ptr() as i64 + 1u32 as i64, + ); + let loaded = svldff1ub_gather_u32base_offset_s32( + svptrue_b8(), + bases, + U8_DATA.as_ptr() as i64 + 1u32 as i64, + ); + assert_vector_matches_i32( + loaded, + svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1uh_gather_u32base_offset_s32() { + let bases = svindex_u32(0, 2u32.try_into().unwrap()); + svsetffr(); + let _ = svld1uh_gather_u32base_offset_s32( + svptrue_b16(), + bases, + U16_DATA.as_ptr() as i64 + 2u32 as i64, + ); + let loaded = svldff1uh_gather_u32base_offset_s32( + svptrue_b16(), + bases, + U16_DATA.as_ptr() as i64 + 2u32 as i64, + ); + assert_vector_matches_i32( + loaded, + svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1ub_gather_u32base_offset_u32() { + let bases = svindex_u32(0, 1u32.try_into().unwrap()); + svsetffr(); + let _ = svld1ub_gather_u32base_offset_u32( + svptrue_b8(), + bases, + U8_DATA.as_ptr() as i64 + 1u32 as i64, + ); + let loaded = svldff1ub_gather_u32base_offset_u32( + svptrue_b8(), + bases, + U8_DATA.as_ptr() as i64 + 1u32 as i64, + ); + assert_vector_matches_u32( + loaded, + svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = 
"sve")] +unsafe fn test_svldff1uh_gather_u32base_offset_u32() { + let bases = svindex_u32(0, 2u32.try_into().unwrap()); + svsetffr(); + let _ = svld1uh_gather_u32base_offset_u32( + svptrue_b16(), + bases, + U16_DATA.as_ptr() as i64 + 2u32 as i64, + ); + let loaded = svldff1uh_gather_u32base_offset_u32( + svptrue_b16(), + bases, + U16_DATA.as_ptr() as i64 + 2u32 as i64, + ); + assert_vector_matches_u32( + loaded, + svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1ub_gather_u64base_offset_s64() { + let bases = svdup_n_u64(U8_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 1u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b8(), bases, offsets); + svsetffr(); + let _ = svld1ub_gather_u64base_offset_s64(svptrue_b8(), bases, 1u32.try_into().unwrap()); + let loaded = svldff1ub_gather_u64base_offset_s64(svptrue_b8(), bases, 1u32.try_into().unwrap()); + assert_vector_matches_i64( + loaded, + svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1uh_gather_u64base_offset_s64() { + let bases = svdup_n_u64(U16_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 2u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b16(), bases, offsets); + svsetffr(); + let _ = svld1uh_gather_u64base_offset_s64(svptrue_b16(), bases, 2u32.try_into().unwrap()); + let loaded = + svldff1uh_gather_u64base_offset_s64(svptrue_b16(), bases, 2u32.try_into().unwrap()); + assert_vector_matches_i64( + loaded, + svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1uw_gather_u64base_offset_s64() { + let bases = svdup_n_u64(U32_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 4u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b32(), bases, offsets); + svsetffr(); + let _ = svld1uw_gather_u64base_offset_s64(svptrue_b32(), bases, 4u32.try_into().unwrap()); + let loaded = + svldff1uw_gather_u64base_offset_s64(svptrue_b32(), bases, 4u32.try_into().unwrap()); + assert_vector_matches_i64( + loaded, + svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1ub_gather_u64base_offset_u64() { + let bases = svdup_n_u64(U8_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 1u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b8(), bases, offsets); + svsetffr(); + let _ = svld1ub_gather_u64base_offset_u64(svptrue_b8(), bases, 1u32.try_into().unwrap()); + let loaded = svldff1ub_gather_u64base_offset_u64(svptrue_b8(), bases, 1u32.try_into().unwrap()); + assert_vector_matches_u64( + loaded, + svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1uh_gather_u64base_offset_u64() { + let bases = svdup_n_u64(U16_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 2u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b16(), bases, offsets); + svsetffr(); + let _ = svld1uh_gather_u64base_offset_u64(svptrue_b16(), bases, 2u32.try_into().unwrap()); + let loaded = + svldff1uh_gather_u64base_offset_u64(svptrue_b16(), bases, 2u32.try_into().unwrap()); + assert_vector_matches_u64( loaded, - svindex_s64( - (len + 0usize).try_into().unwrap(), - 1usize.try_into().unwrap(), - ), + svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), ); } #[simd_test(enable = "sve")] -unsafe fn test_svldff1sw_vnum_s64() 
{ +unsafe fn test_svldff1uw_gather_u64base_offset_u64() { + let bases = svdup_n_u64(U32_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 4u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b32(), bases, offsets); svsetffr(); - let _ = svld1sw_vnum_s64(svptrue_b32(), I32_DATA.as_ptr(), 1); - let loaded = svldff1sw_vnum_s64(svptrue_b32(), I32_DATA.as_ptr(), 1); - let len = svcntd() as usize; - assert_vector_matches_i64( + let _ = svld1uw_gather_u64base_offset_u64(svptrue_b32(), bases, 4u32.try_into().unwrap()); + let loaded = + svldff1uw_gather_u64base_offset_u64(svptrue_b32(), bases, 4u32.try_into().unwrap()); + assert_vector_matches_u64( loaded, - svindex_s64( - (len + 0usize).try_into().unwrap(), - 1usize.try_into().unwrap(), - ), + svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), ); } #[simd_test(enable = "sve")] -unsafe fn test_svldff1sb_vnum_u16() { +unsafe fn test_svldff1ub_gather_u64base_s64() { + let bases = svdup_n_u64(U8_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 1u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b8(), bases, offsets); svsetffr(); - let _ = svld1sb_vnum_u16(svptrue_b8(), I8_DATA.as_ptr(), 1); - let loaded = svldff1sb_vnum_u16(svptrue_b8(), I8_DATA.as_ptr(), 1); - let len = svcnth() as usize; - assert_vector_matches_u16( + let _ = svld1ub_gather_u64base_s64(svptrue_b8(), bases); + let loaded = svldff1ub_gather_u64base_s64(svptrue_b8(), bases); + assert_vector_matches_i64( loaded, - svindex_u16( - (len + 0usize).try_into().unwrap(), - 1usize.try_into().unwrap(), - ), + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), ); } #[simd_test(enable = "sve")] -unsafe fn test_svldff1sb_vnum_u32() { +unsafe fn test_svldff1uh_gather_u64base_s64() { + let bases = svdup_n_u64(U16_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 2u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b16(), bases, offsets); svsetffr(); - let _ = svld1sb_vnum_u32(svptrue_b8(), I8_DATA.as_ptr(), 1); - let loaded = svldff1sb_vnum_u32(svptrue_b8(), I8_DATA.as_ptr(), 1); - let len = svcntw() as usize; - assert_vector_matches_u32( + let _ = svld1uh_gather_u64base_s64(svptrue_b16(), bases); + let loaded = svldff1uh_gather_u64base_s64(svptrue_b16(), bases); + assert_vector_matches_i64( loaded, - svindex_u32( - (len + 0usize).try_into().unwrap(), - 1usize.try_into().unwrap(), - ), + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), ); } #[simd_test(enable = "sve")] -unsafe fn test_svldff1sh_vnum_u32() { +unsafe fn test_svldff1uw_gather_u64base_s64() { + let bases = svdup_n_u64(U32_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 4u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b32(), bases, offsets); svsetffr(); - let _ = svld1sh_vnum_u32(svptrue_b16(), I16_DATA.as_ptr(), 1); - let loaded = svldff1sh_vnum_u32(svptrue_b16(), I16_DATA.as_ptr(), 1); - let len = svcntw() as usize; - assert_vector_matches_u32( + let _ = svld1uw_gather_u64base_s64(svptrue_b32(), bases); + let loaded = svldff1uw_gather_u64base_s64(svptrue_b32(), bases); + assert_vector_matches_i64( loaded, - svindex_u32( - (len + 0usize).try_into().unwrap(), - 1usize.try_into().unwrap(), - ), + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), ); } #[simd_test(enable = "sve")] -unsafe fn test_svldff1sb_vnum_u64() { +unsafe fn test_svldff1ub_gather_u64base_u64() { + let bases = svdup_n_u64(U8_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 1u32.try_into().unwrap()); + let bases = 
svadd_u64_x(svptrue_b8(), bases, offsets); svsetffr(); - let _ = svld1sb_vnum_u64(svptrue_b8(), I8_DATA.as_ptr(), 1); - let loaded = svldff1sb_vnum_u64(svptrue_b8(), I8_DATA.as_ptr(), 1); - let len = svcntd() as usize; + let _ = svld1ub_gather_u64base_u64(svptrue_b8(), bases); + let loaded = svldff1ub_gather_u64base_u64(svptrue_b8(), bases); assert_vector_matches_u64( loaded, - svindex_u64( - (len + 0usize).try_into().unwrap(), - 1usize.try_into().unwrap(), - ), + svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), ); } #[simd_test(enable = "sve")] -unsafe fn test_svldff1sh_vnum_u64() { +unsafe fn test_svldff1uh_gather_u64base_u64() { + let bases = svdup_n_u64(U16_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 2u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b16(), bases, offsets); svsetffr(); - let _ = svld1sh_vnum_u64(svptrue_b16(), I16_DATA.as_ptr(), 1); - let loaded = svldff1sh_vnum_u64(svptrue_b16(), I16_DATA.as_ptr(), 1); - let len = svcntd() as usize; + let _ = svld1uh_gather_u64base_u64(svptrue_b16(), bases); + let loaded = svldff1uh_gather_u64base_u64(svptrue_b16(), bases); assert_vector_matches_u64( loaded, - svindex_u64( - (len + 0usize).try_into().unwrap(), - 1usize.try_into().unwrap(), - ), + svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), ); } #[simd_test(enable = "sve")] -unsafe fn test_svldff1sw_vnum_u64() { +unsafe fn test_svldff1uw_gather_u64base_u64() { + let bases = svdup_n_u64(U32_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 4u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b32(), bases, offsets); svsetffr(); - let _ = svld1sw_vnum_u64(svptrue_b32(), I32_DATA.as_ptr(), 1); - let loaded = svldff1sw_vnum_u64(svptrue_b32(), I32_DATA.as_ptr(), 1); - let len = svcntd() as usize; + let _ = svld1uw_gather_u64base_u64(svptrue_b32(), bases); + let loaded = svldff1uw_gather_u64base_u64(svptrue_b32(), bases); assert_vector_matches_u64( loaded, - svindex_u64( - (len + 0usize).try_into().unwrap(), - 1usize.try_into().unwrap(), - ), + svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), ); } #[simd_test(enable = "sve")] @@ -7789,6 +9457,228 @@ unsafe fn test_svldff1uw_vnum_u64() { ); } #[simd_test(enable = "sve")] +unsafe fn test_svldff1uh_gather_s32index_s32() { + let indices = svindex_s32(0, 1); + svsetffr(); + let _ = svld1uh_gather_s32index_s32(svptrue_b16(), U16_DATA.as_ptr(), indices); + let loaded = svldff1uh_gather_s32index_s32(svptrue_b16(), U16_DATA.as_ptr(), indices); + assert_vector_matches_i32( + loaded, + svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1uh_gather_s32index_u32() { + let indices = svindex_s32(0, 1); + svsetffr(); + let _ = svld1uh_gather_s32index_u32(svptrue_b16(), U16_DATA.as_ptr(), indices); + let loaded = svldff1uh_gather_s32index_u32(svptrue_b16(), U16_DATA.as_ptr(), indices); + assert_vector_matches_u32( + loaded, + svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1uh_gather_s64index_s64() { + let indices = svindex_s64(0, 1); + svsetffr(); + let _ = svld1uh_gather_s64index_s64(svptrue_b16(), U16_DATA.as_ptr(), indices); + let loaded = svldff1uh_gather_s64index_s64(svptrue_b16(), U16_DATA.as_ptr(), indices); + assert_vector_matches_i64( + loaded, + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn 
test_svldff1uw_gather_s64index_s64() { + let indices = svindex_s64(0, 1); + svsetffr(); + let _ = svld1uw_gather_s64index_s64(svptrue_b32(), U32_DATA.as_ptr(), indices); + let loaded = svldff1uw_gather_s64index_s64(svptrue_b32(), U32_DATA.as_ptr(), indices); + assert_vector_matches_i64( + loaded, + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1uh_gather_s64index_u64() { + let indices = svindex_s64(0, 1); + svsetffr(); + let _ = svld1uh_gather_s64index_u64(svptrue_b16(), U16_DATA.as_ptr(), indices); + let loaded = svldff1uh_gather_s64index_u64(svptrue_b16(), U16_DATA.as_ptr(), indices); + assert_vector_matches_u64( + loaded, + svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1uw_gather_s64index_u64() { + let indices = svindex_s64(0, 1); + svsetffr(); + let _ = svld1uw_gather_s64index_u64(svptrue_b32(), U32_DATA.as_ptr(), indices); + let loaded = svldff1uw_gather_s64index_u64(svptrue_b32(), U32_DATA.as_ptr(), indices); + assert_vector_matches_u64( + loaded, + svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1uh_gather_u32index_s32() { + let indices = svindex_u32(0, 1); + svsetffr(); + let _ = svld1uh_gather_u32index_s32(svptrue_b16(), U16_DATA.as_ptr(), indices); + let loaded = svldff1uh_gather_u32index_s32(svptrue_b16(), U16_DATA.as_ptr(), indices); + assert_vector_matches_i32( + loaded, + svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1uh_gather_u32index_u32() { + let indices = svindex_u32(0, 1); + svsetffr(); + let _ = svld1uh_gather_u32index_u32(svptrue_b16(), U16_DATA.as_ptr(), indices); + let loaded = svldff1uh_gather_u32index_u32(svptrue_b16(), U16_DATA.as_ptr(), indices); + assert_vector_matches_u32( + loaded, + svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1uh_gather_u64index_s64() { + let indices = svindex_u64(0, 1); + svsetffr(); + let _ = svld1uh_gather_u64index_s64(svptrue_b16(), U16_DATA.as_ptr(), indices); + let loaded = svldff1uh_gather_u64index_s64(svptrue_b16(), U16_DATA.as_ptr(), indices); + assert_vector_matches_i64( + loaded, + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1uw_gather_u64index_s64() { + let indices = svindex_u64(0, 1); + svsetffr(); + let _ = svld1uw_gather_u64index_s64(svptrue_b32(), U32_DATA.as_ptr(), indices); + let loaded = svldff1uw_gather_u64index_s64(svptrue_b32(), U32_DATA.as_ptr(), indices); + assert_vector_matches_i64( + loaded, + svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1uh_gather_u64index_u64() { + let indices = svindex_u64(0, 1); + svsetffr(); + let _ = svld1uh_gather_u64index_u64(svptrue_b16(), U16_DATA.as_ptr(), indices); + let loaded = svldff1uh_gather_u64index_u64(svptrue_b16(), U16_DATA.as_ptr(), indices); + assert_vector_matches_u64( + loaded, + svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1uw_gather_u64index_u64() { + let indices = svindex_u64(0, 1); + svsetffr(); + let _ = svld1uw_gather_u64index_u64(svptrue_b32(), U32_DATA.as_ptr(), indices); + let loaded = 
svldff1uw_gather_u64index_u64(svptrue_b32(), U32_DATA.as_ptr(), indices); + assert_vector_matches_u64( + loaded, + svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1uh_gather_u32base_index_s32() { + let bases = svindex_u32(0, 2u32.try_into().unwrap()); + svsetffr(); + let _ = svld1uh_gather_u32base_index_s32( + svptrue_b16(), + bases, + U16_DATA.as_ptr() as i64 / (2u32 as i64) + 1, + ); + let loaded = svldff1uh_gather_u32base_index_s32( + svptrue_b16(), + bases, + U16_DATA.as_ptr() as i64 / (2u32 as i64) + 1, + ); + assert_vector_matches_i32( + loaded, + svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1uh_gather_u32base_index_u32() { + let bases = svindex_u32(0, 2u32.try_into().unwrap()); + svsetffr(); + let _ = svld1uh_gather_u32base_index_u32( + svptrue_b16(), + bases, + U16_DATA.as_ptr() as i64 / (2u32 as i64) + 1, + ); + let loaded = svldff1uh_gather_u32base_index_u32( + svptrue_b16(), + bases, + U16_DATA.as_ptr() as i64 / (2u32 as i64) + 1, + ); + assert_vector_matches_u32( + loaded, + svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1uh_gather_u64base_index_s64() { + let bases = svdup_n_u64(U16_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 2u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b16(), bases, offsets); + svsetffr(); + let _ = svld1uh_gather_u64base_index_s64(svptrue_b16(), bases, 1.try_into().unwrap()); + let loaded = svldff1uh_gather_u64base_index_s64(svptrue_b16(), bases, 1.try_into().unwrap()); + assert_vector_matches_i64( + loaded, + svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1uw_gather_u64base_index_s64() { + let bases = svdup_n_u64(U32_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 4u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b32(), bases, offsets); + svsetffr(); + let _ = svld1uw_gather_u64base_index_s64(svptrue_b32(), bases, 1.try_into().unwrap()); + let loaded = svldff1uw_gather_u64base_index_s64(svptrue_b32(), bases, 1.try_into().unwrap()); + assert_vector_matches_i64( + loaded, + svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1uh_gather_u64base_index_u64() { + let bases = svdup_n_u64(U16_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 2u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b16(), bases, offsets); + svsetffr(); + let _ = svld1uh_gather_u64base_index_u64(svptrue_b16(), bases, 1.try_into().unwrap()); + let loaded = svldff1uh_gather_u64base_index_u64(svptrue_b16(), bases, 1.try_into().unwrap()); + assert_vector_matches_u64( + loaded, + svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} +#[simd_test(enable = "sve")] +unsafe fn test_svldff1uw_gather_u64base_index_u64() { + let bases = svdup_n_u64(U32_DATA.as_ptr() as u64); + let offsets = svindex_u64(0, 4u32.try_into().unwrap()); + let bases = svadd_u64_x(svptrue_b32(), bases, offsets); + svsetffr(); + let _ = svld1uw_gather_u64base_index_u64(svptrue_b32(), bases, 1.try_into().unwrap()); + let loaded = svldff1uw_gather_u64base_index_u64(svptrue_b32(), bases, 1.try_into().unwrap()); + assert_vector_matches_u64( + loaded, + svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()), + ); +} 
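+// All of the svldff1* tests above share one first-faulting shape: svsetffr()
+// initialises the first-fault register (FFR) so every lane is eligible to
+// load, a plain svld1* of the same address faults eagerly if the test data is
+// genuinely inaccessible (so an all-false FFR cannot silently hide a bad
+// pointer), and the svldff1* result is compared against an svindex vector,
+// which works because the *_DATA arrays hold the sequence 0, 1, 2, .... A
+// minimal illustrative sketch of that shape (not one of the generated tests),
+// assuming the ACLE FFR-read intrinsic svrdffr() is available here:
+#[simd_test(enable = "sve")]
+unsafe fn test_ldff1_pattern_sketch() {
+    svsetffr(); // reset the FFR: every lane may load
+    let _ = svld1_s32(svptrue_b32(), I32_DATA.as_ptr()); // eager fault check
+    let loaded = svldff1_s32(svptrue_b32(), I32_DATA.as_ptr());
+    let _ffr = svrdffr(); // lanes that actually loaded; all-true for in-bounds data
+    assert_vector_matches_i32(loaded, svindex_s32(0, 1));
+}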
+#[simd_test(enable = "sve")] unsafe fn test_svldnf1_f32() { svsetffr(); let _ = svld1_f32(svptrue_b32(), F32_DATA.as_ptr()); diff --git a/crates/stdarch-gen-arm/src/load_store_tests.rs b/crates/stdarch-gen-arm/src/load_store_tests.rs index 0f4de83dac..cbd5df50de 100644 --- a/crates/stdarch-gen-arm/src/load_store_tests.rs +++ b/crates/stdarch-gen-arm/src/load_store_tests.rs @@ -141,13 +141,6 @@ fn generate_single_test( } } - if fn_name.starts_with("svldff1") && fn_name.contains("gather") { - // TODO: We can remove this check when first-faulting gathers are fixed in CI's QEMU - // https://gitlab.com/qemu-project/qemu/-/issues/1612 - println!("Skipping test for {fn_name}"); - return Ok(quote!()); - } - let fn_ident = format_ident!("{fn_name}"); let test_name = format_ident!( "test_{fn_name}{}",