Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ jobs:
intrinsic-test:
needs: [style]
name: Intrinsic Test
runs-on: ubuntu-latest
runs-on: ubuntu-latest
strategy:
matrix:
target:
Expand Down Expand Up @@ -330,11 +330,30 @@ jobs:
cargo run -p stdarch-gen-hexagon --release
git diff --exit-code

# Run some tests with Miri. Most stdarch functions use platform-specific intrinsics
# that Miri does not support. Also, Miri is relatively slow.
#
# Below we run some tests where Miri might catch UB, for instance on intrinsics that read from
# or write to pointers.
miri:
needs: [style]
name: Run some tests with miri
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- name: Install Rust
run: rustup update nightly && rustup default nightly && rustup component add miri
- name: Aarch64 load/store roundtrip
env:
TARGET: "aarch64-unknown-linux-gnu"
run: cargo miri test -p core_arch --target aarch64-unknown-linux-gnu -- test_vld3q
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be nice to have a small .txt file with all the test names instead of having them inline. Also, we can probably test more: the CI already takes 12 minutes, so we can increase this until the Miri job's time reaches 12 minutes without slowing CI down at all.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, we have time (and could shard if not), but most of the other tests still fail because either the read or the write does not use the portable intrinsic. This particular one is of interest for the backport of the UB fix in #t-libs/backports > #153336: beta-nominated.


conclusion:
needs:
- docs
- verify
- test
- miri
- intrinsic-test
- check-stdarch-gen
runs-on: ubuntu-latest
Expand Down
18 changes: 2 additions & 16 deletions crates/core_arch/src/aarch64/neon/generated.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12127,14 +12127,7 @@ pub unsafe fn vld3q_dup_u64(a: *const u64) -> uint64x2x3_t {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(ld3))]
pub unsafe fn vld3q_f64(a: *const f64) -> float64x2x3_t {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.ld3.v2f64.p0"
)]
fn _vld3q_f64(ptr: *const float64x2_t) -> float64x2x3_t;
}
_vld3q_f64(a as _)
crate::core_arch::macros::deinterleaving_load!(f64, 2, 3, a)
}
#[doc = "Load multiple 3-element structures to three registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s64)"]
Expand All @@ -12145,14 +12138,7 @@ pub unsafe fn vld3q_f64(a: *const f64) -> float64x2x3_t {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(ld3))]
pub unsafe fn vld3q_s64(a: *const i64) -> int64x2x3_t {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.ld3.v2i64.p0"
)]
fn _vld3q_s64(ptr: *const int64x2_t) -> int64x2x3_t;
}
_vld3q_s64(a as _)
crate::core_arch::macros::deinterleaving_load!(i64, 2, 3, a)
}
#[doc = "Load multiple 3-element structures to three registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f64)"]
Expand Down
6 changes: 5 additions & 1 deletion crates/core_arch/src/aarch64/neon/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1022,8 +1022,12 @@ mod tests {
($elem_ty:ty, $len:expr, $vec_ty:ty, $store:expr, $load:expr) => {
let vals: [$elem_ty; $len] = crate::array::from_fn(|i| i as $elem_ty);
let a: $vec_ty = transmute(vals);
let mut tmp = [0 as $elem_ty; $len];
let mut tmp = core::mem::MaybeUninit::<[$elem_ty; $len]>::uninit();
$store(tmp.as_mut_ptr().cast(), a);

// With Miri this will check that all elements were initialized.
let tmp = tmp.assume_init();

let r: $vec_ty = $load(tmp.as_ptr().cast());
let out: [$elem_ty; $len] = transmute(r);
assert_eq!(out, vals);
Expand Down
72 changes: 8 additions & 64 deletions crates/core_arch/src/arm_shared/neon/generated.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67158,14 +67158,7 @@ pub unsafe fn vst3q_s32(a: *mut i32, b: int32x4x3_t) {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(st3))]
pub unsafe fn vst3_f32(a: *mut f32, b: float32x2x3_t) {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.st3.v2f32.p0"
)]
fn _vst3_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t, ptr: *mut i8);
}
_vst3_f32(b.0, b.1, b.2, a as _)
crate::core_arch::macros::interleaving_store!(f32, 2, 3, a, b)
}
#[doc = "Store multiple 3-element structures from three registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f32)"]
Expand All @@ -67177,14 +67170,7 @@ pub unsafe fn vst3_f32(a: *mut f32, b: float32x2x3_t) {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(st3))]
pub unsafe fn vst3q_f32(a: *mut f32, b: float32x4x3_t) {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.st3.v4f32.p0"
)]
fn _vst3q_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t, ptr: *mut i8);
}
_vst3q_f32(b.0, b.1, b.2, a as _)
crate::core_arch::macros::interleaving_store!(f32, 4, 3, a, b)
}
#[doc = "Store multiple 3-element structures from three registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s8)"]
Expand All @@ -67196,14 +67182,7 @@ pub unsafe fn vst3q_f32(a: *mut f32, b: float32x4x3_t) {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(st3))]
pub unsafe fn vst3_s8(a: *mut i8, b: int8x8x3_t) {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.st3.v8i8.p0"
)]
fn _vst3_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t, ptr: *mut i8);
}
_vst3_s8(b.0, b.1, b.2, a as _)
crate::core_arch::macros::interleaving_store!(i8, 8, 3, a, b)
}
#[doc = "Store multiple 3-element structures from three registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s8)"]
Expand All @@ -67215,14 +67194,7 @@ pub unsafe fn vst3_s8(a: *mut i8, b: int8x8x3_t) {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(st3))]
pub unsafe fn vst3q_s8(a: *mut i8, b: int8x16x3_t) {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.st3.v16i8.p0"
)]
fn _vst3q_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t, ptr: *mut i8);
}
_vst3q_s8(b.0, b.1, b.2, a as _)
crate::core_arch::macros::interleaving_store!(i8, 16, 3, a, b)
}
#[doc = "Store multiple 3-element structures from three registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s16)"]
Expand All @@ -67234,14 +67206,7 @@ pub unsafe fn vst3q_s8(a: *mut i8, b: int8x16x3_t) {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(st3))]
pub unsafe fn vst3_s16(a: *mut i16, b: int16x4x3_t) {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.st3.v4i16.p0"
)]
fn _vst3_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t, ptr: *mut i8);
}
_vst3_s16(b.0, b.1, b.2, a as _)
crate::core_arch::macros::interleaving_store!(i16, 4, 3, a, b)
}
#[doc = "Store multiple 3-element structures from three registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s16)"]
Expand All @@ -67253,14 +67218,7 @@ pub unsafe fn vst3_s16(a: *mut i16, b: int16x4x3_t) {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(st3))]
pub unsafe fn vst3q_s16(a: *mut i16, b: int16x8x3_t) {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.st3.v8i16.p0"
)]
fn _vst3q_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t, ptr: *mut i8);
}
_vst3q_s16(b.0, b.1, b.2, a as _)
crate::core_arch::macros::interleaving_store!(i16, 8, 3, a, b)
}
#[doc = "Store multiple 3-element structures from three registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s32)"]
Expand All @@ -67272,14 +67230,7 @@ pub unsafe fn vst3q_s16(a: *mut i16, b: int16x8x3_t) {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(st3))]
pub unsafe fn vst3_s32(a: *mut i32, b: int32x2x3_t) {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.st3.v2i32.p0"
)]
fn _vst3_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t, ptr: *mut i8);
}
_vst3_s32(b.0, b.1, b.2, a as _)
crate::core_arch::macros::interleaving_store!(i32, 2, 3, a, b)
}
#[doc = "Store multiple 3-element structures from three registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s32)"]
Expand All @@ -67291,14 +67242,7 @@ pub unsafe fn vst3_s32(a: *mut i32, b: int32x2x3_t) {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(st3))]
pub unsafe fn vst3q_s32(a: *mut i32, b: int32x4x3_t) {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.st3.v4i32.p0"
)]
fn _vst3q_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t, ptr: *mut i8);
}
_vst3q_s32(b.0, b.1, b.2, a as _)
crate::core_arch::macros::interleaving_store!(i32, 4, 3, a, b)
}
#[doc = "Store multiple 3-element structures from three registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f16)"]
Expand Down
13 changes: 3 additions & 10 deletions crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4031,17 +4031,10 @@ intrinsics:
unsafe: [neon]
assert_instr: [ld3]
types:
- ['*const i64', int64x2x3_t, '*const int64x2_t', i64]
- ['*const f64', float64x2x3_t, '*const float64x2_t', f64]
- ['*const i64', int64x2x3_t, i64, "2"]
- ['*const f64', float64x2x3_t, f64, "2"]
compose:
- LLVMLink:
name: 'vld3{neon_type[1].nox}'
arguments:
- 'ptr: {type[2]}'
links:
- link: 'llvm.aarch64.neon.ld3.v{neon_type[1].lane}{type[3]}.p0'
arch: aarch64,arm64ec
- FnCall: ['_vld3{neon_type[1].nox}', ['a as _']]
- FnCall: ["crate::core_arch::macros::deinterleaving_load!", [{ Type: "{type[2]}" }, "{type[3]}", "3", a], [], true]

- name: "vld3{neon_type[1].nox}"
doc: Load multiple 3-element structures to three registers
Expand Down
29 changes: 9 additions & 20 deletions crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5642,27 +5642,16 @@ intrinsics:
safety:
unsafe: [neon]
types:
- [i8, int8x8x3_t, int8x8_t]
- [i16, int16x4x3_t, int16x4_t]
- [i32, int32x2x3_t, int32x2_t]
- [i8, int8x16x3_t, int8x16_t]
- [i16, int16x8x3_t, int16x8_t]
- [i32, int32x4x3_t, int32x4_t]
- [f32, float32x2x3_t, float32x2_t]
- [f32, float32x4x3_t, float32x4_t]
- [i8, int8x8x3_t, "8"]
- [i16, int16x4x3_t, "4"]
- [i32, int32x2x3_t, "2"]
- [i8, int8x16x3_t, "16"]
- [i16, int16x8x3_t, "8"]
- [i32, int32x4x3_t, "4"]
- [f32, float32x2x3_t, "2"]
- [f32, float32x4x3_t, "4"]
compose:
- LLVMLink:
name: 'vst3.{neon_type[1]}'
arguments:
- 'a: {type[2]}'
- 'b: {type[2]}'
- 'c: {type[2]}'
- 'ptr: *mut i8'
links:
- link: 'llvm.aarch64.neon.st3.v{neon_type[1].lane}{type[0]}.p0'
arch: aarch64,arm64ec
- FnCall: ['_vst3{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'a as _']]

- FnCall: ["crate::core_arch::macros::interleaving_store!", [{ Type: "{type[0]}" }, "{type[2]}", "3", a, b], [], true]

- name: "vst3{neon_type[1].nox}"
doc: "Store multiple 3-element structures from three registers"
Expand Down