diff --git a/fearless_simd/src/core_arch/fallback.rs b/fearless_simd/src/core_arch/fallback.rs
index 4b6a1e6a..d6514c74 100644
--- a/fearless_simd/src/core_arch/fallback.rs
+++ b/fearless_simd/src/core_arch/fallback.rs
@@ -13,4 +13,4 @@ impl Fallback {
     pub fn new() -> Self {
         Self { _private: () }
     }
-}
\ No newline at end of file
+}
diff --git a/fearless_simd/src/core_arch/mod.rs b/fearless_simd/src/core_arch/mod.rs
index c09f01d3..56ac34dd 100644
--- a/fearless_simd/src/core_arch/mod.rs
+++ b/fearless_simd/src/core_arch/mod.rs
@@ -6,6 +6,6 @@
 #[cfg(target_arch = "aarch64")]
 pub mod aarch64;
+pub mod fallback;
 #[cfg(target_arch = "x86_64")]
 pub mod x86_64;
-pub mod fallback;
diff --git a/fearless_simd/src/generated.rs b/fearless_simd/src/generated.rs
index e6c0947d..0e9c99a4 100644
--- a/fearless_simd/src/generated.rs
+++ b/fearless_simd/src/generated.rs
@@ -5,19 +5,19 @@
 //!
 //! All files in this subdirectory are autogenerated by the `fearless_simd_gen` crate.
+mod fallback;
 #[cfg(all(feature = "std", target_arch = "aarch64"))]
 mod neon;
-#[cfg(target_arch = "wasm32")]
-mod wasm;
-mod fallback;
 mod ops;
 mod simd_trait;
 mod simd_types;
+#[cfg(target_arch = "wasm32")]
+mod wasm;
+pub use fallback::*;
 #[cfg(all(feature = "std", target_arch = "aarch64"))]
 pub use neon::*;
-#[cfg(target_arch = "wasm32")]
-pub use wasm::*;
-pub use fallback::*;
 pub use simd_trait::*;
 pub use simd_types::*;
+#[cfg(target_arch = "wasm32")]
+pub use wasm::*;
diff --git a/fearless_simd/src/generated/fallback.rs b/fearless_simd/src/generated/fallback.rs
index d7964b9d..359db2ad 100644
--- a/fearless_simd/src/generated/fallback.rs
+++ b/fearless_simd/src/generated/fallback.rs
@@ -1,11 +1,11 @@
 // This file is autogenerated by fearless_simd_gen
-use core::ops::*;
-use crate::{seal::Seal, Level, Simd, SimdInto};
+use crate::{Level, Simd, SimdInto, seal::Seal};
 use crate::{
-    f32x4, i8x16, u8x16, mask8x16, i16x8, u16x8, mask16x8, i32x4, u32x4, mask32x4, f32x8,
-    i8x32, u8x32, mask8x32, i16x16, u16x16, mask16x16, i32x8, u32x8, mask32x8,
+    f32x4, f32x8, i8x16, i8x32, i16x8, i16x16, i32x4, i32x8, mask8x16, mask8x32, mask16x8,
+    mask16x16, mask32x4, mask32x8, u8x16, u8x32, u16x8, u16x16, u32x4, u32x8,
 };
+use core::ops::*;
 #[cfg(all(feature = "libm", not(feature = "std")))]
 trait FloatExt {
     fn floor(self) -> f32;
@@ -67,7 +67,7 @@ impl Simd for Fallback {
             f32::abs(a[2usize]),
             f32::abs(a[3usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn neg_f32x4(self, a: f32x4) -> f32x4 {
@@ -77,7 +77,7 @@ impl Simd for Fallback {
             f32::neg(a[2usize]),
             f32::neg(a[3usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn sqrt_f32x4(self, a: f32x4) -> f32x4 {
@@ -87,7 +87,7 @@ impl Simd for Fallback {
             f32::sqrt(a[2usize]),
             f32::sqrt(a[3usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
    }
     #[inline(always)]
     fn add_f32x4(self, a: f32x4, b: f32x4) -> f32x4 {
@@ -97,7 +97,7 @@ impl Simd for Fallback {
             f32::add(a[2usize], &b[2usize]),
             f32::add(a[3usize], &b[3usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn sub_f32x4(self, a: f32x4, b: f32x4) -> f32x4 {
@@ -107,7 +107,7 @@ impl Simd for Fallback {
             f32::sub(a[2usize], &b[2usize]),
             f32::sub(a[3usize], &b[3usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn mul_f32x4(self, a: f32x4, b: f32x4) -> f32x4 {
@@ -117,7 +117,7 @@ impl Simd for Fallback {
             f32::mul(a[2usize], &b[2usize]),
             f32::mul(a[3usize], &b[3usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn div_f32x4(self, a: f32x4, b: f32x4) -> f32x4 {
@@ -127,7 +127,7 @@ impl Simd for Fallback {
             f32::div(a[2usize], &b[2usize]),
             f32::div(a[3usize], &b[3usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn copysign_f32x4(self, a: f32x4, b: f32x4) -> f32x4 {
@@ -137,7 +137,7 @@ impl Simd for Fallback {
             f32::copysign(a[2usize], b[2usize]),
             f32::copysign(a[3usize], b[3usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn simd_eq_f32x4(self, a: f32x4, b: f32x4) -> mask32x4 {
@@ -147,7 +147,7 @@ impl Simd for Fallback {
             -(f32::eq(&a[2usize], &b[2usize]) as i32),
             -(f32::eq(&a[3usize], &b[3usize]) as i32),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn simd_lt_f32x4(self, a: f32x4, b: f32x4) -> mask32x4 {
@@ -157,7 +157,7 @@ impl Simd for Fallback {
             -(f32::lt(&a[2usize], &b[2usize]) as i32),
             -(f32::lt(&a[3usize], &b[3usize]) as i32),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn simd_le_f32x4(self, a: f32x4, b: f32x4) -> mask32x4 {
@@ -167,7 +167,7 @@ impl Simd for Fallback {
             -(f32::le(&a[2usize], &b[2usize]) as i32),
             -(f32::le(&a[3usize], &b[3usize]) as i32),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn simd_ge_f32x4(self, a: f32x4, b: f32x4) -> mask32x4 {
@@ -177,7 +177,7 @@ impl Simd for Fallback {
             -(f32::ge(&a[2usize], &b[2usize]) as i32),
             -(f32::ge(&a[3usize], &b[3usize]) as i32),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn simd_gt_f32x4(self, a: f32x4, b: f32x4) -> mask32x4 {
@@ -187,7 +187,7 @@ impl Simd for Fallback {
             -(f32::gt(&a[2usize], &b[2usize]) as i32),
             -(f32::gt(&a[3usize], &b[3usize]) as i32),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn zip_f32x4(self, a: f32x4, b: f32x4) -> (f32x4, f32x4) {
@@ -200,8 +200,13 @@ impl Simd for Fallback {
     fn unzip_f32x4(self, a: f32x4, b: f32x4) -> (f32x4, f32x4) {
         (
             [a[0usize * 2], a[1usize * 2], b[0usize * 2], b[1usize * 2]].simd_into(self),
-            [a[0usize * 2 + 1], a[1usize * 2 + 1], b[0usize * 2 + 1], b[1usize * 2 + 1]]
-                .simd_into(self),
+            [
+                a[0usize * 2 + 1],
+                a[1usize * 2 + 1],
+                b[0usize * 2 + 1],
+                b[1usize * 2 + 1],
+            ]
+            .simd_into(self),
         )
     }
     #[inline(always)]
@@ -212,7 +217,7 @@ impl Simd for Fallback {
             f32::max(a[2usize], b[2usize]),
             f32::max(a[3usize], b[3usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn max_precise_f32x4(self, a: f32x4, b: f32x4) -> f32x4 {
@@ -222,7 +227,7 @@ impl Simd for Fallback {
             f32::max(a[2usize], b[2usize]),
             f32::max(a[3usize], b[3usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn min_f32x4(self, a: f32x4, b: f32x4) -> f32x4 {
@@ -232,7 +237,7 @@ impl Simd for Fallback {
             f32::min(a[2usize], b[2usize]),
             f32::min(a[3usize], b[3usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn min_precise_f32x4(self, a: f32x4, b: f32x4) -> f32x4 {
@@ -242,7 +247,7 @@ impl Simd for Fallback {
             f32::min(a[2usize], b[2usize]),
             f32::min(a[3usize], b[3usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn madd_f32x4(self, a: f32x4, b: f32x4, c: f32x4) -> f32x4 {
@@ -256,22 +261,17 @@ impl Simd for Fallback {
             f32::floor(a[2usize]),
             f32::floor(a[3usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
-    fn select_f32x4(
-        self,
-        a: mask32x4,
-        b: f32x4,
-        c: f32x4,
-    ) -> f32x4 {
+    fn select_f32x4(self, a: mask32x4, b: f32x4, c: f32x4) -> f32x4 {
         [
             if a[0usize] != 0 { b[0usize] } else { c[0usize] },
             if a[1usize] != 0 { b[1usize] } else { c[1usize] },
             if a[2usize] != 0 { b[2usize] } else { c[2usize] },
             if a[3usize] != 0 { b[3usize] } else { c[3usize] },
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn combine_f32x4(self, a: f32x4, b: f32x4) -> f32x8 {
@@ -282,8 +282,13 @@ impl Simd for Fallback {
     }
     #[inline(always)]
     fn cvt_u32_f32x4(self, a: f32x4) -> u32x4 {
-        [a[0usize] as u32, a[1usize] as u32, a[2usize] as u32, a[3usize] as u32]
-            .simd_into(self)
+        [
+            a[0usize] as u32,
+            a[1usize] as u32,
+            a[2usize] as u32,
+            a[3usize] as u32,
+        ]
+        .simd_into(self)
     }
     #[inline(always)]
     fn splat_i8x16(self, val: i8) -> i8x16 {
@@ -309,7 +314,7 @@ impl Simd for Fallback {
             i8::not(a[14usize]),
             i8::not(a[15usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn add_i8x16(self, a: i8x16, b: i8x16) -> i8x16 {
@@ -331,7 +336,7 @@ impl Simd for Fallback {
             i8::add(a[14usize], &b[14usize]),
             i8::add(a[15usize], &b[15usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn sub_i8x16(self, a: i8x16, b: i8x16) -> i8x16 {
@@ -353,7 +358,7 @@ impl Simd for Fallback {
             i8::sub(a[14usize], &b[14usize]),
             i8::sub(a[15usize], &b[15usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn mul_i8x16(self, a: i8x16, b: i8x16) -> i8x16 {
@@ -375,7 +380,7 @@ impl Simd for Fallback {
             i8::mul(a[14usize], &b[14usize]),
             i8::mul(a[15usize], &b[15usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn and_i8x16(self, a: i8x16, b: i8x16) -> i8x16 {
@@ -397,7 +402,7 @@ impl Simd for Fallback {
             i8::bitand(a[14usize], &b[14usize]),
             i8::bitand(a[15usize], &b[15usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn or_i8x16(self, a: i8x16, b: i8x16) -> i8x16 {
@@ -419,7 +424,7 @@ impl Simd for Fallback {
             i8::bitor(a[14usize], &b[14usize]),
             i8::bitor(a[15usize], &b[15usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn xor_i8x16(self, a: i8x16, b: i8x16) -> i8x16 {
@@ -441,7 +446,7 @@ impl Simd for Fallback {
             i8::bitxor(a[14usize], &b[14usize]),
             i8::bitxor(a[15usize], &b[15usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn simd_eq_i8x16(self, a: i8x16, b: i8x16) -> mask8x16 {
@@ -463,7 +468,7 @@ impl Simd for Fallback {
             -(i8::eq(&a[14usize], &b[14usize]) as i8),
             -(i8::eq(&a[15usize], &b[15usize]) as i8),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn simd_lt_i8x16(self, a: i8x16, b: i8x16) -> mask8x16 {
@@ -485,7 +490,7 @@ impl Simd for Fallback {
             -(i8::lt(&a[14usize], &b[14usize]) as i8),
             -(i8::lt(&a[15usize], &b[15usize]) as i8),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn simd_le_i8x16(self, a: i8x16, b: i8x16) -> mask8x16 {
@@ -507,7 +512,7 @@ impl Simd for Fallback {
             -(i8::le(&a[14usize], &b[14usize]) as i8),
             -(i8::le(&a[15usize], &b[15usize]) as i8),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn simd_ge_i8x16(self, a: i8x16, b: i8x16) -> mask8x16 {
@@ -529,7 +534,7 @@ impl Simd for Fallback {
             -(i8::ge(&a[14usize], &b[14usize]) as i8),
             -(i8::ge(&a[15usize], &b[15usize]) as i8),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn simd_gt_i8x16(self, a: i8x16, b: i8x16) -> mask8x16 {
@@ -551,49 +556,23 @@ impl Simd for Fallback {
             -(i8::gt(&a[14usize], &b[14usize]) as i8),
             -(i8::gt(&a[15usize], &b[15usize]) as i8),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn zip_i8x16(self, a: i8x16, b: i8x16) -> (i8x16, i8x16) {
         (
             [
-                a[0usize],
-                b[0usize],
-                a[1usize],
-                b[1usize],
-                a[2usize],
-                b[2usize],
-                a[3usize],
-                b[3usize],
-                a[4usize],
-                b[4usize],
-                a[5usize],
-                b[5usize],
-                a[6usize],
-                b[6usize],
-                a[7usize],
-                b[7usize],
+                a[0usize], b[0usize], a[1usize], b[1usize], a[2usize], b[2usize], a[3usize],
+                b[3usize], a[4usize], b[4usize], a[5usize], b[5usize], a[6usize], b[6usize],
+                a[7usize], b[7usize],
             ]
-                .simd_into(self),
+            .simd_into(self),
             [
-                a[8usize],
-                b[8usize],
-                a[9usize],
-                b[9usize],
-                a[10usize],
-                b[10usize],
-                a[11usize],
-                b[11usize],
-                a[12usize],
-                b[12usize],
-                a[13usize],
-                b[13usize],
-                a[14usize],
-                b[14usize],
-                a[15usize],
-                b[15usize],
+                a[8usize], b[8usize], a[9usize], b[9usize], a[10usize], b[10usize], a[11usize],
+                b[11usize], a[12usize], b[12usize], a[13usize], b[13usize], a[14usize], b[14usize],
+                a[15usize], b[15usize],
             ]
-                .simd_into(self),
+            .simd_into(self),
         )
     }
     #[inline(always)]
@@ -617,7 +596,7 @@ impl Simd for Fallback {
                 b[6usize * 2],
                 b[7usize * 2],
             ]
-                .simd_into(self),
+            .simd_into(self),
             [
                 a[0usize * 2 + 1],
                 a[1usize * 2 + 1],
@@ -636,16 +615,11 @@ impl Simd for Fallback {
                 b[6usize * 2 + 1],
                 b[7usize * 2 + 1],
             ]
-                .simd_into(self),
+            .simd_into(self),
         )
     }
     #[inline(always)]
-    fn select_i8x16(
-        self,
-        a: mask8x16,
-        b: i8x16,
-        c: i8x16,
-    ) -> i8x16 {
+    fn select_i8x16(self, a: mask8x16, b: i8x16, c: i8x16) -> i8x16 {
         [
             if a[0usize] != 0 { b[0usize] } else { c[0usize] },
             if a[1usize] != 0 { b[1usize] } else { c[1usize] },
@@ -657,14 +631,38 @@ impl Simd for Fallback {
             if a[7usize] != 0 { b[7usize] } else { c[7usize] },
             if a[8usize] != 0 { b[8usize] } else { c[8usize] },
             if a[9usize] != 0 { b[9usize] } else { c[9usize] },
-            if a[10usize] != 0 { b[10usize] } else { c[10usize] },
-            if a[11usize] != 0 { b[11usize] } else { c[11usize] },
-            if a[12usize] != 0 { b[12usize] } else { c[12usize] },
-            if a[13usize] != 0 { b[13usize] } else { c[13usize] },
-            if a[14usize] != 0 { b[14usize] } else { c[14usize] },
-            if a[15usize] != 0 { b[15usize] } else { c[15usize] },
-        ]
-            .simd_into(self)
+            if a[10usize] != 0 {
+                b[10usize]
+            } else {
+                c[10usize]
+            },
+            if a[11usize] != 0 {
+                b[11usize]
+            } else {
+                c[11usize]
+            },
+            if a[12usize] != 0 {
+                b[12usize]
+            } else {
+                c[12usize]
+            },
+            if a[13usize] != 0 {
+                b[13usize]
+            } else {
+                c[13usize]
+            },
+            if a[14usize] != 0 {
+                b[14usize]
+            } else {
+                c[14usize]
+            },
+            if a[15usize] != 0 {
+                b[15usize]
+            } else {
+                c[15usize]
+            },
+        ]
+        .simd_into(self)
     }
     #[inline(always)]
     fn combine_i8x16(self, a: i8x16, b: i8x16) -> i8x32 {
@@ -697,7 +695,7 @@ impl Simd for Fallback {
             u8::not(a[14usize]),
             u8::not(a[15usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn add_u8x16(self, a: u8x16, b: u8x16) -> u8x16 {
@@ -719,7 +717,7 @@ impl Simd for Fallback {
             u8::add(a[14usize], &b[14usize]),
             u8::add(a[15usize], &b[15usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn sub_u8x16(self, a: u8x16, b: u8x16) -> u8x16 {
@@ -741,7 +739,7 @@ impl Simd for Fallback {
             u8::sub(a[14usize], &b[14usize]),
             u8::sub(a[15usize], &b[15usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn mul_u8x16(self, a: u8x16, b: u8x16) -> u8x16 {
@@ -763,7 +761,7 @@ impl Simd for Fallback {
             u8::mul(a[14usize], &b[14usize]),
             u8::mul(a[15usize], &b[15usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn and_u8x16(self, a: u8x16, b: u8x16) -> u8x16 {
@@ -785,7 +783,7 @@ impl Simd for Fallback {
             u8::bitand(a[14usize], &b[14usize]),
             u8::bitand(a[15usize], &b[15usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn or_u8x16(self, a: u8x16, b: u8x16) -> u8x16 {
@@ -807,7 +805,7 @@ impl Simd for Fallback {
             u8::bitor(a[14usize], &b[14usize]),
             u8::bitor(a[15usize], &b[15usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn xor_u8x16(self, a: u8x16, b: u8x16) -> u8x16 {
@@ -829,7 +827,7 @@ impl Simd for Fallback {
             u8::bitxor(a[14usize], &b[14usize]),
             u8::bitxor(a[15usize], &b[15usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn simd_eq_u8x16(self, a: u8x16, b: u8x16) -> mask8x16 {
@@ -851,7 +849,7 @@ impl Simd for Fallback {
             -(u8::eq(&a[14usize], &b[14usize]) as i8),
             -(u8::eq(&a[15usize], &b[15usize]) as i8),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn simd_lt_u8x16(self, a: u8x16, b: u8x16) -> mask8x16 {
@@ -873,7 +871,7 @@ impl Simd for Fallback {
             -(u8::lt(&a[14usize], &b[14usize]) as i8),
             -(u8::lt(&a[15usize], &b[15usize]) as i8),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn simd_le_u8x16(self, a: u8x16, b: u8x16) -> mask8x16 {
@@ -895,7 +893,7 @@ impl Simd for Fallback {
             -(u8::le(&a[14usize], &b[14usize]) as i8),
             -(u8::le(&a[15usize], &b[15usize]) as i8),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn simd_ge_u8x16(self, a: u8x16, b: u8x16) -> mask8x16 {
@@ -917,7 +915,7 @@ impl Simd for Fallback {
             -(u8::ge(&a[14usize], &b[14usize]) as i8),
             -(u8::ge(&a[15usize], &b[15usize]) as i8),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn simd_gt_u8x16(self, a: u8x16, b: u8x16) -> mask8x16 {
@@ -939,49 +937,23 @@ impl Simd for Fallback {
             -(u8::gt(&a[14usize], &b[14usize]) as i8),
             -(u8::gt(&a[15usize], &b[15usize]) as i8),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn zip_u8x16(self, a: u8x16, b: u8x16) -> (u8x16, u8x16) {
         (
             [
-                a[0usize],
-                b[0usize],
-                a[1usize],
-                b[1usize],
-                a[2usize],
-                b[2usize],
-                a[3usize],
-                b[3usize],
-                a[4usize],
-                b[4usize],
-                a[5usize],
-                b[5usize],
-                a[6usize],
-                b[6usize],
-                a[7usize],
-                b[7usize],
+                a[0usize], b[0usize], a[1usize], b[1usize], a[2usize], b[2usize], a[3usize],
+                b[3usize], a[4usize], b[4usize], a[5usize], b[5usize], a[6usize], b[6usize],
+                a[7usize], b[7usize],
             ]
-                .simd_into(self),
+            .simd_into(self),
             [
-                a[8usize],
-                b[8usize],
-                a[9usize],
-                b[9usize],
-                a[10usize],
-                b[10usize],
-                a[11usize],
-                b[11usize],
-                a[12usize],
-                b[12usize],
-                a[13usize],
-                b[13usize],
-                a[14usize],
-                b[14usize],
-                a[15usize],
-                b[15usize],
+                a[8usize], b[8usize], a[9usize], b[9usize], a[10usize], b[10usize], a[11usize],
+                b[11usize], a[12usize], b[12usize], a[13usize], b[13usize], a[14usize], b[14usize],
+                a[15usize], b[15usize],
             ]
-                .simd_into(self),
+            .simd_into(self),
         )
     }
     #[inline(always)]
@@ -1005,7 +977,7 @@ impl Simd for Fallback {
                 b[6usize * 2],
                 b[7usize * 2],
             ]
-                .simd_into(self),
+            .simd_into(self),
             [
                 a[0usize * 2 + 1],
                 a[1usize * 2 + 1],
@@ -1024,16 +996,11 @@ impl Simd for Fallback {
                 b[6usize * 2 + 1],
                 b[7usize * 2 + 1],
             ]
-                .simd_into(self),
+            .simd_into(self),
         )
     }
     #[inline(always)]
-    fn select_u8x16(
-        self,
-        a: mask8x16,
-        b: u8x16,
-        c: u8x16,
-    ) -> u8x16 {
+    fn select_u8x16(self, a: mask8x16, b: u8x16, c: u8x16) -> u8x16 {
         [
             if a[0usize] != 0 { b[0usize] } else { c[0usize] },
             if a[1usize] != 0 { b[1usize] } else { c[1usize] },
@@ -1045,14 +1012,38 @@ impl Simd for Fallback {
             if a[7usize] != 0 { b[7usize] } else { c[7usize] },
             if a[8usize] != 0 { b[8usize] } else { c[8usize] },
             if a[9usize] != 0 { b[9usize] } else { c[9usize] },
-            if a[10usize] != 0 { b[10usize] } else { c[10usize] },
-            if a[11usize] != 0 { b[11usize] } else { c[11usize] },
-            if a[12usize] != 0 { b[12usize] } else { c[12usize] },
-            if a[13usize] != 0 { b[13usize] } else { c[13usize] },
-            if a[14usize] != 0 { b[14usize] } else { c[14usize] },
-            if a[15usize] != 0 { b[15usize] } else { c[15usize] },
-        ]
-            .simd_into(self)
+            if a[10usize] != 0 {
+                b[10usize]
+            } else {
+                c[10usize]
+            },
+            if a[11usize] != 0 {
+                b[11usize]
+            } else {
+                c[11usize]
+            },
+            if a[12usize] != 0 {
+                b[12usize]
+            } else {
+                c[12usize]
+            },
+            if a[13usize] != 0 {
+                b[13usize]
+            } else {
+                c[13usize]
+            },
+            if a[14usize] != 0 {
+                b[14usize]
+            } else {
+                c[14usize]
+            },
+            if a[15usize] != 0 {
+                b[15usize]
+            } else {
+                c[15usize]
+            },
+        ]
+        .simd_into(self)
     }
     #[inline(always)]
     fn combine_u8x16(self, a: u8x16, b: u8x16) -> u8x32 {
@@ -1085,7 +1076,7 @@ impl Simd for Fallback {
             i8::not(a[14usize]),
             i8::not(a[15usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn and_mask8x16(self, a: mask8x16, b: mask8x16) -> mask8x16 {
@@ -1107,7 +1098,7 @@ impl Simd for Fallback {
             i8::bitand(a[14usize], &b[14usize]),
             i8::bitand(a[15usize], &b[15usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn or_mask8x16(self, a: mask8x16, b: mask8x16) -> mask8x16 {
@@ -1129,7 +1120,7 @@ impl Simd for Fallback {
             i8::bitor(a[14usize], &b[14usize]),
             i8::bitor(a[15usize], &b[15usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn xor_mask8x16(self, a: mask8x16, b: mask8x16) -> mask8x16 {
@@ -1151,7 +1142,7 @@ impl Simd for Fallback {
             i8::bitxor(a[14usize], &b[14usize]),
             i8::bitxor(a[15usize], &b[15usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn select_mask8x16(
@@ -1171,14 +1162,38 @@ impl Simd for Fallback {
             if a[7usize] != 0 { b[7usize] } else { c[7usize] },
             if a[8usize] != 0 { b[8usize] } else { c[8usize] },
             if a[9usize] != 0 { b[9usize] } else { c[9usize] },
-            if a[10usize] != 0 { b[10usize] } else { c[10usize] },
-            if a[11usize] != 0 { b[11usize] } else { c[11usize] },
-            if a[12usize] != 0 { b[12usize] } else { c[12usize] },
-            if a[13usize] != 0 { b[13usize] } else { c[13usize] },
-            if a[14usize] != 0 { b[14usize] } else { c[14usize] },
-            if a[15usize] != 0 { b[15usize] } else { c[15usize] },
-        ]
-            .simd_into(self)
+            if a[10usize] != 0 {
+                b[10usize]
+            } else {
+                c[10usize]
+            },
+            if a[11usize] != 0 {
+                b[11usize]
+            } else {
+                c[11usize]
+            },
+            if a[12usize] != 0 {
+                b[12usize]
+            } else {
+                c[12usize]
+            },
+            if a[13usize] != 0 {
+                b[13usize]
+            } else {
+                c[13usize]
+            },
+            if a[14usize] != 0 {
+                b[14usize]
+            } else {
+                c[14usize]
+            },
+            if a[15usize] != 0 {
+                b[15usize]
+            } else {
+                c[15usize]
+            },
+        ]
+        .simd_into(self)
     }
     #[inline(always)]
     fn zip_mask8x16(
@@ -1188,43 +1203,17 @@ impl Simd for Fallback {
    ) -> (mask8x16, mask8x16) {
         (
             [
-                a[0usize],
-                b[0usize],
-                a[1usize],
-                b[1usize],
-                a[2usize],
-                b[2usize],
-                a[3usize],
-                b[3usize],
-                a[4usize],
-                b[4usize],
-                a[5usize],
-                b[5usize],
-                a[6usize],
-                b[6usize],
-                a[7usize],
-                b[7usize],
+                a[0usize], b[0usize], a[1usize], b[1usize], a[2usize], b[2usize], a[3usize],
+                b[3usize], a[4usize], b[4usize], a[5usize], b[5usize], a[6usize], b[6usize],
+                a[7usize], b[7usize],
             ]
-                .simd_into(self),
+            .simd_into(self),
             [
-                a[8usize],
-                b[8usize],
-                a[9usize],
-                b[9usize],
-                a[10usize],
-                b[10usize],
-                a[11usize],
-                b[11usize],
-                a[12usize],
-                b[12usize],
-                a[13usize],
-                b[13usize],
-                a[14usize],
-                b[14usize],
-                a[15usize],
-                b[15usize],
+                a[8usize], b[8usize], a[9usize], b[9usize], a[10usize], b[10usize], a[11usize],
+                b[11usize], a[12usize], b[12usize], a[13usize], b[13usize], a[14usize], b[14usize],
+                a[15usize], b[15usize],
             ]
-                .simd_into(self),
+            .simd_into(self),
         )
     }
     #[inline(always)]
@@ -1252,7 +1241,7 @@ impl Simd for Fallback {
                 b[6usize * 2],
                 b[7usize * 2],
             ]
-                .simd_into(self),
+            .simd_into(self),
             [
                 a[0usize * 2 + 1],
                a[1usize * 2 + 1],
@@ -1271,7 +1260,7 @@ impl Simd for Fallback {
                 b[6usize * 2 + 1],
                 b[7usize * 2 + 1],
             ]
-                .simd_into(self),
+            .simd_into(self),
         )
     }
     #[inline(always)]
@@ -1294,7 +1283,7 @@ impl Simd for Fallback {
             -(i8::eq(&a[14usize], &b[14usize]) as i8),
             -(i8::eq(&a[15usize], &b[15usize]) as i8),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn combine_mask8x16(self, a: mask8x16, b: mask8x16) -> mask8x32 {
@@ -1319,7 +1308,7 @@ impl Simd for Fallback {
             i16::not(a[6usize]),
             i16::not(a[7usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn add_i16x8(self, a: i16x8, b: i16x8) -> i16x8 {
@@ -1333,7 +1322,7 @@ impl Simd for Fallback {
             i16::add(a[6usize], &b[6usize]),
             i16::add(a[7usize], &b[7usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn sub_i16x8(self, a: i16x8, b: i16x8) -> i16x8 {
@@ -1347,7 +1336,7 @@ impl Simd for Fallback {
             i16::sub(a[6usize], &b[6usize]),
             i16::sub(a[7usize], &b[7usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn mul_i16x8(self, a: i16x8, b: i16x8) -> i16x8 {
@@ -1361,7 +1350,7 @@ impl Simd for Fallback {
             i16::mul(a[6usize], &b[6usize]),
             i16::mul(a[7usize], &b[7usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn and_i16x8(self, a: i16x8, b: i16x8) -> i16x8 {
@@ -1375,7 +1364,7 @@ impl Simd for Fallback {
             i16::bitand(a[6usize], &b[6usize]),
             i16::bitand(a[7usize], &b[7usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn or_i16x8(self, a: i16x8, b: i16x8) -> i16x8 {
@@ -1389,7 +1378,7 @@ impl Simd for Fallback {
             i16::bitor(a[6usize], &b[6usize]),
             i16::bitor(a[7usize], &b[7usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn xor_i16x8(self, a: i16x8, b: i16x8) -> i16x8 {
@@ -1403,7 +1392,7 @@ impl Simd for Fallback {
             i16::bitxor(a[6usize], &b[6usize]),
             i16::bitxor(a[7usize], &b[7usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn simd_eq_i16x8(self, a: i16x8, b: i16x8) -> mask16x8 {
@@ -1417,7 +1406,7 @@ impl Simd for Fallback {
             -(i16::eq(&a[6usize], &b[6usize]) as i16),
             -(i16::eq(&a[7usize], &b[7usize]) as i16),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn simd_lt_i16x8(self, a: i16x8, b: i16x8) -> mask16x8 {
@@ -1431,7 +1420,7 @@ impl Simd for Fallback {
             -(i16::lt(&a[6usize], &b[6usize]) as i16),
             -(i16::lt(&a[7usize], &b[7usize]) as i16),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn simd_le_i16x8(self, a: i16x8, b: i16x8) -> mask16x8 {
@@ -1445,7 +1434,7 @@ impl Simd for Fallback {
             -(i16::le(&a[6usize], &b[6usize]) as i16),
             -(i16::le(&a[7usize], &b[7usize]) as i16),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn simd_ge_i16x8(self, a: i16x8, b: i16x8) -> mask16x8 {
@@ -1459,7 +1448,7 @@ impl Simd for Fallback {
             -(i16::ge(&a[6usize], &b[6usize]) as i16),
             -(i16::ge(&a[7usize], &b[7usize]) as i16),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn simd_gt_i16x8(self, a: i16x8, b: i16x8) -> mask16x8 {
@@ -1473,33 +1462,21 @@ impl Simd for Fallback {
             -(i16::gt(&a[6usize], &b[6usize]) as i16),
             -(i16::gt(&a[7usize], &b[7usize]) as i16),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn zip_i16x8(self, a: i16x8, b: i16x8) -> (i16x8, i16x8) {
         (
             [
-                a[0usize],
-                b[0usize],
-                a[1usize],
-                b[1usize],
-                a[2usize],
-                b[2usize],
-                a[3usize],
+                a[0usize], b[0usize], a[1usize], b[1usize], a[2usize], b[2usize], a[3usize],
                 b[3usize],
             ]
-                .simd_into(self),
+            .simd_into(self),
             [
-                a[4usize],
-                b[4usize],
-                a[5usize],
-                b[5usize],
-                a[6usize],
-                b[6usize],
-                a[7usize],
+                a[4usize], b[4usize], a[5usize], b[5usize], a[6usize], b[6usize], a[7usize],
                 b[7usize],
             ]
-                .simd_into(self),
+            .simd_into(self),
         )
     }
     #[inline(always)]
@@ -1515,7 +1492,7 @@ impl Simd for Fallback {
                 b[2usize * 2],
                 b[3usize * 2],
             ]
-                .simd_into(self),
+            .simd_into(self),
             [
                 a[0usize * 2 + 1],
                 a[1usize * 2 + 1],
@@ -1526,16 +1503,11 @@ impl Simd for Fallback {
                 b[2usize * 2 + 1],
                 b[3usize * 2 + 1],
             ]
-                .simd_into(self),
+            .simd_into(self),
         )
     }
     #[inline(always)]
-    fn select_i16x8(
-        self,
-        a: mask16x8,
-        b: i16x8,
-        c: i16x8,
-    ) -> i16x8 {
+    fn select_i16x8(self, a: mask16x8, b: i16x8, c: i16x8) -> i16x8 {
         [
             if a[0usize] != 0 { b[0usize] } else { c[0usize] },
             if a[1usize] != 0 { b[1usize] } else { c[1usize] },
@@ -1546,7 +1518,7 @@ impl Simd for Fallback {
             if a[6usize] != 0 { b[6usize] } else { c[6usize] },
             if a[7usize] != 0 { b[7usize] } else { c[7usize] },
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn combine_i16x8(self, a: i16x8, b: i16x8) -> i16x16 {
@@ -1571,7 +1543,7 @@ impl Simd for Fallback {
             u16::not(a[6usize]),
             u16::not(a[7usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn add_u16x8(self, a: u16x8, b: u16x8) -> u16x8 {
@@ -1585,7 +1557,7 @@ impl Simd for Fallback {
             u16::add(a[6usize], &b[6usize]),
             u16::add(a[7usize], &b[7usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn sub_u16x8(self, a: u16x8, b: u16x8) -> u16x8 {
@@ -1599,7 +1571,7 @@ impl Simd for Fallback {
             u16::sub(a[6usize], &b[6usize]),
             u16::sub(a[7usize], &b[7usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn mul_u16x8(self, a: u16x8, b: u16x8) -> u16x8 {
@@ -1613,7 +1585,7 @@ impl Simd for Fallback {
             u16::mul(a[6usize], &b[6usize]),
             u16::mul(a[7usize], &b[7usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn and_u16x8(self, a: u16x8, b: u16x8) -> u16x8 {
@@ -1627,7 +1599,7 @@ impl Simd for Fallback {
             u16::bitand(a[6usize], &b[6usize]),
             u16::bitand(a[7usize], &b[7usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn or_u16x8(self, a: u16x8, b: u16x8) -> u16x8 {
@@ -1641,7 +1613,7 @@ impl Simd for Fallback {
             u16::bitor(a[6usize], &b[6usize]),
             u16::bitor(a[7usize], &b[7usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn xor_u16x8(self, a: u16x8, b: u16x8) -> u16x8 {
@@ -1655,7 +1627,7 @@ impl Simd for Fallback {
             u16::bitxor(a[6usize], &b[6usize]),
             u16::bitxor(a[7usize], &b[7usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn simd_eq_u16x8(self, a: u16x8, b: u16x8) -> mask16x8 {
@@ -1669,7 +1641,7 @@ impl Simd for Fallback {
             -(u16::eq(&a[6usize], &b[6usize]) as i16),
             -(u16::eq(&a[7usize], &b[7usize]) as i16),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn simd_lt_u16x8(self, a: u16x8, b: u16x8) -> mask16x8 {
@@ -1683,7 +1655,7 @@ impl Simd for Fallback {
             -(u16::lt(&a[6usize], &b[6usize]) as i16),
             -(u16::lt(&a[7usize], &b[7usize]) as i16),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn simd_le_u16x8(self, a: u16x8, b: u16x8) -> mask16x8 {
@@ -1697,7 +1669,7 @@ impl Simd for Fallback {
             -(u16::le(&a[6usize], &b[6usize]) as i16),
             -(u16::le(&a[7usize], &b[7usize]) as i16),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn simd_ge_u16x8(self, a: u16x8, b: u16x8) -> mask16x8 {
@@ -1711,7 +1683,7 @@ impl Simd for Fallback {
             -(u16::ge(&a[6usize], &b[6usize]) as i16),
             -(u16::ge(&a[7usize], &b[7usize]) as i16),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn simd_gt_u16x8(self, a: u16x8, b: u16x8) -> mask16x8 {
@@ -1725,33 +1697,21 @@ impl Simd for Fallback {
             -(u16::gt(&a[6usize], &b[6usize]) as i16),
             -(u16::gt(&a[7usize], &b[7usize]) as i16),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn zip_u16x8(self, a: u16x8, b: u16x8) -> (u16x8, u16x8) {
         (
             [
-                a[0usize],
-                b[0usize],
-                a[1usize],
-                b[1usize],
-                a[2usize],
-                b[2usize],
-                a[3usize],
+                a[0usize], b[0usize], a[1usize], b[1usize], a[2usize], b[2usize], a[3usize],
                 b[3usize],
             ]
-                .simd_into(self),
+            .simd_into(self),
             [
-                a[4usize],
-                b[4usize],
-                a[5usize],
-                b[5usize],
-                a[6usize],
-                b[6usize],
-                a[7usize],
+                a[4usize], b[4usize], a[5usize], b[5usize], a[6usize], b[6usize], a[7usize],
                 b[7usize],
             ]
-                .simd_into(self),
+            .simd_into(self),
         )
     }
     #[inline(always)]
@@ -1767,7 +1727,7 @@ impl Simd for Fallback {
                 b[2usize * 2],
                 b[3usize * 2],
             ]
-                .simd_into(self),
+            .simd_into(self),
             [
                 a[0usize * 2 + 1],
                 a[1usize * 2 + 1],
@@ -1778,16 +1738,11 @@ impl Simd for Fallback {
                 b[2usize * 2 + 1],
                 b[3usize * 2 + 1],
             ]
-                .simd_into(self),
+            .simd_into(self),
         )
     }
     #[inline(always)]
-    fn select_u16x8(
-        self,
-        a: mask16x8,
-        b: u16x8,
-        c: u16x8,
-    ) -> u16x8 {
+    fn select_u16x8(self, a: mask16x8, b: u16x8, c: u16x8) -> u16x8 {
         [
             if a[0usize] != 0 { b[0usize] } else { c[0usize] },
             if a[1usize] != 0 { b[1usize] } else { c[1usize] },
@@ -1798,7 +1753,7 @@ impl Simd for Fallback {
             if a[6usize] != 0 { b[6usize] } else { c[6usize] },
             if a[7usize] != 0 { b[7usize] } else { c[7usize] },
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn combine_u16x8(self, a: u16x8, b: u16x8) -> u16x16 {
@@ -1823,7 +1778,7 @@ impl Simd for Fallback {
             i16::not(a[6usize]),
             i16::not(a[7usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn and_mask16x8(self, a: mask16x8, b: mask16x8) -> mask16x8 {
@@ -1837,7 +1792,7 @@ impl Simd for Fallback {
             i16::bitand(a[6usize], &b[6usize]),
             i16::bitand(a[7usize], &b[7usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn or_mask16x8(self, a: mask16x8, b: mask16x8) -> mask16x8 {
@@ -1851,7 +1806,7 @@ impl Simd for Fallback {
             i16::bitor(a[6usize], &b[6usize]),
             i16::bitor(a[7usize], &b[7usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn xor_mask16x8(self, a: mask16x8, b: mask16x8) -> mask16x8 {
@@ -1865,7 +1820,7 @@ impl Simd for Fallback {
             i16::bitxor(a[6usize], &b[6usize]),
             i16::bitxor(a[7usize], &b[7usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn select_mask16x8(
@@ -1884,7 +1839,7 @@ impl Simd for Fallback {
             if a[6usize] != 0 { b[6usize] } else { c[6usize] },
             if a[7usize] != 0 { b[7usize] } else { c[7usize] },
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn zip_mask16x8(
@@ -1894,27 +1849,15 @@ impl Simd for Fallback {
     ) -> (mask16x8, mask16x8) {
         (
             [
-                a[0usize],
-                b[0usize],
-                a[1usize],
-                b[1usize],
-                a[2usize],
-                b[2usize],
-                a[3usize],
+                a[0usize], b[0usize], a[1usize], b[1usize], a[2usize], b[2usize], a[3usize],
                 b[3usize],
             ]
-                .simd_into(self),
+            .simd_into(self),
             [
-                a[4usize],
-                b[4usize],
-                a[5usize],
-                b[5usize],
-                a[6usize],
-                b[6usize],
-                a[7usize],
+                a[4usize], b[4usize], a[5usize], b[5usize], a[6usize], b[6usize], a[7usize],
                 b[7usize],
             ]
-                .simd_into(self),
+            .simd_into(self),
         )
     }
     #[inline(always)]
@@ -1934,7 +1877,7 @@ impl Simd for Fallback {
                 b[2usize * 2],
                 b[3usize * 2],
             ]
-                .simd_into(self),
+            .simd_into(self),
             [
                 a[0usize * 2 + 1],
                 a[1usize * 2 + 1],
@@ -1945,7 +1888,7 @@ impl Simd for Fallback {
                 b[2usize * 2 + 1],
                 b[3usize * 2 + 1],
             ]
-                .simd_into(self),
+            .simd_into(self),
         )
     }
     #[inline(always)]
@@ -1960,7 +1903,7 @@ impl Simd for Fallback {
             -(i16::eq(&a[6usize], &b[6usize]) as i16),
             -(i16::eq(&a[7usize], &b[7usize]) as i16),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn combine_mask16x8(self, a: mask16x8, b: mask16x8) -> mask16x16 {
@@ -1981,7 +1924,7 @@ impl Simd for Fallback {
             i32::not(a[2usize]),
             i32::not(a[3usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn add_i32x4(self, a: i32x4, b: i32x4) -> i32x4 {
@@ -1991,7 +1934,7 @@ impl Simd for Fallback {
             i32::add(a[2usize], &b[2usize]),
             i32::add(a[3usize], &b[3usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn sub_i32x4(self, a: i32x4, b: i32x4) -> i32x4 {
@@ -2001,7 +1944,7 @@ impl Simd for Fallback {
             i32::sub(a[2usize], &b[2usize]),
             i32::sub(a[3usize], &b[3usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn mul_i32x4(self, a: i32x4, b: i32x4) -> i32x4 {
@@ -2011,7 +1954,7 @@ impl Simd for Fallback {
             i32::mul(a[2usize], &b[2usize]),
             i32::mul(a[3usize], &b[3usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn and_i32x4(self, a: i32x4, b: i32x4) -> i32x4 {
@@ -2021,7 +1964,7 @@ impl Simd for Fallback {
             i32::bitand(a[2usize], &b[2usize]),
             i32::bitand(a[3usize], &b[3usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn or_i32x4(self, a: i32x4, b: i32x4) -> i32x4 {
@@ -2031,7 +1974,7 @@ impl Simd for Fallback {
             i32::bitor(a[2usize], &b[2usize]),
             i32::bitor(a[3usize], &b[3usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn xor_i32x4(self, a: i32x4, b: i32x4) -> i32x4 {
@@ -2041,7 +1984,7 @@ impl Simd for Fallback {
             i32::bitxor(a[2usize], &b[2usize]),
             i32::bitxor(a[3usize], &b[3usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn simd_eq_i32x4(self, a: i32x4, b: i32x4) -> mask32x4 {
@@ -2051,7 +1994,7 @@ impl Simd for Fallback {
             -(i32::eq(&a[2usize], &b[2usize]) as i32),
             -(i32::eq(&a[3usize], &b[3usize]) as i32),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn simd_lt_i32x4(self, a: i32x4, b: i32x4) -> mask32x4 {
@@ -2061,7 +2004,7 @@ impl Simd for Fallback {
             -(i32::lt(&a[2usize], &b[2usize]) as i32),
             -(i32::lt(&a[3usize], &b[3usize]) as i32),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn simd_le_i32x4(self, a: i32x4, b: i32x4) -> mask32x4 {
@@ -2071,7 +2014,7 @@ impl Simd for Fallback {
             -(i32::le(&a[2usize], &b[2usize]) as i32),
             -(i32::le(&a[3usize], &b[3usize]) as i32),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn simd_ge_i32x4(self, a: i32x4, b: i32x4) -> mask32x4 {
@@ -2081,7 +2024,7 @@ impl Simd for Fallback {
             -(i32::ge(&a[2usize], &b[2usize]) as i32),
             -(i32::ge(&a[3usize], &b[3usize]) as i32),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn simd_gt_i32x4(self, a: i32x4, b: i32x4) -> mask32x4 {
@@ -2091,7 +2034,7 @@ impl Simd for Fallback {
             -(i32::gt(&a[2usize], &b[2usize]) as i32),
             -(i32::gt(&a[3usize], &b[3usize]) as i32),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn zip_i32x4(self, a: i32x4, b: i32x4) -> (i32x4, i32x4) {
@@ -2104,24 +2047,24 @@ impl Simd for Fallback {
     fn unzip_i32x4(self, a: i32x4, b: i32x4) -> (i32x4, i32x4) {
         (
             [a[0usize * 2], a[1usize * 2], b[0usize * 2], b[1usize * 2]].simd_into(self),
-            [a[0usize * 2 + 1], a[1usize * 2 + 1], b[0usize * 2 + 1], b[1usize * 2 + 1]]
-                .simd_into(self),
+            [
+                a[0usize * 2 + 1],
+                a[1usize * 2 + 1],
+                b[0usize * 2 + 1],
+                b[1usize * 2 + 1],
+            ]
+            .simd_into(self),
         )
     }
     #[inline(always)]
-    fn select_i32x4(
-        self,
-        a: mask32x4,
-        b: i32x4,
-        c: i32x4,
-    ) -> i32x4 {
+    fn select_i32x4(self, a: mask32x4, b: i32x4, c: i32x4) -> i32x4 {
         [
             if a[0usize] != 0 { b[0usize] } else { c[0usize] },
             if a[1usize] != 0 { b[1usize] } else { c[1usize] },
             if a[2usize] != 0 { b[2usize] } else { c[2usize] },
             if a[3usize] != 0 { b[3usize] } else { c[3usize] },
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn combine_i32x4(self, a: i32x4, b: i32x4) -> i32x8 {
@@ -2142,7 +2085,7 @@ impl Simd for Fallback {
             u32::not(a[2usize]),
             u32::not(a[3usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn add_u32x4(self, a: u32x4, b: u32x4) -> u32x4 {
@@ -2152,7 +2095,7 @@ impl Simd for Fallback {
             u32::add(a[2usize], &b[2usize]),
             u32::add(a[3usize], &b[3usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn sub_u32x4(self, a: u32x4, b: u32x4) -> u32x4 {
@@ -2162,7 +2105,7 @@ impl Simd for Fallback {
             u32::sub(a[2usize], &b[2usize]),
             u32::sub(a[3usize], &b[3usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn mul_u32x4(self, a: u32x4, b: u32x4) -> u32x4 {
@@ -2172,7 +2115,7 @@ impl Simd for Fallback {
             u32::mul(a[2usize], &b[2usize]),
             u32::mul(a[3usize], &b[3usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn and_u32x4(self, a: u32x4, b: u32x4) -> u32x4 {
@@ -2182,7 +2125,7 @@ impl Simd for Fallback {
             u32::bitand(a[2usize], &b[2usize]),
             u32::bitand(a[3usize], &b[3usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn or_u32x4(self, a: u32x4, b: u32x4) -> u32x4 {
@@ -2192,7 +2135,7 @@ impl Simd for Fallback {
             u32::bitor(a[2usize], &b[2usize]),
             u32::bitor(a[3usize], &b[3usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn xor_u32x4(self, a: u32x4, b: u32x4) -> u32x4 {
@@ -2202,7 +2145,7 @@ impl Simd for Fallback {
             u32::bitxor(a[2usize], &b[2usize]),
             u32::bitxor(a[3usize], &b[3usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn simd_eq_u32x4(self, a: u32x4, b: u32x4) -> mask32x4 {
@@ -2212,7 +2155,7 @@ impl Simd for Fallback {
             -(u32::eq(&a[2usize], &b[2usize]) as i32),
             -(u32::eq(&a[3usize], &b[3usize]) as i32),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn simd_lt_u32x4(self, a: u32x4, b: u32x4) -> mask32x4 {
@@ -2222,7 +2165,7 @@ impl Simd for Fallback {
             -(u32::lt(&a[2usize], &b[2usize]) as i32),
             -(u32::lt(&a[3usize], &b[3usize]) as i32),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn simd_le_u32x4(self, a: u32x4, b: u32x4) -> mask32x4 {
@@ -2232,7 +2175,7 @@ impl Simd for Fallback {
             -(u32::le(&a[2usize], &b[2usize]) as i32),
             -(u32::le(&a[3usize], &b[3usize]) as i32),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn simd_ge_u32x4(self, a: u32x4, b: u32x4) -> mask32x4 {
@@ -2242,7 +2185,7 @@ impl Simd for Fallback {
             -(u32::ge(&a[2usize], &b[2usize]) as i32),
             -(u32::ge(&a[3usize], &b[3usize]) as i32),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn simd_gt_u32x4(self, a: u32x4, b: u32x4) -> mask32x4 {
@@ -2252,7 +2195,7 @@ impl Simd for Fallback {
             -(u32::gt(&a[2usize], &b[2usize]) as i32),
             -(u32::gt(&a[3usize], &b[3usize]) as i32),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn zip_u32x4(self, a: u32x4, b: u32x4) -> (u32x4, u32x4) {
@@ -2265,24 +2208,24 @@ impl Simd for Fallback {
     fn unzip_u32x4(self, a: u32x4, b: u32x4) -> (u32x4, u32x4) {
         (
             [a[0usize * 2], a[1usize * 2], b[0usize * 2], b[1usize * 2]].simd_into(self),
-            [a[0usize * 2 + 1], a[1usize * 2 + 1], b[0usize * 2 + 1], b[1usize * 2 + 1]]
-                .simd_into(self),
+            [
+                a[0usize * 2 + 1],
+                a[1usize * 2 + 1],
+                b[0usize * 2 + 1],
+                b[1usize * 2 + 1],
+            ]
+            .simd_into(self),
         )
     }
     #[inline(always)]
-    fn select_u32x4(
-        self,
-        a: mask32x4,
-        b: u32x4,
-        c: u32x4,
-    ) -> u32x4 {
+    fn select_u32x4(self, a: mask32x4, b: u32x4, c: u32x4) -> u32x4 {
         [
             if a[0usize] != 0 { b[0usize] } else { c[0usize] },
             if a[1usize] != 0 { b[1usize] } else { c[1usize] },
             if a[2usize] != 0 { b[2usize] } else { c[2usize] },
             if a[3usize] != 0 { b[3usize] } else { c[3usize] },
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn combine_u32x4(self, a: u32x4, b: u32x4) -> u32x8 {
@@ -2303,7 +2246,7 @@ impl Simd for Fallback {
             i32::not(a[2usize]),
             i32::not(a[3usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn and_mask32x4(self, a: mask32x4, b: mask32x4) -> mask32x4 {
@@ -2313,7 +2256,7 @@ impl Simd for Fallback {
             i32::bitand(a[2usize], &b[2usize]),
             i32::bitand(a[3usize], &b[3usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn or_mask32x4(self, a: mask32x4, b: mask32x4) -> mask32x4 {
@@ -2323,7 +2266,7 @@ impl Simd for Fallback {
             i32::bitor(a[2usize], &b[2usize]),
             i32::bitor(a[3usize], &b[3usize]),
        ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn xor_mask32x4(self, a: mask32x4, b: mask32x4) -> mask32x4 {
@@ -2333,7 +2276,7 @@ impl Simd for Fallback {
             i32::bitxor(a[2usize], &b[2usize]),
             i32::bitxor(a[3usize], &b[3usize]),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn select_mask32x4(
@@ -2348,7 +2291,7 @@ impl Simd for Fallback {
             if a[2usize] != 0 { b[2usize] } else { c[2usize] },
             if a[3usize] != 0 { b[3usize] } else { c[3usize] },
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn zip_mask32x4(
@@ -2369,8 +2312,13 @@ impl Simd for Fallback {
     ) -> (mask32x4, mask32x4) {
         (
             [a[0usize * 2], a[1usize * 2], b[0usize * 2], b[1usize * 2]].simd_into(self),
-            [a[0usize * 2 + 1], a[1usize * 2 + 1], b[0usize * 2 + 1], b[1usize * 2 + 1]]
-                .simd_into(self),
+            [
+                a[0usize * 2 + 1],
+                a[1usize * 2 + 1],
+                b[0usize * 2 + 1],
+                b[1usize * 2 + 1],
+            ]
+            .simd_into(self),
         )
     }
     #[inline(always)]
@@ -2381,7 +2329,7 @@ impl Simd for Fallback {
             -(i32::eq(&a[2usize], &b[2usize]) as i32),
             -(i32::eq(&a[3usize], &b[3usize]) as i32),
         ]
-            .simd_into(self)
+        .simd_into(self)
     }
     #[inline(always)]
     fn combine_mask32x4(self, a: mask32x4, b: mask32x4) -> mask32x8 {
@@ -2529,12 +2477,7 @@ impl Simd for Fallback {
         self.combine_f32x4(self.floor_f32x4(a0), self.floor_f32x4(a1))
     }
     #[inline(always)]
-    fn select_f32x8(
-        self,
-        a: mask32x8,
-        b: f32x8,
-        c: f32x8,
-    ) -> f32x8 {
+    fn select_f32x8(self, a: mask32x8, b: f32x8, c: f32x8) -> f32x8 {
         let (a0, a1) = self.split_mask32x8(a);
         let (b0, b1) = self.split_f32x8(b);
         let (c0, c1) = self.split_f32x8(c);
@@ -2646,12 +2589,7 @@ impl Simd for Fallback {
         (self.combine_i8x16(c00, c10), self.combine_i8x16(c01, c11))
     }
     #[inline(always)]
-    fn select_i8x32(
-        self,
-        a: mask8x32,
-        b: i8x32,
-        c: i8x32,
-    ) -> i8x32 {
+    fn select_i8x32(self, a: mask8x32, b: i8x32, c: i8x32) -> i8x32 {
         let (a0, a1) = self.split_mask8x32(a);
         let (b0, b1) = self.split_i8x32(b);
         let (c0, c1) = self.split_i8x32(c);
@@ -2758,12 +2696,7 @@ impl Simd for Fallback {
         (self.combine_u8x16(c00, c10), self.combine_u8x16(c01, c11))
     }
     #[inline(always)]
-    fn select_u8x32(
-        self,
-        a: mask8x32,
-        b: u8x32,
-        c: u8x32,
-    ) -> u8x32 {
+    fn select_u8x32(self, a: mask8x32, b: u8x32, c: u8x32) -> u8x32 {
         let (a0, a1) = self.split_mask8x32(a);
         let (b0, b1) = self.split_u8x32(b);
         let (c0, c1) = self.split_u8x32(c);
@@ -2830,7 +2763,10 @@ impl Simd for Fallback {
         let (b0, b1) = self.split_mask8x32(b);
        let (c00, c01) = self.zip_mask8x16(a0, b0);
         let (c10, c11) = self.zip_mask8x16(a1, b1);
-        (self.combine_mask8x16(c00, c01), self.combine_mask8x16(c10, c11))
+        (
+            self.combine_mask8x16(c00, c01),
+            self.combine_mask8x16(c10, c11),
+        )
     }
     #[inline(always)]
     fn unzip_mask8x32(
@@ -2842,16 +2778,16 @@ impl Simd for Fallback {
         let (b0, b1) = self.split_mask8x32(b);
         let (c00, c01) = self.unzip_mask8x16(a0, a1);
         let (c10, c11) = self.unzip_mask8x16(b0, b1);
-        (self.combine_mask8x16(c00, c10), self.combine_mask8x16(c01, c11))
+        (
+            self.combine_mask8x16(c00, c10),
+            self.combine_mask8x16(c01, c11),
+        )
     }
     #[inline(always)]
     fn simd_eq_mask8x32(self, a: mask8x32, b: mask8x32) -> mask8x32 {
         let (a0, a1) = self.split_mask8x32(a);
         let (b0, b1) = self.split_mask8x32(b);
-        self.combine_mask8x16(
-            self.simd_eq_mask8x16(a0, b0),
-            self.simd_eq_mask8x16(a1, b1),
-        )
+        self.combine_mask8x16(self.simd_eq_mask8x16(a0, b0), self.simd_eq_mask8x16(a1, b1))
     }
     #[inline(always)]
     fn split_mask8x32(self, a: mask8x32) -> (mask8x16, mask8x16) {
@@ -2938,11 +2874,7 @@ impl Simd for Fallback {
         self.combine_mask16x8(self.simd_gt_i16x8(a0, b0), self.simd_gt_i16x8(a1, b1))
     }
     #[inline(always)]
-    fn zip_i16x16(
-        self,
-        a: i16x16,
-        b: i16x16,
-    ) -> (i16x16, i16x16) {
+    fn zip_i16x16(self, a: i16x16, b: i16x16) -> (i16x16, i16x16) {
         let (a0, a1) = self.split_i16x16(a);
         let (b0, b1) = self.split_i16x16(b);
         let (c00, c01) = self.zip_i16x8(a0, b0);
@@ -2950,11 +2882,7 @@ impl Simd for Fallback {
         let (c10, c11) = self.zip_i16x8(a1, b1);
         (self.combine_i16x8(c00, c01), self.combine_i16x8(c10, c11))
     }
     #[inline(always)]
-    fn unzip_i16x16(
-        self,
-        a: i16x16,
-        b: i16x16,
-    ) -> (i16x16, i16x16) {
+    fn unzip_i16x16(self, a: i16x16, b: i16x16) -> (i16x16, i16x16) {
         let (a0, a1) = self.split_i16x16(a);
         let (b0, b1) = self.split_i16x16(b);
         let (c00, c01) = self.unzip_i16x8(a0, a1);
@@ -2962,12 +2890,7 @@ impl Simd for Fallback {
         let (c10, c11) = self.unzip_i16x8(b0, b1);
         (self.combine_i16x8(c00, c10), self.combine_i16x8(c01, c11))
     }
     #[inline(always)]
-    fn select_i16x16(
-        self,
-        a: mask16x16,
-        b: i16x16,
-        c: i16x16,
-    ) -> i16x16 {
+    fn select_i16x16(self, a: mask16x16, b: i16x16, c: i16x16) -> i16x16 {
         let (a0, a1) = self.split_mask16x16(a);
         let (b0, b1) = self.split_i16x16(b);
         let (c0, c1) = self.split_i16x16(c);
@@ -3058,11 +2981,7 @@ impl Simd for Fallback {
         self.combine_mask16x8(self.simd_gt_u16x8(a0, b0), self.simd_gt_u16x8(a1, b1))
     }
     #[inline(always)]
-    fn zip_u16x16(
-        self,
-        a: u16x16,
-        b: u16x16,
-    ) -> (u16x16, u16x16) {
+    fn zip_u16x16(self, a: u16x16, b: u16x16) -> (u16x16, u16x16) {
         let (a0, a1) = self.split_u16x16(a);
         let (b0, b1) = self.split_u16x16(b);
         let (c00, c01) = self.zip_u16x8(a0, b0);
@@ -3070,11 +2989,7 @@ impl Simd for Fallback {
         let (c10, c11) = self.zip_u16x8(a1, b1);
         (self.combine_u16x8(c00, c01), self.combine_u16x8(c10, c11))
     }
     #[inline(always)]
-    fn unzip_u16x16(
-        self,
-        a: u16x16,
-        b: u16x16,
-    ) -> (u16x16, u16x16) {
+    fn unzip_u16x16(self, a: u16x16, b: u16x16) -> (u16x16, u16x16) {
         let (a0, a1) = self.split_u16x16(a);
         let (b0, b1) = self.split_u16x16(b);
         let (c00, c01) = self.unzip_u16x8(a0, a1);
@@ -3082,12 +2997,7 @@ impl Simd for Fallback {
         let (c10, c11) = self.unzip_u16x8(b0, b1);
         (self.combine_u16x8(c00, c10), self.combine_u16x8(c01, c11))
     }
     #[inline(always)]
-    fn select_u16x16(
-        self,
-        a: mask16x16,
-        b: u16x16,
-        c: u16x16,
-    ) -> u16x16 {
+    fn select_u16x16(self, a: mask16x16, b: u16x16, c: u16x16) -> u16x16 {
         let (a0, a1) = self.split_mask16x16(a);
         let (b0, b1) = self.split_u16x16(b);
         let (c0, c1) = self.split_u16x16(c);
@@ -3154,7 +3064,10 @@ impl Simd for Fallback {
         let (b0, b1) = self.split_mask16x16(b);
         let (c00, c01) = self.zip_mask16x8(a0, b0);
        let (c10, c11) = self.zip_mask16x8(a1, b1);
-        (self.combine_mask16x8(c00, c01), self.combine_mask16x8(c10, c11))
+        (
+            self.combine_mask16x8(c00, c01),
+            self.combine_mask16x8(c10, c11),
+        )
     }
     #[inline(always)]
     fn unzip_mask16x16(
@@ -3166,20 +3079,16 @@ impl Simd for Fallback {
         let (b0, b1) = self.split_mask16x16(b);
         let (c00, c01) = self.unzip_mask16x8(a0, a1);
         let (c10, c11) = self.unzip_mask16x8(b0, b1);
-        (self.combine_mask16x8(c00, c10), self.combine_mask16x8(c01, c11))
+        (
+            self.combine_mask16x8(c00, c10),
+            self.combine_mask16x8(c01, c11),
+        )
     }
     #[inline(always)]
-    fn simd_eq_mask16x16(
-        self,
-        a: mask16x16,
-        b: mask16x16,
-    ) -> mask16x16 {
+    fn simd_eq_mask16x16(self, a: mask16x16, b: mask16x16) -> mask16x16 {
         let (a0, a1) = self.split_mask16x16(a);
         let (b0, b1) = self.split_mask16x16(b);
-        self.combine_mask16x8(
-            self.simd_eq_mask16x8(a0, b0),
-            self.simd_eq_mask16x8(a1, b1),
-        )
+        self.combine_mask16x8(self.simd_eq_mask16x8(a0, b0), self.simd_eq_mask16x8(a1, b1))
     }
     #[inline(always)]
     fn split_mask16x16(self, a: mask16x16) -> (mask16x8, mask16x8) {
@@ -3282,12 +3191,7 @@ impl Simd for Fallback {
         (self.combine_i32x4(c00, c10), self.combine_i32x4(c01, c11))
     }
     #[inline(always)]
-    fn select_i32x8(
-        self,
-        a: mask32x8,
-        b: i32x8,
-        c: i32x8,
-    ) -> i32x8 {
+    fn select_i32x8(self, a: mask32x8, b: i32x8, c: i32x8) -> i32x8 {
         let (a0, a1) = self.split_mask32x8(a);
         let (b0, b1) = self.split_i32x8(b);
         let (c0, c1) = self.split_i32x8(c);
@@ -3394,12 +3298,7 @@ impl Simd for Fallback {
         (self.combine_u32x4(c00, c10), self.combine_u32x4(c01, c11))
     }
     #[inline(always)]
-    fn select_u32x8(
-        self,
-        a: mask32x8,
-        b: u32x8,
-        c: u32x8,
-    ) -> u32x8 {
+    fn select_u32x8(self, a: mask32x8, b: u32x8, c: u32x8) -> u32x8 {
         let (a0, a1) = self.split_mask32x8(a);
         let (b0, b1) = self.split_u32x8(b);
         let (c0, c1) = self.split_u32x8(c);
@@ -3466,7 +3365,10 @@ impl Simd for Fallback {
         let (b0, b1) = self.split_mask32x8(b);
         let (c00, c01) = self.zip_mask32x4(a0, b0);
         let (c10, c11) = self.zip_mask32x4(a1, b1);
-        (self.combine_mask32x4(c00, c01), self.combine_mask32x4(c10, c11))
+        (
+            self.combine_mask32x4(c00, c01),
+            self.combine_mask32x4(c10, c11),
+        )
     }
     #[inline(always)]
     fn unzip_mask32x8(
@@ -3478,16 +3380,16 @@ impl Simd for Fallback {
         let (b0, b1) = self.split_mask32x8(b);
         let (c00, c01) = self.unzip_mask32x4(a0, a1);
         let (c10, c11) = self.unzip_mask32x4(b0, b1);
-        (self.combine_mask32x4(c00, c10), self.combine_mask32x4(c01, c11))
+        (
+            self.combine_mask32x4(c00, c10),
+            self.combine_mask32x4(c01, c11),
+        )
     }
     #[inline(always)]
     fn simd_eq_mask32x8(self, a: mask32x8, b: mask32x8) -> mask32x8 {
         let (a0, a1) = self.split_mask32x8(a);
         let (b0, b1) = self.split_mask32x8(b);
-        self.combine_mask32x4(
-            self.simd_eq_mask32x4(a0, b0),
-            self.simd_eq_mask32x4(a1, b1),
-        )
+        self.combine_mask32x4(self.simd_eq_mask32x4(a0, b0), self.simd_eq_mask32x4(a1, b1))
     }
     #[inline(always)]
     fn split_mask32x8(self, a: mask32x8) -> (mask32x4, mask32x4) {
diff --git a/fearless_simd/src/generated/neon.rs b/fearless_simd/src/generated/neon.rs
index 5113595c..59a2fbc9 100644
--- a/fearless_simd/src/generated/neon.rs
+++ b/fearless_simd/src/generated/neon.rs
@@ -1,11 +1,11 @@
 // This file is autogenerated by fearless_simd_gen
-use core::arch::aarch64::*;
-use crate::{seal::Seal, Level, Simd, SimdFrom, SimdInto};
+use crate::{Level, Simd, SimdFrom, SimdInto, seal::Seal};
 use crate::{
-    f32x4, i8x16, u8x16, mask8x16, i16x8, u16x8, mask16x8, i32x4, u32x4, mask32x4, f32x8,
-    i8x32, u8x32, mask8x32, i16x16, u16x16, mask16x16, i32x8, u32x8, mask32x8,
+    f32x4, f32x8, i8x16, i8x32, i16x8, i16x16, i32x4, i32x8, mask8x16, mask8x32, mask16x8,
+    mask16x16, mask32x4, mask32x8, u8x16, u8x32, u16x8, u16x16, u32x4, u32x8,
 };
+use core::arch::aarch64::*;
 /// The SIMD token for the "neon" level.
 #[derive(Clone, Copy, Debug)]
 pub struct Neon {
@@ -107,13 +107,23 @@ impl Simd for Neon {
     fn zip_f32x4(self, a: f32x4, b: f32x4) -> (f32x4, f32x4) {
         let x = a.into();
         let y = b.into();
-        unsafe { (vzip1q_f32(x, y).simd_into(self), vzip2q_f32(x, y).simd_into(self)) }
+        unsafe {
+            (
+                vzip1q_f32(x, y).simd_into(self),
+                vzip2q_f32(x, y).simd_into(self),
+            )
+        }
     }
     #[inline(always)]
     fn unzip_f32x4(self, a: f32x4, b: f32x4) -> (f32x4, f32x4) {
         let x = a.into();
         let y = b.into();
-        unsafe { (vuzp1q_f32(x, y).simd_into(self), vuzp2q_f32(x, y).simd_into(self)) }
+        unsafe {
+            (
+                vuzp1q_f32(x, y).simd_into(self),
+                vuzp2q_f32(x, y).simd_into(self),
+            )
+        }
     }
     #[inline(always)]
     fn max_f32x4(self, a: f32x4, b: f32x4) -> f32x4 {
@@ -140,16 +150,8 @@ impl Simd for Neon {
         unsafe { vrndmq_f32(a.into()).simd_into(self) }
     }
     #[inline(always)]
-    fn select_f32x4(
-        self,
-        a: mask32x4,
-        b: f32x4,
-        c: f32x4,
-    ) -> f32x4 {
-        unsafe {
-            vbslq_f32(vreinterpretq_u32_s32(a.into()), b.into(), c.into())
-                .simd_into(self)
-        }
+    fn select_f32x4(self, a: mask32x4, b: f32x4, c: f32x4) -> f32x4 {
+        unsafe { vbslq_f32(vreinterpretq_u32_s32(a.into()), b.into(), c.into()).simd_into(self) }
     }
     #[inline(always)]
     fn combine_f32x4(self, a: f32x4, b: f32x4) -> f32x8 {
@@ -218,26 +220,29 @@ impl Simd for Neon {
     fn zip_i8x16(self, a: i8x16, b: i8x16) -> (i8x16, i8x16) {
         let x = a.into();
         let y = b.into();
-        unsafe { (vzip1q_s8(x, y).simd_into(self), vzip2q_s8(x, y).simd_into(self)) }
+        unsafe {
+            (
+                vzip1q_s8(x, y).simd_into(self),
+                vzip2q_s8(x, y).simd_into(self),
+            )
+        }
     }
     #[inline(always)]
     fn unzip_i8x16(self, a: i8x16, b: i8x16) -> (i8x16, i8x16) {
         let x = a.into();
         let y = b.into();
-        unsafe { (vuzp1q_s8(x, y).simd_into(self), vuzp2q_s8(x, y).simd_into(self)) }
-    }
-    #[inline(always)]
-    fn select_i8x16(
-        self,
-        a: mask8x16,
-        b: i8x16,
-        c: i8x16,
-    ) -> i8x16 {
         unsafe {
-            vbslq_s8(vreinterpretq_u8_s8(a.into()), b.into(), c.into()).simd_into(self)
+            (
+                vuzp1q_s8(x, y).simd_into(self),
+                vuzp2q_s8(x, y).simd_into(self),
+            )
         }
     }
     #[inline(always)]
+    fn select_i8x16(self, a: mask8x16, b: i8x16, c: i8x16) -> i8x16 {
+        unsafe { vbslq_s8(vreinterpretq_u8_s8(a.into()), b.into(), c.into()).simd_into(self) }
+    }
+    #[inline(always)]
     fn combine_i8x16(self, a: i8x16, b: i8x16) -> i8x32 {
         let mut result = [0; 32usize];
         result[0..16usize].copy_from_slice(&a.val);
@@ -300,26 +305,29 @@ impl Simd for Neon {
     fn zip_u8x16(self, a: u8x16, b: u8x16) -> (u8x16, u8x16) {
         let x = a.into();
         let y = b.into();
-        unsafe { (vzip1q_u8(x, y).simd_into(self), vzip2q_u8(x, y).simd_into(self)) }
+        unsafe {
+            (
+                vzip1q_u8(x, y).simd_into(self),
+                vzip2q_u8(x, y).simd_into(self),
+            )
+        }
     }
     #[inline(always)]
     fn unzip_u8x16(self, a: u8x16, b: u8x16) -> (u8x16, u8x16) {
         let x = a.into();
         let y = b.into();
-        unsafe { (vuzp1q_u8(x, y).simd_into(self), vuzp2q_u8(x, y).simd_into(self)) }
-    }
-    #[inline(always)]
-    fn select_u8x16(
-        self,
-        a: mask8x16,
-        b: u8x16,
-        c: u8x16,
-    ) -> u8x16 {
         unsafe {
-            vbslq_u8(vreinterpretq_u8_s8(a.into()), b.into(), c.into()).simd_into(self)
+            (
+                vuzp1q_u8(x, y).simd_into(self),
+                vuzp2q_u8(x, y).simd_into(self),
+            )
         }
     }
     #[inline(always)]
+    fn select_u8x16(self, a: mask8x16, b: u8x16, c: u8x16) -> u8x16 {
+        unsafe { vbslq_u8(vreinterpretq_u8_s8(a.into()), b.into(), c.into()).simd_into(self) }
+    }
+    #[inline(always)]
     fn combine_u8x16(self, a: u8x16, b: u8x16) -> u8x32 {
         let mut result = [0; 32usize];
         result[0..16usize].copy_from_slice(&a.val);
@@ -353,9 +361,7 @@ impl Simd for Neon {
         b: mask8x16,
         c: mask8x16,
     ) -> mask8x16 {
-        unsafe {
-            vbslq_s8(vreinterpretq_u8_s8(a.into()), b.into(), c.into()).simd_into(self)
-        }
+        unsafe { vbslq_s8(vreinterpretq_u8_s8(a.into()), b.into(), c.into()).simd_into(self) }
     }
     #[inline(always)]
     fn zip_mask8x16(
@@ -365,7 +371,12 @@ impl Simd for Neon {
     ) -> (mask8x16, mask8x16) {
         let x = a.into();
         let y = b.into();
-        unsafe { (vzip1q_s8(x, y).simd_into(self), vzip2q_s8(x, y).simd_into(self)) }
+        unsafe {
+            (
+                vzip1q_s8(x, y).simd_into(self),
+                vzip2q_s8(x, y).simd_into(self),
+            )
+        }
     }
     #[inline(always)]
     fn unzip_mask8x16(
@@ -375,7 +386,12 @@ impl Simd for Neon {
     ) -> (mask8x16, mask8x16) {
         let x = a.into();
         let y = b.into();
-        unsafe { (vuzp1q_s8(x, y).simd_into(self), vuzp2q_s8(x, y).simd_into(self)) }
+        unsafe {
+            (
+                vuzp1q_s8(x, y).simd_into(self),
+                vuzp2q_s8(x, y).simd_into(self),
+            )
+        }
     }
     #[inline(always)]
     fn simd_eq_mask8x16(self, a: mask8x16, b: mask8x16) -> mask8x16 {
@@ -444,27 +460,29 @@ impl Simd for Neon {
     fn zip_i16x8(self, a: i16x8, b: i16x8) -> (i16x8, i16x8) {
         let x = a.into();
         let y = b.into();
-        unsafe { (vzip1q_s16(x, y).simd_into(self), vzip2q_s16(x, y).simd_into(self)) }
+        unsafe {
+            (
+                vzip1q_s16(x, y).simd_into(self),
+                vzip2q_s16(x, y).simd_into(self),
+            )
+        }
     }
     #[inline(always)]
     fn unzip_i16x8(self, a: i16x8, b: i16x8) -> (i16x8, i16x8) {
         let x = a.into();
         let y = b.into();
-        unsafe { (vuzp1q_s16(x, y).simd_into(self), vuzp2q_s16(x, y).simd_into(self)) }
-    }
-    #[inline(always)]
-    fn select_i16x8(
-        self,
-        a: mask16x8,
-        b: i16x8,
-        c: i16x8,
-    ) -> i16x8 {
         unsafe {
-            vbslq_s16(vreinterpretq_u16_s16(a.into()), b.into(), c.into())
-                .simd_into(self)
+            (
+                vuzp1q_s16(x, y).simd_into(self),
+                vuzp2q_s16(x, y).simd_into(self),
+            )
         }
     }
     #[inline(always)]
+    fn select_i16x8(self, a: mask16x8, b: i16x8, c: i16x8) -> i16x8 {
+        unsafe { vbslq_s16(vreinterpretq_u16_s16(a.into()), b.into(), c.into()).simd_into(self) }
+    }
+    #[inline(always)]
     fn combine_i16x8(self, a: i16x8, b: i16x8) -> i16x16 {
         let mut result = [0; 16usize];
         result[0..8usize].copy_from_slice(&a.val);
@@ -527,27 +545,29 @@ impl Simd for Neon {
     fn zip_u16x8(self, a: u16x8, b: u16x8) -> (u16x8, u16x8) {
         let x = a.into();
         let y = b.into();
-        unsafe { (vzip1q_u16(x, y).simd_into(self), vzip2q_u16(x, y).simd_into(self)) }
+        unsafe {
+            (
+                vzip1q_u16(x, y).simd_into(self),
+                vzip2q_u16(x, y).simd_into(self),
+            )
+        }
     }
     #[inline(always)]
     fn unzip_u16x8(self, a: u16x8, b: u16x8) -> (u16x8, u16x8) {
         let x = a.into();
         let y = b.into();
-        unsafe { (vuzp1q_u16(x, y).simd_into(self), vuzp2q_u16(x, y).simd_into(self)) }
-    }
-    #[inline(always)]
-    fn select_u16x8(
-        self,
-        a: mask16x8,
-        b: u16x8,
-        c: u16x8,
-    ) -> u16x8 {
         unsafe {
-            vbslq_u16(vreinterpretq_u16_s16(a.into()), b.into(), c.into())
-                .simd_into(self)
+            (
+                vuzp1q_u16(x, y).simd_into(self),
+                vuzp2q_u16(x, y).simd_into(self),
+            )
         }
     }
     #[inline(always)]
+    fn select_u16x8(self, a: mask16x8, b: u16x8, c: u16x8) -> u16x8 {
+        unsafe { vbslq_u16(vreinterpretq_u16_s16(a.into()), b.into(), c.into()).simd_into(self) }
+    }
+    #[inline(always)]
     fn combine_u16x8(self, a: u16x8, b: u16x8) -> u16x16 {
         let mut result = [0; 16usize];
         result[0..8usize].copy_from_slice(&a.val);
@@ -581,10 +601,7 @@ impl Simd for Neon {
         b: mask16x8,
         c: mask16x8,
     ) -> mask16x8 {
-        unsafe {
-            vbslq_s16(vreinterpretq_u16_s16(a.into()), b.into(), c.into())
-                .simd_into(self)
-        }
+        unsafe { vbslq_s16(vreinterpretq_u16_s16(a.into()), b.into(), c.into()).simd_into(self) }
     }
     #[inline(always)]
     fn zip_mask16x8(
@@ -594,7 +611,12 @@ impl Simd for Neon {
     ) -> (mask16x8, mask16x8) {
         let x = a.into();
         let y = b.into();
-        unsafe { (vzip1q_s16(x, y).simd_into(self), vzip2q_s16(x, y).simd_into(self)) }
+        unsafe {
+            (
+                vzip1q_s16(x, y).simd_into(self),
+                vzip2q_s16(x, y).simd_into(self),
+            )
+        }
     }
     #[inline(always)]
     fn unzip_mask16x8(
@@ -604,7 +626,12 @@ impl Simd for Neon {
     ) -> (mask16x8, mask16x8) {
         let x = a.into();
         let y = b.into();
-        unsafe { (vuzp1q_s16(x, y).simd_into(self), vuzp2q_s16(x, y).simd_into(self)) }
+        unsafe {
+            (
+                vuzp1q_s16(x, y).simd_into(self),
+                vuzp2q_s16(x, y).simd_into(self),
+            )
+        }
     }
     #[inline(always)]
     fn simd_eq_mask16x8(self, a: mask16x8, b: mask16x8) -> mask16x8 {
@@ -673,27 +700,29 @@ impl Simd for Neon {
     fn zip_i32x4(self, a: i32x4, b: i32x4) -> (i32x4, i32x4) {
         let x = a.into();
         let y = b.into();
-        unsafe { (vzip1q_s32(x, y).simd_into(self), vzip2q_s32(x, y).simd_into(self)) }
+        unsafe {
+            (
+                vzip1q_s32(x, y).simd_into(self),
+                vzip2q_s32(x, y).simd_into(self),
+            )
+        }
     }
     #[inline(always)]
     fn unzip_i32x4(self, a: i32x4, b: i32x4) -> (i32x4, i32x4) {
         let x = a.into();
         let y = b.into();
-        unsafe { (vuzp1q_s32(x, y).simd_into(self), vuzp2q_s32(x, y).simd_into(self)) }
-    }
-    #[inline(always)]
-    fn select_i32x4(
-        self,
-        a: mask32x4,
-        b: i32x4,
-        c: i32x4,
-    ) -> i32x4 {
         unsafe {
-            vbslq_s32(vreinterpretq_u32_s32(a.into()), b.into(), c.into())
-                .simd_into(self)
+            (
+                vuzp1q_s32(x, y).simd_into(self),
+                vuzp2q_s32(x, y).simd_into(self),
+            )
         }
     }
     #[inline(always)]
+    fn select_i32x4(self, a: mask32x4, b: i32x4, c: i32x4) -> i32x4 {
+        unsafe { vbslq_s32(vreinterpretq_u32_s32(a.into()), b.into(), c.into()).simd_into(self) }
+    }
+    #[inline(always)]
     fn combine_i32x4(self, a: i32x4, b: i32x4) -> i32x8 {
         let mut result = [0; 8usize];
         result[0..4usize].copy_from_slice(&a.val);
@@ -756,27 +785,29 @@ impl Simd for Neon {
     fn zip_u32x4(self, a: u32x4, b: u32x4) -> (u32x4, u32x4) {
         let x = a.into();
         let y = b.into();
-        unsafe { (vzip1q_u32(x, y).simd_into(self), vzip2q_u32(x, y).simd_into(self)) }
+        unsafe {
+            (
+                vzip1q_u32(x, y).simd_into(self),
+                vzip2q_u32(x, y).simd_into(self),
+            )
+        }
     }
     #[inline(always)]
     fn unzip_u32x4(self, a: u32x4, b: u32x4) -> (u32x4, u32x4) {
         let x = a.into();
         let y = b.into();
-        unsafe { (vuzp1q_u32(x, y).simd_into(self), vuzp2q_u32(x, y).simd_into(self)) }
-    }
-    #[inline(always)]
-    fn select_u32x4(
-        self,
-        a: mask32x4,
-        b: u32x4,
-        c: u32x4,
-    ) -> u32x4 {
         unsafe {
-            vbslq_u32(vreinterpretq_u32_s32(a.into()), b.into(), c.into())
-                .simd_into(self)
+            (
+                vuzp1q_u32(x, y).simd_into(self),
+                vuzp2q_u32(x, y).simd_into(self),
+            )
         }
     }
     #[inline(always)]
+    fn select_u32x4(self, a: mask32x4, b: u32x4, c: u32x4) -> u32x4 {
+        unsafe { vbslq_u32(vreinterpretq_u32_s32(a.into()), b.into(), c.into()).simd_into(self) }
+    }
+    #[inline(always)]
     fn combine_u32x4(self, a: u32x4, b: u32x4) -> u32x8 {
         let mut result = [0; 8usize];
         result[0..4usize].copy_from_slice(&a.val);
@@ -810,10 +841,7 @@ impl Simd for Neon {
         b: mask32x4,
         c: mask32x4,
     ) -> mask32x4 {
-        unsafe {
-            vbslq_s32(vreinterpretq_u32_s32(a.into()), b.into(), c.into())
-                .simd_into(self)
-        }
+        unsafe { vbslq_s32(vreinterpretq_u32_s32(a.into()), b.into(), c.into()).simd_into(self) }
     }
     #[inline(always)]
     fn zip_mask32x4(
@@ -823,7 +851,12 @@ impl Simd for Neon {
     ) -> (mask32x4, mask32x4) {
        let x = a.into();
x = a.into(); let y = b.into(); - unsafe { (vzip1q_s32(x, y).simd_into(self), vzip2q_s32(x, y).simd_into(self)) } + unsafe { + ( + vzip1q_s32(x, y).simd_into(self), + vzip2q_s32(x, y).simd_into(self), + ) + } } #[inline(always)] fn unzip_mask32x4( @@ -833,7 +866,12 @@ impl Simd for Neon { ) -> (mask32x4, mask32x4) { let x = a.into(); let y = b.into(); - unsafe { (vuzp1q_s32(x, y).simd_into(self), vuzp2q_s32(x, y).simd_into(self)) } + unsafe { + ( + vuzp1q_s32(x, y).simd_into(self), + vuzp2q_s32(x, y).simd_into(self), + ) + } } #[inline(always)] fn simd_eq_mask32x4(self, a: mask32x4, b: mask32x4) -> mask32x4 { @@ -985,12 +1023,7 @@ impl Simd for Neon { self.combine_f32x4(self.floor_f32x4(a0), self.floor_f32x4(a1)) } #[inline(always)] - fn select_f32x8( - self, - a: mask32x8, - b: f32x8, - c: f32x8, - ) -> f32x8 { + fn select_f32x8(self, a: mask32x8, b: f32x8, c: f32x8) -> f32x8 { let (a0, a1) = self.split_mask32x8(a); let (b0, b1) = self.split_f32x8(b); let (c0, c1) = self.split_f32x8(c); @@ -1102,12 +1135,7 @@ impl Simd for Neon { (self.combine_i8x16(c00, c10), self.combine_i8x16(c01, c11)) } #[inline(always)] - fn select_i8x32( - self, - a: mask8x32, - b: i8x32, - c: i8x32, - ) -> i8x32 { + fn select_i8x32(self, a: mask8x32, b: i8x32, c: i8x32) -> i8x32 { let (a0, a1) = self.split_mask8x32(a); let (b0, b1) = self.split_i8x32(b); let (c0, c1) = self.split_i8x32(c); @@ -1214,12 +1242,7 @@ impl Simd for Neon { (self.combine_u8x16(c00, c10), self.combine_u8x16(c01, c11)) } #[inline(always)] - fn select_u8x32( - self, - a: mask8x32, - b: u8x32, - c: u8x32, - ) -> u8x32 { + fn select_u8x32(self, a: mask8x32, b: u8x32, c: u8x32) -> u8x32 { let (a0, a1) = self.split_mask8x32(a); let (b0, b1) = self.split_u8x32(b); let (c0, c1) = self.split_u8x32(c); @@ -1286,7 +1309,10 @@ impl Simd for Neon { let (b0, b1) = self.split_mask8x32(b); let (c00, c01) = self.zip_mask8x16(a0, b0); let (c10, c11) = self.zip_mask8x16(a1, b1); - (self.combine_mask8x16(c00, c01), self.combine_mask8x16(c10, c11)) + ( + self.combine_mask8x16(c00, c01), + self.combine_mask8x16(c10, c11), + ) } #[inline(always)] fn unzip_mask8x32( @@ -1298,16 +1324,16 @@ impl Simd for Neon { let (b0, b1) = self.split_mask8x32(b); let (c00, c01) = self.unzip_mask8x16(a0, a1); let (c10, c11) = self.unzip_mask8x16(b0, b1); - (self.combine_mask8x16(c00, c10), self.combine_mask8x16(c01, c11)) + ( + self.combine_mask8x16(c00, c10), + self.combine_mask8x16(c01, c11), + ) } #[inline(always)] fn simd_eq_mask8x32(self, a: mask8x32, b: mask8x32) -> mask8x32 { let (a0, a1) = self.split_mask8x32(a); let (b0, b1) = self.split_mask8x32(b); - self.combine_mask8x16( - self.simd_eq_mask8x16(a0, b0), - self.simd_eq_mask8x16(a1, b1), - ) + self.combine_mask8x16(self.simd_eq_mask8x16(a0, b0), self.simd_eq_mask8x16(a1, b1)) } #[inline(always)] fn split_mask8x32(self, a: mask8x32) -> (mask8x16, mask8x16) { @@ -1394,11 +1420,7 @@ impl Simd for Neon { self.combine_mask16x8(self.simd_gt_i16x8(a0, b0), self.simd_gt_i16x8(a1, b1)) } #[inline(always)] - fn zip_i16x16( - self, - a: i16x16, - b: i16x16, - ) -> (i16x16, i16x16) { + fn zip_i16x16(self, a: i16x16, b: i16x16) -> (i16x16, i16x16) { let (a0, a1) = self.split_i16x16(a); let (b0, b1) = self.split_i16x16(b); let (c00, c01) = self.zip_i16x8(a0, b0); @@ -1406,11 +1428,7 @@ impl Simd for Neon { (self.combine_i16x8(c00, c01), self.combine_i16x8(c10, c11)) } #[inline(always)] - fn unzip_i16x16( - self, - a: i16x16, - b: i16x16, - ) -> (i16x16, i16x16) { + fn unzip_i16x16(self, a: i16x16, b: i16x16) -> (i16x16, i16x16) { 
let (a0, a1) = self.split_i16x16(a); let (b0, b1) = self.split_i16x16(b); let (c00, c01) = self.unzip_i16x8(a0, a1); @@ -1418,12 +1436,7 @@ impl Simd for Neon { (self.combine_i16x8(c00, c10), self.combine_i16x8(c01, c11)) } #[inline(always)] - fn select_i16x16( - self, - a: mask16x16, - b: i16x16, - c: i16x16, - ) -> i16x16 { + fn select_i16x16(self, a: mask16x16, b: i16x16, c: i16x16) -> i16x16 { let (a0, a1) = self.split_mask16x16(a); let (b0, b1) = self.split_i16x16(b); let (c0, c1) = self.split_i16x16(c); @@ -1514,11 +1527,7 @@ impl Simd for Neon { self.combine_mask16x8(self.simd_gt_u16x8(a0, b0), self.simd_gt_u16x8(a1, b1)) } #[inline(always)] - fn zip_u16x16( - self, - a: u16x16, - b: u16x16, - ) -> (u16x16, u16x16) { + fn zip_u16x16(self, a: u16x16, b: u16x16) -> (u16x16, u16x16) { let (a0, a1) = self.split_u16x16(a); let (b0, b1) = self.split_u16x16(b); let (c00, c01) = self.zip_u16x8(a0, b0); @@ -1526,11 +1535,7 @@ impl Simd for Neon { (self.combine_u16x8(c00, c01), self.combine_u16x8(c10, c11)) } #[inline(always)] - fn unzip_u16x16( - self, - a: u16x16, - b: u16x16, - ) -> (u16x16, u16x16) { + fn unzip_u16x16(self, a: u16x16, b: u16x16) -> (u16x16, u16x16) { let (a0, a1) = self.split_u16x16(a); let (b0, b1) = self.split_u16x16(b); let (c00, c01) = self.unzip_u16x8(a0, a1); @@ -1538,12 +1543,7 @@ impl Simd for Neon { (self.combine_u16x8(c00, c10), self.combine_u16x8(c01, c11)) } #[inline(always)] - fn select_u16x16( - self, - a: mask16x16, - b: u16x16, - c: u16x16, - ) -> u16x16 { + fn select_u16x16(self, a: mask16x16, b: u16x16, c: u16x16) -> u16x16 { let (a0, a1) = self.split_mask16x16(a); let (b0, b1) = self.split_u16x16(b); let (c0, c1) = self.split_u16x16(c); @@ -1610,7 +1610,10 @@ impl Simd for Neon { let (b0, b1) = self.split_mask16x16(b); let (c00, c01) = self.zip_mask16x8(a0, b0); let (c10, c11) = self.zip_mask16x8(a1, b1); - (self.combine_mask16x8(c00, c01), self.combine_mask16x8(c10, c11)) + ( + self.combine_mask16x8(c00, c01), + self.combine_mask16x8(c10, c11), + ) } #[inline(always)] fn unzip_mask16x16( @@ -1622,20 +1625,16 @@ impl Simd for Neon { let (b0, b1) = self.split_mask16x16(b); let (c00, c01) = self.unzip_mask16x8(a0, a1); let (c10, c11) = self.unzip_mask16x8(b0, b1); - (self.combine_mask16x8(c00, c10), self.combine_mask16x8(c01, c11)) + ( + self.combine_mask16x8(c00, c10), + self.combine_mask16x8(c01, c11), + ) } #[inline(always)] - fn simd_eq_mask16x16( - self, - a: mask16x16, - b: mask16x16, - ) -> mask16x16 { + fn simd_eq_mask16x16(self, a: mask16x16, b: mask16x16) -> mask16x16 { let (a0, a1) = self.split_mask16x16(a); let (b0, b1) = self.split_mask16x16(b); - self.combine_mask16x8( - self.simd_eq_mask16x8(a0, b0), - self.simd_eq_mask16x8(a1, b1), - ) + self.combine_mask16x8(self.simd_eq_mask16x8(a0, b0), self.simd_eq_mask16x8(a1, b1)) } #[inline(always)] fn split_mask16x16(self, a: mask16x16) -> (mask16x8, mask16x8) { @@ -1738,12 +1737,7 @@ impl Simd for Neon { (self.combine_i32x4(c00, c10), self.combine_i32x4(c01, c11)) } #[inline(always)] - fn select_i32x8( - self, - a: mask32x8, - b: i32x8, - c: i32x8, - ) -> i32x8 { + fn select_i32x8(self, a: mask32x8, b: i32x8, c: i32x8) -> i32x8 { let (a0, a1) = self.split_mask32x8(a); let (b0, b1) = self.split_i32x8(b); let (c0, c1) = self.split_i32x8(c); @@ -1850,12 +1844,7 @@ impl Simd for Neon { (self.combine_u32x4(c00, c10), self.combine_u32x4(c01, c11)) } #[inline(always)] - fn select_u32x8( - self, - a: mask32x8, - b: u32x8, - c: u32x8, - ) -> u32x8 { + fn select_u32x8(self, a: mask32x8, b: u32x8, c: u32x8) 
-> u32x8 { let (a0, a1) = self.split_mask32x8(a); let (b0, b1) = self.split_u32x8(b); let (c0, c1) = self.split_u32x8(c); @@ -1922,7 +1911,10 @@ impl Simd for Neon { let (b0, b1) = self.split_mask32x8(b); let (c00, c01) = self.zip_mask32x4(a0, b0); let (c10, c11) = self.zip_mask32x4(a1, b1); - (self.combine_mask32x4(c00, c01), self.combine_mask32x4(c10, c11)) + ( + self.combine_mask32x4(c00, c01), + self.combine_mask32x4(c10, c11), + ) } #[inline(always)] fn unzip_mask32x8( @@ -1934,16 +1926,16 @@ impl Simd for Neon { let (b0, b1) = self.split_mask32x8(b); let (c00, c01) = self.unzip_mask32x4(a0, a1); let (c10, c11) = self.unzip_mask32x4(b0, b1); - (self.combine_mask32x4(c00, c10), self.combine_mask32x4(c01, c11)) + ( + self.combine_mask32x4(c00, c10), + self.combine_mask32x4(c01, c11), + ) } #[inline(always)] fn simd_eq_mask32x8(self, a: mask32x8, b: mask32x8) -> mask32x8 { let (a0, a1) = self.split_mask32x8(a); let (b0, b1) = self.split_mask32x8(b); - self.combine_mask32x4( - self.simd_eq_mask32x4(a0, b0), - self.simd_eq_mask32x4(a1, b1), - ) + self.combine_mask32x4(self.simd_eq_mask32x4(a0, b0), self.simd_eq_mask32x4(a1, b1)) } #[inline(always)] fn split_mask32x8(self, a: mask32x8) -> (mask32x4, mask32x4) { diff --git a/fearless_simd/src/generated/ops.rs b/fearless_simd/src/generated/ops.rs index 7a4ec46e..3facc5e0 100644 --- a/fearless_simd/src/generated/ops.rs +++ b/fearless_simd/src/generated/ops.rs @@ -2,8 +2,8 @@ use crate::{Simd, SimdInto}; use crate::{ - f32x4, i8x16, u8x16, mask8x16, i16x8, u16x8, mask16x8, i32x4, u32x4, mask32x4, f32x8, - i8x32, u8x32, mask8x32, i16x16, u16x16, mask16x16, i32x8, u32x8, mask32x8, + f32x4, f32x8, i8x16, i8x32, i16x8, i16x16, i32x4, i32x8, mask8x16, mask8x32, mask16x8, + mask16x16, mask32x4, mask32x8, u8x16, u8x32, u16x8, u16x16, u32x4, u32x8, }; impl core::ops::Neg for f32x4 { type Output = Self; diff --git a/fearless_simd/src/generated/simd_trait.rs b/fearless_simd/src/generated/simd_trait.rs index 98a9eac4..4a87d5b9 100644 --- a/fearless_simd/src/generated/simd_trait.rs +++ b/fearless_simd/src/generated/simd_trait.rs @@ -1,9 +1,9 @@ // This file is autogenerated by fearless_simd_gen -use crate::{seal::Seal, Level, SimdElement, SimdInto}; +use crate::{Level, SimdElement, SimdInto, seal::Seal}; use crate::{ - f32x4, i8x16, u8x16, mask8x16, i16x8, u16x8, mask16x8, i32x4, u32x4, mask32x4, f32x8, - i8x32, u8x32, mask8x32, i16x16, u16x16, mask16x16, i32x8, u32x8, mask32x8, + f32x4, f32x8, i8x16, i8x32, i16x8, i16x16, i32x4, i32x8, mask8x16, mask8x32, mask16x8, + mask16x16, mask32x4, mask32x8, u8x16, u8x32, u16x8, u16x16, u32x4, u32x8, }; /// TODO: docstring pub trait Simd: Sized + Clone + Copy + Send + Sync + Seal + 'static { @@ -44,12 +44,7 @@ pub trait Simd: Sized + Clone + Copy + Send + Sync + Seal + 'static { fn min_precise_f32x4(self, a: f32x4, b: f32x4) -> f32x4; fn madd_f32x4(self, a: f32x4, b: f32x4, c: f32x4) -> f32x4; fn floor_f32x4(self, a: f32x4) -> f32x4; - fn select_f32x4( - self, - a: mask32x4, - b: f32x4, - c: f32x4, - ) -> f32x4; + fn select_f32x4(self, a: mask32x4, b: f32x4, c: f32x4) -> f32x4; fn combine_f32x4(self, a: f32x4, b: f32x4) -> f32x8; fn cvt_u32_f32x4(self, a: f32x4) -> u32x4; fn splat_i8x16(self, val: i8) -> i8x16; @@ -67,12 +62,7 @@ pub trait Simd: Sized + Clone + Copy + Send + Sync + Seal + 'static { fn simd_gt_i8x16(self, a: i8x16, b: i8x16) -> mask8x16; fn zip_i8x16(self, a: i8x16, b: i8x16) -> (i8x16, i8x16); fn unzip_i8x16(self, a: i8x16, b: i8x16) -> (i8x16, i8x16); - fn select_i8x16( - self, - a: mask8x16, - b: 
i8x16, - c: i8x16, - ) -> i8x16; + fn select_i8x16(self, a: mask8x16, b: i8x16, c: i8x16) -> i8x16; fn combine_i8x16(self, a: i8x16, b: i8x16) -> i8x32; fn splat_u8x16(self, val: u8) -> u8x16; fn not_u8x16(self, a: u8x16) -> u8x16; @@ -89,12 +79,7 @@ pub trait Simd: Sized + Clone + Copy + Send + Sync + Seal + 'static { fn simd_gt_u8x16(self, a: u8x16, b: u8x16) -> mask8x16; fn zip_u8x16(self, a: u8x16, b: u8x16) -> (u8x16, u8x16); fn unzip_u8x16(self, a: u8x16, b: u8x16) -> (u8x16, u8x16); - fn select_u8x16( - self, - a: mask8x16, - b: u8x16, - c: u8x16, - ) -> u8x16; + fn select_u8x16(self, a: mask8x16, b: u8x16, c: u8x16) -> u8x16; fn combine_u8x16(self, a: u8x16, b: u8x16) -> u8x32; fn splat_mask8x16(self, val: i8) -> mask8x16; fn not_mask8x16(self, a: mask8x16) -> mask8x16; @@ -107,11 +92,8 @@ pub trait Simd: Sized + Clone + Copy + Send + Sync + Seal + 'static { b: mask8x16, c: mask8x16, ) -> mask8x16; - fn zip_mask8x16( - self, - a: mask8x16, - b: mask8x16, - ) -> (mask8x16, mask8x16); + fn zip_mask8x16(self, a: mask8x16, b: mask8x16) + -> (mask8x16, mask8x16); fn unzip_mask8x16( self, a: mask8x16, @@ -134,12 +116,7 @@ pub trait Simd: Sized + Clone + Copy + Send + Sync + Seal + 'static { fn simd_gt_i16x8(self, a: i16x8, b: i16x8) -> mask16x8; fn zip_i16x8(self, a: i16x8, b: i16x8) -> (i16x8, i16x8); fn unzip_i16x8(self, a: i16x8, b: i16x8) -> (i16x8, i16x8); - fn select_i16x8( - self, - a: mask16x8, - b: i16x8, - c: i16x8, - ) -> i16x8; + fn select_i16x8(self, a: mask16x8, b: i16x8, c: i16x8) -> i16x8; fn combine_i16x8(self, a: i16x8, b: i16x8) -> i16x16; fn splat_u16x8(self, val: u16) -> u16x8; fn not_u16x8(self, a: u16x8) -> u16x8; @@ -156,12 +133,7 @@ pub trait Simd: Sized + Clone + Copy + Send + Sync + Seal + 'static { fn simd_gt_u16x8(self, a: u16x8, b: u16x8) -> mask16x8; fn zip_u16x8(self, a: u16x8, b: u16x8) -> (u16x8, u16x8); fn unzip_u16x8(self, a: u16x8, b: u16x8) -> (u16x8, u16x8); - fn select_u16x8( - self, - a: mask16x8, - b: u16x8, - c: u16x8, - ) -> u16x8; + fn select_u16x8(self, a: mask16x8, b: u16x8, c: u16x8) -> u16x8; fn combine_u16x8(self, a: u16x8, b: u16x8) -> u16x16; fn splat_mask16x8(self, val: i16) -> mask16x8; fn not_mask16x8(self, a: mask16x8) -> mask16x8; @@ -174,11 +146,8 @@ pub trait Simd: Sized + Clone + Copy + Send + Sync + Seal + 'static { b: mask16x8, c: mask16x8, ) -> mask16x8; - fn zip_mask16x8( - self, - a: mask16x8, - b: mask16x8, - ) -> (mask16x8, mask16x8); + fn zip_mask16x8(self, a: mask16x8, b: mask16x8) + -> (mask16x8, mask16x8); fn unzip_mask16x8( self, a: mask16x8, @@ -201,12 +170,7 @@ pub trait Simd: Sized + Clone + Copy + Send + Sync + Seal + 'static { fn simd_gt_i32x4(self, a: i32x4, b: i32x4) -> mask32x4; fn zip_i32x4(self, a: i32x4, b: i32x4) -> (i32x4, i32x4); fn unzip_i32x4(self, a: i32x4, b: i32x4) -> (i32x4, i32x4); - fn select_i32x4( - self, - a: mask32x4, - b: i32x4, - c: i32x4, - ) -> i32x4; + fn select_i32x4(self, a: mask32x4, b: i32x4, c: i32x4) -> i32x4; fn combine_i32x4(self, a: i32x4, b: i32x4) -> i32x8; fn splat_u32x4(self, val: u32) -> u32x4; fn not_u32x4(self, a: u32x4) -> u32x4; @@ -223,12 +187,7 @@ pub trait Simd: Sized + Clone + Copy + Send + Sync + Seal + 'static { fn simd_gt_u32x4(self, a: u32x4, b: u32x4) -> mask32x4; fn zip_u32x4(self, a: u32x4, b: u32x4) -> (u32x4, u32x4); fn unzip_u32x4(self, a: u32x4, b: u32x4) -> (u32x4, u32x4); - fn select_u32x4( - self, - a: mask32x4, - b: u32x4, - c: u32x4, - ) -> u32x4; + fn select_u32x4(self, a: mask32x4, b: u32x4, c: u32x4) -> u32x4; fn combine_u32x4(self, a: u32x4, b: 
u32x4) -> u32x8; fn splat_mask32x4(self, val: i32) -> mask32x4; fn not_mask32x4(self, a: mask32x4) -> mask32x4; @@ -241,11 +200,8 @@ pub trait Simd: Sized + Clone + Copy + Send + Sync + Seal + 'static { b: mask32x4, c: mask32x4, ) -> mask32x4; - fn zip_mask32x4( - self, - a: mask32x4, - b: mask32x4, - ) -> (mask32x4, mask32x4); + fn zip_mask32x4(self, a: mask32x4, b: mask32x4) + -> (mask32x4, mask32x4); fn unzip_mask32x4( self, a: mask32x4, @@ -275,12 +231,7 @@ pub trait Simd: Sized + Clone + Copy + Send + Sync + Seal + 'static { fn min_precise_f32x8(self, a: f32x8, b: f32x8) -> f32x8; fn madd_f32x8(self, a: f32x8, b: f32x8, c: f32x8) -> f32x8; fn floor_f32x8(self, a: f32x8) -> f32x8; - fn select_f32x8( - self, - a: mask32x8, - b: f32x8, - c: f32x8, - ) -> f32x8; + fn select_f32x8(self, a: mask32x8, b: f32x8, c: f32x8) -> f32x8; fn split_f32x8(self, a: f32x8) -> (f32x4, f32x4); fn cvt_u32_f32x8(self, a: f32x8) -> u32x8; fn splat_i8x32(self, val: i8) -> i8x32; @@ -298,12 +249,7 @@ pub trait Simd: Sized + Clone + Copy + Send + Sync + Seal + 'static { fn simd_gt_i8x32(self, a: i8x32, b: i8x32) -> mask8x32; fn zip_i8x32(self, a: i8x32, b: i8x32) -> (i8x32, i8x32); fn unzip_i8x32(self, a: i8x32, b: i8x32) -> (i8x32, i8x32); - fn select_i8x32( - self, - a: mask8x32, - b: i8x32, - c: i8x32, - ) -> i8x32; + fn select_i8x32(self, a: mask8x32, b: i8x32, c: i8x32) -> i8x32; fn split_i8x32(self, a: i8x32) -> (i8x16, i8x16); fn splat_u8x32(self, val: u8) -> u8x32; fn not_u8x32(self, a: u8x32) -> u8x32; @@ -320,12 +266,7 @@ pub trait Simd: Sized + Clone + Copy + Send + Sync + Seal + 'static { fn simd_gt_u8x32(self, a: u8x32, b: u8x32) -> mask8x32; fn zip_u8x32(self, a: u8x32, b: u8x32) -> (u8x32, u8x32); fn unzip_u8x32(self, a: u8x32, b: u8x32) -> (u8x32, u8x32); - fn select_u8x32( - self, - a: mask8x32, - b: u8x32, - c: u8x32, - ) -> u8x32; + fn select_u8x32(self, a: mask8x32, b: u8x32, c: u8x32) -> u8x32; fn split_u8x32(self, a: u8x32) -> (u8x16, u8x16); fn splat_mask8x32(self, val: i8) -> mask8x32; fn not_mask8x32(self, a: mask8x32) -> mask8x32; @@ -338,11 +279,8 @@ pub trait Simd: Sized + Clone + Copy + Send + Sync + Seal + 'static { b: mask8x32, c: mask8x32, ) -> mask8x32; - fn zip_mask8x32( - self, - a: mask8x32, - b: mask8x32, - ) -> (mask8x32, mask8x32); + fn zip_mask8x32(self, a: mask8x32, b: mask8x32) + -> (mask8x32, mask8x32); fn unzip_mask8x32( self, a: mask8x32, @@ -363,22 +301,9 @@ pub trait Simd: Sized + Clone + Copy + Send + Sync + Seal + 'static { fn simd_le_i16x16(self, a: i16x16, b: i16x16) -> mask16x16; fn simd_ge_i16x16(self, a: i16x16, b: i16x16) -> mask16x16; fn simd_gt_i16x16(self, a: i16x16, b: i16x16) -> mask16x16; - fn zip_i16x16( - self, - a: i16x16, - b: i16x16, - ) -> (i16x16, i16x16); - fn unzip_i16x16( - self, - a: i16x16, - b: i16x16, - ) -> (i16x16, i16x16); - fn select_i16x16( - self, - a: mask16x16, - b: i16x16, - c: i16x16, - ) -> i16x16; + fn zip_i16x16(self, a: i16x16, b: i16x16) -> (i16x16, i16x16); + fn unzip_i16x16(self, a: i16x16, b: i16x16) -> (i16x16, i16x16); + fn select_i16x16(self, a: mask16x16, b: i16x16, c: i16x16) -> i16x16; fn split_i16x16(self, a: i16x16) -> (i16x8, i16x8); fn splat_u16x16(self, val: u16) -> u16x16; fn not_u16x16(self, a: u16x16) -> u16x16; @@ -393,22 +318,9 @@ pub trait Simd: Sized + Clone + Copy + Send + Sync + Seal + 'static { fn simd_le_u16x16(self, a: u16x16, b: u16x16) -> mask16x16; fn simd_ge_u16x16(self, a: u16x16, b: u16x16) -> mask16x16; fn simd_gt_u16x16(self, a: u16x16, b: u16x16) -> mask16x16; - fn zip_u16x16( - self, - 
a: u16x16, - b: u16x16, - ) -> (u16x16, u16x16); - fn unzip_u16x16( - self, - a: u16x16, - b: u16x16, - ) -> (u16x16, u16x16); - fn select_u16x16( - self, - a: mask16x16, - b: u16x16, - c: u16x16, - ) -> u16x16; + fn zip_u16x16(self, a: u16x16, b: u16x16) -> (u16x16, u16x16); + fn unzip_u16x16(self, a: u16x16, b: u16x16) -> (u16x16, u16x16); + fn select_u16x16(self, a: mask16x16, b: u16x16, c: u16x16) -> u16x16; fn split_u16x16(self, a: u16x16) -> (u16x8, u16x8); fn splat_mask16x16(self, val: i16) -> mask16x16; fn not_mask16x16(self, a: mask16x16) -> mask16x16; @@ -431,11 +343,7 @@ pub trait Simd: Sized + Clone + Copy + Send + Sync + Seal + 'static { a: mask16x16, b: mask16x16, ) -> (mask16x16, mask16x16); - fn simd_eq_mask16x16( - self, - a: mask16x16, - b: mask16x16, - ) -> mask16x16; + fn simd_eq_mask16x16(self, a: mask16x16, b: mask16x16) -> mask16x16; fn split_mask16x16(self, a: mask16x16) -> (mask16x8, mask16x8); fn splat_i32x8(self, val: i32) -> i32x8; fn not_i32x8(self, a: i32x8) -> i32x8; @@ -452,12 +360,7 @@ pub trait Simd: Sized + Clone + Copy + Send + Sync + Seal + 'static { fn simd_gt_i32x8(self, a: i32x8, b: i32x8) -> mask32x8; fn zip_i32x8(self, a: i32x8, b: i32x8) -> (i32x8, i32x8); fn unzip_i32x8(self, a: i32x8, b: i32x8) -> (i32x8, i32x8); - fn select_i32x8( - self, - a: mask32x8, - b: i32x8, - c: i32x8, - ) -> i32x8; + fn select_i32x8(self, a: mask32x8, b: i32x8, c: i32x8) -> i32x8; fn split_i32x8(self, a: i32x8) -> (i32x4, i32x4); fn splat_u32x8(self, val: u32) -> u32x8; fn not_u32x8(self, a: u32x8) -> u32x8; @@ -474,12 +377,7 @@ pub trait Simd: Sized + Clone + Copy + Send + Sync + Seal + 'static { fn simd_gt_u32x8(self, a: u32x8, b: u32x8) -> mask32x8; fn zip_u32x8(self, a: u32x8, b: u32x8) -> (u32x8, u32x8); fn unzip_u32x8(self, a: u32x8, b: u32x8) -> (u32x8, u32x8); - fn select_u32x8( - self, - a: mask32x8, - b: u32x8, - c: u32x8, - ) -> u32x8; + fn select_u32x8(self, a: mask32x8, b: u32x8, c: u32x8) -> u32x8; fn split_u32x8(self, a: u32x8) -> (u32x4, u32x4); fn splat_mask32x8(self, val: i32) -> mask32x8; fn not_mask32x8(self, a: mask32x8) -> mask32x8; @@ -492,11 +390,8 @@ pub trait Simd: Sized + Clone + Copy + Send + Sync + Seal + 'static { b: mask32x8, c: mask32x8, ) -> mask32x8; - fn zip_mask32x8( - self, - a: mask32x8, - b: mask32x8, - ) -> (mask32x8, mask32x8); + fn zip_mask32x8(self, a: mask32x8, b: mask32x8) + -> (mask32x8, mask32x8); fn unzip_mask32x8( self, a: mask32x8, @@ -505,10 +400,9 @@ pub trait Simd: Sized + Clone + Copy + Send + Sync + Seal + 'static { fn simd_eq_mask32x8(self, a: mask32x8, b: mask32x8) -> mask32x8; fn split_mask32x8(self, a: mask32x8) -> (mask32x4, mask32x4); } -pub trait SimdBase< - Element: SimdElement, - S: Simd, ->: Copy + Sync + Send + 'static + crate::Bytes { +pub trait SimdBase<Element: SimdElement, S: Simd>: + Copy + Sync + Send + 'static + crate::Bytes +{ const N: usize; /// A SIMD vector mask with the same number of elements.
/// @@ -530,30 +424,18 @@ pub trait SimdBase< fn splat(simd: S, val: Element) -> Self; fn block_splat(block: Self::Block) -> Self; } -pub trait SimdFloat< - Element: SimdElement, - S: Simd, ->: SimdBase< - Element, - S, - > + core::ops::Neg< - Output = Self, - > + core::ops::Add< - Output = Self, - > + core::ops::Add< - Element, - Output = Self, - > + core::ops::Sub< - Output = Self, - > + core::ops::Sub< - Element, - Output = Self, - > + core::ops::Mul< - Output = Self, - > + core::ops::Mul< - Element, - Output = Self, - > + core::ops::Div<Output = Self> + core::ops::Div<Element, Output = Self> { +pub trait SimdFloat<Element: SimdElement, S: Simd>: + SimdBase<Element, S> + + core::ops::Neg<Output = Self> + + core::ops::Add<Output = Self> + + core::ops::Add<Element, Output = Self> + + core::ops::Sub<Output = Self> + + core::ops::Sub<Element, Output = Self> + + core::ops::Mul<Output = Self> + + core::ops::Mul<Element, Output = Self> + + core::ops::Div<Output = Self> + + core::ops::Div<Element, Output = Self> +{ fn abs(self) -> Self; fn sqrt(self) -> Self; fn copysign(self, rhs: impl SimdInto) -> Self; @@ -571,38 +453,21 @@ pub trait SimdFloat< fn madd(self, op1: impl SimdInto, op2: impl SimdInto) -> Self; fn floor(self) -> Self; } -pub trait SimdInt< - Element: SimdElement, - S: Simd, ->: SimdBase< - Element, - S, - > + core::ops::Add< - Output = Self, - > + core::ops::Add< - Element, - Output = Self, - > + core::ops::Sub< - Output = Self, - > + core::ops::Sub< - Element, - Output = Self, - > + core::ops::Mul< - Output = Self, - > + core::ops::Mul< - Element, - Output = Self, - > + core::ops::BitAnd< - Output = Self, - > + core::ops::BitAnd< - Element, - Output = Self, - > + core::ops::BitOr< - Output = Self, - > + core::ops::BitOr< - Element, - Output = Self, - > + core::ops::BitXor<Output = Self> + core::ops::BitXor<Element, Output = Self> { +pub trait SimdInt<Element: SimdElement, S: Simd>: + SimdBase<Element, S> + + core::ops::Add<Output = Self> + + core::ops::Add<Element, Output = Self> + + core::ops::Sub<Output = Self> + + core::ops::Sub<Element, Output = Self> + + core::ops::Mul<Output = Self> + + core::ops::Mul<Element, Output = Self> + + core::ops::BitAnd<Output = Self> + + core::ops::BitAnd<Element, Output = Self> + + core::ops::BitOr<Output = Self> + + core::ops::BitOr<Element, Output = Self> + + core::ops::BitXor<Output = Self> + + core::ops::BitXor<Element, Output = Self> +{ fn simd_eq(self, rhs: impl SimdInto) -> Self::Mask; fn simd_lt(self, rhs: impl SimdInto) -> Self::Mask; fn simd_le(self, rhs: impl SimdInto) -> Self::Mask; @@ -611,17 +476,13 @@ pub trait SimdInt< fn zip(self, rhs: impl SimdInto) -> (Self, Self); fn unzip(self, rhs: impl SimdInto) -> (Self, Self); } -pub trait SimdMask< - Element: SimdElement, - S: Simd, ->: SimdBase< - Element, - S, - > + core::ops::Not< - Output = Self, - > + core::ops::BitAnd< - Output = Self, - > + core::ops::BitOr<Output = Self> + core::ops::BitXor<Output = Self> { +pub trait SimdMask<Element: SimdElement, S: Simd>: + SimdBase<Element, S> + + core::ops::Not<Output = Self> + + core::ops::BitAnd<Output = Self> + + core::ops::BitOr<Output = Self> + + core::ops::BitXor<Output = Self> +{ fn zip(self, rhs: impl SimdInto) -> (Self, Self); fn unzip(self, rhs: impl SimdInto) -> (Self, Self); fn simd_eq(self, rhs: impl SimdInto) -> Self::Mask; diff --git a/fearless_simd/src/generated/simd_types.rs b/fearless_simd/src/generated/simd_types.rs index e448b92a..b1103408 100644 --- a/fearless_simd/src/generated/simd_types.rs +++ b/fearless_simd/src/generated/simd_types.rs @@ -236,7 +236,8 @@ impl crate::SimdFloat for f32x4 { } #[inline(always)] fn madd(self, op1: impl SimdInto, op2: impl SimdInto) -> f32x4 { - self.simd.madd_f32x4(self, op1.simd_into(self.simd), op2.simd_into(self.simd)) + self.simd + .madd_f32x4(self, op1.simd_into(self.simd), op2.simd_into(self.simd)) } #[inline(always)] fn floor(self) -> f32x4 { @@ -2013,7 +2014,8 @@ impl crate::SimdFloat for f32x8 { } #[inline(always)] fn madd(self, op1: impl SimdInto, op2: impl SimdInto) -> f32x8 { - self.simd.madd_f32x8(self, op1.simd_into(self.simd), op2.simd_into(self.simd)) + self.simd + .madd_f32x8(self, op1.simd_into(self.simd), op2.simd_into(self.simd)) } #[inline(always)] fn
floor(self) -> f32x8 { diff --git a/fearless_simd/src/generated/wasm.rs b/fearless_simd/src/generated/wasm.rs index 71656a58..6b49c57d 100644 --- a/fearless_simd/src/generated/wasm.rs +++ b/fearless_simd/src/generated/wasm.rs @@ -1,9 +1,11 @@ -use core::arch::wasm32::*; -use crate::{seal::Seal, Level, Simd, SimdFrom, SimdInto}; +// This file is autogenerated by fearless_simd_gen + +use crate::{Level, Simd, SimdFrom, SimdInto, seal::Seal}; use crate::{ - f32x4, i8x16, u8x16, mask8x16, i16x8, u16x8, mask16x8, i32x4, u32x4, mask32x4, f32x8, - i8x32, u8x32, mask8x32, i16x16, u16x16, mask16x16, i32x8, u32x8, mask32x8, + f32x4, f32x8, i8x16, i8x32, i16x8, i16x16, i32x4, i32x8, mask8x16, mask8x32, mask16x8, + mask16x16, mask32x4, mask32x8, u8x16, u8x32, u16x8, u16x16, u32x4, u32x8, }; +use core::arch::wasm32::*; /// The SIMD token for the "wasm128" level. #[derive(Clone, Copy, Debug)] pub struct WasmSimd128 { @@ -130,12 +132,7 @@ impl Simd for WasmSimd128 { f32x4_floor(a.into()).simd_into(self) } #[inline(always)] - fn select_f32x4( - self, - a: mask32x4, - b: f32x4, - c: f32x4, - ) -> f32x4 { + fn select_f32x4(self, a: mask32x4, b: f32x4, c: f32x4) -> f32x4 { todo!() } #[inline(always)] @@ -214,12 +211,7 @@ impl Simd for WasmSimd128 { todo!() } #[inline(always)] - fn select_i8x16( - self, - a: mask8x16, - b: i8x16, - c: i8x16, - ) -> i8x16 { + fn select_i8x16(self, a: mask8x16, b: i8x16, c: i8x16) -> i8x16 { todo!() } #[inline(always)] @@ -294,12 +286,7 @@ impl Simd for WasmSimd128 { todo!() } #[inline(always)] - fn select_u8x16( - self, - a: mask8x16, - b: u8x16, - c: u8x16, - ) -> u8x16 { + fn select_u8x16(self, a: mask8x16, b: u8x16, c: u8x16) -> u8x16 { todo!() } #[inline(always)] @@ -434,12 +421,7 @@ impl Simd for WasmSimd128 { todo!() } #[inline(always)] - fn select_i16x8( - self, - a: mask16x8, - b: i16x8, - c: i16x8, - ) -> i16x8 { + fn select_i16x8(self, a: mask16x8, b: i16x8, c: i16x8) -> i16x8 { todo!() } #[inline(always)] @@ -514,12 +496,7 @@ impl Simd for WasmSimd128 { todo!() } #[inline(always)] - fn select_u16x8( - self, - a: mask16x8, - b: u16x8, - c: u16x8, - ) -> u16x8 { + fn select_u16x8(self, a: mask16x8, b: u16x8, c: u16x8) -> u16x8 { todo!() } #[inline(always)] @@ -654,12 +631,7 @@ impl Simd for WasmSimd128 { todo!() } #[inline(always)] - fn select_i32x4( - self, - a: mask32x4, - b: i32x4, - c: i32x4, - ) -> i32x4 { + fn select_i32x4(self, a: mask32x4, b: i32x4, c: i32x4) -> i32x4 { todo!() } #[inline(always)] @@ -734,12 +706,7 @@ impl Simd for WasmSimd128 { todo!() } #[inline(always)] - fn select_u32x4( - self, - a: mask32x4, - b: u32x4, - c: u32x4, - ) -> u32x4 { + fn select_u32x4(self, a: mask32x4, b: u32x4, c: u32x4) -> u32x4 { todo!() } #[inline(always)] @@ -948,12 +915,7 @@ impl Simd for WasmSimd128 { self.combine_f32x4(self.floor_f32x4(a0), self.floor_f32x4(a1)) } #[inline(always)] - fn select_f32x8( - self, - a: mask32x8, - b: f32x8, - c: f32x8, - ) -> f32x8 { + fn select_f32x8(self, a: mask32x8, b: f32x8, c: f32x8) -> f32x8 { let (a0, a1) = self.split_mask32x8(a); let (b0, b1) = self.split_f32x8(b); let (c0, c1) = self.split_f32x8(c); @@ -1065,12 +1027,7 @@ impl Simd for WasmSimd128 { (self.combine_i8x16(c00, c10), self.combine_i8x16(c01, c11)) } #[inline(always)] - fn select_i8x32( - self, - a: mask8x32, - b: i8x32, - c: i8x32, - ) -> i8x32 { + fn select_i8x32(self, a: mask8x32, b: i8x32, c: i8x32) -> i8x32 { let (a0, a1) = self.split_mask8x32(a); let (b0, b1) = self.split_i8x32(b); let (c0, c1) = self.split_i8x32(c); @@ -1177,12 +1134,7 @@ impl Simd for 
WasmSimd128 { (self.combine_u8x16(c00, c10), self.combine_u8x16(c01, c11)) } #[inline(always)] - fn select_u8x32( - self, - a: mask8x32, - b: u8x32, - c: u8x32, - ) -> u8x32 { + fn select_u8x32(self, a: mask8x32, b: u8x32, c: u8x32) -> u8x32 { let (a0, a1) = self.split_mask8x32(a); let (b0, b1) = self.split_u8x32(b); let (c0, c1) = self.split_u8x32(c); @@ -1249,7 +1201,10 @@ impl Simd for WasmSimd128 { let (b0, b1) = self.split_mask8x32(b); let (c00, c01) = self.zip_mask8x16(a0, b0); let (c10, c11) = self.zip_mask8x16(a1, b1); - (self.combine_mask8x16(c00, c01), self.combine_mask8x16(c10, c11)) + ( + self.combine_mask8x16(c00, c01), + self.combine_mask8x16(c10, c11), + ) } #[inline(always)] fn unzip_mask8x32( @@ -1261,16 +1216,16 @@ impl Simd for WasmSimd128 { let (b0, b1) = self.split_mask8x32(b); let (c00, c01) = self.unzip_mask8x16(a0, a1); let (c10, c11) = self.unzip_mask8x16(b0, b1); - (self.combine_mask8x16(c00, c10), self.combine_mask8x16(c01, c11)) + ( + self.combine_mask8x16(c00, c10), + self.combine_mask8x16(c01, c11), + ) } #[inline(always)] fn simd_eq_mask8x32(self, a: mask8x32, b: mask8x32) -> mask8x32 { let (a0, a1) = self.split_mask8x32(a); let (b0, b1) = self.split_mask8x32(b); - self.combine_mask8x16( - self.simd_eq_mask8x16(a0, b0), - self.simd_eq_mask8x16(a1, b1), - ) + self.combine_mask8x16(self.simd_eq_mask8x16(a0, b0), self.simd_eq_mask8x16(a1, b1)) } #[inline(always)] fn split_mask8x32(self, a: mask8x32) -> (mask8x16, mask8x16) { @@ -1357,11 +1312,7 @@ impl Simd for WasmSimd128 { self.combine_mask16x8(self.simd_gt_i16x8(a0, b0), self.simd_gt_i16x8(a1, b1)) } #[inline(always)] - fn zip_i16x16( - self, - a: i16x16, - b: i16x16, - ) -> (i16x16, i16x16) { + fn zip_i16x16(self, a: i16x16, b: i16x16) -> (i16x16, i16x16) { let (a0, a1) = self.split_i16x16(a); let (b0, b1) = self.split_i16x16(b); let (c00, c01) = self.zip_i16x8(a0, b0); @@ -1369,11 +1320,7 @@ impl Simd for WasmSimd128 { (self.combine_i16x8(c00, c01), self.combine_i16x8(c10, c11)) } #[inline(always)] - fn unzip_i16x16( - self, - a: i16x16, - b: i16x16, - ) -> (i16x16, i16x16) { + fn unzip_i16x16(self, a: i16x16, b: i16x16) -> (i16x16, i16x16) { let (a0, a1) = self.split_i16x16(a); let (b0, b1) = self.split_i16x16(b); let (c00, c01) = self.unzip_i16x8(a0, a1); @@ -1381,12 +1328,7 @@ impl Simd for WasmSimd128 { (self.combine_i16x8(c00, c10), self.combine_i16x8(c01, c11)) } #[inline(always)] - fn select_i16x16( - self, - a: mask16x16, - b: i16x16, - c: i16x16, - ) -> i16x16 { + fn select_i16x16(self, a: mask16x16, b: i16x16, c: i16x16) -> i16x16 { let (a0, a1) = self.split_mask16x16(a); let (b0, b1) = self.split_i16x16(b); let (c0, c1) = self.split_i16x16(c); @@ -1477,11 +1419,7 @@ impl Simd for WasmSimd128 { self.combine_mask16x8(self.simd_gt_u16x8(a0, b0), self.simd_gt_u16x8(a1, b1)) } #[inline(always)] - fn zip_u16x16( - self, - a: u16x16, - b: u16x16, - ) -> (u16x16, u16x16) { + fn zip_u16x16(self, a: u16x16, b: u16x16) -> (u16x16, u16x16) { let (a0, a1) = self.split_u16x16(a); let (b0, b1) = self.split_u16x16(b); let (c00, c01) = self.zip_u16x8(a0, b0); @@ -1489,11 +1427,7 @@ impl Simd for WasmSimd128 { (self.combine_u16x8(c00, c01), self.combine_u16x8(c10, c11)) } #[inline(always)] - fn unzip_u16x16( - self, - a: u16x16, - b: u16x16, - ) -> (u16x16, u16x16) { + fn unzip_u16x16(self, a: u16x16, b: u16x16) -> (u16x16, u16x16) { let (a0, a1) = self.split_u16x16(a); let (b0, b1) = self.split_u16x16(b); let (c00, c01) = self.unzip_u16x8(a0, a1); @@ -1501,12 +1435,7 @@ impl Simd for WasmSimd128 { 
(self.combine_u16x8(c00, c10), self.combine_u16x8(c01, c11)) } #[inline(always)] - fn select_u16x16( - self, - a: mask16x16, - b: u16x16, - c: u16x16, - ) -> u16x16 { + fn select_u16x16(self, a: mask16x16, b: u16x16, c: u16x16) -> u16x16 { let (a0, a1) = self.split_mask16x16(a); let (b0, b1) = self.split_u16x16(b); let (c0, c1) = self.split_u16x16(c); @@ -1573,7 +1502,10 @@ impl Simd for WasmSimd128 { let (b0, b1) = self.split_mask16x16(b); let (c00, c01) = self.zip_mask16x8(a0, b0); let (c10, c11) = self.zip_mask16x8(a1, b1); - (self.combine_mask16x8(c00, c01), self.combine_mask16x8(c10, c11)) + ( + self.combine_mask16x8(c00, c01), + self.combine_mask16x8(c10, c11), + ) } #[inline(always)] fn unzip_mask16x16( @@ -1585,20 +1517,16 @@ impl Simd for WasmSimd128 { let (b0, b1) = self.split_mask16x16(b); let (c00, c01) = self.unzip_mask16x8(a0, a1); let (c10, c11) = self.unzip_mask16x8(b0, b1); - (self.combine_mask16x8(c00, c10), self.combine_mask16x8(c01, c11)) + ( + self.combine_mask16x8(c00, c10), + self.combine_mask16x8(c01, c11), + ) } #[inline(always)] - fn simd_eq_mask16x16( - self, - a: mask16x16, - b: mask16x16, - ) -> mask16x16 { + fn simd_eq_mask16x16(self, a: mask16x16, b: mask16x16) -> mask16x16 { let (a0, a1) = self.split_mask16x16(a); let (b0, b1) = self.split_mask16x16(b); - self.combine_mask16x8( - self.simd_eq_mask16x8(a0, b0), - self.simd_eq_mask16x8(a1, b1), - ) + self.combine_mask16x8(self.simd_eq_mask16x8(a0, b0), self.simd_eq_mask16x8(a1, b1)) } #[inline(always)] fn split_mask16x16(self, a: mask16x16) -> (mask16x8, mask16x8) { @@ -1701,12 +1629,7 @@ impl Simd for WasmSimd128 { (self.combine_i32x4(c00, c10), self.combine_i32x4(c01, c11)) } #[inline(always)] - fn select_i32x8( - self, - a: mask32x8, - b: i32x8, - c: i32x8, - ) -> i32x8 { + fn select_i32x8(self, a: mask32x8, b: i32x8, c: i32x8) -> i32x8 { let (a0, a1) = self.split_mask32x8(a); let (b0, b1) = self.split_i32x8(b); let (c0, c1) = self.split_i32x8(c); @@ -1813,12 +1736,7 @@ impl Simd for WasmSimd128 { (self.combine_u32x4(c00, c10), self.combine_u32x4(c01, c11)) } #[inline(always)] - fn select_u32x8( - self, - a: mask32x8, - b: u32x8, - c: u32x8, - ) -> u32x8 { + fn select_u32x8(self, a: mask32x8, b: u32x8, c: u32x8) -> u32x8 { let (a0, a1) = self.split_mask32x8(a); let (b0, b1) = self.split_u32x8(b); let (c0, c1) = self.split_u32x8(c); @@ -1885,7 +1803,10 @@ impl Simd for WasmSimd128 { let (b0, b1) = self.split_mask32x8(b); let (c00, c01) = self.zip_mask32x4(a0, b0); let (c10, c11) = self.zip_mask32x4(a1, b1); - (self.combine_mask32x4(c00, c01), self.combine_mask32x4(c10, c11)) + ( + self.combine_mask32x4(c00, c01), + self.combine_mask32x4(c10, c11), + ) } #[inline(always)] fn unzip_mask32x8( @@ -1897,16 +1818,16 @@ impl Simd for WasmSimd128 { let (b0, b1) = self.split_mask32x8(b); let (c00, c01) = self.unzip_mask32x4(a0, a1); let (c10, c11) = self.unzip_mask32x4(b0, b1); - (self.combine_mask32x4(c00, c10), self.combine_mask32x4(c01, c11)) + ( + self.combine_mask32x4(c00, c10), + self.combine_mask32x4(c01, c11), + ) } #[inline(always)] fn simd_eq_mask32x8(self, a: mask32x8, b: mask32x8) -> mask32x8 { let (a0, a1) = self.split_mask32x8(a); let (b0, b1) = self.split_mask32x8(b); - self.combine_mask32x4( - self.simd_eq_mask32x4(a0, b0), - self.simd_eq_mask32x4(a1, b1), - ) + self.combine_mask32x4(self.simd_eq_mask32x4(a0, b0), self.simd_eq_mask32x4(a1, b1)) } #[inline(always)] fn split_mask32x8(self, a: mask32x8) -> (mask32x4, mask32x4) { @@ -2067,4 +1988,3 @@ impl From> for v128 { unsafe { 
core::mem::transmute(value.val) } } } - diff --git a/fearless_simd/src/lib.rs b/fearless_simd/src/lib.rs index 5b5ba0d0..e91dab64 100644 --- a/fearless_simd/src/lib.rs +++ b/fearless_simd/src/lib.rs @@ -69,7 +69,7 @@ impl Level { _ => None, } } - + #[cfg(target_arch = "wasm32")] #[inline] pub fn as_wasm_simd128(self) -> Option<WasmSimd128> { diff --git a/fearless_simd_gen/src/main.rs b/fearless_simd_gen/src/main.rs index 3dc63414..b8224385 100644 --- a/fearless_simd_gen/src/main.rs +++ b/fearless_simd_gen/src/main.rs @@ -28,8 +28,19 @@ enum Module { } #[derive(Parser)] +#[command( + name = "fearless_simd_gen", + about = "Generate SIMD trait implementations for `fearless_simd`", + long_about = "Generate SIMD trait implementations for fearless_simd.\n\ + \n\ + Generates code for SIMD types, traits, operations, and architecture-specific \ + implementations (NEON, WASM, fallback).\n\ + \n\ + Run from the root of the repository without arguments to automatically \ + generate all module files in ./fearless_simd/src/generated/." +)] struct Cli { - #[arg(short, long)] + #[arg(short, long, help = "Generate a specific module and print to stdout")] module: Option<Module>, } @@ -70,6 +81,7 @@ const MODULES: &[Module] = &[ Module::Ops, Module::Neon, Module::Fallback, + Module::Wasm, ]; const FILE_BASE: &str = "./fearless_simd/src/generated"; @@ -88,10 +100,9 @@ fn main() { for module in MODULES { let name = module.file_base(); let path = base_dir.join(format!("{name}.rs")); - let mut file = File::create(&path).expect("error creating {path:?}"); + let file = File::create(&path).expect("error creating {path:?}"); let code_str = module.generate_string(); - file.write_all(code_str.as_bytes()) - .expect("error writing {name}"); + write_formatted(code_str.as_bytes(), file); } } } @@ -105,3 +116,15 @@ fn print_code(code: &TokenStream) { } } } + +fn write_formatted(text: &[u8], out: File) { + let mut child = std::process::Command::new("rustfmt") + .stdin(std::process::Stdio::piped()) + .stdout(out) + .spawn() + .expect("`rustfmt` command to have spawned"); + let mut stdin = child.stdin.take().expect("stdin handle to be present"); + stdin.write_all(text).unwrap(); + drop(stdin); + child.wait().expect("rustfmt should write to file"); +}
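
Note on the select_* bodies reformatted above: NEON's vbslq_* intrinsics perform a bitwise select, taking each result bit from the second operand where the corresponding mask bit is one and from the third operand where it is zero. Because the mask-producing ops emit lanes that are either all ones or all zeros, this amounts to a lanewise select. A minimal portable model of that semantics, illustrative only and not part of the patch:

fn bit_select(mask: u32, b: u32, c: u32) -> u32 {
    // Bits of `b` where `mask` is one, bits of `c` where it is zero.
    (mask & b) | (!mask & c)
}

fn main() {
    let all_ones = u32::MAX; // a "true" lane, as produced by the comparison ops
    assert_eq!(bit_select(all_ones, 7, 9), 7);
    assert_eq!(bit_select(0, 7, 9), 9);
}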
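
The 256-bit methods in both the Neon and WasmSimd128 impls (select_i32x8, zip_mask16x16, simd_eq_mask32x8, and so on) all follow the same doubling pattern: split each operand into two 128-bit halves, apply the 128-bit primitive to each half, then recombine. A self-contained sketch of the pattern on plain arrays; the names are illustrative, not the crate's API:

fn split(a: [i32; 8]) -> ([i32; 4], [i32; 4]) {
    // Low half, then high half, mirroring split_i32x8.
    (a[..4].try_into().unwrap(), a[4..].try_into().unwrap())
}

fn combine(lo: [i32; 4], hi: [i32; 4]) -> [i32; 8] {
    // Concatenate the halves, mirroring combine_i32x4.
    let mut out = [0; 8];
    out[..4].copy_from_slice(&lo);
    out[4..].copy_from_slice(&hi);
    out
}

fn select_4(mask: [i32; 4], b: [i32; 4], c: [i32; 4]) -> [i32; 4] {
    // The 128-bit primitive: a nonzero mask lane picks from `b`.
    core::array::from_fn(|i| if mask[i] != 0 { b[i] } else { c[i] })
}

fn select_8(mask: [i32; 8], b: [i32; 8], c: [i32; 8]) -> [i32; 8] {
    // The doubling pattern behind select_i32x8 and friends.
    let (m0, m1) = split(mask);
    let (b0, b1) = split(b);
    let (c0, c1) = split(c);
    combine(select_4(m0, b0, c0), select_4(m1, b1, c1))
}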
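
A caveat on the new write_formatted helper: Child::wait only errors when waiting itself fails, so a rustfmt process that exits nonzero would pass silently and could leave a half-formatted file behind; note also that expect messages such as "error creating {path:?}" are plain string literals, so the path is never interpolated. A hardened variant might look like the following sketch (hypothetical, not part of the patch; write_formatted_checked is an illustrative name):

use std::fs::File;
use std::io::Write;
use std::process::{Command, Stdio};

fn write_formatted_checked(text: &[u8], out: File) {
    let mut child = Command::new("rustfmt")
        .stdin(Stdio::piped())
        .stdout(out)
        .spawn()
        .expect("`rustfmt` command to have spawned");
    // Feed the generated source to rustfmt, then drop the handle so the
    // child sees EOF on stdin and can finish.
    child
        .stdin
        .take()
        .expect("stdin handle to be present")
        .write_all(text)
        .expect("writing to rustfmt's stdin should succeed");
    // Surface formatting failures instead of ignoring the exit status.
    let status = child.wait().expect("rustfmt should run to completion");
    assert!(status.success(), "rustfmt exited with {status}");
}

// At the call site, an interpolating panic would report which file failed:
// let file = File::create(&path)
//     .unwrap_or_else(|e| panic!("error creating {path:?}: {e}"));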
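
For reference, the new clap metadata corresponds to invocations along these lines (the --module value strings are an assumption here, taken to be clap's default lowercase mapping of the Module variants):

cargo run -p fearless_simd_gen                      # regenerate all files under ./fearless_simd/src/generated/
cargo run -p fearless_simd_gen -- --module neon     # print one generated module to stdout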