
Commit fa2c255

Simplify some shift casting (#186)
This is in preparation for bumping the lint set, which will include `clippy::cast_possible_wrap`. For the fallback code using `Shl`, we don't need to cast at all, as `Shl` is implemented with every scalar integer type as the shift argument; casting and calling directly compile to the same thing: https://godbolt.org/z/6G9bvYdjd. For the x86 code, we cast from `u32` to `i32`, so we can use `u32::cast_signed` for that, which explicitly stays at the same bit width and has the sign behavior that's probably expected here.
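As a rough sketch of the two patterns in question (illustrative only; the function names and element types below are made up, not taken from the generated code):

```rust
/// Scalar fallback: `Shl` is implemented with `u32` (and the other integer
/// types) as the shift amount, so the `u32` shift can be passed straight
/// through without a cast.
fn shl_fallback(a: [i16; 8], shift: u32) -> [i16; 8] {
    core::array::from_fn(|i| core::ops::Shl::shl(a[i], shift))
}

/// x86: `_mm_cvtsi32_si128` takes an `i32` shift count. `u32::cast_signed`
/// reinterprets the bits as `i32` at the same width, making the intent
/// explicit where `shift as _` left the target type to inference.
fn shift_count(shift: u32) -> i32 {
    shift.cast_signed()
}
```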
Parent: 744661d

File tree: 7 files changed (+157, -143 lines)

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
@@ -68,6 +68,7 @@ This release has an [MSRV][] of 1.88.
 - Breaking change: the `Element` type on the `SimdBase` trait is now an associated type instead of a type parameter. This should make it more pleasant to write code that's generic over different vector types. ([#170][] by [@valadaptive][])
 - The `WasmSimd128` token type now wraps the new `crate::core_arch::wasm32::WasmSimd128` type. This doesn't expose any new functionality as WASM SIMD128 can only be enabled statically, but matches all the other backend tokens. ([#176][] by [@valadaptive][])
 - Breaking change: the `SimdFrom::simd_from` method now takes the SIMD token as the first argument instead of the second. This matches the argument order of the `from_slice`, `splat`, and `from_fn` methods on `SimdBase`. ([#180][] by [@valadaptive][])
+- Code generation has been improved for shift argument casting on x86 and for scalar fallback. ([#186][] by [@tomcur][])

 ### Removed

@@ -127,6 +128,7 @@ No changelog was kept for this release.

 [@Ralith]: https://github.com/Ralith
 [@DJMcNab]: https://github.com/DJMcNab
+[@tomcur]: https://github.com/tomcur
 [@valadaptive]: https://github.com/valadaptive
 [@LaurenzV]: https://github.com/LaurenzV
 [@Shnatsel]: https://github.com/Shnatsel
@@ -168,6 +170,7 @@ No changelog was kept for this release.
 [#170]: https://github.com/linebender/fearless_simd/pull/170
 [#176]: https://github.com/linebender/fearless_simd/pull/176
 [#180]: https://github.com/linebender/fearless_simd/pull/180
+[#186]: https://github.com/linebender/fearless_simd/pull/186

 [Unreleased]: https://github.com/linebender/fearless_simd/compare/v0.3.0...HEAD
 [0.3.0]: https://github.com/linebender/fearless_simd/compare/v0.3.0...v0.2.0

fearless_simd/src/generated.rs

Lines changed: 0 additions & 4 deletions
@@ -6,7 +6,6 @@
     clippy::cast_possible_truncation,
     clippy::unseparated_literal_suffix,
     clippy::use_self,
-    trivial_numeric_casts,
     reason = "TODO: https://github.com/linebender/fearless_simd/issues/40"
 )]
 #![cfg_attr(
@@ -15,7 +14,6 @@
         clippy::missing_transmute_annotations,
         clippy::useless_transmute,
         clippy::new_without_default,
-        clippy::unnecessary_cast,
         reason = "TODO: https://github.com/linebender/fearless_simd/issues/40"
     )
 )]
@@ -25,7 +23,6 @@
         clippy::missing_transmute_annotations,
         clippy::useless_transmute,
         clippy::new_without_default,
-        clippy::unnecessary_cast,
         reason = "TODO: https://github.com/linebender/fearless_simd/issues/40"
     )
 )]
@@ -39,7 +36,6 @@
         clippy::missing_transmute_annotations,
         clippy::useless_transmute,
         clippy::new_without_default,
-        clippy::unnecessary_cast,
         reason = "TODO: https://github.com/linebender/fearless_simd/issues/40"
     )
 )]
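The dropped `trivial_numeric_casts` and `clippy::unnecessary_cast` expectations cover same-type numeric casts, which the generated code presumably no longer produces for shift arguments. A standalone sketch of what these two lints flag (hypothetical code, not from this crate):

```rust
#![warn(trivial_numeric_casts)]
// `clippy::unnecessary_cast` is warn-by-default when running Clippy.

fn main() {
    let shift: u32 = 3;
    // Both lints fire here: `shift` already has type `u32`, so the cast is a no-op.
    let _ = 1u16 << (shift as u32);
    // Without the cast there is nothing to flag, since `Shl<u32>` is implemented for `u16`.
    let _ = 1u16 << shift;
}
```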

fearless_simd/src/generated/avx2.rs

Lines changed: 32 additions & 16 deletions
@@ -943,15 +943,15 @@ impl Simd for Avx2 {
     }
     #[inline(always)]
     fn shl_i16x8(self, a: i16x8<Self>, shift: u32) -> i16x8<Self> {
-        unsafe { _mm_sll_epi16(a.into(), _mm_cvtsi32_si128(shift as _)).simd_into(self) }
+        unsafe { _mm_sll_epi16(a.into(), _mm_cvtsi32_si128(shift.cast_signed())).simd_into(self) }
     }
     #[inline(always)]
     fn shlv_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
         core::array::from_fn(|i| core::ops::Shl::shl(a[i], b[i])).simd_into(self)
     }
     #[inline(always)]
     fn shr_i16x8(self, a: i16x8<Self>, shift: u32) -> i16x8<Self> {
-        unsafe { _mm_sra_epi16(a.into(), _mm_cvtsi32_si128(shift as _)).simd_into(self) }
+        unsafe { _mm_sra_epi16(a.into(), _mm_cvtsi32_si128(shift.cast_signed())).simd_into(self) }
     }
     #[inline(always)]
     fn shrv_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
@@ -1113,15 +1113,15 @@ impl Simd for Avx2 {
     }
     #[inline(always)]
     fn shl_u16x8(self, a: u16x8<Self>, shift: u32) -> u16x8<Self> {
-        unsafe { _mm_sll_epi16(a.into(), _mm_cvtsi32_si128(shift as _)).simd_into(self) }
+        unsafe { _mm_sll_epi16(a.into(), _mm_cvtsi32_si128(shift.cast_signed())).simd_into(self) }
     }
     #[inline(always)]
     fn shlv_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
         core::array::from_fn(|i| core::ops::Shl::shl(a[i], b[i])).simd_into(self)
     }
     #[inline(always)]
     fn shr_u16x8(self, a: u16x8<Self>, shift: u32) -> u16x8<Self> {
-        unsafe { _mm_srl_epi16(a.into(), _mm_cvtsi32_si128(shift as _)).simd_into(self) }
+        unsafe { _mm_srl_epi16(a.into(), _mm_cvtsi32_si128(shift.cast_signed())).simd_into(self) }
     }
     #[inline(always)]
     fn shrv_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
@@ -1390,15 +1390,15 @@ impl Simd for Avx2 {
     }
     #[inline(always)]
     fn shl_i32x4(self, a: i32x4<Self>, shift: u32) -> i32x4<Self> {
-        unsafe { _mm_sll_epi32(a.into(), _mm_cvtsi32_si128(shift as _)).simd_into(self) }
+        unsafe { _mm_sll_epi32(a.into(), _mm_cvtsi32_si128(shift.cast_signed())).simd_into(self) }
     }
     #[inline(always)]
     fn shlv_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
         unsafe { _mm_sllv_epi32(a.into(), b.into()).simd_into(self) }
     }
     #[inline(always)]
     fn shr_i32x4(self, a: i32x4<Self>, shift: u32) -> i32x4<Self> {
-        unsafe { _mm_sra_epi32(a.into(), _mm_cvtsi32_si128(shift as _)).simd_into(self) }
+        unsafe { _mm_sra_epi32(a.into(), _mm_cvtsi32_si128(shift.cast_signed())).simd_into(self) }
     }
     #[inline(always)]
     fn shrv_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
@@ -1562,15 +1562,15 @@ impl Simd for Avx2 {
     }
     #[inline(always)]
     fn shl_u32x4(self, a: u32x4<Self>, shift: u32) -> u32x4<Self> {
-        unsafe { _mm_sll_epi32(a.into(), _mm_cvtsi32_si128(shift as _)).simd_into(self) }
+        unsafe { _mm_sll_epi32(a.into(), _mm_cvtsi32_si128(shift.cast_signed())).simd_into(self) }
     }
     #[inline(always)]
     fn shlv_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
         unsafe { _mm_sllv_epi32(a.into(), b.into()).simd_into(self) }
     }
     #[inline(always)]
     fn shr_u32x4(self, a: u32x4<Self>, shift: u32) -> u32x4<Self> {
-        unsafe { _mm_srl_epi32(a.into(), _mm_cvtsi32_si128(shift as _)).simd_into(self) }
+        unsafe { _mm_srl_epi32(a.into(), _mm_cvtsi32_si128(shift.cast_signed())).simd_into(self) }
     }
     #[inline(always)]
     fn shrv_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
@@ -3049,15 +3049,19 @@ impl Simd for Avx2 {
     }
     #[inline(always)]
     fn shl_i16x16(self, a: i16x16<Self>, shift: u32) -> i16x16<Self> {
-        unsafe { _mm256_sll_epi16(a.into(), _mm_cvtsi32_si128(shift as _)).simd_into(self) }
+        unsafe {
+            _mm256_sll_epi16(a.into(), _mm_cvtsi32_si128(shift.cast_signed())).simd_into(self)
+        }
     }
     #[inline(always)]
     fn shlv_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
         core::array::from_fn(|i| core::ops::Shl::shl(a[i], b[i])).simd_into(self)
     }
     #[inline(always)]
     fn shr_i16x16(self, a: i16x16<Self>, shift: u32) -> i16x16<Self> {
-        unsafe { _mm256_sra_epi16(a.into(), _mm_cvtsi32_si128(shift as _)).simd_into(self) }
+        unsafe {
+            _mm256_sra_epi16(a.into(), _mm_cvtsi32_si128(shift.cast_signed())).simd_into(self)
+        }
     }
     #[inline(always)]
     fn shrv_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
@@ -3261,15 +3265,19 @@ impl Simd for Avx2 {
     }
     #[inline(always)]
     fn shl_u16x16(self, a: u16x16<Self>, shift: u32) -> u16x16<Self> {
-        unsafe { _mm256_sll_epi16(a.into(), _mm_cvtsi32_si128(shift as _)).simd_into(self) }
+        unsafe {
+            _mm256_sll_epi16(a.into(), _mm_cvtsi32_si128(shift.cast_signed())).simd_into(self)
+        }
     }
     #[inline(always)]
     fn shlv_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
         core::array::from_fn(|i| core::ops::Shl::shl(a[i], b[i])).simd_into(self)
     }
     #[inline(always)]
     fn shr_u16x16(self, a: u16x16<Self>, shift: u32) -> u16x16<Self> {
-        unsafe { _mm256_srl_epi16(a.into(), _mm_cvtsi32_si128(shift as _)).simd_into(self) }
+        unsafe {
+            _mm256_srl_epi16(a.into(), _mm_cvtsi32_si128(shift.cast_signed())).simd_into(self)
+        }
     }
     #[inline(always)]
     fn shrv_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
@@ -3608,15 +3616,19 @@ impl Simd for Avx2 {
     }
     #[inline(always)]
     fn shl_i32x8(self, a: i32x8<Self>, shift: u32) -> i32x8<Self> {
-        unsafe { _mm256_sll_epi32(a.into(), _mm_cvtsi32_si128(shift as _)).simd_into(self) }
+        unsafe {
+            _mm256_sll_epi32(a.into(), _mm_cvtsi32_si128(shift.cast_signed())).simd_into(self)
+        }
     }
     #[inline(always)]
     fn shlv_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
         unsafe { _mm256_sllv_epi32(a.into(), b.into()).simd_into(self) }
     }
     #[inline(always)]
     fn shr_i32x8(self, a: i32x8<Self>, shift: u32) -> i32x8<Self> {
-        unsafe { _mm256_sra_epi32(a.into(), _mm_cvtsi32_si128(shift as _)).simd_into(self) }
+        unsafe {
+            _mm256_sra_epi32(a.into(), _mm_cvtsi32_si128(shift.cast_signed())).simd_into(self)
+        }
     }
     #[inline(always)]
     fn shrv_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
@@ -3812,15 +3824,19 @@ impl Simd for Avx2 {
     }
     #[inline(always)]
     fn shl_u32x8(self, a: u32x8<Self>, shift: u32) -> u32x8<Self> {
-        unsafe { _mm256_sll_epi32(a.into(), _mm_cvtsi32_si128(shift as _)).simd_into(self) }
+        unsafe {
+            _mm256_sll_epi32(a.into(), _mm_cvtsi32_si128(shift.cast_signed())).simd_into(self)
+        }
     }
     #[inline(always)]
     fn shlv_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
         unsafe { _mm256_sllv_epi32(a.into(), b.into()).simd_into(self) }
     }
     #[inline(always)]
     fn shr_u32x8(self, a: u32x8<Self>, shift: u32) -> u32x8<Self> {
-        unsafe { _mm256_srl_epi32(a.into(), _mm_cvtsi32_si128(shift as _)).simd_into(self) }
+        unsafe {
+            _mm256_srl_epi32(a.into(), _mm_cvtsi32_si128(shift.cast_signed())).simd_into(self)
+        }
     }
     #[inline(always)]
     fn shrv_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
