
Commit fa2c255

Simplify some shift casting (#186)
This is in preparation for bumping the lint set, which will include `clippy::cast_possible_wrap`. For the fallback code using `Shl`, we don't need to cast at all, as `Shl` is implemented with every scalar integer type as the shift argument; casting and calling directly compile to the same thing: https://godbolt.org/z/6G9bvYdjd. For the x86 code, we cast from `u32` to `i32`, so we can use `u32::cast_signed` for that, which explicitly stays at the same bit width and has the sign behavior that's probably expected here.
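As a rough sketch of the two patterns in question (illustrative only; the function names and element types below are made up, not taken from the generated code):

```rust
/// Scalar fallback: `Shl` is implemented with `u32` (and the other integer
/// types) as the shift amount, so the `u32` shift can be passed straight
/// through without a cast.
fn shl_fallback(a: [i16; 8], shift: u32) -> [i16; 8] {
    core::array::from_fn(|i| core::ops::Shl::shl(a[i], shift))
}

/// x86: `_mm_cvtsi32_si128` takes an `i32` shift count. `u32::cast_signed`
/// reinterprets the bits as `i32` at the same width, making the intent
/// explicit where `shift as _` left the target type to inference.
fn shift_count(shift: u32) -> i32 {
    shift.cast_signed()
}
```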
Parent: 744661d

File tree: 7 files changed (+157, -143 lines)

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
@@ -68,6 +68,7 @@ This release has an [MSRV][] of 1.88.
 - Breaking change: the `Element` type on the `SimdBase` trait is now an associated type instead of a type parameter. This should make it more pleasant to write code that's generic over different vector types. ([#170][] by [@valadaptive][])
 - The `WasmSimd128` token type now wraps the new `crate::core_arch::wasm32::WasmSimd128` type. This doesn't expose any new functionality as WASM SIMD128 can only be enabled statically, but matches all the other backend tokens. ([#176][] by [@valadaptive][])
 - Breaking change: the `SimdFrom::simd_from` method now takes the SIMD token as the first argument instead of the second. This matches the argument order of the `from_slice`, `splat`, and `from_fn` methods on `SimdBase`. ([#180][] by [@valadaptive][])
+- Code generation has been improved for shift argument casting on x86 and for scalar fallback. ([#186][] by [@tomcur][])

 ### Removed

@@ -127,6 +128,7 @@ No changelog was kept for this release.

 [@Ralith]: https://github.com/Ralith
 [@DJMcNab]: https://github.com/DJMcNab
+[@tomcur]: https://github.com/tomcur
 [@valadaptive]: https://github.com/valadaptive
 [@LaurenzV]: https://github.com/LaurenzV
 [@Shnatsel]: https://github.com/Shnatsel
@@ -168,6 +170,7 @@ No changelog was kept for this release.
 [#170]: https://github.com/linebender/fearless_simd/pull/170
 [#176]: https://github.com/linebender/fearless_simd/pull/176
 [#180]: https://github.com/linebender/fearless_simd/pull/180
+[#186]: https://github.com/linebender/fearless_simd/pull/186

 [Unreleased]: https://github.com/linebender/fearless_simd/compare/v0.3.0...HEAD
 [0.3.0]: https://github.com/linebender/fearless_simd/compare/v0.3.0...v0.2.0

fearless_simd/src/generated.rs

Lines changed: 0 additions & 4 deletions
@@ -6,7 +6,6 @@
     clippy::cast_possible_truncation,
     clippy::unseparated_literal_suffix,
     clippy::use_self,
-    trivial_numeric_casts,
     reason = "TODO: https://github.com/linebender/fearless_simd/issues/40"
 )]
 #![cfg_attr(
@@ -15,7 +14,6 @@
         clippy::missing_transmute_annotations,
         clippy::useless_transmute,
         clippy::new_without_default,
-        clippy::unnecessary_cast,
         reason = "TODO: https://github.com/linebender/fearless_simd/issues/40"
     )
 )]
@@ -25,7 +23,6 @@
         clippy::missing_transmute_annotations,
         clippy::useless_transmute,
         clippy::new_without_default,
-        clippy::unnecessary_cast,
         reason = "TODO: https://github.com/linebender/fearless_simd/issues/40"
     )
 )]
@@ -39,7 +36,6 @@
         clippy::missing_transmute_annotations,
         clippy::useless_transmute,
         clippy::new_without_default,
-        clippy::unnecessary_cast,
         reason = "TODO: https://github.com/linebender/fearless_simd/issues/40"
     )
 )]
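The dropped `trivial_numeric_casts` and `clippy::unnecessary_cast` expectations cover same-type numeric casts, which the generated code presumably no longer produces for shift arguments. A standalone sketch of what these two lints flag (hypothetical code, not from this crate):

```rust
#![warn(trivial_numeric_casts)]
// `clippy::unnecessary_cast` is warn-by-default when running Clippy.

fn main() {
    let shift: u32 = 3;
    // Both lints fire here: `shift` already has type `u32`, so the cast is a no-op.
    let _ = 1u16 << (shift as u32);
    // Without the cast there is nothing to flag, since `Shl<u32>` is implemented for `u16`.
    let _ = 1u16 << shift;
}
```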

fearless_simd/src/generated/avx2.rs

Lines changed: 32 additions & 16 deletions
@@ -943,15 +943,15 @@ impl Simd for Avx2 {
     }
     #[inline(always)]
     fn shl_i16x8(self, a: i16x8<Self>, shift: u32) -> i16x8<Self> {
-        unsafe { _mm_sll_epi16(a.into(), _mm_cvtsi32_si128(shift as _)).simd_into(self) }
+        unsafe { _mm_sll_epi16(a.into(), _mm_cvtsi32_si128(shift.cast_signed())).simd_into(self) }
     }
     #[inline(always)]
     fn shlv_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
         core::array::from_fn(|i| core::ops::Shl::shl(a[i], b[i])).simd_into(self)
     }
     #[inline(always)]
     fn shr_i16x8(self, a: i16x8<Self>, shift: u32) -> i16x8<Self> {
-        unsafe { _mm_sra_epi16(a.into(), _mm_cvtsi32_si128(shift as _)).simd_into(self) }
+        unsafe { _mm_sra_epi16(a.into(), _mm_cvtsi32_si128(shift.cast_signed())).simd_into(self) }
     }
     #[inline(always)]
     fn shrv_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
@@ -1113,15 +1113,15 @@ impl Simd for Avx2 {
     }
     #[inline(always)]
     fn shl_u16x8(self, a: u16x8<Self>, shift: u32) -> u16x8<Self> {
-        unsafe { _mm_sll_epi16(a.into(), _mm_cvtsi32_si128(shift as _)).simd_into(self) }
+        unsafe { _mm_sll_epi16(a.into(), _mm_cvtsi32_si128(shift.cast_signed())).simd_into(self) }
     }
     #[inline(always)]
     fn shlv_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
         core::array::from_fn(|i| core::ops::Shl::shl(a[i], b[i])).simd_into(self)
     }
     #[inline(always)]
     fn shr_u16x8(self, a: u16x8<Self>, shift: u32) -> u16x8<Self> {
-        unsafe { _mm_srl_epi16(a.into(), _mm_cvtsi32_si128(shift as _)).simd_into(self) }
+        unsafe { _mm_srl_epi16(a.into(), _mm_cvtsi32_si128(shift.cast_signed())).simd_into(self) }
     }
     #[inline(always)]
     fn shrv_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
@@ -1390,15 +1390,15 @@ impl Simd for Avx2 {
     }
     #[inline(always)]
     fn shl_i32x4(self, a: i32x4<Self>, shift: u32) -> i32x4<Self> {
-        unsafe { _mm_sll_epi32(a.into(), _mm_cvtsi32_si128(shift as _)).simd_into(self) }
+        unsafe { _mm_sll_epi32(a.into(), _mm_cvtsi32_si128(shift.cast_signed())).simd_into(self) }
     }
     #[inline(always)]
     fn shlv_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
         unsafe { _mm_sllv_epi32(a.into(), b.into()).simd_into(self) }
     }
     #[inline(always)]
     fn shr_i32x4(self, a: i32x4<Self>, shift: u32) -> i32x4<Self> {
-        unsafe { _mm_sra_epi32(a.into(), _mm_cvtsi32_si128(shift as _)).simd_into(self) }
+        unsafe { _mm_sra_epi32(a.into(), _mm_cvtsi32_si128(shift.cast_signed())).simd_into(self) }
     }
     #[inline(always)]
     fn shrv_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
@@ -1562,15 +1562,15 @@ impl Simd for Avx2 {
     }
     #[inline(always)]
     fn shl_u32x4(self, a: u32x4<Self>, shift: u32) -> u32x4<Self> {
-        unsafe { _mm_sll_epi32(a.into(), _mm_cvtsi32_si128(shift as _)).simd_into(self) }
+        unsafe { _mm_sll_epi32(a.into(), _mm_cvtsi32_si128(shift.cast_signed())).simd_into(self) }
     }
     #[inline(always)]
     fn shlv_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
         unsafe { _mm_sllv_epi32(a.into(), b.into()).simd_into(self) }
     }
     #[inline(always)]
     fn shr_u32x4(self, a: u32x4<Self>, shift: u32) -> u32x4<Self> {
-        unsafe { _mm_srl_epi32(a.into(), _mm_cvtsi32_si128(shift as _)).simd_into(self) }
+        unsafe { _mm_srl_epi32(a.into(), _mm_cvtsi32_si128(shift.cast_signed())).simd_into(self) }
     }
     #[inline(always)]
     fn shrv_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
@@ -3049,15 +3049,19 @@ impl Simd for Avx2 {
     }
     #[inline(always)]
     fn shl_i16x16(self, a: i16x16<Self>, shift: u32) -> i16x16<Self> {
-        unsafe { _mm256_sll_epi16(a.into(), _mm_cvtsi32_si128(shift as _)).simd_into(self) }
+        unsafe {
+            _mm256_sll_epi16(a.into(), _mm_cvtsi32_si128(shift.cast_signed())).simd_into(self)
+        }
     }
     #[inline(always)]
     fn shlv_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
         core::array::from_fn(|i| core::ops::Shl::shl(a[i], b[i])).simd_into(self)
     }
     #[inline(always)]
     fn shr_i16x16(self, a: i16x16<Self>, shift: u32) -> i16x16<Self> {
-        unsafe { _mm256_sra_epi16(a.into(), _mm_cvtsi32_si128(shift as _)).simd_into(self) }
+        unsafe {
+            _mm256_sra_epi16(a.into(), _mm_cvtsi32_si128(shift.cast_signed())).simd_into(self)
+        }
     }
     #[inline(always)]
     fn shrv_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
@@ -3261,15 +3265,19 @@ impl Simd for Avx2 {
     }
     #[inline(always)]
     fn shl_u16x16(self, a: u16x16<Self>, shift: u32) -> u16x16<Self> {
-        unsafe { _mm256_sll_epi16(a.into(), _mm_cvtsi32_si128(shift as _)).simd_into(self) }
+        unsafe {
+            _mm256_sll_epi16(a.into(), _mm_cvtsi32_si128(shift.cast_signed())).simd_into(self)
+        }
     }
     #[inline(always)]
     fn shlv_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
         core::array::from_fn(|i| core::ops::Shl::shl(a[i], b[i])).simd_into(self)
     }
     #[inline(always)]
     fn shr_u16x16(self, a: u16x16<Self>, shift: u32) -> u16x16<Self> {
-        unsafe { _mm256_srl_epi16(a.into(), _mm_cvtsi32_si128(shift as _)).simd_into(self) }
+        unsafe {
+            _mm256_srl_epi16(a.into(), _mm_cvtsi32_si128(shift.cast_signed())).simd_into(self)
+        }
     }
     #[inline(always)]
     fn shrv_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
@@ -3608,15 +3616,19 @@ impl Simd for Avx2 {
     }
     #[inline(always)]
     fn shl_i32x8(self, a: i32x8<Self>, shift: u32) -> i32x8<Self> {
-        unsafe { _mm256_sll_epi32(a.into(), _mm_cvtsi32_si128(shift as _)).simd_into(self) }
+        unsafe {
+            _mm256_sll_epi32(a.into(), _mm_cvtsi32_si128(shift.cast_signed())).simd_into(self)
+        }
     }
     #[inline(always)]
     fn shlv_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
         unsafe { _mm256_sllv_epi32(a.into(), b.into()).simd_into(self) }
     }
     #[inline(always)]
     fn shr_i32x8(self, a: i32x8<Self>, shift: u32) -> i32x8<Self> {
-        unsafe { _mm256_sra_epi32(a.into(), _mm_cvtsi32_si128(shift as _)).simd_into(self) }
+        unsafe {
+            _mm256_sra_epi32(a.into(), _mm_cvtsi32_si128(shift.cast_signed())).simd_into(self)
+        }
     }
     #[inline(always)]
     fn shrv_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
@@ -3812,15 +3824,19 @@ impl Simd for Avx2 {
     }
     #[inline(always)]
     fn shl_u32x8(self, a: u32x8<Self>, shift: u32) -> u32x8<Self> {
-        unsafe { _mm256_sll_epi32(a.into(), _mm_cvtsi32_si128(shift as _)).simd_into(self) }
+        unsafe {
+            _mm256_sll_epi32(a.into(), _mm_cvtsi32_si128(shift.cast_signed())).simd_into(self)
+        }
     }
     #[inline(always)]
     fn shlv_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
         unsafe { _mm256_sllv_epi32(a.into(), b.into()).simd_into(self) }
     }
     #[inline(always)]
     fn shr_u32x8(self, a: u32x8<Self>, shift: u32) -> u32x8<Self> {
-        unsafe { _mm256_srl_epi32(a.into(), _mm_cvtsi32_si128(shift as _)).simd_into(self) }
+        unsafe {
+            _mm256_srl_epi32(a.into(), _mm_cvtsi32_si128(shift.cast_signed())).simd_into(self)
+        }
     }
     #[inline(always)]
     fn shrv_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
