Skip to content

Commit 586b4d7

Browse files
authored
One more explicit shift to signed casting on x86 (#189)
Missed this in #186.
1 parent bf99e21 commit 586b4d7

File tree

4 files changed

+15
-14
lines changed

4 files changed

+15
-14
lines changed

CHANGELOG.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ This release has an [MSRV][] of 1.88.
6868
- Breaking change: the `Element` type on the `SimdBase` trait is now an associated type instead of a type parameter. This should make it more pleasant to write code that's generic over different vector types. ([#170][] by [@valadaptive][])
6969
- The `WasmSimd128` token type now wraps the new `crate::core_arch::wasm32::WasmSimd128` type. This doesn't expose any new functionality as WASM SIMD128 can only be enabled statically, but matches all the other backend tokens. ([#176][] by [@valadaptive][])
7070
- Breaking change: the `SimdFrom::simd_from` method now takes the SIMD token as the first argument instead of the second. This matches the argument order of the `from_slice`, `splat`, and `from_fn` methods on `SimdBase`. ([#180][] by [@valadaptive][])
71-
- Code generation has been improved for shift argument casting on x86 and for scalar fallback. ([#186][] by [@tomcur][])
71+
- Code generation has been improved for shift argument casting on x86 and for scalar fallback. ([#186][] and [#189][] by [@tomcur][])
7272

7373
### Removed
7474

@@ -172,6 +172,7 @@ No changelog was kept for this release.
172172
[#180]: https://github.com/linebender/fearless_simd/pull/180
173173
[#181]: https://github.com/linebender/fearless_simd/pull/181
174174
[#186]: https://github.com/linebender/fearless_simd/pull/186
175+
[#189]: https://github.com/linebender/fearless_simd/pull/189
175176

176177
[Unreleased]: https://github.com/linebender/fearless_simd/compare/v0.3.0...HEAD
177178
[0.3.0]: https://github.com/linebender/fearless_simd/compare/v0.2.0...v0.3.0

fearless_simd/src/generated/avx2.rs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -469,7 +469,7 @@ impl Simd for Avx2 {
469469
fn shl_i8x16(self, a: i8x16<Self>, shift: u32) -> i8x16<Self> {
470470
unsafe {
471471
let val = a.into();
472-
let shift_count = _mm_cvtsi32_si128(shift as i32);
472+
let shift_count = _mm_cvtsi32_si128(shift.cast_signed());
473473
let lo_16 = _mm_unpacklo_epi8(val, _mm_cmpgt_epi8(_mm_setzero_si128(), val));
474474
let hi_16 = _mm_unpackhi_epi8(val, _mm_cmpgt_epi8(_mm_setzero_si128(), val));
475475
let lo_shifted = _mm_sll_epi16(lo_16, shift_count);
@@ -485,7 +485,7 @@ impl Simd for Avx2 {
485485
fn shr_i8x16(self, a: i8x16<Self>, shift: u32) -> i8x16<Self> {
486486
unsafe {
487487
let val = a.into();
488-
let shift_count = _mm_cvtsi32_si128(shift as i32);
488+
let shift_count = _mm_cvtsi32_si128(shift.cast_signed());
489489
let lo_16 = _mm_unpacklo_epi8(val, _mm_cmpgt_epi8(_mm_setzero_si128(), val));
490490
let hi_16 = _mm_unpackhi_epi8(val, _mm_cmpgt_epi8(_mm_setzero_si128(), val));
491491
let lo_shifted = _mm_sra_epi16(lo_16, shift_count);
@@ -670,7 +670,7 @@ impl Simd for Avx2 {
670670
fn shl_u8x16(self, a: u8x16<Self>, shift: u32) -> u8x16<Self> {
671671
unsafe {
672672
let val = a.into();
673-
let shift_count = _mm_cvtsi32_si128(shift as i32);
673+
let shift_count = _mm_cvtsi32_si128(shift.cast_signed());
674674
let lo_16 = _mm_unpacklo_epi8(val, _mm_setzero_si128());
675675
let hi_16 = _mm_unpackhi_epi8(val, _mm_setzero_si128());
676676
let lo_shifted = _mm_sll_epi16(lo_16, shift_count);
@@ -686,7 +686,7 @@ impl Simd for Avx2 {
686686
fn shr_u8x16(self, a: u8x16<Self>, shift: u32) -> u8x16<Self> {
687687
unsafe {
688688
let val = a.into();
689-
let shift_count = _mm_cvtsi32_si128(shift as i32);
689+
let shift_count = _mm_cvtsi32_si128(shift.cast_signed());
690690
let lo_16 = _mm_unpacklo_epi8(val, _mm_setzero_si128());
691691
let hi_16 = _mm_unpackhi_epi8(val, _mm_setzero_si128());
692692
let lo_shifted = _mm_srl_epi16(lo_16, shift_count);
@@ -2544,7 +2544,7 @@ impl Simd for Avx2 {
25442544
fn shl_i8x32(self, a: i8x32<Self>, shift: u32) -> i8x32<Self> {
25452545
unsafe {
25462546
let val = a.into();
2547-
let shift_count = _mm_cvtsi32_si128(shift as i32);
2547+
let shift_count = _mm_cvtsi32_si128(shift.cast_signed());
25482548
let lo_16 = _mm256_unpacklo_epi8(val, _mm256_cmpgt_epi8(_mm256_setzero_si256(), val));
25492549
let hi_16 = _mm256_unpackhi_epi8(val, _mm256_cmpgt_epi8(_mm256_setzero_si256(), val));
25502550
let lo_shifted = _mm256_sll_epi16(lo_16, shift_count);
@@ -2560,7 +2560,7 @@ impl Simd for Avx2 {
25602560
fn shr_i8x32(self, a: i8x32<Self>, shift: u32) -> i8x32<Self> {
25612561
unsafe {
25622562
let val = a.into();
2563-
let shift_count = _mm_cvtsi32_si128(shift as i32);
2563+
let shift_count = _mm_cvtsi32_si128(shift.cast_signed());
25642564
let lo_16 = _mm256_unpacklo_epi8(val, _mm256_cmpgt_epi8(_mm256_setzero_si256(), val));
25652565
let hi_16 = _mm256_unpackhi_epi8(val, _mm256_cmpgt_epi8(_mm256_setzero_si256(), val));
25662566
let lo_shifted = _mm256_sra_epi16(lo_16, shift_count);
@@ -2781,7 +2781,7 @@ impl Simd for Avx2 {
27812781
fn shl_u8x32(self, a: u8x32<Self>, shift: u32) -> u8x32<Self> {
27822782
unsafe {
27832783
let val = a.into();
2784-
let shift_count = _mm_cvtsi32_si128(shift as i32);
2784+
let shift_count = _mm_cvtsi32_si128(shift.cast_signed());
27852785
let lo_16 = _mm256_unpacklo_epi8(val, _mm256_setzero_si256());
27862786
let hi_16 = _mm256_unpackhi_epi8(val, _mm256_setzero_si256());
27872787
let lo_shifted = _mm256_sll_epi16(lo_16, shift_count);
@@ -2797,7 +2797,7 @@ impl Simd for Avx2 {
27972797
fn shr_u8x32(self, a: u8x32<Self>, shift: u32) -> u8x32<Self> {
27982798
unsafe {
27992799
let val = a.into();
2800-
let shift_count = _mm_cvtsi32_si128(shift as i32);
2800+
let shift_count = _mm_cvtsi32_si128(shift.cast_signed());
28012801
let lo_16 = _mm256_unpacklo_epi8(val, _mm256_setzero_si256());
28022802
let hi_16 = _mm256_unpackhi_epi8(val, _mm256_setzero_si256());
28032803
let lo_shifted = _mm256_srl_epi16(lo_16, shift_count);

fearless_simd/src/generated/sse4_2.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -477,7 +477,7 @@ impl Simd for Sse4_2 {
477477
fn shl_i8x16(self, a: i8x16<Self>, shift: u32) -> i8x16<Self> {
478478
unsafe {
479479
let val = a.into();
480-
let shift_count = _mm_cvtsi32_si128(shift as i32);
480+
let shift_count = _mm_cvtsi32_si128(shift.cast_signed());
481481
let lo_16 = _mm_unpacklo_epi8(val, _mm_cmpgt_epi8(_mm_setzero_si128(), val));
482482
let hi_16 = _mm_unpackhi_epi8(val, _mm_cmpgt_epi8(_mm_setzero_si128(), val));
483483
let lo_shifted = _mm_sll_epi16(lo_16, shift_count);
@@ -493,7 +493,7 @@ impl Simd for Sse4_2 {
493493
fn shr_i8x16(self, a: i8x16<Self>, shift: u32) -> i8x16<Self> {
494494
unsafe {
495495
let val = a.into();
496-
let shift_count = _mm_cvtsi32_si128(shift as i32);
496+
let shift_count = _mm_cvtsi32_si128(shift.cast_signed());
497497
let lo_16 = _mm_unpacklo_epi8(val, _mm_cmpgt_epi8(_mm_setzero_si128(), val));
498498
let hi_16 = _mm_unpackhi_epi8(val, _mm_cmpgt_epi8(_mm_setzero_si128(), val));
499499
let lo_shifted = _mm_sra_epi16(lo_16, shift_count);
@@ -681,7 +681,7 @@ impl Simd for Sse4_2 {
681681
fn shl_u8x16(self, a: u8x16<Self>, shift: u32) -> u8x16<Self> {
682682
unsafe {
683683
let val = a.into();
684-
let shift_count = _mm_cvtsi32_si128(shift as i32);
684+
let shift_count = _mm_cvtsi32_si128(shift.cast_signed());
685685
let lo_16 = _mm_unpacklo_epi8(val, _mm_setzero_si128());
686686
let hi_16 = _mm_unpackhi_epi8(val, _mm_setzero_si128());
687687
let lo_shifted = _mm_sll_epi16(lo_16, shift_count);
@@ -697,7 +697,7 @@ impl Simd for Sse4_2 {
697697
fn shr_u8x16(self, a: u8x16<Self>, shift: u32) -> u8x16<Self> {
698698
unsafe {
699699
let val = a.into();
700-
let shift_count = _mm_cvtsi32_si128(shift as i32);
700+
let shift_count = _mm_cvtsi32_si128(shift.cast_signed());
701701
let lo_16 = _mm_unpacklo_epi8(val, _mm_setzero_si128());
702702
let hi_16 = _mm_unpackhi_epi8(val, _mm_setzero_si128());
703703
let lo_shifted = _mm_srl_epi16(lo_16, shift_count);

fearless_simd_gen/src/mk_x86.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -568,7 +568,7 @@ impl X86 {
568568
#method_sig {
569569
unsafe {
570570
let val = a.into();
571-
let shift_count = _mm_cvtsi32_si128(shift as i32);
571+
let shift_count = _mm_cvtsi32_si128(shift.cast_signed());
572572

573573
let lo_16 = #extend_intrinsic_lo;
574574
let hi_16 = #extend_intrinsic_hi;

0 commit comments

Comments
 (0)