diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5a13857265..49a893a91f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -624,6 +624,11 @@ jobs: run: | set -eo pipefail sudo apt install -qq llvm + # The codegen tests evaluate assembly for multiple target architectures. + # These tests assume that `rustup` is installed on the host. We must + # explicitly add the targets before compilation, because `rustc` aborts + # when commanded to emit assembly for a target that has not been added. + rustup target add --toolchain $(./cargo.sh --version nightly) thumbv7m-none-eabi riscv32imc-unknown-none-elf ./cargo.sh +nightly install --quiet cargo-show-asm RUSTFLAGS="$RUSTFLAGS -Awarnings" ./cargo.sh +nightly test \ --package zerocopy \ diff --git a/benches/formats/coco_dynamic_padding.rs b/benches/formats/coco_dynamic_padding.rs index 9a938e1a9c..78b1e7a768 100644 --- a/benches/formats/coco_dynamic_padding.rs +++ b/benches/formats/coco_dynamic_padding.rs @@ -1,4 +1,4 @@ -use zerocopy_derive::*; +use zerocopy::*; // The only valid value of this type are the bytes `0xC0C0`. #[derive(TryFromBytes, KnownLayout, Immutable)] diff --git a/benches/formats/coco_dynamic_size.rs b/benches/formats/coco_dynamic_size.rs index f4626cf949..57dd5ed625 100644 --- a/benches/formats/coco_dynamic_size.rs +++ b/benches/formats/coco_dynamic_size.rs @@ -1,4 +1,4 @@ -use zerocopy_derive::*; +use zerocopy::*; // The only valid value of this type are the bytes `0xC0C0`. #[derive(TryFromBytes, KnownLayout, Immutable)] diff --git a/benches/formats/coco_static_size.rs b/benches/formats/coco_static_size.rs index 6d37cfd798..e8ce5200a5 100644 --- a/benches/formats/coco_static_size.rs +++ b/benches/formats/coco_static_size.rs @@ -1,4 +1,4 @@ -use zerocopy_derive::*; +use zerocopy::*; // The only valid value of this type are the bytes `0xC0C0`. 
#[derive(TryFromBytes, KnownLayout, Immutable)] diff --git a/benches/read_from_bytes.cortex-m3 b/benches/read_from_bytes.cortex-m3 new file mode 100644 index 0000000000..6bdd43d443 --- /dev/null +++ b/benches/read_from_bytes.cortex-m3 @@ -0,0 +1,16 @@ +bench_read_from_bytes_static_size: + push {r7, lr} + mov r7, sp + cmp r2, #6 + bne .LBB0_2 + ldrh r2, [r1] + ldr.w r1, [r1, #2] + strh r2, [r0, #2] + str r1, [r0, #4] + movs r1, #1 + strh r1, [r0] + pop {r7, pc} +.LBB0_2: + movs r1, #0 + strh r1, [r0] + pop {r7, pc} diff --git a/benches/read_from_bytes.generic-rv32 b/benches/read_from_bytes.generic-rv32 new file mode 100644 index 0000000000..92eec56a81 --- /dev/null +++ b/benches/read_from_bytes.generic-rv32 @@ -0,0 +1,26 @@ +bench_read_from_bytes_static_size: + li a3, 6 + bne a2, a3, .LBB0_2 + lbu a6, 4(a1) + lbu a3, 5(a1) + lbu a4, 1(a1) + lbu a5, 0(a1) + lbu a2, 2(a1) + lbu a1, 3(a1) + slli a4, a4, 8 + or a4, a4, a5 + slli a6, a6, 16 + slli a3, a3, 24 + or a3, a3, a6 + slli a1, a1, 8 + or a1, a1, a2 + srli a3, a3, 16 + sh a4, 2(a0) + sh a1, 4(a0) + sh a3, 6(a0) + li a1, 1 + sh a1, 0(a0) + ret +.LBB0_2: + sh zero, 0(a0) + ret diff --git a/benches/read_from_bytes.x86-64 b/benches/read_from_bytes.x86-64 index 9082d79f1f..221b6485e5 100644 --- a/benches/read_from_bytes.x86-64 +++ b/benches/read_from_bytes.x86-64 @@ -1,12 +1,12 @@ bench_read_from_bytes_static_size: mov rcx, rsi cmp rsi, 6 - jne .LBB5_2 + jne .LBB0_2 mov eax, dword ptr [rdi] movzx ecx, word ptr [rdi + 4] shl rcx, 32 or rcx, rax -.LBB5_2: +.LBB0_2: shl rcx, 16 inc rcx xor eax, eax diff --git a/benches/read_from_bytes.x86-64.mca b/benches/read_from_bytes.x86-64.mca index 77e787c190..da362bb906 100644 --- a/benches/read_from_bytes.x86-64.mca +++ b/benches/read_from_bytes.x86-64.mca @@ -20,7 +20,7 @@ Instruction Info: [1] [2] [3] [4] [5] [6] Instructions: 1 1 0.33 mov rcx, rsi 1 1 0.33 cmp rsi, 6 - 1 1 1.00 jne .LBB5_2 + 1 1 1.00 jne .LBB0_2 1 5 0.50 * mov eax, dword ptr [rdi] 1 5 0.50 * movzx ecx, word ptr 
[rdi + 4] 1 1 0.50 shl rcx, 32 @@ -52,7 +52,7 @@ Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - 0.63 0.36 - 0.01 - - mov rcx, rsi - - 0.05 0.05 - 0.90 - - cmp rsi, 6 - - - - - - 1.00 - - jne .LBB5_2 + - - - - - 1.00 - - jne .LBB0_2 - - - - - - - 1.00 mov eax, dword ptr [rdi] - - - - - - 1.00 - movzx ecx, word ptr [rdi + 4] - - 0.97 - - 0.03 - - shl rcx, 32 diff --git a/benches/read_from_prefix.cortex-m3 b/benches/read_from_prefix.cortex-m3 new file mode 100644 index 0000000000..023943fc26 --- /dev/null +++ b/benches/read_from_prefix.cortex-m3 @@ -0,0 +1,16 @@ +bench_read_from_prefix_static_size: + push {r7, lr} + mov r7, sp + cmp r2, #6 + bhs .LBB1_2 + movs r1, #0 + strh r1, [r0] + pop {r7, pc} +.LBB1_2: + ldr r2, [r1] + ldrh r1, [r1, #4] + str.w r2, [r0, #2] + strh r1, [r0, #6] + movs r1, #1 + strh r1, [r0] + pop {r7, pc} diff --git a/benches/read_from_prefix.generic-rv32 b/benches/read_from_prefix.generic-rv32 new file mode 100644 index 0000000000..0b14b8d061 --- /dev/null +++ b/benches/read_from_prefix.generic-rv32 @@ -0,0 +1,26 @@ +bench_read_from_prefix_static_size: + li a3, 6 + bgeu a2, a3, .LBB1_2 + sh zero, 0(a0) + ret +.LBB1_2: + lbu a6, 4(a1) + lbu a3, 5(a1) + lbu a4, 0(a1) + lbu a5, 1(a1) + lbu a2, 2(a1) + lbu a1, 3(a1) + slli a3, a3, 8 + or a3, a3, a6 + slli a2, a2, 16 + slli a1, a1, 24 + or a1, a1, a2 + slli a5, a5, 8 + or a4, a4, a5 + srli a1, a1, 16 + sh a4, 2(a0) + sh a1, 4(a0) + sh a3, 6(a0) + li a1, 1 + sh a1, 0(a0) + ret diff --git a/benches/read_from_prefix.x86-64 b/benches/read_from_prefix.x86-64 index c75b06c0c2..905938d80f 100644 --- a/benches/read_from_prefix.x86-64 +++ b/benches/read_from_prefix.x86-64 @@ -1,11 +1,11 @@ bench_read_from_prefix_static_size: cmp rsi, 5 - jbe .LBB5_2 + jbe .LBB1_2 mov eax, dword ptr [rdi] movzx edi, word ptr [rdi + 4] shl rdi, 32 or rdi, rax -.LBB5_2: +.LBB1_2: shl rdi, 16 inc rdi xor eax, eax diff --git a/benches/read_from_prefix.x86-64.mca 
b/benches/read_from_prefix.x86-64.mca index 04e76cdd07..d9bc499424 100644 --- a/benches/read_from_prefix.x86-64.mca +++ b/benches/read_from_prefix.x86-64.mca @@ -19,7 +19,7 @@ Instruction Info: [1] [2] [3] [4] [5] [6] Instructions: 1 1 0.33 cmp rsi, 5 - 1 1 1.00 jbe .LBB5_2 + 1 1 1.00 jbe .LBB1_2 1 5 0.50 * mov eax, dword ptr [rdi] 1 5 0.50 * movzx edi, word ptr [rdi + 4] 1 1 0.50 shl rdi, 32 @@ -50,7 +50,7 @@ Resource pressure per iteration: Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - 0.05 0.94 - 0.01 - - cmp rsi, 5 - - - - - - 1.00 - - jbe .LBB5_2 + - - - - - 1.00 - - jbe .LBB1_2 - - - - - - - 1.00 mov eax, dword ptr [rdi] - - - - - - 1.00 - movzx edi, word ptr [rdi + 4] - - 0.71 - - 0.29 - - shl rdi, 32 diff --git a/benches/read_from_suffix.cortex-m3 b/benches/read_from_suffix.cortex-m3 new file mode 100644 index 0000000000..afdd1214a2 --- /dev/null +++ b/benches/read_from_suffix.cortex-m3 @@ -0,0 +1,17 @@ +bench_read_from_suffix_static_size: + push {r7, lr} + mov r7, sp + cmp r2, #6 + bhs .LBB2_2 + movs r1, #0 + strh r1, [r0] + pop {r7, pc} +.LBB2_2: + add r1, r2 + ldr r2, [r1, #-6] + ldrh r1, [r1, #-2] + str.w r2, [r0, #2] + strh r1, [r0, #6] + movs r1, #1 + strh r1, [r0] + pop {r7, pc} diff --git a/benches/read_from_suffix.generic-rv32 b/benches/read_from_suffix.generic-rv32 new file mode 100644 index 0000000000..20631840d5 --- /dev/null +++ b/benches/read_from_suffix.generic-rv32 @@ -0,0 +1,27 @@ +bench_read_from_suffix_static_size: + li a3, 6 + bgeu a2, a3, .LBB2_2 + sh zero, 0(a0) + ret +.LBB2_2: + add a1, a1, a2 + lbu a6, -6(a1) + lbu a3, -5(a1) + lbu a4, -4(a1) + lbu a5, -3(a1) + lbu a2, -2(a1) + lbu a1, -1(a1) + slli a4, a4, 16 + slli a5, a5, 24 + or a4, a4, a5 + slli a3, a3, 8 + or a3, a3, a6 + slli a1, a1, 8 + or a1, a1, a2 + srli a4, a4, 16 + sh a3, 2(a0) + sh a4, 4(a0) + sh a1, 6(a0) + li a1, 1 + sh a1, 0(a0) + ret diff --git a/benches/read_from_suffix.x86-64 b/benches/read_from_suffix.x86-64 index 
5cff2a0e2f..330e35d1a3 100644 --- a/benches/read_from_suffix.x86-64 +++ b/benches/read_from_suffix.x86-64 @@ -1,12 +1,12 @@ bench_read_from_suffix_static_size: mov rcx, rsi cmp rsi, 6 - jb .LBB5_2 + jb .LBB2_2 mov eax, dword ptr [rdi + rsi - 6] movzx ecx, word ptr [rdi + rsi - 2] shl rcx, 32 or rcx, rax -.LBB5_2: +.LBB2_2: shl rcx, 16 inc rcx xor eax, eax diff --git a/benches/read_from_suffix.x86-64.mca b/benches/read_from_suffix.x86-64.mca index 0107de8956..64b56bcc10 100644 --- a/benches/read_from_suffix.x86-64.mca +++ b/benches/read_from_suffix.x86-64.mca @@ -20,7 +20,7 @@ Instruction Info: [1] [2] [3] [4] [5] [6] Instructions: 1 1 0.33 mov rcx, rsi 1 1 0.33 cmp rsi, 6 - 1 1 1.00 jb .LBB5_2 + 1 1 1.00 jb .LBB2_2 1 5 0.50 * mov eax, dword ptr [rdi + rsi - 6] 1 5 0.50 * movzx ecx, word ptr [rdi + rsi - 2] 1 1 0.50 shl rcx, 32 @@ -52,7 +52,7 @@ Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - 0.63 0.36 - 0.01 - - mov rcx, rsi - - 0.05 0.05 - 0.90 - - cmp rsi, 6 - - - - - - 1.00 - - jb .LBB5_2 + - - - - - 1.00 - - jb .LBB2_2 - - - - - - - 1.00 mov eax, dword ptr [rdi + rsi - 6] - - - - - - 1.00 - movzx ecx, word ptr [rdi + rsi - 2] - - 0.97 - - 0.03 - - shl rcx, 32 diff --git a/benches/ref_from_bytes_dynamic_padding.cortex-m3 b/benches/ref_from_bytes_dynamic_padding.cortex-m3 new file mode 100644 index 0000000000..22be110ab0 --- /dev/null +++ b/benches/ref_from_bytes_dynamic_padding.cortex-m3 @@ -0,0 +1,26 @@ +bench_ref_from_bytes_dynamic_padding: + push {r7, lr} + mov r7, sp + lsls r2, r0, #30 + bne .LBB3_3 + movw r2, #65532 + movt r2, #32767 + ands r2, r1 + cmp r2, #9 + blo .LBB3_3 + movw r3, #43691 + subs r2, #9 + movt r3, #43690 + umull r2, r3, r2, r3 + lsrs r2, r3, #1 + add.w r3, r2, r2, lsl #1 + orr r3, r3, #3 + adds r3, #9 + cmp r1, r3 + beq .LBB3_4 +.LBB3_3: + movs r0, #0 + mov r2, r1 +.LBB3_4: + mov r1, r2 + pop {r7, pc} diff --git a/benches/ref_from_bytes_dynamic_padding.generic-rv32 
b/benches/ref_from_bytes_dynamic_padding.generic-rv32 new file mode 100644 index 0000000000..89dfa1494b --- /dev/null +++ b/benches/ref_from_bytes_dynamic_padding.generic-rv32 @@ -0,0 +1,24 @@ +bench_ref_from_bytes_dynamic_padding: + andi a2, a0, 3 + bnez a2, .LBB3_3 + lui a2, 524288 + addi a2, a2, -4 + and a2, a2, a1 + li a3, 9 + bltu a2, a3, .LBB3_3 + addi a2, a2, -9 + lui a3, 699051 + addi a3, a3, -1365 + mulhu a3, a2, a3 + srli a2, a3, 1 + andi a3, a3, -2 + add a3, a3, a2 + ori a3, a3, 3 + addi a3, a3, 9 + beq a1, a3, .LBB3_4 +.LBB3_3: + li a0, 0 + mv a2, a1 +.LBB3_4: + mv a1, a2 + ret diff --git a/benches/ref_from_bytes_dynamic_padding.x86-64 b/benches/ref_from_bytes_dynamic_padding.x86-64 index e844a4608f..b0c5e71c40 100644 --- a/benches/ref_from_bytes_dynamic_padding.x86-64 +++ b/benches/ref_from_bytes_dynamic_padding.x86-64 @@ -1,10 +1,10 @@ bench_ref_from_bytes_dynamic_padding: test dil, 3 - jne .LBB5_3 + jne .LBB3_3 movabs rax, 9223372036854775804 and rax, rsi cmp rax, 9 - jb .LBB5_3 + jb .LBB3_3 add rax, -9 movabs rcx, -6148914691236517205 mul rcx @@ -13,10 +13,10 @@ bench_ref_from_bytes_dynamic_padding: or rax, 3 add rax, 9 cmp rsi, rax - je .LBB5_4 -.LBB5_3: + je .LBB3_4 +.LBB3_3: xor edi, edi mov rdx, rsi -.LBB5_4: +.LBB3_4: mov rax, rdi ret diff --git a/benches/ref_from_bytes_dynamic_padding.x86-64.mca b/benches/ref_from_bytes_dynamic_padding.x86-64.mca index 423ed38ba2..ddddc28aa6 100644 --- a/benches/ref_from_bytes_dynamic_padding.x86-64.mca +++ b/benches/ref_from_bytes_dynamic_padding.x86-64.mca @@ -19,11 +19,11 @@ Instruction Info: [1] [2] [3] [4] [5] [6] Instructions: 1 1 0.33 test dil, 3 - 1 1 1.00 jne .LBB5_3 + 1 1 1.00 jne .LBB3_3 1 1 0.33 movabs rax, 9223372036854775804 1 1 0.33 and rax, rsi 1 1 0.33 cmp rax, 9 - 1 1 1.00 jb .LBB5_3 + 1 1 1.00 jb .LBB3_3 1 1 0.33 add rax, -9 1 1 0.33 movabs rcx, -6148914691236517205 2 4 1.00 mul rcx @@ -32,7 +32,7 @@ Instruction Info: 1 1 0.33 or rax, 3 1 1 0.33 add rax, 9 1 1 0.33 cmp rsi, rax - 1 1 1.00 je 
.LBB5_4 + 1 1 1.00 je .LBB3_4 1 0 0.25 xor edi, edi 1 1 0.33 mov rdx, rsi 1 1 0.33 mov rax, rdi @@ -57,11 +57,11 @@ Resource pressure per iteration: Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - 0.64 0.35 - 0.01 - - test dil, 3 - - - - - - 1.00 - - jne .LBB5_3 + - - - - - 1.00 - - jne .LBB3_3 - - 0.34 0.65 - 0.01 - - movabs rax, 9223372036854775804 - - 0.35 0.65 - - - - and rax, rsi - - 0.33 0.34 - 0.33 - - cmp rax, 9 - - - - - - 1.00 - - jb .LBB5_3 + - - - - - 1.00 - - jb .LBB3_3 - - 0.35 - - 0.65 - - add rax, -9 - - 0.97 0.01 - 0.02 - - movabs rcx, -6148914691236517205 - - 1.00 1.00 - - - - mul rcx @@ -70,7 +70,7 @@ Resource pressure by instruction: - - 0.34 0.66 - - - - or rax, 3 - - 0.33 0.66 - 0.01 - - add rax, 9 - - 0.01 0.99 - - - - cmp rsi, rax - - - - - - 1.00 - - je .LBB5_4 + - - - - - 1.00 - - je .LBB3_4 - - - - - - - - xor edi, edi - - 0.32 0.01 - 0.67 - - mov rdx, rsi - - 0.02 0.34 - 0.64 - - mov rax, rdi diff --git a/benches/ref_from_bytes_dynamic_size.cortex-m3 b/benches/ref_from_bytes_dynamic_size.cortex-m3 new file mode 100644 index 0000000000..0cf2006db4 --- /dev/null +++ b/benches/ref_from_bytes_dynamic_size.cortex-m3 @@ -0,0 +1,19 @@ +bench_ref_from_bytes_dynamic_size: + push {r7, lr} + mov r7, sp + lsls r2, r0, #31 + mov.w r2, #0 + bne .LBB4_3 + cmp r1, #4 + blo .LBB4_3 + subs r2, r1, #4 + bic r3, r2, #1 + adds r3, #4 + cmp r1, r3 + ite ne + movne r0, #0 + lsreq r1, r2, #1 + mov r2, r0 +.LBB4_3: + mov r0, r2 + pop {r7, pc} diff --git a/benches/ref_from_bytes_dynamic_size.generic-rv32 b/benches/ref_from_bytes_dynamic_size.generic-rv32 new file mode 100644 index 0000000000..ec4adfacd4 --- /dev/null +++ b/benches/ref_from_bytes_dynamic_size.generic-rv32 @@ -0,0 +1,19 @@ +bench_ref_from_bytes_dynamic_size: + andi a3, a0, 1 + li a2, 0 + bnez a3, .LBB4_5 + li a3, 4 + bltu a1, a3, .LBB4_5 + addi a3, a1, -4 + andi a2, a3, -2 + addi a4, a2, 4 + xor a2, a1, a4 + snez a2, a2 + bne a1, a4, .LBB4_4 + srli a1, a3, 1 
+.LBB4_4: + addi a2, a2, -1 + and a2, a2, a0 +.LBB4_5: + mv a0, a2 + ret diff --git a/benches/ref_from_bytes_dynamic_size.x86-64 b/benches/ref_from_bytes_dynamic_size.x86-64 index cc905b76c0..c59979afd2 100644 --- a/benches/ref_from_bytes_dynamic_size.x86-64 +++ b/benches/ref_from_bytes_dynamic_size.x86-64 @@ -4,10 +4,10 @@ bench_ref_from_bytes_dynamic_size: setb al or al, dil test al, 1 - je .LBB5_2 + je .LBB4_2 xor eax, eax ret -.LBB5_2: +.LBB4_2: lea rcx, [rdx - 4] mov rsi, rcx and rsi, -2 diff --git a/benches/ref_from_bytes_dynamic_size.x86-64.mca b/benches/ref_from_bytes_dynamic_size.x86-64.mca index 68aea583e4..ff815f692b 100644 --- a/benches/ref_from_bytes_dynamic_size.x86-64.mca +++ b/benches/ref_from_bytes_dynamic_size.x86-64.mca @@ -23,7 +23,7 @@ Instruction Info: 1 1 0.50 setb al 1 1 0.33 or al, dil 1 1 0.33 test al, 1 - 1 1 1.00 je .LBB5_2 + 1 1 1.00 je .LBB4_2 1 0 0.25 xor eax, eax 1 1 1.00 U ret 1 1 0.50 lea rcx, [rdx - 4] @@ -60,7 +60,7 @@ Resource pressure by instruction: - - 0.03 - - 0.97 - - setb al - - 0.01 0.02 - 0.97 - - or al, dil - - - 0.98 - 0.02 - - test al, 1 - - - - - - 1.00 - - je .LBB5_2 + - - - - - 1.00 - - je .LBB4_2 - - - - - - - - xor eax, eax - - - - - 1.00 - - ret - - 0.98 0.02 - - - - lea rcx, [rdx - 4] diff --git a/benches/ref_from_bytes_static_size.cortex-m3 b/benches/ref_from_bytes_static_size.cortex-m3 new file mode 100644 index 0000000000..c09718110c --- /dev/null +++ b/benches/ref_from_bytes_static_size.cortex-m3 @@ -0,0 +1,9 @@ +bench_ref_from_bytes_static_size: + push {r7, lr} + mov r7, sp + and r2, r0, #1 + eor r1, r1, #6 + orrs r1, r2 + it ne + movne r0, #0 + pop {r7, pc} diff --git a/benches/ref_from_bytes_static_size.generic-rv32 b/benches/ref_from_bytes_static_size.generic-rv32 new file mode 100644 index 0000000000..8284947e80 --- /dev/null +++ b/benches/ref_from_bytes_static_size.generic-rv32 @@ -0,0 +1,8 @@ +bench_ref_from_bytes_static_size: + andi a2, a0, 1 + xori a1, a1, 6 + or a1, a1, a2 + snez a1, a1 + addi a1, 
a1, -1 + and a0, a0, a1 + ret diff --git a/benches/ref_from_bytes_with_elems_dynamic_padding.cortex-m3 b/benches/ref_from_bytes_with_elems_dynamic_padding.cortex-m3 new file mode 100644 index 0000000000..8b0bfa9366 --- /dev/null +++ b/benches/ref_from_bytes_with_elems_dynamic_padding.cortex-m3 @@ -0,0 +1,27 @@ +bench_ref_from_bytes_with_elems_dynamic_padding: + push {r7, lr} + mov r7, sp + mov r12, r2 + movs r2, #3 + umull r3, r2, r12, r2 + cbnz r2, .LBB6_3 + cmn.w r3, #10 + bhi .LBB6_3 + movs r2, #3 + add.w lr, r3, #9 + bics r2, r3 + add.w r3, r2, lr + cmp r1, r3 + it eq + andseq r2, r0, #3 + beq .LBB6_4 +.LBB6_3: + movs r0, #0 + mov r12, r1 + mov r1, r12 + pop {r7, pc} +.LBB6_4: + cmp r3, lr + blo .LBB6_3 + mov r1, r12 + pop {r7, pc} diff --git a/benches/ref_from_bytes_with_elems_dynamic_padding.generic-rv32 b/benches/ref_from_bytes_with_elems_dynamic_padding.generic-rv32 new file mode 100644 index 0000000000..fd8d6143b2 --- /dev/null +++ b/benches/ref_from_bytes_with_elems_dynamic_padding.generic-rv32 @@ -0,0 +1,21 @@ +bench_ref_from_bytes_with_elems_dynamic_padding: + li a3, 3 + mulhu a3, a2, a3 + bnez a3, .LBB6_6 + slli a4, a2, 1 + add a4, a4, a2 + li a3, -10 + bltu a3, a4, .LBB6_6 + addi a3, a4, 9 + not a4, a4 + andi a4, a4, 3 + add a4, a4, a3 + bne a1, a4, .LBB6_6 + andi a5, a0, 3 + bnez a5, .LBB6_6 + bltu a4, a3, .LBB6_6 + mv a1, a2 + ret +.LBB6_6: + li a0, 0 + ret diff --git a/benches/ref_from_bytes_with_elems_dynamic_padding.x86-64 b/benches/ref_from_bytes_with_elems_dynamic_padding.x86-64 index ba9e1a2c78..1736796e7c 100644 --- a/benches/ref_from_bytes_with_elems_dynamic_padding.x86-64 +++ b/benches/ref_from_bytes_with_elems_dynamic_padding.x86-64 @@ -3,26 +3,26 @@ bench_ref_from_bytes_with_elems_dynamic_padding: mov edx, 3 mov rax, rcx mul rdx - jo .LBB5_5 + jo .LBB6_5 cmp rax, -10 - ja .LBB5_5 + ja .LBB6_5 mov edx, eax not edx and edx, 3 add rdx, rax add rdx, 9 cmp rsi, rdx - jne .LBB5_5 + jne .LBB6_5 mov r8d, edi and r8d, 3 - jne .LBB5_5 + jne .LBB6_5 
add rax, 9 cmp rdx, rax - jb .LBB5_5 + jb .LBB6_5 mov rax, rdi mov rdx, rcx ret -.LBB5_5: +.LBB6_5: xor edi, edi mov rcx, rsi mov rax, rdi diff --git a/benches/ref_from_bytes_with_elems_dynamic_padding.x86-64.mca b/benches/ref_from_bytes_with_elems_dynamic_padding.x86-64.mca index 93696305cb..4b46867228 100644 --- a/benches/ref_from_bytes_with_elems_dynamic_padding.x86-64.mca +++ b/benches/ref_from_bytes_with_elems_dynamic_padding.x86-64.mca @@ -22,22 +22,22 @@ Instruction Info: 1 1 0.33 mov edx, 3 1 1 0.33 mov rax, rcx 2 4 1.00 mul rdx - 1 1 1.00 jo .LBB5_5 + 1 1 1.00 jo .LBB6_5 1 1 0.33 cmp rax, -10 - 1 1 1.00 ja .LBB5_5 + 1 1 1.00 ja .LBB6_5 1 1 0.33 mov edx, eax 1 1 0.33 not edx 1 1 0.33 and edx, 3 1 1 0.33 add rdx, rax 1 1 0.33 add rdx, 9 1 1 0.33 cmp rsi, rdx - 1 1 1.00 jne .LBB5_5 + 1 1 1.00 jne .LBB6_5 1 1 0.33 mov r8d, edi 1 1 0.33 and r8d, 3 - 1 1 1.00 jne .LBB5_5 + 1 1 1.00 jne .LBB6_5 1 1 0.33 add rax, 9 1 1 0.33 cmp rdx, rax - 1 1 1.00 jb .LBB5_5 + 1 1 1.00 jb .LBB6_5 1 1 0.33 mov rax, rdi 1 1 0.33 mov rdx, rcx 1 1 1.00 U ret @@ -69,22 +69,22 @@ Resource pressure by instruction: - - 0.17 0.83 - - - - mov edx, 3 - - 0.50 0.49 - 0.01 - - mov rax, rcx - - 1.00 1.00 - - - - mul rdx - - - - - - 1.00 - - jo .LBB5_5 + - - - - - 1.00 - - jo .LBB6_5 - - 0.82 0.18 - - - - cmp rax, -10 - - - - - - 1.00 - - ja .LBB5_5 + - - - - - 1.00 - - ja .LBB6_5 - - 0.02 0.98 - - - - mov edx, eax - - 0.82 0.02 - 0.16 - - not edx - - 0.82 0.17 - 0.01 - - and edx, 3 - - 0.99 - - 0.01 - - add rdx, rax - - 0.98 0.01 - 0.01 - - add rdx, 9 - - 1.00 - - - - - cmp rsi, rdx - - - - - - 1.00 - - jne .LBB5_5 + - - - - - 1.00 - - jne .LBB6_5 - - 0.16 0.83 - 0.01 - - mov r8d, edi - - 0.17 0.17 - 0.66 - - and r8d, 3 - - - - - - 1.00 - - jne .LBB5_5 + - - - - - 1.00 - - jne .LBB6_5 - - 0.02 0.98 - - - - add rax, 9 - - - 0.17 - 0.83 - - cmp rdx, rax - - - - - - 1.00 - - jb .LBB5_5 + - - - - - 1.00 - - jb .LBB6_5 - - 0.01 0.67 - 0.32 - - mov rax, rdi - - 0.02 0.98 - - - - mov rdx, rcx - - - - 
- 1.00 - - ret diff --git a/benches/ref_from_bytes_with_elems_dynamic_size.cortex-m3 b/benches/ref_from_bytes_with_elems_dynamic_size.cortex-m3 new file mode 100644 index 0000000000..2c2b1030e5 --- /dev/null +++ b/benches/ref_from_bytes_with_elems_dynamic_size.cortex-m3 @@ -0,0 +1,20 @@ +bench_ref_from_bytes_with_elems_dynamic_size: + push {r7, lr} + mov r7, sp + movw r3, #65534 + movt r3, #32767 + cmp r2, r3 + add.w r3, r2, #2 + and r12, r0, #1 + eor.w r3, r1, r3, lsl #1 + orr.w r3, r3, r12 + clz r3, r3 + mov.w lr, #0 + lsr.w r3, r3, #5 + it lo + movlo.w lr, #1 + ands.w r3, r3, lr + ite ne + movne r1, r2 + moveq r0, r3 + pop {r7, pc} diff --git a/benches/ref_from_bytes_with_elems_dynamic_size.generic-rv32 b/benches/ref_from_bytes_with_elems_dynamic_size.generic-rv32 new file mode 100644 index 0000000000..43ad50c675 --- /dev/null +++ b/benches/ref_from_bytes_with_elems_dynamic_size.generic-rv32 @@ -0,0 +1,15 @@ +bench_ref_from_bytes_with_elems_dynamic_size: + lui a3, 524288 + addi a3, a3, -3 + bltu a3, a2, .LBB7_3 + andi a3, a0, 1 + bnez a3, .LBB7_3 + slli a3, a2, 1 + addi a3, a3, 4 + beq a1, a3, .LBB7_4 +.LBB7_3: + li a0, 0 + mv a2, a1 +.LBB7_4: + mv a1, a2 + ret diff --git a/benches/ref_from_bytes_with_elems_dynamic_size.x86-64 b/benches/ref_from_bytes_with_elems_dynamic_size.x86-64 index 6aaff6d066..c9eaec643a 100644 --- a/benches/ref_from_bytes_with_elems_dynamic_size.x86-64 +++ b/benches/ref_from_bytes_with_elems_dynamic_size.x86-64 @@ -5,12 +5,12 @@ bench_ref_from_bytes_with_elems_dynamic_size: mov rax, rdi or dil, cl test dil, 1 - jne .LBB5_2 + jne .LBB7_2 lea rcx, [2*rdx + 4] cmp rsi, rcx - je .LBB5_3 -.LBB5_2: + je .LBB7_3 +.LBB7_2: xor eax, eax mov rdx, rsi -.LBB5_3: +.LBB7_3: ret diff --git a/benches/ref_from_bytes_with_elems_dynamic_size.x86-64.mca b/benches/ref_from_bytes_with_elems_dynamic_size.x86-64.mca index 4a67974f1a..1de6e39b7a 100644 --- a/benches/ref_from_bytes_with_elems_dynamic_size.x86-64.mca +++ 
b/benches/ref_from_bytes_with_elems_dynamic_size.x86-64.mca @@ -24,10 +24,10 @@ Instruction Info: 1 1 0.33 mov rax, rdi 1 1 0.33 or dil, cl 1 1 0.33 test dil, 1 - 1 1 1.00 jne .LBB5_2 + 1 1 1.00 jne .LBB7_2 1 1 0.50 lea rcx, [2*rdx + 4] 1 1 0.33 cmp rsi, rcx - 1 1 1.00 je .LBB5_3 + 1 1 1.00 je .LBB7_3 1 0 0.25 xor eax, eax 1 1 0.33 mov rdx, rsi 1 1 1.00 U ret @@ -56,10 +56,10 @@ Resource pressure by instruction: - - 0.01 0.99 - - - - mov rax, rdi - - 1.00 - - - - - or dil, cl - - 0.99 0.01 - - - - test dil, 1 - - - - - - 1.00 - - jne .LBB5_2 + - - - - - 1.00 - - jne .LBB7_2 - - - 1.00 - - - - lea rcx, [2*rdx + 4] - - 0.01 - - 0.99 - - cmp rsi, rcx - - - - - - 1.00 - - je .LBB5_3 + - - - - - 1.00 - - je .LBB7_3 - - - - - - - - xor eax, eax - - - 0.67 - 0.33 - - mov rdx, rsi - - - - - 1.00 - - ret diff --git a/benches/ref_from_prefix_dynamic_padding.cortex-m3 b/benches/ref_from_prefix_dynamic_padding.cortex-m3 new file mode 100644 index 0000000000..e0069a4fb5 --- /dev/null +++ b/benches/ref_from_prefix_dynamic_padding.cortex-m3 @@ -0,0 +1,24 @@ +bench_ref_from_prefix_dynamic_padding: + push {r7, lr} + mov r7, sp + lsls r2, r0, #30 + beq .LBB8_2 + movs r1, #0 + movs r0, #0 + pop {r7, pc} +.LBB8_2: + movw r2, #65532 + movt r2, #32767 + ands r1, r2 + cmp r1, #9 + bhs .LBB8_4 + movs r0, #0 + movs r1, #1 + pop {r7, pc} +.LBB8_4: + movw r2, #43691 + subs r1, #9 + movt r2, #43690 + umull r1, r2, r1, r2 + lsrs r1, r2, #1 + pop {r7, pc} diff --git a/benches/ref_from_prefix_dynamic_padding.generic-rv32 b/benches/ref_from_prefix_dynamic_padding.generic-rv32 new file mode 100644 index 0000000000..9c06fe6433 --- /dev/null +++ b/benches/ref_from_prefix_dynamic_padding.generic-rv32 @@ -0,0 +1,22 @@ +bench_ref_from_prefix_dynamic_padding: + andi a2, a0, 3 + beqz a2, .LBB8_2 + li a1, 0 + li a0, 0 + ret +.LBB8_2: + lui a2, 524288 + addi a2, a2, -4 + and a1, a1, a2 + li a2, 9 + bgeu a1, a2, .LBB8_4 + li a0, 0 + li a1, 1 + ret +.LBB8_4: + addi a1, a1, -9 + lui a2, 699051 + addi a2, a2, 
-1365 + mulhu a1, a1, a2 + srli a1, a1, 1 + ret diff --git a/benches/ref_from_prefix_dynamic_padding.x86-64 b/benches/ref_from_prefix_dynamic_padding.x86-64 index a58592a245..1db89c5b00 100644 --- a/benches/ref_from_prefix_dynamic_padding.x86-64 +++ b/benches/ref_from_prefix_dynamic_padding.x86-64 @@ -2,17 +2,17 @@ bench_ref_from_prefix_dynamic_padding: xor edx, edx mov eax, 0 test dil, 3 - je .LBB5_1 + je .LBB8_1 ret -.LBB5_1: +.LBB8_1: movabs rax, 9223372036854775804 and rsi, rax cmp rsi, 9 - jae .LBB5_3 + jae .LBB8_3 mov edx, 1 xor eax, eax ret -.LBB5_3: +.LBB8_3: add rsi, -9 movabs rcx, -6148914691236517205 mov rax, rsi diff --git a/benches/ref_from_prefix_dynamic_padding.x86-64.mca b/benches/ref_from_prefix_dynamic_padding.x86-64.mca index 62ea4babaf..314da32e7a 100644 --- a/benches/ref_from_prefix_dynamic_padding.x86-64.mca +++ b/benches/ref_from_prefix_dynamic_padding.x86-64.mca @@ -21,12 +21,12 @@ Instruction Info: 1 0 0.25 xor edx, edx 1 1 0.33 mov eax, 0 1 1 0.33 test dil, 3 - 1 1 1.00 je .LBB5_1 + 1 1 1.00 je .LBB8_1 1 1 1.00 U ret 1 1 0.33 movabs rax, 9223372036854775804 1 1 0.33 and rsi, rax 1 1 0.33 cmp rsi, 9 - 1 1 1.00 jae .LBB5_3 + 1 1 1.00 jae .LBB8_3 1 1 0.33 mov edx, 1 1 0 0.25 xor eax, eax 1 1 1.00 U ret @@ -59,12 +59,12 @@ Resource pressure by instruction: - - - - - - - - xor edx, edx - - 0.01 0.98 - 0.01 - - mov eax, 0 - - 0.98 0.01 - 0.01 - - test dil, 3 - - - - - - 1.00 - - je .LBB5_1 + - - - - - 1.00 - - je .LBB8_1 - - - - - 1.00 - - ret - - 0.01 0.99 - - - - movabs rax, 9223372036854775804 - - - 1.00 - - - - and rsi, rax - - - 1.00 - - - - cmp rsi, 9 - - - - - - 1.00 - - jae .LBB5_3 + - - - - - 1.00 - - jae .LBB8_3 - - 1.00 - - - - - mov edx, 1 - - - - - - - - xor eax, eax - - - - - 1.00 - - ret diff --git a/benches/ref_from_prefix_dynamic_size.cortex-m3 b/benches/ref_from_prefix_dynamic_size.cortex-m3 new file mode 100644 index 0000000000..5abec8adef --- /dev/null +++ b/benches/ref_from_prefix_dynamic_size.cortex-m3 @@ -0,0 +1,16 @@ 
+bench_ref_from_prefix_dynamic_size: + push {r7, lr} + mov r7, sp + lsls r2, r0, #31 + bne .LBB9_2 + cmp r1, #4 + ittee lo + movlo r0, #0 + movlo r1, #1 + subhs r1, #4 + lsrhs r1, r1, #1 + pop {r7, pc} +.LBB9_2: + movs r1, #0 + movs r0, #0 + pop {r7, pc} diff --git a/benches/ref_from_prefix_dynamic_size.generic-rv32 b/benches/ref_from_prefix_dynamic_size.generic-rv32 new file mode 100644 index 0000000000..b7c5f0a3ad --- /dev/null +++ b/benches/ref_from_prefix_dynamic_size.generic-rv32 @@ -0,0 +1,16 @@ +bench_ref_from_prefix_dynamic_size: + andi a2, a0, 1 + bnez a2, .LBB9_3 + li a2, 4 + bgeu a1, a2, .LBB9_4 + li a0, 0 + li a1, 1 + ret +.LBB9_3: + li a1, 0 + li a0, 0 + ret +.LBB9_4: + addi a1, a1, -4 + srli a1, a1, 1 + ret diff --git a/benches/ref_from_prefix_dynamic_size.x86-64 b/benches/ref_from_prefix_dynamic_size.x86-64 index fe6332c910..ada241eb1e 100644 --- a/benches/ref_from_prefix_dynamic_size.x86-64 +++ b/benches/ref_from_prefix_dynamic_size.x86-64 @@ -2,16 +2,16 @@ bench_ref_from_prefix_dynamic_size: xor edx, edx mov eax, 0 test dil, 1 - jne .LBB5_4 + jne .LBB9_4 cmp rsi, 4 - jae .LBB5_3 + jae .LBB9_3 mov edx, 1 xor eax, eax ret -.LBB5_3: +.LBB9_3: add rsi, -4 shr rsi mov rdx, rsi mov rax, rdi -.LBB5_4: +.LBB9_4: ret diff --git a/benches/ref_from_prefix_dynamic_size.x86-64.mca b/benches/ref_from_prefix_dynamic_size.x86-64.mca index 3900a59461..d26203c9ae 100644 --- a/benches/ref_from_prefix_dynamic_size.x86-64.mca +++ b/benches/ref_from_prefix_dynamic_size.x86-64.mca @@ -21,9 +21,9 @@ Instruction Info: 1 0 0.25 xor edx, edx 1 1 0.33 mov eax, 0 1 1 0.33 test dil, 1 - 1 1 1.00 jne .LBB5_4 + 1 1 1.00 jne .LBB9_4 1 1 0.33 cmp rsi, 4 - 1 1 1.00 jae .LBB5_3 + 1 1 1.00 jae .LBB9_3 1 1 0.33 mov edx, 1 1 0 0.25 xor eax, eax 1 1 1.00 U ret @@ -54,9 +54,9 @@ Resource pressure by instruction: - - - - - - - - xor edx, edx - - 0.01 0.98 - 0.01 - - mov eax, 0 - - 0.98 0.02 - - - - test dil, 1 - - - - - - 1.00 - - jne .LBB5_4 + - - - - - 1.00 - - jne .LBB9_4 - - 0.02 0.98 
- - - - cmp rsi, 4 - - - - - - 1.00 - - jae .LBB5_3 + - - - - - 1.00 - - jae .LBB9_3 - - 0.98 0.01 - 0.01 - - mov edx, 1 - - - - - - - - xor eax, eax - - - - - 1.00 - - ret diff --git a/benches/ref_from_prefix_static_size.cortex-m3 b/benches/ref_from_prefix_static_size.cortex-m3 new file mode 100644 index 0000000000..ea672849cd --- /dev/null +++ b/benches/ref_from_prefix_static_size.cortex-m3 @@ -0,0 +1,12 @@ +bench_ref_from_prefix_static_size: + push {r7, lr} + mov r7, sp + cmp r1, #6 + mov r1, r0 + it lo + movlo r1, #0 + lsls r0, r0, #31 + it ne + movne r1, #0 + mov r0, r1 + pop {r7, pc} diff --git a/benches/ref_from_prefix_static_size.generic-rv32 b/benches/ref_from_prefix_static_size.generic-rv32 new file mode 100644 index 0000000000..3a87f08fb3 --- /dev/null +++ b/benches/ref_from_prefix_static_size.generic-rv32 @@ -0,0 +1,7 @@ +bench_ref_from_prefix_static_size: + sltiu a1, a1, 6 + or a1, a1, a0 + andi a1, a1, 1 + addi a1, a1, -1 + and a0, a0, a1 + ret diff --git a/benches/ref_from_prefix_with_elems_dynamic_padding.cortex-m3 b/benches/ref_from_prefix_with_elems_dynamic_padding.cortex-m3 new file mode 100644 index 0000000000..54b1e2b109 --- /dev/null +++ b/benches/ref_from_prefix_with_elems_dynamic_padding.cortex-m3 @@ -0,0 +1,31 @@ +bench_ref_from_prefix_with_elems_dynamic_padding: + movs r3, #3 + mov r12, r2 + umull r3, r2, r2, r3 + cbnz r2, .LBB11_3 + cmn.w r3, #10 + bhi .LBB11_3 + push {r7, lr} + mov r7, sp + movs r2, #3 + add.w lr, r3, #9 + bics r2, r3 + adds.w r3, r2, lr + pop.w {r7, lr} + blo .LBB11_4 +.LBB11_3: + movs r0, #0 + movs r1, #1 + bx lr +.LBB11_4: + lsls r2, r0, #30 + ittt ne + movne r1, #0 + movne r0, #0 + bxne lr + cmp r3, r1 + itt hi + movhi r0, #0 + movhi.w r12, #1 + mov r1, r12 + bx lr diff --git a/benches/ref_from_prefix_with_elems_dynamic_padding.generic-rv32 b/benches/ref_from_prefix_with_elems_dynamic_padding.generic-rv32 new file mode 100644 index 0000000000..be57040da1 --- /dev/null +++ 
b/benches/ref_from_prefix_with_elems_dynamic_padding.generic-rv32 @@ -0,0 +1,33 @@ +bench_ref_from_prefix_with_elems_dynamic_padding: + mv a3, a1 + li a1, 3 + mulhu a4, a2, a1 + li a1, 1 + bnez a4, .LBB11_3 + slli a4, a2, 1 + add a4, a4, a2 + li a5, -10 + bltu a5, a4, .LBB11_3 + ori a5, a4, 3 + not a4, a4 + andi a4, a4, 3 + addi a5, a5, 9 + bgeu a5, a4, .LBB11_4 +.LBB11_3: + li a0, 0 + ret +.LBB11_4: + andi a1, a0, 3 + beqz a1, .LBB11_6 + li a1, 0 + li a0, 0 + ret +.LBB11_6: + sltu a1, a3, a5 + bgeu a3, a5, .LBB11_8 + li a2, 1 +.LBB11_8: + addi a1, a1, -1 + and a0, a0, a1 + mv a1, a2 + ret diff --git a/benches/ref_from_prefix_with_elems_dynamic_padding.x86-64 b/benches/ref_from_prefix_with_elems_dynamic_padding.x86-64 index c03811bdbe..2b2abb79eb 100644 --- a/benches/ref_from_prefix_with_elems_dynamic_padding.x86-64 +++ b/benches/ref_from_prefix_with_elems_dynamic_padding.x86-64 @@ -4,26 +4,26 @@ bench_ref_from_prefix_with_elems_dynamic_padding: mov rax, rcx mul rdx mov edx, 1 - jo .LBB5_5 + jo .LBB11_5 cmp rax, -10 - ja .LBB5_5 + ja .LBB11_5 lea r8, [rax + 9] not eax and eax, 3 add rax, r8 - jae .LBB5_3 -.LBB5_5: + jae .LBB11_3 +.LBB11_5: xor r8d, r8d mov rax, r8 ret -.LBB5_3: +.LBB11_3: xor edx, edx mov r8d, 0 test dil, 3 - je .LBB5_4 + je .LBB11_4 mov rax, r8 ret -.LBB5_4: +.LBB11_4: xor edx, edx cmp rax, rsi mov eax, 1 diff --git a/benches/ref_from_prefix_with_elems_dynamic_padding.x86-64.mca b/benches/ref_from_prefix_with_elems_dynamic_padding.x86-64.mca index 6a3968fe9e..db4853f727 100644 --- a/benches/ref_from_prefix_with_elems_dynamic_padding.x86-64.mca +++ b/benches/ref_from_prefix_with_elems_dynamic_padding.x86-64.mca @@ -23,21 +23,21 @@ Instruction Info: 1 1 0.33 mov rax, rcx 2 4 1.00 mul rdx 1 1 0.33 mov edx, 1 - 1 1 1.00 jo .LBB5_5 + 1 1 1.00 jo .LBB11_5 1 1 0.33 cmp rax, -10 - 1 1 1.00 ja .LBB5_5 + 1 1 1.00 ja .LBB11_5 1 1 0.50 lea r8, [rax + 9] 1 1 0.33 not eax 1 1 0.33 and eax, 3 1 1 0.33 add rax, r8 - 1 1 1.00 jae .LBB5_3 + 1 1 1.00 jae .LBB11_3 1 
0 0.25 xor r8d, r8d 1 1 0.33 mov rax, r8 1 1 1.00 U ret 1 0 0.25 xor edx, edx 1 1 0.33 mov r8d, 0 1 1 0.33 test dil, 3 - 1 1 1.00 je .LBB5_4 + 1 1 1.00 je .LBB11_4 1 1 0.33 mov rax, r8 1 1 1.00 U ret 1 0 0.25 xor edx, edx @@ -73,21 +73,21 @@ Resource pressure by instruction: - - 0.51 0.48 - 0.01 - - mov rax, rcx - - 1.00 1.00 - - - - mul rdx - - 0.49 0.50 - 0.01 - - mov edx, 1 - - - - - - 1.00 - - jo .LBB5_5 + - - - - - 1.00 - - jo .LBB11_5 - - 0.98 0.02 - - - - cmp rax, -10 - - - - - - 1.00 - - ja .LBB5_5 + - - - - - 1.00 - - ja .LBB11_5 - - 0.02 0.98 - - - - lea r8, [rax + 9] - - 0.98 0.02 - - - - not eax - - 0.99 0.01 - - - - and eax, 3 - - 0.98 0.01 - 0.01 - - add rax, r8 - - - - - - 1.00 - - jae .LBB5_3 + - - - - - 1.00 - - jae .LBB11_3 - - - - - - - - xor r8d, r8d - - 0.01 0.98 - 0.01 - - mov rax, r8 - - - - - 1.00 - - ret - - - - - - - - xor edx, edx - - 0.48 0.52 - - - - mov r8d, 0 - - 0.02 0.97 - 0.01 - - test dil, 3 - - - - - - 1.00 - - je .LBB5_4 + - - - - - 1.00 - - je .LBB11_4 - - 0.49 0.50 - 0.01 - - mov rax, r8 - - - - - 1.00 - - ret - - - - - - - - xor edx, edx diff --git a/benches/ref_from_prefix_with_elems_dynamic_size.cortex-m3 b/benches/ref_from_prefix_with_elems_dynamic_size.cortex-m3 new file mode 100644 index 0000000000..1677a247ef --- /dev/null +++ b/benches/ref_from_prefix_with_elems_dynamic_size.cortex-m3 @@ -0,0 +1,25 @@ +bench_ref_from_prefix_with_elems_dynamic_size: + push {r7, lr} + mov r7, sp + movw r3, #65533 + movt r3, #32767 + cmp r2, r3 + bhi .LBB12_4 + lsls r3, r0, #31 + bne .LBB12_3 + adds r3, r2, #2 + lsls r3, r3, #1 + cmp r3, r1 + itt hi + movhi r0, #0 + movhi r2, #1 + mov r1, r2 + pop {r7, pc} +.LBB12_3: + movs r1, #0 + movs r0, #0 + pop {r7, pc} +.LBB12_4: + movs r0, #0 + movs r1, #1 + pop {r7, pc} diff --git a/benches/ref_from_prefix_with_elems_dynamic_size.generic-rv32 b/benches/ref_from_prefix_with_elems_dynamic_size.generic-rv32 new file mode 100644 index 0000000000..99b12b9727 --- /dev/null +++ 
b/benches/ref_from_prefix_with_elems_dynamic_size.generic-rv32 @@ -0,0 +1,24 @@ +bench_ref_from_prefix_with_elems_dynamic_size: + lui a3, 524288 + addi a3, a3, -3 + bltu a3, a2, .LBB12_6 + andi a3, a0, 1 + bnez a3, .LBB12_5 + slli a3, a2, 1 + addi a4, a3, 4 + sltu a3, a1, a4 + bgeu a1, a4, .LBB12_4 + li a2, 1 +.LBB12_4: + addi a3, a3, -1 + and a0, a0, a3 + mv a1, a2 + ret +.LBB12_5: + li a0, 0 + li a1, 0 + ret +.LBB12_6: + li a0, 0 + li a1, 1 + ret diff --git a/benches/ref_from_prefix_with_elems_dynamic_size.x86-64 b/benches/ref_from_prefix_with_elems_dynamic_size.x86-64 index 05818b0633..1ebc91077e 100644 --- a/benches/ref_from_prefix_with_elems_dynamic_size.x86-64 +++ b/benches/ref_from_prefix_with_elems_dynamic_size.x86-64 @@ -1,12 +1,12 @@ bench_ref_from_prefix_with_elems_dynamic_size: movabs rax, 9223372036854775805 cmp rdx, rax - ja .LBB5_1 + ja .LBB12_1 mov rcx, rdx xor edx, edx mov eax, 0 test dil, 1 - jne .LBB5_4 + jne .LBB12_4 lea rax, [2*rcx + 4] xor r8d, r8d cmp rax, rsi @@ -14,9 +14,9 @@ bench_ref_from_prefix_with_elems_dynamic_size: cmovbe rdx, rcx cmova rdi, r8 mov rax, rdi -.LBB5_4: +.LBB12_4: ret -.LBB5_1: +.LBB12_1: mov edx, 1 xor eax, eax ret diff --git a/benches/ref_from_prefix_with_elems_dynamic_size.x86-64.mca b/benches/ref_from_prefix_with_elems_dynamic_size.x86-64.mca index 94c718e22c..157b16d488 100644 --- a/benches/ref_from_prefix_with_elems_dynamic_size.x86-64.mca +++ b/benches/ref_from_prefix_with_elems_dynamic_size.x86-64.mca @@ -20,12 +20,12 @@ Instruction Info: [1] [2] [3] [4] [5] [6] Instructions: 1 1 0.33 movabs rax, 9223372036854775805 1 1 0.33 cmp rdx, rax - 1 1 1.00 ja .LBB5_1 + 1 1 1.00 ja .LBB12_1 1 1 0.33 mov rcx, rdx 1 0 0.25 xor edx, edx 1 1 0.33 mov eax, 0 1 1 0.33 test dil, 1 - 1 1 1.00 jne .LBB5_4 + 1 1 1.00 jne .LBB12_4 1 1 0.50 lea rax, [2*rcx + 4] 1 0 0.25 xor r8d, r8d 1 1 0.33 cmp rax, rsi @@ -58,12 +58,12 @@ Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.99 - 0.01 - - 
movabs rax, 9223372036854775805 - - 0.37 0.63 - - - - cmp rdx, rax - - - - - - 1.00 - - ja .LBB5_1 + - - - - - 1.00 - - ja .LBB12_1 - - 0.63 0.37 - - - - mov rcx, rdx - - - - - - - - xor edx, edx - - 0.01 0.98 - 0.01 - - mov eax, 0 - - 0.98 0.02 - - - - test dil, 1 - - - - - - 1.00 - - jne .LBB5_4 + - - - - - 1.00 - - jne .LBB12_4 - - 0.01 0.99 - - - - lea rax, [2*rcx + 4] - - - - - - - - xor r8d, r8d - - 1.00 - - - - - cmp rax, rsi diff --git a/benches/ref_from_suffix_dynamic_padding.cortex-m3 b/benches/ref_from_suffix_dynamic_padding.cortex-m3 new file mode 100644 index 0000000000..c009a177d9 --- /dev/null +++ b/benches/ref_from_suffix_dynamic_padding.cortex-m3 @@ -0,0 +1,28 @@ +bench_ref_from_suffix_dynamic_padding: + push {r7, lr} + mov r7, sp + adds r2, r1, r0 + lsls r2, r2, #30 + bne .LBB13_2 + movw r2, #65532 + movt r2, #32767 + ands r2, r1 + cmp r2, #9 + bhs .LBB13_3 +.LBB13_2: + movs r0, #0 + pop {r7, pc} +.LBB13_3: + movw r3, #43691 + subs r2, #9 + movt r3, #43690 + umull r2, r3, r2, r3 + mov r12, r1 + lsrs r1, r3, #1 + add.w r3, r1, r1, lsl #1 + orn r2, r3, #3 + sub.w r3, r12, r3 + add r0, r3 + add r0, r2 + subs r0, #8 + pop {r7, pc} diff --git a/benches/ref_from_suffix_dynamic_padding.generic-rv32 b/benches/ref_from_suffix_dynamic_padding.generic-rv32 new file mode 100644 index 0000000000..bb18b7d4a6 --- /dev/null +++ b/benches/ref_from_suffix_dynamic_padding.generic-rv32 @@ -0,0 +1,26 @@ +bench_ref_from_suffix_dynamic_padding: + add a2, a1, a0 + andi a2, a2, 3 + bnez a2, .LBB13_2 + lui a2, 524288 + addi a2, a2, -4 + and a2, a2, a1 + li a3, 9 + bgeu a2, a3, .LBB13_3 +.LBB13_2: + li a0, 0 + ret +.LBB13_3: + addi a2, a2, -9 + lui a3, 699051 + add a0, a0, a1 + addi a1, a3, -1365 + mulhu a2, a2, a1 + srli a1, a2, 1 + andi a2, a2, -2 + add a2, a2, a1 + ori a3, a2, -4 + sub a0, a0, a2 + add a0, a0, a3 + addi a0, a0, -8 + ret diff --git a/benches/ref_from_suffix_dynamic_padding.x86-64 b/benches/ref_from_suffix_dynamic_padding.x86-64 index 
3e05f6023f..52752211fb 100644 --- a/benches/ref_from_suffix_dynamic_padding.x86-64 +++ b/benches/ref_from_suffix_dynamic_padding.x86-64 @@ -1,15 +1,15 @@ bench_ref_from_suffix_dynamic_padding: lea eax, [rsi + rdi] test al, 3 - jne .LBB5_1 + jne .LBB13_1 movabs rax, 9223372036854775804 and rax, rsi cmp rax, 9 - jae .LBB5_3 -.LBB5_1: + jae .LBB13_3 +.LBB13_1: xor eax, eax ret -.LBB5_3: +.LBB13_3: add rax, -9 movabs rcx, -6148914691236517205 mul rcx diff --git a/benches/ref_from_suffix_dynamic_padding.x86-64.mca b/benches/ref_from_suffix_dynamic_padding.x86-64.mca index 73599d5b6a..af1369d4bb 100644 --- a/benches/ref_from_suffix_dynamic_padding.x86-64.mca +++ b/benches/ref_from_suffix_dynamic_padding.x86-64.mca @@ -20,11 +20,11 @@ Instruction Info: [1] [2] [3] [4] [5] [6] Instructions: 1 1 0.50 lea eax, [rsi + rdi] 1 1 0.33 test al, 3 - 1 1 1.00 jne .LBB5_1 + 1 1 1.00 jne .LBB13_1 1 1 0.33 movabs rax, 9223372036854775804 1 1 0.33 and rax, rsi 1 1 0.33 cmp rax, 9 - 1 1 1.00 jae .LBB5_3 + 1 1 1.00 jae .LBB13_3 1 0 0.25 xor eax, eax 1 1 1.00 U ret 1 1 0.33 add rax, -9 @@ -59,11 +59,11 @@ Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - 0.90 0.10 - - - - lea eax, [rsi + rdi] - - 0.93 - - 0.07 - - test al, 3 - - - - - - 1.00 - - jne .LBB5_1 + - - - - - 1.00 - - jne .LBB13_1 - - 0.51 0.47 - 0.02 - - movabs rax, 9223372036854775804 - - - - - 1.00 - - and rax, rsi - - - 0.09 - 0.91 - - cmp rax, 9 - - - - - - 1.00 - - jae .LBB5_3 + - - - - - 1.00 - - jae .LBB13_3 - - - - - - - - xor eax, eax - - - - - 1.00 - - ret - - 0.43 0.47 - 0.10 - - add rax, -9 diff --git a/benches/ref_from_suffix_dynamic_size.cortex-m3 b/benches/ref_from_suffix_dynamic_size.cortex-m3 new file mode 100644 index 0000000000..4ee62ebe6c --- /dev/null +++ b/benches/ref_from_suffix_dynamic_size.cortex-m3 @@ -0,0 +1,15 @@ +bench_ref_from_suffix_dynamic_size: + push {r7, lr} + mov r7, sp + adds r3, r1, r0 + subs r2, r1, #4 + and r1, r1, #1 + add r0, r1 + lsr.w r2, r2, #1 + 
it lo + movlo r0, #0 + lsls r1, r3, #31 + mov r1, r2 + it ne + movne r0, #0 + pop {r7, pc} diff --git a/benches/ref_from_suffix_dynamic_size.generic-rv32 b/benches/ref_from_suffix_dynamic_size.generic-rv32 new file mode 100644 index 0000000000..42840852a9 --- /dev/null +++ b/benches/ref_from_suffix_dynamic_size.generic-rv32 @@ -0,0 +1,12 @@ +bench_ref_from_suffix_dynamic_size: + add a2, a1, a0 + sltiu a3, a1, 4 + or a2, a2, a3 + addi a3, a1, -4 + andi a4, a1, 1 + srli a1, a3, 1 + andi a2, a2, 1 + add a0, a0, a4 + addi a2, a2, -1 + and a0, a0, a2 + ret diff --git a/benches/ref_from_suffix_static_size.cortex-m3 b/benches/ref_from_suffix_static_size.cortex-m3 new file mode 100644 index 0000000000..ca0b7d71e3 --- /dev/null +++ b/benches/ref_from_suffix_static_size.cortex-m3 @@ -0,0 +1,12 @@ +bench_ref_from_suffix_static_size: + push {r7, lr} + mov r7, sp + adds r2, r1, r0 + lsls r0, r2, #31 + mov.w r0, #0 + it ne + popne {r7, pc} + cmp r1, #6 + it hs + subhs r0, r2, #6 + pop {r7, pc} diff --git a/benches/ref_from_suffix_static_size.generic-rv32 b/benches/ref_from_suffix_static_size.generic-rv32 new file mode 100644 index 0000000000..1f214e1cbd --- /dev/null +++ b/benches/ref_from_suffix_static_size.generic-rv32 @@ -0,0 +1,10 @@ +bench_ref_from_suffix_static_size: + add a2, a1, a0 + andi a3, a2, 1 + li a0, 0 + bnez a3, .LBB15_3 + li a3, 6 + bltu a1, a3, .LBB15_3 + addi a0, a2, -6 +.LBB15_3: + ret diff --git a/benches/ref_from_suffix_static_size.x86-64 b/benches/ref_from_suffix_static_size.x86-64 index 9e90b9e254..75a4379f76 100644 --- a/benches/ref_from_suffix_static_size.x86-64 +++ b/benches/ref_from_suffix_static_size.x86-64 @@ -4,10 +4,10 @@ bench_ref_from_suffix_static_size: setb cl or cl, al test cl, 1 - je .LBB5_2 + je .LBB15_2 xor eax, eax ret -.LBB5_2: +.LBB15_2: lea rax, [rdi + rsi] add rax, -6 ret diff --git a/benches/ref_from_suffix_static_size.x86-64.mca b/benches/ref_from_suffix_static_size.x86-64.mca index ef5892647b..75d01eaa43 100644 --- 
a/benches/ref_from_suffix_static_size.x86-64.mca +++ b/benches/ref_from_suffix_static_size.x86-64.mca @@ -23,7 +23,7 @@ Instruction Info: 1 1 0.50 setb cl 1 1 0.33 or cl, al 1 1 0.33 test cl, 1 - 1 1 1.00 je .LBB5_2 + 1 1 1.00 je .LBB15_2 1 0 0.25 xor eax, eax 1 1 1.00 U ret 1 1 0.50 lea rax, [rdi + rsi] @@ -53,7 +53,7 @@ Resource pressure by instruction: - - 1.00 - - - - - setb cl - - - 1.00 - - - - or cl, al - - - 1.00 - - - - test cl, 1 - - - - - - 1.00 - - je .LBB5_2 + - - - - - 1.00 - - je .LBB15_2 - - - - - - - - xor eax, eax - - - - - 1.00 - - ret - - 0.34 0.66 - - - - lea rax, [rdi + rsi] diff --git a/benches/ref_from_suffix_with_elems_dynamic_padding.cortex-m3 b/benches/ref_from_suffix_with_elems_dynamic_padding.cortex-m3 new file mode 100644 index 0000000000..d7c9b404dd --- /dev/null +++ b/benches/ref_from_suffix_with_elems_dynamic_padding.cortex-m3 @@ -0,0 +1,32 @@ +bench_ref_from_suffix_with_elems_dynamic_padding: + movs r3, #3 + mov r12, r2 + umull r3, r2, r2, r3 + cbnz r2, .LBB16_5 + cmn.w r3, #10 + bhi .LBB16_5 + push {r7, lr} + mov r7, sp + movs r2, #3 + add.w lr, r3, #9 + bics r2, r3 + adds.w r3, r2, lr + pop.w {r7, lr} + bhs .LBB16_5 + adds r2, r1, r0 + lsls r2, r2, #30 + ittt ne + movne r1, #0 + movne r0, #0 + bxne lr + cmp r1, r3 + bhs .LBB16_6 +.LBB16_5: + movs r0, #0 + movs r1, #1 + bx lr +.LBB16_6: + subs r1, r1, r3 + add r0, r1 + mov r1, r12 + bx lr diff --git a/benches/ref_from_suffix_with_elems_dynamic_padding.generic-rv32 b/benches/ref_from_suffix_with_elems_dynamic_padding.generic-rv32 new file mode 100644 index 0000000000..dab6337409 --- /dev/null +++ b/benches/ref_from_suffix_with_elems_dynamic_padding.generic-rv32 @@ -0,0 +1,31 @@ +bench_ref_from_suffix_with_elems_dynamic_padding: + mv a3, a1 + li a1, 3 + mulhu a4, a2, a1 + li a1, 1 + bnez a4, .LBB16_3 + slli a4, a2, 1 + add a4, a4, a2 + li a5, -10 + bltu a5, a4, .LBB16_3 + ori a5, a4, 3 + not a4, a4 + andi a4, a4, 3 + addi a5, a5, 9 + bgeu a5, a4, .LBB16_4 +.LBB16_3: + li a0, 0 + ret 
+.LBB16_4: + add a4, a3, a0 + andi a4, a4, 3 + beqz a4, .LBB16_6 + li a1, 0 + li a0, 0 + ret +.LBB16_6: + bltu a3, a5, .LBB16_3 + sub a3, a3, a5 + add a0, a0, a3 + mv a1, a2 + ret diff --git a/benches/ref_from_suffix_with_elems_dynamic_padding.x86-64 b/benches/ref_from_suffix_with_elems_dynamic_padding.x86-64 index b3e239cb75..8a987d810f 100644 --- a/benches/ref_from_suffix_with_elems_dynamic_padding.x86-64 +++ b/benches/ref_from_suffix_with_elems_dynamic_padding.x86-64 @@ -3,30 +3,30 @@ bench_ref_from_suffix_with_elems_dynamic_padding: mov edx, 3 mov rax, rcx mul rdx - jo .LBB5_1 + jo .LBB16_1 cmp rax, -10 - ja .LBB5_1 + ja .LBB16_1 lea rdx, [rax + 9] not eax and eax, 3 add rax, rdx - jae .LBB5_4 -.LBB5_1: + jae .LBB16_4 +.LBB16_1: xor r8d, r8d mov edx, 1 mov rax, r8 ret -.LBB5_4: +.LBB16_4: lea r9d, [rsi + rdi] xor edx, edx mov r8d, 0 test r9b, 3 - je .LBB5_5 + je .LBB16_5 mov rax, r8 ret -.LBB5_5: +.LBB16_5: sub rsi, rax - jb .LBB5_1 + jb .LBB16_1 add rdi, rsi mov rdx, rcx mov r8, rdi diff --git a/benches/ref_from_suffix_with_elems_dynamic_padding.x86-64.mca b/benches/ref_from_suffix_with_elems_dynamic_padding.x86-64.mca index c7c3c7ec2b..ecaf7ac849 100644 --- a/benches/ref_from_suffix_with_elems_dynamic_padding.x86-64.mca +++ b/benches/ref_from_suffix_with_elems_dynamic_padding.x86-64.mca @@ -22,14 +22,14 @@ Instruction Info: 1 1 0.33 mov edx, 3 1 1 0.33 mov rax, rcx 2 4 1.00 mul rdx - 1 1 1.00 jo .LBB5_1 + 1 1 1.00 jo .LBB16_1 1 1 0.33 cmp rax, -10 - 1 1 1.00 ja .LBB5_1 + 1 1 1.00 ja .LBB16_1 1 1 0.50 lea rdx, [rax + 9] 1 1 0.33 not eax 1 1 0.33 and eax, 3 1 1 0.33 add rax, rdx - 1 1 1.00 jae .LBB5_4 + 1 1 1.00 jae .LBB16_4 1 0 0.25 xor r8d, r8d 1 1 0.33 mov edx, 1 1 1 0.33 mov rax, r8 @@ -38,11 +38,11 @@ Instruction Info: 1 0 0.25 xor edx, edx 1 1 0.33 mov r8d, 0 1 1 0.33 test r9b, 3 - 1 1 1.00 je .LBB5_5 + 1 1 1.00 je .LBB16_5 1 1 0.33 mov rax, r8 1 1 1.00 U ret 1 1 0.33 sub rsi, rax - 1 1 1.00 jb .LBB5_1 + 1 1 1.00 jb .LBB16_1 1 1 0.33 add rdi, rsi 1 1 0.33 
mov rdx, rcx 1 1 0.33 mov r8, rdi @@ -71,14 +71,14 @@ Resource pressure by instruction: - - 0.66 0.34 - - - - mov edx, 3 - - 0.34 0.66 - - - - mov rax, rcx - - 1.00 1.00 - - - - mul rdx - - - - - - 1.00 - - jo .LBB5_1 + - - - - - 1.00 - - jo .LBB16_1 - - 1.00 - - - - - cmp rax, -10 - - - - - - 1.00 - - ja .LBB5_1 + - - - - - 1.00 - - ja .LBB16_1 - - - 1.00 - - - - lea rdx, [rax + 9] - - 1.00 - - - - - not eax - - 1.00 - - - - - and eax, 3 - - 1.00 - - - - - add rax, rdx - - - - - - 1.00 - - jae .LBB5_4 + - - - - - 1.00 - - jae .LBB16_4 - - - - - - - - xor r8d, r8d - - 0.33 0.33 - 0.34 - - mov edx, 1 - - 0.33 - - 0.67 - - mov rax, r8 @@ -87,11 +87,11 @@ Resource pressure by instruction: - - - - - - - - xor edx, edx - - 0.67 0.33 - - - - mov r8d, 0 - - 0.33 0.34 - 0.33 - - test r9b, 3 - - - - - - 1.00 - - je .LBB5_5 + - - - - - 1.00 - - je .LBB16_5 - - 0.66 0.01 - 0.33 - - mov rax, r8 - - - - - 1.00 - - ret - - 0.33 0.67 - - - - sub rsi, rax - - - - - - 1.00 - - jb .LBB5_1 + - - - - - 1.00 - - jb .LBB16_1 - - - 1.00 - - - - add rdi, rsi - - 0.01 0.99 - - - - mov rdx, rcx - - - 1.00 - - - - mov r8, rdi diff --git a/benches/ref_from_suffix_with_elems_dynamic_size.cortex-m3 b/benches/ref_from_suffix_with_elems_dynamic_size.cortex-m3 new file mode 100644 index 0000000000..4b3bb1fd31 --- /dev/null +++ b/benches/ref_from_suffix_with_elems_dynamic_size.cortex-m3 @@ -0,0 +1,27 @@ +bench_ref_from_suffix_with_elems_dynamic_size: + push {r7, lr} + mov r7, sp + movw r3, #65533 + movt r3, #32767 + cmp r2, r3 + bhi .LBB17_3 + adds r3, r1, r0 + lsls r3, r3, #31 + bne .LBB17_4 + movs r3, #4 + add.w r3, r3, r2, lsl #1 + cmp r1, r3 + bhs .LBB17_5 +.LBB17_3: + movs r0, #0 + movs r1, #1 + pop {r7, pc} +.LBB17_4: + movs r1, #0 + movs r0, #0 + pop {r7, pc} +.LBB17_5: + subs r1, r1, r3 + add r0, r1 + mov r1, r2 + pop {r7, pc} diff --git a/benches/ref_from_suffix_with_elems_dynamic_size.generic-rv32 b/benches/ref_from_suffix_with_elems_dynamic_size.generic-rv32 new file mode 100644 index 
0000000000..8b62726edb --- /dev/null +++ b/benches/ref_from_suffix_with_elems_dynamic_size.generic-rv32 @@ -0,0 +1,24 @@ +bench_ref_from_suffix_with_elems_dynamic_size: + mv a3, a1 + lui a1, 524288 + addi a4, a1, -3 + li a1, 1 + bltu a4, a2, .LBB17_3 + add a4, a3, a0 + andi a4, a4, 1 + bnez a4, .LBB17_4 + slli a4, a2, 1 + addi a4, a4, 4 + bgeu a3, a4, .LBB17_5 +.LBB17_3: + li a0, 0 + ret +.LBB17_4: + li a1, 0 + li a0, 0 + ret +.LBB17_5: + sub a3, a3, a4 + add a0, a0, a3 + mv a1, a2 + ret diff --git a/benches/ref_from_suffix_with_elems_dynamic_size.x86-64 b/benches/ref_from_suffix_with_elems_dynamic_size.x86-64 index 5b401e7ca1..199aa8ce7e 100644 --- a/benches/ref_from_suffix_with_elems_dynamic_size.x86-64 +++ b/benches/ref_from_suffix_with_elems_dynamic_size.x86-64 @@ -1,23 +1,23 @@ bench_ref_from_suffix_with_elems_dynamic_size: movabs rax, 9223372036854775805 cmp rdx, rax - ja .LBB5_1 + ja .LBB17_1 lea r8d, [rsi + rdi] xor ecx, ecx mov eax, 0 test r8b, 1 - jne .LBB5_5 + jne .LBB17_5 lea rax, [2*rdx + 4] sub rsi, rax - jae .LBB5_4 -.LBB5_1: + jae .LBB17_4 +.LBB17_1: xor eax, eax mov edx, 1 ret -.LBB5_4: +.LBB17_4: add rdi, rsi mov rcx, rdx mov rax, rdi -.LBB5_5: +.LBB17_5: mov rdx, rcx ret diff --git a/benches/ref_from_suffix_with_elems_dynamic_size.x86-64.mca b/benches/ref_from_suffix_with_elems_dynamic_size.x86-64.mca index eac400f3f4..6e73bee2bb 100644 --- a/benches/ref_from_suffix_with_elems_dynamic_size.x86-64.mca +++ b/benches/ref_from_suffix_with_elems_dynamic_size.x86-64.mca @@ -20,15 +20,15 @@ Instruction Info: [1] [2] [3] [4] [5] [6] Instructions: 1 1 0.33 movabs rax, 9223372036854775805 1 1 0.33 cmp rdx, rax - 1 1 1.00 ja .LBB5_1 + 1 1 1.00 ja .LBB17_1 1 1 0.50 lea r8d, [rsi + rdi] 1 0 0.25 xor ecx, ecx 1 1 0.33 mov eax, 0 1 1 0.33 test r8b, 1 - 1 1 1.00 jne .LBB5_5 + 1 1 1.00 jne .LBB17_5 1 1 0.50 lea rax, [2*rdx + 4] 1 1 0.33 sub rsi, rax - 1 1 1.00 jae .LBB5_4 + 1 1 1.00 jae .LBB17_4 1 0 0.25 xor eax, eax 1 1 0.33 mov edx, 1 1 1 1.00 U ret @@ -58,15 
+58,15 @@ Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - 0.66 0.33 - 0.01 - - movabs rax, 9223372036854775805 - - 0.01 0.99 - - - - cmp rdx, rax - - - - - - 1.00 - - ja .LBB5_1 + - - - - - 1.00 - - ja .LBB17_1 - - 0.99 0.01 - - - - lea r8d, [rsi + rdi] - - - - - - - - xor ecx, ecx - - 0.33 0.33 - 0.34 - - mov eax, 0 - - 0.33 0.34 - 0.33 - - test r8b, 1 - - - - - - 1.00 - - jne .LBB5_5 + - - - - - 1.00 - - jne .LBB17_5 - - 0.34 0.66 - - - - lea rax, [2*rdx + 4] - - - 1.00 - - - - sub rsi, rax - - - - - - 1.00 - - jae .LBB5_4 + - - - - - 1.00 - - jae .LBB17_4 - - - - - - - - xor eax, eax - - 1.00 - - - - - mov edx, 1 - - - - - 1.00 - - ret diff --git a/benches/transmute.cortex-m3 b/benches/transmute.cortex-m3 new file mode 100644 index 0000000000..15cfb152f0 --- /dev/null +++ b/benches/transmute.cortex-m3 @@ -0,0 +1,8 @@ +bench_transmute: + push {r7, lr} + mov r7, sp + ldrh r2, [r1, #4] + ldr r1, [r1] + strh r2, [r0, #4] + str r1, [r0] + pop {r7, pc} diff --git a/benches/transmute.generic-rv32 b/benches/transmute.generic-rv32 new file mode 100644 index 0000000000..932a94a7c2 --- /dev/null +++ b/benches/transmute.generic-rv32 @@ -0,0 +1,14 @@ +bench_transmute: + lbu a2, 4(a1) + lbu a3, 5(a1) + sb a2, 4(a0) + sb a3, 5(a0) + lbu a2, 0(a1) + lbu a3, 1(a1) + lbu a4, 2(a1) + lbu a1, 3(a1) + sb a2, 0(a0) + sb a3, 1(a0) + sb a4, 2(a0) + sb a1, 3(a0) + ret diff --git a/benches/transmute.rs b/benches/transmute.rs index e60bfb252f..199e677e02 100644 --- a/benches/transmute.rs +++ b/benches/transmute.rs @@ -1,5 +1,4 @@ -use zerocopy::Unalign; -use zerocopy_derive::*; +use zerocopy::{Unalign, *}; #[path = "formats/coco_static_size.rs"] mod format; diff --git a/benches/transmute_ref_dynamic_size.cortex-m3 b/benches/transmute_ref_dynamic_size.cortex-m3 new file mode 100644 index 0000000000..5609d5c95c --- /dev/null +++ b/benches/transmute_ref_dynamic_size.cortex-m3 @@ -0,0 +1,5 @@ +bench_transmute_ref_dynamic_size: + push {r7, lr} + mov r7, 
sp + adds r1, #1 + pop {r7, pc} diff --git a/benches/transmute_ref_dynamic_size.generic-rv32 b/benches/transmute_ref_dynamic_size.generic-rv32 new file mode 100644 index 0000000000..ff72615250 --- /dev/null +++ b/benches/transmute_ref_dynamic_size.generic-rv32 @@ -0,0 +1,3 @@ +bench_transmute_ref_dynamic_size: + addi a1, a1, 1 + ret diff --git a/benches/transmute_ref_dynamic_size.rs b/benches/transmute_ref_dynamic_size.rs index 825f0f2bed..39e95d1dbf 100644 --- a/benches/transmute_ref_dynamic_size.rs +++ b/benches/transmute_ref_dynamic_size.rs @@ -1,4 +1,4 @@ -use zerocopy_derive::*; +use zerocopy::*; #[path = "formats/coco_dynamic_size.rs"] mod format; diff --git a/benches/transmute_ref_static_size.cortex-m3 b/benches/transmute_ref_static_size.cortex-m3 new file mode 100644 index 0000000000..ce2f38ca9b --- /dev/null +++ b/benches/transmute_ref_static_size.cortex-m3 @@ -0,0 +1,4 @@ +bench_transmute_ref_static_size: + push {r7, lr} + mov r7, sp + pop {r7, pc} diff --git a/benches/transmute_ref_static_size.generic-rv32 b/benches/transmute_ref_static_size.generic-rv32 new file mode 100644 index 0000000000..fc5a0ac58b --- /dev/null +++ b/benches/transmute_ref_static_size.generic-rv32 @@ -0,0 +1,2 @@ +bench_transmute_ref_static_size: + ret diff --git a/benches/transmute_ref_static_size.rs b/benches/transmute_ref_static_size.rs index a6db611fde..5c2d5b2bc2 100644 --- a/benches/transmute_ref_static_size.rs +++ b/benches/transmute_ref_static_size.rs @@ -1,4 +1,4 @@ -use zerocopy_derive::*; +use zerocopy::*; #[path = "formats/coco_static_size.rs"] mod format; diff --git a/benches/try_read_from_bytes.cortex-m3 b/benches/try_read_from_bytes.cortex-m3 new file mode 100644 index 0000000000..29663e3409 --- /dev/null +++ b/benches/try_read_from_bytes.cortex-m3 @@ -0,0 +1,23 @@ +bench_try_read_from_bytes_static_size: + cmp r2, #6 + movw r2, #49344 + bne .LBB21_3 + ldrh r3, [r1] + cmp r3, r2 + bne .LBB21_3 + push {r7, lr} + mov r7, sp + sub sp, #8 + ldr.w r1, [r1, #2] + strh.w r2, 
[sp] + str.w r1, [sp, #2] + ldr r2, [sp] + lsrs r1, r1, #16 + str r2, [r0] + strh r1, [r0, #4] + add sp, #8 + pop {r7, pc} +.LBB21_3: + adds r1, r2, #1 + strh r1, [r0] + bx lr diff --git a/benches/try_read_from_bytes.generic-rv32 b/benches/try_read_from_bytes.generic-rv32 new file mode 100644 index 0000000000..0e09e59893 --- /dev/null +++ b/benches/try_read_from_bytes.generic-rv32 @@ -0,0 +1,35 @@ +bench_try_read_from_bytes_static_size: + lui a4, 12 + li a5, 6 + addi a3, a4, 193 + bne a2, a5, .LBB21_3 + lbu a2, 1(a1) + lbu a5, 0(a1) + slli a2, a2, 8 + or a2, a2, a5 + addi a4, a4, 192 + bne a2, a4, .LBB21_3 + addi sp, sp, -16 + lbu a2, 3(a1) + lbu a4, 2(a1) + lbu a5, 4(a1) + lbu a1, 5(a1) + slli a2, a2, 8 + or a2, a2, a4 + addi a3, a3, -1 + slli a5, a5, 16 + sh a3, 8(sp) + sh a2, 10(sp) + lw a2, 8(sp) + slli a1, a1, 24 + or a1, a1, a5 + srli a1, a1, 16 + srli a3, a2, 16 + sh a2, 0(a0) + sh a3, 2(a0) + sh a1, 4(a0) + addi sp, sp, 16 + ret +.LBB21_3: + sh a3, 0(a0) + ret diff --git a/benches/try_read_from_bytes.x86-64 b/benches/try_read_from_bytes.x86-64 index 08088a08fd..5a294dacac 100644 --- a/benches/try_read_from_bytes.x86-64 +++ b/benches/try_read_from_bytes.x86-64 @@ -1,22 +1,22 @@ bench_try_read_from_bytes_static_size: mov ax, -16191 cmp rsi, 6 - jne .LBB5_1 + jne .LBB21_1 mov ecx, dword ptr [rdi] movzx edx, cx cmp edx, 49344 - jne .LBB5_4 + jne .LBB21_4 movzx eax, word ptr [rdi + 4] shl rax, 32 or rcx, rax shr rcx, 16 mov ax, -16192 -.LBB5_4: +.LBB21_4: shl rcx, 16 movzx eax, ax or rax, rcx ret -.LBB5_1: +.LBB21_1: shl rcx, 16 movzx eax, ax or rax, rcx diff --git a/benches/try_read_from_bytes.x86-64.mca b/benches/try_read_from_bytes.x86-64.mca index 385e6a4802..557ebdd268 100644 --- a/benches/try_read_from_bytes.x86-64.mca +++ b/benches/try_read_from_bytes.x86-64.mca @@ -20,11 +20,11 @@ Instruction Info: [1] [2] [3] [4] [5] [6] Instructions: 1 1 0.33 mov ax, -16191 1 1 0.33 cmp rsi, 6 - 1 1 1.00 jne .LBB5_1 + 1 1 1.00 jne .LBB21_1 1 5 0.50 * mov ecx, dword ptr 
[rdi] 1 1 0.33 movzx edx, cx 1 1 0.33 cmp edx, 49344 - 1 1 1.00 jne .LBB5_4 + 1 1 1.00 jne .LBB21_4 1 5 0.50 * movzx eax, word ptr [rdi + 4] 1 1 0.50 shl rax, 32 1 1 0.33 or rcx, rax @@ -59,11 +59,11 @@ Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - - 0.99 - 0.01 - - mov ax, -16191 - - - 0.01 - 0.99 - - cmp rsi, 6 - - - - - - 1.00 - - jne .LBB5_1 + - - - - - 1.00 - - jne .LBB21_1 - - - - - - - 1.00 mov ecx, dword ptr [rdi] - - 0.98 - - 0.02 - - movzx edx, cx - - 0.99 0.01 - - - - cmp edx, 49344 - - - - - - 1.00 - - jne .LBB5_4 + - - - - - 1.00 - - jne .LBB21_4 - - - - - - 1.00 - movzx eax, word ptr [rdi + 4] - - 0.01 - - 0.99 - - shl rax, 32 - - 0.02 0.98 - - - - or rcx, rax diff --git a/benches/try_read_from_prefix.cortex-m3 b/benches/try_read_from_prefix.cortex-m3 new file mode 100644 index 0000000000..05fb97e985 --- /dev/null +++ b/benches/try_read_from_prefix.cortex-m3 @@ -0,0 +1,18 @@ +bench_try_read_from_prefix_static_size: + push {r7, lr} + mov r7, sp + cmp r2, #6 + movw r12, #49344 + blo .LBB22_3 + ldr r3, [r1] + uxth r2, r3 + cmp r2, r12 + bne .LBB22_3 + ldrh r1, [r1, #4] + str r3, [r0] + strh r1, [r0, #4] + pop {r7, pc} +.LBB22_3: + add.w r1, r12, #1 + strh r1, [r0] + pop {r7, pc} diff --git a/benches/try_read_from_prefix.generic-rv32 b/benches/try_read_from_prefix.generic-rv32 new file mode 100644 index 0000000000..52763d1387 --- /dev/null +++ b/benches/try_read_from_prefix.generic-rv32 @@ -0,0 +1,32 @@ +bench_try_read_from_prefix_static_size: + li a3, 6 + bltu a2, a3, .LBB22_3 + lbu a2, 1(a1) + lbu a3, 0(a1) + lbu a4, 2(a1) + lbu a5, 3(a1) + slli a2, a2, 8 + or a2, a2, a3 + slli a4, a4, 16 + slli a5, a5, 24 + or a4, a4, a5 + lui a3, 12 + or a2, a2, a4 + slli a4, a2, 16 + srli a4, a4, 16 + addi a3, a3, 192 + bne a4, a3, .LBB22_3 + lbu a3, 5(a1) + lbu a1, 4(a1) + slli a3, a3, 8 + or a1, a1, a3 + srli a3, a2, 16 + sh a2, 0(a0) + sh a3, 2(a0) + sh a1, 4(a0) + ret +.LBB22_3: + lui a1, 12 + addi a1, a1, 193 + sh a1, 
0(a0) + ret diff --git a/benches/try_read_from_prefix.x86-64 b/benches/try_read_from_prefix.x86-64 index d3e1edc3ea..d2107f2cb8 100644 --- a/benches/try_read_from_prefix.x86-64 +++ b/benches/try_read_from_prefix.x86-64 @@ -1,7 +1,7 @@ bench_try_read_from_prefix_static_size: mov eax, 49345 cmp rsi, 6 - jb .LBB5_2 + jb .LBB22_2 mov eax, dword ptr [rdi] movzx ecx, word ptr [rdi + 4] shl rcx, 32 @@ -12,5 +12,5 @@ bench_try_read_from_prefix_static_size: cmp eax, 49344 mov eax, 49345 cmove rax, rcx -.LBB5_2: +.LBB22_2: ret diff --git a/benches/try_read_from_prefix.x86-64.mca b/benches/try_read_from_prefix.x86-64.mca index 40401d89e8..7dcd2f4ecb 100644 --- a/benches/try_read_from_prefix.x86-64.mca +++ b/benches/try_read_from_prefix.x86-64.mca @@ -20,7 +20,7 @@ Instruction Info: [1] [2] [3] [4] [5] [6] Instructions: 1 1 0.33 mov eax, 49345 1 1 0.33 cmp rsi, 6 - 1 1 1.00 jb .LBB5_2 + 1 1 1.00 jb .LBB22_2 1 5 0.50 * mov eax, dword ptr [rdi] 1 5 0.50 * movzx ecx, word ptr [rdi + 4] 1 1 0.50 shl rcx, 32 @@ -53,7 +53,7 @@ Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - 0.65 0.01 - 0.34 - - mov eax, 49345 - - 0.01 0.33 - 0.66 - - cmp rsi, 6 - - - - - - 1.00 - - jb .LBB5_2 + - - - - - 1.00 - - jb .LBB22_2 - - - - - - - 1.00 mov eax, dword ptr [rdi] - - - - - - 1.00 - movzx ecx, word ptr [rdi + 4] - - 0.65 - - 0.35 - - shl rcx, 32 diff --git a/benches/try_read_from_suffix.cortex-m3 b/benches/try_read_from_suffix.cortex-m3 new file mode 100644 index 0000000000..1d40fe2516 --- /dev/null +++ b/benches/try_read_from_suffix.cortex-m3 @@ -0,0 +1,19 @@ +bench_try_read_from_suffix_static_size: + push {r7, lr} + mov r7, sp + cmp r2, #6 + blo .LBB23_3 + add r2, r1 + ldr r12, [r2, #-6] + movw r1, #49344 + uxth.w r3, r12 + cmp r3, r1 + bne .LBB23_3 + ldrh r1, [r2, #-2] + str.w r12, [r0] + strh r1, [r0, #4] + pop {r7, pc} +.LBB23_3: + movw r1, #49345 + strh r1, [r0] + pop {r7, pc} diff --git a/benches/try_read_from_suffix.generic-rv32 
b/benches/try_read_from_suffix.generic-rv32 new file mode 100644 index 0000000000..755a99dc99 --- /dev/null +++ b/benches/try_read_from_suffix.generic-rv32 @@ -0,0 +1,33 @@ +bench_try_read_from_suffix_static_size: + li a3, 6 + bltu a2, a3, .LBB23_3 + add a1, a1, a2 + lbu a2, -5(a1) + lbu a3, -6(a1) + lbu a4, -4(a1) + lbu a5, -3(a1) + slli a2, a2, 8 + or a2, a2, a3 + slli a4, a4, 16 + slli a5, a5, 24 + or a4, a4, a5 + lui a3, 12 + or a2, a2, a4 + slli a4, a2, 16 + srli a4, a4, 16 + addi a3, a3, 192 + bne a4, a3, .LBB23_3 + lbu a3, -1(a1) + lbu a1, -2(a1) + slli a3, a3, 8 + or a1, a1, a3 + srli a3, a2, 16 + sh a2, 0(a0) + sh a3, 2(a0) + sh a1, 4(a0) + ret +.LBB23_3: + lui a1, 12 + addi a1, a1, 193 + sh a1, 0(a0) + ret diff --git a/benches/try_read_from_suffix.x86-64 b/benches/try_read_from_suffix.x86-64 index 095e326f04..6358897975 100644 --- a/benches/try_read_from_suffix.x86-64 +++ b/benches/try_read_from_suffix.x86-64 @@ -1,7 +1,7 @@ bench_try_read_from_suffix_static_size: mov eax, 49345 cmp rsi, 6 - jb .LBB5_2 + jb .LBB23_2 mov eax, dword ptr [rdi + rsi - 6] movzx ecx, word ptr [rdi + rsi - 2] shl rcx, 32 @@ -14,5 +14,5 @@ bench_try_read_from_suffix_static_size: and rcx, -65536 xor rax, 49345 or rax, rcx -.LBB5_2: +.LBB23_2: ret diff --git a/benches/try_read_from_suffix.x86-64.mca b/benches/try_read_from_suffix.x86-64.mca index d3eaadbb8a..a3daa00482 100644 --- a/benches/try_read_from_suffix.x86-64.mca +++ b/benches/try_read_from_suffix.x86-64.mca @@ -20,7 +20,7 @@ Instruction Info: [1] [2] [3] [4] [5] [6] Instructions: 1 1 0.33 mov eax, 49345 1 1 0.33 cmp rsi, 6 - 1 1 1.00 jb .LBB5_2 + 1 1 1.00 jb .LBB23_2 1 5 0.50 * mov eax, dword ptr [rdi + rsi - 6] 1 5 0.50 * movzx ecx, word ptr [rdi + rsi - 2] 1 1 0.50 shl rcx, 32 @@ -55,7 +55,7 @@ Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - 0.32 0.01 - 0.67 - - mov eax, 49345 - - 0.62 0.02 - 0.36 - - cmp rsi, 6 - - - - - - 1.00 - - jb .LBB5_2 + - - - - - 1.00 - - jb .LBB23_2 - - - 
- - - - 1.00 mov eax, dword ptr [rdi + rsi - 6] - - - - - - 1.00 - movzx ecx, word ptr [rdi + rsi - 2] - - 0.37 - - 0.63 - - shl rcx, 32 diff --git a/benches/try_ref_from_bytes_dynamic_padding.cortex-m3 b/benches/try_ref_from_bytes_dynamic_padding.cortex-m3 new file mode 100644 index 0000000000..c5f842907b --- /dev/null +++ b/benches/try_ref_from_bytes_dynamic_padding.cortex-m3 @@ -0,0 +1,30 @@ +bench_try_ref_from_bytes_dynamic_padding: + push {r7, lr} + mov r7, sp + lsls r2, r0, #30 + bne .LBB24_4 + movw r2, #65532 + movt r2, #32767 + ands r2, r1 + cmp r2, #9 + blo .LBB24_4 + movw r3, #43691 + subs r2, #9 + movt r3, #43690 + umull r2, r3, r2, r3 + lsrs r2, r3, #1 + add.w r3, r2, r2, lsl #1 + orr r3, r3, #3 + adds r3, #9 + cmp r1, r3 + bne .LBB24_4 + ldrh.w r12, [r0] + movw r3, #49344 + cmp r12, r3 + beq .LBB24_5 +.LBB24_4: + movs r0, #0 + mov r2, r1 +.LBB24_5: + mov r1, r2 + pop {r7, pc} diff --git a/benches/try_ref_from_bytes_dynamic_padding.generic-rv32 b/benches/try_ref_from_bytes_dynamic_padding.generic-rv32 new file mode 100644 index 0000000000..202d365cef --- /dev/null +++ b/benches/try_ref_from_bytes_dynamic_padding.generic-rv32 @@ -0,0 +1,31 @@ +bench_try_ref_from_bytes_dynamic_padding: + andi a2, a0, 3 + bnez a2, .LBB24_4 + lui a2, 524288 + addi a2, a2, -4 + and a2, a2, a1 + li a3, 9 + bltu a2, a3, .LBB24_4 + addi a2, a2, -9 + lui a3, 699051 + addi a3, a3, -1365 + mulhu a3, a2, a3 + srli a2, a3, 1 + andi a3, a3, -2 + add a3, a3, a2 + ori a3, a3, 3 + addi a3, a3, 9 + bne a1, a3, .LBB24_4 + lbu a3, 1(a0) + lbu a4, 0(a0) + slli a3, a3, 8 + or a3, a3, a4 + lui a4, 12 + addi a4, a4, 192 + beq a3, a4, .LBB24_5 +.LBB24_4: + li a0, 0 + mv a2, a1 +.LBB24_5: + mv a1, a2 + ret diff --git a/benches/try_ref_from_bytes_dynamic_padding.x86-64 b/benches/try_ref_from_bytes_dynamic_padding.x86-64 index 217c5fc617..1bc6e45334 100644 --- a/benches/try_ref_from_bytes_dynamic_padding.x86-64 +++ b/benches/try_ref_from_bytes_dynamic_padding.x86-64 @@ -1,10 +1,10 @@ 
bench_try_ref_from_bytes_dynamic_padding: test dil, 3 - jne .LBB5_4 + jne .LBB24_4 movabs rax, 9223372036854775804 and rax, rsi cmp rax, 9 - jb .LBB5_4 + jb .LBB24_4 add rax, -9 movabs rcx, -6148914691236517205 mul rcx @@ -13,12 +13,12 @@ bench_try_ref_from_bytes_dynamic_padding: or rax, 3 add rax, 9 cmp rsi, rax - jne .LBB5_4 + jne .LBB24_4 cmp word ptr [rdi], -16192 - je .LBB5_5 -.LBB5_4: + je .LBB24_5 +.LBB24_4: xor edi, edi mov rdx, rsi -.LBB5_5: +.LBB24_5: mov rax, rdi ret diff --git a/benches/try_ref_from_bytes_dynamic_padding.x86-64.mca b/benches/try_ref_from_bytes_dynamic_padding.x86-64.mca index 95b993c7e0..06c9f5d7ea 100644 --- a/benches/try_ref_from_bytes_dynamic_padding.x86-64.mca +++ b/benches/try_ref_from_bytes_dynamic_padding.x86-64.mca @@ -19,11 +19,11 @@ Instruction Info: [1] [2] [3] [4] [5] [6] Instructions: 1 1 0.33 test dil, 3 - 1 1 1.00 jne .LBB5_4 + 1 1 1.00 jne .LBB24_4 1 1 0.33 movabs rax, 9223372036854775804 1 1 0.33 and rax, rsi 1 1 0.33 cmp rax, 9 - 1 1 1.00 jb .LBB5_4 + 1 1 1.00 jb .LBB24_4 1 1 0.33 add rax, -9 1 1 0.33 movabs rcx, -6148914691236517205 2 4 1.00 mul rcx @@ -32,9 +32,9 @@ Instruction Info: 1 1 0.33 or rax, 3 1 1 0.33 add rax, 9 1 1 0.33 cmp rsi, rax - 1 1 1.00 jne .LBB5_4 + 1 1 1.00 jne .LBB24_4 2 6 0.50 * cmp word ptr [rdi], -16192 - 1 1 1.00 je .LBB5_5 + 1 1 1.00 je .LBB24_5 1 0 0.25 xor edi, edi 1 1 0.33 mov rdx, rsi 1 1 0.33 mov rax, rdi @@ -59,11 +59,11 @@ Resource pressure per iteration: Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - 0.48 0.51 - 0.01 - - test dil, 3 - - - - - - 1.00 - - jne .LBB5_4 + - - - - - 1.00 - - jne .LBB24_4 - - 0.51 0.49 - - - - movabs rax, 9223372036854775804 - - 0.01 0.99 - - - - and rax, rsi - - 0.51 0.49 - - - - cmp rax, 9 - - - - - - 1.00 - - jb .LBB5_4 + - - - - - 1.00 - - jb .LBB24_4 - - 0.98 - - 0.02 - - add rax, -9 - - 0.98 0.02 - - - - movabs rcx, -6148914691236517205 - - 1.00 1.00 - - - - mul rcx @@ -72,9 +72,9 @@ Resource pressure by 
instruction: - - - 0.51 - 0.49 - - or rax, 3 - - 0.01 0.49 - 0.50 - - add rax, 9 - - - 0.02 - 0.98 - - cmp rsi, rax - - - - - - 1.00 - - jne .LBB5_4 + - - - - - 1.00 - - jne .LBB24_4 - - 0.51 0.49 - - 0.50 0.50 cmp word ptr [rdi], -16192 - - - - - - 1.00 - - je .LBB5_5 + - - - - - 1.00 - - je .LBB24_5 - - - - - - - - xor edi, edi - - 0.50 0.50 - - - - mov rdx, rsi - - 0.50 0.48 - 0.02 - - mov rax, rdi diff --git a/benches/try_ref_from_bytes_dynamic_size.cortex-m3 b/benches/try_ref_from_bytes_dynamic_size.cortex-m3 new file mode 100644 index 0000000000..741b880164 --- /dev/null +++ b/benches/try_ref_from_bytes_dynamic_size.cortex-m3 @@ -0,0 +1,21 @@ +bench_try_ref_from_bytes_dynamic_size: + push {r7, lr} + mov r7, sp + lsls r2, r0, #31 + bne .LBB25_4 + cmp r1, #4 + blo .LBB25_4 + subs r2, r1, #4 + bic r3, r2, #1 + adds r3, #4 + cmp r1, r3 + bne .LBB25_4 + ldrh.w r12, [r0] + movw r3, #49344 + cmp r12, r3 + itt eq + lsreq r1, r2, #1 + popeq {r7, pc} +.LBB25_4: + movs r0, #0 + pop {r7, pc} diff --git a/benches/try_ref_from_bytes_dynamic_size.generic-rv32 b/benches/try_ref_from_bytes_dynamic_size.generic-rv32 new file mode 100644 index 0000000000..7388271516 --- /dev/null +++ b/benches/try_ref_from_bytes_dynamic_size.generic-rv32 @@ -0,0 +1,21 @@ +bench_try_ref_from_bytes_dynamic_size: + andi a2, a0, 1 + bnez a2, .LBB25_5 + li a2, 4 + bltu a1, a2, .LBB25_5 + addi a2, a1, -4 + andi a3, a2, -2 + addi a3, a3, 4 + bne a1, a3, .LBB25_5 + lbu a3, 1(a0) + lbu a4, 0(a0) + slli a3, a3, 8 + or a3, a3, a4 + lui a4, 12 + addi a4, a4, 192 + bne a3, a4, .LBB25_5 + srli a1, a2, 1 + ret +.LBB25_5: + li a0, 0 + ret diff --git a/benches/try_ref_from_bytes_dynamic_size.x86-64 b/benches/try_ref_from_bytes_dynamic_size.x86-64 index cf67afd31c..76b20d5ef4 100644 --- a/benches/try_ref_from_bytes_dynamic_size.x86-64 +++ b/benches/try_ref_from_bytes_dynamic_size.x86-64 @@ -5,18 +5,18 @@ bench_try_ref_from_bytes_dynamic_size: setb cl or cl, al test cl, 1 - jne .LBB5_4 + jne .LBB25_4 lea rcx, 
[rdx - 4] mov rsi, rcx and rsi, -2 add rsi, 4 cmp rdx, rsi - jne .LBB5_4 + jne .LBB25_4 cmp word ptr [rax], -16192 - jne .LBB5_4 + jne .LBB25_4 shr rcx mov rdx, rcx ret -.LBB5_4: +.LBB25_4: xor eax, eax ret diff --git a/benches/try_ref_from_bytes_dynamic_size.x86-64.mca b/benches/try_ref_from_bytes_dynamic_size.x86-64.mca index ecd7a18f6d..c733e27d60 100644 --- a/benches/try_ref_from_bytes_dynamic_size.x86-64.mca +++ b/benches/try_ref_from_bytes_dynamic_size.x86-64.mca @@ -24,15 +24,15 @@ Instruction Info: 1 1 0.50 setb cl 1 1 0.33 or cl, al 1 1 0.33 test cl, 1 - 1 1 1.00 jne .LBB5_4 + 1 1 1.00 jne .LBB25_4 1 1 0.50 lea rcx, [rdx - 4] 1 1 0.33 mov rsi, rcx 1 1 0.33 and rsi, -2 1 1 0.33 add rsi, 4 1 1 0.33 cmp rdx, rsi - 1 1 1.00 jne .LBB5_4 + 1 1 1.00 jne .LBB25_4 2 6 0.50 * cmp word ptr [rax], -16192 - 1 1 1.00 jne .LBB5_4 + 1 1 1.00 jne .LBB25_4 1 1 0.50 shr rcx 1 1 0.33 mov rdx, rcx 1 1 1.00 U ret @@ -63,15 +63,15 @@ Resource pressure by instruction: - - 0.99 - - 0.01 - - setb cl - - 0.01 0.99 - - - - or cl, al - - - 1.00 - - - - test cl, 1 - - - - - - 1.00 - - jne .LBB5_4 + - - - - - 1.00 - - jne .LBB25_4 - - 0.66 0.34 - - - - lea rcx, [rdx - 4] - - 0.33 0.66 - 0.01 - - mov rsi, rcx - - 1.00 - - - - - and rsi, -2 - - 0.66 0.34 - - - - add rsi, 4 - - - 1.00 - - - - cmp rdx, rsi - - - - - - 1.00 - - jne .LBB5_4 + - - - - - 1.00 - - jne .LBB25_4 - - - - - 1.00 0.50 0.50 cmp word ptr [rax], -16192 - - - - - - 1.00 - - jne .LBB5_4 + - - - - - 1.00 - - jne .LBB25_4 - - 0.67 - - 0.33 - - shr rcx - - 0.67 0.33 - - - - mov rdx, rcx - - - - - 1.00 - - ret diff --git a/benches/try_ref_from_bytes_static_size.cortex-m3 b/benches/try_ref_from_bytes_static_size.cortex-m3 new file mode 100644 index 0000000000..6dcd6d3154 --- /dev/null +++ b/benches/try_ref_from_bytes_static_size.cortex-m3 @@ -0,0 +1,16 @@ +bench_try_ref_from_bytes_static_size: + push {r7, lr} + mov r7, sp + cmp r1, #6 + it eq + andseq r1, r0, #1 + beq .LBB26_2 + movs r0, #0 + pop {r7, pc} +.LBB26_2: + ldrh r1, 
[r0] + movw r2, #49344 + cmp r1, r2 + it ne + movne r0, #0 + pop {r7, pc} diff --git a/benches/try_ref_from_bytes_static_size.generic-rv32 b/benches/try_ref_from_bytes_static_size.generic-rv32 new file mode 100644 index 0000000000..27457aeec0 --- /dev/null +++ b/benches/try_ref_from_bytes_static_size.generic-rv32 @@ -0,0 +1,16 @@ +bench_try_ref_from_bytes_static_size: + li a2, 6 + bne a1, a2, .LBB26_3 + andi a1, a0, 1 + bnez a1, .LBB26_3 + lbu a1, 1(a0) + lbu a2, 0(a0) + slli a1, a1, 8 + or a1, a1, a2 + lui a2, 12 + addi a2, a2, 192 + beq a1, a2, .LBB26_4 +.LBB26_3: + li a0, 0 +.LBB26_4: + ret diff --git a/benches/try_ref_from_bytes_static_size.x86-64 b/benches/try_ref_from_bytes_static_size.x86-64 index a11f27189e..7e05794f78 100644 --- a/benches/try_ref_from_bytes_static_size.x86-64 +++ b/benches/try_ref_from_bytes_static_size.x86-64 @@ -4,10 +4,10 @@ bench_try_ref_from_bytes_static_size: setne cl or cl, al test cl, 1 - jne .LBB5_2 + jne .LBB26_2 cmp word ptr [rax], -16192 - je .LBB5_3 -.LBB5_2: + je .LBB26_3 +.LBB26_2: xor eax, eax -.LBB5_3: +.LBB26_3: ret diff --git a/benches/try_ref_from_bytes_static_size.x86-64.mca b/benches/try_ref_from_bytes_static_size.x86-64.mca index e6bd20533a..fd5f2f16d8 100644 --- a/benches/try_ref_from_bytes_static_size.x86-64.mca +++ b/benches/try_ref_from_bytes_static_size.x86-64.mca @@ -23,9 +23,9 @@ Instruction Info: 1 1 0.50 setne cl 1 1 0.33 or cl, al 1 1 0.33 test cl, 1 - 1 1 1.00 jne .LBB5_2 + 1 1 1.00 jne .LBB26_2 2 6 0.50 * cmp word ptr [rax], -16192 - 1 1 1.00 je .LBB5_3 + 1 1 1.00 je .LBB26_3 1 0 0.25 xor eax, eax 1 1 1.00 U ret @@ -52,8 +52,8 @@ Resource pressure by instruction: - - 1.00 - - - - - setne cl - - 0.97 0.02 - 0.01 - - or cl, al - - 0.96 0.03 - 0.01 - - test cl, 1 - - - - - - 1.00 - - jne .LBB5_2 + - - - - - 1.00 - - jne .LBB26_2 - - 0.01 0.98 - 0.01 0.50 0.50 cmp word ptr [rax], -16192 - - - - - - 1.00 - - je .LBB5_3 + - - - - - 1.00 - - je .LBB26_3 - - - - - - - - xor eax, eax - - - - - 1.00 - - ret diff 
--git a/benches/try_ref_from_bytes_with_elems_dynamic_padding.cortex-m3 b/benches/try_ref_from_bytes_with_elems_dynamic_padding.cortex-m3 new file mode 100644 index 0000000000..9915b443ea --- /dev/null +++ b/benches/try_ref_from_bytes_with_elems_dynamic_padding.cortex-m3 @@ -0,0 +1,35 @@ +bench_try_ref_from_bytes_with_elems_dynamic_padding: + movs r3, #3 + umull r3, r12, r2, r3 + cmp.w r12, #0 + bne .LBB27_6 + cmn.w r3, #10 + bhi .LBB27_6 + push {r4, r6, r7, lr} + add r7, sp, #8 + mov r12, r0 + movs r0, #3 + add.w lr, r3, #9 + bics r0, r3 + add.w r3, r0, lr + cmp r1, r3 + mov.w r0, #0 + it eq + andseq r4, r12, #3 + beq .LBB27_4 + pop {r4, r6, r7, pc} +.LBB27_4: + cmp r3, lr + it lo + poplo {r4, r6, r7, pc} + ldrh.w r0, [r12] + movw r3, #49344 + cmp r0, r3 + ite eq + moveq r1, r2 + movne.w r12, #0 + mov r0, r12 + pop {r4, r6, r7, pc} +.LBB27_6: + movs r0, #0 + bx lr diff --git a/benches/try_ref_from_bytes_with_elems_dynamic_padding.generic-rv32 b/benches/try_ref_from_bytes_with_elems_dynamic_padding.generic-rv32 new file mode 100644 index 0000000000..316a9f4107 --- /dev/null +++ b/benches/try_ref_from_bytes_with_elems_dynamic_padding.generic-rv32 @@ -0,0 +1,37 @@ +bench_try_ref_from_bytes_with_elems_dynamic_padding: + li a3, 3 + mulhu a3, a2, a3 + bnez a3, .LBB27_9 + slli a3, a2, 1 + add a3, a3, a2 + li a4, -10 + bltu a4, a3, .LBB27_9 + addi a4, a3, 9 + not a3, a3 + andi a3, a3, 3 + add a5, a3, a4 + li a6, 0 + bne a1, a5, .LBB27_8 + andi a3, a0, 3 + bnez a3, .LBB27_8 + bltu a5, a4, .LBB27_8 + lbu a3, 1(a0) + lbu a4, 0(a0) + lui a5, 12 + slli a3, a3, 8 + or a4, a4, a3 + addi a5, a5, 192 + xor a3, a4, a5 + snez a3, a3 + beq a4, a5, .LBB27_7 + mv a2, a1 +.LBB27_7: + addi a3, a3, -1 + and a6, a3, a0 + mv a1, a2 +.LBB27_8: + mv a0, a6 + ret +.LBB27_9: + li a0, 0 + ret diff --git a/benches/try_ref_from_bytes_with_elems_dynamic_padding.x86-64 b/benches/try_ref_from_bytes_with_elems_dynamic_padding.x86-64 index f8a719dd10..5d05c71b87 100644 --- 
a/benches/try_ref_from_bytes_with_elems_dynamic_padding.x86-64 +++ b/benches/try_ref_from_bytes_with_elems_dynamic_padding.x86-64 @@ -3,10 +3,10 @@ bench_try_ref_from_bytes_with_elems_dynamic_padding: mov edx, 3 mov rax, rcx mul rdx - jo .LBB5_8 + jo .LBB27_8 mov rdx, rax cmp rax, -10 - ja .LBB5_8 + ja .LBB27_8 mov eax, edx not eax and eax, 3 @@ -14,23 +14,23 @@ bench_try_ref_from_bytes_with_elems_dynamic_padding: add r8, 9 xor eax, eax cmp rsi, r8 - jne .LBB5_6 + jne .LBB27_6 mov r9d, edi and r9d, 3 - jne .LBB5_6 + jne .LBB27_6 add rdx, 9 cmp r8, rdx - jb .LBB5_6 + jb .LBB27_6 movzx edx, word ptr [rdi] cmp dx, -16192 cmove rsi, rcx xor eax, eax cmp edx, 49344 cmove rax, rdi -.LBB5_6: +.LBB27_6: mov rdx, rsi ret -.LBB5_8: +.LBB27_8: xor eax, eax mov rdx, rsi ret diff --git a/benches/try_ref_from_bytes_with_elems_dynamic_padding.x86-64.mca b/benches/try_ref_from_bytes_with_elems_dynamic_padding.x86-64.mca index bc48088077..efc7d340bf 100644 --- a/benches/try_ref_from_bytes_with_elems_dynamic_padding.x86-64.mca +++ b/benches/try_ref_from_bytes_with_elems_dynamic_padding.x86-64.mca @@ -22,10 +22,10 @@ Instruction Info: 1 1 0.33 mov edx, 3 1 1 0.33 mov rax, rcx 2 4 1.00 mul rdx - 1 1 1.00 jo .LBB5_8 + 1 1 1.00 jo .LBB27_8 1 1 0.33 mov rdx, rax 1 1 0.33 cmp rax, -10 - 1 1 1.00 ja .LBB5_8 + 1 1 1.00 ja .LBB27_8 1 1 0.33 mov eax, edx 1 1 0.33 not eax 1 1 0.33 and eax, 3 @@ -33,13 +33,13 @@ Instruction Info: 1 1 0.33 add r8, 9 1 0 0.25 xor eax, eax 1 1 0.33 cmp rsi, r8 - 1 1 1.00 jne .LBB5_6 + 1 1 1.00 jne .LBB27_6 1 1 0.33 mov r9d, edi 1 1 0.33 and r9d, 3 - 1 1 1.00 jne .LBB5_6 + 1 1 1.00 jne .LBB27_6 1 1 0.33 add rdx, 9 1 1 0.33 cmp r8, rdx - 1 1 1.00 jb .LBB5_6 + 1 1 1.00 jb .LBB27_6 1 5 0.50 * movzx edx, word ptr [rdi] 1 1 0.33 cmp dx, -16192 2 2 0.67 cmove rsi, rcx @@ -74,10 +74,10 @@ Resource pressure by instruction: - - 0.74 0.03 - 0.23 - - mov edx, 3 - - 0.60 0.40 - - - - mov rax, rcx - - 1.00 1.00 - - - - mul rdx - - - - - - 1.00 - - jo .LBB5_8 + - - - - - 1.00 - 
- jo .LBB27_8 - - 0.11 0.89 - - - - mov rdx, rax - - 0.99 0.01 - - - - cmp rax, -10 - - - - - - 1.00 - - ja .LBB5_8 + - - - - - 1.00 - - ja .LBB27_8 - - 0.01 0.89 - 0.10 - - mov eax, edx - - 0.11 0.89 - - - - not eax - - 0.01 0.88 - 0.11 - - and eax, 3 @@ -85,13 +85,13 @@ Resource pressure by instruction: - - 0.01 0.99 - - - - add r8, 9 - - - - - - - - xor eax, eax - - - 0.99 - 0.01 - - cmp rsi, r8 - - - - - - 1.00 - - jne .LBB5_6 + - - - - - 1.00 - - jne .LBB27_6 - - 0.42 - - 0.58 - - mov r9d, edi - - 0.53 0.01 - 0.46 - - and r9d, 3 - - - - - - 1.00 - - jne .LBB5_6 + - - - - - 1.00 - - jne .LBB27_6 - - 0.99 0.01 - - - - add rdx, 9 - - 0.99 0.01 - - - - cmp r8, rdx - - - - - - 1.00 - - jb .LBB5_6 + - - - - - 1.00 - - jb .LBB27_6 - - - - - - 0.50 0.50 movzx edx, word ptr [rdi] - - 0.45 0.01 - 0.54 - - cmp dx, -16192 - - 1.00 0.35 - 0.65 - - cmove rsi, rcx diff --git a/benches/try_ref_from_bytes_with_elems_dynamic_size.cortex-m3 b/benches/try_ref_from_bytes_with_elems_dynamic_size.cortex-m3 new file mode 100644 index 0000000000..4b6435b05b --- /dev/null +++ b/benches/try_ref_from_bytes_with_elems_dynamic_size.cortex-m3 @@ -0,0 +1,23 @@ +bench_try_ref_from_bytes_with_elems_dynamic_size: + push {r7, lr} + mov r7, sp + movw r3, #65533 + movt r3, #32767 + cmp r2, r3 + bhi .LBB28_4 + ands r3, r0, #1 + bne .LBB28_4 + movs r3, #4 + add.w r3, r3, r2, lsl #1 + cmp r1, r3 + bne .LBB28_4 + ldrh.w r12, [r0] + movw r3, #49344 + cmp r12, r3 + beq .LBB28_5 +.LBB28_4: + movs r0, #0 + mov r2, r1 +.LBB28_5: + mov r1, r2 + pop {r7, pc} diff --git a/benches/try_ref_from_bytes_with_elems_dynamic_size.generic-rv32 b/benches/try_ref_from_bytes_with_elems_dynamic_size.generic-rv32 new file mode 100644 index 0000000000..39fc0824aa --- /dev/null +++ b/benches/try_ref_from_bytes_with_elems_dynamic_size.generic-rv32 @@ -0,0 +1,22 @@ +bench_try_ref_from_bytes_with_elems_dynamic_size: + lui a3, 524288 + addi a3, a3, -3 + bltu a3, a2, .LBB28_4 + andi a3, a0, 1 + bnez a3, .LBB28_4 + slli a3, a2, 1 
+ addi a3, a3, 4 + bne a1, a3, .LBB28_4 + lbu a3, 1(a0) + lbu a4, 0(a0) + slli a3, a3, 8 + or a3, a3, a4 + lui a4, 12 + addi a4, a4, 192 + beq a3, a4, .LBB28_5 +.LBB28_4: + li a0, 0 + mv a2, a1 +.LBB28_5: + mv a1, a2 + ret diff --git a/benches/try_ref_from_bytes_with_elems_dynamic_size.x86-64 b/benches/try_ref_from_bytes_with_elems_dynamic_size.x86-64 index 791351a659..0cba65b7b4 100644 --- a/benches/try_ref_from_bytes_with_elems_dynamic_size.x86-64 +++ b/benches/try_ref_from_bytes_with_elems_dynamic_size.x86-64 @@ -5,14 +5,14 @@ bench_try_ref_from_bytes_with_elems_dynamic_size: mov rax, rdi or dil, cl test dil, 1 - jne .LBB5_3 + jne .LBB28_3 lea rcx, [2*rdx + 4] cmp rsi, rcx - jne .LBB5_3 + jne .LBB28_3 cmp word ptr [rax], -16192 - je .LBB5_4 -.LBB5_3: + je .LBB28_4 +.LBB28_3: xor eax, eax mov rdx, rsi -.LBB5_4: +.LBB28_4: ret diff --git a/benches/try_ref_from_bytes_with_elems_dynamic_size.x86-64.mca b/benches/try_ref_from_bytes_with_elems_dynamic_size.x86-64.mca index 76a7caaecf..ba3c8bf39d 100644 --- a/benches/try_ref_from_bytes_with_elems_dynamic_size.x86-64.mca +++ b/benches/try_ref_from_bytes_with_elems_dynamic_size.x86-64.mca @@ -24,12 +24,12 @@ Instruction Info: 1 1 0.33 mov rax, rdi 1 1 0.33 or dil, cl 1 1 0.33 test dil, 1 - 1 1 1.00 jne .LBB5_3 + 1 1 1.00 jne .LBB28_3 1 1 0.50 lea rcx, [2*rdx + 4] 1 1 0.33 cmp rsi, rcx - 1 1 1.00 jne .LBB5_3 + 1 1 1.00 jne .LBB28_3 2 6 0.50 * cmp word ptr [rax], -16192 - 1 1 1.00 je .LBB5_4 + 1 1 1.00 je .LBB28_4 1 0 0.25 xor eax, eax 1 1 0.33 mov rdx, rsi 1 1 1.00 U ret @@ -58,12 +58,12 @@ Resource pressure by instruction: - - 0.01 0.99 - - - - mov rax, rdi - - 1.00 - - - - - or dil, cl - - 0.99 0.01 - - - - test dil, 1 - - - - - - 1.00 - - jne .LBB5_3 + - - - - - 1.00 - - jne .LBB28_3 - - 0.01 0.99 - - - - lea rcx, [2*rdx + 4] - - 0.02 0.49 - 0.49 - - cmp rsi, rcx - - - - - - 1.00 - - jne .LBB5_3 + - - - - - 1.00 - - jne .LBB28_3 - - - 0.51 - 0.49 0.50 0.50 cmp word ptr [rax], -16192 - - - - - - 1.00 - - je .LBB5_4 + - 
- - - - 1.00 - - je .LBB28_4 - - - - - - - - xor eax, eax - - 0.49 0.51 - - - - mov rdx, rsi - - - - - 1.00 - - ret diff --git a/benches/try_ref_from_prefix_dynamic_padding.cortex-m3 b/benches/try_ref_from_prefix_dynamic_padding.cortex-m3 new file mode 100644 index 0000000000..86cd9aea63 --- /dev/null +++ b/benches/try_ref_from_prefix_dynamic_padding.cortex-m3 @@ -0,0 +1,30 @@ +bench_try_ref_from_prefix_dynamic_padding: + push {r7, lr} + mov r7, sp + lsls r2, r0, #30 + beq .LBB29_2 + movs r1, #0 + movs r0, #0 + pop {r7, pc} +.LBB29_2: + mov r2, r0 + movw r0, #65532 + movt r0, #32767 + ands r1, r0 + cmp r1, #9 + mov.w r0, #0 + itt lo + movlo r1, #1 + poplo {r7, pc} + movw r3, #43691 + subs r1, #9 + movt r3, #43690 + umull r1, r12, r1, r3 + ldrh r1, [r2] + movw r3, #49344 + cmp r1, r3 + mov.w r1, #2 + itt eq + lsreq.w r1, r12, #1 + moveq r0, r2 + pop {r7, pc} diff --git a/benches/try_ref_from_prefix_dynamic_padding.generic-rv32 b/benches/try_ref_from_prefix_dynamic_padding.generic-rv32 new file mode 100644 index 0000000000..783cc125ff --- /dev/null +++ b/benches/try_ref_from_prefix_dynamic_padding.generic-rv32 @@ -0,0 +1,38 @@ +bench_try_ref_from_prefix_dynamic_padding: + andi a2, a0, 3 + beqz a2, .LBB29_2 + li a1, 0 + li a0, 0 + ret +.LBB29_2: + lui a2, 524288 + addi a2, a2, -4 + and a1, a1, a2 + li a2, 9 + bgeu a1, a2, .LBB29_4 + li a0, 0 + li a1, 1 + ret +.LBB29_4: + lbu a2, 1(a0) + lbu a3, 0(a0) + lui a4, 12 + slli a2, a2, 8 + or a3, a3, a2 + addi a4, a4, 192 + xor a2, a3, a4 + snez a2, a2 + beq a3, a4, .LBB29_6 + li a1, 2 + addi a2, a2, -1 + and a0, a0, a2 + ret +.LBB29_6: + addi a1, a1, -9 + lui a3, 699051 + addi a3, a3, -1365 + mulhu a1, a1, a3 + srli a1, a1, 1 + addi a2, a2, -1 + and a0, a0, a2 + ret diff --git a/benches/try_ref_from_prefix_dynamic_padding.x86-64 b/benches/try_ref_from_prefix_dynamic_padding.x86-64 index d832cb7ecf..2f4aa5a636 100644 --- a/benches/try_ref_from_prefix_dynamic_padding.x86-64 +++ 
b/benches/try_ref_from_prefix_dynamic_padding.x86-64 @@ -2,17 +2,17 @@ bench_try_ref_from_prefix_dynamic_padding: xor edx, edx mov eax, 0 test dil, 3 - je .LBB5_1 + je .LBB29_1 ret -.LBB5_1: +.LBB29_1: movabs rax, 9223372036854775804 and rsi, rax cmp rsi, 9 - jae .LBB5_3 + jae .LBB29_3 mov edx, 1 xor eax, eax ret -.LBB5_3: +.LBB29_3: add rsi, -9 movabs rcx, -6148914691236517205 mov rax, rsi diff --git a/benches/try_ref_from_prefix_dynamic_padding.x86-64.mca b/benches/try_ref_from_prefix_dynamic_padding.x86-64.mca index 482112a39b..26172507d4 100644 --- a/benches/try_ref_from_prefix_dynamic_padding.x86-64.mca +++ b/benches/try_ref_from_prefix_dynamic_padding.x86-64.mca @@ -21,12 +21,12 @@ Instruction Info: 1 0 0.25 xor edx, edx 1 1 0.33 mov eax, 0 1 1 0.33 test dil, 3 - 1 1 1.00 je .LBB5_1 + 1 1 1.00 je .LBB29_1 1 1 1.00 U ret 1 1 0.33 movabs rax, 9223372036854775804 1 1 0.33 and rsi, rax 1 1 0.33 cmp rsi, 9 - 1 1 1.00 jae .LBB5_3 + 1 1 1.00 jae .LBB29_3 1 1 0.33 mov edx, 1 1 0 0.25 xor eax, eax 1 1 1.00 U ret @@ -66,12 +66,12 @@ Resource pressure by instruction: - - - - - - - - xor edx, edx - - 0.32 0.34 - 0.34 - - mov eax, 0 - - 0.34 0.33 - 0.33 - - test dil, 3 - - - - - - 1.00 - - je .LBB5_1 + - - - - - 1.00 - - je .LBB29_1 - - - - - 1.00 - - ret - - 0.35 0.65 - - - - movabs rax, 9223372036854775804 - - 0.96 0.03 - 0.01 - - and rsi, rax - - 0.01 0.97 - 0.02 - - cmp rsi, 9 - - - - - - 1.00 - - jae .LBB5_3 + - - - - - 1.00 - - jae .LBB29_3 - - 0.67 0.01 - 0.32 - - mov edx, 1 - - - - - - - - xor eax, eax - - - - - 1.00 - - ret diff --git a/benches/try_ref_from_prefix_dynamic_size.cortex-m3 b/benches/try_ref_from_prefix_dynamic_size.cortex-m3 new file mode 100644 index 0000000000..cc7a5b0cf5 --- /dev/null +++ b/benches/try_ref_from_prefix_dynamic_size.cortex-m3 @@ -0,0 +1,24 @@ +bench_try_ref_from_prefix_dynamic_size: + push {r7, lr} + mov r7, sp + mov r2, r0 + lsls r0, r0, #31 + bne .LBB30_3 + cmp r1, #4 + mov.w r0, #0 + itt lo + movlo r1, #1 + poplo {r7, pc} + 
sub.w r12, r1, #4 + ldrh r1, [r2] + movw r3, #49344 + cmp r1, r3 + mov.w r1, #2 + itt eq + lsreq.w r1, r12, #1 + moveq r0, r2 + pop {r7, pc} +.LBB30_3: + movs r1, #0 + movs r0, #0 + pop {r7, pc} diff --git a/benches/try_ref_from_prefix_dynamic_size.generic-rv32 b/benches/try_ref_from_prefix_dynamic_size.generic-rv32 new file mode 100644 index 0000000000..a58e066355 --- /dev/null +++ b/benches/try_ref_from_prefix_dynamic_size.generic-rv32 @@ -0,0 +1,32 @@ +bench_try_ref_from_prefix_dynamic_size: + andi a2, a0, 1 + bnez a2, .LBB30_3 + li a2, 4 + bgeu a1, a2, .LBB30_4 + li a0, 0 + li a1, 1 + ret +.LBB30_3: + li a1, 0 + li a0, 0 + ret +.LBB30_4: + lbu a2, 1(a0) + lbu a3, 0(a0) + lui a4, 12 + slli a2, a2, 8 + or a3, a3, a2 + addi a4, a4, 192 + xor a2, a3, a4 + snez a2, a2 + beq a3, a4, .LBB30_6 + li a1, 2 + addi a2, a2, -1 + and a0, a0, a2 + ret +.LBB30_6: + addi a1, a1, -4 + srli a1, a1, 1 + addi a2, a2, -1 + and a0, a0, a2 + ret diff --git a/benches/try_ref_from_prefix_dynamic_size.x86-64 b/benches/try_ref_from_prefix_dynamic_size.x86-64 index be7f34b9f8..66dbed41e8 100644 --- a/benches/try_ref_from_prefix_dynamic_size.x86-64 +++ b/benches/try_ref_from_prefix_dynamic_size.x86-64 @@ -2,13 +2,13 @@ bench_try_ref_from_prefix_dynamic_size: xor edx, edx mov eax, 0 test dil, 1 - jne .LBB5_4 + jne .LBB30_4 cmp rsi, 4 - jae .LBB5_3 + jae .LBB30_3 mov edx, 1 xor eax, eax ret -.LBB5_3: +.LBB30_3: add rsi, -4 shr rsi movzx ecx, word ptr [rdi] @@ -18,5 +18,5 @@ bench_try_ref_from_prefix_dynamic_size: xor eax, eax cmp cx, -16192 cmove rax, rdi -.LBB5_4: +.LBB30_4: ret diff --git a/benches/try_ref_from_prefix_dynamic_size.x86-64.mca b/benches/try_ref_from_prefix_dynamic_size.x86-64.mca index 11706defe1..180c7a825a 100644 --- a/benches/try_ref_from_prefix_dynamic_size.x86-64.mca +++ b/benches/try_ref_from_prefix_dynamic_size.x86-64.mca @@ -21,9 +21,9 @@ Instruction Info: 1 0 0.25 xor edx, edx 1 1 0.33 mov eax, 0 1 1 0.33 test dil, 1 - 1 1 1.00 jne .LBB5_4 + 1 1 1.00 jne .LBB30_4 1 1 
0.33 cmp rsi, 4 - 1 1 1.00 jae .LBB5_3 + 1 1 1.00 jae .LBB30_3 1 1 0.33 mov edx, 1 1 0 0.25 xor eax, eax 1 1 1.00 U ret @@ -59,9 +59,9 @@ Resource pressure by instruction: - - - - - - - - xor edx, edx - - 0.30 0.37 - 0.33 - - mov eax, 0 - - 0.35 0.32 - 0.33 - - test dil, 1 - - - - - - 1.00 - - jne .LBB5_4 + - - - - - 1.00 - - jne .LBB30_4 - - 0.32 0.33 - 0.35 - - cmp rsi, 4 - - - - - - 1.00 - - jae .LBB5_3 + - - - - - 1.00 - - jae .LBB30_3 - - 0.33 0.35 - 0.32 - - mov edx, 1 - - - - - - - - xor eax, eax - - - - - 1.00 - - ret diff --git a/benches/try_ref_from_prefix_static_size.cortex-m3 b/benches/try_ref_from_prefix_static_size.cortex-m3 new file mode 100644 index 0000000000..ddcbc9ef8d --- /dev/null +++ b/benches/try_ref_from_prefix_static_size.cortex-m3 @@ -0,0 +1,18 @@ +bench_try_ref_from_prefix_static_size: + push {r7, lr} + mov r7, sp + lsls r2, r0, #31 + bne .LBB31_3 + cmp r1, #6 + blo .LBB31_3 + ldrh r1, [r0] + movw r2, #49344 + uxth r1, r1 + cmp r1, r2 + itt ne + movne r0, #2 + movne r0, #0 + pop {r7, pc} +.LBB31_3: + movs r0, #0 + pop {r7, pc} diff --git a/benches/try_ref_from_prefix_static_size.generic-rv32 b/benches/try_ref_from_prefix_static_size.generic-rv32 new file mode 100644 index 0000000000..bb1b54ba98 --- /dev/null +++ b/benches/try_ref_from_prefix_static_size.generic-rv32 @@ -0,0 +1,23 @@ +bench_try_ref_from_prefix_static_size: + andi a2, a0, 1 + bnez a2, .LBB31_6 + li a2, 6 + bltu a1, a2, .LBB31_6 + lbu a1, 1(a0) + lbu a2, 0(a0) + slli a1, a1, 8 + or a1, a1, a2 + lui a2, 12 + slli a1, a1, 16 + srli a1, a1, 16 + addi a2, a2, 192 + bne a1, a2, .LBB31_5 + bne a1, a2, .LBB31_6 +.LBB31_4: + ret +.LBB31_5: + li a0, 2 + beq a1, a2, .LBB31_4 +.LBB31_6: + li a0, 0 + ret diff --git a/benches/try_ref_from_prefix_static_size.x86-64 b/benches/try_ref_from_prefix_static_size.x86-64 index 83212f776e..b8824b8776 100644 --- a/benches/try_ref_from_prefix_static_size.x86-64 +++ b/benches/try_ref_from_prefix_static_size.x86-64 @@ -3,13 +3,13 @@ 
bench_try_ref_from_prefix_static_size: setb al or al, dil test al, 1 - jne .LBB5_2 + jne .LBB31_2 movzx eax, word ptr [rdi] cmp eax, 49344 mov eax, 2 cmove rax, rdi - je .LBB5_3 -.LBB5_2: + je .LBB31_3 +.LBB31_2: xor eax, eax -.LBB5_3: +.LBB31_3: ret diff --git a/benches/try_ref_from_prefix_static_size.x86-64.mca b/benches/try_ref_from_prefix_static_size.x86-64.mca index 5d02b863a7..7d5dd2aaee 100644 --- a/benches/try_ref_from_prefix_static_size.x86-64.mca +++ b/benches/try_ref_from_prefix_static_size.x86-64.mca @@ -22,12 +22,12 @@ Instruction Info: 1 1 0.50 setb al 1 1 0.33 or al, dil 1 1 0.33 test al, 1 - 1 1 1.00 jne .LBB5_2 + 1 1 1.00 jne .LBB31_2 1 5 0.50 * movzx eax, word ptr [rdi] 1 1 0.33 cmp eax, 49344 1 1 0.33 mov eax, 2 2 2 0.67 cmove rax, rdi - 1 1 1.00 je .LBB5_3 + 1 1 1.00 je .LBB31_3 1 0 0.25 xor eax, eax 1 1 1.00 U ret @@ -53,11 +53,11 @@ Resource pressure by instruction: - - 1.00 - - - - - setb al - - 0.02 0.66 - 0.32 - - or al, dil - - 0.03 0.65 - 0.32 - - test al, 1 - - - - - - 1.00 - - jne .LBB5_2 + - - - - - 1.00 - - jne .LBB31_2 - - - - - - 0.50 0.50 movzx eax, word ptr [rdi] - - 0.92 0.07 - 0.01 - - cmp eax, 49344 - - 0.37 0.63 - - - - mov eax, 2 - - 0.97 1.00 - 0.03 - - cmove rax, rdi - - - - - - 1.00 - - je .LBB5_3 + - - - - - 1.00 - - je .LBB31_3 - - - - - - - - xor eax, eax - - - - - 1.00 - - ret diff --git a/benches/try_ref_from_prefix_with_elems_dynamic_padding.cortex-m3 b/benches/try_ref_from_prefix_with_elems_dynamic_padding.cortex-m3 new file mode 100644 index 0000000000..c23625f7fe --- /dev/null +++ b/benches/try_ref_from_prefix_with_elems_dynamic_padding.cortex-m3 @@ -0,0 +1,39 @@ +bench_try_ref_from_prefix_with_elems_dynamic_padding: + push {r7, lr} + mov r7, sp + movs r3, #3 + mov lr, r2 + umull r3, r2, r2, r3 + cbnz r2, .LBB32_2 + cmn.w r3, #10 + itttt ls + addls.w r12, r3, #9 + movls r2, #3 + bicls r2, r3 + addsls.w r3, r2, r12 + blo .LBB32_3 +.LBB32_2: + movs r0, #0 + movs r1, #1 + pop {r7, pc} +.LBB32_3: + lsls r2, r0, #30 + 
beq .LBB32_5 + movs r1, #0 + movs r0, #0 + pop {r7, pc} +.LBB32_5: + mov r12, r0 + cmp r3, r1 + mov.w r0, #0 + itt hi + movhi r1, #1 + pophi {r7, pc} + ldrh.w r1, [r12] + movw r2, #49344 + cmp r1, r2 + ite ne + movne.w lr, #2 + moveq r0, r12 + mov r1, lr + pop {r7, pc} diff --git a/benches/try_ref_from_prefix_with_elems_dynamic_padding.generic-rv32 b/benches/try_ref_from_prefix_with_elems_dynamic_padding.generic-rv32 new file mode 100644 index 0000000000..85e8f43eed --- /dev/null +++ b/benches/try_ref_from_prefix_with_elems_dynamic_padding.generic-rv32 @@ -0,0 +1,41 @@ +bench_try_ref_from_prefix_with_elems_dynamic_padding: + mv a3, a1 + li a1, 3 + mulhu a4, a2, a1 + li a1, 1 + bnez a4, .LBB32_3 + slli a4, a2, 1 + add a4, a4, a2 + li a5, -10 + bltu a5, a4, .LBB32_3 + ori a5, a4, 3 + not a4, a4 + andi a4, a4, 3 + addi a5, a5, 9 + bgeu a5, a4, .LBB32_4 +.LBB32_3: + li a0, 0 + ret +.LBB32_4: + andi a4, a0, 3 + beqz a4, .LBB32_6 + li a1, 0 + li a0, 0 + ret +.LBB32_6: + bltu a3, a5, .LBB32_3 + lbu a1, 1(a0) + lbu a3, 0(a0) + lui a4, 12 + slli a1, a1, 8 + or a3, a3, a1 + addi a4, a4, 192 + xor a1, a3, a4 + snez a1, a1 + beq a3, a4, .LBB32_9 + li a2, 2 +.LBB32_9: + addi a1, a1, -1 + and a0, a0, a1 + mv a1, a2 + ret diff --git a/benches/try_ref_from_prefix_with_elems_dynamic_padding.x86-64 b/benches/try_ref_from_prefix_with_elems_dynamic_padding.x86-64 index d7b2ca9ce2..c6479759a0 100644 --- a/benches/try_ref_from_prefix_with_elems_dynamic_padding.x86-64 +++ b/benches/try_ref_from_prefix_with_elems_dynamic_padding.x86-64 @@ -3,28 +3,28 @@ bench_try_ref_from_prefix_with_elems_dynamic_padding: mov edx, 3 mov rax, rcx mul rdx - jo .LBB5_1 + jo .LBB32_1 cmp rax, -10 - ja .LBB5_1 + ja .LBB32_1 lea rdx, [rax + 9] not eax and eax, 3 add rax, rdx - jae .LBB5_4 -.LBB5_1: + jae .LBB32_4 +.LBB32_1: xor eax, eax mov edx, 1 ret -.LBB5_4: +.LBB32_4: mov r8, rax xor edx, edx mov eax, 0 test dil, 3 - je .LBB5_5 + je .LBB32_5 ret -.LBB5_5: +.LBB32_5: cmp r8, rsi - ja .LBB5_1 + ja .LBB32_1 
movzx esi, word ptr [rdi] cmp si, -16192 mov edx, 2 diff --git a/benches/try_ref_from_prefix_with_elems_dynamic_padding.x86-64.mca b/benches/try_ref_from_prefix_with_elems_dynamic_padding.x86-64.mca index 9df1d29761..a3bad49c98 100644 --- a/benches/try_ref_from_prefix_with_elems_dynamic_padding.x86-64.mca +++ b/benches/try_ref_from_prefix_with_elems_dynamic_padding.x86-64.mca @@ -22,14 +22,14 @@ Instruction Info: 1 1 0.33 mov edx, 3 1 1 0.33 mov rax, rcx 2 4 1.00 mul rdx - 1 1 1.00 jo .LBB5_1 + 1 1 1.00 jo .LBB32_1 1 1 0.33 cmp rax, -10 - 1 1 1.00 ja .LBB5_1 + 1 1 1.00 ja .LBB32_1 1 1 0.50 lea rdx, [rax + 9] 1 1 0.33 not eax 1 1 0.33 and eax, 3 1 1 0.33 add rax, rdx - 1 1 1.00 jae .LBB5_4 + 1 1 1.00 jae .LBB32_4 1 0 0.25 xor eax, eax 1 1 0.33 mov edx, 1 1 1 1.00 U ret @@ -37,10 +37,10 @@ Instruction Info: 1 0 0.25 xor edx, edx 1 1 0.33 mov eax, 0 1 1 0.33 test dil, 3 - 1 1 1.00 je .LBB5_5 + 1 1 1.00 je .LBB32_5 1 1 1.00 U ret 1 1 0.33 cmp r8, rsi - 1 1 1.00 ja .LBB5_1 + 1 1 1.00 ja .LBB32_1 1 5 0.50 * movzx esi, word ptr [rdi] 1 1 0.33 cmp si, -16192 1 1 0.33 mov edx, 2 @@ -72,14 +72,14 @@ Resource pressure by instruction: - - 0.01 0.99 - - - - mov edx, 3 - - 0.99 0.01 - - - - mov rax, rcx - - 1.00 1.00 - - - - mul rdx - - - - - - 1.00 - - jo .LBB5_1 + - - - - - 1.00 - - jo .LBB32_1 - - 1.00 - - - - - cmp rax, -10 - - - - - - 1.00 - - ja .LBB5_1 + - - - - - 1.00 - - ja .LBB32_1 - - - 1.00 - - - - lea rdx, [rax + 9] - - 1.00 - - - - - not eax - - 0.99 0.01 - - - - and eax, 3 - - 0.99 0.01 - - - - add rax, rdx - - - - - - 1.00 - - jae .LBB5_4 + - - - - - 1.00 - - jae .LBB32_4 - - - - - - - - xor eax, eax - - - 0.98 - 0.02 - - mov edx, 1 - - - - - 1.00 - - ret @@ -87,10 +87,10 @@ Resource pressure by instruction: - - - - - - - - xor edx, edx - - 0.02 0.49 - 0.49 - - mov eax, 0 - - - 0.49 - 0.51 - - test dil, 3 - - - - - - 1.00 - - je .LBB5_5 + - - - - - 1.00 - - je .LBB32_5 - - - - - 1.00 - - ret - - 0.98 0.02 - - - - cmp r8, rsi - - - - - - 1.00 - - ja .LBB5_1 + - - 
- - - 1.00 - - ja .LBB32_1 - - - - - - 0.50 0.50 movzx esi, word ptr [rdi] - - 0.02 0.98 - - - - cmp si, -16192 - - 0.98 0.02 - - - - mov edx, 2 diff --git a/benches/try_ref_from_prefix_with_elems_dynamic_size.cortex-m3 b/benches/try_ref_from_prefix_with_elems_dynamic_size.cortex-m3 new file mode 100644 index 0000000000..c03b4021d5 --- /dev/null +++ b/benches/try_ref_from_prefix_with_elems_dynamic_size.cortex-m3 @@ -0,0 +1,33 @@ +bench_try_ref_from_prefix_with_elems_dynamic_size: + push {r7, lr} + mov r7, sp + movw r3, #65533 + movt r3, #32767 + cmp r2, r3 + bhi .LBB33_5 + lsls r3, r0, #31 + bne .LBB33_4 + mov r3, r0 + adds r0, r2, #2 + lsls r0, r0, #1 + cmp r0, r1 + mov.w r0, #0 + itt hi + movhi r1, #1 + pophi {r7, pc} + ldrh.w r12, [r3] + movw r1, #49344 + cmp r12, r1 + ite ne + movne r2, #2 + moveq r0, r3 + mov r1, r2 + pop {r7, pc} +.LBB33_4: + movs r1, #0 + movs r0, #0 + pop {r7, pc} +.LBB33_5: + movs r0, #0 + movs r1, #1 + pop {r7, pc} diff --git a/benches/try_ref_from_prefix_with_elems_dynamic_size.generic-rv32 b/benches/try_ref_from_prefix_with_elems_dynamic_size.generic-rv32 new file mode 100644 index 0000000000..a72216499d --- /dev/null +++ b/benches/try_ref_from_prefix_with_elems_dynamic_size.generic-rv32 @@ -0,0 +1,34 @@ +bench_try_ref_from_prefix_with_elems_dynamic_size: + mv a3, a1 + lui a1, 524288 + addi a4, a1, -3 + li a1, 1 + bltu a4, a2, .LBB33_3 + andi a4, a0, 1 + bnez a4, .LBB33_4 + slli a4, a2, 1 + addi a4, a4, 4 + bgeu a3, a4, .LBB33_5 +.LBB33_3: + li a0, 0 + ret +.LBB33_4: + li a1, 0 + li a0, 0 + ret +.LBB33_5: + lbu a1, 1(a0) + lbu a3, 0(a0) + lui a4, 12 + slli a1, a1, 8 + or a3, a3, a1 + addi a4, a4, 192 + xor a1, a3, a4 + snez a1, a1 + beq a3, a4, .LBB33_7 + li a2, 2 +.LBB33_7: + addi a1, a1, -1 + and a0, a0, a1 + mv a1, a2 + ret diff --git a/benches/try_ref_from_prefix_with_elems_dynamic_size.x86-64 b/benches/try_ref_from_prefix_with_elems_dynamic_size.x86-64 index b659b67b58..dc7084e56b 100644 --- 
a/benches/try_ref_from_prefix_with_elems_dynamic_size.x86-64 +++ b/benches/try_ref_from_prefix_with_elems_dynamic_size.x86-64 @@ -1,20 +1,20 @@ bench_try_ref_from_prefix_with_elems_dynamic_size: movabs rax, 9223372036854775805 cmp rdx, rax - ja .LBB5_1 + ja .LBB33_1 mov rcx, rdx xor edx, edx mov eax, 0 test dil, 1 - jne .LBB5_5 + jne .LBB33_5 lea rax, [2*rcx + 4] cmp rax, rsi - jbe .LBB5_4 -.LBB5_1: + jbe .LBB33_4 +.LBB33_1: xor eax, eax mov edx, 1 ret -.LBB5_4: +.LBB33_4: movzx esi, word ptr [rdi] cmp si, -16192 mov edx, 2 @@ -22,5 +22,5 @@ bench_try_ref_from_prefix_with_elems_dynamic_size: xor eax, eax cmp esi, 49344 cmove rax, rdi -.LBB5_5: +.LBB33_5: ret diff --git a/benches/try_ref_from_prefix_with_elems_dynamic_size.x86-64.mca b/benches/try_ref_from_prefix_with_elems_dynamic_size.x86-64.mca index 7dc6caa16b..ce8aeace40 100644 --- a/benches/try_ref_from_prefix_with_elems_dynamic_size.x86-64.mca +++ b/benches/try_ref_from_prefix_with_elems_dynamic_size.x86-64.mca @@ -20,15 +20,15 @@ Instruction Info: [1] [2] [3] [4] [5] [6] Instructions: 1 1 0.33 movabs rax, 9223372036854775805 1 1 0.33 cmp rdx, rax - 1 1 1.00 ja .LBB5_1 + 1 1 1.00 ja .LBB33_1 1 1 0.33 mov rcx, rdx 1 0 0.25 xor edx, edx 1 1 0.33 mov eax, 0 1 1 0.33 test dil, 1 - 1 1 1.00 jne .LBB5_5 + 1 1 1.00 jne .LBB33_5 1 1 0.50 lea rax, [2*rcx + 4] 1 1 0.33 cmp rax, rsi - 1 1 1.00 jbe .LBB5_4 + 1 1 1.00 jbe .LBB33_4 1 0 0.25 xor eax, eax 1 1 0.33 mov edx, 1 1 1 1.00 U ret @@ -61,15 +61,15 @@ Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - 0.66 0.33 - 0.01 - - movabs rax, 9223372036854775805 - - 0.02 0.66 - 0.32 - - cmp rdx, rax - - - - - - 1.00 - - ja .LBB5_1 + - - - - - 1.00 - - ja .LBB33_1 - - 0.66 0.33 - 0.01 - - mov rcx, rdx - - - - - - - - xor edx, edx - - 0.33 0.01 - 0.66 - - mov eax, 0 - - 0.34 0.65 - 0.01 - - test dil, 1 - - - - - - 1.00 - - jne .LBB5_5 + - - - - - 1.00 - - jne .LBB33_5 - - 0.65 0.35 - - - - lea rax, [2*rcx + 4] - - - 1.00 - - - - cmp rax, rsi 
- - - - - - 1.00 - - jbe .LBB5_4 + - - - - - 1.00 - - jbe .LBB33_4 - - - - - - - - xor eax, eax - - 0.34 0.01 - 0.65 - - mov edx, 1 - - - - - 1.00 - - ret diff --git a/benches/try_ref_from_suffix_dynamic_padding.cortex-m3 b/benches/try_ref_from_suffix_dynamic_padding.cortex-m3 new file mode 100644 index 0000000000..ed492d2fac --- /dev/null +++ b/benches/try_ref_from_suffix_dynamic_padding.cortex-m3 @@ -0,0 +1,36 @@ +bench_try_ref_from_suffix_dynamic_padding: + adds r2, r1, r0 + lsls r2, r2, #30 + beq .LBB34_2 + movs r0, #0 + bx lr +.LBB34_2: + mov r12, r0 + movw r0, #65532 + movt r0, #32767 + and.w r3, r1, r0 + cmp r3, #9 + mov.w r0, #0 + bhs .LBB34_4 + bx lr +.LBB34_4: + push {r7, lr} + mov r7, sp + movw r2, #43691 + subs r3, #9 + movt r2, #43690 + umull r2, r3, r3, r2 + bic r2, r3, #1 + mov lr, r1 + add.w r2, r2, r3, lsr #1 + lsrs r1, r3, #1 + orn r3, r2, #3 + sub.w r2, lr, r2 + add r2, r12 + add r2, r3 + ldrh r12, [r2, #-8]! + movw r3, #49344 + cmp r12, r3 + it eq + moveq r0, r2 + pop {r7, pc} diff --git a/benches/try_ref_from_suffix_dynamic_padding.generic-rv32 b/benches/try_ref_from_suffix_dynamic_padding.generic-rv32 new file mode 100644 index 0000000000..67e4cf5099 --- /dev/null +++ b/benches/try_ref_from_suffix_dynamic_padding.generic-rv32 @@ -0,0 +1,36 @@ +bench_try_ref_from_suffix_dynamic_padding: + add a2, a1, a0 + andi a2, a2, 3 + bnez a2, .LBB34_2 + lui a2, 524288 + addi a2, a2, -4 + and a2, a2, a1 + li a3, 9 + bgeu a2, a3, .LBB34_3 +.LBB34_2: + li a0, 0 + ret +.LBB34_3: + addi a2, a2, -9 + lui a3, 699051 + add a0, a0, a1 + addi a1, a3, -1365 + mulhu a2, a2, a1 + srli a1, a2, 1 + andi a2, a2, -2 + add a2, a2, a1 + ori a3, a2, -4 + sub a0, a0, a2 + add a0, a0, a3 + lbu a2, -7(a0) + lbu a3, -8(a0) + lui a4, 12 + addi a0, a0, -8 + slli a2, a2, 8 + or a2, a2, a3 + addi a3, a4, 192 + xor a2, a2, a3 + snez a2, a2 + addi a2, a2, -1 + and a0, a0, a2 + ret diff --git a/benches/try_ref_from_suffix_dynamic_padding.x86-64 
b/benches/try_ref_from_suffix_dynamic_padding.x86-64 index b3e9244428..7cb2dafaa1 100644 --- a/benches/try_ref_from_suffix_dynamic_padding.x86-64 +++ b/benches/try_ref_from_suffix_dynamic_padding.x86-64 @@ -1,15 +1,15 @@ bench_try_ref_from_suffix_dynamic_padding: lea eax, [rsi + rdi] test al, 3 - jne .LBB5_1 + jne .LBB34_1 movabs rax, 9223372036854775804 and rax, rsi cmp rax, 9 - jae .LBB5_3 -.LBB5_1: + jae .LBB34_3 +.LBB34_1: xor eax, eax ret -.LBB5_3: +.LBB34_3: add rax, -9 movabs rcx, -6148914691236517205 mul rcx diff --git a/benches/try_ref_from_suffix_dynamic_padding.x86-64.mca b/benches/try_ref_from_suffix_dynamic_padding.x86-64.mca index d56ae56d85..62514f2109 100644 --- a/benches/try_ref_from_suffix_dynamic_padding.x86-64.mca +++ b/benches/try_ref_from_suffix_dynamic_padding.x86-64.mca @@ -20,11 +20,11 @@ Instruction Info: [1] [2] [3] [4] [5] [6] Instructions: 1 1 0.50 lea eax, [rsi + rdi] 1 1 0.33 test al, 3 - 1 1 1.00 jne .LBB5_1 + 1 1 1.00 jne .LBB34_1 1 1 0.33 movabs rax, 9223372036854775804 1 1 0.33 and rax, rsi 1 1 0.33 cmp rax, 9 - 1 1 1.00 jae .LBB5_3 + 1 1 1.00 jae .LBB34_3 1 0 0.25 xor eax, eax 1 1 1.00 U ret 1 1 0.33 add rax, -9 @@ -62,11 +62,11 @@ Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - 0.26 0.74 - - - - lea eax, [rsi + rdi] - - 0.19 0.28 - 0.53 - - test al, 3 - - - - - - 1.00 - - jne .LBB5_1 + - - - - - 1.00 - - jne .LBB34_1 - - 0.93 0.06 - 0.01 - - movabs rax, 9223372036854775804 - - 0.81 0.14 - 0.05 - - and rax, rsi - - 0.55 0.43 - 0.02 - - cmp rax, 9 - - - - - - 1.00 - - jae .LBB5_3 + - - - - - 1.00 - - jae .LBB34_3 - - - - - - - - xor eax, eax - - - - - 1.00 - - ret - - 0.42 0.56 - 0.02 - - add rax, -9 diff --git a/benches/try_ref_from_suffix_dynamic_size.cortex-m3 b/benches/try_ref_from_suffix_dynamic_size.cortex-m3 new file mode 100644 index 0000000000..b9163c9c27 --- /dev/null +++ b/benches/try_ref_from_suffix_dynamic_size.cortex-m3 @@ -0,0 +1,25 @@ +bench_try_ref_from_suffix_dynamic_size: 
+ push {r7, lr} + mov r7, sp + mov r3, r1 + add r1, r0 + lsls r1, r1, #31 + mov.w r2, #0 + bne .LBB35_4 + cmp r3, #4 + blo .LBB35_3 + subs r1, r3, #4 + and r3, r3, #1 + ldrh.w r12, [r0, r3] + movw r2, #49344 + lsrs r1, r1, #1 + cmp r12, r2 + mov.w r2, #0 + it eq + addeq r2, r0, r3 +.LBB35_3: + mov r0, r2 + pop {r7, pc} +.LBB35_4: + mov r0, r2 + pop {r7, pc} diff --git a/benches/try_ref_from_suffix_dynamic_size.generic-rv32 b/benches/try_ref_from_suffix_dynamic_size.generic-rv32 new file mode 100644 index 0000000000..e82d330da5 --- /dev/null +++ b/benches/try_ref_from_suffix_dynamic_size.generic-rv32 @@ -0,0 +1,28 @@ +bench_try_ref_from_suffix_dynamic_size: + mv a3, a0 + add a0, a0, a1 + andi a2, a0, 1 + li a0, 0 + bnez a2, .LBB35_4 + li a4, 4 + bltu a1, a4, .LBB35_3 + andi a0, a1, 1 + add a0, a0, a3 + lbu a2, 1(a0) + lbu a3, 0(a0) + addi a1, a1, -4 + slli a2, a2, 8 + or a3, a3, a2 + lui a4, 12 + srli a2, a1, 1 + addi a1, a4, 192 + xor a1, a1, a3 + snez a1, a1 + addi a1, a1, -1 + and a0, a0, a1 +.LBB35_3: + mv a1, a2 + ret +.LBB35_4: + mv a1, a2 + ret diff --git a/benches/try_ref_from_suffix_dynamic_size.x86-64 b/benches/try_ref_from_suffix_dynamic_size.x86-64 index d51f7817e5..96461cb126 100644 --- a/benches/try_ref_from_suffix_dynamic_size.x86-64 +++ b/benches/try_ref_from_suffix_dynamic_size.x86-64 @@ -4,10 +4,10 @@ bench_try_ref_from_suffix_dynamic_size: setb cl or cl, al test cl, 1 - je .LBB5_2 + je .LBB35_2 xor eax, eax ret -.LBB5_2: +.LBB35_2: lea rdx, [rsi - 4] shr rdx and esi, 1 diff --git a/benches/try_ref_from_suffix_dynamic_size.x86-64.mca b/benches/try_ref_from_suffix_dynamic_size.x86-64.mca index 6cf7f8e493..4b771139dd 100644 --- a/benches/try_ref_from_suffix_dynamic_size.x86-64.mca +++ b/benches/try_ref_from_suffix_dynamic_size.x86-64.mca @@ -23,7 +23,7 @@ Instruction Info: 1 1 0.50 setb cl 1 1 0.33 or cl, al 1 1 0.33 test cl, 1 - 1 1 1.00 je .LBB5_2 + 1 1 1.00 je .LBB35_2 1 0 0.25 xor eax, eax 1 1 1.00 U ret 1 1 0.50 lea rdx, [rsi - 4] @@ -58,7 +58,7 
@@ Resource pressure by instruction: - - 1.00 - - - - - setb cl - - 0.01 0.99 - - - - or cl, al - - 0.01 0.07 - 0.92 - - test cl, 1 - - - - - - 1.00 - - je .LBB5_2 + - - - - - 1.00 - - je .LBB35_2 - - - - - - - - xor eax, eax - - - - - 1.00 - - ret - - 0.93 0.07 - - - - lea rdx, [rsi - 4] diff --git a/benches/try_ref_from_suffix_static_size.cortex-m3 b/benches/try_ref_from_suffix_static_size.cortex-m3 new file mode 100644 index 0000000000..06481dbb50 --- /dev/null +++ b/benches/try_ref_from_suffix_static_size.cortex-m3 @@ -0,0 +1,18 @@ +bench_try_ref_from_suffix_static_size: + push {r7, lr} + mov r7, sp + adds r2, r1, r0 + lsls r0, r2, #31 + mov.w r0, #0 + bne .LBB36_3 + cmp r1, #6 + it lo + poplo {r7, pc} + ldrh r0, [r2, #-6]! + movw r1, #49344 + cmp r0, r1 + it ne + movne r2, #0 + mov r0, r2 +.LBB36_3: + pop {r7, pc} diff --git a/benches/try_ref_from_suffix_static_size.generic-rv32 b/benches/try_ref_from_suffix_static_size.generic-rv32 new file mode 100644 index 0000000000..79eef8adca --- /dev/null +++ b/benches/try_ref_from_suffix_static_size.generic-rv32 @@ -0,0 +1,20 @@ +bench_try_ref_from_suffix_static_size: + add a2, a1, a0 + andi a3, a2, 1 + li a0, 0 + bnez a3, .LBB36_3 + li a3, 6 + bltu a1, a3, .LBB36_3 + lbu a0, -5(a2) + lbu a1, -6(a2) + addi a2, a2, -6 + slli a0, a0, 8 + or a0, a0, a1 + lui a1, 12 + addi a1, a1, 192 + xor a0, a0, a1 + snez a0, a0 + addi a0, a0, -1 + and a0, a0, a2 +.LBB36_3: + ret diff --git a/benches/try_ref_from_suffix_static_size.x86-64 b/benches/try_ref_from_suffix_static_size.x86-64 index cd39f70931..3913e2e235 100644 --- a/benches/try_ref_from_suffix_static_size.x86-64 +++ b/benches/try_ref_from_suffix_static_size.x86-64 @@ -4,10 +4,10 @@ bench_try_ref_from_suffix_static_size: setb cl or cl, al test cl, 1 - je .LBB5_2 + je .LBB36_2 xor eax, eax ret -.LBB5_2: +.LBB36_2: lea rcx, [rdi + rsi] add rcx, -6 xor eax, eax diff --git a/benches/try_ref_from_suffix_static_size.x86-64.mca b/benches/try_ref_from_suffix_static_size.x86-64.mca 
index 087d1e7ed9..c4a4dd47f9 100644 --- a/benches/try_ref_from_suffix_static_size.x86-64.mca +++ b/benches/try_ref_from_suffix_static_size.x86-64.mca @@ -23,7 +23,7 @@ Instruction Info: 1 1 0.50 setb cl 1 1 0.33 or cl, al 1 1 0.33 test cl, 1 - 1 1 1.00 je .LBB5_2 + 1 1 1.00 je .LBB36_2 1 0 0.25 xor eax, eax 1 1 1.00 U ret 1 1 0.50 lea rcx, [rdi + rsi] @@ -56,7 +56,7 @@ Resource pressure by instruction: - - 1.00 - - - - - setb cl - - 0.95 0.05 - - - - or cl, al - - 0.95 0.02 - 0.03 - - test cl, 1 - - - - - - 1.00 - - je .LBB5_2 + - - - - - 1.00 - - je .LBB36_2 - - - - - - - - xor eax, eax - - - - - 1.00 - - ret - - 0.04 0.96 - - - - lea rcx, [rdi + rsi] diff --git a/benches/try_ref_from_suffix_with_elems_dynamic_padding.cortex-m3 b/benches/try_ref_from_suffix_with_elems_dynamic_padding.cortex-m3 new file mode 100644 index 0000000000..0a7e5859a2 --- /dev/null +++ b/benches/try_ref_from_suffix_with_elems_dynamic_padding.cortex-m3 @@ -0,0 +1,41 @@ +bench_try_ref_from_suffix_with_elems_dynamic_padding: + push {r7, lr} + mov r7, sp + movs r3, #3 + mov lr, r2 + umull r3, r2, r2, r3 + cbnz r2, .LBB37_2 + cmn.w r3, #10 + itttt ls + addls.w r12, r3, #9 + movls r2, #3 + bicls r2, r3 + addsls.w r3, r2, r12 + blo .LBB37_3 +.LBB37_2: + movs r0, #0 + movs r1, #1 + pop {r7, pc} +.LBB37_3: + adds r2, r1, r0 + lsls r2, r2, #30 + beq .LBB37_5 + movs r1, #0 + movs r0, #0 + pop {r7, pc} +.LBB37_5: + mov r12, r0 + cmp r1, r3 + mov.w r0, #0 + itt lo + movlo r1, #1 + poplo {r7, pc} + subs r1, r1, r3 + ldrh.w r2, [r12, r1] + movw r3, #49344 + cmp r2, r3 + ite ne + movne.w lr, #2 + addeq.w r0, r12, r1 + mov r1, lr + pop {r7, pc} diff --git a/benches/try_ref_from_suffix_with_elems_dynamic_padding.generic-rv32 b/benches/try_ref_from_suffix_with_elems_dynamic_padding.generic-rv32 new file mode 100644 index 0000000000..2af4dcd16d --- /dev/null +++ b/benches/try_ref_from_suffix_with_elems_dynamic_padding.generic-rv32 @@ -0,0 +1,44 @@ +bench_try_ref_from_suffix_with_elems_dynamic_padding: + mv 
a3, a1 + li a1, 3 + mulhu a4, a2, a1 + li a1, 1 + bnez a4, .LBB37_3 + slli a4, a2, 1 + add a4, a4, a2 + li a5, -10 + bltu a5, a4, .LBB37_3 + ori a5, a4, 3 + not a4, a4 + andi a4, a4, 3 + addi a5, a5, 9 + bgeu a5, a4, .LBB37_4 +.LBB37_3: + li a0, 0 + ret +.LBB37_4: + add a4, a3, a0 + andi a4, a4, 3 + beqz a4, .LBB37_6 + li a1, 0 + li a0, 0 + ret +.LBB37_6: + bltu a3, a5, .LBB37_3 + sub a3, a3, a5 + add a0, a0, a3 + lbu a1, 1(a0) + lbu a3, 0(a0) + lui a4, 12 + slli a1, a1, 8 + or a3, a3, a1 + addi a4, a4, 192 + xor a1, a3, a4 + snez a1, a1 + beq a3, a4, .LBB37_9 + li a2, 2 +.LBB37_9: + addi a1, a1, -1 + and a0, a0, a1 + mv a1, a2 + ret diff --git a/benches/try_ref_from_suffix_with_elems_dynamic_padding.x86-64 b/benches/try_ref_from_suffix_with_elems_dynamic_padding.x86-64 index a8ceabe11f..2da059d17e 100644 --- a/benches/try_ref_from_suffix_with_elems_dynamic_padding.x86-64 +++ b/benches/try_ref_from_suffix_with_elems_dynamic_padding.x86-64 @@ -3,30 +3,30 @@ bench_try_ref_from_suffix_with_elems_dynamic_padding: mov edx, 3 mov rax, rcx mul rdx - jo .LBB5_1 + jo .LBB37_1 cmp rax, -10 - ja .LBB5_1 + ja .LBB37_1 lea rdx, [rax + 9] not eax and eax, 3 add rax, rdx - jae .LBB5_4 -.LBB5_1: + jae .LBB37_4 +.LBB37_1: xor r8d, r8d mov edx, 1 mov rax, r8 ret -.LBB5_4: +.LBB37_4: lea r9d, [rsi + rdi] xor edx, edx mov r8d, 0 test r9b, 3 - je .LBB5_5 + je .LBB37_5 mov rax, r8 ret -.LBB5_5: +.LBB37_5: sub rsi, rax - jb .LBB5_1 + jb .LBB37_1 lea rax, [rdi + rsi] movzx esi, word ptr [rdi + rsi] cmp si, -16192 diff --git a/benches/try_ref_from_suffix_with_elems_dynamic_padding.x86-64.mca b/benches/try_ref_from_suffix_with_elems_dynamic_padding.x86-64.mca index 4937b556fe..67d9ca07db 100644 --- a/benches/try_ref_from_suffix_with_elems_dynamic_padding.x86-64.mca +++ b/benches/try_ref_from_suffix_with_elems_dynamic_padding.x86-64.mca @@ -22,14 +22,14 @@ Instruction Info: 1 1 0.33 mov edx, 3 1 1 0.33 mov rax, rcx 2 4 1.00 mul rdx - 1 1 1.00 jo .LBB5_1 + 1 1 1.00 jo .LBB37_1 1 1 0.33 cmp 
rax, -10 - 1 1 1.00 ja .LBB5_1 + 1 1 1.00 ja .LBB37_1 1 1 0.50 lea rdx, [rax + 9] 1 1 0.33 not eax 1 1 0.33 and eax, 3 1 1 0.33 add rax, rdx - 1 1 1.00 jae .LBB5_4 + 1 1 1.00 jae .LBB37_4 1 0 0.25 xor r8d, r8d 1 1 0.33 mov edx, 1 1 1 0.33 mov rax, r8 @@ -38,11 +38,11 @@ Instruction Info: 1 0 0.25 xor edx, edx 1 1 0.33 mov r8d, 0 1 1 0.33 test r9b, 3 - 1 1 1.00 je .LBB5_5 + 1 1 1.00 je .LBB37_5 1 1 0.33 mov rax, r8 1 1 1.00 U ret 1 1 0.33 sub rsi, rax - 1 1 1.00 jb .LBB5_1 + 1 1 1.00 jb .LBB37_1 1 1 0.50 lea rax, [rdi + rsi] 1 5 0.50 * movzx esi, word ptr [rdi + rsi] 1 1 0.33 cmp si, -16192 @@ -76,14 +76,14 @@ Resource pressure by instruction: - - 0.66 0.18 - 0.16 - - mov edx, 3 - - 1.00 - - - - - mov rax, rcx - - 1.00 1.00 - - - - mul rdx - - - - - - 1.00 - - jo .LBB5_1 + - - - - - 1.00 - - jo .LBB37_1 - - 0.01 0.99 - - - - cmp rax, -10 - - - - - - 1.00 - - ja .LBB5_1 + - - - - - 1.00 - - ja .LBB37_1 - - 0.99 0.01 - - - - lea rdx, [rax + 9] - - 0.01 0.99 - - - - not eax - - 0.02 0.98 - - - - and eax, 3 - - 0.02 0.98 - - - - add rax, rdx - - - - - - 1.00 - - jae .LBB5_4 + - - - - - 1.00 - - jae .LBB37_4 - - - - - - - - xor r8d, r8d - - 0.66 0.01 - 0.33 - - mov edx, 1 - - 0.50 - - 0.50 - - mov rax, r8 @@ -92,11 +92,11 @@ Resource pressure by instruction: - - - - - - - - xor edx, edx - - 0.50 0.32 - 0.18 - - mov r8d, 0 - - 0.16 0.17 - 0.67 - - test r9b, 3 - - - - - - 1.00 - - je .LBB5_5 + - - - - - 1.00 - - je .LBB37_5 - - 0.33 0.33 - 0.34 - - mov rax, r8 - - - - - 1.00 - - ret - - - 0.51 - 0.49 - - sub rsi, rax - - - - - - 1.00 - - jb .LBB5_1 + - - - - - 1.00 - - jb .LBB37_1 - - 0.16 0.84 - - - - lea rax, [rdi + rsi] - - - - - - 0.50 0.50 movzx esi, word ptr [rdi + rsi] - - 0.02 0.98 - - - - cmp si, -16192 diff --git a/benches/try_ref_from_suffix_with_elems_dynamic_size.cortex-m3 b/benches/try_ref_from_suffix_with_elems_dynamic_size.cortex-m3 new file mode 100644 index 0000000000..36c099952b --- /dev/null +++ 
b/benches/try_ref_from_suffix_with_elems_dynamic_size.cortex-m3 @@ -0,0 +1,35 @@ +bench_try_ref_from_suffix_with_elems_dynamic_size: + movw r3, #65533 + movt r3, #32767 + cmp r2, r3 + bhi .LBB38_4 + adds r3, r1, r0 + lsls r3, r3, #31 + ittt ne + movne r1, #0 + movne r0, #0 + bxne lr + mov r12, r0 + movs r0, #4 + add.w r3, r0, r2, lsl #1 + cmp r1, r3 + mov.w r0, #0 + itt lo + movlo r1, #1 + bxlo lr + push {r7, lr} + mov r7, sp + subs r1, r1, r3 + ldrh.w lr, [r12, r1] + movw r3, #49344 + cmp lr, r3 + ite ne + movne r2, #2 + addeq.w r0, r12, r1 + pop.w {r7, lr} + mov r1, r2 + bx lr +.LBB38_4: + movs r0, #0 + movs r1, #1 + bx lr diff --git a/benches/try_ref_from_suffix_with_elems_dynamic_size.generic-rv32 b/benches/try_ref_from_suffix_with_elems_dynamic_size.generic-rv32 new file mode 100644 index 0000000000..797eb1fab0 --- /dev/null +++ b/benches/try_ref_from_suffix_with_elems_dynamic_size.generic-rv32 @@ -0,0 +1,37 @@ +bench_try_ref_from_suffix_with_elems_dynamic_size: + mv a3, a1 + lui a1, 524288 + addi a4, a1, -3 + li a1, 1 + bltu a4, a2, .LBB38_3 + add a4, a3, a0 + andi a4, a4, 1 + bnez a4, .LBB38_4 + slli a4, a2, 1 + addi a4, a4, 4 + bgeu a3, a4, .LBB38_5 +.LBB38_3: + li a0, 0 + ret +.LBB38_4: + li a1, 0 + li a0, 0 + ret +.LBB38_5: + sub a3, a3, a4 + add a0, a0, a3 + lbu a1, 1(a0) + lbu a3, 0(a0) + lui a4, 12 + slli a1, a1, 8 + or a3, a3, a1 + addi a4, a4, 192 + xor a1, a3, a4 + snez a1, a1 + beq a3, a4, .LBB38_7 + li a2, 2 +.LBB38_7: + addi a1, a1, -1 + and a0, a0, a1 + mv a1, a2 + ret diff --git a/benches/try_ref_from_suffix_with_elems_dynamic_size.x86-64 b/benches/try_ref_from_suffix_with_elems_dynamic_size.x86-64 index ff25a78945..ad7a2ec9d3 100644 --- a/benches/try_ref_from_suffix_with_elems_dynamic_size.x86-64 +++ b/benches/try_ref_from_suffix_with_elems_dynamic_size.x86-64 @@ -1,20 +1,20 @@ bench_try_ref_from_suffix_with_elems_dynamic_size: movabs rax, 9223372036854775805 cmp rdx, rax - ja .LBB5_1 + ja .LBB38_1 lea r8d, [rsi + rdi] xor ecx, ecx mov eax, 0 
test r8b, 1 - jne .LBB5_5 + jne .LBB38_5 lea rax, [2*rdx + 4] sub rsi, rax - jae .LBB5_4 -.LBB5_1: + jae .LBB38_4 +.LBB38_1: xor eax, eax mov edx, 1 ret -.LBB5_4: +.LBB38_4: lea r8, [rdi + rsi] movzx esi, word ptr [rdi + rsi] cmp si, -16192 @@ -23,6 +23,6 @@ bench_try_ref_from_suffix_with_elems_dynamic_size: xor eax, eax cmp esi, 49344 cmove rax, r8 -.LBB5_5: +.LBB38_5: mov rdx, rcx ret diff --git a/benches/try_ref_from_suffix_with_elems_dynamic_size.x86-64.mca b/benches/try_ref_from_suffix_with_elems_dynamic_size.x86-64.mca index 8b6333bf34..ea6ca21b4a 100644 --- a/benches/try_ref_from_suffix_with_elems_dynamic_size.x86-64.mca +++ b/benches/try_ref_from_suffix_with_elems_dynamic_size.x86-64.mca @@ -20,15 +20,15 @@ Instruction Info: [1] [2] [3] [4] [5] [6] Instructions: 1 1 0.33 movabs rax, 9223372036854775805 1 1 0.33 cmp rdx, rax - 1 1 1.00 ja .LBB5_1 + 1 1 1.00 ja .LBB38_1 1 1 0.50 lea r8d, [rsi + rdi] 1 0 0.25 xor ecx, ecx 1 1 0.33 mov eax, 0 1 1 0.33 test r8b, 1 - 1 1 1.00 jne .LBB5_5 + 1 1 1.00 jne .LBB38_5 1 1 0.50 lea rax, [2*rdx + 4] 1 1 0.33 sub rsi, rax - 1 1 1.00 jae .LBB5_4 + 1 1 1.00 jae .LBB38_4 1 0 0.25 xor eax, eax 1 1 0.33 mov edx, 1 1 1 1.00 U ret @@ -63,15 +63,15 @@ Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: - - 0.02 0.95 - 0.03 - - movabs rax, 9223372036854775805 - - 0.93 0.04 - 0.03 - - cmp rdx, rax - - - - - - 1.00 - - ja .LBB5_1 + - - - - - 1.00 - - ja .LBB38_1 - - 0.96 0.04 - - - - lea r8d, [rsi + rdi] - - - - - - - - xor ecx, ecx - - 0.95 0.02 - 0.03 - - mov eax, 0 - - 0.95 0.05 - - - - test r8b, 1 - - - - - - 1.00 - - jne .LBB5_5 + - - - - - 1.00 - - jne .LBB38_5 - - 0.06 0.94 - - - - lea rax, [2*rdx + 4] - - 0.93 0.07 - - - - sub rsi, rax - - - - - - 1.00 - - jae .LBB5_4 + - - - - - 1.00 - - jae .LBB38_4 - - - - - - - - xor eax, eax - - 0.03 0.95 - 0.02 - - mov edx, 1 - - - - - 1.00 - - ret diff --git a/benches/try_transmute.cortex-m3 b/benches/try_transmute.cortex-m3 new file mode 100644 index 
0000000000..e4764b1fbe --- /dev/null +++ b/benches/try_transmute.cortex-m3 @@ -0,0 +1,17 @@ +bench_try_transmute: + push {r7, lr} + mov r7, sp + ldrh r3, [r1] + movw r2, #49344 + cmp r3, r2 + bne .LBB39_2 + ldr.w r1, [r1, #2] + strh r2, [r0] + str.w r1, [r0, #2] + pop {r7, pc} +.LBB39_2: + movw r1, #49344 + movt r1, #65535 + adds r2, r1, #1 + strh r2, [r0] + pop {r7, pc} diff --git a/benches/try_transmute.generic-rv32 b/benches/try_transmute.generic-rv32 new file mode 100644 index 0000000000..abc1aa1b74 --- /dev/null +++ b/benches/try_transmute.generic-rv32 @@ -0,0 +1,26 @@ +bench_try_transmute: + lbu a2, 1(a1) + lbu a4, 0(a1) + lui a3, 12 + slli a2, a2, 8 + or a4, a4, a2 + addi a2, a3, 192 + bne a4, a2, .LBB39_2 + lbu a3, 2(a1) + lbu a4, 3(a1) + lbu a5, 4(a1) + lbu a1, 5(a1) + sb a3, 2(a0) + sb a4, 3(a0) + sb a5, 4(a0) + sb a1, 5(a0) + srli a1, a2, 8 + sb a2, 0(a0) + sb a1, 1(a0) + ret +.LBB39_2: + addi a2, a3, 193 + srli a1, a2, 8 + sb a2, 0(a0) + sb a1, 1(a0) + ret diff --git a/benches/try_transmute.rs b/benches/try_transmute.rs index c0de07a8d0..09c18c5a95 100644 --- a/benches/try_transmute.rs +++ b/benches/try_transmute.rs @@ -1,5 +1,4 @@ -use zerocopy::Unalign; -use zerocopy_derive::*; +use zerocopy::{Unalign, *}; #[path = "formats/coco_static_size.rs"] mod format; diff --git a/benches/try_transmute_ref_dynamic_size.cortex-m3 b/benches/try_transmute_ref_dynamic_size.cortex-m3 new file mode 100644 index 0000000000..83e5aba0a6 --- /dev/null +++ b/benches/try_transmute_ref_dynamic_size.cortex-m3 @@ -0,0 +1,10 @@ +bench_try_transmute_ref_dynamic_size: + push {r7, lr} + mov r7, sp + ldrh r2, [r0] + adds r1, #1 + movw r3, #49344 + cmp r2, r3 + it ne + movne r0, #0 + pop {r7, pc} diff --git a/benches/try_transmute_ref_dynamic_size.generic-rv32 b/benches/try_transmute_ref_dynamic_size.generic-rv32 new file mode 100644 index 0000000000..8257192ccd --- /dev/null +++ b/benches/try_transmute_ref_dynamic_size.generic-rv32 @@ -0,0 +1,10 @@ 
+bench_try_transmute_ref_dynamic_size: + lhu a2, 0(a0) + lui a3, 12 + addi a3, a3, 192 + xor a2, a2, a3 + snez a2, a2 + addi a1, a1, 1 + addi a2, a2, -1 + and a0, a0, a2 + ret diff --git a/benches/try_transmute_ref_dynamic_size.rs b/benches/try_transmute_ref_dynamic_size.rs index c9236e13d2..75db72226f 100644 --- a/benches/try_transmute_ref_dynamic_size.rs +++ b/benches/try_transmute_ref_dynamic_size.rs @@ -1,4 +1,4 @@ -use zerocopy_derive::*; +use zerocopy::*; #[path = "formats/coco_dynamic_size.rs"] mod format; diff --git a/benches/try_transmute_ref_static_size.cortex-m3 b/benches/try_transmute_ref_static_size.cortex-m3 new file mode 100644 index 0000000000..4ace5f6075 --- /dev/null +++ b/benches/try_transmute_ref_static_size.cortex-m3 @@ -0,0 +1,9 @@ +bench_try_transmute_ref_static_size: + push {r7, lr} + mov r7, sp + ldrh r1, [r0] + movw r2, #49344 + cmp r1, r2 + it ne + movne r0, #0 + pop {r7, pc} diff --git a/benches/try_transmute_ref_static_size.generic-rv32 b/benches/try_transmute_ref_static_size.generic-rv32 new file mode 100644 index 0000000000..85e09301aa --- /dev/null +++ b/benches/try_transmute_ref_static_size.generic-rv32 @@ -0,0 +1,9 @@ +bench_try_transmute_ref_static_size: + lhu a1, 0(a0) + lui a2, 12 + addi a2, a2, 192 + xor a1, a1, a2 + snez a1, a1 + addi a1, a1, -1 + and a0, a0, a1 + ret diff --git a/benches/try_transmute_ref_static_size.rs b/benches/try_transmute_ref_static_size.rs index 631cce2b0b..c281df93ee 100644 --- a/benches/try_transmute_ref_static_size.rs +++ b/benches/try_transmute_ref_static_size.rs @@ -1,4 +1,4 @@ -use zerocopy_derive::*; +use zerocopy::*; #[path = "formats/coco_static_size.rs"] mod format; diff --git a/ci/check_all_toolchains_tested.sh b/ci/check_all_toolchains_tested.sh index 88d4208a0e..8704bfbe1b 100755 --- a/ci/check_all_toolchains_tested.sh +++ b/ci/check_all_toolchains_tested.sh @@ -18,7 +18,7 @@ set -eo pipefail # non-zero error code, which causes this script to fail (thanks to # `set -e`). 
diff \ - <(cat .github/workflows/ci.yml | yq '.jobs.build_test.strategy.matrix.toolchain | .[]' | \ + <(yq -r '.jobs.build_test.strategy.matrix.toolchain | .[]' .github/workflows/ci.yml | \ sort -u | grep -v '^\(msrv\|stable\|nightly\)$') \ <(cargo metadata -q --format-version 1 | \ jq -r ".packages[] | select(.name == \"zerocopy\").metadata.\"build-rs\" | keys | .[]" | \ diff --git a/ci/check_job_dependencies.sh b/ci/check_job_dependencies.sh index a9e0d362fc..ffce2274a3 100755 --- a/ci/check_job_dependencies.sh +++ b/ci/check_job_dependencies.sh @@ -18,7 +18,7 @@ jobs=$(for i in $(find .github -iname '*.yaml' -or -iname '*.yml') # This gets the list of jobs that all-jobs-succeed does not depend on. comm -23 \ <(yq -r '.jobs | keys | .[]' "$i" | sort | uniq) \ - <(yq -r '.jobs.all-jobs-succeed.needs[]' "$i" | sort | uniq) + <(yq -r '.jobs["all-jobs-succeed"].needs[]' "$i" | sort | uniq) fi # The grep call here excludes all-jobs-succeed from the list of jobs that diff --git a/tests/codegen.rs b/tests/codegen.rs index 5c4f1086f3..47b4fd2408 100644 --- a/tests/codegen.rs +++ b/tests/codegen.rs @@ -8,7 +8,13 @@ #![cfg(__ZEROCOPY_INTERNAL_USE_ONLY_NIGHTLY_FEATURES_IN_TESTS)] -use std::{panic, path::PathBuf, process::Command, thread}; +use std::{ + panic, + path::PathBuf, + process::Command, + sync::{Arc, Mutex}, + thread, +}; enum Directive { Asm, @@ -31,31 +37,61 @@ impl Directive { } } -fn run_codegen_test(bench_name: &str, target_cpu: &str, bless: bool) { +/// A configuration for an instruction set architecture (ISA) being tested. +/// +/// We define an explicit structure to represent this because not all target ISAs +/// support `llvm-mca`. Generic CPUs (like `cortex-m3` or `generic-rv32`) lack +/// pipeline scheduling models upstream in LLVM, causing `--mca` extraction to +/// abort. Thus, `llvm-mca` generation is conditionally evaluated based upon +/// `supports_mca`. 
+#[derive(Clone)] +struct IsaConfig { + target_triple: Option<&'static str>, + target_cpu: &'static str, + supports_mca: bool, +} + +fn run_codegen_test(bench_name: &str, config: &IsaConfig, bless: bool) { + let target_cpu = config.target_cpu; println!("Testing {bench_name}.{target_cpu}"); - let manifest_path = env!("CARGO_MANIFEST_PATH"); - let target_dir = env!("CARGO_TARGET_DIR"); + // We isolate the `cargo-show-asm` target directories to avoid workspace + // `Cargo.lock` contention. Generating artifacts that overlap standard + // `target/...` directories triggers lock blocks when testing concurrently. + // Statically mapping the build execution uniquely to the OS temporary + // directory circumvents Cargo's recursive lock boundary logic. + let mut inner_target_dir = std::env::temp_dir(); + inner_target_dir.push(format!("codegen_benches_{}_{}", target_cpu, bench_name)); let cargo_asm = |directive: &Directive| { + let mut args = vec![ + "asm", + "--quiet", + "-p", + "codegen-benches", + "--manifest-path", + "tools/codegen-benches/Cargo.toml", + "--lib", + ]; + if let Some(triple) = config.target_triple { + args.extend(["--target", triple]); + } + args.extend(["--target-cpu", config.target_cpu, "--simplify", directive.arg()]); + let bench_name_arg = format!("bench_{bench_name}"); + args.push(&bench_name_arg); + args.push("0"); + + // The outer `cargo test` explicitly injects `CARGO_MAKEFLAGS` to + // configure POSIX pipeline jobserver logic for threaded runs. We strip + // this environment variable to prevent child `cargo-show-asm` pipelines + // from attempting to read file descriptor tokens from the active outer + // testing pipe, which causes immediate "os error 2" read failures from + // the host system. 
Command::new("cargo") - .args([ - "asm", - "--quiet", - "-p", - "zerocopy", - "--manifest-path", - manifest_path, - "--target-dir", - target_dir, - "--bench", - bench_name, - "--target-cpu", - target_cpu, - "--simplify", - directive.arg(), - &format!("bench_{bench_name}"), - ]) + .env_remove("CARGO_MAKEFLAGS") + .env("CARGO_PROFILE_RELEASE_PANIC", "abort") + .env("CARGO_TARGET_DIR", &inner_target_dir) + .args(&args) .output() .expect("failed to execute process") }; @@ -86,37 +122,92 @@ fn run_codegen_test(bench_name: &str, target_cpu: &str, bless: bool) { } else { let expected_result = std::fs::read(expected_file_path).unwrap_or_default(); if actual_result != expected_result { - let expected = String::from_utf8_lossy(&expected_result[..]); - panic!("Bless codegen tests with BLESS=1\nGot unexpected output:\n{}", expected); + println!("Expected output:\n{}", String::from_utf8_lossy(&expected_result)); + println!("Actual output:\n{}", String::from_utf8_lossy(&actual_result)); + panic!("Bless codegen tests with BLESS=1"); } } }; test_directive(Directive::Asm); - test_directive(Directive::Mca); + if config.supports_mca { + test_directive(Directive::Mca); + } } #[test] #[cfg_attr(miri, ignore)] fn codegen() { let bless = std::env::var("BLESS").is_ok(); - let handles: Vec<_> = std::fs::read_dir("benches") - .unwrap() - .map(|entry| entry.unwrap().path()) - .filter(|path| path.extension().is_some_and(|ext| ext == "rs")) - .map(|path| { + let isas = vec![ + IsaConfig { target_triple: None, target_cpu: "x86-64", supports_mca: true }, + IsaConfig { + target_triple: Some("thumbv7m-none-eabi"), + target_cpu: "cortex-m3", + supports_mca: false, + }, + IsaConfig { + target_triple: Some("riscv32imc-unknown-none-elf"), + target_cpu: "generic-rv32", + supports_mca: false, + }, + ]; + + let mut tasks = Vec::new(); + for entry in std::fs::read_dir("benches").unwrap() { + let path = entry.unwrap().path(); + if path.extension().is_some_and(|ext| ext == "rs") { let bench_name = 
path.file_stem().unwrap().to_str().unwrap().to_owned(); - thread::spawn(move || { - panic::catch_unwind(panic::AssertUnwindSafe(|| { - run_codegen_test(&bench_name, "x86-64", bless); - })) - }) - }) - .collect(); + for config in &isas { + tasks.push((bench_name.clone(), config.clone())); + } + } + } - let failed = handles.into_iter().any(|handle| handle.join().unwrap().is_err()); + // We explicitly restrict thread spawning into a bounded thread pool due to + // resource limits. Launching hundreds of native `cargo rustc` cross- + // compilation binaries simultaneously overwhelms the host operating + // system's file descriptor and memory limits, resulting in silent test + // failures or internal panics. + let max_threads = std::thread::available_parallelism().map(|n| n.get()).unwrap_or(8); + let mut failed = false; + + let tasks = Arc::new(Mutex::new(tasks.into_iter())); + let mut handles = Vec::new(); + + for _ in 0..max_threads { + let tasks = tasks.clone(); + handles.push(thread::spawn(move || { + let mut failed = false; + loop { + let task = { + let mut tasks = tasks.lock().unwrap(); + tasks.next() + }; + if let Some((bench_name, config)) = task { + let result = panic::catch_unwind(panic::AssertUnwindSafe(|| { + run_codegen_test(&bench_name, &config, bless); + })); + if result.is_err() { + failed = true; + } + } else { + break; + } + } + if failed { + panic!("codegen test failed"); + } + })); + } + + for handle in handles { + if handle.join().is_err() { + failed = true; + } + } if failed { - panic!("One or more codegen tests failed. See thread panics above for details."); + panic!("One or more codegen tests failed. See panics above for details."); } } diff --git a/tools/codegen-benches/Cargo.lock b/tools/codegen-benches/Cargo.lock new file mode 100644 index 0000000000..084529e6da --- /dev/null +++ b/tools/codegen-benches/Cargo.lock @@ -0,0 +1,61 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "codegen-benches" +version = "0.0.0" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "proc-macro2" +version = "1.0.80" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a56dea16b0a29e94408b9aa5e2940a4eedbd128a1ba20e8f7ae60fd3d465af0e" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "syn" +version = "2.0.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e2415488199887523e74fd9a5f7be804dfd42d868ae0eca382e3917094d210e" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "zerocopy" +version = "0.8.42" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.42" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/tools/codegen-benches/Cargo.toml b/tools/codegen-benches/Cargo.toml new file mode 100644 index 0000000000..01f94065fd --- /dev/null +++ b/tools/codegen-benches/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "codegen-benches" +version = "0.0.0" +edition = "2021" +publish = false + +[dependencies] +zerocopy = { path = "../../", features = ["derive"] } + +[lib] +name = "codegen_benches" +path = "src/lib.rs" + +[workspace] +[profile.dev] +panic = "abort" +[profile.release] +panic = "abort" diff --git a/tools/codegen-benches/build.rs b/tools/codegen-benches/build.rs new file mode 100644 index 0000000000..6d45b5a132 --- /dev/null +++ b/tools/codegen-benches/build.rs @@ -0,0 +1,31 @@ +use std::{env, fs, path::PathBuf}; 
+ +fn main() { + let out_dir = env::var("OUT_DIR").unwrap(); + let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap(); + let dest_path = PathBuf::from(out_dir).join("benches.rs"); + + let benches_dir = PathBuf::from(&manifest_dir).join("../../benches"); + let mut out = String::new(); + + let mut entries: Vec<_> = fs::read_dir(&benches_dir) + .unwrap() + .map(|res| res.unwrap()) + .filter(|e| e.path().extension().is_some_and(|ext| ext == "rs")) + .collect(); + entries.sort_by_key(|e| e.path()); + + for entry in entries { + let path = entry.path(); + let name = path.file_stem().unwrap().to_str().unwrap(); + if name != "formats" { + let abs_path = benches_dir.join(format!("{}.rs", name)); + let path_str = abs_path.to_str().unwrap().replace("\\", "/"); + out.push_str(&format!("#[path = \"{}\"]\n", path_str)); + out.push_str(&format!("pub mod {};\n", name)); + } + } + + fs::write(&dest_path, out).unwrap(); + println!("cargo:rerun-if-changed=../../benches"); +} diff --git a/tools/codegen-benches/src/lib.rs b/tools/codegen-benches/src/lib.rs new file mode 100644 index 0000000000..19968d65bc --- /dev/null +++ b/tools/codegen-benches/src/lib.rs @@ -0,0 +1,8 @@ +// We define these codegen benchmarks in a dedicated `#![no_std]` crate rather +// than treating them as standard `[dev-dependencies]` of the main `zerocopy` +// crate. This isolation allows us to compile instruction set architecture +// targets like `thumbv7m` or `riscv32imc` without accidentally pulling in +// `std`-dependent test harness crates which causes hard cross-compilation +// failures. 
+#![no_std] +include!(concat!(env!("OUT_DIR"), "/benches.rs")); diff --git a/tools/update-expected-test-output.sh b/tools/update-expected-test-output.sh index f25ef9b428..ee01023fa3 100755 --- a/tools/update-expected-test-output.sh +++ b/tools/update-expected-test-output.sh @@ -10,8 +10,12 @@ set -eo pipefail -# Update the `.stderr` reference files used to validate our UI tests and the -# `.x86-64.mca` files used to validate our codegen tests. +# Update the `.stderr` reference files used to validate our UI tests. This also +# updates the assembly snapshots (`.x86-64`, `.cortex-m3`, `.generic-rv32`) and +# the `.mca` files for ISAs with `llvm-mca` support, which are used to validate +# our codegen tests. The codegen tests assume that `rustup` is installed and +# available on the `PATH`, since it is needed to add cross-compilation targets. +rustup target add --toolchain $(./cargo.sh --version nightly) thumbv7m-none-eabi riscv32imc-unknown-none-elf BLESS=1 ./cargo.sh +nightly test --test codegen -p zerocopy --all-features BLESS=1 ./cargo.sh +nightly test --test ui -p zerocopy --all-features BLESS=1 ./cargo.sh +stable test --test ui -p zerocopy --features=__internal_use_only_features_that_work_on_stable