Skip to content

Commit f49c3f5

Browse files
committed
Add x64 isel support for fcmp+branch folding
1 parent 88be614 commit f49c3f5

File tree

3 files changed

+160
-137
lines changed

3 files changed

+160
-137
lines changed

crates/codegen/src/target/x64/lower.rs

Lines changed: 70 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use core::mem;
2+
13
use alloc::vec::Vec;
24

35
use ir::{
@@ -13,7 +15,7 @@ use crate::{
1315
lir::{DefOperand, PhysReg, PhysRegSet, RegClass, UseOperand, VirtReg},
1416
machine::MachineLower,
1517
num_utils::{is_sint, is_uint},
16-
target::x64::{FpuBinOp, FpuCmpCode},
18+
target::x64::{CompoundCondCode, FpuBinOp, FpuCmpCode},
1719
};
1820

1921
use super::{
@@ -542,6 +544,12 @@ fn select_brcond(
542544
return;
543545
}
544546
}
547+
548+
match_value! {
549+
if let node cmp_node @ &NodeKind::Fcmp(kind) = ctx, cond {
550+
return select_fcmp_brcond(ctx, cmp_node, kind, true_target, false_target);
551+
}
552+
}
545553
}
546554

547555
emit_alu_rr_discarded(ctx, cond, cond, AluBinOp::Test);
@@ -552,6 +560,67 @@ fn select_brcond(
552560
);
553561
}
554562

563+
fn select_fcmp_brcond(
564+
ctx: &mut IselContext<'_, '_, X64Machine>,
565+
cmp_node: Node,
566+
cmp_kind: FcmpKind,
567+
true_target: Block,
568+
false_target: Block,
569+
) {
570+
let [op1, op2] = ctx.node_inputs_exact(cmp_node);
571+
572+
let mut op1 = ctx.get_value_vreg(op1);
573+
let mut op2 = ctx.get_value_vreg(op2);
574+
575+
// See `select_direct_fcmp` for more detail about what's going on here.
576+
let (swap_cmp_operands, branch_instr) = match cmp_kind {
577+
FcmpKind::Oeq => (
578+
false,
579+
X64Instr::CompundJumpcc(CompoundCondCode::FpuOeq, true_target, false_target),
580+
),
581+
FcmpKind::One => (
582+
false,
583+
X64Instr::Jumpcc(CondCode::Ne, true_target, false_target),
584+
),
585+
FcmpKind::Olt => (
586+
true,
587+
X64Instr::Jumpcc(CondCode::A, true_target, false_target),
588+
),
589+
FcmpKind::Ole => (
590+
true,
591+
X64Instr::Jumpcc(CondCode::Ae, true_target, false_target),
592+
),
593+
FcmpKind::Ueq => (
594+
false,
595+
X64Instr::Jumpcc(CondCode::E, true_target, false_target),
596+
),
597+
FcmpKind::Une => (
598+
false,
599+
X64Instr::CompundJumpcc(CompoundCondCode::FpuUne, true_target, false_target),
600+
),
601+
FcmpKind::Ult => (
602+
false,
603+
X64Instr::Jumpcc(CondCode::B, true_target, false_target),
604+
),
605+
FcmpKind::Ule => (
606+
false,
607+
X64Instr::Jumpcc(CondCode::Be, true_target, false_target),
608+
),
609+
};
610+
611+
if swap_cmp_operands {
612+
mem::swap(&mut op1, &mut op2);
613+
}
614+
615+
ctx.emit_instr(
616+
X64Instr::FpuRRm(FpuBinOp::Ucomi),
617+
&[],
618+
&[UseOperand::any_reg(op1), UseOperand::any(op2)],
619+
);
620+
621+
ctx.emit_instr(branch_instr, &[], &[]);
622+
}
623+
555624
// Raw emission helpers
556625

557626
fn emit_funcaddr(

crates/filetests/cases/codegen/fcmp_brcond.spdr

Lines changed: 42 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,12 @@ func @select_oeq:i32(f64, f64, i32, i32) {
55
# nextln: 000000: 55 push rbp
66
# nextln: 000001: 48 89 e5 mov rbp, rsp
77
# nextln: 000004: 48 89 f8 mov rax, rdi
8-
# nextln: 000007: f2 0f c2 c1 00 cmpeqsd xmm0, xmm1
9-
# nextln: 00000c: 66 48 0f 7e c1 movq rcx, xmm0
10-
# nextln: 000011: 83 e1 01 and ecx, 1
11-
# nextln: 000014: 85 c9 test ecx, ecx
12-
# nextln: 000016: 0f 85 03 00 00 00 jne 0x1f
13-
# nextln: 00001c: 48 89 f0 mov rax, rsi
14-
# nextln: 00001f: 5d pop rbp
15-
# nextln: 000020: c3 ret
8+
# nextln: 000007: 66 0f 2e c1 ucomisd xmm0, xmm1
9+
# nextln: 00000b: 0f 85 06 00 00 00 jne 0x17
10+
# nextln: 000011: 0f 8b 03 00 00 00 jnp 0x1a
11+
# nextln: 000017: 48 89 f0 mov rax, rsi
12+
# nextln: 00001a: 5d pop rbp
13+
# nextln: 00001b: c3 ret
1614

1715
%0:ctrl, %a:f64, %b:f64, %2:i32, %3:i32 = entry
1816
%1:i32 = fcmp oeq %a, %b
@@ -27,14 +25,11 @@ func @select_one:i32(f64, f64, i32, i32) {
2725
# nextln: 000000: 55 push rbp
2826
# nextln: 000001: 48 89 e5 mov rbp, rsp
2927
# nextln: 000004: 48 89 f8 mov rax, rdi
30-
# nextln: 000007: 33 c9 xor ecx, ecx
31-
# nextln: 000009: 66 0f 2e c1 ucomisd xmm0, xmm1
32-
# nextln: 00000d: 0f 95 c1 setne cl
33-
# nextln: 000010: 85 c9 test ecx, ecx
34-
# nextln: 000012: 0f 85 03 00 00 00 jne 0x1b
35-
# nextln: 000018: 48 89 f0 mov rax, rsi
36-
# nextln: 00001b: 5d pop rbp
37-
# nextln: 00001c: c3 ret
28+
# nextln: 000007: 66 0f 2e c1 ucomisd xmm0, xmm1
29+
# nextln: 00000b: 0f 85 03 00 00 00 jne 0x14
30+
# nextln: 000011: 48 89 f0 mov rax, rsi
31+
# nextln: 000014: 5d pop rbp
32+
# nextln: 000015: c3 ret
3833

3934
%0:ctrl, %a:f64, %b:f64, %2:i32, %3:i32 = entry
4035
%1:i32 = fcmp one %a, %b
@@ -49,14 +44,11 @@ func @select_olt:i32(f64, f64, i32, i32) {
4944
# nextln: 000000: 55 push rbp
5045
# nextln: 000001: 48 89 e5 mov rbp, rsp
5146
# nextln: 000004: 48 89 f8 mov rax, rdi
52-
# nextln: 000007: 33 c9 xor ecx, ecx
53-
# nextln: 000009: 66 0f 2e c8 ucomisd xmm1, xmm0
54-
# nextln: 00000d: 0f 97 c1 seta cl
55-
# nextln: 000010: 85 c9 test ecx, ecx
56-
# nextln: 000012: 0f 85 03 00 00 00 jne 0x1b
57-
# nextln: 000018: 48 89 f0 mov rax, rsi
58-
# nextln: 00001b: 5d pop rbp
59-
# nextln: 00001c: c3 ret
47+
# nextln: 000007: 66 0f 2e c8 ucomisd xmm1, xmm0
48+
# nextln: 00000b: 0f 87 03 00 00 00 ja 0x14
49+
# nextln: 000011: 48 89 f0 mov rax, rsi
50+
# nextln: 000014: 5d pop rbp
51+
# nextln: 000015: c3 ret
6052

6153
%0:ctrl, %a:f64, %b:f64, %2:i32, %3:i32 = entry
6254
%1:i32 = fcmp olt %a, %b
@@ -71,14 +63,11 @@ func @select_ole:i32(f64, f64, i32, i32) {
7163
# nextln: 000000: 55 push rbp
7264
# nextln: 000001: 48 89 e5 mov rbp, rsp
7365
# nextln: 000004: 48 89 f8 mov rax, rdi
74-
# nextln: 000007: 33 c9 xor ecx, ecx
75-
# nextln: 000009: 66 0f 2e c8 ucomisd xmm1, xmm0
76-
# nextln: 00000d: 0f 93 c1 setae cl
77-
# nextln: 000010: 85 c9 test ecx, ecx
78-
# nextln: 000012: 0f 85 03 00 00 00 jne 0x1b
79-
# nextln: 000018: 48 89 f0 mov rax, rsi
80-
# nextln: 00001b: 5d pop rbp
81-
# nextln: 00001c: c3 ret
66+
# nextln: 000007: 66 0f 2e c8 ucomisd xmm1, xmm0
67+
# nextln: 00000b: 0f 83 03 00 00 00 jae 0x14
68+
# nextln: 000011: 48 89 f0 mov rax, rsi
69+
# nextln: 000014: 5d pop rbp
70+
# nextln: 000015: c3 ret
8271

8372
%0:ctrl, %a:f64, %b:f64, %2:i32, %3:i32 = entry
8473
%1:i32 = fcmp ole %a, %b
@@ -93,14 +82,11 @@ func @select_ueq:i32(f64, f64, i32, i32) {
9382
# nextln: 000000: 55 push rbp
9483
# nextln: 000001: 48 89 e5 mov rbp, rsp
9584
# nextln: 000004: 48 89 f8 mov rax, rdi
96-
# nextln: 000007: 33 c9 xor ecx, ecx
97-
# nextln: 000009: 66 0f 2e c1 ucomisd xmm0, xmm1
98-
# nextln: 00000d: 0f 94 c1 sete cl
99-
# nextln: 000010: 85 c9 test ecx, ecx
100-
# nextln: 000012: 0f 85 03 00 00 00 jne 0x1b
101-
# nextln: 000018: 48 89 f0 mov rax, rsi
102-
# nextln: 00001b: 5d pop rbp
103-
# nextln: 00001c: c3 ret
85+
# nextln: 000007: 66 0f 2e c1 ucomisd xmm0, xmm1
86+
# nextln: 00000b: 0f 84 03 00 00 00 je 0x14
87+
# nextln: 000011: 48 89 f0 mov rax, rsi
88+
# nextln: 000014: 5d pop rbp
89+
# nextln: 000015: c3 ret
10490

10591
%0:ctrl, %a:f64, %b:f64, %2:i32, %3:i32 = entry
10692
%1:i32 = fcmp ueq %a, %b
@@ -115,14 +101,12 @@ func @select_une:i32(f64, f64, i32, i32) {
115101
# nextln: 000000: 55 push rbp
116102
# nextln: 000001: 48 89 e5 mov rbp, rsp
117103
# nextln: 000004: 48 89 f8 mov rax, rdi
118-
# nextln: 000007: f2 0f c2 c1 04 cmpneqsd xmm0, xmm1
119-
# nextln: 00000c: 66 48 0f 7e c1 movq rcx, xmm0
120-
# nextln: 000011: 83 e1 01 and ecx, 1
121-
# nextln: 000014: 85 c9 test ecx, ecx
122-
# nextln: 000016: 0f 85 03 00 00 00 jne 0x1f
123-
# nextln: 00001c: 48 89 f0 mov rax, rsi
124-
# nextln: 00001f: 5d pop rbp
125-
# nextln: 000020: c3 ret
104+
# nextln: 000007: 66 0f 2e c1 ucomisd xmm0, xmm1
105+
# nextln: 00000b: 0f 85 09 00 00 00 jne 0x1a
106+
# nextln: 000011: 0f 8a 03 00 00 00 jp 0x1a
107+
# nextln: 000017: 48 89 f0 mov rax, rsi
108+
# nextln: 00001a: 5d pop rbp
109+
# nextln: 00001b: c3 ret
126110

127111
%0:ctrl, %a:f64, %b:f64, %2:i32, %3:i32 = entry
128112
%1:i32 = fcmp une %a, %b
@@ -137,14 +121,11 @@ func @select_ult:i32(f64, f64, i32, i32) {
137121
# nextln: 000000: 55 push rbp
138122
# nextln: 000001: 48 89 e5 mov rbp, rsp
139123
# nextln: 000004: 48 89 f8 mov rax, rdi
140-
# nextln: 000007: 33 c9 xor ecx, ecx
141-
# nextln: 000009: 66 0f 2e c1 ucomisd xmm0, xmm1
142-
# nextln: 00000d: 0f 92 c1 setb cl
143-
# nextln: 000010: 85 c9 test ecx, ecx
144-
# nextln: 000012: 0f 85 03 00 00 00 jne 0x1b
145-
# nextln: 000018: 48 89 f0 mov rax, rsi
146-
# nextln: 00001b: 5d pop rbp
147-
# nextln: 00001c: c3 ret
124+
# nextln: 000007: 66 0f 2e c1 ucomisd xmm0, xmm1
125+
# nextln: 00000b: 0f 82 03 00 00 00 jb 0x14
126+
# nextln: 000011: 48 89 f0 mov rax, rsi
127+
# nextln: 000014: 5d pop rbp
128+
# nextln: 000015: c3 ret
148129

149130
%0:ctrl, %a:f64, %b:f64, %2:i32, %3:i32 = entry
150131
%1:i32 = fcmp ult %a, %b
@@ -159,14 +140,11 @@ func @select_ule:i32(f64, f64, i32, i32) {
159140
# nextln: 000000: 55 push rbp
160141
# nextln: 000001: 48 89 e5 mov rbp, rsp
161142
# nextln: 000004: 48 89 f8 mov rax, rdi
162-
# nextln: 000007: 33 c9 xor ecx, ecx
163-
# nextln: 000009: 66 0f 2e c1 ucomisd xmm0, xmm1
164-
# nextln: 00000d: 0f 96 c1 setbe cl
165-
# nextln: 000010: 85 c9 test ecx, ecx
166-
# nextln: 000012: 0f 85 03 00 00 00 jne 0x1b
167-
# nextln: 000018: 48 89 f0 mov rax, rsi
168-
# nextln: 00001b: 5d pop rbp
169-
# nextln: 00001c: c3 ret
143+
# nextln: 000007: 66 0f 2e c1 ucomisd xmm0, xmm1
144+
# nextln: 00000b: 0f 86 03 00 00 00 jbe 0x14
145+
# nextln: 000011: 48 89 f0 mov rax, rsi
146+
# nextln: 000014: 5d pop rbp
147+
# nextln: 000015: c3 ret
170148

171149
%0:ctrl, %a:f64, %b:f64, %2:i32, %3:i32 = entry
172150
%1:i32 = fcmp ule %a, %b

0 commit comments

Comments
 (0)