Skip to content

Commit 410711d

Browse files
committed
Mark more x64 instructions as rematerializable
We now rematerialize signed 32-bit constants and stack addresses as well, which made for some dramatic codegen improvements. The most prominent of these was the `bittest_run` family, which ended up losing all its spills.

The more aggressive rematerialization did reveal several deficiencies in the current implementation, however. The most egregious of these is that we don't yet prune all dead definitions, leading to sequences like this one:

    000042: 48 8d 54 24 18    lea rdx, [rsp + 0x18]
    000047: 48 8d 54 24 1a    lea rdx, [rsp + 0x1a]
    00004c: 48 8d 54 24 1c    lea rdx, [rsp + 0x1c]
    000051: 48 8d 54 24 10    lea rdx, [rsp + 0x10]
    000056: 48 8d 54 24 14    lea rdx, [rsp + 0x14]
    00005b: 48 89 e2          mov rdx, rsp
    00005e: 48 8d 54 24 08    lea rdx, [rsp + 8]

We also don't yet trim rematerializable live ranges optimally when splitting them, resulting in artifacts such as:

    00003a: ba 18 00 00 00    mov edx, 0x18
    00003f: 48 89 d6          mov rsi, rdx
    000042: 48 03 f0          add rsi, rax

We'll need to deal with both in the future.
1 parent dc25838 commit 410711d

15 files changed

+8133
-8276
lines changed

crates/codegen/src/target/x64.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,13 @@ impl MachineRegalloc for X64Machine {
261261
}
262262

263263
fn can_remat(&self, instr: &X64Instr) -> bool {
264-
matches!(instr, X64Instr::MovRU32(..) | X64Instr::MovRI64(..))
264+
matches!(
265+
instr,
266+
X64Instr::MovRmS32(..)
267+
| X64Instr::MovRU32(..)
268+
| X64Instr::MovRI64(..)
269+
| X64Instr::StackAddr(..)
270+
)
265271
}
266272
}
267273

crates/filetests/cases/codegen/tdn/array_jagged.spdr

Lines changed: 120 additions & 126 deletions
Large diffs are not rendered by default.

crates/filetests/cases/codegen/tdn/bittest_run.spdr

Lines changed: 363 additions & 377 deletions
Large diffs are not rendered by default.

crates/filetests/cases/codegen/tdn/calc_loop_nest_debug_canon.spdr

Lines changed: 325 additions & 328 deletions
Large diffs are not rendered by default.

crates/filetests/cases/codegen/tdn/int_no_op_sub_funclet_10_canon.spdr

Lines changed: 360 additions & 358 deletions
Large diffs are not rendered by default.

crates/filetests/cases/codegen/tdn/large_spills.spdr

Lines changed: 4649 additions & 4668 deletions
Large diffs are not rendered by default.

crates/filetests/cases/codegen/tdn/large_spills_canon.spdr

Lines changed: 1402 additions & 1403 deletions
Large diffs are not rendered by default.

crates/filetests/cases/isel-regalloc/tdn/array_jagged.spdr

Lines changed: 62 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -31,139 +31,127 @@ func @throw_index_out_of_range_exception() {
3131
func @"System.Int32 Tests.CodeGenBringUpTests::ArrayJagged(System.Int32)":i32(i32) {
3232
# check: function `System.Int32 Tests.CodeGenBringUpTests::ArrayJagged(System.Int32)`:
3333
# nextln: clobbers: rax, rbx, rcx, rdx, rdi, rsi, r8, r9, r10, r11, r12, r13, r14, r15
34-
# nextln: frame: size 72, align 8
34+
# nextln: frame: size 24, align 8
3535
# nextln: !0: 0
3636
# nextln: spill0: 8
3737
# nextln: spill1: 16
38-
# nextln: spill2: 24
39-
# nextln: spill3: 32
40-
# nextln: spill4: 40
41-
# nextln: spill5: 48
42-
# nextln: spill6: 56
43-
# nextln: spill7: 64
4438
# nextln: block0:
45-
# nextln: $$spill2 = $$rdi
39+
# nextln: $$spill0 = $$rdi
4640
# nextln: 0000: Jump(block1)
4741
# nextln: block1:
4842
# nextln: 0001: $$rax = MovRZ
4943
# nextln: 0002: MovStackR(!0, S64) $$rax
5044
# nextln: 0003: Jump(block2)
5145
# nextln: block2:
5246
# nextln: 0004: $$rax = MovRmS32(2)
53-
# nextln: 0005: $$r12 = MovsxRRm(Ext32_64) $$rax
54-
# nextln: $$spill6 = $$rax
55-
# nextln: 0006: $$spill1 = MovRmS32(4)
56-
# nextln: 0007: $$rcx = ImulRRmI(S64, 4) $$r12
57-
# nextln: 0008: $$spill4 = MovRmS32(8)
58-
# nextln: 0009: $$rax = ImulRRmI(S64, 8) $$r12
47+
# nextln: 0005: $$r13 = MovsxRRm(Ext32_64) $$rax
48+
# nextln: 0007: $$rcx = ImulRRmI(S64, 4) $$r13
49+
# nextln: 0009: $$rax = ImulRRmI(S64, 8) $$r13
5950
# nextln: 0010: $$rdx = MovRmS32(20)
6051
# nextln: $$rdi = $$rdx
6152
# nextln: 0011: $$rdi = AluRRm(S64, Add) $$rdi, $$rcx
62-
# nextln: $$spill7 = $$rdi
63-
# nextln: $$spill0 = $$rdx
64-
# nextln: 0012: $$rcx = MovRmS32(170)
53+
# nextln: $$rcx = $$rdi
6554
# nextln: 0013: $$rdx = MovRmS32(24)
6655
# nextln: $$rsi = $$rdx
6756
# nextln: 0014: $$rsi = AluRRm(S64, Add) $$rsi, $$rax
68-
# nextln: $$spill3 = $$rdx
69-
# nextln: 0015: $$rbx = MovRmS32(1)
70-
# nextln: 0016: $$r14 = MovsxRRm(Ext32_64) $$rbx
57+
# nextln: 0015: $$r12 = MovRmS32(1)
58+
# nextln: $$rbx = $$rcx
59+
# nextln: 0016: $$r14 = MovsxRRm(Ext32_64) $$r12
7160
# nextln: 0017: $$rdi = MovRmS32(171)
72-
# nextln: $$r13 = $$rcx
7361
# nextln: 0018: $$rax = CallRel(Internal(func0)) $$rdi, $$rsi
7462
# nextln: $$rcx = $$rax
7563
# nextln: 0019: $$rcx = AluRmI(S64, Add, 16) $$rcx
76-
# nextln: 0020: MovMR(S32) $$rcx, $$r12
64+
# nextln: 0020: MovMR(S32) $$rcx, $$r13
7765
# nextln: 0021: MovStackR(!0, S64) $$rax
7866
# nextln: 0022: $$r15 = MovRStack(!0, S64)
79-
# nextln: $$rsi = $$spill7
80-
# nextln: $$rdi = $$r13
67+
# nextln: $$rdi = MovRmS32(170)
68+
# nextln: $$rsi = $$rbx
8169
# nextln: 0023: $$rax = CallRel(Internal(func0)) $$rdi, $$rsi
82-
# nextln: $$rdi = $$r13
70+
# nextln: $$rsi = $$rbx
8371
# nextln: $$rcx = $$rax
8472
# nextln: 0024: $$rcx = AluRmI(S64, Add, 16) $$rcx
85-
# nextln: 0025: MovMR(S32) $$rcx, $$r12
73+
# nextln: 0025: MovMR(S32) $$rcx, $$r13
8674
# nextln: 0026: $$rcx = MovRM(S32) $$rcx
8775
# nextln: 0027: AluRRm(S64, Cmp) $$r14, $$rcx
8876
# nextln: 0028: Jumpcc(L, block3, block4)
8977
# nextln: block3:
90-
# nextln: $$rcx = $$spill1
91-
# nextln: $$rsi = $$rcx
92-
# nextln: 0029: $$rsi = ImulRRm(S64) $$rsi, $$r14
93-
# nextln: $$rcx = $$r15
94-
# nextln: 0030: $$rcx = AluRmI(S64, Add, 16) $$rcx
95-
# nextln: $$r8 = $$spill0
96-
# nextln: $$rdx = $$r8
97-
# nextln: 0031: $$rdx = AluRRm(S64, Add) $$rdx, $$rsi
98-
# nextln: $$rsi = $$rax
99-
# nextln: 0032: $$rsi = AluRRm(S64, Add) $$rsi, $$rdx
100-
# nextln: 0033: MovMR(S32) $$rsi, $$rbx
101-
# nextln: $$spill5 = $$rdx
102-
# nextln: 0034: $$rcx = MovRM(S32) $$rcx
103-
# nextln: 0035: $$rdx = MovRZ
104-
# nextln: 0036: $$r13 = MovsxRRm(Ext32_64) $$rdx
105-
# nextln: 0037: AluRRm(S64, Cmp) $$r13, $$rcx
78+
# nextln: $$rcx = MovRmS32(4)
79+
# nextln: $$rdi = $$rcx
80+
# nextln: 0029: $$rdi = ImulRRm(S64) $$rdi, $$r14
81+
# nextln: $$rdx = $$r15
82+
# nextln: 0030: $$rdx = AluRmI(S64, Add, 16) $$rdx
83+
# nextln: $$rcx = MovRmS32(8)
84+
# nextln: $$r8 = MovRmS32(20)
85+
# nextln: 0031: $$r8 = AluRRm(S64, Add) $$r8, $$rdi
86+
# nextln: $$rdi = $$rax
87+
# nextln: 0032: $$rdi = AluRRm(S64, Add) $$rdi, $$r8
88+
# nextln: $$spill1 = $$r8
89+
# nextln: 0033: MovMR(S32) $$rdi, $$r12
90+
# nextln: 0034: $$rdx = MovRM(S32) $$rdx
91+
# nextln: 0035: $$rdi = MovRZ
92+
# nextln: 0036: $$r12 = MovsxRRm(Ext32_64) $$rdi
93+
# nextln: 0037: AluRRm(S64, Cmp) $$r12, $$rdx
10694
# nextln: 0038: Jumpcc(L, block6, block7)
10795
# nextln: block6:
108-
# nextln: $$rcx = $$spill4
109-
# nextln: 0039: $$rcx = ImulRRm(S64) $$rcx, $$r13
110-
# nextln: $$rbx = $$spill3
111-
# nextln: $$rdx = $$rbx
112-
# nextln: 0040: $$rdx = AluRRm(S64, Add) $$rdx, $$rcx
96+
# nextln: $$rdi = $$rcx
97+
# nextln: 0039: $$rdi = ImulRRm(S64) $$rdi, $$r12
98+
# nextln: $$rdx = MovRmS32(24)
99+
# nextln: 0040: $$rdx = AluRRm(S64, Add) $$rdx, $$rdi
113100
# nextln: 0041: $$r15 = AluRRm(S64, Add) $$r15, $$rdx
114101
# nextln: 0042: MovMR(S64) $$r15, $$rax
115-
# nextln: 0043: $$r15 = MovRStack(!0, S64)
116-
# nextln: $$rsi = $$spill7
102+
# nextln: $$r15 = $$rcx
103+
# nextln: 0043: $$rbx = MovRStack(!0, S64)
104+
# nextln: $$rdi = MovRmS32(170)
117105
# nextln: 0044: $$rax = CallRel(Internal(func0)) $$rdi, $$rsi
118106
# nextln: $$rdi = $$rax
119107
# nextln: 0045: $$rdi = AluRmI(S64, Add, 16) $$rdi
120-
# nextln: 0046: MovMR(S32) $$rdi, $$r12
108+
# nextln: 0046: MovMR(S32) $$rdi, $$r13
121109
# nextln: 0047: $$rcx = MovRM(S32) $$rdi
122-
# nextln: 0048: AluRRm(S64, Cmp) $$r13, $$rcx
110+
# nextln: 0048: AluRRm(S64, Cmp) $$r12, $$rcx
123111
# nextln: 0049: Jumpcc(L, block8, block9)
124112
# nextln: block8:
125-
# nextln: $$rcx = $$spill1
126-
# nextln: $$rsi = $$rcx
127-
# nextln: 0050: $$rsi = ImulRRm(S64) $$rsi, $$r13
128-
# nextln: $$rdx = $$spill0
129-
# nextln: $$r8 = $$rdx
130-
# nextln: 0051: $$r8 = AluRRm(S64, Add) $$r8, $$rsi
131-
# nextln: $$rsi = $$rax
132-
# nextln: 0052: $$rsi = AluRRm(S64, Add) $$rsi, $$r8
133-
# nextln: $$r8 = $$spill6
134-
# nextln: 0053: MovMR(S32) $$rsi, $$r8
113+
# nextln: $$rcx = MovRmS32(4)
114+
# nextln: $$r8 = $$rcx
115+
# nextln: 0050: $$r8 = ImulRRm(S64) $$r8, $$r12
116+
# nextln: $$rdx = MovRmS32(20)
117+
# nextln: $$rsi = $$rdx
118+
# nextln: 0051: $$rsi = AluRRm(S64, Add) $$rsi, $$r8
119+
# nextln: $$r8 = $$rax
120+
# nextln: 0052: $$r8 = AluRRm(S64, Add) $$r8, $$rsi
121+
# nextln: $$rsi = MovRmS32(2)
122+
# nextln: 0053: MovMR(S32) $$r8, $$rsi
135123
# nextln: 0054: $$rdi = MovRM(S32) $$rdi
136124
# nextln: 0055: AluRRm(S64, Cmp) $$r14, $$rdi
137125
# nextln: 0056: Jumpcc(L, block10, block11)
138126
# nextln: block10:
139-
# nextln: $$rdi = $$r15
127+
# nextln: $$rdi = $$rbx
140128
# nextln: 0057: $$rdi = AluRmI(S64, Add, 16) $$rdi
141129
# nextln: $$rsi = $$rax
142-
# nextln: 0058: $$rsi = AluRRm(S64, Add) $$rsi, $$spill5
130+
# nextln: 0058: $$rsi = AluRRm(S64, Add) $$rsi, $$spill1
143131
# nextln: 0059: $$r8 = MovRmS32(3)
144132
# nextln: 0060: MovMR(S32) $$rsi, $$r8
145133
# nextln: 0061: $$rdi = MovRM(S32) $$rdi
146134
# nextln: 0062: AluRRm(S64, Cmp) $$r14, $$rdi
147135
# nextln: 0063: Jumpcc(L, block12, block13)
148136
# nextln: block12:
149-
# nextln: $$rdi = $$spill4
150-
# nextln: 0064: $$rdi = ImulRRm(S64) $$rdi, $$r14
151-
# nextln: 0065: $$rbx = AluRRm(S64, Add) $$rbx, $$rdi
152-
# nextln: 0066: $$r15 = AluRRm(S64, Add) $$r15, $$rbx
153-
# nextln: 0067: MovMR(S64) $$r15, $$rax
137+
# nextln: 0064: $$r15 = ImulRRm(S64) $$r15, $$r14
138+
# nextln: $$rdi = MovRmS32(24)
139+
# nextln: 0065: $$rdi = AluRRm(S64, Add) $$rdi, $$r15
140+
# nextln: 0066: $$rbx = AluRRm(S64, Add) $$rbx, $$rdi
141+
# nextln: 0067: MovMR(S64) $$rbx, $$rax
154142
# nextln: 0068: $$rax = MovRStack(!0, S64)
155-
# nextln: $$rdi = $$rax
156-
# nextln: 0069: $$rdi = AluRmI(S64, Add, 16) $$rdi
157-
# nextln: 0070: $$rdi = MovRM(S32) $$rdi
158-
# nextln: 0071: AluRRm(S64, Cmp) $$r14, $$rdi
143+
# nextln: $$rsi = $$rax
144+
# nextln: 0069: $$rsi = AluRmI(S64, Add, 16) $$rsi
145+
# nextln: 0070: $$rsi = MovRM(S32) $$rsi
146+
# nextln: 0071: AluRRm(S64, Cmp) $$r14, $$rsi
159147
# nextln: 0072: Jumpcc(L, block14, block15)
160148
# nextln: block14:
161-
# nextln: 0073: $$rax = AluRRm(S64, Add) $$rax, $$rbx
149+
# nextln: 0073: $$rax = AluRRm(S64, Add) $$rax, $$rdi
162150
# nextln: 0074: $$rax = MovRM(S64) $$rax
163151
# nextln: $$rdi = $$rax
164152
# nextln: 0075: $$rdi = AluRmI(S64, Add, 16) $$rdi
165153
# nextln: 0076: $$rsi = MovRM(S32) $$rdi
166-
# nextln: 0077: $$rdi = MovsxRRm(Ext32_64) $$spill2
154+
# nextln: 0077: $$rdi = MovsxRRm(Ext32_64) $$spill0
167155
# nextln: 0078: AluRRm(S64, Cmp) $$rdi, $$rsi
168156
# nextln: 0079: Jumpcc(L, block16, block17)
169157
# nextln: block16:

0 commit comments

Comments
 (0)