Skip to content

Commit dc59df0

Browse files
committed
Further tame regalloc splitting fights
Fragments containing only a single instruction will now never be split more than once on conflict boundaries, which further helps avoid massive splitting and needless register permutations under high register pressure.
1 parent d32c7dc commit dc59df0

13 files changed

+3210
-3226
lines changed

crates/codegen/src/regalloc/assign.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -593,6 +593,15 @@ impl<M: MachineRegalloc> RegAllocContext<'_, M> {
593593
fragment: LiveSetFragment,
594594
boundary: ConflictBoundary,
595595
) -> Option<Instr> {
596+
if self.is_fragment_split(fragment) && self.fragment_only_instr(fragment).is_some() {
597+
// We don't want to repeatedly split single-instruction fragments on conflict
598+
// boundaries, as they can get us into pointless eviction/splitting/shuffling fights
599+
// under high register pressure. It's better to just give up early and spill in this
600+
// case; it frequently even reduces total spill count because there aren't a bunch of
601+
// small pieces to move around.
602+
return None;
603+
}
604+
596605
match boundary {
597606
ConflictBoundary::StartsAt(instr) => {
598607
let last_instr_below = self
@@ -1011,6 +1020,11 @@ impl<M: MachineRegalloc> RegAllocContext<'_, M> {
10111020
.can_split_before(ProgramPoint::before(instr))
10121021
}
10131022

1023+
/// Reports whether `fragment` has a split neighbor on either side.
///
/// Looks the fragment up in `self.live_set_fragments` and returns `true` when its
/// `prev_split_neighbor` or its `next_split_neighbor` is `Some`, and `false` when both are
/// `None`.
///
/// NOTE(review): presumably these neighbor links are only populated when a fragment has been
/// produced by an earlier split -- confirm where they are assigned.
fn is_fragment_split(&self, fragment: LiveSetFragment) -> bool {
1024+
let fragment = &self.live_set_fragments[fragment];
1025+
fragment.prev_split_neighbor.is_some() || fragment.next_split_neighbor.is_some()
1026+
}
1027+
10141028
fn is_fragment_global(&self, fragment: LiveSetFragment) -> bool {
10151029
let hull = self.fragment_hull(fragment);
10161030
self.lir.instr_block_index(hull.start.instr())

crates/filetests/cases/codegen/tdn/array_jagged.spdr

Lines changed: 95 additions & 99 deletions
Large diffs are not rendered by default.

crates/filetests/cases/codegen/tdn/bittest_run.spdr

Lines changed: 368 additions & 367 deletions
Large diffs are not rendered by default.

crates/filetests/cases/codegen/tdn/calc_loop_nest_debug_canon.spdr

Lines changed: 327 additions & 335 deletions
Large diffs are not rendered by default.

crates/filetests/cases/codegen/tdn/large_spills.spdr

Lines changed: 2123 additions & 2124 deletions
Large diffs are not rendered by default.

crates/filetests/cases/isel-regalloc/call_permute_args_high_pressure_large.spdr

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -12,29 +12,29 @@ func @call_permute_args_high_pressure(i32, i32, i32, i32, i32, i32, ptr) {
1212
# nextln: spill2: 16
1313
# nextln: spill3: 24
1414
# nextln: block0:
15-
# nextln: 0000: $$rax = MovRRbp { offset: 16 }
16-
# nextln: 0001: $$r14 = MovRM(S32) $$rax
17-
# nextln: 0002: $$r13 = MovRM(S32) $$rax
18-
# nextln: 0003: $$r12 = MovRM(S32) $$rax
19-
# nextln: 0004: $$rbx = MovRM(S32) $$rax
20-
# nextln: 0005: $$r15 = MovRM(S32) $$rax
21-
# nextln: 0006: $$r10 = MovRM(S32) $$rax
15+
# nextln: $$rax = $$r8
16+
# nextln: $$r8 = $$r9
17+
# nextln: 0000: $$r9 = MovRRbp { offset: 16 }
18+
# nextln: 0001: $$r14 = MovRM(S32) $$r9
19+
# nextln: 0002: $$r13 = MovRM(S32) $$r9
20+
# nextln: 0003: $$r12 = MovRM(S32) $$r9
21+
# nextln: 0004: $$rbx = MovRM(S32) $$r9
22+
# nextln: 0005: $$r15 = MovRM(S32) $$r9
23+
# nextln: 0006: $$r10 = MovRM(S32) $$r9
2224
# nextln: $$spill3 = $$r10
23-
# nextln: 0007: $$r10 = MovRM(S32) $$rax
25+
# nextln: 0007: $$r10 = MovRM(S32) $$r9
2426
# nextln: $$spill2 = $$r10
25-
# nextln: 0008: $$r10 = MovRM(S32) $$rax
27+
# nextln: 0008: $$r10 = MovRM(S32) $$r9
2628
# nextln: $$spill1 = $$r10
27-
# nextln: 0009: $$rax = MovRM(S32) $$rax
28-
# nextln: $$spill0 = $$rax
29-
# nextln: 0010: $$rax = FuncAddrAbs(External(extfunc1))
30-
# nextln: $$r10 = $$rdi
29+
# nextln: 0009: $$r9 = MovRM(S32) $$r9
30+
# nextln: $$spill0 = $$r9
31+
# nextln: 0010: $$r10 = FuncAddrAbs(External(extfunc1))
32+
# nextln: $$r9 = $$rdi
3133
# nextln: $$rdi = $$rsi
3234
# nextln: $$rsi = $$rdx
3335
# nextln: $$rdx = $$rcx
34-
# nextln: $$rcx = $$r8
35-
# nextln: $$r8 = $$r9
36-
# nextln: $$r9 = $$r10
37-
# nextln: 0011: CallRm $$rax, $$rdi, $$rsi, $$rdx, $$rcx, $$r8, $$r9
36+
# nextln: $$rcx = $$rax
37+
# nextln: 0011: CallRm $$r10, $$rdi, $$rsi, $$rdx, $$rcx, $$r8, $$r9
3838
# nextln: 0012: $$rax = FuncAddrAbs(External(extfunc0))
3939
# nextln: $$rdi = $$r14
4040
# nextln: 0013: CallRm $$rax, $$rdi

crates/filetests/cases/isel-regalloc/call_permute_args_high_pressure_small.spdr

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,14 @@ func @call_permute_args_high_pressure(i32, i32, i32, i32, i32, i32, ptr) {
1818
# nextln: $$rdi = $$rsi
1919
# nextln: 0000: $$rsi = MovRRbp { offset: 16 }
2020
# nextln: 0001: $$r15 = MovRM(S32) $$rsi
21-
# nextln: 0002: $$rbx = MovRM(S32) $$rsi
22-
# nextln: 0003: $$r12 = MovRM(S32) $$rsi
21+
# nextln: 0002: $$r10 = MovRM(S32) $$rsi
22+
# nextln: $$spill3 = $$r10
23+
# nextln: 0003: $$rbx = MovRM(S32) $$rsi
2324
# nextln: 0004: $$r13 = MovRM(S32) $$rsi
2425
# nextln: 0005: $$r14 = MovRM(S32) $$rsi
2526
# nextln: 0006: $$r10 = MovRM(S32) $$rsi
26-
# nextln: $$spill3 = $$r10
27-
# nextln: 0007: $$r10 = MovRM(S32) $$rsi
2827
# nextln: $$spill2 = $$r10
28+
# nextln: 0007: $$r12 = MovRM(S32) $$rsi
2929
# nextln: 0008: $$r10 = MovRM(S32) $$rsi
3030
# nextln: $$spill1 = $$r10
3131
# nextln: 0009: $$rsi = MovRM(S32) $$rsi
@@ -36,17 +36,17 @@ func @call_permute_args_high_pressure(i32, i32, i32, i32, i32, i32, ptr) {
3636
# nextln: 0010: CallRel(External(extfunc1)) $$rdi, $$rsi, $$rdx, $$rcx, $$r8, $$r9
3737
# nextln: $$rdi = $$r15
3838
# nextln: 0011: CallRel(External(extfunc0)) $$rdi
39-
# nextln: $$rdi = $$rbx
39+
# nextln: $$rdi = $$spill3
4040
# nextln: 0012: CallRel(External(extfunc0)) $$rdi
41-
# nextln: $$rdi = $$r12
41+
# nextln: $$rdi = $$rbx
4242
# nextln: 0013: CallRel(External(extfunc0)) $$rdi
4343
# nextln: $$rdi = $$r13
4444
# nextln: 0014: CallRel(External(extfunc0)) $$rdi
4545
# nextln: $$rdi = $$r14
4646
# nextln: 0015: CallRel(External(extfunc0)) $$rdi
47-
# nextln: $$rdi = $$spill3
48-
# nextln: 0016: CallRel(External(extfunc0)) $$rdi
4947
# nextln: $$rdi = $$spill2
48+
# nextln: 0016: CallRel(External(extfunc0)) $$rdi
49+
# nextln: $$rdi = $$r12
5050
# nextln: 0017: CallRel(External(extfunc0)) $$rdi
5151
# nextln: $$rdi = $$spill1
5252
# nextln: 0018: CallRel(External(extfunc0)) $$rdi

crates/filetests/cases/isel-regalloc/tdn/array_jagged.spdr

Lines changed: 47 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ func @throw_index_out_of_range_exception() {
3131
func @"System.Int32 Tests.CodeGenBringUpTests::ArrayJagged(System.Int32)":i32(i32) {
3232
# check: function `System.Int32 Tests.CodeGenBringUpTests::ArrayJagged(System.Int32)`:
3333
# nextln: clobbers: rax, rbx, rcx, rdx, rdi, rsi, r8, r9, r10, r11, r12, r13, r14, r15
34-
# nextln: frame: size 80, align 8
34+
# nextln: frame: size 72, align 8
3535
# nextln: !0: 0
3636
# nextln: spill0: 8
3737
# nextln: spill1: 16
@@ -41,7 +41,6 @@ func @"System.Int32 Tests.CodeGenBringUpTests::ArrayJagged(System.Int32)":i32(i3
4141
# nextln: spill5: 48
4242
# nextln: spill6: 56
4343
# nextln: spill7: 64
44-
# nextln: spill8: 72
4544
# nextln: block0:
4645
# nextln: $$spill2 = $$rdi
4746
# nextln: 0000: Jump(block1)
@@ -52,15 +51,15 @@ func @"System.Int32 Tests.CodeGenBringUpTests::ArrayJagged(System.Int32)":i32(i3
5251
# nextln: block2:
5352
# nextln: 0004: $$rax = MovRmS32(2)
5453
# nextln: 0005: $$r12 = MovsxRRm(Ext32_64) $$rax
55-
# nextln: $$spill7 = $$rax
54+
# nextln: $$spill6 = $$rax
5655
# nextln: 0006: $$spill1 = MovRmS32(4)
5756
# nextln: 0007: $$rcx = ImulRRmI(S64, 4) $$r12
58-
# nextln: 0008: $$spill5 = MovRmS32(8)
57+
# nextln: 0008: $$spill4 = MovRmS32(8)
5958
# nextln: 0009: $$rax = ImulRRmI(S64, 8) $$r12
6059
# nextln: 0010: $$rdx = MovRmS32(20)
6160
# nextln: $$rdi = $$rdx
6261
# nextln: 0011: $$rdi = AluRRm(S64, Add) $$rdi, $$rcx
63-
# nextln: $$spill8 = $$rdi
62+
# nextln: $$spill7 = $$rdi
6463
# nextln: $$spill0 = $$rdx
6564
# nextln: 0012: $$rcx = MovRmS32(170)
6665
# nextln: 0013: $$rdx = MovRmS32(24)
@@ -77,90 +76,86 @@ func @"System.Int32 Tests.CodeGenBringUpTests::ArrayJagged(System.Int32)":i32(i3
7776
# nextln: 0020: MovMR(S32) $$rcx, $$r12
7877
# nextln: 0021: MovStackR(!0, S64) $$rax
7978
# nextln: 0022: $$r15 = MovRStack(!0, S64)
80-
# nextln: $$rsi = $$spill8
79+
# nextln: $$rsi = $$spill7
8180
# nextln: $$rdi = $$r13
8281
# nextln: 0023: $$rax = CallRel(Internal(func0)) $$rdi, $$rsi
8382
# nextln: $$rdi = $$r13
8483
# nextln: $$rcx = $$rax
8584
# nextln: 0024: $$rcx = AluRmI(S64, Add, 16) $$rcx
8685
# nextln: 0025: MovMR(S32) $$rcx, $$r12
87-
# nextln: 0026: $$rdx = MovRM(S32) $$rcx
88-
# nextln: $$rcx = $$r14
89-
# nextln: 0027: AluRRm(S64, Cmp) $$rcx, $$rdx
86+
# nextln: 0026: $$rcx = MovRM(S32) $$rcx
87+
# nextln: 0027: AluRRm(S64, Cmp) $$r14, $$rcx
9088
# nextln: 0028: Jumpcc(L, block3, block4)
9189
# nextln: block3:
92-
# nextln: $$rdx = $$spill1
93-
# nextln: $$r8 = $$rdx
94-
# nextln: 0029: $$r8 = ImulRRm(S64) $$r8, $$rcx
95-
# nextln: $$rdx = $$r15
96-
# nextln: 0030: $$rdx = AluRmI(S64, Add, 16) $$rdx
97-
# nextln: $$r9 = $$spill0
98-
# nextln: $$rsi = $$r9
99-
# nextln: 0031: $$rsi = AluRRm(S64, Add) $$rsi, $$r8
100-
# nextln: $$r8 = $$rax
101-
# nextln: 0032: $$r8 = AluRRm(S64, Add) $$r8, $$rsi
102-
# nextln: 0033: MovMR(S32) $$r8, $$rbx
103-
# nextln: $$spill6 = $$rsi
104-
# nextln: $$spill4 = $$rcx
105-
# nextln: 0034: $$rcx = MovRM(S32) $$rdx
90+
# nextln: $$rcx = $$spill1
91+
# nextln: $$rsi = $$rcx
92+
# nextln: 0029: $$rsi = ImulRRm(S64) $$rsi, $$r14
93+
# nextln: $$rcx = $$r15
94+
# nextln: 0030: $$rcx = AluRmI(S64, Add, 16) $$rcx
95+
# nextln: $$r8 = $$spill0
96+
# nextln: $$rdx = $$r8
97+
# nextln: 0031: $$rdx = AluRRm(S64, Add) $$rdx, $$rsi
98+
# nextln: $$rsi = $$rax
99+
# nextln: 0032: $$rsi = AluRRm(S64, Add) $$rsi, $$rdx
100+
# nextln: 0033: MovMR(S32) $$rsi, $$rbx
101+
# nextln: $$spill5 = $$rdx
102+
# nextln: 0034: $$rcx = MovRM(S32) $$rcx
106103
# nextln: 0035: $$rdx = MovRZ
107104
# nextln: 0036: $$r13 = MovsxRRm(Ext32_64) $$rdx
108105
# nextln: 0037: AluRRm(S64, Cmp) $$r13, $$rcx
109106
# nextln: 0038: Jumpcc(L, block6, block7)
110107
# nextln: block6:
111-
# nextln: $$r14 = $$spill5
112-
# nextln: $$rcx = $$r14
108+
# nextln: $$rcx = $$spill4
113109
# nextln: 0039: $$rcx = ImulRRm(S64) $$rcx, $$r13
114110
# nextln: $$rbx = $$spill3
115111
# nextln: $$rdx = $$rbx
116112
# nextln: 0040: $$rdx = AluRRm(S64, Add) $$rdx, $$rcx
117113
# nextln: 0041: $$r15 = AluRRm(S64, Add) $$r15, $$rdx
118114
# nextln: 0042: MovMR(S64) $$r15, $$rax
119115
# nextln: 0043: $$r15 = MovRStack(!0, S64)
120-
# nextln: $$rsi = $$spill8
116+
# nextln: $$rsi = $$spill7
121117
# nextln: 0044: $$rax = CallRel(Internal(func0)) $$rdi, $$rsi
122-
# nextln: $$rdi = $$r14
123-
# nextln: $$rsi = $$rax
124-
# nextln: 0045: $$rsi = AluRmI(S64, Add, 16) $$rsi
125-
# nextln: 0046: MovMR(S32) $$rsi, $$r12
126-
# nextln: 0047: $$rcx = MovRM(S32) $$rsi
118+
# nextln: $$rdi = $$rax
119+
# nextln: 0045: $$rdi = AluRmI(S64, Add, 16) $$rdi
120+
# nextln: 0046: MovMR(S32) $$rdi, $$r12
121+
# nextln: 0047: $$rcx = MovRM(S32) $$rdi
127122
# nextln: 0048: AluRRm(S64, Cmp) $$r13, $$rcx
128123
# nextln: 0049: Jumpcc(L, block8, block9)
129124
# nextln: block8:
130125
# nextln: $$rcx = $$spill1
131-
# nextln: $$r8 = $$rcx
132-
# nextln: 0050: $$r8 = ImulRRm(S64) $$r8, $$r13
126+
# nextln: $$rsi = $$rcx
127+
# nextln: 0050: $$rsi = ImulRRm(S64) $$rsi, $$r13
133128
# nextln: $$rdx = $$spill0
134-
# nextln: $$r9 = $$rdx
135-
# nextln: 0051: $$r9 = AluRRm(S64, Add) $$r9, $$r8
136-
# nextln: $$r8 = $$rax
137-
# nextln: 0052: $$r8 = AluRRm(S64, Add) $$r8, $$r9
138-
# nextln: $$r9 = $$spill7
139-
# nextln: 0053: MovMR(S32) $$r8, $$r9
140-
# nextln: 0054: $$r8 = MovRM(S32) $$rsi
141-
# nextln: $$rsi = $$spill4
142-
# nextln: 0055: AluRRm(S64, Cmp) $$rsi, $$r8
129+
# nextln: $$r8 = $$rdx
130+
# nextln: 0051: $$r8 = AluRRm(S64, Add) $$r8, $$rsi
131+
# nextln: $$rsi = $$rax
132+
# nextln: 0052: $$rsi = AluRRm(S64, Add) $$rsi, $$r8
133+
# nextln: $$r8 = $$spill6
134+
# nextln: 0053: MovMR(S32) $$rsi, $$r8
135+
# nextln: 0054: $$rdi = MovRM(S32) $$rdi
136+
# nextln: 0055: AluRRm(S64, Cmp) $$r14, $$rdi
143137
# nextln: 0056: Jumpcc(L, block10, block11)
144138
# nextln: block10:
145-
# nextln: $$r8 = $$r15
146-
# nextln: 0057: $$r8 = AluRmI(S64, Add, 16) $$r8
147-
# nextln: $$r9 = $$rax
148-
# nextln: 0058: $$r9 = AluRRm(S64, Add) $$r9, $$spill6
149-
# nextln: 0059: $$r10 = MovRmS32(3)
150-
# nextln: 0060: MovMR(S32) $$r9, $$r10
151-
# nextln: 0061: $$r8 = MovRM(S32) $$r8
152-
# nextln: 0062: AluRRm(S64, Cmp) $$rsi, $$r8
139+
# nextln: $$rdi = $$r15
140+
# nextln: 0057: $$rdi = AluRmI(S64, Add, 16) $$rdi
141+
# nextln: $$rsi = $$rax
142+
# nextln: 0058: $$rsi = AluRRm(S64, Add) $$rsi, $$spill5
143+
# nextln: 0059: $$r8 = MovRmS32(3)
144+
# nextln: 0060: MovMR(S32) $$rsi, $$r8
145+
# nextln: 0061: $$rdi = MovRM(S32) $$rdi
146+
# nextln: 0062: AluRRm(S64, Cmp) $$r14, $$rdi
153147
# nextln: 0063: Jumpcc(L, block12, block13)
154148
# nextln: block12:
155-
# nextln: 0064: $$rdi = ImulRRm(S64) $$rdi, $$rsi
149+
# nextln: $$rdi = $$spill4
150+
# nextln: 0064: $$rdi = ImulRRm(S64) $$rdi, $$r14
156151
# nextln: 0065: $$rbx = AluRRm(S64, Add) $$rbx, $$rdi
157152
# nextln: 0066: $$r15 = AluRRm(S64, Add) $$r15, $$rbx
158153
# nextln: 0067: MovMR(S64) $$r15, $$rax
159154
# nextln: 0068: $$rax = MovRStack(!0, S64)
160155
# nextln: $$rdi = $$rax
161156
# nextln: 0069: $$rdi = AluRmI(S64, Add, 16) $$rdi
162157
# nextln: 0070: $$rdi = MovRM(S32) $$rdi
163-
# nextln: 0071: AluRRm(S64, Cmp) $$rsi, $$rdi
158+
# nextln: 0071: AluRRm(S64, Cmp) $$r14, $$rdi
164159
# nextln: 0072: Jumpcc(L, block14, block15)
165160
# nextln: block14:
166161
# nextln: 0073: $$rax = AluRRm(S64, Add) $$rax, $$rbx

0 commit comments

Comments
 (0)