Skip to content

Commit fa67096

Browse files
committed
Spill rematerializable fragments more aggressively
Now, any rematerializable fragment containing no uses is spilled immediately. The assumption is that such fragments don't bear any useful value, and would needlessly hold up a register were they allocated. In practice, while this heuristic works well for fragments containing only definitions (see the `bittest_run` test), it might be a bit overzealous about empty ranges connecting previous uses of a value to future ones; if those can actually be allocated without creating any copies, doing so is preferable to rematerializing the value later.
1 parent e35e535 commit fa67096

File tree

14 files changed

+5531
-5529
lines changed

14 files changed

+5531
-5529
lines changed

crates/codegen/src/regalloc/assign.rs

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,10 @@ use crate::{
1313
use super::{
1414
conflict::{iter_btree_ranges, iter_conflicts, iter_slice_ranges},
1515
context::RegAllocContext,
16-
types::{ConflictBoundary, LiveSetFragment, ProgramRange, QueuedFragment, RangeEndKey},
16+
types::{
17+
ConflictBoundary, LiveSetFragment, LiveSetFragmentFlags, ProgramRange, QueuedFragment,
18+
RangeEndKey,
19+
},
1720
utils::{coalesce_slice, get_weight_at_instr},
1821
RegallocError,
1922
};
@@ -117,6 +120,17 @@ impl<M: MachineRegalloc> RegAllocContext<'_, M> {
117120
}
118121

119122
fn try_assign(&mut self, fragment: LiveSetFragment) -> Result<(), RegallocError> {
123+
if self.live_set_fragments[fragment]
124+
.flags
125+
.contains(LiveSetFragmentFlags::REMAT_NO_USES)
126+
{
127+
// Don't even bother if a rematerializable range contains no uses - splitting should
128+
// already have carved out the interesting portions of the original live set, so just
129+
// let the value be rematerialized there.
130+
self.spill_fragment_and_neighbors(fragment);
131+
return Ok(());
132+
}
133+
120134
let live_set = self.live_set_fragments[fragment].live_set;
121135
let class = self.live_sets[live_set].class;
122136

crates/codegen/src/regalloc/fragment.rs

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,21 +45,48 @@ impl<M: MachineRegalloc> RegAllocContext<'_, M> {
4545
}
4646

4747
pub fn compute_live_fragment_properties(&mut self, fragment: LiveSetFragment) {
48+
enum RematNoUseState {
49+
Uninit,
50+
Yes(VirtReg),
51+
No,
52+
}
53+
4854
let mut size = 0;
4955
let mut total_weight = 0.0;
5056
let mut some_instr_needs_reg = false;
5157

58+
// Track whether this fragment is completely rematerializable and contains no uses.
59+
let mut remat_no_use_state = RematNoUseState::Uninit;
60+
5261
let fragment_data = &mut self.live_set_fragments[fragment];
5362
fragment_data.hints.clear();
5463

5564
for range in &fragment_data.ranges {
5665
let range_data = &mut self.live_ranges[range.live_range];
66+
let vreg = range_data.vreg;
5767
range_data.fragment = fragment;
5868
size += range.prog_range.len();
5969

70+
// The fragment can only be rematerialized when all its ranges come from the same vreg,
71+
// and that vreg can itself be rematerialized.
72+
let can_remat = match remat_no_use_state {
73+
RematNoUseState::Uninit if self.remattable_vreg_defs[vreg].is_some() => true,
74+
RematNoUseState::Yes(existing_vreg) if existing_vreg == vreg => true,
75+
_ => false,
76+
};
77+
78+
remat_no_use_state = if can_remat {
79+
RematNoUseState::Yes(vreg)
80+
} else {
81+
RematNoUseState::No
82+
};
83+
6084
for instr in &range_data.instrs {
6185
total_weight += instr.weight();
6286
some_instr_needs_reg |= instr.needs_reg();
87+
if !instr.is_def() {
88+
remat_no_use_state = RematNoUseState::No;
89+
}
6390
}
6491

6592
if let Some(range_hints) = self.live_range_hints.get(&range.live_range) {
@@ -72,12 +99,23 @@ impl<M: MachineRegalloc> RegAllocContext<'_, M> {
7299
sort_reg_hints(&mut fragment_data.hints);
73100

74101
fragment_data.size = size;
75-
let is_atomic = some_instr_needs_reg && covers_single_instr(fragment_data.hull());
102+
103+
let remat_no_uses = matches!(remat_no_use_state, RematNoUseState::Yes(..));
104+
105+
// Single-instruction fragments requiring a register _cannot_ be spilled (we don't have a
106+
// way to chop them smaller), unless they only cover a rematerializable definition, in which
107+
// case spilling will just kill the instruction.
108+
let is_atomic =
109+
!remat_no_uses && some_instr_needs_reg && covers_single_instr(fragment_data.hull());
76110

77111
fragment_data
78112
.flags
79113
.set(LiveSetFragmentFlags::ATOMIC, is_atomic);
80114

115+
fragment_data
116+
.flags
117+
.set(LiveSetFragmentFlags::REMAT_NO_USES, remat_no_uses);
118+
81119
fragment_data.spill_weight = if is_atomic {
82120
ATOMIC_FRAGMENT_WEIGHT
83121
} else {

crates/codegen/src/regalloc/types.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,7 @@ bitflags! {
359359
pub struct LiveSetFragmentFlags: u8 {
360360
const ATOMIC = 0b01;
361361
const SPILLED = 0b10;
362+
const REMAT_NO_USES = 0b100;
362363
}
363364
}
364365

crates/filetests/cases/codegen/tdn/array_jagged.spdr

Lines changed: 72 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ func @"System.Int32 Tests.CodeGenBringUpTests::ArrayJagged(System.Int32)":i32(i3
7171
# nextln: 000084: 44 89 29 mov dword ptr [rcx], r13d
7272
# nextln: 000087: 8b 09 mov ecx, dword ptr [rcx]
7373
# nextln: 000089: 4c 3b f1 cmp r14, rcx
74-
# nextln: 00008c: 0f 8d 2d 01 00 00 jge 0x1bf
74+
# nextln: 00008c: 0f 8d 30 01 00 00 jge 0x1c2
7575
# nextln: 000092: b9 04 00 00 00 mov ecx, 4
7676
# nextln: 000097: 48 89 cf mov rdi, rcx
7777
# nextln: 00009a: 49 0f af fe imul rdi, r14
@@ -88,78 +88,78 @@ func @"System.Int32 Tests.CodeGenBringUpTests::ArrayJagged(System.Int32)":i32(i3
8888
# nextln: 0000c3: 33 ff xor edi, edi
8989
# nextln: 0000c5: 4c 63 e7 movsxd r12, edi
9090
# nextln: 0000c8: 4c 3b e2 cmp r12, rdx
91-
# nextln: 0000cb: 0f 8d ee 00 00 00 jge 0x1bf
92-
# nextln: 0000d1: 48 89 cf mov rdi, rcx
93-
# nextln: 0000d4: 49 0f af fc imul rdi, r12
94-
# nextln: 0000d8: ba 18 00 00 00 mov edx, 0x18
95-
# nextln: 0000dd: 48 03 d7 add rdx, rdi
96-
# nextln: 0000e0: 4c 03 fa add r15, rdx
91+
# nextln: 0000cb: 0f 8d f1 00 00 00 jge 0x1c2
92+
# nextln: 0000d1: 48 89 ca mov rdx, rcx
93+
# nextln: 0000d4: 49 0f af d4 imul rdx, r12
94+
# nextln: 0000d8: b9 18 00 00 00 mov ecx, 0x18
95+
# nextln: 0000dd: 48 03 ca add rcx, rdx
96+
# nextln: 0000e0: 4c 03 f9 add r15, rcx
9797
# nextln: 0000e3: 49 89 07 mov qword ptr [r15], rax
98-
# nextln: 0000e6: 49 89 cf mov r15, rcx
99-
# nextln: 0000e9: 48 8b 1c 24 mov rbx, qword ptr [rsp]
100-
# nextln: 0000ed: bf aa 00 00 00 mov edi, 0xaa
101-
# nextln: 0000f2: e8 00 00 00 00 call 0xf7 # RELOC_PC32 -> @gc_new + -4
102-
# nextln: 0000f7: 48 89 c7 mov rdi, rax
103-
# nextln: 0000fa: 48 83 c7 10 add rdi, 0x10
104-
# nextln: 0000fe: 44 89 2f mov dword ptr [rdi], r13d
105-
# nextln: 000101: 8b 0f mov ecx, dword ptr [rdi]
106-
# nextln: 000103: 4c 3b e1 cmp r12, rcx
107-
# nextln: 000106: 0f 8d b3 00 00 00 jge 0x1bf
108-
# nextln: 00010c: b9 04 00 00 00 mov ecx, 4
109-
# nextln: 000111: 49 89 c8 mov r8, rcx
110-
# nextln: 000114: 4d 0f af c4 imul r8, r12
111-
# nextln: 000118: ba 14 00 00 00 mov edx, 0x14
112-
# nextln: 00011d: 48 89 d6 mov rsi, rdx
113-
# nextln: 000120: 49 03 f0 add rsi, r8
114-
# nextln: 000123: 49 89 c0 mov r8, rax
115-
# nextln: 000126: 4c 03 c6 add r8, rsi
116-
# nextln: 000129: be 02 00 00 00 mov esi, 2
117-
# nextln: 00012e: 41 89 30 mov dword ptr [r8], esi
118-
# nextln: 000131: 8b 3f mov edi, dword ptr [rdi]
119-
# nextln: 000133: 4c 3b f7 cmp r14, rdi
120-
# nextln: 000136: 0f 8d 83 00 00 00 jge 0x1bf
121-
# nextln: 00013c: 48 89 df mov rdi, rbx
122-
# nextln: 00013f: 48 83 c7 10 add rdi, 0x10
123-
# nextln: 000143: 48 89 c6 mov rsi, rax
124-
# nextln: 000146: 48 03 74 24 10 add rsi, qword ptr [rsp + 0x10]
125-
# nextln: 00014b: 41 b8 03 00 00 00 mov r8d, 3
126-
# nextln: 000151: 44 89 06 mov dword ptr [rsi], r8d
127-
# nextln: 000154: 8b 3f mov edi, dword ptr [rdi]
128-
# nextln: 000156: 4c 3b f7 cmp r14, rdi
129-
# nextln: 000159: 0f 8d 60 00 00 00 jge 0x1bf
130-
# nextln: 00015f: 4d 0f af fe imul r15, r14
131-
# nextln: 000163: bf 18 00 00 00 mov edi, 0x18
132-
# nextln: 000168: 49 03 ff add rdi, r15
133-
# nextln: 00016b: 48 03 df add rbx, rdi
134-
# nextln: 00016e: 48 89 03 mov qword ptr [rbx], rax
135-
# nextln: 000171: 48 8b 04 24 mov rax, qword ptr [rsp]
136-
# nextln: 000175: 48 89 c6 mov rsi, rax
137-
# nextln: 000178: 48 83 c6 10 add rsi, 0x10
138-
# nextln: 00017c: 8b 36 mov esi, dword ptr [rsi]
139-
# nextln: 00017e: 4c 3b f6 cmp r14, rsi
140-
# nextln: 000181: 0f 8d 38 00 00 00 jge 0x1bf
141-
# nextln: 000187: 48 03 c7 add rax, rdi
142-
# nextln: 00018a: 48 8b 00 mov rax, qword ptr [rax]
143-
# nextln: 00018d: 48 89 c7 mov rdi, rax
144-
# nextln: 000190: 48 83 c7 10 add rdi, 0x10
145-
# nextln: 000194: 8b 37 mov esi, dword ptr [rdi]
146-
# nextln: 000196: 48 63 7c 24 08 movsxd rdi, dword ptr [rsp + 8]
147-
# nextln: 00019b: 48 3b fe cmp rdi, rsi
148-
# nextln: 00019e: 0f 8d 1b 00 00 00 jge 0x1bf
149-
# nextln: 0001a4: 48 0f af cf imul rcx, rdi
150-
# nextln: 0001a8: 48 03 d1 add rdx, rcx
151-
# nextln: 0001ab: 48 03 c2 add rax, rdx
152-
# nextln: 0001ae: 8b 00 mov eax, dword ptr [rax]
153-
# nextln: 0001b0: 48 83 c4 28 add rsp, 0x28
154-
# nextln: 0001b4: 5b pop rbx
155-
# nextln: 0001b5: 41 5c pop r12
156-
# nextln: 0001b7: 41 5d pop r13
157-
# nextln: 0001b9: 41 5e pop r14
158-
# nextln: 0001bb: 41 5f pop r15
159-
# nextln: 0001bd: 5d pop rbp
160-
# nextln: 0001be: c3 ret
161-
# nextln: 0001bf: e8 00 00 00 00 call 0x1c4 # RELOC_PC32 -> @throw_index_out_of_range_exception + -4
162-
# nextln: 0001c4: 0f 0b ud2
98+
# nextln: 0000e6: 41 bf 08 00 00 00 mov r15d, 8
99+
# nextln: 0000ec: 48 8b 1c 24 mov rbx, qword ptr [rsp]
100+
# nextln: 0000f0: bf aa 00 00 00 mov edi, 0xaa
101+
# nextln: 0000f5: e8 00 00 00 00 call 0xfa # RELOC_PC32 -> @gc_new + -4
102+
# nextln: 0000fa: 48 89 c7 mov rdi, rax
103+
# nextln: 0000fd: 48 83 c7 10 add rdi, 0x10
104+
# nextln: 000101: 44 89 2f mov dword ptr [rdi], r13d
105+
# nextln: 000104: 8b 0f mov ecx, dword ptr [rdi]
106+
# nextln: 000106: 4c 3b e1 cmp r12, rcx
107+
# nextln: 000109: 0f 8d b3 00 00 00 jge 0x1c2
108+
# nextln: 00010f: b9 04 00 00 00 mov ecx, 4
109+
# nextln: 000114: 49 89 c8 mov r8, rcx
110+
# nextln: 000117: 4d 0f af c4 imul r8, r12
111+
# nextln: 00011b: ba 14 00 00 00 mov edx, 0x14
112+
# nextln: 000120: 48 89 d6 mov rsi, rdx
113+
# nextln: 000123: 49 03 f0 add rsi, r8
114+
# nextln: 000126: 49 89 c0 mov r8, rax
115+
# nextln: 000129: 4c 03 c6 add r8, rsi
116+
# nextln: 00012c: be 02 00 00 00 mov esi, 2
117+
# nextln: 000131: 41 89 30 mov dword ptr [r8], esi
118+
# nextln: 000134: 8b 3f mov edi, dword ptr [rdi]
119+
# nextln: 000136: 4c 3b f7 cmp r14, rdi
120+
# nextln: 000139: 0f 8d 83 00 00 00 jge 0x1c2
121+
# nextln: 00013f: 48 89 df mov rdi, rbx
122+
# nextln: 000142: 48 83 c7 10 add rdi, 0x10
123+
# nextln: 000146: 48 89 c6 mov rsi, rax
124+
# nextln: 000149: 48 03 74 24 10 add rsi, qword ptr [rsp + 0x10]
125+
# nextln: 00014e: 41 b8 03 00 00 00 mov r8d, 3
126+
# nextln: 000154: 44 89 06 mov dword ptr [rsi], r8d
127+
# nextln: 000157: 8b 3f mov edi, dword ptr [rdi]
128+
# nextln: 000159: 4c 3b f7 cmp r14, rdi
129+
# nextln: 00015c: 0f 8d 60 00 00 00 jge 0x1c2
130+
# nextln: 000162: 4d 0f af fe imul r15, r14
131+
# nextln: 000166: bf 18 00 00 00 mov edi, 0x18
132+
# nextln: 00016b: 49 03 ff add rdi, r15
133+
# nextln: 00016e: 48 03 df add rbx, rdi
134+
# nextln: 000171: 48 89 03 mov qword ptr [rbx], rax
135+
# nextln: 000174: 48 8b 04 24 mov rax, qword ptr [rsp]
136+
# nextln: 000178: 48 89 c6 mov rsi, rax
137+
# nextln: 00017b: 48 83 c6 10 add rsi, 0x10
138+
# nextln: 00017f: 8b 36 mov esi, dword ptr [rsi]
139+
# nextln: 000181: 4c 3b f6 cmp r14, rsi
140+
# nextln: 000184: 0f 8d 38 00 00 00 jge 0x1c2
141+
# nextln: 00018a: 48 03 c7 add rax, rdi
142+
# nextln: 00018d: 48 8b 00 mov rax, qword ptr [rax]
143+
# nextln: 000190: 48 89 c7 mov rdi, rax
144+
# nextln: 000193: 48 83 c7 10 add rdi, 0x10
145+
# nextln: 000197: 8b 37 mov esi, dword ptr [rdi]
146+
# nextln: 000199: 48 63 7c 24 08 movsxd rdi, dword ptr [rsp + 8]
147+
# nextln: 00019e: 48 3b fe cmp rdi, rsi
148+
# nextln: 0001a1: 0f 8d 1b 00 00 00 jge 0x1c2
149+
# nextln: 0001a7: 48 0f af cf imul rcx, rdi
150+
# nextln: 0001ab: 48 03 d1 add rdx, rcx
151+
# nextln: 0001ae: 48 03 c2 add rax, rdx
152+
# nextln: 0001b1: 8b 00 mov eax, dword ptr [rax]
153+
# nextln: 0001b3: 48 83 c4 28 add rsp, 0x28
154+
# nextln: 0001b7: 5b pop rbx
155+
# nextln: 0001b8: 41 5c pop r12
156+
# nextln: 0001ba: 41 5d pop r13
157+
# nextln: 0001bc: 41 5e pop r14
158+
# nextln: 0001be: 41 5f pop r15
159+
# nextln: 0001c0: 5d pop rbp
160+
# nextln: 0001c1: c3 ret
161+
# nextln: 0001c2: e8 00 00 00 00 call 0x1c7 # RELOC_PC32 -> @throw_index_out_of_range_exception + -4
162+
# nextln: 0001c7: 0f 0b ud2
163163

164164
%0:ctrl, %1:i32 = entry
165165
%2:ctrl, %3:phisel = region %0

0 commit comments

Comments
 (0)