Skip to content

Commit a17a10e

Browse files
committed
Finish x64 float-to-unsigned-64-bit codegen support
We now use an absolute relocation to access the constant pool when building with the large-abs internal relocation model.
1 parent 1b487f0 commit a17a10e

File tree

5 files changed

+106
-26
lines changed

5 files changed

+106
-26
lines changed

crates/codegen/src/target/x64.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,7 @@ pub enum X64Instr {
254254
Cvts2si(OperandSize, SseFpuPrecision),
255255
PseudoUint64ToFloat(SseFpuPrecision),
256256
PseudoFloatToUint64Rel(SseFpuPrecision),
257+
PseudoFloatToUint64Abs(SseFpuPrecision),
257258
MovGprmXmm(OperandSize),
258259
/// Load from [rbp + offset]
259260
MovRRbp {
@@ -309,6 +310,7 @@ impl X64Instr {
309310
X64Instr::Cvts2si(..) => false,
310311
X64Instr::PseudoUint64ToFloat(..) => false,
311312
X64Instr::PseudoFloatToUint64Rel(..) => false,
313+
X64Instr::PseudoFloatToUint64Abs(..) => false,
312314
X64Instr::MovGprmXmm(..) => false,
313315
X64Instr::MovRRbp { .. } => false,
314316
X64Instr::MovsRRbp { .. } => false,
@@ -356,6 +358,7 @@ impl X64Instr {
356358
X64Instr::Cvts2si(..) => false,
357359
X64Instr::PseudoUint64ToFloat(..) => true,
358360
X64Instr::PseudoFloatToUint64Rel(..) => true,
361+
X64Instr::PseudoFloatToUint64Abs(..) => true,
359362
X64Instr::MovGprmXmm(..) => false,
360363
X64Instr::Setcc(..) => false,
361364
X64Instr::MovRRbp { .. } => false,

crates/codegen/src/target/x64/emit.rs

Lines changed: 49 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,14 @@ impl MachineEmit for X64Machine {
332332
defs[1].as_reg().unwrap(),
333333
defs[2].as_reg().unwrap(),
334334
),
335+
&X64Instr::PseudoFloatToUint64Abs(prec) => emit_float_to_uint64_abs(
336+
buffer,
337+
prec,
338+
defs[0].as_reg().unwrap(),
339+
uses[0].as_reg().unwrap(),
340+
defs[1].as_reg().unwrap(),
341+
defs[2].as_reg().unwrap(),
342+
),
335343
&X64Instr::MovGprmXmm(op_size) => emit_mov_gprm_xmm(
336344
buffer,
337345
op_size,
@@ -623,10 +631,48 @@ fn emit_float_to_uint64_rel(
623631
src: PhysReg,
624632
tmp_xmm1: PhysReg,
625633
tmp_xmm2: PhysReg,
634+
) {
635+
let f_1p63 = get_f_1p63(buffer, prec);
636+
emit_movs_r_rm_rip_reloc(buffer, prec, tmp_xmm1, BufferRelocTarget::Constant(f_1p63));
637+
emit_float_to_uint64_common(buffer, prec, dest, src, tmp_xmm1, tmp_xmm2);
638+
}
639+
640+
fn emit_float_to_uint64_abs(
641+
buffer: &mut CodeBuffer<X64Fixup>,
642+
prec: SseFpuPrecision,
643+
dest: PhysReg,
644+
src: PhysReg,
645+
tmp_xmm1: PhysReg,
646+
tmp_xmm2: PhysReg,
647+
) {
648+
let f_1p63 = get_f_1p63(buffer, prec);
649+
650+
// Use `dest` as a temporary for the constant pool access first.
651+
emit_movabs_r_i_reloc(buffer, dest, BufferRelocTarget::Constant(f_1p63));
652+
emit_movs_r_rm(
653+
buffer,
654+
prec,
655+
tmp_xmm1,
656+
RegMem::Mem(RawAddrMode::BaseIndexOff {
657+
base: Some(dest),
658+
index: None,
659+
offset: 0,
660+
}),
661+
);
662+
663+
emit_float_to_uint64_common(buffer, prec, dest, src, tmp_xmm1, tmp_xmm2);
664+
}
665+
666+
fn emit_float_to_uint64_common(
667+
buffer: &mut CodeBuffer<X64Fixup>,
668+
prec: SseFpuPrecision,
669+
dest: PhysReg,
670+
src: PhysReg,
671+
tmp_xmm1: PhysReg,
672+
tmp_xmm2: PhysReg,
626673
) {
627674
// Emit the following:
628675
//
629-
// movsd tmp_xmm1, [rip + C_f_1p63]
630676
// ucomis[sd] src, tmp_xmm1
631677
// jae has_high_bit
632678
// cvts[sd]2si dest, src
@@ -637,12 +683,12 @@ fn emit_float_to_uint64_rel(
637683
// cvts[sd]2si dest, tmp_xmm2
638684
// btc dest
639685
// done:
686+
//
687+
// assuming `tmp_xmm1` already contains the correct constant.
640688

641689
let high_bit_set = buffer.create_label();
642690
let done = buffer.create_label();
643691

644-
let f_1p63 = get_f_1p63(buffer, prec);
645-
emit_movs_r_rm_rip_reloc(buffer, prec, tmp_xmm1, BufferRelocTarget::Constant(f_1p63));
646692
emit_ucomi(buffer, prec, src, RegMem::Reg(tmp_xmm1));
647693
emit_jcc(buffer, CondCode::Ae, high_bit_set);
648694

crates/codegen/src/target/x64/lower.rs

Lines changed: 18 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ impl MachineLower for X64Machine {
206206
NodeKind::SintToFloat => emit_cvtsi2s(ctx, node),
207207
NodeKind::UintToFloat => select_uinttofloat(ctx, node),
208208
NodeKind::FloatToSint => emit_cvts2si(ctx, node),
209-
NodeKind::FloatToUint => select_floattouint(self, ctx, node)?,
209+
NodeKind::FloatToUint => select_floattouint(self, ctx, node),
210210
NodeKind::PtrOff => select_alu(ctx, node, AluBinOp::Add),
211211
&NodeKind::Load(mem_size) => select_load(ctx, node, mem_size),
212212
&NodeKind::Store(mem_size) => select_store(ctx, node, mem_size),
@@ -552,11 +552,7 @@ fn select_uinttofloat(ctx: &mut IselContext<'_, '_, X64Machine>, node: Node) {
552552
}
553553
}
554554

555-
fn select_floattouint(
556-
machine: &X64Machine,
557-
ctx: &mut IselContext<'_, '_, X64Machine>,
558-
node: Node,
559-
) -> Result<(), MachineIselError> {
555+
fn select_floattouint(machine: &X64Machine, ctx: &mut IselContext<'_, '_, X64Machine>, node: Node) {
560556
let [output] = ctx.node_outputs_exact(node);
561557
let [input] = ctx.node_inputs_exact(node);
562558

@@ -574,27 +570,26 @@ fn select_floattouint(
574570
);
575571
}
576572
Type::I64 => {
577-
if machine.config.internal_code_model == CodeModel::SmallPic {
578-
let tmp_xmm1 = ctx.create_temp_vreg(RC_XMM);
579-
let tmp_xmm2 = ctx.create_temp_vreg(RC_XMM);
573+
let instr = match machine.config.internal_code_model {
574+
CodeModel::SmallPic => X64Instr::PseudoFloatToUint64Rel(SseFpuPrecision::Double),
575+
CodeModel::LargeAbs => X64Instr::PseudoFloatToUint64Abs(SseFpuPrecision::Double),
576+
};
580577

581-
ctx.emit_instr(
582-
X64Instr::PseudoFloatToUint64Rel(SseFpuPrecision::Double),
583-
&[
584-
DefOperand::any_reg(output),
585-
DefOperand::new(tmp_xmm1, DefOperandConstraint::AnyReg, OperandPos::Early),
586-
DefOperand::any_reg(tmp_xmm2),
587-
],
588-
&[UseOperand::any_reg(input)],
589-
);
590-
} else {
591-
return Err(MachineIselError);
592-
}
578+
let tmp_xmm1 = ctx.create_temp_vreg(RC_XMM);
579+
let tmp_xmm2 = ctx.create_temp_vreg(RC_XMM);
580+
581+
ctx.emit_instr(
582+
instr,
583+
&[
584+
DefOperand::any_reg(output),
585+
DefOperand::new(tmp_xmm1, DefOperandConstraint::AnyReg, OperandPos::Early),
586+
DefOperand::any_reg(tmp_xmm2),
587+
],
588+
&[UseOperand::any_reg(input)],
589+
);
593590
}
594591
_ => unreachable!(),
595592
}
596-
597-
Ok(())
598593
}
599594

600595
fn select_load(ctx: &mut IselContext<'_, '_, X64Machine>, node: Node, mem_size: MemSize) {
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# run: codegen[large-abs]
2+
3+
func @floattouint64:i64(f64) {
4+
# check: function `floattouint64`:
5+
# nextln: 000000: 55 push rbp
6+
# nextln: 000001: 48 89 e5 mov rbp, rsp
7+
# nextln: 000004: 48 b8 00 00 00 00 00 00 00 00 movabs rax, 0 # RELOC_ABS64 -> @<CP> + 0
8+
# nextln: 00000e: f2 0f 10 08 movsd xmm1, qword ptr [rax]
9+
# nextln: 000012: 66 0f 2e c1 ucomisd xmm0, xmm1
10+
# nextln: 000016: 0f 83 0a 00 00 00 jae 0x26
11+
# nextln: 00001c: f2 48 0f 2d c0 cvtsd2si rax, xmm0
12+
# nextln: 000021: e9 0e 00 00 00 jmp 0x34
13+
# nextln: 000026: f2 0f 5c c1 subsd xmm0, xmm1
14+
# nextln: 00002a: f2 48 0f 2d c0 cvtsd2si rax, xmm0
15+
# nextln: 00002f: 48 0f ba f8 3f btc rax, 0x3f
16+
# nextln: 000034: 5d pop rbp
17+
# nextln: 000035: c3 ret
18+
# nextln: <CP>:
19+
# nextln: 000000: 00 00 00 00 00 00 e0 43
20+
21+
%0:ctrl, %1:f64 = entry
22+
%2:i64 = floattouint %1
23+
return %0, %2
24+
}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# run: isel[large-abs]
2+
3+
func @floattouint64:i64(f64) {
4+
# check: function `floattouint64`:
5+
# nextln: block0[%1:xmm($$xmm0)]:
6+
# nextln: 0000: %0:gpr(reg)[late], %2:xmm(reg)[early], %3:xmm(reg)[late] = PseudoFloatToUint64Abs(Double) %1(reg)[early]
7+
# nextln: 0001: Ret %0($$rax)[early]
8+
9+
%0:ctrl, %1:f64 = entry
10+
%2:i64 = floattouint %1
11+
return %0, %2
12+
}

0 commit comments

Comments
 (0)