Skip to content

Commit eab658b

Browse files
committed
Add x64 codegen support for float-to-unsigned-64-bit conversion
Right now, this only works with the small-pic internal code model because it needs to access the constant pool. Note that this pseudo-operation would also benefit from regalloc "soft ties".
1 parent 11ff05e commit eab658b

File tree

5 files changed

+139
-3
lines changed

5 files changed

+139
-3
lines changed

crates/codegen/src/target/x64.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,7 @@ pub enum X64Instr {
253253
Cvtsi2s(OperandSize, SseFpuPrecision),
254254
Cvts2si(OperandSize, SseFpuPrecision),
255255
PseudoUint64ToFloat(SseFpuPrecision),
256+
PseudoFloatToUint64Rel(SseFpuPrecision),
256257
MovGprmXmm(OperandSize),
257258
/// Load from [rbp + offset]
258259
MovRRbp {
@@ -307,6 +308,7 @@ impl X64Instr {
307308
X64Instr::Cvtsi2s(..) => false,
308309
X64Instr::Cvts2si(..) => false,
309310
X64Instr::PseudoUint64ToFloat(..) => false,
311+
X64Instr::PseudoFloatToUint64Rel(..) => false,
310312
X64Instr::MovGprmXmm(..) => false,
311313
X64Instr::MovRRbp { .. } => false,
312314
X64Instr::MovsRRbp { .. } => false,
@@ -353,6 +355,7 @@ impl X64Instr {
353355
X64Instr::Cvtsi2s(..) => false,
354356
X64Instr::Cvts2si(..) => false,
355357
X64Instr::PseudoUint64ToFloat(..) => true,
358+
X64Instr::PseudoFloatToUint64Rel(..) => true,
356359
X64Instr::MovGprmXmm(..) => false,
357360
X64Instr::Setcc(..) => false,
358361
X64Instr::MovRRbp { .. } => false,

crates/codegen/src/target/x64/emit.rs

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use ir::node::FunctionRef;
44
use crate::{
55
cfg::Block,
66
code_buffer::{BufferRelocTarget, CodeBuffer, FixupKind, InstrAnchor, InstrSink, Label},
7+
constpool::Constant,
78
emit::{EmitContext, EmitInstrData},
89
frame::FrameLayout,
910
lir::{Instr, PhysReg, PhysRegSet, StackSlot},
@@ -323,6 +324,14 @@ impl MachineEmit for X64Machine {
323324
defs[1].as_reg().unwrap(),
324325
defs[2].as_reg().unwrap(),
325326
),
327+
&X64Instr::PseudoFloatToUint64Rel(prec) => emit_float_to_uint64_rel(
328+
buffer,
329+
prec,
330+
defs[0].as_reg().unwrap(),
331+
uses[0].as_reg().unwrap(),
332+
defs[1].as_reg().unwrap(),
333+
defs[2].as_reg().unwrap(),
334+
),
326335
&X64Instr::MovGprmXmm(op_size) => emit_mov_gprm_xmm(
327336
buffer,
328337
op_size,
@@ -607,6 +616,63 @@ fn emit_uint64_to_float(
607616
buffer.bind_label(done);
608617
}
609618

619+
fn emit_float_to_uint64_rel(
620+
buffer: &mut CodeBuffer<X64Fixup>,
621+
prec: SseFpuPrecision,
622+
dest: PhysReg,
623+
src: PhysReg,
624+
tmp_xmm1: PhysReg,
625+
tmp_xmm2: PhysReg,
626+
) {
627+
// Emit the following:
628+
//
629+
// movs[sd] tmp_xmm1, [rip + C_f_1p63]
630+
// ucomis[sd] src, tmp_xmm1
631+
// jae has_high_bit
632+
// cvts[sd]2si dest, src
633+
// jmp done
634+
// has_high_bit:
635+
// movaps tmp_xmm2, src (omitted when tmp_xmm2 == src)
636+
// subs[sd] tmp_xmm2, tmp_xmm1
637+
// cvts[sd]2si dest, tmp_xmm2
638+
// btc dest, 63
639+
// done:
640+
641+
let high_bit_set = buffer.create_label();
642+
let done = buffer.create_label();
643+
644+
let f_1p63 = get_f_1p63(buffer, prec);
645+
emit_movs_r_rm_rip_reloc(buffer, prec, tmp_xmm1, BufferRelocTarget::Constant(f_1p63));
646+
emit_ucomi(buffer, prec, src, RegMem::Reg(tmp_xmm1));
647+
emit_jcc(buffer, CondCode::Ae, high_bit_set);
648+
649+
emit_cvts2si(buffer, OperandSize::S64, prec, dest, RegMem::Reg(src));
650+
emit_jmp(buffer, done);
651+
652+
buffer.bind_label(high_bit_set);
653+
if tmp_xmm2 != src {
654+
emit_movaps_r_rm(buffer, tmp_xmm2, RegMem::Reg(src));
655+
}
656+
emit_sse_fpu_r_rm(
657+
buffer,
658+
prec,
659+
SseFpuBinOp::Sub,
660+
tmp_xmm2,
661+
RegMem::Reg(tmp_xmm1),
662+
);
663+
emit_cvts2si(buffer, OperandSize::S64, prec, dest, RegMem::Reg(tmp_xmm2));
664+
emit_btc_rm_i(buffer, OperandSize::S64, RegMem::Reg(dest), 63);
665+
666+
buffer.bind_label(done);
667+
}
668+
669+
fn get_f_1p63(buffer: &mut CodeBuffer<X64Fixup>, prec: SseFpuPrecision) -> Constant {
670+
match prec {
671+
SseFpuPrecision::Single => buffer.get_constant(4, &0x5f000000u32.to_le_bytes()),
672+
SseFpuPrecision::Double => buffer.get_constant(8, &0x43e0000000000000u64.to_le_bytes()),
673+
}
674+
}
675+
610676
// Single-instruction emission helpers
611677

612678
fn emit_push(buffer: &mut CodeBuffer<X64Fixup>, reg: PhysReg) {
@@ -960,6 +1026,19 @@ fn emit_setcc_r(buffer: &mut CodeBuffer<X64Fixup>, code: CondCode, dest: PhysReg
9601026
});
9611027
}
9621028

1029+
fn emit_btc_rm_i(buffer: &mut CodeBuffer<X64Fixup>, op_size: OperandSize, arg: RegMem, imm: u8) {
1030+
let (rex, modrm_sib) = encode_reg_mem_parts(arg, |rex| {
1031+
rex.encode_operand_size(op_size);
1032+
0x7
1033+
});
1034+
buffer.instr(|sink| {
1035+
rex.emit(sink);
1036+
sink.emit(&[0xf, 0xba]);
1037+
modrm_sib.emit(sink);
1038+
sink.emit(&[imm]);
1039+
});
1040+
}
1041+
9631042
fn emit_movsx_r_rm(buffer: &mut CodeBuffer<X64Fixup>, width: ExtWidth, dest: PhysReg, src: RegMem) {
9641043
let (opcode, op_size): (&[u8], _) = match width {
9651044
ExtWidth::Ext8_32 => (&[0xf, 0xbe], OperandSize::S32),

crates/codegen/src/target/x64/lower.rs

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,10 @@ use valmatch::match_value;
1212
use crate::{
1313
cfg::Block,
1414
isel::{IselContext, MachineIselError, ParamLoc},
15-
lir::{DefOperand, PhysReg, PhysRegSet, RegClass, UseOperand, VirtReg},
15+
lir::{
16+
DefOperand, DefOperandConstraint, OperandPos, PhysReg, PhysRegSet, RegClass, UseOperand,
17+
VirtReg,
18+
},
1619
machine::MachineLower,
1720
num_utils::{is_sint, is_uint},
1821
target::x64::{CompoundCondCode, SseFpuBinOp, SseFpuCmpCode, SseFpuPrecision},
@@ -203,7 +206,7 @@ impl MachineLower for X64Machine {
203206
NodeKind::SintToFloat => emit_cvtsi2s(ctx, node),
204207
NodeKind::UintToFloat => select_uinttofloat(ctx, node),
205208
NodeKind::FloatToSint => emit_cvts2si(ctx, node),
206-
NodeKind::FloatToUint => select_floattouint(ctx, node)?,
209+
NodeKind::FloatToUint => select_floattouint(self, ctx, node)?,
207210
NodeKind::PtrOff => select_alu(ctx, node, AluBinOp::Add),
208211
&NodeKind::Load(mem_size) => select_load(ctx, node, mem_size),
209212
&NodeKind::Store(mem_size) => select_store(ctx, node, mem_size),
@@ -550,6 +553,7 @@ fn select_uinttofloat(ctx: &mut IselContext<'_, '_, X64Machine>, node: Node) {
550553
}
551554

552555
fn select_floattouint(
556+
machine: &X64Machine,
553557
ctx: &mut IselContext<'_, '_, X64Machine>,
554558
node: Node,
555559
) -> Result<(), MachineIselError> {
@@ -569,7 +573,24 @@ fn select_floattouint(
569573
&[UseOperand::any(input)],
570574
);
571575
}
572-
Type::I64 => return Err(MachineIselError),
576+
Type::I64 => {
577+
if machine.config.internal_code_model == CodeModel::SmallPic {
578+
let tmp_xmm1 = ctx.create_temp_vreg(RC_XMM);
579+
let tmp_xmm2 = ctx.create_temp_vreg(RC_XMM);
580+
581+
ctx.emit_instr(
582+
X64Instr::PseudoFloatToUint64Rel(SseFpuPrecision::Double),
583+
&[
584+
DefOperand::any_reg(output),
585+
DefOperand::new(tmp_xmm1, DefOperandConstraint::AnyReg, OperandPos::Early),
586+
DefOperand::any_reg(tmp_xmm2),
587+
],
588+
&[UseOperand::any_reg(input)],
589+
);
590+
} else {
591+
return Err(MachineIselError);
592+
}
593+
}
573594
_ => unreachable!(),
574595
}
575596

crates/filetests/cases/codegen/floattouint.spdr

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,25 @@ func @floattouint32:i32(f64) {
1212
%2:i32 = floattouint %1
1313
return %0, %2
1414
}
15+
16+
func @floattouint64:i64(f64) {
17+
# check: function `floattouint64`:
18+
# nextln: 000000: 55 push rbp
19+
# nextln: 000001: 48 89 e5 mov rbp, rsp
20+
# nextln: 000004: f2 0f 10 0d 00 00 00 00 movsd xmm1, qword ptr [rip] # RELOC_PC32 -> @<CP> + -4
21+
# nextln: 00000c: 66 0f 2e c1 ucomisd xmm0, xmm1
22+
# nextln: 000010: 0f 83 0a 00 00 00 jae 0x20
23+
# nextln: 000016: f2 48 0f 2d c0 cvtsd2si rax, xmm0
24+
# nextln: 00001b: e9 0e 00 00 00 jmp 0x2e
25+
# nextln: 000020: f2 0f 5c c1 subsd xmm0, xmm1
26+
# nextln: 000024: f2 48 0f 2d c0 cvtsd2si rax, xmm0
27+
# nextln: 000029: 48 0f ba f8 3f btc rax, 0x3f
28+
# nextln: 00002e: 5d pop rbp
29+
# nextln: 00002f: c3 ret
30+
# nextln: <CP>:
31+
# nextln: 000000: 00 00 00 00 00 00 e0 43
32+
33+
%0:ctrl, %1:f64 = entry
34+
%2:i64 = floattouint %1
35+
return %0, %2
36+
}

crates/filetests/cases/isel/floattouint.spdr

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,14 @@ func @floattouint32:i32(f64) {
1010
%2:i32 = floattouint %1
1111
return %0, %2
1212
}
13+
14+
func @floattouint64:i64(f64) {
15+
# check: function `floattouint64`:
16+
# nextln: block0[%1:xmm($$xmm0)]:
17+
# nextln: 0000: %0:gpr(reg)[late], %2:xmm(reg)[early], %3:xmm(reg)[late] = PseudoFloatToUint64Rel(Double) %1(reg)[early]
18+
# nextln: 0001: Ret %0($$rax)[early]
19+
20+
%0:ctrl, %1:f64 = entry
21+
%2:i64 = floattouint %1
22+
return %0, %2
23+
}

0 commit comments

Comments
 (0)