Finish x64 float-to-unsigned-64-bit codegen support

nmraz · nmraz · commit a17a10ed5667 · 2025-07-07T23:06:49.000+03:00
We now use an absolute relocation to access the constant pool when
building with the large-abs internal code model.
diff --git a/crates/codegen/src/target/x64.rs b/crates/codegen/src/target/x64.rs
@@ -254,6 +254,7 @@ pub enum X64Instr {
     Cvts2si(OperandSize, SseFpuPrecision),
     PseudoUint64ToFloat(SseFpuPrecision),
     PseudoFloatToUint64Rel(SseFpuPrecision),
+    PseudoFloatToUint64Abs(SseFpuPrecision),
     MovGprmXmm(OperandSize),
     /// Load from [rbp + offset]
     MovRRbp {
@@ -309,6 +310,7 @@ impl X64Instr {
             X64Instr::Cvts2si(..) => false,
             X64Instr::PseudoUint64ToFloat(..) => false,
             X64Instr::PseudoFloatToUint64Rel(..) => false,
+            X64Instr::PseudoFloatToUint64Abs(..) => false,
             X64Instr::MovGprmXmm(..) => false,
             X64Instr::MovRRbp { .. } => false,
             X64Instr::MovsRRbp { .. } => false,
@@ -356,6 +358,7 @@ impl X64Instr {
             X64Instr::Cvts2si(..) => false,
             X64Instr::PseudoUint64ToFloat(..) => true,
             X64Instr::PseudoFloatToUint64Rel(..) => true,
+            X64Instr::PseudoFloatToUint64Abs(..) => true,
             X64Instr::MovGprmXmm(..) => false,
             X64Instr::Setcc(..) => false,
             X64Instr::MovRRbp { .. } => false,
diff --git a/crates/codegen/src/target/x64/emit.rs b/crates/codegen/src/target/x64/emit.rs
@@ -332,6 +332,14 @@ impl MachineEmit for X64Machine {
                 defs[1].as_reg().unwrap(),
                 defs[2].as_reg().unwrap(),
             ),
+            &X64Instr::PseudoFloatToUint64Abs(prec) => emit_float_to_uint64_abs(
+                buffer,
+                prec,
+                defs[0].as_reg().unwrap(),
+                uses[0].as_reg().unwrap(),
+                defs[1].as_reg().unwrap(),
+                defs[2].as_reg().unwrap(),
+            ),
             &X64Instr::MovGprmXmm(op_size) => emit_mov_gprm_xmm(
                 buffer,
                 op_size,
@@ -623,10 +631,48 @@ fn emit_float_to_uint64_rel(
     src: PhysReg,
     tmp_xmm1: PhysReg,
     tmp_xmm2: PhysReg,
+) {
+    let f_1p63 = get_f_1p63(buffer, prec);
+    emit_movs_r_rm_rip_reloc(buffer, prec, tmp_xmm1, BufferRelocTarget::Constant(f_1p63));
+    emit_float_to_uint64_common(buffer, prec, dest, src, tmp_xmm1, tmp_xmm2);
+}
+
+fn emit_float_to_uint64_abs(
+    buffer: &mut CodeBuffer<X64Fixup>,
+    prec: SseFpuPrecision,
+    dest: PhysReg,
+    src: PhysReg,
+    tmp_xmm1: PhysReg,
+    tmp_xmm2: PhysReg,
+) {
+    let f_1p63 = get_f_1p63(buffer, prec);
+
+    // Use `dest` as a temporary for the constant pool access first.
+    emit_movabs_r_i_reloc(buffer, dest, BufferRelocTarget::Constant(f_1p63));
+    emit_movs_r_rm(
+        buffer,
+        prec,
+        tmp_xmm1,
+        RegMem::Mem(RawAddrMode::BaseIndexOff {
+            base: Some(dest),
+            index: None,
+            offset: 0,
+        }),
+    );
+
+    emit_float_to_uint64_common(buffer, prec, dest, src, tmp_xmm1, tmp_xmm2);
+}
+
+fn emit_float_to_uint64_common(
+    buffer: &mut CodeBuffer<X64Fixup>,
+    prec: SseFpuPrecision,
+    dest: PhysReg,
+    src: PhysReg,
+    tmp_xmm1: PhysReg,
+    tmp_xmm2: PhysReg,
 ) {
     // Emit the following:
     //
-    //         movsd tmp_xmm1, [rip + C_f_1p63]
     //         ucomis[sd] src, tmp_xmm1
     //         jae has_high_bit
     //         cvts[sd]2si dest, src
@@ -637,12 +683,12 @@ fn emit_float_to_uint64_rel(
     //         cvts[sd]2si dest, tmp_xmm2
     //         btc dest
     //     done:
+    //
+    // assuming `tmp_xmm1` already contains the correct constant.
 
     let high_bit_set = buffer.create_label();
     let done = buffer.create_label();
 
-    let f_1p63 = get_f_1p63(buffer, prec);
-    emit_movs_r_rm_rip_reloc(buffer, prec, tmp_xmm1, BufferRelocTarget::Constant(f_1p63));
     emit_ucomi(buffer, prec, src, RegMem::Reg(tmp_xmm1));
     emit_jcc(buffer, CondCode::Ae, high_bit_set);
 
diff --git a/crates/codegen/src/target/x64/lower.rs b/crates/codegen/src/target/x64/lower.rs
@@ -206,7 +206,7 @@ impl MachineLower for X64Machine {
             NodeKind::SintToFloat => emit_cvtsi2s(ctx, node),
             NodeKind::UintToFloat => select_uinttofloat(ctx, node),
             NodeKind::FloatToSint => emit_cvts2si(ctx, node),
-            NodeKind::FloatToUint => select_floattouint(self, ctx, node)?,
+            NodeKind::FloatToUint => select_floattouint(self, ctx, node),
             NodeKind::PtrOff => select_alu(ctx, node, AluBinOp::Add),
             &NodeKind::Load(mem_size) => select_load(ctx, node, mem_size),
             &NodeKind::Store(mem_size) => select_store(ctx, node, mem_size),
@@ -552,11 +552,7 @@ fn select_uinttofloat(ctx: &mut IselContext<'_, '_, X64Machine>, node: Node) {
     }
 }
 
-fn select_floattouint(
-    machine: &X64Machine,
-    ctx: &mut IselContext<'_, '_, X64Machine>,
-    node: Node,
-) -> Result<(), MachineIselError> {
+fn select_floattouint(machine: &X64Machine, ctx: &mut IselContext<'_, '_, X64Machine>, node: Node) {
     let [output] = ctx.node_outputs_exact(node);
     let [input] = ctx.node_inputs_exact(node);
 
@@ -574,27 +570,26 @@ fn select_floattouint(
             );
         }
         Type::I64 => {
-            if machine.config.internal_code_model == CodeModel::SmallPic {
-                let tmp_xmm1 = ctx.create_temp_vreg(RC_XMM);
-                let tmp_xmm2 = ctx.create_temp_vreg(RC_XMM);
+            let instr = match machine.config.internal_code_model {
+                CodeModel::SmallPic => X64Instr::PseudoFloatToUint64Rel(SseFpuPrecision::Double),
+                CodeModel::LargeAbs => X64Instr::PseudoFloatToUint64Abs(SseFpuPrecision::Double),
+            };
 
-                ctx.emit_instr(
-                    X64Instr::PseudoFloatToUint64Rel(SseFpuPrecision::Double),
-                    &[
-                        DefOperand::any_reg(output),
-                        DefOperand::new(tmp_xmm1, DefOperandConstraint::AnyReg, OperandPos::Early),
-                        DefOperand::any_reg(tmp_xmm2),
-                    ],
-                    &[UseOperand::any_reg(input)],
-                );
-            } else {
-                return Err(MachineIselError);
-            }
+            let tmp_xmm1 = ctx.create_temp_vreg(RC_XMM);
+            let tmp_xmm2 = ctx.create_temp_vreg(RC_XMM);
+
+            ctx.emit_instr(
+                instr,
+                &[
+                    DefOperand::any_reg(output),
+                    DefOperand::new(tmp_xmm1, DefOperandConstraint::AnyReg, OperandPos::Early),
+                    DefOperand::any_reg(tmp_xmm2),
+                ],
+                &[UseOperand::any_reg(input)],
+            );
         }
         _ => unreachable!(),
     }
-
-    Ok(())
 }
 
 fn select_load(ctx: &mut IselContext<'_, '_, X64Machine>, node: Node, mem_size: MemSize) {
diff --git a/crates/filetests/cases/codegen/floattouint64_cm_large.spdr b/crates/filetests/cases/codegen/floattouint64_cm_large.spdr
@@ -0,0 +1,24 @@
+# run: codegen[large-abs]
+
+func @floattouint64:i64(f64) {
+    # check: function `floattouint64`:
+    # nextln: 000000: 55                              push rbp
+    # nextln: 000001: 48 89 e5                        mov rbp, rsp
+    # nextln: 000004: 48 b8 00 00 00 00 00 00 00 00   movabs rax, 0  # RELOC_ABS64 -> @<CP> + 0
+    # nextln: 00000e: f2 0f 10 08                     movsd xmm1, qword ptr [rax]
+    # nextln: 000012: 66 0f 2e c1                     ucomisd xmm0, xmm1
+    # nextln: 000016: 0f 83 0a 00 00 00               jae 0x26
+    # nextln: 00001c: f2 48 0f 2d c0                  cvtsd2si rax, xmm0
+    # nextln: 000021: e9 0e 00 00 00                  jmp 0x34
+    # nextln: 000026: f2 0f 5c c1                     subsd xmm0, xmm1
+    # nextln: 00002a: f2 48 0f 2d c0                  cvtsd2si rax, xmm0
+    # nextln: 00002f: 48 0f ba f8 3f                  btc rax, 0x3f
+    # nextln: 000034: 5d                              pop rbp
+    # nextln: 000035: c3                              ret
+    # nextln: <CP>:
+    # nextln: 000000: 00 00 00 00 00 00 e0 43
+
+    %0:ctrl, %1:f64 = entry
+    %2:i64 = floattouint %1
+    return %0, %2
+}
diff --git a/crates/filetests/cases/isel/floattouint64_cm_large.spdr b/crates/filetests/cases/isel/floattouint64_cm_large.spdr
@@ -0,0 +1,12 @@
+# run: isel[large-abs]
+
+func @floattouint64:i64(f64) {
+    # check: function `floattouint64`:
+    # nextln:       block0[%1:xmm($$xmm0)]:
+    # nextln: 0000:      %0:gpr(reg)[late], %2:xmm(reg)[early], %3:xmm(reg)[late] = PseudoFloatToUint64Abs(Double) %1(reg)[early]
+    # nextln: 0001:      Ret %0($$rax)[early]
+
+    %0:ctrl, %1:f64 = entry
+    %2:i64 = floattouint %1
+    return %0, %2
+}