Add x64 codegen support for float extension/truncation

nmraz · nmraz · commit 75e5d07c3c73 · 2025-10-24T23:25:04.000+03:00
diff --git a/crates/codegen/src/target/x64.rs b/crates/codegen/src/target/x64.rs
@@ -270,6 +270,8 @@ pub enum X64Instr {
     F64ConstAddrAbs(f64),
     Cvtsi2s(OperandSize, SseFpuPrecision),
     Cvts2si(OperandSize, SseFpuPrecision),
+    Cvtss2sd,
+    Cvtsd2ss,
     PseudoUint64ToFloat(SseFpuPrecision),
     PseudoFloatToUint64Rel(SseFpuPrecision),
     PseudoFloatToUint64Abs(SseFpuPrecision),
@@ -332,6 +334,8 @@ impl X64Instr {
             X64Instr::F64ConstAddrAbs(..) => false,
             X64Instr::Cvtsi2s(..) => false,
             X64Instr::Cvts2si(..) => false,
+            X64Instr::Cvtss2sd => false,
+            X64Instr::Cvtsd2ss => false,
             X64Instr::PseudoUint64ToFloat(..) => false,
             X64Instr::PseudoFloatToUint64Rel(..) => false,
             X64Instr::PseudoFloatToUint64Abs(..) => false,
@@ -386,6 +390,8 @@ impl X64Instr {
             X64Instr::F64ConstAddrAbs(..) => false,
             X64Instr::Cvtsi2s(..) => false,
             X64Instr::Cvts2si(..) => false,
+            X64Instr::Cvtss2sd => false,
+            X64Instr::Cvtsd2ss => false,
             X64Instr::PseudoUint64ToFloat(..) => true,
             X64Instr::PseudoFloatToUint64Rel(..) => true,
             X64Instr::PseudoFloatToUint64Abs(..) => true,
diff --git a/crates/codegen/src/target/x64/emit.rs b/crates/codegen/src/target/x64/emit.rs
@@ -402,6 +402,16 @@ impl MachineEmit for X64Machine {
                 defs[0].as_reg().unwrap(),
                 state.operand_reg_mem(uses[0]),
             ),
+            X64Instr::Cvtss2sd => emit_cvtss2sd(
+                buffer,
+                defs[0].as_reg().unwrap(),
+                state.operand_reg_mem(uses[0]),
+            ),
+            X64Instr::Cvtsd2ss => emit_cvtsd2ss(
+                buffer,
+                defs[0].as_reg().unwrap(),
+                state.operand_reg_mem(uses[0]),
+            ),
             &X64Instr::PseudoUint64ToFloat(prec) => emit_uint64_to_float(
                 buffer,
                 prec,
@@ -1382,6 +1392,14 @@ fn emit_cvts2si(
     emit_sse_fpu_with_mandatory_prefix_and_op_size(buffer, prec, op_size, 0x2d, dest, src);
 }
 
+fn emit_cvtss2sd(buffer: &mut CodeBuffer<X64Fixup>, dest: PhysReg, src: RegMem) {
+    emit_sse_fpu_with_mandatory_prefix(buffer, SseFpuPrecision::Single, 0x5a, dest, src); // F3 0F 5A
+}
+
+fn emit_cvtsd2ss(buffer: &mut CodeBuffer<X64Fixup>, dest: PhysReg, src: RegMem) {
+    emit_sse_fpu_with_mandatory_prefix(buffer, SseFpuPrecision::Double, 0x5a, dest, src); // F2 0F 5A
+}
+
 fn emit_mov_gprm_xmm(
     buffer: &mut CodeBuffer<X64Fixup>,
     op_size: OperandSize,
diff --git a/crates/codegen/src/target/x64/lower.rs b/crates/codegen/src/target/x64/lower.rs
@@ -209,6 +209,8 @@ impl MachineLower for X64Machine {
             NodeKind::Fsub => emit_fpu_rr(ctx, node, SseFpuBinOp::Sub),
             NodeKind::Fmul => emit_fpu_rr(ctx, node, SseFpuBinOp::Mul),
             NodeKind::Fdiv => emit_fpu_rr(ctx, node, SseFpuBinOp::Div),
+            NodeKind::Fext => emit_cvtss2sd(ctx, node),
+            NodeKind::Ftrunc => emit_cvtsd2ss(ctx, node),
             &NodeKind::Fcmp(kind) => select_direct_fcmp(ctx, node, kind),
             NodeKind::SintToFloat => emit_cvtsi2s(ctx, node),
             NodeKind::UintToFloat => select_uinttofloat(ctx, node),
@@ -1329,6 +1331,34 @@ fn emit_cvts2si(ctx: &mut IselContext<'_, '_, X64Machine>, node: Node) {
     );
 }
 
+fn emit_cvtss2sd(ctx: &mut IselContext<'_, '_, X64Machine>, node: Node) {
+    let [output] = ctx.node_outputs_exact(node);
+    let [input] = ctx.node_inputs_exact(node);
+    // Look up the virtual registers for the node's single input and single output value.
+    let input = ctx.get_value_vreg(input);
+    let output = ctx.get_value_vreg(output);
+    // Selected for `NodeKind::Fext`: the output is an `any_reg` def, the input an `any` use.
+    ctx.emit_instr(
+        X64Instr::Cvtss2sd,
+        &[DefOperand::any_reg(output)],
+        &[UseOperand::any(input)],
+    );
+}
+
+fn emit_cvtsd2ss(ctx: &mut IselContext<'_, '_, X64Machine>, node: Node) {
+    let [output] = ctx.node_outputs_exact(node);
+    let [input] = ctx.node_inputs_exact(node);
+    // Look up the virtual registers for the node's single input and single output value.
+    let input = ctx.get_value_vreg(input);
+    let output = ctx.get_value_vreg(output);
+    // Selected for `NodeKind::Ftrunc`: the output is an `any_reg` def, the input an `any` use.
+    ctx.emit_instr(
+        X64Instr::Cvtsd2ss,
+        &[DefOperand::any_reg(output)],
+        &[UseOperand::any(input)],
+    );
+}
+
 // Matching helpers
 
 fn match_icmp_imm32(
diff --git a/crates/filetests/cases/codegen/fext_trunc.spdr b/crates/filetests/cases/codegen/fext_trunc.spdr
@@ -0,0 +1,27 @@
+# run: codegen
+
+func @fext:f64(f32) {
+    # check: function `fext`:
+    # nextln: 000000: 55                              push rbp
+    # nextln: 000001: 48 89 e5                        mov rbp, rsp
+    # nextln: 000004: f3 0f 5a c0                     cvtss2sd xmm0, xmm0
+    # nextln: 000008: 5d                              pop rbp
+    # nextln: 000009: c3                              ret
+
+    %0:ctrl, %1:f32 = entry
+    %2:f64 = fext %1
+    return %0, %2
+}
+
+func @ftrunc:f32(f64) {
+    # check: function `ftrunc`:
+    # nextln: 000000: 55                              push rbp
+    # nextln: 000001: 48 89 e5                        mov rbp, rsp
+    # nextln: 000004: f2 0f 5a c0                     cvtsd2ss xmm0, xmm0
+    # nextln: 000008: 5d                              pop rbp
+    # nextln: 000009: c3                              ret
+
+    %0:ctrl, %1:f64 = entry
+    %2:f32 = ftrunc %1
+    return %0, %2
+}
diff --git a/crates/filetests/cases/isel/fext_trunc.spdr b/crates/filetests/cases/isel/fext_trunc.spdr
@@ -0,0 +1,23 @@
+# run: isel
+
+func @fext:f64(f32) {
+    # check: function `fext`:
+    # nextln:       block0[%1:xmm32($$xmm0)]:
+    # nextln: 0000:      %0:xmm64(reg)[late] = Cvtss2sd %1(any)[early]
+    # nextln: 0001:      Ret %0($$xmm0)[early]
+
+    %0:ctrl, %1:f32 = entry
+    %2:f64 = fext %1
+    return %0, %2
+}
+
+func @ftrunc:f32(f64) {
+    # check: function `ftrunc`:
+    # nextln:       block0[%1:xmm64($$xmm0)]:
+    # nextln: 0000:      %0:xmm32(reg)[late] = Cvtsd2ss %1(any)[early]
+    # nextln: 0001:      Ret %0($$xmm0)[early]
+
+    %0:ctrl, %1:f64 = entry
+    %2:f32 = ftrunc %1
+    return %0, %2
+}