diff --git a/example/mini_core.rs b/example/mini_core.rs
index 0aba44a88c5..2e165cc3c12 100644
--- a/example/mini_core.rs
+++ b/example/mini_core.rs
@@ -628,11 +628,6 @@ impl<T: ?Sized, A: Allocator> Deref for Box<T, A> {
     }
 }
 
-#[lang = "exchange_malloc"]
-unsafe fn allocate(size: usize, _align: usize) -> *mut u8 {
-    libc::malloc(size)
-}
-
 #[lang = "drop"]
 pub trait Drop {
     fn drop(&mut self);
diff --git a/rust-toolchain b/rust-toolchain
index 655fa6abbab..14cf26fbf63 100644
--- a/rust-toolchain
+++ b/rust-toolchain
@@ -1,3 +1,3 @@
 [toolchain]
-channel = "nightly-2026-02-14"
+channel = "nightly-2026-03-06"
 components = ["rust-src", "rustc-dev", "llvm-tools-preview"]
diff --git a/src/back/lto.rs b/src/back/lto.rs
index 9a9040708ef..1bb7d73a02c 100644
--- a/src/back/lto.rs
+++ b/src/back/lto.rs
@@ -28,7 +28,7 @@ use object::read::archive::ArchiveFile;
 use rustc_codegen_ssa::back::lto::{SerializedModule, ThinModule, ThinShared};
 use rustc_codegen_ssa::back::write::{CodegenContext, FatLtoInput, SharedEmitter};
 use rustc_codegen_ssa::traits::*;
-use rustc_codegen_ssa::{ModuleCodegen, ModuleKind, looks_like_rust_object_file};
+use rustc_codegen_ssa::{CompiledModule, ModuleCodegen, ModuleKind, looks_like_rust_object_file};
 use rustc_data_structures::memmap::Mmap;
 use rustc_data_structures::profiling::SelfProfilerRef;
 use rustc_errors::{DiagCtxt, DiagCtxtHandle};
@@ -39,7 +39,7 @@ use rustc_session::config::Lto;
 use rustc_target::spec::RelocModel;
 use tempfile::{TempDir, tempdir};
 
-use crate::back::write::save_temp_bitcode;
+use crate::back::write::{codegen, save_temp_bitcode};
 use crate::errors::LtoBitcodeFromRlib;
 use crate::{GccCodegenBackend, GccContext, LTO_SUPPORTED, LtoMode, SyncContext, to_gcc_opt_level};
 
@@ -117,7 +117,7 @@ pub(crate) fn run_fat(
     shared_emitter: &SharedEmitter,
     each_linked_rlib_for_lto: &[PathBuf],
     modules: Vec<FatLtoInput<GccCodegenBackend>>,
-) -> ModuleCodegen<GccContext> {
+) -> CompiledModule {
     let dcx = DiagCtxt::new(Box::new(shared_emitter.clone()));
     let dcx = dcx.handle();
     let lto_data = prepare_lto(cgcx, each_linked_rlib_for_lto, dcx);
@@ -137,12 +137,12 @@ pub(crate) fn run_fat(
 fn fat_lto(
     cgcx: &CodegenContext,
     prof: &SelfProfilerRef,
-    _dcx: DiagCtxtHandle<'_>,
+    dcx: DiagCtxtHandle<'_>,
     modules: Vec<FatLtoInput<GccCodegenBackend>>,
     mut serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
     tmp_path: TempDir,
     //symbols_below_threshold: &[String],
-) -> ModuleCodegen<GccContext> {
+) -> CompiledModule {
     let _timer = prof.generic_activity("GCC_fat_lto_build_monolithic_module");
     info!("going for a fat lto");
 
@@ -238,7 +238,7 @@ fn fat_lto(
                     module
                         .module_llvm
                         .context
-                        .add_driver_option(module_buffer.0.to_str().expect("path"));
+                        .add_driver_option(module_buffer.path.to_str().expect("path"));
                 }
                 SerializedModule::FromRlib(_) => unimplemented!("from rlib"),
                 SerializedModule::FromUncompressedFile(_) => {
@@ -265,14 +265,22 @@ fn fat_lto(
     // of now.
     module.module_llvm.temp_dir = Some(tmp_path);
 
-    module
+    codegen(cgcx, prof, dcx, module, &cgcx.module_config)
 }
 
-pub struct ModuleBuffer(PathBuf);
+pub struct ModuleBuffer {
+    path: PathBuf,
+    // Temporary directory used by LTO. We keep it here so that it's not removed before linking.
+    _temp_dir: Option<TempDir>,
+}
 
 impl ModuleBuffer {
     pub fn new(path: PathBuf) -> ModuleBuffer {
-        ModuleBuffer(path)
+        ModuleBuffer { path, _temp_dir: None }
+    }
+
+    pub fn new_in_temp_dir(path: PathBuf, temp_dir: TempDir) -> ModuleBuffer {
+        ModuleBuffer { path, _temp_dir: Some(temp_dir) }
     }
 }
 
@@ -290,7 +298,7 @@ pub(crate) fn run_thin(
     prof: &SelfProfilerRef,
     dcx: DiagCtxtHandle<'_>,
     each_linked_rlib_for_lto: &[PathBuf],
-    modules: Vec<(String, ThinBuffer)>,
+    modules: Vec<(String, ModuleBuffer)>,
     cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
 ) -> (Vec<ThinModule<GccCodegenBackend>>, Vec<WorkProduct>) {
     let lto_data = prepare_lto(cgcx, each_linked_rlib_for_lto, dcx);
@@ -312,48 +320,11 @@ pub(crate) fn run_thin(
     )
 }
 
-pub(crate) fn prepare_thin(module: ModuleCodegen<GccContext>) -> (String, ThinBuffer) {
-    let name = module.name;
-    //let buffer = ThinBuffer::new(module.module_llvm.context, true);
-    let buffer = ThinBuffer::new(&module.module_llvm.context);
-    (name, buffer)
-}
-
-/// Prepare "thin" LTO to get run on these modules.
-///
-/// The general structure of ThinLTO is quite different from the structure of
-/// "fat" LTO above. With "fat" LTO all LLVM modules in question are merged into
-/// one giant LLVM module, and then we run more optimization passes over this
-/// big module after internalizing most symbols. Thin LTO, on the other hand,
-/// avoid this large bottleneck through more targeted optimization.
-///
-/// At a high level Thin LTO looks like:
-///
-///    1. Prepare a "summary" of each LLVM module in question which describes
-///       the values inside, cost of the values, etc.
-///    2. Merge the summaries of all modules in question into one "index"
-///    3. Perform some global analysis on this index
-///    4. For each module, use the index and analysis calculated previously to
-///       perform local transformations on the module, for example inlining
-///       small functions from other modules.
-///    5. Run thin-specific optimization passes over each module, and then code
-///       generate everything at the end.
-///
-/// The summary for each module is intended to be quite cheap, and the global
-/// index is relatively quite cheap to create as well. As a result, the goal of
-/// ThinLTO is to reduce the bottleneck on LTO and enable LTO to be used in more
-/// situations. For example one cheap optimization is that we can parallelize
-/// all codegen modules, easily making use of all the cores on a machine.
-///
-/// With all that in mind, the function here is designed at specifically just
-/// calculating the *index* for ThinLTO. This index will then be shared amongst
-/// all of the `LtoModuleCodegen` units returned below and destroyed once
-/// they all go out of scope.
 fn thin_lto(
     _cgcx: &CodegenContext,
     prof: &SelfProfilerRef,
     _dcx: DiagCtxtHandle<'_>,
-    modules: Vec<(String, ThinBuffer)>,
+    modules: Vec<(String, ModuleBuffer)>,
     serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
     tmp_path: TempDir,
     cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
@@ -362,9 +333,6 @@ fn thin_lto(
     let _timer = prof.generic_activity("LLVM_thin_lto_global_analysis");
     info!("going for that thin, thin LTO");
 
-    /*let green_modules: FxHashMap<_, _> =
-    cached_modules.iter().map(|(_, wp)| (wp.cgu_name.clone(), wp.clone())).collect();*/
-
     let full_scope_len = modules.len() + serialized_modules.len() + cached_modules.len();
     let mut thin_buffers = Vec::with_capacity(modules.len());
     let mut module_names = Vec::with_capacity(full_scope_len);
@@ -373,31 +341,10 @@ fn thin_lto(
     for (i, (name, buffer)) in modules.into_iter().enumerate() {
         info!("local module: {} - {}", i, name);
         let cname = CString::new(name.as_bytes()).unwrap();
-        /*thin_modules.push(llvm::ThinLTOModule {
-            identifier: cname.as_ptr(),
-            data: buffer.data().as_ptr(),
-            len: buffer.data().len(),
-        });*/
         thin_buffers.push(buffer);
         module_names.push(cname);
     }
 
-    // FIXME: All upstream crates are deserialized internally in the
-    //        function below to extract their summary and modules. Note that
-    //        unlike the loop above we *must* decode and/or read something
-    //        here as these are all just serialized files on disk. An
-    //        improvement, however, to make here would be to store the
-    //        module summary separately from the actual module itself. Right
-    //        now this is store in one large bitcode file, and the entire
-    //        file is deflate-compressed. We could try to bypass some of the
-    //        decompression by storing the index uncompressed and only
-    //        lazily decompressing the bytecode if necessary.
-    //
-    //        Note that truly taking advantage of this optimization will
-    //        likely be further down the road. We'd have to implement
-    //        incremental ThinLTO first where we could actually avoid
-    //        looking at upstream modules entirely sometimes (the contents,
-    //        we must always unconditionally look at the index).
     let mut serialized = Vec::with_capacity(serialized_modules.len() + cached_modules.len());
 
     let cached_modules =
@@ -405,75 +352,12 @@ fn thin_lto(
 
     for (module, name) in serialized_modules.into_iter().chain(cached_modules) {
         info!("upstream or cached module {:?}", name);
-        /*thin_modules.push(llvm::ThinLTOModule {
-            identifier: name.as_ptr(),
-            data: module.data().as_ptr(),
-            len: module.data().len(),
-        });*/
-
-        match module {
-            SerializedModule::Local(_) => {
-                //let path = module_buffer.0.to_str().expect("path");
-                //let my_path = PathBuf::from(path);
-                //let exists = my_path.exists();
-                /*module.module_llvm.should_combine_object_files = true;
-                module
-                .module_llvm
-                .context
-                .add_driver_option(module_buffer.0.to_str().expect("path"));*/
-            }
-            SerializedModule::FromRlib(_) => unimplemented!("from rlib"),
-            SerializedModule::FromUncompressedFile(_) => {
-                unimplemented!("from uncompressed file")
-            }
-        }
-
         serialized.push(module);
         module_names.push(name);
     }
 
-    // Sanity check
-    //assert_eq!(thin_modules.len(), module_names.len());
-
-    // Delegate to the C++ bindings to create some data here. Once this is a
-    // tried-and-true interface we may wish to try to upstream some of this
-    // to LLVM itself, right now we reimplement a lot of what they do
-    // upstream...
-    /*let data = llvm::LLVMRustCreateThinLTOData(
-        thin_modules.as_ptr(),
-        thin_modules.len() as u32,
-        symbols_below_threshold.as_ptr(),
-        symbols_below_threshold.len() as u32,
-    )
-    .ok_or_else(|| write::llvm_err(dcx, LlvmError::PrepareThinLtoContext))?;
-    */
-
     let data = ThinData; //(Arc::new(tmp_path))/*(data)*/;
 
-    info!("thin LTO data created");
-
-    /*let (key_map_path, prev_key_map, curr_key_map) =
-        if let Some(ref incr_comp_session_dir) = cgcx.incr_comp_session_dir {
-            let path = incr_comp_session_dir.join(THIN_LTO_KEYS_INCR_COMP_FILE_NAME);
-            // If the previous file was deleted, or we get an IO error
-            // reading the file, then we'll just use `None` as the
-            // prev_key_map, which will force the code to be recompiled.
-            let prev =
-                if path.exists() { ThinLTOKeysMap::load_from_file(&path).ok() } else { None };
-            let curr = ThinLTOKeysMap::from_thin_lto_modules(&data, &thin_modules, &module_names);
-            (Some(path), prev, curr)
-        }
-        else {
-            // If we don't compile incrementally, we don't need to load the
-            // import data from LLVM.
-            assert!(green_modules.is_empty());
-            let curr = ThinLTOKeysMap::default();
-            (None, None, curr)
-        };
-    info!("thin LTO cache key map loaded");
-    info!("prev_key_map: {:#?}", prev_key_map);
-    info!("curr_key_map: {:#?}", curr_key_map);*/
-
     // Throw our data in an `Arc` as we'll be sharing it across threads. We
     // also put all memory referenced by the C++ data (buffers, ids, etc)
     // into the arc as well. After this we'll create a thin module
@@ -487,34 +371,10 @@ fn thin_lto(
     info!("checking which modules can be-reused and which have to be re-optimized.");
     for (module_index, module_name) in shared.module_names.iter().enumerate() {
         let module_name = module_name_to_str(module_name);
-        /*if let (Some(prev_key_map), true) =
-            (prev_key_map.as_ref(), green_modules.contains_key(module_name))
-        {
-            assert!(cgcx.incr_comp_session_dir.is_some());
-
-            // If a module exists in both the current and the previous session,
-            // and has the same LTO cache key in both sessions, then we can re-use it
-            if prev_key_map.keys.get(module_name) == curr_key_map.keys.get(module_name) {
-                let work_product = green_modules[module_name].clone();
-                copy_jobs.push(work_product);
-                info!(" - {}: re-used", module_name);
-                assert!(cgcx.incr_comp_session_dir.is_some());
-                continue;
-            }
-        }*/
-
         info!(" - {}: re-compiled", module_name);
         opt_jobs.push(ThinModule { shared: shared.clone(), idx: module_index });
     }
 
-    // Save the current ThinLTO import information for the next compilation
-    // session, overwriting the previous serialized data (if any).
-    /*if let Some(path) = key_map_path {
-        if let Err(err) = curr_key_map.save_to_file(&path) {
-            return Err(write::llvm_err(dcx, LlvmError::WriteThinLtoKey { err }));
-        }
-    }*/
-
     // NOTE: save the temporary directory used by LTO so that it gets deleted after linking instead
     // of now.
     //module.module_llvm.temp_dir = Some(tmp_path);
@@ -524,36 +384,43 @@ fn thin_lto(
     (opt_jobs, copy_jobs)
 }
 
-pub fn optimize_thin_module(
+pub fn optimize_and_codegen_thin(
+    cgcx: &CodegenContext,
+    prof: &SelfProfilerRef,
+    shared_emitter: &SharedEmitter,
     thin_module: ThinModule<GccCodegenBackend>,
-    _cgcx: &CodegenContext,
-) -> ModuleCodegen<GccContext> {
-    //let module_name = &thin_module.shared.module_names[thin_module.idx];
-
-    // Right now the implementation we've got only works over serialized
-    // modules, so we create a fresh new LLVM context and parse the module
-    // into that context. One day, however, we may do this for upstream
-    // crates but for locally codegened modules we may be able to reuse
-    // that LLVM Context and Module.
-    //let llcx = llvm::LLVMRustContextCreate(cgcx.fewer_names);
-    //let llmod_raw = parse_module(llcx, module_name, thin_module.data(), &dcx)? as *const _;
-    let mut lto_mode = LtoMode::None;
+) -> CompiledModule {
+    let dcx = DiagCtxt::new(Box::new(shared_emitter.clone()));
+    let dcx = dcx.handle();
+
+    let lto_supported = LTO_SUPPORTED.load(Ordering::SeqCst);
+    let lto_mode = if lto_supported { LtoMode::Fat } else { LtoMode::Thin };
     let context = match thin_module.shared.thin_buffers.get(thin_module.idx) {
-        Some(thin_buffer) => Arc::clone(&thin_buffer.context),
+        Some(thin_buffer) => {
+            info!("local: {:?}", &thin_module.shared.module_names[thin_module.idx]);
+            // The buffer's path is returned below as this module's object file as-is, so no
+            // libgccjit context is needed on this path. Creating one here only to drop it
+            // before the early return would be wasted work.
+
+            return CompiledModule {
+                name: thin_module.shared.module_names[thin_module.idx].to_str().unwrap().to_owned(),
+                kind: ModuleKind::Regular,
+                object: Some(thin_buffer.path.clone()),
+                dwarf_object: None,
+                bytecode: None,
+                assembly: None,
+                llvm_ir: None,
+                links_from_incr_cache: vec![],
+            };
+        }
         None => {
+            info!("foreign: {:?}", &thin_module.shared.module_names[thin_module.idx]);
             let context = Context::default();
             let len = thin_module.shared.thin_buffers.len();
             let module = &thin_module.shared.serialized_modules[thin_module.idx - len];
             match *module {
                 SerializedModule::Local(ref module_buffer) => {
-                    let path = module_buffer.0.to_str().expect("path");
-                    context.add_driver_option(path);
-                    lto_mode = LtoMode::Thin;
-                    /*module.module_llvm.should_combine_object_files = true;
-                    module
-                        .module_llvm
-                        .context
-                        .add_driver_option(module_buffer.0.to_str().expect("path"));*/
+                    context.add_driver_option(module_buffer.path.to_str().expect("path"));
                 }
                 SerializedModule::FromRlib(_) => unimplemented!("from rlib"),
                 SerializedModule::FromUncompressedFile(_) => {
@@ -563,7 +430,7 @@ pub fn optimize_thin_module(
             Arc::new(SyncContext::new(context))
         }
     };
-    let lto_supported = LTO_SUPPORTED.load(Ordering::SeqCst);
+    context.add_command_line_option("-flinker-output=nolto-rel");
     let module = ModuleCodegen::new_regular(
         thin_module.name().to_string(),
         GccContext {
@@ -575,88 +442,10 @@ pub fn optimize_thin_module(
             temp_dir: None,
         },
     );
-    /*{
-        let target = &*module.module_llvm.tm;
-        let llmod = module.module_llvm.llmod();
-        save_temp_bitcode(cgcx, &module, "thin-lto-input");
-
-        // Up next comes the per-module local analyses that we do for Thin LTO.
-        // Each of these functions is basically copied from the LLVM
-        // implementation and then tailored to suit this implementation. Ideally
-        // each of these would be supported by upstream LLVM but that's perhaps
-        // a patch for another day!
-        //
-        // You can find some more comments about these functions in the LLVM
-        // bindings we've got (currently `PassWrapper.cpp`)
-        {
-            let _timer =
-                cgcx.prof.generic_activity_with_arg("LLVM_thin_lto_rename", thin_module.name());
-            unsafe { llvm::LLVMRustPrepareThinLTORename(thin_module.shared.data.0, llmod, target) };
-            save_temp_bitcode(cgcx, &module, "thin-lto-after-rename");
-        }
-
-        {
-            let _timer = cgcx
-                .prof
-                .generic_activity_with_arg("LLVM_thin_lto_resolve_weak", thin_module.name());
-            if !llvm::LLVMRustPrepareThinLTOResolveWeak(thin_module.shared.data.0, llmod) {
-                return Err(write::llvm_err(&dcx, LlvmError::PrepareThinLtoModule));
-            }
-            save_temp_bitcode(cgcx, &module, "thin-lto-after-resolve");
-        }
-
-        {
-            let _timer = cgcx
-                .prof
-                .generic_activity_with_arg("LLVM_thin_lto_internalize", thin_module.name());
-            if !llvm::LLVMRustPrepareThinLTOInternalize(thin_module.shared.data.0, llmod) {
-                return Err(write::llvm_err(&dcx, LlvmError::PrepareThinLtoModule));
-            }
-            save_temp_bitcode(cgcx, &module, "thin-lto-after-internalize");
-        }
-
-        {
-            let _timer =
-                cgcx.prof.generic_activity_with_arg("LLVM_thin_lto_import", thin_module.name());
-            if !llvm::LLVMRustPrepareThinLTOImport(thin_module.shared.data.0, llmod, target) {
-                return Err(write::llvm_err(&dcx, LlvmError::PrepareThinLtoModule));
-            }
-            save_temp_bitcode(cgcx, &module, "thin-lto-after-import");
-        }
-
-        // Alright now that we've done everything related to the ThinLTO
-        // analysis it's time to run some optimizations! Here we use the same
-        // `run_pass_manager` as the "fat" LTO above except that we tell it to
-        // populate a thin-specific pass manager, which presumably LLVM treats a
-        // little differently.
-        {
-            info!("running thin lto passes over {}", module.name);
-            run_pass_manager(cgcx, &dcx, &mut module, true)?;
-            save_temp_bitcode(cgcx, &module, "thin-lto-after-pm");
-        }
-    }*/
-    // FIXME: switch to #[expect] when the clippy bug is fixed.
-    #[allow(clippy::let_and_return)]
-    module
-}
-
-pub struct ThinBuffer {
-    context: Arc<SyncContext>,
-}
-
-impl ThinBuffer {
-    pub(crate) fn new(context: &Arc<SyncContext>) -> Self {
-        Self { context: Arc::clone(context) }
-    }
-}
-
-impl ThinBufferMethods for ThinBuffer {
-    fn data(&self) -> &[u8] {
-        &[]
-    }
+    codegen(cgcx, prof, dcx, module, &cgcx.module_config)
 }
 
-pub struct ThinData; //(Arc<TempDir>);
+pub struct ThinData;
 
 fn module_name_to_str(c_str: &CStr) -> &str {
     c_str.to_str().unwrap_or_else(|e| {
diff --git a/src/back/write.rs b/src/back/write.rs
index ddf13558027..24ea2b66ba7 100644
--- a/src/back/write.rs
+++ b/src/back/write.rs
@@ -2,12 +2,10 @@ use std::{env, fs};
 
 use gccjit::{Context, OutputKind};
 use rustc_codegen_ssa::back::link::ensure_removed;
-use rustc_codegen_ssa::back::write::{
-    BitcodeSection, CodegenContext, EmitObj, ModuleConfig, SharedEmitter,
-};
+use rustc_codegen_ssa::back::write::{BitcodeSection, CodegenContext, EmitObj, ModuleConfig};
 use rustc_codegen_ssa::{CompiledModule, ModuleCodegen};
 use rustc_data_structures::profiling::SelfProfilerRef;
-use rustc_errors::DiagCtxt;
+use rustc_errors::DiagCtxtHandle;
 use rustc_fs_util::link_or_copy;
 use rustc_log::tracing::debug;
 use rustc_session::config::OutputType;
@@ -20,13 +18,10 @@ use crate::{GccContext, LtoMode};
 pub(crate) fn codegen(
     cgcx: &CodegenContext,
     prof: &SelfProfilerRef,
-    shared_emitter: &SharedEmitter,
+    dcx: DiagCtxtHandle<'_>,
     module: ModuleCodegen<GccContext>,
     config: &ModuleConfig,
 ) -> CompiledModule {
-    let dcx = DiagCtxt::new(Box::new(shared_emitter.clone()));
-    let dcx = dcx.handle();
-
     let _timer = prof.generic_activity_with_arg("GCC_module_codegen", &*module.name);
     {
         let context = &module.module_llvm.context;
diff --git a/src/builder.rs b/src/builder.rs
index e1937f5c11e..1d5db049f7d 100644
--- a/src/builder.rs
+++ b/src/builder.rs
@@ -485,13 +485,14 @@ impl<'a, 'gcc, 'tcx> Deref for Builder<'a, 'gcc, 'tcx> {
 }
 
 impl<'gcc, 'tcx> BackendTypes for Builder<'_, 'gcc, 'tcx> {
-    type Value = <CodegenCx<'gcc, 'tcx> as BackendTypes>::Value;
-    type Metadata = <CodegenCx<'gcc, 'tcx> as BackendTypes>::Metadata;
     type Function = <CodegenCx<'gcc, 'tcx> as BackendTypes>::Function;
     type BasicBlock = <CodegenCx<'gcc, 'tcx> as BackendTypes>::BasicBlock;
-    type Type = <CodegenCx<'gcc, 'tcx> as BackendTypes>::Type;
     type Funclet = <CodegenCx<'gcc, 'tcx> as BackendTypes>::Funclet;
 
+    type Value = <CodegenCx<'gcc, 'tcx> as BackendTypes>::Value;
+    type Type = <CodegenCx<'gcc, 'tcx> as BackendTypes>::Type;
+    type FunctionSignature = <CodegenCx<'gcc, 'tcx> as BackendTypes>::FunctionSignature;
+
     type DIScope = <CodegenCx<'gcc, 'tcx> as BackendTypes>::DIScope;
     type DILocation = <CodegenCx<'gcc, 'tcx> as BackendTypes>::DILocation;
     type DIVariable = <CodegenCx<'gcc, 'tcx> as BackendTypes>::DIVariable;
@@ -1655,6 +1656,10 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
         unimplemented!();
     }
 
+    fn get_funclet_cleanuppad(&self, _funclet: &Funclet) -> RValue<'gcc> {
+        unimplemented!();
+    }
+
     // Atomic Operations
     fn atomic_cmpxchg(
         &mut self,
diff --git a/src/common.rs b/src/common.rs
index 86a4eeac89d..79cae9e0282 100644
--- a/src/common.rs
+++ b/src/common.rs
@@ -5,9 +5,10 @@ use rustc_codegen_ssa::traits::{
     BaseTypeCodegenMethods, ConstCodegenMethods, MiscCodegenMethods, StaticCodegenMethods,
 };
 use rustc_middle::mir::Mutability;
-use rustc_middle::mir::interpret::{ConstAllocation, GlobalAlloc, PointerArithmetic, Scalar};
+use rustc_middle::mir::interpret::{GlobalAlloc, PointerArithmetic, Scalar};
 use rustc_middle::ty::layout::LayoutOf;
 
+use crate::consts::const_alloc_to_gcc;
 use crate::context::{CodegenCx, new_array_type};
 use crate::type_of::LayoutGccExt;
 
@@ -260,11 +261,13 @@ impl<'gcc, 'tcx> ConstCodegenMethods for CodegenCx<'gcc, 'tcx> {
                             };
                         }
 
-                        let init = self.const_data_from_alloc(alloc);
-                        let alloc = alloc.inner();
-                        let value = match alloc.mutability {
-                            Mutability::Mut => self.static_addr_of_mut(init, alloc.align, None),
-                            _ => self.static_addr_of(init, alloc.align, None),
+                        let value = match alloc.inner().mutability {
+                            Mutability::Mut => self.static_addr_of_mut(
+                                const_alloc_to_gcc(self, alloc),
+                                alloc.inner().align,
+                                None,
+                            ),
+                            _ => self.static_addr_of(alloc, None),
                         };
                         if !self.sess().fewer_names() {
                             // TODO(antoyo): set value name.
@@ -282,8 +285,7 @@ impl<'gcc, 'tcx> ConstCodegenMethods for CodegenCx<'gcc, 'tcx> {
                                 }),
                             )))
                             .unwrap_memory();
-                        let init = self.const_data_from_alloc(alloc);
-                        self.static_addr_of(init, alloc.inner().align, None)
+                        self.static_addr_of(alloc, None)
                     }
                     GlobalAlloc::TypeId { .. } => {
                         let val = self.const_usize(offset.bytes());
@@ -311,22 +313,6 @@ impl<'gcc, 'tcx> ConstCodegenMethods for CodegenCx<'gcc, 'tcx> {
         }
     }
 
-    fn const_data_from_alloc(&self, alloc: ConstAllocation<'_>) -> Self::Value {
-        // We ignore the alignment for the purpose of deduping RValues
-        // The alignment is not handled / used in any way by `const_alloc_to_gcc`,
-        // so it is OK to overwrite it here.
-        let mut mock_alloc = alloc.inner().clone();
-        mock_alloc.align = rustc_abi::Align::MAX;
-        // Check if the rvalue is already in the cache - if so, just return it directly.
-        if let Some(res) = self.const_cache.borrow().get(&mock_alloc) {
-            return *res;
-        }
-        // Rvalue not in the cache - convert and add it.
-        let res = crate::consts::const_alloc_to_gcc_uncached(self, alloc);
-        self.const_cache.borrow_mut().insert(mock_alloc, res);
-        res
-    }
-
     fn const_ptr_byte_offset(&self, base_addr: Self::Value, offset: abi::Size) -> Self::Value {
         self.context
             .new_array_access(None, base_addr, self.const_usize(offset.bytes()))
diff --git a/src/consts.rs b/src/consts.rs
index 6fb96f8832b..8afa57bc28f 100644
--- a/src/consts.rs
+++ b/src/consts.rs
@@ -22,6 +22,25 @@ use crate::base;
 use crate::context::CodegenCx;
 use crate::type_of::LayoutGccExt;
 
+pub(crate) fn const_alloc_to_gcc<'gcc, 'tcx>(
+    cx: &CodegenCx<'gcc, 'tcx>,
+    alloc: ConstAllocation<'_>,
+) -> RValue<'gcc> {
+    // We ignore the alignment for the purpose of deduping RValues.
+    // The alignment is not handled / used in any way by `const_alloc_to_gcc_uncached`,
+    // so it is OK to overwrite it here.
+    let mut mock_alloc = alloc.inner().clone();
+    mock_alloc.align = rustc_abi::Align::MAX;
+    // Check if the rvalue is already in the cache - if so, just return it directly.
+    if let Some(res) = cx.const_cache.borrow().get(&mock_alloc) {
+        return *res;
+    }
+    // Rvalue not in the cache - convert and add it.
+    let res = crate::consts::const_alloc_to_gcc_uncached(cx, alloc);
+    cx.const_cache.borrow_mut().insert(mock_alloc, res);
+    res
+}
+
 fn set_global_alignment<'gcc, 'tcx>(
     cx: &CodegenCx<'gcc, 'tcx>,
     gv: LValue<'gcc>,
@@ -37,7 +56,10 @@ fn set_global_alignment<'gcc, 'tcx>(
 }
 
 impl<'gcc, 'tcx> StaticCodegenMethods for CodegenCx<'gcc, 'tcx> {
-    fn static_addr_of(&self, cv: RValue<'gcc>, align: Align, kind: Option<&str>) -> RValue<'gcc> {
+    fn static_addr_of(&self, alloc: ConstAllocation<'_>, kind: Option<&str>) -> RValue<'gcc> {
+        let cv = const_alloc_to_gcc(self, alloc);
+        let align = alloc.inner().align;
+
         if let Some(variable) = self.const_globals.borrow().get(&cv) {
             if let Some(global_variable) = self.global_lvalues.borrow().get(variable) {
                 let alignment = align.bits() as i32;
@@ -361,7 +383,7 @@ fn codegen_static_initializer<'gcc, 'tcx>(
     def_id: DefId,
 ) -> Result<(RValue<'gcc>, ConstAllocation<'tcx>), ErrorHandled> {
     let alloc = cx.tcx.eval_static_initializer(def_id)?;
-    Ok((cx.const_data_from_alloc(alloc), alloc))
+    Ok((const_alloc_to_gcc(cx, alloc), alloc))
 }
 
 fn check_and_apply_linkage<'gcc, 'tcx>(
diff --git a/src/context.rs b/src/context.rs
index 03f207f4572..ada3d73f612 100644
--- a/src/context.rs
+++ b/src/context.rs
@@ -380,14 +380,14 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
 }
 
 impl<'gcc, 'tcx> BackendTypes for CodegenCx<'gcc, 'tcx> {
-    type Value = RValue<'gcc>;
-    type Metadata = RValue<'gcc>;
     type Function = Function<'gcc>;
-
     type BasicBlock = Block<'gcc>;
-    type Type = Type<'gcc>;
     type Funclet = (); // TODO(antoyo)
 
+    type Value = RValue<'gcc>;
+    type Type = Type<'gcc>;
+    type FunctionSignature = Type<'gcc>;
+
     type DIScope = (); // TODO(antoyo)
     type DILocation = Location<'gcc>;
     type DIVariable = (); // TODO(antoyo)
diff --git a/src/intrinsic/mod.rs b/src/intrinsic/mod.rs
index ffcae6a1cdd..bf4fea604c2 100644
--- a/src/intrinsic/mod.rs
+++ b/src/intrinsic/mod.rs
@@ -676,7 +676,7 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
         &mut self,
         _vtable: Self::Value,
         _vtable_byte_offset: u64,
-        _typeid: Self::Value,
+        _typeid: &[u8],
     ) -> Self::Value {
         // Unsupported.
         self.context.new_rvalue_from_int(self.int_type, 0)
diff --git a/src/lib.rs b/src/lib.rs
index 1b3d78c42e6..05251c19383 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -19,6 +19,7 @@
 #![warn(unused_lifetimes)]
 #![deny(clippy::pattern_type_mismatch)]
 #![expect(clippy::uninlined_format_args)]
+#![allow(clippy::collapsible_match)]
 
 // The rustc crates we need
 extern crate rustc_abi;
@@ -76,8 +77,7 @@ use std::path::{Path, PathBuf};
 use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::{Arc, Mutex};
 
-use back::lto::{ThinBuffer, ThinData};
-use gccjit::{CType, Context, OptimizationLevel};
+use gccjit::{CType, Context, OptimizationLevel, OutputKind};
 #[cfg(feature = "master")]
 use gccjit::{TargetInfo, Version};
 use rustc_ast::expand::allocator::AllocatorMethod;
@@ -88,11 +88,11 @@ use rustc_codegen_ssa::back::write::{
 use rustc_codegen_ssa::base::codegen_crate;
 use rustc_codegen_ssa::target_features::cfg_target_feature;
 use rustc_codegen_ssa::traits::{CodegenBackend, ExtraBackendMethods, WriteBackendMethods};
-use rustc_codegen_ssa::{CodegenResults, CompiledModule, ModuleCodegen, TargetConfig};
+use rustc_codegen_ssa::{CompiledModule, CompiledModules, CrateInfo, ModuleCodegen, TargetConfig};
 use rustc_data_structures::fx::FxIndexMap;
 use rustc_data_structures::profiling::SelfProfilerRef;
 use rustc_data_structures::sync::IntoDynSyncSend;
-use rustc_errors::DiagCtxtHandle;
+use rustc_errors::{DiagCtxt, DiagCtxtHandle};
 use rustc_middle::dep_graph::{WorkProduct, WorkProductId};
 use rustc_middle::ty::TyCtxt;
 use rustc_middle::util::Providers;
@@ -102,7 +102,7 @@ use rustc_span::Symbol;
 use rustc_target::spec::{Arch, RelocModel};
 use tempfile::TempDir;
 
-use crate::back::lto::ModuleBuffer;
+use crate::back::lto::{ModuleBuffer, ThinData};
 use crate::gcc_util::{target_cpu, to_gcc_features};
 
 pub struct PrintOnPanic<F: Fn() -> String>(pub F);
@@ -286,11 +286,12 @@ impl CodegenBackend for GccCodegenBackend {
             |tcx, ()| gcc_util::global_gcc_features(tcx.sess)
     }
 
-    fn codegen_crate(&self, tcx: TyCtxt<'_>) -> Box<dyn Any> {
-        let target_cpu = target_cpu(tcx.sess);
-        let res = codegen_crate(self.clone(), tcx, target_cpu.to_string());
+    fn target_cpu(&self, sess: &Session) -> String {
+        target_cpu(sess).to_owned()
+    }
 
-        Box::new(res)
+    fn codegen_crate(&self, tcx: TyCtxt<'_>, crate_info: &CrateInfo) -> Box<dyn Any> {
+        Box::new(codegen_crate(self.clone(), tcx, crate_info))
     }
 
     fn join_codegen(
@@ -298,7 +299,7 @@ impl CodegenBackend for GccCodegenBackend {
         ongoing_codegen: Box<dyn Any>,
         sess: &Session,
         _outputs: &OutputFilenames,
-    ) -> (CodegenResults, FxIndexMap<WorkProductId, WorkProduct>) {
+    ) -> (CompiledModules, FxIndexMap<WorkProductId, WorkProduct>) {
         ongoing_codegen
             .downcast::<rustc_codegen_ssa::back::write::OngoingCodegen<GccCodegenBackend>>()
             .expect("Expected GccCodegenBackend's OngoingCodegen, found Box<Any>")
@@ -369,16 +370,6 @@ impl ExtraBackendMethods for GccCodegenBackend {
             self.lto_supported.load(Ordering::SeqCst),
         )
     }
-
-    fn target_machine_factory(
-        &self,
-        _sess: &Session,
-        _opt_level: OptLevel,
-        _features: &[String],
-    ) -> TargetMachineFactoryFn<Self> {
-        // TODO(antoyo): set opt level.
-        Arc::new(|_, _| ())
-    }
 }
 
 #[derive(Clone, Copy, PartialEq)]
@@ -426,9 +417,18 @@ impl WriteBackendMethods for GccCodegenBackend {
     type TargetMachine = ();
     type ModuleBuffer = ModuleBuffer;
     type ThinData = ThinData;
-    type ThinBuffer = ThinBuffer;
 
-    fn run_and_optimize_fat_lto(
+    fn target_machine_factory(
+        &self,
+        _sess: &Session,
+        _opt_level: OptLevel,
+        _features: &[String],
+    ) -> TargetMachineFactoryFn<Self> {
+        // TODO(antoyo): set opt level.
+        Arc::new(|_, _| ())
+    }
+
+    fn optimize_and_codegen_fat_lto(
         cgcx: &CodegenContext,
         prof: &SelfProfilerRef,
         shared_emitter: &SharedEmitter,
@@ -437,7 +437,7 @@ impl WriteBackendMethods for GccCodegenBackend {
         _exported_symbols_for_lto: &[String],
         each_linked_rlib_for_lto: &[PathBuf],
         modules: Vec<FatLtoInput<Self>>,
-    ) -> ModuleCodegen<Self::Module> {
+    ) -> CompiledModule {
         back::lto::run_fat(cgcx, prof, shared_emitter, each_linked_rlib_for_lto, modules)
     }
 
@@ -448,20 +448,12 @@ impl WriteBackendMethods for GccCodegenBackend {
         // FIXME(bjorn3): Limit LTO exports to these symbols
         _exported_symbols_for_lto: &[String],
         each_linked_rlib_for_lto: &[PathBuf],
-        modules: Vec<(String, Self::ThinBuffer)>,
+        modules: Vec<(String, Self::ModuleBuffer)>,
         cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>,
     ) -> (Vec<ThinModule<Self>>, Vec<WorkProduct>) {
         back::lto::run_thin(cgcx, prof, dcx, each_linked_rlib_for_lto, modules, cached_modules)
     }
 
-    fn print_pass_timings(&self) {
-        unimplemented!();
-    }
-
-    fn print_statistics(&self) {
-        unimplemented!()
-    }
-
     fn optimize(
         _cgcx: &CodegenContext,
         _prof: &SelfProfilerRef,
@@ -472,14 +464,14 @@ impl WriteBackendMethods for GccCodegenBackend {
         module.module_llvm.context.set_optimization_level(to_gcc_opt_level(config.opt_level));
     }
 
-    fn optimize_thin(
+    fn optimize_and_codegen_thin(
         cgcx: &CodegenContext,
-        _prof: &SelfProfilerRef,
-        _shared_emitter: &SharedEmitter,
+        prof: &SelfProfilerRef,
+        shared_emitter: &SharedEmitter,
         _tm_factory: TargetMachineFactoryFn<Self>,
         thin: ThinModule<Self>,
-    ) -> ModuleCodegen<Self::Module> {
-        back::lto::optimize_thin_module(thin, cgcx)
+    ) -> CompiledModule {
+        back::lto::optimize_and_codegen_thin(cgcx, prof, shared_emitter, thin)
     }
 
     fn codegen(
@@ -489,15 +481,25 @@ impl WriteBackendMethods for GccCodegenBackend {
         module: ModuleCodegen<Self::Module>,
         config: &ModuleConfig,
     ) -> CompiledModule {
-        back::write::codegen(cgcx, prof, shared_emitter, module, config)
+        let dcx = DiagCtxt::new(Box::new(shared_emitter.clone()));
+        let dcx = dcx.handle();
+        back::write::codegen(cgcx, prof, dcx, module, config)
     }
 
-    fn prepare_thin(module: ModuleCodegen<Self::Module>) -> (String, Self::ThinBuffer) {
-        back::lto::prepare_thin(module)
-    }
+    fn serialize_module(module: Self::Module, _is_thin: bool) -> Self::ModuleBuffer {
+        let context = &module.context;
+
+        let temp_dir = TempDir::new().unwrap();
+        let bc_out = temp_dir.path().join("fakethinlto.o");
+        std::mem::forget(temp_dir);
+
+        /*if module.lto_supported {
+            context.add_command_line_option("-flto=auto");
+            context.add_command_line_option("-flto-partition=one");
+        }*/
+        context.compile_to_file(OutputKind::ObjectFile, bc_out.to_str().expect("path to str"));
 
-    fn serialize_module(_module: ModuleCodegen<Self::Module>) -> (String, Self::ModuleBuffer) {
-        unimplemented!();
+        ModuleBuffer::new(bc_out)
     }
 }
 
diff --git a/src/type_of.rs b/src/type_of.rs
index 68fca5a17ad..48d1b016390 100644
--- a/src/type_of.rs
+++ b/src/type_of.rs
@@ -288,7 +288,9 @@ impl<'tcx> LayoutGccExt<'tcx> for TyAndLayout<'tcx> {
             Float(f) => cx.type_from_float(f),
             Pointer(address_space) => {
                 // If we know the alignment, pick something better than i8.
-                let pointee = if let Some(pointee) = self.pointee_info_at(cx, offset) {
+                let pointee = if let Some(pointee) = self.pointee_info_at(cx, offset)
+                    && pointee.align > rustc_abi::Align::ONE
+                {
                     cx.type_pointee_for_align(pointee.align)
                 } else {
                     cx.type_i8()
diff --git a/tests/run/asm.rs b/tests/run/asm.rs
index 9b15a28d829..4c9c49671ba 100644
--- a/tests/run/asm.rs
+++ b/tests/run/asm.rs
@@ -213,7 +213,7 @@ fn asm() {
         core::arch::asm!(
             "",
             out("al") _,
-            out("bl") _,
+            //out("bl") _, reserved by LLVM, reservation enforced by rustc
             out("cl") _,
             out("dl") _,
             out("sil") _,
diff --git a/tests/run/call-llvm-intrinsics.rs b/tests/run/call-llvm-intrinsics.rs
new file mode 100644
index 00000000000..86e041c3a2f
--- /dev/null
+++ b/tests/run/call-llvm-intrinsics.rs
@@ -0,0 +1,38 @@
+// Compiler:
+//
+// Run-time:
+//   status: 0
+
+// FIXME: Remove this test once rustc's `./tests/codegen/riscv-abi/call-llvm-intrinsics.rs`
+// stops ignoring the GCC backend.
+
+#![feature(link_llvm_intrinsics)]
+#![allow(internal_features)]
+
+struct A;
+
+impl Drop for A {
+    fn drop(&mut self) {
+        println!("A");
+    }
+}
+
+extern "C" {
+    #[link_name = "llvm.sqrt.f32"]
+    fn sqrt(x: f32) -> f32;
+}
+
+pub fn do_call() {
+    let _a = A;
+
+    unsafe {
+        // Ensure that we `call` LLVM intrinsics instead of trying to `invoke` them
+        // CHECK: store float 4.000000e+00, float* %{{.}}, align 4
+        // CHECK: call float @llvm.sqrt.f32(float %{{.}}
+        sqrt(4.0);
+    }
+}
+
+fn main() {
+    do_call();
+}
diff --git a/tests/run/simd-ffi.rs b/tests/run/simd-ffi.rs
new file mode 100644
index 00000000000..67cc2e5b96e
--- /dev/null
+++ b/tests/run/simd-ffi.rs
@@ -0,0 +1,102 @@
+// Compiler:
+//
+// Run-time:
+//   status: 0
+
+// FIXME: Remove this test once <tests/run-make/simd-ffi/simd.rs> stops
+// ignoring the GCC backend.
+
+#![allow(internal_features, non_camel_case_types)]
+// We can compile to a variety of platforms because we don't need
+// cross-compiled standard libraries.
+#![feature(no_core, auto_traits)]
+#![no_core]
+#![feature(repr_simd, simd_ffi, link_llvm_intrinsics, lang_items, rustc_attrs)]
+
+#[derive(Copy)]
+#[repr(simd)]
+pub struct f32x4([f32; 4]);
+
+extern "C" {
+    #[link_name = "llvm.sqrt.v4f32"]
+    fn vsqrt(x: f32x4) -> f32x4;
+}
+
+pub fn foo(x: f32x4) -> f32x4 {
+    unsafe { vsqrt(x) }
+}
+
+#[derive(Copy)]
+#[repr(simd)]
+pub struct i32x4([i32; 4]);
+
+extern "C" {
+    // _mm_sll_epi32 (x86/x86_64 with SSE2; the cfg value is spelled `x86_64`, not `x86-64`)
+    #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2"))]
+    #[link_name = "llvm.x86.sse2.psll.d"]
+    fn integer(a: i32x4, b: i32x4) -> i32x4;
+
+    // vmaxq_s32
+    #[cfg(target_arch = "arm")]
+    #[link_name = "llvm.arm.neon.vmaxs.v4i32"]
+    fn integer(a: i32x4, b: i32x4) -> i32x4;
+    // vmaxq_s32
+    #[cfg(target_arch = "aarch64")]
+    #[link_name = "llvm.aarch64.neon.maxs.v4i32"]
+    fn integer(a: i32x4, b: i32x4) -> i32x4;
+
+    // Use a generic LLVM intrinsic to do type checking on other platforms
+    #[cfg(not(any(
+        all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2"),
+        target_arch = "arm",
+        target_arch = "aarch64"
+    )))]
+    #[link_name = "llvm.smax.v4i32"]
+    fn integer(a: i32x4, b: i32x4) -> i32x4;
+}
+
+pub fn bar(a: i32x4, b: i32x4) -> i32x4 {
+    unsafe { integer(a, b) }
+}
+
+#[lang = "pointee_sized"]
+pub trait PointeeSized {}
+
+#[lang = "meta_sized"]
+pub trait MetaSized: PointeeSized {}
+
+#[lang = "sized"]
+pub trait Sized: MetaSized {}
+
+#[lang = "copy"]
+pub trait Copy {}
+
+impl Copy for f32 {}
+impl Copy for i32 {}
+impl Copy for [f32; 4] {}
+impl Copy for [i32; 4] {}
+
+pub mod marker {
+    pub use Copy;
+}
+
+#[lang = "freeze"]
+auto trait Freeze {}
+
+#[macro_export]
+#[rustc_builtin_macro]
+macro_rules! Copy {
+    () => {};
+}
+#[macro_export]
+#[rustc_builtin_macro]
+macro_rules! derive {
+    () => {};
+}
+
+#[lang = "start"]
+fn start<T>(_main: fn() -> T, _argc: isize, _argv: *const *const u8, _sigpipe: u8) -> isize {
+    0
+}
+
+fn main() {}