From 24afdc93bc6f217dd2d52a555c80d94177900502 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Tue, 16 Dec 2025 14:03:07 +0000 Subject: [PATCH 01/15] CP-310853: claim the entire footprint of the VM for now MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Edwin Török --- ocaml/xenopsd/xc/domain.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index 6c65d467f3..67f855189e 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -1110,7 +1110,7 @@ let build_pre ~xc ~xs ~vcpus ~memory ~hard_affinity domid = Xenops_server.cores_of_numa_affinity_policy pin ~vcpus in numa_placement domid ~vcpus ~cores - ~memory:(Int64.mul memory.xen_max_mib 1048576L) + ~memory:(Int64.mul memory.required_host_free_mib 1048576L) affinity |> Option.map fst ) From 060d7925017716cdbba73a0c3efe4da2ff24242b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Wed, 17 Dec 2025 14:48:19 +0000 Subject: [PATCH 02/15] CA-422188: either always use claims or never use claims MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Do not mix using claims with not using claims. Xen cannot currently guarantee that it'll honour a VM's memory claim, unless all other VMs also use claims. Global claims have existed for a long time in Xen, so this should be safe to do on both XS8 and XS9. 
Signed-off-by: Edwin Török --- ocaml/xenopsd/xc/domain.ml | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index 67f855189e..e07e865e25 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -1002,6 +1002,8 @@ let numa_placement domid ~vcpus ~cores ~memory affinity = in let nr_pages = Int64.div memory 4096L |> Int64.to_int in try + D.debug "NUMAClaim domid %d: local claim on node %d: %d pages" domid + node nr_pages ; Xenctrlext.domain_claim_pages xcext domid ~numa_node nr_pages ; set_vcpu_affinity cpu_affinity ; Some (node, memory) @@ -1009,6 +1011,7 @@ | Xenctrlext.Not_available -> (* Xen does not provide the interface to claim pages from a single NUMA node, ignore the error and continue. *) + D.debug "NUMAClaim domid %d: local claim not available" domid ; None | Xenctrlext.Unix_error (errno, _) -> D.info @@ -1109,10 +1112,29 @@ let build_pre ~xc ~xs ~vcpus ~memory ~hard_affinity domid = and cores = Xenops_server.cores_of_numa_affinity_policy pin ~vcpus in - numa_placement domid ~vcpus ~cores - ~memory:(Int64.mul memory.required_host_free_mib 1048576L) - affinity - |> Option.map fst + let memory = Int64.mul memory.required_host_free_mib 1048576L in + match numa_placement domid ~vcpus ~cores ~memory affinity with + | None -> + (* Always perform a global claim when NUMA placement is + enabled, and single node claims failed or were + unavailable: + This tries to ensure that memory allocated for this + domain won't use up memory claimed by other domains. + If claims are mixed with non-claims then Xen can't + currently guarantee that it would honour the existing + claims. 
+ A failure here is a hard failure: we'd fail allocating + memory later anyway + *) + let nr_pages = Int64.div memory 4096L |> Int64.to_int in + let xcext = Xenctrlext.get_handle () in + D.debug "NUMAClaim domid %d: global claim: %d pages" domid + nr_pages ; + Xenctrlext.domain_claim_pages xcext domid + ~numa_node:Xenctrlext.NumaNode.none nr_pages ; + None + | Some (plan, _) -> + Some plan ) in let store_chan, console_chan = create_channels ~xc uuid domid in From 4437591c45d3aede7cce4440935162941f45672f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Wed, 17 Dec 2025 14:53:56 +0000 Subject: [PATCH 03/15] CA-422187: fix NUMA on XS8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On XS8 we always raise an exception when attempting to claim from a single node. We wanted to only use soft affinity when the single node claim succeeded (which is the correct fix on XS9, where this API is available). However this meant that we've effectively completely disabled NUMA support on XS8, without any way to turn it on. Always use soft affinity when the single-node claim API is unavailable, this should keep NUMA working on XS8. On XS9 Xen itself would never raise ENOSYS (it has a `err = errno = 0` on ENOSYS). Fixes: fb66dfc03 ("CA-421847: set vcpu affinity if node claim succeeded") Signed-off-by: Edwin Török --- ocaml/xenopsd/xc/domain.ml | 1 + 1 file changed, 1 insertion(+) diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index e07e865e25..b80ed923cd 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -1012,6 +1012,7 @@ let numa_placement domid ~vcpus ~cores ~memory affinity = (* Xen does not provide the interface to claim pages from a single NUMA node, ignore the error and continue. 
*) D.debug "NUMAClaim domid %d: local claim not available" domid ; + set_vcpu_affinity cpu_affinity ; None | Xenctrlext.Unix_error (errno, _) -> D.info From cb363f009a960c908a03a0a7ae6289daee499c18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 18 Dec 2025 13:53:12 +0000 Subject: [PATCH 04/15] CA-422187: make power of 2 more explicit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Edwin Török --- ocaml/xenopsd/xc/domain.ml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index b80ed923cd..36afd289f3 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -1113,7 +1113,9 @@ let build_pre ~xc ~xs ~vcpus ~memory ~hard_affinity domid = and cores = Xenops_server.cores_of_numa_affinity_policy pin ~vcpus in - let memory = Int64.mul memory.required_host_free_mib 1048576L in + let memory = + Int64.(mul memory.required_host_free_mib (shift_left 1L 20)) + in match numa_placement domid ~vcpus ~cores ~memory affinity with | None -> (* Always perform a global claim when NUMA placement is From 112db1f4dcfb57ff040a54e1470eacb085217992 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 18 Dec 2025 13:53:53 +0000 Subject: [PATCH 05/15] CA-422187: only ENOMEM is retriable when a single-node NUMA claim fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Edwin Török --- ocaml/xenopsd/xc/domain.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index 36afd289f3..0a643997f1 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -1014,7 +1014,7 @@ let numa_placement domid ~vcpus ~cores ~memory affinity = | Xenctrlext.Unix_error 
(errno, _) -> + | Xenctrlext.Unix_error ((Unix.ENOMEM as errno), _) -> D.info "%s: unable to claim enough memory, domain %d won't be hosted in a \ single NUMA node. (error %s)" From 95367e1aa6b965f43c5bd30a3de7dd1fac4fce16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 18 Dec 2025 16:06:54 +0000 Subject: [PATCH 06/15] CA-422187: safer defaults for global claims MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Xen may have already allocated some memory for the domain, and the overhead is only an estimate. A global claim failing is a hard failure, so instead use a more conservative estimate: `memory.build_start_mib`. This is similar to `required_host_free_mib`, but doesn't take overhead into account. Eventually we'd want to have another argument to the create hypercall that tells it what NUMA node(s) to use, and then we can include all the overhead too there. For the single node claim keep the amount as it was, it is only a best effort claim. 
Fixes: 060d79250 ("CA-422188: either always use claims or never use claims") Signed-off-by: Edwin Török --- ocaml/xenopsd/xc/domain.ml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index 0a643997f1..1ea2bdeb9c 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -1115,6 +1115,8 @@ let build_pre ~xc ~xs ~vcpus ~memory ~hard_affinity domid = in let memory = Int64.(mul memory.required_host_free_mib (shift_left 1L 20)) + and memory_hard = + Int64.(mul memory.build_start_mib (shift_left 1L 20)) in match numa_placement domid ~vcpus ~cores ~memory affinity with | None -> @@ -1129,7 +1131,7 @@ let build_pre ~xc ~xs ~vcpus ~memory ~hard_affinity domid = A failure here is a hard failure: we'd fail allocating memory later anyway *) - let nr_pages = Int64.div memory 4096L |> Int64.to_int in + let nr_pages = Int64.div memory_hard 4096L |> Int64.to_int in let xcext = Xenctrlext.get_handle () in D.debug "NUMAClaim domid %d: global claim: %d pages" domid nr_pages ; From 02c6ed1a7e4088b49225a0d730d6077eda8202f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 18 Dec 2025 17:33:23 +0000 Subject: [PATCH 07/15] CA-422187: do not claim shadow_mib, it has already been allocated MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When rebooting lots of VMs in parallel we might run out of memory and fail to boot all the VMs again. This is because we overestimate the amount of memory required, and claim too much. That memory is released when the domain build finishes, but when building domains in parallel it'll temporarily result in an out of memory error. Instead try to claim only what is left to be allocated: the p2m map and shadow map have already been allocated by this point. 
Fixes: 95367e1aa ("CA-422187: safer defaults for global claims") Signed-off-by: Edwin Török --- ocaml/xapi-idl/memory/memory.ml | 3 +++ ocaml/xenopsd/xc/domain.ml | 4 +--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/ocaml/xapi-idl/memory/memory.ml b/ocaml/xapi-idl/memory/memory.ml index 99951f7e3e..4d0b0e0043 100644 --- a/ocaml/xapi-idl/memory/memory.ml +++ b/ocaml/xapi-idl/memory/memory.ml @@ -192,6 +192,9 @@ module Memory_model (D : MEMORY_MODEL_DATA) = struct static_max_mib --- Int64.of_int video_mib +++ D.shim_mib static_max_mib let build_start_mib static_max_mib target_mib video_mib = + D.extra_internal_mib + +++ D.extra_external_mib + +++ if D.can_start_ballooned_down then target_mib --- Int64.of_int video_mib +++ D.shim_mib target_mib else diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index 1ea2bdeb9c..56137d3669 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -1114,8 +1114,6 @@ let build_pre ~xc ~xs ~vcpus ~memory ~hard_affinity domid = Xenops_server.cores_of_numa_affinity_policy pin ~vcpus in let memory = - Int64.(mul memory.required_host_free_mib (shift_left 1L 20)) - and memory_hard = Int64.(mul memory.build_start_mib (shift_left 1L 20)) in match numa_placement domid ~vcpus ~cores ~memory affinity with @@ -1131,7 +1129,7 @@ let build_pre ~xc ~xs ~vcpus ~memory ~hard_affinity domid = A failure here is a hard failure: we'd fail allocating memory later anyway *) - let nr_pages = Int64.div memory_hard 4096L |> Int64.to_int in + let nr_pages = Int64.div memory 4096L |> Int64.to_int in let xcext = Xenctrlext.get_handle () in D.debug "NUMAClaim domid %d: global claim: %d pages" domid nr_pages ; From 95491040d998ebcaf69563e25760922a24d038f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Fri, 19 Dec 2025 09:33:27 +0000 Subject: [PATCH 08/15] CA-422187: claim just the bare minimum MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
When a domain build finishes Xen releases any extra unused memory from the claim. In my tests that is ~544 pages, which is about the amount that got added here, so we're double counting something. Remove the hack, so we allocate just the bare minimum. Fixes: 02c6ed1a7 ("CA-422187: do not claim shadow_mib, it has already been allocated") Signed-off-by: Edwin Török --- ocaml/xapi-idl/memory/memory.ml | 3 --- 1 file changed, 3 deletions(-) diff --git a/ocaml/xapi-idl/memory/memory.ml b/ocaml/xapi-idl/memory/memory.ml index 4d0b0e0043..99951f7e3e 100644 --- a/ocaml/xapi-idl/memory/memory.ml +++ b/ocaml/xapi-idl/memory/memory.ml @@ -192,9 +192,6 @@ module Memory_model (D : MEMORY_MODEL_DATA) = struct static_max_mib --- Int64.of_int video_mib +++ D.shim_mib static_max_mib let build_start_mib static_max_mib target_mib video_mib = - D.extra_internal_mib - +++ D.extra_external_mib - +++ if D.can_start_ballooned_down then target_mib --- Int64.of_int video_mib +++ D.shim_mib target_mib else From a4bc2bbe16374808cf1d1255bf07adfca14f2554 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Fri, 19 Dec 2025 11:34:26 +0000 Subject: [PATCH 09/15] CA-422187: more accurate claims and debug messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We noticed that xenguest releases 32 unused pages from the domain's claim. These are from the low 1MiB video range, so avoid requesting it. Also always print memory free statistics when `wait_xen_free_mem` is called. Turns out `scrub_pages` is always 0, since this never got implemented in Xen (it is hardcoded to 0). 
Signed-off-by: Edwin Török --- ocaml/xenopsd/xc/domain.ml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index 56137d3669..73ec591178 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -244,7 +244,10 @@ let wait_xen_free_mem ~xc ?(maximum_wait_time_seconds = 64) required_memory_kib in (* At exponentially increasing intervals, write *) (* a debug message saying how long we've waited: *) - if is_power_of_2 accumulated_wait_time_seconds then + if + accumulated_wait_time_seconds = 0 + || is_power_of_2 accumulated_wait_time_seconds + then debug "Waited %i second(s) for memory to become available: %Ld KiB free, %Ld \ KiB scrub, %Ld KiB required" @@ -1000,7 +1003,7 @@ let numa_placement domid ~vcpus ~cores ~memory affinity = __FUNCTION__ domid ; None in - let nr_pages = Int64.div memory 4096L |> Int64.to_int in + let nr_pages = (Int64.div memory 4096L |> Int64.to_int) - 32 in try D.debug "NUMAClaim domid %d: local claim on node %d: %d pages" domid node nr_pages ; @@ -1129,7 +1132,9 @@ let build_pre ~xc ~xs ~vcpus ~memory ~hard_affinity domid = A failure here is a hard failure: we'd fail allocating memory later anyway *) - let nr_pages = Int64.div memory 4096L |> Int64.to_int in + let nr_pages = + (Int64.div memory 4096L |> Int64.to_int) - 32 + in let xcext = Xenctrlext.get_handle () in D.debug "NUMAClaim domid %d: global claim: %d pages" domid nr_pages ; From b51326917952e2c62474a7dd64492547afc2b372 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Wed, 7 Jan 2026 15:31:39 +0000 Subject: [PATCH 10/15] more debug info --- ocaml/xenopsd/xc/domain.ml | 5 +++++ ocaml/xenopsd/xc/xenops_server_xen.ml | 11 +++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index 73ec591178..0a44af5567 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -1029,6 
+1029,11 @@ let numa_placement domid ~vcpus ~cores ~memory affinity = let build_pre ~xc ~xs ~vcpus ~memory ~hard_affinity domid = let open Memory in let uuid = get_uuid ~xc domid in + debug "VM = %s, build_max_mib = %Ld, build_start_mib = %Ld, xen_max_mib = + %Ld, shadow_mib = %Ld, required_host_free_mib = %Ld, overhead_mib = %Ld" + (Uuidx.to_string uuid) + memory.build_max_mib memory.build_start_mib memory.xen_max_mib + memory.shadow_mib memory.required_host_free_mib memory.overhead_mib; debug "VM = %s; domid = %d; waiting for %Ld MiB of free host memory" (Uuidx.to_string uuid) domid memory.required_host_free_mib ; (* CA-39743: Wait, if necessary, for the Xen scrubber to catch up. *) diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index 8b4d0a4b40..81f9003133 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -1830,6 +1830,7 @@ module VM = struct let dbg = Xenops_task.get_dbg task in Mem.with_reservation dbg min_kib max_kib (fun target_plus_overhead_kib reservation_id -> + debug "VM = %s, memory [%Ld KiB, %Ld KiB], target_plus_overhead=%Ld KiB" vm.Vm.id min_kib max_kib target_plus_overhead_kib; let domain_config, persistent = match persistent.VmExtra.domain_config with | Some dc -> @@ -1870,6 +1871,7 @@ module VM = struct let target_bytes = target_plus_overhead_bytes --- overhead_bytes in + debug "VM = %s, memory target_bytes = %Ld, dynamic max = %Ld" vm.Vm.id target_bytes vm.memory_dynamic_max; min vm.memory_dynamic_max target_bytes in set_initial_target ~xs domid (Int64.div initial_target 1024L) ; @@ -2967,6 +2969,7 @@ module VM = struct | _ -> "" in + debug "VM = %s, initial_target = %Ld" vm.Vm.id initial_target; ({x with Domain.memory_target= initial_target}, timeoffset) in let vtpm = vtpm_of ~vm in @@ -3106,7 +3109,9 @@ module VM = struct let memory_actual = let pages = Int64.of_nativeint di.Xenctrl.total_memory_pages in let kib = Xenctrl.pages_to_kib pages in - 
Memory.bytes_of_kib kib + let bytes = Memory.bytes_of_kib kib in + D.debug "VM %s memory actual: %Ld pages = %Ld KiB = %Ld bytes" (Uuidm.to_string uuid) pages kib bytes; + bytes in let memory_limit = (* The maximum amount of memory the domain can consume is the max @@ -3129,7 +3134,9 @@ module VM = struct in (* CA-31764: may be larger than static_max if maxmem has been increased to initial-reservation. *) - max memory_actual max_memory_bytes + let result = max memory_actual max_memory_bytes in + D.debug "VM %s memory limit = %Ld bytes" (Uuidm.to_string uuid) result; + result in let rtc = try From ae57d8827b17ecb23c32c9a76412350c4cf9aae4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Wed, 7 Jan 2026 16:05:17 +0000 Subject: [PATCH 11/15] Drop -32 hack: not useful for migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Edwin Török --- ocaml/xenopsd/xc/domain.ml | 16 +++++++++------- ocaml/xenopsd/xc/xenops_server_xen.ml | 17 ++++++++++++----- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index 0a44af5567..d1892cc92e 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -1003,7 +1003,7 @@ let numa_placement domid ~vcpus ~cores ~memory affinity = __FUNCTION__ domid ; None in - let nr_pages = (Int64.div memory 4096L |> Int64.to_int) - 32 in + let nr_pages = Memory.pages_of_bytes_used memory |> Int64.to_int in try D.debug "NUMAClaim domid %d: local claim on node %d: %d pages" domid node nr_pages ; @@ -1029,11 +1029,13 @@ let numa_placement domid ~vcpus ~cores ~memory affinity = let build_pre ~xc ~xs ~vcpus ~memory ~hard_affinity domid = let open Memory in let uuid = get_uuid ~xc domid in - debug "VM = %s, build_max_mib = %Ld, build_start_mib = %Ld, xen_max_mib = - %Ld, shadow_mib = %Ld, required_host_free_mib = %Ld, overhead_mib = %Ld" - (Uuidx.to_string uuid) - memory.build_max_mib 
memory.build_start_mib memory.xen_max_mib - memory.shadow_mib memory.required_host_free_mib memory.overhead_mib; + debug + "VM = %s, build_max_mib = %Ld, build_start_mib = %Ld, xen_max_mib =\n\ + \ %Ld, shadow_mib = %Ld, required_host_free_mib = %Ld, overhead_mib = \ + %Ld" + (Uuidx.to_string uuid) memory.build_max_mib memory.build_start_mib + memory.xen_max_mib memory.shadow_mib memory.required_host_free_mib + memory.overhead_mib ; debug "VM = %s; domid = %d; waiting for %Ld MiB of free host memory" (Uuidx.to_string uuid) domid memory.required_host_free_mib ; (* CA-39743: Wait, if necessary, for the Xen scrubber to catch up. *) @@ -1138,7 +1140,7 @@ let build_pre ~xc ~xs ~vcpus ~memory ~hard_affinity domid = memory later anyway *) let nr_pages = - (Int64.div memory 4096L |> Int64.to_int) - 32 + Memory.pages_of_bytes_used memory |> Int64.to_int in let xcext = Xenctrlext.get_handle () in D.debug "NUMAClaim domid %d: global claim: %d pages" domid diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index 81f9003133..f69ec75276 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -1830,7 +1830,10 @@ module VM = struct let dbg = Xenops_task.get_dbg task in Mem.with_reservation dbg min_kib max_kib (fun target_plus_overhead_kib reservation_id -> - debug "VM = %s, memory [%Ld KiB, %Ld KiB], target_plus_overhead=%Ld KiB" vm.Vm.id min_kib max_kib target_plus_overhead_kib; + debug + "VM = %s, memory [%Ld KiB, %Ld KiB], \ + target_plus_overhead=%Ld KiB" + vm.Vm.id min_kib max_kib target_plus_overhead_kib ; let domain_config, persistent = match persistent.VmExtra.domain_config with | Some dc -> @@ -1871,7 +1874,9 @@ module VM = struct let target_bytes = target_plus_overhead_bytes --- overhead_bytes in - debug "VM = %s, memory target_bytes = %Ld, dynamic max = %Ld" vm.Vm.id target_bytes vm.memory_dynamic_max; + debug + "VM = %s, memory target_bytes = %Ld, dynamic max = %Ld" + vm.Vm.id 
target_bytes vm.memory_dynamic_max ; min vm.memory_dynamic_max target_bytes in set_initial_target ~xs domid (Int64.div initial_target 1024L) ; @@ -2969,7 +2974,7 @@ module VM = struct | _ -> "" in - debug "VM = %s, initial_target = %Ld" vm.Vm.id initial_target; + debug "VM = %s, initial_target = %Ld" vm.Vm.id initial_target ; ({x with Domain.memory_target= initial_target}, timeoffset) in let vtpm = vtpm_of ~vm in @@ -3110,7 +3115,8 @@ module VM = struct let pages = Int64.of_nativeint di.Xenctrl.total_memory_pages in let kib = Xenctrl.pages_to_kib pages in let bytes = Memory.bytes_of_kib kib in - D.debug "VM %s memory actual: %Ld pages = %Ld KiB = %Ld bytes" (Uuidm.to_string uuid) pages kib bytes; + D.debug "VM %s memory actual: %Ld pages = %Ld KiB = %Ld bytes" + (Uuidm.to_string uuid) pages kib bytes ; bytes in let memory_limit = @@ -3135,7 +3141,8 @@ module VM = struct (* CA-31764: may be larger than static_max if maxmem has been increased to initial-reservation. *) let result = max memory_actual max_memory_bytes in - D.debug "VM %s memory limit = %Ld bytes" (Uuidm.to_string uuid) result; + D.debug "VM %s memory limit = %Ld bytes" (Uuidm.to_string uuid) + result ; result in let rtc = From cc39655afb8dd855855b17ff24b54f2e3a26d137 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 8 Jan 2026 10:16:00 +0000 Subject: [PATCH 12/15] plumb migration pages through --- ocaml/xapi-idl/memory/memory.ml | 5 ++++- ocaml/xenopsd/xc/domain.ml | 21 +++++++++++++++++++-- ocaml/xenopsd/xc/domain.mli | 2 ++ ocaml/xenopsd/xc/xenops_server_xen.ml | 26 +++++++++++++++++++++++--- 4 files changed, 48 insertions(+), 6 deletions(-) diff --git a/ocaml/xapi-idl/memory/memory.ml b/ocaml/xapi-idl/memory/memory.ml index 99951f7e3e..2f3057a0ed 100644 --- a/ocaml/xapi-idl/memory/memory.ml +++ b/ocaml/xapi-idl/memory/memory.ml @@ -185,6 +185,7 @@ type memory_config = { ; shadow_mib: int64 ; required_host_free_mib: int64 ; overhead_mib: int64 + ; build_claim_pages: int64 
} module Memory_model (D : MEMORY_MODEL_DATA) = struct @@ -226,14 +227,16 @@ module Memory_model (D : MEMORY_MODEL_DATA) = struct let shadow_multiplier_default = 1.0 let full_config static_max_mib video_mib target_mib vcpus shadow_multiplier = + let build_start_mib = build_start_mib static_max_mib target_mib video_mib in { build_max_mib= build_max_mib static_max_mib video_mib - ; build_start_mib= build_start_mib static_max_mib target_mib video_mib + ; build_start_mib ; xen_max_mib= xen_max_mib static_max_mib ; shadow_mib= shadow_mib static_max_mib vcpus shadow_multiplier ; required_host_free_mib= footprint_mib target_mib static_max_mib vcpus shadow_multiplier ; overhead_mib= overhead_mib static_max_mib vcpus shadow_multiplier + ; build_claim_pages= pages_of_mib build_start_mib } end diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index d1892cc92e..4e77a1ce19 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -150,6 +150,8 @@ type builder_spec_info = type build_info = { memory_max: int64 (** memory max in kilobytes *) ; memory_target: int64 (** memory target in kilobytes *) + ; memory_total_source: int64 option + (** amount of memory to claim (during migration) *) ; kernel: string (** in hvm case, point to hvmloader *) ; vcpus: int (** vcpus max *) ; priv: builder_spec_info @@ -1123,9 +1125,14 @@ let build_pre ~xc ~xs ~vcpus ~memory ~hard_affinity domid = and cores = Xenops_server.cores_of_numa_affinity_policy pin ~vcpus in - let memory = - Int64.(mul memory.build_start_mib (shift_left 1L 20)) + + let build_claim_bytes = + Memory.bytes_of_pages memory.build_claim_pages in + D.debug "VM = %s; domid = %d; will claim %Ld bytes = %Ld pages" + (Uuidx.to_string uuid) domid build_claim_bytes + memory.build_claim_pages ; + let memory = build_claim_bytes in match numa_placement domid ~vcpus ~cores ~memory affinity with | None -> (* Always perform a global claim when NUMA placement is @@ -1830,6 +1837,16 @@ let restore (task : 
Xenops_task.task_handle) ~xc ~xs ~dm ~timeoffset ~extras maybe_ca_140252_workaround ~xc ~vcpus domid ; (memory, vm_stuff, `pvh) in + let memory = + match info.memory_total_source with + | None -> + memory + | Some bytes -> + let build_claim_pages = Memory.pages_of_bytes_used bytes in + debug "Domid %d: memory_total_source = %Ld bytes = %Ld pages" domid + bytes build_claim_pages ; + Memory.{memory with build_claim_pages} + in let store_port, console_port, numa_placements = build_pre ~xc ~xs ~memory ~vcpus ~hard_affinity:info.hard_affinity domid in diff --git a/ocaml/xenopsd/xc/domain.mli b/ocaml/xenopsd/xc/domain.mli index 574782fdce..9c71f78fde 100644 --- a/ocaml/xenopsd/xc/domain.mli +++ b/ocaml/xenopsd/xc/domain.mli @@ -130,6 +130,8 @@ val builder_spec_info : builder_spec_info Rpc.Types.def type build_info = { memory_max: int64 (** memory max in kilobytes *) ; memory_target: int64 (** memory target in kilobytes *) + ; memory_total_source: int64 option + (** memory used on source during migration/resume in kilobytes *) ; kernel: string (** image to load. In HVM case, point to hvmloader *) ; vcpus: int (** vcpus max *) ; priv: builder_spec_info diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index f69ec75276..8d4f6a4799 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -1624,6 +1624,7 @@ module VM = struct { Domain.memory_max= vm.memory_static_max /// 1024L ; memory_target= vm.memory_dynamic_min /// 1024L + ; memory_total_source= None ; kernel= "" ; vcpus= vm.vcpu_max ; priv= builder_spec_info @@ -1803,27 +1804,30 @@ module VM = struct needed. If we are live migrating then we will only know an upper bound. If we are starting from scratch then we have a free choice. 
*) - let min_bytes, max_bytes = + let min_bytes, max_bytes, memory_total_source_bytes = match memory_upper_bound with | Some x -> debug "VM = %s; using memory_upper_bound = %Ld" vm.Vm.id x ; - (x, x) + (x, x, Some x) | None -> if resuming then ( debug "VM = %s; using stored suspend_memory_bytes = %Ld" vm.Vm.id persistent.VmExtra.suspend_memory_bytes ; ( persistent.VmExtra.suspend_memory_bytes , persistent.VmExtra.suspend_memory_bytes + , Some persistent.VmExtra.suspend_memory_bytes ) ) else ( debug "VM = %s; using memory_dynamic_min = %Ld and \ memory_dynamic_max = %Ld" vm.Vm.id vm.memory_dynamic_min vm.memory_dynamic_max ; - (vm.memory_dynamic_min, vm.memory_dynamic_max) + (vm.memory_dynamic_min, vm.memory_dynamic_max, None) ) in let min_kib = kib_of_bytes_used (min_bytes +++ overhead_bytes) + and memory_total_source_kib = + Option.map kib_of_bytes_used memory_total_source_bytes and max_kib = kib_of_bytes_used (max_bytes +++ overhead_bytes) in (* XXX: we would like to be able to cancel an in-progress with_reservation *) @@ -1879,6 +1883,21 @@ module VM = struct vm.Vm.id target_bytes vm.memory_dynamic_max ; min vm.memory_dynamic_max target_bytes in + let persistent = + match persistent with + | {VmExtra.build_info= Some x; _} as t -> + { + t with + build_info= + Some + { + x with + memory_total_source= memory_total_source_kib + } + } + | _ -> + persistent + in set_initial_target ~xs domid (Int64.div initial_target 1024L) ; (* Log uses of obsolete option *) if vm.suppress_spurious_page_faults then @@ -2352,6 +2371,7 @@ module VM = struct { Domain.memory_max= static_max_kib ; memory_target= initial_target + ; memory_total_source= None ; kernel ; vcpus= vm.vcpu_max ; priv From 695ef8fd73be7a49ff0c29f216c04f072e4e3719 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 8 Jan 2026 10:45:03 +0000 Subject: [PATCH 13/15] fixup! 
plumb migration pages through --- ocaml/xenopsd/xc/domain.ml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index 4e77a1ce19..af54fc679c 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -1841,10 +1841,11 @@ let restore (task : Xenops_task.task_handle) ~xc ~xs ~dm ~timeoffset ~extras match info.memory_total_source with | None -> memory - | Some bytes -> - let build_claim_pages = Memory.pages_of_bytes_used bytes in - debug "Domid %d: memory_total_source = %Ld bytes = %Ld pages" domid - bytes build_claim_pages ; + | Some kib -> + let build_claim_pages = Memory.pages_of_kib_used kib in + let bytes = Memory.bytes_of_kib kib in + debug "Domid %d: memory_total_source = %Ld bytes = %Ld KiB = %Ld pages" + domid bytes kib build_claim_pages ; Memory.{memory with build_claim_pages} in let store_port, console_port, numa_placements = From e2159adfc1950309314633766d66fb61cc376f35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 8 Jan 2026 11:10:51 +0000 Subject: [PATCH 14/15] fixup! fixup! 
plumb migration pages through --- ocaml/xenopsd/lib/xenops_server.ml | 35 ++++++++++++++++---- ocaml/xenopsd/lib/xenops_server_plugin.ml | 1 + ocaml/xenopsd/lib/xenops_server_simulator.ml | 2 +- ocaml/xenopsd/xc/xenops_server_xen.ml | 6 ++-- 4 files changed, 33 insertions(+), 11 deletions(-) diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index 54d528829f..09a64a2579 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -155,7 +155,8 @@ type atomic = | VM_create_device_model of (Vm.id * bool) | VM_destroy_device_model of Vm.id | VM_destroy of Vm.id - | VM_create of (Vm.id * int64 option * Vm.id option * bool) (*no_sharept*) + | VM_create of (Vm.id * (int64 * int64 option) option * Vm.id option * bool) + (*no_sharept*) | VM_build of (Vm.id * bool) | VM_shutdown_domain of (Vm.id * shutdown_request * float) | VM_s3suspend of Vm.id @@ -330,6 +331,7 @@ type vm_receive_op = { ; vmr_socket: Unix.file_descr ; vmr_handshake: string option (** handshake protocol *) ; vmr_compressed: bool + ; vmr_memory_total_source: int64 option [@default None] } [@@deriving rpcty] @@ -2301,19 +2303,26 @@ let rec perform_atomic ~progress_callback ?result (op : atomic) | VM_destroy id -> debug "VM.destroy %s" id ; B.VM.destroy t (VM_DB.read_exn id) - | VM_create (id, memory_upper_bound, final_id, no_sharept) -> + | VM_create (id, memory_upper_bound_and_source, final_id, no_sharept) -> let num_of_vbds = List.length (VBD_DB.vbds id) in let num_of_vifs = List.length (VIF_DB.vifs id) in + let memory_upper_bound = Option.map fst memory_upper_bound_and_source + and memory_total_source = + Option.map snd memory_upper_bound_and_source |> Option.join + in debug - "VM.create %s memory_upper_bound = %s, num_of_vbds = %d, num_of_vifs = \ - %d" + "VM.create %s memory_upper_bound = %s, memory_total_source = %s, \ + num_of_vbds = %d, num_of_vifs = %d" id (Option.value ~default:"None" (Option.map Int64.to_string memory_upper_bound) ) 
+ (Option.value ~default:"None" + (Option.map Int64.to_string memory_total_source) + ) num_of_vbds num_of_vifs ; - B.VM.create t memory_upper_bound (VM_DB.read_exn id) final_id no_sharept - num_of_vbds num_of_vifs + B.VM.create t memory_upper_bound memory_total_source (VM_DB.read_exn id) + final_id no_sharept num_of_vbds num_of_vifs | VM_build (id, force) -> debug "VM.build %s" id ; let vbds : Vbd.t list = VBD_DB.vbds id |> vbd_plug_order in @@ -2990,6 +2999,7 @@ and perform_exn ?result (op : operation) (t : Xenops_task.task_handle) : unit = vmr_id= id ; vmr_final_id= final_id ; vmr_memory_limit= memory_limit + ; vmr_memory_total_source= memory_total_source ; vmr_socket= s ; vmr_handshake= handshake ; vmr_compressed @@ -3070,7 +3080,14 @@ and perform_exn ?result (op : operation) (t : Xenops_task.task_handle) : unit = ) in perform_atomics - ([VM_create (id, Some memory_limit, Some final_id, no_sharept)] + ([ + VM_create + ( id + , Some (memory_limit, memory_total_source) + , Some final_id + , no_sharept + ) + ] (* Perform as many operations as possible on the destination domain before pausing the original domain *) @ atomics_of_operation (VM_restore_vifs id) @@ -3877,6 +3894,9 @@ module VM = struct let module Response = Cohttp.Response.Make (Cohttp_posix_io.Unbuffered_IO) in let dbg = List.assoc "dbg" cookies in let memory_limit = List.assoc "memory_limit" cookies |> Int64.of_string in + let memory_total_source = + List.assoc_opt "memory_total_source" cookies |> Option.map Int64.of_string + in let handshake = List.assoc_opt cookie_mem_migration cookies in let compressed_memory = get_compression cookies in Debug.with_thread_associated dbg @@ -3907,6 +3927,7 @@ module VM = struct ; vmr_socket= transferred_fd ; vmr_handshake= handshake ; vmr_compressed= compressed_memory + ; vmr_memory_total_source= memory_total_source } in let task = diff --git a/ocaml/xenopsd/lib/xenops_server_plugin.ml b/ocaml/xenopsd/lib/xenops_server_plugin.ml index 6cee8a58f0..209e6d098a 100644 
--- a/ocaml/xenopsd/lib/xenops_server_plugin.ml +++ b/ocaml/xenopsd/lib/xenops_server_plugin.ml @@ -81,6 +81,7 @@ module type S = sig val create : Xenops_task.task_handle -> int64 option + -> int64 option -> Vm.t -> Vm.id option -> bool (* no_sharept*) diff --git a/ocaml/xenopsd/lib/xenops_server_simulator.ml b/ocaml/xenopsd/lib/xenops_server_simulator.ml index 0c6ac3f606..61ac6f0e88 100644 --- a/ocaml/xenopsd/lib/xenops_server_simulator.ml +++ b/ocaml/xenopsd/lib/xenops_server_simulator.ml @@ -547,7 +547,7 @@ module VM = struct let remove _vm = () - let create _ memory_limit vm _ _ _ _ = + let create _ memory_limit _ vm _ _ _ _ = with_lock m (create_nolock memory_limit vm) let destroy _ vm = with_lock m (destroy_nolock vm) diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index 8d4f6a4799..71a9b2b1a6 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -1730,8 +1730,8 @@ module VM = struct in (device_id, revision) - let create_exn task memory_upper_bound vm final_id no_sharept num_of_vbds - num_of_vifs = + let create_exn task memory_upper_bound memory_total_source vm final_id + no_sharept num_of_vbds num_of_vifs = let k = vm.Vm.id in with_xc_and_xs (fun xc xs -> (* Ensure the DB contains something for this VM - this is to avoid a @@ -1808,7 +1808,7 @@ module VM = struct match memory_upper_bound with | Some x -> debug "VM = %s; using memory_upper_bound = %Ld" vm.Vm.id x ; - (x, x, Some x) + (x, x, memory_total_source) | None -> if resuming then ( debug "VM = %s; using stored suspend_memory_bytes = %Ld" From e5ab020897313e43d6b2bdb5c7b241911ea3598a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 8 Jan 2026 11:22:23 +0000 Subject: [PATCH 15/15] fixup --- ocaml/xenopsd/lib/xenops_server.ml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index 
09a64a2579..6bb0a9592f 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -2891,7 +2891,10 @@ and perform_exn ?result (op : operation) (t : Xenops_task.task_handle) : unit = Request.write (fun _ -> ()) request fd in do_request vm_fd - [("memory_limit", Int64.to_string state.Vm.memory_limit)] + [ + ("memory_limit", Int64.to_string state.Vm.memory_limit) + ; ("memory_total_source", Int64.to_string state.Vm.memory_actual) + ] url ; let first_handshake () = ( match Handshake.recv vm_fd with