Skip to content

Commit 577e3a6

Browse files
committed
CA-422187: create an emergency reserve of pages (workaround)
Do not let domains use up all available memory on the host: we have too many unexplained bugs in this area. As a workaround, reserve some amount of memory (e.g. 256MiB) that domains cannot normally use from XAPI's point of view. During parallel domain construction, Xen can then use this emergency reserve. Signed-off-by: Edwin Török <[email protected]>
1 parent a4bc2bb commit 577e3a6

File tree

16 files changed

+25
-15
lines changed

16 files changed

+25
-15
lines changed

doc/content/design/numa.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ This function receives as arguments a domain ID and the number of nodes
112112
this domain is using (acquired using `domain_get_numa_info_node_pages`)
113113

114114
The number of NUMA nodes of the host (not domain) is reported by
115-
`Xenctrl.physinfo` which returns a value of type `physinfo`.
115+
`Xenctrlext.physinfo` which returns a value of type `physinfo`.
116116

117117
```diff
118118
index b4579862ff..491bd3fc73 100644

doc/content/xenopsd/walkthroughs/VM.build/Domain.build.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ to call:
6464
[wait_xen_free_mem](https://github.com/xapi-project/xen-api/blob/master/ocaml/xenopsd/xc/domain.ml#L236-L272)
6565
to wait (if necessary), for the Xen memory scrubber to catch up reclaiming memory.
6666
It
67-
1. calls `Xenctrl.physinfo` which returns:
67+
1. calls `Xenctrlext.physinfo` which returns:
6868
- `hostinfo.free_pages` - the free and already scrubbed pages (available)
6969
- `host.scrub_pages` - the not yet scrubbed pages (not yet available)
7070
2. repeats this until a timeout as long as `free_pages` is *lower*

ocaml/libs/xenctrl-ext/xenctrlext.ml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,3 +131,8 @@ let domain_claim_pages handle domid ?(numa_node = NumaNode.none) nr_pages =
131131
let get_nr_nodes handle =
132132
let info = numainfo handle in
133133
Array.length info.memory
134+
135+
(** [physinfo xc] returns the result of [Xenctrl.physinfo xc] with its
    [free_pages] field reduced by a fixed emergency reserve; every other
    field is passed through unchanged.

    NOTE(review): the subtraction is not clamped, so [free_pages] is
    negative whenever Xen reports fewer free pages than the reserve --
    confirm that callers tolerate (or rely on) a negative value. *)
let physinfo xc =
136+
let info = Xenctrl.physinfo xc in
137+
(* 1 lsl 16 = 65536 pages; presumably 256 MiB assuming 4 KiB pages --
   TODO confirm the page size. *)
let emergency_reserve_pages = Nativeint.shift_left 1n 16 in
138+
{info with free_pages= Nativeint.sub info.free_pages emergency_reserve_pages}

ocaml/libs/xenctrl-ext/xenctrlext.mli

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,3 +107,5 @@ val domain_claim_pages : handle -> domid -> ?numa_node:NumaNode.t -> int -> unit
107107

108108
val get_nr_nodes : handle -> int
109109
(** Returns the count of NUMA nodes available in the system. *)
110+
111+
val physinfo : Xenctrl.handle -> Xenctrl.physinfo
(** [physinfo xc] is [Xenctrl.physinfo xc] with [free_pages] reduced by a
    fixed emergency reserve of 2^16 pages. All other fields are returned
    unchanged. The reduction is not clamped, so [free_pages] can be
    negative when fewer pages than the reserve are free. *)

ocaml/squeezed/src/dune

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
threads.posix
1414
unix
1515
xenctrl
16+
xenctrl_ext
1617
xenstore
1718
xenstore.unix
1819
xenstore_transport

ocaml/squeezed/src/squeeze_xen.ml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -579,7 +579,7 @@ let make_host ~verbose ~xc =
579579
pages -- this might cause something else to fail (eg domain builder?) *)
580580
while
581581
Int64.div
582-
((Xenctrl.physinfo xc).Xenctrl.scrub_pages |> Int64.of_nativeint)
582+
((Xenctrlext.physinfo xc).Xenctrl.scrub_pages |> Int64.of_nativeint)
583583
1024L
584584
<> 0L
585585
do
@@ -762,7 +762,7 @@ let make_host ~verbose ~xc =
762762
(* For the host free memory we sum the free pages and the pages needing
763763
scrubbing: we don't want to adjust targets simply because the scrubber is
764764
slow. *)
765-
let physinfo = Xenctrl.physinfo xc in
765+
let physinfo = Xenctrlext.physinfo xc in
766766
let free_pages_kib =
767767
Xenctrl.pages_to_kib (Int64.of_nativeint physinfo.Xenctrl.free_pages)
768768
and scrub_pages_kib =

ocaml/xcp-rrdd/bin/rrdp-cpu/dune

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
xapi-rrd
1212
xapi-stdext-unix
1313
xenctrl
14+
xenctrl_ext
1415
)
1516
)
1617

ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ let dss_pcpus xc =
184184
let len = Array.length !physcpus in
185185
let newinfos =
186186
if len = 0 then (
187-
let physinfo = Xenctrl.physinfo xc in
187+
let physinfo = Xenctrlext.physinfo xc in
188188
let pcpus = physinfo.Xenctrl.nr_cpus in
189189
physcpus := if pcpus > 0 then Array.make pcpus 0L else [||] ;
190190
Xenctrl.pcpu_info xc pcpus
@@ -237,7 +237,7 @@ let count_power_state_running_domains domains =
237237
0 domains
238238

239239
let dss_hostload xc domains =
240-
let physinfo = Xenctrl.physinfo xc in
240+
let physinfo = Xenctrlext.physinfo xc in
241241
let pcpus = physinfo.Xenctrl.nr_cpus in
242242
let rec sum acc n f =
243243
match n with n when n >= 0 -> sum (acc + f n) (n - 1) f | _ -> acc
@@ -298,7 +298,7 @@ let _ =
298298
let _, domains, _ = Xenctrl_lib.domain_snapshot xc in
299299
Process.initialise () ;
300300
(* Share one page per PCPU and dom each *)
301-
let physinfo = Xenctrl.physinfo xc in
301+
let physinfo = Xenctrlext.physinfo xc in
302302
let shared_page_count =
303303
physinfo.Xenctrl.nr_cpus
304304
+ Int.max Rrd_interface.max_supported_vms (List.length domains)

ocaml/xcp-rrdd/bin/rrdp-squeezed/dune

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
xapi-log
1414
xapi-rrd
1515
xenctrl
16+
xenctrl_ext
1617
xenstore
1718
xenstore.unix
1819
xenstore_transport

ocaml/xcp-rrdd/bin/rrdp-squeezed/rrdp_squeezed.ml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ let generate_host_sources xc counters =
169169
in
170170
let memory_reclaimed = bytes_of_kib memory_reclaimed in
171171
let memory_possibly_reclaimed = bytes_of_kib memory_possibly_reclaimed in
172-
let physinfo = Xenctrl.physinfo xc in
172+
let physinfo = Xenctrlext.physinfo xc in
173173
let total_kib =
174174
Xenctrl.pages_to_kib (Int64.of_nativeint physinfo.Xenctrl.total_pages)
175175
in

0 commit comments

Comments
 (0)