From e97e0052baa6c85fcb5fa0096ab3517076da0bf2 Mon Sep 17 00:00:00 2001 From: shilingwang Date: Thu, 12 Mar 2026 14:26:28 +0000 Subject: [PATCH 1/8] add test for systemctl --- rs/tests/nested/BUILD.bazel | 17 +++++++ .../nested/guestos_no_failed_systemd_units.rs | 45 +++++++++++++++++++ 2 files changed, 62 insertions(+) create mode 100644 rs/tests/nested/guestos_no_failed_systemd_units.rs diff --git a/rs/tests/nested/BUILD.bazel b/rs/tests/nested/BUILD.bazel index eae75bae2ff7..1e8c5275765e 100644 --- a/rs/tests/nested/BUILD.bazel +++ b/rs/tests/nested/BUILD.bazel @@ -62,6 +62,23 @@ system_test_nns( ], ) +# Tests that no systemd units have failed on GuestOS nodes after they boot and register. +system_test_nns( + name = "guestos_no_failed_systemd_units", + env = MAINNET_ENV, + setupos = True, + tags = ["long_test"], + test_timeout = "eternal", + runtime_deps = IC_GATEWAY_RUNTIME_DEPS, + deps = [ + # Keep sorted. + ":nested", + "//rs/tests/driver:ic-system-test-driver", + "@crate_index//:anyhow", + "@crate_index//:slog", + ], +) + rust_binary( name = "guestos_upgrade_test_bin", testonly = True, diff --git a/rs/tests/nested/guestos_no_failed_systemd_units.rs b/rs/tests/nested/guestos_no_failed_systemd_units.rs new file mode 100644 index 000000000000..e2af2e0b2fb6 --- /dev/null +++ b/rs/tests/nested/guestos_no_failed_systemd_units.rs @@ -0,0 +1,45 @@ +use anyhow::Result; +use ic_system_test_driver::{ + driver::{group::SystemTestGroup, test_env::TestEnv, test_env_api::*}, + systest, +}; +use slog::info; +use std::time::Duration; + +fn main() -> Result<()> { + SystemTestGroup::new() + .with_setup(nested::setup) + .with_teardown(nested::teardown) + .add_test(systest!(check_no_failed_systemd_units)) + .with_timeout_per_test(Duration::from_secs(20 * 60)) + .with_overall_timeout(Duration::from_secs(30 * 60)) + .execute_from_args()?; + + Ok(()) +} + +/// Wait for GuestOS nodes to register and then assert that no systemd units +/// have failed on any of them. +fn check_no_failed_systemd_units(env: TestEnv) { + let logger = env.logger(); + + nested::registration(env.clone()); + + let topology = env.topology_snapshot(); + for node in topology.unassigned_nodes() { + let failed_units = node + .block_on_bash_script("systemctl list-units --failed --no-legend --no-pager") + .expect("Failed to run systemctl list-units --failed on GuestOS node"); + info!( + logger, + "Node {}: systemctl list-units --failed:\n{}", node.node_id, failed_units + ); + assert!( + failed_units.trim().is_empty(), + "Node {} has failed systemd units:\n{}", + node.node_id, + failed_units + ); + } + info!(logger, "No failed systemd units found on any GuestOS node."); +} From d973bb43ae07b7121aec4313268b39f7a486bf3c Mon Sep 17 00:00:00 2001 From: Andrew Battat Date: Thu, 12 Mar 2026 16:18:14 +0000 Subject: [PATCH 2/8] Run failed-units GuestOS check in registration test only --- rs/tests/nested/BUILD.bazel | 17 ------- .../nested/guestos_no_failed_systemd_units.rs | 45 ------------------- rs/tests/nested/registration.rs | 12 ++++- rs/tests/nested/src/lib.rs | 31 +++++++++++++ 4 files changed, 41 insertions(+), 64 deletions(-) delete mode 100644 rs/tests/nested/guestos_no_failed_systemd_units.rs diff --git a/rs/tests/nested/BUILD.bazel b/rs/tests/nested/BUILD.bazel index 1e8c5275765e..eae75bae2ff7 100644 --- a/rs/tests/nested/BUILD.bazel +++ b/rs/tests/nested/BUILD.bazel @@ -62,23 +62,6 @@ system_test_nns( ], ) -# Tests that no systemd units have failed on GuestOS nodes after they boot and register. -system_test_nns( - name = "guestos_no_failed_systemd_units", - env = MAINNET_ENV, - setupos = True, - tags = ["long_test"], - test_timeout = "eternal", - runtime_deps = IC_GATEWAY_RUNTIME_DEPS, - deps = [ - # Keep sorted. - ":nested", - "//rs/tests/driver:ic-system-test-driver", - "@crate_index//:anyhow", - "@crate_index//:slog", - ], -) - rust_binary( name = "guestos_upgrade_test_bin", testonly = True, diff --git a/rs/tests/nested/guestos_no_failed_systemd_units.rs b/rs/tests/nested/guestos_no_failed_systemd_units.rs deleted file mode 100644 index e2af2e0b2fb6..000000000000 --- a/rs/tests/nested/guestos_no_failed_systemd_units.rs +++ /dev/null @@ -1,45 +0,0 @@ -use anyhow::Result; -use ic_system_test_driver::{ - driver::{group::SystemTestGroup, test_env::TestEnv, test_env_api::*}, - systest, -}; -use slog::info; -use std::time::Duration; - -fn main() -> Result<()> { - SystemTestGroup::new() - .with_setup(nested::setup) - .with_teardown(nested::teardown) - .add_test(systest!(check_no_failed_systemd_units)) - .with_timeout_per_test(Duration::from_secs(20 * 60)) - .with_overall_timeout(Duration::from_secs(30 * 60)) - .execute_from_args()?; - - Ok(()) -} - -/// Wait for GuestOS nodes to register and then assert that no systemd units -/// have failed on any of them. -fn check_no_failed_systemd_units(env: TestEnv) { - let logger = env.logger(); - - nested::registration(env.clone()); - - let topology = env.topology_snapshot(); - for node in topology.unassigned_nodes() { - let failed_units = node - .block_on_bash_script("systemctl list-units --failed --no-legend --no-pager") - .expect("Failed to run systemctl list-units --failed on GuestOS node"); - info!( - logger, - "Node {}: systemctl list-units --failed:\n{}", node.node_id, failed_units - ); - assert!( - failed_units.trim().is_empty(), - "Node {} has failed systemd units:\n{}", - node.node_id, - failed_units - ); - } - info!(logger, "No failed systemd units found on any GuestOS node."); -} diff --git a/rs/tests/nested/registration.rs b/rs/tests/nested/registration.rs index afd06eb5e4e2..8f6ab254760e 100644 --- a/rs/tests/nested/registration.rs +++ b/rs/tests/nested/registration.rs @@ -1,12 +1,20 @@ use anyhow::Result; -use ic_system_test_driver::{driver::group::SystemTestGroup, systest}; +use ic_system_test_driver::{ + driver::{group::SystemTestGroup, test_env::TestEnv}, + systest, +}; use std::time::Duration; +fn registration_with_failed_units_check(env: TestEnv) { + nested::registration(env.clone()); + nested::check_no_failed_systemd_units(env); +} + fn main() -> Result<()> { SystemTestGroup::new() .with_setup(nested::setup) .with_teardown(nested::teardown) - .add_test(systest!(nested::registration)) + .add_test(systest!(registration_with_failed_units_check)) .with_timeout_per_test(Duration::from_secs(20 * 60)) .with_overall_timeout(Duration::from_secs(30 * 60)) .execute_from_args()?; diff --git a/rs/tests/nested/src/lib.rs b/rs/tests/nested/src/lib.rs index edef41e67f1d..2a7e6ec3df6b 100644 --- a/rs/tests/nested/src/lib.rs +++ b/rs/tests/nested/src/lib.rs @@ -126,6 +126,37 @@ pub fn registration(env: TestEnv) { info!(logger, "All {n} nodes successfully came up and registered."); } +/// Assert that no systemd units have failed on unassigned GuestOS nodes. +pub fn check_no_failed_systemd_units(env: TestEnv) { + let logger = env.logger(); + + if std::env::var("TRUSTED_EXECUTION_ENVIRONMENT").is_ok() { + info!( + logger, + "Skipping GuestOS failed-units check because TRUSTED_EXECUTION_ENVIRONMENT is enabled." + ); + return; + } + + let topology = env.topology_snapshot(); + for node in topology.unassigned_nodes() { + let failed_units = node + .block_on_bash_script("systemctl list-units --failed --no-legend --no-pager") + .expect("Failed to run systemctl list-units --failed on GuestOS node"); + info!( + logger, + "Node {}: systemctl list-units --failed:\n{}", node.node_id, failed_units + ); + assert!( + failed_units.trim().is_empty(), + "Node {} has failed systemd units:\n{}", + node.node_id, + failed_units + ); + } + info!(logger, "No failed systemd units found on any GuestOS node."); +} + /// Clean up the environment after nested tests. pub fn teardown(env: TestEnv) { if let Ok(pid) = IpmiProcessId::try_read_attribute(&env) { From 53c2dfa3098e4a07136e72841b7d5f85ca4089d2 Mon Sep 17 00:00:00 2001 From: Andrew Battat Date: Thu, 12 Mar 2026 20:18:44 +0000 Subject: [PATCH 3/8] Revert "Run failed-units GuestOS check in registration test only" This reverts commit d973bb43ae07b7121aec4313268b39f7a486bf3c. --- rs/tests/nested/BUILD.bazel | 17 +++++++ .../nested/guestos_no_failed_systemd_units.rs | 45 +++++++++++++++++++ rs/tests/nested/registration.rs | 12 +---- rs/tests/nested/src/lib.rs | 31 ------------- 4 files changed, 64 insertions(+), 41 deletions(-) create mode 100644 rs/tests/nested/guestos_no_failed_systemd_units.rs diff --git a/rs/tests/nested/BUILD.bazel b/rs/tests/nested/BUILD.bazel index eae75bae2ff7..1e8c5275765e 100644 --- a/rs/tests/nested/BUILD.bazel +++ b/rs/tests/nested/BUILD.bazel @@ -62,6 +62,23 @@ system_test_nns( ], ) +# Tests that no systemd units have failed on GuestOS nodes after they boot and register. +system_test_nns( + name = "guestos_no_failed_systemd_units", + env = MAINNET_ENV, + setupos = True, + tags = ["long_test"], + test_timeout = "eternal", + runtime_deps = IC_GATEWAY_RUNTIME_DEPS, + deps = [ + # Keep sorted. + ":nested", + "//rs/tests/driver:ic-system-test-driver", + "@crate_index//:anyhow", + "@crate_index//:slog", + ], +) + rust_binary( name = "guestos_upgrade_test_bin", testonly = True, diff --git a/rs/tests/nested/guestos_no_failed_systemd_units.rs b/rs/tests/nested/guestos_no_failed_systemd_units.rs new file mode 100644 index 000000000000..e2af2e0b2fb6 --- /dev/null +++ b/rs/tests/nested/guestos_no_failed_systemd_units.rs @@ -0,0 +1,45 @@ +use anyhow::Result; +use ic_system_test_driver::{ + driver::{group::SystemTestGroup, test_env::TestEnv, test_env_api::*}, + systest, +}; +use slog::info; +use std::time::Duration; + +fn main() -> Result<()> { + SystemTestGroup::new() + .with_setup(nested::setup) + .with_teardown(nested::teardown) + .add_test(systest!(check_no_failed_systemd_units)) + .with_timeout_per_test(Duration::from_secs(20 * 60)) + .with_overall_timeout(Duration::from_secs(30 * 60)) + .execute_from_args()?; + + Ok(()) +} + +/// Wait for GuestOS nodes to register and then assert that no systemd units +/// have failed on any of them. +fn check_no_failed_systemd_units(env: TestEnv) { + let logger = env.logger(); + + nested::registration(env.clone()); + + let topology = env.topology_snapshot(); + for node in topology.unassigned_nodes() { + let failed_units = node + .block_on_bash_script("systemctl list-units --failed --no-legend --no-pager") + .expect("Failed to run systemctl list-units --failed on GuestOS node"); + info!( + logger, + "Node {}: systemctl list-units --failed:\n{}", node.node_id, failed_units + ); + assert!( + failed_units.trim().is_empty(), + "Node {} has failed systemd units:\n{}", + node.node_id, + failed_units + ); + } + info!(logger, "No failed systemd units found on any GuestOS node."); +} diff --git a/rs/tests/nested/registration.rs b/rs/tests/nested/registration.rs index 8f6ab254760e..afd06eb5e4e2 100644 --- a/rs/tests/nested/registration.rs +++ b/rs/tests/nested/registration.rs @@ -1,20 +1,12 @@ use anyhow::Result; -use ic_system_test_driver::{ - driver::{group::SystemTestGroup, test_env::TestEnv}, - systest, -}; +use ic_system_test_driver::{driver::group::SystemTestGroup, systest}; use std::time::Duration; -fn registration_with_failed_units_check(env: TestEnv) { - nested::registration(env.clone()); - nested::check_no_failed_systemd_units(env); -} - fn main() -> Result<()> { SystemTestGroup::new() .with_setup(nested::setup) .with_teardown(nested::teardown) - .add_test(systest!(registration_with_failed_units_check)) + .add_test(systest!(nested::registration)) .with_timeout_per_test(Duration::from_secs(20 * 60)) .with_overall_timeout(Duration::from_secs(30 * 60)) .execute_from_args()?; diff --git a/rs/tests/nested/src/lib.rs b/rs/tests/nested/src/lib.rs index 2a7e6ec3df6b..edef41e67f1d 100644 --- a/rs/tests/nested/src/lib.rs +++ b/rs/tests/nested/src/lib.rs @@ -126,37 +126,6 @@ pub fn registration(env: TestEnv) { info!(logger, "All {n} nodes successfully came up and registered."); } -/// Assert that no systemd units have failed on unassigned GuestOS nodes. -pub fn check_no_failed_systemd_units(env: TestEnv) { - let logger = env.logger(); - - if std::env::var("TRUSTED_EXECUTION_ENVIRONMENT").is_ok() { - info!( - logger, - "Skipping GuestOS failed-units check because TRUSTED_EXECUTION_ENVIRONMENT is enabled." - ); - return; - } - - let topology = env.topology_snapshot(); - for node in topology.unassigned_nodes() { - let failed_units = node - .block_on_bash_script("systemctl list-units --failed --no-legend --no-pager") - .expect("Failed to run systemctl list-units --failed on GuestOS node"); - info!( - logger, - "Node {}: systemctl list-units --failed:\n{}", node.node_id, failed_units - ); - assert!( - failed_units.trim().is_empty(), - "Node {} has failed systemd units:\n{}", - node.node_id, - failed_units - ); - } - info!(logger, "No failed systemd units found on any GuestOS node."); -} - /// Clean up the environment after nested tests. pub fn teardown(env: TestEnv) { if let Ok(pid) = IpmiProcessId::try_read_attribute(&env) { From 8e0beaa1b57977af34ebc7cfdbfa65323771a17f Mon Sep 17 00:00:00 2001 From: shilingwang Date: Fri, 13 Mar 2026 10:42:50 +0000 Subject: [PATCH 4/8] move the tests to node and make them simple farm test withVM --- rs/tests/nested/BUILD.bazel | 17 ------- rs/tests/node/BUILD.bazel | 12 +++++ .../guestos_no_failed_systemd_units.rs | 45 ++++++++++++------- 3 files changed, 41 insertions(+), 33 deletions(-) rename rs/tests/{nested => node}/guestos_no_failed_systemd_units.rs (53%) diff --git a/rs/tests/nested/BUILD.bazel b/rs/tests/nested/BUILD.bazel index 1e8c5275765e..eae75bae2ff7 100644 --- a/rs/tests/nested/BUILD.bazel +++ b/rs/tests/nested/BUILD.bazel @@ -62,23 +62,6 @@ system_test_nns( ], ) -# Tests that no systemd units have failed on GuestOS nodes after they boot and register. -system_test_nns( - name = "guestos_no_failed_systemd_units", - env = MAINNET_ENV, - setupos = True, - tags = ["long_test"], - test_timeout = "eternal", - runtime_deps = IC_GATEWAY_RUNTIME_DEPS, - deps = [ - # Keep sorted. - ":nested", - "//rs/tests/driver:ic-system-test-driver", - "@crate_index//:anyhow", - "@crate_index//:slog", - ], -) - rust_binary( name = "guestos_upgrade_test_bin", testonly = True, diff --git a/rs/tests/node/BUILD.bazel b/rs/tests/node/BUILD.bazel index b2944ab6ec42..235481aceb67 100644 --- a/rs/tests/node/BUILD.bazel +++ b/rs/tests/node/BUILD.bazel @@ -144,3 +144,15 @@ system_test( }, test_driver_target = ":kill_start_test_bin", ) + +# Tests that no systemd units have failed on GuestOS nodes after they boot. +system_test( + name = "guestos_no_failed_systemd_units", + deps = [ + # Keep sorted. + "//rs/registry/subnet_type", + "//rs/tests/driver:ic-system-test-driver", + "@crate_index//:anyhow", + "@crate_index//:slog", + ], +) diff --git a/rs/tests/nested/guestos_no_failed_systemd_units.rs b/rs/tests/node/guestos_no_failed_systemd_units.rs similarity index 53% rename from rs/tests/nested/guestos_no_failed_systemd_units.rs rename to rs/tests/node/guestos_no_failed_systemd_units.rs index e2af2e0b2fb6..5e84ad227080 100644 --- a/rs/tests/nested/guestos_no_failed_systemd_units.rs +++ b/rs/tests/node/guestos_no_failed_systemd_units.rs @@ -1,32 +1,34 @@ use anyhow::Result; +use ic_registry_subnet_type::SubnetType; use ic_system_test_driver::{ - driver::{group::SystemTestGroup, test_env::TestEnv, test_env_api::*}, + driver::{ + group::SystemTestGroup, + ic::InternetComputer, + test_env::TestEnv, + test_env_api::{HasTopologySnapshot, IcNodeContainer, SshSession}, + }, systest, }; use slog::info; use std::time::Duration; -fn main() -> Result<()> { - SystemTestGroup::new() - .with_setup(nested::setup) - .with_teardown(nested::teardown) - .add_test(systest!(check_no_failed_systemd_units)) - .with_timeout_per_test(Duration::from_secs(20 * 60)) - .with_overall_timeout(Duration::from_secs(30 * 60)) - .execute_from_args()?; - - Ok(()) +fn setup(env: TestEnv) { + InternetComputer::new() + .add_fast_single_node_subnet(SubnetType::System) + .setup_and_start(&env) + .expect("failed to setup IC under test"); } -/// Wait for GuestOS nodes to register and then assert that no systemd units -/// have failed on any of them. +/// Wait for GuestOS nodes to be SSH-accessible and then assert that no systemd +/// units have failed on any of them. fn check_no_failed_systemd_units(env: TestEnv) { let logger = env.logger(); + let topology = env.topology_snapshot(); - nested::registration(env.clone()); + for node in topology.subnets().flat_map(|s| s.nodes()) { + node.await_can_login_as_admin_via_ssh() + .expect("Failed to establish SSH session to GuestOS node"); - let topology = env.topology_snapshot(); - for node in topology.unassigned_nodes() { let failed_units = node .block_on_bash_script("systemctl list-units --failed --no-legend --no-pager") .expect("Failed to run systemctl list-units --failed on GuestOS node"); @@ -43,3 +45,14 @@ fn check_no_failed_systemd_units(env: TestEnv) { } info!(logger, "No failed systemd units found on any GuestOS node."); } + +fn main() -> Result<()> { + SystemTestGroup::new() + .with_setup(setup) + .add_test(systest!(check_no_failed_systemd_units)) + .with_timeout_per_test(Duration::from_secs(10 * 60)) + .with_overall_timeout(Duration::from_secs(20 * 60)) + .execute_from_args()?; + + Ok(()) +} From f483514deb62f4007e348cded4622f27791dee3f Mon Sep 17 00:00:00 2001 From: Shiling Wang Date: Fri, 13 Mar 2026 16:10:35 +0100 Subject: [PATCH 5/8] Update rs/tests/node/guestos_no_failed_systemd_units.rs Co-authored-by: Bas van Dijk --- rs/tests/node/guestos_no_failed_systemd_units.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rs/tests/node/guestos_no_failed_systemd_units.rs b/rs/tests/node/guestos_no_failed_systemd_units.rs index 5e84ad227080..08e64af398a8 100644 --- a/rs/tests/node/guestos_no_failed_systemd_units.rs +++ b/rs/tests/node/guestos_no_failed_systemd_units.rs @@ -26,6 +26,8 @@ fn check_no_failed_systemd_units(env: TestEnv) { let topology = env.topology_snapshot(); for node in topology.subnets().flat_map(|s| s.nodes()) { + node.await_status_is_healthy() + .expect("Node's status endpoint didn't report healthy"); node.await_can_login_as_admin_via_ssh() .expect("Failed to establish SSH session to GuestOS node"); From 541ba915518ff250ad416b7c10ac7f65beee14c1 Mon Sep 17 00:00:00 2001 From: shilingwang Date: Fri, 13 Mar 2026 15:24:21 +0000 Subject: [PATCH 6/8] fix build --- rs/tests/node/guestos_no_failed_systemd_units.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rs/tests/node/guestos_no_failed_systemd_units.rs b/rs/tests/node/guestos_no_failed_systemd_units.rs index 08e64af398a8..42a08b616cde 100644 --- a/rs/tests/node/guestos_no_failed_systemd_units.rs +++ b/rs/tests/node/guestos_no_failed_systemd_units.rs @@ -5,7 +5,7 @@ use ic_system_test_driver::{ group::SystemTestGroup, ic::InternetComputer, test_env::TestEnv, - test_env_api::{HasTopologySnapshot, IcNodeContainer, SshSession}, + test_env_api::{HasPublicApiUrl, HasTopologySnapshot, IcNodeContainer, SshSession}, }, systest, }; From ba9b5df1a69e09ef7a970af99158e69cabda55b3 Mon Sep 17 00:00:00 2001 From: shilingwang Date: Fri, 13 Mar 2026 17:00:33 +0000 Subject: [PATCH 7/8] remove defaults --- rs/tests/node/guestos_no_failed_systemd_units.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/rs/tests/node/guestos_no_failed_systemd_units.rs b/rs/tests/node/guestos_no_failed_systemd_units.rs index 42a08b616cde..4a976d453f58 100644 --- a/rs/tests/node/guestos_no_failed_systemd_units.rs +++ b/rs/tests/node/guestos_no_failed_systemd_units.rs @@ -10,7 +10,6 @@ use ic_system_test_driver::{ systest, }; use slog::info; -use std::time::Duration; fn setup(env: TestEnv) { InternetComputer::new() @@ -52,8 +51,6 @@ fn main() -> Result<()> { SystemTestGroup::new() .with_setup(setup) .add_test(systest!(check_no_failed_systemd_units)) - .with_timeout_per_test(Duration::from_secs(10 * 60)) - .with_overall_timeout(Duration::from_secs(20 * 60)) .execute_from_args()?; Ok(()) From 3f62b5f4db0f4e6f4794ec82bcbb3c43163b4f14 Mon Sep 17 00:00:00 2001 From: shilingwang Date: Fri, 13 Mar 2026 17:09:13 +0000 Subject: [PATCH 8/8] add the test to Cargo.toml --- rs/tests/node/Cargo.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/rs/tests/node/Cargo.toml b/rs/tests/node/Cargo.toml index fd7ad6631a12..aa6f0ef31ed6 100644 --- a/rs/tests/node/Cargo.toml +++ b/rs/tests/node/Cargo.toml @@ -46,3 +46,7 @@ path = "launch_single_host.rs" [[bin]] name = "ic-systest-kill-start" path = "kill_start_test.rs" + +[[bin]] +name = "guestos-no-failed-systemd-units" +path = "guestos_no_failed_systemd_units.rs"