@@ -104,10 +104,10 @@ def test_update_rollback_failure(
104104 logger .info ("Injecting cfn-signal failure on head node..." )
105105 _inject_cfn_signal_failure (remote_command_executor )
106106
107- # Step 3: Disable cfn-hup on CN1 BEFORE update
107+ # Step 3: Disable pcluster-check-update timer on CN1 BEFORE update
108108 # This ensures CN1 won't apply the update, causing cluster readiness check to fail
109- logger .info (f"Disabling cfn-hup on CN1 ({ cn1 } ) before update..." )
110- _disable_cfn_hup_on_compute_node (remote_command_executor , cn1 )
109+ logger .info (f"Disabling the pcluster-check-update timer on CN1 ({ cn1 } ) before update..." )
110+ _disable_check_update_timer_on_compute_node (remote_command_executor , cn1 )
111111
112112 # Step 4: Trigger cluster update with wait=False (non-blocking)
113113 logger .info ("Triggering cluster update (non-blocking)..." )
@@ -126,8 +126,11 @@ def test_update_rollback_failure(
126126 region , cluster .name , cn2_instance_id , initial_config_version , timeout_minutes = 15
127127 )
128128
129- logger .info (f"CN2 has applied the update. Disabling cfn-hup on CN2 ({ cn2 } ) to inject rollback failure..." )
130- _disable_cfn_hup_on_compute_node (remote_command_executor , cn2 )
129+ logger .info (
130+ f"CN2 has applied the update. Disabling pcluster-check-update timer on CN2 "
131+ f"({ cn2 } ) to inject rollback failure..."
132+ )
133+ _disable_check_update_timer_on_compute_node (remote_command_executor , cn2 )
131134
132135 # Wait for stack to reach UPDATE_ROLLBACK_COMPLETE state
133136 logger .info ("Waiting for stack to reach UPDATE_ROLLBACK_COMPLETE..." )
@@ -269,27 +272,24 @@ def _inject_cfn_signal_failure(remote_command_executor):
269272 logger .info ("cfn-signal wrapper installed" )
270273
271274
272- def _disable_cfn_hup_on_compute_node (remote_command_executor , node_name ):
275+ def _disable_check_update_timer_on_compute_node (remote_command_executor , node_name ):
273276 """
274- Disable cfn-hup on a compute node using srun.
277+ Stop the pcluster-check-update timer on a compute node using srun.
275278
276- Uses supervisorctl to stop cfn-hup service on the compute node.
279+ Uses systemctl to stop the pcluster-check-update.timer on the compute node.
277280 """
278- logger .info (f"Disabling cfn-hup on compute node { node_name } ..." )
279-
280- supervisorctl_path = _get_supervisorctl_path (remote_command_executor )
281+ logger .info (f"Disabling pcluster-check-update on compute node { node_name } ..." )
281282
282- # Stop cfn-hup using srun
283- remote_command_executor .run_remote_command (f"srun -w { node_name } sudo { supervisorctl_path } stop cfn-hup " )
283+ # Stop pcluster-check-update.timer using srun
284+ remote_command_executor .run_remote_command (f"srun -w { node_name } sudo systemctl stop pcluster-check-update.timer " )
284285
285- # Verify cfn-hup is stopped
286- # Note: supervisorctl status returns exit code 3 when process is STOPPED, so we use raise_on_error=False
286+ # Verify pcluster-check-update.timer is stopped (systemctl is-active exits non-zero when the unit is not active, hence raise_on_error=False)
287287 result = remote_command_executor .run_remote_command (
288- f"srun -w { node_name } sudo { supervisorctl_path } status cfn-hup " ,
288+ f"srun -w { node_name } systemctl is-active pcluster-check-update.timer " ,
289289 raise_on_error = False ,
290290 )
291- assert_that (result .stdout ) .contains ("STOPPED " )
292- logger .info (f"cfn-hup stopped on { node_name } ✓" )
291+ assert_that (result .stdout . strip ()) .contains ("inactive " )
292+ logger .info (f"pcluster-check-update.timer stopped on { node_name } ✓" )
293293
294294
295295@retry (wait_fixed = seconds (30 ), stop_max_delay = minutes (60 ))
0 commit comments