@@ -1354,7 +1354,31 @@ func (c *Cluster) deleteWimiRoleAssignments(ctx context.Context, vnetResourceGro
13541354
13551355func (c * Cluster ) deleteCluster (ctx context.Context , resourceGroup , clusterName string ) error {
13561356 c .log .Printf ("deleting cluster %s" , clusterName )
1357- if err := c .openshiftclusters .DeleteAndWait (ctx , resourceGroup , clusterName ); err != nil {
1357+
1358+ timeoutCtx , cancel := context .WithTimeout (ctx , 30 * time .Minute )
1359+ defer cancel ()
1360+
1361+ var lastErr error
1362+ err := wait .PollImmediateUntil (30 * time .Second , func () (bool , error ) {
1363+ err := c .openshiftclusters .DeleteAndWait (timeoutCtx , resourceGroup , clusterName )
1364+ if err == nil {
1365+ return true , nil
1366+ }
1367+ if azureerrors .IsRetryableError (err ) {
1368+ c .log .Warnf ("retryable error deleting cluster %s, will retry: %v" , clusterName , err )
1369+ lastErr = err
1370+ return false , nil
1371+ }
1372+ return false , err
1373+ }, timeoutCtx .Done ())
1374+
1375+ if err != nil {
1376+ // If err is wait.ErrWaitTimeout, the timeout expired while retrying
1377+ // In that case, return lastErr if we have one (more informative)
1378+ if err == wait .ErrWaitTimeout && lastErr != nil {
1379+ return fmt .Errorf ("error deleting cluster %s: %w" , clusterName , lastErr )
1380+ }
1381+ // Otherwise return the actual error (e.g., non-retryable error from poll function)
13581382 return fmt .Errorf ("error deleting cluster %s: %w" , clusterName , err )
13591383 }
13601384 return nil
@@ -1365,14 +1389,39 @@ func (c *Cluster) ensureResourceGroupDeleted(ctx context.Context, resourceGroupN
13651389 timeoutCtx , cancel := context .WithTimeout (ctx , 10 * time .Minute )
13661390 defer cancel ()
13671391
1368- return wait .PollImmediateUntil (5 * time .Second , func () (bool , error ) {
1369- _ , err := c .groups .Get (ctx , resourceGroupName )
1392+ var lastErr error
1393+ err := wait .PollImmediateUntil (5 * time .Second , func () (bool , error ) {
1394+ _ , err := c .groups .Get (timeoutCtx , resourceGroupName )
13701395 if azureerrors .ResourceGroupNotFound (err ) {
13711396 c .log .Infof ("finished deleting resource group %s" , resourceGroupName )
13721397 return true , nil
13731398 }
1374- return false , fmt .Errorf ("failed to delete resource group %s with %s" , resourceGroupName , err )
1399+ if err != nil {
1400+ // Non-retryable errors should fail immediately
1401+ if ! azureerrors .IsRetryableError (err ) {
1402+ return false , fmt .Errorf ("non-retryable error checking resource group %s: %w" , resourceGroupName , err )
1403+ }
1404+ lastErr = err
1405+ c .log .Warnf ("retryable error checking resource group %s, will retry: %v" , resourceGroupName , err )
1406+ } else {
1407+ c .log .Infof ("resource group %s still exists, waiting for deletion" , resourceGroupName )
1408+ }
1409+ return false , nil
13751410 }, timeoutCtx .Done ())
1411+
1412+ if err != nil {
1413+ // If err is wait.ErrWaitTimeout, the timeout expired while retrying
1414+ // In that case, return lastErr if we have one (more informative)
1415+ if err == wait .ErrWaitTimeout && lastErr != nil {
1416+ return fmt .Errorf ("timed out waiting for resource group %s to be deleted, last error: %w" , resourceGroupName , lastErr )
1417+ }
1418+ // Otherwise return the actual error (e.g., non-retryable error or timeout with no lastErr)
1419+ if err == wait .ErrWaitTimeout {
1420+ return fmt .Errorf ("timed out waiting for resource group %s to be deleted" , resourceGroupName )
1421+ }
1422+ return err
1423+ }
1424+ return nil
13761425}
13771426
13781427func (c * Cluster ) deleteResourceGroup (ctx context.Context , resourceGroup string ) error {
0 commit comments