From 94889b727d9b87b04eac9392384e85f32bd8ee39 Mon Sep 17 00:00:00 2001 From: Jeff Berger Date: Tue, 14 Feb 2023 13:10:35 -0800 Subject: [PATCH] Revert panic on auth failure during state storage --- statemachine/statemachine.go | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/statemachine/statemachine.go b/statemachine/statemachine.go index 26fb2c6..8d3f14a 100644 --- a/statemachine/statemachine.go +++ b/statemachine/statemachine.go @@ -6,7 +6,7 @@ import ( "runtime" "sync" "time" - "strings" + "github.com/lytics/metafora" ) @@ -371,16 +371,8 @@ func (s *stateMachine) Run() (done bool) { metafora.Infof("task=%q transitioning %s --> %s --> %s", tid, state, msg, newstate) // Save state - if err := s.ss.Store(s.task, newstate); err != nil { - // After upgrading to 1.25.5-gke.2000 we started experiencing the metadata server throwing POD_FINDER_IP_MISMATCH - // errors resulting in failures authenticating to spanner. This panic will cause the pod to cyle - // See https://github.com/lytics/lio/issues/30414 - if strings.Contains(err.Error(), "spanner: code = \"Unauthenticated\"") { - metafora.Errorf("task=%q Unable to persist state=%q due to failure to authenticate to spanner.", tid, newstate.Code) - panic(err) - } - - metafora.Errorf("task=%q Unable to persist state=%q. Continuing.", tid, newstate.Code) + if err := s.ss.Store(s.task, newstate); err != nil { + metafora.Errorf("task=%q Unable to persist state=%q. Unscheduling.", tid, newstate.Code) return true }