diff options
author | Jakob Borg <jakob@kastelo.net> | 2023-01-19 11:15:18 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-01-19 11:15:18 +0100 |
commit | abdac2caa2866ab1aea79aae4090c50e858b6f51 (patch) | |
tree | 18a5d7b9a49c87043299896f11f3880f97deb7da | |
parent | 5f1e27bb7fc1108639db530fb012a3e147a65039 (diff) |
Handle relay connect timeout (fixes #8749) (#8755)
This makes sure the service manager doesn't interpret timeout errors, or any other error, as a signal to stop the service instead of restarting it.
I added it directly to our service utility function, as it may help catch other instances of the same problem. We would typically want timeouts etc. to be retryable errors, unless it is the top-level context that has timed out, which we check for specifically.
-rw-r--r-- | lib/svcutil/svcutil.go | 24 |
1 file changed, 22 insertions, 2 deletions
```diff
diff --git a/lib/svcutil/svcutil.go b/lib/svcutil/svcutil.go
index 41dd970a10..f9d5684b00 100644
--- a/lib/svcutil/svcutil.go
+++ b/lib/svcutil/svcutil.go
@@ -122,7 +122,12 @@ func (s *service) Serve(ctx context.Context) error {
 	s.err = nil
 	s.mut.Unlock()
 
-	err := s.serve(ctx)
+	// The error returned by serve() may well be a network timeout, which as
+	// of Go 1.19 is a context.DeadlineExceeded, which Suture interprets as
+	// a signal to stop the service instead of restarting it. This typically
+	// isn't what we want, so we make sure to remove the context specific
+	// error types unless *our* context is actually cancelled.
+	err := asNonContextError(ctx, s.serve(ctx))
 
 	s.mut.Lock()
 	s.err = err
@@ -139,7 +144,6 @@ func (s *service) Error() error {
 
 func (s *service) String() string {
 	return fmt.Sprintf("Service@%p created by %v", s, s.creator)
-
 }
 
 type doneService func()
@@ -203,3 +207,19 @@ func infoEventHook(l logger.Logger) suture.EventHook {
 		}
 	}
 }
+
+// asNonContextError returns err, except if it is context.Canceled or
+// context.DeadlineExceeded in which case the error will be a simple string
+// representation instead. The given context is checked for cancellation,
+// and if it is cancelled then that error is returned instead of err.
+func asNonContextError(ctx context.Context, err error) error {
+	select {
+	case <-ctx.Done():
+		return ctx.Err()
+	default:
+	}
+	if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
+		return fmt.Errorf("%s (non-context)", err.Error())
+	}
+	return err
+}
```