summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJakob Borg <jakob@kastelo.net>2023-01-19 11:15:18 +0100
committerGitHub <noreply@github.com>2023-01-19 11:15:18 +0100
commitabdac2caa2866ab1aea79aae4090c50e858b6f51 (patch)
tree18a5d7b9a49c87043299896f11f3880f97deb7da
parent5f1e27bb7fc1108639db530fb012a3e147a65039 (diff)
Handle relay connect timeout (fixes #8749) (#8755)
This makes sure the service manager doesn't interpret timeout errors, or any other error, as a signal to stop the service instead of restarting it. I added it directly to our service utility function, as it may help catch other instances of the same problem. We would typically want timeouts, etc., to be retryable errors, unless it is the top-level context that has timed out, and we check for that specifically.
-rw-r--r--lib/svcutil/svcutil.go24
1 files changed, 22 insertions, 2 deletions
diff --git a/lib/svcutil/svcutil.go b/lib/svcutil/svcutil.go
index 41dd970a10..f9d5684b00 100644
--- a/lib/svcutil/svcutil.go
+++ b/lib/svcutil/svcutil.go
@@ -122,7 +122,12 @@ func (s *service) Serve(ctx context.Context) error {
s.err = nil
s.mut.Unlock()
- err := s.serve(ctx)
+ // The error returned by serve() may well be a network timeout, which as
+ // of Go 1.19 is a context.DeadlineExceeded, which Suture interprets as
+ // a signal to stop the service instead of restarting it. This typically
+ // isn't what we want, so we make sure to remove the context specific
+ // error types unless *our* context is actually cancelled.
+ err := asNonContextError(ctx, s.serve(ctx))
s.mut.Lock()
s.err = err
@@ -139,7 +144,6 @@ func (s *service) Error() error {
func (s *service) String() string {
return fmt.Sprintf("Service@%p created by %v", s, s.creator)
-
}
type doneService func()
@@ -203,3 +207,19 @@ func infoEventHook(l logger.Logger) suture.EventHook {
}
}
}
+
+// asNonContextError returns ctx.Err() if ctx is already done. Otherwise, if
+// err matches context.Canceled or context.DeadlineExceeded (per errors.Is),
+// it returns a new plain error carrying err's message plus " (non-context)",
+// which no longer matches either sentinel. Any other err is returned as-is.
+func asNonContextError(ctx context.Context, err error) error {
+ select {
+ case <-ctx.Done(): // our own context has ended: report that, whatever err was
+ return ctx.Err()
+ default: // ctx not done; fall through without blocking
+ }
+ if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
+ return fmt.Errorf("%s (non-context)", err.Error()) // %s, not %w: deliberately drops the error chain
+ }
+ return err
+}