From 447108fed2842e264897659856e9fd9cdc32ca23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Erik=20Pedersen?= Date: Fri, 17 May 2024 17:06:47 +0200 Subject: Add a HTTP cache for remote resources. Fixes #12502 Closes #11891 --- resources/resource_cache.go | 6 + resources/resource_factories/create/create.go | 74 ++++- .../create/create_integration_test.go | 3 +- resources/resource_factories/create/remote.go | 364 +++++++++++++-------- resources/resource_factories/create/remote_test.go | 18 +- resources/resource_spec.go | 19 +- 6 files changed, 334 insertions(+), 150 deletions(-) (limited to 'resources') diff --git a/resources/resource_cache.go b/resources/resource_cache.go index bf930c71d..a3ba9aa26 100644 --- a/resources/resource_cache.go +++ b/resources/resource_cache.go @@ -36,6 +36,11 @@ func newResourceCache(rs *Spec, memCache *dynacache.Cache) *ResourceCache { "/res1", dynacache.OptionsPartition{ClearWhen: dynacache.ClearOnChange, Weight: 40}, ), + CacheResourceRemote: dynacache.GetOrCreatePartition[string, resource.Resource]( + memCache, + "/resr", + dynacache.OptionsPartition{ClearWhen: dynacache.ClearOnChange, Weight: 40}, + ), cacheResources: dynacache.GetOrCreatePartition[string, resource.Resources]( memCache, "/ress", @@ -53,6 +58,7 @@ type ResourceCache struct { sync.RWMutex cacheResource *dynacache.Partition[string, resource.Resource] + CacheResourceRemote *dynacache.Partition[string, resource.Resource] cacheResources *dynacache.Partition[string, resource.Resources] cacheResourceTransformation *dynacache.Partition[string, *resourceAdapterInner] diff --git a/resources/resource_factories/create/create.go b/resources/resource_factories/create/create.go index 4725cf390..35a1fb59d 100644 --- a/resources/resource_factories/create/create.go +++ b/resources/resource_factories/create/create.go @@ -23,6 +23,9 @@ import ( "strings" "time" + "github.com/bep/logg" + "github.com/gohugoio/httpcache" + hhttpcache "github.com/gohugoio/hugo/cache/httpcache" "github.com/gohugoio/hugo/helpers" "github.com/gohugoio/hugo/hugofs/glob" "github.com/gohugoio/hugo/identity" @@ -31,7 +34,9 @@ import ( "github.com/gohugoio/hugo/cache/dynacache" "github.com/gohugoio/hugo/cache/filecache" + "github.com/gohugoio/hugo/common/hcontext" "github.com/gohugoio/hugo/common/hugio" + "github.com/gohugoio/hugo/common/tasks" "github.com/gohugoio/hugo/resources" "github.com/gohugoio/hugo/resources/resource" ) @@ -39,19 +44,76 @@ import ( // Client contains methods to create Resource objects. // tasks to Resource objects. type Client struct { - rs *resources.Spec - httpClient *http.Client - cacheGetResource *filecache.Cache + rs *resources.Spec + httpClient *http.Client + httpCacheConfig hhttpcache.ConfigCompiled + cacheGetResource *filecache.Cache + resourceIDDispatcher hcontext.ContextDispatcher[string] + + // Set when watching. + remoteResourceChecker *tasks.RunEvery + remoteResourceLogger logg.LevelLogger } +type contextKey string + // New creates a new Client with the given specification. func New(rs *resources.Spec) *Client { + fileCache := rs.FileCaches.GetResourceCache() + resourceIDDispatcher := hcontext.NewContextDispatcher[string](contextKey("resourceID")) + httpCacheConfig := rs.Cfg.GetConfigSection("httpCacheCompiled").(hhttpcache.ConfigCompiled) + var remoteResourceChecker *tasks.RunEvery + if rs.Cfg.Watching() && !httpCacheConfig.IsPollingDisabled() { + remoteResourceChecker = &tasks.RunEvery{ + HandleError: func(name string, err error) { + rs.Logger.Warnf("Failed to check remote resource: %s", err) + }, + RunImmediately: false, + } + + if err := remoteResourceChecker.Start(); err != nil { + panic(err) + } + + rs.BuildClosers.Add(remoteResourceChecker) + } + + httpTimeout := 2 * time.Minute // Need to cover retries. + if httpTimeout < (rs.Cfg.Timeout() + 30*time.Second) { + httpTimeout = rs.Cfg.Timeout() + 30*time.Second + } + return &Client{ - rs: rs, + rs: rs, + httpCacheConfig: httpCacheConfig, + resourceIDDispatcher: resourceIDDispatcher, + remoteResourceChecker: remoteResourceChecker, + remoteResourceLogger: rs.Logger.InfoCommand("remote"), httpClient: &http.Client{ - Timeout: time.Minute, + Timeout: httpTimeout, + Transport: &httpcache.Transport{ + Cache: fileCache.AsHTTPCache(), + CacheKey: func(req *http.Request) string { + return resourceIDDispatcher.Get(req.Context()) + }, + Around: func(req *http.Request, key string) func() { + return fileCache.NamedLock(key) + }, + AlwaysUseCachedResponse: func(req *http.Request, key string) bool { + return !httpCacheConfig.For(req.URL.String()) + }, + ShouldCache: func(req *http.Request, resp *http.Response, key string) bool { + return shouldCache(resp.StatusCode) + }, + MarkCachedResponses: true, + EnableETagPair: true, + Transport: &transport{ + Cfg: rs.Cfg, + Logger: rs.Logger, + }, + }, }, - cacheGetResource: rs.FileCaches.GetResourceCache(), + cacheGetResource: fileCache, } } diff --git a/resources/resource_factories/create/create_integration_test.go b/resources/resource_factories/create/create_integration_test.go index 61bc17adb..17084574d 100644 --- a/resources/resource_factories/create/create_integration_test.go +++ b/resources/resource_factories/create/create_integration_test.go @@ -134,8 +134,7 @@ mediaTypes = ['text/plain'] // This is hard to get stable on GitHub Actions, it sometimes succeeds due to timing issues. if err != nil { b.AssertLogContains("Got Err") - b.AssertLogContains("Retry timeout") - b.AssertLogContains("ContentLength:0") + b.AssertLogContains("retry timeout") } }) } diff --git a/resources/resource_factories/create/remote.go b/resources/resource_factories/create/remote.go index c2d17e7a5..ef8078228 100644 --- a/resources/resource_factories/create/remote.go +++ b/resources/resource_factories/create/remote.go @@ -14,22 +14,27 @@ package create import ( - "bufio" "bytes" + "context" "fmt" "io" "math/rand" "mime" "net/http" - "net/http/httputil" "net/url" "path" "strings" "time" + gmaps "maps" + + "github.com/gohugoio/httpcache" "github.com/gohugoio/hugo/common/hugio" + "github.com/gohugoio/hugo/common/loggers" "github.com/gohugoio/hugo/common/maps" + "github.com/gohugoio/hugo/common/tasks" "github.com/gohugoio/hugo/common/types" + "github.com/gohugoio/hugo/config" "github.com/gohugoio/hugo/identity" "github.com/gohugoio/hugo/media" "github.com/gohugoio/hugo/resources" @@ -92,6 +97,60 @@ var temporaryHTTPStatusCodes = map[int]bool{ 504: true, } +func (c *Client) configurePollingIfEnabled(uri, optionsKey string, getRes func() (*http.Response, error)) { + if c.remoteResourceChecker == nil { + return + } + + // Set up polling for changes to this resource. + pollingConfig := c.httpCacheConfig.PollConfigFor(uri) + if pollingConfig.IsZero() || pollingConfig.Config.Disable { + return + } + + if c.remoteResourceChecker.Has(optionsKey) { + return + } + + var lastChange time.Time + c.remoteResourceChecker.Add(optionsKey, + tasks.Func{ + IntervalLow: pollingConfig.Config.Low, + IntervalHigh: pollingConfig.Config.High, + F: func(interval time.Duration) (time.Duration, error) { + start := time.Now() + defer func() { + duration := time.Since(start) + c.rs.Logger.Debugf("Polled remote resource for changes in %13s. Interval: %4s (low: %4s high: %4s) resource: %q ", duration, interval, pollingConfig.Config.Low, pollingConfig.Config.High, uri) + }() + // TODO(bep) figure out a ways to remove unused tasks. + res, err := getRes() + if err != nil { + return pollingConfig.Config.High, err + } + // The caching is delayed until the body is read. + io.Copy(io.Discard, res.Body) + res.Body.Close() + x1, x2 := res.Header.Get(httpcache.XETag1), res.Header.Get(httpcache.XETag2) + if x1 != x2 { + lastChange = time.Now() + c.remoteResourceLogger.Logf("detected change in remote resource %q", uri) + c.rs.Rebuilder.SignalRebuild(identity.StringIdentity(optionsKey)) + } + + if time.Since(lastChange) < 10*time.Second { + // The user is typing, check more often. + return 0, nil + } + + // Increase the interval to avoid hammering the server. + interval += 1 * time.Second + + return interval, nil + }, + }) +} + // FromRemote expects one or n-parts of a URL to a resource // If you provide multiple parts they will be joined together to the final URL. func (c *Client) FromRemote(uri string, optionsm map[string]any) (resource.Resource, error) { @@ -101,168 +160,139 @@ func (c *Client) FromRemote(uri string, optionsm map[string]any) (resource.Resou } method := "GET" - if s, ok := maps.LookupEqualFold(optionsm, "method"); ok { + if s, _, ok := maps.LookupEqualFold(optionsm, "method"); ok { method = strings.ToUpper(s.(string)) } isHeadMethod := method == "HEAD" - resourceID := calculateResourceID(uri, optionsm) + optionsm = gmaps.Clone(optionsm) + userKey, optionsKey := remoteResourceKeys(uri, optionsm) + + // A common pattern is to use the key in the options map as + // a way to control cache eviction, + // so make sure we use any user provided kehy as the file cache key, + // but the auto generated and more stable key for everything else. + filecacheKey := userKey - _, httpResponse, err := c.cacheGetResource.GetOrCreate(resourceID, func() (io.ReadCloser, error) { + return c.rs.ResourceCache.CacheResourceRemote.GetOrCreate(optionsKey, func(key string) (resource.Resource, error) { options, err := decodeRemoteOptions(optionsm) if err != nil { return nil, fmt.Errorf("failed to decode options for resource %s: %w", uri, err) } + if err := c.validateFromRemoteArgs(uri, options); err != nil { return nil, err } - var ( - start time.Time - nextSleep = time.Duration((rand.Intn(1000) + 100)) * time.Millisecond - nextSleepLimit = time.Duration(5) * time.Second - ) + getRes := func() (*http.Response, error) { + ctx := context.Background() + ctx = c.resourceIDDispatcher.Set(ctx, filecacheKey) - for { - b, retry, err := func() ([]byte, bool, error) { - req, err := options.NewRequest(uri) - if err != nil { - return nil, false, fmt.Errorf("failed to create request for resource %s: %w", uri, err) - } - - res, err := c.httpClient.Do(req) - if err != nil { - return nil, false, err - } - defer res.Body.Close() - - if res.StatusCode != http.StatusNotFound { - if res.StatusCode < 200 || res.StatusCode > 299 { - return nil, temporaryHTTPStatusCodes[res.StatusCode], toHTTPError(fmt.Errorf("failed to fetch remote resource: %s", http.StatusText(res.StatusCode)), res, !isHeadMethod) - } - } - - b, err := httputil.DumpResponse(res, true) - if err != nil { - return nil, false, toHTTPError(err, res, !isHeadMethod) - } - - return b, false, nil - }() + req, err := options.NewRequest(uri) if err != nil { - if retry { - if start.IsZero() { - start = time.Now() - } else if d := time.Since(start) + nextSleep; d >= c.rs.Cfg.Timeout() { - c.rs.Logger.Errorf("Retry timeout (configured to %s) fetching remote resource.", c.rs.Cfg.Timeout()) - return nil, err - } - time.Sleep(nextSleep) - if nextSleep < nextSleepLimit { - nextSleep *= 2 - } - continue - } - return nil, err + return nil, fmt.Errorf("failed to create request for resource %s: %w", uri, err) } - return hugio.ToReadCloser(bytes.NewReader(b)), nil + req = req.WithContext(ctx) + return c.httpClient.Do(req) } - }) - if err != nil { - return nil, err - } - defer httpResponse.Close() - - res, err := http.ReadResponse(bufio.NewReader(httpResponse), nil) - if err != nil { - return nil, err - } - defer res.Body.Close() - if res.StatusCode == http.StatusNotFound { - // Not found. This matches how looksup for local resources work. - return nil, nil - } - - var ( - body []byte - mediaType media.Type - ) - // A response to a HEAD method should not have a body. If it has one anyway, that body must be ignored. - // See https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods/HEAD - if !isHeadMethod && res.Body != nil { - body, err = io.ReadAll(res.Body) + res, err := getRes() if err != nil { - return nil, fmt.Errorf("failed to read remote resource %q: %w", uri, err) + return nil, err } - } + defer res.Body.Close() - filename := path.Base(rURL.Path) - if _, params, _ := mime.ParseMediaType(res.Header.Get("Content-Disposition")); params != nil { - if _, ok := params["filename"]; ok { - filename = params["filename"] + c.configurePollingIfEnabled(uri, optionsKey, getRes) + + if res.StatusCode == http.StatusNotFound { + // Not found. This matches how lookups for local resources work. + return nil, nil } - } - contentType := res.Header.Get("Content-Type") + if res.StatusCode < 200 || res.StatusCode > 299 { + return nil, toHTTPError(fmt.Errorf("failed to fetch remote resource: %s", http.StatusText(res.StatusCode)), res, !isHeadMethod) + } - // For HEAD requests we have no body to work with, so we need to use the Content-Type header. - if isHeadMethod || c.rs.ExecHelper.Sec().HTTP.MediaTypes.Accept(contentType) { - var found bool - mediaType, found = c.rs.MediaTypes().GetByType(contentType) - if !found { - // A media type not configured in Hugo, just create one from the content type string. - mediaType, _ = media.FromString(contentType) + var ( + body []byte + mediaType media.Type + ) + // A response to a HEAD method should not have a body. If it has one anyway, that body must be ignored. + // See https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods/HEAD + if !isHeadMethod && res.Body != nil { + body, err = io.ReadAll(res.Body) + if err != nil { + return nil, fmt.Errorf("failed to read remote resource %q: %w", uri, err) + } } - } - if mediaType.IsZero() { + filename := path.Base(rURL.Path) + if _, params, _ := mime.ParseMediaType(res.Header.Get("Content-Disposition")); params != nil { + if _, ok := params["filename"]; ok { + filename = params["filename"] + } + } - var extensionHints []string + contentType := res.Header.Get("Content-Type") - // mime.ExtensionsByType gives a long list of extensions for text/plain, - // just use ".txt". - if strings.HasPrefix(contentType, "text/plain") { - extensionHints = []string{".txt"} - } else { - exts, _ := mime.ExtensionsByType(contentType) - if exts != nil { - extensionHints = exts + // For HEAD requests we have no body to work with, so we need to use the Content-Type header. + if isHeadMethod || c.rs.ExecHelper.Sec().HTTP.MediaTypes.Accept(contentType) { + var found bool + mediaType, found = c.rs.MediaTypes().GetByType(contentType) + if !found { + // A media type not configured in Hugo, just create one from the content type string. + mediaType, _ = media.FromString(contentType) } } - // Look for a file extension. If it's .txt, look for a more specific. - if extensionHints == nil || extensionHints[0] == ".txt" { - if ext := path.Ext(filename); ext != "" { - extensionHints = []string{ext} + if mediaType.IsZero() { + + var extensionHints []string + + // mime.ExtensionsByType gives a long list of extensions for text/plain, + // just use ".txt". + if strings.HasPrefix(contentType, "text/plain") { + extensionHints = []string{".txt"} + } else { + exts, _ := mime.ExtensionsByType(contentType) + if exts != nil { + extensionHints = exts + } } - } - // Now resolve the media type primarily using the content. - mediaType = media.FromContent(c.rs.MediaTypes(), extensionHints, body) + // Look for a file extension. If it's .txt, look for a more specific. + if extensionHints == nil || extensionHints[0] == ".txt" { + if ext := path.Ext(filename); ext != "" { + extensionHints = []string{ext} + } + } - } + // Now resolve the media type primarily using the content. + mediaType = media.FromContent(c.rs.MediaTypes(), extensionHints, body) - if mediaType.IsZero() { - return nil, fmt.Errorf("failed to resolve media type for remote resource %q", uri) - } + } - resourceID = filename[:len(filename)-len(path.Ext(filename))] + "_" + resourceID + mediaType.FirstSuffix.FullSuffix - data := responseToData(res, false) - - return c.rs.NewResource( - resources.ResourceSourceDescriptor{ - MediaType: mediaType, - Data: data, - GroupIdentity: identity.StringIdentity(resourceID), - LazyPublish: true, - OpenReadSeekCloser: func() (hugio.ReadSeekCloser, error) { - return hugio.NewReadSeekerNoOpCloser(bytes.NewReader(body)), nil - }, - TargetPath: resourceID, - }) + if mediaType.IsZero() { + return nil, fmt.Errorf("failed to resolve media type for remote resource %q", uri) + } + + userKey = filename[:len(filename)-len(path.Ext(filename))] + "_" + userKey + mediaType.FirstSuffix.FullSuffix + data := responseToData(res, false) + + return c.rs.NewResource( + resources.ResourceSourceDescriptor{ + MediaType: mediaType, + Data: data, + GroupIdentity: identity.StringIdentity(optionsKey), + LazyPublish: true, + OpenReadSeekCloser: func() (hugio.ReadSeekCloser, error) { + return hugio.NewReadSeekerNoOpCloser(bytes.NewReader(body)), nil + }, + TargetPath: userKey, + }) + }) } func (c *Client) validateFromRemoteArgs(uri string, options fromRemoteOptions) error { @@ -277,11 +307,17 @@ func (c *Client) validateFromRemoteArgs(uri string, options fromRemoteOptions) e return nil } -func calculateResourceID(uri string, optionsm map[string]any) string { - if key, found := maps.LookupEqualFold(optionsm, "key"); found { - return identity.HashString(key) +func remoteResourceKeys(uri string, optionsm map[string]any) (string, string) { + var userKey string + if key, k, found := maps.LookupEqualFold(optionsm, "key"); found { + userKey = identity.HashString(key) + delete(optionsm, k) + } + optionsKey := identity.HashString(uri, optionsm) + if userKey == "" { + userKey = optionsKey } - return identity.HashString(uri, optionsm) + return userKey, optionsKey } func addDefaultHeaders(req *http.Request) { @@ -350,3 +386,71 @@ func decodeRemoteOptions(optionsm map[string]any) (fromRemoteOptions, error) { return options, nil } + +var _ http.RoundTripper = (*transport)(nil) + +type transport struct { + Cfg config.AllProvider + Logger loggers.Logger +} + +func (t *transport) RoundTrip(req *http.Request) (resp *http.Response, err error) { + defer func() { + if resp != nil && resp.StatusCode != http.StatusNotFound && resp.StatusCode != http.StatusNotModified { + t.Logger.Debugf("Fetched remote resource: %s", req.URL.String()) + } + }() + + var ( + start time.Time + nextSleep = time.Duration((rand.Intn(1000) + 100)) * time.Millisecond + nextSleepLimit = time.Duration(5) * time.Second + retry bool + ) + + for { + resp, retry, err = func() (*http.Response, bool, error) { + resp2, err := http.DefaultTransport.RoundTrip(req) + if err != nil { + return resp2, false, err + } + + if resp2.StatusCode != http.StatusNotFound && resp2.StatusCode != http.StatusNotModified { + if resp2.StatusCode < 200 || resp2.StatusCode > 299 { + return resp2, temporaryHTTPStatusCodes[resp2.StatusCode], nil + } + } + return resp2, false, nil + }() + + if retry { + if start.IsZero() { + start = time.Now() + } else if d := time.Since(start) + nextSleep; d >= t.Cfg.Timeout() { + msg := "" + if resp != nil { + msg = resp.Status + } + err := toHTTPError(fmt.Errorf("retry timeout (configured to %s) fetching remote resource: %s", t.Cfg.Timeout(), msg), resp, req.Method != "HEAD") + return resp, err + } + time.Sleep(nextSleep) + if nextSleep < nextSleepLimit { + nextSleep *= 2 + } + continue + } + + return + } +} + +// We need to send the redirect responses back to the HTTP client from RoundTrip, +// but we don't want to cache them. +func shouldCache(statusCode int) bool { + switch statusCode { + case http.StatusMovedPermanently, http.StatusFound, http.StatusSeeOther, http.StatusTemporaryRedirect, http.StatusPermanentRedirect: + return false + } + return true +} diff --git a/resources/resource_factories/create/remote_test.go b/resources/resource_factories/create/remote_test.go index 21314ad34..49d0b1541 100644 --- a/resources/resource_factories/create/remote_test.go +++ b/resources/resource_factories/create/remote_test.go @@ -115,15 +115,21 @@ func TestOptionsNewRequest(t *testing.T) { c.Assert(req.Header["User-Agent"], qt.DeepEquals, []string{"foo"}) } -func TestCalculateResourceID(t *testing.T) { +func TestRemoteResourceKeys(t *testing.T) { t.Parallel() c := qt.New(t) - c.Assert(calculateResourceID("foo", nil), qt.Equals, "5917621528921068675") - c.Assert(calculateResourceID("foo", map[string]any{"bar": "baz"}), qt.Equals, "7294498335241413323") + check := func(uri string, optionsm map[string]any, expect1, expect2 string) { + got1, got2 := remoteResourceKeys(uri, optionsm) + c.Assert(got1, qt.Equals, expect1) + c.Assert(got2, qt.Equals, expect2) + } - c.Assert(calculateResourceID("foo", map[string]any{"key": "1234", "bar": "baz"}), qt.Equals, "14904296279238663669") - c.Assert(calculateResourceID("asdf", map[string]any{"key": "1234", "bar": "asdf"}), qt.Equals, "14904296279238663669") - c.Assert(calculateResourceID("asdf", map[string]any{"key": "12345", "bar": "asdf"}), qt.Equals, "12191037851845371770") + check("foo", nil, "5917621528921068675", "5917621528921068675") + check("foo", map[string]any{"bar": "baz"}, "7294498335241413323", "7294498335241413323") + check("foo", map[string]any{"key": "1234", "bar": "baz"}, "14904296279238663669", "7294498335241413323") + check("foo", map[string]any{"key": "12345", "bar": "baz"}, "12191037851845371770", "7294498335241413323") + check("asdf", map[string]any{"key": "1234", "bar": "asdf"}, "14904296279238663669", "3787889110563790121") + check("asdf", map[string]any{"key": "12345", "bar": "asdf"}, "12191037851845371770", "3787889110563790121") } diff --git a/resources/resource_spec.go b/resources/resource_spec.go index 644259e48..ef76daa1a 100644 --- a/resources/resource_spec.go +++ b/resources/resource_spec.go @@ -29,6 +29,7 @@ import ( "github.com/gohugoio/hugo/common/hexec" "github.com/gohugoio/hugo/common/loggers" "github.com/gohugoio/hugo/common/paths" + "github.com/gohugoio/hugo/common/types" "github.com/gohugoio/hugo/identity" @@ -53,6 +54,8 @@ func NewSpec( logger loggers.Logger, errorHandler herrors.ErrorSender, execHelper *hexec.Exec, + buildClosers types.CloseAdder, + rebuilder identity.SignalRebuilder, ) (*Spec, error) { conf := s.Cfg.GetConfig().(*allconfig.Config) imgConfig := conf.Imaging @@ -87,10 +90,12 @@ func NewSpec( } rs := &Spec{ - PathSpec: s, - Logger: logger, - ErrorSender: errorHandler, - imaging: imaging, + PathSpec: s, + Logger: logger, + ErrorSender: errorHandler, + BuildClosers: buildClosers, + Rebuilder: rebuilder, + imaging: imaging, ImageCache: newImageCache( fileCaches.ImageCache(), memCache, @@ -111,8 +116,10 @@ func NewSpec( type Spec struct { *helpers.PathSpec - Logger loggers.Logger - ErrorSender herrors.ErrorSender + Logger loggers.Logger + ErrorSender herrors.ErrorSender + BuildClosers types.CloseAdder + Rebuilder identity.SignalRebuilder TextTemplates tpl.TemplateParseFinder -- cgit v1.2.3