From 7285e74090852b5d52f25e577850fa75f4aa8573 Mon Sep 17 00:00:00 2001 From: Bjørn Erik Pedersen Date: Sun, 24 Dec 2023 19:11:05 +0100 Subject: all: Rework page store, add a dynacache, improve partial rebuilds, and some general spring cleaning

There are some breaking changes in this commit, see #11455. Closes #11455 Closes #11549

This fixes a set of bugs (see the issue list) and pays down some technical debt accumulated over the years. We now build with Staticcheck enabled in the CI build. Performance should be about the same as before for regular-sized Hugo sites, but it should perform and scale much better on larger data sets, as objects that use lots of memory (e.g. rendered Markdown, big JSON files read into maps with transform.Unmarshal etc.) will now get automatically garbage collected if needed. Performance on partial rebuilds when running the server in fast render mode should be the same, but the change detection should be much more accurate.

A list of the notable new features:

* A new dependency tracker that covers (almost) all of Hugo's API and is used to do fine-grained partial rebuilds when running the server.
* A new and simpler tree document store which allows fast lookups and prefix-walking in all dimensions (e.g. language) concurrently.
* You can now configure an upper memory limit, allowing for much larger data sets and/or running on lower-specced PCs.

We have lifted the "no resources in sub folders" restriction for branch bundles (e.g. sections).

Memory Limit: Hugo will, by default, set aside a quarter of the total system memory, but you can set this via the OS environment variable HUGO_MEMORYLIMIT (in gigabytes). This is backed by a partitioned LRU cache used throughout Hugo, a cache that gets dynamically resized in low-memory situations, allowing Go's garbage collector to free the memory.

New Dependency Tracker: Hugo has had a rule-based, coarse-grained approach to server rebuilds that has worked mostly pretty well, but there have been some surprises (e.g. stale content). This is now revamped with a new dependency tracker that can quickly calculate the delta given a changed resource (e.g. a content file, template, JS file etc.). This handles transitive relations, e.g. $page -> js.Build -> JS import, or $page1.Content -> render hook -> site.GetPage -> $page2.Title, or $page1.Content -> shortcode -> partial -> site.RegularPages -> $page2.Content -> shortcode ..., and should also handle changes to aggregated values (e.g. site.Lastmod) effectively. This covers all of Hugo's API with two known exceptions (a list that may not be exhaustive):

* Changes to files loaded with the template func os.ReadFile may not be handled correctly. We recommend loading resources with resources.Get.
* Changes to Hugo objects (e.g. Page) passed in the template context to lang.Translate may not be detected correctly. We recommend having simple i18n templates without too much data context passed in, other than simple types such as strings and numbers.

Note that the cachebuster configuration (when A changes then rebuild B) works well with the above, but we recommend that you revise that configuration, as in most situations it should no longer be needed. One example where it is still needed is with TailwindCSS, using changes to hugo_stats.json to trigger new CSS rebuilds.

Document Store: Previously, somewhat simplified, we split the document store (where we store pages and resources) into one tree per language. This worked pretty well, but the structure made some operations harder than they needed to be. We have now restructured it into one Radix tree for all languages. Internally, the language is considered to be a dimension of that tree, and the tree can be viewed in all dimensions concurrently. This makes some language-related operations simpler (e.g. finding translations is just a slice range), and it should also be relatively inexpensive to add more dimensions if needed (e.g. role).
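To illustrate the "language as a dimension" idea, here is a minimal, self-contained sketch. The types and names below are illustrative only and are not Hugo's actual doctree API; the point is that all translations of one path share a single node, so finding translations is a plain slice read:

// Sketch: one tree keyed by path, with one slot per language dimension.
package main

import "fmt"

// node holds one value per language; a zero slot means "not present".
type node[T any] []T

type tree[T any] struct {
	numLangs int
	m        map[string]node[T]
}

func (t *tree[T]) insert(key string, lang int, v T) {
	n, ok := t.m[key]
	if !ok {
		n = make(node[T], t.numLangs)
		t.m[key] = n
	}
	n[lang] = v
}

// translations returns all language variants stored for key.
func (t *tree[T]) translations(key string) node[T] { return t.m[key] }

func main() {
	t := &tree[string]{numLangs: 2, m: map[string]node[string]{}}
	t.insert("/blog/post", 0, "Post (en)")
	t.insert("/blog/post", 1, "Innlegg (nn)")
	fmt.Println(t.translations("/blog/post")) // [Post (en) Innlegg (nn)]
}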
Fixes #10169 Fixes #10364 Fixes #10482 Fixes #10630 Fixes #10656 Fixes #10694 Fixes #10918 Fixes #11262 Fixes #11439 Fixes #11453 Fixes #11457 Fixes #11466 Fixes #11540 Fixes #11551 Fixes #11556 Fixes #11654 Fixes #11661 Fixes #11663 Fixes #11664 Fixes #11669 Fixes #11671 Fixes #11807 Fixes #11808 Fixes #11809 Fixes #11815 Fixes #11840 Fixes #11853 Fixes #11860 Fixes #11883 Fixes #11904 Fixes #7388 Fixes #7425 Fixes #7436 Fixes #7544 Fixes #7882 Fixes #7960 Fixes #8255 Fixes #8307 Fixes #8863 Fixes #8927 Fixes #9192 Fixes #9324 --- hugolib/content_map_page.go | 2351 +++++++++++++++++++++++++++++-------------- 1 file changed, 1600 insertions(+), 751 deletions(-) (limited to 'hugolib/content_map_page.go') diff --git a/hugolib/content_map_page.go b/hugolib/content_map_page.go index 2c14ffa59..536f23ccd 100644 --- a/hugolib/content_map_page.go +++ b/hugolib/content_map_page.go @@ -1,4 +1,4 @@ -// Copyright 2019 The Hugo Authors. All rights reserved. +// Copyright 2024 The Hugo Authors. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,1025 +16,1874 @@ package hugolib import ( "context" "fmt" + "io" "path" - "path/filepath" + "sort" + "strconv" "strings" - "sync" - - "github.com/gohugoio/hugo/common/maps" - + "sync/atomic" + "time" + + "github.com/bep/logg" + "github.com/gohugoio/hugo/cache/dynacache" + "github.com/gohugoio/hugo/common/loggers" + "github.com/gohugoio/hugo/common/paths" + "github.com/gohugoio/hugo/common/predicate" + "github.com/gohugoio/hugo/common/rungroup" "github.com/gohugoio/hugo/common/types" + "github.com/gohugoio/hugo/hugofs/files" + "github.com/gohugoio/hugo/hugolib/doctree" + "github.com/gohugoio/hugo/identity" + "github.com/gohugoio/hugo/output" "github.com/gohugoio/hugo/resources" + "github.com/spf13/cast" + + "github.com/gohugoio/hugo/common/maps" - "github.com/gohugoio/hugo/common/hugio" - "github.com/gohugoio/hugo/hugofs" - "github.com/gohugoio/hugo/hugofs/files" - "github.com/gohugoio/hugo/parser/pageparser" "github.com/gohugoio/hugo/resources/kinds" "github.com/gohugoio/hugo/resources/page" "github.com/gohugoio/hugo/resources/resource" - "github.com/spf13/cast" - - "github.com/gohugoio/hugo/common/para" ) -func newPageMaps(h *HugoSites) *pageMaps { - mps := make([]*pageMap, len(h.Sites)) - for i, s := range h.Sites { - mps[i] = s.pageMap - } - return &pageMaps{ - workers: para.New(h.numWorkers), - pmaps: mps, - } +var pagePredicates = struct { + KindPage predicate.P[*pageState] + KindSection predicate.P[*pageState] + KindHome predicate.P[*pageState] + KindTerm predicate.P[*pageState] + ShouldListLocal predicate.P[*pageState] + ShouldListGlobal predicate.P[*pageState] + ShouldListAny predicate.P[*pageState] + ShouldLink predicate.P[page.Page] +}{ + KindPage: func(p *pageState) bool { + return p.Kind() == kinds.KindPage + }, + KindSection: func(p *pageState) bool { + return p.Kind() == kinds.KindSection + }, + KindHome: func(p *pageState) bool { + return
p.Kind() == kinds.KindHome + }, + KindTerm: func(p *pageState) bool { + return p.Kind() == kinds.KindTerm + }, + ShouldListLocal: func(p *pageState) bool { + return p.m.shouldList(false) + }, + ShouldListGlobal: func(p *pageState) bool { + return p.m.shouldList(true) + }, + ShouldListAny: func(p *pageState) bool { + return p.m.shouldListAny() + }, + ShouldLink: func(p page.Page) bool { + return !p.(*pageState).m.noLink() + }, } type pageMap struct { + i int s *Site - *contentMap -} -func (m *pageMap) Len() int { - l := 0 - for _, t := range m.contentMap.pageTrees { - l += t.Len() - } - return l -} + // Main storage for all pages. + *pageTrees -func (m *pageMap) createMissingTaxonomyNodes() error { - if m.cfg.taxonomyDisabled { - return nil - } - m.taxonomyEntries.Walk(func(s string, v any) bool { - n := v.(*contentNode) - vi := n.viewInfo - k := cleanSectionTreeKey(vi.name.plural + "/" + vi.termKey) + // Used for simple page lookups by name, e.g. "mypage.md" or "mypage". + pageReverseIndex *contentTreeReverseIndex - if _, found := m.taxonomies.Get(k); !found { - vic := &contentBundleViewInfo{ - name: vi.name, - termKey: vi.termKey, - termOrigin: vi.termOrigin, - } - m.taxonomies.Insert(k, &contentNode{viewInfo: vic}) - } - return false - }) + cachePages *dynacache.Partition[string, page.Pages] + cacheResources *dynacache.Partition[string, resource.Resources] + cacheContentRendered *dynacache.Partition[string, *resources.StaleValue[contentSummary]] + cacheContentPlain *dynacache.Partition[string, *resources.StaleValue[contentPlainPlainWords]] + contentTableOfContents *dynacache.Partition[string, *resources.StaleValue[contentTableOfContents]] + cacheContentSource *dynacache.Partition[string, *resources.StaleValue[[]byte]] - return nil + cfg contentMapConfig } -func (m *pageMap) newPageFromContentNode(n *contentNode, parentBucket *pagesMapBucket, owner *pageState) (*pageState, error) { - if n.fi == nil { - panic("FileInfo must (currently) be set") - } +// pageTrees holds pages and resources in a tree structure for all sites/languages. +// Each site gets its own tree set via the Shape method. +type pageTrees struct { + // This tree contains all Pages. + // This includes regular pages, sections, taxonomies and so on. + // Note that all of these trees share the same key structure, + // so you can take a leaf Page key and do a prefix search + // with key + "/" to get all of its resources. + treePages *doctree.NodeShiftTree[contentNodeI] - f, err := newFileInfo(m.s.SourceSpec, n.fi) - if err != nil { - return nil, err - } + // This tree contains Resources bundled in pages. + treeResources *doctree.NodeShiftTree[contentNodeI] - meta := n.fi.Meta() - content := func() (hugio.ReadSeekCloser, error) { - return meta.Open() - } + // All pages and resources. + treePagesResources doctree.WalkableTrees[contentNodeI] - bundled := owner != nil - s := m.s + // This tree contains all taxonomy entries, e.g. "/tags/blue/page1" + treeTaxonomyEntries *doctree.TreeShiftTree[*weightedContentNode] - sections := s.sectionsFromFile(f) + // A slice of the resource trees. + resourceTrees doctree.MutableTrees } - kind := s.kindFromFileInfoOrSections(f, sections) - if kind == kinds.KindTerm { - s.PathSpec.MakePathsSanitized(sections) +// collectIdentities collects all identities from all trees matching the given key. +// This will at most match in one tree, but may give identities from multiple dimensions (e.g. language).
+func (t *pageTrees) collectIdentities(key string) []identity.Identity { + var ids []identity.Identity + if n := t.treePages.Get(key); n != nil { + n.ForEeachIdentity(func(id identity.Identity) bool { + ids = append(ids, id) + return false + }) } - - metaProvider := &pageMeta{kind: kind, sections: sections, bundled: bundled, s: s, f: f} - - ps, err := newPageBase(metaProvider) - if err != nil { - return nil, err + if n := t.treeResources.Get(key); n != nil { + n.ForEeachIdentity(func(id identity.Identity) bool { + ids = append(ids, id) + return false + }) } - if n.fi.Meta().IsRootFile { - // Make sure that the bundle/section we start walking from is always - // rendered. - // This is only relevant in server fast render mode. - ps.forceRender = true - } + return ids +} - n.p = ps - if ps.IsNode() { - ps.bucket = newPageBucket(ps) - } +// collectIdentitiesSurrounding collects all identities surrounding the given key. +func (t *pageTrees) collectIdentitiesSurrounding(key string, maxSamplesPerTree int) []identity.Identity { + // TODO1 test language coverage from this. + ids := t.collectIdentitiesSurroundingIn(key, maxSamplesPerTree, t.treePages) + ids = append(ids, t.collectIdentitiesSurroundingIn(key, maxSamplesPerTree, t.treeResources)...) + return ids +} - gi, err := s.h.gitInfoForPage(ps) - if err != nil { - return nil, fmt.Errorf("failed to load Git data: %w", err) +func (t *pageTrees) collectIdentitiesSurroundingIn(key string, maxSamples int, tree *doctree.NodeShiftTree[contentNodeI]) []identity.Identity { + var ids []identity.Identity + section, ok := tree.LongestPrefixAll(path.Dir(key)) + if ok { + count := 0 + prefix := section + "/" + level := strings.Count(prefix, "/") + tree.WalkPrefixRaw(prefix, func(s string, n contentNodeI) bool { + if level != strings.Count(s, "/") { + return true + } + n.ForEeachIdentity(func(id identity.Identity) bool { + ids = append(ids, id) + return false + }) + count++ + return count > maxSamples + }) } - ps.gitInfo = gi - owners, err := s.h.codeownersForPage(ps) - if err != nil { - return nil, fmt.Errorf("failed to load CODEOWNERS: %w", err) - } - ps.codeowners = owners + return ids +} - r, err := content() - if err != nil { - return nil, err +func (t *pageTrees) DeletePageAndResourcesBelow(ss ...string) { + commit1 := t.resourceTrees.Lock(true) + defer commit1() + commit2 := t.treePages.Lock(true) + defer commit2() + for _, s := range ss { + t.resourceTrees.DeletePrefix(paths.AddTrailingSlash(s)) + t.treePages.Delete(s) } - defer r.Close() +} - parseResult, err := pageparser.Parse( - r, - pageparser.Config{}, - ) - if err != nil { - return nil, err - } +// Shape shapes all trees in t to the given dimension. 
+func (t pageTrees) Shape(d, v int) *pageTrees { + t.treePages = t.treePages.Shape(d, v) + t.treeResources = t.treeResources.Shape(d, v) + t.treeTaxonomyEntries = t.treeTaxonomyEntries.Shape(d, v) - ps.pageContent = pageContent{ - source: rawPageContent{ - parsed: parseResult, - posMainContent: -1, - posSummaryEnd: -1, - posBodyStart: -1, - }, - } + return &t +} - if err := ps.mapContent(parentBucket, metaProvider); err != nil { - return nil, ps.wrapError(err) - } +var ( + _ resource.Identifier = pageMapQueryPagesInSection{} + _ resource.Identifier = pageMapQueryPagesBelowPath{} +) - if err := metaProvider.applyDefaultValues(n); err != nil { - return nil, err - } +type pageMapQueryPagesInSection struct { + pageMapQueryPagesBelowPath - ps.init.Add(func(context.Context) (any, error) { - pp, err := newPagePaths(s, ps, metaProvider) - if err != nil { - return nil, err - } + Recursive bool + IncludeSelf bool +} - outputFormatsForPage := ps.m.outputFormats() +func (q pageMapQueryPagesInSection) Key() string { + return "pagesInSection" + "/" + q.pageMapQueryPagesBelowPath.Key() + "/" + strconv.FormatBool(q.Recursive) + "/" + strconv.FormatBool(q.IncludeSelf) +} - // Prepare output formats for all sites. - // We do this even if this page does not get rendered on - // its own. It may be referenced via .Site.GetPage and - // it will then need an output format. - ps.pageOutputs = make([]*pageOutput, len(ps.s.h.renderFormats)) - created := make(map[string]*pageOutput) - shouldRenderPage := !ps.m.noRender() +// This needs to be hashable. +type pageMapQueryPagesBelowPath struct { + Path string - for i, f := range ps.s.h.renderFormats { - if po, found := created[f.Name]; found { - ps.pageOutputs[i] = po - continue - } + // Additional identifier for this query. + // Used as part of the cache key. + KeyPart string - render := shouldRenderPage - if render { - _, render = outputFormatsForPage.GetByName(f.Name) - } + // Page inclusion filter. + // May be nil. + Include predicate.P[*pageState] +} - po := newPageOutput(ps, pp, f, render) +func (q pageMapQueryPagesBelowPath) Key() string { + return q.Path + "/" + q.KeyPart +}
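As an in-package usage sketch of how these cache keys compose (the path and key part are hypothetical values; note that Include is a function and cannot be hashed, so callers must vary KeyPart when they vary the filter):

// A recursive section listing below /docs, cached under a key that encodes
// the path, the extra KeyPart, and both boolean flags.
q := pageMapQueryPagesInSection{
	pageMapQueryPagesBelowPath: pageMapQueryPagesBelowPath{
		Path:    "/docs",
		KeyPart: "global",
		Include: pagePredicates.ShouldListGlobal,
	},
	Recursive: true,
}
fmt.Println(q.Key()) // pagesInSection//docs/global/true/false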
- // Create a content provider for the first, - // we may be able to reuse it. - if i == 0 { - contentProvider, err := newPageContentOutput(ps, po) - if err != nil { - return nil, err + +// Apply fn to all pages in m matching the given predicate. +// fn may return true to stop the walk. +func (m *pageMap) forEachPage(include predicate.P[*pageState], fn func(p *pageState) (bool, error)) error { + if include == nil { + include = func(p *pageState) bool { + return true + } + } + w := &doctree.NodeShiftTreeWalker[contentNodeI]{ + Tree: m.treePages, + LockType: doctree.LockTypeRead, + Handle: func(key string, n contentNodeI, match doctree.DimensionFlag) (bool, error) { + if p, ok := n.(*pageState); ok && include(p) { + if terminate, err := fn(p); terminate || err != nil { + return terminate, err + } + } + return false, nil + }, + } + + return w.Walk(context.Background()) } -func (m *pageMap) newResource(fim hugofs.FileMetaInfo, owner *pageState) (resource.Resource, error) { - if owner == nil { - panic("owner is nil") - } - // TODO(bep) consolidate with multihost logic + clean up - outputFormats := owner.m.outputFormats() - seen := make(map[string]bool) - var targetBasePaths []string - // Make sure bundled resources are published to all of the output formats' - // sub paths. - for _, f := range outputFormats { - p := f.Path - if seen[p] { - continue +func (m *pageMap) forEeachPageIncludingBundledPages(include predicate.P[*pageState], fn func(p *pageState) (bool, error)) error { + if include == nil { + include = func(p *pageState) bool { + return true } - seen[p] = true - targetBasePaths = append(targetBasePaths, p) + } + if err := m.forEachPage(include, fn); err != nil { + return err } - meta := fim.Meta() - r := func() (hugio.ReadSeekCloser, error) { - return meta.Open() + w := &doctree.NodeShiftTreeWalker[contentNodeI]{ + Tree: m.treeResources, + LockType: doctree.LockTypeRead, + Handle: func(key string, n contentNodeI, match doctree.DimensionFlag) (bool, error) { + if rs, ok := n.(*resourceSource); ok { + if p, ok := rs.r.(*pageState); ok && include(p) { + if terminate, err := fn(p); terminate || err != nil { + return terminate, err + } + } + } + return false, nil + }, } - target := strings.TrimPrefix(meta.Path, owner.File().Dir()) + return w.Walk(context.Background()) +} - return owner.s.ResourceSpec.New( - resources.ResourceSourceDescriptor{ - TargetPaths: owner.getTargetPaths, - OpenReadSeekCloser: r, - FileInfo: fim, - RelTargetFilename: target, - TargetBasePaths: targetBasePaths, - LazyPublish: !owner.m.buildConfig.PublishResources, - }) +func (m *pageMap) getOrCreatePagesFromCache( + key string, create func(string) (page.Pages, error), +) (page.Pages, error) { + return m.cachePages.GetOrCreate(key, create) +}
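getOrCreatePagesFromCache above is the dynacache get-or-create pattern used throughout this file: the value is computed once per key and then served from the LRU partition until it is evicted or cleared on rebuild. An in-package usage sketch with a hypothetical key:

// Build a page list once on a cache miss, then serve it from the partition.
pages, err := m.getOrCreatePagesFromCache("/docs/my-listing", func(key string) (page.Pages, error) {
	// Expensive work runs only when the key is not cached.
	var pas page.Pages
	// ... collect and sort pages ...
	return pas, nil
})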
-func (m *pageMap) createSiteTaxonomies() error { - m.s.taxonomies = make(page.TaxonomyList) - var walkErr error - m.taxonomies.Walk(func(s string, v any) bool { - n := v.(*contentNode) - t := n.viewInfo +func (m *pageMap) getPagesInSection(q pageMapQueryPagesInSection) page.Pages { + cacheKey := q.Key() - viewName := t.name + pages, err := m.getOrCreatePagesFromCache(cacheKey, func(string) (page.Pages, error) { + prefix := paths.AddTrailingSlash(q.Path) - if t.termKey == "" { - m.s.taxonomies[viewName.plural] = make(page.Taxonomy) - } else { - taxonomy := m.s.taxonomies[viewName.plural] - if taxonomy == nil { - walkErr = fmt.Errorf("missing taxonomy: %s", viewName.plural) - return true - } - m.taxonomyEntries.WalkPrefix(s, func(ss string, v any) bool { - b2 := v.(*contentNode) - info := b2.viewInfo - taxonomy[info.termKey] = append(taxonomy[info.termKey], page.NewWeightedPage(info.weight, info.ref.p, n.p)) + var ( + pas page.Pages + otherBranch string + ) - return false - }) + include := q.Include + if include == nil { + include = pagePredicates.ShouldListLocal } - return false - }) + w := &doctree.NodeShiftTreeWalker[contentNodeI]{ + Tree: m.treePages, + Prefix: prefix, + Handle: func(key string, n contentNodeI, match doctree.DimensionFlag) (bool, error) { + if q.Recursive { + if p, ok := n.(*pageState); ok && include(p) { + pas = append(pas, p) + } + return false, nil + } - for _, taxonomy := range m.s.taxonomies { - for _, v := range taxonomy { - v.Sort() - } - } + // We store both leaves and branches in the same tree, so for non-recursive walks, + // we need to walk until the end, but can skip + // any not belonging to child branches. + if otherBranch != "" && strings.HasPrefix(key, otherBranch) { + return false, nil + } - return walkErr -} + if p, ok := n.(*pageState); ok && include(p) { + pas = append(pas, p) + } -func (m *pageMap) createListAllPages() page.Pages { - pages := make(page.Pages, 0) + if n.isContentNodeBranch() { + otherBranch = key + "/" + } - m.contentMap.pageTrees.Walk(func(s string, n *contentNode) bool { - if n.p == nil { - panic(fmt.Sprintf("BUG: page not set for %q", s)) + return false, nil + }, } - if contentTreeNoListAlwaysFilter(s, n) { - return false + + err := w.Walk(context.Background()) + + if err == nil { + if q.IncludeSelf { + if n := m.treePages.Get(q.Path); n != nil { + if p, ok := n.(*pageState); ok && include(p) { + pas = append(pas, p) + } + } + } + page.SortByDefault(pas) } - pages = append(pages, n.p) - return false + + return pas, err }) + if err != nil { + panic(err) + } - page.SortByDefault(pages) return pages } -func (m *pageMap) assemblePages() error { - m.taxonomyEntries.DeletePrefix("/") +func (m *pageMap) getPagesWithTerm(q pageMapQueryPagesBelowPath) page.Pages { + key := q.Key() - if err := m.assembleSections(); err != nil { - return err - } + v, err := m.cachePages.GetOrCreate(key, func(string) (page.Pages, error) { + var pas page.Pages + include := q.Include + if include == nil { + include = pagePredicates.ShouldListLocal + } + + err := m.treeTaxonomyEntries.WalkPrefix( + doctree.LockTypeNone, + paths.AddTrailingSlash(q.Path), + func(s string, n *weightedContentNode) (bool, error) { + p := n.n.(*pageState) + if !include(p) { + return false, nil + } + pas = append(pas, pageWithWeight0{n.weight, p}) + return false, nil + }, + ) + if err != nil { + return nil, err + } - var err error + page.SortByDefault(pas) + return pas, nil + }) if err != nil { - return err + panic(err) } - m.pages.Walk(func(s string, v any) bool { - n := v.(*contentNode) - - var shouldBuild bool - - defer func() { - // Make sure we always rebuild the view cache.
- if shouldBuild && err == nil && n.p != nil { - m.attachPageToViews(s, n) - } - }() - - if n.p != nil { - // A rebuild - shouldBuild = true - return false - } + return v +} - var parent *contentNode - var parentBucket *pagesMapBucket +func (m *pageMap) getTermsForPageInTaxonomy(path, taxonomy string) page.Pages { + prefix := paths.AddLeadingSlash(taxonomy) - _, parent = m.getSection(s) - if parent == nil { - panic(fmt.Sprintf("BUG: parent not set for %q", s)) - } - parentBucket = parent.p.bucket + v, err := m.cachePages.GetOrCreate(prefix+path, func(string) (page.Pages, error) { + var pas page.Pages - n.p, err = m.newPageFromContentNode(n, parentBucket, nil) + err := m.treeTaxonomyEntries.WalkPrefix( + doctree.LockTypeNone, + paths.AddTrailingSlash(prefix), + func(s string, n *weightedContentNode) (bool, error) { + if strings.HasSuffix(s, path) { + pas = append(pas, n.term) + } + return false, nil + }, + ) if err != nil { - return true - } - - shouldBuild = !(n.p.Kind() == kinds.KindPage && m.cfg.pageDisabled) && m.s.shouldBuild(n.p) - if !shouldBuild { - m.deletePage(s) - return false - } - - n.p.treeRef = &contentTreeRef{ - m: m, - t: m.pages, - n: n, - key: s, + return nil, err } - if err = m.assembleResources(s, n.p, parentBucket); err != nil { - return true - } + page.SortByDefault(pas) - return false + return pas, nil }) + if err != nil { + panic(err) + } - m.deleteOrphanSections() - - return err + return v } -func (m *pageMap) assembleResources(s string, p *pageState, parentBucket *pagesMapBucket) error { - var err error - - m.resources.WalkPrefix(s, func(s string, v any) bool { - n := v.(*contentNode) - meta := n.fi.Meta() - classifier := meta.Classifier - var r resource.Resource - switch classifier { - case files.ContentClassContent: - var rp *pageState - rp, err = m.newPageFromContentNode(n, parentBucket, p) - if err != nil { - return true - } - rp.m.resourcePath = filepath.ToSlash(strings.TrimPrefix(rp.File().Path(), p.File().Dir())) - r = rp +func (m *pageMap) forEachResourceInPage( + ps *pageState, + lockType doctree.LockType, + exact bool, + handle func(resourceKey string, n contentNodeI, match doctree.DimensionFlag) (bool, error), +) error { + keyPage := ps.Path() + if keyPage == "/" { + keyPage = "" + } + prefix := paths.AddTrailingSlash(ps.Path()) + isBranch := ps.IsNode() + + rw := &doctree.NodeShiftTreeWalker[contentNodeI]{ + Tree: m.treeResources, + Prefix: prefix, + LockType: lockType, + Exact: exact, + } - case files.ContentClassFile: - r, err = m.newResource(n.fi, p) - if err != nil { - return true + rw.Handle = func(resourceKey string, n contentNodeI, match doctree.DimensionFlag) (bool, error) { + if isBranch { + ownerKey, _ := m.treePages.LongestPrefixAll(resourceKey) + if ownerKey != keyPage { + // Stop walking downwards, someone else owns this resource. 
+ rw.SkipPrefix(ownerKey + "/") + return false, nil } - default: - panic(fmt.Sprintf("invalid classifier: %q", classifier)) } + return handle(resourceKey, n, match) + } - p.resources = append(p.resources, r) - return false - }) - - return err + return rw.Walk(context.Background()) } -func (m *pageMap) assembleSections() error { - var sectionsToDelete []string - var err error +func (m *pageMap) getResourcesForPage(ps *pageState) (resource.Resources, error) { + var res resource.Resources + m.forEachResourceInPage(ps, doctree.LockTypeNone, false, func(resourceKey string, n contentNodeI, match doctree.DimensionFlag) (bool, error) { + rs := n.(*resourceSource) + if rs.r != nil { + res = append(res, rs.r) + } + return false, nil + }) + return res, nil +} - m.sections.Walk(func(s string, v any) bool { - n := v.(*contentNode) - var shouldBuild bool +func (m *pageMap) getOrCreateResourcesForPage(ps *pageState) resource.Resources { + keyPage := ps.Path() + if keyPage == "/" { + keyPage = "" + } + key := keyPage + "/get-resources-for-page" + v, err := m.cacheResources.GetOrCreate(key, func(string) (resource.Resources, error) { + res, err := m.getResourcesForPage(ps) + if err != nil { + return nil, err + } - defer func() { - // Make sure we always rebuild the view cache. - if shouldBuild && err == nil && n.p != nil { - m.attachPageToViews(s, n) - if n.p.IsHome() { - m.s.home = n.p + if translationKey := ps.m.translationKey; translationKey != "" { + // This should not be a very common case. + // Merge in resources from the other languages. + translatedPages, _ := m.s.h.translationKeyPages.Get(translationKey) + for _, tp := range translatedPages { + if tp == ps { + continue + } + tps := tp.(*pageState) + // Make sure we query from the correct language root. + res2, err := tps.s.pageMap.getResourcesForPage(tps) + if err != nil { + return nil, err + } + // Add if Name not already in res. + for _, r := range res2 { + var found bool + for _, r2 := range res { + if r2.Name() == r.Name() { + found = true + break + } + } + if !found { + res = append(res, r) + } } } - }() - - sections := m.splitKey(s) - - if n.p != nil { - if n.p.IsHome() { - m.s.home = n.p - } - shouldBuild = true - return false } - var parent *contentNode - var parentBucket *pagesMapBucket - - if s != "/" { - _, parent = m.getSection(s) - if parent == nil || parent.p == nil { - panic(fmt.Sprintf("BUG: parent not set for %q", s)) + lessFunc := func(i, j int) bool { + ri, rj := res[i], res[j] + if ri.ResourceType() < rj.ResourceType() { + return true } - } - if parent != nil { - parentBucket = parent.p.bucket - } else if s == "/" { - parentBucket = m.s.siteBucket - } + p1, ok1 := ri.(page.Page) + p2, ok2 := rj.(page.Page) - kind := kinds.KindSection - if s == "/" { - kind = kinds.KindHome - } + if ok1 != ok2 { + // Pull pages behind other resources. - if n.fi != nil { - n.p, err = m.newPageFromContentNode(n, parentBucket, nil) - if err != nil { - return true + return ok2 } - } else { - n.p = m.s.newPage(n, parentBucket, kind, "", sections...) - } + if ok1 { + return page.DefaultPageSort(p1, p2) + } - shouldBuild = m.s.shouldBuild(n.p) - if !shouldBuild { - sectionsToDelete = append(sectionsToDelete, s) - return false - } + // Make sure not to use RelPermalink or any of the other methods that + // trigger lazy publishing.
+ return ri.Name() < rj.Name() } - if err = m.assembleResources(s+cmLeafSeparator, n.p, parentBucket); err != nil { - return true + sort.SliceStable(res, lessFunc) + + if len(ps.m.resourcesMetadata) > 0 { + for i, r := range res { + res[i] = resources.CloneWithMetadataIfNeeded(ps.m.resourcesMetadata, r) + } + sort.SliceStable(res, lessFunc) } - return false + return res, nil }) - - for _, s := range sectionsToDelete { - m.deleteSectionByPath(s) + if err != nil { + panic(err) } - return err + return v } -func (m *pageMap) assembleTaxonomies() error { - var taxonomiesToDelete []string - var err error +type weightedContentNode struct { + n contentNodeI + weight int + term *pageWithOrdinal +} - m.taxonomies.Walk(func(s string, v any) bool { - n := v.(*contentNode) +type buildStateReseter interface { + resetBuildState() +} - if n.p != nil { - return false - } +type contentNodeI interface { + identity.IdentityProvider + identity.ForEeachIdentityProvider + Path() string + isContentNodeBranch() bool + buildStateReseter + resource.StaleMarker +} - kind := n.viewInfo.kind() - sections := n.viewInfo.sections() +var _ contentNodeI = (*contentNodeIs)(nil) - _, parent := m.getTaxonomyParent(s) - if parent == nil || parent.p == nil { - panic(fmt.Sprintf("BUG: parent not set for %q", s)) - } - parentBucket := parent.p.bucket +type contentNodeIs []contentNodeI - if n.fi != nil { - n.p, err = m.newPageFromContentNode(n, parent.p.bucket, nil) - if err != nil { - return true - } - } else { - title := "" - if kind == kinds.KindTerm { - title = n.viewInfo.term() - } - n.p = m.s.newPage(n, parent.p.bucket, kind, title, sections...) - } +func (n contentNodeIs) Path() string { + return n[0].Path() +} - if !m.s.shouldBuild(n.p) { - taxonomiesToDelete = append(taxonomiesToDelete, s) - return false - } +func (n contentNodeIs) isContentNodeBranch() bool { + return n[0].isContentNodeBranch() +} - n.p.treeRef = &contentTreeRef{ - m: m, - t: m.taxonomies, - n: n, - key: s, - } +func (n contentNodeIs) GetIdentity() identity.Identity { + return n[0].GetIdentity() +} - if err = m.assembleResources(s+cmLeafSeparator, n.p, parentBucket); err != nil { - return true +func (n contentNodeIs) ForEeachIdentity(f func(identity.Identity) bool) { + for _, nn := range n { + if nn != nil { + nn.ForEeachIdentity(f) } - - return false - }) - - for _, s := range taxonomiesToDelete { - m.deleteTaxonomy(s) } - - return err } -func (m *pageMap) attachPageToViews(s string, b *contentNode) { - if m.cfg.taxonomyDisabled { - return +func (n contentNodeIs) resetBuildState() { + for _, nn := range n { + if nn != nil { + nn.resetBuildState() + } } +} - for _, viewName := range m.cfg.taxonomyConfig { - vals := types.ToStringSlicePreserveString(getParam(b.p, viewName.plural, false)) - if vals == nil { - continue - } - w := getParamToLower(b.p, viewName.plural+"_weight") - weight, err := cast.ToIntE(w) - if err != nil { - m.s.Log.Errorf("Unable to convert taxonomy weight %#v to int for %q", w, b.p.Pathc()) - // weight will equal zero, so let the flow continue +func (n contentNodeIs) MarkStale() { + for _, nn := range n { + if nn != nil { + nn.MarkStale() } + } +} - for i, v := range vals { - termKey := m.s.getTaxonomyKey(v) +type contentNodeShifter struct { + numLanguages int +} - bv := &contentNode{ - viewInfo: &contentBundleViewInfo{ - ordinal: i, - name: viewName, - termKey: termKey, - termOrigin: v, - weight: weight, - ref: b, - }, +func (s *contentNodeShifter) Delete(n contentNodeI, dimension doctree.Dimension) (bool, bool) { + lidx := 
dimension[0] + switch v := n.(type) { + case contentNodeIs: + resource.MarkStale(v[lidx]) + wasDeleted := v[lidx] != nil + v[lidx] = nil + isEmpty := true + for _, vv := range v { + if vv != nil { + isEmpty = false + break } - - var key string - if strings.HasSuffix(s, "/") { - key = cleanSectionTreeKey(path.Join(viewName.plural, termKey, s)) - } else { - key = cleanTreeKey(path.Join(viewName.plural, termKey, s)) + } + return wasDeleted, isEmpty + case resourceSources: + resource.MarkStale(v[lidx]) + wasDeleted := v[lidx] != nil + v[lidx] = nil + isEmpty := true + for _, vv := range v { + if vv != nil { + isEmpty = false + break } - m.taxonomyEntries.Insert(key, bv) } + return wasDeleted, isEmpty + case *resourceSource: + resource.MarkStale(v) + return true, true + case *pageState: + resource.MarkStale(v) + return true, true + default: + panic(fmt.Sprintf("unknown type %T", n)) } } -type pageMapQuery struct { - Prefix string - Filter contentTreeNodeCallback -} - -func (m *pageMap) collectPages(query pageMapQuery, fn func(c *contentNode)) error { - if query.Filter == nil { - query.Filter = contentTreeNoListAlwaysFilter +func (s *contentNodeShifter) Shift(n contentNodeI, dimension doctree.Dimension, exact bool) (contentNodeI, bool, doctree.DimensionFlag) { + lidx := dimension[0] + // How accurate is the match. + accuracy := doctree.DimensionLanguage + switch v := n.(type) { + case contentNodeIs: + if len(v) == 0 { + panic("empty contentNodeIs") + } + vv := v[lidx] + if vv != nil { + return vv, true, accuracy + } + return nil, false, 0 + case resourceSources: + vv := v[lidx] + if vv != nil { + return vv, true, doctree.DimensionLanguage + } + if exact { + return nil, false, 0 + } + // For non content resources, pick the first match. + for _, vv := range v { + if vv != nil { + if vv.isPage() { + return nil, false, 0 + } + return vv, true, 0 + } + } + case *resourceSource: + if v.LangIndex() == lidx { + return v, true, doctree.DimensionLanguage + } + if !v.isPage() && !exact { + return v, true, 0 + } + case *pageState: + if v.s.languagei == lidx { + return n, true, doctree.DimensionLanguage + } + default: + panic(fmt.Sprintf("unknown type %T", n)) } - - m.pages.WalkQuery(query, func(s string, n *contentNode) bool { - fn(n) - return false - }) - - return nil + return nil, false, 0 } -func (m *pageMap) collectPagesAndSections(query pageMapQuery, fn func(c *contentNode)) error { - if err := m.collectSections(query, fn); err != nil { - return err +func (s *contentNodeShifter) ForEeachInDimension(n contentNodeI, d int, f func(contentNodeI) bool) { + if d != doctree.DimensionLanguage.Index() { + panic("only language dimension supported") } - query.Prefix = query.Prefix + cmBranchSeparator - if err := m.collectPages(query, fn); err != nil { - return err + switch vv := n.(type) { + case contentNodeIs: + for _, v := range vv { + if v != nil { + if f(v) { + return + } + } + } + default: + f(vv) } - - return nil } -func (m *pageMap) collectSections(query pageMapQuery, fn func(c *contentNode)) error { - level := strings.Count(query.Prefix, "/") - - return m.collectSectionsFn(query, func(s string, c *contentNode) bool { - if strings.Count(s, "/") != level+1 { - return false +func (s *contentNodeShifter) InsertInto(old, new contentNodeI, dimension doctree.Dimension) contentNodeI { + langi := dimension[doctree.DimensionLanguage.Index()] + switch vv := old.(type) { + case *pageState: + newp, ok := new.(*pageState) + if !ok { + panic(fmt.Sprintf("unknown type %T", new)) } + if vv.s.languagei == newp.s.languagei 
&& newp.s.languagei == langi { + return new + } + is := make(contentNodeIs, s.numLanguages) + is[vv.s.languagei] = old + is[langi] = new + return is + case contentNodeIs: + vv[langi] = new + return vv + case resourceSources: + vv[langi] = new.(*resourceSource) + return vv + case *resourceSource: + newp, ok := new.(*resourceSource) + if !ok { + panic(fmt.Sprintf("unknown type %T", new)) + } + if vv.LangIndex() == newp.LangIndex() && newp.LangIndex() == langi { + return new + } + rs := make(resourceSources, s.numLanguages) + rs[vv.LangIndex()] = vv + rs[langi] = newp + return rs - fn(c) + default: + panic(fmt.Sprintf("unknown type %T", old)) + } +} - return false - }) +func (s *contentNodeShifter) Insert(old, new contentNodeI) contentNodeI { + switch vv := old.(type) { + case *pageState: + newp, ok := new.(*pageState) + if !ok { + panic(fmt.Sprintf("unknown type %T", new)) + } + if vv.s.languagei == newp.s.languagei { + return new + } + is := make(contentNodeIs, s.numLanguages) + is[newp.s.languagei] = new + is[vv.s.languagei] = old + return is + case contentNodeIs: + newp, ok := new.(*pageState) + if !ok { + panic(fmt.Sprintf("unknown type %T", new)) + } + vv[newp.s.languagei] = new + return vv + case *resourceSource: + newp, ok := new.(*resourceSource) + if !ok { + panic(fmt.Sprintf("unknown type %T", new)) + } + if vv.LangIndex() == newp.LangIndex() { + return new + } + rs := make(resourceSources, s.numLanguages) + rs[newp.LangIndex()] = newp + rs[vv.LangIndex()] = vv + return rs + case resourceSources: + newp, ok := new.(*resourceSource) + if !ok { + panic(fmt.Sprintf("unknown type %T", new)) + } + vv[newp.LangIndex()] = newp + return vv + default: + panic(fmt.Sprintf("unknown type %T", old)) + } } -func (m *pageMap) collectSectionsFn(query pageMapQuery, fn func(s string, c *contentNode) bool) error { - if !strings.HasSuffix(query.Prefix, "/") { - query.Prefix += "/" +func newPageMap(i int, s *Site, mcache *dynacache.Cache, pageTrees *pageTrees) *pageMap { + var m *pageMap + + var taxonomiesConfig taxonomiesConfig = s.conf.Taxonomies + + m = &pageMap{ + pageTrees: pageTrees.Shape(0, i), + cachePages: dynacache.GetOrCreatePartition[string, page.Pages](mcache, fmt.Sprintf("/pags/%d", i), dynacache.OptionsPartition{Weight: 10, ClearWhen: dynacache.ClearOnRebuild}), + cacheResources: dynacache.GetOrCreatePartition[string, resource.Resources](mcache, fmt.Sprintf("/ress/%d", i), dynacache.OptionsPartition{Weight: 60, ClearWhen: dynacache.ClearOnRebuild}), + cacheContentRendered: dynacache.GetOrCreatePartition[string, *resources.StaleValue[contentSummary]](mcache, fmt.Sprintf("/cont/ren/%d", i), dynacache.OptionsPartition{Weight: 70, ClearWhen: dynacache.ClearOnChange}), + cacheContentPlain: dynacache.GetOrCreatePartition[string, *resources.StaleValue[contentPlainPlainWords]](mcache, fmt.Sprintf("/cont/pla/%d", i), dynacache.OptionsPartition{Weight: 70, ClearWhen: dynacache.ClearOnChange}), + contentTableOfContents: dynacache.GetOrCreatePartition[string, *resources.StaleValue[contentTableOfContents]](mcache, fmt.Sprintf("/cont/toc/%d", i), dynacache.OptionsPartition{Weight: 70, ClearWhen: dynacache.ClearOnChange}), + cacheContentSource: dynacache.GetOrCreatePartition[string, *resources.StaleValue[[]byte]](mcache, fmt.Sprintf("/cont/src/%d", i), dynacache.OptionsPartition{Weight: 70, ClearWhen: dynacache.ClearOnChange}), + + cfg: contentMapConfig{ + lang: s.Lang(), + taxonomyConfig: taxonomiesConfig.Values(), + taxonomyDisabled: !s.conf.IsKindEnabled(kinds.KindTaxonomy), + taxonomyTermDisabled: 
!s.conf.IsKindEnabled(kinds.KindTerm), + pageDisabled: !s.conf.IsKindEnabled(kinds.KindPage), + }, + i: i, + s: s, } - m.sections.WalkQuery(query, func(s string, n *contentNode) bool { - return fn(s, n) - }) + m.pageReverseIndex = &contentTreeReverseIndex{ + initFn: func(rm map[any]contentNodeI) { + add := func(k string, n contentNodeI) { + existing, found := rm[k] + if found && existing != ambiguousContentNode { + rm[k] = ambiguousContentNode + } else if !found { + rm[k] = n + } + } - return nil + w := &doctree.NodeShiftTreeWalker[contentNodeI]{ + Tree: m.treePages, + LockType: doctree.LockTypeRead, + Handle: func(s string, n contentNodeI, match doctree.DimensionFlag) (bool, error) { + p := n.(*pageState) + if p.File() != nil { + add(p.File().FileInfo().Meta().PathInfo.BaseNameNoIdentifier(), p) + } + return false, nil + }, + } + + if err := w.Walk(context.Background()); err != nil { + panic(err) + } + }, + contentTreeReverseIndexMap: &contentTreeReverseIndexMap{}, + } + + return m } -func (m *pageMap) collectSectionsRecursiveIncludingSelf(query pageMapQuery, fn func(c *contentNode)) error { - return m.collectSectionsFn(query, func(s string, c *contentNode) bool { - fn(c) - return false - }) +type contentTreeReverseIndex struct { + initFn func(rm map[any]contentNodeI) + *contentTreeReverseIndexMap } -func (m *pageMap) collectTaxonomies(prefix string, fn func(c *contentNode)) error { - m.taxonomies.WalkQuery(pageMapQuery{Prefix: prefix}, func(s string, n *contentNode) bool { - fn(n) - return false - }) - return nil +func (c *contentTreeReverseIndex) Reset() { + c.contentTreeReverseIndexMap = &contentTreeReverseIndexMap{ + m: make(map[any]contentNodeI), + } } -// withEveryBundlePage applies fn to every Page, including those bundled inside -// leaf bundles. -func (m *pageMap) withEveryBundlePage(fn func(p *pageState) bool) { - m.bundleTrees.Walk(func(s string, n *contentNode) bool { - if n.p != nil { - return fn(n.p) - } - return false +func (c *contentTreeReverseIndex) Get(key any) contentNodeI { + c.init.Do(func() { + c.m = make(map[any]contentNodeI) + c.initFn(c.contentTreeReverseIndexMap.m) }) + return c.m[key] } -type pageMaps struct { - workers *para.Workers - pmaps []*pageMap +type contentTreeReverseIndexMap struct { + init sync.Once + m map[any]contentNodeI } -// deleteSection deletes the entire section from s. 
-func (m *pageMaps) deleteSection(s string) { - m.withMaps(func(pm *pageMap) error { - pm.deleteSectionByPath(s) - return nil - }) -} -func (m *pageMaps) AssemblePages() error { - return m.withMaps(func(pm *pageMap) error { - if err := pm.CreateMissingNodes(); err != nil { - return err +func (m *pageMap) debugPrint(prefix string, maxLevel int, w io.Writer) { + noshift := false + var prevKey string + + pageWalker := &doctree.NodeShiftTreeWalker[contentNodeI]{ + NoShift: noshift, + Tree: m.treePages, + Prefix: prefix, + WalkContext: &doctree.WalkContext[contentNodeI]{}, + } + + resourceWalker := pageWalker.Extend() + resourceWalker.Tree = m.treeResources + + pageWalker.Handle = func(keyPage string, n contentNodeI, match doctree.DimensionFlag) (bool, error) { + level := strings.Count(keyPage, "/") + if level > maxLevel { + return false, nil + } + const indentStr = " " + p := n.(*pageState) + s := strings.TrimPrefix(keyPage, paths.CommonDir(prevKey, keyPage)) + lenIndent := len(keyPage) - len(s) + fmt.Fprint(w, strings.Repeat(indentStr, lenIndent)) + info := fmt.Sprintf("%s lm: %s (%s)", s, p.Lastmod().Format("2006-01-02"), p.Kind()) + fmt.Fprintln(w, info) + switch p.Kind() { + case kinds.KindTerm: + m.treeTaxonomyEntries.WalkPrefix( + doctree.LockTypeNone, + keyPage+"/", + func(s string, n *weightedContentNode) (bool, error) { + fmt.Fprint(w, strings.Repeat(indentStr, lenIndent+4)) + fmt.Fprintln(w, s) + return false, nil + }, + ) } - if err := pm.assemblePages(); err != nil { - return err + isBranch := n.isContentNodeBranch() + prevKey = keyPage + resourceWalker.Prefix = keyPage + "/" + + resourceWalker.Handle = func(ss string, n contentNodeI, match doctree.DimensionFlag) (bool, error) { + if isBranch { + ownerKey, _ := pageWalker.Tree.LongestPrefix(ss, true, nil) + if ownerKey != keyPage { + // Stop walking downwards, someone else owns this resource. + pageWalker.SkipPrefix(ownerKey + "/") + return false, nil + } + } + fmt.Fprint(w, strings.Repeat(indentStr, lenIndent+8)) + fmt.Fprintln(w, ss+" (resource)") + return false, nil } - if err := pm.createMissingTaxonomyNodes(); err != nil { - return err + return false, resourceWalker.Walk(context.Background()) + } + + err := pageWalker.Walk(context.Background()) + if err != nil { + panic(err) + } +} +func (h *HugoSites) resolveAndClearStateForIdentities( + ctx context.Context, + l logg.LevelLogger, + cachebuster func(s string) bool, changes []identity.Identity, +) error { + h.Log.Debug().Log(logg.StringFunc( + func() string { + var sb strings.Builder + for _, change := range changes { + var key string + if kp, ok := change.(resource.Identifier); ok { + key = " " + kp.Key() + } + sb.WriteString(fmt.Sprintf("Direct dependencies of %q (%T%s) =>\n", change.IdentifierBase(), change, key)) + seen := map[string]bool{ + change.IdentifierBase(): true, + } + // Print the top level dependencies.
+ identity.WalkIdentitiesDeep(change, func(level int, id identity.Identity) bool { + if level > 1 { + return true + } + if !seen[id.IdentifierBase()] { + sb.WriteString(fmt.Sprintf(" %s%s\n", strings.Repeat(" ", level), id.IdentifierBase())) + } + seen[id.IdentifierBase()] = true + return false + }) + } + return sb.String() + }), + ) + + for _, id := range changes { + if staler, ok := id.(resource.Staler); ok { + h.Log.Trace(logg.StringFunc(func() string { return fmt.Sprintf("Marking stale: %s (%T)\n", id, id) })) + staler.MarkStale() + } + } - // Handle any new sections created in the step above. - if err := pm.assembleSections(); err != nil { - return err + // The order matters here: + // 1. Handle the cache busters first, as those may produce identities for the page reset step. + // 2. Then reset the page outputs, which may mark some resources as stale. + // 3. Then GC the cache. + if cachebuster != nil { + if err := loggers.TimeTrackfn(func() (logg.LevelLogger, error) { + ll := l.WithField("substep", "gc dynacache cachebuster") + + shouldDelete := func(k, v any) bool { + if cachebuster == nil { + return false + } + var b bool + if s, ok := k.(string); ok { + b = cachebuster(s) + } + + if b { + identity.WalkIdentitiesShallow(v, func(level int, id identity.Identity) bool { + // Add them to the change set so we can reset any page that depends on them. + changes = append(changes, id) + return false + }) + } + + return b + } + + h.MemCache.ClearMatching(shouldDelete) + + return ll, nil + }); err != nil { + return err + } + } - if pm.s.home == nil { - // Home is disabled, everything is. - pm.bundleTrees.DeletePrefix("") - return nil - } + // Remove duplicates + seen := make(map[identity.Identity]bool) + var n int + for _, id := range changes { + if !seen[id] { + seen[id] = true + changes[n] = id + n++ } + } + changes = changes[:n] - if err := pm.assembleTaxonomies(); err != nil { - return err - } + if err := loggers.TimeTrackfn(func() (logg.LevelLogger, error) { + // changesLeft: The IDs that the pages are dependent on. + // changesRight: The IDs that the pages depend on. + ll := l.WithField("substep", "resolve page output change set").WithField("changes", len(changes)) + + checkedCount, matchCount, err := h.resolveAndResetDependententPageOutputs(ctx, changes) + ll = ll.WithField("checked", checkedCount).WithField("matches", matchCount) + return ll, err + }); err != nil { + return err + } - if err := pm.createSiteTaxonomies(); err != nil { - return err - } + if err := loggers.TimeTrackfn(func() (logg.LevelLogger, error) { + ll := l.WithField("substep", "gc dynacache") + + h.MemCache.ClearOnRebuild(changes...) + h.Log.Trace(logg.StringFunc(func() string { + var sb strings.Builder + sb.WriteString("dynacache keys:\n") + for _, key := range h.MemCache.Keys(nil) { + sb.WriteString(fmt.Sprintf(" %s\n", key)) + } + return sb.String() + })) + return ll, nil + }); err != nil { + return err + } + + return nil +}
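For orientation, this is the shape of the cachebuster predicate that the function above consumes (a hedged sketch; the matching rule here is hypothetical and user-configured in practice). It mirrors the TailwindCSS/hugo_stats.json example from the commit message: when the stats file changes, every cached key that looks like a CSS resource is busted and its identities are added to the change set:

// Hypothetical cachebuster: bust cached CSS entries so a Tailwind-style
// pipeline re-runs on the next render.
cachebuster := func(key string) bool {
	return strings.HasSuffix(key, ".css")
}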
+// The left change set is the IDs that the pages are dependent on. +// The right change set is the IDs that the pages depend on. +func (h *HugoSites) resolveAndResetDependententPageOutputs(ctx context.Context, changes []identity.Identity) (int, int, error) { + if changes == nil { + return 0, 0, nil + } + + // This can be shared (many of the same IDs are repeated). + depsFinder := identity.NewFinder(identity.FinderConfig{}) + + h.Log.Trace(logg.StringFunc(func() string { + var sb strings.Builder + sb.WriteString("resolve page dependencies: ") + for _, id := range changes { + sb.WriteString(fmt.Sprintf(" %T: %s|", id, id.IdentifierBase())) } + return sb.String() + })) + + var ( + resetCounter atomic.Int64 + checkedCounter atomic.Int64 + ) + + resetPo := func(po *pageOutput, r identity.FinderResult) { + if po.pco != nil { + po.pco.Reset() // Will invalidate content cache. + } + + po.renderState = 0 + po.p.resourcesPublishInit = &sync.Once{} + if r == identity.FinderFoundOneOfMany { + // Will force a re-render even in fast render mode. + po.renderOnce = false + } + resetCounter.Add(1) + h.Log.Trace(logg.StringFunc(func() string { + p := po.p + return fmt.Sprintf("Resetting page output %s for %s for output %s\n", p.Kind(), p.Path(), po.f.Name) + })) + } + + // This can be a relatively expensive operation, so we do it in parallel. + g := rungroup.Run[*pageState](ctx, rungroup.Config[*pageState]{ + NumWorkers: h.numWorkers, + Handle: func(ctx context.Context, p *pageState) error { + if !p.isRenderedAny() { + // This needs no reset, so no need to check it. + return nil + } + // First check the top level dependency manager. + for _, id := range changes { + checkedCounter.Add(1) + if r := depsFinder.Contains(id, p.dependencyManager, 100); r > identity.FinderFoundOneOfManyRepetition { + for _, po := range p.pageOutputs { + resetPo(po, r) + } + // Done. + return nil + } + } + // Then do a more fine-grained reset for each output format.
OUTPUTS: + for _, po := range p.pageOutputs { + if !po.isRendered() { + continue + } + for _, id := range changes { + checkedCounter.Add(1) + if r := depsFinder.Contains(id, po.dependencyManagerOutput, 2); r > identity.FinderFoundOneOfManyRepetition { + resetPo(po, r) + continue OUTPUTS + } + } + } + return nil + }, + }) - sw := &sectionWalker{m: pm.contentMap} - a := sw.applyAggregates() - if a.mainSection != "" { - // Note, sites that have no custom config share a common config struct pointer. - // This means that we currently do not support setting different values per language. - // The end user can, however, configure this per language if needed. - mainSections := []string{strings.TrimRight(a.mainSection, "/")} - pm.s.s.conf.C.SetMainSectionsIfNotSet(mainSections) - } - pm.s.lastmod = a.datesAll.Lastmod() - if resource.IsZeroDates(pm.s.home) { - pm.s.home.m.Dates = a.datesAll - } - return nil - }) -} -func (m *pageMaps) walkBundles(fn func(n *contentNode) bool) { - _ = m.withMaps(func(pm *pageMap) error { - pm.bundleTrees.Walk(func(s string, n *contentNode) bool { - return fn(n) - }) - return nil + h.withPage(func(s string, p *pageState) bool { + var needToCheck bool + for _, po := range p.pageOutputs { + if po.isRendered() { + needToCheck = true + break + } + } + if needToCheck { + g.Enqueue(p) + } + return false }) -func (m *pageMaps) walkBranchesPrefix(prefix string, fn func(s string, n *contentNode) bool) { - _ = m.withMaps(func(pm *pageMap) error { - pm.branchTrees.WalkPrefix(prefix, func(s string, n *contentNode) bool { - return fn(s, n) - }) - return nil - }) -} -func (m *pageMaps) withMaps(fn func(pm *pageMap) error) error { - g, _ := m.workers.Start(context.Background()) - for _, pm := range m.pmaps { - pm := pm - g.Run(func() error { - return fn(pm) - }) - } - return g.Wait() -} + err := g.Wait() + resetCount := int(resetCounter.Load()) + checkedCount := int(checkedCounter.Load()) + + return checkedCount, resetCount, err +}
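The function above fans the per-page dependency checks out over a fixed worker pool via the rungroup package. As an in-package sketch of that pattern (the handler body is illustrative), using only the calls shown in this diff:

// rungroup fan-out: workers consume enqueued values concurrently,
// and Wait blocks until the queue is drained, returning the first error.
g := rungroup.Run[*pageState](ctx, rungroup.Config[*pageState]{
	NumWorkers: h.numWorkers,
	Handle: func(ctx context.Context, p *pageState) error {
		// ... check p's dependency managers against the change set ...
		return nil
	},
})
for _, p := range pagesToCheck { // hypothetical slice of candidates
	g.Enqueue(p)
}
err := g.Wait()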
-type pagesMapBucket struct { - // Cascading front matter. - cascade map[page.PageMatcher]maps.Params +// Calculate and apply aggregate values to the page tree (e.g. dates, cascades). +func (sa *sitePagesAssembler) applyAggregates() error { + sectionPageCount := map[string]int{} - owner *pageState // The branch node + pw := &doctree.NodeShiftTreeWalker[contentNodeI]{ + Tree: sa.pageMap.treePages, + LockType: doctree.LockTypeRead, + WalkContext: &doctree.WalkContext[contentNodeI]{}, + } - *pagesMapBucketPages -} + rw := pw.Extend() + rw.Tree = sa.pageMap.treeResources + sa.lastmod = time.Time{} -type pagesMapBucketPages struct { - pagesInit sync.Once - pages page.Pages + pw.Handle = func(keyPage string, n contentNodeI, match doctree.DimensionFlag) (bool, error) { + pageBundle := n.(*pageState) - pagesAndSectionsInit sync.Once - pagesAndSections page.Pages + if pageBundle.Kind() == kinds.KindTerm { + // Delay this until they're created. + return false, nil + } - sectionsInit sync.Once - sections page.Pages -} + if pageBundle.IsPage() { + rootSection := pageBundle.Section() + sectionPageCount[rootSection]++ + } -func (b *pagesMapBucket) getPages() page.Pages { - b.pagesInit.Do(func() { - b.pages = b.owner.treeRef.getPages() - page.SortByDefault(b.pages) - }) - return b.pages -} + // Handle cascades first to get any default dates set. + var cascade map[page.PageMatcher]maps.Params + if keyPage == "" { + // Home page gets its cascade from the site config. + cascade = sa.conf.Cascade.Config -func (b *pagesMapBucket) getPagesRecursive() page.Pages { - pages := b.owner.treeRef.getPagesRecursive() - page.SortByDefault(pages) - return pages -} + if pageBundle.m.cascade == nil { + // Pass the site cascade downwards. + pw.WalkContext.Data().Insert(keyPage, cascade) + } + } else { + _, data := pw.WalkContext.Data().LongestPrefix(keyPage) + if data != nil { + cascade = data.(map[page.PageMatcher]maps.Params) + } + } -func (b *pagesMapBucket) getPagesAndSections() page.Pages { - b.pagesAndSectionsInit.Do(func() { - b.pagesAndSections = b.owner.treeRef.getPagesAndSections() - }) - return b.pagesAndSections -} + if (pageBundle.IsHome() || pageBundle.IsSection()) && pageBundle.m.setMetaPostCount > 0 { + oldDates := pageBundle.m.dates -func (b *pagesMapBucket) getSections() page.Pages { - b.sectionsInit.Do(func() { - if b.owner.treeRef == nil { - return + // We need to wait until after the walk to determine if any of the dates have changed. + pw.WalkContext.AddPostHook( + func() error { + if oldDates != pageBundle.m.dates { + sa.assembleChanges.Add(pageBundle) + } + return nil + }, + ) + } - b.sections = b.owner.treeRef.getSections() - }) - return b.sections -} + // Combine the cascade map with front matter. + pageBundle.setMetaPost(cascade) + + // We receive cascade values from above. If this leads to a change compared + // to the previous value, we need to mark the page and its dependencies as changed. + if pageBundle.m.setMetaPostCascadeChanged { + sa.assembleChanges.Add(pageBundle) + } -func (b *pagesMapBucket) getTaxonomies() page.Pages { - b.sectionsInit.Do(func() { - var pas page.Pages - ref := b.owner.treeRef - ref.m.collectTaxonomies(ref.key, func(c *contentNode) { - pas = append(pas, c.p) - }) - page.SortByDefault(pas) - b.sections = pas - }) + const eventName = "dates" + if n.isContentNodeBranch() { + if pageBundle.m.cascade != nil { + // Pass it down. + pw.WalkContext.Data().Insert(keyPage, pageBundle.m.cascade) + } + wasZeroDates := resource.IsZeroDates(pageBundle.m.dates) + if wasZeroDates || pageBundle.IsHome() { + pw.WalkContext.AddEventListener(eventName, keyPage, func(e *doctree.Event[contentNodeI]) { + sp, ok := e.Source.(*pageState) + if !ok { + return + } + + if wasZeroDates { + pageBundle.m.dates.UpdateDateAndLastmodIfAfter(sp.m.dates) + } + + if pageBundle.IsHome() { + if pageBundle.m.dates.Lastmod().After(pageBundle.s.lastmod) { + pageBundle.s.lastmod = pageBundle.m.dates.Lastmod() + } + if sp.m.dates.Lastmod().After(pageBundle.s.lastmod) { + pageBundle.s.lastmod = sp.m.dates.Lastmod() + } + } + }) + } + } - return b.sections -} + // Send the date info up the tree. + pw.WalkContext.SendEvent(&doctree.Event[contentNodeI]{Source: n, Path: keyPage, Name: eventName}) -func (b *pagesMapBucket) getTaxonomyEntries() page.Pages { - var pas page.Pages - ref := b.owner.treeRef - viewInfo := ref.n.viewInfo - prefix := strings.ToLower("/" + viewInfo.name.plural + "/" + viewInfo.termKey + "/") - ref.m.taxonomyEntries.WalkPrefix(prefix, func(s string, v any) bool { - n := v.(*contentNode) - pas = append(pas, n.viewInfo.ref.p) - return false - }) - page.SortByDefault(pas) - return pas -} + isBranch := n.isContentNodeBranch() + rw.Prefix = keyPage + "/" -type sectionAggregate struct { - datesAll resource.Dates - datesSection resource.Dates - pageCount int - mainSection string - mainSectionPageCount int -} + rw.Handle = func(resourceKey string, n contentNodeI, match doctree.DimensionFlag) (bool, error) { + if isBranch { + ownerKey, _ := pw.Tree.LongestPrefix(resourceKey, true, nil) + if ownerKey != keyPage { + // Stop walking downwards, someone else owns this resource.
+ rw.SkipPrefix(ownerKey + "/") + return false, nil + } + } + rs := n.(*resourceSource) + if rs.isPage() { + pageResource := rs.r.(*pageState) + relPath := pageResource.m.pathInfo.BaseRel(pageBundle.m.pathInfo) + pageResource.m.resourcePath = relPath + var cascade map[page.PageMatcher]maps.Params + // Apply cascade (if set) to the page. + _, data := pw.WalkContext.Data().LongestPrefix(resourceKey) + if data != nil { + cascade = data.(map[page.PageMatcher]maps.Params) + } + pageResource.setMetaPost(cascade) + } -type sectionAggregate struct { - datesAll resource.Dates - datesSection resource.Dates - pageCount int - mainSection string - mainSectionPageCount int -} + return false, nil + } + return false, rw.Walk(sa.ctx) + } -type sectionAggregateHandler struct { - sectionAggregate - sectionPageCount int + if err := pw.Walk(sa.ctx); err != nil { + return err + } - // Section - b *contentNode - s string -} + if err := pw.WalkContext.HandleEventsAndHooks(); err != nil { + return err + } -func (h *sectionAggregateHandler) String() string { - return fmt.Sprintf("%s/%s - %d - %s", h.sectionAggregate.datesAll, h.sectionAggregate.datesSection, h.sectionPageCount, h.s) -} + if !sa.s.conf.C.IsMainSectionsSet() { + var mainSection string + var maxcount int + for section, counter := range sectionPageCount { + if section != "" && counter > maxcount { + mainSection = section + maxcount = counter + } + } + sa.s.conf.C.SetMainSections([]string{mainSection}) -func (h *sectionAggregateHandler) isRootSection() bool { - return h.s != "/" && strings.Count(h.s, "/") == 2 -} + } -func (h *sectionAggregateHandler) handleNested(v sectionWalkHandler) error { - nested := v.(*sectionAggregateHandler) - h.sectionPageCount += nested.pageCount - h.pageCount += h.sectionPageCount - h.datesAll.UpdateDateAndLastmodIfAfter(nested.datesAll) - h.datesSection.UpdateDateAndLastmodIfAfter(nested.datesAll) return nil } -func (h *sectionAggregateHandler) handlePage(s string, n *contentNode) error { - h.sectionPageCount++ +func (sa *sitePagesAssembler) applyAggregatesToTaxonomiesAndTerms() error { + walkContext := &doctree.WalkContext[contentNodeI]{} + + handlePlural := func(key string) error { + var pw *doctree.NodeShiftTreeWalker[contentNodeI] + pw = &doctree.NodeShiftTreeWalker[contentNodeI]{ + Tree: sa.pageMap.treePages, + Prefix: key, // We also want to include the root taxonomy nodes, so no trailing slash. + LockType: doctree.LockTypeRead, + WalkContext: walkContext, + Handle: func(s string, n contentNodeI, match doctree.DimensionFlag) (bool, error) { + p := n.(*pageState) + if p.Kind() != kinds.KindTerm { + // The other kinds were handled in applyAggregates. + if p.m.cascade != nil { + // Pass it down. + pw.WalkContext.Data().Insert(s, p.m.cascade) + } + } + + if p.Kind() != kinds.KindTerm && p.Kind() != kinds.KindTaxonomy { + // Already handled. + return false, nil + } + + const eventName = "dates" + + if p.Kind() == kinds.KindTerm { + var cascade map[page.PageMatcher]maps.Params + _, data := pw.WalkContext.Data().LongestPrefix(s) + if data != nil { + cascade = data.(map[page.PageMatcher]maps.Params) + } + p.setMetaPost(cascade) + + if err := sa.pageMap.treeTaxonomyEntries.WalkPrefix( + doctree.LockTypeRead, + paths.AddTrailingSlash(s), + func(ss string, wn *weightedContentNode) (bool, error) { + // Send the date info up the tree. 
+ pw.WalkContext.SendEvent(&doctree.Event[contentNodeI]{Source: wn.n, Path: ss, Name: eventName}) + return false, nil + }, + ); err != nil { + return false, err + } + } + + // Send the date info up the tree. + pw.WalkContext.SendEvent(&doctree.Event[contentNodeI]{Source: n, Path: s, Name: eventName}) + + if resource.IsZeroDates(p.m.dates) { + pw.WalkContext.AddEventListener(eventName, s, func(e *doctree.Event[contentNodeI]) { + sp, ok := e.Source.(*pageState) + if !ok { + return + } + + p.m.dates.UpdateDateAndLastmodIfAfter(sp.m.dates) + }) + } + + return false, nil + }, + } - var d resource.Dated - if n.p != nil { - d = n.p - } else if n.viewInfo != nil && n.viewInfo.ref != nil { - d = n.viewInfo.ref.p - } else { + if err := pw.Walk(sa.ctx); err != nil { + return err + } return nil } - h.datesAll.UpdateDateAndLastmodIfAfter(d) - h.datesSection.UpdateDateAndLastmodIfAfter(d) + for _, viewName := range sa.pageMap.cfg.taxonomyConfig.views { + if err := handlePlural(viewName.pluralTreeKey); err != nil { + return err + } + } + + if err := walkContext.HandleEventsAndHooks(); err != nil { + return err + } + return nil } -func (h *sectionAggregateHandler) handleSectionPost() error { - if h.sectionPageCount > h.mainSectionPageCount && h.isRootSection() { - h.mainSectionPageCount = h.sectionPageCount - h.mainSection = strings.TrimPrefix(h.s, "/") +func (sa *sitePagesAssembler) assembleTermsAndTranslations() error { + var ( + pages = sa.pageMap.treePages + entries = sa.pageMap.treeTaxonomyEntries + views = sa.pageMap.cfg.taxonomyConfig.views + ) + + lockType := doctree.LockTypeWrite + w := &doctree.NodeShiftTreeWalker[contentNodeI]{ + Tree: pages, + LockType: lockType, + Handle: func(s string, n contentNodeI, match doctree.DimensionFlag) (bool, error) { + ps := n.(*pageState) + + if ps.m.noLink() { + return false, nil + } + + // This is a little out of place, but is conveniently put here. + // Check if translationKey is set by user. + // This is to support the manual way of setting the translationKey in front matter. + if ps.m.translationKey != "" { + sa.s.h.translationKeyPages.Append(ps.m.translationKey, ps) + } + + if sa.pageMap.cfg.taxonomyTermDisabled { + return false, nil + } + + for _, viewName := range views { + vals := types.ToStringSlicePreserveString(getParam(ps, viewName.plural, false)) + if vals == nil { + continue + } + + w := getParamToLower(ps, viewName.plural+"_weight") + weight, err := cast.ToIntE(w) + if err != nil { + sa.Log.Warnf("Unable to convert taxonomy weight %#v to int for %q", w, n.Path()) + // wei