summaryrefslogtreecommitdiffstats
path: root/hugolib/pagecollections.go
diff options
context:
space:
mode:
authorBjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>2019-09-10 11:26:34 +0200
committerBjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>2020-02-18 09:49:42 +0100
commiteada236f87d9669885da1ff647672bb3dc6b4954 (patch)
treea0303f067b2cbe06c55637013dbd7702a551c64f /hugolib/pagecollections.go
parente5329f13c02b87f0c30f8837759c810cd90ff8da (diff)
Introduce a tree map for all content
This commit introduces a new data structure to store pages and their resources. This data structure is backed by radix trees. This simplifies tree operations, makes all pages a bundle, and paves the way for #6310. It also solves a set of annoying issues (see list below). Not a motivation behind this, but this commit also makes Hugo in general a little bit faster and more memory efficient (see benchmarks). Especially for partial rebuilds on content edits, but also when taxonomies are in use. ``` name old time/op new time/op delta SiteNew/Bundle_with_image/Edit-16 1.32ms ± 8% 1.00ms ± 9% -24.42% (p=0.029 n=4+4) SiteNew/Bundle_with_JSON_file/Edit-16 1.28ms ± 0% 0.94ms ± 0% -26.26% (p=0.029 n=4+4) SiteNew/Tags_and_categories/Edit-16 33.9ms ± 2% 21.8ms ± 1% -35.67% (p=0.029 n=4+4) SiteNew/Canonify_URLs/Edit-16 40.6ms ± 1% 37.7ms ± 3% -7.20% (p=0.029 n=4+4) SiteNew/Deep_content_tree/Edit-16 56.7ms ± 0% 51.7ms ± 1% -8.82% (p=0.029 n=4+4) SiteNew/Many_HTML_templates/Edit-16 19.9ms ± 2% 18.3ms ± 3% -7.64% (p=0.029 n=4+4) SiteNew/Page_collections/Edit-16 37.9ms ± 4% 34.0ms ± 2% -10.28% (p=0.029 n=4+4) SiteNew/Bundle_with_image-16 10.7ms ± 0% 10.6ms ± 0% -1.15% (p=0.029 n=4+4) SiteNew/Bundle_with_JSON_file-16 10.8ms ± 0% 10.7ms ± 0% -1.05% (p=0.029 n=4+4) SiteNew/Tags_and_categories-16 43.2ms ± 1% 39.6ms ± 1% -8.35% (p=0.029 n=4+4) SiteNew/Canonify_URLs-16 47.6ms ± 1% 47.3ms ± 0% ~ (p=0.057 n=4+4) SiteNew/Deep_content_tree-16 73.0ms ± 1% 74.2ms ± 1% ~ (p=0.114 n=4+4) SiteNew/Many_HTML_templates-16 37.9ms ± 0% 38.1ms ± 1% ~ (p=0.114 n=4+4) SiteNew/Page_collections-16 53.6ms ± 1% 54.7ms ± 1% +2.09% (p=0.029 n=4+4) name old alloc/op new alloc/op delta SiteNew/Bundle_with_image/Edit-16 486kB ± 0% 430kB ± 0% -11.47% (p=0.029 n=4+4) SiteNew/Bundle_with_JSON_file/Edit-16 265kB ± 0% 209kB ± 0% -21.06% (p=0.029 n=4+4) SiteNew/Tags_and_categories/Edit-16 13.6MB ± 0% 8.8MB ± 0% -34.93% (p=0.029 n=4+4) SiteNew/Canonify_URLs/Edit-16 66.5MB ± 0% 63.9MB ± 0% -3.95% (p=0.029 n=4+4) 
SiteNew/Deep_content_tree/Edit-16 28.8MB ± 0% 25.8MB ± 0% -10.55% (p=0.029 n=4+4) SiteNew/Many_HTML_templates/Edit-16 6.16MB ± 0% 5.56MB ± 0% -9.86% (p=0.029 n=4+4) SiteNew/Page_collections/Edit-16 16.9MB ± 0% 16.0MB ± 0% -5.19% (p=0.029 n=4+4) SiteNew/Bundle_with_image-16 2.28MB ± 0% 2.29MB ± 0% +0.35% (p=0.029 n=4+4) SiteNew/Bundle_with_JSON_file-16 2.07MB ± 0% 2.07MB ± 0% ~ (p=0.114 n=4+4) SiteNew/Tags_and_categories-16 14.3MB ± 0% 13.2MB ± 0% -7.30% (p=0.029 n=4+4) SiteNew/Canonify_URLs-16 69.1MB ± 0% 69.0MB ± 0% ~ (p=0.343 n=4+4) SiteNew/Deep_content_tree-16 31.3MB ± 0% 31.8MB ± 0% +1.49% (p=0.029 n=4+4) SiteNew/Many_HTML_templates-16 10.8MB ± 0% 10.9MB ± 0% +1.11% (p=0.029 n=4+4) SiteNew/Page_collections-16 21.4MB ± 0% 21.6MB ± 0% +1.15% (p=0.029 n=4+4) name old allocs/op new allocs/op delta SiteNew/Bundle_with_image/Edit-16 4.74k ± 0% 3.86k ± 0% -18.57% (p=0.029 n=4+4) SiteNew/Bundle_with_JSON_file/Edit-16 4.73k ± 0% 3.85k ± 0% -18.58% (p=0.029 n=4+4) SiteNew/Tags_and_categories/Edit-16 301k ± 0% 198k ± 0% -34.14% (p=0.029 n=4+4) SiteNew/Canonify_URLs/Edit-16 389k ± 0% 373k ± 0% -4.07% (p=0.029 n=4+4) SiteNew/Deep_content_tree/Edit-16 338k ± 0% 262k ± 0% -22.63% (p=0.029 n=4+4) SiteNew/Many_HTML_templates/Edit-16 102k ± 0% 88k ± 0% -13.81% (p=0.029 n=4+4) SiteNew/Page_collections/Edit-16 176k ± 0% 152k ± 0% -13.32% (p=0.029 n=4+4) SiteNew/Bundle_with_image-16 26.8k ± 0% 26.8k ± 0% +0.05% (p=0.029 n=4+4) SiteNew/Bundle_with_JSON_file-16 26.8k ± 0% 26.8k ± 0% +0.05% (p=0.029 n=4+4) SiteNew/Tags_and_categories-16 273k ± 0% 245k ± 0% -10.36% (p=0.029 n=4+4) SiteNew/Canonify_URLs-16 396k ± 0% 398k ± 0% +0.39% (p=0.029 n=4+4) SiteNew/Deep_content_tree-16 317k ± 0% 325k ± 0% +2.53% (p=0.029 n=4+4) SiteNew/Many_HTML_templates-16 146k ± 0% 147k ± 0% +0.98% (p=0.029 n=4+4) SiteNew/Page_collections-16 210k ± 0% 215k ± 0% +2.44% (p=0.029 n=4+4) ``` Fixes #6312 Fixes #6087 Fixes #6738 Fixes #6412 Fixes #6743 Fixes #6875 Fixes #6034 Fixes #6902 Fixes #6173 Fixes #6590
Diffstat (limited to 'hugolib/pagecollections.go')
-rw-r--r--hugolib/pagecollections.go489
1 files changed, 123 insertions, 366 deletions
diff --git a/hugolib/pagecollections.go b/hugolib/pagecollections.go
index 023222bdd..74d48fe22 100644
--- a/hugolib/pagecollections.go
+++ b/hugolib/pagecollections.go
@@ -17,43 +17,25 @@ import (
"fmt"
"path"
"path/filepath"
- "sort"
"strings"
"sync"
- "time"
- "github.com/gohugoio/hugo/resources/resource"
+ "github.com/gohugoio/hugo/common/herrors"
- "github.com/pkg/errors"
+ "github.com/gohugoio/hugo/helpers"
- "github.com/gohugoio/hugo/cache"
"github.com/gohugoio/hugo/resources/page"
)
-// Used in the page cache to mark more than one hit for a given key.
-var ambiguityFlag = &pageState{}
-
// PageCollections contains the page collections for a site.
type PageCollections struct {
- pagesMap *pagesMap
-
- // Includes absolute all pages (of all types), including drafts etc.
- rawAllPages pageStatePages
-
- // rawAllPages plus additional pages created during the build process.
- workAllPages pageStatePages
-
- // Includes headless bundles, i.e. bundles that produce no output for its content page.
- headlessPages pageStatePages
+ pageMap *pageMap
// Lazy initialized page collections
pages *lazyPagesFactory
regularPages *lazyPagesFactory
allPages *lazyPagesFactory
allRegularPages *lazyPagesFactory
-
- // The index for .Site.GetPage etc.
- pageIndex *cache.Lazy
}
// Pages returns all pages.
@@ -78,25 +60,6 @@ func (c *PageCollections) AllRegularPages() page.Pages {
return c.allRegularPages.get()
}
-// Get initializes the index if not already done so, then
-// looks up the given page ref, returns nil if no value found.
-func (c *PageCollections) getFromCache(ref string) (page.Page, error) {
- v, found, err := c.pageIndex.Get(ref)
- if err != nil {
- return nil, err
- }
- if !found {
- return nil, nil
- }
-
- p := v.(page.Page)
-
- if p != ambiguityFlag {
- return p, nil
- }
- return nil, fmt.Errorf("page reference %q is ambiguous", ref)
-}
-
type lazyPagesFactory struct {
pages page.Pages
@@ -115,83 +78,19 @@ func newLazyPagesFactory(factory page.PagesFactory) *lazyPagesFactory {
return &lazyPagesFactory{factory: factory}
}
-func newPageCollections() *PageCollections {
- return newPageCollectionsFromPages(nil)
-}
-
-func newPageCollectionsFromPages(pages pageStatePages) *PageCollections {
+func newPageCollections(m *pageMap) *PageCollections {
+ if m == nil {
+ panic("must provide a pageMap")
+ }
- c := &PageCollections{rawAllPages: pages}
+ c := &PageCollections{pageMap: m}
c.pages = newLazyPagesFactory(func() page.Pages {
- pages := make(page.Pages, len(c.workAllPages))
- for i, p := range c.workAllPages {
- pages[i] = p
- }
- return pages
+ return m.createListAllPages()
})
c.regularPages = newLazyPagesFactory(func() page.Pages {
- return c.findPagesByKindInWorkPages(page.KindPage, c.workAllPages)
- })
-
- c.pageIndex = cache.NewLazy(func() (map[string]interface{}, error) {
- index := make(map[string]interface{})
-
- add := func(ref string, p page.Page) {
- ref = strings.ToLower(ref)
- existing := index[ref]
- if existing == nil {
- index[ref] = p
- } else if existing != ambiguityFlag && existing != p {
- index[ref] = ambiguityFlag
- }
- }
-
- for _, pageCollection := range []pageStatePages{c.workAllPages, c.headlessPages} {
- for _, p := range pageCollection {
- if p.IsPage() {
- sourceRefs := p.sourceRefs()
- for _, ref := range sourceRefs {
- add(ref, p)
- }
- sourceRef := sourceRefs[0]
-
- // Ref/Relref supports this potentially ambiguous lookup.
- add(p.File().LogicalName(), p)
-
- translationBaseName := p.File().TranslationBaseName()
-
- dir, _ := path.Split(sourceRef)
- dir = strings.TrimSuffix(dir, "/")
-
- if translationBaseName == "index" {
- add(dir, p)
- add(path.Base(dir), p)
- } else {
- add(translationBaseName, p)
- }
-
- // We need a way to get to the current language version.
- pathWithNoExtensions := path.Join(dir, translationBaseName)
- add(pathWithNoExtensions, p)
- } else {
- sourceRefs := p.sourceRefs()
- for _, ref := range sourceRefs {
- add(ref, p)
- }
-
- ref := p.SectionsPath()
-
- // index the canonical, unambiguous virtual ref
- // e.g. /section
- // (this may already have been indexed above)
- add("/"+ref, p)
- }
- }
- }
-
- return index, nil
+ return c.findPagesByKindIn(page.KindPage, c.pages.get())
})
return c
@@ -249,307 +148,165 @@ func (c *PageCollections) getPage(typ string, sections ...string) page.Page {
return p
}
-// Case insensitive page lookup.
-func (c *PageCollections) getPageNew(context page.Page, ref string) (page.Page, error) {
- var anError error
-
- ref = strings.ToLower(ref)
-
- // Absolute (content root relative) reference.
- if strings.HasPrefix(ref, "/") {
- p, err := c.getFromCache(ref)
- if err == nil && p != nil {
- return p, nil
- }
- if err != nil {
- anError = err
- }
-
- } else if context != nil {
- // Try the page-relative path.
- var dir string
- if !context.File().IsZero() {
- dir = filepath.ToSlash(context.File().Dir())
- } else {
- dir = context.SectionsPath()
- }
- ppath := path.Join("/", strings.ToLower(dir), ref)
-
- p, err := c.getFromCache(ppath)
- if err == nil && p != nil {
- return p, nil
- }
- if err != nil {
- anError = err
- }
- }
-
- if !strings.HasPrefix(ref, "/") {
- // Many people will have "post/foo.md" in their content files.
- p, err := c.getFromCache("/" + ref)
- if err == nil && p != nil {
- return p, nil
- }
- if err != nil {
- anError = err
- }
- }
-
- // Last try.
- ref = strings.TrimPrefix(ref, "/")
- p, err := c.getFromCache(ref)
- if err != nil {
- anError = err
- }
-
- if p == nil && anError != nil {
- return nil, wrapErr(errors.Wrap(anError, "failed to resolve ref"), context)
+// getPageRef resolves a Page from ref/relRef, with a slightly more comprehensive
+// search path than getPageNew.
+func (c *PageCollections) getPageRef(context page.Page, ref string) (page.Page, error) {
+ n, err := c.getContentNode(context, true, ref)
+ if err != nil || n == nil || n.p == nil {
+ return nil, err
}
-
- return p, nil
+ return n.p, nil
}
-func (*PageCollections) findPagesByKindIn(kind string, inPages page.Pages) page.Pages {
- var pages page.Pages
- for _, p := range inPages {
- if p.Kind() == kind {
- pages = append(pages, p)
- }
+func (c *PageCollections) getPageNew(context page.Page, ref string) (page.Page, error) {
+ n, err := c.getContentNode(context, false, ref)
+ if err != nil || n == nil || n.p == nil {
+ return nil, err
}
- return pages
+ return n.p, nil
}
-func (c *PageCollections) findPagesByKind(kind string) page.Pages {
- return c.findPagesByKindIn(kind, c.Pages())
-}
+func (c *PageCollections) getSectionOrPage(ref string) (*contentNode, string) {
+ var n *contentNode
-func (c *PageCollections) findWorkPagesByKind(kind string) pageStatePages {
- var pages pageStatePages
- for _, p := range c.workAllPages {
- if p.Kind() == kind {
- pages = append(pages, p)
- }
- }
- return pages
-}
+ s, v, found := c.pageMap.sections.LongestPrefix(ref)
-func (*PageCollections) findPagesByKindInWorkPages(kind string, inPages pageStatePages) page.Pages {
- var pages page.Pages
- for _, p := range inPages {
- if p.Kind() == kind {
- pages = append(pages, p)
- }
+ if found {
+ n = v.(*contentNode)
}
- return pages
-}
-func (c *PageCollections) addPage(page *pageState) {
- c.rawAllPages = append(c.rawAllPages, page)
-}
-
-func (c *PageCollections) removePageFilename(filename string) {
- if i := c.rawAllPages.findPagePosByFilename(filename); i >= 0 {
- c.clearResourceCacheForPage(c.rawAllPages[i])
- c.rawAllPages = append(c.rawAllPages[:i], c.rawAllPages[i+1:]...)
+ if found && s == ref {
+ // A section
+ return n, ""
}
-}
+ m := c.pageMap
+ filename := strings.TrimPrefix(strings.TrimPrefix(ref, s), "/")
+ langSuffix := "." + m.s.Lang()
-func (c *PageCollections) removePage(page *pageState) {
- if i := c.rawAllPages.findPagePos(page); i >= 0 {
- c.clearResourceCacheForPage(c.rawAllPages[i])
- c.rawAllPages = append(c.rawAllPages[:i], c.rawAllPages[i+1:]...)
- }
-}
+ // Trim both extension and any language code.
+ name := helpers.PathNoExt(filename)
+ name = strings.TrimSuffix(name, langSuffix)
-func (c *PageCollections) replacePage(page *pageState) {
- // will find existing page that matches filepath and remove it
- c.removePage(page)
- c.addPage(page)
-}
+ // These are reserved bundle names and will always be stored by their owning
+ // folder name.
+ name = strings.TrimSuffix(name, "/index")
+ name = strings.TrimSuffix(name, "/_index")
-func (c *PageCollections) clearResourceCacheForPage(page *pageState) {
- if len(page.resources) > 0 {
- page.s.ResourceSpec.DeleteCacheByPrefix(page.targetPaths().SubResourceBaseTarget)
+ if !found {
+ return nil, name
}
-}
-func (c *PageCollections) assemblePagesMap(s *Site) error {
+ // Check if it's a section with filename provided.
+ if !n.p.File().IsZero() && n.p.File().LogicalName() == filename {
+ return n, name
+ }
- c.pagesMap = newPagesMap(s)
+ return m.getPage(s, name), name
- rootSections := make(map[string]bool)
+}
- // Add all branch nodes first.
- for _, p := range c.rawAllPages {
- rootSections[p.Section()] = true
- if p.IsPage() {
- continue
- }
- c.pagesMap.addPage(p)
+func (c *PageCollections) getContentNode(context page.Page, isReflink bool, ref string) (*contentNode, error) {
+ defer herrors.Recover()
+ ref = filepath.ToSlash(strings.ToLower(strings.TrimSpace(ref)))
+ if ref == "" {
+ ref = "/"
}
+ inRef := ref
- // Create missing home page and the first level sections if no
- // _index provided.
- s.home = c.pagesMap.getOrCreateHome()
- for k := range rootSections {
- c.pagesMap.createSectionIfNotExists(k)
+ var doSimpleLookup bool
+ if isReflink || context == nil {
+ // For Ref/Reflink and .Site.GetPage do simple name lookups for the potentially ambiguous myarticle.md and /myarticle.md,
+ // but not when we get ./myarticle*, section/myarticle.
+ doSimpleLookup = ref[0] != '.' || ref[0] == '/' && strings.Count(ref, "/") == 1
}
- // Attach the regular pages to their section.
- for _, p := range c.rawAllPages {
- if p.IsNode() {
- continue
+ if context != nil && !strings.HasPrefix(ref, "/") {
+ // Try the page-relative path.
+ var base string
+ if context.File().IsZero() {
+ base = context.SectionsPath()
+ } else {
+ base = filepath.ToSlash(filepath.Dir(context.File().FileInfo().Meta().Path()))
}
- c.pagesMap.addPage(p)
+ ref = path.Join("/", strings.ToLower(base), ref)
}
- return nil
-}
-
-func (c *PageCollections) createWorkAllPages() error {
- c.workAllPages = make(pageStatePages, 0, len(c.rawAllPages))
- c.headlessPages = make(pageStatePages, 0)
-
- var (
- homeDates *resource.Dates
- sectionDates *resource.Dates
- siteLastmod time.Time
- siteLastDate time.Time
-
- sectionsParamId = "mainSections"
- sectionsParamIdLower = strings.ToLower(sectionsParamId)
- )
-
- mainSections, mainSectionsFound := c.pagesMap.s.Info.Params()[sectionsParamIdLower]
-
- var (
- bucketsToRemove []string
- rootBuckets []*pagesMapBucket
- walkErr error
- )
-
- c.pagesMap.r.Walk(func(s string, v interface{}) bool {
- bucket := v.(*pagesMapBucket)
- parentBucket := c.pagesMap.parentBucket(s)
-
- if parentBucket != nil {
-
- if !mainSectionsFound && strings.Count(s, "/") == 1 && bucket.owner.IsSection() {
- // Root section
- rootBuckets = append(rootBuckets, bucket)
- }
- }
-
- if bucket.owner.IsHome() {
- if resource.IsZeroDates(bucket.owner) {
- // Calculate dates from the page tree.
- homeDates = &bucket.owner.m.Dates
- }
- }
+ if !strings.HasPrefix(ref, "/") {
+ ref = "/" + ref
+ }
- sectionDates = nil
- if resource.IsZeroDates(bucket.owner) {
- sectionDates = &bucket.owner.m.Dates
- }
+ m := c.pageMap
- if parentBucket != nil {
- bucket.parent = parentBucket
- if bucket.owner.IsSection() {
- parentBucket.bucketSections = append(parentBucket.bucketSections, bucket)
- }
- }
+ // It's either a section, a page in a section or a taxonomy node.
+ // Start with the most likely:
+ n, name := c.getSectionOrPage(ref)
+ if n != nil {
+ return n, nil
+ }
- if bucket.isEmpty() {
- if bucket.owner.IsSection() && bucket.owner.File().IsZero() {
- // Check for any nested section.
- var hasDescendant bool
- c.pagesMap.r.WalkPrefix(s, func(ss string, v interface{}) bool {
- if s != ss {
- hasDescendant = true
- return true
- }
- return false
- })
- if !hasDescendant {
- // This is an auto-created section with, now, nothing in it.
- bucketsToRemove = append(bucketsToRemove, s)
- return false
- }
- }
+ if !strings.HasPrefix(inRef, "/") {
+ // Many people will have "post/foo.md" in their content files.
+ if n, _ := c.getSectionOrPage("/" + inRef); n != nil {
+ return n, nil
}
+ }
- if !bucket.disabled {
- c.workAllPages = append(c.workAllPages, bucket.owner)
+ // Check if it's a taxonomy node
+ s, v, found := m.taxonomies.LongestPrefix(ref)
+ if found {
+ if !m.onSameLevel(ref, s) {
+ return nil, nil
}
+ return v.(*contentNode), nil
+ }
- if !bucket.view {
- for _, p := range bucket.headlessPages {
- ps := p.(*pageState)
- ps.parent = bucket.owner
- c.headlessPages = append(c.headlessPages, ps)
- }
- for _, p := range bucket.pages {
- ps := p.(*pageState)
- ps.parent = bucket.owner
- c.workAllPages = append(c.workAllPages, ps)
-
- if homeDates != nil {
- homeDates.UpdateDateAndLastmodIfAfter(ps)
- }
-
- if sectionDates != nil {
- sectionDates.UpdateDateAndLastmodIfAfter(ps)
- }
-
- if p.Lastmod().After(siteLastmod) {
- siteLastmod = p.Lastmod()
- }
- if p.Date().After(siteLastDate) {
- siteLastDate = p.Date()
- }
+ getByName := func(s string) (*contentNode, error) {
+ n := m.pageReverseIndex.Get(s)
+ if n != nil {
+ if n == ambigousContentNode {
+ return nil, fmt.Errorf("page reference %q is ambiguous", ref)
}
+ return n, nil
}
- return false
- })
-
- if walkErr != nil {
- return walkErr
+ return nil, nil
}
- c.pagesMap.s.lastmod = siteLastmod
-
- if !mainSectionsFound {
+ var module string
+ if context != nil && !context.File().IsZero() {
+ module = context.File().FileInfo().Meta().Module()
+ }
- // Calculare main section
- var (
- maxRootBucketWeight int
- maxRootBucket *pagesMapBucket
- )
+ if module == "" && !c.pageMap.s.home.File().IsZero() {
+ module = c.pageMap.s.home.File().FileInfo().Meta().Module()
+ }
- for _, b := range rootBuckets {
- weight := len(b.pages) + (len(b.bucketSections) * 5)
- if weight >= maxRootBucketWeight {
- maxRootBucket = b
- maxRootBucketWeight = weight
- }
+ if module != "" {
+ n, err := getByName(module + ref)
+ if err != nil {
+ return nil, err
}
-
- if maxRootBucket != nil {
- // Try to make this as backwards compatible as possible.
- mainSections = []string{maxRootBucket.owner.Section()}
+ if n != nil {
+ return n, nil
}
}
- c.pagesMap.s.Info.Params()[sectionsParamId] = mainSections
- c.pagesMap.s.Info.Params()[sectionsParamIdLower] = mainSections
-
- for _, key := range bucketsToRemove {
- c.pagesMap.r.Delete(key)
+ if !doSimpleLookup {
+ return nil, nil
}
- sort.Sort(c.workAllPages)
+ // Ref/relref supports this potentially ambiguous lookup.
+ return getByName(name)
+
+}
- return nil
+func (*PageCollections) findPagesByKindIn(kind string, inPages page.Pages) page.Pages {
+ var pages page.Pages
+ for _, p := range inPages {
+ if p.Kind() == kind {
+ pages = append(pages, p)
+ }
+ }
+ return pages
}