diff options
author | Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> | 2019-09-10 11:26:34 +0200 |
---|---|---|
committer | Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> | 2020-02-18 09:49:42 +0100 |
commit | eada236f87d9669885da1ff647672bb3dc6b4954 (patch) | |
tree | a0303f067b2cbe06c55637013dbd7702a551c64f /hugolib/pagecollections.go | |
parent | e5329f13c02b87f0c30f8837759c810cd90ff8da (diff) |
Introduce a tree map for all content
This commit introduces a new data structure to store pages and their resources.
This data structure is backed by radix trees.
This simplifies tree operations, makes all pages a bundle, and paves the way for #6310.
It also solves a set of annoying issues (see list below).
Not a motivation behind this, but this commit also makes Hugo in general a little bit faster and more memory efficient (see benchmarks). Especially for partial rebuilds on content edits, but also when taxonomies are in use.
```
name old time/op new time/op delta
SiteNew/Bundle_with_image/Edit-16 1.32ms ± 8% 1.00ms ± 9% -24.42% (p=0.029 n=4+4)
SiteNew/Bundle_with_JSON_file/Edit-16 1.28ms ± 0% 0.94ms ± 0% -26.26% (p=0.029 n=4+4)
SiteNew/Tags_and_categories/Edit-16 33.9ms ± 2% 21.8ms ± 1% -35.67% (p=0.029 n=4+4)
SiteNew/Canonify_URLs/Edit-16 40.6ms ± 1% 37.7ms ± 3% -7.20% (p=0.029 n=4+4)
SiteNew/Deep_content_tree/Edit-16 56.7ms ± 0% 51.7ms ± 1% -8.82% (p=0.029 n=4+4)
SiteNew/Many_HTML_templates/Edit-16 19.9ms ± 2% 18.3ms ± 3% -7.64% (p=0.029 n=4+4)
SiteNew/Page_collections/Edit-16 37.9ms ± 4% 34.0ms ± 2% -10.28% (p=0.029 n=4+4)
SiteNew/Bundle_with_image-16 10.7ms ± 0% 10.6ms ± 0% -1.15% (p=0.029 n=4+4)
SiteNew/Bundle_with_JSON_file-16 10.8ms ± 0% 10.7ms ± 0% -1.05% (p=0.029 n=4+4)
SiteNew/Tags_and_categories-16 43.2ms ± 1% 39.6ms ± 1% -8.35% (p=0.029 n=4+4)
SiteNew/Canonify_URLs-16 47.6ms ± 1% 47.3ms ± 0% ~ (p=0.057 n=4+4)
SiteNew/Deep_content_tree-16 73.0ms ± 1% 74.2ms ± 1% ~ (p=0.114 n=4+4)
SiteNew/Many_HTML_templates-16 37.9ms ± 0% 38.1ms ± 1% ~ (p=0.114 n=4+4)
SiteNew/Page_collections-16 53.6ms ± 1% 54.7ms ± 1% +2.09% (p=0.029 n=4+4)
name old alloc/op new alloc/op delta
SiteNew/Bundle_with_image/Edit-16 486kB ± 0% 430kB ± 0% -11.47% (p=0.029 n=4+4)
SiteNew/Bundle_with_JSON_file/Edit-16 265kB ± 0% 209kB ± 0% -21.06% (p=0.029 n=4+4)
SiteNew/Tags_and_categories/Edit-16 13.6MB ± 0% 8.8MB ± 0% -34.93% (p=0.029 n=4+4)
SiteNew/Canonify_URLs/Edit-16 66.5MB ± 0% 63.9MB ± 0% -3.95% (p=0.029 n=4+4)
SiteNew/Deep_content_tree/Edit-16 28.8MB ± 0% 25.8MB ± 0% -10.55% (p=0.029 n=4+4)
SiteNew/Many_HTML_templates/Edit-16 6.16MB ± 0% 5.56MB ± 0% -9.86% (p=0.029 n=4+4)
SiteNew/Page_collections/Edit-16 16.9MB ± 0% 16.0MB ± 0% -5.19% (p=0.029 n=4+4)
SiteNew/Bundle_with_image-16 2.28MB ± 0% 2.29MB ± 0% +0.35% (p=0.029 n=4+4)
SiteNew/Bundle_with_JSON_file-16 2.07MB ± 0% 2.07MB ± 0% ~ (p=0.114 n=4+4)
SiteNew/Tags_and_categories-16 14.3MB ± 0% 13.2MB ± 0% -7.30% (p=0.029 n=4+4)
SiteNew/Canonify_URLs-16 69.1MB ± 0% 69.0MB ± 0% ~ (p=0.343 n=4+4)
SiteNew/Deep_content_tree-16 31.3MB ± 0% 31.8MB ± 0% +1.49% (p=0.029 n=4+4)
SiteNew/Many_HTML_templates-16 10.8MB ± 0% 10.9MB ± 0% +1.11% (p=0.029 n=4+4)
SiteNew/Page_collections-16 21.4MB ± 0% 21.6MB ± 0% +1.15% (p=0.029 n=4+4)
name old allocs/op new allocs/op delta
SiteNew/Bundle_with_image/Edit-16 4.74k ± 0% 3.86k ± 0% -18.57% (p=0.029 n=4+4)
SiteNew/Bundle_with_JSON_file/Edit-16 4.73k ± 0% 3.85k ± 0% -18.58% (p=0.029 n=4+4)
SiteNew/Tags_and_categories/Edit-16 301k ± 0% 198k ± 0% -34.14% (p=0.029 n=4+4)
SiteNew/Canonify_URLs/Edit-16 389k ± 0% 373k ± 0% -4.07% (p=0.029 n=4+4)
SiteNew/Deep_content_tree/Edit-16 338k ± 0% 262k ± 0% -22.63% (p=0.029 n=4+4)
SiteNew/Many_HTML_templates/Edit-16 102k ± 0% 88k ± 0% -13.81% (p=0.029 n=4+4)
SiteNew/Page_collections/Edit-16 176k ± 0% 152k ± 0% -13.32% (p=0.029 n=4+4)
SiteNew/Bundle_with_image-16 26.8k ± 0% 26.8k ± 0% +0.05% (p=0.029 n=4+4)
SiteNew/Bundle_with_JSON_file-16 26.8k ± 0% 26.8k ± 0% +0.05% (p=0.029 n=4+4)
SiteNew/Tags_and_categories-16 273k ± 0% 245k ± 0% -10.36% (p=0.029 n=4+4)
SiteNew/Canonify_URLs-16 396k ± 0% 398k ± 0% +0.39% (p=0.029 n=4+4)
SiteNew/Deep_content_tree-16 317k ± 0% 325k ± 0% +2.53% (p=0.029 n=4+4)
SiteNew/Many_HTML_templates-16 146k ± 0% 147k ± 0% +0.98% (p=0.029 n=4+4)
SiteNew/Page_collections-16 210k ± 0% 215k ± 0% +2.44% (p=0.029 n=4+4)
```
Fixes #6312
Fixes #6087
Fixes #6738
Fixes #6412
Fixes #6743
Fixes #6875
Fixes #6034
Fixes #6902
Fixes #6173
Fixes #6590
Diffstat (limited to 'hugolib/pagecollections.go')
-rw-r--r-- | hugolib/pagecollections.go | 489 |
1 files changed, 123 insertions, 366 deletions
diff --git a/hugolib/pagecollections.go b/hugolib/pagecollections.go index 023222bdd..74d48fe22 100644 --- a/hugolib/pagecollections.go +++ b/hugolib/pagecollections.go @@ -17,43 +17,25 @@ import ( "fmt" "path" "path/filepath" - "sort" "strings" "sync" - "time" - "github.com/gohugoio/hugo/resources/resource" + "github.com/gohugoio/hugo/common/herrors" - "github.com/pkg/errors" + "github.com/gohugoio/hugo/helpers" - "github.com/gohugoio/hugo/cache" "github.com/gohugoio/hugo/resources/page" ) -// Used in the page cache to mark more than one hit for a given key. -var ambiguityFlag = &pageState{} - // PageCollections contains the page collections for a site. type PageCollections struct { - pagesMap *pagesMap - - // Includes absolute all pages (of all types), including drafts etc. - rawAllPages pageStatePages - - // rawAllPages plus additional pages created during the build process. - workAllPages pageStatePages - - // Includes headless bundles, i.e. bundles that produce no output for its content page. - headlessPages pageStatePages + pageMap *pageMap // Lazy initialized page collections pages *lazyPagesFactory regularPages *lazyPagesFactory allPages *lazyPagesFactory allRegularPages *lazyPagesFactory - - // The index for .Site.GetPage etc. - pageIndex *cache.Lazy } // Pages returns all pages. @@ -78,25 +60,6 @@ func (c *PageCollections) AllRegularPages() page.Pages { return c.allRegularPages.get() } -// Get initializes the index if not already done so, then -// looks up the given page ref, returns nil if no value found. 
-func (c *PageCollections) getFromCache(ref string) (page.Page, error) { - v, found, err := c.pageIndex.Get(ref) - if err != nil { - return nil, err - } - if !found { - return nil, nil - } - - p := v.(page.Page) - - if p != ambiguityFlag { - return p, nil - } - return nil, fmt.Errorf("page reference %q is ambiguous", ref) -} - type lazyPagesFactory struct { pages page.Pages @@ -115,83 +78,19 @@ func newLazyPagesFactory(factory page.PagesFactory) *lazyPagesFactory { return &lazyPagesFactory{factory: factory} } -func newPageCollections() *PageCollections { - return newPageCollectionsFromPages(nil) -} - -func newPageCollectionsFromPages(pages pageStatePages) *PageCollections { +func newPageCollections(m *pageMap) *PageCollections { + if m == nil { + panic("must provide a pageMap") + } - c := &PageCollections{rawAllPages: pages} + c := &PageCollections{pageMap: m} c.pages = newLazyPagesFactory(func() page.Pages { - pages := make(page.Pages, len(c.workAllPages)) - for i, p := range c.workAllPages { - pages[i] = p - } - return pages + return m.createListAllPages() }) c.regularPages = newLazyPagesFactory(func() page.Pages { - return c.findPagesByKindInWorkPages(page.KindPage, c.workAllPages) - }) - - c.pageIndex = cache.NewLazy(func() (map[string]interface{}, error) { - index := make(map[string]interface{}) - - add := func(ref string, p page.Page) { - ref = strings.ToLower(ref) - existing := index[ref] - if existing == nil { - index[ref] = p - } else if existing != ambiguityFlag && existing != p { - index[ref] = ambiguityFlag - } - } - - for _, pageCollection := range []pageStatePages{c.workAllPages, c.headlessPages} { - for _, p := range pageCollection { - if p.IsPage() { - sourceRefs := p.sourceRefs() - for _, ref := range sourceRefs { - add(ref, p) - } - sourceRef := sourceRefs[0] - - // Ref/Relref supports this potentially ambiguous lookup. 
- add(p.File().LogicalName(), p) - - translationBaseName := p.File().TranslationBaseName() - - dir, _ := path.Split(sourceRef) - dir = strings.TrimSuffix(dir, "/") - - if translationBaseName == "index" { - add(dir, p) - add(path.Base(dir), p) - } else { - add(translationBaseName, p) - } - - // We need a way to get to the current language version. - pathWithNoExtensions := path.Join(dir, translationBaseName) - add(pathWithNoExtensions, p) - } else { - sourceRefs := p.sourceRefs() - for _, ref := range sourceRefs { - add(ref, p) - } - - ref := p.SectionsPath() - - // index the canonical, unambiguous virtual ref - // e.g. /section - // (this may already have been indexed above) - add("/"+ref, p) - } - } - } - - return index, nil + return c.findPagesByKindIn(page.KindPage, c.pages.get()) }) return c @@ -249,307 +148,165 @@ func (c *PageCollections) getPage(typ string, sections ...string) page.Page { return p } -// Case insensitive page lookup. -func (c *PageCollections) getPageNew(context page.Page, ref string) (page.Page, error) { - var anError error - - ref = strings.ToLower(ref) - - // Absolute (content root relative) reference. - if strings.HasPrefix(ref, "/") { - p, err := c.getFromCache(ref) - if err == nil && p != nil { - return p, nil - } - if err != nil { - anError = err - } - - } else if context != nil { - // Try the page-relative path. - var dir string - if !context.File().IsZero() { - dir = filepath.ToSlash(context.File().Dir()) - } else { - dir = context.SectionsPath() - } - ppath := path.Join("/", strings.ToLower(dir), ref) - - p, err := c.getFromCache(ppath) - if err == nil && p != nil { - return p, nil - } - if err != nil { - anError = err - } - } - - if !strings.HasPrefix(ref, "/") { - // Many people will have "post/foo.md" in their content files. - p, err := c.getFromCache("/" + ref) - if err == nil && p != nil { - return p, nil - } - if err != nil { - anError = err - } - } - - // Last try. 
- ref = strings.TrimPrefix(ref, "/") - p, err := c.getFromCache(ref) - if err != nil { - anError = err - } - - if p == nil && anError != nil { - return nil, wrapErr(errors.Wrap(anError, "failed to resolve ref"), context) +// getPageRef resolves a Page from ref/relRef, with a slightly more comprehensive +// search path than getPageNew. +func (c *PageCollections) getPageRef(context page.Page, ref string) (page.Page, error) { + n, err := c.getContentNode(context, true, ref) + if err != nil || n == nil || n.p == nil { + return nil, err } - - return p, nil + return n.p, nil } -func (*PageCollections) findPagesByKindIn(kind string, inPages page.Pages) page.Pages { - var pages page.Pages - for _, p := range inPages { - if p.Kind() == kind { - pages = append(pages, p) - } +func (c *PageCollections) getPageNew(context page.Page, ref string) (page.Page, error) { + n, err := c.getContentNode(context, false, ref) + if err != nil || n == nil || n.p == nil { + return nil, err } - return pages + return n.p, nil } -func (c *PageCollections) findPagesByKind(kind string) page.Pages { - return c.findPagesByKindIn(kind, c.Pages()) -} +func (c *PageCollections) getSectionOrPage(ref string) (*contentNode, string) { + var n *contentNode -func (c *PageCollections) findWorkPagesByKind(kind string) pageStatePages { - var pages pageStatePages - for _, p := range c.workAllPages { - if p.Kind() == kind { - pages = append(pages, p) - } - } - return pages -} + s, v, found := c.pageMap.sections.LongestPrefix(ref) -func (*PageCollections) findPagesByKindInWorkPages(kind string, inPages pageStatePages) page.Pages { - var pages page.Pages - for _, p := range inPages { - if p.Kind() == kind { - pages = append(pages, p) - } + if found { + n = v.(*contentNode) } - return pages -} -func (c *PageCollections) addPage(page *pageState) { - c.rawAllPages = append(c.rawAllPages, page) -} - -func (c *PageCollections) removePageFilename(filename string) { - if i := c.rawAllPages.findPagePosByFilename(filename); 
i >= 0 { - c.clearResourceCacheForPage(c.rawAllPages[i]) - c.rawAllPages = append(c.rawAllPages[:i], c.rawAllPages[i+1:]...) + if found && s == ref { + // A section + return n, "" } -} + m := c.pageMap + filename := strings.TrimPrefix(strings.TrimPrefix(ref, s), "/") + langSuffix := "." + m.s.Lang() -func (c *PageCollections) removePage(page *pageState) { - if i := c.rawAllPages.findPagePos(page); i >= 0 { - c.clearResourceCacheForPage(c.rawAllPages[i]) - c.rawAllPages = append(c.rawAllPages[:i], c.rawAllPages[i+1:]...) - } -} + // Trim both extension and any language code. + name := helpers.PathNoExt(filename) + name = strings.TrimSuffix(name, langSuffix) -func (c *PageCollections) replacePage(page *pageState) { - // will find existing page that matches filepath and remove it - c.removePage(page) - c.addPage(page) -} + // These are reserved bundle names and will always be stored by their owning + // folder name. + name = strings.TrimSuffix(name, "/index") + name = strings.TrimSuffix(name, "/_index") -func (c *PageCollections) clearResourceCacheForPage(page *pageState) { - if len(page.resources) > 0 { - page.s.ResourceSpec.DeleteCacheByPrefix(page.targetPaths().SubResourceBaseTarget) + if !found { + return nil, name } -} -func (c *PageCollections) assemblePagesMap(s *Site) error { + // Check if it's a section with filename provided. + if !n.p.File().IsZero() && n.p.File().LogicalName() == filename { + return n, name + } - c.pagesMap = newPagesMap(s) + return m.getPage(s, name), name - rootSections := make(map[string]bool) +} - // Add all branch nodes first. 
- for _, p := range c.rawAllPages { - rootSections[p.Section()] = true - if p.IsPage() { - continue - } - c.pagesMap.addPage(p) +func (c *PageCollections) getContentNode(context page.Page, isReflink bool, ref string) (*contentNode, error) { + defer herrors.Recover() + ref = filepath.ToSlash(strings.ToLower(strings.TrimSpace(ref))) + if ref == "" { + ref = "/" } + inRef := ref - // Create missing home page and the first level sections if no - // _index provided. - s.home = c.pagesMap.getOrCreateHome() - for k := range rootSections { - c.pagesMap.createSectionIfNotExists(k) + var doSimpleLookup bool + if isReflink || context == nil { + // For Ref/Reflink and .Site.GetPage do simple name lookups for the potentially ambigous myarticle.md and /myarticle.md, + // but not when we get ./myarticle*, section/myarticle. + doSimpleLookup = ref[0] != '.' || ref[0] == '/' && strings.Count(ref, "/") == 1 } - // Attach the regular pages to their section. - for _, p := range c.rawAllPages { - if p.IsNode() { - continue + if context != nil && !strings.HasPrefix(ref, "/") { + // Try the page-relative path. 
+ var base string + if context.File().IsZero() { + base = context.SectionsPath() + } else { + base = filepath.ToSlash(filepath.Dir(context.File().FileInfo().Meta().Path())) } - c.pagesMap.addPage(p) + ref = path.Join("/", strings.ToLower(base), ref) } - return nil -} - -func (c *PageCollections) createWorkAllPages() error { - c.workAllPages = make(pageStatePages, 0, len(c.rawAllPages)) - c.headlessPages = make(pageStatePages, 0) - - var ( - homeDates *resource.Dates - sectionDates *resource.Dates - siteLastmod time.Time - siteLastDate time.Time - - sectionsParamId = "mainSections" - sectionsParamIdLower = strings.ToLower(sectionsParamId) - ) - - mainSections, mainSectionsFound := c.pagesMap.s.Info.Params()[sectionsParamIdLower] - - var ( - bucketsToRemove []string - rootBuckets []*pagesMapBucket - walkErr error - ) - - c.pagesMap.r.Walk(func(s string, v interface{}) bool { - bucket := v.(*pagesMapBucket) - parentBucket := c.pagesMap.parentBucket(s) - - if parentBucket != nil { - - if !mainSectionsFound && strings.Count(s, "/") == 1 && bucket.owner.IsSection() { - // Root section - rootBuckets = append(rootBuckets, bucket) - } - } - - if bucket.owner.IsHome() { - if resource.IsZeroDates(bucket.owner) { - // Calculate dates from the page tree. - homeDates = &bucket.owner.m.Dates - } - } + if !strings.HasPrefix(ref, "/") { + ref = "/" + ref + } - sectionDates = nil - if resource.IsZeroDates(bucket.owner) { - sectionDates = &bucket.owner.m.Dates - } + m := c.pageMap - if parentBucket != nil { - bucket.parent = parentBucket - if bucket.owner.IsSection() { - parentBucket.bucketSections = append(parentBucket.bucketSections, bucket) - } - } + // It's either a section, a page in a section or a taxonomy node. + // Start with the most likely: + n, name := c.getSectionOrPage(ref) + if n != nil { + return n, nil + } - if bucket.isEmpty() { - if bucket.owner.IsSection() && bucket.owner.File().IsZero() { - // Check for any nested section. 
- var hasDescendant bool - c.pagesMap.r.WalkPrefix(s, func(ss string, v interface{}) bool { - if s != ss { - hasDescendant = true - return true - } - return false - }) - if !hasDescendant { - // This is an auto-created section with, now, nothing in it. - bucketsToRemove = append(bucketsToRemove, s) - return false - } - } + if !strings.HasPrefix(inRef, "/") { + // Many people will have "post/foo.md" in their content files. + if n, _ := c.getSectionOrPage("/" + inRef); n != nil { + return n, nil } + } - if !bucket.disabled { - c.workAllPages = append(c.workAllPages, bucket.owner) + // Check if it's a taxonomy node + s, v, found := m.taxonomies.LongestPrefix(ref) + if found { + if !m.onSameLevel(ref, s) { + return nil, nil } + return v.(*contentNode), nil + } - if !bucket.view { - for _, p := range bucket.headlessPages { - ps := p.(*pageState) - ps.parent = bucket.owner - c.headlessPages = append(c.headlessPages, ps) - } - for _, p := range bucket.pages { - ps := p.(*pageState) - ps.parent = bucket.owner - c.workAllPages = append(c.workAllPages, ps) - - if homeDates != nil { - homeDates.UpdateDateAndLastmodIfAfter(ps) - } - - if sectionDates != nil { - sectionDates.UpdateDateAndLastmodIfAfter(ps) - } - - if p.Lastmod().After(siteLastmod) { - siteLastmod = p.Lastmod() - } - if p.Date().After(siteLastDate) { - siteLastDate = p.Date() - } + getByName := func(s string) (*contentNode, error) { + n := m.pageReverseIndex.Get(s) + if n != nil { + if n == ambigousContentNode { + return nil, fmt.Errorf("page reference %q is ambiguous", ref) } + return n, nil } - return false - }) - - if walkErr != nil { - return walkErr + return nil, nil } - c.pagesMap.s.lastmod = siteLastmod - - if !mainSectionsFound { + var module string + if context != nil && !context.File().IsZero() { + module = context.File().FileInfo().Meta().Module() + } - // Calculare main section - var ( - maxRootBucketWeight int - maxRootBucket *pagesMapBucket - ) + if module == "" && !c.pageMap.s.home.File().IsZero() 
{ + module = c.pageMap.s.home.File().FileInfo().Meta().Module() + } - for _, b := range rootBuckets { - weight := len(b.pages) + (len(b.bucketSections) * 5) - if weight >= maxRootBucketWeight { - maxRootBucket = b - maxRootBucketWeight = weight - } + if module != "" { + n, err := getByName(module + ref) + if err != nil { + return nil, err } - - if maxRootBucket != nil { - // Try to make this as backwards compatible as possible. - mainSections = []string{maxRootBucket.owner.Section()} + if n != nil { + return n, nil } } - c.pagesMap.s.Info.Params()[sectionsParamId] = mainSections - c.pagesMap.s.Info.Params()[sectionsParamIdLower] = mainSections - - for _, key := range bucketsToRemove { - c.pagesMap.r.Delete(key) + if !doSimpleLookup { + return nil, nil } - sort.Sort(c.workAllPages) + // Ref/relref supports this potentially ambigous lookup. + return getByName(name) + +} - return nil +func (*PageCollections) findPagesByKindIn(kind string, inPages page.Pages) page.Pages { + var pages page.Pages + for _, p := range inPages { + if p.Kind() == kind { + pages = append(pages, p) + } + } + return pages } |