diff options
Diffstat (limited to 'hugolib/pages_capture.go')
-rw-r--r-- | hugolib/pages_capture.go | 673 |
1 files changed, 236 insertions, 437 deletions
diff --git a/hugolib/pages_capture.go b/hugolib/pages_capture.go index c57c707de..acdc674e6 100644 --- a/hugolib/pages_capture.go +++ b/hugolib/pages_capture.go @@ -1,4 +1,4 @@ -// Copyright 2019 The Hugo Authors. All rights reserved. +// Copyright 2021 The Hugo Authors. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -15,190 +15,188 @@ package hugolib import ( "context" + "errors" "fmt" - pth "path" + "os" "path/filepath" - "reflect" - - "github.com/gohugoio/hugo/common/herrors" - "github.com/gohugoio/hugo/common/loggers" - "github.com/gohugoio/hugo/common/maps" - + "strings" + "sync" + "sync/atomic" + "time" + + "github.com/bep/logg" + "github.com/gohugoio/hugo/common/paths" + "github.com/gohugoio/hugo/common/rungroup" + "github.com/gohugoio/hugo/helpers" "github.com/gohugoio/hugo/parser/pageparser" - - "github.com/gohugoio/hugo/hugofs/files" + "github.com/spf13/afero" "github.com/gohugoio/hugo/source" + "github.com/gohugoio/hugo/common/loggers" "github.com/gohugoio/hugo/hugofs" - "github.com/spf13/afero" -) - -const ( - walkIsRootFileMetaKey = "walkIsRootFileMetaKey" ) func newPagesCollector( + ctx context.Context, + h *HugoSites, sp *source.SourceSpec, - contentMap *pageMaps, logger loggers.Logger, - contentTracker *contentChangeMap, - proc pagesCollectorProcessorProvider, filenames ...string) *pagesCollector { + infoLogger logg.LevelLogger, + m *pageMap, + ids []pathChange, +) *pagesCollector { return &pagesCollector{ - fs: sp.SourceFs, - contentMap: contentMap, - proc: proc, + ctx: ctx, + h: h, + fs: sp.BaseFs.Content.Fs, + m: m, sp: sp, logger: logger, - filenames: filenames, - tracker: contentTracker, + infoLogger: infoLogger, + ids: ids, + seenDirs: make(map[string]bool), } } -type contentDirKey struct { - dirname string - filename string - tp bundleDirType -} - -type fileinfoBundle struct { - header hugofs.FileMetaInfo - resources []hugofs.FileMetaInfo -} - -func (b *fileinfoBundle) containsResource(name string) bool { - for _, r := range b.resources { - if r.Name() == name { - return true - } - } - - return false -} - -type pageBundles map[string]*fileinfoBundle - type pagesCollector struct { - sp *source.SourceSpec - fs afero.Fs - logger loggers.Logger + ctx context.Context + h *HugoSites + sp *source.SourceSpec + logger loggers.Logger + infoLogger logg.LevelLogger - contentMap *pageMaps + m *pageMap - // Ordered list (bundle headers first) used in partial builds. - filenames []string + fs afero.Fs - // Content files tracker used in partial builds. - tracker *contentChangeMap + // List of paths that have changed. Used in partial builds. + ids []pathChange + seenDirs map[string]bool - proc pagesCollectorProcessorProvider + g rungroup.Group[hugofs.FileMetaInfo] } -// isCascadingEdit returns whether the dir represents a cascading edit. -// That is, if a front matter cascade section is removed, added or edited. -// If this is the case we must re-evaluate its descendants. -func (c *pagesCollector) isCascadingEdit(dir contentDirKey) (bool, string) { - // This is either a section or a taxonomy node. Find it. - prefix := cleanTreeKey(dir.dirname) - - section := "/" - var isCascade bool - - c.contentMap.walkBranchesPrefix(prefix, func(s string, n *contentNode) bool { - if n.fi == nil || dir.filename != n.fi.Meta().Filename { - return false - } - - f, err := n.fi.Meta().Open() - if err != nil { - // File may have been removed, assume a cascading edit. - // Some false positives is not too bad. - isCascade = true - return true - } - - pf, err := pageparser.ParseFrontMatterAndContent(f) - f.Close() - if err != nil { - isCascade = true - return true - } - - if n.p == nil || n.p.bucket == nil { - return true - } +func (c *pagesCollector) copyFile(fim hugofs.FileMetaInfo) error { + meta := fim.Meta() + f, err := meta.Open() + if err != nil { + return fmt.Errorf("copyFile: failed to open: %w", err) + } - section = s + s := c.m.s - maps.PrepareParams(pf.FrontMatter) - cascade1, ok := pf.FrontMatter["cascade"] - hasCascade := n.p.bucket.cascade != nil && len(n.p.bucket.cascade) > 0 - if !ok { - isCascade = hasCascade + target := filepath.Join(s.PathSpec.GetTargetLanguageBasePath(), meta.PathInfo.Path()) - return true - } - - if !hasCascade { - isCascade = true - return true - } + defer f.Close() - for _, v := range n.p.bucket.cascade { - isCascade = !reflect.DeepEqual(cascade1, v) - if isCascade { - break - } - } + fs := s.PublishFsStatic - return true - }) + s.PathSpec.ProcessingStats.Incr(&s.PathSpec.ProcessingStats.Files) - return isCascade, section + return helpers.WriteToDisk(filepath.Clean(target), f, fs) } -// Collect. +// Collect collects content by walking the file system and storing +// it in the content tree. +// It may be restricted by filenames set on the collector (partial build). func (c *pagesCollector) Collect() (collectErr error) { - c.proc.Start(context.Background()) + var ( + numWorkers = c.h.numWorkers + numFilesProcessedTotal atomic.Uint64 + numFilesProcessedLast uint64 + fileBatchTimer = time.Now() + fileBatchTimerMu sync.Mutex + ) + + l := c.infoLogger.WithField("substep", "collect") + + logFilesProcessed := func(force bool) { + fileBatchTimerMu.Lock() + if force || time.Since(fileBatchTimer) > 3*time.Second { + numFilesProcessedBatch := numFilesProcessedTotal.Load() - numFilesProcessedLast + numFilesProcessedLast = numFilesProcessedTotal.Load() + loggers.TimeTrackf(l, fileBatchTimer, + logg.Fields{ + logg.Field{Name: "files", Value: numFilesProcessedBatch}, + logg.Field{Name: "files_total", Value: numFilesProcessedTotal.Load()}, + }, + "", + ) + fileBatchTimer = time.Now() + } + fileBatchTimerMu.Unlock() + } + defer func() { - err := c.proc.Wait() - if collectErr == nil { - collectErr = err - } + logFilesProcessed(true) }() - if len(c.filenames) == 0 { - // Collect everything. - collectErr = c.collectDir("", false, nil) - } else { - for _, pm := range c.contentMap.pmaps { - pm.cfg.isRebuild = true - } - dirs := make(map[contentDirKey]bool) - for _, filename := range c.filenames { - dir, btype := c.tracker.resolveAndRemove(filename) - dirs[contentDirKey{dir, filename, btype}] = true - } - - for dir := range dirs { - for _, pm := range c.contentMap.pmaps { - pm.s.ResourceSpec.DeleteBySubstring(dir.dirname) + c.g = rungroup.Run[hugofs.FileMetaInfo](c.ctx, rungroup.Config[hugofs.FileMetaInfo]{ + NumWorkers: numWorkers, + Handle: func(ctx context.Context, fi hugofs.FileMetaInfo) error { + if err := c.m.AddFi(fi); err != nil { + if errors.Is(err, pageparser.ErrPlainHTMLDocumentsNotSupported) { + // Reclassify this as a static file. + if err := c.copyFile(fi); err != nil { + return err + } + } else { + return hugofs.AddFileInfoToError(err, fi, c.fs) + } } + numFilesProcessedTotal.Add(1) + if numFilesProcessedTotal.Load()%1000 == 0 { + logFilesProcessed(false) + } + return nil + }, + }) - switch dir.tp { - case bundleLeaf: - collectErr = c.collectDir(dir.dirname, true, nil) - case bundleBranch: - isCascading, section := c.isCascadingEdit(dir) - - if isCascading { - c.contentMap.deleteSection(section) - } - collectErr = c.collectDir(dir.dirname, !isCascading, nil) - default: + if c.ids == nil { + // Collect everything. + collectErr = c.collectDir(nil, false, nil) + } else { + for _, s := range c.h.Sites { + s.pageMap.cfg.isRebuild = true + } + + for _, id := range c.ids { + if id.p.IsLeafBundle() { + collectErr = c.collectDir( + id.p, + false, + func(fim hugofs.FileMetaInfo) bool { + return true + }, + ) + } else if id.p.IsBranchBundle() { + collectErr = c.collectDir( + id.p, + false, + func(fim hugofs.FileMetaInfo) bool { + if fim.IsDir() { + return true + } + fimp := fim.Meta().PathInfo + if fimp == nil { + return false + } + + return strings.HasPrefix(fimp.Path(), paths.AddTrailingSlash(id.p.Dir())) + }, + ) + } else { // We always start from a directory. - collectErr = c.collectDir(dir.dirname, true, func(fim hugofs.FileMetaInfo) bool { - return dir.filename == fim.Meta().Filename + collectErr = c.collectDir(id.p, id.isDir, func(fim hugofs.FileMetaInfo) bool { + if id.delete || id.isDir { + if id.isDir { + return strings.HasPrefix(fim.Meta().PathInfo.Path(), paths.AddTrailingSlash(id.p.Path())) + } + + return id.p.Dir() == fim.Meta().PathInfo.Dir() + } + return id.p.Path() == fim.Meta().PathInfo.Path() }) } @@ -209,160 +207,51 @@ func (c *pagesCollector) Collect() (collectErr error) { } - return -} - -func (c *pagesCollector) isBundleHeader(fi hugofs.FileMetaInfo) bool { - class := fi.Meta().Classifier - return class == files.ContentClassLeaf || class == files.ContentClassBranch -} - -func (c *pagesCollector) getLang(fi hugofs.FileMetaInfo) string { - lang := fi.Meta().Lang - if lang != "" { - return lang - } - return c.sp.Cfg.DefaultContentLanguage() -} - -func (c *pagesCollector) addToBundle(info hugofs.FileMetaInfo, btyp bundleDirType, bundles pageBundles) error { - getBundle := func(lang string) *fileinfoBundle { - return bundles[lang] + werr := c.g.Wait() + if collectErr == nil { + collectErr = werr } - cloneBundle := func(lang string) *fileinfoBundle { - // Every bundled content file needs a content file header. - // Use the default content language if found, else just - // pick one. - var ( - source *fileinfoBundle - found bool - ) - - source, found = bundles[c.sp.Cfg.DefaultContentLanguage()] - if !found { - for _, b := range bundles { - source = b - break - } - } - - if source == nil { - panic(fmt.Sprintf("no source found, %d", len(bundles))) - } - - clone := c.cloneFileInfo(source.header) - clone.Meta().Lang = lang - - return &fileinfoBundle{ - header: clone, - } - } + return +} - lang := c.getLang(info) - bundle := getBundle(lang) - isBundleHeader := c.isBundleHeader(info) - if bundle != nil && isBundleHeader { - // index.md file inside a bundle, see issue 6208. - info.Meta().Classifier = files.ContentClassContent - isBundleHeader = false - } - classifier := info.Meta().Classifier - isContent := classifier == files.ContentClassContent - if bundle == nil { - if isBundleHeader { - bundle = &fileinfoBundle{header: info} - bundles[lang] = bundle +func (c *pagesCollector) collectDir(dirPath *paths.Path, isDir bool, inFilter func(fim hugofs.FileMetaInfo) bool) error { + var dpath string + if dirPath != nil { + if isDir { + dpath = filepath.FromSlash(dirPath.Path()) } else { - if btyp == bundleBranch { - // No special logic for branch bundles. - // Every language needs its own _index.md file. - // Also, we only clone bundle headers for lonesome, bundled, - // content files. - return c.handleFiles(info) - } - - if isContent { - bundle = cloneBundle(lang) - bundles[lang] = bundle - } + dpath = filepath.FromSlash(dirPath.Dir()) } } - if !isBundleHeader && bundle != nil { - bundle.resources = append(bundle.resources, info) - } - - if classifier == files.ContentClassFile { - translations := info.Meta().Translations - - for lang, b := range bundles { - if !stringSliceContains(lang, translations...) && !b.containsResource(info.Name()) { - - // Clone and add it to the bundle. - clone := c.cloneFileInfo(info) - clone.Meta().Lang = lang - b.resources = append(b.resources, clone) - } - } + if c.seenDirs[dpath] { + return nil } + c.seenDirs[dpath] = true - return nil -} - -func (c *pagesCollector) cloneFileInfo(fi hugofs.FileMetaInfo) hugofs.FileMetaInfo { - return hugofs.NewFileMetaInfo(fi, hugofs.NewFileMeta()) -} - -func (c *pagesCollector) collectDir(dirname string, partial bool, inFilter func(fim hugofs.FileMetaInfo) bool) error { - fi, err := c.fs.Stat(dirname) + root, err := c.fs.Stat(dpath) if err != nil { - if herrors.IsNotExist(err) { - // May have been deleted. + if os.IsNotExist(err) { return nil } return err } - handleDir := func( - btype bundleDirType, - dir hugofs.FileMetaInfo, - path string, - readdir []hugofs.FileMetaInfo) error { - if btype > bundleNot && c.tracker != nil { - c.tracker.add(path, btype) - } - - if btype == bundleBranch { - if err := c.handleBundleBranch(readdir); err != nil { - return err - } - // A branch bundle is only this directory level, so keep walking. - return nil - } else if btype == bundleLeaf { - if err := c.handleBundleLeaf(dir, path, readdir); err != nil { - return err - } - - return nil - } - - if err := c.handleFiles(readdir...); err != nil { - return err - } + rootm := root.(hugofs.FileMetaInfo) - return nil + if err := c.collectDirDir(dpath, rootm, inFilter); err != nil { + return err } - filter := func(fim hugofs.FileMetaInfo) bool { - if fim.Meta().SkipDir { - return false - } + return nil +} +func (c *pagesCollector) collectDirDir(path string, root hugofs.FileMetaInfo, inFilter func(fim hugofs.FileMetaInfo) bool) error { + filter := func(fim hugofs.FileMetaInfo) bool { if c.sp.IgnoreFile(fim.Meta().Filename) { return false } - if inFilter != nil { return inFilter(fim) } @@ -370,83 +259,63 @@ func (c *pagesCollector) collectDir(dirname string, partial bool, inFilter func( } preHook := func(dir hugofs.FileMetaInfo, path string, readdir []hugofs.FileMetaInfo) ([]hugofs.FileMetaInfo, error) { - var btype bundleDirType - filtered := readdir[:0] for _, fi := range readdir { if filter(fi) { filtered = append(filtered, fi) - - if c.tracker != nil { - // Track symlinks. - c.tracker.addSymbolicLinkMapping(fi) - } } } - walkRoot := dir.Meta().IsRootFile readdir = filtered + if len(readdir) == 0 { + return nil, nil + } - // We merge language directories, so there can be duplicates, but they - // will be ordered, most important first. - var duplicates []int - seen := make(map[string]bool) - - for i, fi := range readdir { - + // Pick the first regular file. + var first hugofs.FileMetaInfo + for _, fi := range readdir { if fi.IsDir() { continue } + first = fi + break + } - meta := fi.Meta() - meta.IsRootFile = walkRoot - class := meta.Classifier - translationBase := meta.TranslationBaseNameWithExt - key := pth.Join(meta.Lang, translationBase) - - if seen[key] { - duplicates = append(duplicates, i) - continue - } - seen[key] = true + if first == nil { + // Only dirs, keep walking. + return readdir, nil + } - var thisBtype bundleDirType + // Any bundle file will always be first. + firstPi := first.Meta().PathInfo + if firstPi == nil { + panic(fmt.Sprintf("collectDirDir: no path info for %q", first.Meta().Filename)) + } - switch class { - case files.ContentClassLeaf: - thisBtype = bundleLeaf - case files.ContentClassBranch: - thisBtype = bundleBranch + if firstPi.IsLeafBundle() { + if err := c.handleBundleLeaf(dir, first, path, readdir); err != nil { + return nil, err } + return nil, filepath.SkipDir + } - // Folders with both index.md and _index.md type of files have - // undefined behaviour and can never work. - // The branch variant will win because of sort order, but log - // a warning about it. - if thisBtype > bundleNot && btype > bundleNot && thisBtype != btype { - c.logger.Warnf("Content directory %q have both index.* and _index.* files, pick one.", dir.Meta().Filename) - // Reclassify it so it will be handled as a content file inside the - // section, which is in line with the <= 0.55 behaviour. - meta.Classifier = files.ContentClassContent - } else if thisBtype > bundleNot { - btype = thisBtype + for _, fi := range readdir { + if fi.IsDir() { + continue } - } - - if len(duplicates) > 0 { - for i := len(duplicates) - 1; i >= 0; i-- { - idx := duplicates[i] - readdir = append(readdir[:idx], readdir[idx+1:]...) + meta := fi.Meta() + pi := meta.PathInfo + if pi == nil { + panic(fmt.Sprintf("no path info for %q", meta.Filename)) } - } - err := handleDir(btype, dir, path, readdir) - if err != nil { - return nil, err - } + if meta.Lang == "" { + panic("lang not set") + } - if btype == bundleLeaf || partial { - return nil, filepath.SkipDir + if err := c.g.Enqueue(fi); err != nil { + return nil, err + } } // Keep walking. @@ -454,126 +323,56 @@ func (c *pagesCollector) collectDir(dirname string, partial bool, inFilter func( } var postHook hugofs.WalkHook - if c.tracker != nil { - postHook = func(dir hugofs.FileMetaInfo, path string, readdir []hugofs.FileMetaInfo) ([]hugofs.FileMetaInfo, error) { - if c.tracker == nil { - // Nothing to do. - return readdir, nil - } - - return readdir, nil - } - } - - wfn := func(path string, info hugofs.FileMetaInfo, err error) error { - if err != nil { - return err - } + wfn := func(path string, fi hugofs.FileMetaInfo) error { return nil } - fim := fi.(hugofs.FileMetaInfo) - // Make sure the pages in this directory gets re-rendered, - // even in fast render mode. - fim.Meta().IsRootFile = true - - w := hugofs.NewWalkway(hugofs.WalkwayConfig{ - Fs: c.fs, - Logger: c.logger, - Root: dirname, - Info: fim, - HookPre: preHook, - HookPost: postHook, - WalkFn: wfn, - }) + w := hugofs.NewWalkway( + hugofs.WalkwayConfig{ + Logger: c.logger, + Root: path, + Info: root, + Fs: c.fs, + HookPre: preHook, + HookPost: postHook, + WalkFn: wfn, + }) return w.Walk() } -func (c *pagesCollector) handleBundleBranch(readdir []hugofs.FileMetaInfo) error { - // Maps bundles to its language. - bundles := pageBundles{} - - var contentFiles []hugofs.FileMetaInfo - - for _, fim := range readdir { - - if fim.IsDir() { - continue +func (c *pagesCollector) handleBundleLeaf(dir, bundle hugofs.FileMetaInfo, inPath string, readdir []hugofs.FileMetaInfo) error { + bundlePi := bundle.Meta().PathInfo + walk := func(path string, info hugofs.FileMetaInfo) error { + if info.IsDir() { + return nil } - meta := fim.Meta() + pi := info.Meta().PathInfo - switch meta.Classifier { - case files.ContentClassContent: - contentFiles = append(contentFiles, fim) - default: - if err := c.addToBundle(fim, bundleBranch, bundles); err != nil { - return err + if info != bundle { + // Everything inside a leaf bundle is a Resource, + // even the content pages. + // Note that we do allow index.md as page resources, but not in the bundle root. + if !pi.IsLeafBundle() || pi.Dir() != bundlePi.Dir() { + paths.ModifyPathBundleTypeResource(pi) } } - } - - // Make sure the section is created before its pages. - if err := c.proc.Process(bundles); err != nil { - return err - } - - return c.handleFiles(contentFiles...) -} - -func (c *pagesCollector) handleBundleLeaf(dir hugofs.FileMetaInfo, path string, readdir []hugofs.FileMetaInfo) error { - // Maps bundles to its language. - bundles := pageBundles{} - - walk := func(path string, info hugofs.FileMetaInfo, err error) error { - if err != nil { - return err - } - if info.IsDir() { - return nil - } - - return c.addToBundle(info, bundleLeaf, bundles) + return c.g.Enqueue(info) } // Start a new walker from the given path. - w := hugofs.NewWalkway(hugofs.WalkwayConfig{ - Root: path, - Fs: c.fs, - Logger: c.logger, - Info: dir, - DirEntries: readdir, - WalkFn: walk, - }) - - if err := w.Walk(); err != nil { - return err - } - - return c.proc.Process(bundles) -} - -func (c *pagesCollector) handleFiles(fis ...hugofs.FileMetaInfo) error { - for _, fi := range fis { - if fi.IsDir() { - continue - } + w := hugofs.NewWalkway( + hugofs.WalkwayConfig{ + Root: inPath, + Fs: c.fs, + Logger: c.logger, + Info: dir, + DirEntries: readdir, + WalkFn: walk, + }) - if err := c.proc.Process(fi); err != nil { - return err - } - } - return nil -} - -func stringSliceContains(k string, values ...string) bool { - for _, v := range values { - if k == v { - return true - } - } - return false + return w.Walk() } |