// Copyright 2021 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package hugolib

import (
	"context"
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"github.com/bep/logg"
	"github.com/gohugoio/hugo/common/hstrings"
	"github.com/gohugoio/hugo/common/paths"
	"github.com/gohugoio/hugo/common/rungroup"
	"github.com/spf13/afero"

	"github.com/gohugoio/hugo/source"

	"github.com/gohugoio/hugo/common/loggers"
	"github.com/gohugoio/hugo/hugofs"
)

// newPagesCollector returns a pagesCollector that stores the given arguments
// as-is and starts with an empty set of seen directories.
//
// The file system to walk is taken from sp.BaseFs.Content.Fs.
// ids is the list of changed paths; Collect treats a nil ids as a request
// to collect everything.
// The worker group (field g) is not created here; Collect sets it up.
func newPagesCollector(
	ctx context.Context,
	h *HugoSites,
	sp *source.SourceSpec,
	logger loggers.Logger,
	infoLogger logg.LevelLogger,
	m *pageMap,
	ids []pathChange,
) *pagesCollector {
	return &pagesCollector{
		ctx:        ctx,
		h:          h,
		fs:         sp.BaseFs.Content.Fs,
		m:          m,
		sp:         sp,
		logger:     logger,
		infoLogger: infoLogger,
		ids:        ids,
		seenDirs:   make(map[string]bool),
	}
}

// pagesCollector walks the content file system and hands the files it finds
// to the page map via a worker group.
type pagesCollector struct {
	// ctx is passed to the worker group started in Collect.
	ctx context.Context
	// h supplies the worker count, the sites and the ignore-file function
	// used while collecting.
	h *HugoSites
	// sp is the source spec the collector was created from.
	sp *source.SourceSpec
	// logger is handed to the file system walkers.
	logger loggers.Logger
	// infoLogger receives the progress logging done by Collect.
	infoLogger logg.LevelLogger

	// m is where collected files are added (via AddFi).
	m *pageMap

	// fs is the content file system being walked.
	fs afero.Fs

	// List of paths that have changed. Used in partial builds.
	ids []pathChange
	// seenDirs records the start directories already walked by collectDir,
	// so each one is walked at most once.
	seenDirs map[string]bool

	// g is the worker group files are enqueued on; it is (re)assigned on
	// every call to Collect.
	g rungroup.Group[hugofs.FileMetaInfo]
}

// Collect collects content by walking the file system and storing
// it in the content tree.
// It may be restricted by filenames set on the collector (partial build).
func (c *pagesCollector) Collect() (collectErr error) { var ( numWorkers = c.h.numWorkers numFilesProcessedTotal atomic.Uint64 numFilesProcessedLast uint64 fileBatchTimer = time.Now() fileBatchTimerMu sync.Mutex ) l := c.infoLogger.WithField("substep", "collect") logFilesProcessed := func(force bool) { fileBatchTimerMu.Lock() if force || time.Since(fileBatchTimer) > 3*time.Second { numFilesProcessedBatch := numFilesProcessedTotal.Load() - numFilesProcessedLast numFilesProcessedLast = numFilesProcessedTotal.Load() loggers.TimeTrackf(l, fileBatchTimer, logg.Fields{ logg.Field{Name: "files", Value: numFilesProcessedBatch}, logg.Field{Name: "files_total", Value: numFilesProcessedTotal.Load()}, }, "", ) fileBatchTimer = time.Now() } fileBatchTimerMu.Unlock() } defer func() { logFilesProcessed(true) }() c.g = rungroup.Run[hugofs.FileMetaInfo](c.ctx, rungroup.Config[hugofs.FileMetaInfo]{ NumWorkers: numWorkers, Handle: func(ctx context.Context, fi hugofs.FileMetaInfo) error { if err := c.m.AddFi(fi); err != nil { return hugofs.AddFileInfoToError(err, fi, c.fs) } numFilesProcessedTotal.Add(1) if numFilesProcessedTotal.Load()%1000 == 0 { logFilesProcessed(false) } return nil }, }) if c.ids == nil { // Collect everything. collectErr = c.collectDir(nil, false, nil) } else { for _, s := range c.h.Sites { s.pageMap.cfg.isRebuild = true } for _, id := range c.ids { if id.p.IsLeafBundle() { collectErr = c.collectDir( id.p, false, func(fim hugofs.FileMetaInfo) bool { return true }, ) } else if id.p.IsBranchBundle() { collectErr = c.collectDir( id.p, false, func(fim hugofs.FileMetaInfo) bool { if fim.IsDir() { return true } fimp := fim.Meta().PathInfo if fimp == nil { return false } return strings.HasPrefix(fimp.Path(), paths.AddTrailingSlash(id.p.Dir())) }, ) } else { // We always start from a directory. 
collectErr = c.collectDir(id.p, id.isDir, func(fim hugofs.FileMetaInfo) bool { if id.delete || id.isDir { if id.isDir && fim.Meta().PathInfo.IsLeafBundle() { return strings.HasPrefix(fim.Meta().PathInfo.Path(), paths.AddTrailingSlash(id.p.Path())) } return id.p.Dir() == fim.Meta().PathInfo.Dir() } if fim.Meta().PathInfo.IsLeafBundle() && id.p.BundleType() == paths.PathTypeContentSingle { return id.p.Dir() == fim.Meta().PathInfo.Dir() } return id.p.Path() == fim.Meta().PathInfo.Path() }) } if collectErr != nil { break } } } werr := c.g.Wait() if collectErr == nil { collectErr = werr } return } func (c *pagesCollector) collectDir(dirPath *paths.Path, isDir bool, inFilter func(fim hugofs.FileMetaInfo) bool) error { var dpath string if dirPath != nil { if isDir { dpath = filepath.FromSlash(dirPath.Unnormalized().Path()) } else { dpath = filepath.FromSlash(dirPath.Unnormalized().Dir()) } } if c.seenDirs[dpath] { return nil } c.seenDirs[dpath] = true root, err := c.fs.Stat(dpath) if err != nil { if os.IsNotExist(err) { return nil } return err } rootm := root.(hugofs.FileMetaInfo) if err := c.collectDirDir(dpath, rootm, inFilter); err != nil { return err } return nil } func (c *pagesCollector) collectDirDir(path string, root hugofs.FileMetaInfo, inFilter func(fim hugofs.FileMetaInfo) bool) error { filter := func(fim hugofs.FileMetaInfo) bool { if inFilter != nil { return inFilter(fim) } return true } preHook := func(dir hugofs.FileMetaInfo, path string, readdir []hugofs.FileMetaInfo) ([]hugofs.FileMetaInfo, error) { filtered := readdir[:0] for _, fi := range readdir { if filter(fi) { filtered = append(filtered, fi) } } readdir = filtered if len(readdir) == 0 { return nil, nil } // Pick the first regular file. var first hugofs.FileMetaInfo for _, fi := range readdir { if fi.IsDir() { continue } first = fi break } if first == nil { // Only dirs, keep walking. return readdir, nil } // Any bundle file will always be first. 
firstPi := first.Meta().PathInfo if firstPi == nil { panic(fmt.Sprintf("collectDirDir: no path info for %q", first.Meta().Filename)) } if firstPi.IsLeafBundle() { if err := c.handleBundleLeaf(dir, first, path, readdir); err != nil { return nil, err } return nil, filepath.SkipDir } seen := map[hstrings.Tuple]bool{} for _, fi := range readdir { if fi.IsDir() { continue } pi := fi.Meta().PathInfo meta := fi.Meta() // Filter out duplicate page or resource. // These would eventually have been filtered out as duplicates when // inserting them into the document store, // but doing it here will preserve a consistent ordering. baseLang := hstrings.Tuple{First: pi.Base(), Second: meta.Lang} if seen[baseLang] { continue } seen[baseLang] = true if pi == nil { panic(fmt.Sprintf("no path info for %q", meta.Filename)) } if meta.Lang == "" { panic("lang not set") } if err := c.g.Enqueue(fi); err != nil { return nil, err } } // Keep walking. return readdir, nil } var postHook hugofs.WalkHook wfn := func(path string, fi hugofs.FileMetaInfo) error { return nil } w := hugofs.NewWalkway( hugofs.WalkwayConfig{ Logger: c.logger, Root: path, Info: root, Fs: c.fs, IgnoreFile: c.h.SourceSpec.IgnoreFile, HookPre: preHook, HookPost: postHook, WalkFn: wfn, }) return w.Walk() } func (c *pagesCollector) handleBundleLeaf(dir, bundle hugofs.FileMetaInfo, inPath string, readdir []hugofs.FileMetaInfo) error { bundlePi := bundle.Meta().PathInfo seen := map[hstrings.Tuple]bool{} walk := func(path string, info hugofs.FileMetaInfo) error { if info.IsDir() { return nil } pi := info.Meta().PathInfo if info != bundle { // Everything inside a leaf bundle is a Resource, // even the content pages. // Note that we do allow index.md as page resources, but not in the bundle root. if !pi.IsLeafBundle() || pi.Dir() != bundlePi.Dir() { paths.ModifyPathBundleTypeResource(pi) } } // Filter out duplicate page or resource. 
// These would eventually have been filtered out as duplicates when // inserting them into the document store, // but doing it here will preserve a consistent ordering. baseLang := hstrings.Tuple{First: pi.Base(), Second: info.Meta().Lang} if seen[baseLang] { return nil } seen[baseLang] = true return c.g.Enqueue(info) } // Start a new walker from the given path. w := hugofs.NewWalkway( hugofs.WalkwayConfig{ Root: inPath, Fs: c.fs, Logger: c.logger, Info: dir, DirEntries: readdir, IgnoreFile: c.h.SourceSpec.IgnoreFile, WalkFn: walk, }) return w.Walk() }