Diffstat (limited to 'hugolib/pages_capture.go')
-rw-r--r--  hugolib/pages_capture.go  779
1 file changed, 779 insertions(+), 0 deletions(-)
diff --git a/hugolib/pages_capture.go b/hugolib/pages_capture.go
new file mode 100644
index 000000000..094933793
--- /dev/null
+++ b/hugolib/pages_capture.go
@@ -0,0 +1,779 @@
+// Copyright 2019 The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package hugolib
+
+import (
+ "context"
+ "fmt"
+ "os"
+ pth "path"
+ "path/filepath"
+ "strings"
+
+ "github.com/gohugoio/hugo/common/hugio"
+ "github.com/gohugoio/hugo/common/loggers"
+ "github.com/gohugoio/hugo/config"
+ "github.com/gohugoio/hugo/hugofs"
+ "github.com/gohugoio/hugo/hugofs/files"
+ "github.com/gohugoio/hugo/resources"
+ "github.com/gohugoio/hugo/resources/resource"
+ "github.com/gohugoio/hugo/source"
+
+ "github.com/pkg/errors"
+ "github.com/spf13/afero"
+ "golang.org/x/sync/errgroup"
+)
+
+func newPagesCollector(
+ sp *source.SourceSpec,
+ logger *loggers.Logger,
+ contentTracker *contentChangeMap,
+ proc pagesCollectorProcessorProvider, filenames ...string) *pagesCollector {
+
+ return &pagesCollector{
+ fs: sp.SourceFs,
+ proc: proc,
+ sp: sp,
+ logger: logger,
+ filenames: filenames,
+ tracker: contentTracker,
+ }
+}
+
+func newPagesProcessor(h *HugoSites, sp *source.SourceSpec, partialBuild bool) *pagesProcessor {
+
+ return &pagesProcessor{
+ h: h,
+ sp: sp,
+ partialBuild: partialBuild,
+ numWorkers: config.GetNumWorkerMultiplier() * 3,
+ }
+}
+
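+// fileinfoBundle holds a bundle's content file header (an index.* or
+// _index.* file) and the sibling files bundled with it.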
+type fileinfoBundle struct {
+ header hugofs.FileMetaInfo
+ resources []hugofs.FileMetaInfo
+}
+
+func (b *fileinfoBundle) containsResource(name string) bool {
+ for _, r := range b.resources {
+ if r.Name() == name {
+ return true
+ }
+ }
+
+ return false
+
+}
+
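+// pageBundles maps a language to its bundle for the directory currently
+// being processed.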
+type pageBundles map[string]*fileinfoBundle
+
+type pagesCollector struct {
+ sp *source.SourceSpec
+ fs afero.Fs
+ logger *loggers.Logger
+
+ // Ordered list (bundle headers first) used in partial builds.
+ filenames []string
+
+ // Content files tracker used in partial builds.
+ tracker *contentChangeMap
+
+ proc pagesCollectorProcessorProvider
+}
+
+type contentDirKey struct {
+ dirname string
+ filename string
+ tp bundleDirType
+}
+
+// Collect collects content by walking the file system, either from the
+// content root or, for partial builds, from the changed directories.
+func (c *pagesCollector) Collect() error {
+ c.proc.Start(context.Background())
+ if c.tracker != nil {
+ c.tracker.start()
+ defer c.tracker.stop()
+ }
+
+ var collectErr error
+ if len(c.filenames) == 0 {
+ // Collect everything.
+ collectErr = c.collectDir("", false, nil)
+ } else {
+ dirs := make(map[contentDirKey]bool)
+ for _, filename := range c.filenames {
+ dir, filename, btype := c.tracker.resolveAndRemove(filename)
+ dirs[contentDirKey{dir, filename, btype}] = true
+ }
+
+ for dir := range dirs {
+ switch dir.tp {
+ case bundleLeaf, bundleBranch:
+ collectErr = c.collectDir(dir.dirname, true, nil)
+ default:
+ // We always start from a directory.
+ collectErr = c.collectDir(dir.dirname, true, func(fim hugofs.FileMetaInfo) bool {
+ return strings.HasSuffix(dir.filename, fim.Meta().Path())
+ })
+ }
+
+ if collectErr != nil {
+ break
+ }
+ }
+
+ }
+
+ err := c.proc.Wait()
+
+ if collectErr != nil {
+ return collectErr
+ }
+
+ return err
+}
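+
+// Typical wiring (an illustrative sketch; h, sp, logger and tracker are
+// assumed to come from site setup elsewhere in hugolib):
+//
+//	proc := newPagesProcessor(h, sp, false)
+//	c := newPagesCollector(sp, logger, tracker, proc)
+//	err := c.Collect()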
+
+func (c *pagesCollector) collectDir(dirname string, partial bool, inFilter func(fim hugofs.FileMetaInfo) bool) error {
+ fi, err := c.fs.Stat(dirname)
+ if err != nil {
+ if os.IsNotExist(err) {
+ // May have been deleted.
+ return nil
+ }
+ return err
+ }
+
+ handleDir := func(
+ btype bundleDirType,
+ dir hugofs.FileMetaInfo,
+ path string,
+ readdir []hugofs.FileMetaInfo) error {
+
+ if btype > bundleNot && c.tracker != nil {
+ c.tracker.add(path, btype)
+ }
+
+ if btype == bundleBranch {
+ if err := c.handleBundleBranch(readdir); err != nil {
+ return err
+ }
+ // A branch bundle is only this directory level, so keep walking.
+ return nil
+ } else if btype == bundleLeaf {
+ if err := c.handleBundleLeaf(dir, path, readdir); err != nil {
+ return err
+ }
+
+ return nil
+ }
+
+ if err := c.handleFiles(readdir...); err != nil {
+ return err
+ }
+
+ return nil
+
+ }
+
+ filter := func(fim hugofs.FileMetaInfo) bool {
+ if fim.Meta().SkipDir() {
+ return false
+ }
+
+ if c.sp.IgnoreFile(fim.Meta().Filename()) {
+ return false
+ }
+
+ if inFilter != nil {
+ return inFilter(fim)
+ }
+ return true
+ }
+
+ preHook := func(dir hugofs.FileMetaInfo, path string, readdir []hugofs.FileMetaInfo) ([]hugofs.FileMetaInfo, error) {
+ var btype bundleDirType
+
+ filtered := readdir[:0]
+ for _, fi := range readdir {
+ if filter(fi) {
+ filtered = append(filtered, fi)
+ if c.tracker != nil {
+ // Track symlinks.
+ c.tracker.addSymbolicLinkMapping(fi)
+ }
+ }
+ }
+ readdir = filtered
+
+ // We merge language directories, so there can be duplicates, but they
+ // will be ordered, most important first.
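+ // For example, en/about.md and nn/about.md yield the distinct keys
+ // "en/about.md" and "nn/about.md", while a second file resolving to
+ // "en/about.md" from another merged directory repeats the key and is
+ // recorded as a duplicate below.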
+ var duplicates []int
+ seen := make(map[string]bool)
+
+ for i, fi := range readdir {
+
+ if fi.IsDir() {
+ continue
+ }
+
+ meta := fi.Meta()
+ class := meta.Classifier()
+ translationBase := meta.TranslationBaseNameWithExt()
+ key := pth.Join(meta.Lang(), translationBase)
+
+ if seen[key] {
+ duplicates = append(duplicates, i)
+ continue
+ }
+ seen[key] = true
+
+ var thisBtype bundleDirType
+
+ switch class {
+ case files.ContentClassLeaf:
+ thisBtype = bundleLeaf
+ case files.ContentClassBranch:
+ thisBtype = bundleBranch
+ }
+
+ // Folders containing both index.* and _index.* files have
+ // undefined behaviour and can never work.
+ // The branch variant will win because of the sort order, but log
+ // a warning about it.
+ if thisBtype > bundleNot && btype > bundleNot && thisBtype != btype {
+ c.logger.WARN.Printf("Content directory %q has both index.* and _index.* files; pick one.", dir.Meta().Filename())
+ // Reclassify it so it will be handled as a content file inside the
+ // section, which is in line with the <= 0.55 behaviour.
+ meta["classifier"] = files.ContentClassContent
+ } else if thisBtype > bundleNot {
+ btype = thisBtype
+ }
+
+ }
+
+ if len(duplicates) > 0 {
+ for i := len(duplicates) - 1; i >= 0; i-- {
+ idx := duplicates[i]
+ readdir = append(readdir[:idx], readdir[idx+1:]...)
+ }
+ }
+
+ err := handleDir(btype, dir, path, readdir)
+ if err != nil {
+ return nil, err
+ }
+
+ if btype == bundleLeaf || partial {
+ return nil, filepath.SkipDir
+ }
+
+ // Keep walking.
+ return readdir, nil
+
+ }
+
+ var postHook hugofs.WalkHook
+ if c.tracker != nil {
+ postHook = func(dir hugofs.FileMetaInfo, path string, readdir []hugofs.FileMetaInfo) ([]hugofs.FileMetaInfo, error) {
+ // Currently a pass-through; reserved for bookkeeping after a
+ // directory has been walked.
+ return readdir, nil
+ }
+ }
+
+ wfn := func(path string, info hugofs.FileMetaInfo, err error) error {
+ if err != nil {
+ return err
+ }
+
+ return nil
+ }
+
+ w := hugofs.NewWalkway(hugofs.WalkwayConfig{
+ Fs: c.fs,
+ Logger: c.logger,
+ Root: dirname,
+ Info: fi.(hugofs.FileMetaInfo),
+ HookPre: preHook,
+ HookPost: postHook,
+ WalkFn: wfn})
+
+ return w.Walk()
+
+}
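+
+// In short, the walk above has two phases per directory: preHook filters
+// the entries, classifies the bundle type and strips duplicates, then
+// handleDir dispatches on that type, and the walker descends further only
+// for plain content directories in a full build.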
+
+func (c *pagesCollector) isBundleHeader(fi hugofs.FileMetaInfo) bool {
+ class := fi.Meta().Classifier()
+ return class == files.ContentClassLeaf || class == files.ContentClassBranch
+}
+
+func (c *pagesCollector) getLang(fi hugofs.FileMetaInfo) string {
+ lang := fi.Meta().Lang()
+ if lang != "" {
+ return lang
+ }
+
+ return c.sp.DefaultContentLanguage
+}
+
+func (c *pagesCollector) addToBundle(info hugofs.FileMetaInfo, btyp bundleDirType, bundles pageBundles) error {
+ getBundle := func(lang string) *fileinfoBundle {
+ return bundles[lang]
+ }
+
+ cloneBundle := func(lang string) *fileinfoBundle {
+ // Every bundled file needs a content file header.
+ // Use the default content language if found, else just
+ // pick one.
+ var (
+ source *fileinfoBundle
+ found bool
+ )
+
+ source, found = bundles[c.sp.DefaultContentLanguage]
+ if !found {
+ for _, b := range bundles {
+ source = b
+ break
+ }
+ }
+
+ if source == nil {
+ panic(fmt.Sprintf("no source found, %d", len(bundles)))
+ }
+
+ clone := c.cloneFileInfo(source.header)
+ clone.Meta()["lang"] = lang
+
+ return &fileinfoBundle{
+ header: clone,
+ }
+ }
+
+ lang := c.getLang(info)
+ bundle := getBundle(lang)
+ isBundleHeader := c.isBundleHeader(info)
+ classifier := info.Meta().Classifier()
+ if bundle == nil {
+ if isBundleHeader {
+ bundle = &fileinfoBundle{header: info}
+ bundles[lang] = bundle
+ } else {
+ if btyp == bundleBranch {
+ // No special logic for branch bundles.
+ // Every language needs its own _index.md file.
+ return c.handleFiles(info)
+ }
+
+ bundle = cloneBundle(lang)
+ bundles[lang] = bundle
+ }
+ }
+
+ if !isBundleHeader {
+ bundle.resources = append(bundle.resources, info)
+ }
+
+ if classifier == files.ContentClassFile {
+ translations := info.Meta().Translations()
+ if len(translations) < len(bundles) {
+ for lang, b := range bundles {
+ if !stringSliceContains(lang, translations...) && !b.containsResource(info.Name()) {
+ // Clone and add it to the bundle.
+ clone := c.cloneFileInfo(info)
+ clone.Meta()["lang"] = lang
+ b.resources = append(b.resources, clone)
+ }
+ }
+ }
+ }
+
+ return nil
+}
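+
+// To illustrate addToBundle: a leaf bundle holding index.md (en),
+// index.nn.md (nn) and sunset.jpg becomes two fileinfoBundles, one per
+// language, and sunset.jpg is cloned into any language bundle that lacks
+// its own translation of the file.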
+
+func (c *pagesCollector) cloneFileInfo(fi hugofs.FileMetaInfo) hugofs.FileMetaInfo {
+ cm := hugofs.FileMeta{}
+ meta := fi.Meta()
+ if meta == nil {
+ panic(fmt.Sprintf("not meta: %v", fi.Name()))
+ }
+ for k, v := range meta {
+ cm[k] = v
+ }
+
+ return hugofs.NewFileMetaInfo(fi, cm)
+}
+
+func (c *pagesCollector) handleBundleBranch(readdir []hugofs.FileMetaInfo) error {
+
+ // Maps each language to its bundle.
+ bundles := pageBundles{}
+
+ for _, fim := range readdir {
+
+ if fim.IsDir() {
+ continue
+ }
+
+ meta := fim.Meta()
+
+ switch meta.Classifier() {
+ case files.ContentClassContent:
+ if err := c.handleFiles(fim); err != nil {
+ return err
+ }
+ default:
+ if err := c.addToBundle(fim, bundleBranch, bundles); err != nil {
+ return err
+ }
+ }
+
+ }
+
+ return c.proc.Process(bundles)
+
+}
+
+func (c *pagesCollector) handleBundleLeaf(dir hugofs.FileMetaInfo, path string, readdir []hugofs.FileMetaInfo) error {
+ // Maps each language to its bundle.
+ bundles := pageBundles{}
+
+ walk := func(path string, info hugofs.FileMetaInfo, err error) error {
+ if err != nil {
+ return err
+ }
+ if info.IsDir() {
+ return nil
+ }
+
+ return c.addToBundle(info, bundleLeaf, bundles)
+
+ }
+
+ // Start a new walker from the given path.
+ w := hugofs.NewWalkway(hugofs.WalkwayConfig{
+ Root: path,
+ Fs: c.fs,
+ Logger: c.logger,
+ Info: dir,
+ DirEntries: readdir,
+ WalkFn: walk})
+
+ if err := w.Walk(); err != nil {
+ return err
+ }
+
+ return c.proc.Process(bundles)
+
+}
+
+func (c *pagesCollector) handleFiles(fis ...hugofs.FileMetaInfo) error {
+ for _, fi := range fis {
+ if fi.IsDir() {
+ continue
+ }
+
+ if err := c.proc.Process(fi); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
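+// pagesCollectorProcessorProvider receives the items found by the
+// collector, either single files or pageBundles, turns them into pages or
+// published files, and reports any processing error from Wait.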
+type pagesCollectorProcessorProvider interface {
+ Process(item interface{}) error
+ Start(ctx context.Context) context.Context
+ Wait() error
+}
+
+type pagesProcessor struct {
+ h *HugoSites
+ sp *source.SourceSpec
+
+ itemChan chan interface{}
+ itemGroup *errgroup.Group
+
+ // The output Pages
+ pagesChan chan *pageState
+ pagesGroup *errgroup.Group
+
+ numWorkers int
+
+ partialBuild bool
+}
+
+func (proc *pagesProcessor) Process(item interface{}) error {
+ proc.itemChan <- item
+ return nil
+}
+
+func (proc *pagesProcessor) Start(ctx context.Context) context.Context {
+ proc.pagesChan = make(chan *pageState, proc.numWorkers)
+ proc.pagesGroup, ctx = errgroup.WithContext(ctx)
+ proc.itemChan = make(chan interface{}, proc.numWorkers)
+ proc.itemGroup, ctx = errgroup.WithContext(ctx)
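+ // Note that ctx is rebound by each errgroup.WithContext call, so the
+ // context returned from Start is the one derived for the item workers.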
+
+ proc.pagesGroup.Go(func() error {
+ for p := range proc.pagesChan {
+ s := p.s
+ p.forceRender = proc.partialBuild
+
+ if p.forceRender {
+ s.replacePage(p)
+ } else {
+ s.addPage(p)
+ }
+ }
+ return nil
+ })
+
+ for i := 0; i < proc.numWorkers; i++ {
+ proc.itemGroup.Go(func() error {
+ for item := range proc.itemChan {
+ select {
+ case <-proc.h.Done():
+ return nil
+ default:
+ if err := proc.process(item); err != nil {
+ proc.h.SendError(err)
+ }
+ }
+ }
+
+ return nil
+ })
+ }
+
+ return ctx
+}
+
+func (proc *pagesProcessor) Wait() error {
+ close(proc.itemChan)
+
+ err := proc.itemGroup.Wait()
+
+ close(proc.pagesChan)
+
+ if err != nil {
+ return err
+ }
+
+ return proc.pagesGroup.Wait()
+}
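+
+// The close order in Wait matters: itemChan is closed and drained by the
+// worker goroutines before pagesChan is closed, so every page produced
+// reaches the goroutine that adds pages to the sites before it exits.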
+
+func (proc *pagesProcessor) newPageFromBundle(b *fileinfoBundle) (*pageState, error) {
+ p, err := proc.newPageFromFi(b.header, nil)
+ if err != nil {
+ return nil, err
+ }
+
+ if len(b.resources) > 0 {
+
+ resources := make(resource.Resources, len(b.resources))
+
+ for i, rfi := range b.resources {
+ meta := rfi.Meta()
+ classifier := meta.Classifier()
+ var r resource.Resource
+ switch classifier {
+ case files.ContentClassContent:
+ rp, err := proc.newPageFromFi(rfi, p)
+ if err != nil {
+ return nil, err
+ }
+ rp.m.resourcePath = filepath.ToSlash(strings.TrimPrefix(rp.Path(), p.File().Dir()))
+
+ r = rp
+
+ case files.ContentClassFile:
+ r, err = proc.newResource(rfi, p)
+ if err != nil {
+ return nil, err
+ }
+ default:
+ panic(fmt.Sprintf("invalid classifier: %q", classifier))
+ }
+
+ resources[i] = r
+
+ }
+
+ p.addResources(resources...)
+ }
+
+ return p, nil
+}
+
+func (proc *pagesProcessor) newPageFromFi(fim hugofs.FileMetaInfo, owner *pageState) (*pageState, error) {
+ fi, err := newFileInfo(proc.sp, fim)
+ if err != nil {
+ return nil, err
+ }
+
+ var s *Site
+ meta := fim.Meta()
+
+ if owner != nil {
+ s = owner.s
+ } else {
+ lang := meta.Lang()
+ s = proc.getSite(lang)
+ }
+
+ r := func() (hugio.ReadSeekCloser, error) {
+ return meta.Open()
+ }
+
+ p, err := newPageWithContent(fi, s, owner != nil, r)
+ if err != nil {
+ return nil, err
+ }
+ p.parent = owner
+ return p, nil
+}
+
+func (proc *pagesProcessor) newResource(fim hugofs.FileMetaInfo, owner *pageState) (resource.Resource, error) {
+
+ // TODO(bep) consolidate with multihost logic + clean up
+ outputFormats := owner.m.outputFormats()
+ seen := make(map[string]bool)
+ var targetBasePaths []string
+ // Make sure bundled resources are published to all of the output
+ // formats' sub paths.
+ for _, f := range outputFormats {
+ p := f.Path
+ if seen[p] {
+ continue
+ }
+ seen[p] = true
+ targetBasePaths = append(targetBasePaths, p)
+
+ }
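+ // With, say, the built-in HTML ("") and AMP ("amp") output formats
+ // enabled, targetBasePaths holds both the site root and the amp sub path.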
+
+ meta := fim.Meta()
+ r := func() (hugio.ReadSeekCloser, error) {
+ return meta.Open()
+ }
+
+ target := strings.TrimPrefix(meta.Path(), owner.File().Dir())
+
+ return owner.s.ResourceSpec.New(
+ resources.ResourceSourceDescriptor{
+ TargetPaths: owner.getTargetPaths,
+ OpenReadSeekCloser: r,
+ FileInfo: fim,
+ RelTargetFilename: target,
+ TargetBasePaths: targetBasePaths,
+ })
+}
+
+func (proc *pagesProcessor) getSite(lang string) *Site {
+ if lang == "" {
+ return proc.h.Sites[0]
+ }
+
+ for _, s := range proc.h.Sites {
+ if lang == s.Lang() {
+ return s
+ }
+ }
+ return proc.h.Sites[0]
+}
+
+func (proc *pagesProcessor) copyFile(fim hugofs.FileMetaInfo) error {
+ meta := fim.Meta()
+ s := proc.getSite(meta.Lang())
+ f, err := meta.Open()
+ if err != nil {
+ return errors.Wrap(err, "copyFile: failed to open")
+ }
+
+ target := filepath.Join(s.PathSpec.GetTargetLanguageBasePath(), meta.Path())
+
+ defer f.Close()
+
+ return s.publish(&s.PathSpec.ProcessingStats.Files, target, f)
+
+}
+
+func (proc *pagesProcessor) process(item interface{}) error {
+ send := func(p *pageState, err error) {
+ if err != nil {
+ proc.sendError(err)
+ } else {
+ proc.pagesChan <- p
+ }
+ }
+
+ switch v := item.(type) {
+ // Page bundles mapped to their language.
+ case pageBundles:
+ for _, bundle := range v {
+ if proc.shouldSkip(bundle.header) {
+ continue
+ }
+ send(proc.newPageFromBundle(bundle))
+ }
+ case hugofs.FileMetaInfo:
+ if proc.shouldSkip(v) {
+ return nil
+ }
+ meta := v.Meta()
+
+ classifier := meta.Classifier()
+ switch classifier {
+ case files.ContentClassContent:
+ send(proc.newPageFromFi(v, nil))
+ case files.ContentClassFile:
+ proc.sendError(proc.copyFile(v))
+ default:
+ panic(fmt.Sprintf("invalid classifier: %q", classifier))
+ }
+ default:
+ panic(fmt.Sprintf("unrecognized item type in Process: %T", item))
+ }
+
+ return nil
+}
+
+func (proc *pagesProcessor) sendError(err error) {
+ if err == nil {
+ return
+ }
+ proc.h.SendError(err)
+}
+
+func (proc *pagesProcessor) shouldSkip(fim hugofs.FileMetaInfo) bool {
+ return proc.sp.DisabledLanguages[fim.Meta().Lang()]
+}
+
+func stringSliceContains(k string, values ...string) bool {
+ for _, v := range values {
+ if k == v {
+ return true
+ }
+ }
+ return false
+}