Make Page an interface

The main motivation of this commit is to add a `page.Page` interface to replace the very file-oriented `hugolib.Page` struct. This is all a preparation step for issue #5074, "pages from other data sources". But this also fixes a set of annoying limitations, especially related to custom output formats, and shortcodes. Most notable changes: * The inner content of shortcodes using the `{{%` as the outer-most delimiter will now be sent to the content renderer, e.g. Blackfriday. This means that any markdown will partake in the global ToC and footnote context etc. * The Custom Output formats are now "fully virtualized". This removes many of the current limitations. * The taxonomy list type now has a reference to the `Page` object. This improves the taxonomy template `.Title` situation and makes common template constructs much simpler. See #5074 Fixes #5763 Fixes #5758 Fixes #5090 Fixes #5204 Fixes #4695 Fixes #5607 Fixes #5707 Fixes #5719 Fixes #3113 Fixes #5706 Fixes #5767 Fixes #5723 Fixes #5769 Fixes #5770 Fixes #5771 Fixes #5759 Fixes #5776 Fixes #5777 Fixes #5778
author: Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> 2019-01-02 12:33:26 +0100
committer: Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> 2019-03-23 18:51:22 +0100
commit: 597e418cb02883418f2cebb41400e8e61413f651 (patch)
tree: 177ad9c540b2583b6dab138c9f0490d28989c7f7 /hugolib/page__per_output.go
parent: 44f5c1c14cb1f42cc5f01739c289e9cfc83602af (diff)
1 files changed, 445 insertions, 0 deletions
diff --git a/hugolib/page__per_output.go b/hugolib/page__per_output.go
new file mode 100644
index 000000000..05b35cc87
--- /dev/null
+++ b/hugolib/page__per_output.go
@@ -0,0 +1,445 @@
+// Copyright 2019 The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package hugolib
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"html/template"
+	"strings"
+	"sync"
+	"unicode/utf8"
+
+	"github.com/gohugoio/hugo/lazy"
+
+	bp "github.com/gohugoio/hugo/bufferpool"
+	"github.com/gohugoio/hugo/tpl"
+
+	"github.com/gohugoio/hugo/output"
+
+	"github.com/gohugoio/hugo/helpers"
+	"github.com/gohugoio/hugo/resources/page"
+	"github.com/gohugoio/hugo/resources/resource"
+)
+
+// No-op defaults for the per-output-format providers.
+var (
+	// nopTargetPath is a targetPather holding zero-valued target paths.
+	nopTargetPath = targetPathsHolder{}
+
+	// nopPagePerOutput satisfies every per-output provider interface by
+	// delegating to page.NopPage, with nopTargetPath as its targetPather.
+	nopPagePerOutput = struct {
+		resource.ResourceLinksProvider
+		page.ContentProvider
+		page.PageRenderProvider
+		page.PaginatorProvider
+		page.TableOfContentsProvider
+		page.AlternativeOutputFormatsProvider
+
+		targetPather
+	}{
+		ResourceLinksProvider:            page.NopPage,
+		ContentProvider:                  page.NopPage,
+		PageRenderProvider:               page.NopPage,
+		PaginatorProvider:                page.NopPage,
+		TableOfContentsProvider:          page.NopPage,
+		AlternativeOutputFormatsProvider: page.NopPage,
+		targetPather:                     nopTargetPath,
+	}
+)
+
+// newPageContentOutput returns a factory that builds the pageContentOutput of
+// page p for a given output format f.
+//
+// The rendering work is not called directly here; it is wrapped in lazy.Init
+// branches hanging off p.init:
+//
+//   - initMain renders shortcodes and markup, extracts the table of contents,
+//     replaces shortcode placeholders and splits off a user defined summary.
+//   - initPlain (a branch of initMain) derives the plain text, the word counts
+//     and, when there is no summary divider, the automatic summary.
+func newPageContentOutput(p *pageState) func(f output.Format) (*pageContentOutput, error) {
+
+	parent := p.init
+
+	return func(f output.Format) (*pageContentOutput, error) {
+		cp := &pageContentOutput{
+			p: p,
+			f: f,
+		}
+
+		initContent := func() error {
+			var err error
+			var hasVariants bool
+
+			cp.contentPlaceholders, hasVariants, err = p.shortcodeState.renderShortcodesForPage(p, f)
+			if err != nil {
+				return err
+			}
+
+			if p.render && !hasVariants {
+				// We can reuse this for the other output formats
+				cp.enableReuse()
+			}
+
+			cp.workContent = p.contentToRender(cp.contentPlaceholders)
+
+			isHTML := cp.p.m.markup == "html"
+
+			if p.renderable {
+				if !isHTML {
+					cp.workContent = cp.renderContent(p, cp.workContent)
+					tmpContent, tmpTableOfContents := helpers.ExtractTOC(cp.workContent)
+					cp.tableOfContents = helpers.BytesToHTML(tmpTableOfContents)
+					cp.workContent = tmpContent
+				}
+
+				if cp.placeholdersEnabled {
+					// ToC was accessed via .Page.TableOfContents in the shortcode,
+					// at a time when the ToC wasn't ready.
+					cp.contentPlaceholders[tocShortcodePlaceholder] = string(cp.tableOfContents)
+				}
+
+				if p.cmap.hasNonMarkdownShortcode || cp.placeholdersEnabled {
+					// There are one or more replacement tokens to be replaced.
+					cp.workContent, err = replaceShortcodeTokens(cp.workContent, cp.contentPlaceholders)
+					if err != nil {
+						return err
+					}
+				}
+
+				if cp.p.source.hasSummaryDivider {
+					if isHTML {
+						src := p.source.parsed.Input()
+
+						// Use the summary sections as they are provided by the user.
+						if p.source.posSummaryEnd != -1 {
+							cp.summary = helpers.BytesToHTML(src[p.source.posMainContent:p.source.posSummaryEnd])
+						}
+
+						if cp.p.source.posBodyStart != -1 {
+							cp.workContent = src[cp.p.source.posBodyStart:]
+						}
+
+					} else {
+						// A failed split is logged, not returned: the page keeps
+						// its unsplit content and gets no user defined summary.
+						summary, content, err := splitUserDefinedSummaryAndContent(cp.p.m.markup, cp.workContent)
+						if err != nil {
+							cp.p.s.Log.ERROR.Printf("Failed to set user defined summary for page %q: %s", cp.p.pathOrTitle(), err)
+						} else {
+							cp.workContent = content
+							cp.summary = helpers.BytesToHTML(summary)
+						}
+					}
+				}
+			}
+
+			cp.content = helpers.BytesToHTML(cp.workContent)
+
+			if !p.renderable {
+				// Non-renderable content is registered as a template of its own.
+				return cp.addSelfTemplate()
+			}
+
+			return nil
+
+		}
+
+		// Recursive loops can only happen in content files with template code (shortcodes etc.)
+		// Avoid creating new goroutines if we don't have to.
+		needTimeout := !p.renderable || p.shortcodeState.hasShortcodes()
+
+		if needTimeout {
+			cp.initMain = parent.BranchdWithTimeout(p.s.siteCfg.timeout, func(ctx context.Context) (interface{}, error) {
+				return nil, initContent()
+			})
+		} else {
+			cp.initMain = parent.Branch(func() (interface{}, error) {
+				return nil, initContent()
+			})
+		}
+
+		cp.initPlain = cp.initMain.Branch(func() (interface{}, error) {
+			cp.plain = helpers.StripHTML(string(cp.content))
+			cp.plainWords = strings.Fields(cp.plain)
+			cp.setWordCounts(p.m.isCJKLanguage)
+
+			if err := cp.setAutoSummary(); err != nil {
+				// The error belongs in the error slot; returning it as the
+				// value would make this branch report success.
+				return nil, err
+			}
+
+			return nil, nil
+		})
+
+		return cp, nil
+
+	}
+
+}
+
+// pageContentOutput represents the Page content for a given output format.
+// Its content fields are filled in by the initMain and initPlain initializers
+// set up in newPageContentOutput.
+type pageContentOutput struct {
+	// The output format this content was created for.
+	f output.Format
+
+	// If we can safely reuse this for other output formats.
+	reuse     bool
+	reuseInit sync.Once
+
+	// The page that owns this content.
+	p *pageState
+
+	// Lazy load dependencies
+	initMain  *lazy.Init
+	initPlain *lazy.Init
+
+	// Set via enablePlaceholders when a placeholder has been inserted into the
+	// rendered content and therefore needs to be replaced afterwards.
+	placeholdersEnabled     bool
+	placeholdersEnabledInit sync.Once
+
+	// Content state
+
+	// The content being worked on while initMain runs.
+	workContent []byte
+
+	// Temporary storage of placeholders mapped to their content.
+	// These are shortcodes etc. Some of these will need to be replaced
+	// after any markup is rendered, so they share a common prefix.
+	contentPlaceholders map[string]string
+
+	// Content sections
+	content         template.HTML
+	summary         template.HTML
+	tableOfContents template.HTML
+
+	// Truncation flag stored by setAutoSummary.
+	truncated bool
+
+	// Plain text variants and counters, set by the initPlain initializer.
+	plainWords     []string
+	plain          string
+	fuzzyWordCount int
+	wordCount      int
+	readingTime    int
+}
+
+// Content returns the rendered content after triggering initMain through the
+// site's initInit helper. The returned error is always nil.
+func (cp *pageContentOutput) Content() (interface{}, error) {
+	owner := cp.p
+	owner.s.initInit(cp.initMain, owner)
+	return cp.content, nil
+}
+
+// FuzzyWordCount returns the word count rounded up to a multiple of 100, as
+// computed by setWordCounts. It triggers initPlain first.
+func (cp *pageContentOutput) FuzzyWordCount() int {
+	owner := cp.p
+	owner.s.initInit(cp.initPlain, owner)
+	return cp.fuzzyWordCount
+}
+
+// Len returns the length in bytes of the rendered content. It triggers
+// initMain first.
+func (cp *pageContentOutput) Len() int {
+	owner := cp.p
+	owner.s.initInit(cp.initMain, owner)
+	return len(cp.content)
+}
+
+// Plain returns the content with HTML stripped. It triggers initPlain first.
+func (cp *pageContentOutput) Plain() string {
+	owner := cp.p
+	owner.s.initInit(cp.initPlain, owner)
+	return cp.plain
+}
+
+// PlainWords returns the whitespace separated fields of the plain content.
+// It triggers initPlain first.
+func (cp *pageContentOutput) PlainWords() []string {
+	owner := cp.p
+	owner.s.initInit(cp.initPlain, owner)
+	return cp.plainWords
+}
+
+// ReadingTime returns the reading time computed by setWordCounts. It triggers
+// initPlain first.
+func (cp *pageContentOutput) ReadingTime() int {
+	owner := cp.p
+	owner.s.initInit(cp.initPlain, owner)
+	return cp.readingTime
+}
+
+// Summary returns the page summary. initMain is always triggered. When the
+// source has no summary divider, initPlain is triggered as well, because the
+// automatic summary is built there.
+func (cp *pageContentOutput) Summary() template.HTML {
+	owner := cp.p
+	owner.s.initInit(cp.initMain, owner)
+	if owner.source.hasSummaryDivider {
+		return cp.summary
+	}
+	owner.s.initInit(cp.initPlain, owner)
+	return cp.summary
+}
+
+// TableOfContents returns the table of contents extracted from the rendered
+// content. It triggers initMain first.
+func (cp *pageContentOutput) TableOfContents() template.HTML {
+	owner := cp.p
+	owner.s.initInit(cp.initMain, owner)
+	return cp.tableOfContents
+}
+
+// Truncated reports whether the summary is shorter than the content. A
+// truncated flag already set on the owning page wins without any
+// initialization. Otherwise initPlain is triggered and the flag stored on this
+// output is returned.
+func (cp *pageContentOutput) Truncated() bool {
+	if !cp.p.truncated {
+		cp.p.s.initInit(cp.initPlain, cp.p)
+		return cp.truncated
+	}
+	return true
+}
+
+// WordCount returns the word count computed by setWordCounts. It triggers
+// initPlain first.
+func (cp *pageContentOutput) WordCount() int {
+	owner := cp.p
+	owner.s.initInit(cp.initPlain, owner)
+	return cp.wordCount
+}
+
+// setAutoSummary builds the summary from the plain content when the source has
+// no summary divider, and records whether the text was truncated. It does
+// nothing when a divider is present. The returned error is currently always
+// nil.
+func (cp *pageContentOutput) setAutoSummary() error {
+	if cp.p.source.hasSummaryDivider {
+		return nil
+	}
+
+	var (
+		text string
+		cut  bool
+	)
+
+	// CJK content is truncated by rune, everything else by whole sentence.
+	if cp.p.m.isCJKLanguage {
+		text, cut = cp.p.s.ContentSpec.TruncateWordsByRune(cp.plainWords)
+	} else {
+		text, cut = cp.p.s.ContentSpec.TruncateWordsToWholeSentence(cp.plain)
+	}
+
+	cp.summary, cp.truncated = template.HTML(text), cut
+
+	return nil
+}
+
+// renderContent sends content through the site's ContentSpec renderer, using
+// the owning page's markup type and rendering config. ToC rendering is always
+// requested. Language and file identity are taken from p.
+func (cp *pageContentOutput) renderContent(p page.Page, content []byte) []byte {
+	renderCtx := &helpers.RenderingContext{
+		Content:      content,
+		RenderTOC:    true,
+		PageFmt:      cp.p.m.markup,
+		Cfg:          p.Language(),
+		DocumentID:   p.File().UniqueID(),
+		DocumentName: p.File().Path(),
+		Config:       cp.p.getRenderingConfig(),
+	}
+	return cp.p.s.ContentSpec.RenderBytes(renderCtx)
+}
+
+// setWordCounts fills in wordCount, fuzzyWordCount and readingTime from the
+// plain content.
+//
+// For CJK languages a word made of single-byte runes only counts as one word,
+// while any other word counts one per rune. Other languages use
+// helpers.TotalWords on the plain text.
+func (cp *pageContentOutput) setWordCounts(isCJKLanguage bool) {
+	// Divisor for the reading time, which differs for CJK content.
+	wordsPerUnit := 213
+
+	if !isCJKLanguage {
+		cp.wordCount = helpers.TotalWords(cp.plain)
+	} else {
+		wordsPerUnit = 501
+
+		total := 0
+		for _, w := range cp.plainWords {
+			if runes := utf8.RuneCountInString(w); runes == len(w) {
+				total++
+			} else {
+				total += runes
+			}
+		}
+		cp.wordCount = total
+	}
+
+	// TODO(bep) is set in a test. Fix that.
+	if cp.fuzzyWordCount == 0 {
+		cp.fuzzyWordCount = (cp.wordCount + 100) / 100 * 100
+	}
+
+	// Integer division rounded up.
+	cp.readingTime = (cp.wordCount + wordsPerUnit - 1) / wordsPerUnit
+}
+
+// addSelfTemplate registers the page content as a late template under the
+// page's self layout name for this output format.
+func (cp *pageContentOutput) addSelfTemplate() error {
+	name := cp.p.selfLayoutForOutput(cp.f)
+	return cp.p.s.TemplateHandler().AddLateTemplate(name, string(cp.content))
+}
+
+// enablePlaceholders is a callback signalling that a placeholder has been
+// inserted into the rendered content, so replacement work is only done when
+// needed. The flag is set at most once.
+func (cp *pageContentOutput) enablePlaceholders() {
+	markEnabled := func() { cp.placeholdersEnabled = true }
+	cp.placeholdersEnabledInit.Do(markEnabled)
+}
+
+// enableReuse marks this content as safe to reuse for other output formats.
+// The flag is set at most once.
+func (cp *pageContentOutput) enableReuse() {
+	markReusable := func() { cp.reuse = true }
+	cp.reuseInit.Do(markReusable)
+}
+
+// pagePerOutputProviders groups the providers that are specific to one output
+// format: target paths, content, paginator, table of contents and resource
+// links. These will be shifted out when rendering a given output format.
+type pagePerOutputProviders interface {
+	targetPather
+	page.ContentProvider
+	page.PaginatorProvider
+	page.TableOfContentsProvider
+	resource.ResourceLinksProvider
+}
+
+// targetPather is implemented by types that can provide their
+// page.TargetPaths.
+type targetPather interface {
+	targetPaths() page.TargetPaths
+}
+
+// targetPathsHolder pairs a set of target paths with a page.OutputFormat.
+// It implements targetPather.
+type targetPathsHolder struct {
+	paths page.TargetPaths
+	page.OutputFormat
+}
+
+// targetPaths returns the target paths stored in the holder.
+func (h targetPathsHolder) targetPaths() page.TargetPaths {
+	return h.paths
+}
+
+// executeToString executes templ with data into a pooled buffer and returns
+// the output as a string. On failure it returns an empty string together with
+// the execution error. The buffer is handed back to the pool in either case.
+func executeToString(templ tpl.Template, data interface{}) (string, error) {
+	buf := bp.GetBuffer()
+	defer bp.PutBuffer(buf)
+
+	err := templ.Execute(buf, data)
+	if err != nil {
+		return "", err
+	}
+
+	return buf.String(), nil
+}
+
+func splitUserDefinedSummaryAndContent(markup string, c []byte) (summary []byte, content []byte, err error) {
+	defer func() {
+		if r := recover(); r != nil {
+			err = fmt.Errorf("summary split failed: %s", r)
+		}
+	}()
+
+	startDivider := bytes.Index(c, internalSummaryDividerBaseBytes)
+
+	if startDivider == -1 {
+		return
+	}
+
+	startTag := "p"
+	switch markup {
+	case "asciidoc":
+		startTag = "div"
+
+	}
+
+	// Walk back and forward to the surrounding tags.
+	start := bytes.LastIndex(c[:startDivider], []byte("<"+startTag))
+	end := bytes.Index(c[startDivider:], []byte("</"+startTag))
+
+	if start == -1 {
+		start = startDivider
+	} else {
+		start = startDivider - (startDivider - start)
+	}
+
+	if end == -1 {
+		end = startDivider + len(internalSummaryDividerBase)
+	} else {
+		end = startDivider + end + len(startTag) + 3
+	}
+
+	var addDiv bool
+
+	switch markup {
+	case "rst":
+		addDiv = true
+	}
+
+	withoutDivider := append(c[:start], bytes.Trim(c[end:], "\n")...)
+
+	if len(withoutDivider) > 0 {
+		summary = bytes.TrimSpace(withoutDivider[:start])
+	}
+
+	if addDiv {
+		// For the rst
+		summary = append(append([]byte(nil), summary...), []byte("</div>")...)
+	}
+
+	if err != nil {
+		return
+	}
+
+	content = bytes.TrimSpace(withoutDivider)
+
+	return
+}
author	Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>	2019-01-02 12:33:26 +0100
committer	Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>	2019-03-23 18:51:22 +0100
commit	597e418cb02883418f2cebb41400e8e61413f651 (patch)
tree	177ad9c540b2583b6dab138c9f0490d28989c7f7 /hugolib/page__per_output.go
parent	44f5c1c14cb1f42cc5f01739c289e9cfc83602af (diff)