summaryrefslogtreecommitdiffstats
path: root/hugolib/page__content.go
diff options
context:
space:
mode:
Diffstat (limited to 'hugolib/page__content.go')
-rw-r--r--hugolib/page__content.go700
1 files changed, 654 insertions, 46 deletions
diff --git a/hugolib/page__content.go b/hugolib/page__content.go
index 89c38bd84..64ce83f0e 100644
--- a/hugolib/page__content.go
+++ b/hugolib/page__content.go
@@ -14,36 +14,147 @@
package hugolib
import (
+ "bytes"
"context"
+ "errors"
"fmt"
+ "html/template"
+ "io"
+ "strings"
+ "unicode/utf8"
- "github.com/gohugoio/hugo/output"
+ "github.com/bep/logg"
+ "github.com/gohugoio/hugo/common/hcontext"
+ "github.com/gohugoio/hugo/common/herrors"
+ "github.com/gohugoio/hugo/common/hugio"
+ "github.com/gohugoio/hugo/helpers"
+ "github.com/gohugoio/hugo/identity"
+ "github.com/gohugoio/hugo/markup/converter"
+ "github.com/gohugoio/hugo/markup/tableofcontents"
+ "github.com/gohugoio/hugo/parser/metadecoders"
"github.com/gohugoio/hugo/parser/pageparser"
+ "github.com/gohugoio/hugo/resources"
+ "github.com/gohugoio/hugo/resources/resource"
+ "github.com/gohugoio/hugo/tpl"
+)
+
+const (
+ internalSummaryDividerBase = "HUGOMORE42"
)
var (
- internalSummaryDividerBase = "HUGOMORE42"
internalSummaryDividerBaseBytes = []byte(internalSummaryDividerBase)
internalSummaryDividerPre = []byte("\n\n" + internalSummaryDividerBase + "\n\n")
)
-// The content related items on a Page.
-type pageContent struct {
- selfLayout string
- truncated bool
+type pageContentReplacement struct {
+ val []byte
+
+ source pageparser.Item
+}
+
+func newCachedContent(m *pageMeta, pid uint64) (*cachedContent, error) {
+ var openSource hugio.OpenReadSeekCloser
+ var filename string
+ if m.f != nil {
+ meta := m.f.FileInfo().Meta()
+ openSource = func() (hugio.ReadSeekCloser, error) {
+ r, err := meta.Open()
+ if err != nil {
+ return nil, fmt.Errorf("failed to open file %q: %w", meta.Filename, err)
+ }
+ return r, nil
+ }
+ filename = m.f.Filename()
+ }
+
+ c := &cachedContent{
+ pm: m.s.pageMap,
+ StaleInfo: m,
+ shortcodeState: newShortcodeHandler(filename, m.s),
+ parseInfo: &contentParseInfo{
+ pid: pid,
+ },
+ cacheBaseKey: m.pathInfo.PathNoLang(),
+ openSource: openSource,
+ enableEmoji: m.s.conf.EnableEmoji,
+ }
+
+ source, err := c.contentSource()
+ if err != nil {
+ return nil, err
+ }
+
+ if err := c.parseContentFile(source); err != nil {
+ return nil, err
+ }
+
+ return c, nil
+}
+
+type cachedContent struct {
+ pm *pageMap
+
+ cacheBaseKey string
+
+ // The source bytes.
+ openSource hugio.OpenReadSeekCloser
+
+ resource.StaleInfo
+
+ shortcodeState *shortcodeHandler
+
+ // Parsed content.
+ parseInfo *contentParseInfo
+
+ enableEmoji bool
+}
+
+type contentParseInfo struct {
+ pid uint64
+ frontMatter map[string]any
- cmap *pageContentMap
+ // Whether the parsed content contains a summary separator.
+ hasSummaryDivider bool
+
+ // Whether there are more content after the summary divider.
+ summaryTruncated bool
+
+ // Returns the position in bytes after any front matter.
+ posMainContent int
+
+ // Indicates whether we must do placeholder replacements.
+ hasNonMarkdownShortcode bool
+
+ // Items from the page parser.
+ // These maps directly to the source
+ itemsStep1 pageparser.Items
+
+ // *shortcode, pageContentReplacement or pageparser.Item
+ itemsStep2 []any
+}
+
+func (p *contentParseInfo) AddBytes(item pageparser.Item) {
+ p.itemsStep2 = append(p.itemsStep2, item)
+}
- source rawPageContent
+func (p *contentParseInfo) AddReplacement(val []byte, source pageparser.Item) {
+ p.itemsStep2 = append(p.itemsStep2, pageContentReplacement{val: val, source: source})
}
-// returns the content to be processed by Goldmark or similar.
-func (p pageContent) contentToRender(ctx context.Context, parsed pageparser.Result, pm *pageContentMap, renderedShortcodes map[string]shortcodeRenderer) ([]byte, bool, error) {
- source := parsed.Input()
+func (p *contentParseInfo) AddShortcode(s *shortcode) {
+ p.itemsStep2 = append(p.itemsStep2, s)
+ if s.insertPlaceholder() {
+ p.hasNonMarkdownShortcode = true
+ }
+}
+
+// contentToRenderForItems returns the content to be processed by Goldmark or similar.
+func (pi *contentParseInfo) contentToRender(ctx context.Context, source []byte, renderedShortcodes map[string]shortcodeRenderer) ([]byte, bool, error) {
var hasVariants bool
c := make([]byte, 0, len(source)+(len(source)/10))
- for _, it := range pm.items {
+ for _, it := range pi.itemsStep2 {
switch v := it.(type) {
case pageparser.Item:
c = append(c, source[v.Pos():v.Pos()+len(v.Val(source))]...)
@@ -78,59 +189,556 @@ func (p pageContent) contentToRender(ctx context.Context, parsed pageparser.Resu
return c, hasVariants, nil
}
-func (p pageContent) selfLayoutForOutput(f output.Format) string {
- if p.selfLayout == "" {
- return ""
+func (c *cachedContent) IsZero() bool {
+ return len(c.parseInfo.itemsStep2) == 0
+}
+
+func (c *cachedContent) parseContentFile(source []byte) error {
+ if source == nil || c.openSource == nil {
+ return nil
}
- return p.selfLayout + f.Name
+
+ items, err := pageparser.ParseBytes(
+ source,
+ pageparser.Config{},
+ )
+ if err != nil {
+ return err
+ }
+
+ c.parseInfo.itemsStep1 = items
+
+ return c.parseInfo.mapItems(source, c.shortcodeState)
}
-type rawPageContent struct {
- hasSummaryDivider bool
+func (c *contentParseInfo) parseFrontMatter(it pageparser.Item, iter *pageparser.Iterator, source []byte) error {
+ if c.frontMatter != nil {
+ return nil
+ }
- // The AST of the parsed page. Contains information about:
- // shortcodes, front matter, summary indicators.
- parsed pageparser.Result
+ f := pageparser.FormatFromFrontMatterType(it.Type)
+ var err error
+ c.frontMatter, err = metadecoders.Default.UnmarshalToMap(it.Val(source), f)
+ if err != nil {
+ if fe, ok := err.(herrors.FileError); ok {
+ pos := fe.Position()
- // Returns the position in bytes after any front matter.
- posMainContent int
+ // Offset the starting position of front matter.
+ offset := iter.LineNumber(source) - 1
+ if f == metadecoders.YAML {
+ offset -= 1
+ }
+ pos.LineNumber += offset
- // These are set if we're able to determine this from the source.
- posSummaryEnd int
- posBodyStart int
+ fe.UpdatePosition(pos)
+ fe.SetFilename("") // It will be set later.
+
+ return fe
+ } else {
+ return err
+ }
+ }
+
+ return nil
}
-type pageContentReplacement struct {
- val []byte
+func (rn *contentParseInfo) mapItems(
+ source []byte,
+ s *shortcodeHandler,
+) error {
+ if len(rn.itemsStep1) == 0 {
+ return nil
+ }
- source pageparser.Item
+ fail := func(err error, i pageparser.Item) error {
+ if fe, ok := err.(herrors.FileError); ok {
+ return fe
+ }
+
+ pos := posFromInput("", source, i.Pos())
+
+ return herrors.NewFileErrorFromPos(err, pos)
+ }
+
+ iter := pageparser.NewIterator(rn.itemsStep1)
+
+ // the parser is guaranteed to return items in proper order or fail, so …
+ // … it's safe to keep some "global" state
+ var ordinal int
+
+Loop:
+ for {
+ it := iter.Next()
+
+ switch {
+ case it.Type == pageparser.TypeIgnore:
+ case it.IsFrontMatter():
+ if err := rn.parseFrontMatter(it, iter, source); err != nil {
+ return err
+ }
+ next := iter.Peek()
+ if !next.IsDone() {
+ rn.posMainContent = next.Pos()
+ }
+ case it.Type == pageparser.TypeLeadSummaryDivider:
+ posBody := -1
+ f := func(item pageparser.Item) bool {
+ if posBody == -1 && !item.IsDone() {
+ posBody = item.Pos()
+ }
+
+ if item.IsNonWhitespace(source) {
+ rn.summaryTruncated = true
+
+ // Done
+ return false
+ }
+ return true
+ }
+ iter.PeekWalk(f)
+
+ rn.hasSummaryDivider = true
+
+ // The content may be rendered by Goldmark or similar,
+ // and we need to track the summary.
+ rn.AddReplacement(internalSummaryDividerPre, it)
+
+ // Handle shortcode
+ case it.IsLeftShortcodeDelim():
+ // let extractShortcode handle left delim (will do so recursively)
+ iter.Backup()
+
+ currShortcode, err := s.extractShortcode(ordinal, 0, source, iter)
+ if err != nil {
+ return fail(err, it)
+ }
+
+ currShortcode.pos = it.Pos()
+ currShortcode.length = iter.Current().Pos() - it.Pos()
+ if currShortcode.placeholder == "" {
+ currShortcode.placeholder = createShortcodePlaceholder("s", rn.pid, currShortcode.ordinal)
+ }
+
+ if currShortcode.name != "" {
+ s.addName(currShortcode.name)
+ }
+
+ if currShortcode.params == nil {
+ var s []string
+ currShortcode.params = s
+ }
+
+ currShortcode.placeholder = createShortcodePlaceholder("s", rn.pid, ordinal)
+ ordinal++
+ s.shortcodes = append(s.shortcodes, currShortcode)
+
+ rn.AddShortcode(currShortcode)
+
+ case it.IsEOF():
+ break Loop
+ case it.IsError():
+ return fail(it.Err, it)
+ default:
+ rn.AddBytes(it)
+ }
+ }
+
+ return nil
}
-type pageContentMap struct {
+func (c *cachedContent) mustSource() []byte {
+ source, err := c.contentSource()
+ if err != nil {
+ panic(err)
+ }
+ return source
+}
- // If not, we can skip any pre-rendering of shortcodes.
- hasMarkdownShortcode bool
+func (c *cachedContent) contentSource() ([]byte, error) {
+ key := c.cacheBaseKey
+ v, err := c.pm.cacheContentSource.GetOrCreate(key, func(string) (*resources.StaleValue[[]byte], error) {
+ b, err := c.readSourceAll()
+ if err != nil {
+ return nil, err
+ }
- // Indicates whether we must do placeholder replacements.
- hasNonMarkdownShortcode bool
+ return &resources.StaleValue[[]byte]{
+ Value: b,
+ IsStaleFunc: func() bool {
+ return c.IsStale()
+ },
+ }, nil
+ })
+ if err != nil {
+ return nil, err
+ }
- // *shortcode, pageContentReplacement or pageparser.Item
- items []any
+ return v.Value, nil
}
-func (p *pageContentMap) AddBytes(item pageparser.Item) {
- p.items = append(p.items, item)
+func (c *cachedContent) readSourceAll() ([]byte, error) {
+ if c.openSource == nil {
+ return []byte{}, nil
+ }
+ r, err := c.openSource()
+ if err != nil {
+ return nil, err
+ }
+ defer r.Close()
+
+ return io.ReadAll(r)
}
-func (p *pageContentMap) AddReplacement(val []byte, source pageparser.Item) {
- p.items = append(p.items, pageContentReplacement{val: val, source: source})
+type contentTableOfContents struct {
+ // For Goldmark we split Parse and Render.
+ astDoc any
+
+ tableOfContents *tableofcontents.Fragments
+ tableOfContentsHTML template.HTML
+
+ // Temporary storage of placeholders mapped to their content.
+ // These are shortcodes etc. Some of these will need to be replaced
+ // after any markup is rendered, so they share a common prefix.
+ contentPlaceholders map[string]shortcodeRenderer
+
+ contentToRender []byte
}
-func (p *pageContentMap) AddShortcode(s *shortcode) {
- p.items = append(p.items, s)
- if s.insertPlaceholder() {
- p.hasNonMarkdownShortcode = true
- } else {
- p.hasMarkdownShortcode = true
+type contentSummary struct {
+ content template.HTML
+ summary template.HTML
+ summaryTruncated bool
+}
+
+type contentPlainPlainWords struct {
+ plain string
+ plainWords []string
+
+ summary template.HTML
+ summaryTruncated bool
+
+ wordCount int
+ fuzzyWordCount int
+ readingTime int
+}
+
+func (c *cachedContent) contentRendered(ctx context.Context, cp *pageContentOutput) (contentSummary, error) {
+ ctx = tpl.Context.DependencyScope.Set(ctx, pageDependencyScopeGlobal)
+ key := c.cacheBaseKey + "/" + cp.po.f.Name
+ versionv := cp.contentRenderedVersion
+
+ v, err := c.pm.cacheContentRendered.GetOrCreate(key, func(string) (*resources.StaleValue[contentSummary], error) {
+ cp.po.p.s.Log.Trace(logg.StringFunc(func() string {
+ return fmt.Sprintln("contentRendered", key)
+ }))
+
+ cp.po.p.s.h.contentRenderCounter.Add(1)
+ cp.contentRendered = true
+ po := cp.po
+
+ ct, err := c.contentToC(ctx, cp)
+ if err != nil {
+ return nil, err
+ }
+
+ rs := &resources.StaleValue[contentSummary]{
+ IsStaleFunc: func() bool {
+ return c.IsStale() || cp.contentRenderedVersion != versionv
+ },
+ }
+
+ if len(c.parseInfo.itemsStep2) == 0 {
+ // Nothing to do.
+ return rs, nil
+ }
+
+ var b []byte
+
+ if ct.astDoc != nil {
+ // The content is parsed, but not rendered.
+ r, ok, err := po.contentRenderer.RenderContent(ctx, ct.contentToRender, ct.astDoc)
+ if err != nil {
+ return nil, err
+ }
+ if !ok {
+ return nil, errors.New("invalid state: astDoc is set but RenderContent returned false")
+ }
+
+ b = r.Bytes()
+
+ } else {
+ // Copy the content to be rendered.
+ b = make([]byte, len(ct.contentToRender))
+ copy(b, ct.contentToRender)
+ }
+
+ // There are one or more replacement tokens to be replaced.
+ var hasShortcodeVariants bool
+ tokenHandler := func(ctx context.Context, token string) ([]byte, error) {
+ if token == tocShortcodePlaceholder {
+ return []byte(ct.tableOfContentsHTML), nil
+ }
+ renderer, found := ct.contentPlaceholders[token]
+ if found {
+ repl, more, err := renderer.renderShortcode(ctx)
+ if err != nil {
+ return nil, err
+ }
+ hasShortcodeVariants = hasShortcodeVariants || more
+ return repl, nil
+ }
+ // This should never happen.
+ panic(fmt.Errorf("unknown shortcode token %q (number of tokens: %d)", token, len(ct.contentPlaceholders)))
+ }
+
+ b, err = expandShortcodeTokens(ctx, b, tokenHandler)
+ if err != nil {
+ return nil, err
+ }
+ if hasShortcodeVariants {
+ cp.po.p.pageOutputTemplateVariationsState.Add(1)
+ }
+
+ var result contentSummary // hasVariants bool
+
+ if c.parseInfo.hasSummaryDivider {
+ isHTML := cp.po.p.m.markup == "html"
+ if isHTML {
+ // Use the summary sections as provided by the user.
+ i := bytes.Index(b, internalSummaryDividerPre)
+ result.summary = helpers.BytesToHTML(b[:i])
+ b = b[i+len(internalSummaryDividerPre):]
+
+ } else {
+ summary, content, err := splitUserDefinedSummaryAndContent(cp.po.p.m.markup, b)
+ if err != nil {
+ cp.po.p.s.Log.Errorf("Failed to set user defined summary for page %q: %s", cp.po.p.pathOrTitle(), err)
+ } else {
+ b = content
+ result.summary = helpers.BytesToHTML(summary)
+ }
+ }
+ result.summaryTruncated = c.parseInfo.summaryTruncated
+ }
+ result.content = helpers.BytesToHTML(b)
+ rs.Value = result
+
+ return rs, nil
+ })
+ if err != nil {
+ return contentSummary{}, cp.po.p.wrapError(err)
+ }
+
+ return v.Value, nil
+}
+
+func (c *cachedContent) mustContentToC(ctx context.Context, cp *pageContentOutput) contentTableOfContents {
+ ct, err := c.contentToC(ctx, cp)
+ if err != nil {
+ panic(err)
+ }
+ return ct
+}
+
+var setGetContentCallbackInContext = hcontext.NewContextDispatcher[func(*pageContentOutput, contentTableOfContents)]("contentCallback")
+
+func (c *cachedContent) contentToC(ctx context.Context, cp *pageContentOutput) (contentTableOfContents, error) {
+ key := c.cacheBaseKey + "/" + cp.po.f.Name
+ versionv := cp.contentRenderedVersion
+
+ v, err := c.pm.contentTableOfContents.GetOrCreate(key, func(string) (*resources.StaleValue[contentTableOfContents], error) {
+ source, err := c.contentSource()
+ if err != nil {
+ return nil, err
+ }
+
+ var ct contentTableOfContents
+ if err := cp.initRenderHooks(); err != nil {
+ return nil, err
+ }
+ f := cp.po.f
+ po := cp.po
+ p := po.p
+ ct.contentPlaceholders, err = c.shortcodeState.prepareShortcodesForPage(ctx, p, f, false)
+ if err != nil {
+ return nil, err
+ }
+
+ // Callback called from above (e.g. in .RenderString)
+ ctxCallback := func(cp2 *pageContentOutput, ct2 contentTableOfContents) {
+ // Merge content placeholders
+ for k, v := range ct2.contentPlaceholders {
+ ct.contentPlaceholders[k] = v
+ }
+
+ if p.s.conf.Internal.Watch {
+ for _, s := range cp2.po.p.content.shortcodeState.shortcodes {
+ for _, templ := range s.templs {
+ cp.trackDependency(templ.(identity.IdentityProvider))
+ }
+ }
+ }
+
+ // Transfer shortcode names so HasShortcode works for shortcodes from included pages.
+ cp.po.p.content.shortcodeState.transferNames(cp2.po.p.content.shortcodeState)
+ if cp2.po.p.pageOutputTemplateVariationsState.Load() > 0 {
+ cp.po.p.pageOutputTemplateVariationsState.Add(1)
+ }
+ }
+
+ ctx = setGetContentCallbackInContext.Set(ctx, ctxCallback)
+
+ var hasVariants bool
+ ct.contentToRender, hasVariants, err = c.parseInfo.contentToRender(ctx, source, ct.contentPlaceholders)
+ if err != nil {
+ return nil, err
+ }
+
+ if hasVariants {
+ p.pageOutputTemplateVariationsState.Add(1)
+ }
+
+ isHTML := cp.po.p.m.markup == "html"
+
+ if !isHTML {
+ createAndSetToC := func(tocProvider converter.TableOfContentsProvider) {
+ cfg := p.s.ContentSpec.Converters.GetMarkupConfig()
+ ct.tableOfContents = tocProvider.TableOfContents()
+ ct.tableOfContentsHTML = template.HTML(
+ ct.tableOfContents.ToHTML(
+ cfg.TableOfContents.StartLevel,
+ cfg.TableOfContents.EndLevel,
+ cfg.TableOfContents.Ordered,
+ ),
+ )
+ }
+
+ // If the converter supports doing the parsing separately, we do that.
+ parseResult, ok, err := po.contentRenderer.ParseContent(ctx, ct.contentToRender)
+ if err != nil {
+ return nil, err
+ }
+ if ok {
+ // This is Goldmark.
+ // Store away the parse result for later use.
+ createAndSetToC(parseResult)
+
+ ct.astDoc = parseResult.Doc()
+
+ } else {
+
+ // This is Asciidoctor etc.
+ r, err := po.contentRenderer.ParseAndRenderContent(ctx, ct.contentToRender, true)
+ if err != nil {
+ return nil, err
+ }
+
+ ct.contentToRender = r.Bytes()
+
+ if tocProvider, ok := r.(converter.TableOfContentsProvider); ok {
+ createAndSetToC(tocProvider)
+ } else {
+ tmpContent, tmpTableOfContents := helpers.ExtractTOC(ct.contentToRender)
+ ct.tableOfContentsHTML = helpers.BytesToHTML(tmpTableOfContents)
+ ct.tableOfContents = tableofcontents.Empty
+ ct.contentToRender = tmpContent
+ }
+ }
+ }
+
+ return &resources.StaleValue[contentTableOfContents]{
+ Value: ct,
+ IsStaleFunc: func() bool {
+ return c.IsStale() || cp.contentRenderedVersion != versionv
+ },
+ }, nil
+ })
+ if err != nil {
+ return contentTableOfContents{}, err
+ }
+
+ return v.Value, nil
+}
+
+func (c *cachedContent) contentPlain(ctx context.Context, cp *pageContentOutput) (contentPlainPlainWords, error) {
+ key := c.cacheBaseKey + "/" + cp.po.f.Name
+
+ versionv := cp.contentRenderedVersion
+
+ v, err := c.pm.cacheContentPlain.GetOrCreateWitTimeout(key, cp.po.p.s.Conf.Timeout(), func(string) (*resources.StaleValue[contentPlainPlainWords], error) {
+ var result contentPlainPlainWords
+ rs := &resources.StaleValue[contentPlainPlainWords]{
+ IsStaleFunc: func() bool {
+ return c.IsStale() || cp.contentRenderedVersion != versionv
+ },
+ }
+
+ rendered, err := c.contentRendered(ctx, cp)
+ if err != nil {
+ return nil, err
+ }
+
+ result.plain = tpl.StripHTML(string(rendered.content))
+ result.plainWords = strings.Fields(result.plain)
+
+ isCJKLanguage := cp.po.p.m.isCJKLanguage
+
+ if isCJKLanguage {
+ result.wordCount = 0
+ for _, word := range result.plainWords {
+ runeCount := utf8.RuneCountInString(word)
+ if len(word) == runeCount {
+ result.wordCount++
+ } else {
+ result.wordCount += runeCount
+ }
+ }
+ } else {
+ result.wordCount = helpers.TotalWords(result.plain)
+ }
+
+ // TODO(bep) is set in a test. Fix that.
+ if result.fuzzyWordCount == 0 {
+ result.fuzzyWordCount = (result.wordCount + 100) / 100 * 100
+ }
+
+ if isCJKLanguage {
+ result.readingTime = (result.wordCount + 500) / 501
+ } else {
+ result.readingTime = (result.wordCount + 212) / 213
+ }
+
+ if rendered.summary != "" {
+ result.summary = rendered.summary
+ result.summaryTruncated = rendered.summaryTruncated
+ } else if cp.po.p.m.summary != "" {
+ b, err := cp.po.contentRenderer.ParseAndRenderContent(ctx, []byte(cp.po.p.m.summary), false)
+ if err != nil {
+ return nil, err
+ }
+ html := cp.po.p.s.ContentSpec.TrimShortHTML(b.Bytes())
+ result.summary = helpers.BytesToHTML(html)
+ } else {
+ var summary string
+ var truncated bool
+ if isCJKLanguage {
+ summary, truncated = cp.po.p.s.ContentSpec.TruncateWordsByRune(result.plainWords)
+ } else {
+ summary, truncated = cp.po.p.s.ContentSpec.TruncateWordsToWholeSentence(result.plain)
+ }
+ result.summary = template.HTML(summary)
+ result.summaryTruncated = truncated
+ }
+
+ rs.Value = result
+
+ return rs, nil
+ })
+ if err != nil {
+ if herrors.IsTimeoutError(err) {
+ err = fmt.Errorf("timed out rendering the page content. You may have a circular loop in a shortcode, or your site may have resources that take longer to build than the `timeout` limit in your Hugo config file: %w", err)
+ }
+ return contentPlainPlainWords{}, err
}
+ return v.Value, nil
}