summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>2018-10-19 11:30:57 +0200
committerBjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>2018-10-22 20:46:13 +0200
commit44da60d869578423dea529db62ed613588a2a560 (patch)
tree0e9839e0d4a23048ae57f145fb3dedc1ad8005f5
parent1e3e34002dae3d4a980141efcc86886e7de5bef8 (diff)
hugolib: Redo the summary delimiter logic
Now that we have a proper page parse tree, this can be greatly simplified. See #5324
-rw-r--r--go.mod1
-rw-r--r--go.sum3
-rw-r--r--hugolib/page.go53
-rw-r--r--hugolib/page_bundler_handlers.go2
-rw-r--r--hugolib/page_content.go32
-rw-r--r--hugolib/page_test.go54
-rw-r--r--parser/metadecoders/decoder.go2
-rw-r--r--parser/metadecoders/yaml.go2
-rw-r--r--parser/pageparser/item.go45
-rw-r--r--parser/pageparser/pagelexer.go2
-rw-r--r--parser/pageparser/pageparser.go15
-rw-r--r--parser/pageparser/pageparser_intro_test.go4
12 files changed, 74 insertions, 141 deletions
diff --git a/go.mod b/go.mod
index 5e498370f..aa73284e9 100644
--- a/go.mod
+++ b/go.mod
@@ -63,7 +63,6 @@ require (
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e // indirect
golang.org/x/text v0.3.0
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 // indirect
- gopkg.in/yaml.v1 v1.0.0-20140924161607-9f9df34309c0
gopkg.in/yaml.v2 v2.2.1
)
diff --git a/go.sum b/go.sum
index 7af553217..c41cacfb3 100644
--- a/go.sum
+++ b/go.sum
@@ -65,7 +65,6 @@ github.com/magefile/mage v1.4.0 h1:RI7B1CgnPAuu2O9lWszwya61RLmfL0KCdo+QyyI/Bhk=
github.com/magefile/mage v1.4.0/go.mod h1:IUDi13rsHje59lecXokTfGX0QIzO45uVPlXnJYsXepA=
github.com/magiconair/properties v1.8.0 h1:LLgXmsheXeRoUOBOjtwPQCWIYqM/LU1ayDtDePerRcY=
github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
-github.com/markbates/inflect v0.0.0-20171215194931-a12c3aec81a6 h1:LZhVjIISSbj8qLf2qDPP0D8z0uvOWAW5C85ly5mJW6c=
github.com/markbates/inflect v0.0.0-20171215194931-a12c3aec81a6/go.mod h1:oTeZL2KHA7CUX6X+fovmK9OvIOFuqu0TwdQrZjLTh88=
github.com/mattn/go-isatty v0.0.4 h1:bnP0vzxcAdeI1zdubAl5PjU6zsERjGZb7raWodagDYs=
github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4=
@@ -144,7 +143,5 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
-gopkg.in/yaml.v1 v1.0.0-20140924161607-9f9df34309c0 h1:POO/ycCATvegFmVuPpQzZFJ+pGZeX22Ufu6fibxDVjU=
-gopkg.in/yaml.v1 v1.0.0-20140924161607-9f9df34309c0/go.mod h1:WDnlLJ4WF5VGsH/HVa3CI79GS0ol3YnhVnKP89i0kNg=
gopkg.in/yaml.v2 v2.2.1 h1:mUhvW9EsL+naU5Q3cakzfE91YhliOondGd6ZrsDBHQE=
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
diff --git a/hugolib/page.go b/hugolib/page.go
index db4ac4e3e..2db0fb5d4 100644
--- a/hugolib/page.go
+++ b/hugolib/page.go
@@ -19,7 +19,6 @@ import (
"errors"
"fmt"
"reflect"
- "unicode"
"github.com/gohugoio/hugo/media"
_errors "github.com/pkg/errors"
@@ -706,55 +705,13 @@ func (p *Page) UniqueID() string {
}
// for logging
+// TODO(bep) 2errors remove
func (p *Page) lineNumRawContentStart() int {
return bytes.Count(p.frontmatter, []byte("\n")) + 1
}
-var (
- internalSummaryDivider = []byte("HUGOMORE42")
-)
-
-// replaceDivider replaces the <!--more--> with an internal value and returns
-// whether the contentis truncated or not.
-// Note: The content slice will be modified if needed.
-func replaceDivider(content, from, to []byte) ([]byte, bool) {
- dividerIdx := bytes.Index(content, from)
- if dividerIdx == -1 {
- return content, false
- }
-
- afterSummary := content[dividerIdx+len(from):]
-
- // If the raw content has nothing but whitespace after the summary
- // marker then the page shouldn't be marked as truncated. This check
- // is simplest against the raw content because different markup engines
- // (rst and asciidoc in particular) add div and p elements after the
- // summary marker.
- truncated := bytes.IndexFunc(afterSummary, func(r rune) bool { return !unicode.IsSpace(r) }) != -1
-
- content = append(content[:dividerIdx], append(to, afterSummary...)...)
-
- return content, truncated
-
-}
-
-// We have to replace the <!--more--> with something that survives all the
-// rendering engines.
-func (p *Page) replaceDivider(content []byte) []byte {
- summaryDivider := helpers.SummaryDivider
- if p.Markup == "org" {
- summaryDivider = []byte("# more")
- }
-
- replaced, truncated := replaceDivider(content, summaryDivider, internalSummaryDivider)
-
- p.truncated = truncated
-
- return replaced
-}
-
-// Returns the page as summary and main if a user defined split is provided.
-func (p *Page) setUserDefinedSummaryIfProvided(rawContentCopy []byte) (*summaryContent, error) {
+// Returns the page as summary and main.
+func (p *Page) setUserDefinedSummary(rawContentCopy []byte) (*summaryContent, error) {
sc, err := splitUserDefinedSummaryAndContent(p.Markup, rawContentCopy)
@@ -1288,10 +1245,10 @@ func (p *Page) prepareForRender() error {
return err
}
- if p.Markup != "html" {
+ if p.Markup != "html" && p.source.hasSummaryDivider {
// Now we know enough to create a summary of the page and count some words
- summaryContent, err := p.setUserDefinedSummaryIfProvided(workContentCopy)
+ summaryContent, err := p.setUserDefinedSummary(workContentCopy)
if err != nil {
s.Log.ERROR.Printf("Failed to set user defined summary for page %q: %s", p.Path(), err)
diff --git a/hugolib/page_bundler_handlers.go b/hugolib/page_bundler_handlers.go
index 2d3a6a930..2ab0ebafe 100644
--- a/hugolib/page_bundler_handlers.go
+++ b/hugolib/page_bundler_handlers.go
@@ -276,8 +276,6 @@ func (c *contentHandlers) handlePageContent() contentHandler {
p.workContent = helpers.Emojify(p.workContent)
}
- // TODO(bep) 2errors
- p.workContent = p.replaceDivider(p.workContent)
p.workContent = p.renderContent(p.workContent)
tmpContent, tmpTableOfContents := helpers.ExtractTOC(p.workContent)
diff --git a/hugolib/page_content.go b/hugolib/page_content.go
index 7d5e3e8d6..0d715f38b 100644
--- a/hugolib/page_content.go
+++ b/hugolib/page_content.go
@@ -23,6 +23,10 @@ import (
"github.com/gohugoio/hugo/parser/pageparser"
)
+var (
+ internalSummaryDivider = []byte("HUGOMORE42")
+)
+
// The content related items on a Page.
type pageContent struct {
renderable bool
@@ -41,11 +45,12 @@ type pageContent struct {
}
type rawPageContent struct {
+ hasSummaryDivider bool
+
// The AST of the parsed page. Contains information about:
// shortcodes, front matter, summary indicators.
// TODO(bep) 2errors add this to a new rawPagecContent struct
// with frontMatterItem (pos) etc.
- // * also Result.Iterator, Result.Source
// * RawContent, RawContentWithoutFrontMatter
parsed pageparser.Result
}
@@ -71,16 +76,15 @@ Loop:
it := iter.Next()
switch {
- case it.Typ == pageparser.TypeIgnore:
- case it.Typ == pageparser.TypeHTMLComment:
+ case it.Type == pageparser.TypeIgnore:
+ case it.Type == pageparser.TypeHTMLComment:
// Ignore. This is only a leading Front matter comment.
- case it.Typ == pageparser.TypeHTMLDocument:
+ case it.Type == pageparser.TypeHTMLDocument:
// This is HTML only. No shortcode, front matter etc.
p.renderable = false
result.Write(it.Val)
- // TODO(bep) 2errors commented out frontmatter
case it.IsFrontMatter():
- f := metadecoders.FormatFromFrontMatterType(it.Typ)
+ f := metadecoders.FormatFromFrontMatterType(it.Type)
m, err := metadecoders.UnmarshalToMap(it.Val, f)
if err != nil {
return err
@@ -92,11 +96,23 @@ Loop:
if !p.shouldBuild() {
// Nothing more to do.
return nil
+ }
+ case it.Type == pageparser.TypeLeadSummaryDivider, it.Type == pageparser.TypeSummaryDividerOrg:
+ result.Write(internalSummaryDivider)
+ p.source.hasSummaryDivider = true
+ // Need to determine if the page is truncated.
+ f := func(item pageparser.Item) bool {
+ if item.IsNonWhitespace() {
+ p.truncated = true
+
+ // Done
+ return false
+ }
+ return true
}
+ iter.PeekWalk(f)
- //case it.Typ == pageparser.TypeLeadSummaryDivider, it.Typ == pageparser.TypeSummaryDividerOrg:
- // TODO(bep) 2errors store if divider is there and use that to determine if replace or not
// Handle shortcode
case it.IsLeftShortcodeDelim():
// let extractShortcode handle left delim (will do so recursively)
diff --git a/hugolib/page_test.go b/hugolib/page_test.go
index bb820b86e..7359140fc 100644
--- a/hugolib/page_test.go
+++ b/hugolib/page_test.go
@@ -1272,60 +1272,6 @@ func TestSliceToLower(t *testing.T) {
}
}
-func TestReplaceDivider(t *testing.T) {
- t.Parallel()
-
- tests := []struct {
- content string
- from string
- to string
- expectedContent string
- expectedTruncated bool
- }{
- {"none", "a", "b", "none", false},
- {"summary <!--more--> content", "<!--more-->", "HUGO", "summary HUGO content", true},
- {"summary\n\ndivider", "divider", "HUGO", "summary\n\nHUGO", false},
- {"summary\n\ndivider\n\r", "divider", "HUGO", "summary\n\nHUGO\n\r", false},
- }
-
- for i, test := range tests {
- replaced, truncated := replaceDivider([]byte(test.content), []byte(test.from), []byte(test.to))
-
- if truncated != test.expectedTruncated {
- t.Fatalf("[%d] Expected truncated to be %t, was %t", i, test.expectedTruncated, truncated)
- }
-
- if string(replaced) != test.expectedContent {
- t.Fatalf("[%d] Expected content to be %q, was %q", i, test.expectedContent, replaced)
- }
- }
-}
-
-func BenchmarkReplaceDivider(b *testing.B) {
- divider := "HUGO_DIVIDER"
- from, to := []byte(divider), []byte("HUGO_REPLACED")
-
- withDivider := make([][]byte, b.N)
- noDivider := make([][]byte, b.N)
-
- for i := 0; i < b.N; i++ {
- withDivider[i] = []byte(strings.Repeat("Summary ", 5) + "\n" + divider + "\n" + strings.Repeat("Word ", 300))
- noDivider[i] = []byte(strings.Repeat("Word ", 300))
- }
-
- b.ResetTimer()
- for i := 0; i < b.N; i++ {
- _, t1 := replaceDivider(withDivider[i], from, to)
- _, t2 := replaceDivider(noDivider[i], from, to)
- if !t1 {
- b.Fatal("Should be truncated")
- }
- if t2 {
- b.Fatal("Should not be truncated")
- }
- }
-}
-
func TestPagePaths(t *testing.T) {
t.Parallel()
diff --git a/parser/metadecoders/decoder.go b/parser/metadecoders/decoder.go
index 7527d7a08..280361a84 100644
--- a/parser/metadecoders/decoder.go
+++ b/parser/metadecoders/decoder.go
@@ -20,7 +20,7 @@ import (
"github.com/chaseadamsio/goorgeous"
"github.com/gohugoio/hugo/parser/pageparser"
"github.com/pkg/errors"
- yaml "gopkg.in/yaml.v1"
+ yaml "gopkg.in/yaml.v2"
)
type Format string
diff --git a/parser/metadecoders/yaml.go b/parser/metadecoders/yaml.go
index 3a520ac07..21b23a9fd 100644
--- a/parser/metadecoders/yaml.go
+++ b/parser/metadecoders/yaml.go
@@ -19,7 +19,7 @@ import (
"fmt"
"github.com/spf13/cast"
- yaml "gopkg.in/yaml.v1"
+ yaml "gopkg.in/yaml.v2"
)
// HandleYAMLData unmarshals YAML-encoded datum and returns a Go interface
diff --git a/parser/pageparser/item.go b/parser/pageparser/item.go
index d97fed734..afc3b5fab 100644
--- a/parser/pageparser/item.go
+++ b/parser/pageparser/item.go
@@ -13,10 +13,13 @@
package pageparser
-import "fmt"
+import (
+ "bytes"
+ "fmt"
+)
type Item struct {
- Typ ItemType
+ Type ItemType
pos pos
Val []byte
}
@@ -28,65 +31,69 @@ func (i Item) ValStr() string {
}
func (i Item) IsText() bool {
- return i.Typ == tText
+ return i.Type == tText
+}
+
+func (i Item) IsNonWhitespace() bool {
+ return len(bytes.TrimSpace(i.Val)) > 0
}
func (i Item) IsShortcodeName() bool {
- return i.Typ == tScName
+ return i.Type == tScName
}
func (i Item) IsLeftShortcodeDelim() bool {
- return i.Typ == tLeftDelimScWithMarkup || i.Typ == tLeftDelimScNoMarkup
+ return i.Type == tLeftDelimScWithMarkup || i.Type == tLeftDelimScNoMarkup
}
func (i Item) IsRightShortcodeDelim() bool {
- return i.Typ == tRightDelimScWithMarkup || i.Typ == tRightDelimScNoMarkup
+ return i.Type == tRightDelimScWithMarkup || i.Type == tRightDelimScNoMarkup
}
func (i Item) IsShortcodeClose() bool {
- return i.Typ == tScClose
+ return i.Type == tScClose
}
func (i Item) IsShortcodeParam() bool {
- return i.Typ == tScParam
+ return i.Type == tScParam
}
func (i Item) IsShortcodeParamVal() bool {
- return i.Typ == tScParamVal
+ return i.Type == tScParamVal
}
func (i Item) IsShortcodeMarkupDelimiter() bool {
- return i.Typ == tLeftDelimScWithMarkup || i.Typ == tRightDelimScWithMarkup
+ return i.Type == tLeftDelimScWithMarkup || i.Type == tRightDelimScWithMarkup
}
func (i Item) IsFrontMatter() bool {
- return i.Typ >= TypeFrontMatterYAML && i.Typ <= TypeFrontMatterORG
+ return i.Type >= TypeFrontMatterYAML && i.Type <= TypeFrontMatterORG
}
func (i Item) IsDone() bool {
- return i.Typ == tError || i.Typ == tEOF
+ return i.Type == tError || i.Type == tEOF
}
func (i Item) IsEOF() bool {
- return i.Typ == tEOF
+ return i.Type == tEOF
}
func (i Item) IsError() bool {
- return i.Typ == tError
+ return i.Type == tError
}
func (i Item) String() string {
switch {
- case i.Typ == tEOF:
+ case i.Type == tEOF:
return "EOF"
- case i.Typ == tError:
+ case i.Type == tError:
return string(i.Val)
- case i.Typ > tKeywordMarker:
+ case i.Type > tKeywordMarker:
return fmt.Sprintf("<%s>", i.Val)
case len(i.Val) > 50:
- return fmt.Sprintf("%v:%.20q...", i.Typ, i.Val)
+ return fmt.Sprintf("%v:%.20q...", i.Type, i.Val)
}
- return fmt.Sprintf("%v:[%s]", i.Typ, i.Val)
+ return fmt.Sprintf("%v:[%s]", i.Type, i.Val)
}
type ItemType int
diff --git a/parser/pageparser/pagelexer.go b/parser/pageparser/pagelexer.go
index 7768b0b2f..a6a26016b 100644
--- a/parser/pageparser/pagelexer.go
+++ b/parser/pageparser/pagelexer.go
@@ -235,6 +235,7 @@ func lexMainSection(l *pageLexer) stateFunc {
}
l.summaryDividerChecked = true
l.pos += pos(len(summaryDivider))
+ //l.consumeCRLF()
l.emit(TypeLeadSummaryDivider)
} else if l.hasPrefix(summaryDividerOrg) {
if l.pos > l.start {
@@ -242,6 +243,7 @@ func lexMainSection(l *pageLexer) stateFunc {
}
l.summaryDividerChecked = true
l.pos += pos(len(summaryDividerOrg))
+ //l.consumeCRLF()
l.emit(TypeSummaryDividerOrg)
}
}
diff --git a/parser/pageparser/pageparser.go b/parser/pageparser/pageparser.go
index b4cdef75c..bc6f55dd8 100644
--- a/parser/pageparser/pageparser.go
+++ b/parser/pageparser/pageparser.go
@@ -86,7 +86,7 @@ func (t *Iterator) Backup() {
// check for non-error and non-EOF types coming next
func (t *Iterator) IsValueNext() bool {
i := t.Peek()
- return i.Typ != tError && i.Typ != tEOF
+ return i.Type != tError && i.Type != tEOF
}
// look at, but do not consume, the next item
@@ -95,12 +95,23 @@ func (t *Iterator) Peek() Item {
return t.l.items[t.lastPos+1]
}
+// PeekWalk will feed the next items in the iterator to walkFn
+// until it returns false.
+func (t *Iterator) PeekWalk(walkFn func(item Item) bool) {
+ for i := t.lastPos + 1; i < pos(len(t.l.items)); i++ {
+ item := t.l.items[i]
+ if !walkFn(item) {
+ break
+ }
+ }
+}
+
// Consume is a convenience method to consume the next n tokens,
// but back off Errors and EOF.
func (t *Iterator) Consume(cnt int) {
for i := 0; i < cnt; i++ {
token := t.Next()
- if token.Typ == tError || token.Typ == tEOF {
+ if token.Type == tError || token.Type == tEOF {
t.Backup()
break
}
diff --git a/parser/pageparser/pageparser_intro_test.go b/parser/pageparser/pageparser_intro_test.go
index bfd19c250..850254ac7 100644
--- a/parser/pageparser/pageparser_intro_test.go
+++ b/parser/pageparser/pageparser_intro_test.go
@@ -91,7 +91,7 @@ func collect(input []byte, skipFrontMatter bool, stateStart stateFunc) (items []
for {
item := t.Next()
items = append(items, item)
- if item.Typ == tEOF || item.Typ == tError {
+ if item.Type == tEOF || item.Type == tError {
break
}
}
@@ -104,7 +104,7 @@ func equal(i1, i2 []Item) bool {
return false
}
for k := range i1 {
- if i1[k].Typ != i2[k].Typ {
+ if i1[k].Type != i2[k].Type {
return false
}
if !reflect.DeepEqual(i1[k].Val, i2[k].Val) {