hugolib: Use []byte in shortcode parsing

See #5324
author: Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> 2018-10-18 09:47:39 +0200
committer: Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> 2018-10-22 19:57:44 +0200
commit: 1b7ecfc2e176315b69914756c70b46306561e4d1 (patch)
tree: c1b9c74418e700123dff9e382e13fae99f95f43b
parent: 27f5a906a2a34e3b8348c8baeea48355352b5bbb (diff)
6 files changed, 28 insertions, 25 deletions
diff --git a/hugolib/page.go b/hugolib/page.go
index 0359769e3..e867dd525 100644
--- a/hugolib/page.go
+++ b/hugolib/page.go
@@ -1871,7 +1871,7 @@ func (p *Page) SaveSource() error {
 // TODO(bep) lazy consolidate
 func (p *Page) processShortcodes() error {
 	p.shortcodeState = newShortcodeHandler(p)
-	tmpContent, err := p.shortcodeState.extractShortcodes(string(p.workContent), p.withoutContent())
+	tmpContent, err := p.shortcodeState.extractShortcodes(p.workContent, p.withoutContent())
 	if err != nil {
 		return err
 	}
diff --git a/hugolib/shortcode.go b/hugolib/shortcode.go
index f7141031d..a21a10ad2 100644
--- a/hugolib/shortcode.go
+++ b/hugolib/shortcode.go
@@ -553,9 +553,9 @@ Loop:
 
 			return sc, nil
 		case currItem.IsText():
-			sc.inner = append(sc.inner, currItem.Val)
+			sc.inner = append(sc.inner, currItem.ValStr())
 		case currItem.IsShortcodeName():
-			sc.name = currItem.Val
+			sc.name = currItem.ValStr()
 			// We pick the first template for an arbitrary output format
 			// if more than one. It is "all inner or no inner".
 			tmpl := getShortcodeTemplateForTemplateKey(scKey{}, sc.name, p.s.Tmpl)
@@ -576,11 +576,11 @@ Loop:
 				// named params
 				if sc.params == nil {
 					params := make(map[string]string)
-					params[currItem.Val] = pt.Next().Val
+					params[currItem.ValStr()] = pt.Next().ValStr()
 					sc.params = params
 				} else {
 					if params, ok := sc.params.(map[string]string); ok {
-						params[currItem.Val] = pt.Next().Val
+						params[currItem.ValStr()] = pt.Next().ValStr()
 					} else {
 						return sc, errShortCodeIllegalState
 					}
@@ -590,11 +590,11 @@ Loop:
 				// positional params
 				if sc.params == nil {
 					var params []string
-					params = append(params, currItem.Val)
+					params = append(params, currItem.ValStr())
 					sc.params = params
 				} else {
 					if params, ok := sc.params.([]string); ok {
-						params = append(params, currItem.Val)
+						params = append(params, currItem.ValStr())
 						sc.params = params
 					} else {
 						return sc, errShortCodeIllegalState
@@ -613,19 +613,21 @@ Loop:
 	return sc, nil
 }
 
-func (s *shortcodeHandler) extractShortcodes(stringToParse string, p *PageWithoutContent) (string, error) {
+var shortCodeStart = []byte("{{")
 
-	startIdx := strings.Index(stringToParse, "{{")
+func (s *shortcodeHandler) extractShortcodes(input []byte, p *PageWithoutContent) (string, error) {
+
+	startIdx := bytes.Index(input, shortCodeStart)
 
 	// short cut for docs with no shortcodes
 	if startIdx < 0 {
-		return stringToParse, nil
+		return string(input), nil
 	}
 
 	// the parser takes a string;
 	// since this is an internal API, it could make sense to use the mutable []byte all the way, but
 	// it seems that the time isn't really spent in the byte copy operations, and the impl. gets a lot cleaner
-	pt := pageparser.ParseFrom(stringToParse, startIdx)
+	pt := pageparser.ParseFrom(input, startIdx)
 
 	result := bp.GetBuffer()
 	defer bp.PutBuffer(result)
@@ -642,7 +644,7 @@ Loop:
 
 		switch {
 		case currItem.IsText():
-			result.WriteString(currItem.Val)
+			result.WriteString(currItem.ValStr())
 		case currItem.IsLeftShortcodeDelim():
 			// let extractShortcode handle left delim (will do so recursively)
 			pt.Backup()
diff --git a/hugolib/shortcode_test.go b/hugolib/shortcode_test.go
index 3385d31f0..f8837810c 100644
--- a/hugolib/shortcode_test.go
+++ b/hugolib/shortcode_test.go
@@ -424,7 +424,7 @@ func TestExtractShortcodes(t *testing.T) {
 			return fmt.Sprintf("HAHA%s-%dHBHB", shortcodePlaceholderPrefix, counter)
 		}
 
-		content, err := s.extractShortcodes(this.input, p.withoutContent())
+		content, err := s.extractShortcodes([]byte(this.input), p.withoutContent())
 
 		if b, ok := this.expect.(bool); ok && !b {
 			if err == nil {
diff --git a/parser/pageparser/item.go b/parser/pageparser/item.go
index 35bc8e268..6e93bb696 100644
--- a/parser/pageparser/item.go
+++ b/parser/pageparser/item.go
@@ -21,6 +21,10 @@ type Item struct {
 	Val []byte
 }
 
+func (i Item) ValStr() string {
+	return string(i.Val)
+}
+
 func (i Item) IsText() bool {
 	return i.typ == tText
 }
diff --git a/parser/pageparser/pagelexer.go b/parser/pageparser/pagelexer.go
index 3bdfb6c33..c15e977ca 100644
--- a/parser/pageparser/pagelexer.go
+++ b/parser/pageparser/pagelexer.go
@@ -60,17 +60,6 @@ type pageLexer struct {
 	items []Item
 }
 
-func Parse(s string) *Tokens {
-	return ParseFrom(s, 0)
-}
-
-func ParseFrom(s string, from int) *Tokens {
-	input := []byte(s)
-	lexer := newPageLexer(input, pos(from), lexMainSection) // TODO(bep) 2errors
-	lexer.run()
-	return &Tokens{lexer: lexer}
-}
-
 // note: the input position here is normally 0 (start), but
 // can be set if position of first shortcode is known
 // TODO(bep) 2errors byte
diff --git a/parser/pageparser/pageparser.go b/parser/pageparser/pageparser.go
index 5534ee64b..948c05edf 100644
--- a/parser/pageparser/pageparser.go
+++ b/parser/pageparser/pageparser.go
@@ -17,7 +17,15 @@
 // See slides here: http://cuddle.googlecode.com/hg/talk/lex.html
 package pageparser
 
-// The lexical scanning below
+func Parse(input []byte) *Tokens {
+	return ParseFrom(input, 0)
+}
+
+func ParseFrom(input []byte, from int) *Tokens {
+	lexer := newPageLexer(input, pos(from), lexMainSection) // TODO(bep) 2errors
+	lexer.run()
+	return &Tokens{lexer: lexer}
+}
 
 type Tokens struct {
 	lexer     *pageLexer
author	Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>	2018-10-18 09:47:39 +0200
committer	Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>	2018-10-22 19:57:44 +0200
commit	1b7ecfc2e176315b69914756c70b46306561e4d1 (patch)
tree	c1b9c74418e700123dff9e382e13fae99f95f43b
parent	27f5a906a2a34e3b8348c8baeea48355352b5bbb (diff)