summary | refs | log | tree | commit | diff | stats
path: root/parser
diff options
context:
space:
mode:
author Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> 2018-10-23 14:37:09 +0200
committer Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> 2018-10-23 19:41:22 +0200
commit 6636cf1bea77d20ef2a72a45fae59ac402fb133b (patch)
tree 91c9435acd1a2139f8816abcd9b0d978ff2fa300 /parser
parent f669ef6bec25155d015b6ab231c53caef4fa5cdc (diff)
Resolve error handling/parser related TODOs
See #5324
Diffstat (limited to 'parser')
-rw-r--r-- parser/pageparser/item.go 5
-rw-r--r-- parser/pageparser/pagelexer.go 56
-rw-r--r-- parser/pageparser/pageparser.go 2
-rw-r--r-- parser/pageparser/pageparser_intro_test.go 8
4 files changed, 35 insertions, 36 deletions
diff --git a/parser/pageparser/item.go b/parser/pageparser/item.go
index 049db584a..0567bd8b9 100644
--- a/parser/pageparser/item.go
+++ b/parser/pageparser/item.go
@@ -103,10 +103,9 @@ const (
tEOF
// page items
- TypeHTMLDocument // document starting with < as first non-whitespace
+ TypeHTMLStart // document starting with < as first non-whitespace
TypeHTMLComment // We ignore leading comments
- TypeLeadSummaryDivider // <!--more-->
- TypeSummaryDividerOrg // # more
+ TypeLeadSummaryDivider // <!--more-->, # more
TypeFrontMatterYAML
TypeFrontMatterTOML
TypeFrontMatterJSON
diff --git a/parser/pageparser/pagelexer.go b/parser/pageparser/pagelexer.go
index e02475d42..ddf109b3d 100644
--- a/parser/pageparser/pagelexer.go
+++ b/parser/pageparser/pagelexer.go
@@ -48,6 +48,8 @@ type pageLexer struct {
start int // item start position
width int // width of last element
+ // The summary divider to look for.
+ summaryDivider []byte
// Set when we have parsed any summary divider
summaryDividerChecked bool
@@ -69,7 +71,6 @@ func (l *pageLexer) Input() []byte {
// note: the input position here is normally 0 (start), but
// can be set if position of first shortcode is known
-// TODO(bep) 2errors byte
func newPageLexer(input []byte, inputPosition int, stateStart stateFunc) *pageLexer {
lexer := &pageLexer{
input: input,
@@ -117,7 +118,7 @@ var (
delimTOML = []byte("+++")
delimYAML = []byte("---")
delimOrg = []byte("#+")
- htmlCOmmentStart = []byte("<!--")
+ htmlCommentStart = []byte("<!--")
htmlCOmmentEnd = []byte("-->")
)
@@ -195,17 +196,18 @@ func (l *pageLexer) consumeCRLF() bool {
func lexMainSection(l *pageLexer) stateFunc {
// Fast forward as far as possible.
- var l1, l2, l3 int
- if !l.summaryDividerChecked {
- // TODO(bep) 2errors make the summary divider per type
- l1 = l.index(summaryDivider)
- l2 = l.index(summaryDividerOrg)
- if l1 == -1 && l2 == -1 {
+ var l1, l2 int
+
+ if !l.summaryDividerChecked && l.summaryDivider != nil {
+ l1 = l.index(l.summaryDivider)
+ if l1 == -1 {
l.summaryDividerChecked = true
}
}
- l3 = l.index(leftDelimSc)
- skip := minPositiveIndex(l1, l2, l3)
+
+ l2 = l.index(leftDelimSc)
+ skip := minPositiveIndex(l1, l2)
+
if skip > 0 {
l.pos += skip
}
@@ -225,23 +227,14 @@ func lexMainSection(l *pageLexer) stateFunc {
return lexShortcodeLeftDelim
}
- if !l.summaryDividerChecked {
- if l.hasPrefix(summaryDivider) {
+ if !l.summaryDividerChecked && l.summaryDivider != nil {
+ if l.hasPrefix(l.summaryDivider) {
if l.pos > l.start {
l.emit(tText)
}
l.summaryDividerChecked = true
- l.pos += len(summaryDivider)
- //l.consumeCRLF()
+ l.pos += len(l.summaryDivider)
l.emit(TypeLeadSummaryDivider)
- } else if l.hasPrefix(summaryDividerOrg) {
- if l.pos > l.start {
- l.emit(tText)
- }
- l.summaryDividerChecked = true
- l.pos += len(summaryDividerOrg)
- //l.consumeCRLF()
- l.emit(TypeSummaryDividerOrg)
}
}
@@ -261,6 +254,8 @@ func (l *pageLexer) isShortCodeStart() bool {
}
func lexIntroSection(l *pageLexer) stateFunc {
+ l.summaryDivider = summaryDivider
+
LOOP:
for {
r := l.next()
@@ -283,7 +278,7 @@ LOOP:
// No front matter.
if r == '<' {
l.backup()
- if l.hasPrefix(htmlCOmmentStart) {
+ if l.hasPrefix(htmlCommentStart) {
right := l.index(htmlCOmmentEnd)
if right == -1 {
return l.errorf("starting HTML comment with no end")
@@ -291,10 +286,14 @@ LOOP:
l.pos += right + len(htmlCOmmentEnd)
l.emit(TypeHTMLComment)
} else {
- // Not need to look further. Hugo treats this as plain HTML,
- // no front matter, no shortcodes, no nothing.
- l.pos = len(l.input)
- l.emit(TypeHTMLDocument)
+ if l.pos > l.start {
+ l.emit(tText)
+ }
+ l.next()
+ // This is the start of a plain HTML document with no
+ // front matter. It can still contain shortcodes, so we
+ // have to keep looking.
+ l.emit(TypeHTMLStart)
}
}
break LOOP
@@ -365,10 +364,11 @@ func lexFrontMatterOrgMode(l *pageLexer) stateFunc {
#+DESCRIPTION: Just another golang parser for org content!
*/
+ l.summaryDivider = summaryDividerOrg
+
l.backup()
if !l.hasPrefix(delimOrg) {
- // TODO(bep) consider error
return lexMainSection
}
diff --git a/parser/pageparser/pageparser.go b/parser/pageparser/pageparser.go
index 6e75f195a..754397121 100644
--- a/parser/pageparser/pageparser.go
+++ b/parser/pageparser/pageparser.go
@@ -48,7 +48,7 @@ func Parse(r io.Reader) (Result, error) {
}
func parseMainSection(input []byte, from int) Result {
- lexer := newPageLexer(input, from, lexMainSection) // TODO(bep) 2errors
+ lexer := newPageLexer(input, from, lexMainSection)
lexer.run()
return lexer
}
diff --git a/parser/pageparser/pageparser_intro_test.go b/parser/pageparser/pageparser_intro_test.go
index 32de6dc44..ba4a2c84b 100644
--- a/parser/pageparser/pageparser_intro_test.go
+++ b/parser/pageparser/pageparser_intro_test.go
@@ -38,7 +38,7 @@ var (
tstFrontMatterJSON = nti(TypeFrontMatterJSON, tstJSON+"\r\n")
tstSomeText = nti(tText, "\nSome text.\n")
tstSummaryDivider = nti(TypeLeadSummaryDivider, "<!--more-->")
- tstSummaryDividerOrg = nti(TypeSummaryDividerOrg, "# more")
+ tstHtmlStart = nti(TypeHTMLStart, "<")
tstORG = `
#+TITLE: T1
@@ -54,8 +54,8 @@ var crLfReplacer = strings.NewReplacer("\r", "#", "\n", "$")
var frontMatterTests = []lexerTest{
{"empty", "", []Item{tstEOF}},
{"Byte order mark", "\ufeff\nSome text.\n", []Item{nti(TypeIgnore, "\ufeff"), tstSomeText, tstEOF}},
- {"HTML Document", ` <html> `, []Item{nti(TypeHTMLDocument, " <html> "), tstEOF}},
- {"HTML Document 2", `<html><h1>Hugo Rocks</h1></html>`, []Item{nti(TypeHTMLDocument, "<html><h1>Hugo Rocks</h1></html>"), tstEOF}},
+ {"HTML Document", ` <html> `, []Item{nti(tText, " "), tstHtmlStart, nti(tText, "html> "), tstEOF}},
+ {"HTML Document with shortcode", `<html>{{< sc1 >}}</html>`, []Item{tstHtmlStart, nti(tText, "html>"), tstLeftNoMD, tstSC1, tstRightNoMD, nti(tText, "</html>"), tstEOF}},
{"No front matter", "\nSome text.\n", []Item{tstSomeText, tstEOF}},
{"YAML front matter", "---\nfoo: \"bar\"\n---\n\nSome text.\n", []Item{tstFrontMatterYAML, tstSomeText, tstEOF}},
{"YAML empty front matter", "---\n---\n\nSome text.\n", []Item{nti(TypeFrontMatterYAML, ""), tstSomeText, tstEOF}},
@@ -65,7 +65,7 @@ var frontMatterTests = []lexerTest{
{"TOML front matter", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n", []Item{tstFrontMatterTOML, tstSomeText, tstEOF}},
{"JSON front matter", tstJSON + "\r\n\nSome text.\n", []Item{tstFrontMatterJSON, tstSomeText, tstEOF}},
{"ORG front matter", tstORG + "\nSome text.\n", []Item{tstFrontMatterORG, tstSomeText, tstEOF}},
- {"Summary divider ORG", tstORG + "\nSome text.\n# more\nSome text.\n", []Item{tstFrontMatterORG, tstSomeText, tstSummaryDividerOrg, tstSomeText, tstEOF}},
+ {"Summary divider ORG", tstORG + "\nSome text.\n# more\nSome text.\n", []Item{tstFrontMatterORG, tstSomeText, nti(TypeLeadSummaryDivider, "# more"), tstSomeText, tstEOF}},
{"Summary divider", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n<!--more-->\nSome text.\n", []Item{tstFrontMatterTOML, tstSomeText, tstSummaryDivider, tstSomeText, tstEOF}},
}