diff options
author | Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> | 2018-11-28 10:21:54 +0100 |
---|---|---|
committer | Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> | 2018-11-28 10:28:50 +0100 |
commit | 7540a62834d4465af8936967e430a9e05a1e1359 (patch) | |
tree | 081ac01fd4ba3ce125825eb40cfbb5911845dbec /parser/pageparser | |
parent | 7e75aeca80aead50d64902d2ff47e4ad4d013352 (diff) |
parser/pageparser: Fix handling of commented out front matter
When the page parser was rewritten in 0.51, a leading HTML comment was interpreted literally and skipped as a whole, so any front matter inside it was ignored. However, commented-out front matter is used in the wild to "hide it from GitHub", e.g.:
```
<!--
+++
title = "hello"
+++
-->
```
Fixes #5478
Diffstat (limited to 'parser/pageparser')
-rw-r--r-- | parser/pageparser/item.go | 1 | ||||
-rw-r--r-- | parser/pageparser/pagelexer.go | 43 | ||||
-rw-r--r-- | parser/pageparser/pageparser_intro_test.go | 4 |
3 files changed, 37 insertions, 11 deletions
diff --git a/parser/pageparser/item.go b/parser/pageparser/item.go index 644c20e87..b9d47b16e 100644 --- a/parser/pageparser/item.go +++ b/parser/pageparser/item.go @@ -108,7 +108,6 @@ const ( // page items TypeHTMLStart // document starting with < as first non-whitespace - TypeHTMLComment // We ignore leading comments TypeLeadSummaryDivider // <!--more-->, # more TypeFrontMatterYAML TypeFrontMatterTOML diff --git a/parser/pageparser/pagelexer.go b/parser/pageparser/pagelexer.go index 94c1ff26b..5802c318b 100644 --- a/parser/pageparser/pagelexer.go +++ b/parser/pageparser/pagelexer.go @@ -53,6 +53,8 @@ type pageLexer struct { summaryDivider []byte // Set when we have parsed any summary divider summaryDividerChecked bool + // Whether we're in a HTML comment. + isInHTMLComment bool lexerShortcodeState @@ -120,7 +122,7 @@ var ( delimYAML = []byte("---") delimOrg = []byte("#+") htmlCommentStart = []byte("<!--") - htmlCOmmentEnd = []byte("-->") + htmlCommentEnd = []byte("-->") ) func (l *pageLexer) next() rune { @@ -195,6 +197,15 @@ func (l *pageLexer) consumeCRLF() bool { return consumed } +func (l *pageLexer) consumeToNextLine() { + for { + r := l.next() + if r == eof || isEndOfLine(r) { + return + } + } +} + func (l *pageLexer) consumeSpace() { for { r := l.next() @@ -206,6 +217,10 @@ func (l *pageLexer) consumeSpace() { } func lexMainSection(l *pageLexer) stateFunc { + if l.isInHTMLComment { + return lexEndFromtMatterHTMLComment + } + // Fast forward as far as possible. var l1, l2 int @@ -312,16 +327,15 @@ LOOP: case r == byteOrderMark: l.emit(TypeIgnore) case !isSpace(r) && !isEndOfLine(r): - // No front matter. if r == '<' { l.backup() if l.hasPrefix(htmlCommentStart) { - right := l.index(htmlCOmmentEnd) - if right == -1 { - return l.errorf("starting HTML comment with no end") - } - l.pos += right + len(htmlCOmmentEnd) - l.emit(TypeHTMLComment) + // This may be commented out front mattter, which should + // still be read. 
+ l.consumeToNextLine() + l.isInHTMLComment = true + l.emit(TypeIgnore) + continue LOOP } else { if l.pos > l.start { l.emit(tText) @@ -341,6 +355,19 @@ LOOP: return lexMainSection } +func lexEndFromtMatterHTMLComment(l *pageLexer) stateFunc { + l.isInHTMLComment = false + right := l.index(htmlCommentEnd) + if right == -1 { + return l.errorf("starting HTML comment with no end") + } + l.pos += right + len(htmlCommentEnd) + l.emit(TypeIgnore) + + // Now move on to the shortcodes. + return lexMainSection +} + func lexDone(l *pageLexer) stateFunc { // Done! diff --git a/parser/pageparser/pageparser_intro_test.go b/parser/pageparser/pageparser_intro_test.go index ba48a3ee3..901b216c8 100644 --- a/parser/pageparser/pageparser_intro_test.go +++ b/parser/pageparser/pageparser_intro_test.go @@ -60,7 +60,8 @@ var frontMatterTests = []lexerTest{ {"No front matter", "\nSome text.\n", []Item{tstSomeText, tstEOF}}, {"YAML front matter", "---\nfoo: \"bar\"\n---\n\nSome text.\n", []Item{tstFrontMatterYAML, tstSomeText, tstEOF}}, {"YAML empty front matter", "---\n---\n\nSome text.\n", []Item{nti(TypeFrontMatterYAML, ""), tstSomeText, tstEOF}}, - {"YAML commented out front matter", "<!--\n---\nfoo: \"bar\"\n---\n-->\nSome text.\n", []Item{nti(TypeHTMLComment, "<!--\n---\nfoo: \"bar\"\n---\n-->"), tstSomeText, tstEOF}}, + {"YAML commented out front matter", "<!--\n---\nfoo: \"bar\"\n---\n-->\nSome text.\n", []Item{nti(TypeIgnore, "<!--\n"), tstFrontMatterYAML, nti(TypeIgnore, "-->"), tstSomeText, tstEOF}}, + {"YAML commented out front matter, no end", "<!--\n---\nfoo: \"bar\"\n---\nSome text.\n", []Item{nti(TypeIgnore, "<!--\n"), tstFrontMatterYAML, nti(tError, "starting HTML comment with no end")}}, // Note that we keep all bytes as they are, but we need to handle CRLF {"YAML front matter CRLF", "---\r\nfoo: \"bar\"\r\n---\n\nSome text.\n", []Item{tstFrontMatterYAMLCRLF, tstSomeText, tstEOF}}, {"TOML front matter", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n", 
[]Item{tstFrontMatterTOML, tstSomeText, tstEOF}}, @@ -78,7 +79,6 @@ var frontMatterTests = []lexerTest{ func TestFrontMatter(t *testing.T) { t.Parallel() for i, test := range frontMatterTests { - items := collect([]byte(test.input), false, lexIntroSection) if !equal(items, test.items) { got := crLfReplacer.Replace(fmt.Sprint(items)) |