summary refs log tree commit diff stats
path: root/parser
diff options
context:
space:
mode:
author Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> 2024-02-07 10:30:32 +0100
committer Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> 2024-02-07 21:37:04 +0100
commit 676e6875dac391f1ec1f7369ed2d59439f434806 (patch)
tree af689eec1f48d511169f8b07f4c65420c7479413 /parser
parent 068ccde4c75d018b977f4169308b3c994c6f3031 (diff)
Make HTML behave exactly like other content formats (note)
Fixes #11999
Diffstat (limited to 'parser')
-rw-r--r-- parser/pageparser/pagelexer.go 24
-rw-r--r-- parser/pageparser/pagelexer_intro.go 30
-rw-r--r-- parser/pageparser/pageparser_intro_test.go 4
3 files changed, 0 insertions, 58 deletions
diff --git a/parser/pageparser/pagelexer.go b/parser/pageparser/pagelexer.go
index bd903b771..5f90e3687 100644
--- a/parser/pageparser/pagelexer.go
+++ b/parser/pageparser/pagelexer.go
@@ -43,8 +43,6 @@ type pageLexer struct {
summaryDivider []byte
// Set when we have parsed any summary divider
summaryDividerChecked bool
- // Whether we're in a HTML comment.
- isInHTMLComment bool
lexerShortcodeState
@@ -102,8 +100,6 @@ var (
delimTOML = []byte("+++")
delimYAML = []byte("---")
delimOrg = []byte("#+")
- htmlCommentStart = []byte("<!--")
- htmlCommentEnd = []byte("-->")
)
func (l *pageLexer) next() rune {
@@ -232,13 +228,6 @@ func (l *pageLexer) errorf(format string, args ...any) stateFunc {
return nil
}
-// documentError can be used to signal a fatal error in the lexing process.
-// nil terminates the parser
-func (l *pageLexer) documentError(err error) stateFunc {
- l.err = err
- return nil
-}
-
func (l *pageLexer) consumeCRLF() bool {
var consumed bool
for _, r := range crLf {
@@ -251,15 +240,6 @@ func (l *pageLexer) consumeCRLF() bool {
return consumed
}
-func (l *pageLexer) consumeToNextLine() {
- for {
- r := l.next()
- if r == eof || isEndOfLine(r) {
- return
- }
- }
-}
-
func (l *pageLexer) consumeToSpace() {
for {
r := l.next()
@@ -441,10 +421,6 @@ func lexMainSection(l *pageLexer) stateFunc {
return lexDone
}
- if l.isInHTMLComment {
- return lexEndFrontMatterHTMLComment
- }
-
// Fast forward as far as possible.
skip := l.sectionHandlers.skip()
diff --git a/parser/pageparser/pagelexer_intro.go b/parser/pageparser/pagelexer_intro.go
index 25af4170b..0ff0958fe 100644
--- a/parser/pageparser/pagelexer_intro.go
+++ b/parser/pageparser/pagelexer_intro.go
@@ -13,10 +13,6 @@
package pageparser
-import "errors"
-
-var ErrPlainHTMLDocumentsNotSupported = errors.New("plain HTML documents not supported")
-
func lexIntroSection(l *pageLexer) stateFunc {
l.summaryDivider = summaryDivider
@@ -39,19 +35,6 @@ LOOP:
case r == byteOrderMark:
l.emit(TypeIgnore)
case !isSpace(r) && !isEndOfLine(r):
- if r == '<' {
- l.backup()
- if l.hasPrefix(htmlCommentStart) {
- // This may be commented out front matter, which should
- // still be read.
- l.consumeToNextLine()
- l.isInHTMLComment = true
- l.emit(TypeIgnore)
- continue LOOP
- } else {
- return l.documentError(ErrPlainHTMLDocumentsNotSupported)
- }
- }
break LOOP
}
}
@@ -60,19 +43,6 @@ LOOP:
return lexMainSection
}
-func lexEndFrontMatterHTMLComment(l *pageLexer) stateFunc {
- l.isInHTMLComment = false
- right := l.index(htmlCommentEnd)
- if right == -1 {
- return l.errorf("starting HTML comment with no end")
- }
- l.pos += right + len(htmlCommentEnd)
- l.emit(TypeIgnore)
-
- // Now move on to the shortcodes.
- return lexMainSection
-}
-
func lexFrontMatterJSON(l *pageLexer) stateFunc {
// Include the left delimiter
l.backup()
diff --git a/parser/pageparser/pageparser_intro_test.go b/parser/pageparser/pageparser_intro_test.go
index df2f2579b..12f4fc61c 100644
--- a/parser/pageparser/pageparser_intro_test.go
+++ b/parser/pageparser/pageparser_intro_test.go
@@ -61,13 +61,9 @@ var crLfReplacer = strings.NewReplacer("\r", "#", "\n", "$")
var frontMatterTests = []lexerTest{
{"empty", "", []typeText{tstEOF}, nil},
{"Byte order mark", "\ufeff\nSome text.\n", []typeText{nti(TypeIgnore, "\ufeff"), tstSomeText, tstEOF}, nil},
- {"HTML Document", ` <html> `, nil, ErrPlainHTMLDocumentsNotSupported},
- {"HTML Document with shortcode", `<html>{{< sc1 >}}</html>`, nil, ErrPlainHTMLDocumentsNotSupported},
{"No front matter", "\nSome text.\n", []typeText{tstSomeText, tstEOF}, nil},
{"YAML front matter", "---\nfoo: \"bar\"\n---\n\nSome text.\n", []typeText{tstFrontMatterYAML, tstSomeText, tstEOF}, nil},
{"YAML empty front matter", "---\n---\n\nSome text.\n", []typeText{nti(TypeFrontMatterYAML, ""), tstSomeText, tstEOF}, nil},
- {"YAML commented out front matter", "<!--\n---\nfoo: \"bar\"\n---\n-->\nSome text.\n", []typeText{nti(TypeIgnore, "<!--\n"), tstFrontMatterYAML, nti(TypeIgnore, "-->"), tstSomeText, tstEOF}, nil},
- {"YAML commented out front matter, no end", "<!--\n---\nfoo: \"bar\"\n---\nSome text.\n", []typeText{nti(TypeIgnore, "<!--\n"), tstFrontMatterYAML, nti(tError, "starting HTML comment with no end")}, nil},
// Note that we keep all bytes as they are, but we need to handle CRLF
{"YAML front matter CRLF", "---\r\nfoo: \"bar\"\r\n---\n\nSome text.\n", []typeText{tstFrontMatterYAMLCRLF, tstSomeText, tstEOF}, nil},
{"TOML front matter", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n", []typeText{tstFrontMatterTOML, tstSomeText, tstEOF}, nil},