Make HTML behave exactly like other content formats (note)

Fixes #11999
author: Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> 2024-02-07 10:30:32 +0100
committer: Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> 2024-02-07 21:37:04 +0100
commit: 676e6875dac391f1ec1f7369ed2d59439f434806 (patch)
tree: af689eec1f48d511169f8b07f4c65420c7479413 /parser
parent: 068ccde4c75d018b977f4169308b3c994c6f3031 (diff)
3 files changed, 0 insertions, 58 deletions
diff --git a/parser/pageparser/pagelexer.go b/parser/pageparser/pagelexer.go
index bd903b771..5f90e3687 100644
--- a/parser/pageparser/pagelexer.go
+++ b/parser/pageparser/pagelexer.go
@@ -43,8 +43,6 @@ type pageLexer struct {
 	summaryDivider []byte
 	// Set when we have parsed any summary divider
 	summaryDividerChecked bool
-	// Whether we're in a HTML comment.
-	isInHTMLComment bool
 
 	lexerShortcodeState
 
@@ -102,8 +100,6 @@ var (
 	delimTOML         = []byte("+++")
 	delimYAML         = []byte("---")
 	delimOrg          = []byte("#+")
-	htmlCommentStart  = []byte("<!--")
-	htmlCommentEnd    = []byte("-->")
 )
 
 func (l *pageLexer) next() rune {
@@ -232,13 +228,6 @@ func (l *pageLexer) errorf(format string, args ...any) stateFunc {
 	return nil
 }
 
-// documentError can be used to signal a fatal error in the lexing process.
-// nil terminates the parser
-func (l *pageLexer) documentError(err error) stateFunc {
-	l.err = err
-	return nil
-}
-
 func (l *pageLexer) consumeCRLF() bool {
 	var consumed bool
 	for _, r := range crLf {
@@ -251,15 +240,6 @@ func (l *pageLexer) consumeCRLF() bool {
 	return consumed
 }
 
-func (l *pageLexer) consumeToNextLine() {
-	for {
-		r := l.next()
-		if r == eof || isEndOfLine(r) {
-			return
-		}
-	}
-}
-
 func (l *pageLexer) consumeToSpace() {
 	for {
 		r := l.next()
@@ -441,10 +421,6 @@ func lexMainSection(l *pageLexer) stateFunc {
 		return lexDone
 	}
 
-	if l.isInHTMLComment {
-		return lexEndFrontMatterHTMLComment
-	}
-
 	// Fast forward as far as possible.
 	skip := l.sectionHandlers.skip()
 
diff --git a/parser/pageparser/pagelexer_intro.go b/parser/pageparser/pagelexer_intro.go
index 25af4170b..0ff0958fe 100644
--- a/parser/pageparser/pagelexer_intro.go
+++ b/parser/pageparser/pagelexer_intro.go
@@ -13,10 +13,6 @@
 
 package pageparser
 
-import "errors"
-
-var ErrPlainHTMLDocumentsNotSupported = errors.New("plain HTML documents not supported")
-
 func lexIntroSection(l *pageLexer) stateFunc {
 	l.summaryDivider = summaryDivider
 
@@ -39,19 +35,6 @@ LOOP:
 		case r == byteOrderMark:
 			l.emit(TypeIgnore)
 		case !isSpace(r) && !isEndOfLine(r):
-			if r == '<' {
-				l.backup()
-				if l.hasPrefix(htmlCommentStart) {
-					// This may be commented out front matter, which should
-					// still be read.
-					l.consumeToNextLine()
-					l.isInHTMLComment = true
-					l.emit(TypeIgnore)
-					continue LOOP
-				} else {
-					return l.documentError(ErrPlainHTMLDocumentsNotSupported)
-				}
-			}
 			break LOOP
 		}
 	}
@@ -60,19 +43,6 @@ LOOP:
 	return lexMainSection
 }
 
-func lexEndFrontMatterHTMLComment(l *pageLexer) stateFunc {
-	l.isInHTMLComment = false
-	right := l.index(htmlCommentEnd)
-	if right == -1 {
-		return l.errorf("starting HTML comment with no end")
-	}
-	l.pos += right + len(htmlCommentEnd)
-	l.emit(TypeIgnore)
-
-	// Now move on to the shortcodes.
-	return lexMainSection
-}
-
 func lexFrontMatterJSON(l *pageLexer) stateFunc {
 	// Include the left delimiter
 	l.backup()
diff --git a/parser/pageparser/pageparser_intro_test.go b/parser/pageparser/pageparser_intro_test.go
index df2f2579b..12f4fc61c 100644
--- a/parser/pageparser/pageparser_intro_test.go
+++ b/parser/pageparser/pageparser_intro_test.go
@@ -61,13 +61,9 @@ var crLfReplacer = strings.NewReplacer("\r", "#", "\n", "$")
 var frontMatterTests = []lexerTest{
 	{"empty", "", []typeText{tstEOF}, nil},
 	{"Byte order mark", "\ufeff\nSome text.\n", []typeText{nti(TypeIgnore, "\ufeff"), tstSomeText, tstEOF}, nil},
-	{"HTML Document", `  <html>  `, nil, ErrPlainHTMLDocumentsNotSupported},
-	{"HTML Document with shortcode", `<html>{{< sc1 >}}</html>`, nil, ErrPlainHTMLDocumentsNotSupported},
 	{"No front matter", "\nSome text.\n", []typeText{tstSomeText, tstEOF}, nil},
 	{"YAML front matter", "---\nfoo: \"bar\"\n---\n\nSome text.\n", []typeText{tstFrontMatterYAML, tstSomeText, tstEOF}, nil},
 	{"YAML empty front matter", "---\n---\n\nSome text.\n", []typeText{nti(TypeFrontMatterYAML, ""), tstSomeText, tstEOF}, nil},
-	{"YAML commented out front matter", "<!--\n---\nfoo: \"bar\"\n---\n-->\nSome text.\n", []typeText{nti(TypeIgnore, "<!--\n"), tstFrontMatterYAML, nti(TypeIgnore, "-->"), tstSomeText, tstEOF}, nil},
-	{"YAML commented out front matter, no end", "<!--\n---\nfoo: \"bar\"\n---\nSome text.\n", []typeText{nti(TypeIgnore, "<!--\n"), tstFrontMatterYAML, nti(tError, "starting HTML comment with no end")}, nil},
 	// Note that we keep all bytes as they are, but we need to handle CRLF
 	{"YAML front matter CRLF", "---\r\nfoo: \"bar\"\r\n---\n\nSome text.\n", []typeText{tstFrontMatterYAMLCRLF, tstSomeText, tstEOF}, nil},
 	{"TOML front matter", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n", []typeText{tstFrontMatterTOML, tstSomeText, tstEOF}, nil},
author	Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>	2024-02-07 10:30:32 +0100
committer	Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>	2024-02-07 21:37:04 +0100
commit	676e6875dac391f1ec1f7369ed2d59439f434806 (patch)
tree	af689eec1f48d511169f8b07f4c65420c7479413 /parser
parent	068ccde4c75d018b977f4169308b3c994c6f3031 (diff)