Fix handling of HTML files without front matter

This means that any HTML file inside /content will be treated as a regular file. If you want it processed with shortcodes and a layout, add front matter. The definition of an HTML file here is: * File with extension .htm or .html * With first non-whitespace character "<" that isn't an HTML comment. This is in line with the documentation. Fixes #7030 Fixes #7028 See #6789
author: Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> 2020-03-09 12:04:33 +0100
committer: Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> 2020-03-09 15:01:55 +0100
commit: ffcb4aeb8e392a80da7cad0f1e03a4102efb24ec (patch)
tree: 945afe631ef8451f8a401b0a159a78b64e905713 /parser
parent: 8279d2e2271ee64725133d36a12d1d7e2158bffd (diff)
3 files changed, 4 insertions, 13 deletions
diff --git a/parser/pageparser/item.go b/parser/pageparser/item.go
index 48003ee86..9224fba94 100644
--- a/parser/pageparser/item.go
+++ b/parser/pageparser/item.go
@@ -140,7 +140,6 @@ const (
 	tEOF
 
 	// page items
-	TypeHTMLStart          // document starting with < as first non-whitespace
 	TypeLeadSummaryDivider // <!--more-->,  # more
 	TypeFrontMatterYAML
 	TypeFrontMatterTOML
diff --git a/parser/pageparser/pagelexer_intro.go b/parser/pageparser/pagelexer_intro.go
index 56dd4224d..539e6cfaa 100644
--- a/parser/pageparser/pagelexer_intro.go
+++ b/parser/pageparser/pagelexer_intro.go
@@ -42,21 +42,14 @@ LOOP:
 			if r == '<' {
 				l.backup()
 				if l.hasPrefix(htmlCommentStart) {
-					// This may be commented out front mattter, which should
+					// This may be commented out front matter, which should
 					// still be read.
 					l.consumeToNextLine()
 					l.isInHTMLComment = true
 					l.emit(TypeIgnore)
 					continue LOOP
 				} else {
-					if l.pos > l.start {
-						l.emit(tText)
-					}
-					l.next()
-					// This is the start of a plain HTML document with no
-					// front matter. I still can contain shortcodes, so we
-					// have to keep looking.
-					l.emit(TypeHTMLStart)
+					return l.errorf("plain HTML documents not supported")
 				}
 			}
 			break LOOP
diff --git a/parser/pageparser/pageparser_intro_test.go b/parser/pageparser/pageparser_intro_test.go
index 0f20ae5a1..e776cb3ee 100644
--- a/parser/pageparser/pageparser_intro_test.go
+++ b/parser/pageparser/pageparser_intro_test.go
@@ -38,7 +38,6 @@ var (
 	tstFrontMatterJSON     = nti(TypeFrontMatterJSON, tstJSON+"\r\n")
 	tstSomeText            = nti(tText, "\nSome text.\n")
 	tstSummaryDivider      = nti(TypeLeadSummaryDivider, "<!--more-->\n")
-	tstHtmlStart           = nti(TypeHTMLStart, "<")
 	tstNewline             = nti(tText, "\n")
 
 	tstORG = `
@@ -55,8 +54,8 @@ var crLfReplacer = strings.NewReplacer("\r", "#", "\n", "$")
 var frontMatterTests = []lexerTest{
 	{"empty", "", []Item{tstEOF}},
 	{"Byte order mark", "\ufeff\nSome text.\n", []Item{nti(TypeIgnore, "\ufeff"), tstSomeText, tstEOF}},
-	{"HTML Document", `  <html>  `, []Item{nti(tText, "  "), tstHtmlStart, nti(tText, "html>  "), tstEOF}},
-	{"HTML Document with shortcode", `<html>{{< sc1 >}}</html>`, []Item{tstHtmlStart, nti(tText, "html>"), tstLeftNoMD, tstSC1, tstRightNoMD, nti(tText, "</html>"), tstEOF}},
+	{"HTML Document", `  <html>  `, []Item{nti(tError, "plain HTML documents not supported")}},
+	{"HTML Document with shortcode", `<html>{{< sc1 >}}</html>`, []Item{nti(tError, "plain HTML documents not supported")}},
 	{"No front matter", "\nSome text.\n", []Item{tstSomeText, tstEOF}},
 	{"YAML front matter", "---\nfoo: \"bar\"\n---\n\nSome text.\n", []Item{tstFrontMatterYAML, tstSomeText, tstEOF}},
 	{"YAML empty front matter", "---\n---\n\nSome text.\n", []Item{nti(TypeFrontMatterYAML, ""), tstSomeText, tstEOF}},
author	Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>	2020-03-09 12:04:33 +0100
committer	Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>	2020-03-09 15:01:55 +0100
commit	ffcb4aeb8e392a80da7cad0f1e03a4102efb24ec (patch)
tree	945afe631ef8451f8a401b0a159a78b64e905713 /parser
parent	8279d2e2271ee64725133d36a12d1d7e2158bffd (diff)