diff options
author | Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> | 2020-03-09 12:04:33 +0100 |
---|---|---|
committer | Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> | 2020-03-09 15:01:55 +0100 |
commit | ffcb4aeb8e392a80da7cad0f1e03a4102efb24ec (patch) | |
tree | 945afe631ef8451f8a401b0a159a78b64e905713 /hugofs | |
parent | 8279d2e2271ee64725133d36a12d1d7e2158bffd (diff) |
Fix handling of HTML files without front matter
This means that any HTML file inside /content will be treated as a regular file.
If you want it processed with shortcodes and a layout, add front matter.
The definition of an HTML file here is:
* File with extension .htm or .html
* Whose first non-whitespace character is "<", where that "<" does not start an HTML comment.
This is in line with the documentation.
Fixes #7030
Fixes #7028
See #6789
Diffstat (limited to 'hugofs')
-rw-r--r-- | hugofs/files/classifier.go | 78 | ||||
-rw-r--r-- | hugofs/files/classifier_test.go | 12 | ||||
-rw-r--r-- | hugofs/filter_fs.go | 2 |
3 files changed, 90 insertions, 2 deletions
diff --git a/hugofs/files/classifier.go b/hugofs/files/classifier.go index e8f8241b7..5e26bbac0 100644 --- a/hugofs/files/classifier.go +++ b/hugofs/files/classifier.go @@ -14,10 +14,16 @@ package files import ( + "bufio" + "fmt" + "io" "os" "path/filepath" "sort" "strings" + "unicode" + + "github.com/spf13/afero" ) var ( @@ -32,6 +38,11 @@ var ( "pandoc", "pdc"} contentFileExtensionsSet map[string]bool + + htmlFileExtensions = []string{ + "html", "htm"} + + htmlFileExtensionsSet map[string]bool ) func init() { @@ -39,12 +50,20 @@ func init() { for _, ext := range contentFileExtensions { contentFileExtensionsSet[ext] = true } + htmlFileExtensionsSet = make(map[string]bool) + for _, ext := range htmlFileExtensions { + htmlFileExtensionsSet[ext] = true + } } func IsContentFile(filename string) bool { return contentFileExtensionsSet[strings.TrimPrefix(filepath.Ext(filename), ".")] } +func IsHTMLFile(filename string) bool { + return htmlFileExtensionsSet[strings.TrimPrefix(filepath.Ext(filename), ".")] +} + func IsContentExt(ext string) bool { return contentFileExtensionsSet[ext] } @@ -62,10 +81,33 @@ func (c ContentClass) IsBundle() bool { return c == ContentClassLeaf || c == ContentClassBranch } -func ClassifyContentFile(filename string) ContentClass { +func ClassifyContentFile(filename string, open func() (afero.File, error)) ContentClass { if !IsContentFile(filename) { return ContentClassFile } + + if IsHTMLFile(filename) { + // We need to look inside the file. If the first non-whitespace + // character is a "<", then we treat it as a regular file. + // Eearlier we created pages for these files, but that had all sorts + // of troubles, and isn't what it says in the documentation. 
+ // See https://github.com/gohugoio/hugo/issues/7030 + if open == nil { + panic(fmt.Sprintf("no file opener provided for %q", filename)) + } + + f, err := open() + if err != nil { + return ContentClassFile + } + ishtml := isHTMLContent(f) + f.Close() + if ishtml { + return ContentClassFile + } + + } + if strings.HasPrefix(filename, "_index.") { return ContentClassBranch } @@ -77,6 +119,40 @@ func ClassifyContentFile(filename string) ContentClass { return ContentClassContent } +var htmlComment = []rune{'<', '!', '-', '-'} + +func isHTMLContent(r io.Reader) bool { + br := bufio.NewReader(r) + i := 0 + for { + c, _, err := br.ReadRune() + if err != nil { + break + } + + if i > 0 { + if i >= len(htmlComment) { + return false + } + + if c != htmlComment[i] { + return true + } + + i++ + continue + } + + if !unicode.IsSpace(c) { + if i == 0 && c != '<' { + return false + } + i++ + } + } + return true +} + const ( ComponentFolderArchetypes = "archetypes" ComponentFolderStatic = "static" diff --git a/hugofs/files/classifier_test.go b/hugofs/files/classifier_test.go index af188f349..0cd7e4177 100644 --- a/hugofs/files/classifier_test.go +++ b/hugofs/files/classifier_test.go @@ -15,6 +15,7 @@ package files import ( "path/filepath" + "strings" "testing" qt "github.com/frankban/quicktest" @@ -30,6 +31,17 @@ func TestIsContentFile(t *testing.T) { c.Assert(IsContentExt("json"), qt.Equals, false) } +func TestIsHTMLContent(t *testing.T) { + c := qt.New(t) + + c.Assert(isHTMLContent(strings.NewReader(" <html>")), qt.Equals, true) + c.Assert(isHTMLContent(strings.NewReader(" <!--\n---")), qt.Equals, false) + c.Assert(isHTMLContent(strings.NewReader(" <!--")), qt.Equals, true) + c.Assert(isHTMLContent(strings.NewReader(" ---<")), qt.Equals, false) + c.Assert(isHTMLContent(strings.NewReader(" foo <")), qt.Equals, false) + +} + func TestComponentFolders(t *testing.T) { c := qt.New(t) diff --git a/hugofs/filter_fs.go b/hugofs/filter_fs.go index a42cd233a..ca9c33361 100644 --- 
a/hugofs/filter_fs.go +++ b/hugofs/filter_fs.go @@ -66,7 +66,7 @@ func NewLanguageFs(langs map[string]int, fs afero.Fs) (afero.Fs, error) { metaKeyOrdinal: langs[lang], metaKeyTranslationBaseName: translationBaseName, metaKeyTranslationBaseNameWithExt: translationBaseNameWithExt, - metaKeyClassifier: files.ClassifyContentFile(fi.Name()), + metaKeyClassifier: files.ClassifyContentFile(fi.Name(), meta.GetOpener()), }) fis[i] = fim |