summary | refs | log | tree | commit | diff | stats
path: root/helpers
diff options
context:
space:
mode:
author: Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> 2022-05-25 10:56:14 +0200
committer: Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> 2022-05-25 17:55:23 +0200
commit: 3854a6fa6c323d1c09aa71a0626c9eef62709294 (patch)
tree: ea3727c14f73fb73aef89d43795dd6d6f75f1220 /helpers
parent: cd0112a05a9ddb7043c9808284f93d8099c48473 (diff)
Fix Plainify edge cases
This commit replaces the main part of `helpers.StripHTML` with Go's implementation in its html/template package.

It's a little slower, but correctness is more important:

```bash
BenchmarkStripHTMLOld-10    680316    1764 ns/op     728 B/op     4 allocs/op
BenchmarkStripHTMLNew-10    384520    3099 ns/op    2089 B/op    10 allocs/op
```

Fixes #9199
Fixes #9909
Closes #9410
Diffstat (limited to 'helpers')
-rw-r--r-- helpers/content.go | 40
-rw-r--r-- helpers/content_test.go | 38
2 files changed, 0 insertions, 78 deletions
diff --git a/helpers/content.go b/helpers/content.go
index 835663b76..d04e34a07 100644
--- a/helpers/content.go
+++ b/helpers/content.go
@@ -34,7 +34,6 @@ import (
"github.com/gohugoio/hugo/markup"
- bp "github.com/gohugoio/hugo/bufferpool"
"github.com/gohugoio/hugo/config"
)
@@ -104,45 +103,6 @@ func NewContentSpec(cfg config.Provider, logger loggers.Logger, contentFs afero.
return spec, nil
}
-var stripHTMLReplacer = strings.NewReplacer("\n", " ", "</p>", "\n", "<br>", "\n", "<br />", "\n")
-
-// StripHTML accepts a string, strips out all HTML tags and returns it.
-func StripHTML(s string) string {
- // Shortcut strings with no tags in them
- if !strings.ContainsAny(s, "<>") {
- return s
- }
- s = stripHTMLReplacer.Replace(s)
-
- // Walk through the string removing all tags
- b := bp.GetBuffer()
- defer bp.PutBuffer(b)
- var inTag, isSpace, wasSpace bool
- for _, r := range s {
- if !inTag {
- isSpace = false
- }
-
- switch {
- case r == '<':
- inTag = true
- case r == '>':
- inTag = false
- case unicode.IsSpace(r):
- isSpace = true
- fallthrough
- default:
- if !inTag && (!isSpace || (isSpace && !wasSpace)) {
- b.WriteRune(r)
- }
- }
-
- wasSpace = isSpace
-
- }
- return b.String()
-}
-
// stripEmptyNav strips out empty <nav> tags from content.
func stripEmptyNav(in []byte) []byte {
return bytes.Replace(in, []byte("<nav>\n</nav>\n\n"), []byte(``), -1)
diff --git a/helpers/content_test.go b/helpers/content_test.go
index 4b67b44f0..54b7ef3f9 100644
--- a/helpers/content_test.go
+++ b/helpers/content_test.go
@@ -52,44 +52,6 @@ func TestTrimShortHTML(t *testing.T) {
}
}
-func TestStripHTML(t *testing.T) {
- type test struct {
- input, expected string
- }
- data := []test{
- {"<h1>strip h1 tag <h1>", "strip h1 tag "},
- {"<p> strip p tag </p>", " strip p tag "},
- {"</br> strip br<br>", " strip br\n"},
- {"</br> strip br2<br />", " strip br2\n"},
- {"This <strong>is</strong> a\nnewline", "This is a newline"},
- {"No Tags", "No Tags"},
- {`<p>Summary Next Line.
-<figure >
-
- <img src="/not/real" />
-
-
-</figure>
-.
-More text here.</p>
-
-<p>Some more text</p>`, "Summary Next Line. . More text here.\nSome more text\n"},
- }
- for i, d := range data {
- output := StripHTML(d.input)
- if d.expected != output {
- t.Errorf("Test %d failed. Expected %q got %q", i, d.expected, output)
- }
- }
-}
-
-func BenchmarkStripHTML(b *testing.B) {
- b.ResetTimer()
- for i := 0; i < b.N; i++ {
- StripHTML(tstHTMLContent)
- }
-}
-
func TestStripEmptyNav(t *testing.T) {
c := qt.New(t)
cleaned := stripEmptyNav([]byte("do<nav>\n</nav>\n\nbedobedo"))