summary | refs | log | tree | commit | diff | stats
path: root/markup
diff options
context:
space:
mode:
author Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> 2020-01-05 11:52:00 +0100
committer Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> 2020-01-05 11:56:05 +0100
commit 16e7c1120346bd853cf6510ffac8e94824bf2c7f (patch)
tree e1f1c38bdf978015d8e9fd81e3c5c51d18fff327 /markup
parent 8f071fc159ce9a0fc0ea14a73bde8f299bedd109 (diff)
markup/goldmark: Add an optional Blackfriday auto ID strategy
Fixes #6707
Diffstat (limited to 'markup')
-rw-r--r-- markup/blackfriday/convert.go 24
-rw-r--r-- markup/blackfriday/convert_test.go 42
-rw-r--r-- markup/goldmark/autoid.go 57
-rw-r--r-- markup/goldmark/autoid_test.go 33
-rw-r--r-- markup/goldmark/convert.go 3
-rw-r--r-- markup/goldmark/convert_test.go 15
-rw-r--r-- markup/goldmark/goldmark_config/config.go 1
7 files changed, 141 insertions, 34 deletions
diff --git a/markup/blackfriday/convert.go b/markup/blackfriday/convert.go
index bbbc2b377..d844c5554 100644
--- a/markup/blackfriday/convert.go
+++ b/markup/blackfriday/convert.go
@@ -15,6 +15,8 @@
package blackfriday
import (
+ "unicode"
+
"github.com/gohugoio/hugo/identity"
"github.com/gohugoio/hugo/markup/blackfriday/blackfriday_config"
"github.com/gohugoio/hugo/markup/converter"
@@ -61,7 +63,27 @@ type blackfridayConverter struct {
}
func (c *blackfridayConverter) SanitizeAnchorName(s string) string {
- return blackfriday.SanitizedAnchorName(s)
+ return SanitizedAnchorName(s)
+}
+
+// SanitizedAnchorName is how Blackfriday sanitizes anchor names.
+// Implementation borrowed from https://github.com/russross/blackfriday/blob/a477dd1646916742841ed20379f941cfa6c5bb6f/block.go#L1464
+func SanitizedAnchorName(text string) string {
+ var anchorName []rune
+ futureDash := false
+ for _, r := range text {
+ switch {
+ case unicode.IsLetter(r) || unicode.IsNumber(r):
+ if futureDash && len(anchorName) > 0 {
+ anchorName = append(anchorName, '-')
+ }
+ futureDash = false
+ anchorName = append(anchorName, unicode.ToLower(r))
+ default:
+ futureDash = true
+ }
+ }
+ return string(anchorName)
}
func (c *blackfridayConverter) AnchorSuffix() string {
diff --git a/markup/blackfriday/convert_test.go b/markup/blackfriday/convert_test.go
index b4d66dec6..d2d8d927e 100644
--- a/markup/blackfriday/convert_test.go
+++ b/markup/blackfriday/convert_test.go
@@ -179,3 +179,45 @@ This is a footnote.[^1] And then some.
c.Assert(s, qt.Contains, "This is a footnote.<sup class=\"footnote-ref\" id=\"fnref:testid:1\"><a href=\"#fn:testid:1\">1</a></sup>")
c.Assert(s, qt.Contains, "<a class=\"footnote-return\" href=\"#fnref:testid:1\"><sup>[return]</sup></a>")
}
+
+// Tests borrowed from https://github.com/russross/blackfriday/blob/a925a152c144ea7de0f451eaf2f7db9e52fa005a/block_test.go#L1817
+func TestSanitizedAnchorName(t *testing.T) {
+ tests := []struct {
+ text string
+ want string
+ }{
+ {
+ text: "This is a header",
+ want: "this-is-a-header",
+ },
+ {
+ text: "This is also a header",
+ want: "this-is-also-a-header",
+ },
+ {
+ text: "main.go",
+ want: "main-go",
+ },
+ {
+ text: "Article 123",
+ want: "article-123",
+ },
+ {
+ text: "<- Let's try this, shall we?",
+ want: "let-s-try-this-shall-we",
+ },
+ {
+ text: " ",
+ want: "",
+ },
+ {
+ text: "Hello, 世界",
+ want: "hello-世界",
+ },
+ }
+ for _, test := range tests {
+ if got := SanitizedAnchorName(test.text); got != test.want {
+ t.Errorf("SanitizedAnchorName(%q):\ngot %q\nwant %q", test.text, got, test.want)
+ }
+ }
+}
diff --git a/markup/goldmark/autoid.go b/markup/goldmark/autoid.go
index aaf1852d1..950d4a577 100644
--- a/markup/goldmark/autoid.go
+++ b/markup/goldmark/autoid.go
@@ -19,6 +19,8 @@ import (
"unicode"
"unicode/utf8"
+ "github.com/gohugoio/hugo/markup/blackfriday"
+
"github.com/gohugoio/hugo/markup/goldmark/goldmark_config"
"github.com/gohugoio/hugo/common/text"
@@ -30,34 +32,41 @@ import (
bp "github.com/gohugoio/hugo/bufferpool"
)
-func sanitizeAnchorNameString(s string, asciiOnly bool) string {
- return string(sanitizeAnchorName([]byte(s), asciiOnly))
+func sanitizeAnchorNameString(s string, idType string) string {
+ return string(sanitizeAnchorName([]byte(s), idType))
}
-func sanitizeAnchorName(b []byte, asciiOnly bool) []byte {
- return sanitizeAnchorNameWithHook(b, asciiOnly, nil)
+func sanitizeAnchorName(b []byte, idType string) []byte {
+ return sanitizeAnchorNameWithHook(b, idType, nil)
}
-func sanitizeAnchorNameWithHook(b []byte, asciiOnly bool, hook func(buf *bytes.Buffer)) []byte {
+func sanitizeAnchorNameWithHook(b []byte, idType string, hook func(buf *bytes.Buffer)) []byte {
buf := bp.GetBuffer()
- if asciiOnly {
- // Normalize it to preserve accents if possible.
- b = text.RemoveAccents(b)
- }
+ if idType == goldmark_config.AutoHeadingIDTypeBlackfriday {
+ // TODO(bep) make it more efficient.
+ buf.WriteString(blackfriday.SanitizedAnchorName(string(b)))
+ } else {
+ asciiOnly := idType == goldmark_config.AutoHeadingIDTypeGitHubAscii
- for len(b) > 0 {
- r, size := utf8.DecodeRune(b)
- switch {
- case asciiOnly && size != 1:
- case r == '-' || isSpace(r):
- buf.WriteRune('-')
- case isAlphaNumeric(r):
- buf.WriteRune(unicode.ToLower(r))
- default:
+ if asciiOnly {
+ // Normalize it to preserve accents if possible.
+ b = text.RemoveAccents(b)
}
- b = b[size:]
+ for len(b) > 0 {
+ r, size := utf8.DecodeRune(b)
+ switch {
+ case asciiOnly && size != 1:
+ case r == '-' || isSpace(r):
+ buf.WriteRune('-')
+ case isAlphaNumeric(r):
+ buf.WriteRune(unicode.ToLower(r))
+ default:
+ }
+
+ b = b[size:]
+ }
}
if hook != nil {
@@ -83,19 +92,19 @@ func isSpace(r rune) bool {
var _ parser.IDs = (*idFactory)(nil)
type idFactory struct {
- asciiOnly bool
- vals map[string]struct{}
+ idType string
+ vals map[string]struct{}
}
func newIDFactory(idType string) *idFactory {
return &idFactory{
- vals: make(map[string]struct{}),
- asciiOnly: idType == goldmark_config.AutoHeadingIDTypeGitHubAscii,
+ vals: make(map[string]struct{}),
+ idType: idType,
}
}
func (ids *idFactory) Generate(value []byte, kind ast.NodeKind) []byte {
- return sanitizeAnchorNameWithHook(value, ids.asciiOnly, func(buf *bytes.Buffer) {
+ return sanitizeAnchorNameWithHook(value, ids.idType, func(buf *bytes.Buffer) {
if buf.Len() == 0 {
if kind == ast.KindHeading {
buf.WriteString("heading")
diff --git a/markup/goldmark/autoid_test.go b/markup/goldmark/autoid_test.go
index 915c6a03c..1257b3482 100644
--- a/markup/goldmark/autoid_test.go
+++ b/markup/goldmark/autoid_test.go
@@ -17,6 +17,8 @@ import (
"strings"
"testing"
+ "github.com/gohugoio/hugo/markup/goldmark/goldmark_config"
+
qt "github.com/frankban/quicktest"
)
@@ -69,9 +71,9 @@ under_score
expect := expectlines[i]
c.Run(input, func(c *qt.C) {
b := []byte(input)
- got := string(sanitizeAnchorName(b, false))
+ got := string(sanitizeAnchorName(b, goldmark_config.AutoHeadingIDTypeGitHub))
c.Assert(got, qt.Equals, expect)
- c.Assert(sanitizeAnchorNameString(input, false), qt.Equals, expect)
+ c.Assert(sanitizeAnchorNameString(input, goldmark_config.AutoHeadingIDTypeGitHub), qt.Equals, expect)
c.Assert(string(b), qt.Equals, input)
})
}
@@ -80,16 +82,21 @@ under_score
func TestSanitizeAnchorNameAsciiOnly(t *testing.T) {
c := qt.New(t)
- c.Assert(sanitizeAnchorNameString("god is神真美好 good", true), qt.Equals, "god-is-good")
- c.Assert(sanitizeAnchorNameString("Resumé", true), qt.Equals, "resume")
+ c.Assert(sanitizeAnchorNameString("god is神真美好 good", goldmark_config.AutoHeadingIDTypeGitHubAscii), qt.Equals, "god-is-good")
+ c.Assert(sanitizeAnchorNameString("Resumé", goldmark_config.AutoHeadingIDTypeGitHubAscii), qt.Equals, "resume")
+
+}
+func TestSanitizeAnchorNameBlackfriday(t *testing.T) {
+ c := qt.New(t)
+ c.Assert(sanitizeAnchorNameString("Let's try this, shall we?", goldmark_config.AutoHeadingIDTypeBlackfriday), qt.Equals, "let-s-try-this-shall-we")
}
func BenchmarkSanitizeAnchorName(b *testing.B) {
input := []byte("God is good: 神真美好")
b.ResetTimer()
for i := 0; i < b.N; i++ {
- result := sanitizeAnchorName(input, false)
+ result := sanitizeAnchorName(input, goldmark_config.AutoHeadingIDTypeGitHub)
if len(result) != 24 {
b.Fatalf("got %d", len(result))
@@ -101,7 +108,7 @@ func BenchmarkSanitizeAnchorNameAsciiOnly(b *testing.B) {
input := []byte("God is good: 神真美好")
b.ResetTimer()
for i := 0; i < b.N; i++ {
- result := sanitizeAnchorName(input, true)
+ result := sanitizeAnchorName(input, goldmark_config.AutoHeadingIDTypeGitHubAscii)
if len(result) != 12 {
b.Fatalf("got %d", len(result))
@@ -109,11 +116,23 @@ func BenchmarkSanitizeAnchorNameAsciiOnly(b *testing.B) {
}
}
+func BenchmarkSanitizeAnchorNameBlackfriday(b *testing.B) {
+ input := []byte("God is good: 神真美好")
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ result := sanitizeAnchorName(input, goldmark_config.AutoHeadingIDTypeBlackfriday)
+ if len(result) != 24 {
+ b.Fatalf("got %d", len(result))
+
+ }
+ }
+}
+
func BenchmarkSanitizeAnchorNameString(b *testing.B) {
input := "God is good: 神真美好"
b.ResetTimer()
for i := 0; i < b.N; i++ {
- result := sanitizeAnchorNameString(input, false)
+ result := sanitizeAnchorNameString(input, goldmark_config.AutoHeadingIDTypeGitHub)
if len(result) != 24 {
b.Fatalf("got %d", len(result))
}
diff --git a/markup/goldmark/convert.go b/markup/goldmark/convert.go
index c6f958366..d4c353353 100644
--- a/markup/goldmark/convert.go
+++ b/markup/goldmark/convert.go
@@ -29,7 +29,6 @@ import (
"github.com/gohugoio/hugo/hugofs"
"github.com/gohugoio/hugo/markup/converter"
- "github.com/gohugoio/hugo/markup/goldmark/goldmark_config"
"github.com/gohugoio/hugo/markup/highlight"
"github.com/gohugoio/hugo/markup/tableofcontents"
"github.com/yuin/goldmark"
@@ -57,7 +56,7 @@ func (p provide) New(cfg converter.ProviderConfig) (converter.Provider, error) {
cfg: cfg,
md: md,
sanitizeAnchorName: func(s string) string {
- return sanitizeAnchorNameString(s, cfg.MarkupConfig.Goldmark.Parser.AutoHeadingIDType == goldmark_config.AutoHeadingIDTypeGitHub)
+ return sanitizeAnchorNameString(s, cfg.MarkupConfig.Goldmark.Parser.AutoHeadingIDType)
},
}, nil
}), nil
diff --git a/markup/goldmark/convert_test.go b/markup/goldmark/convert_test.go
index 3c173fb0a..31799b2a5 100644
--- a/markup/goldmark/convert_test.go
+++ b/markup/goldmark/convert_test.go
@@ -178,6 +178,21 @@ func TestConvertAutoIDAsciiOnly(t *testing.T) {
c.Assert(got, qt.Contains, "<h2 id=\"god-is-good-\">")
}
+func TestConvertAutoIDBlackfriday(t *testing.T) {
+ c := qt.New(t)
+
+ content := `
+## Let's try this, shall we?
+
+`
+ mconf := markup_config.Default
+ mconf.Goldmark.Parser.AutoHeadingIDType = goldmark_config.AutoHeadingIDTypeBlackfriday
+ b := convert(c, mconf, content)
+ got := string(b.Bytes())
+
+ c.Assert(got, qt.Contains, "<h2 id=\"let-s-try-this-shall-we\">")
+}
+
func TestCodeFence(t *testing.T) {
c := qt.New(t)
diff --git a/markup/goldmark/goldmark_config/config.go b/markup/goldmark/goldmark_config/config.go
index 47399b52c..af33e03dc 100644
--- a/markup/goldmark/goldmark_config/config.go
+++ b/markup/goldmark/goldmark_config/config.go
@@ -17,6 +17,7 @@ package goldmark_config
const (
AutoHeadingIDTypeGitHub = "github"
AutoHeadingIDTypeGitHubAscii = "github-ascii"
+ AutoHeadingIDTypeBlackfriday = "blackfriday"
)
// DefaultConfig holds the default Goldmark configuration.