summaryrefslogtreecommitdiffstats
path: root/markup
diff options
context:
space:
mode:
authorBjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>2020-01-04 11:28:19 +0100
committerBjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>2020-01-04 19:46:01 +0100
commita82d2700fcc772aada15d65b8f76913ca23f7404 (patch)
treefa1c09eb1523d7cda303982b5c08661af9a194d6 /markup
parentae816452b171b6b6aabca6a7423ed28a653baaa2 (diff)
markup/goldmark: Make auto IDs GitHub compatible
You can turn off this behaviour and generate ASCII-only IDs instead:

```toml
[markup]
[markup.goldmark]
[markup.goldmark.parser]
autoHeadingIDAsciiOnly = true
```

Note that `anchorize` now adapts its behaviour depending on the default Markdown handler.

Fixes #6616
Diffstat (limited to 'markup')
-rw-r--r--markup/blackfriday/convert.go7
-rw-r--r--markup/converter/converter.go5
-rw-r--r--markup/goldmark/autoid.go125
-rw-r--r--markup/goldmark/autoid_test.go121
-rw-r--r--markup/goldmark/convert.go20
-rw-r--r--markup/goldmark/convert_test.go54
-rw-r--r--markup/goldmark/goldmark_config/config.go4
7 files changed, 319 insertions, 17 deletions
diff --git a/markup/blackfriday/convert.go b/markup/blackfriday/convert.go
index 3df23c7ae..bbbc2b377 100644
--- a/markup/blackfriday/convert.go
+++ b/markup/blackfriday/convert.go
@@ -60,6 +60,10 @@ type blackfridayConverter struct {
cfg converter.ProviderConfig
}
+func (c *blackfridayConverter) SanitizeAnchorName(s string) string {
+ return blackfriday.SanitizedAnchorName(s)
+}
+
func (c *blackfridayConverter) AnchorSuffix() string {
if c.bf.PlainIDAnchors {
return ""
@@ -204,5 +208,6 @@ var blackfridayExtensionMap = map[string]int{
}
var (
- _ converter.DocumentInfo = (*blackfridayConverter)(nil)
+ _ converter.DocumentInfo = (*blackfridayConverter)(nil)
+ _ converter.AnchorNameSanitizer = (*blackfridayConverter)(nil)
)
diff --git a/markup/converter/converter.go b/markup/converter/converter.go
index a4585bd03..b8a5c92c1 100644
--- a/markup/converter/converter.go
+++ b/markup/converter/converter.go
@@ -87,6 +87,11 @@ type TableOfContentsProvider interface {
TableOfContents() tableofcontents.Root
}
+// AnchorNameSanitizer tells how a converter sanitizes anchor names.
+type AnchorNameSanitizer interface {
+ SanitizeAnchorName(s string) string
+}
+
// Bytes holds a byte slice and implements the Result interface.
type Bytes []byte
diff --git a/markup/goldmark/autoid.go b/markup/goldmark/autoid.go
new file mode 100644
index 000000000..6599f08d9
--- /dev/null
+++ b/markup/goldmark/autoid.go
@@ -0,0 +1,125 @@
+// Copyright 2019 The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package goldmark
+
+import (
+ "bytes"
+ "strconv"
+ "unicode"
+ "unicode/utf8"
+
+ "github.com/gohugoio/hugo/common/text"
+
+ "github.com/yuin/goldmark/ast"
+ "github.com/yuin/goldmark/parser"
+ "github.com/yuin/goldmark/util"
+
+ bp "github.com/gohugoio/hugo/bufferpool"
+)
+
+func sanitizeAnchorNameString(s string, asciiOnly bool) string {
+ return string(sanitizeAnchorName([]byte(s), asciiOnly))
+}
+
+func sanitizeAnchorName(b []byte, asciiOnly bool) []byte {
+ return sanitizeAnchorNameWithHook(b, asciiOnly, nil)
+}
+
+func sanitizeAnchorNameWithHook(b []byte, asciiOnly bool, hook func(buf *bytes.Buffer)) []byte {
+ buf := bp.GetBuffer()
+
+ if asciiOnly {
+ // Strip accents first so accented letters survive as their ASCII base letter (é => e) instead of being dropped.
+ b = text.RemoveAccents(b)
+ }
+
+ for len(b) > 0 {
+ r, size := utf8.DecodeRune(b)
+ switch {
+ case asciiOnly && size != 1:
+ case isSpace(r):
+ buf.WriteString("-")
+ case r == '-' || isAlphaNumeric(r):
+ buf.WriteRune(unicode.ToLower(r))
+ default:
+ }
+
+ b = b[size:]
+ }
+
+ if hook != nil {
+ hook(buf)
+ }
+
+ result := make([]byte, buf.Len())
+ copy(result, buf.Bytes())
+
+ bp.PutBuffer(buf)
+
+ return result
+}
+
+func isAlphaNumeric(r rune) bool {
+ return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
+}
+
+func isSpace(r rune) bool {
+ return r == ' ' || r == '\t'
+}
+
+var _ parser.IDs = (*idFactory)(nil)
+
+type idFactory struct {
+ asciiOnly bool
+ vals map[string]struct{}
+}
+
+func newIDFactory(asciiOnly bool) *idFactory {
+ return &idFactory{
+ vals: make(map[string]struct{}),
+ asciiOnly: asciiOnly,
+ }
+}
+
+func (ids *idFactory) Generate(value []byte, kind ast.NodeKind) []byte {
+ return sanitizeAnchorNameWithHook(value, ids.asciiOnly, func(buf *bytes.Buffer) {
+ if buf.Len() == 0 {
+ if kind == ast.KindHeading {
+ buf.WriteString("heading")
+ } else {
+ buf.WriteString("id")
+ }
+ }
+
+ if _, found := ids.vals[util.BytesToReadOnlyString(buf.Bytes())]; found {
+ // Append a hyphen and a number, starting with 1.
+ buf.WriteRune('-')
+ pos := buf.Len()
+ for i := 1; ; i++ {
+ buf.WriteString(strconv.Itoa(i))
+ if _, found := ids.vals[util.BytesToReadOnlyString(buf.Bytes())]; !found {
+ break
+ }
+ buf.Truncate(pos)
+ }
+ }
+
+ ids.vals[buf.String()] = struct{}{}
+
+ })
+}
+
+func (ids *idFactory) Put(value []byte) {
+ ids.vals[util.BytesToReadOnlyString(value)] = struct{}{}
+}
diff --git a/markup/goldmark/autoid_test.go b/markup/goldmark/autoid_test.go
new file mode 100644
index 000000000..915c6a03c
--- /dev/null
+++ b/markup/goldmark/autoid_test.go
@@ -0,0 +1,121 @@
+// Copyright 2019 The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package goldmark
+
+import (
+ "strings"
+ "testing"
+
+ qt "github.com/frankban/quicktest"
+)
+
+func TestSanitizeAnchorName(t *testing.T) {
+ c := qt.New(t)
+
+ // Tests generated manually on github.com
+ tests := `
+God is good: 神真美好
+Number 32
+Question?
+1+2=3
+Special !"#$%&(parens)=?´* chars
+Resumé
+One-Hyphen
+Multiple--Hyphens
+Trailing hyphen-
+Many spaces here
+Forward/slash
+Backward\slash
+Under_score
+`
+
+ expect := `
+god-is-good-神真美好
+number-32
+question
+123
+special-parens-chars
+resumé
+one-hyphen
+multiple--hyphens
+trailing-hyphen-
+many---spaces--here
+forwardslash
+backwardslash
+under_score
+`
+
+ tests, expect = strings.TrimSpace(tests), strings.TrimSpace(expect)
+
+ testlines, expectlines := strings.Split(tests, "\n"), strings.Split(expect, "\n")
+
+ if len(testlines) != len(expectlines) {
+ panic("test setup failed")
+ }
+
+ for i, input := range testlines {
+ input := input
+ expect := expectlines[i]
+ c.Run(input, func(c *qt.C) {
+ b := []byte(input)
+ got := string(sanitizeAnchorName(b, false))
+ c.Assert(got, qt.Equals, expect)
+ c.Assert(sanitizeAnchorNameString(input, false), qt.Equals, expect)
+ c.Assert(string(b), qt.Equals, input)
+ })
+ }
+}
+
+func TestSanitizeAnchorNameAsciiOnly(t *testing.T) {
+ c := qt.New(t)
+
+ c.Assert(sanitizeAnchorNameString("god is神真美好 good", true), qt.Equals, "god-is-good")
+ c.Assert(sanitizeAnchorNameString("Resumé", true), qt.Equals, "resume")
+
+}
+
+func BenchmarkSanitizeAnchorName(b *testing.B) {
+ input := []byte("God is good: 神真美好")
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ result := sanitizeAnchorName(input, false)
+ if len(result) != 24 {
+ b.Fatalf("got %d", len(result))
+
+ }
+ }
+}
+
+func BenchmarkSanitizeAnchorNameAsciiOnly(b *testing.B) {
+ input := []byte("God is good: 神真美好")
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ result := sanitizeAnchorName(input, true)
+ if len(result) != 12 {
+ b.Fatalf("got %d", len(result))
+
+ }
+ }
+}
+
+func BenchmarkSanitizeAnchorNameString(b *testing.B) {
+ input := "God is good: 神真美好"
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ result := sanitizeAnchorNameString(input, false)
+ if len(result) != 24 {
+ b.Fatalf("got %d", len(result))
+ }
+ }
+}
diff --git a/markup/goldmark/convert.go b/markup/goldmark/convert.go
index af204125f..7d50839e2 100644
--- a/markup/goldmark/convert.go
+++ b/markup/goldmark/convert.go
@@ -50,19 +50,33 @@ type provide struct {
func (p provide) New(cfg converter.ProviderConfig) (converter.Provider, error) {
md := newMarkdown(cfg)
+
return converter.NewProvider("goldmark", func(ctx converter.DocumentContext) (converter.Converter, error) {
return &goldmarkConverter{
ctx: ctx,
cfg: cfg,
md: md,
+ sanitizeAnchorName: func(s string) string {
+ return sanitizeAnchorNameString(s, cfg.MarkupConfig.Goldmark.Parser.AutoHeadingIDAsciiOnly)
+ },
}, nil
}), nil
}
+var (
+ _ converter.AnchorNameSanitizer = (*goldmarkConverter)(nil)
+)
+
type goldmarkConverter struct {
md goldmark.Markdown
ctx converter.DocumentContext
cfg converter.ProviderConfig
+
+ sanitizeAnchorName func(s string) string
+}
+
+func (c *goldmarkConverter) SanitizeAnchorName(s string) string {
+ return c.sanitizeAnchorName(s)
}
func newMarkdown(pcfg converter.ProviderConfig) goldmark.Markdown {
@@ -226,7 +240,7 @@ func (c *goldmarkConverter) Convert(ctx converter.RenderContext) (result convert
buf := &bufWriter{Buffer: &bytes.Buffer{}}
result = buf
- pctx := newParserContext(ctx)
+ pctx := c.newParserContext(ctx)
reader := text.NewReader(ctx.Src)
doc := c.md.Parser().Parse(
@@ -265,8 +279,8 @@ func (c *goldmarkConverter) Supports(feature identity.Identity) bool {
return featureSet[feature.GetIdentity()]
}
-func newParserContext(rctx converter.RenderContext) *parserContext {
- ctx := parser.NewContext()
+func (c *goldmarkConverter) newParserContext(rctx converter.RenderContext) *parserContext {
+ ctx := parser.NewContext(parser.WithIDs(newIDFactory(c.cfg.MarkupConfig.Goldmark.Parser.AutoHeadingIDAsciiOnly)))
ctx.Set(tocEnableKey, rctx.RenderTOC)
return &parserContext{
Context: ctx,
diff --git a/markup/goldmark/convert_test.go b/markup/goldmark/convert_test.go
index 2a9727606..b9bf01ef5 100644
--- a/markup/goldmark/convert_test.go
+++ b/markup/goldmark/convert_test.go
@@ -28,6 +28,23 @@ import (
qt "github.com/frankban/quicktest"
)
+func convert(c *qt.C, mconf markup_config.Config, content string) converter.Result {
+
+ p, err := Provider.New(
+ converter.ProviderConfig{
+ MarkupConfig: mconf,
+ Logger: loggers.NewErrorLogger(),
+ },
+ )
+ c.Assert(err, qt.IsNil)
+ conv, err := p.New(converter.DocumentContext{DocumentID: "thedoc"})
+ c.Assert(err, qt.IsNil)
+ b, err := conv.Convert(converter.RenderContext{RenderTOC: true, Src: []byte(content)})
+ c.Assert(err, qt.IsNil)
+
+ return b
+}
+
func TestConvert(t *testing.T) {
c := qt.New(t)
@@ -92,29 +109,23 @@ description
: the description for the content.
+## 神真美好
+
+## 神真美好
+
+## 神真美好
+
[^1]: And that's the footnote.
`
// Code fences
content = strings.Replace(content, "§§§", "```", -1)
-
mconf := markup_config.Default
mconf.Highlight.NoClasses = false
mconf.Goldmark.Renderer.Unsafe = true
- p, err := Provider.New(
- converter.ProviderConfig{
- MarkupConfig: mconf,
- Logger: loggers.NewErrorLogger(),
- },
- )
- c.Assert(err, qt.IsNil)
- conv, err := p.New(converter.DocumentContext{DocumentID: "thedoc"})
- c.Assert(err, qt.IsNil)
- b, err := conv.Convert(converter.RenderContext{RenderTOC: true, Src: []byte(content)})
- c.Assert(err, qt.IsNil)
-
+ b := convert(c, mconf, content)
got := string(b.Bytes())
// Links
@@ -123,6 +134,9 @@ description
// Header IDs
c.Assert(got, qt.Contains, `<h2 id="custom">Custom ID</h2>`, qt.Commentf(got))
c.Assert(got, qt.Contains, `<h2 id="auto-id">Auto ID</h2>`, qt.Commentf(got))
+ c.Assert(got, qt.Contains, `<h2 id="神真美好">神真美好</h2>`, qt.Commentf(got))
+ c.Assert(got, qt.Contains, `<h2 id="神真美好-1">神真美好</h2>`, qt.Commentf(got))
+ c.Assert(got, qt.Contains, `<h2 id="神真美好-2">神真美好</h2>`, qt.Commentf(got))
// Code fences
c.Assert(got, qt.Contains, "<div class=\"highlight\"><pre class=\"chroma\"><code class=\"language-bash\" data-lang=\"bash\">LINE1\n</code></pre></div>")
@@ -148,6 +162,20 @@ description
}
+func TestConvertAutoIDAsciiOnly(t *testing.T) {
+ c := qt.New(t)
+
+ content := `
+## God is Good: 神真美好
+`
+ mconf := markup_config.Default
+ mconf.Goldmark.Parser.AutoHeadingIDAsciiOnly = true
+ b := convert(c, mconf, content)
+ got := string(b.Bytes())
+
+ c.Assert(got, qt.Contains, "<h2 id=\"god-is-good-\">")
+}
+
func TestCodeFence(t *testing.T) {
c := qt.New(t)
diff --git a/markup/goldmark/goldmark_config/config.go b/markup/goldmark/goldmark_config/config.go
index bf18a384d..2454eb46f 100644
--- a/markup/goldmark/goldmark_config/config.go
+++ b/markup/goldmark/goldmark_config/config.go
@@ -69,6 +69,10 @@ type Parser struct {
// auto generated heading ids.
AutoHeadingID bool
+ // When AutoHeadingID is enabled, this will generate IDs with ASCII
+ // characters only.
+ AutoHeadingIDAsciiOnly bool
+
// Enables custom attributes.
Attribute bool
}