diff options
author | Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> | 2020-01-04 11:28:19 +0100 |
---|---|---|
committer | Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> | 2020-01-04 19:46:01 +0100 |
commit | a82d2700fcc772aada15d65b8f76913ca23f7404 (patch) | |
tree | fa1c09eb1523d7cda303982b5c08661af9a194d6 /markup/goldmark/autoid.go | |
parent | ae816452b171b6b6aabca6a7423ed28a653baaa2 (diff) |
markup/goldmark: Make auto IDs GitHub compatible
You can turn off this behaviour:
```toml
[markup]
[markup.goldmark]
[markup.goldmark.parser]
autoHeadingIDAsciiOnly = true
```
Note that the `anchorize` now adapts its behaviour depending on the default Markdown handler.
Fixes #6616
Diffstat (limited to 'markup/goldmark/autoid.go')
-rw-r--r-- | markup/goldmark/autoid.go | 125 |
1 files changed, 125 insertions, 0 deletions
diff --git a/markup/goldmark/autoid.go b/markup/goldmark/autoid.go new file mode 100644 index 000000000..6599f08d9 --- /dev/null +++ b/markup/goldmark/autoid.go @@ -0,0 +1,125 @@ +// Copyright 2019 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package goldmark + +import ( + "bytes" + "strconv" + "unicode" + "unicode/utf8" + + "github.com/gohugoio/hugo/common/text" + + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/parser" + "github.com/yuin/goldmark/util" + + bp "github.com/gohugoio/hugo/bufferpool" +) + +func sanitizeAnchorNameString(s string, asciiOnly bool) string { + return string(sanitizeAnchorName([]byte(s), asciiOnly)) +} + +func sanitizeAnchorName(b []byte, asciiOnly bool) []byte { + return sanitizeAnchorNameWithHook(b, asciiOnly, nil) +} + +func sanitizeAnchorNameWithHook(b []byte, asciiOnly bool, hook func(buf *bytes.Buffer)) []byte { + buf := bp.GetBuffer() + + if asciiOnly { + // Normalize it to preserve accents if possible. + b = text.RemoveAccents(b) + } + + for len(b) > 0 { + r, size := utf8.DecodeRune(b) + switch { + case asciiOnly && size != 1: + case isSpace(r): + buf.WriteString("-") + case r == '-' || isAlphaNumeric(r): + buf.WriteRune(unicode.ToLower(r)) + default: + } + + b = b[size:] + } + + if hook != nil { + hook(buf) + } + + result := make([]byte, buf.Len()) + copy(result, buf.Bytes()) + + bp.PutBuffer(buf) + + return result +} + +func isAlphaNumeric(r rune) bool { + return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r) +} + +func isSpace(r rune) bool { + return r == ' ' || r == '\t' +} + +var _ parser.IDs = (*idFactory)(nil) + +type idFactory struct { + asciiOnly bool + vals map[string]struct{} +} + +func newIDFactory(asciiOnly bool) *idFactory { + return &idFactory{ + vals: make(map[string]struct{}), + asciiOnly: asciiOnly, + } +} + +func (ids *idFactory) Generate(value []byte, kind ast.NodeKind) []byte { + return sanitizeAnchorNameWithHook(value, ids.asciiOnly, func(buf *bytes.Buffer) { + if buf.Len() == 0 { + if kind == ast.KindHeading { + buf.WriteString("heading") + } else { + buf.WriteString("id") + } + } + + if _, found := ids.vals[util.BytesToReadOnlyString(buf.Bytes())]; found { + // Append a hypen and a number, starting with 1. + buf.WriteRune('-') + pos := buf.Len() + for i := 1; ; i++ { + buf.WriteString(strconv.Itoa(i)) + if _, found := ids.vals[util.BytesToReadOnlyString(buf.Bytes())]; !found { + break + } + buf.Truncate(pos) + } + } + + ids.vals[buf.String()] = struct{}{} + + }) +} + +func (ids *idFactory) Put(value []byte) { + ids.vals[util.BytesToReadOnlyString(value)] = struct{}{} +} |