summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> 2024-02-06 09:18:06 +0100
committer Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> 2024-02-06 09:23:43 +0100
commit a949a3467364534b08449ef08b080fa5b3cceb11 (patch)
tree f392171874200d71e6674d0aa05c577c0803145a
parent bd0200da6e5a1a3d635fdd62dfa267145a137edc (diff)
Add benchmark for transliteration [bench/translit]
-rw-r--r-- common/text/transform.go 74
-rw-r--r-- common/text/transform_test.go 16
2 files changed, 90 insertions, 0 deletions
diff --git a/common/text/transform.go b/common/text/transform.go
index de093af0d..d0ea4bf91 100644
--- a/common/text/transform.go
+++ b/common/text/transform.go
@@ -23,6 +23,64 @@ import (
"golang.org/x/text/unicode/norm"
)
+// transliteratePool hands out reusable transformer chains for
+// TransliterateString. Each chain decomposes its input (NFD), drops runes in
+// unicode.Mn, folds a fixed set of letters to ASCII with a switch statement,
+// and recomposes the result (NFC).
+//
+// NOTE(review): most of the letters handled in the switch look like they are
+// already reduced to their base letter by the NFD + Mn-removal steps that run
+// first, which would leave only 'ł' and 'ø' reachable — confirm.
+var transliteratePool = &sync.Pool{
+	New: func() any {
+		// toASCII folds the handled letters; every other rune passes through
+		// unchanged.
+		toASCII := func(r rune) rune {
+			switch r {
+			case 'ą':
+				return 'a'
+			case 'ć':
+				return 'c'
+			case 'ę':
+				return 'e'
+			case 'ł':
+				return 'l'
+			case 'ń':
+				return 'n'
+			case 'ó', 'ø':
+				return 'o'
+			case 'ś':
+				return 's'
+			case 'ż', 'ź':
+				return 'z'
+			default:
+				return r
+			}
+		}
+		stripMarks := runes.Remove(runes.In(unicode.Mn))
+		return transform.Chain(norm.NFD, stripMarks, runes.Map(toASCII), norm.NFC)
+	},
+}
+
+// transliterateMap is the lookup table used by transliteratePoolMap: each key
+// is a letter to fold and each value is its ASCII replacement.
+var transliterateMap = map[rune]rune{
+	'ą': 'a', 'ć': 'c', 'ę': 'e', 'ł': 'l', 'ń': 'n',
+	'ó': 'o', 'ø': 'o', 'ś': 's', 'ź': 'z', 'ż': 'z',
+}
+
+// transliteratePoolMap hands out reusable transformer chains for
+// TransliterateStringMap. The chain is NFD, removal of runes in unicode.Mn,
+// a per-rune lookup in transliterateMap, then NFC.
+var transliteratePoolMap = &sync.Pool{
+	New: func() any {
+		// lookup returns the replacement from transliterateMap, or the rune
+		// itself when the map has no entry for it.
+		lookup := func(r rune) rune {
+			repl, found := transliterateMap[r]
+			if !found {
+				return r
+			}
+			return repl
+		}
+		stripMarks := runes.Remove(runes.In(unicode.Mn))
+		return transform.Chain(norm.NFD, stripMarks, runes.Map(lookup), norm.NFC)
+	},
+}
+
var accentTransformerPool = &sync.Pool{
New: func() any {
return transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC)
@@ -47,6 +105,22 @@ func RemoveAccentsString(s string) string {
return s
}
+// TransliterateString runs s through a transformer borrowed from
+// transliteratePool and returns the transformed string. Any error reported by
+// transform.String is discarded.
+func TransliterateString(s string) string {
+	tr := transliteratePool.Get().(transform.Transformer)
+	out, _, _ := transform.String(tr, s)
+	// Reset the transformer before handing it back to the pool.
+	tr.Reset()
+	transliteratePool.Put(tr)
+	return out
+}
+
+// TransliterateStringMap runs s through a transformer borrowed from
+// transliteratePoolMap and returns the transformed string. Any error reported
+// by transform.String is discarded.
+func TransliterateStringMap(s string) string {
+	tr := transliteratePoolMap.Get().(transform.Transformer)
+	out, _, _ := transform.String(tr, s)
+	// Reset the transformer before handing it back to the pool.
+	tr.Reset()
+	transliteratePoolMap.Put(tr)
+	return out
+}
+
// Chomp removes trailing newline characters from s.
func Chomp(s string) string {
return strings.TrimRightFunc(s, func(r rune) bool {
diff --git a/common/text/transform_test.go b/common/text/transform_test.go
index 74bb37783..b157079ec 100644
--- a/common/text/transform_test.go
+++ b/common/text/transform_test.go
@@ -70,3 +70,19 @@ func BenchmarkVisitLinesAfter(b *testing.B) {
})
}
}
+
+// BenchmarkTransliterate measures TransliterateString ("switch") and
+// TransliterateStringMap ("map") on the same fixed input string.
+func BenchmarkTransliterate(b *testing.B) {
+	s := "ƀ Ɓ Ƃ ƃ Ƅ ƅ Ɔ Ƈ ƈ Ɖ Ɗ Ƌ ƌ ƍ Ǝ Ə Ɛ Ƒ ƒ Ɠ Ɣ ƕ Ɩ Ɨ Ƙ ƙ ƚ ƛ Ɯ Ɲ ƞ Ɵ Ơ ơ Ƣ ƣ Ƥ ƥ Ʀ Ƨ ƨ Ʃ ƪ ƫ Ƭ ƭ Ʈ Ư ư Ʊ Ʋ Ƴ ƴ Ƶ ƶ Ʒ Ƹ ƹ ƺ ƻ Ƽ ƽ ƾ ƿ ǀ ǁ ǂ ǃ DŽ Dž dž LJ Lj lj NJ Nj nj Ǎ ǎ Ǐ ǐ Ǒ ǒ Ǔ ǔ Ǖ ǖ Ǘ ǘ Ǚ ǚ Ǜ ǜ ǝ Ǟ ǟ Ǡ ǡ Ǣ ǣ Ǥ ǥ Ǧ ǧ Ǩ ǩ Ǫ ǫ Ǭ ǭ Ǯ ǯ ǰ DZ Dz dz Ǵ ǵ Ǻ ǻ Ǽ ǽ Ǿ ǿ Ȁ ȁ Ȃ ȃ"
+
+	// bench registers one sub-benchmark that calls fn on s b.N times.
+	bench := func(name string, fn func(string) string) {
+		b.Run(name, func(b *testing.B) {
+			for n := 0; n < b.N; n++ {
+				_ = fn(s)
+			}
+		})
+	}
+
+	bench("switch", TransliterateString)
+	bench("map", TransliterateStringMap)
+}