diff options
-rw-r--r-- | common/text/transform.go | 74 | ||||
-rw-r--r-- | common/text/transform_test.go | 16 |
2 files changed, 90 insertions, 0 deletions
diff --git a/common/text/transform.go b/common/text/transform.go
index de093af0d..d0ea4bf91 100644
--- a/common/text/transform.go
+++ b/common/text/transform.go
@@ -23,6 +23,56 @@ import (
 	"golang.org/x/text/unicode/norm"
 )
 
+// transliteratePool holds transformers that strip nonspacing marks (Mn) and
+// then fold Latin letters without a canonical decomposition to ASCII.
+// The NFD step splits precomposed letters such as 'ą' or 'ó' into a base
+// letter plus a combining mark and the Remove step drops that mark, so only
+// letters like 'ł' and 'ø' ever reach the switch below unchanged.
+var transliteratePool = &sync.Pool{
+	New: func() any {
+		return transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)),
+			runes.Map(func(r rune) rune {
+				switch r {
+				case 'ł':
+					return 'l'
+				case 'Ł':
+					return 'L'
+				case 'ø':
+					return 'o'
+				case 'Ø':
+					return 'O'
+				}
+				return r
+			}),
+			norm.NFC)
+	},
+}
+
+// transliterateMap is the lookup-table form of the switch used by
+// transliteratePool. Letters with a canonical decomposition are left out
+// because they are reduced to their base letter before the lookup runs.
+var transliterateMap = map[rune]rune{
+	'ł': 'l',
+	'Ł': 'L',
+	'ø': 'o',
+	'Ø': 'O',
+}
+
+// transliteratePoolMap builds the same chain as transliteratePool, but
+// resolves replacements through transliterateMap instead of a switch.
+var transliteratePoolMap = &sync.Pool{
+	New: func() any {
+		return transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)),
+			runes.Map(func(r rune) rune {
+				if rr, ok := transliterateMap[r]; ok {
+					return rr
+				}
+				return r
+			}),
+			norm.NFC)
+	},
+}
+
 var accentTransformerPool = &sync.Pool{
 	New: func() any {
 		return transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC)
@@ -47,6 +97,30 @@ func RemoveAccentsString(s string) string {
 	return s
 }
 
+// TransliterateString removes accents from s and replaces the letters
+// 'ł', 'Ł', 'ø' and 'Ø', which have no accent-free decomposition, with
+// 'l', 'L', 'o' and 'O'. The result is NFC-normalized.
+func TransliterateString(s string) string {
+	t := transliteratePool.Get().(transform.Transformer)
+	// The error from transform.String is deliberately not reported.
+	s, _, _ = transform.String(t, s)
+	t.Reset()
+	transliteratePool.Put(t)
+	return s
+}
+
+// TransliterateStringMap behaves like TransliterateString but looks up
+// replacements in transliterateMap. Having both variants lets the switch
+// and map strategies be benchmarked against each other.
+func TransliterateStringMap(s string) string {
+	t := transliteratePoolMap.Get().(transform.Transformer)
+	// The error from transform.String is deliberately not reported.
+	s, _, _ = transform.String(t, s)
+	t.Reset()
+	transliteratePoolMap.Put(t)
+	return s
+}
+
 // Chomp removes trailing newline characters from s.
 func Chomp(s string) string {
 	return strings.TrimRightFunc(s, func(r rune) bool {
diff --git a/common/text/transform_test.go b/common/text/transform_test.go
index 74bb37783..b157079ec 100644
--- a/common/text/transform_test.go
+++ b/common/text/transform_test.go
@@ -70,3 +70,19 @@ func BenchmarkVisitLinesAfter(b *testing.B) {
 		})
 	}
 }
+
+func BenchmarkTransliterate(b *testing.B) {
+	s := "ƀ Ɓ Ƃ ƃ Ƅ ƅ Ɔ Ƈ ƈ Ɖ Ɗ Ƌ ƌ ƍ Ǝ Ə Ɛ Ƒ ƒ Ɠ Ɣ ƕ Ɩ Ɨ Ƙ ƙ ƚ ƛ Ɯ Ɲ ƞ Ɵ Ơ ơ Ƣ ƣ Ƥ ƥ Ʀ Ƨ ƨ Ʃ ƪ ƫ Ƭ ƭ Ʈ Ư ư Ʊ Ʋ Ƴ ƴ Ƶ ƶ Ʒ Ƹ ƹ ƺ ƻ Ƽ ƽ ƾ ƿ ǀ ǁ ǂ ǃ DŽ Dž dž LJ Lj lj NJ Nj nj Ǎ ǎ Ǐ ǐ Ǒ ǒ Ǔ ǔ Ǖ ǖ Ǘ ǘ Ǚ ǚ Ǜ ǜ ǝ Ǟ ǟ Ǡ ǡ Ǣ ǣ Ǥ ǥ Ǧ ǧ Ǩ ǩ Ǫ ǫ Ǭ ǭ Ǯ ǯ ǰ DZ Dz dz Ǵ ǵ Ǻ ǻ Ǽ ǽ Ǿ ǿ Ȁ ȁ Ȃ ȃ"
+
+	b.Run("switch", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			_ = TransliterateString(s)
+		}
+	})
+
+	b.Run("map", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			_ = TransliterateStringMap(s)
+		}
+	})
+}