diff options
author | Khon Trieu <tkt028@gmail.com> | 2020-06-21 15:19:38 +0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-06-21 17:19:38 +0900 |
commit | 4ec144c969efa6df1cd4d00a5f73f7613dc962fe (patch) | |
tree | d2ecdd0cd13f7682828bca0cb9dd41f014e292c2 | |
parent | 3e36f2b0acbb0632e6d74199e6619945f2cc016d (diff) |
Accented character normalization for Vietnamese characters (#2090)
Fix #2088
-rw-r--r-- | src/algo/normalize.go | 68 |
1 files changed, 68 insertions, 0 deletions
```diff
diff --git a/src/algo/normalize.go b/src/algo/normalize.go
index 7a496441..93247908 100644
--- a/src/algo/normalize.go
+++ b/src/algo/normalize.go
@@ -405,6 +405,74 @@ var normalized map[rune]rune = map[rune]rune{
 	0x024E: 'Y', // WITH STROKE, LATIN CAPITAL LETTER
 	0x028F: 'Y', // , LATIN LETTER SMALL CAPITAL
 	0x1D22: 'Z', // , LATIN LETTER SMALL CAPITAL
+
+	'Ắ': 'A',
+	'Ấ': 'A',
+	'Ằ': 'A',
+	'Ầ': 'A',
+	'Ẳ': 'A',
+	'Ẩ': 'A',
+	'Ẵ': 'A',
+	'Ẫ': 'A',
+	'Ặ': 'A',
+	'Ậ': 'A',
+
+	'ắ': 'a',
+	'ấ': 'a',
+	'ằ': 'a',
+	'ầ': 'a',
+	'ẳ': 'a',
+	'ẩ': 'a',
+	'ẵ': 'a',
+	'ẫ': 'a',
+	'ặ': 'a',
+	'ậ': 'a',
+
+	'Ế': 'E',
+	'Ề': 'E',
+	'Ể': 'E',
+	'Ễ': 'E',
+	'Ệ': 'E',
+
+	'ế': 'e',
+	'ề': 'e',
+	'ể': 'e',
+	'ễ': 'e',
+	'ệ': 'e',
+
+	'Ố': 'O',
+	'Ớ': 'O',
+	'Ồ': 'O',
+	'Ờ': 'O',
+	'Ổ': 'O',
+	'Ở': 'O',
+	'Ỗ': 'O',
+	'Ỡ': 'O',
+	'Ộ': 'O',
+	'Ợ': 'O',
+
+	'ố': 'o',
+	'ớ': 'o',
+	'ồ': 'o',
+	'ờ': 'o',
+	'ổ': 'o',
+	'ở': 'o',
+	'ỗ': 'o',
+	'ỡ': 'o',
+	'ộ': 'o',
+	'ợ': 'o',
+
+	'Ứ': 'U',
+	'Ừ': 'U',
+	'Ử': 'U',
+	'Ữ': 'U',
+	'Ự': 'U',
+
+	'ứ': 'u',
+	'ừ': 'u',
+	'ử': 'u',
+	'ữ': 'u',
+	'ự': 'u',
 }
 
 // NormalizeRunes normalizes latin script letters
```