summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Khon Trieu <tkt028@gmail.com> 2020-06-21 15:19:38 +0700
committer: GitHub <noreply@github.com> 2020-06-21 17:19:38 +0900
commit: 4ec144c969efa6df1cd4d00a5f73f7613dc962fe (patch)
tree: d2ecdd0cd13f7682828bca0cb9dd41f014e292c2
parent: 3e36f2b0acbb0632e6d74199e6619945f2cc016d (diff)
Accented character normalization for Vietnamese characters (#2090)
Fix #2088
-rw-r--r-- src/algo/normalize.go 68
1 files changed, 68 insertions, 0 deletions
diff --git a/src/algo/normalize.go b/src/algo/normalize.go
index 7a496441..93247908 100644
--- a/src/algo/normalize.go
+++ b/src/algo/normalize.go
@@ -405,6 +405,74 @@ var normalized map[rune]rune = map[rune]rune{
0x024E: 'Y', // WITH STROKE, LATIN CAPITAL LETTER
0x028F: 'Y', // , LATIN LETTER SMALL CAPITAL
0x1D22: 'Z', // , LATIN LETTER SMALL CAPITAL
+
+ 'Ắ': 'A',
+ 'Ấ': 'A',
+ 'Ằ': 'A',
+ 'Ầ': 'A',
+ 'Ẳ': 'A',
+ 'Ẩ': 'A',
+ 'Ẵ': 'A',
+ 'Ẫ': 'A',
+ 'Ặ': 'A',
+ 'Ậ': 'A',
+
+ 'ắ': 'a',
+ 'ấ': 'a',
+ 'ằ': 'a',
+ 'ầ': 'a',
+ 'ẳ': 'a',
+ 'ẩ': 'a',
+ 'ẵ': 'a',
+ 'ẫ': 'a',
+ 'ặ': 'a',
+ 'ậ': 'a',
+
+ 'Ế': 'E',
+ 'Ề': 'E',
+ 'Ể': 'E',
+ 'Ễ': 'E',
+ 'Ệ': 'E',
+
+ 'ế': 'e',
+ 'ề': 'e',
+ 'ể': 'e',
+ 'ễ': 'e',
+ 'ệ': 'e',
+
+ 'Ố': 'O',
+ 'Ớ': 'O',
+ 'Ồ': 'O',
+ 'Ờ': 'O',
+ 'Ổ': 'O',
+ 'Ở': 'O',
+ 'Ỗ': 'O',
+ 'Ỡ': 'O',
+ 'Ộ': 'O',
+ 'Ợ': 'O',
+
+ 'ố': 'o',
+ 'ớ': 'o',
+ 'ồ': 'o',
+ 'ờ': 'o',
+ 'ổ': 'o',
+ 'ở': 'o',
+ 'ỗ': 'o',
+ 'ỡ': 'o',
+ 'ộ': 'o',
+ 'ợ': 'o',
+
+ 'Ứ': 'U',
+ 'Ừ': 'U',
+ 'Ử': 'U',
+ 'Ữ': 'U',
+ 'Ự': 'U',
+
+ 'ứ': 'u',
+ 'ừ': 'u',
+ 'ử': 'u',
+ 'ữ': 'u',
+ 'ự': 'u',
}
// NormalizeRunes normalizes latin script letters