summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorpetr-tik <petr-tik@users.noreply.github.com>2019-04-26 08:47:12 +0100
committerpetr-tik <petr-tik@users.noreply.github.com>2019-04-26 08:47:12 +0100
commit1a90a1f3b0ffced46bac95da389754237495cb47 (patch)
treeed22d9e97d8b91b0060eb983bb89f03a156fb2ae
parent8e509213634ccbf82c3380e1b660ec1ffb735301 (diff)
parentdac50c6aeb471868d6b1462426907ecfce3eb420 (diff)
Merge branch 'master' of github.com:tantivy-search/tantivy into stamper_refactor
-rw-r--r--CHANGELOG.md5
-rw-r--r--src/postings/postings_writer.rs7
-rw-r--r--src/tokenizer/ascii_folding_filter.rs4064
-rw-r--r--src/tokenizer/lower_caser.rs19
-rw-r--r--src/tokenizer/mod.rs2
-rw-r--r--src/tokenizer/raw_tokenizer.rs9
-rw-r--r--src/tokenizer/remove_long.rs1
-rw-r--r--src/tokenizer/simple_tokenizer.rs23
-rw-r--r--src/tokenizer/stemmer.rs15
-rw-r--r--src/tokenizer/stop_word_filter.rs1
10 files changed, 4103 insertions, 43 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 068b291..7957d99 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,7 @@
Tantivy 0.10.0
-====================
-
+=====================
+- Added an ASCII folding filter (@drusellers)
+- Bugfix in `query.count` in presence of deletes (@pmasurel)
Minor
---------
diff --git a/src/postings/postings_writer.rs b/src/postings/postings_writer.rs
index d263258..d5d769b 100644
--- a/src/postings/postings_writer.rs
+++ b/src/postings/postings_writer.rs
@@ -214,6 +214,13 @@ pub trait PostingsWriter {
if token.text.len() <= MAX_TOKEN_LEN {
term.set_text(token.text.as_str());
self.subscribe(term_index, doc_id, token.position as u32, &term, heap);
+ } else {
+ info!(
+ "A token exceeding MAX_TOKEN_LEN ({}>{}) was dropped. Search for \
+ MAX_TOKEN_LEN in the documentation for more information.",
+ token.text.len(),
+ MAX_TOKEN_LEN
+ );
}
};
token_stream.process(&mut sink)
diff --git a/src/tokenizer/ascii_folding_filter.rs b/src/tokenizer/ascii_folding_filter.rs
new file mode 100644
index 0000000..cbd124a
--- /dev/null
+++ b/src/tokenizer/ascii_folding_filter.rs
@@ -0,0 +1,4064 @@
+use super::{Token, TokenFilter, TokenStream};
+use std::mem;
+
+/// This token filter converts alphabetic, numeric, and symbolic Unicode characters
+/// which are not in the first 128 ASCII characters (the "Basic Latin" Unicode
+/// block) into their ASCII equivalents, if one exists.
+#[derive(Clone)]
+pub struct AsciiFoldingFilter;
+
+impl<TailTokenStream> TokenFilter<TailTokenStream> for AsciiFoldingFilter
+where
+ TailTokenStream: TokenStream,
+{
+ type ResultTokenStream = AsciiFoldingFilterTokenStream<TailTokenStream>; // stream type returned by `transform`
+
+ fn transform(&self, token_stream: TailTokenStream) -> Self::ResultTokenStream { // wraps the upstream stream in the folding stream
+ AsciiFoldingFilterTokenStream::wrap(token_stream)
+ }
+}
+
+pub struct AsciiFoldingFilterTokenStream<TailTokenStream> { // token stream that rewrites non-ASCII token text coming from `tail`
+ buffer: String, // scratch string handed to `to_ascii`, then swapped with the token text
+ tail: TailTokenStream, // wrapped upstream token stream
+}
+
+impl<TailTokenStream> TokenStream for AsciiFoldingFilterTokenStream<TailTokenStream>
+where
+ TailTokenStream: TokenStream,
+{
+ fn advance(&mut self) -> bool { // advances the wrapped stream; returns false as soon as it reports no further token
+ if !self.tail.advance() {
+ return false;
+ }
+ if !self.token_mut().text.is_ascii() {
+ // Only non-ASCII text is folded; text that is already ASCII is left untouched.
+ to_ascii(&mut self.tail.token_mut().text, &mut self.buffer); // presumably writes the folded text into `buffer` (`to_ascii` is defined outside this view; confirm)
+ mem::swap(&mut self.tail.token_mut().text, &mut self.buffer); // token text and scratch buffer trade contents
+ }
+ true
+ }
+
+ fn token(&self) -> &Token { // delegates to the wrapped stream
+ self.tail.token()
+ }
+
+ fn token_mut(&mut self) -> &mut Token { // delegates to the wrapped stream
+ self.tail.token_mut()
+ }
+}
+
+impl<TailTokenStream> AsciiFoldingFilterTokenStream<TailTokenStream>
+where
+ TailTokenStream: TokenStream,
+{
+ fn wrap(tail: TailTokenStream) -> AsciiFoldingFilterTokenStream<TailTokenStream> { // builds the stream around `tail` with a pre-allocated scratch buffer
+ AsciiFoldingFilterTokenStream {
+ tail,
+ buffer: String::with_capacity(100), // initial capacity of 100 bytes; the String can still grow past it
+ }
+ }
+}
+
+// Returns a string that represents the ascii folded version of
+// the character. If the `char` does not require ascii folding
+// (e.g. simple ASCII chars like `A`) or if the `char`
+// does not have a sensible ascii equivalent (e.g. Kanjis like 馬),
+// this function returns `None`.
+fn fold_non_ascii_char(c: char) -> Option<&'static str> {
+ match c {
+ '\u{00C0}' | // À [LATIN CAPITAL LETTER A WITH GRAVE]
+ '\u{00C1}' | // Á [LATIN CAPITAL LETTER A WITH ACUTE]
+ '\u{00C2}' | // Â [LATIN CAPITAL LETTER A WITH CIRCUMFLEX]
+ '\u{00C3}' | // Ã [LATIN CAPITAL LETTER A WITH TILDE]
+ '\u{00C4}' | // Ä [LATIN CAPITAL LETTER A WITH DIAERESIS]
+ '\u{00C5}' | // Å [LATIN CAPITAL LETTER A WITH RING ABOVE]
+ '\u{0100}' | // Ā [LATIN CAPITAL LETTER A WITH MACRON]
+ '\u{0102}' | // Ă [LATIN CAPITAL LETTER A WITH BREVE]
+ '\u{0104}' | // Ą [LATIN CAPITAL LETTER A WITH OGONEK]
+ '\u{018F}' | // Ə http://en.wikipedia.org/wiki/Schwa [LATIN CAPITAL LETTER SCHWA]
+ '\u{01CD}' | // Ǎ [LATIN CAPITAL LETTER A WITH CARON]
+ '\u{01DE}' | // Ǟ [LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON]
+ '\u{01E0}' | // Ǡ [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON]
+ '\u{01FA}' | // Ǻ [LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE]
+ '\u{0200}' | // Ȁ [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE]
+ '\u{0202}' | // Ȃ [LATIN CAPITAL LETTER A WITH INVERTED BREVE]
+ '\u{0226}' | // Ȧ [LATIN CAPITAL LETTER A WITH DOT ABOVE]
+ '\u{023A}' | // Ⱥ [LATIN CAPITAL LETTER A WITH STROKE]
+ '\u{1D00}' | // ᴀ [LATIN LETTER SMALL CAPITAL A]
+ '\u{1E00}' | // Ḁ [LATIN CAPITAL LETTER A WITH RING BELOW]
+ '\u{1EA0}' | // Ạ [LATIN CAPITAL LETTER A WITH DOT BELOW]
+ '\u{1EA2}' | // Ả [LATIN CAPITAL LETTER A WITH HOOK ABOVE]
+ '\u{1EA4}' | // Ấ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE]
+ '\u{1EA6}' | // Ầ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE]
+ '\u{1EA8}' | // Ẩ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
+ '\u{1EAA}' | // Ẫ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE]
+ '\u{1EAC}' | // Ậ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
+ '\u{1EAE}' | // Ắ [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE]
+ '\u{1EB0}' | // Ằ [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE]
+ '\u{1EB2}' | // Ẳ [LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE]
+ '\u{1EB4}' | // Ẵ [LATIN CAPITAL LETTER A WITH BREVE AND TILDE]
+ '\u{1EB6}' | // Ặ [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW]
+ '\u{24B6}' | // Ⓐ [CIRCLED LATIN CAPITAL LETTER A]
+ '\u{FF21}' // A [FULLWIDTH LATIN CAPITAL LETTER A]
+ => Some("A"),
+ '\u{00E0}' | // à [LATIN SMALL LETTER A WITH GRAVE]
+ '\u{00E1}' | // á [LATIN SMALL LETTER A WITH ACUTE]
+ '\u{00E2}' | // â [LATIN SMALL LETTER A WITH CIRCUMFLEX]
+ '\u{00E3}' | // ã [LATIN SMALL LETTER A WITH TILDE]
+ '\u{00E4}' | // ä [LATIN SMALL LETTER A WITH DIAERESIS]
+ '\u{00E5}' | // å [LATIN SMALL LETTER A WITH RING ABOVE]
+ '\u{0101}' | // ā [LATIN SMALL LETTER A WITH MACRON]
+ '\u{0103}' | // ă [LATIN SMALL LETTER A WITH BREVE]
+ '\u{0105}' | // ą [LATIN SMALL LETTER A WITH OGONEK]
+ '\u{01CE}' | // ǎ [LATIN SMALL LETTER A WITH CARON]
+ '\u{01DF}' | // ǟ [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON]
+ '\u{01E1}' | // ǡ [LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON]
+ '\u{01FB}' | // ǻ [LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE]
+ '\u{0201}' | // ȁ [LATIN SMALL LETTER A WITH DOUBLE GRAVE]
+ '\u{0203}' | // ȃ [LATIN SMALL LETTER A WITH INVERTED BREVE]
+ '\u{0227}' | // ȧ [LATIN SMALL LETTER A WITH DOT ABOVE]
+ '\u{0250}' | // ɐ [LATIN SMALL LETTER TURNED A]
+ '\u{0259}' | // ə [LATIN SMALL LETTER SCHWA]
+ '\u{025A}' | // ɚ [LATIN SMALL LETTER SCHWA WITH HOOK]
+ '\u{1D8F}' | // ᶏ [LATIN SMALL LETTER A WITH RETROFLEX HOOK]
+ '\u{1D95}' | // ᶕ [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK]
+ '\u{1E01}' | // ḁ [LATIN SMALL LETTER A WITH RING BELOW]
+ '\u{1E9A}' | // ẚ [LATIN SMALL LETTER A WITH RIGHT HALF RING]
+ '\u{1EA1}' | // ạ [LATIN SMALL LETTER A WITH DOT BELOW]
+ '\u{1EA3}' | // ả [LATIN SMALL LETTER A WITH HOOK ABOVE]
+ '\u{1EA5}' | // ấ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE]
+ '\u{1EA7}' | // ầ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE]
+ '\u{1EA9}' | // ẩ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
+ '\u{1EAB}' | // ẫ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE]
+ '\u{1EAD}' | // ậ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
+ '\u{1EAF}' | // ắ [LATIN SMALL LETTER A WITH BREVE AND ACUTE]
+ '\u{1EB1}' | // ằ [LATIN SMALL LETTER A WITH BREVE AND GRAVE]
+ '\u{1EB3}' | // ẳ [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE]
+ '\u{1EB5}' | // ẵ [LATIN SMALL LETTER A WITH BREVE AND TILDE]
+ '\u{1EB7}' | // ặ [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW]
+ '\u{2090}' | // ₐ [LATIN SUBSCRIPT SMALL LETTER A]
+ '\u{2094}' | // ₔ [LATIN SUBSCRIPT SMALL LETTER SCHWA]
+ '\u{24D0}' | // ⓐ [CIRCLED LATIN SMALL LETTER A]
+ '\u{2C65}' | // ⱥ [LATIN SMALL LETTER A WITH STROKE]
+ '\u{2C6F}' | // Ɐ [LATIN CAPITAL LETTER TURNED A]
+ '\u{FF41}' // a [FULLWIDTH LATIN SMALL LETTER A]
+ => Some("a"),
+ '\u{A732}' // Ꜳ [LATIN CAPITAL LETTER AA]
+ => Some("AA"),
+ '\u{00C6}' | // Æ [LATIN CAPITAL LETTER AE]
+ '\u{01E2}' | // Ǣ [LATIN CAPITAL LETTER AE WITH MACRON]
+ '\u{01FC}' | // Ǽ [LATIN CAPITAL LETTER AE WITH ACUTE]
+ '\u{1D01}' // ᴁ [LATIN LETTER SMALL CAPITAL AE]
+ => Some("AE"),
+ '\u{A734}' // Ꜵ [LATIN CAPITAL LETTER AO]
+ => Some("AO"),
+ '\u{A736}' // Ꜷ [LATIN CAPITAL LETTER AU]
+ => Some("AU"),
+ '\u{A738}' | // Ꜹ [LATIN CAPITAL LETTER AV]
+ '\u{A73A}' // Ꜻ [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR]
+ => Some("AV"),
+ '\u{A73C}' // Ꜽ [LATIN CAPITAL LETTER AY]
+ => Some("AY"),
+ '\u{249C}' // ⒜ [PARENTHESIZED LATIN SMALL LETTER A]
+ => Some("(a)"),
+ '\u{A733}' // ꜳ [LATIN SMALL LETTER AA]
+ => Some("aa"),
+ '\u{00E6}' | // æ [LATIN SMALL LETTER AE]
+ '\u{01E3}' | // ǣ [LATIN SMALL LETTER AE WITH MACRON]
+ '\u{01FD}' | // ǽ [LATIN SMALL LETTER AE WITH ACUTE]
+ '\u{1D02}' // ᴂ [LATIN SMALL LETTER TURNED AE]
+ => Some("ae"),
+ '\u{A735}' // ꜵ [LATIN SMALL LETTER AO]
+ => Some("ao"),
+ '\u{A737}' // ꜷ [LATIN SMALL LETTER AU]
+ => Some("au"),
+ '\u{A739}' | // ꜹ [LATIN SMALL LETTER AV]
+ '\u{A73B}' // ꜻ [LATIN SMALL LETTER AV WITH HORIZONTAL BAR]
+ => Some("av"),
+ '\u{A73D}' // ꜽ [LATIN SMALL LETTER AY]
+ => Some("ay"),
+ '\u{0181}' | // Ɓ [LATIN CAPITAL LETTER B WITH HOOK]
+ '\u{0182}' | // Ƃ [LATIN CAPITAL LETTER B WITH TOPBAR]
+ '\u{0243}' | // Ƀ [LATIN CAPITAL LETTER B WITH STROKE]
+ '\u{0299}' | // ʙ [LATIN LETTER SMALL CAPITAL B]
+ '\u{1D03}' | // ᴃ [LATIN LETTER SMALL CAPITAL BARRED B]
+ '\u{1E02}' | // Ḃ [LATIN CAPITAL LETTER B WITH DOT ABOVE]
+ '\u{1E04}' | // Ḅ [LATIN CAPITAL LETTER B WITH DOT BELOW]
+ '\u{1E06}' | // Ḇ [LATIN CAPITAL LETTER B WITH LINE BELOW]
+ '\u{24B7}' | // Ⓑ [CIRCLED LATIN CAPITAL LETTER B]
+ '\u{FF22}' // B [FULLWIDTH LATIN CAPITAL LETTER B]
+ => Some("B"),
+ '\u{0180}' | // ƀ [LATIN SMALL LETTER B WITH STROKE]
+ '\u{0183}' | // ƃ [LATIN SMALL LETTER B WITH TOPBAR]
+ '\u{0253}' | // ɓ [LATIN SMALL LETTER B WITH HOOK]
+ '\u{1D6C}' | // ᵬ [LATIN SMALL LETTER B WITH MIDDLE TILDE]
+ '\u{1D80}' | // ᶀ [LATIN SMALL LETTER B WITH PALATAL HOOK]
+ '\u{1E03}' | // ḃ [LATIN SMALL LETTER B WITH DOT ABOVE]
+ '\u{1E05}' | // ḅ [LATIN SMALL LETTER B WITH DOT BELOW]
+ '\u{1E07}' | // ḇ [LATIN SMALL LETTER B WITH LINE BELOW]
+ '\u{24D1}' | // ⓑ [CIRCLED LATIN SMALL LETTER B]
+ '\u{FF42}' // b [FULLWIDTH LATIN SMALL LETTER B]
+ => Some("b"),
+ '\u{249D}' // ⒝ [PARENTHESIZED LATIN SMALL LETTER B]
+ => Some("(b)"),
+ '\u{00C7}' | // Ç [LATIN CAPITAL LETTER C WITH CEDILLA]
+ '\u{0106}' | // Ć [LATIN CAPITAL LETTER C WITH ACUTE]
+ '\u{0108}' | // Ĉ [LATIN CAPITAL LETTER C WITH CIRCUMFLEX]
+ '\u{010A}' | // Ċ [LATIN CAPITAL LETTER C WITH DOT ABOVE]
+ '\u{010C}' | // Č [LATIN CAPITAL LETTER C WITH CARON]
+ '\u{0187}' | // Ƈ [LATIN CAPITAL LETTER C WITH HOOK]
+ '\u{023B}' | // Ȼ [LATIN CAPITAL LETTER C WITH STROKE]
+ '\u{0297}' | // ʗ [LATIN LETTER STRETCHED C]
+ '\u{1D04}' | // ᴄ [LATIN LETTER SMALL CAPITAL C]
+ '\u{1E08}' | // Ḉ [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE]
+ '\u{24B8}' | // Ⓒ [CIRCLED LATIN CAPITAL LETTER C]
+ '\u{FF23}' // C [FULLWIDTH LATIN CAPITAL LETTER C]
+ => Some("C"),
+ '\u{00E7}' | // ç [LATIN SMALL LETTER C WITH CEDILLA]
+ '\u{0107}' | // ć [LATIN SMALL LETTER C WITH ACUTE]
+ '\u{0109}' | // ĉ [LATIN SMALL LETTER C WITH CIRCUMFLEX]
+ '\u{010B}' | // ċ [LATIN SMALL LETTER C WITH DOT ABOVE]
+ '\u{010D}' | // č [LATIN SMALL LETTER C WITH CARON]
+ '\u{0188}' | // ƈ [LATIN SMALL LETTER C WITH HOOK]
+ '\u{023C}' | // ȼ [LATIN SMALL LETTER C WITH STROKE]
+ '\u{0255}' | // ɕ [LATIN SMALL LETTER C WITH CURL]
+ '\u{1E09}' | // ḉ [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE]
+ '\u{2184}' | // ↄ [LATIN SMALL LETTER REVERSED C]
+ '\u{24D2}' | // ⓒ [CIRCLED LATIN SMALL LETTER C]
+ '\u{A73E}' | // Ꜿ [LATIN CAPITAL LETTER REVERSED C WITH DOT]
+ '\u{A73F}' | // ꜿ [LATIN SMALL LETTER REVERSED C WITH DOT]
+ '\u{FF43}' // c [FULLWIDTH LATIN SMALL LETTER C]
+ => Some("c"),
+ '\u{249E}' // ⒞ [PARENTHESIZED LATIN SMALL LETTER C]
+ => Some("(c)"),
+ '\u{00D0}' | // Ð [LATIN CAPITAL LETTER ETH]
+ '\u{010E}' | // Ď [LATIN CAPITAL LETTER D WITH CARON]
+ '\u{0110}' | // Đ [LATIN CAPITAL LETTER D WITH STROKE]
+ '\u{0189}' | // Ɖ [LATIN CAPITAL LETTER AFRICAN D]
+ '\u{018A}' | // Ɗ [LATIN CAPITAL LETTER D WITH HOOK]
+ '\u{018B}' | // Ƌ [LATIN CAPITAL LETTER D WITH TOPBAR]
+ '\u{1D05}' | // ᴅ [LATIN LETTER SMALL CAPITAL D]
+ '\u{1D06}' | // ᴆ [LATIN LETTER SMALL CAPITAL ETH]
+ '\u{1E0A}' | // Ḋ [LATIN CAPITAL LETTER D WITH DOT ABOVE]
+ '\u{1E0C}' | // Ḍ [LATIN CAPITAL LETTER D WITH DOT BELOW]
+ '\u{1E0E}' | // Ḏ [LATIN CAPITAL LETTER D WITH LINE BELOW]
+ '\u{1E10}' | // Ḑ [LATIN CAPITAL LETTER D WITH CEDILLA]
+ '\u{1E12}' | // Ḓ [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW]
+ '\u{24B9}' | // Ⓓ [CIRCLED LATIN CAPITAL LETTER D]
+ '\u{A779}' | // Ꝺ [LATIN CAPITAL LETTER INSULAR D]
+ '\u{FF24}' // D [FULLWIDTH LATIN CAPITAL LETTER D]
+ => Some("D"),
+ '\u{00F0}' | // ð [LATIN SMALL LETTER ETH]
+ '\u{010F}' | // ď [LATIN SMALL LETTER D WITH CARON]
+ '\u{0111}' | // đ [LATIN SMALL LETTER D WITH STROKE]
+ '\u{018C}' | // ƌ [LATIN SMALL LETTER D WITH TOPBAR]
+ '\u{0221}' | // ȡ [LATIN SMALL LETTER D WITH CURL]
+ '\u{0256}' | // ɖ [LATIN SMALL LETTER D WITH TAIL]
+ '\u{0257}' | // ɗ [LATIN SMALL LETTER D WITH HOOK]
+ '\u{1D6D}' | // ᵭ [LATIN SMALL LETTER D WITH MIDDLE TILDE]
+ '\u{1D81}' | // ᶁ [LATIN SMALL LETTER D WITH PALATAL HOOK]
+ '\u{1D91}' | // ᶑ [LATIN SMALL LETTER D WITH HOOK AND TAIL]
+ '\u{1E0B}' | // ḋ [LATIN SMALL LETTER D WITH DOT ABOVE]
+ '\u{1E0D}' | // ḍ [LATIN SMALL LETTER D WITH DOT BELOW]
+ '\u{1E0F}' | // ḏ [LATIN SMALL LETTER D WITH LINE BELOW]
+ '\u{1E11}' | // ḑ [LATIN SMALL LETTER D WITH CEDILLA]
+ '\u{1E13}' | // ḓ [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW]
+ '\u{24D3}' | // ⓓ [CIRCLED LATIN SMALL LETTER D]
+ '\u{A77A}' | // ꝺ [LATIN SMALL LETTER INSULAR D]
+ '\u{FF44}' // d [FULLWIDTH LATIN SMALL LETTER D]
+ => Some("d"),
+ '\u{01C4}' | // DŽ [LATIN CAPITAL LETTER DZ WITH CARON]
+ '\u{01F1}' // DZ [LATIN CAPITAL LETTER DZ]
+ => Some("DZ"),
+ '\u{01C5}' | // Dž [LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON]
+ '\u{01F2}' // Dz [LATIN CAPITAL LETTER D WITH SMALL LETTER Z]
+ => Some("Dz"),
+ '\u{249F}' // ⒟ [PARENTHESIZED LATIN SMALL LETTER D]
+ => Some("(d)"),
+ '\u{0238}' // ȸ [LATIN SMALL LETTER DB DIGRAPH]
+ => Some("db"),
+ '\u{01C6}' | // dž [LATIN SMALL LETTER DZ WITH CARON]
+ '\u{01F3}' | // dz [LATIN SMALL LETTER DZ]
+ '\u{02A3}' | // ʣ [LATIN SMALL LETTER DZ DIGRAPH]
+ '\u{02A5}' // ʥ [LATIN SMALL LETTER DZ DIGRAPH WITH CURL]
+ => Some("dz"),
+ '\u{00C8}' | // È [LATIN CAPITAL LETTER E WITH GRAVE]
+ '\u{00C9}' | // É [LATIN CAPITAL LETTER E WITH ACUTE]
+ '\u{00CA}' | // Ê [LATIN CAPITAL LETTER E WITH CIRCUMFLEX]
+ '\u{00CB}' | // Ë [LATIN CAPITAL LETTER E WITH DIAERESIS]
+ '\u{0112}' | // Ē [LATIN CAPITAL LETTER E WITH MACRON]
+ '\u{0114}' | // Ĕ [LATIN CAPITAL LETTER E WITH BREVE]
+ '\u{0116}' | // Ė [LATIN CAPITAL LETTER E WITH DOT ABOVE]
+ '\u{0118}' | // Ę [LATIN CAPITAL LETTER E WITH OGONEK]
+ '\u{011A}' | // Ě [LATIN CAPITAL LETTER E WITH CARON]
+ '\u{018E}' | // Ǝ [LATIN CAPITAL LETTER REVERSED E]
+ '\u{0190}' | // Ɛ [LATIN CAPITAL LETTER OPEN E]
+ '\u{0204}' | // Ȅ [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE]
+ '\u{0206}' | // Ȇ [LATIN CAPITAL LETTER E WITH INVERTED BREVE]
+ '\u{0228}' | // Ȩ [LATIN CAPITAL LETTER E WITH CEDILLA]
+ '\u{0246}' | // Ɇ [LATIN CAPITAL LETTER E WITH STROKE]
+ '\u{1D07}' | // ᴇ [LATIN LETTER SMALL CAPITAL E]
+ '\u{1E14}' | // Ḕ [LATIN CAPITAL LETTER E WITH MACRON AND GRAVE]
+ '\u{1E16}' | // Ḗ [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE]
+ '\u{1E18}' | // Ḙ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW]
+ '\u{1E1A}' | // Ḛ [LATIN CAPITAL LETTER E WITH TILDE BELOW]
+ '\u{1E1C}' | // Ḝ [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE]
+ '\u{1EB8}' | // Ẹ [LATIN CAPITAL LETTER E WITH DOT BELOW]
+ '\u{1EBA}' | // Ẻ [LATIN CAPITAL LETTER E WITH HOOK ABOVE]
+ '\u{1EBC}' | // Ẽ [LATIN CAPITAL LETTER E WITH TILDE]
+ '\u{1EBE}' | // Ế [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE]
+ '\u{1EC0}' | // Ề [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE]
+ '\u{1EC2}' | // Ể [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
+ '\u{1EC4}' | // Ễ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE]
+ '\u{1EC6}' | // Ệ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
+ '\u{24BA}' | // Ⓔ [CIRCLED LATIN CAPITAL LETTER E]
+ '\u{2C7B}' | // ⱻ [LATIN LETTER SMALL CAPITAL TURNED E]
+ '\u{FF25}' // E [FULLWIDTH LATIN CAPITAL LETTER E]
+ => Some("E"),
+ '\u{00E8}' | // è [LATIN SMALL LETTER E WITH GRAVE]
+ '\u{00E9}' | // é [LATIN SMALL LETTER E WITH ACUTE]
+ '\u{00EA}' | // ê [LATIN SMALL LETTER E WITH CIRCUMFLEX]
+ '\u{00EB}' | // ë [LATIN SMALL LETTER E WITH DIAERESIS]
+ '\u{0113}' | // ē [LATIN SMALL LETTER E WITH MACRON]
+ '\u{0115}' | // ĕ [LATIN SMALL LETTER E WITH BREVE]
+ '\u{0117}' | // ė [LATIN SMALL LETTER E WITH DOT ABOVE]
+ '\u{0119}' | // ę [LATIN SMALL LETTER E WITH OGONEK]
+ '\u{011B}' | // ě [LATIN SMALL LETTER E WITH CARON]
+ '\u{01DD}' | // ǝ [LATIN SMALL LETTER TURNED E]
+ '\u{0205}' | // ȅ [LATIN SMALL LETTER E WITH DOUBLE GRAVE]
+ '\u{0207}' | // ȇ [LATIN SMALL LETTER E WITH INVERTED BREVE]
+ '\u{0229}' | // ȩ [LATIN SMALL LETTER E WITH CEDILLA]
+ '\u{0247}' | // ɇ [LATIN SMALL LETTER E WITH STROKE]
+ '\u{0258}' | // ɘ [LATIN SMALL LETTER REVERSED E]
+ '\u{025B}' | // ɛ [LATIN SMALL LETTER OPEN E]
+ '\u{025C}' | // ɜ [LATIN SMALL LETTER REVERSED OPEN E]
+ '\u{025D}' | // ɝ [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK]
+ '\u{025E}' | // ɞ [LATIN SMALL LETTER CLOSED REVERSED OPEN E]
+ '\u{029A}' | // ʚ [LATIN SMALL LETTER CLOSED OPEN E]
+ '\u{1D08}' | // ᴈ [LATIN SMALL LETTER TURNED OPEN E]
+ '\u{1D92}' | // ᶒ [LATIN SMALL LETTER E WITH RETROFLEX HOOK]
+ '\u{1D93}' | // ᶓ [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK]
+ '\u{1D94}' | // ᶔ [LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK]
+ '\u{1E15}' | // ḕ [LATIN SMALL LETTER E WITH MACRON AND GRAVE]
+ '\u{1E17}' | // ḗ [LATIN SMALL LETTER E WITH MACRON AND ACUTE]
+ '\u{1E19}' | // ḙ [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW]
+ '\u{1E1B}' | // ḛ [LATIN SMALL LETTER E WITH TILDE BELOW]
+ '\u{1E1D}' | // ḝ [LATIN SMALL LETTER E WITH CEDILLA AND BREVE]
+ '\u{1EB9}' | // ẹ [LATIN SMALL LETTER E WITH DOT BELOW]
+ '\u{1EBB}' | // ẻ [LATIN SMALL LETTER E WITH HOOK ABOVE]
+ '\u{1EBD}' | // ẽ [LATIN SMALL LETTER E WITH TILDE]
+ '\u{1EBF}' | // ế [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE]
+ '\u{1EC1}' | // ề [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE]
+ '\u{1EC3}' | // ể [LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
+ '\u{1EC5}' | // ễ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE]
+ '\u{1EC7}' | // ệ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
+ '\u{2091}' | // ₑ [LATIN SUBSCRIPT SMALL LETTER E]
+ '\u{24D4}' | // ⓔ [CIRCLED LATIN SMALL LETTER E]
+ '\u{2C78}' | // ⱸ [LATIN SMALL LETTER E WITH NOTCH]
+ '\u{FF45}' // e [FULLWIDTH LATIN SMALL LETTER E]
+ => Some("e"),
+ '\u{24A0}' // ⒠ [PARENTHESIZED LATIN SMALL LETTER E]
+ => Some("(e)"),
+ '\u{0191}' | // Ƒ [LATIN CAPITAL LETTER F WITH HOOK]
+ '\u{1E1E}' | // Ḟ [LATIN CAPITAL LETTER F WITH DOT ABOVE]
+ '\u{24BB}' | // Ⓕ [CIRCLED LATIN CAPITAL LETTER F]
+ '\u{A730}' | // ꜰ [LATIN LETTER SMALL CAPITAL F]
+ '\u{A77B}' | // Ꝼ [LATIN CAPITAL LETTER INSULAR F]
+ '\u{A7FB}' | // ꟻ [LATIN EPIGRAPHIC LETTER REVERSED F]
+ '\u{FF26}' // F [FULLWIDTH LATIN CAPITAL LETTER F]
+ => Some("F"),
+ '\u{0192}' | // ƒ [LATIN SMALL LETTER F WITH HOOK]
+ '\u{1D6E}' | // ᵮ [LATIN SMALL LETTER F WITH MIDDLE TILDE]
+ '\u{1D82}' | // ᶂ [LATIN SMALL LETTER F WITH PALATAL HOOK]
+ '\u{1E1F}' | // ḟ [LATIN SMALL LETTER F WITH DOT ABOVE]
+ '\u{1E9B}' | // ẛ [LATIN SMALL LETTER LONG S WITH DOT ABOVE]
+ '\u{24D5}' | // ⓕ [CIRCLED LATIN SMALL LETTER F]
+ '\u{A77C}' | // ꝼ [LATIN SMALL LETTER INSULAR F]
+ '\u{FF46}' // f [FULLWIDTH LATIN SMALL LETTER F]
+ => Some("f"),
+ '\u{24A1}' // ⒡ [PARENTHESIZED LATIN SMALL LETTER F]
+ => Some("(f)"),
+ '\u{FB00}' // ff [LATIN SMALL LIGATURE FF]
+ => Some("ff"),
+ '\u{FB03}' // ffi [LATIN SMALL LIGATURE FFI]
+ => Some("ffi"),
+ '\u{FB04}' // ffl [LATIN SMALL LIGATURE FFL]
+ => Some("ffl"),
+ '\u{FB01}' // fi [LATIN SMALL LIGATURE FI]
+ => Some("fi"),
+ '\u{FB02}' // fl [LATIN SMALL LIGATURE FL]
+ => Some("fl"),
+ '\u{011C}' | // Ĝ [LATIN CAPITAL LETTER G WITH CIRCUMFLEX]
+ '\u{011E}' | // Ğ [LATIN CAPITAL LETTER G WITH BREVE]
+ '\u{0120}' | // Ġ [LATIN CAPITAL LETTER G WITH DOT ABOVE]
+ '\u{0122}' | // Ģ [LATIN CAPITAL LETTER G WITH CEDILLA]
+ '\u{0193}' | // Ɠ [LATIN CAPITAL LETTER G WITH HOOK]
+ '\u{01E4}' | // Ǥ [LATIN CAPITAL LETTER G WITH STROKE]
+ '\u{01E5}' | // ǥ [LATIN SMALL LETTER G WITH STROKE]
+ '\u{01E6}' | // Ǧ [LATIN CAPITAL LETTER G WITH CARON]
+ '\u{01E7}' | // ǧ [LATIN SMALL LETTER G WITH CARON]
+ '\u{01F4}' | // Ǵ [LATIN CAPITAL LETTER G WITH ACUTE]
+ '\u{0262}' | // ɢ [LATIN LETTER SMALL CAPITAL G]
+ '\u{029B}' | // ʛ [LATIN LETTER SMALL CAPITAL G WITH HOOK]
+ '\u{1E20}' | // Ḡ [LATIN CAPITAL LETTER G WITH MACRON]
+ '\u{24BC}' | // Ⓖ [CIRCLED LATIN CAPITAL LETTER G]
+ '\u{A77D}' | // Ᵹ [LATIN CAPITAL LETTER INSULAR G]
+ '\u{A77E}' | // Ꝿ [LATIN CAPITAL LETTER TURNED INSULAR G]
+ '\u{FF27}' // G [FULLWIDTH LATIN CAPITAL LETTER G]
+ => Some("G"),
+ '\u{011D}' | // ĝ [LATIN SMALL LETTER G WITH CIRCUMFLEX]
+ '\u{011F}' | // ğ [LATIN SMALL LETTER G WITH BREVE]
+ '\u{0121}' | // ġ [LATIN SMALL LETTER G WITH DOT ABOVE]
+ '\u{0123}' | // ģ [LATIN SMALL LETTER G WITH CEDILLA]
+ '\u{01F5}' | // ǵ [LATIN SMALL LETTER G WITH ACUTE]
+ '\u{0260}' | // ɠ [LATIN SMALL LETTER G WITH HOOK]
+ '\u{0261}' | // ɡ [LATIN SMALL LETTER SCRIPT G]
+ '\u{1D77}' | // ᵷ [LATIN SMALL LETTER TURNED G]
+ '\u{1D79}' | // ᵹ [LATIN SMALL LETTER INSULAR G]
+ '\u{1D83}' | // ᶃ [LATIN SMALL LETTER G WITH PALATAL HOOK]
+ '\u{1E21}' | // ḡ [LATIN SMALL LETTER G WITH MACRON]
+ '\u{24D6}' | // ⓖ [CIRCLED LATIN SMALL LETTER G]
+ '\u{A77F}' | // ꝿ [LATIN SMALL LETTER TURNED INSULAR G]
+ '\u{FF47}' // g [FULLWIDTH LATIN SMALL LETTER G]
+ => Some("g"),
+ '\u{24A2}' // ⒢ [PARENTHESIZED LATIN SMALL LETTER G]
+ => Some("(g)"),
+ '\u{0124}' | // Ĥ [LATIN CAPITAL LETTER H WITH CIRCUMFLEX]
+ '\u{0126}' | // Ħ [LATIN CAPITAL LETTER H WITH STROKE]
+ '\u{021E}' | // Ȟ [LATIN CAPITAL LETTER H WITH CARON]
+ '\u{029C}' | // ʜ [LATIN LETTER SMALL CAPITAL H]
+ '\u{1E22}' | // Ḣ [LATIN CAPITAL LETTER H WITH DOT ABOVE]
+ '\u{1E24}' | // Ḥ [LATIN CAPITAL LETTER H WITH DOT BELOW]
+ '\u{1E26}' | // Ḧ [LATIN CAPITAL LETTER H WITH DIAERESIS]
+ '\u{1E28}' | // Ḩ [LATIN CAPITAL LETTER H WITH CEDILLA]
+ '\u{1E2A}' | // Ḫ [LATIN CAPITAL LETTER H WITH BREVE BELOW]
+ '\u{24BD}' | // Ⓗ [CIRCLED LATIN CAPITAL LETTER H]
+ '\u{2C67}' | // Ⱨ [LATIN CAPITAL LETTER H WITH DESCENDER]
+ '\u{2C75}' | // Ⱶ [LATIN CAPITAL LETTER HALF H]
+ '\u{FF28}' // H [FULLWIDTH LATIN CAPITAL LETTER H]
+ => Some("H"),
+ '\u{0125}' | // ĥ [LATIN SMALL LETTER H WITH CIRCUMFLEX]
+ '\u{0127}' | // ħ [LATIN SMALL LETTER H WITH STROKE]
+ '\u{021F}' | // ȟ [LATIN SMALL LETTER H WITH CARON]
+ '\u{0265}' | // ɥ [LATIN SMALL LETTER TURNED H]
+ '\u{0266}' | // ɦ [LATIN SMALL LETTER H WITH HOOK]
+ '\u{02AE}' | // ʮ [LATIN SMALL LETTER TURNED H WITH FISHHOOK]
+ '\u{02AF}' | // ʯ [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL]
+ '\u{1E23}' | // ḣ [LATIN SMALL LETTER H WITH DOT ABOVE]
+ '\u{1E25}' | // ḥ [LATIN SMALL LETTER H WITH DOT BELOW]
+ '\u{1E27}' | // ḧ [LATIN SMALL LETTER H WITH DIAERESIS]
+ '\u{1E29}' | // ḩ [LATIN SMALL LETTER H WITH CEDILLA]
+ '\u{1E2B}' | // ḫ [LATIN SMALL LETTER H WITH BREVE BELOW]
+ '\u{1E96}' | // ẖ [LATIN SMALL LETTER H WITH LINE BELOW]
+ '\u{24D7}' | // ⓗ [CIRCLED LATIN SMALL LETTER H]
+ '\u{2C68}' | // ⱨ [LATIN SMALL LETTER H WITH DESCENDER]
+ '\u{2C76}' | // ⱶ [LATIN SMALL LETTER HALF H]
+ '\u{FF48}' // h [FULLWIDTH LATIN SMALL LETTER H]
+ => Some("h"),
+ '\u{01F6}' // Ƕ http://en.wikipedia.org/wiki/Hwair [LATIN CAPITAL LETTER HWAIR]
+ => Some("HV"),
+ '\u{24A3}' // ⒣ [PARENTHESIZED LATIN SMALL LETTER H]
+ => Some("(h)"),
+ '\u{0195}' // ƕ [LATIN SMALL LETTER HV]
+ => Some("hv"),
+ '\u{00CC}' | // Ì [LATIN CAPITAL LETTER I WITH GRAVE]
+ '\u{00CD}' | // Í [LATIN CAPITAL LETTER I WITH ACUTE]
+ '\u{00CE}' | // Î [LATIN CAPITAL LETTER I WITH CIRCUMFLEX]
+ '\u{00CF}' | // Ï [LATIN CAPITAL LETTER I WITH DIAERESIS]
+ '\u{0128}' | // Ĩ [LATIN CAPITAL LETTER I WITH TILDE]
+ '\u{012A}' | // Ī [LATIN CAPITAL LETTER I WITH MACRON]
+ '\u{012C}' | // Ĭ [LATIN CAPITAL LETTER I WITH BREVE]
+ '\u{012E}' | // Į [LATIN CAPITAL LETTER I WITH OGONEK]
+ '\u{0130}' | // İ [LATIN CAPITAL LETTER I WITH DOT ABOVE]
+ '\u{0196}' | // Ɩ [LATIN CAPITAL LETTER IOTA]
+ '\u{0197}' | // Ɨ [LATIN CAPITAL LETTER I WITH STROKE]
+ '\u{01CF}' | // Ǐ [LATIN CAPITAL LETTER I WITH CARON]
+ '\u{0208}' | // Ȉ [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE]
+ '\u{020A}' | // Ȋ [LATIN CAPITAL LETTER I WITH INVERTED BREVE]
+ '\u{026A}' | // ɪ [LATIN LETTER SMALL CAPITAL I]
+ '\u{1D7B}' | // ᵻ [LATIN SMALL CAPITAL LETTER I WITH STROKE]
+ '\u{1E2C}' | // Ḭ [LATIN CAPITAL LETTER I WITH TILDE BELOW]
+ '\u{1E2E}' | // Ḯ [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE]
+ '\u{1EC8}' | // Ỉ [LATIN CAPITAL LETTER I WITH HOOK ABOVE]
+ '\u{1ECA}' | // Ị [LATIN CAPITAL LETTER I WITH DOT BELOW]
+ '\u{24BE}' | // Ⓘ [CIRCLED LATIN CAPITAL LETTER I]
+ '\u{A7FE}' | // ꟾ [LATIN EPIGRAPHIC LETTER I LONGA]
+ '\u{FF29}' // I [FULLWIDTH LATIN CAPITAL LETTER I]
+ => Some("I"),
+ '\u{00EC}' | // ì [LATIN SMALL LETTER I WITH GRAVE]
+ '\u{00ED}' | // í [LATIN SMALL LETTER I WITH ACUTE]
+ '\u{00EE}' | // î [LATIN SMALL LETTER I WITH CIRCUMFLEX]
+ '\u{00EF}' | // ï [LATIN SMALL LETTER I WITH DIAERESIS]
+ '\u{0129}' | // ĩ [LATIN SMALL LETTER I WITH TILDE]
+ '\u{012B}' | // ī [LATIN SMALL LETTER I WITH MACRON]
+ '\u{012D}' | // ĭ [LATIN SMALL LETTER I WITH BREVE]
+ '\u{012F}' | // į [LATIN SMALL LETTER I WITH OGONEK]
+ '\u{0131}' | // ı [LATIN SMALL LETTER DOTLESS I]
+ '\u{01D0}' | // ǐ [LATIN SMALL LETTER I WITH CARON]
+ '\u{0209}' | // ȉ [LATIN SMALL LETTER I WITH DOUBLE GRAVE]
+ '\u{020B}' | // ȋ [LATIN SMALL LETTER I WITH INVERTED BREVE]
+ '\u{0268}' | // ɨ [LATIN SMALL LETTER I WITH STROKE]
+ '\u{1D09}' | // ᴉ [LATIN SMALL LETTER TURNED I]
+ '\u{1D62}' | // ᵢ [LATIN SUBSCRIPT SMALL LETTER I]
+ '\u{1D7C}' | // ᵼ [LATIN SMALL LETTER IOTA WITH STROKE]
+ '\u{1D96}' | // ᶖ [LATIN SMALL LETTER I WITH RETROFLEX HOOK]
+ '\u{1E2D}' | // ḭ [LATIN SMALL LETTER I WITH TILDE BELOW]
+ '\u{1E2F}' | // ḯ [LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE]
+ '\u{1EC9}' | // ỉ [LATIN SMALL LETTER I WITH HOOK ABOVE]
+ '\u{1ECB}' | // ị [LATIN SMALL LETTER I WITH DOT BELOW]
+ '\u{2071}' | // ⁱ [SUPERSCRIPT LATIN SMALL LETTER I]
+ '\u{24D8}' | // ⓘ [CIRCLED LATIN SMALL LETTER I]
+ '\u{FF49}' // i [FULLWIDTH LATIN SMALL LETTER I]
+ => Some("i"),
+ '\u{0132}' // IJ [LATIN CAPITAL LIGATURE IJ]
+ => Some("IJ"),
+ '\u{24A4}' // ⒤ [PARENTHESIZED LATIN SMALL LETTER I]
+ => Some("(i)"),
+ '\u{0133}' // ij [LATIN SMALL LIGATURE IJ]
+ => Some("ij"),
+ '\u{0134}' | // Ĵ [LATIN CAPITAL LETTER J WITH CIRCUMFLEX]
+ '\u{0248}' | // Ɉ [LATIN CAPITAL LETTER J WITH STROKE]
+ '\u{1D0A}' | // ᴊ [LATIN LETTER SMALL CAPITAL J]
+ '\u{24BF}' | // Ⓙ [CIRCLED LATIN CAPITAL LETTER J]
+ '\u{FF2A}' // J [FULLWIDTH LATIN CAPITAL LETTER J]
+ => Some("J"),
+ '\u{0135}' | // ĵ [LATIN SMALL LETTER J WITH CIRCUMFLEX]
+ '\u{01F0}' | // ǰ [LATIN SMALL LETTER J WITH CARON]
+ '\u{0237}' | // ȷ [LATIN SMALL LETTER DOTLESS J]
+ '\u{0249}' | // ɉ [LATIN SMALL LETTER J WITH STROKE]
+ '\u{025F}' | // ɟ [LATIN SMALL LETTER DOTLESS J WITH STROKE]
+ '\u{0284}' | // ʄ [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK]
+ '\u{029D}' | // ʝ [LATIN SMALL LETTER J WITH CROSSED-TAIL]
+ '\u{24D9}' | // ⓙ [CIRCLED LATIN SMALL LETTER J]
+ '\u{2C7C}' | // ⱼ [LATIN SUBSCRIPT SMALL LETTER J]
+ '\u{FF4A}' // j [FULLWIDTH LATIN SMALL LETTER J]
+ => Some("j"),
+ '\u{24A5}' // ⒥ [PARENTHESIZED LATIN SMALL LETTER J]
+ => Some("(j)"),
+ '\u{0136}' | // Ķ [LATIN CAPITAL LETTER K WITH CEDILLA]
+ '\u{0198}' | // Ƙ [LATIN CAPITAL LETTER K WITH HOOK]
+ '\u{01E8}' | // Ǩ [LATIN CAPITAL LETTER K WITH CARON]
+ '\u{1D0B}' | // ᴋ [LATIN LETTER SMALL CAPITAL K]
+ '\u{1E30}' | // Ḱ [LATIN CAPITAL LETTER K WITH ACUTE]
+ '\u{1E32}' | // Ḳ [LATIN CAPITAL LETTER K WITH DOT BELOW]
+ '\u{1E34}' | // Ḵ [LATIN CAPITAL LETTER K WITH LINE BELOW]
+ '\u{24C0}' | // Ⓚ [CIRCLED LATIN CAPITAL LETTER K]
+ '\u{2C69}' | // Ⱪ [LATIN CAPITAL LETTER K WITH DESCENDER]
+ '\u{A740}' | // Ꝁ [LATIN CAPITAL LETTER K WITH STROKE]
+ '\u{A742}' | // Ꝃ [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE]
+ '\u{A744}' | // Ꝅ [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE]
+ '\u{FF2B}' // K [FULLWIDTH LATIN CAPITAL LETTER K]
+ => Some("K"),
+ '\u{0137}' | // ķ [LATIN SMALL LETTER K WITH CEDILLA]
+ '\u{0199}' | // ƙ [LATIN SMALL LETTER K WITH HOOK]
+ '\u{01E9}' | // ǩ [LATIN SMALL LETTER K WITH CARON]
+ '\u{029E}' | // ʞ [LATIN SMALL LETTER TURNED K]
+ '\u{1D84}' | // ᶄ [LATIN SMALL LETTER K WITH PALATAL HOOK]
+ '\u{1E31}' | // ḱ [LATIN SMALL LETTER K WITH ACUTE]
+ '\u{1E33}' | // ḳ [LATIN SMALL LETTER K WITH DOT BELOW]
+ '\u{1E35}' | // ḵ [LATIN SMALL LETTER K WITH LINE BELOW]
+ '\u{24DA}' | // ⓚ [CIRCLED LATIN SMALL LETTER K]
+ '\u{2C6A}' | // ⱪ [LATIN SMALL LETTER K WITH DESCENDER]
+ '\u{A741}' | // ꝁ [LATIN SMALL LETTER K WITH STROKE]
+ '\u{A743}' | // ꝃ [LATIN SMALL LETTER K WITH DIAGONAL STROKE]
+ '\u{A745}' | // ꝅ [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE]
+ '\u{FF4B}' // k [FULLWIDTH LATIN SMALL LETTER K]
+ => Some("k"),
+ '\u{24A6}' // ⒦ [PARENTHESIZED LATIN SMALL LETTER K]
+ => Some("(k)"),
+ '\u{0139}' | // Ĺ [LATIN CAPITAL LETTER L WITH ACUTE]
+ '\u{013B}' | // Ļ [LATIN CAPITAL LETTER L WITH CEDILLA]
+ '\u{013D}' | // Ľ [LATIN CAPITAL LETTER L WITH CARON]
+ '\u{013F}' | // Ŀ [LATIN CAPITAL LETTER L WITH MIDDLE DOT]
+ '\u{0141}' | // Ł [LATIN CAPITAL LETTER L WITH STROKE]
+ '\u{023D}' | // Ƚ [LATIN CAPITAL LETTER L WITH BAR]
+ '\u{029F}' | // ʟ [LATIN LETTER SMALL CAPITAL L]
+ '\u{1D0C}' | // ᴌ [LATIN LETTER SMALL CAPITAL L WITH STROKE]
+ '\u{1E36}' | // Ḷ [LATIN CAPITAL LETTER L WITH DOT BELOW]
+ '\u{1E38}' | // Ḹ [LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON]
+ '\u{1E3A}' | // Ḻ [LATIN CAPITAL LETTER L WITH LINE BELOW]
+ '\u{1E3C}' | // Ḽ [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW]
+ '\u{24C1}' | // Ⓛ [CIRCLED LATIN CAPITAL LETTER L]
+ '\u{2C60}' | // Ⱡ [LATIN CAPITAL LETTER L WITH DOUBLE BAR]
+ '\u{2C62}' | // Ɫ [LATIN CAPITAL LETTER L WITH MIDDLE TILDE]
+ '\u{A746}' | // Ꝇ [LATIN CAPITAL LETTER BROKEN L]
+ '\u{A748}' | // Ꝉ [LATIN CAPITAL LETTER L WITH HIGH STROKE]
+ '\u{A780}' | // Ꞁ [LATIN CAPITAL LETTER TURNED L]
+ '\u{FF2C}' // L [FULLWIDTH LATIN CAPITAL LETTER L]
+ => Some("L"),
+ '\u{013A}' | // ĺ [LATIN SMALL LETTER L WITH ACUTE]
+ '\u{013C}' | // ļ [LATIN SMALL LETTER L WITH CEDILLA]
+ '\u{013E}' | // ľ [LATIN SMALL LETTER L WITH CARON]
+ '\u{0140}' | // ŀ [LATIN SMALL LETTER L WITH MIDDLE DOT]
+ '\u{0142}' | // ł [LATIN SMALL LETTER L WITH STROKE]
+ '\u{019A}' | // ƚ [LATIN SMALL LETTER L WITH BAR]
+ '\u{0234}' | // ȴ [LATIN SMALL LETTER L WITH CURL]
+ '\u{026B}' | // ɫ [LATIN SMALL LETTER L WITH MIDDLE TILDE]
+ '\u{026C}' | // ɬ [LATIN SMALL LETTER L WITH BELT]
+ '\u{026D}' | // ɭ [LATIN SMALL LETTER L WITH RETROFLEX HOOK]
+ '\u{1D85}' | // ᶅ [LATIN SMALL LETTER L WITH PALATAL HOOK]
+ '\u{1E37}' | // ḷ [LATIN SMALL LETTER L WITH DOT BELOW]
+ '\u{1E39}' | // ḹ [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON]
+ '\u{1E3B}' | // ḻ [LATIN SMALL LETTER L WITH LINE BELOW]
+ '\u{1E3D}' | // ḽ [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW]
+ '\u{24DB}' | // ⓛ [CIRCLED LATIN SMALL LETTER L]
+ '\u{2C61}' | // ⱡ [LATIN SMALL LETTER L WITH DOUBLE BAR]
+ '\u{A747}' | // ꝇ [LATIN SMALL LETTER BROKEN L]
+ '\u{A749}' | // ꝉ [LATIN SMALL LETTER L WITH HIGH STROKE]
+ '\u{A781}' | // ꞁ [LATIN SMALL LETTER TURNED L]
+ '\u{FF4C}' // l [FULLWIDTH LATIN SMALL LETTER L]
+ => Some("l"),
+ '\u{01C7}' // LJ [LATIN CAPITAL LETTER LJ]
+ => Some("LJ"),
+ '\u{1EFA}' // Ỻ [LATIN CAPITAL LETTER MIDDLE-WELSH LL]
+ => Some("LL"),
+ '\u{01C8}' // Lj [LATIN CAPITAL LETTER L WITH SMALL LETTER J]
+ => Some("Lj"),
+ '\u{24A7}' // ⒧ [PARENTHESIZED LATIN SMALL LETTER L]
+ => Some("(l)"),
+ '\u{01C9}' // lj [LATIN SMALL LETTER LJ]
+ => Some("lj"),
+ '\u{1EFB}' // ỻ [LATIN SMALL LETTER MIDDLE-WELSH LL]
+ => Some("ll"),
+ '\u{02AA}' // ʪ [LATIN SMALL LETTER LS DIGRAPH]
+ => Some("ls"),
+ '\u{02AB}' // ʫ [LATIN SMALL LETTER LZ DIGRAPH]
+ => Some("lz"),
+ '\u{019C}' | // Ɯ [LATIN CAPITAL LETTER TURNED M]
+ '\u{1D0D}' | // ᴍ [LATIN LETTER SMALL CAPITAL M]
+ '\u{1E3E}' | // Ḿ [LATIN CAPITAL LETTER M WITH ACUTE]
+ '\u{1E40}' | // Ṁ [LATIN CAPITAL LETTER M WITH DOT ABOVE]
+ '\u{1E42}' | // Ṃ [LATIN CAPITAL LETTER M WITH DOT BELOW]
+ '\u{24C2}' | // Ⓜ [CIRCLED LATIN CAPITAL LETTER M]
+ '\u{2C6E}' | // Ɱ [LATIN CAPITAL LETTER M WITH HOOK]
+ '\u{A7FD}' | // ꟽ [LATIN EPIGRAPHIC LETTER INVERTED M]
+ '\u{A7FF}' | // ꟿ [LATIN EPIGRAPHIC LETTER ARCHAIC M]
+ '\u{FF2D}' // M [FULLWIDTH LATIN CAPITAL LETTER M]
+ => Some("M"),
+ '\u{026F}' | // ɯ [LATIN SMALL LETTER TURNED M]
+ '\u{0270}' | // ɰ [LATIN SMALL LETTER TURNED M WITH LONG LEG]
+ '\u{0271}' | // ɱ [LATIN SMALL LETTER M WITH HOOK]
+ '\u{1D6F}' | // ᵯ [LATIN SMALL LETTER M WITH MIDDLE TILDE]
+ '\u{1D86}' | // ᶆ [LATIN SMALL LETTER M WITH PALATAL HOOK]
+ '\u{1E3F}' | // ḿ [LATIN SMALL LETTER M WITH ACUTE]
+ '\u{1E41}' | // ṁ [LATIN SMALL LETTER M WITH DOT ABOVE]
+ '\u{1E43}' | // ṃ [LATIN SMALL LETTER M WITH DOT BELOW]
+ '\u{24DC}' | // ⓜ [CIRCLED LATIN SMALL LETTER M]
+ '\u{FF4D}' // m [FULLWIDTH LATIN SMALL LETTER M]
+ => Some("m"),
+ '\u{24A8}' // ⒨ [PARENTHESIZED LATIN SMALL LETTER M]
+ => Some("(m)"),
+ '\u{00D1}' | // Ñ [LATIN CAPITAL LETTER N WITH TILDE]
+ '\u{0143}' | // Ń [LATIN CAPITAL LETTER N WITH ACUTE]
+ '\u{0145}' | // Ņ [LATIN CAPITAL LETTER N WITH CEDILLA]
+ '\u{0147}' | // Ň [LATIN CAPITAL LETTER N WITH CARON]
+ '\u{014A}' | // Ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN CAPITAL LETTER ENG]
+ '\u{019D}