diff options
author | petr-tik <petr-tik@users.noreply.github.com> | 2019-04-26 08:47:12 +0100 |
---|---|---|
committer | petr-tik <petr-tik@users.noreply.github.com> | 2019-04-26 08:47:12 +0100 |
commit | 1a90a1f3b0ffced46bac95da389754237495cb47 (patch) | |
tree | ed22d9e97d8b91b0060eb983bb89f03a156fb2ae | |
parent | 8e509213634ccbf82c3380e1b660ec1ffb735301 (diff) | |
parent | dac50c6aeb471868d6b1462426907ecfce3eb420 (diff) |
Merge branch 'master' of github.com:tantivy-search/tantivy into stamper_refactor
-rw-r--r-- | CHANGELOG.md | 5 | ||||
-rw-r--r-- | src/postings/postings_writer.rs | 7 | ||||
-rw-r--r-- | src/tokenizer/ascii_folding_filter.rs | 4064 | ||||
-rw-r--r-- | src/tokenizer/lower_caser.rs | 19 | ||||
-rw-r--r-- | src/tokenizer/mod.rs | 2 | ||||
-rw-r--r-- | src/tokenizer/raw_tokenizer.rs | 9 | ||||
-rw-r--r-- | src/tokenizer/remove_long.rs | 1 | ||||
-rw-r--r-- | src/tokenizer/simple_tokenizer.rs | 23 | ||||
-rw-r--r-- | src/tokenizer/stemmer.rs | 15 | ||||
-rw-r--r-- | src/tokenizer/stop_word_filter.rs | 1 |
10 files changed, 4103 insertions, 43 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 068b291..7957d99 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ Tantivy 0.10.0 -==================== - +===================== +- Added an ASCII folding filter (@drusellers) +- Bugfix in `query.count` in presence of deletes (@pmasurel) Minor --------- diff --git a/src/postings/postings_writer.rs b/src/postings/postings_writer.rs index d263258..d5d769b 100644 --- a/src/postings/postings_writer.rs +++ b/src/postings/postings_writer.rs @@ -214,6 +214,13 @@ pub trait PostingsWriter { if token.text.len() <= MAX_TOKEN_LEN { term.set_text(token.text.as_str()); self.subscribe(term_index, doc_id, token.position as u32, &term, heap); + } else { + info!( + "A token exceeding MAX_TOKEN_LEN ({}>{}) was dropped. Search for \ + MAX_TOKEN_LEN in the documentation for more information.", + token.text.len(), + MAX_TOKEN_LEN + ); } }; token_stream.process(&mut sink) diff --git a/src/tokenizer/ascii_folding_filter.rs b/src/tokenizer/ascii_folding_filter.rs new file mode 100644 index 0000000..cbd124a --- /dev/null +++ b/src/tokenizer/ascii_folding_filter.rs @@ -0,0 +1,4064 @@ +use super::{Token, TokenFilter, TokenStream}; +use std::mem; + +/// This filter converts alphabetic, numeric, and symbolic Unicode characters +/// which are not among the 128 ASCII characters (the "Basic Latin" Unicode +/// block) into their ASCII equivalents, if they exist. 
+#[derive(Clone)] +pub struct AsciiFoldingFilter; + +impl<TailTokenStream> TokenFilter<TailTokenStream> for AsciiFoldingFilter +where + TailTokenStream: TokenStream, +{ + type ResultTokenStream = AsciiFoldingFilterTokenStream<TailTokenStream>; + + fn transform(&self, token_stream: TailTokenStream) -> Self::ResultTokenStream { + AsciiFoldingFilterTokenStream::wrap(token_stream) + } +} + +pub struct AsciiFoldingFilterTokenStream<TailTokenStream> { + buffer: String, + tail: TailTokenStream, +} + +impl<TailTokenStream> TokenStream for AsciiFoldingFilterTokenStream<TailTokenStream> +where + TailTokenStream: TokenStream, +{ + fn advance(&mut self) -> bool { + if !self.tail.advance() { + return false; + } + if !self.token_mut().text.is_ascii() { + // ignore its already ascii + to_ascii(&mut self.tail.token_mut().text, &mut self.buffer); + mem::swap(&mut self.tail.token_mut().text, &mut self.buffer); + } + true + } + + fn token(&self) -> &Token { + self.tail.token() + } + + fn token_mut(&mut self) -> &mut Token { + self.tail.token_mut() + } +} + +impl<TailTokenStream> AsciiFoldingFilterTokenStream<TailTokenStream> +where + TailTokenStream: TokenStream, +{ + fn wrap(tail: TailTokenStream) -> AsciiFoldingFilterTokenStream<TailTokenStream> { + AsciiFoldingFilterTokenStream { + tail, + buffer: String::with_capacity(100), + } + } +} + +// Returns a string that represents the ascii folded version of +// the character. If the `char` does not require ascii folding +// (e.g. simple ASCII chars like `A`) or if the `char` +// does not have a sensible ascii equivalent (e.g.: Kanjis like 馬, +// this function returns `None`. 
+fn fold_non_ascii_char(c: char) -> Option<&'static str> { + match c { + '\u{00C0}' | // À [LATIN CAPITAL LETTER A WITH GRAVE] + '\u{00C1}' | // Á [LATIN CAPITAL LETTER A WITH ACUTE] + '\u{00C2}' | // Â [LATIN CAPITAL LETTER A WITH CIRCUMFLEX] + '\u{00C3}' | // Ã [LATIN CAPITAL LETTER A WITH TILDE] + '\u{00C4}' | // Ä [LATIN CAPITAL LETTER A WITH DIAERESIS] + '\u{00C5}' | // Å [LATIN CAPITAL LETTER A WITH RING ABOVE] + '\u{0100}' | // Ā [LATIN CAPITAL LETTER A WITH MACRON] + '\u{0102}' | // Ă [LATIN CAPITAL LETTER A WITH BREVE] + '\u{0104}' | // Ą [LATIN CAPITAL LETTER A WITH OGONEK] + '\u{018F}' | // Ə http://en.wikipedia.org/wiki/Schwa [LATIN CAPITAL LETTER SCHWA] + '\u{01CD}' | // Ǎ [LATIN CAPITAL LETTER A WITH CARON] + '\u{01DE}' | // Ǟ [LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON] + '\u{01E0}' | // Ǡ [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON] + '\u{01FA}' | // Ǻ [LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE] + '\u{0200}' | // Ȁ [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE] + '\u{0202}' | // Ȃ [LATIN CAPITAL LETTER A WITH INVERTED BREVE] + '\u{0226}' | // Ȧ [LATIN CAPITAL LETTER A WITH DOT ABOVE] + '\u{023A}' | // Ⱥ [LATIN CAPITAL LETTER A WITH STROKE] + '\u{1D00}' | // ᴀ [LATIN LETTER SMALL CAPITAL A] + '\u{1E00}' | // Ḁ [LATIN CAPITAL LETTER A WITH RING BELOW] + '\u{1EA0}' | // Ạ [LATIN CAPITAL LETTER A WITH DOT BELOW] + '\u{1EA2}' | // Ả [LATIN CAPITAL LETTER A WITH HOOK ABOVE] + '\u{1EA4}' | // Ấ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE] + '\u{1EA6}' | // Ầ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE] + '\u{1EA8}' | // Ẩ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE] + '\u{1EAA}' | // Ẫ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE] + '\u{1EAC}' | // Ậ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW] + '\u{1EAE}' | // Ắ [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE] + '\u{1EB0}' | // Ằ [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE] + '\u{1EB2}' | // Ẳ [LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE] + 
'\u{1EB4}' | // Ẵ [LATIN CAPITAL LETTER A WITH BREVE AND TILDE] + '\u{1EB6}' | // Ặ [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW] + '\u{24B6}' | // Ⓐ [CIRCLED LATIN CAPITAL LETTER A] + '\u{FF21}' // A [FULLWIDTH LATIN CAPITAL LETTER A] + => Some("A"), + '\u{00E0}' | // à [LATIN SMALL LETTER A WITH GRAVE] + '\u{00E1}' | // á [LATIN SMALL LETTER A WITH ACUTE] + '\u{00E2}' | // â [LATIN SMALL LETTER A WITH CIRCUMFLEX] + '\u{00E3}' | // ã [LATIN SMALL LETTER A WITH TILDE] + '\u{00E4}' | // ä [LATIN SMALL LETTER A WITH DIAERESIS] + '\u{00E5}' | // å [LATIN SMALL LETTER A WITH RING ABOVE] + '\u{0101}' | // ā [LATIN SMALL LETTER A WITH MACRON] + '\u{0103}' | // ă [LATIN SMALL LETTER A WITH BREVE] + '\u{0105}' | // ą [LATIN SMALL LETTER A WITH OGONEK] + '\u{01CE}' | // ǎ [LATIN SMALL LETTER A WITH CARON] + '\u{01DF}' | // ǟ [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON] + '\u{01E1}' | // ǡ [LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON] + '\u{01FB}' | // ǻ [LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE] + '\u{0201}' | // ȁ [LATIN SMALL LETTER A WITH DOUBLE GRAVE] + '\u{0203}' | // ȃ [LATIN SMALL LETTER A WITH INVERTED BREVE] + '\u{0227}' | // ȧ [LATIN SMALL LETTER A WITH DOT ABOVE] + '\u{0250}' | // ɐ [LATIN SMALL LETTER TURNED A] + '\u{0259}' | // ə [LATIN SMALL LETTER SCHWA] + '\u{025A}' | // ɚ [LATIN SMALL LETTER SCHWA WITH HOOK] + '\u{1D8F}' | // ᶏ [LATIN SMALL LETTER A WITH RETROFLEX HOOK] + '\u{1D95}' | // ᶕ [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK] + '\u{1E01}' | // ạ [LATIN SMALL LETTER A WITH RING BELOW] + '\u{1E9A}' | // ả [LATIN SMALL LETTER A WITH RIGHT HALF RING] + '\u{1EA1}' | // ạ [LATIN SMALL LETTER A WITH DOT BELOW] + '\u{1EA3}' | // ả [LATIN SMALL LETTER A WITH HOOK ABOVE] + '\u{1EA5}' | // ấ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE] + '\u{1EA7}' | // ầ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE] + '\u{1EA9}' | // ẩ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE] + '\u{1EAB}' | // ẫ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND 
TILDE] + '\u{1EAD}' | // ậ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW] + '\u{1EAF}' | // ắ [LATIN SMALL LETTER A WITH BREVE AND ACUTE] + '\u{1EB1}' | // ằ [LATIN SMALL LETTER A WITH BREVE AND GRAVE] + '\u{1EB3}' | // ẳ [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE] + '\u{1EB5}' | // ẵ [LATIN SMALL LETTER A WITH BREVE AND TILDE] + '\u{1EB7}' | // ặ [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW] + '\u{2090}' | // ₐ [LATIN SUBSCRIPT SMALL LETTER A] + '\u{2094}' | // ₔ [LATIN SUBSCRIPT SMALL LETTER SCHWA] + '\u{24D0}' | // ⓐ [CIRCLED LATIN SMALL LETTER A] + '\u{2C65}' | // ⱥ [LATIN SMALL LETTER A WITH STROKE] + '\u{2C6F}' | // Ɐ [LATIN CAPITAL LETTER TURNED A] + '\u{FF41}' // a [FULLWIDTH LATIN SMALL LETTER A] + => Some("a"), + '\u{A732}' // Ꜳ [LATIN CAPITAL LETTER AA] + => Some("AA"), + '\u{00C6}' | // Æ [LATIN CAPITAL LETTER AE] + '\u{01E2}' | // Ǣ [LATIN CAPITAL LETTER AE WITH MACRON] + '\u{01FC}' | // Ǽ [LATIN CAPITAL LETTER AE WITH ACUTE] + '\u{1D01}' // ᴁ [LATIN LETTER SMALL CAPITAL AE] + => Some("AE"), + '\u{A734}' // Ꜵ [LATIN CAPITAL LETTER AO] + => Some("AO"), + '\u{A736}' // Ꜷ [LATIN CAPITAL LETTER AU] + => Some("AU"), + '\u{A738}' | // Ꜹ [LATIN CAPITAL LETTER AV] + '\u{A73A}' // Ꜻ [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR] + => Some("AV"), + '\u{A73C}' // Ꜽ [LATIN CAPITAL LETTER AY] + => Some("AY"), + '\u{249C}' // ⒜ [PARENTHESIZED LATIN SMALL LETTER A] + => Some("(a)"), + '\u{A733}' // ꜳ [LATIN SMALL LETTER AA] + => Some("aa"), + '\u{00E6}' | // æ [LATIN SMALL LETTER AE] + '\u{01E3}' | // ǣ [LATIN SMALL LETTER AE WITH MACRON] + '\u{01FD}' | // ǽ [LATIN SMALL LETTER AE WITH ACUTE] + '\u{1D02}' // ᴂ [LATIN SMALL LETTER TURNED AE] + => Some("ae"), + '\u{A735}' // ꜵ [LATIN SMALL LETTER AO] + => Some("ao"), + '\u{A737}' // ꜷ [LATIN SMALL LETTER AU] + => Some("au"), + '\u{A739}' | // ꜹ [LATIN SMALL LETTER AV] + '\u{A73B}' // ꜻ [LATIN SMALL LETTER AV WITH HORIZONTAL BAR] + => Some("av"), + '\u{A73D}' // ꜽ [LATIN SMALL LETTER AY] + => Some("ay"), + 
'\u{0181}' | // Ɓ [LATIN CAPITAL LETTER B WITH HOOK] + '\u{0182}' | // Ƃ [LATIN CAPITAL LETTER B WITH TOPBAR] + '\u{0243}' | // Ƀ [LATIN CAPITAL LETTER B WITH STROKE] + '\u{0299}' | // ʙ [LATIN LETTER SMALL CAPITAL B] + '\u{1D03}' | // ᴃ [LATIN LETTER SMALL CAPITAL BARRED B] + '\u{1E02}' | // Ḃ [LATIN CAPITAL LETTER B WITH DOT ABOVE] + '\u{1E04}' | // Ḅ [LATIN CAPITAL LETTER B WITH DOT BELOW] + '\u{1E06}' | // Ḇ [LATIN CAPITAL LETTER B WITH LINE BELOW] + '\u{24B7}' | // Ⓑ [CIRCLED LATIN CAPITAL LETTER B] + '\u{FF22}' // B [FULLWIDTH LATIN CAPITAL LETTER B] + => Some("B"), + '\u{0180}' | // ƀ [LATIN SMALL LETTER B WITH STROKE] + '\u{0183}' | // ƃ [LATIN SMALL LETTER B WITH TOPBAR] + '\u{0253}' | // ɓ [LATIN SMALL LETTER B WITH HOOK] + '\u{1D6C}' | // ᵬ [LATIN SMALL LETTER B WITH MIDDLE TILDE] + '\u{1D80}' | // ᶀ [LATIN SMALL LETTER B WITH PALATAL HOOK] + '\u{1E03}' | // ḃ [LATIN SMALL LETTER B WITH DOT ABOVE] + '\u{1E05}' | // ḅ [LATIN SMALL LETTER B WITH DOT BELOW] + '\u{1E07}' | // ḇ [LATIN SMALL LETTER B WITH LINE BELOW] + '\u{24D1}' | // ⓑ [CIRCLED LATIN SMALL LETTER B] + '\u{FF42}' // b [FULLWIDTH LATIN SMALL LETTER B] + => Some("b"), + '\u{249D}' // ⒝ [PARENTHESIZED LATIN SMALL LETTER B] + => Some("(b)"), + '\u{00C7}' | // Ç [LATIN CAPITAL LETTER C WITH CEDILLA] + '\u{0106}' | // Ć [LATIN CAPITAL LETTER C WITH ACUTE] + '\u{0108}' | // Ĉ [LATIN CAPITAL LETTER C WITH CIRCUMFLEX] + '\u{010A}' | // Ċ [LATIN CAPITAL LETTER C WITH DOT ABOVE] + '\u{010C}' | // Č [LATIN CAPITAL LETTER C WITH CARON] + '\u{0187}' | // Ƈ [LATIN CAPITAL LETTER C WITH HOOK] + '\u{023B}' | // Ȼ [LATIN CAPITAL LETTER C WITH STROKE] + '\u{0297}' | // ʗ [LATIN LETTER STRETCHED C] + '\u{1D04}' | // ᴄ [LATIN LETTER SMALL CAPITAL C] + '\u{1E08}' | // Ḉ [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE] + '\u{24B8}' | // Ⓒ [CIRCLED LATIN CAPITAL LETTER C] + '\u{FF23}' // C [FULLWIDTH LATIN CAPITAL LETTER C] + => Some("C"), + '\u{00E7}' | // ç [LATIN SMALL LETTER C WITH CEDILLA] + '\u{0107}' | // ć 
[LATIN SMALL LETTER C WITH ACUTE] + '\u{0109}' | // ĉ [LATIN SMALL LETTER C WITH CIRCUMFLEX] + '\u{010B}' | // ċ [LATIN SMALL LETTER C WITH DOT ABOVE] + '\u{010D}' | // č [LATIN SMALL LETTER C WITH CARON] + '\u{0188}' | // ƈ [LATIN SMALL LETTER C WITH HOOK] + '\u{023C}' | // ȼ [LATIN SMALL LETTER C WITH STROKE] + '\u{0255}' | // ɕ [LATIN SMALL LETTER C WITH CURL] + '\u{1E09}' | // ḉ [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE] + '\u{2184}' | // ↄ [LATIN SMALL LETTER REVERSED C] + '\u{24D2}' | // ⓒ [CIRCLED LATIN SMALL LETTER C] + '\u{A73E}' | // Ꜿ [LATIN CAPITAL LETTER REVERSED C WITH DOT] + '\u{A73F}' | // ꜿ [LATIN SMALL LETTER REVERSED C WITH DOT] + '\u{FF43}' // c [FULLWIDTH LATIN SMALL LETTER C] + => Some("c"), + '\u{249E}' // ⒞ [PARENTHESIZED LATIN SMALL LETTER C] + => Some("(c)"), + '\u{00D0}' | // Ð [LATIN CAPITAL LETTER ETH] + '\u{010E}' | // Ď [LATIN CAPITAL LETTER D WITH CARON] + '\u{0110}' | // Đ [LATIN CAPITAL LETTER D WITH STROKE] + '\u{0189}' | // Ɖ [LATIN CAPITAL LETTER AFRICAN D] + '\u{018A}' | // Ɗ [LATIN CAPITAL LETTER D WITH HOOK] + '\u{018B}' | // Ƌ [LATIN CAPITAL LETTER D WITH TOPBAR] + '\u{1D05}' | // ᴅ [LATIN LETTER SMALL CAPITAL D] + '\u{1D06}' | // ᴆ [LATIN LETTER SMALL CAPITAL ETH] + '\u{1E0A}' | // Ḋ [LATIN CAPITAL LETTER D WITH DOT ABOVE] + '\u{1E0C}' | // Ḍ [LATIN CAPITAL LETTER D WITH DOT BELOW] + '\u{1E0E}' | // Ḏ [LATIN CAPITAL LETTER D WITH LINE BELOW] + '\u{1E10}' | // Ḑ [LATIN CAPITAL LETTER D WITH CEDILLA] + '\u{1E12}' | // Ḓ [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW] + '\u{24B9}' | // Ⓓ [CIRCLED LATIN CAPITAL LETTER D] + '\u{A779}' | // Ꝺ [LATIN CAPITAL LETTER INSULAR D] + '\u{FF24}' // D [FULLWIDTH LATIN CAPITAL LETTER D] + => Some("D"), + '\u{00F0}' | // ð [LATIN SMALL LETTER ETH] + '\u{010F}' | // ď [LATIN SMALL LETTER D WITH CARON] + '\u{0111}' | // đ [LATIN SMALL LETTER D WITH STROKE] + '\u{018C}' | // ƌ [LATIN SMALL LETTER D WITH TOPBAR] + '\u{0221}' | // ȡ [LATIN SMALL LETTER D WITH CURL] + '\u{0256}' | // ɖ [LATIN 
SMALL LETTER D WITH TAIL] + '\u{0257}' | // ɗ [LATIN SMALL LETTER D WITH HOOK] + '\u{1D6D}' | // ᵭ [LATIN SMALL LETTER D WITH MIDDLE TILDE] + '\u{1D81}' | // ᶁ [LATIN SMALL LETTER D WITH PALATAL HOOK] + '\u{1D91}' | // ᶑ [LATIN SMALL LETTER D WITH HOOK AND TAIL] + '\u{1E0B}' | // ḋ [LATIN SMALL LETTER D WITH DOT ABOVE] + '\u{1E0D}' | // ḍ [LATIN SMALL LETTER D WITH DOT BELOW] + '\u{1E0F}' | // ḏ [LATIN SMALL LETTER D WITH LINE BELOW] + '\u{1E11}' | // ḑ [LATIN SMALL LETTER D WITH CEDILLA] + '\u{1E13}' | // ḓ [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW] + '\u{24D3}' | // ⓓ [CIRCLED LATIN SMALL LETTER D] + '\u{A77A}' | // ꝺ [LATIN SMALL LETTER INSULAR D] + '\u{FF44}' // d [FULLWIDTH LATIN SMALL LETTER D] + => Some("d"), + '\u{01C4}' | // DŽ [LATIN CAPITAL LETTER DZ WITH CARON] + '\u{01F1}' // DZ [LATIN CAPITAL LETTER DZ] + => Some("DZ"), + '\u{01C5}' | // Dž [LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON] + '\u{01F2}' // Dz [LATIN CAPITAL LETTER D WITH SMALL LETTER Z] + => Some("Dz"), + '\u{249F}' // ⒟ [PARENTHESIZED LATIN SMALL LETTER D] + => Some("(d)"), + '\u{0238}' // ȸ [LATIN SMALL LETTER DB DIGRAPH] + => Some("db"), + '\u{01C6}' | // dž [LATIN SMALL LETTER DZ WITH CARON] + '\u{01F3}' | // dz [LATIN SMALL LETTER DZ] + '\u{02A3}' | // ʣ [LATIN SMALL LETTER DZ DIGRAPH] + '\u{02A5}' // ʥ [LATIN SMALL LETTER DZ DIGRAPH WITH CURL] + => Some("dz"), + '\u{00C8}' | // È [LATIN CAPITAL LETTER E WITH GRAVE] + '\u{00C9}' | // É [LATIN CAPITAL LETTER E WITH ACUTE] + '\u{00CA}' | // Ê [LATIN CAPITAL LETTER E WITH CIRCUMFLEX] + '\u{00CB}' | // Ë [LATIN CAPITAL LETTER E WITH DIAERESIS] + '\u{0112}' | // Ē [LATIN CAPITAL LETTER E WITH MACRON] + '\u{0114}' | // Ĕ [LATIN CAPITAL LETTER E WITH BREVE] + '\u{0116}' | // Ė [LATIN CAPITAL LETTER E WITH DOT ABOVE] + '\u{0118}' | // Ę [LATIN CAPITAL LETTER E WITH OGONEK] + '\u{011A}' | // Ě [LATIN CAPITAL LETTER E WITH CARON] + '\u{018E}' | // Ǝ [LATIN CAPITAL LETTER REVERSED E] + '\u{0190}' | // Ɛ [LATIN CAPITAL LETTER OPEN E] 
+ '\u{0204}' | // Ȅ [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE] + '\u{0206}' | // Ȇ [LATIN CAPITAL LETTER E WITH INVERTED BREVE] + '\u{0228}' | // Ȩ [LATIN CAPITAL LETTER E WITH CEDILLA] + '\u{0246}' | // Ɇ [LATIN CAPITAL LETTER E WITH STROKE] + '\u{1D07}' | // ᴇ [LATIN LETTER SMALL CAPITAL E] + '\u{1E14}' | // Ḕ [LATIN CAPITAL LETTER E WITH MACRON AND GRAVE] + '\u{1E16}' | // Ḗ [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE] + '\u{1E18}' | // Ḙ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW] + '\u{1E1A}' | // Ḛ [LATIN CAPITAL LETTER E WITH TILDE BELOW] + '\u{1E1C}' | // Ḝ [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE] + '\u{1EB8}' | // Ẹ [LATIN CAPITAL LETTER E WITH DOT BELOW] + '\u{1EBA}' | // Ẻ [LATIN CAPITAL LETTER E WITH HOOK ABOVE] + '\u{1EBC}' | // Ẽ [LATIN CAPITAL LETTER E WITH TILDE] + '\u{1EBE}' | // Ế [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE] + '\u{1EC0}' | // Ề [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE] + '\u{1EC2}' | // Ể [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE] + '\u{1EC4}' | // Ễ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE] + '\u{1EC6}' | // Ệ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW] + '\u{24BA}' | // Ⓔ [CIRCLED LATIN CAPITAL LETTER E] + '\u{2C7B}' | // ⱻ [LATIN LETTER SMALL CAPITAL TURNED E] + '\u{FF25}' // E [FULLWIDTH LATIN CAPITAL LETTER E] + => Some("E"), + '\u{00E8}' | // è [LATIN SMALL LETTER E WITH GRAVE] + '\u{00E9}' | // é [LATIN SMALL LETTER E WITH ACUTE] + '\u{00EA}' | // ê [LATIN SMALL LETTER E WITH CIRCUMFLEX] + '\u{00EB}' | // ë [LATIN SMALL LETTER E WITH DIAERESIS] + '\u{0113}' | // ē [LATIN SMALL LETTER E WITH MACRON] + '\u{0115}' | // ĕ [LATIN SMALL LETTER E WITH BREVE] + '\u{0117}' | // ė [LATIN SMALL LETTER E WITH DOT ABOVE] + '\u{0119}' | // ę [LATIN SMALL LETTER E WITH OGONEK] + '\u{011B}' | // ě [LATIN SMALL LETTER E WITH CARON] + '\u{01DD}' | // ǝ [LATIN SMALL LETTER TURNED E] + '\u{0205}' | // ȅ [LATIN SMALL LETTER E WITH DOUBLE GRAVE] + '\u{0207}' | // ȇ [LATIN SMALL LETTER E 
WITH INVERTED BREVE] + '\u{0229}' | // ȩ [LATIN SMALL LETTER E WITH CEDILLA] + '\u{0247}' | // ɇ [LATIN SMALL LETTER E WITH STROKE] + '\u{0258}' | // ɘ [LATIN SMALL LETTER REVERSED E] + '\u{025B}' | // ɛ [LATIN SMALL LETTER OPEN E] + '\u{025C}' | // ɜ [LATIN SMALL LETTER REVERSED OPEN E] + '\u{025D}' | // ɝ [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK] + '\u{025E}' | // ɞ [LATIN SMALL LETTER CLOSED REVERSED OPEN E] + '\u{029A}' | // ʚ [LATIN SMALL LETTER CLOSED OPEN E] + '\u{1D08}' | // ᴈ [LATIN SMALL LETTER TURNED OPEN E] + '\u{1D92}' | // ᶒ [LATIN SMALL LETTER E WITH RETROFLEX HOOK] + '\u{1D93}' | // ᶓ [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK] + '\u{1D94}' | // ᶔ [LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK] + '\u{1E15}' | // ḕ [LATIN SMALL LETTER E WITH MACRON AND GRAVE] + '\u{1E17}' | // ḗ [LATIN SMALL LETTER E WITH MACRON AND ACUTE] + '\u{1E19}' | // ḙ [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW] + '\u{1E1B}' | // ḛ [LATIN SMALL LETTER E WITH TILDE BELOW] + '\u{1E1D}' | // ḝ [LATIN SMALL LETTER E WITH CEDILLA AND BREVE] + '\u{1EB9}' | // ẹ [LATIN SMALL LETTER E WITH DOT BELOW] + '\u{1EBB}' | // ẻ [LATIN SMALL LETTER E WITH HOOK ABOVE] + '\u{1EBD}' | // ẽ [LATIN SMALL LETTER E WITH TILDE] + '\u{1EBF}' | // ế [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE] + '\u{1EC1}' | // ề [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE] + '\u{1EC3}' | // ể [LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE] + '\u{1EC5}' | // ễ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE] + '\u{1EC7}' | // ệ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW] + '\u{2091}' | // ₑ [LATIN SUBSCRIPT SMALL LETTER E] + '\u{24D4}' | // ⓔ [CIRCLED LATIN SMALL LETTER E] + '\u{2C78}' | // ⱸ [LATIN SMALL LETTER E WITH NOTCH] + '\u{FF45}' // e [FULLWIDTH LATIN SMALL LETTER E] + => Some("e"), + '\u{24A0}' // ⒠ [PARENTHESIZED LATIN SMALL LETTER E] + => Some("(e)"), + '\u{0191}' | // Ƒ [LATIN CAPITAL LETTER F WITH HOOK] + '\u{1E1E}' | // Ḟ [LATIN CAPITAL LETTER F WITH DOT ABOVE] + 
'\u{24BB}' | // Ⓕ [CIRCLED LATIN CAPITAL LETTER F] + '\u{A730}' | // ꜰ [LATIN LETTER SMALL CAPITAL F] + '\u{A77B}' | // Ꝼ [LATIN CAPITAL LETTER INSULAR F] + '\u{A7FB}' | // ꟻ [LATIN EPIGRAPHIC LETTER REVERSED F] + '\u{FF26}' // F [FULLWIDTH LATIN CAPITAL LETTER F] + => Some("F"), + '\u{0192}' | // ƒ [LATIN SMALL LETTER F WITH HOOK] + '\u{1D6E}' | // ᵮ [LATIN SMALL LETTER F WITH MIDDLE TILDE] + '\u{1D82}' | // ᶂ [LATIN SMALL LETTER F WITH PALATAL HOOK] + '\u{1E1F}' | // ḟ [LATIN SMALL LETTER F WITH DOT ABOVE] + '\u{1E9B}' | // ẛ [LATIN SMALL LETTER LONG S WITH DOT ABOVE] + '\u{24D5}' | // ⓕ [CIRCLED LATIN SMALL LETTER F] + '\u{A77C}' | // ꝼ [LATIN SMALL LETTER INSULAR F] + '\u{FF46}' // f [FULLWIDTH LATIN SMALL LETTER F] + => Some("f"), + '\u{24A1}' // ⒡ [PARENTHESIZED LATIN SMALL LETTER F] + => Some("(f)"), + '\u{FB00}' // ff [LATIN SMALL LIGATURE FF] + => Some("ff"), + '\u{FB03}' // ffi [LATIN SMALL LIGATURE FFI] + => Some("ffi"), + '\u{FB04}' // ffl [LATIN SMALL LIGATURE FFL] + => Some("ffl"), + '\u{FB01}' // fi [LATIN SMALL LIGATURE FI] + => Some("fi"), + '\u{FB02}' // fl [LATIN SMALL LIGATURE FL] + => Some("fl"), + '\u{011C}' | // Ĝ [LATIN CAPITAL LETTER G WITH CIRCUMFLEX] + '\u{011E}' | // Ğ [LATIN CAPITAL LETTER G WITH BREVE] + '\u{0120}' | // Ġ [LATIN CAPITAL LETTER G WITH DOT ABOVE] + '\u{0122}' | // Ģ [LATIN CAPITAL LETTER G WITH CEDILLA] + '\u{0193}' | // Ɠ [LATIN CAPITAL LETTER G WITH HOOK] + '\u{01E4}' | // Ǥ [LATIN CAPITAL LETTER G WITH STROKE] + '\u{01E5}' | // ǥ [LATIN SMALL LETTER G WITH STROKE] + '\u{01E6}' | // Ǧ [LATIN CAPITAL LETTER G WITH CARON] + '\u{01E7}' | // ǧ [LATIN SMALL LETTER G WITH CARON] + '\u{01F4}' | // Ǵ [LATIN CAPITAL LETTER G WITH ACUTE] + '\u{0262}' | // ɢ [LATIN LETTER SMALL CAPITAL G] + '\u{029B}' | // ʛ [LATIN LETTER SMALL CAPITAL G WITH HOOK] + '\u{1E20}' | // Ḡ [LATIN CAPITAL LETTER G WITH MACRON] + '\u{24BC}' | // Ⓖ [CIRCLED LATIN CAPITAL LETTER G] + '\u{A77D}' | // Ᵹ [LATIN CAPITAL LETTER INSULAR G] + '\u{A77E}' | // Ꝿ 
[LATIN CAPITAL LETTER TURNED INSULAR G] + '\u{FF27}' // G [FULLWIDTH LATIN CAPITAL LETTER G] + => Some("G"), + '\u{011D}' | // ĝ [LATIN SMALL LETTER G WITH CIRCUMFLEX] + '\u{011F}' | // ğ [LATIN SMALL LETTER G WITH BREVE] + '\u{0121}' | // ġ [LATIN SMALL LETTER G WITH DOT ABOVE] + '\u{0123}' | // ģ [LATIN SMALL LETTER G WITH CEDILLA] + '\u{01F5}' | // ǵ [LATIN SMALL LETTER G WITH ACUTE] + '\u{0260}' | // ɠ [LATIN SMALL LETTER G WITH HOOK] + '\u{0261}' | // ɡ [LATIN SMALL LETTER SCRIPT G] + '\u{1D77}' | // ᵷ [LATIN SMALL LETTER TURNED G] + '\u{1D79}' | // ᵹ [LATIN SMALL LETTER INSULAR G] + '\u{1D83}' | // ᶃ [LATIN SMALL LETTER G WITH PALATAL HOOK] + '\u{1E21}' | // ḡ [LATIN SMALL LETTER G WITH MACRON] + '\u{24D6}' | // ⓖ [CIRCLED LATIN SMALL LETTER G] + '\u{A77F}' | // ꝿ [LATIN SMALL LETTER TURNED INSULAR G] + '\u{FF47}' // g [FULLWIDTH LATIN SMALL LETTER G] + => Some("g"), + '\u{24A2}' // ⒢ [PARENTHESIZED LATIN SMALL LETTER G] + => Some("(g)"), + '\u{0124}' | // Ĥ [LATIN CAPITAL LETTER H WITH CIRCUMFLEX] + '\u{0126}' | // Ħ [LATIN CAPITAL LETTER H WITH STROKE] + '\u{021E}' | // Ȟ [LATIN CAPITAL LETTER H WITH CARON] + '\u{029C}' | // ʜ [LATIN LETTER SMALL CAPITAL H] + '\u{1E22}' | // Ḣ [LATIN CAPITAL LETTER H WITH DOT ABOVE] + '\u{1E24}' | // Ḥ [LATIN CAPITAL LETTER H WITH DOT BELOW] + '\u{1E26}' | // Ḧ [LATIN CAPITAL LETTER H WITH DIAERESIS] + '\u{1E28}' | // Ḩ [LATIN CAPITAL LETTER H WITH CEDILLA] + '\u{1E2A}' | // Ḫ [LATIN CAPITAL LETTER H WITH BREVE BELOW] + '\u{24BD}' | // Ⓗ [CIRCLED LATIN CAPITAL LETTER H] + '\u{2C67}' | // Ⱨ [LATIN CAPITAL LETTER H WITH DESCENDER] + '\u{2C75}' | // Ⱶ [LATIN CAPITAL LETTER HALF H] + '\u{FF28}' // H [FULLWIDTH LATIN CAPITAL LETTER H] + => Some("H"), + '\u{0125}' | // ĥ [LATIN SMALL LETTER H WITH CIRCUMFLEX] + '\u{0127}' | // ħ [LATIN SMALL LETTER H WITH STROKE] + '\u{021F}' | // ȟ [LATIN SMALL LETTER H WITH CARON] + '\u{0265}' | // ɥ [LATIN SMALL LETTER TURNED H] + '\u{0266}' | // ɦ [LATIN SMALL LETTER H WITH HOOK] + '\u{02AE}' 
| // ʮ [LATIN SMALL LETTER TURNED H WITH FISHHOOK] + '\u{02AF}' | // ʯ [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL] + '\u{1E23}' | // ḣ [LATIN SMALL LETTER H WITH DOT ABOVE] + '\u{1E25}' | // ḥ [LATIN SMALL LETTER H WITH DOT BELOW] + '\u{1E27}' | // ḧ [LATIN SMALL LETTER H WITH DIAERESIS] + '\u{1E29}' | // ḩ [LATIN SMALL LETTER H WITH CEDILLA] + '\u{1E2B}' | // ḫ [LATIN SMALL LETTER H WITH BREVE BELOW] + '\u{1E96}' | // ẖ [LATIN SMALL LETTER H WITH LINE BELOW] + '\u{24D7}' | // ⓗ [CIRCLED LATIN SMALL LETTER H] + '\u{2C68}' | // ⱨ [LATIN SMALL LETTER H WITH DESCENDER] + '\u{2C76}' | // ⱶ [LATIN SMALL LETTER HALF H] + '\u{FF48}' // h [FULLWIDTH LATIN SMALL LETTER H] + => Some("h"), + '\u{01F6}' // Ƕ http://en.wikipedia.org/wiki/Hwair [LATIN CAPITAL LETTER HWAIR] + => Some("HV"), + '\u{24A3}' // ⒣ [PARENTHESIZED LATIN SMALL LETTER H] + => Some("(h)"), + '\u{0195}' // ƕ [LATIN SMALL LETTER HV] + => Some("hv"), + '\u{00CC}' | // Ì [LATIN CAPITAL LETTER I WITH GRAVE] + '\u{00CD}' | // Í [LATIN CAPITAL LETTER I WITH ACUTE] + '\u{00CE}' | // Î [LATIN CAPITAL LETTER I WITH CIRCUMFLEX] + '\u{00CF}' | // Ï [LATIN CAPITAL LETTER I WITH DIAERESIS] + '\u{0128}' | // Ĩ [LATIN CAPITAL LETTER I WITH TILDE] + '\u{012A}' | // Ī [LATIN CAPITAL LETTER I WITH MACRON] + '\u{012C}' | // Ĭ [LATIN CAPITAL LETTER I WITH BREVE] + '\u{012E}' | // Į [LATIN CAPITAL LETTER I WITH OGONEK] + '\u{0130}' | // İ [LATIN CAPITAL LETTER I WITH DOT ABOVE] + '\u{0196}' | // Ɩ [LATIN CAPITAL LETTER IOTA] + '\u{0197}' | // Ɨ [LATIN CAPITAL LETTER I WITH STROKE] + '\u{01CF}' | // Ǐ [LATIN CAPITAL LETTER I WITH CARON] + '\u{0208}' | // Ȉ [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE] + '\u{020A}' | // Ȋ [LATIN CAPITAL LETTER I WITH INVERTED BREVE] + '\u{026A}' | // ɪ [LATIN LETTER SMALL CAPITAL I] + '\u{1D7B}' | // ᵻ [LATIN SMALL CAPITAL LETTER I WITH STROKE] + '\u{1E2C}' | // Ḭ [LATIN CAPITAL LETTER I WITH TILDE BELOW] + '\u{1E2E}' | // Ḯ [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE] + '\u{1EC8}' | // 
Ỉ [LATIN CAPITAL LETTER I WITH HOOK ABOVE] + '\u{1ECA}' | // Ị [LATIN CAPITAL LETTER I WITH DOT BELOW] + '\u{24BE}' | // Ⓘ [CIRCLED LATIN CAPITAL LETTER I] + '\u{A7FE}' | // ꟾ [LATIN EPIGRAPHIC LETTER I LONGA] + '\u{FF29}' // I [FULLWIDTH LATIN CAPITAL LETTER I] + => Some("I"), + '\u{00EC}' | // ì [LATIN SMALL LETTER I WITH GRAVE] + '\u{00ED}' | // í [LATIN SMALL LETTER I WITH ACUTE] + '\u{00EE}' | // î [LATIN SMALL LETTER I WITH CIRCUMFLEX] + '\u{00EF}' | // ï [LATIN SMALL LETTER I WITH DIAERESIS] + '\u{0129}' | // ĩ [LATIN SMALL LETTER I WITH TILDE] + '\u{012B}' | // ī [LATIN SMALL LETTER I WITH MACRON] + '\u{012D}' | // ĭ [LATIN SMALL LETTER I WITH BREVE] + '\u{012F}' | // į [LATIN SMALL LETTER I WITH OGONEK] + '\u{0131}' | // ı [LATIN SMALL LETTER DOTLESS I] + '\u{01D0}' | // ǐ [LATIN SMALL LETTER I WITH CARON] + '\u{0209}' | // ȉ [LATIN SMALL LETTER I WITH DOUBLE GRAVE] + '\u{020B}' | // ȋ [LATIN SMALL LETTER I WITH INVERTED BREVE] + '\u{0268}' | // ɨ [LATIN SMALL LETTER I WITH STROKE] + '\u{1D09}' | // ᴉ [LATIN SMALL LETTER TURNED I] + '\u{1D62}' | // ᵢ [LATIN SUBSCRIPT SMALL LETTER I] + '\u{1D7C}' | // ᵼ [LATIN SMALL LETTER IOTA WITH STROKE] + '\u{1D96}' | // ᶖ [LATIN SMALL LETTER I WITH RETROFLEX HOOK] + '\u{1E2D}' | // ḭ [LATIN SMALL LETTER I WITH TILDE BELOW] + '\u{1E2F}' | // ḯ [LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE] + '\u{1EC9}' | // ỉ [LATIN SMALL LETTER I WITH HOOK ABOVE] + '\u{1ECB}' | // ị [LATIN SMALL LETTER I WITH DOT BELOW] + '\u{2071}' | // ⁱ [SUPERSCRIPT LATIN SMALL LETTER I] + '\u{24D8}' | // ⓘ [CIRCLED LATIN SMALL LETTER I] + '\u{FF49}' // i [FULLWIDTH LATIN SMALL LETTER I] + => Some("i"), + '\u{0132}' // IJ [LATIN CAPITAL LIGATURE IJ] + => Some("IJ"), + '\u{24A4}' // ⒤ [PARENTHESIZED LATIN SMALL LETTER I] + => Some("(i)"), + '\u{0133}' // ij [LATIN SMALL LIGATURE IJ] + => Some("ij"), + '\u{0134}' | // Ĵ [LATIN CAPITAL LETTER J WITH CIRCUMFLEX] + '\u{0248}' | // Ɉ [LATIN CAPITAL LETTER J WITH STROKE] + '\u{1D0A}' | // ᴊ [LATIN LETTER 
SMALL CAPITAL J] + '\u{24BF}' | // Ⓙ [CIRCLED LATIN CAPITAL LETTER J] + '\u{FF2A}' // J [FULLWIDTH LATIN CAPITAL LETTER J] + => Some("J"), + '\u{0135}' | // ĵ [LATIN SMALL LETTER J WITH CIRCUMFLEX] + '\u{01F0}' | // ǰ [LATIN SMALL LETTER J WITH CARON] + '\u{0237}' | // ȷ [LATIN SMALL LETTER DOTLESS J] + '\u{0249}' | // ɉ [LATIN SMALL LETTER J WITH STROKE] + '\u{025F}' | // ɟ [LATIN SMALL LETTER DOTLESS J WITH STROKE] + '\u{0284}' | // ʄ [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK] + '\u{029D}' | // ʝ [LATIN SMALL LETTER J WITH CROSSED-TAIL] + '\u{24D9}' | // ⓙ [CIRCLED LATIN SMALL LETTER J] + '\u{2C7C}' | // ⱼ [LATIN SUBSCRIPT SMALL LETTER J] + '\u{FF4A}' // j [FULLWIDTH LATIN SMALL LETTER J] + => Some("j"), + '\u{24A5}' // ⒥ [PARENTHESIZED LATIN SMALL LETTER J] + => Some("(j)"), + '\u{0136}' | // Ķ [LATIN CAPITAL LETTER K WITH CEDILLA] + '\u{0198}' | // Ƙ [LATIN CAPITAL LETTER K WITH HOOK] + '\u{01E8}' | // Ǩ [LATIN CAPITAL LETTER K WITH CARON] + '\u{1D0B}' | // ᴋ [LATIN LETTER SMALL CAPITAL K] + '\u{1E30}' | // Ḱ [LATIN CAPITAL LETTER K WITH ACUTE] + '\u{1E32}' | // Ḳ [LATIN CAPITAL LETTER K WITH DOT BELOW] + '\u{1E34}' | // Ḵ [LATIN CAPITAL LETTER K WITH LINE BELOW] + '\u{24C0}' | // Ⓚ [CIRCLED LATIN CAPITAL LETTER K] + '\u{2C69}' | // Ⱪ [LATIN CAPITAL LETTER K WITH DESCENDER] + '\u{A740}' | // Ꝁ [LATIN CAPITAL LETTER K WITH STROKE] + '\u{A742}' | // Ꝃ [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE] + '\u{A744}' | // Ꝅ [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE] + '\u{FF2B}' // K [FULLWIDTH LATIN CAPITAL LETTER K] + => Some("K"), + '\u{0137}' | // ķ [LATIN SMALL LETTER K WITH CEDILLA] + '\u{0199}' | // ƙ [LATIN SMALL LETTER K WITH HOOK] + '\u{01E9}' | // ǩ [LATIN SMALL LETTER K WITH CARON] + '\u{029E}' | // ʞ [LATIN SMALL LETTER TURNED K] + '\u{1D84}' | // ᶄ [LATIN SMALL LETTER K WITH PALATAL HOOK] + '\u{1E31}' | // ḱ [LATIN SMALL LETTER K WITH ACUTE] + '\u{1E33}' | // ḳ [LATIN SMALL LETTER K WITH DOT BELOW] + '\u{1E35}' | // ḵ [LATIN SMALL 
LETTER K WITH LINE BELOW] + '\u{24DA}' | // ⓚ [CIRCLED LATIN SMALL LETTER K] + '\u{2C6A}' | // ⱪ [LATIN SMALL LETTER K WITH DESCENDER] + '\u{A741}' | // ꝁ [LATIN SMALL LETTER K WITH STROKE] + '\u{A743}' | // ꝃ [LATIN SMALL LETTER K WITH DIAGONAL STROKE] + '\u{A745}' | // ꝅ [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE] + '\u{FF4B}' // k [FULLWIDTH LATIN SMALL LETTER K] + => Some("k"), + '\u{24A6}' // ⒦ [PARENTHESIZED LATIN SMALL LETTER K] + => Some("(k)"), + '\u{0139}' | // Ĺ [LATIN CAPITAL LETTER L WITH ACUTE] + '\u{013B}' | // Ļ [LATIN CAPITAL LETTER L WITH CEDILLA] + '\u{013D}' | // Ľ [LATIN CAPITAL LETTER L WITH CARON] + '\u{013F}' | // Ŀ [LATIN CAPITAL LETTER L WITH MIDDLE DOT] + '\u{0141}' | // Ł [LATIN CAPITAL LETTER L WITH STROKE] + '\u{023D}' | // Ƚ [LATIN CAPITAL LETTER L WITH BAR] + '\u{029F}' | // ʟ [LATIN LETTER SMALL CAPITAL L] + '\u{1D0C}' | // ᴌ [LATIN LETTER SMALL CAPITAL L WITH STROKE] + '\u{1E36}' | // Ḷ [LATIN CAPITAL LETTER L WITH DOT BELOW] + '\u{1E38}' | // Ḹ [LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON] + '\u{1E3A}' | // Ḻ [LATIN CAPITAL LETTER L WITH LINE BELOW] + '\u{1E3C}' | // Ḽ [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW] + '\u{24C1}' | // Ⓛ [CIRCLED LATIN CAPITAL LETTER L] + '\u{2C60}' | // Ⱡ [LATIN CAPITAL LETTER L WITH DOUBLE BAR] + '\u{2C62}' | // Ɫ [LATIN CAPITAL LETTER L WITH MIDDLE TILDE] + '\u{A746}' | // Ꝇ [LATIN CAPITAL LETTER BROKEN L] + '\u{A748}' | // Ꝉ [LATIN CAPITAL LETTER L WITH HIGH STROKE] + '\u{A780}' | // Ꞁ [LATIN CAPITAL LETTER TURNED L] + '\u{FF2C}' // L [FULLWIDTH LATIN CAPITAL LETTER L] + => Some("L"), + '\u{013A}' | // ĺ [LATIN SMALL LETTER L WITH ACUTE] + '\u{013C}' | // ļ [LATIN SMALL LETTER L WITH CEDILLA] + '\u{013E}' | // ľ [LATIN SMALL LETTER L WITH CARON] + '\u{0140}' | // ŀ [LATIN SMALL LETTER L WITH MIDDLE DOT] + '\u{0142}' | // ł [LATIN SMALL LETTER L WITH STROKE] + '\u{019A}' | // ƚ [LATIN SMALL LETTER L WITH BAR] + '\u{0234}' | // ȴ [LATIN SMALL LETTER L WITH CURL] + '\u{026B}' | // ɫ 
[LATIN SMALL LETTER L WITH MIDDLE TILDE] + '\u{026C}' | // ɬ [LATIN SMALL LETTER L WITH BELT] + '\u{026D}' | // ɭ [LATIN SMALL LETTER L WITH RETROFLEX HOOK] + '\u{1D85}' | // ᶅ [LATIN SMALL LETTER L WITH PALATAL HOOK] + '\u{1E37}' | // ḷ [LATIN SMALL LETTER L WITH DOT BELOW] + '\u{1E39}' | // ḹ [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON] + '\u{1E3B}' | // ḻ [LATIN SMALL LETTER L WITH LINE BELOW] + '\u{1E3D}' | // ḽ [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW] + '\u{24DB}' | // ⓛ [CIRCLED LATIN SMALL LETTER L] + '\u{2C61}' | // ⱡ [LATIN SMALL LETTER L WITH DOUBLE BAR] + '\u{A747}' | // ꝇ [LATIN SMALL LETTER BROKEN L] + '\u{A749}' | // ꝉ [LATIN SMALL LETTER L WITH HIGH STROKE] + '\u{A781}' | // ꞁ [LATIN SMALL LETTER TURNED L] + '\u{FF4C}' // l [FULLWIDTH LATIN SMALL LETTER L] + => Some("l"), + '\u{01C7}' // LJ [LATIN CAPITAL LETTER LJ] + => Some("LJ"), + '\u{1EFA}' // Ỻ [LATIN CAPITAL LETTER MIDDLE-WELSH LL] + => Some("LL"), + '\u{01C8}' // Lj [LATIN CAPITAL LETTER L WITH SMALL LETTER J] + => Some("Lj"), + '\u{24A7}' // ⒧ [PARENTHESIZED LATIN SMALL LETTER L] + => Some("(l)"), + '\u{01C9}' // lj [LATIN SMALL LETTER LJ] + => Some("lj"), + '\u{1EFB}' // ỻ [LATIN SMALL LETTER MIDDLE-WELSH LL] + => Some("ll"), + '\u{02AA}' // ʪ [LATIN SMALL LETTER LS DIGRAPH] + => Some("ls"), + '\u{02AB}' // ʫ [LATIN SMALL LETTER LZ DIGRAPH] + => Some("lz"), + '\u{019C}' | // Ɯ [LATIN CAPITAL LETTER TURNED M] + '\u{1D0D}' | // ᴍ [LATIN LETTER SMALL CAPITAL M] + '\u{1E3E}' | // Ḿ [LATIN CAPITAL LETTER M WITH ACUTE] + '\u{1E40}' | // Ṁ [LATIN CAPITAL LETTER M WITH DOT ABOVE] + '\u{1E42}' | // Ṃ [LATIN CAPITAL LETTER M WITH DOT BELOW] + '\u{24C2}' | // Ⓜ [CIRCLED LATIN CAPITAL LETTER M] + '\u{2C6E}' | // Ɱ [LATIN CAPITAL LETTER M WITH HOOK] + '\u{A7FD}' | // ꟽ [LATIN EPIGRAPHIC LETTER INVERTED M] + '\u{A7FF}' | // ꟿ [LATIN EPIGRAPHIC LETTER ARCHAIC M] + '\u{FF2D}' // M [FULLWIDTH LATIN CAPITAL LETTER M] + => Some("M"), + '\u{026F}' | // ɯ [LATIN SMALL LETTER TURNED M] + '\u{0270}' | // 
ɰ [LATIN SMALL LETTER TURNED M WITH LONG LEG] + '\u{0271}' | // ɱ [LATIN SMALL LETTER M WITH HOOK] + '\u{1D6F}' | // ᵯ [LATIN SMALL LETTER M WITH MIDDLE TILDE] + '\u{1D86}' | // ᶆ [LATIN SMALL LETTER M WITH PALATAL HOOK] + '\u{1E3F}' | // ḿ [LATIN SMALL LETTER M WITH ACUTE] + '\u{1E41}' | // ṁ [LATIN SMALL LETTER M WITH DOT ABOVE] + '\u{1E43}' | // ṃ [LATIN SMALL LETTER M WITH DOT BELOW] + '\u{24DC}' | // ⓜ [CIRCLED LATIN SMALL LETTER M] + '\u{FF4D}' // m [FULLWIDTH LATIN SMALL LETTER M] + => Some("m"), + '\u{24A8}' // ⒨ [PARENTHESIZED LATIN SMALL LETTER M] + => Some("(m)"), + '\u{00D1}' | // Ñ [LATIN CAPITAL LETTER N WITH TILDE] + '\u{0143}' | // Ń [LATIN CAPITAL LETTER N WITH ACUTE] + '\u{0145}' | // Ņ [LATIN CAPITAL LETTER N WITH CEDILLA] + '\u{0147}' | // Ň [LATIN CAPITAL LETTER N WITH CARON] + '\u{014A}' | // Ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN CAPITAL LETTER ENG] + '\u{019D} |