diff options
author | Jonathan Slenders <jonathan@slenders.be> | 2014-09-20 15:56:12 +0200 |
---|---|---|
committer | Jonathan Slenders <jonathan@slenders.be> | 2014-09-20 15:56:12 +0200 |
commit | 7133f3e0f9c2a39be2906e86570e473089381283 (patch) | |
tree | 89d1f1712f09dec688267c882aad4d02bb84845c | |
parent | 578ffc772edc4cbde5c8e967f28410b32789552d (diff) |
Ship latest wcwidth library + also add wcwidth to requirements. (Prefer version from Pip)
-rw-r--r-- | prompt_toolkit/libs/wcwidth/__init__.py | 10 | ||||
-rw-r--r-- | prompt_toolkit/libs/wcwidth/table_comb.py | 135 | ||||
-rw-r--r-- | prompt_toolkit/libs/wcwidth/table_wide.py | 40 | ||||
-rw-r--r-- | prompt_toolkit/libs/wcwidth/wcwidth.py | 188 | ||||
-rw-r--r-- | prompt_toolkit/renderer.py | 7 | ||||
-rw-r--r-- | setup.py | 10 |
6 files changed, 383 insertions, 7 deletions
diff --git a/prompt_toolkit/libs/wcwidth/__init__.py b/prompt_toolkit/libs/wcwidth/__init__.py new file mode 100644 index 00000000..0da99b01 --- /dev/null +++ b/prompt_toolkit/libs/wcwidth/__init__.py @@ -0,0 +1,10 @@ +" wcwidth module, https://github.com/jquast/wcwidth " +from .wcwidth import ( + wcwidth, + wcswidth, +) + +__all__ = [ + 'wcwidth', + 'wcswidth', +] diff --git a/prompt_toolkit/libs/wcwidth/table_comb.py b/prompt_toolkit/libs/wcwidth/table_comb.py new file mode 100644 index 00000000..94eccfc9 --- /dev/null +++ b/prompt_toolkit/libs/wcwidth/table_comb.py @@ -0,0 +1,135 @@ +# Generated: 2014-05-05T02:09:32.625406+00:00 +# Source: DerivedCombiningClass-6.3.0.txt +# Date: 2013-07-05, 14:08:44 GMT [MD] +NONZERO_COMBINING = ( + (0x0300, 0x034e,), # Combining Grave Accent ..Combining Upwards Arrow + (0x0350, 0x036f,), # Combining Right Arrowhea..Combining Latin Small Le + (0x0483, 0x0487,), # Combining Cyrillic Titlo..Combining Cyrillic Pokry + (0x0591, 0x05bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg + (0x05bf, 0x05bf,), # Hebrew Point Rafe ..Hebrew Point Rafe + (0x05c1, 0x05c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot + (0x05c4, 0x05c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot + (0x05c7, 0x05c7,), # Hebrew Point Qamats Qata..Hebrew Point Qamats Qata + (0x0610, 0x061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra + (0x064b, 0x065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below + (0x0670, 0x0670,), # Arabic Letter Superscrip..Arabic Letter Superscrip + (0x06d6, 0x06dc,), # Arabic Small High Ligatu..Arabic Small High Seen + (0x06df, 0x06e4,), # Arabic Small High Rounde..Arabic Small High Madda + (0x06e7, 0x06e8,), # Arabic Small High Yeh ..Arabic Small High Noon + (0x06ea, 0x06ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem + (0x0711, 0x0711,), # Syriac Letter Superscrip..Syriac Letter Superscrip + (0x0730, 0x074a,), # Syriac Pthaha Above ..Syriac Barrekh + (0x07eb, 0x07f3,), # Nko Combining Short High..Nko Combining Double 
Dot + (0x0816, 0x0819,), # Samaritan Mark In ..Samaritan Mark Dagesh + (0x081b, 0x0823,), # Samaritan Mark Epentheti..Samaritan Vowel Sign A + (0x0825, 0x0827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U + (0x0829, 0x082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa + (0x0859, 0x085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark + (0x08e4, 0x08fe,), # Arabic Curly Fatha ..Arabic Damma With Dot + (0x093c, 0x093c,), # Devanagari Sign Nukta ..Devanagari Sign Nukta + (0x094d, 0x094d,), # Devanagari Sign Virama ..Devanagari Sign Virama + (0x0951, 0x0954,), # Devanagari Stress Sign U..Devanagari Acute Accent + (0x09bc, 0x09bc,), # Bengali Sign Nukta ..Bengali Sign Nukta + (0x09cd, 0x09cd,), # Bengali Sign Virama ..Bengali Sign Virama + (0x0a3c, 0x0a3c,), # Gurmukhi Sign Nukta ..Gurmukhi Sign Nukta + (0x0a4d, 0x0a4d,), # Gurmukhi Sign Virama ..Gurmukhi Sign Virama + (0x0abc, 0x0abc,), # Gujarati Sign Nukta ..Gujarati Sign Nukta + (0x0acd, 0x0acd,), # Gujarati Sign Virama ..Gujarati Sign Virama + (0x0b3c, 0x0b3c,), # Oriya Sign Nukta ..Oriya Sign Nukta + (0x0b4d, 0x0b4d,), # Oriya Sign Virama ..Oriya Sign Virama + (0x0bcd, 0x0bcd,), # Tamil Sign Virama ..Tamil Sign Virama + (0x0c4d, 0x0c4d,), # Telugu Sign Virama ..Telugu Sign Virama + (0x0c55, 0x0c56,), # Telugu Length Mark ..Telugu Ai Length Mark + (0x0cbc, 0x0cbc,), # Kannada Sign Nukta ..Kannada Sign Nukta + (0x0ccd, 0x0ccd,), # Kannada Sign Virama ..Kannada Sign Virama + (0x0d4d, 0x0d4d,), # Malayalam Sign Virama ..Malayalam Sign Virama + (0x0dca, 0x0dca,), # Sinhala Sign Al-lakuna ..Sinhala Sign Al-lakuna + (0x0e38, 0x0e3a,), # Thai Character Sara U ..Thai Character Phinthu + (0x0e48, 0x0e4b,), # Thai Character Mai Ek ..Thai Character Mai Chatt + (0x0eb8, 0x0eb9,), # Lao Vowel Sign U ..Lao Vowel Sign Uu + (0x0ec8, 0x0ecb,), # Lao Tone Mai Ek ..Lao Tone Mai Catawa + (0x0f18, 0x0f19,), # Tibetan Astrological Sig..Tibetan Astrological Sig + (0x0f35, 0x0f35,), # Tibetan Mark Ngas Bzung 
..Tibetan Mark Ngas Bzung + (0x0f37, 0x0f37,), # Tibetan Mark Ngas Bzung ..Tibetan Mark Ngas Bzung + (0x0f39, 0x0f39,), # Tibetan Mark Tsa -phru ..Tibetan Mark Tsa -phru + (0x0f71, 0x0f72,), # Tibetan Vowel Sign Aa ..Tibetan Vowel Sign I + (0x0f74, 0x0f74,), # Tibetan Vowel Sign U ..Tibetan Vowel Sign U + (0x0f7a, 0x0f7d,), # Tibetan Vowel Sign E ..Tibetan Vowel Sign Oo + (0x0f80, 0x0f80,), # Tibetan Vowel Sign Rever..Tibetan Vowel Sign Rever + (0x0f82, 0x0f84,), # Tibetan Sign Nyi Zla Naa..Tibetan Mark Halanta + (0x0f86, 0x0f87,), # Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags + (0x0fc6, 0x0fc6,), # Tibetan Symbol Padma Gda..Tibetan Symbol Padma Gda + (0x1037, 0x1037,), # Myanmar Sign Dot Below ..Myanmar Sign Dot Below + (0x1039, 0x103a,), # Myanmar Sign Virama ..Myanmar Sign Asat + (0x108d, 0x108d,), # Myanmar Sign Shan Counci..Myanmar Sign Shan Counci + (0x135d, 0x135f,), # Ethiopic Combining Gemin..Ethiopic Combining Gemin + (0x1714, 0x1714,), # Tagalog Sign Virama ..Tagalog Sign Virama + (0x1734, 0x1734,), # Hanunoo Sign Pamudpod ..Hanunoo Sign Pamudpod + (0x17d2, 0x17d2,), # Khmer Sign Coeng ..Khmer Sign Coeng + (0x17dd, 0x17dd,), # Khmer Sign Atthacan ..Khmer Sign Atthacan + (0x18a9, 0x18a9,), # Mongolian Letter Ali Gal..Mongolian Letter Ali Gal + (0x1939, 0x193b,), # Limbu Sign Mukphreng ..Limbu Sign Sa-i + (0x1a17, 0x1a18,), # Buginese Vowel Sign I ..Buginese Vowel Sign U + (0x1a60, 0x1a60,), # Tai Tham Sign Sakot ..Tai Tham Sign Sakot + (0x1a75, 0x1a7c,), # Tai Tham Sign Tone-1 ..Tai Tham Sign Khuen-lue + (0x1a7f, 0x1a7f,), # Tai Tham Combining Crypt..Tai Tham Combining Crypt + (0x1b34, 0x1b34,), # Balinese Sign Rerekan ..Balinese Sign Rerekan + (0x1b44, 0x1b44,), # Balinese Adeg Adeg ..Balinese Adeg Adeg + (0x1b6b, 0x1b73,), # Balinese Musical Symbol ..Balinese Musical Symbol + (0x1baa, 0x1bab,), # Sundanese Sign Pamaaeh ..Sundanese Sign Virama + (0x1be6, 0x1be6,), # Batak Sign Tompi ..Batak Sign Tompi + (0x1bf2, 0x1bf3,), # Batak Pangolat ..Batak 
Panongonan + (0x1c37, 0x1c37,), # Lepcha Sign Nukta ..Lepcha Sign Nukta + (0x1cd0, 0x1cd2,), # Vedic Tone Karshana ..Vedic Tone Prenkha + (0x1cd4, 0x1ce0,), # Vedic Sign Yajurvedic Mi..Vedic Tone Rigvedic Kash + (0x1ce2, 0x1ce8,), # Vedic Sign Visarga Svari..Vedic Sign Visarga Anuda + (0x1ced, 0x1ced,), # Vedic Sign Tiryak ..Vedic Sign Tiryak + (0x1cf4, 0x1cf4,), # Vedic Tone Candra Above ..Vedic Tone Candra Above + (0x1dc0, 0x1de6,), # Combining Dotted Grave A..Combining Latin Small Le + (0x1dfc, 0x1dff,), # Combining Double Inverte..Combining Right Arrowhea + (0x20d0, 0x20dc,), # Combining Left Harpoon A..Combining Four Dots Abov + (0x20e1, 0x20e1,), # Combining Left Right Arr..Combining Left Right Arr + (0x20e5, 0x20f0,), # Combining Reverse Solidu..Combining Asterisk Above + (0x2cef, 0x2cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu + (0x2d7f, 0x2d7f,), # Tifinagh Consonant Joine..Tifinagh Consonant Joine + (0x2de0, 0x2dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette + (0x302a, 0x302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M + (0x3099, 0x309a,), # Combining Katakana-hirag..Combining Katakana-hirag + (0xa66f, 0xa66f,), # Combining Cyrillic Vzmet..Combining Cyrillic Vzmet + (0xa674, 0xa67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer + (0xa69f, 0xa69f,), # Combining Cyrillic Lette..Combining Cyrillic Lette + (0xa6f0, 0xa6f1,), # Bamum Combining Mark Koq..Bamum Combining Mark Tuk + (0xa806, 0xa806,), # Syloti Nagri Sign Hasant..Syloti Nagri Sign Hasant + (0xa8c4, 0xa8c4,), # Saurashtra Sign Virama ..Saurashtra Sign Virama + (0xa8e0, 0xa8f1,), # Combining Devanagari Dig..Combining Devanagari Sig + (0xa92b, 0xa92d,), # Kayah Li Tone Plophu ..Kayah Li Tone Calya Plop + (0xa953, 0xa953,), # Rejang Virama ..Rejang Virama + (0xa9b3, 0xa9b3,), # Javanese Sign Cecak Telu..Javanese Sign Cecak Telu + (0xa9c0, 0xa9c0,), # Javanese Pangkon ..Javanese Pangkon + (0xaab0, 0xaab0,), # Tai Viet Mai Kang ..Tai Viet Mai Kang + 
(0xaab2, 0xaab4,), # Tai Viet Vowel I ..Tai Viet Vowel U + (0xaab7, 0xaab8,), # Tai Viet Mai Khit ..Tai Viet Vowel Ia + (0xaabe, 0xaabf,), # Tai Viet Vowel Am ..Tai Viet Tone Mai Ek + (0xaac1, 0xaac1,), # Tai Viet Tone Mai Tho ..Tai Viet Tone Mai Tho + (0xaaf6, 0xaaf6,), # Meetei Mayek Virama ..Meetei Mayek Virama + (0xabed, 0xabed,), # Meetei Mayek Apun Iyek ..Meetei Mayek Apun Iyek + (0xfb1e, 0xfb1e,), # Hebrew Point Judeo-spani..Hebrew Point Judeo-spani + (0xfe20, 0xfe26,), # Combining Ligature Left ..Combining Conjoining Mac + (0x101fd, 0x101fd,), # Phaistos Disc Sign Combi..Phaistos Disc Sign Combi + (0x10a0d, 0x10a0d,), # Kharoshthi Sign Double R..Kharoshthi Sign Double R + (0x10a0f, 0x10a0f,), # Kharoshthi Sign Visarga ..Kharoshthi Sign Visarga + (0x10a38, 0x10a3a,), # Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo + (0x10a3f, 0x10a3f,), # Kharoshthi Virama ..Kharoshthi Virama + (0x11046, 0x11046,), # Brahmi Virama ..Brahmi Virama + (0x110b9, 0x110ba,), # Kaithi Sign Virama ..Kaithi Sign Nukta + (0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga + (0x11133, 0x11134,), # Chakma Virama ..Chakma Maayyaa + (0x111c0, 0x111c0,), # Sharada Sign Virama ..Sharada Sign Virama + (0x116b6, 0x116b7,), # Takri Sign Virama ..Takri Sign Nukta + (0x1d165, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d16d, 0x1d172,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d17b, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical +) diff --git a/prompt_toolkit/libs/wcwidth/table_wide.py b/prompt_toolkit/libs/wcwidth/table_wide.py new file mode 100644 index 00000000..4a5af71c --- /dev/null +++ b/prompt_toolkit/libs/wcwidth/table_wide.py @@ -0,0 +1,40 @@ +# Generated: 
2014-05-05T02:09:32.610884+00:00 +# Source: EastAsianWidth-6.3.0.txt +# Date: 2013-02-05, 20:09:00 GMT [KW, LI] +WIDE_EASTASIAN = ( + (0x1100, 0x115f,), # Hangul Choseong Kiyeok ..Hangul Choseong Filler + (0x2329, 0x232a,), # Left-pointing Angle Brac..Right-pointing Angle Bra + (0x2e80, 0x2e99,), # Cjk Radical Repeat ..Cjk Radical Rap + (0x2e9b, 0x2ef3,), # Cjk Radical Choke ..Cjk Radical C-simplified + (0x2f00, 0x2fd5,), # Kangxi Radical One ..Kangxi Radical Flute + (0x2ff0, 0x2ffb,), # Ideographic Description ..Ideographic Description + (0x3000, 0x303e,), # Ideographic Space ..Ideographic Variation In + (0x3041, 0x3096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke + (0x3099, 0x30ff,), # Combining Katakana-hirag..Katakana Digraph Koto + (0x3105, 0x312d,), # Bopomofo Letter B ..Bopomofo Letter Ih + (0x3131, 0x318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae + (0x3190, 0x31ba,), # Ideographic Annotation L..Bopomofo Letter Zy + (0x31c0, 0x31e3,), # Cjk Stroke T ..Cjk Stroke Q + (0x31f0, 0x321e,), # Katakana Letter Small Ku..Parenthesized Korean Cha + (0x3220, 0x3247,), # Parenthesized Ideograph ..Circled Ideograph Koto + (0x3250, 0x32fe,), # Partnership Sign ..Circled Katakana Wo + (0x3300, 0x4dbf,), # Square Apaato .. + (0x4e00, 0xa48c,), # Cjk Unified Ideograph-4e..Yi Syllable Yyr + (0xa490, 0xa4c6,), # Yi Radical Qot ..Yi Radical Ke + (0xa960, 0xa97c,), # Hangul Choseong Tikeut-m..Hangul Choseong Ssangyeo + (0xac00, 0xd7a3,), # Hangul Syllable Ga ..Hangul Syllable Hih + (0xf900, 0xfaff,), # Cjk Compatibility Ideogr.. 
+ (0xfe10, 0xfe19,), # Presentation Form For Ve..Presentation Form For Ve + (0xfe30, 0xfe52,), # Presentation Form For Ve..Small Full Stop + (0xfe54, 0xfe66,), # Small Semicolon ..Small Equals Sign + (0xfe68, 0xfe6b,), # Small Reverse Solidus ..Small Commercial At + (0xff01, 0xff60,), # Fullwidth Exclamation Ma..Fullwidth Right White Pa + (0xffe0, 0xffe6,), # Fullwidth Cent Sign ..Fullwidth Won Sign + (0x1b000, 0x1b001,), # Katakana Letter Archaic ..Hiragana Letter Archaic + (0x1f200, 0x1f202,), # Square Hiragana Hoka ..Squared Katakana Sa + (0x1f210, 0x1f23a,), # Squared Cjk Unified Ideo..Squared Cjk Unified Ideo + (0x1f240, 0x1f248,), # Tortoise Shell Bracketed..Tortoise Shell Bracketed + (0x1f250, 0x1f251,), # Circled Ideograph Advant..Circled Ideograph Accept + (0x20000, 0x2fffd,), # Cjk Unified Ideograph-20.. + (0x30000, 0x3fffd,), # (nil) .. +) diff --git a/prompt_toolkit/libs/wcwidth/wcwidth.py b/prompt_toolkit/libs/wcwidth/wcwidth.py new file mode 100644 index 00000000..1df34b19 --- /dev/null +++ b/prompt_toolkit/libs/wcwidth/wcwidth.py @@ -0,0 +1,188 @@ +""" +This is an implementation of wcwidth() and wcswidth() (defined in +IEEE Std 1003.1-2001) for Unicode. + +https://github.com/jquast/wcwidth +""" +# from Markus Kuhn's C code at: +# +# http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c +# +# This is an implementation of wcwidth() and wcswidth() (defined in +# IEEE Std 1003.1-2001) for Unicode. +# +# http://www.opengroup.org/onlinepubs/007904975/functions/wcwidth.html +# http://www.opengroup.org/onlinepubs/007904975/functions/wcswidth.html +# +# In fixed-width output devices, Latin characters all occupy a single +# "cell" position of equal width, whereas ideographic CJK characters +# occupy two such cells. Interoperability between terminal-line +# applications and (teletype-style) character terminals using the +# UTF-8 encoding requires agreement on which character should advance +# the cursor by how many cell positions. 
No established formal +# standards exist at present on which Unicode character shall occupy +# how many cell positions on character terminals. These routines are +# a first attempt of defining such behavior based on simple rules +# applied to data provided by the Unicode Consortium. +# +# For some graphical characters, the Unicode standard explicitly +# defines a character-cell width via the definition of the East Asian +# FullWidth (F), Wide (W), Half-width (H), and Narrow (Na) classes. +# In all these cases, there is no ambiguity about which width a +# terminal shall use. For characters in the East Asian Ambiguous (A) +# class, the width choice depends purely on a preference of backward +# compatibility with either historic CJK or Western practice. +# Choosing single-width for these characters is easy to justify as +# the appropriate long-term solution, as the CJK practice of +# displaying these characters as double-width comes from historic +# implementation simplicity (8-bit encoded characters were displayed +# single-width and 16-bit ones double-width, even for Greek, +# Cyrillic, etc.) and not any typographic considerations. +# +# Much less clear is the choice of width for the Not East Asian +# (Neutral) class. Existing practice does not dictate a width for any +# of these characters. It would nevertheless make sense +# typographically to allocate two character cells to characters such +# as for instance EM SPACE or VOLUME INTEGRAL, which cannot be +# represented adequately with a single-width glyph. The following +# routines at present merely assign a single-cell width to all +# neutral characters, in the interest of simplicity. This is not +# entirely satisfactory and should be reconsidered before +# establishing a formal standard in this area. At the moment, the +# decision which Not East Asian (Neutral) characters should be +# represented by double-width glyphs cannot yet be answered by +# applying a simple rule from the Unicode database content. 
Setting +# up a proper standard for the behavior of UTF-8 character terminals +# will require a careful analysis not only of each Unicode character, +# but also of each presentation form, something the author of these +# routines has avoided to do so far. +# +# http://www.unicode.org/unicode/reports/tr11/ +# +# Markus Kuhn -- 2007-05-26 (Unicode 5.0) +# +# Permission to use, copy, modify, and distribute this software +# for any purpose and without fee is hereby granted. The author +# disclaims all warranties with regard to this software. +# +# Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c + +from __future__ import division +from .table_wide import WIDE_EASTASIAN +from .table_comb import NONZERO_COMBINING + + +def _bisearch(ucs, table): + " auxiliary function for binary search in interval table " + lbound = 0 + ubound = len(table) - 1 + + if ucs < table[0][0] or ucs > table[ubound][1]: + return 0 + while ubound >= lbound: + mid = (lbound + ubound) // 2 + if ucs > table[mid][1]: + lbound = mid + 1 + elif ucs < table[mid][0]: + ubound = mid - 1 + else: + return 1 + + return 0 + + +def wcwidth(wc): + """wcwidth(wc) -> int + + The wcwidth() function returns 0 if the wc argument has no printable effect + on a terminal (such as NUL '\0'), -1 if wc is not printable, or has an + indeterminate effect on the terminal (control or combining). Otherwise, + the number of column positions the character occupies on a graphic terminal + (1 or 2). + + The following have a column width of -1: + + - Non-spacing and enclosing combining characters (general + category code Mn or Me in the Unicode database). Generally, + having a non-zero value returned by ``unicodedata.combining()``. + + - C0 control characters (U+0001 through U+001F). + + - DEL and C1 control characters (U+007F through U+009F). + + The following have a column width of 0: + + - NULL (U+0000, 0). + + - COMBINING GRAPHEME JOINER (U+034F). + + - ZERO WIDTH SPACE (U+200B) through + RIGHT-TO-LEFT MARK (U+200F). 
+ + - LINE SEPARATOR (U+2028) and + PARAGRAPH SEPARATOR (U+2029). + + - LEFT-TO-RIGHT EMBEDDING (U+202A) through + RIGHT-TO-LEFT OVERRIDE (U+202E). + + - WORD JOINER (U+2060) through + INVISIBLE SEPARATOR (U+2063). + + The following have a column width of 1: + + - SOFT HYPHEN (U+00AD) has a column width of 1. + + - All remaining characters (including all printable + ISO 8859-1 and WGL4 characters, Unicode control characters, + etc.) have a column width of 1. + + The following have a column width of 2: + + - Spacing characters in the East Asian Wide (W) or East Asian + Full-width (F) category as defined in Unicode Technical + Report #11 have a column width of 2. + """ + ucs = ord(wc) + + # NOTE: created by hand, there isn't anything identifiable other than + # general Cf category code to identify these, and some characters in Cf + # category code are of non-zero width. + if (0 == ucs or + 0x034F == ucs or + 0x200B <= ucs <= 0x200F or + 0x2028 == ucs or + 0x2029 == ucs or + 0x202A <= ucs <= 0x202E or + 0x2060 <= ucs <= 0x2063): + return 0 + + # C0/C1 control characters + if ucs < 32 or 0x07F <= ucs < 0x0A0: + return -1 + + # combining characters have indeterminate effects unless + # combined with additional characters. + if _bisearch(ucs, NONZERO_COMBINING): + return -1 + + return 1 + _bisearch(ucs, WIDE_EASTASIAN) + + +def wcswidth(pwcs, n=None): + + """ + Return the width in character cells of the first ``n`` characters of the + unicode string pwcs, or -1 if a non-printable character is encountered. + When ``n`` is None (default), return the width of the entire string. 
+ """ + + end = len(pwcs) if n is None else n + idx = slice(0, end) + width = 0 + for char in pwcs[idx]: + wcw = wcwidth(char) + if wcw < 0: + return -1 + else: + width += wcw + return width diff --git a/prompt_toolkit/renderer.py b/prompt_toolkit/renderer.py index a4e7643a..cf98b345 100644 --- a/prompt_toolkit/renderer.py +++ b/prompt_toolkit/renderer.py @@ -8,13 +8,18 @@ import six import errno from .utils import get_size -from .libs.wcwidth import wcwidth from collections import defaultdict, namedtuple from pygments.formatters.terminal256 import Terminal256Formatter, EscapeSequence from pygments.style import Style from pygments.token import Token +try: + from wcwidth import wcwidth +except ImportError: + from .libs.wcwidth import wcwidth + + # Global variable to keep the colour table in memory. _tf = Terminal256Formatter() diff --git a/setup.py b/setup.py --- a/setup.py +++ b/setup.py @@ -13,13 +13,11 @@ setup( long_description='', packages=find_packages('.'), install_requires = [ - 'pygments', 'docopt', 'six', - - # TODO: add wcwidth when released and stable on pypi - # 'wcwidth', - - # Required for the Python repl + 'docopt', 'jedi', + 'pygments', + 'six', + 'wcwidth', ], scripts = [ 'bin/ptpython', |