summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--prompt_toolkit/libs/wcwidth/__init__.py10
-rw-r--r--prompt_toolkit/libs/wcwidth/table_comb.py135
-rw-r--r--prompt_toolkit/libs/wcwidth/table_wide.py40
-rw-r--r--prompt_toolkit/libs/wcwidth/wcwidth.py188
-rw-r--r--prompt_toolkit/renderer.py7
-rw-r--r--setup.py10
6 files changed, 383 insertions, 7 deletions
diff --git a/prompt_toolkit/libs/wcwidth/__init__.py b/prompt_toolkit/libs/wcwidth/__init__.py
new file mode 100644
index 00000000..0da99b01
--- /dev/null
+++ b/prompt_toolkit/libs/wcwidth/__init__.py
@@ -0,0 +1,10 @@
+"""wcwidth package: re-exports wcwidth() and wcswidth().
+
+Upstream project: https://github.com/jquast/wcwidth
+"""
+from .wcwidth import wcswidth, wcwidth
+
+__all__ = ['wcwidth', 'wcswidth']
diff --git a/prompt_toolkit/libs/wcwidth/table_comb.py b/prompt_toolkit/libs/wcwidth/table_comb.py
new file mode 100644
index 00000000..94eccfc9
--- /dev/null
+++ b/prompt_toolkit/libs/wcwidth/table_comb.py
@@ -0,0 +1,135 @@
+# Generated: 2014-05-05T02:09:32.625406+00:00
+# Source: DerivedCombiningClass-6.3.0.txt
+# Date: 2013-07-05, 14:08:44 GMT [MD]
+# Table of (first, last) code point ranges, both ends inclusive, listed in
+# ascending order. The ordering matters: wcwidth._bisearch() binary-searches
+# this table. wcwidth() returns -1 for any code point that falls in a range
+# listed here.
+NONZERO_COMBINING = (
+ (0x0300, 0x034e,), # Combining Grave Accent ..Combining Upwards Arrow
+ (0x0350, 0x036f,), # Combining Right Arrowhea..Combining Latin Small Le
+ (0x0483, 0x0487,), # Combining Cyrillic Titlo..Combining Cyrillic Pokry
+ (0x0591, 0x05bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg
+ (0x05bf, 0x05bf,), # Hebrew Point Rafe ..Hebrew Point Rafe
+ (0x05c1, 0x05c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot
+ (0x05c4, 0x05c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot
+ (0x05c7, 0x05c7,), # Hebrew Point Qamats Qata..Hebrew Point Qamats Qata
+ (0x0610, 0x061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra
+ (0x064b, 0x065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below
+ (0x0670, 0x0670,), # Arabic Letter Superscrip..Arabic Letter Superscrip
+ (0x06d6, 0x06dc,), # Arabic Small High Ligatu..Arabic Small High Seen
+ (0x06df, 0x06e4,), # Arabic Small High Rounde..Arabic Small High Madda
+ (0x06e7, 0x06e8,), # Arabic Small High Yeh ..Arabic Small High Noon
+ (0x06ea, 0x06ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem
+ (0x0711, 0x0711,), # Syriac Letter Superscrip..Syriac Letter Superscrip
+ (0x0730, 0x074a,), # Syriac Pthaha Above ..Syriac Barrekh
+ (0x07eb, 0x07f3,), # Nko Combining Short High..Nko Combining Double Dot
+ (0x0816, 0x0819,), # Samaritan Mark In ..Samaritan Mark Dagesh
+ (0x081b, 0x0823,), # Samaritan Mark Epentheti..Samaritan Vowel Sign A
+ (0x0825, 0x0827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U
+ (0x0829, 0x082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa
+ (0x0859, 0x085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark
+ (0x08e4, 0x08fe,), # Arabic Curly Fatha ..Arabic Damma With Dot
+ (0x093c, 0x093c,), # Devanagari Sign Nukta ..Devanagari Sign Nukta
+ (0x094d, 0x094d,), # Devanagari Sign Virama ..Devanagari Sign Virama
+ (0x0951, 0x0954,), # Devanagari Stress Sign U..Devanagari Acute Accent
+ (0x09bc, 0x09bc,), # Bengali Sign Nukta ..Bengali Sign Nukta
+ (0x09cd, 0x09cd,), # Bengali Sign Virama ..Bengali Sign Virama
+ (0x0a3c, 0x0a3c,), # Gurmukhi Sign Nukta ..Gurmukhi Sign Nukta
+ (0x0a4d, 0x0a4d,), # Gurmukhi Sign Virama ..Gurmukhi Sign Virama
+ (0x0abc, 0x0abc,), # Gujarati Sign Nukta ..Gujarati Sign Nukta
+ (0x0acd, 0x0acd,), # Gujarati Sign Virama ..Gujarati Sign Virama
+ (0x0b3c, 0x0b3c,), # Oriya Sign Nukta ..Oriya Sign Nukta
+ (0x0b4d, 0x0b4d,), # Oriya Sign Virama ..Oriya Sign Virama
+ (0x0bcd, 0x0bcd,), # Tamil Sign Virama ..Tamil Sign Virama
+ (0x0c4d, 0x0c4d,), # Telugu Sign Virama ..Telugu Sign Virama
+ (0x0c55, 0x0c56,), # Telugu Length Mark ..Telugu Ai Length Mark
+ (0x0cbc, 0x0cbc,), # Kannada Sign Nukta ..Kannada Sign Nukta
+ (0x0ccd, 0x0ccd,), # Kannada Sign Virama ..Kannada Sign Virama
+ (0x0d4d, 0x0d4d,), # Malayalam Sign Virama ..Malayalam Sign Virama
+ (0x0dca, 0x0dca,), # Sinhala Sign Al-lakuna ..Sinhala Sign Al-lakuna
+ (0x0e38, 0x0e3a,), # Thai Character Sara U ..Thai Character Phinthu
+ (0x0e48, 0x0e4b,), # Thai Character Mai Ek ..Thai Character Mai Chatt
+ (0x0eb8, 0x0eb9,), # Lao Vowel Sign U ..Lao Vowel Sign Uu
+ (0x0ec8, 0x0ecb,), # Lao Tone Mai Ek ..Lao Tone Mai Catawa
+ (0x0f18, 0x0f19,), # Tibetan Astrological Sig..Tibetan Astrological Sig
+ (0x0f35, 0x0f35,), # Tibetan Mark Ngas Bzung ..Tibetan Mark Ngas Bzung
+ (0x0f37, 0x0f37,), # Tibetan Mark Ngas Bzung ..Tibetan Mark Ngas Bzung
+ (0x0f39, 0x0f39,), # Tibetan Mark Tsa -phru ..Tibetan Mark Tsa -phru
+ (0x0f71, 0x0f72,), # Tibetan Vowel Sign Aa ..Tibetan Vowel Sign I
+ (0x0f74, 0x0f74,), # Tibetan Vowel Sign U ..Tibetan Vowel Sign U
+ (0x0f7a, 0x0f7d,), # Tibetan Vowel Sign E ..Tibetan Vowel Sign Oo
+ (0x0f80, 0x0f80,), # Tibetan Vowel Sign Rever..Tibetan Vowel Sign Rever
+ (0x0f82, 0x0f84,), # Tibetan Sign Nyi Zla Naa..Tibetan Mark Halanta
+ (0x0f86, 0x0f87,), # Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags
+ (0x0fc6, 0x0fc6,), # Tibetan Symbol Padma Gda..Tibetan Symbol Padma Gda
+ (0x1037, 0x1037,), # Myanmar Sign Dot Below ..Myanmar Sign Dot Below
+ (0x1039, 0x103a,), # Myanmar Sign Virama ..Myanmar Sign Asat
+ (0x108d, 0x108d,), # Myanmar Sign Shan Counci..Myanmar Sign Shan Counci
+ (0x135d, 0x135f,), # Ethiopic Combining Gemin..Ethiopic Combining Gemin
+ (0x1714, 0x1714,), # Tagalog Sign Virama ..Tagalog Sign Virama
+ (0x1734, 0x1734,), # Hanunoo Sign Pamudpod ..Hanunoo Sign Pamudpod
+ (0x17d2, 0x17d2,), # Khmer Sign Coeng ..Khmer Sign Coeng
+ (0x17dd, 0x17dd,), # Khmer Sign Atthacan ..Khmer Sign Atthacan
+ (0x18a9, 0x18a9,), # Mongolian Letter Ali Gal..Mongolian Letter Ali Gal
+ (0x1939, 0x193b,), # Limbu Sign Mukphreng ..Limbu Sign Sa-i
+ (0x1a17, 0x1a18,), # Buginese Vowel Sign I ..Buginese Vowel Sign U
+ (0x1a60, 0x1a60,), # Tai Tham Sign Sakot ..Tai Tham Sign Sakot
+ (0x1a75, 0x1a7c,), # Tai Tham Sign Tone-1 ..Tai Tham Sign Khuen-lue
+ (0x1a7f, 0x1a7f,), # Tai Tham Combining Crypt..Tai Tham Combining Crypt
+ (0x1b34, 0x1b34,), # Balinese Sign Rerekan ..Balinese Sign Rerekan
+ (0x1b44, 0x1b44,), # Balinese Adeg Adeg ..Balinese Adeg Adeg
+ (0x1b6b, 0x1b73,), # Balinese Musical Symbol ..Balinese Musical Symbol
+ (0x1baa, 0x1bab,), # Sundanese Sign Pamaaeh ..Sundanese Sign Virama
+ (0x1be6, 0x1be6,), # Batak Sign Tompi ..Batak Sign Tompi
+ (0x1bf2, 0x1bf3,), # Batak Pangolat ..Batak Panongonan
+ (0x1c37, 0x1c37,), # Lepcha Sign Nukta ..Lepcha Sign Nukta
+ (0x1cd0, 0x1cd2,), # Vedic Tone Karshana ..Vedic Tone Prenkha
+ (0x1cd4, 0x1ce0,), # Vedic Sign Yajurvedic Mi..Vedic Tone Rigvedic Kash
+ (0x1ce2, 0x1ce8,), # Vedic Sign Visarga Svari..Vedic Sign Visarga Anuda
+ (0x1ced, 0x1ced,), # Vedic Sign Tiryak ..Vedic Sign Tiryak
+ (0x1cf4, 0x1cf4,), # Vedic Tone Candra Above ..Vedic Tone Candra Above
+ (0x1dc0, 0x1de6,), # Combining Dotted Grave A..Combining Latin Small Le
+ (0x1dfc, 0x1dff,), # Combining Double Inverte..Combining Right Arrowhea
+ (0x20d0, 0x20dc,), # Combining Left Harpoon A..Combining Four Dots Abov
+ (0x20e1, 0x20e1,), # Combining Left Right Arr..Combining Left Right Arr
+ (0x20e5, 0x20f0,), # Combining Reverse Solidu..Combining Asterisk Above
+ (0x2cef, 0x2cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu
+ (0x2d7f, 0x2d7f,), # Tifinagh Consonant Joine..Tifinagh Consonant Joine
+ (0x2de0, 0x2dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette
+ (0x302a, 0x302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M
+ (0x3099, 0x309a,), # Combining Katakana-hirag..Combining Katakana-hirag
+ (0xa66f, 0xa66f,), # Combining Cyrillic Vzmet..Combining Cyrillic Vzmet
+ (0xa674, 0xa67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer
+ (0xa69f, 0xa69f,), # Combining Cyrillic Lette..Combining Cyrillic Lette
+ (0xa6f0, 0xa6f1,), # Bamum Combining Mark Koq..Bamum Combining Mark Tuk
+ (0xa806, 0xa806,), # Syloti Nagri Sign Hasant..Syloti Nagri Sign Hasant
+ (0xa8c4, 0xa8c4,), # Saurashtra Sign Virama ..Saurashtra Sign Virama
+ (0xa8e0, 0xa8f1,), # Combining Devanagari Dig..Combining Devanagari Sig
+ (0xa92b, 0xa92d,), # Kayah Li Tone Plophu ..Kayah Li Tone Calya Plop
+ (0xa953, 0xa953,), # Rejang Virama ..Rejang Virama
+ (0xa9b3, 0xa9b3,), # Javanese Sign Cecak Telu..Javanese Sign Cecak Telu
+ (0xa9c0, 0xa9c0,), # Javanese Pangkon ..Javanese Pangkon
+ (0xaab0, 0xaab0,), # Tai Viet Mai Kang ..Tai Viet Mai Kang
+ (0xaab2, 0xaab4,), # Tai Viet Vowel I ..Tai Viet Vowel U
+ (0xaab7, 0xaab8,), # Tai Viet Mai Khit ..Tai Viet Vowel Ia
+ (0xaabe, 0xaabf,), # Tai Viet Vowel Am ..Tai Viet Tone Mai Ek
+ (0xaac1, 0xaac1,), # Tai Viet Tone Mai Tho ..Tai Viet Tone Mai Tho
+ (0xaaf6, 0xaaf6,), # Meetei Mayek Virama ..Meetei Mayek Virama
+ (0xabed, 0xabed,), # Meetei Mayek Apun Iyek ..Meetei Mayek Apun Iyek
+ (0xfb1e, 0xfb1e,), # Hebrew Point Judeo-spani..Hebrew Point Judeo-spani
+ (0xfe20, 0xfe26,), # Combining Ligature Left ..Combining Conjoining Mac
+ (0x101fd, 0x101fd,), # Phaistos Disc Sign Combi..Phaistos Disc Sign Combi
+ (0x10a0d, 0x10a0d,), # Kharoshthi Sign Double R..Kharoshthi Sign Double R
+ (0x10a0f, 0x10a0f,), # Kharoshthi Sign Visarga ..Kharoshthi Sign Visarga
+ (0x10a38, 0x10a3a,), # Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo
+ (0x10a3f, 0x10a3f,), # Kharoshthi Virama ..Kharoshthi Virama
+ (0x11046, 0x11046,), # Brahmi Virama ..Brahmi Virama
+ (0x110b9, 0x110ba,), # Kaithi Sign Virama ..Kaithi Sign Nukta
+ (0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga
+ (0x11133, 0x11134,), # Chakma Virama ..Chakma Maayyaa
+ (0x111c0, 0x111c0,), # Sharada Sign Virama ..Sharada Sign Virama
+ (0x116b6, 0x116b7,), # Takri Sign Virama ..Takri Sign Nukta
+ (0x1d165, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining
+ (0x1d16d, 0x1d172,), # Musical Symbol Combining..Musical Symbol Combining
+ (0x1d17b, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining
+ (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining
+ (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining
+ (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical
+)
diff --git a/prompt_toolkit/libs/wcwidth/table_wide.py b/prompt_toolkit/libs/wcwidth/table_wide.py
new file mode 100644
index 00000000..4a5af71c
--- /dev/null
+++ b/prompt_toolkit/libs/wcwidth/table_wide.py
@@ -0,0 +1,40 @@
+# Generated: 2014-05-05T02:09:32.610884+00:00
+# Source: EastAsianWidth-6.3.0.txt
+# Date: 2013-02-05, 20:09:00 GMT [KW, LI]
+# Table of (first, last) code point ranges, both ends inclusive, listed in
+# ascending order. The ordering matters: wcwidth._bisearch() binary-searches
+# this table. wcwidth() reports a width of 2 for a code point in one of these
+# ranges, but only after its zero-width, control and combining checks have
+# not already returned.
+WIDE_EASTASIAN = (
+ (0x1100, 0x115f,), # Hangul Choseong Kiyeok ..Hangul Choseong Filler
+ (0x2329, 0x232a,), # Left-pointing Angle Brac..Right-pointing Angle Bra
+ (0x2e80, 0x2e99,), # Cjk Radical Repeat ..Cjk Radical Rap
+ (0x2e9b, 0x2ef3,), # Cjk Radical Choke ..Cjk Radical C-simplified
+ (0x2f00, 0x2fd5,), # Kangxi Radical One ..Kangxi Radical Flute
+ (0x2ff0, 0x2ffb,), # Ideographic Description ..Ideographic Description
+ (0x3000, 0x303e,), # Ideographic Space ..Ideographic Variation In
+ (0x3041, 0x3096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke
+ (0x3099, 0x30ff,), # Combining Katakana-hirag..Katakana Digraph Koto
+ (0x3105, 0x312d,), # Bopomofo Letter B ..Bopomofo Letter Ih
+ (0x3131, 0x318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae
+ (0x3190, 0x31ba,), # Ideographic Annotation L..Bopomofo Letter Zy
+ (0x31c0, 0x31e3,), # Cjk Stroke T ..Cjk Stroke Q
+ (0x31f0, 0x321e,), # Katakana Letter Small Ku..Parenthesized Korean Cha
+ (0x3220, 0x3247,), # Parenthesized Ideograph ..Circled Ideograph Koto
+ (0x3250, 0x32fe,), # Partnership Sign ..Circled Katakana Wo
+ (0x3300, 0x4dbf,), # Square Apaato ..
+ (0x4e00, 0xa48c,), # Cjk Unified Ideograph-4e..Yi Syllable Yyr
+ (0xa490, 0xa4c6,), # Yi Radical Qot ..Yi Radical Ke
+ (0xa960, 0xa97c,), # Hangul Choseong Tikeut-m..Hangul Choseong Ssangyeo
+ (0xac00, 0xd7a3,), # Hangul Syllable Ga ..Hangul Syllable Hih
+ (0xf900, 0xfaff,), # Cjk Compatibility Ideogr..
+ (0xfe10, 0xfe19,), # Presentation Form For Ve..Presentation Form For Ve
+ (0xfe30, 0xfe52,), # Presentation Form For Ve..Small Full Stop
+ (0xfe54, 0xfe66,), # Small Semicolon ..Small Equals Sign
+ (0xfe68, 0xfe6b,), # Small Reverse Solidus ..Small Commercial At
+ (0xff01, 0xff60,), # Fullwidth Exclamation Ma..Fullwidth Right White Pa
+ (0xffe0, 0xffe6,), # Fullwidth Cent Sign ..Fullwidth Won Sign
+ (0x1b000, 0x1b001,), # Katakana Letter Archaic ..Hiragana Letter Archaic
+ (0x1f200, 0x1f202,), # Square Hiragana Hoka ..Squared Katakana Sa
+ (0x1f210, 0x1f23a,), # Squared Cjk Unified Ideo..Squared Cjk Unified Ideo
+ (0x1f240, 0x1f248,), # Tortoise Shell Bracketed..Tortoise Shell Bracketed
+ (0x1f250, 0x1f251,), # Circled Ideograph Advant..Circled Ideograph Accept
+ (0x20000, 0x2fffd,), # Cjk Unified Ideograph-20..
+ (0x30000, 0x3fffd,), # (nil) ..
+)
diff --git a/prompt_toolkit/libs/wcwidth/wcwidth.py b/prompt_toolkit/libs/wcwidth/wcwidth.py
new file mode 100644
index 00000000..1df34b19
--- /dev/null
+++ b/prompt_toolkit/libs/wcwidth/wcwidth.py
@@ -0,0 +1,188 @@
+"""
+This is an implementation of wcwidth() and wcswidth() (defined in
+IEEE Std 1003.1-2001) for Unicode.
+
+https://github.com/jquast/wcwidth
+"""
+# from Markus Kuhn's C code at:
+#
+# http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
+#
+# This is an implementation of wcwidth() and wcswidth() (defined in
+# IEEE Std 1003.1-2001) for Unicode.
+#
+# http://www.opengroup.org/onlinepubs/007904975/functions/wcwidth.html
+# http://www.opengroup.org/onlinepubs/007904975/functions/wcswidth.html
+#
+# In fixed-width output devices, Latin characters all occupy a single
+# "cell" position of equal width, whereas ideographic CJK characters
+# occupy two such cells. Interoperability between terminal-line
+# applications and (teletype-style) character terminals using the
+# UTF-8 encoding requires agreement on which character should advance
+# the cursor by how many cell positions. No established formal
+# standards exist at present on which Unicode character shall occupy
+# how many cell positions on character terminals. These routines are
+# a first attempt of defining such behavior based on simple rules
+# applied to data provided by the Unicode Consortium.
+#
+# For some graphical characters, the Unicode standard explicitly
+# defines a character-cell width via the definition of the East Asian
+# FullWidth (F), Wide (W), Half-width (H), and Narrow (Na) classes.
+# In all these cases, there is no ambiguity about which width a
+# terminal shall use. For characters in the East Asian Ambiguous (A)
+# class, the width choice depends purely on a preference of backward
+# compatibility with either historic CJK or Western practice.
+# Choosing single-width for these characters is easy to justify as
+# the appropriate long-term solution, as the CJK practice of
+# displaying these characters as double-width comes from historic
+# implementation simplicity (8-bit encoded characters were displayed
+# single-width and 16-bit ones double-width, even for Greek,
+# Cyrillic, etc.) and not any typographic considerations.
+#
+# Much less clear is the choice of width for the Not East Asian
+# (Neutral) class. Existing practice does not dictate a width for any
+# of these characters. It would nevertheless make sense
+# typographically to allocate two character cells to characters such
+# as for instance EM SPACE or VOLUME INTEGRAL, which cannot be
+# represented adequately with a single-width glyph. The following
+# routines at present merely assign a single-cell width to all
+# neutral characters, in the interest of simplicity. This is not
+# entirely satisfactory and should be reconsidered before
+# establishing a formal standard in this area. At the moment, the
+# decision which Not East Asian (Neutral) characters should be
+# represented by double-width glyphs cannot yet be answered by
+# applying a simple rule from the Unicode database content. Setting
+# up a proper standard for the behavior of UTF-8 character terminals
+# will require a careful analysis not only of each Unicode character,
+# but also of each presentation form, something the author of these
+# routines has avoided to do so far.
+#
+# http://www.unicode.org/unicode/reports/tr11/
+#
+# Markus Kuhn -- 2007-05-26 (Unicode 5.0)
+#
+# Permission to use, copy, modify, and distribute this software
+# for any purpose and without fee is hereby granted. The author
+# disclaims all warranties with regard to this software.
+#
+# Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
+
+from __future__ import division
+from .table_wide import WIDE_EASTASIAN
+from .table_comb import NONZERO_COMBINING
+
+
+def _bisearch(ucs, table):
+    """Report whether ``ucs`` falls inside any interval of ``table``.
+
+    ``table`` is an ascending sequence of ``(first, last)`` pairs with both
+    ends inclusive.  The result is the integer 1 on a hit and 0 on a miss
+    (not a bool) because the caller adds it to a base width.
+    """
+    low, high = 0, len(table) - 1
+
+    # Cheap rejection: the value lies outside the span of the whole table.
+    if not (table[0][0] <= ucs <= table[high][1]):
+        return 0
+
+    # Classic binary search over the closed index window [low, high].
+    while low <= high:
+        middle = (low + high) // 2
+        entry = table[middle]
+        if entry[0] <= ucs <= entry[1]:
+            return 1
+        if ucs > entry[1]:
+            low = middle + 1
+        else:
+            high = middle - 1
+
+    return 0
+
+
+def wcwidth(wc):
+    """wcwidth(wc) -> int
+
+    Return the number of terminal columns occupied by the single
+    character ``wc``: -1, 0, 1 or 2.
+
+    Width 0 (no printable effect):
+
+    - NULL (U+0000).
+
+    - COMBINING GRAPHEME JOINER (U+034F).
+
+    - ZERO WIDTH SPACE (U+200B) through
+      RIGHT-TO-LEFT MARK (U+200F).
+
+    - LINE SEPARATOR (U+2028) and
+      PARAGRAPH SEPARATOR (U+2029).
+
+    - LEFT-TO-RIGHT EMBEDDING (U+202A) through
+      RIGHT-TO-LEFT OVERRIDE (U+202E).
+
+    - WORD JOINER (U+2060) through
+      INVISIBLE SEPARATOR (U+2063).
+
+    Width -1 (not printable, or indeterminate effect on the terminal):
+
+    - C0 control characters (U+0001 through U+001F).
+
+    - DEL and the C1 control characters (U+007F through U+009F).
+
+    - Combining characters, i.e. code points found in the
+      ``NONZERO_COMBINING`` table.
+
+    Width 2:
+
+    - Code points found in the ``WIDE_EASTASIAN`` table.
+
+    Width 1:
+
+    - Everything else, including SOFT HYPHEN (U+00AD).
+    """
+    codepoint = ord(wc)
+
+    # Hand-maintained list of zero-width code points.  Only the general
+    # category Cf comes close to identifying them, and some Cf characters
+    # do have a non-zero width, so they are enumerated explicitly.
+    zero_width = (
+        codepoint in (0x0000, 0x034F, 0x2028, 0x2029)
+        or 0x200B <= codepoint <= 0x200F
+        or 0x202A <= codepoint <= 0x202E
+        or 0x2060 <= codepoint <= 0x2063
+    )
+    if zero_width:
+        return 0
+
+    # C0 controls, DEL and C1 controls are not printable.
+    if codepoint < 0x20 or 0x7F <= codepoint <= 0x9F:
+        return -1
+
+    # A combining character on its own has an indeterminate effect; it only
+    # makes sense attached to a preceding base character.
+    if _bisearch(codepoint, NONZERO_COMBINING):
+        return -1
+
+    if _bisearch(codepoint, WIDE_EASTASIAN):
+        return 2
+    return 1
+
+
+def wcswidth(pwcs, n=None):
+    """
+    Return the width in character cells of the first ``n`` characters of the
+    unicode string ``pwcs``, or -1 if a non-printable character is
+    encountered among them.
+
+    :param pwcs: the unicode string to measure.
+    :param n: number of leading characters to measure; when ``None``
+        (default) the entire string is measured.
+    :returns: the sum of ``wcwidth()`` over the measured characters, or -1
+        as soon as any of them has a negative width.
+    """
+    # ``n`` bounds the measured prefix; only fall back to the full length
+    # when the caller did not supply it.  (The test must be ``is None``:
+    # with the opposite test an explicit ``n`` would be silently ignored.)
+    end = len(pwcs) if n is None else n
+    width = 0
+    for char in pwcs[:end]:
+        wcw = wcwidth(char)
+        if wcw < 0:
+            # One unprintable character makes the whole width undefined.
+            return -1
+        width += wcw
+    return width
diff --git a/prompt_toolkit/renderer.py b/prompt_toolkit/renderer.py
index a4e7643a..cf98b345 100644
--- a/prompt_toolkit/renderer.py
+++ b/prompt_toolkit/renderer.py
@@ -8,13 +8,18 @@ import six
import errno
from .utils import get_size
-from .libs.wcwidth import wcwidth
from collections import defaultdict, namedtuple
from pygments.formatters.terminal256 import Terminal256Formatter, EscapeSequence
from pygments.style import Style
from pygments.token import Token
+try:
+ from wcwidth import wcwidth
+except ImportError:
+ from .libs.wcwidth import wcwidth
+
+
# Global variable to keep the colour table in memory.
_tf = Terminal256Formatter()
diff --git a/setup.py b/setup.py
index 70534430..8dd2764d 100644
--- a/setup.py
+++ b/setup.py
@@ -13,13 +13,11 @@ setup(
long_description='',
packages=find_packages('.'),
install_requires = [
- 'pygments', 'docopt', 'six',
-
- # TODO: add wcwidth when released and stable on pypi
- # 'wcwidth',
-
- # Required for the Python repl
+ 'docopt',
'jedi',
+ 'pygments',
+ 'six',
+ 'wcwidth',
],
scripts = [
'bin/ptpython',