diff options
author | Thomas Adam <thomas@xteddy.org> | 2023-09-17 21:00:34 +0100 |
---|---|---|
committer | Thomas Adam <thomas@xteddy.org> | 2023-09-17 21:03:06 +0100 |
commit | b202a2f1b517a3de7141fc35fbd9e39ed5ac5284 (patch) | |
tree | d35a4fd44a7f21d91d8dcac3badf6aae61cb04b4 /utf8.c | |
parent | 9f9156c0303ad9c50fd44e0561ef0f5bb21a418b (diff) | |
parent | 7e79108f8a0d109b058f07cd84f17957f730432a (diff) |
Merge branch 'obsd-master'
Diffstat (limited to 'utf8.c')
-rw-r--r-- | utf8.c | 225 |
1 files changed, 208 insertions, 17 deletions
@@ -26,6 +26,171 @@ #include "tmux.h" +static const wchar_t utf8_force_wide[] = { + 0x0261D, + 0x026F9, + 0x0270A, + 0x0270B, + 0x0270C, + 0x0270D, + 0x1F1E6, + 0x1F1E7, + 0x1F1E8, + 0x1F1E9, + 0x1F1EA, + 0x1F1EB, + 0x1F1EC, + 0x1F1ED, + 0x1F1EE, + 0x1F1EF, + 0x1F1F0, + 0x1F1F1, + 0x1F1F2, + 0x1F1F3, + 0x1F1F4, + 0x1F1F5, + 0x1F1F6, + 0x1F1F7, + 0x1F1F8, + 0x1F1F9, + 0x1F1FA, + 0x1F1FB, + 0x1F1FC, + 0x1F1FD, + 0x1F1FE, + 0x1F1FF, + 0x1F385, + 0x1F3C2, + 0x1F3C3, + 0x1F3C4, + 0x1F3C7, + 0x1F3CA, + 0x1F3CB, + 0x1F3CC, + 0x1F3FB, + 0x1F3FC, + 0x1F3FD, + 0x1F3FE, + 0x1F3FF, + 0x1F442, + 0x1F443, + 0x1F446, + 0x1F447, + 0x1F448, + 0x1F449, + 0x1F44A, + 0x1F44B, + 0x1F44C, + 0x1F44D, + 0x1F44E, + 0x1F44F, + 0x1F450, + 0x1F466, + 0x1F467, + 0x1F468, + 0x1F469, + 0x1F46B, + 0x1F46C, + 0x1F46D, + 0x1F46E, + 0x1F470, + 0x1F471, + 0x1F472, + 0x1F473, + 0x1F474, + 0x1F475, + 0x1F476, + 0x1F477, + 0x1F478, + 0x1F47C, + 0x1F481, + 0x1F482, + 0x1F483, + 0x1F485, + 0x1F486, + 0x1F487, + 0x1F48F, + 0x1F491, + 0x1F4AA, + 0x1F574, + 0x1F575, + 0x1F57A, + 0x1F590, + 0x1F595, + 0x1F596, + 0x1F645, + 0x1F646, + 0x1F647, + 0x1F64B, + 0x1F64C, + 0x1F64D, + 0x1F64E, + 0x1F64F, + 0x1F6A3, + 0x1F6B4, + 0x1F6B5, + 0x1F6B6, + 0x1F6C0, + 0x1F6CC, + 0x1F90C, + 0x1F90F, + 0x1F918, + 0x1F919, + 0x1F91A, + 0x1F91B, + 0x1F91C, + 0x1F91D, + 0x1F91E, + 0x1F91F, + 0x1F926, + 0x1F930, + 0x1F931, + 0x1F932, + 0x1F933, + 0x1F934, + 0x1F935, + 0x1F936, + 0x1F937, + 0x1F938, + 0x1F939, + 0x1F93D, + 0x1F93E, + 0x1F977, + 0x1F9B5, + 0x1F9B6, + 0x1F9B8, + 0x1F9B9, + 0x1F9BB, + 0x1F9CD, + 0x1F9CE, + 0x1F9CF, + 0x1F9D1, + 0x1F9D2, + 0x1F9D3, + 0x1F9D4, + 0x1F9D5, + 0x1F9D6, + 0x1F9D7, + 0x1F9D8, + 0x1F9D9, + 0x1F9DA, + 0x1F9DB, + 0x1F9DC, + 0x1F9DD, + 0x1FAC3, + 0x1FAC4, + 0x1FAC5, + 0x1FAF0, + 0x1FAF1, + 0x1FAF2, + 0x1FAF3, + 0x1FAF4, + 0x1FAF5, + 0x1FAF6, + 0x1FAF7, + 0x1FAF8 +}; + struct utf8_item { RB_ENTRY(utf8_item) index_entry; u_int index; @@ -122,6 +287,28 @@ utf8_put_item(const u_char *data, size_t size, u_int *index) return (0); } +static int +utf8_table_cmp(const void *vp1, const void *vp2) +{ + const wchar_t *wc1 = vp1, *wc2 = vp2; + + if (*wc1 < *wc2) + return (-1); + if (*wc1 > *wc2) + return (1); + return (0); +} + +/* Check if character in table. */ +int +utf8_in_table(wchar_t find, const wchar_t *table, u_int count) +{ + wchar_t *found; + + found = bsearch(&find, table, count, sizeof *table, utf8_table_cmp); + return (found != NULL); +} + /* Get UTF-8 character from data. */ enum utf8_state utf8_from_data(const struct utf8_data *ud, utf8_char *uc) @@ -216,24 +403,12 @@ utf8_width(struct utf8_data *ud, int *width) { wchar_t wc; -#ifdef HAVE_UTF8PROC - switch (utf8proc_mbtowc(&wc, ud->data, ud->size)) { -#else - switch (mbtowc(&wc, ud->data, ud->size)) { -#endif - case -1: - log_debug("UTF-8 %.*s, mbtowc() %d", (int)ud->size, ud->data, - errno); - mbtowc(NULL, NULL, MB_CUR_MAX); - return (UTF8_ERROR); - case 0: + if (utf8_towc(ud, &wc) != UTF8_DONE) return (UTF8_ERROR); + if (utf8_in_table(wc, utf8_force_wide, nitems(utf8_force_wide))) { + *width = 2; + return (UTF8_DONE); } - log_debug("UTF-8 %.*s is %05X", (int)ud->size, ud->data, (u_int)wc); -#ifdef HAVE_UTF8PROC - *width = utf8proc_wcwidth(wc); - log_debug("utf8proc_wcwidth(%05X) returned %d", (u_int)wc, *width); -#else *width = wcwidth(wc); log_debug("wcwidth(%05X) returned %d", (u_int)wc, *width); if (*width < 0) { @@ -243,12 +418,28 @@ utf8_width(struct utf8_data *ud, int *width) */ *width = (wc >= 0x80 && wc <= 0x9f) ? 0 : 1; } -#endif if (*width >= 0 && *width <= 0xff) return (UTF8_DONE); return (UTF8_ERROR); } +/* Convert UTF-8 character to wide character. */ +enum utf8_state +utf8_towc(const struct utf8_data *ud, wchar_t *wc) +{ + switch (mbtowc(wc, ud->data, ud->size)) { + case -1: + log_debug("UTF-8 %.*s, mbtowc() %d", (int)ud->size, ud->data, + errno); + mbtowc(NULL, NULL, MB_CUR_MAX); + return (UTF8_ERROR); + case 0: + return (UTF8_ERROR); + } + log_debug("UTF-8 %.*s is %05X", (int)ud->size, ud->data, (u_int)*wc); + return (UTF8_DONE); +} + /* * Open UTF-8 sequence. * |