summary refs log tree commit diff stats
path: root/utf8.c
diff options
context:
space:
mode:
author Thomas Adam <thomas@xteddy.org> 2023-09-17 21:00:34 +0100
committer Thomas Adam <thomas@xteddy.org> 2023-09-17 21:03:06 +0100
commit b202a2f1b517a3de7141fc35fbd9e39ed5ac5284 (patch)
tree d35a4fd44a7f21d91d8dcac3badf6aae61cb04b4 /utf8.c
parent 9f9156c0303ad9c50fd44e0561ef0f5bb21a418b (diff)
parent 7e79108f8a0d109b058f07cd84f17957f730432a (diff)
Merge branch 'obsd-master'
Diffstat (limited to 'utf8.c')
-rw-r--r-- utf8.c 225
1 files changed, 208 insertions, 17 deletions
diff --git a/utf8.c b/utf8.c
index 282eb7bd..ece1bb7e 100644
--- a/utf8.c
+++ b/utf8.c
@@ -26,6 +26,171 @@
#include "tmux.h"
+/*
+ * Characters that utf8_width() reports as two columns wide without
+ * consulting wcwidth(). Membership is tested with bsearch() (through
+ * utf8_in_table()), so the entries must stay in ascending order.
+ *
+ * NOTE(review): most entries are above 0xFFFF, which presumes wchar_t is
+ * wider than 16 bits - confirm for every supported platform.
+ */
+static const wchar_t utf8_force_wide[] = {
+ 0x0261D,
+ 0x026F9,
+ 0x0270A,
+ 0x0270B,
+ 0x0270C,
+ 0x0270D,
+ 0x1F1E6,
+ 0x1F1E7,
+ 0x1F1E8,
+ 0x1F1E9,
+ 0x1F1EA,
+ 0x1F1EB,
+ 0x1F1EC,
+ 0x1F1ED,
+ 0x1F1EE,
+ 0x1F1EF,
+ 0x1F1F0,
+ 0x1F1F1,
+ 0x1F1F2,
+ 0x1F1F3,
+ 0x1F1F4,
+ 0x1F1F5,
+ 0x1F1F6,
+ 0x1F1F7,
+ 0x1F1F8,
+ 0x1F1F9,
+ 0x1F1FA,
+ 0x1F1FB,
+ 0x1F1FC,
+ 0x1F1FD,
+ 0x1F1FE,
+ 0x1F1FF,
+ 0x1F385,
+ 0x1F3C2,
+ 0x1F3C3,
+ 0x1F3C4,
+ 0x1F3C7,
+ 0x1F3CA,
+ 0x1F3CB,
+ 0x1F3CC,
+ 0x1F3FB,
+ 0x1F3FC,
+ 0x1F3FD,
+ 0x1F3FE,
+ 0x1F3FF,
+ 0x1F442,
+ 0x1F443,
+ 0x1F446,
+ 0x1F447,
+ 0x1F448,
+ 0x1F449,
+ 0x1F44A,
+ 0x1F44B,
+ 0x1F44C,
+ 0x1F44D,
+ 0x1F44E,
+ 0x1F44F,
+ 0x1F450,
+ 0x1F466,
+ 0x1F467,
+ 0x1F468,
+ 0x1F469,
+ 0x1F46B,
+ 0x1F46C,
+ 0x1F46D,
+ 0x1F46E,
+ 0x1F470,
+ 0x1F471,
+ 0x1F472,
+ 0x1F473,
+ 0x1F474,
+ 0x1F475,
+ 0x1F476,
+ 0x1F477,
+ 0x1F478,
+ 0x1F47C,
+ 0x1F481,
+ 0x1F482,
+ 0x1F483,
+ 0x1F485,
+ 0x1F486,
+ 0x1F487,
+ 0x1F48F,
+ 0x1F491,
+ 0x1F4AA,
+ 0x1F574,
+ 0x1F575,
+ 0x1F57A,
+ 0x1F590,
+ 0x1F595,
+ 0x1F596,
+ 0x1F645,
+ 0x1F646,
+ 0x1F647,
+ 0x1F64B,
+ 0x1F64C,
+ 0x1F64D,
+ 0x1F64E,
+ 0x1F64F,
+ 0x1F6A3,
+ 0x1F6B4,
+ 0x1F6B5,
+ 0x1F6B6,
+ 0x1F6C0,
+ 0x1F6CC,
+ 0x1F90C,
+ 0x1F90F,
+ 0x1F918,
+ 0x1F919,
+ 0x1F91A,
+ 0x1F91B,
+ 0x1F91C,
+ 0x1F91D,
+ 0x1F91E,
+ 0x1F91F,
+ 0x1F926,
+ 0x1F930,
+ 0x1F931,
+ 0x1F932,
+ 0x1F933,
+ 0x1F934,
+ 0x1F935,
+ 0x1F936,
+ 0x1F937,
+ 0x1F938,
+ 0x1F939,
+ 0x1F93D,
+ 0x1F93E,
+ 0x1F977,
+ 0x1F9B5,
+ 0x1F9B6,
+ 0x1F9B8,
+ 0x1F9B9,
+ 0x1F9BB,
+ 0x1F9CD,
+ 0x1F9CE,
+ 0x1F9CF,
+ 0x1F9D1,
+ 0x1F9D2,
+ 0x1F9D3,
+ 0x1F9D4,
+ 0x1F9D5,
+ 0x1F9D6,
+ 0x1F9D7,
+ 0x1F9D8,
+ 0x1F9D9,
+ 0x1F9DA,
+ 0x1F9DB,
+ 0x1F9DC,
+ 0x1F9DD,
+ 0x1FAC3,
+ 0x1FAC4,
+ 0x1FAC5,
+ 0x1FAF0,
+ 0x1FAF1,
+ 0x1FAF2,
+ 0x1FAF3,
+ 0x1FAF4,
+ 0x1FAF5,
+ 0x1FAF6,
+ 0x1FAF7,
+ 0x1FAF8
+};
+
struct utf8_item {
RB_ENTRY(utf8_item) index_entry;
u_int index;
@@ -122,6 +287,28 @@ utf8_put_item(const u_char *data, size_t size, u_int *index)
return (0);
}
+/*
+ * Comparison callback passed to bsearch() by utf8_in_table(). Both
+ * arguments point to a wchar_t. Returns -1, 1 or 0 when the first value is
+ * less than, greater than or equal to the second.
+ */
+static int
+utf8_table_cmp(const void *vp1, const void *vp2)
+{
+ const wchar_t *wc1 = vp1, *wc2 = vp2;
+
+ if (*wc1 < *wc2)
+ return (-1);
+ if (*wc1 > *wc2)
+ return (1);
+ return (0);
+}
+
+/*
+ * Check if character in table. The table holds count entries and is searched
+ * with bsearch(), so it must be sorted in ascending order as defined by
+ * utf8_table_cmp(). Returns 1 if find is present and 0 if it is not.
+ */
+int
+utf8_in_table(wchar_t find, const wchar_t *table, u_int count)
+{
+ wchar_t *found;
+
+ found = bsearch(&find, table, count, sizeof *table, utf8_table_cmp);
+ return (found != NULL);
+}
+
/* Get UTF-8 character from data. */
enum utf8_state
utf8_from_data(const struct utf8_data *ud, utf8_char *uc)
@@ -216,24 +403,12 @@ utf8_width(struct utf8_data *ud, int *width)
{
wchar_t wc;
-#ifdef HAVE_UTF8PROC
- switch (utf8proc_mbtowc(&wc, ud->data, ud->size)) {
-#else
- switch (mbtowc(&wc, ud->data, ud->size)) {
-#endif
- case -1:
- log_debug("UTF-8 %.*s, mbtowc() %d", (int)ud->size, ud->data,
- errno);
- mbtowc(NULL, NULL, MB_CUR_MAX);
- return (UTF8_ERROR);
- case 0:
+ if (utf8_towc(ud, &wc) != UTF8_DONE)
return (UTF8_ERROR);
+ if (utf8_in_table(wc, utf8_force_wide, nitems(utf8_force_wide))) {
+ *width = 2;
+ return (UTF8_DONE);
}
- log_debug("UTF-8 %.*s is %05X", (int)ud->size, ud->data, (u_int)wc);
-#ifdef HAVE_UTF8PROC
- *width = utf8proc_wcwidth(wc);
- log_debug("utf8proc_wcwidth(%05X) returned %d", (u_int)wc, *width);
-#else
*width = wcwidth(wc);
log_debug("wcwidth(%05X) returned %d", (u_int)wc, *width);
if (*width < 0) {
@@ -243,12 +418,28 @@ utf8_width(struct utf8_data *ud, int *width)
*/
*width = (wc >= 0x80 && wc <= 0x9f) ? 0 : 1;
}
-#endif
if (*width >= 0 && *width <= 0xff)
return (UTF8_DONE);
return (UTF8_ERROR);
}
+/*
+ * Convert UTF-8 character to wide character with mbtowc(). On success the
+ * result is stored in *wc and UTF8_DONE is returned. UTF8_ERROR is returned
+ * when mbtowc() fails (-1) or returns 0.
+ *
+ * NOTE(review): the code this replaces in utf8_width() called
+ * utf8proc_mbtowc() when HAVE_UTF8PROC was defined; only mbtowc() is used
+ * here - confirm that dropping that path is intended.
+ */
+enum utf8_state
+utf8_towc(const struct utf8_data *ud, wchar_t *wc)
+{
+ switch (mbtowc(wc, ud->data, ud->size)) {
+ case -1:
+ log_debug("UTF-8 %.*s, mbtowc() %d", (int)ud->size, ud->data,
+ errno);
+ /* Put mbtowc() back into its initial conversion state. */
+ mbtowc(NULL, NULL, MB_CUR_MAX);
+ return (UTF8_ERROR);
+ case 0:
+ return (UTF8_ERROR);
+ }
+ log_debug("UTF-8 %.*s is %05X", (int)ud->size, ud->data, (u_int)*wc);
+ return (UTF8_DONE);
+}
+
/*
* Open UTF-8 sequence.
*