summary | refs | log | tree | commit | diff | stats
path: root/utf8.c
diff options
context:
space:
mode:
author: nicm <nicm> 2023-09-01 14:29:11 +0000
committer: nicm <nicm> 2023-09-01 14:29:11 +0000
commit: 9456258ccc03a1a959cfb7d020011d751b39bb1b (patch)
tree: 6f2331046cfde77a7125d3cc54082161f2fc9303 /utf8.c
parent: c41d59f232a7243bfff34ec3f02adc76fddc91b2 (diff)
Rewrite combined character handling to be more consistent and to support
newer Unicode combined characters (which we have to "know" are combined since they are not width zero). GitHub issue 3600.
Diffstat (limited to 'utf8.c')
-rw-r--r-- utf8.c 8
1 files changed, 4 insertions, 4 deletions
diff --git a/utf8.c b/utf8.c
index 10ccf422..d26a49e4 100644
--- a/utf8.c
+++ b/utf8.c
@@ -136,8 +136,8 @@ utf8_from_data(const struct utf8_data *ud, utf8_char *uc)
goto fail;
if (ud->size <= 3) {
index = (((utf8_char)ud->data[2] << 16)|
- ((utf8_char)ud->data[1] << 8)|
- ((utf8_char)ud->data[0]));
+ ((utf8_char)ud->data[1] << 8)|
+ ((utf8_char)ud->data[0]));
} else if (utf8_put_item(ud->data, ud->size, &index) != 0)
goto fail;
*uc = UTF8_SET_SIZE(ud->size)|UTF8_SET_WIDTH(ud->width)|index;
@@ -226,9 +226,9 @@ utf8_width(struct utf8_data *ud, int *width)
case 0:
return (UTF8_ERROR);
}
- log_debug("UTF-8 %.*s is %08X", (int)ud->size, ud->data, (u_int)wc);
+ log_debug("UTF-8 %.*s is %05X", (int)ud->size, ud->data, (u_int)wc);
*width = wcwidth(wc);
- log_debug("wcwidth(%08X) returned %d", (u_int)wc, *width);
+ log_debug("wcwidth(%05X) returned %d", (u_int)wc, *width);
if (*width < 0) {
/*
* C1 control characters are nonprintable, so they are always