diff options
author | nicm <nicm> | 2023-09-01 14:29:11 +0000 |
---|---|---|
committer | nicm <nicm> | 2023-09-01 14:29:11 +0000 |
commit | 9456258ccc03a1a959cfb7d020011d751b39bb1b (patch) | |
tree | 6f2331046cfde77a7125d3cc54082161f2fc9303 /utf8.c | |
parent | c41d59f232a7243bfff34ec3f02adc76fddc91b2 (diff) |
Rewrite combined character handling to be more consistent and to support
newer Unicode combined characters (which we have to "know" are combined
since they are not width zero). GitHub issue 3600.
Diffstat (limited to 'utf8.c')
-rw-r--r-- | utf8.c | 8 |
1 files changed, 4 insertions, 4 deletions
@@ -136,8 +136,8 @@ utf8_from_data(const struct utf8_data *ud, utf8_char *uc)
 goto fail;
 if (ud->size <= 3) {
 index = (((utf8_char)ud->data[2] << 16)|
- ((utf8_char)ud->data[1] << 8)|
- ((utf8_char)ud->data[0]));
+ ((utf8_char)ud->data[1] << 8)|
+ ((utf8_char)ud->data[0]));
 } else if (utf8_put_item(ud->data, ud->size, &index) != 0)
 goto fail;
 *uc = UTF8_SET_SIZE(ud->size)|UTF8_SET_WIDTH(ud->width)|index;
@@ -226,9 +226,9 @@ utf8_width(struct utf8_data *ud, int *width)
 case 0:
 return (UTF8_ERROR);
 }
- log_debug("UTF-8 %.*s is %08X", (int)ud->size, ud->data, (u_int)wc);
+ log_debug("UTF-8 %.*s is %05X", (int)ud->size, ud->data, (u_int)wc);
 *width = wcwidth(wc);
- log_debug("wcwidth(%08X) returned %d", (u_int)wc, *width);
+ log_debug("wcwidth(%05X) returned %d", (u_int)wc, *width);
 if (*width < 0) {
 /*
 * C1 control characters are nonprintable, so they are always