diff options
author | nicm <nicm> | 2020-05-25 18:57:24 +0000 |
---|---|---|
committer | nicm <nicm> | 2020-05-25 18:57:24 +0000 |
commit | 6f03e49e68dfe0d9c0c7d49079c4383b26aca916 (patch) | |
tree | 86a94f09a878fe2d32cd3ef29a69db242208897f /utf8.c | |
parent | 35779d655d7eec4b904eeb3a670bbef02aba016d (diff) |
Use the internal representation for UTF-8 keys instead of wchar_t and
drop some code only needed for that.
Diffstat (limited to 'utf8.c')
-rw-r--r-- | utf8.c | 67 |
1 files changed, 19 insertions, 48 deletions
```diff
@@ -230,17 +230,27 @@ utf8_copy(struct utf8_data *to, const struct utf8_data *from)
 }
 
 /* Get width of Unicode character. */
-static int
-utf8_width(wchar_t wc)
+static enum utf8_state
+utf8_width(struct utf8_data *ud, int *width)
 {
-	int width;
+	wchar_t wc;
 
-	width = wcwidth(wc);
-	if (width < 0 || width > 0xff) {
-		log_debug("Unicode %04lx, wcwidth() %d", (long)wc, width);
-		return (-1);
+	switch (mbtowc(&wc, ud->data, ud->size)) {
+	case -1:
+		log_debug("UTF-8 %.*s, mbtowc() %d", (int)ud->size, ud->data,
+		    errno);
+		mbtowc(NULL, NULL, MB_CUR_MAX);
+		return (UTF8_ERROR);
+	case 0:
+		return (UTF8_ERROR);
 	}
-	return (width);
+	*width = wcwidth(wc);
+	if (*width < 0 || *width > 0xff) {
+		log_debug("UTF-8 %.*s, wcwidth() %d", (int)ud->size, ud->data,
+		    *width);
+		return (UTF8_ERROR);
+	}
+	return (UTF8_DONE);
 }
 
 /*
@@ -270,7 +280,6 @@ utf8_open(struct utf8_data *ud, u_char ch)
 enum utf8_state
 utf8_append(struct utf8_data *ud, u_char ch)
 {
-	wchar_t wc;
 	int width;
 
 	if (ud->have >= ud->size)
@@ -287,51 +296,13 @@ utf8_append(struct utf8_data *ud, u_char ch)
 
 	if (ud->width == 0xff)
 		return (UTF8_ERROR);
-
-	if (utf8_combine(ud, &wc) != UTF8_DONE)
-		return (UTF8_ERROR);
-	if ((width = utf8_width(wc)) < 0)
+	if (utf8_width(ud, &width) != UTF8_DONE)
 		return (UTF8_ERROR);
 	ud->width = width;
 
 	return (UTF8_DONE);
 }
 
-/* Combine UTF-8 into Unicode. */
-enum utf8_state
-utf8_combine(const struct utf8_data *ud, wchar_t *wc)
-{
-	switch (mbtowc(wc, ud->data, ud->size)) {
-	case -1:
-		log_debug("UTF-8 %.*s, mbtowc() %d", (int)ud->size, ud->data,
-		    errno);
-		mbtowc(NULL, NULL, MB_CUR_MAX);
-		return (UTF8_ERROR);
-	case 0:
-		return (UTF8_ERROR);
-	default:
-		return (UTF8_DONE);
-	}
-}
-
-/* Split Unicode into UTF-8. */
-enum utf8_state
-utf8_split(wchar_t wc, struct utf8_data *ud)
-{
-	char s[MB_LEN_MAX];
-	int slen;
-
-	slen = wctomb(s, wc);
-	if (slen <= 0 || slen > (int)sizeof ud->data)
-		return (UTF8_ERROR);
-
-	memcpy(ud->data, s, slen);
-	ud->size = slen;
-
-	ud->width = utf8_width(wc);
-	return (UTF8_DONE);
-}
-
 /*
  * Encode len characters from src into dst, which is guaranteed to have four
  * bytes available for each character from src (for \abc or UTF-8) plus space
```