summaryrefslogtreecommitdiffstats
path: root/utf8.c
diff options
context:
space:
mode:
authornicm <nicm>2015-11-12 11:05:34 +0000
committernicm <nicm>2015-11-12 11:05:34 +0000
commit69e0b8326ad0a983759518b90ed8632146341acf (patch)
tree03f69cf9a96b5e87b760243cc535878940bc7a02 /utf8.c
parent7062b0e65dcbb94bb190f6c50f4089b2ea6278bb (diff)
Support UTF-8 key bindings by expanding the key type from int to
uint64_t and converting UTF-8 to Unicode on input and the reverse on output. (This allows key bindings, but there are still omissions - the largest being that the various prompts do not accept UTF-8.)
Diffstat (limited to 'utf8.c')
-rw-r--r--utf8.c34
1 files changed, 32 insertions, 2 deletions
diff --git a/utf8.c b/utf8.c
index 9a06c186..e61bf996 100644
--- a/utf8.c
+++ b/utf8.c
@@ -394,6 +394,8 @@ utf8_open(struct utf8_data *utf8data, u_char ch)
int
utf8_append(struct utf8_data *utf8data, u_char ch)
{
+ /* XXX this should do validity checks too! */
+
if (utf8data->have >= utf8data->size)
fatalx("UTF-8 character overflow");
if (utf8data->size > sizeof utf8data->data)
@@ -467,18 +469,46 @@ utf8_combine(const struct utf8_data *utf8data)
case 3:
value = utf8data->data[2] & 0x3f;
value |= (utf8data->data[1] & 0x3f) << 6;
- value |= (utf8data->data[0] & 0x0f) << 12;
+ value |= (utf8data->data[0] & 0xf) << 12;
break;
case 4:
value = utf8data->data[3] & 0x3f;
value |= (utf8data->data[2] & 0x3f) << 6;
value |= (utf8data->data[1] & 0x3f) << 12;
- value |= (utf8data->data[0] & 0x07) << 18;
+ value |= (utf8data->data[0] & 0x7) << 18;
break;
}
return (value);
}
+/*
+ * Encode the code point uc as UTF-8 into utf8data.
+ *
+ * On success, utf8data->size is set to the number of bytes produced (1 to
+ * 4), utf8data->data holds the encoded bytes, utf8data->width is set from
+ * utf8_width() and 0 is returned. If uc does not fit in the 21 bits that a
+ * four-byte sequence can carry, -1 is returned and utf8data is left
+ * untouched.
+ */
+int
+utf8_split(u_int uc, struct utf8_data *utf8data)
+{
+	/*
+	 * The range bounds are inclusive: 0x7f, 0x7ff and 0xffff are the
+	 * largest values that fit in one, two and three bytes respectively.
+	 * Comparing with "<" would push each of them into the next longer
+	 * form and emit an overlong (invalid) sequence, and would reject
+	 * 0x1fffff even though it fits in four bytes.
+	 */
+	if (uc <= 0x7f) {
+		utf8data->size = 1;
+		utf8data->data[0] = uc;
+	} else if (uc <= 0x7ff) {
+		utf8data->size = 2;
+		utf8data->data[0] = 0xc0 | ((uc >> 6) & 0x1f);
+		utf8data->data[1] = 0x80 | (uc & 0x3f);
+	} else if (uc <= 0xffff) {
+		utf8data->size = 3;
+		utf8data->data[0] = 0xe0 | ((uc >> 12) & 0xf);
+		utf8data->data[1] = 0x80 | ((uc >> 6) & 0x3f);
+		utf8data->data[2] = 0x80 | (uc & 0x3f);
+	} else if (uc <= 0x1fffff) {
+		utf8data->size = 4;
+		utf8data->data[0] = 0xf0 | ((uc >> 18) & 0x7);
+		utf8data->data[1] = 0x80 | ((uc >> 12) & 0x3f);
+		utf8data->data[2] = 0x80 | ((uc >> 6) & 0x3f);
+		utf8data->data[3] = 0x80 | (uc & 0x3f);
+	} else
+		return (-1);
+	utf8data->width = utf8_width(utf8data);
+	return (0);
+}
+
/* Split a two-byte UTF-8 character. */
u_int
utf8_split2(u_int uc, u_char *ptr)