summaryrefslogtreecommitdiffstats
path: root/utf8.c
diff options
context:
space:
mode:
authornicm <nicm>2020-06-09 08:34:33 +0000
committernicm <nicm>2020-06-09 08:34:33 +0000
commitc60389acbfbacade1e2822ef9099dabaca0ad08e (patch)
tree3b5bb8b2ae7808f9e165ffbaef147dc62bce8514 /utf8.c
parenta4a3d8959890395b040ff49a12f72abc90a5da74 (diff)
It is not sensible to store pointers into an array we are going to
realloc (duh), use two trees instead.
Diffstat (limited to 'utf8.c')
-rw-r--r--utf8.c99
1 files changed, 53 insertions, 46 deletions
diff --git a/utf8.c b/utf8.c
index d88415f7..00fd5f09 100644
--- a/utf8.c
+++ b/utf8.c
@@ -28,16 +28,16 @@
#include "tmux.h"
struct utf8_item {
- u_int offset;
- RB_ENTRY(utf8_item) entry;
+ RB_ENTRY(utf8_item) index_entry;
+ u_int index;
+ RB_ENTRY(utf8_item) data_entry;
char data[UTF8_SIZE];
u_char size;
};
-RB_HEAD(utf8_tree, utf8_item);
static int
-utf8_cmp(struct utf8_item *ui1, struct utf8_item *ui2)
+utf8_data_cmp(struct utf8_item *ui1, struct utf8_item *ui2)
{
if (ui1->size < ui2->size)
return (-1);
@@ -45,12 +45,24 @@ utf8_cmp(struct utf8_item *ui1, struct utf8_item *ui2)
return (1);
return (memcmp(ui1->data, ui2->data, ui1->size));
}
-RB_GENERATE_STATIC(utf8_tree, utf8_item, entry, utf8_cmp);
-static struct utf8_tree utf8_tree = RB_INITIALIZER(utf8_tree);
+RB_HEAD(utf8_data_tree, utf8_item);
+RB_GENERATE_STATIC(utf8_data_tree, utf8_item, data_entry, utf8_data_cmp);
+static struct utf8_data_tree utf8_data_tree = RB_INITIALIZER(utf8_data_tree);
-static struct utf8_item *utf8_list;
-static u_int utf8_list_size;
-static u_int utf8_list_used;
+static int
+utf8_index_cmp(struct utf8_item *ui1, struct utf8_item *ui2)
+{
+ if (ui1->index < ui2->index)
+ return (-1);
+ if (ui1->index > ui2->index)
+ return (1);
+ return (0);
+}
+RB_HEAD(utf8_index_tree, utf8_item);
+RB_GENERATE_STATIC(utf8_index_tree, utf8_item, index_entry, utf8_index_cmp);
+static struct utf8_index_tree utf8_index_tree = RB_INITIALIZER(utf8_index_tree);
+
+static u_int utf8_next_index;
#define UTF8_GET_SIZE(uc) (((uc) >> 24) & 0x1f)
#define UTF8_GET_WIDTH(flags) (((uc) >> 29) - 1)
@@ -58,59 +70,56 @@ static u_int utf8_list_used;
#define UTF8_SET_SIZE(size) (((utf8_char)(size)) << 24)
#define UTF8_SET_WIDTH(width) ((((utf8_char)(width)) + 1) << 29)
-/* Get a UTF-8 item by offset. */
+/* Get a UTF-8 item from data. */
static struct utf8_item *
-utf8_get_item(const char *data, size_t size)
+utf8_item_by_data(const char *data, size_t size)
{
struct utf8_item ui;
memcpy(ui.data, data, size);
ui.size = size;
- return (RB_FIND(utf8_tree, &utf8_tree, &ui));
+ return (RB_FIND(utf8_data_tree, &utf8_data_tree, &ui));
}
-/* Expand UTF-8 list. */
-static int
-utf8_expand_list(void)
+/* Get a UTF-8 item from data. */
+static struct utf8_item *
+utf8_item_by_index(u_int index)
{
- if (utf8_list_size == 0xffffff)
- return (-1);
- if (utf8_list_size == 0)
- utf8_list_size = 256;
- else if (utf8_list_size > 0x7fffff)
- utf8_list_size = 0xffffff;
- else
- utf8_list_size *= 2;
- utf8_list = xreallocarray(utf8_list, utf8_list_size, sizeof *utf8_list);
- return (0);
+ struct utf8_item ui;
+
+ ui.index = index;
+
+ return (RB_FIND(utf8_index_tree, &utf8_index_tree, &ui));
}
/* Add a UTF-8 item. */
static int
-utf8_put_item(const char *data, size_t size, u_int *offset)
+utf8_put_item(const char *data, size_t size, u_int *index)
{
struct utf8_item *ui;
- ui = utf8_get_item(data, size);
+ ui = utf8_item_by_data(data, size);
if (ui != NULL) {
- *offset = ui->offset;
- log_debug("%s: have %.*s at %u", __func__, (int)size, data,
- *offset);
+ log_debug("%s: found %.*s = %u", __func__, (int)size, data,
+ *index);
+ *index = ui->index;
return (0);
}
- if (utf8_list_used == utf8_list_size && utf8_expand_list() != 0)
+ if (utf8_next_index == 0xffffff + 1)
return (-1);
- *offset = utf8_list_used++;
- ui = &utf8_list[*offset];
- ui->offset = *offset;
+ ui = xcalloc(1, sizeof *ui);
+ ui->index = utf8_next_index++;
+ RB_INSERT(utf8_index_tree, &utf8_index_tree, ui);
+
memcpy(ui->data, data, size);
ui->size = size;
- RB_INSERT(utf8_tree, &utf8_tree, ui);
+ RB_INSERT(utf8_data_tree, &utf8_data_tree, ui);
- log_debug("%s: added %.*s at %u", __func__, (int)size, data, *offset);
+ log_debug("%s: added %.*s = %u", __func__, (int)size, data, *index);
+ *index = ui->index;
return (0);
}
@@ -118,7 +127,7 @@ utf8_put_item(const char *data, size_t size, u_int *offset)
enum utf8_state
utf8_from_data(const struct utf8_data *ud, utf8_char *uc)
{
- u_int offset;
+ u_int index;
if (ud->width > 2)
fatalx("invalid UTF-8 width");
@@ -126,12 +135,12 @@ utf8_from_data(const struct utf8_data *ud, utf8_char *uc)
if (ud->size > UTF8_SIZE)
goto fail;
if (ud->size <= 3) {
- offset = (((utf8_char)ud->data[2] << 16)|
+ index = (((utf8_char)ud->data[2] << 16)|
((utf8_char)ud->data[1] << 8)|
((utf8_char)ud->data[0]));
- } else if (utf8_put_item(ud->data, ud->size, &offset) != 0)
+ } else if (utf8_put_item(ud->data, ud->size, &index) != 0)
goto fail;
- *uc = UTF8_SET_SIZE(ud->size)|UTF8_SET_WIDTH(ud->width)|offset;
+ *uc = UTF8_SET_SIZE(ud->size)|UTF8_SET_WIDTH(ud->width)|index;
log_debug("%s: (%d %d %.*s) -> %08x", __func__, ud->width, ud->size,
(int)ud->size, ud->data, *uc);
return (UTF8_DONE);
@@ -151,7 +160,7 @@ void
utf8_to_data(utf8_char uc, struct utf8_data *ud)
{
struct utf8_item *ui;
- u_int offset;
+ u_int index;
memset(ud, 0, sizeof *ud);
ud->size = ud->have = UTF8_GET_SIZE(uc);
@@ -162,13 +171,11 @@ utf8_to_data(utf8_char uc, struct utf8_data *ud)
ud->data[1] = ((uc >> 8) & 0xff);
ud->data[0] = (uc & 0xff);
} else {
- offset = (uc & 0xffffff);
- if (offset >= utf8_list_used)
+ index = (uc & 0xffffff);
+ if ((ui = utf8_item_by_index(index)) == NULL)
memset(ud->data, ' ', ud->size);
- else {
- ui = &utf8_list[offset];
+ else
memcpy(ud->data, ui->data, ud->size);
- }
}
log_debug("%s: %08x -> (%d %d %.*s)", __func__, uc, ud->width, ud->size,