diff options
author | pgen <p.gen.progs@gmail.com> | 2024-02-18 00:14:57 +0100 |
---|---|---|
committer | pgen <p.gen.progs@gmail.com> | 2024-02-18 23:14:02 +0100 |
commit | 5c63131b72c214ea8e5f5505f49ae7fff6fab44b (patch) | |
tree | 6b05afdd1183fea636a150518c8e0d4f9ad1cb9a | |
parent | 58d6a889133d9b6820471bebed029edce6a03212 (diff) |
Initial support for UTF-8 ext. grapheme clusters
-rw-r--r-- | README.rst | 7 | ||||
-rwxr-xr-x | smenu.1 | 6 | ||||
-rwxr-xr-x | smenu.c | 36 | ||||
-rw-r--r-- | tests/utf8/data7 | 3 | ||||
-rw-r--r-- | tests/utf8/data8 | 2 | ||||
-rw-r--r-- | tests/utf8/t0009.good | 15 | ||||
l--------- | tests/utf8/t0009.in | 1 | ||||
-rw-r--r-- | tests/utf8/t0009.tst | 4 | ||||
-rw-r--r-- | tests/utf8/t0010.good | 13 | ||||
l--------- | tests/utf8/t0010.in | 1 | ||||
-rw-r--r-- | tests/utf8/t0010.tst | 4 | ||||
-rw-r--r-- | utils.c | 35 | ||||
-rw-r--r-- | utils.h | 3 |
13 files changed, 110 insertions, 20 deletions
@@ -30,8 +30,11 @@ previous contents of the terminal are neither modified nor lost. I've tried to make it as easy to use as possible. It should work on all terminals managed in the ``terminfo`` database. -The ``UTF-8`` encoding is supported, including for large characters, -with the exception of grapheme groups for the time being. +``UTF-8`` encoding is supported, including for double-width characters. +Support for extended grapheme clusters is experimental but seems to work, +with best results when appropriate terminals are used such as wezterm +or iTerm. + The encoding of ``UTF-8`` glyphs must also be in canonical form, as no effort will be made to put them in this form. @@ -245,6 +245,12 @@ This can block sole shifting operations. With many terminal emulators, it is possible to use the mouse to interact with the screen content. +\fBWarning\fP, if groups of extended graphemes are present in the input, +mouse-based selection is only accurate if the terminal correctly displays +these graphemes. +An example of a non-functional terminal is xterm, an example of a +functional terminal is wezterm. + When the mouse is supported, the cursor can turn into an arrow (but not always) and the mouse can then be used as a point and click device as follows: @@ -4025,7 +4025,7 @@ get_message_lines(char *message, /* If needed, update the message maximum width. */ /* """""""""""""""""""""""""""""""""""""""""""" */ - n = wcswidth((w = utf8_strtowcs(str)), utf8_strlen(str)); + n = my_wcswidth((w = utf8_strtowcs(str)), utf8_strlen(str)); free(w); if (n > *message_max_width) @@ -4046,7 +4046,7 @@ get_message_lines(char *message, { ll_append(message_lines_list, xstrdup(ptr)); - n = wcswidth((w = utf8_strtowcs(ptr)), utf8_strlen(ptr)); + n = my_wcswidth((w = utf8_strtowcs(ptr)), utf8_strlen(ptr)); free(w); if (n > *message_max_width) @@ -4134,7 +4134,7 @@ build_metadata(term_t *term, long count, win_t *win) /* has already been utf8_validated/repaired. 
*/ /* """"""""""""""""""""""""""""""""""""""""""""""""""""""""""" */ word_len = mbstowcs(NULL, word_a[i].str, 0); - word_width = wcswidth((w = utf8_strtowcs(word_a[i].str)), word_len); + word_width = my_wcswidth((w = utf8_strtowcs(word_a[i].str)), word_len); /* Manage the case where the word is larger than the terminal width. */ /* """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" */ @@ -4144,7 +4144,7 @@ build_metadata(term_t *term, long count, win_t *win) /* """"""""""""""""""""""""""""""" */ do { - word_width = wcswidth(w, word_len--); + word_width = my_wcswidth(w, word_len--); } while (word_len > 0 && word_width >= term->ncolumns - 2); } free(w); @@ -4718,9 +4718,9 @@ disp_message(ll_t *message_lines_list, /* Adjust size and len if the terminal is not large enough. */ /* """""""""""""""""""""""""""""""""""""""""""""""""""""""" */ - size = wcswidth(w, len); + size = my_wcswidth(w, len); while (len > 0 && size > term->ncolumns) - size = wcswidth(w, --len); + size = my_wcswidth(w, --len); free(w); @@ -7052,7 +7052,7 @@ gutter_action(char *ctx_name, win->gutter_a[i] = xcalloc(1, mblength + 1); memcpy(win->gutter_a[i], gutter + offset, mblength); - n = wcswidth((w = utf8_strtowcs(win->gutter_a[i])), 1); + n = my_wcswidth((w = utf8_strtowcs(win->gutter_a[i])), 1); free(w); if (n > 1) @@ -7798,7 +7798,7 @@ da_options_action(char *ctx_name, ctxopt_ctx_disp_usage(ctx_name, exit_after); } - n = wcswidth((w = utf8_strtowcs(daccess.left)), 1); + n = my_wcswidth((w = utf8_strtowcs(daccess.left)), 1); free(w); if (n > 1) @@ -7823,7 +7823,7 @@ da_options_action(char *ctx_name, ctxopt_ctx_disp_usage(ctx_name, exit_after); } - n = wcswidth((w = utf8_strtowcs(daccess.right)), 1); + n = my_wcswidth((w = utf8_strtowcs(daccess.right)), 1); free(w); if (n > 1) @@ -7976,7 +7976,7 @@ da_options_action(char *ctx_name, ctxopt_ctx_disp_usage(ctx_name, exit_after); } - n = wcswidth((w = utf8_strtowcs(daccess.num_sep)), 1); + n = my_wcswidth((w = 
utf8_strtowcs(daccess.num_sep)), 1); free(w); if (n > 1) @@ -11817,7 +11817,7 @@ main(int argc, char *argv[]) } s = (long)mbstowcs(NULL, word->str, 0); - s = wcswidth((tmpw = utf8_strtowcs(word->str)), s); + s = my_wcswidth((tmpw = utf8_strtowcs(word->str)), s); free(tmpw); if (s > col_max_size[col_index]) @@ -11845,7 +11845,7 @@ main(int argc, char *argv[]) /* """"""""""""""""""""""""""""" */ size = (long)mbstowcs(NULL, word->str, 0); - if ((size = wcswidth((tmpw = utf8_strtowcs(word->str)), size)) + if ((size = my_wcswidth((tmpw = utf8_strtowcs(word->str)), size)) > tab_max_size) tab_max_size = size; @@ -12135,7 +12135,7 @@ main(int argc, char *argv[]) s1 = (long)strlen(word_a[wi].str); word_width = mbstowcs(NULL, word_a[wi].str, 0); - s2 = wcswidth((w = utf8_strtowcs(word_a[wi].str)), word_width); + s2 = my_wcswidth((w = utf8_strtowcs(word_a[wi].str)), word_width); free(w); /* Use the 0x05 character as a placeholder to preserve the internal */ @@ -12179,7 +12179,7 @@ main(int argc, char *argv[]) s1 = (long)strlen(word_a[wi].str); word_width = mbstowcs(NULL, word_a[wi].str, 0); - s2 = wcswidth((w = utf8_strtowcs(word_a[wi].str)), word_width); + s2 = my_wcswidth((w = utf8_strtowcs(word_a[wi].str)), word_width); free(w); temp = xcalloc(1, tab_real_max_size + s1 - s2 + 1); memset(temp, ' ', tab_max_size + s1 - s2); @@ -14153,7 +14153,7 @@ main(int argc, char *argv[]) str = ((output_t *)(node->data))->output_str; fprintf(old_stdout, "%s", str); - width += wcswidth((w = utf8_strtowcs(str)), 65535); + width += my_wcswidth((w = utf8_strtowcs(str)), 65535); free(w); free(str); free(node->data); @@ -14161,7 +14161,7 @@ main(int argc, char *argv[]) if (win.sel_sep != NULL) { fprintf(old_stdout, "%s", win.sel_sep); - width += wcswidth((w = utf8_strtowcs(win.sel_sep)), 65535); + width += my_wcswidth((w = utf8_strtowcs(win.sel_sep)), 65535); free(w); } else @@ -14175,7 +14175,7 @@ main(int argc, char *argv[]) str = ((output_t *)(node->data))->output_str; fprintf(old_stdout, 
"%s", str); - width += wcswidth((w = utf8_strtowcs(str)), 65535); + width += my_wcswidth((w = utf8_strtowcs(str)), 65535); free(w); free(str); free(node->data); @@ -14216,7 +14216,7 @@ main(int argc, char *argv[]) rtrim(output_str, " \t", 0); } - width = wcswidth((w = utf8_strtowcs(output_str)), 65535); + width = my_wcswidth((w = utf8_strtowcs(output_str)), 65535); free(w); /* And print it. */ diff --git a/tests/utf8/data7 b/tests/utf8/data7 new file mode 100644 index 0000000..aa81599 --- /dev/null +++ b/tests/utf8/data7 @@ -0,0 +1,3 @@ +x \U01F1FA\U01F1F8 x +\uf09f87ab\uf09f87b7 x \uf09f87ab\uf09f87b7 +x \U01F1FA\U01F1F8 x diff --git a/tests/utf8/data8 b/tests/utf8/data8 new file mode 100644 index 0000000..7fc45b5 --- /dev/null +++ b/tests/utf8/data8 @@ -0,0 +1,2 @@ +x y \ue29da4\uefb88f\ue2808d\uf09f94a5 x y +\uf09f91a8\ue2808d\uf09f91a9\ue2808d\uf09f91a7 x y π©\U00200dπ» diff --git a/tests/utf8/t0009.good b/tests/utf8/t0009.good new file mode 100644 index 0000000..10598e9 --- /dev/null +++ b/tests/utf8/t0009.good @@ -0,0 +1,15 @@ +$ OUT=$(smenu -c t0009.in) + +x πΊπΈ x + +π«π· x π«π· + +x πΊπΈ x +6:07 7:07 +$ + +$ echo ":$OUT:" + +:x: + +$ exit 0 diff --git a/tests/utf8/t0009.in b/tests/utf8/t0009.in new file mode 120000 index 0000000..592b7e2 --- /dev/null +++ b/tests/utf8/t0009.in @@ -0,0 +1 @@ +data7
\ No newline at end of file diff --git a/tests/utf8/t0009.tst b/tests/utf8/t0009.tst new file mode 100644 index 0000000..9063d3a --- /dev/null +++ b/tests/utf8/t0009.tst @@ -0,0 +1,4 @@ +\S[300]\s[80]OUT=$(smenu -c t0009.in) +\S[300]\s[200]ljjl\r +\S[300]\s[80]echo ":$\s[80]OUT:" +exit 0 diff --git a/tests/utf8/t0010.good b/tests/utf8/t0010.good new file mode 100644 index 0000000..3f58828 --- /dev/null +++ b/tests/utf8/t0010.good @@ -0,0 +1,13 @@ +$ OUT=$(smenu -c t0010.in) + +x y ❤️‍🔥 x y + +👨‍👩‍👧 x y 👩‍💻 +11:07 12:07 13:07 +$ + +$ echo ":$OUT:" + +:👩‍💻: + +$ exit 0 diff --git a/tests/utf8/t0010.in b/tests/utf8/t0010.in new file mode 120000 index 0000000..3ce4e61 --- /dev/null +++ b/tests/utf8/t0010.in @@ -0,0 +1 @@ +data8
/* ===================================================================== */
/* Version of wcswidth which tries to support extended grapheme clusters */
/* by taking zero-width joiners and modifiers into account.              */
/*                                                                       */
/* s (in) wide character string to measure (may be NULL).                */
/* n (in) maximum number of wide characters of s to consider.            */
/*                                                                       */
/* Returns the accumulated width of the first n wide characters of s     */
/* (stopping at the terminating null wide character if found earlier),   */
/* 0 if s is NULL or empty, or -1 if wcwidth returns a negative value    */
/* for one of the characters examined.                                   */
/*                                                                       */
/* A character is not counted when it is a ZERO WIDTH JOINER or when the */
/* next character, still inside the first n ones, is a ZERO WIDTH        */
/* JOINER, a VARIATION SELECTOR-16 or a COMBINING ENCLOSING KEYCAP.      */
/* Characters located at or after position n are never examined so that  */
/* the result only depends on the measured prefix and so that buffers    */
/* of exactly n wide characters without terminator can be used.          */
/*                                                                       */
/* NOTE(review): a base character directly followed by U+FE0F or U+20E3  */
/* is skipped too; if wcwidth reports 0 for these marks, a sequence      */
/* which does not end with a joined glyph may be measured as 0. Confirm  */
/* that this is the intended heuristic.                                  */
/* ===================================================================== */
int
my_wcswidth(const wchar_t *s, size_t n)
{
  enum
  {
    ZERO_WIDTH_JOINER  = 0x200d,
    VARIATION_SELECTOR = 0xfe0f,
    ENCLOSING_KEYCAP   = 0x20e3
  };

  int    len = 0;
  size_t m;

  if (s == NULL)
    return 0;

  for (m = 0; m < n && s[m] != L'\0'; m++)
  {
    wchar_t next;
    int     l = wcwidth(s[m]);

    if (l < 0)
      return -1; /* wcwidth returned -1. */

    /* Only look ahead inside the range requested by the caller. */
    /* """"""""""""""""""""""""""""""""""""""""""""""""""""""""" */
    next = (m + 1 < n) ? s[m + 1] : L'\0';

    /* Do not count zero-width-length glyphs nor the glyphs they modify. */
    /* """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" */
    if (s[m] != ZERO_WIDTH_JOINER && next != ZERO_WIDTH_JOINER
        && next != VARIATION_SELECTOR && next != ENCLOSING_KEYCAP)
      len += l;
  }

  return len;
}