summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author pgen <p.gen.progs@gmail.com> 2024-02-18 00:14:57 +0100
committer pgen <p.gen.progs@gmail.com> 2024-02-18 23:14:02 +0100
commit 5c63131b72c214ea8e5f5505f49ae7fff6fab44b (patch)
tree 6b05afdd1183fea636a150518c8e0d4f9ad1cb9a
parent 58d6a889133d9b6820471bebed029edce6a03212 (diff)
Initial support for UTF-8 ext. grapheme clusters
-rw-r--r-- README.rst 7
-rwxr-xr-x smenu.1 6
-rwxr-xr-x smenu.c 36
-rw-r--r-- tests/utf8/data7 3
-rw-r--r-- tests/utf8/data8 2
-rw-r--r-- tests/utf8/t0009.good 15
l--------- tests/utf8/t0009.in 1
-rw-r--r-- tests/utf8/t0009.tst 4
-rw-r--r-- tests/utf8/t0010.good 13
l--------- tests/utf8/t0010.in 1
-rw-r--r-- tests/utf8/t0010.tst 4
-rw-r--r-- utils.c 35
-rw-r--r-- utils.h 3
13 files changed, 110 insertions, 20 deletions
diff --git a/README.rst b/README.rst
index e0445d1..5d5680f 100644
--- a/README.rst
+++ b/README.rst
@@ -30,8 +30,11 @@ previous contents of the terminal are neither modified nor lost.
I've tried to make it as easy to use as possible.
It should work on all terminals managed in the ``terminfo`` database.
-The ``UTF-8`` encoding is supported, including for large characters,
-with the exception of grapheme groups for the time being.
+``UTF-8`` encoding is supported, including for double-width characters.
+Support for extended grapheme clusters is experimental but seems to work,
+with best results when appropriate terminals are used such as wezterm
+or iTerm.
+
The encoding of ``UTF-8`` glyphs must also be in canonical form, as no
effort will be made to put them in this form.
diff --git a/smenu.1 b/smenu.1
index fe06b46..aa74110 100755
--- a/smenu.1
+++ b/smenu.1
@@ -245,6 +245,12 @@ This can block sole shifting operations.
With many terminal emulators, it is possible to use the mouse to interact
with the screen content.
+\fBWarning\fP, if groups of extended graphemes are present in the input,
+mouse-based selection is only accurate if the terminal correctly displays
+these graphemes.
+An example of a non-functional terminal is xterm, an example of a
+functional terminal is wezterm.
+
When the mouse is supported, the cursor can turn into an arrow (but
not always) and the mouse can then be used as a point and click device
as follows:
diff --git a/smenu.c b/smenu.c
index e09a0ae..a8bec6d 100755
--- a/smenu.c
+++ b/smenu.c
@@ -4025,7 +4025,7 @@ get_message_lines(char *message,
/* If needed, update the message maximum width. */
/* """""""""""""""""""""""""""""""""""""""""""" */
- n = wcswidth((w = utf8_strtowcs(str)), utf8_strlen(str));
+ n = my_wcswidth((w = utf8_strtowcs(str)), utf8_strlen(str));
free(w);
if (n > *message_max_width)
@@ -4046,7 +4046,7 @@ get_message_lines(char *message,
{
ll_append(message_lines_list, xstrdup(ptr));
- n = wcswidth((w = utf8_strtowcs(ptr)), utf8_strlen(ptr));
+ n = my_wcswidth((w = utf8_strtowcs(ptr)), utf8_strlen(ptr));
free(w);
if (n > *message_max_width)
@@ -4134,7 +4134,7 @@ build_metadata(term_t *term, long count, win_t *win)
/* has already been utf8_validated/repaired. */
/* """"""""""""""""""""""""""""""""""""""""""""""""""""""""""" */
word_len = mbstowcs(NULL, word_a[i].str, 0);
- word_width = wcswidth((w = utf8_strtowcs(word_a[i].str)), word_len);
+ word_width = my_wcswidth((w = utf8_strtowcs(word_a[i].str)), word_len);
/* Manage the case where the word is larger than the terminal width. */
/* """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" */
@@ -4144,7 +4144,7 @@ build_metadata(term_t *term, long count, win_t *win)
/* """"""""""""""""""""""""""""""" */
do
{
- word_width = wcswidth(w, word_len--);
+ word_width = my_wcswidth(w, word_len--);
} while (word_len > 0 && word_width >= term->ncolumns - 2);
}
free(w);
@@ -4718,9 +4718,9 @@ disp_message(ll_t *message_lines_list,
/* Adjust size and len if the terminal is not large enough. */
/* """""""""""""""""""""""""""""""""""""""""""""""""""""""" */
- size = wcswidth(w, len);
+ size = my_wcswidth(w, len);
while (len > 0 && size > term->ncolumns)
- size = wcswidth(w, --len);
+ size = my_wcswidth(w, --len);
free(w);
@@ -7052,7 +7052,7 @@ gutter_action(char *ctx_name,
win->gutter_a[i] = xcalloc(1, mblength + 1);
memcpy(win->gutter_a[i], gutter + offset, mblength);
- n = wcswidth((w = utf8_strtowcs(win->gutter_a[i])), 1);
+ n = my_wcswidth((w = utf8_strtowcs(win->gutter_a[i])), 1);
free(w);
if (n > 1)
@@ -7798,7 +7798,7 @@ da_options_action(char *ctx_name,
ctxopt_ctx_disp_usage(ctx_name, exit_after);
}
- n = wcswidth((w = utf8_strtowcs(daccess.left)), 1);
+ n = my_wcswidth((w = utf8_strtowcs(daccess.left)), 1);
free(w);
if (n > 1)
@@ -7823,7 +7823,7 @@ da_options_action(char *ctx_name,
ctxopt_ctx_disp_usage(ctx_name, exit_after);
}
- n = wcswidth((w = utf8_strtowcs(daccess.right)), 1);
+ n = my_wcswidth((w = utf8_strtowcs(daccess.right)), 1);
free(w);
if (n > 1)
@@ -7976,7 +7976,7 @@ da_options_action(char *ctx_name,
ctxopt_ctx_disp_usage(ctx_name, exit_after);
}
- n = wcswidth((w = utf8_strtowcs(daccess.num_sep)), 1);
+ n = my_wcswidth((w = utf8_strtowcs(daccess.num_sep)), 1);
free(w);
if (n > 1)
@@ -11817,7 +11817,7 @@ main(int argc, char *argv[])
}
s = (long)mbstowcs(NULL, word->str, 0);
- s = wcswidth((tmpw = utf8_strtowcs(word->str)), s);
+ s = my_wcswidth((tmpw = utf8_strtowcs(word->str)), s);
free(tmpw);
if (s > col_max_size[col_index])
@@ -11845,7 +11845,7 @@ main(int argc, char *argv[])
/* """"""""""""""""""""""""""""" */
size = (long)mbstowcs(NULL, word->str, 0);
- if ((size = wcswidth((tmpw = utf8_strtowcs(word->str)), size))
+ if ((size = my_wcswidth((tmpw = utf8_strtowcs(word->str)), size))
> tab_max_size)
tab_max_size = size;
@@ -12135,7 +12135,7 @@ main(int argc, char *argv[])
s1 = (long)strlen(word_a[wi].str);
word_width = mbstowcs(NULL, word_a[wi].str, 0);
- s2 = wcswidth((w = utf8_strtowcs(word_a[wi].str)), word_width);
+ s2 = my_wcswidth((w = utf8_strtowcs(word_a[wi].str)), word_width);
free(w);
/* Use the 0x05 character as a placeholder to preserve the internal */
@@ -12179,7 +12179,7 @@ main(int argc, char *argv[])
s1 = (long)strlen(word_a[wi].str);
word_width = mbstowcs(NULL, word_a[wi].str, 0);
- s2 = wcswidth((w = utf8_strtowcs(word_a[wi].str)), word_width);
+ s2 = my_wcswidth((w = utf8_strtowcs(word_a[wi].str)), word_width);
free(w);
temp = xcalloc(1, tab_real_max_size + s1 - s2 + 1);
memset(temp, ' ', tab_max_size + s1 - s2);
@@ -14153,7 +14153,7 @@ main(int argc, char *argv[])
str = ((output_t *)(node->data))->output_str;
fprintf(old_stdout, "%s", str);
- width += wcswidth((w = utf8_strtowcs(str)), 65535);
+ width += my_wcswidth((w = utf8_strtowcs(str)), 65535);
free(w);
free(str);
free(node->data);
@@ -14161,7 +14161,7 @@ main(int argc, char *argv[])
if (win.sel_sep != NULL)
{
fprintf(old_stdout, "%s", win.sel_sep);
- width += wcswidth((w = utf8_strtowcs(win.sel_sep)), 65535);
+ width += my_wcswidth((w = utf8_strtowcs(win.sel_sep)), 65535);
free(w);
}
else
@@ -14175,7 +14175,7 @@ main(int argc, char *argv[])
str = ((output_t *)(node->data))->output_str;
fprintf(old_stdout, "%s", str);
- width += wcswidth((w = utf8_strtowcs(str)), 65535);
+ width += my_wcswidth((w = utf8_strtowcs(str)), 65535);
free(w);
free(str);
free(node->data);
@@ -14216,7 +14216,7 @@ main(int argc, char *argv[])
rtrim(output_str, " \t", 0);
}
- width = wcswidth((w = utf8_strtowcs(output_str)), 65535);
+ width = my_wcswidth((w = utf8_strtowcs(output_str)), 65535);
free(w);
/* And print it. */
diff --git a/tests/utf8/data7 b/tests/utf8/data7
new file mode 100644
index 0000000..aa81599
--- /dev/null
+++ b/tests/utf8/data7
@@ -0,0 +1,3 @@
+x \U01F1FA\U01F1F8 x
+\uf09f87ab\uf09f87b7 x \uf09f87ab\uf09f87b7
+x \U01F1FA\U01F1F8 x
diff --git a/tests/utf8/data8 b/tests/utf8/data8
new file mode 100644
index 0000000..7fc45b5
--- /dev/null
+++ b/tests/utf8/data8
@@ -0,0 +1,2 @@
+x y \ue29da4\uefb88f\ue2808d\uf09f94a5 x y
+\uf09f91a8\ue2808d\uf09f91a9\ue2808d\uf09f91a7 x y 👩\U00200d💻
diff --git a/tests/utf8/t0009.good b/tests/utf8/t0009.good
new file mode 100644
index 0000000..10598e9
--- /dev/null
+++ b/tests/utf8/t0009.good
@@ -0,0 +1,15 @@
+$ OUT=$(smenu -c t0009.in)
+
+x 🇺🇸 x
+
+🇫🇷 x 🇫🇷
+
+x 🇺🇸 x
+6:07 7:07
+$
+
+$ echo ":$OUT:"
+
+:x:
+
+$ exit 0
diff --git a/tests/utf8/t0009.in b/tests/utf8/t0009.in
new file mode 120000
index 0000000..592b7e2
--- /dev/null
+++ b/tests/utf8/t0009.in
@@ -0,0 +1 @@
+data7 \ No newline at end of file
diff --git a/tests/utf8/t0009.tst b/tests/utf8/t0009.tst
new file mode 100644
index 0000000..9063d3a
--- /dev/null
+++ b/tests/utf8/t0009.tst
@@ -0,0 +1,4 @@
+\S[300]\s[80]OUT=$(smenu -c t0009.in)
+\S[300]\s[200]ljjl\r
+\S[300]\s[80]echo ":$\s[80]OUT:"
+exit 0
diff --git a/tests/utf8/t0010.good b/tests/utf8/t0010.good
new file mode 100644
index 0000000..3f58828
--- /dev/null
+++ b/tests/utf8/t0010.good
@@ -0,0 +1,13 @@
+$ OUT=$(smenu -c t0010.in)
+
+x y ❤️‍🔥 x y
+
+👨‍👩‍👧 x y 👩‍💻
+11:07 12:07 13:07
+$
+
+$ echo ":$OUT:"
+
+:👩‍💻:
+
+$ exit 0
diff --git a/tests/utf8/t0010.in b/tests/utf8/t0010.in
new file mode 120000
index 0000000..3ce4e61
--- /dev/null
+++ b/tests/utf8/t0010.in
@@ -0,0 +1 @@
+data8 \ No newline at end of file
diff --git a/tests/utf8/t0010.tst b/tests/utf8/t0010.tst
new file mode 100644
index 0000000..b66f6b9
--- /dev/null
+++ b/tests/utf8/t0010.tst
@@ -0,0 +1,4 @@
+\S[300]\s[80]OUT=$(smenu -c t0010.in)
+\S[300]\s[200]lljhhlll\r
+\S[300]\s[80]echo ":$\s[80]OUT:"
+exit 0
diff --git a/utils.c b/utils.c
index 010d16a..59adb75 100644
--- a/utils.c
+++ b/utils.c
@@ -20,6 +20,7 @@
#include <stdarg.h>
#include <wctype.h>
#include "xmalloc.h"
+#include "wchar.h"
#include "list.h"
#include "utf8.h"
#include "utils.h"
@@ -467,3 +468,37 @@ hexdump(const char *buf, FILE *fp, const char *prefix, size_t size)
fprintf(fp, "\n");
}
}
+
/* ===================================================================== */
/* Version of wcswidth which tries to support extended grapheme clusters */
/* by taking zero-width joining characters into account.                 */
/*                                                                       */
/* s (in) wide-character string to measure, may be NULL.                 */
/* n (in) maximum number of wide characters of s to examine.             */
/*                                                                       */
/* Returns the number of columns computed for the examined characters,   */
/* 0 when s is NULL or empty, or -1 as soon as wcwidth reports -1 for    */
/* one of them.                                                          */
/*                                                                       */
/* A character does not add to the result when it is U+200D (ZERO WIDTH  */
/* JOINER) or when the character following it, inside the examined       */
/* window, is U+200D, U+FE0F (VARIATION SELECTOR-16) or U+20E3           */
/* (COMBINING ENCLOSING KEYCAP).                                         */
/*                                                                       */
/* NOTE(review): a character immediately followed by U+FE0F or U+20E3    */
/* adds nothing, so a cluster ending with one of these marks is          */
/* presumably measured narrower than it is displayed - to be confirmed   */
/* against the targeted terminals.                                       */
/* ===================================================================== */
int
my_wcswidth(const wchar_t *s, size_t n)
{
  static const wchar_t zwj    = 0x200d; /* ZERO WIDTH JOINER.          */
  static const wchar_t vs16   = 0xfe0f; /* VARIATION SELECTOR-16.      */
  static const wchar_t keycap = 0x20e3; /* COMBINING ENCLOSING KEYCAP. */

  int    len = 0;
  size_t i;

  if (s == NULL)
    return 0;

  for (i = 0; i < n && s[i] != L'\0'; i++)
  {
    int     w = wcwidth(s[i]);
    wchar_t next;

    if (w < 0)
      return -1; /* wcwidth returned -1. */

    /* Only look ahead inside the window given by the caller: what lies */
    /* at or after position n must neither be read nor influence the    */
    /* width of the characters which are really examined.               */
    /* """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" */
    next = (i + 1 < n) ? s[i + 1] : L'\0';

    /* Do not count joiners nor the glyphs they are attached to. */
    /* """"""""""""""""""""""""""""""""""""""""""""""""""""""""" */
    if (s[i] == zwj || next == zwj || next == vs16 || next == keycap)
      continue;

    len += w;
  }

  return len;
}
diff --git a/utils.h b/utils.h
index 775bad1..c77d4c3 100644
--- a/utils.h
+++ b/utils.h
@@ -83,4 +83,7 @@ strprint(char const *s);
void
hexdump(const char *buf, FILE *fp, const char *prefix, size_t size);
+int
+my_wcswidth(const wchar_t *s, size_t n);
+
#endif