summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Nicholas Marriott <nicholas.marriott@gmail.com> 2016-09-01 20:40:03 +0100
committer: Nicholas Marriott <nicholas.marriott@gmail.com> 2016-09-01 20:40:03 +0100
commit: 6c94774b70f72952c4c512e4aa59a207ca1c34f2 (patch)
tree: 04502f9607958f3b4559e3bac5e5668e8d517ab0
parent: ae297cb487590d0bb8e42e21e28926a1f957ad0b (diff)
Add support for using utf8proc with --enable-utf8proc, useful for platforms
(like OS X) where the system implementation is crap. From Joshua Rubin.
-rw-r--r-- Makefile.am 7
-rw-r--r-- compat.h 9
-rw-r--r-- compat/utf8proc.c 70
-rw-r--r-- configure.ac 25
-rw-r--r-- utf8.c 12
5 files changed, 120 insertions, 3 deletions
diff --git a/Makefile.am b/Makefile.am
index 1b411afd..81dc4c71 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -185,7 +185,12 @@ dist_tmux_SOURCES = \
xterm-keys.c
nodist_tmux_SOURCES = osdep-@PLATFORM@.c
-# Pile in all the compat/ stuff that is needed.
+# Add compat file for utf8proc.
+if HAVE_UTF8PROC
+nodist_tmux_SOURCES += compat/utf8proc.c
+endif
+
+# Add compat for missing or broken functions.
if NO_FORKPTY
nodist_tmux_SOURCES += compat/forkpty-@PLATFORM@.c
endif
diff --git a/compat.h b/compat.h
index 7f17e193..28ca9c61 100644
--- a/compat.h
+++ b/compat.h
@@ -279,7 +279,14 @@ int openat(int, const char *, int, ...);
#ifndef HAVE_REALLOCARRAY
/* reallocarray.c */
-void *reallocarray(void *, size_t, size_t size);
+void *reallocarray(void *, size_t, size_t);
+#endif
+
+#ifdef HAVE_UTF8PROC
+/* utf8proc.c */
+int utf8proc_wcwidth(wchar_t);
+int utf8proc_mbtowc(wchar_t *, const char *, size_t);
+int utf8proc_wctomb(char *, wchar_t);
#endif
#ifdef HAVE_GETOPT
diff --git a/compat/utf8proc.c b/compat/utf8proc.c
new file mode 100644
index 00000000..023d762a
--- /dev/null
+++ b/compat/utf8proc.c
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2016 Joshua Rubin <joshua@rubixconsulting.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
+ * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+
+#include <utf8proc.h>
+
+#include "tmux.h"
+
+/*
+ * wcwidth() replacement built on utf8proc: return the number of terminal
+ * cells used to display wc. Private use and "other symbol" codepoints are
+ * always reported as one cell; everything else uses utf8proc's own width.
+ */
+int
+utf8proc_wcwidth(wchar_t wc)
+{
+	switch (utf8proc_category(wc)) {
+	case UTF8PROC_CATEGORY_CO:
+		/*
+		 * Private use: powerline and similar glyphs live here and
+		 * have "ambiguous" width, so treat them as one cell.
+		 */
+		return (1);
+	case UTF8PROC_CATEGORY_SO:
+		/* Symbols, like emoji, should always use width 1. */
+		return (1);
+	default:
+		/* Anything else: take whatever utf8proc reports. */
+		return (utf8proc_charwidth(wc));
+	}
+}
+
+/*
+ * mbtowc() replacement built on utf8proc: decode one UTF-8 sequence of at
+ * most n bytes starting at s. When pwc is not NULL the decoded codepoint is
+ * stored in *pwc (-1 is stored if the sequence is invalid). Returns the
+ * value reported by utf8proc_iterate() on success, 0 if s is NULL, or -1 on
+ * error.
+ */
+int
+utf8proc_mbtowc(wchar_t *pwc, const char *s, size_t n)
+{
+	utf8proc_ssize_t	slen;
+	utf8proc_int32_t	cp;
+
+	if (s == NULL)
+		return (0);
+
+	/*
+	 * Decode into a utf8proc_int32_t rather than straight into *pwc:
+	 * wchar_t is not guaranteed to be the same type as utf8proc's
+	 * codepoint type, and mbtowc() callers may pass a NULL pwc.
+	 */
+	slen = utf8proc_iterate((const utf8proc_uint8_t *)s,
+	    (utf8proc_ssize_t)n, &cp);
+	if (pwc != NULL)
+		*pwc = (wchar_t)cp;
+
+	/*
+	 * cp == -1 indicates invalid codepoint
+	 * slen < 0 indicates an error
+	 */
+	if (cp == -1 || slen < 0)
+		return (-1);
+	return ((int)slen);
+}
+
+/*
+ * wctomb() replacement built on utf8proc: encode wc as UTF-8 into s.
+ * Returns 0 if s is NULL, -1 if utf8proc does not consider wc a valid
+ * codepoint, otherwise the result of utf8proc_encode_char().
+ */
+int
+utf8proc_wctomb(char *s, wchar_t wc)
+{
+	if (s == NULL)
+		return (0);
+
+	if (utf8proc_codepoint_valid(wc))
+		return (utf8proc_encode_char(wc, s));
+	return (-1);
+}
diff --git a/configure.ac b/configure.ac
index 636bfcc1..c14a6e61 100644
--- a/configure.ac
+++ b/configure.ac
@@ -152,7 +152,7 @@ if test "x$found_libevent" = xno; then
AC_MSG_ERROR("libevent not found")
fi
-# Look for ncurses
+# Look for ncurses.
PKG_CHECK_MODULES(
LIBNCURSES,
ncurses,
@@ -196,6 +196,29 @@ if test "x$found_utempter" = xyes; then
fi
fi
+# Look for utf8proc.
+#
+# found_utf8proc starts as the value given to --enable-utf8proc or
+# --disable-utf8proc; when neither option is passed it defaults to yes, so
+# the header and library checks below are attempted.
+AC_ARG_ENABLE(
+ utf8proc,
+ AC_HELP_STRING(--enable-utf8proc, use utf8proc if it is installed),
+ found_utf8proc=$enable_utf8proc,
+ found_utf8proc=yes
+)
+# Require the utf8proc.h header first, then a library providing
+# utf8proc_charwidth. HAVE_UTF8PROC is only defined when both are found.
+if test "x$found_utf8proc" = xyes; then
+ AC_CHECK_HEADER(utf8proc.h, found_utf8proc=yes, found_utf8proc=no)
+ if test "x$found_utf8proc" = xyes; then
+ AC_SEARCH_LIBS(
+ utf8proc_charwidth,
+ utf8proc,
+ found_utf8proc=yes,
+ found_utf8proc=no
+ )
+ if test "x$found_utf8proc" = xyes; then
+ AC_DEFINE(HAVE_UTF8PROC)
+ fi
+ fi
+fi
+# Expose the final result to automake as the HAVE_UTF8PROC conditional.
+AM_CONDITIONAL(HAVE_UTF8PROC, [test "x$found_utf8proc" = xyes])
+
# Check for b64_ntop.
AC_MSG_CHECKING(for b64_ntop)
AC_TRY_LINK(
diff --git a/utf8.c b/utf8.c
index bb0be34f..eb9b47a9 100644
--- a/utf8.c
+++ b/utf8.c
@@ -109,7 +109,11 @@ utf8_width(wchar_t wc)
{
int width;
+#ifdef HAVE_UTF8PROC
+ width = utf8proc_wcwidth(wc);
+#else
width = wcwidth(wc);
+#endif
if (width < 0 || width > 0xff) {
log_debug("Unicode %04x, wcwidth() %d", wc, width);
@@ -135,7 +139,11 @@ utf8_width(wchar_t wc)
enum utf8_state
utf8_combine(const struct utf8_data *ud, wchar_t *wc)
{
+#ifdef HAVE_UTF8PROC
+ switch (utf8proc_mbtowc(wc, ud->data, ud->size)) {
+#else
switch (mbtowc(wc, ud->data, ud->size)) {
+#endif
case -1:
log_debug("UTF-8 %.*s, mbtowc() %d", (int)ud->size, ud->data,
errno);
@@ -155,7 +163,11 @@ utf8_split(wchar_t wc, struct utf8_data *ud)
char s[MB_LEN_MAX];
int slen;
+#ifdef HAVE_UTF8PROC
+ slen = utf8proc_wctomb(s, wc);
+#else
slen = wctomb(s, wc);
+#endif
if (slen <= 0 || slen > (int)sizeof ud->data)
return (UTF8_ERROR);