summaryrefslogtreecommitdiffstats
path: root/utf8.c
diff options
context:
space:
mode:
author Thomas Adam <thomas@xteddy.org> 2017-06-05 11:59:38 +0100
committer Thomas Adam <thomas@xteddy.org> 2017-06-05 11:59:38 +0100
commit e62e17d0461cfb0bfb55ae3c9c7a3815235298fb (patch)
tree 258f2a1e09eb8553842383636a9f6f8a3f75074a /utf8.c
parent 1c83c0ebcd483c668f02f5f7a8cb80f8a0dc162c (diff)
parent 2f04108f3a35271ef60b3028699b6363e1714140 (diff)
Merge branch 'obsd-master'
Conflicts: tmux.1 window.c
Diffstat (limited to 'utf8.c')
-rw-r--r-- utf8.c 24
1 files changed, 24 insertions, 0 deletions
diff --git a/utf8.c b/utf8.c
index 8afdac97..a91da360 100644
--- a/utf8.c
+++ b/utf8.c
@@ -232,6 +232,30 @@ utf8_stravis(char **dst, const char *src, int flag)
return (len);
}
+/* Return 1 if s holds only printable ASCII (0x20-0x7e) and multibyte sequences that reach UTF8_DONE, otherwise 0. */
+int
+utf8_isvalid(const char *s)
+{
+ struct utf8_data ud;
+ const char *end;
+ enum utf8_state more;
+
+ end = s + strlen(s); /* one past the last byte before the terminating NUL */
+ while (s < end) {
+ if ((more = utf8_open(&ud, *s)) == UTF8_MORE) { /* utf8_open asks for more bytes: start of a multibyte sequence */
+ while (++s < end && more == UTF8_MORE) /* feed following bytes while the sequence still wants more */
+ more = utf8_append(&ud, *s);
+ if (more == UTF8_DONE) /* sequence completed; s already points at the byte after it */
+ continue;
+ return (0); /* did not finish with UTF8_DONE, e.g. the string ended mid-sequence */
+ }
+ if (*s < 0x20 || *s > 0x7e) /* not opened as multibyte: accept only 0x20-0x7e */
+ return (0);
+ s++;
+ }
+ return (1); /* every byte was accepted; an empty string also gets here */
+}
+
/*
* Sanitize a string, changing any UTF-8 characters to '_'. Caller should free
* the returned string. Anything not valid printable ASCII or UTF-8 is