From 0b9536c75192b45977549a15345925e0d119f9c9 Mon Sep 17 00:00:00 2001
From: pgen
Date: Tue, 6 Jul 2021 00:15:04 +0200
Subject: Fix and rewrite cptoutf8 to make it more readable
---
utf8.c | 38 ++++++++++++++++----------------------
1 file changed, 16 insertions(+), 22 deletions(-)
diff --git a/utf8.c b/utf8.c
index 6873397..67be4e6 100644
--- a/utf8.c
+++ b/utf8.c
@@ -27,39 +27,33 @@
int
cptoutf8(char * utf8_str, uint32_t c)
{
- int len = 0;
- int first = 0;
- int i;
+ int len = 0;
if (c < 0x80)
{
- first = 0;
- len = 1;
+ utf8_str[0] = c;
+ len = 1;
}
else if (c < 0x800)
{
- first = 0xc0;
- len = 2;
+ utf8_str[0] = 0xC0 | ((c >> 6) & 0x1F);
+ utf8_str[1] = 0x80 | (c & 0x3F);
+ len = 2;
}
else if (c < 0x10000)
{
- first = 0xe0;
- len = 3;
+ utf8_str[0] = 0xE0 | ((c >> 12) & 0x0F);
+ utf8_str[1] = 0x80 | ((c >> 6) & 0x3F);
+ utf8_str[2] = 0x80 | (c & 0x3F);
+ len = 3;
}
- else if (c < 0x200000)
+ else if (c < 0x110000)
{
- first = 0xf0;
- len = 4;
- }
-
- if (utf8_str)
- {
- for (i = len - 1; i > 0; --i)
- {
- utf8_str[i] = (c & 0x3f) | 0x80;
- c >>= 6;
- }
- utf8_str[0] = c | first;
+ utf8_str[0] = 0xF0 | ((c >> 18) & 0x07);
+ utf8_str[1] = 0x80 | ((c >> 12) & 0x3F);
+ utf8_str[2] = 0x80 | ((c >> 6) & 0x3F);
+ utf8_str[3] = 0x80 | (c & 0x3F);
+ len = 4;
}
return len;
--
cgit v1.2.3