summary | refs | log | tree | commit | diff | stats
path: root/scripts
diff options
context:
space:
mode:
author: Stephen Dolan <mu@netsoc.tcd.ie> 2013-09-14 19:30:39 +0100
committer: Stephen Dolan <mu@netsoc.tcd.ie> 2013-09-14 19:30:39 +0100
commit: dae2422fd18487c89dd79dc54eb4345861ab7bdc (patch)
tree: 14f046128de9c35a57e3574a3a4365095b680d4f /scripts
parent: f98e3e6c26b3b53701e1e6c2ff98ab6227cde3b8 (diff)
parent: 37cfc912c1f384d177162f8aa706452754d2c6ab (diff)
Merge branch 'libjq'
Conflicts: Makefile.am configure.ac
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/gen_utf8_tables.py 32
1 files changed, 32 insertions, 0 deletions
diff --git a/scripts/gen_utf8_tables.py b/scripts/gen_utf8_tables.py
new file mode 100644
index 00000000..65d03a2b
--- /dev/null
+++ b/scripts/gen_utf8_tables.py
@@ -0,0 +1,32 @@
#!/usr/bin/python
"""Generate the UTF-8 lookup tables.

This program was used to generate jv_utf8_tables.gen.h.

Running it prints C source to standard output:

* a ``UTF8_CONTINUATION_BYTE`` macro,
* ``utf8_coding_length`` -- per byte value, the sequence length that byte
  introduces (or a marker, see ``utf8info``),
* ``utf8_coding_bits``   -- per byte value, the mask of payload bits,
* ``utf8_first_codepoint`` -- a five-entry table of code point thresholds.

The script deliberately sticks to constructs that behave the same on
Python 2 and Python 3 (single-argument ``print(...)``, ``%`` formatting).
"""

# Marker stored in the coding-length table for continuation bytes
# (0x80-0xBF).  The same value is emitted as the C macro of the same name,
# so the Python side and the generated header cannot drift apart.
UTF8_CONTINUATION_BYTE = 255

# Number of table entries emitted per line of generated C.
_ENTRIES_PER_ROW = 16


def mask(n):
    """Return an integer whose low ``n`` bits are set (``mask(3) == 0b111``)."""
    return (1 << n) - 1


def print_table(type, name, t):
    """Print ``t`` to stdout as a ``static const`` C array definition.

    Args:
        type: C element type, e.g. ``"unsigned char"``.  (The parameter name
            shadows the builtin; it is kept for interface compatibility.)
        name: C identifier of the array.
        t: non-empty sequence of non-negative integers.  Entries are written
            in hexadecimal, zero-padded to at least two digits, sixteen per
            line.

    Raises:
        ValueError: if ``t`` is empty.  The declaration would otherwise be
            printed without any initializer, which is not valid C.
    """
    if len(t) == 0:
        raise ValueError("cannot emit empty table %r" % (name,))

    print("static const %s %s[] =" % (type, name))
    for start in range(0, len(t), _ENTRIES_PER_ROW):
        stop = start + _ENTRIES_PER_ROW
        # The first row opens the initializer; the last row closes it.
        opening = " {" if start == 0 else " "
        closing = "," if stop < len(t) else "};"
        print(opening + ", ".join("0x%02x" % n for n in t[start:stop]) + closing)


def utf8info(c):
    """Classify the byte value ``c`` for UTF-8 decoding.

    Args:
        c: integer byte value in ``0..255``.

    Returns:
        A ``(length, bits)`` tuple.  ``length`` is the number of bytes in the
        sequence that ``c`` starts, ``UTF8_CONTINUATION_BYTE`` when ``c`` is
        a continuation byte, or ``0`` when ``c`` can never appear in UTF-8.
        ``bits`` is the mask of the payload bits carried by ``c``.

    Raises:
        ValueError: if ``c`` is outside ``0..255``.
    """
    if not 0x00 <= c <= 0xFF:
        raise ValueError("byte value out of range: %r" % (c,))

    # The ranges are contiguous, so each test only needs the upper bound.
    if c <= 0x7F:  # 0xxxxxxx: single-byte (ASCII) sequence
        return 1, mask(7)
    if c <= 0xBF:  # 10xxxxxx: continuation byte
        return UTF8_CONTINUATION_BYTE, mask(6)
    if c <= 0xC1:  # 0xC0/0xC1 could only start an overlong 2-byte form
        return 0, 0
    if c <= 0xDF:  # 110xxxxx: lead byte of a 2-byte sequence
        return 2, mask(5)
    if c <= 0xEF:  # 1110xxxx: lead byte of a 3-byte sequence
        return 3, mask(4)
    if c <= 0xF4:  # 11110xxx: lead byte of a 4-byte sequence
        return 4, mask(3)
    # 0xF5-0xFF would encode values above U+10FFFF (or are not lead bytes).
    return 0, 0


def table(i):
    """Return the 256-entry list of ``utf8info(c)[i]`` for ``c`` in 0..255."""
    return [utf8info(c)[i] for c in range(256)]


print("#define UTF8_CONTINUATION_BYTE ((unsigned char)%d)"
      % UTF8_CONTINUATION_BYTE)

print_table("unsigned char", "utf8_coding_length", table(0))
print_table("unsigned char", "utf8_coding_bits", table(1))
# 0x0, 0x80, 0x800 and 0x10000 are the smallest code points that need 1, 2, 3
# and 4 bytes respectively; presumably the table is indexed by coding length
# (entry 0 being a placeholder) -- confirm against the C consumer.
print_table("int", "utf8_first_codepoint", [0, 0x0, 0x80, 0x800, 0x10000])