summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Stephen Dolan <mu@netsoc.tcd.ie> 2013-05-15 00:37:38 +0100
committer Stephen Dolan <mu@netsoc.tcd.ie> 2013-05-15 00:37:38 +0100
commit e83e51eb56a1de6e627d346f027d3ceb09ae3807 (patch)
tree ee9a376e0f88ab654fec370e972a9930d3bcdd9d
parent c496a924ce71317aa5560da0cbf5a4524bb8c226 (diff)
'length' function now measures string length in codepoints, not bytes.
-rw-r--r-- builtin.c 8
-rw-r--r-- jq_test.c 8
-rw-r--r-- jv.c 13
-rw-r--r-- jv.h 3
-rw-r--r-- jv_aux.c 4
-rw-r--r-- jv_print.c 2
-rw-r--r-- lexer.l 2
-rw-r--r-- main.c 2
-rw-r--r-- tests/all.test 4
9 files changed, 29 insertions, 17 deletions
diff --git a/builtin.c b/builtin.c
index 09ef9fcd..0b797142 100644
--- a/builtin.c
+++ b/builtin.c
@@ -187,7 +187,7 @@ static jv f_length(jv input) {
} else if (jv_get_kind(input) == JV_KIND_OBJECT) {
return jv_number(jv_object_length(input));
} else if (jv_get_kind(input) == JV_KIND_STRING) {
- return jv_number(jv_string_length(input));
+ return jv_number(jv_string_length_codepoints(input));
} else if (jv_get_kind(input) == JV_KIND_NULL) {
jv_free(input);
return jv_number(0);
@@ -220,7 +220,7 @@ static jv escape_string(jv input, const char* escapings) {
jv ret = jv_string("");
const char* i = jv_string_value(input);
- const char* end = i + jv_string_length(jv_copy(input));
+ const char* end = i + jv_string_length_bytes(jv_copy(input));
const char* cstart;
int c = 0;
while ((i = jvp_utf8_next((cstart = i), end, &c))) {
@@ -299,7 +299,7 @@ static jv f_format(jv input, jv fmt) {
jv line = jv_string("");
const char* s = jv_string_value(input);
- for (int i=0; i<jv_string_length(jv_copy(input)); i++) {
+ for (int i=0; i<jv_string_length_bytes(jv_copy(input)); i++) {
unsigned ch = (unsigned)(unsigned char)*s;
if (ch < 128 && unreserved[ch]) {
line = jv_string_append_buf(line, s, 1);
@@ -346,7 +346,7 @@ static jv f_format(jv input, jv fmt) {
jv line = jv_string("");
const char b64[64 + 1] = CHARS_ALPHANUM "+/";
const char* data = jv_string_value(input);
- int len = jv_string_length(jv_copy(input));
+ int len = jv_string_length_bytes(jv_copy(input));
for (int i=0; i<len; i+=3) {
uint32_t code = 0;
int n = len - i >= 3 ? 3 : len-i;
diff --git a/jq_test.c b/jq_test.c
index c71fdc66..25cea1c6 100644
--- a/jq_test.c
+++ b/jq_test.c
@@ -63,7 +63,7 @@ static void run_jq_tests(FILE *testdata) {
pass = 0;
}
jv as_string = jv_dump_string(jv_copy(expected), rand() & ~JV_PRINT_COLOUR);
- jv reparsed = jv_parse_sized(jv_string_value(as_string), jv_string_length(jv_copy(as_string)));
+ jv reparsed = jv_parse_sized(jv_string_value(as_string), jv_string_length_bytes(jv_copy(as_string)));
assert(jv_equal(jv_copy(expected), jv_copy(reparsed)));
jv_free(as_string);
jv_free(reparsed);
@@ -191,8 +191,8 @@ static void jv_test() {
assert(jv_equal(jv_string("foo"), jv_string_sized("foo", 3)));
char nasty[] = "foo\0";
jv shortstr = jv_string(nasty), longstr = jv_string_sized(nasty, sizeof(nasty));
- assert(jv_string_length(shortstr) == (int)strlen(nasty));
- assert(jv_string_length(longstr) == (int)sizeof(nasty));
+ assert(jv_string_length_bytes(shortstr) == (int)strlen(nasty));
+ assert(jv_string_length_bytes(longstr) == (int)sizeof(nasty));
char a1s[] = "hello", a2s[] = "hello", bs[] = "goodbye";
@@ -213,7 +213,7 @@ static void jv_test() {
for (int i=0; i<(int)sizeof(big); i++) big[i] = 'a';
big[sizeof(big)-1] = 0;
jv str = jv_string_fmt("%s", big);
- assert(jv_string_length(jv_copy(str)) == sizeof(big) - 1);
+ assert(jv_string_length_bytes(jv_copy(str)) == sizeof(big) - 1);
assert(!strcmp(big, jv_string_value(str)));
jv_free(str);
}
diff --git a/jv.c b/jv.c
index b03c024c..9316aec1 100644
--- a/jv.c
+++ b/jv.c
@@ -8,6 +8,7 @@
#include "jv_alloc.h"
#include "jv.h"
+#include "jv_unicode.h"
/*
* Internal refcounting helpers
@@ -530,13 +531,23 @@ jv jv_string(const char* str) {
return jv_string_sized(str, strlen(str));
}
-int jv_string_length(jv j) {
+int jv_string_length_bytes(jv j) {
assert(jv_get_kind(j) == JV_KIND_STRING);
int r = jvp_string_length(jvp_string_ptr(&j.val.nontrivial));
jv_free(j);
return r;
}
+int jv_string_length_codepoints(jv j) {
+ assert(jv_get_kind(j) == JV_KIND_STRING);
+ const char* i = jv_string_value(j);
+ const char* end = i + jv_string_length_bytes(jv_copy(j));
+ int c = 0, len = 0;
+ while ((i = jvp_utf8_next(i, end, &c))) len++;
+ jv_free(j);
+ return len;
+}
+
uint32_t jv_string_hash(jv j) {
assert(jv_get_kind(j) == JV_KIND_STRING);
uint32_t hash = jvp_string_hash(jvp_string_ptr(&j.val.nontrivial));
diff --git a/jv.h b/jv.h
index a64f3acc..d14e5d78 100644
--- a/jv.h
+++ b/jv.h
@@ -82,7 +82,8 @@ jv jv_array_slice(jv, int, int);
jv jv_string(const char*);
jv jv_string_sized(const char*, int);
-int jv_string_length(jv);
+int jv_string_length_bytes(jv);
+int jv_string_length_codepoints(jv);
uint32_t jv_string_hash(jv);
const char* jv_string_value(jv);
jv jv_string_concat(jv, jv);
diff --git a/jv_aux.c b/jv_aux.c
index 68811cd0..0c8cd8b7 100644
--- a/jv_aux.c
+++ b/jv_aux.c
@@ -380,8 +380,8 @@ jv jv_delpaths(jv object, jv paths) {
static int string_cmp(const void* pa, const void* pb){
const jv* a = pa;
const jv* b = pb;
- int lena = jv_string_length(jv_copy(*a));
- int lenb = jv_string_length(jv_copy(*b));
+ int lena = jv_string_length_bytes(jv_copy(*a));
+ int lenb = jv_string_length_bytes(jv_copy(*b));
int minlen = lena < lenb ? lena : lenb;
int r = memcmp(jv_string_value(*a), jv_string_value(*b), minlen);
if (r == 0) r = lena - lenb;
diff --git a/jv_print.c b/jv_print.c
index fc1370ed..5784337e 100644
--- a/jv_print.c
+++ b/jv_print.c
@@ -45,7 +45,7 @@ static void put_space(int n, FILE* fout, jv* strout) {
static void jvp_dump_string(jv str, int ascii_only, FILE* F, jv* S) {
assert(jv_get_kind(str) == JV_KIND_STRING);
const char* i = jv_string_value(str);
- const char* end = i + jv_string_length(jv_copy(str));
+ const char* end = i + jv_string_length_bytes(jv_copy(str));
const char* cstart;
int c = 0;
char buf[32];
diff --git a/lexer.l b/lexer.l
index 633b863a..59e527db 100644
--- a/lexer.l
+++ b/lexer.l
@@ -93,7 +93,7 @@ struct lexer_param;
(\\[^u(]|\\u[a-zA-Z0-9]{0,4})+ {
/* pass escapes to the json parser */
jv escapes = jv_string_fmt("\"%.*s\"", yyleng, yytext);
- yylval->literal = jv_parse_sized(jv_string_value(escapes), jv_string_length(jv_copy(escapes)));
+ yylval->literal = jv_parse_sized(jv_string_value(escapes), jv_string_length_bytes(jv_copy(escapes)));
jv_free(escapes);
return QQSTRING_TEXT;
}
diff --git a/main.c b/main.c
index 77c76548..73359ffe 100644
--- a/main.c
+++ b/main.c
@@ -69,7 +69,7 @@ static void process(jv value, int flags) {
jv result;
while (jv_is_valid(result = jq_next(jq))) {
if ((options & RAW_OUTPUT) && jv_get_kind(result) == JV_KIND_STRING) {
- fwrite(jv_string_value(result), 1, jv_string_length(jv_copy(result)), stdout);
+ fwrite(jv_string_value(result), 1, jv_string_length_bytes(jv_copy(result)), stdout);
jv_free(result);
} else {
int dumpopts;
diff --git a/tests/all.test b/tests/all.test
index 0119dad6..90ab7db7 100644
--- a/tests/all.test
+++ b/tests/all.test
@@ -302,8 +302,8 @@ null
[false, false, false, false, false, false, false, false, true ]
[.[] | length]
-[[], {}, [1,2], {"a":42}, "asdf"]
-[0, 0, 2, 1, 4]
+[[], {}, [1,2], {"a":42}, "asdf", "\u03bc"]
+[0, 0, 2, 1, 4, 1]
map(keys)
[{}, {"abcd":1,"abc":2,"abcde":3}, {"x":1, "z": 3, "y":2}]