From 884e6c7d8bc2595e9baade62bc1cfbc77a8a9dd3 Mon Sep 17 00:00:00 2001
From: Nicolas Williams
Date: Fri, 29 Nov 2013 12:50:02 -0600
Subject: Add string slicing

---
Review notes (commentary only; placed between the "---" line and the
diffstat so they are not part of the commit message or of any hunk):

* NOTE(review): this copy of the patch had lost its line breaks and
  indentation. The line structure below was rebuilt so that every hunk
  matches the line counts in its "@@" header. Leading whitespace (two-space
  indent, alignment of continuation lines) is a best guess -- confirm
  against the tree before applying.
* NOTE(review): the first context line of the first jv_aux.c hunk is a bare
  "#include"; the header name appears to have been lost in this copy, as
  does the e-mail address on the "From:" line -- TODO restore both.
* jv_string_slice(): `len` is taken from jv_string_length_bytes(), while
  the two loops advance `start` and `end` one codepoint at a time with
  jvp_utf8_next(). Negative indices and the clamps are therefore applied
  against the byte length, not the codepoint length. parse_slice() in
  jv_aux.c uses jv_string_length_codepoints() for its own `len`; whether
  indices are already normalized before they reach jv_string_slice() is
  not visible in this patch -- confirm.
* jv_string_slice(): the "Invalid string slice indices" return is the only
  return in the function that does not jv_free(j). The four clamp
  statements just above it appear to make its condition always false, so
  the branch looks unreachable; if it is kept it should release `j` like
  the other error paths do.
* jv_get(): the new error message reads "an string slice"; presumably
  "a string slice" was intended.
* parse_slice(): the "// Array slices" comment is left unchanged although
  the function now also computes a length for strings. Any `j` whose kind
  is not JV_KIND_ARRAY takes the jv_string_length_codepoints() branch.

 jv.c     | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 jv.h     |  1 +
 jv_aux.c | 16 ++++++++++++++--
 3 files changed, 72 insertions(+), 2 deletions(-)

diff --git a/jv.c b/jv.c
index 671f8d2f..999c776b 100644
--- a/jv.c
+++ b/jv.c
@@ -687,6 +687,63 @@ const char* jv_string_value(jv j) {
   return jvp_string_ptr(&j.val.nontrivial)->data;
 }
 
+jv jv_string_slice(jv j, int start, int end) {
+  assert(jv_get_kind(j) == JV_KIND_STRING);
+  const char *s = jv_string_value(j);
+  int len = jv_string_length_bytes(jv_copy(j));
+  int i;
+  const char *p, *e;
+  int c;
+  jv res;
+
+  if (start < 0) start = len + start;
+  if (end < 0) end = len + end;
+
+  if (start < 0) start = 0;
+  if (start > len) start = len;
+  if (end > len) end = len;
+  if (end < start) end = start;
+  if (start < 0 || start > end || end > len)
+    return jv_invalid_with_msg(jv_string("Invalid string slice indices"));
+  assert(0 <= start && start <= end && end <= len);
+
+  /* Look for byte offset corresponding to start codepoints */
+  for (p = s, i = 0; i < start; i++) {
+    p = jvp_utf8_next(p, s + len, &c);
+    if (p == NULL) {
+      jv_free(j);
+      return jv_string_empty(16);
+    }
+    if (c == -1) {
+      jv_free(j);
+      return jv_invalid_with_msg(jv_string("Invalid UTF-8 string"));
+    }
+  }
+  /* Look for byte offset corresponding to end codepoints */
+  for (e = p; e != NULL && i < end; i++) {
+    e = jvp_utf8_next(e, s + len, &c);
+    if (e == NULL) {
+      e = s + len;
+      break;
+    }
+    if (c == -1) {
+      jv_free(j);
+      return jv_invalid_with_msg(jv_string("Invalid UTF-8 string"));
+    }
+  }
+
+  /*
+   * NOTE: Ideally we should do here what jvp_array_slice() does instead
+   * of allocating a new string as we do!  However, we assume NUL-
+   * terminated strings all over, and in the jv API, so for now we waste
+   * memory like a drunken navy programmer.  There's probably nothing we
+   * can do about it.
+   */
+  res = jv_string_sized(p, e - p);
+  jv_free(j);
+  return res;
+}
+
 jv jv_string_concat(jv a, jv b) {
   jvp_string* sb = jvp_string_ptr(&b.val.nontrivial);
   jvp_string_append(&a.val.nontrivial, sb->data, jvp_string_length(sb));
diff --git a/jv.h b/jv.h
index 921345c4..1362acca 100644
--- a/jv.h
+++ b/jv.h
@@ -82,6 +82,7 @@ int jv_string_length_bytes(jv);
 int jv_string_length_codepoints(jv);
 unsigned long jv_string_hash(jv);
 const char* jv_string_value(jv);
+jv jv_string_slice(jv j, int start, int end);
 jv jv_string_concat(jv, jv);
 jv jv_string_fmt(const char*, ...);
 jv jv_string_append_codepoint(jv a, uint32_t c);
diff --git a/jv_aux.c b/jv_aux.c
index 89f36e69..f3260c9d 100644
--- a/jv_aux.c
+++ b/jv_aux.c
@@ -3,15 +3,19 @@
 #include
 #include "jv_alloc.h"
 
-static int parse_slice(jv array, jv slice, int* pstart, int* pend) {
+static int parse_slice(jv j, jv slice, int* pstart, int* pend) {
   // Array slices
-  int len = jv_array_length(jv_copy(array));
   jv start_jv = jv_object_get(jv_copy(slice), jv_string("start"));
   jv end_jv = jv_object_get(slice, jv_string("end"));
   if (jv_get_kind(start_jv) == JV_KIND_NULL) {
     jv_free(start_jv);
     start_jv = jv_number(0);
   }
+  int len;
+  if (jv_get_kind(j) == JV_KIND_ARRAY)
+    len = jv_array_length(jv_copy(j));
+  else
+    len = jv_string_length_codepoints(jv_copy(j));
   if (jv_get_kind(end_jv) == JV_KIND_NULL) {
     jv_free(end_jv);
     end_jv = jv_number(len);
@@ -61,6 +65,14 @@ jv jv_get(jv t, jv k) {
       v = jv_invalid_with_msg(jv_string_fmt("Start and end indices of an array slice must be numbers"));
       jv_free(t);
     }
+  } else if (jv_get_kind(t) == JV_KIND_STRING && jv_get_kind(k) == JV_KIND_OBJECT) {
+    int start, end;
+    if (parse_slice(t, k, &start, &end)) {
+      v = jv_string_slice(t, start, end);
+    } else {
+      v = jv_invalid_with_msg(jv_string_fmt("Start and end indices of an string slice must be numbers"));
+      jv_free(t);
+    }
   } else if (jv_get_kind(t) == JV_KIND_NULL &&
              (jv_get_kind(k) == JV_KIND_STRING ||
               jv_get_kind(k) == JV_KIND_NUMBER ||
-- 
cgit v1.2.3