From 884e6c7d8bc2595e9baade62bc1cfbc77a8a9dd3 Mon Sep 17 00:00:00 2001
From: Nicolas Williams <nico@cryptonector.com>
Date: Fri, 29 Nov 2013 12:50:02 -0600
Subject: Add string slicing

---
 jv.c     | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 jv.h     |  1 +
 jv_aux.c | 16 ++++++++++++++--
 3 files changed, 72 insertions(+), 2 deletions(-)

diff --git a/jv.c b/jv.c
index 671f8d2f..999c776b 100644
--- a/jv.c
+++ b/jv.c
@@ -687,6 +687,63 @@ const char* jv_string_value(jv j) {
   return jvp_string_ptr(&j.val.nontrivial)->data;
 }
 
+/*
+ * Return the slice of j from codepoint index start (inclusive) to end
+ * (exclusive), or an invalid jv if jvp_utf8_next() flags a bad codepoint.
+ * Calls jv_free(j) on every return path.  Negative indices are offset by
+ * the byte length -- NOTE(review): exact only for ASCII; presumably callers
+ * normalize against the codepoint count first, confirm.
+ */
+jv jv_string_slice(jv j, int start, int end) {
+  assert(jv_get_kind(j) == JV_KIND_STRING);
+  const char *s = jv_string_value(j);
+  int len = jv_string_length_bytes(jv_copy(j));
+  const char *p, *e;
+  int i, c;
+
+  if (start < 0) start = len + start;
+  if (end < 0) end = len + end;
+  if (start < 0) start = 0;
+  if (start > len) start = len;
+  if (end > len) end = len;
+  if (end < start) end = start;
+  if (start < 0 || start > end || end > len) {
+    jv_free(j); /* defensive path must release j like every other exit */
+    return jv_invalid_with_msg(jv_string("Invalid string slice indices"));
+  }
+  assert(0 <= start && start <= end && end <= len);
+
+  /* Advance p past the first `start` codepoints */
+  for (p = s, i = 0; i < start; i++) {
+    p = jvp_utf8_next(p, s + len, &c);
+    if (p == NULL) {
+      jv_free(j);
+      return jv_string_empty(16);
+    }
+    if (c == -1) {
+      jv_free(j);
+      return jv_invalid_with_msg(jv_string("Invalid UTF-8 string"));
+    }
+  }
+  /* Advance e from p until `end` codepoints have been counted in total */
+  for (e = p; e != NULL && i < end; i++) {
+    e = jvp_utf8_next(e, s + len, &c);
+    if (e == NULL) {
+      e = s + len; /* NULL from the scanner: slice runs to the end of s */
+      break;
+    }
+    if (c == -1) {
+      jv_free(j);
+      return jv_invalid_with_msg(jv_string("Invalid UTF-8 string"));
+    }
+  }
+
+  /* Copy, don't alias: NUL-terminated strings are assumed throughout jv. */
+  jv res = jv_string_sized(p, e - p);
+  jv_free(j);
+  return res;
+}
+
 jv jv_string_concat(jv a, jv b) {
   jvp_string* sb = jvp_string_ptr(&b.val.nontrivial);
   jvp_string_append(&a.val.nontrivial, sb->data, jvp_string_length(sb));
diff --git a/jv.h b/jv.h
index 921345c4..1362acca 100644
--- a/jv.h
+++ b/jv.h
@@ -82,6 +82,7 @@ int jv_string_length_bytes(jv);
 int jv_string_length_codepoints(jv);
 unsigned long jv_string_hash(jv);
 const char* jv_string_value(jv);
+jv jv_string_slice(jv j, int start, int end);
 jv jv_string_concat(jv, jv);
 jv jv_string_fmt(const char*, ...);
 jv jv_string_append_codepoint(jv a, uint32_t c);
diff --git a/jv_aux.c b/jv_aux.c
index 89f36e69..f3260c9d 100644
--- a/jv_aux.c
+++ b/jv_aux.c
@@ -3,15 +3,19 @@
 #include <assert.h>
 #include "jv_alloc.h"
 
-static int parse_slice(jv array, jv slice, int* pstart, int* pend) {
+static int parse_slice(jv j, jv slice, int* pstart, int* pend) {
   // Array slices
-  int len = jv_array_length(jv_copy(array));
   jv start_jv = jv_object_get(jv_copy(slice), jv_string("start"));
   jv end_jv = jv_object_get(slice, jv_string("end"));
   if (jv_get_kind(start_jv) == JV_KIND_NULL) {
     jv_free(start_jv);
     start_jv = jv_number(0);
   }
+  int len;
+  if (jv_get_kind(j) == JV_KIND_ARRAY)
+    len = jv_array_length(jv_copy(j));
+  else
+    len = jv_string_length_codepoints(jv_copy(j));
   if (jv_get_kind(end_jv) == JV_KIND_NULL) {
     jv_free(end_jv);
     end_jv = jv_number(len);
@@ -61,6 +65,14 @@ jv jv_get(jv t, jv k) {
       v = jv_invalid_with_msg(jv_string_fmt("Start and end indices of an array slice must be numbers"));
       jv_free(t);
     }
+  } else if (jv_get_kind(t) == JV_KIND_STRING && jv_get_kind(k) == JV_KIND_OBJECT) {
+    int start, end;
+    if (parse_slice(t, k, &start, &end)) {
+      v = jv_string_slice(t, start, end);
+    } else {
+      v = jv_invalid_with_msg(jv_string_fmt("Start and end indices of an string slice must be numbers"));
+      jv_free(t);
+    }
   } else if (jv_get_kind(t) == JV_KIND_NULL && 
              (jv_get_kind(k) == JV_KIND_STRING || 
               jv_get_kind(k) == JV_KIND_NUMBER || 
-- 
cgit v1.2.3