diff options
Diffstat (limited to 'src/jv_parse.c')
-rw-r--r-- | src/jv_parse.c | 852 |
1 files changed, 852 insertions, 0 deletions
diff --git a/src/jv_parse.c b/src/jv_parse.c new file mode 100644 index 00000000..3102ed4f --- /dev/null +++ b/src/jv_parse.c @@ -0,0 +1,852 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include "jv.h" +#include "jv_dtoa.h" +#include "jv_unicode.h" +#include "jv_alloc.h" +#include "jv_dtoa.h" + +typedef const char* presult; + +#define TRY(x) do {presult msg__ = (x); if (msg__) return msg__; } while(0) +#ifdef __GNUC__ +#define pfunc __attribute__((warn_unused_result)) presult +#else +#define pfunc presult +#endif + +enum last_seen { + JV_LAST_NONE = 0, + JV_LAST_OPEN_ARRAY = '[', + JV_LAST_OPEN_OBJECT = '{', + JV_LAST_COLON = ':', + JV_LAST_COMMA = ',', + JV_LAST_VALUE = 'V', +}; + +struct jv_parser { + const char* curr_buf; + int curr_buf_length; + int curr_buf_pos; + int curr_buf_is_partial; + int eof; + unsigned bom_strip_position; + + int flags; + + jv* stack; // parser + int stackpos; // parser + int stacklen; // both (optimization; it's really pathlen for streaming) + jv path; // streamer + enum last_seen last_seen; // streamer + jv output; // streamer + jv next; // both + + char* tokenbuf; + int tokenpos; + int tokenlen; + + int line, column; + + struct dtoa_context dtoa; + + enum { + JV_PARSER_NORMAL, + JV_PARSER_STRING, + JV_PARSER_STRING_ESCAPE, + JV_PARSER_WAITING_FOR_RS // parse error, waiting for RS + } st; + unsigned int last_ch_was_ws:1; +}; + + +static void parser_init(struct jv_parser* p, int flags) { + p->flags = flags; + if ((p->flags & JV_PARSE_STREAMING)) { + p->path = jv_array(); + } else { + p->path = jv_invalid(); + p->flags &= ~(JV_PARSE_STREAM_ERRORS); + } + p->stack = 0; + p->stacklen = p->stackpos = 0; + p->last_seen = JV_LAST_NONE; + p->output = jv_invalid(); + p->next = jv_invalid(); + p->tokenbuf = 0; + p->tokenlen = p->tokenpos = 0; + if ((p->flags & JV_PARSE_SEQ)) + p->st = JV_PARSER_WAITING_FOR_RS; + else + p->st = JV_PARSER_NORMAL; + p->eof = 0; + p->curr_buf = 0; + p->curr_buf_length = p->curr_buf_pos = p->curr_buf_is_partial = 0; + p->bom_strip_position = 0; + p->last_ch_was_ws = 0; + p->line = 1; + p->column = 0; + jvp_dtoa_context_init(&p->dtoa); +} + +static void parser_reset(struct jv_parser* p) { + if ((p->flags & JV_PARSE_STREAMING)) { + jv_free(p->path); + p->path = jv_array(); + p->stacklen = 0; + } + p->last_seen = JV_LAST_NONE; + jv_free(p->output); + p->output = jv_invalid(); + jv_free(p->next); + p->next = jv_invalid(); + for (int i=0; i<p->stackpos; i++) + jv_free(p->stack[i]); + p->stackpos = 0; + p->tokenpos = 0; + p->st = JV_PARSER_NORMAL; +} + +static void parser_free(struct jv_parser* p) { + parser_reset(p); + jv_free(p->path); + jv_free(p->output); + jv_mem_free(p->stack); + jv_mem_free(p->tokenbuf); + jvp_dtoa_context_free(&p->dtoa); +} + +static pfunc value(struct jv_parser* p, jv val) { + if ((p->flags & JV_PARSE_STREAMING)) { + if (jv_is_valid(p->next) || p->last_seen == JV_LAST_VALUE) + return "Expected separator between values"; + if (p->stacklen > 0) + p->last_seen = JV_LAST_VALUE; + else + p->last_seen = JV_LAST_NONE; + } else { + if (jv_is_valid(p->next)) return "Expected separator between values"; + } + jv_free(p->next); + p->next = val; + return 0; +} + +static void push(struct jv_parser* p, jv v) { + assert(p->stackpos <= p->stacklen); + if (p->stackpos == p->stacklen) { + p->stacklen = p->stacklen * 2 + 10; + p->stack = jv_mem_realloc(p->stack, p->stacklen * sizeof(jv)); + } + assert(p->stackpos < p->stacklen); + p->stack[p->stackpos++] = v; +} + +static pfunc parse_token(struct jv_parser* p, char ch) { + switch (ch) { + case '[': + if (jv_is_valid(p->next)) return "Expected separator between values"; + push(p, jv_array()); + break; + + case '{': + if (jv_is_valid(p->next)) return "Expected separator between values"; + push(p, jv_object()); + break; + + case ':': + if (!jv_is_valid(p->next)) + return "Expected string key before ':'"; + if (p->stackpos == 0 || jv_get_kind(p->stack[p->stackpos-1]) != JV_KIND_OBJECT) + return "':' not as part of an object"; + if (jv_get_kind(p->next) != JV_KIND_STRING) + return "Object keys must be strings"; + push(p, p->next); + p->next = jv_invalid(); + break; + + case ',': + if (!jv_is_valid(p->next)) + return "Expected value before ','"; + if (p->stackpos == 0) + return "',' not as part of an object or array"; + if (jv_get_kind(p->stack[p->stackpos-1]) == JV_KIND_ARRAY) { + p->stack[p->stackpos-1] = jv_array_append(p->stack[p->stackpos-1], p->next); + p->next = jv_invalid(); + } else if (jv_get_kind(p->stack[p->stackpos-1]) == JV_KIND_STRING) { + assert(p->stackpos > 1 && jv_get_kind(p->stack[p->stackpos-2]) == JV_KIND_OBJECT); + p->stack[p->stackpos-2] = jv_object_set(p->stack[p->stackpos-2], + p->stack[p->stackpos-1], p->next); + p->stackpos--; + p->next = jv_invalid(); + } else { + // this case hits on input like {"a", "b"} + return "Objects must consist of key:value pairs"; + } + break; + + case ']': + if (p->stackpos == 0 || jv_get_kind(p->stack[p->stackpos-1]) != JV_KIND_ARRAY) + return "Unmatched ']'"; + if (jv_is_valid(p->next)) { + p->stack[p->stackpos-1] = jv_array_append(p->stack[p->stackpos-1], p->next); + p->next = jv_invalid(); + } else { + if (jv_array_length(jv_copy(p->stack[p->stackpos-1])) != 0) { + // this case hits on input like [1,2,3,] + return "Expected another array element"; + } + } + jv_free(p->next); + p->next = p->stack[--p->stackpos]; + break; + + case '}': + if (p->stackpos == 0) + return "Unmatched '}'"; + if (jv_is_valid(p->next)) { + if (jv_get_kind(p->stack[p->stackpos-1]) != JV_KIND_STRING) + return "Objects must consist of key:value pairs"; + assert(p->stackpos > 1 && jv_get_kind(p->stack[p->stackpos-2]) == JV_KIND_OBJECT); + p->stack[p->stackpos-2] = jv_object_set(p->stack[p->stackpos-2], + p->stack[p->stackpos-1], p->next); + p->stackpos--; + p->next = jv_invalid(); + } else { + if (jv_get_kind(p->stack[p->stackpos-1]) != JV_KIND_OBJECT) + return "Unmatched '}'"; + if (jv_object_length(jv_copy(p->stack[p->stackpos-1])) != 0) + return "Expected another key-value pair"; + } + jv_free(p->next); + p->next = p->stack[--p->stackpos]; + break; + } + return 0; +} + +static pfunc stream_token(struct jv_parser* p, char ch) { + jv_kind k; + jv last; + + switch (ch) { + case '[': + if (jv_is_valid(p->next)) + return "Expected a separator between values"; + p->path = jv_array_append(p->path, jv_number(0)); // push + p->last_seen = JV_LAST_OPEN_ARRAY; + p->stacklen++; + break; + + case '{': + if (p->last_seen == JV_LAST_VALUE) + return "Expected a separator between values"; + // Push object key: null, since we don't know it yet + p->path = jv_array_append(p->path, jv_null()); // push + p->last_seen = JV_LAST_OPEN_OBJECT; + p->stacklen++; + break; + + case ':': + if (p->stacklen == 0 || jv_get_kind(jv_array_get(jv_copy(p->path), p->stacklen - 1)) == JV_KIND_NUMBER) + return "':' not as part of an object"; + if (!jv_is_valid(p->next) || p->last_seen == JV_LAST_NONE) + return "Expected string key before ':'"; + if (jv_get_kind(p->next) != JV_KIND_STRING) + return "Object keys must be strings"; + if (p->last_seen != JV_LAST_VALUE) + return "':' should follow a key"; + p->last_seen = JV_LAST_COLON; + p->path = jv_array_set(p->path, p->stacklen - 1, p->next); + p->next = jv_invalid(); + break; + + case ',': + if (p->last_seen != JV_LAST_VALUE) + return "Expected value before ','"; + if (p->stacklen == 0) + return "',' not as part of an object or array"; + last = jv_array_get(jv_copy(p->path), p->stacklen - 1); + k = jv_get_kind(last); + if (k == JV_KIND_NUMBER) { + int idx = jv_number_value(last); + + if (jv_is_valid(p->next)) { + p->output = JV_ARRAY(jv_copy(p->path), p->next); + p->next = jv_invalid(); + } + p->path = jv_array_set(p->path, p->stacklen - 1, jv_number(idx + 1)); + p->last_seen = JV_LAST_COMMA; + } else if (k == JV_KIND_STRING) { + if (jv_is_valid(p->next)) { + p->output = JV_ARRAY(jv_copy(p->path), p->next); + p->next = jv_invalid(); + } + p->path = jv_array_set(p->path, p->stacklen - 1, jv_true()); // ready for another name:value pair + p->last_seen = JV_LAST_COMMA; + } else { + assert(k == JV_KIND_NULL); + // this case hits on input like {,} + // make sure to handle input like {"a", "b"} and {"a":, ...} + jv_free(last); + return "Objects must consist of key:value pairs"; + } + jv_free(last); + break; + + case ']': + if (p->stacklen == 0) + return "Unmatched ']' at the top-level"; + if (p->last_seen == JV_LAST_COMMA) + return "Expected another array element"; + if (p->last_seen == JV_LAST_OPEN_ARRAY) + assert(!jv_is_valid(p->next)); + + last = jv_array_get(jv_copy(p->path), p->stacklen - 1); + k = jv_get_kind(last); + jv_free(last); + + if (k != JV_KIND_NUMBER) + return "Unmatched ']' in the middle of an object"; + if (jv_is_valid(p->next)) { + p->output = JV_ARRAY(jv_copy(p->path), p->next, jv_true()); + p->next = jv_invalid(); + } else if (p->last_seen != JV_LAST_OPEN_ARRAY) { + p->output = JV_ARRAY(jv_copy(p->path)); + } + + p->path = jv_array_slice(p->path, 0, --(p->stacklen)); // pop + //assert(!jv_is_valid(p->next)); + jv_free(p->next); + p->next = jv_invalid(); + + if (p->last_seen == JV_LAST_OPEN_ARRAY) + p->output = JV_ARRAY(jv_copy(p->path), jv_array()); // Empty arrays are leaves + + if (p->stacklen == 0) + p->last_seen = JV_LAST_NONE; + else + p->last_seen = JV_LAST_VALUE; + break; + + case '}': + if (p->stacklen == 0) + return "Unmatched '}' at the top-level"; + if (p->last_seen == JV_LAST_COMMA) + return "Expected another key:value pair"; + if (p->last_seen == JV_LAST_OPEN_OBJECT) + assert(!jv_is_valid(p->next)); + + last = jv_array_get(jv_copy(p->path), p->stacklen - 1); + k = jv_get_kind(last); + jv_free(last); + if (k == JV_KIND_NUMBER) + return "Unmatched '}' in the middle of an array"; + + if (jv_is_valid(p->next)) { + if (k != JV_KIND_STRING) + return "Objects must consist of key:value pairs"; + p->output = JV_ARRAY(jv_copy(p->path), p->next, jv_true()); + p->next = jv_invalid(); + } else { + // Perhaps {"a":[]} + if (p->last_seen == JV_LAST_COLON) + // Looks like {"a":} + return "Missing value in key:value pair"; + if (p->last_seen == JV_LAST_COMMA) + // Looks like {"a":0,} + return "Expected another key-value pair"; + if (p->last_seen == JV_LAST_OPEN_ARRAY) + return "Unmatched '}' in the middle of an array"; + if (p->last_seen != JV_LAST_VALUE && p->last_seen != JV_LAST_OPEN_OBJECT) + return "Unmatched '}'"; + if (p->last_seen != JV_LAST_OPEN_OBJECT) + p->output = JV_ARRAY(jv_copy(p->path)); + } + p->path = jv_array_slice(p->path, 0, --(p->stacklen)); // pop + jv_free(p->next); + p->next = jv_invalid(); + + if (p->last_seen == JV_LAST_OPEN_OBJECT) + p->output = JV_ARRAY(jv_copy(p->path), jv_object()); // Empty arrays are leaves + + if (p->stacklen == 0) + p->last_seen = JV_LAST_NONE; + else + p->last_seen = JV_LAST_VALUE; + break; + } + return 0; +} + +static void tokenadd(struct jv_parser* p, char c) { + assert(p->tokenpos <= p->tokenlen); + if (p->tokenpos == p->tokenlen) { + p->tokenlen = p->tokenlen*2 + 256; + p->tokenbuf = jv_mem_realloc(p->tokenbuf, p->tokenlen); + } + assert(p->tokenpos < p->tokenlen); + p->tokenbuf[p->tokenpos++] = c; +} + +static int unhex4(char* hex) { + int r = 0; + for (int i=0; i<4; i++) { + char c = *hex++; + int n; + if ('0' <= c && c <= '9') n = c - '0'; + else if ('a' <= c && c <= 'f') n = c - 'a' + 10; + else if ('A' <= c && c <= 'F') n = c - 'A' + 10; + else return -1; + r <<= 4; + r |= n; + } + return r; +} + +static pfunc found_string(struct jv_parser* p) { + char* in = p->tokenbuf; + char* out = p->tokenbuf; + char* end = p->tokenbuf + p->tokenpos; + + while (in < end) { + char c = *in++; + if (c == '\\') { + if (in >= end) + return "Expected escape character at end of string"; + c = *in++; + switch (c) { + case '\\': + case '"': + case '/': *out++ = c; break; + case 'b': *out++ = '\b'; break; + case 'f': *out++ = '\f'; break; + case 't': *out++ = '\t'; break; + case 'n': *out++ = '\n'; break; + case 'r': *out++ = '\r'; break; + + case 'u': + /* ahh, the complicated case */ + if (in + 4 > end) + return "Invalid \\uXXXX escape"; + int hexvalue = unhex4(in); + if (hexvalue < 0) + return "Invalid characters in \\uXXXX escape"; + unsigned long codepoint = (unsigned long)hexvalue; + in += 4; + if (0xD800 <= codepoint && codepoint <= 0xDBFF) { + /* who thought UTF-16 surrogate pairs were a good idea? */ + if (in + 6 > end || in[0] != '\\' || in[1] != 'u') + return "Invalid \\uXXXX\\uXXXX surrogate pair escape"; + unsigned long surrogate = unhex4(in+2); + if (!(0xDC00 <= surrogate && surrogate <= 0xDFFF)) + return "Invalid \\uXXXX\\uXXXX surrogate pair escape"; + in += 6; + codepoint = 0x10000 + (((codepoint - 0xD800) << 10) + |(surrogate - 0xDC00)); + } + if (codepoint > 0x10FFFF) + codepoint = 0xFFFD; // U+FFFD REPLACEMENT CHARACTER + out += jvp_utf8_encode(codepoint, out); + break; + + default: + return "Invalid escape"; + } + } else { + if (c > 0 && c < 0x001f) + return "Invalid string: control characters from U+0000 through U+001F must be escaped"; + *out++ = c; + } + } + TRY(value(p, jv_string_sized(p->tokenbuf, out - p->tokenbuf))); + p->tokenpos = 0; + return 0; +} + +static pfunc check_literal(struct jv_parser* p) { + if (p->tokenpos == 0) return 0; + + const char* pattern = 0; + int plen; + jv v; + switch (p->tokenbuf[0]) { + case 't': pattern = "true"; plen = 4; v = jv_true(); break; + case 'f': pattern = "false"; plen = 5; v = jv_false(); break; + case 'n': pattern = "null"; plen = 4; v = jv_null(); break; + } + if (pattern) { + if (p->tokenpos != plen) return "Invalid literal"; + for (int i=0; i<plen; i++) + if (p->tokenbuf[i] != pattern[i]) + return "Invalid literal"; + TRY(value(p, v)); + } else { + // FIXME: better parser + p->tokenbuf[p->tokenpos] = 0; // FIXME: invalid + char* end = 0; + double d = jvp_strtod(&p->dtoa, p->tokenbuf, &end); + if (end == 0 || *end != 0) + return "Invalid numeric literal"; + TRY(value(p, jv_number(d))); + } + p->tokenpos = 0; + return 0; +} + +typedef enum { + LITERAL, + WHITESPACE, + STRUCTURE, + QUOTE, + INVALID +} chclass; + +static chclass classify(char c) { + switch (c) { + case ' ': + case '\t': + case '\r': + case '\n': + return WHITESPACE; + case '"': + return QUOTE; + case '[': + case ',': + case ']': + case '{': + case ':': + case '}': + return STRUCTURE; + default: + return LITERAL; + } +} + + +static const presult OK = "output produced"; + +static int parse_check_done(struct jv_parser* p, jv* out) { + if (p->stackpos == 0 && jv_is_valid(p->next)) { + *out = p->next; + p->next = jv_invalid(); + return 1; + } else { + return 0; + } +} + +static int stream_check_done(struct jv_parser* p, jv* out) { + if (p->stacklen == 0 && jv_is_valid(p->next)) { + *out = JV_ARRAY(jv_copy(p->path),p->next); + p->next = jv_invalid(); + return 1; + } else if (jv_is_valid(p->output)) { + if (jv_array_length(jv_copy(p->output)) > 2) { + // At end of an array or object, necessitating one more output by + // which to indicate this + *out = jv_array_slice(jv_copy(p->output), 0, 2); + p->output = jv_array_slice(p->output, 0, 1); // arrange one more output + } else { + // No further processing needed + *out = p->output; + p->output = jv_invalid(); + } + return 1; + } else { + return 0; + } +} + +static int parse_check_truncation(struct jv_parser* p) { + return ((p->flags & JV_PARSE_SEQ) && !p->last_ch_was_ws && (p->stackpos > 0 || p->tokenpos > 0 || jv_get_kind(p->next) == JV_KIND_NUMBER)); +} + +static int stream_check_truncation(struct jv_parser* p) { + jv_kind k = jv_get_kind(p->next); + return (p->stacklen > 0 || k == JV_KIND_NUMBER || k == JV_KIND_TRUE || k == JV_KIND_FALSE || k == JV_KIND_NULL); +} + +static int parse_is_top_num(struct jv_parser* p) { + return (p->stackpos == 0 && jv_get_kind(p->next) == JV_KIND_NUMBER); +} + +static int stream_is_top_num(struct jv_parser* p) { + return (p->stacklen == 0 && jv_get_kind(p->next) == JV_KIND_NUMBER); +} + +#define check_done(p, o) \ + (((p)->flags & JV_PARSE_STREAMING) ? stream_check_done((p), (o)) : parse_check_done((p), (o))) + +#define token(p, ch) \ + (((p)->flags & JV_PARSE_STREAMING) ? stream_token((p), (ch)) : parse_token((p), (ch))) + +#define check_truncation(p) \ + (((p)->flags & JV_PARSE_STREAMING) ? stream_check_truncation((p)) : parse_check_truncation((p))) + +#define is_top_num(p) \ + (((p)->flags & JV_PARSE_STREAMING) ? stream_is_top_num((p)) : parse_is_top_num((p))) + +static pfunc scan(struct jv_parser* p, char ch, jv* out) { + p->column++; + if (ch == '\n') { + p->line++; + p->column = 0; + } + if (ch == '\036' /* ASCII RS; see draft-ietf-json-sequence-07 */) { + if (check_truncation(p)) { + if (check_literal(p) == 0 && is_top_num(p)) + return "Potentially truncated top-level numeric value"; + return "Truncated value"; + } + TRY(check_literal(p)); + if (p->st == JV_PARSER_NORMAL && check_done(p, out)) + return OK; + // shouldn't happen? + assert(!jv_is_valid(*out)); + parser_reset(p); + jv_free(*out); + *out = jv_invalid(); + return OK; + } + presult answer = 0; + p->last_ch_was_ws = 0; + if (p->st == JV_PARSER_NORMAL) { + chclass cls = classify(ch); + if (cls == WHITESPACE) + p->last_ch_was_ws = 1; + if (cls != LITERAL) { + TRY(check_literal(p)); + if (check_done(p, out)) answer = OK; + } + switch (cls) { + case LITERAL: + tokenadd(p, ch); + break; + case WHITESPACE: + break; + case QUOTE: + p->st = JV_PARSER_STRING; + break; + case STRUCTURE: + TRY(token(p, ch)); + break; + case INVALID: + return "Invalid character"; + } + if (check_done(p, out)) answer = OK; + } else { + if (ch == '"' && p->st == JV_PARSER_STRING) { + TRY(found_string(p)); + p->st = JV_PARSER_NORMAL; + if (check_done(p, out)) answer = OK; + } else { + tokenadd(p, ch); + if (ch == '\\' && p->st == JV_PARSER_STRING) { + p->st = JV_PARSER_STRING_ESCAPE; + } else { + p->st = JV_PARSER_STRING; + } + } + } + return answer; +} + +struct jv_parser* jv_parser_new(int flags) { + struct jv_parser* p = jv_mem_alloc(sizeof(struct jv_parser)); + parser_init(p, flags); + p->flags = flags; + return p; +} + +void jv_parser_free(struct jv_parser* p) { + parser_free(p); + jv_mem_free(p); +} + +static const unsigned char UTF8_BOM[] = {0xEF,0xBB,0xBF}; + +int jv_parser_remaining(struct jv_parser* p) { + if (p->curr_buf == 0) + return 0; + return (p->curr_buf_length - p->curr_buf_pos); +} + +void jv_parser_set_buf(struct jv_parser* p, const char* buf, int length, int is_partial) { + assert((p->curr_buf == 0 || p->curr_buf_pos == p->curr_buf_length) + && "previous buffer not exhausted"); + while (length > 0 && p->bom_strip_position < sizeof(UTF8_BOM)) { + if ((unsigned char)*buf == UTF8_BOM[p->bom_strip_position]) { + // matched a BOM character + buf++; + length--; + p->bom_strip_position++; + } else { + if (p->bom_strip_position == 0) { + // no BOM in this document + p->bom_strip_position = sizeof(UTF8_BOM); + } else { + // malformed BOM (prefix present, rest missing) + p->bom_strip_position = 0xff; + } + } + } + p->curr_buf = buf; + p->curr_buf_length = length; + p->curr_buf_pos = 0; + p->curr_buf_is_partial = is_partial; +} + +static jv make_error(struct jv_parser*, const char *, ...) JV_PRINTF_LIKE(2, 3); + +static jv make_error(struct jv_parser* p, const char *fmt, ...) { + va_list ap; + va_start(ap, fmt); + jv e = jv_string_vfmt(fmt, ap); + va_end(ap); + if ((p->flags & JV_PARSE_STREAM_ERRORS)) + return JV_ARRAY(e, jv_copy(p->path)); + return jv_invalid_with_msg(e); +} + +jv jv_parser_next(struct jv_parser* p) { + if (p->eof) + return jv_invalid(); + if (!p->curr_buf) + return jv_invalid(); // Need a buffer + if (p->bom_strip_position == 0xff) { + if (!(p->flags & JV_PARSE_SEQ)) + return jv_invalid_with_msg(jv_string("Malformed BOM")); + p->st =JV_PARSER_WAITING_FOR_RS; + parser_reset(p); + } + jv value = jv_invalid(); + if ((p->flags & JV_PARSE_STREAMING) && stream_check_done(p, &value)) + return value; + char ch; + presult msg = 0; + while (!msg && p->curr_buf_pos < p->curr_buf_length) { + ch = p->curr_buf[p->curr_buf_pos++]; + if (p->st == JV_PARSER_WAITING_FOR_RS) { + if (ch == '\n') { + p->line++; + p->column = 0; + } else { + p->column++; + } + if (ch == '\036') + p->st = JV_PARSER_NORMAL; + continue; // need to resync, wait for RS + } + msg = scan(p, ch, &value); + } + if (msg == OK) { + return value; + } else if (msg) { + jv_free(value); + if (ch != '\036' && (p->flags & JV_PARSE_SEQ)) { + // Skip to the next RS + p->st = JV_PARSER_WAITING_FOR_RS; + value = make_error(p, "%s at line %d, column %d (need RS to resync)", msg, p->line, p->column); + parser_reset(p); + return value; + } + value = make_error(p, "%s at line %d, column %d", msg, p->line, p->column); + parser_reset(p); + if (!(p->flags & JV_PARSE_SEQ)) { + // We're not parsing a JSON text sequence; throw this buffer away. + // XXX We should fail permanently here. + p->curr_buf = 0; + p->curr_buf_pos = 0; + } // Else ch must be RS; don't clear buf so we can start parsing again after this ch + return value; + } else if (p->curr_buf_is_partial) { + assert(p->curr_buf_pos == p->curr_buf_length); + // need another buffer + return jv_invalid(); + } else { + // at EOF + p->eof = 1; + assert(p->curr_buf_pos == p->curr_buf_length); + jv_free(value); + if (p->st == JV_PARSER_WAITING_FOR_RS) + return make_error(p, "Unfinished abandoned text at EOF at line %d, column %d", p->line, p->column); + if (p->st != JV_PARSER_NORMAL) { + value = make_error(p, "Unfinished string at EOF at line %d, column %d", p->line, p->column); + parser_reset(p); + p->st = JV_PARSER_WAITING_FOR_RS; + return value; + } + if ((msg = check_literal(p))) { + value = make_error(p, "%s at EOF at line %d, column %d", msg, p->line, p->column); + parser_reset(p); + p->st = JV_PARSER_WAITING_FOR_RS; + return value; + } + if (((p->flags & JV_PARSE_STREAMING) && p->stacklen != 0) || + (!(p->flags & JV_PARSE_STREAMING) && p->stackpos != 0)) { + value = make_error(p, "Unfinished JSON term at EOF at line %d, column %d", p->line, p->column); + parser_reset(p); + p->st = JV_PARSER_WAITING_FOR_RS; + return value; + } + // p->next is either invalid (nothing here, but no syntax error) + // or valid (this is the value). either way it's the thing to return + if ((p->flags & JV_PARSE_STREAMING) && jv_is_valid(p->next)) { + value = JV_ARRAY(jv_copy(p->path), p->next); // except in streaming mode we've got to make it [path,value] + } else { + value = p->next; + } + p->next = jv_invalid(); + if ((p->flags & JV_PARSE_SEQ) && !p->last_ch_was_ws && jv_get_kind(value) == JV_KIND_NUMBER) { + jv_free(value); + return make_error(p, "Potentially truncated top-level numeric value at EOF at line %d, column %d", p->line, p->column); + } + return value; + } +} + +jv jv_parse_sized(const char* string, int length) { + struct jv_parser parser; + parser_init(&parser, 0); + jv_parser_set_buf(&parser, string, length, 0); + jv value = jv_parser_next(&parser); + if (jv_is_valid(value)) { + jv next = jv_parser_next(&parser); + if (jv_is_valid(next)) { + // multiple JSON values, we only wanted one + jv_free(value); + jv_free(next); + value = jv_invalid_with_msg(jv_string("Unexpected extra JSON values")); + } else if (jv_invalid_has_msg(jv_copy(next))) { + // parser error after the first JSON value + jv_free(value); + value = next; + } else { + // a single valid JSON value + jv_free(next); + } + } else if (jv_invalid_has_msg(jv_copy(value))) { + // parse error, we'll return it + } else { + // no value at all + jv_free(value); + value = jv_invalid_with_msg(jv_string("Expected JSON value")); + } + parser_free(&parser); + + if (!jv_is_valid(value) && jv_invalid_has_msg(jv_copy(value))) { + jv msg = jv_invalid_get_msg(value); + value = jv_invalid_with_msg(jv_string_fmt("%s (while parsing '%s')", + jv_string_value(msg), + string)); + jv_free(msg); + } + return value; +} + +jv jv_parse(const char* string) { + return jv_parse_sized(string, strlen(string)); +} |