summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Stephen Dolan <mu@netsoc.tcd.ie> 2012-09-11 14:52:10 +0100
committer: Stephen Dolan <mu@netsoc.tcd.ie> 2012-09-11 14:52:10 +0100
commit: 51a44edc63bf4f1749458faf5ae631e0d5023ba9 (patch)
tree: c241153719b4869308a7ab44c00a4e3ba3cefaf5
parent: 062a6aa6d7b9c8ead18cc9cbbe0f458a7e2b7db9 (diff)
JSON stream parser.
Allow multiple values on input as concatenated JSON values, possibly separated by whitespace.
-rw-r--r-- c/jv.c 7
-rw-r--r-- c/jv.h 2
-rw-r--r-- c/jv_dtoa.h 4
-rw-r--r-- c/jv_parse.c 116
-rw-r--r-- c/jv_parse.h 17
-rw-r--r-- c/jv_print.c 2
-rw-r--r-- c/main.c 61
7 files changed, 153 insertions, 56 deletions
diff --git a/c/jv.c b/c/jv.c
index 214354ce..786e7b26 100644
--- a/c/jv.c
+++ b/c/jv.c
@@ -103,6 +103,13 @@ jv jv_invalid_get_msg(jv inv) {
return x;
}
+int jv_invalid_has_msg(jv inv) { // returns 1 when the message fetched from inv is not of kind null, 0 otherwise
+ jv msg = jv_invalid_get_msg(inv); // NOTE(review): callers in this change pass jv_copy(x), which suggests inv is consumed here - confirm against jv_invalid_get_msg
+ int r = jv_get_kind(msg) != JV_KIND_NULL;
+ jv_free(msg); // the message was fetched only to inspect its kind, so release it before returning
+ return r;
+}
+
static void jvp_invalid_free(jv_complex* x) {
if (jvp_refcnt_dec(x)) {
jv_free(((jvp_invalid*)x->ptr)->errmsg);
diff --git a/c/jv.h b/c/jv.h
index 3c557c45..48ae0e0f 100644
--- a/c/jv.h
+++ b/c/jv.h
@@ -52,6 +52,8 @@ int jv_equal(jv, jv);
jv jv_invalid();
jv jv_invalid_with_msg(jv);
jv jv_invalid_get_msg(jv);
+int jv_invalid_has_msg(jv);
+
jv jv_null();
jv jv_true();
diff --git a/c/jv_dtoa.h b/c/jv_dtoa.h
index e9346c0e..3bafcf47 100644
--- a/c/jv_dtoa.h
+++ b/c/jv_dtoa.h
@@ -1,4 +1,5 @@
-
+#ifndef JV_DTOA_H
+#define JV_DTOA_H
#define Kmax 7
struct Bigint;
@@ -18,3 +19,4 @@ void jvp_freedtoa(struct dtoa_context* C, char *s);
#define JVP_DTOA_FMT_MAX_LEN 32
char* jvp_dtoa_fmt(struct dtoa_context* C, register char *b, double x);
+#endif
diff --git a/c/jv_parse.c b/c/jv_parse.c
index 7fd4d2d2..e4565ef7 100644
--- a/c/jv_parse.c
+++ b/c/jv_parse.c
@@ -22,11 +22,15 @@ void jv_parser_init(struct jv_parser* p) {
p->tokenbuf = 0;
p->tokenlen = p->tokenpos = 0;
p->st = JV_PARSER_NORMAL;
+ p->curr_buf = 0;
+ p->curr_buf_length = p->curr_buf_pos = p->curr_buf_is_partial = 0;
jvp_dtoa_context_init(&p->dtoa);
}
void jv_parser_free(struct jv_parser* p) {
jv_free(p->next);
+ for (int i=0; i<p->stackpos; i++)
+ jv_free(p->stack[i]);
free(p->stack);
free(p->tokenbuf);
jvp_dtoa_context_free(&p->dtoa);
@@ -271,13 +275,25 @@ static chclass classify(char c) {
}
+static presult OK = "output produced"; // sentinel result: jv_parser_next tests for it with msg == OK, i.e. by pointer, not by string content
+static int check_done(struct jv_parser* p, jv* out) { // moves a pending top-level value into *out; returns 1 if it did, 0 if there was nothing to hand over
+ if (p->stackpos == 0 && jv_is_valid(p->next)) { // nothing left on the parser stack and p->next holds a valid value
+ *out = p->next; // transfer the value to the caller...
+ p->next = jv_invalid(); // ...and reset the slot to invalid so the same value is not handed out twice
+ return 1;
+ } else {
+ return 0;
+ }
+}
-static pfunc scan(struct jv_parser* p, char ch) {
+static pfunc scan(struct jv_parser* p, char ch, jv* out) {
+ presult answer = 0;
if (p->st == JV_PARSER_NORMAL) {
chclass cls = classify(ch);
if (cls != LITERAL) {
TRY(check_literal(p));
+ if (check_done(p, out)) answer = OK;
}
switch (cls) {
case LITERAL:
@@ -294,10 +310,12 @@ static pfunc scan(struct jv_parser* p, char ch) {
case INVALID:
return "Invalid character";
}
+ if (check_done(p, out)) answer = OK;
} else {
if (ch == '"' && p->st == JV_PARSER_STRING) {
TRY(found_string(p));
p->st = JV_PARSER_NORMAL;
+ if (check_done(p, out)) answer = OK;
} else {
tokenadd(p, ch);
if (ch == '\\' && p->st == JV_PARSER_STRING) {
@@ -307,43 +325,87 @@ static pfunc scan(struct jv_parser* p, char ch) {
}
}
}
- return 0;
+ return answer;
}
-static pfunc finish(struct jv_parser* p) {
- if (p->st != JV_PARSER_NORMAL)
- return "Unfinished string";
- TRY(check_literal(p));
+void jv_parser_set_buf(struct jv_parser* p, const char* buf, int length, int is_partial) { // installs buf/length as the parser's current input and records is_partial for jv_parser_next
+ assert((p->curr_buf == 0 || p->curr_buf_pos == p->curr_buf_length)
+ && "previous buffer not exhausted"); // either no buffer was set yet, or the previous one has been read to its end
+ p->curr_buf = buf; // only the pointer is stored, nothing is copied - presumably buf must stay valid until it has been scanned; confirm with callers
+ p->curr_buf_length = length;
+ p->curr_buf_pos = 0; // reading restarts at the beginning of the new buffer
+ p->curr_buf_is_partial = is_partial; // nonzero: jv_parser_next asks for another buffer at the end; zero: it treats the end of this buffer as EOF
+}
- if (p->stackpos != 0)
- return "Unfinished JSON term";
-
- // this will happen on the empty string
- if (!jv_is_valid(p->next))
- return "Expected JSON value";
-
- return 0;
+jv jv_parser_next(struct jv_parser* p) { // scans the current buffer; returns the next value, an invalid carrying an error message, or a message-less invalid when no value is available
+ assert(p->curr_buf && "a buffer must be provided");
+ jv value; // filled in through scan's out parameter; only read on the msg == OK path below
+ presult msg = 0;
+ while (!msg && p->curr_buf_pos < p->curr_buf_length) { // feed one character at a time until scan reports a result or the buffer runs out
+ char ch = p->curr_buf[p->curr_buf_pos++];
+ msg = scan(p, ch, &value);
+ }
+ if (msg == OK) { // pointer comparison against the OK sentinel: a complete value was produced
+ return value;
+ } else if (msg) { // any other non-null result is an error string from scan
+ return jv_invalid_with_msg(jv_string(msg));
+ } else if (p->curr_buf_is_partial) {
+ assert(p->curr_buf_pos == p->curr_buf_length);
+ // need another buffer
+ return jv_invalid(); // invalid without a message, so callers can tell it apart from a parse error
+ } else {
+ assert(p->curr_buf_pos == p->curr_buf_length);
+ // at EOF
+ if (p->st != JV_PARSER_NORMAL) // still in one of the string states at end of input
+ return jv_invalid_with_msg(jv_string("Unfinished string"));
+ if ((msg = check_literal(p))) // run the literal check on whatever token is still buffered
+ return jv_invalid_with_msg(jv_string(msg));
+ if (p->stackpos != 0) // entries remain on the parser stack
+ return jv_invalid_with_msg(jv_string("Unfinished JSON term"));
+ // p->next is either invalid (nothing here but no syntax error)
+ // or valid (this is the value). either way it's the thing to return
+ value = p->next;
+ p->next = jv_invalid(); // reset the slot so a later call does not return the same value again
+ return value;
+ }
+}
jv jv_parse_sized(const char* string, int length) {
struct jv_parser parser;
jv_parser_init(&parser);
-
- const char* p = string;
- char ch;
- presult msg = 0;
- while (msg == 0 && p < string + length) {
- ch = *p++;
- msg = scan(&parser, ch);
- }
- if (msg == 0) msg = finish(&parser);
- jv value;
- if (msg) {
- value = jv_invalid_with_msg(jv_string_fmt("%s (while parsing '%s')", msg, string));
+ jv_parser_set_buf(&parser, string, length, 0);
+ jv value = jv_parser_next(&parser);
+ if (jv_is_valid(value)) {
+ jv next = jv_parser_next(&parser);
+ if (jv_is_valid(next)) {
+ // multiple JSON values, we only wanted one
+ jv_free(value);
+ jv_free(next);
+ value = jv_invalid_with_msg(jv_string("Unexpected extra JSON values"));
+ } else if (jv_invalid_has_msg(jv_copy(next))) {
+ // parser error after the first JSON value
+ jv_free(value);
+ value = next;
+ } else {
+ // a single valid JSON value
+ jv_free(next);
+ }
+ } else if (jv_invalid_has_msg(jv_copy(value))) {
+ // parse error, we'll return it
} else {
- value = jv_copy(parser.next);
+ // no value at all
+ jv_free(value);
+ value = jv_invalid_with_msg(jv_string("Expected JSON value"));
}
jv_parser_free(&parser);
+
+ if (!jv_is_valid(value) && jv_invalid_has_msg(jv_copy(value))) {
+ jv msg = jv_invalid_get_msg(value);
+ value = jv_invalid_with_msg(jv_string_fmt("%s (while parsing '%s')",
+ jv_string_value(msg),
+ string));
+ jv_free(msg);
+ }
return value;
}
diff --git a/c/jv_parse.h b/c/jv_parse.h
index 10270766..5b8e7cdf 100644
--- a/c/jv_parse.h
+++ b/c/jv_parse.h
@@ -1,5 +1,12 @@
-
+#ifndef JV_PARSE_H
+#define JV_PARSE_H
+#include "jv_dtoa.h"
struct jv_parser {
+ const char* curr_buf;
+ int curr_buf_length;
+ int curr_buf_pos;
+ int curr_buf_is_partial;
+
jv* stack;
int stackpos;
int stacklen;
@@ -17,3 +24,11 @@ struct jv_parser {
JV_PARSER_STRING_ESCAPE
} st;
};
+
+void jv_parser_init(struct jv_parser* p);
+void jv_parser_free(struct jv_parser* p);
+
+void jv_parser_set_buf(struct jv_parser* p, const char* buf, int length, int is_partial);
+
+jv jv_parser_next(struct jv_parser* p);
+#endif
diff --git a/c/jv_print.c b/c/jv_print.c
index c2f7f58d..e122a5c8 100644
--- a/c/jv_print.c
+++ b/c/jv_print.c
@@ -10,7 +10,7 @@ static void jv_dump_string(jv str, int ascii_only) {
assert(jv_get_kind(str) == JV_KIND_STRING);
const char* i = jv_string_value(str);
const char* end = i + jv_string_length(jv_copy(str));
- int c;
+ int c = 0;
while ((i = jvp_utf8_next(i, end, &c))) {
assert(c != -1);
int unicode_escape = 0;
diff --git a/c/main.c b/c/main.c
index f6d9e67f..05288d1b 100644
--- a/c/main.c
+++ b/c/main.c
@@ -4,6 +4,7 @@
#include "parser.tab.h"
#include "builtin.h"
#include "jv.h"
+#include "jv_parse.h"
#include "locfile.h"
int jq_parse(struct locfile* source, block* answer);
@@ -31,31 +32,6 @@ struct bytecode* jq_compile(const char* str) {
return bc;
}
-
-void run_program(struct bytecode* bc) {
-#if JQ_DEBUG
- dump_disassembly(0, bc);
- printf("\n");
-#endif
- char buf[409600];
- fgets(buf, sizeof(buf), stdin);
- jv value = jv_parse(buf);
- if (!jv_is_valid(value)) {
- assert(0 && "couldn't parse input"); //FIXME
- }
- jq_init(bc, value);
- jv result;
- while (jv_is_valid(result = jq_next())) {
- jv_dump(result);
- printf("\n");
- }
- jv_free(result);
- #if JQ_DEBUG
- printf("end of results\n");
- #endif
- jq_teardown();
-}
-
int skipline(const char* buf) {
int p = 0;
while (buf[p] == ' ' || buf[p] == '\t') p++;
@@ -128,7 +104,40 @@ int main(int argc, char* argv[]) {
if (argc == 1) { run_tests(); return 0; }
struct bytecode* bc = jq_compile(argv[1]);
if (!bc) return 1;
- run_program(bc);
+
+#if JQ_DEBUG
+ dump_disassembly(0, bc);
+ printf("\n");
+#endif
+
+ struct jv_parser parser;
+ jv_parser_init(&parser);
+ while (!feof(stdin)) {
+ char buf[4096];
+ if (!fgets(buf, sizeof(buf), stdin)) buf[0] = 0;
+ jv_parser_set_buf(&parser, buf, strlen(buf), !feof(stdin));
+ jv value;
+ while (jv_is_valid((value = jv_parser_next(&parser)))) {
+ jq_init(bc, value);
+ jv result;
+ while (jv_is_valid(result = jq_next())) {
+ jv_dump(result);
+ printf("\n");
+ }
+ jv_free(result);
+ jq_teardown();
+ }
+ if (jv_invalid_has_msg(jv_copy(value))) {
+ jv msg = jv_invalid_get_msg(value);
+ fprintf(stderr, "parse error: %s\n", jv_string_value(msg));
+ jv_free(msg);
+ break;
+ } else {
+ jv_free(value);
+ }
+ }
+ jv_parser_free(&parser);
+
bytecode_free(bc);
return 0;
}