summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Nicolas Williams <nico@cryptonector.com> 2014-10-10 22:19:38 -0500
committer Nicolas Williams <nico@cryptonector.com> 2014-10-12 08:44:40 -0500
commit 89791a000ba8fd614d8a8fa59a5ba76f21ea4d1d (patch)
tree 6aab1b7f73f996652a8e11bd2c74ad633ae55925
parent 3411167c03fba129d44a0e7c9699767c9e8fd5cd (diff)
Add support for JSON sequence MIME type
Per draft-ietf-json-text-sequence-07 (which soon will be published as an RFC).
-rw-r--r-- docs/content/3.manual/manual.yml 11
-rw-r--r-- jv.h 2
-rw-r--r-- jv_parse.c 72
-rw-r--r-- main.c 37
-rwxr-xr-x tests/run 40
5 files changed, 141 insertions, 21 deletions
diff --git a/docs/content/3.manual/manual.yml b/docs/content/3.manual/manual.yml
index a89feb67..31c7b2f0 100644
--- a/docs/content/3.manual/manual.yml
+++ b/docs/content/3.manual/manual.yml
@@ -92,6 +92,17 @@ sections:
Output the jq version and exit with zero.
+ * `--seq`:
+
+ Use the `application/json-seq` MIME type scheme for separating
+ JSON texts in jq's input and output. This means that an ASCII
+ RS (record separator) character is printed before each value on
+ output and an ASCII LF (line feed) is printed after every
+ output. Input JSON texts that fail to parse are ignored (but
+ warned about), discarding all subsequent input until the next
+ RS. This mode also parses the output of jq without the `--seq`
+ option.
+
* `--slurp`/`-s`:
Instead of running the filter for each JSON object in the
diff --git a/jv.h b/jv.h
index 465070ad..08b89aec 100644
--- a/jv.h
+++ b/jv.h
@@ -156,6 +156,8 @@ void jv_dump(jv, int flags);
void jv_show(jv, int flags);
jv jv_dump_string(jv, int flags);
+#define JV_PARSE_SEQ 1
+
jv jv_parse(const char* string);
jv jv_parse_sized(const char* string, int length);
diff --git a/jv_parse.c b/jv_parse.c
index 5b703fd7..e534e93b 100644
--- a/jv_parse.c
+++ b/jv_parse.c
@@ -24,6 +24,8 @@ struct jv_parser {
int curr_buf_is_partial;
unsigned bom_strip_position;
+ int flags;
+
jv* stack;
int stackpos;
int stacklen;
@@ -40,12 +42,15 @@ struct jv_parser {
enum {
JV_PARSER_NORMAL,
JV_PARSER_STRING,
- JV_PARSER_STRING_ESCAPE
+ JV_PARSER_STRING_ESCAPE,
+ JV_PARSER_WAITING_FOR_RS // parse error, waiting for RS
} st;
+ unsigned int last_ch_was_ws:1;
};
static void parser_init(struct jv_parser* p) {
+ p->flags = 0;
p->stack = 0;
p->stacklen = p->stackpos = 0;
p->next = jv_invalid();
@@ -60,10 +65,18 @@ static void parser_init(struct jv_parser* p) {
jvp_dtoa_context_init(&p->dtoa);
}
-static void parser_free(struct jv_parser* p) {
+static void parser_reset(struct jv_parser* p) {
jv_free(p->next);
+ p->next = jv_invalid();
for (int i=0; i<p->stackpos; i++)
jv_free(p->stack[i]);
+ p->stackpos = 0;
+ p->tokenpos = 0;
+ p->st = JV_PARSER_NORMAL;
+}
+
+static void parser_free(struct jv_parser* p) {
+ parser_reset(p);
jv_mem_free(p->stack);
jv_mem_free(p->tokenbuf);
jvp_dtoa_context_free(&p->dtoa);
@@ -330,9 +343,26 @@ static pfunc scan(struct jv_parser* p, char ch, jv* out) {
p->line++;
p->column = 0;
}
+ if (ch == '\036' /* ASCII RS; see draft-ietf-json-sequence-07 */) {
+ TRY(check_literal(p));
+ if (p->st == JV_PARSER_NORMAL && check_done(p, out)) {
+ if ((p->flags & JV_PARSE_SEQ) && !p->last_ch_was_ws && jv_get_kind(*out) == JV_KIND_NUMBER) {
+ jv_free(*out);
+ *out = jv_invalid();
+ return "Potentially truncated top-level numeric value";
+ }
+ return OK;
+ }
+ parser_reset(p);
+ *out = jv_invalid();
+ return "Truncated value";
+ }
presult answer = 0;
+ p->last_ch_was_ws = 0;
if (p->st == JV_PARSER_NORMAL) {
chclass cls = classify(ch);
+ if (cls == WHITESPACE)
+ p->last_ch_was_ws = 1;
if (cls != LITERAL) {
TRY(check_literal(p));
if (check_done(p, out)) answer = OK;
@@ -373,6 +403,7 @@ static pfunc scan(struct jv_parser* p, char ch, jv* out) {
struct jv_parser* jv_parser_new(int flags) {
struct jv_parser* p = jv_mem_alloc(sizeof(struct jv_parser));
parser_init(p);
+ p->flags = flags;
return p;
}
@@ -412,14 +443,22 @@ jv jv_parser_next(struct jv_parser* p) {
assert(p->curr_buf && "a buffer must be provided");
if (p->bom_strip_position == 0xff) return jv_invalid_with_msg(jv_string("Malformed BOM"));
jv value;
+ char ch;
presult msg = 0;
while (!msg && p->curr_buf_pos < p->curr_buf_length) {
- char ch = p->curr_buf[p->curr_buf_pos++];
+ ch = p->curr_buf[p->curr_buf_pos++];
+ if (ch != '\036' && p->st == JV_PARSER_WAITING_FOR_RS)
+ continue; // need to resync, wait for RS
msg = scan(p, ch, &value);
}
if (msg == OK) {
return value;
} else if (msg) {
+ parser_reset(p);
+ if (ch != '\036' && (p->flags & JV_PARSE_SEQ)) {
+ p->st = JV_PARSER_WAITING_FOR_RS;
+ return jv_invalid_with_msg(jv_string_fmt("%s at line %d, column %d (need RS to resync)", msg, p->line, p->column));
+ }
return jv_invalid_with_msg(jv_string_fmt("%s at line %d, column %d", msg, p->line, p->column));
} else if (p->curr_buf_is_partial) {
assert(p->curr_buf_pos == p->curr_buf_length);
@@ -428,16 +467,31 @@ jv jv_parser_next(struct jv_parser* p) {
} else {
assert(p->curr_buf_pos == p->curr_buf_length);
// at EOF
- if (p->st != JV_PARSER_NORMAL)
- return jv_invalid_with_msg(jv_string("Unfinished string"));
- if ((msg = check_literal(p)))
- return jv_invalid_with_msg(jv_string(msg));
- if (p->stackpos != 0)
- return jv_invalid_with_msg(jv_string("Unfinished JSON term"));
+ if (p->st != JV_PARSER_WAITING_FOR_RS) {
+ if (p->st != JV_PARSER_NORMAL) {
+ parser_reset(p);
+ p->st = JV_PARSER_WAITING_FOR_RS;
+ return jv_invalid_with_msg(jv_string("Unfinished string"));
+ }
+ if ((msg = check_literal(p))) {
+ parser_reset(p);
+ p->st = JV_PARSER_WAITING_FOR_RS;
+ return jv_invalid_with_msg(jv_string(msg));
+ }
+ if (p->stackpos != 0) {
+ parser_reset(p);
+ p->st = JV_PARSER_WAITING_FOR_RS;
+ return jv_invalid_with_msg(jv_string("Unfinished JSON term"));
+ }
+ }
// p->next is either invalid (nothing here but no syntax error)
// or valid (this is the value). either way it's the thing to return
value = p->next;
p->next = jv_invalid();
+ if ((p->flags & JV_PARSE_SEQ) && !p->last_ch_was_ws && jv_get_kind(value) == JV_KIND_NUMBER) {
+ jv_free(value);
+ return jv_invalid_with_msg(jv_string("Potentially truncated top-level numeric value"));
+ }
return value;
}
}
diff --git a/main.c b/main.c
index 335ca7b2..8ebdb9fc 100644
--- a/main.c
+++ b/main.c
@@ -90,8 +90,9 @@ enum {
UNBUFFERED_OUTPUT = 2048,
EXIT_STATUS = 4096,
IN_PLACE = 8192,
+ SEQ = 16384,
/* debugging only */
- DUMP_DISASM = 16384,
+ DUMP_DISASM = 32768,
};
static int options = 0;
@@ -122,6 +123,8 @@ static int process(jq_state *jq, jv value, int flags) {
ret = 11;
else
ret = 0;
+ if (options & SEQ)
+ fwrite("\036", 1, 1, stdout);
jv_dump(result, dumpopts);
}
if (!(options & RAW_NO_LF))
@@ -284,6 +287,10 @@ int main(int argc, char* argv[]) {
options |= IN_PLACE;
if (!short_opts) continue;
}
+ if (isoption(argv[i], 0, "seq", &short_opts)) {
+ options |= SEQ;
+ if (!short_opts) continue;
+ }
if (isoption(argv[i], 'e', "exit-status", &short_opts)) {
options |= EXIT_STATUS;
if (!short_opts) continue;
@@ -444,7 +451,7 @@ int main(int argc, char* argv[]) {
slurped = jv_array();
}
}
- struct jv_parser* parser = jv_parser_new(0);
+ struct jv_parser* parser = jv_parser_new((options & SEQ) ? JV_PARSE_SEQ : 0);
char buf[4096];
int is_last = 0;
while (read_more(buf, sizeof(buf), &is_last)) {
@@ -461,22 +468,28 @@ int main(int argc, char* argv[]) {
} else {
jv_parser_set_buf(parser, buf, strlen(buf), !is_last);
jv value;
- while (jv_is_valid((value = jv_parser_next(parser)))) {
+ while (jv_is_valid(value = jv_parser_next(parser)) || jv_invalid_has_msg(jv_copy(value))) {
+ if (!jv_is_valid(value)) {
+ jv msg = jv_invalid_get_msg(value);
+ if (!(options & SEQ)) {
+ // We used to treat parse errors as fatal...
+ ret = 4;
+ fprintf(stderr, "parse error: %s\n", jv_string_value(msg));
+ jv_free(msg);
+ break;
+ }
+ fprintf(stderr, "ignoring parse error: %s\n", jv_string_value(msg));
+ jv_free(msg);
+ // ...but with --seq we attempt to recover.
+ continue;
+ }
if (options & SLURP) {
slurped = jv_array_append(slurped, value);
} else {
ret = process(jq, value, jq_flags);
+ value = jv_invalid();
}
}
- if (jv_invalid_has_msg(jv_copy(value))) {
- jv msg = jv_invalid_get_msg(value);
- fprintf(stderr, "parse error: %s\n", jv_string_value(msg));
- jv_free(msg);
- ret = 4;
- break;
- } else {
- jv_free(value);
- }
}
}
jv_parser_free(parser);
diff --git a/tests/run b/tests/run
index 0b5fcafc..252ad732 100755
--- a/tests/run
+++ b/tests/run
@@ -78,6 +78,46 @@ case "$v" in
*) true;;
esac
+## Test JSON sequence support
+
+cat > $d/expected <<EOF
+ignoring parse error: Potentially truncated top-level numeric value at line 1, column 2
+ignoring parse error: Truncated value at line 2, column 5
+ignoring parse error: Truncated value at line 2, column 25
+ignoring parse error: Truncated value at line 2, column 41
+EOF
+printf '1\0362 3\n[0,1\036[4,5]true"ab"{"c":4\036{}{"d":5,"e":6"\036false\n'|$VALGRIND $Q ./jq -ces --seq '. == [2,3,[4,5],true,"ab",{},false]' > /dev/null 2> $d/out
+cmp $d/out $d/expected
+
+cat > $d/expected <<EOF
+ignoring parse error: Potentially truncated top-level numeric value at line 1, column 2
+ignoring parse error: Truncated value at line 2, column 5
+ignoring parse error: Truncated value at line 2, column 25
+ignoring parse error: Invalid literal at line 3, column 1
+EOF
+printf '1\0362 3\n[0,1\036[4,5]true"ab"{"c":4\036{}{"d":5,"e":6"false\n\036null'|$VALGRIND $Q ./jq -ces --seq '. == [2,3,[4,5],true,"ab",{},null]' > /dev/null 2> $d/out
+cmp $d/out $d/expected
+
+# Note that here jq sees no inputs at all but it still succeeds because
+# --seq ignores parse errors
+cat > $d/expected <<EOF
+ignoring parse error: Unfinished string
+EOF
+printf '"foo'|./jq -ce --seq . > $d/out 2>&1
+cmp $d/out $d/expected
+
+# Numeric values truncated by EOF are ignored
+cat > $d/expected <<EOF
+ignoring parse error: Potentially truncated top-level numeric value
+EOF
+printf '1'|./jq -ce --seq . > $d/out 2>&1
+cmp $d/out $d/expected
+
+cat > $d/expected <<EOF
+EOF
+printf '1\n'|./jq -ces --seq '. == [1]' >/dev/null 2> $d/out
+cmp $d/out $d/expected
+
## Test library/module system
mods=$PWD/tests/modules