diff options
author | Nicolas Williams <nico@cryptonector.com> | 2015-04-23 18:27:53 -0500 |
---|---|---|
committer | Nicolas Williams <nico@cryptonector.com> | 2015-04-23 23:43:44 -0500 |
commit | dad6e42934ab255bf07ff4ffbae093240d4f6e90 (patch) | |
tree | f5f625e9c00ccb68f8139453835293082388d369 | |
parent | 7d938487dd61f129c1e1b40ff4fa9be27d1b24f5 (diff) |
--raw-input ought to read NULs (partial fix #760)
We can't know how many bytes fgets() read when we reach EOF and fgets()
didn't see a newline: fgets() returns only the buffer pointer, not a byte
count, so we can only assume that at least strlen(buf) bytes were read.
This is quite obnoxious if one wants to use NULs in raw input, but at
least we can make reading "a\0b\0c\0" with no newline yield "a\0b\0c",
losing only the final sequence of NULs.
We can't use getline() either, since it will want to allocate a buffer
big enough for an entire line, and we might not have any newlines in our
input, in which case that buffer would have to hold the whole input. A
complete fix will have to use getc() or read(), preferably the latter,
since it will be the best-performing.
-rw-r--r-- | jq.h | 1 | ||||
-rwxr-xr-x | tests/run | 7 | ||||
-rw-r--r-- | util.c | 70 |
3 files changed, 56 insertions, 22 deletions
@@ -44,7 +44,6 @@ void jq_util_input_set_parser(jq_util_input_state, jv_parser *, int); void jq_util_input_free(jq_util_input_state *); void jq_util_input_add_input(jq_util_input_state, jv); int jq_util_input_open_errors(jq_util_input_state); -int jq_util_input_read_more(jq_util_input_state); jv jq_util_input_next_input(jq_util_input_state); jv jq_util_input_next_input_cb(jq_state *, void *); @@ -12,10 +12,9 @@ fi mods=$PWD/tests/modules +# jq-coded tests here: cat $@ | $VALGRIND $Q ./jq -L "$mods" --run-tests -set -x - clean=true d= clean () { @@ -32,6 +31,10 @@ if [ -z "$d" ]; then exit 0 fi +printf 'a\0b\nc\0d\ne' > $d/input +$VALGRIND $Q ./jq -Rse '. == "a\u0000b\nc\u0000d\ne"' $d/input +$VALGRIND $Q ./jq -Rne '[inputs] == ["a\u0000b", "c\u0000d", "e"]' $d/input + ## Test constant folding ## XXX If we add a builtin to list the program's disassembly then we can @@ -154,6 +154,7 @@ struct jq_util_input_state { int open_failures; jv slurped; char buf[4096]; + size_t buf_valid_len; }; static void fprinter(void *data, jv fname) { @@ -176,6 +177,7 @@ jq_util_input_state jq_util_input_init(jq_msg_cb err_cb, void *err_cb_data) { new_state->files = jv_array(); new_state->slurped = jv_invalid(); new_state->buf[0] = 0; + new_state->buf_valid_len = 0; return new_state; } @@ -220,7 +222,7 @@ static jv next_file(jq_util_input_state state) { return next; } -int jq_util_input_read_more(jq_util_input_state state) { +static int jq_util_input_read_more(jq_util_input_state state) { if (!state->current_input || feof(state->current_input) || ferror(state->current_input)) { if (state->current_input && ferror(state->current_input)) { // System-level input error on the stream. It will be closed (below). 
@@ -252,9 +254,41 @@ int jq_util_input_read_more(jq_util_input_state state) { } state->buf[0] = 0; + state->buf_valid_len = 0; if (state->current_input) { - if (!fgets(state->buf, sizeof(state->buf), state->current_input)) + memset(state->buf, 0, sizeof(state->buf)); + if (!fgets(state->buf, sizeof(state->buf), state->current_input)) { state->buf[0] = 0; + } else { + const char *p = memchr(state->buf, '\n', sizeof(state->buf)); + + if (p == NULL && state->parser != NULL) { + /* There should be no NULs in JSON texts */ + state->buf_valid_len = strlen(state->buf); + } else if (p == NULL && feof(state->current_input)) { + size_t i; + + /* + * XXX We can't know how many bytes we've read! + * + * We can't use getline() because there need not be any newlines + * in the input. The only entirely correct choices are: use + * fgetc() or read(), and of those the latter will be the + * best-performing. + * + * For now we guess how much fgets() read. + */ + for (p = state->buf, i = 0; i < sizeof(state->buf); i++) { + if (state->buf[i] != '\0') + p = &state->buf[i]; + } + state->buf_valid_len = p - state->buf + 1; + } else if (p == NULL) { + state->buf_valid_len = sizeof(state->buf); + } else { + state->buf_valid_len = (p - state->buf) + 1; + } + } } return jv_array_length(jv_copy(state->files)) == 0 && (!state->current_input || feof(state->current_input)); } @@ -272,29 +306,27 @@ jv jq_util_input_next_input(jq_util_input_state state) { if (state->parser == NULL) { // Raw input is_last = jq_util_input_read_more(state); - if (state->buf[0] == '\0') + if (state->buf_valid_len == 0) continue; - int len = strlen(state->buf); // Raw input doesn't support NULs - if (len > 0) { - if (jv_is_valid(state->slurped)) { - // Slurped raw input - state->slurped = jv_string_concat(state->slurped, jv_string(state->buf)); - } else { - if (!jv_is_valid(value)) - value = jv_string(""); - if (state->buf[len-1] == '\n') { - // whole line - state->buf[len-1] = 0; - return jv_string_concat(value, 
jv_string(state->buf)); - } - value = jv_string_concat(value, jv_string(state->buf)); - state->buf[0] = '\0'; + if (jv_is_valid(state->slurped)) { + // Slurped raw input + state->slurped = jv_string_concat(state->slurped, jv_string_sized(state->buf, state->buf_valid_len)); + } else { + if (!jv_is_valid(value)) + value = jv_string(""); + if (state->buf[state->buf_valid_len-1] == '\n') { + // whole line + state->buf[state->buf_valid_len-1] = 0; + return jv_string_concat(value, jv_string_sized(state->buf, state->buf_valid_len-1)); } + value = jv_string_concat(value, jv_string_sized(state->buf, state->buf_valid_len)); + state->buf[0] = '\0'; + state->buf_valid_len = 0; } } else { if (jv_parser_remaining(state->parser) == 0) { is_last = jq_util_input_read_more(state); - jv_parser_set_buf(state->parser, state->buf, strlen(state->buf), !is_last); + jv_parser_set_buf(state->parser, state->buf, state->buf_valid_len, !is_last); } value = jv_parser_next(state->parser); if (jv_is_valid(state->slurped)) { |