summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNicolas Williams <nico@cryptonector.com>2014-12-28 00:32:06 -0600
committerNicolas Williams <nico@cryptonector.com>2014-12-30 11:42:45 -0600
commit7dc34b92aff7a38e16c0ef608238d03e1ac3d213 (patch)
tree12c5b487c154b3a63d82778731cf5249544884a1
parentcbfc0d6130b784799256f29bb515727df9d894f4 (diff)
Add `label $name | EXP`; fix `break`
This is to fix the problem where `break` is dynamic, not lexical. With this it should be possible to do this sort of thing: label $break | inputs | if ... then $break|error else . end This is a backwards-incompatible change for master, but the previous `break` hadn't shipped yet. Still needed: - testing
-rw-r--r--builtin.c17
-rw-r--r--bytecode.h3
-rw-r--r--compile.c93
-rw-r--r--compile.h2
-rw-r--r--docs/content/3.manual/manual.yml62
-rw-r--r--execute.c7
-rw-r--r--lexer.l4
-rw-r--r--main.c1
-rw-r--r--opcode_list.h1
-rw-r--r--parser.y44
-rw-r--r--tests/all.test24
11 files changed, 225 insertions, 33 deletions
diff --git a/builtin.c b/builtin.c
index 6399aff1..1b1cd65a 100644
--- a/builtin.c
+++ b/builtin.c
@@ -989,7 +989,6 @@ static block bind_bytecoded_builtins(block b) {
static const char* const jq_builtins[] = {
"def error: error(.);",
- "def break: error(\"break\");",
"def map(f): [.[] | f];",
"def map_values(f): .[] |= f;",
"def select(f): if f then . else empty end;",
@@ -1024,14 +1023,14 @@ static const char* const jq_builtins[] = {
"def paths: path(recurse(if (type|. == \"array\" or . == \"object\") then .[] else empty end))|select(length > 0);",
"def paths(node_filter): . as $dot|paths|select(. as $p|$dot|getpath($p)|node_filter);",
"def any(generator; condition):"
- " [foreach generator as $i"
+ " [label | foreach generator as $i"
" (false;"
" if . then break elif $i | condition then true else . end;"
" if . then . else empty end)] | length == 1;",
"def any(condition): any(.[]; condition);",
"def any: any(.);",
"def all(generator; condition): "
- " [foreach generator as $i"
+ " [label | foreach generator as $i"
" (true;"
" if .|not then break elif $i | condition then . else false end;"
" if .|not then . else empty end)] | length == 0;",
@@ -1133,13 +1132,13 @@ static const char* const jq_builtins[] = {
"def while(cond; update): "
" def _while: "
" if cond then ., (update | _while) else empty end; "
- " try _while catch if .==\"break\" then empty else . end;",
+ " _while;",
"def until(cond; next): "
" def _until: "
" if cond then . else (next|_until) end;"
" _until;",
- "def limit($n; exp): if $n < 0 then exp else foreach exp as $item ([$n, null]; if .[0] < 1 then break else [.[0] -1, $item] end; .[1]) end;",
- "def first(g): foreach g as $item ([false, null]; if .[0]==true then break else [true, $item] end; .[1]);",
+ "def limit($n; exp): if $n < 0 then exp else label | foreach exp as $item ([$n, null]; if .[0] < 1 then break else [.[0] -1, $item] end; .[1]) end;",
+ "def first(g): label | foreach g as $item ([false, null]; if .[0]==true then break else [true, $item] end; .[1]);",
"def last(g): reduce g as $item (null; $item);",
"def nth($n; g): if $n < 0 then error(\"nth doesn't support negative indices\") else last(limit($n + 1; g)) end;",
"def first: .[0];",
@@ -1157,12 +1156,12 @@ static const char* const jq_builtins[] = {
" end;",
"def in(xs): . as $x | xs | has($x);",
"def inside(xs): . as $x | xs | contains($x);",
- "def input: try _input catch if .==\"break\" then empty else . end;",
+ "def input: _input;",
"def repeat(exp): "
" def _repeat: "
" exp, _repeat;"
- " try _repeat catch if .==\"break\" then empty else . end;",
- "def inputs: repeat(_input);",
+ " _repeat;",
+ "def inputs: try repeat(_input) catch if .==\"break\" then empty else .|error end;",
// # like ruby's downcase - only characters A to Z are affected
"def ascii_downcase:"
" explode | map( if 65 <= . and . <= 90 then . + 32 else . end) | implode;",
diff --git a/bytecode.h b/bytecode.h
index ee50d2cb..8662c4d9 100644
--- a/bytecode.h
+++ b/bytecode.h
@@ -25,6 +25,9 @@ enum {
OP_HAS_UFUNC = 64,
OP_IS_CALL_PSEUDO = 128,
OP_HAS_BINDING = 1024,
+ // NOTE: Not actually part of any op -- a pseudo-op flag for special
+ // handling of `break`.
+ OP_BIND_WILDCARD = 2048,
};
struct opcode_description {
opcode op;
diff --git a/compile.c b/compile.c
index 042161db..431f66eb 100644
--- a/compile.c
+++ b/compile.c
@@ -241,6 +241,7 @@ int block_has_only_binders_and_imports(block binders, int bindflags) {
int block_has_only_binders(block binders, int bindflags) {
bindflags |= OP_HAS_BINDING;
+ bindflags &= ~OP_BIND_WILDCARD;
for (inst* curr = binders.first; curr; curr = curr->next) {
if ((opcode_describe(curr->op)->flags & bindflags) != bindflags && curr->op != MODULEMETA) {
return 0;
@@ -291,11 +292,12 @@ static int block_count_refs(block binder, block body) {
return nrefs;
}
-static int block_bind_subblock(block binder, block body, int bindflags) {
+static int block_bind_subblock(block binder, block body, int bindflags, int break_distance) {
assert(block_is_single(binder));
- assert((opcode_describe(binder.first->op)->flags & bindflags) == bindflags);
+ assert((opcode_describe(binder.first->op)->flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD));
assert(binder.first->symbol);
assert(binder.first->bound_by == 0 || binder.first->bound_by == binder.first);
+ assert(break_distance >= 0);
binder.first->bound_by = binder.first;
if (binder.first->nformals == -1)
@@ -303,9 +305,16 @@ static int block_bind_subblock(block binder, block body, int bindflags) {
int nrefs = 0;
for (inst* i = body.first; i; i = i->next) {
int flags = opcode_describe(i->op)->flags;
- if ((flags & bindflags) == bindflags &&
- i->bound_by == 0 &&
- !strcmp(i->symbol, binder.first->symbol)) {
+ if ((flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD) && i->bound_by == 0 &&
+ (!strcmp(i->symbol, binder.first->symbol) ||
+ // Check for break/break2/break3; see parser.y
+ ((bindflags & OP_BIND_WILDCARD) && i->symbol[0] == '*' &&
+ break_distance <= 3 && (i->symbol[1] == '1' + break_distance) &&
+ i->symbol[2] == '\0'))) {
+ if (bindflags & OP_BIND_WILDCARD) {
+ jv_mem_free(i->symbol);
+ i->symbol = strdup(binder.first->symbol);
+ }
// bind this instruction
if (i->op == CALL_JQ && i->nactuals == -1)
i->nactuals = block_count_actuals(i->arglist);
@@ -313,11 +322,17 @@ static int block_bind_subblock(block binder, block body, int bindflags) {
i->bound_by = binder.first;
nrefs++;
}
+ } else if ((flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD) && i->bound_by != 0 &&
+ !strncmp(binder.first->symbol, "*anonlabel", sizeof("*anonlabel") - 1) &&
+ !strncmp(i->symbol, "*anonlabel", sizeof("*anonlabel") - 1)) {
+ // Increment the break distance required for this binder to match
+ // a break whenever we come across a STOREV of *anonlabel...
+ break_distance++;
}
// binding recurses into closures
- nrefs += block_bind_subblock(binder, i->subfn, bindflags);
+ nrefs += block_bind_subblock(binder, i->subfn, bindflags, break_distance);
// binding recurses into argument list
- nrefs += block_bind_subblock(binder, i->arglist, bindflags);
+ nrefs += block_bind_subblock(binder, i->arglist, bindflags, break_distance);
}
return nrefs;
}
@@ -327,7 +342,7 @@ static int block_bind_each(block binder, block body, int bindflags) {
bindflags |= OP_HAS_BINDING;
int nrefs = 0;
for (inst* curr = binder.first; curr; curr = curr->next) {
- nrefs += block_bind_subblock(inst_block(curr), body, bindflags);
+ nrefs += block_bind_subblock(inst_block(curr), body, bindflags, 0);
}
return nrefs;
}
@@ -354,7 +369,7 @@ block block_bind_library(block binder, block body, int bindflags, const char* li
strcpy(tname, matchname);
strcpy(tname+matchlen,cname);
curr->symbol = tname;
- nrefs += block_bind_subblock(inst_block(curr), body, bindflags);
+ nrefs += block_bind_subblock(inst_block(curr), body, bindflags, 0);
curr->symbol = cname;
free(tname);
}
@@ -483,13 +498,13 @@ block gen_function(const char* name, block formals, block body) {
i->op = CLOSURE_PARAM;
body = gen_var_binding(gen_call(i->symbol, gen_noop()), i->symbol, body);
}
- block_bind_subblock(inst_block(i), body, OP_IS_CALL_PSEUDO | OP_HAS_BINDING);
+ block_bind_subblock(inst_block(i), body, OP_IS_CALL_PSEUDO | OP_HAS_BINDING, 0);
}
i->subfn = body;
i->symbol = strdup(name);
i->arglist = formals;
block b = inst_block(i);
- block_bind_subblock(b, b, OP_IS_CALL_PSEUDO | OP_HAS_BINDING);
+ block_bind_subblock(b, b, OP_IS_CALL_PSEUDO | OP_HAS_BINDING, 0);
return b;
}
@@ -707,10 +722,7 @@ block gen_foreach(const char* varname, block source, block init, block update, b
// want to output it, so we backtrack.
gen_op_simple(BACKTRACK));
inst_set_target(output, foreach); // make that JUMP go bast the BACKTRACK at the end of the loop
- block handler = gen_cond(gen_call("_equal", BLOCK(gen_lambda(gen_const(jv_string("break"))), gen_lambda(gen_noop()))),
- gen_op_simple(BACKTRACK),
- gen_call("error", gen_noop()));
- return gen_try(foreach, handler);
+ return foreach;
}
block gen_definedor(block a, block b) {
@@ -790,20 +802,45 @@ block gen_var_binding(block var, const char* name, block body) {
body, OP_HAS_VARIABLE));
}
+// Like gen_var_binding(), but bind `break`'s wildcard unbound variable
+static block gen_wildvar_binding(block var, const char* name, block body) {
+ return BLOCK(gen_op_simple(DUP), var,
+ block_bind(gen_op_unbound(STOREV, name),
+ body, OP_HAS_VARIABLE | OP_BIND_WILDCARD));
+}
+
block gen_cond(block cond, block iftrue, block iffalse) {
return BLOCK(gen_op_simple(DUP), cond,
gen_condbranch(BLOCK(gen_op_simple(POP), iftrue),
BLOCK(gen_op_simple(POP), iffalse)));
}
+block gen_try_handler(block handler) {
+ // Quite a pain just to hide jq's internal errors.
+ return gen_cond(// `if type=="object" and .__jq
+ gen_and(gen_call("_equal",
+ BLOCK(gen_lambda(gen_const(jv_string("object"))),
+ gen_lambda(gen_noop()))),
+ BLOCK(gen_subexp(gen_const(jv_string("__jq"))),
+ gen_noop(),
+ gen_op_simple(INDEX))),
+ // `then error`
+ gen_call("error", gen_noop()),
+ // `else HANDLER end`
+ handler);
+}
+
block gen_try(block exp, block handler) {
/*
- * Produce:
+ * Produce something like:
* FORK_OPT <address of handler>
* <exp>
* JUMP <end of handler>
* <handler>
*
+ * If this is not an internal try/catch, then catch and re-raise
+ * internal errors to prevent them from leaking.
+ *
* The handler will only execute if we backtrack to the FORK_OPT with
* an error (exception). If <exp> produces no value then FORK_OPT
* will backtrack (propagate the `empty`, as it were. If <exp>
@@ -817,6 +854,25 @@ block gen_try(block exp, block handler) {
return BLOCK(gen_op_target(FORK_OPT, exp), exp, handler);
}
+block gen_label(const char *label, block exp) {
+ block cond = gen_call("_equal",
+ BLOCK(gen_lambda(gen_noop()),
+ gen_lambda(gen_op_unbound(LOADV, label))));
+ return gen_wildvar_binding(gen_op_simple(GENLABEL), label,
+ BLOCK(gen_op_simple(POP),
+ // try exp catch if . == $label
+ // then empty
+ // else error end
+ //
+ // Can't use gen_binop(), as that's firmly
+ // stuck in parser.y as it refers to things
+ // like EQ.
+ gen_try(exp,
+ gen_cond(cond,
+ gen_op_simple(BACKTRACK),
+ gen_call("error", gen_noop())))));
+}
+
block gen_cbinding(const struct cfunction* cfunctions, int ncfunctions, block code) {
for (int cfunc=0; cfunc<ncfunctions; cfunc++) {
inst* i = inst_new(CLOSURE_CREATE_C);
@@ -855,7 +911,10 @@ static int expand_call_arglist(block* b) {
for (inst* curr; (curr = block_take(b));) {
if (opcode_describe(curr->op)->flags & OP_HAS_BINDING) {
if (!curr->bound_by) {
- locfile_locate(curr->locfile, curr->source, "jq: error: %s/%d is not defined", curr->symbol, block_count_actuals(curr->arglist));
+ if (curr->symbol[0] == '*' && curr->symbol[1] >= '1' && curr->symbol[1] <= '3' && curr->symbol[2] == '\0')
+ locfile_locate(curr->locfile, curr->source, "jq: error: break used outside labeled control structure");
+ else
+ locfile_locate(curr->locfile, curr->source, "jq: error: %s/%d is not defined", curr->symbol, block_count_actuals(curr->arglist));
errors++;
// don't process this instruction if it's not well-defined
ret = BLOCK(ret, inst_block(curr));
diff --git a/compile.h b/compile.h
index 7d5e6c1b..25544e1e 100644
--- a/compile.h
+++ b/compile.h
@@ -50,7 +50,9 @@ block gen_or(block a, block b);
block gen_var_binding(block var, const char* name, block body);
block gen_cond(block cond, block iftrue, block iffalse);
+block gen_try_handler(block handler);
block gen_try(block exp, block handler);
+block gen_label(const char *label, block exp);
block gen_cbinding(const struct cfunction* functions, int nfunctions, block b);
diff --git a/docs/content/3.manual/manual.yml b/docs/content/3.manual/manual.yml
index 354a595c..03553716 100644
--- a/docs/content/3.manual/manual.yml
+++ b/docs/content/3.manual/manual.yml
@@ -1740,6 +1740,55 @@ sections:
input: 'true'
output: ['"some exception"']
+ - title: Breaking out of control structures
+ body: |
+
+ A convenient use of try/catch is to break out of control
+ structures like `reduce`, `foreach`, `while`, and so on.
+
+ For example:
+
+ # Repeat an expression until it raises "break" as an
+ # error, then stop repeating without re-raising the error.
+ # But if the error caught is not "break" then re-raise it.
+ try repeat(exp) catch .=="break" then empty else error;
+
+ jq has a syntax for lexical labels to "break" or "go (back) to":
+
+ label | ... break ...
+
+ The `break` builtin will cause the program to return to act as
+ though the nearest `label` (to the left) produced `empty`.
+
+ The `break2` builtin will return the program to the second
+ nearest `label`, then `empty`.
+
+ The `break3` builtin will return the program to the third
+ nearest `label`, then `empty`.
+
+ A variation on this is named labels:
+
+ label $name | ... break $name
+
+ The `break $name` form returns to the named label and produces
+ `empty`.
+
+ In all cases the relationship between the `break` and
+ corresponding `label` is lexical: the label has to be
+ "visible" from the break.
+
+ To break out of a `reduce`, for example:
+
+ label | reduce .[] as $item (null; if .==false then break else ... end)
+
+ The following jq program produces a syntax error:
+
+ break
+
+ because:
+
+ jq: error: break used outside labeled control structure
+
- title: "`?` operator"
body: |
@@ -2171,8 +2220,7 @@ sections:
This is mostly useful only for constructing `reduce`- and
`limit`-like functions. But it is much more general, as it
- allows for partial reductions (see the example below), as well
- as breaking out of the "loop" with `break`.
+ allows for partial reductions (see the example below).
examples:
- program: '[foreach .[] as $item
@@ -2199,8 +2247,14 @@ sections:
def recurse(f): def r: ., (f | select(. != null) | r); r;
def while(cond; update):
- def w: if cond then ., (update | _while) else empty end;
- try _while catch if . == "break" then empty else . end;
+ def _while:
+ if cond then ., (update | _while) else empty end;
+ _while;
+
+ def repeat(exp):
+ def _repeat:
+ exp, _repeat;
+ _repeat;
- title: Generators and iterators
body: |
diff --git a/execute.c b/execute.c
index 67a1629b..3be6abf3 100644
--- a/execute.c
+++ b/execute.c
@@ -37,6 +37,7 @@ struct jq_state {
int subexp_nest;
int debug_trace_enabled;
int initial_execution;
+ unsigned next_label;
jv attrs;
jq_input_cb input_cb;
@@ -351,6 +352,11 @@ jv jq_next(jq_state *jq) {
break;
}
+ case GENLABEL: {
+ stack_push(jq, JV_OBJECT(jv_string("__jq"), jv_number(jq->next_label++)));
+ break;
+ }
+
case DUP: {
jv v = stack_pop(jq);
stack_push(jq, jv_copy(v));
@@ -838,6 +844,7 @@ jq_state *jq_init(void) {
return NULL;
jq->bc = 0;
+ jq->next_label = 0;
stack_init(&jq->stk);
jq->stk_top = 0;
diff --git a/lexer.l b/lexer.l
index 70ad050b..065ee72a 100644
--- a/lexer.l
+++ b/lexer.l
@@ -57,6 +57,10 @@ struct lexer_param;
"//" { return DEFINEDOR; }
"try" { return TRY; }
"catch" { return CATCH; }
+"label" { return LABEL; }
+"break" { return BREAK; }
+"break2" { return BREAK2; }
+"break3" { return BREAK3; }
"|=" { return SETPIPE; }
"+=" { return SETPLUS; }
"-=" { return SETMINUS; }
diff --git a/main.c b/main.c
index 5b12f48a..4457c58c 100644
--- a/main.c
+++ b/main.c
@@ -351,6 +351,7 @@ int main(int argc, char* argv[]) {
options |= EXIT_STATUS;
if (!short_opts) continue;
}
+ // FIXME: For --arg* we should check that the varname is acceptable
if (isoption(argv[i], 0, "arg", &short_opts)) {
if (i >= argc - 2) {
fprintf(stderr, "%s: --arg takes two parameters (e.g. -a varname value)\n", progname);
diff --git a/opcode_list.h b/opcode_list.h
index 38e63856..bdc4f48e 100644
--- a/opcode_list.h
+++ b/opcode_list.h
@@ -40,3 +40,4 @@ OP(TOP, NONE, 0, 0)
OP(CLOSURE_PARAM_REGULAR, DEFINITION, 0, 0)
OP(DEPS, CONSTANT, 0, 0)
OP(MODULEMETA, CONSTANT, 0, 0)
+OP(GENLABEL, NONE, 0, 1)
diff --git a/parser.y b/parser.y
index 44ae0096..72aad3ae 100644
--- a/parser.y
+++ b/parser.y
@@ -70,6 +70,10 @@ struct lexer_param;
%token OR "or"
%token TRY "try"
%token CATCH "catch"
+%token LABEL "label"
+%token BREAK "break"
+%token BREAK2 "break2"
+%token BREAK3 "break3"
%token SETPIPE "|="
%token SETPLUS "+="
%token SETMINUS "-="
@@ -149,6 +153,8 @@ int yylex(YYSTYPE* yylval, YYLTYPE* yylloc, block* answer, int* errors,
return tok;
}
+static unsigned int next_label = 0;
+
static block gen_dictpair(block k, block v) {
return BLOCK(gen_subexp(k), gen_subexp(v), gen_op_simple(INSERT));
}
@@ -335,7 +341,7 @@ Term "as" '$' IDENT '|' Exp {
"try" Exp "catch" Exp {
//$$ = BLOCK(gen_op_target(FORK_OPT, $2), $2, $4);
- $$ = gen_try($2, $4);
+ $$ = gen_try($2, gen_try_handler($4));
} |
"try" Exp {
//$$ = BLOCK(gen_op_target(FORK_OPT, $2), $2, gen_op_simple(BACKTRACK));
@@ -346,6 +352,19 @@ Term "as" '$' IDENT '|' Exp {
$$ = $2;
} |
+"label" '|' Exp {
+ jv v = jv_string_fmt("*anonlabel%u", next_label++);
+ $$ = gen_location(@$, locations, gen_label(jv_string_value(v), $3));
+ jv_free(v);
+} |
+
+"label" '$' IDENT '|' Exp {
+ jv v = jv_string_fmt("*label-%s", jv_string_value($3));
+ $$ = gen_location(@$, locations, gen_label(jv_string_value(v), $5));
+ jv_free($3);
+ jv_free(v);
+} |
+
// This rule conflicts with all the other rules using the '?' operator.
// It doesn't matter which bison prefers: all of them result in the same
// syntax and semantics, but the more specific rules optimize better
@@ -570,6 +589,29 @@ Term:
REC {
$$ = gen_call("recurse", gen_noop());
} |
+BREAK {
+ $$ = gen_location(@$, locations,
+ BLOCK(gen_op_unbound(LOADV, "*1"), // impossible symbol
+ gen_call("error", gen_noop())));
+} |
+BREAK2 {
+ $$ = gen_location(@$, locations,
+ BLOCK(gen_op_unbound(LOADV, "*2"), // impossible symbol
+ gen_call("error", gen_noop())));
+} |
+BREAK3 {
+ $$ = gen_location(@$, locations,
+ BLOCK(gen_op_unbound(LOADV, "*3"), // impossible symbol
+ gen_call("error", gen_noop())));
+} |
+BREAK '$' IDENT {
+ jv v = jv_string_fmt("*label-%s", jv_string_value($3)); // impossible symbol
+ $$ = gen_location(@$, locations,
+ BLOCK(gen_op_unbound(LOADV, jv_string_value(v)),
+ gen_call("error", gen_noop())));
+ jv_free(v);
+ jv_free($3);
+} |
Term FIELD '?' {
$$ = gen_index_opt($1, gen_const($2));
} |
diff --git a/tests/all.test b/tests/all.test
index 4477ec25..67b80763 100644
--- a/tests/all.test
+++ b/tests/all.test
@@ -247,15 +247,35 @@ null
1
[1,2,4,8,16,32,64]
-[while(.<100; .*2|if . > 10 then break else . end)]
+[label | while(.<100; .*2|if . > 10 then break else . end)]
1
[1,2,4,8]
+[(label $here | .[] | if .>1 then break $here else . end), "hi!"]
+[0,1,2]
+[0,1,"hi!"]
+
+[(label $here | .[] | if .>1 then break $here else . end), "hi!"]
+[0,2,1]
+[0,"hi!"]
+
+(label | (label | 2 | break2)), 1
+null
+1
+
+%%FAIL
+break
+jq: error: break used outside labeled control structure
+
+%%FAIL
+. as $foo | break $foo
+jq: error: *label-foo/0 is not defined
+
[.[]|[.,1]|until(.[0] < 1; [.[0] - 1, .[1] * .[0]])|.[1]]
[1,2,3,4,5]
[1,2,6,24,120]
-[foreach .[] as $item ([3, null]; if .[0] < 1 then break else [.[0] -1, $item] end; .[1])]
+[label | foreach .[] as $item ([3, null]; if .[0] < 1 then break else [.[0] -1, $item] end; .[1])]
[11,22,33,44,55,66,77,88,99]
[11,22,33]