#ifndef _GNU_SOURCE #define _GNU_SOURCE // for strdup #endif #include #include #include #include #include #include "compile.h" #include "bytecode.h" #include "locfile.h" #include "jv_alloc.h" #include "linker.h" /* The intermediate representation for jq filters is as a sequence of struct inst, which form a doubly-linked list via the next and prev pointers. A "block" represents a sequence of "struct inst", which may be empty. Blocks are generated by the parser bottom-up, so may have free variables (refer to things not defined). See inst.bound_by and inst.symbol. */ struct inst { struct inst* next; struct inst* prev; opcode op; struct { uint16_t intval; struct inst* target; jv constant; const struct cfunction* cfunc; } imm; struct locfile* locfile; location source; // Binding // An instruction requiring binding (for parameters/variables/functions) // is in one of three states: // inst->bound_by = NULL - Unbound free variable // inst->bound_by = inst - This instruction binds a variable // inst->bound_by = other - Uses variable bound by other instruction // Unbound instructions (references to other things that may or may not // exist) are created by "gen_foo_unbound", and bindings are created by // block_bind(definition, body), which binds all instructions in // body which are unboudn and refer to "definition" by name. struct inst* bound_by; char* symbol; int nformals; int nactuals; block subfn; // used by CLOSURE_CREATE (body of function) block arglist; // used by CLOSURE_CREATE (formals) and CALL_JQ (arguments) // This instruction is compiled as part of which function? // (only used during block_compile) struct bytecode* compiled; int bytecode_pos; // position just after this insn }; static inst* inst_new(opcode op) { inst* i = jv_mem_alloc(sizeof(inst)); i->next = i->prev = 0; i->op = op; i->bytecode_pos = -1; i->bound_by = 0; i->symbol = 0; i->nformals = -1; i->nactuals = -1; i->subfn = gen_noop(); i->arglist = gen_noop(); i->source = UNKNOWN_LOCATION; i->locfile = 0; return i; } static void inst_free(struct inst* i) { jv_mem_free(i->symbol); block_free(i->subfn); block_free(i->arglist); if (i->locfile) locfile_free(i->locfile); if (opcode_describe(i->op)->flags & OP_HAS_CONSTANT) { jv_free(i->imm.constant); } jv_mem_free(i); } static block inst_block(inst* i) { block b = {i,i}; return b; } int block_is_single(block b) { return b.first && b.first == b.last; } static inst* block_take(block* b) { if (b->first == 0) return 0; inst* i = b->first; if (i->next) { i->next->prev = 0; b->first = i->next; i->next = 0; } else { b->first = 0; b->last = 0; } return i; } block gen_location(location loc, struct locfile* l, block b) { for (inst* i = b.first; i; i = i->next) { if (i->source.start == UNKNOWN_LOCATION.start && i->source.end == UNKNOWN_LOCATION.end) { i->source = loc; i->locfile = locfile_retain(l); } } return b; } block gen_noop() { block b = {0,0}; return b; } int block_is_noop(block b) { return (b.first == 0 && b.last == 0); } block gen_op_simple(opcode op) { assert(opcode_describe(op)->length == 1); return inst_block(inst_new(op)); } block gen_const(jv constant) { assert(opcode_describe(LOADK)->flags & OP_HAS_CONSTANT); inst* i = inst_new(LOADK); i->imm.constant = constant; return inst_block(i); } block gen_const_global(jv constant, const char *name) { assert((opcode_describe(STORE_GLOBAL)->flags & (OP_HAS_CONSTANT | OP_HAS_VARIABLE | OP_HAS_BINDING)) == (OP_HAS_CONSTANT | OP_HAS_VARIABLE | OP_HAS_BINDING)); inst* i = inst_new(STORE_GLOBAL); i->imm.constant = constant; i->symbol = strdup(name); return inst_block(i); } block gen_op_pushk_under(jv constant) { assert(opcode_describe(PUSHK_UNDER)->flags & OP_HAS_CONSTANT); inst* i = inst_new(PUSHK_UNDER); i->imm.constant = constant; return inst_block(i); } int block_is_const(block b) { return (block_is_single(b) && (b.first->op == LOADK || b.first->op == PUSHK_UNDER)); } int block_is_const_inf(block b) { return (block_is_single(b) && b.first->op == LOADK && jv_get_kind(b.first->imm.constant) == JV_KIND_NUMBER && isinf(jv_number_value(b.first->imm.constant))); } jv_kind block_const_kind(block b) { assert(block_is_const(b)); return jv_get_kind(b.first->imm.constant); } jv block_const(block b) { assert(block_is_const(b)); return jv_copy(b.first->imm.constant); } block gen_op_target(opcode op, block target) { assert(opcode_describe(op)->flags & OP_HAS_BRANCH); assert(target.last); inst* i = inst_new(op); i->imm.target = target.last; return inst_block(i); } block gen_op_targetlater(opcode op) { assert(opcode_describe(op)->flags & OP_HAS_BRANCH); inst* i = inst_new(op); i->imm.target = 0; return inst_block(i); } void inst_set_target(block b, block target) { assert(block_is_single(b)); assert(opcode_describe(b.first->op)->flags & OP_HAS_BRANCH); assert(target.last); b.first->imm.target = target.last; } block gen_op_unbound(opcode op, const char* name) { assert(opcode_describe(op)->flags & OP_HAS_BINDING); inst* i = inst_new(op); i->symbol = strdup(name); return inst_block(i); } block gen_op_var_fresh(opcode op, const char* name) { assert(opcode_describe(op)->flags & OP_HAS_VARIABLE); return block_bind(gen_op_unbound(op, name), gen_noop(), OP_HAS_VARIABLE); } block gen_op_bound(opcode op, block binder) { assert(block_is_single(binder)); block b = gen_op_unbound(op, binder.first->symbol); b.first->bound_by = binder.first; return b; } block gen_dictpair(block k, block v) { return BLOCK(gen_subexp(k), gen_subexp(v), gen_op_simple(INSERT)); } static void inst_join(inst* a, inst* b) { assert(a && b); assert(!a->next); assert(!b->prev); a->next = b; b->prev = a; } void block_append(block* b, block b2) { if (b2.first) { if (b->last) { inst_join(b->last, b2.first); } else { b->first = b2.first; } b->last = b2.last; } } block block_join(block a, block b) { block c = a; block_append(&c, b); return c; } int block_has_only_binders_and_imports(block binders, int bindflags) { bindflags |= OP_HAS_BINDING; for (inst* curr = binders.first; curr; curr = curr->next) { if ((opcode_describe(curr->op)->flags & bindflags) != bindflags && curr->op != DEPS && curr->op != MODULEMETA) { return 0; } } return 1; } static int inst_is_binder(inst *i, int bindflags) { return !((opcode_describe(i->op)->flags & bindflags) != bindflags && i->op != MODULEMETA); } int block_has_only_binders(block binders, int bindflags) { bindflags |= OP_HAS_BINDING; bindflags &= ~OP_BIND_WILDCARD; for (inst* curr = binders.first; curr; curr = curr->next) { if ((opcode_describe(curr->op)->flags & bindflags) != bindflags && curr->op != MODULEMETA) { return 0; } } return 1; } // Count a binder's (function) formal params static int block_count_formals(block b) { int args = 0; if (b.first->op == CLOSURE_CREATE_C) return b.first->imm.cfunc->nargs - 1; for (inst* i = b.first->arglist.first; i; i = i->next) { assert(i->op == CLOSURE_PARAM); args++; } return args; } // Count a call site's actual params static int block_count_actuals(block b) { int args = 0; for (inst* i = b.first; i; i = i->next) { switch (i->op) { default: assert(0 && "Unknown function type"); break; case CLOSURE_CREATE: case CLOSURE_PARAM: case CLOSURE_CREATE_C: args++; break; } } return args; } static int block_count_refs(block binder, block body) { int nrefs = 0; for (inst* i = body.first; i; i = i->next) { if (i != binder.first && i->bound_by == binder.first) { nrefs++; } // counting recurses into closures nrefs += block_count_refs(binder, i->subfn); // counting recurses into argument list nrefs += block_count_refs(binder, i->arglist); } return nrefs; } static int block_bind_subblock(block binder, block body, int bindflags, int break_distance) { assert(block_is_single(binder)); assert((opcode_describe(binder.first->op)->flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD)); assert(binder.first->symbol); assert(binder.first->bound_by == 0 || binder.first->bound_by == binder.first); assert(break_distance >= 0); binder.first->bound_by = binder.first; if (binder.first->nformals == -1) binder.first->nformals = block_count_formals(binder); int nrefs = 0; for (inst* i = body.first; i; i = i->next) { int flags = opcode_describe(i->op)->flags; if ((flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD) && i->bound_by == 0 && (!strcmp(i->symbol, binder.first->symbol) || // Check for break/break2/break3; see parser.y ((bindflags & OP_BIND_WILDCARD) && i->symbol[0] == '*' && break_distance <= 3 && (i->symbol[1] == '1' + break_distance) && i->symbol[2] == '\0'))) { // bind this instruction if (i->op == CALL_JQ && i->nactuals == -1) i->nactuals = block_count_actuals(i->arglist); if (i->nactuals == -1 || i->nactuals == binder.first->nformals) { i->bound_by = binder.first; nrefs++; } } else if ((flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD) && i->bound_by != 0 && !strncmp(binder.first->symbol, "*anonlabel", sizeof("*anonlabel") - 1) && !strncmp(i->symbol, "*anonlabel", sizeof("*anonlabel") - 1)) { // Increment the break distance required for this binder to match // a break whenever we come across a STOREV of *anonlabel... break_distance++; } // binding recurses into closures nrefs += block_bind_subblock(binder, i->subfn, bindflags, break_distance); // binding recurses into argument list nrefs += block_bind_subblock(binder, i->arglist, bindflags, break_distance); } return nrefs; } static int block_bind_each(block binder, block body, int bindflags) { assert(block_has_only_binders(binder, bindflags)); bindflags |= OP_HAS_BINDING; int nrefs = 0; for (inst* curr = binder.first; curr; curr = curr->next) { nrefs += block_bind_subblock(inst_block(curr), body, bindflags, 0); } return nrefs; } block block_bind(block binder, block body, int bindflags) { block_bind_each(binder, body, bindflags); return block_join(binder, body); } block block_bind_library(block binder, block body, int bindflags, const char *libname) { bindflags |= OP_HAS_BINDING; int nrefs = 0; int matchlen = (libname == NULL) ? 0 : strlen(libname); char *matchname = jv_mem_alloc(matchlen+2+1); matchname[0] = '\0'; if (libname != NULL && libname[0] != '\0') { strcpy(matchname,libname); strcpy(matchname+matchlen, "::"); matchlen += 2; } assert(block_has_only_binders(binder, bindflags)); for (inst *curr = binder.first; curr; curr = curr->next) { int bindflags2 = bindflags; char* cname = curr->symbol; char* tname = jv_mem_alloc(strlen(curr->symbol)+matchlen+1); strcpy(tname, matchname); strcpy(tname+matchlen, curr->symbol); // Ew if ((opcode_describe(curr->op)->flags & (OP_HAS_VARIABLE | OP_HAS_CONSTANT))) bindflags2 = OP_HAS_VARIABLE | OP_HAS_BINDING; // This mutation is ugly, even if we undo it curr->symbol = tname; nrefs += block_bind_subblock(inst_block(curr), body, bindflags2, 0); curr->symbol = cname; free(tname); } free(matchname); return body; // We don't return a join because we don't want those sticking around... } // Bind binder to body and throw away any defs in binder not referenced // (directly or indirectly) from body. block block_bind_referenced(block binder, block body, int bindflags) { assert(block_has_only_binders(binder, bindflags)); bindflags |= OP_HAS_BINDING; block refd = gen_noop(); block unrefd = gen_noop(); int nrefs; for (int last_kept = 0, kept = 0; ; ) { for (inst* curr; (curr = block_take(&binder));) { block b = inst_block(curr); nrefs = block_bind_each(b, body, bindflags); // Check if this binder is referenced from any of the ones we // already know are referenced by body. nrefs += block_count_refs(b, refd); nrefs += block_count_refs(b, body); if (nrefs) { refd = BLOCK(refd, b); kept++; } else { unrefd = BLOCK(unrefd, b); } } if (kept == last_kept) break; last_kept = kept; binder = unrefd; unrefd = gen_noop(); } block_free(unrefd); return block_join(refd, body); } block block_drop_unreferenced(block body) { inst* curr; block refd = gen_noop(); block unrefd = gen_noop(); int drop; do { drop = 0; while ((curr = block_take(&body)) && curr->op != TOP) { block b = inst_block(curr); if (block_count_refs(b,refd) + block_count_refs(b,body) == 0) { unrefd = BLOCK(unrefd, b); drop++; } else { refd = BLOCK(refd, b); } } if (curr && curr->op == TOP) { body = BLOCK(inst_block(curr),body); } body = BLOCK(refd, body); refd = gen_noop(); } while (drop != 0); block_free(unrefd); return body; } jv block_take_imports(block* body) { jv imports = jv_array(); inst* top = NULL; if (body->first && body->first->op == TOP) { top = block_take(body); } while (body->first && (body->first->op == MODULEMETA || body->first->op == DEPS)) { inst* dep = block_take(body); if (dep->op == DEPS) { imports = jv_array_append(imports, jv_copy(dep->imm.constant)); } inst_free(dep); } if (top) { *body = block_join(inst_block(top),*body); } return imports; } jv block_list_funcs(block body, int omit_underscores) { jv funcs = jv_object(); // Use the keys for set semantics. for (inst *pos = body.first; pos != NULL; pos = pos->next) { if (pos->op == CLOSURE_CREATE || pos->op == CLOSURE_CREATE_C) { if (pos->symbol != NULL && (!omit_underscores || pos->symbol[0] != '_')) { funcs = jv_object_set(funcs, jv_string_fmt("%s/%i", pos->symbol, pos->nformals), jv_null()); } } } return jv_keys_unsorted(funcs); } block gen_module(block metadata) { inst* i = inst_new(MODULEMETA); i->imm.constant = block_const(metadata); if (jv_get_kind(i->imm.constant) != JV_KIND_OBJECT) i->imm.constant = jv_object_set(jv_object(), jv_string("metadata"), i->imm.constant); block_free(metadata); return inst_block(i); } jv block_module_meta(block b) { if (b.first != NULL && b.first->op == MODULEMETA) return jv_copy(b.first->imm.constant); return jv_null(); } block gen_import(const char* name, const char* as, int is_data) { inst* i = inst_new(DEPS); jv meta = jv_object(); if (as != NULL) meta = jv_object_set(meta, jv_string("as"), jv_string(as)); meta = jv_object_set(meta, jv_string("is_data"), is_data ? jv_true() : jv_false()); meta = jv_object_set(meta, jv_string("relpath"), jv_string(name)); i->imm.constant = meta; return inst_block(i); } block gen_import_meta(block import, block metadata) { assert(block_is_single(import) && import.first->op == DEPS); assert(block_is_const(metadata) && block_const_kind(metadata) == JV_KIND_OBJECT); inst *i = import.first; i->imm.constant = jv_object_merge(block_const(metadata), i->imm.constant); block_free(metadata); return import; } block gen_function(const char* name, block formals, block body) { inst* i = inst_new(CLOSURE_CREATE); for (inst* i = formals.last; i; i = i->prev) { if (i->op == CLOSURE_PARAM_REGULAR) { i->op = CLOSURE_PARAM; body = gen_var_binding(gen_call(i->symbol, gen_noop()), i->symbol, body); } block_bind_subblock(inst_block(i), body, OP_IS_CALL_PSEUDO | OP_HAS_BINDING, 0); } i->subfn = body; i->symbol = strdup(name); i->arglist = formals; block b = inst_block(i); block_bind_subblock(b, b, OP_IS_CALL_PSEUDO | OP_HAS_BINDING, 0); return b; } block gen_param_regular(const char* name) { return gen_op_unbound(CLOSURE_PARAM_REGULAR, name); } block gen_param(const char* name) { return gen_op_unbound(CLOSURE_PARAM, name); } block gen_lambda(block body) { return gen_function("@lambda", gen_noop(), body); } block gen_call(const char* name, block args) { block b = gen_op_unbound(CALL_JQ, name); b.first->arglist = args; return b; } block gen_subexp(block a) { if (block_is_noop(a)) { return gen_op_simple(DUP); } if (block_is_single(a) && a.first->op == LOADK) { jv c = block_const(a); block_free(a); return gen_op_pushk_under(c); } return BLOCK(gen_op_simple(SUBEXP_BEGIN), a, gen_op_simple(SUBEXP_END)); } block gen_both(block a, block b) { block jump = gen_op_targetlater(JUMP); block fork = gen_op_target(FORK, jump); block c = BLOCK(fork, a, jump, b); inst_set_target(jump, c); return c; } block gen_const_object(block expr) { int is_const = 1; jv o = jv_object(); jv k = jv_null(); jv v = jv_null(); for (inst *i = expr.first; i; i = i->next) { if (i->op == PUSHK_UNDER) { k = jv_copy(i->imm.constant); i = i->next; } else if (i->op != SUBEXP_BEGIN || i->next == NULL || i->next->op != LOADK || i->next->next == NULL || i->next->next->op != SUBEXP_END) { is_const = 0; break; } else { k = jv_copy(i->next->imm.constant); i = i->next->next->next; } if (i != NULL && i->op == PUSHK_UNDER) { v = jv_copy(i->imm.constant); i = i->next; } else if (i == NULL || i->op != SUBEXP_BEGIN || i->next == NULL || i->next->op != LOADK || i->next->next == NULL || i->next->next->op != SUBEXP_END) { is_const = 0; break; } else { v = jv_copy(i->next->imm.constant); i = i->next->next->next; } if (i == NULL || i->op != INSERT) { is_const = 0; break; } if (jv_get_kind(k) != JV_KIND_STRING) { is_const = 0; break; } o = jv_object_set(o, k, v); k = jv_null(); v = jv_null(); } if (!is_const) { jv_free(o); jv_free(k); jv_free(v); block b = {0,0}; return b; } block_free(expr); return gen_const(o); } static block gen_const_array(block expr) { /* * An expr of all constant elements looks like this: * * 0009 FORK 0027 * 0011 FORK 0023 * 0013 FORK 0019 * 0015 LOADK 1 * 0017 JUMP 0021 * 0019 LOADK 2 * 0021 JUMP 0025 * 0023 LOADK 3 * 0025 JUMP 0029 * 0027 LOADK 4 * * That's: N-1 commas for N elements, N LOADKs, and a JUMP between * every LOADK. The sequence ends in a LOADK. Any deviation and it's * not a list of constants. * * Here we check for this pattern almost exactly. We don't check that * the targets of the FORK and JUMP instructions are in the right * sequence. */ int all_const = 1; int commas = 0; int normal = 1; jv a = jv_array(); for (inst *i = expr.first; i; i = i->next) { if (i->op == FORK) { commas++; if (i->imm.target == NULL || i->imm.target->op != JUMP || jv_array_length(jv_copy(a)) > 0) { normal = 0; break; } } else if (all_const && i->op == LOADK) { if (i->next != NULL && i->next->op != JUMP) { normal = 0; break; } a = jv_array_append(a, jv_copy(i->imm.constant)); } else if (i->op != JUMP || i->imm.target == NULL || i->imm.target->op != LOADK) { all_const = 0; } } if (all_const && normal && (expr.last == NULL || expr.last->op == LOADK) && jv_array_length(jv_copy(a)) == commas + 1) { block_free(expr); return gen_const(a); } jv_free(a); block b = {0,0}; return b; } block gen_collect(block expr) { block const_array = gen_const_array(expr); if (const_array.first != NULL) return const_array; block array_var = gen_op_var_fresh(STOREV, "collect"); block c = BLOCK(gen_op_simple(DUP), gen_const(jv_array()), array_var); block tail = BLOCK(gen_op_bound(APPEND, array_var), gen_op_simple(BACKTRACK)); return BLOCK(c, gen_op_target(FORK, tail), expr, tail, gen_op_bound(LOADVN, array_var)); } static block bind_matcher(block matcher, block body) { // cannot call block_bind(matcher, body) because that requires // block_has_only_binders(matcher), which is not true here as matchers // may also contain code to extract the correct elements for (inst* i = matcher.first; i; i = i->next) { if ((i->op == STOREV || i->op == STOREVN) && !i->bound_by) block_bind_subblock(inst_block(i), body, OP_HAS_VARIABLE, 0); } return BLOCK(matcher, body); } // Extract destructuring var names from the block // *vars should be a jv_object (for set semantics) static void block_get_unbound_vars(block b, jv *vars) { assert(vars != NULL); assert(jv_get_kind(*vars) == JV_KIND_OBJECT); for (inst* i = b.first; i; i = i->next) { if (i->subfn.first) { block_get_unbound_vars(i->subfn, vars); continue; } if ((i->op == STOREV || i->op == STOREVN) && i->bound_by == NULL) { *vars = jv_object_set(*vars, jv_string(i->symbol), jv_true()); } } } /* Build wrappers around destructuring matchers so that we can chain them * when we have errors. The approach is as follows: * FORK_OPT NEXT_MATCHER (unless last matcher) * existing_matcher_block * JUMP BODY */ static block bind_alternation_matchers(block matchers, block body) { block preamble = {0}; block altmatchers = {0}; block mb = {0}; // Pass through the matchers to find all destructured names. while (matchers.first && matchers.first->op == DESTRUCTURE_ALT) { block_append(&altmatchers, inst_block(block_take(&matchers))); } // We don't have any alternations here, so we can use the simplest case. if (altmatchers.first == NULL) { return bind_matcher(matchers, body); } // The final matcher needs to strip the error from the previous FORK_OPT block final_matcher = BLOCK(gen_op_simple(POP), gen_op_simple(DUP), matchers); // Collect var names jv all_vars = jv_object(); block_get_unbound_vars(altmatchers, &all_vars); block_get_unbound_vars(final_matcher, &all_vars); // We need a preamble of STOREVs to which to bind the matchers and the body. jv_object_keys_foreach(all_vars, key) { preamble = BLOCK(preamble, gen_op_simple(DUP), gen_const(jv_null()), gen_op_unbound(STOREV, jv_string_value(key))); jv_free(key); } jv_free(all_vars); // Now we build each matcher in turn for (inst *i = altmatchers.first; i; i = i->next) { block submatcher = i->subfn; // Get rid of the error from the previous matcher if (mb.first != NULL) { submatcher = BLOCK(gen_op_simple(POP), gen_op_simple(DUP), submatcher); } // If we're successful, jump to the end of the matchers submatcher = BLOCK(submatcher, gen_op_target(JUMP, final_matcher)); // FORK_OPT to the end of this submatcher so we can skip to the next one on error mb = BLOCK(mb, gen_op_target(FORK_OPT, submatcher), submatcher); // We're done with this inst and we don't want it anymore // But we can't let it free the submatcher block. i->subfn.first = i->subfn.last = NULL; } // We're done with these insts now. block_free(altmatchers); return bind_matcher(preamble, BLOCK(mb, final_matcher, body)); } block gen_reduce(block source, block matcher, block init, block body) { block res_var = gen_op_var_fresh(STOREV, "reduce"); block loop = BLOCK(gen_op_simple(DUPN), source, bind_alternation_matchers(matcher, BLOCK(gen_op_bound(LOADVN, res_var), body, gen_op_bound(STOREV, res_var))), gen_op_simple(BACKTRACK)); return BLOCK(gen_op_simple(DUP), init, res_var, gen_op_target(FORK, loop), loop, gen_op_bound(LOADVN, res_var)); } block gen_foreach(block source, block matcher, block init, block update, block extract) { block output = gen_op_targetlater(JUMP); block state_var = gen_op_var_fresh(STOREV, "foreach"); block loop = BLOCK(gen_op_simple(DUPN), // get a value from the source expression: source, // destructure the value into variable(s) for all the code // in the body to see bind_alternation_matchers(matcher, // load the loop state variable BLOCK(gen_op_bound(LOADVN, state_var), // generate updated state update, // save the updated state for value extraction gen_op_simple(DUP), // save new state gen_op_bound(STOREV, state_var), // extract an output... extract, // ...and output it by jumping // past the BACKTRACK that comes // right after the loop body, // which in turn is there // because... // // (Incidentally, extract can also // backtrack, e.g., if it calls // empty, in which case we don't // get here.) output))); block foreach = BLOCK(gen_op_simple(DUP), init, state_var, gen_op_target(FORK, loop), loop, // ...at this point `foreach`'s original input // will be on top of the stack, and we don't // want to output it, so we backtrack. gen_op_simple(BACKTRACK)); inst_set_target(output, foreach); // make that JUMP go bast the BACKTRACK at the end of the loop return foreach; } block gen_definedor(block a, block b) { // var found := false block found_var = gen_op_var_fresh(STOREV, "found"); block init = BLOCK(gen_op_simple(DUP), gen_const(jv_false()), found_var); // if found, backtrack. Otherwise execute b block backtrack = gen_op_simple(BACKTRACK); block tail = BLOCK(gen_op_simple(DUP), gen_op_bound(LOADV, found_var), gen_op_target(JUMP_F, backtrack), backtrack, gen_op_simple(POP), b); // try again block if_notfound = gen_op_simple(BACKTRACK); // found := true, produce result block if_found = BLOCK(gen_op_simple(DUP), gen_const(jv_true()), gen_op_bound(STOREV, found_var), gen_op_target(JUMP, tail)); return BLOCK(init, gen_op_target(FORK, if_notfound), a, gen_op_target(JUMP_F, if_found), if_found, if_notfound, tail); } int block_has_main(block top) { for (inst *c = top.first; c; c = c->next) { if (c->op == TOP) return 1; } return 0; } int block_is_funcdef(block b) { if (b.first != NULL && b.first->op == CLOSURE_CREATE) return 1; return 0; } block gen_condbranch(block iftrue, block iffalse) { iftrue = BLOCK(iftrue, gen_op_target(JUMP, iffalse)); return BLOCK(gen_op_target(JUMP_F, iftrue), iftrue, iffalse); } block gen_and(block a, block b) { // a and b = if a then (if b then true else false) else false return BLOCK(gen_op_simple(DUP), a, gen_condbranch(BLOCK(gen_op_simple(POP), b, gen_condbranch(gen_const(jv_true()), gen_const(jv_false()))), BLOCK(gen_op_simple(POP), gen_const(jv_false())))); } block gen_or(block a, block b) { // a or b = if a then true else (if b then true else false) return BLOCK(gen_op_simple(DUP), a, gen_condbranch(BLOCK(gen_op_simple(POP), gen_const(jv_true())), BLOCK(gen_op_simple(POP), b, gen_condbranch(gen_const(jv_true()), gen_const(jv_false()))))); } block gen_destructure_alt(block matcher) { for (inst *i = matcher.first; i; i = i->next) { if (i->op == STOREV) { i->op = STOREVN; } } inst* i = inst_new(DESTRUCTURE_ALT); i->subfn = matcher; return inst_block(i); } block gen_var_binding(block var, const char* name, block body) { return gen_destructure(var, gen_op_unbound(STOREV, name), body); } block gen_array_matcher(block left, block curr) { int index; if (block_is_noop(left)) index = 0; else { // `left` was returned by this function, so the third inst is the // constant containing the previously used index assert(left.first->op == DUP); assert(left.first->next != NULL); inst *i = NULL; if (left.first->next->op == PUSHK_UNDER) { i = left.first->next; } else { assert(left.first->next->op == SUBEXP_BEGIN); assert(left.first->next->next->op == LOADK); i = left.first->next->next; } index = 1 + (int) jv_number_value(i->imm.constant); } // `left` goes at the end so that the const index is in a predictable place return BLOCK(gen_op_simple(DUP), gen_subexp(gen_const(jv_number(index))), gen_op_simple(INDEX), curr, left); } block gen_object_matcher(block name, block curr) { return BLOCK(gen_op_simple(DUP), gen_subexp(name), gen_op_simple(INDEX), curr); } block gen_destructure(block var, block matchers, block body) { // var bindings can be added after coding the program; leave the TOP first. block top = gen_noop(); if (body.first && body.first->op == TOP) top = inst_block(block_take(&body)); if (matchers.first && matchers.first->op == DESTRUCTURE_ALT && !block_is_noop(var)) { block_append(&var, gen_op_simple(DUP)); block_append(&matchers, gen_op_simple(POP)); } return BLOCK(top, gen_op_simple(DUP), gen_subexp(var), gen_op_simple(POP), bind_alternation_matchers(matchers, body)); } // Like gen_var_binding(), but bind `break`'s wildcard unbound variable static block gen_wildvar_binding(block var, const char* name, block body) { return BLOCK(gen_op_simple(DUP), var, block_bind(gen_op_unbound(STOREV, name), body, OP_HAS_VARIABLE | OP_BIND_WILDCARD)); } block gen_cond(block cond, block iftrue, block iffalse) { return BLOCK(gen_op_simple(DUP), BLOCK(gen_subexp(cond), gen_op_simple(POP)), gen_condbranch(BLOCK(gen_op_simple(POP), iftrue), BLOCK(gen_op_simple(POP), iffalse))); } block gen_try_handler(block handler) { // Quite a pain just to hide jq's internal errors. return gen_cond(// `if type=="object" and .__jq gen_and(gen_call("_equal", BLOCK(gen_lambda(gen_const(jv_string("object"))), gen_lambda(gen_noop()))), BLOCK(gen_subexp(gen_const(jv_string("__jq"))), gen_noop(), gen_op_simple(INDEX))), // `then error` gen_call("error", gen_noop()), // `else HANDLER end` handler); } block gen_try(block exp, block handler) { /* * Produce something like: * FORK_OPT
* * JUMP * * * If this is not an internal try/catch, then catch and re-raise * internal errors to prevent them from leaking. * * The handler will only execute if we backtrack to the FORK_OPT with * an error (exception). If produces no value then FORK_OPT * will backtrack (propagate the `empty`, as it were. If * produces a value then we'll execute whatever bytecode follows this * sequence. */ if (!handler.first && !handler.last) // A hack to deal with `.` as the handler; we could use a real NOOP here handler = BLOCK(gen_op_simple(DUP), gen_op_simple(POP), handler); exp = BLOCK(exp, gen_op_target(JUMP, handler)); return BLOCK(gen_op_target(FORK_OPT, exp), exp, handler); } block gen_label(const char *label, block exp) { block cond = gen_call("_equal", BLOCK(gen_lambda(gen_noop()), gen_lambda(gen_op_unbound(LOADV, label)))); return gen_wildvar_binding(gen_op_simple(GENLABEL), label, BLOCK(gen_op_simple(POP), // try exp catch if . == $label // then empty // else error end // // Can't use gen_binop(), as that's firmly // stuck in parser.y as it refers to things // like EQ. gen_try(exp, gen_cond(cond, gen_op_simple(BACKTRACK), gen_call("error", gen_noop()))))); } block gen_cbinding(const struct cfunction* cfunctions, int ncfunctions, block code) { for (int cfunc=0; cfuncimm.cfunc = &cfunctions[cfunc]; i->symbol = strdup(i->imm.cfunc->name); code = block_bind(inst_block(i), code, OP_IS_CALL_PSEUDO); } return code; } static uint16_t nesting_level(struct bytecode* bc, inst* target) { uint16_t level = 0; assert(bc && target && target->compiled); while (bc && target->compiled != bc) { level++; bc = bc->parent; } assert(bc && bc == target->compiled); return level; } static int count_cfunctions(block b) { int n = 0; for (inst* i = b.first; i; i = i->next) { if (i->op == CLOSURE_CREATE_C) n++; n += count_cfunctions(i->subfn); } return n; } #ifndef WIN32 extern char **environ; #endif static jv make_env(jv env) { if (jv_is_valid(env)) return jv_copy(env); jv r = jv_object(); if (environ == NULL) return r; for (size_t i = 0; environ[i] != NULL; i++) { const char *eq; if ((eq = strchr(environ[i], '=')) == NULL) r = jv_object_delete(r, jv_string(environ[i])); else r = jv_object_set(r, jv_string_sized(environ[i], eq - environ[i]), jv_string(eq + 1)); } return jv_copy(r); } // Expands call instructions into a calling sequence static int expand_call_arglist(block* b, jv args, jv *env) { int errors = 0; block ret = gen_noop(); for (inst* curr; (curr = block_take(b));) { if (opcode_describe(curr->op)->flags & OP_HAS_BINDING) { if (!curr->bound_by && curr->op == LOADV && strcmp(curr->symbol, "ENV") == 0) { curr->op = LOADK; *env = curr->imm.constant = make_env(*env); } else if (!curr->bound_by && curr->op == LOADV && jv_object_has(jv_copy(args), jv_string(curr->symbol))) { curr->op = LOADK; curr->imm.constant = jv_object_get(jv_copy(args), jv_string(curr->symbol)); } else if (!curr->bound_by) { if (curr->symbol[0] == '*' && curr->symbol[1] >= '1' && curr->symbol[1] <= '3' && curr->symbol[2] == '\0') locfile_locate(curr->locfile, curr->source, "jq: error: break used outside labeled control structure"); else if (curr->op == LOADV) locfile_locate(curr->locfile, curr->source, "jq: error: $%s is not defined", curr->symbol); else locfile_locate(curr->locfile, curr->source, "jq: error: %s/%d is not defined", curr->symbol, block_count_actuals(curr->arglist)); errors++; // don't process this instruction if it's not well-defined ret = BLOCK(ret, inst_block(curr)); continue; } } block prelude = gen_noop(); if (curr->op == CALL_JQ) { int actual_args = 0, desired_args = 0; // We expand the argument list as a series of instructions switch (curr->bound_by->op) { default: assert(0 && "Unknown function type"); break; case CLOSURE_CREATE: case CLOSURE_PARAM: { block callargs = gen_noop(); for (inst* i; (i = block_take(&curr->arglist));) { assert(opcode_describe(i->op)->flags & OP_IS_CALL_PSEUDO); block b = inst_block(i); switch (i->op) { default: assert(0 && "Unknown type of parameter"); break; case CLOSURE_REF: block_append(&callargs, b); break; case CLOSURE_CREATE: block_append(&prelude, b); block_append(&callargs, gen_op_bound(CLOSURE_REF, b)); break; } actual_args++; } curr->imm.intval = actual_args; curr->arglist = callargs; if (curr->bound_by->op == CLOSURE_CREATE) { for (inst* i = curr->bound_by->arglist.first; i; i = i->next) { assert(i->op == CLOSURE_PARAM); desired_args++; } } break; } case CLOSURE_CREATE_C: { for (inst* i; (i = block_take(&curr->arglist)); ) { assert(i->op == CLOSURE_CREATE); // FIXME block body = i->subfn; i->subfn = gen_noop(); inst_free(i); // arguments should be pushed in reverse order, prepend them to prelude errors += expand_call_arglist(&body, args, env); prelude = BLOCK(gen_subexp(body), prelude); actual_args++; } assert(curr->op == CALL_JQ); curr->op = CALL_BUILTIN; curr->imm.intval = actual_args + 1 /* include the implicit input in arg count */; assert(curr->bound_by->op == CLOSURE_CREATE_C); desired_args = curr->bound_by->imm.cfunc->nargs - 1; assert(!curr->arglist.first); break; } } assert(actual_args == desired_args); // because now handle this above } ret = BLOCK(ret, prelude, inst_block(curr)); } *b = ret; return errors; } static int compile(struct bytecode* bc, block b, struct locfile* lf, jv args, jv *env) { int errors = 0; int pos = 0; int var_frame_idx = 0; bc->nsubfunctions = 0; errors += expand_call_arglist(&b, args, env); b = BLOCK(b, gen_op_simple(RET)); jv localnames = jv_array(); for (inst* curr = b.first; curr; curr = curr->next) { if (!curr->next) assert(curr == b.last); int length = opcode_describe(curr->op)->length; if (curr->op == CALL_JQ) { for (inst* arg = curr->arglist.first; arg; arg = arg->next) { length += 2; } } pos += length; curr->bytecode_pos = pos; curr->compiled = bc; assert(curr->op != CLOSURE_REF && curr->op != CLOSURE_PARAM); if ((opcode_describe(curr->op)->flags & OP_HAS_VARIABLE) && curr->bound_by == curr) { curr->imm.intval = var_frame_idx++; localnames = jv_array_append(localnames, jv_string(curr->symbol)); } if (curr->op == CLOSURE_CREATE) { assert(curr->bound_by == curr); curr->imm.intval = bc->nsubfunctions++; } if (curr->op == CLOSURE_CREATE_C) { assert(curr->bound_by == curr); int idx = bc->globals->ncfunctions++; bc->globals->cfunc_names = jv_array_append(bc->globals->cfunc_names, jv_string(curr->symbol)); bc->globals->cfunctions[idx] = *curr->imm.cfunc; curr->imm.intval = idx; } } if (pos > 0xFFFF) { // too long for program counter to fit in uint16_t locfile_locate(lf, UNKNOWN_LOCATION, "function compiled to %d bytes which is too long", pos); errors++; } bc->codelen = pos; bc->debuginfo = jv_object_set(bc->debuginfo, jv_string("locals"), localnames); if (bc->nsubfunctions) { bc->subfunctions = jv_mem_calloc(sizeof(struct bytecode*), bc->nsubfunctions); for (inst* curr = b.first; curr; curr = curr->next) { if (curr->op == CLOSURE_CREATE) { struct bytecode* subfn = jv_mem_alloc(sizeof(struct bytecode)); bc->subfunctions[curr->imm.intval] = subfn; subfn->globals = bc->globals; subfn->parent = bc; subfn->nclosures = 0; subfn->debuginfo = jv_object_set(jv_object(), jv_string("name"), jv_string(curr->symbol)); jv params = jv_array(); for (inst* param = curr->arglist.first; param; param = param->next) { assert(param->op == CLOSURE_PARAM); assert(param->bound_by == param); param->imm.intval = subfn->nclosures++; param->compiled = subfn; params = jv_array_append(params, jv_string(param->symbol)); } subfn->debuginfo = jv_object_set(subfn->debuginfo, jv_string("params"), params); errors += compile(subfn, curr->subfn, lf, args, env); curr->subfn = gen_noop(); } } } else { bc->subfunctions = 0; } uint16_t* code = jv_mem_calloc(sizeof(uint16_t), bc->codelen); bc->code = code; pos = 0; jv constant_pool = jv_array(); int maxvar = -1; if (!errors) for (inst* curr = b.first; curr; curr = curr->next) { const struct opcode_description* op = opcode_describe(curr->op); if (op->length == 0) continue; code[pos++] = curr->op; assert(curr->op != CLOSURE_REF && curr->op != CLOSURE_PARAM); if (curr->op == CALL_BUILTIN) { assert(curr->bound_by->op == CLOSURE_CREATE_C); assert(!curr->arglist.first); code[pos++] = (uint16_t)curr->imm.intval; code[pos++] = curr->bound_by->imm.intval; } else if (curr->op == CALL_JQ) { assert(curr->bound_by->op == CLOSURE_CREATE || curr->bound_by->op == CLOSURE_PARAM); code[pos++] = (uint16_t)curr->imm.intval; code[pos++] = nesting_level(bc, curr->bound_by); code[pos++] = curr->bound_by->imm.intval | (curr->bound_by->op == CLOSURE_CREATE ? ARG_NEWCLOSURE : 0); for (inst* arg = curr->arglist.first; arg; arg = arg->next) { assert(arg->op == CLOSURE_REF && arg->bound_by->op == CLOSURE_CREATE); code[pos++] = nesting_level(bc, arg->bound_by); code[pos++] = arg->bound_by->imm.intval | ARG_NEWCLOSURE; } } else if ((op->flags & OP_HAS_CONSTANT) && (op->flags & OP_HAS_VARIABLE)) { // STORE_GLOBAL: constant global, basically code[pos++] = jv_array_length(jv_copy(constant_pool)); constant_pool = jv_array_append(constant_pool, jv_copy(curr->imm.constant)); code[pos++] = nesting_level(bc, curr->bound_by); uint16_t var = (uint16_t)curr->bound_by->imm.intval; code[pos++] = var; } else if (op->flags & OP_HAS_CONSTANT) { code[pos++] = jv_array_length(jv_copy(constant_pool)); constant_pool = jv_array_append(constant_pool, jv_copy(curr->imm.constant)); } else if (op->flags & OP_HAS_VARIABLE) { code[pos++] = nesting_level(bc, curr->bound_by); uint16_t var = (uint16_t)curr->bound_by->imm.intval; code[pos++] = var; if (var > maxvar) maxvar = var; } else if (op->flags & OP_HAS_BRANCH) { assert(curr->imm.target->bytecode_pos != -1); assert(curr->imm.target->bytecode_pos > pos); // only forward branches code[pos] = curr->imm.target->bytecode_pos - (pos + 1); pos++; } else if (op->length > 1) { assert(0 && "codegen not implemented for this operation"); } } bc->constants = constant_pool; bc->nlocals = maxvar + 2; // FIXME: frames of size zero? block_free(b); return errors; } int block_compile(block b, struct bytecode** out, struct locfile* lf, jv args) { struct bytecode* bc = jv_mem_alloc(sizeof(struct bytecode)); bc->parent = 0; bc->nclosures = 0; bc->globals = jv_mem_alloc(sizeof(struct symbol_table)); int ncfunc = count_cfunctions(b); bc->globals->ncfunctions = 0; bc->globals->cfunctions = jv_mem_calloc(sizeof(struct cfunction), ncfunc); bc->globals->cfunc_names = jv_array(); bc->debuginfo = jv_object_set(jv_object(), jv_string("name"), jv_null()); jv env = jv_invalid(); int nerrors = compile(bc, b, lf, args, &env); jv_free(args); jv_free(env); assert(bc->globals->ncfunctions == ncfunc); if (nerrors > 0) { bytecode_free(bc); *out = 0; } else { *out = bc; } return nerrors; } void block_free(block b) { struct inst* next; for (struct inst* curr = b.first; curr; curr = next) { next = curr->next; inst_free(curr); } }