From 38b939688a596c2de1b3d254491005b2754c8569 Mon Sep 17 00:00:00 2001 From: William Langford Date: Wed, 9 Jul 2014 00:55:20 -0400 Subject: Added library system with -l, -L, and JQ_LIBRARY_PATH Created util.[ch] to hold common utilities. --- Makefile.am | 4 +- builtin.c | 8 +-- compile.c | 115 +++++++++++++++++++++++++++++++--- compile.h | 10 ++- configure.ac | 1 + execute.c | 55 ++++++++++++++--- jq.h | 6 ++ jv.c | 33 +--------- lexer.l | 3 + linker.c | 195 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ linker.h | 9 +++ locfile.c | 18 +++++- locfile.h | 4 +- main.c | 40 +++++++++++- opcode_list.h | 1 + parser.y | 60 +++++++++++++----- tests/run | 61 ++++++++++++++++-- util.c | 120 ++++++++++++++++++++++++++++++++++++ util.h | 26 ++++++++ 19 files changed, 690 insertions(+), 79 deletions(-) create mode 100644 linker.c create mode 100644 linker.h create mode 100644 util.c create mode 100644 util.h diff --git a/Makefile.am b/Makefile.am index 02f7d966..b7dd16bd 100644 --- a/Makefile.am +++ b/Makefile.am @@ -2,11 +2,11 @@ LIBJQ_INCS = jq_parser.h builtin.h bytecode.h compile.h exec_stack.h \ libm.h jv_alloc.h jv_dtoa.h jv_unicode.h locfile.h \ - opcode_list.h parser.y jv_utf8_tables.h lexer.l + opcode_list.h parser.y jv_utf8_tables.h lexer.l util.h linker.h LIBJQ_SRC = locfile.c bytecode.c compile.c execute.c builtin.c jv.c \ jv_parse.c jv_print.c jv_dtoa.c jv_unicode.c jv_aux.c jv_file.c \ - jv_alloc.c jq_test.c ${LIBJQ_INCS} + jv_alloc.c jq_test.c util.c linker.c ${LIBJQ_INCS} ### C build options diff --git a/builtin.c b/builtin.c index 6d53a582..0af44163 100644 --- a/builtin.c +++ b/builtin.c @@ -991,14 +991,14 @@ static const char* const jq_builtins[] = { static int builtins_bind_one(jq_state *jq, block* bb, const char* code) { - struct locfile src; - locfile_init(&src, jq, code, strlen(code)); + struct locfile* src; + src = locfile_init(jq, code, strlen(code)); block funcs; - int nerrors = jq_parse_library(&src, &funcs); + int nerrors = jq_parse_library(src, &funcs); if (nerrors == 0) { *bb = block_bind_referenced(funcs, *bb, OP_IS_CALL_PSEUDO); } - locfile_free(&src); + locfile_free(src); return nerrors; } diff --git a/compile.c b/compile.c index b0ff054c..0b0208ee 100644 --- a/compile.c +++ b/compile.c @@ -8,6 +8,7 @@ #include "bytecode.h" #include "locfile.h" #include "jv_alloc.h" +#include "linker.h" /* The intermediate representation for jq filters is as a sequence of @@ -34,6 +35,7 @@ struct inst { const struct cfunction* cfunc; } imm; + struct locfile* locfile; location source; // Binding @@ -74,6 +76,7 @@ static inst* inst_new(opcode op) { i->subfn = gen_noop(); i->arglist = gen_noop(); i->source = UNKNOWN_LOCATION; + i->locfile = 0; return i; } @@ -81,6 +84,8 @@ static void inst_free(struct inst* i) { jv_mem_free(i->symbol); block_free(i->subfn); block_free(i->arglist); + if (i->locfile) + locfile_free(i->locfile); if (opcode_describe(i->op)->flags & OP_HAS_CONSTANT) { jv_free(i->imm.constant); } @@ -110,11 +115,12 @@ static inst* block_take(block* b) { return i; } -block gen_location(location loc, block b) { +block gen_location(location loc, struct locfile* l, block b) { for (inst* i = b.first; i; i = i->next) { if (i->source.start == UNKNOWN_LOCATION.start && i->source.end == UNKNOWN_LOCATION.end) { i->source = loc; + i->locfile = locfile_retain(l); } } return b; @@ -205,6 +211,16 @@ block block_join(block a, block b) { return c; } +int block_has_only_binders_and_imports(block binders, int bindflags) { + bindflags |= OP_HAS_BINDING; + for (inst* curr = binders.first; curr; curr = curr->next) { + if ((opcode_describe(curr->op)->flags & bindflags) != bindflags && curr->op != DEPS) { + return 0; + } + } + return 1; +} + int block_has_only_binders(block binders, int bindflags) { bindflags |= OP_HAS_BINDING; for (inst* curr = binders.first; curr; curr = curr->next) { @@ -303,6 +319,28 @@ block block_bind(block binder, block body, int bindflags) { return block_join(binder, body); } +block block_bind_library(block binder, block body, int bindflags, const char* libname) { + assert(block_has_only_binders(binder, bindflags)); + bindflags |= OP_HAS_BINDING; + int nrefs = 0; + int matchlen = strlen(libname)+2; + char* matchname = malloc(matchlen+1); + strcpy(matchname,libname); + strcpy(matchname+matchlen-2,"::"); + for (inst *curr = binder.first; curr; curr = curr->next) { + char* cname = curr->symbol; + char* tname = malloc(strlen(curr->symbol)+matchlen+1); + strcpy(tname, matchname); + strcpy(tname+matchlen,cname); + curr->symbol = tname; + nrefs += block_bind_subblock(inst_block(curr), body, bindflags); + curr->symbol = cname; + free(tname); + } + free(matchname); + return body; // We don't return a join because we don't want those sticking around... +} + // Bind binder to body and throw away any defs in binder not referenced // (directly or indirectly) from body. block block_bind_referenced(block binder, block body, int bindflags) { @@ -318,6 +356,7 @@ block block_bind_referenced(block binder, block body, int bindflags) { // Check if this binder is referenced from any of the ones we // already know are referenced by body. nrefs += block_count_refs(b, refd); + nrefs += block_count_refs(b, body); if (nrefs) { refd = BLOCK(refd, b); kept++; @@ -335,6 +374,64 @@ block block_bind_referenced(block binder, block body, int bindflags) { return block_join(refd, body); } +block block_drop_unreferenced(block body) { + inst* curr; + block refd = gen_noop(); + block unrefd = gen_noop(); + int drop; + do { + drop = 0; + while((curr = block_take(&body)) && curr->op != TOP) { + block b = inst_block(curr); + if (block_count_refs(b,refd) + block_count_refs(b,body) == 0) { + unrefd = BLOCK(unrefd, b); + drop++; + } else { + refd = BLOCK(refd, b); + } + } + if (curr && curr->op == TOP) { + body = BLOCK(inst_block(curr),body); + } + body = BLOCK(refd, body); + refd = gen_noop(); + } while (drop != 0); + block_free(unrefd); + return body; +} + +jv block_take_imports(block* body) { + jv imports = jv_array(); + + inst* top = NULL; + if (body->first->op == TOP) { + top = block_take(body); + } + while (body->first && body->first->op == DEPS) { + inst* dep = block_take(body); + jv opts = jv_copy(dep->imm.constant); + opts = jv_object_set(opts,jv_string("name"),jv_string(dep->symbol)); + imports = jv_array_append(imports, opts); + inst_free(dep); + } + if (top) { + *body = block_join(inst_block(top),*body); + } + return imports; +} + +block gen_import(const char* name, const char* as, const char* search) { + inst* i = inst_new(DEPS); + i->symbol = strdup(name); + jv opts = jv_object(); + if (as) + opts = jv_object_set(opts, jv_string("as"), jv_string(as)); + if (search) + opts = jv_object_set(opts, jv_string("search"), jv_string(search)); + i->imm.constant = opts; + return inst_block(i); +} + block gen_function(const char* name, block formals, block body) { block_bind_each(formals, body, OP_IS_CALL_PSEUDO); inst* i = inst_new(CLOSURE_CREATE); @@ -577,13 +674,13 @@ static int count_cfunctions(block b) { // Expands call instructions into a calling sequence -static int expand_call_arglist(struct locfile* locations, block* b) { +static int expand_call_arglist(block* b) { int errors = 0; block ret = gen_noop(); for (inst* curr; (curr = block_take(b));) { if (opcode_describe(curr->op)->flags & OP_HAS_BINDING) { if (!curr->bound_by) { - locfile_locate(locations, curr->source, "error: %s/%d is not defined", curr->symbol, block_count_actuals(curr->arglist)); + locfile_locate(curr->locfile, curr->source, "error: %s/%d is not defined", curr->symbol, block_count_actuals(curr->arglist)); errors++; // don't process this instruction if it's not well-defined ret = BLOCK(ret, inst_block(curr)); @@ -634,7 +731,7 @@ static int expand_call_arglist(struct locfile* locations, block* b) { i->subfn = gen_noop(); inst_free(i); // arguments should be pushed in reverse order, prepend them to prelude - errors += expand_call_arglist(locations, &body); + errors += expand_call_arglist(&body); prelude = BLOCK(gen_subexp(body), prelude); actual_args++; } @@ -656,12 +753,12 @@ static int expand_call_arglist(struct locfile* locations, block* b) { return errors; } -static int compile(struct locfile* locations, struct bytecode* bc, block b) { +static int compile(struct bytecode* bc, block b) { int errors = 0; int pos = 0; int var_frame_idx = 0; bc->nsubfunctions = 0; - errors += expand_call_arglist(locations, &b); + errors += expand_call_arglist(&b); b = BLOCK(b, gen_op_simple(RET)); jv localnames = jv_array(); for (inst* curr = b.first; curr; curr = curr->next) { @@ -717,7 +814,7 @@ static int compile(struct locfile* locations, struct bytecode* bc, block b) { params = jv_array_append(params, jv_string(param->symbol)); } subfn->debuginfo = jv_object_set(subfn->debuginfo, jv_string("params"), params); - errors += compile(locations, subfn, curr->subfn); + errors += compile(subfn, curr->subfn); curr->subfn = gen_noop(); } } @@ -776,7 +873,7 @@ static int compile(struct locfile* locations, struct bytecode* bc, block b) { return errors; } -int block_compile(block b, struct locfile* locations, struct bytecode** out) { +int block_compile(block b, struct bytecode** out) { struct bytecode* bc = jv_mem_alloc(sizeof(struct bytecode)); bc->parent = 0; bc->nclosures = 0; @@ -786,7 +883,7 @@ int block_compile(block b, struct locfile* locations, struct bytecode** out) { bc->globals->cfunctions = jv_mem_alloc(sizeof(struct cfunction) * ncfunc); bc->globals->cfunc_names = jv_array(); bc->debuginfo = jv_object_set(jv_object(), jv_string("name"), jv_null()); - int nerrors = compile(locations, bc, b); + int nerrors = compile(bc, b); assert(bc->globals->ncfunctions == ncfunc); if (nerrors > 0) { bytecode_free(bc); diff --git a/compile.h b/compile.h index 9ad732a2..aba6a4d2 100644 --- a/compile.h +++ b/compile.h @@ -14,7 +14,7 @@ typedef struct block { inst* last; } block; -block gen_location(location, block); +block gen_location(location, struct locfile*, block); block gen_noop(); block gen_op_simple(opcode op); @@ -24,6 +24,7 @@ block gen_op_unbound(opcode op, const char* name); block gen_op_bound(opcode op, block binder); block gen_op_var_fresh(opcode op, const char* name); +block gen_import(const char* name, const char *as, const char *search); block gen_function(const char* name, block formals, block body); block gen_param(const char* name); block gen_lambda(block body); @@ -47,13 +48,18 @@ block gen_cbinding(const struct cfunction* functions, int nfunctions, block b); void block_append(block* b, block b2); block block_join(block a, block b); +int block_has_only_binders_and_imports(block, int bindflags); int block_has_only_binders(block, int bindflags); int block_has_main(block); int block_is_funcdef(block b); block block_bind(block binder, block body, int bindflags); +block block_bind_library(block binder, block body, int bindflags, const char* libname); block block_bind_referenced(block binder, block body, int bindflags); +block block_drop_unreferenced(block body); -int block_compile(block, struct locfile*, struct bytecode**); +jv block_take_imports(block* body); + +int block_compile(block, struct bytecode**); void block_free(block); diff --git a/configure.ac b/configure.ac index a0040e52..fd4d00c9 100644 --- a/configure.ac +++ b/configure.ac @@ -104,6 +104,7 @@ AC_CHECK_PROGS(valgrind_cmd, valgrind) if test "x$valgrind_cmd" = "x" ; then AC_MSG_WARN([valgrind is required to test jq.]) fi +AC_CHECK_FUNCS(memmem) dnl Don't attempt to build docs if there's no Ruby lying around diff --git a/execute.c b/execute.c index 74ba0c9c..ec88b092 100644 --- a/execute.c +++ b/execute.c @@ -4,6 +4,7 @@ #include #include #include +#include #include "exec_stack.h" #include "bytecode.h" @@ -15,6 +16,8 @@ #include "jq.h" #include "parser.h" #include "builtin.h" +#include "util.h" +#include "linker.h" struct jq_state { void (*nomem_handler)(void *); @@ -34,6 +37,8 @@ struct jq_state { int subexp_nest; int debug_trace_enabled; int initial_execution; + + jv attrs; }; struct closure { @@ -770,6 +775,7 @@ jq_state *jq_init(void) { jq->err_cb = NULL; jq->err_cb_data = NULL; + jq->attrs = jv_object(); jq->path = jv_null(); return jq; } @@ -819,6 +825,7 @@ void jq_teardown(jq_state **jq) { jq_reset(old_jq); bytecode_free(old_jq->bc); old_jq->bc = 0; + jv_free(old_jq->attrs); jv_mem_free(old_jq); } @@ -896,29 +903,28 @@ static struct bytecode *optimize(struct bytecode *bc) { int jq_compile_args(jq_state *jq, const char* str, jv args) { jv_nomem_handler(jq->nomem_handler, jq->nomem_handler_data); assert(jv_get_kind(args) == JV_KIND_ARRAY); - struct locfile locations; - locfile_init(&locations, jq, str, strlen(str)); + struct locfile* locations; + locations = locfile_init(jq, str, strlen(str)); block program; jq_reset(jq); if (jq->bc) { bytecode_free(jq->bc); jq->bc = 0; } - int nerrors = jq_parse(&locations, &program); + int nerrors = load_program(jq, locations, &program); if (nerrors == 0) { - for (int i=0; ibc); + nerrors = block_compile(program, &jq->bc); } } - jv_free(args); if (nerrors) { jv s = jv_string_fmt("%d compile %s", nerrors, nerrors > 1 ? "errors" : "error"); @@ -932,7 +938,8 @@ int jq_compile_args(jq_state *jq, const char* str, jv args) { } if (jq->bc) jq->bc = optimize(jq->bc); - locfile_free(&locations); + jv_free(args); + locfile_free(locations); return jq->bc != NULL; } @@ -940,6 +947,38 @@ int jq_compile(jq_state *jq, const char* str) { return jq_compile_args(jq, str, jv_array()); } +void jq_set_lib_origin(jq_state *jq, jv origin) { + assert(jq); + assert(jv_get_kind(origin) == JV_KIND_STRING); + jq_set_attr(jq, jv_string("ORIGIN"), origin); +} +jv jq_get_lib_origin(jq_state *jq) { + assert(jq); + return jq_get_attr(jq, jv_string("ORIGIN")); +} + +void jq_set_lib_dirs(jq_state *jq, jv dirs) { + assert(jq); + assert(jv_get_kind(dirs) == JV_KIND_ARRAY); + jq_set_attr(jq, jv_string("LIB_DIRS"), dirs); +} +jv jq_get_lib_dirs(jq_state *jq) { + assert(jq); + return jq_get_attr(jq, jv_string("LIB_DIRS")); +} + +void jq_set_attr(jq_state *jq, jv attr, jv val) { + assert(jq); + assert(jv_get_kind(attr) == JV_KIND_STRING); + assert(jv_is_valid(val)); + jq->attrs = jv_object_set(jq->attrs, attr, val); +} + +jv jq_get_attr(jq_state *jq, jv attr) { + assert(jq); + assert(jv_get_kind(attr) == JV_KIND_STRING); + return jv_object_get(jv_copy(jq->attrs), attr); +} void jq_dump_disassembly(jq_state *jq, int indent) { dump_disassembly(indent, jq->bc); } diff --git a/jq.h b/jq.h index 66b79387..eaadb597 100644 --- a/jq.h +++ b/jq.h @@ -20,4 +20,10 @@ void jq_start(jq_state *, jv value, int flags); jv jq_next(jq_state *); void jq_teardown(jq_state **); +void jq_set_lib_origin(jq_state *, jv origin); +jv jq_get_lib_origin(jq_state *); +void jq_set_lib_dirs(jq_state *, jv dirs); +jv jq_get_lib_dirs(jq_state *); +void jq_set_attr(jq_state *, jv attr, jv val); +jv jq_get_attr(jq_state *, jv attr); #endif /* !_JQ_H_ */ diff --git a/jv.c b/jv.c index d7f967de..020e65b9 100644 --- a/jv.c +++ b/jv.c @@ -10,6 +10,7 @@ #include "jv_alloc.h" #include "jv.h" #include "jv_unicode.h" +#include "util.h" /* * Internal refcounting helpers @@ -614,34 +615,6 @@ int jv_string_length_codepoints(jv j) { return len; } -#ifndef HAVE_MEMMEM -#ifdef memmem -#undef memmem -#endif -#define memmem my_memmem -static const void *memmem(const void *haystack, size_t haystacklen, - const void *needle, size_t needlelen) -{ - const char *h = haystack; - const char *n = needle; - size_t hi, hi2, ni; - - if (haystacklen < needlelen || haystacklen == 0) - return NULL; - for (hi = 0; hi < (haystacklen - needlelen + 1); hi++) { - for (ni = 0, hi2 = hi; ni < needlelen; ni++, hi2++) { - if (h[hi2] != n[ni]) - goto not_this; - } - - return &h[hi]; - -not_this: - continue; - } - return NULL; -} -#endif /* HAVE_MEMMEM */ jv jv_string_indexes(jv j, jv k) { assert(jv_get_kind(j) == JV_KIND_STRING); @@ -654,7 +627,7 @@ jv jv_string_indexes(jv j, jv k) { jv a = jv_array(); p = jstr; - while ((p = memmem(p, (jstr + jlen) - p, idxstr, idxlen)) != NULL) { + while ((p = jq_memmem(p, (jstr + jlen) - p, idxstr, idxlen)) != NULL) { a = jv_array_append(a, jv_number(p - jstr)); p += idxlen; } @@ -676,7 +649,7 @@ jv jv_string_split(jv j, jv sep) { assert(jv_get_refcnt(a) == 1); for (p = jstr; p < jstr + jlen; p = s + seplen) { - s = memmem(p, (jstr + jlen) - p, sepstr, seplen); + s = jq_memmem(p, (jstr + jlen) - p, sepstr, seplen); if (s == NULL) s = jstr + jlen; a = jv_array_append(a, jv_string_sized(p, s - p)); diff --git a/lexer.l b/lexer.l index b51ab1fc..8205e69e 100644 --- a/lexer.l +++ b/lexer.l @@ -42,6 +42,8 @@ struct lexer_param; "!=" { return NEQ; } "==" { return EQ; } "as" { return AS; } +"search" { return SEARCH; } +"import" { return IMPORT; } "def" { return DEF; } "if" { return IF; } "then" { return THEN; } @@ -114,6 +116,7 @@ struct lexer_param; [a-zA-Z_][a-zA-Z_0-9]* { yylval->literal = jv_string(yytext); return IDENT;} +[a-zA-Z_][a-zA-Z_0-9]*::[a-zA-Z_][a-zA-Z_0-9]* { yylval->literal = jv_string(yytext); return IDENT;} \.[a-zA-Z_][a-zA-Z_0-9]* { yylval->literal = jv_string(yytext+1); return FIELD;} [ \n\t]+ {} diff --git a/linker.c b/linker.c new file mode 100644 index 00000000..5fc58393 --- /dev/null +++ b/linker.c @@ -0,0 +1,195 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "jq_parser.h" +#include "locfile.h" +#include "jv.h" +#include "jq.h" +#include "parser.h" +#include "util.h" +#include "compile.h" + +struct lib_loading_state { + char **names; + block *defs; + uint64_t ct; +}; +static int load_library(jq_state *jq, jv lib_path, block *out_block, struct lib_loading_state *lib_state); + +// Given a lib_path to search first, creates a chain of search paths +// in the following order: +// 1. lib_path +// 2. -L paths passed in on the command line (from jq_state*) +// 3. JQ_LIBRARY_PATH environment variable +jv build_lib_search_chain(jq_state *jq, jv lib_path) { + assert(jv_get_kind(lib_path) == JV_KIND_STRING); + + jv out_paths = jv_array(); + if (jv_string_length_bytes(jv_copy(lib_path))) + out_paths = jv_array_append(out_paths, lib_path); + else + jv_free(lib_path); + jv lib_dirs = jq_get_lib_dirs(jq); + jv_array_foreach(lib_dirs, i, path) { + if (jv_string_length_bytes(jv_copy(path)) == 0) { + jv_free(path); + continue; + } + path = expand_path(path); + if (jv_is_valid(path)) { + out_paths = jv_array_append(out_paths, path); + } else { + jv emsg = jv_invalid_get_msg(path); + fprintf(stderr, "jq: warning: skipping search path: %s\n", jv_string_value(emsg)); + jv_free(emsg); + } + } + jv_free(lib_dirs); + return out_paths; +} + +static jv find_lib(jq_state *jq, jv lib_name, jv lib_search_path) { + assert(jv_get_kind(lib_search_path) == JV_KIND_STRING); + assert(jv_get_kind(lib_name) == JV_KIND_STRING); + + lib_search_path = expand_path(lib_search_path); + + struct stat st; + int ret; + + jv lib_search_paths = build_lib_search_chain(jq, lib_search_path); + + jv_array_foreach(lib_search_paths, i, spath) { + jv testpath = jq_realpath(jv_string_fmt("%s/%s.jq",jv_string_value(spath),jv_string_value(lib_name))); + + jv_free(spath); + ret = stat(jv_string_value(testpath),&st); + if (ret == 0) { + jv_free(lib_name); + jv_free(lib_search_paths); + return testpath; + } + jv_free(testpath); + } + jv output = jv_invalid_with_msg(jv_string_fmt("could not find library: %s", jv_string_value(lib_name))); + jv_free(lib_name); + jv_free(lib_search_paths); + return output; +} + +static int process_dependencies(jq_state *jq, jv lib_origin, block *src_block, struct lib_loading_state *lib_state) { + jv deps = block_take_imports(src_block); + block bk = *src_block; + int nerrors = 0; + + jv_array_foreach(deps, i, dep) { + jv name = jv_object_get(jv_copy(dep), jv_string("name")); + jv as = jv_object_get(jv_copy(dep), jv_string("as")); + if (!jv_is_valid(as)) { + jv_free(as); + as = jv_copy(name); + } + jv search = jv_object_get(dep, jv_string("search")); + if (!jv_is_valid(search)) { + jv_free(search); + search = jv_string(""); + } + if (strncmp("$ORIGIN/",jv_string_value(search),8) == 0) { + jv tsearch = jv_string_fmt("%s/%s",jv_string_value(lib_origin),jv_string_value(search)+8); + jv_free(search); + search = tsearch; + } + jv lib_path = find_lib(jq, name, search); + if (!jv_is_valid(lib_path)) { + jv emsg = jv_invalid_get_msg(lib_path); + fprintf(stderr, "jq: error: %s\n",jv_string_value(emsg)); + jv_free(emsg); + jv_free(lib_origin); + jv_free(as); + jv_free(deps); + return 1; + } + uint64_t state_idx = 0; + for (; state_idx < lib_state->ct; ++state_idx) { + if (strcmp(lib_state->names[state_idx],jv_string_value(lib_path)) == 0) + break; + } + if (state_idx < lib_state->ct) { // Found + bk = block_bind_library(lib_state->defs[state_idx], bk, OP_IS_CALL_PSEUDO, jv_string_value(as)); + jv_free(lib_path); + } else { // Not found. Add it to the table before binding. + block dep_def_block = gen_noop(); + nerrors += load_library(jq, lib_path, &dep_def_block, lib_state); + if (nerrors == 0) + bk = block_bind_library(dep_def_block, bk, OP_IS_CALL_PSEUDO, jv_string_value(as)); + else + block_free(dep_def_block); + } + jv_free(as); + } + jv_free(lib_origin); + jv_free(deps); + return nerrors; +} + +// Loads the library at lib_path into lib_state, putting the library's defs +// into *out_block +static int load_library(jq_state *jq, jv lib_path, block *out_block, struct lib_loading_state *lib_state) { + int nerrors = 0; + struct locfile* src; + block program; + jv data = jv_load_file(jv_string_value(lib_path), 1); + int state_idx; + if (jv_is_valid(data)) { + src = locfile_init(jq, jv_string_value(data), jv_string_length_bytes(jv_copy(data))); + nerrors += jq_parse_library(src, &program); + if (nerrors == 0) { + state_idx = lib_state->ct++; + lib_state->names = realloc(lib_state->names, lib_state->ct * sizeof(const char *)); + lib_state->defs = realloc(lib_state->defs, lib_state->ct * sizeof(block)); + lib_state->names[state_idx] = strdup(jv_string_value(lib_path)); + lib_state->defs[state_idx] = program; + char *lib_origin = strdup(jv_string_value(lib_path)); + nerrors += process_dependencies(jq, jv_string(dirname(lib_origin)), &lib_state->defs[state_idx], lib_state); + free(lib_origin); + *out_block = lib_state->defs[state_idx]; + } + locfile_free(src); + } + jv_free(lib_path); + jv_free(data); + return nerrors; +} + +int load_program(jq_state *jq, struct locfile* src, block *out_block) { + int nerrors = 0; + block program; + struct lib_loading_state lib_state = {0,0,0}; + nerrors = jq_parse(src, &program); + if (nerrors) + return nerrors; + + nerrors = process_dependencies(jq, jq_get_lib_origin(jq), &program, &lib_state); + block libs = gen_noop(); + for (uint64_t i = 0; i < lib_state.ct; ++i) { + free(lib_state.names[i]); + if (nerrors == 0) + libs = block_join(libs, lib_state.defs[i]); + else + block_free(lib_state.defs[i]); + } + free(lib_state.names); + free(lib_state.defs); + if (nerrors) + block_free(program); + else + *out_block = block_drop_unreferenced(block_join(libs, program)); + + return nerrors; +} diff --git a/linker.h b/linker.h new file mode 100644 index 00000000..4e9f2adf --- /dev/null +++ b/linker.h @@ -0,0 +1,9 @@ +#ifndef LINKER_H +#define LINKER_H + +int load_program(jq_state *jq, struct locfile* src, block *out_block); + + + + +#endif diff --git a/locfile.c b/locfile.c index 39343dbf..eaead6bb 100644 --- a/locfile.c +++ b/locfile.c @@ -9,11 +9,14 @@ #include "locfile.h" -void locfile_init(struct locfile* l, jq_state *jq, const char* data, int length) { +struct locfile* locfile_init(jq_state *jq, const char* data, int length) { + struct locfile* l = jv_mem_alloc(sizeof(struct locfile)); l->jq = jq; - l->data = data; + l->data = jv_mem_alloc(length); + memcpy((char*)l->data,data,length); l->length = length; l->nlines = 1; + l->refct = 1; for (int i=0; inlines++; } @@ -27,10 +30,19 @@ void locfile_init(struct locfile* l, jq_state *jq, const char* data, int length) } } l->linemap[l->nlines] = length+1; // virtual last \n + return l; } +struct locfile* locfile_retain(struct locfile* l) { + l->refct++; + return l; +} void locfile_free(struct locfile* l) { - jv_mem_free(l->linemap); + if (--(l->refct) == 0) { + jv_mem_free(l->linemap); + jv_mem_free((char*)l->data); + jv_mem_free(l); + } } static int locfile_get_line(struct locfile* l, int pos) { diff --git a/locfile.h b/locfile.h index 4e20df27..48c460f4 100644 --- a/locfile.h +++ b/locfile.h @@ -16,9 +16,11 @@ struct locfile { int nlines; char *error; jq_state *jq; + int refct; }; -void locfile_init(struct locfile* l, jq_state *jq, const char* data, int length); +struct locfile* locfile_init(jq_state *jq, const char* data, int length); +struct locfile* locfile_retain(struct locfile* l); void locfile_free(struct locfile* l); diff --git a/main.c b/main.c index 228180bc..5008f8b2 100644 --- a/main.c +++ b/main.c @@ -1,9 +1,10 @@ #include +#include +#include +#include #include #include -#include #include -#include #include #include "compile.h" #include "jv.h" @@ -197,6 +198,7 @@ int main(int argc, char* argv[]) { int jq_flags = 0; size_t short_opts = 0; jv program_arguments = jv_array(); + jv lib_search_paths = jv_array(); for (int i=1; i= argc - 1) { + fprintf(stderr, "-L takes a parameter: (e.g. -L /search/path or -L/search/path)\n"); + die(); + } else { + lib_search_paths = jv_array_append(lib_search_paths, jv_string(argv[i+1])); + i++; + } + continue; + } + if (isoption(argv[i], 's', "slurp", &short_opts)) { options |= SLURP; if (!short_opts) continue; @@ -318,6 +333,7 @@ int main(int argc, char* argv[]) { ret = 0; goto out; } + // check for unknown options... if this argument was a short option if (strlen(argv[i]) != short_opts + 1) { fprintf(stderr, "%s: Unknown option %s\n", progname, argv[i]); @@ -326,6 +342,26 @@ int main(int argc, char* argv[]) { } } + char *penv = getenv("JQ_LIBRARY_PATH"); + if (penv) { +#ifdef WIN32 +#define PATH_ENV_SEPARATOR ";" +#else +#define PATH_ENV_SEPARATOR ":" +#endif + lib_search_paths = jv_array_concat(lib_search_paths,jv_string_split(jv_string(penv),jv_string(PATH_ENV_SEPARATOR))); +#undef PATH_ENV_SEPARATOR + } + jq_set_lib_dirs(jq,lib_search_paths); + + char *origin = strdup(argv[0]); + if (origin == NULL) { + fprintf(stderr, "Error: out of memory\n"); + exit(1); + } + jq_set_lib_origin(jq,jv_string(dirname(origin))); + free(origin); + #if (!defined(WIN32) && defined(HAVE_ISATTY)) || defined(HAVE__ISATTY) #if defined(HAVE__ISATTY) && defined(isatty) diff --git a/opcode_list.h b/opcode_list.h index 284c3686..5ecf57e3 100644 --- a/opcode_list.h +++ b/opcode_list.h @@ -36,3 +36,4 @@ OP(CLOSURE_CREATE, DEFINITION, 0, 0) OP(CLOSURE_CREATE_C, DEFINITION, 0, 0) OP(TOP, NONE, 0, 0) +OP(DEPS, CONSTANT, 0, 0) diff --git a/parser.y b/parser.y index 48027896..8c33f520 100644 --- a/parser.y +++ b/parser.y @@ -56,7 +56,9 @@ struct lexer_param; %token NEQ "!=" %token DEFINEDOR "//" %token AS "as" +%token SEARCH "search" %token DEF "def" +%token IMPORT "import" %token IF "if" %token THEN "then" %token ELSE "else" @@ -102,7 +104,7 @@ struct lexer_param; %precedence "catch" -%type Exp Term MkDict MkDictPair ExpD ElseBody QQString FuncDef FuncDefs String +%type Exp Term MkDict MkDictPair ExpD ElseBody QQString FuncDef FuncDefs String Import Imports %{ #include "lexer.h" struct lexer_param { @@ -210,13 +212,21 @@ static block gen_update(block object, block val, int optype) { %% TopLevel: -Exp { - *answer = BLOCK(gen_op_simple(TOP), $1); +Imports Exp { + *answer = BLOCK($1, gen_op_simple(TOP), $2); } | -FuncDefs { - *answer = $1; +Imports FuncDefs { + *answer = BLOCK($1, $2); } +Imports: +%empty { + $$ = gen_noop(); +} | +Import Imports { + $$ = BLOCK($1, $2); +} + FuncDefs: %empty { $$ = gen_noop(); @@ -387,6 +397,28 @@ Term { $$ = $1; } +Import: +"import" IDENT ';' { + $$ = gen_import(jv_string_value($2), NULL, NULL); + jv_free($2); +} | +"import" IDENT "as" IDENT ';' { + $$ = gen_import(jv_string_value($2), jv_string_value($4), NULL); + jv_free($2); + jv_free($4); +} | +"import" IDENT "as" IDENT "search" QQSTRING_START QQSTRING_TEXT QQSTRING_END ';' { + $$ = gen_import(jv_string_value($2), jv_string_value($4), jv_string_value($7)); + jv_free($2); + jv_free($4); + jv_free($7); +} | +"import" IDENT "search" QQSTRING_START QQSTRING_TEXT QQSTRING_END ';' { + $$ = gen_import(jv_string_value($2), NULL, jv_string_value($5)); + jv_free($2); + jv_free($5); +} + FuncDef: "def" IDENT ':' Exp ';' { $$ = gen_function(jv_string_value($2), gen_noop(), $4); @@ -608,41 +640,41 @@ FORMAT { $$ = BLOCK(gen_subexp(gen_const(jv_object())), $2, gen_op_simple(POP)); } | '$' IDENT { - $$ = gen_location(@$, gen_op_unbound(LOADV, jv_string_value($2))); + $$ = gen_location(@$, locations, gen_op_unbound(LOADV, jv_string_value($2))); jv_free($2); } | IDENT { - $$ = gen_location(@$, gen_call(jv_string_value($1), gen_noop())); + $$ = gen_location(@$, locations, gen_call(jv_string_value($1), gen_noop())); jv_free($1); } | IDENT '(' Exp ')' { $$ = gen_call(jv_string_value($1), gen_lambda($3)); - $$ = gen_location(@1, $$); + $$ = gen_location(@1, locations, $$); jv_free($1); } | IDENT '(' Exp ';' Exp ')' { $$ = gen_call(jv_string_value($1), BLOCK(gen_lambda($3), gen_lambda($5))); - $$ = gen_location(@1, $$); + $$ = gen_location(@1, locations, $$); jv_free($1); } | IDENT '(' Exp ';' Exp ';' Exp ')' { $$ = gen_call(jv_string_value($1), BLOCK(gen_lambda($3), gen_lambda($5), gen_lambda($7))); - $$ = gen_location(@1, $$); + $$ = gen_location(@1, locations, $$); jv_free($1); } | IDENT '(' Exp ';' Exp ';' Exp ';' Exp ')' { $$ = gen_call(jv_string_value($1), BLOCK(gen_lambda($3), gen_lambda($5), gen_lambda($7), gen_lambda($9))); - $$ = gen_location(@1, $$); + $$ = gen_location(@1, locations, $$); jv_free($1); } | IDENT '(' Exp ';' Exp ';' Exp ';' Exp ';' Exp ')' { $$ = gen_call(jv_string_value($1), BLOCK(gen_lambda($3), gen_lambda($5), gen_lambda($7), gen_lambda($9), gen_lambda($11))); - $$ = gen_location(@1, $$); + $$ = gen_location(@1, locations, $$); jv_free($1); } | IDENT '(' Exp ';' Exp ';' Exp ';' Exp ';' Exp ';' Exp ')' { $$ = gen_call(jv_string_value($1), BLOCK(gen_lambda($3), gen_lambda($5), gen_lambda($7), gen_lambda($9), gen_lambda($11), gen_lambda($13))); - $$ = gen_location(@1, $$); + $$ = gen_location(@1, locations, $$); jv_free($1); } | '(' error ')' { $$ = gen_noop(); } | @@ -703,6 +735,6 @@ int jq_parse_library(struct locfile* locations, block* answer) { locfile_locate(locations, UNKNOWN_LOCATION, "error: library should only have function definitions, not a main expression"); return 1; } - assert(block_has_only_binders(*answer, OP_IS_CALL_PSEUDO)); + assert(block_has_only_binders_and_imports(*answer, OP_IS_CALL_PSEUDO)); return 0; } diff --git a/tests/run b/tests/run index d3556b63..ea0ec76e 100755 --- a/tests/run +++ b/tests/run @@ -3,12 +3,12 @@ set -e if which valgrind > /dev/null; then - VALGRIND='valgrind --error-exitcode=1 -q --leak-check=full --suppressions=tests/onig.supp' + VALGRIND='valgrind --error-exitcode=1 --leak-check=full --suppressions=tests/onig.supp' else VALGRIND= fi -cat $@ | $VALGRIND ./jq --run-tests +cat $@ | $VALGRIND -q ./jq --run-tests d= trap '[ -n "$d" ] && rm -rf "$d"' EXIT @@ -26,13 +26,66 @@ def g: "bar"; def fg: f+g; EOF -if [ "`HOME=$d $VALGRIND ./jq -nr fg`" != foobar ]; then +cat > "$d/a.jq" < "$d/b.jq" < "$d/c.jq" < "$d/syntaxerror.jq" <&2 exit 1 fi -if [ `HOME=$d $VALGRIND ./jq --debug-dump-disasm -n fg | grep '^[a-z]' | wc -l` -gt 3 ]; then +if [ `HOME=$d $VALGRIND -q ./jq --debug-dump-disasm -n fg | grep '^[a-z]' | wc -l` -gt 3 ]; then echo "Binding too many defs into program" 1>&2 exit 1 fi +if ! $VALGRIND -q ./jq -ner -L $d 'import a as foo; import b as bar; import a as foobar; def fooa: foo::a; [fooa, bar::a, bar::b, foo::a, foobar::a] | . == ["a","b","c","a","a"]' > /dev/null; then + echo "Module system appears to be broken" 1>&2 + exit 1 +fi + +if ! $VALGRIND -q ./jq -ner -L $d 'import c as foo; [foo::a, foo::c] | . == [0,"ac"]' > /dev/null; then + echo "Module system appears to be broken" 1>&2 + exit 1 +fi + +if $VALGRIND ./jq -ner -L $d 'import syntaxerror; .' > $d/out 2>&1; then + echo "Module system appears to be broken" 1>&2 + exit 1 +fi +if ! grep 'ERROR SUMMARY: 0 errors from 0 contexts' $d/out > /dev/null; then + echo "Module system has memory errors when modules have syntax errors" 1>&2 + cat $d/out + exit 1 +fi +if ! grep '^error: syntax error,' $d/out > /dev/null; then + echo "Module system not detecting syntax errors in modules correctly" 1>&2 + exit 1 +fi + +if $VALGRIND ./jq -ner -L $d '%::wat' > $d/out 2>&1 || + ! grep '^error: syntax error,' $d/out > /dev/null; then + echo "Syntax errors not detected?" 1>&2 + exit 1 +fi +if ! grep 'ERROR SUMMARY: 0 errors from 0 contexts' $d/out > /dev/null; then + echo "Memory errors when programs have syntax errors" 1>&2 + cat $d/out + exit 1 +fi diff --git a/util.c b/util.c new file mode 100644 index 00000000..da0b40c4 --- /dev/null +++ b/util.c @@ -0,0 +1,120 @@ + +#ifdef HAVE_MEMMEM +#define _GNU_SOURCE +#include +#endif +#include +#ifndef WIN32 +#include +#endif +#include +#include +#include + + +#include "util.h" +#include "jv.h" + +jv expand_path(jv path) { + assert(jv_get_kind(path) == JV_KIND_STRING); + const char *pstr = jv_string_value(path); + jv ret = path; + if (jv_string_length_bytes(jv_copy(path)) > 1 && pstr[0] == '~' && pstr[1] == '/') { + jv home = get_home(); + if (jv_is_valid(home)) { + ret = jv_string_fmt("%s/%s",jv_string_value(home),pstr+2); + jv_free(home); + } else { + jv emsg = jv_invalid_get_msg(home); + ret = jv_invalid_with_msg(jv_string_fmt("Could not expand %s. (%s)", pstr, jv_string_value(emsg))); + jv_free(emsg); + } + jv_free(path); + } + return ret; +} + +jv get_home() { + jv ret; + char *home = getenv("HOME"); + if (!home) { +#ifndef WIN32 + struct passwd* pwd = getpwuid(getuid()); + if (pwd) + ret = jv_string(pwd->pw_dir); + else + ret = jv_invalid_with_msg(jv_string("Could not find home directory.")); +#else + home = getenv("USERPROFILE"); + if (!home) { + char *hd = getenv("HOMEDRIVE"); + if (!hd) hd = ""; + home = getenv("HOMEPATH"); + if (!home) { + ret = jv_invalid_with_msg(jv_string("Could not find home directory.")); + } else { + ret = jv_string_fmt("%s%s",hd,home); + } + } else { + ret = jv_string(home); + } +#endif + } else { + ret = jv_string(home); + } + return ret; +} + + +jv jq_realpath(jv path) { + int path_max; + char *buf = NULL; +#ifdef _PC_PATH_MAX + path_max = pathconf(jv_string_value(path),_PC_PATH_MAX); +#else + path_max = PATH_MAX; +#endif + if (path_max > 0) { + buf = malloc(sizeof(char) * path_max); + } +#ifdef WIN32 + char *tmp = _fullpath(buf, jv_string_value(path), path_max); +#else + char *tmp = realpath(jv_string_value(path), buf); +#endif + if (tmp == NULL) { + free(buf); + return path; + } + jv_free(path); + path = jv_string(tmp); + free(tmp); + return path; +} + +const void *jq_memmem(const void *haystack, size_t haystacklen, + const void *needle, size_t needlelen) { +#ifdef HAVE_MEMMEM + return (const void*)memmem(haystack, haystacklen, needle, needlelen); +#else + const char *h = haystack; + const char *n = needle; + size_t hi, hi2, ni; + + if (haystacklen < needlelen || haystacklen == 0) + return NULL; + for (hi = 0; hi < (haystacklen - needlelen + 1); hi++) { + for (ni = 0, hi2 = hi; ni < needlelen; ni++, hi2++) { + if (h[hi2] != n[ni]) + goto not_this; + } + + return &h[hi]; + +not_this: + continue; + } + return NULL; +#endif /* !HAVE_MEMMEM */ +} + diff --git a/util.h b/util.h new file mode 100644 index 00000000..f2df3909 --- /dev/null +++ b/util.h @@ -0,0 +1,26 @@ +#ifndef UTIL_H +#define UTIL_H + +#include "jv.h" + +jv expand_path(jv); +jv get_home(void); +jv jq_realpath(jv); + +const void *jq_memmem(const void *haystack, size_t haystacklen, + const void *needle, size_t needlelen); + +#ifndef MIN +#define MIN(a,b) \ + ({ __typeof__ (a) _a = (a); \ + __typeof__ (b) _b = (b); \ + _a < _b ? _a : _b; }) +#endif +#ifndef MAX +#define MAX(a,b) \ + ({ __typeof__ (a) _a = (a); \ + __typeof__ (b) _b = (b); \ + _a > _b ? _a : _b; }) +#endif + +#endif /* UTIL_H */ -- cgit v1.2.3