#include #include #include #include #include "builtin.h" #include "compile.h" #include "jq_parser.h" #include "bytecode.h" #include "locfile.h" #include "jv_unicode.h" static jv type_error(jv bad, const char* msg) { jv err = jv_invalid_with_msg(jv_string_fmt("%s %s", jv_kind_name(jv_get_kind(bad)), msg)); jv_free(bad); return err; } static jv type_error2(jv bad1, jv bad2, const char* msg) { jv err = jv_invalid_with_msg(jv_string_fmt("%s and %s %s", jv_kind_name(jv_get_kind(bad1)), jv_kind_name(jv_get_kind(bad2)), msg)); jv_free(bad1); jv_free(bad2); return err; } static jv f_plus(jv input, jv a, jv b) { jv_free(input); if (jv_get_kind(a) == JV_KIND_NULL) { jv_free(a); return b; } else if (jv_get_kind(b) == JV_KIND_NULL) { jv_free(b); return a; } else if (jv_get_kind(a) == JV_KIND_NUMBER && jv_get_kind(b) == JV_KIND_NUMBER) { return jv_number(jv_number_value(a) + jv_number_value(b)); } else if (jv_get_kind(a) == JV_KIND_STRING && jv_get_kind(b) == JV_KIND_STRING) { return jv_string_concat(a, b); } else if (jv_get_kind(a) == JV_KIND_ARRAY && jv_get_kind(b) == JV_KIND_ARRAY) { return jv_array_concat(a, b); } else if (jv_get_kind(a) == JV_KIND_OBJECT && jv_get_kind(b) == JV_KIND_OBJECT) { return jv_object_merge(a, b); } else { return type_error2(a, b, "cannot be added"); } } static jv f_floor(jv input) { if (jv_get_kind(input) != JV_KIND_NUMBER) { return type_error(input, "cannot be floored"); } jv ret = jv_number(floor(jv_number_value(input))); jv_free(input); return ret; } static jv f_sqrt(jv input) { if (jv_get_kind(input) != JV_KIND_NUMBER) { return type_error(input, "has no square root"); } jv ret = jv_number(sqrt(jv_number_value(input))); jv_free(input); return ret; } static jv f_negate(jv input) { if (jv_get_kind(input) != JV_KIND_NUMBER) { return type_error(input, "cannot be negated"); } jv ret = jv_number(-jv_number_value(input)); jv_free(input); return ret; } static jv f_startswith(jv a, jv b) { int alen = jv_string_length_bytes(jv_copy(a)); int blen = jv_string_length_bytes(jv_copy(b)); jv ret; if (blen <= alen && memcmp(jv_string_value(a), jv_string_value(b), blen) == 0) ret = jv_true(); else ret = jv_false(); jv_free(a); jv_free(b); return ret; } static jv f_endswith(jv a, jv b) { const char *astr = jv_string_value(a); const char *bstr = jv_string_value(b); size_t alen = jv_string_length_bytes(jv_copy(a)); size_t blen = jv_string_length_bytes(jv_copy(b)); jv ret;; if (alen < blen || memcmp(astr + (alen - blen), bstr, blen) != 0) ret = jv_false(); else ret = jv_true(); jv_free(a); jv_free(b); return ret; } static jv f_ltrimstr(jv input, jv left) { if (jv_get_kind(f_startswith(jv_copy(input), jv_copy(left))) != JV_KIND_TRUE) { jv_free(left); return input; } /* * FIXME It'd be better to share the suffix with the original input -- * that we could do, we just can't share prefixes. */ int prefixlen = jv_string_length_bytes(left); jv res = jv_string_sized(jv_string_value(input) + prefixlen, jv_string_length_bytes(jv_copy(input)) - prefixlen); jv_free(input); return res; } static jv f_rtrimstr(jv input, jv right) { if (jv_get_kind(f_endswith(jv_copy(input), jv_copy(right))) == JV_KIND_TRUE) { jv res = jv_string_sized(jv_string_value(input), jv_string_length_bytes(jv_copy(input)) - jv_string_length_bytes(right)); jv_free(input); return res; } jv_free(right); return input; } static jv f_minus(jv input, jv a, jv b) { jv_free(input); if (jv_get_kind(a) == JV_KIND_NUMBER && jv_get_kind(b) == JV_KIND_NUMBER) { return jv_number(jv_number_value(a) - jv_number_value(b)); } else if (jv_get_kind(a) == JV_KIND_ARRAY && jv_get_kind(b) == JV_KIND_ARRAY) { jv out = jv_array(); jv_array_foreach(a, i, x) { int include = 1; jv_array_foreach(b, j, y) { if (jv_equal(jv_copy(x), y)) { include = 0; break; } } if (include) out = jv_array_append(out, jv_copy(x)); jv_free(x); } jv_free(a); jv_free(b); return out; } else { return type_error2(a, b, "cannot be subtracted"); } } static jv f_multiply(jv input, jv a, jv b) { jv_free(input); if (jv_get_kind(a) == JV_KIND_NUMBER && jv_get_kind(b) == JV_KIND_NUMBER) { return jv_number(jv_number_value(a) * jv_number_value(b)); } else if (jv_get_kind(a) == JV_KIND_STRING && jv_get_kind(b) == JV_KIND_NUMBER) { int n; size_t alen = jv_string_length_bytes(jv_copy(a)); jv res = a; for (n = jv_number_value(b) - 1; n > 0; n--) res = jv_string_append_buf(res, jv_string_value(a), alen); if (n < 0) { jv_free(a); return jv_null(); } return res; } else { return type_error2(a, b, "cannot be multiplied"); } } static jv f_divide(jv input, jv a, jv b) { jv_free(input); if (jv_get_kind(a) == JV_KIND_NUMBER && jv_get_kind(b) == JV_KIND_NUMBER) { return jv_number(jv_number_value(a) / jv_number_value(b)); } else if (jv_get_kind(a) == JV_KIND_STRING && jv_get_kind(b) == JV_KIND_STRING) { return jv_string_split(a, b); } else { return type_error2(a, b, "cannot be divided"); } } static jv f_mod(jv input, jv a, jv b) { jv_free(input); if (jv_get_kind(a) == JV_KIND_NUMBER && jv_get_kind(b) == JV_KIND_NUMBER) { return jv_number((intmax_t)jv_number_value(a) % (intmax_t)jv_number_value(b)); } else { return type_error2(a, b, "cannot be divided"); } } static jv f_equal(jv input, jv a, jv b) { jv_free(input); return jv_bool(jv_equal(a, b)); } static jv f_notequal(jv input, jv a, jv b) { jv_free(input); return jv_bool(!jv_equal(a, b)); } enum cmp_op { CMP_OP_LESS, CMP_OP_GREATER, CMP_OP_LESSEQ, CMP_OP_GREATEREQ }; static jv order_cmp(jv input, jv a, jv b, enum cmp_op op) { jv_free(input); int r = jv_cmp(a, b); return jv_bool((op == CMP_OP_LESS && r < 0) || (op == CMP_OP_LESSEQ && r <= 0) || (op == CMP_OP_GREATEREQ && r >= 0) || (op == CMP_OP_GREATER && r > 0)); } static jv f_less(jv input, jv a, jv b) { return order_cmp(input, a, b, CMP_OP_LESS); } static jv f_greater(jv input, jv a, jv b) { return order_cmp(input, a, b, CMP_OP_GREATER); } static jv f_lesseq(jv input, jv a, jv b) { return order_cmp(input, a, b, CMP_OP_LESSEQ); } static jv f_greatereq(jv input, jv a, jv b) { return order_cmp(input, a, b, CMP_OP_GREATEREQ); } static jv f_contains(jv a, jv b) { if (jv_get_kind(a) == jv_get_kind(b)) { return jv_bool(jv_contains(a, b)); } else { return type_error2(a, b, "cannot have their containment checked"); } } static jv f_dump(jv input) { return jv_dump_string(input, 0); } static jv f_json_parse(jv input) { if (jv_get_kind(input) != JV_KIND_STRING) return type_error(input, "only strings can be parsed"); jv res = jv_parse_sized(jv_string_value(input), jv_string_length_bytes(jv_copy(input))); jv_free(input); return res; } static jv f_tonumber(jv input) { if (jv_get_kind(input) == JV_KIND_NUMBER) { return input; } if (jv_get_kind(input) == JV_KIND_STRING) { jv parsed = jv_parse(jv_string_value(input)); if (!jv_is_valid(parsed) || jv_get_kind(parsed) == JV_KIND_NUMBER) { jv_free(input); return parsed; } } return type_error(input, "cannot be parsed as a number"); } static jv f_length(jv input) { if (jv_get_kind(input) == JV_KIND_ARRAY) { return jv_number(jv_array_length(input)); } else if (jv_get_kind(input) == JV_KIND_OBJECT) { return jv_number(jv_object_length(input)); } else if (jv_get_kind(input) == JV_KIND_STRING) { return jv_number(jv_string_length_codepoints(input)); } else if (jv_get_kind(input) == JV_KIND_NUMBER) { return jv_number(fabs(jv_number_value(input))); } else if (jv_get_kind(input) == JV_KIND_NULL) { jv_free(input); return jv_number(0); } else { return type_error(input, "has no length"); } } static jv f_tostring(jv input) { if (jv_get_kind(input) == JV_KIND_STRING) { return input; } else { return jv_dump_string(input, 0); } } #define CHARS_ALPHANUM "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" static jv escape_string(jv input, const char* escapings) { assert(jv_get_kind(input) == JV_KIND_STRING); const char* lookup[128] = {0}; const char* p = escapings; while (*p) { lookup[(int)*p] = p+1; p++; p += strlen(p); p++; } jv ret = jv_string(""); const char* i = jv_string_value(input); const char* end = i + jv_string_length_bytes(jv_copy(input)); const char* cstart; int c = 0; while ((i = jvp_utf8_next((cstart = i), end, &c))) { assert(c != -1); if (c < 128 && lookup[c]) { ret = jv_string_append_str(ret, lookup[c]); } else { ret = jv_string_append_buf(ret, cstart, i - cstart); } } jv_free(input); return ret; } static jv f_format(jv input, jv fmt) { if (jv_get_kind(fmt) != JV_KIND_STRING) { jv_free(input); return type_error(fmt, "is not a valid format"); } const char* fmt_s = jv_string_value(fmt); if (!strcmp(fmt_s, "json")) { jv_free(fmt); return jv_dump_string(input, 0); } else if (!strcmp(fmt_s, "text")) { jv_free(fmt); return f_tostring(input); } else if (!strcmp(fmt_s, "csv")) { jv_free(fmt); if (jv_get_kind(input) != JV_KIND_ARRAY) return type_error(input, "cannot be csv-formatted, only array"); jv line = jv_string(""); jv_array_foreach(input, i, x) { if (i) line = jv_string_append_str(line, ","); switch (jv_get_kind(x)) { case JV_KIND_NULL: /* null rendered as empty string */ jv_free(x); break; case JV_KIND_TRUE: case JV_KIND_FALSE: line = jv_string_concat(line, jv_dump_string(x, 0)); break; case JV_KIND_NUMBER: if (jv_number_value(x) != jv_number_value(x)) { /* NaN, render as empty string */ jv_free(x); } else { line = jv_string_concat(line, jv_dump_string(x, 0)); } break; case JV_KIND_STRING: { line = jv_string_append_str(line, "\""); line = jv_string_concat(line, escape_string(x, "\"\"\"\0")); line = jv_string_append_str(line, "\""); break; } default: jv_free(input); jv_free(line); return type_error(x, "is not valid in a csv row"); } } jv_free(input); return line; } else if (!strcmp(fmt_s, "html")) { jv_free(fmt); return escape_string(f_tostring(input), "&&\0<<\0>>\0''\0\""\0"); } else if (!strcmp(fmt_s, "uri")) { jv_free(fmt); input = f_tostring(input); int unreserved[128] = {0}; const char* p = CHARS_ALPHANUM "-_.!~*'()"; while (*p) unreserved[(int)*p++] = 1; jv line = jv_string(""); const char* s = jv_string_value(input); for (int i=0; i= 3 ? 3 : len-i; for (int j=0; j<3; j++) { code <<= 8; code |= j < n ? (unsigned)data[i+j] : 0; } char buf[4]; for (int j=0; j<4; j++) { buf[j] = b64[(code >> (18 - j*6)) & 0x3f]; } if (n < 3) buf[3] = '='; if (n < 2) buf[2] = '='; line = jv_string_append_buf(line, buf, sizeof(buf)); } jv_free(input); return line; } else { jv_free(input); return jv_invalid_with_msg(jv_string_concat(fmt, jv_string(" is not a valid format"))); } } static jv f_keys(jv input) { if (jv_get_kind(input) == JV_KIND_OBJECT || jv_get_kind(input) == JV_KIND_ARRAY) { return jv_keys(input); } else { return type_error(input, "has no keys"); } } static jv f_sort(jv input){ if (jv_get_kind(input) == JV_KIND_ARRAY) { return jv_sort(input, jv_copy(input)); } else { return type_error(input, "cannot be sorted, as it is not an array"); } } static jv f_sort_by_impl(jv input, jv keys) { if (jv_get_kind(input) == JV_KIND_ARRAY && jv_get_kind(keys) == JV_KIND_ARRAY && jv_array_length(jv_copy(input)) == jv_array_length(jv_copy(keys))) { return jv_sort(input, keys); } else { return type_error2(input, keys, "cannot be sorted, as they are not both arrays"); } } static jv f_group_by_impl(jv input, jv keys) { if (jv_get_kind(input) == JV_KIND_ARRAY && jv_get_kind(keys) == JV_KIND_ARRAY && jv_array_length(jv_copy(input)) == jv_array_length(jv_copy(keys))) { return jv_group(input, keys); } else { return type_error2(input, keys, "cannot be sorted, as they are not both arrays"); } } static jv minmax_by(jv values, jv keys, int is_min) { if (jv_get_kind(values) != JV_KIND_ARRAY) return type_error2(values, keys, "cannot be iterated over"); if (jv_get_kind(keys) != JV_KIND_ARRAY) return type_error2(values, keys, "cannot be iterated over"); if (jv_array_length(jv_copy(values)) != jv_array_length(jv_copy(keys))) return type_error2(values, keys, "have wrong length"); if (jv_array_length(jv_copy(values)) == 0) { jv_free(values); jv_free(keys); return jv_null(); } jv ret = jv_array_get(jv_copy(values), 0); jv retkey = jv_array_get(jv_copy(keys), 0); for (int i=1; i 0);", "def leaf_paths: . as $dot|paths|select(. as $p|$dot|getpath($p)|type|. != \"array\" and . != \"object\");", "def any: reduce .[] as $i (false; . or $i);", "def all: reduce .[] as $i (true; . and $i);", }; static int builtins_bind_one(jq_state *jq, block* bb, const char* code) { struct locfile src; locfile_init(&src, jq, code, strlen(code)); block funcs; int nerrors = jq_parse_library(&src, &funcs); if (nerrors == 0) { *bb = block_bind_referenced(funcs, *bb, OP_IS_CALL_PSEUDO); } locfile_free(&src); return nerrors; } static int slurp_lib(jq_state *jq, block* bb) { int nerrors = 0; char* home = getenv("HOME"); if (home) { // silently ignore no $HOME jv filename = jv_string_append_str(jv_string(home), "/.jq"); jv data = jv_load_file(jv_string_value(filename), 1); if (jv_is_valid(data)) { nerrors = builtins_bind_one(jq, bb, jv_string_value(data) ); } jv_free(filename); jv_free(data); } return nerrors; } int builtins_bind(jq_state *jq, block* bb) { int nerrors = slurp_lib(jq, bb); if (nerrors) { block_free(*bb); return nerrors; } for (int i=(int)(sizeof(jq_builtins)/sizeof(jq_builtins[0]))-1; i>=0; i--) { nerrors = builtins_bind_one(jq, bb, jq_builtins[i]); assert(!nerrors); } *bb = bind_bytecoded_builtins(*bb); *bb = gen_cbinding(function_list, sizeof(function_list)/sizeof(function_list[0]), *bb); return nerrors; }