#include #include #include #include #include #include #include "builtin.h" #include "compile.h" #include "jq_parser.h" #include "bytecode.h" #include "linker.h" #include "locfile.h" #include "jv_unicode.h" static jv type_error(jv bad, const char* msg) { jv err = jv_invalid_with_msg(jv_string_fmt("%s %s", jv_kind_name(jv_get_kind(bad)), msg)); jv_free(bad); return err; } static jv type_error2(jv bad1, jv bad2, const char* msg) { jv err = jv_invalid_with_msg(jv_string_fmt("%s and %s %s", jv_kind_name(jv_get_kind(bad1)), jv_kind_name(jv_get_kind(bad2)), msg)); jv_free(bad1); jv_free(bad2); return err; } static jv f_plus(jq_state *jq, jv input, jv a, jv b) { jv_free(input); if (jv_get_kind(a) == JV_KIND_NULL) { jv_free(a); return b; } else if (jv_get_kind(b) == JV_KIND_NULL) { jv_free(b); return a; } else if (jv_get_kind(a) == JV_KIND_NUMBER && jv_get_kind(b) == JV_KIND_NUMBER) { return jv_number(jv_number_value(a) + jv_number_value(b)); } else if (jv_get_kind(a) == JV_KIND_STRING && jv_get_kind(b) == JV_KIND_STRING) { return jv_string_concat(a, b); } else if (jv_get_kind(a) == JV_KIND_ARRAY && jv_get_kind(b) == JV_KIND_ARRAY) { return jv_array_concat(a, b); } else if (jv_get_kind(a) == JV_KIND_OBJECT && jv_get_kind(b) == JV_KIND_OBJECT) { return jv_object_merge(a, b); } else { return type_error2(a, b, "cannot be added"); } } #define LIBM_DD(name) \ static jv f_ ## name(jq_state *jq, jv input) { \ if (jv_get_kind(input) != JV_KIND_NUMBER) { \ return type_error(input, "number required"); \ } \ jv ret = jv_number(name(jv_number_value(input))); \ jv_free(input); \ return ret; \ } #include "libm.h" #undef LIBM_DD static jv f_negate(jq_state *jq, jv input) { if (jv_get_kind(input) != JV_KIND_NUMBER) { return type_error(input, "cannot be negated"); } jv ret = jv_number(-jv_number_value(input)); jv_free(input); return ret; } static jv f_startswith(jq_state *jq, jv a, jv b) { int alen = jv_string_length_bytes(jv_copy(a)); int blen = jv_string_length_bytes(jv_copy(b)); jv ret; if (blen <= alen && memcmp(jv_string_value(a), jv_string_value(b), blen) == 0) ret = jv_true(); else ret = jv_false(); jv_free(a); jv_free(b); return ret; } static jv f_endswith(jq_state *jq, jv a, jv b) { const char *astr = jv_string_value(a); const char *bstr = jv_string_value(b); size_t alen = jv_string_length_bytes(jv_copy(a)); size_t blen = jv_string_length_bytes(jv_copy(b)); jv ret;; if (alen < blen || memcmp(astr + (alen - blen), bstr, blen) != 0) ret = jv_false(); else ret = jv_true(); jv_free(a); jv_free(b); return ret; } static jv f_ltrimstr(jq_state *jq, jv input, jv left) { if (jv_get_kind(f_startswith(jq, jv_copy(input), jv_copy(left))) != JV_KIND_TRUE) { jv_free(left); return input; } /* * FIXME It'd be better to share the suffix with the original input -- * that we could do, we just can't share prefixes. */ int prefixlen = jv_string_length_bytes(left); jv res = jv_string_sized(jv_string_value(input) + prefixlen, jv_string_length_bytes(jv_copy(input)) - prefixlen); jv_free(input); return res; } static jv f_rtrimstr(jq_state *jq, jv input, jv right) { if (jv_get_kind(f_endswith(jq, jv_copy(input), jv_copy(right))) == JV_KIND_TRUE) { jv res = jv_string_sized(jv_string_value(input), jv_string_length_bytes(jv_copy(input)) - jv_string_length_bytes(right)); jv_free(input); return res; } jv_free(right); return input; } static jv f_minus(jq_state *jq, jv input, jv a, jv b) { jv_free(input); if (jv_get_kind(a) == JV_KIND_NUMBER && jv_get_kind(b) == JV_KIND_NUMBER) { return jv_number(jv_number_value(a) - jv_number_value(b)); } else if (jv_get_kind(a) == JV_KIND_ARRAY && jv_get_kind(b) == JV_KIND_ARRAY) { jv out = jv_array(); jv_array_foreach(a, i, x) { int include = 1; jv_array_foreach(b, j, y) { if (jv_equal(jv_copy(x), y)) { include = 0; break; } } if (include) out = jv_array_append(out, jv_copy(x)); jv_free(x); } jv_free(a); jv_free(b); return out; } else { return type_error2(a, b, "cannot be subtracted"); } } static jv f_multiply(jq_state *jq, jv input, jv a, jv b) { jv_kind ak = jv_get_kind(a); jv_kind bk = jv_get_kind(b); jv_free(input); if (ak == JV_KIND_NUMBER && bk == JV_KIND_NUMBER) { return jv_number(jv_number_value(a) * jv_number_value(b)); } else if ((ak == JV_KIND_STRING && bk == JV_KIND_NUMBER) || (ak == JV_KIND_NUMBER && bk == JV_KIND_STRING)) { jv str = a; jv num = b; if (ak == JV_KIND_NUMBER) { str = b; num = a; } int n; size_t alen = jv_string_length_bytes(jv_copy(str)); jv res = str; for (n = jv_number_value(num) - 1; n > 0; n--) res = jv_string_append_buf(res, jv_string_value(str), alen); jv_free(num); if (n < 0) { jv_free(str); return jv_null(); } return res; } else if (ak == JV_KIND_OBJECT && bk == JV_KIND_OBJECT) { return jv_object_merge_recursive(a, b); } else { return type_error2(a, b, "cannot be multiplied"); } } static jv f_divide(jq_state *jq, jv input, jv a, jv b) { jv_free(input); if (jv_get_kind(a) == JV_KIND_NUMBER && jv_get_kind(b) == JV_KIND_NUMBER) { return jv_number(jv_number_value(a) / jv_number_value(b)); } else if (jv_get_kind(a) == JV_KIND_STRING && jv_get_kind(b) == JV_KIND_STRING) { return jv_string_split(a, b); } else { return type_error2(a, b, "cannot be divided"); } } static jv f_mod(jq_state *jq, jv input, jv a, jv b) { jv_free(input); if (jv_get_kind(a) == JV_KIND_NUMBER && jv_get_kind(b) == JV_KIND_NUMBER) { return jv_number((intmax_t)jv_number_value(a) % (intmax_t)jv_number_value(b)); } else { return type_error2(a, b, "cannot be divided"); } } static jv f_equal(jq_state *jq, jv input, jv a, jv b) { jv_free(input); return jv_bool(jv_equal(a, b)); } static jv f_notequal(jq_state *jq, jv input, jv a, jv b) { jv_free(input); return jv_bool(!jv_equal(a, b)); } enum cmp_op { CMP_OP_LESS, CMP_OP_GREATER, CMP_OP_LESSEQ, CMP_OP_GREATEREQ }; static jv order_cmp(jv input, jv a, jv b, enum cmp_op op) { jv_free(input); int r = jv_cmp(a, b); return jv_bool((op == CMP_OP_LESS && r < 0) || (op == CMP_OP_LESSEQ && r <= 0) || (op == CMP_OP_GREATEREQ && r >= 0) || (op == CMP_OP_GREATER && r > 0)); } static jv f_less(jq_state *jq, jv input, jv a, jv b) { return order_cmp(input, a, b, CMP_OP_LESS); } static jv f_greater(jq_state *jq, jv input, jv a, jv b) { return order_cmp(input, a, b, CMP_OP_GREATER); } static jv f_lesseq(jq_state *jq, jv input, jv a, jv b) { return order_cmp(input, a, b, CMP_OP_LESSEQ); } static jv f_greatereq(jq_state *jq, jv input, jv a, jv b) { return order_cmp(input, a, b, CMP_OP_GREATEREQ); } static jv f_contains(jq_state *jq, jv a, jv b) { if (jv_get_kind(a) == jv_get_kind(b)) { return jv_bool(jv_contains(a, b)); } else { return type_error2(a, b, "cannot have their containment checked"); } } static jv f_dump(jq_state *jq, jv input) { return jv_dump_string(input, 0); } static jv f_json_parse(jq_state *jq, jv input) { if (jv_get_kind(input) != JV_KIND_STRING) return type_error(input, "only strings can be parsed"); jv res = jv_parse_sized(jv_string_value(input), jv_string_length_bytes(jv_copy(input))); jv_free(input); return res; } static jv f_tonumber(jq_state *jq, jv input) { if (jv_get_kind(input) == JV_KIND_NUMBER) { return input; } if (jv_get_kind(input) == JV_KIND_STRING) { jv parsed = jv_parse(jv_string_value(input)); if (!jv_is_valid(parsed) || jv_get_kind(parsed) == JV_KIND_NUMBER) { jv_free(input); return parsed; } } return type_error(input, "cannot be parsed as a number"); } static jv f_length(jq_state *jq, jv input) { if (jv_get_kind(input) == JV_KIND_ARRAY) { return jv_number(jv_array_length(input)); } else if (jv_get_kind(input) == JV_KIND_OBJECT) { return jv_number(jv_object_length(input)); } else if (jv_get_kind(input) == JV_KIND_STRING) { return jv_number(jv_string_length_codepoints(input)); } else if (jv_get_kind(input) == JV_KIND_NUMBER) { return jv_number(fabs(jv_number_value(input))); } else if (jv_get_kind(input) == JV_KIND_NULL) { jv_free(input); return jv_number(0); } else { return type_error(input, "has no length"); } } static jv f_tostring(jq_state *jq, jv input) { if (jv_get_kind(input) == JV_KIND_STRING) { return input; } else { return jv_dump_string(input, 0); } } #define CHARS_ALPHANUM "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" static jv escape_string(jv input, const char* escapings) { assert(jv_get_kind(input) == JV_KIND_STRING); const char* lookup[128] = {0}; const char* p = escapings; while (*p) { lookup[(int)*p] = p+1; p++; p += strlen(p); p++; } jv ret = jv_string(""); const char* i = jv_string_value(input); const char* end = i + jv_string_length_bytes(jv_copy(input)); const char* cstart; int c = 0; while ((i = jvp_utf8_next((cstart = i), end, &c))) { assert(c > 0); if (c < 128 && lookup[c]) { ret = jv_string_append_str(ret, lookup[c]); } else { ret = jv_string_append_buf(ret, cstart, i - cstart); } } jv_free(input); return ret; } static jv f_format(jq_state *jq, jv input, jv fmt) { if (jv_get_kind(fmt) != JV_KIND_STRING) { jv_free(input); return type_error(fmt, "is not a valid format"); } const char* fmt_s = jv_string_value(fmt); if (!strcmp(fmt_s, "json")) { jv_free(fmt); return jv_dump_string(input, 0); } else if (!strcmp(fmt_s, "text")) { jv_free(fmt); return f_tostring(jq, input); } else if (!strcmp(fmt_s, "csv") || !strcmp(fmt_s, "tsv")) { const char *quotes, *sep, *escapings; if (!strcmp(fmt_s, "csv")) { quotes = "\""; sep = ","; escapings = "\"\"\"\0"; } else { assert(!strcmp(fmt_s, "tsv")); quotes = ""; sep = "\t"; escapings = "\t\\t\0"; } jv_free(fmt); if (jv_get_kind(input) != JV_KIND_ARRAY) return type_error(input, "cannot be csv-formatted, only array"); jv line = jv_string(""); jv_array_foreach(input, i, x) { if (i) line = jv_string_append_str(line, sep); switch (jv_get_kind(x)) { case JV_KIND_NULL: /* null rendered as empty string */ jv_free(x); break; case JV_KIND_TRUE: case JV_KIND_FALSE: line = jv_string_concat(line, jv_dump_string(x, 0)); break; case JV_KIND_NUMBER: if (jv_number_value(x) != jv_number_value(x)) { /* NaN, render as empty string */ jv_free(x); } else { line = jv_string_concat(line, jv_dump_string(x, 0)); } break; case JV_KIND_STRING: { line = jv_string_append_str(line, quotes); line = jv_string_concat(line, escape_string(x, escapings)); line = jv_string_append_str(line, quotes); break; } default: jv_free(input); jv_free(line); return type_error(x, "is not valid in a csv row"); } } jv_free(input); return line; } else if (!strcmp(fmt_s, "html")) { jv_free(fmt); return escape_string(f_tostring(jq, input), "&&\0<<\0>>\0''\0\""\0"); } else if (!strcmp(fmt_s, "uri")) { jv_free(fmt); input = f_tostring(jq, input); int unreserved[128] = {0}; const char* p = CHARS_ALPHANUM "-_.!~*'()"; while (*p) unreserved[(int)*p++] = 1; jv line = jv_string(""); const char* s = jv_string_value(input); for (int i=0; i= 3 ? 3 : len-i; for (int j=0; j<3; j++) { code <<= 8; code |= j < n ? (unsigned)data[i+j] : 0; } char buf[4]; for (int j=0; j<4; j++) { buf[j] = b64[(code >> (18 - j*6)) & 0x3f]; } if (n < 3) buf[3] = '='; if (n < 2) buf[2] = '='; line = jv_string_append_buf(line, buf, sizeof(buf)); } jv_free(input); return line; } else { jv_free(input); return jv_invalid_with_msg(jv_string_concat(fmt, jv_string(" is not a valid format"))); } } static jv f_keys(jq_state *jq, jv input) { if (jv_get_kind(input) == JV_KIND_OBJECT || jv_get_kind(input) == JV_KIND_ARRAY) { return jv_keys(input); } else { return type_error(input, "has no keys"); } } static jv f_keys_unsorted(jq_state *jq, jv input) { if (jv_get_kind(input) == JV_KIND_OBJECT || jv_get_kind(input) == JV_KIND_ARRAY) { return jv_keys_unsorted(input); } else { return type_error(input, "has no keys"); } } static jv f_sort(jq_state *jq, jv input){ if (jv_get_kind(input) == JV_KIND_ARRAY) { return jv_sort(input, jv_copy(input)); } else { return type_error(input, "cannot be sorted, as it is not an array"); } } static jv f_sort_by_impl(jq_state *jq, jv input, jv keys) { if (jv_get_kind(input) == JV_KIND_ARRAY && jv_get_kind(keys) == JV_KIND_ARRAY && jv_array_length(jv_copy(input)) == jv_array_length(jv_copy(keys))) { return jv_sort(input, keys); } else { return type_error2(input, keys, "cannot be sorted, as they are not both arrays"); } } static jv f_group_by_impl(jq_state *jq, jv input, jv keys) { if (jv_get_kind(input) == JV_KIND_ARRAY && jv_get_kind(keys) == JV_KIND_ARRAY && jv_array_length(jv_copy(input)) == jv_array_length(jv_copy(keys))) { return jv_group(input, keys); } else { return type_error2(input, keys, "cannot be sorted, as they are not both arrays"); } } static int f_match_name_iter(const UChar* name, const UChar *name_end, int ngroups, int *groups, regex_t *reg, void *arg) { jv captures = *(jv*)arg; for (int i = 0; i < ngroups; ++i) { jv cap = jv_array_get(jv_copy(captures),groups[i]-1); if (jv_get_kind(cap) == JV_KIND_OBJECT) { cap = jv_object_set(cap, jv_string("name"), jv_string_sized((const char*)name, name_end-name)); captures = jv_array_set(captures,groups[i]-1,cap); } else { jv_free(cap); } } *(jv *)arg = captures; return 0; } static jv f_match(jq_state *jq, jv input, jv regex, jv modifiers, jv testmode) { int test = jv_equal(testmode, jv_true()); jv result; int onigret; int global = 0; regex_t *reg; OnigErrorInfo einfo; OnigRegion* region; if (jv_get_kind(input) != JV_KIND_STRING) { jv_free(regex); jv_free(modifiers); return type_error(input, "cannot be matched, as it is not a string"); } if (jv_get_kind(regex) != JV_KIND_STRING) { jv_free(input); jv_free(modifiers); return type_error(regex, "is not a string"); } OnigOptionType options = ONIG_OPTION_CAPTURE_GROUP; if (jv_get_kind(modifiers) == JV_KIND_STRING) { jv modarray = jv_string_explode(jv_copy(modifiers)); jv_array_foreach(modarray, i, mod) { switch ((int)jv_number_value(mod)) { case 'g': global = 1; break; case 'i': options |= ONIG_OPTION_IGNORECASE; break; case 'x': options |= ONIG_OPTION_EXTEND; break; case 'm': options |= ONIG_OPTION_MULTILINE; break; case 's': options |= ONIG_OPTION_SINGLELINE; break; case 'p': options |= ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE; break; case 'l': options |= ONIG_OPTION_FIND_LONGEST; break; case 'n': options |= ONIG_OPTION_FIND_NOT_EMPTY; break; default: jv_free(input); jv_free(regex); jv_free(modarray); return jv_invalid_with_msg(jv_string_concat(modifiers, jv_string(" is not a valid modifier string"))); } } jv_free(modarray); } else if (jv_get_kind(modifiers) != JV_KIND_NULL) { // If it isn't a string or null, then it is the wrong type... jv_free(input); jv_free(regex); return type_error(modifiers, "is not a string"); } jv_free(modifiers); onigret = onig_new(®, (const UChar*)jv_string_value(regex), (const UChar*)(jv_string_value(regex) + jv_string_length_bytes(jv_copy(regex))), options, ONIG_ENCODING_UTF8, ONIG_SYNTAX_PERL_NG, &einfo); if (onigret != ONIG_NORMAL) { UChar ebuf[ONIG_MAX_ERROR_MESSAGE_LEN]; onig_error_code_to_str(ebuf, onigret, &einfo); jv_free(input); jv_free(regex); return jv_invalid_with_msg(jv_string_concat(jv_string("Regex failure: "), jv_string((char*)ebuf))); } if (!test) result = jv_array(); const char *input_string = jv_string_value(input); const UChar* start = (const UChar*)jv_string_value(input); const unsigned long length = jv_string_length_bytes(jv_copy(input)); const UChar* end = start + length; region = onig_region_new(); do { onigret = onig_search(reg, (const UChar*)jv_string_value(input), end, /* string boundaries */ start, end, /* search boundaries */ region, ONIG_OPTION_NONE); if (onigret >= 0) { if (test) { result = jv_true(); break; } // Zero-width match if (region->end[0] == region->beg[0]) { unsigned long idx; const char *fr = (const char*)input_string; for (idx = 0; fr != input_string+region->beg[0]; idx++) { fr += jvp_utf8_decode_length(*fr); } jv match = jv_object_set(jv_object(), jv_string("offset"), jv_number(idx)); match = jv_object_set(match, jv_string("length"), jv_number(0)); match = jv_object_set(match, jv_string("string"), jv_string("")); match = jv_object_set(match, jv_string("captures"), jv_array()); result = jv_array_append(result, match); start += 1; continue; } unsigned long idx; unsigned long len; const char *fr = (const char*)input_string; for (idx = len = 0; fr < input_string+region->end[0]; len++) { if (fr == input_string+region->beg[0]) idx = len, len=0; fr += jvp_utf8_decode_length(*fr); } jv match = jv_object_set(jv_object(), jv_string("offset"), jv_number(idx)); unsigned long blen = region->end[0]-region->beg[0]; match = jv_object_set(match, jv_string("length"), jv_number(len)); match = jv_object_set(match, jv_string("string"), jv_string_sized(input_string+region->beg[0],blen)); jv captures = jv_array(); for (int i = 1; i < region->num_regs; ++i) { // Empty capture. if (region->beg[i] == region->end[i]) { // Didn't match. jv cap; if (region->beg[i] == -1) { cap = jv_object_set(jv_object(), jv_string("offset"), jv_number(-1)); cap = jv_object_set(cap, jv_string("string"), jv_null()); } else { fr = input_string; for (idx = 0; fr != input_string+region->beg[i]; idx++) { fr += jvp_utf8_decode_length(*fr); } cap = jv_object_set(jv_object(), jv_string("offset"), jv_number(idx)); cap = jv_object_set(cap, jv_string("string"), jv_string("")); } cap = jv_object_set(cap, jv_string("length"), jv_number(0)); cap = jv_object_set(cap, jv_string("name"), jv_null()); captures = jv_array_append(captures, cap); continue; } fr = input_string; for (idx = len = 0; fr != input_string+region->end[i]; len++) { if (fr == input_string+region->beg[i]) idx = len, len=0; fr += jvp_utf8_decode_length(*fr); } blen = region->end[i]-region->beg[i]; jv cap = jv_object_set(jv_object(), jv_string("offset"), jv_number(idx)); cap = jv_object_set(cap, jv_string("length"), jv_number(len)); cap = jv_object_set(cap, jv_string("string"), jv_string_sized(input_string+region->beg[i],blen)); cap = jv_object_set(cap, jv_string("name"), jv_null()); captures = jv_array_append(captures,cap); } onig_foreach_name(reg,f_match_name_iter,&captures); match = jv_object_set(match, jv_string("captures"), captures); result = jv_array_append(result, match); start = (const UChar*)(input_string+region->end[0]); onig_region_free(region,0); } else if (onigret == ONIG_MISMATCH) { if (test) result = jv_false(); break; } else { /* Error */ UChar ebuf[ONIG_MAX_ERROR_MESSAGE_LEN]; onig_error_code_to_str(ebuf, onigret, einfo); jv_free(result); result = jv_invalid_with_msg(jv_string_concat(jv_string("Regex failure: "), jv_string((char*)ebuf))); break; } } while (global && start != end); onig_region_free(region,1); region = NULL; if (region) onig_region_free(region,1); onig_free(reg); jv_free(input); jv_free(regex); return result; } static jv minmax_by(jv values, jv keys, int is_min) { if (jv_get_kind(values) != JV_KIND_ARRAY) return type_error2(values, keys, "cannot be iterated over"); if (jv_get_kind(keys) != JV_KIND_ARRAY) return type_error2(values, keys, "cannot be iterated over"); if (jv_array_length(jv_copy(values)) != jv_array_length(jv_copy(keys))) return type_error2(values, keys, "have wrong length"); if (jv_array_length(jv_copy(values)) == 0) { jv_free(values); jv_free(keys); return jv_null(); } jv ret = jv_array_get(jv_copy(values), 0); jv retkey = jv_array_get(jv_copy(keys), 0); for (int i=1; i=0; i--) { nerrors = builtins_bind_one(jq, bb, jq_builtins[i]); assert(!nerrors); } *bb = bind_bytecoded_builtins(*bb); *bb = gen_cbinding(function_list, sizeof(function_list)/sizeof(function_list[0]), *bb); return nerrors; }