diff options
author | Shaun Guth <sguth@practicefusion.com> | 2016-01-24 23:30:28 +0000 |
---|---|---|
committer | William Langford <wlangfor@gmail.com> | 2017-02-12 16:03:59 -0500 |
commit | e0b784ac6d7885669846337faad446be4ed0ded8 (patch) | |
tree | e9437d2c7f2d71e6d9fe6fd47e69932193156ac0 | |
parent | dc679081fa770c260ca9a569a8a4fdbb10bcdc20 (diff) |
Add @base64d for decoding base64 #47
-rw-r--r-- | AUTHORS | 1 | ||||
-rw-r--r-- | Makefile.am | 2 | ||||
-rw-r--r-- | appveyor.yml | 2 | ||||
-rw-r--r-- | docs/content/3.manual/manual.yml | 13 | ||||
-rw-r--r-- | src/builtin.c | 64 | ||||
-rw-r--r-- | tests/base64.test | 35 | ||||
-rwxr-xr-x | tests/base64test | 5 | ||||
-rw-r--r-- | tests/jq.test | 7 |
8 files changed, 124 insertions, 5 deletions
@@ -58,6 +58,7 @@ Ryoichi KATO <ryo1kato@gmail.com> - doc fixes Rémy Léone <remy.leone@gmail.com> - add .travis.yml Santiago Lapresta <santiago.lapresta@gmail.com> - join, arrays, all, any, other filters Sebastian Freundt <freundt@ga-group.nl> - build +Shaun Guth <shaun.guth@gmail.com> - base64d Shay Elkin <shay@everything.me> Simon Elsbrock <simon@iodev.org> - Debian Stefan Seemayer <stefan@seemayer.de> diff --git a/Makefile.am b/Makefile.am index c1eaf6de..cf5d74dc 100644 --- a/Makefile.am +++ b/Makefile.am @@ -115,7 +115,7 @@ endif ### Tests (make check) -TESTS = tests/optionaltest tests/mantest tests/jqtest tests/onigtest tests/shtest tests/utf8test +TESTS = tests/optionaltest tests/mantest tests/jqtest tests/onigtest tests/shtest tests/utf8test tests/base64test TESTS_ENVIRONMENT = NO_VALGRIND=$(NO_VALGRIND) diff --git a/appveyor.yml b/appveyor.yml index 719b4dce..e58981e7 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -26,7 +26,7 @@ build_script: test_script: # tests/optionaltest and tests/shtest fail on Windows; run them # anyways but ignore their failures. Also, trace shtest. - - bash -lc "exec 0</dev/null && cd $APPVEYOR_BUILD_FOLDER && make -j3 'TESTS=tests/mantest tests/jqtest tests/onigtest' check" + - bash -lc "exec 0</dev/null && cd $APPVEYOR_BUILD_FOLDER && make -j3 'TESTS=tests/mantest tests/jqtest tests/onigtest tests/base64test' check" - bash -lc "exec 0</dev/null && cd $APPVEYOR_BUILD_FOLDER && make TESTS=tests/optionaltest check || cat test-suite.log" - bash -lc "exec 0</dev/null && cd $APPVEYOR_BUILD_FOLDER && make TRACE_TESTS=1 TESTS=tests/shtest check || cat test-suite.log" diff --git a/docs/content/3.manual/manual.yml b/docs/content/3.manual/manual.yml index baad2ad3..0f917426 100644 --- a/docs/content/3.manual/manual.yml +++ b/docs/content/3.manual/manual.yml @@ -1770,6 +1770,11 @@ sections: The input is converted to base64 as specified by RFC 4648. + * `@base64d`: + + The inverse of `@base64`, input is decoded as specified by RFC 4648. 
+ Note\: If the decoded string is not UTF-8, the results are undefined. + This syntax can be combined with string interpolation in a useful way. You can follow a `@foo` token with a string literal. The contents of the string literal will *not* be @@ -1799,6 +1804,14 @@ sections: input: "\"O'Hara's Ale\"" output: ["\"echo 'O'\\\\''Hara'\\\\''s Ale'\""] + - program: '@base64' + input: '"This is a message"' + output: ['"VGhpcyBpcyBhIG1lc3NhZ2U="'] + + - program: '@base64d' + input: '"VGhpcyBpcyBhIG1lc3NhZ2U="' + output: ['"This is a message"'] + - title: "Dates" body: | diff --git a/src/builtin.c b/src/builtin.c index aa0ab4d5..24e311e1 100644 --- a/src/builtin.c +++ b/src/builtin.c @@ -396,6 +396,24 @@ static jv f_utf8bytelength(jq_state *jq, jv input) { #define CHARS_ALPHANUM "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" +static const unsigned char BASE64_ENCODE_TABLE[64 + 1] = CHARS_ALPHANUM "+/"; +static const unsigned char BASE64_INVALID_ENTRY = 0xFF; +static const unsigned char BASE64_DECODE_TABLE[255] = { + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 62, // + + 0xFF, 0xFF, 0xFF, + 63, // / + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, // 0-9 + 0xFF, 0xFF, 0xFF, + 99, // = + 0xFF, 0xFF, 0xFF, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // A-Z + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, // a-z + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; + + static jv escape_string(jv input, const char* escapings) { assert(jv_get_kind(input) == JV_KIND_STRING); @@ -548,7 +566,6 @@ static jv f_format(jq_state *jq, jv input, jv fmt) { jv_free(fmt); input = f_tostring(jq, input); jv line = jv_string(""); - const char b64[64 + 1] = CHARS_ALPHANUM "+/"; const unsigned char* data = (const unsigned char*)jv_string_value(input); int len = jv_string_length_bytes(jv_copy(input)); for (int i=0; i<len; i+=3) { @@ -560,7 +577,7 @@ static jv f_format(jq_state *jq, jv input, jv fmt) { } char buf[4]; for (int j=0; j<4; j++) { - buf[j] = b64[(code >> (18 - j*6)) & 0x3f]; + buf[j] = BASE64_ENCODE_TABLE[(code >> (18 - j*6)) & 0x3f]; } if (n < 3) buf[3] = '='; if (n < 2) buf[2] = '='; @@ -568,6 +585,49 @@ static jv f_format(jq_state *jq, jv input, jv fmt) { } jv_free(input); return line; + } else if (!strcmp(fmt_s, "base64d")) { + jv_free(fmt); + input = f_tostring(jq, input); + const unsigned char* data = (const unsigned char*)jv_string_value(input); + int len = jv_string_length_bytes(jv_copy(input)); + size_t decoded_len = (3 * len) / 4; // 3 usable bytes for every 4 bytes of input + char *result = malloc(decoded_len * sizeof(char)); + memset(result, 0, decoded_len * sizeof(char)); + uint32_t ri = 0; + int input_bytes_read=0; + uint32_t code = 0; + for (int i=0; i<len && data[i] != '='; i++) { + if (BASE64_DECODE_TABLE[data[i]] == BASE64_INVALID_ENTRY) { + free(result); + return type_error(input, "is not valid base64 data"); + } + + code <<= 
6; + code |= BASE64_DECODE_TABLE[data[i]]; + input_bytes_read++; + + if (input_bytes_read == 4) { + result[ri++] = (code >> 16) & 0xFF; + result[ri++] = (code >> 8) & 0xFF; + result[ri++] = code & 0xFF; + input_bytes_read = 0; + code = 0; + } + } + if (input_bytes_read == 3) { + result[ri++] = (code >> 10) & 0xFF; + result[ri++] = (code >> 2) & 0xFF; + } else if (input_bytes_read == 2) { + result[ri++] = (code >> 4) & 0xFF; + } else if (input_bytes_read == 1) { + free(result); + return type_error(input, "trailing base64 byte found"); + } + + jv line = jv_string_sized(result, ri); + jv_free(input); + free(result); + return line; } else { jv_free(input); return jv_invalid_with_msg(jv_string_concat(fmt, jv_string(" is not a valid format"))); diff --git a/tests/base64.test b/tests/base64.test new file mode 100644 index 00000000..0f82b0b7 --- /dev/null +++ b/tests/base64.test @@ -0,0 +1,35 @@ +# Tests are groups of three lines: program, input, expected output +# Blank lines and lines starting with # are ignored + +@base64 +"<>&'\"\t" +"PD4mJyIJ" + +# decoding encoded output results in same text +(@base64|@base64d) +"<>&'\"\t" +"<>&'\"\t" + +# regression test for #436 +@base64 +"foóbar\n" +"Zm/Ds2Jhcgo=" + +@base64d +"Zm/Ds2Jhcgo=" +"foóbar\n" + +# optional trailing equals padding (With padding, this is cWl4YmF6Cg==) +@base64d +"cWl4YmF6Cg" +"qixbaz\n" + +# invalid base64 characters (whitespace) +. | try @base64d catch . +"Not base64 data" +"string (\"Not base64...) is not valid base64 data" + +# invalid base64 (too many bytes, QUJD = "ABC") +. | try @base64d catch . +"QUJDa" +"string (\"QUJDa\") trailing base64 byte found" diff --git a/tests/base64test b/tests/base64test new file mode 100755 index 00000000..85fe64b7 --- /dev/null +++ b/tests/base64test @@ -0,0 +1,5 @@ +#!/bin/sh + +. 
"${0%/*}/setup" "$@" + +$VALGRIND $Q $JQ -L "$mods" --run-tests $JQTESTDIR/base64.test diff --git a/tests/jq.test b/tests/jq.test index fa02b6d7..f510a917 100644 --- a/tests/jq.test +++ b/tests/jq.test @@ -61,7 +61,7 @@ null null "interpolation" -@text,@json,([1,.] | (@csv, @tsv)),@html,@uri,@sh,@base64 +@text,@json,([1,.] | (@csv, @tsv)),@html,@uri,@sh,@base64,(@base64 | @base64d) "<>&'\"\t" "<>&'\"\t" "\"<>&'\\\"\\t\"" @@ -71,12 +71,17 @@ null "%3C%3E%26'%22%09" "'<>&'\\''\"\t'" "PD4mJyIJ" +"<>&'\"\t" # regression test for #436 @base64 "foóbar\n" "Zm/Ds2Jhcgo=" +@base64d +"Zm/Ds2Jhcgo=" +"foóbar\n" + @uri "\u03bc" "%CE%BC" |