summaryrefslogtreecommitdiffstats
path: root/internals/src/bind/base64.rs
diff options
context:
space:
mode:
Diffstat (limited to 'internals/src/bind/base64.rs')
-rw-r--r--internals/src/bind/base64.rs262
1 files changed, 262 insertions, 0 deletions
diff --git a/internals/src/bind/base64.rs b/internals/src/bind/base64.rs
new file mode 100644
index 0000000..36bb735
--- /dev/null
+++ b/internals/src/bind/base64.rs
@@ -0,0 +1,262 @@
+use {base64 as extern_base64};
+use soft_ascii_string::{ SoftAsciiString, SoftAsciiChar};
+use failure::Fail;
+
+use ::utils::is_utf8_continuation_byte;
+use ::error::{EncodingError, EncodingErrorKind};
+
+use super::encoded_word::EncodedWordWriter;
+
+const CHARSET: extern_base64::CharacterSet = extern_base64::CharacterSet::Standard;
+const NO_LINE_WRAP: extern_base64::LineWrap = extern_base64::LineWrap::NoWrap;
+const LINE_WRAP: extern_base64::LineWrap =
+ extern_base64::LineWrap::Wrap(78, extern_base64::LineEnding::CRLF);
+const USE_PADDING: bool = true;
+const ECW_STRIP_WHITESPACE: bool = false;
+const NON_ECW_STRIP_WHITESPACE: bool = true;
+
+
+#[inline]
+pub fn normal_encode<R: AsRef<[u8]>>(input: R) -> SoftAsciiString {
+ let res = extern_base64::encode_config( input.as_ref(), extern_base64::Config::new(
+ //FIXME: check if line wrap should be used here, I thinks it should
+ CHARSET, USE_PADDING, NON_ECW_STRIP_WHITESPACE, LINE_WRAP
+ ));
+ SoftAsciiString::from_unchecked(res)
+}
+
+#[inline]
+pub fn normal_decode<R: AsRef<[u8]>>(input: R) -> Result<Vec<u8>, EncodingError> {
+ extern_base64::decode_config( input.as_ref(), extern_base64::Config::new(
+ CHARSET, USE_PADDING, NON_ECW_STRIP_WHITESPACE, LINE_WRAP
+ )).map_err(|err| err
+ .context(EncodingErrorKind::Malformed)
+ .into()
+ )
+}
+
+#[inline(always)]
+fn calc_max_input_len(max_output_len: usize) -> usize {
+ //NOTE: *3/4 is NOT correct due to the way this
+ // relies on non-floting point division
+ max_output_len / 4 * 3
+}
+
+//NOTE: base64 does not have to care about the EncodedWordContext,
+// it is valid under all of them anyway
+///
+/// # Note
+/// for now this only supports utf8/ascii input, as
+/// we have to know where we can split
+#[inline(always)]
+pub fn encoded_word_encode<O, R: AsRef<str>>( input: R, out: &mut O )
+ where O: EncodedWordWriter
+{
+ _encoded_word_encode(input.as_ref(), out)
+}
+
+fn _encoded_word_encode<O>( input: &str, out: &mut O )
+ where O: EncodedWordWriter
+{
+ let config = extern_base64::Config::new(
+ CHARSET, USE_PADDING, ECW_STRIP_WHITESPACE, NO_LINE_WRAP
+ );
+
+ debug_assert!( USE_PADDING == true, "size calculation is tailored for padding");
+
+ let max_output_len = out.max_payload_len();
+ let max_input_len = calc_max_input_len(max_output_len);
+ let mut rest = input;
+ let mut buff = String::with_capacity(max_output_len);
+
+ out.write_ecw_start();
+
+ loop {
+ buff.clear();
+
+ // additional bytes in uf8 always start with binary b10xxxxxx
+ let rest_len = rest.len();
+ let split_idx = if max_input_len >= rest_len {
+ rest_len
+ } else {
+ let mut tmp_split = max_input_len;
+ let rest_bytes = rest.as_bytes();
+
+ // the byte at the current index starts with that we are in a
+ // position where we can't split and have to move left until
+ // the beginning of the utf8
+ while is_utf8_continuation_byte(rest_bytes[tmp_split]) {
+ //UNDERFLOW_SAFE: if the string is correct (contains valid utf8) this cant undeflow as
+ // the first byte cant start with 0b10xxxxxx.
+ tmp_split -= 1;
+ }
+ tmp_split
+ };
+
+ let (this, _rest) = rest.split_at(split_idx);
+ //very important ;=)
+ rest = _rest;
+
+ extern_base64::encode_config_buf(this, config.clone(), &mut buff);
+ //FIXME add a write_str method to EncodedWordWriter
+ for ch in buff.chars() {
+ //SAFE: base64 consist of only ascii chars
+ out.write_char(SoftAsciiChar::from_unchecked(ch))
+ }
+
+ if rest.len() == 0 {
+ break
+ } else {
+ out.start_next_encoded_word();
+ }
+ }
+ out.write_ecw_end();
+}
+
+#[inline(always)]
+pub fn encoded_word_decode<R: AsRef<[u8]>>(input: R)
+ -> Result<Vec<u8>, EncodingError>
+{
+ extern_base64::decode_config(input.as_ref(), extern_base64::Config::new(
+ CHARSET, USE_PADDING, ECW_STRIP_WHITESPACE, NO_LINE_WRAP
+ )).map_err(|err| err
+ .context(EncodingErrorKind::Malformed)
+ .into()
+ )
+}
+
+
+
+
+#[cfg(test)]
+mod test {
+ use soft_ascii_string::SoftAsciiStr;
+ use bind::encoded_word::{VecWriter, EncodedWordEncoding};
+ use super::*;
+
+ #[test]
+ fn encoding_uses_line_wrap() {
+ let input = concat!(
+ "0123456789", "0123456789",
+ "0123456789", "0123456789",
+ "0123456789", "0123456789",
+ );
+
+ let res = normal_encode(input);
+
+ assert_eq!(res.as_str(),
+ "MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nzg5MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nz\r\ng5");
+
+ let dec = normal_decode(res).unwrap();
+
+ assert_eq!(dec, input.as_bytes());
+ }
+
+ #[test]
+ fn calc_max_input_len_from_max_output_len() {
+ assert!(USE_PADDING, "algorithm is specific to the usage of padding");
+ assert_eq!(45, calc_max_input_len(60));
+ assert_eq!(45, calc_max_input_len(61));
+ assert_eq!(45, calc_max_input_len(62));
+ assert_eq!(45, calc_max_input_len(63));
+ assert_eq!(48, calc_max_input_len(64));
+ }
+
+ #[test]
+ fn encode_decode_normal() {
+ let pairs: &[(&str,&[u8])] = &[
+ (
+ "this is some\r\nlong\r\ntest.",
+ b"dGhpcyBpcyBzb21lDQpsb25nDQp0ZXN0Lg=="
+ ),
+ (
+ "",
+ b""
+ )
+ ];
+ for &(raw, encoded) in pairs.iter() {
+ assert_eq!(
+ normal_encode(raw).as_bytes(),
+ encoded
+ );
+ assert_eq!(
+ assert_ok!(normal_decode(encoded)),
+ raw.as_bytes()
+ )
+
+ }
+ }
+
+ macro_rules! test_ecw_encode {
+ ($name:ident, data $data:expr => [$($item:expr),*]) => {
+ #[test]
+ fn $name() {
+ let test_data = $data;
+ let mut out = VecWriter::new(
+ SoftAsciiStr::from_unchecked("utf8"),
+ EncodedWordEncoding::Base64
+ );
+
+ encoded_word_encode( test_data, &mut out );
+
+ let expected = &[
+ $($item),*
+ ];
+
+ let iter = expected.iter()
+ .zip( out.data().iter().map(|x|x.as_str()) )
+ .enumerate();
+
+ for ( idx, (expected, got) ) in iter {
+ if *expected != got {
+ panic!( " item nr {}: {:?} != {:?} ", idx, expected, got );
+ }
+ }
+
+ let e_len = expected.len();
+ let g_len = out.data().len();
+ if e_len > g_len {
+ panic!( "expected following additional items: {:?}", &expected[g_len..e_len])
+ }
+ if e_len < g_len {
+ panic!( "got following additional items: {:?}", &out.data()[e_len..g_len])
+ }
+ }
+ };
+ }
+
+ test_ecw_encode! { ecw_simple,
+ data "()\"" => [
+ "=?utf8?B?KCki?="
+ ]
+ }
+
+ test_ecw_encode! { ecw_simple_max_len,
+ data "012345678901234567890123456789012345678944448888" => [
+ "=?utf8?B?MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nzg5MDEyMzQ1Njc4OTQ0NDQ4ODg4?="
+ ]
+ }
+
+ test_ecw_encode! { multiple_ecws,
+ data "012345678901234567890123456789012345678944448888NEWWORD" => [
+ "=?utf8?B?MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nzg5MDEyMzQ1Njc4OTQ0NDQ4ODg4?=",
+ "=?utf8?B?TkVXV09SRA==?="
+ ]
+ }
+
+ test_ecw_encode! { ecw_end_in_multibyte_codepoint,
+ data "01234567890123456789012345678901234567894444888↓" => [
+ "=?utf8?B?MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nzg5MDEyMzQ1Njc4OTQ0NDQ4ODg=?=",
+ "=?utf8?B?4oaT?="
+ ]
+ }
+
+
+ #[test]
+ fn decode_encoded_word() {
+ assert_eq!(
+ assert_ok!(encoded_word_decode("dGhpc19jcmF6eV9lbmNvZGVkX3dvcmQ=")),
+ b"this_crazy_encoded_word"
+ );
+ }
+} \ No newline at end of file