1 files changed, 262 insertions, 0 deletions
diff --git a/internals/src/bind/base64.rs b/internals/src/bind/base64.rs
new file mode 100644
index 0000000..36bb735
--- /dev/null
+++ b/internals/src/bind/base64.rs
@@ -0,0 +1,262 @@
+use {base64 as extern_base64};
+use soft_ascii_string::{ SoftAsciiString, SoftAsciiChar};
+use failure::Fail;
+
+use ::utils::is_utf8_continuation_byte;
+use ::error::{EncodingError, EncodingErrorKind};
+
+use super::encoded_word::EncodedWordWriter;
+
+const CHARSET: extern_base64::CharacterSet = extern_base64::CharacterSet::Standard;
+const NO_LINE_WRAP: extern_base64::LineWrap = extern_base64::LineWrap::NoWrap;
+const LINE_WRAP: extern_base64::LineWrap =
+    extern_base64::LineWrap::Wrap(78, extern_base64::LineEnding::CRLF);
+const USE_PADDING: bool = true;
+const ECW_STRIP_WHITESPACE: bool = false;
+const NON_ECW_STRIP_WHITESPACE: bool = true;
+
+
+#[inline]
+pub fn normal_encode<R: AsRef<[u8]>>(input: R) -> SoftAsciiString {
+    let res = extern_base64::encode_config( input.as_ref(), extern_base64::Config::new(
+        //FIXME: check if line wrap should be used here, I thinks it should
+        CHARSET, USE_PADDING, NON_ECW_STRIP_WHITESPACE, LINE_WRAP
+    ));
+    SoftAsciiString::from_unchecked(res)
+}
+
+#[inline]
+pub fn normal_decode<R: AsRef<[u8]>>(input: R) -> Result<Vec<u8>, EncodingError> {
+    extern_base64::decode_config( input.as_ref(), extern_base64::Config::new(
+        CHARSET, USE_PADDING, NON_ECW_STRIP_WHITESPACE, LINE_WRAP
+    )).map_err(|err| err
+        .context(EncodingErrorKind::Malformed)
+        .into()
+    )
+}
+
+#[inline(always)]
+fn calc_max_input_len(max_output_len: usize) -> usize {
+    //NOTE: *3/4 is NOT correct due to the way this
+    // relies on non-floting point division
+    max_output_len / 4 * 3
+}
+
+//NOTE: base64 does not have to care about the EncodedWordContext,
+// it is valid under all of them anyway
+///
+/// # Note
+/// for now this only supports utf8/ascii input, as
+/// we have to know where we can split
+#[inline(always)]
+pub fn encoded_word_encode<O, R: AsRef<str>>( input: R, out: &mut O )
+    where O: EncodedWordWriter
+{
+    _encoded_word_encode(input.as_ref(), out)
+}
+
+fn _encoded_word_encode<O>( input: &str, out: &mut O )
+    where O: EncodedWordWriter
+{
+    let config = extern_base64::Config::new(
+        CHARSET, USE_PADDING, ECW_STRIP_WHITESPACE, NO_LINE_WRAP
+    );
+
+    debug_assert!( USE_PADDING == true, "size calculation is tailored for padding");
+
+    let max_output_len = out.max_payload_len();
+    let max_input_len = calc_max_input_len(max_output_len);
+    let mut rest = input;
+    let mut buff = String::with_capacity(max_output_len);
+
+    out.write_ecw_start();
+
+    loop {
+        buff.clear();
+
+        // additional bytes in uf8 always start with binary b10xxxxxx
+        let rest_len = rest.len();
+        let split_idx = if max_input_len >= rest_len {
+            rest_len
+        } else {
+            let mut tmp_split = max_input_len;
+            let rest_bytes = rest.as_bytes();
+
+            // the byte at the current index starts with that we are in a
+            // position where we can't split and have to move left until
+            // the beginning of the utf8
+            while is_utf8_continuation_byte(rest_bytes[tmp_split]) {
+                //UNDERFLOW_SAFE: if the string is correct (contains valid utf8) this cant undeflow as
+                // the first byte cant start with 0b10xxxxxx.
+                tmp_split -= 1;
+            }
+            tmp_split
+        };
+
+        let (this, _rest) = rest.split_at(split_idx);
+        //very important ;=)
+        rest = _rest;
+
+        extern_base64::encode_config_buf(this, config.clone(), &mut buff);
+        //FIXME add a write_str method to EncodedWordWriter
+        for ch in buff.chars() {
+            //SAFE: base64 consist of only ascii chars
+            out.write_char(SoftAsciiChar::from_unchecked(ch))
+        }
+
+        if rest.len() == 0 {
+            break
+        } else {
+            out.start_next_encoded_word();
+        }
+    }
+    out.write_ecw_end();
+}
+
+#[inline(always)]
+pub fn encoded_word_decode<R: AsRef<[u8]>>(input: R)
+    -> Result<Vec<u8>, EncodingError>
+{
+    extern_base64::decode_config(input.as_ref(), extern_base64::Config::new(
+        CHARSET, USE_PADDING, ECW_STRIP_WHITESPACE, NO_LINE_WRAP
+    )).map_err(|err| err
+        .context(EncodingErrorKind::Malformed)
+        .into()
+    )
+}
+
+
+
+
+#[cfg(test)]
+mod test {
+    use soft_ascii_string::SoftAsciiStr;
+    use bind::encoded_word::{VecWriter, EncodedWordEncoding};
+    use super::*;
+
+    #[test]
+    fn encoding_uses_line_wrap() {
+        let input = concat!(
+            "0123456789", "0123456789",
+            "0123456789", "0123456789",
+            "0123456789", "0123456789",
+        );
+
+        let res = normal_encode(input);
+
+        assert_eq!(res.as_str(),
+           "MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nzg5MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nz\r\ng5");
+
+        let dec = normal_decode(res).unwrap();
+
+        assert_eq!(dec, input.as_bytes());
+    }
+
+    #[test]
+    fn calc_max_input_len_from_max_output_len() {
+        assert!(USE_PADDING, "algorithm is specific to the usage of padding");
+        assert_eq!(45, calc_max_input_len(60));
+        assert_eq!(45, calc_max_input_len(61));
+        assert_eq!(45, calc_max_input_len(62));
+        assert_eq!(45, calc_max_input_len(63));
+        assert_eq!(48, calc_max_input_len(64));
+    }
+
+    #[test]
+    fn encode_decode_normal() {
+        let pairs: &[(&str,&[u8])] = &[
+            (
+                "this is some\r\nlong\r\ntest.",
+                b"dGhpcyBpcyBzb21lDQpsb25nDQp0ZXN0Lg=="
+            ),
+            (
+                "",
+                b""
+            )
+        ];
+        for &(raw, encoded) in pairs.iter() {
+            assert_eq!(
+                normal_encode(raw).as_bytes(),
+                encoded
+            );
+            assert_eq!(
+                assert_ok!(normal_decode(encoded)),
+                raw.as_bytes()
+            )
+
+        }
+    }
+
+    macro_rules! test_ecw_encode {
+        ($name:ident, data $data:expr => [$($item:expr),*]) => {
+            #[test]
+            fn $name() {
+                let test_data = $data;
+                let mut out = VecWriter::new(
+                    SoftAsciiStr::from_unchecked("utf8"),
+                    EncodedWordEncoding::Base64
+                );
+
+                encoded_word_encode( test_data, &mut out );
+
+                let expected = &[
+                    $($item),*
+                ];
+
+                let iter = expected.iter()
+                    .zip( out.data().iter().map(|x|x.as_str()) )
+                    .enumerate();
+
+                for ( idx, (expected, got) ) in iter {
+                    if  *expected != got {
+                        panic!( " item nr {}: {:?} != {:?} ", idx, expected, got );
+                    }
+                }
+
+                let e_len = expected.len();
+                let g_len = out.data().len();
+                if e_len > g_len {
+                    panic!( "expected following additional items: {:?}", &expected[g_len..e_len])
+                }
+                if e_len < g_len {
+                    panic!( "got following additional items: {:?}", &out.data()[e_len..g_len])
+                }
+            }
+        };
+    }
+
+    test_ecw_encode! { ecw_simple,
+        data "()\"" => [
+            "=?utf8?B?KCki?="
+        ]
+    }
+
+    test_ecw_encode! { ecw_simple_max_len,
+        data "012345678901234567890123456789012345678944448888" => [
+            "=?utf8?B?MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nzg5MDEyMzQ1Njc4OTQ0NDQ4ODg4?="
+        ]
+    }
+
+    test_ecw_encode! { multiple_ecws,
+        data "012345678901234567890123456789012345678944448888NEWWORD" => [
+            "=?utf8?B?MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nzg5MDEyMzQ1Njc4OTQ0NDQ4ODg4?=",
+            "=?utf8?B?TkVXV09SRA==?="
+        ]
+    }
+
+    test_ecw_encode! { ecw_end_in_multibyte_codepoint,
+        data "01234567890123456789012345678901234567894444888↓" => [
+            "=?utf8?B?MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nzg5MDEyMzQ1Njc4OTQ0NDQ4ODg=?=",
+            "=?utf8?B?4oaT?="
+        ]
+    }
+
+
+    #[test]
+    fn decode_encoded_word() {
+        assert_eq!(
+            assert_ok!(encoded_word_decode("dGhpc19jcmF6eV9lbmNvZGVkX3dvcmQ=")),
+            b"this_crazy_encoded_word"
+        );
+    }
+}
+\ No newline at end of file