diff options
author | Philipp Korber <p.korber@1aim.com> | 2018-11-16 15:46:43 +0100 |
---|---|---|
committer | Philipp Korber <p.korber@1aim.com> | 2018-11-16 15:46:43 +0100 |
commit | 652d6f0ffeee7302a2cb51059bef75d8b0bb50be (patch) | |
tree | c3851592642938172f280f7428d43e08b0fe2cbe /internals | |
parent | 0947fe8996149fe20a6d47a793f9555790eb2eae (diff) |
refactor: merged sources of mail-headers,mail-internals,mail-core, mail
Originally it was planned to do a merge with `--allow-unrelated-history`
but this cannot be done as `mail-core` has an "invalid" history which
has a merge conflict **with itself**. So even rewinding the history on
an empty repo is not possible.
Instead the code was directly copied over, losing history.
But the history is still available in the different
`history-backup-*` branches. It is just that the past history
is decoupled from the current history.
Diffstat (limited to 'internals')
-rw-r--r-- | internals/Cargo.toml | 35 | ||||
-rw-r--r-- | internals/README.md | 33 | ||||
-rw-r--r-- | internals/src/bind/base64.rs | 262 | ||||
-rw-r--r-- | internals/src/bind/encoded_word/impls.rs | 98 | ||||
-rw-r--r-- | internals/src/bind/encoded_word/mod.rs | 92 | ||||
-rw-r--r-- | internals/src/bind/idna.rs | 73 | ||||
-rw-r--r-- | internals/src/bind/mime.rs | 56 | ||||
-rw-r--r-- | internals/src/bind/mod.rs | 8 | ||||
-rw-r--r-- | internals/src/bind/quoted_printable.rs | 360 | ||||
-rw-r--r-- | internals/src/bind/quoted_string.rs | 101 | ||||
-rw-r--r-- | internals/src/encoder/encodable.rs | 187 | ||||
-rw-r--r-- | internals/src/encoder/mod.rs | 1712 | ||||
-rw-r--r-- | internals/src/encoder/trace.rs | 167 | ||||
-rw-r--r-- | internals/src/error.rs | 239 | ||||
-rw-r--r-- | internals/src/grammar.rs | 423 | ||||
-rw-r--r-- | internals/src/lib.rs | 40 | ||||
-rw-r--r-- | internals/src/macros.rs | 61 | ||||
-rw-r--r-- | internals/src/mail_type.rs | 47 | ||||
-rw-r--r-- | internals/src/utils/mod.rs | 138 |
19 files changed, 4132 insertions, 0 deletions
diff --git a/internals/Cargo.toml b/internals/Cargo.toml new file mode 100644 index 0000000..61a99ed --- /dev/null +++ b/internals/Cargo.toml @@ -0,0 +1,35 @@ +[package] +authors = ["Philipp Korber <p.korber@1aim.com>"] +name = "mail-internals" +description = "[mail-api] _internal_ parts for the mail-api crates" +documentation = "https://docs.rs/mail-internals" +keywords = ["mail-api"] +categories = [] +license = "MIT OR Apache-2.0" +repository = "https://github.com/1aim/mail" +version = "0.2.0" +readme = "./README.md" + +[dependencies] +failure = "0.1.1" +nom = "3.1.0" +chrono = "0.4.0" +base64 = "0.6.0" +quoted_printable = "0.4.0" +idna = "0.1.4" +percent-encoding = "1.0.0" +quoted-string = "0.6" +media-type-impl-utils = { git="https://github.com/1aim/media-type-impl-utils" } +soft-ascii-string = "1.0" +vec1 = "1.0" + +[dependencies.mime] +git="https://github.com/1aim/mime" +branch="parser_revamp" +features=["expose-param-utils"] +version="0.4.0" + + +[features] +default = [] +traceing = [] diff --git a/internals/README.md b/internals/README.md new file mode 100644 index 0000000..acb8693 --- /dev/null +++ b/internals/README.md @@ -0,0 +1,33 @@ + +# mail-internal + +**Provides some internal functionality for the `mail` crate.** + +--- + +The main part of this crate is the `EncodingBuffer` which +is the place the headers write their content to (in an encoded +form). Normally nothing in this crate needs to be used, the +only exception is if you want to write your own mail header +components for your custom mail header. In which case some +of the things in this crate might prove useful for you. +(E.g. the `bind` module which binds some external crates +like e.g. `quoted-string` and `idna`) + +Documentation can be [viewed on docs.rs](https://docs.rs/mail-internals) +(once it is published).
+ +## License + +Licensed under either of + + * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) + * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) + +at your option. + +### Contribution + +Unless you explicitly state otherwise, any contribution intentionally submitted +for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any +additional terms or conditions. diff --git a/internals/src/bind/base64.rs b/internals/src/bind/base64.rs new file mode 100644 index 0000000..36bb735 --- /dev/null +++ b/internals/src/bind/base64.rs @@ -0,0 +1,262 @@ +use {base64 as extern_base64}; +use soft_ascii_string::{ SoftAsciiString, SoftAsciiChar}; +use failure::Fail; + +use ::utils::is_utf8_continuation_byte; +use ::error::{EncodingError, EncodingErrorKind}; + +use super::encoded_word::EncodedWordWriter; + +const CHARSET: extern_base64::CharacterSet = extern_base64::CharacterSet::Standard; +const NO_LINE_WRAP: extern_base64::LineWrap = extern_base64::LineWrap::NoWrap; +const LINE_WRAP: extern_base64::LineWrap = + extern_base64::LineWrap::Wrap(78, extern_base64::LineEnding::CRLF); +const USE_PADDING: bool = true; +const ECW_STRIP_WHITESPACE: bool = false; +const NON_ECW_STRIP_WHITESPACE: bool = true; + + +#[inline] +pub fn normal_encode<R: AsRef<[u8]>>(input: R) -> SoftAsciiString { + let res = extern_base64::encode_config( input.as_ref(), extern_base64::Config::new( + //FIXME: check if line wrap should be used here, I thinks it should + CHARSET, USE_PADDING, NON_ECW_STRIP_WHITESPACE, LINE_WRAP + )); + SoftAsciiString::from_unchecked(res) +} + +#[inline] +pub fn normal_decode<R: AsRef<[u8]>>(input: R) -> Result<Vec<u8>, EncodingError> { + extern_base64::decode_config( input.as_ref(), extern_base64::Config::new( + CHARSET, USE_PADDING, NON_ECW_STRIP_WHITESPACE, LINE_WRAP + )).map_err(|err| err + .context(EncodingErrorKind::Malformed) + 
.into() + ) +} + +#[inline(always)] +fn calc_max_input_len(max_output_len: usize) -> usize { + //NOTE: *3/4 is NOT correct due to the way this + // relies on non-floting point division + max_output_len / 4 * 3 +} + +//NOTE: base64 does not have to care about the EncodedWordContext, +// it is valid under all of them anyway +/// +/// # Note +/// for now this only supports utf8/ascii input, as +/// we have to know where we can split +#[inline(always)] +pub fn encoded_word_encode<O, R: AsRef<str>>( input: R, out: &mut O ) + where O: EncodedWordWriter +{ + _encoded_word_encode(input.as_ref(), out) +} + +fn _encoded_word_encode<O>( input: &str, out: &mut O ) + where O: EncodedWordWriter +{ + let config = extern_base64::Config::new( + CHARSET, USE_PADDING, ECW_STRIP_WHITESPACE, NO_LINE_WRAP + ); + + debug_assert!( USE_PADDING == true, "size calculation is tailored for padding"); + + let max_output_len = out.max_payload_len(); + let max_input_len = calc_max_input_len(max_output_len); + let mut rest = input; + let mut buff = String::with_capacity(max_output_len); + + out.write_ecw_start(); + + loop { + buff.clear(); + + // additional bytes in uf8 always start with binary b10xxxxxx + let rest_len = rest.len(); + let split_idx = if max_input_len >= rest_len { + rest_len + } else { + let mut tmp_split = max_input_len; + let rest_bytes = rest.as_bytes(); + + // the byte at the current index starts with that we are in a + // position where we can't split and have to move left until + // the beginning of the utf8 + while is_utf8_continuation_byte(rest_bytes[tmp_split]) { + //UNDERFLOW_SAFE: if the string is correct (contains valid utf8) this cant undeflow as + // the first byte cant start with 0b10xxxxxx. 
+ tmp_split -= 1; + } + tmp_split + }; + + let (this, _rest) = rest.split_at(split_idx); + //very important ;=) + rest = _rest; + + extern_base64::encode_config_buf(this, config.clone(), &mut buff); + //FIXME add a write_str method to EncodedWordWriter + for ch in buff.chars() { + //SAFE: base64 consist of only ascii chars + out.write_char(SoftAsciiChar::from_unchecked(ch)) + } + + if rest.len() == 0 { + break + } else { + out.start_next_encoded_word(); + } + } + out.write_ecw_end(); +} + +#[inline(always)] +pub fn encoded_word_decode<R: AsRef<[u8]>>(input: R) + -> Result<Vec<u8>, EncodingError> +{ + extern_base64::decode_config(input.as_ref(), extern_base64::Config::new( + CHARSET, USE_PADDING, ECW_STRIP_WHITESPACE, NO_LINE_WRAP + )).map_err(|err| err + .context(EncodingErrorKind::Malformed) + .into() + ) +} + + + + +#[cfg(test)] +mod test { + use soft_ascii_string::SoftAsciiStr; + use bind::encoded_word::{VecWriter, EncodedWordEncoding}; + use super::*; + + #[test] + fn encoding_uses_line_wrap() { + let input = concat!( + "0123456789", "0123456789", + "0123456789", "0123456789", + "0123456789", "0123456789", + ); + + let res = normal_encode(input); + + assert_eq!(res.as_str(), + "MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nzg5MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nz\r\ng5"); + + let dec = normal_decode(res).unwrap(); + + assert_eq!(dec, input.as_bytes()); + } + + #[test] + fn calc_max_input_len_from_max_output_len() { + assert!(USE_PADDING, "algorithm is specific to the usage of padding"); + assert_eq!(45, calc_max_input_len(60)); + assert_eq!(45, calc_max_input_len(61)); + assert_eq!(45, calc_max_input_len(62)); + assert_eq!(45, calc_max_input_len(63)); + assert_eq!(48, calc_max_input_len(64)); + } + + #[test] + fn encode_decode_normal() { + let pairs: &[(&str,&[u8])] = &[ + ( + "this is some\r\nlong\r\ntest.", + b"dGhpcyBpcyBzb21lDQpsb25nDQp0ZXN0Lg==" + ), + ( + "", + b"" + ) + ]; + for &(raw, encoded) in pairs.iter() { + assert_eq!( + normal_encode(raw).as_bytes(), + 
encoded + ); + assert_eq!( + assert_ok!(normal_decode(encoded)), + raw.as_bytes() + ) + + } + } + + macro_rules! test_ecw_encode { + ($name:ident, data $data:expr => [$($item:expr),*]) => { + #[test] + fn $name() { + let test_data = $data; + let mut out = VecWriter::new( + SoftAsciiStr::from_unchecked("utf8"), + EncodedWordEncoding::Base64 + ); + + encoded_word_encode( test_data, &mut out ); + + let expected = &[ + $($item),* + ]; + + let iter = expected.iter() + .zip( out.data().iter().map(|x|x.as_str()) ) + .enumerate(); + + for ( idx, (expected, got) ) in iter { + if *expected != got { + panic!( " item nr {}: {:?} != {:?} ", idx, expected, got ); + } + } + + let e_len = expected.len(); + let g_len = out.data().len(); + if e_len > g_len { + panic!( "expected following additional items: {:?}", &expected[g_len..e_len]) + } + if e_len < g_len { + panic!( "got following additional items: {:?}", &out.data()[e_len..g_len]) + } + } + }; + } + + test_ecw_encode! { ecw_simple, + data "()\"" => [ + "=?utf8?B?KCki?=" + ] + } + + test_ecw_encode! { ecw_simple_max_len, + data "012345678901234567890123456789012345678944448888" => [ + "=?utf8?B?MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nzg5MDEyMzQ1Njc4OTQ0NDQ4ODg4?=" + ] + } + + test_ecw_encode! { multiple_ecws, + data "012345678901234567890123456789012345678944448888NEWWORD" => [ + "=?utf8?B?MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nzg5MDEyMzQ1Njc4OTQ0NDQ4ODg4?=", + "=?utf8?B?TkVXV09SRA==?=" + ] + } + + test_ecw_encode! { ecw_end_in_multibyte_codepoint, + data "01234567890123456789012345678901234567894444888↓" => [ + "=?utf8?B?MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nzg5MDEyMzQ1Njc4OTQ0NDQ4ODg=?=", + "=?utf8?B?4oaT?=" + ] + } + + + #[test] + fn decode_encoded_word() { + assert_eq!( + assert_ok!(encoded_word_decode("dGhpc19jcmF6eV9lbmNvZGVkX3dvcmQ=")), + b"this_crazy_encoded_word" + ); + } +}
\ No newline at end of file diff --git a/internals/src/bind/encoded_word/impls.rs b/internals/src/bind/encoded_word/impls.rs new file mode 100644 index 0000000..f7051da --- /dev/null +++ b/internals/src/bind/encoded_word/impls.rs @@ -0,0 +1,98 @@ +use soft_ascii_string::{ SoftAsciiString, SoftAsciiChar, SoftAsciiStr }; + +use vec1::Vec1; +use grammar::encoded_word::{ MAX_ECW_LEN, ECW_SEP_OVERHEAD }; +use ::encoder::EncodingWriter; +use super::{ EncodedWordWriter, EncodedWordEncoding as Encoding }; + +pub struct VecWriter<'a> { + data: Vec1<SoftAsciiString >, + charset: &'a SoftAsciiStr, + encoding: Encoding +} + +impl<'a> VecWriter<'a> { + pub fn new(charset: &'a SoftAsciiStr, encoding: Encoding) -> Self { + let data = Vec1::new( SoftAsciiString::new() ); + VecWriter { data, charset, encoding } + } + + pub fn data( &self ) -> &[SoftAsciiString] { + &*self.data + } +} + +impl<'a> Into<Vec1<SoftAsciiString>> for VecWriter<'a> { + fn into(self) -> Vec1<SoftAsciiString> { + self.data + } +} + +impl<'a> EncodedWordWriter for VecWriter<'a> { + + fn encoding( &self ) -> Encoding { + self.encoding + } + + fn write_char( &mut self, ch: SoftAsciiChar ) { + self.data.last_mut().push( ch ); + } + + fn write_charset( &mut self ) { + self.data.last_mut().extend( self.charset.chars() ) + } + + fn write_ecw_seperator( &mut self ) { + self.data.push( SoftAsciiString::new() ) + } + + fn max_payload_len( &self ) -> usize { + MAX_ECW_LEN - ECW_SEP_OVERHEAD - self.charset.len() - 1 + } +} + +pub struct WriterWrapper<'a, 'b: 'a>{ + charset: &'a SoftAsciiStr, + encoding: Encoding, + encoder_handle: &'a mut EncodingWriter<'b> +} + +impl<'a, 'b: 'a> WriterWrapper<'a, 'b> { + pub fn new_with_charset(charset: &'a SoftAsciiStr, + encoding: Encoding, + encoder: &'a mut EncodingWriter<'b> ) -> Self + { + WriterWrapper { charset, encoding, encoder_handle: encoder } + } + + pub fn new(encoding: Encoding, + encoder: &'a mut EncodingWriter<'b> ) -> Self + { + 
Self::new_with_charset(SoftAsciiStr::from_unchecked("utf8"), encoding, encoder) + } +} + +impl<'a, 'b: 'a> EncodedWordWriter for WriterWrapper<'a, 'b> { + + fn encoding( &self ) -> Encoding { + self.encoding + } + + fn write_charset( &mut self ) { + //TODO fix + let _ = self.encoder_handle.write_str( self.charset ); + } + + fn write_ecw_seperator( &mut self ) { + self.encoder_handle.write_fws(); + } + + fn write_char( &mut self, ch: SoftAsciiChar ) { + //TODO fix + let _ = self.encoder_handle.write_char( ch ); + } + + fn max_payload_len( &self ) -> usize { + MAX_ECW_LEN - ECW_SEP_OVERHEAD - self.charset.len() - 1 + } +} diff --git a/internals/src/bind/encoded_word/mod.rs b/internals/src/bind/encoded_word/mod.rs new file mode 100644 index 0000000..ef61fc6 --- /dev/null +++ b/internals/src/bind/encoded_word/mod.rs @@ -0,0 +1,92 @@ +use soft_ascii_string::{ SoftAsciiStr, SoftAsciiChar }; + +use super::{base64, quoted_printable}; + +mod impls; +pub use self::impls::*; + +#[derive(Debug, Copy, Clone, Hash, Eq, PartialEq)] +pub enum EncodedWordEncoding { + Base64, QuotedPrintable +} + +impl EncodedWordEncoding { + + /// returns the acronym for the given encoding + /// used in a encoded word + pub fn acronym(&self) -> &'static SoftAsciiStr { + use self::EncodedWordEncoding::*; + match *self { + Base64 => SoftAsciiStr::from_unchecked("B"), + QuotedPrintable => SoftAsciiStr::from_unchecked("Q") + } + } + + /// encodes a given utf8 string + /// + /// either `self::quoted_printable::encoded_word_encode` + /// or `self::base64::encoded_word_encode_utf8` is used + /// depending on which value `self` is. 
+ /// + /// As both algorithm need to know about code point boundaries + /// only encoding utf8 is supported for now + /// + pub fn encode<R, O>(&self, input: R, out: &mut O) + where R: AsRef<str>, O: EncodedWordWriter + { + use self::EncodedWordEncoding::*; + let input: &str = input.as_ref(); + match *self { + Base64 => { + base64::encoded_word_encode(input, out) + }, + QuotedPrintable => { + quoted_printable::encoded_word_encode_utf8(input, out) + } + } + } +} + +pub trait EncodedWordWriter { + fn write_char( &mut self, ch: SoftAsciiChar ); + fn write_charset( &mut self ); + fn encoding( &self ) -> EncodedWordEncoding; + fn write_ecw_seperator( &mut self ); + + /// Returns the maximal length of the paylod/encoded data + /// + /// Any number of calls to methods on in trait in any way + /// should never be able to change the returned value. + /// Only changing e.g. the charset or encoding should be + /// able to change what `max_paylod_len` returns. + fn max_payload_len( &self ) -> usize; + + fn write_ecw_start( &mut self ) { + let qm = SoftAsciiChar::from_unchecked('?'); + self.write_char(SoftAsciiChar::from_unchecked('=')); + self.write_char(qm); + self.write_charset(); + self.write_char(qm); + let acronym = self.encoding().acronym(); + self.write_str( acronym ); + self.write_char(qm); + } + + fn write_ecw_end( &mut self ) { + self.write_char( SoftAsciiChar::from_unchecked('?') ); + self.write_char( SoftAsciiChar::from_unchecked('=') ); + } + + + fn start_next_encoded_word( &mut self ) { + self.write_ecw_end(); + self.write_ecw_seperator(); + self.write_ecw_start(); + } + + fn write_str( &mut self, s: &SoftAsciiStr ) { + for ch in s.chars() { + self.write_char(ch) + } + } +}
\ No newline at end of file diff --git a/internals/src/bind/idna.rs b/internals/src/bind/idna.rs new file mode 100644 index 0000000..7b82d69 --- /dev/null +++ b/internals/src/bind/idna.rs @@ -0,0 +1,73 @@ +use soft_ascii_string::SoftAsciiString; +use idna; + +use ::error::{EncodingError, EncodingErrorKind}; + + +/// uses puny code on given domain to return a ascii representation +/// +/// # Implementation Detail +/// this function uses `idna::domain_to_ascii`, see the +/// `idna` crates documentation fore more details on how +/// exactly all edgecase are handled +/// +/// # Note +/// that this function does not validate the domain, e.g. +/// if you puny code the domain `"this seems\0so;wrong"` it +/// will return `Ok("this seems\0so;wrong")` +/// +pub fn puny_code_domain<R: AsRef<str>>(domain: R) + -> Result<SoftAsciiString, EncodingError> +{ + _puny_code_domain(domain.as_ref()) +} + +fn _puny_code_domain(domain: &str) + -> Result<SoftAsciiString, EncodingError> +{ + match idna::domain_to_ascii(domain) { + Ok(asciified) => { + //SAFE: well we converted it to ascii, so it's ascii + Ok(SoftAsciiString::from_unchecked(asciified)) + }, + Err(_non_informative_err) => { + Err(EncodingErrorKind::NotEncodable { encoding: "punycode" }.into()) + } + } +} + + +#[cfg(test)] +mod test { + use idna; + use super::puny_code_domain; + + #[test] + fn idna_does_not_validate() { + let domain = "this seems\0so;wrong"; + assert_eq!( + domain.to_owned(), + assert_ok!( idna::domain_to_ascii(domain) ) + ); + } + + #[test] + fn nop_puny_code() { + let domain = "is_ascii.notadomain"; + + let encoded = assert_ok!( puny_code_domain( domain ) ); + assert_eq!( + &*encoded, + "is_ascii.notadomain" + ); + } + #[test] + fn puny_code_ascii_mail() { + let domain = "nöt_ascii.ü"; + let encoded = assert_ok!( puny_code_domain(domain) ); + assert_eq!( + &*encoded, + "xn--nt_ascii-n4a.xn--tda" + ); + } +}
\ No newline at end of file diff --git a/internals/src/bind/mime.rs b/internals/src/bind/mime.rs new file mode 100644 index 0000000..afede94 --- /dev/null +++ b/internals/src/bind/mime.rs @@ -0,0 +1,56 @@ +use std::borrow::Cow; + +use soft_ascii_string::{ SoftAsciiStr, SoftAsciiString}; +use grammar::is_token_char; +use percent_encoding::{ + EncodeSet, + percent_encode +}; + +#[derive(Debug, Eq, PartialEq, Clone, Copy, Hash)] +struct MimeParamEncodingSet; +impl EncodeSet for MimeParamEncodingSet { + fn contains(&self, byte: u8) -> bool { + //if it is in the encoding set we need to encode it + //which we need to to if it is _not_ a token char + !is_token_char(byte as char) + } +} + + +/// percent encodes a byte sequence so that it can be used +/// in a RFC 2231 conform encoded mime header parameter +pub fn percent_encode_param_value<'a, R>(input: &'a R) -> Cow<'a, SoftAsciiStr> + where R: ?Sized+AsRef<[u8]> +{ + let cow: Cow<'a, str> = percent_encode(input.as_ref(), MimeParamEncodingSet).into(); + match cow { + Cow::Owned(o) => + //SAFE: MimeParamEncodingSet makes all non-us-ascii bytes encoded AND + // percent_encoding::percent_encode always only produces ascii anyway + Cow::Owned(SoftAsciiString::from_unchecked(o)), + Cow::Borrowed(b) => + Cow::Borrowed(SoftAsciiStr::from_unchecked(b)) + } +} + + +#[cfg(test)] +mod test { + use std::borrow::Cow; + use super::*; + + #[test] + fn encode_simple() { + let input = "this is tüxt"; + let res = percent_encode_param_value(input); + assert_eq!("this%20is%20t%C3%BCxt", res.as_str()); + } + + #[test] + fn no_encode_no_alloc() { + let input = "full_valid"; + let res = percent_encode_param_value(input); + assert_eq!(res, Cow::Borrowed(input)); + } +}
\ No newline at end of file diff --git a/internals/src/bind/mod.rs b/internals/src/bind/mod.rs new file mode 100644 index 0000000..6b475ee --- /dev/null +++ b/internals/src/bind/mod.rs @@ -0,0 +1,8 @@ +//! This module contains bindings to a number of external crates. + +pub mod encoded_word; +pub mod base64; +pub mod quoted_string; +pub mod quoted_printable; +pub mod idna; +pub mod mime; diff --git a/internals/src/bind/quoted_printable.rs b/internals/src/bind/quoted_printable.rs new file mode 100644 index 0000000..7ef6291 --- /dev/null +++ b/internals/src/bind/quoted_printable.rs @@ -0,0 +1,360 @@ +use soft_ascii_string::{ SoftAsciiChar, SoftAsciiString }; +use { quoted_printable as extern_quoted_printable }; + +use failure::Fail; +use ::error::{EncodingError, EncodingErrorKind}; +use super::encoded_word::EncodedWordWriter; + +/// a quoted printable encoding suitable for content transfer encoding, +/// but _not_ suited for the encoding in encoded words +pub fn normal_encode<A: AsRef<[u8]>>(data: A) -> SoftAsciiString { + let encoded = extern_quoted_printable::encode_to_str(data); + SoftAsciiString::from_unchecked(encoded) +} + +/// a quoted printable decoding suitable for content transfer encoding +#[inline] +pub fn normal_decode<R: AsRef<[u8]>>(input: R) + -> Result<Vec<u8>, EncodingError> +{ + //extern_quoted_printable h + extern_quoted_printable::decode( + input.as_ref(), extern_quoted_printable::ParseMode::Strict + ).map_err(|err| err + .context(EncodingErrorKind::Malformed) + .into() + ) +} + +/// a quoted printable decoding suitable for decoding a quoted printable +/// encpded text in encoded words +#[inline(always)] +pub fn encoded_word_decode<R: AsRef<[u8]>>( input: R ) -> Result<Vec<u8>, EncodingError> { + //we can just use the stadard decoding + normal_decode( input ) +} + +//FIXME we don't use EncodedWord context here, +// instead we use the most restructive context as a basis, +// making it compatilble with all context, but not nessesary +// the best 
solution... +/// Simple wrapper around ecoded_word_encode for utf8 strings only +pub fn encoded_word_encode_utf8<'a, O>(word: &str, writer: &mut O ) + where O: EncodedWordWriter +{ + let iter = word.char_indices().map( |(idx, ch)| { + &word.as_bytes()[idx..idx+ch.len_utf8()] + }); + encoded_word_encode(iter, writer ); +} + +/// +/// Quoted Printable encoding for Encoded Words in MIME-Headers +/// +/// Which means: +/// 1. there is a limit to the maximum number of characters +/// - the limit is 75 INCLUDING the `=?charset?encoding?...?=` overhead +/// - as such the line length limit of quoted printable can not be hit, +/// the quoted printable part is at most 67 chars long, e.g. for utf8 +/// it is at most 64 chars +/// 2. has to be one token, so no ' ','\t' and neither soft nor hard newlines +/// 3. no '?' character +/// +/// The input is a sequence of bytes split up in chunks where +/// a split in multipl encoded words can be done between any +/// two chunks but not in a chunk. Wrt. utf8 a chunk would +/// correspond to a character, e.g. `[65]` for `'a'` and +/// `[0xe2, 0x99, 0xa5]` for a `'♥'`. +/// +/// Note that a chunk can with more than 21 byte is not guranteed to +/// work, and can trigger a panic. +/// +/// As this has to be safe for usage in all header contexts, additional +/// to the chars required by the standard (i.e. '=') following chars are ALWAYS +/// quoted' ', '\t', '?', '(', ')'. Also '\n','\r' see the note below for more +/// details. +/// +/// +/// # Panics: +/// +/// 1. if the encoded size of a chunk is more than 16 byte, which can +/// happen if a chunk has more than 5 bytes. For comparison utf8 has +/// at most chunks with 4 bytes leading to at most 12 byte buffer usage. +/// +/// 2. if max size if >76 as no new line handling is implemented and +/// the max size for the use case can be at most 67 chars +/// +/// 3. if a single encoded chunk can not be written as one because of +/// the length limitation AFTER a new encoded word was started. 
+/// +/// # Note: +/// as it has to be a token no new line characters can appear in the output, +/// BUT q-encoding also forbids the encoding of CRLF line breaks in TEXT! +/// bodies, which is mean to not mess up with the limitations to the line +/// length, but they are allowed to appear in non TEXT data, but this +/// function should, but might not be limited to be used with text data, +/// which should but might not be limited to data not containing any new +/// line character. For now any appearance of '\r' or '\n' will be encoded +/// like any other "special" byte, for the future a context might be needed. +/// (Especially as encoded words can contain non-ascii text in which '\r','\n' +/// might be encoded with completely different bytes, but when the RFC speaks of +/// '\r','\n' it normally means the bytes 10/13 independent of the character set, +/// or if they appear in a image, zip-archiev etc. ) +pub fn encoded_word_encode<'a, I, O>(input: I, out: &mut O ) + where I: Iterator<Item=&'a [u8]>, O: EncodedWordWriter +{ + out.write_ecw_start(); + let max_payload_len = out.max_payload_len(); + let mut remaining = max_payload_len; + //WARN: on remaining being > 67 + let mut buf = [SoftAsciiChar::from_unchecked('X'); 16]; + + for chunk in input { + let mut buf_idx = 0; + + for byte in chunk { + let byte = *byte; + match byte { + // this is the way to go as long as we don't want to behave differently for + // different context, the COMMENT context allows more chars, and the + // TEXT context even more + b'!' 
| b'*' | + b'+' | b'-' | + b'/' | b'_' | + b'0'...b'9' | + b'A'...b'Z' | + b'a'...b'z' => { + buf[buf_idx] = SoftAsciiChar::from_unchecked(byte as char); + buf_idx += 1; + }, + _otherwise => { + buf[buf_idx] = SoftAsciiChar::from_unchecked('='); + buf[buf_idx+1] = lower_nibble_to_hex( byte >> 4 ); + buf[buf_idx+2] = lower_nibble_to_hex( byte ); + buf_idx += 3; + } + } + } + if buf_idx > remaining { + out.start_next_encoded_word(); + remaining = max_payload_len; + } + if buf_idx > remaining { + panic!( "single character longer then max length ({:?}) of encoded word", remaining ); + } + for idx in 0..buf_idx { + out.write_char( buf[idx] ) + } + remaining -= buf_idx; + } + out.write_ecw_end() +} + +#[inline] +fn lower_nibble_to_hex( half_byte: u8 ) -> SoftAsciiChar { + static CHARS: &[char] = &[ + '0', '1', '2', '3', '4', '5', + '6', '7', '8', '9', 'A', 'B', + 'C', 'D', 'E', 'F' + ]; + + SoftAsciiChar::from_unchecked(CHARS[ (half_byte & 0x0F) as usize ]) +} + + + + +#[cfg(test)] +mod test { + use soft_ascii_string::SoftAsciiStr; + use ::bind::encoded_word::EncodedWordEncoding; + use super::super::encoded_word::VecWriter; + use super::*; + + #[test] + fn to_hex() { + let data = &[ + ('0', 0b11110000), + ('0', 0b0 ), + ('7', 0b0111), + ('7', 0b10111), + ('F', 0b1111) + ]; + for &(ch, byte) in data { + assert_eq!( lower_nibble_to_hex( byte), ch ); + } + + } + + macro_rules! 
test_ecw_encode { + ($name:ident, data $data:expr => [$($item:expr),*]) => { + #[test] + fn $name() { + let test_data = $data; + let mut out = VecWriter::new( + SoftAsciiStr::from_unchecked("utf8"), + EncodedWordEncoding::QuotedPrintable + ); + + encoded_word_encode_utf8( test_data, &mut out ); + + let expected = &[ + $($item),* + ]; + let iter = expected.iter() + .zip( out.data().iter().map(|x|x.as_str()) ) + .enumerate(); + + for ( idx, (expected, got) ) in iter { + if *expected != got { + panic!( " item nr {}: {:?} != {:?} ", idx, expected, got ); + } + } + + let e_len = expected.len(); + let g_len = out.data().len(); + if e_len > g_len { |