1 files changed, 360 insertions, 0 deletions
diff --git a/internals/src/bind/quoted_printable.rs b/internals/src/bind/quoted_printable.rs
new file mode 100644
index 0000000..7ef6291
--- /dev/null
+++ b/internals/src/bind/quoted_printable.rs
@@ -0,0 +1,360 @@
+use soft_ascii_string::{ SoftAsciiChar, SoftAsciiString };
+use { quoted_printable as extern_quoted_printable };
+
+use failure::Fail;
+use ::error::{EncodingError, EncodingErrorKind};
+use super::encoded_word::EncodedWordWriter;
+
+/// a quoted printable encoding suitable for content transfer encoding,
+/// but _not_ suited for the encoding in encoded words
+pub fn normal_encode<A: AsRef<[u8]>>(data: A) -> SoftAsciiString {
+    let encoded = extern_quoted_printable::encode_to_str(data);
+    SoftAsciiString::from_unchecked(encoded)
+}
+
+/// a quoted printable decoding suitable for content transfer encoding
+#[inline]
+pub fn normal_decode<R: AsRef<[u8]>>(input: R)
+    -> Result<Vec<u8>, EncodingError>
+{
+    //extern_quoted_printable h
+    extern_quoted_printable::decode(
+        input.as_ref(), extern_quoted_printable::ParseMode::Strict
+    ).map_err(|err| err
+        .context(EncodingErrorKind::Malformed)
+        .into()
+    )
+}
+
+/// a quoted printable decoding suitable for decoding a quoted printable
+/// encpded text in encoded words
+#[inline(always)]
+pub fn encoded_word_decode<R: AsRef<[u8]>>( input: R ) -> Result<Vec<u8>, EncodingError> {
+    //we can just use the stadard decoding
+    normal_decode( input )
+}
+
+//FIXME we don't use EncodedWord context here,
+// instead we use the most restructive context as a basis,
+// making it compatilble with all context, but not nessesary
+// the best solution...
+/// Simple wrapper around ecoded_word_encode for utf8 strings only
+pub fn encoded_word_encode_utf8<'a,  O>(word: &str, writer: &mut O )
+    where O: EncodedWordWriter
+{
+    let iter = word.char_indices().map( |(idx, ch)| {
+        &word.as_bytes()[idx..idx+ch.len_utf8()]
+    });
+    encoded_word_encode(iter, writer );
+}
+
+///
+/// Quoted Printable encoding for Encoded Words in MIME-Headers
+///
+/// Which means:
+/// 1. there is a limit to the maximum number of characters
+///    - the limit is 75 INCLUDING the `=?charset?encoding?...?=` overhead
+///    - as such the line length limit of quoted printable can not be hit,
+///      the quoted printable part is at most 67 chars long, e.g. for utf8
+///      it is at most 64 chars
+/// 2. has to be one token, so no ' ','\t' and neither soft nor hard newlines
+/// 3. no '?' character
+///
+/// The input is a sequence of bytes split up in chunks where
+/// a split in multipl encoded words can be done between any
+/// two chunks but not in a chunk. Wrt. utf8 a chunk would
+/// correspond to a character, e.g. `[65]` for `'a'` and
+/// `[0xe2, 0x99, 0xa5]` for a `'♥'`.
+///
+/// Note that a chunk can with more than 21 byte is not guranteed to
+/// work, and can trigger a panic.
+///
+/// As this has to be safe for usage in all header contexts, additional
+/// to the chars required by the standard (i.e. '=') following chars are ALWAYS
+/// quoted' ', '\t', '?', '(', ')'. Also '\n','\r' see the note below for more
+/// details.
+///
+///
+/// # Panics:
+///
+/// 1. if the encoded size of a chunk is more than 16 byte, which can
+///    happen if a chunk has more than 5 bytes. For comparison utf8 has
+///    at most chunks with 4 bytes leading to at most 12 byte buffer usage.
+///
+/// 2. if max size if >76 as no new line handling is implemented and
+///    the max size for the use case can be at most 67 chars
+///
+/// 3. if a single encoded chunk can not be written as one because of
+///    the length limitation AFTER a new encoded word was started.
+///
+/// # Note:
+///   as it has to be a token no new line characters can appear in the output,
+///   BUT q-encoding also forbids the encoding of CRLF line breaks in TEXT!
+///   bodies, which is mean to not mess up with the limitations to the line
+///   length, but they are allowed to appear in non TEXT data, but this
+///   function should, but might not be limited to be used with text data,
+///   which should but might not be limited to data not containing any new
+///   line character. For now any appearance of '\r' or '\n' will be encoded
+///   like any other "special" byte, for the future a context might be needed.
+///   (Especially as encoded words can contain non-ascii text in which '\r','\n'
+///   might be encoded with completely different bytes, but when the RFC speaks of
+///   '\r','\n' it normally means the bytes 10/13 independent of the character set,
+///   or if they appear in a image, zip-archiev etc. )
+pub fn encoded_word_encode<'a, I, O>(input: I, out: &mut O )
+    where I: Iterator<Item=&'a [u8]>, O: EncodedWordWriter
+{
+    out.write_ecw_start();
+    let max_payload_len = out.max_payload_len();
+    let mut remaining = max_payload_len;
+    //WARN: on remaining being > 67
+    let mut buf = [SoftAsciiChar::from_unchecked('X'); 16];
+
+    for chunk in input {
+        let mut buf_idx = 0;
+
+        for byte in chunk {
+            let byte = *byte;
+            match byte {
+                // this is the way to go as long as we don't want to behave differently for
+                // different context, the COMMENT context allows more chars, and the
+                // TEXT context even more
+                b'!' | b'*' |
+                b'+' | b'-' |
+                b'/' | b'_' |
+                b'0'...b'9' |
+                b'A'...b'Z' |
+                b'a'...b'z'  => {
+                    buf[buf_idx] = SoftAsciiChar::from_unchecked(byte as char);
+                    buf_idx += 1;
+                },
+                _otherwise => {
+                    buf[buf_idx] = SoftAsciiChar::from_unchecked('=');
+                    buf[buf_idx+1] = lower_nibble_to_hex( byte >> 4 );
+                    buf[buf_idx+2] = lower_nibble_to_hex( byte );
+                    buf_idx += 3;
+                }
+            }
+        }
+        if buf_idx > remaining {
+            out.start_next_encoded_word();
+            remaining = max_payload_len;
+        }
+        if buf_idx > remaining {
+            panic!( "single character longer then max length ({:?}) of encoded word", remaining );
+        }
+        for idx in 0..buf_idx {
+            out.write_char( buf[idx]  )
+        }
+        remaining -= buf_idx;
+    }
+    out.write_ecw_end()
+}
+
+#[inline]
+fn lower_nibble_to_hex( half_byte: u8 ) -> SoftAsciiChar {
+    static CHARS: &[char] = &[
+        '0', '1', '2', '3', '4', '5',
+        '6', '7', '8', '9', 'A', 'B',
+        'C', 'D', 'E', 'F'
+    ];
+
+    SoftAsciiChar::from_unchecked(CHARS[ (half_byte & 0x0F) as usize ])
+}
+
+
+
+
+#[cfg(test)]
+mod test {
+    use soft_ascii_string::SoftAsciiStr;
+    use ::bind::encoded_word::EncodedWordEncoding;
+    use super::super::encoded_word::VecWriter;
+    use super::*;
+
+    #[test]
+    fn to_hex() {
+        let data = &[
+            ('0', 0b11110000),
+            ('0', 0b0 ),
+            ('7', 0b0111),
+            ('7', 0b10111),
+            ('F',  0b1111)
+        ];
+        for &(ch, byte) in data {
+            assert_eq!( lower_nibble_to_hex( byte), ch );
+        }
+
+    }
+
+    macro_rules! test_ecw_encode {
+        ($name:ident, data $data:expr => [$($item:expr),*]) => {
+            #[test]
+            fn $name() {
+                let test_data = $data;
+                let mut out = VecWriter::new(
+                    SoftAsciiStr::from_unchecked("utf8"),
+                    EncodedWordEncoding::QuotedPrintable
+                );
+
+                encoded_word_encode_utf8( test_data, &mut out );
+
+                let expected = &[
+                    $($item),*
+                ];
+                let iter = expected.iter()
+                    .zip( out.data().iter().map(|x|x.as_str()) )
+                    .enumerate();
+
+                for ( idx, (expected, got) ) in iter {
+                    if  *expected != got {
+                        panic!( " item nr {}: {:?} != {:?} ", idx, expected, got );
+                    }
+                }
+
+                let e_len = expected.len();
+                let g_len = out.data().len();
+                if e_len > g_len {
+                    panic!( "expected following additional items: {:?}", &expected[g_len..e_len])
+                }
+                if e_len < g_len {
+                    panic!( "got following additional items: {:?}", &out.data()[e_len..g_len])
+                }
+            }
+        };
+    }
+
+    test_ecw_encode! { can_be_used_in_comments,
+        data "()\"" => [
+            "=?utf8?Q?=28=29=22?="
+        ]
+    }
+
+    test_ecw_encode! { can_be_used_in_phrase,
+        data "{}~@#$%^&*()=|\\[]';:." => [
+            "=?utf8?Q?=7B=7D=7E=40=23=24=25=5E=26*=28=29=3D=7C=5C=5B=5D=27=3B=3A=2E?="
+        ]
+    }
+
+    test_ecw_encode! { bad_chars_in_all_contexts,
+        data "?= \t\r\n" => [
+            "=?utf8?Q?=3F=3D=20=09=0D=0A?="
+        ]
+    }
+
+    test_ecw_encode!{ encode_ascii,
+        data  "abcdefghijklmnopqrstuvwxyz \t?=0123456789!@#$%^&*()_+-" => [
+             "=?utf8?Q?abcdefghijklmnopqrstuvwxyz=20=09=3F=3D0123456789!=40=23=24=25=5E?=",
+             "=?utf8?Q?=26*=28=29_+-?="
+        ]
+    }
+
+    test_ecw_encode! { how_it_handales_newlines,
+        data "\r\n" => [
+            "=?utf8?Q?=0D=0A?="
+        ]
+    }
+
+
+    test_ecw_encode! { split_into_multiple_ecws,
+        data "0123456789012345678901234567890123456789012345678901234567891234newline" => [
+            "=?utf8?Q?0123456789012345678901234567890123456789012345678901234567891234?=",
+            "=?utf8?Q?newline?="
+        ]
+    }
+
+    test_ecw_encode!{ bigger_chunks,
+        data "ランダムテキスト ראַנדאָם טעקסט" => [
+            //ランダムテキス
+            "=?utf8?Q?=E3=83=A9=E3=83=B3=E3=83=80=E3=83=A0=E3=83=86=E3=82=AD=E3=82=B9?=",
+            //ト ראַנדאָם
+            "=?utf8?Q?=E3=83=88=20=D7=A8=D7=90=D6=B7=D7=A0=D7=93=D7=90=D6=B8=D7=9D=20?=",
+            //טעקסט
+            "=?utf8?Q?=D7=98=D7=A2=D7=A7=D7=A1=D7=98?="
+        ]
+    }
+
+    #[test]
+    fn ecw_decode() {
+        let pairs = [
+            ("=28=29=22", "()\""),
+            (
+                "=7B=7D=7E=40=23=24=25=5E=26*=28=29=3D=7C=5C=5B=5D=27=3B=3A=2E",
+                "{}~@#$%^&*()=|\\[]';:."
+            ),
+            (
+                "=3F=3D=20=09=0D=0A",
+                "?= \t\r\n"
+            ),
+            (
+                "=26*=28=29_+-",
+                "&*()_+-"
+            ),
+            (
+                "abcdefghijklmnopqrstuvwxyz=20=09=3F=3D0123456789!=40=23=24=25=5E",
+                "abcdefghijklmnopqrstuvwxyz \t?=0123456789!@#$%^"
+            ),
+            (
+                "=0D=0A",
+                "\r\n"
+            ),
+            (
+                "=E3=83=A9=E3=83=B3=E3=83=80=E3=83=A0=E3=83=86=E3=82=AD=E3=82=B9",
+                "ランダムテキス"
+            ),
+            (
+                "=E3=83=88=20=D7=A8=D7=90=D6=B7=D7=A0=D7=93=D7=90=D6=B8=D7=9D=20",
+                "ト ראַנדאָם "
+            ),
+            (
+                "=D7=98=D7=A2=D7=A7=D7=A1=D7=98",
+                "טעקסט"
+            )
+        ];
+        for &(inp, outp) in pairs.iter() {
+            let dec = assert_ok!(encoded_word_decode(inp));
+            let dec = String::from_utf8(dec).unwrap();
+            assert_eq!(
+                outp.as_bytes(),
+                dec.as_bytes()
+            );
+        }
+    }
+
+    #[test]
+    fn normal_encode_text() {
+        let text = concat!(
+            "This is a llllllllllllllllllllllllllllllllllllll00000000000000000000ng test    0123456789qwertyuio\r\n",
+            "With many lines\r\n",
+            "And utf→→→→8"
+        );
+        let encoded = normal_encode(text);
+        assert_eq!(
+            concat!(
+                "This is a llllllllllllllllllllllllllllllllllllll00000000000000000000ng test=\r\n",
+                "    0123456789qwertyuio\r\n",
+                "With many lines\r\n",
+                "And utf=E2=86=92=E2=86=92=E2=86=92=E2=86=928"
+            ),
+            encoded.as_str()
+        );
+    }
+
+    #[test]
+    fn normal_decode_text() {
+        let text = concat!(
+                "This is a llllllllllllllllllllllllllllllllllllll00000000000000000000ng test=\r\n",
+                "    0123456789qwertyuio\r\n",
+                "With many lines\r\n",
+                "And utf=E2=86=92=E2=86=92=E2=86=92=E2=86=928"
+            );
+        let encoded = String::from_utf8(normal_decode(text).unwrap()).unwrap();
+        assert_eq!(
+            concat!(
+                "This is a llllllllllllllllllllllllllllllllllllll00000000000000000000ng test    0123456789qwertyuio\r\n",
+                "With many lines\r\n",
+                "And utf→→→→8"
+            ),
+            encoded.as_str()
+        );
+    }
+}
+\ No newline at end of file