diff options
author | Philipp Korber <p.korber@1aim.com> | 2018-11-16 15:46:43 +0100 |
---|---|---|
committer | Philipp Korber <p.korber@1aim.com> | 2018-11-16 15:46:43 +0100 |
commit | 652d6f0ffeee7302a2cb51059bef75d8b0bb50be (patch) | |
tree | c3851592642938172f280f7428d43e08b0fe2cbe /internals | |
parent | 0947fe8996149fe20a6d47a793f9555790eb2eae (diff) |
refactor: merged sources of mail-headers,mail-internals,mail-core, mail
Originally it was planned to do a merge with `--allow-unrelated-history`
but this cannot be done as `mail-core` has an "invalid" history which
has a merge conflict **with itself**. So even rewinding the history on
an empty repo is not possible.
Instead the code was directly copied over, losing history.
But the history is still available in the different
`history-backup-*` branches. It is just that the past history
is decoupled from the current history.
Diffstat (limited to 'internals')
-rw-r--r-- | internals/Cargo.toml | 35 | ||||
-rw-r--r-- | internals/README.md | 33 | ||||
-rw-r--r-- | internals/src/bind/base64.rs | 262 | ||||
-rw-r--r-- | internals/src/bind/encoded_word/impls.rs | 98 | ||||
-rw-r--r-- | internals/src/bind/encoded_word/mod.rs | 92 | ||||
-rw-r--r-- | internals/src/bind/idna.rs | 73 | ||||
-rw-r--r-- | internals/src/bind/mime.rs | 56 | ||||
-rw-r--r-- | internals/src/bind/mod.rs | 8 | ||||
-rw-r--r-- | internals/src/bind/quoted_printable.rs | 360 | ||||
-rw-r--r-- | internals/src/bind/quoted_string.rs | 101 | ||||
-rw-r--r-- | internals/src/encoder/encodable.rs | 187 | ||||
-rw-r--r-- | internals/src/encoder/mod.rs | 1712 | ||||
-rw-r--r-- | internals/src/encoder/trace.rs | 167 | ||||
-rw-r--r-- | internals/src/error.rs | 239 | ||||
-rw-r--r-- | internals/src/grammar.rs | 423 | ||||
-rw-r--r-- | internals/src/lib.rs | 40 | ||||
-rw-r--r-- | internals/src/macros.rs | 61 | ||||
-rw-r--r-- | internals/src/mail_type.rs | 47 | ||||
-rw-r--r-- | internals/src/utils/mod.rs | 138 |
19 files changed, 4132 insertions, 0 deletions
diff --git a/internals/Cargo.toml b/internals/Cargo.toml new file mode 100644 index 0000000..61a99ed --- /dev/null +++ b/internals/Cargo.toml @@ -0,0 +1,35 @@ +[package] +authors = ["Philipp Korber <p.korber@1aim.com>"] +name = "mail-internals" +description = "[mail-api] _internal_ parts for the mail-api crates" +documentation = "https://docs.rs/mail-internals" +keywords = ["mail-api"] +categories = [] +license = "MIT OR Apache-2.0" +repository = "https://github.com/1aim/mail" +version = "0.2.0" +readme = "./README.md" + +[dependencies] +failure = "0.1.1" +nom = "3.1.0" +chrono = "0.4.0" +base64 = "0.6.0" +quoted_printable = "0.4.0" +idna = "0.1.4" +percent-encoding = "1.0.0" +quoted-string = "0.6" +media-type-impl-utils = { git="https://github.com/1aim/media-type-impl-utils" } +soft-ascii-string = "1.0" +vec1 = "1.0" + +[dependencies.mime] +git="https://github.com/1aim/mime" +branch="parser_revamp" +features=["expose-param-utils"] +version="0.4.0" + + +[features] +default = [] +traceing = [] diff --git a/internals/README.md b/internals/README.md new file mode 100644 index 0000000..acb8693 --- /dev/null +++ b/internals/README.md @@ -0,0 +1,33 @@ + +# mail-internal + +**Provides some internal functionality for the `mail` crate.** + +--- + +The main part of this crate is the `EncodingBuffer` which +is the place the headers write their content to (in an encoded +form). Normally nothing in this crate needs to be used, the +only exception is if you want to write your own mail header +components for your custom mail header. In which case some +of the things in this crate might prove useful for you. +(E.g. the `bind` module which binds some external crates +like e.g. `quoted-string` and `idna`) + +Documentation can be [viewed on docs.rs](https://docs.rs/mail-internals) +(once it is published).
+ +## License + +Licensed under either of + + * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) + * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) + +at your option. + +### Contribution + +Unless you explicitly state otherwise, any contribution intentionally submitted +for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any +additional terms or conditions. diff --git a/internals/src/bind/base64.rs b/internals/src/bind/base64.rs new file mode 100644 index 0000000..36bb735 --- /dev/null +++ b/internals/src/bind/base64.rs @@ -0,0 +1,262 @@ +use {base64 as extern_base64}; +use soft_ascii_string::{ SoftAsciiString, SoftAsciiChar}; +use failure::Fail; + +use ::utils::is_utf8_continuation_byte; +use ::error::{EncodingError, EncodingErrorKind}; + +use super::encoded_word::EncodedWordWriter; + +const CHARSET: extern_base64::CharacterSet = extern_base64::CharacterSet::Standard; +const NO_LINE_WRAP: extern_base64::LineWrap = extern_base64::LineWrap::NoWrap; +const LINE_WRAP: extern_base64::LineWrap = + extern_base64::LineWrap::Wrap(78, extern_base64::LineEnding::CRLF); +const USE_PADDING: bool = true; +const ECW_STRIP_WHITESPACE: bool = false; +const NON_ECW_STRIP_WHITESPACE: bool = true; + + +#[inline] +pub fn normal_encode<R: AsRef<[u8]>>(input: R) -> SoftAsciiString { + let res = extern_base64::encode_config( input.as_ref(), extern_base64::Config::new( + //FIXME: check if line wrap should be used here, I thinks it should + CHARSET, USE_PADDING, NON_ECW_STRIP_WHITESPACE, LINE_WRAP + )); + SoftAsciiString::from_unchecked(res) +} + +#[inline] +pub fn normal_decode<R: AsRef<[u8]>>(input: R) -> Result<Vec<u8>, EncodingError> { + extern_base64::decode_config( input.as_ref(), extern_base64::Config::new( + CHARSET, USE_PADDING, NON_ECW_STRIP_WHITESPACE, LINE_WRAP + )).map_err(|err| err + .context(EncodingErrorKind::Malformed) + 
.into() + ) +} + +#[inline(always)] +fn calc_max_input_len(max_output_len: usize) -> usize { + //NOTE: *3/4 is NOT correct due to the way this + // relies on non-floting point division + max_output_len / 4 * 3 +} + +//NOTE: base64 does not have to care about the EncodedWordContext, +// it is valid under all of them anyway +/// +/// # Note +/// for now this only supports utf8/ascii input, as +/// we have to know where we can split +#[inline(always)] +pub fn encoded_word_encode<O, R: AsRef<str>>( input: R, out: &mut O ) + where O: EncodedWordWriter +{ + _encoded_word_encode(input.as_ref(), out) +} + +fn _encoded_word_encode<O>( input: &str, out: &mut O ) + where O: EncodedWordWriter +{ + let config = extern_base64::Config::new( + CHARSET, USE_PADDING, ECW_STRIP_WHITESPACE, NO_LINE_WRAP + ); + + debug_assert!( USE_PADDING == true, "size calculation is tailored for padding"); + + let max_output_len = out.max_payload_len(); + let max_input_len = calc_max_input_len(max_output_len); + let mut rest = input; + let mut buff = String::with_capacity(max_output_len); + + out.write_ecw_start(); + + loop { + buff.clear(); + + // additional bytes in uf8 always start with binary b10xxxxxx + let rest_len = rest.len(); + let split_idx = if max_input_len >= rest_len { + rest_len + } else { + let mut tmp_split = max_input_len; + let rest_bytes = rest.as_bytes(); + + // the byte at the current index starts with that we are in a + // position where we can't split and have to move left until + // the beginning of the utf8 + while is_utf8_continuation_byte(rest_bytes[tmp_split]) { + //UNDERFLOW_SAFE: if the string is correct (contains valid utf8) this cant undeflow as + // the first byte cant start with 0b10xxxxxx. 
+ tmp_split -= 1; + } + tmp_split + }; + + let (this, _rest) = rest.split_at(split_idx); + //very important ;=) + rest = _rest; + + extern_base64::encode_config_buf(this, config.clone(), &mut buff); + //FIXME add a write_str method to EncodedWordWriter + for ch in buff.chars() { + //SAFE: base64 consist of only ascii chars + out.write_char(SoftAsciiChar::from_unchecked(ch)) + } + + if rest.len() == 0 { + break + } else { + out.start_next_encoded_word(); + } + } + out.write_ecw_end(); +} + +#[inline(always)] +pub fn encoded_word_decode<R: AsRef<[u8]>>(input: R) + -> Result<Vec<u8>, EncodingError> +{ + extern_base64::decode_config(input.as_ref(), extern_base64::Config::new( + CHARSET, USE_PADDING, ECW_STRIP_WHITESPACE, NO_LINE_WRAP + )).map_err(|err| err + .context(EncodingErrorKind::Malformed) + .into() + ) +} + + + + +#[cfg(test)] +mod test { + use soft_ascii_string::SoftAsciiStr; + use bind::encoded_word::{VecWriter, EncodedWordEncoding}; + use super::*; + + #[test] + fn encoding_uses_line_wrap() { + let input = concat!( + "0123456789", "0123456789", + "0123456789", "0123456789", + "0123456789", "0123456789", + ); + + let res = normal_encode(input); + + assert_eq!(res.as_str(), + "MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nzg5MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nz\r\ng5"); + + let dec = normal_decode(res).unwrap(); + + assert_eq!(dec, input.as_bytes()); + } + + #[test] + fn calc_max_input_len_from_max_output_len() { + assert!(USE_PADDING, "algorithm is specific to the usage of padding"); + assert_eq!(45, calc_max_input_len(60)); + assert_eq!(45, calc_max_input_len(61)); + assert_eq!(45, calc_max_input_len(62)); + assert_eq!(45, calc_max_input_len(63)); + assert_eq!(48, calc_max_input_len(64)); + } + + #[test] + fn encode_decode_normal() { + let pairs: &[(&str,&[u8])] = &[ + ( + "this is some\r\nlong\r\ntest.", + b"dGhpcyBpcyBzb21lDQpsb25nDQp0ZXN0Lg==" + ), + ( + "", + b"" + ) + ]; + for &(raw, encoded) in pairs.iter() { + assert_eq!( + normal_encode(raw).as_bytes(), + 
encoded + ); + assert_eq!( + assert_ok!(normal_decode(encoded)), + raw.as_bytes() + ) + + } + } + + macro_rules! test_ecw_encode { + ($name:ident, data $data:expr => [$($item:expr),*]) => { + #[test] + fn $name() { + let test_data = $data; + let mut out = VecWriter::new( + SoftAsciiStr::from_unchecked("utf8"), + EncodedWordEncoding::Base64 + ); + + encoded_word_encode( test_data, &mut out ); + + let expected = &[ + $($item),* + ]; + + let iter = expected.iter() + .zip( out.data().iter().map(|x|x.as_str()) ) + .enumerate(); + + for ( idx, (expected, got) ) in iter { + if *expected != got { + panic!( " item nr {}: {:?} != {:?} ", idx, expected, got ); + } + } + + let e_len = expected.len(); + let g_len = out.data().len(); + if e_len > g_len { + panic!( "expected following additional items: {:?}", &expected[g_len..e_len]) + } + if e_len < g_len { + panic!( "got following additional items: {:?}", &out.data()[e_len..g_len]) + } + } + }; + } + + test_ecw_encode! { ecw_simple, + data "()\"" => [ + "=?utf8?B?KCki?=" + ] + } + + test_ecw_encode! { ecw_simple_max_len, + data "012345678901234567890123456789012345678944448888" => [ + "=?utf8?B?MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nzg5MDEyMzQ1Njc4OTQ0NDQ4ODg4?=" + ] + } + + test_ecw_encode! { multiple_ecws, + data "012345678901234567890123456789012345678944448888NEWWORD" => [ + "=?utf8?B?MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nzg5MDEyMzQ1Njc4OTQ0NDQ4ODg4?=", + "=?utf8?B?TkVXV09SRA==?=" + ] + } + + test_ecw_encode! { ecw_end_in_multibyte_codepoint, + data "01234567890123456789012345678901234567894444888↓" => [ + "=?utf8?B?MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nzg5MDEyMzQ1Njc4OTQ0NDQ4ODg=?=", + "=?utf8?B?4oaT?=" + ] + } + + + #[test] + fn decode_encoded_word() { + assert_eq!( + assert_ok!(encoded_word_decode("dGhpc19jcmF6eV9lbmNvZGVkX3dvcmQ=")), + b"this_crazy_encoded_word" + ); + } +}
\ No newline at end of file diff --git a/internals/src/bind/encoded_word/impls.rs b/internals/src/bind/encoded_word/impls.rs new file mode 100644 index 0000000..f7051da --- /dev/null +++ b/internals/src/bind/encoded_word/impls.rs @@ -0,0 +1,98 @@ +use soft_ascii_string::{ SoftAsciiString, SoftAsciiChar, SoftAsciiStr }; + +use vec1::Vec1; +use grammar::encoded_word::{ MAX_ECW_LEN, ECW_SEP_OVERHEAD }; +use ::encoder::EncodingWriter; +use super::{ EncodedWordWriter, EncodedWordEncoding as Encoding }; + +pub struct VecWriter<'a> { + data: Vec1<SoftAsciiString >, + charset: &'a SoftAsciiStr, + encoding: Encoding +} + +impl<'a> VecWriter<'a> { + pub fn new(charset: &'a SoftAsciiStr, encoding: Encoding) -> Self { + let data = Vec1::new( SoftAsciiString::new() ); + VecWriter { data, charset, encoding } + } + + pub fn data( &self ) -> &[SoftAsciiString] { + &*self.data + } +} + +impl<'a> Into<Vec1<SoftAsciiString>> for VecWriter<'a> { + fn into(self) -> Vec1<SoftAsciiString> { + self.data + } +} + +impl<'a> EncodedWordWriter for VecWriter<'a> { + + fn encoding( &self ) -> Encoding { + self.encoding + } + + fn write_char( &mut self, ch: SoftAsciiChar ) { + self.data.last_mut().push( ch ); + } + + fn write_charset( &mut self ) { + self.data.last_mut().extend( self.charset.chars() ) + } + + fn write_ecw_seperator( &mut self ) { + self.data.push( SoftAsciiString::new() ) + } + + fn max_payload_len( &self ) -> usize { + MAX_ECW_LEN - ECW_SEP_OVERHEAD - self.charset.len() - 1 + } +} + +pub struct WriterWrapper<'a, 'b: 'a>{ + charset: &'a SoftAsciiStr, + encoding: Encoding, + encoder_handle: &'a mut EncodingWriter<'b> +} + +impl<'a, 'b: 'a> WriterWrapper<'a, 'b> { + pub fn new_with_charset(charset: &'a SoftAsciiStr, + encoding: Encoding, + encoder: &'a mut EncodingWriter<'b> ) -> Self + { + WriterWrapper { charset, encoding, encoder_handle: encoder } + } + + pub fn new(encoding: Encoding, + encoder: &'a mut EncodingWriter<'b> ) -> Self + { + 
Self::new_with_charset(SoftAsciiStr::from_unchecked("utf8"), encoding, encoder) + } +} + +impl<'a, 'b: 'a> EncodedWordWriter for WriterWrapper<'a, 'b> { + + fn encoding( &self ) -> Encoding { + self.encoding + } + + fn write_charset( &mut self ) { + //TODO fix + let _ = self.encoder_handle.write_str( self.charset ); + } + + fn write_ecw_seperator( &mut self ) { + self.encoder_handle.write_fws(); + } + + fn write_char( &mut self, ch: SoftAsciiChar ) { + //TODO fix + let _ = self.encoder_handle.write_char( ch ); + } + + fn max_payload_len( &self ) -> usize { + MAX_ECW_LEN - ECW_SEP_OVERHEAD - self.charset.len() - 1 + } +} diff --git a/internals/src/bind/encoded_word/mod.rs b/internals/src/bind/encoded_word/mod.rs new file mode 100644 index 0000000..ef61fc6 --- /dev/null +++ b/internals/src/bind/encoded_word/mod.rs @@ -0,0 +1,92 @@ +use soft_ascii_string::{ SoftAsciiStr, SoftAsciiChar }; + +use super::{base64, quoted_printable}; + +mod impls; +pub use self::impls::*; + +#[derive(Debug, Copy, Clone, Hash, Eq, PartialEq)] +pub enum EncodedWordEncoding { + Base64, QuotedPrintable +} + +impl EncodedWordEncoding { + + /// returns the acronym for the given encoding + /// used in a encoded word + pub fn acronym(&self) -> &'static SoftAsciiStr { + use self::EncodedWordEncoding::*; + match *self { + Base64 => SoftAsciiStr::from_unchecked("B"), + QuotedPrintable => SoftAsciiStr::from_unchecked("Q") + } + } + + /// encodes a given utf8 string + /// + /// either `self::quoted_printable::encoded_word_encode` + /// or `self::base64::encoded_word_encode_utf8` is used + /// depending on which value `self` is. 
+ /// + /// As both algorithm need to know about code point boundaries + /// only encoding utf8 is supported for now + /// + pub fn encode<R, O>(&self, input: R, out: &mut O) + where R: AsRef<str>, O: EncodedWordWriter + { + use self::EncodedWordEncoding::*; + let input: &str = input.as_ref(); + match *self { + Base64 => { + base64::encoded_word_encode(input, out) + }, + QuotedPrintable => { + quoted_printable::encoded_word_encode_utf8(input, out) + } + } + } +} + +pub trait EncodedWordWriter { + fn write_char( &mut self, ch: SoftAsciiChar ); + fn write_charset( &mut self ); + fn encoding( &self ) -> EncodedWordEncoding; + fn write_ecw_seperator( &mut self ); + + /// Returns the maximal length of the paylod/encoded data + /// + /// Any number of calls to methods on in trait in any way + /// should never be able to change the returned value. + /// Only changing e.g. the charset or encoding should be + /// able to change what `max_paylod_len` returns. + fn max_payload_len( &self ) -> usize; + + fn write_ecw_start( &mut self ) { + let qm = SoftAsciiChar::from_unchecked('?'); + self.write_char(SoftAsciiChar::from_unchecked('=')); + self.write_char(qm); + self.write_charset(); + self.write_char(qm); + let acronym = self.encoding().acronym(); + self.write_str( acronym ); + self.write_char(qm); + } + + fn write_ecw_end( &mut self ) { + self.write_char( SoftAsciiChar::from_unchecked('?') ); + self.write_char( SoftAsciiChar::from_unchecked('=') ); + } + + + fn start_next_encoded_word( &mut self ) { + self.write_ecw_end(); + self.write_ecw_seperator(); + self.write_ecw_start(); + } + + fn write_str( &mut self, s: &SoftAsciiStr ) { + for ch in s.chars() { + self.write_char(ch) + } + } +}
\ No newline at end of file diff --git a/internals/src/bind/idna.rs b/internals/src/bind/idna.rs new file mode 100644 index 0000000..7b82d69 --- /dev/null +++ b/internals/src/bind/idna.rs @@ -0,0 +1,73 @@ +use soft_ascii_string::SoftAsciiString; +use idna; + +use ::error::{EncodingError, EncodingErrorKind}; + + +/// uses puny code on given domain to return a ascii representation +/// +/// # Implementation Detail +/// this function uses `idna::domain_to_ascii`, see the +/// `idna` crates documentation fore more details on how +/// exactly all edgecase are handled +/// +/// # Note +/// that this function does not validate the domain, e.g. +/// if you puny code the domain `"this seems\0so;wrong"` it +/// will return `Ok("this seems\0so;wrong")` +/// +pub fn puny_code_domain<R: AsRef<str>>(domain: R) + -> Result<SoftAsciiString, EncodingError> +{ + _puny_code_domain(domain.as_ref()) +} + +fn _puny_code_domain(domain: &str) + -> Result<SoftAsciiString, EncodingError> +{ + match idna::domain_to_ascii(domain) { + Ok(asciified) => { + //SAFE: well we converted it to ascii, so it's ascii + Ok(SoftAsciiString::from_unchecked(asciified)) + }, + Err(_non_informative_err) => { + Err(EncodingErrorKind::NotEncodable { encoding: "punycode" }.into()) + } + } +} + + +#[cfg(test)] +mod test { + use idna; + use super::puny_code_domain; + + #[test] + fn idna_does_not_validate() { + let domain = "this seems\0so;wrong"; + assert_eq!( + domain.to_owned(), + assert_ok!( idna::domain_to_ascii(domain) ) + ); + } + + #[test] + fn nop_puny_code() { + let domain = "is_ascii.notadomain"; + + let encoded = assert_ok!( puny_code_domain( domain ) ); + assert_eq!( + &*encoded, + "is_ascii.notadomain" + ); + } + #[test] + fn puny_code_ascii_mail() { + let domain = "nöt_ascii.ü"; + let encoded = assert_ok!( puny_code_domain(domain) ); + assert_eq!( + &*encoded, + "xn--nt_ascii-n4a.xn--tda" + ); + } +}
\ No newline at end of file diff --git a/internals/src/bind/mime.rs b/internals/src/bind/mime.rs new file mode 100644 index 0000000..afede94 --- /dev/null +++ b/internals/src/bind/mime.rs @@ -0,0 +1,56 @@ +use std::borrow::Cow; + +use soft_ascii_string::{ SoftAsciiStr, SoftAsciiString}; +use grammar::is_token_char; +use percent_encoding::{ + EncodeSet, + percent_encode +}; + +#[derive(Debug, Eq, PartialEq, Clone, Copy, Hash)] +struct MimeParamEncodingSet; +impl EncodeSet for MimeParamEncodingSet { + fn contains(&self, byte: u8) -> bool { + //if it is in the encoding set we need to encode it + //which we need to to if it is _not_ a token char + !is_token_char(byte as char) + } +} + + +/// percent encodes a byte sequence so that it can be used +/// in a RFC 2231 conform encoded mime header parameter +pub fn percent_encode_param_value<'a, R>(input: &'a R) -> Cow<'a, SoftAsciiStr> + where R: ?Sized+AsRef<[u8]> +{ + let cow: Cow<'a, str> = percent_encode(input.as_ref(), MimeParamEncodingSet).into(); + match cow { + Cow::Owned(o) => + //SAFE: MimeParamEncodingSet makes all non-us-ascii bytes encoded AND + // percent_encoding::percent_encode always only produces ascii anyway + Cow::Owned(SoftAsciiString::from_unchecked(o)), + Cow::Borrowed(b) => + Cow::Borrowed(SoftAsciiStr::from_unchecked(b)) + } +} + + +#[cfg(test)] +mod test { + use std::borrow::Cow; + use super::*; + + #[test] + fn encode_simple() { + let input = "this is tüxt"; + let res = percent_encode_param_value(input); + assert_eq!("this%20is%20t%C3%BCxt", res.as_str()); + } + + #[test] + fn no_encode_no_alloc() { + let input = "full_valid"; + let res = percent_encode_param_value(input); + assert_eq!(res, Cow::Borrowed(input)); + } +}
\ No newline at end of file diff --git a/internals/src/bind/mod.rs b/internals/src/bind/mod.rs new file mode 100644 index 0000000..6b475ee --- /dev/null +++ b/internals/src/bind/mod.rs @@ -0,0 +1,8 @@ +//! This module contains bindings to a number of external crates. + +pub mod encoded_word; +pub mod base64; +pub mod quoted_string; +pub mod quoted_printable; +pub mod idna; +pub mod mime; diff --git a/internals/src/bind/quoted_printable.rs b/internals/src/bind/quoted_printable.rs new file mode 100644 index 0000000..7ef6291 --- /dev/null +++ b/internals/src/bind/quoted_printable.rs @@ -0,0 +1,360 @@ +use soft_ascii_string::{ SoftAsciiChar, SoftAsciiString }; +use { quoted_printable as extern_quoted_printable }; + +use failure::Fail; +use ::error::{EncodingError, EncodingErrorKind}; +use super::encoded_word::EncodedWordWriter; + +/// a quoted printable encoding suitable for content transfer encoding, +/// but _not_ suited for the encoding in encoded words +pub fn normal_encode<A: AsRef<[u8]>>(data: A) -> SoftAsciiString { + let encoded = extern_quoted_printable::encode_to_str(data); + SoftAsciiString::from_unchecked(encoded) +} + +/// a quoted printable decoding suitable for content transfer encoding +#[inline] +pub fn normal_decode<R: AsRef<[u8]>>(input: R) + -> Result<Vec<u8>, EncodingError> +{ + //extern_quoted_printable h + extern_quoted_printable::decode( + input.as_ref(), extern_quoted_printable::ParseMode::Strict + ).map_err(|err| err + .context(EncodingErrorKind::Malformed) + .into() + ) +} + +/// a quoted printable decoding suitable for decoding a quoted printable +/// encpded text in encoded words +#[inline(always)] +pub fn encoded_word_decode<R: AsRef<[u8]>>( input: R ) -> Result<Vec<u8>, EncodingError> { + //we can just use the stadard decoding + normal_decode( input ) +} + +//FIXME we don't use EncodedWord context here, +// instead we use the most restructive context as a basis, +// making it compatilble with all context, but not nessesary +// the best 
solution... +/// Simple wrapper around ecoded_word_encode for utf8 strings only +pub fn encoded_word_encode_utf8<'a, O>(word: &str, writer: &mut O ) + where O: EncodedWordWriter +{ + let iter = word.char_indices().map( |(idx, ch)| { + &word.as_bytes()[idx..idx+ch.len_utf8()] + }); + encoded_word_encode(iter, writer ); +} + +/// +/// Quoted Printable encoding for Encoded Words in MIME-Headers +/// +/// Which means: +/// 1. there is a limit to the maximum number of characters +/// - the limit is 75 INCLUDING the `=?charset?encoding?...?=` overhead +/// - as such the line length limit of quoted printable can not be hit, +/// the quoted printable part is at most 67 chars long, e.g. for utf8 +/// it is at most 64 chars +/// 2. has to be one token, so no ' ','\t' and neither soft nor hard newlines +/// 3. no '?' character +/// +/// The input is a sequence of bytes split up in chunks where +/// a split in multipl encoded words can be done between any +/// two chunks but not in a chunk. Wrt. utf8 a chunk would +/// correspond to a character, e.g. `[65]` for `'a'` and +/// `[0xe2, 0x99, 0xa5]` for a `'♥'`. +/// +/// Note that a chunk can with more than 21 byte is not guranteed to +/// work, and can trigger a panic. +/// +/// As this has to be safe for usage in all header contexts, additional +/// to the chars required by the standard (i.e. '=') following chars are ALWAYS +/// quoted' ', '\t', '?', '(', ')'. Also '\n','\r' see the note below for more +/// details. +/// +/// +/// # Panics: +/// +/// 1. if the encoded size of a chunk is more than 16 byte, which can +/// happen if a chunk has more than 5 bytes. For comparison utf8 has +/// at most chunks with 4 bytes leading to at most 12 byte buffer usage. +/// +/// 2. if max size if >76 as no new line handling is implemented and +/// the max size for the use case can be at most 67 chars +/// +/// 3. if a single encoded chunk can not be written as one because of +/// the length limitation AFTER a new encoded word was started. 
+/// +/// # Note: +/// as it has to be a token no new line characters can appear in the output, +/// BUT q-encoding also forbids the encoding of CRLF line breaks in TEXT! +/// bodies, which is mean to not mess up with the limitations to the line +/// length, but they are allowed to appear in non TEXT data, but this +/// function should, but might not be limited to be used with text data, +/// which should but might not be limited to data not containing any new +/// line character. For now any appearance of '\r' or '\n' will be encoded +/// like any other "special" byte, for the future a context might be needed. +/// (Especially as encoded words can contain non-ascii text in which '\r','\n' +/// might be encoded with completely different bytes, but when the RFC speaks of +/// '\r','\n' it normally means the bytes 10/13 independent of the character set, +/// or if they appear in a image, zip-archiev etc. ) +pub fn encoded_word_encode<'a, I, O>(input: I, out: &mut O ) + where I: Iterator<Item=&'a [u8]>, O: EncodedWordWriter +{ + out.write_ecw_start(); + let max_payload_len = out.max_payload_len(); + let mut remaining = max_payload_len; + //WARN: on remaining being > 67 + let mut buf = [SoftAsciiChar::from_unchecked('X'); 16]; + + for chunk in input { + let mut buf_idx = 0; + + for byte in chunk { + let byte = *byte; + match byte { + // this is the way to go as long as we don't want to behave differently for + // different context, the COMMENT context allows more chars, and the + // TEXT context even more + b'!' 
| b'*' | + b'+' | b'-' | + b'/' | b'_' | + b'0'...b'9' | + b'A'...b'Z' | + b'a'...b'z' => { + buf[buf_idx] = SoftAsciiChar::from_unchecked(byte as char); + buf_idx += 1; + }, + _otherwise => { + buf[buf_idx] = SoftAsciiChar::from_unchecked('='); + buf[buf_idx+1] = lower_nibble_to_hex( byte >> 4 ); + buf[buf_idx+2] = lower_nibble_to_hex( byte ); + buf_idx += 3; + } + } + } + if buf_idx > remaining { + out.start_next_encoded_word(); + remaining = max_payload_len; + } + if buf_idx > remaining { + panic!( "single character longer then max length ({:?}) of encoded word", remaining ); + } + for idx in 0..buf_idx { + out.write_char( buf[idx] ) + } + remaining -= buf_idx; + } + out.write_ecw_end() +} + +#[inline] +fn lower_nibble_to_hex( half_byte: u8 ) -> SoftAsciiChar { + static CHARS: &[char] = &[ + '0', '1', '2', '3', '4', '5', + '6', '7', '8', '9', 'A', 'B', + 'C', 'D', 'E', 'F' + ]; + + SoftAsciiChar::from_unchecked(CHARS[ (half_byte & 0x0F) as usize ]) +} + + + + +#[cfg(test)] +mod test { + use soft_ascii_string::SoftAsciiStr; + use ::bind::encoded_word::EncodedWordEncoding; + use super::super::encoded_word::VecWriter; + use super::*; + + #[test] + fn to_hex() { + let data = &[ + ('0', 0b11110000), + ('0', 0b0 ), + ('7', 0b0111), + ('7', 0b10111), + ('F', 0b1111) + ]; + for &(ch, byte) in data { + assert_eq!( lower_nibble_to_hex( byte), ch ); + } + + } + + macro_rules! 
test_ecw_encode { + ($name:ident, data $data:expr => [$($item:expr),*]) => { + #[test] + fn $name() { + let test_data = $data; + let mut out = VecWriter::new( + SoftAsciiStr::from_unchecked("utf8"), + EncodedWordEncoding::QuotedPrintable + ); + + encoded_word_encode_utf8( test_data, &mut out ); + + let expected = &[ + $($item),* + ]; + let iter = expected.iter() + .zip( out.data().iter().map(|x|x.as_str()) ) + .enumerate(); + + for ( idx, (expected, got) ) in iter { + if *expected != got { + panic!( " item nr {}: {:?} != {:?} ", idx, expected, got ); + } + } + + let e_len = expected.len(); + let g_len = out.data().len(); + if e_len > g_len { |