WIP: Implement Decoder based on source code of Encoderdecoder

author: Matthias Beyer <mail@beyermatthias.de> 2019-12-26 13:05:08 +0100
committer: Matthias Beyer <mail@beyermatthias.de> 2019-12-26 13:05:08 +0100
commit: 54645cdc4b3706f9d9c95a3a2c82b126862fc839 (patch)
tree: c8b15b0b2bee4ad171b78136cbcff527c4df63f4
parent: 71ea078d4eb5d857680a69b01f7427ea43e2f5a2 (diff)
1 files changed, 1679 insertions, 0 deletions
diff --git a/internals/src/decoder/mod.rs b/internals/src/decoder/mod.rs
new file mode 100644
index 0000000..d2474f4
--- /dev/null
+++ b/internals/src/decoder/mod.rs
@@ -0,0 +1,1679 @@
+//! This module provides the Decoder.
+
+use std::borrow::Cow;
+use std::str;
+
+use failure::Fail;
+use soft_ascii_string::{SoftAsciiStr, SoftAsciiChar};
+
+use grammar::is_atext;
+use ::utils::{
+    is_utf8_continuation_byte,
+    vec_insert_bytes
+};
+use ::MailType;
+use ::error::{
+    DecodingError, DecodingErrorKind,
+    UNKNOWN, UTF_8, US_ASCII
+};
+
+#[cfg_attr(test, macro_use)]
+mod decodable;
+
+pub use self::decodable::*;
+
+/// Soft line length limit as specified in RFC 5322, not including CRLF.
+pub const LINE_LEN_SOFT_LIMIT: usize = 78;
+/// Hard line length limit as specified in RFC 5322 (mail) + RFC 5321 (smtp), not including CRLF.
+pub const LINE_LEN_HARD_LIMIT: usize = 998;
+
+/// The line terminator (CRLF) appended to headers, bodies and blank lines.
+pub const NEWLINE: &str = "\r\n";
+/// CRLF followed by one space; inserted by `break_line_on_fws` as a soft line break.
+pub const NEWLINE_WITH_SPACE: &str = "\r\n ";
+
+/// DecodingBuffer for a Mail providing a buffer for decodable traits.
+///
+/// Bundles the raw output bytes with the `MailType` the buffer was created for.
+pub struct DecodingBuffer {
+    // mail type passed to `new`; handed on to every handle created by `writer()`
+    mail_type: MailType,
+    // all bytes written so far (header lines, blank lines and bodies)
+    buffer: Vec<u8>,
+}
+
+impl DecodingBuffer {
+
+    /// Creates an empty buffer for the given mail type.
+    pub fn new(mail_type: MailType) -> Self {
+        let buffer = Vec::new();
+        DecodingBuffer { mail_type, buffer }
+    }
+
+    /// Returns the mail type for which the buffer was created.
+    pub fn mail_type( &self ) -> MailType {
+        self.mail_type
+    }
+
+    /// Returns a new `DecodingWriter` holding a mutable reference
+    /// to the internal byte buffer.
+    pub fn writer(&mut self) -> DecodingWriter {
+        let mail_type = self.mail_type;
+        DecodingWriter::new(mail_type, &mut self.buffer)
+    }
+
+    /// Calls `func` with a fresh `DecodingWriter` and cleans up afterwards.
+    ///
+    /// - if `func` succeeded, `finish_header()` is called on the handle
+    /// - if `func` returned an error, `undo_header()` is called on the handle,
+    ///   which discards the partial write but nothing from before the last
+    ///   `finish_header()` call
+    ///
+    /// The result of `func` is passed through unchanged.
+    pub fn write_header_line<FN>(&mut self, func: FN) -> Result<(), DecodingError>
+        where FN: FnOnce(&mut DecodingWriter) -> Result<(), DecodingError>
+    {
+        let mut writer = self.writer();
+        let outcome = func(&mut writer);
+        if outcome.is_ok() {
+            writer.finish_header();
+        } else {
+            writer.undo_header();
+        }
+        outcome
+    }
+
+    /// Appends a single CRLF to the buffer.
+    pub fn write_blank_line(&mut self) {
+        //TODO/BENCH push_str vs. extends(&[u8])
+        self.buffer.extend_from_slice(NEWLINE.as_bytes());
+    }
+
+    /// Writes a body to the internal buffer without verifying its correctness.
+    ///
+    /// A CRLF is appended if `body` does not already end with one.
+    pub fn write_body_unchecked(&mut self, body: &impl AsRef<[u8]>) {
+        let bytes = body.as_ref();
+        let terminator = NEWLINE.as_bytes();
+        self.buffer.extend_from_slice(bytes);
+        if !bytes.ends_with(terminator) {
+            self.buffer.extend_from_slice(terminator);
+        }
+    }
+
+    /// Returns the buffer content as a string slice.
+    ///
+    /// # Error
+    ///
+    /// This can fail if the buffer (e.g. a body) does not contain valid utf8.
+    pub fn as_str(&self) -> Result<&str, DecodingError> {
+        match str::from_utf8(&self.buffer) {
+            Ok(text) => Ok(text),
+            Err(err) => {
+                let kind = DecodingErrorKind::InvalidTextDecoding {
+                    expected_encoding: UTF_8,
+                    got_encoding: UNKNOWN
+                };
+                Err(DecodingError::from((err.context(kind), self.mail_type())))
+            }
+        }
+    }
+
+    /// Copies the internal buffer into an utf-8 string if possible.
+    pub fn to_string(&self) -> Result<String, DecodingError> {
+        self.as_str().map(|text| text.to_owned())
+    }
+
+    /// Lossy conversion of the internal buffer to a string.
+    pub fn to_string_lossy(&self) -> Cow<str> {
+        String::from_utf8_lossy(&self.buffer)
+    }
+
+    /// Returns a slice view of the underlying buffer.
+    pub fn as_slice(&self) -> &[u8] {
+        self.buffer.as_slice()
+    }
+
+}
+
+
+/// Consumes the buffer and yields the raw bytes written to it.
+///
+/// Implemented as `From` rather than `Into`: `buffer.into()` keeps working
+/// through std's blanket `Into` impl and `Vec::from(buffer)` works as well.
+impl From<DecodingBuffer> for Vec<u8> {
+    fn from(buffer: DecodingBuffer) -> Vec<u8> {
+        buffer.buffer
+    }
+}
+
+/// Consumes the buffer and yields its mail type together with the raw bytes.
+///
+/// Implemented as `From` rather than `Into`: `buffer.into()` keeps working
+/// through std's blanket `Into` impl.
+impl From<DecodingBuffer> for (MailType, Vec<u8>) {
+    fn from(buffer: DecodingBuffer) -> (MailType, Vec<u8>) {
+        (buffer.mail_type, buffer.buffer)
+    }
+}
+
+/// A handle providing methods to write to the underlying buffer,
+/// keeping track of newlines, the current line length and places
+/// where the line can be broken so that the soft line length
+/// limit (78) and the hard length limit (998) can be kept.
+///
+/// It's basically a string buffer which knows how to break
+/// lines at the right place.
+///
+/// Note any act of writing a header through this handle
+/// has to be concluded by either calling `finish_header` or `undo_header`.
+/// If not this handle will panic in _test_ builds when being dropped
+/// (and the thread is not already panicking) as writes through the handle are directly
+/// writes to the underlying buffer which now contains malformed/incomplete
+/// data. (Note that this Handle does not own any Drop types so if
+/// needed `forget`-ing it won't leak any memory)
+///
+/// NOTE(review): no `Drop` impl is visible in this part of the file, so the
+/// panic-on-drop behavior described above could not be confirmed here.
+///
+/// NOTE(review): this struct is declared as `DecodingReader`, while
+/// `DecodingBuffer::writer` and `ConditionalWriteResult` refer to a type
+/// named `DecodingWriter` — one of the two names has to be settled on.
+pub struct DecodingReader<'a> {
+    // the output bytes; all writes through this handle go directly into it
+    buffer: &'a mut Vec<u8>,
+    // trace tokens, only present with the `traceing` feature
+    #[cfg(feature="traceing")]
+    trace: &'a mut Vec<TraceToken>,
+    mail_type: MailType,
+    // index into `buffer` at which the current line starts
+    line_start_idx: usize,
+    // index into `buffer` of the last position marked by `mark_fws_pos`
+    last_fws_idx: usize,
+    // true while a '\r' was consumed which is not yet followed by a '\n'
+    skipped_cr: bool,
+    /// if there had been non WS chars since the last FWS
+    /// or last line start, if there had been a line
+    /// start since the last fws.
+    content_since_fws: bool,
+    /// represents if there had been non WS chars before the last FWS
+    /// on the current line (false if there was no FWS yet on the current
+    /// line).
+    content_before_fws: bool,
+    /// represents if a FWS was just marked (opt-FWS) or was written out
+    last_fws_has_char: bool,
+    // index into `buffer` at which the current header starts,
+    // `undo_header` truncates the buffer back to it
+    header_start_idx: usize,
+    // length `undo_header` truncates the trace back to (`traceing` feature only)
+    #[cfg(feature="traceing")]
+    trace_start_idx: usize
+}
+
+impl<'inner> DecodingReader<'inner> {
+
+    /// Creates a handle which appends to `buffer`.
+    ///
+    /// All position markers start at the current end of `buffer`, so bytes
+    /// which are already in the buffer are not touched by `undo_header`.
+    ///
+    /// NOTE(review): the `trace`/`trace_start_idx` fields which exist with the
+    /// `traceing` feature are not initialized here, so this constructor only
+    /// builds without that feature.
+    fn new(mail_type: MailType, buffer: &'inner mut Vec<u8>) -> Self {
+        let start_idx = buffer.len();
+        // `Self` instead of a spelled out type name, so that the literal
+        // always names the type this impl block belongs to
+        Self {
+            buffer,
+            mail_type,
+            line_start_idx: start_idx,
+            last_fws_idx: start_idx,
+            skipped_cr: false,
+            content_since_fws: false,
+            content_before_fws: false,
+            header_start_idx: start_idx,
+            last_fws_has_char: false,
+        }
+    }
+
+    /// Resets all line/header tracking state so that the next header
+    /// starts at the current end of the buffer.
+    ///
+    /// The buffer content itself is not changed.
+    fn reinit(&mut self) {
+        let start_idx = self.buffer.len();
+        self.line_start_idx     = start_idx;
+        self.last_fws_idx       = start_idx;
+        self.skipped_cr         = false;
+        self.content_since_fws  = false;
+        self.content_before_fws = false;
+        // keep in sync with `new`, which also starts without a written FWS char
+        self.last_fws_has_char  = false;
+        self.header_start_idx   = start_idx;
+        // `undo_header` truncates the trace to this index, so it has to
+        // move forward together with `header_start_idx`
+        #[cfg(feature="traceing")]
+        { self.trace_start_idx = self.trace.len(); }
+    }
+
+    /// Reports whether bytes were written since the current header was started,
+    /// i.e. since creation or the last `finish_header`/`undo_header`/`commit_partial_header`.
+    pub fn has_unfinished_parts(&self) -> bool {
+        self.header_start_idx != self.buffer.len()
+    }
+
+    /// Returns the mail type this handle was created with.
+    pub fn mail_type(&self) -> MailType {
+        self.mail_type
+    }
+
+    /// Returns true if the current line has content, i.e. any char
+    /// other than `' '` and `'\t'`, before or after the last FWS.
+    pub fn line_has_content(&self) -> bool {
+        self.content_before_fws || self.content_since_fws
+    }
+
+    /// Returns the number of bytes written to the current line so far.
+    pub fn current_line_byte_length(&self) -> usize {
+        let buffer_len = self.buffer.len();
+        buffer_len - self.line_start_idx
+    }
+
+    /// marks the current position as a place where a soft
+    /// line break (i.e. "\r\n ") can be inserted
+    ///
+    /// Content written before this call counts as "content before the FWS"
+    /// from now on, and the mark starts out without a written WS char.
+    ///
+    /// # Trace
+    /// NOTE(review): this was documented as pushing a `MarkFWS` token, but the
+    /// visible code does not push any trace token here.
+    pub fn mark_fws_pos(&mut self) {
+        self.content_before_fws |= self.content_since_fws;
+        self.content_since_fws   = false;
+        self.last_fws_idx        = self.buffer.len();
+        self.last_fws_has_char   = false;
+    }
+
+    /// Writes a single ascii char to the underlying buffer.
+    ///
+    /// # Error
+    /// - fails if the hard line length limit is breached and the
+    ///   line can not be broken with soft line breaks
+    /// - fails if the buffer would contain an orphan '\r' or '\n' after the write
+    ///
+    /// # Trace (`traceing` feature only)
+    /// can extend or push a `Text` token or push `CRLF`/`TruncateToCRLF`
+    pub fn write_char(&mut self, ch: SoftAsciiChar) -> Result<(), DecodingError>  {
+        let as_char: char = ch.into();
+        // scratch space for the utf8 encoding of the char
+        let mut utf8_buf = [0xff_u8; 4];
+        self.internal_write_char(as_char.encode_utf8(&mut utf8_buf))
+    }
+
+    /// writes an ascii str to the underlying buffer
+    ///
+    /// # Error
+    /// - fails if the hard line length limit is breached and the
+    ///   line can not be broken with soft line breaks
+    /// - buffer would contain an orphan '\r' or '\n' after the write
+    ///
+    /// Note that in case of an error part of the content might already
+    /// have been written to the buffer, therefore it is recommended
+    /// to call `undo_header` after an error (especially if the
+    /// handle is dropped after this!)
+    ///
+    /// # Trace (`traceing` feature only)
+    /// can push `Text`,`CRLF`
+    /// (NOTE(review): documented as also pushing `NowStr`, which the visible
+    /// code does not do)
+    ///
+    pub fn write_str(&mut self, s: &SoftAsciiStr)  -> Result<(), DecodingError>  {
+        self.internal_write_str(s.as_str())
+    }
+
+
+    /// Writes a utf8 str into the buffer if the mail is internationalized.
+    ///
+    /// # Error (ConditionalWriteResult)
+    /// - yields `ConditionFailure` if the underlying MailType
+    ///   is not Internationalized, nothing is written in that case
+    /// - yields `GeneralFailure` if the hard line length limit is reached
+    /// - or if the buffer would contain an orphan '\r' or '\n' after the write
+    ///
+    /// Note that in case of a `GeneralFailure` part of the content might
+    /// already have been written to the buffer, therefore it is recommended
+    /// to call `undo_header` after an error (especially if the
+    /// handle is dropped after this!)
+    ///
+    /// # Trace (`traceing` feature only)
+    /// can push `Text`,`CRLF`
+    pub fn write_if_utf8<'short>(&'short mut self, s: &str)
+        -> ConditionalWriteResult<'short, 'inner>
+    {
+        if !self.mail_type().is_internationalized() {
+            return ConditionalWriteResult::ConditionFailure(self);
+        }
+        self.internal_write_str(s).into()
+    }
+
+    /// Writes a utf8 str, failing if the mail type is not internationalized.
+    ///
+    /// # Error
+    /// - fails with `InvalidTextDecoding` if the mail type is not
+    ///   internationalized; the str context of the error is the current
+    ///   line (lossily decoded) followed by `s`, nothing is written
+    /// - otherwise fails whenever writing `s` to the buffer fails
+    pub fn write_utf8(&mut self, s: &str) -> Result<(), DecodingError> {
+        if self.mail_type().is_internationalized() {
+            return self.internal_write_str(s);
+        }
+
+        let kind = DecodingErrorKind::InvalidTextDecoding {
+            expected_encoding: US_ASCII,
+            got_encoding: UTF_8
+        };
+        let mut err = DecodingError::from((kind, self.mail_type()));
+        let current_line = &self.buffer[self.line_start_idx..];
+        let mut context = String::from_utf8_lossy(current_line).into_owned();
+        context.push_str(s);
+        err.set_str_context(context);
+        Err(err)
+    }
+
+    /// Writes `s` if all of its chars are atext for the mail type of this handle.
+    ///
+    /// This method is mainly an optimization as the "is atext" and the
+    /// "is ascii if MailType is Ascii" aspects are checked at the same
+    /// time, resulting in a str which you know is ascii _if_ the mail
+    /// type is Ascii and which might be non-us-ascii if the mail type
+    /// is Internationalized.
+    ///
+    /// # Error (ConditionalWriteResult)
+    /// - yields `ConditionFailure` if the text is not valid atext,
+    ///   this indirectly also includes the utf8/Internationalization check
+    ///   as `is_atext` is called with the mail type
+    /// - yields `GeneralFailure` if the hard line length limit is reached and
+    ///   the line can't be broken with soft line breaks
+    /// - or if the buffer would contain an orphan '\r' or '\n' after the write
+    ///   (excluding a trailing `'\r'` as it is still valid if followed by a
+    ///    `'\n'`)
+    ///
+    /// Note that in case of a `GeneralFailure` part of the content might
+    /// already have been written to the buffer, therefore it is recommended
+    /// to call `undo_header` after an error (especially if the
+    /// handle is dropped after this!)
+    ///
+    /// # Trace (`traceing` feature only)
+    /// does push `NowAText` and then can push `Text`
+    ///
+    pub fn write_if_atext<'short>(&'short mut self, s: &str)
+        -> ConditionalWriteResult<'short, 'inner>
+    {
+        let mail_type = self.mail_type();
+        if !s.chars().all(|ch| is_atext(ch, mail_type)) {
+            return ConditionalWriteResult::ConditionFailure(self);
+        }
+
+        #[cfg(feature="traceing")]
+        { self.trace.push(TraceToken::NowAText) }
+        // the ascii or not aspect is already covered by `is_atext`
+        self.internal_write_str(s).into()
+    }
+
+    /// Passes the input `s` to the condition evaluation function `cond` and
+    /// writes it _without additional checks_ to the buffer if `cond` returned
+    /// true.
+    ///
+    /// Yields `ConditionFailure` (and writes nothing) if `cond` returned false,
+    /// `GeneralFailure` if the write itself failed.
+    ///
+    pub fn write_if<'short, FN>(&'short mut self, s: &str, cond: FN)
+        -> ConditionalWriteResult<'short, 'inner>
+        where FN: FnOnce(&str) -> bool
+    {
+        if !cond(s) {
+            return ConditionalWriteResult::ConditionFailure(self);
+        }
+
+        #[cfg(feature="traceing")]
+        { self.trace.push(TraceToken::NowCondText) }
+        // no mail type check here, `cond` is the only gate for `s`
+        self.internal_write_str(s).into()
+    }
+
+    /// writes a string to the buffer without checking if it is compatible
+    /// with the mail type, if not used correctly this can write Utf8 to
+    /// an Ascii Mail, which is incorrect but has to be safe wrt. rust's safety.
+    ///
+    /// Use it as a replacement for cases similar to following:
+    ///
+    /// ```ignore
+    /// check_if_text_if_valid(text)?;
+    /// if mail_type.is_internationalized() {
+    ///     handle.write_utf8(text)?;
+    /// } else {
+    ///     handle.write_str(SoftAsciiStr::from_unchecked(text))?;
+    /// }
+    /// ```
+    ///
+    /// ==> instead ==>
+    ///
+    /// ```ignore
+    /// check_if_text_if_valid(text)?;
+    /// handle.write_str_unchecked(text)?;
+    /// ```
+    ///
+    /// though it gives a different tracing (a `NowUnchecked` token is pushed
+    /// with the `traceing` feature) it is roughly equivalent.
+    ///
+    pub fn write_str_unchecked( &mut self, s: &str) -> Result<(), DecodingError> {
+        #[cfg(feature="traceing")]
+        { self.trace.push(TraceToken::NowUnchecked) }
+        self.internal_write_str(s)
+    }
+
+    /// like finish_header, but won't start a new line
+    ///
+    /// This is meant to be used when _misusing_ the
+    /// writer to write a "thing" which is not a full
+    /// header. E.g. for testing if a header component
+    /// is written correctly. So you _normally_ should
+    /// not use it.
+    ///
+    /// # Trace (`traceing` feature only)
+    /// pushes `End` unless the last token already is `End`
+    pub fn commit_partial_header(&mut self) {
+        #[cfg(feature="traceing")]
+        { if let Some(&TraceToken::End) = self.trace.last() {}
+            else { self.trace.push(TraceToken::End) } }
+        self.reinit();
+    }
+
+    /// finishes the writing of a header
+    ///
+    /// It makes sure the header ends in "\r\n".
+    /// If the header ends in an orphan '\r' this
+    /// method will just "use" it for the "\r\n".
+    ///
+    /// If the header ends in a CRLF/start of buffer
+    /// followed by only WS (' ' or '\t' ) the valid
+    /// header ending is reached by truncating away
+    /// the WS padding. This is needed as "blank" lines
+    /// are not allowed.
+    ///
+    /// # Trace (`traceing` feature only)
+    /// - can push 0-1 of `[CRLF, TruncateToCRLF]`
+    /// - then does push `End`
+    /// - calling `finish_header()` multiple times in a row
+    ///   will not generate multiple `End` tokens, just one
+    pub fn finish_header(&mut self) {
+        self.start_new_line();
+        #[cfg(feature="traceing")]
+        { if let Some(&TraceToken::End) = self.trace.last() {}
+            else { self.trace.push(TraceToken::End) } }
+        self.reinit();
+    }
+
+    /// undoes all writes to the internal buffer
+    /// since the last `finish_header` or `undo_header` or
+    /// creation of this handle
+    ///
+    /// The buffer is truncated back to where the current header started
+    /// and all line tracking state is reset.
+    ///
+    /// # Trace (`traceing` feature only)
+    /// also truncates the trace back to `trace_start_idx`
+    ///
+    pub fn undo_header(&mut self) {
+        self.buffer.truncate(self.header_start_idx);
+        #[cfg(feature="traceing")]
+        { self.trace.truncate(self.trace_start_idx); }
+        self.reinit();
+    }
+
+
+
+    //---------------------------------------------------------------------------------------------/
+    //-/////////////////////////// methods only using the public iface   /////////////////////////-/
+
+    /// calls mark_fws_pos and then writes a space
+    ///
+    /// This method exists for convenience.
+    ///
+    /// Note that it is meant to not fail as you just marked
+    /// a place to break the line before writing a space.
+    ///
+    /// NOTE(review): the result of the write is discarded. `internal_write_char`
+    /// does return an error if a '\r' is still pending (`skipped_cr`) or if the
+    /// hard line length limit is reached without the line being breakable, so
+    /// such an error would be swallowed silently here — confirm this is intended.
+    ///
+    /// Note that currently soft line breaks will not
+    /// collapse whitespace. As such if you use `write_fws`
+    /// and then the line is broken at that position it will
+    /// start with two spaces (one from `\r\n ` and one which
+    /// had been there before).
+    pub fn write_fws(&mut self) {
+        self.mark_fws_pos();
+        // the marked FWS is backed by a real WS char (written below)
+        self.last_fws_has_char = true;
+        // OK: Can not error as we just marked a fws pos.
+        let _ = self.write_char(SoftAsciiChar::from_unchecked(' '));
+    }
+
+
+
+    //---------------------------------------------------------------------------------------------/
+    //-///////////////////////////          private methods               ////////////////////////-/
+
+    /// Writes `s` char by char through `internal_write_char`.
+    ///
+    /// This might partially write some data and then fail.
+    /// While an undo option could be implemented it makes
+    /// little sense for the use case, the generally available
+    /// `undo_header` is enough.
+    fn internal_write_str(&mut self, s: &str)  -> Result<(), DecodingError>  {
+        // hand every char over as the sub-slice of `s` holding its utf8 bytes
+        for (start, ch) in s.char_indices() {
+            let end = start + ch.len_utf8();
+            self.internal_write_char(&s[start..end])?;
+        }
+        Ok(())
+    }
+
+    /// if the line has at least one non-WS char a new line
+    /// will be started by adding `\r\n`, if the current line
+    /// only consists of WS then a new line will be started by
+    /// removing the blank line (note that WS are only ' ' and '\t')
+    ///
+    /// Afterwards the line tracking state (line start, FWS position and
+    /// content flags) refers to the new, empty line.
+    fn start_new_line(&mut self) {
+        if self.line_has_content() {
+            #[cfg(feature="traceing")]
+            { self.trace.push(TraceToken::CRLF) }
+
+            self.buffer.push(b'\r');
+            self.buffer.push(b'\n');
+        } else {
+            #[cfg(feature="traceing")]
+            {
+                if self.buffer.len() > self.line_start_idx {
+                    self.trace.push(TraceToken::TruncateToCRLF);
+                }
+            }
+            // e.g. if we "broke" the line on a trailing space => "\r\n  "
+            // this would not be valid so we cut away the trailing white space,
+            // but if we have "ab  " we do not want to cut away the trailing
+            // whitespace but just add "\r\n" (handled by the branch above)
+            self.buffer.truncate(self.line_start_idx);
+        }
+        self.line_start_idx = self.buffer.len();
+        self.content_since_fws = false;
+        self.content_before_fws = false;
+        self.last_fws_idx = self.line_start_idx;
+
+    }
+
+    /// Breaks the current line at the last marked FWS position, if possible.
+    ///
+    /// The line is only broken if there was content before the FWS and the
+    /// FWS does not sit at the very start of the line. If the FWS is backed
+    /// by a WS char (see `write_fws`) a plain `"\r\n"` is inserted in front
+    /// of it, else `"\r\n "` is inserted.
+    ///
+    /// Returns true if the line was broken.
+    fn break_line_on_fws(&mut self) -> bool {
+        if self.content_before_fws && self.last_fws_idx > self.line_start_idx {
+            let newline =
+                if self.last_fws_has_char {
+                    // `write_fws` marks the position *before* it pushes the WS
+                    // char, so if the break is triggered by that very write the
+                    // char is not in the buffer yet and plain indexing would
+                    // panic; only check the char if it is already there.
+                    debug_assert!(
+                        self.buffer.get(self.last_fws_idx)
+                            .map_or(true, |bch| [b' ', b'\t'].contains(bch))
+                    );
+                    NEWLINE
+                } else {
+                    NEWLINE_WITH_SPACE
+                };
+
+            vec_insert_bytes(&mut self.buffer, self.last_fws_idx, newline.as_bytes());
+            // the new line starts right behind the inserted "\r\n"
+            self.line_start_idx = self.last_fws_idx + 2;
+            self.content_before_fws = false;
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Writes exactly one char to the buffer, handling CRLF sequences
+    /// and the line length limits.
+    ///
+    /// A `'\r'` is not written but only remembered, the following `'\n'` then
+    /// starts a new line through `start_new_line`. Once the soft line length
+    /// limit is reached the line is broken at the last FWS if possible.
+    ///
+    /// # Constraints
+    ///
+    /// `unchecked_utf8_char` is expected to be exactly
+    /// one char, which means it's 1-4 bytes in length.
+    ///
+    /// The reason why a slice is expected instead of a
+    /// char is, that this function will at some point push
+    /// to a byte buffer requiring a `&[u8]` and many functions
+    /// calling this function can directly produce a &[u8]/&str.
+    ///
+    /// # Error
+    ///
+    /// - `Malformed` if a `'\n'` is not preceded by a `'\r'` or a `'\r'` is
+    ///   followed by anything but a `'\n'`
+    /// - `HardLineLengthLimitBreached` if writing the char would make the
+    ///   line longer than the hard limit (counted in bytes) and it could
+    ///   not be broken
+    ///
+    /// # Panic
+    ///
+    /// Panics if `unchecked_utf8_char` is empty.
+    /// If debug assertions are enabled it also panics, if
+    /// unchecked_utf8_char is more than just one char.
+    fn internal_write_char(&mut self, unchecked_utf8_char: &str) -> Result<(), DecodingError> {
+        debug_assert_eq!(unchecked_utf8_char.chars().count(), 1);
+
+        let bch = unchecked_utf8_char.as_bytes()[0];
+        if bch == b'\n' {
+            if self.skipped_cr {
+                self.start_new_line()
+            } else {
+                ec_bail!(
+                    mail_type: self.mail_type(),
+                    kind: Malformed
+                );
+            }
+            self.skipped_cr = false;
+            return Ok(());
+        } else {
+            if self.skipped_cr {
+                ec_bail!(
+                    mail_type: self.mail_type(),
+                    kind: Malformed
+                );
+            }
+            if bch == b'\r' {
+                self.skipped_cr = true;
+                return Ok(());
+            } else {
+                self.skipped_cr = false;
+            }
+        }
+
+        if self.current_line_byte_length() >= LINE_LEN_SOFT_LIMIT {
+            self.break_line_on_fws();
+
+            // The hard limit is a limit in bytes and a char can be up to 4
+            // bytes long, so the width of the char has to be included; for a
+            // 1 byte char this is the same as `length >= LINE_LEN_HARD_LIMIT`.
+            let len_after_write =
+                self.current_line_byte_length() + unchecked_utf8_char.len();
+            if len_after_write > LINE_LEN_HARD_LIMIT {
+                ec_bail!(
+                    mail_type: self.mail_type(),
+                    kind: HardLineLengthLimitBreached
+                );
+            }
+        }
+
+        self.buffer.extend(unchecked_utf8_char.as_bytes());
+        #[cfg(feature="traceing")]
+        {
+            //FIXME[rust/nll]: just use a `if let`-`else` with NLL's
+            let need_new =
+                if let Some(&mut TraceToken::Text(ref mut string)) = self.trace.last_mut() {
+                    string.push_str(unchecked_utf8_char);
+                    false
+                } else {
+                    true
+                };
+            if need_new {
+                let mut string = String::new();
+                string.push_str(unchecked_utf8_char);
+                self.trace.push(TraceToken::Text(string))
+            }
+
+        }
+
+        // we can't allow "blank" lines
+        if bch != b' ' && bch != b'\t' {
+            // if there is no fws this is equiv to line_has_content
+            // else line_has_content = self.content_before_fws|self.content_since_fws
+            self.content_since_fws = true;
+        }
+        Ok(())
+    }
+}
+
+/// Result of a conditional write like `write_if_utf8`, `write_if_atext` or `write_if`.
+pub enum ConditionalWriteResult<'a, 'b: 'a> {
+    /// the condition held and the write succeeded
+    Ok,
+    /// the condition did not hold; carries the handle so that
+    /// `handle_condition_failure` can run a fallback on it
+    ConditionFailure(&'a mut DecodingWriter<'b>),
+    /// the condition held but the write itself failed
+    GeneralFailure(DecodingError)
+}
+
+/// Maps the result of an unconditional write to `Ok`/`GeneralFailure`.
+impl<'a, 'b: 'a> From<Result<(), DecodingError>> for ConditionalWriteResult<'a, 'b> {
+    fn from(v: Result<(), DecodingError>) -> Self {
+        if let Err(err) = v {
+            ConditionalWriteResult::GeneralFailure(err)
+        } else {
+            ConditionalWriteResult::Ok
+        }
+    }
+}
+
+impl<'a, 'b: 'a> ConditionalWriteResult<'a, 'b> {
+
+    /// Resolves the conditional result into a plain `Result`.
+    ///
+    /// `func` is only called on a `ConditionFailure`, with the handle the
+    /// failed write was attempted on; its result is returned. `Ok` becomes
+    /// `Ok(())` and a `GeneralFailure` becomes `Err` with the carried error.
+    #[inline]
+    pub fn handle_condition_failure<FN>(self, func: FN) -> Result<(), DecodingError>
+        where FN: FnOnce(&mut DecodingWriter) -> Result<(), DecodingError>
+    {
+        match self {
+            ConditionalWriteResult::ConditionFailure(handle) => func(handle),
+            ConditionalWriteResult::GeneralFailure(err) => Err(err),
+            ConditionalWriteResult::Ok => Ok(()),
+        }
+    }
+}
+
+
+
+
+
+#[cfg(test)]
+mod test {
+
+    use soft_ascii_string::{ SoftAsciiChar, SoftAsciiStr};
+    use ::MailType;
+    use ::error::DecodingErrorKind;
+
+    use super::TraceToken::*;
+    use super::{DecodingBuffer as _Decoder};
+
+    mod test_test_utilities {
+        use encoder::TraceToken::*;
+        use super::super::simplify_trace_tokens;
+
+        #[test]
+        fn does_simplify_tokens_strip_nows() {
+            let inp = vec![
+                NowChar,
+                Text("h".into()),
+                CRLF,
+                NowStr,
+                Text("y yo".into()),
+                CRLF,
+                NowUtf8,
+                Text(", what's".into()),
+                CRLF,
+                NowUnchecked,
+                Text("up!".into()),
+                CRLF,
+                NowAText,
+                Text("abc".into())
+            ];
+            let out = simplify_trace_tokens(inp);
+            assert_eq!(out, vec![
+                Text("h".into()),
+                CRLF,
+                Text("y yo".into()),
+                CRLF,
+                Text(", what's".into()),
+                CRLF,
+                Text("up!".into()),
+                CRLF,
+                Text("abc".into())
+            ])
+
+        }
+
+        #[test]
+        fn simplify_does_collapse_text() {
+            let inp = vec![
+                NowChar,
+                Text("h".into()),
+                NowStr,
+                Text("y yo".into()),
+                NowUtf8,
+                Text(", what's".into()),
+                NowUnchecked,
+                Text(" up! ".into()),
+                NowAText,
+                Text("abc".into())
+            ];
+            let out = simplify_trace_tokens(inp);
+            assert_eq!(out, vec![
+                Text("hy yo, what's up! abc".into())
+            ]);
+        }
+
+        #[test]
+        fn simplify_works_with_empty_text() {
+            let inp = vec![
+                NowStr,
+                Text("".into()),
+                CRLF,
+            ];
+            assert_eq!(simplify_trace_tokens(inp), vec![
+                Text("".into()),
+                CRLF
+            ])
+        }
+
+        #[test]
+        fn simplify_works_with_trailing_empty_text() {
+            let inp = vec![
+                Text("a".into()),
+                CRLF,
+                Text("".into()),
+            ];
+            assert_eq!(simplify_trace_tokens(inp), vec![
+                Text("a".into()),
+                CRLF,
+                Text("".into())
+            ])
+        }
+
+    }
+
+    mod DecodableInHeader {
+        #![allow(non_snake_case)]
+        use super::super::*;
+        use self::TraceToken::*;
+
+        #[test]
+        fn is_implemented_for_closures() {
+            let closure = enc_func!(|handle: &mut DecodingWriter| {
+                handle.write_utf8("hy ho")
+            });
+
+            let mut encoder = DecodingBuffer::new(MailType::Internationalized);
+            {
+                let mut handle = encoder.writer();
+                assert_ok!(closure.encode(&mut handle));
+                handle.finish_header();
+            }
+            assert_eq!(encoder.trace.as_slice(), &[
+                NowUtf8,
+                Text("hy ho".into()),
+                CRLF,
+                End
+            ])
+        }
+    }
+
+
+    mod DecodingBuffer {
+        #![allow(non_snake_case)]
+        use super::*;
+        use super::{ _Decoder as DecodingBuffer };
+
+        #[test]
+        fn new_encoder() {
+            let encoder = DecodingBuffer::new(MailType::Internationalized);
+            assert_eq!(encoder.mail_type(), MailType::Internationalized);
+        }
+
+        #[test]
+        fn write_body_unchecked() {
+            let mut encoder = DecodingBuffer::new(MailType::Ascii);
+            let body1 = "una body\r\n";
+            let body2 = "another body";
+
+            encoder.write_body_unchecked(&body1);
+            encoder.write_blank_line();
+            encoder.write_body_unchecked(&body2);
+
+            assert_eq!(
+                encoder.as_slice(),
+                concat!(
+                    "una body\r\n",
+                    "\r\n",
+                    "another body\r\n"
+                ).as_bytes()
+            )
+        }
+    }
+
+
+    mod DecodingWriter {
+        #![allow(non_snake_case)]
+        use std::mem;
+        use std::str;
+
+        use super::*;
+        use super::{ _Decoder as DecodingBuffer };
+
+        #[test]
+        fn commit_partial_and_drop_does_not_panic() {
+            let mut encoder = DecodingBuffer::new(MailType::Ascii);
+            {
+                let mut handle = encoder.writer();
+                assert_ok!(handle.write_str(SoftAsciiStr::from_unchecked("12")));
+                handle.commit_partial_header();
+            }
+            assert_eq!(encoder.as_slice(), b"12");
+        }
+
+        #[test]
+        fn undo_does_undo() {
+            let mut encoder = DecodingBuffer::new(MailType::Ascii);
+            {
+                let mut handle = encoder.writer();
+                assert_ok!(
+                    handle.write_str(SoftAsciiStr::from_unchecked("Header-One: 12")));
+                handle.undo_header();
+            }
+            assert_eq!(encoder.as_slice(), b"");
+        }
+
+        #[test]
+        fn undo_does_not_undo_to_much() {
+            let mut encoder = DecodingBuffer::new(MailType::Ascii);
+            {
+                let mut handle = encoder.writer();
+                assert_ok!(handle.write_str(SoftAsciiStr::from_str("Header-One: 12").unwrap()));
+                handle.finish_header();
+                assert_ok!(handle.write_str(SoftAsciiStr::from_str("ups: sa").unwrap()));
+                handle.undo_header();
+            }
+            assert_eq!(encoder.as_slice(), b"Header-One: 12\r\n");
+        }
+
+        #[test]
+        fn finish_adds_crlf_if_needed() {
+            let mut encoder = DecodingBuffer::new(MailType::Ascii);
+            {
+                let mut handle = encoder.writer();
+                assert_ok!(handle.write_str(SoftAsciiStr::from_str("Header-One: 12").unwrap()));
+                handle.finish_header();
+            }
+            assert_eq!(encoder.as_slice(), b"Header-One: 12\r\n");
+        }
+
+        /// When the written header already ends in `\r\n`, `finish_header`
+        /// is expected not to append a second line ending.
+        #[test]
+        fn finish_does_not_add_crlf_if_not_needed() {
+            let mut buffer = DecodingBuffer::new(MailType::Ascii);
+            let input = SoftAsciiStr::from_str("Header-One: 12\r\n").unwrap();
+            {
+                let mut writer = buffer.writer();
+                assert_ok!(writer.write_str(input));
+                writer.finish_header();
+            }
+            assert_eq!(buffer.as_slice(), b"Header-One: 12\r\n");
+        }
+
+        /// Whitespace following the final `\r\n` (a trailing line made up
+        /// only of spaces) is expected to be removed by `finish_header`.
+        #[test]
+        fn finish_does_truncat_if_needed() {
+            let mut buffer = DecodingBuffer::new(MailType::Ascii);
+            let input = SoftAsciiStr::from_str("Header-One: 12\r\n   ").unwrap();
+            {
+                let mut writer = buffer.writer();
+                assert_ok!(writer.write_str(input));
+                writer.finish_header();
+            }
+            assert_eq!(buffer.as_slice(), b"Header-One: 12\r\n");
+        }
+
+
+        /// A header containing a folded line (`\r\n` followed by a space and
+        /// more content) is expected to be kept as written and terminated
+        /// with a single `\r\n`.
+        #[test]
+        fn finish_can_handle_fws() {
+            let mut buffer = DecodingBuffer::new(MailType::Ascii);
+            let input = SoftAsciiStr::from_str("Header-One: 12 +\r\n 4").unwrap();
+            {
+                let mut writer = buffer.writer();
+                assert_ok!(writer.write_str(input));
+                writer.finish_header();
+            }
+            assert_eq!(buffer.as_slice(), b"Header-One: 12 +\r\n 4\r\n");
+        }
+
+        /// Trailing spaces on a folded line that also carries content are
+        /// expected to be preserved; `finish_header` only appends `\r\n`.
+        #[test]
+        fn finish_only_truncats_if_needed() {
+            let mut buffer = DecodingBuffer::new(MailType::Ascii);
+            let input = SoftAsciiStr::from_str("Header-One: 12 +\r\n 4  ").unwrap();
+            {
+                let mut writer = buffer.writer();
+                assert_ok!(writer.write_str(input));
+                writer.finish_header();
+            }
+            assert_eq!(buffer.as_slice(), b"Header-One: 12 +\r\n 4  \r\n");
+        }
+
+
+        /// Writing a `\n` that is not preceded by `\r` in the middle of a
+        /// header is expected to be rejected.
+        #[test]
+        fn orphan_lf_error() {
+            let mut buffer = DecodingBuffer::new(MailType::Ascii);
+            let input = SoftAsciiStr::from_str("H: \na").unwrap();
+            {
+                let mut writer = buffer.writer();
+                assert_err!(writer.write_str(input));
+                // Discard the failed header before the writer leaves scope.
+                writer.undo_header();
+            }
+        }
+        /// Writing a `\r` that is not followed by `\n` in the middle of a
+        /// header is expected to be rejected.
+        #[test]
+        fn orphan_cr_error() {
+            let mut buffer = DecodingBuffer::new(MailType::Ascii);
+            let input = SoftAsciiStr::from_str("H: \ra").unwrap();
+            {
+                let mut writer = buffer.writer();
+                assert_err!(writer.write_str(input));
+                // Discard the failed header before the writer leaves scope.
+                writer.undo_header();
+            }
+        }
+
+        /// A lone `\n` at the very end of the written text is expected to be
+        /// rejected as well.
+        #[test]
+        fn orphan_trailing_lf() {
+            let mut buffer = DecodingBuffer::new(MailType::Ascii);
+            let input = SoftAsciiStr::from_str("H: a\n").unwrap();
+            {
+                let mut writer = buffer.writer();
+                assert_err!(writer.write_str(input));
+                // Discard the failed header before the writer leaves scope.
+                writer.undo_header();
+            }
+        }
+
+        /// A lone `\r` at the very end of the written text is accepted, and
+        /// `finish_header` is expected to complete it to `\r\n`.
+        #[test]
+        fn orphan_trailing_cr() {
+            let mut buffer = DecodingBuffer::new(MailType::Ascii);
+            let input = SoftAsciiStr::from_str("H: a\r").unwrap();
+            {
+                let mut writer = buffer.writer();
+                // Not erroring on a trailing `\r` is fine: the header gets
+                // terminated with `\r\n` anyway once it is finished.
+                assert_ok!(writer.write_str(input));
+                writer.finish_header();
+            }
+            assert_eq!(buffer.as_slice(), b"H: a\r\n");
+        }
+
+        /// Writing 500 characters without any line break (well above
+        /// `LINE_LEN_SOFT_LIMIT`, which is 78) is expected to succeed.
+        #[test]
+        fn soft_line_limit_can_be_breached() {
+            let mut buffer = DecodingBuffer::new(MailType::Ascii);
+            {
+                let mut writer = buffer.writer();
+                for _written in 0u32..500 {
+                    assert_ok!(writer.internal_write_char("a"));
+                }
+                writer.finish_header();
+            }
+        }
+
+        /// Exactly 998 characters (the value of `LINE_LEN_HARD_LIMIT`) can be
+        /// written to a single line; the next character is expected to fail.
+        #[test]
+        fn hard_line_limit_can_not_be_breached() {
+            let mut buffer = DecodingBuffer::new(MailType::Ascii);
+            {
+                let mut writer = buffer.writer();
+                // Fill the line right up to the hard limit.
+                for _written in 0u32..998 {
+                    assert_ok!(writer.internal_write_char("a"));
+                }
+                // One more character would exceed the limit.
+                assert_err!(writer.internal_write_char("b"));
+                writer.finish_header();
+            }
+        }
+
+        #[test]
+        fn break_line_on_fws() {
+            let mut encoder = DecodingBuffer::new(MailType::Ascii);
+            {
+                let mut handle = encoder.writer();
+                assert_ok!(handle.write_str(SoftAsciiStr::from_str("A23456789:").unwrap()));
+                handle.mark_fws_pos();
+                assert_ok!(handle.write_str(SoftAsciiStr::from_str(concat!(
+                    "20_3456789",
+                    "30_3456789",
+                    "40_3456789",
+                    "50_3456789",
+                    "60_3456789",
+                    "70_3456789",
author	Matthias Beyer <mail@beyermatthias.de>	2019-12-26 13:05:08 +0100
committer	Matthias Beyer <mail@beyermatthias.de>	2019-12-26 13:05:08 +0100
commit	54645cdc4b3706f9d9c95a3a2c82b126862fc839 (patch)
tree	c8b15b0b2bee4ad171b78136cbcff527c4df63f4
parent	71ea078d4eb5d857680a69b01f7427ea43e2f5a2 (diff)