Add max line length to `LinesCodec` (#590)

* codec: add new constructor `with_max_length ` to `LinesCodec` * codec: add security note to docs Signed-off-by: Eliza Weisman <eliza@buoyant.io> * Fix Rust 1.25 compatibility * codec: Fix incorrect line lengths in tests (and add assertions) Signed-off-by: Eliza Weisman <eliza@buoyant.io> * codec: Fix off-by-one error in lines codec Signed-off-by: Eliza Weisman <eliza@buoyant.io> * codec: Fix call to decode rather than decode_eof in test Signed-off-by: Eliza Weisman <eliza@buoyant.io> * codec: Fix incorrect LinesCodec::decode_max_line_length This bug was introduced after the fix for the off-by-one error. Fortunately, the doctests caught it. Signed-off-by: Eliza Weisman <eliza@buoyant.io> * codec: Minor style improvements Signed-off-by: Eliza Weisman <eliza@buoyant.io> * codec: Don't allow LinesCodec length limit to be set after construction Signed-off-by: Eliza Weisman <eliza@buoyant.io> * codec: change LinesCodec to error and discard line when at max length * codec: Fix build on Rust 1.25 The slice patterns syntax wasn't supported yet in that release. Signed-off-by: Eliza Weisman <eliza@buoyant.io> * codec: Add test for out-of-bounds index when peeking Signed-off-by: Eliza Weisman <eliza@buoyant.io> * codec: Fix out of bounds index * codec: Fix incomplete comment Signed-off-by: Eliza Weisman <eliza@buoyant.io> * codec: Add test for line decoder buffer underrun
author: Eliza Weisman <eliza@buoyant.io> 2018-09-12 10:02:57 -0700
committer: Toby Lawrence <tobz@users.noreply.github.com> 2018-09-12 13:02:57 -0400
commit: 4ae6c997ee2706bf5d10164926e9a29941aab58f (patch)
tree: 233951a9b3b24bffc59a8d2517754af6d9a8051d /tokio-codec
parent: 0f44adf5f696ac73dcdcb1fb2b91b87553c57443 (diff)
2 files changed, 178 insertions, 8 deletions
diff --git a/tokio-codec/src/lines_codec.rs b/tokio-codec/src/lines_codec.rs
index bf4135b8..57556c68 100644
--- a/tokio-codec/src/lines_codec.rs
+++ b/tokio-codec/src/lines_codec.rs
@@ -1,6 +1,6 @@
 use bytes::{BufMut, BytesMut};
 use tokio_io::_tokio_codec::{Encoder, Decoder};
-use std::{io, str};
+use std::{error, fmt, io, str};
 
 /// A simple `Codec` implementation that splits up data into lines.
 #[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
@@ -12,12 +12,76 @@ pub struct LinesCodec {
     // The next time `decode` is called with `abcde\n`, the method will
     // only look at `de\n` before returning.
     next_index: usize,
+
+    /// The maximum length for a given line. If `None`, lines will be read
+    /// until a `\n` character is reached.
+    max_length: Option<usize>,
+
+    /// Are we currently discarding the remainder of a line which was over
+    /// the length limit?
+    is_discarding: bool,
+}
+
+/// Error indicating that the maximum line length was reached.
+#[derive(Debug)]
+pub struct LengthError {
+    limit: usize,
 }
 
 impl LinesCodec {
     /// Returns a `LinesCodec` for splitting up data into lines.
+    ///
+    /// # Note
+    ///
+    /// The returned `LinesCodec` will not have an upper bound on the length
+    /// of a buffered line. See the documentation for [`with_max_length`]
+    /// for information on why this could be a potential security risk.
+    ///
+    /// [`with_max_length`]: #method.with_max_length
     pub fn new() -> LinesCodec {
-        LinesCodec { next_index: 0 }
+        LinesCodec {
+            next_index: 0,
+            max_length: None,
+            is_discarding: false,
+        }
+    }
+
+    /// Returns a `LinesCodec` with a maximum line length limit.
+    ///
+    /// If this is set, lines will be ended when a `\n` character is read, _or_
+    /// when they reach the provided number of bytes. Otherwise, lines will
+    /// only be ended when a `\n` character is read.
+    ///
+    /// # Note
+    ///
+    /// Setting a length limit is highly recommended for any `LinesCodec` which
+    /// will be exposed to untrusted input. Otherwise, the size of the buffer
+    /// that holds the line currently being read is unbounded. An attacker could
+    /// exploit this unbounded buffer by sending an unbounded amount of input
+    /// without any `\n` characters, causing unbounded memory consumption.
+    pub fn with_max_length(limit: usize) -> Self {
+        LinesCodec {
+            max_length: Some(limit - 1),
+            ..LinesCodec::new()
+        }
+    }
+
+    /// Returns the current maximum line length when decoding, if one is set.
+    ///
+    /// ```
+    /// use tokio_codec::LinesCodec;
+    ///
+    /// let codec = LinesCodec::new();
+    /// assert_eq!(codec.decode_max_length(), None);
+    /// ```
+    /// ```
+    /// use tokio_codec::LinesCodec;
+    ///
+    /// let codec = LinesCodec::with_max_length(256);
+    /// assert_eq!(codec.decode_max_length(), Some(256));
+    /// ```
+    pub fn decode_max_length(&self) -> Option<usize> {
+        self.max_length.map(|len| len + 1)
     }
 }
 
@@ -41,15 +105,52 @@ impl Decoder for LinesCodec {
     type Error = io::Error;
 
     fn decode(&mut self, buf: &mut BytesMut) -> Result<Option<String>, io::Error> {
-        if let Some(newline_offset) =
-            buf[self.next_index..].iter().position(|b| *b == b'\n')
-        {
-            let newline_index = newline_offset + self.next_index;
+        let mut trim_and_offset = None;
+        for (offset, b) in buf[self.next_index..].iter().enumerate() {
+            trim_and_offset = match (b, self.max_length) {
+                // The current character is a newline, split here.
+                (&b'\n', _) => Some((1, offset)),
+                // There's a maximum line length set, and we've reached it.
+                (_, Some(max_len))
+                    if offset.saturating_sub(self.next_index) == max_len => {
+                    // If we're at the line length limit, check if the next
+                    // character(s) is a newline before we decide to return an
+                    // error.
+                    match (buf.get(offset + 1), buf.get(offset + 2)) {
+                        (Some(&b'\n'), _) => Some((1, offset + 1)),
+                        (Some(&b'\r'), Some(&b'\n')) => Some((2, offset + 2)),
+                        _ => {
+                            // We've reached the length limit, and we're not at
+                            // the end of a line. Subsequent calls to decode
+                            // will now discard from the buffer until we reach
+                            // a new line.
+                            self.is_discarding = true;
+                            self.next_index += offset;
+                            return Err(io::Error::new(
+                                io::ErrorKind::Other,
+                                LengthError { limit: max_len + 1 },
+                            ));
+                        }
+                    }
+                },
+                // The current character isn't a newline, and we aren't at the
+                // length limit, so keep going.
+                _ => continue,
+            };
+            break;
+        };
+        if let Some((trim_amt, offset)) = trim_and_offset {
+            let newline_index = offset + self.next_index;
             let line = buf.split_to(newline_index + 1);
-            let line = &line[..line.len()-1];
+            self.next_index = 0;
+            if self.is_discarding {
+                self.is_discarding = false;
+                return self.decode(buf);
+            }
+            let line = &line[..line.len() - trim_amt];
             let line = without_carriage_return(line);
             let line = utf8(line)?;
-            self.next_index = 0;
+
             Ok(Some(line.to_string()))
         } else {
             self.next_index = buf.len();
@@ -87,3 +188,15 @@ impl Encoder for LinesCodec {
         Ok(())
     }
 }
+
+impl fmt::Display for LengthError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "reached maximum line length ({} characters)", self.limit)
+    }
+}
+
+impl error::Error for LengthError {
+    fn description(&self) -> &str {
+        "reached maximum line length"
+    }
+}
diff --git a/tokio-codec/tests/codecs.rs b/tokio-codec/tests/codecs.rs
index 925d7ef0..33953233 100644
--- a/tokio-codec/tests/codecs.rs
+++ b/tokio-codec/tests/codecs.rs
@@ -56,6 +56,63 @@ fn lines_decoder() {
 }
 
 #[test]
+fn lines_decoder_max_length() {
+    const MAX_LENGTH: usize = 6;
+
+    let mut codec = LinesCodec::with_max_length(MAX_LENGTH);
+    let buf = &mut BytesMut::new();
+
+    buf.reserve(200);
+    buf.put("line 1 is too long\nline 2\r\nline 3\n\r\n\r");
+
+    assert!(codec.decode(buf).is_err());
+    assert!(codec.decode(buf).is_err());
+
+    let line = codec.decode(buf).unwrap().unwrap();
+    assert!(line.len() <= MAX_LENGTH, "{:?}.len() <= {:?}", line, MAX_LENGTH);
+    assert_eq!("line 2", line);
+
+    let line = codec.decode(buf).unwrap().unwrap();
+    assert!(line.len() <= MAX_LENGTH, "{:?}.len() <= {:?}", line, MAX_LENGTH);
+    assert_eq!("line 3", line);
+
+    let line = codec.decode(buf).unwrap().unwrap();
+    assert!(line.len() <= MAX_LENGTH, "{:?}.len() <= {:?}", line, MAX_LENGTH);
+    assert_eq!("", line);
+
+    assert_eq!(None, codec.decode(buf).unwrap());
+    assert_eq!(None, codec.decode_eof(buf).unwrap());
+    buf.put("k");
+    assert_eq!(None, codec.decode(buf).unwrap());
+
+    let line = codec.decode_eof(buf).unwrap().unwrap();
+    assert!(line.len() <= MAX_LENGTH, "{:?}.len() <= {:?}", line, MAX_LENGTH);
+    assert_eq!("\rk", line);
+
+    assert_eq!(None, codec.decode(buf).unwrap());
+    assert_eq!(None, codec.decode_eof(buf).unwrap());
+
+    // Line that's one character too long. This could cause an out of bounds
+    // error if we peek at the next characters using slice indexing.
+    buf.put("aaabbbc");
+    assert!(codec.decode(buf).is_err());
+}
+#[test]
+fn lines_decoder_max_length_underrun() {
+    const MAX_LENGTH: usize = 6;
+
+    let mut codec = LinesCodec::with_max_length(MAX_LENGTH);
+    let buf = &mut BytesMut::new();
+    buf.put("line ");
+    assert_eq!(None, codec.decode(buf).unwrap());
+
+    buf.put("too l");
+    assert_eq!(None, codec.decode(buf).unwrap());
+    buf.put("ong\n");
+    assert_eq!("line too long", codec.decode(buf).unwrap().unwrap());
+}
+
+#[test]
 fn lines_encoder() {
     let mut codec = LinesCodec::new();
     let mut buf = BytesMut::new();
author	Eliza Weisman <eliza@buoyant.io>	2018-09-12 10:02:57 -0700
committer	Toby Lawrence <tobz@users.noreply.github.com>	2018-09-12 13:02:57 -0400
commit	4ae6c997ee2706bf5d10164926e9a29941aab58f (patch)
tree	233951a9b3b24bffc59a8d2517754af6d9a8051d /tokio-codec
parent	0f44adf5f696ac73dcdcb1fb2b91b87553c57443 (diff)