// src/lib.rs as it stands after commit 74426fb02b4dcb74d6ef555f2dec98f518e4be29
// (Kartikaya Gupta, Fri, 27 May 2016 16:28:27 -0400):
//     "Make the header parsing code more hideous but more correct"
//
// The patch replaced the old `find(':')`-based splitter with a small state
// machine and renamed `MailHeader::name` to `MailHeader::key`.
//
// One correction on top of the patch: offsets are tracked in BYTES (via
// `char_indices`) rather than in characters, because they are used to slice
// `raw_data`. Counting characters produced wrong slices, or a panic on a
// non-char-boundary, as soon as the header contained a non-ASCII character.

/// A single parsed mail header, borrowing both parts from the input string.
#[derive(Debug)]
pub struct MailHeader<'a> {
    /// Everything before the first `:` (not trimmed).
    key: &'a str,
    /// The header value: leading spaces after the `:` are skipped, folded
    /// continuation lines are included verbatim (newline and indentation kept).
    value: &'a str,
}

/// Error returned by [`parse_header`].
// NOTE(review): only `position: usize` is visible in the patch context; the
// `description: String` field is implied by how the struct is built, and the
// `Debug` derive by the tests calling `.expect` on the result — confirm
// against the full file.
#[derive(Debug)]
pub struct MailParseError {
    /// Human-readable explanation of what went wrong.
    description: String,
    /// Byte offset into the input at which the problem was detected.
    position: usize,
}

/// States of the header scanner in [`parse_header`].
enum HeaderParseState {
    /// Nothing consumed yet; only used to reject a leading space.
    Initial,
    /// Inside the key, looking for the terminating `:`.
    Key,
    /// Just past the `:`, skipping spaces before the value.
    PreValue,
    /// Inside the value.
    Value,
    /// Just saw a `\n` inside the value; the next character decides whether
    /// the value continues (space) or the header is over (anything else).
    ValueNewline,
}

/// Parses the first header found at the start of `raw_data`.
///
/// The key is everything up to the first `:`. The value starts at the first
/// non-space character after the `:` and runs until a newline that is not
/// followed by a space (or until the end of input). A newline followed by a
/// space is treated as a folded continuation line and stays part of the value;
/// a trailing newline is not included.
///
/// # Errors
///
/// Returns a [`MailParseError`] when
/// * `raw_data` is empty,
/// * `raw_data` starts with a space (a continuation line of some previous
///   header),
/// * a newline appears before any `:`, or
/// * the input ends without any `:`.
///
/// `position` in the error is a byte offset into `raw_data`.
// NOTE(review): the generic arguments of `Result` were lost in the rendered
// patch; `Result<MailHeader, MailParseError>` is what the body constructs.
pub fn parse_header(raw_data: &str) -> Result<MailHeader, MailParseError> {
    if raw_data.is_empty() {
        return Err(MailParseError {
            description: "Empty string provided".to_string(),
            position: 0,
        });
    }

    let mut ix_key_end = None;
    let mut ix_value_start = 0;
    let mut ix_value_end = 0;
    // Byte offset of the most recently examined character, for error reporting.
    let mut ix_last = 0;

    let mut state = HeaderParseState::Initial;
    // `char_indices` yields byte offsets, which is what string slicing needs.
    'scan: for (ix, c) in raw_data.char_indices() {
        ix_last = ix;
        // Inner loop: `continue` re-dispatches the same character in the new
        // state, falling out of the `match` moves on to the next character.
        loop {
            match state {
                HeaderParseState::Initial => {
                    if c == ' ' {
                        return Err(MailParseError {
                            description: "Header cannot start with a space; it is likely an overhanging line from a previous header".to_string(),
                            position: ix,
                        });
                    }
                    state = HeaderParseState::Key;
                    continue;
                }
                HeaderParseState::Key => {
                    if c == ':' {
                        ix_key_end = Some(ix);
                        state = HeaderParseState::PreValue;
                    } else if c == '\n' {
                        return Err(MailParseError {
                            description: "Unexpected newline in header key".to_string(),
                            position: ix,
                        });
                    }
                }
                HeaderParseState::PreValue => {
                    if c != ' ' {
                        // First character of the value (possibly a newline,
                        // which yields an empty value).
                        ix_value_start = ix;
                        ix_value_end = ix;
                        state = HeaderParseState::Value;
                        continue;
                    }
                }
                HeaderParseState::Value => {
                    if c == '\n' {
                        state = HeaderParseState::ValueNewline;
                    } else {
                        // Extend the value past this character; a character
                        // may occupy more than one byte.
                        ix_value_end = ix + c.len_utf8();
                    }
                }
                HeaderParseState::ValueNewline => {
                    if c == ' ' {
                        // Folded line: the value continues.
                        state = HeaderParseState::Value;
                        continue;
                    } else {
                        // Start of the next header (or body): stop here.
                        break 'scan;
                    }
                }
            }
            break;
        }
    }

    match ix_key_end {
        Some(v) => Ok(MailHeader {
            key: &raw_data[0..v],
            value: &raw_data[ix_value_start..ix_value_end],
        }),
        None => Err(MailParseError {
            description: "Unable to determine end of the header key component".to_string(),
            position: ix_last,
        }),
    }
}

#[cfg(test)]
mod tests {
    // NOTE(review): this `use` line lies outside the patch context; it is
    // required for the tests below to see `parse_header`.
    use super::*;

    #[test]
    fn parse_basic_header() {
        let mut parsed = parse_header("Key: Value").expect("");
        assert_eq!(parsed.key, "Key");
        assert_eq!(parsed.value, "Value");

        parsed = parse_header("Key :  Value ").expect("");
        assert_eq!(parsed.key, "Key ");
        assert_eq!(parsed.value, "Value ");

        parsed = parse_header("Key:").expect("");
        assert_eq!(parsed.key, "Key");
        assert_eq!(parsed.value, "");

        parsed = parse_header(":\n").expect("");
        assert_eq!(parsed.key, "");
        assert_eq!(parsed.value, "");

        parsed = parse_header("Key:Multi-line\n value").expect("");
        assert_eq!(parsed.key, "Key");
        assert_eq!(parsed.value, "Multi-line\n value");

        parsed = parse_header("Key:  Multi\n  line\n value\n").expect("");
        assert_eq!(parsed.key, "Key");
        assert_eq!(parsed.value, "Multi\n  line\n value");

        parsed = parse_header("Key: One\nKey2: Two").expect("");
        assert_eq!(parsed.key, "Key");
        assert_eq!(parsed.value, "One");
    }

    #[test]
    fn parse_non_ascii_header() {
        // Multi-byte characters in both key and value must not shift the slices.
        let parsed = parse_header("Clé: café\n crème\nNext: x").expect("");
        assert_eq!(parsed.key, "Clé");
        assert_eq!(parsed.value, "café\n crème");
    }
}