summaryrefslogtreecommitdiffstats
path: root/src/header.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/header.rs')
-rw-r--r--src/header.rs347
1 files changed, 347 insertions, 0 deletions
diff --git a/src/header.rs b/src/header.rs
new file mode 100644
index 0000000..3447156
--- /dev/null
+++ b/src/header.rs
@@ -0,0 +1,347 @@
+use charset::decode_latin1;
+use charset::Charset;
+
+use error::*;
+use util::is_boundary;
+use util::find_from;
+
+/// A single header (key/value pair) of a message.
+///
+/// The struct only borrows slices of the raw byte array handed to the parser
+/// (e.g. through parse_mail), and so the lifetime of this struct must be
+/// contained within the lifetime of that raw input. Use the accessor
+/// functions on this struct to extract the data as Rust strings.
+#[derive(Debug)]
+pub struct MailHeader<'a> {
+ /// Raw bytes of the header name, i.e. everything before the first `:`.
+ key: &'a [u8],
+ /// Raw bytes of the header value; may span several folded lines.
+ value: &'a [u8],
+}
+
+impl<'a> MailHeader<'a> {
+    /// Get the name of the header. Note that header names are case-insensitive.
+    ///
+    /// The raw key bytes are run through `decode_latin1`; the current
+    /// implementation always returns `Ok`.
+    pub fn get_key(&self) -> Result<String, MailParseError> {
+        Ok(decode_latin1(self.key).into_owned())
+    }
+
+    /// Decode the inside of one encoded-word, i.e. the `charset?coding?text`
+    /// part found between the `=?` and `?=` markers.
+    ///
+    /// Returns `None` if either `?` delimiter is missing, the transfer coding
+    /// is neither `B`/`b` (base64) nor `Q`/`q` (quoted-printable), the payload
+    /// fails to decode, or the charset label is not recognised.
+    fn decode_word(&self, encoded: &str) -> Option<String> {
+        let ix_delim1 = encoded.find('?')?;
+        let ix_delim2 = find_from(encoded, ix_delim1 + 1, "?")?;
+
+        let charset = &encoded[0..ix_delim1];
+        let transfer_coding = &encoded[ix_delim1 + 1..ix_delim2];
+        let input = &encoded[ix_delim2 + 1..];
+
+        let decoded = match transfer_coding {
+            "B" | "b" => base64::decode(input.as_bytes()).ok()?,
+            "Q" | "q" => {
+                // In the Q coding an underscore stands for a space.
+                let to_decode = input.replace("_", " ");
+                // The quoted_printable module does a trim_end on the input, so
+                // decode the trimmed text and then put the trailing whitespace
+                // back (a no-op when nothing was trimmed).
+                let trimmed = to_decode.trim_end();
+                let mut bytes =
+                    quoted_printable::decode(trimmed, quoted_printable::ParseMode::Robust).ok()?;
+                bytes.extend_from_slice(to_decode[trimmed.len()..].as_bytes());
+                bytes
+            }
+            _ => return None,
+        };
+        let charset = Charset::for_label_no_replacement(charset.as_bytes())?;
+        let (cow, _) = charset.decode_without_bom_handling(&decoded);
+        Some(cow.into_owned())
+    }
+
+    /// Get the value of the header. Header values wrapped across multiple
+    /// lines are compacted back into one line: each line break, together with
+    /// the whitespace that starts the continuation line, is collapsed into a
+    /// single space. Additionally, any encoded-words (`=?charset?B?...?=` or
+    /// `=?charset?Q?...?=`) in the value are decoded; encoded-words that
+    /// cannot be decoded are passed through verbatim.
+    ///
+    /// The space for a line break is omitted only when the previous line ended
+    /// with a decoded encoded-word and the continuation line starts with `=?`,
+    /// so that text split over several encoded-words is joined seamlessly
+    /// while ordinary text keeps its separating space.
+    ///
+    /// The current implementation always returns `Ok`.
+    ///
+    /// # Examples
+    /// ```
+    /// use mailparse::parse_header;
+    /// let (parsed, _) = parse_header(b"Subject: =?iso-8859-1?Q?=A1Hola,_se=F1or!?=").unwrap();
+    /// assert_eq!(parsed.get_key().unwrap(), "Subject");
+    /// assert_eq!(parsed.get_value().unwrap(), "\u{a1}Hola, se\u{f1}or!");
+    /// ```
+    pub fn get_value(&self) -> Result<String, MailParseError> {
+        let mut result = String::new();
+        let chars = decode_latin1(self.value);
+        // True while the last thing appended to `result` is a decoded
+        // encoded-word (trailing whitespace on the same line is ignored).
+        let mut after_encoded_word = false;
+        for (line_no, line) in chars.lines().map(str::trim_start).enumerate() {
+            // Whitespace separating two adjacent encoded-words is not part of
+            // the text; every other fold becomes a single space.
+            if line_no > 0 && !(after_encoded_word && line.starts_with("=?")) {
+                result.push(' ');
+            }
+            after_encoded_word = false;
+
+            let mut ix_search = 0;
+            while let Some(ix_open) = find_from(line, ix_search, "=?") {
+                let ix_begin = ix_open + 2;
+                // The "=?" only opens an encoded-word if whatever precedes it
+                // is accepted by `is_boundary`; otherwise it is literal text.
+                if !is_boundary(line, ix_open.checked_sub(1)) {
+                    result.push_str(&line[ix_search..ix_begin]);
+                    after_encoded_word = false;
+                    ix_search = ix_begin;
+                    continue;
+                }
+                result.push_str(&line[ix_search..ix_open]);
+
+                // Find the closing "?=" that is followed by a boundary. A "?="
+                // can also occur inside the word (e.g. "?Q?=A1"), so keep
+                // looking past candidates that are not at a boundary.
+                let mut ix_end_search = ix_begin;
+                let ix_close = loop {
+                    match find_from(line, ix_end_search, "?=") {
+                        Some(ix_end) if is_boundary(line, ix_end.checked_add(2)) => {
+                            break Some(ix_end)
+                        }
+                        Some(ix_end) => ix_end_search = ix_end + 2,
+                        None => break None,
+                    }
+                };
+
+                match ix_close {
+                    Some(ix_end) => {
+                        match self.decode_word(&line[ix_begin..ix_end]) {
+                            Some(decoded) => {
+                                result.push_str(&decoded);
+                                after_encoded_word = true;
+                            }
+                            None => {
+                                // Not decodable: emit the raw text, markers included.
+                                result.push_str(&line[ix_open..ix_end + 2]);
+                                after_encoded_word = false;
+                            }
+                        }
+                        ix_search = ix_end + 2;
+                    }
+                    None => {
+                        // Unterminated word: keep the "=?" literally and resume
+                        // scanning right after it.
+                        result.push_str("=?");
+                        after_encoded_word = false;
+                        ix_search = ix_begin;
+                    }
+                }
+            }
+
+            // Whatever is left on the line is plain text.
+            let rest = &line[ix_search..];
+            if !rest.trim().is_empty() {
+                after_encoded_word = false;
+            }
+            result.push_str(rest);
+        }
+        Ok(result)
+    }
+}
+
+/// States of the byte-by-byte state machine used by `parse_header`.
+#[derive(Debug)]
+enum HeaderParseState {
+ /// Looking at the very first byte of the input.
+ Initial,
+ /// Inside the header name, scanning for the `:` separator.
+ Key,
+ /// Past the `:`, skipping the spaces that precede the value.
+ PreValue,
+ /// Inside the header value.
+ Value,
+ /// Just after a newline inside the value; the next byte decides whether
+ /// the value continues (space or tab) or the header is finished.
+ ValueNewline,
+}
+
+/// Parse a single header from the raw data given.
+/// This function takes raw byte data, and starts parsing it, expecting there
+/// to be a MIME header key-value pair right at the beginning. It parses that
+/// header and returns it, along with the index at which the next header is
+/// expected to start. If you just want to parse a single header, you can ignore
+/// the second component of the tuple, which is the index of the next header.
+///
+/// A value continues onto the next line when that line starts with a space or
+/// a tab. The line terminator that ends the header (`\n` or `\r\n`) is not
+/// part of the returned value.
+///
+/// # Errors
+/// An error is returned if the input is empty, starts with a space, contains
+/// a newline before the `:` that ends the key, or contains no `:` at all.
+///
+/// # Examples
+/// ```
+/// use mailparse::parse_header;
+/// let (parsed, _) = parse_header(concat!(
+///     "Subject: Hello, sir,\n",
+///     " I am multiline\n",
+///     "Next:Header").as_bytes())
+///     .unwrap();
+/// assert_eq!(parsed.get_key().unwrap(), "Subject");
+/// assert_eq!(parsed.get_value().unwrap(), "Hello, sir, I am multiline");
+/// ```
+pub fn parse_header(raw_data: &[u8]) -> Result<(MailHeader, usize), MailParseError> {
+    if raw_data.is_empty() {
+        return Err(MailParseError::Generic("Empty string provided"));
+    }
+
+    let mut ix = 0;
+    let mut ix_key_end = None;
+    let mut ix_value_start = 0;
+    let mut ix_value_end = 0;
+
+    let mut state = HeaderParseState::Initial;
+    // A `continue` inside the match leaves `ix` untouched, so the same byte is
+    // looked at again in the new state; falling out of the match consumes it.
+    while let Some(&c) = raw_data.get(ix) {
+        match state {
+            HeaderParseState::Initial => {
+                if c == b' ' {
+                    return Err(MailParseError::Generic(
+                        "Header cannot start with a space; it is \
+                         likely an overhanging line from a \
+                         previous header",
+                    ));
+                };
+                state = HeaderParseState::Key;
+                continue;
+            }
+            HeaderParseState::Key => {
+                if c == b':' {
+                    ix_key_end = Some(ix);
+                    state = HeaderParseState::PreValue;
+                } else if c == b'\n' {
+                    return Err(MailParseError::Generic("Unexpected newline in header key"));
+                }
+            }
+            HeaderParseState::PreValue => {
+                if c != b' ' {
+                    ix_value_start = ix;
+                    ix_value_end = ix;
+                    state = HeaderParseState::Value;
+                    continue;
+                }
+            }
+            HeaderParseState::Value => {
+                if c == b'\n' {
+                    state = HeaderParseState::ValueNewline;
+                } else if c != b'\r' {
+                    // A CR does not extend the value by itself, so the CR of
+                    // a terminating CRLF never ends up in the value slice. A
+                    // CR in the middle of the value is still covered once a
+                    // later byte moves the end forward.
+                    ix_value_end = ix + 1;
+                }
+            }
+            HeaderParseState::ValueNewline => {
+                if c == b' ' || c == b'\t' {
+                    // Folded line: the value carries on.
+                    state = HeaderParseState::Value;
+                    continue;
+                } else {
+                    // `ix` now points at the first byte after this header.
+                    break;
+                }
+            }
+        }
+        ix += 1;
+    }
+
+    match ix_key_end {
+        Some(v) => Ok((
+            MailHeader {
+                key: &raw_data[0..v],
+                value: &raw_data[ix_value_start..ix_value_end],
+            },
+            ix,
+        )),
+
+        None => Err(MailParseError::Generic(
+            "Unable to determine end of the header key component",
+        )),
+    }
+}
+
+/// A trait that is implemented for `[MailHeader]` slices. These functions are
+/// therefore also available on the `Vec<MailHeader>` that is returned by the
+/// parse_headers function. It provides a map-like interface to look up header
+/// values by their name.
+pub trait MailHeaderMap {
+ /// Look through the list of headers and return the value of the first one
+ /// that matches the provided key. It returns Ok(None) if no matching
+ /// header was found. Header names are matched case-insensitively.
+ ///
+ /// # Examples
+ /// ```
+ /// use mailparse::{parse_mail, MailHeaderMap};
+ /// let headers = parse_mail(concat!(
+ /// "Subject: Test\n",
+ /// "\n",
+ /// "This is a test message").as_bytes())
+ /// .unwrap().headers;
+ /// assert_eq!(headers.get_first_value("Subject").unwrap(), Some("Test".to_string()));
+ /// ```
+ fn get_first_value(&self, key: &str) -> Result<Option<String>, MailParseError>;
+
+ /// Look through the list of headers and return the values of all headers
+ /// matching the provided key. Returns an empty vector if no matching headers
+ /// were found. The order of the returned values is the same as the order
+ /// of the matching headers in the message. Header names are matched
+ /// case-insensitively.
+ ///
+ /// # Examples
+ /// ```
+ /// use mailparse::{parse_mail, MailHeaderMap};
+ /// let headers = parse_mail(concat!(
+ /// "Key: Value1\n",
+ /// "Key: Value2").as_bytes())
+ /// .unwrap().headers;
+ /// assert_eq!(headers.get_all_values("Key").unwrap(),
+ /// vec!["Value1".to_string(), "Value2".to_string()]);
+ /// ```
+ fn get_all_values(&self, key: &str) -> Result<Vec<String>, MailParseError>;
+}
+
+impl<'a> MailHeaderMap for [MailHeader<'a>] {
+    /// Returns the decoded value of the first header whose name equals `key`
+    /// (ASCII case-insensitively), or `Ok(None)` when there is none. Headers
+    /// are examined in order and the first error encountered is returned.
+    fn get_first_value(&self, key: &str) -> Result<Option<String>, MailParseError> {
+        for header in self.iter() {
+            let name = header.get_key()?;
+            if name.eq_ignore_ascii_case(key) {
+                let value = header.get_value()?;
+                return Ok(Some(value));
+            }
+        }
+        Ok(None)
+    }
+
+    /// Returns the decoded values of every header whose name equals `key`
+    /// (ASCII case-insensitively), in message order. Collecting stops at the
+    /// first error, which is then returned.
+    fn get_all_values(&self, key: &str) -> Result<Vec<String>, MailParseError> {
+        self.iter()
+            .filter_map(|header| match header.get_key() {
+                Ok(name) if name.eq_ignore_ascii_case(key) => Some(header.get_value()),
+                Ok(_) => None,
+                Err(e) => Some(Err(e)),
+            })
+            .collect()
+    }
+}
+
+/// Parses all the headers from the raw data given.
+///
+/// The input is expected to start with zero or more MIME header key-value
+/// pairs, followed by a blank line (i.e. two consecutive newlines). The
+/// headers are returned in a vector, which can be accessed linearly with the
+/// normal vector functions or in a map-like fashion through the MailHeaderMap
+/// trait. The second component of the returned tuple is the index at which
+/// the message body is expected to start; ignore it if you just care about
+/// the headers.
+///
+/// Parsing stops at the end of the input or at the blank line (`\n` or
+/// `\r\n`), whichever comes first; the blank line itself is skipped.
+/// Error values are returned if a header fails to parse or if the headers
+/// are followed by a CR that is not part of a CRLF pair.
+///
+/// # Examples
+/// ```
+/// use mailparse::{parse_headers, MailHeaderMap};
+/// let (headers, _) = parse_headers(concat!(
+///     "Subject: Test\n",
+///     "From: me@myself.com\n",
+///     "To: you@yourself.com").as_bytes())
+///     .unwrap();
+/// assert_eq!(headers[1].get_key().unwrap(), "From");
+/// assert_eq!(headers.get_first_value("To").unwrap(), Some("you@yourself.com".to_string()));
+/// ```
+pub fn parse_headers(raw_data: &[u8]) -> Result<(Vec<MailHeader>, usize), MailParseError> {
+    let mut headers: Vec<MailHeader> = Vec::new();
+    let mut ix = 0;
+    while let Some(&byte) = raw_data.get(ix) {
+        match byte {
+            // Blank line terminated by a bare LF: the body starts right after.
+            b'\n' => {
+                ix += 1;
+                break;
+            }
+            // Blank line terminated by CRLF; a CR on its own is malformed.
+            b'\r' => {
+                if raw_data.get(ix + 1) != Some(&b'\n') {
+                    return Err(MailParseError::Generic(
+                        "Headers were followed by an unexpected lone \
+                         CR character!",
+                    ));
+                }
+                ix += 2;
+                break;
+            }
+            // Anything else begins another header.
+            _ => {
+                let (header, consumed) = parse_header(&raw_data[ix..])?;
+                headers.push(header);
+                ix += consumed;
+            }
+        }
+    }
+    Ok((headers, ix))
+}
+