diff options
author | Neal H. Walfield <neal@pep.foundation> | 2019-05-02 19:01:38 +0200 |
---|---|---|
committer | Neal H. Walfield <neal@pep.foundation> | 2019-05-03 10:43:24 +0200 |
commit | b42df6efe2f20cbfc32856cdc89f3ac738f568fd (patch) | |
tree | 662fe1816db0936ea59345afcb7cd48a2439ade0 | |
parent | f2a55bae4b262836baee2b6fcacf43473ab1875c (diff) |
openpgp: More efficiently identify valid ASCII-armor
- Don't decode base64 data that definitely can't possibly contain a
valid OpenPGP message.
-rw-r--r-- | openpgp/Cargo.toml | 1 | ||||
-rw-r--r-- | openpgp/src/armor.rs | 185 | ||||
-rw-r--r-- | openpgp/src/lib.rs | 2 |
3 files changed, 129 insertions, 59 deletions
diff --git a/openpgp/Cargo.toml b/openpgp/Cargo.toml index 88a4ded2..320c0628 100644 --- a/openpgp/Cargo.toml +++ b/openpgp/Cargo.toml @@ -27,6 +27,7 @@ bzip2 = { version = "0.3.2", optional = true } failure = "0.1.2" flate2 = { version = "1.0.1", optional = true } lalrpop-util = "0.16" +lazy_static = "1.3" memsec = "0.5.4" nettle = "5.0" quickcheck = "0.8" diff --git a/openpgp/src/armor.rs b/openpgp/src/armor.rs index 5b85619d..2a287f02 100644 --- a/openpgp/src/armor.rs +++ b/openpgp/src/armor.rs @@ -34,7 +34,10 @@ use std::cmp::min; use std::str; use quickcheck::{Arbitrary, Gen}; -use packet::Header; +use packet::prelude::*; +use packet::BodyLength; +use packet::ctb::{CTBNew, CTBOld}; +use serialize::SerializeInto; /// The encoded output stream must be represented in lines of no more /// than 76 characters each (see (see [RFC 4880, section @@ -120,6 +123,19 @@ impl Kind { format!("-----END PGP {}-----", self.blurb()) } + /// Returns the length of the header. + /// + /// This does not include any trailing newline. It is simply the + /// length of: + /// + /// ```norun + /// -----BEGIN PGP BLUB ----- + /// ``` + fn header_len(&self) -> usize { + "-----BEGIN PGP -----".len() + + self.blurb().len() + } + /// Returns the maximal size of the footer with CRC. fn footer_max_len(&self) -> usize { (5 // CRC @@ -504,79 +520,108 @@ impl<'a> Reader<'a> { fn initialize(&mut self) -> Result<()> { if self.initialized { return Ok(()) } + // The range of the first 6 bits of a message is limited. + // Save cpu cycles by only considering base64 data that starts + // with one of those characters. + lazy_static!{ + static ref START_CHARS : Vec<u8> = { + let mut valid_start = Vec::new(); + for &tag in [ Tag::PKESK, Tag::SKESK, + Tag::OnePassSig, Tag::Signature, + Tag::PublicKey, Tag::SecretKey, + Tag::CompressedData, Tag::Literal ].into_iter() { + let mut ctb = [ 0u8; 1 ]; + let mut o = [ 0u8; 4 ]; + + CTBNew::new(tag).serialize_into(&mut ctb[..]).unwrap(); + base64::encode_config_slice(&ctb[..], base64::MIME, &mut o[..]); + valid_start.push(o[0]); + + CTBOld::new(tag, BodyLength::Full(0)).unwrap() + .serialize_into(&mut ctb[..]).unwrap(); + base64::encode_config_slice(&ctb[..], base64::MIME, &mut o[..]); + valid_start.push(o[0]); + } + + // The standard start of an ASCII armor header e.g., + // + // -----BEGIN PGP MESSAGE----- + valid_start.push('-' as u8); + + valid_start.sort(); + valid_start.dedup(); + valid_start + }; + + } + // Look for the Armor Header Line, skipping any garbage in the // process. - let mut n = 0; let mut found_blob = false; - 'search: loop { - self.source.consume(n); + let start_chars = if self.strict { + &[b'-'][..] + } else { + &START_CHARS[..] + }; - let line = self.source.read_to('\n' as u8)?; - n = line.len(); - if n == 0 { - return Err( - Error::new(ErrorKind::InvalidInput, - "Reached EOF looking for Armor Header Line")); + let mut lines = 0; + let n = 'search: loop { + if lines > 0 { + // Find the start of the next line. + self.source.drop_through(&[b'\n'])?; } + lines += 1; - // If the user did not specify what kind of data we want, - // we aggressively try to decode any data, even if we do - // not see a valid header. - if ! self.strict { - // Try the whole string, as well as substrings - // starting at each whitespace sequence. - let mut offset = 0; - loop { - if is_armored_pgp_blob(&line[offset..]) { - // Consume anything up to this point. - n = offset; - found_blob = true; - break 'search; - } - - if let Some(o) = &line[offset..].iter() - .position(|c| c.is_ascii_whitespace()) - { - offset += *o; - - // Skip whitespaces. - while offset < line.len() - && line[offset].is_ascii_whitespace() - { - offset += 1; - } - } else { - // No armored blob found in this line. - break; - } - } + // Ignore leading whitespace, etc. + while self.source.data_hard(1)?[0].is_ascii_whitespace() { + self.source.consume(1); } - if line.len() < 27 { - // Line is too short to contain the shortest header. + // Don't bother if the first byte is not plausible. + if !start_chars.binary_search(&self.source.data_hard(1)?[0]).is_ok() + { + self.source.consume(1); continue; } - for i in 0..(line.len() - 27 + 1) { - if let Some(kind) = Kind::detect(&line[i..]) { - if self.kind == None { - // Found any! - self.kind = Some(kind); - break 'search; - } + { + let mut input = self.source.data(128)?; + let n = input.len(); - if self.kind == Some(kind) { - // Found it! - break 'search; - } + if n == 0 { + return Err( + Error::new(ErrorKind::InvalidInput, + "Reached EOF looking for Armor Header Line")); + } + if n > 128 { + input = &input[..128]; } - if ! line[i].is_ascii_whitespace() { - // Non-whitespace prefix. - continue 'search; + if input[0] == '-' as u8 { + // Possible ASCII-armor header. + if let Some(kind) = Kind::detect(&input) { + if self.kind == None { + // Found any! + self.kind = Some(kind); + break 'search kind.header_len(); + } + + if self.kind == Some(kind) { + // Found it! + break 'search kind.header_len(); + } + } + } else if ! self.strict { + // The user did not specify what kind of data she + // wants. We aggressively try to decode any data, + // even if we do not see a valid header. + if is_armored_pgp_blob(input) { + found_blob = true; + break 'search 0; + } } } - } + }; self.source.consume(n); if found_blob { @@ -585,13 +630,28 @@ impl<'a> Reader<'a> { return Ok(()); } - // Read the headers. + // We consumed the header above, but not any trailing + // whitespace and the trailing new line. We do that now. + // Other data between the header and the new line are not + // allowed. But, instead of failing, we try to recover, by + // stopping at the first non-whitespace character. + let n = { + let line = self.source.read_to('\n' as u8)?; + line.iter().position(|&c| { + !c.is_ascii_whitespace() + }).unwrap_or(line.len()) + }; + self.source.consume(n); + + // Read the key-value headers. let mut n = 0; + let mut lines = 0; loop { self.source.consume(n); let line = self.source.read_to('\n' as u8)?; n = line.len(); + lines += 1; let line = str::from_utf8(line); // Ignore---don't error out---lines that are not valid UTF8. @@ -622,6 +682,13 @@ impl<'a> Reader<'a> { if line.trim_start().len() == 0 { // Empty line. break; + } else if lines == 1 { + // This is the first line and we don't have a + // key-value pair. It seems more likely that + // we're just missing a newline and this invalid + // header is actually part of the body. + n = 0; + break; } } else { let key = key_value[0]; diff --git a/openpgp/src/lib.rs b/openpgp/src/lib.rs index f9bf609a..7fe62790 100644 --- a/openpgp/src/lib.rs +++ b/openpgp/src/lib.rs @@ -67,6 +67,8 @@ extern crate rand; extern crate time; extern crate sequoia_rfc2822 as rfc2822; + +#[macro_use] extern crate lazy_static; #[macro_use] mod macros; |