openpgp: Move base64 utilities to their own module.

author: Justus Winter <justus@sequoia-pgp.org> 2021-02-12 13:22:55 +0100
committer: Justus Winter <justus@sequoia-pgp.org> 2021-02-12 17:06:12 +0100
commit: a62b53c42d0b909521b77ca24e540d4b84bc87d6 (patch)
tree: 98cacacf3dafe3b4cad539d71387709d89521ce8
parent: 53cd6d1f3558eeea08cf0236be88704cd1b118cb (diff)
2 files changed, 208 insertions, 199 deletions
diff --git a/openpgp/src/armor.rs b/openpgp/src/armor.rs
index 8f7f69ab..0409162d 100644
--- a/openpgp/src/armor.rs
+++ b/openpgp/src/armor.rs
@@ -41,12 +41,14 @@ use std::borrow::Cow;
 #[cfg(test)]
 use quickcheck::{Arbitrary, Gen};
 
-use crate::vec_truncate;
 use crate::packet::prelude::*;
 use crate::packet::header::{BodyLength, CTBNew, CTBOld};
 use crate::parse::Cookie;
 use crate::serialize::MarshalInto;
 
+mod base64_utils;
+use base64_utils::*;
+
 /// The encoded output stream must be represented in lines of no more
 /// than 76 characters each (see (see [RFC 4880, section
 /// 6.3](https://tools.ietf.org/html/rfc4880#section-6.3).  GnuPG uses
@@ -994,204 +996,6 @@ fn common_prefix<A: AsRef<[u8]>, B: AsRef<[u8]>>(a: A, b: B) -> usize {
     a.as_ref().iter().zip(b.as_ref().iter()).take_while(|(a, b)| a == b).count()
 }
 
-/// Remove whitespace, etc. from the base64 data.
-///
-/// This function returns the filtered base64 data (i.e., stripped of
-/// all skipable data like whitespace), and the amount of unfiltered
-/// data that corresponds to.  Thus, if we have the following 7 bytes:
-///
-/// ```text
-///     ab  cde
-///     0123456
-/// ```
-///
-/// This function returns ("abcd", 6), because the 'd' is the last
-/// character in the last complete base64 chunk, and it is at offset 5.
-///
-/// If 'd' is followed by whitespace, it is undefined whether that
-/// whitespace is included in the count.
-///
-/// This function only returns full chunks of base64 data.  As a
-/// consequence, if base64_data_max is less than 4, then this will not
-/// return any data.
-///
-/// This function will stop after it sees base64 padding, and if it
-/// sees invalid base64 data.
-fn base64_filter(mut bytes: Cow<[u8]>, base64_data_max: usize,
-                 mut prefix_remaining: usize, prefix_len: usize)
-    -> (Cow<[u8]>, usize, usize)
-{
-    let mut leading_whitespace = 0;
-
-    // Round down to the nearest chunk size.
-    let base64_data_max = base64_data_max / 4 * 4;
-
-    // Number of bytes of base64 data.  Since we update `bytes` in
-    // place, the base64 data is `&bytes[..base64_len]`.
-    let mut base64_len = 0;
-
-    // Offset of the next byte of unfiltered data to process.
-    let mut unfiltered_offset = 0;
-
-    // Offset of the last byte of the last ***complete*** base64 chunk
-    // in the unfiltered data.
-    let mut unfiltered_complete_len = 0;
-
-    // Number of bytes of padding that we've seen so far.
-    let mut padding = 0;
-
-    while unfiltered_offset < bytes.len()
-        && base64_len < base64_data_max
-        // A valid base64 chunk never starts with padding.
-        && ! (padding > 0 && base64_len % 4 == 0)
-    {
-        // If we have some prefix to skip, skip it.
-        if prefix_remaining > 0 {
-            prefix_remaining -= 1;
-            if unfiltered_offset == 0 {
-                match bytes {
-                    Cow::Borrowed(s) => {
-                        // We're at the beginning.  Avoid moving
-                        // data by cutting off the start of the
-                        // slice.
-                        bytes = Cow::Borrowed(&s[1..]);
-                        leading_whitespace += 1;
-                        continue;
-                    }
-                    Cow::Owned(_) => (),
-                }
-            }
-            unfiltered_offset += 1;
-            continue;
-        }
-        match bytes[unfiltered_offset] {
-            // White space.
-            c if c.is_ascii_whitespace() => {
-                if c == b'\n' {
-                    prefix_remaining = prefix_len;
-                }
-                if unfiltered_offset == 0 {
-                    match bytes {
-                        Cow::Borrowed(s) => {
-                            // We're at the beginning.  Avoid moving
-                            // data by cutting off the start of the
-                            // slice.
-                            bytes = Cow::Borrowed(&s[1..]);
-                            leading_whitespace += 1;
-                            continue;
-                        }
-                        Cow::Owned(_) => (),
-                    }
-                }
-            }
-
-            // Padding.
-            b'=' => {
-                if padding == 2 {
-                    // There can never be more than two bytes of
-                    // padding.
-                    break;
-                }
-                if base64_len % 4 == 0 {
-                    // Padding can never occur at the start of a
-                    // base64 chunk.
-                    break;
-                }
-
-                if unfiltered_offset != base64_len {
-                    bytes.to_mut()[base64_len] = b'=';
-                }
-                base64_len += 1;
-                if base64_len % 4 == 0 {
-                    unfiltered_complete_len = unfiltered_offset + 1;
-                }
-                padding += 1;
-            }
-
-            // The only thing that can occur after padding is
-            // whitespace or padding.  Those cases were covered above.
-            _ if padding > 0 => break,
-
-            // Base64 data!
-            b if is_base64_char(&b) => {
-                if unfiltered_offset != base64_len {
-                    bytes.to_mut()[base64_len] = b;
-                }
-                base64_len += 1;
-                if base64_len % 4 == 0 {
-                    unfiltered_complete_len = unfiltered_offset + 1;
-                }
-            }
-
-            // Not base64 data.
-            _ => break,
-        }
-
-        unfiltered_offset += 1;
-    }
-
-    let base64_len = base64_len - (base64_len % 4);
-    unfiltered_complete_len += leading_whitespace;
-    match bytes {
-        Cow::Borrowed(s) =>
-            (Cow::Borrowed(&s[..base64_len]), unfiltered_complete_len,
-             prefix_remaining),
-        Cow::Owned(mut v) => {
-            vec_truncate(&mut v, base64_len);
-            (Cow::Owned(v), unfiltered_complete_len, prefix_remaining)
-        }
-    }
-}
-
-/// Checks whether the given bytes contain armored OpenPGP data.
-fn is_armored_pgp_blob(bytes: &[u8]) -> bool {
-    // Get up to 32 bytes of base64 data.  That's 24 bytes of data
-    // (ignoring padding), which is more than enough to get the first
-    // packet's header.
-    let (bytes, _, _) = base64_filter(Cow::Borrowed(bytes), 32, 0, 0);
-
-    match base64::decode_config(&bytes, base64::STANDARD) {
-        Ok(d) => {
-            // Don't consider an empty message to be valid.
-            if d.len() == 0 {
-                false
-            } else {
-                let mut br = buffered_reader::Memory::new(&d);
-                if let Ok(header) = Header::parse(&mut br) {
-                    header.ctb().tag().valid_start_of_message()
-                        && header.valid(false).is_ok()
-                } else {
-                    false
-                }
-            }
-        },
-        Err(_err) => false,
-    }
-}
-
-/// Checks whether the given byte is in the base64 character set.
-fn is_base64_char(b: &u8) -> bool {
-    b.is_ascii_alphanumeric() || *b == '+' as u8 || *b == '/' as u8
-}
-
-/// Returns the number of bytes of base64 data are needed to encode
-/// `s` bytes of raw data.
-fn base64_size(s: usize) -> usize {
-    (s + 3 - 1) / 3 * 4
-}
-
-#[test]
-fn base64_size_test() {
-    assert_eq!(base64_size(0), 0);
-    assert_eq!(base64_size(1), 4);
-    assert_eq!(base64_size(2), 4);
-    assert_eq!(base64_size(3), 4);
-    assert_eq!(base64_size(4), 8);
-    assert_eq!(base64_size(5), 8);
-    assert_eq!(base64_size(6), 8);
-    assert_eq!(base64_size(7), 12);
-}
-
 impl<'a> IoReader<'a> {
     fn read_armored_data(&mut self, buf: &mut [u8]) -> Result<usize> {
         let (consumed, decoded) = if self.buffer.len() > 0 {
diff --git a/openpgp/src/armor/base64_utils.rs b/openpgp/src/armor/base64_utils.rs
new file mode 100644
index 00000000..e67ea6ee
--- /dev/null
+++ b/openpgp/src/armor/base64_utils.rs
@@ -0,0 +1,205 @@
+use std::{
+    borrow::Cow,
+};
+
+use crate::{
+    packet::Header,
+};
+
+/// Removes whitespace and line prefixes from the base64 data.
+///
+/// This function returns the filtered base64 data (i.e., stripped of
+/// all skippable data like whitespace), the amount of unfiltered data
+/// that corresponds to, and the number of prefix bytes still to be
+/// skipped.  Thus, given the following 7 bytes and no prefix:
+///
+/// ```text
+///     ab  cde
+///     0123456
+/// ```
+///
+/// This function returns ("abcd", 6, 0), because the 'd' is the last
+/// character in the last complete base64 chunk, and it is at offset 5.
+///
+/// If 'd' is followed by whitespace, it is undefined whether that
+/// whitespace is included in the count.
+///
+/// The first `prefix_remaining` bytes, and `prefix_len` bytes after
+/// every newline, are skipped.  Only full chunks of base64 data are
+/// returned, so nothing is returned if `base64_data_max` is less than
+/// 4.  Filtering stops after base64 padding, and at invalid base64
+/// data.
+pub fn base64_filter(mut bytes: Cow<[u8]>, base64_data_max: usize,
+                     mut prefix_remaining: usize, prefix_len: usize)
+    -> (Cow<[u8]>, usize, usize)
+{
+    let mut leading_whitespace = 0;  // Bytes sliced off the front.
+
+    // Round down to the nearest chunk size.
+    let base64_data_max = base64_data_max / 4 * 4;
+
+    // Number of bytes of base64 data.  Since we update `bytes` in
+    // place, the base64 data is `&bytes[..base64_len]`.
+    let mut base64_len = 0;
+
+    // Offset of the next byte of unfiltered data to process.
+    let mut unfiltered_offset = 0;
+
+    // Number of unfiltered bytes up to and including the last byte
+    // of the last ***complete*** base64 chunk.
+    let mut unfiltered_complete_len = 0;
+
+    // Number of bytes of padding that we've seen so far.
+    let mut padding = 0;
+
+    while unfiltered_offset < bytes.len()
+        && base64_len < base64_data_max
+        // A valid base64 chunk never starts with padding.
+        && ! (padding > 0 && base64_len % 4 == 0)
+    {
+        // If we have some prefix to skip, skip it.
+        if prefix_remaining > 0 {
+            prefix_remaining -= 1;
+            if unfiltered_offset == 0 {
+                match bytes {
+                    Cow::Borrowed(s) => {
+                        // We're at the beginning.  Avoid moving
+                        // data by cutting off the start of the
+                        // slice.
+                        bytes = Cow::Borrowed(&s[1..]);
+                        leading_whitespace += 1;
+                        continue;
+                    }
+                    Cow::Owned(_) => (),
+                }
+            }
+            unfiltered_offset += 1;
+            continue;
+        }
+        match bytes[unfiltered_offset] {
+            // White space.
+            c if c.is_ascii_whitespace() => {
+                if c == b'\n' {
+                    prefix_remaining = prefix_len;
+                }
+                if unfiltered_offset == 0 {
+                    match bytes {
+                        Cow::Borrowed(s) => {
+                            // We're at the beginning.  Avoid moving
+                            // data by cutting off the start of the
+                            // slice.
+                            bytes = Cow::Borrowed(&s[1..]);
+                            leading_whitespace += 1;
+                            continue;
+                        }
+                        Cow::Owned(_) => (),
+                    }
+                }
+            }
+
+            // Padding.
+            b'=' => {
+                if padding == 2 {
+                    // There can never be more than two bytes of
+                    // padding.
+                    break;
+                }
+                if base64_len % 4 == 0 {
+                    // Padding can never occur at the start of a
+                    // base64 chunk.
+                    break;
+                }
+
+                if unfiltered_offset != base64_len {
+                    bytes.to_mut()[base64_len] = b'=';
+                }
+                base64_len += 1;
+                if base64_len % 4 == 0 {
+                    unfiltered_complete_len = unfiltered_offset + 1;
+                }
+                padding += 1;
+            }
+
+            // The only thing that can occur after padding is
+            // whitespace or padding.  Those cases were covered above.
+            _ if padding > 0 => break,
+
+            // Base64 data!
+            b if is_base64_char(&b) => {
+                if unfiltered_offset != base64_len {
+                    bytes.to_mut()[base64_len] = b;
+                }
+                base64_len += 1;
+                if base64_len % 4 == 0 {
+                    unfiltered_complete_len = unfiltered_offset + 1;
+                }
+            }
+
+            // Not base64 data.
+            _ => break,
+        }
+
+        unfiltered_offset += 1;
+    }
+
+    let base64_len = base64_len - (base64_len % 4);  // Whole chunks only.
+    unfiltered_complete_len += leading_whitespace;
+    match bytes {
+        Cow::Borrowed(s) =>
+            (Cow::Borrowed(&s[..base64_len]), unfiltered_complete_len,
+             prefix_remaining),
+        Cow::Owned(mut v) => {
+            crate::vec_truncate(&mut v, base64_len);
+            (Cow::Owned(v), unfiltered_complete_len, prefix_remaining)
+        }
+    }
+}
+
+/// Checks whether the given bytes begin with base64-encoded OpenPGP data.
+pub fn is_armored_pgp_blob(bytes: &[u8]) -> bool {
+    // Get up to 32 bytes of base64 data.  That's 24 bytes of data
+    // (ignoring padding), which is more than enough to get the first
+    // packet's header.
+    let (bytes, _, _) = base64_filter(Cow::Borrowed(bytes), 32, 0, 0);
+
+    match base64::decode_config(&bytes, base64::STANDARD) {
+        Ok(d) => {
+            // Don't consider an empty message to be valid.
+            if d.len() == 0 {
+                false
+            } else {
+                let mut br = buffered_reader::Memory::new(&d);
+                if let Ok(header) = Header::parse(&mut br) {
+                    header.ctb().tag().valid_start_of_message()
+                        && header.valid(false).is_ok()
+                } else {
+                    false
+                }
+            }
+        },
+        Err(_err) => false,  // The filtered data did not decode.
+    }
+}
+
+/// Checks whether `b` is a base64 digit (the padding '=' is not one).
+pub fn is_base64_char(b: &u8) -> bool {
+    b.is_ascii_alphanumeric() || *b == '+' as u8 || *b == '/' as u8
+}
+
+/// Returns the number of bytes of base64 data (including padding)
+/// that are needed to encode `s` bytes of raw data.
+pub fn base64_size(s: usize) -> usize {
+    (s + 3 - 1) / 3 * 4
+}
+
+#[test]
+fn base64_size_test() {
+    assert_eq!(base64_size(0), 0);  // No input, no output.
+    assert_eq!(base64_size(1), 4);  // 1 to 3 bytes fit in one chunk.
+    assert_eq!(base64_size(2), 4);
+    assert_eq!(base64_size(3), 4);
+    assert_eq!(base64_size(4), 8);  // 4 to 6 bytes need two chunks.
+    assert_eq!(base64_size(5), 8);
+    assert_eq!(base64_size(6), 8);
+    assert_eq!(base64_size(7), 12);  // The seventh byte starts a third.
+}
author	Justus Winter <justus@sequoia-pgp.org>	2021-02-12 13:22:55 +0100
committer	Justus Winter <justus@sequoia-pgp.org>	2021-02-12 17:06:12 +0100
commit	a62b53c42d0b909521b77ca24e540d4b84bc87d6 (patch)
tree	98cacacf3dafe3b4cad539d71387709d89521ce8
parent	53cd6d1f3558eeea08cf0236be88704cd1b118cb (diff)