openpgp: Fix the cleartext signature framework.

- Previously, we considered the line break immediately before the signature marker to be part of the signature. This is not correct. See Section 7.1 of RFC4880: The line ending (i.e., the <CR><LF>) before the '-----BEGIN PGP SIGNATURE-----' line that terminates the signed text is not considered part of the signed text. - This interpretation allows us to preserve the final newline in the signed text. See https://gitlab.com/sequoia-pgp/sequoia-sop/-/issues/16 where dkg requests: [...] if a trailing newline goes in, a trailing newline comes out. - The previous interpretation required very careful handling of the trailing line break, making sure it is not hashed. This was complicated by the fact that line breaks may use two characters, and the two characters may straddle reads/writes. So, this change of interpretation makes the code quite a bit simpler. - To clarify, signing the four octet string "test" yields a message looking like -----BEGIN PGP SIGNED MESSAGE----- Hash: SHA512 test -----BEGIN PGP SIGNATURE----- ... -----END PGP SIGNATURE----- Whereas signing the five octet string "test\n" now yields -----BEGIN PGP SIGNED MESSAGE----- Hash: SHA512 test -----BEGIN PGP SIGNATURE----- ... -----END PGP SIGNATURE----- - It is worth pointing out that GnuPG does something similar to what we did before: it makes sure any signed text ends in a line break (see `copy_clearsig_text`), but "swallows" this newline by using it as delimiter between text and signature (see `clearsign_file` where no explicit newline is emitted before the armor filter is pushed and the signatures are emitted). When verifying a message, GnuPG will emit the final line break, being very careful not to hash it (see `handle_plaintext`). The result is that any message signed by GnuPG seems to end in a line break when verified by GnuPG, but will never end in a line break when verified with Sequoia (or any other implementation that considers the signature-separating line break to be not part of the message, like OpenPGP.js). Signature validity is not affected.
author: Justus Winter <justus@sequoia-pgp.org> 2023-05-24 11:18:18 +0200
committer: Justus Winter <justus@sequoia-pgp.org> 2023-06-06 08:47:21 +0200
commit: 4f3d260a013835a8ab0b2f15f45e22ca4678ab24 (patch)
tree: c9c485e53b73841efe07a29f71bc0906396d1361
parent: 940a7267a61418661f61da2f0676069096ca2626 (diff)
6 files changed, 145 insertions, 134 deletions
diff --git a/openpgp/src/armor.rs b/openpgp/src/armor.rs
index d88ac3dc..e8fefb36 100644
--- a/openpgp/src/armor.rs
+++ b/openpgp/src/armor.rs
@@ -1461,6 +1461,15 @@ impl<'a> Reader<'a> {
                         self.source.consume(l);
                     }
 
+                    // Trim the final newline, it is not part of the
+                    // message, but separates the signature marker
+                    // from the text.
+                    let c = text.pop();
+                    assert!(c.is_none() || c == Some(b'\n'));
+                    if text.ends_with(b"\r") {
+                        text.pop();
+                    }
+
                     // Now, we have the whole text.
                     let mut literal = Literal::new(DataFormat::Text);
                     literal.set_body(text);
@@ -2350,16 +2359,25 @@ mod test {
         }
 
         f(crate::tests::message("a-problematic-poem.txt.cleartext.sig"),
-          crate::tests::message("a-problematic-poem.txt"), HashAlgorithm::SHA256)?;
+          {
+              // The test vector, created by GnuPG, does not preserve
+              // the final newline.
+              let mut reference =
+                  crate::tests::message("a-problematic-poem.txt").to_vec();
+              assert_eq!(reference.pop(), Some(b'\n'));
+              reference
+          }, HashAlgorithm::SHA256)?;
         f(crate::tests::message("a-cypherpunks-manifesto.txt.cleartext.sig"),
           {
+              // The test vector, created by GnuPG, does not preserve
+              // the final newline.
+              //
               // The transformation process trims trailing whitespace,
               // and the manifesto has a trailing whitespace right at
               // the end.
               let mut manifesto = crate::tests::manifesto().to_vec();
-              let ws_at = manifesto.len() - 2;
-              let ws = manifesto.remove(ws_at);
-              assert_eq!(ws, b' ');
+              assert_eq!(manifesto.pop(), Some(b'\n'));
+              assert_eq!(manifesto.pop(), Some(b' '));
               manifesto
           }, HashAlgorithm::SHA256)?;
         Ok(())
diff --git a/openpgp/src/parse/hashed_reader.rs b/openpgp/src/parse/hashed_reader.rs
index 12be20ae..f70775cc 100644
--- a/openpgp/src/parse/hashed_reader.rs
+++ b/openpgp/src/parse/hashed_reader.rs
@@ -300,7 +300,7 @@ impl Cookie {
         }
     }
 
-    fn hash_update_csf(&mut self, mut data: &[u8]) {
+    fn hash_update_csf(&mut self, data: &[u8]) {
         let level = self.level.unwrap_or(0);
         let hashes_for = self.hashes_for;
         let ngroups = self.sig_groups.len();
@@ -316,19 +316,6 @@ impl Cookie {
         tracer!(TRACE, "Cookie::hash_update_csf", level);
         t!("Cleartext Signature Framework message");
 
-        // If we stashed half of a \r\n newline away, see if we get
-        // the second half now.  If we do, and data is empty then, we
-        // return without hashing it.  This is important so that we
-        // can avoid hashing the final newline, even if we happen to
-        // read it in two invocations of this function.
-        if self.hash_stash.as_ref().map(|buf| buf.as_slice() == &b"\r"[..])
-            .unwrap_or(false)
-            && data.get(0).cloned() == Some(b'\n')
-        {
-            self.hash_stash.as_mut().expect("checked above").push(b'\n');
-            data = &data[1..];
-        }
-
         if data.is_empty() {
             return;
         }
@@ -339,50 +326,14 @@ impl Cookie {
             return;
         }
 
-        // Hash stashed data first.
-        if let Some(stashed_data) = self.hash_stash.take() {
-            for h in self.sig_groups[0].hashes.iter_mut() {
-                t!("{:?}: {:?} hashing {} stashed bytes.",
-                   hashes_for, h.map(|ctx| ctx.algo()),
-                   stashed_data.len());
-                assert!(matches!(h, HashingMode::Text(_)
-                                 | HashingMode::TextLastWasCr(_)),
-                        "CSF transformation uses text signatures");
-                h.update(&stashed_data[..]);
-            }
-        }
-
-        // We hash everything but the last newline.
-
-        // There is exactly one group.
-        assert_eq!(ngroups, 1);
-
-        // Compute the length of data that should be hashed.
-        // If it ends in a newline, we delay hashing it.
-        let l = data.len() - if data.ends_with(b"\r\n") {
-            2
-        } else if data.ends_with(b"\n") || data.ends_with(b"\r") {
-            1
-        } else {
-            0
-        };
-
-        // Hash everything but the last newline now.
+        // Hash the data.
         for h in self.sig_groups[0].hashes.iter_mut() {
             t!("{:?}: {:?} hashing {} bytes.",
-               hashes_for, h.map(|ctx| ctx.algo()), l);
+               hashes_for, h.map(|ctx| ctx.algo()), data.len());
             assert!(matches!(h, HashingMode::Text(_)
                              | HashingMode::TextLastWasCr(_)),
                     "CSF transformation uses text signatures");
-            h.update(&data[..l]);
-        }
-
-        // The newline we stash away.  If more text is written
-        // later, we will hash it then.  Otherwise, it is
-        // implicitly omitted when the filter is dropped.
-        if ! data[l..].is_empty() {
-            t!("Stashing newline: {:?}", &data[l..]);
-            self.hash_stash = Some(data[l..].to_vec());
+            h.update(data);
         }
     }
 }
diff --git a/openpgp/src/parse/stream.rs b/openpgp/src/parse/stream.rs
index 74036553..b59005e7 100644
--- a/openpgp/src/parse/stream.rs
+++ b/openpgp/src/parse/stream.rs
@@ -3204,13 +3204,15 @@ pub mod test {
             (crate::tests::message("a-cypherpunks-manifesto.txt.cleartext.sig")
              .to_vec(),
              {
+                 // The test vector, created by GnuPG, does not preserve
+                 // the final newline.
+                 //
                  // The transformation process trims trailing whitespace,
                  // and the manifesto has a trailing whitespace right at
                  // the end.
                  let mut manifesto = crate::tests::manifesto().to_vec();
-                 let ws_at = manifesto.len() - 2;
-                 let ws = manifesto.remove(ws_at);
-                 assert_eq!(ws, b' ');
+                 assert_eq!(manifesto.pop(), Some(b'\n'));
+                 assert_eq!(manifesto.pop(), Some(b' '));
                  manifesto
              },
              false,
@@ -3218,7 +3220,14 @@ pub mod test {
              VHelper::new(1, 0, 0, 0, certs.clone())),
             (crate::tests::message("a-problematic-poem.txt.cleartext.sig")
              .to_vec(),
-             crate::tests::message("a-problematic-poem.txt").to_vec(),
+             {
+                 // The test vector, created by GnuPG, does not preserve
+                 // the final newline.
+                 let mut reference =
+                     crate::tests::message("a-problematic-poem.txt").to_vec();
+                 assert_eq!(reference.pop(), Some(b'\n'));
+                 reference
+             },
              false,
              None,
              VHelper::new(1, 0, 0, 0, certs.clone())),
diff --git a/openpgp/src/serialize/stream.rs b/openpgp/src/serialize/stream.rs
index df74c73b..24e4b165 100644
--- a/openpgp/src/serialize/stream.rs
+++ b/openpgp/src/serialize/stream.rs
@@ -640,12 +640,6 @@ pub struct Signer<'a> {
     hash: HashingMode<Box<dyn crypto::hash::Digest>>,
     cookie: Cookie,
     position: u64,
-
-    /// When creating a message using the cleartext signature
-    /// framework, the final newline is not part of the signature,
-    /// hence, we delay hashing up to two bytes so that we can omit
-    /// them when the message is finalized.
-    hash_stash: Vec<u8>,
 }
 assert_send_and_sync!(Signer<'_>);
 
@@ -821,7 +815,6 @@ impl<'a> Signer<'a> {
                 private: Private::Signer,
             },
             position: 0,
-            hash_stash: Vec::with_capacity(0),
         }
     }
 
@@ -913,10 +906,6 @@ impl<'a> Signer<'a> {
     ///
     /// Note:
     ///
-    /// - If your message does not end in a newline, creating a signed
-    ///   message using the Cleartext Signature Framework will add
-    ///   one.
-    ///
     /// - The cleartext signature framework does not hash trailing
     ///   whitespace (in this case, space and tab, see [Section 7.1 of
     ///   RFC 4880] for more information).  We align what we emit and
@@ -996,7 +985,7 @@ impl<'a> Signer<'a> {
     ///
     /// let mut content = Vec::new();
     /// verifier.read_to_end(&mut content)?;
-    /// assert_eq!(content, b"Make it so, number one!\n");
+    /// assert_eq!(content, b"Make it so, number one!");
     /// # Ok(()) }
     /// ```
     //
@@ -1328,9 +1317,13 @@ impl<'a> Signer<'a> {
     fn emit_signatures(&mut self) -> Result<()> {
         if self.mode == SignatureMode::Cleartext {
             // Pop off the DashEscapeFilter.
-            let inner = self.inner.take().expect("It's the DashEscapeFilter")
+            let mut inner =
+                self.inner.take().expect("It's the DashEscapeFilter")
                 .into_inner()?.expect("It's the DashEscapeFilter");
 
+            // Add the separating newline that is not part of the message.
+            writeln!(inner)?;
+
             // And install an armorer.
             self.inner =
                 Some(writer::BoxStack::from(
@@ -1410,42 +1403,7 @@ impl<'a> Write for Signer<'a> {
 
         if let Ok(amount) = written {
             let data = &buf[..amount];
-
-            if self.mode == Cleartext {
-                // Delay hashing the last two bytes, because we the
-                // final newline is not part of the signature (see
-                // Section 7.1 of RFC4880).
-
-                // First, hash the stashed bytes.  We know that it is
-                // a newline, but we know that more text follows (buf
-                // is not empty), so it cannot be the last.
-                assert!(! buf.is_empty());
-                self.hash.update(&self.hash_stash[..]);
-                crate::vec_truncate(&mut self.hash_stash, 0);
-
-                // Compute the length of data that should be hashed.
-                // If it ends in a newline, we delay hashing it.
-                let l = data.len() - if data.ends_with(b"\r\n") {
-                    2
-                } else if data.ends_with(b"\n") {
-                    1
-                } else {
-                    0
-                };
-
-                // XXX: This logic breaks if we get a b"\r\n" in two
-                // writes.  However, TrailingWSFilter will only emit
-                // b"\r\n" in one write.
-
-                // Hash everything but the last newline now.
-                self.hash.update(&data[..l]);
-                // The newline we stash away.  If more text is written
-                // later, we will hash it then.  Otherwise, it is
-                // implicitly omitted when the signer is finalized.
-                self.hash_stash.extend_from_slice(&data[l..]);
-            } else {
-                self.hash.update(data);
-            }
+            self.hash.update(data);
             self.position += amount as u64;
         }
 
diff --git a/openpgp/src/serialize/stream/dash_escape.rs b/openpgp/src/serialize/stream/dash_escape.rs
index 35d6acf0..630d92a6 100644
--- a/openpgp/src/serialize/stream/dash_escape.rs
+++ b/openpgp/src/serialize/stream/dash_escape.rs
@@ -52,18 +52,13 @@ impl<'a, C: 'a> DashEscapeFilter<'a, C> {
     ///
     /// Any extra data is buffered.
     ///
-    /// If `done` is set, then flushes any data, and writes a final
-    /// newline.
+    /// If `done` is set, then flushes any data.
     fn write_out(&mut self, other: &[u8], done: bool)
                  -> io::Result<()> {
         // XXX: Currently, we don't mind copying the data.  This
         // could be optimized.
         self.buffer.extend_from_slice(other);
 
-        if done && ! self.buffer.is_empty() && ! self.buffer.ends_with(b"\n") {
-            self.buffer.push(b'\n');
-        }
-
         // Write out all whole lines (i.e. those terminated by a
         // newline).  This is a bit awkward, because we only know that
         // a line was whole when we are looking at the next line.
@@ -80,6 +75,16 @@ impl<'a, C: 'a> DashEscapeFilter<'a, C> {
             last_line = Some(line);
         }
 
+        if done {
+            if let Some(l) = last_line.take() {
+                if l.starts_with(b"-") || l.starts_with(b"From ") {
+                    // Dash-escape!
+                    self.inner.write_all(b"- ")?;
+                }
+                self.inner.write_all(l)?;
+            }
+        }
+
         let new_buffer = last_line.map(|l| l.to_vec())
             .unwrap_or_else(Vec::new);
         crate::vec_truncate(&mut self.buffer, 0);
@@ -175,7 +180,7 @@ mod test {
             // No final newline.
             m.finalize()?;
         }
-        assert_eq!(&buf[..], &b"01234567\n89abcdef\n"[..]);
+        assert_eq!(&buf[..], &b"01234567\n89abcdef"[..]);
 
         Ok(())
     }
@@ -207,7 +212,7 @@ mod test {
             // No final newline.
             m.finalize()?;
         }
-        assert_eq!(&buf[..], &b"- -0123-4567\n- -89ab-cdef-\n"[..]);
+        assert_eq!(&buf[..], &b"- -0123-4567\n- -89ab-cdef-"[..]);
 
         Ok(())
     }
@@ -239,7 +244,39 @@ mod test {
             // No final newline.
             m.finalize()?;
         }
-        assert_eq!(&buf[..], &b"- From 0123From 4567\n- From 89abFrom cdefFrom \n"[..]);
+        assert_eq!(&buf[..], &b"- From 0123From 4567\n- From 89abFrom cdefFrom "[..]);
+
+        Ok(())
+    }
+
+    #[test]
+    fn one_line() -> Result<()> {
+        let mut buf = Vec::new();
+        {
+            let m = Message::new(&mut buf);
+            let mut m = DashEscapeFilter::new(m, Default::default());
+            m.write_all(b"From 0123")?;
+            m.write_all(b"From 4567")?;
+            m.write_all(b"From 89ab")?;
+            m.write_all(b"From cdef")?;
+            m.write_all(b"From \n")?;
+            m.finalize()?;
+        }
+        assert_eq!(&buf[..], &b"- From 0123From 4567From 89abFrom cdefFrom \n"[..]);
+
+        let mut buf = Vec::new();
+        {
+            let m = Message::new(&mut buf);
+            let mut m = DashEscapeFilter::new(m, Default::default());
+            m.write_all(b"From 0123")?;
+            m.write_all(b"From 4567")?;
+            m.write_all(b"From 89ab")?;
+            m.write_all(b"From cdef")?;
+            m.write_all(b"From ")?;
+            // No final newline.
+            m.finalize()?;
+        }
+        assert_eq!(&buf[..], &b"- From 0123From 4567From 89abFrom cdefFrom "[..]);
 
         Ok(())
     }
diff --git a/openpgp/src/serialize/stream/trim_whitespace.rs b/openpgp/src/serialize/stream/trim_whitespace.rs
index 86e9791a..14ff76f0 100644
--- a/openpgp/src/serialize/stream/trim_whitespace.rs
+++ b/openpgp/src/serialize/stream/trim_whitespace.rs
@@ -52,43 +52,50 @@ impl<'a, C: 'a> TrailingWSFilter<'a, C> {
     ///
     /// Any extra data is buffered.
     ///
-    /// If `done` is set, then flushes any data, and writes a final
-    /// newline.
+    /// If `done` is set, then flushes any data.
     fn write_out(&mut self, other: &[u8], done: bool)
                  -> io::Result<()> {
         // XXX: Currently, we don't mind copying the data.  This
         // could be optimized.
         self.buffer.extend_from_slice(other);
 
-        if done && ! self.buffer.is_empty() && ! self.buffer.ends_with(b"\n") {
-            self.buffer.push(b'\n');
-        }
-
         // Write out all whole lines (i.e. those terminated by a
         // newline).  This is a bit awkward, because we only know that
         // a line was whole when we are looking at the next line.
         let mut last_line: Option<&[u8]> = None;
         for line in self.buffer.split(|b| *b == b'\n') {
             if let Some(mut l) = last_line.take() {
+                let crlf_line_end = l.ends_with(b"\r");
+                if crlf_line_end {
+                    l = &l[..l.len() - 1];
+                }
+
                 // Trim trailing whitespace according to Section 7.1
                 // of RFC4880, i.e. "spaces (0x20) and tabs (0x09)".
                 while Some(&b' ') == l.last() || Some(&b'\t') == l.last() {
                     l = &l[..l.len() - 1];
                 }
 
-                // To simplify the logic in Signer::write, we emit the
-                // newline in one write.
-                if l.ends_with(b"\r") {
-                    self.inner.write_all(&l[..l.len() - 1])?;
+                self.inner.write_all(l)?;
+                if crlf_line_end {
                     self.inner.write_all(b"\r\n")?;
                 } else {
-                    self.inner.write_all(l)?;
                     self.inner.write_all(b"\n")?;
                 }
             }
             last_line = Some(line);
         }
 
+        if done {
+            if let Some(mut l) = last_line {
+                // Flush the last line.
+                while Some(&b' ') == l.last() || Some(&b'\t') == l.last() {
+                    l = &l[..l.len() - 1];
+                }
+                self.inner.write_all(l)?;
+            }
+        }
+
         let new_buffer = last_line.map(|l| l.to_vec())
             .unwrap_or_else(Vec::new);
         crate::vec_truncate(&mut self.buffer, 0);
@@ -184,7 +191,7 @@ mod test {
             // No final newline.
             m.finalize()?;
         }
-        assert_eq!(&buf[..], &b"01234567\n89abcdef\n"[..]);
+        assert_eq!(&buf[..], &b"01234567\n89abcdef"[..]);
 
         Ok(())
     }
@@ -215,7 +222,7 @@ mod test {
             // No final newline.
             m.finalize()?;
         }
-        assert_eq!(&buf[..], &b"0123 4567\n89ab cdef\n"[..]);
+        assert_eq!(&buf[..], &b"0123 4567\n89ab cdef"[..]);
 
         Ok(())
     }
@@ -246,7 +253,7 @@ mod test {
             // No final newline.
             m.finalize()?;
         }
-        assert_eq!(&buf[..], &b"0123\t4567\n89ab\tcdef\n"[..]);
+        assert_eq!(&buf[..], &b"0123\t4567\n89ab\tcdef"[..]);
 
         Ok(())
     }
@@ -277,7 +284,38 @@ mod test {
             // No final newline.
             m.finalize()?;
         }
-        assert_eq!(&buf[..], &b"0123\x0c4567\x0c\n89ab\x0ccdef\x0c\n"[..]);
+        assert_eq!(&buf[..], &b"0123\x0c4567\x0c\n89ab\x0ccdef\x0c"[..]);
+
+        Ok(())
+    }
+
+    #[test]
+    fn one_line() -> Result<()> {
+        let mut buf = Vec::new();
+        {
+            let m = Message::new(&mut buf);
+            let mut m = TrailingWSFilter::new(m, Default::default());
+            m.write_all(b"0123 ")?;
+            m.write_all(b"4567 ")?;
+            m.write_all(b"89ab ")?;
+            m.write_all(b"cdef ")?;
+            m.write_all(b"\n")?;
+            m.finalize()?;
+        }
+        assert_eq!(&buf[..], &b"0123 4567 89ab cdef\n"[..]);
+
+        let mut buf = Vec::new();
+        {
+            let m = Message::new(&mut buf);
+            let mut m = TrailingWSFilter::new(m, Default::default());
+            m.write_all(b"0123 ")?;
+            m.write_all(b"4567 ")?;
+            m.write_all(b"89ab ")?;
+            m.write_all(b"cdef ")?;
+            // No final newline.
+            m.finalize()?;
+        }
+        assert_eq!(&buf[..], &b"0123 4567 89ab cdef"[..]);
 
         Ok(())
     }
author	Justus Winter <justus@sequoia-pgp.org>	2023-05-24 11:18:18 +0200
committer	Justus Winter <justus@sequoia-pgp.org>	2023-06-06 08:47:21 +0200
commit	4f3d260a013835a8ab0b2f15f45e22ca4678ab24 (patch)
tree	c9c485e53b73841efe07a29f71bc0906396d1361
parent	940a7267a61418661f61da2f0676069096ca2626 (diff)