diff options
author | Justus Winter <justus@sequoia-pgp.org> | 2023-09-27 11:04:48 +0200 |
---|---|---|
committer | Justus Winter <justus@sequoia-pgp.org> | 2023-09-27 11:20:00 +0200 |
commit | 181758cd99d245c598a7c7f91729c428fb1b9060 (patch) | |
tree | 6eddab336a5ef06e6ceb3679641e8a568eacb83f | |
parent | ea31e87029288bfe5897b1dd6a02cdc291946c9e (diff) |
openpgp: Impl Eq for Regex, RegexSet, add accessors for the raw REs.
- Fixes #973.
-rw-r--r-- | openpgp/NEWS | 4 | ||||
-rw-r--r-- | openpgp/src/regex/mod.rs | 62 |
2 files changed, 65 insertions, 1 deletion
diff --git a/openpgp/NEWS b/openpgp/NEWS index e41158a5..b7770495 100644 --- a/openpgp/NEWS +++ b/openpgp/NEWS @@ -44,6 +44,10 @@ - packet::Key::generate_elgamal - parse::PacketParser::start_hashing - parse::PacketParserBuilder::automatic_hashing + - impl Eq, PartialEq for regex::Regex + - regex::Regex::as_str + - impl Eq, PartialEq for regex::RegexSet + - regex::RegexSet::as_bytes - impl Default for types::AEADAlgorithm - serialize::stream::Encryptor2 - types::AEADAlgorithm::GCM diff --git a/openpgp/src/regex/mod.rs b/openpgp/src/regex/mod.rs index 6c102797..e453ebba 100644 --- a/openpgp/src/regex/mod.rs +++ b/openpgp/src/regex/mod.rs @@ -378,13 +378,32 @@ fn generate_class(caret: bool, chars: impl Iterator<Item=char>) -> Hir /// See the [module-level documentation] for more details. /// /// [module-level documentation]: self +/// +/// # A note on equality +/// +/// We define equality on `Regex` as the equality of the uncompiled +/// regular expression given to the constructor and whether +/// sanitizations are enabled. #[derive(Clone, Debug)] pub struct Regex { + /// The original regular expression. + /// + /// Equality is defined using this and `disable_sanitizations`. + re: String, regex: regex::Regex, disable_sanitizations: bool, } assert_send_and_sync!(Regex); +impl PartialEq for Regex { + fn eq(&self, other: &Self) -> bool { + self.re == other.re + && self.disable_sanitizations == other.disable_sanitizations + } +} + +impl Eq for Regex {} + impl Regex { /// Parses and compiles the regular expression. /// @@ -411,6 +430,7 @@ impl Regex { .build()?; Ok(Self { + re: re.into(), regex, disable_sanitizations: false, }) @@ -430,6 +450,11 @@ impl Regex { Self::new(std::str::from_utf8(re)?) } + /// Returns the string-representation of the regular expression. + pub fn as_str(&self) -> &str { + &self.re + } + /// Controls whether matched strings must pass a sanity check. 
/// /// If `false` (the default), i.e., sanity checks are enabled, and @@ -530,13 +555,32 @@ assert_send_and_sync!(RegexSet_); /// See the [module-level documentation] for more details. /// /// [module-level documentation]: self +/// +/// # A note on equality +/// +/// We define equality on `RegexSet` as the equality of the uncompiled +/// regular expressions given to the constructor and whether +/// sanitizations are enabled. #[derive(Clone)] pub struct RegexSet { + /// The original regular expressions. + /// + /// Equality is defined using this and `disable_sanitizations`. + re_bytes: Vec<Vec<u8>>, re_set: RegexSet_, disable_sanitizations: bool, } assert_send_and_sync!(RegexSet); +impl PartialEq for RegexSet { + fn eq(&self, other: &Self) -> bool { + self.re_bytes == other.re_bytes + && self.disable_sanitizations == other.disable_sanitizations + } +} + +impl Eq for RegexSet {} + impl fmt::Debug for RegexSet { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let mut d = f.debug_struct("RegexSet"); @@ -600,8 +644,11 @@ impl RegexSet { let mut had_good = false; let mut had_bad = false; + let mut re_bytes = Vec::new(); for re in res { let re = re.borrow(); + re_bytes.push(re.as_bytes().into()); + let lexer = Lexer::new(re); match grammar::RegexParser::new().parse(re, lexer) { Ok(hir) => { @@ -622,6 +669,7 @@ impl RegexSet { if had_bad && ! had_good { t!("All regular expressions were invalid."); Ok(RegexSet { + re_bytes, re_set: RegexSet_::Invalid, disable_sanitizations: false, }) @@ -629,14 +677,17 @@ impl RegexSet { // Match everything. t!("No regular expressions provided."); Ok(RegexSet { + re_bytes, re_set: RegexSet_::Everything, disable_sanitizations: false, }) } else { // Match any of the regular expressions. 
Ok(RegexSet { + re_bytes, re_set: RegexSet_::Regex( Regex { + re: String::new(), regex: regex::RegexBuilder::new( &Hir::alternation(regexes).to_string()) .build()?, @@ -714,12 +765,14 @@ impl RegexSet { { let mut have_valid_utf8 = false; let mut have_invalid_utf8 = false; + let mut re_bytes = Vec::new(); let re_set = Self::new( res .into_iter() .scan((&mut have_valid_utf8, &mut have_invalid_utf8), |(valid, invalid), re| { + re_bytes.push(re.borrow().to_vec()); if let Ok(re) = std::str::from_utf8(re.borrow()) { **valid = true; Some(Some(re)) @@ -734,16 +787,22 @@ impl RegexSet { // None of the strings were valid UTF-8. Reject // everything. Ok(RegexSet { + re_bytes, re_set: RegexSet_::Invalid, disable_sanitizations: false, }) } else { // We had nothing or at least one string was valid UTF-8. // RegexSet::new did the right thing. - re_set + re_set.map(|mut r| { r.re_bytes = re_bytes; r }) } } + /// Returns the bytes-representation of the regular expressions. + pub fn as_bytes(&self) -> &[Vec<u8>] { + &self.re_bytes + } + /// Creates a `RegexSet` from the regular expressions stored in a /// trust signature. /// @@ -873,6 +932,7 @@ impl RegexSet { pub fn everything() -> Result<Self> { Ok(Self { + re_bytes: vec![vec![]], re_set: RegexSet_::Everything, disable_sanitizations: false, }) |