From 30bbee2ea7831b4e0c090d6dfe9f007365713484 Mon Sep 17 00:00:00 2001 From: "Neal H. Walfield" Date: Tue, 24 Oct 2023 12:03:06 +0200 Subject: openpgp: Upgrade regex-syntax. - Upgrade regex-syntax to 0.8. - Fixes #1056. --- Cargo.lock | 8 ++++- openpgp/Cargo.toml | 2 +- openpgp/src/regex/grammar.lalrpop | 64 ++++++++++++++++++--------------- openpgp/src/regex/mod.rs | 74 ++++++++++++++++++++++++++++++++++++--- 4 files changed, 113 insertions(+), 35 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 33c18f87..99ba4971 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2176,6 +2176,12 @@ version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a5996294f19bd3aae0453a862ad728f60e6600695733dd5df01da90c54363a3c" +[[package]] +name = "regex-syntax" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" + [[package]] name = "reqwest" version = "0.11.20" @@ -2514,7 +2520,7 @@ dependencies = [ "rand", "rand_core", "regex", - "regex-syntax 0.6.29", + "regex-syntax 0.8.2", "ripemd", "rpassword", "rsa", diff --git a/openpgp/Cargo.toml b/openpgp/Cargo.toml index bdb42247..7630b5dd 100644 --- a/openpgp/Cargo.toml +++ b/openpgp/Cargo.toml @@ -41,7 +41,7 @@ memsec = { version = ">=0.5, <0.7", default-features = false } nettle = { version = "7.3", optional = true } once_cell = "1" regex = "1" -regex-syntax = "0.6" +regex-syntax = "0.8" sha1collisiondetection = { version = "0.3.1", default-features = false, features = ["std"] } thiserror = "1.0.2" xxhash-rust = { version = "0.8", features = ["xxh3"] } diff --git a/openpgp/src/regex/grammar.lalrpop b/openpgp/src/regex/grammar.lalrpop index fccd84f8..369afe0f 100644 --- a/openpgp/src/regex/grammar.lalrpop +++ b/openpgp/src/regex/grammar.lalrpop @@ -25,7 +25,7 @@ pub(crate) Regex : Hir = { // This is actually required for version 1.3.7 of the regex // crate, which is the version that is in Debian Bullseye. // See issue #694 for details. - if r.iter().any(|b| b.kind().is_empty()) { + if r.iter().any(|b| *b.kind() == hir::HirKind::Empty) { hir::Hir::empty() } else { Hir::alternation(r) @@ -46,14 +46,11 @@ Branch : Hir = { hir::Hir::empty() }, => { - if p.iter().all(|p| p.kind().is_empty()) { + if p.iter().all(|p| *p.kind() == hir::HirKind::Empty) { // All pieces are empty. Just return empty. hir::Hir::empty() } else { - hir::Hir::group(hir::Group { - kind: hir::GroupKind::NonCapturing, - hir: Box::new(hir::Hir::concat(p)), - }) + hir::Hir::concat(p) } }, } @@ -61,41 +58,44 @@ Branch : Hir = { Piece : Hir = { => a, STAR => { - if a.kind().is_empty() { + if *a.kind() == hir::HirKind::Empty { // Piece is empty. This is equivalent to empty so just // return it. a } else { hir::Hir::repetition(hir::Repetition { - kind: hir::RepetitionKind::ZeroOrMore, + min: 0, + max: None, greedy: true, - hir: Box::new(a) + sub: Box::new(a) }) } }, PLUS => { - if a.kind().is_empty() { + if *a.kind() == hir::HirKind::Empty { // Piece is empty. This is equivalent to empty so just // return it. a } else { hir::Hir::repetition(hir::Repetition { - kind: hir::RepetitionKind::OneOrMore, + min: 1, + max: None, greedy: true, - hir: Box::new(a) + sub: Box::new(a) }) } }, QUESTION => { - if a.kind().is_empty() { + if *a.kind() == hir::HirKind::Empty { // Piece is empty. This is equivalent to empty so just // return it. a } else { hir::Hir::repetition(hir::Repetition { - kind: hir::RepetitionKind::ZeroOrOne, + min: 0, + max: Some(1), greedy: true, - hir: Box::new(a) + sub: Box::new(a) }) } }, @@ -103,38 +103,44 @@ Piece : Hir = { Atom : Hir = { LPAREN RPAREN => { - if r.kind().is_empty() { - r - } else { - hir::Hir::group(hir::Group { - kind: hir::GroupKind::NonCapturing, - hir: Box::new(r), - }) - } + r }, Range, DOT => { - hir::Hir::any(false) + hir::Hir::dot(hir::Dot::AnyChar) }, CARET => { - hir::Hir::anchor(hir::Anchor::StartText) + hir::Hir::look(hir::Look::Start) }, DOLLAR => { - hir::Hir::anchor(hir::Anchor::EndText) + hir::Hir::look(hir::Look::End) }, BACKSLASH => { - hir::Hir::literal(hir::Literal::Unicode(t.to_char())) + // "A buffer of length four is large enough to encode any + // char." + // + // https://doc.rust-lang.org/std/primitive.char.html#method.encode_utf8 + let mut buffer = [0; 4]; + // Convert the Unicode character t to a string. + let s = t.to_char().encode_utf8(&mut buffer); + hir::Hir::literal(s.as_bytes()) }, DASH => { - hir::Hir::literal(hir::Literal::Unicode('-')) + hir::Hir::literal("-".as_bytes()) }, => { - hir::Hir::literal(hir::Literal::Unicode(t.to_char())) + // "A buffer of length four is large enough to encode any + // char." + // + // https://doc.rust-lang.org/std/primitive.char.html#method.encode_utf8 + let mut buffer = [0; 4]; + let s = t.to_char().encode_utf8(&mut buffer); + hir::Hir::literal(s.as_bytes()) }, } diff --git a/openpgp/src/regex/mod.rs b/openpgp/src/regex/mod.rs index e453ebba..c42c1727 100644 --- a/openpgp/src/regex/mod.rs +++ b/openpgp/src/regex/mod.rs @@ -653,10 +653,6 @@ impl RegexSet { match grammar::RegexParser::new().parse(re, lexer) { Ok(hir) => { had_good = true; - let hir = hir::Hir::group(hir::Group { - kind: hir::GroupKind::NonCapturing, - hir: Box::new(hir), - }); regexes.push(hir); } Err(err) => { @@ -1471,6 +1467,65 @@ mod tests { (true, "xabcdey"), (false, "xa(b(c)d)ey"), ]); + a("x(a|b)y", &[ + (false, "xy"), + (true, "xay"), + (true, "xby"), + (false, "xaay"), + (false, "xbby"), + (false, "xaby"), + (false, "xaaby"), + (false, "xabby"), + (false, "xaabby"), + (false, "xcy"), + ]); + a("x(a|bc)y", &[ + (false, "xy"), + (true, "xay"), + (false, "xby"), + (true, "xbcy"), + (false, "xaay"), + (false, "xbby"), + (false, "xaby"), + (false, "xabcy"), + (false, "xabby"), + (false, "xaabby"), + (false, "xcy"), + (false, "xacy"), + ]); + a("x(a|b|c)y", &[ + (false, "xy"), + (true, "xay"), + (true, "xby"), + (true, "xcy"), + (false, "xaay"), + (false, "xbby"), + (false, "xaby"), + (false, "xabcy"), + (false, "xabby"), + (false, "xaabby"), + (false, "xacy"), + ]); + a("x(a|b)(c|d)y", &[ + (false, "xy"), + (false, "xay"), + (false, "xby"), + (false, "xcy"), + (false, "xdy"), + (false, "xaay"), + (false, "xbby"), + (false, "xccy"), + (false, "xddy"), + (false, "xaby"), + (false, "xcdy"), + (true, "xacy"), + (true, "xady"), + (true, "xbcy"), + (true, "xbdy"), + (false, "xabcy"), + (false, "xabby"), + (false, "xaabby"), + ]); a("x(a+|b+)y", &[ (false, "xy"), (true, "xay"), @@ -2077,7 +2132,10 @@ mod tests { // Try to make sure one re does not leak into another. let re = RegexSet::new(&[ "cd$", "^ab" ])?; assert!(re.is_match("abxx")); + assert!(! re.is_match("xabxx")); assert!(re.is_match("xxcd")); + assert!(! re.is_match("xxcdx")); + assert!(re.is_match("abcdx")); // Invalid regular expressions should be ignored. let re = RegexSet::new(&[ "[ab", "cd]", "x" ])?; @@ -2112,6 +2170,14 @@ mod tests { assert!(re.is_match("cd]")); assert!(re.is_match("x")); + // The empty branch of the alternation should match everything. + let re = RegexSet::new(&[ "ab|", "cd" ])?; + assert!(re.is_match("a")); + assert!(re.is_match("b")); + assert!(re.is_match("x")); + assert!(re.is_match("xyx")); + assert!(re.is_match("")); + Ok(()) } -- cgit v1.2.3