summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: Neal H. Walfield <neal@pep.foundation> 2023-10-24 12:03:06 +0200
committer: Neal H. Walfield <neal@pep.foundation> 2023-10-24 12:03:48 +0200
commit: 30bbee2ea7831b4e0c090d6dfe9f007365713484 (patch)
tree: 36e7074dbdbf72b5f15db1a49844385f9353705b
parent: 160dc30e0c897bc822e19f0acc5d972941de98d4 (diff)
openpgp: Upgrade regex-syntax.
- Upgrade regex-syntax to 0.8.
- Fixes #1056.
-rw-r--r-- Cargo.lock 8
-rw-r--r-- openpgp/Cargo.toml 2
-rw-r--r-- openpgp/src/regex/grammar.lalrpop 64
-rw-r--r-- openpgp/src/regex/mod.rs 74
4 files changed, 113 insertions, 35 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 33c18f87..99ba4971 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2177,6 +2177,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a5996294f19bd3aae0453a862ad728f60e6600695733dd5df01da90c54363a3c"
[[package]]
+name = "regex-syntax"
+version = "0.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"
+
+[[package]]
name = "reqwest"
version = "0.11.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -2514,7 +2520,7 @@ dependencies = [
"rand",
"rand_core",
"regex",
- "regex-syntax 0.6.29",
+ "regex-syntax 0.8.2",
"ripemd",
"rpassword",
"rsa",
diff --git a/openpgp/Cargo.toml b/openpgp/Cargo.toml
index bdb42247..7630b5dd 100644
--- a/openpgp/Cargo.toml
+++ b/openpgp/Cargo.toml
@@ -41,7 +41,7 @@ memsec = { version = ">=0.5, <0.7", default-features = false }
nettle = { version = "7.3", optional = true }
once_cell = "1"
regex = "1"
-regex-syntax = "0.6"
+regex-syntax = "0.8"
sha1collisiondetection = { version = "0.3.1", default-features = false, features = ["std"] }
thiserror = "1.0.2"
xxhash-rust = { version = "0.8", features = ["xxh3"] }
diff --git a/openpgp/src/regex/grammar.lalrpop b/openpgp/src/regex/grammar.lalrpop
index fccd84f8..369afe0f 100644
--- a/openpgp/src/regex/grammar.lalrpop
+++ b/openpgp/src/regex/grammar.lalrpop
@@ -25,7 +25,7 @@ pub(crate) Regex : Hir = {
// This is actually required for version 1.3.7 of the regex
// crate, which is the version that is in Debian Bullseye.
// See issue #694 for details.
- if r.iter().any(|b| b.kind().is_empty()) {
+ if r.iter().any(|b| *b.kind() == hir::HirKind::Empty) {
hir::Hir::empty()
} else {
Hir::alternation(r)
@@ -46,14 +46,11 @@ Branch : Hir = {
hir::Hir::empty()
},
<p:Piece+> => {
- if p.iter().all(|p| p.kind().is_empty()) {
+ if p.iter().all(|p| *p.kind() == hir::HirKind::Empty) {
// All pieces are empty. Just return empty.
hir::Hir::empty()
} else {
- hir::Hir::group(hir::Group {
- kind: hir::GroupKind::NonCapturing,
- hir: Box::new(hir::Hir::concat(p)),
- })
+ hir::Hir::concat(p)
}
},
}
@@ -61,41 +58,44 @@ Branch : Hir = {
Piece : Hir = {
<a:Atom> => a,
<a:Atom> STAR => {
- if a.kind().is_empty() {
+ if *a.kind() == hir::HirKind::Empty {
// Piece is empty. This is equivalent to empty so just
// return it.
a
} else {
hir::Hir::repetition(hir::Repetition {
- kind: hir::RepetitionKind::ZeroOrMore,
+ min: 0,
+ max: None,
greedy: true,
- hir: Box::new(a)
+ sub: Box::new(a)
})
}
},
<a:Atom> PLUS => {
- if a.kind().is_empty() {
+ if *a.kind() == hir::HirKind::Empty {
// Piece is empty. This is equivalent to empty so just
// return it.
a
} else {
hir::Hir::repetition(hir::Repetition {
- kind: hir::RepetitionKind::OneOrMore,
+ min: 1,
+ max: None,
greedy: true,
- hir: Box::new(a)
+ sub: Box::new(a)
})
}
},
<a:Atom> QUESTION => {
- if a.kind().is_empty() {
+ if *a.kind() == hir::HirKind::Empty {
// Piece is empty. This is equivalent to empty so just
// return it.
a
} else {
hir::Hir::repetition(hir::Repetition {
- kind: hir::RepetitionKind::ZeroOrOne,
+ min: 0,
+ max: Some(1),
greedy: true,
- hir: Box::new(a)
+ sub: Box::new(a)
})
}
},
@@ -103,38 +103,44 @@ Piece : Hir = {
Atom : Hir = {
LPAREN <r:Regex> RPAREN => {
- if r.kind().is_empty() {
- r
- } else {
- hir::Hir::group(hir::Group {
- kind: hir::GroupKind::NonCapturing,
- hir: Box::new(r),
- })
- }
+ r
},
Range,
DOT => {
- hir::Hir::any(false)
+ hir::Hir::dot(hir::Dot::AnyChar)
},
CARET => {
- hir::Hir::anchor(hir::Anchor::StartText)
+ hir::Hir::look(hir::Look::Start)
},
DOLLAR => {
- hir::Hir::anchor(hir::Anchor::EndText)
+ hir::Hir::look(hir::Look::End)
},
BACKSLASH <t:AnyChar> => {
- hir::Hir::literal(hir::Literal::Unicode(t.to_char()))
+ // "A buffer of length four is large enough to encode any
+ // char."
+ //
+ // https://doc.rust-lang.org/std/primitive.char.html#method.encode_utf8
+ let mut buffer = [0; 4];
+ // Convert the Unicode character t to a string.
+ let s = t.to_char().encode_utf8(&mut buffer);
+ hir::Hir::literal(s.as_bytes())
},
DASH => {
- hir::Hir::literal(hir::Literal::Unicode('-'))
+ hir::Hir::literal("-".as_bytes())
},
<t:OTHER> => {
- hir::Hir::literal(hir::Literal::Unicode(t.to_char()))
+ // "A buffer of length four is large enough to encode any
+ // char."
+ //
+ // https://doc.rust-lang.org/std/primitive.char.html#method.encode_utf8
+ let mut buffer = [0; 4];
+ let s = t.to_char().encode_utf8(&mut buffer);
+ hir::Hir::literal(s.as_bytes())
},
}
diff --git a/openpgp/src/regex/mod.rs b/openpgp/src/regex/mod.rs
index e453ebba..c42c1727 100644
--- a/openpgp/src/regex/mod.rs
+++ b/openpgp/src/regex/mod.rs
@@ -653,10 +653,6 @@ impl RegexSet {
match grammar::RegexParser::new().parse(re, lexer) {
Ok(hir) => {
had_good = true;
- let hir = hir::Hir::group(hir::Group {
- kind: hir::GroupKind::NonCapturing,
- hir: Box::new(hir),
- });
regexes.push(hir);
}
Err(err) => {
@@ -1471,6 +1467,65 @@ mod tests {
(true, "xabcdey"),
(false, "xa(b(c)d)ey"),
]);
+ a("x(a|b)y", &[
+ (false, "xy"),
+ (true, "xay"),
+ (true, "xby"),
+ (false, "xaay"),
+ (false, "xbby"),
+ (false, "xaby"),
+ (false, "xaaby"),
+ (false, "xabby"),
+ (false, "xaabby"),
+ (false, "xcy"),
+ ]);
+ a("x(a|bc)y", &[
+ (false, "xy"),
+ (true, "xay"),
+ (false, "xby"),
+ (true, "xbcy"),
+ (false, "xaay"),
+ (false, "xbby"),
+ (false, "xaby"),
+ (false, "xabcy"),
+ (false, "xabby"),
+ (false, "xaabby"),
+ (false, "xcy"),
+ (false, "xacy"),
+ ]);
+ a("x(a|b|c)y", &[
+ (false, "xy"),
+ (true, "xay"),
+ (true, "xby"),
+ (true, "xcy"),
+ (false, "xaay"),
+ (false, "xbby"),
+ (false, "xaby"),
+ (false, "xabcy"),
+ (false, "xabby"),
+ (false, "xaabby"),
+ (false, "xacy"),
+ ]);
+ a("x(a|b)(c|d)y", &[
+ (false, "xy"),
+ (false, "xay"),
+ (false, "xby"),
+ (false, "xcy"),
+ (false, "xdy"),
+ (false, "xaay"),
+ (false, "xbby"),
+ (false, "xccy"),
+ (false, "xddy"),
+ (false, "xaby"),
+ (false, "xcdy"),
+ (true, "xacy"),
+ (true, "xady"),
+ (true, "xbcy"),
+ (true, "xbdy"),
+ (false, "xabcy"),
+ (false, "xabby"),
+ (false, "xaabby"),
+ ]);
a("x(a+|b+)y", &[
(false, "xy"),
(true, "xay"),
@@ -2077,7 +2132,10 @@ mod tests {
// Try to make sure one re does not leak into another.
let re = RegexSet::new(&[ "cd$", "^ab" ])?;
assert!(re.is_match("abxx"));
+ assert!(! re.is_match("xabxx"));
assert!(re.is_match("xxcd"));
+ assert!(! re.is_match("xxcdx"));
+ assert!(re.is_match("abcdx"));
// Invalid regular expressions should be ignored.
let re = RegexSet::new(&[ "[ab", "cd]", "x" ])?;
@@ -2112,6 +2170,14 @@ mod tests {
assert!(re.is_match("cd]"));
assert!(re.is_match("x"));
+ // The empty branch of the alternation should match everything.
+ let re = RegexSet::new(&[ "ab|", "cd" ])?;
+ assert!(re.is_match("a"));
+ assert!(re.is_match("b"));
+ assert!(re.is_match("x"));
+ assert!(re.is_match("xyx"));
+ assert!(re.is_match(""));
+
Ok(())
}