summaryrefslogtreecommitdiffstats
path: root/src/command
diff options
context:
space:
mode:
authorDenys Séguret <cano.petrole@gmail.com>2022-10-04 20:04:59 +0200
committerGitHub <noreply@github.com>2022-10-04 20:04:59 +0200
commit3e2890739526baadb11139c4297a1bc02951fe06 (patch)
treef6588eeb965bda2401e2612ad7027506c57ed6a3 /src/command
parent818d069c51efe3a1736743a06c714ae7c351818a (diff)
New escaping rules (#609)
As was noticed by @FedericoStra, escaping for regular expressions was painful. For example you had to do `/\\d` to search for a digit. This PR brings more complex escaping rules so that less escaping is necessary: - when after '/': only ' ', ':', '/' and '\' need escaping - otherwise, '&', '|', '(', ')', '\' need escaping Fix #592
Diffstat (limited to 'src/command')
-rw-r--r--src/command/parts.rs303
1 files changed, 258 insertions, 45 deletions
diff --git a/src/command/parts.rs b/src/command/parts.rs
index b355de5..6dc2559 100644
--- a/src/command/parts.rs
+++ b/src/command/parts.rs
@@ -8,7 +8,7 @@ use {
};
/// An intermediate parsed representation of the raw string
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, PartialEq)]
pub struct CommandParts {
pub raw_pattern: String, // may be empty
pub pattern: BeTree<PatternOperator, PatternParts>,
@@ -31,60 +31,77 @@ impl CommandParts {
.as_ref()
.map_or(false, |vi| !vi.is_empty())
}
- pub fn from(mut raw: String) -> Self {
+ pub fn from<S: Into<String>>(raw: S) -> Self {
+ let mut raw = raw.into();
let mut invocation_start_pos: Option<usize> = None;
- let mut escaping = false;
let mut pt = BeTree::new();
- for (pos, c) in raw.char_indices() {
- if c == '\\' {
- if escaping {
- escaping = false;
- } else {
- escaping = true;
- continue;
+ let mut chars = raw.char_indices().peekable();
+ let mut escape_cur_char = false;
+ let mut escape_next_char = false;
+ // we loop on chars and build the pattern tree until we reach an unescaped ' ' or ':'
+ while let Some((pos, cur_char)) = chars.next() {
+ let between_slashes = pt.current_atom()
+ .map_or(
+ false,
+ |pp: &PatternParts| pp.is_between_slashes(),
+ );
+ match cur_char {
+ c if escape_cur_char => {
+ // Escaping is used to prevent characters from being consumed at the
+ // composite pattern level (or, and, parens) or as the separator between
+ // the pattern and the verb. An escaped char is usable in a pattern atom.
+ pt.mutate_or_create_atom(PatternParts::default).push(c);
}
- }
- if !escaping {
- if c == ' ' || c == ':' {
+ '\\' => {
+ // Pattern escaping rules:
+ // - when after '/': only ' ', ':', '/' and '\' need escaping
+ // - otherwise, '&', '|', '!', '(', ')' need escaping too ('(' is only here for
+ // symmetry)
+ let between_slashes = match pt.current_atom() {
+ Some(pattern_parts) => pattern_parts.is_between_slashes(),
+ None => false,
+ };
+ escape_next_char = match chars.peek() {
+ None => false, // End of the string, we can't be escaping
+ Some((_, next_char)) => match (next_char, between_slashes) {
+ (' ' | ':' | '/' | '\\', _) => true,
+ ('&' | '|' | '!' | '(' | ')', false) => true,
+ _ => false,
+ }
+ };
+ if !escape_next_char {
+ // if the '\' isn't used for escaping, it's used as its char value
+ pt.mutate_or_create_atom(PatternParts::default).push('\\');
+ }
+ }
+ ' ' | ':' => { // ending the pattern part
invocation_start_pos = Some(pos);
break;
}
- if c == '/' {
+ '/' => { // starting an atom part
pt.mutate_or_create_atom(PatternParts::default).add_part();
- continue;
}
- let allow_inter_pattern_token = match pt.current_atom() {
- Some(pattern_parts) => pattern_parts.allow_inter_pattern_token(),
- None => true,
- };
- if allow_inter_pattern_token {
- match c {
- '|' if pt.accept_binary_operator() => {
- pt.push_operator(PatternOperator::Or);
- continue;
- }
- '&' if pt.accept_binary_operator() => {
- pt.push_operator(PatternOperator::And);
- continue;
- }
- '!' if pt.accept_unary_operator() => {
- pt.push_operator(PatternOperator::Not);
- continue;
- }
- '(' if pt.accept_opening_par() => {
- pt.open_par();
- continue;
- }
- ')' if pt.accept_closing_par() => {
- pt.close_par();
- continue;
- }
- _ => {}
- }
+ '|' if !between_slashes && pt.accept_binary_operator() => {
+ pt.push_operator(PatternOperator::Or);
+ }
+ '&' if !between_slashes && pt.accept_binary_operator() => {
+ pt.push_operator(PatternOperator::And);
+ }
+ '!' if !between_slashes && pt.accept_unary_operator() => {
+ pt.push_operator(PatternOperator::Not);
+ }
+ '(' if !between_slashes && pt.accept_opening_par() => {
+ pt.open_par();
+ }
+ ')' if !between_slashes && pt.accept_closing_par() => {
+ pt.close_par();
+ }
+ _ => {
+ pt.mutate_or_create_atom(PatternParts::default).push(cur_char);
}
}
- pt.mutate_or_create_atom(PatternParts::default).push(c);
- escaping = false;
+ escape_cur_char = escape_next_char;
+ escape_next_char = false;
}
let mut verb_invocation = None;
if let Some(pos) = invocation_start_pos {
@@ -123,3 +140,199 @@ impl CommandParts {
}
+#[cfg(test)]
+mod test_command_parts {
+
+ use {
+ crate::{
+ command::CommandParts,
+ pattern::*,
+ verb::VerbInvocation,
+ },
+ bet::{BeTree, Token},
+ };
+
+ fn pp(a: &[&str]) -> PatternParts {
+ a.try_into().unwrap()
+ }
+
+ fn check(
+ input: &str,
+ raw_pattern: &str,
+ mut pattern_tokens: Vec<Token<PatternOperator, PatternParts>>,
+ verb_invocation: Option<&str>,
+ ) {
+ let left = CommandParts::from(input);
+ dbg!(&left);
+ let mut pattern = BeTree::new();
+ for token in pattern_tokens.drain(..) {
+ pattern.push(token);
+ }
+ let right = CommandParts {
+ raw_pattern: raw_pattern.to_string(),
+ pattern,
+ verb_invocation: verb_invocation.map(|s| VerbInvocation::from(s)),
+ };
+ dbg!(&right);
+ assert_eq!(left, right);
+ }
+
+ #[test]
+ fn parse_empty() {
+ check(
+ "",
+ "",
+ vec![],
+ None,
+ );
+ }
+ #[test]
+ fn parse_just_semicolon() {
+ check(
+ ":",
+ "",
+ vec![],
+ Some(""),
+ );
+ }
+ #[test]
+ fn parse_no_pattern() {
+ check(
+ " cd /",
+ "",
+ vec![],
+ Some("cd /"),
+ );
+ }
+ #[test]
+ fn parse_pattern_and_invocation() {
+ check(
+ "/r cd /",
+ "/r",
+ vec![
+ Token::Atom(pp(&["", "r"])),
+ ],
+ Some("cd /"),
+ );
+ }
+ #[test]
+ fn parse_pattern_between_slashes() {
+ check(
+ r#"/&"#,
+ r#"/&"#,
+ vec![
+ Token::Atom(pp(&["", "&"])),
+ ],
+ None,
+ );
+ check(
+ r#"/&/&r/a(\w-)+/ rm"#,
+ r#"/&/&r/a(\w-)+/"#,
+ vec![
+ Token::Atom(pp(&["", "&", ""])),
+ Token::Operator(PatternOperator::And),
+ Token::Atom(pp(&["r", r#"a(\w-)+"#, ""])),
+ ],
+ Some("rm"),
+ );
+ }
+ #[test]
+ fn parse_pattern_with_space() {
+ check(
+ r#"a\ b"#,
+ r#"a\ b"#,
+ vec![
+ Token::Atom(pp(&["a b"])),
+ ],
+ None,
+ );
+ }
+ #[test]
+ fn parse_pattern_with_slash() {
+ check(
+ r#"r/a\ b\//i cd /"#,
+ r#"r/a\ b\//i"#,
+ vec![
+ Token::Atom(pp(&["r", "a b/", "i"])),
+ ],
+ Some("cd /"),
+ );
+ }
+ #[test]
+ fn parse_fuzzy_pattern_searching_parenthesis() {
+ check(
+ r#"\("#,
+ r#"\("#,
+ vec![
+ Token::Atom(pp(&["("])),
+ ],
+ None,
+ );
+ }
+ #[test]
+ fn parse_regex_pattern_searching_parenthesis() {
+ check(
+ r#"/\("#,
+ r#"/\("#,
+ vec![
+ Token::Atom(pp(&["", r#"\("#])),
+ ],
+ None,
+ );
+ }
+ #[test]
+ fn parse_composite_pattern() {
+ check(
+ "(/txt$/&!truc)&c/rex",
+ "(/txt$/&!truc)&c/rex",
+ vec![
+ Token::OpeningParenthesis,
+ Token::Atom(pp(&["", "txt$", ""])),
+ Token::Operator(PatternOperator::And),
+ Token::Operator(PatternOperator::Not),
+ Token::Atom(pp(&["truc"])),
+ Token::ClosingParenthesis,
+ Token::Operator(PatternOperator::And),
+ Token::Atom(pp(&["c", "rex"])),
+ ],
+ None
+ );
+ }
+ #[test]
+ fn parse_unclosed_composite_pattern() {
+ check(
+ r#"!/\.json$/&(c/isize/|c/i32:rm"#,
+ r#"!/\.json$/&(c/isize/|c/i32"#,
+ vec![
+ Token::Operator(PatternOperator::Not),
+ Token::Atom(pp(&["", r#"\.json$"#, ""])),
+ Token::Operator(PatternOperator::And),
+ Token::OpeningParenthesis,
+ Token::Atom(pp(&["c", "isize", ""])),
+ Token::Operator(PatternOperator::Or),
+ Token::Atom(pp(&["c", "i32"])),
+ ],
+ Some("rm"),
+ );
+ }
+ #[test]
+ fn issue_592() { // https://github.com/Canop/broot/issues/592
+ check(
+ r#"\t"#,
+ r#"\t"#,
+ vec![
+ Token::Atom(pp(&[r#"\t"#])),
+ ],
+ None,
+ );
+ check(
+ r#"r/@(\.[^.]+)+/ cp .."#,
+ r#"r/@(\.[^.]+)+/"#,
+ vec![
+ Token::Atom(pp(&["r", r#"@(\.[^.]+)+"#, ""])),
+ ],
+ Some("cp .."),
+ );
+ }
+}
+