diff options
author | Jalil David Salamé Messina <jalil.salame@gmail.com> | 2024-05-04 15:26:47 +0200 |
---|---|---|
committer | Ben Wiederhake <BenWiederhake.GitHub@gmx.de> | 2024-05-04 19:55:49 +0200 |
commit | 3c47f2769899668b55a12124837019ca5879c2bd (patch) | |
tree | ecbefef32abd541dd90dfd08b3c7df087aaac35a | |
parent | 3b96ff1d10a4d74e7018c6a84474de75d168136c (diff) |
tr: calculate complement set early
Fixes #6163 and adds a test to verify that a regression is not caused.
Instead of inverting the conditions to check (e.g. delete characters **not** present in set1) invert
set1 when passed the complement flag (`-c`, `-C`, `--complement`). This is done by calculating set1
then "inverting" it by subtracting from the "full" (universe) set (0..=u8::MAX).
This fixes issue 6163 because it was caused by a combination of the `-c` and `-t` flag. `-c` is the
abovementioned complement flag and `-t`/`--truncate-set1` truncates set1 to the length of set2. What
happened in issue 6163 is that `set1={b'Y'}` and `set2={b'Z'}`, when truncated set1 stays the same
and we proceed. The problem is GNU utils does not consider set1 to be `{b'Y'}`, but the complement
of `{b'Y'}`, that is `U \ {b'Y'}={0, 1, ..., b'X', b'Z', ...}`, thus it is truncated to `{0}`.
We can verify this by doing: `printf '\0' | tr -c -t Y Z`, which prints `Z` to stdout as expected.
Additionally, by calculating the complement of set1 we no longer need to consider the complement
flag when doing the translate operation, this allows us to delete a lot of code.
-rw-r--r-- | src/uu/tr/src/operation.rs | 157 | ||||
-rw-r--r-- | src/uu/tr/src/tr.rs | 14 | ||||
-rw-r--r-- | tests/by-util/test_tr.rs | 21 |
3 files changed, 56 insertions, 136 deletions
diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index 77ace6b40..6e1f039a3 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -19,6 +19,7 @@ use std::{ error::Error, fmt::{Debug, Display}, io::{BufRead, Write}, + ops::Not, }; use uucore::error::UError; @@ -125,6 +126,7 @@ impl Sequence { pub fn solve_set_characters( set1_str: &[u8], set2_str: &[u8], + complement_flag: bool, truncate_set1_flag: bool, ) -> Result<(Vec<u8>, Vec<u8>), BadSequence> { let set1 = Self::from_str(set1_str)?; @@ -189,6 +191,9 @@ impl Sequence { }, }; let mut set1_solved: Vec<_> = set1.iter().flat_map(Self::flatten).collect(); + if complement_flag { + set1_solved = (0..=u8::MAX).filter(|x| !set1_solved.contains(x)).collect(); + } if truncate_set1_flag { set1_solved.truncate(set2_solved.len()); } @@ -369,56 +374,28 @@ impl<A: SymbolTranslator, B: SymbolTranslator> SymbolTranslator for ChainedSymbo #[derive(Debug)] pub struct DeleteOperation { set: Vec<u8>, - complement_flag: bool, } impl DeleteOperation { - pub fn new(set: Vec<u8>, complement_flag: bool) -> Self { - Self { - set, - complement_flag, - } + pub fn new(set: Vec<u8>) -> Self { + Self { set } } } impl SymbolTranslator for DeleteOperation { fn translate(&mut self, current: u8) -> Option<u8> { - let found = self.set.iter().any(|sequence| *sequence == current); - if self.complement_flag == found { - Some(current) - } else { - None - } - } -} - -pub struct TranslateOperationComplement { - iter: u8, - set2_iter: usize, - set1: Vec<u8>, - set2: Vec<u8>, - translation_map: HashMap<u8, u8>, -} - -impl TranslateOperationComplement { - fn new(set1: Vec<u8>, set2: Vec<u8>) -> Self { - Self { - iter: 0, - set2_iter: 0, - set1, - set2, - translation_map: HashMap::new(), - } + // keep if not present in the set + self.set.contains(¤t).not().then_some(current) } } #[derive(Debug)] -pub struct TranslateOperationStandard { +pub struct TranslateOperation { translation_map: HashMap<u8, u8>, } -impl TranslateOperationStandard { - fn new(set1: Vec<u8>, set2: Vec<u8>) -> Result<Self, BadSequence> { +impl TranslateOperation { + pub fn new(set1: Vec<u8>, set2: Vec<u8>) -> Result<Self, BadSequence> { if let Some(fallback) = set2.last().copied() { Ok(Self { translation_map: set1 @@ -436,86 +413,27 @@ impl TranslateOperationStandard { } } -pub enum TranslateOperation { - Standard(TranslateOperationStandard), - Complement(TranslateOperationComplement), -} - -impl TranslateOperation { - fn next_complement_char(iter: u8, ignore_list: &[u8]) -> (u8, u8) { - (iter..) - .filter(|c| !ignore_list.iter().any(|s| s == c)) - .map(|c| (c + 1, c)) - .next() - .expect("exhausted all possible characters") - } -} - -impl TranslateOperation { - pub fn new(set1: Vec<u8>, set2: Vec<u8>, complement: bool) -> Result<Self, BadSequence> { - if complement { - Ok(Self::Complement(TranslateOperationComplement::new( - set1, set2, - ))) - } else { - Ok(Self::Standard(TranslateOperationStandard::new(set1, set2)?)) - } - } -} - impl SymbolTranslator for TranslateOperation { fn translate(&mut self, current: u8) -> Option<u8> { - match self { - Self::Standard(TranslateOperationStandard { translation_map }) => Some( - translation_map - .iter() - .find_map(|(l, r)| if l.eq(¤t) { Some(*r) } else { None }) - .unwrap_or(current), - ), - Self::Complement(TranslateOperationComplement { - iter, - set2_iter, - set1, - set2, - translation_map, - }) => { - // First, try to see if current char is already mapped - // If so, return the mapped char - // Else, pop from set2 - // If we popped something, map the next complement character to this value - // If set2 is empty, we just map the current char directly to fallback --- to avoid looping unnecessarily - if let Some(c) = set1.iter().find(|c| c.eq(&¤t)) { - Some(*c) - } else { - while translation_map.get(¤t).is_none() { - if let Some(value) = set2.get(*set2_iter) { - let (next_iter, next_key) = Self::next_complement_char(*iter, &*set1); - *iter = next_iter; - *set2_iter = set2_iter.saturating_add(1); - translation_map.insert(next_key, *value); - } else { - translation_map.insert(current, *set2.last().unwrap()); - } - } - Some(*translation_map.get(¤t).unwrap()) - } - } - } + Some( + self.translation_map + .get(¤t) + .copied() + .unwrap_or(current), + ) } } #[derive(Debug, Clone)] pub struct SqueezeOperation { set1: HashSet<u8>, - complement: bool, previous: Option<u8>, } impl SqueezeOperation { - pub fn new(set1: Vec<u8>, complement: bool) -> Self { + pub fn new(set1: Vec<u8>) -> Self { Self { set1: set1.into_iter().collect(), - complement, previous: None, } } @@ -523,35 +441,16 @@ impl SqueezeOperation { impl SymbolTranslator for SqueezeOperation { fn translate(&mut self, current: u8) -> Option<u8> { - if self.complement { - let next = if self.set1.contains(¤t) { - Some(current) - } else { - match self.previous { - Some(v) => { - if v.eq(¤t) { - None - } else { - Some(current) - } - } - None => Some(current), - } - }; - self.previous = Some(current); - next + let next = if self.set1.contains(¤t) { + match self.previous { + Some(v) if v == current => None, + _ => Some(current), + } } else { - let next = if self.set1.contains(¤t) { - match self.previous { - Some(v) if v == current => None, - _ => Some(current), - } - } else { - Some(current) - }; - self.previous = Some(current); - next - } + Some(current) + }; + self.previous = Some(current); + next } } diff --git a/src/uu/tr/src/tr.rs b/src/uu/tr/src/tr.rs index 6f78f13db..4a182e555 100644 --- a/src/uu/tr/src/tr.rs +++ b/src/uu/tr/src/tr.rs @@ -121,26 +121,26 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { // '*_op' are the operations that need to be applied, in order. if delete_flag { if squeeze_flag { - let delete_op = DeleteOperation::new(set1, complement_flag); - let squeeze_op = SqueezeOperation::new(set2, false); + let delete_op = DeleteOperation::new(set1); + let squeeze_op = SqueezeOperation::new(set2); let op = delete_op.chain(squeeze_op); translate_input(&mut locked_stdin, &mut buffered_stdout, op); } else { - let op = DeleteOperation::new(set1, complement_flag); + let op = DeleteOperation::new(set1); translate_input(&mut locked_stdin, &mut buffered_stdout, op); } } else if squeeze_flag { if sets_len < 2 { - let op = SqueezeOperation::new(set1, complement_flag); + let op = SqueezeOperation::new(set1); translate_input(&mut locked_stdin, &mut buffered_stdout, op); } else { - let translate_op = TranslateOperation::new(set1, set2.clone(), complement_flag)?; - let squeeze_op = SqueezeOperation::new(set2, false); + let translate_op = TranslateOperation::new(set1, set2.clone())?; + let squeeze_op = SqueezeOperation::new(set2); let op = translate_op.chain(squeeze_op); translate_input(&mut locked_stdin, &mut buffered_stdout, op); } } else { - let op = TranslateOperation::new(set1, set2, complement_flag)?; + let op = TranslateOperation::new(set1, set2)?; translate_input(&mut locked_stdin, &mut buffered_stdout, op); } Ok(()) diff --git a/tests/by-util/test_tr.rs b/tests/by-util/test_tr.rs index c8ed0a7fb..77e69112e 100644 --- a/tests/by-util/test_tr.rs +++ b/tests/by-util/test_tr.rs @@ -1313,3 +1313,24 @@ fn check_regression_class_blank() { .no_stderr() .stdout_only("a12b"); } + +// Check regression found in https://github.com/uutils/coreutils/issues/6163 +#[test] +fn check_regression_issue_6163_no_match() { + new_ucmd!() + .args(&["-c", "-t", "Y", "Z"]) + .pipe_in("X\n") + .succeeds() + .no_stderr() + .stdout_only("X\n"); +} + +#[test] +fn check_regression_issue_6163_match() { + new_ucmd!() + .args(&["-c", "-t", "Y", "Z"]) + .pipe_in("\0\n") + .succeeds() + .no_stderr() + .stdout_only("Z\n"); +} |