summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJalil David Salamé Messina <jalil.salame@gmail.com>2024-05-04 15:26:47 +0200
committerBen Wiederhake <BenWiederhake.GitHub@gmx.de>2024-05-04 19:55:49 +0200
commit3c47f2769899668b55a12124837019ca5879c2bd (patch)
treeecbefef32abd541dd90dfd08b3c7df087aaac35a
parent3b96ff1d10a4d74e7018c6a84474de75d168136c (diff)
tr: calculate complement set early
Fixes #6163 and adds a test to verify that a regression is not caused. Instead of inverting the conditions to check (e.g. delete characters **not** present in set1) invert set1 when passed the complement flag (`-c`, `-C`, `--complement`). This is done by calculating set1 then "inverting" it by subtracting from the "full" (universe) set (0..=u8::MAX). This fixes issue 6163 because it was caused by a combination of the `-c` and `-t` flag. `-c` is the abovementioned complement flag and `-t`/`--truncate-set1` truncates set1 to the length of set2. What happened in issue 6163 is that `set1={b'Y'}` and `set2={b'Z'}`, when truncated set1 stays the same and we proceed. The problem is GNU utils does not consider set1 to be `{b'Y'}`, but the complement of `{b'Y'}`, that is `U \ {b'Y'}={0, 1, ..., b'X', b'Z', ...}`, thus it is truncated to `{0}`. We can verify this by doing: `printf '\0' | tr -c -t Y Z`, which prints `Z` to stdout as expected. Additionally, by calculating the complement of set1 we no longer need to consider the complement flag when doing the translate operation, this allows us to delete a lot of code.
-rw-r--r--src/uu/tr/src/operation.rs157
-rw-r--r--src/uu/tr/src/tr.rs14
-rw-r--r--tests/by-util/test_tr.rs21
3 files changed, 56 insertions, 136 deletions
diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs
index 77ace6b40..6e1f039a3 100644
--- a/src/uu/tr/src/operation.rs
+++ b/src/uu/tr/src/operation.rs
@@ -19,6 +19,7 @@ use std::{
error::Error,
fmt::{Debug, Display},
io::{BufRead, Write},
+ ops::Not,
};
use uucore::error::UError;
@@ -125,6 +126,7 @@ impl Sequence {
pub fn solve_set_characters(
set1_str: &[u8],
set2_str: &[u8],
+ complement_flag: bool,
truncate_set1_flag: bool,
) -> Result<(Vec<u8>, Vec<u8>), BadSequence> {
let set1 = Self::from_str(set1_str)?;
@@ -189,6 +191,9 @@ impl Sequence {
},
};
let mut set1_solved: Vec<_> = set1.iter().flat_map(Self::flatten).collect();
+ if complement_flag {
+ set1_solved = (0..=u8::MAX).filter(|x| !set1_solved.contains(x)).collect();
+ }
if truncate_set1_flag {
set1_solved.truncate(set2_solved.len());
}
@@ -369,56 +374,28 @@ impl<A: SymbolTranslator, B: SymbolTranslator> SymbolTranslator for ChainedSymbo
#[derive(Debug)]
pub struct DeleteOperation {
set: Vec<u8>,
- complement_flag: bool,
}
impl DeleteOperation {
- pub fn new(set: Vec<u8>, complement_flag: bool) -> Self {
- Self {
- set,
- complement_flag,
- }
+ pub fn new(set: Vec<u8>) -> Self {
+ Self { set }
}
}
impl SymbolTranslator for DeleteOperation {
fn translate(&mut self, current: u8) -> Option<u8> {
- let found = self.set.iter().any(|sequence| *sequence == current);
- if self.complement_flag == found {
- Some(current)
- } else {
- None
- }
- }
-}
-
-pub struct TranslateOperationComplement {
- iter: u8,
- set2_iter: usize,
- set1: Vec<u8>,
- set2: Vec<u8>,
- translation_map: HashMap<u8, u8>,
-}
-
-impl TranslateOperationComplement {
- fn new(set1: Vec<u8>, set2: Vec<u8>) -> Self {
- Self {
- iter: 0,
- set2_iter: 0,
- set1,
- set2,
- translation_map: HashMap::new(),
- }
+ // keep if not present in the set
+ self.set.contains(&current).not().then_some(current)
}
}
#[derive(Debug)]
-pub struct TranslateOperationStandard {
+pub struct TranslateOperation {
translation_map: HashMap<u8, u8>,
}
-impl TranslateOperationStandard {
- fn new(set1: Vec<u8>, set2: Vec<u8>) -> Result<Self, BadSequence> {
+impl TranslateOperation {
+ pub fn new(set1: Vec<u8>, set2: Vec<u8>) -> Result<Self, BadSequence> {
if let Some(fallback) = set2.last().copied() {
Ok(Self {
translation_map: set1
@@ -436,86 +413,27 @@ impl TranslateOperationStandard {
}
}
-pub enum TranslateOperation {
- Standard(TranslateOperationStandard),
- Complement(TranslateOperationComplement),
-}
-
-impl TranslateOperation {
- fn next_complement_char(iter: u8, ignore_list: &[u8]) -> (u8, u8) {
- (iter..)
- .filter(|c| !ignore_list.iter().any(|s| s == c))
- .map(|c| (c + 1, c))
- .next()
- .expect("exhausted all possible characters")
- }
-}
-
-impl TranslateOperation {
- pub fn new(set1: Vec<u8>, set2: Vec<u8>, complement: bool) -> Result<Self, BadSequence> {
- if complement {
- Ok(Self::Complement(TranslateOperationComplement::new(
- set1, set2,
- )))
- } else {
- Ok(Self::Standard(TranslateOperationStandard::new(set1, set2)?))
- }
- }
-}
-
impl SymbolTranslator for TranslateOperation {
fn translate(&mut self, current: u8) -> Option<u8> {
- match self {
- Self::Standard(TranslateOperationStandard { translation_map }) => Some(
- translation_map
- .iter()
- .find_map(|(l, r)| if l.eq(&current) { Some(*r) } else { None })
- .unwrap_or(current),
- ),
- Self::Complement(TranslateOperationComplement {
- iter,
- set2_iter,
- set1,
- set2,
- translation_map,
- }) => {
- // First, try to see if current char is already mapped
- // If so, return the mapped char
- // Else, pop from set2
- // If we popped something, map the next complement character to this value
- // If set2 is empty, we just map the current char directly to fallback --- to avoid looping unnecessarily
- if let Some(c) = set1.iter().find(|c| c.eq(&&current)) {
- Some(*c)
- } else {
- while translation_map.get(&current).is_none() {
- if let Some(value) = set2.get(*set2_iter) {
- let (next_iter, next_key) = Self::next_complement_char(*iter, &*set1);
- *iter = next_iter;
- *set2_iter = set2_iter.saturating_add(1);
- translation_map.insert(next_key, *value);
- } else {
- translation_map.insert(current, *set2.last().unwrap());
- }
- }
- Some(*translation_map.get(&current).unwrap())
- }
- }
- }
+ Some(
+ self.translation_map
+ .get(&current)
+ .copied()
+ .unwrap_or(current),
+ )
}
}
#[derive(Debug, Clone)]
pub struct SqueezeOperation {
set1: HashSet<u8>,
- complement: bool,
previous: Option<u8>,
}
impl SqueezeOperation {
- pub fn new(set1: Vec<u8>, complement: bool) -> Self {
+ pub fn new(set1: Vec<u8>) -> Self {
Self {
set1: set1.into_iter().collect(),
- complement,
previous: None,
}
}
@@ -523,35 +441,16 @@ impl SqueezeOperation {
impl SymbolTranslator for SqueezeOperation {
fn translate(&mut self, current: u8) -> Option<u8> {
- if self.complement {
- let next = if self.set1.contains(&current) {
- Some(current)
- } else {
- match self.previous {
- Some(v) => {
- if v.eq(&current) {
- None
- } else {
- Some(current)
- }
- }
- None => Some(current),
- }
- };
- self.previous = Some(current);
- next
+ let next = if self.set1.contains(&current) {
+ match self.previous {
+ Some(v) if v == current => None,
+ _ => Some(current),
+ }
} else {
- let next = if self.set1.contains(&current) {
- match self.previous {
- Some(v) if v == current => None,
- _ => Some(current),
- }
- } else {
- Some(current)
- };
- self.previous = Some(current);
- next
- }
+ Some(current)
+ };
+ self.previous = Some(current);
+ next
}
}
diff --git a/src/uu/tr/src/tr.rs b/src/uu/tr/src/tr.rs
index 6f78f13db..4a182e555 100644
--- a/src/uu/tr/src/tr.rs
+++ b/src/uu/tr/src/tr.rs
@@ -121,26 +121,26 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
// '*_op' are the operations that need to be applied, in order.
if delete_flag {
if squeeze_flag {
- let delete_op = DeleteOperation::new(set1, complement_flag);
- let squeeze_op = SqueezeOperation::new(set2, false);
+ let delete_op = DeleteOperation::new(set1);
+ let squeeze_op = SqueezeOperation::new(set2);
let op = delete_op.chain(squeeze_op);
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
} else {
- let op = DeleteOperation::new(set1, complement_flag);
+ let op = DeleteOperation::new(set1);
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
}
} else if squeeze_flag {
if sets_len < 2 {
- let op = SqueezeOperation::new(set1, complement_flag);
+ let op = SqueezeOperation::new(set1);
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
} else {
- let translate_op = TranslateOperation::new(set1, set2.clone(), complement_flag)?;
- let squeeze_op = SqueezeOperation::new(set2, false);
+ let translate_op = TranslateOperation::new(set1, set2.clone())?;
+ let squeeze_op = SqueezeOperation::new(set2);
let op = translate_op.chain(squeeze_op);
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
}
} else {
- let op = TranslateOperation::new(set1, set2, complement_flag)?;
+ let op = TranslateOperation::new(set1, set2)?;
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
}
Ok(())
diff --git a/tests/by-util/test_tr.rs b/tests/by-util/test_tr.rs
index c8ed0a7fb..77e69112e 100644
--- a/tests/by-util/test_tr.rs
+++ b/tests/by-util/test_tr.rs
@@ -1313,3 +1313,24 @@ fn check_regression_class_blank() {
.no_stderr()
.stdout_only("a12b");
}
+
+// Check regression found in https://github.com/uutils/coreutils/issues/6163
+#[test]
+fn check_regression_issue_6163_no_match() {
+ new_ucmd!()
+ .args(&["-c", "-t", "Y", "Z"])
+ .pipe_in("X\n")
+ .succeeds()
+ .no_stderr()
+ .stdout_only("X\n");
+}
+
+#[test]
+fn check_regression_issue_6163_match() {
+ new_ucmd!()
+ .args(&["-c", "-t", "Y", "Z"])
+ .pipe_in("\0\n")
+ .succeeds()
+ .no_stderr()
+ .stdout_only("Z\n");
+}