diff options
-rw-r--r-- | src/choice.rs | 282 | ||||
-rw-r--r-- | src/config.rs | 12 | ||||
-rw-r--r-- | src/main.rs | 5 | ||||
-rw-r--r-- | src/opt.rs | 24 | ||||
-rw-r--r-- | src/writeable.rs | 20 | ||||
-rw-r--r-- | src/writer.rs | 31 | ||||
-rw-r--r-- | test/choose_0_3_c.txt | 6 | ||||
-rwxr-xr-x | test/e2e_test.sh | 1 |
8 files changed, 270 insertions, 111 deletions
diff --git a/src/choice.rs b/src/choice.rs index 2f89ff0..bf193f6 100644 --- a/src/choice.rs +++ b/src/choice.rs @@ -1,7 +1,10 @@ use std::convert::TryInto; +use std::io::{BufWriter, Write}; +use std::iter::FromIterator; use crate::config::Config; -use crate::io::{BufWriter, Write}; +use crate::writeable::Writeable; +use crate::writer::WriteReceiver; #[derive(Debug)] pub struct Choice { @@ -29,134 +32,149 @@ impl Choice { config: &Config, handle: &mut BufWriter<W>, ) { - let mut line_iter = config - .separator - .split(line) - .filter(|s| !s.is_empty() || config.opt.non_greedy); - - if self.is_reverse_range() && !self.has_negative_index() { - if self.end > 0 { - line_iter.nth((self.end - 1).try_into().unwrap()); - } + if config.opt.character_wise { + let line_chars = line[0..line.len() - 1].chars(); + self.print_choice_generic(line_chars, config, handle); + } else { + let line_iter = config + .separator + .split(line) + .filter(|s| !s.is_empty() || config.opt.non_greedy); + self.print_choice_generic(line_iter, config, handle); + } + } - let mut stack = Vec::new(); - for i in 0..=(self.start - self.end) { - match line_iter.next() { - Some(s) => stack.push(s), - None => break, - } + pub fn is_reverse_range(&self) -> bool { + self.reversed + } - if self.start <= self.end + i { - break; - } - } + pub fn has_negative_index(&self) -> bool { + self.negative_index + } - let mut iter = stack.iter().rev().peekable(); - loop { - match iter.next() { - Some(s) => { - Choice::write_bytes(s.as_bytes(), config, handle, iter.peek().is_some()) - } - None => break, - } - } + fn print_choice_generic<W, T, I>(&self, mut iter: I, config: &Config, handle: &mut BufWriter<W>) + where + W: Write, + T: Writeable + Copy, + I: Iterator<Item = T>, + { + if self.is_reverse_range() && !self.has_negative_index() { + self.print_choice_reverse(iter, config, handle); } else if self.has_negative_index() { - let vec = line_iter.collect::<Vec<&str>>(); - self.print_negative_choice(vec, config, handle); + self.print_choice_negative(iter, config, handle); } else { if self.start > 0 { - line_iter.nth((self.start - 1).try_into().unwrap()); - } - - let mut peek_line_iter = line_iter.peekable(); - for i in self.start..=self.end { - match peek_line_iter.next() { - Some(s) => Choice::write_bytes( - s.as_bytes(), - config, - handle, - peek_line_iter.peek().is_some() && i != self.end, - ), - None => break, - }; + iter.nth((self.start - 1).try_into().unwrap()); } + let range = self.end.checked_sub(self.start).unwrap(); + Choice::print_choice_loop_max_items(iter, config, handle, range); } } - fn print_negative_choice<W: Write>( - &self, - vec: Vec<&str>, + fn print_choice_loop<W, T, I>(iter: I, config: &Config, handle: &mut BufWriter<W>) + where + W: Write, + T: Writeable + Copy, + I: Iterator<Item = T>, + { + Choice::print_choice_loop_max_items(iter, config, handle, isize::max_value()); + } + + fn print_choice_loop_max_items<W, T, I>( + iter: I, config: &Config, handle: &mut BufWriter<W>, - ) { - let start = if self.start >= 0 { - self.start.try_into().unwrap() - } else { - vec.len() - .checked_sub(self.start.abs().try_into().unwrap()) - .unwrap() - }; + max_items: isize, + ) where + W: Write, + T: Writeable + Copy, + I: Iterator<Item = T>, + { + let mut peek_iter = iter.peekable(); + for i in 0..=max_items { + match peek_iter.next() { + Some(s) => { + handle.write_choice(s, config, peek_iter.peek().is_some() && i != max_items); + } + None => break, + }; + } + } - let end = if self.end >= 0 { - self.end.try_into().unwrap() - } else { - vec.len() - .checked_sub(self.end.abs().try_into().unwrap()) - .unwrap() - }; + fn print_choice_negative<W, T, I>(&self, iter: I, config: &Config, handle: &mut BufWriter<W>) + where + W: Write, + T: Writeable + Copy, + I: Iterator<Item = T>, + { + let vec = Vec::from_iter(iter); + let (start, end) = self.get_negative_start_end(&vec); if end > start { for word in vec[start..std::cmp::min(end, vec.len() - 1)].iter() { - Choice::write_bytes(word.as_bytes(), config, handle, true); + handle.write_choice(*word, config, true); } - Choice::write_bytes( - vec[std::cmp::min(end, vec.len() - 1)].as_bytes(), - config, - handle, - false, - ); + handle.write_choice(vec[std::cmp::min(end, vec.len() - 1)], config, false); } else if self.start < 0 { for word in vec[end + 1..=std::cmp::min(start, vec.len() - 1)] .iter() .rev() { - Choice::write_bytes(word.as_bytes(), config, handle, true); + handle.write_choice(*word, config, true); } - Choice::write_bytes(vec[end].as_bytes(), config, handle, false); + handle.write_choice(vec[end], config, false); } } - fn write_bytes<WriterType: Write>( - b: &[u8], - config: &Config, - handle: &mut BufWriter<WriterType>, - print_separator: bool, - ) { - let num_bytes_written = match handle.write(b) { - Ok(x) => x, - Err(e) => { - eprintln!("Failed to write to output: {}", e); - 0 + fn print_choice_reverse<W, T, I>(&self, mut iter: I, config: &Config, handle: &mut BufWriter<W>) + where + W: Write, + T: Writeable + Copy, + I: Iterator<Item = T>, + { + if self.end > 0 { + iter.nth((self.end - 1).try_into().unwrap()); + } + + let mut stack = Vec::new(); + for i in 0..=(self.start - self.end) { + match iter.next() { + Some(s) => stack.push(s), + None => break, } - }; - if num_bytes_written > 0 && print_separator { - Choice::write_separator(config, handle); - }; - } - pub fn write_separator<W: Write>(config: &Config, handle: &mut BufWriter<W>) { - match handle.write(&config.output_separator) { - Ok(_) => (), - Err(e) => eprintln!("Failed to write to output: {}", e), + if self.start <= self.end + i { + break; + } } - } - pub fn is_reverse_range(&self) -> bool { - self.reversed + let mut peek_iter = stack.iter().rev().peekable(); + loop { + match peek_iter.next() { + Some(s) => handle.write_choice(*s, config, peek_iter.peek().is_some()), + None => break, + } + } } - pub fn has_negative_index(&self) -> bool { - self.negative_index + fn get_negative_start_end<T>(&self, vec: &Vec<T>) -> (usize, usize) { + let start = if self.start >= 0 { + self.start.try_into().unwrap() + } else { + vec.len() + .checked_sub(self.start.abs().try_into().unwrap()) + .unwrap() + }; + + let end = if self.end >= 0 { + self.end.try_into().unwrap() + } else { + vec.len() + .checked_sub(self.end.abs().try_into().unwrap()) + .unwrap() + }; + + return (start, end); } } @@ -662,6 +680,76 @@ mod tests { config.opt.choice[0].print_choice(&String::from("a b c d"), &config, &mut handle); assert_eq!(String::from("abc"), MockStdout::str_from_buf_writer(handle)); } + + #[test] + fn print_0_to_2_character_wise() { + let config = Config::from_iter(vec!["choose", "0:2", "-c"]); + let mut handle = BufWriter::new(MockStdout::new()); + config.opt.choice[0].print_choice(&String::from("abcd\n"), &config, &mut handle); + assert_eq!(String::from("abc"), MockStdout::str_from_buf_writer(handle)); + } + + #[test] + fn print_2_to_end_character_wise() { + let config = Config::from_iter(vec!["choose", "2:", "-c"]); + let mut handle = BufWriter::new(MockStdout::new()); + config.opt.choice[0].print_choice(&String::from("abcd\n"), &config, &mut handle); + assert_eq!(String::from("cd"), MockStdout::str_from_buf_writer(handle)); + } + + #[test] + fn print_start_to_2_character_wise() { + let config = Config::from_iter(vec!["choose", ":2", "-c"]); + let mut handle = BufWriter::new(MockStdout::new()); + config.opt.choice[0].print_choice(&String::from("abcd\n"), &config, &mut handle); + assert_eq!(String::from("abc"), MockStdout::str_from_buf_writer(handle)); + } + + #[test] + fn print_0_to_2_character_wise_exclusive() { + let config = Config::from_iter(vec!["choose", "0:2", "-c", "-x"]); + let mut handle = BufWriter::new(MockStdout::new()); + config.opt.choice[0].print_choice(&String::from("abcd\n"), &config, &mut handle); + assert_eq!(String::from("ab"), MockStdout::str_from_buf_writer(handle)); + } + + #[test] + fn print_0_to_2_character_wise_with_output_delimeter() { + let config = Config::from_iter(vec!["choose", "0:2", "-c", "-o", ":"]); + let mut handle = BufWriter::new(MockStdout::new()); + config.opt.choice[0].print_choice(&String::from("abcd\n"), &config, &mut handle); + assert_eq!( + String::from("a:b:c"), + MockStdout::str_from_buf_writer(handle) + ); + } + + #[test] + fn print_after_end_character_wise() { + let config = Config::from_iter(vec!["choose", "0:9", "-c"]); + let mut handle = BufWriter::new(MockStdout::new()); + config.opt.choice[0].print_choice(&String::from("abcd\n"), &config, &mut handle); + assert_eq!( + String::from("abcd"), + MockStdout::str_from_buf_writer(handle) + ); + } + + #[test] + fn print_2_to_0_character_wise() { + let config = Config::from_iter(vec!["choose", "2:0", "-c"]); + let mut handle = BufWriter::new(MockStdout::new()); + config.opt.choice[0].print_choice(&String::from("abcd\n"), &config, &mut handle); + assert_eq!(String::from("cba"), MockStdout::str_from_buf_writer(handle)); + } + + #[test] + fn print_neg_2_to_end_character_wise() { + let config = Config::from_iter(vec!["choose", "-2:", "-c"]); + let mut handle = BufWriter::new(MockStdout::new()); + config.opt.choice[0].print_choice(&String::from("abcd\n"), &config, &mut handle); + assert_eq!(String::from("cd"), MockStdout::str_from_buf_writer(handle)); + } } mod is_reverse_range_tests { diff --git a/src/config.rs b/src/config.rs index d87a602..300408b 100644 --- a/src/config.rs +++ b/src/config.rs @@ -51,9 +51,15 @@ impl Config { } }; - let output_separator = match opt.output_field_separator.clone() { - Some(s) => s.into_boxed_str().into_boxed_bytes(), - None => Box::new([0x20; 1]), + let output_separator = match opt.character_wise { + false => match opt.output_field_separator.clone() { + Some(s) => s.into_boxed_str().into_boxed_bytes(), + None => Box::new([0x20; 1]), + }, + true => match opt.output_field_separator.clone() { + Some(s) => s.into_boxed_str().into_boxed_bytes(), + None => Box::new([]), + }, }; Config { diff --git a/src/main.rs b/src/main.rs index a22b553..6872f99 100644 --- a/src/main.rs +++ b/src/main.rs @@ -10,8 +10,11 @@ mod choice; mod config; mod opt; mod reader; +mod writeable; +mod writer; use config::Config; use opt::Opt; +use writer::WriteReceiver; fn main() { let opt = Opt::from_args(); @@ -43,7 +46,7 @@ fn main() { while let Some(choice) = choice_iter.next() { choice.print_choice(&l, &config, &mut handle); if choice_iter.peek().is_some() { - choice::Choice::write_separator(&config, &mut handle); + handle.write_separator(&config); } } match handle.write(b"\n") { @@ -8,30 +8,34 @@ use crate::config::Config; #[structopt(name = "choose", about = "`choose` sections from each line of files")] #[structopt(setting = structopt::clap::AppSettings::AllowLeadingHyphen)] pub struct Opt { - /// Specify field separator other than whitespace, using Rust `regex` syntax + /// Choose fields by character number #[structopt(short, long)] - pub field_separator: Option<String>, - - /// Specify output field separator - #[structopt(short, long, parse(from_str = Config::parse_output_field_separator))] - pub output_field_separator: Option<String>, + pub character_wise: bool, - /// Use non-greedy field separators + /// Activate debug mode #[structopt(short, long)] - pub non_greedy: bool, + pub debug: bool, /// Use exclusive ranges, similar to array indexing in many programming languages #[structopt(short = "x", long)] pub exclusive: bool, - /// Activate debug mode + /// Specify field separator other than whitespace, using Rust `regex` syntax #[structopt(short, long)] - pub debug: bool, + pub field_separator: Option<String>, /// Input file #[structopt(short, long, parse(from_os_str))] pub input: Option<PathBuf>, + /// Use non-greedy field separators + #[structopt(short, long)] + pub non_greedy: bool, + + /// Specify output field separator + #[structopt(short, long, parse(from_str = Config::parse_output_field_separator))] + pub output_field_separator: Option<String>, + /// Fields to print. Either x, x:, :y, or x:y, where x and y are integers, colons indicate a /// range, and an empty field on either side of the colon continues to the beginning or end of /// the line. diff --git a/src/writeable.rs b/src/writeable.rs new file mode 100644 index 0000000..8dadde3 --- /dev/null +++ b/src/writeable.rs @@ -0,0 +1,20 @@ +pub trait Writeable { + fn to_byte_buf(&self) -> Box<[u8]>; +} + +impl Writeable for &str { + fn to_byte_buf(&self) -> Box<[u8]> { + return Box::from(self.as_bytes()); + } +} + +impl Writeable for char { + fn to_byte_buf(&self) -> Box<[u8]> { + let mut buf = [0; 4]; + return self + .encode_utf8(&mut buf) + .to_owned() + .into_boxed_str() + .into_boxed_bytes(); + } +} diff --git a/src/writer.rs b/src/writer.rs new file mode 100644 index 0000000..00a93a1 --- /dev/null +++ b/src/writer.rs @@ -0,0 +1,31 @@ +use std::io::{BufWriter, Write}; + +use crate::config::Config; +use crate::writeable::Writeable; + +pub trait WriteReceiver { + fn write_choice<Wa: Writeable>(&mut self, b: Wa, config: &Config, print_separator: bool); + fn write_separator(&mut self, config: &Config); +} + +impl<W: Write> WriteReceiver for BufWriter<W> { + fn write_choice<Wa: Writeable>(&mut self, b: Wa, config: &Config, print_separator: bool) { + let num_bytes_written = match self.write(&b.to_byte_buf()) { + Ok(x) => x, + Err(e) => { + eprintln!("Failed to write to output: {}", e); + 0 + } + }; + if num_bytes_written > 0 && print_separator { + self.write_separator(config); + }; + } + + fn write_separator(&mut self, config: &Config) { + match self.write(&config.output_separator) { + Ok(_) => (), + Err(e) => eprintln!("Failed to write to output: {}", e), + } + } +} diff --git a/test/choose_0_3_c.txt b/test/choose_0_3_c.txt new file mode 100644 index 0000000..a7aeeb0 --- /dev/null +++ b/test/choose_0_3_c.txt @@ -0,0 +1,6 @@ +em i +idid +trud +s au +iat +pa q diff --git a/test/e2e_test.sh b/test/e2e_test.sh index 69ddd54..5fb22e9 100755 --- a/test/e2e_test.sh +++ b/test/e2e_test.sh @@ -17,6 +17,7 @@ diff -w <(cargo run -- -4:-2 -i ${test_dir}/lorem.txt 2>/dev/null) <(cat "${test diff -w <(cargo run -- 1:3 -o % -i ${test_dir}/lorem.txt 2>/dev/null) <(cat "${test_dir}/choose_1:3of%.txt") diff -w <(cargo run -- 1 3 -o % -i ${test_dir}/lorem.txt 2>/dev/null) <(cat "${test_dir}/choose_1_3of%.txt") diff -w <(cargo run -- 1 3 -o '' -i ${test_dir}/lorem.txt 2>/dev/null) <(cat "${test_dir}/choose_1_3of.txt") +diff -w <(cargo run -- 3:6 -c -i ${test_dir}/lorem.txt 2>/dev/null) <(cat "${test_dir}/choose_0_3_c.txt") # add tests for different delimiters # add tests using piping |