diff options
author | Ryan Geary <rtgnj42@gmail.com> | 2020-05-11 16:41:23 -0400 |
---|---|---|
committer | Ryan Geary <rtgnj42@gmail.com> | 2020-06-02 14:41:30 -0400 |
commit | a66621380b370c011de01a6d22dc2ebc638d0019 (patch) | |
tree | 77f3740f2a4e876f2ca0bfeddac72a7418fbfd5e | |
parent | 5f7724967265f938eed28f6180d972ba76566cfa (diff) |
Add character-wise `choose`ing
Alphabetize structopt options
Add character-wise tests
Add character-wise switch
Add print-after-end test
Add empty default separator for char-wise mode
Add char-wise forward and negative printing
Add pure reverse printing
Change char_wise to use `char` instead of `u8`
Adds support for unicode (read: emojis)
Adds a newline char to end of each char-wise test because that's how it
is
Add writing traits for better code structure
Merge repetitive codepaths with generics
Unify print_choice_* function names
Reorder functions in choice module
Rename variable to avoid name confusion
Make default case for loop more readable
Abstract default case print loop
Add e2e test
Move vec create to print_choice_negative
-rw-r--r-- | src/choice.rs | 282 | ||||
-rw-r--r-- | src/config.rs | 12 | ||||
-rw-r--r-- | src/main.rs | 5 | ||||
-rw-r--r-- | src/opt.rs | 24 | ||||
-rw-r--r-- | src/writeable.rs | 20 | ||||
-rw-r--r-- | src/writer.rs | 31 | ||||
-rw-r--r-- | test/choose_0_3_c.txt | 6 | ||||
-rwxr-xr-x | test/e2e_test.sh | 1 |
8 files changed, 270 insertions, 111 deletions
diff --git a/src/choice.rs b/src/choice.rs index 2f89ff0..bf193f6 100644 --- a/src/choice.rs +++ b/src/choice.rs @@ -1,7 +1,10 @@ use std::convert::TryInto; +use std::io::{BufWriter, Write}; +use std::iter::FromIterator; use crate::config::Config; -use crate::io::{BufWriter, Write}; +use crate::writeable::Writeable; +use crate::writer::WriteReceiver; #[derive(Debug)] pub struct Choice { @@ -29,134 +32,149 @@ impl Choice { config: &Config, handle: &mut BufWriter<W>, ) { - let mut line_iter = config - .separator - .split(line) - .filter(|s| !s.is_empty() || config.opt.non_greedy); - - if self.is_reverse_range() && !self.has_negative_index() { - if self.end > 0 { - line_iter.nth((self.end - 1).try_into().unwrap()); - } + if config.opt.character_wise { + let line_chars = line[0..line.len() - 1].chars(); + self.print_choice_generic(line_chars, config, handle); + } else { + let line_iter = config + .separator + .split(line) + .filter(|s| !s.is_empty() || config.opt.non_greedy); + self.print_choice_generic(line_iter, config, handle); + } + } - let mut stack = Vec::new(); - for i in 0..=(self.start - self.end) { - match line_iter.next() { - Some(s) => stack.push(s), - None => break, - } + pub fn is_reverse_range(&self) -> bool { + self.reversed + } - if self.start <= self.end + i { - break; - } - } + pub fn has_negative_index(&self) -> bool { + self.negative_index + } - let mut iter = stack.iter().rev().peekable(); - loop { - match iter.next() { - Some(s) => { - Choice::write_bytes(s.as_bytes(), config, handle, iter.peek().is_some()) - } - None => break, - } - } + fn print_choice_generic<W, T, I>(&self, mut iter: I, config: &Config, handle: &mut BufWriter<W>) + where + W: Write, + T: Writeable + Copy, + I: Iterator<Item = T>, + { + if self.is_reverse_range() && !self.has_negative_index() { + self.print_choice_reverse(iter, config, handle); } else if self.has_negative_index() { - let vec = line_iter.collect::<Vec<&str>>(); - self.print_negative_choice(vec, config, handle); 
+ self.print_choice_negative(iter, config, handle); } else { if self.start > 0 { - line_iter.nth((self.start - 1).try_into().unwrap()); - } - - let mut peek_line_iter = line_iter.peekable(); - for i in self.start..=self.end { - match peek_line_iter.next() { - Some(s) => Choice::write_bytes( - s.as_bytes(), - config, - handle, - peek_line_iter.peek().is_some() && i != self.end, - ), - None => break, - }; + iter.nth((self.start - 1).try_into().unwrap()); } + let range = self.end.checked_sub(self.start).unwrap(); + Choice::print_choice_loop_max_items(iter, config, handle, range); } } - fn print_negative_choice<W: Write>( - &self, - vec: Vec<&str>, + fn print_choice_loop<W, T, I>(iter: I, config: &Config, handle: &mut BufWriter<W>) + where + W: Write, + T: Writeable + Copy, + I: Iterator<Item = T>, + { + Choice::print_choice_loop_max_items(iter, config, handle, isize::max_value()); + } + + fn print_choice_loop_max_items<W, T, I>( + iter: I, config: &Config, handle: &mut BufWriter<W>, - ) { - let start = if self.start >= 0 { - self.start.try_into().unwrap() - } else { - vec.len() - .checked_sub(self.start.abs().try_into().unwrap()) - .unwrap() - }; + max_items: isize, + ) where + W: Write, + T: Writeable + Copy, + I: Iterator<Item = T>, + { + let mut peek_iter = iter.peekable(); + for i in 0..=max_items { + match peek_iter.next() { + Some(s) => { + handle.write_choice(s, config, peek_iter.peek().is_some() && i != max_items); + } + None => break, + }; + } + } - let end = if self.end >= 0 { - self.end.try_into().unwrap() - } else { - vec.len() - .checked_sub(self.end.abs().try_into().unwrap()) - .unwrap() - }; + fn print_choice_negative<W, T, I>(&self, iter: I, config: &Config, handle: &mut BufWriter<W>) + where + W: Write, + T: Writeable + Copy, + I: Iterator<Item = T>, + { + let vec = Vec::from_iter(iter); + let (start, end) = self.get_negative_start_end(&vec); if end > start { for word in vec[start..std::cmp::min(end, vec.len() - 1)].iter() { - 
Choice::write_bytes(word.as_bytes(), config, handle, true); + handle.write_choice(*word, config, true); } - Choice::write_bytes( - vec[std::cmp::min(end, vec.len() - 1)].as_bytes(), - config, - handle, - false, - ); + handle.write_choice(vec[std::cmp::min(end, vec.len() - 1)], config, false); } else if self.start < 0 { for word in vec[end + 1..=std::cmp::min(start, vec.len() - 1)] .iter() .rev() { - Choice::write_bytes(word.as_bytes(), config, handle, true); + handle.write_choice(*word, config, true); } - Choice::write_bytes(vec[end].as_bytes(), config, handle, false); + handle.write_choice(vec[end], config, false); } } - fn write_bytes<WriterType: Write>( - b: &[u8], - config: &Config, - handle: &mut BufWriter<WriterType>, - print_separator: bool, - ) { - let num_bytes_written = match handle.write(b) { - Ok(x) => x, - Err(e) => { - eprintln!("Failed to write to output: {}", e); - 0 + fn print_choice_reverse<W, T, I>(&self, mut iter: I, config: &Config, handle: &mut BufWriter<W>) + where + W: Write, + T: Writeable + Copy, + I: Iterator<Item = T>, + { + if self.end > 0 { + iter.nth((self.end - 1).try_into().unwrap()); + } + + let mut stack = Vec::new(); + for i in 0..=(self.start - self.end) { + match iter.next() { + Some(s) => stack.push(s), + None => break, } - }; - if num_bytes_written > 0 && print_separator { - Choice::write_separator(config, handle); - }; - } - pub fn write_separator<W: Write>(config: &Config, handle: &mut BufWriter<W>) { - match handle.write(&config.output_separator) { - Ok(_) => (), - Err(e) => eprintln!("Failed to write to output: {}", e), + if self.start <= self.end + i { + break; + } } - } - pub fn is_reverse_range(&self) -> bool { - self.reversed + let mut peek_iter = stack.iter().rev().peekable(); + loop { + match peek_iter.next() { + Some(s) => handle.write_choice(*s, config, peek_iter.peek().is_some()), + None => break, + } + } } - pub fn has_negative_index(&self) -> bool { - self.negative_index + fn get_negative_start_end<T>(&self, 
vec: &Vec<T>) -> (usize, usize) { + let start = if self.start >= 0 { + self.start.try_into().unwrap() + } else { + vec.len() + .checked_sub(self.start.abs().try_into().unwrap()) + .unwrap() + }; + + let end = if self.end >= 0 { + self.end.try_into().unwrap() + } else { + vec.len() + .checked_sub(self.end.abs().try_into().unwrap()) + .unwrap() + }; + + return (start, end); } } @@ -662,6 +680,76 @@ mod tests { config.opt.choice[0].print_choice(&String::from("a b c d"), &config, &mut handle); assert_eq!(String::from("abc"), MockStdout::str_from_buf_writer(handle)); } + + #[test] + fn print_0_to_2_character_wise() { + let config = Config::from_iter(vec!["choose", "0:2", "-c"]); + let mut handle = BufWriter::new(MockStdout::new()); + config.opt.choice[0].print_choice(&String::from("abcd\n"), &config, &mut handle); + assert_eq!(String::from("abc"), MockStdout::str_from_buf_writer(handle)); + } + + #[test] + fn print_2_to_end_character_wise() { + let config = Config::from_iter(vec!["choose", "2:", "-c"]); + let mut handle = BufWriter::new(MockStdout::new()); + config.opt.choice[0].print_choice(&String::from("abcd\n"), &config, &mut handle); + assert_eq!(String::from("cd"), MockStdout::str_from_buf_writer(handle)); + } + + #[test] + fn print_start_to_2_character_wise() { + let config = Config::from_iter(vec!["choose", ":2", "-c"]); + let mut handle = BufWriter::new(MockStdout::new()); + config.opt.choice[0].print_choice(&String::from("abcd\n"), &config, &mut handle); + assert_eq!(String::from("abc"), MockStdout::str_from_buf_writer(handle)); + } + + #[test] + fn print_0_to_2_character_wise_exclusive() { + let config = Config::from_iter(vec!["choose", "0:2", "-c", "-x"]); + let mut handle = BufWriter::new(MockStdout::new()); + config.opt.choice[0].print_choice(&String::from("abcd\n"), &config, &mut handle); + assert_eq!(String::from("ab"), MockStdout::str_from_buf_writer(handle)); + } + + #[test] + fn print_0_to_2_character_wise_with_output_delimeter() { + let config = 
Config::from_iter(vec!["choose", "0:2", "-c", "-o", ":"]); + let mut handle = BufWriter::new(MockStdout::new()); + config.opt.choice[0].print_choice(&String::from("abcd\n"), &config, &mut handle); + assert_eq!( + String::from("a:b:c"), + MockStdout::str_from_buf_writer(handle) + ); + } + + #[test] + fn print_after_end_character_wise() { + let config = Config::from_iter(vec!["choose", "0:9", "-c"]); + let mut handle = BufWriter::new(MockStdout::new()); + config.opt.choice[0].print_choice(&String::from("abcd\n"), &config, &mut handle); + assert_eq!( + String::from("abcd"), + MockStdout::str_from_buf_writer(handle) + ); + } + + #[test] + fn print_2_to_0_character_wise() { + let config = Config::from_iter(vec!["choose", "2:0", "-c"]); + let mut handle = BufWriter::new(MockStdout::new()); + config.opt.choice[0].print_choice(&String::from("abcd\n"), &config, &mut handle); + assert_eq!(String::from("cba"), MockStdout::str_from_buf_writer(handle)); + } + + #[test] + fn print_neg_2_to_end_character_wise() { + let config = Config::from_iter(vec!["choose", "-2:", "-c"]); + let mut handle = BufWriter::new(MockStdout::new()); + config.opt.choice[0].print_choice(&String::from("abcd\n"), &config, &mut handle); + assert_eq!(String::from("cd"), MockStdout::str_from_buf_writer(handle)); + } } mod is_reverse_range_tests { diff --git a/src/config.rs b/src/config.rs index d87a602..300408b 100644 --- a/src/config.rs +++ b/src/config.rs @@ -51,9 +51,15 @@ impl Config { } }; - let output_separator = match opt.output_field_separator.clone() { - Some(s) => s.into_boxed_str().into_boxed_bytes(), - None => Box::new([0x20; 1]), + let output_separator = match opt.character_wise { + false => match opt.output_field_separator.clone() { + Some(s) => s.into_boxed_str().into_boxed_bytes(), + None => Box::new([0x20; 1]), + }, + true => match opt.output_field_separator.clone() { + Some(s) => s.into_boxed_str().into_boxed_bytes(), + None => Box::new([]), + }, }; Config { diff --git a/src/main.rs 
b/src/main.rs index a22b553..6872f99 100644 --- a/src/main.rs +++ b/src/main.rs @@ -10,8 +10,11 @@ mod choice; mod config; mod opt; mod reader; +mod writeable; +mod writer; use config::Config; use opt::Opt; +use writer::WriteReceiver; fn main() { let opt = Opt::from_args(); @@ -43,7 +46,7 @@ fn main() { while let Some(choice) = choice_iter.next() { choice.print_choice(&l, &config, &mut handle); if choice_iter.peek().is_some() { - choice::Choice::write_separator(&config, &mut handle); + handle.write_separator(&config); } } match handle.write(b"\n") { @@ -8,30 +8,34 @@ use crate::config::Config; #[structopt(name = "choose", about = "`choose` sections from each line of files")] #[structopt(setting = structopt::clap::AppSettings::AllowLeadingHyphen)] pub struct Opt { - /// Specify field separator other than whitespace, using Rust `regex` syntax + /// Choose fields by character number #[structopt(short, long)] - pub field_separator: Option<String>, - - /// Specify output field separator - #[structopt(short, long, parse(from_str = Config::parse_output_field_separator))] - pub output_field_separator: Option<String>, + pub character_wise: bool, - /// Use non-greedy field separators + /// Activate debug mode #[structopt(short, long)] - pub non_greedy: bool, + pub debug: bool, /// Use exclusive ranges, similar to array indexing in many programming languages #[structopt(short = "x", long)] pub exclusive: bool, - /// Activate debug mode + /// Specify field separator other than whitespace, using Rust `regex` syntax #[structopt(short, long)] - pub debug: bool, + pub field_separator: Option<String>, /// Input file #[structopt(short, long, parse(from_os_str))] pub input: Option<PathBuf>, + /// Use non-greedy field separators + #[structopt(short, long)] + pub non_greedy: bool, + + /// Specify output field separator + #[structopt(short, long, parse(from_str = Config::parse_output_field_separator))] + pub output_field_separator: Option<String>, + /// Fields to print. 
Either x, x:, :y, or x:y, where x and y are integers, colons indicate a /// range, and an empty field on either side of the colon continues to the beginning or end of /// the line. diff --git a/src/writeable.rs b/src/writeable.rs new file mode 100644 index 0000000..8dadde3 --- /dev/null +++ b/src/writeable.rs @@ -0,0 +1,20 @@ +pub trait Writeable { + fn to_byte_buf(&self) -> Box<[u8]>; +} + +impl Writeable for &str { + fn to_byte_buf(&self) -> Box<[u8]> { + return Box::from(self.as_bytes()); + } +} + +impl Writeable for char { + fn to_byte_buf(&self) -> Box<[u8]> { + let mut buf = [0; 4]; + return self + .encode_utf8(&mut buf) + .to_owned() + .into_boxed_str() + .into_boxed_bytes(); + } +} diff --git a/src/writer.rs b/src/writer.rs new file mode 100644 index 0000000..00a93a1 --- /dev/null +++ b/src/writer.rs @@ -0,0 +1,31 @@ +use std::io::{BufWriter, Write}; + +use crate::config::Config; +use crate::writeable::Writeable; + +pub trait WriteReceiver { + fn write_choice<Wa: Writeable>(&mut self, b: Wa, config: &Config, print_separator: bool); + fn write_separator(&mut self, config: &Config); +} + +impl<W: Write> WriteReceiver for BufWriter<W> { + fn write_choice<Wa: Writeable>(&mut self, b: Wa, config: &Config, print_separator: bool) { + let num_bytes_written = match self.write(&b.to_byte_buf()) { + Ok(x) => x, + Err(e) => { + eprintln!("Failed to write to output: {}", e); + 0 + } + }; + if num_bytes_written > 0 && print_separator { + self.write_separator(config); + }; + } + + fn write_separator(&mut self, config: &Config) { + match self.write(&config.output_separator) { + Ok(_) => (), + Err(e) => eprintln!("Failed to write to output: {}", e), + } + } +} diff --git a/test/choose_0_3_c.txt b/test/choose_0_3_c.txt new file mode 100644 index 0000000..a7aeeb0 --- /dev/null +++ b/test/choose_0_3_c.txt @@ -0,0 +1,6 @@ +em i +idid +trud +s au +iat +pa q diff --git a/test/e2e_test.sh b/test/e2e_test.sh index 69ddd54..5fb22e9 100755 --- a/test/e2e_test.sh +++ 
b/test/e2e_test.sh @@ -17,6 +17,7 @@ diff -w <(cargo run -- -4:-2 -i ${test_dir}/lorem.txt 2>/dev/null) <(cat "${test diff -w <(cargo run -- 1:3 -o % -i ${test_dir}/lorem.txt 2>/dev/null) <(cat "${test_dir}/choose_1:3of%.txt") diff -w <(cargo run -- 1 3 -o % -i ${test_dir}/lorem.txt 2>/dev/null) <(cat "${test_dir}/choose_1_3of%.txt") diff -w <(cargo run -- 1 3 -o '' -i ${test_dir}/lorem.txt 2>/dev/null) <(cat "${test_dir}/choose_1_3of.txt") +diff -w <(cargo run -- 3:6 -c -i ${test_dir}/lorem.txt 2>/dev/null) <(cat "${test_dir}/choose_0_3_c.txt") # add tests for different delimiters # add tests using piping |