summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRyan Geary <rtgnj42@gmail.com>2020-05-11 16:41:23 -0400
committerRyan Geary <rtgnj42@gmail.com>2020-06-02 14:41:30 -0400
commita66621380b370c011de01a6d22dc2ebc638d0019 (patch)
tree77f3740f2a4e876f2ca0bfeddac72a7418fbfd5e
parent5f7724967265f938eed28f6180d972ba76566cfa (diff)
Add character-wise `choose`ing
Alphabetize structopt options. Add character-wise tests. Add character-wise switch. Add print-after-end test. Add empty default separator for char-wise mode. Add char-wise forward and negative printing. Add pure reverse printing. Change char_wise to use `char` instead of `u8`. Adds support for unicode (read: emojis). Adds a newline char to end of each char-wise test because that's how it is. Add writing traits for better code structure. Merge repetitive codepaths with generics. Unify print_choice_* function names. Reorder functions in choice module. Rename variable to avoid name confusion. Make default case for loop more readable. Abstract default case print loop. Add e2e test. Move vec create to print_choice_negative.
-rw-r--r--src/choice.rs282
-rw-r--r--src/config.rs12
-rw-r--r--src/main.rs5
-rw-r--r--src/opt.rs24
-rw-r--r--src/writeable.rs20
-rw-r--r--src/writer.rs31
-rw-r--r--test/choose_0_3_c.txt6
-rwxr-xr-xtest/e2e_test.sh1
8 files changed, 270 insertions, 111 deletions
diff --git a/src/choice.rs b/src/choice.rs
index 2f89ff0..bf193f6 100644
--- a/src/choice.rs
+++ b/src/choice.rs
@@ -1,7 +1,10 @@
use std::convert::TryInto;
+use std::io::{BufWriter, Write};
+use std::iter::FromIterator;
use crate::config::Config;
-use crate::io::{BufWriter, Write};
+use crate::writeable::Writeable;
+use crate::writer::WriteReceiver;
#[derive(Debug)]
pub struct Choice {
@@ -29,134 +32,149 @@ impl Choice {
config: &Config,
handle: &mut BufWriter<W>,
) {
- let mut line_iter = config
- .separator
- .split(line)
- .filter(|s| !s.is_empty() || config.opt.non_greedy);
-
- if self.is_reverse_range() && !self.has_negative_index() {
- if self.end > 0 {
- line_iter.nth((self.end - 1).try_into().unwrap());
- }
+ if config.opt.character_wise {
+ let line_chars = line[0..line.len() - 1].chars();
+ self.print_choice_generic(line_chars, config, handle);
+ } else {
+ let line_iter = config
+ .separator
+ .split(line)
+ .filter(|s| !s.is_empty() || config.opt.non_greedy);
+ self.print_choice_generic(line_iter, config, handle);
+ }
+ }
- let mut stack = Vec::new();
- for i in 0..=(self.start - self.end) {
- match line_iter.next() {
- Some(s) => stack.push(s),
- None => break,
- }
+ pub fn is_reverse_range(&self) -> bool {
+ self.reversed
+ }
- if self.start <= self.end + i {
- break;
- }
- }
+ pub fn has_negative_index(&self) -> bool {
+ self.negative_index
+ }
- let mut iter = stack.iter().rev().peekable();
- loop {
- match iter.next() {
- Some(s) => {
- Choice::write_bytes(s.as_bytes(), config, handle, iter.peek().is_some())
- }
- None => break,
- }
- }
+ fn print_choice_generic<W, T, I>(&self, mut iter: I, config: &Config, handle: &mut BufWriter<W>)
+ where
+ W: Write,
+ T: Writeable + Copy,
+ I: Iterator<Item = T>,
+ {
+ if self.is_reverse_range() && !self.has_negative_index() {
+ self.print_choice_reverse(iter, config, handle);
} else if self.has_negative_index() {
- let vec = line_iter.collect::<Vec<&str>>();
- self.print_negative_choice(vec, config, handle);
+ self.print_choice_negative(iter, config, handle);
} else {
if self.start > 0 {
- line_iter.nth((self.start - 1).try_into().unwrap());
- }
-
- let mut peek_line_iter = line_iter.peekable();
- for i in self.start..=self.end {
- match peek_line_iter.next() {
- Some(s) => Choice::write_bytes(
- s.as_bytes(),
- config,
- handle,
- peek_line_iter.peek().is_some() && i != self.end,
- ),
- None => break,
- };
+ iter.nth((self.start - 1).try_into().unwrap());
}
+ let range = self.end.checked_sub(self.start).unwrap();
+ Choice::print_choice_loop_max_items(iter, config, handle, range);
}
}
- fn print_negative_choice<W: Write>(
- &self,
- vec: Vec<&str>,
+ fn print_choice_loop<W, T, I>(iter: I, config: &Config, handle: &mut BufWriter<W>)
+ where
+ W: Write,
+ T: Writeable + Copy,
+ I: Iterator<Item = T>,
+ {
+ Choice::print_choice_loop_max_items(iter, config, handle, isize::max_value());
+ }
+
+ fn print_choice_loop_max_items<W, T, I>(
+ iter: I,
config: &Config,
handle: &mut BufWriter<W>,
- ) {
- let start = if self.start >= 0 {
- self.start.try_into().unwrap()
- } else {
- vec.len()
- .checked_sub(self.start.abs().try_into().unwrap())
- .unwrap()
- };
+ max_items: isize,
+ ) where
+ W: Write,
+ T: Writeable + Copy,
+ I: Iterator<Item = T>,
+ {
+ let mut peek_iter = iter.peekable();
+ for i in 0..=max_items {
+ match peek_iter.next() {
+ Some(s) => {
+ handle.write_choice(s, config, peek_iter.peek().is_some() && i != max_items);
+ }
+ None => break,
+ };
+ }
+ }
- let end = if self.end >= 0 {
- self.end.try_into().unwrap()
- } else {
- vec.len()
- .checked_sub(self.end.abs().try_into().unwrap())
- .unwrap()
- };
+ fn print_choice_negative<W, T, I>(&self, iter: I, config: &Config, handle: &mut BufWriter<W>)
+ where
+ W: Write,
+ T: Writeable + Copy,
+ I: Iterator<Item = T>,
+ {
+ let vec = Vec::from_iter(iter);
+ let (start, end) = self.get_negative_start_end(&vec);
if end > start {
for word in vec[start..std::cmp::min(end, vec.len() - 1)].iter() {
- Choice::write_bytes(word.as_bytes(), config, handle, true);
+ handle.write_choice(*word, config, true);
}
- Choice::write_bytes(
- vec[std::cmp::min(end, vec.len() - 1)].as_bytes(),
- config,
- handle,
- false,
- );
+ handle.write_choice(vec[std::cmp::min(end, vec.len() - 1)], config, false);
} else if self.start < 0 {
for word in vec[end + 1..=std::cmp::min(start, vec.len() - 1)]
.iter()
.rev()
{
- Choice::write_bytes(word.as_bytes(), config, handle, true);
+ handle.write_choice(*word, config, true);
}
- Choice::write_bytes(vec[end].as_bytes(), config, handle, false);
+ handle.write_choice(vec[end], config, false);
}
}
- fn write_bytes<WriterType: Write>(
- b: &[u8],
- config: &Config,
- handle: &mut BufWriter<WriterType>,
- print_separator: bool,
- ) {
- let num_bytes_written = match handle.write(b) {
- Ok(x) => x,
- Err(e) => {
- eprintln!("Failed to write to output: {}", e);
- 0
+ fn print_choice_reverse<W, T, I>(&self, mut iter: I, config: &Config, handle: &mut BufWriter<W>)
+ where
+ W: Write,
+ T: Writeable + Copy,
+ I: Iterator<Item = T>,
+ {
+ if self.end > 0 {
+ iter.nth((self.end - 1).try_into().unwrap());
+ }
+
+ let mut stack = Vec::new();
+ for i in 0..=(self.start - self.end) {
+ match iter.next() {
+ Some(s) => stack.push(s),
+ None => break,
}
- };
- if num_bytes_written > 0 && print_separator {
- Choice::write_separator(config, handle);
- };
- }
- pub fn write_separator<W: Write>(config: &Config, handle: &mut BufWriter<W>) {
- match handle.write(&config.output_separator) {
- Ok(_) => (),
- Err(e) => eprintln!("Failed to write to output: {}", e),
+ if self.start <= self.end + i {
+ break;
+ }
}
- }
- pub fn is_reverse_range(&self) -> bool {
- self.reversed
+ let mut peek_iter = stack.iter().rev().peekable();
+ loop {
+ match peek_iter.next() {
+ Some(s) => handle.write_choice(*s, config, peek_iter.peek().is_some()),
+ None => break,
+ }
+ }
}
- pub fn has_negative_index(&self) -> bool {
- self.negative_index
+ fn get_negative_start_end<T>(&self, vec: &Vec<T>) -> (usize, usize) {
+ let start = if self.start >= 0 {
+ self.start.try_into().unwrap()
+ } else {
+ vec.len()
+ .checked_sub(self.start.abs().try_into().unwrap())
+ .unwrap()
+ };
+
+ let end = if self.end >= 0 {
+ self.end.try_into().unwrap()
+ } else {
+ vec.len()
+ .checked_sub(self.end.abs().try_into().unwrap())
+ .unwrap()
+ };
+
+ return (start, end);
}
}
@@ -662,6 +680,76 @@ mod tests {
config.opt.choice[0].print_choice(&String::from("a b c d"), &config, &mut handle);
assert_eq!(String::from("abc"), MockStdout::str_from_buf_writer(handle));
}
+
+ #[test]
+ fn print_0_to_2_character_wise() {
+ let config = Config::from_iter(vec!["choose", "0:2", "-c"]);
+ let mut handle = BufWriter::new(MockStdout::new());
+ config.opt.choice[0].print_choice(&String::from("abcd\n"), &config, &mut handle);
+ assert_eq!(String::from("abc"), MockStdout::str_from_buf_writer(handle));
+ }
+
+ #[test]
+ fn print_2_to_end_character_wise() {
+ let config = Config::from_iter(vec!["choose", "2:", "-c"]);
+ let mut handle = BufWriter::new(MockStdout::new());
+ config.opt.choice[0].print_choice(&String::from("abcd\n"), &config, &mut handle);
+ assert_eq!(String::from("cd"), MockStdout::str_from_buf_writer(handle));
+ }
+
+ #[test]
+ fn print_start_to_2_character_wise() {
+ let config = Config::from_iter(vec!["choose", ":2", "-c"]);
+ let mut handle = BufWriter::new(MockStdout::new());
+ config.opt.choice[0].print_choice(&String::from("abcd\n"), &config, &mut handle);
+ assert_eq!(String::from("abc"), MockStdout::str_from_buf_writer(handle));
+ }
+
+ #[test]
+ fn print_0_to_2_character_wise_exclusive() {
+ let config = Config::from_iter(vec!["choose", "0:2", "-c", "-x"]);
+ let mut handle = BufWriter::new(MockStdout::new());
+ config.opt.choice[0].print_choice(&String::from("abcd\n"), &config, &mut handle);
+ assert_eq!(String::from("ab"), MockStdout::str_from_buf_writer(handle));
+ }
+
+ #[test]
+ fn print_0_to_2_character_wise_with_output_delimeter() {
+ let config = Config::from_iter(vec!["choose", "0:2", "-c", "-o", ":"]);
+ let mut handle = BufWriter::new(MockStdout::new());
+ config.opt.choice[0].print_choice(&String::from("abcd\n"), &config, &mut handle);
+ assert_eq!(
+ String::from("a:b:c"),
+ MockStdout::str_from_buf_writer(handle)
+ );
+ }
+
+ #[test]
+ fn print_after_end_character_wise() {
+ let config = Config::from_iter(vec!["choose", "0:9", "-c"]);
+ let mut handle = BufWriter::new(MockStdout::new());
+ config.opt.choice[0].print_choice(&String::from("abcd\n"), &config, &mut handle);
+ assert_eq!(
+ String::from("abcd"),
+ MockStdout::str_from_buf_writer(handle)
+ );
+ }
+
+ #[test]
+ fn print_2_to_0_character_wise() {
+ let config = Config::from_iter(vec!["choose", "2:0", "-c"]);
+ let mut handle = BufWriter::new(MockStdout::new());
+ config.opt.choice[0].print_choice(&String::from("abcd\n"), &config, &mut handle);
+ assert_eq!(String::from("cba"), MockStdout::str_from_buf_writer(handle));
+ }
+
+ #[test]
+ fn print_neg_2_to_end_character_wise() {
+ let config = Config::from_iter(vec!["choose", "-2:", "-c"]);
+ let mut handle = BufWriter::new(MockStdout::new());
+ config.opt.choice[0].print_choice(&String::from("abcd\n"), &config, &mut handle);
+ assert_eq!(String::from("cd"), MockStdout::str_from_buf_writer(handle));
+ }
}
mod is_reverse_range_tests {
diff --git a/src/config.rs b/src/config.rs
index d87a602..300408b 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -51,9 +51,15 @@ impl Config {
}
};
- let output_separator = match opt.output_field_separator.clone() {
- Some(s) => s.into_boxed_str().into_boxed_bytes(),
- None => Box::new([0x20; 1]),
+ let output_separator = match opt.character_wise {
+ false => match opt.output_field_separator.clone() {
+ Some(s) => s.into_boxed_str().into_boxed_bytes(),
+ None => Box::new([0x20; 1]),
+ },
+ true => match opt.output_field_separator.clone() {
+ Some(s) => s.into_boxed_str().into_boxed_bytes(),
+ None => Box::new([]),
+ },
};
Config {
diff --git a/src/main.rs b/src/main.rs
index a22b553..6872f99 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -10,8 +10,11 @@ mod choice;
mod config;
mod opt;
mod reader;
+mod writeable;
+mod writer;
use config::Config;
use opt::Opt;
+use writer::WriteReceiver;
fn main() {
let opt = Opt::from_args();
@@ -43,7 +46,7 @@ fn main() {
while let Some(choice) = choice_iter.next() {
choice.print_choice(&l, &config, &mut handle);
if choice_iter.peek().is_some() {
- choice::Choice::write_separator(&config, &mut handle);
+ handle.write_separator(&config);
}
}
match handle.write(b"\n") {
diff --git a/src/opt.rs b/src/opt.rs
index b64991d..0b9a83c 100644
--- a/src/opt.rs
+++ b/src/opt.rs
@@ -8,30 +8,34 @@ use crate::config::Config;
#[structopt(name = "choose", about = "`choose` sections from each line of files")]
#[structopt(setting = structopt::clap::AppSettings::AllowLeadingHyphen)]
pub struct Opt {
- /// Specify field separator other than whitespace, using Rust `regex` syntax
+ /// Choose fields by character number
#[structopt(short, long)]
- pub field_separator: Option<String>,
-
- /// Specify output field separator
- #[structopt(short, long, parse(from_str = Config::parse_output_field_separator))]
- pub output_field_separator: Option<String>,
+ pub character_wise: bool,
- /// Use non-greedy field separators
+ /// Activate debug mode
#[structopt(short, long)]
- pub non_greedy: bool,
+ pub debug: bool,
/// Use exclusive ranges, similar to array indexing in many programming languages
#[structopt(short = "x", long)]
pub exclusive: bool,
- /// Activate debug mode
+ /// Specify field separator other than whitespace, using Rust `regex` syntax
#[structopt(short, long)]
- pub debug: bool,
+ pub field_separator: Option<String>,
/// Input file
#[structopt(short, long, parse(from_os_str))]
pub input: Option<PathBuf>,
+ /// Use non-greedy field separators
+ #[structopt(short, long)]
+ pub non_greedy: bool,
+
+ /// Specify output field separator
+ #[structopt(short, long, parse(from_str = Config::parse_output_field_separator))]
+ pub output_field_separator: Option<String>,
+
/// Fields to print. Either x, x:, :y, or x:y, where x and y are integers, colons indicate a
/// range, and an empty field on either side of the colon continues to the beginning or end of
/// the line.
diff --git a/src/writeable.rs b/src/writeable.rs
new file mode 100644
index 0000000..8dadde3
--- /dev/null
+++ b/src/writeable.rs
@@ -0,0 +1,20 @@
+pub trait Writeable {
+ fn to_byte_buf(&self) -> Box<[u8]>;
+}
+
+impl Writeable for &str {
+ fn to_byte_buf(&self) -> Box<[u8]> {
+ return Box::from(self.as_bytes());
+ }
+}
+
+impl Writeable for char {
+ fn to_byte_buf(&self) -> Box<[u8]> {
+ let mut buf = [0; 4];
+ return self
+ .encode_utf8(&mut buf)
+ .to_owned()
+ .into_boxed_str()
+ .into_boxed_bytes();
+ }
+}
diff --git a/src/writer.rs b/src/writer.rs
new file mode 100644
index 0000000..00a93a1
--- /dev/null
+++ b/src/writer.rs
@@ -0,0 +1,31 @@
+use std::io::{BufWriter, Write};
+
+use crate::config::Config;
+use crate::writeable::Writeable;
+
+pub trait WriteReceiver {
+ fn write_choice<Wa: Writeable>(&mut self, b: Wa, config: &Config, print_separator: bool);
+ fn write_separator(&mut self, config: &Config);
+}
+
+impl<W: Write> WriteReceiver for BufWriter<W> {
+ fn write_choice<Wa: Writeable>(&mut self, b: Wa, config: &Config, print_separator: bool) {
+ let num_bytes_written = match self.write(&b.to_byte_buf()) {
+ Ok(x) => x,
+ Err(e) => {
+ eprintln!("Failed to write to output: {}", e);
+ 0
+ }
+ };
+ if num_bytes_written > 0 && print_separator {
+ self.write_separator(config);
+ };
+ }
+
+ fn write_separator(&mut self, config: &Config) {
+ match self.write(&config.output_separator) {
+ Ok(_) => (),
+ Err(e) => eprintln!("Failed to write to output: {}", e),
+ }
+ }
+}
diff --git a/test/choose_0_3_c.txt b/test/choose_0_3_c.txt
new file mode 100644
index 0000000..a7aeeb0
--- /dev/null
+++ b/test/choose_0_3_c.txt
@@ -0,0 +1,6 @@
+em i
+idid
+trud
+s au
+iat
+pa q
diff --git a/test/e2e_test.sh b/test/e2e_test.sh
index 69ddd54..5fb22e9 100755
--- a/test/e2e_test.sh
+++ b/test/e2e_test.sh
@@ -17,6 +17,7 @@ diff -w <(cargo run -- -4:-2 -i ${test_dir}/lorem.txt 2>/dev/null) <(cat "${test
diff -w <(cargo run -- 1:3 -o % -i ${test_dir}/lorem.txt 2>/dev/null) <(cat "${test_dir}/choose_1:3of%.txt")
diff -w <(cargo run -- 1 3 -o % -i ${test_dir}/lorem.txt 2>/dev/null) <(cat "${test_dir}/choose_1_3of%.txt")
diff -w <(cargo run -- 1 3 -o '' -i ${test_dir}/lorem.txt 2>/dev/null) <(cat "${test_dir}/choose_1_3of.txt")
+diff -w <(cargo run -- 3:6 -c -i ${test_dir}/lorem.txt 2>/dev/null) <(cat "${test_dir}/choose_0_3_c.txt")
# add tests for different delimiters
# add tests using piping