summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/choice.rs282
-rw-r--r--src/config.rs12
-rw-r--r--src/main.rs5
-rw-r--r--src/opt.rs24
-rw-r--r--src/writeable.rs20
-rw-r--r--src/writer.rs31
-rw-r--r--test/choose_0_3_c.txt6
-rwxr-xr-xtest/e2e_test.sh1
8 files changed, 270 insertions, 111 deletions
diff --git a/src/choice.rs b/src/choice.rs
index 2f89ff0..bf193f6 100644
--- a/src/choice.rs
+++ b/src/choice.rs
@@ -1,7 +1,10 @@
use std::convert::TryInto;
+use std::io::{BufWriter, Write};
+use std::iter::FromIterator;
use crate::config::Config;
-use crate::io::{BufWriter, Write};
+use crate::writeable::Writeable;
+use crate::writer::WriteReceiver;
#[derive(Debug)]
pub struct Choice {
@@ -29,134 +32,149 @@ impl Choice {
config: &Config,
handle: &mut BufWriter<W>,
) {
- let mut line_iter = config
- .separator
- .split(line)
- .filter(|s| !s.is_empty() || config.opt.non_greedy);
-
- if self.is_reverse_range() && !self.has_negative_index() {
- if self.end > 0 {
- line_iter.nth((self.end - 1).try_into().unwrap());
- }
+ if config.opt.character_wise {
+ let line_chars = line[0..line.len() - 1].chars();
+ self.print_choice_generic(line_chars, config, handle);
+ } else {
+ let line_iter = config
+ .separator
+ .split(line)
+ .filter(|s| !s.is_empty() || config.opt.non_greedy);
+ self.print_choice_generic(line_iter, config, handle);
+ }
+ }
- let mut stack = Vec::new();
- for i in 0..=(self.start - self.end) {
- match line_iter.next() {
- Some(s) => stack.push(s),
- None => break,
- }
+ pub fn is_reverse_range(&self) -> bool {
+ self.reversed
+ }
- if self.start <= self.end + i {
- break;
- }
- }
+ pub fn has_negative_index(&self) -> bool {
+ self.negative_index
+ }
- let mut iter = stack.iter().rev().peekable();
- loop {
- match iter.next() {
- Some(s) => {
- Choice::write_bytes(s.as_bytes(), config, handle, iter.peek().is_some())
- }
- None => break,
- }
- }
+ fn print_choice_generic<W, T, I>(&self, mut iter: I, config: &Config, handle: &mut BufWriter<W>)
+ where
+ W: Write,
+ T: Writeable + Copy,
+ I: Iterator<Item = T>,
+ {
+ if self.is_reverse_range() && !self.has_negative_index() {
+ self.print_choice_reverse(iter, config, handle);
} else if self.has_negative_index() {
- let vec = line_iter.collect::<Vec<&str>>();
- self.print_negative_choice(vec, config, handle);
+ self.print_choice_negative(iter, config, handle);
} else {
if self.start > 0 {
- line_iter.nth((self.start - 1).try_into().unwrap());
- }
-
- let mut peek_line_iter = line_iter.peekable();
- for i in self.start..=self.end {
- match peek_line_iter.next() {
- Some(s) => Choice::write_bytes(
- s.as_bytes(),
- config,
- handle,
- peek_line_iter.peek().is_some() && i != self.end,
- ),
- None => break,
- };
+ iter.nth((self.start - 1).try_into().unwrap());
}
+ let range = self.end.checked_sub(self.start).unwrap();
+ Choice::print_choice_loop_max_items(iter, config, handle, range);
}
}
- fn print_negative_choice<W: Write>(
- &self,
- vec: Vec<&str>,
+ fn print_choice_loop<W, T, I>(iter: I, config: &Config, handle: &mut BufWriter<W>)
+ where
+ W: Write,
+ T: Writeable + Copy,
+ I: Iterator<Item = T>,
+ {
+ Choice::print_choice_loop_max_items(iter, config, handle, isize::max_value());
+ }
+
+ fn print_choice_loop_max_items<W, T, I>(
+ iter: I,
config: &Config,
handle: &mut BufWriter<W>,
- ) {
- let start = if self.start >= 0 {
- self.start.try_into().unwrap()
- } else {
- vec.len()
- .checked_sub(self.start.abs().try_into().unwrap())
- .unwrap()
- };
+ max_items: isize,
+ ) where
+ W: Write,
+ T: Writeable + Copy,
+ I: Iterator<Item = T>,
+ {
+ let mut peek_iter = iter.peekable();
+ for i in 0..=max_items {
+ match peek_iter.next() {
+ Some(s) => {
+ handle.write_choice(s, config, peek_iter.peek().is_some() && i != max_items);
+ }
+ None => break,
+ };
+ }
+ }
- let end = if self.end >= 0 {
- self.end.try_into().unwrap()
- } else {
- vec.len()
- .checked_sub(self.end.abs().try_into().unwrap())
- .unwrap()
- };
+ fn print_choice_negative<W, T, I>(&self, iter: I, config: &Config, handle: &mut BufWriter<W>)
+ where
+ W: Write,
+ T: Writeable + Copy,
+ I: Iterator<Item = T>,
+ {
+ let vec = Vec::from_iter(iter);
+ let (start, end) = self.get_negative_start_end(&vec);
if end > start {
for word in vec[start..std::cmp::min(end, vec.len() - 1)].iter() {
- Choice::write_bytes(word.as_bytes(), config, handle, true);
+ handle.write_choice(*word, config, true);
}
- Choice::write_bytes(
- vec[std::cmp::min(end, vec.len() - 1)].as_bytes(),
- config,
- handle,
- false,
- );
+ handle.write_choice(vec[std::cmp::min(end, vec.len() - 1)], config, false);
} else if self.start < 0 {
for word in vec[end + 1..=std::cmp::min(start, vec.len() - 1)]
.iter()
.rev()
{
- Choice::write_bytes(word.as_bytes(), config, handle, true);
+ handle.write_choice(*word, config, true);
}
- Choice::write_bytes(vec[end].as_bytes(), config, handle, false);
+ handle.write_choice(vec[end], config, false);
}
}
- fn write_bytes<WriterType: Write>(
- b: &[u8],
- config: &Config,
- handle: &mut BufWriter<WriterType>,
- print_separator: bool,
- ) {
- let num_bytes_written = match handle.write(b) {
- Ok(x) => x,
- Err(e) => {
- eprintln!("Failed to write to output: {}", e);
- 0
+ fn print_choice_reverse<W, T, I>(&self, mut iter: I, config: &Config, handle: &mut BufWriter<W>)
+ where
+ W: Write,
+ T: Writeable + Copy,
+ I: Iterator<Item = T>,
+ {
+ if self.end > 0 {
+ iter.nth((self.end - 1).try_into().unwrap());
+ }
+
+ let mut stack = Vec::new();
+ for i in 0..=(self.start - self.end) {
+ match iter.next() {
+ Some(s) => stack.push(s),
+ None => break,
}
- };
- if num_bytes_written > 0 && print_separator {
- Choice::write_separator(config, handle);
- };
- }
- pub fn write_separator<W: Write>(config: &Config, handle: &mut BufWriter<W>) {
- match handle.write(&config.output_separator) {
- Ok(_) => (),
- Err(e) => eprintln!("Failed to write to output: {}", e),
+ if self.start <= self.end + i {
+ break;
+ }
}
- }
- pub fn is_reverse_range(&self) -> bool {
- self.reversed
+ let mut peek_iter = stack.iter().rev().peekable();
+ loop {
+ match peek_iter.next() {
+ Some(s) => handle.write_choice(*s, config, peek_iter.peek().is_some()),
+ None => break,
+ }
+ }
}
- pub fn has_negative_index(&self) -> bool {
- self.negative_index
+ fn get_negative_start_end<T>(&self, vec: &Vec<T>) -> (usize, usize) {
+ let start = if self.start >= 0 {
+ self.start.try_into().unwrap()
+ } else {
+ vec.len()
+ .checked_sub(self.start.abs().try_into().unwrap())
+ .unwrap()
+ };
+
+ let end = if self.end >= 0 {
+ self.end.try_into().unwrap()
+ } else {
+ vec.len()
+ .checked_sub(self.end.abs().try_into().unwrap())
+ .unwrap()
+ };
+
+ return (start, end);
}
}
@@ -662,6 +680,76 @@ mod tests {
config.opt.choice[0].print_choice(&String::from("a b c d"), &config, &mut handle);
assert_eq!(String::from("abc"), MockStdout::str_from_buf_writer(handle));
}
+
+ #[test]
+ fn print_0_to_2_character_wise() {
+ let config = Config::from_iter(vec!["choose", "0:2", "-c"]);
+ let mut handle = BufWriter::new(MockStdout::new());
+ config.opt.choice[0].print_choice(&String::from("abcd\n"), &config, &mut handle);
+ assert_eq!(String::from("abc"), MockStdout::str_from_buf_writer(handle));
+ }
+
+ #[test]
+ fn print_2_to_end_character_wise() {
+ let config = Config::from_iter(vec!["choose", "2:", "-c"]);
+ let mut handle = BufWriter::new(MockStdout::new());
+ config.opt.choice[0].print_choice(&String::from("abcd\n"), &config, &mut handle);
+ assert_eq!(String::from("cd"), MockStdout::str_from_buf_writer(handle));
+ }
+
+ #[test]
+ fn print_start_to_2_character_wise() {
+ let config = Config::from_iter(vec!["choose", ":2", "-c"]);
+ let mut handle = BufWriter::new(MockStdout::new());
+ config.opt.choice[0].print_choice(&String::from("abcd\n"), &config, &mut handle);
+ assert_eq!(String::from("abc"), MockStdout::str_from_buf_writer(handle));
+ }
+
+ #[test]
+ fn print_0_to_2_character_wise_exclusive() {
+ let config = Config::from_iter(vec!["choose", "0:2", "-c", "-x"]);
+ let mut handle = BufWriter::new(MockStdout::new());
+ config.opt.choice[0].print_choice(&String::from("abcd\n"), &config, &mut handle);
+ assert_eq!(String::from("ab"), MockStdout::str_from_buf_writer(handle));
+ }
+
+ #[test]
+ fn print_0_to_2_character_wise_with_output_delimeter() {
+ let config = Config::from_iter(vec!["choose", "0:2", "-c", "-o", ":"]);
+ let mut handle = BufWriter::new(MockStdout::new());
+ config.opt.choice[0].print_choice(&String::from("abcd\n"), &config, &mut handle);
+ assert_eq!(
+ String::from("a:b:c"),
+ MockStdout::str_from_buf_writer(handle)
+ );
+ }
+
+ #[test]
+ fn print_after_end_character_wise() {
+ let config = Config::from_iter(vec!["choose", "0:9", "-c"]);
+ let mut handle = BufWriter::new(MockStdout::new());
+ config.opt.choice[0].print_choice(&String::from("abcd\n"), &config, &mut handle);
+ assert_eq!(
+ String::from("abcd"),
+ MockStdout::str_from_buf_writer(handle)
+ );
+ }
+
+ #[test]
+ fn print_2_to_0_character_wise() {
+ let config = Config::from_iter(vec!["choose", "2:0", "-c"]);
+ let mut handle = BufWriter::new(MockStdout::new());
+ config.opt.choice[0].print_choice(&String::from("abcd\n"), &config, &mut handle);
+ assert_eq!(String::from("cba"), MockStdout::str_from_buf_writer(handle));
+ }
+
+ #[test]
+ fn print_neg_2_to_end_character_wise() {
+ let config = Config::from_iter(vec!["choose", "-2:", "-c"]);
+ let mut handle = BufWriter::new(MockStdout::new());
+ config.opt.choice[0].print_choice(&String::from("abcd\n"), &config, &mut handle);
+ assert_eq!(String::from("cd"), MockStdout::str_from_buf_writer(handle));
+ }
}
mod is_reverse_range_tests {
diff --git a/src/config.rs b/src/config.rs
index d87a602..300408b 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -51,9 +51,15 @@ impl Config {
}
};
- let output_separator = match opt.output_field_separator.clone() {
- Some(s) => s.into_boxed_str().into_boxed_bytes(),
- None => Box::new([0x20; 1]),
+ let output_separator = match opt.character_wise {
+ false => match opt.output_field_separator.clone() {
+ Some(s) => s.into_boxed_str().into_boxed_bytes(),
+ None => Box::new([0x20; 1]),
+ },
+ true => match opt.output_field_separator.clone() {
+ Some(s) => s.into_boxed_str().into_boxed_bytes(),
+ None => Box::new([]),
+ },
};
Config {
diff --git a/src/main.rs b/src/main.rs
index a22b553..6872f99 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -10,8 +10,11 @@ mod choice;
mod config;
mod opt;
mod reader;
+mod writeable;
+mod writer;
use config::Config;
use opt::Opt;
+use writer::WriteReceiver;
fn main() {
let opt = Opt::from_args();
@@ -43,7 +46,7 @@ fn main() {
while let Some(choice) = choice_iter.next() {
choice.print_choice(&l, &config, &mut handle);
if choice_iter.peek().is_some() {
- choice::Choice::write_separator(&config, &mut handle);
+ handle.write_separator(&config);
}
}
match handle.write(b"\n") {
diff --git a/src/opt.rs b/src/opt.rs
index b64991d..0b9a83c 100644
--- a/src/opt.rs
+++ b/src/opt.rs
@@ -8,30 +8,34 @@ use crate::config::Config;
#[structopt(name = "choose", about = "`choose` sections from each line of files")]
#[structopt(setting = structopt::clap::AppSettings::AllowLeadingHyphen)]
pub struct Opt {
- /// Specify field separator other than whitespace, using Rust `regex` syntax
+ /// Choose fields by character number
#[structopt(short, long)]
- pub field_separator: Option<String>,
-
- /// Specify output field separator
- #[structopt(short, long, parse(from_str = Config::parse_output_field_separator))]
- pub output_field_separator: Option<String>,
+ pub character_wise: bool,
- /// Use non-greedy field separators
+ /// Activate debug mode
#[structopt(short, long)]
- pub non_greedy: bool,
+ pub debug: bool,
/// Use exclusive ranges, similar to array indexing in many programming languages
#[structopt(short = "x", long)]
pub exclusive: bool,
- /// Activate debug mode
+ /// Specify field separator other than whitespace, using Rust `regex` syntax
#[structopt(short, long)]
- pub debug: bool,
+ pub field_separator: Option<String>,
/// Input file
#[structopt(short, long, parse(from_os_str))]
pub input: Option<PathBuf>,
+ /// Use non-greedy field separators
+ #[structopt(short, long)]
+ pub non_greedy: bool,
+
+ /// Specify output field separator
+ #[structopt(short, long, parse(from_str = Config::parse_output_field_separator))]
+ pub output_field_separator: Option<String>,
+
/// Fields to print. Either x, x:, :y, or x:y, where x and y are integers, colons indicate a
/// range, and an empty field on either side of the colon continues to the beginning or end of
/// the line.
diff --git a/src/writeable.rs b/src/writeable.rs
new file mode 100644
index 0000000..8dadde3
--- /dev/null
+++ b/src/writeable.rs
@@ -0,0 +1,20 @@
+/// A value that can be turned into raw bytes for output.
+pub trait Writeable {
+ /// Returns this value's byte representation as an owned boxed slice.
+ fn to_byte_buf(&self) -> Box<[u8]>;
+}
+
+/// String slices are emitted as their UTF-8 bytes, copied into a new boxed slice.
+impl Writeable for &str {
+    fn to_byte_buf(&self) -> Box<[u8]> {
+        self.as_bytes().into()
+    }
+}
+
+/// Characters are emitted as their UTF-8 encoding (one to four bytes).
+impl Writeable for char {
+    fn to_byte_buf(&self) -> Box<[u8]> {
+        // Encode into a stack buffer sized for the longest UTF-8 sequence, then copy
+        // only the bytes actually used straight into the box; no intermediate `String`.
+        let mut buf = [0; 4];
+        Box::from(self.encode_utf8(&mut buf).as_bytes())
+    }
+}
diff --git a/src/writer.rs b/src/writer.rs
new file mode 100644
index 0000000..00a93a1
--- /dev/null
+++ b/src/writer.rs
@@ -0,0 +1,31 @@
+use std::io::{BufWriter, Write};
+
+use crate::config::Config;
+use crate::writeable::Writeable;
+
+/// Output operations for writing selected items and the separators between them.
+pub trait WriteReceiver {
+ /// Writes `b`; `print_separator` requests that the output separator follow it.
+ fn write_choice<Wa: Writeable>(&mut self, b: Wa, config: &Config, print_separator: bool);
+ /// Writes the output separator taken from `config`.
+ fn write_separator(&mut self, config: &Config);
+}
+
+/// Routes choice output through a buffered writer, reporting I/O failures on stderr.
+impl<W: Write> WriteReceiver for BufWriter<W> {
+    /// Writes the bytes of `b`, then the output separator when `print_separator` is set.
+    ///
+    /// The separator is skipped when `b` is empty or when the write fails, so an item
+    /// that produced no output is never followed by a dangling separator.
+    fn write_choice<Wa: Writeable>(&mut self, b: Wa, config: &Config, print_separator: bool) {
+        let bytes = b.to_byte_buf();
+        // `write` may accept only part of the buffer; `write_all` keeps going until
+        // every byte has been handed over or an error occurs.
+        let wrote_something = match self.write_all(&bytes) {
+            Ok(()) => !bytes.is_empty(),
+            Err(e) => {
+                eprintln!("Failed to write to output: {}", e);
+                false
+            }
+        };
+        if wrote_something && print_separator {
+            self.write_separator(config);
+        }
+    }
+
+    /// Writes `config.output_separator` in full, reporting any failure on stderr.
+    fn write_separator(&mut self, config: &Config) {
+        if let Err(e) = self.write_all(&config.output_separator) {
+            eprintln!("Failed to write to output: {}", e);
+        }
+    }
+}
diff --git a/test/choose_0_3_c.txt b/test/choose_0_3_c.txt
new file mode 100644
index 0000000..a7aeeb0
--- /dev/null
+++ b/test/choose_0_3_c.txt
@@ -0,0 +1,6 @@
+em i
+idid
+trud
+s au
+iat
+pa q
diff --git a/test/e2e_test.sh b/test/e2e_test.sh
index 69ddd54..5fb22e9 100755
--- a/test/e2e_test.sh
+++ b/test/e2e_test.sh
@@ -17,6 +17,7 @@ diff -w <(cargo run -- -4:-2 -i ${test_dir}/lorem.txt 2>/dev/null) <(cat "${test
diff -w <(cargo run -- 1:3 -o % -i ${test_dir}/lorem.txt 2>/dev/null) <(cat "${test_dir}/choose_1:3of%.txt")
diff -w <(cargo run -- 1 3 -o % -i ${test_dir}/lorem.txt 2>/dev/null) <(cat "${test_dir}/choose_1_3of%.txt")
diff -w <(cargo run -- 1 3 -o '' -i ${test_dir}/lorem.txt 2>/dev/null) <(cat "${test_dir}/choose_1_3of.txt")
+diff -w <(cargo run -- 3:6 -c -i ${test_dir}/lorem.txt 2>/dev/null) <(cat "${test_dir}/choose_0_3_c.txt")
# add tests for different delimiters
# add tests using piping