diff options
author | Ryan Geary <rtgnj42@gmail.com> | 2020-02-24 23:59:51 -0500 |
---|---|---|
committer | Ryan Geary <rtgnj42@gmail.com> | 2020-03-10 00:03:14 -0400 |
commit | 389c29822b2af5f4a828734eb0a2d70b4f82a900 (patch) | |
tree | 7983809185cf846c2182f0eac89654359860d91a | |
parent | c729ad3f004eb2313a4dfa206b7103939abeaac4 (diff) |
Improve performance and add profiling tooling
Add tags, todo, *.bench, *.svg and bench_output to .gitignore
Add test/bench.sh script. bench.sh runs the `bench` command on each
test/long*txt file with range 3:5 and saves the output to a file for
comparing performance across file sizes.
Inline printing in get_choice_slice
Change BufWriter<..stdout..> to BufWriter<T>
Add MockStdout for testing printing
Add more reverse range tests
Simplify word finding with a more uniform bounds check.
Add Makefile for generating flamegraphs
Redefine Choice struct as a start and end integer
Improve algorithm for finding words to print
Settle exclusivity at Config construction time
Add tests for nonexistent field_seps
Add regression test for preceding separator
Use handle.write instead of write! macro for tremendous speedup
-rw-r--r-- | .gitignore | 7 | ||||
-rw-r--r-- | Makefile | 11 | ||||
-rw-r--r-- | src/choice.rs | 379 | ||||
-rw-r--r-- | src/config.rs | 72 | ||||
-rw-r--r-- | src/main.rs | 2 | ||||
-rwxr-xr-x | test/bench.sh | 10 |
6 files changed, 323 insertions, 158 deletions
@@ -1,2 +1,9 @@ /target **/*.rs.bk +tags +todo +bench_output/ +**/*.bench +**/*.svg +test/long*txt +perf.data* diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..5dea5a3 --- /dev/null +++ b/Makefile @@ -0,0 +1,11 @@ +flamegraph: release + perf record --call-graph dwarf,16384 -e cpu-clock -F 997 target/release/choose -i test/long_long_long_long.txt 3:5 + perf script | stackcollapse-perf.pl | stackcollapse-recursive.pl | c++filt | flamegraph.pl > flamegraphs/working.svg + +flamegraph_commit: release + perf record --call-graph dwarf,16384 -e cpu-clock -F 997 target/release/choose -i test/long_long_long_long.txt 3:5 + perf script | stackcollapse-perf.pl | stackcollapse-recursive.pl | c++filt | flamegraph.pl > flamegraphs/`git log -n 1 --pretty=format:"%h"`.svg + +.PHONY: release +release: + cargo build --release diff --git a/src/choice.rs b/src/choice.rs index 62477c1..b7bf879 100644 --- a/src/choice.rs +++ b/src/choice.rs @@ -1,89 +1,74 @@ -use crate::io::{BufWriter, Write}; -use std::convert::TryInto; - use crate::config::Config; - -pub type Range = (Option<u32>, Option<u32>); +use crate::io::{BufWriter, Write}; #[derive(Debug)] -pub enum Choice { - Field(u32), - FieldRange(Range), +pub struct Choice { + pub start: usize, + pub end: usize, } impl Choice { - pub fn print_choice( + pub fn new(start: usize, end: usize) -> Self { + Choice { start, end } + } + + pub fn print_choice<WriterType: Write>( &self, line: &String, config: &Config, - handle: &mut BufWriter<std::io::StdoutLock>, + handle: &mut BufWriter<WriterType>, ) { - write!(handle, "{}", self.get_choice_slice(line, config).join(" ")); - } - - pub fn is_reverse_range(&self) -> bool { - match self { - Choice::Field(_) => false, - Choice::FieldRange(r) => match r { - (Some(start), Some(end)) => end < start, - _ => false, - }, - } - } + let mut line_iter = config.separator.split(line).filter(|s| !s.is_empty()); - fn get_choice_slice<'a>(&self, line: &'a String, config: &Config) -> Vec<&'a str> { 
- let words = config - .separator - .split(line) - .into_iter() - .filter(|s| !s.is_empty()) - .enumerate(); - - let mut slices = match self { - Choice::Field(i) => words - .filter(|x| x.0 == *i as usize) - .map(|x| x.1) - .collect::<Vec<&str>>(), - Choice::FieldRange(r) => match r { - (None, None) => words.map(|x| x.1).collect::<Vec<&str>>(), - (Some(start), None) => words - .filter(|x| x.0 >= (*start).try_into().unwrap()) - .map(|x| x.1) - .collect::<Vec<&str>>(), - (None, Some(end)) => { - let e: usize = if config.opt.exclusive { - (end - 1).try_into().unwrap() - } else { - (*end).try_into().unwrap() - }; - words - .filter(|x| x.0 <= e) - .map(|x| x.1) - .collect::<Vec<&str>>() + if self.is_reverse_range() { + if self.end > 0 { + line_iter.nth(self.end - 1); + } + + let mut stack = Vec::new(); + for i in 0..=(self.start - self.end) { + match line_iter.next() { + Some(s) => stack.push(s), + None => break, } - (Some(start), Some(end)) => { - let e: usize = if config.opt.exclusive { - (end - 1).try_into().unwrap() - } else { - (*end).try_into().unwrap() - }; - words - .filter(|x| { - (x.0 <= e && x.0 >= (*start).try_into().unwrap()) - || self.is_reverse_range() - && (x.0 >= e && x.0 <= (*start).try_into().unwrap()) - }) - .map(|x| x.1) - .collect::<Vec<&str>>() + + if self.start <= self.end + i { + break; } - }, - }; + } - if self.is_reverse_range() { - slices.reverse(); + loop { + match stack.pop() { + Some(s) => Choice::write_bytes(handle, s.as_bytes()), + None => break, + } + } + } else { + if self.start > 0 { + line_iter.nth(self.start - 1); + } + + for i in 0..=(self.end - self.start) { + match line_iter.next() { + Some(s) => Choice::write_bytes(handle, s.as_bytes()), + None => break, + }; + + if self.end <= self.start + i { + break; + } + } } + } - return slices; + fn write_bytes<WriterType: Write>(handle: &mut BufWriter<WriterType>, b: &[u8]) { + handle.write(b).unwrap(); + handle.write(b" ").unwrap(); + } + + #[cfg_attr(feature = "flame_it", flame)] + pub 
fn is_reverse_range(&self) -> bool { + self.end < self.start } } @@ -92,6 +77,7 @@ mod tests { use crate::config::{Config, Opt}; use std::ffi::OsString; + use std::io::{self, BufWriter, Write}; use structopt::StructOpt; impl Config { @@ -104,128 +90,293 @@ mod tests { } } - mod get_choice_slice_tests { + struct MockStdout { + pub buffer: String, + } + + impl MockStdout { + fn new() -> Self { + MockStdout { + buffer: String::new(), + } + } + + fn str_from_buf_writer(b: BufWriter<MockStdout>) -> String { + match b.into_inner() { + Ok(b) => b.buffer, + Err(_) => panic!("Failed to access BufWriter inner writer"), + } + .trim_end() + .to_string() + } + } + + impl Write for MockStdout { + fn write(&mut self, buf: &[u8]) -> io::Result<usize> { + let mut bytes_written = 0; + for i in buf { + self.buffer.push(*i as char); + bytes_written += 1; + } + Ok(bytes_written) + } + + fn flush(&mut self) -> io::Result<()> { + Ok(()) + } + } + + mod print_choice_tests { use super::*; #[test] fn print_0() { let config = Config::from_iter(vec!["choose", "0"]); + let mut handle = BufWriter::new(MockStdout::new()); + + config.opt.choice[0].print_choice( + &String::from("rust is pretty cool"), + &config, + &mut handle, + ); + assert_eq!( - vec!["rust"], - config.opt.choice[0] - .get_choice_slice(&String::from("rust is pretty cool"), &config) + String::from("rust"), + MockStdout::str_from_buf_writer(handle) ); } #[test] fn print_after_end() { let config = Config::from_iter(vec!["choose", "10"]); - assert_eq!( - Vec::<&str>::new(), - config.opt.choice[0] - .get_choice_slice(&String::from("rust is pretty cool"), &config) + let mut handle = BufWriter::new(MockStdout::new()); + + config.opt.choice[0].print_choice( + &String::from("rust is pretty cool"), + &config, + &mut handle, ); + + assert_eq!(String::new(), MockStdout::str_from_buf_writer(handle)); } #[test] fn print_out_of_order() { let config = Config::from_iter(vec!["choose", "3", "1"]); - assert_eq!( - vec!["cool"], - 
config.opt.choice[0] - .get_choice_slice(&String::from("rust is pretty cool"), &config) + let mut handle = BufWriter::new(MockStdout::new()); + let mut handle1 = BufWriter::new(MockStdout::new()); + + config.opt.choice[0].print_choice( + &String::from("rust is pretty cool"), + &config, + &mut handle, ); + assert_eq!( - vec!["is"], - config.opt.choice[1] - .get_choice_slice(&String::from("rust is pretty cool"), &config) + String::from("cool"), + MockStdout::str_from_buf_writer(handle) ); + + config.opt.choice[1].print_choice( + &String::from("rust is pretty cool"), + &config, + &mut handle1, + ); + + assert_eq!(String::from("is"), MockStdout::str_from_buf_writer(handle1)); } #[test] fn print_1_to_3_exclusive() { let config = Config::from_iter(vec!["choose", "1:3", "-x"]); + let mut handle = BufWriter::new(MockStdout::new()); + config.opt.choice[0].print_choice( + &String::from("rust is pretty cool"), + &config, + &mut handle, + ); assert_eq!( - vec!["is", "pretty"], - config.opt.choice[0] - .get_choice_slice(&String::from("rust is pretty cool"), &config) + String::from("is pretty"), + MockStdout::str_from_buf_writer(handle) ); } #[test] fn print_1_to_3() { let config = Config::from_iter(vec!["choose", "1:3"]); + let mut handle = BufWriter::new(MockStdout::new()); + config.opt.choice[0].print_choice( + &String::from("rust is pretty cool"), + &config, + &mut handle, + ); assert_eq!( - vec!["is", "pretty", "cool"], - config.opt.choice[0] - .get_choice_slice(&String::from("rust is pretty cool"), &config) + String::from("is pretty cool"), + MockStdout::str_from_buf_writer(handle) ); } #[test] fn print_1_to_3_separated_by_hashtag() { let config = Config::from_iter(vec!["choose", "1:3", "-f", "#"]); + let mut handle = BufWriter::new(MockStdout::new()); + config.opt.choice[0].print_choice( + &String::from("rust#is#pretty#cool"), + &config, + &mut handle, + ); assert_eq!( - vec!["is", "pretty", "cool"], - config.opt.choice[0] - 
.get_choice_slice(&String::from("rust#is#pretty#cool"), &config) + String::from("is pretty cool"), + MockStdout::str_from_buf_writer(handle) ); } #[test] fn print_1_to_3_separated_by_varying_multiple_hashtag_exclusive() { let config = Config::from_iter(vec!["choose", "1:3", "-f", "#", "-x"]); + let mut handle = BufWriter::new(MockStdout::new()); + config.opt.choice[0].print_choice( + &String::from("rust##is###pretty####cool"), + &config, + &mut handle, + ); assert_eq!( - vec!["is", "pretty"], - config.opt.choice[0] - .get_choice_slice(&String::from("rust##is###pretty####cool"), &config) + String::from("is pretty"), + MockStdout::str_from_buf_writer(handle) ); } #[test] fn print_1_to_3_separated_by_varying_multiple_hashtag() { let config = Config::from_iter(vec!["choose", "1:3", "-f", "#"]); + let mut handle = BufWriter::new(MockStdout::new()); + config.opt.choice[0].print_choice( + &String::from("rust##is###pretty####cool"), + &config, + &mut handle, + ); assert_eq!( - vec!["is", "pretty", "cool"], - config.opt.choice[0] - .get_choice_slice(&String::from("rust##is###pretty####cool"), &config) + String::from("is pretty cool"), + MockStdout::str_from_buf_writer(handle) ); } #[test] fn print_1_to_3_separated_by_regex_group_vowels_exclusive() { let config = Config::from_iter(vec!["choose", "1:3", "-f", "[aeiou]", "-x"]); + let mut handle = BufWriter::new(MockStdout::new()); + config.opt.choice[0].print_choice( + &String::from("the quick brown fox jumped over the lazy dog"), + &config, + &mut handle, + ); assert_eq!( - vec![" q", "ck br"], - config.opt.choice[0].get_choice_slice( - &String::from("the quick brown fox jumped over the lazy dog"), - &config - ) + String::from(" q ck br"), + MockStdout::str_from_buf_writer(handle) ); } #[test] fn print_1_to_3_separated_by_regex_group_vowels() { let config = Config::from_iter(vec!["choose", "1:3", "-f", "[aeiou]"]); + let mut handle = BufWriter::new(MockStdout::new()); + config.opt.choice[0].print_choice( + &String::from("the 
quick brown fox jumped over the lazy dog"), + &config, + &mut handle, + ); assert_eq!( - vec![" q", "ck br", "wn f"], - config.opt.choice[0].get_choice_slice( - &String::from("the quick brown fox jumped over the lazy dog"), - &config - ) + String::from(" q ck br wn f"), + MockStdout::str_from_buf_writer(handle) ); } #[test] fn print_3_to_1() { let config = Config::from_iter(vec!["choose", "3:1"]); + let mut handle = BufWriter::new(MockStdout::new()); + config.opt.choice[0].print_choice( + &String::from("rust lang is pretty darn cool"), + &config, + &mut handle, + ); assert_eq!( - vec!["pretty", "is", "lang"], - config.opt.choice[0] - .get_choice_slice(&String::from("rust lang is pretty darn cool"), &config) + String::from("pretty is lang"), + MockStdout::str_from_buf_writer(handle) + ); + } + + #[test] + fn print_3_to_1_exclusive() { + let config = Config::from_iter(vec!["choose", "3:1", "-x"]); + let mut handle = BufWriter::new(MockStdout::new()); + config.opt.choice[0].print_choice( + &String::from("rust lang is pretty darn cool"), + &config, + &mut handle, + ); + assert_eq!( + String::from("is lang"), + MockStdout::str_from_buf_writer(handle) + ); + } + + #[test] + fn print_1_to_3_nonexistant_field_separator() { + let config = Config::from_iter(vec!["choose", "1:3", "-f", "#"]); + let mut handle = BufWriter::new(MockStdout::new()); + config.opt.choice[0].print_choice( + &String::from("rust lang is pretty darn cool"), + &config, + &mut handle, ); + assert_eq!(String::from(""), MockStdout::str_from_buf_writer(handle)); } + #[test] + fn print_0_nonexistant_field_separator() { + let config = Config::from_iter(vec!["choose", "0", "-f", "#"]); + let mut handle = BufWriter::new(MockStdout::new()); + config.opt.choice[0].print_choice( + &String::from("rust lang is pretty darn cool"), + &config, + &mut handle, + ); + assert_eq!( + String::from("rust lang is pretty darn cool"), + MockStdout::str_from_buf_writer(handle) + ); + } + + #[test] + fn 
print_0_to_3_nonexistant_field_separator() { + let config = Config::from_iter(vec!["choose", "0:3", "-f", "#"]); + let mut handle = BufWriter::new(MockStdout::new()); + config.opt.choice[0].print_choice( + &String::from("rust lang is pretty darn cool"), + &config, + &mut handle, + ); + assert_eq!( + String::from("rust lang is pretty darn cool"), + MockStdout::str_from_buf_writer(handle) + ); + } + + #[test] + fn print_0_with_preceding_separator() { + let config = Config::from_iter(vec!["choose", "0"]); + let mut handle = BufWriter::new(MockStdout::new()); + config.opt.choice[0].print_choice( + &String::from(" rust lang is pretty darn cool"), + &config, + &mut handle, + ); + assert_eq!( + String::from("rust"), + MockStdout::str_from_buf_writer(handle) + ); + } } mod is_reverse_range_tests { @@ -260,7 +411,5 @@ mod tests { let config = Config::from_iter(vec!["choose", "4:2"]); assert_eq!(true, config.opt.choice[0].is_reverse_range()); } - } - } diff --git a/src/config.rs b/src/config.rs index 8dfba5e..b435d02 100644 --- a/src/config.rs +++ b/src/config.rs @@ -38,7 +38,17 @@ pub struct Config { } impl Config { - pub fn new(opt: Opt) -> Self { + pub fn new(mut opt: Opt) -> Self { + if opt.exclusive { + for mut choice in &mut opt.choice { + if choice.is_reverse_range() { + choice.start = choice.start - 1; + } else { + choice.end = choice.end - 1; + } + } + } + let separator = Regex::new(match &opt.field_separator { Some(s) => s, None => "[[:space:]]", @@ -57,7 +67,7 @@ impl Config { let cap = match re.captures_iter(src).next() { Some(v) => v, None => match src.parse() { - Ok(x) => return Ok(Choice::Field(x)), + Ok(x) => return Ok(Choice::new(x, x)), Err(_) => { eprintln!("failed to parse choice argument: {}", src); // Exit code of 2 means failed to parse choice argument @@ -67,10 +77,10 @@ impl Config { }; let start = if cap[1].is_empty() { - None + usize::min_value() } else { match cap[1].parse() { - Ok(x) => Some(x), + Ok(x) => x, Err(_) => { eprintln!("failed to 
parse range start: {}", &cap[1]); process::exit(2); @@ -79,10 +89,10 @@ impl Config { }; let end = if cap[2].is_empty() { - None + usize::max_value() } else { match cap[2].parse() { - Ok(x) => Some(x), + Ok(x) => x, Err(_) => { eprintln!("failed to parse range end: {}", &cap[2]); process::exit(2); @@ -90,7 +100,7 @@ impl Config { } }; - return Ok(Choice::FieldRange((start, end))); + return Ok(Choice::new(start, end)); } } @@ -102,62 +112,41 @@ mod tests { use super::*; #[test] - fn parse_single_choice() { + fn parse_single_choice_start() { let result = Config::parse_choice("6").unwrap(); - assert_eq!( - 6, - match result { - Choice::Field(x) => x, - _ => panic!(), - } - ) + assert_eq!(6, result.start) + } + + #[test] + fn parse_single_choice_end() { + let result = Config::parse_choice("6").unwrap(); + assert_eq!(6, result.end) } #[test] fn parse_none_started_range() { let result = Config::parse_choice(":5").unwrap(); - assert_eq!( - (None, Some(5)), - match result { - Choice::FieldRange(x) => x, - _ => panic!(), - } - ) + assert_eq!((usize::min_value(), 5), (result.start, result.end)) } #[test] fn parse_none_terminated_range() { let result = Config::parse_choice("5:").unwrap(); - assert_eq!( - (Some(5), None), - match result { - Choice::FieldRange(x) => x, - _ => panic!(), - } - ) + assert_eq!((5, usize::max_value()), (result.start, result.end)) } #[test] fn parse_full_range() { let result = Config::parse_choice("5:7").unwrap(); - assert_eq!( - (Some(5), Some(7)), - match result { - Choice::FieldRange(x) => x, - _ => panic!(), - } - ) + assert_eq!((5, 7), (result.start, result.end)) } #[test] fn parse_beginning_to_end_range() { let result = Config::parse_choice(":").unwrap(); assert_eq!( - (None, None), - match result { - Choice::FieldRange(x) => x, - _ => panic!(), - } + (usize::min_value(), usize::max_value()), + (result.start, result.end) ) } @@ -173,5 +162,4 @@ mod tests { //assert!(Config::parse_choice("d:i").is_err()); //} } - } diff --git a/src/main.rs 
b/src/main.rs index 1050950..51f1b1f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -28,7 +28,7 @@ fn main() { for choice in &config.opt.choice { choice.print_choice(&l, &config, &mut handle); } - writeln!(handle, ""); + handle.write(b"\n").unwrap(); } Err(e) => println!("ERROR: {}", e), } diff --git a/test/bench.sh b/test/bench.sh new file mode 100755 index 0000000..53160b1 --- /dev/null +++ b/test/bench.sh @@ -0,0 +1,10 @@ +#!/bin/bash +cargo build --release # always be up to date +output="bench_output" +mkdir -p $output +inputs=($(find test -name "long*txt" | sort -r)) +for i in {0..4} +do + echo ${inputs[$i]} + bench "target/release/choose 3:5 -i ${inputs[$i]}" > $output/$1$i.bench +done |