summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Ryan Geary <rtgnj42@gmail.com> 2020-02-24 23:59:51 -0500
committer Ryan Geary <rtgnj42@gmail.com> 2020-03-10 00:03:14 -0400
commit 389c29822b2af5f4a828734eb0a2d70b4f82a900 (patch)
tree 7983809185cf846c2182f0eac89654359860d91a
parent c729ad3f004eb2313a4dfa206b7103939abeaac4 (diff)
Improve performance and add profiling tooling
Add tags, todo, *.bench, *.svg and bench_output to .gitignore.
Add test/bench.sh script. bench.sh runs the `bench` command on each test/long*txt file with range 3:5 and saves the output to a file for comparing performance across file sizes.
Inline printing in get_choice_slice.
Change BufWriter<..stdout..> to BufWriter<T>.
Add MockStdout for testing printing.
Add more reverse range tests.
Simplify word finding with a more uniform bounds check.
Add Makefile for generating flamegraphs.
Redefine Choice struct as a start and end integer.
Improve algorithm for finding words to print.
Settle exclusivity at Config construction time.
Add tests for nonexistent field_seps.
Add regression test for preceding separator.
Use handle.write instead of write! macro for tremendous speedup.
-rw-r--r-- .gitignore 7
-rw-r--r-- Makefile 11
-rw-r--r-- src/choice.rs 379
-rw-r--r-- src/config.rs 72
-rw-r--r-- src/main.rs 2
-rwxr-xr-x test/bench.sh 10
6 files changed, 323 insertions, 158 deletions
diff --git a/.gitignore b/.gitignore
index 53eaa21..24e2899 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,9 @@
/target
**/*.rs.bk
+tags
+todo
+bench_output/
+**/*.bench
+**/*.svg
+test/long*txt
+perf.data*
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..5dea5a3
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,11 @@
+flamegraph: release
+ perf record --call-graph dwarf,16384 -e cpu-clock -F 997 target/release/choose -i test/long_long_long_long.txt 3:5
+ perf script | stackcollapse-perf.pl | stackcollapse-recursive.pl | c++filt | flamegraph.pl > flamegraphs/working.svg
+
+flamegraph_commit: release
+ perf record --call-graph dwarf,16384 -e cpu-clock -F 997 target/release/choose -i test/long_long_long_long.txt 3:5
+ perf script | stackcollapse-perf.pl | stackcollapse-recursive.pl | c++filt | flamegraph.pl > flamegraphs/`git log -n 1 --pretty=format:"%h"`.svg
+
+.PHONY: release
+release:
+ cargo build --release
diff --git a/src/choice.rs b/src/choice.rs
index 62477c1..b7bf879 100644
--- a/src/choice.rs
+++ b/src/choice.rs
@@ -1,89 +1,74 @@
-use crate::io::{BufWriter, Write};
-use std::convert::TryInto;
-
use crate::config::Config;
-
-pub type Range = (Option<u32>, Option<u32>);
+use crate::io::{BufWriter, Write};
#[derive(Debug)]
-pub enum Choice {
- Field(u32),
- FieldRange(Range),
+pub struct Choice {
+ pub start: usize,
+ pub end: usize,
}
impl Choice {
- pub fn print_choice(
+ pub fn new(start: usize, end: usize) -> Self {
+ Choice { start, end }
+ }
+
+ pub fn print_choice<WriterType: Write>(
&self,
line: &String,
config: &Config,
- handle: &mut BufWriter<std::io::StdoutLock>,
+ handle: &mut BufWriter<WriterType>,
) {
- write!(handle, "{}", self.get_choice_slice(line, config).join(" "));
- }
-
- pub fn is_reverse_range(&self) -> bool {
- match self {
- Choice::Field(_) => false,
- Choice::FieldRange(r) => match r {
- (Some(start), Some(end)) => end < start,
- _ => false,
- },
- }
- }
+ let mut line_iter = config.separator.split(line).filter(|s| !s.is_empty());
- fn get_choice_slice<'a>(&self, line: &'a String, config: &Config) -> Vec<&'a str> {
- let words = config
- .separator
- .split(line)
- .into_iter()
- .filter(|s| !s.is_empty())
- .enumerate();
-
- let mut slices = match self {
- Choice::Field(i) => words
- .filter(|x| x.0 == *i as usize)
- .map(|x| x.1)
- .collect::<Vec<&str>>(),
- Choice::FieldRange(r) => match r {
- (None, None) => words.map(|x| x.1).collect::<Vec<&str>>(),
- (Some(start), None) => words
- .filter(|x| x.0 >= (*start).try_into().unwrap())
- .map(|x| x.1)
- .collect::<Vec<&str>>(),
- (None, Some(end)) => {
- let e: usize = if config.opt.exclusive {
- (end - 1).try_into().unwrap()
- } else {
- (*end).try_into().unwrap()
- };
- words
- .filter(|x| x.0 <= e)
- .map(|x| x.1)
- .collect::<Vec<&str>>()
+ if self.is_reverse_range() {
+ if self.end > 0 {
+ line_iter.nth(self.end - 1);
+ }
+
+ let mut stack = Vec::new();
+ for i in 0..=(self.start - self.end) {
+ match line_iter.next() {
+ Some(s) => stack.push(s),
+ None => break,
}
- (Some(start), Some(end)) => {
- let e: usize = if config.opt.exclusive {
- (end - 1).try_into().unwrap()
- } else {
- (*end).try_into().unwrap()
- };
- words
- .filter(|x| {
- (x.0 <= e && x.0 >= (*start).try_into().unwrap())
- || self.is_reverse_range()
- && (x.0 >= e && x.0 <= (*start).try_into().unwrap())
- })
- .map(|x| x.1)
- .collect::<Vec<&str>>()
+
+ if self.start <= self.end + i {
+ break;
}
- },
- };
+ }
- if self.is_reverse_range() {
- slices.reverse();
+ loop {
+ match stack.pop() {
+ Some(s) => Choice::write_bytes(handle, s.as_bytes()),
+ None => break,
+ }
+ }
+ } else {
+ if self.start > 0 {
+ line_iter.nth(self.start - 1);
+ }
+
+ for i in 0..=(self.end - self.start) {
+ match line_iter.next() {
+ Some(s) => Choice::write_bytes(handle, s.as_bytes()),
+ None => break,
+ };
+
+ if self.end <= self.start + i {
+ break;
+ }
+ }
}
+ }
- return slices;
+ fn write_bytes<WriterType: Write>(handle: &mut BufWriter<WriterType>, b: &[u8]) {
+ handle.write(b).unwrap();
+ handle.write(b" ").unwrap();
+ }
+
+ #[cfg_attr(feature = "flame_it", flame)]
+ pub fn is_reverse_range(&self) -> bool {
+ self.end < self.start
}
}
@@ -92,6 +77,7 @@ mod tests {
use crate::config::{Config, Opt};
use std::ffi::OsString;
+ use std::io::{self, BufWriter, Write};
use structopt::StructOpt;
impl Config {
@@ -104,128 +90,293 @@ mod tests {
}
}
- mod get_choice_slice_tests {
+ struct MockStdout {
+ pub buffer: String,
+ }
+
+ impl MockStdout {
+ fn new() -> Self {
+ MockStdout {
+ buffer: String::new(),
+ }
+ }
+
+ fn str_from_buf_writer(b: BufWriter<MockStdout>) -> String {
+ match b.into_inner() {
+ Ok(b) => b.buffer,
+ Err(_) => panic!("Failed to access BufWriter inner writer"),
+ }
+ .trim_end()
+ .to_string()
+ }
+ }
+
+ impl Write for MockStdout {
+ fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
+ let mut bytes_written = 0;
+ for i in buf {
+ self.buffer.push(*i as char);
+ bytes_written += 1;
+ }
+ Ok(bytes_written)
+ }
+
+ fn flush(&mut self) -> io::Result<()> {
+ Ok(())
+ }
+ }
+
+ mod print_choice_tests {
use super::*;
#[test]
fn print_0() {
let config = Config::from_iter(vec!["choose", "0"]);
+ let mut handle = BufWriter::new(MockStdout::new());
+
+ config.opt.choice[0].print_choice(
+ &String::from("rust is pretty cool"),
+ &config,
+ &mut handle,
+ );
+
assert_eq!(
- vec!["rust"],
- config.opt.choice[0]
- .get_choice_slice(&String::from("rust is pretty cool"), &config)
+ String::from("rust"),
+ MockStdout::str_from_buf_writer(handle)
);
}
#[test]
fn print_after_end() {
let config = Config::from_iter(vec!["choose", "10"]);
- assert_eq!(
- Vec::<&str>::new(),
- config.opt.choice[0]
- .get_choice_slice(&String::from("rust is pretty cool"), &config)
+ let mut handle = BufWriter::new(MockStdout::new());
+
+ config.opt.choice[0].print_choice(
+ &String::from("rust is pretty cool"),
+ &config,
+ &mut handle,
);
+
+ assert_eq!(String::new(), MockStdout::str_from_buf_writer(handle));
}
#[test]
fn print_out_of_order() {
let config = Config::from_iter(vec!["choose", "3", "1"]);
- assert_eq!(
- vec!["cool"],
- config.opt.choice[0]
- .get_choice_slice(&String::from("rust is pretty cool"), &config)
+ let mut handle = BufWriter::new(MockStdout::new());
+ let mut handle1 = BufWriter::new(MockStdout::new());
+
+ config.opt.choice[0].print_choice(
+ &String::from("rust is pretty cool"),
+ &config,
+ &mut handle,
);
+
assert_eq!(
- vec!["is"],
- config.opt.choice[1]
- .get_choice_slice(&String::from("rust is pretty cool"), &config)
+ String::from("cool"),
+ MockStdout::str_from_buf_writer(handle)
);
+
+ config.opt.choice[1].print_choice(
+ &String::from("rust is pretty cool"),
+ &config,
+ &mut handle1,
+ );
+
+ assert_eq!(String::from("is"), MockStdout::str_from_buf_writer(handle1));
}
#[test]
fn print_1_to_3_exclusive() {
let config = Config::from_iter(vec!["choose", "1:3", "-x"]);
+ let mut handle = BufWriter::new(MockStdout::new());
+ config.opt.choice[0].print_choice(
+ &String::from("rust is pretty cool"),
+ &config,
+ &mut handle,
+ );
assert_eq!(
- vec!["is", "pretty"],
- config.opt.choice[0]
- .get_choice_slice(&String::from("rust is pretty cool"), &config)
+ String::from("is pretty"),
+ MockStdout::str_from_buf_writer(handle)
);
}
#[test]
fn print_1_to_3() {
let config = Config::from_iter(vec!["choose", "1:3"]);
+ let mut handle = BufWriter::new(MockStdout::new());
+ config.opt.choice[0].print_choice(
+ &String::from("rust is pretty cool"),
+ &config,
+ &mut handle,
+ );
assert_eq!(
- vec!["is", "pretty", "cool"],
- config.opt.choice[0]
- .get_choice_slice(&String::from("rust is pretty cool"), &config)
+ String::from("is pretty cool"),
+ MockStdout::str_from_buf_writer(handle)
);
}
#[test]
fn print_1_to_3_separated_by_hashtag() {
let config = Config::from_iter(vec!["choose", "1:3", "-f", "#"]);
+ let mut handle = BufWriter::new(MockStdout::new());
+ config.opt.choice[0].print_choice(
+ &String::from("rust#is#pretty#cool"),
+ &config,
+ &mut handle,
+ );
assert_eq!(
- vec!["is", "pretty", "cool"],
- config.opt.choice[0]
- .get_choice_slice(&String::from("rust#is#pretty#cool"), &config)
+ String::from("is pretty cool"),
+ MockStdout::str_from_buf_writer(handle)
);
}
#[test]
fn print_1_to_3_separated_by_varying_multiple_hashtag_exclusive() {
let config = Config::from_iter(vec!["choose", "1:3", "-f", "#", "-x"]);
+ let mut handle = BufWriter::new(MockStdout::new());
+ config.opt.choice[0].print_choice(
+ &String::from("rust##is###pretty####cool"),
+ &config,
+ &mut handle,
+ );
assert_eq!(
- vec!["is", "pretty"],
- config.opt.choice[0]
- .get_choice_slice(&String::from("rust##is###pretty####cool"), &config)
+ String::from("is pretty"),
+ MockStdout::str_from_buf_writer(handle)
);
}
#[test]
fn print_1_to_3_separated_by_varying_multiple_hashtag() {
let config = Config::from_iter(vec!["choose", "1:3", "-f", "#"]);
+ let mut handle = BufWriter::new(MockStdout::new());
+ config.opt.choice[0].print_choice(
+ &String::from("rust##is###pretty####cool"),
+ &config,
+ &mut handle,
+ );
assert_eq!(
- vec!["is", "pretty", "cool"],
- config.opt.choice[0]
- .get_choice_slice(&String::from("rust##is###pretty####cool"), &config)
+ String::from("is pretty cool"),
+ MockStdout::str_from_buf_writer(handle)
);
}
#[test]
fn print_1_to_3_separated_by_regex_group_vowels_exclusive() {
let config = Config::from_iter(vec!["choose", "1:3", "-f", "[aeiou]", "-x"]);
+ let mut handle = BufWriter::new(MockStdout::new());
+ config.opt.choice[0].print_choice(
+ &String::from("the quick brown fox jumped over the lazy dog"),
+ &config,
+ &mut handle,
+ );
assert_eq!(
- vec![" q", "ck br"],
- config.opt.choice[0].get_choice_slice(
- &String::from("the quick brown fox jumped over the lazy dog"),
- &config
- )
+ String::from(" q ck br"),
+ MockStdout::str_from_buf_writer(handle)
);
}
#[test]
fn print_1_to_3_separated_by_regex_group_vowels() {
let config = Config::from_iter(vec!["choose", "1:3", "-f", "[aeiou]"]);
+ let mut handle = BufWriter::new(MockStdout::new());
+ config.opt.choice[0].print_choice(
+ &String::from("the quick brown fox jumped over the lazy dog"),
+ &config,
+ &mut handle,
+ );
assert_eq!(
- vec![" q", "ck br", "wn f"],
- config.opt.choice[0].get_choice_slice(
- &String::from("the quick brown fox jumped over the lazy dog"),
- &config
- )
+ String::from(" q ck br wn f"),
+ MockStdout::str_from_buf_writer(handle)
);
}
#[test]
fn print_3_to_1() {
let config = Config::from_iter(vec!["choose", "3:1"]);
+ let mut handle = BufWriter::new(MockStdout::new());
+ config.opt.choice[0].print_choice(
+ &String::from("rust lang is pretty darn cool"),
+ &config,
+ &mut handle,
+ );
assert_eq!(
- vec!["pretty", "is", "lang"],
- config.opt.choice[0]
- .get_choice_slice(&String::from("rust lang is pretty darn cool"), &config)
+ String::from("pretty is lang"),
+ MockStdout::str_from_buf_writer(handle)
+ );
+ }
+
+ #[test]
+ fn print_3_to_1_exclusive() {
+ let config = Config::from_iter(vec!["choose", "3:1", "-x"]);
+ let mut handle = BufWriter::new(MockStdout::new());
+ config.opt.choice[0].print_choice(
+ &String::from("rust lang is pretty darn cool"),
+ &config,
+ &mut handle,
+ );
+ assert_eq!(
+ String::from("is lang"),
+ MockStdout::str_from_buf_writer(handle)
+ );
+ }
+
+ #[test]
+ fn print_1_to_3_nonexistant_field_separator() {
+ let config = Config::from_iter(vec!["choose", "1:3", "-f", "#"]);
+ let mut handle = BufWriter::new(MockStdout::new());
+ config.opt.choice[0].print_choice(
+ &String::from("rust lang is pretty darn cool"),
+ &config,
+ &mut handle,
);
+ assert_eq!(String::from(""), MockStdout::str_from_buf_writer(handle));
}
+ #[test]
+ fn print_0_nonexistant_field_separator() {
+ let config = Config::from_iter(vec!["choose", "0", "-f", "#"]);
+ let mut handle = BufWriter::new(MockStdout::new());
+ config.opt.choice[0].print_choice(
+ &String::from("rust lang is pretty darn cool"),
+ &config,
+ &mut handle,
+ );
+ assert_eq!(
+ String::from("rust lang is pretty darn cool"),
+ MockStdout::str_from_buf_writer(handle)
+ );
+ }
+
+ #[test]
+ fn print_0_to_3_nonexistant_field_separator() {
+ let config = Config::from_iter(vec!["choose", "0:3", "-f", "#"]);
+ let mut handle = BufWriter::new(MockStdout::new());
+ config.opt.choice[0].print_choice(
+ &String::from("rust lang is pretty darn cool"),
+ &config,
+ &mut handle,
+ );
+ assert_eq!(
+ String::from("rust lang is pretty darn cool"),
+ MockStdout::str_from_buf_writer(handle)
+ );
+ }
+
+ #[test]
+ fn print_0_with_preceding_separator() {
+ let config = Config::from_iter(vec!["choose", "0"]);
+ let mut handle = BufWriter::new(MockStdout::new());
+ config.opt.choice[0].print_choice(
+ &String::from(" rust lang is pretty darn cool"),
+ &config,
+ &mut handle,
+ );
+ assert_eq!(
+ String::from("rust"),
+ MockStdout::str_from_buf_writer(handle)
+ );
+ }
}
mod is_reverse_range_tests {
@@ -260,7 +411,5 @@ mod tests {
let config = Config::from_iter(vec!["choose", "4:2"]);
assert_eq!(true, config.opt.choice[0].is_reverse_range());
}
-
}
-
}
diff --git a/src/config.rs b/src/config.rs
index 8dfba5e..b435d02 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -38,7 +38,17 @@ pub struct Config {
}
impl Config {
- pub fn new(opt: Opt) -> Self {
+ pub fn new(mut opt: Opt) -> Self {
+ if opt.exclusive {
+ for mut choice in &mut opt.choice {
+ if choice.is_reverse_range() {
+ choice.start = choice.start - 1;
+ } else {
+ choice.end = choice.end - 1;
+ }
+ }
+ }
+
let separator = Regex::new(match &opt.field_separator {
Some(s) => s,
None => "[[:space:]]",
@@ -57,7 +67,7 @@ impl Config {
let cap = match re.captures_iter(src).next() {
Some(v) => v,
None => match src.parse() {
- Ok(x) => return Ok(Choice::Field(x)),
+ Ok(x) => return Ok(Choice::new(x, x)),
Err(_) => {
eprintln!("failed to parse choice argument: {}", src);
// Exit code of 2 means failed to parse choice argument
@@ -67,10 +77,10 @@ impl Config {
};
let start = if cap[1].is_empty() {
- None
+ usize::min_value()
} else {
match cap[1].parse() {
- Ok(x) => Some(x),
+ Ok(x) => x,
Err(_) => {
eprintln!("failed to parse range start: {}", &cap[1]);
process::exit(2);
@@ -79,10 +89,10 @@ impl Config {
};
let end = if cap[2].is_empty() {
- None
+ usize::max_value()
} else {
match cap[2].parse() {
- Ok(x) => Some(x),
+ Ok(x) => x,
Err(_) => {
eprintln!("failed to parse range end: {}", &cap[2]);
process::exit(2);
@@ -90,7 +100,7 @@ impl Config {
}
};
- return Ok(Choice::FieldRange((start, end)));
+ return Ok(Choice::new(start, end));
}
}
@@ -102,62 +112,41 @@ mod tests {
use super::*;
#[test]
- fn parse_single_choice() {
+ fn parse_single_choice_start() {
let result = Config::parse_choice("6").unwrap();
- assert_eq!(
- 6,
- match result {
- Choice::Field(x) => x,
- _ => panic!(),
- }
- )
+ assert_eq!(6, result.start)
+ }
+
+ #[test]
+ fn parse_single_choice_end() {
+ let result = Config::parse_choice("6").unwrap();
+ assert_eq!(6, result.end)
}
#[test]
fn parse_none_started_range() {
let result = Config::parse_choice(":5").unwrap();
- assert_eq!(
- (None, Some(5)),
- match result {
- Choice::FieldRange(x) => x,
- _ => panic!(),
- }
- )
+ assert_eq!((usize::min_value(), 5), (result.start, result.end))
}
#[test]
fn parse_none_terminated_range() {
let result = Config::parse_choice("5:").unwrap();
- assert_eq!(
- (Some(5), None),
- match result {
- Choice::FieldRange(x) => x,
- _ => panic!(),
- }
- )
+ assert_eq!((5, usize::max_value()), (result.start, result.end))
}
#[test]
fn parse_full_range() {
let result = Config::parse_choice("5:7").unwrap();
- assert_eq!(
- (Some(5), Some(7)),
- match result {
- Choice::FieldRange(x) => x,
- _ => panic!(),
- }
- )
+ assert_eq!((5, 7), (result.start, result.end))
}
#[test]
fn parse_beginning_to_end_range() {
let result = Config::parse_choice(":").unwrap();
assert_eq!(
- (None, None),
- match result {
- Choice::FieldRange(x) => x,
- _ => panic!(),
- }
+ (usize::min_value(), usize::max_value()),
+ (result.start, result.end)
)
}
@@ -173,5 +162,4 @@ mod tests {
//assert!(Config::parse_choice("d:i").is_err());
//}
}
-
}
diff --git a/src/main.rs b/src/main.rs
index 1050950..51f1b1f 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -28,7 +28,7 @@ fn main() {
for choice in &config.opt.choice {
choice.print_choice(&l, &config, &mut handle);
}
- writeln!(handle, "");
+ handle.write(b"\n").unwrap();
}
Err(e) => println!("ERROR: {}", e),
}
diff --git a/test/bench.sh b/test/bench.sh
new file mode 100755
index 0000000..53160b1
--- /dev/null
+++ b/test/bench.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+cargo build --release # always be up to date
+output="bench_output"
+mkdir -p $output
+inputs=($(find test -name "long*txt" | sort -r))
+for i in {0..4}
+do
+ echo ${inputs[$i]}
+ bench "target/release/choose 3:5 -i ${inputs[$i]}" > $output/$1$i.bench
+done