diff options
author | Astro <astro@spaceboyz.net> | 2017-05-16 02:23:38 +0200 |
---|---|---|
committer | Andrew Gallant <jamslam@gmail.com> | 2017-05-15 20:23:38 -0400 |
commit | 9ff070cef00db6948aa0f67408f49b1fddd7947f (patch) | |
tree | 8110a6410ab00d12b58cd333075566a9420d12ea | |
parent | 1fa55169f5622d1e69b9de9264c64728196f1835 (diff) |
Implement numeric and reversed sorting (#57)
add numeric sorting to xsv sort command
-rw-r--r-- | src/cmd/sort.rs | 62 | ||||
-rw-r--r-- | tests/test_sort.rs | 44 |
2 files changed, 100 insertions, 6 deletions
diff --git a/src/cmd/sort.rs b/src/cmd/sort.rs index 2703a4c..bf6beae 100644 --- a/src/cmd/sort.rs +++ b/src/cmd/sort.rs @@ -4,6 +4,7 @@ use CliResult; use config::{Config, Delimiter}; use select::SelectColumns; use util; +use std::str::from_utf8; static USAGE: &'static str = " Sorts CSV data lexicographically. @@ -16,6 +17,8 @@ Usage: sort options: -s, --select <arg> Select a subset of columns to sort. See 'xsv select --help' for the format details. + -N, --numeric Compare according to string numerical value + -R, --reverse Reverse order Common options: -h, --help Display this message @@ -32,6 +35,8 @@ Common options: struct Args { arg_input: Option<String>, flag_select: SelectColumns, + flag_numeric: bool, + flag_reverse: bool, flag_output: Option<String>, flag_no_headers: bool, flag_delimiter: Option<Delimiter>, @@ -40,6 +45,8 @@ struct Args { pub fn run(argv: &[&str]) -> CliResult<()> { let args: Args = try!(util::get_args(USAGE, argv)); + let numeric = args.flag_numeric; + let reverse = args.flag_reverse; let rconfig = Config::new(&args.arg_input) .delimiter(args.flag_delimiter) .no_headers(args.flag_no_headers) @@ -52,12 +59,32 @@ pub fn run(argv: &[&str]) -> CliResult<()> { let sel = try!(rconfig.selection(&*headers)); let mut all = try!(rdr.byte_records().collect::<Result<Vec<_>, _>>()); - all.sort_by(|r1, r2| { - // TODO: Numeric sorting. The tricky part, IMO, is figuring out - // how to expose it in the CLI interface. Not sure of the right - // answer at the moment. - iter_cmp(sel.select(&**r1), sel.select(&**r2)) - }); + match (numeric, reverse) { + (false, false) => + all.sort_by(|r1, r2| { + let a = sel.select(r1.as_slice()); + let b = sel.select(r2.as_slice()); + iter_cmp(a, b) + }), + (true, false) => + all.sort_by(|r1, r2| { + let a = sel.select(r1.as_slice()); + let b = sel.select(r2.as_slice()); + iter_cmp_num(a, b) + }), + (false, true) => + all.sort_by(|r1, r2| { + let a = sel.select(r1.as_slice()); + let b = sel.select(r2.as_slice()); + iter_cmp(b, a) + }), + (true, true) => + all.sort_by(|r1, r2| { + let a = sel.select(r1.as_slice()); + let b = sel.select(r2.as_slice()); + iter_cmp_num(b, a) + }), + } try!(rconfig.write_headers(&mut rdr, &mut wtr)); for r in all.into_iter() { @@ -81,3 +108,26 @@ pub fn iter_cmp<A, L, R>(mut a: L, mut b: R) -> cmp::Ordering } } } + +/// Try parsing `a` and `b` as numbers when ordering +pub fn iter_cmp_num<'a, L, R>(mut a: L, mut b: R) -> cmp::Ordering + where L: Iterator<Item=&'a [u8]>, R: Iterator<Item=&'a [u8]> { + loop { + match (next_num(&mut a), next_num(&mut b)) { + (None, None) => return cmp::Ordering::Equal, + (None, _ ) => return cmp::Ordering::Less, + (_ , None) => return cmp::Ordering::Greater, + (Some(x), Some(y)) => match x.cmp(&y) { + cmp::Ordering::Equal => (), + non_eq => return non_eq, + }, + } + } +} + +fn next_num<'a, X>(xs: &mut X) -> Option<i64> + where X: Iterator<Item=&'a [u8]> { + xs.next() + .and_then(|bytes| from_utf8(bytes).ok()) + .and_then(|s| s.parse::<i64>().ok()) +} diff --git a/tests/test_sort.rs b/tests/test_sort.rs index 47f55ce..8e3bc44 100644 --- a/tests/test_sort.rs +++ b/tests/test_sort.rs @@ -53,6 +53,50 @@ fn sort_select() { assert_eq!(got, expected); } +#[test] +fn sort_numeric() { + let wrk = Workdir::new("sort_numeric"); + wrk.create("in.csv", vec![ + svec!["N", "S"], + svec!["10", "a"], + svec!["2", "c"], + svec!["1", "b"], + ]); + + let mut cmd = wrk.command("sort"); + cmd.arg("-N").arg("in.csv"); + + let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd); + let expected = vec![ + svec!["N", "S"], + svec!["1", "b"], + svec!["2", "c"], + svec!["10", "a"], + ]; + assert_eq!(got, expected); +} + +#[test] +fn sort_reverse() { + let wrk = Workdir::new("sort_reverse"); + wrk.create("in.csv", vec![ + svec!["R", "S"], + svec!["1", "b"], + svec!["2", "a"], + ]); + + let mut cmd = wrk.command("sort"); + cmd.arg("-R").arg("--no-headers").arg("in.csv"); + + let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd); + let expected = vec![ + svec!["R", "S"], + svec!["2", "a"], + svec!["1", "b"], + ]; + assert_eq!(got, expected); +} + /// Order `a` and `b` lexicographically using `Ord` pub fn iter_cmp<A, L, R>(mut a: L, mut b: R) -> cmp::Ordering where A: Ord, L: Iterator<Item=A>, R: Iterator<Item=A> { |