summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Astro <astro@spaceboyz.net>, 2017-05-16 02:23:38 +0200
committer: Andrew Gallant <jamslam@gmail.com>, 2017-05-15 20:23:38 -0400
commit: 9ff070cef00db6948aa0f67408f49b1fddd7947f (patch)
tree: 8110a6410ab00d12b58cd333075566a9420d12ea
parent: 1fa55169f5622d1e69b9de9264c64728196f1835 (diff)
Implement numeric and reversed sorting (#57)
add numeric sorting to xsv sort command
-rw-r--r-- src/cmd/sort.rs 62
-rw-r--r-- tests/test_sort.rs 44
2 files changed, 100 insertions, 6 deletions
diff --git a/src/cmd/sort.rs b/src/cmd/sort.rs
index 2703a4c..bf6beae 100644
--- a/src/cmd/sort.rs
+++ b/src/cmd/sort.rs
@@ -4,6 +4,7 @@ use CliResult;
use config::{Config, Delimiter};
use select::SelectColumns;
use util;
+use std::str::from_utf8;
static USAGE: &'static str = "
Sorts CSV data lexicographically.
@@ -16,6 +17,8 @@ Usage:
sort options:
-s, --select <arg> Select a subset of columns to sort.
See 'xsv select --help' for the format details.
+ -N, --numeric Compare according to string numerical value
+ -R, --reverse Reverse order
Common options:
-h, --help Display this message
@@ -32,6 +35,8 @@ Common options:
struct Args {
arg_input: Option<String>,
flag_select: SelectColumns,
+ flag_numeric: bool,
+ flag_reverse: bool,
flag_output: Option<String>,
flag_no_headers: bool,
flag_delimiter: Option<Delimiter>,
@@ -40,6 +45,8 @@ struct Args {
pub fn run(argv: &[&str]) -> CliResult<()> {
let args: Args = try!(util::get_args(USAGE, argv));
+ let numeric = args.flag_numeric;
+ let reverse = args.flag_reverse;
let rconfig = Config::new(&args.arg_input)
.delimiter(args.flag_delimiter)
.no_headers(args.flag_no_headers)
@@ -52,12 +59,32 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
let sel = try!(rconfig.selection(&*headers));
let mut all = try!(rdr.byte_records().collect::<Result<Vec<_>, _>>());
- all.sort_by(|r1, r2| {
- // TODO: Numeric sorting. The tricky part, IMO, is figuring out
- // how to expose it in the CLI interface. Not sure of the right
- // answer at the moment.
- iter_cmp(sel.select(&**r1), sel.select(&**r2))
- });
+ match (numeric, reverse) {
+ (false, false) =>
+ all.sort_by(|r1, r2| {
+ let a = sel.select(r1.as_slice());
+ let b = sel.select(r2.as_slice());
+ iter_cmp(a, b)
+ }),
+ (true, false) =>
+ all.sort_by(|r1, r2| {
+ let a = sel.select(r1.as_slice());
+ let b = sel.select(r2.as_slice());
+ iter_cmp_num(a, b)
+ }),
+ (false, true) =>
+ all.sort_by(|r1, r2| {
+ let a = sel.select(r1.as_slice());
+ let b = sel.select(r2.as_slice());
+ iter_cmp(b, a)
+ }),
+ (true, true) =>
+ all.sort_by(|r1, r2| {
+ let a = sel.select(r1.as_slice());
+ let b = sel.select(r2.as_slice());
+ iter_cmp_num(b, a)
+ }),
+ }
try!(rconfig.write_headers(&mut rdr, &mut wtr));
for r in all.into_iter() {
@@ -81,3 +108,26 @@ pub fn iter_cmp<A, L, R>(mut a: L, mut b: R) -> cmp::Ordering
}
}
}
+
+/// Order two iterators of byte fields by comparing them pairwise as `i64` values (parsed by `next_num`); the first unequal pair decides, and a side that yields no number while the other still does orders first.
+pub fn iter_cmp_num<'a, L, R>(mut a: L, mut b: R) -> cmp::Ordering
+ where L: Iterator<Item=&'a [u8]>, R: Iterator<Item=&'a [u8]> {
+ loop {
+ match (next_num(&mut a), next_num(&mut b)) { // take one field from each side, parsed as i64
+ (None, None) => return cmp::Ordering::Equal, // neither side yields a number: equal. NOTE(review): next_num also returns None for a field that is not valid UTF-8 or not an i64, so such a field ends the comparison as if the fields had run out; later fields are never examined
+ (None, _ ) => return cmp::Ordering::Less, // only `a` has no number here: `a` orders before `b`
+ (_ , None) => return cmp::Ordering::Greater, // only `b` has no number here: `a` orders after `b`
+ (Some(x), Some(y)) => match x.cmp(&y) {
+ cmp::Ordering::Equal => (), // tie on this field: continue with the next pair
+ non_eq => return non_eq, // first differing pair decides the result
+ },
+ }
+ }
+}
+
+fn next_num<'a, X>(xs: &mut X) -> Option<i64> // Advance `xs` by one field and return that field parsed as an `i64`; `None` if no field is left or the field does not parse.
+ where X: Iterator<Item=&'a [u8]> {
+ xs.next() // None once the iterator is exhausted
+ .and_then(|bytes| from_utf8(bytes).ok()) // None when the bytes are not valid UTF-8
+ .and_then(|s| s.parse::<i64>().ok()) // None when the text is not an i64 (e.g. empty, or a decimal such as "1.5")
+}
diff --git a/tests/test_sort.rs b/tests/test_sort.rs
index 47f55ce..8e3bc44 100644
--- a/tests/test_sort.rs
+++ b/tests/test_sort.rs
@@ -53,6 +53,50 @@ fn sort_select() {
assert_eq!(got, expected);
}
+#[test]
+fn sort_numeric() { // `sort -N` must order rows by the numeric value of the first column: 1, 2, 10
+ let wrk = Workdir::new("sort_numeric");
+ wrk.create("in.csv", vec![
+ svec!["N", "S"], // first row; expected to remain first in the output
+ svec!["10", "a"], // byte-wise, "10" would order before "2"; numerically it must come last
+ svec!["2", "c"],
+ svec!["1", "b"],
+ ]);
+
+ let mut cmd = wrk.command("sort");
+ cmd.arg("-N").arg("in.csv"); // -N is the --numeric flag
+
+ let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
+ let expected = vec![
+ svec!["N", "S"],
+ svec!["1", "b"],
+ svec!["2", "c"],
+ svec!["10", "a"],
+ ];
+ assert_eq!(got, expected);
+}
+
+#[test]
+fn sort_reverse() { // `sort -R` must emit the rows in descending order
+ let wrk = Workdir::new("sort_reverse");
+ wrk.create("in.csv", vec![
+ svec!["R", "S"],
+ svec!["1", "b"],
+ svec!["2", "a"],
+ ]);
+
+ let mut cmd = wrk.command("sort");
+ cmd.arg("-R").arg("--no-headers").arg("in.csv"); // -R is the --reverse flag; NOTE(review): with --no-headers the ["R", "S"] row is presumably sorted as data too — confirm against Config
+
+ let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
+ let expected = vec![
+ svec!["R", "S"], // byte-wise "R" > "2" > "1", so this row is first in descending order
+ svec!["2", "a"],
+ svec!["1", "b"],
+ ];
+ assert_eq!(got, expected);
+}
+
/// Order `a` and `b` lexicographically using `Ord`
pub fn iter_cmp<A, L, R>(mut a: L, mut b: R) -> cmp::Ordering
where A: Ord, L: Iterator<Item=A>, R: Iterator<Item=A> {