author     Andrew Gallant <jamslam@gmail.com>    2014-11-21 23:40:10 -0500
committer  Andrew Gallant <jamslam@gmail.com>    2014-11-21 23:40:10 -0500
commit     13ab17adfa6d973e20324680836c3c659d2cec23 (patch)
tree       dba32be7a695ecc87204dae0c8c8b68d47b06475
parent     8963797936ff0d739c05b052a33d346090253d67 (diff)

Miscellaneous updates. (tag: 0.4.1)
-rw-r--r--   Cargo.lock               4
-rw-r--r--   Cargo.toml               9
-rw-r--r--   src/cmd/cat.rs          10
-rw-r--r--   src/cmd/count.rs        21
-rw-r--r--   src/cmd/fixlengths.rs    6
-rw-r--r--   src/cmd/join.rs         20
-rw-r--r--   src/main.rs              4
-rw-r--r--   src/select.rs            3
8 files changed, 49 insertions, 28 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 9daac39..683b61e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2,7 +2,7 @@
name = "xsv"
version = "0.4.0"
dependencies = [
- "csv 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "csv 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)",
"docopt 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)",
"quickcheck 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"streaming-stats 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -11,7 +11,7 @@ dependencies = [
[[package]]
name = "csv"
-version = "0.5.1"
+version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
diff --git a/Cargo.toml b/Cargo.toml
index 97f869d..5005eba 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "xsv"
-version = "0.4.0"
+version = "0.4.1"
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = "A high performance CSV command line toolkit."
documentation = "http://burntsushi.net/rustdoc/xsv/"
@@ -10,6 +10,13 @@ readme = "README.md"
keywords = ["csv", "tsv", "slice", "command"]
license = "Unlicense"
+[profile.release]
+opt-level = 3
+lto = true
+
+[profile.test]
+opt-level = 2
+
[dependencies]
csv = "0.5.*"
streaming-stats = "0.1.*"
diff --git a/src/cmd/cat.rs b/src/cmd/cat.rs
index d63be06..fe3e383 100644
--- a/src/cmd/cat.rs
+++ b/src/cmd/cat.rs
@@ -9,14 +9,14 @@ use util;
static USAGE: &'static str = "
Concatenates CSV data by column or by row.
-When concatenating by column, the columns will be written in the same order
-as the inputs given. The number of rows in the result is always equivalent to
-to the minimum number of rows across all given CSV data. (This behavior can
-be reversed with the '--pad' flag.)
+When concatenating by column, the columns will be written in the same order as
+the inputs given. The number of rows in the result is always equivalent to
+the minimum number of rows across all given CSV data. (This behavior can be
+reversed with the '--pad' flag.)
When concatenating by row, all CSV data must have the same number of columns.
If you need to rearrange the columns or fix the lengths of records, use the
-'slice' or 'fixlengths' commands. Also, only the headers of the *first* CSV
+'select' or 'fixlengths' commands. Also, only the headers of the *first* CSV
data given are used. Headers in subsequent inputs are ignored. (This behavior
can be disabled with --no-headers.)
diff --git a/src/cmd/count.rs b/src/cmd/count.rs
index d73a41f..55a208f 100644
--- a/src/cmd/count.rs
+++ b/src/cmd/count.rs
@@ -1,3 +1,7 @@
+use std::error::FromError;
+
+use csv::NextField;
+
use CliResult;
use config::{Delimiter, Config};
use util;
@@ -5,14 +9,16 @@ use util;
static USAGE: &'static str = "
Prints a count of the number of records in the CSV data.
+Note that the count will not include the header row (unless --no-headers is
+given).
+
Usage:
xsv count [options] [<input>]
Common options:
-h, --help Display this message
- -n, --no-headers When set, the first row will not be interpreted
- as headers. (i.e., They are not searched, analyzed,
- sliced, etc.)
+ -n, --no-headers When set, the first row will not be included in
+ the count.
-d, --delimiter <arg> The field delimiter for reading CSV data.
Must be a single character. [default: ,]
";
@@ -36,15 +42,16 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
None => {
let mut rdr = try!(conf.reader());
let mut count = 0u64;
- let mut seen_field = false;
while !rdr.done() {
loop {
match rdr.next_field() {
- None => break,
- Some(r) => { seen_field = true; try!(r); }
+ NextField::EndOfCsv => break,
+ NextField::EndOfRecord => { count += 1; break; }
+ NextField::Error(err) =>
+ return Err(FromError::from_error(err)),
+ NextField::Data(_) => {}
}
}
- if seen_field { count += 1; }
}
if !args.flag_no_headers && count > 0 {
count - 1
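
Note on the hunk above: the new counting loop drives csv 0.5.2's pull parser through its NextField enum instead of the old seen_field flag. Every EndOfRecord bumps the counter, EndOfCsv ends the scan, and a parse error is converted with FromError and returned immediately. A minimal standalone sketch of the same control flow, using a simplified stand-in enum rather than the real csv crate types:

    // Illustrative sketch only: `NextField` here is a simplified stand-in for
    // the pull-parser enum matched in the hunk above, not the csv crate's type.
    enum NextField {
        Data(String),
        EndOfRecord,
        EndOfCsv,
        Error(String),
    }

    // Count records the way the new loop does: bump the counter on every
    // EndOfRecord, stop at EndOfCsv, and surface the first error immediately.
    fn count_records<I: Iterator<Item = NextField>>(fields: I) -> Result<u64, String> {
        let mut count = 0u64;
        for field in fields {
            match field {
                NextField::EndOfCsv => break,
                NextField::EndOfRecord => count += 1,
                NextField::Error(err) => return Err(err),
                NextField::Data(_) => {}
            }
        }
        Ok(count)
    }

    fn main() {
        // Two records of two fields each, as a flat stream of parser events.
        let events = vec![
            NextField::Data("a".into()), NextField::Data("b".into()), NextField::EndOfRecord,
            NextField::Data("c".into()), NextField::Data("d".into()), NextField::EndOfRecord,
            NextField::EndOfCsv,
        ];
        assert_eq!(count_records(events.into_iter()), Ok(2));
    }
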
diff --git a/src/cmd/fixlengths.rs b/src/cmd/fixlengths.rs
index c9b6537..fd60676 100644
--- a/src/cmd/fixlengths.rs
+++ b/src/cmd/fixlengths.rs
@@ -6,8 +6,8 @@ use config::{Config, Delimiter};
use util;
static USAGE: &'static str = "
-Transforms CSV data so that all records have the same length. The length is the
-length of the longest record in the data. Records with smaller lengths are
+Transforms CSV data so that all records have the same length. The length is
+the length of the longest record in the data. Records with smaller lengths are
padded with empty fields.
This requires two complete scans of the CSV data: one for determining the
@@ -66,7 +66,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
while !rdr.done() {
let mut count = 0u;
loop {
- match rdr.next_field() {
+ match rdr.next_field().into_iter_result() {
None => break,
Some(r) => { try!(r); }
}
diff --git a/src/cmd/join.rs b/src/cmd/join.rs
index 3687424..ed10338 100644
--- a/src/cmd/join.rs
+++ b/src/cmd/join.rs
@@ -140,7 +140,7 @@ impl<R: io::Reader + io::Seek, W: io::Writer> IoState<R, W> {
try!(validx.idx.seek(rowi as u64));
let mut row1 = row.iter().map(|f| Ok(f.as_slice()));
- let row2 = validx.idx.csv().by_ref();
+ let row2 = unsafe { validx.idx.csv().byte_fields() };
let combined = row1.by_ref().chain(row2);
try!(self.wtr.write_results(combined));
}
@@ -177,7 +177,9 @@ impl<R: io::Reader + io::Seek, W: io::Writer> IoState<R, W> {
for &rowi in rows.iter() {
try!(validx.idx.seek(rowi as u64));
let row1 = row.iter().map(|f| Ok(f.as_slice()));
- let row2 = validx.idx.csv().by_ref();
+ let row2 = unsafe {
+ validx.idx.csv().byte_fields()
+ };
if right {
try!(self.wtr.write_results(row2.chain(row1)));
} else {
@@ -214,7 +216,9 @@ impl<R: io::Reader + io::Seek, W: io::Writer> IoState<R, W> {
try!(validx.idx.seek(rowi as u64));
let row1 = row1.iter().map(|f| Ok(f[]));
- let row2 = validx.idx.csv().by_ref();
+ let row2 = unsafe {
+ validx.idx.csv().byte_fields()
+ };
try!(self.wtr.write_results(row1.chain(row2)));
}
}
@@ -227,7 +231,9 @@ impl<R: io::Reader + io::Seek, W: io::Writer> IoState<R, W> {
if !written {
try!(validx.idx.seek(i as u64));
let row1 = pad1.iter().map(|f| Ok(f[]));
- let row2 = validx.idx.csv().by_ref();
+ let row2 = unsafe {
+ validx.idx.csv().byte_fields()
+ };
try!(self.wtr.write_results(row1.chain(row2)));
}
}
@@ -244,11 +250,13 @@ impl<R: io::Reader + io::Seek, W: io::Writer> IoState<R, W> {
// Skip the header row. The raw byte interface won't
// do it for us.
if !self.no_headers && first {
- for f in self.rdr2 { try!(f); }
+ for f in unsafe { self.rdr2.byte_fields() } { try!(f); }
first = false;
}
let row1 = row1.iter().map(|f| Ok(f[]));
- let row2 = self.rdr2.by_ref();
+ let row2 = unsafe {
+ self.rdr2.byte_fields()
+ };
try!(self.wtr.write_results(row1.chain(row2)));
}
}
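
Note on the hunks above: the indexed reader is no longer consumed through its implicit iterator (by_ref()); its raw byte fields are requested explicitly with the unsafe byte_fields() call and chained behind the in-memory row before being written as one output record. The pattern, reduced to a standalone sketch with plain std::io types standing in for xsv's CSV writer, its error handling, and write_results (no CSV quoting is attempted here):

    // Chain fields from an in-memory row with fields streamed from a second
    // source and write them out as a single comma-joined record.
    fn write_joined<W, I1, I2>(wtr: &mut W, row1: I1, row2: I2) -> std::io::Result<()>
    where
        W: std::io::Write,
        I1: Iterator<Item = std::io::Result<Vec<u8>>>,
        I2: Iterator<Item = std::io::Result<Vec<u8>>>,
    {
        let mut first = true;
        for field in row1.chain(row2) {
            let field = field?;
            if !first {
                wtr.write_all(b",")?;
            }
            wtr.write_all(&field)?;
            first = false;
        }
        wtr.write_all(b"\n")
    }

    fn main() -> std::io::Result<()> {
        let left = vec![Ok(b"1".to_vec()), Ok(b"alice".to_vec())];
        let right = vec![Ok(b"alice".to_vec()), Ok(b"ny".to_vec())];
        let mut out = Vec::new();
        write_joined(&mut out, left.into_iter(), right.into_iter())?;
        assert_eq!(out, b"1,alice,alice,ny\n".to_vec());
        Ok(())
    }
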
diff --git a/src/main.rs b/src/main.rs
index 962c3b0..8a91194 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -159,10 +159,10 @@ impl Command {
}
}
-pub type CliResult<T> = Result<T, CliError>;
+type CliResult<T> = Result<T, CliError>;
#[deriving(Show)]
-pub enum CliError {
+enum CliError {
Flag(docopt::Error),
Csv(csv::Error),
Io(io::IoError),
diff --git a/src/select.rs b/src/select.rs
index 6c19229..c11dd08 100644
--- a/src/select.rs
+++ b/src/select.rs
@@ -373,8 +373,7 @@ impl NormalSelection {
Option<T>,
iter::Enumerate<I>,
&'a [bool]>> {
- let set = self.as_slice();
- row.enumerate().scan(set, |set, (i, v)| {
+ row.enumerate().scan(self.as_slice(), |set, (i, v)| {
if i < set.len() && set[i] { Some(Some(v)) } else { Some(None) }
}).filter_map(|v| v)
}
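
Note on the hunk above: the change only inlines the temporary; the boolean selection mask is passed straight into scan as its state, and filter_map drops the columns the mask rejects. A compact standalone sketch of that enumerate/scan/filter_map selection, with plain &str fields standing in for xsv's byte fields:

    // Keep only the columns whose index is marked true in `mask`,
    // preserving their original order.
    fn select_fields<'a>(
        row: impl Iterator<Item = &'a str> + 'a,
        mask: &'a [bool],
    ) -> impl Iterator<Item = &'a str> + 'a {
        row.enumerate()
            .scan(mask, |set, (i, v)| {
                // Always yield Some(..) so scan keeps going; the inner Option
                // records whether column i is selected.
                if i < set.len() && set[i] { Some(Some(v)) } else { Some(None) }
            })
            .filter_map(|v| v)
    }

    fn main() {
        let row = ["a", "b", "c", "d"];
        let mask = [true, false, true, false];
        let picked: Vec<&str> = select_fields(row.iter().copied(), &mask).collect();
        assert_eq!(picked, vec!["a", "c"]);
    }
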