summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Andrew Gallant <jamslam@gmail.com> 2019-04-04 15:14:29 -0400
committer Andrew Gallant <jamslam@gmail.com> 2019-04-05 23:24:08 -0400
commit 9b8f5cbabab547904bdfcb333fca2771d43db561 (patch)
tree 7591e332750eaa26495d4f015814568cf861b0cf
parent c52da74ac30aa73a28cf64b9a4d2cdc63c42394a (diff)
config: switch to using bstrs
This lets us implement correct Unicode trimming and also simplifies the parsing logic a bit. This also removes the last platform specific bits of code in ripgrep core.
-rw-r--r-- Cargo.toml 1
-rw-r--r-- GUIDE.md 6
-rw-r--r-- doc/rg.1.txt.tpl 4
-rw-r--r-- src/config.rs 54
4 files changed, 17 insertions, 48 deletions
diff --git a/Cargo.toml b/Cargo.toml
index d28758f4..2c35cd73 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -46,6 +46,7 @@ members = [
]
[dependencies]
+bstr = "0.1.2"
grep = { version = "0.2.3", path = "grep" }
ignore = { version = "0.4.4", path = "ignore" }
lazy_static = "1.1.0"
diff --git a/GUIDE.md b/GUIDE.md
index 343c812e..0094a7b4 100644
--- a/GUIDE.md
+++ b/GUIDE.md
@@ -525,9 +525,9 @@ config file. Once the environment variable is set, open the file and just type
in the flags you want set automatically. There are only two rules for
describing the format of the config file:
-1. Every line is a shell argument, after trimming ASCII whitespace.
-2. Lines starting with `#` (optionally preceded by any amount of
- ASCII whitespace) are ignored.
+1. Every line is a shell argument, after trimming whitespace.
+2. Lines starting with `#` (optionally preceded by any amount of whitespace)
+are ignored.
In particular, there is no escaping. Each line is given to ripgrep as a single
command line argument verbatim.
diff --git a/doc/rg.1.txt.tpl b/doc/rg.1.txt.tpl
index a6f72260..1c542b6b 100644
--- a/doc/rg.1.txt.tpl
+++ b/doc/rg.1.txt.tpl
@@ -107,9 +107,9 @@ ripgrep supports reading configuration files that change ripgrep's default
behavior. The format of the configuration file is an "rc" style and is very
simple. It is defined by two rules:
- 1. Every line is a shell argument, after trimming ASCII whitespace.
+ 1. Every line is a shell argument, after trimming whitespace.
2. Lines starting with *#* (optionally preceded by any amount of
- ASCII whitespace) are ignored.
+ whitespace) are ignored.
ripgrep will look for a single configuration file if and only if the
*RIPGREP_CONFIG_PATH* environment variable is set and is non-empty.
diff --git a/src/config.rs b/src/config.rs
index f10c5a86..a5e492ec 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -5,10 +5,11 @@
use std::env;
use std::error::Error;
use std::fs::File;
-use std::io::{self, BufRead};
+use std::io;
use std::ffi::OsString;
use std::path::{Path, PathBuf};
+use bstr::io::BufReadExt;
use log;
use crate::Result;
@@ -76,62 +77,29 @@ fn parse<P: AsRef<Path>>(
fn parse_reader<R: io::Read>(
rdr: R,
) -> Result<(Vec<OsString>, Vec<Box<Error>>)> {
- let mut bufrdr = io::BufReader::new(rdr);
+ let bufrdr = io::BufReader::new(rdr);
let (mut args, mut errs) = (vec![], vec![]);
- let mut line = vec![];
let mut line_number = 0;
- while {
- line.clear();
+ bufrdr.for_byte_line_with_terminator(|line| {
line_number += 1;
- bufrdr.read_until(b'\n', &mut line)? > 0
- } {
- trim(&mut line);
+
+ let line = line.trim();
if line.is_empty() || line[0] == b'#' {
- continue;
+ return Ok(true);
}
- match bytes_to_os_string(&line) {
+ match line.to_os_str() {
Ok(osstr) => {
- args.push(osstr);
+ args.push(osstr.to_os_string());
}
Err(err) => {
errs.push(format!("{}: {}", line_number, err).into());
}
}
- }
+ Ok(true)
+ })?;
Ok((args, errs))
}
-/// Trim the given bytes of whitespace according to the ASCII definition.
-fn trim(x: &mut Vec<u8>) {
- let upto = x.iter().take_while(|b| is_space(**b)).count();
- x.drain(..upto);
- let revto = x.len() - x.iter().rev().take_while(|b| is_space(**b)).count();
- x.drain(revto..);
-}
-
-/// Returns true if and only if the given byte is an ASCII space character.
-fn is_space(b: u8) -> bool {
- b == b'\t'
- || b == b'\n'
- || b == b'\x0B'
- || b == b'\x0C'
- || b == b'\r'
- || b == b' '
-}
-
-/// On Unix, get an OsString from raw bytes.
-#[cfg(unix)]
-fn bytes_to_os_string(bytes: &[u8]) -> Result<OsString> {
- use std::os::unix::ffi::OsStringExt;
- Ok(OsString::from_vec(bytes.to_vec()))
-}
-
-/// On non-Unix (like Windows), require UTF-8.
-#[cfg(not(unix))]
-fn bytes_to_os_string(bytes: &[u8]) -> Result<OsString> {
- String::from_utf8(bytes.to_vec()).map(OsString::from).map_err(From::from)
-}
-
#[cfg(test)]
mod tests {
use std::ffi::OsString;