diff options
author | Andrew Gallant <jamslam@gmail.com> | 2019-04-04 15:14:29 -0400 |
---|---|---|
committer | Andrew Gallant <jamslam@gmail.com> | 2019-04-05 23:24:08 -0400 |
commit | 9b8f5cbabab547904bdfcb333fca2771d43db561 (patch) | |
tree | 7591e332750eaa26495d4f015814568cf861b0cf /src | |
parent | c52da74ac30aa73a28cf64b9a4d2cdc63c42394a (diff) |
config: switch to using bstrs
This lets us implement correct Unicode trimming and also simplifies the
parsing logic a bit. This also removes the last platform-specific bits of
code in ripgrep core.
Diffstat (limited to 'src')
-rw-r--r-- | src/config.rs | 54 |
1 files changed, 11 insertions, 43 deletions
diff --git a/src/config.rs b/src/config.rs index f10c5a86..a5e492ec 100644 --- a/src/config.rs +++ b/src/config.rs @@ -5,10 +5,11 @@ use std::env; use std::error::Error; use std::fs::File; -use std::io::{self, BufRead}; +use std::io; use std::ffi::OsString; use std::path::{Path, PathBuf}; +use bstr::io::BufReadExt; use log; use crate::Result; @@ -76,62 +77,29 @@ fn parse<P: AsRef<Path>>( fn parse_reader<R: io::Read>( rdr: R, ) -> Result<(Vec<OsString>, Vec<Box<Error>>)> { - let mut bufrdr = io::BufReader::new(rdr); + let bufrdr = io::BufReader::new(rdr); let (mut args, mut errs) = (vec![], vec![]); - let mut line = vec![]; let mut line_number = 0; - while { - line.clear(); + bufrdr.for_byte_line_with_terminator(|line| { line_number += 1; - bufrdr.read_until(b'\n', &mut line)? > 0 - } { - trim(&mut line); + + let line = line.trim(); if line.is_empty() || line[0] == b'#' { - continue; + return Ok(true); } - match bytes_to_os_string(&line) { + match line.to_os_str() { Ok(osstr) => { - args.push(osstr); + args.push(osstr.to_os_string()); } Err(err) => { errs.push(format!("{}: {}", line_number, err).into()); } } - } + Ok(true) + })?; Ok((args, errs)) } -/// Trim the given bytes of whitespace according to the ASCII definition. -fn trim(x: &mut Vec<u8>) { - let upto = x.iter().take_while(|b| is_space(**b)).count(); - x.drain(..upto); - let revto = x.len() - x.iter().rev().take_while(|b| is_space(**b)).count(); - x.drain(revto..); -} - -/// Returns true if and only if the given byte is an ASCII space character. -fn is_space(b: u8) -> bool { - b == b'\t' - || b == b'\n' - || b == b'\x0B' - || b == b'\x0C' - || b == b'\r' - || b == b' ' -} - -/// On Unix, get an OsString from raw bytes. -#[cfg(unix)] -fn bytes_to_os_string(bytes: &[u8]) -> Result<OsString> { - use std::os::unix::ffi::OsStringExt; - Ok(OsString::from_vec(bytes.to_vec())) -} - -/// On non-Unix (like Windows), require UTF-8. 
-#[cfg(not(unix))] -fn bytes_to_os_string(bytes: &[u8]) -> Result<OsString> { - String::from_utf8(bytes.to_vec()).map(OsString::from).map_err(From::from) -} - #[cfg(test)] mod tests { use std::ffi::OsString; |