diff --git a/Cargo.toml b/Cargo.toml index d28758f4..2c35cd73 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -46,6 +46,7 @@ members = [ ] [dependencies] +bstr = "0.1.2" grep = { version = "0.2.3", path = "grep" } ignore = { version = "0.4.4", path = "ignore" } lazy_static = "1.1.0" diff --git a/GUIDE.md b/GUIDE.md index 343c812e..0094a7b4 100644 --- a/GUIDE.md +++ b/GUIDE.md @@ -525,9 +525,9 @@ config file. Once the environment variable is set, open the file and just type in the flags you want set automatically. There are only two rules for describing the format of the config file: -1. Every line is a shell argument, after trimming ASCII whitespace. -2. Lines starting with `#` (optionally preceded by any amount of - ASCII whitespace) are ignored. +1. Every line is a shell argument, after trimming whitespace. +2. Lines starting with `#` (optionally preceded by any amount of whitespace) +are ignored. In particular, there is no escaping. Each line is given to ripgrep as a single command line argument verbatim. diff --git a/doc/rg.1.txt.tpl b/doc/rg.1.txt.tpl index a6f72260..1c542b6b 100644 --- a/doc/rg.1.txt.tpl +++ b/doc/rg.1.txt.tpl @@ -107,9 +107,9 @@ ripgrep supports reading configuration files that change ripgrep's default behavior. The format of the configuration file is an "rc" style and is very simple. It is defined by two rules: - 1. Every line is a shell argument, after trimming ASCII whitespace. + 1. Every line is a shell argument, after trimming whitespace. 2. Lines starting with *#* (optionally preceded by any amount of - ASCII whitespace) are ignored. + whitespace) are ignored. ripgrep will look for a single configuration file if and only if the *RIPGREP_CONFIG_PATH* environment variable is set and is non-empty. 
diff --git a/src/config.rs b/src/config.rs index f10c5a86..a5e492ec 100644 --- a/src/config.rs +++ b/src/config.rs @@ -5,10 +5,11 @@ use std::env; use std::error::Error; use std::fs::File; -use std::io::{self, BufRead}; +use std::io; use std::ffi::OsString; use std::path::{Path, PathBuf}; +use bstr::io::BufReadExt; use log; use crate::Result; @@ -76,62 +77,29 @@ fn parse<P: AsRef<Path>>( fn parse_reader<R: io::Read>( rdr: R, ) -> Result<(Vec<OsString>, Vec<Box<Error>>)> { - let mut bufrdr = io::BufReader::new(rdr); + let bufrdr = io::BufReader::new(rdr); let (mut args, mut errs) = (vec![], vec![]); - let mut line = vec![]; let mut line_number = 0; - while { - line.clear(); + bufrdr.for_byte_line_with_terminator(|line| { line_number += 1; - bufrdr.read_until(b'\n', &mut line)? > 0 - } { - trim(&mut line); + + let line = line.trim(); if line.is_empty() || line[0] == b'#' { - continue; + return Ok(true); } - match bytes_to_os_string(&line) { + match line.to_os_str() { Ok(osstr) => { - args.push(osstr); + args.push(osstr.to_os_string()); } Err(err) => { errs.push(format!("{}: {}", line_number, err).into()); } } - } + Ok(true) + })?; Ok((args, errs)) } -/// Trim the given bytes of whitespace according to the ASCII definition. -fn trim(x: &mut Vec<u8>) { - let upto = x.iter().take_while(|b| is_space(**b)).count(); - x.drain(..upto); - let revto = x.len() - x.iter().rev().take_while(|b| is_space(**b)).count(); - x.drain(revto..); -} - -/// Returns true if and only if the given byte is an ASCII space character. -fn is_space(b: u8) -> bool { - b == b'\t' - || b == b'\n' - || b == b'\x0B' - || b == b'\x0C' - || b == b'\r' - || b == b' ' -} - -/// On Unix, get an OsString from raw bytes. -#[cfg(unix)] -fn bytes_to_os_string(bytes: &[u8]) -> Result<OsString> { - use std::os::unix::ffi::OsStringExt; - Ok(OsString::from_vec(bytes.to_vec())) -} - -/// On non-Unix (like Windows), require UTF-8.
-#[cfg(not(unix))] -fn bytes_to_os_string(bytes: &[u8]) -> Result<OsString> { - String::from_utf8(bytes.to_vec()).map(OsString::from).map_err(From::from) -} - #[cfg(test)] mod tests { use std::ffi::OsString;