mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2024-12-07 11:13:17 +02:00
config: switch to using bstrs
This lets us implement correct Unicode trimming and also simplifies the parsing logic a bit. This also removes the last platform specific bits of code in ripgrep core.
This commit is contained in:
parent
5e50a3c43c
commit
26a83c6301
@ -46,6 +46,7 @@ members = [
|
||||
]
|
||||
|
||||
[dependencies]
|
||||
bstr = "0.1.2"
|
||||
grep = { version = "0.2.3", path = "grep" }
|
||||
ignore = { version = "0.4.4", path = "ignore" }
|
||||
lazy_static = "1.1.0"
|
||||
|
6
GUIDE.md
6
GUIDE.md
@ -525,9 +525,9 @@ config file. Once the environment variable is set, open the file and just type
|
||||
in the flags you want set automatically. There are only two rules for
|
||||
describing the format of the config file:
|
||||
|
||||
1. Every line is a shell argument, after trimming ASCII whitespace.
|
||||
2. Lines starting with `#` (optionally preceded by any amount of
|
||||
ASCII whitespace) are ignored.
|
||||
1. Every line is a shell argument, after trimming whitespace.
|
||||
2. Lines starting with `#` (optionally preceded by any amount of whitespace)
|
||||
are ignored.
|
||||
|
||||
In particular, there is no escaping. Each line is given to ripgrep as a single
|
||||
command line argument verbatim.
|
||||
|
@ -107,9 +107,9 @@ ripgrep supports reading configuration files that change ripgrep's default
|
||||
behavior. The format of the configuration file is an "rc" style and is very
|
||||
simple. It is defined by two rules:
|
||||
|
||||
1. Every line is a shell argument, after trimming ASCII whitespace.
|
||||
1. Every line is a shell argument, after trimming whitespace.
|
||||
2. Lines starting with *#* (optionally preceded by any amount of
|
||||
ASCII whitespace) are ignored.
|
||||
whitespace) are ignored.
|
||||
|
||||
ripgrep will look for a single configuration file if and only if the
|
||||
*RIPGREP_CONFIG_PATH* environment variable is set and is non-empty.
|
||||
|
@ -5,10 +5,11 @@
|
||||
use std::env;
|
||||
use std::error::Error;
|
||||
use std::fs::File;
|
||||
use std::io::{self, BufRead};
|
||||
use std::io;
|
||||
use std::ffi::OsString;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use bstr::io::BufReadExt;
|
||||
use log;
|
||||
|
||||
use crate::Result;
|
||||
@ -76,62 +77,29 @@ fn parse<P: AsRef<Path>>(
|
||||
fn parse_reader<R: io::Read>(
|
||||
rdr: R,
|
||||
) -> Result<(Vec<OsString>, Vec<Box<Error>>)> {
|
||||
let mut bufrdr = io::BufReader::new(rdr);
|
||||
let bufrdr = io::BufReader::new(rdr);
|
||||
let (mut args, mut errs) = (vec![], vec![]);
|
||||
let mut line = vec![];
|
||||
let mut line_number = 0;
|
||||
while {
|
||||
line.clear();
|
||||
bufrdr.for_byte_line_with_terminator(|line| {
|
||||
line_number += 1;
|
||||
bufrdr.read_until(b'\n', &mut line)? > 0
|
||||
} {
|
||||
trim(&mut line);
|
||||
|
||||
let line = line.trim();
|
||||
if line.is_empty() || line[0] == b'#' {
|
||||
continue;
|
||||
return Ok(true);
|
||||
}
|
||||
match bytes_to_os_string(&line) {
|
||||
match line.to_os_str() {
|
||||
Ok(osstr) => {
|
||||
args.push(osstr);
|
||||
args.push(osstr.to_os_string());
|
||||
}
|
||||
Err(err) => {
|
||||
errs.push(format!("{}: {}", line_number, err).into());
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(true)
|
||||
})?;
|
||||
Ok((args, errs))
|
||||
}
|
||||
|
||||
/// Trim the given bytes of whitespace according to the ASCII definition.
|
||||
fn trim(x: &mut Vec<u8>) {
|
||||
let upto = x.iter().take_while(|b| is_space(**b)).count();
|
||||
x.drain(..upto);
|
||||
let revto = x.len() - x.iter().rev().take_while(|b| is_space(**b)).count();
|
||||
x.drain(revto..);
|
||||
}
|
||||
|
||||
/// Returns true if and only if the given byte is an ASCII space character.
|
||||
fn is_space(b: u8) -> bool {
|
||||
b == b'\t'
|
||||
|| b == b'\n'
|
||||
|| b == b'\x0B'
|
||||
|| b == b'\x0C'
|
||||
|| b == b'\r'
|
||||
|| b == b' '
|
||||
}
|
||||
|
||||
/// On Unix, get an OsString from raw bytes.
|
||||
#[cfg(unix)]
|
||||
fn bytes_to_os_string(bytes: &[u8]) -> Result<OsString> {
|
||||
use std::os::unix::ffi::OsStringExt;
|
||||
Ok(OsString::from_vec(bytes.to_vec()))
|
||||
}
|
||||
|
||||
/// On non-Unix (like Windows), require UTF-8.
|
||||
#[cfg(not(unix))]
|
||||
fn bytes_to_os_string(bytes: &[u8]) -> Result<OsString> {
|
||||
String::from_utf8(bytes.to_vec()).map(OsString::from).map_err(From::from)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::ffi::OsString;
|
||||
|
Loading…
Reference in New Issue
Block a user