mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2024-12-12 19:18:24 +02:00
config: switch to using bstrs
This lets us implement correct Unicode trimming and also simplifies the parsing logic a bit. This also removes the last platform-specific bits of code in ripgrep core.
This commit is contained in:
parent
5e50a3c43c
commit
26a83c6301
@ -46,6 +46,7 @@ members = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
bstr = "0.1.2"
|
||||||
grep = { version = "0.2.3", path = "grep" }
|
grep = { version = "0.2.3", path = "grep" }
|
||||||
ignore = { version = "0.4.4", path = "ignore" }
|
ignore = { version = "0.4.4", path = "ignore" }
|
||||||
lazy_static = "1.1.0"
|
lazy_static = "1.1.0"
|
||||||
|
6
GUIDE.md
6
GUIDE.md
@ -525,9 +525,9 @@ config file. Once the environment variable is set, open the file and just type
|
|||||||
in the flags you want set automatically. There are only two rules for
|
in the flags you want set automatically. There are only two rules for
|
||||||
describing the format of the config file:
|
describing the format of the config file:
|
||||||
|
|
||||||
1. Every line is a shell argument, after trimming ASCII whitespace.
|
1. Every line is a shell argument, after trimming whitespace.
|
||||||
2. Lines starting with `#` (optionally preceded by any amount of
|
2. Lines starting with `#` (optionally preceded by any amount of whitespace)
|
||||||
ASCII whitespace) are ignored.
|
are ignored.
|
||||||
|
|
||||||
In particular, there is no escaping. Each line is given to ripgrep as a single
|
In particular, there is no escaping. Each line is given to ripgrep as a single
|
||||||
command line argument verbatim.
|
command line argument verbatim.
|
||||||
|
@ -107,9 +107,9 @@ ripgrep supports reading configuration files that change ripgrep's default
|
|||||||
behavior. The format of the configuration file is an "rc" style and is very
|
behavior. The format of the configuration file is an "rc" style and is very
|
||||||
simple. It is defined by two rules:
|
simple. It is defined by two rules:
|
||||||
|
|
||||||
1. Every line is a shell argument, after trimming ASCII whitespace.
|
1. Every line is a shell argument, after trimming whitespace.
|
||||||
2. Lines starting with *#* (optionally preceded by any amount of
|
2. Lines starting with *#* (optionally preceded by any amount of
|
||||||
ASCII whitespace) are ignored.
|
whitespace) are ignored.
|
||||||
|
|
||||||
ripgrep will look for a single configuration file if and only if the
|
ripgrep will look for a single configuration file if and only if the
|
||||||
*RIPGREP_CONFIG_PATH* environment variable is set and is non-empty.
|
*RIPGREP_CONFIG_PATH* environment variable is set and is non-empty.
|
||||||
|
@ -5,10 +5,11 @@
|
|||||||
use std::env;
|
use std::env;
|
||||||
use std::error::Error;
|
use std::error::Error;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{self, BufRead};
|
use std::io;
|
||||||
use std::ffi::OsString;
|
use std::ffi::OsString;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
|
|
||||||
|
use bstr::io::BufReadExt;
|
||||||
use log;
|
use log;
|
||||||
|
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
@ -76,62 +77,29 @@ fn parse<P: AsRef<Path>>(
|
|||||||
fn parse_reader<R: io::Read>(
|
fn parse_reader<R: io::Read>(
|
||||||
rdr: R,
|
rdr: R,
|
||||||
) -> Result<(Vec<OsString>, Vec<Box<Error>>)> {
|
) -> Result<(Vec<OsString>, Vec<Box<Error>>)> {
|
||||||
let mut bufrdr = io::BufReader::new(rdr);
|
let bufrdr = io::BufReader::new(rdr);
|
||||||
let (mut args, mut errs) = (vec![], vec![]);
|
let (mut args, mut errs) = (vec![], vec![]);
|
||||||
let mut line = vec![];
|
|
||||||
let mut line_number = 0;
|
let mut line_number = 0;
|
||||||
while {
|
bufrdr.for_byte_line_with_terminator(|line| {
|
||||||
line.clear();
|
|
||||||
line_number += 1;
|
line_number += 1;
|
||||||
bufrdr.read_until(b'\n', &mut line)? > 0
|
|
||||||
} {
|
let line = line.trim();
|
||||||
trim(&mut line);
|
|
||||||
if line.is_empty() || line[0] == b'#' {
|
if line.is_empty() || line[0] == b'#' {
|
||||||
continue;
|
return Ok(true);
|
||||||
}
|
}
|
||||||
match bytes_to_os_string(&line) {
|
match line.to_os_str() {
|
||||||
Ok(osstr) => {
|
Ok(osstr) => {
|
||||||
args.push(osstr);
|
args.push(osstr.to_os_string());
|
||||||
}
|
}
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
errs.push(format!("{}: {}", line_number, err).into());
|
errs.push(format!("{}: {}", line_number, err).into());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
Ok(true)
|
||||||
|
})?;
|
||||||
Ok((args, errs))
|
Ok((args, errs))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Trim the given bytes of whitespace according to the ASCII definition.
|
|
||||||
fn trim(x: &mut Vec<u8>) {
|
|
||||||
let upto = x.iter().take_while(|b| is_space(**b)).count();
|
|
||||||
x.drain(..upto);
|
|
||||||
let revto = x.len() - x.iter().rev().take_while(|b| is_space(**b)).count();
|
|
||||||
x.drain(revto..);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns true if and only if the given byte is an ASCII space character.
|
|
||||||
fn is_space(b: u8) -> bool {
|
|
||||||
b == b'\t'
|
|
||||||
|| b == b'\n'
|
|
||||||
|| b == b'\x0B'
|
|
||||||
|| b == b'\x0C'
|
|
||||||
|| b == b'\r'
|
|
||||||
|| b == b' '
|
|
||||||
}
|
|
||||||
|
|
||||||
/// On Unix, get an OsString from raw bytes.
|
|
||||||
#[cfg(unix)]
|
|
||||||
fn bytes_to_os_string(bytes: &[u8]) -> Result<OsString> {
|
|
||||||
use std::os::unix::ffi::OsStringExt;
|
|
||||||
Ok(OsString::from_vec(bytes.to_vec()))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// On non-Unix (like Windows), require UTF-8.
|
|
||||||
#[cfg(not(unix))]
|
|
||||||
fn bytes_to_os_string(bytes: &[u8]) -> Result<OsString> {
|
|
||||||
String::from_utf8(bytes.to_vec()).map(OsString::from).map_err(From::from)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use std::ffi::OsString;
|
use std::ffi::OsString;
|
||||||
|
Loading…
Reference in New Issue
Block a user