mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-04-24 17:12:16 +02:00
The top-level listing was just getting a bit too long for my taste. So put all of the code in one directory and shrink the large top-level mess to a small top-level mess. NOTE: This commit only contains renames. The subsequent commit will actually make ripgrep build again. We do it this way with the naive hope that this will make it easier for git history to track the renames. Sigh.
196 lines
5.8 KiB
Rust
196 lines
5.8 KiB
Rust
use std::error;
|
|
use std::ffi::OsStr;
|
|
use std::fmt;
|
|
use std::fs::File;
|
|
use std::io;
|
|
use std::path::Path;
|
|
use std::str;
|
|
|
|
use bstr::io::BufReadExt;
|
|
|
|
use escape::{escape, escape_os};
|
|
|
|
/// The error reported when a pattern cannot be interpreted as valid UTF-8.
///
/// End users occasionally supply patterns containing bytes that are not
/// valid UTF-8. This type exists so that such a failure can be reported in
/// a targeted way instead of as a generic conversion error.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct InvalidPatternError {
    // The offending pattern in escaped form; this is the text shown to the
    // user by the `Display` implementation.
    original: String,
    // Byte offset up to which the pattern was verified to be valid UTF-8.
    valid_up_to: usize,
}
|
|
|
|
impl InvalidPatternError {
|
|
/// Returns the index in the given string up to which valid UTF-8 was
|
|
/// verified.
|
|
pub fn valid_up_to(&self) -> usize {
|
|
self.valid_up_to
|
|
}
|
|
}
|
|
|
|
impl error::Error for InvalidPatternError {
|
|
fn description(&self) -> &str {
|
|
"invalid pattern"
|
|
}
|
|
}
|
|
|
|
impl fmt::Display for InvalidPatternError {
|
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
write!(
|
|
f,
|
|
"found invalid UTF-8 in pattern at byte offset {} \
|
|
(use hex escape sequences to match arbitrary bytes \
|
|
in a pattern, e.g., \\xFF): '{}'",
|
|
self.valid_up_to, self.original,
|
|
)
|
|
}
|
|
}
|
|
|
|
impl From<InvalidPatternError> for io::Error {
|
|
fn from(paterr: InvalidPatternError) -> io::Error {
|
|
io::Error::new(io::ErrorKind::Other, paterr)
|
|
}
|
|
}
|
|
|
|
/// Convert an OS string into a regular expression pattern.
|
|
///
|
|
/// This conversion fails if the given pattern is not valid UTF-8, in which
|
|
/// case, a targeted error with more information about where the invalid UTF-8
|
|
/// occurs is given. The error also suggests the use of hex escape sequences,
|
|
/// which are supported by many regex engines.
|
|
pub fn pattern_from_os(pattern: &OsStr) -> Result<&str, InvalidPatternError> {
|
|
pattern.to_str().ok_or_else(|| {
|
|
let valid_up_to = pattern
|
|
.to_string_lossy()
|
|
.find('\u{FFFD}')
|
|
.expect("a Unicode replacement codepoint for invalid UTF-8");
|
|
InvalidPatternError {
|
|
original: escape_os(pattern),
|
|
valid_up_to: valid_up_to,
|
|
}
|
|
})
|
|
}
|
|
|
|
/// Convert arbitrary bytes into a regular expression pattern.
|
|
///
|
|
/// This conversion fails if the given pattern is not valid UTF-8, in which
|
|
/// case, a targeted error with more information about where the invalid UTF-8
|
|
/// occurs is given. The error also suggests the use of hex escape sequences,
|
|
/// which are supported by many regex engines.
|
|
pub fn pattern_from_bytes(
|
|
pattern: &[u8],
|
|
) -> Result<&str, InvalidPatternError> {
|
|
str::from_utf8(pattern).map_err(|err| InvalidPatternError {
|
|
original: escape(pattern),
|
|
valid_up_to: err.valid_up_to(),
|
|
})
|
|
}
|
|
|
|
/// Read patterns from a file path, one per line.
|
|
///
|
|
/// If there was a problem reading or if any of the patterns contain invalid
|
|
/// UTF-8, then an error is returned. If there was a problem with a specific
|
|
/// pattern, then the error message will include the line number and the file
|
|
/// path.
|
|
pub fn patterns_from_path<P: AsRef<Path>>(path: P) -> io::Result<Vec<String>> {
|
|
let path = path.as_ref();
|
|
let file = File::open(path).map_err(|err| {
|
|
io::Error::new(
|
|
io::ErrorKind::Other,
|
|
format!("{}: {}", path.display(), err),
|
|
)
|
|
})?;
|
|
patterns_from_reader(file).map_err(|err| {
|
|
io::Error::new(
|
|
io::ErrorKind::Other,
|
|
format!("{}:{}", path.display(), err),
|
|
)
|
|
})
|
|
}
|
|
|
|
/// Read patterns from stdin, one per line.
|
|
///
|
|
/// If there was a problem reading or if any of the patterns contain invalid
|
|
/// UTF-8, then an error is returned. If there was a problem with a specific
|
|
/// pattern, then the error message will include the line number and the fact
|
|
/// that it came from stdin.
|
|
pub fn patterns_from_stdin() -> io::Result<Vec<String>> {
|
|
let stdin = io::stdin();
|
|
let locked = stdin.lock();
|
|
patterns_from_reader(locked).map_err(|err| {
|
|
io::Error::new(io::ErrorKind::Other, format!("<stdin>:{}", err))
|
|
})
|
|
}
|
|
|
|
/// Read patterns from any reader, one per line.
|
|
///
|
|
/// If there was a problem reading or if any of the patterns contain invalid
|
|
/// UTF-8, then an error is returned. If there was a problem with a specific
|
|
/// pattern, then the error message will include the line number.
|
|
///
|
|
/// Note that this routine uses its own internal buffer, so the caller should
|
|
/// not provide their own buffered reader if possible.
|
|
///
|
|
/// # Example
|
|
///
|
|
/// This shows how to parse patterns, one per line.
|
|
///
|
|
/// ```
|
|
/// use grep_cli::patterns_from_reader;
|
|
///
|
|
/// # fn example() -> Result<(), Box<::std::error::Error>> {
|
|
/// let patterns = "\
|
|
/// foo
|
|
/// bar\\s+foo
|
|
/// [a-z]{3}
|
|
/// ";
|
|
///
|
|
/// assert_eq!(patterns_from_reader(patterns.as_bytes())?, vec![
|
|
/// r"foo",
|
|
/// r"bar\s+foo",
|
|
/// r"[a-z]{3}",
|
|
/// ]);
|
|
/// # Ok(()) }
|
|
/// ```
|
|
pub fn patterns_from_reader<R: io::Read>(rdr: R) -> io::Result<Vec<String>> {
|
|
let mut patterns = vec![];
|
|
let mut line_number = 0;
|
|
io::BufReader::new(rdr).for_byte_line(|line| {
|
|
line_number += 1;
|
|
match pattern_from_bytes(line) {
|
|
Ok(pattern) => {
|
|
patterns.push(pattern.to_string());
|
|
Ok(true)
|
|
}
|
|
Err(err) => Err(io::Error::new(
|
|
io::ErrorKind::Other,
|
|
format!("{}: {}", line_number, err),
|
|
)),
|
|
}
|
|
})?;
|
|
Ok(patterns)
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    // The lone 0xFF byte sits at index 3, so exactly three bytes are valid.
    #[test]
    fn bytes() {
        let invalid: &[u8] = b"abc\xFFxyz";
        let failure = pattern_from_bytes(invalid).unwrap_err();
        assert_eq!(failure.valid_up_to(), 3);
    }

    // Same input as `bytes`, but routed through an OS string. Only Unix
    // allows building an `OsStr` from arbitrary bytes.
    #[test]
    #[cfg(unix)]
    fn os() {
        use std::ffi::OsStr;
        use std::os::unix::ffi::OsStrExt;

        let invalid = OsStr::from_bytes(b"abc\xFFxyz");
        let failure = pattern_from_os(invalid).unwrap_err();
        assert_eq!(failure.valid_up_to(), 3);
    }
}
|