1
0
mirror of https://github.com/BurntSushi/ripgrep.git synced 2025-02-09 14:14:56 +02:00

searcher: polish

This updates some dependencies and brings code style in line with my
current practice.
This commit is contained in:
Andrew Gallant 2023-09-28 12:58:11 -04:00
parent e30bbb8cff
commit d53b7310ee
11 changed files with 268 additions and 246 deletions

4
Cargo.lock generated
View File

@ -319,9 +319,9 @@ checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c"
[[package]]
name = "memmap2"
version = "0.5.10"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327"
checksum = "43a5a03cefb0d953ec0be133036f14e109412fa594edc2f77227249db66cc3ed"
dependencies = [
"libc",
]

View File

@ -11,16 +11,16 @@ repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/searcher"
readme = "README.md"
keywords = ["regex", "grep", "egrep", "search", "pattern"]
license = "Unlicense OR MIT"
edition = "2018"
edition = "2021"
[dependencies]
bstr = { version = "1.6.0", default-features = false, features = ["std"] }
encoding_rs = "0.8.14"
encoding_rs_io = "0.1.6"
bstr = { version = "1.6.2", default-features = false, features = ["std"] }
encoding_rs = "0.8.33"
encoding_rs_io = "0.1.7"
grep-matcher = { version = "0.1.6", path = "../matcher" }
log = "0.4.5"
memchr = "2.6.2"
memmap = { package = "memmap2", version = "0.5.3" }
log = "0.4.20"
memchr = "2.6.3"
memmap = { package = "memmap2", version = "0.8.0" }
[dev-dependencies]
grep-regex = { version = "0.1.11", path = "../regex" }

View File

@ -38,12 +38,12 @@ This example shows how to execute the searcher and read the search results
using the [`UTF8`](sinks::UTF8) implementation of `Sink`.
```
use std::error::Error;
use grep_matcher::Matcher;
use grep_regex::RegexMatcher;
use grep_searcher::Searcher;
use grep_searcher::sinks::UTF8;
use {
grep_matcher::Matcher,
grep_regex::RegexMatcher,
grep_searcher::Searcher,
grep_searcher::sinks::UTF8,
};
const SHERLOCK: &'static [u8] = b"\
For the Doctor Watsons of this world, as opposed to the Sherlock
@ -54,28 +54,26 @@ but Doctor Watson has to have it taken out for him and dusted,
and exhibited clearly, with a label attached.
";
# fn main() { example().unwrap() }
fn example() -> Result<(), Box<Error>> {
let matcher = RegexMatcher::new(r"Doctor \w+")?;
let mut matches: Vec<(u64, String)> = vec![];
Searcher::new().search_slice(&matcher, SHERLOCK, UTF8(|lnum, line| {
// We are guaranteed to find a match, so the unwrap is OK.
let mymatch = matcher.find(line.as_bytes())?.unwrap();
matches.push((lnum, line[mymatch].to_string()));
Ok(true)
}))?;
let matcher = RegexMatcher::new(r"Doctor \w+")?;
let mut matches: Vec<(u64, String)> = vec![];
Searcher::new().search_slice(&matcher, SHERLOCK, UTF8(|lnum, line| {
// We are guaranteed to find a match, so the unwrap is OK.
let mymatch = matcher.find(line.as_bytes())?.unwrap();
matches.push((lnum, line[mymatch].to_string()));
Ok(true)
}))?;
assert_eq!(matches.len(), 2);
assert_eq!(
matches[0],
(1, "Doctor Watsons".to_string())
);
assert_eq!(
matches[1],
(5, "Doctor Watson".to_string())
);
Ok(())
}
assert_eq!(matches.len(), 2);
assert_eq!(
matches[0],
(1, "Doctor Watsons".to_string())
);
assert_eq!(
matches[1],
(5, "Doctor Watson".to_string())
);
# Ok::<(), Box<dyn std::error::Error>>(())
```
See also `examples/search-stdin.rs` from the root of this crate's directory
@ -85,14 +83,16 @@ searches stdin.
#![deny(missing_docs)]
pub use crate::lines::{LineIter, LineStep};
pub use crate::searcher::{
BinaryDetection, ConfigError, Encoding, MmapChoice, Searcher,
SearcherBuilder,
};
pub use crate::sink::sinks;
pub use crate::sink::{
Sink, SinkContext, SinkContextKind, SinkError, SinkFinish, SinkMatch,
pub use crate::{
lines::{LineIter, LineStep},
searcher::{
BinaryDetection, ConfigError, Encoding, MmapChoice, Searcher,
SearcherBuilder,
},
sink::{
sinks, Sink, SinkContext, SinkContextKind, SinkError, SinkFinish,
SinkMatch,
},
};
#[macro_use]

View File

@ -1,4 +1,3 @@
use std::cmp;
use std::io;
use bstr::ByteSlice;
@ -15,7 +14,7 @@ pub(crate) const DEFAULT_BUFFER_CAPACITY: usize = 64 * (1 << 10); // 64 KB
///
/// The default is to eagerly allocate without a limit.
#[derive(Clone, Copy, Debug)]
pub enum BufferAllocation {
pub(crate) enum BufferAllocation {
/// Attempt to expand the size of the buffer until either at least the next
/// line fits into memory or until all available memory is exhausted.
///
@ -35,7 +34,7 @@ impl Default for BufferAllocation {
/// Create a new error to be used when a configured allocation limit has been
/// reached.
pub fn alloc_error(limit: usize) -> io::Error {
pub(crate) fn alloc_error(limit: usize) -> io::Error {
let msg = format!("configured allocation limit ({}) exceeded", limit);
io::Error::new(io::ErrorKind::Other, msg)
}
@ -49,7 +48,7 @@ pub fn alloc_error(limit: usize) -> io::Error {
/// using textual patterns. Of course, there are many cases in which this isn't
/// true, which is why binary detection is disabled by default.
#[derive(Clone, Copy, Debug)]
pub enum BinaryDetection {
pub(crate) enum BinaryDetection {
/// No binary detection is performed. Data reported by the line buffer may
/// contain arbitrary bytes.
None,
@ -108,18 +107,18 @@ impl Default for Config {
/// A builder for constructing line buffers.
#[derive(Clone, Debug, Default)]
pub struct LineBufferBuilder {
pub(crate) struct LineBufferBuilder {
config: Config,
}
impl LineBufferBuilder {
/// Create a new builder for a buffer.
pub fn new() -> LineBufferBuilder {
pub(crate) fn new() -> LineBufferBuilder {
LineBufferBuilder { config: Config::default() }
}
/// Create a new line buffer from this builder's configuration.
pub fn build(&self) -> LineBuffer {
pub(crate) fn build(&self) -> LineBuffer {
LineBuffer {
config: self.config,
buf: vec![0; self.config.capacity],
@ -139,7 +138,10 @@ impl LineBufferBuilder {
///
/// This is set to a reasonable default and probably shouldn't be changed
/// unless there's a specific reason to do so.
pub fn capacity(&mut self, capacity: usize) -> &mut LineBufferBuilder {
pub(crate) fn capacity(
&mut self,
capacity: usize,
) -> &mut LineBufferBuilder {
self.config.capacity = capacity;
self
}
@ -155,7 +157,10 @@ impl LineBufferBuilder {
/// is incomplete.
///
/// By default, this is set to `b'\n'`.
pub fn line_terminator(&mut self, lineterm: u8) -> &mut LineBufferBuilder {
pub(crate) fn line_terminator(
&mut self,
lineterm: u8,
) -> &mut LineBufferBuilder {
self.config.lineterm = lineterm;
self
}
@ -174,7 +179,7 @@ impl LineBufferBuilder {
/// a value of `0` is sensible, and in particular, will guarantee that a
/// line buffer will never allocate additional memory beyond its initial
/// capacity.
pub fn buffer_alloc(
pub(crate) fn buffer_alloc(
&mut self,
behavior: BufferAllocation,
) -> &mut LineBufferBuilder {
@ -188,7 +193,7 @@ impl LineBufferBuilder {
///
/// By default, this is disabled. In general, binary detection should be
/// viewed as an imperfect heuristic.
pub fn binary_detection(
pub(crate) fn binary_detection(
&mut self,
detection: BinaryDetection,
) -> &mut LineBufferBuilder {
@ -200,7 +205,7 @@ impl LineBufferBuilder {
/// A line buffer reader efficiently reads a line oriented buffer from an
/// arbitrary reader.
#[derive(Debug)]
pub struct LineBufferReader<'b, R> {
pub(crate) struct LineBufferReader<'b, R> {
rdr: R,
line_buffer: &'b mut LineBuffer,
}
@ -211,7 +216,7 @@ impl<'b, R: io::Read> LineBufferReader<'b, R> {
///
/// This does not change the binary detection behavior of the given line
/// buffer.
pub fn new(
pub(crate) fn new(
rdr: R,
line_buffer: &'b mut LineBuffer,
) -> LineBufferReader<'b, R> {
@ -225,13 +230,13 @@ impl<'b, R: io::Read> LineBufferReader<'b, R> {
/// correspond to an offset in memory. It is typically used for reporting
/// purposes. It can also be used for counting the number of bytes that
/// have been searched.
pub fn absolute_byte_offset(&self) -> u64 {
pub(crate) fn absolute_byte_offset(&self) -> u64 {
self.line_buffer.absolute_byte_offset()
}
/// If binary data was detected, then this returns the absolute byte offset
/// at which binary data was initially found.
pub fn binary_byte_offset(&self) -> Option<u64> {
pub(crate) fn binary_byte_offset(&self) -> Option<u64> {
self.line_buffer.binary_byte_offset()
}
@ -248,25 +253,25 @@ impl<'b, R: io::Read> LineBufferReader<'b, R> {
/// This forwards any errors returned by the underlying reader, and will
/// also return an error if the buffer must be expanded past its allocation
/// limit, as governed by the buffer allocation strategy.
pub fn fill(&mut self) -> Result<bool, io::Error> {
pub(crate) fn fill(&mut self) -> Result<bool, io::Error> {
self.line_buffer.fill(&mut self.rdr)
}
/// Return the contents of this buffer.
pub fn buffer(&self) -> &[u8] {
pub(crate) fn buffer(&self) -> &[u8] {
self.line_buffer.buffer()
}
/// Return the buffer as a BStr, used for convenient equality checking
/// in tests only.
#[cfg(test)]
fn bstr(&self) -> &::bstr::BStr {
fn bstr(&self) -> &bstr::BStr {
self.buffer().as_bstr()
}
/// Consume the number of bytes provided. This must be less than or equal
/// to the number of bytes returned by `buffer`.
pub fn consume(&mut self, amt: usize) {
pub(crate) fn consume(&mut self, amt: usize) {
self.line_buffer.consume(amt);
}
@ -286,7 +291,7 @@ impl<'b, R: io::Read> LineBufferReader<'b, R> {
/// Line buffers cannot be used directly, but instead must be used via the
/// LineBufferReader.
#[derive(Clone, Debug)]
pub struct LineBuffer {
pub(crate) struct LineBuffer {
/// The configuration of this buffer.
config: Config,
/// The primary buffer with which to hold data.
@ -322,7 +327,7 @@ impl LineBuffer {
///
/// This permits dynamically changing the binary detection strategy on
/// an existing line buffer without needing to create a new one.
pub fn set_binary_detection(&mut self, binary: BinaryDetection) {
pub(crate) fn set_binary_detection(&mut self, binary: BinaryDetection) {
self.config.binary = binary;
}
@ -497,12 +502,12 @@ impl LineBuffer {
}
// `len` is used for computing the next allocation size. The capacity
// is permitted to start at `0`, so we make sure it's at least `1`.
let len = cmp::max(1, self.buf.len());
let len = std::cmp::max(1, self.buf.len());
let additional = match self.config.buffer_alloc {
BufferAllocation::Eager => len * 2,
BufferAllocation::Error(limit) => {
let used = self.buf.len() - self.config.capacity;
let n = cmp::min(len * 2, limit - used);
let n = std::cmp::min(len * 2, limit - used);
if n == 0 {
return Err(alloc_error(self.config.capacity + limit));
}
@ -541,9 +546,9 @@ fn replace_bytes(bytes: &mut [u8], src: u8, replacement: u8) -> Option<usize> {
#[cfg(test)]
mod tests {
use super::*;
use bstr::{ByteSlice, ByteVec};
use std::str;
use super::*;
const SHERLOCK: &'static str = "\
For the Doctor Watsons of this world, as opposed to the Sherlock

View File

@ -2,8 +2,10 @@
A collection of routines for performing operations on lines.
*/
use bstr::ByteSlice;
use grep_matcher::{LineTerminator, Match};
use {
bstr::ByteSlice,
grep_matcher::{LineTerminator, Match},
};
/// An iterator over lines in a particular slice of bytes.
///
@ -21,10 +23,8 @@ impl<'b> LineIter<'b> {
/// Create a new line iterator that yields lines in the given bytes that
/// are terminated by `line_term`.
pub fn new(line_term: u8, bytes: &'b [u8]) -> LineIter<'b> {
LineIter {
bytes: bytes,
stepper: LineStep::new(line_term, 0, bytes.len()),
}
let stepper = LineStep::new(line_term, 0, bytes.len());
LineIter { bytes, stepper }
}
}
@ -61,7 +61,7 @@ impl LineStep {
///
/// This panics if `start` is not less than or equal to `end`.
pub fn new(line_term: u8, start: usize, end: usize) -> LineStep {
LineStep { line_term, pos: start, end: end }
LineStep { line_term, pos: start, end }
}
/// Return the start and end position of the next line in the given bytes.
@ -108,14 +108,17 @@ impl LineStep {
}
/// Count the number of occurrences of `line_term` in `bytes`.
pub fn count(bytes: &[u8], line_term: u8) -> u64 {
pub(crate) fn count(bytes: &[u8], line_term: u8) -> u64 {
memchr::memchr_iter(line_term, bytes).count() as u64
}
/// Given a line that possibly ends with a terminator, return that line without
/// the terminator.
#[inline(always)]
pub fn without_terminator(bytes: &[u8], line_term: LineTerminator) -> &[u8] {
pub(crate) fn without_terminator(
bytes: &[u8],
line_term: LineTerminator,
) -> &[u8] {
let line_term = line_term.as_bytes();
let start = bytes.len().saturating_sub(line_term.len());
if bytes.get(start..) == Some(line_term) {
@ -129,7 +132,7 @@ pub fn without_terminator(bytes: &[u8], line_term: LineTerminator) -> &[u8] {
///
/// Line terminators are considered part of the line they terminate.
#[inline(always)]
pub fn locate(bytes: &[u8], line_term: u8, range: Match) -> Match {
pub(crate) fn locate(bytes: &[u8], line_term: u8, range: Match) -> Match {
let line_start =
bytes[..range.start()].rfind_byte(line_term).map_or(0, |i| i + 1);
let line_end =
@ -151,7 +154,7 @@ pub fn locate(bytes: &[u8], line_term: u8, range: Match) -> Match {
///
/// If `bytes` ends with a line terminator, then the terminator itself is
/// considered part of the last line.
pub fn preceding(bytes: &[u8], line_term: u8, count: usize) -> usize {
pub(crate) fn preceding(bytes: &[u8], line_term: u8, count: usize) -> usize {
preceding_by_pos(bytes, bytes.len(), line_term, count)
}
@ -195,10 +198,9 @@ fn preceding_by_pos(
#[cfg(test)]
mod tests {
use super::*;
use grep_matcher::Match;
use std::ops::Range;
use std::str;
use super::*;
const SHERLOCK: &'static str = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
@ -222,7 +224,7 @@ and exhibited clearly, with a label attached.\
results
}
fn line_ranges(text: &str) -> Vec<Range<usize>> {
fn line_ranges(text: &str) -> Vec<std::ops::Range<usize>> {
let mut results = vec![];
let mut it = LineStep::new(b'\n', 0, text.len());
while let Some(m) = it.next_match(text.as_bytes()) {

View File

@ -1,15 +1,16 @@
use std::cmp;
use bstr::ByteSlice;
use crate::line_buffer::BinaryDetection;
use crate::lines::{self, LineStep};
use crate::searcher::{Config, Range, Searcher};
use crate::sink::{
Sink, SinkContext, SinkContextKind, SinkError, SinkFinish, SinkMatch,
};
use grep_matcher::{LineMatchKind, Matcher};
use crate::{
line_buffer::BinaryDetection,
lines::{self, LineStep},
searcher::{Config, Range, Searcher},
sink::{
Sink, SinkContext, SinkContextKind, SinkError, SinkFinish, SinkMatch,
},
};
enum FastMatchResult {
Continue,
Stop,
@ -17,7 +18,7 @@ enum FastMatchResult {
}
#[derive(Debug)]
pub struct Core<'s, M: 's, S> {
pub(crate) struct Core<'s, M: 's, S> {
config: &'s Config,
matcher: M,
searcher: &'s Searcher,
@ -35,7 +36,7 @@ pub struct Core<'s, M: 's, S> {
}
impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
pub fn new(
pub(crate) fn new(
searcher: &'s Searcher,
matcher: M,
sink: S,
@ -45,14 +46,14 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
if searcher.config.line_number { Some(1) } else { None };
let core = Core {
config: &searcher.config,
matcher: matcher,
searcher: searcher,
sink: sink,
binary: binary,
matcher,
searcher,
sink,
binary,
pos: 0,
absolute_byte_offset: 0,
binary_byte_offset: None,
line_number: line_number,
line_number,
last_line_counted: 0,
last_line_visited: 0,
after_context_left: 0,
@ -69,23 +70,23 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
core
}
pub fn pos(&self) -> usize {
pub(crate) fn pos(&self) -> usize {
self.pos
}
pub fn set_pos(&mut self, pos: usize) {
pub(crate) fn set_pos(&mut self, pos: usize) {
self.pos = pos;
}
pub fn binary_byte_offset(&self) -> Option<u64> {
pub(crate) fn binary_byte_offset(&self) -> Option<u64> {
self.binary_byte_offset.map(|offset| offset as u64)
}
pub fn matcher(&self) -> &M {
pub(crate) fn matcher(&self) -> &M {
&self.matcher
}
pub fn matched(
pub(crate) fn matched(
&mut self,
buf: &[u8],
range: &Range,
@ -93,18 +94,18 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
self.sink_matched(buf, range)
}
pub fn binary_data(
pub(crate) fn binary_data(
&mut self,
binary_byte_offset: u64,
) -> Result<bool, S::Error> {
self.sink.binary_data(&self.searcher, binary_byte_offset)
}
pub fn begin(&mut self) -> Result<bool, S::Error> {
pub(crate) fn begin(&mut self) -> Result<bool, S::Error> {
self.sink.begin(&self.searcher)
}
pub fn finish(
pub(crate) fn finish(
&mut self,
byte_count: u64,
binary_byte_offset: Option<u64>,
@ -115,7 +116,10 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
)
}
pub fn match_by_line(&mut self, buf: &[u8]) -> Result<bool, S::Error> {
pub(crate) fn match_by_line(
&mut self,
buf: &[u8],
) -> Result<bool, S::Error> {
if self.is_line_by_line_fast() {
match self.match_by_line_fast(buf)? {
FastMatchResult::SwitchToSlow => self.match_by_line_slow(buf),
@ -127,7 +131,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
}
}
pub fn roll(&mut self, buf: &[u8]) -> usize {
pub(crate) fn roll(&mut self, buf: &[u8]) -> usize {
let consumed = if self.config.max_context() == 0 {
buf.len()
} else {
@ -141,7 +145,8 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
self.config.line_term.as_byte(),
self.config.max_context(),
);
let consumed = cmp::max(context_start, self.last_line_visited);
let consumed =
std::cmp::max(context_start, self.last_line_visited);
consumed
};
self.count_lines(buf, consumed);
@ -152,7 +157,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
consumed
}
pub fn detect_binary(
pub(crate) fn detect_binary(
&mut self,
buf: &[u8],
range: &Range,
@ -177,7 +182,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
}
}
pub fn before_context_by_line(
pub(crate) fn before_context_by_line(
&mut self,
buf: &[u8],
upto: usize,
@ -213,7 +218,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
Ok(true)
}
pub fn after_context_by_line(
pub(crate) fn after_context_by_line(
&mut self,
buf: &[u8],
upto: usize,
@ -238,7 +243,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
Ok(true)
}
pub fn other_context_by_line(
pub(crate) fn other_context_by_line(
&mut self,
buf: &[u8],
upto: usize,

View File

@ -1,16 +1,14 @@
use std::cmp;
use std::io;
use crate::line_buffer::{LineBufferReader, DEFAULT_BUFFER_CAPACITY};
use crate::lines::{self, LineStep};
use crate::sink::{Sink, SinkError};
use grep_matcher::Matcher;
use crate::searcher::core::Core;
use crate::searcher::{Config, Range, Searcher};
use crate::{
line_buffer::{LineBufferReader, DEFAULT_BUFFER_CAPACITY},
lines::{self, LineStep},
searcher::{core::Core, Config, Range, Searcher},
sink::{Sink, SinkError},
};
#[derive(Debug)]
pub struct ReadByLine<'s, M, R, S> {
pub(crate) struct ReadByLine<'s, M, R, S> {
config: &'s Config,
core: Core<'s, M, S>,
rdr: LineBufferReader<'s, R>,
@ -19,10 +17,10 @@ pub struct ReadByLine<'s, M, R, S> {
impl<'s, M, R, S> ReadByLine<'s, M, R, S>
where
M: Matcher,
R: io::Read,
R: std::io::Read,
S: Sink,
{
pub fn new(
pub(crate) fn new(
searcher: &'s Searcher,
matcher: M,
read_from: LineBufferReader<'s, R>,
@ -37,7 +35,7 @@ where
}
}
pub fn run(mut self) -> Result<(), S::Error> {
pub(crate) fn run(mut self) -> Result<(), S::Error> {
if self.core.begin()? {
while self.fill()? && self.core.match_by_line(self.rdr.buffer())? {
}
@ -87,13 +85,13 @@ where
}
#[derive(Debug)]
pub struct SliceByLine<'s, M, S> {
pub(crate) struct SliceByLine<'s, M, S> {
core: Core<'s, M, S>,
slice: &'s [u8],
}
impl<'s, M: Matcher, S: Sink> SliceByLine<'s, M, S> {
pub fn new(
pub(crate) fn new(
searcher: &'s Searcher,
matcher: M,
slice: &'s [u8],
@ -103,14 +101,14 @@ impl<'s, M: Matcher, S: Sink> SliceByLine<'s, M, S> {
SliceByLine {
core: Core::new(searcher, matcher, write_to, true),
slice: slice,
slice,
}
}
pub fn run(mut self) -> Result<(), S::Error> {
pub(crate) fn run(mut self) -> Result<(), S::Error> {
if self.core.begin()? {
let binary_upto =
cmp::min(self.slice.len(), DEFAULT_BUFFER_CAPACITY);
std::cmp::min(self.slice.len(), DEFAULT_BUFFER_CAPACITY);
let binary_range = Range::new(0, binary_upto);
if !self.core.detect_binary(self.slice, &binary_range)? {
while !self.slice[self.core.pos()..].is_empty()
@ -132,7 +130,7 @@ impl<'s, M: Matcher, S: Sink> SliceByLine<'s, M, S> {
}
#[derive(Debug)]
pub struct MultiLine<'s, M, S> {
pub(crate) struct MultiLine<'s, M, S> {
config: &'s Config,
core: Core<'s, M, S>,
slice: &'s [u8],
@ -140,7 +138,7 @@ pub struct MultiLine<'s, M, S> {
}
impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> {
pub fn new(
pub(crate) fn new(
searcher: &'s Searcher,
matcher: M,
slice: &'s [u8],
@ -151,15 +149,15 @@ impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> {
MultiLine {
config: &searcher.config,
core: Core::new(searcher, matcher, write_to, true),
slice: slice,
slice,
last_match: None,
}
}
pub fn run(mut self) -> Result<(), S::Error> {
pub(crate) fn run(mut self) -> Result<(), S::Error> {
if self.core.begin()? {
let binary_upto =
cmp::min(self.slice.len(), DEFAULT_BUFFER_CAPACITY);
std::cmp::min(self.slice.len(), DEFAULT_BUFFER_CAPACITY);
let binary_range = Range::new(0, binary_upto);
if !self.core.detect_binary(self.slice, &binary_range)? {
let mut keepgoing = true;
@ -347,8 +345,10 @@ impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> {
#[cfg(test)]
mod tests {
use crate::searcher::{BinaryDetection, SearcherBuilder};
use crate::testutil::{KitchenSink, RegexMatcher, SearcherTester};
use crate::{
searcher::{BinaryDetection, SearcherBuilder},
testutil::{KitchenSink, RegexMatcher, SearcherTester},
};
use super::*;

View File

@ -1,5 +1,4 @@
use std::fs::File;
use std::path::Path;
use std::{fs::File, path::Path};
use memmap::Mmap;

View File

@ -1,19 +1,25 @@
use std::cell::RefCell;
use std::cmp;
use std::fmt;
use std::fs::File;
use std::io::{self, Read};
use std::path::Path;
use crate::line_buffer::{
self, alloc_error, BufferAllocation, LineBuffer, LineBufferBuilder,
LineBufferReader, DEFAULT_BUFFER_CAPACITY,
use std::{
cell::RefCell,
cmp,
fs::File,
io::{self, Read},
path::Path,
};
use {
encoding_rs,
encoding_rs_io::DecodeReaderBytesBuilder,
grep_matcher::{LineTerminator, Match, Matcher},
};
use crate::{
line_buffer::{
self, alloc_error, BufferAllocation, LineBuffer, LineBufferBuilder,
LineBufferReader, DEFAULT_BUFFER_CAPACITY,
},
searcher::glue::{MultiLine, ReadByLine, SliceByLine},
sink::{Sink, SinkError},
};
use crate::searcher::glue::{MultiLine, ReadByLine, SliceByLine};
use crate::sink::{Sink, SinkError};
use encoding_rs;
use encoding_rs_io::DecodeReaderBytesBuilder;
use grep_matcher::{LineTerminator, Match, Matcher};
pub use self::mmap::MmapChoice;
@ -232,6 +238,7 @@ impl Config {
/// This error occurs when a nonsensical configuration is present when trying
/// to construct a `Searcher` from a `SearcherBuilder`.
#[derive(Clone, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub enum ConfigError {
/// Indicates that the heap limit configuration prevents all possible
/// search strategies from being used. For example, if the heap limit is
@ -250,23 +257,12 @@ pub enum ConfigError {
/// The provided encoding label that could not be found.
label: Vec<u8>,
},
/// Hints that destructuring should not be exhaustive.
///
/// This enum may grow additional variants, so this makes sure clients
/// don't count on exhaustive matching. (Otherwise, adding a new variant
/// could break existing code.)
#[doc(hidden)]
__Nonexhaustive,
}
impl ::std::error::Error for ConfigError {
fn description(&self) -> &str {
"grep-searcher configuration error"
}
}
impl std::error::Error for ConfigError {}
impl fmt::Display for ConfigError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
impl std::fmt::Display for ConfigError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match *self {
ConfigError::SearchUnavailable => {
write!(f, "grep config error: no available searchers")
@ -284,7 +280,6 @@ impl fmt::Display for ConfigError {
"grep config error: unknown encoding: {}",
String::from_utf8_lossy(label),
),
_ => panic!("BUG: unexpected variant found"),
}
}
}
@ -331,8 +326,8 @@ impl SearcherBuilder {
.bom_sniffing(self.config.bom_sniffing);
Searcher {
config: config,
decode_builder: decode_builder,
config,
decode_builder,
decode_buffer: RefCell::new(vec![0; 8 * (1 << 10)]),
line_buffer: RefCell::new(self.config.line_buffer()),
multi_line_buffer: RefCell::new(vec![]),
@ -676,9 +671,9 @@ impl Searcher {
log::trace!("{:?}: searching via memory map", path);
return self.search_slice(matcher, &mmap, write_to);
}
// Fast path for multi-line searches of files when memory maps are
// not enabled. This pre-allocates a buffer roughly the size of the
// file, which isn't possible when searching an arbitrary io::Read.
// Fast path for multi-line searches of files when memory maps are not
// enabled. This pre-allocates a buffer roughly the size of the file,
// which isn't possible when searching an arbitrary std::io::Read.
if self.multi_line_with_matcher(&matcher) {
log::trace!(
"{:?}: reading entire file on to heap for mulitline",
@ -699,8 +694,8 @@ impl Searcher {
}
}
/// Execute a search over any implementation of `io::Read` and write the
/// results to the given sink.
/// Execute a search over any implementation of `std::io::Read` and write
/// the results to the given sink.
///
/// When possible, this implementation will search the reader incrementally
/// without reading it into memory. In some cases---for example, if multi
@ -1016,9 +1011,10 @@ fn slice_has_bom(slice: &[u8]) -> bool {
#[cfg(test)]
mod tests {
use super::*;
use crate::testutil::{KitchenSink, RegexMatcher};
use super::*;
#[test]
fn config_error_heap_limit() {
let matcher = RegexMatcher::new("");

View File

@ -1,23 +1,24 @@
use std::error;
use std::fmt;
use std::io;
use grep_matcher::LineTerminator;
use crate::lines::LineIter;
use crate::searcher::{ConfigError, Searcher};
use crate::{
lines::LineIter,
searcher::{ConfigError, Searcher},
};
/// A trait that describes errors that can be reported by searchers and
/// implementations of `Sink`.
///
/// Unless you have a specialized use case, you probably don't need to
/// implement this trait explicitly. It's likely that using `io::Error` (which
/// implements this trait) for your error type is good enough, largely because
/// most errors that occur during search will likely be an `io::Error`.
/// implement this trait explicitly. It's likely that using `std::io::Error`
/// (which implements this trait) for your error type is good enough,
/// largely because most errors that occur during search will likely be an
/// `std::io::Error`.
pub trait SinkError: Sized {
/// A constructor for converting any value that satisfies the
/// `fmt::Display` trait into an error.
fn error_message<T: fmt::Display>(message: T) -> Self;
/// `std::fmt::Display` trait into an error.
fn error_message<T: std::fmt::Display>(message: T) -> Self;
/// A constructor for converting I/O errors that occur while searching into
/// an error of this type.
@ -36,10 +37,10 @@ pub trait SinkError: Sized {
}
}
/// An `io::Error` can be used as an error for `Sink` implementations out of
/// the box.
/// An `std::io::Error` can be used as an error for `Sink` implementations out
/// of the box.
impl SinkError for io::Error {
fn error_message<T: fmt::Display>(message: T) -> io::Error {
fn error_message<T: std::fmt::Display>(message: T) -> io::Error {
io::Error::new(io::ErrorKind::Other, message.to_string())
}
@ -48,11 +49,13 @@ impl SinkError for io::Error {
}
}
/// A `Box<std::error::Error>` can be used as an error for `Sink`
/// A `Box<dyn std::error::Error>` can be used as an error for `Sink`
/// implementations out of the box.
impl SinkError for Box<dyn error::Error> {
fn error_message<T: fmt::Display>(message: T) -> Box<dyn error::Error> {
Box::<dyn error::Error>::from(message.to_string())
impl SinkError for Box<dyn std::error::Error> {
fn error_message<T: std::fmt::Display>(
message: T,
) -> Box<dyn std::error::Error> {
Box::<dyn std::error::Error>::from(message.to_string())
}
}
@ -74,7 +77,7 @@ impl SinkError for Box<dyn error::Error> {
///
/// * What to do when a match is found. Callers must provide this.
/// * What to do when an error occurs. Callers must provide this via the
/// [`SinkError`] trait. Generally, callers can just use `io::Error` for
/// [`SinkError`] trait. Generally, callers can just use `std::io::Error` for
/// this, which already implements `SinkError`.
/// * What to do when a contextual line is found. By default, these are
/// ignored.
@ -408,13 +411,14 @@ impl<'b> SinkMatch<'b> {
self.line_number
}
/// TODO
/// Exposes as much of the underlying buffer that was searched as possible.
#[inline]
pub fn buffer(&self) -> &'b [u8] {
self.buffer
}
/// TODO
/// Returns a range that corresponds to where [`SinkMatch::bytes`] appears
/// in [`SinkMatch::buffer`].
#[inline]
pub fn bytes_range_in_buffer(&self) -> std::ops::Range<usize> {
self.bytes_range_in_buffer.clone()
@ -506,16 +510,16 @@ impl<'b> SinkContext<'b> {
/// an error is reported at the first match and searching stops.
/// * Context lines, context breaks and summary data reported at the end of
/// a search are all ignored.
/// * Implementors are forced to use `io::Error` as their error type.
/// * Implementors are forced to use `std::io::Error` as their error type.
///
/// If you need more flexibility, then you're advised to implement the `Sink`
/// trait directly.
pub mod sinks {
use std::io;
use std::str;
use crate::searcher::Searcher;
use super::{Sink, SinkError, SinkMatch};
use crate::searcher::Searcher;
/// A sink that provides line numbers and matches as strings while ignoring
/// everything else.
@ -527,8 +531,8 @@ pub mod sinks {
///
/// The closure accepts two parameters: a line number and a UTF-8 string
/// containing the matched data. The closure returns a
/// `Result<bool, io::Error>`. If the `bool` is `false`, then the search
/// stops immediately. Otherwise, searching continues.
/// `Result<bool, std::io::Error>`. If the `bool` is `false`, then the
/// search stops immediately. Otherwise, searching continues.
///
/// If multi line mode was enabled, the line number refers to the line
/// number of the first line in the match.
@ -548,7 +552,7 @@ pub mod sinks {
_searcher: &Searcher,
mat: &SinkMatch<'_>,
) -> Result<bool, io::Error> {
let matched = match str::from_utf8(mat.bytes()) {
let matched = match std::str::from_utf8(mat.bytes()) {
Ok(matched) => matched,
Err(err) => return Err(io::Error::error_message(err)),
};
@ -575,8 +579,8 @@ pub mod sinks {
///
/// The closure accepts two parameters: a line number and a UTF-8 string
/// containing the matched data. The closure returns a
/// `Result<bool, io::Error>`. If the `bool` is `false`, then the search
/// stops immediately. Otherwise, searching continues.
/// `Result<bool, std::io::Error>`. If the `bool` is `false`, then the
/// search stops immediately. Otherwise, searching continues.
///
/// If multi line mode was enabled, the line number refers to the line
/// number of the first line in the match.
@ -598,7 +602,7 @@ pub mod sinks {
) -> Result<bool, io::Error> {
use std::borrow::Cow;
let matched = match str::from_utf8(mat.bytes()) {
let matched = match std::str::from_utf8(mat.bytes()) {
Ok(matched) => Cow::Borrowed(matched),
// TODO: In theory, it should be possible to amortize
// allocation here, but `std` doesn't provide such an API.
@ -624,9 +628,9 @@ pub mod sinks {
/// searcher was not configured to count lines.
///
/// The closure accepts two parameters: a line number and a raw byte string
/// containing the matched data. The closure returns a `Result<bool,
/// io::Error>`. If the `bool` is `false`, then the search stops
/// immediately. Otherwise, searching continues.
/// containing the matched data. The closure returns a
/// `Result<bool, std::io::Error>`. If the `bool` is `false`, then the
/// search stops immediately. Otherwise, searching continues.
///
/// If multi line mode was enabled, the line number refers to the line
/// number of the first line in the match.

View File

@ -1,14 +1,17 @@
use std::io::{self, Write};
use std::str;
use bstr::ByteSlice;
use grep_matcher::{
LineMatchKind, LineTerminator, Match, Matcher, NoCaptures, NoError,
use {
bstr::ByteSlice,
grep_matcher::{
LineMatchKind, LineTerminator, Match, Matcher, NoCaptures, NoError,
},
regex::bytes::{Regex, RegexBuilder},
};
use regex::bytes::{Regex, RegexBuilder};
use crate::searcher::{BinaryDetection, Searcher, SearcherBuilder};
use crate::sink::{Sink, SinkContext, SinkFinish, SinkMatch};
use crate::{
searcher::{BinaryDetection, Searcher, SearcherBuilder},
sink::{Sink, SinkContext, SinkFinish, SinkMatch},
};
/// A simple regex matcher.
///
@ -18,7 +21,7 @@ use crate::sink::{Sink, SinkContext, SinkFinish, SinkMatch};
/// this optimization is detected automatically by inspecting and possibly
/// modifying the regex itself.)
#[derive(Clone, Debug)]
pub struct RegexMatcher {
pub(crate) struct RegexMatcher {
regex: Regex,
line_term: Option<LineTerminator>,
every_line_is_candidate: bool,
@ -26,22 +29,18 @@ pub struct RegexMatcher {
impl RegexMatcher {
/// Create a new regex matcher.
pub fn new(pattern: &str) -> RegexMatcher {
pub(crate) fn new(pattern: &str) -> RegexMatcher {
let regex = RegexBuilder::new(pattern)
.multi_line(true) // permits ^ and $ to match at \n boundaries
.build()
.unwrap();
RegexMatcher {
regex: regex,
line_term: None,
every_line_is_candidate: false,
}
RegexMatcher { regex, line_term: None, every_line_is_candidate: false }
}
/// Forcefully set the line terminator of this matcher.
///
/// By default, this matcher has no line terminator set.
pub fn set_line_term(
pub(crate) fn set_line_term(
&mut self,
line_term: Option<LineTerminator>,
) -> &mut RegexMatcher {
@ -52,7 +51,10 @@ impl RegexMatcher {
/// Whether to return every line as a candidate or not.
///
/// This forces searchers to handle the case of reporting a false positive.
pub fn every_line_is_candidate(&mut self, yes: bool) -> &mut RegexMatcher {
pub(crate) fn every_line_is_candidate(
&mut self,
yes: bool,
) -> &mut RegexMatcher {
self.every_line_is_candidate = yes;
self
}
@ -108,17 +110,17 @@ impl Matcher for RegexMatcher {
/// This is useful for tests because it lets us easily confirm whether data
/// is being passed to Sink correctly.
#[derive(Clone, Debug)]
pub struct KitchenSink(Vec<u8>);
pub(crate) struct KitchenSink(Vec<u8>);
impl KitchenSink {
/// Create a new implementation of Sink that includes everything in the
/// kitchen.
pub fn new() -> KitchenSink {
pub(crate) fn new() -> KitchenSink {
KitchenSink(vec![])
}
/// Return the data written to this sink.
pub fn as_bytes(&self) -> &[u8] {
pub(crate) fn as_bytes(&self) -> &[u8] {
&self.0
}
}
@ -199,7 +201,7 @@ impl Sink for KitchenSink {
/// The tester works by assuming you want to test all pertinent code paths.
/// These can be trimmed down as necessary via the various builder methods.
#[derive(Debug)]
pub struct SearcherTester {
pub(crate) struct SearcherTester {
haystack: String,
pattern: String,
filter: Option<::regex::Regex>,
@ -221,7 +223,7 @@ pub struct SearcherTester {
impl SearcherTester {
/// Create a new tester for testing searchers.
pub fn new(haystack: &str, pattern: &str) -> SearcherTester {
pub(crate) fn new(haystack: &str, pattern: &str) -> SearcherTester {
SearcherTester {
haystack: haystack.to_string(),
pattern: pattern.to_string(),
@ -245,7 +247,7 @@ impl SearcherTester {
/// Execute the test. If the test succeeds, then this returns successfully.
/// If the test fails, then it panics with an informative message.
pub fn test(&self) {
pub(crate) fn test(&self) {
// Check for configuration errors.
if self.expected_no_line_number.is_none() {
panic!("an 'expected' string with NO line numbers must be given");
@ -300,7 +302,7 @@ impl SearcherTester {
/// printf debugging and only want one particular test configuration to
/// execute.
#[allow(dead_code)]
pub fn filter(&mut self, pattern: &str) -> &mut SearcherTester {
pub(crate) fn filter(&mut self, pattern: &str) -> &mut SearcherTester {
self.filter = Some(::regex::Regex::new(pattern).unwrap());
self
}
@ -311,13 +313,13 @@ impl SearcherTester {
/// Note that in order to see these in tests that aren't failing, you'll
/// want to use `cargo test -- --nocapture`.
#[allow(dead_code)]
pub fn print_labels(&mut self, yes: bool) -> &mut SearcherTester {
pub(crate) fn print_labels(&mut self, yes: bool) -> &mut SearcherTester {
self.print_labels = yes;
self
}
/// Set the expected search results, without line numbers.
pub fn expected_no_line_number(
pub(crate) fn expected_no_line_number(
&mut self,
exp: &str,
) -> &mut SearcherTester {
@ -326,7 +328,7 @@ impl SearcherTester {
}
/// Set the expected search results, with line numbers.
pub fn expected_with_line_number(
pub(crate) fn expected_with_line_number(
&mut self,
exp: &str,
) -> &mut SearcherTester {
@ -337,7 +339,7 @@ impl SearcherTester {
/// Set the expected search results, without line numbers, when performing
/// a search on a slice. When not present, `expected_no_line_number` is
/// used instead.
pub fn expected_slice_no_line_number(
pub(crate) fn expected_slice_no_line_number(
&mut self,
exp: &str,
) -> &mut SearcherTester {
@ -349,7 +351,7 @@ impl SearcherTester {
/// search on a slice. When not present, `expected_with_line_number` is
/// used instead.
#[allow(dead_code)]
pub fn expected_slice_with_line_number(
pub(crate) fn expected_slice_with_line_number(
&mut self,
exp: &str,
) -> &mut SearcherTester {
@ -362,7 +364,7 @@ impl SearcherTester {
/// This is enabled by default. When enabled, the string that is expected
/// when line numbers are present must be provided. Otherwise, the expected
/// string isn't required.
pub fn line_number(&mut self, yes: bool) -> &mut SearcherTester {
pub(crate) fn line_number(&mut self, yes: bool) -> &mut SearcherTester {
self.line_number = yes;
self
}
@ -370,7 +372,7 @@ impl SearcherTester {
/// Whether to test search using the line-by-line searcher or not.
///
/// By default, this is enabled.
pub fn by_line(&mut self, yes: bool) -> &mut SearcherTester {
pub(crate) fn by_line(&mut self, yes: bool) -> &mut SearcherTester {
self.by_line = yes;
self
}
@ -379,7 +381,7 @@ impl SearcherTester {
///
/// By default, this is enabled.
#[allow(dead_code)]
pub fn multi_line(&mut self, yes: bool) -> &mut SearcherTester {
pub(crate) fn multi_line(&mut self, yes: bool) -> &mut SearcherTester {
self.multi_line = yes;
self
}
@ -387,7 +389,7 @@ impl SearcherTester {
/// Whether to perform an inverted search or not.
///
/// By default, this is disabled.
pub fn invert_match(&mut self, yes: bool) -> &mut SearcherTester {
pub(crate) fn invert_match(&mut self, yes: bool) -> &mut SearcherTester {
self.invert_match = yes;
self
}
@ -395,7 +397,7 @@ impl SearcherTester {
/// Whether to enable binary detection on all searches.
///
/// By default, this is disabled.
pub fn binary_detection(
pub(crate) fn binary_detection(
&mut self,
detection: BinaryDetection,
) -> &mut SearcherTester {
@ -412,7 +414,10 @@ impl SearcherTester {
/// impact the number of bytes searched when performing binary detection.
/// For convenience, it can be useful to disable the automatic heap limit
/// test.
pub fn auto_heap_limit(&mut self, yes: bool) -> &mut SearcherTester {
pub(crate) fn auto_heap_limit(
&mut self,
yes: bool,
) -> &mut SearcherTester {
self.auto_heap_limit = yes;
self
}
@ -420,7 +425,10 @@ impl SearcherTester {
/// Set the number of lines to include in the "after" context.
///
/// The default is `0`, which is equivalent to not printing any context.
pub fn after_context(&mut self, lines: usize) -> &mut SearcherTester {
pub(crate) fn after_context(
&mut self,
lines: usize,
) -> &mut SearcherTester {
self.after_context = lines;
self
}
@ -428,7 +436,10 @@ impl SearcherTester {
/// Set the number of lines to include in the "before" context.
///
/// The default is `0`, which is equivalent to not printing any context.
pub fn before_context(&mut self, lines: usize) -> &mut SearcherTester {
pub(crate) fn before_context(
&mut self,
lines: usize,
) -> &mut SearcherTester {
self.before_context = lines;
self
}
@ -440,7 +451,7 @@ impl SearcherTester {
/// requesting an unbounded number of before and after contextual lines.
///
/// This is disabled by default.
pub fn passthru(&mut self, yes: bool) -> &mut SearcherTester {
pub(crate) fn passthru(&mut self, yes: bool) -> &mut SearcherTester {
self.passthru = yes;
self
}