1
0
mirror of https://github.com/BurntSushi/ripgrep.git synced 2025-04-19 09:02:15 +02:00

searcher: polish

This updates some dependencies and brings code style in line with my
current practice.
This commit is contained in:
Andrew Gallant 2023-09-28 12:58:11 -04:00
parent e30bbb8cff
commit d53b7310ee
11 changed files with 268 additions and 246 deletions

4
Cargo.lock generated
View File

@ -319,9 +319,9 @@ checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c"
[[package]] [[package]]
name = "memmap2" name = "memmap2"
version = "0.5.10" version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327" checksum = "43a5a03cefb0d953ec0be133036f14e109412fa594edc2f77227249db66cc3ed"
dependencies = [ dependencies = [
"libc", "libc",
] ]

View File

@ -11,16 +11,16 @@ repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/searcher"
readme = "README.md" readme = "README.md"
keywords = ["regex", "grep", "egrep", "search", "pattern"] keywords = ["regex", "grep", "egrep", "search", "pattern"]
license = "Unlicense OR MIT" license = "Unlicense OR MIT"
edition = "2018" edition = "2021"
[dependencies] [dependencies]
bstr = { version = "1.6.0", default-features = false, features = ["std"] } bstr = { version = "1.6.2", default-features = false, features = ["std"] }
encoding_rs = "0.8.14" encoding_rs = "0.8.33"
encoding_rs_io = "0.1.6" encoding_rs_io = "0.1.7"
grep-matcher = { version = "0.1.6", path = "../matcher" } grep-matcher = { version = "0.1.6", path = "../matcher" }
log = "0.4.5" log = "0.4.20"
memchr = "2.6.2" memchr = "2.6.3"
memmap = { package = "memmap2", version = "0.5.3" } memmap = { package = "memmap2", version = "0.8.0" }
[dev-dependencies] [dev-dependencies]
grep-regex = { version = "0.1.11", path = "../regex" } grep-regex = { version = "0.1.11", path = "../regex" }

View File

@ -38,12 +38,12 @@ This example shows how to execute the searcher and read the search results
using the [`UTF8`](sinks::UTF8) implementation of `Sink`. using the [`UTF8`](sinks::UTF8) implementation of `Sink`.
``` ```
use std::error::Error; use {
grep_matcher::Matcher,
use grep_matcher::Matcher; grep_regex::RegexMatcher,
use grep_regex::RegexMatcher; grep_searcher::Searcher,
use grep_searcher::Searcher; grep_searcher::sinks::UTF8,
use grep_searcher::sinks::UTF8; };
const SHERLOCK: &'static [u8] = b"\ const SHERLOCK: &'static [u8] = b"\
For the Doctor Watsons of this world, as opposed to the Sherlock For the Doctor Watsons of this world, as opposed to the Sherlock
@ -54,28 +54,26 @@ but Doctor Watson has to have it taken out for him and dusted,
and exhibited clearly, with a label attached. and exhibited clearly, with a label attached.
"; ";
# fn main() { example().unwrap() } let matcher = RegexMatcher::new(r"Doctor \w+")?;
fn example() -> Result<(), Box<Error>> { let mut matches: Vec<(u64, String)> = vec![];
let matcher = RegexMatcher::new(r"Doctor \w+")?; Searcher::new().search_slice(&matcher, SHERLOCK, UTF8(|lnum, line| {
let mut matches: Vec<(u64, String)> = vec![]; // We are guaranteed to find a match, so the unwrap is OK.
Searcher::new().search_slice(&matcher, SHERLOCK, UTF8(|lnum, line| { let mymatch = matcher.find(line.as_bytes())?.unwrap();
// We are guaranteed to find a match, so the unwrap is OK. matches.push((lnum, line[mymatch].to_string()));
let mymatch = matcher.find(line.as_bytes())?.unwrap(); Ok(true)
matches.push((lnum, line[mymatch].to_string())); }))?;
Ok(true)
}))?;
assert_eq!(matches.len(), 2); assert_eq!(matches.len(), 2);
assert_eq!( assert_eq!(
matches[0], matches[0],
(1, "Doctor Watsons".to_string()) (1, "Doctor Watsons".to_string())
); );
assert_eq!( assert_eq!(
matches[1], matches[1],
(5, "Doctor Watson".to_string()) (5, "Doctor Watson".to_string())
); );
Ok(())
} # Ok::<(), Box<dyn std::error::Error>>(())
``` ```
See also `examples/search-stdin.rs` from the root of this crate's directory See also `examples/search-stdin.rs` from the root of this crate's directory
@ -85,14 +83,16 @@ searches stdin.
#![deny(missing_docs)] #![deny(missing_docs)]
pub use crate::lines::{LineIter, LineStep}; pub use crate::{
pub use crate::searcher::{ lines::{LineIter, LineStep},
BinaryDetection, ConfigError, Encoding, MmapChoice, Searcher, searcher::{
SearcherBuilder, BinaryDetection, ConfigError, Encoding, MmapChoice, Searcher,
}; SearcherBuilder,
pub use crate::sink::sinks; },
pub use crate::sink::{ sink::{
Sink, SinkContext, SinkContextKind, SinkError, SinkFinish, SinkMatch, sinks, Sink, SinkContext, SinkContextKind, SinkError, SinkFinish,
SinkMatch,
},
}; };
#[macro_use] #[macro_use]

View File

@ -1,4 +1,3 @@
use std::cmp;
use std::io; use std::io;
use bstr::ByteSlice; use bstr::ByteSlice;
@ -15,7 +14,7 @@ pub(crate) const DEFAULT_BUFFER_CAPACITY: usize = 64 * (1 << 10); // 64 KB
/// ///
/// The default is to eagerly allocate without a limit. /// The default is to eagerly allocate without a limit.
#[derive(Clone, Copy, Debug)] #[derive(Clone, Copy, Debug)]
pub enum BufferAllocation { pub(crate) enum BufferAllocation {
/// Attempt to expand the size of the buffer until either at least the next /// Attempt to expand the size of the buffer until either at least the next
/// line fits into memory or until all available memory is exhausted. /// line fits into memory or until all available memory is exhausted.
/// ///
@ -35,7 +34,7 @@ impl Default for BufferAllocation {
/// Create a new error to be used when a configured allocation limit has been /// Create a new error to be used when a configured allocation limit has been
/// reached. /// reached.
pub fn alloc_error(limit: usize) -> io::Error { pub(crate) fn alloc_error(limit: usize) -> io::Error {
let msg = format!("configured allocation limit ({}) exceeded", limit); let msg = format!("configured allocation limit ({}) exceeded", limit);
io::Error::new(io::ErrorKind::Other, msg) io::Error::new(io::ErrorKind::Other, msg)
} }
@ -49,7 +48,7 @@ pub fn alloc_error(limit: usize) -> io::Error {
/// using textual patterns. Of course, there are many cases in which this isn't /// using textual patterns. Of course, there are many cases in which this isn't
/// true, which is why binary detection is disabled by default. /// true, which is why binary detection is disabled by default.
#[derive(Clone, Copy, Debug)] #[derive(Clone, Copy, Debug)]
pub enum BinaryDetection { pub(crate) enum BinaryDetection {
/// No binary detection is performed. Data reported by the line buffer may /// No binary detection is performed. Data reported by the line buffer may
/// contain arbitrary bytes. /// contain arbitrary bytes.
None, None,
@ -108,18 +107,18 @@ impl Default for Config {
/// A builder for constructing line buffers. /// A builder for constructing line buffers.
#[derive(Clone, Debug, Default)] #[derive(Clone, Debug, Default)]
pub struct LineBufferBuilder { pub(crate) struct LineBufferBuilder {
config: Config, config: Config,
} }
impl LineBufferBuilder { impl LineBufferBuilder {
/// Create a new builder for a buffer. /// Create a new builder for a buffer.
pub fn new() -> LineBufferBuilder { pub(crate) fn new() -> LineBufferBuilder {
LineBufferBuilder { config: Config::default() } LineBufferBuilder { config: Config::default() }
} }
/// Create a new line buffer from this builder's configuration. /// Create a new line buffer from this builder's configuration.
pub fn build(&self) -> LineBuffer { pub(crate) fn build(&self) -> LineBuffer {
LineBuffer { LineBuffer {
config: self.config, config: self.config,
buf: vec![0; self.config.capacity], buf: vec![0; self.config.capacity],
@ -139,7 +138,10 @@ impl LineBufferBuilder {
/// ///
/// This is set to a reasonable default and probably shouldn't be changed /// This is set to a reasonable default and probably shouldn't be changed
/// unless there's a specific reason to do so. /// unless there's a specific reason to do so.
pub fn capacity(&mut self, capacity: usize) -> &mut LineBufferBuilder { pub(crate) fn capacity(
&mut self,
capacity: usize,
) -> &mut LineBufferBuilder {
self.config.capacity = capacity; self.config.capacity = capacity;
self self
} }
@ -155,7 +157,10 @@ impl LineBufferBuilder {
/// is incomplete. /// is incomplete.
/// ///
/// By default, this is set to `b'\n'`. /// By default, this is set to `b'\n'`.
pub fn line_terminator(&mut self, lineterm: u8) -> &mut LineBufferBuilder { pub(crate) fn line_terminator(
&mut self,
lineterm: u8,
) -> &mut LineBufferBuilder {
self.config.lineterm = lineterm; self.config.lineterm = lineterm;
self self
} }
@ -174,7 +179,7 @@ impl LineBufferBuilder {
/// a value of `0` is sensible, and in particular, will guarantee that a /// a value of `0` is sensible, and in particular, will guarantee that a
/// line buffer will never allocate additional memory beyond its initial /// line buffer will never allocate additional memory beyond its initial
/// capacity. /// capacity.
pub fn buffer_alloc( pub(crate) fn buffer_alloc(
&mut self, &mut self,
behavior: BufferAllocation, behavior: BufferAllocation,
) -> &mut LineBufferBuilder { ) -> &mut LineBufferBuilder {
@ -188,7 +193,7 @@ impl LineBufferBuilder {
/// ///
/// By default, this is disabled. In general, binary detection should be /// By default, this is disabled. In general, binary detection should be
/// viewed as an imperfect heuristic. /// viewed as an imperfect heuristic.
pub fn binary_detection( pub(crate) fn binary_detection(
&mut self, &mut self,
detection: BinaryDetection, detection: BinaryDetection,
) -> &mut LineBufferBuilder { ) -> &mut LineBufferBuilder {
@ -200,7 +205,7 @@ impl LineBufferBuilder {
/// A line buffer reader efficiently reads a line oriented buffer from an /// A line buffer reader efficiently reads a line oriented buffer from an
/// arbitrary reader. /// arbitrary reader.
#[derive(Debug)] #[derive(Debug)]
pub struct LineBufferReader<'b, R> { pub(crate) struct LineBufferReader<'b, R> {
rdr: R, rdr: R,
line_buffer: &'b mut LineBuffer, line_buffer: &'b mut LineBuffer,
} }
@ -211,7 +216,7 @@ impl<'b, R: io::Read> LineBufferReader<'b, R> {
/// ///
/// This does not change the binary detection behavior of the given line /// This does not change the binary detection behavior of the given line
/// buffer. /// buffer.
pub fn new( pub(crate) fn new(
rdr: R, rdr: R,
line_buffer: &'b mut LineBuffer, line_buffer: &'b mut LineBuffer,
) -> LineBufferReader<'b, R> { ) -> LineBufferReader<'b, R> {
@ -225,13 +230,13 @@ impl<'b, R: io::Read> LineBufferReader<'b, R> {
/// correspond to an offset in memory. It is typically used for reporting /// correspond to an offset in memory. It is typically used for reporting
/// purposes. It can also be used for counting the number of bytes that /// purposes. It can also be used for counting the number of bytes that
/// have been searched. /// have been searched.
pub fn absolute_byte_offset(&self) -> u64 { pub(crate) fn absolute_byte_offset(&self) -> u64 {
self.line_buffer.absolute_byte_offset() self.line_buffer.absolute_byte_offset()
} }
/// If binary data was detected, then this returns the absolute byte offset /// If binary data was detected, then this returns the absolute byte offset
/// at which binary data was initially found. /// at which binary data was initially found.
pub fn binary_byte_offset(&self) -> Option<u64> { pub(crate) fn binary_byte_offset(&self) -> Option<u64> {
self.line_buffer.binary_byte_offset() self.line_buffer.binary_byte_offset()
} }
@ -248,25 +253,25 @@ impl<'b, R: io::Read> LineBufferReader<'b, R> {
/// This forwards any errors returned by the underlying reader, and will /// This forwards any errors returned by the underlying reader, and will
/// also return an error if the buffer must be expanded past its allocation /// also return an error if the buffer must be expanded past its allocation
/// limit, as governed by the buffer allocation strategy. /// limit, as governed by the buffer allocation strategy.
pub fn fill(&mut self) -> Result<bool, io::Error> { pub(crate) fn fill(&mut self) -> Result<bool, io::Error> {
self.line_buffer.fill(&mut self.rdr) self.line_buffer.fill(&mut self.rdr)
} }
/// Return the contents of this buffer. /// Return the contents of this buffer.
pub fn buffer(&self) -> &[u8] { pub(crate) fn buffer(&self) -> &[u8] {
self.line_buffer.buffer() self.line_buffer.buffer()
} }
/// Return the buffer as a BStr, used for convenient equality checking /// Return the buffer as a BStr, used for convenient equality checking
/// in tests only. /// in tests only.
#[cfg(test)] #[cfg(test)]
fn bstr(&self) -> &::bstr::BStr { fn bstr(&self) -> &bstr::BStr {
self.buffer().as_bstr() self.buffer().as_bstr()
} }
/// Consume the number of bytes provided. This must be less than or equal /// Consume the number of bytes provided. This must be less than or equal
/// to the number of bytes returned by `buffer`. /// to the number of bytes returned by `buffer`.
pub fn consume(&mut self, amt: usize) { pub(crate) fn consume(&mut self, amt: usize) {
self.line_buffer.consume(amt); self.line_buffer.consume(amt);
} }
@ -286,7 +291,7 @@ impl<'b, R: io::Read> LineBufferReader<'b, R> {
/// Line buffers cannot be used directly, but instead must be used via the /// Line buffers cannot be used directly, but instead must be used via the
/// LineBufferReader. /// LineBufferReader.
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct LineBuffer { pub(crate) struct LineBuffer {
/// The configuration of this buffer. /// The configuration of this buffer.
config: Config, config: Config,
/// The primary buffer with which to hold data. /// The primary buffer with which to hold data.
@ -322,7 +327,7 @@ impl LineBuffer {
/// ///
/// This permits dynamically changing the binary detection strategy on /// This permits dynamically changing the binary detection strategy on
/// an existing line buffer without needing to create a new one. /// an existing line buffer without needing to create a new one.
pub fn set_binary_detection(&mut self, binary: BinaryDetection) { pub(crate) fn set_binary_detection(&mut self, binary: BinaryDetection) {
self.config.binary = binary; self.config.binary = binary;
} }
@ -497,12 +502,12 @@ impl LineBuffer {
} }
// `len` is used for computing the next allocation size. The capacity // `len` is used for computing the next allocation size. The capacity
// is permitted to start at `0`, so we make sure it's at least `1`. // is permitted to start at `0`, so we make sure it's at least `1`.
let len = cmp::max(1, self.buf.len()); let len = std::cmp::max(1, self.buf.len());
let additional = match self.config.buffer_alloc { let additional = match self.config.buffer_alloc {
BufferAllocation::Eager => len * 2, BufferAllocation::Eager => len * 2,
BufferAllocation::Error(limit) => { BufferAllocation::Error(limit) => {
let used = self.buf.len() - self.config.capacity; let used = self.buf.len() - self.config.capacity;
let n = cmp::min(len * 2, limit - used); let n = std::cmp::min(len * 2, limit - used);
if n == 0 { if n == 0 {
return Err(alloc_error(self.config.capacity + limit)); return Err(alloc_error(self.config.capacity + limit));
} }
@ -541,9 +546,9 @@ fn replace_bytes(bytes: &mut [u8], src: u8, replacement: u8) -> Option<usize> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*;
use bstr::{ByteSlice, ByteVec}; use bstr::{ByteSlice, ByteVec};
use std::str;
use super::*;
const SHERLOCK: &'static str = "\ const SHERLOCK: &'static str = "\
For the Doctor Watsons of this world, as opposed to the Sherlock For the Doctor Watsons of this world, as opposed to the Sherlock

View File

@ -2,8 +2,10 @@
A collection of routines for performing operations on lines. A collection of routines for performing operations on lines.
*/ */
use bstr::ByteSlice; use {
use grep_matcher::{LineTerminator, Match}; bstr::ByteSlice,
grep_matcher::{LineTerminator, Match},
};
/// An iterator over lines in a particular slice of bytes. /// An iterator over lines in a particular slice of bytes.
/// ///
@ -21,10 +23,8 @@ impl<'b> LineIter<'b> {
/// Create a new line iterator that yields lines in the given bytes that /// Create a new line iterator that yields lines in the given bytes that
/// are terminated by `line_term`. /// are terminated by `line_term`.
pub fn new(line_term: u8, bytes: &'b [u8]) -> LineIter<'b> { pub fn new(line_term: u8, bytes: &'b [u8]) -> LineIter<'b> {
LineIter { let stepper = LineStep::new(line_term, 0, bytes.len());
bytes: bytes, LineIter { bytes, stepper }
stepper: LineStep::new(line_term, 0, bytes.len()),
}
} }
} }
@ -61,7 +61,7 @@ impl LineStep {
/// ///
/// This panics if `start` is not less than or equal to `end`. /// This panics if `start` is not less than or equal to `end`.
pub fn new(line_term: u8, start: usize, end: usize) -> LineStep { pub fn new(line_term: u8, start: usize, end: usize) -> LineStep {
LineStep { line_term, pos: start, end: end } LineStep { line_term, pos: start, end }
} }
/// Return the start and end position of the next line in the given bytes. /// Return the start and end position of the next line in the given bytes.
@ -108,14 +108,17 @@ impl LineStep {
} }
/// Count the number of occurrences of `line_term` in `bytes`. /// Count the number of occurrences of `line_term` in `bytes`.
pub fn count(bytes: &[u8], line_term: u8) -> u64 { pub(crate) fn count(bytes: &[u8], line_term: u8) -> u64 {
memchr::memchr_iter(line_term, bytes).count() as u64 memchr::memchr_iter(line_term, bytes).count() as u64
} }
/// Given a line that possibly ends with a terminator, return that line without /// Given a line that possibly ends with a terminator, return that line without
/// the terminator. /// the terminator.
#[inline(always)] #[inline(always)]
pub fn without_terminator(bytes: &[u8], line_term: LineTerminator) -> &[u8] { pub(crate) fn without_terminator(
bytes: &[u8],
line_term: LineTerminator,
) -> &[u8] {
let line_term = line_term.as_bytes(); let line_term = line_term.as_bytes();
let start = bytes.len().saturating_sub(line_term.len()); let start = bytes.len().saturating_sub(line_term.len());
if bytes.get(start..) == Some(line_term) { if bytes.get(start..) == Some(line_term) {
@ -129,7 +132,7 @@ pub fn without_terminator(bytes: &[u8], line_term: LineTerminator) -> &[u8] {
/// ///
/// Line terminators are considered part of the line they terminate. /// Line terminators are considered part of the line they terminate.
#[inline(always)] #[inline(always)]
pub fn locate(bytes: &[u8], line_term: u8, range: Match) -> Match { pub(crate) fn locate(bytes: &[u8], line_term: u8, range: Match) -> Match {
let line_start = let line_start =
bytes[..range.start()].rfind_byte(line_term).map_or(0, |i| i + 1); bytes[..range.start()].rfind_byte(line_term).map_or(0, |i| i + 1);
let line_end = let line_end =
@ -151,7 +154,7 @@ pub fn locate(bytes: &[u8], line_term: u8, range: Match) -> Match {
/// ///
/// If `bytes` ends with a line terminator, then the terminator itself is /// If `bytes` ends with a line terminator, then the terminator itself is
/// considered part of the last line. /// considered part of the last line.
pub fn preceding(bytes: &[u8], line_term: u8, count: usize) -> usize { pub(crate) fn preceding(bytes: &[u8], line_term: u8, count: usize) -> usize {
preceding_by_pos(bytes, bytes.len(), line_term, count) preceding_by_pos(bytes, bytes.len(), line_term, count)
} }
@ -195,10 +198,9 @@ fn preceding_by_pos(
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*;
use grep_matcher::Match; use grep_matcher::Match;
use std::ops::Range;
use std::str; use super::*;
const SHERLOCK: &'static str = "\ const SHERLOCK: &'static str = "\
For the Doctor Watsons of this world, as opposed to the Sherlock For the Doctor Watsons of this world, as opposed to the Sherlock
@ -222,7 +224,7 @@ and exhibited clearly, with a label attached.\
results results
} }
fn line_ranges(text: &str) -> Vec<Range<usize>> { fn line_ranges(text: &str) -> Vec<std::ops::Range<usize>> {
let mut results = vec![]; let mut results = vec![];
let mut it = LineStep::new(b'\n', 0, text.len()); let mut it = LineStep::new(b'\n', 0, text.len());
while let Some(m) = it.next_match(text.as_bytes()) { while let Some(m) = it.next_match(text.as_bytes()) {

View File

@ -1,15 +1,16 @@
use std::cmp;
use bstr::ByteSlice; use bstr::ByteSlice;
use crate::line_buffer::BinaryDetection;
use crate::lines::{self, LineStep};
use crate::searcher::{Config, Range, Searcher};
use crate::sink::{
Sink, SinkContext, SinkContextKind, SinkError, SinkFinish, SinkMatch,
};
use grep_matcher::{LineMatchKind, Matcher}; use grep_matcher::{LineMatchKind, Matcher};
use crate::{
line_buffer::BinaryDetection,
lines::{self, LineStep},
searcher::{Config, Range, Searcher},
sink::{
Sink, SinkContext, SinkContextKind, SinkError, SinkFinish, SinkMatch,
},
};
enum FastMatchResult { enum FastMatchResult {
Continue, Continue,
Stop, Stop,
@ -17,7 +18,7 @@ enum FastMatchResult {
} }
#[derive(Debug)] #[derive(Debug)]
pub struct Core<'s, M: 's, S> { pub(crate) struct Core<'s, M: 's, S> {
config: &'s Config, config: &'s Config,
matcher: M, matcher: M,
searcher: &'s Searcher, searcher: &'s Searcher,
@ -35,7 +36,7 @@ pub struct Core<'s, M: 's, S> {
} }
impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
pub fn new( pub(crate) fn new(
searcher: &'s Searcher, searcher: &'s Searcher,
matcher: M, matcher: M,
sink: S, sink: S,
@ -45,14 +46,14 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
if searcher.config.line_number { Some(1) } else { None }; if searcher.config.line_number { Some(1) } else { None };
let core = Core { let core = Core {
config: &searcher.config, config: &searcher.config,
matcher: matcher, matcher,
searcher: searcher, searcher,
sink: sink, sink,
binary: binary, binary,
pos: 0, pos: 0,
absolute_byte_offset: 0, absolute_byte_offset: 0,
binary_byte_offset: None, binary_byte_offset: None,
line_number: line_number, line_number,
last_line_counted: 0, last_line_counted: 0,
last_line_visited: 0, last_line_visited: 0,
after_context_left: 0, after_context_left: 0,
@ -69,23 +70,23 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
core core
} }
pub fn pos(&self) -> usize { pub(crate) fn pos(&self) -> usize {
self.pos self.pos
} }
pub fn set_pos(&mut self, pos: usize) { pub(crate) fn set_pos(&mut self, pos: usize) {
self.pos = pos; self.pos = pos;
} }
pub fn binary_byte_offset(&self) -> Option<u64> { pub(crate) fn binary_byte_offset(&self) -> Option<u64> {
self.binary_byte_offset.map(|offset| offset as u64) self.binary_byte_offset.map(|offset| offset as u64)
} }
pub fn matcher(&self) -> &M { pub(crate) fn matcher(&self) -> &M {
&self.matcher &self.matcher
} }
pub fn matched( pub(crate) fn matched(
&mut self, &mut self,
buf: &[u8], buf: &[u8],
range: &Range, range: &Range,
@ -93,18 +94,18 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
self.sink_matched(buf, range) self.sink_matched(buf, range)
} }
pub fn binary_data( pub(crate) fn binary_data(
&mut self, &mut self,
binary_byte_offset: u64, binary_byte_offset: u64,
) -> Result<bool, S::Error> { ) -> Result<bool, S::Error> {
self.sink.binary_data(&self.searcher, binary_byte_offset) self.sink.binary_data(&self.searcher, binary_byte_offset)
} }
pub fn begin(&mut self) -> Result<bool, S::Error> { pub(crate) fn begin(&mut self) -> Result<bool, S::Error> {
self.sink.begin(&self.searcher) self.sink.begin(&self.searcher)
} }
pub fn finish( pub(crate) fn finish(
&mut self, &mut self,
byte_count: u64, byte_count: u64,
binary_byte_offset: Option<u64>, binary_byte_offset: Option<u64>,
@ -115,7 +116,10 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
) )
} }
pub fn match_by_line(&mut self, buf: &[u8]) -> Result<bool, S::Error> { pub(crate) fn match_by_line(
&mut self,
buf: &[u8],
) -> Result<bool, S::Error> {
if self.is_line_by_line_fast() { if self.is_line_by_line_fast() {
match self.match_by_line_fast(buf)? { match self.match_by_line_fast(buf)? {
FastMatchResult::SwitchToSlow => self.match_by_line_slow(buf), FastMatchResult::SwitchToSlow => self.match_by_line_slow(buf),
@ -127,7 +131,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
} }
} }
pub fn roll(&mut self, buf: &[u8]) -> usize { pub(crate) fn roll(&mut self, buf: &[u8]) -> usize {
let consumed = if self.config.max_context() == 0 { let consumed = if self.config.max_context() == 0 {
buf.len() buf.len()
} else { } else {
@ -141,7 +145,8 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
self.config.line_term.as_byte(), self.config.line_term.as_byte(),
self.config.max_context(), self.config.max_context(),
); );
let consumed = cmp::max(context_start, self.last_line_visited); let consumed =
std::cmp::max(context_start, self.last_line_visited);
consumed consumed
}; };
self.count_lines(buf, consumed); self.count_lines(buf, consumed);
@ -152,7 +157,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
consumed consumed
} }
pub fn detect_binary( pub(crate) fn detect_binary(
&mut self, &mut self,
buf: &[u8], buf: &[u8],
range: &Range, range: &Range,
@ -177,7 +182,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
} }
} }
pub fn before_context_by_line( pub(crate) fn before_context_by_line(
&mut self, &mut self,
buf: &[u8], buf: &[u8],
upto: usize, upto: usize,
@ -213,7 +218,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
Ok(true) Ok(true)
} }
pub fn after_context_by_line( pub(crate) fn after_context_by_line(
&mut self, &mut self,
buf: &[u8], buf: &[u8],
upto: usize, upto: usize,
@ -238,7 +243,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
Ok(true) Ok(true)
} }
pub fn other_context_by_line( pub(crate) fn other_context_by_line(
&mut self, &mut self,
buf: &[u8], buf: &[u8],
upto: usize, upto: usize,

View File

@ -1,16 +1,14 @@
use std::cmp;
use std::io;
use crate::line_buffer::{LineBufferReader, DEFAULT_BUFFER_CAPACITY};
use crate::lines::{self, LineStep};
use crate::sink::{Sink, SinkError};
use grep_matcher::Matcher; use grep_matcher::Matcher;
use crate::searcher::core::Core; use crate::{
use crate::searcher::{Config, Range, Searcher}; line_buffer::{LineBufferReader, DEFAULT_BUFFER_CAPACITY},
lines::{self, LineStep},
searcher::{core::Core, Config, Range, Searcher},
sink::{Sink, SinkError},
};
#[derive(Debug)] #[derive(Debug)]
pub struct ReadByLine<'s, M, R, S> { pub(crate) struct ReadByLine<'s, M, R, S> {
config: &'s Config, config: &'s Config,
core: Core<'s, M, S>, core: Core<'s, M, S>,
rdr: LineBufferReader<'s, R>, rdr: LineBufferReader<'s, R>,
@ -19,10 +17,10 @@ pub struct ReadByLine<'s, M, R, S> {
impl<'s, M, R, S> ReadByLine<'s, M, R, S> impl<'s, M, R, S> ReadByLine<'s, M, R, S>
where where
M: Matcher, M: Matcher,
R: io::Read, R: std::io::Read,
S: Sink, S: Sink,
{ {
pub fn new( pub(crate) fn new(
searcher: &'s Searcher, searcher: &'s Searcher,
matcher: M, matcher: M,
read_from: LineBufferReader<'s, R>, read_from: LineBufferReader<'s, R>,
@ -37,7 +35,7 @@ where
} }
} }
pub fn run(mut self) -> Result<(), S::Error> { pub(crate) fn run(mut self) -> Result<(), S::Error> {
if self.core.begin()? { if self.core.begin()? {
while self.fill()? && self.core.match_by_line(self.rdr.buffer())? { while self.fill()? && self.core.match_by_line(self.rdr.buffer())? {
} }
@ -87,13 +85,13 @@ where
} }
#[derive(Debug)] #[derive(Debug)]
pub struct SliceByLine<'s, M, S> { pub(crate) struct SliceByLine<'s, M, S> {
core: Core<'s, M, S>, core: Core<'s, M, S>,
slice: &'s [u8], slice: &'s [u8],
} }
impl<'s, M: Matcher, S: Sink> SliceByLine<'s, M, S> { impl<'s, M: Matcher, S: Sink> SliceByLine<'s, M, S> {
pub fn new( pub(crate) fn new(
searcher: &'s Searcher, searcher: &'s Searcher,
matcher: M, matcher: M,
slice: &'s [u8], slice: &'s [u8],
@ -103,14 +101,14 @@ impl<'s, M: Matcher, S: Sink> SliceByLine<'s, M, S> {
SliceByLine { SliceByLine {
core: Core::new(searcher, matcher, write_to, true), core: Core::new(searcher, matcher, write_to, true),
slice: slice, slice,
} }
} }
pub fn run(mut self) -> Result<(), S::Error> { pub(crate) fn run(mut self) -> Result<(), S::Error> {
if self.core.begin()? { if self.core.begin()? {
let binary_upto = let binary_upto =
cmp::min(self.slice.len(), DEFAULT_BUFFER_CAPACITY); std::cmp::min(self.slice.len(), DEFAULT_BUFFER_CAPACITY);
let binary_range = Range::new(0, binary_upto); let binary_range = Range::new(0, binary_upto);
if !self.core.detect_binary(self.slice, &binary_range)? { if !self.core.detect_binary(self.slice, &binary_range)? {
while !self.slice[self.core.pos()..].is_empty() while !self.slice[self.core.pos()..].is_empty()
@ -132,7 +130,7 @@ impl<'s, M: Matcher, S: Sink> SliceByLine<'s, M, S> {
} }
#[derive(Debug)] #[derive(Debug)]
pub struct MultiLine<'s, M, S> { pub(crate) struct MultiLine<'s, M, S> {
config: &'s Config, config: &'s Config,
core: Core<'s, M, S>, core: Core<'s, M, S>,
slice: &'s [u8], slice: &'s [u8],
@ -140,7 +138,7 @@ pub struct MultiLine<'s, M, S> {
} }
impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> { impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> {
pub fn new( pub(crate) fn new(
searcher: &'s Searcher, searcher: &'s Searcher,
matcher: M, matcher: M,
slice: &'s [u8], slice: &'s [u8],
@ -151,15 +149,15 @@ impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> {
MultiLine { MultiLine {
config: &searcher.config, config: &searcher.config,
core: Core::new(searcher, matcher, write_to, true), core: Core::new(searcher, matcher, write_to, true),
slice: slice, slice,
last_match: None, last_match: None,
} }
} }
pub fn run(mut self) -> Result<(), S::Error> { pub(crate) fn run(mut self) -> Result<(), S::Error> {
if self.core.begin()? { if self.core.begin()? {
let binary_upto = let binary_upto =
cmp::min(self.slice.len(), DEFAULT_BUFFER_CAPACITY); std::cmp::min(self.slice.len(), DEFAULT_BUFFER_CAPACITY);
let binary_range = Range::new(0, binary_upto); let binary_range = Range::new(0, binary_upto);
if !self.core.detect_binary(self.slice, &binary_range)? { if !self.core.detect_binary(self.slice, &binary_range)? {
let mut keepgoing = true; let mut keepgoing = true;
@ -347,8 +345,10 @@ impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use crate::searcher::{BinaryDetection, SearcherBuilder}; use crate::{
use crate::testutil::{KitchenSink, RegexMatcher, SearcherTester}; searcher::{BinaryDetection, SearcherBuilder},
testutil::{KitchenSink, RegexMatcher, SearcherTester},
};
use super::*; use super::*;

View File

@ -1,5 +1,4 @@
use std::fs::File; use std::{fs::File, path::Path};
use std::path::Path;
use memmap::Mmap; use memmap::Mmap;

View File

@ -1,19 +1,25 @@
use std::cell::RefCell; use std::{
use std::cmp; cell::RefCell,
use std::fmt; cmp,
use std::fs::File; fs::File,
use std::io::{self, Read}; io::{self, Read},
use std::path::Path; path::Path,
};
use crate::line_buffer::{
self, alloc_error, BufferAllocation, LineBuffer, LineBufferBuilder, use {
LineBufferReader, DEFAULT_BUFFER_CAPACITY, encoding_rs,
encoding_rs_io::DecodeReaderBytesBuilder,
grep_matcher::{LineTerminator, Match, Matcher},
};
use crate::{
line_buffer::{
self, alloc_error, BufferAllocation, LineBuffer, LineBufferBuilder,
LineBufferReader, DEFAULT_BUFFER_CAPACITY,
},
searcher::glue::{MultiLine, ReadByLine, SliceByLine},
sink::{Sink, SinkError},
}; };
use crate::searcher::glue::{MultiLine, ReadByLine, SliceByLine};
use crate::sink::{Sink, SinkError};
use encoding_rs;
use encoding_rs_io::DecodeReaderBytesBuilder;
use grep_matcher::{LineTerminator, Match, Matcher};
pub use self::mmap::MmapChoice; pub use self::mmap::MmapChoice;
@ -232,6 +238,7 @@ impl Config {
/// This error occurs when a nonsensical configuration is present when trying /// This error occurs when a nonsensical configuration is present when trying
/// to construct a `Searcher` from a `SearcherBuilder`. /// to construct a `Searcher` from a `SearcherBuilder`.
#[derive(Clone, Debug, Eq, PartialEq)] #[derive(Clone, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub enum ConfigError { pub enum ConfigError {
/// Indicates that the heap limit configuration prevents all possible /// Indicates that the heap limit configuration prevents all possible
/// search strategies from being used. For example, if the heap limit is /// search strategies from being used. For example, if the heap limit is
@ -250,23 +257,12 @@ pub enum ConfigError {
/// The provided encoding label that could not be found. /// The provided encoding label that could not be found.
label: Vec<u8>, label: Vec<u8>,
}, },
/// Hints that destructuring should not be exhaustive.
///
/// This enum may grow additional variants, so this makes sure clients
/// don't count on exhaustive matching. (Otherwise, adding a new variant
/// could break existing code.)
#[doc(hidden)]
__Nonexhaustive,
} }
impl ::std::error::Error for ConfigError { impl std::error::Error for ConfigError {}
fn description(&self) -> &str {
"grep-searcher configuration error"
}
}
impl fmt::Display for ConfigError { impl std::fmt::Display for ConfigError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match *self { match *self {
ConfigError::SearchUnavailable => { ConfigError::SearchUnavailable => {
write!(f, "grep config error: no available searchers") write!(f, "grep config error: no available searchers")
@ -284,7 +280,6 @@ impl fmt::Display for ConfigError {
"grep config error: unknown encoding: {}", "grep config error: unknown encoding: {}",
String::from_utf8_lossy(label), String::from_utf8_lossy(label),
), ),
_ => panic!("BUG: unexpected variant found"),
} }
} }
} }
@ -331,8 +326,8 @@ impl SearcherBuilder {
.bom_sniffing(self.config.bom_sniffing); .bom_sniffing(self.config.bom_sniffing);
Searcher { Searcher {
config: config, config,
decode_builder: decode_builder, decode_builder,
decode_buffer: RefCell::new(vec![0; 8 * (1 << 10)]), decode_buffer: RefCell::new(vec![0; 8 * (1 << 10)]),
line_buffer: RefCell::new(self.config.line_buffer()), line_buffer: RefCell::new(self.config.line_buffer()),
multi_line_buffer: RefCell::new(vec![]), multi_line_buffer: RefCell::new(vec![]),
@ -676,9 +671,9 @@ impl Searcher {
log::trace!("{:?}: searching via memory map", path); log::trace!("{:?}: searching via memory map", path);
return self.search_slice(matcher, &mmap, write_to); return self.search_slice(matcher, &mmap, write_to);
} }
// Fast path for multi-line searches of files when memory maps are // Fast path for multi-line searches of files when memory maps are not
// not enabled. This pre-allocates a buffer roughly the size of the // enabled. This pre-allocates a buffer roughly the size of the file,
// file, which isn't possible when searching an arbitrary io::Read. // which isn't possible when searching an arbitrary std::io::Read.
if self.multi_line_with_matcher(&matcher) { if self.multi_line_with_matcher(&matcher) {
log::trace!( log::trace!(
"{:?}: reading entire file on to heap for mulitline", "{:?}: reading entire file on to heap for mulitline",
@ -699,8 +694,8 @@ impl Searcher {
} }
} }
/// Execute a search over any implementation of `io::Read` and write the /// Execute a search over any implementation of `std::io::Read` and write
/// results to the given sink. /// the results to the given sink.
/// ///
/// When possible, this implementation will search the reader incrementally /// When possible, this implementation will search the reader incrementally
/// without reading it into memory. In some cases---for example, if multi /// without reading it into memory. In some cases---for example, if multi
@ -1016,9 +1011,10 @@ fn slice_has_bom(slice: &[u8]) -> bool {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*;
use crate::testutil::{KitchenSink, RegexMatcher}; use crate::testutil::{KitchenSink, RegexMatcher};
use super::*;
#[test] #[test]
fn config_error_heap_limit() { fn config_error_heap_limit() {
let matcher = RegexMatcher::new(""); let matcher = RegexMatcher::new("");

View File

@ -1,23 +1,24 @@
use std::error;
use std::fmt;
use std::io; use std::io;
use grep_matcher::LineTerminator; use grep_matcher::LineTerminator;
use crate::lines::LineIter; use crate::{
use crate::searcher::{ConfigError, Searcher}; lines::LineIter,
searcher::{ConfigError, Searcher},
};
/// A trait that describes errors that can be reported by searchers and /// A trait that describes errors that can be reported by searchers and
/// implementations of `Sink`. /// implementations of `Sink`.
/// ///
/// Unless you have a specialized use case, you probably don't need to /// Unless you have a specialized use case, you probably don't need to
/// implement this trait explicitly. It's likely that using `io::Error` (which /// implement this trait explicitly. It's likely that using `std::io::Error`
/// implements this trait) for your error type is good enough, largely because /// (which implements this trait) for your error type is good enough,
/// most errors that occur during search will likely be an `io::Error`. /// largely because most errors that occur during search will likely be an
/// `std::io::Error`.
pub trait SinkError: Sized { pub trait SinkError: Sized {
/// A constructor for converting any value that satisfies the /// A constructor for converting any value that satisfies the
/// `fmt::Display` trait into an error. /// `std::fmt::Display` trait into an error.
fn error_message<T: fmt::Display>(message: T) -> Self; fn error_message<T: std::fmt::Display>(message: T) -> Self;
/// A constructor for converting I/O errors that occur while searching into /// A constructor for converting I/O errors that occur while searching into
/// an error of this type. /// an error of this type.
@ -36,10 +37,10 @@ pub trait SinkError: Sized {
} }
} }
/// An `io::Error` can be used as an error for `Sink` implementations out of /// An `std::io::Error` can be used as an error for `Sink` implementations out
/// the box. /// of the box.
impl SinkError for io::Error { impl SinkError for io::Error {
fn error_message<T: fmt::Display>(message: T) -> io::Error { fn error_message<T: std::fmt::Display>(message: T) -> io::Error {
io::Error::new(io::ErrorKind::Other, message.to_string()) io::Error::new(io::ErrorKind::Other, message.to_string())
} }
@ -48,11 +49,13 @@ impl SinkError for io::Error {
} }
} }
/// A `Box<std::error::Error>` can be used as an error for `Sink` /// A `Box<dyn std::error::Error>` can be used as an error for `Sink`
/// implementations out of the box. /// implementations out of the box.
impl SinkError for Box<dyn error::Error> { impl SinkError for Box<dyn std::error::Error> {
fn error_message<T: fmt::Display>(message: T) -> Box<dyn error::Error> { fn error_message<T: std::fmt::Display>(
Box::<dyn error::Error>::from(message.to_string()) message: T,
) -> Box<dyn std::error::Error> {
Box::<dyn std::error::Error>::from(message.to_string())
} }
} }
@ -74,7 +77,7 @@ impl SinkError for Box<dyn error::Error> {
/// ///
/// * What to do when a match is found. Callers must provide this. /// * What to do when a match is found. Callers must provide this.
/// * What to do when an error occurs. Callers must provide this via the /// * What to do when an error occurs. Callers must provide this via the
/// [`SinkError`] trait. Generally, callers can just use `io::Error` for /// [`SinkError`] trait. Generally, callers can just use `std::io::Error` for
/// this, which already implements `SinkError`. /// this, which already implements `SinkError`.
/// * What to do when a contextual line is found. By default, these are /// * What to do when a contextual line is found. By default, these are
/// ignored. /// ignored.
@ -408,13 +411,14 @@ impl<'b> SinkMatch<'b> {
self.line_number self.line_number
} }
/// TODO /// Exposes as much of the underlying buffer that was searched as possible.
#[inline] #[inline]
pub fn buffer(&self) -> &'b [u8] { pub fn buffer(&self) -> &'b [u8] {
self.buffer self.buffer
} }
/// TODO /// Returns a range that corresponds to where [`SinkMatch::bytes`] appears
/// in [`SinkMatch::buffer`].
#[inline] #[inline]
pub fn bytes_range_in_buffer(&self) -> std::ops::Range<usize> { pub fn bytes_range_in_buffer(&self) -> std::ops::Range<usize> {
self.bytes_range_in_buffer.clone() self.bytes_range_in_buffer.clone()
@ -506,16 +510,16 @@ impl<'b> SinkContext<'b> {
/// an error is reported at the first match and searching stops. /// an error is reported at the first match and searching stops.
/// * Context lines, context breaks and summary data reported at the end of /// * Context lines, context breaks and summary data reported at the end of
/// a search are all ignored. /// a search are all ignored.
/// * Implementors are forced to use `io::Error` as their error type. /// * Implementors are forced to use `std::io::Error` as their error type.
/// ///
/// If you need more flexibility, then you're advised to implement the `Sink` /// If you need more flexibility, then you're advised to implement the `Sink`
/// trait directly. /// trait directly.
pub mod sinks { pub mod sinks {
use std::io; use std::io;
use std::str;
use crate::searcher::Searcher;
use super::{Sink, SinkError, SinkMatch}; use super::{Sink, SinkError, SinkMatch};
use crate::searcher::Searcher;
/// A sink that provides line numbers and matches as strings while ignoring /// A sink that provides line numbers and matches as strings while ignoring
/// everything else. /// everything else.
@ -527,8 +531,8 @@ pub mod sinks {
/// ///
/// The closure accepts two parameters: a line number and a UTF-8 string /// The closure accepts two parameters: a line number and a UTF-8 string
/// containing the matched data. The closure returns a /// containing the matched data. The closure returns a
/// `Result<bool, io::Error>`. If the `bool` is `false`, then the search /// `Result<bool, std::io::Error>`. If the `bool` is `false`, then the
/// stops immediately. Otherwise, searching continues. /// search stops immediately. Otherwise, searching continues.
/// ///
/// If multi line mode was enabled, the line number refers to the line /// If multi line mode was enabled, the line number refers to the line
/// number of the first line in the match. /// number of the first line in the match.
@ -548,7 +552,7 @@ pub mod sinks {
_searcher: &Searcher, _searcher: &Searcher,
mat: &SinkMatch<'_>, mat: &SinkMatch<'_>,
) -> Result<bool, io::Error> { ) -> Result<bool, io::Error> {
let matched = match str::from_utf8(mat.bytes()) { let matched = match std::str::from_utf8(mat.bytes()) {
Ok(matched) => matched, Ok(matched) => matched,
Err(err) => return Err(io::Error::error_message(err)), Err(err) => return Err(io::Error::error_message(err)),
}; };
@ -575,8 +579,8 @@ pub mod sinks {
/// ///
/// The closure accepts two parameters: a line number and a UTF-8 string /// The closure accepts two parameters: a line number and a UTF-8 string
/// containing the matched data. The closure returns a /// containing the matched data. The closure returns a
/// `Result<bool, io::Error>`. If the `bool` is `false`, then the search /// `Result<bool, std::io::Error>`. If the `bool` is `false`, then the
/// stops immediately. Otherwise, searching continues. /// search stops immediately. Otherwise, searching continues.
/// ///
/// If multi line mode was enabled, the line number refers to the line /// If multi line mode was enabled, the line number refers to the line
/// number of the first line in the match. /// number of the first line in the match.
@ -598,7 +602,7 @@ pub mod sinks {
) -> Result<bool, io::Error> { ) -> Result<bool, io::Error> {
use std::borrow::Cow; use std::borrow::Cow;
let matched = match str::from_utf8(mat.bytes()) { let matched = match std::str::from_utf8(mat.bytes()) {
Ok(matched) => Cow::Borrowed(matched), Ok(matched) => Cow::Borrowed(matched),
// TODO: In theory, it should be possible to amortize // TODO: In theory, it should be possible to amortize
// allocation here, but `std` doesn't provide such an API. // allocation here, but `std` doesn't provide such an API.
@ -624,9 +628,9 @@ pub mod sinks {
/// searcher was not configured to count lines. /// searcher was not configured to count lines.
/// ///
/// The closure accepts two parameters: a line number and a raw byte string /// The closure accepts two parameters: a line number and a raw byte string
/// containing the matched data. The closure returns a `Result<bool, /// containing the matched data. The closure returns a
/// io::Error>`. If the `bool` is `false`, then the search stops /// `Result<bool, std::io::Error>`. If the `bool` is `false`, then the
/// immediately. Otherwise, searching continues. /// search stops immediately. Otherwise, searching continues.
/// ///
/// If multi line mode was enabled, the line number refers to the line /// If multi line mode was enabled, the line number refers to the line
/// number of the first line in the match. /// number of the first line in the match.

View File

@ -1,14 +1,17 @@
use std::io::{self, Write}; use std::io::{self, Write};
use std::str;
use bstr::ByteSlice; use {
use grep_matcher::{ bstr::ByteSlice,
LineMatchKind, LineTerminator, Match, Matcher, NoCaptures, NoError, grep_matcher::{
LineMatchKind, LineTerminator, Match, Matcher, NoCaptures, NoError,
},
regex::bytes::{Regex, RegexBuilder},
}; };
use regex::bytes::{Regex, RegexBuilder};
use crate::searcher::{BinaryDetection, Searcher, SearcherBuilder}; use crate::{
use crate::sink::{Sink, SinkContext, SinkFinish, SinkMatch}; searcher::{BinaryDetection, Searcher, SearcherBuilder},
sink::{Sink, SinkContext, SinkFinish, SinkMatch},
};
/// A simple regex matcher. /// A simple regex matcher.
/// ///
@ -18,7 +21,7 @@ use crate::sink::{Sink, SinkContext, SinkFinish, SinkMatch};
/// this optimization is detected automatically by inspecting and possibly /// this optimization is detected automatically by inspecting and possibly
/// modifying the regex itself.) /// modifying the regex itself.)
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct RegexMatcher { pub(crate) struct RegexMatcher {
regex: Regex, regex: Regex,
line_term: Option<LineTerminator>, line_term: Option<LineTerminator>,
every_line_is_candidate: bool, every_line_is_candidate: bool,
@ -26,22 +29,18 @@ pub struct RegexMatcher {
impl RegexMatcher { impl RegexMatcher {
/// Create a new regex matcher. /// Create a new regex matcher.
pub fn new(pattern: &str) -> RegexMatcher { pub(crate) fn new(pattern: &str) -> RegexMatcher {
let regex = RegexBuilder::new(pattern) let regex = RegexBuilder::new(pattern)
.multi_line(true) // permits ^ and $ to match at \n boundaries .multi_line(true) // permits ^ and $ to match at \n boundaries
.build() .build()
.unwrap(); .unwrap();
RegexMatcher { RegexMatcher { regex, line_term: None, every_line_is_candidate: false }
regex: regex,
line_term: None,
every_line_is_candidate: false,
}
} }
/// Forcefully set the line terminator of this matcher. /// Forcefully set the line terminator of this matcher.
/// ///
/// By default, this matcher has no line terminator set. /// By default, this matcher has no line terminator set.
pub fn set_line_term( pub(crate) fn set_line_term(
&mut self, &mut self,
line_term: Option<LineTerminator>, line_term: Option<LineTerminator>,
) -> &mut RegexMatcher { ) -> &mut RegexMatcher {
@ -52,7 +51,10 @@ impl RegexMatcher {
/// Whether to return every line as a candidate or not. /// Whether to return every line as a candidate or not.
/// ///
/// This forces searchers to handle the case of reporting a false positive. /// This forces searchers to handle the case of reporting a false positive.
pub fn every_line_is_candidate(&mut self, yes: bool) -> &mut RegexMatcher { pub(crate) fn every_line_is_candidate(
&mut self,
yes: bool,
) -> &mut RegexMatcher {
self.every_line_is_candidate = yes; self.every_line_is_candidate = yes;
self self
} }
@ -108,17 +110,17 @@ impl Matcher for RegexMatcher {
/// This is useful for tests because it lets us easily confirm whether data /// This is useful for tests because it lets us easily confirm whether data
/// is being passed to Sink correctly. /// is being passed to Sink correctly.
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct KitchenSink(Vec<u8>); pub(crate) struct KitchenSink(Vec<u8>);
impl KitchenSink { impl KitchenSink {
/// Create a new implementation of Sink that includes everything in the /// Create a new implementation of Sink that includes everything in the
/// kitchen. /// kitchen.
pub fn new() -> KitchenSink { pub(crate) fn new() -> KitchenSink {
KitchenSink(vec![]) KitchenSink(vec![])
} }
/// Return the data written to this sink. /// Return the data written to this sink.
pub fn as_bytes(&self) -> &[u8] { pub(crate) fn as_bytes(&self) -> &[u8] {
&self.0 &self.0
} }
} }
@ -199,7 +201,7 @@ impl Sink for KitchenSink {
/// The tester works by assuming you want to test all pertinent code paths. /// The tester works by assuming you want to test all pertinent code paths.
/// These can be trimmed down as necessary via the various builder methods. /// These can be trimmed down as necessary via the various builder methods.
#[derive(Debug)] #[derive(Debug)]
pub struct SearcherTester { pub(crate) struct SearcherTester {
haystack: String, haystack: String,
pattern: String, pattern: String,
filter: Option<::regex::Regex>, filter: Option<::regex::Regex>,
@ -221,7 +223,7 @@ pub struct SearcherTester {
impl SearcherTester { impl SearcherTester {
/// Create a new tester for testing searchers. /// Create a new tester for testing searchers.
pub fn new(haystack: &str, pattern: &str) -> SearcherTester { pub(crate) fn new(haystack: &str, pattern: &str) -> SearcherTester {
SearcherTester { SearcherTester {
haystack: haystack.to_string(), haystack: haystack.to_string(),
pattern: pattern.to_string(), pattern: pattern.to_string(),
@ -245,7 +247,7 @@ impl SearcherTester {
/// Execute the test. If the test succeeds, then this returns successfully. /// Execute the test. If the test succeeds, then this returns successfully.
/// If the test fails, then it panics with an informative message. /// If the test fails, then it panics with an informative message.
pub fn test(&self) { pub(crate) fn test(&self) {
// Check for configuration errors. // Check for configuration errors.
if self.expected_no_line_number.is_none() { if self.expected_no_line_number.is_none() {
panic!("an 'expected' string with NO line numbers must be given"); panic!("an 'expected' string with NO line numbers must be given");
@ -300,7 +302,7 @@ impl SearcherTester {
/// printf debugging and only want one particular test configuration to /// printf debugging and only want one particular test configuration to
/// execute. /// execute.
#[allow(dead_code)] #[allow(dead_code)]
pub fn filter(&mut self, pattern: &str) -> &mut SearcherTester { pub(crate) fn filter(&mut self, pattern: &str) -> &mut SearcherTester {
self.filter = Some(::regex::Regex::new(pattern).unwrap()); self.filter = Some(::regex::Regex::new(pattern).unwrap());
self self
} }
@ -311,13 +313,13 @@ impl SearcherTester {
/// Note that in order to see these in tests that aren't failing, you'll /// Note that in order to see these in tests that aren't failing, you'll
/// want to use `cargo test -- --nocapture`. /// want to use `cargo test -- --nocapture`.
#[allow(dead_code)] #[allow(dead_code)]
pub fn print_labels(&mut self, yes: bool) -> &mut SearcherTester { pub(crate) fn print_labels(&mut self, yes: bool) -> &mut SearcherTester {
self.print_labels = yes; self.print_labels = yes;
self self
} }
/// Set the expected search results, without line numbers. /// Set the expected search results, without line numbers.
pub fn expected_no_line_number( pub(crate) fn expected_no_line_number(
&mut self, &mut self,
exp: &str, exp: &str,
) -> &mut SearcherTester { ) -> &mut SearcherTester {
@ -326,7 +328,7 @@ impl SearcherTester {
} }
/// Set the expected search results, with line numbers. /// Set the expected search results, with line numbers.
pub fn expected_with_line_number( pub(crate) fn expected_with_line_number(
&mut self, &mut self,
exp: &str, exp: &str,
) -> &mut SearcherTester { ) -> &mut SearcherTester {
@ -337,7 +339,7 @@ impl SearcherTester {
/// Set the expected search results, without line numbers, when performing /// Set the expected search results, without line numbers, when performing
/// a search on a slice. When not present, `expected_no_line_number` is /// a search on a slice. When not present, `expected_no_line_number` is
/// used instead. /// used instead.
pub fn expected_slice_no_line_number( pub(crate) fn expected_slice_no_line_number(
&mut self, &mut self,
exp: &str, exp: &str,
) -> &mut SearcherTester { ) -> &mut SearcherTester {
@ -349,7 +351,7 @@ impl SearcherTester {
/// search on a slice. When not present, `expected_with_line_number` is /// search on a slice. When not present, `expected_with_line_number` is
/// used instead. /// used instead.
#[allow(dead_code)] #[allow(dead_code)]
pub fn expected_slice_with_line_number( pub(crate) fn expected_slice_with_line_number(
&mut self, &mut self,
exp: &str, exp: &str,
) -> &mut SearcherTester { ) -> &mut SearcherTester {
@ -362,7 +364,7 @@ impl SearcherTester {
/// This is enabled by default. When enabled, the string that is expected /// This is enabled by default. When enabled, the string that is expected
/// when line numbers are present must be provided. Otherwise, the expected /// when line numbers are present must be provided. Otherwise, the expected
/// string isn't required. /// string isn't required.
pub fn line_number(&mut self, yes: bool) -> &mut SearcherTester { pub(crate) fn line_number(&mut self, yes: bool) -> &mut SearcherTester {
self.line_number = yes; self.line_number = yes;
self self
} }
@ -370,7 +372,7 @@ impl SearcherTester {
/// Whether to test search using the line-by-line searcher or not. /// Whether to test search using the line-by-line searcher or not.
/// ///
/// By default, this is enabled. /// By default, this is enabled.
pub fn by_line(&mut self, yes: bool) -> &mut SearcherTester { pub(crate) fn by_line(&mut self, yes: bool) -> &mut SearcherTester {
self.by_line = yes; self.by_line = yes;
self self
} }
@ -379,7 +381,7 @@ impl SearcherTester {
/// ///
/// By default, this is enabled. /// By default, this is enabled.
#[allow(dead_code)] #[allow(dead_code)]
pub fn multi_line(&mut self, yes: bool) -> &mut SearcherTester { pub(crate) fn multi_line(&mut self, yes: bool) -> &mut SearcherTester {
self.multi_line = yes; self.multi_line = yes;
self self
} }
@ -387,7 +389,7 @@ impl SearcherTester {
/// Whether to perform an inverted search or not. /// Whether to perform an inverted search or not.
/// ///
/// By default, this is disabled. /// By default, this is disabled.
pub fn invert_match(&mut self, yes: bool) -> &mut SearcherTester { pub(crate) fn invert_match(&mut self, yes: bool) -> &mut SearcherTester {
self.invert_match = yes; self.invert_match = yes;
self self
} }
@ -395,7 +397,7 @@ impl SearcherTester {
/// Whether to enable binary detection on all searches. /// Whether to enable binary detection on all searches.
/// ///
/// By default, this is disabled. /// By default, this is disabled.
pub fn binary_detection( pub(crate) fn binary_detection(
&mut self, &mut self,
detection: BinaryDetection, detection: BinaryDetection,
) -> &mut SearcherTester { ) -> &mut SearcherTester {
@ -412,7 +414,10 @@ impl SearcherTester {
/// impact the number of bytes searched when performing binary detection. /// impact the number of bytes searched when performing binary detection.
/// For convenience, it can be useful to disable the automatic heap limit /// For convenience, it can be useful to disable the automatic heap limit
/// test. /// test.
pub fn auto_heap_limit(&mut self, yes: bool) -> &mut SearcherTester { pub(crate) fn auto_heap_limit(
&mut self,
yes: bool,
) -> &mut SearcherTester {
self.auto_heap_limit = yes; self.auto_heap_limit = yes;
self self
} }
@ -420,7 +425,10 @@ impl SearcherTester {
/// Set the number of lines to include in the "after" context. /// Set the number of lines to include in the "after" context.
/// ///
/// The default is `0`, which is equivalent to not printing any context. /// The default is `0`, which is equivalent to not printing any context.
pub fn after_context(&mut self, lines: usize) -> &mut SearcherTester { pub(crate) fn after_context(
&mut self,
lines: usize,
) -> &mut SearcherTester {
self.after_context = lines; self.after_context = lines;
self self
} }
@ -428,7 +436,10 @@ impl SearcherTester {
/// Set the number of lines to include in the "before" context. /// Set the number of lines to include in the "before" context.
/// ///
/// The default is `0`, which is equivalent to not printing any context. /// The default is `0`, which is equivalent to not printing any context.
pub fn before_context(&mut self, lines: usize) -> &mut SearcherTester { pub(crate) fn before_context(
&mut self,
lines: usize,
) -> &mut SearcherTester {
self.before_context = lines; self.before_context = lines;
self self
} }
@ -440,7 +451,7 @@ impl SearcherTester {
/// requesting an unbounded number of before and after contextual lines. /// requesting an unbounded number of before and after contextual lines.
/// ///
/// This is disabled by default. /// This is disabled by default.
pub fn passthru(&mut self, yes: bool) -> &mut SearcherTester { pub(crate) fn passthru(&mut self, yes: bool) -> &mut SearcherTester {
self.passthru = yes; self.passthru = yes;
self self
} }