mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-02-09 14:14:56 +02:00
searcher: polish
This updates some dependencies and brings code style in line with my current practice.
This commit is contained in:
parent
e30bbb8cff
commit
d53b7310ee
4
Cargo.lock
generated
4
Cargo.lock
generated
@ -319,9 +319,9 @@ checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c"
|
||||
|
||||
[[package]]
|
||||
name = "memmap2"
|
||||
version = "0.5.10"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327"
|
||||
checksum = "43a5a03cefb0d953ec0be133036f14e109412fa594edc2f77227249db66cc3ed"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
@ -11,16 +11,16 @@ repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/searcher"
|
||||
readme = "README.md"
|
||||
keywords = ["regex", "grep", "egrep", "search", "pattern"]
|
||||
license = "Unlicense OR MIT"
|
||||
edition = "2018"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
bstr = { version = "1.6.0", default-features = false, features = ["std"] }
|
||||
encoding_rs = "0.8.14"
|
||||
encoding_rs_io = "0.1.6"
|
||||
bstr = { version = "1.6.2", default-features = false, features = ["std"] }
|
||||
encoding_rs = "0.8.33"
|
||||
encoding_rs_io = "0.1.7"
|
||||
grep-matcher = { version = "0.1.6", path = "../matcher" }
|
||||
log = "0.4.5"
|
||||
memchr = "2.6.2"
|
||||
memmap = { package = "memmap2", version = "0.5.3" }
|
||||
log = "0.4.20"
|
||||
memchr = "2.6.3"
|
||||
memmap = { package = "memmap2", version = "0.8.0" }
|
||||
|
||||
[dev-dependencies]
|
||||
grep-regex = { version = "0.1.11", path = "../regex" }
|
||||
|
@ -38,12 +38,12 @@ This example shows how to execute the searcher and read the search results
|
||||
using the [`UTF8`](sinks::UTF8) implementation of `Sink`.
|
||||
|
||||
```
|
||||
use std::error::Error;
|
||||
|
||||
use grep_matcher::Matcher;
|
||||
use grep_regex::RegexMatcher;
|
||||
use grep_searcher::Searcher;
|
||||
use grep_searcher::sinks::UTF8;
|
||||
use {
|
||||
grep_matcher::Matcher,
|
||||
grep_regex::RegexMatcher,
|
||||
grep_searcher::Searcher,
|
||||
grep_searcher::sinks::UTF8,
|
||||
};
|
||||
|
||||
const SHERLOCK: &'static [u8] = b"\
|
||||
For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
@ -54,28 +54,26 @@ but Doctor Watson has to have it taken out for him and dusted,
|
||||
and exhibited clearly, with a label attached.
|
||||
";
|
||||
|
||||
# fn main() { example().unwrap() }
|
||||
fn example() -> Result<(), Box<Error>> {
|
||||
let matcher = RegexMatcher::new(r"Doctor \w+")?;
|
||||
let mut matches: Vec<(u64, String)> = vec![];
|
||||
Searcher::new().search_slice(&matcher, SHERLOCK, UTF8(|lnum, line| {
|
||||
// We are guaranteed to find a match, so the unwrap is OK.
|
||||
let mymatch = matcher.find(line.as_bytes())?.unwrap();
|
||||
matches.push((lnum, line[mymatch].to_string()));
|
||||
Ok(true)
|
||||
}))?;
|
||||
let matcher = RegexMatcher::new(r"Doctor \w+")?;
|
||||
let mut matches: Vec<(u64, String)> = vec![];
|
||||
Searcher::new().search_slice(&matcher, SHERLOCK, UTF8(|lnum, line| {
|
||||
// We are guaranteed to find a match, so the unwrap is OK.
|
||||
let mymatch = matcher.find(line.as_bytes())?.unwrap();
|
||||
matches.push((lnum, line[mymatch].to_string()));
|
||||
Ok(true)
|
||||
}))?;
|
||||
|
||||
assert_eq!(matches.len(), 2);
|
||||
assert_eq!(
|
||||
matches[0],
|
||||
(1, "Doctor Watsons".to_string())
|
||||
);
|
||||
assert_eq!(
|
||||
matches[1],
|
||||
(5, "Doctor Watson".to_string())
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
assert_eq!(matches.len(), 2);
|
||||
assert_eq!(
|
||||
matches[0],
|
||||
(1, "Doctor Watsons".to_string())
|
||||
);
|
||||
assert_eq!(
|
||||
matches[1],
|
||||
(5, "Doctor Watson".to_string())
|
||||
);
|
||||
|
||||
# Ok::<(), Box<dyn std::error::Error>>(())
|
||||
```
|
||||
|
||||
See also `examples/search-stdin.rs` from the root of this crate's directory
|
||||
@ -85,14 +83,16 @@ searches stdin.
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
pub use crate::lines::{LineIter, LineStep};
|
||||
pub use crate::searcher::{
|
||||
BinaryDetection, ConfigError, Encoding, MmapChoice, Searcher,
|
||||
SearcherBuilder,
|
||||
};
|
||||
pub use crate::sink::sinks;
|
||||
pub use crate::sink::{
|
||||
Sink, SinkContext, SinkContextKind, SinkError, SinkFinish, SinkMatch,
|
||||
pub use crate::{
|
||||
lines::{LineIter, LineStep},
|
||||
searcher::{
|
||||
BinaryDetection, ConfigError, Encoding, MmapChoice, Searcher,
|
||||
SearcherBuilder,
|
||||
},
|
||||
sink::{
|
||||
sinks, Sink, SinkContext, SinkContextKind, SinkError, SinkFinish,
|
||||
SinkMatch,
|
||||
},
|
||||
};
|
||||
|
||||
#[macro_use]
|
||||
|
@ -1,4 +1,3 @@
|
||||
use std::cmp;
|
||||
use std::io;
|
||||
|
||||
use bstr::ByteSlice;
|
||||
@ -15,7 +14,7 @@ pub(crate) const DEFAULT_BUFFER_CAPACITY: usize = 64 * (1 << 10); // 64 KB
|
||||
///
|
||||
/// The default is to eagerly allocate without a limit.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub enum BufferAllocation {
|
||||
pub(crate) enum BufferAllocation {
|
||||
/// Attempt to expand the size of the buffer until either at least the next
|
||||
/// line fits into memory or until all available memory is exhausted.
|
||||
///
|
||||
@ -35,7 +34,7 @@ impl Default for BufferAllocation {
|
||||
|
||||
/// Create a new error to be used when a configured allocation limit has been
|
||||
/// reached.
|
||||
pub fn alloc_error(limit: usize) -> io::Error {
|
||||
pub(crate) fn alloc_error(limit: usize) -> io::Error {
|
||||
let msg = format!("configured allocation limit ({}) exceeded", limit);
|
||||
io::Error::new(io::ErrorKind::Other, msg)
|
||||
}
|
||||
@ -49,7 +48,7 @@ pub fn alloc_error(limit: usize) -> io::Error {
|
||||
/// using textual patterns. Of course, there are many cases in which this isn't
|
||||
/// true, which is why binary detection is disabled by default.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub enum BinaryDetection {
|
||||
pub(crate) enum BinaryDetection {
|
||||
/// No binary detection is performed. Data reported by the line buffer may
|
||||
/// contain arbitrary bytes.
|
||||
None,
|
||||
@ -108,18 +107,18 @@ impl Default for Config {
|
||||
|
||||
/// A builder for constructing line buffers.
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct LineBufferBuilder {
|
||||
pub(crate) struct LineBufferBuilder {
|
||||
config: Config,
|
||||
}
|
||||
|
||||
impl LineBufferBuilder {
|
||||
/// Create a new builder for a buffer.
|
||||
pub fn new() -> LineBufferBuilder {
|
||||
pub(crate) fn new() -> LineBufferBuilder {
|
||||
LineBufferBuilder { config: Config::default() }
|
||||
}
|
||||
|
||||
/// Create a new line buffer from this builder's configuration.
|
||||
pub fn build(&self) -> LineBuffer {
|
||||
pub(crate) fn build(&self) -> LineBuffer {
|
||||
LineBuffer {
|
||||
config: self.config,
|
||||
buf: vec![0; self.config.capacity],
|
||||
@ -139,7 +138,10 @@ impl LineBufferBuilder {
|
||||
///
|
||||
/// This is set to a reasonable default and probably shouldn't be changed
|
||||
/// unless there's a specific reason to do so.
|
||||
pub fn capacity(&mut self, capacity: usize) -> &mut LineBufferBuilder {
|
||||
pub(crate) fn capacity(
|
||||
&mut self,
|
||||
capacity: usize,
|
||||
) -> &mut LineBufferBuilder {
|
||||
self.config.capacity = capacity;
|
||||
self
|
||||
}
|
||||
@ -155,7 +157,10 @@ impl LineBufferBuilder {
|
||||
/// is incomplete.
|
||||
///
|
||||
/// By default, this is set to `b'\n'`.
|
||||
pub fn line_terminator(&mut self, lineterm: u8) -> &mut LineBufferBuilder {
|
||||
pub(crate) fn line_terminator(
|
||||
&mut self,
|
||||
lineterm: u8,
|
||||
) -> &mut LineBufferBuilder {
|
||||
self.config.lineterm = lineterm;
|
||||
self
|
||||
}
|
||||
@ -174,7 +179,7 @@ impl LineBufferBuilder {
|
||||
/// a value of `0` is sensible, and in particular, will guarantee that a
|
||||
/// line buffer will never allocate additional memory beyond its initial
|
||||
/// capacity.
|
||||
pub fn buffer_alloc(
|
||||
pub(crate) fn buffer_alloc(
|
||||
&mut self,
|
||||
behavior: BufferAllocation,
|
||||
) -> &mut LineBufferBuilder {
|
||||
@ -188,7 +193,7 @@ impl LineBufferBuilder {
|
||||
///
|
||||
/// By default, this is disabled. In general, binary detection should be
|
||||
/// viewed as an imperfect heuristic.
|
||||
pub fn binary_detection(
|
||||
pub(crate) fn binary_detection(
|
||||
&mut self,
|
||||
detection: BinaryDetection,
|
||||
) -> &mut LineBufferBuilder {
|
||||
@ -200,7 +205,7 @@ impl LineBufferBuilder {
|
||||
/// A line buffer reader efficiently reads a line oriented buffer from an
|
||||
/// arbitrary reader.
|
||||
#[derive(Debug)]
|
||||
pub struct LineBufferReader<'b, R> {
|
||||
pub(crate) struct LineBufferReader<'b, R> {
|
||||
rdr: R,
|
||||
line_buffer: &'b mut LineBuffer,
|
||||
}
|
||||
@ -211,7 +216,7 @@ impl<'b, R: io::Read> LineBufferReader<'b, R> {
|
||||
///
|
||||
/// This does not change the binary detection behavior of the given line
|
||||
/// buffer.
|
||||
pub fn new(
|
||||
pub(crate) fn new(
|
||||
rdr: R,
|
||||
line_buffer: &'b mut LineBuffer,
|
||||
) -> LineBufferReader<'b, R> {
|
||||
@ -225,13 +230,13 @@ impl<'b, R: io::Read> LineBufferReader<'b, R> {
|
||||
/// correspond to an offset in memory. It is typically used for reporting
|
||||
/// purposes. It can also be used for counting the number of bytes that
|
||||
/// have been searched.
|
||||
pub fn absolute_byte_offset(&self) -> u64 {
|
||||
pub(crate) fn absolute_byte_offset(&self) -> u64 {
|
||||
self.line_buffer.absolute_byte_offset()
|
||||
}
|
||||
|
||||
/// If binary data was detected, then this returns the absolute byte offset
|
||||
/// at which binary data was initially found.
|
||||
pub fn binary_byte_offset(&self) -> Option<u64> {
|
||||
pub(crate) fn binary_byte_offset(&self) -> Option<u64> {
|
||||
self.line_buffer.binary_byte_offset()
|
||||
}
|
||||
|
||||
@ -248,25 +253,25 @@ impl<'b, R: io::Read> LineBufferReader<'b, R> {
|
||||
/// This forwards any errors returned by the underlying reader, and will
|
||||
/// also return an error if the buffer must be expanded past its allocation
|
||||
/// limit, as governed by the buffer allocation strategy.
|
||||
pub fn fill(&mut self) -> Result<bool, io::Error> {
|
||||
pub(crate) fn fill(&mut self) -> Result<bool, io::Error> {
|
||||
self.line_buffer.fill(&mut self.rdr)
|
||||
}
|
||||
|
||||
/// Return the contents of this buffer.
|
||||
pub fn buffer(&self) -> &[u8] {
|
||||
pub(crate) fn buffer(&self) -> &[u8] {
|
||||
self.line_buffer.buffer()
|
||||
}
|
||||
|
||||
/// Return the buffer as a BStr, used for convenient equality checking
|
||||
/// in tests only.
|
||||
#[cfg(test)]
|
||||
fn bstr(&self) -> &::bstr::BStr {
|
||||
fn bstr(&self) -> &bstr::BStr {
|
||||
self.buffer().as_bstr()
|
||||
}
|
||||
|
||||
/// Consume the number of bytes provided. This must be less than or equal
|
||||
/// to the number of bytes returned by `buffer`.
|
||||
pub fn consume(&mut self, amt: usize) {
|
||||
pub(crate) fn consume(&mut self, amt: usize) {
|
||||
self.line_buffer.consume(amt);
|
||||
}
|
||||
|
||||
@ -286,7 +291,7 @@ impl<'b, R: io::Read> LineBufferReader<'b, R> {
|
||||
/// Line buffers cannot be used directly, but instead must be used via the
|
||||
/// LineBufferReader.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct LineBuffer {
|
||||
pub(crate) struct LineBuffer {
|
||||
/// The configuration of this buffer.
|
||||
config: Config,
|
||||
/// The primary buffer with which to hold data.
|
||||
@ -322,7 +327,7 @@ impl LineBuffer {
|
||||
///
|
||||
/// This permits dynamically changing the binary detection strategy on
|
||||
/// an existing line buffer without needing to create a new one.
|
||||
pub fn set_binary_detection(&mut self, binary: BinaryDetection) {
|
||||
pub(crate) fn set_binary_detection(&mut self, binary: BinaryDetection) {
|
||||
self.config.binary = binary;
|
||||
}
|
||||
|
||||
@ -497,12 +502,12 @@ impl LineBuffer {
|
||||
}
|
||||
// `len` is used for computing the next allocation size. The capacity
|
||||
// is permitted to start at `0`, so we make sure it's at least `1`.
|
||||
let len = cmp::max(1, self.buf.len());
|
||||
let len = std::cmp::max(1, self.buf.len());
|
||||
let additional = match self.config.buffer_alloc {
|
||||
BufferAllocation::Eager => len * 2,
|
||||
BufferAllocation::Error(limit) => {
|
||||
let used = self.buf.len() - self.config.capacity;
|
||||
let n = cmp::min(len * 2, limit - used);
|
||||
let n = std::cmp::min(len * 2, limit - used);
|
||||
if n == 0 {
|
||||
return Err(alloc_error(self.config.capacity + limit));
|
||||
}
|
||||
@ -541,9 +546,9 @@ fn replace_bytes(bytes: &mut [u8], src: u8, replacement: u8) -> Option<usize> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use bstr::{ByteSlice, ByteVec};
|
||||
use std::str;
|
||||
|
||||
use super::*;
|
||||
|
||||
const SHERLOCK: &'static str = "\
|
||||
For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
|
@ -2,8 +2,10 @@
|
||||
A collection of routines for performing operations on lines.
|
||||
*/
|
||||
|
||||
use bstr::ByteSlice;
|
||||
use grep_matcher::{LineTerminator, Match};
|
||||
use {
|
||||
bstr::ByteSlice,
|
||||
grep_matcher::{LineTerminator, Match},
|
||||
};
|
||||
|
||||
/// An iterator over lines in a particular slice of bytes.
|
||||
///
|
||||
@ -21,10 +23,8 @@ impl<'b> LineIter<'b> {
|
||||
/// Create a new line iterator that yields lines in the given bytes that
|
||||
/// are terminated by `line_term`.
|
||||
pub fn new(line_term: u8, bytes: &'b [u8]) -> LineIter<'b> {
|
||||
LineIter {
|
||||
bytes: bytes,
|
||||
stepper: LineStep::new(line_term, 0, bytes.len()),
|
||||
}
|
||||
let stepper = LineStep::new(line_term, 0, bytes.len());
|
||||
LineIter { bytes, stepper }
|
||||
}
|
||||
}
|
||||
|
||||
@ -61,7 +61,7 @@ impl LineStep {
|
||||
///
|
||||
/// This panics if `start` is not less than or equal to `end`.
|
||||
pub fn new(line_term: u8, start: usize, end: usize) -> LineStep {
|
||||
LineStep { line_term, pos: start, end: end }
|
||||
LineStep { line_term, pos: start, end }
|
||||
}
|
||||
|
||||
/// Return the start and end position of the next line in the given bytes.
|
||||
@ -108,14 +108,17 @@ impl LineStep {
|
||||
}
|
||||
|
||||
/// Count the number of occurrences of `line_term` in `bytes`.
|
||||
pub fn count(bytes: &[u8], line_term: u8) -> u64 {
|
||||
pub(crate) fn count(bytes: &[u8], line_term: u8) -> u64 {
|
||||
memchr::memchr_iter(line_term, bytes).count() as u64
|
||||
}
|
||||
|
||||
/// Given a line that possibly ends with a terminator, return that line without
|
||||
/// the terminator.
|
||||
#[inline(always)]
|
||||
pub fn without_terminator(bytes: &[u8], line_term: LineTerminator) -> &[u8] {
|
||||
pub(crate) fn without_terminator(
|
||||
bytes: &[u8],
|
||||
line_term: LineTerminator,
|
||||
) -> &[u8] {
|
||||
let line_term = line_term.as_bytes();
|
||||
let start = bytes.len().saturating_sub(line_term.len());
|
||||
if bytes.get(start..) == Some(line_term) {
|
||||
@ -129,7 +132,7 @@ pub fn without_terminator(bytes: &[u8], line_term: LineTerminator) -> &[u8] {
|
||||
///
|
||||
/// Line terminators are considered part of the line they terminate.
|
||||
#[inline(always)]
|
||||
pub fn locate(bytes: &[u8], line_term: u8, range: Match) -> Match {
|
||||
pub(crate) fn locate(bytes: &[u8], line_term: u8, range: Match) -> Match {
|
||||
let line_start =
|
||||
bytes[..range.start()].rfind_byte(line_term).map_or(0, |i| i + 1);
|
||||
let line_end =
|
||||
@ -151,7 +154,7 @@ pub fn locate(bytes: &[u8], line_term: u8, range: Match) -> Match {
|
||||
///
|
||||
/// If `bytes` ends with a line terminator, then the terminator itself is
|
||||
/// considered part of the last line.
|
||||
pub fn preceding(bytes: &[u8], line_term: u8, count: usize) -> usize {
|
||||
pub(crate) fn preceding(bytes: &[u8], line_term: u8, count: usize) -> usize {
|
||||
preceding_by_pos(bytes, bytes.len(), line_term, count)
|
||||
}
|
||||
|
||||
@ -195,10 +198,9 @@ fn preceding_by_pos(
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use grep_matcher::Match;
|
||||
use std::ops::Range;
|
||||
use std::str;
|
||||
|
||||
use super::*;
|
||||
|
||||
const SHERLOCK: &'static str = "\
|
||||
For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
@ -222,7 +224,7 @@ and exhibited clearly, with a label attached.\
|
||||
results
|
||||
}
|
||||
|
||||
fn line_ranges(text: &str) -> Vec<Range<usize>> {
|
||||
fn line_ranges(text: &str) -> Vec<std::ops::Range<usize>> {
|
||||
let mut results = vec![];
|
||||
let mut it = LineStep::new(b'\n', 0, text.len());
|
||||
while let Some(m) = it.next_match(text.as_bytes()) {
|
||||
|
@ -1,15 +1,16 @@
|
||||
use std::cmp;
|
||||
|
||||
use bstr::ByteSlice;
|
||||
|
||||
use crate::line_buffer::BinaryDetection;
|
||||
use crate::lines::{self, LineStep};
|
||||
use crate::searcher::{Config, Range, Searcher};
|
||||
use crate::sink::{
|
||||
Sink, SinkContext, SinkContextKind, SinkError, SinkFinish, SinkMatch,
|
||||
};
|
||||
use grep_matcher::{LineMatchKind, Matcher};
|
||||
|
||||
use crate::{
|
||||
line_buffer::BinaryDetection,
|
||||
lines::{self, LineStep},
|
||||
searcher::{Config, Range, Searcher},
|
||||
sink::{
|
||||
Sink, SinkContext, SinkContextKind, SinkError, SinkFinish, SinkMatch,
|
||||
},
|
||||
};
|
||||
|
||||
enum FastMatchResult {
|
||||
Continue,
|
||||
Stop,
|
||||
@ -17,7 +18,7 @@ enum FastMatchResult {
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Core<'s, M: 's, S> {
|
||||
pub(crate) struct Core<'s, M: 's, S> {
|
||||
config: &'s Config,
|
||||
matcher: M,
|
||||
searcher: &'s Searcher,
|
||||
@ -35,7 +36,7 @@ pub struct Core<'s, M: 's, S> {
|
||||
}
|
||||
|
||||
impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
pub fn new(
|
||||
pub(crate) fn new(
|
||||
searcher: &'s Searcher,
|
||||
matcher: M,
|
||||
sink: S,
|
||||
@ -45,14 +46,14 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
if searcher.config.line_number { Some(1) } else { None };
|
||||
let core = Core {
|
||||
config: &searcher.config,
|
||||
matcher: matcher,
|
||||
searcher: searcher,
|
||||
sink: sink,
|
||||
binary: binary,
|
||||
matcher,
|
||||
searcher,
|
||||
sink,
|
||||
binary,
|
||||
pos: 0,
|
||||
absolute_byte_offset: 0,
|
||||
binary_byte_offset: None,
|
||||
line_number: line_number,
|
||||
line_number,
|
||||
last_line_counted: 0,
|
||||
last_line_visited: 0,
|
||||
after_context_left: 0,
|
||||
@ -69,23 +70,23 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
core
|
||||
}
|
||||
|
||||
pub fn pos(&self) -> usize {
|
||||
pub(crate) fn pos(&self) -> usize {
|
||||
self.pos
|
||||
}
|
||||
|
||||
pub fn set_pos(&mut self, pos: usize) {
|
||||
pub(crate) fn set_pos(&mut self, pos: usize) {
|
||||
self.pos = pos;
|
||||
}
|
||||
|
||||
pub fn binary_byte_offset(&self) -> Option<u64> {
|
||||
pub(crate) fn binary_byte_offset(&self) -> Option<u64> {
|
||||
self.binary_byte_offset.map(|offset| offset as u64)
|
||||
}
|
||||
|
||||
pub fn matcher(&self) -> &M {
|
||||
pub(crate) fn matcher(&self) -> &M {
|
||||
&self.matcher
|
||||
}
|
||||
|
||||
pub fn matched(
|
||||
pub(crate) fn matched(
|
||||
&mut self,
|
||||
buf: &[u8],
|
||||
range: &Range,
|
||||
@ -93,18 +94,18 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
self.sink_matched(buf, range)
|
||||
}
|
||||
|
||||
pub fn binary_data(
|
||||
pub(crate) fn binary_data(
|
||||
&mut self,
|
||||
binary_byte_offset: u64,
|
||||
) -> Result<bool, S::Error> {
|
||||
self.sink.binary_data(&self.searcher, binary_byte_offset)
|
||||
}
|
||||
|
||||
pub fn begin(&mut self) -> Result<bool, S::Error> {
|
||||
pub(crate) fn begin(&mut self) -> Result<bool, S::Error> {
|
||||
self.sink.begin(&self.searcher)
|
||||
}
|
||||
|
||||
pub fn finish(
|
||||
pub(crate) fn finish(
|
||||
&mut self,
|
||||
byte_count: u64,
|
||||
binary_byte_offset: Option<u64>,
|
||||
@ -115,7 +116,10 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
)
|
||||
}
|
||||
|
||||
pub fn match_by_line(&mut self, buf: &[u8]) -> Result<bool, S::Error> {
|
||||
pub(crate) fn match_by_line(
|
||||
&mut self,
|
||||
buf: &[u8],
|
||||
) -> Result<bool, S::Error> {
|
||||
if self.is_line_by_line_fast() {
|
||||
match self.match_by_line_fast(buf)? {
|
||||
FastMatchResult::SwitchToSlow => self.match_by_line_slow(buf),
|
||||
@ -127,7 +131,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn roll(&mut self, buf: &[u8]) -> usize {
|
||||
pub(crate) fn roll(&mut self, buf: &[u8]) -> usize {
|
||||
let consumed = if self.config.max_context() == 0 {
|
||||
buf.len()
|
||||
} else {
|
||||
@ -141,7 +145,8 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
self.config.line_term.as_byte(),
|
||||
self.config.max_context(),
|
||||
);
|
||||
let consumed = cmp::max(context_start, self.last_line_visited);
|
||||
let consumed =
|
||||
std::cmp::max(context_start, self.last_line_visited);
|
||||
consumed
|
||||
};
|
||||
self.count_lines(buf, consumed);
|
||||
@ -152,7 +157,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
consumed
|
||||
}
|
||||
|
||||
pub fn detect_binary(
|
||||
pub(crate) fn detect_binary(
|
||||
&mut self,
|
||||
buf: &[u8],
|
||||
range: &Range,
|
||||
@ -177,7 +182,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn before_context_by_line(
|
||||
pub(crate) fn before_context_by_line(
|
||||
&mut self,
|
||||
buf: &[u8],
|
||||
upto: usize,
|
||||
@ -213,7 +218,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
pub fn after_context_by_line(
|
||||
pub(crate) fn after_context_by_line(
|
||||
&mut self,
|
||||
buf: &[u8],
|
||||
upto: usize,
|
||||
@ -238,7 +243,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
pub fn other_context_by_line(
|
||||
pub(crate) fn other_context_by_line(
|
||||
&mut self,
|
||||
buf: &[u8],
|
||||
upto: usize,
|
||||
|
@ -1,16 +1,14 @@
|
||||
use std::cmp;
|
||||
use std::io;
|
||||
|
||||
use crate::line_buffer::{LineBufferReader, DEFAULT_BUFFER_CAPACITY};
|
||||
use crate::lines::{self, LineStep};
|
||||
use crate::sink::{Sink, SinkError};
|
||||
use grep_matcher::Matcher;
|
||||
|
||||
use crate::searcher::core::Core;
|
||||
use crate::searcher::{Config, Range, Searcher};
|
||||
use crate::{
|
||||
line_buffer::{LineBufferReader, DEFAULT_BUFFER_CAPACITY},
|
||||
lines::{self, LineStep},
|
||||
searcher::{core::Core, Config, Range, Searcher},
|
||||
sink::{Sink, SinkError},
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct ReadByLine<'s, M, R, S> {
|
||||
pub(crate) struct ReadByLine<'s, M, R, S> {
|
||||
config: &'s Config,
|
||||
core: Core<'s, M, S>,
|
||||
rdr: LineBufferReader<'s, R>,
|
||||
@ -19,10 +17,10 @@ pub struct ReadByLine<'s, M, R, S> {
|
||||
impl<'s, M, R, S> ReadByLine<'s, M, R, S>
|
||||
where
|
||||
M: Matcher,
|
||||
R: io::Read,
|
||||
R: std::io::Read,
|
||||
S: Sink,
|
||||
{
|
||||
pub fn new(
|
||||
pub(crate) fn new(
|
||||
searcher: &'s Searcher,
|
||||
matcher: M,
|
||||
read_from: LineBufferReader<'s, R>,
|
||||
@ -37,7 +35,7 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
pub fn run(mut self) -> Result<(), S::Error> {
|
||||
pub(crate) fn run(mut self) -> Result<(), S::Error> {
|
||||
if self.core.begin()? {
|
||||
while self.fill()? && self.core.match_by_line(self.rdr.buffer())? {
|
||||
}
|
||||
@ -87,13 +85,13 @@ where
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct SliceByLine<'s, M, S> {
|
||||
pub(crate) struct SliceByLine<'s, M, S> {
|
||||
core: Core<'s, M, S>,
|
||||
slice: &'s [u8],
|
||||
}
|
||||
|
||||
impl<'s, M: Matcher, S: Sink> SliceByLine<'s, M, S> {
|
||||
pub fn new(
|
||||
pub(crate) fn new(
|
||||
searcher: &'s Searcher,
|
||||
matcher: M,
|
||||
slice: &'s [u8],
|
||||
@ -103,14 +101,14 @@ impl<'s, M: Matcher, S: Sink> SliceByLine<'s, M, S> {
|
||||
|
||||
SliceByLine {
|
||||
core: Core::new(searcher, matcher, write_to, true),
|
||||
slice: slice,
|
||||
slice,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn run(mut self) -> Result<(), S::Error> {
|
||||
pub(crate) fn run(mut self) -> Result<(), S::Error> {
|
||||
if self.core.begin()? {
|
||||
let binary_upto =
|
||||
cmp::min(self.slice.len(), DEFAULT_BUFFER_CAPACITY);
|
||||
std::cmp::min(self.slice.len(), DEFAULT_BUFFER_CAPACITY);
|
||||
let binary_range = Range::new(0, binary_upto);
|
||||
if !self.core.detect_binary(self.slice, &binary_range)? {
|
||||
while !self.slice[self.core.pos()..].is_empty()
|
||||
@ -132,7 +130,7 @@ impl<'s, M: Matcher, S: Sink> SliceByLine<'s, M, S> {
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct MultiLine<'s, M, S> {
|
||||
pub(crate) struct MultiLine<'s, M, S> {
|
||||
config: &'s Config,
|
||||
core: Core<'s, M, S>,
|
||||
slice: &'s [u8],
|
||||
@ -140,7 +138,7 @@ pub struct MultiLine<'s, M, S> {
|
||||
}
|
||||
|
||||
impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> {
|
||||
pub fn new(
|
||||
pub(crate) fn new(
|
||||
searcher: &'s Searcher,
|
||||
matcher: M,
|
||||
slice: &'s [u8],
|
||||
@ -151,15 +149,15 @@ impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> {
|
||||
MultiLine {
|
||||
config: &searcher.config,
|
||||
core: Core::new(searcher, matcher, write_to, true),
|
||||
slice: slice,
|
||||
slice,
|
||||
last_match: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn run(mut self) -> Result<(), S::Error> {
|
||||
pub(crate) fn run(mut self) -> Result<(), S::Error> {
|
||||
if self.core.begin()? {
|
||||
let binary_upto =
|
||||
cmp::min(self.slice.len(), DEFAULT_BUFFER_CAPACITY);
|
||||
std::cmp::min(self.slice.len(), DEFAULT_BUFFER_CAPACITY);
|
||||
let binary_range = Range::new(0, binary_upto);
|
||||
if !self.core.detect_binary(self.slice, &binary_range)? {
|
||||
let mut keepgoing = true;
|
||||
@ -347,8 +345,10 @@ impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::searcher::{BinaryDetection, SearcherBuilder};
|
||||
use crate::testutil::{KitchenSink, RegexMatcher, SearcherTester};
|
||||
use crate::{
|
||||
searcher::{BinaryDetection, SearcherBuilder},
|
||||
testutil::{KitchenSink, RegexMatcher, SearcherTester},
|
||||
};
|
||||
|
||||
use super::*;
|
||||
|
||||
|
@ -1,5 +1,4 @@
|
||||
use std::fs::File;
|
||||
use std::path::Path;
|
||||
use std::{fs::File, path::Path};
|
||||
|
||||
use memmap::Mmap;
|
||||
|
||||
|
@ -1,19 +1,25 @@
|
||||
use std::cell::RefCell;
|
||||
use std::cmp;
|
||||
use std::fmt;
|
||||
use std::fs::File;
|
||||
use std::io::{self, Read};
|
||||
use std::path::Path;
|
||||
|
||||
use crate::line_buffer::{
|
||||
self, alloc_error, BufferAllocation, LineBuffer, LineBufferBuilder,
|
||||
LineBufferReader, DEFAULT_BUFFER_CAPACITY,
|
||||
use std::{
|
||||
cell::RefCell,
|
||||
cmp,
|
||||
fs::File,
|
||||
io::{self, Read},
|
||||
path::Path,
|
||||
};
|
||||
|
||||
use {
|
||||
encoding_rs,
|
||||
encoding_rs_io::DecodeReaderBytesBuilder,
|
||||
grep_matcher::{LineTerminator, Match, Matcher},
|
||||
};
|
||||
|
||||
use crate::{
|
||||
line_buffer::{
|
||||
self, alloc_error, BufferAllocation, LineBuffer, LineBufferBuilder,
|
||||
LineBufferReader, DEFAULT_BUFFER_CAPACITY,
|
||||
},
|
||||
searcher::glue::{MultiLine, ReadByLine, SliceByLine},
|
||||
sink::{Sink, SinkError},
|
||||
};
|
||||
use crate::searcher::glue::{MultiLine, ReadByLine, SliceByLine};
|
||||
use crate::sink::{Sink, SinkError};
|
||||
use encoding_rs;
|
||||
use encoding_rs_io::DecodeReaderBytesBuilder;
|
||||
use grep_matcher::{LineTerminator, Match, Matcher};
|
||||
|
||||
pub use self::mmap::MmapChoice;
|
||||
|
||||
@ -232,6 +238,7 @@ impl Config {
|
||||
/// This error occurs when a non-sensical configuration is present when trying
|
||||
/// to construct a `Searcher` from a `SearcherBuilder`.
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
#[non_exhaustive]
|
||||
pub enum ConfigError {
|
||||
/// Indicates that the heap limit configuration prevents all possible
|
||||
/// search strategies from being used. For example, if the heap limit is
|
||||
@ -250,23 +257,12 @@ pub enum ConfigError {
|
||||
/// The provided encoding label that could not be found.
|
||||
label: Vec<u8>,
|
||||
},
|
||||
/// Hints that destructuring should not be exhaustive.
|
||||
///
|
||||
/// This enum may grow additional variants, so this makes sure clients
|
||||
/// don't count on exhaustive matching. (Otherwise, adding a new variant
|
||||
/// could break existing code.)
|
||||
#[doc(hidden)]
|
||||
__Nonexhaustive,
|
||||
}
|
||||
|
||||
impl ::std::error::Error for ConfigError {
|
||||
fn description(&self) -> &str {
|
||||
"grep-searcher configuration error"
|
||||
}
|
||||
}
|
||||
impl std::error::Error for ConfigError {}
|
||||
|
||||
impl fmt::Display for ConfigError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
impl std::fmt::Display for ConfigError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match *self {
|
||||
ConfigError::SearchUnavailable => {
|
||||
write!(f, "grep config error: no available searchers")
|
||||
@ -284,7 +280,6 @@ impl fmt::Display for ConfigError {
|
||||
"grep config error: unknown encoding: {}",
|
||||
String::from_utf8_lossy(label),
|
||||
),
|
||||
_ => panic!("BUG: unexpected variant found"),
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -331,8 +326,8 @@ impl SearcherBuilder {
|
||||
.bom_sniffing(self.config.bom_sniffing);
|
||||
|
||||
Searcher {
|
||||
config: config,
|
||||
decode_builder: decode_builder,
|
||||
config,
|
||||
decode_builder,
|
||||
decode_buffer: RefCell::new(vec![0; 8 * (1 << 10)]),
|
||||
line_buffer: RefCell::new(self.config.line_buffer()),
|
||||
multi_line_buffer: RefCell::new(vec![]),
|
||||
@ -676,9 +671,9 @@ impl Searcher {
|
||||
log::trace!("{:?}: searching via memory map", path);
|
||||
return self.search_slice(matcher, &mmap, write_to);
|
||||
}
|
||||
// Fast path for multi-line searches of files when memory maps are
|
||||
// not enabled. This pre-allocates a buffer roughly the size of the
|
||||
// file, which isn't possible when searching an arbitrary io::Read.
|
||||
// Fast path for multi-line searches of files when memory maps are not
|
||||
// enabled. This pre-allocates a buffer roughly the size of the file,
|
||||
// which isn't possible when searching an arbitrary std::io::Read.
|
||||
if self.multi_line_with_matcher(&matcher) {
|
||||
log::trace!(
|
||||
"{:?}: reading entire file on to heap for mulitline",
|
||||
@ -699,8 +694,8 @@ impl Searcher {
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute a search over any implementation of `io::Read` and write the
|
||||
/// results to the given sink.
|
||||
/// Execute a search over any implementation of `std::io::Read` and write
|
||||
/// the results to the given sink.
|
||||
///
|
||||
/// When possible, this implementation will search the reader incrementally
|
||||
/// without reading it into memory. In some cases---for example, if multi
|
||||
@ -1016,9 +1011,10 @@ fn slice_has_bom(slice: &[u8]) -> bool {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::testutil::{KitchenSink, RegexMatcher};
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn config_error_heap_limit() {
|
||||
let matcher = RegexMatcher::new("");
|
||||
|
@ -1,23 +1,24 @@
|
||||
use std::error;
|
||||
use std::fmt;
|
||||
use std::io;
|
||||
|
||||
use grep_matcher::LineTerminator;
|
||||
|
||||
use crate::lines::LineIter;
|
||||
use crate::searcher::{ConfigError, Searcher};
|
||||
use crate::{
|
||||
lines::LineIter,
|
||||
searcher::{ConfigError, Searcher},
|
||||
};
|
||||
|
||||
/// A trait that describes errors that can be reported by searchers and
|
||||
/// implementations of `Sink`.
|
||||
///
|
||||
/// Unless you have a specialized use case, you probably don't need to
|
||||
/// implement this trait explicitly. It's likely that using `io::Error` (which
|
||||
/// implements this trait) for your error type is good enough, largely because
|
||||
/// most errors that occur during search will likely be an `io::Error`.
|
||||
/// implement this trait explicitly. It's likely that using `std::io::Error`
|
||||
/// (which implements this trait) for your error type is good enough,
|
||||
/// largely because most errors that occur during search will likely be an
|
||||
/// `std::io::Error`.
|
||||
pub trait SinkError: Sized {
|
||||
/// A constructor for converting any value that satisfies the
|
||||
/// `fmt::Display` trait into an error.
|
||||
fn error_message<T: fmt::Display>(message: T) -> Self;
|
||||
/// `std::fmt::Display` trait into an error.
|
||||
fn error_message<T: std::fmt::Display>(message: T) -> Self;
|
||||
|
||||
/// A constructor for converting I/O errors that occur while searching into
|
||||
/// an error of this type.
|
||||
@ -36,10 +37,10 @@ pub trait SinkError: Sized {
|
||||
}
|
||||
}
|
||||
|
||||
/// An `io::Error` can be used as an error for `Sink` implementations out of
|
||||
/// the box.
|
||||
/// An `std::io::Error` can be used as an error for `Sink` implementations out
|
||||
/// of the box.
|
||||
impl SinkError for io::Error {
|
||||
fn error_message<T: fmt::Display>(message: T) -> io::Error {
|
||||
fn error_message<T: std::fmt::Display>(message: T) -> io::Error {
|
||||
io::Error::new(io::ErrorKind::Other, message.to_string())
|
||||
}
|
||||
|
||||
@ -48,11 +49,13 @@ impl SinkError for io::Error {
|
||||
}
|
||||
}
|
||||
|
||||
/// A `Box<std::error::Error>` can be used as an error for `Sink`
|
||||
/// A `Box<dyn std::error::Error>` can be used as an error for `Sink`
|
||||
/// implementations out of the box.
|
||||
impl SinkError for Box<dyn error::Error> {
|
||||
fn error_message<T: fmt::Display>(message: T) -> Box<dyn error::Error> {
|
||||
Box::<dyn error::Error>::from(message.to_string())
|
||||
impl SinkError for Box<dyn std::error::Error> {
|
||||
fn error_message<T: std::fmt::Display>(
|
||||
message: T,
|
||||
) -> Box<dyn std::error::Error> {
|
||||
Box::<dyn std::error::Error>::from(message.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
@ -74,7 +77,7 @@ impl SinkError for Box<dyn error::Error> {
|
||||
///
|
||||
/// * What to do when a match is found. Callers must provide this.
|
||||
/// * What to do when an error occurs. Callers must provide this via the
|
||||
/// [`SinkError`] trait. Generally, callers can just use `io::Error` for
|
||||
/// [`SinkError`] trait. Generally, callers can just use `std::io::Error` for
|
||||
/// this, which already implements `SinkError`.
|
||||
/// * What to do when a contextual line is found. By default, these are
|
||||
/// ignored.
|
||||
@ -408,13 +411,14 @@ impl<'b> SinkMatch<'b> {
|
||||
self.line_number
|
||||
}
|
||||
|
||||
/// TODO
|
||||
/// Exposes as much of the underlying buffer that was searched as possible.
|
||||
#[inline]
|
||||
pub fn buffer(&self) -> &'b [u8] {
|
||||
self.buffer
|
||||
}
|
||||
|
||||
/// TODO
|
||||
/// Returns a range that corresponds to where [`SinkMatch::bytes`] appears
|
||||
/// in [`SinkMatch::buffer`].
|
||||
#[inline]
|
||||
pub fn bytes_range_in_buffer(&self) -> std::ops::Range<usize> {
|
||||
self.bytes_range_in_buffer.clone()
|
||||
@ -506,16 +510,16 @@ impl<'b> SinkContext<'b> {
|
||||
/// an error is reported at the first match and searching stops.
|
||||
/// * Context lines, context breaks and summary data reported at the end of
|
||||
/// a search are all ignored.
|
||||
/// * Implementors are forced to use `io::Error` as their error type.
|
||||
/// * Implementors are forced to use `std::io::Error` as their error type.
|
||||
///
|
||||
/// If you need more flexibility, then you're advised to implement the `Sink`
|
||||
/// trait directly.
|
||||
pub mod sinks {
|
||||
use std::io;
|
||||
use std::str;
|
||||
|
||||
use crate::searcher::Searcher;
|
||||
|
||||
use super::{Sink, SinkError, SinkMatch};
|
||||
use crate::searcher::Searcher;
|
||||
|
||||
/// A sink that provides line numbers and matches as strings while ignoring
|
||||
/// everything else.
|
||||
@ -527,8 +531,8 @@ pub mod sinks {
|
||||
///
|
||||
/// The closure accepts two parameters: a line number and a UTF-8 string
|
||||
/// containing the matched data. The closure returns a
|
||||
/// `Result<bool, io::Error>`. If the `bool` is `false`, then the search
|
||||
/// stops immediately. Otherwise, searching continues.
|
||||
/// `Result<bool, std::io::Error>`. If the `bool` is `false`, then the
|
||||
/// search stops immediately. Otherwise, searching continues.
|
||||
///
|
||||
/// If multi line mode was enabled, the line number refers to the line
|
||||
/// number of the first line in the match.
|
||||
@ -548,7 +552,7 @@ pub mod sinks {
|
||||
_searcher: &Searcher,
|
||||
mat: &SinkMatch<'_>,
|
||||
) -> Result<bool, io::Error> {
|
||||
let matched = match str::from_utf8(mat.bytes()) {
|
||||
let matched = match std::str::from_utf8(mat.bytes()) {
|
||||
Ok(matched) => matched,
|
||||
Err(err) => return Err(io::Error::error_message(err)),
|
||||
};
|
||||
@ -575,8 +579,8 @@ pub mod sinks {
|
||||
///
|
||||
/// The closure accepts two parameters: a line number and a UTF-8 string
|
||||
/// containing the matched data. The closure returns a
|
||||
/// `Result<bool, io::Error>`. If the `bool` is `false`, then the search
|
||||
/// stops immediately. Otherwise, searching continues.
|
||||
/// `Result<bool, std::io::Error>`. If the `bool` is `false`, then the
|
||||
/// search stops immediately. Otherwise, searching continues.
|
||||
///
|
||||
/// If multi line mode was enabled, the line number refers to the line
|
||||
/// number of the first line in the match.
|
||||
@ -598,7 +602,7 @@ pub mod sinks {
|
||||
) -> Result<bool, io::Error> {
|
||||
use std::borrow::Cow;
|
||||
|
||||
let matched = match str::from_utf8(mat.bytes()) {
|
||||
let matched = match std::str::from_utf8(mat.bytes()) {
|
||||
Ok(matched) => Cow::Borrowed(matched),
|
||||
// TODO: In theory, it should be possible to amortize
|
||||
// allocation here, but `std` doesn't provide such an API.
|
||||
@ -624,9 +628,9 @@ pub mod sinks {
|
||||
/// searcher was not configured to count lines.
|
||||
///
|
||||
/// The closure accepts two parameters: a line number and a raw byte string
|
||||
/// containing the matched data. The closure returns a `Result<bool,
|
||||
/// io::Error>`. If the `bool` is `false`, then the search stops
|
||||
/// immediately. Otherwise, searching continues.
|
||||
/// containing the matched data. The closure returns a
|
||||
/// `Result<bool, std::io::Error>`. If the `bool` is `false`, then the
|
||||
/// search stops immediately. Otherwise, searching continues.
|
||||
///
|
||||
/// If multi line mode was enabled, the line number refers to the line
|
||||
/// number of the first line in the match.
|
||||
|
@ -1,14 +1,17 @@
|
||||
use std::io::{self, Write};
|
||||
use std::str;
|
||||
|
||||
use bstr::ByteSlice;
|
||||
use grep_matcher::{
|
||||
LineMatchKind, LineTerminator, Match, Matcher, NoCaptures, NoError,
|
||||
use {
|
||||
bstr::ByteSlice,
|
||||
grep_matcher::{
|
||||
LineMatchKind, LineTerminator, Match, Matcher, NoCaptures, NoError,
|
||||
},
|
||||
regex::bytes::{Regex, RegexBuilder},
|
||||
};
|
||||
use regex::bytes::{Regex, RegexBuilder};
|
||||
|
||||
use crate::searcher::{BinaryDetection, Searcher, SearcherBuilder};
|
||||
use crate::sink::{Sink, SinkContext, SinkFinish, SinkMatch};
|
||||
use crate::{
|
||||
searcher::{BinaryDetection, Searcher, SearcherBuilder},
|
||||
sink::{Sink, SinkContext, SinkFinish, SinkMatch},
|
||||
};
|
||||
|
||||
/// A simple regex matcher.
|
||||
///
|
||||
@ -18,7 +21,7 @@ use crate::sink::{Sink, SinkContext, SinkFinish, SinkMatch};
|
||||
/// this optimization is detected automatically by inspecting and possibly
|
||||
/// modifying the regex itself.)
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct RegexMatcher {
|
||||
pub(crate) struct RegexMatcher {
|
||||
regex: Regex,
|
||||
line_term: Option<LineTerminator>,
|
||||
every_line_is_candidate: bool,
|
||||
@ -26,22 +29,18 @@ pub struct RegexMatcher {
|
||||
|
||||
impl RegexMatcher {
|
||||
/// Create a new regex matcher.
|
||||
pub fn new(pattern: &str) -> RegexMatcher {
|
||||
pub(crate) fn new(pattern: &str) -> RegexMatcher {
|
||||
let regex = RegexBuilder::new(pattern)
|
||||
.multi_line(true) // permits ^ and $ to match at \n boundaries
|
||||
.build()
|
||||
.unwrap();
|
||||
RegexMatcher {
|
||||
regex: regex,
|
||||
line_term: None,
|
||||
every_line_is_candidate: false,
|
||||
}
|
||||
RegexMatcher { regex, line_term: None, every_line_is_candidate: false }
|
||||
}
|
||||
|
||||
/// Forcefully set the line terminator of this matcher.
|
||||
///
|
||||
/// By default, this matcher has no line terminator set.
|
||||
pub fn set_line_term(
|
||||
pub(crate) fn set_line_term(
|
||||
&mut self,
|
||||
line_term: Option<LineTerminator>,
|
||||
) -> &mut RegexMatcher {
|
||||
@ -52,7 +51,10 @@ impl RegexMatcher {
|
||||
/// Whether to return every line as a candidate or not.
|
||||
///
|
||||
/// This forces searchers to handle the case of reporting a false positive.
|
||||
pub fn every_line_is_candidate(&mut self, yes: bool) -> &mut RegexMatcher {
|
||||
pub(crate) fn every_line_is_candidate(
|
||||
&mut self,
|
||||
yes: bool,
|
||||
) -> &mut RegexMatcher {
|
||||
self.every_line_is_candidate = yes;
|
||||
self
|
||||
}
|
||||
@ -108,17 +110,17 @@ impl Matcher for RegexMatcher {
|
||||
/// This is useful for tests because it lets us easily confirm whether data
|
||||
/// is being passed to Sink correctly.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct KitchenSink(Vec<u8>);
|
||||
pub(crate) struct KitchenSink(Vec<u8>);
|
||||
|
||||
impl KitchenSink {
|
||||
/// Create a new implementation of Sink that includes everything in the
|
||||
/// kitchen.
|
||||
pub fn new() -> KitchenSink {
|
||||
pub(crate) fn new() -> KitchenSink {
|
||||
KitchenSink(vec![])
|
||||
}
|
||||
|
||||
/// Return the data written to this sink.
|
||||
pub fn as_bytes(&self) -> &[u8] {
|
||||
pub(crate) fn as_bytes(&self) -> &[u8] {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
@ -199,7 +201,7 @@ impl Sink for KitchenSink {
|
||||
/// The tester works by assuming you want to test all pertinent code paths.
|
||||
/// These can be trimmed down as necessary via the various builder methods.
|
||||
#[derive(Debug)]
|
||||
pub struct SearcherTester {
|
||||
pub(crate) struct SearcherTester {
|
||||
haystack: String,
|
||||
pattern: String,
|
||||
filter: Option<::regex::Regex>,
|
||||
@ -221,7 +223,7 @@ pub struct SearcherTester {
|
||||
|
||||
impl SearcherTester {
|
||||
/// Create a new tester for testing searchers.
|
||||
pub fn new(haystack: &str, pattern: &str) -> SearcherTester {
|
||||
pub(crate) fn new(haystack: &str, pattern: &str) -> SearcherTester {
|
||||
SearcherTester {
|
||||
haystack: haystack.to_string(),
|
||||
pattern: pattern.to_string(),
|
||||
@ -245,7 +247,7 @@ impl SearcherTester {
|
||||
|
||||
/// Execute the test. If the test succeeds, then this returns successfully.
|
||||
/// If the test fails, then it panics with an informative message.
|
||||
pub fn test(&self) {
|
||||
pub(crate) fn test(&self) {
|
||||
// Check for configuration errors.
|
||||
if self.expected_no_line_number.is_none() {
|
||||
panic!("an 'expected' string with NO line numbers must be given");
|
||||
@ -300,7 +302,7 @@ impl SearcherTester {
|
||||
/// printf debugging and only want one particular test configuration to
|
||||
/// execute.
|
||||
#[allow(dead_code)]
|
||||
pub fn filter(&mut self, pattern: &str) -> &mut SearcherTester {
|
||||
pub(crate) fn filter(&mut self, pattern: &str) -> &mut SearcherTester {
|
||||
self.filter = Some(::regex::Regex::new(pattern).unwrap());
|
||||
self
|
||||
}
|
||||
@ -311,13 +313,13 @@ impl SearcherTester {
|
||||
/// Note that in order to see these in tests that aren't failing, you'll
|
||||
/// want to use `cargo test -- --nocapture`.
|
||||
#[allow(dead_code)]
|
||||
pub fn print_labels(&mut self, yes: bool) -> &mut SearcherTester {
|
||||
pub(crate) fn print_labels(&mut self, yes: bool) -> &mut SearcherTester {
|
||||
self.print_labels = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the expected search results, without line numbers.
|
||||
pub fn expected_no_line_number(
|
||||
pub(crate) fn expected_no_line_number(
|
||||
&mut self,
|
||||
exp: &str,
|
||||
) -> &mut SearcherTester {
|
||||
@ -326,7 +328,7 @@ impl SearcherTester {
|
||||
}
|
||||
|
||||
/// Set the expected search results, with line numbers.
|
||||
pub fn expected_with_line_number(
|
||||
pub(crate) fn expected_with_line_number(
|
||||
&mut self,
|
||||
exp: &str,
|
||||
) -> &mut SearcherTester {
|
||||
@ -337,7 +339,7 @@ impl SearcherTester {
|
||||
/// Set the expected search results, without line numbers, when performing
|
||||
/// a search on a slice. When not present, `expected_no_line_number` is
|
||||
/// used instead.
|
||||
pub fn expected_slice_no_line_number(
|
||||
pub(crate) fn expected_slice_no_line_number(
|
||||
&mut self,
|
||||
exp: &str,
|
||||
) -> &mut SearcherTester {
|
||||
@ -349,7 +351,7 @@ impl SearcherTester {
|
||||
/// search on a slice. When not present, `expected_with_line_number` is
|
||||
/// used instead.
|
||||
#[allow(dead_code)]
|
||||
pub fn expected_slice_with_line_number(
|
||||
pub(crate) fn expected_slice_with_line_number(
|
||||
&mut self,
|
||||
exp: &str,
|
||||
) -> &mut SearcherTester {
|
||||
@ -362,7 +364,7 @@ impl SearcherTester {
|
||||
/// This is enabled by default. When enabled, the string that is expected
|
||||
/// when line numbers are present must be provided. Otherwise, the expected
|
||||
/// string isn't required.
|
||||
pub fn line_number(&mut self, yes: bool) -> &mut SearcherTester {
|
||||
pub(crate) fn line_number(&mut self, yes: bool) -> &mut SearcherTester {
|
||||
self.line_number = yes;
|
||||
self
|
||||
}
|
||||
@ -370,7 +372,7 @@ impl SearcherTester {
|
||||
/// Whether to test search using the line-by-line searcher or not.
|
||||
///
|
||||
/// By default, this is enabled.
|
||||
pub fn by_line(&mut self, yes: bool) -> &mut SearcherTester {
|
||||
pub(crate) fn by_line(&mut self, yes: bool) -> &mut SearcherTester {
|
||||
self.by_line = yes;
|
||||
self
|
||||
}
|
||||
@ -379,7 +381,7 @@ impl SearcherTester {
|
||||
///
|
||||
/// By default, this is enabled.
|
||||
#[allow(dead_code)]
|
||||
pub fn multi_line(&mut self, yes: bool) -> &mut SearcherTester {
|
||||
pub(crate) fn multi_line(&mut self, yes: bool) -> &mut SearcherTester {
|
||||
self.multi_line = yes;
|
||||
self
|
||||
}
|
||||
@ -387,7 +389,7 @@ impl SearcherTester {
|
||||
/// Whether to perform an inverted search or not.
|
||||
///
|
||||
/// By default, this is disabled.
|
||||
pub fn invert_match(&mut self, yes: bool) -> &mut SearcherTester {
|
||||
pub(crate) fn invert_match(&mut self, yes: bool) -> &mut SearcherTester {
|
||||
self.invert_match = yes;
|
||||
self
|
||||
}
|
||||
@ -395,7 +397,7 @@ impl SearcherTester {
|
||||
/// Whether to enable binary detection on all searches.
|
||||
///
|
||||
/// By default, this is disabled.
|
||||
pub fn binary_detection(
|
||||
pub(crate) fn binary_detection(
|
||||
&mut self,
|
||||
detection: BinaryDetection,
|
||||
) -> &mut SearcherTester {
|
||||
@ -412,7 +414,10 @@ impl SearcherTester {
|
||||
/// impact the number of bytes searched when performing binary detection.
|
||||
/// For convenience, it can be useful to disable the automatic heap limit
|
||||
/// test.
|
||||
pub fn auto_heap_limit(&mut self, yes: bool) -> &mut SearcherTester {
|
||||
pub(crate) fn auto_heap_limit(
|
||||
&mut self,
|
||||
yes: bool,
|
||||
) -> &mut SearcherTester {
|
||||
self.auto_heap_limit = yes;
|
||||
self
|
||||
}
|
||||
@ -420,7 +425,10 @@ impl SearcherTester {
|
||||
/// Set the number of lines to include in the "after" context.
|
||||
///
|
||||
/// The default is `0`, which is equivalent to not printing any context.
|
||||
pub fn after_context(&mut self, lines: usize) -> &mut SearcherTester {
|
||||
pub(crate) fn after_context(
|
||||
&mut self,
|
||||
lines: usize,
|
||||
) -> &mut SearcherTester {
|
||||
self.after_context = lines;
|
||||
self
|
||||
}
|
||||
@ -428,7 +436,10 @@ impl SearcherTester {
|
||||
/// Set the number of lines to include in the "before" context.
|
||||
///
|
||||
/// The default is `0`, which is equivalent to not printing any context.
|
||||
pub fn before_context(&mut self, lines: usize) -> &mut SearcherTester {
|
||||
pub(crate) fn before_context(
|
||||
&mut self,
|
||||
lines: usize,
|
||||
) -> &mut SearcherTester {
|
||||
self.before_context = lines;
|
||||
self
|
||||
}
|
||||
@ -440,7 +451,7 @@ impl SearcherTester {
|
||||
/// requesting an unbounded number of before and after contextual lines.
|
||||
///
|
||||
/// This is disabled by default.
|
||||
pub fn passthru(&mut self, yes: bool) -> &mut SearcherTester {
|
||||
pub(crate) fn passthru(&mut self, yes: bool) -> &mut SearcherTester {
|
||||
self.passthru = yes;
|
||||
self
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user