ripgrep/grep/src/search.rs

use std::cmp;
use std::io;

use memchr::{memchr, memrchr};
use regex::bytes::{Regex, RegexBuilder};
use syntax;

use literals::LiteralSets;
use nonl;
use {Error, Result};

/// Describes one matching line in terms of byte offsets.
///
/// A freshly created value has both offsets at zero, no line number and no
/// exact match locations.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct Match {
    /// Starting byte offset of the line that matched.
    start: usize,
    /// Ending byte offset of the line that matched.
    end: usize,
    /// Line number of the match, when one has been recorded.
    line: Option<usize>,
    /// `(start, end)` byte offsets of individual regex matches on the line.
    locations: Vec<(usize, usize)>,
}

impl Match {
    /// Create an empty match: zero offsets, no line number and no
    /// locations. This is the same value `Match::default()` yields.
    pub fn new() -> Match {
        Match {
            start: 0,
            end: 0,
            line: None,
            locations: Vec::new(),
        }
    }

    /// The byte offset at which the matching line starts.
    #[inline]
    pub fn start(&self) -> usize {
        self.start
    }

    /// The byte offset at which the matching line ends.
    #[inline]
    pub fn end(&self) -> usize {
        self.end
    }

    /// The line number this match corresponds to.
    ///
    /// This is `None` when line numbers aren't being computed. Line number
    /// tracking can be enabled using `GrepBuilder`.
    #[inline]
    pub fn line(&self) -> Option<usize> {
        self.line
    }

    /// The exact start and end locations (in byte offsets) of every regex
    /// match in this line.
    ///
    /// The slice is always empty when exact locations aren't computed.
    /// Exact location tracking can be enabled using `GrepBuilder`.
    #[inline]
    pub fn locations(&self) -> &[(usize, usize)] {
        self.locations.as_slice()
    }
}

/// A line searcher built from a regular expression.
///
/// Values of this type are produced by `GrepBuilder::create`.
#[derive(Clone, Debug)]
pub struct Grep {
    /// The regex compiled from the (newline-stripped) pattern.
    re: Regex,
    /// An optional regex built from the pattern's literal sets. When present,
    /// it is run first to locate candidate lines, which are then confirmed
    /// with `re`.
    required: Option<Regex>,
    /// The options this searcher was built with.
    opts: Options,
}

/// A builder that configures and creates a `Grep` line searcher.
#[derive(Clone, Debug)]
pub struct GrepBuilder {
    /// The regular expression pattern, exactly as given to `new`.
    pattern: String,
    /// The options accumulated through the builder's setter methods.
    opts: Options,
}

/// The configuration shared by `GrepBuilder` and the searcher it creates.
#[derive(Clone, Debug)]
struct Options {
    /// Whether the regex is compiled with the case insensitive flag.
    case_insensitive: bool,
    /// Whether line numbers were requested.
    lines: bool,
    /// Whether exact match locations were requested.
    locations: bool,
    /// The byte that separates lines.
    line_terminator: u8,
    /// Approximate size limit of the compiled regex, in bytes.
    size_limit: usize,
    /// Approximate size limit of the DFA cache, in bytes.
    dfa_size_limit: usize,
}

impl Default for Options {
    /// Case sensitive, no line numbers, no locations, `\n` as the line
    /// terminator and 10 MiB for both size limits.
    fn default() -> Options {
        let ten_mib = 10 * (1 << 20);
        Options {
            line_terminator: b'\n',
            case_insensitive: false,
            lines: false,
            locations: false,
            size_limit: ten_mib,
            dfa_size_limit: ten_mib,
        }
    }
}

impl GrepBuilder {
    /// Create a new builder for line searching.
    ///
    /// The pattern given should be a regular expression. The precise syntax
    /// supported is documented on the regex crate.
    pub fn new(pattern: &str) -> GrepBuilder {
        GrepBuilder {
            pattern: pattern.to_string(),
            opts: Options::default(),
        }
    }

    /// Sets whether line numbers are reported for each match.
    ///
    /// When enabled (disabled by default), every matching line is tagged with
    /// its corresponding line number according to the line terminator that is
    /// set. Note that this requires extra processing which can slow down
    /// search.
    pub fn line_numbers(mut self, yes: bool) -> GrepBuilder {
        self.opts.lines = yes;
        self
    }

    /// Set whether precise match locations are reported for each matching
    /// line.
    ///
    /// When enabled (disabled by default), every match of the regex on each
    /// matching line is reported via byte offsets. Note that this requires
    /// extra processing which can slow down search.
    pub fn locations(mut self, yes: bool) -> GrepBuilder {
        self.opts.locations = yes;
        self
    }

    /// Set the line terminator.
    ///
    /// The line terminator can be any ASCII character and serves to delineate
    /// the match boundaries in the text searched.
    ///
    /// # Panics
    ///
    /// This panics if `ascii_byte` is greater than `0x7F` (i.e., not ASCII).
    pub fn line_terminator(mut self, ascii_byte: u8) -> GrepBuilder {
        assert!(ascii_byte <= 0x7F);
        self.opts.line_terminator = ascii_byte;
        self
    }

    /// Set the case insensitive flag (`i`) on the regex.
    ///
    /// When enabled (disabled by default), the pattern is matched without
    /// regard to case.
    pub fn case_insensitive(mut self, yes: bool) -> GrepBuilder {
        self.opts.case_insensitive = yes;
        self
    }

    /// Set the approximate size limit of the compiled regular expression.
    ///
    /// This roughly corresponds to the number of bytes occupied by a
    /// single compiled program. If the program exceeds this number, then a
    /// compilation error is returned.
    pub fn size_limit(mut self, limit: usize) -> GrepBuilder {
        self.opts.size_limit = limit;
        self
    }

    /// Set the approximate size of the cache used by the DFA.
    ///
    /// This roughly corresponds to the number of bytes that the DFA will use
    /// while searching.
    ///
    /// Note that this is a per thread limit. There is no way to set a global
    /// limit. In particular, if a regex is used from multiple threads
    /// simultaneously, then each thread may use up to the number of bytes
    /// specified here.
    pub fn dfa_size_limit(mut self, limit: usize) -> GrepBuilder {
        self.opts.dfa_size_limit = limit;
        self
    }

    /// Create a line searcher.
    ///
    /// The pattern is parsed once; the resulting expression is used both to
    /// derive the literal sets behind the searcher's optional `required`
    /// regex and to compile the full regex.
    ///
    /// If there was a problem parsing or compiling the regex with the given
    /// options, then an error is returned.
    pub fn create(self) -> Result<Grep> {
        let expr = try!(self.parse());
        let literals = LiteralSets::create(&expr);
        let re = try!(
            RegexBuilder::new(&expr.to_string())
                .case_insensitive(self.opts.case_insensitive)
                .multi_line(true)
                .unicode(true)
                .size_limit(self.opts.size_limit)
                .dfa_size_limit(self.opts.dfa_size_limit)
                .compile()
        );
        Ok(Grep {
            re: re,
            required: literals.to_regex(),
            opts: self.opts,
        })
    }

    /// Parses the underlying pattern and ensures the pattern can never match
    /// the line terminator.
    ///
    /// The case insensitive option is applied at parse time so that it is
    /// part of the expression itself. `create` extracts its literal sets
    /// from this expression; if the flag were only set on the final regex,
    /// those literals would stay case sensitive and lines matching in a
    /// different case would be rejected before the full regex ever ran.
    fn parse(&self) -> Result<syntax::Expr> {
        let expr =
            try!(syntax::ExprBuilder::new()
                 .allow_bytes(true)
                 .unicode(true)
                 .case_insensitive(self.opts.case_insensitive)
                 .parse(&self.pattern));
        Ok(try!(nonl::remove(expr, self.opts.line_terminator)))
    }
}

impl Grep {
    pub fn iter<'b, 's>(&'s self, buf: &'b [u8]) -> Iter<'b, 's> {
        Iter {
            searcher: self,
            buf: buf,
            start: 0,
        }
    }

    pub fn buffered_reader<'g, R: io::Read>(
        &'g self,
        buf: Buffer,
        rdr: R,
    ) -> GrepBuffered<'g, R> {
        GrepBuffered {
            grep: self,
            rdr: rdr,
            b: buf,
            pos: 0,
            start: 0,
            lastnl: 0,
            end: 0,
        }
    }

    pub fn read_match(
        &self,
        mat: &mut Match,
        buf: &[u8],
        mut start: usize,
    ) -> bool {
        if start >= buf.len() {
            return false;
        }
        if let Some(ref req) = self.required {
            while start < buf.len() {
                let e = match req.shortest_match(&buf[start..]) {
                    None => return false,
                    Some(e) => start + e,
                };
                let (prevnl, nextnl) = self.find_line(buf, e, e);
                match self.re.shortest_match(&buf[prevnl..nextnl]) {
                    None => {
                        start = nextnl + 1;
                        continue;
                    }
                    Some(_) => {
                        self.fill_match(mat, prevnl, nextnl);
                        return true;
                    }
                }
            }
            false
        } else {
            let e = match self.re.shortest_match(&buf[start..]) {
                None => return false,
                Some(e) => start + e,
            };
            let (s, e) = self.find_line(buf, e, e);
            self.fill_match(mat, s, e);
            true
        }
    }

    fn fill_match(&self, mat: &mut Match, start: usize, end: usize) {
        mat.start = start;
        mat.end = end;
    }

    fn find_line(&self, buf: &[u8], s: usize, e: usize) -> (usize, usize) {
        (self.find_line_start(buf, s), self.find_line_end(buf, e))
    }

    fn find_line_start(&self, buf: &[u8], pos: usize) -> usize {
        memrchr(self.opts.line_terminator, &buf[0..pos]).map_or(0, |i| i + 1)
    }

    fn find_line_end(&self, buf: &[u8], pos: usize) -> usize {
        memchr(self.opts.line_terminator, &buf[pos..])
            .map_or(buf.len(), |i| pos + i)
    }
}

/// The storage used for buffered searching.
pub struct Buffer {
    /// Fixed-size, zero-initialized storage that holds the bytes to search.
    buf: Vec<u8>,
    /// Scratch space; starts out empty.
    tmp: Vec<u8>,
}

impl Buffer {
    /// Create a buffer with the default capacity of 16 KiB.
    pub fn new() -> Buffer {
        const DEFAULT_CAPACITY: usize = 16 * (1 << 10);
        Buffer::with_capacity(DEFAULT_CAPACITY)
    }

    /// Create a buffer whose storage is exactly `cap` zeroed bytes.
    pub fn with_capacity(cap: usize) -> Buffer {
        let zeroed: Vec<u8> = vec![0; cap];
        let scratch: Vec<u8> = Vec::new();
        Buffer {
            buf: zeroed,
            tmp: scratch,
        }
    }
}

/// A searcher that reads from `R` into a fixed-size buffer and reports
/// matching lines one at a time.
pub struct GrepBuffered<'g, R> {
    /// The searcher run over each buffer-full of bytes.
    grep: &'g Grep,
    /// The source of the bytes being searched.
    rdr: R,
    /// The search buffer together with its scratch space.
    b: Buffer,
    /// The amount added to buffer-relative match offsets before a match is
    /// handed to the caller.
    pos: usize,
    /// The buffer offset at which the next search starts.
    start: usize,
    /// The buffer offset at which searching stops: one past the final line
    /// terminator in the buffer, or the end of the data at EOF.
    lastnl: usize,
    /// The number of valid bytes currently in the buffer.
    end: usize,
}

impl<'g, R: io::Read> GrepBuffered<'g, R> {
    /// Consumes the searcher and hands back its buffer.
    pub fn into_buffer(self) -> Buffer {
        self.b
    }

    /// Returns an iterator over the remaining matches of this searcher.
    pub fn iter<'b>(&'b mut self) -> IterBuffered<'b, 'g, R> {
        IterBuffered { grep: self }
    }

    /// Finds the next matching line, refilling the buffer from the reader
    /// as needed.
    ///
    /// Returns `Ok(true)` after writing the line's offsets to `mat`,
    /// `Ok(false)` once the reader is exhausted, and an error if refilling
    /// the buffer fails.
    pub fn read_match(
        &mut self,
        mat: &mut Match,
    ) -> Result<bool> {
        loop {
            // Everything searchable in the buffer has been consumed once the
            // start position reaches the search cap, so fetch more bytes.
            if self.start == self.lastnl {
                let has_data = try!(self.fill());
                if !has_data {
                    return Ok(false);
                }
            }
            let found = self.grep.read_match(
                mat, &self.b.buf[..self.lastnl], self.start);
            if found {
                // The next search begins just past the end of this line, but
                // never beyond the search cap.
                let after_line = mat.end.checked_add(1).unwrap();
                self.start = cmp::min(self.lastnl, after_line);
                // Translate buffer-relative offsets for the caller.
                mat.start += self.pos;
                mat.end += self.pos;
                return Ok(true);
            }
            // Nothing left to find here; this forces a refill on the next
            // pass through the loop.
            self.start = self.lastnl;
        }
    }

    /// Refills the buffer and recomputes the search cap.
    ///
    /// Bytes following the final line terminator of the previous fill are
    /// carried over to the front of the buffer, and fresh bytes are read in
    /// behind them. Returns `Ok(false)` when the buffer ends up empty, and
    /// `Error::LineTooLong` when the buffer is full yet contains no line
    /// terminator.
    fn fill(&mut self) -> Result<bool> {
        // Stash the unsearched tail in the scratch vector, then copy it to
        // the front of the buffer.
        //
        // TODO(ag): Seems like we should be able to memmove from the end
        // of the buffer to the beginning, but let's do it the stupid (but
        // safe) way for now.
        self.b.tmp.clear();
        self.b.tmp.extend_from_slice(&self.b.buf[self.lastnl..self.end]);
        let carried = self.b.tmp.len();
        self.b.buf[..carried].copy_from_slice(&self.b.tmp);
        // Top up the remainder of the buffer with fresh bytes.
        let fresh = try!(self.rdr.read(&mut self.b.buf[carried..]));
        // Account for the bytes consumed from the previous buffer contents.
        self.pos += self.start;
        self.start = 0;
        // Valid data is the carried-over tail plus whatever was just read.
        self.end = carried + fresh;
        // Locate the final line terminator. Searches are capped there since
        // anything following it may be a partial line.
        //
        // If no terminator is present and the buffer still has room, keep
        // reading: the line may simply not have fully arrived yet. Only the
        // newly read bytes need to be scanned on each retry.
        let term = self.grep.opts.line_terminator;
        let mut scan_from = 0;
        loop {
            if let Some(i) = memrchr(term, &self.b.buf[scan_from..self.end]) {
                self.lastnl = scan_from + i + 1;
                break;
            }
            // A full buffer without a single terminator means the line is
            // too long to handle.
            if self.end == self.b.buf.len() {
                return Err(Error::LineTooLong(self.b.buf.len()));
            }
            let more = try!(self.rdr.read(&mut self.b.buf[self.end..]));
            if more == 0 {
                // EOF: the unterminated tail is the final line, so it can
                // be searched in full.
                self.lastnl = self.end;
                break;
            }
            scan_from = self.end;
            self.end += more;
        }
        // An empty buffer at this point means EOF with nothing carried over.
        Ok(self.end > 0)
    }
}

/// An iterator over the matching lines of an in-memory buffer.
pub struct Iter<'b, 's> {
    /// The searcher used to find each match.
    searcher: &'s Grep,
    /// The bytes being searched.
    buf: &'b [u8],
    /// The offset in `buf` at which the next search starts.
    start: usize,
}

impl<'b, 's> Iterator for Iter<'b, 's> {
    type Item = Match;

    /// Yields the next matching line, or `None` once the buffer has no more
    /// matches.
    fn next(&mut self) -> Option<Match> {
        let mut found = Match::default();
        if self.searcher.read_match(&mut found, self.buf, self.start) {
            // Resume just past the end of the line that matched.
            self.start = found.end + 1;
            Some(found)
        } else {
            // Park the cursor at the end of the buffer.
            self.start = self.buf.len();
            None
        }
    }
}

/// An iterator over the matching lines produced by a `GrepBuffered`.
pub struct IterBuffered<'b, 'g: 'b, R: 'b> {
    /// The buffered searcher driven by this iterator.
    grep: &'b mut GrepBuffered<'g, R>,
}

impl<'b, 'g, R: io::Read> Iterator for IterBuffered<'b, 'g, R> {
    type Item = Result<Match>;

    /// Yields the next match, an error if reading the next match failed, or
    /// `None` once the searcher reports that no match remains.
    fn next(&mut self) -> Option<Result<Match>> {
        let mut found = Match::default();
        let outcome = self.grep.read_match(&mut found);
        match outcome {
            Ok(has_match) => {
                if has_match {
                    Some(Ok(found))
                } else {
                    None
                }
            }
            Err(err) => Some(Err(err)),
        }
    }
}

/// Copies `bytes` into an owned `String`.
///
/// Panics if `bytes` is not valid UTF-8.
#[allow(dead_code)]
fn s(bytes: &[u8]) -> String {
    let owned: Vec<u8> = bytes.to_owned();
    String::from_utf8(owned).unwrap()
}

#[cfg(test)]
mod tests {
    #![allow(unused_imports)]

    use memchr::{memchr, memrchr};
    use regex::bytes::Regex;

    use super::{Buffer, GrepBuilder, s};

    static SHERLOCK: &'static [u8] = include_bytes!("./data/sherlock.txt");

    /// Reference implementation: run the regex over the whole haystack and
    /// widen every match to the `\n`-delimited line around it.
    fn find_lines(pat: &str, haystack: &[u8]) -> Vec<(usize, usize)> {
        let re = Regex::new(pat).unwrap();
        re.find_iter(haystack)
            .map(|(mat_start, mat_end)| {
                let line_start = match memrchr(b'\n', &haystack[..mat_start]) {
                    Some(i) => i + 1,
                    None => 0,
                };
                let line_end = match memchr(b'\n', &haystack[mat_end..]) {
                    Some(i) => mat_end + i,
                    None => haystack.len(),
                };
                (line_start, line_end)
            })
            .collect()
    }

    /// Collects the line offsets reported by the buffered searcher for the
    /// same pattern and haystack.
    fn grep_lines(pat: &str, haystack: &[u8]) -> Vec<(usize, usize)> {
        let grep = GrepBuilder::new(pat).create().unwrap();
        let mut buffered = grep.buffered_reader(Buffer::new(), haystack);
        let mut lines = vec![];
        for result in buffered.iter() {
            let m = result.unwrap();
            lines.push((m.start(), m.end()));
        }
        lines
    }

    /// The buffered searcher must report exactly the lines that the
    /// reference implementation finds for a plain literal pattern.
    #[test]
    fn buffered_literal() {
        let pat = "Sherlock Holmes";
        let expected = find_lines(pat, SHERLOCK);
        let got = grep_lines(pat, SHERLOCK);
        assert_eq!(expected.len(), got.len());
        assert_eq!(expected, got);
    }
}
progress 2016-06-22 21:19:02 -04:00			`use std::cmp;`
refactor progress 2016-06-20 16:53:48 -04:00			`use std::io;`

			`use memchr::{memchr, memrchr};`
			`use regex::bytes::{Regex, RegexBuilder};`
			`use syntax;`

			`use literals::LiteralSets;`
			`use nonl;`
progress 2016-06-22 21:19:02 -04:00			`use {Error, Result};`

			`#[derive(Clone, Debug, Default, Eq, PartialEq)]`
			`pub struct Match {`
			`start: usize,`
			`end: usize,`
			`line: Option<usize>,`
			`locations: Vec<(usize, usize)>,`
			`}`

			`impl Match {`
			`pub fn new() -> Match {`
			`Match::default()`
			`}`

			`/// Return the starting byte offset of the line that matched.`
			`#[inline]`
			`pub fn start(&self) -> usize {`
			`self.start`
			`}`

			`/// Return the ending byte offset of the line that matched.`
			`#[inline]`
			`pub fn end(&self) -> usize {`
			`self.end`
			`}`

			`/// Return the line number that this match corresponds to.`
			`///`
			/// Note that this is `None` if line numbers aren't being computed. Line
			/// number tracking can be enabled using `GrepBuilder`.
			`#[inline]`
			`pub fn line(&self) -> Option<usize> {`
			`self.line`
			`}`

			`/// Return the exact start and end locations (in byte offsets) of every`
			`/// regex match in this line.`
			`///`
			`/// Note that this always returns an empty slice if exact locations aren't`
			/// computed. Exact location tracking can be enabled using `GrepBuilder`.
			`#[inline]`
			`pub fn locations(&self) -> &[(usize, usize)] {`
			`&self.locations`
			`}`
			`}`
refactor progress 2016-06-20 16:53:48 -04:00
			`#[derive(Clone, Debug)]`
			`pub struct Grep {`
			`re: Regex,`
			`required: Option<Regex>,`
			`opts: Options,`
			`}`

			`#[derive(Clone, Debug)]`
			`pub struct GrepBuilder {`
			`pattern: String,`
			`opts: Options,`
			`}`

			`#[derive(Clone, Debug)]`
			`struct Options {`
			`case_insensitive: bool,`
			`lines: bool,`
			`locations: bool,`
			`line_terminator: u8,`
			`size_limit: usize,`
			`dfa_size_limit: usize,`
			`}`

			`impl Default for Options {`
			`fn default() -> Options {`
			`Options {`
			`case_insensitive: false,`
			`lines: false,`
			`locations: false,`
			`line_terminator: b'\n',`
			`size_limit: 10 * (1 << 20),`
			`dfa_size_limit: 10 * (1 << 20),`
			`}`
			`}`
			`}`

			`impl GrepBuilder {`
			`/// Create a new builder for line searching.`
			`///`
			`/// The pattern given should be a regular expression. The precise syntax`
			`/// supported is documented on the regex crate.`
			`pub fn new(pattern: &str) -> GrepBuilder {`
			`GrepBuilder {`
			`pattern: pattern.to_string(),`
			`opts: Options::default(),`
			`}`
			`}`

			`/// Sets whether line numbers are reported for each match.`
			`///`
			`/// When enabled (disabled by default), every matching line is tagged with`
Refactor buffered test. 2016-08-08 19:17:25 -04:00			`/// its corresponding line number according to the line terminator that is`
refactor progress 2016-06-20 16:53:48 -04:00			`/// set. Note that this requires extra processing which can slow down`
			`/// search.`
			`pub fn line_numbers(mut self, yes: bool) -> GrepBuilder {`
			`self.opts.lines = yes;`
			`self`
			`}`

			`/// Set whether precise match locations are reported for each matching`
			`/// line.`
			`///`
			`/// When enabled (disabled by default), every match of the regex on each`
			`/// matchling line is reported via byte offsets. Note that this requires`
			`/// extra processing which can slow down search.`
			`pub fn locations(mut self, yes: bool) -> GrepBuilder {`
			`self.opts.locations = yes;`
			`self`
			`}`

			`/// Set the line terminator.`
			`///`
			`/// The line terminator can be any ASCII character and serves to delineate`
			`/// the match boundaries in the text searched.`
			`///`
			/// This panics if `ascii_byte` is greater than `0x7F` (i.e., not ASCII).
			`pub fn line_terminator(mut self, ascii_byte: u8) -> GrepBuilder {`
			`assert!(ascii_byte <= 0x7F);`
			`self.opts.line_terminator = ascii_byte;`
			`self`
			`}`

			/// Set the case sensitive flag (`i`) on the regex.
			`pub fn case_insensitive(mut self, yes: bool) -> GrepBuilder {`
			`self.opts.case_insensitive = yes;`
			`self`
			`}`

			`/// Set the approximate size limit of the compiled regular expression.`
			`///`
			`/// This roughly corresponds to the number of bytes occupied by a`
			`/// single compiled program. If the program exceeds this number, then a`
			`/// compilation error is returned.`
			`pub fn size_limit(mut self, limit: usize) -> GrepBuilder {`
			`self.opts.size_limit = limit;`
			`self`
			`}`

			`/// Set the approximate size of the cache used by the DFA.`
			`///`
			`/// This roughly corresponds to the number of bytes that the DFA will use`
			`/// while searching.`
			`///`
			`/// Note that this is a per thread limit. There is no way to set a global`
			`/// limit. In particular, if a regex is used from multiple threads`
			`/// simulanteously, then each thread may use up to the number of bytes`
			`/// specified here.`
			`pub fn dfa_size_limit(mut self, limit: usize) -> GrepBuilder {`
			`self.opts.dfa_size_limit = limit;`
			`self`
			`}`

			`/// Create a line searcher.`
			`///`
			`/// If there was a problem parsing or compiling the regex with the given`
			`/// options, then an error is returned.`
			`pub fn create(self) -> Result<Grep> {`
			`let expr = try!(self.parse());`
			`let literals = LiteralSets::create(&expr);`
			`let re = try!(`
			`RegexBuilder::new(&expr.to_string())`
			`.case_insensitive(self.opts.case_insensitive)`
			`.multi_line(true)`
			`.unicode(true)`
			`.size_limit(self.opts.size_limit)`
			`.dfa_size_limit(self.opts.dfa_size_limit)`
			`.compile()`
			`);`
			`Ok(Grep {`
			`re: re,`
			`required: literals.to_regex(),`
			`opts: self.opts,`
			`})`
			`}`

			`/// Parses the underlying pattern and ensures the pattern can never match`
			`/// the line terminator.`
			`fn parse(&self) -> Result<syntax::Expr> {`
			`let expr =`
			`try!(syntax::ExprBuilder::new()`
			`.allow_bytes(true)`
			`.unicode(true)`
			`.parse(&self.pattern));`
			`Ok(try!(nonl::remove(expr, self.opts.line_terminator)))`
			`}`
			`}`

			`impl Grep {`
			`pub fn iter<'b, 's>(&'s self, buf: &'b [u8]) -> Iter<'b, 's> {`
			`Iter {`
			`searcher: self,`
			`buf: buf,`
			`start: 0,`
			`}`
			`}`

progress 2016-06-22 21:19:02 -04:00			`pub fn buffered_reader<'g, R: io::Read>(`
			`&'g self,`
			`buf: Buffer,`
			`rdr: R,`
			`) -> GrepBuffered<'g, R> {`
			`GrepBuffered {`
			`grep: self,`
			`rdr: rdr,`
			`b: buf,`
			`pos: 0,`
			`start: 0,`
			`lastnl: 0,`
			`end: 0,`
			`}`
			`}`

refactor progress 2016-06-20 16:53:48 -04:00			`pub fn read_match(`
			`&self,`
			`mat: &mut Match,`
			`buf: &[u8],`
			`mut start: usize,`
			`) -> bool {`
			`if start >= buf.len() {`
			`return false;`
			`}`
			`if let Some(ref req) = self.required {`
			`while start < buf.len() {`
			`let e = match req.shortest_match(&buf[start..]) {`
			`None => return false,`
			`Some(e) => start + e,`
			`};`
			`let (prevnl, nextnl) = self.find_line(buf, e, e);`
			`match self.re.shortest_match(&buf[prevnl..nextnl]) {`
			`None => {`
			`start = nextnl + 1;`
			`continue;`
			`}`
			`Some(_) => {`
			`self.fill_match(mat, prevnl, nextnl);`
			`return true;`
			`}`
			`}`
			`}`
			`false`
			`} else {`
			`let e = match self.re.shortest_match(&buf[start..]) {`
			`None => return false,`
			`Some(e) => start + e,`
			`};`
			`let (s, e) = self.find_line(buf, e, e);`
			`self.fill_match(mat, s, e);`
			`true`
			`}`
			`}`

			`fn fill_match(&self, mat: &mut Match, start: usize, end: usize) {`
			`mat.start = start;`
			`mat.end = end;`
			`}`

			`fn find_line(&self, buf: &[u8], s: usize, e: usize) -> (usize, usize) {`
			`(self.find_line_start(buf, s), self.find_line_end(buf, e))`
			`}`

			`fn find_line_start(&self, buf: &[u8], pos: usize) -> usize {`
			`memrchr(self.opts.line_terminator, &buf[0..pos]).map_or(0, \|i\| i + 1)`
			`}`

			`fn find_line_end(&self, buf: &[u8], pos: usize) -> usize {`
			`memchr(self.opts.line_terminator, &buf[pos..])`
			`.map_or(buf.len(), \|i\| pos + i)`
			`}`
			`}`

progress 2016-06-22 21:19:02 -04:00			`pub struct Buffer {`
			`buf: Vec<u8>,`
			`tmp: Vec<u8>,`
refactor progress 2016-06-20 16:53:48 -04:00			`}`

progress 2016-06-22 21:19:02 -04:00			`impl Buffer {`
			`pub fn new() -> Buffer {`
			`Buffer::with_capacity(16 * (1<<10))`
refactor progress 2016-06-20 16:53:48 -04:00			`}`

progress 2016-06-22 21:19:02 -04:00			`pub fn with_capacity(cap: usize) -> Buffer {`
			`Buffer {`
			`buf: vec![0; cap],`
			`tmp: Vec::new(),`
			`}`
refactor progress 2016-06-20 16:53:48 -04:00			`}`
progress 2016-06-22 21:19:02 -04:00			`}`
refactor progress 2016-06-20 16:53:48 -04:00
progress 2016-06-22 21:19:02 -04:00			`pub struct GrepBuffered<'g, R> {`
			`grep: &'g Grep,`
			`rdr: R,`
			`b: Buffer,`
			`pos: usize,`
			`start: usize,`
			`lastnl: usize,`
			`end: usize,`
			`}`

			`impl<'g, R: io::Read> GrepBuffered<'g, R> {`
			`pub fn into_buffer(self) -> Buffer {`
			`self.b`
refactor progress 2016-06-20 16:53:48 -04:00			`}`

progress 2016-06-22 21:19:02 -04:00			`pub fn iter<'b>(&'b mut self) -> IterBuffered<'b, 'g, R> {`
			`IterBuffered { grep: self }`
refactor progress 2016-06-20 16:53:48 -04:00			`}`

progress 2016-06-22 21:19:02 -04:00			`pub fn read_match(`
			`&mut self,`
			`mat: &mut Match,`
			`) -> Result<bool> {`
			`loop {`
Refactor buffered test. 2016-08-08 19:17:25 -04:00			`// If the starting position is equal to the end of the last search,`
			`// then it's time to refill the buffer for more searching.`
progress 2016-06-22 21:19:02 -04:00			`if self.start == self.lastnl {`
			`if !try!(self.fill()) {`
			`return Ok(false);`
			`}`
			`}`
			`let ok = self.grep.read_match(`
			`mat, &self.b.buf[..self.lastnl], self.start);`
			`if !ok {`
Refactor buffered test. 2016-08-08 19:17:25 -04:00			`// This causes the next iteration to refill the buffer with`
			`// more bytes to search.`
progress 2016-06-22 21:19:02 -04:00			`self.start = self.lastnl;`
			`continue;`
			`}`
			`// Move start to the first possible byte of the next line.`
			`self.start = cmp::min(`
			`self.lastnl, mat.end.checked_add(1).unwrap());`
			`mat.start += self.pos;`
			`mat.end += self.pos;`
			`return Ok(true);`
			`}`
			`}`

			`fn fill(&mut self) -> Result<bool> {`
			`{`
			`// The buffer might have leftover bytes that have not been`
			`// searched yet. Leftovers correspond to all bytes proceding the`
			`// final \n in the current buffer.`
			`//`
			`// TODO(ag): Seems like we should be able to memmove from the end`
			`// of the buffer to the beginning, but let's do it the stupid (but`
			`// safe) way for now.`
			`let leftovers = &self.b.buf[self.lastnl..self.end];`
			`self.b.tmp.clear();`
			`self.b.tmp.resize(leftovers.len(), 0);`
			`self.b.tmp.copy_from_slice(leftovers);`
			`}`
			`// Move the leftovers to the beginning of our buffer.`
			`self.b.buf[0..self.b.tmp.len()].copy_from_slice(&self.b.tmp);`
			`// Fill the rest with fresh bytes.`
			`let nread = try!(self.rdr.read(&mut self.b.buf[self.b.tmp.len()..]));`
Refactor buffered test. 2016-08-08 19:17:25 -04:00			`// Now update our position in all of the bytes searched.`
progress 2016-06-22 21:19:02 -04:00			`self.pos += self.start;`
			`self.start = 0;`
			`// The end is the total number of bytes read plus whatever we had for`
			`// leftovers.`
			`self.end = self.b.tmp.len() + nread;`
			`// Find the last new line. All searches on this buffer will be capped`
			`// at this position since any proceding bytes may correspond to a`
			`// partial line.`
			`//`
Refactor buffered test. 2016-08-08 19:17:25 -04:00			`// This is a little complicated because we must handle the case where`
progress 2016-06-22 21:19:02 -04:00			`// the buffer is not full and no new line character could be found.`
			`// We detect this case because this could potentially be a partial`
			// line. If we fill our buffer and still can't find a `\n`, then we
			`// give up.`
			`let mut start = 0;`
			`let term = self.grep.opts.line_terminator;`
			`loop {`
			`match memrchr(term, &self.b.buf[start..self.end]) {`
			`Some(i) => {`
			`self.lastnl = start + i + 1;`
			`break;`
			`}`
			`None => {`
			`// If we couldn't find a new line and our buffer is`
			`// completely full, then this line is terribly long and we`
			`// return an error.`
			`if self.end == self.b.buf.len() {`
			`return Err(Error::LineTooLong(self.b.buf.len()));`
			`}`
			`// Otherwise we try to ask for more bytes and look again.`
			`let nread = try!(`
			`self.rdr.read(&mut self.b.buf[self.end..]));`
Refactor buffered test. 2016-08-08 19:17:25 -04:00			`// If we got nothing then we're at EOF and we no longer`
progress 2016-06-22 21:19:02 -04:00			`// need to care about leftovers.`
			`if nread == 0 {`
			`self.lastnl = self.end;`
			`break;`
			`}`
			`start = self.end;`
			`self.end += nread;`
			`}`
			`}`
			`}`
			`// If end is zero, then we've hit EOF and we have no leftovers.`
			`Ok(self.end > 0)`
refactor progress 2016-06-20 16:53:48 -04:00			`}`
			`}`

			`pub struct Iter<'b, 's> {`
			`searcher: &'s Grep,`
			`buf: &'b [u8],`
			`start: usize,`
			`}`

			`impl<'b, 's> Iterator for Iter<'b, 's> {`
			`type Item = Match;`

			`fn next(&mut self) -> Option<Match> {`
			`let mut mat = Match::default();`
			`if !self.searcher.read_match(&mut mat, self.buf, self.start) {`
			`self.start = self.buf.len();`
			`return None;`
			`}`
			`self.start = mat.end + 1;`
			`Some(mat)`
			`}`
			`}`

progress 2016-06-22 21:19:02 -04:00			`pub struct IterBuffered<'b, 'g: 'b, R: 'b> {`
			`grep: &'b mut GrepBuffered<'g, R>,`
refactor progress 2016-06-20 16:53:48 -04:00			`}`

progress 2016-06-22 21:19:02 -04:00			`impl<'b, 'g, R: io::Read> Iterator for IterBuffered<'b, 'g, R> {`
			`type Item = Result<Match>;`

			`fn next(&mut self) -> Option<Result<Match>> {`
			`let mut mat = Match::default();`
			`match self.grep.read_match(&mut mat) {`
			`Err(err) => Some(Err(err)),`
			`Ok(false) => None,`
			`Ok(true) => Some(Ok(mat)),`
refactor progress 2016-06-20 16:53:48 -04:00			`}`
progress 2016-06-22 21:19:02 -04:00			`}`
			`}`

			`#[allow(dead_code)]`
			`fn s(bytes: &[u8]) -> String {`
			`String::from_utf8(bytes.to_vec()).unwrap()`
			`}`

			`#[cfg(test)]`
			`mod tests {`
			`#![allow(unused_imports)]`

update 2016-08-05 00:10:58 -04:00			`use memchr::{memchr, memrchr};`
			`use regex::bytes::Regex;`

progress 2016-06-22 21:19:02 -04:00			`use super::{Buffer, GrepBuilder, s};`

			`static SHERLOCK: &'static [u8] = include_bytes!("./data/sherlock.txt");`

Refactor buffered test. 2016-08-08 19:17:25 -04:00			`fn find_lines(pat: &str, haystack: &[u8]) -> Vec<(usize, usize)> {`
			`let re = Regex::new(pat).unwrap();`
			`let mut lines = vec![];`
			`for (s, e) in re.find_iter(haystack) {`
			`let start = memrchr(b'\n', &haystack[..s])`
			`.map_or(0, \|i\| i + 1);`
			`let end = memchr(b'\n', &haystack[e..])`
			`.map_or(haystack.len(), \|i\| e + i);`
			`lines.push((start, end));`
			`}`
			`lines`
			`}`

			`fn grep_lines(pat: &str, haystack: &[u8]) -> Vec<(usize, usize)> {`
			`let g = GrepBuilder::new(pat).create().unwrap();`
			`let mut bg = g.buffered_reader(Buffer::new(), haystack);`
			`bg.iter().map(\|r\| r.unwrap()).map(\|m\| (m.start(), m.end())).collect()`
			`}`

progress 2016-06-22 21:19:02 -04:00			`#[test]`
Refactor buffered test. 2016-08-08 19:17:25 -04:00			`fn buffered_literal() {`
			`let expected = find_lines("Sherlock Holmes", SHERLOCK);`
			`let got = grep_lines("Sherlock Holmes", SHERLOCK);`
			`assert_eq!(expected.len(), got.len());`
			`assert_eq!(expected, got);`
refactor progress 2016-06-20 16:53:48 -04:00			`}`
			`}`