Lots of progress:

- Refactored interaction between CLI args and rest of xrep.
- Filling in a lot more options, including file type filtering.
- Fixing some bugs in globbing/ignoring.
- More documentation.
2025-05-19 05:33:04 +02:00 · 2016-09-05 00:52:23 -04:00 · 2016-09-05 00:52:23 -04:00 · 812cdb13c6
commit 812cdb13c6
parent 0bf278e72f
9 changed files with 1567 additions and 408 deletions
--- a/src/args.rs
+++ b/src/args.rs
@ -0,0 +1,551 @@
+use std::cmp;
+use std::env;
+use std::io;
+use std::path::{Path, PathBuf};
+
+use docopt::Docopt;
+use env_logger;
+use grep::{Grep, GrepBuilder};
+use log;
+use num_cpus;
+use regex;
+use walkdir::WalkDir;
+
+use gitignore::{Gitignore, GitignoreBuilder};
+use ignore::Ignore;
+use out::Out;
+use printer::Printer;
+use search::{InputBuffer, Searcher};
+use types::{FileTypeDef, Types, TypesBuilder};
+use walk;
+
+use Result;
+
+/// The Docopt usage string. `Args::parse` decodes it into `RawArgs`.
+///
+/// If you've never heard of Docopt before, see: http://docopt.org
+/// (TL;DR: The CLI parser is generated from the usage string below.)
+const USAGE: &'static str = "
+Usage: xrep [options] <pattern> [<path> ...]
+       xrep [options] --files [<path> ...]
+       xrep [options] --type-list
+       xrep --help
+       xrep --version
+
+xrep is like the silver searcher and grep, but faster than both.
+
+Common options:
+    -a, --text                 Search binary files as if they were text.
+    -c, --count                Only show count of line matches for each file.
+    -g, --glob GLOB ...        Include or exclude files for searching that
+                               match the given glob. This always overrides any
+                               other ignore logic. Multiple glob flags may be
+                               used. Globbing rules match .gitignore globs.
+                               Precede a glob with a '!' to exclude it.
+    -h, --help                 Show this usage message.
+    -i, --ignore-case          Case insensitive search.
+    -n, --line-number          Show line numbers (1-based).
+    -q, --quiet                Do not print anything to stdout.
+    -t, --type TYPE ...        Only search files matching TYPE. Multiple type
+                               flags may be provided. Use the --type-list flag
+                               to list all available types.
+    -T, --type-not TYPE ...    Do not search files matching TYPE. Multiple
+                               not-type flags may be provided.
+    -v, --invert-match         Invert matching.
+    -w, --word-regexp          Only show matches surrounded by word boundaries.
+                               This is equivalent to putting \\b before and
+                               after the search pattern.
+
+Less common options:
+    -A, --after-context NUM
+        Show NUM lines after each match.
+
+    -B, --before-context NUM
+        Show NUM lines before each match.
+
+    -C, --context NUM
+        Show NUM lines before and after each match.
+
+    --context-separator ARG
+        The string to use when separating non-continuous context lines. Escape
+        sequences may be used. [default: --]
+
+    --debug
+        Show debug messages.
+
+    --files
+        Print each file that would be searched (but don't search).
+
+    -H, --with-filename
+        Prefix each match with the file name that contains it. This is the
+        default when more than one file is searched.
+
+    --hidden
+        Search hidden directories and files.
+
+    -L, --follow
+        Follow symlinks.
+
+    --line-terminator ARG
+        The byte to use for a line terminator. Escape sequences may be used.
+        [default: \\n]
+
+    --no-ignore
+        Don't respect ignore files (.gitignore, .xrepignore, etc.)
+
+    -Q, --literal
+        Treat the pattern as a literal string instead of a regular expression.
+
+    --threads ARG
+        The number of threads to use. Defaults to the number of logical CPUs
+        (capped at 6). [default: 0]
+
+    --version
+        Show the version number of xrep and exit.
+
+File type management options:
+    --type-list
+        Show all supported file types and their associated globs.
+
+    --type-add ARG ...
+        Add a new glob for a particular file type.
+        Example: --type-add html:*.html,*.htm
+
+    --type-clear TYPE ...
+        Clear the file type globs for TYPE.
+";
+
+/// RawArgs are the args as they are parsed from Docopt. They aren't used
+/// directly by the rest of xrep; `to_args` normalizes them into `Args`.
+#[derive(Debug, RustcDecodable)]
+pub struct RawArgs {
+    arg_pattern: String,
+    arg_path: Vec<String>,
+    flag_after_context: usize,
+    flag_before_context: usize,
+    flag_context: usize, // when > 0, used for both before and after context
+    flag_context_separator: String, // still escaped; unescaped in `to_args`
+    flag_count: bool,
+    flag_debug: bool,
+    flag_files: bool,
+    flag_follow: bool,
+    flag_glob: Vec<String>,
+    flag_hidden: bool,
+    flag_ignore_case: bool,
+    flag_invert_match: bool,
+    flag_line_number: bool,
+    flag_line_terminator: String, // still escaped; must unescape to one byte
+    flag_literal: bool,
+    flag_no_ignore: bool,
+    flag_quiet: bool,
+    flag_text: bool,
+    flag_threads: usize, // 0 means "choose automatically" (see `to_args`)
+    flag_type: Vec<String>,
+    flag_type_not: Vec<String>,
+    flag_type_list: bool,
+    flag_type_add: Vec<String>,
+    flag_type_clear: Vec<String>,
+    flag_with_filename: bool,
+    flag_word_regexp: bool,
+}
+
+/// Args are the transformed/normalized form of RawArgs used by the rest of xrep.
+#[derive(Debug)]
+pub struct Args {
+    pattern: String, // already quoted (-Q) and/or wrapped in \b (-w) as requested
+    paths: Vec<PathBuf>, // never empty: defaults to "./"
+    after_context: usize,
+    before_context: usize,
+    context_separator: Vec<u8>, // unescaped bytes of --context-separator
+    count: bool,
+    eol: u8, // the single line terminator byte
+    files: bool,
+    follow: bool,
+    glob_overrides: Option<Gitignore>, // None when no --glob flag was given
+    hidden: bool,
+    ignore_case: bool,
+    invert_match: bool,
+    line_number: bool,
+    no_ignore: bool,
+    quiet: bool,
+    text: bool,
+    threads: usize,
+    type_defs: Vec<FileTypeDef>, // definitions reported by the TypesBuilder
+    type_list: bool,
+    types: Types,
+    with_filename: bool,
+}
+
+impl RawArgs {
+    /// Normalize the parsed arguments into the configuration used by xrep.
+    fn to_args(&self) -> Result<Args> {
+        let pattern = {
+            let pattern =
+                if self.flag_literal { // -Q: escape regex metacharacters
+                    regex::quote(&self.arg_pattern)
+                } else {
+                    self.arg_pattern.clone()
+                };
+            if self.flag_word_regexp { // -w: surround with word boundaries
+                format!(r"\b{}\b", pattern)
+            } else {
+                pattern
+            }
+        };
+        let paths =
+            if self.arg_path.is_empty() { // no path given: search "./"
+                vec![Path::new("./").to_path_buf()]
+            } else {
+                self.arg_path.iter().map(|p| {
+                    Path::new(p).to_path_buf()
+                }).collect()
+            };
+        let (after_context, before_context) =
+            if self.flag_context > 0 { // -C takes precedence over -A/-B
+                (self.flag_context, self.flag_context)
+            } else {
+                (self.flag_after_context, self.flag_before_context)
+            };
+        let eol = {
+            let eol = unescape(&self.flag_line_terminator);
+            if eol.is_empty() {
+                errored!("Empty line terminator is not allowed.");
+            } else if eol.len() > 1 {
+                errored!("Line terminators are limited to exactly 1 byte.");
+            }
+            eol[0] // NOTE(review): assumes `errored!` returns early — confirm
+        };
+        let glob_overrides =
+            if self.flag_glob.is_empty() {
+                None
+            } else {
+                let cwd = try!(env::current_dir()); // globs are rooted at the cwd
+                let mut bgi = GitignoreBuilder::new(cwd);
+                for pat in &self.flag_glob {
+                    try!(bgi.add("<argv>", pat));
+                }
+                Some(try!(bgi.build()))
+            };
+        let threads =
+            if self.flag_threads == 0 { // automatic: logical CPUs, capped at 6
+                cmp::min(6, num_cpus::get())
+            } else {
+                self.flag_threads
+            };
+        let mut with_filename = self.flag_with_filename;
+        if !with_filename { // default on for multiple paths or a directory
+            with_filename = paths.len() > 1 || paths[0].is_dir();
+        }
+        let mut btypes = TypesBuilder::new();
+        btypes.add_defaults();
+        try!(self.add_types(&mut btypes));
+        let types = try!(btypes.build());
+        Ok(Args {
+            pattern: pattern,
+            paths: paths,
+            after_context: after_context,
+            before_context: before_context,
+            context_separator: unescape(&self.flag_context_separator),
+            count: self.flag_count,
+            eol: eol,
+            files: self.flag_files,
+            follow: self.flag_follow,
+            glob_overrides: glob_overrides,
+            hidden: self.flag_hidden,
+            ignore_case: self.flag_ignore_case,
+            invert_match: self.flag_invert_match,
+            line_number: self.flag_line_number,
+            no_ignore: self.flag_no_ignore,
+            quiet: self.flag_quiet,
+            text: self.flag_text,
+            threads: threads,
+            type_defs: btypes.definitions(),
+            type_list: self.flag_type_list,
+            types: types,
+            with_filename: with_filename,
+        })
+    }
+
+    fn add_types(&self, types: &mut TypesBuilder) -> Result<()> { // applies type flags to the builder
+        for ty in &self.flag_type_clear { // clears run first
+            types.clear(ty);
+        }
+        for def in &self.flag_type_add { // the only step that can fail
+            try!(types.add_def(def));
+        }
+        for ty in &self.flag_type {
+            types.select(ty);
+        }
+        for ty in &self.flag_type_not {
+            types.select_not(ty);
+        }
+        Ok(())
+    }
+}
+
+impl Args {
+    /// Parse the command line arguments for this process.
+    ///
+    /// If a CLI usage error occurred, then exit the process and print a usage
+    /// or error message. Similarly, if the user requested the version of
+    /// xrep, then print the version and exit.
+    ///
+    /// Also, initialize a global logger (Debug with --debug, else Warn).
+    pub fn parse() -> Result<Args> {
+        let raw: RawArgs =
+            Docopt::new(USAGE)
+                .and_then(|d| d.version(Some(version())).decode())
+                .unwrap_or_else(|e| e.exit());
+
+        let mut logb = env_logger::LogBuilder::new();
+        if raw.flag_debug {
+            logb.filter(None, log::LogLevelFilter::Debug);
+        } else {
+            logb.filter(None, log::LogLevelFilter::Warn);
+        }
+        if let Err(err) = logb.init() {
+            errored!("failed to initialize logger: {}", err);
+        }
+
+        raw.to_args().map_err(From::from)
+    }
+
+    /// Returns true if xrep should print the files it will search and exit
+    /// (but not do any actual searching).
+    pub fn files(&self) -> bool {
+        self.files
+    }
+
+    /// Create a new line based matcher. The matcher returned can be used
+    /// across multiple threads simultaneously. This matcher only supports
+    /// basic searching of regular expressions in a single buffer.
+    ///
+    /// The pattern, case sensitivity and line terminator come from the CLI.
+    pub fn grep(&self) -> Result<Grep> {
+        GrepBuilder::new(&self.pattern)
+            .case_insensitive(self.ignore_case)
+            .line_terminator(self.eol)
+            .build()
+            .map_err(From::from)
+    }
+
+    /// Creates a new input buffer for searching, using the configured eol.
+    pub fn input_buffer(&self) -> InputBuffer {
+        let mut inp = InputBuffer::new();
+        inp.eol(self.eol);
+        inp
+    }
+
+    /// Create a new printer of individual search results that writes to the
+    /// writer given.
+    pub fn printer<W: io::Write>(&self, wtr: W) -> Printer<W> {
+        Printer::new(wtr)
+            .context_separator(self.context_separator.clone())
+            .eol(self.eol)
+            .quiet(self.quiet)
+            .with_filename(self.with_filename)
+    }
+
+    /// Create a new printer of search results for an entire file that writes
+    /// to the writer given.
+    pub fn out<W: io::Write>(&self, wtr: W) -> Out<W> {
+        let mut out = Out::new(wtr);
+        if self.before_context > 0 || self.after_context > 0 { // only with context
+            out = out.file_separator(self.context_separator.clone());
+        }
+        out
+    }
+
+    /// Return the paths that should be searched.
+    pub fn paths(&self) -> &[PathBuf] {
+        &self.paths
+    }
+
+    /// Create a new line based searcher whose configuration is taken from the
+    /// command line. This searcher supports a dizzying array of features:
+    /// inverted matching, line counting, context control and more.
+    pub fn searcher<'a, R: io::Read, W: io::Write>(
+        &self,
+        inp: &'a mut InputBuffer,
+        printer: &'a mut Printer<W>,
+        grep: &'a Grep,
+        path: &'a Path,
+        rdr: R,
+    ) -> Searcher<'a, R, W> {
+        Searcher::new(inp, printer, grep, path, rdr)
+            .after_context(self.after_context)
+            .before_context(self.before_context)
+            .count(self.count)
+            .eol(self.eol)
+            .line_number(self.line_number)
+            .invert_match(self.invert_match)
+            .text(self.text)
+    }
+
+    /// Returns the number of worker search threads that should be used.
+    pub fn threads(&self) -> usize {
+        self.threads
+    }
+
+    /// Returns a list of type definitions currently loaded.
+    pub fn type_defs(&self) -> &[FileTypeDef] {
+        &self.type_defs
+    }
+
+    /// Returns true if xrep should print the type definitions currently loaded
+    /// and then exit.
+    pub fn type_list(&self) -> bool {
+        self.type_list
+    }
+
+    /// Create a new recursive directory iterator at the path given.
+    pub fn walker(&self, path: &Path) -> walk::Iter {
+        let wd = WalkDir::new(path).follow_links(self.follow);
+        let mut ig = Ignore::new();
+        ig.ignore_hidden(!self.hidden); // --hidden turns hidden-file skipping off
+        ig.no_ignore(self.no_ignore);
+        ig.add_types(self.types.clone());
+        if let Some(ref overrides) = self.glob_overrides { // only with --glob
+            ig.add_override(overrides.clone());
+        }
+        walk::Iter::new(ig, wd)
+    }
+}
+
+fn version() -> String { // version string handed to Docopt in `Args::parse`
+    let (maj, min, pat) = (
+        option_env!("CARGO_PKG_VERSION_MAJOR"), // read at compile time
+        option_env!("CARGO_PKG_VERSION_MINOR"),
+        option_env!("CARGO_PKG_VERSION_PATCH"),
+    );
+    match (maj, min, pat) {
+        (Some(maj), Some(min), Some(pat)) =>
+            format!("{}.{}.{}", maj, min, pat),
+        _ => "".to_owned(), // any component unset: empty version string
+    }
+}
+
+/// A single state in the state machine used by `unescape`.
+#[derive(Clone, Copy, Eq, PartialEq)]
+enum State {
+    Escape, // just consumed a backslash
+    HexFirst, // consumed "\x"; expecting the first hex digit
+    HexSecond(char), // consumed "\x" plus one hex digit (carried here)
+    Literal, // initial state: characters are copied through
+}
+
+/// Unescapes a string given on the command line. It supports a limited set
+/// of escape sequences (anything else is passed through unchanged):
+///
+/// * \t, \r and \n are mapped to their corresponding ASCII bytes.
+/// * \xZZ hexadecimal escapes are mapped to their byte.
+fn unescape(s: &str) -> Vec<u8> {
+    use self::State::*;
+
+    let mut bytes = vec![];
+    let mut state = Literal;
+    for c in s.chars() {
+        match state {
+            Escape => {
+                match c {
+                    'n' => { bytes.push(b'\n'); state = Literal; }
+                    'r' => { bytes.push(b'\r'); state = Literal; }
+                    't' => { bytes.push(b'\t'); state = Literal; }
+                    'x' => { state = HexFirst; }
+                    c => { // unknown escape: keep the backslash and the char
+                        bytes.extend(&format!(r"\{}", c).into_bytes());
+                        state = Literal;
+                    }
+                }
+            }
+            HexFirst => {
+                match c {
+                    '0'...'9' | 'A'...'F' | 'a'...'f' => {
+                        state = HexSecond(c);
+                    }
+                    c => { // not a hex digit: emit the consumed text verbatim
+                        bytes.extend(&format!(r"\x{}", c).into_bytes());
+                        state = Literal;
+                    }
+                }
+            }
+            HexSecond(first) => {
+                match c {
+                    '0'...'9' | 'A'...'F' | 'a'...'f' => {
+                        let ordinal = format!("{}{}", first, c);
+                        let byte = u8::from_str_radix(&ordinal, 16).unwrap(); // both chars matched as hex digits
+                        bytes.push(byte);
+                        state = Literal;
+                    }
+                    c => { // second char is not hex: emit everything verbatim
+                        let original = format!(r"\x{}{}", first, c);
+                        bytes.extend(&original.into_bytes());
+                        state = Literal;
+                    }
+                }
+            }
+            Literal => {
+                match c {
+                    '\\' => { state = Escape; }
+                    c => { bytes.extend(c.to_string().as_bytes()); } // UTF-8 bytes of c
+                }
+            }
+        }
+    }
+    match state { // input ended mid-escape: emit what was consumed so far
+        Escape => bytes.push(b'\\'),
+        HexFirst => bytes.extend(b"\\x"),
+        HexSecond(c) => bytes.extend(&format!("\\x{}", c).into_bytes()),
+        Literal => {}
+    }
+    bytes
+}
+
+#[cfg(test)]
+mod tests { // unit tests for `unescape`
+    use super::unescape;
+
+    fn b(bytes: &'static [u8]) -> Vec<u8> { // shorthand: byte literal to Vec<u8>
+        bytes.to_vec()
+    }
+
+    #[test]
+    fn unescape_nul() {
+        assert_eq!(b(b"\x00"), unescape(r"\x00"));
+    }
+
+    #[test]
+    fn unescape_nl() {
+        assert_eq!(b(b"\n"), unescape(r"\n"));
+    }
+
+    #[test]
+    fn unescape_tab() {
+        assert_eq!(b(b"\t"), unescape(r"\t"));
+    }
+
+    #[test]
+    fn unescape_carriage() {
+        assert_eq!(b(b"\r"), unescape(r"\r"));
+    }
+
+    #[test]
+    fn unescape_nothing_simple() { // unknown escape is passed through verbatim
+        assert_eq!(b(b"\\a"), unescape(r"\a"));
+    }
+
+    #[test]
+    fn unescape_nothing_hex0() { // "\x" with no digits is passed through
+        assert_eq!(b(b"\\x"), unescape(r"\x"));
+    }
+
+    #[test]
+    fn unescape_nothing_hex1() { // non-hex after "\x" is passed through
+        assert_eq!(b(b"\\xz"), unescape(r"\xz"));
+    }
+
+    #[test]
+    fn unescape_nothing_hex2() {
+        assert_eq!(b(b"\\xzz"), unescape(r"\xzz"));
+    }
+}
--- a/src/gitignore.rs
+++ b/src/gitignore.rs
@ -79,6 +79,7 @@ impl From<io::Error> for Error {
 }

 /// Gitignore is a matcher for the glob patterns in a single gitignore file.
+#[derive(Clone, Debug)]
 pub struct Gitignore {
    set: glob::Set,
    root: PathBuf,
@ -136,22 +137,26 @@ impl Gitignore {
    pub fn matched_utf8(&self, path: &str, is_dir: bool) -> Match {
        // A single regex with a bunch of alternations of glob patterns is
        // unfortunately typically faster than a regex, so we use it as a
-        // first pass filter. We still need to run the RegexSet to most
+        // first pass filter. We still need to run the RegexSet to get the most
        // recently defined glob that matched.
        if !self.set.is_match(path) {
            return Match::None;
        }
-        let pat = match self.set.matches(path).iter().last() {
-            None => return Match::None,
-            Some(i) => &self.patterns[i],
-        };
-        if pat.whitelist {
-            Match::Whitelist(&pat)
-        } else if !pat.only_dir || is_dir {
-            Match::Ignored(&pat)
-        } else {
-            Match::None
+        // The regex set can't actually pick the right glob that matched all
+        // on its own. In particular, some globs require that only directories
+        // can match. Thus, only accept a match from the regex set if the given
+        // path satisfies the corresponding glob's directory criteria.
+        for i in self.set.matches(path).iter().rev() {
+            let pat = &self.patterns[i];
+            if !pat.only_dir || is_dir {
+                return if pat.whitelist {
+                    Match::Whitelist(pat)
+                } else {
+                    Match::Ignored(pat)
+                };
+            }
        }
+        Match::None
    }
 }

@ -177,6 +182,24 @@ impl<'a> Match<'a> {
            Match::None | Match::Whitelist(_) => false,
        }
    }
+
+    /// Returns true if and only if no glob matched (i.e. `Match::None`).
+    pub fn is_none(&self) -> bool {
+        match *self {
+            Match::None => true,
+            Match::Ignored(_) | Match::Whitelist(_) => false,
+        }
+    }
+
+    /// Inverts the match so that Ignored becomes Whitelist and Whitelist
+    /// becomes Ignored, keeping the same pattern. A non-match stays None.
+    pub fn invert(self) -> Match<'a> {
+        match self {
+            Match::None => Match::None,
+            Match::Ignored(pat) => Match::Whitelist(pat),
+            Match::Whitelist(pat) => Match::Ignored(pat),
+        }
+    }
 }

 /// GitignoreBuilder constructs a matcher for a single set of globs from a
@ -231,7 +254,6 @@ impl GitignoreBuilder {
    /// Add each pattern line from the file path given.
    pub fn add_path<P: AsRef<Path>>(&mut self, path: P) -> Result<(), Error> {
        let rdr = io::BufReader::new(try!(File::open(&path)));
-        // println!("adding ignores from: {}", path.as_ref().display());
        for line in rdr.lines() {
            try!(self.add(&path, &try!(line)));
        }
--- a/src/glob.rs
+++ b/src/glob.rs
@ -77,6 +77,8 @@ impl Set {
    /// Returns every glob pattern (by sequence number) that matches the given
    /// path.
    pub fn matches<T: AsRef<[u8]>>(&self, path: T) -> SetMatches {
+        // TODO(burntsushi): If we split this out into a separate crate, don't
+        // expose the regex::SetMatches type in the public API.
        self.set.matches(path.as_ref())
    }

--- a/src/ignore.rs
+++ b/src/ignore.rs
@ -18,6 +18,7 @@ use std::fmt;
 use std::path::{Path, PathBuf};

 use gitignore::{self, Gitignore, GitignoreBuilder, Match};
+use types::Types;

 /// Represents an error that can occur when parsing a gitignore file.
 #[derive(Debug)]
@ -56,7 +57,13 @@ pub struct Ignore {
    /// A stack of ignore patterns at each directory level of traversal.
    /// A directory that contributes no ignore patterns is `None`.
    stack: Vec<Option<IgnoreDir>>,
+    /// A set of override globs that are always checked first. A match (whether
+    /// it's whitelist or blacklist) trumps anything in stack.
+    overrides: Option<Gitignore>,
+    /// A file type matcher.
+    types: Option<Types>,
    ignore_hidden: bool,
+    no_ignore: bool,
 }

 impl Ignore {
@ -64,7 +71,10 @@ impl Ignore {
    pub fn new() -> Ignore {
        Ignore {
            stack: vec![],
+            overrides: None,
+            types: None,
            ignore_hidden: true,
+            no_ignore: false,
        }
    }

@ -74,11 +84,34 @@ impl Ignore {
        self
    }

+    /// When `yes` is true, ignore files are not consulted (see `push`).
+    pub fn no_ignore(&mut self, yes: bool) -> &mut Ignore {
+        self.no_ignore = yes;
+        self
+    }
+
+    /// Set the globs that override all other match logic (replaces any prior set).
+    pub fn add_override(&mut self, gi: Gitignore) -> &mut Ignore {
+        self.overrides = Some(gi);
+        self
+    }
+
+    /// Set the file type matcher, replacing any previous one. The file type
+    /// matcher has the lowest precedence.
+    pub fn add_types(&mut self, types: Types) -> &mut Ignore {
+        self.types = Some(types);
+        self
+    }
+
    /// Add a directory to the stack.
    ///
    /// Note that even if this returns an error, the directory is added to the
    /// stack (and therefore should be popped).
    pub fn push<P: AsRef<Path>>(&mut self, path: P) -> Result<(), Error> {
+        if self.no_ignore {
+            self.stack.push(None);
+            return Ok(());
+        }
        match IgnoreDir::new(path) {
            Ok(id) => {
                self.stack.push(id);
@ -102,24 +135,57 @@ impl Ignore {
    /// Returns true if and only if the given file path should be ignored.
    pub fn ignored<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> bool {
        let path = path.as_ref();
+        if let Some(ref overrides) = self.overrides {
+            let mat = overrides.matched(path, is_dir).invert();
+            if let Some(is_ignored) = self.ignore_match(path, mat) {
+                return is_ignored;
+            }
+        }
        if self.ignore_hidden && is_hidden(&path) {
+            debug!("{} ignored because it is hidden", path.display());
            return true;
        }
        for id in self.stack.iter().rev().filter_map(|id| id.as_ref()) {
-            match id.matched(path, is_dir) {
-                Match::Whitelist(ref pat) => {
-                    debug!("{} whitelisted by {:?}", path.display(), pat);
-                    return false;
-                }
-                Match::Ignored(ref pat) => {
-                    debug!("{} ignored by {:?}", path.display(), pat);
+            let mat = id.matched(path, is_dir);
+            if let Some(is_ignored) = self.ignore_match(path, mat) {
+                if is_ignored {
                    return true;
                }
-                Match::None => {}
+                // If this path is whitelisted by an ignore, then fallthrough
+                // and let the file type matcher have a say.
+                break;
+            }
+        }
+        if let Some(ref types) = self.types {
+            let mat = types.matched(path, is_dir);
+            if let Some(is_ignored) = self.ignore_match(path, mat) {
+                return is_ignored;
            }
        }
        false
    }
+
+    /// Returns Some(true) if the given match says the given path should be
+    /// ignored, or Some(false) if the path is explicitly whitelisted.
+    /// Returns None if nothing matched. Decisions are logged via `debug!`.
+    pub fn ignore_match<P: AsRef<Path>>(
+        &self,
+        path: P,
+        mat: Match,
+    ) -> Option<bool> {
+        let path = path.as_ref();
+        match mat {
+            Match::Whitelist(ref pat) => {
+                debug!("{} whitelisted by {:?}", path.display(), pat);
+                Some(false)
+            }
+            Match::Ignored(ref pat) => {
+                debug!("{} ignored by {:?}", path.display(), pat);
+                Some(true)
+            }
+            Match::None => None,
+        }
+    }
 }

 /// IgnoreDir represents a set of ignore patterns retrieved from a single
--- a/src/main.rs
+++ b/src/main.rs
@ -19,7 +19,6 @@ extern crate rustc_serialize;
 extern crate thread_local;
 extern crate walkdir;

-use std::cmp;
 use std::error::Error;
 use std::fs::File;
 use std::io::{self, Write};
@ -30,14 +29,13 @@ use std::sync::Arc;
 use std::thread;

 use crossbeam::sync::chase_lev::{self, Steal, Stealer};
-use docopt::Docopt;
-use grep::{Grep, GrepBuilder};
+use grep::Grep;
 use parking_lot::Mutex;
-use walkdir::WalkDir;

-use ignore::Ignore;
+use args::Args;
+use out::Out;
 use printer::Printer;
-use search::{InputBuffer, Searcher};
+use search::InputBuffer;

 macro_rules! errored {
    ($($tt:tt)*) => {
@ -52,64 +50,22 @@ macro_rules! eprintln {
    }}
 }

+mod args;
 mod gitignore;
 mod glob;
 mod ignore;
+mod out;
 mod printer;
 mod search;
+mod types;
 mod walk;

-const USAGE: &'static str = "
-Usage: xrep [options] <pattern> [<path> ...]
-       xrep --files [<path> ...]
-
-xrep is like the silver searcher and grep, but faster than both.
-
-WARNING: Searching stdin isn't yet supported.
-
-Options:
-    -c, --count                Suppress normal output and show count of line
-                               matches.
-    -A, --after-context NUM    Show NUM lines after each match.
-    -B, --before-context NUM   Show NUM lines before each match.
-    -C, --context NUM          Show NUM lines before and after each match.
-    --debug                    Show debug messages.
-    --files                    Print each file that would be searched
-                               (but don't search).
-    --hidden                   Search hidden directories and files.
-    -i, --ignore-case          Case insensitive search.
-    -L, --follow               Follow symlinks.
-    -n, --line-number          Show line numbers (1-based).
-    -t, --threads ARG          The number of threads to use. Defaults to the
-                               number of logical CPUs. [default: 0]
-    -v, --invert-match         Invert matching.
-";
-
-#[derive(RustcDecodable)]
-struct Args {
-    arg_pattern: String,
-    arg_path: Vec<String>,
-    flag_after_context: usize,
-    flag_before_context: usize,
-    flag_context: usize,
-    flag_count: bool,
-    flag_debug: bool,
-    flag_files: bool,
-    flag_follow: bool,
-    flag_hidden: bool,
-    flag_ignore_case: bool,
-    flag_invert_match: bool,
-    flag_line_number: bool,
-    flag_threads: usize,
-}
-
 pub type Result<T> = result::Result<T, Box<Error + Send + Sync>>;

 fn main() {
-    let args: Args = Docopt::new(USAGE).and_then(|d| d.decode())
-                                       .unwrap_or_else(|e| e.exit());
-    match run(args) {
-        Ok(_) => process::exit(0),
+    match Args::parse().and_then(run) {
+        Ok(count) if count == 0 => process::exit(1),
+        Ok(count) => process::exit(0),
        Err(err) => {
            let _ = writeln!(&mut io::stderr(), "{}", err);
            process::exit(1);
@ -117,194 +73,158 @@ fn main() {
    }
 }

-fn run(mut args: Args) -> Result<()> {
-    let mut logb = env_logger::LogBuilder::new();
-    if args.flag_debug {
-        logb.filter(None, log::LogLevelFilter::Debug);
-    } else {
-        logb.filter(None, log::LogLevelFilter::Warn);
-    }
-    if let Err(err) = logb.init() {
-        errored!("failed to initialize logger: {}", err);
-    }
-
-    if args.arg_path.is_empty() {
-        args.arg_path.push("./".to_string());
-    }
-    if args.arg_path.iter().any(|p| p == "-") {
-        errored!("searching <stdin> isn't yet supported");
-    }
-    if args.flag_files {
+fn run(args: Args) -> Result<u64> {
+    if args.files() {
        return run_files(args);
    }
+    if args.type_list() {
+        return run_types(args);
+    }
    let args = Arc::new(args);
+    let out = Arc::new(Mutex::new(args.out(io::stdout())));
    let mut workers = vec![];
-    let out = Arc::new(Mutex::new(Out::new(args.clone(), io::stdout())));

-    let mut chan_work_send = {
-        let (worker, stealer) = chase_lev::deque();
-        for _ in 0..args.num_workers() {
-            let grepb =
-                GrepBuilder::new(&args.arg_pattern)
-                .case_insensitive(args.flag_ignore_case);
+    let mut workq = {
+        let (workq, stealer) = chase_lev::deque();
+        for _ in 0..args.threads() {
            let worker = Worker {
                args: args.clone(),
                out: out.clone(),
                chan_work: stealer.clone(),
-                inpbuf: InputBuffer::new(),
+                inpbuf: args.input_buffer(),
                outbuf: Some(vec![]),
-                grep: try!(grepb.build()),
+                grep: try!(args.grep()),
            };
            workers.push(thread::spawn(move || worker.run()));
        }
-        worker
+        workq
    };
-
-    for p in &args.arg_path {
-        for path in args.walker(p) {
-            chan_work_send.push(Message::Some(path));
+    for p in args.paths() {
+        if p == Path::new("-") {
+            workq.push(Work::Stdin)
+        } else {
+            for path in args.walker(p) {
+                workq.push(Work::File(path));
+            }
        }
    }
    for _ in 0..workers.len() {
-        chan_work_send.push(Message::Quit);
+        workq.push(Work::Quit);
    }
+    let mut match_count = 0;
    for worker in workers {
-        worker.join().unwrap();
+        match_count += worker.join().unwrap();
    }
-    Ok(())
+    Ok(match_count)
 }

-fn run_files(args: Args) -> Result<()> {
+/// Prints every path that would be searched, without searching it.
+///
+/// A `-` path is reported as `<stdin>`; every other path is expanded through
+/// `args.walker`. Returns the number of paths printed.
+fn run_files(args: Args) -> Result<u64> {
    let mut printer = Printer::new(io::BufWriter::new(io::stdout()));
-    for p in &args.arg_path {
-        for path in args.walker(p) {
-            printer.path(path);
+    let mut file_count = 0;
+    for p in args.paths() {
+        if p == Path::new("-") {
+            // `Path::new` already yields a `&Path`; no extra borrow needed.
+            printer.path(Path::new("<stdin>"));
+            file_count += 1;
+        } else {
+            for path in args.walker(p) {
+                printer.path(path);
+                file_count += 1;
+            }
        }
    }
-    Ok(())
+    Ok(file_count)
 }

-impl Args {
-    fn printer<W: io::Write>(&self, wtr: W) -> Printer<W> {
-        Printer::new(wtr)
-    }
-
-    fn num_workers(&self) -> usize {
-        let mut num = self.flag_threads;
-        if num == 0 {
-            num = cmp::min(8, num_cpus::get());
-        }
-        num
-    }
-
-    fn walker<P: AsRef<Path>>(&self, path: P) -> walk::Iter {
-        let wd = WalkDir::new(path).follow_links(self.flag_follow);
-        let mut ig = Ignore::new();
-        ig.ignore_hidden(!self.flag_hidden);
-        walk::Iter::new(ig, wd)
-    }
-
-    fn before_context(&self) -> usize {
-        if self.flag_context > 0 {
-            self.flag_context
-        } else {
-            self.flag_before_context
-        }
-    }
-
-    fn after_context(&self) -> usize {
-        if self.flag_context > 0 {
-            self.flag_context
-        } else {
-            self.flag_after_context
-        }
-    }
-
-    fn has_context(&self) -> bool {
-        self.before_context() > 0 || self.after_context() > 0
+fn run_types(args: Args) -> Result<u64> {
+    let mut printer = Printer::new(io::BufWriter::new(io::stdout()));
+    let mut ty_count = 0;
+    for def in args.type_defs() {
+        printer.type_def(def);
+        ty_count += 1;
    }
+    Ok(ty_count)
 }

-enum Message<T> {
-    Some(T),
+enum Work {
+    File(PathBuf),
+    Stdin,
    Quit,
 }

 struct Worker {
    args: Arc<Args>,
    out: Arc<Mutex<Out<io::Stdout>>>,
-    chan_work: Stealer<Message<PathBuf>>,
+    chan_work: Stealer<Work>,
    inpbuf: InputBuffer,
    outbuf: Option<Vec<u8>>,
    grep: Grep,
 }

 impl Worker {
-    fn run(mut self) {
+    fn run(mut self) -> u64 {
+        let mut match_count = 0;
        loop {
-            let path = match self.chan_work.steal() {
+            let (path, file) = match self.chan_work.steal() {
                Steal::Empty | Steal::Abort => continue,
-                Steal::Data(Message::Quit) => break,
-                Steal::Data(Message::Some(path)) => path,
-            };
-            let file = match File::open(&path) {
-                Ok(file) => file,
-                Err(err) => {
-                    eprintln!("{}: {}", path.display(), err);
-                    continue;
+                Steal::Data(Work::Quit) => break,
+                Steal::Data(Work::File(path)) => {
+                    match File::open(&path) {
+                        Ok(file) => (path, Some(file)),
+                        Err(err) => {
+                            eprintln!("{}: {}", path.display(), err);
+                            continue;
+                        }
+                    }
+                }
+                Steal::Data(Work::Stdin) => {
+                    (Path::new("<stdin>").to_path_buf(), None)
                }
            };
            let mut outbuf = self.outbuf.take().unwrap();
            outbuf.clear();
            let mut printer = self.args.printer(outbuf);
            {
-                let mut searcher = Searcher::new(
-                    &mut self.inpbuf,
-                    &mut printer,
-                    &self.grep,
-                    &path,
-                    file,
-                );
-                searcher = searcher.count(self.args.flag_count);
-                searcher = searcher.line_number(self.args.flag_line_number);
-                searcher = searcher.invert_match(self.args.flag_invert_match);
-                searcher = searcher.after_context(self.args.after_context());
-                searcher = searcher.before_context(self.args.before_context());
-                if let Err(err) = searcher.run() {
-                    eprintln!("{}", err);
+                let result = match file {
+                    None => {
+                        let stdin = io::stdin();
+                        let stdin = stdin.lock();
+                        self.search(&mut printer, &path, stdin)
+                    }
+                    Some(file) => {
+                        self.search(&mut printer, &path, file)
+                    }
+                };
+                match result {
+                    Ok(count) => {
+                        match_count += count;
+                    }
+                    Err(err) => {
+                        eprintln!("{}", err);
+                    }
                }
            }
            let outbuf = printer.into_inner();
            if !outbuf.is_empty() {
                let mut out = self.out.lock();
-                out.write_file_matches(&outbuf);
+                out.write(&outbuf);
            }
            self.outbuf = Some(outbuf);
        }
-    }
-}
-
-struct Out<W: io::Write> {
-    args: Arc<Args>,
-    wtr: io::BufWriter<W>,
-    printed: bool,
-}
-
-impl<W: io::Write> Out<W> {
-    fn new(args: Arc<Args>, wtr: W) -> Out<W> {
-        Out {
-            args: args,
-            wtr: io::BufWriter::new(wtr),
-            printed: false,
-        }
+        match_count
    }

-    fn write_file_matches(&mut self, buf: &[u8]) {
-        if self.printed && self.args.has_context() {
-            let _ = self.wtr.write_all(b"--\n");
-        }
-        let _ = self.wtr.write_all(buf);
-        let _ = self.wtr.flush();
-        self.printed = true;
+    fn search<R: io::Read, W: io::Write>(
+        &mut self,
+        printer: &mut Printer<W>,
+        path: &Path,
+        rdr: R,
+    ) -> Result<u64> {
+        self.args.searcher(
+            &mut self.inpbuf,
+            printer,
+            &self.grep,
+            path,
+            rdr,
+        ).run().map_err(From::from)
    }
 }
--- a/src/out.rs
+++ b/src/out.rs
@ -0,0 +1,45 @@
+use std::io::{self, Write};
+
+/// Out controls the actual output of all search results for a particular file
+/// to the end user.
+///
+/// (The difference between Out and Printer is that a Printer works with
+/// individual search results whereas Out works with search results for each
+/// file as a whole. For example, it knows when to print a file separator.)
+pub struct Out<W: io::Write> {
+    wtr: io::BufWriter<W>,
+    printed: bool,
+    file_separator: Vec<u8>,
+}
+
+impl<W: io::Write> Out<W> {
+    /// Create a new Out that writes to the wtr given.
+    pub fn new(wtr: W) -> Out<W> {
+        Out {
+            wtr: io::BufWriter::new(wtr),
+            printed: false,
+            file_separator: vec![],
+        }
+    }
+
+    /// If set, the separator is printed between matches from different files.
+    /// By default, no separator is printed.
+    ///
+    /// If sep is empty, then no file separator is printed.
+    pub fn file_separator(mut self, sep: Vec<u8>) -> Out<W> {
+        self.file_separator = sep;
+        self
+    }
+
+    /// Write the search results of a single file to the underlying wtr and
+    /// flush wtr.
+    pub fn write(&mut self, buf: &[u8]) {
+        if self.printed && !self.file_separator.is_empty() {
+            let _ = self.wtr.write_all(&self.file_separator);
+            let _ = self.wtr.write_all(b"\n");
+        }
+        let _ = self.wtr.write_all(buf);
+        let _ = self.wtr.flush();
+        self.printed = true;
+    }
+}
--- a/src/printer.rs
+++ b/src/printer.rs
@ -1,53 +1,121 @@
 use std::io;
 use std::path::Path;

-macro_rules! wln {
-    ($($tt:tt)*) => {
-        let _ = writeln!($($tt)*);
-    }
-}
-
-macro_rules! w {
-    ($($tt:tt)*) => {
-        let _ = write!($($tt)*);
-    }
-}
+use types::FileTypeDef;

+/// Printer encapsulates all output logic for searching.
+///
+/// Note that we currently ignore all write errors. It's probably worthwhile
+/// to fix this, but printers are only ever used for writes to stdout or
+/// writes to memory, neither of which commonly fail.
 pub struct Printer<W> {
+    /// The underlying writer.
    wtr: W,
+    /// Whether anything has been printed to wtr yet.
    has_printed: bool,
+    /// The string to use to separate non-contiguous runs of context lines.
+    context_separator: Vec<u8>,
+    /// The end-of-line terminator used by the printer. In general, eols are
+    /// printed via the match directly, but occasionally we need to insert them
+    /// ourselves (for example, to print a context separator).
+    eol: u8,
+    /// Whether to suppress all output.
+    quiet: bool,
+    /// Whether to prefix each match with the corresponding file name.
+    with_filename: bool,
 }

 impl<W: io::Write> Printer<W> {
+    /// Create a new printer that writes to wtr.
    pub fn new(wtr: W) -> Printer<W> {
        Printer {
            wtr: wtr,
            has_printed: false,
+            context_separator: "--".to_string().into_bytes(),
+            eol: b'\n',
+            quiet: false,
+            with_filename: false,
        }
    }

+    /// Set the context separator. The default is `--`.
+    pub fn context_separator(mut self, sep: Vec<u8>) -> Printer<W> {
+        self.context_separator = sep;
+        self
+    }
+
+    /// Set the end-of-line terminator. The default is `\n`.
+    pub fn eol(mut self, eol: u8) -> Printer<W> {
+        self.eol = eol;
+        self
+    }
+
+    /// When set, all output is suppressed.
+    pub fn quiet(mut self, yes: bool) -> Printer<W> {
+        self.quiet = yes;
+        self
+    }
+
+    /// When set, each match is prefixed with the file name that it came from.
+    pub fn with_filename(mut self, yes: bool) -> Printer<W> {
+        self.with_filename = yes;
+        self
+    }
+
+    /// Returns true if and only if something has been printed.
    pub fn has_printed(&self) -> bool {
        self.has_printed
    }

-    pub fn into_inner(self) -> W {
+    /// Flushes the underlying writer and returns it.
+    pub fn into_inner(mut self) -> W {
+        let _ = self.wtr.flush();
        self.wtr
    }

+    /// Prints a type definition.
+    pub fn type_def(&mut self, def: &FileTypeDef) {
+        self.write(def.name().as_bytes());
+        self.write(b": ");
+        let mut first = true;
+        for pat in def.patterns() {
+            if !first {
+                self.write(b", ");
+            }
+            self.write(pat.as_bytes());
+            first = false;
+        }
+        self.write_eol();
+    }
+
+    /// Prints the given path.
    pub fn path<P: AsRef<Path>>(&mut self, path: P) {
-        wln!(&mut self.wtr, "{}", path.as_ref().display());
+        self.write(path.as_ref().to_string_lossy().as_bytes());
+        self.write_eol();
    }

+    /// Prints the given path and a count of the number of matches found.
    pub fn path_count<P: AsRef<Path>>(&mut self, path: P, count: u64) {
-        wln!(&mut self.wtr, "{}:{}", path.as_ref().display(), count);
+        if self.with_filename {
+            self.write(path.as_ref().to_string_lossy().as_bytes());
+            self.write(b":");
+        }
+        self.write(count.to_string().as_bytes());
+        self.write_eol();
    }

-    pub fn count(&mut self, count: u64) {
-        wln!(&mut self.wtr, "{}", count);
-    }
-
-    pub fn context_separator(&mut self) {
-        wln!(&mut self.wtr, "--");
+    /// Prints the context separator.
+    pub fn context_separate(&mut self) {
+        // N.B. We can't use `write` here because of borrowing restrictions.
+        if self.quiet {
+            return;
+        }
+        if self.context_separator.is_empty() {
+            return;
+        }
+        self.has_printed = true;
+        let _ = self.wtr.write_all(&self.context_separator);
+        let _ = self.wtr.write_all(&[self.eol]);
    }

    pub fn matched<P: AsRef<Path>>(
@ -58,15 +126,17 @@ impl<W: io::Write> Printer<W> {
        end: usize,
        line_number: Option<u64>,
    ) {
-        self.write(path.as_ref().to_string_lossy().as_bytes());
-        self.write(b":");
+        if self.with_filename {
+            self.write(path.as_ref().to_string_lossy().as_bytes());
+            self.write(b":");
+        }
        if let Some(line_number) = line_number {
            self.write(line_number.to_string().as_bytes());
            self.write(b":");
        }
        self.write(&buf[start..end]);
-        if buf[start..end].last() != Some(&b'\n') {
-            self.write(b"\n");
+        if buf[start..end].last() != Some(&self.eol) {
+            self.write_eol();
        }
    }

@ -78,24 +148,30 @@ impl<W: io::Write> Printer<W> {
        end: usize,
        line_number: Option<u64>,
    ) {
-        self.write(path.as_ref().to_string_lossy().as_bytes());
-        self.write(b"-");
+        if self.with_filename {
+            self.write(path.as_ref().to_string_lossy().as_bytes());
+            self.write(b"-");
+        }
        if let Some(line_number) = line_number {
            self.write(line_number.to_string().as_bytes());
            self.write(b"-");
        }
        self.write(&buf[start..end]);
-        if buf[start..end].last() != Some(&b'\n') {
-            self.write(b"\n");
+        if buf[start..end].last() != Some(&self.eol) {
+            self.write_eol();
        }
    }

-    pub fn binary_matched<P: AsRef<Path>>(&mut self, path: P) {
-        wln!(&mut self.wtr, "Binary file {} matches", path.as_ref().display());
-    }
-
    fn write(&mut self, buf: &[u8]) {
+        if self.quiet {
+            return;
+        }
        self.has_printed = true;
        let _ = self.wtr.write_all(buf);
    }
+
+    fn write_eol(&mut self) {
+        let eol = self.eol;
+        self.write(&[eol]);
+    }
 }
--- a/src/search.rs
+++ b/src/search.rs
@ -20,6 +20,7 @@ const READ_SIZE: usize = 8 * (1<<10);
 /// Error describes errors that can occur while searching.
 #[derive(Debug)]
 pub enum Error {
+    /// A standard I/O error attached to a particular file path.
    Io {
        err: io::Error,
        path: PathBuf,
@ -57,6 +58,7 @@ impl fmt::Display for Error {
 }

 pub struct Searcher<'a, R, W: 'a> {
+    opts: Options,
    inp: &'a mut InputBuffer,
    printer: &'a mut Printer<W>,
    grep: &'a Grep,
@ -68,11 +70,32 @@ pub struct Searcher<'a, R, W: 'a> {
    last_printed: usize,
    last_line: usize,
    after_context_remaining: usize,
+}
+
+/// Options for configuring search.
+#[derive(Clone)]
+struct Options {
+    after_context: usize,
+    before_context: usize,
    count: bool,
+    eol: u8,
    invert_match: bool,
    line_number: bool,
-    before_context: usize,
-    after_context: usize,
+    text: bool,
+}
+
+impl Default for Options {
+    fn default() -> Options {
+        Options {
+            after_context: 0,
+            before_context: 0,
+            count: false,
+            eol: b'\n',
+            invert_match: false,
+            line_number: false,
+            text: false,
+        }
+    }
 }

 impl<'a, R: io::Read, W: io::Write> Searcher<'a, R, W> {
@ -96,6 +119,7 @@ impl<'a, R: io::Read, W: io::Write> Searcher<'a, R, W> {
        haystack: R,
    ) -> Searcher<'a, R, W> {
        Searcher {
+            opts: Options::default(),
            inp: inp,
            printer: printer,
            grep: grep,
@ -107,47 +131,54 @@ impl<'a, R: io::Read, W: io::Write> Searcher<'a, R, W> {
            last_printed: 0,
            last_line: 0,
            after_context_remaining: 0,
-            count: false,
-            invert_match: false,
-            line_number: false,
-            before_context: 0,
-            after_context: 0,
        }
    }

-    /// If enabled, searching will print a count instead of each match.
-    ///
-    /// Disabled by default.
-    pub fn count(mut self, yes: bool) -> Self {
-        self.count = yes;
-        self
-    }
-
-    /// If enabled, matching is inverted so that lines that *don't* match the
-    /// given pattern are treated as matches.
-    pub fn invert_match(mut self, yes: bool) -> Self {
-        self.invert_match = yes;
-        self
-    }
-
-    /// If enabled, compute line numbers and prefix each line of output with
-    /// them.
-    pub fn line_number(mut self, yes: bool) -> Self {
-        self.line_number = yes;
+    /// The number of contextual lines to show after each match. The default
+    /// is zero.
+    pub fn after_context(mut self, count: usize) -> Self {
+        self.opts.after_context = count;
        self
    }

    /// The number of contextual lines to show before each match. The default
    /// is zero.
    pub fn before_context(mut self, count: usize) -> Self {
-        self.before_context = count;
+        self.opts.before_context = count;
        self
    }

-    /// The number of contextual lines to show after each match. The default
-    /// is zero.
-    pub fn after_context(mut self, count: usize) -> Self {
-        self.after_context = count;
+    /// If enabled, searching will print a count instead of each match.
+    ///
+    /// Disabled by default.
+    pub fn count(mut self, yes: bool) -> Self {
+        self.opts.count = yes;
+        self
+    }
+
+    /// Set the end-of-line byte used by this searcher.
+    pub fn eol(mut self, eol: u8) -> Self {
+        self.opts.eol = eol;
+        self
+    }
+
+    /// If enabled, matching is inverted so that lines that *don't* match the
+    /// given pattern are treated as matches.
+    pub fn invert_match(mut self, yes: bool) -> Self {
+        self.opts.invert_match = yes;
+        self
+    }
+
+    /// If enabled, compute line numbers and prefix each line of output with
+    /// them.
+    pub fn line_number(mut self, yes: bool) -> Self {
+        self.opts.line_number = yes;
+        self
+    }
+
+    /// If enabled, search binary files as if they were text.
+    pub fn text(mut self, yes: bool) -> Self {
+        self.opts.text = yes;
        self
    }

@ -157,16 +188,16 @@ impl<'a, R: io::Read, W: io::Write> Searcher<'a, R, W> {
    pub fn run(mut self) -> Result<u64, Error> {
        self.inp.reset();
        self.match_count = 0;
-        self.line_count = if self.line_number { Some(0) } else { None };
+        self.line_count = if self.opts.line_number { Some(0) } else { None };
        self.last_match = Match::default();
        self.after_context_remaining = 0;
        loop {
            let upto = self.inp.lastnl;
            self.print_after_context(upto);
            if !try!(self.fill()) {
-                if self.inp.is_binary {
-                    self.printer.binary_matched(self.path);
-                }
+                break;
+            }
+            if !self.opts.text && self.inp.is_binary {
                break;
            }
            while self.inp.pos < self.inp.lastnl {
@ -174,7 +205,7 @@ impl<'a, R: io::Read, W: io::Write> Searcher<'a, R, W> {
                    &mut self.last_match,
                    &mut self.inp.buf[..self.inp.lastnl],
                    self.inp.pos);
-                if self.invert_match {
+                if self.opts.invert_match {
                    let upto =
                        if matched {
                            self.last_match.start()
@ -189,7 +220,7 @@ impl<'a, R: io::Read, W: io::Write> Searcher<'a, R, W> {
                    }
                } else if matched {
                    self.match_count += 1;
-                    if !self.count {
+                    if !self.opts.count {
                        let start = self.last_match.start();
                        let end = self.last_match.end();
                        self.print_after_context(start);
@ -204,32 +235,36 @@ impl<'a, R: io::Read, W: io::Write> Searcher<'a, R, W> {
                }
            }
        }
-        if self.count && self.match_count > 0 {
+        if self.opts.count && self.match_count > 0 {
            self.printer.path_count(self.path, self.match_count);
        }
        Ok(self.match_count)
    }

+    #[inline(always)]
    fn fill(&mut self) -> Result<bool, Error> {
-        let mut keep_from = self.inp.lastnl;
-        if self.before_context > 0 || self.after_context > 0 {
-            keep_from = start_of_previous_lines(
+        let mut keep = self.inp.lastnl;
+        if self.opts.before_context > 0 || self.opts.after_context > 0 {
+            let lines = 1 + cmp::max(
+                self.opts.before_context, self.opts.after_context);
+            keep = start_of_previous_lines(
+                self.opts.eol,
                &self.inp.buf,
                self.inp.lastnl.saturating_sub(1),
-                cmp::max(self.before_context, self.after_context) + 1);
+                lines);
        }
-        if keep_from < self.last_printed {
-            self.last_printed = self.last_printed - keep_from;
+        if keep < self.last_printed {
+            self.last_printed = self.last_printed - keep;
        } else {
            self.last_printed = 0;
        }
-        if keep_from <= self.last_line {
-            self.last_line = self.last_line - keep_from;
+        if keep <= self.last_line {
+            self.last_line = self.last_line - keep;
        } else {
-            self.count_lines(keep_from);
+            self.count_lines(keep);
            self.last_line = 0;
        }
-        let ok = try!(self.inp.fill(&mut self.haystack, keep_from).map_err(|err| {
+        let ok = try!(self.inp.fill(&mut self.haystack, keep).map_err(|err| {
            Error::from_io(err, &self.path)
        }));
        Ok(ok)
@ -237,10 +272,10 @@ impl<'a, R: io::Read, W: io::Write> Searcher<'a, R, W> {

    #[inline(always)]
    fn print_inverted_matches(&mut self, upto: usize) {
-        debug_assert!(self.invert_match);
-        let mut it = IterLines::new(self.inp.pos);
+        debug_assert!(self.opts.invert_match);
+        let mut it = IterLines::new(self.opts.eol, self.inp.pos);
        while let Some((start, end)) = it.next(&self.inp.buf[..upto]) {
-            if !self.count {
+            if !self.opts.count {
                self.print_match(start, end);
            }
            self.inp.pos = end;
@ -250,7 +285,7 @@ impl<'a, R: io::Read, W: io::Write> Searcher<'a, R, W> {

    #[inline(always)]
    fn print_before_context(&mut self, upto: usize) {
-        if self.count || self.before_context == 0 {
+        if self.opts.count || self.opts.before_context == 0 {
            return;
        }
        let start = self.last_printed;
@ -260,10 +295,11 @@ impl<'a, R: io::Read, W: io::Write> Searcher<'a, R, W> {
        }
        let before_context_start =
            start + start_of_previous_lines(
+                self.opts.eol,
                &self.inp.buf[start..],
                end - start - 1,
-                self.before_context);
-        let mut it = IterLines::new(before_context_start);
+                self.opts.before_context);
+        let mut it = IterLines::new(self.opts.eol, before_context_start);
        while let Some((s, e)) = it.next(&self.inp.buf[..end]) {
            self.print_separator(s);
            self.print_context(s, e);
@ -272,12 +308,12 @@ impl<'a, R: io::Read, W: io::Write> Searcher<'a, R, W> {

    #[inline(always)]
    fn print_after_context(&mut self, upto: usize) {
-        if self.count || self.after_context_remaining == 0 {
+        if self.opts.count || self.after_context_remaining == 0 {
            return;
        }
        let start = self.last_printed;
        let end = upto;
-        let mut it = IterLines::new(start);
+        let mut it = IterLines::new(self.opts.eol, start);
        while let Some((s, e)) = it.next(&self.inp.buf[..end]) {
            self.print_context(s, e);
            self.after_context_remaining -= 1;
@ -295,7 +331,7 @@ impl<'a, R: io::Read, W: io::Write> Searcher<'a, R, W> {
        self.printer.matched(
            &self.path, &self.inp.buf, start, end, self.line_count);
        self.last_printed = end;
-        self.after_context_remaining = self.after_context;
+        self.after_context_remaining = self.opts.after_context;
    }

    #[inline(always)]
@ -309,21 +345,23 @@ impl<'a, R: io::Read, W: io::Write> Searcher<'a, R, W> {

    #[inline(always)]
    fn print_separator(&mut self, before: usize) {
-        if self.before_context == 0 && self.after_context == 0 {
+        if self.opts.before_context == 0 && self.opts.after_context == 0 {
            return;
        }
        if !self.printer.has_printed() {
            return;
        }
-        if (self.last_printed == 0 && before > 0) || self.last_printed < before {
-            self.printer.context_separator();
+        if (self.last_printed == 0 && before > 0)
+            || self.last_printed < before {
+            self.printer.context_separate();
        }
    }

    #[inline(always)]
    fn count_lines(&mut self, upto: usize) {
        if let Some(ref mut line_count) = self.line_count {
-            *line_count += count_lines(&self.inp.buf[self.last_line..upto]);
+            *line_count += count_lines(
+                &self.inp.buf[self.last_line..upto], self.opts.eol);
            self.last_line = upto;
        }
    }
@ -337,15 +375,53 @@ impl<'a, R: io::Read, W: io::Write> Searcher<'a, R, W> {
    }
 }

+/// InputBuffer encapsulates the logic of maintaining a ~fixed size buffer
+/// on which to search. There are three key pieces of complexity:
+///
+/// 1. We must be able to handle lines that are longer than the size of the
+///    buffer. For this reason, the buffer is allowed to expand (and is
+///    therefore not technically fixed). Note that once a buffer expands, it
+///    will never contract.
+/// 2. The contents of the buffer may end with a partial line, so we must keep
+///    track of where the last complete line ends. Namely, the partial line
+///    is only completed on subsequent reads *after* searching up through
+///    the last complete line is done.
+/// 3. When printing the context of a match, the last N lines of the buffer
+///    may need to be rolled over into the next buffer. For example, a match
+///    may occur at the beginning of a buffer, in which case, lines at the end
+///    of the previous contents of the buffer need to be printed.
+///
+/// An InputBuffer is designed to be reused and isn't tied to any particular
+/// reader.
 pub struct InputBuffer {
+    /// The number of bytes to attempt to read at a time. Once set, this is
+    /// never changed.
    read_size: usize,
+    /// The end-of-line terminator used in this buffer.
+    eol: u8,
+    /// A scratch buffer.
+    tmp: Vec<u8>,
+    /// A buffer to read bytes into. All searches are executed directly against
+    /// this buffer and pos/lastnl/end point into it.
    buf: Vec<u8>,
-    tmp1: Vec<u8>,
-    tmp2: Vec<u8>,
+    /// The current position in buf. The current position represents where the
+    /// next search should start.
    pos: usize,
+    /// The position immediately following the last line terminator in buf.
+    /// This may be equal to end.
+    ///
+    /// Searching should never cross this boundary. In particular, the contents
+    /// of the buffer following this position may correspond to a *partial*
+    /// line. All contents before this position are complete lines.
    lastnl: usize,
+    /// The end position of the buffer. Data after this position is not
+    /// specified.
    end: usize,
+    /// Set to true if and only if no reads have occurred yet.
    first: bool,
+    /// Set to true if and only if the contents of buf are determined to be
+    /// "binary" (i.e., not searchable text). Note that its value may be
+    /// falsely negative *or* falsely positive. It is only a heuristic.
    is_binary: bool,
 }

@ -367,9 +443,9 @@ impl InputBuffer {
        }
        InputBuffer {
            read_size: cap,
+            eol: b'\n',
            buf: vec![0; cap],
-            tmp1: vec![],
-            tmp2: vec![],
+            tmp: vec![],
            pos: 0,
            lastnl: 0,
            end: 0,
@ -378,6 +454,12 @@ impl InputBuffer {
        }
    }

+    /// Set the end-of-line terminator used by this input buffer.
+    pub fn eol(&mut self, eol: u8) {
+        self.eol = eol;
+    }
+
+    /// Resets this buffer so that it may be reused with a new reader.
    fn reset(&mut self) {
        self.pos = 0;
        self.lastnl = 0;
@ -386,36 +468,30 @@ impl InputBuffer {
        self.is_binary = false;
    }

+    /// Fill the contents of this buffer with the reader given. The reader
+    /// given should be the same in every call to fill unless reset has been
+    /// called.
+    ///
+    /// The bytes in buf[keep_from..end] are rolled over into the beginning
+    /// of the buffer.
    fn fill<R: io::Read>(
        &mut self,
        rdr: &mut R,
        keep_from: usize,
    ) -> Result<bool, io::Error> {
-        self.pos = 0;
-        self.tmp1.clear();
-        self.tmp2.clear();
-
-        // Save the leftovers from the previous fill before anything else.
-        if self.lastnl < self.end {
-            self.tmp1.extend_from_slice(&self.buf[self.lastnl..self.end]);
-        }
-        // If we need to save lines to account for context, do that here.
-        // These context lines have already been searched, but make up the
-        // first bytes of this buffer.
-        if keep_from < self.lastnl {
-            self.tmp2.extend_from_slice(&self.buf[keep_from..self.lastnl]);
-            self.buf[0..self.tmp2.len()].copy_from_slice(&self.tmp2);
-            self.pos = self.tmp2.len();
-        }
-        if !self.tmp1.is_empty() {
-            let (start, end) = (self.pos, self.pos + self.tmp1.len());
-            self.buf[start..end].copy_from_slice(&self.tmp1);
-            self.end = end;
-        } else {
-            self.end = self.pos;
-        }
+        // Rollover bytes from buf[keep_from..end] and update our various
+        // pointers. N.B. This could be done with the unsafe ptr::copy, but
+        // I haven't been able to produce a benchmark that notices a difference
+        // in performance. (Invariably, ptr::copy is also clearer IMO.)
+        self.tmp.clear();
+        self.tmp.extend_from_slice(&self.buf[keep_from..self.end]);
+        self.buf[0..self.tmp.len()].copy_from_slice(&self.tmp);
+        self.pos = self.lastnl - keep_from;
        self.lastnl = 0;
+        self.end = self.tmp.len();
        while self.lastnl == 0 {
+            // If our buffer isn't big enough to hold the contents of a full
+            // read, expand it.
            if self.buf.len() - self.end < self.read_size {
                let min_len = self.read_size + self.buf.len() - self.end;
                let new_len = cmp::max(min_len, self.buf.len() * 2);
@ -423,22 +499,28 @@ impl InputBuffer {
            }
            let n = try!(rdr.read(
                &mut self.buf[self.end..self.end + self.read_size]));
-            if self.first {
-                if is_binary(&self.buf[self.end..self.end + n]) {
-                    self.is_binary = true;
-                    return Ok(false);
-                }
+            if self.first && is_binary(&self.buf[self.end..self.end + n]) {
+                self.is_binary = true;
+            }
+            if self.is_binary {
+                replace_buf(
+                    &mut self.buf[self.end..self.end + n], b'\x00', self.eol);
            }
            self.first = false;
+            // We assume that reading 0 bytes means we've hit EOF.
            if n == 0 {
+                // If we've searched everything up to the end of the buffer,
+                // then there's nothing left to do.
                if self.end - self.pos == 0 {
                    return Ok(false);
                }
+                // Even if we hit EOF, we might still have to search the
+                // last line if it didn't contain a trailing terminator.
                self.lastnl = self.end;
                break;
            }
            self.lastnl =
-                memrchr(b'\n', &self.buf[self.end..self.end + n])
+                memrchr(self.eol, &self.buf[self.end..self.end + n])
                .map(|i| self.end + i + 1)
                .unwrap_or(0);
            self.end += n;
@ -450,7 +532,7 @@ impl InputBuffer {
 /// Returns true if and only if the given buffer is determined to be "binary"
 /// or otherwise not contain text data that is usefully searchable.
 ///
-/// Note that this may return both false positives and false negatives!
+/// Note that this may return both false positives and false negatives.
 #[inline(always)]
 fn is_binary(buf: &[u8]) -> bool {
    if buf.len() >= 4 && &buf[0..4] == b"%PDF" {
@ -461,15 +543,31 @@ fn is_binary(buf: &[u8]) -> bool {

 /// Count the number of lines in the given buffer.
 #[inline(always)]
-fn count_lines(mut buf: &[u8]) -> u64 {
+fn count_lines(mut buf: &[u8], eol: u8) -> u64 {
    let mut count = 0;
-    while let Some(pos) = memchr(b'\n', buf) {
+    while let Some(pos) = memchr(eol, buf) {
        count += 1;
        buf = &buf[pos + 1..];
    }
    count
 }

+/// Replaces every occurrence of the byte `a` with the byte `b` in `buf`.
+///
+/// The replacement happens in place. If `a` and `b` are equal, the buffer
+/// is left untouched.
+fn replace_buf(buf: &mut [u8], a: u8, b: u8) {
+    if a == b {
+        return;
+    }
+    let mut pos = 0;
+    // memchr reports an offset relative to the sub-slice it was given, so
+    // add `pos` back to get an index into `buf`.
+    while let Some(i) = memchr(a, &buf[pos..]).map(|i| pos + i) {
+        buf[i] = b;
+        pos = i + 1;
+        // Overwrite a run of consecutive `a` bytes directly instead of going
+        // back through memchr for each one. `get` returns None at the end of
+        // the buffer, which terminates the run.
+        while buf.get(pos) == Some(&a) {
+            buf[pos] = b;
+            pos += 1;
+        }
+    }
+}
+
 /// An "iterator" over lines in a particular buffer.
 ///
 /// Idiomatic Rust would borrow the buffer and use it as internal state to
@ -477,6 +575,7 @@ fn count_lines(mut buf: &[u8]) -> u64 {
 /// the borrow in the search code. (Because the borrow prevents composition
 /// through other mutable methods.)
 struct IterLines {
+    eol: u8,
    pos: usize,
 }

@ -485,8 +584,9 @@ impl IterLines {
    ///
    /// The buffer is passed to the `next` method.
    #[inline(always)]
-    fn new(start: usize) -> IterLines {
+    fn new(eol: u8, start: usize) -> IterLines {
        IterLines {
+            eol: eol,
            pos: start,
        }
    }
@ -497,7 +597,7 @@ impl IterLines {
    /// The range returned includes the new line.
    #[inline(always)]
    fn next(&mut self, buf: &[u8]) -> Option<(usize, usize)> {
-        match memchr(b'\n', &buf[self.pos..]) {
+        match memchr(self.eol, &buf[self.pos..]) {
            None => {
                if self.pos < buf.len() {
                    let start = self.pos;
@ -528,10 +628,13 @@ impl IterLines {
 /// The position returned corresponds to the first byte in the given line.
 #[inline(always)]
 fn start_of_previous_lines(
+    eol: u8,
    buf: &[u8],
    mut end: usize,
    mut count: usize,
 ) -> usize {
+    // TODO(burntsushi): This function badly needs to be simplified. The case
+    // analysis is impossible to follow.
    if buf[..end].is_empty() {
        return 0;
    }
@ -541,14 +644,14 @@ fn start_of_previous_lines(
    if end == buf.len() {
        end -= 1;
    }
-    if buf[end] == b'\n' {
+    if buf[end] == eol {
        if end == 0 {
            return end + 1;
        }
        end -= 1;
    }
    while count > 0 {
-        if buf[end] == b'\n' {
+        if buf[end] == eol {
            count -= 1;
            if count == 0 {
                return end + 1;
@ -559,7 +662,7 @@ fn start_of_previous_lines(
            end -= 1;
            continue;
        }
-        match memrchr(b'\n', &buf[..end]) {
+        match memrchr(eol, &buf[..end]) {
            None => {
                return 0;
            }
@ -567,7 +670,7 @@ fn start_of_previous_lines(
                count -= 1;
                end = i;
                if end == 0 {
-                    if buf[end] == b'\n' && count == 0 {
+                    if buf[end] == eol && count == 0 {
                        end += 1;
                    }
                    return end;
@ -579,10 +682,6 @@ fn start_of_previous_lines(
    end + 2
 }

-fn show(bytes: &[u8]) -> &str {
-    ::std::str::from_utf8(bytes).unwrap()
-}
-
 #[cfg(test)]
 mod tests {
    use std::io;
@ -668,102 +767,105 @@ fn main() {

    #[test]
    fn previous_lines() {
+        let eol = b'\n';
        let text = SHERLOCK.as_bytes();
        assert_eq!(366, text.len());

-        assert_eq!(0, start_of_previous_lines(text, 366, 100));
-        assert_eq!(366, start_of_previous_lines(text, 366, 0));
+        assert_eq!(0, start_of_previous_lines(eol, text, 366, 100));
+        assert_eq!(366, start_of_previous_lines(eol, text, 366, 0));

-        assert_eq!(321, start_of_previous_lines(text, 366, 1));
-        assert_eq!(321, start_of_previous_lines(text, 365, 1));
-        assert_eq!(321, start_of_previous_lines(text, 364, 1));
-        assert_eq!(321, start_of_previous_lines(text, 322, 1));
-        assert_eq!(321, start_of_previous_lines(text, 321, 1));
-        assert_eq!(258, start_of_previous_lines(text, 320, 1));
+        assert_eq!(321, start_of_previous_lines(eol, text, 366, 1));
+        assert_eq!(321, start_of_previous_lines(eol, text, 365, 1));
+        assert_eq!(321, start_of_previous_lines(eol, text, 364, 1));
+        assert_eq!(321, start_of_previous_lines(eol, text, 322, 1));
+        assert_eq!(321, start_of_previous_lines(eol, text, 321, 1));
+        assert_eq!(258, start_of_previous_lines(eol, text, 320, 1));

-        assert_eq!(258, start_of_previous_lines(text, 366, 2));
-        assert_eq!(258, start_of_previous_lines(text, 365, 2));
-        assert_eq!(258, start_of_previous_lines(text, 364, 2));
-        assert_eq!(258, start_of_previous_lines(text, 322, 2));
-        assert_eq!(258, start_of_previous_lines(text, 321, 2));
-        assert_eq!(193, start_of_previous_lines(text, 320, 2));
+        assert_eq!(258, start_of_previous_lines(eol, text, 366, 2));
+        assert_eq!(258, start_of_previous_lines(eol, text, 365, 2));
+        assert_eq!(258, start_of_previous_lines(eol, text, 364, 2));
+        assert_eq!(258, start_of_previous_lines(eol, text, 322, 2));
+        assert_eq!(258, start_of_previous_lines(eol, text, 321, 2));
+        assert_eq!(193, start_of_previous_lines(eol, text, 320, 2));

-        assert_eq!(65, start_of_previous_lines(text, 66, 1));
-        assert_eq!(0, start_of_previous_lines(text, 66, 2));
-        assert_eq!(64, start_of_previous_lines(text, 64, 0));
-        assert_eq!(0, start_of_previous_lines(text, 64, 1));
-        assert_eq!(0, start_of_previous_lines(text, 64, 2));
+        assert_eq!(65, start_of_previous_lines(eol, text, 66, 1));
+        assert_eq!(0, start_of_previous_lines(eol, text, 66, 2));
+        assert_eq!(64, start_of_previous_lines(eol, text, 64, 0));
+        assert_eq!(0, start_of_previous_lines(eol, text, 64, 1));
+        assert_eq!(0, start_of_previous_lines(eol, text, 64, 2));

-        assert_eq!(0, start_of_previous_lines(text, 0, 2));
-        assert_eq!(0, start_of_previous_lines(text, 0, 1));
+        assert_eq!(0, start_of_previous_lines(eol, text, 0, 2));
+        assert_eq!(0, start_of_previous_lines(eol, text, 0, 1));
    }

    #[test]
    fn previous_lines_short() {
+        let eol = b'\n';
        let text = &b"a\nb\nc\nd\ne\nf\n"[..];
        assert_eq!(12, text.len());

-        assert_eq!(10, start_of_previous_lines(text, 12, 1));
-        assert_eq!(8, start_of_previous_lines(text, 12, 2));
-        assert_eq!(6, start_of_previous_lines(text, 12, 3));
-        assert_eq!(4, start_of_previous_lines(text, 12, 4));
-        assert_eq!(2, start_of_previous_lines(text, 12, 5));
-        assert_eq!(0, start_of_previous_lines(text, 12, 6));
-        assert_eq!(0, start_of_previous_lines(text, 12, 7));
-        assert_eq!(10, start_of_previous_lines(text, 11, 1));
-        assert_eq!(8, start_of_previous_lines(text, 11, 2));
-        assert_eq!(6, start_of_previous_lines(text, 11, 3));
-        assert_eq!(4, start_of_previous_lines(text, 11, 4));
-        assert_eq!(2, start_of_previous_lines(text, 11, 5));
-        assert_eq!(0, start_of_previous_lines(text, 11, 6));
-        assert_eq!(0, start_of_previous_lines(text, 11, 7));
-        assert_eq!(10, start_of_previous_lines(text, 10, 1));
-        assert_eq!(8, start_of_previous_lines(text, 10, 2));
-        assert_eq!(6, start_of_previous_lines(text, 10, 3));
-        assert_eq!(4, start_of_previous_lines(text, 10, 4));
-        assert_eq!(2, start_of_previous_lines(text, 10, 5));
-        assert_eq!(0, start_of_previous_lines(text, 10, 6));
-        assert_eq!(0, start_of_previous_lines(text, 10, 7));
+        assert_eq!(10, start_of_previous_lines(eol, text, 12, 1));
+        assert_eq!(8, start_of_previous_lines(eol, text, 12, 2));
+        assert_eq!(6, start_of_previous_lines(eol, text, 12, 3));
+        assert_eq!(4, start_of_previous_lines(eol, text, 12, 4));
+        assert_eq!(2, start_of_previous_lines(eol, text, 12, 5));
+        assert_eq!(0, start_of_previous_lines(eol, text, 12, 6));
+        assert_eq!(0, start_of_previous_lines(eol, text, 12, 7));
+        assert_eq!(10, start_of_previous_lines(eol, text, 11, 1));
+        assert_eq!(8, start_of_previous_lines(eol, text, 11, 2));
+        assert_eq!(6, start_of_previous_lines(eol, text, 11, 3));
+        assert_eq!(4, start_of_previous_lines(eol, text, 11, 4));
+        assert_eq!(2, start_of_previous_lines(eol, text, 11, 5));
+        assert_eq!(0, start_of_previous_lines(eol, text, 11, 6));
+        assert_eq!(0, start_of_previous_lines(eol, text, 11, 7));
+        assert_eq!(10, start_of_previous_lines(eol, text, 10, 1));
+        assert_eq!(8, start_of_previous_lines(eol, text, 10, 2));
+        assert_eq!(6, start_of_previous_lines(eol, text, 10, 3));
+        assert_eq!(4, start_of_previous_lines(eol, text, 10, 4));
+        assert_eq!(2, start_of_previous_lines(eol, text, 10, 5));
+        assert_eq!(0, start_of_previous_lines(eol, text, 10, 6));
+        assert_eq!(0, start_of_previous_lines(eol, text, 10, 7));

-        assert_eq!(8, start_of_previous_lines(text, 9, 1));
-        assert_eq!(8, start_of_previous_lines(text, 8, 1));
+        assert_eq!(8, start_of_previous_lines(eol, text, 9, 1));
+        assert_eq!(8, start_of_previous_lines(eol, text, 8, 1));

-        assert_eq!(6, start_of_previous_lines(text, 7, 1));
-        assert_eq!(6, start_of_previous_lines(text, 6, 1));
+        assert_eq!(6, start_of_previous_lines(eol, text, 7, 1));
+        assert_eq!(6, start_of_previous_lines(eol, text, 6, 1));

-        assert_eq!(4, start_of_previous_lines(text, 5, 1));
-        assert_eq!(4, start_of_previous_lines(text, 4, 1));
+        assert_eq!(4, start_of_previous_lines(eol, text, 5, 1));
+        assert_eq!(4, start_of_previous_lines(eol, text, 4, 1));

-        assert_eq!(2, start_of_previous_lines(text, 3, 1));
-        assert_eq!(2, start_of_previous_lines(text, 2, 1));
+        assert_eq!(2, start_of_previous_lines(eol, text, 3, 1));
+        assert_eq!(2, start_of_previous_lines(eol, text, 2, 1));

-        assert_eq!(0, start_of_previous_lines(text, 1, 1));
-        assert_eq!(0, start_of_previous_lines(text, 0, 1));
+        assert_eq!(0, start_of_previous_lines(eol, text, 1, 1));
+        assert_eq!(0, start_of_previous_lines(eol, text, 0, 1));
    }

    #[test]
    fn previous_lines_empty() {
+        let eol = b'\n';
        let text = &b"\n\n\nd\ne\nf\n"[..];
        assert_eq!(9, text.len());

-        assert_eq!(7, start_of_previous_lines(text, 9, 1));
-        assert_eq!(5, start_of_previous_lines(text, 9, 2));
-        assert_eq!(3, start_of_previous_lines(text, 9, 3));
-        assert_eq!(2, start_of_previous_lines(text, 9, 4));
-        assert_eq!(1, start_of_previous_lines(text, 9, 5));
-        assert_eq!(0, start_of_previous_lines(text, 9, 6));
-        assert_eq!(0, start_of_previous_lines(text, 9, 7));
+        assert_eq!(7, start_of_previous_lines(eol, text, 9, 1));
+        assert_eq!(5, start_of_previous_lines(eol, text, 9, 2));
+        assert_eq!(3, start_of_previous_lines(eol, text, 9, 3));
+        assert_eq!(2, start_of_previous_lines(eol, text, 9, 4));
+        assert_eq!(1, start_of_previous_lines(eol, text, 9, 5));
+        assert_eq!(0, start_of_previous_lines(eol, text, 9, 6));
+        assert_eq!(0, start_of_previous_lines(eol, text, 9, 7));

        let text = &b"a\n\n\nd\ne\nf\n"[..];
        assert_eq!(10, text.len());

-        assert_eq!(8, start_of_previous_lines(text, 10, 1));
-        assert_eq!(6, start_of_previous_lines(text, 10, 2));
-        assert_eq!(4, start_of_previous_lines(text, 10, 3));
-        assert_eq!(3, start_of_previous_lines(text, 10, 4));
-        assert_eq!(2, start_of_previous_lines(text, 10, 5));
-        assert_eq!(0, start_of_previous_lines(text, 10, 6));
-        assert_eq!(0, start_of_previous_lines(text, 10, 7));
+        assert_eq!(8, start_of_previous_lines(eol, text, 10, 1));
+        assert_eq!(6, start_of_previous_lines(eol, text, 10, 2));
+        assert_eq!(4, start_of_previous_lines(eol, text, 10, 3));
+        assert_eq!(3, start_of_previous_lines(eol, text, 10, 4));
+        assert_eq!(2, start_of_previous_lines(eol, text, 10, 5));
+        assert_eq!(0, start_of_previous_lines(eol, text, 10, 6));
+        assert_eq!(0, start_of_previous_lines(eol, text, 10, 7));
    }

    #[test]
@ -776,6 +878,23 @@ fn main() {
 ");
    }

+    // The second line of the input starts with a NUL byte. With the searcher
+    // left at its defaults (the configuration closure returns it unchanged),
+    // nothing is reported: not even the match on the first line.
+    #[test]
+    fn binary() {
+        let text = "Sherlock\n\x00Holmes\n";
+        let (count, out) = search("Sherlock|Holmes", text, |s|s);
+        assert_eq!(0, count);
+        assert_eq!(out, "");
+    }
+
+
+    // Same input as the `binary` test, but with `text(true)` set on the
+    // searcher: the NUL byte no longer suppresses output and both lines are
+    // reported as matches.
+    #[test]
+    fn binary_text() {
+        let text = "Sherlock\n\x00Holmes\n";
+        let (count, out) = search("Sherlock|Holmes", text, |s| s.text(true));
+        assert_eq!(2, count);
+        assert_eq!(out, "/baz.rs:Sherlock\n/baz.rs:Holmes\n");
+    }
+
    #[test]
    fn line_numbers() {
        let (count, out) = search_smallcap(
--- a/src/types.rs
+++ b/src/types.rs
@ -0,0 +1,358 @@
+/*!
+The types module provides a way of associating glob patterns on file names to
+file types.
+*/
+
+use std::collections::HashMap;
+use std::error::Error as StdError;
+use std::fmt;
+use std::path::Path;
+
+use gitignore::{self, Gitignore, GitignoreBuilder, Match, Pattern};
+
+/// The built-in file type definitions.
+///
+/// Each entry pairs a file type name with the globs that recognize files of
+/// that type. `TypesBuilder::add_defaults` registers every pair in this
+/// table. The same glob may appear under several names (e.g., `*.h` is
+/// listed for `c`, `cpp`, `objc` and `objcpp`).
+const TYPE_EXTENSIONS: &'static [(&'static str, &'static [&'static str])] = &[
+    ("asm", &["*.asm", "*.s", "*.S"]),
+    ("awk", &["*.awk"]),
+    ("c", &["*.c", "*.h", "*.H"]),
+    ("cbor", &["*.cbor"]),
+    ("clojure", &["*.clj", "*.cljs"]),
+    ("cmake", &["CMakeLists.txt"]),
+    ("coffeescript", &["*.coffee"]),
+    ("cpp", &[
+        "*.C", "*.cc", "*.cpp", "*.cxx",
+        "*.h", "*.H", "*.hh", "*.hpp",
+    ]),
+    ("csharp", &["*.cs"]),
+    ("css", &["*.css"]),
+    ("cython", &["*.pyx"]),
+    ("dart", &["*.dart"]),
+    ("d", &["*.d"]),
+    ("elisp", &["*.el"]),
+    ("erlang", &["*.erl", "*.hrl"]),
+    ("fortran", &[
+        "*.f", "*.F", "*.f77", "*.F77", "*.pfo",
+        "*.f90", "*.F90", "*.f95", "*.F95",
+    ]),
+    ("go", &["*.go"]),
+    ("groovy", &["*.groovy"]),
+    ("haskell", &["*.hs", "*.lhs"]),
+    ("html", &["*.htm", "*.html"]),
+    ("java", &["*.java"]),
+    ("js", &["*.js"]),
+    ("json", &["*.json"]),
+    ("jsonl", &["*.jsonl"]),
+    ("lisp", &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]),
+    ("lua", &["*.lua"]),
+    ("m4", &["*.ac", "*.m4"]),
+    ("make", &["gnumakefile", "Gnumakefile", "makefile", "Makefile", "*.mk"]),
+    ("markdown", &["*.md"]),
+    ("matlab", &["*.m"]),
+    ("mk", &["mkfile"]),
+    ("ml", &["*.ml"]),
+    ("objc", &["*.h", "*.m"]),
+    ("objcpp", &["*.h", "*.mm"]),
+    ("ocaml", &["*.ml", "*.mli", "*.mll", "*.mly"]),
+    ("perl", &["*.perl", "*.pl", "*.PL", "*.plh", "*.plx", "*.pm"]),
+    ("php", &["*.php", "*.php3", "*.php4", "*.php5", "*.phtml"]),
+    ("py", &["*.py"]),
+    ("rr", &["*.R"]),
+    ("rst", &["*.rst"]),
+    ("ruby", &["*.rb"]),
+    ("rust", &["*.rs"]),
+    ("scala", &["*.scala"]),
+    ("sh", &["*.bash", "*.csh", "*.ksh", "*.sh", "*.tcsh"]),
+    ("sql", &["*.sql"]),
+    ("tex", &["*.tex", "*.cls", "*.sty"]),
+    ("txt", &["*.txt"]),
+    ("toml", &["*.toml", "Cargo.lock"]),
+    ("vala", &["*.vala"]),
+    ("vimscript", &["*.vim"]),
+    ("xml", &["*.xml"]),
+    ("yacc", &["*.y"]),
+    ("yaml", &["*.yaml", "*.yml"]),
+];
+
+/// Describes all the possible failure conditions for building a file type
+/// matcher.
+#[derive(Debug)]
+pub enum Error {
+    /// We tried to select (or negate) a file type that is not defined.
+    ///
+    /// The payload is the name of the undefined file type.
+    UnrecognizedFileType(String),
+    /// A user specified file type definition could not be parsed.
+    ///
+    /// Returned by `TypesBuilder::add_def` when either the name or the glob
+    /// is empty.
+    InvalidDefinition,
+    /// There was an error building the matcher (probably a bad glob).
+    Gitignore(gitignore::Error),
+}
+
+/// Provides a short, static description for each error variant. The
+/// `Display` implementation produces the more detailed message.
+impl StdError for Error {
+    fn description(&self) -> &str {
+        match *self {
+            Error::UnrecognizedFileType(_) => "unrecognized file type",
+            Error::InvalidDefinition => "invalid definition",
+            // Defer to the description of the wrapped gitignore error.
+            Error::Gitignore(ref err) => err.description(),
+        }
+    }
+}
+
+/// Formats the error as a human readable message.
+impl fmt::Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match *self {
+            // Include the offending file type name in the message.
+            Error::UnrecognizedFileType(ref ty) => {
+                write!(f, "unrecognized file type: {}", ty)
+            }
+            // Remind the user of the expected `type:glob` format.
+            Error::InvalidDefinition => {
+                write!(f, "invalid definition (format is type:glob, e.g., \
+                           html:*.html)")
+            }
+            // Defer to the formatting of the wrapped gitignore error.
+            Error::Gitignore(ref err) => err.fmt(f),
+        }
+    }
+}
+
+/// Wraps a gitignore error in `Error::Gitignore`. This conversion is what
+/// lets `try!` be used on gitignore results inside functions that return
+/// this module's `Error` (as `TypesBuilder::build` does).
+impl From<gitignore::Error> for Error {
+    fn from(err: gitignore::Error) -> Error {
+        Error::Gitignore(err)
+    }
+}
+
+/// A single file type definition.
+///
+/// Values of this type are produced by `TypesBuilder::definitions`.
+#[derive(Clone, Debug)]
+pub struct FileTypeDef {
+    /// The name of the file type (e.g., `rust`).
+    name: String,
+    /// The glob patterns that recognize files of this type.
+    pats: Vec<String>,
+}
+
+impl FileTypeDef {
+    /// Return the name of this file type.
+    pub fn name(&self) -> &str {
+        &self.name
+    }
+
+    /// Return the glob patterns used to recognize this file type.
+    ///
+    /// The patterns are returned in the order in which they are stored;
+    /// `TypesBuilder::definitions` sorts them before creating the definition.
+    pub fn patterns(&self) -> &[String] {
+        &self.pats
+    }
+}
+
+/// Types is a file type matcher.
+///
+/// Use `TypesBuilder` to construct one.
+#[derive(Clone, Debug)]
+pub struct Types {
+    /// The glob matcher built from the selected and negated file types.
+    /// This is None when no file type was selected or negated, in which case
+    /// `matched` always returns `Match::None`.
+    gi: Option<Gitignore>,
+    /// True if and only if at least one file type was selected (as opposed
+    /// to only negated).
+    has_selected: bool,
+    /// A placeholder pattern reported with `Match::Ignored` when at least one
+    /// file type was selected and the path matched none of the globs.
+    unmatched_pat: Pattern,
+}
+
+impl Types {
+    /// Creates a new file type matcher from the given Gitignore matcher. If
+    /// no Gitignore matcher is provided, then the file type matcher has no
+    /// effect.
+    ///
+    /// If has_selected is true, then at least one file type was selected.
+    /// Therefore, any non-matches should be ignored.
+    fn new(gi: Option<Gitignore>, has_selected: bool) -> Types {
+        Types {
+            gi: gi,
+            has_selected: has_selected,
+            // A synthetic pattern that stands in for "no selected file type
+            // matched". It is only ever handed out by `matched`.
+            unmatched_pat: Pattern {
+                from: Path::new("<filetype>").to_path_buf(),
+                original: "<none>".to_string(),
+                pat: "<none>".to_string(),
+                whitelist: false,
+                only_dir: false,
+            },
+        }
+    }
+
+    /// Returns a match for the given path against this file type matcher.
+    ///
+    /// The path is considered whitelisted if it matches a selected file type.
+    /// The path is considered ignored if it matched a negated file type.
+    /// If at least one file type is selected and path doesn't match, then
+    /// the path is also considered ignored.
+    ///
+    /// Directories (`is_dir` is true) always yield `Match::None`, as does a
+    /// matcher for which no file type was selected or negated.
+    pub fn matched<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> Match {
+        // File types don't apply to directories.
+        if is_dir {
+            return Match::None;
+        }
+        let path = path.as_ref();
+        self.gi.as_ref()
+            .map(|gi| {
+                // The underlying matcher works on UTF-8 strings, so any
+                // invalid UTF-8 in the path is converted lossily.
+                let path = &*path.to_string_lossy();
+                // `TypesBuilder::build` adds selected types as plain globs
+                // and negated types as `!`-prefixed globs, so the result is
+                // inverted to get the meaning documented above.
+                // NOTE(review): this relies on `Match::invert` swapping the
+                // ignored and whitelisted cases; confirm in the gitignore
+                // module.
+                let mat = gi.matched_utf8(path, is_dir).invert();
+                if self.has_selected && mat.is_none() {
+                    Match::Ignored(&self.unmatched_pat)
+                } else {
+                    mat
+                }
+            })
+            .unwrap_or(Match::None)
+    }
+}
+
+/// TypesBuilder builds a type matcher from a set of file type definitions and
+/// a set of file type selections.
+pub struct TypesBuilder {
+    /// The file type definitions: a map from a file type name to the globs
+    /// added for it so far.
+    types: HashMap<String, Vec<String>>,
+    /// The names of the file types that were selected.
+    select: Vec<String>,
+    /// The names of the file types that were negated.
+    select_not: Vec<String>,
+}
+
+impl TypesBuilder {
+    /// Create a new builder for a file type matcher.
+    ///
+    /// The builder starts out with no definitions and no selections. Use
+    /// `add_defaults` to load the built-in definitions.
+    pub fn new() -> TypesBuilder {
+        TypesBuilder {
+            types: HashMap::new(),
+            select: vec![],
+            select_not: vec![],
+        }
+    }
+
+    /// Build the current set of file type definitions *and* selections into
+    /// a file type matcher.
+    ///
+    /// If no file type was selected or negated, then the matcher returned has
+    /// no effect.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::UnrecognizedFileType` if a selected or negated name
+    /// has no definition, and `Error::Gitignore` if a glob could not be added
+    /// or if the underlying matcher could not be built.
+    pub fn build(&self) -> Result<Types, Error> {
+        if self.select.is_empty() && self.select_not.is_empty() {
+            return Ok(Types::new(None, false));
+        }
+        let mut bgi = GitignoreBuilder::new("/");
+        // Selected types are added as plain globs.
+        for name in &self.select {
+            let globs = match self.types.get(name) {
+                Some(globs) => globs,
+                None => {
+                    return Err(Error::UnrecognizedFileType(name.to_string()));
+                }
+            };
+            for glob in globs {
+                try!(bgi.add("<filetype>", glob));
+            }
+        }
+        // Negated types are added with a leading `!`. `Types::matched`
+        // inverts the result reported by the underlying matcher.
+        for name in &self.select_not {
+            let globs = match self.types.get(name) {
+                Some(globs) => globs,
+                None => {
+                    return Err(Error::UnrecognizedFileType(name.to_string()));
+                }
+            };
+            for glob in globs {
+                try!(bgi.add("<filetype>", &format!("!{}", glob)));
+            }
+        }
+        Ok(Types::new(Some(try!(bgi.build())), !self.select.is_empty()))
+    }
+
+    /// Return the set of current file type definitions.
+    ///
+    /// The definitions are sorted by name, and the patterns within each
+    /// definition are sorted as well.
+    pub fn definitions(&self) -> Vec<FileTypeDef> {
+        let mut defs = vec![];
+        for (ref name, ref pats) in &self.types {
+            // Sort a copy so that the builder's own state is left alone.
+            let mut pats = pats.to_vec();
+            pats.sort();
+            defs.push(FileTypeDef {
+                name: name.to_string(),
+                pats: pats,
+            });
+        }
+        defs.sort_by(|def1, def2| def1.name().cmp(def2.name()));
+        defs
+    }
+
+    /// Select the file type given by `name`.
+    ///
+    /// The name is not checked here. If it has no definition when `build` is
+    /// called, then `build` returns an error.
+    pub fn select(&mut self, name: &str) -> &mut TypesBuilder {
+        self.select.push(name.to_string());
+        self
+    }
+
+    /// Ignore the file type given by `name`.
+    ///
+    /// The name is not checked here. If it has no definition when `build` is
+    /// called, then `build` returns an error.
+    pub fn select_not(&mut self, name: &str) -> &mut TypesBuilder {
+        self.select_not.push(name.to_string());
+        self
+    }
+
+    /// Clear any file type definitions for the type given.
+    ///
+    /// Selections and negations that were already recorded are not changed.
+    pub fn clear(&mut self, name: &str) -> &mut TypesBuilder {
+        self.types.remove(name);
+        self
+    }
+
+    /// Add a new file type definition. `name` can be arbitrary and `pat`
+    /// should be a glob recognizing file paths belonging to the `name` type.
+    ///
+    /// If `name` already has definitions, then `pat` is added to them.
+    pub fn add(&mut self, name: &str, pat: &str) -> &mut TypesBuilder {
+        self.types.entry(name.to_string())
+            .or_insert(vec![]).push(pat.to_string());
+        self
+    }
+
+    /// Add a new file type definition specified in string form. The format
+    /// is `name:glob`. Names may not include a colon.
+    ///
+    /// Only the first colon separates the name from the glob, so the glob
+    /// itself may contain colons.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::InvalidDefinition` if the name or the glob is empty.
+    /// This includes the case where `def` contains no colon at all.
+    pub fn add_def(&mut self, def: &str) -> Result<(), Error> {
+        // Everything before the first colon is the name.
+        let name: String = def.chars().take_while(|&c| c != ':').collect();
+        // Skip the name and the colon itself. The rest is the glob.
+        let pat: String = def.chars().skip(name.chars().count() + 1).collect();
+        if name.is_empty() || pat.is_empty() {
+            return Err(Error::InvalidDefinition);
+        }
+        self.add(&name, &pat);
+        Ok(())
+    }
+
+    /// Add a set of default file type definitions.
+    ///
+    /// The defaults are the entries of `TYPE_EXTENSIONS`. They are added to
+    /// whatever definitions already exist in this builder.
+    pub fn add_defaults(&mut self) -> &mut TypesBuilder {
+        for &(name, exts) in TYPE_EXTENSIONS {
+            for ext in exts {
+                self.add(name, ext);
+            }
+        }
+        self
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::TypesBuilder;
+
+    // Generates a test named `$name` that builds a matcher from the
+    // definitions in `$types`, selects every type in `$sel`, negates every
+    // type in `$selnot` and then checks whether `$path` (as a file, not a
+    // directory) is ignored.
+    macro_rules! matched {
+        // The path is expected to not be ignored.
+        ($name:ident, $types:expr, $sel:expr, $selnot:expr,
+         $path:expr) => {
+            matched!($name, $types, $sel, $selnot, $path, true);
+        };
+        // A leading `not` means the path is expected to be ignored.
+        (not, $name:ident, $types:expr, $sel:expr, $selnot:expr,
+         $path:expr) => {
+            matched!($name, $types, $sel, $selnot, $path, false);
+        };
+        // The general form: `$matched` is true if the path should not be
+        // ignored and false if it should be.
+        ($name:ident, $types:expr, $sel:expr, $selnot:expr,
+         $path:expr, $matched:expr) => {
+            #[test]
+            fn $name() {
+                let mut btypes = TypesBuilder::new();
+                for tydef in $types {
+                    btypes.add_def(tydef).unwrap();
+                }
+                for sel in $sel {
+                    btypes.select(sel);
+                }
+                for selnot in $selnot {
+                    btypes.select_not(selnot);
+                }
+                let types = btypes.build().unwrap();
+                let mat = types.matched($path, false);
+                assert_eq!($matched, !mat.is_ignored());
+            }
+        };
+    }
+
+    // The file type definitions shared by all of the tests below, written in
+    // the `name:glob` format accepted by `TypesBuilder::add_def`.
+    fn types() -> Vec<&'static str> {
+        vec![
+            "html:*.html",
+            "html:*.htm",
+            "rust:*.rs",
+            "js:*.js",
+        ]
+    }
+
+    // Paths that must not be ignored. Note that match5 makes no selection at
+    // all and match6 only negates a type that the path doesn't belong to.
+    matched!(match1, types(), vec!["rust"], vec![], "lib.rs");
+    matched!(match2, types(), vec!["html"], vec![], "index.html");
+    matched!(match3, types(), vec!["html"], vec![], "index.htm");
+    matched!(match4, types(), vec!["html", "rust"], vec![], "main.rs");
+    matched!(match5, types(), vec![], vec![], "index.html");
+    matched!(match6, types(), vec![], vec!["rust"], "index.html");
+
+    // Paths that must be ignored: either the path doesn't belong to the
+    // selected type, or it belongs to a negated type.
+    matched!(not, matchnot1, types(), vec!["rust"], vec![], "index.html");
+    matched!(not, matchnot2, types(), vec![], vec!["rust"], "main.rs");
+}