2018-08-03 23:26:22 +02:00
|
|
|
use std::path::Path;
|
|
|
|
|
|
|
|
use ignore::{self, DirEntry};
|
2019-01-19 17:15:56 +02:00
|
|
|
use log;
|
2018-08-03 23:26:22 +02:00
|
|
|
|
|
|
|
/// A configuration for describing how subjects should be built.
|
|
|
|
#[derive(Clone, Debug)]
|
|
|
|
struct Config {
|
|
|
|
strip_dot_prefix: bool,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Default for Config {
|
|
|
|
fn default() -> Config {
|
2020-02-18 01:08:47 +02:00
|
|
|
Config { strip_dot_prefix: false }
|
2018-08-03 23:26:22 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// A builder for constructing things to search over.
|
|
|
|
#[derive(Clone, Debug)]
|
|
|
|
pub struct SubjectBuilder {
|
|
|
|
config: Config,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl SubjectBuilder {
|
|
|
|
/// Return a new subject builder with a default configuration.
|
|
|
|
pub fn new() -> SubjectBuilder {
|
|
|
|
SubjectBuilder { config: Config::default() }
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Create a new subject from a possibly missing directory entry.
|
|
|
|
///
|
|
|
|
/// If the directory entry isn't present, then the corresponding error is
|
|
|
|
/// logged if messages have been configured. Otherwise, if the subject is
|
|
|
|
/// deemed searchable, then it is returned.
|
|
|
|
pub fn build_from_result(
|
|
|
|
&self,
|
|
|
|
result: Result<DirEntry, ignore::Error>,
|
|
|
|
) -> Option<Subject> {
|
|
|
|
match result {
|
|
|
|
Ok(dent) => self.build(dent),
|
|
|
|
Err(err) => {
|
2019-01-26 22:42:55 +02:00
|
|
|
err_message!("{}", err);
|
2018-08-03 23:26:22 +02:00
|
|
|
None
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Create a new subject using this builder's configuration.
|
|
|
|
///
|
|
|
|
/// If a subject could not be created or should otherwise not be searched,
|
|
|
|
/// then this returns `None` after emitting any relevant log messages.
|
|
|
|
pub fn build(&self, dent: DirEntry) -> Option<Subject> {
|
|
|
|
let subj = Subject {
|
|
|
|
dent: dent,
|
|
|
|
strip_dot_prefix: self.config.strip_dot_prefix,
|
|
|
|
};
|
|
|
|
if let Some(ignore_err) = subj.dent.error() {
|
|
|
|
ignore_message!("{}", ignore_err);
|
|
|
|
}
|
binary: rejigger ripgrep's handling of binary files
This commit attempts to surface binary filtering in a slightly more
user friendly way. Namely, before, ripgrep would silently stop
searching a file if it detected a NUL byte, even if it had previously
printed a match. This can lead to the user quite reasonably assuming
that there are no more matches, since a partial search is fairly
unintuitive. (ripgrep has this behavior by default because it really
wants to NOT search binary files at all, just like it doesn't search
gitignored or hidden files.)
With this commit, if a match has already been printed and ripgrep detects
a NUL byte, then it will print a warning message indicating that the search
stopped prematurely.
Moreover, this commit adds a new flag, --binary, which causes ripgrep to
stop filtering binary files, but in a way that still avoids dumping
binary data into terminals. That is, the --binary flag makes ripgrep
behave more like grep's default behavior.
For files explicitly specified in a search, e.g., `rg foo some-file`,
then no binary filtering is applied (just like no gitignore and no
hidden file filtering is applied). Instead, ripgrep behaves as if you
gave the --binary flag for all explicitly given files.
This was a fairly invasive change, and potentially increases the UX
complexity of ripgrep around binary files. (Before, there were two
binary modes, where as now there are three.) However, ripgrep is now a
bit louder with warning messages when binary file detection might
otherwise be hiding potential matches, so hopefully this is a net
improvement.
Finally, the `-uuu` convenience now maps to `--no-ignore --hidden
--binary`, since this is closer to the actualy intent of the
`--unrestricted` flag, i.e., to reduce ripgrep's smart filtering. As a
consequence, `rg -uuu foo` should now search roughly the same number of
bytes as `grep -r foo`, and `rg -uuua foo` should search roughly the
same number of bytes as `grep -ra foo`. (The "roughly" weasel word is
used because grep's and ripgrep's binary file detection might differ
somewhat---perhaps based on buffer sizes---which can impact exactly what
is and isn't searched.)
See the numerous tests in tests/binary.rs for intended behavior.
Fixes #306, Fixes #855
2019-04-09 01:28:38 +02:00
|
|
|
// If this entry was explicitly provided by an end user, then we always
|
|
|
|
// want to search it.
|
|
|
|
if subj.is_explicit() {
|
2018-08-03 23:26:22 +02:00
|
|
|
return Some(subj);
|
|
|
|
}
|
binary: rejigger ripgrep's handling of binary files
This commit attempts to surface binary filtering in a slightly more
user friendly way. Namely, before, ripgrep would silently stop
searching a file if it detected a NUL byte, even if it had previously
printed a match. This can lead to the user quite reasonably assuming
that there are no more matches, since a partial search is fairly
unintuitive. (ripgrep has this behavior by default because it really
wants to NOT search binary files at all, just like it doesn't search
gitignored or hidden files.)
With this commit, if a match has already been printed and ripgrep detects
a NUL byte, then it will print a warning message indicating that the search
stopped prematurely.
Moreover, this commit adds a new flag, --binary, which causes ripgrep to
stop filtering binary files, but in a way that still avoids dumping
binary data into terminals. That is, the --binary flag makes ripgrep
behave more like grep's default behavior.
For files explicitly specified in a search, e.g., `rg foo some-file`,
then no binary filtering is applied (just like no gitignore and no
hidden file filtering is applied). Instead, ripgrep behaves as if you
gave the --binary flag for all explicitly given files.
This was a fairly invasive change, and potentially increases the UX
complexity of ripgrep around binary files. (Before, there were two
binary modes, where as now there are three.) However, ripgrep is now a
bit louder with warning messages when binary file detection might
otherwise be hiding potential matches, so hopefully this is a net
improvement.
Finally, the `-uuu` convenience now maps to `--no-ignore --hidden
--binary`, since this is closer to the actualy intent of the
`--unrestricted` flag, i.e., to reduce ripgrep's smart filtering. As a
consequence, `rg -uuu foo` should now search roughly the same number of
bytes as `grep -r foo`, and `rg -uuua foo` should search roughly the
same number of bytes as `grep -ra foo`. (The "roughly" weasel word is
used because grep's and ripgrep's binary file detection might differ
somewhat---perhaps based on buffer sizes---which can impact exactly what
is and isn't searched.)
See the numerous tests in tests/binary.rs for intended behavior.
Fixes #306, Fixes #855
2019-04-09 01:28:38 +02:00
|
|
|
// At this point, we only want to search something if it's explicitly a
|
2018-08-03 23:26:22 +02:00
|
|
|
// file. This omits symlinks. (If ripgrep was configured to follow
|
|
|
|
// symlinks, then they have already been followed by the directory
|
|
|
|
// traversal.)
|
|
|
|
if subj.is_file() {
|
|
|
|
return Some(subj);
|
|
|
|
}
|
|
|
|
// We got nothin. Emit a debug message, but only if this isn't a
|
|
|
|
// directory. Otherwise, emitting messages for directories is just
|
|
|
|
// noisy.
|
|
|
|
if !subj.is_dir() {
|
2019-01-19 17:15:56 +02:00
|
|
|
log::debug!(
|
2018-08-03 23:26:22 +02:00
|
|
|
"ignoring {}: failed to pass subject filter: \
|
|
|
|
file type: {:?}, metadata: {:?}",
|
2020-02-18 01:08:47 +02:00
|
|
|
subj.dent.path().display(),
|
|
|
|
subj.dent.file_type(),
|
|
|
|
subj.dent.metadata()
|
2018-08-03 23:26:22 +02:00
|
|
|
);
|
|
|
|
}
|
|
|
|
None
|
|
|
|
}
|
|
|
|
|
|
|
|
/// When enabled, if the subject's file path starts with `./` then it is
|
|
|
|
/// stripped.
|
|
|
|
///
|
|
|
|
/// This is useful when implicitly searching the current working directory.
|
|
|
|
pub fn strip_dot_prefix(&mut self, yes: bool) -> &mut SubjectBuilder {
|
|
|
|
self.config.strip_dot_prefix = yes;
|
|
|
|
self
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// A subject is a thing we want to search. Generally, a subject is either a
|
|
|
|
/// file or stdin.
|
|
|
|
#[derive(Clone, Debug)]
|
|
|
|
pub struct Subject {
|
|
|
|
dent: DirEntry,
|
|
|
|
strip_dot_prefix: bool,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Subject {
|
|
|
|
/// Return the file path corresponding to this subject.
|
|
|
|
///
|
|
|
|
/// If this subject corresponds to stdin, then a special `<stdin>` path
|
|
|
|
/// is returned instead.
|
|
|
|
pub fn path(&self) -> &Path {
|
|
|
|
if self.strip_dot_prefix && self.dent.path().starts_with("./") {
|
|
|
|
self.dent.path().strip_prefix("./").unwrap()
|
|
|
|
} else {
|
|
|
|
self.dent.path()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns true if and only if this entry corresponds to stdin.
|
|
|
|
pub fn is_stdin(&self) -> bool {
|
|
|
|
self.dent.is_stdin()
|
|
|
|
}
|
|
|
|
|
binary: rejigger ripgrep's handling of binary files
This commit attempts to surface binary filtering in a slightly more
user friendly way. Namely, before, ripgrep would silently stop
searching a file if it detected a NUL byte, even if it had previously
printed a match. This can lead to the user quite reasonably assuming
that there are no more matches, since a partial search is fairly
unintuitive. (ripgrep has this behavior by default because it really
wants to NOT search binary files at all, just like it doesn't search
gitignored or hidden files.)
With this commit, if a match has already been printed and ripgrep detects
a NUL byte, then it will print a warning message indicating that the search
stopped prematurely.
Moreover, this commit adds a new flag, --binary, which causes ripgrep to
stop filtering binary files, but in a way that still avoids dumping
binary data into terminals. That is, the --binary flag makes ripgrep
behave more like grep's default behavior.
For files explicitly specified in a search, e.g., `rg foo some-file`,
then no binary filtering is applied (just like no gitignore and no
hidden file filtering is applied). Instead, ripgrep behaves as if you
gave the --binary flag for all explicitly given files.
This was a fairly invasive change, and potentially increases the UX
complexity of ripgrep around binary files. (Before, there were two
binary modes, where as now there are three.) However, ripgrep is now a
bit louder with warning messages when binary file detection might
otherwise be hiding potential matches, so hopefully this is a net
improvement.
Finally, the `-uuu` convenience now maps to `--no-ignore --hidden
--binary`, since this is closer to the actualy intent of the
`--unrestricted` flag, i.e., to reduce ripgrep's smart filtering. As a
consequence, `rg -uuu foo` should now search roughly the same number of
bytes as `grep -r foo`, and `rg -uuua foo` should search roughly the
same number of bytes as `grep -ra foo`. (The "roughly" weasel word is
used because grep's and ripgrep's binary file detection might differ
somewhat---perhaps based on buffer sizes---which can impact exactly what
is and isn't searched.)
See the numerous tests in tests/binary.rs for intended behavior.
Fixes #306, Fixes #855
2019-04-09 01:28:38 +02:00
|
|
|
/// Returns true if and only if this entry corresponds to a subject to
|
|
|
|
/// search that was explicitly supplied by an end user.
|
|
|
|
///
|
|
|
|
/// Generally, this corresponds to either stdin or an explicit file path
|
|
|
|
/// argument. e.g., in `rg foo some-file ./some-dir/`, `some-file` is
|
|
|
|
/// an explicit subject, but, e.g., `./some-dir/some-other-file` is not.
|
|
|
|
///
|
|
|
|
/// However, note that ripgrep does not see through shell globbing. e.g.,
|
|
|
|
/// in `rg foo ./some-dir/*`, `./some-dir/some-other-file` will be treated
|
|
|
|
/// as an explicit subject.
|
|
|
|
pub fn is_explicit(&self) -> bool {
|
|
|
|
// stdin is obvious. When an entry has a depth of 0, that means it
|
|
|
|
// was explicitly provided to our directory iterator, which means it
|
|
|
|
// was in turn explicitly provided by the end user. The !is_dir check
|
|
|
|
// means that we want to search files even if their symlinks, again,
|
|
|
|
// because they were explicitly provided. (And we never want to try
|
|
|
|
// to search a directory.)
|
|
|
|
self.is_stdin() || (self.dent.depth() == 0 && !self.is_dir())
|
|
|
|
}
|
|
|
|
|
2019-01-26 22:42:55 +02:00
|
|
|
/// Returns true if and only if this subject points to a directory after
|
|
|
|
/// following symbolic links.
|
2018-08-03 23:26:22 +02:00
|
|
|
fn is_dir(&self) -> bool {
|
2019-01-26 22:42:55 +02:00
|
|
|
let ft = match self.dent.file_type() {
|
|
|
|
None => return false,
|
|
|
|
Some(ft) => ft,
|
|
|
|
};
|
|
|
|
if ft.is_dir() {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
// If this is a symlink, then we want to follow it to determine
|
|
|
|
// whether it's a directory or not.
|
|
|
|
self.dent.path_is_symlink() && self.dent.path().is_dir()
|
2018-08-03 23:26:22 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns true if and only if this subject points to a file.
|
|
|
|
fn is_file(&self) -> bool {
|
|
|
|
self.dent.file_type().map_or(false, |ft| ft.is_file())
|
|
|
|
}
|
|
|
|
}
|