mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2024-12-12 19:18:24 +02:00
ignore: add 'stdout' skipping to the walker
This commit adds a new 'skip_stdout' option to the directory walker. When enabled, it will skip yielding any directory entries that are believed to correspond to stdout for the current process. This is useful for filtering out 'results' in a command like 'grep -r foo > results' in order to avoid an unbounded feedback mechanism.
This commit is contained in:
parent
d599f0b3c7
commit
e5bb750995
@ -455,6 +455,7 @@ pub struct WalkBuilder {
|
|||||||
same_file_system: bool,
|
same_file_system: bool,
|
||||||
sorter: Option<Sorter>,
|
sorter: Option<Sorter>,
|
||||||
threads: usize,
|
threads: usize,
|
||||||
|
skip: Option<Arc<Handle>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
@ -472,6 +473,7 @@ impl fmt::Debug for WalkBuilder {
|
|||||||
.field("max_filesize", &self.max_filesize)
|
.field("max_filesize", &self.max_filesize)
|
||||||
.field("follow_links", &self.follow_links)
|
.field("follow_links", &self.follow_links)
|
||||||
.field("threads", &self.threads)
|
.field("threads", &self.threads)
|
||||||
|
.field("skip", &self.skip)
|
||||||
.finish()
|
.finish()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -493,6 +495,7 @@ impl WalkBuilder {
|
|||||||
same_file_system: false,
|
same_file_system: false,
|
||||||
sorter: None,
|
sorter: None,
|
||||||
threads: 0,
|
threads: 0,
|
||||||
|
skip: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -535,6 +538,7 @@ impl WalkBuilder {
|
|||||||
ig_root: ig_root.clone(),
|
ig_root: ig_root.clone(),
|
||||||
ig: ig_root.clone(),
|
ig: ig_root.clone(),
|
||||||
max_filesize: self.max_filesize,
|
max_filesize: self.max_filesize,
|
||||||
|
skip: self.skip.clone(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -552,6 +556,7 @@ impl WalkBuilder {
|
|||||||
follow_links: self.follow_links,
|
follow_links: self.follow_links,
|
||||||
same_file_system: self.same_file_system,
|
same_file_system: self.same_file_system,
|
||||||
threads: self.threads,
|
threads: self.threads,
|
||||||
|
skip: self.skip.clone(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -792,6 +797,26 @@ impl WalkBuilder {
|
|||||||
self.same_file_system = yes;
|
self.same_file_system = yes;
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Do not yield directory entries that are believed to correspond to
|
||||||
|
/// stdout.
|
||||||
|
///
|
||||||
|
/// This is useful when a command is invoked via shell redirection to a
|
||||||
|
/// file that is also being read. For example, `grep -r foo ./ > results`
|
||||||
|
/// might end up trying to search `results` even though it is also writing
|
||||||
|
/// to it, which could cause an unbounded feedback loop. Setting this
|
||||||
|
/// option prevents this from happening by skipping over the `results`
|
||||||
|
/// file.
|
||||||
|
///
|
||||||
|
/// This is disabled by default.
|
||||||
|
pub fn skip_stdout(&mut self, yes: bool) -> &mut WalkBuilder {
|
||||||
|
if yes {
|
||||||
|
self.skip = stdout_handle().map(Arc::new);
|
||||||
|
} else {
|
||||||
|
self.skip = None;
|
||||||
|
}
|
||||||
|
self
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Walk is a recursive directory iterator over file paths in one or more
|
/// Walk is a recursive directory iterator over file paths in one or more
|
||||||
@ -806,6 +831,7 @@ pub struct Walk {
|
|||||||
ig_root: Ignore,
|
ig_root: Ignore,
|
||||||
ig: Ignore,
|
ig: Ignore,
|
||||||
max_filesize: Option<u64>,
|
max_filesize: Option<u64>,
|
||||||
|
skip: Option<Arc<Handle>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Walk {
|
impl Walk {
|
||||||
@ -818,12 +844,17 @@ impl Walk {
|
|||||||
WalkBuilder::new(path).build()
|
WalkBuilder::new(path).build()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn skip_entry(&self, ent: &walkdir::DirEntry) -> bool {
|
fn skip_entry(&self, ent: &DirEntry) -> Result<bool, Error> {
|
||||||
if ent.depth() == 0 {
|
if ent.depth() == 0 {
|
||||||
return false;
|
return Ok(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
let is_dir = ent.file_type().is_dir();
|
if let Some(ref stdout) = self.skip {
|
||||||
|
if path_equals(ent, stdout)? {
|
||||||
|
return Ok(true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let is_dir = ent.file_type().map_or(false, |ft| ft.is_dir());
|
||||||
let max_size = self.max_filesize;
|
let max_size = self.max_filesize;
|
||||||
let should_skip_path = skip_path(&self.ig, ent.path(), is_dir);
|
let should_skip_path = skip_path(&self.ig, ent.path(), is_dir);
|
||||||
let should_skip_filesize = if !is_dir && max_size.is_some() {
|
let should_skip_filesize = if !is_dir && max_size.is_some() {
|
||||||
@ -832,7 +863,7 @@ impl Walk {
|
|||||||
false
|
false
|
||||||
};
|
};
|
||||||
|
|
||||||
should_skip_path || should_skip_filesize
|
Ok(should_skip_path || should_skip_filesize)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -874,7 +905,12 @@ impl Iterator for Walk {
|
|||||||
self.ig = self.ig.parent().unwrap();
|
self.ig = self.ig.parent().unwrap();
|
||||||
}
|
}
|
||||||
Ok(WalkEvent::Dir(ent)) => {
|
Ok(WalkEvent::Dir(ent)) => {
|
||||||
if self.skip_entry(&ent) {
|
let mut ent = DirEntry::new_walkdir(ent, None);
|
||||||
|
let should_skip = match self.skip_entry(&ent) {
|
||||||
|
Err(err) => return Some(Err(err)),
|
||||||
|
Ok(should_skip) => should_skip,
|
||||||
|
};
|
||||||
|
if should_skip {
|
||||||
self.it.as_mut().unwrap().it.skip_current_dir();
|
self.it.as_mut().unwrap().it.skip_current_dir();
|
||||||
// Still need to push this on the stack because
|
// Still need to push this on the stack because
|
||||||
// we'll get a WalkEvent::Exit event for this dir.
|
// we'll get a WalkEvent::Exit event for this dir.
|
||||||
@ -885,13 +921,19 @@ impl Iterator for Walk {
|
|||||||
}
|
}
|
||||||
let (igtmp, err) = self.ig.add_child(ent.path());
|
let (igtmp, err) = self.ig.add_child(ent.path());
|
||||||
self.ig = igtmp;
|
self.ig = igtmp;
|
||||||
return Some(Ok(DirEntry::new_walkdir(ent, err)));
|
ent.err = err;
|
||||||
|
return Some(Ok(ent));
|
||||||
}
|
}
|
||||||
Ok(WalkEvent::File(ent)) => {
|
Ok(WalkEvent::File(ent)) => {
|
||||||
if self.skip_entry(&ent) {
|
let ent = DirEntry::new_walkdir(ent, None);
|
||||||
|
let should_skip = match self.skip_entry(&ent) {
|
||||||
|
Err(err) => return Some(Err(err)),
|
||||||
|
Ok(should_skip) => should_skip,
|
||||||
|
};
|
||||||
|
if should_skip {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
return Some(Ok(DirEntry::new_walkdir(ent, None)));
|
return Some(Ok(ent));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -993,6 +1035,7 @@ pub struct WalkParallel {
|
|||||||
follow_links: bool,
|
follow_links: bool,
|
||||||
same_file_system: bool,
|
same_file_system: bool,
|
||||||
threads: usize,
|
threads: usize,
|
||||||
|
skip: Option<Arc<Handle>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl WalkParallel {
|
impl WalkParallel {
|
||||||
@ -1080,6 +1123,7 @@ impl WalkParallel {
|
|||||||
max_depth: self.max_depth,
|
max_depth: self.max_depth,
|
||||||
max_filesize: self.max_filesize,
|
max_filesize: self.max_filesize,
|
||||||
follow_links: self.follow_links,
|
follow_links: self.follow_links,
|
||||||
|
skip: self.skip.clone(),
|
||||||
};
|
};
|
||||||
handles.push(thread::spawn(|| worker.run()));
|
handles.push(thread::spawn(|| worker.run()));
|
||||||
}
|
}
|
||||||
@ -1208,6 +1252,9 @@ struct Worker {
|
|||||||
/// Whether to follow symbolic links or not. When this is enabled, loop
|
/// Whether to follow symbolic links or not. When this is enabled, loop
|
||||||
/// detection is performed.
|
/// detection is performed.
|
||||||
follow_links: bool,
|
follow_links: bool,
|
||||||
|
/// A file handle to skip, currently is either `None` or stdout, if it's
|
||||||
|
/// a file and it has been requested to skip files identical to stdout.
|
||||||
|
skip: Option<Arc<Handle>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Worker {
|
impl Worker {
|
||||||
@ -1335,6 +1382,15 @@ impl Worker {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if let Some(ref stdout) = self.skip {
|
||||||
|
let is_stdout = match path_equals(&dent, stdout) {
|
||||||
|
Ok(is_stdout) => is_stdout,
|
||||||
|
Err(err) => return (self.f)(Err(err)),
|
||||||
|
};
|
||||||
|
if is_stdout {
|
||||||
|
return WalkState::Continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
let is_dir = dent.is_dir();
|
let is_dir = dent.is_dir();
|
||||||
let max_size = self.max_filesize;
|
let max_size = self.max_filesize;
|
||||||
let should_skip_path = skip_path(ig, dent.path(), is_dir);
|
let should_skip_path = skip_path(ig, dent.path(), is_dir);
|
||||||
@ -1541,6 +1597,52 @@ fn skip_path(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns a handle to stdout for filtering search.
|
||||||
|
///
|
||||||
|
/// A handle is returned if and only if stdout is being redirected to a file.
|
||||||
|
/// The handle returned corresponds to that file.
|
||||||
|
///
|
||||||
|
/// This can be used to ensure that we do not attempt to search a file that we
|
||||||
|
/// may also be writing to.
|
||||||
|
fn stdout_handle() -> Option<Handle> {
|
||||||
|
let h = match Handle::stdout() {
|
||||||
|
Err(_) => return None,
|
||||||
|
Ok(h) => h,
|
||||||
|
};
|
||||||
|
let md = match h.as_file().metadata() {
|
||||||
|
Err(_) => return None,
|
||||||
|
Ok(md) => md,
|
||||||
|
};
|
||||||
|
if !md.is_file() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
Some(h)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true if and only if the given directory entry is believed to be
|
||||||
|
/// equivalent to the given handle. If there was a problem querying the path
|
||||||
|
/// for information to determine equality, then that error is returned.
|
||||||
|
fn path_equals(dent: &DirEntry, handle: &Handle) -> Result<bool, Error> {
|
||||||
|
#[cfg(unix)]
|
||||||
|
fn never_equal(dent: &DirEntry, handle: &Handle) -> bool {
|
||||||
|
dent.ino() != Some(handle.ino())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(not(unix))]
|
||||||
|
fn never_equal(_: &DirEntry, _: &Handle) -> bool {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we know for sure that these two things aren't equal, then avoid
|
||||||
|
// the costly extra stat call to determine equality.
|
||||||
|
if dent.is_stdin() || never_equal(dent, handle) {
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
Handle::from_path(dent.path())
|
||||||
|
.map(|h| &h == handle)
|
||||||
|
.map_err(|err| Error::Io(err).with_path(dent.path()))
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns true if and only if the given path is on the same device as the
|
/// Returns true if and only if the given path is on the same device as the
|
||||||
/// given root device.
|
/// given root device.
|
||||||
fn is_same_file_system(root_device: u64, path: &Path) -> Result<bool, Error> {
|
fn is_same_file_system(root_device: u64, path: &Path) -> Result<bool, Error> {
|
||||||
|
Loading…
Reference in New Issue
Block a user