diff --git a/CHANGELOG.md b/CHANGELOG.md index ed4f5674..439c0b3d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,6 +50,8 @@ Bug fixes: Statically compile PCRE2 into macOS release artifacts on `aarch64`. * [BUG #3173](https://github.com/BurntSushi/ripgrep/issues/3173): Fix ancestor ignore filter bug when searching whitelisted hidden files. +* [BUG #3179](https://github.com/BurntSushi/ripgrep/issues/3179): + Fix gitignore bug when searching absolute paths with global gitignores. * [BUG #3180](https://github.com/BurntSushi/ripgrep/issues/3180): Fix a panicking bug when using `-U/--multiline` and `-r/--replace`. diff --git a/crates/core/flags/hiargs.rs b/crates/core/flags/hiargs.rs index a98bde23..526c91e6 100644 --- a/crates/core/flags/hiargs.rs +++ b/crates/core/flags/hiargs.rs @@ -45,6 +45,7 @@ pub(crate) struct HiArgs { context: ContextMode, context_separator: ContextSeparator, crlf: bool, + cwd: PathBuf, dfa_size_limit: Option, encoding: EncodingMode, engine: EngineChoice, @@ -262,6 +263,7 @@ impl HiArgs { context: low.context, context_separator: low.context_separator, crlf: low.crlf, + cwd: state.cwd, dfa_size_limit: low.dfa_size_limit, encoding: low.encoding, engine: low.engine, @@ -897,7 +899,8 @@ impl HiArgs { .git_ignore(!self.no_ignore_vcs) .git_exclude(!self.no_ignore_vcs && !self.no_ignore_exclude) .require_git(!self.no_require_git) - .ignore_case_insensitive(self.ignore_file_case_insensitive); + .ignore_case_insensitive(self.ignore_file_case_insensitive) + .current_dir(&self.cwd); if !self.no_ignore_dot { builder.add_custom_ignore_filename(".rgignore"); } @@ -947,10 +950,12 @@ impl State { fn new() -> anyhow::Result { use std::io::IsTerminal; + let cwd = current_dir()?; + log::debug!("read CWD from environment: {}", cwd.display()); Ok(State { is_terminal_stdout: std::io::stdout().is_terminal(), stdin_consumed: false, - cwd: current_dir()?, + cwd, }) } } diff --git a/crates/ignore/src/dir.rs b/crates/ignore/src/dir.rs index 8c1434aa..2591771e 100644 --- a/crates/ignore/src/dir.rs +++ b/crates/ignore/src/dir.rs @@ -118,6 +118,18 @@ struct IgnoreInner { /// The absolute base path of this matcher. Populated only if parent /// directories are added. absolute_base: Option>, + /// The directory that gitignores should be interpreted relative to. + /// + /// Usually this is the directory containing the gitignore file. But in + /// some cases, like for global gitignores or for gitignores specified + /// explicitly, this should generally be set to the current working + /// directory. This is only used for global gitignores or "explicit" + /// gitignores. + /// + /// When `None`, this means the CWD could not be determined or is unknown. + /// In this case, global gitignore files are ignored because they otherwise + /// cannot be matched correctly. + global_gitignores_relative_to: Option, /// Explicit global ignore matchers specified by the caller. explicit_ignores: Arc>, /// Ignore files used in addition to `.ignore` @@ -319,6 +331,10 @@ impl Ignore { parent: Some(self.clone()), is_absolute_parent: false, absolute_base: self.0.absolute_base.clone(), + global_gitignores_relative_to: self + .0 + .global_gitignores_relative_to + .clone(), explicit_ignores: self.0.explicit_ignores.clone(), custom_ignore_filenames: self.0.custom_ignore_filenames.clone(), custom_ignore_matcher: custom_ig_matcher, @@ -582,6 +598,16 @@ pub(crate) struct IgnoreBuilder { explicit_ignores: Vec, /// Ignore files in addition to .ignore. custom_ignore_filenames: Vec, + /// The directory that gitignores should be interpreted relative to. + /// + /// Usually this is the directory containing the gitignore file. But in + /// some cases, like for global gitignores or for gitignores specified + /// explicitly, this should generally be set to the current working + /// directory. This is only used for global gitignores or "explicit" + /// gitignores. + /// + /// When `None`, global gitignores are ignored. + global_gitignores_relative_to: Option, /// Ignore config. opts: IgnoreOptions, } @@ -589,8 +615,9 @@ pub(crate) struct IgnoreBuilder { impl IgnoreBuilder { /// Create a new builder for an `Ignore` matcher. /// - /// All relative file paths are resolved with respect to the current - /// working directory. + /// It is likely a bug to use this without also calling `current_dir()` + /// outside of tests. This isn't made mandatory because this is an internal + /// abstraction and it's annoying to update tests. pub(crate) fn new() -> IgnoreBuilder { IgnoreBuilder { dir: Path::new("").to_path_buf(), @@ -598,6 +625,7 @@ impl IgnoreBuilder { types: Arc::new(Types::empty()), explicit_ignores: vec![], custom_ignore_filenames: vec![], + global_gitignores_relative_to: None, opts: IgnoreOptions { hidden: true, ignore: true, @@ -616,10 +644,20 @@ impl IgnoreBuilder { /// The matcher returned won't match anything until ignore rules from /// directories are added to it. pub(crate) fn build(&self) -> Ignore { + self.build_with_cwd(None) + } + + /// Builds a new `Ignore` matcher using the given CWD directory. + /// + /// The matcher returned won't match anything until ignore rules from + /// directories are added to it. + pub(crate) fn build_with_cwd(&self, cwd: Option) -> Ignore { + let global_gitignores_relative_to = + cwd.or_else(|| self.global_gitignores_relative_to.clone()); let git_global_matcher = if !self.opts.git_global { Gitignore::empty() - } else { - let mut builder = GitignoreBuilder::new(""); + } else if let Some(ref cwd) = global_gitignores_relative_to { + let mut builder = GitignoreBuilder::new(cwd); builder .case_insensitive(self.opts.ignore_case_insensitive) .unwrap(); @@ -628,6 +666,11 @@ impl IgnoreBuilder { log::debug!("{}", err); } gi + } else { + log::debug!( + "ignoring global gitignore file because CWD is not known" + ); + Gitignore::empty() }; Ignore(Arc::new(IgnoreInner { @@ -638,6 +681,7 @@ impl IgnoreBuilder { parent: None, is_absolute_parent: true, absolute_base: None, + global_gitignores_relative_to, explicit_ignores: Arc::new(self.explicit_ignores.clone()), custom_ignore_filenames: Arc::new( self.custom_ignore_filenames.clone(), @@ -652,6 +696,15 @@ impl IgnoreBuilder { })) } + /// Set the current directory used for matching global gitignores. + pub(crate) fn current_dir( + &mut self, + cwd: impl Into, + ) -> &mut IgnoreBuilder { + self.global_gitignores_relative_to = Some(cwd.into()); + self + } + /// Add an override matcher. /// /// By default, no override matcher is used. diff --git a/crates/ignore/src/gitignore.rs b/crates/ignore/src/gitignore.rs index 17eb3bd8..e3c604f9 100644 --- a/crates/ignore/src/gitignore.rs +++ b/crates/ignore/src/gitignore.rs @@ -128,7 +128,10 @@ impl Gitignore { /// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not /// set or is empty, then `$HOME/.config/git/ignore` is used instead. pub fn global() -> (Gitignore, Option) { - GitignoreBuilder::new("").build_global() + match std::env::current_dir() { + Ok(cwd) => GitignoreBuilder::new(cwd).build_global(), + Err(err) => (Gitignore::empty(), Some(err.into())), + } } /// Creates a new empty gitignore matcher that never matches anything. diff --git a/crates/ignore/src/walk.rs b/crates/ignore/src/walk.rs index 3e06fb3e..f697c509 100644 --- a/crates/ignore/src/walk.rs +++ b/crates/ignore/src/walk.rs @@ -4,8 +4,8 @@ use std::{ fs::{self, FileType, Metadata}, io, path::{Path, PathBuf}, - sync::Arc, sync::atomic::{AtomicBool, AtomicUsize, Ordering as AtomicOrdering}, + sync::{Arc, OnceLock}, }; use { @@ -492,6 +492,18 @@ pub struct WalkBuilder { threads: usize, skip: Option>, filter: Option, + /// The directory that gitignores should be interpreted relative to. + /// + /// Usually this is the directory containing the gitignore file. But in + /// some cases, like for global gitignores or for gitignores specified + /// explicitly, this should generally be set to the current working + /// directory. This is only used for global gitignores or "explicit" + /// gitignores. + /// + /// When `None`, the CWD is fetched from `std::env::current_dir()`. If + /// that fails, then global gitignores are ignored (an error is logged). + global_gitignores_relative_to: + OnceLock>>, } #[derive(Clone)] @@ -512,8 +524,15 @@ impl std::fmt::Debug for WalkBuilder { .field("min_depth", &self.min_depth) .field("max_filesize", &self.max_filesize) .field("follow_links", &self.follow_links) + .field("same_file_system", &self.same_file_system) + .field("sorter", &"<...>") .field("threads", &self.threads) .field("skip", &self.skip) + .field("filter", &"<...>") + .field( + "global_gitignores_relative_to", + &self.global_gitignores_relative_to, + ) .finish() } } @@ -538,6 +557,7 @@ impl WalkBuilder { threads: 0, skip: None, filter: None, + global_gitignores_relative_to: OnceLock::new(), } } @@ -582,7 +602,10 @@ impl WalkBuilder { }) .collect::>() .into_iter(); - let ig_root = self.ig_builder.build(); + let ig_root = self + .get_or_set_current_dir() + .map(|cwd| self.ig_builder.build_with_cwd(Some(cwd.to_path_buf()))) + .unwrap_or_else(|| self.ig_builder.build()); Walk { its, it: None, @@ -600,9 +623,13 @@ impl WalkBuilder { /// Instead, the returned value must be run with a closure. e.g., /// `builder.build_parallel().run(|| |path| { println!("{path:?}"); WalkState::Continue })`. pub fn build_parallel(&self) -> WalkParallel { + let ig_root = self + .get_or_set_current_dir() + .map(|cwd| self.ig_builder.build_with_cwd(Some(cwd.to_path_buf()))) + .unwrap_or_else(|| self.ig_builder.build()); WalkParallel { paths: self.paths.clone().into_iter(), - ig_root: self.ig_builder.build(), + ig_root, max_depth: self.max_depth, min_depth: self.min_depth, max_filesize: self.max_filesize, @@ -679,12 +706,25 @@ impl WalkBuilder { /// /// This has lower precedence than all other sources of ignore rules. /// + /// # Errors + /// /// If there was a problem adding the ignore file, then an error is /// returned. Note that the error may indicate *partial* failure. For /// example, if an ignore file contains an invalid glob, all other globs /// are still applied. + /// + /// An error will also occur if this walker could not get the current + /// working directory (and `WalkBuilder::current_dir` isn't set). pub fn add_ignore>(&mut self, path: P) -> Option { - let mut builder = GitignoreBuilder::new(""); + let path = path.as_ref(); + let Some(cwd) = self.get_or_set_current_dir() else { + let err = std::io::Error::other(format!( + "CWD is not known, ignoring global gitignore {}", + path.display() + )); + return Some(err.into()); + }; + let mut builder = GitignoreBuilder::new(cwd); let mut errs = PartialErrorBuilder::default(); errs.maybe_push(builder.add(path)); match builder.build() { @@ -937,6 +977,55 @@ impl WalkBuilder { self.filter = Some(Filter(Arc::new(filter))); self } + + /// Set the current working directory used for matching global gitignores. + /// + /// If this is not set, then this walker will attempt to discover the + /// correct path from the environment's current working directory. If + /// that fails, then global gitignore files will be ignored. + /// + /// Global gitignore files come from things like a user's git configuration + /// or from gitignore files added via [`WalkBuilder::add_ignore`]. + pub fn current_dir( + &mut self, + cwd: impl Into, + ) -> &mut WalkBuilder { + let cwd = cwd.into(); + self.ig_builder.current_dir(cwd.clone()); + if let Err(cwd) = self.global_gitignores_relative_to.set(Ok(cwd)) { + // OK because `Err` from `set` implies a value exists. + *self.global_gitignores_relative_to.get_mut().unwrap() = cwd; + } + self + } + + /// Gets the currently configured CWD on this walk builder. + /// + /// This is "lazy." That is, we only ask for the CWD from the environment + /// if `WalkBuilder::current_dir` hasn't been called yet. And we ensure + /// that we only do it once. + fn get_or_set_current_dir(&self) -> Option<&Path> { + let result = self.global_gitignores_relative_to.get_or_init(|| { + let result = std::env::current_dir().map_err(Arc::new); + match result { + Ok(ref path) => { + log::trace!( + "automatically discovered CWD: {}", + path.display() + ); + } + Err(ref err) => { + log::debug!( + "failed to find CWD \ + (global gitignores will be ignored): \ + {err}" + ); + } + } + result + }); + result.as_ref().ok().map(|path| &**path) + } } /// Walk is a recursive directory iterator over file paths in one or more diff --git a/tests/regression.rs b/tests/regression.rs index 93e4ba67..a196b401 100644 --- a/tests/regression.rs +++ b/tests/regression.rs @@ -1655,6 +1655,56 @@ rgtest!(r3173_hidden_whitelist_only_dot, |dir: Dir, _: TestCommand| { eqnice!(cmd().args(&["--files", "./"]).stdout(), "./.foo.txt\n"); }); +// See: https://github.com/BurntSushi/ripgrep/issues/3179 +rgtest!(r3179_global_gitignore_cwd, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir("a/b/c"); + dir.create("a/b/c/haystack", ""); + dir.create(".test.gitignore", "/haystack"); + + // I'm not sure in which cases this can fail. If it + // does and it's unavoidable, feel free to submit a + // patch that skips this test when this canonicalization + // fails. + // + // The reason we canonicalize here is strange, and it is + // perhaps papering over a bug in ripgrep. But on macOS, + // `TMPDIR` is set to `/var/blah/blah`. However, `/var` + // is symlinked to `/private/var`. So the CWD detected by + // the process is `/private/var`. So it turns out that the + // CWD is not a proper prefix of `dir.path()` here. So we + // cheat around this by forcing our path to be canonicalized + // so it's `/private/var` everywhere. + // + // Arguably, ripgrep should still work here without + // canonicalization. But it's not actually quite clear + // to me how to do it. I *believe* the solution here is + // that gitignore matching should be relative to the directory + // path given to `WalkBuider::{add,new}`, and *not* to the + // CWD. But this is a very big change to how `ignore` works + // I think. At least conceptually. So that will need to be + // something we do when we rewrite `ignore`. Sigh. + // + // ... but, on Windows, path canonicalization seems to + // totally fuck things up, so skip it there. HEAVY sigh. + let dir_path = if cfg!(windows) { + dir.path().to_path_buf() + } else { + dir.path().canonicalize().unwrap() + }; + let ignore_file_path = dir_path.join(".test.gitignore"); + cmd.current_dir("a/b/c") + .arg("--files") + .arg("--ignore-file") + .arg(ignore_file_path.display().to_string()) + // This is a key part of the reproduction. When just providing `.` + // to ignore's walker (as ripgrep does when a path to search isn't + // provided), then everything works as one expects. Because there's + // nothing to strip off of the paths being searched. But when one + // provides an absolute path, the stripping didn't work. + .arg(&dir_path) + .assert_err(); +}); + // See: https://github.com/BurntSushi/ripgrep/issues/3180 rgtest!(r3180_look_around_panic, |dir: Dir, mut cmd: TestCommand| { dir.create("haystack", " b b b b b b b b\nc\n");