From 40e310a9f9e21929acb5b0eb364ffad51df80608 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sat, 25 Aug 2018 22:53:27 -0400 Subject: [PATCH] ripgrep: add --sort and --sortr flags These flags each accept one of five choices: none, path, modified, accessed or created. The value indicates how the results are sorted. For --sort, results are sorted in ascending order whereas for --sortr, results are sorted in descending order. Closes #404 --- complete/_rg | 19 +++++- src/app.rs | 81 +++++++++++++++++++++++- src/args.rs | 169 +++++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 258 insertions(+), 11 deletions(-) diff --git a/complete/_rg b/complete/_rg index 77f8394c..a3406471 100644 --- a/complete/_rg +++ b/complete/_rg @@ -188,8 +188,21 @@ _rg() { {-r+,--replace=}'[specify string used to replace matches]:replace string' + '(sort)' # File-sorting options - '(threads)--sort-files[sort results by file path (disables parallelism)]' - $no"--no-sort-files[don't sort results by file path]" + '(threads)--sort=[sort results in ascending order (disables parallelism)]:sort method:(( + none\:"no sorting" + path\:"sort by file path" + modified\:"sort by last modified time" + accessed\:"sort by last accessed time" + created\:"sort by creation time" + ))' + '(threads)--sortr=[sort results in descending order (disables parallelism)]:sort method:(( + none\:"no sorting" + path\:"sort by file path" + modified\:"sort by last modified time" + accessed\:"sort by last accessed time" + created\:"sort by creation time" + ))' + '!(threads)--sort-files[sort results by file path (disables parallelism)]' + '(stats)' # Statistics options '(--files file-match)--stats[show search statistics]' @@ -200,7 +213,7 @@ _rg() { $no"(--null-data)--no-text[don't search binary files as if they were text]" + '(threads)' # Thread-count options - '(--sort-files)'{-j+,--threads=}'[specify approximate number of threads to use]:number of threads' + '(sort)'{-j+,--threads=}'[specify approximate 
number of threads to use]:number of threads' + '(trim)' # Trim options '--trim[trim any ASCII whitespace prefix from each line]' diff --git a/src/app.rs b/src/app.rs index 1394f9d2..19306c2c 100644 --- a/src/app.rs +++ b/src/app.rs @@ -600,6 +600,8 @@ pub fn all_args_and_flags() -> Vec { flag_search_zip(&mut args); flag_smart_case(&mut args); flag_sort_files(&mut args); + flag_sort(&mut args); + flag_sortr(&mut args); flag_stats(&mut args); flag_text(&mut args); flag_threads(&mut args); @@ -1923,8 +1925,10 @@ This overrides the -s/--case-sensitive and -i/--ignore-case flags. } fn flag_sort_files(args: &mut Vec) { - const SHORT: &str = "Sort results by file path. Implies --threads=1."; + const SHORT: &str = "DEPRECATED"; const LONG: &str = long!("\ +DEPRECATED: Use --sort or --sortr instead. + Sort results by file path. Note that this currently disables all parallelism and runs search in a single thread. @@ -1932,12 +1936,83 @@ This flag can be disabled with --no-sort-files. "); let arg = RGArg::switch("sort-files") .help(SHORT).long_help(LONG) - .overrides("no-sort-files"); + .hidden() + .overrides("no-sort-files") + .overrides("sort") + .overrides("sortr"); args.push(arg); let arg = RGArg::switch("no-sort-files") .hidden() - .overrides("sort-files"); + .overrides("sort-files") + .overrides("sort") + .overrides("sortr"); + args.push(arg); +} + +fn flag_sort(args: &mut Vec) { + const SHORT: &str = + "Sort results in ascending order. Implies --threads=1."; + const LONG: &str = long!("\ +This flag enables sorting of results in ascending order. The possible values +for this flag are: + + path Sort by file path. + modified Sort by the last modified time on a file. + accessed Sort by the last accessed time on a file. + created Sort by the creation time on a file. + none Do not sort results. 
+ +If the sorting criteria isn't available on your system (for example, creation +time is not available on ext4 file systems), then ripgrep will attempt to +detect this and print an error without searching any results. Otherwise, the +sort order is unspecified. + +To sort results in reverse or descending order, use the --sortr flag. Also, +this flag overrides --sortr. + +Note that sorting results currently always forces ripgrep to abandon +parallelism and run in a single thread. +"); + let arg = RGArg::flag("sort", "SORTBY") + .help(SHORT).long_help(LONG) + .possible_values(&["path", "modified", "accessed", "created", "none"]) + .overrides("sortr") + .overrides("sort-files") + .overrides("no-sort-files"); + args.push(arg); +} + +fn flag_sortr(args: &mut Vec) { + const SHORT: &str = + "Sort results in descending order. Implies --threads=1."; + const LONG: &str = long!("\ +This flag enables sorting of results in descending order. The possible values +for this flag are: + + path Sort by file path. + modified Sort by the last modified time on a file. + accessed Sort by the last accessed time on a file. + created Sort by the creation time on a file. + none Do not sort results. + +If the sorting criteria isn't available on your system (for example, creation +time is not available on ext4 file systems), then ripgrep will attempt to +detect this and print an error without searching any results. Otherwise, the +sort order is unspecified. + +To sort results in ascending order, use the --sort flag. Also, this flag +overrides --sort. + +Note that sorting results currently always forces ripgrep to abandon +parallelism and run in a single thread. 
+"); + let arg = RGArg::flag("sortr", "SORTBY") + .help(SHORT).long_help(LONG) + .possible_values(&["path", "modified", "accessed", "created", "none"]) + .overrides("sort") + .overrides("sort-files") + .overrides("no-sort-files"); args.push(arg); } diff --git a/src/args.rs b/src/args.rs index 6e79cb0a..674b2af5 100644 --- a/src/args.rs +++ b/src/args.rs @@ -1,10 +1,11 @@ use std::cmp; use std::env; use std::ffi::OsStr; -use std::fs::File; +use std::fs::{self, File}; use std::io::{self, BufRead}; use std::path::{Path, PathBuf}; use std::sync::Arc; +use std::time::SystemTime; use atty; use clap; @@ -360,6 +361,120 @@ enum OutputKind { JSON, } +/// The sort criteria, if present. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +struct SortBy { + /// Whether to reverse the sort criteria (i.e., descending order). + reverse: bool, + /// The actual sorting criteria. + kind: SortByKind, +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum SortByKind { + /// No sorting at all. + None, + /// Sort by path. + Path, + /// Sort by last modified time. + LastModified, + /// Sort by last accessed time. + LastAccessed, + /// Sort by creation time. + Created, +} + +impl SortBy { + fn asc(kind: SortByKind) -> SortBy { + SortBy { reverse: false, kind: kind } + } + + fn desc(kind: SortByKind) -> SortBy { + SortBy { reverse: true, kind: kind } + } + + fn none() -> SortBy { + SortBy::asc(SortByKind::None) + } + + /// Try to check that the sorting criteria selected is actually supported. + /// If it isn't, then an error is returned. + fn check(&self) -> Result<()> { + match self.kind { + SortByKind::None | SortByKind::Path => {} + SortByKind::LastModified => { + env::current_exe()?.metadata()?.modified()?; + } + SortByKind::LastAccessed => { + env::current_exe()?.metadata()?.accessed()?; + } + SortByKind::Created => { + env::current_exe()?.metadata()?.created()?; + } + } + Ok(()) + } + + fn configure_walk_builder(self, builder: &mut WalkBuilder) { + // This isn't entirely optimal. 
In particular, we will wind up issuing + // a stat for many files redundantly. Aside from having potentially + // inconsistent results with respect to sorting, this is also slow. + // We could fix this here at the expense of memory by caching stat + // calls. A better fix would be to find a way to push this down into + // directory traversal itself, but that's a somewhat nasty change. + match self.kind { + SortByKind::None => {} + SortByKind::Path => { + if self.reverse { + builder.sort_by_file_name(|a, b| a.cmp(b).reverse()); + } else { + builder.sort_by_file_name(|a, b| a.cmp(b)); + } + } + SortByKind::LastModified => { + builder.sort_by_file_path(move |a, b| { + sort_by_metadata_time( + a, b, + self.reverse, + |md| md.modified(), + ) + }); + } + SortByKind::LastAccessed => { + builder.sort_by_file_path(move |a, b| { + sort_by_metadata_time( + a, b, + self.reverse, + |md| md.accessed(), + ) + }); + } + SortByKind::Created => { + builder.sort_by_file_path(move |a, b| { + sort_by_metadata_time( + a, b, + self.reverse, + |md| md.created(), + ) + }); + } + } + } +} + +impl SortByKind { + fn new(kind: &str) -> SortByKind { + match kind { + "none" => SortByKind::None, + "path" => SortByKind::Path, + "modified" => SortByKind::LastModified, + "accessed" => SortByKind::LastAccessed, + "created" => SortByKind::Created, + _ => SortByKind::None, + } + } +} + impl ArgMatches { /// Create an ArgMatches from clap's parse result. fn new(clap_matches: clap::ArgMatches<'static>) -> ArgMatches { @@ -678,9 +793,9 @@ impl ArgMatches { if !self.no_ignore() { builder.add_custom_ignore_filename(".rgignore"); } - if self.is_present("sort-files") { - builder.sort_by_file_name(|a, b| a.cmp(b)); - } + let sortby = self.sort_by()?; + sortby.check()?; + sortby.configure_walk_builder(&mut builder); Ok(builder) } } @@ -1234,6 +1349,22 @@ impl ArgMatches { self.value_of_lossy("replace").map(|s| s.into_bytes()) } + /// Returns the sorting criteria based on command line parameters. 
+ fn sort_by(&self) -> Result { + // For backcompat, continue supporting deprecated --sort-files flag. + if self.is_present("sort-files") { + return Ok(SortBy::asc(SortByKind::Path)); + } + let sortby = match self.value_of_lossy("sort") { + None => match self.value_of_lossy("sortr") { + None => return Ok(SortBy::none()), + Some(choice) => SortBy::desc(SortByKind::new(&choice)), + } + Some(choice) => SortBy::asc(SortByKind::new(&choice)), + }; + Ok(sortby) + } + /// Returns true if and only if aggregate statistics for a search should /// be tracked. /// @@ -1289,7 +1420,7 @@ impl ArgMatches { /// Return the number of threads that should be used for parallelism. fn threads(&self) -> Result { - if self.is_present("sort-files") { + if self.sort_by()?.kind != SortByKind::None { return Ok(1); } let threads = self.usize_of("threads")?.unwrap_or(0); @@ -1503,6 +1634,34 @@ fn u64_to_usize( } } +/// Builds a comparator for sorting two files according to a system time +/// extracted from the file's metadata. +/// +/// If there was a problem extracting the metadata or if the time is not +/// available, then both entries compare equal. +fn sort_by_metadata_time( + p1: &Path, + p2: &Path, + reverse: bool, + get_time: G, +) -> cmp::Ordering +where G: Fn(&fs::Metadata) -> io::Result +{ + let t1 = match p1.metadata().and_then(|md| get_time(&md)) { + Ok(t) => t, + Err(_) => return cmp::Ordering::Equal, + }; + let t2 = match p2.metadata().and_then(|md| get_time(&md)) { + Ok(t) => t, + Err(_) => return cmp::Ordering::Equal, + }; + if reverse { + t1.cmp(&t2).reverse() + } else { + t1.cmp(&t2) + } +} + /// Returns true if and only if stdin is deemed searchable. #[cfg(unix)] fn stdin_is_readable() -> bool {