diff --git a/Cargo.toml b/Cargo.toml index 2a079292..bac7e3f4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,6 +45,7 @@ termcolor = { version = "0.3.0", path = "termcolor" } [build-dependencies] clap = "2.18" lazy_static = "0.2" +regex = "0.2.1" [features] avx-accel = ["bytecount/avx-accel"] diff --git a/build.rs b/build.rs index 8a7c4900..4d0e2d13 100644 --- a/build.rs +++ b/build.rs @@ -2,6 +2,7 @@ extern crate clap; #[macro_use] extern crate lazy_static; +extern crate regex; use std::env; use std::fs; diff --git a/doc/rg.1 b/doc/rg.1 index cbc0c89e..1b66f2a2 100644 --- a/doc/rg.1 +++ b/doc/rg.1 @@ -1,4 +1,4 @@ -.\" Automatically generated by Pandoc 1.19.1 +.\" Automatically generated by Pandoc 1.19.2.1 .\" .TH "rg" "1" .hy @@ -275,11 +275,21 @@ Follow symlinks. .RS .RE .TP -.B \-m, \-\-max\-count NUM +.B \-m, \-\-max\-count \f[I]NUM\f[] Limit the number of matching lines per file searched to NUM. .RS .RE .TP +.B \-\-max\-filesize \f[I]NUM\f[]+\f[I]SUFFIX\f[]? +Ignore files larger than \f[I]NUM\f[] in size. +Directories will never be ignored. +.RS +.PP +\f[I]SUFFIX\f[] is optional and may be one of K, M or G. +These correspond to kilobytes, megabytes and gigabytes respectively. +If omitted the input is treated as bytes. +.RE +.TP .B \-\-maxdepth \f[I]NUM\f[] Descend at most NUM directories below the command line arguments. A value of zero searches only the starting\-points themselves. diff --git a/doc/rg.1.md b/doc/rg.1.md index 8804fcc5..830a3bb4 100644 --- a/doc/rg.1.md +++ b/doc/rg.1.md @@ -187,9 +187,16 @@ Project home page: https://github.com/BurntSushi/ripgrep -L, --follow : Follow symlinks. --m, --max-count NUM +-m, --max-count *NUM* : Limit the number of matching lines per file searched to NUM. +--max-filesize *NUM*+*SUFFIX*? +: Ignore files larger than *NUM* in size. Directories will never be ignored. + + *SUFFIX* is optional and may be one of K, M or G. These correspond to + kilobytes, megabytes and gigabytes respectively. 
If omitted the input is + treated as bytes. + --maxdepth *NUM* : Descend at most NUM directories below the command line arguments. A value of zero searches only the starting-points themselves. diff --git a/src/app.rs b/src/app.rs index 9ead9c5b..6d5177c4 100644 --- a/src/app.rs +++ b/src/app.rs @@ -1,6 +1,7 @@ use std::collections::HashMap; use clap::{App, AppSettings, Arg, ArgSettings}; +use regex::Regex; const ABOUT: &'static str = " ripgrep (rg) recursively searches your current directory for a regex pattern. @@ -145,6 +146,9 @@ fn app(next_line_help: bool, doc: F) -> App<'static, 'static> .arg(flag("max-count") .short("m").value_name("NUM").takes_value(true) .validator(validate_number)) + .arg(flag("max-filesize") + .value_name("NUM+SUFFIX?").takes_value(true) + .validator(validate_max_filesize)) .arg(flag("maxdepth") .value_name("NUM").takes_value(true) .validator(validate_number)) @@ -371,6 +375,13 @@ lazy_static! { doc!(h, "max-count", "Limit the number of matches.", "Limit the number of matching lines per file searched to NUM."); + doc!(h, "max-filesize", + "Ignore files larger than NUM in size.", + "Ignore files larger than NUM in size. Does not ignore directories. \ + \n\nThe input format accepts suffixes of K, M or G which \ + correspond to kilobytes, megabytes and gigabytes. If no suffix is \ + provided the input is treated as bytes. \ + \n\nExample: --max-filesize 50K or --max-filesize 80M"); doc!(h, "maxdepth", "Descend at most NUM directories.", "Limit the depth of directory traversal to NUM levels beyond \ @@ -491,3 +502,24 @@ lazy_static! 
{
 fn validate_number(s: String) -> Result<(), String> {
     s.parse::<usize>().map(|_|()).map_err(|err| err.to_string())
 }
+
+/// Checks that `s` is a valid `--max-filesize` value: decimal digits with an
+/// optional `K`, `M` or `G` suffix (powers of 1024). The value scaled by its
+/// suffix must still fit in a `u64`, otherwise an error is returned.
+fn validate_max_filesize(s: String) -> Result<(), String> {
+    let re = Regex::new(r#"^(\d+)([KMG])?$"#).unwrap();
+    let caps = try!(re.captures(&s)
+                      .ok_or("invalid format for max-filesize argument"));
+
+    // Group 1 is mandatory in the pattern, so indexing it cannot panic.
+    let value = try!(caps[1].parse::<u64>().map_err(|err| err.to_string()));
+    let scale: u64 = match caps.get(2).map(|m| m.as_str()) {
+        None => 1,
+        Some("K") => 1 << 10,
+        Some("M") => 1 << 20,
+        Some("G") => 1 << 30,
+        _ => return Err(From::from("invalid suffix for max-filesize argument"))
+    };
+    value.checked_mul(scale).map(|_| ())
+         .ok_or_else(|| "max-filesize argument is too large".to_string())
+}
diff --git a/src/args.rs b/src/args.rs
index a968617f..5f6d9916 100644
--- a/src/args.rs
+++ b/src/args.rs
@@ -55,6 +55,7 @@ pub struct Args {
     line_number: bool,
     line_per_match: bool,
     max_count: Option<u64>,
+    max_filesize: Option<u64>,
     maxdepth: Option<usize>,
     mmap: bool,
     no_ignore: bool,
@@ -285,6 +286,7 @@ impl Args {
         wd.follow_links(self.follow);
         wd.hidden(!self.hidden);
         wd.max_depth(self.maxdepth);
+        wd.max_filesize(self.max_filesize);
         wd.overrides(self.glob_overrides.clone());
         wd.types(self.types.clone());
         wd.git_global(!self.no_ignore && !self.no_ignore_vcs);
@@ -342,6 +344,7 @@ impl<'a> ArgMatches<'a> {
             line_number: self.line_number(),
             line_per_match: self.is_present("vimgrep"),
             max_count: try!(self.usize_of("max-count")).map(|max| max as u64),
+            max_filesize: try!(self.max_filesize()),
             maxdepth: try!(self.usize_of("maxdepth")),
             mmap: mmap,
             no_ignore: self.no_ignore(),
@@ -779,6 +782,33 @@ impl<'a> ArgMatches<'a> {
         btypes.build().map_err(From::from)
     }
 
+    /// Parses the max-filesize argument option into a byte count.
+    fn max_filesize(&self) -> Result<Option<u64>> {
+        use regex::Regex;
+
+        let max_filesize = match self.value_of_lossy("max-filesize") {
+            Some(x) => x,
+            None => return Ok(None)
+        };
+
+        let re = Regex::new(r#"^(\d+)([KMG])?$"#).unwrap();
+        let caps = try!(re.captures(&max_filesize)
+                          .ok_or("invalid format for max-filesize argument"));
+        // Group 1 is mandatory in the pattern, so indexing it cannot panic.
+        let value = try!(caps[1].parse::<u64>());
+        let scale: u64 = match caps.get(2).map(|m| m.as_str()) {
+            None => 1,
+            Some("K") => 1 << 10,
+            Some("M") => 1 << 20,
+            Some("G") => 1 << 30,
+            _ => return Err(From::from(
+                "invalid suffix for max-filesize argument"))
+        };
+        // Checked multiply: a byte count that overflows u64 is an error.
+        value.checked_mul(scale).map(Some)
+             .ok_or_else(|| From::from("max-filesize argument is too large"))
+    }
+
     /// Returns true if ignore files should be ignored.
     fn no_ignore(&self) -> bool {
         self.is_present("no-ignore")
diff --git a/tests/tests.rs b/tests/tests.rs
index aeacca54..fd60d672 100644
--- a/tests/tests.rs
+++ b/tests/tests.rs
@@ -432,6 +432,24 @@ sherlock!(context_line_numbers, "world|attached",
     assert_eq!(lines, expected);
 });
 
+sherlock!(max_filesize_parse_error_length, "Sherlock", ".",
+|wd: WorkDir, mut cmd: Command| {
+    cmd.arg("--max-filesize").arg("44444444444444444444");
+    wd.assert_err(&mut cmd);
+});
+
+sherlock!(max_filesize_parse_error_suffix, "Sherlock", ".",
+|wd: WorkDir, mut cmd: Command| {
+    cmd.arg("--max-filesize").arg("45k");
+    wd.assert_err(&mut cmd);
+});
+
+sherlock!(max_filesize_parse_error_overflow, "Sherlock", ".",
+|wd: WorkDir, mut cmd: Command| {
+    cmd.arg("--max-filesize").arg("18446744073709551615G");
+    wd.assert_err(&mut cmd);
+});
+
 sherlock!(ignore_hidden, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
     wd.remove("sherlock");
     wd.create(".sherlock", hay::SHERLOCK);