summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authortiehuis <marctiehuis@gmail.com>2017-02-28 17:53:52 +1300
committerAndrew Gallant <jamslam@gmail.com>2017-03-08 10:17:18 -0500
commit714ae822418e13b1084d377654b3419f22a51866 (patch)
treeaf54173d5af5712b8c53645e026e55d7548cb908
parent49fd6687120ef11192847ef47191bbc6d32a2ed9 (diff)
Add `--max-filesize` option to cli
The --max-filesize option allows filtering out files which are larger than the specified limit. This is potentially useful if one is attempting to search a number of large files without common file-types/suffixes. See #369.
-rw-r--r--Cargo.toml1
-rw-r--r--build.rs1
-rw-r--r--doc/rg.114
-rw-r--r--doc/rg.1.md9
-rw-r--r--src/app.rs32
-rw-r--r--src/args.rs30
-rw-r--r--tests/tests.rs12
7 files changed, 96 insertions, 3 deletions
diff --git a/Cargo.toml b/Cargo.toml
index 2a079292..bac7e3f4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -45,6 +45,7 @@ termcolor = { version = "0.3.0", path = "termcolor" }
[build-dependencies]
clap = "2.18"
lazy_static = "0.2"
+regex = "0.2.1"
[features]
avx-accel = ["bytecount/avx-accel"]
diff --git a/build.rs b/build.rs
index 8a7c4900..4d0e2d13 100644
--- a/build.rs
+++ b/build.rs
@@ -2,6 +2,7 @@
extern crate clap;
#[macro_use]
extern crate lazy_static;
+extern crate regex;
use std::env;
use std::fs;
diff --git a/doc/rg.1 b/doc/rg.1
index cbc0c89e..1b66f2a2 100644
--- a/doc/rg.1
+++ b/doc/rg.1
@@ -1,4 +1,4 @@
-.\" Automatically generated by Pandoc 1.19.1
+.\" Automatically generated by Pandoc 1.19.2.1
.\"
.TH "rg" "1"
.hy
@@ -275,11 +275,21 @@ Follow symlinks.
.RS
.RE
.TP
-.B \-m, \-\-max\-count NUM
+.B \-m, \-\-max\-count \f[I]NUM\f[]
Limit the number of matching lines per file searched to NUM.
.RS
.RE
.TP
+.B \-\-max\-filesize \f[I]NUM\f[]+\f[I]SUFFIX\f[]?
+Ignore files larger than \f[I]NUM\f[] in size.
+Directories will never be ignored.
+.RS
+.PP
+\f[I]SUFFIX\f[] is optional and may be one of K, M or G.
+These correspond to kilobytes, megabytes and gigabytes respectively.
+If omitted the input is treated as bytes.
+.RE
+.TP
.B \-\-maxdepth \f[I]NUM\f[]
Descend at most NUM directories below the command line arguments.
A value of zero searches only the starting\-points themselves.
diff --git a/doc/rg.1.md b/doc/rg.1.md
index 8804fcc5..830a3bb4 100644
--- a/doc/rg.1.md
+++ b/doc/rg.1.md
@@ -187,9 +187,16 @@ Project home page: https://github.com/BurntSushi/ripgrep
-L, --follow
: Follow symlinks.
--m, --max-count NUM
+-m, --max-count *NUM*
: Limit the number of matching lines per file searched to NUM.
+--max-filesize *NUM*+*SUFFIX*?
+: Ignore files larger than *NUM* in size. Directories will never be ignored.
+
+ *SUFFIX* is optional and may be one of K, M or G. These correspond to
+ kilobytes, megabytes and gigabytes respectively. If omitted the input is
+ treated as bytes.
+
--maxdepth *NUM*
: Descend at most NUM directories below the command line arguments.
A value of zero searches only the starting-points themselves.
diff --git a/src/app.rs b/src/app.rs
index 9ead9c5b..6d5177c4 100644
--- a/src/app.rs
+++ b/src/app.rs
@@ -1,6 +1,7 @@
use std::collections::HashMap;
use clap::{App, AppSettings, Arg, ArgSettings};
+use regex::Regex;
const ABOUT: &'static str = "
ripgrep (rg) recursively searches your current directory for a regex pattern.
@@ -145,6 +146,9 @@ fn app<F>(next_line_help: bool, doc: F) -> App<'static, 'static>
.arg(flag("max-count")
.short("m").value_name("NUM").takes_value(true)
.validator(validate_number))
+ .arg(flag("max-filesize")
+ .value_name("NUM+SUFFIX?").takes_value(true)
+ .validator(validate_max_filesize))
.arg(flag("maxdepth")
.value_name("NUM").takes_value(true)
.validator(validate_number))
@@ -371,6 +375,13 @@ lazy_static! {
doc!(h, "max-count",
"Limit the number of matches.",
"Limit the number of matching lines per file searched to NUM.");
+ doc!(h, "max-filesize",
+ "Ignore files larger than NUM in size.",
+ "Ignore files larger than NUM in size. Does not ignore directories. \
+ \n\nThe input format accepts suffixes of K, M or G which \
+ correspond to kilobytes, megabytes and gigabytes. If no suffix is \
+ provided the input is treated as bytes. \
+ \n\nExample: --max-filesize 50K or --max-filesize 80M");
doc!(h, "maxdepth",
"Descend at most NUM directories.",
"Limit the depth of directory traversal to NUM levels beyond \
@@ -491,3 +502,24 @@ lazy_static! {
/// Checks that `s` parses as a `usize`; the parsed value itself is discarded.
///
/// On failure the parse error's message is returned as the error string.
fn validate_number(s: String) -> Result<(), String> {
    match s.parse::<usize>() {
        Ok(_) => Ok(()),
        Err(parse_err) => Err(parse_err.to_string()),
    }
}
+
+/// Validates the value given to `--max-filesize`.
+///
+/// Accepted input is one or more digits optionally followed by exactly one
+/// of the upper-case suffixes `K`, `M` or `G`. An error string is returned
+/// when the input does not have that shape, when the digits do not fit in a
+/// `u64`, or when the value no longer fits in a `u64` once the suffix's
+/// power-of-1024 multiplier is applied.
+fn validate_max_filesize(s: String) -> Result<(), String> {
+    let re = Regex::new(r#"^(\d+)([KMG])?$"#).unwrap();
+    let caps = try!(re.captures(&s)
+                      .ok_or("invalid format for max-filesize argument"));
+
+    let value = match caps.get(1) {
+        Some(digits) => {
+            try!(digits.as_str().parse::<u64>().map_err(|err| err.to_string()))
+        }
+        // Group 1 is mandatory in the pattern, so this arm is defensive only.
+        None => 0,
+    };
+
+    let multiplier: u64 = match caps.get(2).map(|x| x.as_str()) {
+        None => 1,
+        Some("K") => 1 << 10,
+        Some("M") => 1 << 20,
+        Some("G") => 1 << 30,
+        _ => return Err(From::from("invalid suffix for max-filesize argument")),
+    };
+
+    // Reject values that only overflow after scaling (e.g. a 20-digit number
+    // with a `G` suffix); otherwise they would pass validation and overflow
+    // when the byte count is computed.
+    try!(value.checked_mul(multiplier)
+              .ok_or("max-filesize argument is too large"));
+    Ok(())
+}
diff --git a/src/args.rs b/src/args.rs
index a968617f..5f6d9916 100644
--- a/src/args.rs
+++ b/src/args.rs
@@ -55,6 +55,7 @@ pub struct Args {
line_number: bool,
line_per_match: bool,
max_count: Option<u64>,
+ max_filesize: Option<u64>,
maxdepth: Option<usize>,
mmap: bool,
no_ignore: bool,
@@ -285,6 +286,7 @@ impl Args {
wd.follow_links(self.follow);
wd.hidden(!self.hidden);
wd.max_depth(self.maxdepth);
+ wd.max_filesize(self.max_filesize);
wd.overrides(self.glob_overrides.clone());
wd.types(self.types.clone());
wd.git_global(!self.no_ignore && !self.no_ignore_vcs);
@@ -342,6 +344,7 @@ impl<'a> ArgMatches<'a> {
line_number: self.line_number(),
line_per_match: self.is_present("vimgrep"),
max_count: try!(self.usize_of("max-count")).map(|max| max as u64),
+ max_filesize: try!(self.max_filesize()),
maxdepth: try!(self.usize_of("maxdepth")),
mmap: mmap,
no_ignore: self.no_ignore(),
@@ -779,6 +782,33 @@ impl<'a> ArgMatches<'a> {
btypes.build().map_err(From::from)
}
+    /// Parses the max-filesize argument option into a byte count.
+    ///
+    /// Returns `Ok(None)` when the flag was not given. Otherwise the value
+    /// must be one or more digits optionally followed by `K`, `M` or `G`,
+    /// which scale the number by 1024, 1024^2 and 1024^3 respectively.
+    ///
+    /// An error is returned if the value is malformed, if the digits do not
+    /// fit in a `u64`, or if the scaled byte count would overflow a `u64`.
+    fn max_filesize(&self) -> Result<Option<u64>> {
+        use regex::Regex;
+
+        let max_filesize = match self.value_of_lossy("max-filesize") {
+            Some(x) => x,
+            None => return Ok(None)
+        };
+
+        let re = Regex::new(r#"^(\d+)([KMG])?$"#).unwrap();
+        let caps = try!(re.captures(&max_filesize)
+                          .ok_or("invalid format for max-filesize argument"));
+
+        let value = match caps.get(1) {
+            Some(value) => Some(try!(value.as_str().parse::<u64>())),
+            None => None
+        };
+        let multiplier: u64 = match caps.get(2).map(|x| x.as_str()) {
+            None => 1,
+            Some("K") => 1 << 10,
+            Some("M") => 1 << 20,
+            Some("G") => 1 << 30,
+            _ => return Err(From::from("invalid suffix for max-filesize argument"))
+        };
+        match value {
+            None => Ok(None),
+            Some(x) => {
+                // A plain `*` would panic in debug builds and silently wrap
+                // to a bogus limit in release builds for huge inputs.
+                let bytes = try!(x.checked_mul(multiplier)
+                                  .ok_or("max-filesize argument is too large"));
+                Ok(Some(bytes))
+            }
+        }
+    }
+
/// Returns true if ignore files should be ignored.
fn no_ignore(&self) -> bool {
self.is_present("no-ignore")
diff --git a/tests/tests.rs b/tests/tests.rs
index aeacca54..fd60d672 100644
--- a/tests/tests.rs
+++ b/tests/tests.rs
@@ -432,6 +432,18 @@ sherlock!(context_line_numbers, "world|attached",
assert_eq!(lines, expected);
});
+// Passes a 20-digit value to --max-filesize. 44444444444444444444 is larger
+// than u64::MAX (18446744073709551615), so parsing it as a u64 fails.
+// NOTE(review): `assert_err` presumably asserts that the command exits
+// unsuccessfully; confirm against the WorkDir test helper.
+sherlock!(max_filesize_parse_error_length, "Sherlock", ".",
+|wd: WorkDir, mut cmd: Command| {
+ cmd.arg("--max-filesize").arg("44444444444444444444");
+ wd.assert_err(&mut cmd);
+});
+
+// Passes a lower-case suffix to --max-filesize. The argument pattern only
+// accepts the upper-case suffixes K, M and G, so "45k" is malformed.
+// NOTE(review): `assert_err` presumably asserts that the command exits
+// unsuccessfully; confirm against the WorkDir test helper.
+sherlock!(max_filesize_parse_error_suffix, "Sherlock", ".",
+|wd: WorkDir, mut cmd: Command| {
+ cmd.arg("--max-filesize").arg("45k");
+ wd.assert_err(&mut cmd);
+});
+
sherlock!(ignore_hidden, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
wd.remove("sherlock");
wd.create(".sherlock", hay::SHERLOCK);