summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrew Gallant <jamslam@gmail.com>2018-09-04 22:45:24 -0400
committerAndrew Gallant <jamslam@gmail.com>2018-09-04 23:18:55 -0400
commit241bc8f8fcfdc725afa65ee539c37960b10550b1 (patch)
treed5ae44155e4d3c91f6a70c5a23f2d2d29d99d668
parentb6e30124e07fb3b3530bae2c0cf19e0893aa9831 (diff)
ripgrep: add --pre-glob flag
The --pre-glob flag is like the --glob flag, except that it selects which files are passed through the preprocessor rather than which files are searched. This makes it possible to apply the preprocessor to only a small subset of files, which can greatly reduce the process overhead of using a preprocessor when searching large directories.
-rw-r--r--CHANGELOG.md2
-rw-r--r--complete/_rg3
-rw-r--r--src/app.rs150
-rw-r--r--src/args.rs12
-rw-r--r--src/search.rs33
-rw-r--r--tests/misc.rs18
6 files changed, 159 insertions, 59 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 226ca6a7..f757f550 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -46,6 +46,8 @@ Feature enhancements:
The `--passthru` flag now works with the `--replace` flag.
* FEATURE:
Add `--line-buffered` and `--block-buffered` for forcing a buffer strategy.
+* FEATURE:
+ Add `--pre-glob` for filtering files through the `--pre` flag.
Bug fixes:
diff --git a/complete/_rg b/complete/_rg
index 2bbdf992..6f7b0ef8 100644
--- a/complete/_rg
+++ b/complete/_rg
@@ -183,6 +183,9 @@ _rg() {
'(-z --search-zip)--pre=[specify preprocessor utility]:preprocessor utility:_command_names -e'
$no'--no-pre[disable preprocessor utility]'
+ + pre-glob # Preprocessing glob options
+ '*--pre-glob[include/exclude files for preprocessing with --pre]'
+
+ '(pretty-vimgrep)' # Pretty/vimgrep display options
'(heading)'{-p,--pretty}'[alias for --color=always --heading -n]'
'(heading passthru)--vimgrep[show results in vim-compatible format]'
diff --git a/src/app.rs b/src/app.rs
index e0e1eda7..03239277 100644
--- a/src/app.rs
+++ b/src/app.rs
@@ -598,6 +598,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
flag_passthru(&mut args);
flag_pcre2(&mut args);
flag_pre(&mut args);
+ flag_pre_glob(&mut args);
flag_pretty(&mut args);
flag_quiet(&mut args);
flag_regex_size_limit(&mut args);
@@ -1819,6 +1820,97 @@ This flag can be disabled with --no-pcre2.
args.push(arg);
}
+fn flag_pre(args: &mut Vec<RGArg>) {
+ const SHORT: &str = "search outputs of COMMAND FILE for each FILE";
+ const LONG: &str = long!("\
+For each input FILE, search the standard output of COMMAND FILE rather than the
+contents of FILE. This option expects the COMMAND program to either be an
+absolute path or to be available in your PATH. Either an empty string COMMAND
+or the `--no-pre` flag will disable this behavior.
+
+ WARNING: When this flag is set, ripgrep will unconditionally spawn a
+ process for every file that is searched. Therefore, this can incur an
+ unnecessarily large performance penalty if you don't otherwise need the
+ flexibility offered by this flag.
+
+A preprocessor is not run when ripgrep is searching stdin.
+
+When searching over sets of files that may require one of several decoders
+as preprocessors, COMMAND should be a wrapper program or script which first
+classifies FILE based on magic numbers/content or based on the FILE name and
+then dispatches to an appropriate preprocessor. Each COMMAND also has its
+standard input connected to FILE for convenience.
+
+For example, a shell script for COMMAND might look like:
+
+ case \"$1\" in
+ *.pdf)
+ exec pdftotext \"$1\" -
+ ;;
+ *)
+ case $(file \"$1\") in
+ *Zstandard*)
+ exec pzstd -cdq
+ ;;
+ *)
+ exec cat
+ ;;
+ esac
+ ;;
+ esac
+
+The above script uses `pdftotext` to convert a PDF file to plain text. For
+all other files, the script uses the `file` utility to sniff the type of the
+file based on its contents. If it is a compressed file in the Zstandard format,
+then `pzstd` is used to decompress the contents to stdout.
+
+This overrides the -z/--search-zip flag.
+");
+ let arg = RGArg::flag("pre", "COMMAND")
+ .help(SHORT).long_help(LONG)
+ .overrides("no-pre")
+ .overrides("search-zip");
+ args.push(arg);
+
+ let arg = RGArg::switch("no-pre")
+ .hidden()
+ .overrides("pre");
+ args.push(arg);
+}
+
+fn flag_pre_glob(args: &mut Vec<RGArg>) {
+ const SHORT: &str =
+ "Include or exclude files from a preprocessing command.";
+ const LONG: &str = long!("\
+This flag works in conjunction with the --pre flag. Namely, when one or more
+--pre-glob flags are given, then only files that match the given set of globs
+will be handed to the command specified by the --pre flag. Any non-matching
+files will be searched without using the preprocessor command.
+
+This flag is useful when searching many files with the --pre flag. Namely,
+it permits the ability to avoid process overhead for files that don't need
+preprocessing. For example, given the following shell script, 'pre-pdftotext':
+
+ #!/bin/sh
+
+ pdftotext \"$1\" -
+
+then it is possible to use '--pre pre-pdftotext --pre-glob \'*.pdf\'' to make
+it so ripgrep only executes the 'pre-pdftotext' command on files with a '.pdf'
+extension.
+
+Multiple --pre-glob flags may be used. Globbing rules match .gitignore globs.
+Precede a glob with a ! to exclude it.
+
+This flag has no effect if the --pre flag is not used.
+");
+ let arg = RGArg::flag("pre-glob", "GLOB")
+ .help(SHORT).long_help(LONG)
+ .multiple()
+ .allow_leading_hyphen();
+ args.push(arg);
+}
+
fn flag_pretty(args: &mut Vec<RGArg>) {
const SHORT: &str = "Alias for --color always --heading --line-number.";
const LONG: &str = long!("\
@@ -1924,64 +2016,6 @@ This flag can be disabled with --no-search-zip.
args.push(arg);
}
-fn flag_pre(args: &mut Vec<RGArg>) {
- const SHORT: &str = "search outputs of COMMAND FILE for each FILE";
- const LONG: &str = long!("\
-For each input FILE, search the standard output of COMMAND FILE rather than the
-contents of FILE. This option expects the COMMAND program to either be an
-absolute path or to be available in your PATH. Either an empty string COMMAND
-or the `--no-pre` flag will disable this behavior.
-
- WARNING: When this flag is set, ripgrep will unconditionally spawn a
- process for every file that is searched. Therefore, this can incur an
- unnecessarily large performance penalty if you don't otherwise need the
- flexibility offered by this flag.
-
-A preprocessor is not run when ripgrep is searching stdin.
-
-When searching over sets of files that may require one of several decoders
-as preprocessors, COMMAND should be a wrapper program or script which first
-classifies FILE based on magic numbers/content or based on the FILE name and
-then dispatches to an appropriate preprocessor. Each COMMAND also has its
-standard input connected to FILE for convenience.
-
-For example, a shell script for COMMAND might look like:
-
- case \"$1\" in
- *.pdf)
- exec pdftotext \"$1\" -
- ;;
- *)
- case $(file \"$1\") in
- *Zstandard*)
- exec pzstd -cdq
- ;;
- *)
- exec cat
- ;;
- esac
- ;;
- esac
-
-The above script uses `pdftotext` to convert a PDF file to plain text. For
-all other files, the script uses the `file` utility to sniff the type of the
-file based on its contents. If it is a compressed file in the Zstandard format,
-then `pzstd` is used to decompress the contents to stdout.
-
-This overrides the -z/--search-zip flag.
-");
- let arg = RGArg::flag("pre", "COMMAND")
- .help(SHORT).long_help(LONG)
- .overrides("no-pre")
- .overrides("search-zip");
- args.push(arg);
-
- let arg = RGArg::switch("no-pre")
- .hidden()
- .overrides("pre");
- args.push(arg);
-}
-
fn flag_smart_case(args: &mut Vec<RGArg>) {
const SHORT: &str = "Smart case search.";
const LONG: &str = long!("\
diff --git a/src/args.rs b/src/args.rs
index f8a29cae..1a38d3ef 100644
--- a/src/args.rs
+++ b/src/args.rs
@@ -285,6 +285,7 @@ impl Args {
builder
.json_stats(self.matches().is_present("json"))
.preprocessor(self.matches().preprocessor())
+ .preprocessor_globs(self.matches().preprocessor_globs()?)
.search_zip(self.matches().is_present("search-zip"));
Ok(builder.build(matcher, searcher, printer))
}
@@ -1323,6 +1324,17 @@ impl ArgMatches {
Some(Path::new(path).to_path_buf())
}
+ /// Builds the set of globs for filtering files to apply to the --pre
+ /// flag. If no --pre-globs are available, then this always returns an
+ /// empty set of globs.
+ fn preprocessor_globs(&self) -> Result<Override> {
+ let mut builder = OverrideBuilder::new(env::current_dir()?);
+ for glob in self.values_of_lossy_vec("pre-glob") {
+ builder.add(&glob)?;
+ }
+ Ok(builder.build()?)
+ }
+
/// Parse the regex-size-limit argument option into a byte count.
fn regex_size_limit(&self) -> Result<Option<usize>> {
let r = self.parse_human_readable_size("regex-size-limit")?;
diff --git a/src/search.rs b/src/search.rs
index 457f8f7a..9baf513f 100644
--- a/src/search.rs
+++ b/src/search.rs
@@ -11,6 +11,7 @@ use grep::pcre2::{RegexMatcher as PCRE2RegexMatcher};
use grep::printer::{JSON, Standard, Summary, Stats};
use grep::regex::{RegexMatcher as RustRegexMatcher};
use grep::searcher::Searcher;
+use ignore::overrides::Override;
use serde_json as json;
use termcolor::WriteColor;
@@ -23,6 +24,7 @@ use subject::Subject;
struct Config {
json_stats: bool,
preprocessor: Option<PathBuf>,
+ preprocessor_globs: Override,
search_zip: bool,
}
@@ -31,6 +33,7 @@ impl Default for Config {
Config {
json_stats: false,
preprocessor: None,
+ preprocessor_globs: Override::empty(),
search_zip: false,
}
}
@@ -108,6 +111,17 @@ impl SearchWorkerBuilder {
self
}
+ /// Set the globs for determining which files should be run through the
+ /// preprocessor. By default, with no globs and a preprocessor specified,
+ /// every file is run through the preprocessor.
+ pub fn preprocessor_globs(
+ &mut self,
+ globs: Override,
+ ) -> &mut SearchWorkerBuilder {
+ self.config.preprocessor_globs = globs;
+ self
+ }
+
/// Enable the decompression and searching of common compressed files.
///
/// When enabled, if a particular file path is recognized as a compressed
@@ -298,7 +312,7 @@ impl<W: WriteColor> SearchWorker<W> {
let stdin = io::stdin();
// A `return` here appeases the borrow checker. NLL will fix this.
return self.search_reader(path, stdin.lock());
- } else if self.config.preprocessor.is_some() {
+ } else if self.should_preprocess(path) {
self.search_preprocessor(path)
} else if self.should_decompress(path) {
self.search_decompress(path)
@@ -316,6 +330,20 @@ impl<W: WriteColor> SearchWorker<W> {
self.decomp_builder.get_matcher().has_command(path)
}
+ /// Returns true if and only if the given file path should be run through
+ /// the preprocessor.
+ fn should_preprocess(&self, path: &Path) -> bool {
+ if !self.config.preprocessor.is_some() {
+ return false;
+ }
+ if self.config.preprocessor_globs.is_empty() {
+ return true;
+ }
+ !self.config.preprocessor_globs.matched(path, false).is_ignore()
+ }
+
+ /// Search the given file path by first asking the preprocessor for the
+ /// data to search instead of opening the path directly.
fn search_preprocessor(
&mut self,
path: &Path,
@@ -333,6 +361,9 @@ impl<W: WriteColor> SearchWorker<W> {
})
}
+ /// Attempt to decompress the data at the given file path and search the
+ /// result. If the given file path isn't recognized as a compressed file,
+ /// then search it without doing any decompression.
fn search_decompress(
&mut self,
path: &Path,
diff --git a/tests/misc.rs b/tests/misc.rs
index 62226ceb..9b5a7a75 100644
--- a/tests/misc.rs
+++ b/tests/misc.rs
@@ -816,6 +816,24 @@ be, to a very large extent, the result of luck. Sherlock Holmes
eqnice!(expected, cmd.stdout());
});
+rgtest!(preprocessing_glob, |dir: Dir, mut cmd: TestCommand| {
+ if !cmd_exists("xzcat") {
+ return;
+ }
+
+ dir.create("sherlock", SHERLOCK);
+ dir.create_bytes("sherlock.xz", include_bytes!("./data/sherlock.xz"));
+ cmd.args(&["--pre", "xzcat", "--pre-glob", "*.xz", "Sherlock"]);
+
+ let expected = "\
+sherlock.xz:For the Doctor Watsons of this world, as opposed to the Sherlock
+sherlock.xz:be, to a very large extent, the result of luck. Sherlock Holmes
+sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
+sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
+";
+ eqnice!(sort_lines(expected), sort_lines(&cmd.stdout()));
+});
+
rgtest!(compressed_gzip, |dir: Dir, mut cmd: TestCommand| {
if !cmd_exists("gzip") {
return;