diff options
author | Balaji Sivaraman <balaji@balajisivaraman.com> | 2018-02-20 21:03:07 +0530 |
---|---|---|
committer | Andrew Gallant <jamslam@gmail.com> | 2018-03-10 10:38:25 -0500 |
commit | 27fc9f2fd341bd3ed672f79d43bf983514188b96 (patch) | |
tree | 6a0d42ebe069ac6077eace1968332246ab51cb77 /src | |
parent | 96f73293c0b734d91b55ad1e6940da0f706eed65 (diff) |
search: add a --count-matches flag
This commit introduces a new flag, --count-matches, which causes ripgrep
to report the number of individual matches in each file instead of the
number of matching lines.
Closes #566, Closes #814
Diffstat (limited to 'src')
-rw-r--r-- | src/app.rs | 31 | ||||
-rw-r--r-- | src/args.rs | 23 | ||||
-rw-r--r-- | src/search_buffer.rs | 33 | ||||
-rw-r--r-- | src/search_stream.rs | 37 | ||||
-rw-r--r-- | src/worker.rs | 13 |
5 files changed, 131 insertions, 6 deletions
@@ -517,6 +517,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> { flag_context(&mut args); flag_context_separator(&mut args); flag_count(&mut args); + flag_count_matches(&mut args); flag_debug(&mut args); flag_dfa_size_limit(&mut args); flag_encoding(&mut args); @@ -636,7 +637,8 @@ This overrides the --context flag. } fn flag_byte_offset(args: &mut Vec<RGArg>) { - const SHORT: &str = "Print the 0-based byte offset for each matching line."; + const SHORT: &str = + "Print the 0-based byte offset for each matching line."; const LONG: &str = long!("\ Print the 0-based byte offset within the input file before each line of output. If -o (--only-matching) is @@ -771,7 +773,7 @@ sequences like \\x7F or \\t may be used. The default value is --. } fn flag_count(args: &mut Vec<RGArg>) { - const SHORT: &str = "Only show the count of matches for each file."; + const SHORT: &str = "Only show the count of matching lines for each file."; const LONG: &str = long!("\ This flag suppresses normal output and shows the number of lines that match the given patterns for each file searched. Each file containing a match has its @@ -781,9 +783,32 @@ that match and not the total number of matches. If only one file is given to ripgrep, then only the count is printed if there is a match. The --with-filename flag can be used to force printing the file path in this case. + +This overrides the --count-matches flag. "); let arg = RGArg::switch("count").short("c") - .help(SHORT).long_help(LONG); + .help(SHORT).long_help(LONG).overrides("count-matches"); + args.push(arg); +} + +fn flag_count_matches(args: &mut Vec<RGArg>) { + const SHORT: &str = + "Only show the count of individual matches for each file."; + const LONG: &str = long!("\ +This flag suppresses normal output and shows the number of individual +matches of the given patterns for each file searched. Each file +containing matches has its path and match count printed on each line. 
+Note that this reports the total number of individual matches and not +the number of lines that match. + +If only one file is given to ripgrep, then only the count is printed if there +is a match. The --with-filename flag can be used to force printing the file +path in this case. + +This overrides the --count flag. +"); + let arg = RGArg::switch("count-matches") + .help(SHORT).long_help(LONG).overrides("count"); args.push(arg); } diff --git a/src/args.rs b/src/args.rs index b8714deb..309f5db8 100644 --- a/src/args.rs +++ b/src/args.rs @@ -41,6 +41,7 @@ pub struct Args { column: bool, context_separator: Vec<u8>, count: bool, + count_matches: bool, encoding: Option<&'static Encoding>, files_with_matches: bool, files_without_matches: bool, @@ -200,6 +201,7 @@ impl Args { pub fn file_separator(&self) -> Option<Vec<u8>> { let contextless = self.count + || self.count_matches || self.files_with_matches || self.files_without_matches; let use_heading_sep = self.heading && !contextless; @@ -262,6 +264,7 @@ impl Args { .before_context(self.before_context) .byte_offset(self.byte_offset) .count(self.count) + .count_matches(self.count_matches) .encoding(self.encoding) .files_with_matches(self.files_with_matches) .files_without_matches(self.files_without_matches) @@ -358,6 +361,7 @@ impl<'a> ArgMatches<'a> { let mmap = self.mmap(&paths)?; let with_filename = self.with_filename(&paths); let (before_context, after_context) = self.contexts()?; + let (count, count_matches) = self.counts(); let quiet = self.is_present("quiet"); let args = Args { paths: paths, @@ -368,7 +372,8 @@ impl<'a> ArgMatches<'a> { colors: self.color_specs()?, column: self.column(), context_separator: self.context_separator(), - count: self.is_present("count"), + count: count, + count_matches: count_matches, encoding: self.encoding()?, files_with_matches: self.is_present("files-with-matches"), files_without_matches: self.is_present("files-without-match"), @@ -732,6 +737,22 @@ impl<'a> ArgMatches<'a> { }) } + 
/// Returns whether the -c/--count or the --count-matches flags were + /// passed from the command line. + /// + /// If --count-matches and --invert-match were passed in, behave + /// as if --count and --invert-match were passed in (i.e. rg will + /// count inverted matches as per existing behavior). + fn counts(&self) -> (bool, bool) { + let count = self.is_present("count"); + let count_matches = self.is_present("count-matches"); + let invert_matches = self.is_present("invert-match"); + if count_matches && invert_matches { + return (true, false); + } + (count, count_matches) + } + /// Returns the user's color choice based on command line parameters and /// environment. fn color_choice(&self) -> termcolor::ColorChoice { diff --git a/src/search_buffer.rs b/src/search_buffer.rs index 59869a9b..2777a06c 100644 --- a/src/search_buffer.rs +++ b/src/search_buffer.rs @@ -22,6 +22,7 @@ pub struct BufferSearcher<'a, W: 'a> { path: &'a Path, buf: &'a [u8], match_line_count: u64, + match_count: Option<u64>, line_count: Option<u64>, byte_offset: Option<u64>, last_line: usize, @@ -41,6 +42,7 @@ impl<'a, W: WriteColor> BufferSearcher<'a, W> { path: path, buf: buf, match_line_count: 0, + match_count: None, line_count: None, byte_offset: None, last_line: 0, @@ -65,6 +67,15 @@ impl<'a, W: WriteColor> BufferSearcher<'a, W> { self } + /// If enabled, searching will print the count of individual matches + /// instead of each match. + /// + /// Disabled by default. + pub fn count_matches(mut self, yes: bool) -> Self { + self.opts.count_matches = yes; + self + } + /// If enabled, searching will print the path instead of each match. /// /// Disabled by default. @@ -135,6 +146,7 @@ impl<'a, W: WriteColor> BufferSearcher<'a, W> { // The memory map searcher uses one contiguous block of bytes, so the // offsets given the printer are sufficient to compute the byte offset. 
self.byte_offset = if self.opts.byte_offset { Some(0) } else { None }; + self.match_count = if self.opts.count_matches { Some(0) } else { None }; let mut last_end = 0; for m in self.grep.iter(self.buf) { if self.opts.invert_match { @@ -153,6 +165,10 @@ impl<'a, W: WriteColor> BufferSearcher<'a, W> { } if self.opts.count && self.match_line_count > 0 { self.printer.path_count(self.path, self.match_line_count); + } else if self.opts.count_matches + && self.match_count.map_or(false, |c| c > 0) + { + self.printer.path_count(self.path, self.match_count.unwrap()); } if self.opts.files_with_matches && self.match_line_count > 0 { self.printer.path(self.path); @@ -164,8 +180,18 @@ impl<'a, W: WriteColor> BufferSearcher<'a, W> { } #[inline(always)] + fn count_individual_matches(&mut self, start: usize, end: usize) { + if let Some(ref mut count) = self.match_count { + for _ in self.grep.regex().find_iter(&self.buf[start..end]) { + *count += 1; + } + } + } + + #[inline(always)] pub fn print_match(&mut self, start: usize, end: usize) { self.match_line_count += 1; + self.count_individual_matches(start, end); if self.opts.skip_matches() { return; } @@ -318,6 +344,13 @@ and exhibited clearly, with a label attached.\ } #[test] + fn count_matches() { + let (_, out) = search( + "the", SHERLOCK, |s| s.count_matches(true)); + assert_eq!(out, "/baz.rs:4\n"); + } + + #[test] fn files_with_matches() { let (count, out) = search( "Sherlock", SHERLOCK, |s| s.files_with_matches(true)); diff --git a/src/search_stream.rs b/src/search_stream.rs index 5ccb09b9..b218dd19 100644 --- a/src/search_stream.rs +++ b/src/search_stream.rs @@ -68,6 +68,7 @@ pub struct Searcher<'a, R, W: 'a> { path: &'a Path, haystack: R, match_line_count: u64, + match_count: Option<u64>, line_count: Option<u64>, byte_offset: Option<u64>, last_match: Match, @@ -83,6 +84,7 @@ pub struct Options { pub before_context: usize, pub byte_offset: bool, pub count: bool, + pub count_matches: bool, pub files_with_matches: bool, pub 
files_without_matches: bool, pub eol: u8, @@ -100,6 +102,7 @@ impl Default for Options { before_context: 0, byte_offset: false, count: false, + count_matches: false, files_with_matches: false, files_without_matches: false, eol: b'\n', @@ -114,11 +117,11 @@ impl Default for Options { } impl Options { - /// Several options (--quiet, --count, --files-with-matches, + /// Several options (--quiet, --count, --count-matches, --files-with-matches, /// --files-without-match) imply that we shouldn't ever display matches. pub fn skip_matches(&self) -> bool { self.count || self.files_with_matches || self.files_without_matches - || self.quiet + || self.quiet || self.count_matches } /// Some options (--quiet, --files-with-matches, --files-without-match) @@ -167,6 +170,7 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> { path: path, haystack: haystack, match_line_count: 0, + match_count: None, line_count: None, byte_offset: None, last_match: Match::default(), @@ -208,6 +212,15 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> { self } + /// If enabled, searching will print the count of individual matches + /// instead of each match. + /// + /// Disabled by default. + pub fn count_matches(mut self, yes: bool) -> Self { + self.opts.count_matches = yes; + self + } + /// If enabled, searching will print the path instead of each match. /// /// Disabled by default. 
@@ -274,6 +287,7 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> { self.match_line_count = 0; self.line_count = if self.opts.line_number { Some(0) } else { None }; self.byte_offset = if self.opts.byte_offset { Some(0) } else { None }; + self.match_count = if self.opts.count_matches { Some(0) } else { None }; self.last_match = Match::default(); self.after_context_remaining = 0; while !self.terminate() { @@ -326,6 +340,8 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> { if self.match_line_count > 0 { if self.opts.count { self.printer.path_count(self.path, self.match_line_count); + } else if self.opts.count_matches { + self.printer.path_count(self.path, self.match_count.unwrap()); } else if self.opts.files_with_matches { self.printer.path(self.path); } @@ -428,6 +444,7 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> { #[inline(always)] fn print_match(&mut self, start: usize, end: usize) { self.match_line_count += 1; + self.count_individual_matches(start, end); if self.opts.skip_matches() { return; } @@ -473,6 +490,15 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> { } #[inline(always)] + fn count_individual_matches(&mut self, start: usize, end: usize) { + if let Some(ref mut count) = self.match_count { + for _ in self.grep.regex().find_iter(&self.inp.buf[start..end]) { + *count += 1; + } + } + } + + #[inline(always)] fn count_lines(&mut self, upto: usize) { if let Some(ref mut line_count) = self.line_count { *line_count += count_lines( @@ -1067,6 +1093,13 @@ fn main() { } #[test] + fn count_matches() { + let (_, out) = search_smallcap( + "the", SHERLOCK, |s| s.count_matches(true)); + assert_eq!(out, "/baz.rs:4\n"); + } + + #[test] fn files_with_matches() { let (count, out) = search_smallcap( "Sherlock", SHERLOCK, |s| s.files_with_matches(true)); diff --git a/src/worker.rs b/src/worker.rs index e5f7546a..a8327cda 100644 --- a/src/worker.rs +++ b/src/worker.rs @@ -35,6 +35,7 @@ struct Options { before_context: usize, 
byte_offset: bool, count: bool, + count_matches: bool, files_with_matches: bool, files_without_matches: bool, eol: u8, @@ -56,6 +57,7 @@ impl Default for Options { before_context: 0, byte_offset: false, count: false, + count_matches: false, files_with_matches: false, files_without_matches: false, eol: b'\n', @@ -126,6 +128,15 @@ impl WorkerBuilder { self } + /// If enabled, searching will print the count of individual matches + /// instead of each match. + /// + /// Disabled by default. + pub fn count_matches(mut self, yes: bool) -> Self { + self.opts.count_matches = yes; + self + } + /// Set the encoding to use to read each file. /// /// If the encoding is `None` (the default), then the encoding is @@ -297,6 +308,7 @@ impl Worker { .before_context(self.opts.before_context) .byte_offset(self.opts.byte_offset) .count(self.opts.count) + .count_matches(self.opts.count_matches) .files_with_matches(self.opts.files_with_matches) .files_without_matches(self.opts.files_without_matches) .eol(self.opts.eol) @@ -337,6 +349,7 @@ impl Worker { Ok(searcher .byte_offset(self.opts.byte_offset) .count(self.opts.count) + .count_matches(self.opts.count_matches) .files_with_matches(self.opts.files_with_matches) .files_without_matches(self.opts.files_without_matches) .eol(self.opts.eol) |