summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Balaji Sivaraman <balaji@balajisivaraman.com> 2018-02-20 21:03:07 +0530
committer: Andrew Gallant <jamslam@gmail.com> 2018-03-10 10:38:25 -0500
commit: 27fc9f2fd341bd3ed672f79d43bf983514188b96 (patch)
tree: 6a0d42ebe069ac6077eace1968332246ab51cb77 /src
parent: 96f73293c0b734d91b55ad1e6940da0f706eed65 (diff)
search: add a --count-matches flag
This commit introduces a new flag, --count-matches, which will cause ripgrep to report a total count of all matches instead of a count of total lines matched. Closes #566, Closes #814
Diffstat (limited to 'src')
-rw-r--r-- src/app.rs 31
-rw-r--r-- src/args.rs 23
-rw-r--r-- src/search_buffer.rs 33
-rw-r--r-- src/search_stream.rs 37
-rw-r--r-- src/worker.rs 13
5 files changed, 131 insertions, 6 deletions
diff --git a/src/app.rs b/src/app.rs
index 6be318b4..9d1d734c 100644
--- a/src/app.rs
+++ b/src/app.rs
@@ -517,6 +517,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
flag_context(&mut args);
flag_context_separator(&mut args);
flag_count(&mut args);
+ flag_count_matches(&mut args);
flag_debug(&mut args);
flag_dfa_size_limit(&mut args);
flag_encoding(&mut args);
@@ -636,7 +637,8 @@ This overrides the --context flag.
}
fn flag_byte_offset(args: &mut Vec<RGArg>) {
- const SHORT: &str = "Print the 0-based byte offset for each matching line.";
+ const SHORT: &str =
+ "Print the 0-based byte offset for each matching line.";
const LONG: &str = long!("\
Print the 0-based byte offset within the input file
before each line of output. If -o (--only-matching) is
@@ -771,7 +773,7 @@ sequences like \\x7F or \\t may be used. The default value is --.
}
fn flag_count(args: &mut Vec<RGArg>) {
- const SHORT: &str = "Only show the count of matches for each file.";
+ const SHORT: &str = "Only show the count of matching lines for each file.";
const LONG: &str = long!("\
This flag suppresses normal output and shows the number of lines that match
the given patterns for each file searched. Each file containing a match has its
@@ -781,9 +783,32 @@ that match and not the total number of matches.
If only one file is given to ripgrep, then only the count is printed if there
is a match. The --with-filename flag can be used to force printing the file
path in this case.
+
+This overrides the --count-matches flag.
");
let arg = RGArg::switch("count").short("c")
- .help(SHORT).long_help(LONG);
+ .help(SHORT).long_help(LONG).overrides("count-matches");
+ args.push(arg);
+}
+
+/// Adds the --count-matches switch to `args`.
+///
+/// The switch is declared to override --count.
+fn flag_count_matches(args: &mut Vec<RGArg>) {
+ const SHORT: &str =
+ "Only show the count of individual matches for each file.";
+ const LONG: &str = long!("\
+This flag suppresses normal output and shows the number of individual
+matches of the given patterns for each file searched. Each file
+containing matches has its path and match count printed on each line.
+Note that this reports the total number of individual matches and not
+the number of lines that match.
+
+If only one file is given to ripgrep, then only the count is printed if there
+is a match. The --with-filename flag can be used to force printing the file
+path in this case.
+
+This overrides the --count flag.
+");
+ let arg = RGArg::switch("count-matches")
+ .help(SHORT).long_help(LONG).overrides("count");
args.push(arg);
}
diff --git a/src/args.rs b/src/args.rs
index b8714deb..309f5db8 100644
--- a/src/args.rs
+++ b/src/args.rs
@@ -41,6 +41,7 @@ pub struct Args {
column: bool,
context_separator: Vec<u8>,
count: bool,
+ count_matches: bool,
encoding: Option<&'static Encoding>,
files_with_matches: bool,
files_without_matches: bool,
@@ -200,6 +201,7 @@ impl Args {
pub fn file_separator(&self) -> Option<Vec<u8>> {
let contextless =
self.count
+ || self.count_matches
|| self.files_with_matches
|| self.files_without_matches;
let use_heading_sep = self.heading && !contextless;
@@ -262,6 +264,7 @@ impl Args {
.before_context(self.before_context)
.byte_offset(self.byte_offset)
.count(self.count)
+ .count_matches(self.count_matches)
.encoding(self.encoding)
.files_with_matches(self.files_with_matches)
.files_without_matches(self.files_without_matches)
@@ -358,6 +361,7 @@ impl<'a> ArgMatches<'a> {
let mmap = self.mmap(&paths)?;
let with_filename = self.with_filename(&paths);
let (before_context, after_context) = self.contexts()?;
+ let (count, count_matches) = self.counts();
let quiet = self.is_present("quiet");
let args = Args {
paths: paths,
@@ -368,7 +372,8 @@ impl<'a> ArgMatches<'a> {
colors: self.color_specs()?,
column: self.column(),
context_separator: self.context_separator(),
- count: self.is_present("count"),
+ count: count,
+ count_matches: count_matches,
encoding: self.encoding()?,
files_with_matches: self.is_present("files-with-matches"),
files_without_matches: self.is_present("files-without-match"),
@@ -732,6 +737,22 @@ impl<'a> ArgMatches<'a> {
})
}
+ /// Returns a `(count, count_matches)` pair describing whether the
+ /// -c/--count and --count-matches flags, respectively, are in effect.
+ ///
+ /// If --count-matches and --invert-match were both passed in, this
+ /// returns `(true, false)`, i.e. behaves as if --count and --invert-match
+ /// were passed in (rg counts inverted matches as per existing behavior).
+ fn counts(&self) -> (bool, bool) {
+ let count = self.is_present("count");
+ let count_matches = self.is_present("count-matches");
+ let invert_matches = self.is_present("invert-match");
+ if count_matches && invert_matches {
+ return (true, false);
+ }
+ (count, count_matches)
+ }
+
/// Returns the user's color choice based on command line parameters and
/// environment.
fn color_choice(&self) -> termcolor::ColorChoice {
diff --git a/src/search_buffer.rs b/src/search_buffer.rs
index 59869a9b..2777a06c 100644
--- a/src/search_buffer.rs
+++ b/src/search_buffer.rs
@@ -22,6 +22,7 @@ pub struct BufferSearcher<'a, W: 'a> {
path: &'a Path,
buf: &'a [u8],
match_line_count: u64,
+ match_count: Option<u64>,
line_count: Option<u64>,
byte_offset: Option<u64>,
last_line: usize,
@@ -41,6 +42,7 @@ impl<'a, W: WriteColor> BufferSearcher<'a, W> {
path: path,
buf: buf,
match_line_count: 0,
+ match_count: None,
line_count: None,
byte_offset: None,
last_line: 0,
@@ -65,6 +67,15 @@ impl<'a, W: WriteColor> BufferSearcher<'a, W> {
self
}
+ /// If enabled, searching will print the total number of individual
+ /// matches (rather than the number of matching lines) instead of
+ /// printing each match.
+ ///
+ /// Disabled by default.
+ pub fn count_matches(mut self, yes: bool) -> Self {
+ self.opts.count_matches = yes;
+ self
+ }
+
/// If enabled, searching will print the path instead of each match.
///
/// Disabled by default.
@@ -135,6 +146,7 @@ impl<'a, W: WriteColor> BufferSearcher<'a, W> {
// The memory map searcher uses one contiguous block of bytes, so the
// offsets given the printer are sufficient to compute the byte offset.
self.byte_offset = if self.opts.byte_offset { Some(0) } else { None };
+ self.match_count = if self.opts.count_matches { Some(0) } else { None };
let mut last_end = 0;
for m in self.grep.iter(self.buf) {
if self.opts.invert_match {
@@ -153,6 +165,10 @@ impl<'a, W: WriteColor> BufferSearcher<'a, W> {
}
if self.opts.count && self.match_line_count > 0 {
self.printer.path_count(self.path, self.match_line_count);
+ } else if self.opts.count_matches
+ && self.match_count.map_or(false, |c| c > 0)
+ {
+ self.printer.path_count(self.path, self.match_count.unwrap());
}
if self.opts.files_with_matches && self.match_line_count > 0 {
self.printer.path(self.path);
@@ -164,8 +180,18 @@ impl<'a, W: WriteColor> BufferSearcher<'a, W> {
}
#[inline(always)]
+ /// Adds the number of regex matches found within `buf[start..end]`
+ /// to `match_count`.
+ ///
+ /// This is a no-op when `match_count` is `None`.
+ fn count_individual_matches(&mut self, start: usize, end: usize) {
+ if let Some(ref mut count) = self.match_count {
+ for _ in self.grep.regex().find_iter(&self.buf[start..end]) {
+ *count += 1;
+ }
+ }
+ }
+
+ #[inline(always)]
pub fn print_match(&mut self, start: usize, end: usize) {
self.match_line_count += 1;
+ self.count_individual_matches(start, end);
if self.opts.skip_matches() {
return;
}
@@ -318,6 +344,13 @@ and exhibited clearly, with a label attached.\
}
#[test]
+ // Searching SHERLOCK for "the" with count_matches enabled should
+ // print only the path followed by a count of 4.
+ fn count_matches() {
+ let (_, out) = search(
+ "the", SHERLOCK, |s| s.count_matches(true));
+ assert_eq!(out, "/baz.rs:4\n");
+ }
+
+ #[test]
fn files_with_matches() {
let (count, out) = search(
"Sherlock", SHERLOCK, |s| s.files_with_matches(true));
diff --git a/src/search_stream.rs b/src/search_stream.rs
index 5ccb09b9..b218dd19 100644
--- a/src/search_stream.rs
+++ b/src/search_stream.rs
@@ -68,6 +68,7 @@ pub struct Searcher<'a, R, W: 'a> {
path: &'a Path,
haystack: R,
match_line_count: u64,
+ match_count: Option<u64>,
line_count: Option<u64>,
byte_offset: Option<u64>,
last_match: Match,
@@ -83,6 +84,7 @@ pub struct Options {
pub before_context: usize,
pub byte_offset: bool,
pub count: bool,
+ pub count_matches: bool,
pub files_with_matches: bool,
pub files_without_matches: bool,
pub eol: u8,
@@ -100,6 +102,7 @@ impl Default for Options {
before_context: 0,
byte_offset: false,
count: false,
+ count_matches: false,
files_with_matches: false,
files_without_matches: false,
eol: b'\n',
@@ -114,11 +117,11 @@ impl Default for Options {
}
impl Options {
- /// Several options (--quiet, --count, --files-with-matches,
+ /// Several options (--quiet, --count, --count-matches, --files-with-matches,
/// --files-without-match) imply that we shouldn't ever display matches.
pub fn skip_matches(&self) -> bool {
self.count || self.files_with_matches || self.files_without_matches
- || self.quiet
+ || self.quiet || self.count_matches
}
/// Some options (--quiet, --files-with-matches, --files-without-match)
@@ -167,6 +170,7 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> {
path: path,
haystack: haystack,
match_line_count: 0,
+ match_count: None,
line_count: None,
byte_offset: None,
last_match: Match::default(),
@@ -208,6 +212,15 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> {
self
}
+ /// If enabled, searching will print the total number of individual
+ /// matches (rather than the number of matching lines) instead of
+ /// printing each match.
+ ///
+ /// Disabled by default.
+ pub fn count_matches(mut self, yes: bool) -> Self {
+ self.opts.count_matches = yes;
+ self
+ }
+
/// If enabled, searching will print the path instead of each match.
///
/// Disabled by default.
@@ -274,6 +287,7 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> {
self.match_line_count = 0;
self.line_count = if self.opts.line_number { Some(0) } else { None };
self.byte_offset = if self.opts.byte_offset { Some(0) } else { None };
+ self.match_count = if self.opts.count_matches { Some(0) } else { None };
self.last_match = Match::default();
self.after_context_remaining = 0;
while !self.terminate() {
@@ -326,6 +340,8 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> {
if self.match_line_count > 0 {
if self.opts.count {
self.printer.path_count(self.path, self.match_line_count);
+ } else if self.opts.count_matches {
+ self.printer.path_count(self.path, self.match_count.unwrap());
} else if self.opts.files_with_matches {
self.printer.path(self.path);
}
@@ -428,6 +444,7 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> {
#[inline(always)]
fn print_match(&mut self, start: usize, end: usize) {
self.match_line_count += 1;
+ self.count_individual_matches(start, end);
if self.opts.skip_matches() {
return;
}
@@ -473,6 +490,15 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> {
}
#[inline(always)]
+ /// Adds the number of regex matches found within
+ /// `inp.buf[start..end]` to `match_count`.
+ ///
+ /// This is a no-op when `match_count` is `None`.
+ fn count_individual_matches(&mut self, start: usize, end: usize) {
+ if let Some(ref mut count) = self.match_count {
+ for _ in self.grep.regex().find_iter(&self.inp.buf[start..end]) {
+ *count += 1;
+ }
+ }
+ }
+
+ #[inline(always)]
fn count_lines(&mut self, upto: usize) {
if let Some(ref mut line_count) = self.line_count {
*line_count += count_lines(
@@ -1067,6 +1093,13 @@ fn main() {
}
#[test]
+ // Searching SHERLOCK for "the" with count_matches enabled should
+ // print only the path followed by a count of 4.
+ fn count_matches() {
+ let (_, out) = search_smallcap(
+ "the", SHERLOCK, |s| s.count_matches(true));
+ assert_eq!(out, "/baz.rs:4\n");
+ }
+
+ #[test]
fn files_with_matches() {
let (count, out) = search_smallcap(
"Sherlock", SHERLOCK, |s| s.files_with_matches(true));
diff --git a/src/worker.rs b/src/worker.rs
index e5f7546a..a8327cda 100644
--- a/src/worker.rs
+++ b/src/worker.rs
@@ -35,6 +35,7 @@ struct Options {
before_context: usize,
byte_offset: bool,
count: bool,
+ count_matches: bool,
files_with_matches: bool,
files_without_matches: bool,
eol: u8,
@@ -56,6 +57,7 @@ impl Default for Options {
before_context: 0,
byte_offset: false,
count: false,
+ count_matches: false,
files_with_matches: false,
files_without_matches: false,
eol: b'\n',
@@ -126,6 +128,15 @@ impl WorkerBuilder {
self
}
+ /// If enabled, searching will print the total number of individual
+ /// matches (rather than the number of matching lines) instead of
+ /// printing each match.
+ ///
+ /// Disabled by default.
+ pub fn count_matches(mut self, yes: bool) -> Self {
+ self.opts.count_matches = yes;
+ self
+ }
+
/// Set the encoding to use to read each file.
///
/// If the encoding is `None` (the default), then the encoding is
@@ -297,6 +308,7 @@ impl Worker {
.before_context(self.opts.before_context)
.byte_offset(self.opts.byte_offset)
.count(self.opts.count)
+ .count_matches(self.opts.count_matches)
.files_with_matches(self.opts.files_with_matches)
.files_without_matches(self.opts.files_without_matches)
.eol(self.opts.eol)
@@ -337,6 +349,7 @@ impl Worker {
Ok(searcher
.byte_offset(self.opts.byte_offset)
.count(self.opts.count)
+ .count_matches(self.opts.count_matches)
.files_with_matches(self.opts.files_with_matches)
.files_without_matches(self.opts.files_without_matches)
.eol(self.opts.eol)