summary refs log tree commit diff stats
path: root/src/search_buffer.rs
diff options
context:
space:
mode:
author Andrew Gallant <jamslam@gmail.com> 2018-08-03 17:26:22 -0400
committer Andrew Gallant <jamslam@gmail.com> 2018-08-20 07:10:19 -0400
commit bb110c1ebeeda452046830b3991f705f5759da92 (patch)
tree cc2b0112a3ca9b8d05cf1e953553907d71564082 /src/search_buffer.rs
parent d9ca5293569efb255608d3c601107bcfe7060f15 (diff)
ripgrep: migrate to libripgrep
This commit does the work to delete the old `grep` crate and effectively rewrite most of ripgrep core to use the new libripgrep crates. The new `grep` crate is now a facade that collects the various crates that make up libripgrep. The most complex part of ripgrep core is now arguably the translation between command line parameters and the library options, which is ultimately where we want to be.
Diffstat (limited to 'src/search_buffer.rs')
-rw-r--r-- src/search_buffer.rs 424
1 files changed, 0 insertions, 424 deletions
diff --git a/src/search_buffer.rs b/src/search_buffer.rs
deleted file mode 100644
index 2777a06c..00000000
--- a/src/search_buffer.rs
+++ /dev/null
@@ -1,424 +0,0 @@
-/*!
-The `search_buffer` module is responsible for searching a single file all in a
-single buffer. Typically, the source of the buffer is a memory map. This can
-be useful for when memory maps are faster than streaming search.
-
-Note that this module doesn't quite support everything that `search_stream`
-does. Notably, showing contexts.
-*/
-use std::cmp;
-use std::path::Path;
-
-use grep::Grep;
-use termcolor::WriteColor;
-
-use printer::Printer;
-use search_stream::{IterLines, Options, count_lines, is_binary};
-
-pub struct BufferSearcher<'a, W: 'a> {
- opts: Options,
- printer: &'a mut Printer<W>,
- grep: &'a Grep,
- path: &'a Path,
- buf: &'a [u8],
- match_line_count: u64,
- match_count: Option<u64>,
- line_count: Option<u64>,
- byte_offset: Option<u64>,
- last_line: usize,
-}
-
-impl<'a, W: WriteColor> BufferSearcher<'a, W> {
- pub fn new(
- printer: &'a mut Printer<W>,
- grep: &'a Grep,
- path: &'a Path,
- buf: &'a [u8],
- ) -> BufferSearcher<'a, W> {
- BufferSearcher {
- opts: Options::default(),
- printer: printer,
- grep: grep,
- path: path,
- buf: buf,
- match_line_count: 0,
- match_count: None,
- line_count: None,
- byte_offset: None,
- last_line: 0,
- }
- }
-
- /// If enabled, searching will print a 0-based offset of the
- /// matching line (or the actual match if -o is specified) before
- /// printing the line itself.
- ///
- /// Disabled by default.
- pub fn byte_offset(mut self, yes: bool) -> Self {
- self.opts.byte_offset = yes;
- self
- }
-
- /// If enabled, searching will print a count instead of each match.
- ///
- /// Disabled by default.
- pub fn count(mut self, yes: bool) -> Self {
- self.opts.count = yes;
- self
- }
-
- /// If enabled, searching will print the count of individual matches
- /// instead of each match.
- ///
- /// Disabled by default.
- pub fn count_matches(mut self, yes: bool) -> Self {
- self.opts.count_matches = yes;
- self
- }
-
- /// If enabled, searching will print the path instead of each match.
- ///
- /// Disabled by default.
- pub fn files_with_matches(mut self, yes: bool) -> Self {
- self.opts.files_with_matches = yes;
- self
- }
-
- /// If enabled, searching will print the path of files that *don't* match
- /// the given pattern.
- ///
- /// Disabled by default.
- pub fn files_without_matches(mut self, yes: bool) -> Self {
- self.opts.files_without_matches = yes;
- self
- }
-
- /// Set the end-of-line byte used by this searcher.
- pub fn eol(mut self, eol: u8) -> Self {
- self.opts.eol = eol;
- self
- }
-
- /// If enabled, matching is inverted so that lines that *don't* match the
- /// given pattern are treated as matches.
- pub fn invert_match(mut self, yes: bool) -> Self {
- self.opts.invert_match = yes;
- self
- }
-
- /// If enabled, compute line numbers and prefix each line of output with
- /// them.
- pub fn line_number(mut self, yes: bool) -> Self {
- self.opts.line_number = yes;
- self
- }
-
- /// Limit the number of matches to the given count.
- ///
- /// The default is None, which corresponds to no limit.
- pub fn max_count(mut self, count: Option<u64>) -> Self {
- self.opts.max_count = count;
- self
- }
-
- /// If enabled, don't show any output and quit searching after the first
- /// match is found.
- pub fn quiet(mut self, yes: bool) -> Self {
- self.opts.quiet = yes;
- self
- }
-
- /// If enabled, search binary files as if they were text.
- pub fn text(mut self, yes: bool) -> Self {
- self.opts.text = yes;
- self
- }
-
- #[inline(never)]
- pub fn run(mut self) -> u64 {
- let binary_upto = cmp::min(10_240, self.buf.len());
- if !self.opts.text && is_binary(&self.buf[..binary_upto], true) {
- return 0;
- }
-
- self.match_line_count = 0;
- self.line_count = if self.opts.line_number { Some(0) } else { None };
- // The memory map searcher uses one contiguous block of bytes, so the
- // offsets given the printer are sufficient to compute the byte offset.
- self.byte_offset = if self.opts.byte_offset { Some(0) } else { None };
- self.match_count = if self.opts.count_matches { Some(0) } else { None };
- let mut last_end = 0;
- for m in self.grep.iter(self.buf) {
- if self.opts.invert_match {
- self.print_inverted_matches(last_end, m.start());
- } else {
- self.print_match(m.start(), m.end());
- }
- last_end = m.end();
- if self.opts.terminate(self.match_line_count) {
- break;
- }
- }
- if self.opts.invert_match && !self.opts.terminate(self.match_line_count) {
- let upto = self.buf.len();
- self.print_inverted_matches(last_end, upto);
- }
- if self.opts.count && self.match_line_count > 0 {
- self.printer.path_count(self.path, self.match_line_count);
- } else if self.opts.count_matches
- && self.match_count.map_or(false, |c| c > 0)
- {
- self.printer.path_count(self.path, self.match_count.unwrap());
- }
- if self.opts.files_with_matches && self.match_line_count > 0 {
- self.printer.path(self.path);
- }
- if self.opts.files_without_matches && self.match_line_count == 0 {
- self.printer.path(self.path);
- }
- self.match_line_count
- }
-
- #[inline(always)]
- fn count_individual_matches(&mut self, start: usize, end: usize) {
- if let Some(ref mut count) = self.match_count {
- for _ in self.grep.regex().find_iter(&self.buf[start..end]) {
- *count += 1;
- }
- }
- }
-
- #[inline(always)]
- pub fn print_match(&mut self, start: usize, end: usize) {
- self.match_line_count += 1;
- self.count_individual_matches(start, end);
- if self.opts.skip_matches() {
- return;
- }
- self.count_lines(start);
- self.add_line(end);
- self.printer.matched(
- self.grep.regex(), self.path, self.buf,
- start, end, self.line_count, self.byte_offset);
- }
-
- #[inline(always)]
- fn print_inverted_matches(&mut self, start: usize, end: usize) {
- debug_assert!(self.opts.invert_match);
- let mut it = IterLines::new(self.opts.eol, start);
- while let Some((s, e)) = it.next(&self.buf[..end]) {
- if self.opts.terminate(self.match_line_count) {
- return;
- }
- self.print_match(s, e);
- }
- }
-
- #[inline(always)]
- fn count_lines(&mut self, upto: usize) {
- if let Some(ref mut line_count) = self.line_count {
- *line_count += count_lines(
- &self.buf[self.last_line..upto], self.opts.eol);
- self.last_line = upto;
- }
- }
-
- #[inline(always)]
- fn add_line(&mut self, line_end: usize) {
- if let Some(ref mut line_count) = self.line_count {
- *line_count += 1;
- self.last_line = line_end;
- }
- }
-}
-
-#[cfg(test)]
-mod tests {
- use std::path::Path;
-
- use grep::GrepBuilder;
-
- use printer::Printer;
- use termcolor;
-
- use super::BufferSearcher;
-
- const SHERLOCK: &'static str = "\
-For the Doctor Watsons of this world, as opposed to the Sherlock
-Holmeses, success in the province of detective work must always
-be, to a very large extent, the result of luck. Sherlock Holmes
-can extract a clew from a wisp of straw or a flake of cigar ash;
-but Doctor Watson has to have it taken out for him and dusted,
-and exhibited clearly, with a label attached.\
-";
-
- fn test_path() -> &'static Path {
- &Path::new("/baz.rs")
- }
-
- type TestSearcher<'a> = BufferSearcher<'a, termcolor::NoColor<Vec<u8>>>;
-
- fn search<F: FnMut(TestSearcher) -> TestSearcher>(
- pat: &str,
- haystack: &str,
- mut map: F,
- ) -> (u64, String) {
- let outbuf = termcolor::NoColor::new(vec![]);
- let mut pp = Printer::new(outbuf).with_filename(true);
- let grep = GrepBuilder::new(pat).build().unwrap();
- let count = {
- let searcher = BufferSearcher::new(
- &mut pp, &grep, test_path(), haystack.as_bytes());
- map(searcher).run()
- };
- (count, String::from_utf8(pp.into_inner().into_inner()).unwrap())
- }
-
- #[test]
- fn basic_search() {
- let (count, out) = search("Sherlock", SHERLOCK, |s|s);
- assert_eq!(2, count);
- assert_eq!(out, "\
-/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock
-/baz.rs:be, to a very large extent, the result of luck. Sherlock Holmes
-");
- }
-
- #[test]
- fn binary() {
- let text = "Sherlock\n\x00Holmes\n";
- let (count, out) = search("Sherlock|Holmes", text, |s|s);
- assert_eq!(0, count);
- assert_eq!(out, "");
- }
-
-
- #[test]
- fn binary_text() {
- let text = "Sherlock\n\x00Holmes\n";
- let (count, out) = search("Sherlock|Holmes", text, |s| s.text(true));
- assert_eq!(2, count);
- assert_eq!(out, "/baz.rs:Sherlock\n/baz.rs:\x00Holmes\n");
- }
-
- #[test]
- fn line_numbers() {
- let (count, out) = search(
- "Sherlock", SHERLOCK, |s| s.line_number(true));
- assert_eq!(2, count);
- assert_eq!(out, "\
-/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock
-/baz.rs:3:be, to a very large extent, the result of luck. Sherlock Holmes
-");
- }
-
- #[test]
- fn byte_offset() {
- let (_, out) = search(
- "Sherlock", SHERLOCK, |s| s.byte_offset(true));
- assert_eq!(out, "\
-/baz.rs:0:For the Doctor Watsons of this world, as opposed to the Sherlock
-/baz.rs:129:be, to a very large extent, the result of luck. Sherlock Holmes
-");
- }
-
- #[test]
- fn byte_offset_inverted() {
- let (_, out) = search("Sherlock", SHERLOCK, |s| {
- s.invert_match(true).byte_offset(true)
- });
- assert_eq!(out, "\
-/baz.rs:65:Holmeses, success in the province of detective work must always
-/baz.rs:193:can extract a clew from a wisp of straw or a flake of cigar ash;
-/baz.rs:258:but Doctor Watson has to have it taken out for him and dusted,
-/baz.rs:321:and exhibited clearly, with a label attached.
-");
- }
-
- #[test]
- fn count() {
- let (count, out) = search(
- "Sherlock", SHERLOCK, |s| s.count(true));
- assert_eq!(2, count);
- assert_eq!(out, "/baz.rs:2\n");
- }
-
- #[test]
- fn count_matches() {
- let (_, out) = search(
- "the", SHERLOCK, |s| s.count_matches(true));
- assert_eq!(out, "/baz.rs:4\n");
- }
-
- #[test]
- fn files_with_matches() {
- let (count, out) = search(
- "Sherlock", SHERLOCK, |s| s.files_with_matches(true));
- assert_eq!(1, count);
- assert_eq!(out, "/baz.rs\n");
- }
-
- #[test]
- fn files_without_matches() {
- let (count, out) = search(
- "zzzz", SHERLOCK, |s| s.files_without_matches(true));
- assert_eq!(0, count);
- assert_eq!(out, "/baz.rs\n");
- }
-
- #[test]
- fn max_count() {
- let (count, out) = search(
- "Sherlock", SHERLOCK, |s| s.max_count(Some(1)));
- assert_eq!(1, count);
- assert_eq!(out, "\
-/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock
-");
- }
-
- #[test]
- fn invert_match_max_count() {
- let (count, out) = search(
- "zzzz", SHERLOCK, |s| s.invert_match(true).max_count(Some(1)));
- assert_eq!(1, count);
- assert_eq!(out, "\
-/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock
-");
- }
-
- #[test]
- fn invert_match() {
- let (count, out) = search(
- "Sherlock", SHERLOCK, |s| s.invert_match(true));
- assert_eq!(4, count);
- assert_eq!(out, "\
-/baz.rs:Holmeses, success in the province of detective work must always
-/baz.rs:can extract a clew from a wisp of straw or a flake of cigar ash;
-/baz.rs:but Doctor Watson has to have it taken out for him and dusted,
-/baz.rs:and exhibited clearly, with a label attached.
-");
- }
-
- #[test]
- fn invert_match_line_numbers() {
- let (count, out) = search("Sherlock", SHERLOCK, |s| {
- s.invert_match(true).line_number(true)
- });
- assert_eq!(4, count);
- assert_eq!(out, "\
-/baz.rs:2:Holmeses, success in the province of detective work must always
-/baz.rs:4:can extract a clew from a wisp of straw or a flake of cigar ash;
-/baz.rs:5:but Doctor Watson has to have it taken out for him and dusted,
-/baz.rs:6:and exhibited clearly, with a label attached.
-");
- }
-
- #[test]
- fn invert_match_count() {
- let (count, out) = search("Sherlock", SHERLOCK, |s| {
- s.invert_match(true).count(true)
- });
- assert_eq!(4, count);
- assert_eq!(out, "/baz.rs:4\n");
- }
-}