/*! The `search_buffer` module is responsible for searching a single file all in a single buffer. Typically, the source of the buffer is a memory map. This can be useful for when memory maps are faster than streaming search. Note that this module doesn't quite support everything that `search_stream` does. Notably, showing contexts. */ use std::cmp; use std::path::Path; use grep::Grep; use termcolor::WriteColor; use printer::Printer; use search_stream::{IterLines, Options, count_lines, is_binary}; pub struct BufferSearcher<'a, W: 'a> { opts: Options, printer: &'a mut Printer, grep: &'a Grep, path: &'a Path, buf: &'a [u8], match_line_count: u64, match_count: Option, line_count: Option, byte_offset: Option, last_line: usize, } impl<'a, W: WriteColor> BufferSearcher<'a, W> { pub fn new( printer: &'a mut Printer, grep: &'a Grep, path: &'a Path, buf: &'a [u8], ) -> BufferSearcher<'a, W> { BufferSearcher { opts: Options::default(), printer: printer, grep: grep, path: path, buf: buf, match_line_count: 0, match_count: None, line_count: None, byte_offset: None, last_line: 0, } } /// If enabled, searching will print a 0-based offset of the /// matching line (or the actual match if -o is specified) before /// printing the line itself. /// /// Disabled by default. pub fn byte_offset(mut self, yes: bool) -> Self { self.opts.byte_offset = yes; self } /// If enabled, searching will print a count instead of each match. /// /// Disabled by default. pub fn count(mut self, yes: bool) -> Self { self.opts.count = yes; self } /// If enabled, searching will print the count of individual matches /// instead of each match. /// /// Disabled by default. pub fn count_matches(mut self, yes: bool) -> Self { self.opts.count_matches = yes; self } /// If enabled, searching will print the path instead of each match. /// /// Disabled by default. pub fn files_with_matches(mut self, yes: bool) -> Self { self.opts.files_with_matches = yes; self } /// If enabled, searching will print the path of files that *don't* match /// the given pattern. /// /// Disabled by default. pub fn files_without_matches(mut self, yes: bool) -> Self { self.opts.files_without_matches = yes; self } /// Set the end-of-line byte used by this searcher. pub fn eol(mut self, eol: u8) -> Self { self.opts.eol = eol; self } /// If enabled, matching is inverted so that lines that *don't* match the /// given pattern are treated as matches. pub fn invert_match(mut self, yes: bool) -> Self { self.opts.invert_match = yes; self } /// If enabled, compute line numbers and prefix each line of output with /// them. pub fn line_number(mut self, yes: bool) -> Self { self.opts.line_number = yes; self } /// Limit the number of matches to the given count. /// /// The default is None, which corresponds to no limit. pub fn max_count(mut self, count: Option) -> Self { self.opts.max_count = count; self } /// If enabled, don't show any output and quit searching after the first /// match is found. pub fn quiet(mut self, yes: bool) -> Self { self.opts.quiet = yes; self } /// If enabled, search binary files as if they were text. pub fn text(mut self, yes: bool) -> Self { self.opts.text = yes; self } #[inline(never)] pub fn run(mut self) -> u64 { let binary_upto = cmp::min(10_240, self.buf.len()); if !self.opts.text && is_binary(&self.buf[..binary_upto], true) { return 0; } self.match_line_count = 0; self.line_count = if self.opts.line_number { Some(0) } else { None }; // The memory map searcher uses one contiguous block of bytes, so the // offsets given the printer are sufficient to compute the byte offset. self.byte_offset = if self.opts.byte_offset { Some(0) } else { None }; self.match_count = if self.opts.count_matches { Some(0) } else { None }; let mut last_end = 0; for m in self.grep.iter(self.buf) { if self.opts.invert_match { self.print_inverted_matches(last_end, m.start()); } else { self.print_match(m.start(), m.end()); } last_end = m.end(); if self.opts.terminate(self.match_line_count) { break; } } if self.opts.invert_match && !self.opts.terminate(self.match_line_count) { let upto = self.buf.len(); self.print_inverted_matches(last_end, upto); } if self.opts.count && self.match_line_count > 0 { self.printer.path_count(self.path, self.match_line_count); } else if self.opts.count_matches && self.match_count.map_or(false, |c| c > 0) { self.printer.path_count(self.path, self.match_count.unwrap()); } if self.opts.files_with_matches && self.match_line_count > 0 { self.printer.path(self.path); } if self.opts.files_without_matches && self.match_line_count == 0 { self.printer.path(self.path); } self.match_line_count } #[inline(always)] fn count_individual_matches(&mut self, start: usize, end: usize) { if let Some(ref mut count) = self.match_count { for _ in self.grep.regex().find_iter(&self.buf[start..end]) { *count += 1; } } } #[inline(always)] pub fn print_match(&mut self, start: usize, end: usize) { self.match_line_count += 1; self.count_individual_matches(start, end); if self.opts.skip_matches() { return; } self.count_lines(start); self.add_line(end); self.printer.matched( self.grep.regex(), self.path, self.buf, start, end, self.line_count, self.byte_offset); } #[inline(always)] fn print_inverted_matches(&mut self, start: usize, end: usize) { debug_assert!(self.opts.invert_match); let mut it = IterLines::new(self.opts.eol, start); while let Some((s, e)) = it.next(&self.buf[..end]) { if self.opts.terminate(self.match_line_count) { return; } self.print_match(s, e); } } #[inline(always)] fn count_lines(&mut self, upto: usize) { if let Some(ref mut line_count) = self.line_count { *line_count += count_lines( &self.buf[self.last_line..upto], self.opts.eol); self.last_line = upto; } } #[inline(always)] fn add_line(&mut self, line_end: usize) { if let Some(ref mut line_count) = self.line_count { *line_count += 1; self.last_line = line_end; } } } #[cfg(test)] mod tests { use std::path::Path; use grep::GrepBuilder; use printer::Printer; use termcolor; use super::BufferSearcher; const SHERLOCK: &'static str = "\ For the Doctor Watsons of this world, as opposed to the Sherlock Holmeses, success in the province of detective work must always be, to a very large extent, the result of luck. Sherlock Holmes can extract a clew from a wisp of straw or a flake of cigar ash; but Doctor Watson has to have it taken out for him and dusted, and exhibited clearly, with a label attached.\ "; fn test_path() -> &'static Path { &Path::new("/baz.rs") } type TestSearcher<'a> = BufferSearcher<'a, termcolor::NoColor>>; fn search TestSearcher>( pat: &str, haystack: &str, mut map: F, ) -> (u64, String) { let outbuf = termcolor::NoColor::new(vec![]); let mut pp = Printer::new(outbuf).with_filename(true); let grep = GrepBuilder::new(pat).build().unwrap(); let count = { let searcher = BufferSearcher::new( &mut pp, &grep, test_path(), haystack.as_bytes()); map(searcher).run() }; (count, String::from_utf8(pp.into_inner().into_inner()).unwrap()) } #[test] fn basic_search() { let (count, out) = search("Sherlock", SHERLOCK, |s|s); assert_eq!(2, count); assert_eq!(out, "\ /baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock /baz.rs:be, to a very large extent, the result of luck. Sherlock Holmes "); } #[test] fn binary() { let text = "Sherlock\n\x00Holmes\n"; let (count, out) = search("Sherlock|Holmes", text, |s|s); assert_eq!(0, count); assert_eq!(out, ""); } #[test] fn binary_text() { let text = "Sherlock\n\x00Holmes\n"; let (count, out) = search("Sherlock|Holmes", text, |s| s.text(true)); assert_eq!(2, count); assert_eq!(out, "/baz.rs:Sherlock\n/baz.rs:\x00Holmes\n"); } #[test] fn line_numbers() { let (count, out) = search( "Sherlock", SHERLOCK, |s| s.line_number(true)); assert_eq!(2, count); assert_eq!(out, "\ /baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock /baz.rs:3:be, to a very large extent, the result of luck. Sherlock Holmes "); } #[test] fn byte_offset() { let (_, out) = search( "Sherlock", SHERLOCK, |s| s.byte_offset(true)); assert_eq!(out, "\ /baz.rs:0:For the Doctor Watsons of this world, as opposed to the Sherlock /baz.rs:129:be, to a very large extent, the result of luck. Sherlock Holmes "); } #[test] fn byte_offset_inverted() { let (_, out) = search("Sherlock", SHERLOCK, |s| { s.invert_match(true).byte_offset(true) }); assert_eq!(out, "\ /baz.rs:65:Holmeses, success in the province of detective work must always /baz.rs:193:can extract a clew from a wisp of straw or a flake of cigar ash; /baz.rs:258:but Doctor Watson has to have it taken out for him and dusted, /baz.rs:321:and exhibited clearly, with a label attached. "); } #[test] fn count() { let (count, out) = search( "Sherlock", SHERLOCK, |s| s.count(true)); assert_eq!(2, count); assert_eq!(out, "/baz.rs:2\n"); } #[test] fn count_matches() { let (_, out) = search( "the", SHERLOCK, |s| s.count_matches(true)); assert_eq!(out, "/baz.rs:4\n"); } #[test] fn files_with_matches() { let (count, out) = search( "Sherlock", SHERLOCK, |s| s.files_with_matches(true)); assert_eq!(1, count); assert_eq!(out, "/baz.rs\n"); } #[test] fn files_without_matches() { let (count, out) = search( "zzzz", SHERLOCK, |s| s.files_without_matches(true)); assert_eq!(0, count); assert_eq!(out, "/baz.rs\n"); } #[test] fn max_count() { let (count, out) = search( "Sherlock", SHERLOCK, |s| s.max_count(Some(1))); assert_eq!(1, count); assert_eq!(out, "\ /baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock "); } #[test] fn invert_match_max_count() { let (count, out) = search( "zzzz", SHERLOCK, |s| s.invert_match(true).max_count(Some(1))); assert_eq!(1, count); assert_eq!(out, "\ /baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock "); } #[test] fn invert_match() { let (count, out) = search( "Sherlock", SHERLOCK, |s| s.invert_match(true)); assert_eq!(4, count); assert_eq!(out, "\ /baz.rs:Holmeses, success in the province of detective work must always /baz.rs:can extract a clew from a wisp of straw or a flake of cigar ash; /baz.rs:but Doctor Watson has to have it taken out for him and dusted, /baz.rs:and exhibited clearly, with a label attached. "); } #[test] fn invert_match_line_numbers() { let (count, out) = search("Sherlock", SHERLOCK, |s| { s.invert_match(true).line_number(true) }); assert_eq!(4, count); assert_eq!(out, "\ /baz.rs:2:Holmeses, success in the province of detective work must always /baz.rs:4:can extract a clew from a wisp of straw or a flake of cigar ash; /baz.rs:5:but Doctor Watson has to have it taken out for him and dusted, /baz.rs:6:and exhibited clearly, with a label attached. "); } #[test] fn invert_match_count() { let (count, out) = search("Sherlock", SHERLOCK, |s| { s.invert_match(true).count(true) }); assert_eq!(4, count); assert_eq!(out, "/baz.rs:4\n"); } }