1 files changed, 936 insertions, 0 deletions
diff --git a/crates/printer/src/json.rs b/crates/printer/src/json.rs
new file mode 100644
index 00000000..1b3bc4f1
--- /dev/null
+++ b/crates/printer/src/json.rs
@@ -0,0 +1,936 @@
+use std::io::{self, Write};
+use std::path::Path;
+use std::time::Instant;
+
+use grep_matcher::{Match, Matcher};
+use grep_searcher::{
+    Searcher, Sink, SinkContext, SinkContextKind, SinkError, SinkFinish,
+    SinkMatch,
+};
+use serde_json as json;
+
+use counter::CounterWriter;
+use jsont;
+use stats::Stats;
+
+/// The configuration for the JSON printer.
+///
+/// This is manipulated by the JSONBuilder and then referenced by the actual
+/// implementation. Once a printer is build, the configuration is frozen and
+/// cannot changed.
+#[derive(Debug, Clone)]
+struct Config {
+    pretty: bool,
+    max_matches: Option<u64>,
+    always_begin_end: bool,
+}
+
+impl Default for Config {
+    fn default() -> Config {
+        Config { pretty: false, max_matches: None, always_begin_end: false }
+    }
+}
+
+/// A builder for a JSON lines printer.
+///
+/// The builder permits configuring how the printer behaves. The JSON printer
+/// has fewer configuration options than the standard printer because it is
+/// a structured format, and the printer always attempts to find the most
+/// information possible.
+///
+/// Some configuration options, such as whether line numbers are included or
+/// whether contextual lines are shown, are drawn directly from the
+/// `grep_searcher::Searcher`'s configuration.
+///
+/// Once a `JSON` printer is built, its configuration cannot be changed.
+#[derive(Clone, Debug)]
+pub struct JSONBuilder {
+    config: Config,
+}
+
+impl JSONBuilder {
+    /// Return a new builder for configuring the JSON printer.
+    pub fn new() -> JSONBuilder {
+        JSONBuilder { config: Config::default() }
+    }
+
+    /// Create a JSON printer that writes results to the given writer.
+    pub fn build<W: io::Write>(&self, wtr: W) -> JSON<W> {
+        JSON {
+            config: self.config.clone(),
+            wtr: CounterWriter::new(wtr),
+            matches: vec![],
+        }
+    }
+
+    /// Print JSON in a pretty printed format.
+    ///
+    /// Enabling this will no longer produce a "JSON lines" format, in that
+    /// each JSON object printed may span multiple lines.
+    ///
+    /// This is disabled by default.
+    pub fn pretty(&mut self, yes: bool) -> &mut JSONBuilder {
+        self.config.pretty = yes;
+        self
+    }
+
+    /// Set the maximum amount of matches that are printed.
+    ///
+    /// If multi line search is enabled and a match spans multiple lines, then
+    /// that match is counted exactly once for the purposes of enforcing this
+    /// limit, regardless of how many lines it spans.
+    pub fn max_matches(&mut self, limit: Option<u64>) -> &mut JSONBuilder {
+        self.config.max_matches = limit;
+        self
+    }
+
+    /// When enabled, the `begin` and `end` messages are always emitted, even
+    /// when no match is found.
+    ///
+    /// When disabled, the `begin` and `end` messages are only shown if there
+    /// is at least one `match` or `context` message.
+    ///
+    /// This is disabled by default.
+    pub fn always_begin_end(&mut self, yes: bool) -> &mut JSONBuilder {
+        self.config.always_begin_end = yes;
+        self
+    }
+}
+
+/// The JSON printer, which emits results in a JSON lines format.
+///
+/// This type is generic over `W`, which represents any implementation of
+/// the standard library `io::Write` trait.
+///
+/// # Format
+///
+/// This section describes the JSON format used by this printer.
+///
+/// To skip the rigamarole, take a look at the
+/// [example](#example)
+/// at the end.
+///
+/// ## Overview
+///
+/// The format of this printer is the [JSON Lines](http://jsonlines.org/)
+/// format. Specifically, this printer emits a sequence of messages, where
+/// each message is encoded as a single JSON value on a single line. There are
+/// four different types of messages (and this number may expand over time):
+///
+/// * **begin** - A message that indicates a file is being searched.
+/// * **end** - A message the indicates a file is done being searched. This
+///   message also include summary statistics about the search.
+/// * **match** - A message that indicates a match was found. This includes
+///   the text and offsets of the match.
+/// * **context** - A message that indicates a contextual line was found.
+///   This includes the text of the line, along with any match information if
+///   the search was inverted.
+///
+/// Every message is encoded in the same envelope format, which includes a tag
+/// indicating the message type along with an object for the payload:
+///
+/// ```json
+/// {
+///     "type": "{begin|end|match|context}",
+///     "data": { ... }
+/// }
+/// ```
+///
+/// The message itself is encoded in the envelope's `data` key.
+///
+/// ## Text encoding
+///
+/// Before describing each message format, we first must briefly discuss text
+/// encoding, since it factors into every type of message. In particular, JSON
+/// may only be encoded in UTF-8, UTF-16 or UTF-32. For the purposes of this
+/// printer, we need only worry about UTF-8. The problem here is that searching
+/// is not limited to UTF-8 exclusively, which in turn implies that matches
+/// may be reported that contain invalid UTF-8. Moreover, this printer may
+/// also print file paths, and the encoding of file paths is itself not
+/// guarnateed to be valid UTF-8. Therefore, this printer must deal with the
+/// presence of invalid UTF-8 somehow. The printer could silently ignore such
+/// things completely, or even lossily transcode invalid UTF-8 to valid UTF-8
+/// by replacing all invalid sequences with the Unicode replacement character.
+/// However, this would prevent consumers of this format from accessing the
+/// original data in a non-lossy way.
+///
+/// Therefore, this printer will emit valid UTF-8 encoded bytes as normal
+/// JSON strings and otherwise base64 encode data that isn't valid UTF-8. To
+/// communicate whether this process occurs or not, strings are keyed by the
+/// name `text` where as arbitrary bytes are keyed by `bytes`.
+///
+/// For example, when a path is included in a message, it is formatted like so,
+/// if and only if the path is valid UTF-8:
+///
+/// ```json
+/// {
+///     "path": {
+///         "text": "/home/ubuntu/lib.rs"
+///     }
+/// }
+/// ```
+///
+/// If instead our path was `/home/ubuntu/lib\xFF.rs`, where the `\xFF` byte
+/// makes it invalid UTF-8, the path would instead be encoded like so:
+///
+/// ```json
+/// {
+///     "path": {
+///         "bytes": "L2hvbWUvdWJ1bnR1L2xpYv8ucnM="
+///     }
+/// }
+/// ```
+///
+/// This same representation is used for reporting matches as well.
+///
+/// The printer guarantees that the `text` field is used whenever the
+/// underlying bytes are valid UTF-8.
+///
+/// ## Wire format
+///
+/// This section documents the wire format emitted by this printer, starting
+/// with the four types of messages.
+///
+/// Each message has its own format, and is contained inside an envelope that
+/// indicates the type of message. The envelope has these fields:
+///
+/// * **type** - A string indicating the type of this message. It may be one
+///   of four possible strings: `begin`, `end`, `match` or `context`. This
+///   list may expand over time.
+/// * **data** - The actual message data. The format of this field depends on
+///   the value of `type`. The possible message formats are
+///   [`begin`](#message-begin),
+///   [`end`](#message-end),
+///   [`match`](#message-match),
+///   [`context`](#message-context).
+///
+/// #### Message: **begin**
+///
+/// This message indicates that a search has begun. It has these fields:
+///
+/// * **path** - An
+///   [arbitrary data object](#object-arbitrary-data)
+///   representing the file path corresponding to the search, if one is
+///   present. If no file path is available, then this field is `null`.
+///
+/// #### Message: **end**
+///
+/// This message indicates that a search has finished. It has these fields:
+///
+/// * **path** - An
+///   [arbitrary data object](#object-arbitrary-data)
+///   representing the file path corresponding to the search, if one is
+///   present. If no file path is available, then this field is `null`.
+/// * **binary_offset** - The absolute offset in the data searched
+///   corresponding to the place at which binary data was detected. If no
+///   binary data was detected (or if binary detection was disabled), then this
+///   field is `null`.
+/// * **stats** - A [`stats` object](#object-stats) that contains summary
+///   statistics for the previous search.
+///
+/// #### Message: **match**
+///
+/// This message indicates that a match has been found. A match generally
+/// corresponds to a single line of text, although it may correspond to
+/// multiple lines if the search can emit matches over multiple lines. It
+/// has these fields:
+///
+/// * **path** - An
+///   [arbitrary data object](#object-arbitrary-data)
+///   representing the file path corresponding to the search, if one is
+///   present. If no file path is available, then this field is `null`.
+/// * **lines** - An
+///   [arbitrary data object](#object-arbitrary-data)
+///   representing one or more lines contained in this match.
+/// * **line_number** - If the searcher has been configured to report line
+///   numbers, then this corresponds to the line number of the first line
+///   in `lines`. If no line numbers are available, then this is `null`.
+/// * **absolute_offset** - The absolute byte offset corresponding to the start
+///   of `lines` in the data being searched.
+/// * **submatches** - An array of [`submatch` objects](#object-submatch)
+///   corresponding to matches in `lines`. The offsets included in each
+///   `submatch` correspond to byte offsets into `lines`. (If `lines` is base64
+///   encoded, then the byte offsets correspond to the data after base64
+///   decoding.) The `submatch` objects are guaranteed to be sorted by their
+///   starting offsets. Note that it is possible for this array to be empty,
+///   for example, when searching reports inverted matches.
+///
+/// #### Message: **context**
+///
+/// This message indicates that a contextual line has been found. A contextual
+/// line is a line that doesn't contain a match, but is generally adjacent to
+/// a line that does contain a match. The precise way in which contextual lines
+/// are reported is determined by the searcher. It has these fields, which are
+/// exactly the same fields found in a [`match`](#message-match):
+///
+/// * **path** - An
+///   [arbitrary data object](#object-arbitrary-data)
+///   representing the file path corresponding to the search, if one is
+///   present. If no file path is available, then this field is `null`.
+/// * **lines** - An
+///   [arbitrary data object](#object-arbitrary-data)
+///   representing one or more lines contained in this context. This includes
+///   line terminators, if they're present.
+/// * **line_number** - If the searcher has been configured to report line
+///   numbers, then this corresponds to the line number of the first line
+///   in `lines`. If no line numbers are available, then this is `null`.
+/// * **absolute_offset** - The absolute byte offset corresponding to the start
+///   of `lines` in the data being searched.
+/// * **submatches** - An array of [`submatch` objects](#object-submatch)
+///   corresponding to matches in `lines`. The offsets included in each
+///   `submatch` correspond to byte offsets into `lines`. (If `lines` is base64
+///   encoded, then the byte offsets correspond to the data after base64
+///   decoding.) The `submatch` objects are guaranteed to be sorted by
+///   their starting offsets. Note that it is possible for this array to be
+///   non-empty, for example, when searching reports inverted matches such that
+///   the original matcher could match things in the contextual lines.
+///
+/// #### Object: **submatch**
+///
+/// This object describes submatches found within `match` or `context`
+/// messages. The `start` and `end` fields indicate the half-open interval on
+/// which the match occurs (`start` is included, but `end` is not). It is
+/// guaranteed that `start <= end`. It has these fields:
+///
+/// * **match** - An
+///   [arbitrary data object](#object-arbitrary-data)
+///   corresponding to the text in this submatch.
+/// * **start** - A byte offset indicating the start of this match. This offset
+///   is generally reported in terms of the parent object's data. For example,
+///   the `lines` field in the
+///   [`match`](#message-match) or [`context`](#message-context)
+///   messages.
+/// * **end** - A byte offset indicating the end of this match. This offset
+///   is generally reported in terms of the parent object's data. For example,
+///   the `lines` field in the
+///   [`match`](#message-match) or [`context`](#message-context)
+///   messages.
+///
+/// #### Object: **stats**
+///
+/// This object is included in messages and contains summary statistics about
+/// a search. It has these fields:
+///
+/// * **elapsed** - A [`duration` object](#object-duration) describing the
+///   length of time that elapsed while performing the search.
+/// * **searches** - The number of searches that have run. For this printer,
+///   this value is always `1`. (Implementations may emit additional message
+///   types that use this same `stats` object that represents summary
+///   statistics over multiple searches.)
+/// * **searches_with_match** - The number of searches that have run that have
+///   found at least one match. This is never more than `searches`.
+/// * **bytes_searched** - The total number of bytes that have been searched.
+/// * **bytes_printed** - The total number of bytes that have been printed.
+///   This includes everything emitted by this printer.
+/// * **matched_lines** - The total number of lines that participated in a
+///   match. When matches may contain multiple lines, then this includes every
+///   line that is part of every match.
+/// * **matches** - The total number of matches. There may be multiple matches
+///   per line. When matches may contain multiple lines, each match is counted
+///   only once, regardless of how many lines it spans.
+///
+/// #### Object: **duration**
+///
+/// This object includes a few fields for describing a duration. Two of its
+/// fields, `secs` and `nanos`, can be combined to give nanosecond precision
+/// on systems that support it. It has these fields:
+///
+/// * **secs** - A whole number of seconds indicating the length of this
+///   duration.
+/// * **nanos** - A fractional part of this duration represent by nanoseconds.
+///   If nanosecond precision isn't supported, then this is typically rounded
+///   up to the nearest number of nanoseconds.
+/// * **human** - A human readable string describing the length of the
+///   duration. The format of the string is itself unspecified.
+///
+/// #### Object: **arbitrary data**
+///
+/// This object is used whenever arbitrary data needs to be represented as a
+/// JSON value. This object contains two fields, where generally only one of
+/// the fields is present:
+///
+/// * **text** - A normal JSON string that is UTF-8 encoded. This field is
+///   populated if and only if the underlying data is valid UTF-8.
+/// * **bytes** - A normal JSON string that is a base64 encoding of the
+///   underlying bytes.
+///
+/// More information on the motivation for this representation can be seen in
+/// the section [text encoding](#text-encoding) above.
+///
+/// ## Example
+///
+/// This section shows a small example that includes all message types.
+///
+/// Here's the file we want to search, located at `/home/andrew/sherlock`:
+///
+/// ```text
+/// For the Doctor Watsons of this world, as opposed to the Sherlock
+/// Holmeses, success in the province of detective work must always
+/// be, to a very large extent, the result of luck. Sherlock Holmes
+/// can extract a clew from a wisp of straw or a flake of cigar ash;
+/// but Doctor Watson has to have it taken out for him and dusted,
+/// and exhibited clearly, with a label attached.
+/// ```
+///
+/// Searching for `Watson` with a `before_context` of `1` with line numbers
+/// enabled shows something like this using the standard printer:
+///
+/// ```text
+/// sherlock:1:For the Doctor Watsons of this world, as opposed to the Sherlock
+/// --
+/// sherlock-4-can extract a clew from a wisp of straw or a flake of cigar ash;
+/// sherlock:5:but Doctor Watson has to have it taken out for him and dusted,
+/// ```
+///
+/// Here's what the same search looks like using the JSON wire format described
+/// above, where in we show semi-prettified JSON (instead of a strict JSON
+/// Lines format), for illustrative purposes:
+///
+/// ```json
+/// {
+///   "type": "begin",
+///   "data": {
+///     "path": {"text": "/home/andrew/sherlock"}}
+///   }
+/// }
+/// {
+///   "type": "match",
+///   "data": {
+///     "path": {"text": "/home/andrew/sherlock"},
+///     "lines": {"text": "For the Doctor Watsons of this world, as opposed to the Sherlock\n"},
+///     "line_number": 1,
+///     "absolute_offset": 0,
+///     "submatches": [
+///       {"match": {"text": "Watson"}, "start": 15, "end": 21}
+///     ]
+///   }
+/// }
+/// {
+///   "type": "context",
+///   "data": {
+///     "path": {"text": "/home/andrew/sherlock"},
+///     "lines": {"text": "can extract a clew from a wisp of straw or a flake of cigar ash;\n"},
+///     "line_number": 4,
+///     "absolute_offset": 193,
+///     "submatches": []
+///   }
+/// }
+/// {
+///   "type": "match",
+///   "data": {
+///     "path": {"text": "/home/andrew/sherlock"},
+///     "lines": {"text": "but Doctor Watson has to have it taken out for him and dusted,\n"},
+///     "line_number": 5,
+///     "absolute_offset": 258,
+///     "submatches": [
+///       {"match": {"text": "Watson"}, "start": 11, "end": 17}
+///     ]
+///   }
+/// }
+/// {
+///   "type": "end",
+///   "data": {
+///     "path": {"text": "/home/andrew/sherlock"},
+///     "binary_offset": null,
+///     "stats": {
+///       "elapsed": {"secs": 0, "nanos": 36296, "human": "0.0000s"},
+///       "searches": 1,
+///       "searches_with_match": 1,
+///       "bytes_searched": 367,
+///       "bytes_printed": 1151,
+///       "matched_lines": 2,
+///       "matches": 2
+///     }
+///   }
+/// }
+/// ```
+#[derive(Debug)]
+pub struct JSON<W> {
+    config: Config,
+    wtr: CounterWriter<W>,
+    matches: Vec<Match>,
+}
+
+impl<W: io::Write> JSON<W> {
+    /// Return a JSON lines printer with a default configuration that writes
+    /// matches to the given writer.
+    pub fn new(wtr: W) -> JSON<W> {
+        JSONBuilder::new().build(wtr)
+    }
+
+    /// Return an implementation of `Sink` for the JSON printer.
+    ///
+    /// This does not associate the printer with a file path, which means this
+    /// implementation will never print a file path along with the matches.
+    pub fn sink<'s, M: Matcher>(
+        &'s mut self,
+        matcher: M,
+    ) -> JSONSink<'static, 's, M, W> {
+        JSONSink {
+            matcher: matcher,
+            json: self,
+            path: None,
+            start_time: Instant::now(),
+            match_count: 0,
+            after_context_remaining: 0,
+            binary_byte_offset: None,
+            begin_printed: false,
+            stats: Stats::new(),
+        }
+    }
+
+    /// Return an implementation of `Sink` associated with a file path.
+    ///
+    /// When the printer is associated with a path, then it may, depending on
+    /// its configuration, print the path along with the matches found.
+    pub fn sink_with_path<'p, 's, M, P>(
+        &'s mut self,
+        matcher: M,
+        path: &'p P,
+    ) -> JSONSink<'p, 's, M, W>
+    where
+        M: Matcher,
+        P: ?Sized + AsRef<Path>,
+    {
+        JSONSink {
+            matcher: matcher,
+            json: self,
+            path: Some(path.as_ref()),
+            start_time: Instant::now(),
+            match_count: 0,
+            after_context_remaining: 0,
+            binary_byte_offset: None,
+            begin_printed: false,
+            stats: Stats::new(),
+        }
+    }
+
+    /// Write the given message followed by a new line. The new line is
+    /// determined from the configuration of the given searcher.
+    fn write_message(&mut self, message: &jsont::Message) -> io::Result<()> {
+        if self.config.pretty {
+            json::to_writer_pretty(&mut self.wtr, message)?;
+        } else {
+            json::to_writer(&mut self.wtr, message)?;
+        }
+        self.wtr.write(&[b'\n'])?;
+        Ok(())
+    }
+}
+
+impl<W> JSON<W> {
+    /// Returns true if and only if this printer has written at least one byte
+    /// to the underlying writer during any of the previous searches.
+    pub fn has_written(&self) -> bool {
+        self.wtr.total_count() > 0
+    }
+
+    /// Return a mutable reference to the underlying writer.
+    pub fn get_mut(&mut self) -> &mut W {
+        self.wtr.get_mut()
+    }
+
+    /// Consume this printer and return back ownership of the underlying
+    /// writer.
+    pub fn into_inner(self) -> W {
+        self.wtr.into_inner()
+    }
+}
+
+/// An implementation of `Sink` associated with a matcher and an optional file
+/// path for the JSON printer.
+///
+/// This type is generic over a few type parameters:
+///
+/// * `'p` refers to the lifetime of the file path, if one is provided. When
+///   no file path is given, then this is `'static`.
+/// * `'s` refers to the lifetime of the
+///   [`JSON`](struct.JSON.html)
+///   printer that this type borrows.
+/// * `M` refers to the type of matcher used by
+///   `grep_searcher::Searcher` that is reporting results to this sink.
+/// * `W` refers to the underlying writer that this printer is writing its
+///   output to.
+#[derive(Debug)]
+pub struct JSONSink<'p, 's, M: Matcher, W: 's> {
+    matcher: M,
+    json: &'s mut JSON<W>,
+    path: Option<&'p Path>,
+    start_time: Instant,
+    match_count: u64,
+    after_context_remaining: u64,
+    binary_byte_offset: Option<u64>,
+    begin_printed: bool,
+    stats: Stats,
+}
+
+impl<'p, 's, M: Matcher, W: io::Write> JSONSink<'p, 's, M, W> {
+    /// Returns true if and only if this printer received a match in the
+    /// previous search.
+    ///
+    /// This is unaffected by the result of searches before the previous
+    /// search.
+    pub fn has_match(&self) -> bool {
+        self.match_count > 0
+    }
+
+    /// Return the total number of matches reported to this sink.
+    ///
+    /// This corresponds to the number of times `Sink::matched` is called.
+    pub fn match_count(&self) -> u64 {
+        self.match_count
+    }
+
+    /// If binary data was found in the previous search, this returns the
+    /// offset at which the binary data was first detected.
+    ///
+    /// The offset returned is an absolute offset relative to the entire
+    /// set of bytes searched.
+    ///
+    /// This is unaffected by the result of searches before the previous
+    /// search. e.g., If the search prior to the previous search found binary
+    /// data but the previous search found no binary data, then this will
+    /// return `None`.
+    pub fn binary_byte_offset(&self) -> Option<u64> {
+        self.binary_byte_offset
+    }
+
+    /// Return a reference to the stats produced by the printer for all
+    /// searches executed on this sink.
+    pub fn stats(&self) -> &Stats {
+        &self.stats
+    }
+
+    /// Execute the matcher over the given bytes and record the match
+    /// locations if the current configuration demands match granularity.
+    fn record_matches(&mut self, bytes: &[u8]) -> io::Result<()> {
+        self.json.matches.clear();
+        // If printing requires knowing the location of each individual match,
+        // then compute and stored those right now for use later. While this
+        // adds an extra copy for storing the matches, we do amortize the
+        // allocation for it and this greatly simplifies the printing logic to
+        // the extent that it's easy to ensure that we never do more than
+        // one search to find the matches.
+        let matches = &mut self.json.matches;
+        self.matcher
+            .find_iter(bytes, |m| {
+                matches.push(m);
+                true
+            })
+            .map_err(io::Error::error_message)?;
+        // Don't report empty matches appearing at the end of the bytes.
+        if !matches.is_empty()
+            && matches.last().unwrap().is_empty()
+            && matches.last().unwrap().start() >= bytes.len()
+        {
+            matches.pop().unwrap();
+        }
+        Ok(())
+    }
+
+    /// Returns true if this printer should quit.
+    ///
+    /// This implements the logic for handling quitting after seeing a certain
+    /// amount of matches. In most cases, the logic is simple, but we must
+    /// permit all "after" contextual lines to print after reaching the limit.
+    fn should_quit(&self) -> bool {
+        let limit = match self.json.config.max_matches {
+            None => return false,
+            Some(limit) => limit,
+        };
+        if self.match_count < limit {
+            return false;
+        }
+        self.after_context_remaining == 0
+    }
+
+    /// Write the "begin" message.
+    fn write_begin_message(&mut self) -> io::Result<()> {
+        if self.begin_printed {
+            return Ok(());
+        }
+        let msg = jsont::Message::Begin(jsont::Begin { path: self.path });
+        self.json.write_message(&msg)?;
+        self.begin_printed = true;
+        Ok(())
+    }
+}
+
+impl<'p, 's, M: Matcher, W: io::Write> Sink for JSONSink<'p, 's, M, W> {
+    type Error = io::Error;
+
+    fn matched(
+        &mut self,
+        searcher: &Searcher,
+        mat: &SinkMatch,
+    ) -> Result<bool, io::Error> {
+        self.write_begin_message()?;
+
+        self.match_count += 1;
+        self.after_context_remaining = searcher.after_context() as u64;
+        self.record_matches(mat.bytes())?;
+        self.stats.add_matches(self.json.matches.len() as u64);
+        self.stats.add_matched_lines(mat.lines().count() as u64);
+
+        let submatches = SubMatches::new(mat.bytes(), &self.json.matches);
+        let msg = jsont::Message::Match(jsont::Match {
+            path: self.path,
+            lines: mat.bytes(),
+            line_number: mat.line_number(),
+            absolute_offset: mat.absolute_byte_offset(),
+            submatches: submatches.as_slice(),
+        });
+        self.json.write_message(&msg)?;
+        Ok(!self.should_quit())
+    }
+
+    fn context(
+        &mut self,
+        searcher: &Searcher,
+        ctx: &SinkContext,
+    ) -> Result<bool, io::Error> {
+        self.write_begin_message()?;
+        self.json.matches.clear();
+
+        if ctx.kind() == &SinkContextKind::After {
+            self.after_context_remaining =
+                self.after_context_remaining.saturating_sub(1);
+        }
+        let submatches = if searcher.invert_match() {
+            self.record_matches(ctx.bytes())?;
+            SubMatches::new(ctx.bytes(), &self.json.matches)
+        } else {
+            SubMatches::empty()
+        };
+        let msg = jsont::Message::Context(jsont::Context {
+            path: self.path,
+            lines: ctx.bytes(),
+            line_number: ctx.line_number(),
+            absolute_offset: ctx.absolute_byte_offset(),
+            submatches: submatches.as_slice(),
+        });
+        self.json.write_message(&msg)?;
+        Ok(!self.should_quit())
+    }
+
+    fn begin(&mut self, _searcher: &Searcher) -> Result<bool, io::Error> {
+        self.json.wtr.reset_count();
+        self.start_time = Instant::now();
+        self.match_count = 0;
+        self.after_context_remaining = 0;
+        self.binary_byte_offset = None;
+        if self.json.config.max_matches == Some(0) {
+            return Ok(false);
+        }
+
+        if !self.json.config.always_begin_end {
+            return Ok(true);
+        }
+        self.write_begin_message()?;
+        Ok(true)
+    }
+
+    fn finish(
+        &mut self,
+        _searcher: &Searcher,
+        finish: &SinkFinish,
+    ) -> Result<(), io::Error> {
+        if !self.begin_printed {
+            return Ok(());
+        }
+
+        self.binary_byte_offset = finish.binary_byte_offset();
+        self.stats.add_elapsed(self.start_time.elapsed());
+        self.stats.add_searches(1);
+        if self.match_count > 0 {
+            self.stats.add_searches_with_match(1);
+        }
+        self.stats.add_bytes_searched(finish.byte_count());
+        self.stats.add_bytes_printed(self.json.wtr.count());
+
+        let msg = jsont::Message::End(jsont::End {
+            path: self.path,
+            binary_offset: finish.binary_byte_offset(),
+            stats: self.stats.clone(),
+        });
+        self.json.write_message(&msg)?;
+        Ok(())
+    }
+}
+
+/// SubMatches represents a set of matches in a contiguous range of bytes.
+///
+/// A simpler representation for this would just simply be `Vec<SubMatch>`,
+/// but the common case is exactly one match per range of bytes, which we
+/// specialize here using a fixed size array without any allocation.
+enum SubMatches<'a> {
+    Empty,
+    Small([jsont::SubMatch<'a>; 1]),
+    Big(Vec<jsont::SubMatch<'a>>),
+}
+
+impl<'a> SubMatches<'a> {
+    /// Create a new set of match ranges from a set of matches and the
+    /// corresponding bytes that those matches apply to.
+    fn new(bytes: &'a [u8], matches: &[Match]) -> SubMatches<'a> {
+        if matches.len() == 1 {
+            let mat = matches[0];
+            SubMatches::Small([jsont::SubMatch {
+                m: &bytes[mat],
+                start: mat.start(),
+                end: mat.end(),
+            }])
+        } else {
+            let mut match_ranges = vec![];
+            for &mat in matches {
+                match_ranges.push(jsont::SubMatch {
+                    m: &bytes[mat],
+                    start: mat.start(),
+                    end: mat.end(),
+                });
+            }
+            SubMatches::Big(match_ranges)
+        }
+    }
+
+    /// Create an empty set of match ranges.
+    fn empty() -> SubMatches<'static> {
+        SubMatches::Empty
+    }
+
+    /// Return this set of match ranges as a slice.
+    fn as_slice(&self) -> &[jsont::SubMatch] {
+        match *self {
+            SubMatches::Empty => &[],
+            SubMatches::Small(ref x) => x,
+            SubMatches::Big(ref x) => x,
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use grep_matcher::LineTerminator;
+    use grep_regex::{RegexMatcher, RegexMatcherBuilder};
+    use grep_searcher::SearcherBuilder;
+
+    use super::{JSONBuilder, JSON};
+
+    const SHERLOCK: &'static [u8] = b"\
+For the Doctor Watsons of this world, as opposed to the Sherlock
+Holmeses, success in the province of detective work must always
+be, to a very large extent, the result of luck. Sherlock Holmes
+can extract a clew from a wisp of straw or a flake of cigar ash;
+but Doctor Watson has to have it taken out for him and dusted,
+and exhibited clearly, with a label attached.
+";
+
+    fn printer_contents(printer: &mut JSON<Vec<u8>>) -> String {
+        String::from_utf8(printer.get_mut().to_owned()).unwrap()
+    }
+
+    #[test]
+    fn binary_detection() {
+        use grep_searcher::BinaryDetection;
+
+        const BINARY: &'static [u8] = b"\
+For the Doctor Watsons of this world, as opposed to the Sherlock
+Holmeses, success in the province of detective work must always
+be, to a very large extent, the result of luck. Sherlock Holmes
+can extract a clew \x00 from a wisp of straw or a flake of cigar ash;
+but Doctor Watson has to have it taken out for him and dusted,
+and exhibited clearly, with a label attached.\
+";
+
+        let matcher = RegexMatcher::new(r"Watson").unwrap();
+        let mut printer = JSONBuilder::new().build(vec![]);
+        SearcherBuilder::new()
+            .binary_detection(BinaryDetection::quit(b'\x00'))
+            .heap_limit(Some(80))
+            .build()
+            .search_reader(&matcher, BINARY, printer.sink(&matcher))
+            .unwrap();
+        let got = printer_contents(&mut printer);
+
+        assert_eq!(got.lines().count(), 3);
+        let last = got.lines().last().unwrap();
+        assert!(last.contains(r#""binary_offset":212,"#));
+    }
+
+    #[test]
+    fn max_matches() {
+        let matcher = RegexMatcher::new(r"Watson").unwrap();
+        let mut printer =
+            JSONBuilder::new().max_matches(Some(1)).build(vec![]);
+        SearcherBuilder::new()
+            .build()
+            .search_reader(&matcher, SHERLOCK, printer.sink(&matcher))
+            .unwrap();
+        let got = printer_contents(&mut printer);
+
+        assert_eq!(got.lines().count(), 3);
+    }
+
+    #[test]
+    fn no_match() {
+        let matcher = RegexMatcher::new(r"DOES NOT MATCH").unwrap();
+        let mut printer = JSONBuilder::new().build(vec![]);
+        SearcherBuilder::new()
+            .build()
+            .search_reader(&matcher, SHERLOCK, printer.sink(&matcher))
+            .unwrap();
+        let got = printer_contents(&mut printer);
+
+        assert!(got.is_empty());
+    }
+
+    #[test]
+    fn always_begin_end_no_match() {
+        let matcher = RegexMatcher::new(r"DOES NOT MATCH").unwrap();
+        let mut printer =
+            JSONBuilder::new().always_begin_end(true).build(vec![]);
+        SearcherBuilder::new()
+            .build()
+            .search_reader(&matcher, SHERLOCK, printer.sink(&matcher))
+            .unwrap();
+        let got = printer_contents(&mut printer);
+
+        assert_eq!(got.lines().count(), 2);
+        assert!(got.contains("begin") && got.contains("end"));
+    }
+
+    #[test]
+    fn missing_crlf() {
+        let haystack = "test\r\n".as_bytes();
+
+        let matcher = RegexMatcherBuilder::new().build("test").unwrap();
+        let mut printer = JSONBuilder::new().build(vec![]);
+        SearcherBuilder::new()
+            .build()
+            .search_reader(&matcher, haystack, printer.sink(&matcher))
+            .unwrap();
+        let got = printer_contents(&mut printer);
+        assert_eq!(got.lines().count(), 3);
+        assert!(
+            got.lines().nth(1).unwrap().contains(r"test\r\n"),
+            r"missing 'test\r\n' in '{}'",
+            got.lines().nth(1).unwrap(),
+        );
+
+        let matcher =
+            RegexMatcherBuilder::new().crlf(true).build("test").unwrap();
+        let mut printer = JSONBuilder::new().build(vec![]);
+        SearcherBuilder::new()
+            .line_terminator(LineTerminator::crlf())
+            .build()
+            .search_reader(&matcher, haystack, printer.sink(&matcher))
+            .unwrap();
+        let got = printer_contents(&mut printer);
+        assert_eq!(got.lines().count(), 3);
+        assert!(
+            got.lines().nth(1).unwrap().contains(r"test\r\n"),
+            r"missing 'test\r\n' in '{}'",
+            got.lines().nth(1).unwrap(),
+        );
+    }
+}