diff options
author | Andrew Gallant <jamslam@gmail.com> | 2023-11-21 13:35:45 -0500 |
---|---|---|
committer | Andrew Gallant <jamslam@gmail.com> | 2023-11-21 18:39:32 -0500 |
commit | ae2a09915fe9d3b4308a2b6e5710b583d22f165d (patch) | |
tree | 29c910ca46afb927e8a605f21a3a93df239efd1a | |
parent | 9c84575229131239f92149cd5790ddb553d7eea8 (diff) |
printer: drop dependency on `base64` crate
Instead, we just roll our own. A slow version of this is pretty simple
to do, and that's what we write here. The `base64` crate supports a lot
more functionality and is quite fast, but we care about neither of those
things for this particular aspect of ripgrep. (base64 is only used for
non-UTF-8 data or file paths, which are both quite rare.)
-rw-r--r-- | Cargo.lock | 7 | ||||
-rw-r--r-- | crates/printer/Cargo.toml | 3 | ||||
-rw-r--r-- | crates/printer/src/jsont.rs | 79 |
3 files changed, 77 insertions, 12 deletions
```diff
@@ -24,12 +24,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
 
 [[package]]
-name = "base64"
-version = "0.21.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9ba43ea6f343b788c8764558649e08df62f86c6ef251fdaeb1ffd010a9ae50a2"
-
-[[package]]
 name = "bstr"
 version = "1.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -185,7 +179,6 @@ dependencies = [
 name = "grep-printer"
 version = "0.1.7"
 dependencies = [
- "base64",
  "bstr",
  "grep-matcher",
  "grep-regex",
diff --git a/crates/printer/Cargo.toml b/crates/printer/Cargo.toml
index 10537c53..caec3576 100644
--- a/crates/printer/Cargo.toml
+++ b/crates/printer/Cargo.toml
@@ -16,10 +16,9 @@ edition = "2021"
 
 [features]
 default = ["serde"]
-serde = ["dep:base64", "dep:serde", "dep:serde_json"]
+serde = ["dep:serde", "dep:serde_json"]
 
 [dependencies]
-base64 = { version = "0.21.4", optional = true }
 bstr = "1.6.2"
 grep-matcher = { version = "0.1.6", path = "../matcher" }
 grep-searcher = { version = "0.1.11", path = "../searcher" }
diff --git a/crates/printer/src/jsont.rs b/crates/printer/src/jsont.rs
index 6e5e85df..4d0cf944 100644
--- a/crates/printer/src/jsont.rs
+++ b/crates/printer/src/jsont.rs
@@ -207,11 +207,84 @@ impl<'a> serde::Serialize for Data<'a> {
         match *self {
             Data::Text { ref text } => state.serialize_field("text", text)?,
             Data::Bytes { bytes } => {
-                use base64::engine::{general_purpose::STANDARD, Engine};
-                let encoded = STANDARD.encode(bytes);
-                state.serialize_field("bytes", &encoded)?;
+                // use base64::engine::{general_purpose::STANDARD, Engine};
+                // let encoded = STANDARD.encode(bytes);
+                state.serialize_field("bytes", &base64_standard(bytes))?;
             }
         }
         state.end()
     }
 }
+
+/// Implements "standard" base64 encoding as described in RFC 3548[1].
+///
+/// We roll our own here instead of bringing in something heavier weight like
+/// the `base64` crate. In particular, we really don't care about perf much
+/// here, since this is only used for data or file paths that are not valid
+/// UTF-8.
+///
+/// [1]: https://tools.ietf.org/html/rfc3548#section-3
+fn base64_standard(bytes: &[u8]) -> String {
+    const ALPHABET: &[u8] =
+        b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+    let mut out = String::new();
+    let mut it = bytes.chunks_exact(3);
+    while let Some(chunk) = it.next() {
+        let group24 = (usize::from(chunk[0]) << 16)
+            | (usize::from(chunk[1]) << 8)
+            | usize::from(chunk[2]);
+        let index1 = (group24 >> 18) & 0b111_111;
+        let index2 = (group24 >> 12) & 0b111_111;
+        let index3 = (group24 >> 6) & 0b111_111;
+        let index4 = (group24 >> 0) & 0b111_111;
+        out.push(char::from(ALPHABET[index1]));
+        out.push(char::from(ALPHABET[index2]));
+        out.push(char::from(ALPHABET[index3]));
+        out.push(char::from(ALPHABET[index4]));
+    }
+    match it.remainder() {
+        &[] => {}
+        &[byte0] => {
+            let group8 = usize::from(byte0);
+            let index1 = (group8 >> 2) & 0b111_111;
+            let index2 = (group8 << 4) & 0b111_111;
+            out.push(char::from(ALPHABET[index1]));
+            out.push(char::from(ALPHABET[index2]));
+            out.push('=');
+            out.push('=');
+        }
+        &[byte0, byte1] => {
+            let group16 = (usize::from(byte0) << 8) | usize::from(byte1);
+            let index1 = (group16 >> 10) & 0b111_111;
+            let index2 = (group16 >> 4) & 0b111_111;
+            let index3 = (group16 << 2) & 0b111_111;
+            out.push(char::from(ALPHABET[index1]));
+            out.push(char::from(ALPHABET[index2]));
+            out.push(char::from(ALPHABET[index3]));
+            out.push('=');
+        }
+        _ => unreachable!("remainder must have length < 3"),
+    }
+    out
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Tests taken from RFC 4648[1].
+    //
+    // [1]: https://datatracker.ietf.org/doc/html/rfc4648#section-10
+    #[test]
+    fn base64_basic() {
+        let b64 = |s: &str| base64_standard(s.as_bytes());
+        assert_eq!(b64(""), "");
+        assert_eq!(b64("f"), "Zg==");
+        assert_eq!(b64("fo"), "Zm8=");
+        assert_eq!(b64("foo"), "Zm9v");
+        assert_eq!(b64("foob"), "Zm9vYg==");
+        assert_eq!(b64("fooba"), "Zm9vYmE=");
+        assert_eq!(b64("foobar"), "Zm9vYmFy");
+    }
+}
```