summaryrefslogtreecommitdiffstats
path: root/crates
diff options
context:
space:
mode:
author: Andrew Gallant <jamslam@gmail.com> 2023-11-21 13:35:45 -0500
committer: Andrew Gallant <jamslam@gmail.com> 2023-11-21 18:39:32 -0500
commit: ae2a09915fe9d3b4308a2b6e5710b583d22f165d (patch)
tree: 29c910ca46afb927e8a605f21a3a93df239efd1a /crates
parent: 9c84575229131239f92149cd5790ddb553d7eea8 (diff)
printer: drop dependency on `base64` crate
Instead, we just roll our own. A slow version of this is pretty simple to do, and that's what we write here. The `base64` crate supports a lot more functionality and is quite fast, but we care about neither of those things for this particular aspect of ripgrep. (base64 is only used for non-UTF-8 data or file paths, which are both quite rare.)
Diffstat (limited to 'crates')
-rw-r--r-- crates/printer/Cargo.toml 3
-rw-r--r-- crates/printer/src/jsont.rs 79
2 files changed, 77 insertions, 5 deletions
diff --git a/crates/printer/Cargo.toml b/crates/printer/Cargo.toml
index 10537c53..caec3576 100644
--- a/crates/printer/Cargo.toml
+++ b/crates/printer/Cargo.toml
@@ -16,10 +16,9 @@ edition = "2021"
[features]
default = ["serde"]
-serde = ["dep:base64", "dep:serde", "dep:serde_json"]
+serde = ["dep:serde", "dep:serde_json"]
[dependencies]
-base64 = { version = "0.21.4", optional = true }
bstr = "1.6.2"
grep-matcher = { version = "0.1.6", path = "../matcher" }
grep-searcher = { version = "0.1.11", path = "../searcher" }
diff --git a/crates/printer/src/jsont.rs b/crates/printer/src/jsont.rs
index 6e5e85df..4d0cf944 100644
--- a/crates/printer/src/jsont.rs
+++ b/crates/printer/src/jsont.rs
@@ -207,11 +207,84 @@ impl<'a> serde::Serialize for Data<'a> {
match *self {
Data::Text { ref text } => state.serialize_field("text", text)?,
Data::Bytes { bytes } => {
- use base64::engine::{general_purpose::STANDARD, Engine};
- let encoded = STANDARD.encode(bytes);
- state.serialize_field("bytes", &encoded)?;
+ // use base64::engine::{general_purpose::STANDARD, Engine};
+ // let encoded = STANDARD.encode(bytes);
+ state.serialize_field("bytes", &base64_standard(bytes))?;
}
}
state.end()
}
}
+
/// Encodes `bytes` with the "standard" base64 alphabet and `=` padding, as
/// described in RFC 4648[1].
///
/// We roll our own here instead of bringing in something heavier weight like
/// the `base64` crate. In particular, we really don't care about perf much
/// here, since this is only used for data or file paths that are not valid
/// UTF-8.
///
/// Every group of three input bytes becomes four output characters. A
/// trailing group of one or two bytes is zero-extended to a whole number of
/// 6-bit units and then padded with `==` or `=` respectively, so the returned
/// string's length is always a multiple of four. Empty input yields an empty
/// string.
///
/// [1]: https://datatracker.ietf.org/doc/html/rfc4648#section-4
fn base64_standard(bytes: &[u8]) -> String {
    // Typed as a fixed-size array so the compiler checks that the alphabet
    // has exactly 64 entries.
    const ALPHABET: &[u8; 64] =
        b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    const SEXTET_MASK: usize = 0b111_111;

    // Maps the low six bits of `bits` to the corresponding base64 character.
    // Masking keeps the index within 0..64, so the lookup cannot panic.
    let sextet = |bits: usize| char::from(ALPHABET[bits & SEXTET_MASK]);

    // Each (possibly partial) group of 3 input bytes produces exactly 4
    // output characters, so the final length is known up front.
    let mut out = String::with_capacity(4 * ((bytes.len() + 2) / 3));

    let mut it = bytes.chunks_exact(3);
    // `by_ref` keeps `it` usable afterwards so the remainder can be read.
    for chunk in it.by_ref() {
        let group24 = (usize::from(chunk[0]) << 16)
            | (usize::from(chunk[1]) << 8)
            | usize::from(chunk[2]);
        out.push(sextet(group24 >> 18));
        out.push(sextet(group24 >> 12));
        out.push(sextet(group24 >> 6));
        out.push(sextet(group24));
    }
    match *it.remainder() {
        [] => {}
        [byte0] => {
            // 8 bits -> one full sextet plus 2 bits, left-aligned by
            // shifting in 4 zero bits.
            let group8 = usize::from(byte0);
            out.push(sextet(group8 >> 2));
            out.push(sextet(group8 << 4));
            out.push_str("==");
        }
        [byte0, byte1] => {
            // 16 bits -> two full sextets plus 4 bits, left-aligned by
            // shifting in 2 zero bits.
            let group16 = (usize::from(byte0) << 8) | usize::from(byte1);
            out.push(sextet(group16 >> 10));
            out.push(sextet(group16 >> 4));
            out.push(sextet(group16 << 2));
            out.push('=');
        }
        _ => unreachable!("remainder must have length < 3"),
    }
    out
}
+
#[cfg(test)]
mod tests {
    use super::*;

    // Tests taken from RFC 4648[1].
    //
    // [1]: https://datatracker.ietf.org/doc/html/rfc4648#section-10
    #[test]
    fn base64_basic() {
        let b64 = |s: &str| base64_standard(s.as_bytes());
        assert_eq!(b64(""), "");
        assert_eq!(b64("f"), "Zg==");
        assert_eq!(b64("fo"), "Zm8=");
        assert_eq!(b64("foo"), "Zm9v");
        assert_eq!(b64("foob"), "Zm9vYg==");
        assert_eq!(b64("fooba"), "Zm9vYmE=");
        assert_eq!(b64("foobar"), "Zm9vYmFy");
    }

    // The RFC vectors above are all ASCII and never reach the last two
    // alphabet entries (`+` and `/`). Cover bytes with the high bit set for
    // every remainder length (0, 1 and 2 trailing bytes).
    #[test]
    fn base64_non_utf8() {
        assert_eq!(base64_standard(&[0x00]), "AA==");
        assert_eq!(base64_standard(&[0xFF]), "/w==");
        assert_eq!(base64_standard(&[0xFF, 0xFF]), "//8=");
        assert_eq!(base64_standard(&[0xFB, 0xFF]), "+/8=");
        assert_eq!(base64_standard(&[0x00, 0x00, 0x00]), "AAAA");
        assert_eq!(base64_standard(&[0xFF, 0xFF, 0xFF]), "////");
        assert_eq!(base64_standard(&[0xFB, 0xEF, 0xBE]), "++++");
    }
}