printer: drop dependency on base64 crate

Instead, we just roll our own. A slow version of this is pretty simple to do, and that's what we write here. The `base64` crate supports a lot more functionality and is quite fast, but we care about neither of those things for this particular aspect of ripgrep. (base64 is only used for non-UTF-8 data or file paths, which are both quite rare.)
2025-08-02 21:21:58 -07:00 · 2023-11-21 13:35:45 -05:00
parent 9c84575229
commit ae2a09915f
3 changed files with 77 additions and 12 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -23,12 +23,6 @@ version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
-[[package]]
-name = "base64"
-version = "0.21.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9ba43ea6f343b788c8764558649e08df62f86c6ef251fdaeb1ffd010a9ae50a2"
 [[package]]
 name = "bstr"
 version = "1.7.0"
@@ -185,7 +179,6 @@ dependencies = [
 name = "grep-printer"
 version = "0.1.7"
 dependencies = [
- "base64",
 "bstr",
 "grep-matcher",
 "grep-regex",
--- a/crates/printer/Cargo.toml
+++ b/crates/printer/Cargo.toml
@@ -16,10 +16,9 @@ edition = "2021"
 [features]
 default = ["serde"]
-serde = ["dep:base64", "dep:serde", "dep:serde_json"]
+serde = ["dep:serde", "dep:serde_json"]
 [dependencies]
-base64 = { version = "0.21.4", optional = true }
 bstr = "1.6.2"
 grep-matcher = { version = "0.1.6", path = "../matcher" }
 grep-searcher = { version = "0.1.11", path = "../searcher" }
--- a/crates/printer/src/jsont.rs
+++ b/crates/printer/src/jsont.rs
@@ -207,11 +207,84 @@ impl<'a> serde::Serialize for Data<'a> {
        match *self {
            Data::Text { ref text } => state.serialize_field("text", text)?,
            Data::Bytes { bytes } => {
-                use base64::engine::{general_purpose::STANDARD, Engine};
+                // use base64::engine::{general_purpose::STANDARD, Engine};
-                let encoded = STANDARD.encode(bytes);
+                // let encoded = STANDARD.encode(bytes);
-                state.serialize_field("bytes", &encoded)?;
+                state.serialize_field("bytes", &base64_standard(bytes))?;
            }
        }
        state.end()
    }
 }
 /// Encodes `bytes` with the "standard" base64 alphabet, padding the output
 /// with `=` to a multiple of four characters, as described in RFC 3548[1].
 ///
 /// This is a deliberately simple hand-written encoder used in place of a
 /// heavier dependency such as the `base64` crate. Throughput is not a
 /// concern here: the only inputs are data or file paths that are not valid
 /// UTF-8.
 ///
 /// [1]: https://tools.ietf.org/html/rfc3548#section-3
 fn base64_standard(bytes: &[u8]) -> String {
    const ALPHABET: &[u8] =
        b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    const PAD: char = '=';

    // Looks up the alphabet symbol selected by the low six bits of `bits`.
    let sextet = |bits: usize| char::from(ALPHABET[bits & 0b111_111]);

    let mut encoded = String::new();
    let mut triples = bytes.chunks_exact(3);
    // Every full group of three input bytes yields four output symbols.
    for triple in triples.by_ref() {
        let word = (usize::from(triple[0]) << 16)
            | (usize::from(triple[1]) << 8)
            | usize::from(triple[2]);
        encoded.extend([
            sextet(word >> 18),
            sextet(word >> 12),
            sextet(word >> 6),
            sextet(word),
        ]);
    }
    // The trailing one or two bytes are zero-extended on the right to fill
    // the final symbol, and the output is padded out to four characters.
    match *triples.remainder() {
        [] => {}
        [first] => {
            let word = usize::from(first);
            encoded.extend([sextet(word >> 2), sextet(word << 4), PAD, PAD]);
        }
        [first, second] => {
            let word = (usize::from(first) << 8) | usize::from(second);
            encoded.extend([
                sextet(word >> 10),
                sextet(word >> 4),
                sextet(word << 2),
                PAD,
            ]);
        }
        _ => unreachable!("remainder must have length < 3"),
    }
    encoded
 }
 #[cfg(test)]
 mod tests {
    use super::*;

    // Checks the encoder against the test vectors published in RFC 4648[1].
    //
    // [1]: https://datatracker.ietf.org/doc/html/rfc4648#section-10
    #[test]
    fn base64_basic() {
        // Each entry is `(input text, expected base64 encoding)`.
        const VECTORS: &[(&str, &str)] = &[
            ("", ""),
            ("f", "Zg=="),
            ("fo", "Zm8="),
            ("foo", "Zm9v"),
            ("foob", "Zm9vYg=="),
            ("fooba", "Zm9vYmE="),
            ("foobar", "Zm9vYmFy"),
        ];
        for &(input, expected) in VECTORS {
            assert_eq!(base64_standard(input.as_bytes()), expected);
        }
    }
 }