printer: drop dependency on base64 crate

Instead, we just roll our own. A slow version of this is pretty simple
to do, and that's what we write here. The `base64` crate supports a lot
more functionality and is quite fast, but we care about neither of those
things for this particular aspect of ripgrep. (base64 is only used for
non-UTF-8 data or file paths, which are both quite rare.)
This commit is contained in:
Andrew Gallant 2023-11-21 13:35:45 -05:00
parent 9c84575229
commit ae2a09915f
3 changed files with 77 additions and 12 deletions

7
Cargo.lock generated
View File

@ -23,12 +23,6 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "base64"
version = "0.21.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ba43ea6f343b788c8764558649e08df62f86c6ef251fdaeb1ffd010a9ae50a2"
[[package]] [[package]]
name = "bstr" name = "bstr"
version = "1.7.0" version = "1.7.0"
@ -185,7 +179,6 @@ dependencies = [
name = "grep-printer" name = "grep-printer"
version = "0.1.7" version = "0.1.7"
dependencies = [ dependencies = [
"base64",
"bstr", "bstr",
"grep-matcher", "grep-matcher",
"grep-regex", "grep-regex",

View File

@ -16,10 +16,9 @@ edition = "2021"
[features] [features]
default = ["serde"] default = ["serde"]
serde = ["dep:base64", "dep:serde", "dep:serde_json"] serde = ["dep:serde", "dep:serde_json"]
[dependencies] [dependencies]
base64 = { version = "0.21.4", optional = true }
bstr = "1.6.2" bstr = "1.6.2"
grep-matcher = { version = "0.1.6", path = "../matcher" } grep-matcher = { version = "0.1.6", path = "../matcher" }
grep-searcher = { version = "0.1.11", path = "../searcher" } grep-searcher = { version = "0.1.11", path = "../searcher" }

View File

@ -207,11 +207,84 @@ impl<'a> serde::Serialize for Data<'a> {
match *self { match *self {
Data::Text { ref text } => state.serialize_field("text", text)?, Data::Text { ref text } => state.serialize_field("text", text)?,
Data::Bytes { bytes } => { Data::Bytes { bytes } => {
use base64::engine::{general_purpose::STANDARD, Engine}; // use base64::engine::{general_purpose::STANDARD, Engine};
let encoded = STANDARD.encode(bytes); // let encoded = STANDARD.encode(bytes);
state.serialize_field("bytes", &encoded)?; state.serialize_field("bytes", &base64_standard(bytes))?;
} }
} }
state.end() state.end()
} }
} }
/// Encodes `bytes` with the "standard" base64 alphabet, including `=`
/// padding, as specified by RFC 3548[1].
///
/// This is hand-written on purpose so that we don't need to pull in a
/// heavier dependency such as the `base64` crate. Throughput is not a
/// concern for this routine: it only runs for data or file paths that are
/// not valid UTF-8.
///
/// [1]: https://tools.ietf.org/html/rfc3548#section-3
fn base64_standard(bytes: &[u8]) -> String {
    const ALPHABET: &[u8; 64] =
        b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    // Looks up the alphabet character for the low six bits of `bits`. Any
    // higher bits are masked off, so callers only need to shift the sextet
    // they want into the lowest position.
    let sextet = |bits: u32| char::from(ALPHABET[(bits & 0x3F) as usize]);

    let mut encoded = String::new();
    let mut triples = bytes.chunks_exact(3);
    // Iterate through a mutable borrow so that the leftover bytes can still
    // be retrieved from `triples` once the full groups are exhausted.
    for triple in triples.by_ref() {
        // Three input bytes form one 24-bit group, which is emitted as four
        // 6-bit indices, most significant first.
        let word = (u32::from(triple[0]) << 16)
            | (u32::from(triple[1]) << 8)
            | u32::from(triple[2]);
        encoded.push(sextet(word >> 18));
        encoded.push(sextet(word >> 12));
        encoded.push(sextet(word >> 6));
        encoded.push(sextet(word));
    }
    match *triples.remainder() {
        [] => {}
        [first] => {
            // 8 bits of input: one full sextet, then the remaining 2 bits
            // padded on the right with 4 zero bits.
            let word = u32::from(first);
            encoded.push(sextet(word >> 2));
            encoded.push(sextet(word << 4));
            encoded.push_str("==");
        }
        [first, second] => {
            // 16 bits of input: two full sextets, then the remaining 4 bits
            // padded on the right with 2 zero bits.
            let word = (u32::from(first) << 8) | u32::from(second);
            encoded.push(sextet(word >> 10));
            encoded.push(sextet(word >> 4));
            encoded.push(sextet(word << 2));
            encoded.push('=');
        }
        _ => unreachable!("remainder must have length < 3"),
    }
    encoded
}
#[cfg(test)]
mod tests {
    use super::*;

    // The input/output pairs below are the base64 test vectors listed in
    // RFC 4648[1]. Together they cover every remainder length (0, 1 and 2
    // trailing bytes), both with and without a preceding full 3-byte group.
    //
    // [1]: https://datatracker.ietf.org/doc/html/rfc4648#section-10
    #[test]
    fn base64_basic() {
        let vectors = [
            ("", ""),
            ("f", "Zg=="),
            ("fo", "Zm8="),
            ("foo", "Zm9v"),
            ("foob", "Zm9vYg=="),
            ("fooba", "Zm9vYmE="),
            ("foobar", "Zm9vYmFy"),
        ];
        for &(input, expected) in vectors.iter() {
            assert_eq!(base64_standard(input.as_bytes()), expected);
        }
    }
}