base64_bytes/lib.rs
1//! Intelligent serialization for binary blobs.
2//!
3//! Where `Vec<u8>` always serializes as an array of bytes, this crate provides serialization
4//! functions which try to make an intelligent decision about how to serialize a byte vector based
5//! on the serialization format.
6//!
//! For binary formats like [`bincode`](https://docs.rs/bincode/latest/bincode/), the array-of-bytes
//! serialization works great: it is compact and introduces very little overhead. But for
//! human-readable formats such as [`serde_json`](https://docs.rs/serde_json/latest/serde_json/), it's
//! far from ideal. The text encoding of an array introduces substantial overhead, and the resulting
//! array of opaque bytes isn't particularly readable anyway.
12//!
13//! `base64-bytes` uses the [`is_human_readable`](serde::Serializer::is_human_readable) property of
14//! a serializer to distinguish these cases. For binary formats, it uses the default `Vec<u8>`
15//! serialization. For human-readable formats, it uses a much more compact and conventional base 64
16//! encoding.
17//!
18//! # Usage
19//!
20//! The interface consists of [`serialize`] and [`deserialize`] functions. While these _can_ be
21//! called directly, they are intended to be used with serde's
22//! [field attributes](https://serde.rs/field-attrs.html) controlling serialization, like:
23//!
24//! ```
25//! use serde::{Deserialize, Serialize};
26//!
27//! #[derive(Deserialize, Serialize)]
28//! struct SomeType {
29//! #[serde(
30//! serialize_with = "base64_bytes::serialize",
31//! deserialize_with = "base64_bytes::deserialize",
32//! )]
33//! bytes: Vec<u8>,
34//! }
35//! ```
36//!
37//! Or, as a shorthand:
38//!
39//! ```
40//! use serde::{Deserialize, Serialize};
41//!
42//! #[derive(Deserialize, Serialize)]
43//! struct SomeType {
44//! #[serde(with = "base64_bytes")]
45//! bytes: Vec<u8>,
46//! }
47//! ```
48//!
49
50use base64::{engine::general_purpose::STANDARD as BASE64, Engine};
51use serde::{
52 de::{Deserialize, Deserializer, Error},
53 ser::{Serialize, Serializer},
54};
55
56/// Serialize a byte vector.
57pub fn serialize<S: Serializer, T: AsRef<[u8]>>(v: &T, s: S) -> Result<S::Ok, S::Error> {
58 if s.is_human_readable() {
59 BASE64.encode(v).serialize(s)
60 } else {
61 v.as_ref().serialize(s)
62 }
63}
64
65/// Deserialize a byte vector.
66pub fn deserialize<'a, D: Deserializer<'a>>(d: D) -> Result<Vec<u8>, D::Error> {
67 if d.is_human_readable() {
68 Ok(BASE64
69 .decode(String::deserialize(d)?)
70 .map_err(|err| D::Error::custom(format!("invalid base64: {err}")))?)
71 } else {
72 Ok(Vec::deserialize(d)?)
73 }
74}
75
#[cfg(test)]
mod test {
    use crate::BASE64;
    use base64::Engine;
    use rand::RngCore;
    use serde::{Deserialize, Serialize};

    /// Wrapper whose single field is routed through this crate's `serialize`/`deserialize`.
    #[derive(Debug, PartialEq, Eq, Deserialize, Serialize)]
    struct Test {
        #[serde(with = "crate")]
        bytes: Vec<u8>,
    }

    /// Round-trips random payloads of several lengths through a binary format (bincode) and a
    /// human-readable one (JSON), checking the encoded shape in each case.
    #[test]
    fn test_bytes_serde() {
        let mut rng = rand::thread_rng();

        for len in [0, 1, 10, 1000] {
            let mut payload = vec![0u8; len];
            rng.fill_bytes(&mut payload);
            let original = Test { bytes: payload };

            // The binary serialization should be highly efficient: just the length followed by
            // the raw bytes.
            let encoded = bincode::serialize(&original).unwrap();
            let length_prefix = &encoded[..8];
            let raw_bytes = &encoded[8..];
            assert_eq!(length_prefix, (len as u64).to_le_bytes());
            assert_eq!(original.bytes, raw_bytes);
            // Check deserialization.
            let from_binary: Test = bincode::deserialize(&encoded).unwrap();
            assert_eq!(original, from_binary);

            // The JSON serialization should return a base 64 string.
            let value = serde_json::to_value(&original).unwrap();
            let expected_text = BASE64.encode(&original.bytes);
            assert_eq!(value["bytes"].as_str().unwrap(), expected_text);
            // Check deserialization.
            let from_json: Test = serde_json::from_value(value).unwrap();
            assert_eq!(original, from_json);
        }
    }
}