holochain_serialized_bytes/
lib.rs

1extern crate rmp_serde;
2extern crate serde;
3extern crate serde_json;
4
5use serde::{Deserialize, Serialize};
6use std::convert::Infallible;
7use std::convert::TryFrom;
8
9pub mod prelude;
10
11#[cfg_attr(feature = "trace", tracing::instrument)]
12pub fn encode<T: serde::Serialize + std::fmt::Debug>(
13    val: &T,
14) -> Result<Vec<u8>, SerializedBytesError> {
15    let buf = Vec::with_capacity(128);
16    let mut se = rmp_serde::encode::Serializer::new(buf).with_struct_map();
17    val.serialize(&mut se).map_err(|err| {
18        #[cfg(feature = "trace")]
19        tracing::warn!("Failed to serialize input");
20        SerializedBytesError::Serialize(err.to_string())
21    })?;
22    let ret = se.into_inner();
23    #[cfg(feature = "trace")]
24    tracing::trace!(
25        "Serialized {} input into {:?}",
26        std::any::type_name::<T>(),
27        ret
28    );
29    Ok(ret)
30}
31
32#[cfg_attr(feature = "trace", tracing::instrument)]
33pub fn decode<'a, R, T>(input: &'a R) -> Result<T, SerializedBytesError>
34where
35    R: AsRef<[u8]> + ?Sized + std::fmt::Debug,
36    T: Deserialize<'a> + std::fmt::Debug,
37{
38    let ret = rmp_serde::from_slice(input.as_ref()).map_err(|err| {
39        #[cfg(feature = "trace")]
40        tracing::warn!(
41            "Failed to deserialize input into: {}",
42            std::any::type_name::<T>()
43        );
44        SerializedBytesError::Deserialize(err.to_string())
45    })?;
46    #[cfg(feature = "trace")]
47    tracing::trace!("Deserialized input into: {:?}", ret);
48    Ok(ret)
49}
50
/// Error produced when moving data into or out of serialized bytes.
///
/// `encode` and `decode` in this crate fill the `String` payload with the underlying
/// (de)serializer error rendered via `to_string()`.
#[derive(
    Clone,
    Debug,
    PartialEq,
    Eq,
    Hash,
    PartialOrd,
    Ord,
    serde::Serialize,
    serde::Deserialize,
    thiserror::Error,
)]
pub enum SerializedBytesError {
    /// Failed to turn a value into bytes.
    /// Most likely a messagepack limit was hit: https://github.com/msgpack/msgpack/blob/master/spec.md#limitation
    Serialize(String),
    /// Failed to restore a value from bytes.
    /// This could be caused by anything being wrong with the input bytes; the message is the
    /// only detail available.
    Deserialize(String),
}
71
72impl std::fmt::Display for SerializedBytesError {
73    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
74        write!(f, "{:?}", self)
75    }
76}
77
78impl From<SerializedBytesError> for String {
79    fn from(sb: SerializedBytesError) -> Self {
80        match sb {
81            SerializedBytesError::Serialize(s) => s,
82            SerializedBytesError::Deserialize(s) => s,
83        }
84    }
85}
86
87impl From<Infallible> for SerializedBytesError {
88    fn from(_: Infallible) -> Self {
89        unreachable!()
90    }
91}
92
/// `UnsafeBytes` is the only way to implement a custom round trip through bytes for
/// `SerializedBytes`.
/// It is intended to be an internal implementation detail of `TryFrom` implementations.
/// The assumption is that any code using `UnsafeBytes` is handling data that is NOT valid
/// messagepack data.
/// This allows us to enforce that all data round-tripping through `SerializedBytes` goes via
/// `TryFrom`, and also allows for custom non-messagepack canonical representations of data
/// types.
#[derive(Clone, Debug)]
#[cfg_attr(
    feature = "fuzzing",
    derive(arbitrary::Arbitrary, proptest_derive::Arbitrary)
)]
pub struct UnsafeBytes(Vec<u8>);

/// Wrap raw bytes without any validation.
impl From<Vec<u8>> for UnsafeBytes {
    fn from(raw: Vec<u8>) -> Self {
        UnsafeBytes(raw)
    }
}

/// Give back the raw bytes exactly as they were wrapped.
impl From<UnsafeBytes> for Vec<u8> {
    fn from(unsafe_bytes: UnsafeBytes) -> Vec<u8> {
        let UnsafeBytes(raw) = unsafe_bytes;
        raw
    }
}
116
117impl From<UnsafeBytes> for SerializedBytes {
118    fn from(b: UnsafeBytes) -> Self {
119        SerializedBytes(b.0)
120    }
121}
122
123impl From<SerializedBytes> for UnsafeBytes {
124    fn from(sb: SerializedBytes) -> Self {
125        UnsafeBytes(sb.0)
126    }
127}
128
/// A canonical serialized bytes representation for data.
/// If you have a data structure that needs a canonical byte representation, use this.
/// Always round-trip through SerializedBytes via a single TryFrom implementation.
/// This ensures that the internal bytes of SerializedBytes are indeed canonical.
/// The corollary is that if bytes are NOT wrapped in SerializedBytes we can assume they are NOT
/// canonical.
/// Typically we need a canonical serialization when data is to be handled at the byte level by
/// independently implemented and maintained systems.
///
/// Examples of times we need a canonical set of bytes to represent data:
/// - cryptographic operations
/// - moving across the host/guest wasm boundary
/// - putting data on the network
///
/// Examples of times where we may not need a canonical representation and so may not need this:
/// - round tripping data through a database that has its own serialization preferences
/// - debug output or logging of data that is to be human readable
/// - moving between data types within a single system that has no external facing representation
///
/// Uses #[repr(transparent)] to maximise compatibility with ffi
/// @see https://doc.rust-lang.org/1.26.2/unstable-book/language-features/repr-transparent.html#enter-reprtransparent
///
/// Uses serde_bytes for efficient serialization and deserialization;
/// without this __every byte will be individually round tripped through serde__
/// @see https://crates.io/crates/serde_bytes
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize, Hash)]
#[cfg_attr(
    feature = "fuzzing",
    derive(arbitrary::Arbitrary, proptest_derive::Arbitrary)
)]
#[repr(transparent)]
pub struct SerializedBytes(#[serde(with = "serde_bytes")] Vec<u8>);
161
162impl SerializedBytes {
163    pub fn bytes(&self) -> &Vec<u8> {
164        &self.0
165    }
166}
167
168/// A bit of magic to convert the internal messagepack bytes into roughly equivalent JSON output
169/// for the purposes of debugging.
170/// 90% of the time you probably want this if you are a dev, to see something that "looks like" a
171/// data structure when you do {:?} in a formatted string, rather than a vector of bytes
172/// in the remaining 10% of situations where you want to debug the real messagepack bytes, call the
173/// .bytes() method on SerializedBytes and debug that.
174impl std::fmt::Debug for SerializedBytes {
175    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
176        // NB: there is a crate::decode function which currently simply uses
177        // from_read_ref. If that ever changes, this may become inconsistent
178        // and should be changed also.
179        let mut deserializer = rmp_serde::Deserializer::from_read_ref(&self.0);
180        let writer = Vec::new();
181        let mut serializer = serde_json::ser::Serializer::new(writer);
182        if serde_transcode::transcode(&mut deserializer, &mut serializer).is_err() {
183            write!(f, "<invalid msgpack>")
184        } else {
185            let s = unsafe { String::from_utf8_unchecked(serializer.into_inner()) };
186            write!(f, "{}", s)
187        }
188    }
189}
190
#[macro_export]
/// Unidiomatic way to derive default trait implementations of TryFrom in/out of SerializedBytes.
///
/// Two main reasons this was done rather than the normal Derive approach:
/// - Derive requires a separate crate
/// - Derive doesn't allow for use of $crate to set unambiguous fully qualified paths to things
///
/// Both of these limitations push dependency management downstream to consumer crates more than we
/// want to.
/// This implementation allows us to manage all dependencies explicitly right here in this crate.
///
/// For every listed type `T` the macro generates:
/// - `TryFrom<&T> for SerializedBytes` (via `$crate::encode`)
/// - `TryFrom<T> for SerializedBytes` (delegates to the `&T` implementation)
/// - `TryFrom<SerializedBytes> for T` (via `$crate::decode`)
///
/// Types are comma separated and a trailing comma is accepted.
/// The generated code only uses fully qualified paths and trait calls, so the invoking module
/// does not need `TryFrom` (or anything else) in scope for the expansion to compile.
///
/// There is a default implementation of SerializedBytes into and out of ()
/// this is the ONLY supported direct primitive round-trip, which maps to `nil` in messagepack
/// for all other primitives, wrap them in a new type or enum
///
/// e.g. do NOT do this:
/// `u32::try_from(serialized_bytes)?;`
///
/// instead do this:
/// ```
/// use holochain_serialized_bytes::prelude::*;
///
/// #[derive(Serialize, Deserialize, Debug)]
/// pub struct SomeType(u32);
/// holochain_serial!(SomeType);
/// let serialized_bytes = SerializedBytes::try_from(SomeType(50)).unwrap();
/// let some_type = SomeType::try_from(serialized_bytes).unwrap();
/// ```
///
/// put `SomeType` in a separate crate that can be shared by all producers and consumers of the
/// serialized bytes in a minimal way.
/// this is a bit more upfront work but it's the only way the compiler can check a type maps to
/// a specific serialization across different systems, crate versions, and refactors.
///
/// for example, say we changed `SomeType(u32)` to `SomeType(u64)` in the shared crate
/// with the new type the compiler can enforce roundtripping of bytes works everywhere `SomeType`
/// is used, provided all producers and consumers use the same version of the shared crate.
/// in the case where we have no `SomeType` and would use integers directly, there is no safety.
/// the system can't tell the difference between a type mismatch (e.g. you just wrote u32 in the
/// wrong spot in one of the systems) and a serialization mismatch (e.g. the serialized bytes
/// produced by some system were consumed by another system using a different version of the shared
/// crate or something).
///
/// Developers then have to manually mentally impose the meaning of primitives over the top of code
/// across different code-bases that are ostensibly separate projects.
/// This introduces the effect where you can't understand/refactor one component of the system
/// without understanding and refactoring all the other components in the same PR/QA step.
///
/// An explicit goal of SerializedBytes is to introduce stability of byte-level data interfaces
/// across systems, provided they share the same version of a shared types crate.
/// This means that one component can implement a new version of the shared types and know
/// that it will be compatible with other components when they similarly upgrade AND other
/// components are safe to delay upgrading to the latest version of the shared crate until they are
/// ready to move. Essentially it allows for async development workflows around serialized data.
///
/// This is especially important for wasm as the wasm hosts and guests may not even be developed
/// by the same people/organisations, so there MUST be some compiler level guarantee that at least
/// the shared types within the same shared crate version have compatible serialization logic.
///
/// usually when working with primitives we are within a single system, i.e. a single compilation
/// context, a single set of dependencies, a single release/QA lifecycle
/// in this case, while we _could_ wrap every single primitive in a new type for maximum compile
/// time safety it is often 'overkill' and we can eat some ambiguity for the sake of ergonomics and
/// minimising the number of parallel types/trait implementations.
/// in the case of parallel, decoupled, independently maintained systems that rely on byte-level
/// canonical representation of things that will fail (e.g. cryptographically break or (de)allocate
/// memory incorrectly) if even one byte is wrong, the guide-rails provided by new types and enums
/// are worth the additional up-front effort of creating a few extra shared crates/types.
///
/// see the readme for more discussion around this
macro_rules! holochain_serial {
    ( $( $t:ty ),* $(,)? ) => {

        $(
            impl ::std::convert::TryFrom<&$t> for $crate::SerializedBytes {
                type Error = $crate::SerializedBytesError;
                fn try_from(t: &$t) -> ::std::result::Result<$crate::SerializedBytes, $crate::SerializedBytesError> {
                    $crate::encode(t).map(|v|
                        $crate::SerializedBytes::from($crate::UnsafeBytes::from(v))
                    )
                }
            }

            impl ::std::convert::TryFrom<$t> for $crate::SerializedBytes {
                type Error = $crate::SerializedBytesError;
                fn try_from(t: $t) -> ::std::result::Result<$crate::SerializedBytes, $crate::SerializedBytesError> {
                    // Fully qualified so the expansion does not depend on `TryFrom` being
                    // imported wherever the macro is invoked.
                    <$crate::SerializedBytes as ::std::convert::TryFrom<&$t>>::try_from(&t)
                }
            }

            impl ::std::convert::TryFrom<$crate::SerializedBytes> for $t {
                type Error = $crate::SerializedBytesError;
                fn try_from(sb: $crate::SerializedBytes) -> ::std::result::Result<$t, $crate::SerializedBytesError> {
                    $crate::decode(sb.bytes())
                }
            }
        )*

    };
}
291
// `()` is the only primitive given a direct round trip through SerializedBytes (see the
// `holochain_serial!` docs); `Default for SerializedBytes` relies on these conversions.
holochain_serial!(());
293
294impl Default for SerializedBytes {
295    fn default() -> Self {
296        SerializedBytes::try_from(()).unwrap()
297    }
298}
299
300impl<'a> TryFrom<&'a SerializedBytes> for SerializedBytes {
301    type Error = SerializedBytesError;
302    fn try_from(s: &'a SerializedBytes) -> Result<Self, Self::Error> {
303        Ok(s.clone())
304    }
305}