aingle_middleware_bytes/
lib.rs

1pub extern crate serde;
2extern crate serde_json;
3
4extern crate rmp_serde;
5
6use serde::{Deserialize, Serialize};
7use std::convert::Infallible;
8use std::convert::TryFrom;
9
10pub mod prelude;
11
12#[cfg_attr(feature = "trace", tracing::instrument)]
13pub fn encode<T: serde::Serialize + std::fmt::Debug>(
14    val: &T,
15) -> Result<Vec<u8>, SerializedBytesError> {
16    let buf = Vec::with_capacity(128);
17    let mut se = rmp_serde::encode::Serializer::new(buf)
18        .with_struct_map()
19        .with_string_variants();
20    val.serialize(&mut se).map_err(|err| {
21        #[cfg(feature = "trace")]
22        tracing::warn!("Failed to serialize input");
23        SerializedBytesError::Serialize(err.to_string())
24    })?;
25    let ret = se.into_inner();
26    #[cfg(feature = "trace")]
27    tracing::trace!(
28        "Serialized {} input into {:?}",
29        std::any::type_name::<T>(),
30        ret
31    );
32    Ok(ret)
33}
34
35#[cfg_attr(feature = "trace", tracing::instrument)]
36pub fn decode<'a, R, T>(input: &'a R) -> Result<T, SerializedBytesError>
37where
38    R: AsRef<[u8]> + ?Sized + std::fmt::Debug,
39    T: Deserialize<'a> + std::fmt::Debug,
40{
41    let ret = rmp_serde::from_read_ref(input).map_err(|err| {
42        #[cfg(feature = "trace")]
43        tracing::warn!(
44            "Failed to deserialize input into: {}",
45            std::any::type_name::<T>()
46        );
47        SerializedBytesError::Deserialize(err.to_string())
48    })?;
49    #[cfg(feature = "trace")]
50    tracing::trace!("Deserialized input into: {:?}", ret);
51    Ok(ret)
52}
53
54#[derive(
55    Clone,
56    Debug,
57    PartialEq,
58    Eq,
59    Hash,
60    PartialOrd,
61    Ord,
62    serde::Serialize,
63    serde::Deserialize,
64    thiserror::Error,
65)]
66pub enum SerializedBytesError {
67    /// somehow failed to move to bytes
68    /// most likely hit a messagepack limit https://github.com/msgpack/msgpack/blob/master/spec.md#limitation
69    Serialize(String),
70    /// somehow failed to restore bytes
71    /// i mean, this could be anything, how do i know what's wrong with your bytes?
72    Deserialize(String),
73}
74
75impl std::fmt::Display for SerializedBytesError {
76    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
77        write!(f, "{:?}", self)
78    }
79}
80
81impl From<SerializedBytesError> for String {
82    fn from(sb: SerializedBytesError) -> Self {
83        match sb {
84            SerializedBytesError::Serialize(s) => s,
85            SerializedBytesError::Deserialize(s) => s,
86        }
87    }
88}
89
90impl From<Infallible> for SerializedBytesError {
91    fn from(_: Infallible) -> Self {
92        unreachable!()
93    }
94}
95
/// `UnsafeBytes` is the only way to implement a custom round trip through bytes for
/// `SerializedBytes`.
///
/// It is intended to be an internal implementation detail of `TryFrom` implementations.
/// The assumption is that any code using `UnsafeBytes` is NOT producing valid messagepack data.
/// This allows us to enforce that all data round-tripping through `SerializedBytes` goes via
/// `TryFrom`, while still allowing custom non-messagepack canonical representations of data types.
#[derive(Clone, Debug)]
pub struct UnsafeBytes(Vec<u8>);
103
104impl From<Vec<u8>> for UnsafeBytes {
105    fn from(v: Vec<u8>) -> Self {
106        Self(v)
107    }
108}
109
110impl From<UnsafeBytes> for Vec<u8> {
111    fn from(unsafe_bytes: UnsafeBytes) -> Vec<u8> {
112        unsafe_bytes.0
113    }
114}
115
116impl From<UnsafeBytes> for SerializedBytes {
117    fn from(b: UnsafeBytes) -> Self {
118        SerializedBytes(b.0)
119    }
120}
121
122impl From<SerializedBytes> for UnsafeBytes {
123    fn from(sb: SerializedBytes) -> Self {
124        UnsafeBytes(sb.0)
125    }
126}
127
128/// A Canonical Serialized Bytes representation for data
129/// If you have a data structure that needs a canonical byte representation use this
130/// Always round-trip through SerializedBytes via. a single TryFrom implementation.
131/// This ensures that the internal bytes of SerializedBytes are indeed canonical.
132/// The corrolary is that if bytes are NOT wrapped in SerializedBytes we can assume they are NOT
133/// canonical.
134/// Typically we need a canonical middleware when data is to be handled at the byte level by
135/// independently implemented and maintained systems.
136///
137/// Examples of times we need a canonical set of bytes to represent data:
138/// - cryptographic operations
139/// - moving across the host/guest wasm boundary
140/// - putting data on the network
141///
142/// Examples of times where we may not need a canonical representation and so may not need this:
143/// - round tripping data through a database that has its own middleware preferences
144/// - debug output or logging of data that is to be human readible
145/// - moving between data types within a single system that has no external facing representation
146///
147/// uses #[repr(transparent)] to maximise compatibility with ffi
148/// @see https://doc.rust-lang.org/1.26.2/unstable-book/language-features/repr-transparent.html#enter-reprtransparent
149///
150/// uses serde_bytes for efficient middleware and demiddleware
151/// without this __every byte will be individually round tripped through serde__
152/// @see https://crates.io/crates/serde_bytes
153#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize, Hash)]
154#[repr(transparent)]
155pub struct SerializedBytes(#[serde(with = "serde_bytes")] Vec<u8>);
156
157impl SerializedBytes {
158    pub fn bytes(&self) -> &Vec<u8> {
159        &self.0
160    }
161}
162
163/// A bit of magic to convert the internal messagepack bytes into roughly equivalent JSON output
164/// for the purposes of debugging.
165/// 90% of the time you probably want this if you are a dev, to see something that "looks like" a
166/// data structure when you do {:?} in a formatted string, rather than a vector of bytes
167/// in the remaining 10% of situations where you want to debug the real messagepack bytes, call the
168/// .bytes() method on SerializedBytes and debug that.
169impl std::fmt::Debug for SerializedBytes {
170    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
171        // NB: there is a crate::decode function which currently simply uses
172        // from_read_ref. If that ever changes, this may become inconsistent
173        // and should be changed also.
174        let mut deserializer = rmp_serde::Deserializer::from_read_ref(&self.0);
175        let writer = Vec::new();
176        let mut serializer = serde_json::ser::Serializer::new(writer);
177        serde_transcode::transcode(&mut deserializer, &mut serializer).unwrap();
178        let s = unsafe { String::from_utf8_unchecked(serializer.into_inner()) };
179        write!(f, "{}", s)
180    }
181}
182
#[macro_export]
/// Unidiomatic way to derive default trait implementations of `TryFrom` in/out of
/// `SerializedBytes`.
///
/// For every type `T` in the comma-separated list (a trailing comma is allowed) this generates:
/// - `TryFrom<&T> for SerializedBytes`, implemented with `$crate::encode`
/// - `TryFrom<T> for SerializedBytes`, which delegates to the by-reference implementation
/// - `TryFrom<SerializedBytes> for T`, implemented with `$crate::decode`
///
/// Two main reasons this was done rather than the normal Derive approach:
/// - Derive requires a separate crate
/// - Derive doesn't allow for use of $crate to set unambiguous fully qualified paths to things
///
/// Both of these limitations push dependency management downstream to consumer crates more than
/// we want to.
/// This implementation allows us to manage all dependencies explicitly right here in this crate.
///
/// There is a default implementation of SerializedBytes into and out of ().
/// This is the ONLY supported direct primitive round-trip, which maps to `nil` in messagepack.
/// For all other primitives, wrap them in a new type or enum.
///
/// e.g. do NOT do this:
/// `u32::try_from(serialized_bytes)?;`
///
/// instead do this:
/// ```
/// use aingle_middleware_bytes::prelude::*;
///
/// #[derive(Serialize, Deserialize, Debug)]
/// pub struct SomeType(u32);
/// aingle_serial!(SomeType);
/// let serialized_bytes = SerializedBytes::try_from(SomeType(50)).unwrap();
/// let some_type = SomeType::try_from(serialized_bytes).unwrap();
/// ```
///
/// Put `SomeType` in a separate crate that can be shared by all producers and consumers of the
/// serialized bytes in a minimal way.
/// This is a bit more upfront work but it's the only way the compiler can check a type maps to
/// a specific serialization across different systems, crate versions, and refactors.
///
/// For example, say we changed `SomeType(u32)` to `SomeType(u64)` in the shared crate.
/// With the new type the compiler can enforce roundtripping of bytes works everywhere `SomeType`
/// is used, provided all producers and consumers use the same version of the shared crate.
/// In the case where we have no `SomeType` and would use integers directly, there is no safety.
/// The system can't tell the difference between a type mismatch (e.g. you just wrote u32 in the
/// wrong spot in one of the systems) and a serialization mismatch (e.g. the serialized bytes
/// produced by some system were consumed by another system using a different version of the
/// shared crate or something).
///
/// Developers then have to manually mentally impose the meaning of primitives over the top of
/// code across different code-bases that are ostensibly separate projects.
/// This introduces the effect where you can't understand/refactor one component of the system
/// without understanding and refactoring all the other components in the same PR/QA step.
///
/// An explicit goal of SerializedBytes is to introduce stability of byte-level data interfaces
/// across systems, provided they share the same version of a shared types crate.
/// This means that one component can implement a new version of the shared types and know
/// that it will be compatible with other components when they similarly upgrade AND other
/// components are safe to delay upgrading to the latest version of the shared crate until they
/// are ready to move. Essentially it allows for async development workflows around serialized
/// data.
///
/// This is especially important for wasm as the wasm hosts and guests may not even be developed
/// by the same people/organisations, so there MUST be some compiler level guarantee that at
/// least the shared types within the same shared crate version have compatible serialization
/// logic.
///
/// Usually when working with primitives we are within a single system, i.e. a single
/// compilation context, a single set of dependencies, a single release/QA lifecycle.
/// In this case, while we _could_ wrap every single primitive in a new type for maximum compile
/// time safety it is often 'overkill' and we can eat some ambiguity for the sake of ergonomics
/// and minimising the number of parallel types/trait implementations.
/// In the case of parallel, decoupled, independently maintained systems that rely on byte-level
/// canonical representation of things that will fail (e.g. cryptographically break or
/// (de)allocate memory incorrectly) if even one byte is wrong, the guide-rails provided by new
/// types and enums are worth the additional up-front effort of creating a few extra shared
/// crates/types.
///
/// See the readme for more discussion around this.
macro_rules! aingle_serial {
    ( $( $t:ty ),* $(,)? ) => {

        $(
            // By reference: encode the value and wrap the resulting bytes.
            impl ::std::convert::TryFrom<&$t> for $crate::SerializedBytes {
                type Error = $crate::SerializedBytesError;
                fn try_from(t: &$t) -> ::std::result::Result<$crate::SerializedBytes, $crate::SerializedBytesError> {
                    $crate::encode(t).map(|v|
                        $crate::SerializedBytes::from($crate::UnsafeBytes::from(v))
                    )
                }
            }

            // By value: delegate to the by-reference implementation. The call is fully
            // qualified so it resolves even when the invoking module has not imported
            // `TryFrom`.
            impl ::std::convert::TryFrom<$t> for $crate::SerializedBytes {
                type Error = $crate::SerializedBytesError;
                fn try_from(t: $t) -> ::std::result::Result<$crate::SerializedBytes, $crate::SerializedBytesError> {
                    <$crate::SerializedBytes as ::std::convert::TryFrom<&$t>>::try_from(&t)
                }
            }

            // Back out of bytes: decode the inner bytes into the target type.
            impl ::std::convert::TryFrom<$crate::SerializedBytes> for $t {
                type Error = $crate::SerializedBytesError;
                fn try_from(sb: $crate::SerializedBytes) -> ::std::result::Result<$t, $crate::SerializedBytesError> {
                    $crate::decode(sb.bytes())
                }
            }
        )*

    };
}
283
284aingle_serial!(());
285
286impl Default for SerializedBytes {
287    fn default() -> Self {
288        SerializedBytes::try_from(()).unwrap()
289    }
290}
291
292impl<'a> TryFrom<&'a SerializedBytes> for SerializedBytes {
293    type Error = SerializedBytesError;
294    fn try_from(s: &'a SerializedBytes) -> Result<Self, Self::Error> {
295        Ok(s.clone())
296    }
297}
298
#[cfg(test)]
pub mod tests {

    use super::prelude::*;
    use std::convert::TryInto;

    /// Fixture: struct holding a utf8 string.
    #[derive(Serialize, Deserialize, PartialEq, Debug, Clone, SerializedBytes)]
    struct Foo {
        inner: String,
    }

    /// Fixture: struct holding raw bytes.
    #[derive(Serialize, Deserialize, PartialEq, Debug, Clone, SerializedBytes)]
    struct Bar {
        whatever: Vec<u8>,
    }

    /// Fixture: enum whose variants carry data.
    #[derive(Serialize, Deserialize, PartialEq, Debug, Clone, SerializedBytes)]
    enum BazResult {
        Ok(Vec<u8>),
        Err(String),
    }

    /// Fixture: struct holding an optional enum.
    #[derive(Serialize, Deserialize, PartialEq, Debug, Clone, SerializedBytes)]
    struct Baz {
        wow: Option<BazResult>,
    }

    /// Fixture: newtype around a single byte.
    #[derive(Serialize, Deserialize, PartialEq, Debug, Clone, SerializedBytes)]
    struct Tiny(u8);

    /// Fixture: newtype around a byte vector.
    #[derive(Serialize, Deserialize, PartialEq, Debug, Clone, SerializedBytes)]
    struct SomeBytes(Vec<u8>);

    /// Fixture: struct that nests already-serialized bytes.
    #[derive(Serialize, Deserialize, PartialEq, Debug, Clone, SerializedBytes)]
    struct IncludesSerializedBytes {
        inner: SerializedBytes,
    }

    fn fixture_foo() -> Foo {
        let inner = "foo".into();
        Foo { inner }
    }

    fn fixture_bar() -> Bar {
        let whatever = vec![1_u8, 2_u8, 3_u8];
        Bar { whatever }
    }

    /// Exercises `encode`/`decode` with a TRACE-level subscriber installed, covering both the
    /// error paths and the `()` happy path.
    #[cfg(feature = "trace")]
    #[test]
    fn test_trace() {
        /// A value whose serialization always fails.
        #[derive(Debug)]
        struct BadSerialize;

        impl serde::Serialize for BadSerialize {
            fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
            where
                S: serde::Serializer,
            {
                Err(serde::ser::Error::custom("Cannot serialize!"))
            }
        }

        let collector = tracing_subscriber::fmt()
            .with_max_level(tracing::Level::TRACE)
            .finish();

        tracing::subscriber::with_default(collector, || {
            let failed_encode: Result<Vec<u8>, SerializedBytesError> = encode(&BadSerialize);
            assert_eq!(
                failed_encode,
                Err(SerializedBytesError::Serialize("Cannot serialize!".into()))
            );

            let bad_bytes = vec![1, 2, 3];
            let failed_decode: Result<String, SerializedBytesError> = decode(&bad_bytes);
            assert_eq!(
                failed_decode,
                Err(SerializedBytesError::Deserialize(
                    "invalid type: integer `1`, expected a string".into()
                ))
            );

            let unit_encoded: Result<Vec<u8>, SerializedBytesError> = encode(&());
            assert_eq!(unit_encoded.unwrap(), vec![192],);

            let unit_decoded: Result<(), SerializedBytesError> = decode(&vec![192]);
            assert_eq!(unit_decoded.unwrap(), ());
        });
    }

    /// Each fixture must serialize to the exact expected bytes (by value and by reference) and
    /// deserialize back to an equal value.
    #[test]
    fn round_trip() {
        macro_rules! do_test {
            ( $t:ty, $i:expr, $o:expr ) => {{
                let input = $i;
                let serialized: SerializedBytes = input.clone().try_into().unwrap();
                // this isn't for testing it just shows how the debug output looks
                println!("{:?}", &serialized);

                assert_eq!(&$o, serialized.bytes(),);

                let restored: $t = serialized.try_into().unwrap();

                assert_eq!(restored, input);

                // as ref
                let serialized_from_ref = SerializedBytes::try_from(&input).unwrap();

                assert_eq!(&$o, serialized_from_ref.bytes());
            }};
        }

        do_test!(
            Foo,
            fixture_foo(),
            vec![129_u8, 165, 105, 110, 110, 101, 114, 163, 102, 111, 111]
        );

        do_test!(
            Bar,
            fixture_bar(),
            vec![129_u8, 168, 119, 104, 97, 116, 101, 118, 101, 114, 147, 1, 2, 3]
        );

        do_test!(
            Baz,
            Baz {
                wow: Some(BazResult::Ok(vec![2, 5, 6]))
            },
            vec![129, 163, 119, 111, 119, 129, 162, 79, 107, 147, 2, 5, 6]
        );

        do_test!(Tiny, Tiny(5), vec![5]);

        do_test!(
            SomeBytes,
            SomeBytes(vec![1_u8, 90_u8, 155_u8]),
            vec![147, 1, 90, 204, 155]
        );

        do_test!((), (), vec![192]);

        do_test!(
            IncludesSerializedBytes,
            IncludesSerializedBytes {
                inner: fixture_foo().try_into().unwrap()
            },
            vec![
                129, 165, 105, 110, 110, 101, 114, 196, 11, 129, 165, 105, 110, 110, 101, 114, 163,
                102, 111, 111
            ]
        );
    }

    /// Converting `SerializedBytes` into `SerializedBytes` leaves it unchanged.
    #[test]
    fn self_noop() {
        let original: SerializedBytes = fixture_foo().try_into().unwrap();

        let converted: SerializedBytes = original.clone().try_into().unwrap();

        assert_eq!(original, converted,);
    }

    /// Arbitrary bytes survive a trip `UnsafeBytes` -> `SerializedBytes` -> `UnsafeBytes`.
    #[test]
    fn provide_own_bytes() {
        let bytes = vec![1_u8, 90_u8, 155_u8];
        let own_bytes = UnsafeBytes::from(bytes.clone());
        let wrapped: SerializedBytes = own_bytes.clone().into();

        assert_eq!(wrapped.bytes(), &bytes,);

        let own_bytes_restored: UnsafeBytes = wrapped.into();

        assert_eq!(&own_bytes.0, &own_bytes_restored.0,);
        assert_eq!(&bytes, &own_bytes.0);
        assert_eq!(&bytes, &own_bytes_restored.0);
    }

    /// The default `SerializedBytes` is the single byte `192`.
    #[test]
    fn default_test() {
        let default_bytes = SerializedBytes::default();
        assert_eq!(&vec![192_u8], default_bytes.bytes());
    }
}
493}