aingle_middleware_bytes/lib.rs
1pub extern crate serde;
2extern crate serde_json;
3
4extern crate rmp_serde;
5
6use serde::{Deserialize, Serialize};
7use std::convert::Infallible;
8use std::convert::TryFrom;
9
10pub mod prelude;
11
12#[cfg_attr(feature = "trace", tracing::instrument)]
13pub fn encode<T: serde::Serialize + std::fmt::Debug>(
14 val: &T,
15) -> Result<Vec<u8>, SerializedBytesError> {
16 let buf = Vec::with_capacity(128);
17 let mut se = rmp_serde::encode::Serializer::new(buf)
18 .with_struct_map()
19 .with_string_variants();
20 val.serialize(&mut se).map_err(|err| {
21 #[cfg(feature = "trace")]
22 tracing::warn!("Failed to serialize input");
23 SerializedBytesError::Serialize(err.to_string())
24 })?;
25 let ret = se.into_inner();
26 #[cfg(feature = "trace")]
27 tracing::trace!(
28 "Serialized {} input into {:?}",
29 std::any::type_name::<T>(),
30 ret
31 );
32 Ok(ret)
33}
34
35#[cfg_attr(feature = "trace", tracing::instrument)]
36pub fn decode<'a, R, T>(input: &'a R) -> Result<T, SerializedBytesError>
37where
38 R: AsRef<[u8]> + ?Sized + std::fmt::Debug,
39 T: Deserialize<'a> + std::fmt::Debug,
40{
41 let ret = rmp_serde::from_read_ref(input).map_err(|err| {
42 #[cfg(feature = "trace")]
43 tracing::warn!(
44 "Failed to deserialize input into: {}",
45 std::any::type_name::<T>()
46 );
47 SerializedBytesError::Deserialize(err.to_string())
48 })?;
49 #[cfg(feature = "trace")]
50 tracing::trace!("Deserialized input into: {:?}", ret);
51 Ok(ret)
52}
53
54#[derive(
55 Clone,
56 Debug,
57 PartialEq,
58 Eq,
59 Hash,
60 PartialOrd,
61 Ord,
62 serde::Serialize,
63 serde::Deserialize,
64 thiserror::Error,
65)]
66pub enum SerializedBytesError {
67 /// somehow failed to move to bytes
68 /// most likely hit a messagepack limit https://github.com/msgpack/msgpack/blob/master/spec.md#limitation
69 Serialize(String),
70 /// somehow failed to restore bytes
71 /// i mean, this could be anything, how do i know what's wrong with your bytes?
72 Deserialize(String),
73}
74
75impl std::fmt::Display for SerializedBytesError {
76 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
77 write!(f, "{:?}", self)
78 }
79}
80
81impl From<SerializedBytesError> for String {
82 fn from(sb: SerializedBytesError) -> Self {
83 match sb {
84 SerializedBytesError::Serialize(s) => s,
85 SerializedBytesError::Deserialize(s) => s,
86 }
87 }
88}
89
90impl From<Infallible> for SerializedBytesError {
91 fn from(_: Infallible) -> Self {
92 unreachable!()
93 }
94}
95
/// `UnsafeBytes` is the only way to implement a custom round trip through bytes for
/// `SerializedBytes`.
///
/// It is meant to be an internal detail of `TryFrom` implementations. Any code that uses
/// `UnsafeBytes` is assumed NOT to hold valid messagepack data. This lets us enforce that
/// all data round-tripping through `SerializedBytes` goes via `TryFrom`, while still
/// allowing custom, non-messagepack canonical representations of data types.
#[derive(Clone, Debug)]
pub struct UnsafeBytes(Vec<u8>);
103
104impl From<Vec<u8>> for UnsafeBytes {
105 fn from(v: Vec<u8>) -> Self {
106 Self(v)
107 }
108}
109
110impl From<UnsafeBytes> for Vec<u8> {
111 fn from(unsafe_bytes: UnsafeBytes) -> Vec<u8> {
112 unsafe_bytes.0
113 }
114}
115
116impl From<UnsafeBytes> for SerializedBytes {
117 fn from(b: UnsafeBytes) -> Self {
118 SerializedBytes(b.0)
119 }
120}
121
122impl From<SerializedBytes> for UnsafeBytes {
123 fn from(sb: SerializedBytes) -> Self {
124 UnsafeBytes(sb.0)
125 }
126}
127
128/// A Canonical Serialized Bytes representation for data
129/// If you have a data structure that needs a canonical byte representation use this
130/// Always round-trip through SerializedBytes via. a single TryFrom implementation.
131/// This ensures that the internal bytes of SerializedBytes are indeed canonical.
132/// The corrolary is that if bytes are NOT wrapped in SerializedBytes we can assume they are NOT
133/// canonical.
134/// Typically we need a canonical middleware when data is to be handled at the byte level by
135/// independently implemented and maintained systems.
136///
137/// Examples of times we need a canonical set of bytes to represent data:
138/// - cryptographic operations
139/// - moving across the host/guest wasm boundary
140/// - putting data on the network
141///
142/// Examples of times where we may not need a canonical representation and so may not need this:
143/// - round tripping data through a database that has its own middleware preferences
144/// - debug output or logging of data that is to be human readible
145/// - moving between data types within a single system that has no external facing representation
146///
147/// uses #[repr(transparent)] to maximise compatibility with ffi
148/// @see https://doc.rust-lang.org/1.26.2/unstable-book/language-features/repr-transparent.html#enter-reprtransparent
149///
150/// uses serde_bytes for efficient middleware and demiddleware
151/// without this __every byte will be individually round tripped through serde__
152/// @see https://crates.io/crates/serde_bytes
153#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize, Hash)]
154#[repr(transparent)]
155pub struct SerializedBytes(#[serde(with = "serde_bytes")] Vec<u8>);
156
157impl SerializedBytes {
158 pub fn bytes(&self) -> &Vec<u8> {
159 &self.0
160 }
161}
162
163/// A bit of magic to convert the internal messagepack bytes into roughly equivalent JSON output
164/// for the purposes of debugging.
165/// 90% of the time you probably want this if you are a dev, to see something that "looks like" a
166/// data structure when you do {:?} in a formatted string, rather than a vector of bytes
167/// in the remaining 10% of situations where you want to debug the real messagepack bytes, call the
168/// .bytes() method on SerializedBytes and debug that.
169impl std::fmt::Debug for SerializedBytes {
170 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
171 // NB: there is a crate::decode function which currently simply uses
172 // from_read_ref. If that ever changes, this may become inconsistent
173 // and should be changed also.
174 let mut deserializer = rmp_serde::Deserializer::from_read_ref(&self.0);
175 let writer = Vec::new();
176 let mut serializer = serde_json::ser::Serializer::new(writer);
177 serde_transcode::transcode(&mut deserializer, &mut serializer).unwrap();
178 let s = unsafe { String::from_utf8_unchecked(serializer.into_inner()) };
179 write!(f, "{}", s)
180 }
181}
182
/// Unidiomatic way to derive default trait implementations of `TryFrom` in/out of
/// `SerializedBytes`.
///
/// Two main reasons this was done rather than the normal derive approach:
/// - a derive requires a separate crate
/// - a derive doesn't allow for use of `$crate` to set unambiguous fully qualified paths
///
/// Both of these limitations push dependency management downstream to consumer crates
/// more than we want to. This implementation allows us to manage all dependencies
/// explicitly right here in this crate.
///
/// For every listed type `T` the macro generates:
/// - `TryFrom<&T> for SerializedBytes` (encodes via `$crate::encode`)
/// - `TryFrom<T> for SerializedBytes` (delegates to the `&T` implementation)
/// - `TryFrom<SerializedBytes> for T` (decodes via `$crate::decode`)
///
/// Types are separated by commas; a trailing comma is accepted. The expansion only uses
/// fully qualified paths, so the calling module does not need `TryFrom` in scope.
///
/// There is a default implementation of `SerializedBytes` into and out of `()`.
/// This is the ONLY supported direct primitive round-trip, which maps to `nil` in
/// messagepack. For all other primitives, wrap them in a new type or enum.
///
/// e.g. do NOT do this:
/// `u32::try_from(serialized_bytes)?;`
///
/// instead do this:
/// ```
/// use aingle_middleware_bytes::prelude::*;
///
/// #[derive(Serialize, Deserialize, Debug)]
/// pub struct SomeType(u32);
/// aingle_serial!(SomeType);
/// let serialized_bytes = SerializedBytes::try_from(SomeType(50)).unwrap();
/// let some_type = SomeType::try_from(serialized_bytes).unwrap();
/// ```
///
/// Put `SomeType` in a separate crate that can be shared by all producers and consumers
/// of the serialized bytes in a minimal way.
/// This is a bit more upfront work but it's the only way the compiler can check that a
/// type maps to a specific serialization across different systems, crate versions, and
/// refactors.
///
/// For example, say we changed `SomeType(u32)` to `SomeType(u64)` in the shared crate.
/// With the new type the compiler can enforce that roundtripping of bytes works
/// everywhere `SomeType` is used, provided all producers and consumers use the same
/// version of the shared crate.
/// In the case where we have no `SomeType` and would use integers directly, there is no
/// safety. The system can't tell the difference between a type mismatch (e.g. you just
/// wrote u32 in the wrong spot in one of the systems) and a serialization mismatch (e.g.
/// the serialized bytes produced by some system were consumed by another system using a
/// different version of the shared crate or something).
///
/// Developers then have to manually, mentally impose the meaning of primitives over the
/// top of code across different code-bases that are ostensibly separate projects.
/// This introduces the effect where you can't understand/refactor one component of the
/// system without understanding and refactoring all the other components in the same
/// PR/QA step.
///
/// An explicit goal of `SerializedBytes` is to introduce stability of byte-level data
/// interfaces across systems, provided they share the same version of a shared types
/// crate. This means that one component can implement a new version of the shared types
/// and know that it will be compatible with other components when they similarly upgrade
/// AND other components are safe to delay upgrading to the latest version of the shared
/// crate until they are ready to move. Essentially it allows for async development
/// workflows around serialized data.
///
/// This is especially important for wasm as the wasm hosts and guests may not even be
/// developed by the same people/organisations, so there MUST be some compiler level
/// guarantee that at least the shared types within the same shared crate version have
/// compatible serialization logic.
///
/// Usually when working with primitives we are within a single system, i.e. a single
/// compilation context, a single set of dependencies, a single release/QA lifecycle.
/// In this case, while we _could_ wrap every single primitive in a new type for maximum
/// compile time safety, it is often 'overkill' and we can eat some ambiguity for the sake
/// of ergonomics and minimising the number of parallel types/trait implementations.
/// In the case of parallel, decoupled, independently maintained systems that rely on a
/// byte-level canonical representation of things that will fail (e.g. cryptographically
/// break or (de)allocate memory incorrectly) if even one byte is wrong, the guide-rails
/// provided by new types and enums are worth the additional up-front effort of creating a
/// few extra shared crates/types.
///
/// See the readme for more discussion around this.
#[macro_export]
macro_rules! aingle_serial {
    ( $( $t:ty ),* $(,)? ) => {

        $(
            impl ::std::convert::TryFrom<&$t> for $crate::SerializedBytes {
                type Error = $crate::SerializedBytesError;
                fn try_from(t: &$t) -> ::std::result::Result<$crate::SerializedBytes, $crate::SerializedBytesError> {
                    $crate::encode(t).map(|v|
                        $crate::SerializedBytes::from($crate::UnsafeBytes::from(v))
                    )
                }
            }

            impl ::std::convert::TryFrom<$t> for $crate::SerializedBytes {
                type Error = $crate::SerializedBytesError;
                fn try_from(t: $t) -> ::std::result::Result<$crate::SerializedBytes, $crate::SerializedBytesError> {
                    // Fully qualified so the expansion compiles even when the caller
                    // has not imported `TryFrom` (it is not in the pre-2021 preludes).
                    <$crate::SerializedBytes as ::std::convert::TryFrom<&$t>>::try_from(&t)
                }
            }

            impl ::std::convert::TryFrom<$crate::SerializedBytes> for $t {
                type Error = $crate::SerializedBytesError;
                fn try_from(sb: $crate::SerializedBytes) -> ::std::result::Result<$t, $crate::SerializedBytesError> {
                    $crate::decode(sb.bytes())
                }
            }
        )*

    };
}
283
284aingle_serial!(());
285
286impl Default for SerializedBytes {
287 fn default() -> Self {
288 SerializedBytes::try_from(()).unwrap()
289 }
290}
291
292impl<'a> TryFrom<&'a SerializedBytes> for SerializedBytes {
293 type Error = SerializedBytesError;
294 fn try_from(s: &'a SerializedBytes) -> Result<Self, Self::Error> {
295 Ok(s.clone())
296 }
297}
298
#[cfg(test)]
pub mod tests {

    use super::prelude::*;
    use std::convert::TryInto;

    /// Struct holding a utf8 string.
    #[derive(Serialize, Deserialize, PartialEq, Debug, Clone, SerializedBytes)]
    struct Foo {
        inner: String,
    }

    /// Struct holding raw bytes.
    #[derive(Serialize, Deserialize, PartialEq, Debug, Clone, SerializedBytes)]
    struct Bar {
        whatever: Vec<u8>,
    }

    /// Enum whose variants carry different payload types.
    #[derive(Serialize, Deserialize, PartialEq, Debug, Clone, SerializedBytes)]
    enum BazResult {
        Ok(Vec<u8>),
        Err(String),
    }

    /// Struct holding an optional enum.
    #[derive(Serialize, Deserialize, PartialEq, Debug, Clone, SerializedBytes)]
    struct Baz {
        wow: Option<BazResult>,
    }

    /// Newtype around a single byte.
    #[derive(Serialize, Deserialize, PartialEq, Debug, Clone, SerializedBytes)]
    struct Tiny(u8);

    /// Newtype around a byte vector.
    #[derive(Serialize, Deserialize, PartialEq, Debug, Clone, SerializedBytes)]
    struct SomeBytes(Vec<u8>);

    /// Struct that nests already-serialized bytes.
    #[derive(Serialize, Deserialize, PartialEq, Debug, Clone, SerializedBytes)]
    struct IncludesSerializedBytes {
        inner: SerializedBytes,
    }

    /// A `Foo` whose string is "foo".
    fn fixture_foo() -> Foo {
        let inner = String::from("foo");
        Foo { inner }
    }

    /// A `Bar` holding the bytes 1, 2, 3.
    fn fixture_bar() -> Bar {
        let whatever = vec![1_u8, 2_u8, 3_u8];
        Bar { whatever }
    }

    /// Exercises `encode`/`decode` on both failure and success paths while a TRACE level
    /// subscriber is installed.
    #[cfg(feature = "trace")]
    #[test]
    fn test_trace() {
        /// A type whose serialization always fails.
        #[derive(Debug)]
        struct BadSerialize;

        impl serde::Serialize for BadSerialize {
            fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
            where
                S: serde::Serializer,
            {
                Err(serde::ser::Error::custom("Cannot serialize!"))
            }
        }

        let subscriber = tracing_subscriber::fmt()
            .with_max_level(tracing::Level::TRACE)
            .finish();

        tracing::subscriber::with_default(subscriber, || {
            // Failing encode surfaces the custom serializer message.
            let failed_encode: Result<Vec<u8>, SerializedBytesError> = encode(&BadSerialize);
            assert_eq!(
                failed_encode,
                Err(SerializedBytesError::Serialize("Cannot serialize!".into()))
            );

            // These bytes start with the integer 1, which is not a string.
            let not_a_string = vec![1, 2, 3];
            let failed_decode: Result<String, SerializedBytesError> = decode(&not_a_string);
            assert_eq!(
                failed_decode,
                Err(SerializedBytesError::Deserialize(
                    "invalid type: integer `1`, expected a string".into()
                ))
            );

            // The unit value round-trips through the single byte 192.
            let unit_encoded: Result<Vec<u8>, SerializedBytesError> = encode(&());
            assert_eq!(unit_encoded.unwrap(), vec![192],);

            let unit_decoded: Result<(), SerializedBytesError> = decode(&vec![192]);
            assert_eq!(unit_decoded.unwrap(), ());
        });
    }

    /// Each case is converted by value and by reference, compared against the exact
    /// expected bytes, and converted back to the original value.
    #[test]
    fn round_trip() {
        macro_rules! do_test {
            ( $t:ty, $i:expr, $o:expr ) => {{
                let input = $i;
                let expected: Vec<u8> = $o;

                // by value
                let serialized: SerializedBytes = input.clone().try_into().unwrap();
                // this isn't for testing it just shows how the debug output looks
                println!("{:?}", &serialized);
                assert_eq!(&expected, serialized.bytes(),);

                // and back again
                let restored: $t = serialized.try_into().unwrap();
                assert_eq!(restored, input);

                // as ref
                let serialized_from_ref = SerializedBytes::try_from(&input).unwrap();
                assert_eq!(&expected, serialized_from_ref.bytes());
            }};
        }

        do_test!(
            Foo,
            fixture_foo(),
            vec![
                129_u8, 165_u8, 105_u8, 110_u8, 110_u8, 101_u8, 114_u8, 163_u8, 102_u8, 111_u8,
                111_u8,
            ]
        );

        do_test!(
            Bar,
            fixture_bar(),
            vec![
                129_u8, 168_u8, 119_u8, 104_u8, 97_u8, 116_u8, 101_u8, 118_u8, 101_u8, 114_u8,
                147_u8, 1_u8, 2_u8, 3_u8,
            ]
        );

        do_test!(
            Baz,
            Baz {
                wow: Some(BazResult::Ok(vec![2, 5, 6]))
            },
            vec![129, 163, 119, 111, 119, 129, 162, 79, 107, 147, 2, 5, 6]
        );

        do_test!(Tiny, Tiny(5), vec![5]);

        do_test!(
            SomeBytes,
            SomeBytes(vec![1_u8, 90_u8, 155_u8]),
            vec![147, 1, 90, 204, 155]
        );

        do_test!((), (), vec![192]);

        do_test!(
            IncludesSerializedBytes,
            IncludesSerializedBytes {
                inner: fixture_foo().try_into().unwrap()
            },
            vec![
                129, 165, 105, 110, 110, 101, 114, 196, 11, 129, 165, 105, 110, 110, 101, 114, 163,
                102, 111, 111
            ]
        );
    }

    /// Converting `SerializedBytes` into `SerializedBytes` leaves it unchanged.
    #[test]
    fn self_noop() {
        let original: SerializedBytes = fixture_foo().try_into().unwrap();

        let converted: SerializedBytes = original.clone().try_into().unwrap();

        assert_eq!(original, converted,);
    }

    /// Bytes supplied through `UnsafeBytes` pass through `SerializedBytes` untouched.
    #[test]
    fn provide_own_bytes() {
        let raw = vec![1_u8, 90_u8, 155_u8];
        let wrapped = UnsafeBytes::from(raw.clone());
        let serialized: SerializedBytes = wrapped.clone().into();

        assert_eq!(serialized.bytes(), &raw,);

        let unwrapped: UnsafeBytes = serialized.into();

        assert_eq!(&wrapped.0, &unwrapped.0,);
        assert_eq!(&raw, &wrapped.0);
        assert_eq!(&raw, &unwrapped.0);
    }

    /// The default value is the single byte 192.
    #[test]
    fn default_test() {
        let default_bytes = SerializedBytes::default();
        assert_eq!(&vec![192_u8], default_bytes.bytes());
    }
}
493}