1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
pub extern crate serde;
extern crate serde_json;

extern crate rmp_serde;

use serde::{Deserialize, Serialize};
use std::convert::TryFrom;

pub mod prelude;

/// Serialize `val` to messagepack bytes.
///
/// The serializer is configured with `with_struct_map()` and `with_string_variants()`, and
/// writes into a vector pre-allocated with 128 bytes of capacity.
///
/// # Errors
///
/// Returns the `rmp_serde` encode error if `val` fails to serialize.
pub fn encode<T: serde::Serialize>(val: &T) -> Result<Vec<u8>, rmp_serde::encode::Error> {
    let mut serializer = rmp_serde::encode::Serializer::new(Vec::with_capacity(128))
        .with_struct_map()
        .with_string_variants();
    val.serialize(&mut serializer)?;
    let bytes = serializer.into_inner();
    Ok(bytes)
}

pub fn decode<'a, R, T>(rd: &'a R) -> Result<T, rmp_serde::decode::Error>
where
    R: AsRef<[u8]> + ?Sized,
    T: Deserialize<'a>,
{
    rmp_serde::from_read_ref(rd)
}

/// Errors produced when converting data into or out of SerializedBytes.
///
/// Each variant carries a human readable message describing the underlying failure.
#[derive(
    Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, thiserror::Error,
)]
pub enum SerializedBytesError {
    /// Serializing a value into bytes failed.
    /// Most likely a messagepack limit was hit, see
    /// https://github.com/msgpack/msgpack/blob/master/spec.md#limitation
    ToBytes(String),
    /// Restoring a value from bytes failed.
    /// This could be caused by anything that is wrong with the bytes; the message carries
    /// whatever detail the decoder reported.
    FromBytes(String),
}

/// The Display output of the error is simply its Debug representation.
impl std::fmt::Display for SerializedBytesError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Deliberately formats with a fresh `{:?}` rather than delegating to `Debug::fmt`, so
        // flags given to the outer formatter (e.g. `{:#}`) are not forwarded.
        f.write_fmt(format_args!("{:?}", self))
    }
}

/// Converts the error into a `String` holding its Debug representation.
impl From<SerializedBytesError> for String {
    fn from(sb: SerializedBytesError) -> Self {
        std::fmt::format(format_args!("{:?}", sb))
    }
}

/// UnsafeBytes is the only way to implement a custom round trip through bytes for
/// SerializedBytes.
/// It is intended to be an internal detail of TryFrom implementations.
/// Code handling UnsafeBytes must assume the wrapped bytes are NOT valid messagepack data.
/// This lets us enforce that all data round-tripping through SerializedBytes goes via TryFrom
/// while still allowing custom non-messagepack canonical representations of data types.
#[derive(Clone, Debug)]
pub struct UnsafeBytes(Vec<u8>);

/// Wrap arbitrary bytes without any validation.
impl From<Vec<u8>> for UnsafeBytes {
    fn from(v: Vec<u8>) -> Self {
        UnsafeBytes(v)
    }
}

/// Unwrap back into the raw bytes without copying.
impl From<UnsafeBytes> for Vec<u8> {
    fn from(unsafe_bytes: UnsafeBytes) -> Vec<u8> {
        let UnsafeBytes(inner) = unsafe_bytes;
        inner
    }
}

impl From<UnsafeBytes> for SerializedBytes {
    fn from(b: UnsafeBytes) -> Self {
        SerializedBytes(b.0)
    }
}

impl From<SerializedBytes> for UnsafeBytes {
    fn from(sb: SerializedBytes) -> Self {
        UnsafeBytes(sb.0)
    }
}

/// A canonical serialized bytes representation for data.
/// If you have a data structure that needs a canonical byte representation use this.
/// Always round-trip through SerializedBytes via a single TryFrom implementation.
/// This ensures that the internal bytes of SerializedBytes are indeed canonical.
/// The corollary is that if bytes are NOT wrapped in SerializedBytes we can assume they are NOT
/// canonical.
/// Typically we need a canonical serialization when data is to be handled at the byte level by
/// independently implemented and maintained systems.
///
/// Examples of times we need a canonical set of bytes to represent data:
/// - cryptographic operations
/// - moving across the host/guest wasm boundary
/// - putting data on the network
///
/// Examples of times where we may not need a canonical representation and so may not need this:
/// - round tripping data through a database that has its own serialization preferences
/// - debug output or logging of data that is to be human readable
/// - moving between data types within a single system that has no external facing representation
///
/// uses #[repr(transparent)] to maximise compatibility with ffi
/// @see https://doc.rust-lang.org/1.26.2/unstable-book/language-features/repr-transparent.html#enter-reprtransparent
///
/// uses serde_bytes for efficient serialization and deserialization
/// without this __every byte will be individually round tripped through serde__
/// @see https://crates.io/crates/serde_bytes
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize, Hash)]
#[repr(transparent)]
pub struct SerializedBytes(#[serde(with = "serde_bytes")] Vec<u8>);

impl SerializedBytes {
    pub fn bytes(&self) -> &Vec<u8> {
        &self.0
    }
}

/// A bit of magic to convert the internal messagepack bytes into roughly equivalent JSON output
/// for the purposes of debugging.
/// 90% of the time you probably want this if you are a dev, to see something that "looks like" a
/// data structure when you do {:?} in a formatted string, rather than a vector of bytes
/// in the remaining 10% of situations where you want to debug the real messagepack bytes, call the
/// .bytes() method on SerializedBytes and debug that.
///
/// If the bytes cannot be rendered as JSON (for example they are custom non-messagepack bytes
/// provided through UnsafeBytes, or they hold something JSON cannot express) the raw byte
/// vector is printed instead, so formatting with {:?} never panics.
impl std::fmt::Debug for SerializedBytes {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // NB: there is a crate::decode function which currently simply uses
        // from_read_ref. If that ever changes, this may become inconsistent
        // and should be changed also.
        let mut deserializer = rmp_serde::Deserializer::from_read_ref(&self.0);
        let mut serializer = serde_json::ser::Serializer::new(Vec::new());

        // Stream the messagepack value straight into the JSON serializer.
        let transcoded = serde_transcode::transcode(&mut deserializer, &mut serializer);

        // A Debug impl should only fail when the underlying formatter fails, so a transcoding
        // problem is not surfaced as an error (or a panic); it selects the fallback below.
        let json = match transcoded {
            // Checked conversion: no need for `unsafe` just to skip a utf8 validation.
            Ok(_) => String::from_utf8(serializer.into_inner()).ok(),
            Err(_) => None,
        };

        match json {
            Some(s) => write!(f, "{}", s),
            // Fallback: show the real bytes, same as debugging `.bytes()`.
            None => write!(f, "{:?}", self.0),
        }
    }
}

#[macro_export]
/// unidiomatic way to derive default trait implementations of TryFrom in/out of SerializedBytes
///
/// Two main reasons this was done rather than the normal Derive approach:
/// - Derive requires a separate crate
/// - Derive doesn't allow for use of $crate to set unambiguous fully qualified paths to things
///
/// Both of these limitations push dependency management downstream to consumer crates more than we
/// want to.
/// This implementation allows us to manage all dependencies explicitly right here in this crate.
///
/// There is a default implementation of SerializedBytes into and out of ()
/// this is the ONLY supported direct primitive round-trip, which maps to `nil` in messagepack
/// for all other primitives, wrap them in a new type or enum
///
/// e.g. do NOT do this:
/// `u32::try_from(serialized_bytes)?;`
///
/// instead do this:
/// ```
/// use holochain_serialized_bytes::prelude::*;
///
/// #[derive(Serialize, Deserialize)]
/// pub struct SomeType(u32);
/// holochain_serial!(SomeType);
/// let serialized_bytes = SerializedBytes::try_from(SomeType(50)).unwrap();
/// let some_type = SomeType::try_from(serialized_bytes).unwrap();
/// ```
///
/// put `SomeType` in a separate crate that can be shared by all producers and consumers of the
/// serialized bytes in a minimal way.
/// this is a bit more upfront work but it's the only way the compiler can check a type maps to
/// a specific serialization across different systems, crate versions, and refactors.
///
/// for example, say we changed `SomeType(u32)` to `SomeType(u64)` in the shared crate
/// with the new type the compiler can enforce roundtripping of bytes works everywhere `SomeType`
/// is used, provided all producers and consumers use the same version of the shared crate.
/// in the case where we have no `SomeType` and would use integers directly, there is no safety.
/// the system can't tell the difference between a type mismatch (e.g. you just wrote u32 in the
/// wrong spot in one of the systems) and a serialization mismatch (e.g. the serialized bytes
/// produced by some system were consumed by another system using a different version of the shared
/// crate or something).
///
/// Developers then have to manually mentally impose the meaning of primitives over the top of code
/// across different code-bases that are ostensibly separate projects.
/// This introduces the effect where you can't understand/refactor one component of the system
/// without understanding and refactoring all the other components in the same PR/QA step.
///
/// An explicit goal of SerializedBytes is to introduce stability of byte-level data interfaces
/// across systems, provided they share the same version of a shared types crate.
/// This means that one component can implement a new version of the shared types and know
/// that it will be compatible with other components when they similarly upgrade AND other
/// components are safe to delay upgrading to the latest version of the shared crate until they are
/// ready to move. Essentially it allows for async development workflows around serialized data.
///
/// This is especially important for wasm as the wasm hosts and guests may not even be developed
/// by the same people/organisations, so there MUST be some compiler level guarantee that at least
/// the shared types within the same shared crate version have compatible serialization logic.
///
/// usually when working with primitives we are within a single system, i.e. a single compilation
/// context, a single set of dependencies, a single release/QA lifecycle
/// in this case, while we _could_ wrap every single primitive in a new type for maximum compile
/// time safety it is often 'overkill' and we can eat some ambiguity for the sake of ergonomics and
/// minimising the number of parallel types/trait implementations.
/// in the case of parallel, decoupled, independently maintained systems that rely on byte-level
/// canonical representation of things that will fail (e.g. cryptographically break or (de)allocate
/// memory incorrectly) if even one byte is wrong, the guide-rails provided by new types and enums
/// are worth the additional up-front effort of creating a few extra shared crates/types.
///
/// see the readme for more discussion around this
macro_rules! holochain_serial {
    // Accepts a comma separated list of types, with an optional trailing comma.
    ( $( $t:ty ),* $(,)? ) => {

        $(
            // By reference: encode the value to messagepack and wrap the resulting bytes.
            // Encoding failures are reported as `SerializedBytesError::ToBytes`.
            impl ::std::convert::TryFrom<&$t> for $crate::SerializedBytes {
                type Error = $crate::SerializedBytesError;
                fn try_from(t: &$t) -> ::std::result::Result<$crate::SerializedBytes, $crate::SerializedBytesError> {
                    $crate::encode(t)
                        .map(|v| $crate::SerializedBytes::from($crate::UnsafeBytes::from(v)))
                        .map_err(|e| $crate::SerializedBytesError::ToBytes(e.to_string()))
                }
            }

            // By value: delegate to the by-reference implementation above.
            // The trait call is fully qualified so the expansion compiles even when the
            // invoking module has not imported `TryFrom` (required on editions before 2021).
            impl ::std::convert::TryFrom<$t> for $crate::SerializedBytes {
                type Error = $crate::SerializedBytesError;
                fn try_from(t: $t) -> ::std::result::Result<$crate::SerializedBytes, $crate::SerializedBytesError> {
                    <$crate::SerializedBytes as ::std::convert::TryFrom<&$t>>::try_from(&t)
                }
            }

            // Back out of bytes: decode the messagepack bytes into the type.
            // Decoding failures are reported as `SerializedBytesError::FromBytes`.
            impl ::std::convert::TryFrom<$crate::SerializedBytes> for $t {
                type Error = $crate::SerializedBytesError;
                fn try_from(sb: $crate::SerializedBytes) -> ::std::result::Result<$t, $crate::SerializedBytesError> {
                    $crate::decode(sb.bytes())
                        .map_err(|e| $crate::SerializedBytesError::FromBytes(e.to_string()))
                }
            }
        )*

    };
}

holochain_serial!(());

impl Default for SerializedBytes {
    fn default() -> Self {
        SerializedBytes::try_from(()).unwrap()
    }
}

#[cfg(test)]
pub mod tests {

    use super::prelude::*;
    use std::convert::TryInto;

    /// Fixture type: struct holding a utf8 string.
    #[derive(Serialize, Deserialize, PartialEq, Debug, Clone, SerializedBytes)]
    struct Foo {
        inner: String,
    }

    /// Fixture type: struct holding raw bytes.
    #[derive(Serialize, Deserialize, PartialEq, Debug, Clone, SerializedBytes)]
    struct Bar {
        whatever: Vec<u8>,
    }

    /// Fixture type: enum with a bytes variant and a string variant.
    #[derive(Serialize, Deserialize, PartialEq, Debug, Clone, SerializedBytes)]
    enum BazResult {
        Ok(Vec<u8>),
        Err(String),
    }

    /// Fixture type: struct holding an optional enum.
    #[derive(Serialize, Deserialize, PartialEq, Debug, Clone, SerializedBytes)]
    struct Baz {
        wow: Option<BazResult>,
    }

    /// Fixture type: new type around a single byte.
    #[derive(Serialize, Deserialize, PartialEq, Debug, Clone, SerializedBytes)]
    struct Tiny(u8);

    /// Fixture type: new type around a byte vector.
    #[derive(Serialize, Deserialize, PartialEq, Debug, Clone, SerializedBytes)]
    struct SomeBytes(Vec<u8>);

    /// Fixture type: struct that nests already serialized bytes.
    #[derive(Serialize, Deserialize, PartialEq, Debug, Clone, SerializedBytes)]
    struct IncludesSerializedBytes {
        inner: SerializedBytes,
    }

    fn fixture_foo() -> Foo {
        Foo {
            inner: String::from("foo"),
        }
    }

    fn fixture_bar() -> Bar {
        Bar {
            whatever: vec![1, 2, 3],
        }
    }

    /// Every fixture must serialize to the exact expected bytes (by value and by reference)
    /// and deserialize back to an equal value.
    #[test]
    fn round_trip() {
        macro_rules! do_test {
            ( $t:ty, $i:expr, $o:expr ) => {{
                let input = $i;
                let expected: Vec<u8> = $o;

                // by value
                let by_value: SerializedBytes = input.clone().try_into().unwrap();
                // not an assertion, this only shows what the debug output looks like
                println!("{:?}", &by_value);
                assert_eq!(&expected, by_value.bytes());

                // back out of the bytes again
                let restored: $t = by_value.try_into().unwrap();
                assert_eq!(restored, input);

                // by reference
                let by_ref = SerializedBytes::try_from(&input).unwrap();
                assert_eq!(&expected, by_ref.bytes());
            }};
        }

        do_test!(
            Foo,
            fixture_foo(),
            vec![129, 165, 105, 110, 110, 101, 114, 163, 102, 111, 111]
        );

        do_test!(
            Bar,
            fixture_bar(),
            vec![129, 168, 119, 104, 97, 116, 101, 118, 101, 114, 147, 1, 2, 3]
        );

        do_test!(
            Baz,
            Baz {
                wow: Some(BazResult::Ok(vec![2, 5, 6]))
            },
            vec![129, 163, 119, 111, 119, 129, 162, 79, 107, 147, 2, 5, 6]
        );

        do_test!(Tiny, Tiny(5), vec![5]);

        do_test!(
            SomeBytes,
            SomeBytes(vec![1, 90, 155]),
            vec![147, 1, 90, 204, 155]
        );

        do_test!((), (), vec![192]);

        do_test!(
            IncludesSerializedBytes,
            IncludesSerializedBytes {
                inner: fixture_foo().try_into().unwrap()
            },
            vec![
                129, 165, 105, 110, 110, 101, 114, 196, 11, 129, 165, 105, 110, 110, 101, 114, 163,
                102, 111, 111
            ]
        );
    }

    /// Converting SerializedBytes into SerializedBytes leaves the bytes untouched.
    #[test]
    fn self_noop() {
        let original: SerializedBytes = fixture_foo().try_into().unwrap();

        let converted: SerializedBytes = original.clone().try_into().unwrap();

        assert_eq!(original, converted);
    }

    /// Bytes supplied through UnsafeBytes pass through SerializedBytes unchanged.
    #[test]
    fn provide_own_bytes() {
        let raw = vec![1_u8, 90_u8, 155_u8];
        let supplied = UnsafeBytes::from(raw.clone());
        let sb = SerializedBytes::from(supplied.clone());

        assert_eq!(sb.bytes(), &raw);

        let restored = UnsafeBytes::from(sb);

        assert_eq!(&supplied.0, &restored.0);
        assert_eq!(&raw, &supplied.0);
        assert_eq!(&raw, &restored.0);
    }

    /// The default value is the single byte 192.
    #[test]
    fn default_test() {
        let expected = vec![192_u8];
        assert_eq!(&expected, SerializedBytes::default().bytes());
    }
}