serde_smile/ser/
mod.rs

1//! Serialize a Rust data structure into Smile data.
2use crate::ser::compound::{Compound, Mode};
3use crate::ser::key_serializer::{KeySerializer, MaybeStatic};
4use crate::ser::string_cache::StringCache;
5use crate::value::{BigDecimal, BigInteger};
6use crate::Error;
7use serde::ser::SerializeStruct;
8use serde::{serde_if_integer128, Serialize};
9use std::borrow::Cow;
10use std::convert::TryFrom;
11use std::io::Write;
12
13mod big_decimal_serializer;
14mod big_integer_serializer;
15mod compound;
16mod key_serializer;
17mod string_cache;
18
19/// Serializes the given data structure to a Smile byte vector using default serializer settings.
20pub fn to_vec<T>(value: &T) -> Result<Vec<u8>, Error>
21where
22    T: Serialize + ?Sized,
23{
24    let mut buf = vec![];
25    to_writer(&mut buf, value)?;
26    Ok(buf)
27}
28
29/// Serializes the given data structure as Smile into the IO stream using default serializer settings.
30pub fn to_writer<W, T>(writer: W, value: &T) -> Result<(), Error>
31where
32    W: Write,
33    T: ?Sized + Serialize,
34{
35    let mut serializer = Serializer::new(writer);
36    value.serialize(&mut serializer)
37}
38
/// A builder to configure a [`Serializer`].
///
/// Each option both sets a bit in the Smile header's flag byte and selects the matching
/// behavior of the serializer produced by `build`.
pub struct Builder {
    // Header flag 0x04; byte slices are written raw instead of 7-bit encoded when set.
    raw_binary: bool,
    // Header flag 0x02; the built serializer gets a cache for repeated value strings when set.
    shared_strings: bool,
    // Header flag 0x01; the built serializer gets a cache for repeated map key strings when set.
    shared_properties: bool,
}
45
46impl Builder {
47    /// Enables the transmission of binary data in "raw" form.
48    ///
49    /// This format is more performant and space efficient, but Smile framing tokens may be present in the encoded
50    /// binary data.
51    ///
52    /// Defaults to `false`.
53    pub fn raw_binary(&mut self, raw_binary: bool) -> &mut Self {
54        self.raw_binary = raw_binary;
55        self
56    }
57
58    /// Enables deduplication of repeated value strings.
59    ///
60    /// Defaults to `false`.
61    pub fn shared_strings(&mut self, shared_strings: bool) -> &mut Self {
62        self.shared_strings = shared_strings;
63        self
64    }
65
66    /// Enables deduplication of repeated map key strings.
67    ///
68    /// Defaults to `true`.
69    pub fn shared_properties(&mut self, shared_properties: bool) -> &mut Self {
70        self.shared_properties = shared_properties;
71        self
72    }
73
74    /// Creates a new [`Serializer`].
75    pub fn build<W>(&self, writer: W) -> Serializer<W>
76    where
77        W: Write,
78    {
79        let mut flags = 0;
80        if self.raw_binary {
81            flags |= 0x04;
82        }
83        if self.shared_strings {
84            flags |= 0x02;
85        }
86        if self.shared_properties {
87            flags |= 0x01;
88        }
89        let header = [b':', b')', b'\n', flags];
90
91        Serializer {
92            writer,
93            header: Some(header),
94            raw_binary: self.raw_binary,
95            shared_strings: if self.shared_strings {
96                Some(StringCache::new())
97            } else {
98                None
99            },
100            shared_properties: if self.shared_properties {
101                Some(StringCache::new())
102            } else {
103                None
104            },
105        }
106    }
107}
108
/// A structure for serializing Rust values into Smile.
///
/// Values are encoded directly into the wrapped writer; the header is emitted lazily.
pub struct Serializer<W> {
    // Destination for every encoded byte.
    writer: W,
    // Header bytes that have not been written yet; `write_header` takes them, leaving `None`.
    header: Option<[u8; 4]>,
    // Selects the raw (0xfd) rather than the 7-bit (0xe8) encoding in `serialize_bytes`.
    raw_binary: bool,
    // Back-reference cache for value strings; `None` when the feature is disabled.
    shared_strings: Option<StringCache>,
    // Cache for map key strings; `None` when disabled. Presumably consulted by the key
    // serializer, which lives in another module — TODO confirm.
    shared_properties: Option<StringCache>,
}
117
118impl Serializer<()> {
119    /// Returns a builder used to configure a `Serializer`.
120    pub fn builder() -> Builder {
121        Builder {
122            raw_binary: false,
123            shared_strings: false,
124            shared_properties: true,
125        }
126    }
127}
128
129impl<W> Serializer<W>
130where
131    W: Write,
132{
133    /// Creates a new `Serializer` with default settings.
134    pub fn new(writer: W) -> Self {
135        Serializer::builder().build(writer)
136    }
137
138    /// Writes the Smile header to the writer, if not already written.
139    ///
140    /// This will happen automatically when the first value is serialized, but this method can be
141    /// used to explicitly write it if desired.
142    pub fn write_header(&mut self) -> Result<(), Error> {
143        let Some(header) = self.header.take() else {
144            return Ok(());
145        };
146        self.writer.write_all(&header).map_err(Error::io)?;
147        Ok(())
148    }
149
150    /// Writes the Smile end of stream token to the writer.
151    ///
152    /// The end of stream indicator is not required in a Smile encoding, but can help with framing
153    /// in some contexts.
154    ///
155    /// This should only be called after serializing all data.
156    pub fn end(&mut self) -> Result<(), Error> {
157        self.write_header()?;
158        self.writer.write_all(&[0xff]).map_err(Error::io)
159    }
160
161    /// Returns a shared reference to the inner writer.
162    pub fn get_ref(&self) -> &W {
163        &self.writer
164    }
165
166    /// Returns a mutable reference to the inner writer.
167    pub fn get_mut(&mut self) -> &mut W {
168        &mut self.writer
169    }
170
171    /// Consumes the `Serializer`, returning the inner writer.
172    pub fn into_inner(self) -> W {
173        self.writer
174    }
175
176    fn serialize_vint(&mut self, mut v: u64) -> Result<(), Error> {
177        let mut buf = [0; 10];
178
179        let mut i = 9;
180        // the last byte only stores 6 bits
181        buf[i] = v as u8 & 0x3f | 0x80;
182        v >>= 6;
183
184        while v != 0 {
185            i -= 1;
186            buf[i] = v as u8 & 0x7f;
187            v >>= 7;
188        }
189
190        self.writer.write_all(&buf[i..]).map_err(Error::io)
191    }
192
193    fn serialize_shared_str(&mut self, v: &str) -> Result<bool, Error> {
194        let shared_strings = match &mut self.shared_strings {
195            Some(shared_strings) => shared_strings,
196            None => return Ok(false),
197        };
198
199        if v.len() > 64 {
200            return Ok(false);
201        }
202
203        match shared_strings.get(v) {
204            Some(backref) => {
205                if backref <= 30 {
206                    self.writer
207                        .write_all(&[backref as u8 + 1])
208                        .map_err(Error::io)?;
209                } else {
210                    let buf = [0xec | (backref >> 8) as u8, backref as u8];
211                    self.writer.write_all(&buf).map_err(Error::io)?;
212                }
213                Ok(true)
214            }
215            None => {
216                shared_strings.intern(Cow::Owned(v.to_string()));
217                Ok(false)
218            }
219        }
220    }
221
222    fn serialize_7_bit_binary(&mut self, v: &[u8]) -> Result<(), Error> {
223        self.serialize_vint(v.len() as u64)?;
224
225        let mut it = v.chunks_exact(7);
226        for chunk in &mut it {
227            let buf = [
228                chunk[0] >> 1,
229                ((chunk[0] << 6) | (chunk[1] >> 2)) & 0x7f,
230                ((chunk[1] << 5) | (chunk[2] >> 3)) & 0x7f,
231                ((chunk[2] << 4) | (chunk[3] >> 4)) & 0x7f,
232                ((chunk[3] << 3) | (chunk[4] >> 5)) & 0x7f,
233                ((chunk[4] << 2) | (chunk[5] >> 6)) & 0x7f,
234                ((chunk[5] << 1) | (chunk[6] >> 7)) & 0x7f,
235                chunk[6] & 0x7f,
236            ];
237            self.writer.write_all(&buf).map_err(Error::io)?;
238        }
239
240        if it.remainder().is_empty() {
241            return Ok(());
242        }
243
244        let mut buf = [0; 7];
245        let len = it.remainder().len();
246
247        for (i, &b) in it.remainder().iter().enumerate() {
248            buf[i] |= b >> (i + 1);
249            buf[i + 1] = (b << (6 - i)) & 0x7f;
250        }
251        // the last byte is annoyingly not actually shifted to its normal place
252        buf[len] >>= 7 - len;
253        self.writer.write_all(&buf[..len + 1]).map_err(Error::io)
254    }
255
256    fn serialize_big_integer(&mut self, v: &[u8]) -> Result<(), Error> {
257        self.write_header()?;
258        self.writer.write_all(&[0x26]).map_err(Error::io)?;
259        self.serialize_7_bit_binary(v)
260    }
261
262    fn serialize_static_key(&mut self, v: &'static str) -> Result<(), Error> {
263        KeySerializer { ser: self }.serialize_maybe_static_str(MaybeStatic::Static(v))
264    }
265}
266
267impl<'a, W> serde::Serializer for &'a mut Serializer<W>
268where
269    W: Write,
270{
271    type Ok = ();
272
273    type Error = Error;
274
275    type SerializeSeq = Compound<'a, W>;
276
277    type SerializeTuple = Compound<'a, W>;
278
279    type SerializeTupleStruct = Compound<'a, W>;
280
281    type SerializeTupleVariant = Compound<'a, W>;
282
283    type SerializeMap = Compound<'a, W>;
284
285    type SerializeStruct = Compound<'a, W>;
286
287    type SerializeStructVariant = Compound<'a, W>;
288
289    fn serialize_bool(self, v: bool) -> Result<Self::Ok, Self::Error> {
290        self.write_header()?;
291        let b = if v { 0x23 } else { 0x22 };
292        self.writer.write_all(&[b]).map_err(Error::io)
293    }
294
295    fn serialize_i8(self, v: i8) -> Result<Self::Ok, Self::Error> {
296        self.serialize_i32(i32::from(v))
297    }
298
299    fn serialize_i16(self, v: i16) -> Result<Self::Ok, Self::Error> {
300        self.serialize_i32(i32::from(v))
301    }
302
303    fn serialize_i32(self, v: i32) -> Result<Self::Ok, Self::Error> {
304        self.write_header()?;
305        let zigzag = zigzag_i32(v);
306
307        if zigzag < 32 {
308            self.writer
309                .write_all(&[0xc0 + zigzag as u8])
310                .map_err(Error::io)
311        } else {
312            self.writer.write_all(&[0x24]).map_err(Error::io)?;
313            self.serialize_vint(zigzag)
314        }
315    }
316
317    fn serialize_i64(self, v: i64) -> Result<Self::Ok, Self::Error> {
318        match i32::try_from(v) {
319            Ok(v) => self.serialize_i32(v),
320            Err(_) => {
321                self.write_header()?;
322                self.writer.write_all(&[0x25]).map_err(Error::io)?;
323                let zigzag = zigzag_i64(v);
324                self.serialize_vint(zigzag)
325            }
326        }
327    }
328
329    serde_if_integer128! {
330        fn serialize_i128(self, v: i128) -> Result<Self::Ok, Self::Error> {
331            match i64::try_from(v) {
332                Ok(v) => self.serialize_i64(v),
333                Err(_) => self.serialize_big_integer(&v.to_be_bytes()),
334            }
335        }
336    }
337
338    fn serialize_u8(self, v: u8) -> Result<Self::Ok, Self::Error> {
339        self.serialize_i32(i32::from(v))
340    }
341
342    fn serialize_u16(self, v: u16) -> Result<Self::Ok, Self::Error> {
343        self.serialize_i32(i32::from(v))
344    }
345
346    fn serialize_u32(self, v: u32) -> Result<Self::Ok, Self::Error> {
347        self.serialize_i64(i64::from(v))
348    }
349
350    fn serialize_u64(self, v: u64) -> Result<Self::Ok, Self::Error> {
351        match i64::try_from(v) {
352            Ok(v) => self.serialize_i64(v),
353            Err(_) => {
354                // we need an extra byte for the sign bit
355                let mut buf = [0; 9];
356                buf[1..].copy_from_slice(&v.to_be_bytes());
357                self.serialize_big_integer(&buf)
358            }
359        }
360    }
361
362    serde_if_integer128! {
363        fn serialize_u128(self, v: u128) -> Result<Self::Ok, Self::Error> {
364            match i128::try_from(v) {
365                Ok(v) => self.serialize_i128(v),
366                Err(_) => {
367                    // we need an extra byte for the sign bit
368                    let mut buf = [0; 17];
369                    buf[1..].copy_from_slice(&v.to_be_bytes());
370                    self.serialize_big_integer(&buf)
371                }
372            }
373        }
374    }
375
376    fn serialize_f32(self, v: f32) -> Result<Self::Ok, Self::Error> {
377        self.write_header()?;
378        let bits = v.to_bits();
379        let buf = [
380            0x28,
381            (bits >> 28) as u8 & 0x7f,
382            (bits >> 21) as u8 & 0x7f,
383            (bits >> 14) as u8 & 0x7f,
384            (bits >> 7) as u8 & 0x7f,
385            bits as u8 & 0x7f,
386        ];
387        self.writer.write_all(&buf).map_err(Error::io)
388    }
389
390    fn serialize_f64(self, v: f64) -> Result<Self::Ok, Self::Error> {
391        self.write_header()?;
392        let bits = v.to_bits();
393        let buf = [
394            0x29,
395            (bits >> 63) as u8 & 0x7f,
396            (bits >> 56) as u8 & 0x7f,
397            (bits >> 49) as u8 & 0x7f,
398            (bits >> 42) as u8 & 0x7f,
399            (bits >> 35) as u8 & 0x7f,
400            (bits >> 28) as u8 & 0x7f,
401            (bits >> 21) as u8 & 0x7f,
402            (bits >> 14) as u8 & 0x7f,
403            (bits >> 7) as u8 & 0x7f,
404            bits as u8 & 0x7f,
405        ];
406        self.writer.write_all(&buf).map_err(Error::io)
407    }
408
409    fn serialize_char(self, v: char) -> Result<Self::Ok, Self::Error> {
410        self.serialize_str(v.encode_utf8(&mut [0; 4]))
411    }
412
413    fn serialize_str(self, v: &str) -> Result<Self::Ok, Self::Error> {
414        self.write_header()?;
415        if v.is_empty() {
416            return self.writer.write_all(&[0x20]).map_err(Error::io);
417        }
418
419        if self.serialize_shared_str(v)? {
420            return Ok(());
421        }
422
423        #[allow(clippy::collapsible_else_if)]
424        if v.is_ascii() {
425            if v.len() <= 32 {
426                self.writer
427                    .write_all(&[0x40 + v.len() as u8 - 1])
428                    .map_err(Error::io)?;
429                self.writer.write_all(v.as_bytes()).map_err(Error::io)?;
430            } else if v.len() <= 64 {
431                self.writer
432                    .write_all(&[0x60 + v.len() as u8 - 33])
433                    .map_err(Error::io)?;
434                self.writer.write_all(v.as_bytes()).map_err(Error::io)?;
435            } else {
436                self.writer.write_all(&[0xe0]).map_err(Error::io)?;
437                self.writer.write_all(v.as_bytes()).map_err(Error::io)?;
438                self.writer.write_all(&[0xfc]).map_err(Error::io)?;
439            }
440        } else {
441            if v.len() <= 33 {
442                self.writer
443                    .write_all(&[0x80 + v.len() as u8 - 2])
444                    .map_err(Error::io)?;
445                self.writer.write_all(v.as_bytes()).map_err(Error::io)?;
446            } else if v.len() <= 64 {
447                self.writer
448                    .write_all(&[0xa0 + v.len() as u8 - 34])
449                    .map_err(Error::io)?;
450                self.writer.write_all(v.as_bytes()).map_err(Error::io)?;
451            } else {
452                self.writer.write_all(&[0xe4]).map_err(Error::io)?;
453                self.writer.write_all(v.as_bytes()).map_err(Error::io)?;
454                self.writer.write_all(&[0xfc]).map_err(Error::io)?;
455            }
456        }
457
458        Ok(())
459    }
460
461    fn serialize_bytes(self, v: &[u8]) -> Result<Self::Ok, Self::Error> {
462        self.write_header()?;
463        if self.raw_binary {
464            self.writer.write_all(&[0xfd]).map_err(Error::io)?;
465            self.serialize_vint(v.len() as u64)?;
466            self.writer.write_all(v).map_err(Error::io)
467        } else {
468            self.writer.write_all(&[0xe8]).map_err(Error::io)?;
469            self.serialize_7_bit_binary(v)
470        }
471    }
472
473    fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
474        self.serialize_unit()
475    }
476
477    fn serialize_some<T>(self, value: &T) -> Result<Self::Ok, Self::Error>
478    where
479        T: Serialize + ?Sized,
480    {
481        value.serialize(self)
482    }
483
484    fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
485        self.write_header()?;
486        self.writer.write_all(&[0x21]).map_err(Error::io)
487    }
488
489    fn serialize_unit_struct(self, _: &'static str) -> Result<Self::Ok, Self::Error> {
490        self.serialize_unit()
491    }
492
493    fn serialize_unit_variant(
494        self,
495        _name: &'static str,
496        _variant_index: u32,
497        variant: &'static str,
498    ) -> Result<Self::Ok, Self::Error> {
499        self.serialize_str(variant)
500    }
501
502    fn serialize_newtype_struct<T>(
503        self,
504        _name: &'static str,
505        value: &T,
506    ) -> Result<Self::Ok, Self::Error>
507    where
508        T: Serialize + ?Sized,
509    {
510        value.serialize(self)
511    }
512
513    fn serialize_newtype_variant<T>(
514        self,
515        _name: &'static str,
516        _variant_index: u32,
517        variant: &'static str,
518        value: &T,
519    ) -> Result<Self::Ok, Self::Error>
520    where
521        T: Serialize + ?Sized,
522    {
523        let mut ser = self.serialize_map(Some(1))?;
524        SerializeStruct::serialize_field(&mut ser, variant, value)?;
525        SerializeStruct::end(ser)
526    }
527
528    fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
529        self.write_header()?;
530        self.writer.write_all(&[0xf8]).map_err(Error::io)?;
531        Ok(Compound {
532            ser: self,
533            mode: Mode::Normal,
534        })
535    }
536
537    fn serialize_tuple(self, len: usize) -> Result<Self::SerializeTuple, Self::Error> {
538        self.serialize_seq(Some(len))
539    }
540
541    fn serialize_tuple_struct(
542        self,
543        _name: &'static str,
544        len: usize,
545    ) -> Result<Self::SerializeTupleStruct, Self::Error> {
546        self.serialize_tuple(len)
547    }
548
549    fn serialize_tuple_variant(
550        self,
551        _name: &'static str,
552        _variant_index: u32,
553        variant: &'static str,
554        _len: usize,
555    ) -> Result<Self::SerializeTupleVariant, Self::Error> {
556        self.write_header()?;
557        self.writer.write_all(&[0xfa]).map_err(Error::io)?;
558        self.serialize_static_key(variant)?;
559        self.writer.write_all(&[0xf8]).map_err(Error::io)?;
560        Ok(Compound {
561            ser: self,
562            mode: Mode::Normal,
563        })
564    }
565
566    fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
567        self.write_header()?;
568        self.writer.write_all(&[0xfa]).map_err(Error::io)?;
569        Ok(Compound {
570            ser: self,
571            mode: Mode::Normal,
572        })
573    }
574
575    fn serialize_struct(
576        self,
577        name: &'static str,
578        len: usize,
579    ) -> Result<Self::SerializeStruct, Self::Error> {
580        if name == BigInteger::STRUCT_NAME {
581            return Ok(Compound {
582                ser: self,
583                mode: Mode::BigInteger,
584            });
585        }
586
587        if name == BigDecimal::STRUCT_NAME {
588            return Ok(Compound {
589                ser: self,
590                mode: Mode::BigDecimal,
591            });
592        }
593
594        self.serialize_map(Some(len))
595    }
596
597    fn serialize_struct_variant(
598        self,
599        _name: &'static str,
600        _variant_index: u32,
601        variant: &'static str,
602        _len: usize,
603    ) -> Result<Self::SerializeStructVariant, Self::Error> {
604        self.write_header()?;
605        self.writer.write_all(&[0xfa]).map_err(Error::io)?;
606        self.serialize_static_key(variant)?;
607        self.writer.write_all(&[0xfa]).map_err(Error::io)?;
608        Ok(Compound {
609            ser: self,
610            mode: Mode::Normal,
611        })
612    }
613
    // Always reports `false`: serde passes this hint to `Serialize` impls that can choose
    // between a textual and a compact representation.
    fn is_human_readable(&self) -> bool {
        false
    }
617}
618
/// Zigzag-encodes a 32-bit signed integer so that values of small magnitude map to small
/// unsigned values (0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...).
#[inline]
fn zigzag_i32(v: i32) -> u64 {
    let doubled = v.wrapping_shl(1);
    // The arithmetic shift yields all ones for negative inputs and zero otherwise.
    let sign_mask = v >> 31;
    u64::from((doubled ^ sign_mask) as u32)
}
623
/// Zigzag-encodes a 64-bit signed integer so that values of small magnitude map to small
/// unsigned values (0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...).
#[inline]
fn zigzag_i64(v: i64) -> u64 {
    let doubled = v.wrapping_shl(1);
    // The arithmetic shift yields all ones for negative inputs and zero otherwise.
    let sign_mask = v >> 63;
    (doubled ^ sign_mask) as u64
}