1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
//! A set of primitives and [Serde](https://serde.rs) serializers for
//! fast, prefix-free encoding which preserves lexicographic ordering of values.
//!
//! It is intended for encoding keys and values in key-value databases.
//!
//! ## OMG! Yet another serialization format?
//!
//! In most existing designs, prefix-free encoding of byte sequences is performed by escaping
//! "end-of-sequence" bytes. This takes extra space, and makes it difficult to know sequence length
//! without processing the whole input buffer; this also complicates memory allocation for
//! deserialized data. Instead, we take advantage of the fact that exact record size is always
//! known in key-value databases. This implementation relies on "two-sided" buffer design:
//! sequence lengths are varint-encoded and pushed to the tail end of the buffer, so
//! it is possible to get original length of serialized byte sequence(s) by deserializing
//! a few bytes only.
//! For serialization, this implementation provides (very fast) calculation of exact size
//! of serialized data length before serialization itself. These features
//! enable effective and predictable buffer management for repetitive scans and no-heap
//! (`#[no-std]`) targets.
//!
//! ## Features
//!
//! * encodings in both ascending and descending lexicographic orderings are supported
//! * encoding puts lengths of variable-size sequences to the end of serialized data,
//!   so resulting encoding is prefix-free and friendly to lexicographic ordering
//! * zero allocations, supports `#[no_std]` environments
//! * method to cheaply get exact size of serialized data without doing actual serialization,
//!   for effective buffer management
//! * space-efficient varint encoding for sequence lengths and discriminants
//! * easily customizable (endianness, encoding of primitive types etc.), with useful pre-sets
//! * reader/writer traits for double-ended buffers, so you can implement your own or use
//!   implementations provided by the crate
//! * no unsafe code
//!
//! ## Cargo.toml features and dependencies
//!
//! * `serde` (on by default): include `serde` serializer and deserializer.
//!    If you need only primitives, you can opt out.
//! * `std` (on by default): opt out for `#[no-std]` use, you will lose some convenience methods
//!   which use `Vec<u8>`
//!
//! ## Stability guarantees
//! The underlying encoding format is simple and unlikely to change.
//! As a safeguard, `Serializer` and `Deserializer` implement `FormatVersion` trait for all serializer parameter
//! pre-sets (`params::AscendingOrder`, `params::PortableBinary`, `params::NativeBinary`).
//!
//! Note: serializing with descending lexicographic order is particularly useful for key-value
//! databases like _rocksdb_, where reverse iteration is slower than forward iteration.

#![crate_name = "ordcode"]
#![deny(clippy::all, clippy::pedantic)]
#![allow(clippy::missing_errors_doc, clippy::map_err_ignore )]

#[cfg(feature="serde")] #[macro_use] extern crate serde;

#[macro_use] mod errors;
#[doc(inline)]
pub use errors::Error;

/// A convenient Result type
pub type Result<T = (), E = errors::Error> = core::result::Result<T, E>;

#[macro_use] pub mod primitives;
pub mod varint;
pub mod bytes_esc;

pub mod params;
pub mod buf;

#[doc(inline)]
pub use params::Order;
pub use buf::{DeBytesReader, DeBytesWriter, ReadFromTail, WriteToTail };

#[cfg(feature="serde")] mod size_calc;
#[cfg(feature="serde")] mod ord_ser;
#[cfg(feature="serde")] mod ord_de;

#[doc(inline)]
#[cfg(feature="serde")] pub use ord_ser::Serializer;
#[doc(inline)]
#[cfg(feature="serde")] pub use ord_de::Deserializer;
#[doc(inline)]
#[cfg(feature="serde")] pub use size_calc::SizeCalc;

/// Current version of data encoding format for [`Serializer`] parametrized with
/// some [`params::SerializerParams`].
pub trait FormatVersion<P: params::SerializerParams> {
    const VERSION: u32;
}

/// Calculate exact size of serialized data for a [`serde::Serialize`] value.
///
/// Useful for calculating exact size of serialized objects for buffer allocations.
/// Calculation process is inexpensive, for fixed-size objects it evaluates to compile-time
/// constant, or a few `len()` method calls for variable-size objects.
///
/// ```
/// # use ordcode::{ calc_size, params };
/// # use serde::ser::Serialize;
///
/// #[derive(serde_derive::Serialize)]
/// struct Foo(u16, String);
/// let foo = Foo(1, "abc".to_string());
///
/// let data_size = calc_size(&foo, params::AscendingOrder).unwrap();
/// assert_eq!(data_size, 6);
/// ```
#[cfg(feature="serde")]
pub fn calc_size<T, P>(value: &T, _params: P) -> Result<usize>
    where T: ?Sized + serde::ser::Serialize,
          P: params::SerializerParams,
{
    let mut sc = size_calc::SizeCalc::<P>::new();
    value.serialize(&mut sc)?;
    Ok(sc.size())
}

/// Convenience method: same as [`calc_size()`], with [`params::AscendingOrder`]
#[cfg(feature="serde")]
pub fn calc_size_asc<T>(value: &T) -> Result<usize>
    where T: ?Sized + serde::ser::Serialize,
{
    calc_size(value, params::AscendingOrder)
}

/// Serialize `value` into pre-allocated byte buffer.
///
/// Buffer is supposed to be large enough to hold serialized data. You can use [`calc_size()`]
/// to get exact size of required buffer before serialization.
///
/// Returns the actual size of serialized data.
///
/// *Example*
/// ```
/// # use ordcode::{ Order, calc_size_asc, ser_to_buf_ordered };
/// # use serde::ser::Serialize;
///
/// #[derive(serde_derive::Serialize)]
/// struct Foo(u16, String);
/// let foo = Foo(1, "abc".to_string());
///
/// assert_eq!(calc_size_asc(&foo).unwrap(), 6);
/// let mut buf = [0_u8; 100];  // actually, need 6 bytes for this, but can use larger buffer
/// assert_eq!(ser_to_buf_ordered(&mut buf, &foo, Order::Ascending).unwrap(), 6);
/// assert_eq!(&buf[2..5], b"abc");
/// assert_eq!(buf[5], 7); // last byte is string length (3) in varint encoding
/// ```
#[cfg(feature="serde")]
pub fn ser_to_buf_ordered<T>(buf: &mut [u8], value: &T, order: Order) -> Result<usize>
    where T: ?Sized + serde::ser::Serialize,
{
    let mut de_buf = DeBytesWriter::new(buf);
    let mut ser = new_ser_asc(&mut de_buf);
    value.serialize(&mut ser)?;
    let len = de_buf.finalize()?;
    if matches!(order, Order::Descending) {
        primitives::invert_buffer(buf);
    }
    Ok(len)
}

/// Serialize `value` into pre-allocated, exact size byte buffer
///
/// Buffer is expected to be of exact size to hold serialized data. You can use [`calc_size()`]
/// to get exact size of required buffer before serialization.
///
/// In case of buffer underflow or buffer overflow, corresponding error is returned.
///
/// *Example*
/// ```
/// # use ordcode::{ Order, calc_size_asc, ser_to_buf_asc_exact };
/// # use serde::ser::Serialize;
///
/// #[derive(serde_derive::Serialize)]
/// struct Foo(u16, String);
/// let foo = Foo(1, "abc".to_string());
///
/// assert_eq!(calc_size_asc(&foo).unwrap(), 6);
/// let mut buf = [0_u8; 6];  // need buffer of exact size!
/// ser_to_buf_asc_exact(&mut buf, &foo).unwrap();
/// assert_eq!(&buf[2..5], b"abc");
/// assert_eq!(buf[5], 7); // last byte is string length (3) in varint encoding
/// ```
#[cfg(feature="serde")]
pub fn ser_to_buf_asc_exact<T>(buf: &mut [u8], value: &T) -> Result
    where T: ?Sized + serde::ser::Serialize,
{
    let mut de_buf = DeBytesWriter::new(buf);
    let mut ser = new_ser_asc(&mut de_buf);
    value.serialize(&mut ser)?;
    de_buf.is_complete()
}

/// Serialize `value` into byte vector
///
/// *Example*
/// ```
/// # use ordcode::{ Order, ser_to_vec_ordered };
/// # use serde::ser::Serialize;
///
/// #[derive(serde_derive::Serialize)]
/// struct Foo(u16, String);
/// let foo = Foo(1, "abc".to_string());
///
/// let buf = ser_to_vec_ordered(&foo, Order::Ascending).unwrap();
/// assert_eq!(&buf[2..5], b"abc");
/// assert_eq!(buf[5], 7); // last byte is string length (3) in varint encoding
/// ```
#[cfg(all(feature="std", feature="serde"))]
pub fn ser_to_vec_ordered<T>(value: &T, order: Order) -> Result<Vec<u8>>
    where T: ?Sized + serde::ser::Serialize,
{
    let mut byte_buf = vec![0_u8; calc_size(value, params::AscendingOrder)?];
    let mut de_buf = DeBytesWriter::new(byte_buf.as_mut_slice());
    let mut ser = new_ser_asc(&mut de_buf);
    value.serialize(&mut ser)?;
    de_buf.is_complete()?;
    if matches!(order, Order::Descending) {
        primitives::invert_buffer(&mut byte_buf);
    }
    Ok(byte_buf)
}

/// Deserialize value from byte slice with [`params::AscendingOrder`]
///
/// *Example*
/// ```
/// # use serde::de::Deserialize;
/// # use ordcode::de_from_bytes_asc;
///
/// #[derive(serde_derive::Deserialize)]
/// struct Foo(u16, String);
///
/// let buf = [0_u8, 1, b'a', b'b', b'c', 7];
/// let foo: Foo = de_from_bytes_asc(&buf).unwrap();
/// assert_eq!(foo.0, 1);
/// assert_eq!(foo.1, "abc");
/// ```
#[cfg(feature="serde")]
pub fn de_from_bytes_asc<I, T>(input: I) -> Result<T>
    where I: AsRef<[u8]>,
          T: serde::de::DeserializeOwned,
{
    let mut reader = DeBytesReader::new(input.as_ref());
    let mut deser = new_de_asc(&mut reader);
    T::deserialize(&mut deser)
}
/// Deserialize value from mutable byte slice.
///
/// For [`Order::Descending`], the buffer will be inverted in-place.
///
/// *Example*
/// ```
/// # use serde::de::Deserialize;
/// # use ordcode::{ Order, de_from_bytes_ordered, primitives };
///
/// #[derive(serde_derive::Deserialize)]
/// struct Foo(u16, String);
///
/// let mut buf = [255_u8, 254, 158, 157, 156, 248];
/// let foo: Foo = de_from_bytes_ordered(&mut buf, Order::Descending).unwrap();
/// assert_eq!(foo.0, 1);
/// assert_eq!(foo.1, "abc");
/// ```
#[cfg(feature="serde")]
pub fn de_from_bytes_ordered<I, T>(mut input: I, order: Order) -> Result<T>
    where I: AsMut<[u8]>,
          T: serde::de::DeserializeOwned,
{
    if matches!(order, Order::Descending) {
        primitives::invert_buffer(input.as_mut());
    }
    let mut reader = DeBytesReader::new(input.as_mut());
    let mut deser = new_de_asc(&mut reader);
    T::deserialize(&mut deser)
}

/// Create new default serializer instance (with [`params::AscendingOrder`])
#[cfg(feature="serde")]
#[inline]
pub fn new_ser_asc<W>(writer: W) -> Serializer<W, params::AscendingOrder>
    where W: buf::TailWriteBytes,
{
    Serializer::new(writer, params::AscendingOrder)
}

/// Create new default deserializer instance (with [`params::AscendingOrder`])
#[cfg(feature="serde")]
#[inline]
pub fn new_de_asc<R>(reader: R) -> Deserializer<R, params::AscendingOrder>
    where R: buf::TailReadBytes,
{
    Deserializer::new(reader, params::AscendingOrder)
}