serde_columnar/
lib.rs

1//! # Introduction
2//!
3//! `serde_columnar` is a crate that provides columnar storage for **List** and **Map** with compressible serialization and deserialization capabilities.
4//!
//! Columnar storage is very useful when you want to compress serialized data and you know that one or more fields of consecutive structs in the array either hold the same value or differ from the previous one by a constant amount.
6//!
7//! For example, you want to store this array:
8//!
//! ```text
10//! [{a: 1, b: 1}, {a: 1, b: 2}, {a: 1, b: 3}, ...]
11//! ```
12//! After columnar storage, it can be stored as:
13//!
//! ```text
15//! a: [1, 1, 1,...] ---Rle---> [N, 1]
16//! b: [1, 2, 3,...] ---DeltaRle---> [N, 1] (each value is 1 greater than the previous one)
17//! ```
18//!
19//! # Usage
20//!
21//! ```rust ignore
22//! type ID = u64;
23//! #[columnar(vec, ser, de)]
24//! #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
25//! pub struct Data {
26//!     #[columnar(strategy = "Rle")]
27//!     num: u32,
28//!     #[columnar(strategy = "DeltaRle", original_type = "u64")]
29//!     id: ID,
30//!     #[columnar(strategy = "Rle")]
31//!     gender: String,
32//!     #[columnar(strategy = "BoolRle")]
//!     married: bool,
34//!     #[columnar(strategy = "DeltaOfDelta")]
35//!     time: i64
36//! }
37//!
38//! #[columnar]
39//! #[derive(Debug, Serialize, Deserialize)]
40//! pub struct VecStore {
41//!     #[columnar(type = "vec")]
42//!     pub data: Vec<Data>
43//! }
44//!
45//!
46//! let store = VecStore::new(...);
47//! let bytes = serde_columnar::to_vec(&store).unwrap();
48//! let store = serde_columnar::from_bytes::<VecStore>(&bytes).unwrap();
49//!
50//! ```
51//!
52//! # More Details
53//!
54//! ## Container
55//!
56//! - `#[columnar]` means that some fields (marked by `#[columnar(type = "vec"|"map")]`) of this structure can be serialized and deserialized by columnar encoding
57//! - `#[columnar(vec, map)]` means the struct can be a row inside `Vec-like` or `Map-like`
58//! - `#[columnar(ser, de)]` means the struct can be serialized or deserialized or both by columnar encoding
59//!
60//! ## Field Attributes
61//!
62//! - `#[columnar(type = "vec"|"map")]`:
//!   - vec means the decorated field T is a container, holds Value and satisfies `&T: IntoIterator<Item=&Value>` `T: FromIterator<Value>`
//!   - map means the decorated field T is a container, holds Value and satisfies `&T: IntoIterator<Item=(&K, &Value)>` `T: FromIterator<(K, Value)>`
65//! - `#[columnar(strategy = "Rle"|"BoolRle"|"DeltaRle"|"DeltaOfDelta")]`: You can only choose one from
66//!   - Rle [crate::strategy::AnyRleEncoder]
67//!   - BoolRle [crate::strategy::BoolRleEncoder]
68//!   - DeltaRle [crate::strategy::DeltaRleEncoder]
69//!   - DeltaOfDelta [crate::strategy::DeltaOfDeltaEncoder]
70//! - `#[columnar(original_type="u32")]`: this attribute is used to tell the columnar encoding the original type of the field, which is used when the field is a number
71//! - `#[columnar(skip)]`: the same as the [skip](https://serde.rs/field-attrs.html#skip) attribute in serde
72//!
73
74mod err;
75
76pub use err::ColumnarError;
77use std::ops::DerefMut;
78mod column;
79pub use column::{
80    bool_rle::BoolRleColumn,
81    delta_of_delta::DeltaOfDeltaColumn,
82    delta_rle::{DeltaRleColumn, DeltaRleable},
83    rle::{RleColumn, Rleable},
84    ColumnAttr, ColumnTrait, GenericColumn,
85};
86mod columnar_internal;
87pub use columnar_internal::{ColumnarDecoder, ColumnarEncoder};
88pub mod iterable;
89mod row;
90pub use itertools::{izip, Itertools, MultiUnzip};
91pub use row::{KeyRowDe, KeyRowSer, RowDe, RowSer};
92use serde::{Deserialize, Serialize};
93mod strategy;
94pub use strategy::{
95    AnyRleDecoder, AnyRleEncoder, BoolRleDecoder, BoolRleEncoder, DeltaOfDeltaDecoder,
96    DeltaOfDeltaEncoder, DeltaRleDecoder, DeltaRleEncoder,
97};
98mod wrap;
99pub use wrap::{ColumnarMap, ColumnarVec};
100
101pub use postcard::Error as PostcardError;
102pub use serde_columnar_derive::*;
103pub mod __serde_utils;
104
105#[cfg(feature = "bench")]
106extern crate lazy_static;
107
108#[cfg(feature = "analyze")]
109mod analyze;
110#[cfg(feature = "analyze")]
111pub use analyze::{AnalyzeResult, AnalyzeResults, FieldAnalyze};
112#[cfg(feature = "analyze")]
113pub use serde_columnar_derive::FieldAnalyze;
114
115pub fn to_vec<T: Serialize>(val: &T) -> Result<Vec<u8>, ColumnarError> {
116    let mut encoder = ColumnarEncoder::new();
117    val.serialize(encoder.deref_mut())
118        .map_err(|e| ColumnarError::SerializeError(e as postcard::Error))?;
119    Ok(encoder.into_bytes())
120}
121
122pub fn from_bytes<'de, 'a: 'de, T: Deserialize<'de>>(bytes: &'a [u8]) -> Result<T, ColumnarError> {
123    let mut decoder = ColumnarDecoder::<'de>::new(bytes);
124    T::deserialize(decoder.deref_mut())
125        .map_err(|e| ColumnarError::SerializeError(e as postcard::Error))
126}
127
128pub fn iter_from_bytes<'de, T: iterable::TableIter<'de>>(
129    bytes: &'de [u8],
130) -> Result<T::Iter, ColumnarError> {
131    let mut decoder = ColumnarDecoder::<'de>::new(bytes);
132    T::Iter::deserialize(decoder.deref_mut())
133        .map_err(|e| ColumnarError::SerializeError(e as postcard::Error))
134}