serde_columnar/lib.rs
1//! # Introduction
2//!
3//! `serde_columnar` is a crate that provides columnar storage for **List** and **Map** with compressible serialization and deserialization capabilities.
4//!
//! Columnar storage is very useful when you want to compress serialized data and you know that one or more fields of consecutive structs in the array either hold the same value or differ from the previous one by a constant amount.
6//!
7//! For example, you want to store this array:
8//!
//! ```text
10//! [{a: 1, b: 1}, {a: 1, b: 2}, {a: 1, b: 3}, ...]
11//! ```
12//! After columnar storage, it can be stored as:
13//!
//! ```text
15//! a: [1, 1, 1,...] ---Rle---> [N, 1]
16//! b: [1, 2, 3,...] ---DeltaRle---> [N, 1] (each value is 1 greater than the previous one)
17//! ```
18//!
19//! # Usage
20//!
21//! ```rust ignore
22//! type ID = u64;
23//! #[columnar(vec, ser, de)]
24//! #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
25//! pub struct Data {
26//! #[columnar(strategy = "Rle")]
27//! num: u32,
28//! #[columnar(strategy = "DeltaRle", original_type = "u64")]
29//! id: ID,
30//! #[columnar(strategy = "Rle")]
31//! gender: String,
32//! #[columnar(strategy = "BoolRle")]
//! married: bool,
//! #[columnar(strategy = "DeltaOfDelta")]
//! time: i64,
36//! }
37//!
38//! #[columnar]
39//! #[derive(Debug, Serialize, Deserialize)]
40//! pub struct VecStore {
41//! #[columnar(type = "vec")]
42//! pub data: Vec<Data>
43//! }
44//!
45//!
46//! let store = VecStore::new(...);
47//! let bytes = serde_columnar::to_vec(&store).unwrap();
48//! let store = serde_columnar::from_bytes::<VecStore>(&bytes).unwrap();
49//!
50//! ```
51//!
52//! # More Details
53//!
54//! ## Container
55//!
56//! - `#[columnar]` means that some fields (marked by `#[columnar(type = "vec"|"map")]`) of this structure can be serialized and deserialized by columnar encoding
57//! - `#[columnar(vec, map)]` means the struct can be a row inside `Vec-like` or `Map-like`
58//! - `#[columnar(ser, de)]` means the struct can be serialized or deserialized or both by columnar encoding
59//!
60//! ## Field Attributes
61//!
62//! - `#[columnar(type = "vec"|"map")]`:
//!   - `vec` means the decorated field of type `T` is a container that holds `Value`s and satisfies `&T: IntoIterator<Item = &Value>` and `T: FromIterator<Value>`
//!   - `map` means the decorated field of type `T` is a container that holds `Value`s keyed by `K` and satisfies `&T: IntoIterator<Item = (&K, &Value)>` and `T: FromIterator<(K, Value)>`
//! - `#[columnar(strategy = "Rle"|"BoolRle"|"DeltaRle"|"DeltaOfDelta")]`: choose exactly one of the following strategies
//!   - Rle [crate::strategy::AnyRleEncoder]
//!   - BoolRle [crate::strategy::BoolRleEncoder]
//!   - DeltaRle [crate::strategy::DeltaRleEncoder]
//!   - DeltaOfDelta [crate::strategy::DeltaOfDeltaEncoder]
70//! - `#[columnar(original_type="u32")]`: this attribute is used to tell the columnar encoding the original type of the field, which is used when the field is a number
71//! - `#[columnar(skip)]`: the same as the [skip](https://serde.rs/field-attrs.html#skip) attribute in serde
72//!
73
74mod err;
75
76pub use err::ColumnarError;
77use std::ops::DerefMut;
78mod column;
79pub use column::{
80 bool_rle::BoolRleColumn,
81 delta_of_delta::DeltaOfDeltaColumn,
82 delta_rle::{DeltaRleColumn, DeltaRleable},
83 rle::{RleColumn, Rleable},
84 ColumnAttr, ColumnTrait, GenericColumn,
85};
86mod columnar_internal;
87pub use columnar_internal::{ColumnarDecoder, ColumnarEncoder};
88pub mod iterable;
89mod row;
90pub use itertools::{izip, Itertools, MultiUnzip};
91pub use row::{KeyRowDe, KeyRowSer, RowDe, RowSer};
92use serde::{Deserialize, Serialize};
93mod strategy;
94pub use strategy::{
95 AnyRleDecoder, AnyRleEncoder, BoolRleDecoder, BoolRleEncoder, DeltaOfDeltaDecoder,
96 DeltaOfDeltaEncoder, DeltaRleDecoder, DeltaRleEncoder, Strategy,
97};
98mod wrap;
99pub use wrap::{ColumnarMap, ColumnarVec};
100
101pub use postcard::Error as PostcardError;
102pub use serde_columnar_derive::*;
103
104#[cfg(feature = "bench")]
105extern crate lazy_static;
106
107#[cfg(feature = "analyze")]
108mod analyze;
109#[cfg(feature = "analyze")]
110pub use analyze::{AnalyzeResult, AnalyzeResults, FieldAnalyze};
111#[cfg(feature = "analyze")]
112pub use serde_columnar_derive::FieldAnalyze;
113
114pub fn to_vec<T: Serialize>(val: &T) -> Result<Vec<u8>, ColumnarError> {
115 let mut encoder = ColumnarEncoder::new();
116 val.serialize(encoder.deref_mut())
117 .map_err(|e| ColumnarError::SerializeError(e as postcard::Error))?;
118 Ok(encoder.into_bytes())
119}
120
121pub fn from_bytes<'de, 'a: 'de, T: Deserialize<'de>>(bytes: &'a [u8]) -> Result<T, ColumnarError> {
122 let mut decoder = ColumnarDecoder::<'de>::new(bytes);
123 T::deserialize(decoder.deref_mut())
124 .map_err(|e| ColumnarError::SerializeError(e as postcard::Error))
125}
126
127pub fn iter_from_bytes<'de, T: iterable::TableIter<'de>>(
128 bytes: &'de [u8],
129) -> Result<T::Iter, ColumnarError> {
130 let mut decoder = ColumnarDecoder::<'de>::new(bytes);
131 T::Iter::deserialize(decoder.deref_mut())
132 .map_err(|e| ColumnarError::SerializeError(e as postcard::Error))
133}