tree_buf/lib.rs
#[doc(hidden)]
pub mod internal;

pub mod experimental;

pub mod prelude {
    // Likely the minimum API that should go here. It's easier to add later than to remove.

    #[cfg(feature = "macros")]
    pub use tree_buf_macros::{Decode, Encode};

    #[cfg(feature = "decode")]
    pub use crate::decode;

    #[cfg(feature = "encode")]
    pub use crate::encode;

    // This section makes everything interesting available to the rest of the crate
    // without bothering to manage imports.
    pub(crate) use crate::internal::encodings::varint::size_for_varint;
    pub(crate) use crate::{internal::error::*, internal::*};

    #[cfg(feature = "decode")]
    pub(crate) type DecodeResult<T> = Result<T, DecodeError>;

    pub(crate) use firestorm::{profile_fn, profile_method, profile_section};
}

#[cfg(feature = "decode")]
pub use internal::error::DecodeError;

#[cfg(feature = "decode")]
pub use internal::Decodable;

#[cfg(feature = "encode")]
pub use internal::Encodable;

pub use internal::options;

pub use crate::prelude::*;

pub use internal::Ignore;
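
/// Encode a value to Tree-Buf bytes using the default options.
///
/// A minimal round-trip sketch, assuming the `macros`, `encode`, and `decode`
/// features are enabled; `Point` is a hypothetical user-defined type, not part
/// of this crate:
///
/// ```ignore
/// use tree_buf::prelude::*;
///
/// #[derive(Encode, Decode, Debug, PartialEq)]
/// struct Point {
///     x: u64,
///     y: u64,
/// }
///
/// let point = Point { x: 1, y: 2 };
/// let bytes = tree_buf::encode(&point);
/// let decoded: Point = tree_buf::decode(&bytes).unwrap();
/// assert_eq!(point, decoded);
/// ```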
#[cfg(feature = "encode")]
pub fn encode<T: Encodable>(value: &T) -> Vec<u8> {
    let options = EncodeOptionsDefault;
    encode_with_options(value, &options)
}
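
/// Encode a value to Tree-Buf bytes with caller-supplied `EncodeOptions`.
///
/// A sketch of the call shape only; `opts` below stands for any value
/// implementing `EncodeOptions` (for example the `EncodeOptionsDefault` that
/// `encode` uses):
///
/// ```ignore
/// let bytes = tree_buf::encode_with_options(&point, &opts);
/// ```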
#[cfg(feature = "encode")]
pub fn encode_with_options<T: Encodable>(value: &T, options: &impl EncodeOptions) -> Vec<u8> {
    profile_fn!(encode_with_options);
    use internal::encodings::varint::encode_suffix_varint;

    let mut lens = Vec::new();
    let mut bytes = Vec::new();
    let mut stream = EncoderStream::new(&mut bytes, &mut lens, options);
    stream.encode_with_id(|stream| T::encode_root(value, stream));

    // Append the collected lengths to the end of the buffer as suffix varints,
    // in reverse order.
    for len in lens.iter().rev() {
        encode_suffix_varint(*len as u64, &mut bytes);
    }

    bytes
}
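
/// Decode a value from Tree-Buf bytes using the default options.
///
/// A minimal sketch, reusing the hypothetical `Point` type from the `encode`
/// example; failures are reported as `DecodeError`:
///
/// ```ignore
/// let decoded: Point = tree_buf::decode(&bytes)?;
/// ```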
#[cfg(feature = "decode")]
pub fn decode<T: Decodable>(bytes: &[u8]) -> DecodeResult<T> {
    let options = DecodeOptionsDefault;
    decode_with_options(bytes, &options)
}

#[cfg(feature = "decode")]
pub fn decode_with_options<T: Decodable>(bytes: &[u8], options: &impl DecodeOptions) -> DecodeResult<T> {
    profile_fn!(T, decode_with_options);
    let sticks = decode_root(bytes)?;
    T::decode(sticks, options)
}
// TODO: Figure out recursion, at least enough to handle this: https://docs.rs/serde_json/1.0.44/serde_json/value/enum.Value.html
// TODO: Nullable should be able to handle recursion as well, even if Option doesn't. (Option<Box<T>> could, though.)

// See also: c94adae3-9778-4a42-a454-650a97a87483
// TODO: (Performance) When recursion is not involved, there is a maximum to the amount of schema info needed to encode.
// In order to do a 1-pass encode on the data yet keep all the schema at the beginning of the file, one could reserve
// the maximum amount of buffer necessary for the schema, encode the data to the primary buffer, encode the schema
// to the beginning of the primary buffer, and then move it to be flush with the data. Technically, the schema will be 2-pass,
// but it may be much smaller than the data.
//
// If we add a special sort of Recursion(depth) RootTypeId and ArrayTypeId, then the schema may have a maximum size even
// with recursion. This may have the added benefit of requiring less redundancy in the schema when recursion is involved.
// One tricky part is that each array per recursion depth requires its own length. This could be dealt with by having the
// Recursion Branch be its own data set with its own schema information? Not ideal.
//
// The crux of the matter comes down to whether we want to have each depth and path for recursion be its own branch.
// If the branch is shared, then this may come at a loss for clustering and schemas. Data is needed to make a decision.
// What is recursion typically used for? There's the generic JavaScript "Value" type case. Trees. What else? How do these cluster?
// In the generic JavaScript value case, much of the clustering info seems lost anyway.
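//
// For illustration only (hypothetical, not part of this crate), the kind of recursive shape the
// notes above are about is roughly serde_json's Value:
//
//     enum Value {
//         Null,
//         Bool(bool),
//         Number(f64),
//         String(String),
//         Array(Vec<Value>),            // recursion through a sequence
//         Object(Vec<(String, Value)>), // recursion through key/value pairs
//     }
//
// Every recursive edge here goes through a container (Vec; Option<Box<Value>> would be another),
// which is why the notes above single out arrays and Nullable when thinking about recursion depth.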

// TODO: Evaluate TurboPFor https://github.com/powturbo/TurboPFor
// or consider the best parts of it. The core differentiator here
// is the ability to use this.

// TODO: Automatic type extraction for json:
// http://stevehanov.ca/blog/?id=104

// TODO: Add decimal type
// This seems a reasonable starting point: https://github.com/paupino/rust-decimal
//
// TODO: Look at Apache Arrow Flight, Parquet, Avro
// TODO: Look at bincode
// TODO: Look at HDF5
// TODO: Look at zebra +
// TODO: Read this https://en.wikipedia.org/wiki/Column-oriented_DBMS#Column-oriented_systems