1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
// The `flame` profiler crate is only linked when the "profile" feature is enabled.
#[cfg(feature = "profile")]
extern crate flame;

/// Starts a `flame` profiling span named `"<name> - <type>"` that ends when the
/// enclosing scope exits (the guard binding is dropped at end of scope).
///
/// Two forms:
/// - `profile!(T, "name")` — explicit type for the span label.
/// - `profile!("name")` — defaults the type to `Self`, so this form is only
///   usable inside an `impl` block.
#[cfg(feature = "profile")]
#[macro_export]
macro_rules! profile {
    ($T:ty, $name:expr) => {
        // TODO: This does heap allocations.
        // Tried various things to make this const,
        // but even lazy_static runs into issues around
        // accessing the type generics.
        let _profile_guard_name = format!("{} - {}", $name, ::std::any::type_name::<$T>());
        let _profile_guard = ::flame::start_guard(_profile_guard_name);
    };
    ($name:expr) => {
        // NOTE(review): this recursive call relies on `profile!` being in scope
        // at the call site; `$crate::profile!` would be more hygienic for
        // external users — confirm before changing.
        profile!(Self, $name);
    };
}

/// No-op stand-in for the profiling macro when the "profile" feature is
/// disabled: it accepts any token stream and expands to nothing, so call
/// sites compile away entirely.
#[cfg(not(feature = "profile"))]
#[macro_export]
macro_rules! profile {
    ($($tokens:tt)*) => {};
}

// Implementation details, hidden from rustdoc but left `pub`.
// NOTE(review): presumably public so derive-generated code can reference it —
// confirm against the macros crate.
#[doc(hidden)]
pub mod internal;

// APIs that are public but not yet stabilized.
pub mod experimental;

/// Convenience re-exports for downstream users (`use tree_buf::prelude::*;`),
/// plus crate-internal glob imports.
pub mod prelude {
    // Likely the minimum API that should go here. It's easier to add later than to remove.

    #[cfg(feature = "macros")]
    pub use tree_buf_macros::{Read, Write};

    #[cfg(feature = "read")]
    pub use crate::read;

    #[cfg(feature = "write")]
    pub use crate::write;

    // This section makes everything interesting available to the rest of the crate
    // without bothering to manage imports.
    pub(crate) use crate::{internal::error::*, internal::*};

    // Crate-wide alias so read paths don't spell out the error type.
    #[cfg(feature = "read")]
    pub(crate) type ReadResult<T> = Result<T, ReadError>;

    #[cfg(feature = "profile")]
    pub(crate) use flame;

    // Makes the `profile!` macro reachable via the prelude glob inside the crate.
    pub(crate) use profile;
}

// Top-level re-exports, feature-gated to match where each item is defined.
#[cfg(feature = "read")]
pub use internal::error::ReadError;

#[cfg(feature = "read")]
pub use internal::Readable;

#[cfg(feature = "write")]
pub use internal::Writable;

#[cfg(feature = "write")]
pub use internal::options;

// Also surface the prelude's public items (read/write/derives) at the crate root.
pub use crate::prelude::*;

pub use internal::Ignore;

/// Serializes `value` into a tree-buf byte buffer using the default encode options.
///
/// Convenience wrapper around [`write_with_options`].
// Fix: this function was missing the `write` feature gate even though it bounds
// on `Writable` and calls `write_with_options`, both of which are only compiled
// with the "write" feature — a read-only build failed to compile. Mirrors the
// gating on `read` below.
#[cfg(feature = "write")]
pub fn write<T: Writable>(value: &T) -> Vec<u8> {
    let options = EncodeOptionsDefault;
    write_with_options(value, &options)
}

/// Serializes `value` into a tree-buf byte buffer with the supplied encode options.
///
/// The writer stream appends payload bytes to `buffer` while recording section
/// lengths in `section_lens`; the lengths are then appended in reverse order as
/// suffix varints (so they can be decoded backwards from the end of the buffer).
#[cfg(feature = "write")]
pub fn write_with_options<T: Writable>(value: &T, options: &impl EncodeOptions) -> Vec<u8> {
    profile!(T, "write_with_options");
    use internal::encodings::varint::encode_suffix_varint;

    let mut section_lens = Vec::new();
    let mut buffer = Vec::new();
    let mut stream = WriterStream::new(&mut buffer, &mut section_lens, options);
    stream.write_with_id(|stream| T::write_root(value, stream));

    section_lens
        .iter()
        .rev()
        .for_each(|&len| encode_suffix_varint(len as u64, &mut buffer));

    buffer
}

/// Deserializes a `T` from a tree-buf byte buffer using the default decode options.
///
/// Convenience wrapper around [`read_with_options`].
#[cfg(feature = "read")]
pub fn read<T: Readable>(bytes: &[u8]) -> ReadResult<T> {
    read_with_options(bytes, &DecodeOptionsDefault)
}

/// Deserializes a `T` from a tree-buf byte buffer with the supplied decode options.
///
/// Parses the raw bytes into the intermediate "sticks" representation via
/// `read_root`, then lets `T` interpret that structure.
#[cfg(feature = "read")]
pub fn read_with_options<T: Readable>(bytes: &[u8], options: &impl DecodeOptions) -> ReadResult<T> {
    profile!(T, "read_with_options");
    T::read(read_root(bytes)?, options)
}

// TODO: Figure out recursion, at least enough to handle this: https://docs.rs/serde_json/1.0.44/serde_json/value/enum.Value.html
// TODO: Nullable should be able to handle recursion as well, even if Option doesn't. (Option<Box<T>> could though)

// See also: c94adae3-9778-4a42-a454-650a97a87483
// TODO: (Performance) When recursion is not involved, there is a maximum to the amount of schema info needed to write
//       In order to do a 1 pass write on the data yet keep all the schema at the beginning of the file one could reserve
//       the maximum amount of buffer necessary for the schema, then write the data to the primary buffer, write the schema
//       to the beginning of the primary buffer and move it to be flush with the data. Technically, the schema will be 2-pass
//       but this may be much less than the data.
//
//       If we add a special sort of Recursion(depth) RootTypeId and ArrayTypeId then the schema may have a max size even
//       with recursion. This may have the added benefit of requiring less redundancy in the schema when recursion is involved.
//       One tricky part is that each array per recursion depth requires its own length. This could be dealt with by having the
//       Recursion Branch be its own data set with its own schema information? Not ideal.
//
//       The crux of the matter comes down to whether we want to have each depth and path for recursion be its own branch.
//       If the branch is shared, then this may come at a loss for clustering and schemas. Data is needed to make a decision.
//       What is recursion typically used for? There's the generic JavaScript "Value" type case. Trees. What else? How do these cluster?
//       In the generic JavaScript value case, much of the clustering info seems lost anyway.

// TODO: Evaluate TurboPFor https://github.com/powturbo/TurboPFor
// or consider the best parts of it. The core differentiator here
// is the ability to use this.

// TODO: Automatic type extraction for json:
// http://stevehanov.ca/blog/?id=104

// TODO: Add decimal type
// This seems a reasonable starting point: https://github.com/paupino/rust-decimal
//
// TODO: Consider RLE encoding and Dictionary encoding as pre-processors