1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
#![doc(
    html_favicon_url = "https://raw.githubusercontent.com/meilisearch/grenad/main/assets/grenad-pomegranate.ico?raw=true"
)]
#![doc(
    html_logo_url = "https://raw.githubusercontent.com/meilisearch/grenad/main/assets/grenad-pomegranate-logo.png?raw=true"
)]

//! This library provides tools to sort, merge, write, and read immutable key-value pairs.
//! The entries in the grenad files are _immutable_ and the only way to modify them is by _creating
//! a new file_ with the changes.
//!
//! # Features
//!
//! You can define which compression schemes to support, there are currently a few
//! available choices, these determine which types will be available inside the above modules:
//!
//! - _Snappy_ with the [`snap`](https://crates.io/crates/snap) crate.
//! - _Zlib_ with the [`flate2`](https://crates.io/crates/flate2) crate.
//! - _Lz4_ with the [`lz4_flex`](https://crates.io/crates/lz4_flex) crate.
//!
//! If you need more performances you can enable the `rayon` feature that will enable a bunch
//! of new settings like being able to make the `Sorter` sort in parallel.
//!
//! # Examples
//!
//! ## Use the `Writer` and `Reader` structs
//!
//! You can use the [`Writer`] struct to store key-value pairs into the specified
//! [`std::io::Write`] type. The [`Reader`] type can then be used to read the entries.
//!
//! The entries provided to the [`Writer`] struct must be given in lexicographic order.
//!
//! ```rust
//! use std::io::Cursor;
//!
//! use grenad::{Reader, Writer};
//!
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! let mut writer = Writer::memory();
//!
//! // We insert our key-value pairs in lexicographic order.
//! writer.insert("first-counter", 119_u32.to_ne_bytes())?;
//! writer.insert("second-counter", 384_u32.to_ne_bytes())?;
//!
//! // We create a reader from our writer.
//! let cursor = writer.into_inner().map(Cursor::new)?;
//! let mut cursor = Reader::new(cursor)?.into_cursor()?;
//!
//! // We can see that the sum of u32s is valid here.
//! assert_eq!(cursor.move_on_next()?, Some((&b"first-counter"[..], &119_u32.to_ne_bytes()[..])));
//! assert_eq!(cursor.move_on_next()?, Some((&b"second-counter"[..], &384_u32.to_ne_bytes()[..])));
//! assert_eq!(cursor.move_on_next()?, None);
//!
//! // We can also jum on any given entry.
//! assert_eq!(cursor.move_on_key_greater_than_or_equal_to("first")?, Some((&b"first-counter"[..], &119_u32.to_ne_bytes()[..])));
//! assert_eq!(cursor.move_on_key_equal_to("second-counter")?, Some((&b"second-counter"[..], &384_u32.to_ne_bytes()[..])));
//! assert_eq!(cursor.move_on_key_lower_than_or_equal_to("abracadabra")?, None);
//! # Ok(()) }
//! ```
//!
//! ## Use the `Merger` struct
//!
//! In this example we show how you can merge multiple [`Reader`]s
//! by using a _merge function_ when a conflict is encountered.
//!
//! The entries yielded by the [`Merger`] struct are returned in lexicographic order,
//! a good way to write them back into a new [`Writer`].
//!
//! ```rust
//! use std::array::TryFromSliceError;
//! use std::borrow::Cow;
//! use std::convert::TryInto;
//! use std::io::Cursor;
//!
//! use grenad::{MergerBuilder, Reader, Writer};
//!
//! // This merge function:
//! //  - parses u32s from native-endian bytes,
//! //  - wrapping sums them and,
//! //  - outputs the result as native-endian bytes.
//! fn wrapping_sum_u32s<'a>(
//!  _key: &[u8],
//!  values: &[Cow<'a, [u8]>],
//! ) -> Result<Cow<'a, [u8]>, TryFromSliceError>
//! {
//!     let mut output: u32 = 0;
//!     for bytes in values.iter().map(AsRef::as_ref) {
//!         let num = bytes.try_into().map(u32::from_ne_bytes)?;
//!         output = output.wrapping_add(num);
//!     }
//!     Ok(Cow::Owned(output.to_ne_bytes().to_vec()))
//! }
//!
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! // We create our writers in memory to insert our key-value pairs.
//! let mut writera = Writer::memory();
//! let mut writerb = Writer::memory();
//! let mut writerc = Writer::memory();
//!
//! // We insert our key-value pairs in lexicographic order
//! // and mix them between our writers.
//! writera.insert("first-counter", 32_u32.to_ne_bytes())?;
//! writera.insert("second-counter", 64_u32.to_ne_bytes())?;
//! writerb.insert("first-counter", 23_u32.to_ne_bytes())?;
//! writerb.insert("second-counter", 320_u32.to_ne_bytes())?;
//! writerc.insert("first-counter", 64_u32.to_ne_bytes())?;
//!
//! // We create readers from our writers.
//! let cursora = writera.into_inner().map(Cursor::new)?;
//! let cursorb = writerb.into_inner().map(Cursor::new)?;
//! let cursorc = writerc.into_inner().map(Cursor::new)?;
//! let readera = Reader::new(cursora)?.into_cursor()?;
//! let readerb = Reader::new(cursorb)?.into_cursor()?;
//! let readerc = Reader::new(cursorc)?.into_cursor()?;
//!
//! // We create a merger that will sum our u32s when necessary,
//! // and we add our readers to the list of readers to merge.
//! let merger_builder = MergerBuilder::new(wrapping_sum_u32s);
//! let merger = merger_builder.add(readera).add(readerb).add(readerc).build();
//!
//! // We can iterate over the entries in key-order.
//! let mut iter = merger.into_stream_merger_iter()?;
//!
//! // We can see that the sum of u32s is valid here.
//! assert_eq!(iter.next()?, Some((&b"first-counter"[..], &119_u32.to_ne_bytes()[..])));
//! assert_eq!(iter.next()?, Some((&b"second-counter"[..], &384_u32.to_ne_bytes()[..])));
//! assert_eq!(iter.next()?, None);
//! # Ok(()) }
//! ```
//!
//! ## Use the `Sorter` struct
//!
//! In this example we show how by defining a _merge function_, we can insert
//! multiple entries with the same key and output them in lexicographic order.
//!
//! The [`Sorter`] accepts the entries in any given order, will reorder them in-memory and
//! merge them with the _merge function_ when required. It is authorized to have a memory budget
//! during its construction and will try to follow it as closely as possible.
//!
//! ```rust
//! use std::array::TryFromSliceError;
//! use std::borrow::Cow;
//! use std::convert::TryInto;
//!
//! use grenad::{CursorVec, SorterBuilder};
//!
//! // This merge function:
//! //  - parses u32s from native-endian bytes,
//! //  - wrapping sums them and,
//! //  - outputs the result as native-endian bytes.
//! fn wrapping_sum_u32s<'a>(
//!  _key: &[u8],
//!  values: &[Cow<'a, [u8]>],
//! ) -> Result<Cow<'a, [u8]>, TryFromSliceError>
//! {
//!     let mut output: u32 = 0;
//!     for bytes in values.iter().map(AsRef::as_ref) {
//!         let num = bytes.try_into().map(u32::from_ne_bytes)?;
//!         output = output.wrapping_add(num);
//!     }
//!     Ok(Cow::Owned(output.to_ne_bytes().to_vec()))
//! }
//!
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! // We create a sorter that will sum our u32s when necessary.
//! let mut sorter = SorterBuilder::new(wrapping_sum_u32s).chunk_creator(CursorVec).build();
//!
//! // We insert multiple entries with the same key but different values
//! // in arbitrary order, the sorter will take care of merging them for us.
//! sorter.insert("first-counter", 32_u32.to_ne_bytes())?;
//! sorter.insert("first-counter", 23_u32.to_ne_bytes())?;
//! sorter.insert("first-counter", 64_u32.to_ne_bytes())?;
//!
//! sorter.insert("second-counter", 320_u32.to_ne_bytes())?;
//! sorter.insert("second-counter", 64_u32.to_ne_bytes())?;
//!
//! // We can iterate over the entries in key-order.
//! let mut iter = sorter.into_stream_merger_iter()?;
//!
//! // We can see that the sum of u32s is valid here.
//! assert_eq!(iter.next()?, Some((&b"first-counter"[..], &119_u32.to_ne_bytes()[..])));
//! assert_eq!(iter.next()?, Some((&b"second-counter"[..], &384_u32.to_ne_bytes()[..])));
//! assert_eq!(iter.next()?, None);
//! # Ok(()) }
//! ```

#[cfg(test)]
#[macro_use]
extern crate quickcheck;

use std::mem;

mod block;
mod block_writer;
mod compression;
mod count_write;
mod error;
mod merger;
mod metadata;
mod reader;
mod sorter;
mod varint;
mod writer;

pub use self::compression::CompressionType;
pub use self::error::Error;
pub use self::merger::{Merger, MergerBuilder, MergerIter};
pub use self::metadata::FileVersion;
pub use self::reader::{PrefixIter, RangeIter, Reader, ReaderCursor, RevPrefixIter, RevRangeIter};
#[cfg(feature = "tempfile")]
pub use self::sorter::TempFileChunk;
pub use self::sorter::{
    ChunkCreator, CursorVec, DefaultChunkCreator, SortAlgorithm, Sorter, SorterBuilder,
};
pub use self::writer::{Writer, WriterBuilder};

/// Sometimes we need to use an unsafe trick to make the compiler happy.
/// You can read more about the issue [on the Rust's Github issues].
///
/// [on the Rust's Github issues]: https://github.com/rust-lang/rust/issues/47680
unsafe fn transmute_entry_to_static(key: &[u8], val: &[u8]) -> (&'static [u8], &'static [u8]) {
    (mem::transmute(key), mem::transmute(val))
}