1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
// Copyright 2018 Andre-Philippe Paquet
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Immutable persisted index (on disk) that can be built in one pass using a
//! sorted iterator, or can use [extsort](https://crates.io/crates/extsort) to externally sort the iterator first, and
//! then build the index from it.
//!
//! The index allows random lookups and sorted scans. An indexed entry consists
//! of a key and a value. The key needs to implement `Eq` and `Ord`, and both
//! the key and the value need to implement the `Serializable` trait for
//! serialization to and from disk.
//!
//! The index is built using a skip-list-like data structure, but one in which
//! lookups start from the end of the index instead of from the beginning.
//! This allows building the index in a single pass over a sorted iterator,
//! since starting from the beginning would require knowing checkpoints/nodes
//! ahead in the file.
//!
//! # Examples
//! ```rust
//! extern crate extindex;
//!
//! use std::io::{Read, Write};
//! use extindex::{Builder, Serializable, Entry, Reader};
//!
//! #[derive(Ord, PartialOrd, Eq, PartialEq, Debug)]
//! struct TestString(String);
//!
//! impl Serializable for TestString {
//!     fn size(&self) -> Option<usize> {
//!         Some(self.0.as_bytes().len())
//!     }
//!
//!     fn serialize<W: Write>(&self, write: &mut W) -> Result<(), std::io::Error> {
//!         write.write_all(self.0.as_bytes()).map(|_| ())
//!     }
//!
//!     fn deserialize<R: Read>(data: &mut R, size: usize) -> Result<TestString, std::io::Error> {
//!         let mut bytes = vec![0u8; size];
//!         data.read_exact(&mut bytes)?;
//!         Ok(TestString(String::from_utf8_lossy(&bytes).to_string()))
//!     }
//! }
//!
//! let index_file = tempfile::NamedTempFile::new().unwrap();
//!
//! let builder = Builder::new(index_file.path());
//! let entries = vec![
//!     Entry::new(TestString("mykey".to_string()), TestString("my value".to_string()))
//! ];
//! builder.build(entries.into_iter()).unwrap();
//!
//! let reader = Reader::<TestString, TestString>::open(index_file).unwrap();
//! assert!(reader.find(&TestString("mykey".to_string())).unwrap().is_some());
//! assert!(reader.find(&TestString("notfound".to_string())).unwrap().is_none());
//! ```
extern crate log;
pub use crate::;
pub use crateSerdeWrapper;