1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
//! Hannoy is a key-value backed [HNSW][1] implementation based on [arroy][2].
//!
//! Many popular HNSW libraries are built in memory, meaning you need enough RAM to store all the vectors you're indexing. Instead, `hannoy` uses
//! [LMDB][3] — a memory-mapped KV store — as a storage backend.
//!
//! This is better suited for machines running multiple programs, or cases where the
//! dataset you're indexing won't fit in memory. LMDB also supports non-blocking concurrent reads by design, meaning it's safe to query the index in
//! multi-threaded environments.
//!
//! [1]: https://www.pinecone.io/learn/series/faiss/hnsw/
//! [2]: https://github.com/meilisearch/arroy
//! [3]: https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database
//!
//! # Examples
//!
//! Open an LMDB database, store some vectors in it and query the nearest item to some query vector. This is the
//! simplest way to use hannoy, and it's fairly easy. Just do not forget to call [`HannoyBuilder::build<M0,M>`] and [`heed::RwTxn::commit`]
//! when you are done inserting your items.
//!
//! ```rust
//! use hannoy::{distances::Cosine, Database, Reader, Result, Writer};
//! use heed::EnvOpenOptions;
//! use rand::{rngs::StdRng, SeedableRng};
//!
//! fn main() -> Result<()> {
//! const DIM: usize = 3;
//! let vecs: Vec<[f32; DIM]> = vec![[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]];
//!
//! let env = unsafe {
//! EnvOpenOptions::new()
//! .map_size(1024 * 1024 * 1024 * 1) // 1GiB
//! .open("./")
//! }
//! .unwrap();
//!
//! let mut wtxn = env.write_txn().unwrap();
//! let db: Database<Cosine> = env.create_database(&mut wtxn, None)?;
//! let writer: Writer<Cosine> = Writer::new(db, 0, DIM);
//!
//! // insert into lmdb
//! writer.add_item(&mut wtxn, 0, &vecs[0])?;
//! writer.add_item(&mut wtxn, 1, &vecs[1])?;
//! writer.add_item(&mut wtxn, 2, &vecs[2])?;
//!
//! // ...and build hnsw
//! let mut rng = StdRng::seed_from_u64(42);
//!
//! let mut builder = writer.builder(&mut rng);
//! builder.ef_construction(100).build::<16,32>(&mut wtxn)?;
//! wtxn.commit()?;
//!
//! // search hnsw using a new lmdb read transaction
//! let rtxn = env.read_txn()?;
//! let reader = Reader::<Cosine>::open(&rtxn, 0, db)?;
//!
//! let query = vec![0.0, 1.0, 0.0];
//! let nns = reader.nns(1).ef_search(10).by_vector(&rtxn, &query)?;
//!
//! dbg!(&nns);
//! Ok(())
//! }
//! ```
pub use Distance;
pub use Error;
use ;
use ;
use ;
use ;
pub use ;
pub use RoaringBitmapCodec;
pub use ;
/// The set of types used by the [`Distance`] trait.
/// The set of distances implementing the [`Distance`] trait and supported by hannoy.
/// A custom Result type that is returning an hannoy error by default.
pub type Result<T, E = Error> = Result;
/// The database required by hannoy for reading or writing operations.
///
/// It is parameterised by `D`, which the crate-level example instantiates with a
/// distance type (`Database<Cosine>`).
// NOTE(review): the right-hand side names `Database` without a path or generic
// arguments, so the alias refers to itself and `D` is unused — presumably the
// underlying key-value database type with its codecs was intended; confirm and restore.
pub type Database<D> = Database;
/// An identifier for the items stored in the database.
///
/// Item ids are plain `u32` values.
pub type ItemId = u32;
/// An identifier for the links of the hnsw. We can guarantee mathematically there will always be
/// fewer than 256 layers, so a `u8` is enough to number them.
pub type LayerId = u8;
/// How many iterations are processed between two consecutive checks of whether the
/// indexing process should be cancelled.
const CANCELLATION_PROBING: usize = 10_000;