victor_db/lib.rs
1//! A browser-optimized vector database. Backed by the private virtual filesystem API on web.
2//!
3//! You're viewing this on crates.io, so you're probably interested in the native version. The native version supports running with the native filesystem or in memory.
4//!
5//! If you want to use it on the web, [check out victor-db on npm](https://www.npmjs.com/package/victor-db).
6//!
7//! ## In-memory database
8//!
9//! Use this if you want to run victor in-memory (all data is lost when the program exits).
10//!
11//! The in-memory version is useful for testing and applications where you don't need to persist data:
12//! ```rust
13//! # tokio_test::block_on(async {
14//! // use victor_db::memory for the in-memory implementation
15//! use victor_db::memory::{Db, DirectoryHandle};
16//!
17//! // create a new in-memory database
18//! let mut victor = Db::new(DirectoryHandle::default());
19//!
20//! // add some embeddings to the database
21//! victor
22//! .add(
23//! vec!["Pineapple", "Rocks"], // documents
24//! vec!["Pizza Toppings"], // tags (only used for filtering)
25//! )
26//! .await;
27//!
//! // add another embedding to the database, this time with a different tag
29//! victor.add_single("Cheese pizza", vec!["Pizza Flavors"]).await;
30//!
31//! // read the 10 closest results from victor that are tagged with "Pizza Toppings"
//! // (only 2 will be returned because only two of the inserted embeddings carry that tag)
33//! let nearest = victor
34//! .search("Hawaiian pizza", vec!["Pizza Toppings"], 10)
35//! .await
36//! .first()
37//! .unwrap()
38//! .content
39//! .clone();
40//! assert_eq!(nearest, "Pineapple".to_string());
41//!
42//! // Clear the database
43//! victor.clear_db().await.unwrap();
44//! # })
45//! ```
46//!
47//! ## Native database
48//!
49//! Use this if you want to persist your database to disk.
50//!
51//! ```rust
52//! # tokio_test::block_on(async {
53//! // use victor_db::native for the native filesystem implementation
54//! use victor_db::native::Db;
55//! use std::path::PathBuf;
56//!
57//! // create a new native database under "./victor_test_data"
58//! let _ = std::fs::create_dir("./victor_test_data");
59//! let mut victor = Db::new(PathBuf::from("./victor_test_data"));
60//!
61//! // add some embeddings to the database
62//! victor
63//! .add(
64//! vec!["Pineapple", "Rocks"], // documents
65//! vec!["Pizza Toppings"], // tags (only used for filtering)
66//! )
67//! .await;
68//!
//! // add another embedding to the database, this time with a different tag
70//! victor.add_single("Cheese pizza", vec!["Pizza Flavors"]).await;
71//!
72//! // read the 10 closest results from victor that are tagged with "Pizza Toppings"
//! // (only 2 will be returned because only two of the inserted embeddings carry that tag)
74//! let nearest = victor
75//! .search("Hawaiian pizza", vec!["Pizza Toppings"], 10)
76//! .await
77//! .first()
78//! .unwrap()
79//! .content
80//! .clone();
81//! assert_eq!(nearest, "Pineapple".to_string());
82//!
83//! // Clear the database
84//! victor.clear_db().await.unwrap();
85//! # })
86//! ```
87//!
88//! See the docs for [`Victor`] for more information.
89
90#![deny(missing_docs)]
91
92mod db;
93mod decomposition;
94mod filesystem;
95mod packed_vector;
96mod similarity;
97mod utils;
98
99#[cfg(not(target_arch = "wasm32"))]
100pub use db::Victor;
101
102#[cfg(test)]
103mod tests;
104
105#[cfg(target_arch = "wasm32")]
106use {
107 wasm_bindgen::prelude::*, wasm_bindgen_futures::JsFuture, web_sys::FileSystemDirectoryHandle,
108};
109
// Private shorthand for the database specialised to the web filesystem's directory
// handle, so the wasm-only code in this file does not have to repeat the type parameter.
#[cfg(target_arch = "wasm32")]
type Victor = db::Victor<filesystem::web::DirectoryHandle>;
112
113// Native
114
115/// Victor's native filesystem implementation.
116///
117/// Use this if you want to persist your database to disk.
118#[cfg(not(target_arch = "wasm32"))]
119pub mod native {
120 use crate::db::Victor;
121
122 /// A native vector database.
123 pub type Db = Victor<crate::filesystem::native::DirectoryHandle>;
124}
125
126/// Victor's in-memory implementation.
127///
128/// Use this if you want to run victor in-memory (all data is lost when the program exits).
129#[cfg(not(target_arch = "wasm32"))]
130pub mod memory {
131 use crate::db::Victor;
132
133 /// The directory handle type for the in-memory filesystem.
134 pub use crate::filesystem::memory::DirectoryHandle;
135
136 /// An in-memory vector database.
137 pub type Db = Victor<DirectoryHandle>;
138}
139
140// Wasm
141
// Formats its arguments like `format!` and passes the resulting message to the imported
// `log` binding. No call site is visible in this file, hence the `unused_macros` allowance.
#[cfg(target_arch = "wasm32")]
#[allow(unused_macros)]
macro_rules! console_log {
    ($($arg:tt)*) => {
        log(&format!($($arg)*))
    };
}
147
// Formats its arguments like `format!` and passes the resulting message to the imported
// `warn` binding.
#[cfg(target_arch = "wasm32")]
#[allow(unused_macros)]
macro_rules! console_warn {
    ($($arg:tt)*) => {
        warn(&format!($($arg)*))
    };
}
// Imports from the JavaScript `console` namespace. The logging macros in this file expand
// to calls of these functions.
#[cfg(target_arch = "wasm32")]
#[wasm_bindgen]
extern "C" {
    // Bound to `console.warn`.
    #[wasm_bindgen(js_namespace = console)]
    fn warn(s: &str);

    // Bound to `console.log`.
    #[wasm_bindgen(js_namespace = console)]
    fn log(s: &str);
}
161
/// A browser-optimized vector database.
#[cfg(target_arch = "wasm32")]
#[wasm_bindgen]
pub struct Db {
    // The web-backed database that every exported method delegates to.
    victor: Victor,
}
168
// Converts the optional JavaScript tag array into owned Rust strings.
//
// A missing array is treated as "no tags" and yields an empty vector.
// Panics if any element is not a JavaScript string.
#[cfg(target_arch = "wasm32")]
fn tags_from_js(tags: Option<Vec<JsValue>>) -> Vec<String> {
    tags.unwrap_or_default()
        .into_iter()
        .map(|tag| tag.as_string().expect("every tag must be a string"))
        .collect()
}

#[cfg(target_arch = "wasm32")]
#[wasm_bindgen]
impl Db {
    /// Connect to victor.
    ///
    /// Installs the panic hook, asks the browser's storage manager for its directory and
    /// opens the database on top of it.
    ///
    /// # Panics
    ///
    /// Panics if there is no global `window` object or if the storage directory cannot be
    /// obtained.
    #[wasm_bindgen(constructor)]
    pub async fn new() -> Self {
        utils::set_panic_hook();

        let window = web_sys::window().expect("victor requires a global `window` object");
        let directory = JsFuture::from(window.navigator().storage().get_directory())
            .await
            .expect("failed to obtain the storage directory");

        Self {
            victor: Victor::new(FileSystemDirectoryHandle::from(directory)),
        }
    }

    /// Add a document to the database.
    ///
    /// `content` is stored together with its `embedding`; `tags` is optional and defaults
    /// to no tags.
    ///
    /// # Panics
    ///
    /// Panics if any element of `tags` is not a string.
    pub async fn insert(&mut self, content: &str, embedding: &[f64], tags: Option<Vec<JsValue>>) {
        // JavaScript numbers arrive as f64; they are narrowed to f32 before being handed to
        // the database.
        let embedding: Vec<f32> = embedding.iter().map(|&x| x as f32).collect();
        let tags = tags_from_js(tags);

        self.victor
            .add_single_embedding(content, embedding, tags)
            .await;
    }

    /// Search the database for the nearest neighbors to a given embedding.
    ///
    /// `tags` is optional and defaults to no tags; `top_n` defaults to 10 when omitted.
    /// The results are returned serialized as a JavaScript value.
    ///
    /// # Panics
    ///
    /// Panics if any element of `tags` is not a string, or if the results cannot be
    /// serialized.
    pub async fn search(
        &mut self,
        embedding: &[f64],
        tags: Option<Vec<JsValue>>,
        top_n: Option<f64>,
    ) -> JsValue {
        let embedding: Vec<f32> = embedding.iter().map(|&x| x as f32).collect();
        let tags = tags_from_js(tags);

        // Float-to-integer `as` casts saturate (and map NaN to 0), so any JavaScript number
        // is accepted here without a panic; fractional values are truncated.
        let top_n = top_n.unwrap_or(10.0) as u32;

        let nearest_neighbors = self.victor.search_embedding(embedding, tags, top_n).await;

        serde_wasm_bindgen::to_value(&nearest_neighbors)
            .expect("search results must serialize to a JavaScript value")
    }

    /// Clear the database, permanently removing all data.
    ///
    /// A failure does not panic; it is reported as a console warning instead.
    pub async fn clear(&mut self) {
        utils::set_panic_hook();

        let result = self.victor.clear_db().await;
        if result.is_err() {
            console_warn!("Failed to clear victor data: {:?}", result);
        }
    }
}
241}