victor_db/
lib.rs

//! A browser-optimized vector database. Backed by the origin private file system (OPFS) API on the web.
2//!
3//! You're viewing this on crates.io, so you're probably interested in the native version. The native version supports running with the native filesystem or in memory.
4//!
5//! If you want to use it on the web, [check out victor-db on npm](https://www.npmjs.com/package/victor-db).
6//!
7//! ## In-memory database
8//!
9//! Use this if you want to run victor in-memory (all data is lost when the program exits).
10//!
11//! The in-memory version is useful for testing and applications where you don't need to persist data:
12//! ```rust
13//! # tokio_test::block_on(async {
14//! // use victor_db::memory for the in-memory implementation
15//! use victor_db::memory::{Db, DirectoryHandle};
16//!
17//! // create a new in-memory database
18//! let mut victor = Db::new(DirectoryHandle::default());
19//!
20//! // add some embeddings to the database
21//! victor
22//!     .add(
23//!         vec!["Pineapple", "Rocks"], // documents
24//!         vec!["Pizza Toppings"],     // tags (only used for filtering)
25//!     )
26//!     .await;
27//!
//! // add another embedding to the database, this time with a different tag
29//! victor.add_single("Cheese pizza", vec!["Pizza Flavors"]).await;
30//!
31//! // read the 10 closest results from victor that are tagged with "Pizza Toppings"
//! // (at most 2 will be returned because only two embeddings carry that tag)
33//! let nearest = victor
34//!     .search("Hawaiian pizza", vec!["Pizza Toppings"], 10)
35//!     .await
36//!     .first()
37//!     .unwrap()
38//!     .content
39//!     .clone();
40//! assert_eq!(nearest, "Pineapple".to_string());
41//!
42//! // Clear the database
43//! victor.clear_db().await.unwrap();
44//! # })
45//! ```
46//!
47//! ## Native database
48//!
49//! Use this if you want to persist your database to disk.
50//!
51//! ```rust
52//! # tokio_test::block_on(async {
53//! // use victor_db::native for the native filesystem implementation
54//! use victor_db::native::Db;
55//! use std::path::PathBuf;
56//!
57//! // create a new native database under "./victor_test_data"
58//! let _ = std::fs::create_dir("./victor_test_data");
59//! let mut victor = Db::new(PathBuf::from("./victor_test_data"));
60//!
61//! // add some embeddings to the database
62//! victor
63//!     .add(
64//!         vec!["Pineapple", "Rocks"], // documents
65//!         vec!["Pizza Toppings"],     // tags (only used for filtering)
66//!     )
67//!     .await;
68//!
//! // add another embedding to the database, this time with a different tag
70//! victor.add_single("Cheese pizza", vec!["Pizza Flavors"]).await;
71//!
72//! // read the 10 closest results from victor that are tagged with "Pizza Toppings"
//! // (at most 2 will be returned because only two embeddings carry that tag)
74//! let nearest = victor
75//!     .search("Hawaiian pizza", vec!["Pizza Toppings"], 10)
76//!     .await
77//!     .first()
78//!     .unwrap()
79//!     .content
80//!     .clone();
81//! assert_eq!(nearest, "Pineapple".to_string());
82//!
83//! // Clear the database
84//! victor.clear_db().await.unwrap();
85//! # })
86//! ```
87//!
88//! See the docs for [`Victor`] for more information.
89
90#![deny(missing_docs)]
91
92mod db;
93mod decomposition;
94mod filesystem;
95mod packed_vector;
96mod similarity;
97mod utils;
98
99#[cfg(not(target_arch = "wasm32"))]
100pub use db::Victor;
101
102#[cfg(test)]
103mod tests;
104
105#[cfg(target_arch = "wasm32")]
106use {
107    wasm_bindgen::prelude::*, wasm_bindgen_futures::JsFuture, web_sys::FileSystemDirectoryHandle,
108};
109
// On wasm the database is always backed by the web directory handle, so the
// bindings below refer to that concrete instantiation through this short alias.
#[cfg(target_arch = "wasm32")]
type Victor = db::Victor<filesystem::web::DirectoryHandle>;
112
113// Native
114
115/// Victor's native filesystem implementation.
116///
117/// Use this if you want to persist your database to disk.
118#[cfg(not(target_arch = "wasm32"))]
119pub mod native {
120    use crate::db::Victor;
121
122    /// A native vector database.
123    pub type Db = Victor<crate::filesystem::native::DirectoryHandle>;
124}
125
126/// Victor's in-memory implementation.
127///
128/// Use this if you want to run victor in-memory (all data is lost when the program exits).
129#[cfg(not(target_arch = "wasm32"))]
130pub mod memory {
131    use crate::db::Victor;
132
133    /// The directory handle type for the in-memory filesystem.
134    pub use crate::filesystem::memory::DirectoryHandle;
135
136    /// An in-memory vector database.
137    pub type Db = Victor<DirectoryHandle>;
138}
139
140// Wasm
141
// Formats its arguments like `format!` and hands the resulting string to the
// imported `log` binding (JavaScript `console.log`).
#[cfg(target_arch = "wasm32")]
#[allow(unused_macros)]
macro_rules! console_log {
    ($($arg:tt)*) => {
        log(&format!($($arg)*))
    };
}
147
// Formats its arguments like `format!` and hands the resulting string to the
// imported `warn` binding (JavaScript `console.warn`).
#[cfg(target_arch = "wasm32")]
#[allow(unused_macros)]
macro_rules! console_warn {
    ($($arg:tt)*) => {
        warn(&format!($($arg)*))
    };
}
// Imports from the JavaScript `console` object; these are the functions the
// `console_log!` / `console_warn!` macros expand to.
#[cfg(target_arch = "wasm32")]
#[wasm_bindgen]
extern "C" {
    // `console.warn(s)`
    #[wasm_bindgen(js_namespace = console)]
    fn warn(s: &str);

    // `console.log(s)`
    #[wasm_bindgen(js_namespace = console)]
    fn log(s: &str);
}
161
/// A browser-optimized vector database.
#[cfg(target_arch = "wasm32")]
#[wasm_bindgen]
pub struct Db {
    // The wrapped database, instantiated with the web directory handle.
    victor: Victor,
}
168
169#[cfg(target_arch = "wasm32")]
170#[wasm_bindgen]
171impl Db {
172    /// Connect to victor.
173    #[wasm_bindgen(constructor)]
174    pub async fn new() -> Self {
175        utils::set_panic_hook();
176
177        let window = web_sys::window().ok_or(JsValue::NULL).unwrap();
178        let navigator = window.navigator();
179        let file_system_directory_handle = FileSystemDirectoryHandle::from(
180            JsFuture::from(navigator.storage().get_directory())
181                .await
182                .unwrap(),
183        );
184
185        let victor = Victor::new(file_system_directory_handle);
186
187        Self { victor }
188    }
189
190    /// Add a document to the database.
191    pub async fn insert(&mut self, content: &str, embedding: &[f64], tags: Option<Vec<JsValue>>) {
192        let embedding = embedding.iter().map(|x| *x as f32).collect::<Vec<_>>();
193
194        let tags = tags
195            .map(|tags| {
196                tags.into_iter()
197                    .map(|x| x.as_string().unwrap())
198                    .collect::<Vec<_>>()
199            })
200            .unwrap_or(vec![]);
201
202        self.victor
203            .add_single_embedding(content, embedding, tags)
204            .await;
205    }
206
207    /// Search the database for the nearest neighbors to a given embedding.
208    pub async fn search(
209        &mut self,
210        embedding: &[f64],
211        tags: Option<Vec<JsValue>>,
212        top_n: Option<f64>,
213    ) -> JsValue {
214        let embedding = embedding.iter().map(|x| *x as f32).collect::<Vec<_>>();
215
216        let tags = tags
217            .map(|tags| {
218                tags.into_iter()
219                    .map(|x| x.as_string().unwrap())
220                    .collect::<Vec<_>>()
221            })
222            .unwrap_or(vec![]);
223
224        let nearest_neighbors = self
225            .victor
226            .search_embedding(embedding, tags, top_n.unwrap_or(10.0) as u32)
227            .await;
228
229        serde_wasm_bindgen::to_value(&nearest_neighbors).unwrap()
230    }
231
232    /// Clear the database, permanently removing all data.
233    pub async fn clear(&mut self) {
234        utils::set_panic_hook();
235
236        let result = self.victor.clear_db().await; // ignore the error if there is one
237        if !result.is_ok() {
238            console_warn!("Failed to clear victor data: {:?}", result);
239        }
240    }
241}