iqdb_build/lib.rs
1//! # iqdb-build
2//!
3//! Bulk index construction for the **iqdb** embedded vector-database spine.
4//! Loading a million vectors into an index one [`insert`](iqdb_index::IndexCore::insert)
5//! at a time is slow; `iqdb-build` is the fast path. It is generic over the
6//! [`iqdb_index::Index`] trait, so the same builder constructs flat, HNSW, and
7//! IVF indexes without naming a concrete type.
8//!
9//! The crate exposes the three iqdb API tiers:
10//!
11//! - **Tier 1 — the lazy path.** The free functions [`build`] (construct a
12//! fresh index from a stream of vectors, with default configuration) and
13//! [`build_into`] (bulk-insert into an index you already hold, including a
14//! `&mut dyn IndexCore` trait object). One call each.
15//! - **Tier 2 — the configured path.** [`IndexBuilder`], which carries the
16//! `dim`, `metric`, and the backend's own [`Config`](iqdb_index::Index::Config)
17//! so you can tune the index it constructs.
18//! [`build_parallel`](IndexBuilder::build_parallel) splits the input into
19//! shards and constructs them concurrently on rayon's pool;
20//! [`build_merged`](IndexBuilder::build_merged) folds those shards back into a
21//! single index — the full *split → build → merge* pipeline in one call. An
22//! optional [`on_progress`](IndexBuilder::on_progress) callback reports shard
23//! completion.
24//! - **Tier 3 — the trait seam.** The [`iqdb_index::Index`] and
25//! [`iqdb_index::IndexCore`] traits, plus this crate's [`Mergeable`] (how a
26//! backend absorbs another instance of itself): implement them and the same
27//! builder constructs and merges your backend.
28//!
29//! Every fallible call returns [`iqdb_types::Result`]; errors raised by the
30//! backend ([`Index::new`](iqdb_index::Index::new) and
31//! [`insert_batch`](iqdb_index::IndexCore::insert_batch)) are propagated
32//! unchanged. The crate adds no error type of its own and never panics on bad
33//! input.
34//!
35//! ## Example
36//!
37//! ```
38//! use std::sync::Arc;
39//! use iqdb_build::IndexBuilder;
40//! use iqdb_types::{DistanceMetric, VectorId};
41//! # use iqdb_index::{Index, IndexCore, IndexStats};
42//! # use iqdb_types::{Hit, IqdbError, Metadata, Result, SearchParams};
43//! # struct Flat { dim: usize, metric: DistanceMetric, rows: Vec<(VectorId, Arc<[f32]>)> }
44//! # #[derive(Clone, Default)] struct FlatConfig;
45//! # impl IndexCore for Flat {
46//! # fn insert(&mut self, id: VectorId, v: Arc<[f32]>, _m: Option<Metadata>) -> Result<()> {
47//! # if v.len() != self.dim { return Err(IqdbError::DimensionMismatch { expected: self.dim, found: v.len() }); }
48//! # if self.rows.iter().any(|(e, _)| e == &id) { return Err(IqdbError::Duplicate); }
49//! # self.rows.push((id, v)); Ok(())
50//! # }
51//! # fn delete(&mut self, id: &VectorId) -> Result<()> { match self.rows.iter().position(|(e, _)| e == id) { Some(p) => { let _ = self.rows.remove(p); Ok(()) } None => Err(IqdbError::NotFound) } }
52//! # fn search(&self, q: &[f32], p: &SearchParams) -> Result<Vec<Hit>> {
53//! # let mut h: Vec<Hit> = self.rows.iter().map(|(id, v)| Hit { id: id.clone(), distance: q.iter().zip(v.iter()).map(|(a, b)| (a - b) * (a - b)).sum(), metadata: None }).collect();
54//! # h.sort_by(|a, b| a.distance.total_cmp(&b.distance)); h.truncate(p.k); Ok(h)
55//! # }
56//! # fn len(&self) -> usize { self.rows.len() }
57//! # fn dim(&self) -> usize { self.dim }
58//! # fn metric(&self) -> DistanceMetric { self.metric }
59//! # fn flush(&mut self) -> Result<()> { Ok(()) }
60//! # fn stats(&self) -> IndexStats { IndexStats { n_vectors: self.rows.len(), index_type: "flat", ..IndexStats::default() } }
61//! # }
62//! # impl Index for Flat { type Config = FlatConfig; fn new(dim: usize, metric: DistanceMetric, _c: Self::Config) -> Result<Self> { if dim == 0 { return Err(IqdbError::InvalidConfig { reason: "dim must be > 0" }); } Ok(Flat { dim, metric, rows: Vec::new() }) } }
63//! # fn main() -> iqdb_types::Result<()> {
64//! // Build a 3-dimensional index from three vectors in one call.
65//! let items = vec![
66//! (VectorId::from(1u64), Arc::from([0.0_f32, 0.0, 0.0].as_slice()), None),
67//! (VectorId::from(2u64), Arc::from([1.0_f32, 0.0, 0.0].as_slice()), None),
68//! (VectorId::from(3u64), Arc::from([0.0_f32, 1.0, 0.0].as_slice()), None),
69//! ];
70//! let index: Flat = IndexBuilder::new(3, DistanceMetric::Euclidean).build(items)?;
71//! assert_eq!(index.len(), 3);
72//! # Ok(()) }
73//! ```
74
75#![cfg_attr(docsrs, feature(doc_cfg))] // turn on the `doc_cfg` feature gate only under `--cfg docsrs`
76#![deny(warnings)] // any compiler warning fails the build
77#![deny(missing_docs)] // every public item must carry documentation
78#![deny(unsafe_op_in_unsafe_fn)] // unsafe operations need their own `unsafe` block, even inside an `unsafe fn`
79#![deny(unused_must_use)] // a `#[must_use]` value (e.g. a `Result`) may not be dropped unused
80#![deny(unused_results)] // stricter still: no non-unit statement result may be silently discarded
81#![deny(clippy::unwrap_used)] // no `.unwrap()`
82#![deny(clippy::expect_used)] // no `.expect(..)`
83#![deny(clippy::todo)] // no `todo!()` stubs
84#![deny(clippy::unimplemented)] // no `unimplemented!()` stubs
85#![deny(clippy::print_stdout)] // no `print!` / `println!`
86#![deny(clippy::print_stderr)] // no `eprint!` / `eprintln!`
87#![deny(clippy::dbg_macro)] // no leftover `dbg!(..)`
88#![deny(clippy::unreachable)] // no `unreachable!()`
89#![deny(clippy::undocumented_unsafe_blocks)] // an `unsafe` block would need a `// SAFETY:` comment
90#![forbid(unsafe_code)] // no `unsafe` at all; `forbid` cannot be relaxed by an inner `allow`
91
92mod builder;
93mod merge;
94mod parallel;
95
96pub use crate::builder::{BuildItem, IndexBuilder, build, build_into};
97pub use crate::merge::{Mergeable, merge};
98pub use crate::parallel::BuildProgress;
99
100/// The version of this crate: Cargo's `CARGO_PKG_VERSION`, read at compile time.
101///
102/// Exposed so a consumer can report the exact `iqdb-build` version it links
103/// against — useful in diagnostics and version-skew checks across the iqdb
104/// crate family.
105///
106/// # Examples
107///
108/// ```
109/// // The SemVer core (before any `-pre` / `+build` suffix) is `major.minor.patch`.
110/// let core = iqdb_build::VERSION.split(['-', '+']).next().unwrap_or_default();
111/// assert_eq!(core.split('.').count(), 3);
112/// assert!(core.split('.').all(|part| !part.is_empty()));
113/// ```
114pub const VERSION: &str = env!("CARGO_PKG_VERSION");