claw_vector/index/
selector.rs1use std::path::Path;
4
5use tracing::instrument;
6
7use crate::{
8 config::VectorConfig,
9 error::{VectorError, VectorResult},
10 index::{flat::FlatIndex, hnsw::HnswIndex},
11 types::DistanceMetric,
12};
13
/// Number of stored vectors above which a flat index is migrated to HNSW.
///
/// The comparison is strict (`len > HNSW_THRESHOLD`) and is evaluated after every
/// successful insert or batch insert into a flat index.
pub const HNSW_THRESHOLD: usize = 1_000;
16
17pub enum IndexSelector {
19 Flat(FlatIndex),
21 Hnsw(Box<HnswIndex>),
23}
24
25impl IndexSelector {
26 pub fn new(dimensions: usize, distance: DistanceMetric, _config: &VectorConfig) -> Self {
28 IndexSelector::Flat(FlatIndex::new(dimensions, distance))
29 }
30
31 #[instrument(skip(self, vector, config))]
33 pub fn insert(
34 &mut self,
35 id: usize,
36 vector: Vec<f32>,
37 config: &VectorConfig,
38 ) -> VectorResult<()> {
39 match self {
40 IndexSelector::Flat(flat) => {
41 flat.insert(id, vector)?;
42 if flat.len() > HNSW_THRESHOLD {
43 self.migrate_to_hnsw(config)?;
44 }
45 }
46 IndexSelector::Hnsw(hnsw) => hnsw.insert(id, &vector)?,
47 }
48 Ok(())
49 }
50
51 #[instrument(skip(self, items, config))]
53 pub fn insert_batch(
54 &mut self,
55 items: Vec<(usize, Vec<f32>)>,
56 config: &VectorConfig,
57 ) -> VectorResult<()> {
58 match self {
59 IndexSelector::Flat(flat) => {
60 flat.insert_batch(items)?;
61 if flat.len() > HNSW_THRESHOLD {
62 self.migrate_to_hnsw(config)?;
63 }
64 }
65 IndexSelector::Hnsw(hnsw) => hnsw.insert_batch(&items)?,
66 }
67 Ok(())
68 }
69
70 #[instrument(skip(self, query))]
72 pub fn search(
73 &self,
74 query: &[f32],
75 top_k: usize,
76 ef_search: usize,
77 ) -> VectorResult<Vec<(usize, f32)>> {
78 match self {
79 IndexSelector::Flat(flat) => flat.search(query, top_k),
80 IndexSelector::Hnsw(hnsw) => hnsw.search(query, top_k, ef_search),
81 }
82 }
83
84 #[instrument(skip(self))]
86 pub fn delete(&mut self, id: usize) -> VectorResult<bool> {
87 match self {
88 IndexSelector::Flat(flat) => flat.delete(id),
89 IndexSelector::Hnsw(hnsw) => {
90 hnsw.delete(id)?;
91 Ok(true)
92 }
93 }
94 }
95
96 pub fn len(&self) -> usize {
98 match self {
99 IndexSelector::Flat(f) => f.len(),
100 IndexSelector::Hnsw(h) => h.len(),
101 }
102 }
103
104 pub fn is_empty(&self) -> bool {
106 self.len() == 0
107 }
108
109 pub fn is_hnsw(&self) -> bool {
111 matches!(self, IndexSelector::Hnsw(_))
112 }
113
114 #[instrument(skip(self, config))]
116 pub fn migrate_to_hnsw(&mut self, config: &VectorConfig) -> VectorResult<()> {
117 let hnsw = match self {
118 IndexSelector::Flat(flat) => {
119 tracing::info!(elements = flat.len(), "migrating flat index to HNSW");
120 flat.to_hnsw(config)?
121 }
122 IndexSelector::Hnsw(_) => return Ok(()),
123 };
124 *self = IndexSelector::Hnsw(Box::new(hnsw));
125 Ok(())
126 }
127
128 #[instrument(skip(self))]
130 pub fn save(&self, dir: &Path, workspace_id: &str, collection: &str) -> VectorResult<()> {
131 let col_dir = dir.join(workspace_id).join(collection);
132 std::fs::create_dir_all(&col_dir)?;
133 let kind = if self.is_hnsw() { "hnsw" } else { "flat" };
134 std::fs::write(
135 col_dir.join("index.meta.json"),
136 serde_json::to_string(&serde_json::json!({ "index_type": kind }))?,
137 )?;
138 match self {
139 IndexSelector::Flat(flat) => {
140 std::fs::write(
141 col_dir.join("flat.json"),
142 serde_json::to_string(&flat.all_vectors()?)?,
143 )?;
144 }
145 IndexSelector::Hnsw(hnsw) => hnsw.save(&col_dir, collection)?,
146 }
147 Ok(())
148 }
149
150 #[instrument(skip(config))]
152 pub fn load(
153 dir: &Path,
154 workspace_id: &str,
155 collection: &str,
156 config: &VectorConfig,
157 distance: DistanceMetric,
158 dimensions: usize,
159 ) -> VectorResult<Self> {
160 let col_dir = dir.join(workspace_id).join(collection);
161 let meta: serde_json::Value =
162 serde_json::from_reader(std::fs::File::open(col_dir.join("index.meta.json"))?)?;
163 match meta["index_type"]
164 .as_str()
165 .ok_or_else(|| VectorError::Index("missing index_type".into()))?
166 {
167 "flat" => {
168 let vecs: Vec<(usize, Vec<f32>)> =
169 serde_json::from_str(&std::fs::read_to_string(col_dir.join("flat.json"))?)?;
170 let flat = FlatIndex::new(dimensions, distance);
171 flat.insert_batch(vecs)?;
172 Ok(IndexSelector::Flat(flat))
173 }
174 "hnsw" => Ok(IndexSelector::Hnsw(Box::new(HnswIndex::load(
175 &col_dir, collection, config, distance,
176 )?))),
177 other => Err(VectorError::Index(format!("unknown index_type '{other}'"))),
178 }
179 }
180}