1use crate::{Result, ScoredSpan, Span};
2use crate::index::cosine_similarity;
3use serde::{Serialize, Deserialize};
4use std::path::Path;
5
6pub trait ApproxIndex: Sized {
8 fn build(spans: Vec<Span>) -> Self;
10
11 fn search(&self, query_embedding: &[f32], k: usize) -> Result<Vec<ScoredSpan>>;
13
14 fn save_to_disk(&self, dir: &Path) -> Result<()>;
16
17 fn load_from_disk(dir: &Path) -> Result<Option<Self>>;
19
20 fn spans(&self) -> &[Span];
22}
23
/// Newtype wrapper around [`crate::index::VectorIndex`] that exposes it through the
/// [`ApproxIndex`] trait; every trait method delegates to the wrapped index.
pub struct HnswBackend(pub crate::index::VectorIndex);
26
27impl ApproxIndex for HnswBackend {
28 fn build(spans: Vec<Span>) -> Self {
29 Self(crate::index::VectorIndex::build(spans))
30 }
31
32 fn search(&self, query_embedding: &[f32], k: usize) -> Result<Vec<ScoredSpan>> {
33 self.0.search(query_embedding, k)
34 }
35
36 fn save_to_disk(&self, dir: &Path) -> Result<()> {
37 self.0.save_to_disk(dir)
38 }
39
40 fn load_from_disk(dir: &Path) -> Result<Option<Self>> {
41 Ok(crate::index::VectorIndex::load_from_disk(dir)?.map(HnswBackend))
42 }
43
44 fn spans(&self) -> &[Span] {
45 self.0.spans()
46 }
47}
48
/// Index that keeps all spans and their embeddings in memory and scores every span
/// on each query (see its [`ApproxIndex`] implementation).
#[derive(Serialize, Deserialize)]
pub struct InstantBackend {
    // Embedding length a query must have to be scored; 0 makes `search` return nothing.
    dimension: usize,
    // Indexed spans, in the order they were supplied.
    spans: Vec<Span>,
    // One embedding per span, parallel to `spans`; empty for spans that had no embedding.
    embeddings: Vec<Vec<f32>>,
}
60
/// Serialized form of [`InstantBackend`], written to and read from `instant.idx` with bincode.
#[derive(Serialize, Deserialize)]
struct InstantBackendOnDisk {
    // Format version; the loader only accepts version 1.
    version: u32,
    // Embedding length recorded when the index was saved.
    dimension: usize,
    // Slimmed-down spans (no embedding, no metadata).
    spans: Vec<SpanLite>,
    // Embeddings stored separately from the spans, in the same order.
    embeddings: Vec<Vec<f32>>,
}
68
/// Persistable subset of a [`Span`]: every field that the conversions in this file copy,
/// leaving out the span's `embedding` and `metadata`.
#[derive(Serialize, Deserialize, Clone)]
struct SpanLite {
    id: String,
    artifact_id: String,
    start_line: usize,
    end_line: usize,
    text: String,
    token_count: usize,
    embedding_model: Option<String>,
}
79
80impl From<&Span> for SpanLite {
81 fn from(s: &Span) -> Self {
82 SpanLite {
83 id: s.id.clone(),
84 artifact_id: s.artifact_id.clone(),
85 start_line: s.start_line,
86 end_line: s.end_line,
87 text: s.text.clone(),
88 token_count: s.token_count,
89 embedding_model: s.embedding_model.clone(),
90 }
91 }
92}
93
94impl From<SpanLite> for Span {
95 fn from(s: SpanLite) -> Self {
96 Span {
97 id: s.id,
98 artifact_id: s.artifact_id,
99 start_line: s.start_line,
100 end_line: s.end_line,
101 text: s.text,
102 embedding: None, embedding_model: s.embedding_model,
104 token_count: s.token_count,
105 metadata: None,
106 }
107 }
108}
109
110impl ApproxIndex for InstantBackend {
111 fn build(spans: Vec<Span>) -> Self {
112 let embeddings: Vec<Vec<f32>> = spans
114 .iter()
115 .map(|s| s.embedding.clone().unwrap_or_default())
116 .collect();
117 let dimension = embeddings.first().map(|e| e.len()).unwrap_or(0);
118 Self {
119 dimension,
120 spans,
121 embeddings,
122 }
123 }
124
125 fn search(&self, query_embedding: &[f32], k: usize) -> Result<Vec<ScoredSpan>> {
126 if query_embedding.len() != self.dimension || self.dimension == 0 {
127 return Ok(Vec::new());
128 }
129 let mut scored: Vec<ScoredSpan> = self
131 .spans
132 .iter()
133 .zip(self.embeddings.iter())
134 .map(|(s, emb)| {
135 let score = if emb.len() == self.dimension {
136 cosine_similarity(query_embedding, emb)
137 } else {
138 0.0
139 };
140 ScoredSpan {
141 span: s.clone(),
142 score,
143 }
144 })
145 .collect();
146 scored.sort_by(|a, b| {
148 b.score
149 .partial_cmp(&a.score)
150 .unwrap_or(std::cmp::Ordering::Equal)
151 .then_with(|| a.span.artifact_id.cmp(&b.span.artifact_id))
152 .then_with(|| a.span.start_line.cmp(&b.span.start_line))
153 });
154 scored.truncate(k.min(scored.len()));
155 Ok(scored)
156 }
157
158 fn save_to_disk(&self, dir: &Path) -> Result<()> {
159 use std::fs;
160 fs::create_dir_all(dir)?;
161 let slim: Vec<SpanLite> = self.spans.iter().map(SpanLite::from).collect();
163 let on_disk = InstantBackendOnDisk {
164 version: 1,
165 dimension: self.dimension,
166 spans: slim,
167 embeddings: self.embeddings.clone(),
168 };
169 let data = bincode::serialize(&on_disk)
170 .map_err(|e| crate::types::Error::Other(anyhow::anyhow!("serialize instant index: {}", e)))?;
171 let tmp = dir.join("instant.idx.tmp");
172 let dst = dir.join("instant.idx");
173 fs::write(&tmp, data)?;
174 fs::rename(tmp, dst)?;
175 Ok(())
176 }
177
178 fn load_from_disk(dir: &Path) -> Result<Option<Self>> {
179 use std::fs;
180 let path = dir.join("instant.idx");
181 if !path.exists() {
182 return Ok(None);
183 }
184 let bytes = fs::read(path)?;
185 let on_disk: InstantBackendOnDisk = bincode::deserialize(&bytes)
186 .map_err(|e| crate::types::Error::Other(anyhow::anyhow!("deserialize instant index: {}", e)))?;
187 if on_disk.version != 1 {
188 return Ok(None);
189 }
190 let spans: Vec<Span> = on_disk.spans.into_iter().map(Span::from).collect();
192 Ok(Some(InstantBackend {
193 dimension: on_disk.dimension,
194 spans,
195 embeddings: on_disk.embeddings,
196 }))
197 }
198
199 fn spans(&self) -> &[Span] {
200 &self.spans
201 }
202}
203