1use crate::{Result, ScoredSpan, Span};
6use crate::index::cosine_similarity;
7use serde::{Serialize, Deserialize};
8use std::path::Path;
9
10pub trait ApproxIndex: Sized {
12 fn build(spans: Vec<Span>) -> Self;
14
15 fn search(&self, query_embedding: &[f32], k: usize) -> Result<Vec<ScoredSpan>>;
17
18 fn save_to_disk(&self, dir: &Path) -> Result<()>;
20
21 fn load_from_disk(dir: &Path) -> Result<Option<Self>>;
23
24 fn spans(&self) -> &[Span];
26}
27
28pub struct HnswBackend(pub crate::index::VectorIndex);
30
31impl ApproxIndex for HnswBackend {
32 fn build(spans: Vec<Span>) -> Self {
33 Self(crate::index::VectorIndex::build(spans))
34 }
35
36 fn search(&self, query_embedding: &[f32], k: usize) -> Result<Vec<ScoredSpan>> {
37 self.0.search(query_embedding, k)
38 }
39
40 fn save_to_disk(&self, dir: &Path) -> Result<()> {
41 self.0.save_to_disk(dir)
42 }
43
44 fn load_from_disk(dir: &Path) -> Result<Option<Self>> {
45 Ok(crate::index::VectorIndex::load_from_disk(dir)?.map(HnswBackend))
46 }
47
48 fn spans(&self) -> &[Span] {
49 self.0.spans()
50 }
51}
52
53#[derive(Serialize, Deserialize)]
59pub struct InstantBackend {
60 dimension: usize,
61 spans: Vec<Span>,
62 embeddings: Vec<Vec<f32>>,
63}
64
65#[derive(Serialize, Deserialize)]
66struct InstantBackendOnDisk {
67 version: u32,
68 dimension: usize,
69 spans: Vec<SpanLite>,
70 embeddings: Vec<Vec<f32>>,
71}
72
73#[derive(Serialize, Deserialize, Clone)]
74struct SpanLite {
75 id: String,
76 artifact_id: String,
77 start_line: usize,
78 end_line: usize,
79 text: String,
80 token_count: usize,
81 embedding_model: Option<String>,
82}
83
84impl From<&Span> for SpanLite {
85 fn from(s: &Span) -> Self {
86 SpanLite {
87 id: s.id.clone(),
88 artifact_id: s.artifact_id.clone(),
89 start_line: s.start_line,
90 end_line: s.end_line,
91 text: s.text.clone(),
92 token_count: s.token_count,
93 embedding_model: s.embedding_model.clone(),
94 }
95 }
96}
97
98impl From<SpanLite> for Span {
99 fn from(s: SpanLite) -> Self {
100 Span {
101 id: s.id,
102 artifact_id: s.artifact_id,
103 start_line: s.start_line,
104 end_line: s.end_line,
105 text: s.text,
106 embedding: None, embedding_model: s.embedding_model,
108 token_count: s.token_count,
109 metadata: None,
110 }
111 }
112}
113
114impl ApproxIndex for InstantBackend {
115 fn build(spans: Vec<Span>) -> Self {
116 let embeddings: Vec<Vec<f32>> = spans
118 .iter()
119 .map(|s| s.embedding.clone().unwrap_or_default())
120 .collect();
121 let dimension = embeddings.first().map(|e| e.len()).unwrap_or(0);
122 Self {
123 dimension,
124 spans,
125 embeddings,
126 }
127 }
128
129 fn search(&self, query_embedding: &[f32], k: usize) -> Result<Vec<ScoredSpan>> {
130 if query_embedding.len() != self.dimension || self.dimension == 0 {
131 return Ok(Vec::new());
132 }
133 let mut scored: Vec<ScoredSpan> = self
135 .spans
136 .iter()
137 .zip(self.embeddings.iter())
138 .map(|(s, emb)| {
139 let score = if emb.len() == self.dimension {
140 cosine_similarity(query_embedding, emb)
141 } else {
142 0.0
143 };
144 ScoredSpan {
145 span: s.clone(),
146 score,
147 }
148 })
149 .collect();
150 scored.sort_by(|a, b| {
152 b.score
153 .partial_cmp(&a.score)
154 .unwrap_or(std::cmp::Ordering::Equal)
155 .then_with(|| a.span.artifact_id.cmp(&b.span.artifact_id))
156 .then_with(|| a.span.start_line.cmp(&b.span.start_line))
157 });
158 scored.truncate(k.min(scored.len()));
159 Ok(scored)
160 }
161
162 fn save_to_disk(&self, dir: &Path) -> Result<()> {
163 use std::fs;
164 fs::create_dir_all(dir)?;
165 let slim: Vec<SpanLite> = self.spans.iter().map(SpanLite::from).collect();
167 let on_disk = InstantBackendOnDisk {
168 version: 1,
169 dimension: self.dimension,
170 spans: slim,
171 embeddings: self.embeddings.clone(),
172 };
173 let data = bincode::serialize(&on_disk)
174 .map_err(|e| crate::types::Error::Other(anyhow::anyhow!("serialize instant index: {}", e)))?;
175 let tmp = dir.join("instant.idx.tmp");
176 let dst = dir.join("instant.idx");
177 fs::write(&tmp, data)?;
178 fs::rename(tmp, dst)?;
179 Ok(())
180 }
181
182 fn load_from_disk(dir: &Path) -> Result<Option<Self>> {
183 use std::fs;
184 let path = dir.join("instant.idx");
185 if !path.exists() {
186 return Ok(None);
187 }
188 let bytes = fs::read(path)?;
189 let on_disk: InstantBackendOnDisk = bincode::deserialize(&bytes)
190 .map_err(|e| crate::types::Error::Other(anyhow::anyhow!("deserialize instant index: {}", e)))?;
191 if on_disk.version != 1 {
192 return Ok(None);
193 }
194 let spans: Vec<Span> = on_disk.spans.into_iter().map(Span::from).collect();
196 Ok(Some(InstantBackend {
197 dimension: on_disk.dimension,
198 spans,
199 embeddings: on_disk.embeddings,
200 }))
201 }
202
203 fn spans(&self) -> &[Span] {
204 &self.spans
205 }
206}
207