codelens_engine/
embedding_store.rs1use anyhow::Result;
5use serde::Serialize;
6use std::collections::BTreeMap;
7
/// One stored unit of embedded source: identifying metadata for a symbol,
/// the text associated with it, and its embedding vector(s).
///
/// NOTE(review): the field notes below are inferred from the field names and
/// types only; confirm the exact conventions against the code that builds
/// these values.
#[derive(Clone, Debug)]
pub struct EmbeddingChunk {
    /// Path of the file this chunk is associated with.
    pub file_path: String,
    /// Name of the symbol this chunk describes.
    pub symbol_name: String,
    /// Symbol kind as a free-form string (the set of values is defined by the producer).
    pub kind: String,
    /// Line number of the symbol (presumably within `file_path`; base not visible here).
    pub line: usize,
    /// Signature string of the symbol.
    pub signature: String,
    /// Name path of the symbol (presumably a qualified/hierarchical name; verify).
    pub name_path: String,
    /// Text carried with the chunk (presumably the text that was embedded; verify).
    pub text: String,
    /// Embedding vector for this chunk.
    pub embedding: Vec<f32>,
    /// Optional second embedding vector; `None` when absent.
    /// Judging by the name it is derived from documentation text; confirm.
    pub doc_embedding: Option<Vec<f32>>,
}
23
24#[derive(Debug, Clone, Serialize)]
26pub struct ScoredChunk {
27 pub file_path: String,
28 pub symbol_name: String,
29 pub kind: String,
30 pub line: usize,
31 pub signature: String,
32 pub name_path: String,
33 pub score: f64,
34}
35
36pub trait EmbeddingStore: Send + Sync {
39 fn upsert(&self, chunks: &[EmbeddingChunk]) -> Result<usize>;
41
42 fn insert(&self, chunks: &[EmbeddingChunk]) -> Result<usize>;
44
45 fn search(&self, query_vec: &[f32], top_k: usize) -> Result<Vec<ScoredChunk>>;
47
48 fn search_dual(
51 &self,
52 query_vec: &[f32],
53 top_k: usize,
54 doc_weight: f64,
55 ) -> Result<Vec<ScoredChunk>> {
56 let _ = doc_weight;
58 self.search(query_vec, top_k)
59 }
60
61 fn delete_by_file(&self, file_paths: &[&str]) -> Result<usize>;
63
64 fn clear(&self) -> Result<()>;
66
67 fn count(&self) -> Result<usize>;
69
70 fn get_embedding(
72 &self,
73 _file_path: &str,
74 _symbol_name: &str,
75 ) -> Result<Option<EmbeddingChunk>> {
76 Ok(None)
77 }
78
79 fn embeddings_for_scored_chunks(&self, chunks: &[ScoredChunk]) -> Result<Vec<EmbeddingChunk>> {
82 let mut resolved = Vec::with_capacity(chunks.len());
83 for chunk in chunks {
84 if let Some(embedding) = self.get_embedding(&chunk.file_path, &chunk.symbol_name)? {
85 resolved.push(embedding);
86 }
87 }
88 Ok(resolved)
89 }
90
91 fn all_with_embeddings(&self) -> Result<Vec<EmbeddingChunk>> {
93 Ok(Vec::new()) }
95
96 fn embeddings_for_files(&self, file_paths: &[&str]) -> Result<Vec<EmbeddingChunk>> {
99 let file_set: std::collections::BTreeSet<&str> = file_paths.iter().copied().collect();
100 Ok(self
101 .all_with_embeddings()?
102 .into_iter()
103 .filter(|chunk| file_set.contains(chunk.file_path.as_str()))
104 .collect())
105 }
106
107 fn for_each_embedding_batch(
110 &self,
111 batch_size: usize,
112 visitor: &mut dyn FnMut(Vec<EmbeddingChunk>) -> Result<()>,
113 ) -> Result<()> {
114 if batch_size == 0 {
115 return Ok(());
116 }
117
118 let all = self.all_with_embeddings()?;
119 for chunk_batch in all.chunks(batch_size) {
120 visitor(chunk_batch.to_vec())?;
121 }
122 Ok(())
123 }
124
125 fn for_each_file_embeddings(
129 &self,
130 visitor: &mut dyn FnMut(String, Vec<EmbeddingChunk>) -> Result<()>,
131 ) -> Result<()> {
132 let mut by_file: BTreeMap<String, Vec<EmbeddingChunk>> = BTreeMap::new();
133 for chunk in self.all_with_embeddings()? {
134 by_file
135 .entry(chunk.file_path.clone())
136 .or_default()
137 .push(chunk);
138 }
139 for (file_path, chunks) in by_file {
140 visitor(file_path, chunks)?;
141 }
142 Ok(())
143 }
144}