Skip to main content

lean_ctx/core/
dense_backend.rs

1use std::path::Path;
2
3use crate::core::hybrid_search::{DenseSearchResult, HybridConfig, HybridResult};
4use crate::core::vector_index::BM25Index;
5#[cfg(feature = "qdrant")]
6use crate::core::vector_index::ChunkKind;
7
/// Selects which backend serves dense (embedding-based) search results.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DenseBackendKind {
    /// Score the in-memory embeddings directly.
    Local,
    /// Delegate the dense search to a Qdrant collection.
    #[cfg(feature = "qdrant")]
    Qdrant,
}

impl DenseBackendKind {
    /// Resolves the backend from the process environment.
    ///
    /// `LEANCTX_DENSE_BACKEND` (trimmed, case-insensitive) takes precedence.
    /// When it is unset or blank, a non-blank `LEANCTX_QDRANT_URL` implies
    /// `qdrant`; otherwise the local backend is used.
    ///
    /// # Errors
    ///
    /// Returns a message when the requested name is neither `local` nor
    /// `qdrant`, or when `qdrant` is requested but the `qdrant` feature was
    /// not compiled in.
    pub fn try_from_env() -> Result<Self, String> {
        // A value that is not valid Unicode is treated the same as "unset".
        let explicit = match std::env::var("LEANCTX_DENSE_BACKEND") {
            Ok(raw) => {
                let name = raw.trim().to_ascii_lowercase();
                if name.is_empty() {
                    None
                } else {
                    Some(name)
                }
            }
            Err(_) => None,
        };
        let url_configured = matches!(
            std::env::var("LEANCTX_QDRANT_URL"),
            Ok(url) if !url.trim().is_empty()
        );

        let requested = match explicit {
            Some(name) => name,
            None if url_configured => String::from("qdrant"),
            None => return Ok(Self::Local),
        };

        match requested.as_str() {
            "local" => Ok(Self::Local),
            "qdrant" => {
                #[cfg(feature = "qdrant")]
                {
                    Ok(Self::Qdrant)
                }
                #[cfg(not(feature = "qdrant"))]
                {
                    Err("Dense backend 'qdrant' requested, but feature 'qdrant' is not enabled. Rebuild with --features qdrant.".to_string())
                }
            }
            other => Err(format!(
                "Unknown LEANCTX_DENSE_BACKEND={other:?} (expected 'local' or 'qdrant')"
            )),
        }
    }

    /// Returns the lowercase name of this backend, as accepted by
    /// `LEANCTX_DENSE_BACKEND`.
    pub fn label(&self) -> &'static str {
        match *self {
            Self::Local => "local",
            #[cfg(feature = "qdrant")]
            Self::Qdrant => "qdrant",
        }
    }
}
54
/// Runs a dense-only search on `backend` and reports the hits in the
/// [`HybridResult`] shape.
///
/// Each hit's similarity is used both as `rrf_score` and as `dense_score`;
/// the BM25 score and both rank fields are left as `None`.
///
/// # Errors
///
/// Propagates any error returned by the underlying dense search.
#[cfg(feature = "embeddings")]
#[allow(clippy::too_many_arguments)]
pub fn dense_results_as_hybrid(
    backend: DenseBackendKind,
    root: &Path,
    index: &BM25Index,
    engine: &crate::core::embeddings::EmbeddingEngine,
    aligned_embeddings: &[Vec<f32>],
    changed_files: &[String],
    query: &str,
    top_k: usize,
    filter: Option<&dyn Fn(&str) -> bool>,
) -> Result<Vec<HybridResult>, String> {
    let hits = dense_results(
        backend,
        root,
        index,
        engine,
        aligned_embeddings,
        changed_files,
        query,
        top_k,
        filter,
    )?;

    let mut converted = Vec::with_capacity(hits.len());
    for hit in hits {
        let DenseSearchResult {
            similarity,
            file_path,
            symbol_name,
            kind,
            start_line,
            end_line,
            snippet,
            ..
        } = hit;
        converted.push(HybridResult {
            file_path,
            symbol_name,
            kind,
            start_line,
            end_line,
            snippet,
            // Widening f32 -> f64 is lossless.
            rrf_score: f64::from(similarity),
            bm25_score: None,
            dense_score: Some(similarity),
            bm25_rank: None,
            dense_rank: None,
        });
    }
    Ok(converted)
}
97
/// Produces up to `top_k` hybrid (BM25 + dense) results for `query`.
///
/// * `Local`: delegates to `hybrid_search` over the in-memory embeddings,
///   then applies `filter` to the returned hits. `root` and `changed_files`
///   are not used on this path.
/// * `Qdrant` (feature `qdrant`): runs BM25 on `index` and the dense search
///   through `dense_results`, applies `filter` to both candidate lists, and
///   merges them with `reciprocal_rank_fusion`.
///
/// `filter`, when present, receives each hit's file path and keeps the hit
/// only if it returns `true`.
///
/// # Errors
///
/// Propagates any error returned by the dense search on the Qdrant path.
#[cfg(feature = "embeddings")]
#[allow(clippy::too_many_arguments)]
pub fn hybrid_results(
    backend: DenseBackendKind,
    root: &Path,
    index: &BM25Index,
    engine: &crate::core::embeddings::EmbeddingEngine,
    aligned_embeddings: &[Vec<f32>],
    changed_files: &[String],
    query: &str,
    top_k: usize,
    config: &HybridConfig,
    filter: Option<&dyn Fn(&str) -> bool>,
) -> Result<Vec<HybridResult>, String> {
    match backend {
        DenseBackendKind::Local => {
            // Only the Qdrant path needs these; touch them so they never
            // count as unused when that path is compiled out.
            let _ = (root, changed_files);
            let ranked = crate::core::hybrid_search::hybrid_search(
                query,
                index,
                Some(engine),
                Some(aligned_embeddings),
                top_k,
                config,
            );
            // The filter runs after the search, so fewer than `top_k` hits
            // may survive.
            Ok(ranked
                .into_iter()
                .filter(|hit| match filter {
                    Some(keep) => keep(&hit.file_path),
                    None => true,
                })
                .take(top_k)
                .collect())
        }
        #[cfg(feature = "qdrant")]
        DenseBackendKind::Qdrant => {
            // Never request fewer candidates than the caller wants back.
            let lexical_k = config.bm25_candidates.max(top_k);
            let semantic_k = config.dense_candidates.max(top_k);

            let mut lexical = index.search(query, lexical_k);
            if let Some(keep) = filter {
                lexical.retain(|hit| keep(&hit.file_path));
            }

            let semantic = dense_results(
                backend,
                root,
                index,
                engine,
                aligned_embeddings,
                changed_files,
                query,
                semantic_k,
                filter,
            )?;

            let mut merged = crate::core::hybrid_search::reciprocal_rank_fusion(
                &lexical, &semantic, config, top_k,
            );
            if let Some(keep) = filter {
                merged.retain(|hit| keep(&hit.file_path));
            }
            merged.truncate(top_k);
            Ok(merged)
        }
    }
}
161
/// Dispatches a dense search to the implementation matching `backend`.
///
/// The local implementation does not use `root` or `changed_files`; the
/// Qdrant implementation (feature `qdrant`) receives every argument.
///
/// # Errors
///
/// Propagates whatever error the selected implementation returns.
#[cfg(feature = "embeddings")]
#[allow(clippy::too_many_arguments)]
fn dense_results(
    backend: DenseBackendKind,
    root: &Path,
    index: &BM25Index,
    engine: &crate::core::embeddings::EmbeddingEngine,
    aligned_embeddings: &[Vec<f32>],
    changed_files: &[String],
    query: &str,
    top_k: usize,
    filter: Option<&dyn Fn(&str) -> bool>,
) -> Result<Vec<DenseSearchResult>, String> {
    match backend {
        #[cfg(feature = "qdrant")]
        DenseBackendKind::Qdrant => dense_results_qdrant(
            root,
            index,
            engine,
            aligned_embeddings,
            changed_files,
            query,
            top_k,
            filter,
        ),
        DenseBackendKind::Local => {
            // Touch the Qdrant-only inputs so they never count as unused
            // when the Qdrant arm is compiled out.
            let _ = (root, changed_files);
            dense_results_local(index, engine, aligned_embeddings, query, top_k, filter)
        }
    }
}
193
/// Ranks the in-memory embeddings against `query` by cosine similarity and
/// returns the best `top_k` chunks.
///
/// `aligned_embeddings[i]` is scored for `index.chunks[i]`; an embedding
/// with no chunk at the same position is skipped. When `filter` is present,
/// only chunks whose file path it accepts are scored.
///
/// Results are ordered by descending similarity. A NaN similarity is ranked
/// after every real score rather than being treated as equal to all of them.
///
/// # Errors
///
/// Returns `"embedding failed: …"` when the query cannot be embedded.
#[cfg(feature = "embeddings")]
fn dense_results_local(
    index: &BM25Index,
    engine: &crate::core::embeddings::EmbeddingEngine,
    aligned_embeddings: &[Vec<f32>],
    query: &str,
    top_k: usize,
    filter: Option<&dyn Fn(&str) -> bool>,
) -> Result<Vec<DenseSearchResult>, String> {
    use crate::core::embeddings::cosine_similarity;

    let query_embedding = engine
        .embed(query)
        .map_err(|e| format!("embedding failed: {e}"))?;

    let mut scored: Vec<(usize, f32)> = aligned_embeddings
        .iter()
        .enumerate()
        .filter(|(i, _)| {
            // Drop embeddings without a chunk *before* ranking, so they can
            // never occupy one of the `top_k` slots and shrink the result.
            let Some(chunk) = index.chunks.get(*i) else {
                return false;
            };
            match filter {
                Some(pred) => pred(&chunk.file_path),
                None => true,
            }
        })
        .map(|(i, emb)| (i, cosine_similarity(&query_embedding, emb)))
        .collect();

    // `sort_by` requires a total order (and may panic without one), which
    // "NaN compares equal to everything" is not. Order NaN scores last and
    // compare the remaining scores with `total_cmp`, highest first.
    scored.sort_by(|a, b| match (a.1.is_nan(), b.1.is_nan()) {
        (false, false) => b.1.total_cmp(&a.1),
        (a_nan, b_nan) => a_nan.cmp(&b_nan),
    });
    scored.truncate(top_k);

    Ok(scored
        .into_iter()
        .filter_map(|(idx, sim)| {
            let chunk = index.chunks.get(idx)?;
            // The snippet is the first five lines of the chunk.
            let snippet = chunk.content.lines().take(5).collect::<Vec<_>>().join("\n");
            Some(DenseSearchResult {
                chunk_idx: idx,
                similarity: sim,
                file_path: chunk.file_path.clone(),
                symbol_name: chunk.symbol_name.clone(),
                kind: chunk.kind.clone(),
                start_line: chunk.start_line,
                end_line: chunk.end_line,
                snippet,
            })
        })
        .collect())
}
240
/// Dense search served by Qdrant.
///
/// On every call this builds a store from the environment, ensures the
/// collection for `root` and the engine's dimensionality exists, syncs the
/// index into it, and then searches it with the embedded query. Hits
/// rejected by `filter` are dropped after the search, so fewer than `top_k`
/// results may be returned. Snippets are read from disk (first five lines
/// of the hit's range).
///
/// # Errors
///
/// Propagates store errors, and returns `"embedding failed: …"` when the
/// query cannot be embedded.
#[cfg(all(feature = "qdrant", feature = "embeddings"))]
fn dense_results_qdrant(
    root: &Path,
    index: &BM25Index,
    engine: &crate::core::embeddings::EmbeddingEngine,
    aligned_embeddings: &[Vec<f32>],
    changed_files: &[String],
    query: &str,
    top_k: usize,
    filter: Option<&dyn Fn(&str) -> bool>,
) -> Result<Vec<DenseSearchResult>, String> {
    let store = crate::core::qdrant_store::QdrantStore::from_env()?;
    let collection = store.collection_name(root, engine.dimensions())?;
    let is_fresh = store.ensure_collection(&collection, engine.dimensions())?;
    store.sync_index(
        &collection,
        index,
        aligned_embeddings,
        changed_files,
        is_fresh,
    )?;

    let query_vec = engine
        .embed(query)
        .map_err(|e| format!("embedding failed: {e}"))?;

    let results = store
        .search(&collection, &query_vec, top_k)?
        .into_iter()
        .filter(|hit| match filter {
            Some(keep) => keep(&hit.file_path),
            None => true,
        })
        .map(|hit| {
            let snippet =
                snippet_from_disk(root, &hit.file_path, hit.start_line, hit.end_line, 5);
            DenseSearchResult {
                // NOTE(review): every hit gets chunk index 0; confirm that no
                // consumer relies on `chunk_idx` to tell these results apart.
                chunk_idx: 0,
                similarity: hit.score,
                file_path: hit.file_path,
                symbol_name: hit.symbol_name,
                kind: hit.kind,
                start_line: hit.start_line,
                end_line: hit.end_line,
                snippet,
            }
        })
        .collect();
    Ok(results)
}
290
/// Reads at most `max_lines` lines of `rel_path` (resolved under `root`),
/// starting at the 1-based `start_line` and stopping at `end_line`
/// inclusive, joined with `\n`.
///
/// An `end_line` smaller than `start_line` is treated as `start_line`.
/// Returns an empty string when the path is rejected by the path jail, the
/// file cannot be read as UTF-8 text, or the range lies past the end of the
/// file.
#[cfg(feature = "qdrant")]
fn snippet_from_disk(
    root: &Path,
    rel_path: &str,
    start_line: usize,
    end_line: usize,
    max_lines: usize,
) -> String {
    let text = crate::core::pathjail::jail_path(&root.join(rel_path), root)
        .ok()
        .and_then(|jailed| std::fs::read_to_string(jailed).ok());
    let Some(text) = text else {
        return String::new();
    };

    // Zero-based index of the first wanted line, and the exclusive end.
    let first = start_line.saturating_sub(1);
    let past_last = end_line.max(start_line);

    // `past_last >= first` always holds, so the subtraction cannot underflow.
    // `skip`/`take` stop at the end of the file on their own.
    text.lines()
        .skip(first)
        .take(past_last - first)
        .take(max_lines)
        .collect::<Vec<_>>()
        .join("\n")
}
317
/// Maps a [`ChunkKind`] to the name of its variant.
///
/// [`kind_from_str`] performs the reverse mapping.
#[cfg(feature = "qdrant")]
fn chunk_kind_str(kind: &ChunkKind) -> &'static str {
    match *kind {
        ChunkKind::Function => "Function",
        ChunkKind::Method => "Method",
        ChunkKind::Struct => "Struct",
        ChunkKind::Class => "Class",
        ChunkKind::Impl => "Impl",
        ChunkKind::Module => "Module",
        ChunkKind::Other => "Other",
    }
}
330
/// Parses a variant name back into a [`ChunkKind`].
///
/// The match is case-sensitive; any string that is not one of the known
/// variant names maps to [`ChunkKind::Other`].
#[cfg(feature = "qdrant")]
pub(crate) fn kind_from_str(s: &str) -> ChunkKind {
    let parsed = match s {
        "Function" => ChunkKind::Function,
        "Method" => ChunkKind::Method,
        "Struct" => ChunkKind::Struct,
        "Class" => ChunkKind::Class,
        "Impl" => ChunkKind::Impl,
        "Module" => ChunkKind::Module,
        _unrecognized => ChunkKind::Other,
    };
    parsed
}
343
/// Crate-visible entry point for turning a [`ChunkKind`] into its string
/// name; forwards to the private `chunk_kind_str`.
#[cfg(feature = "qdrant")]
pub(crate) fn kind_to_str(kind: &ChunkKind) -> &'static str {
    chunk_kind_str(kind)
}
348
#[cfg(test)]
mod tests {
    use super::*;
    use std::ffi::OsString;
    use std::sync::{Mutex, MutexGuard, PoisonError};

    /// Serializes the tests below: they all mutate process-wide environment
    /// variables, and the test harness runs tests on multiple threads.
    static ENV_LOCK: Mutex<()> = Mutex::new(());

    const BACKEND_VAR: &str = "LEANCTX_DENSE_BACKEND";
    const QDRANT_URL_VAR: &str = "LEANCTX_QDRANT_URL";

    /// Holds `ENV_LOCK` and the previous values of the backend variables.
    ///
    /// The previous values are restored on drop, so a failing assertion can
    /// no longer leak a modified environment into later tests.
    struct ScopedEnv {
        saved: Vec<(&'static str, Option<OsString>)>,
        // Declared last: fields drop in declaration order, and `Drop::drop`
        // runs before any field is dropped, so the lock outlives the restore.
        _lock: MutexGuard<'static, ()>,
    }

    impl ScopedEnv {
        /// Takes the lock, then sets (`Some`) or removes (`None`) each
        /// variable after remembering its current value.
        fn new(backend: Option<&str>, qdrant_url: Option<&str>) -> Self {
            // A test that panicked while holding the lock poisons it; the
            // guarded data is `()`, so continuing is always sound.
            let lock = ENV_LOCK.lock().unwrap_or_else(PoisonError::into_inner);

            let mut saved = Vec::with_capacity(2);
            for (key, value) in [(BACKEND_VAR, backend), (QDRANT_URL_VAR, qdrant_url)] {
                saved.push((key, std::env::var_os(key)));
                match value {
                    Some(v) => std::env::set_var(key, v),
                    None => std::env::remove_var(key),
                }
            }
            Self { saved, _lock: lock }
        }
    }

    impl Drop for ScopedEnv {
        fn drop(&mut self) {
            for (key, previous) in self.saved.drain(..) {
                match previous {
                    Some(v) => std::env::set_var(key, v),
                    None => std::env::remove_var(key),
                }
            }
        }
    }

    #[test]
    fn dense_backend_defaults_to_local() {
        let _env = ScopedEnv::new(None, None);

        let got = DenseBackendKind::try_from_env().unwrap();
        assert_eq!(got, DenseBackendKind::Local);
    }

    #[test]
    fn dense_backend_unknown_value_errors() {
        let _env = ScopedEnv::new(Some("wat"), None);

        let err = DenseBackendKind::try_from_env().unwrap_err();
        assert!(err.contains("Unknown LEANCTX_DENSE_BACKEND"));
    }

    #[cfg(feature = "qdrant")]
    #[test]
    fn dense_backend_infers_qdrant_from_url() {
        let _env = ScopedEnv::new(None, Some("http://127.0.0.1:6333"));

        let got = DenseBackendKind::try_from_env().unwrap();
        assert_eq!(got, DenseBackendKind::Qdrant);
    }

    #[cfg(not(feature = "qdrant"))]
    #[test]
    fn dense_backend_qdrant_requires_feature() {
        let _env = ScopedEnv::new(Some("qdrant"), None);

        let err = DenseBackendKind::try_from_env().unwrap_err();
        assert!(err.contains("feature 'qdrant' is not enabled"));
    }
}
425}