Skip to main content

lean_ctx/core/
dense_backend.rs

1use std::path::Path;
2
3use crate::core::bm25_index::BM25Index;
4#[cfg(feature = "qdrant")]
5use crate::core::bm25_index::ChunkKind;
6use crate::core::hybrid_search::{DenseSearchResult, HybridConfig, HybridResult};
7
/// Identifies which backend serves dense (embedding-similarity) queries.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DenseBackendKind {
    /// The default backend; always compiled in.
    Local,
    /// Qdrant-backed search; the variant only exists with the `qdrant` feature.
    #[cfg(feature = "qdrant")]
    Qdrant,
}

impl DenseBackendKind {
    /// Resolves the backend from the process environment.
    ///
    /// `LEANCTX_DENSE_BACKEND` wins when it holds a non-blank value; the value
    /// is trimmed and ASCII-lowercased before matching. Otherwise a non-blank
    /// `LEANCTX_QDRANT_URL` implies `"qdrant"`. With neither set the result is
    /// [`DenseBackendKind::Local`].
    ///
    /// # Errors
    ///
    /// Returns a message when the requested name is neither `local` nor
    /// `qdrant`, or when `qdrant` is requested (explicitly or via the URL) in a
    /// build without the `qdrant` feature.
    pub fn try_from_env() -> Result<Self, String> {
        // Unset, non-Unicode and whitespace-only values all count as absent.
        let non_blank = |key: &str| -> Option<String> {
            let raw = std::env::var(key).ok()?;
            let trimmed = raw.trim();
            if trimmed.is_empty() {
                None
            } else {
                Some(trimmed.to_owned())
            }
        };

        let requested = match non_blank("LEANCTX_DENSE_BACKEND") {
            Some(name) => Some(name.to_ascii_lowercase()),
            // No explicit choice: a configured Qdrant URL selects Qdrant.
            None => non_blank("LEANCTX_QDRANT_URL").map(|_| String::from("qdrant")),
        };

        match requested.as_deref() {
            None | Some("local") => Ok(Self::Local),
            #[cfg(feature = "qdrant")]
            Some("qdrant") => Ok(Self::Qdrant),
            #[cfg(not(feature = "qdrant"))]
            Some("qdrant") => Err("Dense backend 'qdrant' requested, but feature 'qdrant' is not enabled. Rebuild with --features qdrant.".to_string()),
            Some(other) => Err(format!(
                "Unknown LEANCTX_DENSE_BACKEND={other:?} (expected 'local' or 'qdrant')"
            )),
        }
    }

    /// Returns the lowercase name of this backend (`"local"` or `"qdrant"`).
    pub fn label(&self) -> &'static str {
        match *self {
            Self::Local => "local",
            #[cfg(feature = "qdrant")]
            Self::Qdrant => "qdrant",
        }
    }
}
54
/// Runs a dense-only search and reports the hits in the [`HybridResult`] shape.
///
/// All arguments are forwarded unchanged to `dense_results`. Each hit's
/// similarity is used both as `rrf_score` and as `dense_score`; the BM25 score
/// and both rank fields are left as `None` because no fusion takes place.
///
/// # Errors
///
/// Propagates whatever error the selected dense backend returns.
#[cfg(feature = "embeddings")]
#[allow(clippy::too_many_arguments)]
pub fn dense_results_as_hybrid(
    backend: DenseBackendKind,
    root: &Path,
    index: &BM25Index,
    engine: &crate::core::embeddings::EmbeddingEngine,
    aligned_embeddings: &[Vec<f32>],
    changed_files: &[String],
    query: &str,
    top_k: usize,
    filter: Option<&dyn Fn(&str) -> bool>,
) -> Result<Vec<HybridResult>, String> {
    let hits = dense_results(
        backend,
        root,
        index,
        engine,
        aligned_embeddings,
        changed_files,
        query,
        top_k,
        filter,
    )?;

    let mut converted = Vec::with_capacity(hits.len());
    for hit in hits {
        let similarity = hit.similarity;
        converted.push(HybridResult {
            file_path: hit.file_path,
            symbol_name: hit.symbol_name,
            kind: hit.kind,
            start_line: hit.start_line,
            end_line: hit.end_line,
            snippet: hit.snippet,
            // With no lexical side to fuse, the similarity doubles as the final score.
            rrf_score: f64::from(similarity),
            bm25_score: None,
            dense_score: Some(similarity),
            bm25_rank: None,
            dense_rank: None,
        });
    }
    Ok(converted)
}
97
/// Runs a hybrid (BM25 + dense) search using the selected dense backend.
///
/// * `Local` delegates the whole search to `hybrid_search` over the in-memory
///   embeddings; `root` and `changed_files` are unused on that path.
/// * `Qdrant` (feature-gated) runs the BM25 search here, fetches dense hits via
///   `dense_results`, and combines both with `reciprocal_rank_fusion`.
///
/// On both paths the optional path `filter` is applied to the final list,
/// which is then truncated to `top_k`.
///
/// NOTE(review): on the local path the filter only runs after `hybrid_search`
/// was asked for `top_k` results, so presumably fewer than `top_k` entries can
/// be returned when the filter rejects hits — confirm whether that is intended.
///
/// # Errors
///
/// Propagates errors from the dense backend (Qdrant path only).
#[cfg(feature = "embeddings")]
#[allow(clippy::too_many_arguments)]
pub fn hybrid_results(
    backend: DenseBackendKind,
    root: &Path,
    index: &BM25Index,
    engine: &crate::core::embeddings::EmbeddingEngine,
    aligned_embeddings: &[Vec<f32>],
    changed_files: &[String],
    query: &str,
    top_k: usize,
    config: &HybridConfig,
    filter: Option<&dyn Fn(&str) -> bool>,
    graph_file_ranks: Option<&std::collections::HashMap<String, usize>>,
) -> Result<Vec<HybridResult>, String> {
    // A missing filter accepts every path.
    let keep = |path: &str| filter.map_or(true, |pred| pred(path));

    let mut ranked = match backend {
        DenseBackendKind::Local => {
            // Only the Qdrant path needs these; mark them as used for feature-less builds.
            let _ = (root, changed_files);
            crate::core::hybrid_search::hybrid_search(
                query,
                index,
                Some(engine),
                Some(aligned_embeddings),
                top_k,
                config,
                graph_file_ranks,
            )
        }
        #[cfg(feature = "qdrant")]
        DenseBackendKind::Qdrant => {
            // Never ask either side for fewer candidates than the caller wants back.
            let lexical_limit = config.bm25_candidates.max(top_k);
            let dense_limit = config.dense_candidates.max(top_k);

            let mut lexical = index.search(query, lexical_limit);
            lexical.retain(|hit| keep(&hit.file_path));

            let dense = dense_results(
                backend,
                root,
                index,
                engine,
                aligned_embeddings,
                changed_files,
                query,
                dense_limit,
                filter,
            )?;

            crate::core::hybrid_search::reciprocal_rank_fusion(
                &lexical,
                &dense,
                config,
                top_k,
                graph_file_ranks,
            )
        }
    };

    ranked.retain(|hit| keep(&hit.file_path));
    ranked.truncate(top_k);
    Ok(ranked)
}
168
/// Dispatches a dense search to the implementation for `backend`.
///
/// The local backend ignores `root` and `changed_files`; the Qdrant backend
/// (only compiled with the `qdrant` feature) receives every argument.
///
/// # Errors
///
/// Returns the error produced by the selected backend.
#[cfg(feature = "embeddings")]
#[allow(clippy::too_many_arguments)]
fn dense_results(
    backend: DenseBackendKind,
    root: &Path,
    index: &BM25Index,
    engine: &crate::core::embeddings::EmbeddingEngine,
    aligned_embeddings: &[Vec<f32>],
    changed_files: &[String],
    query: &str,
    top_k: usize,
    filter: Option<&dyn Fn(&str) -> bool>,
) -> Result<Vec<DenseSearchResult>, String> {
    let hits = match backend {
        DenseBackendKind::Local => {
            // Keeps the parameters "used" when the Qdrant arm is compiled out.
            let _ = (root, changed_files);
            dense_results_local(index, engine, aligned_embeddings, query, top_k, filter)?
        }
        #[cfg(feature = "qdrant")]
        DenseBackendKind::Qdrant => dense_results_qdrant(
            root,
            index,
            engine,
            aligned_embeddings,
            changed_files,
            query,
            top_k,
            filter,
        )?,
    };
    Ok(hits)
}
200
/// Dense search over the in-memory embeddings.
///
/// Embeds `query`, ranks `aligned_embeddings` by cosine similarity and returns
/// up to `top_k` hits. Entry `i` of `aligned_embeddings` is looked up as
/// `index.chunks[i]`; positions without a matching chunk are dropped. When a
/// `filter` is given, only chunks whose file path it accepts are ranked. Each
/// hit's snippet is the first five lines of the chunk content.
///
/// # Errors
///
/// Returns `"embedding failed: …"` when the query cannot be embedded.
#[cfg(feature = "embeddings")]
fn dense_results_local(
    index: &BM25Index,
    engine: &crate::core::embeddings::EmbeddingEngine,
    aligned_embeddings: &[Vec<f32>],
    query: &str,
    top_k: usize,
    filter: Option<&dyn Fn(&str) -> bool>,
) -> Result<Vec<DenseSearchResult>, String> {
    let query_embedding = match engine.embed(query) {
        Ok(vector) => vector,
        Err(e) => return Err(format!("embedding failed: {e}")),
    };

    // With a filter present, an embedding that has no chunk is rejected outright.
    let path_allowed = |chunk_idx: usize| match filter {
        None => true,
        Some(pred) => index
            .chunks
            .get(chunk_idx)
            .is_some_and(|chunk| pred(&chunk.file_path)),
    };

    let ranked = top_k_by_similarity(
        &query_embedding,
        aligned_embeddings,
        top_k,
        path_allowed,
        crate::core::embeddings::cosine_similarity,
    );

    let mut hits = Vec::with_capacity(ranked.len());
    for (chunk_idx, similarity) in ranked {
        let Some(chunk) = index.chunks.get(chunk_idx) else {
            continue;
        };
        let preview: Vec<&str> = chunk.content.lines().take(5).collect();
        hits.push(DenseSearchResult {
            chunk_idx,
            similarity,
            file_path: chunk.file_path.clone(),
            symbol_name: chunk.symbol_name.clone(),
            kind: chunk.kind.clone(),
            start_line: chunk.start_line,
            end_line: chunk.end_line,
            snippet: preview.join("\n"),
        });
    }
    Ok(hits)
}
245
246/// Min-heap based Top-K selection: O(n log k) instead of O(n log n) full sort.
247#[cfg(feature = "embeddings")]
// Scores every embedding accepted by `filter` against `query` with
// `similarity_fn` and returns at most `k` `(index, similarity)` pairs, best
// first. Ties are ordered by ascending index so the output is deterministic.
//
// Embeddings whose similarity is NaN are skipped: a NaN cannot be ranked, and
// once admitted it could sit at the top of the heap and block every later
// candidate because `x > NaN` is always false.
fn top_k_by_similarity(
    query: &[f32],
    embeddings: &[Vec<f32>],
    k: usize,
    filter: impl Fn(usize) -> bool,
    similarity_fn: fn(&[f32], &[f32]) -> f32,
) -> Vec<(usize, f32)> {
    use std::cmp::{Ordering, Reverse};
    use std::collections::BinaryHeap;

    // A scored embedding. "Greater" means "ranks higher": larger similarity
    // first, then the smaller index on equal similarity.
    struct Candidate {
        similarity: f32,
        index: usize,
    }

    impl PartialEq for Candidate {
        fn eq(&self, other: &Self) -> bool {
            self.cmp(other) == Ordering::Equal
        }
    }
    impl Eq for Candidate {}
    impl PartialOrd for Candidate {
        fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
            Some(self.cmp(other))
        }
    }
    impl Ord for Candidate {
        fn cmp(&self, other: &Self) -> Ordering {
            // NaN never reaches the heap, so `partial_cmp` is total here and the
            // fallback is unreachable in practice.
            self.similarity
                .partial_cmp(&other.similarity)
                .unwrap_or(Ordering::Equal)
                .then_with(|| other.index.cmp(&self.index))
        }
    }

    if k == 0 {
        return Vec::new();
    }

    // `Reverse` turns the max-heap into a min-heap whose top is the weakest
    // candidate kept so far. The capacity is bounded by the input length so a
    // huge `k` (e.g. `usize::MAX` for "everything") neither overflows nor
    // over-allocates.
    let mut heap: BinaryHeap<Reverse<Candidate>> =
        BinaryHeap::with_capacity(k.min(embeddings.len()));

    for (index, embedding) in embeddings.iter().enumerate() {
        if !filter(index) {
            continue;
        }
        let similarity = similarity_fn(query, embedding);
        if similarity.is_nan() {
            continue;
        }
        let candidate = Candidate { similarity, index };
        if heap.len() < k {
            heap.push(Reverse(candidate));
        } else if let Some(mut weakest) = heap.peek_mut() {
            // Replace in place; the heap re-sifts when the guard is dropped.
            if candidate > weakest.0 {
                *weakest = Reverse(candidate);
            }
        }
    }

    // Ascending order of `Reverse<Candidate>` is best-first order of `Candidate`.
    heap.into_sorted_vec()
        .into_iter()
        .map(|Reverse(c)| (c.index, c.similarity))
        .collect()
}
298
/// Dense search served by Qdrant.
///
/// Steps, in order: build the store from the environment, derive and ensure
/// the collection for `root` and the engine's dimensionality, sync the local
/// index into it, embed `query`, and search for `top_k` hits. Hits rejected by
/// `filter` are dropped; snippets are read back from disk under `root`.
///
/// NOTE(review): every returned hit carries `chunk_idx: 0`, not its position
/// in `index.chunks` — verify that no consumer relies on `chunk_idx` for these.
/// NOTE(review): the filter runs after the `top_k` search, so presumably fewer
/// than `top_k` hits can survive — confirm whether over-fetching is wanted.
///
/// # Errors
///
/// Propagates store, sync and search errors, and returns
/// `"embedding failed: …"` when the query cannot be embedded.
#[cfg(feature = "qdrant")]
#[cfg(feature = "embeddings")]
fn dense_results_qdrant(
    root: &Path,
    index: &BM25Index,
    engine: &crate::core::embeddings::EmbeddingEngine,
    aligned_embeddings: &[Vec<f32>],
    changed_files: &[String],
    query: &str,
    top_k: usize,
    filter: Option<&dyn Fn(&str) -> bool>,
) -> Result<Vec<DenseSearchResult>, String> {
    let store = crate::core::qdrant_store::QdrantStore::from_env()?;
    let collection = store.collection_name(root, engine.dimensions())?;
    let is_fresh = store.ensure_collection(&collection, engine.dimensions())?;
    store.sync_index(
        &collection,
        index,
        aligned_embeddings,
        changed_files,
        is_fresh,
    )?;

    let query_vec = match engine.embed(query) {
        Ok(vector) => vector,
        Err(e) => return Err(format!("embedding failed: {e}")),
    };

    let results = store
        .search(&collection, &query_vec, top_k)?
        .into_iter()
        .filter(|hit| filter.map_or(true, |pred| pred(&hit.file_path)))
        .map(|hit| {
            let snippet = snippet_from_disk(root, &hit.file_path, hit.start_line, hit.end_line, 5);
            DenseSearchResult {
                chunk_idx: 0,
                similarity: hit.score,
                file_path: hit.file_path,
                symbol_name: hit.symbol_name,
                kind: hit.kind,
                start_line: hit.start_line,
                end_line: hit.end_line,
                snippet,
            }
        })
        .collect();
    Ok(results)
}
348
/// Reads up to `max_lines` lines of `rel_path` (resolved under `root`),
/// starting at the 1-based `start_line` and not going past `end_line`.
///
/// Best effort: returns an empty string when `jail_path` rejects the path,
/// the file cannot be read as text, or the requested range lies outside the
/// file. An `end_line` smaller than `start_line` is treated as `start_line`.
#[cfg(feature = "qdrant")]
fn snippet_from_disk(
    root: &Path,
    rel_path: &str,
    start_line: usize,
    end_line: usize,
    max_lines: usize,
) -> String {
    let candidate = root.join(rel_path);
    let text = crate::core::pathjail::jail_path(&candidate, root)
        .ok()
        .and_then(|jailed| std::fs::read_to_string(jailed).ok());
    let Some(text) = text else {
        return String::new();
    };

    // Zero-based, half-open window [first, last); `last >= first` always holds.
    let first = start_line.saturating_sub(1);
    let last = end_line.max(start_line);
    let wanted = (last - first).min(max_lines);

    // `skip`/`take` stop at end-of-file on their own, so no explicit clamping.
    text.lines()
        .skip(first)
        .take(wanted)
        .collect::<Vec<_>>()
        .join("\n")
}
375
/// Maps a [`ChunkKind`] to its string label, which is the variant name.
///
/// `kind_from_str` performs the reverse mapping.
#[cfg(feature = "qdrant")]
fn chunk_kind_str(kind: &ChunkKind) -> &'static str {
    // No wildcard arm on purpose: a new variant must fail to compile here.
    match *kind {
        ChunkKind::Function => "Function",
        ChunkKind::Method => "Method",
        ChunkKind::Struct => "Struct",
        ChunkKind::Class => "Class",
        ChunkKind::Impl => "Impl",
        ChunkKind::Module => "Module",
        ChunkKind::Other => "Other",
    }
}
388
/// Parses a chunk-kind label back into a [`ChunkKind`].
///
/// Matching is exact and case-sensitive. Any label that is not one of the six
/// named kinds — including `"Other"` itself — yields [`ChunkKind::Other`].
#[cfg(feature = "qdrant")]
pub(crate) fn kind_from_str(s: &str) -> ChunkKind {
    match s {
        "Function" => ChunkKind::Function,
        "Method" => ChunkKind::Method,
        "Struct" => ChunkKind::Struct,
        "Class" => ChunkKind::Class,
        "Impl" => ChunkKind::Impl,
        "Module" => ChunkKind::Module,
        _unrecognised => ChunkKind::Other,
    }
}
401
/// Crate-visible wrapper around `chunk_kind_str`: returns the string label
/// for `kind`.
#[cfg(feature = "qdrant")]
pub(crate) fn kind_to_str(kind: &ChunkKind) -> &'static str {
    chunk_kind_str(kind)
}
406
#[cfg(test)]
mod tests {
    use super::*;
    use std::ffi::{OsStr, OsString};
    use std::sync::{Mutex, MutexGuard};

    /// Serialises tests that mutate process-wide environment variables.
    static ENV_LOCK: Mutex<()> = Mutex::new(());

    const BACKEND_VAR: &str = "LEANCTX_DENSE_BACKEND";
    const QDRANT_URL_VAR: &str = "LEANCTX_QDRANT_URL";

    /// Sets (`Some`) or removes (`None`) one environment variable.
    fn write_var(key: &str, value: Option<&OsStr>) {
        match value {
            Some(v) => std::env::set_var(key, v),
            None => std::env::remove_var(key),
        }
    }

    /// Holds `ENV_LOCK` and a set of temporary environment overrides.
    ///
    /// The previous values are restored on drop, so a failing assertion cannot
    /// leak overrides into later tests.
    struct ScopedEnv {
        saved: Vec<(&'static str, Option<OsString>)>,
        _lock: MutexGuard<'static, ()>,
    }

    impl ScopedEnv {
        fn new(overrides: &[(&'static str, Option<&str>)]) -> Self {
            // A panic in another test poisons the lock; the `()` payload cannot
            // be left in a bad state, so recover instead of cascading failures.
            let lock = ENV_LOCK
                .lock()
                .unwrap_or_else(|poisoned| poisoned.into_inner());

            let saved = overrides
                .iter()
                .map(|&(key, value)| {
                    // `var_os` keeps non-Unicode values intact for the restore.
                    let previous = std::env::var_os(key);
                    write_var(key, value.map(OsStr::new));
                    (key, previous)
                })
                .collect();

            Self { saved, _lock: lock }
        }
    }

    impl Drop for ScopedEnv {
        fn drop(&mut self) {
            // Runs before `_lock` is released, so the restore is still serialised.
            for (key, previous) in self.saved.drain(..) {
                write_var(key, previous.as_deref());
            }
        }
    }

    #[test]
    fn dense_backend_defaults_to_local() {
        let _env = ScopedEnv::new(&[(BACKEND_VAR, None), (QDRANT_URL_VAR, None)]);

        let got = DenseBackendKind::try_from_env().unwrap();
        assert_eq!(got, DenseBackendKind::Local);
    }

    #[test]
    fn dense_backend_unknown_value_errors() {
        let _env = ScopedEnv::new(&[(BACKEND_VAR, Some("wat")), (QDRANT_URL_VAR, None)]);

        let err = DenseBackendKind::try_from_env().unwrap_err();
        assert!(err.contains("Unknown LEANCTX_DENSE_BACKEND"));
    }

    #[cfg(feature = "qdrant")]
    #[test]
    fn dense_backend_infers_qdrant_from_url() {
        let _env = ScopedEnv::new(&[
            (BACKEND_VAR, None),
            (QDRANT_URL_VAR, Some("http://127.0.0.1:6333")),
        ]);

        let got = DenseBackendKind::try_from_env().unwrap();
        assert_eq!(got, DenseBackendKind::Qdrant);
    }

    #[cfg(not(feature = "qdrant"))]
    #[test]
    fn dense_backend_qdrant_requires_feature() {
        let _env = ScopedEnv::new(&[(BACKEND_VAR, Some("qdrant")), (QDRANT_URL_VAR, None)]);

        let err = DenseBackendKind::try_from_env().unwrap_err();
        assert!(err.contains("feature 'qdrant' is not enabled"));
    }
}