Skip to main content

lean_ctx/core/
dense_backend.rs

1use std::path::Path;
2
3use crate::core::hybrid_search::{DenseSearchResult, HybridConfig, HybridResult};
4use crate::core::vector_index::BM25Index;
5#[cfg(feature = "qdrant")]
6use crate::core::vector_index::ChunkKind;
7
/// Which implementation serves dense (embedding-based) search.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DenseBackendKind {
    /// Dense search handled in-process.
    Local,
    /// Dense search delegated to Qdrant; only exists when the `qdrant`
    /// feature is compiled in.
    #[cfg(feature = "qdrant")]
    Qdrant,
}

impl DenseBackendKind {
    /// Selects the dense backend from the process environment.
    ///
    /// `LEANCTX_DENSE_BACKEND` (trimmed, case-insensitive) takes precedence.
    /// When it is unset or blank, a non-blank `LEANCTX_QDRANT_URL` implies
    /// `qdrant`; otherwise the backend defaults to [`Self::Local`].
    ///
    /// # Errors
    ///
    /// Returns a message when the requested backend name is unknown, or when
    /// `qdrant` is requested (explicitly or via the URL) but the `qdrant`
    /// feature is not enabled in this build.
    pub fn try_from_env() -> Result<Self, String> {
        // Reads a variable, trims it, and treats blank values as unset.
        fn non_blank(key: &str) -> Option<String> {
            let raw = std::env::var(key).ok()?;
            let trimmed = raw.trim();
            if trimmed.is_empty() {
                None
            } else {
                Some(trimmed.to_string())
            }
        }

        let requested = match non_blank("LEANCTX_DENSE_BACKEND") {
            Some(name) => Some(name.to_ascii_lowercase()),
            // No explicit choice: a configured Qdrant URL implies the qdrant backend.
            None => non_blank("LEANCTX_QDRANT_URL").map(|_| "qdrant".to_string()),
        };

        let Some(name) = requested else {
            return Ok(Self::Local);
        };

        match name.as_str() {
            "local" => Ok(Self::Local),
            "qdrant" => Self::qdrant_if_compiled(),
            other => Err(format!(
                "Unknown LEANCTX_DENSE_BACKEND={other:?} (expected 'local' or 'qdrant')"
            )),
        }
    }

    /// Resolves a `qdrant` request when the feature is compiled in.
    #[cfg(feature = "qdrant")]
    fn qdrant_if_compiled() -> Result<Self, String> {
        Ok(Self::Qdrant)
    }

    /// Rejects a `qdrant` request because the feature is not compiled in.
    #[cfg(not(feature = "qdrant"))]
    fn qdrant_if_compiled() -> Result<Self, String> {
        Err("Dense backend 'qdrant' requested, but feature 'qdrant' is not enabled. Rebuild with --features qdrant.".to_string())
    }

    /// Lower-case name of the backend, matching the values accepted by
    /// `LEANCTX_DENSE_BACKEND`.
    pub fn label(&self) -> &'static str {
        match *self {
            Self::Local => "local",
            #[cfg(feature = "qdrant")]
            Self::Qdrant => "qdrant",
        }
    }
}
54
/// Runs a dense-only search on the chosen backend and reports the hits in
/// the [`HybridResult`] shape.
///
/// Each hit's similarity is used both as `rrf_score` and as `dense_score`;
/// the BM25 score and both rank fields are left as `None`.
///
/// # Errors
///
/// Propagates any error returned by the underlying dense search.
#[cfg(feature = "embeddings")]
#[allow(clippy::too_many_arguments)]
pub fn dense_results_as_hybrid(
    backend: DenseBackendKind,
    root: &Path,
    index: &BM25Index,
    engine: &crate::core::embeddings::EmbeddingEngine,
    aligned_embeddings: &[Vec<f32>],
    changed_files: &[String],
    query: &str,
    top_k: usize,
    filter: Option<&dyn Fn(&str) -> bool>,
) -> Result<Vec<HybridResult>, String> {
    let hits = dense_results(
        backend,
        root,
        index,
        engine,
        aligned_embeddings,
        changed_files,
        query,
        top_k,
        filter,
    )?;

    let mut converted = Vec::with_capacity(hits.len());
    for hit in hits {
        let similarity = hit.similarity;
        converted.push(HybridResult {
            file_path: hit.file_path,
            symbol_name: hit.symbol_name,
            kind: hit.kind,
            start_line: hit.start_line,
            end_line: hit.end_line,
            snippet: hit.snippet,
            // No fusion happened, so the dense similarity doubles as the final score.
            rrf_score: f64::from(similarity),
            bm25_score: None,
            dense_score: Some(similarity),
            bm25_rank: None,
            dense_rank: None,
        });
    }
    Ok(converted)
}
97
/// Runs a hybrid (BM25 + dense) search on the chosen backend.
///
/// * `Local` delegates to `hybrid_search` with the in-memory embeddings;
///   `root` and `changed_files` are not used on this path.
/// * `Qdrant` (feature-gated) takes BM25 candidates from `index`, dense
///   candidates from the Qdrant-backed dense search, and fuses them with
///   `reciprocal_rank_fusion`. Candidate counts are the configured values,
///   raised to at least `top_k`.
///
/// On both paths the optional `filter` is applied to result file paths and
/// the output is truncated to `top_k`.
///
/// # Errors
///
/// Propagates errors from the dense search on the Qdrant path.
#[cfg(feature = "embeddings")]
#[allow(clippy::too_many_arguments)]
pub fn hybrid_results(
    backend: DenseBackendKind,
    root: &Path,
    index: &BM25Index,
    engine: &crate::core::embeddings::EmbeddingEngine,
    aligned_embeddings: &[Vec<f32>],
    changed_files: &[String],
    query: &str,
    top_k: usize,
    config: &HybridConfig,
    filter: Option<&dyn Fn(&str) -> bool>,
    graph_file_ranks: Option<&std::collections::HashMap<String, usize>>,
) -> Result<Vec<HybridResult>, String> {
    // A missing filter accepts every path.
    let keep = |path: &str| match filter {
        Some(pred) => pred(path),
        None => true,
    };

    let mut ranked = match backend {
        #[cfg(feature = "qdrant")]
        DenseBackendKind::Qdrant => {
            let bm25_k = config.bm25_candidates.max(top_k);
            let dense_k = config.dense_candidates.max(top_k);

            let mut lexical = index.search(query, bm25_k);
            lexical.retain(|hit| keep(&hit.file_path));

            let dense = dense_results(
                backend,
                root,
                index,
                engine,
                aligned_embeddings,
                changed_files,
                query,
                dense_k,
                filter,
            )?;

            crate::core::hybrid_search::reciprocal_rank_fusion(
                &lexical,
                &dense,
                config,
                top_k,
                graph_file_ranks,
            )
        }
        DenseBackendKind::Local => {
            // Only the Qdrant path needs these; touch them so they count as used.
            let _ = (root, changed_files);
            crate::core::hybrid_search::hybrid_search(
                query,
                index,
                Some(engine),
                Some(aligned_embeddings),
                top_k,
                config,
                graph_file_ranks,
            )
        }
    };

    ranked.retain(|hit| keep(&hit.file_path));
    ranked.truncate(top_k);
    Ok(ranked)
}
168
/// Dispatches a dense search to the implementation for `backend`.
///
/// The local implementation ignores `root` and `changed_files`; the Qdrant
/// implementation (feature-gated) receives every argument.
#[cfg(feature = "embeddings")]
#[allow(clippy::too_many_arguments)]
fn dense_results(
    backend: DenseBackendKind,
    root: &Path,
    index: &BM25Index,
    engine: &crate::core::embeddings::EmbeddingEngine,
    aligned_embeddings: &[Vec<f32>],
    changed_files: &[String],
    query: &str,
    top_k: usize,
    filter: Option<&dyn Fn(&str) -> bool>,
) -> Result<Vec<DenseSearchResult>, String> {
    match backend {
        #[cfg(feature = "qdrant")]
        DenseBackendKind::Qdrant => dense_results_qdrant(
            root,
            index,
            engine,
            aligned_embeddings,
            changed_files,
            query,
            top_k,
            filter,
        ),
        DenseBackendKind::Local => {
            // Unused on the local path; referenced so they count as used.
            let _ = (root, changed_files);
            dense_results_local(index, engine, aligned_embeddings, query, top_k, filter)
        }
    }
}
200
/// In-process dense search: embeds `query`, scores it against every entry
/// of `aligned_embeddings` with cosine similarity, and returns the `top_k`
/// best-scoring chunks.
///
/// Embedding `i` is paired with `index.chunks[i]`. When a `filter` is given,
/// only embeddings whose chunk exists and whose file path passes the filter
/// are scored. Snippets are the first five lines of the chunk content.
///
/// # Errors
///
/// Returns `"embedding failed: …"` when the query cannot be embedded.
#[cfg(feature = "embeddings")]
fn dense_results_local(
    index: &BM25Index,
    engine: &crate::core::embeddings::EmbeddingEngine,
    aligned_embeddings: &[Vec<f32>],
    query: &str,
    top_k: usize,
    filter: Option<&dyn Fn(&str) -> bool>,
) -> Result<Vec<DenseSearchResult>, String> {
    use crate::core::embeddings::cosine_similarity;

    let query_embedding = engine
        .embed(query)
        .map_err(|e| format!("embedding failed: {e}"))?;

    let mut ranked: Vec<(usize, f32)> = Vec::new();
    for (position, embedding) in aligned_embeddings.iter().enumerate() {
        if let Some(pred) = filter {
            let allowed = match index.chunks.get(position) {
                Some(chunk) => pred(&chunk.file_path),
                // With a filter active, an embedding without a chunk is rejected.
                None => false,
            };
            if !allowed {
                continue;
            }
        }
        ranked.push((position, cosine_similarity(&query_embedding, embedding)));
    }

    // Highest similarity first; incomparable scores (NaN) are treated as equal.
    ranked.sort_by(|lhs, rhs| {
        rhs.1
            .partial_cmp(&lhs.1)
            .unwrap_or(std::cmp::Ordering::Equal)
    });
    ranked.truncate(top_k);

    let mut hits = Vec::with_capacity(ranked.len());
    for (position, score) in ranked {
        // Embeddings that have no matching chunk are dropped from the output.
        let Some(chunk) = index.chunks.get(position) else {
            continue;
        };
        let preview: Vec<&str> = chunk.content.lines().take(5).collect();
        hits.push(DenseSearchResult {
            chunk_idx: position,
            similarity: score,
            file_path: chunk.file_path.clone(),
            symbol_name: chunk.symbol_name.clone(),
            kind: chunk.kind.clone(),
            start_line: chunk.start_line,
            end_line: chunk.end_line,
            snippet: preview.join("\n"),
        });
    }
    Ok(hits)
}
247
/// Qdrant-backed dense search.
///
/// Builds the store from the environment, ensures the collection for
/// `root` and the engine's dimensionality exists, syncs the index into it,
/// then embeds `query` and asks the store for `top_k` hits. The optional
/// `filter` is applied to the returned hits, so fewer than `top_k` results
/// may come back. Snippets are read from disk (at most five lines).
///
/// # Errors
///
/// Propagates store errors, and returns `"embedding failed: …"` when the
/// query cannot be embedded.
#[cfg(feature = "qdrant")]
#[cfg(feature = "embeddings")]
fn dense_results_qdrant(
    root: &Path,
    index: &BM25Index,
    engine: &crate::core::embeddings::EmbeddingEngine,
    aligned_embeddings: &[Vec<f32>],
    changed_files: &[String],
    query: &str,
    top_k: usize,
    filter: Option<&dyn Fn(&str) -> bool>,
) -> Result<Vec<DenseSearchResult>, String> {
    let store = crate::core::qdrant_store::QdrantStore::from_env()?;
    let dims = engine.dimensions();
    let collection = store.collection_name(root, dims)?;
    let created_new = store.ensure_collection(&collection, engine.dimensions())?;
    store.sync_index(
        &collection,
        index,
        aligned_embeddings,
        changed_files,
        created_new,
    )?;

    let query_vec = engine
        .embed(query)
        .map_err(|e| format!("embedding failed: {e}"))?;

    let results = store
        .search(&collection, &query_vec, top_k)?
        .into_iter()
        .filter(|hit| match filter {
            Some(pred) => pred(&hit.file_path),
            None => true,
        })
        .map(|hit| {
            let snippet = snippet_from_disk(root, &hit.file_path, hit.start_line, hit.end_line, 5);
            DenseSearchResult {
                // Placeholder: hits are not mapped back to a local chunk index here.
                chunk_idx: 0,
                similarity: hit.score,
                file_path: hit.file_path,
                symbol_name: hit.symbol_name,
                kind: hit.kind,
                start_line: hit.start_line,
                end_line: hit.end_line,
                snippet,
            }
        })
        .collect();
    Ok(results)
}
297
/// Reads up to `max_lines` lines of `rel_path` (resolved under `root`),
/// starting at the 1-based `start_line` and not going past `end_line`.
///
/// Best effort: returns an empty string when the path is rejected by
/// `jail_path`, the file cannot be read as UTF-8 text, or the requested
/// range lies outside the file. An `end_line` smaller than `start_line` is
/// treated as `start_line`.
#[cfg(feature = "qdrant")]
fn snippet_from_disk(
    root: &Path,
    rel_path: &str,
    start_line: usize,
    end_line: usize,
    max_lines: usize,
) -> String {
    let loaded = crate::core::pathjail::jail_path(&root.join(rel_path), root)
        .ok()
        .and_then(|path| std::fs::read_to_string(path).ok());
    let Some(content) = loaded else {
        return String::new();
    };

    // Zero-based index of the first wanted line, and one past the last.
    let first = start_line.saturating_sub(1);
    let past_last = end_line.max(start_line);
    // `past_last >= start_line >= first`, so this cannot underflow.
    let wanted = (past_last - first).min(max_lines);

    // `skip`/`take` stop at end of file, which clamps the range to the file length.
    content
        .lines()
        .skip(first)
        .take(wanted)
        .collect::<Vec<&str>>()
        .join("\n")
}
324
/// Maps a [`ChunkKind`] to its string name (the variant name).
#[cfg(feature = "qdrant")]
fn chunk_kind_str(kind: &ChunkKind) -> &'static str {
    match *kind {
        ChunkKind::Other => "Other",
        ChunkKind::Method => "Method",
        ChunkKind::Class => "Class",
        ChunkKind::Module => "Module",
        ChunkKind::Impl => "Impl",
        ChunkKind::Struct => "Struct",
        ChunkKind::Function => "Function",
    }
}

/// Parses a string name back into a [`ChunkKind`].
///
/// Matching is exact and case-sensitive; any unrecognized name yields
/// `ChunkKind::Other`.
#[cfg(feature = "qdrant")]
pub(crate) fn kind_from_str(s: &str) -> ChunkKind {
    use ChunkKind::{Class, Function, Impl, Method, Module, Other, Struct};

    match s {
        "Function" => Function,
        "Struct" => Struct,
        "Impl" => Impl,
        "Module" => Module,
        "Class" => Class,
        "Method" => Method,
        _ => Other,
    }
}

/// Crate-visible entry point for [`ChunkKind`] → string conversion; the
/// inverse of [`kind_from_str`] for every variant listed there.
#[cfg(feature = "qdrant")]
pub(crate) fn kind_to_str(kind: &ChunkKind) -> &'static str {
    chunk_kind_str(kind)
}
355
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::{Mutex, MutexGuard};

    /// Serializes tests that mutate the process-wide environment.
    static ENV_LOCK: Mutex<()> = Mutex::new(());

    const BACKEND_VAR: &str = "LEANCTX_DENSE_BACKEND";
    const QDRANT_URL_VAR: &str = "LEANCTX_QDRANT_URL";

    /// Sets (`Some`) or removes (`None`) `key`, returning its previous value.
    fn set_env(key: &str, value: Option<&str>) -> Option<String> {
        let old = std::env::var(key).ok();
        match value {
            Some(v) => std::env::set_var(key, v),
            None => std::env::remove_var(key),
        }
        old
    }

    /// Puts `key` back to the value captured by [`set_env`].
    fn restore_env(key: &str, old: Option<String>) {
        match old {
            Some(v) => std::env::set_var(key, v),
            None => std::env::remove_var(key),
        }
    }

    /// Holds `ENV_LOCK` and a set of temporary environment overrides.
    ///
    /// The previous values are restored on drop, so a failing assertion no
    /// longer leaks the modified environment into later tests.
    struct EnvOverride {
        saved: Vec<(&'static str, Option<String>)>,
        // Declared last so it is released only after `Drop::drop` has
        // restored the environment.
        _lock: MutexGuard<'static, ()>,
    }

    impl EnvOverride {
        fn new(vars: &[(&'static str, Option<&str>)]) -> Self {
            // The mutex guards no data, so a lock poisoned by an earlier
            // failing test is still safe to reuse; recovering it keeps one
            // failure from cascading into every other env test.
            let lock = ENV_LOCK
                .lock()
                .unwrap_or_else(|poisoned| poisoned.into_inner());
            let saved = vars
                .iter()
                .map(|&(key, value)| (key, set_env(key, value)))
                .collect();
            Self { saved, _lock: lock }
        }
    }

    impl Drop for EnvOverride {
        fn drop(&mut self) {
            // Restore in reverse order of application.
            for (key, old) in self.saved.drain(..).rev() {
                restore_env(key, old);
            }
        }
    }

    #[test]
    fn dense_backend_defaults_to_local() {
        let _env = EnvOverride::new(&[(BACKEND_VAR, None), (QDRANT_URL_VAR, None)]);

        let got = DenseBackendKind::try_from_env().unwrap();
        assert_eq!(got, DenseBackendKind::Local);
    }

    #[test]
    fn dense_backend_unknown_value_errors() {
        let _env = EnvOverride::new(&[(BACKEND_VAR, Some("wat")), (QDRANT_URL_VAR, None)]);

        let err = DenseBackendKind::try_from_env().unwrap_err();
        assert!(err.contains("Unknown LEANCTX_DENSE_BACKEND"));
    }

    #[cfg(feature = "qdrant")]
    #[test]
    fn dense_backend_infers_qdrant_from_url() {
        let _env = EnvOverride::new(&[
            (BACKEND_VAR, None),
            (QDRANT_URL_VAR, Some("http://127.0.0.1:6333")),
        ]);

        let got = DenseBackendKind::try_from_env().unwrap();
        assert_eq!(got, DenseBackendKind::Qdrant);
    }

    #[cfg(not(feature = "qdrant"))]
    #[test]
    fn dense_backend_qdrant_requires_feature() {
        let _env = EnvOverride::new(&[(BACKEND_VAR, Some("qdrant")), (QDRANT_URL_VAR, None)]);

        let err = DenseBackendKind::try_from_env().unwrap_err();
        assert!(err.contains("feature 'qdrant' is not enabled"));
    }
}