1use std::path::Path;
2
3use crate::core::bm25_index::BM25Index;
4#[cfg(feature = "qdrant")]
5use crate::core::bm25_index::ChunkKind;
6use crate::core::hybrid_search::{DenseSearchResult, HybridConfig, HybridResult};
7
/// Identifies which engine answers dense (embedding-based) queries.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DenseBackendKind {
    /// Dense search is served by the local implementation in this module.
    Local,
    /// Dense search is served by Qdrant; only exists when the `qdrant` feature is compiled in.
    #[cfg(feature = "qdrant")]
    Qdrant,
}

impl DenseBackendKind {
    /// Chooses the backend from the process environment.
    ///
    /// `LEANCTX_DENSE_BACKEND` is trimmed and ASCII-lowercased; when it is
    /// non-blank it decides the backend. Otherwise a non-blank
    /// `LEANCTX_QDRANT_URL` selects `qdrant`, and with neither variable set
    /// the result is [`DenseBackendKind::Local`].
    ///
    /// # Errors
    ///
    /// Returns a message when the requested name is neither `local` nor
    /// `qdrant`, or when `qdrant` is requested in a build that lacks the
    /// `qdrant` feature.
    pub fn try_from_env() -> Result<Self, String> {
        let explicit = match std::env::var("LEANCTX_DENSE_BACKEND") {
            Ok(raw) => {
                let normalized = raw.trim().to_ascii_lowercase();
                if normalized.is_empty() {
                    None
                } else {
                    Some(normalized)
                }
            }
            Err(_) => None,
        };
        let url_present = std::env::var("LEANCTX_QDRANT_URL")
            .map(|url| !url.trim().is_empty())
            .unwrap_or(false);

        // An explicit name always wins; the URL only matters as a fallback.
        let requested = match explicit {
            Some(name) => name,
            None if url_present => String::from("qdrant"),
            None => return Ok(Self::Local),
        };

        match requested.as_str() {
            "local" => Ok(Self::Local),
            "qdrant" => {
                #[cfg(feature = "qdrant")]
                {
                    Ok(Self::Qdrant)
                }
                #[cfg(not(feature = "qdrant"))]
                {
                    Err(String::from("Dense backend 'qdrant' requested, but feature 'qdrant' is not enabled. Rebuild with --features qdrant."))
                }
            }
            other => Err(format!(
                "Unknown LEANCTX_DENSE_BACKEND={other:?} (expected 'local' or 'qdrant')"
            )),
        }
    }

    /// Returns the lowercase name of this backend (`"local"` or `"qdrant"`).
    pub fn label(&self) -> &'static str {
        match *self {
            Self::Local => "local",
            #[cfg(feature = "qdrant")]
            Self::Qdrant => "qdrant",
        }
    }
}
54
/// Runs a dense-only search and repackages every hit as a [`HybridResult`].
///
/// All arguments are forwarded unchanged to `dense_results`. In each returned
/// item the similarity is stored both as `dense_score` and, widened to `f64`,
/// as `rrf_score`; `bm25_score`, `bm25_rank` and `dense_rank` are `None`.
///
/// # Errors
///
/// Propagates any error reported by `dense_results`.
#[cfg(feature = "embeddings")]
#[allow(clippy::too_many_arguments)]
pub fn dense_results_as_hybrid(
    backend: DenseBackendKind,
    root: &Path,
    index: &BM25Index,
    engine: &crate::core::embeddings::EmbeddingEngine,
    aligned_embeddings: &[Vec<f32>],
    changed_files: &[String],
    query: &str,
    top_k: usize,
    filter: Option<&dyn Fn(&str) -> bool>,
) -> Result<Vec<HybridResult>, String> {
    let hits = dense_results(
        backend,
        root,
        index,
        engine,
        aligned_embeddings,
        changed_files,
        query,
        top_k,
        filter,
    )?;

    let mut converted = Vec::with_capacity(hits.len());
    for hit in hits {
        let similarity = hit.similarity;
        converted.push(HybridResult {
            file_path: hit.file_path,
            symbol_name: hit.symbol_name,
            kind: hit.kind,
            start_line: hit.start_line,
            end_line: hit.end_line,
            snippet: hit.snippet,
            // No fusion happened, so the raw similarity stands in for the fused score.
            rrf_score: f64::from(similarity),
            bm25_score: None,
            dense_score: Some(similarity),
            bm25_rank: None,
            dense_rank: None,
        });
    }
    Ok(converted)
}
97
/// Produces fused BM25 + dense results for `query` using the chosen backend.
///
/// * `Local` hands the whole job to `hybrid_search::hybrid_search`; `root` and
///   `changed_files` are not used on this path.
/// * `Qdrant` (feature `qdrant`) runs `index.search` for
///   `max(config.bm25_candidates, top_k)` candidates, obtains
///   `max(config.dense_candidates, top_k)` dense candidates through
///   `dense_results`, and merges both with `reciprocal_rank_fusion`.
///
/// On both paths `filter` (when present) is applied to each result's
/// `file_path` after ranking, and the list is then cut to at most `top_k`.
///
/// # Errors
///
/// Propagates errors from `dense_results` on the Qdrant path.
#[cfg(feature = "embeddings")]
#[allow(clippy::too_many_arguments)]
pub fn hybrid_results(
    backend: DenseBackendKind,
    root: &Path,
    index: &BM25Index,
    engine: &crate::core::embeddings::EmbeddingEngine,
    aligned_embeddings: &[Vec<f32>],
    changed_files: &[String],
    query: &str,
    top_k: usize,
    config: &HybridConfig,
    filter: Option<&dyn Fn(&str) -> bool>,
    graph_file_ranks: Option<&std::collections::HashMap<String, usize>>,
) -> Result<Vec<HybridResult>, String> {
    // Shared tail of both backends: drop rejected paths, then cap the length.
    fn filter_and_cap(
        mut hits: Vec<HybridResult>,
        filter: Option<&dyn Fn(&str) -> bool>,
        limit: usize,
    ) -> Vec<HybridResult> {
        if let Some(accept) = filter {
            hits.retain(|hit| accept(&hit.file_path));
        }
        hits.truncate(limit);
        hits
    }

    match backend {
        DenseBackendKind::Local => {
            // Only the Qdrant path needs these; keep them "used" in every build.
            let _ = (root, changed_files);
            let ranked = crate::core::hybrid_search::hybrid_search(
                query,
                index,
                Some(engine),
                Some(aligned_embeddings),
                top_k,
                config,
                graph_file_ranks,
            );
            Ok(filter_and_cap(ranked, filter, top_k))
        }
        #[cfg(feature = "qdrant")]
        DenseBackendKind::Qdrant => {
            let lexical_limit = config.bm25_candidates.max(top_k);
            let dense_limit = config.dense_candidates.max(top_k);

            let mut lexical = index.search(query, lexical_limit);
            if let Some(accept) = filter {
                lexical.retain(|hit| accept(&hit.file_path));
            }

            let dense = dense_results(
                backend,
                root,
                index,
                engine,
                aligned_embeddings,
                changed_files,
                query,
                dense_limit,
                filter,
            )?;

            let fused = crate::core::hybrid_search::reciprocal_rank_fusion(
                &lexical,
                &dense,
                config,
                top_k,
                graph_file_ranks,
            );
            Ok(filter_and_cap(fused, filter, top_k))
        }
    }
}
168
/// Dispatches a dense query to the implementation that matches `backend`.
///
/// The local implementation ignores `root` and `changed_files`; the Qdrant
/// implementation (feature `qdrant`) receives every argument.
///
/// # Errors
///
/// Propagates whatever error the selected implementation returns.
#[cfg(feature = "embeddings")]
#[allow(clippy::too_many_arguments)]
fn dense_results(
    backend: DenseBackendKind,
    root: &Path,
    index: &BM25Index,
    engine: &crate::core::embeddings::EmbeddingEngine,
    aligned_embeddings: &[Vec<f32>],
    changed_files: &[String],
    query: &str,
    top_k: usize,
    filter: Option<&dyn Fn(&str) -> bool>,
) -> Result<Vec<DenseSearchResult>, String> {
    match backend {
        #[cfg(feature = "qdrant")]
        DenseBackendKind::Qdrant => dense_results_qdrant(
            root,
            index,
            engine,
            aligned_embeddings,
            changed_files,
            query,
            top_k,
            filter,
        ),
        DenseBackendKind::Local => {
            // Consumed only by the Qdrant arm; referenced here so builds
            // without that feature do not flag them as unused.
            let _ = (root, changed_files);
            dense_results_local(index, engine, aligned_embeddings, query, top_k, filter)
        }
    }
}
200
/// Dense search over `aligned_embeddings`, scored with `cosine_similarity`.
///
/// The query is embedded with `engine`, then `top_k_by_similarity` picks the
/// best positions. Position `i` of `aligned_embeddings` is looked up as
/// `index.chunks[i]`; a position is only considered when `filter` is absent or
/// accepts that chunk's `file_path`, and positions without a chunk are skipped.
/// Each result carries the first five lines of the chunk content as its snippet.
///
/// # Errors
///
/// Returns `"embedding failed: …"` when the query cannot be embedded.
#[cfg(feature = "embeddings")]
fn dense_results_local(
    index: &BM25Index,
    engine: &crate::core::embeddings::EmbeddingEngine,
    aligned_embeddings: &[Vec<f32>],
    query: &str,
    top_k: usize,
    filter: Option<&dyn Fn(&str) -> bool>,
) -> Result<Vec<DenseSearchResult>, String> {
    let query_vector = match engine.embed(query) {
        Ok(vector) => vector,
        Err(e) => return Err(format!("embedding failed: {e}")),
    };

    let chunk_allowed = |position: usize| match filter {
        None => true,
        Some(accept) => match index.chunks.get(position) {
            Some(chunk) => accept(&chunk.file_path),
            None => false,
        },
    };

    let ranked = top_k_by_similarity(
        &query_vector,
        aligned_embeddings,
        top_k,
        chunk_allowed,
        crate::core::embeddings::cosine_similarity,
    );

    let mut results = Vec::with_capacity(ranked.len());
    for (position, similarity) in ranked {
        let Some(chunk) = index.chunks.get(position) else {
            continue;
        };

        // Snippet = first five lines of the chunk, newline-separated.
        let mut snippet = String::new();
        for (line_no, line) in chunk.content.lines().take(5).enumerate() {
            if line_no > 0 {
                snippet.push('\n');
            }
            snippet.push_str(line);
        }

        results.push(DenseSearchResult {
            chunk_idx: position,
            similarity,
            file_path: chunk.file_path.clone(),
            symbol_name: chunk.symbol_name.clone(),
            kind: chunk.kind.clone(),
            start_line: chunk.start_line,
            end_line: chunk.end_line,
            snippet,
        });
    }
    Ok(results)
}
245
246#[cfg(feature = "embeddings")]
/// Returns the `k` best-scoring embeddings as `(index, similarity)` pairs.
///
/// Every position `i` of `embeddings` for which `filter(i)` is true is scored
/// with `similarity_fn(query, &embeddings[i])`. The result is ordered by
/// descending similarity; equal similarities are ordered by ascending index,
/// and when more than `k` candidates tie for the last place the lowest
/// indices are kept.
///
/// Candidates whose similarity is NaN are ignored: a NaN can neither be
/// ranked nor evicted by a later comparison, so admitting one would
/// permanently occupy a slot and hide real matches.
///
/// `k == 0` yields an empty vector. Any `k` (including `usize::MAX`) is
/// accepted; the working buffer never grows beyond `embeddings.len()`.
fn top_k_by_similarity(
    query: &[f32],
    embeddings: &[Vec<f32>],
    k: usize,
    filter: impl Fn(usize) -> bool,
    similarity_fn: fn(&[f32], &[f32]) -> f32,
) -> Vec<(usize, f32)> {
    use std::cmp::Ordering;
    use std::collections::BinaryHeap;

    /// Heap entry ordered so that the *worst* candidate is the heap maximum.
    struct Candidate {
        score: f32,
        index: usize,
    }

    impl Ord for Candidate {
        fn cmp(&self, other: &Self) -> Ordering {
            // Reversed on score (lower score == greater entry); among equal
            // scores the higher index is "greater" and therefore evicted first.
            other
                .score
                .total_cmp(&self.score)
                .then_with(|| self.index.cmp(&other.index))
        }
    }
    impl PartialOrd for Candidate {
        fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
            Some(self.cmp(other))
        }
    }
    impl PartialEq for Candidate {
        fn eq(&self, other: &Self) -> bool {
            self.cmp(other) == Ordering::Equal
        }
    }
    impl Eq for Candidate {}

    if k == 0 {
        return Vec::new();
    }

    // Never more than one entry per embedding, so cap the allocation there;
    // this also avoids overflowing on very large `k`.
    let mut heap: BinaryHeap<Candidate> = BinaryHeap::with_capacity(k.min(embeddings.len()));

    for (index, embedding) in embeddings.iter().enumerate() {
        if !filter(index) {
            continue;
        }
        let score = similarity_fn(query, embedding);
        if score.is_nan() {
            continue;
        }
        if heap.len() < k {
            heap.push(Candidate { score, index });
        } else if let Some(mut worst) = heap.peek_mut() {
            // Strictly better only: on a tie the earlier index keeps its slot.
            if score > worst.score {
                *worst = Candidate { score, index };
            }
        }
    }

    // Ascending by `Ord` == best score first, ties by ascending index.
    heap.into_sorted_vec()
        .into_iter()
        .map(|candidate| (candidate.index, candidate.score))
        .collect()
}
298
/// Dense search served by a Qdrant collection.
///
/// Steps, in order: build a `QdrantStore` from the environment, resolve the
/// collection name for `root` and the engine's dimensionality, make sure the
/// collection exists, call `sync_index` with the current index/embeddings
/// (passing along whether the collection was just created), embed the query,
/// and search for `top_k` hits.
///
/// Hits whose `file_path` is rejected by `filter` are dropped after the
/// search. Snippets are read from disk (at most five lines), and `chunk_idx`
/// is set to `0` for every result produced here.
///
/// # Errors
///
/// Propagates store errors and returns `"embedding failed: …"` when the query
/// cannot be embedded.
#[cfg(feature = "qdrant")]
#[cfg(feature = "embeddings")]
fn dense_results_qdrant(
    root: &Path,
    index: &BM25Index,
    engine: &crate::core::embeddings::EmbeddingEngine,
    aligned_embeddings: &[Vec<f32>],
    changed_files: &[String],
    query: &str,
    top_k: usize,
    filter: Option<&dyn Fn(&str) -> bool>,
) -> Result<Vec<DenseSearchResult>, String> {
    let store = crate::core::qdrant_store::QdrantStore::from_env()?;
    let collection = store.collection_name(root, engine.dimensions())?;
    let freshly_created = store.ensure_collection(&collection, engine.dimensions())?;
    store.sync_index(
        &collection,
        index,
        aligned_embeddings,
        changed_files,
        freshly_created,
    )?;

    let query_vec = match engine.embed(query) {
        Ok(vector) => vector,
        Err(e) => return Err(format!("embedding failed: {e}")),
    };

    let results = store
        .search(&collection, &query_vec, top_k)?
        .into_iter()
        .filter(|hit| match filter {
            Some(accept) => accept(&hit.file_path),
            None => true,
        })
        .map(|hit| {
            // Read the snippet before `hit.file_path` is moved into the result.
            let snippet = snippet_from_disk(root, &hit.file_path, hit.start_line, hit.end_line, 5);
            DenseSearchResult {
                chunk_idx: 0,
                similarity: hit.score,
                file_path: hit.file_path,
                symbol_name: hit.symbol_name,
                kind: hit.kind,
                start_line: hit.start_line,
                end_line: hit.end_line,
                snippet,
            }
        })
        .collect();
    Ok(results)
}
348
/// Reads up to `max_lines` lines of `rel_path` (resolved under `root`),
/// starting at the 1-based `start_line` and not going past `end_line`.
///
/// The path is passed through `pathjail::jail_path` before reading. An empty
/// string is returned when the path is rejected, the file cannot be read as
/// text, or the requested range lies outside the file. An `end_line` smaller
/// than `start_line` is treated as `start_line`; a `start_line` of `0` starts
/// at the first line.
#[cfg(feature = "qdrant")]
fn snippet_from_disk(
    root: &Path,
    rel_path: &str,
    start_line: usize,
    end_line: usize,
    max_lines: usize,
) -> String {
    // Best effort: any failure simply yields no text, hence no lines below.
    let content = crate::core::pathjail::jail_path(&root.join(rel_path), root)
        .ok()
        .and_then(|path| std::fs::read_to_string(path).ok())
        .unwrap_or_default();

    let first = start_line.saturating_sub(1);
    // `last >= start_line >= first`, so the subtraction cannot underflow.
    let last = end_line.max(start_line);
    let wanted = (last - first).min(max_lines);

    content
        .lines()
        .skip(first)
        .take(wanted)
        .collect::<Vec<_>>()
        .join("\n")
}
375
/// Maps a [`ChunkKind`] to its textual name (the variant's identifier).
///
/// The names produced here are the ones `kind_from_str` recognises.
#[cfg(feature = "qdrant")]
fn chunk_kind_str(kind: &ChunkKind) -> &'static str {
    match *kind {
        ChunkKind::Other => "Other",
        ChunkKind::Method => "Method",
        ChunkKind::Class => "Class",
        ChunkKind::Module => "Module",
        ChunkKind::Impl => "Impl",
        ChunkKind::Struct => "Struct",
        ChunkKind::Function => "Function",
    }
}
388
/// Parses a textual kind name back into a [`ChunkKind`].
///
/// Matching is exact and case-sensitive; any string that is not one of the
/// six known names yields [`ChunkKind::Other`].
#[cfg(feature = "qdrant")]
pub(crate) fn kind_from_str(s: &str) -> ChunkKind {
    let known = [
        ("Function", ChunkKind::Function),
        ("Struct", ChunkKind::Struct),
        ("Impl", ChunkKind::Impl),
        ("Module", ChunkKind::Module),
        ("Class", ChunkKind::Class),
        ("Method", ChunkKind::Method),
    ];

    IntoIterator::into_iter(known)
        .find(|(name, _)| *name == s)
        .map(|(_, kind)| kind)
        .unwrap_or(ChunkKind::Other)
}
401
/// Crate-visible entry point for turning a [`ChunkKind`] into its textual name.
///
/// Delegates to the private `chunk_kind_str`, so both always agree.
#[cfg(feature = "qdrant")]
pub(crate) fn kind_to_str(kind: &ChunkKind) -> &'static str {
    chunk_kind_str(kind)
}
406
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::{Mutex, MutexGuard};

    /// Serializes tests that modify the process-wide environment.
    static ENV_LOCK: Mutex<()> = Mutex::new(());

    const BACKEND_VAR: &str = "LEANCTX_DENSE_BACKEND";
    const QDRANT_URL_VAR: &str = "LEANCTX_QDRANT_URL";

    /// Sets `key` to `value`, or removes it when `value` is `None`.
    fn write_env(key: &str, value: Option<&str>) {
        match value {
            Some(v) => std::env::set_var(key, v),
            None => std::env::remove_var(key),
        }
    }

    /// Temporary environment overrides that are undone on drop.
    ///
    /// Restoring in `Drop` (instead of at the end of each test) means a
    /// failing assertion can no longer leak modified variables into other
    /// tests. The lock guard is a field, so it is released only after `drop`
    /// has restored the previous values.
    struct EnvOverride {
        saved: Vec<(&'static str, Option<String>)>,
        _serial: MutexGuard<'static, ()>,
    }

    impl EnvOverride {
        /// Takes the environment lock and applies `vars`, remembering the old values.
        fn apply(vars: &[(&'static str, Option<&str>)]) -> Self {
            // A test that panicked while holding the lock poisons it; the
            // protected data is `()`, so continuing is safe and keeps one
            // failure from cascading into every other env test.
            let serial = ENV_LOCK
                .lock()
                .unwrap_or_else(|poisoned| poisoned.into_inner());
            let saved = vars
                .iter()
                .map(|&(key, value)| {
                    let previous = std::env::var(key).ok();
                    write_env(key, value);
                    (key, previous)
                })
                .collect();
            Self {
                saved,
                _serial: serial,
            }
        }
    }

    impl Drop for EnvOverride {
        fn drop(&mut self) {
            for (key, previous) in self.saved.iter().rev() {
                write_env(key, previous.as_deref());
            }
        }
    }

    #[test]
    fn dense_backend_defaults_to_local() {
        let _env = EnvOverride::apply(&[(BACKEND_VAR, None), (QDRANT_URL_VAR, None)]);

        let got = DenseBackendKind::try_from_env().unwrap();
        assert_eq!(got, DenseBackendKind::Local);
    }

    #[test]
    fn dense_backend_unknown_value_errors() {
        let _env = EnvOverride::apply(&[(BACKEND_VAR, Some("wat")), (QDRANT_URL_VAR, None)]);

        let err = DenseBackendKind::try_from_env().unwrap_err();
        assert!(err.contains("Unknown LEANCTX_DENSE_BACKEND"));
    }

    #[cfg(feature = "qdrant")]
    #[test]
    fn dense_backend_infers_qdrant_from_url() {
        let _env = EnvOverride::apply(&[
            (BACKEND_VAR, None),
            (QDRANT_URL_VAR, Some("http://127.0.0.1:6333")),
        ]);

        let got = DenseBackendKind::try_from_env().unwrap();
        assert_eq!(got, DenseBackendKind::Qdrant);
    }

    #[cfg(not(feature = "qdrant"))]
    #[test]
    fn dense_backend_qdrant_requires_feature() {
        let _env = EnvOverride::apply(&[(BACKEND_VAR, Some("qdrant")), (QDRANT_URL_VAR, None)]);

        let err = DenseBackendKind::try_from_env().unwrap_err();
        assert!(err.contains("feature 'qdrant' is not enabled"));
    }
}