
lexa_core/search.rs

use std::cmp::Ordering;
use std::collections::HashMap;

use rusqlite::{params, Connection};

use crate::db::LexaDb;
use crate::embed::{matryoshka_truncate, vector_blob, PREVIEW_DIMS};
use crate::query::{fts_query, tokenize};
use crate::types::{LexaError, SearchHit, SearchTier, TierBreakdown};
use crate::Result;

/// Reciprocal Rank Fusion constant. 60 is the value used in the original RRF
/// paper (Cormack et al., 2009) and reused in Exa's published search
/// composition. It controls how much weight rank-1 has over rank-50.
const RRF_K: f32 = 60.0;
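// For reference, `fuse_many` below computes, with 1-based ranks:
//
//     score(d) = sum over lists of 1 / (RRF_K + rank(d))
//
// so rank 1 contributes 1/61 ≈ 0.016 and rank 50 contributes 1/110 ≈ 0.009.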

/// Top-K returned by the BM25 (sparse) retriever before fusion.
const SPARSE_TOP_K: usize = 50;

/// Top-K returned by the dense (binary-quantized vector) retriever before fusion.
const DENSE_TOP_K: usize = 50;

/// First-stage candidate count for the Matryoshka 256-bit preview KNN.
/// 8× the final dense top-K — bench measurement on the lexa repo showed
/// 4× was too tight (caused a 3/20 agent regression vs single-stage 768d).
/// Widening to 8× recovers the full quality at single-digit ms cost
/// because the second stage's `WHERE rowid IN (...)` filter is O(K).
const PREVIEW_TOP_K: usize = DENSE_TOP_K * 8;

/// Number of fused candidates handed to the deep-tier reranker. We trim to
/// 15 (down from a previous 30) because BGE-reranker-base is the dominant
/// latency cost at the deep tier and tends to introduce noise when scoring
/// barely-relevant tail candidates. Anthropic's contextual-retrieval
/// research and Exa's reranker pipeline both place the gain plateau in
/// the 15–30 range.
const RERANK_CANDIDATES: usize = 15;

/// Blend factor for combining the cross-encoder rerank score with the
/// fused RRF score in the deep tier. The cross-encoder logit gets
/// sigmoid-squashed into `[0, 1]` and mixed with the RRF score so a
/// strong cross-encoder vote *adjusts* the RRF order without overriding
/// it entirely (which empirically hurt SciFact nDCG with the previous
/// `score += rerank_score` blend).
const RERANK_BLEND: f32 = 0.7;
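//
// Per hit in `rerank` below this works out to:
//
//     score = RERANK_BLEND * sigmoid(logit) + (1.0 - RERANK_BLEND) * rrf_score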

/// Maximum characters retained in a hit excerpt fallback. The query-aware
/// `highlight` function below normally produces tighter spans (~80–150
/// chars); this constant only applies when no sentence in the chunk has
/// any query-token overlap and we fall back to plain truncation.
const EXCERPT_MAX_CHARS: usize = 500;

/// Soft target length for a query-aware highlight. Matches Exa's contents-
/// API "highlight" idea: a short span containing the answer, cheap for an
/// LLM consumer.
const HIGHLIGHT_TARGET_CHARS: usize = 220;

#[derive(Debug, Clone)]
pub struct SearchOptions {
    pub query: String,
    pub tier: SearchTier,
    pub limit: usize,
    /// Additional reformulations of the query that the deep tier should
    /// search alongside the original. Mirrors Exa Deep's `additionalQueries`
    /// parameter — auto 2–3 paraphrases run in parallel and RRF-fused with
    /// the main query's result list before reranking. Ignored on
    /// non-Deep tiers.
    pub additional_queries: Vec<String>,
}

impl SearchOptions {
    pub fn new(query: impl Into<String>) -> Self {
        Self {
            query: query.into(),
            tier: SearchTier::Auto,
            limit: 10,
            additional_queries: Vec::new(),
        }
    }
}
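
// Illustrative call shape (hypothetical setup; any opened `LexaDb` works):
//
//     let mut opts = SearchOptions::new("why does rerank blend with RRF?");
//     opts.tier = SearchTier::Deep;
//     opts.additional_queries = vec!["rerank score mixing".into()];
//     let hits = search_impl(&db, &opts)?;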

pub fn search_impl(db: &LexaDb, options: &SearchOptions) -> Result<Vec<SearchHit>> {
    let conn = db.conn();
    let limit = options.limit.max(1);

    // Auto routing: pick the cheapest tier likely to answer the query well.
    // We surface the routed tier in `TierBreakdown::routed_to` so callers
    // can debug the decision.
    let (effective_tier, routed_to) = if options.tier == SearchTier::Auto {
        let routed = classify_query(&options.query);
        (routed, Some(routed))
    } else {
        (options.tier, None)
    };

    let mut hits = match effective_tier {
        SearchTier::Auto => unreachable!("Auto resolves to a concrete tier above"),
        SearchTier::Instant => {
            let bm25 = bm25_search(conn, &options.query, SPARSE_TOP_K)?;
            hydrate(conn, &options.query, &rank_to_rrf(&bm25), &bm25, &[], limit)?.0
        }
        SearchTier::Dense => {
            let vector = vector_search(db, &options.query, DENSE_TOP_K)?;
            hydrate(
                conn,
                &options.query,
                &rank_to_rrf(&vector),
                &[],
                &vector,
                limit,
            )?
            .0
        }
        SearchTier::Fast | SearchTier::Deep => {
            // Run BM25 (FTS5 SQL, ~1–2 ms) on the main thread while the
            // embedder forward pass (~7 ms ONNX) runs on a scoped worker.
            // `Connection` is `!Sync` so we cannot move the SQL onto the
            // worker, but `Mutex<Embedder>` is `Send + Sync`, so the
            // embedder is what we ship across the thread boundary. After
            // both finish we hit the binary KNN on the main connection
            // (~1 ms).
            let embedder_lock = db.embedder()?;
            let query_str = options.query.as_str();
            let (bm25, embedding) = std::thread::scope(|scope| -> Result<_> {
                let embed_handle = scope.spawn(|| -> Result<Vec<f32>> {
                    let mut guard = embedder_lock
                        .lock()
                        .map_err(|err| LexaError::Embedding(err.to_string()))?;
                    guard.embed_query(query_str)
                });
                let bm25 = bm25_search(conn, query_str, SPARSE_TOP_K)?;
                let embedding = embed_handle
                    .join()
                    .map_err(|_| LexaError::Embedding("embed worker panicked".into()))??;
                Ok((bm25, embedding))
            })?;
            let vector = vector_knn(conn, &embedding, DENSE_TOP_K)?;

            // Deep tier supports `additionalQueries` à la Exa Deep: fan out
            // 2–3 reformulations through the same hybrid pipeline and
            // RRF-fuse all 2(N+1) ranked lists. Fast tier ignores them.
            let fused =
                if effective_tier == SearchTier::Deep && !options.additional_queries.is_empty() {
                    let mut all_lists: Vec<Vec<(i64, f32)>> =
                        Vec::with_capacity(2 + options.additional_queries.len() * 2);
                    all_lists.push(bm25.clone());
                    all_lists.push(vector.clone());
                    for extra in &options.additional_queries {
                        let extra_str = extra.as_str();
                        let (extra_bm25, extra_emb) = std::thread::scope(|scope| -> Result<_> {
                            let h = scope.spawn(|| -> Result<Vec<f32>> {
                                let mut guard = embedder_lock
                                    .lock()
                                    .map_err(|err| LexaError::Embedding(err.to_string()))?;
                                guard.embed_query(extra_str)
                            });
                            let b = bm25_search(conn, extra_str, SPARSE_TOP_K)?;
                            let e = h.join().map_err(|_| {
                                LexaError::Embedding("embed worker panicked".into())
                            })??;
                            Ok((b, e))
                        })?;
                        let extra_vec = vector_knn(conn, &extra_emb, DENSE_TOP_K)?;
                        all_lists.push(extra_bm25);
                        all_lists.push(extra_vec);
                    }
                    let refs: Vec<&[(i64, f32)]> = all_lists.iter().map(Vec::as_slice).collect();
                    fuse_many(&refs)
                } else {
                    fuse(&bm25, &vector)
                };

            let candidate_count = if effective_tier == SearchTier::Deep {
                RERANK_CANDIDATES
            } else {
                limit
            };
            let (mut hits, full_texts) = hydrate(
                conn,
                &options.query,
                &fused,
                &bm25,
                &vector,
                candidate_count,
            )?;
            if effective_tier == SearchTier::Deep && !hits.is_empty() {
                rerank(db, &options.query, &mut hits, &full_texts)?;
            }
            hits.truncate(limit);
            hits
        }
    };

    if let Some(tier) = routed_to {
        for hit in &mut hits {
            hit.breakdown.routed_to = Some(tier);
        }
    }

    Ok(hits)
}

/// Inspect the query and pick the cheapest tier likely to answer it well.
/// This is the local equivalent of Exa's `auto` policy.
///
/// Heuristics, in priority order:
/// - explicit `[deep]` prefix → `Deep`
/// - **single** identifier-shaped token (snake_case, CamelCase with both
///   upper and lower, or `::` / `.` path) → `Instant`
/// - long question-form (≥ 6 tokens AND ends with `?`) → `Deep`
/// - default → `Fast`
///
/// We deliberately keep `Instant` routing narrow — only the "user pasted a
/// symbol they were looking at" case. A natural-language query that
/// happens to contain a CamelCase identifier ("the BGE reranker pipeline")
/// is still a natural-language query and belongs on `Fast` so the dense
/// retriever sees it.
fn classify_query(query: &str) -> SearchTier {
    let trimmed = query.trim();
    if trimmed.starts_with("[deep]") {
        return SearchTier::Deep;
    }

    let tokens: Vec<&str> = trimmed
        .split_whitespace()
        .filter(|tok| tok.chars().any(char::is_alphanumeric))
        .collect();

    if tokens.is_empty() {
        return SearchTier::Fast;
    }

    if tokens.len() == 1 {
        let tok = tokens[0];
        let snake_case = tok.contains('_') && tok.chars().any(|c| c.is_ascii_alphanumeric());
        let mixed_case = tok.chars().any(|c| c.is_ascii_uppercase())
            && tok.chars().any(|c| c.is_ascii_lowercase());
        let path_like = tok.contains("::") || (tok.contains('.') && !tok.ends_with('.'));
        if snake_case || mixed_case || path_like {
            return SearchTier::Instant;
        }
    }

    if tokens.len() >= 6 && trimmed.ends_with('?') {
        return SearchTier::Deep;
    }

    SearchTier::Fast
}

/// Two-stage Matryoshka KNN against the binary-quantized vector indices.
///
/// Stage 1 runs Hamming KNN over the 256-bit `vectors_bin_preview` table
/// (3× less memory bandwidth than the 768-bit table) and returns
/// `PREVIEW_TOP_K` candidate `rowid`s. Stage 2 re-scores those candidates
/// against the full 768-bit `vectors_bin` table using the same
/// `vec_distance_hamming` op, returning the requested `limit`.
///
/// This is the local analogue of Exa's published two-stage Matryoshka
/// design: a coarse pass over a prefix dimension (Exa uses 256 of 4096;
/// we use 256 of 768) followed by a full-dim re-rank over the survivors.
/// Quality stays within ~0.5 nDCG of full-dim because Nomic v1.5 is MRL-
/// trained at exactly the {64, 128, 256, 512, 768} canonical points.
fn vector_knn(conn: &Connection, embedding: &[f32], limit: usize) -> Result<Vec<(i64, f32)>> {
    // Stage 1: coarse Matryoshka KNN at 256 bits.
    let preview_blob = vector_blob(&matryoshka_truncate(embedding, PREVIEW_DIMS));
    let mut preview_stmt = conn.prepare_cached(
        "SELECT rowid
         FROM vectors_bin_preview
         WHERE embedding MATCH vec_quantize_binary(?1) AND k = ?2
         ORDER BY distance",
    )?;
    let preview_ids: Vec<i64> = preview_stmt
        .query_map(params![preview_blob, PREVIEW_TOP_K as i64], |row| {
            row.get::<_, i64>(0)
        })?
        .collect::<std::result::Result<Vec<_>, _>>()?;

    if preview_ids.is_empty() {
        return Ok(Vec::new());
    }

    // Stage 2: re-score the survivors at full 768 bits.
    let full_blob = vector_blob(embedding);
    // sqlite-vec's vec_distance_hamming works on `bit[N]` blobs; combined
    // with a `rowid IN (json_each(?))` filter we get the re-rank in a
    // single round trip without an enormous variadic IN list.
    let preview_ids_json = serde_json::to_string(&preview_ids)?;
    let mut rescore_stmt = conn.prepare_cached(
        "SELECT v.rowid,
                vec_distance_hamming(v.embedding, vec_quantize_binary(?1)) AS distance
         FROM vectors_bin AS v
         WHERE v.rowid IN (SELECT value FROM json_each(?2))
         ORDER BY distance
         LIMIT ?3",
    )?;
    let rows =
        rescore_stmt.query_map(params![full_blob, preview_ids_json, limit as i64], |row| {
            let id: i64 = row.get(0)?;
            let distance: f64 = row.get(1)?;
            Ok((id, (1.0 / (1.0 + distance)) as f32))
        })?;
    rows.collect::<std::result::Result<Vec<_>, _>>()
        .map_err(Into::into)
}

fn bm25_search(conn: &Connection, query: &str, limit: usize) -> Result<Vec<(i64, f32)>> {
    let fts_query = fts_query(query);
    if fts_query.is_empty() {
        return Ok(Vec::new());
    }
    let mut stmt = conn.prepare_cached(
        "SELECT rowid, bm25(chunks_fts) AS rank
         FROM chunks_fts
         WHERE chunks_fts MATCH ?1
         ORDER BY rank
         LIMIT ?2",
    )?;
    let rows = stmt.query_map(params![fts_query, limit as i64], |row| {
        let id: i64 = row.get(0)?;
        let rank: f64 = row.get(1)?;
        Ok((id, (1.0 / (1.0 + rank.abs())) as f32))
    })?;
    rows.collect::<std::result::Result<Vec<_>, _>>()
        .map_err(Into::into)
}

fn vector_search(db: &LexaDb, query: &str, limit: usize) -> Result<Vec<(i64, f32)>> {
    let embedding = {
        let lock = db.embedder()?;
        let mut guard = lock
            .lock()
            .map_err(|err| LexaError::Embedding(err.to_string()))?;
        guard.embed_query(query)?
    };
    vector_knn(db.conn(), &embedding, limit)
}

/// Reciprocal Rank Fusion over an arbitrary number of ranked lists.
/// Used by the Fast tier (BM25 + dense, two lists) and by the Deep tier's
/// `additionalQueries` fan-out (main BM25 + main dense + N reformulation
/// BM25 + N reformulation dense).
fn fuse_many(lists: &[&[(i64, f32)]]) -> Vec<(i64, f32)> {
    let mut scores = HashMap::<i64, f32>::new();
    for list in lists {
        for (rank, (id, _)) in list.iter().enumerate() {
            *scores.entry(*id).or_default() += 1.0 / (RRF_K + rank as f32 + 1.0);
        }
    }
    let mut fused: Vec<_> = scores.into_iter().collect();
    fused.sort_by(score_desc);
    fused
}
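
// Worked example (values follow directly from RRF_K = 60): a doc at rank 1
// in BM25 and rank 2 in the dense list scores 1/61 + 1/62 ≈ 0.0325, which
// beats a doc that is rank 1 in a single list (1/61 ≈ 0.0164): overlap wins.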

fn fuse(bm25: &[(i64, f32)], vector: &[(i64, f32)]) -> Vec<(i64, f32)> {
    fuse_many(&[bm25, vector])
}

fn rank_to_rrf(items: &[(i64, f32)]) -> Vec<(i64, f32)> {
    items
        .iter()
        .enumerate()
        .map(|(rank, (id, _))| (*id, 1.0 / (RRF_K + rank as f32 + 1.0)))
        .collect()
}

/// Hydrate ranked chunk IDs into full `SearchHit`s plus the per-hit full
/// chunk text (which the deep-tier reranker uses instead of the short
/// display excerpt). The display excerpt is produced by `highlight`, the
/// query-aware span picker; the reranker gets the full text.
fn hydrate(
    conn: &Connection,
    query: &str,
    ranked: &[(i64, f32)],
    bm25: &[(i64, f32)],
    vector: &[(i64, f32)],
    limit: usize,
) -> Result<(Vec<SearchHit>, Vec<String>)> {
    let bm25_rank = ranks(bm25);
    let vector_rank = ranks(vector);
    let bm25_scores = score_map(bm25);
    let vector_scores = score_map(vector);
    let mut hits = Vec::new();
    let mut full_texts = Vec::new();
    let mut stmt = conn.prepare_cached(
        "SELECT d.path, c.line_start, c.line_end, c.text, c.context
         FROM chunks c JOIN documents d ON d.id = c.doc_id
         WHERE c.id = ?1",
    )?;
    for (id, score) in ranked.iter().take(limit) {
        let (hit, text) = stmt.query_row(params![id], |row| {
            let text: String = row.get(3)?;
            let heading: Option<String> = row.get(4)?;
            let hit = SearchHit {
                path: row.get(0)?,
                line_start: row.get(1)?,
                line_end: row.get(2)?,
                score: *score,
                excerpt: highlight(query, &text),
                heading,
                breakdown: TierBreakdown {
                    bm25_rank: bm25_rank.get(id).copied(),
                    vector_rank: vector_rank.get(id).copied(),
                    bm25_score: bm25_scores.get(id).copied().unwrap_or_default(),
                    vector_score: vector_scores.get(id).copied().unwrap_or_default(),
                    rerank_score: None,
                    routed_to: None,
                },
            };
            Ok((hit, text))
        })?;
        hits.push(hit);
        full_texts.push(text);
    }
    Ok((hits, full_texts))
}

/// Sigmoid for the rerank blend. f32 std doesn't expose one and the
/// cross-encoder logits we get back from BGE-reranker-base are unbounded,
/// so we squash to `[0, 1]` before blending with the bounded RRF score.
fn sigmoid(x: f32) -> f32 {
    1.0 / (1.0 + (-x).exp())
}
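
// For the blend above: sigmoid(0.0) = 0.5 and sigmoid(4.0) ≈ 0.982, so even
// a very confident logit moves a hit's final score by at most RERANK_BLEND.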

/// Cross-encoder reranking on the deep-tier candidate set. Sends the
/// **full chunk text** (not the truncated display excerpt) so the reranker
/// has the context it was trained on, then blends the squashed cross-encoder
/// score with the existing RRF score (`RERANK_BLEND` weight on the
/// reranker, `1 - RERANK_BLEND` on RRF). This avoids the previous failure
/// mode where unbounded reranker logits completely overrode the fusion
/// order — which on SciFact dropped deep-tier nDCG@10 from 0.706 to 0.643.
fn rerank(db: &LexaDb, query: &str, hits: &mut [SearchHit], full_texts: &[String]) -> Result<()> {
    let docs: Vec<String> = full_texts.to_vec();
    let scores = {
        let lock = db.reranker()?;
        let mut guard = lock
            .lock()
            .map_err(|err| LexaError::Embedding(err.to_string()))?;
        guard.rerank(query, &docs)?
    };
    for (idx, raw_score) in scores {
        if let Some(hit) = hits.get_mut(idx) {
            let rrf = hit.score;
            let squashed = sigmoid(raw_score);
            hit.score = RERANK_BLEND * squashed + (1.0 - RERANK_BLEND) * rrf;
            hit.breakdown.rerank_score = Some(raw_score);
        }
    }
    hits.sort_by(|left, right| {
        right
            .score
            .partial_cmp(&left.score)
            .unwrap_or(Ordering::Equal)
    });
    Ok(())
}

fn ranks(items: &[(i64, f32)]) -> HashMap<i64, usize> {
    items
        .iter()
        .enumerate()
        .map(|(idx, (id, _))| (*id, idx + 1))
        .collect()
}

fn score_map(items: &[(i64, f32)]) -> HashMap<i64, f32> {
    items.iter().copied().collect()
}

fn score_desc(left: &(i64, f32), right: &(i64, f32)) -> Ordering {
    right.1.partial_cmp(&left.1).unwrap_or(Ordering::Equal)
}

/// Query-aware highlight extraction. Mirrors Exa's contents-API
/// "highlights": pick the chunk's most query-relevant sentence span
/// instead of returning a fixed-width truncation. On a typical SciFact
/// chunk this drops the displayed excerpt from ~500 chars to ~80–150
/// chars, which is what Exa's published ~10× LLM-token-reduction claim
/// rests on.
///
/// Algorithm:
/// 1. Tokenize the query with the same `crate::query::tokenize` used by
///    BM25 (lowercases, drops short tokens, strips stopwords).
/// 2. Split the chunk into sentence spans on `[.!?;\n]\s+`.
/// 3. Score each sentence by *unique* query-token overlap (set
///    intersection) so a sentence that contains the answer's noun gets
///    rewarded once, not once per duplicate.
/// 4. Pick the highest-scoring sentence; expand with neighbouring
///    sentences while the result is shorter than `HIGHLIGHT_TARGET_CHARS`.
/// 5. If no sentence has any overlap, fall back to `excerpt`'s plain
///    truncation.
fn highlight(query: &str, text: &str) -> String {
    let query_tokens: std::collections::HashSet<String> = tokenize(query).collect();
    if query_tokens.is_empty() {
        return excerpt(text);
    }

    let compact = text.split_whitespace().collect::<Vec<_>>().join(" ");
    if compact.is_empty() {
        return String::new();
    }

    // Sentence split on terminal punctuation. The pattern is conservative:
    // require punctuation followed by whitespace, otherwise we'd split on
    // every `.` in identifiers and floats.
    let sentences = split_sentences(&compact);
    if sentences.is_empty() {
        return excerpt(&compact);
    }

    let scores: Vec<(usize, usize)> = sentences
        .iter()
        .enumerate()
        .map(|(idx, sentence)| {
            let tokens: std::collections::HashSet<String> = tokenize(sentence).collect();
            let overlap = query_tokens.intersection(&tokens).count();
            (idx, overlap)
        })
        .collect();

    let best = scores.iter().max_by_key(|(_, score)| *score).copied();
    let Some((best_idx, best_score)) = best else {
        return excerpt(&compact);
    };
    if best_score == 0 {
        // No sentence shares any non-stopword token with the query — fall
        // back to plain truncation rather than emit a misleading highlight.
        return excerpt(&compact);
    }

    // Expand the window with neighbours until we hit the soft target or
    // exhaust the chunk. Grow whichever side has expanded less so the
    // window stays roughly centred on the best sentence.
    let mut start = best_idx;
    let mut end = best_idx;
    let mut span_len = sentences[best_idx].len();
    while span_len < HIGHLIGHT_TARGET_CHARS {
        let grew = if start > 0
            && (end + 1 == sentences.len() || best_idx - start <= end - best_idx)
        {
            start -= 1;
            span_len += sentences[start].len() + 1;
            true
        } else if end + 1 < sentences.len() {
            end += 1;
            span_len += sentences[end].len() + 1;
            true
        } else {
            false
        };
        if !grew {
            break;
        }
    }

    let span: String = sentences[start..=end].join(" ");
    // Cap at 1.5× the target so a long-running sentence doesn't blow the
    // budget on its own.
    let cap = HIGHLIGHT_TARGET_CHARS * 3 / 2;
    if span.len() <= cap {
        span
    } else {
        let mut cut = cap;
        while cut > 0 && !span.is_char_boundary(cut) {
            cut -= 1;
        }
        format!("{}...", &span[..cut])
    }
}

fn split_sentences(text: &str) -> Vec<&str> {
    let bytes = text.as_bytes();
    let mut starts = vec![0];
    let mut i = 0;
    while i < bytes.len() {
        let b = bytes[i];
        if matches!(b, b'.' | b'!' | b'?' | b';' | b'\n')
            && i + 1 < bytes.len()
            && (bytes[i + 1] == b' ' || bytes[i + 1] == b'\n' || bytes[i + 1] == b'\t')
        {
            // Move start to the character *after* the whitespace.
            let mut j = i + 1;
            while j < bytes.len() && (bytes[j] == b' ' || bytes[j] == b'\n' || bytes[j] == b'\t') {
                j += 1;
            }
            if j < bytes.len() && text.is_char_boundary(j) {
                starts.push(j);
            }
            i = j;
            continue;
        }
        i += 1;
    }
    starts.push(text.len());
    starts
        .windows(2)
        .map(|w| text[w[0]..w[1]].trim())
        .filter(|s| !s.is_empty())
        .collect()
}

/// Compact whitespace and truncate at a UTF-8 char boundary. Used as a
/// fallback when `highlight` can't find any query overlap and we still
/// want *something* to show the user.
fn excerpt(text: &str) -> String {
    let compact = text.split_whitespace().collect::<Vec<_>>().join(" ");
    if compact.len() <= EXCERPT_MAX_CHARS {
        return compact;
    }
    let mut end = EXCERPT_MAX_CHARS;
    while end > 0 && !compact.is_char_boundary(end) {
        end -= 1;
    }
    format!("{}...", &compact[..end])
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn rrf_boosts_overlap() {
        let bm25 = vec![(1, 1.0), (2, 0.8)];
        let vector = vec![(3, 1.0), (1, 0.7)];
        let fused = fuse(&bm25, &vector);
        assert_eq!(fused[0].0, 1);
    }

    #[test]
    fn classify_routes_single_identifier_to_instant() {
        assert_eq!(classify_query("vec_quantize_binary"), SearchTier::Instant);
        assert_eq!(classify_query("LexaDb::open"), SearchTier::Instant);
        assert_eq!(classify_query("Embedder::embed_query"), SearchTier::Instant);
    }

    #[test]
    fn classify_keeps_natural_language_with_identifiers_on_fast() {
        // A natural-language query containing a CamelCase term still needs
        // the dense retriever; Instant would lose semantic recall.
        assert_eq!(
            classify_query("matryoshka_truncate helper that re-normalizes"),
            SearchTier::Fast
        );
        assert_eq!(
            classify_query("the BGE cross encoder reranker"),
            SearchTier::Fast
        );
    }

    #[test]
    fn classify_routes_explicit_deep_prefix() {
        assert_eq!(
            classify_query("[deep] explain the rerank pipeline"),
            SearchTier::Deep
        );
    }

    #[test]
    fn classify_routes_long_questions_to_deep() {
        assert_eq!(
            classify_query("how does the reranker score truncated excerpts in deep tier?"),
            SearchTier::Deep
        );
    }

    #[test]
    fn classify_defaults_to_fast() {
        assert_eq!(
            classify_query("hybrid lexical dense retrieval"),
            SearchTier::Fast
        );
        assert_eq!(
            classify_query("binary quantized vector search"),
            SearchTier::Fast
        );
    }

    #[test]
    fn highlight_picks_query_relevant_sentence() {
        // Filler so long that even with neighbour expansion the highlight
        // can't pull it all in.
        let filler: String = "alpha beta gamma delta. ".repeat(20);
        let text = format!(
            "{filler}\
             The reranker scores candidates by cross encoder logits. \
             {filler}"
        );
        let span = highlight("reranker cross encoder logits", &text);
        assert!(span.contains("reranker"));
        assert!(span.contains("cross encoder"));
        // The highlight stays bounded — must not include the entire filler.
        assert!(span.len() <= HIGHLIGHT_TARGET_CHARS * 3 / 2 + 4);
    }

    #[test]
    fn highlight_falls_back_when_no_overlap() {
        let text = "Some prose without any of the query's words.";
        let span = highlight("matryoshka quantization", text);
        // Falls back to plain truncation; should equal `excerpt(text)`.
        assert_eq!(span, excerpt(text));
    }

    #[test]
    fn highlight_caps_at_soft_target() {
        // Build a ~2 kB text with one tiny target sentence in the middle so
        // the expansion logic can't possibly stretch past the cap.
        let prefix: String = "ipsum dolor sit amet. ".repeat(50);
        let suffix: String = "vivamus sed lacus. ".repeat(50);
        let text = format!("{}TARGET token here. {}", prefix, suffix);
        let span = highlight("target token", &text);
        assert!(span.len() <= HIGHLIGHT_TARGET_CHARS * 3 / 2 + 4 /* "..." */);
    }

    #[test]
    fn hash_fallback_can_score() {
        let query = crate::embed::hash_embedding("config validation");
        let doc = crate::embed::hash_embedding("configuration validation function");
        assert!(crate::embed::cosine(&query, &doc) > -1.0);
    }
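
    // Two added sketches exercising helpers the suite above leaves
    // untested; the assertions follow directly from the code in this file.
    #[test]
    fn split_sentences_requires_whitespace_after_terminator() {
        // Identifier dots ("LexaDb.conn") must not split; ". " must.
        let spans = split_sentences("call LexaDb.conn first. then query");
        assert_eq!(spans, vec!["call LexaDb.conn first.", "then query"]);
    }

    #[test]
    fn excerpt_truncates_on_char_boundary() {
        // 3-byte chars put EXCERPT_MAX_CHARS (500) mid-codepoint, so the
        // cut must back up to byte 498 instead of panicking on the slice.
        let text = "€".repeat(EXCERPT_MAX_CHARS);
        let cut = excerpt(&text);
        assert!(cut.ends_with("..."));
        assert!(cut.len() <= EXCERPT_MAX_CHARS + 3);
    }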
}