difflore_core/context/retrieval/
mod.rs

1mod past_verdicts;
2mod query_embed;
3mod rule_bodies;
4mod rules;
5mod scoring;
6
7pub use past_verdicts::{
8    PastVerdictRecaller, merge_past_verdicts, retrieve_past_verdicts,
9    retrieve_past_verdicts_by_text, retrieve_past_verdicts_by_text_with_team,
10    retrieve_past_verdicts_with_team,
11};
12pub use rule_bodies::{RenderedRuleBody, RenderedRuleExample, render_full_rule_bodies};
13pub use rules::{
14    RetrievalOptions, apply_explicit_recall_threshold, apply_intent_alignment_gate, retrieve_rules,
15    retrieve_rules_with_confidence,
16};
17pub use scoring::{RuleKind, effective_confidence, infer_rule_kind};
18
/// A retrieved rule chunk together with its ranking score.
///
/// Ranked lists in this module order chunks by `score` (highest first) and
/// use `skill_id` both as the de-duplication key and as the tie-breaker.
#[derive(Debug, Clone)]
pub struct ScoredRuleChunk {
    /// Identifier of the skill/rule this chunk belongs to.
    pub skill_id: String,
    /// Rule text. A line starting with `Rule Name:` is treated as the
    /// rule's title by the lexical re-rank.
    pub content: String,
    /// Relevance score; higher ranks first.
    pub score: f64,
    /// Confidence from the skill record (0.0-1.0). Used for display and ranking.
    pub confidence: f64,
}
27
28fn compare_scored_rule_chunks(a: &ScoredRuleChunk, b: &ScoredRuleChunk) -> std::cmp::Ordering {
29    b.score
30        .total_cmp(&a.score)
31        .then_with(|| a.skill_id.cmp(&b.skill_id))
32}
33
34/// Merge multiple groups of scored rule chunks into one ranked list.
35///
36/// De-dupes by `skill_id`. On collision the higher-scoring copy wins.
37/// Sorts descending by `score` and truncates to `limit`. Pure / sync —
38/// the orchestrator, the MCP tool helpers, and the CLI search command
39/// share this canonical implementation; runtime callers should pass one
40/// current repo/project scope, not a cross-project set.
41pub fn merge_scored_rule_chunks(
42    groups: impl IntoIterator<Item = Vec<ScoredRuleChunk>>,
43    limit: usize,
44) -> Vec<ScoredRuleChunk> {
45    let mut by_skill_id: std::collections::HashMap<String, ScoredRuleChunk> =
46        std::collections::HashMap::new();
47    for group in groups {
48        for chunk in group {
49            match by_skill_id.get(&chunk.skill_id) {
50                Some(existing) if existing.score >= chunk.score => {}
51                _ => {
52                    by_skill_id.insert(chunk.skill_id.clone(), chunk);
53                }
54            }
55        }
56    }
57    let mut merged: Vec<_> = by_skill_id.into_values().collect();
58    merged.sort_by(compare_scored_rule_chunks);
59    merged.truncate(limit);
60    merged
61}
62
/// Normalise repo scopes (trim + ASCII lowercase), drop blank entries, and
/// remove duplicates while keeping first-seen order.
fn unique_repo_scopes(repo_scopes: &[String]) -> Vec<String> {
    let normalized = repo_scopes
        .iter()
        .map(|raw| raw.trim().to_ascii_lowercase())
        .filter(|scope| !scope.is_empty());

    let mut seen: Vec<String> = Vec::new();
    for scope in normalized {
        if !seen.contains(&scope) {
            seen.push(scope);
        }
    }
    seen
}
76
77fn search_filter(
78    target_file: Option<&str>,
79    repo_scope: Option<&str>,
80) -> crate::context::index_db::QueryFilter {
81    crate::context::index_db::QueryFilter {
82        language: target_file.and_then(detect_language_from_path),
83        repo_scope: repo_scope.map(String::from),
84    }
85}
86
/// Extract a rule's title from its content.
///
/// The title is the trimmed text after the first line that starts with
/// `Rule Name:`. If no such line exists, or that first line's title is
/// blank, `fallback` is returned instead.
fn rule_title(content: &str, fallback: &str) -> String {
    for line in content.lines() {
        if let Some(rest) = line.strip_prefix("Rule Name:") {
            // Only the first `Rule Name:` line counts; a blank title there
            // falls back rather than scanning further lines.
            let title = rest.trim();
            return if title.is_empty() {
                fallback.to_owned()
            } else {
                title.to_owned()
            };
        }
    }
    fallback.to_owned()
}
94
/// Tokenise a query into salient lexical terms.
///
/// The query is split on every non-ASCII-alphanumeric character; tokens
/// shorter than three bytes are discarded, the rest are ASCII-lowercased,
/// and stop words and repeats are dropped. First-seen order is preserved.
fn lexical_terms(query: &str) -> Vec<String> {
    const STOP_WORDS: &[&str] = &[
        "about", "after", "again", "against", "all", "and", "any", "are", "around", "because",
        "been", "before", "being", "between", "but", "can", "cannot", "could", "does", "doing",
        "done", "each", "for", "from", "had", "has", "have", "how", "into", "its", "more", "must",
        "our", "out", "over", "rule", "rules", "should", "than", "that", "the", "their", "then",
        "there", "these", "this", "those", "through", "use", "using", "was", "were", "what",
        "when", "where", "which", "while", "with", "without", "would", "you", "your",
    ];

    // Tokens consist solely of ASCII alphanumerics, so no trimming is needed.
    let tokens = query
        .split(|ch: char| !ch.is_ascii_alphanumeric())
        .filter(|token| token.len() >= 3)
        .map(str::to_ascii_lowercase);

    let mut unique_terms: Vec<String> = Vec::new();
    for token in tokens {
        let is_stop_word = STOP_WORDS.contains(&token.as_str());
        if !is_stop_word && !unique_terms.contains(&token) {
            unique_terms.push(token);
        }
    }
    unique_terms
}
119
/// Canonical comparison key for a query: its ASCII-alphanumeric words,
/// lowercased and joined by single spaces. All other characters act as
/// separators and never appear in the key.
fn normalized_query_key(query: &str) -> String {
    let mut key = String::with_capacity(query.len());
    for word in query.split(|ch: char| !ch.is_ascii_alphanumeric()) {
        if word.is_empty() {
            continue;
        }
        if !key.is_empty() {
            key.push(' ');
        }
        key.push_str(&word.to_ascii_lowercase());
    }
    key
}
129
130fn retrieval_query_variants<'a>(query: &'a str, lexical_query: &'a str) -> Vec<&'a str> {
131    let query = query.trim();
132    let lexical_query = lexical_query.trim();
133    let mut variants = Vec::with_capacity(2);
134    if !query.is_empty() {
135        variants.push(query);
136    }
137
138    let query_key = normalized_query_key(query);
139    let lexical_key = normalized_query_key(lexical_query);
140    if !lexical_query.is_empty() && !lexical_key.is_empty() && lexical_key != query_key {
141        variants.push(lexical_query);
142    }
143
144    variants
145}
146
147fn lexical_boost(chunk: &ScoredRuleChunk, terms: &[String]) -> f64 {
148    if terms.is_empty() {
149        return 0.0;
150    }
151
152    let title = rule_title(&chunk.content, &chunk.skill_id).to_ascii_lowercase();
153    let content = chunk.content.to_ascii_lowercase();
154    let mut title_hits = 0usize;
155    let mut content_hits = 0usize;
156
157    for term in terms {
158        if title.contains(term) {
159            title_hits += 1;
160        }
161        if content.contains(term) {
162            content_hits += 1;
163        }
164    }
165
166    let total = terms.len() as f64;
167    let title_ratio = title_hits as f64 / total;
168    let content_ratio = content_hits as f64 / total;
169    let mut boost = 0.24f64.mul_add(title_ratio, 0.08 * content_ratio);
170    if title_hits >= 2 {
171        boost += 0.12;
172    }
173    if title_hits >= terms.len().min(3) {
174        boost += 0.08;
175    }
176    boost.min(0.45)
177}
178
179pub fn rerank_scored_rule_chunks_by_lexical_query(
180    mut chunks: Vec<ScoredRuleChunk>,
181    lexical_query: &str,
182    limit: usize,
183) -> Vec<ScoredRuleChunk> {
184    let terms = lexical_terms(lexical_query);
185    for chunk in &mut chunks {
186        chunk.score += lexical_boost(chunk, &terms);
187    }
188
189    chunks.sort_by(compare_scored_rule_chunks);
190    chunks.truncate(limit);
191    chunks
192}
193
/// Inputs for the CLI/MCP search-style retrieval helper
/// (`retrieve_rules_for_search`).
///
/// The helper fans out across repo scopes, applies the shared confidence +
/// age decay inputs at the `retrieve_rules_with_confidence` layer, merges
/// duplicates, then applies the same lexical re-rank used by the MCP search
/// tools.
pub struct RuleSearchRetrievalOptions<'a> {
    /// Primary retrieval query.
    pub query: &'a str,
    /// Query used for the final lexical re-rank; also retrieved as a second
    /// variant when it differs from `query` after normalisation.
    pub lexical_query: &'a str,
    /// Maximum number of results to return.
    pub top_k: usize,
    /// Optional confidence input forwarded to the retrieval layer
    /// (presumably keyed by skill id — confirm against the retrieval layer).
    pub confidence_map: Option<&'a std::collections::HashMap<String, f64>>,
    /// Optional age-in-days input forwarded to the retrieval layer
    /// (presumably keyed by skill id — confirm against the retrieval layer).
    pub age_days_map: Option<&'a std::collections::HashMap<String, f32>>,
    /// File being worked on, if any; its extension selects the language
    /// filter.
    pub target_file: Option<&'a str>,
    /// Repo scopes to search; de-duplicated case-insensitively before use.
    pub repo_scopes: &'a [String],
    /// Forwarded unchanged to the retrieval layer's `ann_enabled` option.
    pub ann_enabled: bool,
    /// Forwarded unchanged to the retrieval layer's `embedding_timeout`
    /// option.
    pub embedding_timeout: Option<std::time::Duration>,
    /// Retry a timed-out query embed once with a longer cold-absorbing budget
    /// (see `embed_query_aligned_to_index`). Set by the human-waiting CLI path
    /// so a cold first recall keeps semantic ranking; left `false` by the
    /// latency-critical hook/MCP callers that must fast-degrade to lexical.
    pub cold_start_retry: bool,
    /// Forwarded unchanged to the retrieval layer's `adaptive_prune` option.
    pub adaptive_prune: bool,
}
215
/// Input for the one canonical multi-scope rule fan-out
/// (`retrieve_rules_fanout`).
///
/// The CLI/MCP `search` path (`retrieve_rules_for_search`) and the
/// orchestrator's `prepare`/`debug` path previously each implemented the
/// *same* algorithm: dedup + cap repo scopes, clamp `top_k`, derive the
/// per-scope SQL filter (language from the target file), fan out across
/// scope × query-variant, merge by best-score dedup, then lexical re-rank
/// and truncate. They differed only in defaults — search expands an intent
/// query-variant lane and never constrains to an eligible-skill set, while
/// the orchestrator passes a single query plus an `eligible_skill_ids`
/// allow-list. Both are now expressed as different values of this struct.
pub(crate) struct RuleFanoutQuery<'a> {
    /// Primary query string (path + intent, or just intent).
    pub query: &'a str,
    /// Lexical lane used for the final re-rank and for deciding whether
    /// to add a second (intent-only) retrieval variant. Pass the same
    /// string as `query` to collapse to a single-variant, single-lane
    /// fan-out (the orchestrator's behaviour).
    pub lexical_query: &'a str,
    /// Maximum number of results to return.
    pub top_k: usize,
    /// Optional confidence input forwarded to every retrieval.
    pub confidence_map: Option<&'a std::collections::HashMap<String, f64>>,
    /// Optional allow-list of skill ids forwarded to every retrieval;
    /// `None` applies no allow-list.
    pub eligible_skill_ids: Option<&'a std::collections::HashSet<String>>,
    /// Optional age-in-days input forwarded to every retrieval.
    pub age_days_map: Option<&'a std::collections::HashMap<String, f32>>,
    /// File being worked on, if any; its extension selects the language
    /// filter.
    pub target_file: Option<&'a str>,
    /// Repo scopes to fan out over; de-duplicated case-insensitively.
    pub repo_scopes: &'a [String],
    /// Forwarded unchanged to each retrieval's `ann_enabled` option.
    pub ann_enabled: bool,
    /// Forwarded unchanged to each retrieval's `embedding_timeout` option.
    pub embedding_timeout: Option<std::time::Duration>,
    /// See [`RuleSearchRetrievalOptions::cold_start_retry`]. Forwarded to the
    /// per-scope/per-variant `RetrievalOptions` so every concurrent embed in
    /// the fan-out honours the same cold-start policy.
    pub cold_start_retry: bool,
    /// Forwarded unchanged to each retrieval's `adaptive_prune` option.
    pub adaptive_prune: bool,
}
250
251pub(crate) async fn retrieve_rules_fanout(
252    index_pool: &crate::SqlitePool,
253    query: RuleFanoutQuery<'_>,
254) -> Result<Vec<ScoredRuleChunk>, crate::CoreError> {
255    let RuleFanoutQuery {
256        query,
257        lexical_query,
258        top_k,
259        confidence_map,
260        eligible_skill_ids,
261        age_days_map,
262        target_file,
263        repo_scopes,
264        ann_enabled,
265        embedding_timeout,
266        cold_start_retry,
267        adaptive_prune,
268    } = query;
269
270    if top_k == 0 {
271        return Ok(Vec::new());
272    }
273    let top_k = top_k.min(50);
274    let repo_scopes: Vec<String> = unique_repo_scopes(repo_scopes)
275        .into_iter()
276        .take(4)
277        .collect();
278    let candidate_limit = top_k.saturating_mul(5).clamp(top_k, 50);
279    // A `None` filter retrieves the whole per-project index. That is safe
280    // BECAUSE the index is the scope boundary: it only holds rules copied in
281    // for the current project's scopes (see `filter_rules_for_repo_scopes`,
282    // which now copies nothing when there is no scope). When the caller did
283    // detect scopes, narrow further per scope.
284    let scope_filters: Vec<Option<String>> = if repo_scopes.is_empty() {
285        vec![None]
286    } else {
287        repo_scopes.into_iter().map(Some).collect()
288    };
289
290    let query_variants = retrieval_query_variants(query, lexical_query);
291    let mut retrievals = Vec::with_capacity(scope_filters.len() * query_variants.len());
292    for repo_scope in &scope_filters {
293        for query_variant in &query_variants {
294            let filter = search_filter(target_file, repo_scope.as_deref());
295            retrievals.push(async move {
296                retrieve_rules_with_confidence(
297                    index_pool,
298                    query_variant,
299                    RetrievalOptions {
300                        top_k: Some(candidate_limit),
301                        confidence_map,
302                        eligible_skill_ids,
303                        age_days_map,
304                        target_file,
305                        filter: Some(&filter),
306                        ann_enabled,
307                        embedding_timeout,
308                        cold_start_retry,
309                        adaptive_prune,
310                        ..Default::default()
311                    },
312                )
313                .await
314            });
315        }
316    }
317    let mut groups = Vec::with_capacity(retrievals.len());
318    for group in futures_util::future::join_all(retrievals).await {
319        groups.push(group?);
320    }
321
322    let merged = merge_scored_rule_chunks(groups, candidate_limit);
323    Ok(rerank_scored_rule_chunks_by_lexical_query(
324        merged,
325        lexical_query,
326        top_k,
327    ))
328}
329
330pub async fn retrieve_rules_for_search(
331    index_pool: &crate::SqlitePool,
332    options: RuleSearchRetrievalOptions<'_>,
333) -> Result<Vec<ScoredRuleChunk>, crate::CoreError> {
334    let RuleSearchRetrievalOptions {
335        query,
336        lexical_query,
337        top_k,
338        confidence_map,
339        age_days_map,
340        target_file,
341        repo_scopes,
342        ann_enabled,
343        embedding_timeout,
344        cold_start_retry,
345        adaptive_prune,
346    } = options;
347
348    retrieve_rules_fanout(
349        index_pool,
350        RuleFanoutQuery {
351            query,
352            lexical_query,
353            top_k,
354            confidence_map,
355            // The search path intentionally never constrains to an
356            // engine-eligible allow-list — callers filter afterwards.
357            eligible_skill_ids: None,
358            age_days_map,
359            target_file,
360            repo_scopes,
361            ann_enabled,
362            embedding_timeout,
363            cold_start_retry,
364            adaptive_prune,
365        },
366    )
367    .await
368}
369
/// Reciprocal Rank Fusion constant `k`. 60 is the standard value from the
/// original Cormack-Clarke-Buettcher paper and a robust default that lets
/// lower-ranked but co-occurring results surface reliably.
const RRF_K: f64 = 60.0;
374
/// Map a file path (or bare filename) to a canonical language tag. Matches
/// the spelling used in skill tags so `QueryFilter.language` can round-trip
/// cleanly between the MCP caller and the indexed chunk metadata.
///
/// Returns `None` when the path has no `.`-separated extension or the
/// extension is not recognised — callers pass that through as "no language
/// filter" rather than guessing at a language that'd drop real hits.
/// Extension matching is ASCII case-insensitive.
///
/// Single canonical extension-to-language map shared by orchestrator and
/// retrieval call sites.
pub fn detect_language_from_path(path: &str) -> Option<String> {
    // Require an actual `.` separator. Without this check a dot-less name
    // that happens to equal an extension (`go`, `c`, `h`, `rs`) would be
    // misread as a source file of that language.
    //
    // Only the last dotted suffix is considered, so compound names like
    // `foo.d.ts` collapse to `ts`.
    let (_, ext) = path.rsplit_once('.')?;
    let language = match ext.to_ascii_lowercase().as_str() {
        "rs" => "rust",
        "ts" | "tsx" => "typescript",
        "js" | "jsx" | "mjs" | "cjs" => "javascript",
        "py" | "pyi" => "python",
        "go" => "go",
        "java" => "java",
        "kt" | "kts" => "kotlin",
        "swift" => "swift",
        "rb" => "ruby",
        "php" => "php",
        "cpp" | "cc" | "cxx" | "hpp" | "hh" => "cpp",
        "c" | "h" => "c",
        "cs" => "csharp",
        _ => return None,
    };
    Some(language.to_owned())
}
408
/// Count concreteness signals in a rule's content. Used to boost
/// concrete rules over slogan rules at ranking time.
///
/// Three kinds of signal are counted, each capped so a giant code-fence
/// rule doesn't run away:
///
/// * **Backticked tokens** (`useQuery`): number of backtick characters
///   divided by two, capped at 3.
/// * **Path-like words** (`packages/router-core/index.ts`): whitespace
///   separated words that contain a `/` and whose last `/`-segment is longer
///   than three bytes and contains a `.`; capped at 3.
/// * **Version-like words** (`v1.2`, `20.11`): words that, after stripping
///   surrounding punctuation, either start with `v` followed by a digit
///   (and are longer than two bytes) or have at least two `.`-separated
///   parts that parse as `u32`; capped at 2.
///
/// The returned total is therefore at most 8; any further saturation is up
/// to the caller.
fn concreteness_score(content: &str) -> usize {
    // A word counts as path-like when its final `/`-segment looks like a
    // file name with an extension.
    fn looks_like_path(word: &str) -> bool {
        word.contains('/')
            && word
                .rsplit('/')
                .next()
                .is_some_and(|tail| tail.len() > 3 && tail.contains('.'))
    }

    // A word counts as version-like when it is `v<digit>…` or carries two
    // or more numeric dotted components.
    fn looks_like_version(word: &str) -> bool {
        let token = word.trim_matches(|c: char| !c.is_ascii_alphanumeric() && c != '.');
        let v_prefixed = token.len() > 2
            && token
                .strip_prefix('v')
                .and_then(|rest| rest.chars().next())
                .is_some_and(|c| c.is_ascii_digit());
        let numeric_parts = token
            .split('.')
            .filter(|part| part.parse::<u32>().is_ok())
            .count();
        v_prefixed || numeric_parts >= 2
    }

    // Each backticked token is wrapped in two backticks.
    let backtick_pairs = content.matches('`').count() / 2;
    let path_words = content
        .split_whitespace()
        .filter(|word| looks_like_path(word))
        .count();
    let version_words = content
        .split_whitespace()
        .filter(|word| looks_like_version(word))
        .count();

    backtick_pairs.min(3) + path_words.min(3) + version_words.min(2)
}
454
/// Absolute floor for `ScoredRuleChunk.score`. A score at or below this
/// value is RRF rounding noise — typically a chunk admitted through the
/// file-pattern cascade safety net with zero lexical or semantic overlap
/// with the query. Keeping such chunks in the result set burns agent
/// tokens for negative value.
const MIN_RELEVANCE_SCORE: f64 = 0.001;
461
/// Adaptive top-K injection threshold. When the top-ranked rule scores
/// below this, zero rules are returned instead of padding to k: weak rules
/// on simple tasks are more likely to distract than help.
///
/// The value is ~5× `MIN_RELEVANCE_SCORE`. RRF with k=60 yields a top-1
/// score of roughly 0.008-0.015 for genuinely strong matches and
/// 0.001-0.003 for a cascade-only tail; 0.005 sits in the gap between them.
const ADAPTIVE_INJECT_THRESHOLD: f64 = 0.005;
470
/// Relative floor — tail rules scoring less than this fraction of the
/// top-ranked rule's score are dropped. Introduced for the "everything
/// scored 0.02" pathological flat distribution seen in claude sessions,
/// where 10 rules within 5% of each other meant the agent couldn't tell
/// signal from noise. 0.35 keeps any rule worth at least ~1/3 of the
/// leader and drops the rest.
const RELATIVE_RELEVANCE_FLOOR: f64 = 0.35;
478
/// Absolute relevance floor for the EXPLICIT recall surfaces
/// (`search_rules` tool + CLI `recall`), applied by
/// [`rules::apply_explicit_recall_threshold`] to the FINAL re-ranked score.
/// If even the top hit is below this floor, the whole result set is treated
/// as noise and explicit recall returns nothing rather than weak filler
/// rules.
///
/// Tuned conservatively:
///
/// * After the lexical-intent re-rank a genuinely relevant top hit is
///   boosted well into the 0.1+ range, and exact-title-strict / starter
///   hits sit at `2.0 + conf` — an order of magnitude above this floor, so
///   strong matches are never suppressed.
/// * A cascade-only / no-intent-overlap top hit (the Codecov-in-a-wrong-file
///   case the audit flagged) receives no lexical boost and stays in the raw
///   fused RRF band (~0.001–0.005), which is below the floor.
///
/// The value sits in the gap between those two: 2× the hook's
/// `ADAPTIVE_INJECT_THRESHOLD` (0.005) and below the boosted strong-match
/// range.
const EXPLICIT_RECALL_MIN_RELEVANCE: f64 = 0.01;
496
/// Relative tail floor for the explicit recall gate — results below this
/// fraction of the (surviving) top hit are dropped. Deliberately looser
/// than the in-retrieval `RELATIVE_RELEVANCE_FLOOR` (0.35): an explicit
/// user query should keep more of a genuine result set, shedding only the
/// clearly irrelevant tail (e.g. a rule worth <1/5 of the leader) and never
/// trimming a borderline-but-related supporting rule.
const EXPLICIT_RECALL_RELATIVE_FLOOR: f64 = 0.20;
504
/// Minimum number of distinct salient (non-stop-word) query terms that a
/// candidate rule's *directive* (its `Rule Name:` title + leading body)
/// must share with the query intent to count as intent-aligned.
///
/// This is the core of the intent-alignment gate (the leak-free A/B
/// diagnosis). Hybrid retrieval plus the relative floors admit rules that
/// are TOPICALLY ADJACENT but address a different ACTION/SUBJECT — e.g. a
/// "return false vs panic" directive recalls "panic-message wording" and
/// "test-timing" rules because they share the topical anchor token
/// (`panic`) and the same file area. A LONE shared anchor token is exactly
/// that noise, so the bar is **2**: a genuinely on-subject directive shares
/// the verb/object pair (`return` + `false`, or `validate` + `input`), not
/// just the one topical word. Self-recall — where the query *is* the rule's
/// own intent text — shares nearly every directive term, clears the bar by
/// a wide margin, and is never regressed.
///
/// Iter-2 (2026-06-02): the count alone proved too weak — two GENERIC
/// anchors (e.g. `panic` + `input`) could clear it without any real subject
/// overlap. The absolute path now ALSO requires the shared set to contain
/// at least [`MIN_DISTINCTIVE_SHARED_TERMS`] non-generic term AND to cover
/// [`MIN_RULE_DIRECTIVE_COVERAGE_RATIO`] of the rule's own directive, so
/// this count is a necessary-but-not-sufficient gate, not the whole test.
///
/// NOTE(review): `MIN_RULE_DIRECTIVE_COVERAGE_RATIO` is not defined in the
/// visible part of this file — confirm the coverage requirement is still
/// in force.
const MIN_INTENT_DIRECTIVE_OVERLAP: usize = 2;
530
/// Alternate (ratio) path to intent alignment: a candidate also passes when
/// the SHARED (distinctive) terms cover at least this fraction of the
/// query's salient terms, even if the absolute count is below
/// [`MIN_INTENT_DIRECTIVE_OVERLAP`].
///
/// This keeps SHORT, sharp queries — and the realistic per-rule fan-out of
/// a two-word intent ("batching and retries", where each seeded rule
/// matches one of the two terms) — from over-pruning: a 2-salient-term
/// intent whose single shared term is half the query is still a real
/// subject match, not a lone-anchor coincidence.
///
/// Kept at **0.5** (a half-coverage match). The iter-2 precision gain comes
/// from the DISTINCTIVENESS requirement ([`MIN_DISTINCTIVE_SHARED_TERMS`]),
/// which now guards BOTH this path and the absolute path — not from raising
/// this fraction. Raising it to 0.6 was tried and regressed the realistic
/// "one rule matches each half of a two-word intent" fan-out. A lone GENERIC
/// anchor that is half of a 2-term query no longer slips through here,
/// because the distinctiveness gate rejects it before this ratio is ever
/// consulted.
const MIN_INTENT_DIRECTIVE_OVERLAP_RATIO: f64 = 0.5;
548
/// Minimum number of shared terms that must be DISTINCTIVE — i.e. not in
/// the generic topical/code-anchor set ([`is_generic_anchor`]) — for a
/// concern match. This is the iter-2 precision lever and guards BOTH the
/// absolute-overlap and ratio paths.
///
/// The leak-free A/B diagnosis is precise: the extra false positives came
/// from rules sharing a generic TOPICAL ANCHOR (`panic`, `test`, `error`)
/// plus incidental filler, not a genuine subject/action overlap. Raw term
/// overlap cannot tell "shares the subject" from "name-drops the same topic
/// word": an all-anchor pair scores >= 2 just like a real verb/object pair.
/// Requiring at least one DISTINCTIVE shared term forces the overlap to
/// include a specific subject/action token (`false`, `invalid`, `hijack`,
/// `memchr`, `yaml`), not merely the topic everyone in that file area
/// mentions. A match made ONLY of generic anchors fails the gate regardless
/// of count or ratio — exactly the topically-adjacent, wrong-subject case
/// the A/B blamed for the FP penalty.
///
/// Chosen over a rule-side coverage ratio because distinctiveness keys off
/// the SHARED set, so it is robust to directive phrasing (body-phrased
/// directives, bare-label titles) that a fixed coverage floor mis-handles.
const MIN_DISTINCTIVE_SHARED_TERMS: usize = 1;
569
/// Score at or above which a candidate is EXEMPT from the intent-alignment
/// gate entirely — it is kept regardless of directive overlap.
///
/// The explicit paths inject very high-value, already-intent-validated
/// signals after fusion: exact-title-strict matches (`2.0 + conf`), the
/// cross-repo starter set, and the lexical-intent re-rank boost. A
/// candidate reaching this score earned its place through one of those
/// strong signals, so the alignment gate must never second-guess it (doing
/// so would risk the strong-match / self-recall regression the goal
/// forbids).
///
/// The value sits an order of magnitude above the boosted strong-match RRF
/// band (~0.1–0.45 after the lexical re-rank) but below the
/// exact-title-strict `2.0 + conf` floor, so it exempts the unambiguous
/// winners while still scrutinising the topically-adjacent middle band
/// that the diagnosis flagged.
const INTENT_ALIGNMENT_EXEMPT_SCORE: f64 = 0.6;
583
584#[cfg(test)]
585mod tests {
586    use super::rules::pattern_allows;
587    use super::*;
588    use crate::cloud::api_types::RecallPastVerdictsRequest;
589    use crate::context::index_db::{QueryFilter, open_pool_at, upsert_rule_chunks};
590    use crate::context::rule_source::RuleDocument;
591    use crate::context::types::{PastVerdict, PastVerdictScope};
592    use crate::errors::CoreError;
593    use crate::review_trajectory::{TrajectoryBuilder, TrajectoryStep};
594    use async_trait::async_trait;
595    use tempfile::TempDir;
596
597    // -- Cascade pattern_allows tests (Iter-9) --
598
599    #[test]
600    fn pattern_allows_table() {
601        // Each row: (pattern_json, path, expected). Covers null/empty universal
602        // pass-through, single glob, directory scope, Windows path
603        // normalisation, and malformed-JSON over-recall.
604        let cases: &[(Option<&str>, &str, bool)] = &[
605            (None, "tokio/src/io/uring.rs", true),
606            (Some(""), "tokio/src/io/uring.rs", true),
607            (Some("[]"), "tokio/src/io/uring.rs", true),
608            (Some(r#"["**/*.rs"]"#), "tokio/src/io/uring.rs", true),
609            (Some(r#"["**/*.rs"]"#), ".github/workflows/ci.yml", false),
610            (
611                Some(r#"["tokio/src/io/**"]"#),
612                "tokio/src/io/uring.rs",
613                true,
614            ),
615            (
616                Some(r#"["tokio/src/io/**"]"#),
617                "tokio/src/runtime/mod.rs",
618                false,
619            ),
620            (
621                Some(r#"["tokio/src/io/**"]"#),
622                "tokio\\src\\io\\uring.rs",
623                true,
624            ),
625            (
626                Some(r#"["tokio/src/io/**"]"#),
627                "/tokio/src/io/uring.rs",
628                true,
629            ),
630            // Invalid JSON shouldn't silently drop a rule — better to over-recall
631            // than to lose signal on a parse error.
632            (Some("not-json"), "any/path.rs", true),
633            (Some("{}"), "any/path.rs", true),
634        ];
635        for (pat, path, expected) in cases {
636            assert_eq!(
637                pattern_allows(*pat, path),
638                *expected,
639                "pat={pat:?} path={path}"
640            );
641        }
642    }
643
644    // -- detect_language_from_path tests --
645
646    #[test]
647    fn detect_language_from_path_covers_common_extensions() {
648        assert_eq!(
649            detect_language_from_path("src/main.rs").as_deref(),
650            Some("rust")
651        );
652        assert_eq!(
653            detect_language_from_path("apps/web/index.tsx").as_deref(),
654            Some("typescript")
655        );
656        assert_eq!(
657            detect_language_from_path("scripts/build.py").as_deref(),
658            Some("python")
659        );
660        assert_eq!(
661            detect_language_from_path("api/handler.go").as_deref(),
662            Some("go")
663        );
664    }
665
666    #[test]
667    fn detect_language_from_path_returns_none_for_unknown_ext() {
668        assert!(detect_language_from_path("README.md").is_none());
669        assert!(detect_language_from_path("no_extension").is_none());
670    }
671
672    #[test]
673    fn shared_search_repo_scopes_are_case_insensitive() {
674        assert_eq!(
675            unique_repo_scopes(&[
676                "Difflore-Fixtures/Vite".to_owned(),
677                " ".to_owned(),
678                "difflore-fixtures/vite".to_owned(),
679                "ViteJS/Vite".to_owned(),
680            ]),
681            vec![
682                "difflore-fixtures/vite".to_owned(),
683                "vitejs/vite".to_owned()
684            ]
685        );
686    }
687
688    // -- Past verdict recall tests --
689
690    struct ErroringRecaller;
691
692    #[async_trait]
693    impl PastVerdictRecaller for ErroringRecaller {
694        async fn recall(
695            &self,
696            _req: RecallPastVerdictsRequest,
697        ) -> Result<Vec<PastVerdict>, CoreError> {
698            Err(CoreError::Internal("simulated failure".into()))
699        }
700    }
701
702    struct StaticRecaller(Vec<PastVerdict>);
703
704    #[async_trait]
705    impl PastVerdictRecaller for StaticRecaller {
706        async fn recall(
707            &self,
708            _req: RecallPastVerdictsRequest,
709        ) -> Result<Vec<PastVerdict>, CoreError> {
710            Ok(self.0.clone())
711        }
712    }
713
714    struct RecordingRecaller(tokio::sync::Mutex<Option<RecallPastVerdictsRequest>>);
715
716    #[async_trait]
717    impl PastVerdictRecaller for RecordingRecaller {
718        async fn recall(
719            &self,
720            req: RecallPastVerdictsRequest,
721        ) -> Result<Vec<PastVerdict>, CoreError> {
722            *self.0.lock().await = Some(req);
723            Ok(Vec::new())
724        }
725    }
726
727    fn verdict(id: &str, status: &str) -> PastVerdict {
728        PastVerdict {
729            extraction_id: id.to_owned(),
730            code_snippet: format!("snippet for {id}"),
731            issue_text: format!("issue for {id}"),
732            status: status.to_owned(),
733            reason: Some(format!("reason-{id}")),
734            similarity: 0.87,
735            created_at: "2026-04-10T00:00:00Z".to_owned(),
736            signature: None,
737            source_pr_number: None,
738            source_pr_title: None,
739            source_pr_url: None,
740        }
741    }
742
743    fn scored(id: &str, score: f64) -> ScoredRuleChunk {
744        ScoredRuleChunk {
745            skill_id: id.to_owned(),
746            content: format!("Rule ID: {id}\nRule Name: {id}\n\nbody"),
747            score,
748            confidence: 0.7,
749        }
750    }
751
752    fn embedding_blob(embedding: &[f32]) -> Vec<u8> {
753        embedding
754            .iter()
755            .flat_map(|value| value.to_le_bytes())
756            .collect()
757    }
758
759    #[test]
760    fn merge_scored_rule_chunks_tie_breaks_by_skill_id() {
761        let merged = merge_scored_rule_chunks(
762            vec![vec![scored("rule-b", 0.5)], vec![scored("rule-a", 0.5)]],
763            2,
764        );
765        let ids: Vec<_> = merged.iter().map(|r| r.skill_id.as_str()).collect();
766        assert_eq!(ids, vec!["rule-a", "rule-b"]);
767    }
768
769    #[test]
770    fn rerank_scored_rule_chunks_tie_breaks_by_skill_id() {
771        let ranked = rerank_scored_rule_chunks_by_lexical_query(
772            vec![scored("rule-b", 0.5), scored("rule-a", 0.5)],
773            "",
774            2,
775        );
776        let ids: Vec<_> = ranked.iter().map(|r| r.skill_id.as_str()).collect();
777        assert_eq!(ids, vec!["rule-a", "rule-b"]);
778    }
779
780    #[test]
781    fn retrieval_query_variants_adds_intent_lane_when_file_query_differs() {
782        assert_eq!(
783            retrieval_query_variants(
784                "src/context.go Bind handlers must check returned error",
785                "Bind handlers must check returned error",
786            ),
787            vec![
788                "src/context.go Bind handlers must check returned error",
789                "Bind handlers must check returned error",
790            ],
791        );
792        assert_eq!(
793            retrieval_query_variants("Bind handlers", "bind handlers"),
794            vec!["Bind handlers"],
795        );
796        assert_eq!(retrieval_query_variants("", "please"), vec!["please"]);
797    }
798
799    #[tokio::test]
800    async fn retrieve_rules_for_search_uses_intent_lane_to_escape_path_noise() {
801        let tmp = TempDir::new().unwrap();
802        let path = tmp.path().join("idx.db");
803        let pool = open_pool_at(&path).await.unwrap();
804        let repo = "gin-gonic/gin";
805        let mut rules = Vec::new();
806        for i in 0..8 {
807            let mut rule = rule_doc(
808                &format!("path-noise-{i}"),
809                "context go context go context go path-only convention",
810                Some("go"),
811                Some(repo),
812            );
813            rule.file_patterns = Some(r#"["**/*.go"]"#.to_owned());
814            rules.push(rule);
815        }
816        let mut signal = rule_doc(
817            "bind-error",
818            "Bind handlers must check returned error before continuing",
819            Some("go"),
820            Some(repo),
821        );
822        signal.file_patterns = Some(r#"["**/*.go"]"#.to_owned());
823        rules.push(signal);
824        upsert_rule_chunks(&pool, &rules).await.unwrap();
825
826        let hits = retrieve_rules_for_search(
827            &pool,
828            RuleSearchRetrievalOptions {
829                query: "src/context.go",
830                lexical_query: "Bind handlers must check returned error",
831                top_k: 1,
832                confidence_map: None,
833                age_days_map: None,
834                target_file: Some("src/context.go"),
835                repo_scopes: &[repo.to_owned()],
836                ann_enabled: false,
837                embedding_timeout: Some(std::time::Duration::from_millis(2500)),
838                cold_start_retry: false,
839                adaptive_prune: false,
840            },
841        )
842        .await
843        .unwrap();
844
845        assert_eq!(
846            hits.first().map(|hit| hit.skill_id.as_str()),
847            Some("bind-error")
848        );
849    }
850
851    #[tokio::test]
852    async fn retrieve_rules_for_search_without_repo_scopes_uses_project_index() {
853        let tmp = TempDir::new().unwrap();
854        let path = tmp.path().join("idx.db");
855        let pool = open_pool_at(&path).await.unwrap();
856        let rules = vec![rule_doc(
857            "signal",
858            "Avoid unwrap in request handlers; return structured errors",
859            Some("rust"),
860            Some("acme/widgets"),
861        )];
862        upsert_rule_chunks(&pool, &rules).await.unwrap();
863
864        let hits = retrieve_rules_for_search(
865            &pool,
866            RuleSearchRetrievalOptions {
867                query: "src/http/handler.rs Avoid unwrap in request handlers",
868                lexical_query: "Avoid unwrap in request handlers",
869                top_k: 1,
870                confidence_map: None,
871                age_days_map: None,
872                target_file: Some("src/http/handler.rs"),
873                repo_scopes: &[],
874                ann_enabled: false,
875                embedding_timeout: Some(std::time::Duration::from_millis(2500)),
876                cold_start_retry: false,
877                adaptive_prune: false,
878            },
879        )
880        .await
881        .unwrap();
882
883        assert_eq!(
884            hits.first().map(|hit| hit.skill_id.as_str()),
885            Some("signal")
886        );
887    }
888
889    #[test]
890    fn merge_past_verdicts_tie_breaks_by_extraction_id() {
891        let merged = merge_past_verdicts(
892            vec![
893                vec![verdict("verdict-b", "approved")],
894                vec![verdict("verdict-a", "approved")],
895            ],
896            2,
897        );
898        let ids: Vec<_> = merged.iter().map(|v| v.extraction_id.as_str()).collect();
899        assert_eq!(ids, vec!["verdict-a", "verdict-b"]);
900    }
901
902    #[tokio::test]
903    async fn test_retrieve_past_verdicts_returns_empty_on_error() {
904        let recaller = ErroringRecaller;
905        let emb = vec![0.1f32; 8];
906        let out = retrieve_past_verdicts(
907            &recaller,
908            &emb,
909            Some("repo-1"),
910            PastVerdictScope::Team,
911            5,
912            None,
913        )
914        .await;
915        assert!(
916            out.is_empty(),
917            "errors must be downgraded to an empty Vec, got {} items",
918            out.len()
919        );
920    }
921
922    #[tokio::test]
923    async fn test_retrieve_past_verdicts_forwards_rows_on_success() {
924        let recaller = StaticRecaller(vec![verdict("e1", "approved"), verdict("e2", "rejected")]);
925        let emb = vec![0.0f32; 4];
926        let out =
927            retrieve_past_verdicts(&recaller, &emb, None, PastVerdictScope::Personal, 3, None)
928                .await;
929        assert_eq!(out.len(), 2);
930        assert_eq!(out[0].extraction_id, "e1");
931        assert_eq!(out[1].status, "rejected");
932    }
933
934    #[tokio::test]
935    async fn text_past_verdict_recall_forwards_team_scope() {
936        let recaller = RecordingRecaller(tokio::sync::Mutex::new(None));
937
938        let _ = retrieve_past_verdicts_by_text_with_team(
939            &recaller,
940            "router cache invalidation",
941            Some("acme/widgets"),
942            PastVerdictScope::Team,
943            7,
944            Some("src/router.ts"),
945            Some("team-1"),
946        )
947        .await;
948
949        let req = recaller.0.lock().await.clone().expect("request captured");
950        assert_eq!(req.scope, "team");
951        assert_eq!(req.team_id.as_deref(), Some("team-1"));
952        assert_eq!(req.repo_id.as_deref(), Some("acme/widgets"));
953        assert_eq!(req.target_file.as_deref(), Some("src/router.ts"));
954        assert_eq!(req.k, 7);
955    }
956
957    #[tokio::test]
958    async fn embedding_past_verdict_recall_forwards_team_scope() {
959        let recaller = RecordingRecaller(tokio::sync::Mutex::new(None));
960        let embedding = vec![0.25, 0.5, 0.75];
961
962        let _ = retrieve_past_verdicts_with_team(
963            &recaller,
964            &embedding,
965            Some("acme/widgets"),
966            PastVerdictScope::Team,
967            4,
968            Some("src/router.ts"),
969            Some("team-1"),
970        )
971        .await;
972
973        let req = recaller.0.lock().await.clone().expect("request captured");
974        assert_eq!(req.scope, "team");
975        assert_eq!(req.team_id.as_deref(), Some("team-1"));
976        assert_eq!(req.repo_id.as_deref(), Some("acme/widgets"));
977        assert_eq!(req.target_file.as_deref(), Some("src/router.ts"));
978        assert_eq!(req.embedding, embedding);
979        assert_eq!(req.query_text, None);
980        assert_eq!(req.k, 4);
981    }
982
983    // -- Hybrid retrieval tests --
984
985    fn rule_doc(
986        id: &str,
987        content: &str,
988        language: Option<&str>,
989        repo_scope: Option<&str>,
990    ) -> RuleDocument {
991        RuleDocument {
992            skill_id: id.to_owned(),
993            title: id.to_owned(),
994            content: content.to_owned(),
995            confidence: 0.7,
996            file_patterns: None,
997            language: language.map(String::from),
998            repo_scope: repo_scope.map(String::from),
999        }
1000    }
1001
1002    #[tokio::test]
1003    async fn rrf_fusion_prefers_results_ranked_high_by_both() {
1004        // A chunk that matches BOTH the keyword query and the semantic
1005        // embedding should outrank a chunk that matches only one path.
1006        let tmp = TempDir::new().unwrap();
1007        let path = tmp.path().join("idx.db");
1008        let pool = open_pool_at(&path).await.unwrap();
1009
1010        let rules = vec![
1011            // Rule A — contains the query token AND lots of co-occurring
1012            // words that also skew the bag-of-words embedding.
1013            rule_doc(
1014                "A",
1015                "prefer structured_logging for observability when emitting structured_logging events",
1016                None,
1017                None,
1018            ),
1019            // Rule B — token-only hit.
1020            rule_doc(
1021                "B",
1022                "avoid structured_logging in tests; use a stub logger instead",
1023                None,
1024                None,
1025            ),
1026            // Rule C — no token hit, unrelated content.
1027            rule_doc(
1028                "C",
1029                "always write unit tests for every public api",
1030                None,
1031                None,
1032            ),
1033        ];
1034        upsert_rule_chunks(&pool, &rules).await.unwrap();
1035
1036        let mut tb = TrajectoryBuilder::new();
1037        let hits = retrieve_rules_with_confidence(
1038            &pool,
1039            "structured_logging observability",
1040            RetrievalOptions {
1041                top_k: Some(3),
1042                trajectory: Some(&mut tb),
1043                ..Default::default()
1044            },
1045        )
1046        .await
1047        .unwrap();
1048
1049        // A should rank first because it wins both the FTS path (token
1050        // appears twice) and the bag-of-words embedding (token has the
1051        // highest contribution to the summed vector).
1052        assert!(!hits.is_empty());
1053        assert_eq!(hits[0].skill_id, "A", "A should RRF-win over B and C");
1054
1055        // Verify the HybridFusion step was emitted.
1056        let has_fusion = tb
1057            .steps()
1058            .iter()
1059            .any(|s| matches!(s, TrajectoryStep::HybridFusion { .. }));
1060        assert!(has_fusion, "HybridFusion trajectory step must fire");
1061    }
1062
1063    #[tokio::test]
1064    async fn sha1_embedder_path_weights_fts_higher() {
1065        // With the default (offline) SHA1 embedder is_semantic() = false
1066        // → the RRF weights shift to (0.2 emb, 0.8 fts). A token-only
1067        // match (no embedding overlap) must still rank before a
1068        // semantic-adjacent-but-token-absent rule.
1069        let tmp = TempDir::new().unwrap();
1070        let path = tmp.path().join("idx.db");
1071        let pool = open_pool_at(&path).await.unwrap();
1072
1073        let rules = vec![
1074            // Exact rare-token hit, no overlap with the other rules.
1075            rule_doc(
1076                "keyword",
1077                "do not shadow with deprecated_zzz_api in request handlers",
1078                None,
1079                None,
1080            ),
1081            // Semantically-adjacent content but NO token overlap with the query.
1082            rule_doc(
1083                "semantic",
1084                "request handlers should use async primitives carefully",
1085                None,
1086                None,
1087            ),
1088        ];
1089        upsert_rule_chunks(&pool, &rules).await.unwrap();
1090
1091        let hits = retrieve_rules_with_confidence(
1092            &pool,
1093            "deprecated_zzz_api",
1094            RetrievalOptions {
1095                top_k: Some(2),
1096                ..Default::default()
1097            },
1098        )
1099        .await
1100        .unwrap();
1101
1102        assert!(!hits.is_empty());
1103        assert_eq!(
1104            hits[0].skill_id, "keyword",
1105            "under SHA1 embedder, FTS hit should win over a generic semantic neighbour"
1106        );
1107    }
1108
1109    #[tokio::test]
1110    async fn linear_scan_excludes_mismatched_embedding_dims() {
1111        let tmp = TempDir::new().unwrap();
1112        let path = tmp.path().join("idx.db");
1113        let pool = open_pool_at(&path).await.unwrap();
1114        let query = "dim_mismatch_probe";
1115        let query_emb = crate::context::embedding::embed_text(query);
1116        let stale_embedding = vec![query_emb[0], query_emb[1]];
1117        let stale_blob = embedding_blob(&stale_embedding);
1118
1119        sqlx::query(
1120            "INSERT INTO rule_chunks (id, skill_id, content, embedding, file_patterns, language, repo_scope)
1121             VALUES (?1, ?2, ?3, ?4, NULL, NULL, NULL)",
1122        )
1123        .bind("rule-stale")
1124        .bind("stale")
1125        .bind("unrelated content that should not match the query lexically")
1126        .bind(stale_blob)
1127        .execute(&pool)
1128        .await
1129        .unwrap();
1130
1131        let hits = retrieve_rules_with_confidence(
1132            &pool,
1133            query,
1134            RetrievalOptions {
1135                top_k: Some(5),
1136                ann_enabled: false,
1137                ..Default::default()
1138            },
1139        )
1140        .await
1141        .unwrap();
1142
1143        assert!(
1144            hits.is_empty(),
1145            "stale chunks from a different embedding dim must not enter linear cosine ranking"
1146        );
1147    }
1148
1149    #[tokio::test]
1150    async fn strict_cascade_does_not_fallback_to_foreign_file_patterns() {
1151        let tmp = TempDir::new().unwrap();
1152        let path = tmp.path().join("idx.db");
1153        let pool = open_pool_at(&path).await.unwrap();
1154        let mut foreign = rule_doc(
1155            "foreign",
1156            "python request handlers should avoid sync database calls",
1157            Some("python"),
1158            Some("acme/widgets"),
1159        );
1160        foreign.file_patterns = Some(r#"["**/*.py"]"#.to_owned());
1161        upsert_rule_chunks(&pool, &[foreign]).await.unwrap();
1162
1163        let filter = QueryFilter {
1164            language: None,
1165            repo_scope: Some("acme/widgets".to_owned()),
1166        };
1167        let hits = retrieve_rules_with_confidence(
1168            &pool,
1169            "request handlers database",
1170            RetrievalOptions {
1171                top_k: Some(5),
1172                target_file: Some("src/server.rs"),
1173                filter: Some(&filter),
1174                ann_enabled: false,
1175                ..Default::default()
1176            },
1177        )
1178        .await
1179        .unwrap();
1180
1181        assert!(
1182            hits.is_empty(),
1183            "explicit **/*.py rule must not be recalled for src/server.rs"
1184        );
1185    }
1186
1187    #[tokio::test]
1188    async fn strict_cascade_keeps_universal_rules_for_target_file() {
1189        let tmp = TempDir::new().unwrap();
1190        let path = tmp.path().join("idx.db");
1191        let pool = open_pool_at(&path).await.unwrap();
1192        upsert_rule_chunks(
1193            &pool,
1194            &[rule_doc(
1195                "universal",
1196                "request handlers should return structured errors",
1197                None,
1198                Some("acme/widgets"),
1199            )],
1200        )
1201        .await
1202        .unwrap();
1203
1204        let filter = QueryFilter {
1205            language: None,
1206            repo_scope: Some("acme/widgets".to_owned()),
1207        };
1208        let hits = retrieve_rules_with_confidence(
1209            &pool,
1210            "request handlers structured errors",
1211            RetrievalOptions {
1212                top_k: Some(5),
1213                target_file: Some("src/server.rs"),
1214                filter: Some(&filter),
1215                ann_enabled: false,
1216                ..Default::default()
1217            },
1218        )
1219        .await
1220        .unwrap();
1221
1222        assert_eq!(
1223            hits.first().map(|hit| hit.skill_id.as_str()),
1224            Some("universal")
1225        );
1226    }
1227
1228    #[tokio::test]
1229    async fn retrieve_emits_retrieval_filter_step_when_filter_active() {
1230        let tmp = TempDir::new().unwrap();
1231        let path = tmp.path().join("idx.db");
1232        let pool = open_pool_at(&path).await.unwrap();
1233
1234        let rules = vec![
1235            rule_doc("rust-1", "rust-specific rule content", Some("rust"), None),
1236            rule_doc("py-1", "python-specific rule content", Some("python"), None),
1237        ];
1238        upsert_rule_chunks(&pool, &rules).await.unwrap();
1239
1240        let mut tb = TrajectoryBuilder::new();
1241        let filter = QueryFilter {
1242            language: Some("rust".into()),
1243            repo_scope: None,
1244        };
1245        let _ = retrieve_rules_with_confidence(
1246            &pool,
1247            "rule",
1248            RetrievalOptions {
1249                top_k: Some(5),
1250                filter: Some(&filter),
1251                trajectory: Some(&mut tb),
1252                ..Default::default()
1253            },
1254        )
1255        .await
1256        .unwrap();
1257
1258        let got = tb
1259            .steps()
1260            .iter()
1261            .find_map(|s| match s {
1262                TrajectoryStep::RetrievalFilter { before, after } => Some((*before, *after)),
1263                _ => None,
1264            })
1265            .expect("RetrievalFilter step must fire when filter is active");
1266        assert_eq!(got.0, 2, "before = 2 (total chunks)");
1267        assert_eq!(got.1, 1, "after = 1 (only rust chunk survives)");
1268    }
1269}
difflore_core/context/retrieval/mod.rs

difflore_core/context/retrieval/
mod.rs