1mod past_verdicts;
2mod query_embed;
3mod rule_bodies;
4mod rules;
5mod scoring;
6
7pub use past_verdicts::{
8 PastVerdictRecaller, merge_past_verdicts, retrieve_past_verdicts,
9 retrieve_past_verdicts_by_text, retrieve_past_verdicts_by_text_with_team,
10 retrieve_past_verdicts_with_team,
11};
12pub use rule_bodies::{RenderedRuleBody, RenderedRuleExample, render_full_rule_bodies};
13pub use rules::{
14 RetrievalOptions, apply_explicit_recall_threshold, apply_intent_alignment_gate, retrieve_rules,
15 retrieve_rules_with_confidence,
16};
17pub use scoring::{RuleKind, effective_confidence, infer_rule_kind};
18
/// A retrieved rule chunk together with the numbers used to rank it.
#[derive(Clone, Debug)]
pub struct ScoredRuleChunk {
    /// Identifier of the rule; merging de-duplicates on it and ranking uses it
    /// as the tie-breaker between equal scores.
    pub skill_id: String,
    /// Chunk text. A line starting with `Rule Name:` is read as the title when
    /// computing the lexical boost.
    pub content: String,
    /// Ranking score; higher values sort first.
    pub score: f64,
    /// Confidence carried alongside the score. It does not take part in the
    /// ordering done in this file.
    pub confidence: f64,
}
27
28fn compare_scored_rule_chunks(a: &ScoredRuleChunk, b: &ScoredRuleChunk) -> std::cmp::Ordering {
29 b.score
30 .total_cmp(&a.score)
31 .then_with(|| a.skill_id.cmp(&b.skill_id))
32}
33
34pub fn merge_scored_rule_chunks(
42 groups: impl IntoIterator<Item = Vec<ScoredRuleChunk>>,
43 limit: usize,
44) -> Vec<ScoredRuleChunk> {
45 let mut by_skill_id: std::collections::HashMap<String, ScoredRuleChunk> =
46 std::collections::HashMap::new();
47 for group in groups {
48 for chunk in group {
49 match by_skill_id.get(&chunk.skill_id) {
50 Some(existing) if existing.score >= chunk.score => {}
51 _ => {
52 by_skill_id.insert(chunk.skill_id.clone(), chunk);
53 }
54 }
55 }
56 }
57 let mut merged: Vec<_> = by_skill_id.into_values().collect();
58 merged.sort_by(compare_scored_rule_chunks);
59 merged.truncate(limit);
60 merged
61}
62
/// Normalizes repo scopes (trim + ASCII lowercase), drops blank entries, and
/// removes duplicates while keeping first-seen order.
fn unique_repo_scopes(repo_scopes: &[String]) -> Vec<String> {
    let normalized = repo_scopes
        .iter()
        .map(|raw| raw.trim().to_ascii_lowercase())
        .filter(|scope| !scope.is_empty());

    let mut distinct: Vec<String> = Vec::new();
    for scope in normalized {
        if !distinct.contains(&scope) {
            distinct.push(scope);
        }
    }
    distinct
}
76
77fn search_filter(
78 target_file: Option<&str>,
79 repo_scope: Option<&str>,
80) -> crate::context::index_db::QueryFilter {
81 crate::context::index_db::QueryFilter {
82 language: target_file.and_then(detect_language_from_path),
83 repo_scope: repo_scope.map(String::from),
84 }
85}
86
/// Extracts the rule title from `content`.
///
/// The title is the trimmed remainder of the first line that starts with
/// `Rule Name:`. If there is no such line, or that first line's title is
/// blank, `fallback` is returned instead.
fn rule_title(content: &str, fallback: &str) -> String {
    const TITLE_PREFIX: &str = "Rule Name:";

    let first_declared = content
        .lines()
        .find_map(|line| line.strip_prefix(TITLE_PREFIX))
        .map(str::trim);

    match first_declared {
        Some(title) if !title.is_empty() => title.to_owned(),
        _ => fallback.to_owned(),
    }
}
94
/// Splits `query` into distinct, lowercased search terms.
///
/// Tokens are maximal runs of ASCII alphanumerics; tokens shorter than three
/// bytes and stop words are dropped. Order of first occurrence is preserved.
fn lexical_terms(query: &str) -> Vec<String> {
    const MIN_TERM_LEN: usize = 3;
    const STOP_WORDS: &[&str] = &[
        "about", "after", "again", "against", "all", "and", "any", "are", "around", "because",
        "been", "before", "being", "between", "but", "can", "cannot", "could", "does", "doing",
        "done", "each", "for", "from", "had", "has", "have", "how", "into", "its", "more", "must",
        "our", "out", "over", "rule", "rules", "should", "than", "that", "the", "their", "then",
        "there", "these", "this", "those", "through", "use", "using", "was", "were", "what",
        "when", "where", "which", "while", "with", "without", "would", "you", "your",
    ];

    // `insert` returns false for a term already seen, which filters duplicates
    // without disturbing first-seen order.
    let mut seen = std::collections::HashSet::new();
    query
        .split(|ch: char| !ch.is_ascii_alphanumeric())
        .filter(|token| token.len() >= MIN_TERM_LEN)
        .map(str::to_ascii_lowercase)
        .filter(|term| !STOP_WORDS.contains(&term.as_str()))
        .filter(|term| seen.insert(term.clone()))
        .collect()
}
119
/// Canonical comparison key for a query: its ASCII-alphanumeric tokens,
/// lowercased and joined by single spaces. Any other character is a separator.
fn normalized_query_key(query: &str) -> String {
    let mut key = String::with_capacity(query.len());
    for token in query.split(|ch: char| !ch.is_ascii_alphanumeric()) {
        if token.is_empty() {
            continue;
        }
        if !key.is_empty() {
            key.push(' ');
        }
        key.extend(token.chars().map(|ch| ch.to_ascii_lowercase()));
    }
    key
}
129
130fn retrieval_query_variants<'a>(query: &'a str, lexical_query: &'a str) -> Vec<&'a str> {
131 let query = query.trim();
132 let lexical_query = lexical_query.trim();
133 let mut variants = Vec::with_capacity(2);
134 if !query.is_empty() {
135 variants.push(query);
136 }
137
138 let query_key = normalized_query_key(query);
139 let lexical_key = normalized_query_key(lexical_query);
140 if !lexical_query.is_empty() && !lexical_key.is_empty() && lexical_key != query_key {
141 variants.push(lexical_query);
142 }
143
144 variants
145}
146
147fn lexical_boost(chunk: &ScoredRuleChunk, terms: &[String]) -> f64 {
148 if terms.is_empty() {
149 return 0.0;
150 }
151
152 let title = rule_title(&chunk.content, &chunk.skill_id).to_ascii_lowercase();
153 let content = chunk.content.to_ascii_lowercase();
154 let mut title_hits = 0usize;
155 let mut content_hits = 0usize;
156
157 for term in terms {
158 if title.contains(term) {
159 title_hits += 1;
160 }
161 if content.contains(term) {
162 content_hits += 1;
163 }
164 }
165
166 let total = terms.len() as f64;
167 let title_ratio = title_hits as f64 / total;
168 let content_ratio = content_hits as f64 / total;
169 let mut boost = 0.24f64.mul_add(title_ratio, 0.08 * content_ratio);
170 if title_hits >= 2 {
171 boost += 0.12;
172 }
173 if title_hits >= terms.len().min(3) {
174 boost += 0.08;
175 }
176 boost.min(0.45)
177}
178
179pub fn rerank_scored_rule_chunks_by_lexical_query(
180 mut chunks: Vec<ScoredRuleChunk>,
181 lexical_query: &str,
182 limit: usize,
183) -> Vec<ScoredRuleChunk> {
184 let terms = lexical_terms(lexical_query);
185 for chunk in &mut chunks {
186 chunk.score += lexical_boost(chunk, &terms);
187 }
188
189 chunks.sort_by(compare_scored_rule_chunks);
190 chunks.truncate(limit);
191 chunks
192}
193
/// Inputs for [`retrieve_rules_for_search`].
///
/// Every field is handed to the internal fan-out query unchanged; the notes
/// below describe how the fan-out uses them.
pub struct RuleSearchRetrievalOptions<'a> {
    /// Primary query text; trimmed and searched first when non-empty.
    pub query: &'a str,
    /// Secondary query text. It is searched as an extra variant when its
    /// normalized form differs from `query`'s, and it drives the lexical re-rank.
    pub lexical_query: &'a str,
    /// Maximum number of results. `0` returns an empty list; values above 50
    /// are capped at 50.
    pub top_k: usize,
    /// Forwarded to `RetrievalOptions::confidence_map`.
    pub confidence_map: Option<&'a std::collections::HashMap<String, f64>>,
    /// Forwarded to `RetrievalOptions::age_days_map`.
    pub age_days_map: Option<&'a std::collections::HashMap<String, f32>>,
    /// Forwarded to `RetrievalOptions::target_file`; its extension also selects
    /// the language of the index filter.
    pub target_file: Option<&'a str>,
    /// Repo scopes to search. They are trimmed, lowercased and de-duplicated,
    /// and only the first four are used. When none remain, a single unscoped
    /// search is performed.
    pub repo_scopes: &'a [String],
    /// Forwarded to `RetrievalOptions::ann_enabled`.
    pub ann_enabled: bool,
    /// Forwarded to `RetrievalOptions::embedding_timeout`.
    pub embedding_timeout: Option<std::time::Duration>,
    /// Forwarded to `RetrievalOptions::cold_start_retry`.
    pub cold_start_retry: bool,
    /// Forwarded to `RetrievalOptions::adaptive_prune`.
    pub adaptive_prune: bool,
}
215
/// Crate-internal request for [`retrieve_rules_fanout`].
///
/// Same shape as [`RuleSearchRetrievalOptions`] plus `eligible_skill_ids`.
pub(crate) struct RuleFanoutQuery<'a> {
    /// Primary query text; trimmed and searched first when non-empty.
    pub query: &'a str,
    /// Secondary query text: an extra search variant when it normalizes
    /// differently from `query`, and the source of the lexical re-rank terms.
    pub lexical_query: &'a str,
    /// Maximum number of results. `0` returns an empty list; capped at 50.
    pub top_k: usize,
    /// Forwarded to `RetrievalOptions::confidence_map`.
    pub confidence_map: Option<&'a std::collections::HashMap<String, f64>>,
    /// Forwarded to `RetrievalOptions::eligible_skill_ids`
    /// (`retrieve_rules_for_search` always passes `None`).
    pub eligible_skill_ids: Option<&'a std::collections::HashSet<String>>,
    /// Forwarded to `RetrievalOptions::age_days_map`.
    pub age_days_map: Option<&'a std::collections::HashMap<String, f32>>,
    /// Forwarded to `RetrievalOptions::target_file`; also selects the language
    /// of the index filter via its extension.
    pub target_file: Option<&'a str>,
    /// Repo scopes to fan out over: normalized, de-duplicated, first four used.
    /// With no usable scope a single unscoped search runs.
    pub repo_scopes: &'a [String],
    /// Forwarded to `RetrievalOptions::ann_enabled`.
    pub ann_enabled: bool,
    /// Forwarded to `RetrievalOptions::embedding_timeout`.
    pub embedding_timeout: Option<std::time::Duration>,
    /// Forwarded to `RetrievalOptions::cold_start_retry`.
    pub cold_start_retry: bool,
    /// Forwarded to `RetrievalOptions::adaptive_prune`.
    pub adaptive_prune: bool,
}
250
251pub(crate) async fn retrieve_rules_fanout(
252 index_pool: &crate::SqlitePool,
253 query: RuleFanoutQuery<'_>,
254) -> Result<Vec<ScoredRuleChunk>, crate::CoreError> {
255 let RuleFanoutQuery {
256 query,
257 lexical_query,
258 top_k,
259 confidence_map,
260 eligible_skill_ids,
261 age_days_map,
262 target_file,
263 repo_scopes,
264 ann_enabled,
265 embedding_timeout,
266 cold_start_retry,
267 adaptive_prune,
268 } = query;
269
270 if top_k == 0 {
271 return Ok(Vec::new());
272 }
273 let top_k = top_k.min(50);
274 let repo_scopes: Vec<String> = unique_repo_scopes(repo_scopes)
275 .into_iter()
276 .take(4)
277 .collect();
278 let candidate_limit = top_k.saturating_mul(5).clamp(top_k, 50);
279 let scope_filters: Vec<Option<String>> = if repo_scopes.is_empty() {
285 vec![None]
286 } else {
287 repo_scopes.into_iter().map(Some).collect()
288 };
289
290 let query_variants = retrieval_query_variants(query, lexical_query);
291 let mut retrievals = Vec::with_capacity(scope_filters.len() * query_variants.len());
292 for repo_scope in &scope_filters {
293 for query_variant in &query_variants {
294 let filter = search_filter(target_file, repo_scope.as_deref());
295 retrievals.push(async move {
296 retrieve_rules_with_confidence(
297 index_pool,
298 query_variant,
299 RetrievalOptions {
300 top_k: Some(candidate_limit),
301 confidence_map,
302 eligible_skill_ids,
303 age_days_map,
304 target_file,
305 filter: Some(&filter),
306 ann_enabled,
307 embedding_timeout,
308 cold_start_retry,
309 adaptive_prune,
310 ..Default::default()
311 },
312 )
313 .await
314 });
315 }
316 }
317 let mut groups = Vec::with_capacity(retrievals.len());
318 for group in futures_util::future::join_all(retrievals).await {
319 groups.push(group?);
320 }
321
322 let merged = merge_scored_rule_chunks(groups, candidate_limit);
323 Ok(rerank_scored_rule_chunks_by_lexical_query(
324 merged,
325 lexical_query,
326 top_k,
327 ))
328}
329
330pub async fn retrieve_rules_for_search(
331 index_pool: &crate::SqlitePool,
332 options: RuleSearchRetrievalOptions<'_>,
333) -> Result<Vec<ScoredRuleChunk>, crate::CoreError> {
334 let RuleSearchRetrievalOptions {
335 query,
336 lexical_query,
337 top_k,
338 confidence_map,
339 age_days_map,
340 target_file,
341 repo_scopes,
342 ann_enabled,
343 embedding_timeout,
344 cold_start_retry,
345 adaptive_prune,
346 } = options;
347
348 retrieve_rules_fanout(
349 index_pool,
350 RuleFanoutQuery {
351 query,
352 lexical_query,
353 top_k,
354 confidence_map,
355 eligible_skill_ids: None,
358 age_days_map,
359 target_file,
360 repo_scopes,
361 ann_enabled,
362 embedding_timeout,
363 cold_start_retry,
364 adaptive_prune,
365 },
366 )
367 .await
368}
369
/// NOTE(review): presumably the rank-offset constant `k` of reciprocal rank
/// fusion; it is not referenced in this file, so confirm against its users.
const RRF_K: f64 = 60.0;
374
/// Maps a file path to a language name based on its extension.
///
/// The extension is the text after the last `.` in `path`, compared
/// case-insensitively. Returns `None` when the path contains no `.` at all or
/// when the extension is not one of the known ones.
///
/// A path without any dot has no extension: a bare name such as `go` or `c`
/// is *not* treated as if it were the extension itself.
pub fn detect_language_from_path(path: &str) -> Option<String> {
    // `rsplit_once` yields `None` when there is no dot, unlike
    // `rsplit('.').next()`, which would return the whole path.
    let (_, extension) = path.rsplit_once('.')?;

    let language = match extension.to_ascii_lowercase().as_str() {
        "rs" => "rust",
        "ts" | "tsx" => "typescript",
        "js" | "jsx" | "mjs" | "cjs" => "javascript",
        "py" | "pyi" => "python",
        "go" => "go",
        "java" => "java",
        "kt" | "kts" => "kotlin",
        "swift" => "swift",
        "rb" => "ruby",
        "php" => "php",
        "cpp" | "cc" | "cxx" | "hpp" | "hh" => "cpp",
        "c" | "h" => "c",
        "cs" => "csharp",
        _ => return None,
    };
    Some(language.to_owned())
}
408
/// Heuristic count of concrete references in `content`.
///
/// Adds up three capped signals:
/// * backtick pairs (at most 3),
/// * whitespace-separated words that look like file paths — they contain `/`
///   and their last segment has a `.` and is longer than three bytes (at most 3),
/// * words that look like versions — `v` followed by a digit, or at least two
///   dot-separated unsigned integers (at most 2).
fn concreteness_score(content: &str) -> usize {
    fn looks_like_path(word: &str) -> bool {
        word.contains('/')
            && word
                .split('/')
                .next_back()
                .is_some_and(|tail| tail.contains('.') && tail.len() > 3)
    }

    fn looks_like_version(word: &str) -> bool {
        // Strip surrounding punctuation but keep dots, which separate version parts.
        let core = word.trim_matches(|c: char| !c.is_ascii_alphanumeric() && c != '.');

        // Slicing at byte 1 is safe: it is only reached after `starts_with('v')`.
        let v_prefixed = core.starts_with('v')
            && core.len() > 2
            && core[1..].chars().next().is_some_and(|c| c.is_ascii_digit());
        if v_prefixed {
            return true;
        }

        let numeric_parts = core
            .split('.')
            .filter(|part| part.parse::<u32>().is_ok())
            .count();
        numeric_parts >= 2
    }

    let backtick_pairs = content.matches('`').count() / 2;
    let path_words = content
        .split_whitespace()
        .filter(|word| looks_like_path(word))
        .count();
    let version_words = content
        .split_whitespace()
        .filter(|word| looks_like_version(word))
        .count();

    backtick_pairs.min(3) + path_words.min(3) + version_words.min(2)
}
454
// NOTE(review): none of the tuning constants below is referenced in this file;
// the one-line descriptions are inferred from the names only. Confirm each
// against the code that consumes it before relying on the wording.

/// Presumably the absolute minimum relevance a result needs to be kept.
const MIN_RELEVANCE_SCORE: f64 = 0.001;

/// Presumably the score threshold used when adaptive pruning decides what to inject.
const ADAPTIVE_INJECT_THRESHOLD: f64 = 0.005;

/// Presumably a floor expressed as a fraction of the best result's relevance.
const RELATIVE_RELEVANCE_FLOOR: f64 = 0.35;

/// Presumably the absolute minimum relevance for explicit recall requests.
const EXPLICIT_RECALL_MIN_RELEVANCE: f64 = 0.01;

/// Presumably the relative floor applied to explicit recall requests.
const EXPLICIT_RECALL_RELATIVE_FLOOR: f64 = 0.20;

/// Presumably the minimum number of overlapping terms for the intent-alignment gate.
const MIN_INTENT_DIRECTIVE_OVERLAP: usize = 2;

/// Presumably the minimum overlap ratio for the intent-alignment gate.
const MIN_INTENT_DIRECTIVE_OVERLAP_RATIO: f64 = 0.5;

/// Presumably the minimum number of distinctive terms two texts must share.
const MIN_DISTINCTIVE_SHARED_TERMS: usize = 1;

/// Presumably the score at or above which the intent-alignment gate is skipped.
const INTENT_ALIGNMENT_EXEMPT_SCORE: f64 = 0.6;
583
584#[cfg(test)]
585mod tests {
586 use super::rules::pattern_allows;
587 use super::*;
588 use crate::cloud::api_types::RecallPastVerdictsRequest;
589 use crate::context::index_db::{QueryFilter, open_pool_at, upsert_rule_chunks};
590 use crate::context::rule_source::RuleDocument;
591 use crate::context::types::{PastVerdict, PastVerdictScope};
592 use crate::errors::CoreError;
593 use crate::review_trajectory::{TrajectoryBuilder, TrajectoryStep};
594 use async_trait::async_trait;
595 use tempfile::TempDir;
596
597 #[test]
600 fn pattern_allows_table() {
601 let cases: &[(Option<&str>, &str, bool)] = &[
605 (None, "tokio/src/io/uring.rs", true),
606 (Some(""), "tokio/src/io/uring.rs", true),
607 (Some("[]"), "tokio/src/io/uring.rs", true),
608 (Some(r#"["**/*.rs"]"#), "tokio/src/io/uring.rs", true),
609 (Some(r#"["**/*.rs"]"#), ".github/workflows/ci.yml", false),
610 (
611 Some(r#"["tokio/src/io/**"]"#),
612 "tokio/src/io/uring.rs",
613 true,
614 ),
615 (
616 Some(r#"["tokio/src/io/**"]"#),
617 "tokio/src/runtime/mod.rs",
618 false,
619 ),
620 (
621 Some(r#"["tokio/src/io/**"]"#),
622 "tokio\\src\\io\\uring.rs",
623 true,
624 ),
625 (
626 Some(r#"["tokio/src/io/**"]"#),
627 "/tokio/src/io/uring.rs",
628 true,
629 ),
630 (Some("not-json"), "any/path.rs", true),
633 (Some("{}"), "any/path.rs", true),
634 ];
635 for (pat, path, expected) in cases {
636 assert_eq!(
637 pattern_allows(*pat, path),
638 *expected,
639 "pat={pat:?} path={path}"
640 );
641 }
642 }
643
644 #[test]
647 fn detect_language_from_path_covers_common_extensions() {
648 assert_eq!(
649 detect_language_from_path("src/main.rs").as_deref(),
650 Some("rust")
651 );
652 assert_eq!(
653 detect_language_from_path("apps/web/index.tsx").as_deref(),
654 Some("typescript")
655 );
656 assert_eq!(
657 detect_language_from_path("scripts/build.py").as_deref(),
658 Some("python")
659 );
660 assert_eq!(
661 detect_language_from_path("api/handler.go").as_deref(),
662 Some("go")
663 );
664 }
665
666 #[test]
667 fn detect_language_from_path_returns_none_for_unknown_ext() {
668 assert!(detect_language_from_path("README.md").is_none());
669 assert!(detect_language_from_path("no_extension").is_none());
670 }
671
672 #[test]
673 fn shared_search_repo_scopes_are_case_insensitive() {
674 assert_eq!(
675 unique_repo_scopes(&[
676 "Difflore-Fixtures/Vite".to_owned(),
677 " ".to_owned(),
678 "difflore-fixtures/vite".to_owned(),
679 "ViteJS/Vite".to_owned(),
680 ]),
681 vec![
682 "difflore-fixtures/vite".to_owned(),
683 "vitejs/vite".to_owned()
684 ]
685 );
686 }
687
688 struct ErroringRecaller;
691
692 #[async_trait]
693 impl PastVerdictRecaller for ErroringRecaller {
694 async fn recall(
695 &self,
696 _req: RecallPastVerdictsRequest,
697 ) -> Result<Vec<PastVerdict>, CoreError> {
698 Err(CoreError::Internal("simulated failure".into()))
699 }
700 }
701
702 struct StaticRecaller(Vec<PastVerdict>);
703
704 #[async_trait]
705 impl PastVerdictRecaller for StaticRecaller {
706 async fn recall(
707 &self,
708 _req: RecallPastVerdictsRequest,
709 ) -> Result<Vec<PastVerdict>, CoreError> {
710 Ok(self.0.clone())
711 }
712 }
713
714 struct RecordingRecaller(tokio::sync::Mutex<Option<RecallPastVerdictsRequest>>);
715
716 #[async_trait]
717 impl PastVerdictRecaller for RecordingRecaller {
718 async fn recall(
719 &self,
720 req: RecallPastVerdictsRequest,
721 ) -> Result<Vec<PastVerdict>, CoreError> {
722 *self.0.lock().await = Some(req);
723 Ok(Vec::new())
724 }
725 }
726
727 fn verdict(id: &str, status: &str) -> PastVerdict {
728 PastVerdict {
729 extraction_id: id.to_owned(),
730 code_snippet: format!("snippet for {id}"),
731 issue_text: format!("issue for {id}"),
732 status: status.to_owned(),
733 reason: Some(format!("reason-{id}")),
734 similarity: 0.87,
735 created_at: "2026-04-10T00:00:00Z".to_owned(),
736 signature: None,
737 source_pr_number: None,
738 source_pr_title: None,
739 source_pr_url: None,
740 }
741 }
742
743 fn scored(id: &str, score: f64) -> ScoredRuleChunk {
744 ScoredRuleChunk {
745 skill_id: id.to_owned(),
746 content: format!("Rule ID: {id}\nRule Name: {id}\n\nbody"),
747 score,
748 confidence: 0.7,
749 }
750 }
751
752 fn embedding_blob(embedding: &[f32]) -> Vec<u8> {
753 embedding
754 .iter()
755 .flat_map(|value| value.to_le_bytes())
756 .collect()
757 }
758
759 #[test]
760 fn merge_scored_rule_chunks_tie_breaks_by_skill_id() {
761 let merged = merge_scored_rule_chunks(
762 vec![vec![scored("rule-b", 0.5)], vec![scored("rule-a", 0.5)]],
763 2,
764 );
765 let ids: Vec<_> = merged.iter().map(|r| r.skill_id.as_str()).collect();
766 assert_eq!(ids, vec!["rule-a", "rule-b"]);
767 }
768
769 #[test]
770 fn rerank_scored_rule_chunks_tie_breaks_by_skill_id() {
771 let ranked = rerank_scored_rule_chunks_by_lexical_query(
772 vec![scored("rule-b", 0.5), scored("rule-a", 0.5)],
773 "",
774 2,
775 );
776 let ids: Vec<_> = ranked.iter().map(|r| r.skill_id.as_str()).collect();
777 assert_eq!(ids, vec!["rule-a", "rule-b"]);
778 }
779
780 #[test]
781 fn retrieval_query_variants_adds_intent_lane_when_file_query_differs() {
782 assert_eq!(
783 retrieval_query_variants(
784 "src/context.go Bind handlers must check returned error",
785 "Bind handlers must check returned error",
786 ),
787 vec![
788 "src/context.go Bind handlers must check returned error",
789 "Bind handlers must check returned error",
790 ],
791 );
792 assert_eq!(
793 retrieval_query_variants("Bind handlers", "bind handlers"),
794 vec!["Bind handlers"],
795 );
796 assert_eq!(retrieval_query_variants("", "please"), vec!["please"]);
797 }
798
799 #[tokio::test]
800 async fn retrieve_rules_for_search_uses_intent_lane_to_escape_path_noise() {
801 let tmp = TempDir::new().unwrap();
802 let path = tmp.path().join("idx.db");
803 let pool = open_pool_at(&path).await.unwrap();
804 let repo = "gin-gonic/gin";
805 let mut rules = Vec::new();
806 for i in 0..8 {
807 let mut rule = rule_doc(
808 &format!("path-noise-{i}"),
809 "context go context go context go path-only convention",
810 Some("go"),
811 Some(repo),
812 );
813 rule.file_patterns = Some(r#"["**/*.go"]"#.to_owned());
814 rules.push(rule);
815 }
816 let mut signal = rule_doc(
817 "bind-error",
818 "Bind handlers must check returned error before continuing",
819 Some("go"),
820 Some(repo),
821 );
822 signal.file_patterns = Some(r#"["**/*.go"]"#.to_owned());
823 rules.push(signal);
824 upsert_rule_chunks(&pool, &rules).await.unwrap();
825
826 let hits = retrieve_rules_for_search(
827 &pool,
828 RuleSearchRetrievalOptions {
829 query: "src/context.go",
830 lexical_query: "Bind handlers must check returned error",
831 top_k: 1,
832 confidence_map: None,
833 age_days_map: None,
834 target_file: Some("src/context.go"),
835 repo_scopes: &[repo.to_owned()],
836 ann_enabled: false,
837 embedding_timeout: Some(std::time::Duration::from_millis(2500)),
838 cold_start_retry: false,
839 adaptive_prune: false,
840 },
841 )
842 .await
843 .unwrap();
844
845 assert_eq!(
846 hits.first().map(|hit| hit.skill_id.as_str()),
847 Some("bind-error")
848 );
849 }
850
851 #[tokio::test]
852 async fn retrieve_rules_for_search_without_repo_scopes_uses_project_index() {
853 let tmp = TempDir::new().unwrap();
854 let path = tmp.path().join("idx.db");
855 let pool = open_pool_at(&path).await.unwrap();
856 let rules = vec![rule_doc(
857 "signal",
858 "Avoid unwrap in request handlers; return structured errors",
859 Some("rust"),
860 Some("acme/widgets"),
861 )];
862 upsert_rule_chunks(&pool, &rules).await.unwrap();
863
864 let hits = retrieve_rules_for_search(
865 &pool,
866 RuleSearchRetrievalOptions {
867 query: "src/http/handler.rs Avoid unwrap in request handlers",
868 lexical_query: "Avoid unwrap in request handlers",
869 top_k: 1,
870 confidence_map: None,
871 age_days_map: None,
872 target_file: Some("src/http/handler.rs"),
873 repo_scopes: &[],
874 ann_enabled: false,
875 embedding_timeout: Some(std::time::Duration::from_millis(2500)),
876 cold_start_retry: false,
877 adaptive_prune: false,
878 },
879 )
880 .await
881 .unwrap();
882
883 assert_eq!(
884 hits.first().map(|hit| hit.skill_id.as_str()),
885 Some("signal")
886 );
887 }
888
889 #[test]
890 fn merge_past_verdicts_tie_breaks_by_extraction_id() {
891 let merged = merge_past_verdicts(
892 vec![
893 vec![verdict("verdict-b", "approved")],
894 vec![verdict("verdict-a", "approved")],
895 ],
896 2,
897 );
898 let ids: Vec<_> = merged.iter().map(|v| v.extraction_id.as_str()).collect();
899 assert_eq!(ids, vec!["verdict-a", "verdict-b"]);
900 }
901
902 #[tokio::test]
903 async fn test_retrieve_past_verdicts_returns_empty_on_error() {
904 let recaller = ErroringRecaller;
905 let emb = vec![0.1f32; 8];
906 let out = retrieve_past_verdicts(
907 &recaller,
908 &emb,
909 Some("repo-1"),
910 PastVerdictScope::Team,
911 5,
912 None,
913 )
914 .await;
915 assert!(
916 out.is_empty(),
917 "errors must be downgraded to an empty Vec, got {} items",
918 out.len()
919 );
920 }
921
922 #[tokio::test]
923 async fn test_retrieve_past_verdicts_forwards_rows_on_success() {
924 let recaller = StaticRecaller(vec![verdict("e1", "approved"), verdict("e2", "rejected")]);
925 let emb = vec![0.0f32; 4];
926 let out =
927 retrieve_past_verdicts(&recaller, &emb, None, PastVerdictScope::Personal, 3, None)
928 .await;
929 assert_eq!(out.len(), 2);
930 assert_eq!(out[0].extraction_id, "e1");
931 assert_eq!(out[1].status, "rejected");
932 }
933
934 #[tokio::test]
935 async fn text_past_verdict_recall_forwards_team_scope() {
936 let recaller = RecordingRecaller(tokio::sync::Mutex::new(None));
937
938 let _ = retrieve_past_verdicts_by_text_with_team(
939 &recaller,
940 "router cache invalidation",
941 Some("acme/widgets"),
942 PastVerdictScope::Team,
943 7,
944 Some("src/router.ts"),
945 Some("team-1"),
946 )
947 .await;
948
949 let req = recaller.0.lock().await.clone().expect("request captured");
950 assert_eq!(req.scope, "team");
951 assert_eq!(req.team_id.as_deref(), Some("team-1"));
952 assert_eq!(req.repo_id.as_deref(), Some("acme/widgets"));
953 assert_eq!(req.target_file.as_deref(), Some("src/router.ts"));
954 assert_eq!(req.k, 7);
955 }
956
957 #[tokio::test]
958 async fn embedding_past_verdict_recall_forwards_team_scope() {
959 let recaller = RecordingRecaller(tokio::sync::Mutex::new(None));
960 let embedding = vec![0.25, 0.5, 0.75];
961
962 let _ = retrieve_past_verdicts_with_team(
963 &recaller,
964 &embedding,
965 Some("acme/widgets"),
966 PastVerdictScope::Team,
967 4,
968 Some("src/router.ts"),
969 Some("team-1"),
970 )
971 .await;
972
973 let req = recaller.0.lock().await.clone().expect("request captured");
974 assert_eq!(req.scope, "team");
975 assert_eq!(req.team_id.as_deref(), Some("team-1"));
976 assert_eq!(req.repo_id.as_deref(), Some("acme/widgets"));
977 assert_eq!(req.target_file.as_deref(), Some("src/router.ts"));
978 assert_eq!(req.embedding, embedding);
979 assert_eq!(req.query_text, None);
980 assert_eq!(req.k, 4);
981 }
982
983 fn rule_doc(
986 id: &str,
987 content: &str,
988 language: Option<&str>,
989 repo_scope: Option<&str>,
990 ) -> RuleDocument {
991 RuleDocument {
992 skill_id: id.to_owned(),
993 title: id.to_owned(),
994 content: content.to_owned(),
995 confidence: 0.7,
996 file_patterns: None,
997 language: language.map(String::from),
998 repo_scope: repo_scope.map(String::from),
999 }
1000 }
1001
1002 #[tokio::test]
1003 async fn rrf_fusion_prefers_results_ranked_high_by_both() {
1004 let tmp = TempDir::new().unwrap();
1007 let path = tmp.path().join("idx.db");
1008 let pool = open_pool_at(&path).await.unwrap();
1009
1010 let rules = vec![
1011 rule_doc(
1014 "A",
1015 "prefer structured_logging for observability when emitting structured_logging events",
1016 None,
1017 None,
1018 ),
1019 rule_doc(
1021 "B",
1022 "avoid structured_logging in tests; use a stub logger instead",
1023 None,
1024 None,
1025 ),
1026 rule_doc(
1028 "C",
1029 "always write unit tests for every public api",
1030 None,
1031 None,
1032 ),
1033 ];
1034 upsert_rule_chunks(&pool, &rules).await.unwrap();
1035
1036 let mut tb = TrajectoryBuilder::new();
1037 let hits = retrieve_rules_with_confidence(
1038 &pool,
1039 "structured_logging observability",
1040 RetrievalOptions {
1041 top_k: Some(3),
1042 trajectory: Some(&mut tb),
1043 ..Default::default()
1044 },
1045 )
1046 .await
1047 .unwrap();
1048
1049 assert!(!hits.is_empty());
1053 assert_eq!(hits[0].skill_id, "A", "A should RRF-win over B and C");
1054
1055 let has_fusion = tb
1057 .steps()
1058 .iter()
1059 .any(|s| matches!(s, TrajectoryStep::HybridFusion { .. }));
1060 assert!(has_fusion, "HybridFusion trajectory step must fire");
1061 }
1062
1063 #[tokio::test]
1064 async fn sha1_embedder_path_weights_fts_higher() {
1065 let tmp = TempDir::new().unwrap();
1070 let path = tmp.path().join("idx.db");
1071 let pool = open_pool_at(&path).await.unwrap();
1072
1073 let rules = vec![
1074 rule_doc(
1076 "keyword",
1077 "do not shadow with deprecated_zzz_api in request handlers",
1078 None,
1079 None,
1080 ),
1081 rule_doc(
1083 "semantic",
1084 "request handlers should use async primitives carefully",
1085 None,
1086 None,
1087 ),
1088 ];
1089 upsert_rule_chunks(&pool, &rules).await.unwrap();
1090
1091 let hits = retrieve_rules_with_confidence(
1092 &pool,
1093 "deprecated_zzz_api",
1094 RetrievalOptions {
1095 top_k: Some(2),
1096 ..Default::default()
1097 },
1098 )
1099 .await
1100 .unwrap();
1101
1102 assert!(!hits.is_empty());
1103 assert_eq!(
1104 hits[0].skill_id, "keyword",
1105 "under SHA1 embedder, FTS hit should win over a generic semantic neighbour"
1106 );
1107 }
1108
1109 #[tokio::test]
1110 async fn linear_scan_excludes_mismatched_embedding_dims() {
1111 let tmp = TempDir::new().unwrap();
1112 let path = tmp.path().join("idx.db");
1113 let pool = open_pool_at(&path).await.unwrap();
1114 let query = "dim_mismatch_probe";
1115 let query_emb = crate::context::embedding::embed_text(query);
1116 let stale_embedding = vec![query_emb[0], query_emb[1]];
1117 let stale_blob = embedding_blob(&stale_embedding);
1118
1119 sqlx::query(
1120 "INSERT INTO rule_chunks (id, skill_id, content, embedding, file_patterns, language, repo_scope)
1121 VALUES (?1, ?2, ?3, ?4, NULL, NULL, NULL)",
1122 )
1123 .bind("rule-stale")
1124 .bind("stale")
1125 .bind("unrelated content that should not match the query lexically")
1126 .bind(stale_blob)
1127 .execute(&pool)
1128 .await
1129 .unwrap();
1130
1131 let hits = retrieve_rules_with_confidence(
1132 &pool,
1133 query,
1134 RetrievalOptions {
1135 top_k: Some(5),
1136 ann_enabled: false,
1137 ..Default::default()
1138 },
1139 )
1140 .await
1141 .unwrap();
1142
1143 assert!(
1144 hits.is_empty(),
1145 "stale chunks from a different embedding dim must not enter linear cosine ranking"
1146 );
1147 }
1148
1149 #[tokio::test]
1150 async fn strict_cascade_does_not_fallback_to_foreign_file_patterns() {
1151 let tmp = TempDir::new().unwrap();
1152 let path = tmp.path().join("idx.db");
1153 let pool = open_pool_at(&path).await.unwrap();
1154 let mut foreign = rule_doc(
1155 "foreign",
1156 "python request handlers should avoid sync database calls",
1157 Some("python"),
1158 Some("acme/widgets"),
1159 );
1160 foreign.file_patterns = Some(r#"["**/*.py"]"#.to_owned());
1161 upsert_rule_chunks(&pool, &[foreign]).await.unwrap();
1162
1163 let filter = QueryFilter {
1164 language: None,
1165 repo_scope: Some("acme/widgets".to_owned()),
1166 };
1167 let hits = retrieve_rules_with_confidence(
1168 &pool,
1169 "request handlers database",
1170 RetrievalOptions {
1171 top_k: Some(5),
1172 target_file: Some("src/server.rs"),
1173 filter: Some(&filter),
1174 ann_enabled: false,
1175 ..Default::default()
1176 },
1177 )
1178 .await
1179 .unwrap();
1180
1181 assert!(
1182 hits.is_empty(),
1183 "explicit **/*.py rule must not be recalled for src/server.rs"
1184 );
1185 }
1186
1187 #[tokio::test]
1188 async fn strict_cascade_keeps_universal_rules_for_target_file() {
1189 let tmp = TempDir::new().unwrap();
1190 let path = tmp.path().join("idx.db");
1191 let pool = open_pool_at(&path).await.unwrap();
1192 upsert_rule_chunks(
1193 &pool,
1194 &[rule_doc(
1195 "universal",
1196 "request handlers should return structured errors",
1197 None,
1198 Some("acme/widgets"),
1199 )],
1200 )
1201 .await
1202 .unwrap();
1203
1204 let filter = QueryFilter {
1205 language: None,
1206 repo_scope: Some("acme/widgets".to_owned()),
1207 };
1208 let hits = retrieve_rules_with_confidence(
1209 &pool,
1210 "request handlers structured errors",
1211 RetrievalOptions {
1212 top_k: Some(5),
1213 target_file: Some("src/server.rs"),
1214 filter: Some(&filter),
1215 ann_enabled: false,
1216 ..Default::default()
1217 },
1218 )
1219 .await
1220 .unwrap();
1221
1222 assert_eq!(
1223 hits.first().map(|hit| hit.skill_id.as_str()),
1224 Some("universal")
1225 );
1226 }
1227
1228 #[tokio::test]
1229 async fn retrieve_emits_retrieval_filter_step_when_filter_active() {
1230 let tmp = TempDir::new().unwrap();
1231 let path = tmp.path().join("idx.db");
1232 let pool = open_pool_at(&path).await.unwrap();
1233
1234 let rules = vec![
1235 rule_doc("rust-1", "rust-specific rule content", Some("rust"), None),
1236 rule_doc("py-1", "python-specific rule content", Some("python"), None),
1237 ];
1238 upsert_rule_chunks(&pool, &rules).await.unwrap();
1239
1240 let mut tb = TrajectoryBuilder::new();
1241 let filter = QueryFilter {
1242 language: Some("rust".into()),
1243 repo_scope: None,
1244 };
1245 let _ = retrieve_rules_with_confidence(
1246 &pool,
1247 "rule",
1248 RetrievalOptions {
1249 top_k: Some(5),
1250 filter: Some(&filter),
1251 trajectory: Some(&mut tb),
1252 ..Default::default()
1253 },
1254 )
1255 .await
1256 .unwrap();
1257
1258 let got = tb
1259 .steps()
1260 .iter()
1261 .find_map(|s| match s {
1262 TrajectoryStep::RetrievalFilter { before, after } => Some((*before, *after)),
1263 _ => None,
1264 })
1265 .expect("RetrievalFilter step must fire when filter is active");
1266 assert_eq!(got.0, 2, "before = 2 (total chunks)");
1267 assert_eq!(got.1, 1, "after = 1 (only rust chunk survives)");
1268 }
1269}