use std::io;
use anyhow::{Context, bail};
use difflore_core::context::embedding::ActiveEmbedderKind;
use difflore_core::context::retrieval::RenderedRuleBody;
use difflore_core::context::types::PastVerdict;
use globset::Glob;
use crate::installer;
use crate::runtime::CommandContext;
use crate::style::{self, sym};
use crate::support::util::{exit_code, project_path};
mod presentation;
mod retrieval;
mod search;
use presentation::{
cross_repo_starter_json, local_rules_json, recall_diagnostics_json, render_cloud_recall_human,
render_cross_repo_starter_human, render_local_recall_human, render_zero_match_compact_human,
};
use retrieval::{
build_zero_match_diagnostics, cross_repo_starter_hits, recall_cloud_review_memory,
recall_local_rules, record_local_recall,
};
/// Display-oriented summary of the embedder that currently backs recall ranking.
///
/// Built from an `ActiveEmbedderKind` by `RecallSemanticState::from_kind`.
struct RecallSemanticState {
/// `true` for the `Cloud` and `Byok` embedder kinds, `false` for `Sha1`.
semantic: bool,
/// Short mode label: `"cloud"`, `"byok"`, or `"keyword"`.
mode: &'static str,
}
impl RecallSemanticState {
    /// Translates the active embedder kind into the state shown to the user.
    ///
    /// `Cloud` and `Byok` are reported as semantic; `Sha1` is reported as the
    /// non-semantic `"keyword"` mode.
    const fn from_kind(kind: &ActiveEmbedderKind) -> Self {
        let (semantic, mode) = match kind {
            ActiveEmbedderKind::Cloud => (true, "cloud"),
            ActiveEmbedderKind::Byok { .. } => (true, "byok"),
            ActiveEmbedderKind::Sha1 => (false, "keyword"),
        };
        Self { semantic, mode }
    }

    /// Returns the hint to show when ranking is keyword-only, or `None` when a
    /// semantic embedder is active.
    const fn keyword_only_note(&self) -> Option<&'static str> {
        if !self.semantic {
            return Some(
                "semantic matching is using local keyword fallback; managed vectors: `difflore cloud login`; BYOK vectors: `difflore embeddings setup`",
            );
        }
        None
    }
}
/// Arguments consumed by `handle_recall`, converted from `crate::cli::RecallCliArgs`.
pub(crate) struct RecallArgs {
/// Query phrase. Required unless `diff` is set (see `resolve_intent_and_file`).
pub(crate) intent: Option<String>,
/// Optional file to scope recall to; in `--diff` mode a changed file is picked when this is `None`.
pub(crate) file: Option<String>,
/// When `true`, the changed-file list comes from `git diff` and a missing/blank intent is synthesised.
pub(crate) diff: bool,
/// Passed through to the local and cloud recall calls as `top_k`.
pub(crate) top_k: usize,
/// Print a single JSON payload instead of the human-readable report.
pub(crate) json: bool,
/// Forwarded to the human-readable renderers.
pub(crate) verbose: bool,
/// Print the markdown summary produced by `handle_recall_copy` and return.
pub(crate) copy: bool,
}
impl From<crate::cli::RecallCliArgs> for RecallArgs {
    /// Moves every CLI-parsed field into the handler-facing argument struct,
    /// one-to-one and without transformation.
    fn from(args: crate::cli::RecallCliArgs) -> Self {
        let crate::cli::RecallCliArgs {
            intent,
            file,
            diff,
            top_k,
            json,
            verbose,
            copy,
            ..
        } = args;
        Self {
            intent,
            file,
            diff,
            top_k,
            json,
            verbose,
            copy,
        }
    }
}
/// Entry point for the recall command.
///
/// Resolves the query (intent, file, changed files), runs local and cloud
/// recall together, and prints either one JSON payload (`json`) or a
/// human-readable report. With `copy` set, output is delegated entirely to
/// `handle_recall_copy`.
pub(crate) async fn handle_recall(ctx: &CommandContext, args: RecallArgs) {
let RecallArgs {
intent,
file,
diff,
top_k,
json,
verbose,
copy,
} = args;
// Resolve what to search for; on failure the error is printed to stderr and
// `exit_code(2)` is called.
let (resolved_intent, resolved_file, diff_files) =
match resolve_intent_and_file(intent, file, diff) {
Ok(triple) => triple,
Err(e) => {
eprintln!("{} {:#}", style::err(sym::ERR), e);
exit_code(2);
}
};
// `--copy` has its own markdown renderer and skips everything below.
if copy {
handle_recall_copy(ctx, resolved_intent, resolved_file, &diff_files, top_k).await;
return;
}
// The header is human-only so JSON output stays a single document.
if !json {
let header = if diff {
"Top memories for current diff".to_owned()
} else {
format!("Top memories for: {resolved_intent}")
};
println!("{}", style::ok(&header));
println!();
}
let (local, cloud) = recall_local_and_cloud(
ctx,
&resolved_intent,
resolved_file.as_deref(),
&diff_files,
top_k,
"cli-recall",
)
.await;
// Diagnostics are built only when neither local rules nor cloud verdicts matched.
let zero_match_diagnostics = if local.matches.is_empty() && cloud.verdicts.is_empty() {
Some(build_zero_match_diagnostics(
&local,
&cloud,
&resolved_intent,
resolved_file.as_deref(),
))
} else {
None
};
let semantic_state = RecallSemanticState::from_kind(
&difflore_core::context::embedding::probe_active_embedder().await,
);
// JSON mode: assemble the payload, print it, and return before any human output.
if json {
let recalled_at = chrono::Utc::now().to_rfc3339();
let queried_file = resolved_file.as_deref();
let scope_files = strict_scope_files(queried_file, &diff_files);
// Count local hits whose file patterns match at least one scope file.
let strict_match_count = local
.matches
.iter()
.filter(|hit| strict_pattern_match_any_file(&hit.file_patterns, &scope_files))
.count();
let any_strict = strict_match_count > 0;
let mut payload = serde_json::json!({
"schemaVersion": crate::commands::ai_contract::CLI_SCHEMA_VERSION,
"intent": resolved_intent,
"file": queried_file,
"recalledAt": recalled_at,
"fileScopeFallback": local.file_scope_fallback,
"strictFileMatch": any_strict,
"strictMatchCount": strict_match_count,
"semanticRanking": {
"semantic": semantic_state.semantic,
"mode": semantic_state.mode,
"note": semantic_state.keyword_only_note(),
},
"localRules": local_rules_json(&local, &scope_files),
"cloudReviewMemory": {
"loggedIn": cloud.logged_in,
"repoFullName": cloud.repo_full_name,
"scope": cloud.scope,
"teamId": cloud.team_id,
"verdicts": cloud.verdicts,
},
});
// `diagnostics` is attached only for zero-match runs.
if let Some(diagnostics) = zero_match_diagnostics.as_ref()
&& let Some(object) = payload.as_object_mut()
{
object.insert(
"diagnostics".to_owned(),
recall_diagnostics_json(diagnostics),
);
}
// Cross-repo starter hits are looked up only when there are no local
// matches, no local rules are indexed, and a file is known.
if local.matches.is_empty()
&& local.rules_indexed == 0
&& let Some(file) = queried_file
{
let starter = cross_repo_starter_hits(ctx, &resolved_intent, file, top_k).await;
if !starter.is_empty()
&& let Some(object) = payload.as_object_mut()
{
object.insert(
"crossRepoStarter".to_owned(),
cross_repo_starter_json(&starter),
);
}
}
println!("{}", crate::support::util::json_or(&payload, "{}"));
return;
}
// Human mode: a compact zero-match report, or the local + cloud sections.
if let Some(diagnostics) = zero_match_diagnostics.as_ref() {
render_zero_match_compact_human(diagnostics);
} else {
let scope_files = strict_scope_files(resolved_file.as_deref(), &diff_files);
render_local_recall_human(
&local,
&resolved_intent,
resolved_file.as_deref(),
&scope_files,
verbose,
);
// The keyword-fallback warning is shown only alongside actual local matches.
if !local.matches.is_empty()
&& let Some(note) = semantic_state.keyword_only_note()
{
println!(" {} {}", style::amber(sym::WARN), style::pewter(note));
}
if local.matches.is_empty()
&& local.rules_indexed == 0
&& let Some(file) = resolved_file.as_deref()
{
let starter = cross_repo_starter_hits(ctx, &resolved_intent, file, top_k).await;
render_cross_repo_starter_human(&starter, file);
}
println!();
render_cloud_recall_human(&cloud, &resolved_intent, resolved_file.as_deref(), verbose);
}
println!();
// When local rules matched, report which installed clients will see them,
// or point at the install command when none are installed.
if !local.matches.is_empty() {
let snapshot = installer::collect_status_snapshot();
let installed: Vec<&'static str> = snapshot
.clients
.iter()
.filter(|c| matches!(c.state, installer::InstallState::Installed))
.map(|c| c.name)
.collect();
if installed.is_empty() {
println!(
" {} No agents are wired yet; these local rules are ready once you run {} so Claude/Codex/Cursor can recall them.",
style::pewter(sym::BULLET),
style::cmd("difflore agents install"),
);
} else {
let names = installed.join(", ");
let n = local.matches.len();
// Pluralisation: "rule"/"rules" follows the hit count; "it touches"/"they touch"
// follows the number of installed clients.
println!(
" {} {} will see {} local rule{} like these next time {} touch{} a matching file in this repo.",
style::emerald(sym::OK),
names,
n,
if n == 1 { "" } else { "s" },
if installed.len() == 1 { "it" } else { "they" },
if installed.len() == 1 { "es" } else { "" },
);
}
}
println!();
// The zero-match report is not followed by the generic `next:` pointer.
if zero_match_diagnostics.is_none() {
println!(
"next: {} {}",
style::cmd("difflore status"),
style::pewter("see matched memories, agent readiness, and accepted edits"),
);
}
// `--diff` runs additionally print the compact value summary line, when one is rendered.
if diff {
let summary =
crate::commands::status::compact_value_summary_for_current_project(&ctx.db).await;
if let Some(line) = crate::commands::status::render_compact_value_summary(&summary) {
println!(" {}", style::pewter(&line));
}
}
}
/// Works out the effective intent, target file, and changed-file list for a run.
///
/// * Without `diff`: the intent is mandatory; it is returned together with
///   `file` unchanged and an empty changed-file list.
/// * With `diff`: the changed files come from `git_diff_files`. The target file
///   is `file` when given, otherwise the pick of `primary_recall_file`. A
///   non-blank `intent` is used as-is; otherwise one is derived from the diff
///   text, falling back to `"changes in <files>"` when that derivation is blank.
///
/// # Errors
///
/// Fails when the intent is missing (non-diff mode), when listing changed
/// files fails, or when `--diff` finds no changed files.
fn resolve_intent_and_file(
    intent: Option<String>,
    file: Option<String>,
    diff: bool,
) -> anyhow::Result<(String, Option<String>, Vec<String>)> {
    if diff {
        let changed = git_diff_files()?;
        if changed.is_empty() {
            bail!("`--diff` found no changed files. Stage or modify some files first.");
        }
        let target_file = file.or_else(|| primary_recall_file(&changed));
        let effective_intent = match intent.filter(|text| !text.trim().is_empty()) {
            Some(explicit) => explicit,
            None => {
                // Best effort: a failure to read the diff text degrades to an
                // empty diff rather than aborting the command.
                let diff_text = git_diff_text().unwrap_or_default();
                let derived = difflore_core::context::intent_filter::build_review_intent_text(
                    target_file.as_deref(),
                    &diff_text,
                );
                if derived.trim().is_empty() {
                    format!("changes in {}", changed.join(", "))
                } else {
                    derived
                }
            }
        };
        return Ok((effective_intent, target_file, changed));
    }

    let intent = intent.context("missing intent. Provide a phrase, or pass `--diff`.")?;
    Ok((intent, file, Vec::new()))
}
/// Lists the files changed in the working tree and in the index, de-duplicated
/// and in first-seen order (unstaged changes first, then staged ones).
///
/// Git is invoked with `-z` so that path names are emitted verbatim and
/// NUL-terminated. Without it, `--name-only` C-quotes any path containing
/// non-ASCII or special characters (for example `"caf\303\251.rs"`), and such
/// a quoted name would never match a file pattern or extension check later on.
///
/// # Errors
///
/// Fails when `git` cannot be spawned, when the project path is not inside a
/// git repository, or when either `git diff` invocation exits unsuccessfully.
fn git_diff_files() -> anyhow::Result<Vec<String>> {
    let cwd = project_path();
    let mut seen = std::collections::BTreeSet::new();
    let mut out: Vec<String> = Vec::new();
    for args in [
        &["diff", "--name-only", "-z"][..],
        &["diff", "--name-only", "-z", "--cached"][..],
    ] {
        let output = difflore_core::infra::git::git_command(&cwd)
            .args(args)
            .output()
            .map_err(|error| git_spawn_error(&error))?;
        if !output.status.success() {
            let stderr = String::from_utf8_lossy(&output.stderr);
            if stderr.to_ascii_lowercase().contains("not a git repository") {
                bail!(
                    "`--diff` requires a git repo. cd into one, or pass an intent phrase \
                     (e.g. `difflore recall \"input validation\"`)."
                );
            }
            bail!("git {} failed: {}", args.join(" "), stderr.trim());
        }
        // With `-z` every path ends in NUL, so splitting leaves one empty
        // trailing piece; paths themselves are kept exactly as git printed them.
        for path in String::from_utf8_lossy(&output.stdout).split('\0') {
            if !path.is_empty() && seen.insert(path.to_owned()) {
                out.push(path.to_owned());
            }
        }
    }
    Ok(out)
}
/// Collects the unstaged diff followed by the staged diff as one string.
///
/// Both diffs are produced with `--no-ext-diff --unified=8`. After each diff is
/// appended, a newline is added if the accumulated text does not already end
/// with one.
///
/// # Errors
///
/// Fails when `git` cannot be spawned or either invocation exits unsuccessfully.
fn git_diff_text() -> anyhow::Result<String> {
    const INVOCATIONS: [&[&str]; 2] = [
        &["diff", "--no-ext-diff", "--unified=8"],
        &["diff", "--cached", "--no-ext-diff", "--unified=8"],
    ];

    let repo_dir = project_path();
    let mut combined = String::new();
    for argv in INVOCATIONS {
        let result = difflore_core::infra::git::git_command(&repo_dir)
            .args(argv)
            .output()
            .map_err(|error| git_spawn_error(&error))?;
        if !result.status.success() {
            let stderr = String::from_utf8_lossy(&result.stderr);
            bail!("git {} failed: {}", argv.join(" "), stderr.trim());
        }
        combined.push_str(&String::from_utf8_lossy(&result.stdout));
        if !combined.ends_with('\n') {
            combined.push('\n');
        }
    }
    Ok(combined)
}
fn git_spawn_error(error: &io::Error) -> anyhow::Error {
if error.kind() == io::ErrorKind::NotFound {
anyhow::anyhow!("`git` not found on PATH (install it, then retry)")
} else {
anyhow::anyhow!("failed to run `git`: {error}")
}
}
/// Runs recall and prints the result to stdout as a markdown-formatted summary.
///
/// Two shapes are produced:
///
/// * **Zero matches** (no local rules and no cloud verdicts): a short
///   explanation of why nothing was recalled, followed by the "Likely causes"
///   and "Next steps" lists from `build_zero_match_diagnostics`.
/// * **Matches**: a bullet list of local rule titles with their source, an
///   optional list of cloud verdicts, an optional keyword-fallback note, and a
///   trailing "Generated by" line.
///
/// The recall call is tagged with the session id `"cli-recall-copy"`.
async fn handle_recall_copy(
    ctx: &CommandContext,
    intent: String,
    file: Option<String>,
    diff_files: &[String],
    top_k: usize,
) {
    let (local, cloud) = recall_local_and_cloud(
        ctx,
        &intent,
        file.as_deref(),
        diff_files,
        top_k,
        "cli-recall-copy",
    )
    .await;

    if local.matches.is_empty() && cloud.verdicts.is_empty() {
        let diagnostics = build_zero_match_diagnostics(&local, &cloud, &intent, file.as_deref());
        if local.repo_full_name.is_none() {
            // `git remote -v` only lists remotes, so it is offered as the check
            // and `git remote add` as the way to actually add one.
            println!(
                "_difflore recalled 0 local rules for \"{intent}\"; this checkout has no supported git remote, and local recall is repo-scoped (check with `git remote -v`; add one with `git remote add origin <url>`)._"
            );
        } else if local.rules_indexed == 0 {
            println!(
                "_difflore recalled 0 local rules for \"{intent}\" because this repo has no local rules yet._"
            );
        } else {
            println!("_difflore recalled 0 local rules for \"{intent}\"._");
        }
        if !cloud.logged_in {
            println!("_Cloud PR review memory is available after sign-in._");
        } else if cloud.repo_full_name.is_none() {
            println!("_Cloud PR review memory needs a supported repo remote._");
        }
        println!();
        println!("_Likely causes:_");
        for cause in diagnostics.possible_causes {
            println!("- _{}_", cause.message);
        }
        println!();
        println!("_Next steps:_");
        for step in diagnostics.next_steps {
            match step.command {
                Some(command) => println!("- `{command}`: _{}_", step.message),
                None => println!("- _{}_", step.message),
            }
        }
        return;
    }

    println!(
        "**difflore recalled {} local rule{} for \"{}\":**",
        local.matches.len(),
        if local.matches.len() == 1 { "" } else { "s" },
        intent,
    );
    println!();
    // Reached with zero local matches only when cloud verdicts exist.
    if local.matches.is_empty() {
        println!("- _No local rule matched._");
    }
    for hit in &local.matches {
        // A missing or blank source repo is shown as the generic "review memory".
        let source = hit
            .source_repo
            .as_deref()
            .filter(|repo| !repo.trim().is_empty())
            .unwrap_or("review memory");
        println!(
            "- **{}** <- learned from `{}`",
            truncate_one_line(&hit.title, 110),
            source,
        );
    }

    if !cloud.verdicts.is_empty() {
        println!();
        println!(
            "**Cloud PR review rules appended ({}):**",
            cloud.verdicts.len(),
        );
        for verdict in &cloud.verdicts {
            let source = source_label(verdict, cloud.repo_full_name.as_deref())
                .unwrap_or_else(|| "review memory".to_owned());
            println!(
                "- **{}** <- learned from `{}`",
                truncate_one_line(&verdict.issue_text, 110),
                source,
            );
        }
    } else if !cloud.logged_in {
        println!();
        println!("_Cloud PR review memory is available after sign-in._");
    } else if cloud.repo_full_name.is_none() {
        println!();
        println!("_Cloud PR review memory needs a supported repo remote._");
    }

    let semantic_state = RecallSemanticState::from_kind(
        &difflore_core::context::embedding::probe_active_embedder().await,
    );
    if let Some(note) = semantic_state.keyword_only_note() {
        println!();
        println!("_{note}_");
    }
    println!();
    println!("_Generated by `difflore recall`_");
}
/// Runs local rule recall and cloud review-memory recall together and returns
/// both results.
///
/// The local side recalls rules and then records that recall via
/// `record_local_recall` (tagged with `session_id`) before yielding its result.
/// The cloud side does not receive `diff_files`.
async fn recall_local_and_cloud(
    ctx: &CommandContext,
    intent: &str,
    file: Option<&str>,
    diff_files: &[String],
    top_k: usize,
    session_id: &str,
) -> (LocalRecallResult, CloudRecallResult) {
    let local_then_record = async {
        let recalled = recall_local_rules(ctx, intent, file, diff_files, top_k).await;
        record_local_recall(ctx, &recalled, intent, file, diff_files, top_k, session_id).await;
        recalled
    };
    let cloud_lookup = recall_cloud_review_memory(ctx, intent, file, top_k);

    let (local, cloud) = tokio::join!(local_then_record, cloud_lookup);
    (local, cloud)
}
/// One local rule returned by recall, carrying the fields the renderers read.
///
/// NOTE(review): hits are constructed outside this part of the file; notes
/// marked "presumably" are read off names and usage here and should be
/// confirmed against the retrieval code.
pub(super) struct LocalRuleHit {
/// Identifier of the rule.
pub(super) id: String,
/// Rule title; printed (truncated to one line) in the `--copy` bullet list.
pub(super) title: String,
/// Short preview text for the rule (presumably derived from its content).
pub(super) preview: String,
/// Example "bad" line extracted from the rule content, when one was found.
pub(super) bad: Option<String>,
/// Example "fix" line extracted from the rule content, when one was found.
pub(super) fix: Option<String>,
/// Score used for ordering hits (presumably after adjustments to `raw_score`).
pub(super) rank_score: f64,
/// Unadjusted retrieval score (presumably straight from the scorer).
pub(super) raw_score: f64,
/// Confidence value attached to the matched rule.
pub(super) confidence: f64,
/// File patterns for the rule; matched as globs against the scope files.
pub(super) file_patterns: Vec<String>,
/// Repository the rule came from; `--copy` output shows "review memory" when absent or blank.
pub(super) source_repo: Option<String>,
/// Origin tag of the rule, e.g. `"pr_review"`.
pub(super) origin: Option<String>,
/// Rank associated with `origin` (presumably computed from it by the core `source_rank` helper).
pub(super) source_rank: Option<u8>,
/// Rendered rule body, when available.
pub(super) body: Option<RenderedRuleBody>,
}
/// Result of recalling local rules for one query.
pub(super) struct LocalRecallResult {
/// Number of local rules indexed; `0` triggers the "no local rules yet" messaging.
pub(super) rules_indexed: usize,
/// Repository the recall was scoped to; `None` is reported as "no supported git remote".
pub(super) repo_full_name: Option<String>,
/// The recalled rules.
pub(super) matches: Vec<LocalRuleHit>,
/// Emitted in JSON as `fileScopeFallback`.
/// NOTE(review): presumably set when file-scoped matching fell back to a wider scope — confirm in retrieval.
pub(super) file_scope_fallback: bool,
/// Counters describing the recall run.
pub(super) trace: RecallTrace,
}
/// Counters describing one local recall run.
///
/// NOTE(review): these fields are populated outside this part of the file; the
/// per-field notes are read off the field names and should be confirmed
/// against the retrieval code.
#[derive(Debug, Clone, Default)]
pub(super) struct RecallTrace {
/// Repo scopes that were searched (presumably repository names).
pub(super) repo_scopes: Vec<String>,
/// Upper bound on candidates requested (presumably from `candidate_pool_size`).
pub(super) candidate_limit: usize,
/// Candidates returned by the initial retrieval.
pub(super) candidates_retrieved: usize,
/// Candidates remaining after the exact-merge step.
pub(super) candidates_after_exact_merge: usize,
/// Candidates remaining after the intent gate.
pub(super) candidates_after_intent_gate: usize,
/// Candidates remaining after the relevance gate.
pub(super) candidates_after_relevance_gate: usize,
/// Candidates dropped because their metadata was missing.
pub(super) metadata_missing_dropped: usize,
/// Number of hits finally returned.
pub(super) returned: usize,
}
/// Explanation produced when a recall run matched nothing.
pub(super) struct RecallDiagnostics {
/// Summary text for the zero-match result.
pub(super) summary: String,
/// Printed under "Likely causes" in the `--copy` output.
pub(super) possible_causes: Vec<DiagnosticItem>,
/// Printed under "Next steps" in the `--copy` output.
pub(super) next_steps: Vec<DiagnosticStep>,
}
/// One possible cause listed in `RecallDiagnostics::possible_causes`.
pub(super) struct DiagnosticItem {
/// Static code string for this cause (presumably machine-readable — confirm against the JSON renderer).
pub(super) code: &'static str,
/// Human-readable description; this is what the `--copy` output prints.
pub(super) message: String,
}
/// One suggested follow-up listed in `RecallDiagnostics::next_steps`.
#[derive(Clone)]
pub(super) struct DiagnosticStep {
/// Command to run, if the step has one; printed in backticks before the message.
pub(super) command: Option<String>,
/// Human-readable description of the step.
pub(super) message: String,
}
/// Sizes the candidate pool for a `top_k` request.
///
/// The pool is four times `top_k` (saturating), but never larger than the
/// greater of `top_k` and 40. The result is therefore always at least `top_k`.
pub(super) fn candidate_pool_size(top_k: usize) -> usize {
    let ceiling = top_k.max(40);
    let widened = top_k.saturating_mul(4);
    // `widened` can never drop below `top_k`, so only the ceiling can bind.
    widened.min(ceiling)
}
/// Heuristic: does this intent look too generic to recall anything specific?
///
/// An intent is broad when its trimmed text is at most 18 characters long, or
/// when it contains at most two "meaningful" words. Words are split on
/// non-alphanumeric characters; a word is meaningful when it is longer than two
/// bytes and is not one of the filler words below (compared ASCII
/// case-insensitively).
pub(super) fn query_looks_broad(intent: &str) -> bool {
    const FILLER_WORDS: &[&str] = &[
        "ask", "bug", "change", "changes", "code", "current", "diff", "file", "files", "fix",
        "issue", "review", "thing", "update",
    ];

    if intent.trim().chars().count() <= 18 {
        return true;
    }

    let mut meaningful_words = 0usize;
    for word in intent.split(|c: char| !c.is_alphanumeric()) {
        if word.len() <= 2 {
            continue;
        }
        if FILLER_WORDS
            .iter()
            .any(|filler| filler.eq_ignore_ascii_case(word))
        {
            continue;
        }
        meaningful_words += 1;
    }
    meaningful_words <= 2
}
/// Builds an example of a narrower query to suggest to the user.
///
/// When the intent looks like a pasted diff, the suggestion talks about "the
/// current diff" instead of echoing the intent. When the file extension maps to
/// a known language, the suggestion is prefixed with that language name.
pub(super) fn more_specific_query_example(intent: &str, file: Option<&str>) -> String {
    let is_diff = intent_looks_like_diff(intent);
    let language = file.and_then(file_extension_hint);
    match (is_diff, language) {
        (true, Some(language)) => format!("{language} review convention for the current diff"),
        (true, None) => "review convention for the current diff".to_owned(),
        (false, Some(language)) => format!("{language} review convention for {intent}"),
        (false, None) => {
            format!("{intent} around validation, error handling, or team decisions")
        }
    }
}
/// Maps a file's extension (ASCII case-insensitive) to a language label.
///
/// Returns `None` when the path has no extension, the extension is not valid
/// UTF-8, or the extension is not in the table. Note that `js`/`jsx` share the
/// `"TypeScript"` label with `ts`/`tsx`.
fn file_extension_hint(file: &str) -> Option<&'static str> {
    let extension = std::path::Path::new(file)
        .extension()?
        .to_str()?
        .to_ascii_lowercase();
    let language = match extension.as_str() {
        "rs" => "Rust",
        "ts" | "tsx" | "js" | "jsx" => "TypeScript",
        "go" => "Go",
        "py" => "Python",
        "rb" => "Ruby",
        "java" => "Java",
        _ => return None,
    };
    Some(language)
}
pub(super) fn recall_command(intent: &str, file: Option<&str>) -> String {
let mut command = format!("difflore recall {}", quote_cli_arg(intent));
if let Some(file) = file.map(str::trim).filter(|file| !file.is_empty()) {
command.push_str(" --file ");
command.push_str("e_cli_arg(file));
}
command
}
/// Detects whether the intent text is actually a pasted unified diff.
///
/// Only structural markers count: a leading `diff --git`, a hunk header at the
/// start of a line after the first (`"\n@@"`), or a line starting with `--- a/`
/// or `+++ b/`. Leading whitespace of the whole text is ignored.
fn intent_looks_like_diff(intent: &str) -> bool {
    const FILE_HEADER_PREFIXES: [&str; 2] = ["--- a/", "+++ b/"];

    let body = intent.trim_start();
    if body.starts_with("diff --git") || body.contains("\n@@") {
        return true;
    }
    body.lines().any(|line| {
        FILE_HEADER_PREFIXES
            .iter()
            .any(|prefix| line.starts_with(*prefix))
    })
}
/// Returns a short phrase naming what was recalled for.
///
/// Diff-shaped intents become `this diff`; anything else is collapsed to one
/// line, truncated to 72 characters, and wrapped in double quotes.
pub(super) fn recall_subject(intent: &str) -> String {
    if !intent_looks_like_diff(intent) {
        let excerpt = truncate_one_line(intent, 72);
        return format!("\"{excerpt}\"");
    }
    String::from("this diff")
}
/// Wraps `value` in double quotes, backslash-escaping every backslash and
/// double quote inside it. No other characters are escaped.
fn quote_cli_arg(value: &str) -> String {
    let mut quoted = String::with_capacity(value.len() + 2);
    quoted.push('"');
    for ch in value.chars() {
        if matches!(ch, '\\' | '"') {
            quoted.push('\\');
        }
        quoted.push(ch);
    }
    quoted.push('"');
    quoted
}
/// Reports whether `file` matches at least one of `patterns`.
///
/// Both the file and each pattern are normalised the same way: surrounding
/// whitespace is trimmed, backslashes become forward slashes, and leading
/// slashes are removed. Separators are unified *before* leading slashes are
/// stripped, so an input that starts with a backslash (for example
/// `\src\lib.rs`) ends up relative like every other path instead of keeping a
/// stray leading `/`.
///
/// Each pattern is compiled as a glob; a pattern that fails to compile is
/// compared to the file literally. Blank patterns never match, and a missing or
/// blank `file` yields `false`.
pub(super) fn strict_file_pattern_match(patterns: &[String], file: Option<&str>) -> bool {
    /// Trim, unify separators to `/`, then drop leading slashes.
    fn normalise(raw: &str) -> String {
        raw.trim()
            .replace('\\', "/")
            .trim_start_matches('/')
            .to_owned()
    }

    let Some(file) = file.map(str::trim).filter(|file| !file.is_empty()) else {
        return false;
    };
    let normalised = normalise(file);
    patterns.iter().any(|pattern| {
        let normalised_pattern = normalise(pattern);
        if normalised_pattern.is_empty() {
            return false;
        }
        match Glob::new(&normalised_pattern) {
            Ok(glob) => glob.compile_matcher().is_match(&normalised),
            // Not a valid glob: fall back to an exact path comparison.
            Err(_) => normalised_pattern == normalised,
        }
    })
}
/// Reports whether any file in `files` matches at least one of `patterns`,
/// using `strict_file_pattern_match` for each file. An empty `files` slice
/// yields `false`.
pub(super) fn strict_pattern_match_any_file(patterns: &[String], files: &[String]) -> bool {
    for candidate in files {
        if strict_file_pattern_match(patterns, Some(candidate.as_str())) {
            return true;
        }
    }
    false
}
/// Chooses the files that strict pattern matching is checked against.
///
/// A non-empty `diff_files` list wins and is returned as-is. Otherwise the
/// result is the single trimmed `file` when it is present and non-blank, or an
/// empty list.
pub(super) fn strict_scope_files(file: Option<&str>, diff_files: &[String]) -> Vec<String> {
    if !diff_files.is_empty() {
        return diff_files.to_vec();
    }
    match file.map(str::trim) {
        Some(single) if !single.is_empty() => vec![single.to_owned()],
        _ => Vec::new(),
    }
}
/// Returns the title for a local rule by delegating to `search::rule_title`
/// with the rule `content` and the `fallback` text.
pub(super) fn local_rule_title(content: &str, fallback: &str) -> String {
search::rule_title(content, fallback)
}
/// Result of recalling cloud PR review memory for one query.
pub(super) struct CloudRecallResult {
/// `false` makes the output say cloud memory "is available after sign-in".
pub(super) logged_in: bool,
/// Repository the lookup was scoped to; `None` is reported as needing a supported repo remote.
pub(super) repo_full_name: Option<String>,
/// Scope label, emitted in JSON as `scope`.
pub(super) scope: &'static str,
/// Team identifier, emitted in JSON as `teamId`.
pub(super) team_id: Option<String>,
/// The recalled verdicts.
pub(super) verdicts: Vec<PastVerdict>,
}
/// Formats where a verdict came from.
///
/// * repo and PR number: `repo#number`
/// * repo only: `repo`
/// * PR number only: `source PR #number`
/// * neither: `None`
pub(super) fn source_label(verdict: &PastVerdict, repo: Option<&str>) -> Option<String> {
    let pr_number = verdict.source_pr_number;
    match repo {
        Some(repo) => Some(match pr_number {
            Some(number) => format!("{repo}#{number}"),
            None => repo.to_owned(),
        }),
        None => pr_number.map(|number| format!("source PR #{number}")),
    }
}
/// Collapses `value` onto one line and limits it to `max_chars` characters.
///
/// All runs of whitespace (including newlines) become single spaces and the
/// ends are trimmed. If the collapsed text fits, it is returned unchanged.
/// Otherwise it is cut and terminated with `...`, with the ellipsis counted
/// against the limit. Lengths are measured in `char`s, not bytes.
///
/// The result never exceeds `max_chars`: for limits below three the ellipsis
/// itself is clipped (`2` gives `..`, `0` gives an empty string).
pub(super) fn truncate_one_line(value: &str, max_chars: usize) -> String {
    const ELLIPSIS: &str = "...";

    let cleaned = value.split_whitespace().collect::<Vec<_>>().join(" ");
    if cleaned.chars().count() <= max_chars {
        return cleaned;
    }
    // The ellipsis is ASCII, so slicing it by byte length is safe.
    let ellipsis_len = ELLIPSIS.len().min(max_chars);
    let mut out: String = cleaned.chars().take(max_chars - ellipsis_len).collect();
    out.push_str(&ELLIPSIS[..ellipsis_len]);
    out
}
/// Reports whether the file's extension is a source-code extension.
///
/// The path is lowercased (ASCII) with backslashes turned into forward slashes
/// before the extension is taken; a path without a UTF-8 extension yields
/// `false`. The decision itself is made by
/// `crate::support::file_ext::is_source_code_extension`.
fn is_source_or_test_file(file: &str) -> bool {
    let normalized = file.replace('\\', "/").to_ascii_lowercase();
    std::path::Path::new(&normalized)
        .extension()
        .and_then(|ext| ext.to_str())
        .is_some_and(|ext| crate::support::file_ext::is_source_code_extension(ext))
}
fn primary_recall_file(files: &[String]) -> Option<String> {
files
.iter()
.find(|p| is_source_or_test_file(p))
.or_else(|| files.iter().find(|p| is_manifest_or_lockfile(p)))
.or_else(|| files.iter().find(|p| is_reviewable_config_file(p)))
.or_else(|| files.first())
.cloned()
}
/// Reports whether the file's basename is a known package manifest or lockfile.
///
/// Matching is ASCII case-insensitive and accepts either path separator. Any
/// basename ending in `.gemspec` also counts.
fn is_manifest_or_lockfile(file: &str) -> bool {
    const KNOWN_NAMES: &[&str] = &[
        "package.json",
        "pnpm-lock.yaml",
        "package-lock.json",
        "yarn.lock",
        "bun.lockb",
        "cargo.toml",
        "cargo.lock",
        "go.mod",
        "go.sum",
        "gemfile",
        "gemfile.lock",
        "pyproject.toml",
        "poetry.lock",
        "requirements.txt",
        "pom.xml",
        "build.gradle",
        "build.gradle.kts",
    ];

    let normalized = file.replace('\\', "/").to_ascii_lowercase();
    let basename = normalized
        .rsplit_once('/')
        .map_or(normalized.as_str(), |(_, name)| name);
    basename.ends_with(".gemspec") || KNOWN_NAMES.contains(&basename)
}
/// Reports whether the file is a config file worth anchoring recall on.
///
/// True for anything whose path starts with `.github/workflows/`, and for a
/// fixed set of basenames (Docker, TypeScript, bundler, and linter configs).
/// Matching is ASCII case-insensitive and accepts either path separator.
fn is_reviewable_config_file(file: &str) -> bool {
    const KNOWN_NAMES: &[&str] = &[
        "dockerfile",
        "docker-compose.yml",
        "docker-compose.yaml",
        "tsconfig.json",
        "vite.config.ts",
        "vite.config.js",
        "webpack.config.js",
        "eslint.config.js",
        "eslint.config.mjs",
        ".eslintrc",
        ".eslintrc.json",
        ".prettierrc",
        "rubocop.yml",
        ".rubocop.yml",
    ];

    let normalized = file.replace('\\', "/").to_ascii_lowercase();
    if normalized.starts_with(".github/workflows/") {
        return true;
    }
    let basename = normalized
        .rsplit_once('/')
        .map_or(normalized.as_str(), |(_, name)| name);
    KNOWN_NAMES.contains(&basename)
}
#[cfg(test)]
mod tests {
use super::*;
use super::retrieval::{
ExampleSide, build_local_hits, classify_example_heading, content_only_file_scope_fallback,
divergent_example_lines, extract_rule_examples, filter_starter_by_relevance,
first_example_code_line, is_markdown_section_break,
};
use difflore_core::context::retrieval::{RenderedRuleExample, ScoredRuleChunk};
use difflore_core::skills::SearchSkillMeta;
#[test]
fn semantic_state_sha1_is_keyword_only_with_honest_note() {
let state = RecallSemanticState::from_kind(&ActiveEmbedderKind::Sha1);
assert!(!state.semantic, "SHA1 hash is not semantic");
assert_eq!(state.mode, "keyword");
let note = state.keyword_only_note().expect("keyword path has a note");
assert!(
note.contains("local keyword fallback"),
"note must name local keyword fallback: {note}"
);
assert!(note.contains("difflore cloud login"), "note: {note}");
assert!(note.contains("difflore embeddings setup"), "note: {note}");
}
#[test]
fn semantic_state_providers_are_semantic_with_no_note() {
let cloud = RecallSemanticState::from_kind(&ActiveEmbedderKind::Cloud);
assert!(cloud.semantic);
assert_eq!(cloud.mode, "cloud");
assert!(
cloud.keyword_only_note().is_none(),
"a semantic provider must not emit a keyword-only note"
);
let byok = RecallSemanticState::from_kind(&ActiveEmbedderKind::Byok {
provider_host: "api.openai.com".to_owned(),
model: "text-embedding-3-small".to_owned(),
dim: 1536,
});
assert!(byok.semantic);
assert_eq!(byok.mode, "byok");
assert!(byok.keyword_only_note().is_none());
}
#[test]
fn intent_looks_like_diff_requires_structural_markers() {
assert!(intent_looks_like_diff(
"diff --git a/src/lib.rs b/src/lib.rs\n@@ -1 +1 @@"
));
assert!(intent_looks_like_diff(
"--- a/src/lib.rs\n+++ b/src/lib.rs\n@@ -1 +1 @@"
));
assert!(!intent_looks_like_diff(
"Please review this long, multi-line intent.\nIt mentions +added and -removed concepts but is not a patch."
));
assert!(!intent_looks_like_diff(
&"explain the validation flow ".repeat(12)
));
}
#[test]
fn examples_plain_bad_good_blocks() {
let content = "Rule Name: Cap bodies\nType: review\n\n\
Some prose about limits.\n\n\
Bad:\ndata, _ := io.ReadAll(r.Body)\n\n\
Good:\nr.Body = http.MaxBytesReader(w, r.Body, max)";
let (bad, fix) = extract_rule_examples(content);
assert_eq!(bad.as_deref(), Some("data, _ := io.ReadAll(r.Body)"));
assert_eq!(
fix.as_deref(),
Some("r.Body = http.MaxBytesReader(w, r.Body, max)")
);
}
#[test]
fn examples_generated_examples_section_with_fences() {
let content = "# Avoid unbounded reads\n\nProse.\n\n\
### Examples\n\n\
❌ Bad:\n```go\nbody, _ := io.ReadAll(r.Body)\nuse(body)\n```\n\n\
✅ Good:\n```go\nr.Body = http.MaxBytesReader(w, r.Body, 10<<20)\nbody, err := io.ReadAll(r.Body)\n```\n";
let (bad, fix) = extract_rule_examples(content);
assert_eq!(bad.as_deref(), Some("body, _ := io.ReadAll(r.Body)"));
assert_eq!(
fix.as_deref(),
Some("r.Body = http.MaxBytesReader(w, r.Body, 10<<20)")
);
}
#[test]
fn examples_markdown_wrong_correct_headings_with_fences() {
let content = "# Switch defaults\n\n\
### ❌ Wrong\n```go\nswitch v {\ncase A:\n}\n```\n\n\
### ✅ Correct\n```go\nswitch v {\ncase A:\ndefault:\n\treturn fmt.Errorf(\"unhandled %v\", v)\n}\n```\n";
let (bad, fix) = extract_rule_examples(content);
assert_eq!(bad.as_deref(), Some("}"));
assert_eq!(fix.as_deref(), Some("default:"));
assert_ne!(bad, fix);
}
#[test]
fn examples_before_after_same_signature_surfaces_the_real_change() {
let content = "# Apply visibility\n\n\
### ❌ Bad\n```go\n\
func applyVisibility(opts *RequestOptions, visibility string) {\n\
\topts.Visibility = visibility\n\
}\n```\n\n\
### ✅ Fix\n```go\n\
func applyVisibility(opts *RequestOptions, visibility string) {\n\
\tif visibility == \"\" {\n\
\t\treturn\n\
\t}\n\
\topts.Visibility = visibility\n\
}\n```\n";
let (bad, fix) = extract_rule_examples(content);
assert_eq!(bad.as_deref(), Some("opts.Visibility = visibility"));
assert_eq!(fix.as_deref(), Some("if visibility == \"\" {"));
assert_ne!(bad, fix);
assert!(
!bad.as_deref().unwrap().contains("func applyVisibility"),
"bad must not be the shared function signature",
);
}
#[test]
fn examples_real_enum_switch_rule_diverges_past_shared_signature() {
let content = "# Enum Switch Must Have a Default Early-Return Clause\n\n\
## Bad\n\n\
```go\n\
func applyVisibility(opts *RequestOptions, visibility string) {\n\
\tswitch visibility {\n\
\tcase \"public\":\n\
\t\topts.Private = boolPtr(false)\n\
\t\topts.Internal = boolPtr(false)\n\
\tcase \"private\":\n\
\t\topts.Private = boolPtr(true)\n\
\t\topts.Internal = boolPtr(false)\n\
\tcase \"internal\":\n\
\t\topts.Private = boolPtr(false)\n\
\t\topts.Internal = boolPtr(true)\n\
\t// no default — unrecognized value silently sets nothing,\n\
\t// but pointer fields may already be non-nil from earlier code\n\
\t}\n\
}\n\
```\n\n\
## Good\n\n\
```go\n\
func applyVisibility(opts *RequestOptions, visibility string) {\n\
\tswitch visibility {\n\
\tcase \"public\":\n\
\t\topts.Private = boolPtr(false)\n\
\t\topts.Internal = boolPtr(false)\n\
\tcase \"private\":\n\
\t\topts.Private = boolPtr(true)\n\
\t\topts.Internal = boolPtr(false)\n\
\tcase \"internal\":\n\
\t\topts.Private = boolPtr(false)\n\
\t\topts.Internal = boolPtr(true)\n\
\tdefault:\n\
\t\treturn // unrecognized value: leave struct untouched\n\
\t}\n\
}\n\
```\n";
let (bad, fix) = extract_rule_examples(content);
assert_eq!(
bad.as_deref(),
Some("// no default — unrecognized value silently sets nothing,"),
);
assert_eq!(fix.as_deref(), Some("default:"));
assert_ne!(bad, fix);
assert!(!bad.as_deref().unwrap().contains("func applyVisibility"));
assert!(!fix.as_deref().unwrap().contains("func applyVisibility"));
}
#[test]
fn divergent_lines_keep_first_lines_when_first_lines_differ() {
let bad = "data, _ := io.ReadAll(r.Body)\nuse(data)";
let fix = "r.Body = http.MaxBytesReader(w, r.Body, max)\nbody, _ := io.ReadAll(r.Body)";
assert_eq!(
divergent_example_lines(Some(bad), Some(fix)),
(
Some("data, _ := io.ReadAll(r.Body)".to_owned()),
Some("r.Body = http.MaxBytesReader(w, r.Body, max)".to_owned()),
),
);
}
#[test]
fn divergent_lines_single_line_blocks_kept_as_is() {
let line = "switch v {";
assert_eq!(
divergent_example_lines(Some(line), Some(line)),
(Some("switch v {".to_owned()), Some("switch v {".to_owned())),
);
}
#[test]
fn divergent_lines_identical_blocks_keep_first_lines() {
let block = "func f() {\n\tdoThing()\n}";
assert_eq!(
divergent_example_lines(Some(block), Some(block)),
(Some("func f() {".to_owned()), Some("func f() {".to_owned())),
);
}
#[test]
fn divergent_lines_good_adds_trailing_lines_surfaces_added_line() {
let bad = "func f() {\n\tdoThing()";
let fix = "func f() {\n\tdoThing()\n\tdoExtra()\n}";
assert_eq!(
divergent_example_lines(Some(bad), Some(fix)),
(Some("func f() {".to_owned()), Some("doExtra()".to_owned())),
);
}
#[test]
fn divergent_lines_bad_has_extra_trailing_keeps_first_lines() {
let bad = "func f() {\n\tdoThing()\n\tleak()\n}";
let fix = "func f() {\n\tdoThing()";
assert_eq!(
divergent_example_lines(Some(bad), Some(fix)),
(Some("func f() {".to_owned()), Some("func f() {".to_owned())),
);
}
#[test]
fn divergent_lines_one_side_missing_returns_present_first_line() {
assert_eq!(
divergent_example_lines(Some("panic(err)\nmore()"), None),
(Some("panic(err)".to_owned()), None),
);
assert_eq!(
divergent_example_lines(None, Some("return nil\nmore()")),
(None, Some("return nil".to_owned())),
);
assert_eq!(divergent_example_lines(None, None), (None, None));
}
#[test]
fn examples_wrong_right_without_hashes() {
let content = "Use errors.Is for sentinel comparison.\n\n\
❌ Wrong\n```go\nif err == io.EOF {}\n```\n\n\
✅ Right\n```go\nif errors.Is(err, io.EOF) {}\n```\n";
let (bad, fix) = extract_rule_examples(content);
assert_eq!(bad.as_deref(), Some("if err == io.EOF {}"));
assert_eq!(fix.as_deref(), Some("if errors.Is(err, io.EOF) {}"));
}
#[test]
fn examples_bad_example_good_example_labels() {
let content = "## Examples\n\n\
Bad example:\n```ts\nconst x = await fetch(url)\n```\n\n\
Good example:\n```ts\nconst x = await fetchWithTimeout(url, 5000)\n```\n";
let (bad, fix) = extract_rule_examples(content);
assert_eq!(bad.as_deref(), Some("const x = await fetch(url)"));
assert_eq!(
fix.as_deref(),
Some("const x = await fetchWithTimeout(url, 5000)")
);
}
#[test]
fn examples_wrong_correct_colon_labels_no_fence() {
let content = "Prefer guard clauses.\n\n\
Wrong:\nif (ok) { doStuff(); }\n\n\
Correct:\nif (!ok) return;\ndoStuff();";
let (bad, fix) = extract_rule_examples(content);
assert_eq!(bad.as_deref(), Some("if (ok) { doStuff(); }"));
assert_eq!(fix.as_deref(), Some("if (!ok) return;"));
}
#[test]
fn examples_bad_fix_labels() {
let content = "Bad:\nx == nil\n\nFix:\nx.IsZero()";
let (bad, fix) = extract_rule_examples(content);
assert_eq!(bad.as_deref(), Some("x == nil"));
assert_eq!(fix.as_deref(), Some("x.IsZero()"));
}
#[test]
fn examples_anti_pattern_better_headings() {
let content = "### ❌ Anti-pattern: Separate state\n```rust\nlet a = 1;\n```\n\n\
### ✅ Better: Capture together\n```rust\nlet b = 2;\n```\n";
let (bad, fix) = extract_rule_examples(content);
assert_eq!(bad.as_deref(), Some("let a = 1;"));
assert_eq!(fix.as_deref(), Some("let b = 2;"));
}
#[test]
fn examples_none_when_no_recognizable_section() {
let content = "Rule Name: Be careful\nType: review\n\n\
Always validate user input before using it in a query. Discuss \
with the team if unsure. No code samples here.";
assert_eq!(extract_rule_examples(content), (None, None));
}
#[test]
fn examples_one_side_only_bad() {
let content = "### ❌ Wrong\n```go\npanic(err)\n```\n\nSome trailing prose, no good block.";
let (bad, fix) = extract_rule_examples(content);
assert_eq!(bad.as_deref(), Some("panic(err)"));
assert_eq!(fix, None);
}
#[test]
fn examples_one_side_only_fix() {
let content = "✅ Good:\n```go\nreturn fmt.Errorf(\"wrap: %w\", err)\n```\n";
let (bad, fix) = extract_rule_examples(content);
assert_eq!(bad, None);
assert_eq!(fix.as_deref(), Some("return fmt.Errorf(\"wrap: %w\", err)"));
}
#[test]
fn examples_skip_blank_lines_inside_fence() {
let content = "### ❌ Wrong\n```go\n\n\n actualCode()\n```\n";
let (bad, _fix) = extract_rule_examples(content);
assert_eq!(bad.as_deref(), Some("actualCode()"));
}
#[test]
fn examples_inline_bad_prose_is_not_a_heading() {
let content =
"Closing matters.\n\nBad: this silently leaks a file descriptor on the error path.";
assert_eq!(extract_rule_examples(content), (None, None));
}
#[test]
fn examples_inline_comment_markers_in_one_shared_fence() {
let content = "# Gate prefetch and prompt\n\n## Example\n\n\
```go\n\
// \u{274c} Bad \u{2014} two different conditions for the same decision\n\
if reviewerSearchFunc != nil {\n\
\tgo prefetchReviewers()\n\
}\n\
if useReviewerSearch {\n\
\tshowReviewerPrompt(prefetchedReviewers)\n\
}\n\n\
// \u{2705} Good \u{2014} single source of truth\n\
useReviewerSearch := reviewerSearchFunc != nil\n\
if useReviewerSearch {\n\
\tgo prefetchReviewers()\n\
}\n\
```\n\n\
## How to Apply\n\n\
When reviewing code that prefetches...\n";
let (bad, fix) = extract_rule_examples(content);
assert_eq!(bad.as_deref(), Some("if reviewerSearchFunc != nil {"));
assert_eq!(
fix.as_deref(),
Some("useReviewerSearch := reviewerSearchFunc != nil"),
"fix must be the good code line, never the trailing `## How to Apply` heading",
);
}
#[test]
fn first_example_code_line_stops_at_section_break() {
assert_eq!(
first_example_code_line("```\n```\n\n## How to Apply\nprose"),
None,
"a block with only a closing fence then a heading has no code line",
);
assert_eq!(
first_example_code_line("real_code()\n## Next"),
Some("real_code()".to_owned()),
);
assert_eq!(
first_example_code_line("## Heading first\ncode()"),
None,
"stop immediately at a leading heading",
);
}
#[test]
fn is_markdown_section_break_recognizes_headings_and_rules() {
assert!(is_markdown_section_break("# Title"));
assert!(is_markdown_section_break("### Examples"));
assert!(is_markdown_section_break("---"));
assert!(is_markdown_section_break("***"));
assert!(!is_markdown_section_break("let x = 1; // # not a heading"));
assert!(!is_markdown_section_break("#nohash"));
assert!(!is_markdown_section_break("#"));
assert!(!is_markdown_section_break("plain prose"));
}
/// A generic "Examples" heading, with or without `#` decoration, is neither
/// the bad nor the fix side.
#[test]
fn examples_section_heading_examples_is_not_a_side() {
    for heading in ["### Examples", "## Examples", "Examples"] {
        assert_eq!(
            classify_example_heading(heading),
            None,
            "heading: {heading:?}"
        );
    }
}
/// Heading classification ignores letter case and markdown decoration
/// (bold markers, `#` prefixes, list dashes, emoji), and rejects unrelated or
/// empty lines.
#[test]
fn classify_heading_is_case_and_decoration_insensitive() {
    let cases = [
        ("BAD:", Some(ExampleSide::Bad)),
        ("**Good example:**", Some(ExampleSide::Fix)),
        ("### ❌ wrong", Some(ExampleSide::Bad)),
        ("- Correct:", Some(ExampleSide::Fix)),
        ("random line", None),
        ("", None),
    ];

    for (heading, expected) in cases {
        assert_eq!(
            classify_example_heading(heading),
            expected,
            "heading: {heading:?}"
        );
    }
}
/// `build_local_hits` must copy origin metadata onto the hit, derive the
/// source rank from that origin, and pull the bad/fix code lines out of the
/// chunk content.
#[test]
fn build_local_hits_populates_bad_fix_from_content() {
    let chunk = ScoredRuleChunk {
        skill_id: "r1".to_owned(),
        content: "Rule Name: Cap bodies\n\n### ❌ Wrong\n```go\nio.ReadAll(r.Body)\n```\n\n### ✅ Correct\n```go\nhttp.MaxBytesReader(w, r.Body, max)\n```\n".to_owned(),
        score: 0.9,
        confidence: 0.8,
    };
    let scored = vec![chunk];

    let meta = SearchSkillMeta {
        file_patterns: vec!["**/*.go".to_owned()],
        source_repo: Some("acme/widgets".to_owned()),
        origin: Some("pr_review".to_owned()),
    };
    let metas = std::collections::HashMap::from([("r1".to_owned(), meta)]);

    let hits = build_local_hits(&scored, &metas);
    assert_eq!(hits.len(), 1);

    let hit = &hits[0];
    assert_eq!(hit.origin.as_deref(), Some("pr_review"));
    assert_eq!(
        hit.source_rank,
        Some(difflore_core::context::retrieval::source_rank("pr_review"))
    );
    assert_eq!(hit.bad.as_deref(), Some("io.ReadAll(r.Body)"));
    assert_eq!(
        hit.fix.as_deref(),
        Some("http.MaxBytesReader(w, r.Body, max)")
    );
}
/// The JSON form of a local hit carries its `bad`/`fix` snippets, and emits
/// JSON null for both when the hit has no examples.
#[test]
fn local_rules_json_includes_bad_fix() {
    // Hit that carries both example snippets.
    let hit_with_examples = LocalRuleHit {
        id: "rule-1".to_owned(),
        title: "Cap bodies".to_owned(),
        preview: "p".to_owned(),
        bad: Some("io.ReadAll(r.Body)".to_owned()),
        fix: Some("http.MaxBytesReader(...)".to_owned()),
        file_patterns: vec!["**/*.go".to_owned()],
        source_repo: Some("acme/widgets".to_owned()),
        origin: Some("pr_review".to_owned()),
        source_rank: Some(difflore_core::context::retrieval::source_rank("pr_review")),
        rank_score: 1.0,
        raw_score: 0.4,
        confidence: 0.8,
        body: None,
    };
    let with_examples = LocalRecallResult {
        repo_full_name: Some("acme/widgets".to_owned()),
        rules_indexed: 1,
        file_scope_fallback: false,
        trace: RecallTrace::default(),
        matches: vec![hit_with_examples],
    };
    let json = local_rules_json(&with_examples, &["internal/x.go".to_owned()]);
    let first = &json["results"][0];
    assert_eq!(first["bad"], "io.ReadAll(r.Body)");
    assert_eq!(first["fix"], "http.MaxBytesReader(...)");

    // Hit without any example snippets or metadata.
    let hit_without_examples = LocalRuleHit {
        id: "rule-2".to_owned(),
        title: "No examples".to_owned(),
        preview: "p".to_owned(),
        file_patterns: Vec::new(),
        rank_score: 1.0,
        raw_score: 0.4,
        confidence: 0.8,
        bad: None,
        fix: None,
        source_repo: None,
        origin: None,
        source_rank: None,
        body: None,
    };
    let without_examples = LocalRecallResult {
        repo_full_name: Some("acme/widgets".to_owned()),
        rules_indexed: 1,
        file_scope_fallback: false,
        trace: RecallTrace::default(),
        matches: vec![hit_without_examples],
    };
    let bare_json = local_rules_json(&without_examples, &[]);
    let bare_first = &bare_json["results"][0];
    assert!(bare_first["bad"].is_null());
    assert!(bare_first["fix"].is_null());
}
/// A hit with a hydrated rule body must expose that body, its examples,
/// origin, source rank, check and trigger in the JSON result, alongside the
/// `bad`/`fix` snippets.
#[test]
fn local_rules_json_surfaces_full_body_for_recalled_rule_with_example() {
    let bad_line = "data, _ := io.ReadAll(r.Body)";
    let good_line = "r.Body = http.MaxBytesReader(w, r.Body, max)";

    let rendered = RenderedRuleBody {
        body: "## Rule rule-cap — Cap request bodies\n### Cases\n❌ Counter-example:\n```\ndata, _ := io.ReadAll(r.Body)\n```\n✅ Conforming:\n```\nr.Body = http.MaxBytesReader(w, r.Body, max)\n```\n".to_owned(),
        origin: "pr_review".to_owned(),
        confidence: 0.82,
        trigger: Some("Touching an HTTP body read".to_owned()),
        check: Some("Is the body capped before reading?".to_owned()),
        examples: vec![RenderedRuleExample {
            bad_code: bad_line.to_owned(),
            good_code: good_line.to_owned(),
            description: Some("reviewer flagged unbounded read".to_owned()),
        }],
    };
    let hydrated_hit = LocalRuleHit {
        id: "rule-cap".to_owned(),
        title: "Cap request bodies".to_owned(),
        preview: "Rule Name: Cap request bodies".to_owned(),
        bad: Some(bad_line.to_owned()),
        fix: Some(good_line.to_owned()),
        file_patterns: vec!["**/*.go".to_owned()],
        source_repo: Some("acme/widgets".to_owned()),
        origin: Some("pr_review".to_owned()),
        source_rank: Some(difflore_core::context::retrieval::source_rank("pr_review")),
        rank_score: 1.0,
        raw_score: 0.42,
        confidence: 0.82,
        body: Some(rendered),
    };
    let local = LocalRecallResult {
        repo_full_name: Some("acme/widgets".to_owned()),
        rules_indexed: 1,
        file_scope_fallback: false,
        trace: RecallTrace::default(),
        matches: vec![hydrated_hit],
    };

    let json = local_rules_json(&local, &["internal/server.go".to_owned()]);
    let first = &json["results"][0];

    // Top-level snippet fields.
    assert_eq!(first["skillId"], "rule-cap");
    assert_eq!(first["bad"], bad_line);
    assert_eq!(first["fix"], good_line);

    // Hydrated body text.
    assert!(!first["body"].is_null(), "body must be non-null");
    let body_text = first["body"].as_str().expect("body string");
    assert!(body_text.contains("### Cases"));

    // Structured example plus provenance fields.
    let example = &first["examples"][0];
    assert_eq!(example["badCode"], bad_line);
    assert_eq!(example["goodCode"], good_line);
    assert_eq!(first["origin"], "pr_review");
    assert_eq!(first["sourceRank"], 2);
    assert_eq!(first["check"], "Is the body capped before reading?");
    assert_eq!(first["trigger"], "Touching an HTTP body read");
}
/// Without a hydrated body the JSON result must not contain `body` or
/// `examples` keys at all; `skillId` is still present and `bad` is null.
#[test]
fn local_rules_json_omits_body_fields_when_not_hydrated() {
    let unhydrated = LocalRuleHit {
        id: "rule-unhydrated".to_owned(),
        title: "Unhydrated".to_owned(),
        preview: "p".to_owned(),
        file_patterns: Vec::new(),
        rank_score: 1.0,
        raw_score: 0.2,
        confidence: 0.7,
        bad: None,
        fix: None,
        source_repo: None,
        origin: None,
        source_rank: None,
        body: None,
    };
    let local = LocalRecallResult {
        repo_full_name: Some("acme/widgets".to_owned()),
        rules_indexed: 1,
        file_scope_fallback: false,
        trace: RecallTrace::default(),
        matches: vec![unhydrated],
    };

    let json = local_rules_json(&local, &[]);
    let first = &json["results"][0];

    assert!(first.get("body").is_none(), "no body key when unhydrated");
    assert!(first.get("examples").is_none());
    assert_eq!(first["skillId"], "rule-unhydrated");
    assert!(first["bad"].is_null());
}
/// For every `top_k` in 1..=50 the candidate pool is at least `top_k`, and a
/// handful of pinned inputs map to their expected pool sizes.
#[test]
fn candidate_pool_size_never_panics_in_documented_range() {
    (1..=50usize).for_each(|top_k| {
        let pool = candidate_pool_size(top_k);
        assert!(pool >= top_k, "pool {pool} must be >= top_k {top_k}");
    });

    // (requested top_k, expected pool size)
    let pinned = [(5, 20), (10, 40), (20, 40), (45, 45), (50, 50)];
    for (top_k, expected) in pinned {
        assert_eq!(candidate_pool_size(top_k), expected);
    }
}
/// The cross-repo starter JSON is an array whose entries keep the source
/// repo, origin, source rank, title and file patterns of each hit; an empty
/// input yields an empty array.
#[test]
fn cross_repo_starter_json_carries_attribution() {
    let gin_hit = LocalRuleHit {
        id: "r1".to_owned(),
        title: "Return 413 for oversized bodies".to_owned(),
        preview: "body...".to_owned(),
        bad: Some("data, _ := io.ReadAll(r.Body)".to_owned()),
        fix: Some("r.Body = http.MaxBytesReader(w, r.Body, max)".to_owned()),
        file_patterns: vec!["**/*.go".to_owned()],
        source_repo: Some("gin-gonic/gin".to_owned()),
        origin: Some("pr_review".to_owned()),
        source_rank: Some(difflore_core::context::retrieval::source_rank("pr_review")),
        rank_score: 0.9,
        raw_score: 0.4,
        confidence: 0.8,
        body: None,
    };

    let json = cross_repo_starter_json(&[gin_hit]);
    let entries = json.as_array().expect("array");
    assert_eq!(entries.len(), 1);

    let entry = &entries[0];
    assert_eq!(entry["sourceRepo"], "gin-gonic/gin");
    assert_eq!(entry["origin"], "pr_review");
    assert_eq!(entry["sourceRank"], 2);
    assert_eq!(entry["title"], "Return 413 for oversized bodies");
    assert_eq!(entry["filePatterns"][0], "**/*.go");

    let empty = cross_repo_starter_json(&[]);
    assert!(empty.as_array().expect("array").is_empty());
}
/// Test fixture: a minimal `LocalRuleHit` whose id and title are both `id`
/// and whose raw score is `raw_score`. Every other field is a fixed
/// placeholder (Go file pattern, source repo `x/y`, no examples or body).
fn starter_hit(id: &str, raw_score: f64) -> LocalRuleHit {
    let name = id.to_owned();
    LocalRuleHit {
        title: name.clone(),
        id: name,
        preview: String::new(),
        file_patterns: vec!["**/*.go".to_owned()],
        source_repo: Some("x/y".to_owned()),
        raw_score,
        rank_score: 1.0,
        confidence: 0.8,
        bad: None,
        fix: None,
        origin: None,
        source_rank: None,
        body: None,
    }
}
/// With a floor of 0.12, the hit scoring 0.05 is dropped while the hits at
/// 0.40 and exactly 0.12 are kept in their original order.
#[test]
fn starter_relevance_floor_drops_low_intent_hits() {
    let hits: Vec<LocalRuleHit> = [("strong", 0.40), ("weak", 0.05), ("border", 0.12)]
        .iter()
        .map(|&(id, raw_score)| starter_hit(id, raw_score))
        .collect();

    let survivors = filter_starter_by_relevance(hits, 0.12);

    let survivor_ids: Vec<&str> = survivors.iter().map(|hit| hit.id.as_str()).collect();
    assert_eq!(survivor_ids, ["strong", "border"]);
}
/// When every hit scores below the 0.12 floor, nothing survives the filter.
#[test]
fn starter_relevance_floor_empty_when_all_irrelevant() {
    let all_weak = vec![starter_hit("a", 0.01), starter_hit("b", 0.08)];
    let survivors = filter_starter_by_relevance(all_weak, 0.12);
    assert!(survivors.is_empty());
}
/// Each starter entry exposes `rawScore` (matching the hit's raw score) and a
/// numeric `rankScore`.
#[test]
fn cross_repo_starter_json_carries_scores() {
    let json = cross_repo_starter_json(&[starter_hit("r1", 0.37)]);
    let entry = &json[0];

    let raw_score = entry["rawScore"].as_f64().expect("rawScore");
    assert!((raw_score - 0.37).abs() < 1e-9);
    assert!(entry["rankScore"].as_f64().is_some());
}
/// Test fixture: a `ScoredRuleChunk` for `id` with the given `score`, a fixed
/// confidence of 0.7, and content that names the rule by `id`.
fn scored_chunk(id: &str, score: f64) -> ScoredRuleChunk {
    let content = format!("Rule ID: {id}\nRule Name: {id}\n\nbody");
    ScoredRuleChunk {
        skill_id: id.to_owned(),
        content,
        score,
        confidence: 0.7,
    }
}
/// When every candidate has a very low score, the explicit-recall threshold
/// must remove all of them.
#[test]
fn explicit_recall_gate_drops_all_weak_local_candidates() {
    let mut candidates: Vec<ScoredRuleChunk> =
        [("noise-1", 0.004), ("noise-2", 0.003), ("noise-3", 0.0015)]
            .iter()
            .map(|&(id, score)| scored_chunk(id, score))
            .collect();

    difflore_core::context::retrieval::apply_explicit_recall_threshold(&mut candidates);

    assert!(
        candidates.is_empty(),
        "all-weak local candidates must produce a zero-match recall"
    );
}
/// A strong candidate survives the explicit-recall threshold while a much
/// weaker one alongside it is removed.
#[test]
fn explicit_recall_gate_keeps_strong_local_match() {
    let mut candidates = vec![scored_chunk("strong", 0.32), scored_chunk("tail", 0.02)];

    difflore_core::context::retrieval::apply_explicit_recall_threshold(&mut candidates);

    let surviving_ids: Vec<&str> = candidates
        .iter()
        .map(|chunk| chunk.skill_id.as_str())
        .collect();
    assert_eq!(
        surviving_ids,
        ["strong"],
        "the strong match survives; the far-weaker tail is dropped"
    );
}
/// A scored chunk whose skill id has no entry in the metadata map must be
/// left out of the built hits.
#[test]
fn build_local_hits_skips_chunks_with_missing_meta() {
    let live = ScoredRuleChunk {
        skill_id: "live-rule".to_owned(),
        content: "Rule Name: Live rule\nAlways assert status".to_owned(),
        score: 0.9,
        confidence: 0.8,
    };
    // No metadata is registered for this one.
    let ghost = ScoredRuleChunk {
        skill_id: "ghost-rule".to_owned(),
        content: "Rule Name: Ghost rule\nDeleted skill row".to_owned(),
        score: 0.5,
        confidence: 0.6,
    };
    let scored = vec![live, ghost];

    let live_meta = SearchSkillMeta {
        file_patterns: vec!["**/*.go".to_owned()],
        source_repo: Some("acme/widgets".to_owned()),
        origin: None,
    };
    let metas = std::collections::HashMap::from([("live-rule".to_owned(), live_meta)]);

    let hits = build_local_hits(&scored, &metas);

    assert_eq!(hits.len(), 1, "ghost rule with no meta must be skipped");
    assert_eq!(hits[0].id, "live-rule");
    assert!(
        hits.iter().all(|hit| hit.id != "ghost-rule"),
        "stale chunk must not appear in hits",
    );
}
/// Pins the camelCase key names of the local-rules JSON and checks that
/// `strictFileMatch` is true when any scoped file matches the hit's patterns.
#[test]
fn local_rules_json_uses_cli_only_shape() {
    let go_hit = LocalRuleHit {
        id: "rule-1".to_owned(),
        title: "Prefer explicit status assertions".to_owned(),
        preview: "Rule Name: Prefer explicit status assertions".to_owned(),
        bad: Some("assert!(resp.ok())".to_owned()),
        fix: Some("assert_eq!(resp.status(), 200)".to_owned()),
        file_patterns: vec!["**/*.go".to_owned()],
        source_repo: Some("acme/widgets".to_owned()),
        rank_score: 1.0,
        raw_score: 0.42,
        confidence: 0.7,
        origin: None,
        source_rank: None,
        body: None,
    };
    let local = LocalRecallResult {
        repo_full_name: Some("acme/widgets".to_owned()),
        rules_indexed: 3,
        file_scope_fallback: true,
        trace: RecallTrace::default(),
        matches: vec![go_hit],
    };

    // Single file that matches the `**/*.go` pattern.
    let matching = local_rules_json(&local, &["internal/server.go".to_owned()]);
    assert_eq!(matching["rulesIndexed"], 3);
    assert_eq!(matching["repoFullName"], "acme/widgets");
    assert_eq!(matching["fileScopeFallback"], true);
    let first = &matching["results"][0];
    assert_eq!(first["skillId"], "rule-1");
    assert_eq!(first["sourceRepo"], "acme/widgets");
    assert_eq!(first["strictFileMatch"], true);

    // Single file that does not match.
    let non_matching = local_rules_json(&local, &["README.md".to_owned()]);
    assert_eq!(non_matching["results"][0]["strictFileMatch"], false);

    // Multi-file scope where only one file matches.
    let mixed_scope = ["README.md".to_owned(), "internal/server.go".to_owned()];
    let changeset = local_rules_json(&local, &mixed_scope);
    assert_eq!(changeset["results"][0]["strictFileMatch"], true);
}
/// The content-only fallback flag is set only when a file scope exists and no
/// hit's patterns match it; a pattern match or an empty scope clears it.
#[test]
fn file_scoped_recall_marks_content_only_matches_as_fallback() {
    // Hit with no file patterns at all.
    let unscoped_hits = vec![LocalRuleHit {
        id: "rule-global".to_owned(),
        title: "Review: generic praise".to_owned(),
        preview: "Rule Name: generic praise".to_owned(),
        file_patterns: Vec::new(),
        source_repo: Some("acme/widgets".to_owned()),
        rank_score: 1.0,
        raw_score: 0.2,
        confidence: 0.6,
        bad: None,
        fix: None,
        origin: None,
        source_rank: None,
        body: None,
    }];
    // Hit restricted to Rust sources under `src/`.
    let scoped_hits = vec![LocalRuleHit {
        id: "rule-scoped".to_owned(),
        title: "Review: scoped".to_owned(),
        preview: "Rule Name: scoped".to_owned(),
        file_patterns: vec!["src/**/*.rs".to_owned()],
        source_repo: Some("acme/widgets".to_owned()),
        rank_score: 1.0,
        raw_score: 0.2,
        confidence: 0.6,
        bad: None,
        fix: None,
        origin: None,
        source_rank: None,
        body: None,
    }];

    let single_file = vec!["src/lib.rs".to_owned()];
    assert!(content_only_file_scope_fallback(&unscoped_hits, &single_file));
    assert!(!content_only_file_scope_fallback(&scoped_hits, &single_file));
    assert!(!content_only_file_scope_fallback(&unscoped_hits, &[]));

    let multi_file = vec!["README.md".to_owned(), "src/lib.rs".to_owned()];
    assert!(!content_only_file_scope_fallback(&scoped_hits, &multi_file));
}
/// With zero indexed rules and a known repository, the diagnostics must name
/// the empty local corpus as a cause and lead with the `import-reviews`
/// command.
///
/// Note: despite the `_and_accept` suffix in the test name, the assertions
/// require that `difflore candidates accept --top 3` is NOT suggested here.
#[test]
fn zero_match_diagnostics_empty_corpus_points_to_import_and_accept() {
    let local = LocalRecallResult {
        rules_indexed: 0,
        repo_full_name: Some("acme/widgets".to_owned()),
        matches: Vec::new(),
        file_scope_fallback: false,
        trace: RecallTrace::default(),
    };
    let cloud = CloudRecallResult {
        logged_in: false,
        repo_full_name: Some("acme/widgets".to_owned()),
        scope: "personal",
        team_id: None,
        verdicts: Vec::new(),
    };
    let diagnostics =
        build_zero_match_diagnostics(&local, &cloud, "unwrap", Some("src/lib.rs"));
    let json = recall_diagnostics_json(&diagnostics);

    assert!(
        diagnostics
            .possible_causes
            .iter()
            .any(|cause| cause.code == "local_corpus_empty")
    );

    // `first()` rather than `[0]`: an empty step list then fails this
    // assertion (`None` vs. the expected command) instead of panicking with
    // an index-out-of-bounds error. This equality also implies that
    // import-reviews appears somewhere in the list, so no separate `any`
    // check is needed.
    assert_eq!(
        diagnostics
            .next_steps
            .first()
            .and_then(|step| step.command.as_deref()),
        Some("difflore import-reviews --max-prs 50"),
        "import-reviews must be the first suggested step for an empty corpus"
    );

    // Accepting candidates is never suggested in this scenario.
    assert!(
        diagnostics
            .next_steps
            .iter()
            .all(|step| step.command.as_deref() != Some("difflore candidates accept --top 3"))
    );
    assert_eq!(json["possibleCauses"][0]["code"], "local_corpus_empty");
}
/// Without a repository scope, the primary cause must be
/// `repo_scope_missing`, `git remote -v` must be suggested, and neither the
/// empty-corpus cause nor `import-reviews` may appear.
#[test]
fn zero_match_diagnostics_no_remote_points_to_git_remote_not_import() {
    let cloud = CloudRecallResult {
        repo_full_name: None,
        logged_in: false,
        team_id: None,
        scope: "personal",
        verdicts: Vec::new(),
    };
    let local = LocalRecallResult {
        repo_full_name: None,
        rules_indexed: 0,
        file_scope_fallback: false,
        matches: Vec::new(),
        trace: RecallTrace::default(),
    };

    let diagnostics =
        build_zero_match_diagnostics(&local, &cloud, "handle the 413 error path", None);
    let json = recall_diagnostics_json(&diagnostics);

    // True when some next step carries exactly this command.
    let suggests = |command: &str| {
        diagnostics
            .next_steps
            .iter()
            .any(|step| step.command.as_deref() == Some(command))
    };
    let blames_empty_corpus = diagnostics
        .possible_causes
        .iter()
        .any(|cause| cause.code == "local_corpus_empty");

    assert_eq!(
        diagnostics.possible_causes[0].code, "repo_scope_missing",
        "missing repo scope must be the primary cause for a no-remote checkout"
    );
    assert!(
        !blames_empty_corpus,
        "a no-remote checkout must not be mislabeled as an empty corpus"
    );
    assert!(
        suggests("git remote -v"),
        "the actionable step is adding a supported git remote"
    );
    assert!(
        !suggests("difflore import-reviews --max-prs 50"),
        "import-reviews is not offered when there is no repo scope to attach rules to"
    );
    assert_eq!(json["possibleCauses"][0]["code"], "repo_scope_missing");
}
/// With a populated corpus, a short query and a file hint, the diagnostics
/// must list the `file_path_hint` and `query_too_broad` causes, suggest
/// `difflore status`, and offer a command mentioning a TypeScript review
/// convention.
#[test]
fn zero_match_diagnostics_explain_path_hint_and_broad_query() {
    let cloud = CloudRecallResult {
        repo_full_name: Some("acme/widgets".to_owned()),
        logged_in: true,
        team_id: None,
        scope: "personal",
        verdicts: Vec::new(),
    };
    let local = LocalRecallResult {
        repo_full_name: Some("acme/widgets".to_owned()),
        rules_indexed: 12,
        file_scope_fallback: false,
        matches: Vec::new(),
        trace: RecallTrace::default(),
    };

    let diagnostics =
        build_zero_match_diagnostics(&local, &cloud, "fix bug", Some("web/app.tsx"));

    let has_cause = |code: &str| {
        diagnostics
            .possible_causes
            .iter()
            .any(|cause| cause.code == code)
    };
    assert!(has_cause("file_path_hint"));
    assert!(has_cause("query_too_broad"));

    // Only steps that actually carry a command are relevant below.
    let commands: Vec<&str> = diagnostics
        .next_steps
        .iter()
        .filter_map(|step| step.command.as_deref())
        .collect();
    assert!(commands.contains(&"difflore status"));
    assert!(
        commands
            .iter()
            .any(|command| command.contains("TypeScript review convention"))
    );
}
/// A strict match needs a non-empty pattern list, a file, and a pattern that
/// really matches that file.
#[test]
fn strict_file_pattern_match_requires_real_glob_match() {
    let rust_sources = vec!["src/**/*.rs".to_owned()];
    let matches_rust = |file: Option<&str>| strict_file_pattern_match(&rust_sources, file);

    assert!(matches_rust(Some("src/lib.rs")));
    assert!(!matches_rust(Some("README.md")));
    // No patterns at all never matches.
    assert!(!strict_file_pattern_match(&[], Some("src/lib.rs")));
    // No file never matches.
    assert!(!matches_rust(None));
}
/// A pattern with an unclosed `[` still matches a path that is
/// character-for-character identical, and nothing else.
#[test]
fn strict_file_pattern_match_falls_back_to_literal_for_invalid_globs() {
    let malformed = vec!["src/[broken.rs".to_owned()];

    let same_path = strict_file_pattern_match(&malformed, Some("src/[broken.rs"));
    let other_path = strict_file_pattern_match(&malformed, Some("src/other.rs"));

    assert!(same_path);
    assert!(!other_path);
}
/// The multi-file matcher succeeds when at least one file matches a pattern,
/// and fails when no file matches or either list is empty.
#[test]
fn strict_pattern_match_any_file_uses_any_path_semantics() {
    let rust_sources = vec!["src/**/*.rs".to_owned()];
    let one_matching = vec!["README.md".to_owned(), "src/lib.rs".to_owned()];
    let none_matching = vec!["README.md".to_owned(), "docs/usage.md".to_owned()];

    assert!(strict_pattern_match_any_file(&rust_sources, &one_matching));
    assert!(!strict_pattern_match_any_file(&rust_sources, &none_matching));
    // Empty file list.
    assert!(!strict_pattern_match_any_file(&rust_sources, &[]));
    // Empty pattern list.
    assert!(!strict_pattern_match_any_file(&[], &one_matching));
}
/// The strict scope is the diff's file list when one exists, otherwise the
/// single file; a missing or whitespace-only file with no diff gives an
/// empty scope.
#[test]
fn strict_scope_files_prefers_changeset_over_single_file() {
    let changeset = vec!["src/foo.rs".to_owned(), "src/bar.rs".to_owned()];

    let with_changeset = strict_scope_files(Some("src/foo.rs"), &changeset);
    assert_eq!(with_changeset, changeset);

    let single_file_only = strict_scope_files(Some("src/foo.rs"), &[]);
    assert_eq!(single_file_only, vec!["src/foo.rs".to_owned()]);

    let nothing_given = strict_scope_files(None, &[]);
    assert!(nothing_given.is_empty());

    let blank_file = strict_scope_files(Some(" "), &[]);
    assert!(blank_file.is_empty());
}
/// The primary recall file is a source file when one is present, otherwise a
/// package manifest, otherwise a config file.
#[test]
fn primary_recall_file_prefers_source_then_manifest_then_config() {
    /// Turns string literals into the owned path list the function takes.
    fn owned_paths(paths: &[&str]) -> Vec<String> {
        paths.iter().map(|path| (*path).to_owned()).collect()
    }

    // (changed files, expected primary file)
    let cases = [
        (
            owned_paths(&[
                ".changeset/release.md",
                "packages/app/package.json",
                "src/lib.ts",
            ]),
            "src/lib.ts",
        ),
        (
            owned_paths(&[
                ".changeset/release.md",
                "examples/react/package.json",
                "pnpm-lock.yaml",
            ]),
            "examples/react/package.json",
        ),
        (
            owned_paths(&["docs/usage.md", ".github/workflows/release.yml"]),
            ".github/workflows/release.yml",
        ),
    ];

    for (changed_files, expected) in &cases {
        assert_eq!(
            primary_recall_file(changed_files).as_deref(),
            Some(*expected)
        );
    }
}
/// Given a repo name and a verdict with a PR number but no PR title or URL,
/// the source label is `<repo>#<number>`.
#[test]
fn source_label_prefers_repo_and_pr_number() {
    let verdict = PastVerdict {
        extraction_id: "extraction-1".to_owned(),
        issue_text: "Use errors.Is".to_owned(),
        status: "accepted".to_owned(),
        created_at: "2026-05-06T00:00:00Z".to_owned(),
        code_snippet: String::new(),
        similarity: 0.9,
        source_pr_number: Some(42),
        source_pr_title: None,
        source_pr_url: None,
        reason: None,
        signature: None,
    };

    let label = source_label(&verdict, Some("acme/widgets"));

    assert_eq!(label.as_deref(), Some("acme/widgets#42"));
}
}