use difflore_core::context::retrieval::ScoredRuleChunk;
use difflore_core::context::types::{PastVerdict, PastVerdictScope};
use difflore_core::skills::SearchSkillMeta;
use crate::commands::util::project_path;
use crate::style::{self, sym};
use super::{
CloudRecallResult, CommandContext, DiagnosticItem, DiagnosticStep, LocalRecallResult,
LocalRuleHit, RecallDiagnostics, candidate_pool_size, local_rule_title,
more_specific_query_example, query_looks_broad, recall_command, recall_command_for_zero_match,
strict_file_pattern_match, truncate_one_line,
};
/// Embedding timeout (2.5 s) passed to the rule-index refresh that `recall_local_rules`
/// performs before retrieval.
const RECALL_INDEX_EMBEDDING_TIMEOUT: std::time::Duration = std::time::Duration::from_millis(2500);
/// Recalls locally stored rules matching `intent`, optionally anchored to `file`.
///
/// `top_k` is clamped to `1..=50` before use. Load, index, and retrieval failures are
/// printed to stderr and degrade the result (empty or partial) instead of being
/// returned as errors. When no repo scope is detected for the project path the
/// result is empty with `repo_full_name: None`.
///
/// The returned `file_scope_fallback` flag is computed by
/// `content_only_file_scope_fallback` over the final, truncated hit list.
pub(super) async fn recall_local_rules(
ctx: &CommandContext,
intent: &str,
file: Option<&str>,
top_k: usize,
) -> LocalRecallResult {
let top_k = crate::commands::util::clamp_with_warn("--top-k", top_k, 1, 50, false);
let db = &ctx.db;
// Without the rule set there is nothing to recall: report and return an empty result.
let rules = match difflore_core::context::rule_source::load_rules_from_db(db).await {
Ok(rules) => rules,
Err(error) => {
eprintln!(
"{} failed to load local rules: {error}",
style::err(sym::ERR)
);
return LocalRecallResult {
rules_indexed: 0,
repo_full_name: None,
matches: Vec::new(),
file_scope_fallback: false,
};
}
};
let mut rules_indexed = 0usize;
// Detect the repo scopes for the project path and expand them with source aliases;
// if the expansion fails, fall back to the raw detection.
let detected_repo_full_names =
difflore_core::git::detect_github_repo_full_names(&project_path());
let repo_full_names = difflore_core::skills::expand_repo_scopes_with_source_aliases(
db,
&detected_repo_full_names,
)
.await
.unwrap_or(detected_repo_full_names);
let repo_full_name = repo_full_names.first().cloned();
// No detected scope: return early with no matches.
let Some(primary_scope) = repo_full_name.clone() else {
return LocalRecallResult {
rules_indexed,
repo_full_name: None,
matches: Vec::new(),
file_scope_fallback: false,
};
};
// `primary_scope` came from `repo_full_names.first()`, so the list is non-empty here
// and the `is_empty` arm acts only as a defensive fallback.
let repo_scopes: Vec<String> = if repo_full_names.is_empty() {
vec![primary_scope.clone()]
} else {
repo_full_names.clone()
};
let index_pool = match difflore_core::context::index_db::get_pool_for_cwd().await {
Ok(pool) => pool,
Err(error) => {
eprintln!(
"{} failed to open local index DB: {error}",
style::err(sym::ERR)
);
// NOTE(review): `repo_full_name` is reported as `None` here although a scope was
// detected above; `build_zero_match_diagnostics` reads `None` as "no remote
// detected" — confirm this is intended.
return LocalRecallResult {
rules_indexed,
repo_full_name: None,
matches: Vec::new(),
file_scope_fallback: false,
};
}
};
// Refresh the index for the repo scopes. A failure is only logged: `rules_indexed`
// stays at 0 and retrieval is still attempted.
match difflore_core::context::orchestrator::ensure_rules_indexed_for_repo_scopes_with_embedding_timeout(
db,
&index_pool,
&repo_scopes,
Some(RECALL_INDEX_EMBEDDING_TIMEOUT),
)
.await
{
Ok(count) => rules_indexed = count,
Err(error) => {
eprintln!(
"{} failed to refresh local rule index: {error}",
style::err(sym::ERR)
);
}
}
// The file path, when given, is prepended to the intent to form the retrieval query.
let query = match file {
Some(file) => format!("{file} {intent}"),
None => intent.to_owned(),
};
let ranking_inputs = difflore_core::context::rule_source::load_rule_ranking_inputs(db).await;
// Retrieve a candidate pool sized by `candidate_pool_size(top_k)`; the final
// truncation to `top_k` happens after gating and sorting.
let pool_k = candidate_pool_size(top_k);
let scored = match crate::commands::search::retrieve_rules_for_search(
&index_pool,
&query,
intent,
pool_k,
ranking_inputs.confidence_map.as_ref(),
ranking_inputs.age_days_map.as_ref(),
file,
repo_scopes.as_slice(),
)
.await
{
Ok(scored) => scored,
Err(error) => {
eprintln!(
"{} local rule retrieval failed: {error}",
style::err(sym::ERR)
);
// Treated as "no scored candidates"; the exact-title merge below still runs.
Vec::new()
}
};
let mut scored = crate::commands::search::merge_exact_title_matches(
&rules,
intent,
repo_scopes.as_slice(),
scored,
pool_k,
);
difflore_core::context::retrieval::apply_intent_alignment_gate(&mut scored, intent);
difflore_core::context::retrieval::apply_explicit_recall_threshold(&mut scored);
let ids: Vec<String> = scored.iter().map(|hit| hit.skill_id.clone()).collect();
let metas = difflore_core::skills::fetch_search_meta(db, &ids).await;
let mut hits = build_local_hits(&scored, &metas);
// Stable sort: hits whose file patterns strictly match `file` move to the front,
// keeping their relative order otherwise.
if file.is_some() {
hits.sort_by(|a, b| {
let a_strict = strict_file_pattern_match(&a.file_patterns, file);
let b_strict = strict_file_pattern_match(&b.file_patterns, file);
b_strict.cmp(&a_strict)
});
}
// Truncate first so full bodies are hydrated only for the hits that are returned.
hits.truncate(top_k);
hydrate_full_rule_bodies(db, &mut hits).await;
let file_scope_fallback = content_only_file_scope_fallback(&hits, file);
LocalRecallResult {
rules_indexed,
repo_full_name,
matches: hits,
file_scope_fallback,
}
}
/// Reports whether a file-scoped recall produced hits none of which strictly match
/// the file's patterns.
///
/// Returns `false` when no `file` was given or when `hits` is empty.
pub(super) fn content_only_file_scope_fallback(hits: &[LocalRuleHit], file: Option<&str>) -> bool {
    if file.is_none() || hits.is_empty() {
        return false;
    }
    hits.iter()
        .all(|candidate| !strict_file_pattern_match(&candidate.file_patterns, file))
}
/// Converts scored rule chunks into displayable hits, in the order given.
///
/// Chunks whose `skill_id` has no entry in `metas` are dropped. `rank_score` is the
/// chunk score divided by the highest score in `scored`, or `0.0` when that highest
/// score is not positive. `body` is left as `None`.
pub(super) fn build_local_hits(
    scored: &[ScoredRuleChunk],
    metas: &std::collections::HashMap<String, SearchSkillMeta>,
) -> Vec<LocalRuleHit> {
    let top_score = scored
        .iter()
        .fold(f64::NEG_INFINITY, |best, chunk| best.max(chunk.score));
    let normalize = |score: f64| {
        if top_score > 0.0 {
            score / top_score
        } else {
            0.0
        }
    };

    let mut hits = Vec::new();
    for chunk in scored {
        let Some(meta) = metas.get(&chunk.skill_id) else {
            continue;
        };
        let (bad, fix) = extract_rule_examples(&chunk.content);
        hits.push(LocalRuleHit {
            id: chunk.skill_id.clone(),
            title: local_rule_title(&chunk.content, &chunk.skill_id),
            preview: truncate_one_line(&chunk.content, 200),
            bad,
            fix,
            rank_score: normalize(chunk.score),
            raw_score: chunk.score,
            confidence: chunk.confidence,
            file_patterns: meta.file_patterns.clone(),
            source_repo: meta.source_repo.clone(),
            body: None,
        });
    }
    hits
}
/// Attaches the fully rendered rule body to each hit that has one.
///
/// When the rendered body exposes a bad and/or good code example, the hit's `bad` /
/// `fix` lines are replaced by the lines `divergent_example_lines` selects; a side
/// for which it yields nothing keeps its previous value. If rendering fails, the
/// hits are left untouched.
pub(super) async fn hydrate_full_rule_bodies(
    db: &difflore_core::SqlitePool,
    hits: &mut [LocalRuleHit],
) {
    if hits.is_empty() {
        return;
    }

    let wanted_ids: Vec<String> = hits.iter().map(|hit| hit.id.clone()).collect();
    let mut rendered_by_id =
        difflore_core::context::retrieval::render_full_rule_bodies(db, &wanted_ids)
            .await
            .unwrap_or_default();

    for hit in hits {
        if let Some(rendered) = rendered_by_id.remove(&hit.id) {
            let bad_code = rendered.first_bad_code();
            let good_code = rendered.first_good_code();
            if bad_code.is_some() || good_code.is_some() {
                let (bad_line, fix_line) =
                    divergent_example_lines(bad_code.as_deref(), good_code.as_deref());
                if let Some(line) = bad_line {
                    hit.bad = Some(line);
                }
                if let Some(line) = fix_line {
                    hit.fix = Some(line);
                }
            }
            hit.body = Some(rendered);
        }
    }
}
/// Which side of a bad/fix example pair a heading line introduces, as decided by
/// `classify_example_heading`.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub(super) enum ExampleSide {
/// The discouraged example (headings starting with e.g. "bad", "wrong", "incorrect").
Bad,
/// The corrected example (headings starting with e.g. "good", "correct", "fix").
Fix,
}
/// Decides whether `line` is a heading that introduces a bad or a fixed example.
///
/// The line is split into ASCII-alphanumeric words, compared case-insensitively. The
/// first word selects the side. A line only counts as a heading when it is
/// "decorated" (starts with `#`, `*`, `-` or `>`, or contains `❌` / `✅`) or when
/// every word after the first is a qualifier such as "example" or "code".
/// Returns `None` for lines with no words or an unrecognised first word.
pub(super) fn classify_example_heading(line: &str) -> Option<ExampleSide> {
    const MARKERS: [char; 4] = ['#', '*', '-', '>'];
    const QUALIFIERS: [&str; 6] = ["example", "examples", "code", "way", "approach", "pattern"];

    let text = line.trim();
    let has_marker = text.starts_with(MARKERS) || text.contains(['❌', '✅']);

    // Markdown markers, emoji and backticks are all non-alphanumeric, so splitting on
    // non-alphanumerics already discards them; no separate stripping pass is needed.
    let mut words = text
        .split(|c: char| !c.is_ascii_alphanumeric())
        .filter(|word| !word.is_empty());
    let lead = words.next()?;
    let only_qualifiers = words.all(|word| {
        QUALIFIERS
            .iter()
            .any(|qualifier| word.eq_ignore_ascii_case(qualifier))
    });

    if !(has_marker || only_qualifiers) {
        return None;
    }

    match lead.to_ascii_lowercase().as_str() {
        "bad" | "wrong" | "incorrect" | "anti" | "antipattern" => Some(ExampleSide::Bad),
        "good" | "correct" | "right" | "fix" | "fixed" | "better" => Some(ExampleSide::Fix),
        _ => None,
    }
}
/// Returns the trimmed, non-trivial lines of an example block.
///
/// Collection stops at the first markdown section break (see
/// `is_markdown_section_break`). Blank lines, code-fence lines (starting with
/// three backticks or `~~~`) and bare `-` / `*` bullets are skipped.
pub(super) fn meaningful_example_code_lines(block: &str) -> Vec<String> {
    block
        .lines()
        .map(str::trim)
        .take_while(|line| !is_markdown_section_break(line))
        .filter(|line| {
            let is_fence = line.starts_with("```") || line.starts_with("~~~");
            let is_bare_bullet = matches!(*line, "-" | "*");
            !(line.is_empty() || is_fence || is_bare_bullet)
        })
        .map(str::to_owned)
        .collect()
}
/// Test-only helper: the first line `meaningful_example_code_lines` yields for
/// `block`, if any.
#[cfg(test)]
pub(super) fn first_example_code_line(block: &str) -> Option<String> {
    let mut lines = meaningful_example_code_lines(block);
    // Keep only the head so `pop` hands back the first line by value.
    lines.truncate(1);
    lines.pop()
}
/// Reports whether an already-trimmed line is a markdown section break.
///
/// That is either a three-character thematic break (`---`, `***`, `___`) or a
/// heading of one to six `#` characters followed by whitespace.
pub(super) fn is_markdown_section_break(trimmed: &str) -> bool {
    if matches!(trimmed, "---" | "***" | "___") {
        return true;
    }
    let after_hashes = trimmed.trim_start_matches('#');
    // `#` is a single byte, so the length difference is the number of leading hashes.
    let level = trimmed.len() - after_hashes.len();
    (1..=6).contains(&level) && after_hashes.starts_with(char::is_whitespace)
}
/// Extracts one representative bad line and one fix line from rule `content`.
///
/// Heading lines are found with `classify_example_heading`. The text between a
/// heading and the next heading (or the end of the content) is that heading's block.
/// The first bad block and the first fix block are passed to
/// `divergent_example_lines`. Returns `(None, None)` when there are no headings.
pub(super) fn extract_rule_examples(content: &str) -> (Option<String>, Option<String>) {
    let lines: Vec<&str> = content.lines().collect();
    let headings: Vec<(usize, ExampleSide)> = lines
        .iter()
        .enumerate()
        .filter_map(|(idx, line)| classify_example_heading(line).map(|side| (idx, side)))
        .collect();
    if headings.is_empty() {
        return (None, None);
    }

    // Body of the first heading classified as `wanted`, excluding the heading line.
    let first_block_for = |wanted: ExampleSide| -> Option<String> {
        let slot = headings.iter().position(|&(_, side)| side == wanted)?;
        let body_start = headings[slot].0 + 1;
        let body_end = headings
            .get(slot + 1)
            .map_or(lines.len(), |&(next_heading, _)| next_heading);
        Some(lines[body_start..body_end].join("\n"))
    };

    let bad_block = first_block_for(ExampleSide::Bad);
    let fix_block = first_block_for(ExampleSide::Fix);
    divergent_example_lines(bad_block.as_deref(), fix_block.as_deref())
}
/// Picks one line from each example block, preferring the first lines that differ.
///
/// Blocks are reduced with `meaningful_example_code_lines`. If either block is
/// missing or has no meaningful lines, each side's first line (if any) is returned.
/// Otherwise the blocks are walked in parallel to the first differing pair:
/// - both blocks have a line there: that pair is returned;
/// - only the fix block continues: the bad head is paired with the fix's next line;
/// - the fix block is exhausted: both heads are returned.
pub(super) fn divergent_example_lines(
    bad_block: Option<&str>,
    fix_block: Option<&str>,
) -> (Option<String>, Option<String>) {
    // A missing block behaves exactly like a block with no meaningful lines.
    let bad_lines = bad_block
        .map(meaningful_example_code_lines)
        .unwrap_or_default();
    let fix_lines = fix_block
        .map(meaningful_example_code_lines)
        .unwrap_or_default();

    let (Some(bad_head), Some(fix_head)) = (bad_lines.first(), fix_lines.first()) else {
        return (bad_lines.first().cloned(), fix_lines.first().cloned());
    };

    // Length of the common prefix; 0 when the heads already differ.
    let shared = bad_lines
        .iter()
        .zip(&fix_lines)
        .take_while(|(bad, fix)| bad.trim() == fix.trim())
        .count();

    let (bad_pick, fix_pick) = match (bad_lines.get(shared), fix_lines.get(shared)) {
        (Some(bad), Some(fix)) => (bad, fix),
        (None, Some(fix)) => (bad_head, fix),
        (_, None) => (bad_head, fix_head),
    };
    (Some(bad_pick.clone()), Some(fix_pick.clone()))
}
/// Minimum `raw_score` a cross-repo starter hit must reach to be kept; passed as the
/// floor to `filter_starter_by_relevance` by `cross_repo_starter_hits`.
const STARTER_RELEVANCE_FLOOR: f64 = 0.12;
/// Keeps only the hits whose `raw_score` is at least `floor`, preserving their order.
pub(super) fn filter_starter_by_relevance(
    mut hits: Vec<LocalRuleHit>,
    floor: f64,
) -> Vec<LocalRuleHit> {
    hits.retain(|candidate| candidate.raw_score >= floor);
    hits
}
/// Recalls starter rules from the cross-repo starter index for `file` and `intent`.
///
/// Only hits whose file patterns strictly match `file` and whose raw score reaches
/// `STARTER_RELEVANCE_FLOOR` are kept, capped at `top_k`. Any indexing or retrieval
/// failure yields an empty list.
pub(super) async fn cross_repo_starter_hits(
    ctx: &CommandContext,
    intent: &str,
    file: &str,
    top_k: usize,
) -> Vec<LocalRuleHit> {
    let db = &ctx.db;
    let starter_pool =
        match difflore_core::context::orchestrator::ensure_cross_repo_starter_indexed(db).await {
            Ok(pool) => pool,
            Err(_) => return Vec::new(),
        };

    let query = format!("{file} {intent}");
    let ranking_inputs = difflore_core::context::rule_source::load_rule_ranking_inputs(db).await;
    let pool_k = candidate_pool_size(top_k);
    // The empty scope list is what the original passes for the starter index.
    let scored = match crate::commands::search::retrieve_rules_for_search(
        &starter_pool,
        &query,
        intent,
        pool_k,
        ranking_inputs.confidence_map.as_ref(),
        ranking_inputs.age_days_map.as_ref(),
        Some(file),
        &[],
    )
    .await
    {
        Ok(scored) => scored,
        Err(_) => return Vec::new(),
    };

    let ids: Vec<String> = scored.iter().map(|chunk| chunk.skill_id.clone()).collect();
    let metas = difflore_core::skills::fetch_search_meta(db, &ids).await;

    let file_scoped: Vec<LocalRuleHit> = build_local_hits(&scored, &metas)
        .into_iter()
        .filter(|hit| strict_file_pattern_match(&hit.file_patterns, Some(file)))
        .collect();
    let mut relevant = filter_starter_by_relevance(file_scoped, STARTER_RELEVANCE_FLOOR);
    relevant.truncate(top_k);
    relevant
}
/// Records which local rules were recalled and emits a rule-fired observation.
///
/// Does nothing when `local.matches` is empty. Each match is recorded with its
/// 1-based rank, the query text (file path prepended when present) and whether it
/// strictly matches `file`. The result of the recording call is ignored.
pub(super) async fn record_local_recall(
    ctx: &CommandContext,
    local: &LocalRecallResult,
    intent: &str,
    file: Option<&str>,
    top_k: usize,
    session_id: &str,
) {
    if local.matches.is_empty() {
        return;
    }

    let db = &ctx.db;
    let query = file.map_or_else(|| intent.to_owned(), |path| format!("{path} {intent}"));

    let mut recalls = Vec::with_capacity(local.matches.len());
    for (position, hit) in local.matches.iter().enumerate() {
        recalls.push(difflore_core::rule_outcomes::RuleRecallInput {
            rule_id: hit.id.as_str(),
            session_id: Some(session_id),
            repo_full_name: local.repo_full_name.as_deref(),
            file_path: file,
            query_text: query.as_str(),
            rank: position as i64 + 1,
            top_k: top_k as i64,
            strict_file_match: strict_file_pattern_match(&hit.file_patterns, file),
        });
    }
    // Best effort: a failure to persist recall records must not fail the command.
    let _ = difflore_core::rule_outcomes::record_recalled_with_context(db, &recalls).await;

    let fired_ids: Vec<String> = local.matches.iter().map(|hit| hit.id.clone()).collect();
    emit_rule_fired_observation(ctx, &fired_ids, intent, file, session_id).await;
}
/// Builds the diagnostics shown when neither local recall nor cloud recall matched.
///
/// `possible_causes` and `next_steps` are collected from four checks, in order: the
/// local scope/corpus state, whether a non-blank `file` was supplied, whether the
/// query looks broad, and the cloud login/scope state. Scope-specific follow-up
/// commands are appended last. For an empty corpus the import-reviews step is
/// moved to the front of `next_steps`.
pub(super) fn build_zero_match_diagnostics(
local: &LocalRecallResult,
cloud: &CloudRecallResult,
intent: &str,
file: Option<&str>,
) -> RecallDiagnostics {
let mut possible_causes = Vec::new();
let mut next_steps = Vec::new();
// `no_scope` and `empty_corpus` are mutually exclusive: an empty corpus is only
// reported when a repo scope exists.
let no_scope = local.repo_full_name.is_none();
let empty_corpus = !no_scope && local.rules_indexed == 0;
// 1. Local state: missing scope, empty corpus, or rules exist but none overlapped.
if no_scope {
possible_causes.push(DiagnosticItem {
code: "repo_scope_missing",
message: "No GitHub origin/upstream remote was detected; local recall scopes rules by repo, so an unscoped checkout retrieves nothing. This is by design, not an empty corpus.".to_owned(),
});
} else if empty_corpus {
possible_causes.push(DiagnosticItem {
code: "local_corpus_empty",
message: "No accepted local rules are indexed for this repo yet, so offline recall has nothing to retrieve.".to_owned(),
});
} else {
possible_causes.push(DiagnosticItem {
code: "repo_scoped_no_overlap",
message: format!(
"{} local rule{} exist for this repo scope, but none overlapped the query strongly enough.",
local.rules_indexed,
if local.rules_indexed == 1 { "" } else { "s" },
),
});
}
// 2. File scope: a blank or whitespace-only `file` counts as "no file".
if let Some(file) = file.map(str::trim).filter(|file| !file.is_empty()) {
possible_causes.push(DiagnosticItem {
code: "file_pattern_scope",
message: format!(
"`{file}` may not match the accepted rules' file patterns, or the file scope may be narrower than the memory you need."
),
});
next_steps.push(DiagnosticStep {
command: Some(recall_command_for_zero_match(intent, None)),
message: "retry without the file scope to test whether file patterns are filtering out relevant memory".to_owned(),
});
} else {
possible_causes.push(DiagnosticItem {
code: "no_file_scope",
message: "Most review memory is scoped to file patterns, so a bare query often matches nothing without a file to anchor it.".to_owned(),
});
next_steps.push(DiagnosticStep {
command: Some(recall_command(intent, Some("path/to/file"))),
message: "add --file <path> so DiffLore can match the rules scoped to that file"
.to_owned(),
});
}
// 3. Query breadth: suggest a more specific phrasing when the intent looks broad.
if query_looks_broad(intent) {
possible_causes.push(DiagnosticItem {
code: "query_too_broad",
message: "The query is broad; recall works best with review-language details like API names, failure modes, or the convention being checked.".to_owned(),
});
next_steps.push(DiagnosticStep {
command: Some(recall_command(
&more_specific_query_example(intent, file),
file,
)),
message: "retry with a more specific review phrase".to_owned(),
});
}
// 4. Cloud state: exactly one of not-logged-in, no repo scope, or no overlap.
if !cloud.logged_in {
possible_causes.push(DiagnosticItem {
code: "cloud_not_logged_in",
message: "Cloud review memory was skipped because you are not logged in.".to_owned(),
});
} else if cloud.repo_full_name.is_none() {
possible_causes.push(DiagnosticItem {
code: "cloud_repo_scope_missing",
message: "Cloud review memory was skipped because no GitHub repo remote was detected."
.to_owned(),
});
} else {
possible_causes.push(DiagnosticItem {
code: "cloud_no_overlap",
message: "Cloud review memory did not find an imported PR review verdict for this repo, file, and query.".to_owned(),
});
}
// Follow-up commands that depend on the local state established in step 1.
if no_scope {
next_steps.push(DiagnosticStep {
command: Some("git remote -v".to_owned()),
message: "local recall is repo-scoped; add a GitHub origin/upstream remote (or run inside a repo that has one) so this checkout has memory to retrieve".to_owned(),
});
} else if empty_corpus {
next_steps.push(DiagnosticStep {
command: Some("difflore import-reviews --max-prs 50".to_owned()),
message: "create local memories from recent PR review history".to_owned(),
});
} else {
next_steps.push(DiagnosticStep {
command: Some("difflore status".to_owned()),
message: "inspect local memory readiness and the current next action".to_owned(),
});
next_steps.push(DiagnosticStep {
command: Some("difflore import-reviews --max-prs 50".to_owned()),
message:
"mine more review history if the current repo has no memory for this topic yet"
.to_owned(),
});
}
// With an empty corpus, importing reviews is the step that unblocks everything else,
// so it is moved to the front.
if empty_corpus {
prioritize_empty_corpus_steps(&mut next_steps);
}
RecallDiagnostics {
summary: "No local rules or cloud review memories matched; recall ran, but the available memory did not overlap this scope.".to_owned(),
possible_causes,
next_steps,
}
}
/// Moves the import-reviews step(s) to the front of `next_steps`.
///
/// The sort is stable, so all other steps keep their relative order.
pub(super) fn prioritize_empty_corpus_steps(next_steps: &mut [DiagnosticStep]) {
    const IMPORT_COMMAND: &str = "difflore import-reviews --max-prs 50";
    // `false` sorts before `true`, so steps running the import command come first.
    next_steps.sort_by_key(|step| step.command.as_deref() != Some(IMPORT_COMMAND));
}
/// Enqueues and flushes a `RuleFired` observation for the recalled rules.
///
/// Does nothing when `rule_ids` is empty. At most the first ten rule ids are
/// included. The result of enqueueing is ignored.
pub(super) async fn emit_rule_fired_observation(
    ctx: &CommandContext,
    rule_ids: &[String],
    intent: &str,
    file: Option<&str>,
    session_id: &str,
) {
    use difflore_core::cloud::observations::{ObservationEvent, enqueue_and_flush_default};

    if rule_ids.is_empty() {
        return;
    }

    let client = ctx.cloud().await;
    let event = ObservationEvent::RuleFired {
        rule_ids: rule_ids.iter().take(10).cloned().collect(),
        file_path: file.map(str::to_owned),
        intent: Some(intent.to_owned()),
        session_id: session_id.to_owned(),
        fired_at: chrono::Utc::now(),
    };
    // Best effort: observation delivery must not affect the recall command.
    let _ = enqueue_and_flush_default(event, client).await;
}
pub(super) async fn recall_cloud_review_memory(
ctx: &CommandContext,
intent: &str,
file: Option<&str>,
top_k: usize,
) -> CloudRecallResult {
let client = ctx.cloud().await;
let has_saved_token = client.is_logged_in();
let detected_repo_full_names =
difflore_core::git::detect_github_repo_full_names(&project_path());
let repo_full_names = difflore_core::skills::expand_repo_scopes_with_source_aliases(
&ctx.db,
&detected_repo_full_names,
)
.await
.unwrap_or(detected_repo_full_names);
let repo_full_name = repo_full_names.first().cloned();
if !has_saved_token {
return CloudRecallResult {
logged_in: false,
repo_full_name,
scope: PastVerdictScope::Personal.as_str(),
team_id: None,
verdicts: Vec::new(),
};
}
let cloud_status = difflore_core::cloud::sync::fetch_cloud_status(client).await;
if !cloud_status.logged_in {
return CloudRecallResult {
logged_in: false,
repo_full_name,
scope: PastVerdictScope::Personal.as_str(),
team_id: None,
verdicts: Vec::new(),
};
}
let team_id = cloud_status.team_id.clone();
let scope = if team_id.is_some() {
PastVerdictScope::Team
} else {
PastVerdictScope::Personal
};
let top_k = crate::commands::util::clamp_with_warn("--top-k", top_k, 1, 10, false);
if repo_full_names.is_empty() {
return CloudRecallResult {
logged_in: true,
repo_full_name,
scope: scope.as_str(),
team_id,
verdicts: Vec::new(),
};
}
let repos: Vec<String> = repo_full_names.iter().take(4).cloned().collect();
let groups = match repos.as_slice() {
[] => Vec::new(),
[repo] => {
vec![
recall_cloud_repo_verdicts(
client,
intent,
file,
top_k,
repo,
scope,
team_id.as_deref(),
)
.await,
]
}
[repo_a, repo_b] => {
let (a, b) = tokio::join!(
recall_cloud_repo_verdicts(
client,
intent,
file,
top_k,
repo_a,
scope,
team_id.as_deref()
),
recall_cloud_repo_verdicts(
client,
intent,
file,
top_k,
repo_b,
scope,
team_id.as_deref()
)
);
vec![a, b]
}
[repo_a, repo_b, repo_c] => {
let (a, b, c) = tokio::join!(
recall_cloud_repo_verdicts(
client,
intent,
file,
top_k,
repo_a,
scope,
team_id.as_deref()
),
recall_cloud_repo_verdicts(
client,
intent,
file,
top_k,
repo_b,
scope,
team_id.as_deref()
),
recall_cloud_repo_verdicts(
client,
intent,
file,
top_k,
repo_c,
scope,
team_id.as_deref()
)
);
vec![a, b, c]
}
[repo_a, repo_b, repo_c, repo_d, ..] => {
let (a, b, c, d) = tokio::join!(
recall_cloud_repo_verdicts(
client,
intent,
file,
top_k,
repo_a,
scope,
team_id.as_deref()
),
recall_cloud_repo_verdicts(
client,
intent,
file,
top_k,
repo_b,
scope,
team_id.as_deref()
),
recall_cloud_repo_verdicts(
client,
intent,
file,
top_k,
repo_c,
scope,
team_id.as_deref()
),
recall_cloud_repo_verdicts(
client,
intent,
file,
top_k,
repo_d,
scope,
team_id.as_deref()
)
);
vec![a, b, c, d]
}
};
let mut seen = std::collections::HashSet::new();
let mut verdicts: Vec<PastVerdict> = Vec::new();
for group in groups {
for v in group {
if seen.insert(v.extraction_id.clone()) {
verdicts.push(v);
}
}
}
verdicts.truncate(top_k);
CloudRecallResult {
logged_in: true,
repo_full_name,
scope: scope.as_str(),
team_id,
verdicts,
}
}
/// Retrieves up to `top_k` past verdicts for a single `repo` from the cloud.
///
/// Thin wrapper over `retrieve_past_verdicts_by_text_with_team` that forwards
/// `intent`, `file`, `scope` and `team_id` unchanged. `top_k` values that do not fit
/// in `u32` are saturated to `u32::MAX` rather than wrapped.
pub(super) async fn recall_cloud_repo_verdicts(
    client: &difflore_core::cloud::client::CloudClient,
    intent: &str,
    file: Option<&str>,
    top_k: usize,
    repo: &str,
    scope: PastVerdictScope,
    team_id: Option<&str>,
) -> Vec<PastVerdict> {
    // `top_k as u32` would silently truncate on 64-bit targets (e.g. 2^32 becomes 0);
    // saturating keeps an oversized request meaning "as many as possible".
    let limit = u32::try_from(top_k).unwrap_or(u32::MAX);
    difflore_core::context::retrieval::retrieve_past_verdicts_by_text_with_team(
        client,
        intent,
        Some(repo),
        scope,
        limit,
        file,
        team_id,
    )
    .await
}