use serde_json::Value;
use sqlx::SqlitePool;
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::{Mutex, OnceLock};
use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader};
use crate::cloud::client::CloudClient;
use crate::context::{EmbeddingDiagnostics, gather_embedding_diagnostics_with_activity};
use crate::errors::CoreError;
use crate::review_trajectory::TrajectoryStep;
pub async fn run(db: SqlitePool) -> Result<(), Box<dyn std::error::Error>> {
let cloud = CloudClient::create().await;
let state = McpState {
db,
cloud,
index_pool: None,
};
let stdin = tokio::io::stdin();
let mut stdout = tokio::io::stdout();
let mut reader = BufReader::new(stdin);
let mut line = String::new();
loop {
line.clear();
let n = reader.read_line(&mut line).await?;
if n == 0 {
break;
}
let trimmed = line.trim();
if trimmed.is_empty() {
continue;
}
let msg: Value = match serde_json::from_str(trimmed) {
Ok(v) => v,
Err(e) => {
let err = jsonrpc_error(Value::Null, -32700, &format!("Parse error: {e}"));
let out = jsonrpc_line_bytes(&err);
stdout.write_all(&out).await?;
stdout.flush().await?;
continue;
}
};
if let Some(response) = handle_message(&state, &msg).await {
let out = jsonrpc_line_bytes(&response);
stdout.write_all(&out).await?;
stdout.flush().await?;
}
}
Ok(())
}
/// Serializes `value` to JSON and appends a trailing newline, producing one wire line.
///
/// If serialization fails, the error is logged to stderr and a fixed `-32603`
/// JSON-RPC error line (null id) is returned instead, so the caller always has
/// bytes to write.
fn jsonrpc_line_bytes(value: &Value) -> Vec<u8> {
    // Pre-rendered reply used when the real response cannot be serialized.
    const SERIALIZE_FAILED_LINE: &[u8] = b"{\"jsonrpc\":\"2.0\",\"id\":null,\"error\":{\"code\":-32603,\"message\":\"serialize failed\"}}\n";
    let mut line = match serde_json::to_vec(value) {
        Ok(bytes) => bytes,
        Err(e) => {
            eprintln!("[difflore-mcp] failed to serialize JSON-RPC response: {e}");
            return SERIALIZE_FAILED_LINE.to_vec();
        }
    };
    line.push(b'\n');
    line
}
/// Outcome of assembling rule context for a single hook invocation, as returned by
/// `fetch_relevant_rules_for_hook`.
#[derive(Debug, Clone)]
pub struct HookRuleContext {
/// Text to inject into the agent context; empty when the lookup was short-circuited
/// or no rule survived retrieval.
pub rendered: String,
/// Number of rule blocks appended to `rendered`.
pub rules_injected: usize,
/// Skill ids of the injected rules, in the order they were rendered.
pub rule_ids: Vec<String>,
}
/// Builds the retrieval-health banner placed at the top of hook output.
///
/// Returns an empty string when the embedding lane is healthy (not degraded and
/// the vector lane is available). In every other state it returns a one-paragraph
/// warning that reports both flags and the degradation reason, falling back to
/// `unknown_embedding_state` when no reason is recorded.
fn hook_embedding_health_header(diag: &EmbeddingDiagnostics) -> String {
    match (diag.degraded, diag.vector_lane_available) {
        // Healthy lane: spend no tokens on a banner.
        (false, true) => String::new(),
        (degraded, vector_lane_available) => {
            let reason = diag
                .degraded_reason
                .as_deref()
                .unwrap_or("unknown_embedding_state");
            format!(
                "> DiffLore retrieval health: embeddingDegraded={} vectorLaneAvailable={} reason={reason}. \
                 Treat injected memories as lower-confidence unless strict file/source evidence applies.\n\n",
                degraded, vector_lane_available
            )
        }
    }
}
/// Value passed as the final argument of `tools::util::cross_repo_starter_scored` by the
/// cross-repo starter fallback in `fetch_relevant_rules_for_hook`.
/// NOTE(review): presumably the cap on how many starter rules are returned — confirm
/// against the helper's signature.
const CROSS_REPO_STARTER_HOOK_TOP_K: usize = 3;
/// Builds the rule context injected by the pre-read / post-edit hooks for one file.
///
/// Steps, in order:
/// 1. For post-edit calls with a non-empty extension key, optionally short-circuit
///    (per `crate::env::hook_short_circuit_mode()`) and return an empty context.
/// 2. Detect repo scopes from the git remotes and expand them with source aliases
///    (falling back to the detected list if expansion fails); when any scope exists,
///    ensure the rule index is built for those scopes.
/// 3. Retrieve candidate rule chunks for the query `"{file} {intent}"`, drop those
///    scoring below `HOOK_MIN_RAW_SCORE`, and rerank using strict file matches.
/// 4. If nothing survived, nothing was indexed for the scopes, the feature is enabled
///    and a target file is known, try the cross-repo starter fallback.
/// 5. If there is still nothing, record an empty serve event and return an empty context.
/// 6. Otherwise render rule blocks under `HOOK_INJECTION_TOKEN_BUDGET`, append the usage
///    footer, and emit trajectory / serve / `RuleFired` telemetry.
///
/// # Parameters
/// - `db`: pool passed to the skill-metadata, example, alias-expansion and serve-record helpers.
/// - `index_pool`: pool passed to the index-build, retrieval and embedding-diagnostics helpers.
/// - `file`: path the hook fired for; the literal `"unknown"` is treated as "no target file".
/// - `intent`: `"pre-read"` selects the pre-read hook; any other value is handled as post-edit.
/// - `session_id`: optional session id; telemetry falls back to `"hook"` when it is `None`.
///
/// # Errors
/// Returns `CoreError::Internal` when the scoped index rebuild fails, and propagates any
/// error from `tools::util::retrieve_rules_with_repo_scopes`. Metadata lookup, example
/// loading and observation enqueueing are best-effort: their failures are swallowed.
pub async fn fetch_relevant_rules_for_hook(
    db: &SqlitePool,
    index_pool: &SqlitePool,
    file: &str,
    intent: &str,
    session_id: Option<&str>,
) -> Result<HookRuleContext, CoreError> {
    // Optional per-stage timing, written to stderr only when hook tracing is enabled.
    let trace = crate::env::trace_hook();
    let started = std::time::Instant::now();
    let mut last = started;
    let mut mark = |label: &str| {
        if trace {
            let now = std::time::Instant::now();
            eprintln!(
                "[difflore.hook.trace] {label}: +{}ms total={}ms",
                now.duration_since(last).as_millis(),
                now.duration_since(started).as_millis()
            );
            last = now;
        }
    };

    // Short-circuit applies only to post-edit calls on files with a usable extension key.
    let ext_key = super::hook_short_circuit::extension_key(file);
    let short_circuit_mode = crate::env::hook_short_circuit_mode();
    let short_circuit_cache = super::hook_short_circuit::global_cache();
    let is_post_edit_path = intent != "pre-read";
    let short_circuit_now = is_post_edit_path
        && !ext_key.is_empty()
        && match short_circuit_mode {
            crate::env::HookShortCircuitMode::Off => false,
            crate::env::HookShortCircuitMode::Force => true,
            crate::env::HookShortCircuitMode::Auto => {
                short_circuit_cache.should_short_circuit(&ext_key)
            }
        };
    if short_circuit_now {
        if trace {
            eprintln!(
                "[difflore.hook.trace] short_circuit ext={ext_key} mode={short_circuit_mode:?} elapsed=0ms"
            );
        }
        return Ok(HookRuleContext {
            rendered: String::new(),
            rules_injected: 0,
            rule_ids: Vec::new(),
        });
    }

    let query = format!("{file} {intent}");
    // NOTE(review): on a cache miss this spawns `git` synchronously from async code
    // (see `detect_git_remote_owner_repos`); results are cached per working directory.
    let detected_repos = detect_git_remote_owner_repos();
    let repo_scopes = crate::skills::expand_repo_scopes_with_source_aliases(db, &detected_repos)
        .await
        .unwrap_or(detected_repos);
    // With no repo scopes there is nothing to index; the count stays 0 so the
    // cross-repo starter fallback below remains eligible.
    let scoped_count = if repo_scopes.is_empty() {
        0
    } else {
        crate::context::orchestrator::ensure_rules_indexed_for_repo_scopes_with_embedding_timeout(
            db,
            index_pool,
            &repo_scopes,
            Some(std::time::Duration::from_millis(800)),
        )
        .await
        .map_err(|e| CoreError::Internal(format!("hook rule index rebuild failed: {e}")))?
    };
    mark("ensure_rules_indexed");
    let embedding_diag = gather_embedding_diagnostics_with_activity(index_pool).await;
    mark("embedding_diagnostics");
    let target_file = if file == "unknown" { None } else { Some(file) };
    let ranking_inputs = crate::context::rule_source::load_rule_ranking_inputs(db).await;
    mark("load_rule_ranking_inputs");
    let hook_top_k =
        super::recall_sampler::maybe_bump_top_k(5usize, crate::env::deep_recall_sample_rate());
    // Over-fetch 5x the final top-k, capped at 50 but never below top-k itself.
    // `min`/`max` is used instead of `clamp(hook_top_k, 50)` because `clamp` panics
    // when its lower bound exceeds its upper bound, i.e. if top-k were ever bumped past 50.
    let candidate_limit = hook_top_k.saturating_mul(5).min(50).max(hook_top_k);
    let mut scored = tools::util::retrieve_rules_with_repo_scopes(
        index_pool,
        tools::util::RetrieveRulesArgs {
            query: &query,
            lexical_query: None,
            top_k: candidate_limit,
            target_file,
            repo_scopes: &repo_scopes,
            confidence_map: ranking_inputs.confidence_map.as_ref(),
            age_days_map: ranking_inputs.age_days_map.as_ref(),
            ann_enabled: true,
            embedding_timeout: Some(std::time::Duration::from_millis(800)),
            adaptive_prune: true,
        },
    )
    .await?;
    mark("retrieve_rules");

    // Drop candidates whose raw score is below the hook's floor before reranking.
    const HOOK_MIN_RAW_SCORE: f64 = 0.005;
    scored.retain(|r| r.score >= HOOK_MIN_RAW_SCORE);
    let candidate_ids: Vec<String> = scored.iter().map(|s| s.skill_id.clone()).collect();
    // Best-effort: a failed metadata lookup degrades to "no strict file matches".
    let meta_map = tools::util::fetch_skills_by_ids(db, &candidate_ids)
        .await
        .unwrap_or_default();
    let strict_skill_ids = tools::util::strict_file_match_ids_for_meta(&meta_map, target_file);
    scored = tools::util::rerank_scored_rule_chunks_for_mcp_by_strict_file_matches(
        scored,
        intent,
        hook_top_k,
        &strict_skill_ids,
    );

    // Fallback: nothing matched and nothing is indexed for the scopes, so try rules
    // from other repos (only when enabled and a concrete target file is known).
    let mut cross_repo_starter = false;
    if scored.is_empty()
        && scoped_count == 0
        && crate::env::hook_cross_repo_starter_enabled()
        && let Some(tf) = target_file
    {
        let cross = tools::util::cross_repo_starter_scored(
            db,
            &query,
            tf,
            ranking_inputs.confidence_map.as_ref(),
            ranking_inputs.age_days_map.as_ref(),
            CROSS_REPO_STARTER_HOOK_TOP_K,
        )
        .await;
        if !cross.is_empty() {
            scored = cross;
            cross_repo_starter = true;
        }
        mark("cross_repo_starter");
    }

    let (hook_label, hook_tool) = if intent == "pre-read" {
        ("pre-read", "hook_pre_read")
    } else {
        ("post-edit", "hook_post_edit")
    };

    if scored.is_empty() {
        // Nothing to inject: still record the (empty) serve for telemetry.
        let served_event = serve_and_record(
            db,
            RuleServe {
                tool: hook_tool,
                session_id,
                event_session_id: session_id.unwrap_or("hook"),
                repo_full_name: repo_scopes.first().map(String::as_str),
                target_file,
                query: &query,
                rule_ids: &[],
                top_k: i64::try_from(hook_top_k).unwrap_or(i64::MAX),
                strict_match_count: 0,
                estimated_tokens: 0,
            },
            None,
        )
        .await;
        let _ = crate::cloud::observations::enqueue_default(served_event).await;
        if is_post_edit_path && !ext_key.is_empty() {
            // NOTE(review): `true` presumably marks an empty lookup for this extension —
            // confirm against `hook_short_circuit`.
            short_circuit_cache.record(&ext_key, true);
        }
        return Ok(HookRuleContext {
            rendered: String::new(),
            rules_injected: 0,
            rule_ids: Vec::new(),
        });
    }

    // Load examples and trust evidence concurrently; missing examples are tolerated.
    let skill_ids_all: Vec<String> = scored.iter().map(|s| s.skill_id.clone()).collect();
    let examples_fut = crate::context::rule_source::load_rule_examples_batch(db, &skill_ids_all);
    let trust_evidence_fut =
        super::trust_proof::fetch_default_cloud_top_rule_trust_evidence_for_hook();
    let (examples_result, trust_evidence) = tokio::join!(examples_fut, trust_evidence_fut);
    let examples_map = examples_result.unwrap_or_default();
    mark("load_rule_examples_batch");

    const HOOK_INJECTION_TOKEN_BUDGET: usize = 1500;
    let mut text = hook_embedding_health_header(&embedding_diag);
    if cross_repo_starter {
        text.push_str(
            "> No memory is scoped to THIS repo yet. The memories below are transferable rules \
             from your OTHER repos, matched to this file — starter suggestions, not this repo's \
             own judgment. Run `difflore import-reviews` to capture this repo's memory.\n\n",
        );
    }
    let mut injected = 0usize;
    let mut skill_ids: Vec<String> = Vec::with_capacity(scored.len());
    // Scores are rendered relative to the best candidate in this batch.
    let max_score_hot = scored
        .iter()
        .map(|r| r.score)
        .fold(f64::NEG_INFINITY, f64::max);
    for rule in &scored {
        let rel = if max_score_hot > 0.0 {
            rule.score / max_score_hot
        } else {
            0.0
        };
        let rule_text = render_rule_block(&RuleBlockArgs {
            position: injected + 1,
            rel,
            rule,
            trust_evidence: &trust_evidence,
            examples: examples_map.get(&rule.skill_id),
            example_bad_label: "- Bad:",
            example_good_label: "- Good:",
        });
        let projected_tokens = estimate_tokens(&text) + estimate_tokens(&rule_text);
        // The first rule is always injected; later rules stop at the token budget.
        if injected > 0 && projected_tokens > HOOK_INJECTION_TOKEN_BUDGET {
            break;
        }
        text.push_str(&rule_text);
        skill_ids.push(rule.skill_id.clone());
        injected += 1;
    }
    let n = injected;
    text.push_str(&format!(
        "\n> DiffLore surfaced {} team memor{} via {hook_label} hook as silent context. \
         If a memory actually applies to the current change, cite its number AND the \
         `learned from <repo>` source if the header shows one — e.g. \"applying Memory 2: \
         Don't strip null from coalesce (learned from acme/widgets)\" — so the user sees \
         which past team review judgment guided the change. Otherwise ignore — do not \
         narrate or list memories that do not apply.",
        n,
        if n == 1 { "y" } else { "ies" },
    ));

    // `text` is final from here on, so estimate its size once for all telemetry below.
    let total_tokens = estimate_tokens(&text);
    emit_trajectory_step(&TrajectoryStep::McpResponseSize {
        tool: format!("hook_{hook_label}"),
        total_tokens,
        rules_injected: n,
    });
    let origin_step = rule_hits_by_origin(db, &skill_ids).await;
    emit_trajectory_step(&origin_step);
    let strict_match_count =
        tools::util::strict_file_match_count_for_ids(&meta_map, &skill_ids, target_file);
    let served_event = serve_and_record(
        db,
        RuleServe {
            tool: hook_tool,
            session_id,
            event_session_id: session_id.unwrap_or("hook"),
            repo_full_name: repo_scopes.first().map(String::as_str),
            target_file,
            query: &query,
            rule_ids: &skill_ids,
            top_k: i64::try_from(hook_top_k).unwrap_or(i64::MAX),
            strict_match_count,
            // Checked conversion, matching `top_k` above, instead of a lossy `as` cast.
            estimated_tokens: i64::try_from(total_tokens).unwrap_or(i64::MAX),
        },
        None,
    )
    .await;
    let _ = crate::cloud::observations::enqueue_default(served_event).await;
    mark("emit_telemetry");
    let _ = crate::cloud::observations::enqueue_default(
        crate::cloud::observations::ObservationEvent::RuleFired {
            rule_ids: skill_ids.clone(),
            file_path: target_file.map(ToOwned::to_owned),
            intent: Some(intent.to_owned()),
            session_id: session_id.unwrap_or("hook").to_owned(),
            fired_at: chrono::Utc::now(),
        },
    )
    .await;
    if is_post_edit_path && !ext_key.is_empty() {
        // NOTE(review): `false` presumably marks a lookup that produced rules — confirm
        // against `hook_short_circuit`.
        short_circuit_cache.record(&ext_key, false);
    }
    Ok(HookRuleContext {
        rendered: text,
        rules_injected: n,
        rule_ids: skill_ids,
    })
}
use super::serve_render::{RuleBlockArgs, RuleServe, render_rule_block, serve_and_record};
use super::{
McpState, emit_trajectory_step, estimate_tokens, handle_message, jsonrpc_error,
rule_hits_by_origin, tools,
};
/// Returns the process-wide cache of detected repos, keyed by working directory.
///
/// The map is created empty on first access and shared by every later call.
fn repo_detection_cache() -> &'static Mutex<HashMap<PathBuf, Vec<String>>> {
    type RepoCache = Mutex<HashMap<PathBuf, Vec<String>>>;
    static CACHE: OnceLock<RepoCache> = OnceLock::new();
    CACHE.get_or_init(RepoCache::default)
}
/// Returns the repos detected from the git remotes of the current working directory,
/// memoized per directory for the lifetime of the process.
///
/// If the working directory cannot be determined, `"."` is used as the cache key.
/// A poisoned cache lock is recovered rather than propagated. The lock is not held
/// while the uncached detection runs.
pub(crate) fn detect_git_remote_owner_repos() -> Vec<String> {
    let cache = repo_detection_cache();
    let cwd = match std::env::current_dir() {
        Ok(dir) => dir,
        Err(_) => PathBuf::from("."),
    };
    // The guard is a temporary here, so it is released before detection starts.
    let cached = cache
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner)
        .get(&cwd)
        .cloned();
    if let Some(hit) = cached {
        return hit;
    }
    let detected = detect_git_remote_owner_repos_uncached();
    cache
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner)
        .insert(cwd, detected.clone());
    detected
}
/// Queries `git remote get-url` for `origin` and then `upstream`, returning each
/// distinct repo that `parse_github_owner_repo` recognises, in that order.
///
/// A remote is skipped when `git` cannot be started, exits unsuccessfully, or its
/// URL does not parse.
fn detect_git_remote_owner_repos_uncached() -> Vec<String> {
    // Resolves one remote name to its parsed repo, or `None` on any failure.
    let repo_for_remote = |remote: &str| -> Option<String> {
        let output = std::process::Command::new("git")
            .args(["remote", "get-url", remote])
            .output()
            .ok()
            .filter(|out| out.status.success())?;
        let url = String::from_utf8_lossy(&output.stdout);
        parse_github_owner_repo(url.trim())
    };
    let mut unique: Vec<String> = Vec::new();
    for repo in ["origin", "upstream"]
        .iter()
        .copied()
        .filter_map(repo_for_remote)
    {
        // Both remotes may point at the same repo; keep only the first occurrence.
        if !unique.contains(&repo) {
            unique.push(repo);
        }
    }
    unique
}
/// Parses a git remote URL by delegating to `crate::git::parse_github_remote_url`,
/// returning whatever that parser returns (`None` when it rejects the URL).
/// NOTE(review): presumably yields an `owner/repo` string for GitHub remotes — confirm
/// against `crate::git`.
pub(crate) fn parse_github_owner_repo(url: &str) -> Option<String> {
crate::git::parse_github_remote_url(url)
}
#[cfg(test)]
mod tests {
    use super::hook_embedding_health_header;
    use crate::context::EmbeddingDiagnostics;

    /// Fixture with mismatched active/index profiles; the health flags and reason
    /// under test are supplied by the caller.
    fn diag(
        degraded: bool,
        vector_lane_available: bool,
        reason: Option<&str>,
    ) -> EmbeddingDiagnostics {
        let degraded_reason = reason.map(String::from);
        EmbeddingDiagnostics {
            active_profile: String::from("sha1:local:128"),
            index_profile: Some(String::from("cloud:text-embedding-3-small:1536")),
            profile_match: false,
            degraded,
            degraded_reason,
            vector_lane_available,
        }
    }

    /// A degraded lane must expose both flags and the reason token in the banner.
    #[test]
    fn hook_header_surfaces_embedding_degradation_to_agent_text() {
        let rendered = hook_embedding_health_header(&diag(true, false, Some("provider_fallback")));
        let expectations = [
            (
                "embeddingDegraded=true",
                "hook header must surface degraded state",
            ),
            (
                "vectorLaneAvailable=false",
                "hook header must surface vector lane availability",
            ),
            (
                "provider_fallback",
                "hook header must preserve stable reason token",
            ),
        ];
        for (needle, why) in expectations {
            assert!(rendered.contains(needle), "{why}: {rendered}");
        }
    }

    /// A healthy lane must produce no banner at all.
    #[test]
    fn hook_header_stays_quiet_for_healthy_embedding_lane() {
        let healthy = EmbeddingDiagnostics {
            active_profile: String::from("sha1:local:128"),
            index_profile: Some(String::from("sha1:local:128")),
            profile_match: true,
            degraded: false,
            degraded_reason: None,
            vector_lane_available: true,
        };
        let header = hook_embedding_health_header(&healthy);
        assert!(
            header.is_empty(),
            "healthy lane should not spend hook tokens"
        );
    }
}