1use serde_json::Value;
2use sqlx::SqlitePool;
3use std::collections::HashMap;
4use std::path::PathBuf;
5use std::sync::{Mutex, OnceLock};
6use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader};
7
8use crate::cloud::client::CloudClient;
9use crate::context::{EmbeddingDiagnostics, gather_embedding_diagnostics_with_activity};
10use crate::errors::CoreError;
11use crate::review_trajectory::TrajectoryStep;
12
13pub async fn run(db: SqlitePool) -> Result<(), Box<dyn std::error::Error>> {
18 let cloud = CloudClient::create().await;
19 let state = McpState {
24 db,
25 cloud,
26 index_pool: None,
27 };
28
29 let stdin = tokio::io::stdin();
30 let mut stdout = tokio::io::stdout();
31 let mut reader = BufReader::new(stdin);
32 let mut line = String::new();
33
34 loop {
35 line.clear();
36 let n = reader.read_line(&mut line).await?;
37 if n == 0 {
38 break;
40 }
41
42 let trimmed = line.trim();
43 if trimmed.is_empty() {
44 continue;
45 }
46
47 let msg: Value = match serde_json::from_str(trimmed) {
48 Ok(v) => v,
49 Err(e) => {
50 let err = jsonrpc_error(Value::Null, -32700, &format!("Parse error: {e}"));
51 let out = jsonrpc_line_bytes(&err);
52 stdout.write_all(&out).await?;
53 stdout.flush().await?;
54 continue;
55 }
56 };
57
58 if let Some(response) = handle_message(&state, &msg).await {
59 let out = jsonrpc_line_bytes(&response);
60 stdout.write_all(&out).await?;
61 stdout.flush().await?;
62 }
63 }
64
65 Ok(())
66}
67
68fn jsonrpc_line_bytes(value: &Value) -> Vec<u8> {
69 match serde_json::to_vec(value) {
70 Ok(mut out) => {
71 out.push(b'\n');
72 out
73 }
74 Err(e) => {
75 eprintln!("[difflore-mcp] failed to serialize JSON-RPC response: {e}");
76 b"{\"jsonrpc\":\"2.0\",\"id\":null,\"error\":{\"code\":-32603,\"message\":\"serialize failed\"}}\n".to_vec()
77 }
78 }
79}
80
/// Result of a hook rule lookup: the text to inject plus bookkeeping about
/// which rules made it into that text.
///
/// `Default` yields the "nothing to inject" value (empty text, zero rules,
/// no ids).
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct HookRuleContext {
    /// Text to hand to the agent; empty when no rules were injected.
    pub rendered: String,
    /// Number of rule blocks included in `rendered`.
    pub rules_injected: usize,
    /// Ids of the injected rules, in the order they were rendered.
    pub rule_ids: Vec<String>,
}
97
98fn hook_embedding_health_header(diag: &EmbeddingDiagnostics) -> String {
99 if !diag.degraded && diag.vector_lane_available {
100 return String::new();
101 }
102
103 let reason = diag
104 .degraded_reason
105 .as_deref()
106 .unwrap_or("unknown_embedding_state");
107 format!(
108 "> DiffLore retrieval health: embeddingDegraded={} vectorLaneAvailable={} reason={reason}. \
109 Treat injected memories as lower-confidence unless strict file/source evidence applies.\n\n",
110 diag.degraded, diag.vector_lane_available
111 )
112}
113
114const CROSS_REPO_STARTER_HOOK_TOP_K: usize = 3;
118
119pub async fn fetch_relevant_rules_for_hook(
130 db: &SqlitePool,
131 index_pool: &SqlitePool,
132 file: &str,
133 intent: &str,
134 session_id: Option<&str>,
135) -> Result<HookRuleContext, CoreError> {
136 let trace = crate::env::trace_hook();
137 let started = std::time::Instant::now();
138 let mut last = started;
139 let mut mark = |label: &str| {
140 if trace {
141 let now = std::time::Instant::now();
142 eprintln!(
143 "[difflore.hook.trace] {label}: +{}ms total={}ms",
144 now.duration_since(last).as_millis(),
145 now.duration_since(started).as_millis()
146 );
147 last = now;
148 }
149 };
150
151 let ext_key = super::hook_short_circuit::extension_key(file);
154 let short_circuit_mode = crate::env::hook_short_circuit_mode();
155 let short_circuit_cache = super::hook_short_circuit::global_cache();
156 let is_post_edit_path = intent != "pre-read";
157 let short_circuit_now = is_post_edit_path
158 && !ext_key.is_empty()
159 && match short_circuit_mode {
160 crate::env::HookShortCircuitMode::Off => false,
161 crate::env::HookShortCircuitMode::Force => true,
162 crate::env::HookShortCircuitMode::Auto => {
163 short_circuit_cache.should_short_circuit(&ext_key)
164 }
165 };
166 if short_circuit_now {
167 if trace {
170 eprintln!(
171 "[difflore.hook.trace] short_circuit ext={ext_key} mode={short_circuit_mode:?} elapsed=0ms"
172 );
173 }
174 return Ok(HookRuleContext {
175 rendered: String::new(),
176 rules_injected: 0,
177 rule_ids: Vec::new(),
178 });
179 }
180
181 let query = format!("{file} {intent}");
182 let detected_repos = detect_git_remote_owner_repos();
185 let repo_scopes = crate::skills::expand_repo_scopes_with_source_aliases(db, &detected_repos)
186 .await
187 .unwrap_or(detected_repos);
188
189 let scoped_count = if repo_scopes.is_empty() {
190 0
191 } else {
192 crate::context::orchestrator::ensure_rules_indexed_for_repo_scopes_with_embedding_timeout(
193 db,
194 index_pool,
195 &repo_scopes,
196 Some(std::time::Duration::from_millis(800)),
197 )
198 .await
199 .map_err(|e| CoreError::Internal(format!("hook rule index rebuild failed: {e}")))?
200 };
201 mark("ensure_rules_indexed");
202 let embedding_diag = gather_embedding_diagnostics_with_activity(index_pool).await;
203 mark("embedding_diagnostics");
204
205 let target_file = if file == "unknown" { None } else { Some(file) };
206 let ranking_inputs = crate::context::rule_source::load_rule_ranking_inputs(db).await;
208 mark("load_rule_ranking_inputs");
209 let hook_top_k =
214 super::recall_sampler::maybe_bump_top_k(5usize, crate::env::deep_recall_sample_rate());
215 let candidate_limit = hook_top_k.saturating_mul(5).clamp(hook_top_k, 50);
216 let mut scored = tools::util::retrieve_rules_with_repo_scopes(
217 index_pool,
218 tools::util::RetrieveRulesArgs {
219 query: &query,
220 lexical_query: None,
221 top_k: candidate_limit,
222 target_file,
223 repo_scopes: &repo_scopes,
224 confidence_map: ranking_inputs.confidence_map.as_ref(),
225 age_days_map: ranking_inputs.age_days_map.as_ref(),
226 ann_enabled: true,
227 embedding_timeout: Some(std::time::Duration::from_millis(800)),
228 adaptive_prune: true,
229 },
230 )
231 .await?;
232 mark("retrieve_rules");
233
234 const HOOK_MIN_RAW_SCORE: f64 = 0.005;
238 scored.retain(|r| r.score >= HOOK_MIN_RAW_SCORE);
239 let candidate_ids: Vec<String> = scored.iter().map(|s| s.skill_id.clone()).collect();
240 let meta_map = tools::util::fetch_skills_by_ids(db, &candidate_ids)
241 .await
242 .unwrap_or_default();
243 let strict_skill_ids = tools::util::strict_file_match_ids_for_meta(&meta_map, target_file);
244 scored = tools::util::rerank_scored_rule_chunks_for_mcp_by_strict_file_matches(
245 scored,
246 intent,
247 hook_top_k,
248 &strict_skill_ids,
249 );
250
251 let mut cross_repo_starter = false;
255 if scored.is_empty()
256 && scoped_count == 0
257 && crate::env::hook_cross_repo_starter_enabled()
258 && let Some(tf) = target_file
259 {
260 let cross = tools::util::cross_repo_starter_scored(
261 db,
262 &query,
263 tf,
264 ranking_inputs.confidence_map.as_ref(),
265 ranking_inputs.age_days_map.as_ref(),
266 CROSS_REPO_STARTER_HOOK_TOP_K,
267 )
268 .await;
269 if !cross.is_empty() {
270 scored = cross;
271 cross_repo_starter = true;
272 }
273 mark("cross_repo_starter");
274 }
275
276 let (hook_label, hook_tool) = if intent == "pre-read" {
277 ("pre-read", "hook_pre_read")
278 } else {
279 ("post-edit", "hook_post_edit")
280 };
281
282 if scored.is_empty() {
283 let served_event = serve_and_record(
285 db,
286 RuleServe {
287 tool: hook_tool,
288 session_id,
289 event_session_id: session_id.unwrap_or("hook"),
290 repo_full_name: repo_scopes.first().map(String::as_str),
291 target_file,
292 query: &query,
293 rule_ids: &[],
294 top_k: i64::try_from(hook_top_k).unwrap_or(i64::MAX),
295 strict_match_count: 0,
296 estimated_tokens: 0,
297 },
298 None,
299 )
300 .await;
301 let _ = crate::cloud::observations::enqueue_default(served_event).await;
302 if is_post_edit_path && !ext_key.is_empty() {
303 short_circuit_cache.record(&ext_key, true);
304 }
305 return Ok(HookRuleContext {
306 rendered: String::new(),
307 rules_injected: 0,
308 rule_ids: Vec::new(),
309 });
310 }
311
312 let skill_ids_all: Vec<String> = scored.iter().map(|s| s.skill_id.clone()).collect();
313 let examples_fut = crate::context::rule_source::load_rule_examples_batch(db, &skill_ids_all);
314 let trust_evidence_fut =
315 super::trust_proof::fetch_default_cloud_top_rule_trust_evidence_for_hook();
316 let (examples_result, trust_evidence) = tokio::join!(examples_fut, trust_evidence_fut);
317 let examples_map = examples_result.unwrap_or_default();
318 mark("load_rule_examples_batch");
319
320 const HOOK_INJECTION_TOKEN_BUDGET: usize = 1500;
323 let mut text = hook_embedding_health_header(&embedding_diag);
324 if cross_repo_starter {
325 text.push_str(
326 "> No memory is scoped to THIS repo yet. The memories below are transferable rules \
327 from your OTHER repos, matched to this file — starter suggestions, not this repo's \
328 own judgment. Run `difflore import-reviews` to capture this repo's memory.\n\n",
329 );
330 }
331 let mut injected = 0usize;
332 let mut skill_ids: Vec<String> = Vec::with_capacity(scored.len());
333 let max_score_hot = scored
334 .iter()
335 .map(|r| r.score)
336 .fold(f64::NEG_INFINITY, f64::max);
337 for rule in &scored {
338 let rel = if max_score_hot > 0.0 {
339 rule.score / max_score_hot
340 } else {
341 0.0
342 };
343 let rule_text = render_rule_block(&RuleBlockArgs {
346 position: injected + 1,
347 rel,
348 rule,
349 trust_evidence: &trust_evidence,
350 examples: examples_map.get(&rule.skill_id),
351 example_bad_label: "- Bad:",
352 example_good_label: "- Good:",
353 });
354
355 let projected_tokens = estimate_tokens(&text) + estimate_tokens(&rule_text);
359 if injected > 0 && projected_tokens > HOOK_INJECTION_TOKEN_BUDGET {
360 break;
361 }
362 text.push_str(&rule_text);
363 skill_ids.push(rule.skill_id.clone());
364 injected += 1;
365 }
366
367 let n = injected;
368 text.push_str(&format!(
369 "\n> DiffLore surfaced {} team memor{} via {hook_label} hook as silent context. \
370 If a memory actually applies to the current change, cite its number AND the \
371 `learned from <repo>` source if the header shows one — e.g. \"applying Memory 2: \
372 Don't strip null from coalesce (learned from acme/widgets)\" — so the user sees \
373 which past team review judgment guided the change. Otherwise ignore — do not \
374 narrate or list memories that do not apply.",
375 n,
376 if n == 1 { "y" } else { "ies" },
377 ));
378
379 emit_trajectory_step(&TrajectoryStep::McpResponseSize {
381 tool: format!("hook_{hook_label}"),
382 total_tokens: estimate_tokens(&text),
383 rules_injected: n,
384 });
385 let origin_step = rule_hits_by_origin(db, &skill_ids).await;
386 emit_trajectory_step(&origin_step);
387 let strict_match_count =
388 tools::util::strict_file_match_count_for_ids(&meta_map, &skill_ids, target_file);
389 let served_event = serve_and_record(
391 db,
392 RuleServe {
393 tool: hook_tool,
394 session_id,
395 event_session_id: session_id.unwrap_or("hook"),
396 repo_full_name: repo_scopes.first().map(String::as_str),
397 target_file,
398 query: &query,
399 rule_ids: &skill_ids,
400 top_k: i64::try_from(hook_top_k).unwrap_or(i64::MAX),
401 strict_match_count,
402 estimated_tokens: estimate_tokens(&text) as i64,
403 },
404 None,
405 )
406 .await;
407 let _ = crate::cloud::observations::enqueue_default(served_event).await;
408 mark("emit_telemetry");
409
410 let _ = crate::cloud::observations::enqueue_default(
411 crate::cloud::observations::ObservationEvent::RuleFired {
412 rule_ids: skill_ids.clone(),
413 file_path: target_file.map(ToOwned::to_owned),
414 intent: Some(intent.to_owned()),
415 session_id: session_id.unwrap_or("hook").to_owned(),
416 fired_at: chrono::Utc::now(),
417 },
418 )
419 .await;
420
421 if is_post_edit_path && !ext_key.is_empty() {
422 short_circuit_cache.record(&ext_key, false);
423 }
424
425 Ok(HookRuleContext {
426 rendered: text,
427 rules_injected: n,
428 rule_ids: skill_ids,
429 })
430}
431
432use super::serve_render::{RuleBlockArgs, RuleServe, render_rule_block, serve_and_record};
442use super::{
443 McpState, emit_trajectory_step, estimate_tokens, handle_message, jsonrpc_error,
444 rule_hits_by_origin, tools,
445};
446
/// Returns the process-wide map from working directory to the repos detected
/// there, creating the empty map on first use.
fn repo_detection_cache() -> &'static Mutex<HashMap<PathBuf, Vec<String>>> {
    static REPOS_BY_CWD: OnceLock<Mutex<HashMap<PathBuf, Vec<String>>>> = OnceLock::new();
    REPOS_BY_CWD.get_or_init(Mutex::default)
}
451
452pub(crate) fn detect_git_remote_owner_repos() -> Vec<String> {
453 let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
454 {
455 let guard = repo_detection_cache()
456 .lock()
457 .unwrap_or_else(std::sync::PoisonError::into_inner);
458 if let Some(repos) = guard.get(&cwd) {
459 return repos.clone();
460 }
461 }
462
463 let repos = detect_git_remote_owner_repos_uncached();
464 let mut guard = repo_detection_cache()
465 .lock()
466 .unwrap_or_else(std::sync::PoisonError::into_inner);
467 guard.insert(cwd, repos.clone());
468 repos
469}
470
471fn detect_git_remote_owner_repos_uncached() -> Vec<String> {
472 let mut repos = Vec::new();
473 for remote in ["origin", "upstream"] {
474 let output = match std::process::Command::new("git")
475 .args(["remote", "get-url", remote])
476 .output()
477 {
478 Ok(output) if output.status.success() => output,
479 _ => continue,
480 };
481 let url = String::from_utf8_lossy(&output.stdout).trim().to_owned();
482 let Some(repo) = parse_github_owner_repo(&url) else {
483 continue;
484 };
485 if !repos.iter().any(|existing| existing == &repo) {
486 repos.push(repo);
487 }
488 }
489 repos
490}
491
492pub(crate) fn parse_github_owner_repo(url: &str) -> Option<String> {
496 crate::git::parse_github_remote_url(url)
497}
498
#[cfg(test)]
mod tests {
    use crate::context::EmbeddingDiagnostics;

    use super::hook_embedding_health_header;

    /// Builds diagnostics with a mismatched active/index profile pair and the
    /// given health flags.
    fn diag(
        degraded: bool,
        vector_lane_available: bool,
        reason: Option<&str>,
    ) -> EmbeddingDiagnostics {
        EmbeddingDiagnostics {
            active_profile: "sha1:local:128".to_owned(),
            index_profile: Some("cloud:text-embedding-3-small:1536".to_owned()),
            profile_match: false,
            degraded,
            degraded_reason: reason.map(str::to_owned),
            vector_lane_available,
        }
    }

    #[test]
    fn hook_header_surfaces_embedding_degradation_to_agent_text() {
        let rendered = hook_embedding_health_header(&diag(true, false, Some("provider_fallback")));
        assert!(
            rendered.contains("embeddingDegraded=true"),
            "hook header must surface degraded state: {rendered}"
        );
        assert!(
            rendered.contains("vectorLaneAvailable=false"),
            "hook header must surface vector lane availability: {rendered}"
        );
        assert!(
            rendered.contains("provider_fallback"),
            "hook header must preserve stable reason token: {rendered}"
        );
    }

    #[test]
    fn hook_header_stays_quiet_for_healthy_embedding_lane() {
        let rendered = hook_embedding_health_header(&EmbeddingDiagnostics {
            active_profile: "sha1:local:128".to_owned(),
            index_profile: Some("sha1:local:128".to_owned()),
            profile_match: true,
            degraded: false,
            degraded_reason: None,
            vector_lane_available: true,
        });
        assert!(
            rendered.is_empty(),
            "healthy lane should not spend hook tokens"
        );
    }

    #[test]
    fn hook_header_falls_back_to_unknown_reason_token() {
        let rendered = hook_embedding_health_header(&diag(true, false, None));
        assert!(
            rendered.contains("reason=unknown_embedding_state"),
            "missing reason must render the fallback token: {rendered}"
        );
    }

    #[test]
    fn hook_header_warns_when_vector_lane_is_unavailable_without_degradation() {
        let rendered = hook_embedding_health_header(&diag(false, false, None));
        assert!(
            rendered.contains("embeddingDegraded=false"),
            "non-degraded state must still be reported: {rendered}"
        );
        assert!(
            rendered.contains("vectorLaneAvailable=false"),
            "unavailable vector lane alone must trigger the header: {rendered}"
        );
    }

    #[test]
    fn hook_header_ends_with_blank_line_separator() {
        let rendered = hook_embedding_health_header(&diag(true, true, Some("provider_fallback")));
        assert!(
            rendered.ends_with("\n\n"),
            "header must be separated from the rule blocks that follow: {rendered:?}"
        );
    }
}