Skip to main content

innate_core/
hook.rs

1#[derive(Subcommand)]
2pub enum HookCommands {
3    /// Process a Claude Code Stop hook payload from stdin and record session events
4    Stop,
5    /// UserPromptSubmit hook: recall relevant knowledge for the prompt and print it to
6    /// stdout (injected into context). Relevance-gated so it stays quiet when nothing fits.
7    Prompt,
8    /// SessionStart hook: warm up context with high-relevance project knowledge.
9    SessionStart,
10}
11fn extract_content_text(content: Option<&serde_json::Value>) -> String {
12    match content {
13        None => String::new(),
14        Some(serde_json::Value::String(s)) => s.clone(),
15        Some(serde_json::Value::Array(arr)) => arr
16            .iter()
17            .filter_map(|b| b.get("text").and_then(|t| t.as_str()))
18            .collect::<Vec<_>>()
19            .join(" "),
20        _ => String::new(),
21    }
22}
23
24fn run_hook_stop() -> anyhow::Result<()> {
25    use std::io::{Read, Write};
26
27    let mut input = String::new();
28    std::io::stdin().read_to_string(&mut input)?;
29
30    let data: serde_json::Value = serde_json::from_str(&input).unwrap_or(serde_json::Value::Null);
31
32    // The real Stop/SubagentStop payload carries `transcript_path` (a .jsonl file, one message
33    // per line nested under "message"), NOT an inline transcript array. Read it to recover the
34    // user query and to detect whether innate_recall was actually used this session.
35    let transcript_text = data
36        .get("transcript_path")
37        .and_then(|v| v.as_str())
38        .and_then(|p| std::fs::read_to_string(p).ok())
39        .unwrap_or_default();
40
41    // Only treat this as a knowledge-using session if innate_recall was actually *invoked*.
42    // A bare substring match would false-positive on the tool's name in system-reminder tool
43    // listings, so require a tool_use block that names it (transcript lines, or inline payload).
44    let recall_used = transcript_text
45        .lines()
46        .any(|l| l.contains("tool_use") && l.contains("innate_recall"))
47        || (input.contains("tool_use") && input.contains("innate_recall"));
48
49    // Summary: the payload hands us the last assistant message directly — prefer it.
50    let mut summary: String = data
51        .get("last_assistant_message")
52        .and_then(|v| v.as_str())
53        .unwrap_or("")
54        .chars()
55        .take(400)
56        .collect();
57    let mut query = String::new();
58
59    // Newest-first scan of the transcript file for the user query (and assistant fallback).
60    for line in transcript_text.lines().rev() {
61        if !query.is_empty() && !summary.is_empty() {
62            break;
63        }
64        let Ok(m) = serde_json::from_str::<serde_json::Value>(line) else {
65            continue;
66        };
67        let role = m
68            .pointer("/message/role")
69            .and_then(|r| r.as_str())
70            .unwrap_or("");
71        let content = m.pointer("/message/content");
72        if query.is_empty() && role == "user" {
73            let q = extract_content_text(content);
74            if !q.trim().is_empty() {
75                query = q.chars().take(200).collect();
76            }
77        }
78        if summary.is_empty() && role == "assistant" {
79            summary = extract_content_text(content).chars().take(400).collect();
80        }
81    }
82
83    // Backward-compat: older payloads and tests pass an inline transcript/messages array.
84    if query.is_empty() || summary.is_empty() {
85        let empty = vec![];
86        let transcript = data
87            .get("transcript")
88            .or_else(|| data.get("messages"))
89            .and_then(|v| v.as_array())
90            .unwrap_or(&empty);
91        for m in transcript.iter().rev() {
92            let role = m.get("role").and_then(|r| r.as_str()).unwrap_or("");
93            if query.is_empty() && role == "user" {
94                query = extract_content_text(m.get("content"))
95                    .chars()
96                    .take(200)
97                    .collect();
98            }
99            if summary.is_empty() && role == "assistant" {
100                summary = extract_content_text(m.get("content"))
101                    .chars()
102                    .take(400)
103                    .collect();
104            }
105            if !query.is_empty() && !summary.is_empty() {
106                break;
107            }
108        }
109    }
110
111    let mut events: Vec<serde_json::Value> = Vec::new();
112    if !query.is_empty() {
113        events.push(json!({"event_type": "session_start", "query": query.trim()}));
114    }
115    // outcome=unknown (not ok): the Stop hook cannot know which chunks were actually used or
116    // whether they helped. Leave the authoritative ok/fail + per-chunk feedback to the agent's
117    // explicit innate_record call; this coarse signal must not inflate confidence on its own.
118    if !summary.is_empty() && recall_used {
119        events.push(json!({"event_type": "tool_success", "output_summary": summary.trim(), "outcome": "unknown"}));
120    }
121    events.push(json!({"event_type": "session_end"}));
122
123    let log_path = crate::paths::session_log_path();
124
125    if let Some(parent) = log_path.parent() {
126        std::fs::create_dir_all(parent)?;
127    }
128
129    let mut file = std::fs::OpenOptions::new()
130        .create(true)
131        .append(true)
132        .open(&log_path)?;
133
134    for event in &events {
135        writeln!(file, "{}", serde_json::to_string(event)?)?;
136    }
137
138    Ok(())
139}
140
141pub(crate) fn run_command(action: &HookCommands, db_path: &Path) -> anyhow::Result<()> {
142    match action {
143        HookCommands::Stop => run_hook_stop(),
144        // Recall hooks are auxiliary and must never break the session: on any error we
145        // swallow it and exit cleanly so the harness keeps going.
146        HookCommands::Prompt => {
147            let _ = run_hook_recall(db_path, HookKind::Prompt);
148            Ok(())
149        }
150        HookCommands::SessionStart => {
151            let _ = run_hook_recall(db_path, HookKind::SessionStart);
152            Ok(())
153        }
154    }
155}
156
/// Which recall-style hook `run_hook_recall` is serving; it decides how the recall query is
/// derived from the hook payload.
#[derive(Copy, Clone)]
enum HookKind {
    /// UserPromptSubmit: the query comes from the payload's `prompt` field.
    Prompt,
    /// SessionStart: the query is the last path component of the payload's `cwd`
    /// (or `workspace`) value.
    SessionStart,
}
162
/// Minimum fused relevance score a chunk needs before an always-on recall hook will surface it.
///
/// Fused scores range over roughly `[0, ~1.05]`, the sum of the component weights
/// (content .55 + trigger .25 + confidence .10 + context .15). A gate of 0.40 keeps strong
/// semantic matches and drops weak ones. Set the `INNATE_HOOK_MIN_SCORE` environment variable
/// to override it.
const DEFAULT_HOOK_MIN_SCORE: f64 = 0.4;
167
168/// UserPromptSubmit / SessionStart hook: recall relevant knowledge and print it to stdout so
169/// Claude Code injects it into the conversation. Relevance-gated so it stays silent when nothing
170/// fits — high frequency without noise.
171fn run_hook_recall(db_path: &Path, kind: HookKind) -> anyhow::Result<()> {
172    use std::io::Read;
173
174    let mut input = String::new();
175    std::io::stdin().read_to_string(&mut input)?;
176    let data: serde_json::Value = serde_json::from_str(&input).unwrap_or(serde_json::Value::Null);
177
178    // Derive the recall query. UserPromptSubmit carries the user's prompt; SessionStart has no
179    // query, so warm up from the project directory name as a coarse canonical project intent.
180    let query: String = match kind {
181        HookKind::Prompt => data
182            .get("prompt")
183            .and_then(|v| v.as_str())
184            .unwrap_or("")
185            .chars()
186            .take(500)
187            .collect(),
188        HookKind::SessionStart => {
189            let cwd = data
190                .get("cwd")
191                .and_then(|v| v.as_str())
192                .or_else(|| data.get("workspace").and_then(|v| v.as_str()))
193                .unwrap_or("");
194            std::path::Path::new(cwd)
195                .file_name()
196                .and_then(|s| s.to_str())
197                .unwrap_or("")
198                .to_string()
199        }
200    };
201    if query.trim().is_empty() {
202        return Ok(());
203    }
204
205    let min_score = std::env::var("INNATE_HOOK_MIN_SCORE")
206        .ok()
207        .and_then(|v| v.parse::<f64>().ok())
208        .unwrap_or(DEFAULT_HOOK_MIN_SCORE);
209
210    let kb = crate::open_kb(db_path)?;
211    let result = kb.recall(RecallParams {
212        query: &query,
213        budget: 4000,
214        trace: true,
215        include_sparks: false,
216        top: Some(5),
217        source: "hook",
218        expand_deps: "false",
219        allow_trim: false,
220        refine_mode: "off",
221        min_score: Some(min_score),
222        session_only: false,
223    })?;
224
225    if result.knowledge.is_empty() {
226        return Ok(());
227    }
228
229    // Stdout becomes context. Be explicit that these are recalled chunks and that the agent
230    // must cite the IDs it actually uses in innate_record — this is what keeps feedback precise.
231    let mut out = String::new();
232    out.push_str("<innate-recall>\n");
233    out.push_str(&format!(
234        "Innate recalled {} relevant knowledge chunk(s). Apply what helps; \
235         when you finish, call innate_record(trace_id, outcome, used=[ids you actually applied], \
236         feedback_up/down=[ids that helped/misled]).\n\n",
237        result.knowledge.len()
238    ));
239    for c in &result.knowledge {
240        let id = c.get("id").and_then(|v| v.as_str()).unwrap_or("?");
241        let content = c.get("content").and_then(|v| v.as_str()).unwrap_or("");
242        let conf = c.get("confidence").and_then(|v| v.as_f64()).unwrap_or(0.0);
243        out.push_str(&format!("- [{id}] (confidence {conf:.2}) {content}\n"));
244    }
245    out.push_str(&format!("\ntrace_id: {}\n", result.trace_id));
246    out.push_str("</innate-recall>");
247    println!("{out}");
248
249    Ok(())
250}
251
252use crate::kb::RecallParams;
253use clap::Subcommand;
254use serde_json::json;
255use std::path::Path;