codemem_engine/hooks/
mod.rs

1//! Hooks module: PostToolUse hook handler for passive capture from AI coding assistants.
2//!
3//! Parses PostToolUse JSON payloads from stdin, extracts relevant information
4//! based on tool type, and creates appropriate memories with auto-tagging.
5
6pub mod diff;
7mod extractors;
8pub mod triggers;
9
10use codemem_core::{CodememError, MemoryType, RelationshipType};
11use serde::Deserialize;
12use std::collections::HashMap;
13
14pub use triggers::{check_triggers, AutoInsight};
15
16use extractors::{
17    extract_agent_communication, extract_bash, extract_edit, extract_glob, extract_grep,
18    extract_list_dir, extract_read, extract_web, extract_write,
19};
20
/// Upper bound, in bytes, on the extracted tool-response text that `extract`
/// will process (100 KiB). Responses whose text is longer are skipped.
const MAX_CONTENT_SIZE: usize = 100 * 1024;
23
24/// PostToolUse hook payload from an AI coding assistant.
25///
26/// `tool_response` is `serde_json::Value` because Claude Code sends it as a
27/// JSON object (not a plain string). String-valued responses still deserialize
28/// correctly into `Value::String`.
29#[derive(Debug, Deserialize)]
30pub struct HookPayload {
31    pub tool_name: String,
32    pub tool_input: serde_json::Value,
33    pub tool_response: serde_json::Value,
34    pub session_id: Option<String>,
35    pub cwd: Option<String>,
36    /// Name of the hook event (e.g. "PostToolUse").
37    pub hook_event_name: Option<String>,
38    /// Path to the conversation transcript file.
39    pub transcript_path: Option<String>,
40    /// Permission mode the assistant is running in.
41    pub permission_mode: Option<String>,
42    /// Unique ID of the tool use that triggered this hook.
43    pub tool_use_id: Option<String>,
44}
45
46impl HookPayload {
47    /// Extract meaningful text content from the tool response.
48    ///
49    /// Handles known Claude Code response shapes before falling back to
50    /// raw JSON serialization:
51    ///
52    /// - `Value::String` → inner text (legacy / simple tools)
53    /// - Read tool: `{file: {content: "..."}}` → the file content
54    /// - Text-bearing: `{text: "..."}` → the text value
55    /// - Stdout-bearing: `{stdout: "..."}` → stdout value
56    /// - `Value::Null` → empty string
57    /// - anything else → compact JSON serialization
58    pub fn tool_response_text(&self) -> String {
59        match &self.tool_response {
60            serde_json::Value::String(s) => s.clone(),
61            serde_json::Value::Null => String::new(),
62            serde_json::Value::Object(obj) => {
63                // Read tool: {file: {content: "..."}}
64                if let Some(content) = obj
65                    .get("file")
66                    .and_then(|f| f.get("content"))
67                    .and_then(|c| c.as_str())
68                {
69                    return content.to_string();
70                }
71                // Text-bearing responses: {text: "..."}
72                if let Some(text) = obj.get("text").and_then(|t| t.as_str()) {
73                    return text.to_string();
74                }
75                // Stdout-bearing responses: {stdout: "..."}
76                if let Some(stdout) = obj.get("stdout").and_then(|s| s.as_str()) {
77                    return stdout.to_string();
78                }
79                // Fallback: compact JSON
80                serde_json::to_string(&self.tool_response).unwrap_or_default()
81            }
82            other => other.to_string(),
83        }
84    }
85}
86
87/// Extracted memory from a hook payload.
88#[derive(Debug)]
89pub struct ExtractedMemory {
90    pub content: String,
91    pub memory_type: MemoryType,
92    pub tags: Vec<String>,
93    pub metadata: HashMap<String, serde_json::Value>,
94    pub graph_node: Option<codemem_core::GraphNode>,
95    pub graph_edges: Vec<PendingEdge>,
96    pub session_id: Option<String>,
97}
98
99/// A pending edge to be created once both nodes exist.
100#[derive(Debug)]
101pub struct PendingEdge {
102    pub src_id: String,
103    pub dst_id: String,
104    pub relationship: RelationshipType,
105}
106
107/// Parse a hook payload from JSON string.
108pub fn parse_payload(json: &str) -> Result<HookPayload, CodememError> {
109    serde_json::from_str(json)
110        .map_err(|e| CodememError::Hook(format!("Failed to parse payload: {e}")))
111}
112
113/// Extract memory from a hook payload.
114pub fn extract(payload: &HookPayload) -> Result<Option<ExtractedMemory>, CodememError> {
115    // Check response size to skip very large payloads.
116    // For strings, check directly. For objects, check the extracted text content
117    // since that's what we actually store (avoids double-serialization).
118    let response_text = payload.tool_response_text();
119    if response_text.len() > MAX_CONTENT_SIZE {
120        tracing::debug!("Skipping large response ({} bytes)", response_text.len());
121        return Ok(None);
122    }
123
124    match payload.tool_name.as_str() {
125        "Read" => extract_read(payload, &response_text),
126        "Glob" => extract_glob(payload, &response_text),
127        "Grep" => extract_grep(payload, &response_text),
128        "Edit" | "MultiEdit" => extract_edit(payload),
129        "Write" => extract_write(payload, &response_text),
130        "Bash" => extract_bash(payload, &response_text),
131        "WebFetch" | "WebSearch" => extract_web(payload, &response_text),
132        "Agent" | "SendMessage" => extract_agent_communication(payload, &response_text),
133        "ListFiles" | "ListDir" => extract_list_dir(payload, &response_text),
134        _ => {
135            tracing::debug!("Unknown tool: {}", payload.tool_name);
136            Ok(None)
137        }
138    }
139}
140
141/// Populate `graph_edges` on an `ExtractedMemory` by checking which file graph
142/// nodes already exist in the database.  This creates edges between files that
143/// were previously Read and are now being Edited or Written, capturing the
144/// common explore-then-modify workflow.
145///
146/// `existing_node_ids` should be the set of graph-node IDs already persisted
147/// (e.g. from `storage.all_graph_nodes()`).
148pub fn resolve_edges(
149    extracted: &mut ExtractedMemory,
150    existing_node_ids: &std::collections::HashSet<String>,
151) {
152    // Only file-level tools produce a graph_node with id "file:<path>"
153    let current_node_id = match &extracted.graph_node {
154        Some(node) => node.id.clone(),
155        None => return,
156    };
157
158    // Determine the tool that produced this memory
159    let tool = extracted
160        .metadata
161        .get("tool")
162        .and_then(|v| v.as_str())
163        .unwrap_or("");
164
165    // Only Edit and Write events create edges back to previously-seen files.
166    // If the same file was previously Read, the file node already exists.
167    // An edit/write after a read represents an evolution of understanding.
168    match tool {
169        "Edit" | "Write" => {
170            if existing_node_ids.contains(&current_node_id) {
171                extracted.graph_edges.push(PendingEdge {
172                    src_id: current_node_id,
173                    dst_id: String::new(), // self-edge marker
174                    relationship: RelationshipType::EvolvedInto,
175                });
176            }
177        }
178        _ => {}
179    }
180}
181
182/// Resolve pending edges into concrete `Edge` values, given the memory ID that
183/// was just stored and the set of existing graph-node IDs.
184///
185/// Self-edge markers (dst_id == "") use the same node as both src and dst,
186/// representing a file that evolved (was read then edited/written).
187pub fn materialize_edges(pending: &[PendingEdge], memory_id: &str) -> Vec<codemem_core::Edge> {
188    let now = chrono::Utc::now();
189    pending
190        .iter()
191        .map(|pe| {
192            // Self-edge marker: dst_id is empty, so create a self-referencing edge.
193            if pe.dst_id.is_empty() {
194                // For an EVOLVED_INTO self-reference, the src node already exists
195                // from the prior Read; we create an edge from the existing node
196                // to itself, annotated with the memory that triggered it.
197                let edge_id = format!("{}-{}-{}", pe.src_id, pe.relationship, memory_id);
198                let mut props = HashMap::new();
199                props.insert(
200                    "triggered_by".to_string(),
201                    serde_json::Value::String(memory_id.to_string()),
202                );
203                codemem_core::Edge {
204                    id: edge_id,
205                    src: pe.src_id.clone(),
206                    dst: pe.src_id.clone(),
207                    relationship: pe.relationship,
208                    weight: 1.0,
209                    properties: props,
210                    created_at: now,
211                    valid_from: None,
212                    valid_to: None,
213                }
214            } else {
215                let edge_id = format!("{}-{}-{}", pe.src_id, pe.relationship, pe.dst_id);
216                codemem_core::Edge {
217                    id: edge_id,
218                    src: pe.src_id.clone(),
219                    dst: pe.dst_id.clone(),
220                    relationship: pe.relationship,
221                    weight: 1.0,
222                    properties: HashMap::new(),
223                    created_at: now,
224                    valid_from: None,
225                    valid_to: None,
226                }
227            }
228        })
229        .collect()
230}
231
232/// Content hash for deduplication.
233pub use codemem_core::content_hash;
234
235#[cfg(test)]
236#[path = "tests/lib_tests.rs"]
237mod tests;
238
239#[cfg(test)]
240#[path = "tests/hooks_integration.rs"]
241mod hooks_integration_tests;
codemem_engine/hooks/mod.rs

codemem_engine/hooks/
mod.rs