Skip to main content

codemem_engine/hooks/
mod.rs

1//! Hooks module: PostToolUse hook handler for passive capture from AI coding assistants.
2//!
3//! Parses PostToolUse JSON payloads from stdin, extracts relevant information
4//! based on tool type, and creates appropriate memories with auto-tagging.
5
6pub mod diff;
7mod extractors;
8pub mod triggers;
9
10use codemem_core::{CodememError, MemoryType, RelationshipType};
11use serde::Deserialize;
12use std::collections::HashMap;
13
14pub use triggers::{check_triggers, AutoInsight};
15
16use extractors::{
17    extract_agent_communication, extract_bash, extract_edit, extract_glob, extract_grep,
18    extract_list_dir, extract_read, extract_web, extract_write,
19};
20
/// Upper bound, in bytes, on the size of a tool response that will be
/// processed (100 KiB). Responses longer than this are skipped by `extract`.
const MAX_CONTENT_SIZE: usize = 100 * 1024;
23
24/// PostToolUse hook payload from an AI coding assistant.
25#[derive(Debug, Deserialize)]
26pub struct HookPayload {
27    pub tool_name: String,
28    pub tool_input: serde_json::Value,
29    pub tool_response: String,
30    pub session_id: Option<String>,
31    pub cwd: Option<String>,
32}
33
34/// Extracted memory from a hook payload.
35#[derive(Debug)]
36pub struct ExtractedMemory {
37    pub content: String,
38    pub memory_type: MemoryType,
39    pub tags: Vec<String>,
40    pub metadata: HashMap<String, serde_json::Value>,
41    pub graph_node: Option<codemem_core::GraphNode>,
42    pub graph_edges: Vec<PendingEdge>,
43    pub session_id: Option<String>,
44}
45
46/// A pending edge to be created once both nodes exist.
47#[derive(Debug)]
48pub struct PendingEdge {
49    pub src_id: String,
50    pub dst_id: String,
51    pub relationship: RelationshipType,
52}
53
54/// Parse a hook payload from JSON string.
55pub fn parse_payload(json: &str) -> Result<HookPayload, CodememError> {
56    serde_json::from_str(json)
57        .map_err(|e| CodememError::Hook(format!("Failed to parse payload: {e}")))
58}
59
60/// Extract memory from a hook payload.
61pub fn extract(payload: &HookPayload) -> Result<Option<ExtractedMemory>, CodememError> {
62    // Skip large responses
63    if payload.tool_response.len() > MAX_CONTENT_SIZE {
64        tracing::debug!(
65            "Skipping large response ({} bytes)",
66            payload.tool_response.len()
67        );
68        return Ok(None);
69    }
70
71    match payload.tool_name.as_str() {
72        "Read" => extract_read(payload),
73        "Glob" => extract_glob(payload),
74        "Grep" => extract_grep(payload),
75        "Edit" | "MultiEdit" => extract_edit(payload),
76        "Write" => extract_write(payload),
77        "Bash" => extract_bash(payload),
78        "WebFetch" | "WebSearch" => extract_web(payload),
79        "Agent" | "SendMessage" => extract_agent_communication(payload),
80        "ListFiles" | "ListDir" => extract_list_dir(payload),
81        _ => {
82            tracing::debug!("Unknown tool: {}", payload.tool_name);
83            Ok(None)
84        }
85    }
86}
87
88/// Populate `graph_edges` on an `ExtractedMemory` by checking which file graph
89/// nodes already exist in the database.  This creates edges between files that
90/// were previously Read and are now being Edited or Written, capturing the
91/// common explore-then-modify workflow.
92///
93/// `existing_node_ids` should be the set of graph-node IDs already persisted
94/// (e.g. from `storage.all_graph_nodes()`).
95pub fn resolve_edges(
96    extracted: &mut ExtractedMemory,
97    existing_node_ids: &std::collections::HashSet<String>,
98) {
99    // Only file-level tools produce a graph_node with id "file:<path>"
100    let current_node_id = match &extracted.graph_node {
101        Some(node) => node.id.clone(),
102        None => return,
103    };
104
105    // Determine the tool that produced this memory
106    let tool = extracted
107        .metadata
108        .get("tool")
109        .and_then(|v| v.as_str())
110        .unwrap_or("");
111
112    // Only Edit and Write events create edges back to previously-seen files.
113    // If the same file was previously Read, the file node already exists.
114    // An edit/write after a read represents an evolution of understanding.
115    match tool {
116        "Edit" | "Write" => {
117            if existing_node_ids.contains(&current_node_id) {
118                extracted.graph_edges.push(PendingEdge {
119                    src_id: current_node_id,
120                    dst_id: String::new(), // self-edge marker
121                    relationship: RelationshipType::EvolvedInto,
122                });
123            }
124        }
125        _ => {}
126    }
127}
128
129/// Resolve pending edges into concrete `Edge` values, given the memory ID that
130/// was just stored and the set of existing graph-node IDs.
131///
132/// Self-edge markers (dst_id == "") use the same node as both src and dst,
133/// representing a file that evolved (was read then edited/written).
134pub fn materialize_edges(pending: &[PendingEdge], memory_id: &str) -> Vec<codemem_core::Edge> {
135    let now = chrono::Utc::now();
136    pending
137        .iter()
138        .map(|pe| {
139            // Self-edge marker: dst_id is empty, so create a self-referencing edge.
140            if pe.dst_id.is_empty() {
141                // For an EVOLVED_INTO self-reference, the src node already exists
142                // from the prior Read; we create an edge from the existing node
143                // to itself, annotated with the memory that triggered it.
144                let edge_id = format!("{}-{}-{}", pe.src_id, pe.relationship, memory_id);
145                let mut props = HashMap::new();
146                props.insert(
147                    "triggered_by".to_string(),
148                    serde_json::Value::String(memory_id.to_string()),
149                );
150                codemem_core::Edge {
151                    id: edge_id,
152                    src: pe.src_id.clone(),
153                    dst: pe.src_id.clone(),
154                    relationship: pe.relationship,
155                    weight: 1.0,
156                    properties: props,
157                    created_at: now,
158                    valid_from: None,
159                    valid_to: None,
160                }
161            } else {
162                let edge_id = format!("{}-{}-{}", pe.src_id, pe.relationship, pe.dst_id);
163                codemem_core::Edge {
164                    id: edge_id,
165                    src: pe.src_id.clone(),
166                    dst: pe.dst_id.clone(),
167                    relationship: pe.relationship,
168                    weight: 1.0,
169                    properties: HashMap::new(),
170                    created_at: now,
171                    valid_from: None,
172                    valid_to: None,
173                }
174            }
175        })
176        .collect()
177}
178
179/// Content hash for deduplication.
180pub use codemem_core::content_hash;
181
182#[cfg(test)]
183#[path = "tests/lib_tests.rs"]
184mod tests;
185
186#[cfg(test)]
187#[path = "tests/hooks_integration.rs"]
188mod hooks_integration_tests;