Skip to main content

codemem_hooks/
lib.rs

1//! codemem-hooks: PostToolUse hook handler for passive capture from AI coding assistants.
2//!
3//! Parses PostToolUse JSON payloads from stdin, extracts relevant information
4//! based on tool type, and creates appropriate memories with auto-tagging.
5
6pub mod diff;
7
8use codemem_core::{CodememError, GraphNode, MemoryType, NodeKind, RelationshipType};
9use serde::Deserialize;
10use sha2::{Digest, Sha256};
11use std::collections::HashMap;
12
/// Upper bound, in bytes, on the `tool_response` length that `extract` will
/// process (100 KiB). Larger responses are skipped rather than stored.
const MAX_CONTENT_SIZE: usize = 1024 * 100;
15
16/// PostToolUse hook payload from an AI coding assistant.
17#[derive(Debug, Deserialize)]
18pub struct HookPayload {
19    pub tool_name: String,
20    pub tool_input: serde_json::Value,
21    pub tool_response: String,
22    pub session_id: Option<String>,
23    pub cwd: Option<String>,
24}
25
26/// Extracted memory from a hook payload.
27#[derive(Debug)]
28pub struct ExtractedMemory {
29    pub content: String,
30    pub memory_type: MemoryType,
31    pub tags: Vec<String>,
32    pub metadata: HashMap<String, serde_json::Value>,
33    pub graph_node: Option<GraphNode>,
34    pub graph_edges: Vec<PendingEdge>,
35    pub session_id: Option<String>,
36}
37
38/// A pending edge to be created once both nodes exist.
39#[derive(Debug)]
40pub struct PendingEdge {
41    pub src_id: String,
42    pub dst_id: String,
43    pub relationship: RelationshipType,
44}
45
46/// Parse a hook payload from JSON string.
47pub fn parse_payload(json: &str) -> Result<HookPayload, CodememError> {
48    serde_json::from_str(json)
49        .map_err(|e| CodememError::Hook(format!("Failed to parse payload: {e}")))
50}
51
52/// Extract memory from a hook payload.
53pub fn extract(payload: &HookPayload) -> Result<Option<ExtractedMemory>, CodememError> {
54    // Skip large responses
55    if payload.tool_response.len() > MAX_CONTENT_SIZE {
56        tracing::debug!(
57            "Skipping large response ({} bytes)",
58            payload.tool_response.len()
59        );
60        return Ok(None);
61    }
62
63    match payload.tool_name.as_str() {
64        "Read" => extract_read(payload),
65        "Glob" => extract_glob(payload),
66        "Grep" => extract_grep(payload),
67        "Edit" | "MultiEdit" => extract_edit(payload),
68        "Write" => extract_write(payload),
69        _ => {
70            tracing::debug!("Unknown tool: {}", payload.tool_name);
71            Ok(None)
72        }
73    }
74}
75
76/// Populate `graph_edges` on an `ExtractedMemory` by checking which file graph
77/// nodes already exist in the database.  This creates edges between files that
78/// were previously Read and are now being Edited or Written, capturing the
79/// common explore-then-modify workflow.
80///
81/// `existing_node_ids` should be the set of graph-node IDs already persisted
82/// (e.g. from `storage.all_graph_nodes()`).
83pub fn resolve_edges(
84    extracted: &mut ExtractedMemory,
85    existing_node_ids: &std::collections::HashSet<String>,
86) {
87    // Only file-level tools produce a graph_node with id "file:<path>"
88    let current_node_id = match &extracted.graph_node {
89        Some(node) => node.id.clone(),
90        None => return,
91    };
92
93    // Determine the tool that produced this memory
94    let tool = extracted
95        .metadata
96        .get("tool")
97        .and_then(|v| v.as_str())
98        .unwrap_or("");
99
100    // Only Edit and Write events create edges back to previously-seen files
101    match tool {
102        "Edit" => {
103            // If the same file was previously Read, the file node already exists.
104            // An edit after a read represents an evolution of understanding.
105            if existing_node_ids.contains(&current_node_id) {
106                extracted.graph_edges.push(PendingEdge {
107                    src_id: current_node_id,
108                    dst_id: String::new(), // self-edge marker; will be skipped
109                    relationship: RelationshipType::EvolvedInto,
110                });
111            }
112        }
113        "Write" => {
114            // A Write to a previously-seen file is also an evolution.
115            if existing_node_ids.contains(&current_node_id) {
116                extracted.graph_edges.push(PendingEdge {
117                    src_id: current_node_id,
118                    dst_id: String::new(),
119                    relationship: RelationshipType::EvolvedInto,
120                });
121            }
122        }
123        _ => {}
124    }
125}
126
127/// Resolve pending edges into concrete `Edge` values, given the memory ID that
128/// was just stored and the set of existing graph-node IDs.
129///
130/// Self-edge markers (dst_id == "") use the same node as both src and dst,
131/// representing a file that evolved (was read then edited/written).
132pub fn materialize_edges(pending: &[PendingEdge], memory_id: &str) -> Vec<codemem_core::Edge> {
133    let now = chrono::Utc::now();
134    pending
135        .iter()
136        .map(|pe| {
137            // Skip self-edge markers where src == dst would be meaningless
138            if pe.dst_id.is_empty() {
139                // For an EVOLVED_INTO self-reference, the src node already exists
140                // from the prior Read; we create an edge from the existing node
141                // to itself, annotated with the memory that triggered it.
142                let edge_id = format!("{}-{}-{}", pe.src_id, pe.relationship, memory_id);
143                let mut props = HashMap::new();
144                props.insert(
145                    "triggered_by".to_string(),
146                    serde_json::Value::String(memory_id.to_string()),
147                );
148                codemem_core::Edge {
149                    id: edge_id,
150                    src: pe.src_id.clone(),
151                    dst: pe.src_id.clone(),
152                    relationship: pe.relationship,
153                    weight: 1.0,
154                    properties: props,
155                    created_at: now,
156                }
157            } else {
158                let edge_id = format!("{}-{}-{}", pe.src_id, pe.relationship, pe.dst_id);
159                codemem_core::Edge {
160                    id: edge_id,
161                    src: pe.src_id.clone(),
162                    dst: pe.dst_id.clone(),
163                    relationship: pe.relationship,
164                    weight: 1.0,
165                    properties: HashMap::new(),
166                    created_at: now,
167                }
168            }
169        })
170        .collect()
171}
172
173/// Content hash for deduplication.
174pub fn content_hash(content: &str) -> String {
175    let mut hasher = Sha256::new();
176    hasher.update(content.as_bytes());
177    format!("{:x}", hasher.finalize())
178}
179
180/// Extract memory from a Read tool use.
181fn extract_read(payload: &HookPayload) -> Result<Option<ExtractedMemory>, CodememError> {
182    let file_path = payload
183        .tool_input
184        .get("file_path")
185        .and_then(|v| v.as_str())
186        .unwrap_or("unknown");
187
188    // Create a summary of the file content
189    let content = format!(
190        "File read: {}\n\n{}",
191        file_path,
192        truncate(&payload.tool_response, 2000)
193    );
194
195    let tags = extract_tags_from_path(file_path);
196
197    let graph_node = Some(GraphNode {
198        id: format!("file:{file_path}"),
199        kind: NodeKind::File,
200        label: file_path.to_string(),
201        payload: HashMap::new(),
202        centrality: 0.0,
203        memory_id: None,
204        namespace: None,
205    });
206
207    Ok(Some(ExtractedMemory {
208        content,
209        memory_type: MemoryType::Context,
210        tags,
211        metadata: {
212            let mut m = HashMap::new();
213            m.insert(
214                "file_path".to_string(),
215                serde_json::Value::String(file_path.to_string()),
216            );
217            m.insert(
218                "tool".to_string(),
219                serde_json::Value::String("Read".to_string()),
220            );
221            m
222        },
223        graph_node,
224        graph_edges: vec![],
225        session_id: payload.session_id.clone(),
226    }))
227}
228
229/// Extract memory from a Glob tool use.
230fn extract_glob(payload: &HookPayload) -> Result<Option<ExtractedMemory>, CodememError> {
231    let pattern = payload
232        .tool_input
233        .get("pattern")
234        .and_then(|v| v.as_str())
235        .unwrap_or("*");
236
237    let content = format!(
238        "Glob search: {}\nResults:\n{}",
239        pattern,
240        truncate(&payload.tool_response, 2000)
241    );
242
243    let tags = vec![format!("glob:{pattern}"), "discovery".to_string()];
244
245    Ok(Some(ExtractedMemory {
246        content,
247        memory_type: MemoryType::Pattern,
248        tags,
249        metadata: {
250            let mut m = HashMap::new();
251            m.insert(
252                "pattern".to_string(),
253                serde_json::Value::String(pattern.to_string()),
254            );
255            m.insert(
256                "tool".to_string(),
257                serde_json::Value::String("Glob".to_string()),
258            );
259            m
260        },
261        graph_node: None,
262        graph_edges: vec![],
263        session_id: payload.session_id.clone(),
264    }))
265}
266
267/// Extract memory from a Grep tool use.
268fn extract_grep(payload: &HookPayload) -> Result<Option<ExtractedMemory>, CodememError> {
269    let pattern = payload
270        .tool_input
271        .get("pattern")
272        .and_then(|v| v.as_str())
273        .unwrap_or("");
274
275    let content = format!(
276        "Grep search: {}\nMatches:\n{}",
277        pattern,
278        truncate(&payload.tool_response, 2000)
279    );
280
281    let tags = vec![format!("pattern:{pattern}"), "search".to_string()];
282
283    Ok(Some(ExtractedMemory {
284        content,
285        memory_type: MemoryType::Pattern,
286        tags,
287        metadata: {
288            let mut m = HashMap::new();
289            m.insert(
290                "pattern".to_string(),
291                serde_json::Value::String(pattern.to_string()),
292            );
293            m.insert(
294                "tool".to_string(),
295                serde_json::Value::String("Grep".to_string()),
296            );
297            m
298        },
299        graph_node: None,
300        graph_edges: vec![],
301        session_id: payload.session_id.clone(),
302    }))
303}
304
305/// Extract memory from an Edit/MultiEdit tool use.
306fn extract_edit(payload: &HookPayload) -> Result<Option<ExtractedMemory>, CodememError> {
307    let file_path = payload
308        .tool_input
309        .get("file_path")
310        .and_then(|v| v.as_str())
311        .unwrap_or("unknown");
312
313    let old_string = payload
314        .tool_input
315        .get("old_string")
316        .and_then(|v| v.as_str())
317        .unwrap_or("");
318
319    let new_string = payload
320        .tool_input
321        .get("new_string")
322        .and_then(|v| v.as_str())
323        .unwrap_or("");
324
325    let content = format!(
326        "Edit: {}\nChanged:\n  - {}\n  + {}",
327        file_path,
328        truncate(old_string, 500),
329        truncate(new_string, 500)
330    );
331
332    let tags = extract_tags_from_path(file_path);
333
334    let graph_node = Some(GraphNode {
335        id: format!("file:{file_path}"),
336        kind: NodeKind::File,
337        label: file_path.to_string(),
338        payload: HashMap::new(),
339        centrality: 0.0,
340        memory_id: None,
341        namespace: None,
342    });
343
344    Ok(Some(ExtractedMemory {
345        content,
346        memory_type: MemoryType::Decision,
347        tags,
348        metadata: {
349            let mut m = HashMap::new();
350            m.insert(
351                "file_path".to_string(),
352                serde_json::Value::String(file_path.to_string()),
353            );
354            m.insert(
355                "tool".to_string(),
356                serde_json::Value::String("Edit".to_string()),
357            );
358            m
359        },
360        graph_node,
361        graph_edges: vec![],
362        session_id: payload.session_id.clone(),
363    }))
364}
365
366/// Extract memory from a Write tool use.
367fn extract_write(payload: &HookPayload) -> Result<Option<ExtractedMemory>, CodememError> {
368    let file_path = payload
369        .tool_input
370        .get("file_path")
371        .and_then(|v| v.as_str())
372        .unwrap_or("unknown");
373
374    let content = format!(
375        "File written: {}\n\n{}",
376        file_path,
377        truncate(&payload.tool_response, 2000)
378    );
379
380    let tags = extract_tags_from_path(file_path);
381
382    let graph_node = Some(GraphNode {
383        id: format!("file:{file_path}"),
384        kind: NodeKind::File,
385        label: file_path.to_string(),
386        payload: HashMap::new(),
387        centrality: 0.0,
388        memory_id: None,
389        namespace: None,
390    });
391
392    Ok(Some(ExtractedMemory {
393        content,
394        memory_type: MemoryType::Decision,
395        tags,
396        metadata: {
397            let mut m = HashMap::new();
398            m.insert(
399                "file_path".to_string(),
400                serde_json::Value::String(file_path.to_string()),
401            );
402            m.insert(
403                "tool".to_string(),
404                serde_json::Value::String("Write".to_string()),
405            );
406            m
407        },
408        graph_node,
409        graph_edges: vec![],
410        session_id: payload.session_id.clone(),
411    }))
412}
413
/// Derive tags from a file path.
///
/// Up to three tags are produced, in this order:
///
/// 1. `ext:<extension>` when the path has a file extension,
/// 2. `dir:<parent>` when the path contains a `/` and the segment directly
///    before the last one is non-empty,
/// 3. `file:<name>` when the path has a final file-name component.
///
/// An empty parent segment (as in `/main.rs` or `src//main.rs`) is skipped
/// instead of yielding a meaningless bare `dir:` tag.
fn extract_tags_from_path(path: &str) -> Vec<String> {
    let fs_path = std::path::Path::new(path);
    let mut tags = Vec::with_capacity(3);

    if let Some(ext) = fs_path.extension().and_then(|e| e.to_str()) {
        tags.push(format!("ext:{ext}"));
    }

    // Second segment from the end is the immediate parent directory; a path
    // without any '/' has no such segment.
    let parent = path
        .rsplit('/')
        .nth(1)
        .filter(|segment| !segment.is_empty());
    if let Some(parent) = parent {
        tags.push(format!("dir:{parent}"));
    }

    if let Some(filename) = fs_path.file_name().and_then(|f| f.to_str()) {
        tags.push(format!("file:{filename}"));
    }

    tags
}
445
/// Return a prefix of `s` that is at most `max_len` bytes long.
///
/// `max_len` is a byte count, not a character count. If the cut would land
/// in the middle of a multi-byte UTF-8 character, the prefix is shortened to
/// the previous character boundary, so the function never panics and never
/// splits a character. Strings that already fit are returned unchanged.
fn truncate(s: &str, max_len: usize) -> &str {
    if s.len() <= max_len {
        return s;
    }

    // Index 0 is always a boundary, so this loop terminates.
    let mut end = max_len;
    while !s.is_char_boundary(end) {
        end -= 1;
    }
    &s[..end]
}
454
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    /// Parse `json` and run extraction, unwrapping every layer.
    fn extract_from(json: &str) -> ExtractedMemory {
        let payload = parse_payload(json).unwrap();
        extract(&payload).unwrap().unwrap()
    }

    /// Build a set of already-persisted graph-node IDs.
    fn node_ids(ids: &[&str]) -> HashSet<String> {
        ids.iter().map(|id| id.to_string()).collect()
    }

    // --- parsing and extraction -------------------------------------------

    #[test]
    fn parse_read_payload() {
        let json = r#"{
            "tool_name": "Read",
            "tool_input": {"file_path": "src/main.rs"},
            "tool_response": "fn main() { println!(\"hello\"); }"
        }"#;

        let payload = parse_payload(json).unwrap();
        assert_eq!(payload.tool_name, "Read");

        let memory = extract(&payload).unwrap().unwrap();
        assert_eq!(memory.memory_type, MemoryType::Context);
        assert!(memory.tags.iter().any(|tag| tag == "ext:rs"));
    }

    #[test]
    fn parse_edit_payload() {
        let memory = extract_from(
            r#"{
            "tool_name": "Edit",
            "tool_input": {"file_path": "src/lib.rs", "old_string": "foo", "new_string": "bar"},
            "tool_response": "OK"
        }"#,
        );
        assert_eq!(memory.memory_type, MemoryType::Decision);
    }

    #[test]
    fn skip_large_response() {
        // One byte over the limit is enough to be skipped.
        let large_response = "x".repeat(MAX_CONTENT_SIZE + 1);
        let json = format!(
            r#"{{"tool_name": "Read", "tool_input": {{"file_path": "big.txt"}}, "tool_response": "{large_response}"}}"#
        );

        let payload = parse_payload(&json).unwrap();
        let outcome = extract(&payload).unwrap();
        assert!(outcome.is_none());
    }

    #[test]
    fn content_hash_deterministic() {
        assert_eq!(content_hash("hello"), content_hash("hello"));
    }

    // --- resolve_edges ------------------------------------------------------

    #[test]
    fn resolve_edges_edit_after_read_creates_evolved_into() {
        // Simulate: file was previously Read (node exists), now being Edited.
        let mut memory = extract_from(
            r#"{
            "tool_name": "Edit",
            "tool_input": {"file_path": "src/lib.rs", "old_string": "foo", "new_string": "bar"},
            "tool_response": "OK"
        }"#,
        );
        assert!(memory.graph_edges.is_empty());

        // The file node "file:src/lib.rs" already exists from a prior Read.
        resolve_edges(&mut memory, &node_ids(&["file:src/lib.rs"]));

        assert_eq!(memory.graph_edges.len(), 1);
        let edge = &memory.graph_edges[0];
        assert_eq!(edge.src_id, "file:src/lib.rs");
        assert_eq!(edge.relationship, RelationshipType::EvolvedInto);
    }

    #[test]
    fn resolve_edges_write_after_read_creates_evolved_into() {
        let mut memory = extract_from(
            r#"{
            "tool_name": "Write",
            "tool_input": {"file_path": "src/new.rs"},
            "tool_response": "File written"
        }"#,
        );

        // The file node exists from a prior Read.
        resolve_edges(&mut memory, &node_ids(&["file:src/new.rs"]));

        assert_eq!(memory.graph_edges.len(), 1);
        assert_eq!(
            memory.graph_edges[0].relationship,
            RelationshipType::EvolvedInto
        );
    }

    #[test]
    fn resolve_edges_edit_no_prior_read_no_edges() {
        let mut memory = extract_from(
            r#"{
            "tool_name": "Edit",
            "tool_input": {"file_path": "src/lib.rs", "old_string": "foo", "new_string": "bar"},
            "tool_response": "OK"
        }"#,
        );

        // No prior file nodes exist.
        resolve_edges(&mut memory, &node_ids(&[]));

        assert!(memory.graph_edges.is_empty());
    }

    #[test]
    fn resolve_edges_read_never_creates_edges() {
        let mut memory = extract_from(
            r#"{
            "tool_name": "Read",
            "tool_input": {"file_path": "src/main.rs"},
            "tool_response": "fn main() {}"
        }"#,
        );

        resolve_edges(&mut memory, &node_ids(&["file:src/main.rs"]));

        // Read events should not create edges.
        assert!(memory.graph_edges.is_empty());
    }

    #[test]
    fn resolve_edges_glob_no_graph_node_no_edges() {
        let mut memory = extract_from(
            r#"{
            "tool_name": "Glob",
            "tool_input": {"pattern": "**/*.rs"},
            "tool_response": "src/main.rs\nsrc/lib.rs"
        }"#,
        );

        resolve_edges(&mut memory, &node_ids(&[]));

        // Glob has no graph_node, so no edges.
        assert!(memory.graph_edges.is_empty());
    }

    // --- materialize_edges --------------------------------------------------

    #[test]
    fn materialize_edges_self_reference() {
        let pending = [PendingEdge {
            src_id: "file:src/lib.rs".to_string(),
            dst_id: String::new(),
            relationship: RelationshipType::EvolvedInto,
        }];

        let edges = materialize_edges(&pending, "memory-123");

        assert_eq!(edges.len(), 1);
        let edge = &edges[0];
        assert_eq!(edge.src, "file:src/lib.rs");
        assert_eq!(edge.dst, "file:src/lib.rs");
        assert_eq!(edge.relationship, RelationshipType::EvolvedInto);
        assert!(edge.properties.contains_key("triggered_by"));
        assert_eq!(
            edge.properties["triggered_by"],
            serde_json::Value::String("memory-123".to_string())
        );
    }

    #[test]
    fn materialize_edges_explicit_src_dst() {
        let pending = [PendingEdge {
            src_id: "file:src/a.rs".to_string(),
            dst_id: "file:src/b.rs".to_string(),
            relationship: RelationshipType::RelatesTo,
        }];

        let edges = materialize_edges(&pending, "memory-456");

        assert_eq!(edges.len(), 1);
        let edge = &edges[0];
        assert_eq!(edge.src, "file:src/a.rs");
        assert_eq!(edge.dst, "file:src/b.rs");
        assert_eq!(edge.relationship, RelationshipType::RelatesTo);
    }

    #[test]
    fn materialize_edges_empty_pending() {
        let edges = materialize_edges(&[], "memory-789");
        assert!(edges.is_empty());
    }
}