Skip to main content

codemem_hooks/
lib.rs

1//! codemem-hooks: PostToolUse hook handler for passive capture from AI coding assistants.
2//!
3//! Parses PostToolUse JSON payloads from stdin, extracts relevant information
4//! based on tool type, and creates appropriate memories with auto-tagging.
5
6pub mod diff;
7
8use codemem_core::{CodememError, GraphNode, MemoryType, NodeKind, RelationshipType};
9use serde::Deserialize;
10use sha2::{Digest, Sha256};
11use std::collections::HashMap;
12
/// Maximum tool-response size, in bytes, that will be processed (100 KiB).
///
/// `extract` returns `Ok(None)` for any payload whose `tool_response` is
/// longer than this.
const MAX_CONTENT_SIZE: usize = 100 * 1024;
15
/// PostToolUse hook payload from an AI coding assistant.
///
/// Produced by `parse_payload` from JSON text and consumed by `extract`.
#[derive(Debug, Deserialize)]
pub struct HookPayload {
    /// Name of the tool that ran. `extract` recognizes `Read`, `Glob`,
    /// `Grep`, `Edit`, `MultiEdit`, and `Write`; anything else is ignored.
    pub tool_name: String,
    /// Raw tool arguments. The extractors in this file read the `file_path`,
    /// `pattern`, `old_string`, and `new_string` keys from it.
    pub tool_input: serde_json::Value,
    /// Tool output as text. Its byte length is checked against
    /// `MAX_CONTENT_SIZE` before any extraction happens.
    // NOTE(review): as a `String`, this field makes deserialization fail when
    // `tool_response` arrives as a JSON object or array — confirm the
    // producer always sends a string.
    pub tool_response: String,
    /// Session identifier; copied onto each extracted memory when present.
    pub session_id: Option<String>,
    /// Presumably the assistant's working directory; not read by the
    /// extractors in this file.
    pub cwd: Option<String>,
}
25
/// Extracted memory from a hook payload.
///
/// Built by the per-tool extractors behind `extract`; `graph_edges` is filled
/// in afterwards by `resolve_edges`.
#[derive(Debug)]
pub struct ExtractedMemory {
    /// Human-readable summary text (tool-specific header plus a truncated
    /// excerpt of the response or edit).
    pub content: String,
    /// Memory category: `Context` for reads, `Pattern` for glob/grep,
    /// `Decision` for edits and writes.
    pub memory_type: MemoryType,
    /// Auto-generated tags (e.g. `ext:rs`, `dir:src`, `file:main.rs`,
    /// `glob:<pattern>`, `pattern:<pattern>`).
    pub tags: Vec<String>,
    /// Extra key/value data. Every extractor sets `"tool"`; file tools also
    /// set `"file_path"` and search tools set `"pattern"`.
    pub metadata: HashMap<String, serde_json::Value>,
    /// File node (`file:<path>`) for Read/Edit/Write; `None` for Glob/Grep.
    pub graph_node: Option<GraphNode>,
    /// Edges awaiting materialization; empty straight after extraction.
    pub graph_edges: Vec<PendingEdge>,
    /// Session identifier copied from the payload.
    pub session_id: Option<String>,
}
37
/// A pending edge to be created once both nodes exist.
///
/// Turned into a concrete `codemem_core::Edge` by `materialize_edges`.
#[derive(Debug)]
pub struct PendingEdge {
    /// Graph-node ID the edge starts from.
    pub src_id: String,
    /// Graph-node ID the edge points to. An empty string is a self-edge
    /// marker: `materialize_edges` then uses `src_id` as the destination too.
    pub dst_id: String,
    /// Kind of relationship the edge represents.
    pub relationship: RelationshipType,
}
45
46/// Parse a hook payload from JSON string.
47pub fn parse_payload(json: &str) -> Result<HookPayload, CodememError> {
48    serde_json::from_str(json)
49        .map_err(|e| CodememError::Hook(format!("Failed to parse payload: {e}")))
50}
51
52/// Extract memory from a hook payload.
53pub fn extract(payload: &HookPayload) -> Result<Option<ExtractedMemory>, CodememError> {
54    // Skip large responses
55    if payload.tool_response.len() > MAX_CONTENT_SIZE {
56        tracing::debug!(
57            "Skipping large response ({} bytes)",
58            payload.tool_response.len()
59        );
60        return Ok(None);
61    }
62
63    match payload.tool_name.as_str() {
64        "Read" => extract_read(payload),
65        "Glob" => extract_glob(payload),
66        "Grep" => extract_grep(payload),
67        "Edit" | "MultiEdit" => extract_edit(payload),
68        "Write" => extract_write(payload),
69        _ => {
70            tracing::debug!("Unknown tool: {}", payload.tool_name);
71            Ok(None)
72        }
73    }
74}
75
76/// Populate `graph_edges` on an `ExtractedMemory` by checking which file graph
77/// nodes already exist in the database.  This creates edges between files that
78/// were previously Read and are now being Edited or Written, capturing the
79/// common explore-then-modify workflow.
80///
81/// `existing_node_ids` should be the set of graph-node IDs already persisted
82/// (e.g. from `storage.all_graph_nodes()`).
83pub fn resolve_edges(
84    extracted: &mut ExtractedMemory,
85    existing_node_ids: &std::collections::HashSet<String>,
86) {
87    // Only file-level tools produce a graph_node with id "file:<path>"
88    let current_node_id = match &extracted.graph_node {
89        Some(node) => node.id.clone(),
90        None => return,
91    };
92
93    // Determine the tool that produced this memory
94    let tool = extracted
95        .metadata
96        .get("tool")
97        .and_then(|v| v.as_str())
98        .unwrap_or("");
99
100    // Only Edit and Write events create edges back to previously-seen files
101    match tool {
102        "Edit" => {
103            // If the same file was previously Read, the file node already exists.
104            // An edit after a read represents an evolution of understanding.
105            if existing_node_ids.contains(&current_node_id) {
106                extracted.graph_edges.push(PendingEdge {
107                    src_id: current_node_id,
108                    dst_id: String::new(), // self-edge marker; will be skipped
109                    relationship: RelationshipType::EvolvedInto,
110                });
111            }
112        }
113        "Write" => {
114            // A Write to a previously-seen file is also an evolution.
115            if existing_node_ids.contains(&current_node_id) {
116                extracted.graph_edges.push(PendingEdge {
117                    src_id: current_node_id,
118                    dst_id: String::new(),
119                    relationship: RelationshipType::EvolvedInto,
120                });
121            }
122        }
123        _ => {}
124    }
125}
126
127/// Resolve pending edges into concrete `Edge` values, given the memory ID that
128/// was just stored and the set of existing graph-node IDs.
129///
130/// Self-edge markers (dst_id == "") use the same node as both src and dst,
131/// representing a file that evolved (was read then edited/written).
132pub fn materialize_edges(pending: &[PendingEdge], memory_id: &str) -> Vec<codemem_core::Edge> {
133    let now = chrono::Utc::now();
134    pending
135        .iter()
136        .map(|pe| {
137            // Skip self-edge markers where src == dst would be meaningless
138            if pe.dst_id.is_empty() {
139                // For an EVOLVED_INTO self-reference, the src node already exists
140                // from the prior Read; we create an edge from the existing node
141                // to itself, annotated with the memory that triggered it.
142                let edge_id = format!("{}-{}-{}", pe.src_id, pe.relationship, memory_id);
143                let mut props = HashMap::new();
144                props.insert(
145                    "triggered_by".to_string(),
146                    serde_json::Value::String(memory_id.to_string()),
147                );
148                codemem_core::Edge {
149                    id: edge_id,
150                    src: pe.src_id.clone(),
151                    dst: pe.src_id.clone(),
152                    relationship: pe.relationship,
153                    weight: 1.0,
154                    properties: props,
155                    created_at: now,
156                    valid_from: None,
157                    valid_to: None,
158                }
159            } else {
160                let edge_id = format!("{}-{}-{}", pe.src_id, pe.relationship, pe.dst_id);
161                codemem_core::Edge {
162                    id: edge_id,
163                    src: pe.src_id.clone(),
164                    dst: pe.dst_id.clone(),
165                    relationship: pe.relationship,
166                    weight: 1.0,
167                    properties: HashMap::new(),
168                    created_at: now,
169                    valid_from: None,
170                    valid_to: None,
171                }
172            }
173        })
174        .collect()
175}
176
177/// Content hash for deduplication.
178pub fn content_hash(content: &str) -> String {
179    let mut hasher = Sha256::new();
180    hasher.update(content.as_bytes());
181    format!("{:x}", hasher.finalize())
182}
183
184/// Extract memory from a Read tool use.
185fn extract_read(payload: &HookPayload) -> Result<Option<ExtractedMemory>, CodememError> {
186    let file_path = payload
187        .tool_input
188        .get("file_path")
189        .and_then(|v| v.as_str())
190        .unwrap_or("unknown");
191
192    // Create a summary of the file content
193    let content = format!(
194        "File read: {}\n\n{}",
195        file_path,
196        truncate(&payload.tool_response, 2000)
197    );
198
199    let tags = extract_tags_from_path(file_path);
200
201    let graph_node = Some(GraphNode {
202        id: format!("file:{file_path}"),
203        kind: NodeKind::File,
204        label: file_path.to_string(),
205        payload: HashMap::new(),
206        centrality: 0.0,
207        memory_id: None,
208        namespace: None,
209    });
210
211    Ok(Some(ExtractedMemory {
212        content,
213        memory_type: MemoryType::Context,
214        tags,
215        metadata: {
216            let mut m = HashMap::new();
217            m.insert(
218                "file_path".to_string(),
219                serde_json::Value::String(file_path.to_string()),
220            );
221            m.insert(
222                "tool".to_string(),
223                serde_json::Value::String("Read".to_string()),
224            );
225            m
226        },
227        graph_node,
228        graph_edges: vec![],
229        session_id: payload.session_id.clone(),
230    }))
231}
232
233/// Extract memory from a Glob tool use.
234fn extract_glob(payload: &HookPayload) -> Result<Option<ExtractedMemory>, CodememError> {
235    let pattern = payload
236        .tool_input
237        .get("pattern")
238        .and_then(|v| v.as_str())
239        .unwrap_or("*");
240
241    let content = format!(
242        "Glob search: {}\nResults:\n{}",
243        pattern,
244        truncate(&payload.tool_response, 2000)
245    );
246
247    let tags = vec![format!("glob:{pattern}"), "discovery".to_string()];
248
249    Ok(Some(ExtractedMemory {
250        content,
251        memory_type: MemoryType::Pattern,
252        tags,
253        metadata: {
254            let mut m = HashMap::new();
255            m.insert(
256                "pattern".to_string(),
257                serde_json::Value::String(pattern.to_string()),
258            );
259            m.insert(
260                "tool".to_string(),
261                serde_json::Value::String("Glob".to_string()),
262            );
263            m
264        },
265        graph_node: None,
266        graph_edges: vec![],
267        session_id: payload.session_id.clone(),
268    }))
269}
270
271/// Extract memory from a Grep tool use.
272fn extract_grep(payload: &HookPayload) -> Result<Option<ExtractedMemory>, CodememError> {
273    let pattern = payload
274        .tool_input
275        .get("pattern")
276        .and_then(|v| v.as_str())
277        .unwrap_or("");
278
279    let content = format!(
280        "Grep search: {}\nMatches:\n{}",
281        pattern,
282        truncate(&payload.tool_response, 2000)
283    );
284
285    let tags = vec![format!("pattern:{pattern}"), "search".to_string()];
286
287    Ok(Some(ExtractedMemory {
288        content,
289        memory_type: MemoryType::Pattern,
290        tags,
291        metadata: {
292            let mut m = HashMap::new();
293            m.insert(
294                "pattern".to_string(),
295                serde_json::Value::String(pattern.to_string()),
296            );
297            m.insert(
298                "tool".to_string(),
299                serde_json::Value::String("Grep".to_string()),
300            );
301            m
302        },
303        graph_node: None,
304        graph_edges: vec![],
305        session_id: payload.session_id.clone(),
306    }))
307}
308
309/// Extract memory from an Edit/MultiEdit tool use.
310fn extract_edit(payload: &HookPayload) -> Result<Option<ExtractedMemory>, CodememError> {
311    let file_path = payload
312        .tool_input
313        .get("file_path")
314        .and_then(|v| v.as_str())
315        .unwrap_or("unknown");
316
317    let old_string = payload
318        .tool_input
319        .get("old_string")
320        .and_then(|v| v.as_str())
321        .unwrap_or("");
322
323    let new_string = payload
324        .tool_input
325        .get("new_string")
326        .and_then(|v| v.as_str())
327        .unwrap_or("");
328
329    let content = format!(
330        "Edit: {}\nChanged:\n  - {}\n  + {}",
331        file_path,
332        truncate(old_string, 500),
333        truncate(new_string, 500)
334    );
335
336    let tags = extract_tags_from_path(file_path);
337
338    let graph_node = Some(GraphNode {
339        id: format!("file:{file_path}"),
340        kind: NodeKind::File,
341        label: file_path.to_string(),
342        payload: HashMap::new(),
343        centrality: 0.0,
344        memory_id: None,
345        namespace: None,
346    });
347
348    Ok(Some(ExtractedMemory {
349        content,
350        memory_type: MemoryType::Decision,
351        tags,
352        metadata: {
353            let mut m = HashMap::new();
354            m.insert(
355                "file_path".to_string(),
356                serde_json::Value::String(file_path.to_string()),
357            );
358            m.insert(
359                "tool".to_string(),
360                serde_json::Value::String("Edit".to_string()),
361            );
362            m
363        },
364        graph_node,
365        graph_edges: vec![],
366        session_id: payload.session_id.clone(),
367    }))
368}
369
370/// Extract memory from a Write tool use.
371fn extract_write(payload: &HookPayload) -> Result<Option<ExtractedMemory>, CodememError> {
372    let file_path = payload
373        .tool_input
374        .get("file_path")
375        .and_then(|v| v.as_str())
376        .unwrap_or("unknown");
377
378    let content = format!(
379        "File written: {}\n\n{}",
380        file_path,
381        truncate(&payload.tool_response, 2000)
382    );
383
384    let tags = extract_tags_from_path(file_path);
385
386    let graph_node = Some(GraphNode {
387        id: format!("file:{file_path}"),
388        kind: NodeKind::File,
389        label: file_path.to_string(),
390        payload: HashMap::new(),
391        centrality: 0.0,
392        memory_id: None,
393        namespace: None,
394    });
395
396    Ok(Some(ExtractedMemory {
397        content,
398        memory_type: MemoryType::Decision,
399        tags,
400        metadata: {
401            let mut m = HashMap::new();
402            m.insert(
403                "file_path".to_string(),
404                serde_json::Value::String(file_path.to_string()),
405            );
406            m.insert(
407                "tool".to_string(),
408                serde_json::Value::String("Write".to_string()),
409            );
410            m
411        },
412        graph_node,
413        graph_edges: vec![],
414        session_id: payload.session_id.clone(),
415    }))
416}
417
/// Extract entity tags from a file path.
///
/// Emits, in this order and only when the component exists:
/// * `ext:<extension>` — the file extension,
/// * `dir:<name>` — the name of the immediate parent directory,
/// * `file:<name>` — the final path component.
///
/// All three are derived through `std::path::Path`, so a path with no named
/// parent (`main.rs`, `/main.rs`, `../main.rs`) produces no `dir:` tag rather
/// than an empty or dot-only one.
fn extract_tags_from_path(path: &str) -> Vec<String> {
    let path = std::path::Path::new(path);
    let mut tags = Vec::with_capacity(3);

    // Add file extension tag
    if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
        tags.push(format!("ext:{ext}"));
    }

    // Add the parent directory's own name; `file_name` is `None` for the
    // root, the empty path, and `..`, which suppresses the tag.
    if let Some(parent) = path
        .parent()
        .and_then(|p| p.file_name())
        .and_then(|n| n.to_str())
    {
        tags.push(format!("dir:{parent}"));
    }

    // Add filename
    if let Some(filename) = path.file_name().and_then(|f| f.to_str()) {
        tags.push(format!("file:{filename}"));
    }

    tags
}
449
/// Truncate a string to at most `max_len` bytes.
///
/// Returns `s` unchanged when it already fits. Otherwise returns the longest
/// prefix that is no longer than `max_len` bytes and ends on a UTF-8
/// character boundary, so a multi-byte character is never split (slicing
/// mid-character would panic).
fn truncate(s: &str, max_len: usize) -> &str {
    if s.len() <= max_len {
        return s;
    }

    // Back up to the nearest boundary; index 0 is always a boundary, so this
    // terminates.
    let mut end = max_len;
    while !s.is_char_boundary(end) {
        end -= 1;
    }
    &s[..end]
}
458
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    /// Parse `json` and run extraction, panicking if parsing fails, if
    /// extraction fails, or if the payload is skipped.
    fn extract_from(json: &str) -> ExtractedMemory {
        let payload = parse_payload(json).unwrap();
        extract(&payload).unwrap().unwrap()
    }

    /// Build a set of already-persisted graph-node IDs.
    fn node_ids(ids: &[&str]) -> HashSet<String> {
        ids.iter().map(|id| id.to_string()).collect()
    }

    // --- parsing and extraction -------------------------------------------

    #[test]
    fn parse_read_payload() {
        let json = r#"{
            "tool_name": "Read",
            "tool_input": {"file_path": "src/main.rs"},
            "tool_response": "fn main() { println!(\"hello\"); }"
        }"#;

        let payload = parse_payload(json).unwrap();
        assert_eq!(payload.tool_name, "Read");

        let extracted = extract(&payload).unwrap().unwrap();
        assert_eq!(extracted.memory_type, MemoryType::Context);
        assert!(extracted.tags.contains(&"ext:rs".to_string()));
    }

    #[test]
    fn parse_edit_payload() {
        let extracted = extract_from(
            r#"{
            "tool_name": "Edit",
            "tool_input": {"file_path": "src/lib.rs", "old_string": "foo", "new_string": "bar"},
            "tool_response": "OK"
        }"#,
        );
        assert_eq!(extracted.memory_type, MemoryType::Decision);
    }

    #[test]
    fn skip_large_response() {
        // One byte over the limit is enough to be skipped.
        let large_response = "x".repeat(MAX_CONTENT_SIZE + 1);
        let json = format!(
            r#"{{"tool_name": "Read", "tool_input": {{"file_path": "big.txt"}}, "tool_response": "{large_response}"}}"#
        );

        let payload = parse_payload(&json).unwrap();
        assert!(extract(&payload).unwrap().is_none());
    }

    #[test]
    fn content_hash_deterministic() {
        assert_eq!(content_hash("hello"), content_hash("hello"));
    }

    // --- resolve_edges -----------------------------------------------------

    #[test]
    fn resolve_edges_edit_after_read_creates_evolved_into() {
        // Simulate: file was previously Read (node exists), now being Edited
        let mut extracted = extract_from(
            r#"{
            "tool_name": "Edit",
            "tool_input": {"file_path": "src/lib.rs", "old_string": "foo", "new_string": "bar"},
            "tool_response": "OK"
        }"#,
        );
        assert!(extracted.graph_edges.is_empty());

        // The file node "file:src/lib.rs" already exists from a prior Read
        let existing = node_ids(&["file:src/lib.rs"]);
        resolve_edges(&mut extracted, &existing);

        assert_eq!(extracted.graph_edges.len(), 1);
        let edge = &extracted.graph_edges[0];
        assert_eq!(edge.src_id, "file:src/lib.rs");
        assert_eq!(edge.relationship, RelationshipType::EvolvedInto);
    }

    #[test]
    fn resolve_edges_write_after_read_creates_evolved_into() {
        let mut extracted = extract_from(
            r#"{
            "tool_name": "Write",
            "tool_input": {"file_path": "src/new.rs"},
            "tool_response": "File written"
        }"#,
        );

        // The file node exists from a prior Read
        let existing = node_ids(&["file:src/new.rs"]);
        resolve_edges(&mut extracted, &existing);

        assert_eq!(extracted.graph_edges.len(), 1);
        assert_eq!(
            extracted.graph_edges[0].relationship,
            RelationshipType::EvolvedInto
        );
    }

    #[test]
    fn resolve_edges_edit_no_prior_read_no_edges() {
        let mut extracted = extract_from(
            r#"{
            "tool_name": "Edit",
            "tool_input": {"file_path": "src/lib.rs", "old_string": "foo", "new_string": "bar"},
            "tool_response": "OK"
        }"#,
        );

        // No prior file nodes exist
        resolve_edges(&mut extracted, &node_ids(&[]));

        assert!(extracted.graph_edges.is_empty());
    }

    #[test]
    fn resolve_edges_read_never_creates_edges() {
        let mut extracted = extract_from(
            r#"{
            "tool_name": "Read",
            "tool_input": {"file_path": "src/main.rs"},
            "tool_response": "fn main() {}"
        }"#,
        );

        let existing = node_ids(&["file:src/main.rs"]);
        resolve_edges(&mut extracted, &existing);

        // Read events should not create edges
        assert!(extracted.graph_edges.is_empty());
    }

    #[test]
    fn resolve_edges_glob_no_graph_node_no_edges() {
        let mut extracted = extract_from(
            r#"{
            "tool_name": "Glob",
            "tool_input": {"pattern": "**/*.rs"},
            "tool_response": "src/main.rs\nsrc/lib.rs"
        }"#,
        );

        resolve_edges(&mut extracted, &node_ids(&[]));

        // Glob has no graph_node, so no edges
        assert!(extracted.graph_edges.is_empty());
    }

    // --- materialize_edges -------------------------------------------------

    #[test]
    fn materialize_edges_self_reference() {
        // An empty dst_id marks a self-edge on the source node.
        let pending = vec![PendingEdge {
            src_id: "file:src/lib.rs".to_string(),
            dst_id: String::new(),
            relationship: RelationshipType::EvolvedInto,
        }];

        let edges = materialize_edges(&pending, "memory-123");

        assert_eq!(edges.len(), 1);
        let edge = &edges[0];
        assert_eq!(edge.src, "file:src/lib.rs");
        assert_eq!(edge.dst, "file:src/lib.rs");
        assert_eq!(edge.relationship, RelationshipType::EvolvedInto);
        assert!(edge.properties.contains_key("triggered_by"));
        assert_eq!(
            edge.properties["triggered_by"],
            serde_json::Value::String("memory-123".to_string())
        );
    }

    #[test]
    fn materialize_edges_explicit_src_dst() {
        let pending = vec![PendingEdge {
            src_id: "file:src/a.rs".to_string(),
            dst_id: "file:src/b.rs".to_string(),
            relationship: RelationshipType::RelatesTo,
        }];

        let edges = materialize_edges(&pending, "memory-456");

        assert_eq!(edges.len(), 1);
        let edge = &edges[0];
        assert_eq!(edge.src, "file:src/a.rs");
        assert_eq!(edge.dst, "file:src/b.rs");
        assert_eq!(edge.relationship, RelationshipType::RelatesTo);
    }

    #[test]
    fn materialize_edges_empty_pending() {
        assert!(materialize_edges(&[], "memory-789").is_empty());
    }
}
659}