Skip to main content

codemem_hooks/
lib.rs

1//! codemem-hooks: PostToolUse hook handler for passive capture from AI coding assistants.
2//!
3//! Parses PostToolUse JSON payloads from stdin, extracts relevant information
4//! based on tool type, and creates appropriate memories with auto-tagging.
5
6pub mod diff;
7
8use codemem_core::{CodememError, GraphNode, MemoryType, NodeKind, RelationshipType};
9use serde::Deserialize;
10use sha2::{Digest, Sha256};
11use std::collections::HashMap;
12
/// Maximum tool-response size, in bytes, that `extract` will process (100 KiB).
/// Payloads whose `tool_response` is longer than this are skipped.
const MAX_CONTENT_SIZE: usize = 100 * 1024;
15
/// PostToolUse hook payload from an AI coding assistant.
///
/// Deserialized from JSON by `parse_payload`.
#[derive(Debug, Deserialize)]
pub struct HookPayload {
    /// Name of the tool that ran; `extract` dispatches on this value.
    pub tool_name: String,
    /// Tool arguments as raw JSON; extractors look up keys such as
    /// `file_path`, `pattern`, `old_string` and `new_string`.
    pub tool_input: serde_json::Value,
    /// Textual tool output; its byte length is checked against `MAX_CONTENT_SIZE`.
    pub tool_response: String,
    /// Session identifier, copied onto the extracted memory when present.
    pub session_id: Option<String>,
    /// Presumably the assistant's working directory; not read by the code in this file.
    pub cwd: Option<String>,
}
25
/// Extracted memory from a hook payload.
#[derive(Debug)]
pub struct ExtractedMemory {
    /// Human-readable summary text built by the per-tool extractor.
    pub content: String,
    /// Category assigned by the extractor (Context, Pattern or Decision in this file).
    pub memory_type: MemoryType,
    /// Auto-generated tags (e.g. `ext:`, `dir:`, `file:`, `glob:`, `pattern:`).
    pub tags: Vec<String>,
    /// Extra key/value data; always contains a `"tool"` entry, which
    /// `resolve_edges` reads to decide whether to add edges.
    pub metadata: HashMap<String, serde_json::Value>,
    /// File node (`file:<path>`) for Read/Edit/Write; `None` for Glob/Grep.
    pub graph_node: Option<GraphNode>,
    /// Empty after extraction; filled in by `resolve_edges`.
    pub graph_edges: Vec<PendingEdge>,
    /// Session identifier copied from the payload.
    pub session_id: Option<String>,
}
37
/// A pending edge to be created once both nodes exist.
#[derive(Debug)]
pub struct PendingEdge {
    /// ID of the source graph node.
    pub src_id: String,
    /// ID of the destination graph node. An empty string is a marker that
    /// `materialize_edges` turns into a self-edge on `src_id`.
    pub dst_id: String,
    /// Kind of relationship the edge represents.
    pub relationship: RelationshipType,
}
45
46/// Parse a hook payload from JSON string.
47pub fn parse_payload(json: &str) -> Result<HookPayload, CodememError> {
48    serde_json::from_str(json)
49        .map_err(|e| CodememError::Hook(format!("Failed to parse payload: {e}")))
50}
51
52/// Extract memory from a hook payload.
53pub fn extract(payload: &HookPayload) -> Result<Option<ExtractedMemory>, CodememError> {
54    // Skip large responses
55    if payload.tool_response.len() > MAX_CONTENT_SIZE {
56        tracing::debug!(
57            "Skipping large response ({} bytes)",
58            payload.tool_response.len()
59        );
60        return Ok(None);
61    }
62
63    match payload.tool_name.as_str() {
64        "Read" => extract_read(payload),
65        "Glob" => extract_glob(payload),
66        "Grep" => extract_grep(payload),
67        "Edit" | "MultiEdit" => extract_edit(payload),
68        "Write" => extract_write(payload),
69        _ => {
70            tracing::debug!("Unknown tool: {}", payload.tool_name);
71            Ok(None)
72        }
73    }
74}
75
76/// Populate `graph_edges` on an `ExtractedMemory` by checking which file graph
77/// nodes already exist in the database.  This creates edges between files that
78/// were previously Read and are now being Edited or Written, capturing the
79/// common explore-then-modify workflow.
80///
81/// `existing_node_ids` should be the set of graph-node IDs already persisted
82/// (e.g. from `storage.all_graph_nodes()`).
83pub fn resolve_edges(
84    extracted: &mut ExtractedMemory,
85    existing_node_ids: &std::collections::HashSet<String>,
86) {
87    // Only file-level tools produce a graph_node with id "file:<path>"
88    let current_node_id = match &extracted.graph_node {
89        Some(node) => node.id.clone(),
90        None => return,
91    };
92
93    // Determine the tool that produced this memory
94    let tool = extracted
95        .metadata
96        .get("tool")
97        .and_then(|v| v.as_str())
98        .unwrap_or("");
99
100    // Only Edit and Write events create edges back to previously-seen files
101    match tool {
102        "Edit" => {
103            // If the same file was previously Read, the file node already exists.
104            // An edit after a read represents an evolution of understanding.
105            if existing_node_ids.contains(&current_node_id) {
106                extracted.graph_edges.push(PendingEdge {
107                    src_id: current_node_id,
108                    dst_id: String::new(), // self-edge marker; will be skipped
109                    relationship: RelationshipType::EvolvedInto,
110                });
111            }
112        }
113        "Write" => {
114            // A Write to a previously-seen file is also an evolution.
115            if existing_node_ids.contains(&current_node_id) {
116                extracted.graph_edges.push(PendingEdge {
117                    src_id: current_node_id,
118                    dst_id: String::new(),
119                    relationship: RelationshipType::EvolvedInto,
120                });
121            }
122        }
123        _ => {}
124    }
125}
126
127/// Resolve pending edges into concrete `Edge` values, given the memory ID that
128/// was just stored and the set of existing graph-node IDs.
129///
130/// Self-edge markers (dst_id == "") use the same node as both src and dst,
131/// representing a file that evolved (was read then edited/written).
132pub fn materialize_edges(pending: &[PendingEdge], memory_id: &str) -> Vec<codemem_core::Edge> {
133    let now = chrono::Utc::now();
134    pending
135        .iter()
136        .map(|pe| {
137            // Skip self-edge markers where src == dst would be meaningless
138            if pe.dst_id.is_empty() {
139                // For an EVOLVED_INTO self-reference, the src node already exists
140                // from the prior Read; we create an edge from the existing node
141                // to itself, annotated with the memory that triggered it.
142                let edge_id = format!("{}-{}-{}", pe.src_id, pe.relationship, memory_id);
143                let mut props = HashMap::new();
144                props.insert(
145                    "triggered_by".to_string(),
146                    serde_json::Value::String(memory_id.to_string()),
147                );
148                codemem_core::Edge {
149                    id: edge_id,
150                    src: pe.src_id.clone(),
151                    dst: pe.src_id.clone(),
152                    relationship: pe.relationship,
153                    weight: 1.0,
154                    properties: props,
155                    created_at: now,
156                    valid_from: None,
157                    valid_to: None,
158                }
159            } else {
160                let edge_id = format!("{}-{}-{}", pe.src_id, pe.relationship, pe.dst_id);
161                codemem_core::Edge {
162                    id: edge_id,
163                    src: pe.src_id.clone(),
164                    dst: pe.dst_id.clone(),
165                    relationship: pe.relationship,
166                    weight: 1.0,
167                    properties: HashMap::new(),
168                    created_at: now,
169                    valid_from: None,
170                    valid_to: None,
171                }
172            }
173        })
174        .collect()
175}
176
177/// Content hash for deduplication.
178pub fn content_hash(content: &str) -> String {
179    let mut hasher = Sha256::new();
180    hasher.update(content.as_bytes());
181    format!("{:x}", hasher.finalize())
182}
183
184/// Extract memory from a Read tool use.
185fn extract_read(payload: &HookPayload) -> Result<Option<ExtractedMemory>, CodememError> {
186    let file_path = payload
187        .tool_input
188        .get("file_path")
189        .and_then(|v| v.as_str())
190        .unwrap_or("unknown");
191
192    // Create a summary of the file content
193    let content = format!(
194        "File read: {}\n\n{}",
195        file_path,
196        truncate(&payload.tool_response, 2000)
197    );
198
199    let tags = extract_tags_from_path(file_path);
200
201    let graph_node = Some(GraphNode {
202        id: format!("file:{file_path}"),
203        kind: NodeKind::File,
204        label: file_path.to_string(),
205        payload: HashMap::new(),
206        centrality: 0.0,
207        memory_id: None,
208        namespace: None,
209    });
210
211    Ok(Some(ExtractedMemory {
212        content,
213        memory_type: MemoryType::Context,
214        tags,
215        metadata: {
216            let mut m = HashMap::new();
217            m.insert(
218                "file_path".to_string(),
219                serde_json::Value::String(file_path.to_string()),
220            );
221            m.insert(
222                "tool".to_string(),
223                serde_json::Value::String("Read".to_string()),
224            );
225            m
226        },
227        graph_node,
228        graph_edges: vec![],
229        session_id: payload.session_id.clone(),
230    }))
231}
232
233/// Extract memory from a Glob tool use.
234fn extract_glob(payload: &HookPayload) -> Result<Option<ExtractedMemory>, CodememError> {
235    let pattern = payload
236        .tool_input
237        .get("pattern")
238        .and_then(|v| v.as_str())
239        .unwrap_or("*");
240
241    let content = format!(
242        "Glob search: {}\nResults:\n{}",
243        pattern,
244        truncate(&payload.tool_response, 2000)
245    );
246
247    let tags = vec![format!("glob:{pattern}"), "discovery".to_string()];
248
249    Ok(Some(ExtractedMemory {
250        content,
251        memory_type: MemoryType::Pattern,
252        tags,
253        metadata: {
254            let mut m = HashMap::new();
255            m.insert(
256                "pattern".to_string(),
257                serde_json::Value::String(pattern.to_string()),
258            );
259            m.insert(
260                "tool".to_string(),
261                serde_json::Value::String("Glob".to_string()),
262            );
263            m
264        },
265        graph_node: None,
266        graph_edges: vec![],
267        session_id: payload.session_id.clone(),
268    }))
269}
270
271/// Extract memory from a Grep tool use.
272fn extract_grep(payload: &HookPayload) -> Result<Option<ExtractedMemory>, CodememError> {
273    let pattern = payload
274        .tool_input
275        .get("pattern")
276        .and_then(|v| v.as_str())
277        .unwrap_or("");
278
279    let content = format!(
280        "Grep search: {}\nMatches:\n{}",
281        pattern,
282        truncate(&payload.tool_response, 2000)
283    );
284
285    let tags = vec![format!("pattern:{pattern}"), "search".to_string()];
286
287    Ok(Some(ExtractedMemory {
288        content,
289        memory_type: MemoryType::Pattern,
290        tags,
291        metadata: {
292            let mut m = HashMap::new();
293            m.insert(
294                "pattern".to_string(),
295                serde_json::Value::String(pattern.to_string()),
296            );
297            m.insert(
298                "tool".to_string(),
299                serde_json::Value::String("Grep".to_string()),
300            );
301            m
302        },
303        graph_node: None,
304        graph_edges: vec![],
305        session_id: payload.session_id.clone(),
306    }))
307}
308
309/// Extract memory from an Edit/MultiEdit tool use.
310fn extract_edit(payload: &HookPayload) -> Result<Option<ExtractedMemory>, CodememError> {
311    let file_path = payload
312        .tool_input
313        .get("file_path")
314        .and_then(|v| v.as_str())
315        .unwrap_or("unknown");
316
317    let old_string = payload
318        .tool_input
319        .get("old_string")
320        .and_then(|v| v.as_str())
321        .unwrap_or("");
322
323    let new_string = payload
324        .tool_input
325        .get("new_string")
326        .and_then(|v| v.as_str())
327        .unwrap_or("");
328
329    let content = format!(
330        "Edit: {}\nChanged:\n  - {}\n  + {}",
331        file_path,
332        truncate(old_string, 500),
333        truncate(new_string, 500)
334    );
335
336    let tags = extract_tags_from_path(file_path);
337
338    let graph_node = Some(GraphNode {
339        id: format!("file:{file_path}"),
340        kind: NodeKind::File,
341        label: file_path.to_string(),
342        payload: HashMap::new(),
343        centrality: 0.0,
344        memory_id: None,
345        namespace: None,
346    });
347
348    Ok(Some(ExtractedMemory {
349        content,
350        memory_type: MemoryType::Decision,
351        tags,
352        metadata: {
353            let mut m = HashMap::new();
354            m.insert(
355                "file_path".to_string(),
356                serde_json::Value::String(file_path.to_string()),
357            );
358            m.insert(
359                "tool".to_string(),
360                serde_json::Value::String("Edit".to_string()),
361            );
362            m
363        },
364        graph_node,
365        graph_edges: vec![],
366        session_id: payload.session_id.clone(),
367    }))
368}
369
370/// Extract memory from a Write tool use.
371fn extract_write(payload: &HookPayload) -> Result<Option<ExtractedMemory>, CodememError> {
372    let file_path = payload
373        .tool_input
374        .get("file_path")
375        .and_then(|v| v.as_str())
376        .unwrap_or("unknown");
377
378    let content = format!(
379        "File written: {}\n\n{}",
380        file_path,
381        truncate(&payload.tool_response, 2000)
382    );
383
384    let tags = extract_tags_from_path(file_path);
385
386    let graph_node = Some(GraphNode {
387        id: format!("file:{file_path}"),
388        kind: NodeKind::File,
389        label: file_path.to_string(),
390        payload: HashMap::new(),
391        centrality: 0.0,
392        memory_id: None,
393        namespace: None,
394    });
395
396    Ok(Some(ExtractedMemory {
397        content,
398        memory_type: MemoryType::Decision,
399        tags,
400        metadata: {
401            let mut m = HashMap::new();
402            m.insert(
403                "file_path".to_string(),
404                serde_json::Value::String(file_path.to_string()),
405            );
406            m.insert(
407                "tool".to_string(),
408                serde_json::Value::String("Write".to_string()),
409            );
410            m
411        },
412        graph_node,
413        graph_edges: vec![],
414        session_id: payload.session_id.clone(),
415    }))
416}
417
/// Derive tags from a file path.
///
/// Produces, in this order and only when the piece exists:
/// - `ext:<extension>`: the file extension,
/// - `dir:<parent>`: the `/`-separated component directly before the last one,
/// - `file:<name>`: the final path component.
///
/// An empty parent component (e.g. for `/main.rs` or `a//b.rs`) is skipped so
/// that no bare `dir:` tag is emitted.
fn extract_tags_from_path(path: &str) -> Vec<String> {
    let as_path = std::path::Path::new(path);
    let mut tags = Vec::with_capacity(3);

    if let Some(ext) = as_path.extension().and_then(|e| e.to_str()) {
        tags.push(format!("ext:{ext}"));
    }

    // Walk the components from the right: the first is the file name, the
    // second (if any) is its immediate parent directory.
    let mut from_right = path.rsplit('/');
    from_right.next();
    if let Some(parent) = from_right.next().filter(|p| !p.is_empty()) {
        tags.push(format!("dir:{parent}"));
    }

    if let Some(filename) = as_path.file_name().and_then(|f| f.to_str()) {
        tags.push(format!("file:{filename}"));
    }

    tags
}
449
/// Truncate `s` to at most `max_len` bytes without splitting a character.
///
/// Returns `s` unchanged when it already fits. Otherwise the cut point is
/// moved back from `max_len` to the nearest UTF-8 character boundary, so the
/// result may be slightly shorter than `max_len` but slicing never panics on
/// multi-byte text.
fn truncate(s: &str, max_len: usize) -> &str {
    if s.len() <= max_len {
        return s;
    }

    // Index 0 is always a boundary, so this loop terminates.
    let mut end = max_len;
    while !s.is_char_boundary(end) {
        end -= 1;
    }
    &s[..end]
}
458
459// ── Trigger-Based Auto-Insights ─────────────────────────────────────────
460
/// An auto-insight generated by trigger-based analysis during PostToolUse.
///
/// Produced by `check_triggers`; this file only builds the values and does not
/// store them.
#[derive(Debug, Clone)]
pub struct AutoInsight {
    /// The insight content to store as a memory.
    pub content: String,
    /// Tags to attach to the insight memory; always starts with `auto-insight`.
    pub tags: Vec<String>,
    /// Importance score for the insight (`check_triggers` uses 0.5 or 0.6).
    pub importance: f64,
    /// Unique tag used for deduplication within a session; the same value is
    /// passed to `has_auto_insight` before the insight is generated.
    pub dedup_tag: String,
}
473
474/// Check trigger conditions against session activity and return any auto-insights.
475///
476/// Three triggers are evaluated:
477/// 1. **Directory focus**: 3+ files read from the same directory suggests deep exploration.
478/// 2. **Edit after read**: Editing a file that was previously read indicates an informed change.
479/// 3. **Repeated search**: Same search pattern used 2+ times suggests a recurring need.
480///
481/// Each trigger checks `has_auto_insight()` to avoid duplicate insights within the same session.
482pub fn check_triggers(
483    storage: &dyn codemem_core::StorageBackend,
484    session_id: &str,
485    tool_name: &str,
486    file_path: Option<&str>,
487    pattern: Option<&str>,
488) -> Vec<AutoInsight> {
489    let mut insights = Vec::new();
490
491    // Trigger 1: 3+ files read from the same directory
492    if tool_name == "Read" {
493        if let Some(fp) = file_path {
494            let directory = std::path::Path::new(fp)
495                .parent()
496                .map(|p| p.to_string_lossy().to_string())
497                .unwrap_or_default();
498            if !directory.is_empty() {
499                let dedup_tag = format!("dir_focus:{}", directory);
500                let already_exists = storage
501                    .has_auto_insight(session_id, &dedup_tag)
502                    .unwrap_or(true);
503                if !already_exists {
504                    let count = storage
505                        .count_directory_reads(session_id, &directory)
506                        .unwrap_or(0);
507                    if count >= 3 {
508                        insights.push(AutoInsight {
509                            content: format!(
510                                "Deep exploration of directory '{}': {} files read in this session. \
511                                 This area may be a focus of the current task.",
512                                directory, count
513                            ),
514                            tags: vec![
515                                "auto-insight".to_string(),
516                                "directory-focus".to_string(),
517                                format!("dir:{}", directory),
518                            ],
519                            importance: 0.6,
520                            dedup_tag,
521                        });
522                    }
523                }
524            }
525        }
526    }
527
528    // Trigger 2: Edit after read — an informed change
529    if matches!(tool_name, "Edit" | "Write") {
530        if let Some(fp) = file_path {
531            let dedup_tag = format!("edit_after_read:{}", fp);
532            let already_exists = storage
533                .has_auto_insight(session_id, &dedup_tag)
534                .unwrap_or(true);
535            if !already_exists {
536                let was_read = storage
537                    .was_file_read_in_session(session_id, fp)
538                    .unwrap_or(false);
539                if was_read {
540                    insights.push(AutoInsight {
541                        content: format!(
542                            "File '{}' was read and then modified in this session, \
543                             indicating an informed change based on code review.",
544                            fp
545                        ),
546                        tags: vec![
547                            "auto-insight".to_string(),
548                            "edit-after-read".to_string(),
549                            format!(
550                                "file:{}",
551                                std::path::Path::new(fp)
552                                    .file_name()
553                                    .and_then(|f| f.to_str())
554                                    .unwrap_or("unknown")
555                            ),
556                        ],
557                        importance: 0.5,
558                        dedup_tag,
559                    });
560                }
561            }
562        }
563    }
564
565    // Trigger 3: Same search pattern used 2+ times
566    if matches!(tool_name, "Grep" | "Glob") {
567        if let Some(pat) = pattern {
568            let dedup_tag = format!("repeated_search:{}", pat);
569            let already_exists = storage
570                .has_auto_insight(session_id, &dedup_tag)
571                .unwrap_or(true);
572            if !already_exists {
573                let count = storage
574                    .count_search_pattern_in_session(session_id, pat)
575                    .unwrap_or(0);
576                if count >= 2 {
577                    insights.push(AutoInsight {
578                        content: format!(
579                            "Search pattern '{}' used {} times in this session. \
580                             Consider storing a permanent memory for this recurring lookup.",
581                            pat, count
582                        ),
583                        tags: vec![
584                            "auto-insight".to_string(),
585                            "repeated-search".to_string(),
586                            format!("pattern:{}", pat),
587                        ],
588                        importance: 0.5,
589                        dedup_tag,
590                    });
591                }
592            }
593        }
594    }
595
596    insights
597}
598
599#[cfg(test)]
600#[path = "tests/lib_tests.rs"]
601mod tests;