1pub mod diff;
7
8use codemem_core::{CodememError, GraphNode, MemoryType, NodeKind, RelationshipType};
9use serde::Deserialize;
10use sha2::{Digest, Sha256};
11use std::collections::HashMap;
12
/// Upper bound, in bytes, on a tool response that [`extract`] will process
/// (100 KiB). Larger responses are skipped without producing a memory.
const MAX_CONTENT_SIZE: usize = 100 * 1024;
15
16#[derive(Debug, Deserialize)]
18pub struct HookPayload {
19 pub tool_name: String,
20 pub tool_input: serde_json::Value,
21 pub tool_response: String,
22 pub session_id: Option<String>,
23 pub cwd: Option<String>,
24}
25
26#[derive(Debug)]
28pub struct ExtractedMemory {
29 pub content: String,
30 pub memory_type: MemoryType,
31 pub tags: Vec<String>,
32 pub metadata: HashMap<String, serde_json::Value>,
33 pub graph_node: Option<GraphNode>,
34 pub graph_edges: Vec<PendingEdge>,
35 pub session_id: Option<String>,
36}
37
38#[derive(Debug)]
40pub struct PendingEdge {
41 pub src_id: String,
42 pub dst_id: String,
43 pub relationship: RelationshipType,
44}
45
46pub fn parse_payload(json: &str) -> Result<HookPayload, CodememError> {
48 serde_json::from_str(json)
49 .map_err(|e| CodememError::Hook(format!("Failed to parse payload: {e}")))
50}
51
52pub fn extract(payload: &HookPayload) -> Result<Option<ExtractedMemory>, CodememError> {
54 if payload.tool_response.len() > MAX_CONTENT_SIZE {
56 tracing::debug!(
57 "Skipping large response ({} bytes)",
58 payload.tool_response.len()
59 );
60 return Ok(None);
61 }
62
63 match payload.tool_name.as_str() {
64 "Read" => extract_read(payload),
65 "Glob" => extract_glob(payload),
66 "Grep" => extract_grep(payload),
67 "Edit" | "MultiEdit" => extract_edit(payload),
68 "Write" => extract_write(payload),
69 "Bash" => extract_bash(payload),
70 "WebFetch" | "WebSearch" => extract_web(payload),
71 "Agent" | "SendMessage" => extract_agent_communication(payload),
72 "ListFiles" | "ListDir" => extract_list_dir(payload),
73 _ => {
74 tracing::debug!("Unknown tool: {}", payload.tool_name);
75 Ok(None)
76 }
77 }
78}
79
80pub fn resolve_edges(
88 extracted: &mut ExtractedMemory,
89 existing_node_ids: &std::collections::HashSet<String>,
90) {
91 let current_node_id = match &extracted.graph_node {
93 Some(node) => node.id.clone(),
94 None => return,
95 };
96
97 let tool = extracted
99 .metadata
100 .get("tool")
101 .and_then(|v| v.as_str())
102 .unwrap_or("");
103
104 match tool {
108 "Edit" | "Write" => {
109 if existing_node_ids.contains(¤t_node_id) {
110 extracted.graph_edges.push(PendingEdge {
111 src_id: current_node_id,
112 dst_id: String::new(), relationship: RelationshipType::EvolvedInto,
114 });
115 }
116 }
117 _ => {}
118 }
119}
120
121pub fn materialize_edges(pending: &[PendingEdge], memory_id: &str) -> Vec<codemem_core::Edge> {
127 let now = chrono::Utc::now();
128 pending
129 .iter()
130 .map(|pe| {
131 if pe.dst_id.is_empty() {
133 let edge_id = format!("{}-{}-{}", pe.src_id, pe.relationship, memory_id);
137 let mut props = HashMap::new();
138 props.insert(
139 "triggered_by".to_string(),
140 serde_json::Value::String(memory_id.to_string()),
141 );
142 codemem_core::Edge {
143 id: edge_id,
144 src: pe.src_id.clone(),
145 dst: pe.src_id.clone(),
146 relationship: pe.relationship,
147 weight: 1.0,
148 properties: props,
149 created_at: now,
150 valid_from: None,
151 valid_to: None,
152 }
153 } else {
154 let edge_id = format!("{}-{}-{}", pe.src_id, pe.relationship, pe.dst_id);
155 codemem_core::Edge {
156 id: edge_id,
157 src: pe.src_id.clone(),
158 dst: pe.dst_id.clone(),
159 relationship: pe.relationship,
160 weight: 1.0,
161 properties: HashMap::new(),
162 created_at: now,
163 valid_from: None,
164 valid_to: None,
165 }
166 }
167 })
168 .collect()
169}
170
171pub fn content_hash(content: &str) -> String {
173 let mut hasher = Sha256::new();
174 hasher.update(content.as_bytes());
175 format!("{:x}", hasher.finalize())
176}
177
178fn build_file_extraction(
180 payload: &HookPayload,
181 file_path: &str,
182 content: String,
183 memory_type: MemoryType,
184 tool_name: &str,
185) -> ExtractedMemory {
186 let tags = extract_tags_from_path(file_path);
187 let graph_node = Some(GraphNode {
188 id: format!("file:{file_path}"),
189 kind: NodeKind::File,
190 label: file_path.to_string(),
191 payload: HashMap::new(),
192 centrality: 0.0,
193 memory_id: None,
194 namespace: None,
195 });
196 let mut metadata = HashMap::new();
197 metadata.insert(
198 "file_path".to_string(),
199 serde_json::Value::String(file_path.to_string()),
200 );
201 metadata.insert(
202 "tool".to_string(),
203 serde_json::Value::String(tool_name.to_string()),
204 );
205 ExtractedMemory {
206 content,
207 memory_type,
208 tags,
209 metadata,
210 graph_node,
211 graph_edges: vec![],
212 session_id: payload.session_id.clone(),
213 }
214}
215
216fn extract_read(payload: &HookPayload) -> Result<Option<ExtractedMemory>, CodememError> {
218 let file_path = payload
219 .tool_input
220 .get("file_path")
221 .and_then(|v| v.as_str())
222 .unwrap_or("unknown");
223
224 let content = format!(
225 "File read: {}\n\n{}",
226 file_path,
227 truncate(&payload.tool_response, 2000)
228 );
229
230 Ok(Some(build_file_extraction(
231 payload,
232 file_path,
233 content,
234 MemoryType::Context,
235 "Read",
236 )))
237}
238
239fn extract_glob(payload: &HookPayload) -> Result<Option<ExtractedMemory>, CodememError> {
241 let pattern = payload
242 .tool_input
243 .get("pattern")
244 .and_then(|v| v.as_str())
245 .unwrap_or("*");
246
247 let content = format!(
248 "Glob search: {}\nResults:\n{}",
249 pattern,
250 truncate(&payload.tool_response, 2000)
251 );
252
253 let tags = vec![format!("glob:{pattern}"), "discovery".to_string()];
254
255 Ok(Some(ExtractedMemory {
256 content,
257 memory_type: MemoryType::Pattern,
258 tags,
259 metadata: {
260 let mut m = HashMap::new();
261 m.insert(
262 "pattern".to_string(),
263 serde_json::Value::String(pattern.to_string()),
264 );
265 m.insert(
266 "tool".to_string(),
267 serde_json::Value::String("Glob".to_string()),
268 );
269 m
270 },
271 graph_node: None,
272 graph_edges: vec![],
273 session_id: payload.session_id.clone(),
274 }))
275}
276
277fn extract_grep(payload: &HookPayload) -> Result<Option<ExtractedMemory>, CodememError> {
279 let pattern = payload
280 .tool_input
281 .get("pattern")
282 .and_then(|v| v.as_str())
283 .unwrap_or("");
284
285 let content = format!(
286 "Grep search: {}\nMatches:\n{}",
287 pattern,
288 truncate(&payload.tool_response, 2000)
289 );
290
291 let tags = vec![format!("pattern:{pattern}"), "search".to_string()];
292
293 Ok(Some(ExtractedMemory {
294 content,
295 memory_type: MemoryType::Pattern,
296 tags,
297 metadata: {
298 let mut m = HashMap::new();
299 m.insert(
300 "pattern".to_string(),
301 serde_json::Value::String(pattern.to_string()),
302 );
303 m.insert(
304 "tool".to_string(),
305 serde_json::Value::String("Grep".to_string()),
306 );
307 m
308 },
309 graph_node: None,
310 graph_edges: vec![],
311 session_id: payload.session_id.clone(),
312 }))
313}
314
315fn extract_edit(payload: &HookPayload) -> Result<Option<ExtractedMemory>, CodememError> {
317 let file_path = payload
318 .tool_input
319 .get("file_path")
320 .and_then(|v| v.as_str())
321 .unwrap_or("unknown");
322
323 let old_string = payload
324 .tool_input
325 .get("old_string")
326 .and_then(|v| v.as_str())
327 .unwrap_or("");
328
329 let new_string = payload
330 .tool_input
331 .get("new_string")
332 .and_then(|v| v.as_str())
333 .unwrap_or("");
334
335 let content = format!(
336 "Edit: {}\nChanged:\n - {}\n + {}",
337 file_path,
338 truncate(old_string, 500),
339 truncate(new_string, 500)
340 );
341
342 Ok(Some(build_file_extraction(
343 payload,
344 file_path,
345 content,
346 MemoryType::Decision,
347 "Edit",
348 )))
349}
350
351fn extract_write(payload: &HookPayload) -> Result<Option<ExtractedMemory>, CodememError> {
353 let file_path = payload
354 .tool_input
355 .get("file_path")
356 .and_then(|v| v.as_str())
357 .unwrap_or("unknown");
358
359 let content = format!(
360 "File written: {}\n\n{}",
361 file_path,
362 truncate(&payload.tool_response, 2000)
363 );
364
365 Ok(Some(build_file_extraction(
366 payload,
367 file_path,
368 content,
369 MemoryType::Decision,
370 "Write",
371 )))
372}
373
374fn extract_bash(payload: &HookPayload) -> Result<Option<ExtractedMemory>, CodememError> {
376 let command = payload
377 .tool_input
378 .get("command")
379 .and_then(|v| v.as_str())
380 .unwrap_or("");
381
382 let first_word = command.split_whitespace().next().unwrap_or("unknown");
383 let response = truncate(&payload.tool_response, 2000);
384
385 let content = format!("Bash command: {}\nOutput:\n{}", command, response);
386
387 let mut tags = vec!["bash".to_string(), format!("command:{first_word}")];
388
389 if let Some(dir) = payload.tool_input.get("cwd").and_then(|v| v.as_str()) {
391 tags.push(format!("dir:{dir}"));
392 } else if let Some(dir) = payload.cwd.as_deref() {
393 tags.push(format!("dir:{dir}"));
394 }
395
396 let response_lower = payload.tool_response.to_lowercase();
398 if response_lower.contains("error:")
399 || response_lower.contains("failed")
400 || payload
401 .tool_input
402 .get("exit_code")
403 .and_then(|v| v.as_i64())
404 .is_some_and(|c| c != 0)
405 {
406 tags.push("error".to_string());
407 }
408
409 let mut metadata = HashMap::new();
410 metadata.insert(
411 "tool".to_string(),
412 serde_json::Value::String("Bash".to_string()),
413 );
414 metadata.insert(
415 "command".to_string(),
416 serde_json::Value::String(command.to_string()),
417 );
418
419 let graph_node = extract_file_path_from_command(command).map(|fp| GraphNode {
421 id: format!("file:{fp}"),
422 kind: NodeKind::File,
423 label: fp.to_string(),
424 payload: HashMap::new(),
425 centrality: 0.0,
426 memory_id: None,
427 namespace: None,
428 });
429
430 Ok(Some(ExtractedMemory {
431 content,
432 memory_type: MemoryType::Context,
433 tags,
434 metadata,
435 graph_node,
436 graph_edges: vec![],
437 session_id: payload.session_id.clone(),
438 }))
439}
440
/// Returns the first whitespace-separated token of `command` that looks like
/// a file path.
///
/// A token qualifies when it does not start with `-`, contains a `/` or has a
/// file extension, is not an `http://` / `https://` URL, and is longer than
/// one character.
fn extract_file_path_from_command(command: &str) -> Option<&str> {
    command.split_whitespace().find(|&token| {
        let is_flag = token.starts_with('-');
        let path_like = token.contains('/') || std::path::Path::new(token).extension().is_some();
        let is_url = token.starts_with("http://") || token.starts_with("https://");

        !is_flag && path_like && !is_url && token.len() > 1
    })
}
460
461fn extract_web(payload: &HookPayload) -> Result<Option<ExtractedMemory>, CodememError> {
463 let url = payload
464 .tool_input
465 .get("url")
466 .and_then(|v| v.as_str())
467 .unwrap_or("");
468
469 let query = payload
470 .tool_input
471 .get("query")
472 .and_then(|v| v.as_str())
473 .unwrap_or("");
474
475 let response = truncate(&payload.tool_response, 2000);
476
477 let content = if !url.is_empty() {
478 format!("Web fetch: {url}\nResponse:\n{response}")
479 } else {
480 format!("Web search: {query}\nResults:\n{response}")
481 };
482
483 let mut tags = vec!["web-research".to_string()];
484
485 if !url.is_empty() {
487 if let Some(domain) = extract_domain(url) {
488 tags.push(format!("url:{domain}"));
489 }
490 }
491
492 if !query.is_empty() {
493 tags.push(format!("query:{query}"));
494 }
495
496 let mut metadata = HashMap::new();
497 metadata.insert(
498 "tool".to_string(),
499 serde_json::Value::String(payload.tool_name.clone()),
500 );
501 if !url.is_empty() {
502 metadata.insert(
503 "url".to_string(),
504 serde_json::Value::String(url.to_string()),
505 );
506 }
507 if !query.is_empty() {
508 metadata.insert(
509 "query".to_string(),
510 serde_json::Value::String(query.to_string()),
511 );
512 }
513
514 Ok(Some(ExtractedMemory {
515 content,
516 memory_type: MemoryType::Context,
517 tags,
518 metadata,
519 graph_node: None,
520 graph_edges: vec![],
521 session_id: payload.session_id.clone(),
522 }))
523}
524
/// Returns the part of `url` between an optional `https://` / `http://`
/// prefix and the first `/`, or `None` when that part is empty.
fn extract_domain(url: &str) -> Option<&str> {
    let rest = ["https://", "http://"]
        .iter()
        .find_map(|&scheme| url.strip_prefix(scheme))
        .unwrap_or(url);

    let host = match rest.split_once('/') {
        Some((before_slash, _)) => before_slash,
        None => rest,
    };

    (!host.is_empty()).then_some(host)
}
538
539fn extract_agent_communication(
541 payload: &HookPayload,
542) -> Result<Option<ExtractedMemory>, CodememError> {
543 let response = truncate(&payload.tool_response, 2000);
544
545 let content = format!("Agent communication ({}): {}", payload.tool_name, response);
546
547 let mut metadata = HashMap::new();
548 metadata.insert(
549 "tool".to_string(),
550 serde_json::Value::String(payload.tool_name.clone()),
551 );
552
553 Ok(Some(ExtractedMemory {
554 content,
555 memory_type: MemoryType::Context,
556 tags: vec!["agent-communication".to_string()],
557 metadata,
558 graph_node: None,
559 graph_edges: vec![],
560 session_id: payload.session_id.clone(),
561 }))
562}
563
564fn extract_list_dir(payload: &HookPayload) -> Result<Option<ExtractedMemory>, CodememError> {
566 let directory = payload
567 .tool_input
568 .get("path")
569 .or_else(|| payload.tool_input.get("directory"))
570 .and_then(|v| v.as_str())
571 .unwrap_or(".");
572
573 let response = truncate(&payload.tool_response, 2000);
574 let content = format!("Listed directory: {directory}\n{response}");
575
576 let mut tags = vec!["discovery".to_string()];
577 if let Some(name) = std::path::Path::new(directory)
579 .file_name()
580 .and_then(|f| f.to_str())
581 {
582 tags.push(format!("dir:{name}"));
583 }
584
585 let mut metadata = HashMap::new();
586 metadata.insert(
587 "tool".to_string(),
588 serde_json::Value::String(payload.tool_name.clone()),
589 );
590 metadata.insert(
591 "directory".to_string(),
592 serde_json::Value::String(directory.to_string()),
593 );
594
595 Ok(Some(ExtractedMemory {
596 content,
597 memory_type: MemoryType::Context,
598 tags,
599 metadata,
600 graph_node: None,
601 graph_edges: vec![],
602 session_id: payload.session_id.clone(),
603 }))
604}
605
/// Derives tags from a `/`-separated file path.
///
/// Up to three tags are produced, in this order:
/// * `ext:<extension>` when the path has a file extension,
/// * `dir:<parent>` for the component directly before the last `/`,
/// * `file:<name>` for the final component.
///
/// The `dir:` tag is omitted when that component is empty (for example
/// `/main.rs`), so a meaningless bare `dir:` tag is never emitted.
fn extract_tags_from_path(path: &str) -> Vec<String> {
    let fs_path = std::path::Path::new(path);
    let mut tags = Vec::with_capacity(3);

    if let Some(ext) = fs_path.extension().and_then(|e| e.to_str()) {
        tags.push(format!("ext:{ext}"));
    }

    // Walking from the right, the second item is the immediate parent; this
    // avoids collecting every component just to index the one before last.
    if let Some(parent) = path.rsplit('/').nth(1).filter(|p| !p.is_empty()) {
        tags.push(format!("dir:{parent}"));
    }

    if let Some(filename) = fs_path.file_name().and_then(|f| f.to_str()) {
        tags.push(format!("file:{filename}"));
    }

    tags
}
637
/// Returns the longest prefix of `s` that is at most `max_len` bytes long and
/// ends on a UTF-8 character boundary.
fn truncate(s: &str, max_len: usize) -> &str {
    if max_len >= s.len() {
        return s;
    }

    // Index 0 is always a boundary, so the search cannot come up empty.
    let cut = (0..=max_len)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);
    &s[..cut]
}
650
/// An insight generated automatically by [`check_triggers`].
#[derive(Debug, Clone)]
pub struct AutoInsight {
    /// Human-readable description of what was observed.
    pub content: String,
    /// Tags for the insight; the built-in triggers always include
    /// `auto-insight`.
    pub tags: Vec<String>,
    /// Importance score assigned by the trigger that produced the insight.
    pub importance: f64,
    /// Key checked against storage so that the same insight is not produced
    /// twice within a session.
    pub dedup_tag: String,
}
665
666pub fn check_triggers(
675 storage: &dyn codemem_core::StorageBackend,
676 session_id: &str,
677 tool_name: &str,
678 file_path: Option<&str>,
679 pattern: Option<&str>,
680) -> Vec<AutoInsight> {
681 let mut insights = Vec::new();
682
683 if tool_name == "Read" {
685 if let Some(fp) = file_path {
686 let directory = std::path::Path::new(fp)
687 .parent()
688 .map(|p| p.to_string_lossy().to_string())
689 .unwrap_or_default();
690 if !directory.is_empty() {
691 let dedup_tag = format!("dir_focus:{}", directory);
692 let already_exists = storage
693 .has_auto_insight(session_id, &dedup_tag)
694 .unwrap_or(true);
695 if !already_exists {
696 let count = storage
697 .count_directory_reads(session_id, &directory)
698 .unwrap_or(0);
699 if count >= 3 {
700 insights.push(AutoInsight {
701 content: format!(
702 "Deep exploration of directory '{}': {} files read in this session. \
703 This area may be a focus of the current task.",
704 directory, count
705 ),
706 tags: vec![
707 "auto-insight".to_string(),
708 "directory-focus".to_string(),
709 format!("dir:{}", directory),
710 ],
711 importance: 0.6,
712 dedup_tag,
713 });
714 }
715 }
716 }
717 }
718 }
719
720 if matches!(tool_name, "Edit" | "Write") {
722 if let Some(fp) = file_path {
723 let dedup_tag = format!("edit_after_read:{}", fp);
724 let already_exists = storage
725 .has_auto_insight(session_id, &dedup_tag)
726 .unwrap_or(true);
727 if !already_exists {
728 let was_read = storage
729 .was_file_read_in_session(session_id, fp)
730 .unwrap_or(false);
731 if was_read {
732 insights.push(AutoInsight {
733 content: format!(
734 "File '{}' was read and then modified in this session, \
735 indicating an informed change based on code review.",
736 fp
737 ),
738 tags: vec![
739 "auto-insight".to_string(),
740 "edit-after-read".to_string(),
741 format!(
742 "file:{}",
743 std::path::Path::new(fp)
744 .file_name()
745 .and_then(|f| f.to_str())
746 .unwrap_or("unknown")
747 ),
748 ],
749 importance: 0.5,
750 dedup_tag,
751 });
752 }
753 }
754 }
755 }
756
757 if tool_name == "Read" {
760 if let Some(fp) = file_path {
761 let directory = std::path::Path::new(fp)
762 .parent()
763 .map(|p| p.to_string_lossy().to_string())
764 .unwrap_or_default();
765 if !directory.is_empty() {
766 let module_name = std::path::Path::new(&directory)
767 .file_name()
768 .and_then(|f| f.to_str())
769 .unwrap_or("unknown");
770 let dedup_tag = format!("exploring_module:{}", directory);
771 let already_exists = storage
772 .has_auto_insight(session_id, &dedup_tag)
773 .unwrap_or(true);
774 if !already_exists {
775 let count = storage
776 .count_directory_reads(session_id, &directory)
777 .unwrap_or(0);
778 if count >= 3 {
779 insights.push(AutoInsight {
780 content: format!(
781 "Exploring '{}' module: {} files read. Building understanding of this area.",
782 module_name, count
783 ),
784 tags: vec![
785 "auto-insight".to_string(),
786 "exploring-module".to_string(),
787 format!("module:{}", module_name),
788 ],
789 importance: 0.55,
790 dedup_tag,
791 });
792 }
793 }
794 }
795 }
796 }
797
798 if tool_name == "Bash" {
800 let has_error = storage
801 .count_search_pattern_in_session(session_id, "error")
802 .unwrap_or(0)
803 > 0;
804 if has_error {
805 let area = file_path
806 .and_then(|fp| {
807 std::path::Path::new(fp)
808 .parent()
809 .and_then(|p| p.file_name())
810 .and_then(|f| f.to_str())
811 })
812 .unwrap_or("project");
813 let dedup_tag = format!("debugging:{}", area);
814 let already_exists = storage
815 .has_auto_insight(session_id, &dedup_tag)
816 .unwrap_or(true);
817 if !already_exists {
818 insights.push(AutoInsight {
819 content: format!(
820 "Debugging in '{}': error output detected in bash commands during this session.",
821 area
822 ),
823 tags: vec![
824 "auto-insight".to_string(),
825 "debugging".to_string(),
826 format!("area:{}", area),
827 ],
828 importance: 0.6,
829 dedup_tag,
830 });
831 }
832 }
833 }
834
835 if matches!(tool_name, "Grep" | "Glob") {
837 if let Some(pat) = pattern {
838 let dedup_tag = format!("repeated_search:{}", pat);
839 let already_exists = storage
840 .has_auto_insight(session_id, &dedup_tag)
841 .unwrap_or(true);
842 if !already_exists {
843 let count = storage
844 .count_search_pattern_in_session(session_id, pat)
845 .unwrap_or(0);
846 if count >= 2 {
847 insights.push(AutoInsight {
848 content: format!(
849 "Search pattern '{}' used {} times in this session. \
850 Consider storing a permanent memory for this recurring lookup.",
851 pat, count
852 ),
853 tags: vec![
854 "auto-insight".to_string(),
855 "repeated-search".to_string(),
856 format!("pattern:{}", pat),
857 ],
858 importance: 0.5,
859 dedup_tag,
860 });
861 }
862 }
863 }
864 }
865
866 insights
867}
868
869#[cfg(test)]
870#[path = "tests/lib_tests.rs"]
871mod tests;