1use serde::{Deserialize, Serialize};
7use serde_json::Value;
8use sha2::{Digest, Sha256};
9use std::collections::HashSet;
10
11use crate::claude_payload::NormalizedHookEvent;
12
/// Limits `s` to at most `max_chars` characters.
///
/// A string that already fits is returned as an owned copy. A longer string is
/// cut after its first `max_chars` `char`s and suffixed with `"..."`. Because
/// the cut position is taken from `char_indices`, it always falls on a UTF-8
/// character boundary.
pub(crate) fn truncate_str(s: &str, max_chars: usize) -> String {
    // The byte offset of the character at index `max_chars` (when there is
    // one) is exactly where the retained prefix ends.
    match s.char_indices().nth(max_chars) {
        Some((cut, _)) => format!("{}...", &s[..cut]),
        None => s.to_string(),
    }
}
24
25#[derive(Debug, Clone, Serialize, Deserialize)]
27pub struct MemoryCandidate {
28 pub candidate_id: String,
29 pub source_event_name: String,
30 pub source_agent: String,
31 pub signal_score: f32,
32 pub provisional_category: Option<String>,
33 pub memory_text: String,
34 pub evidence: Value,
35 pub labels: Vec<String>,
36}
37
/// Bash commands that `derive_candidates` treats as low-signal noise.
const SIMPLE_BASH_COMMANDS: &[&str] = &[
    "ls",
    "pwd",
    "whoami",
    "date",
    "uptime",
    "echo",
];
40
/// Lower-case substrings searched for in the lower-cased output of a Bash
/// tool call; every entry found raises the event's signal score.
///
/// NOTE(review): `"/"` and `"."` occur in almost any command output, so they
/// count as a match for most responses — confirm that this is intended.
const HIGH_SIGNAL_PATTERNS: &[&str] = &[
    "test result:",
    "passed",
    "failed",
    "error:",
    "warning:",
    "version",
    "/",
    ".",
    "compilation",
    "build",
];
54
55pub fn derive_candidates(
62 event: &NormalizedHookEvent,
63 seen_fingerprints: &mut HashSet<String>,
64) -> Vec<MemoryCandidate> {
65 let mut signal_score = 0.0f32;
67
68 let is_low_signal_bash = if event.tool_name.as_deref() == Some("Bash") {
70 let command = event
71 .tool_input
72 .as_ref()
73 .and_then(|v| v.get("command"))
74 .and_then(|c| c.as_str())
75 .unwrap_or("");
76
77 let is_simple = SIMPLE_BASH_COMMANDS
78 .iter()
79 .any(|&simple| command.trim().starts_with(simple));
80
81 if is_simple {
82 signal_score += 0.1;
84 true
85 } else if let Some(response) = &event.tool_response_text {
86 let pattern_matches = HIGH_SIGNAL_PATTERNS
88 .iter()
89 .filter(|&&pattern| response.to_lowercase().contains(pattern))
90 .count();
91 signal_score += 0.2 + (pattern_matches as f32 * 0.1).min(0.4);
93 false
94 } else {
95 false
96 }
97 } else {
98 false
99 };
100
101 if !is_low_signal_bash {
103 let has_tool_input = event
104 .tool_input
105 .as_ref()
106 .map(|v| !v.is_null() && !v.as_object().map(|o| o.is_empty()).unwrap_or(false))
107 .unwrap_or(false);
108
109 if event.tool_name.is_some() && has_tool_input {
110 signal_score += 0.3;
111 }
112
113 if let Some(tool) = &event.tool_name {
115 let tool_lower = tool.to_lowercase();
116 if tool_lower == "read"
117 || tool_lower == "write"
118 || tool_lower == "edit"
119 || tool_lower == "multi_edit"
120 || tool_lower == "glob"
121 || tool_lower == "grep"
122 {
123 signal_score += 0.2;
124 }
125 }
126 }
127
128 if let Some(msg) = &event.assistant_message_text {
130 if msg.len() > 50 {
131 signal_score += 0.3;
132 } else if msg.len() > 20 {
133 signal_score += 0.15;
134 }
135 }
136
137 if let Some(msg) = &event.user_message_text {
139 if msg.len() > 100 {
140 signal_score += 0.35;
141 } else if msg.len() > 20 {
142 signal_score += 0.2;
143 }
144 }
145
146 if event.event_name == "user-prompt-submit" && event.user_message_text.is_some() {
148 signal_score += 0.3;
149 }
150
151 let event_lower = event.event_name.to_lowercase();
153 if event_lower.contains("plan") || event_lower.contains("review") {
154 signal_score += 0.2;
155 }
156
157 if event_lower.contains("error")
159 || event_lower.contains("fail")
160 || event_lower.contains("crash")
161 {
162 signal_score += 0.25;
163 }
164
165 if event_lower.contains("build")
167 || event_lower.contains("test")
168 || event_lower.contains("compile")
169 {
170 signal_score += 0.2;
171 }
172
173 let has_meaningful_tool_input = event.tool_input.as_ref().is_some_and(|v| {
175 !v.is_null() && !v.as_object().is_some_and(|o| o.is_empty()) && v.to_string().len() > 5
176 });
177 let has_any_content = has_meaningful_tool_input
178 || event
179 .tool_response_text
180 .as_ref()
181 .is_some_and(|s| s.len() > 10)
182 || event
183 .assistant_message_text
184 .as_ref()
185 .is_some_and(|s| s.len() > 20)
186 || event
187 .user_message_text
188 .as_ref()
189 .is_some_and(|s| s.len() > 10);
190
191 if !has_any_content {
192 return Vec::new();
193 }
194
195 if signal_score < 0.3 {
197 return Vec::new();
198 }
199
200 let tool_input_hash = if let Some(input) = &event.tool_input {
202 let mut hasher = Sha256::new();
203 hasher.update(input.to_string().as_bytes());
204 format!("{:x}", hasher.finalize())
205 } else {
206 String::new()
207 };
208
209 let fingerprint = format!(
210 "{}|{}|{}|{}",
211 event.session_id.as_deref().unwrap_or(""),
212 event.event_name,
213 event.tool_name.as_deref().unwrap_or(""),
214 tool_input_hash
215 );
216
217 if seen_fingerprints.contains(&fingerprint) {
218 return Vec::new(); }
220
221 seen_fingerprints.insert(fingerprint);
222
223 let memory_text = derive_memory_text(event);
225
226 let evidence = build_evidence(event);
228
229 let labels = derive_labels(event, signal_score);
231
232 let provisional_category = derive_provisional_category(event, signal_score);
234
235 let candidate_id = uuid::Uuid::new_v4().to_string();
236
237 vec![MemoryCandidate {
238 candidate_id,
239 source_event_name: event.event_name.clone(),
240 source_agent: event.agent.clone(),
241 signal_score,
242 provisional_category,
243 memory_text,
244 evidence,
245 labels,
246 }]
247}
248
249fn derive_memory_text(event: &NormalizedHookEvent) -> String {
251 if event.tool_name.as_deref() == Some("Bash") {
253 let command = event
254 .tool_input
255 .as_ref()
256 .and_then(|v| v.get("command"))
257 .and_then(|c| c.as_str())
258 .unwrap_or("");
259
260 let excerpt = event
261 .tool_response_text
262 .as_ref()
263 .map(|s| truncate_str(s, 12000))
264 .unwrap_or_default();
265
266 if !excerpt.is_empty() {
267 return format!("Ran `{}` → {}", command, excerpt);
268 }
269 }
270
271 if event.event_name == "user-prompt-submit" {
273 if let Some(msg) = &event.user_message_text {
274 return msg.clone();
275 }
276 }
277
278 let event_lower = event.event_name.to_lowercase();
280 if event_lower.contains("plan") || event_lower.contains("review") {
281 if let Some(input) = &event.tool_input {
282 if let Some(plan) = input.get("plan").and_then(|p| p.as_str()) {
283 return format!("Plan: {}", plan);
284 }
285 }
286 if let Some(name) = &event.tool_name {
287 return format!("Plan: {}", name);
288 }
289 }
290
291 if let Some(msg) = &event.assistant_message_text {
293 if msg.to_lowercase().contains("decision")
294 || msg.to_lowercase().contains("will")
295 || msg.to_lowercase().contains("going to")
296 {
297 let excerpt = truncate_str(msg, 12000);
298 return format!("Decision: {}", excerpt);
299 }
300 }
301
302 let parts: Vec<&str> = vec![
304 event.tool_response_text.as_deref(),
305 event.assistant_message_text.as_deref(),
306 event.user_message_text.as_deref(),
307 ]
308 .into_iter()
309 .flatten()
310 .collect();
311
312 if parts.is_empty() {
313 format!("Event: {}", event.event_name)
314 } else {
315 parts.join(" | ")
316 }
317}
318
319fn build_evidence(event: &NormalizedHookEvent) -> Value {
321 let mut evidence = serde_json::Map::new();
322
323 if let Some(name) = &event.tool_name {
324 evidence.insert("tool_name".to_string(), Value::String(name.clone()));
325 }
326
327 if let Some(input) = &event.tool_input {
328 evidence.insert("tool_input".to_string(), input.clone());
329 }
330
331 if let Some(response) = &event.tool_response_text {
332 let excerpt = truncate_str(response, 200);
333 evidence.insert("tool_response_excerpt".to_string(), Value::String(excerpt));
334 }
335
336 if let Some(msg) = &event.assistant_message_text {
337 let excerpt = truncate_str(msg, 200);
338 evidence.insert(
339 "assistant_message_excerpt".to_string(),
340 Value::String(excerpt),
341 );
342 }
343
344 if let Some(msg) = &event.user_message_text {
345 let excerpt = truncate_str(msg, 200);
346 evidence.insert("user_message_excerpt".to_string(), Value::String(excerpt));
347 }
348
349 Value::Object(evidence)
350}
351
352fn derive_labels(event: &NormalizedHookEvent, signal_score: f32) -> Vec<String> {
354 let mut labels = Vec::new();
355
356 if signal_score >= 0.7 {
358 labels.push("high-signal".to_string());
359 } else if signal_score >= 0.5 {
360 labels.push("medium-signal".to_string());
361 }
362
363 if let Some(name) = &event.tool_name {
365 labels.push(format!("tool:{}", name.to_lowercase()));
366 }
367
368 let event_lower = event.event_name.to_lowercase();
370 if event_lower.contains("plan") {
371 labels.push("plan".to_string());
372 }
373 if event_lower.contains("review") {
374 labels.push("review".to_string());
375 }
376 if event_lower.contains("error") {
377 labels.push("error".to_string());
378 }
379
380 if event_lower.contains("test") || event_lower.contains("verify") {
382 labels.push("verification".to_string());
383 }
384
385 labels
386}
387
388fn derive_provisional_category(event: &NormalizedHookEvent, signal_score: f32) -> Option<String> {
390 let event_lower = event.event_name.to_lowercase();
391
392 if event_lower.contains("user-prompt") {
394 if let Some(msg) = &event.user_message_text {
395 if msg.to_lowercase().contains("prefer")
396 || msg.to_lowercase().contains("always")
397 || msg.to_lowercase().contains("never")
398 {
399 return Some("preferences".to_string());
400 }
401 }
402 }
403
404 if event_lower.contains("plan") || event_lower.contains("review") {
406 return Some("context".to_string());
407 }
408
409 if (event_lower.contains("test") || event_lower.contains("verify")) && signal_score > 0.6 {
411 return Some("facts".to_string());
412 }
413
414 if event.tool_name.as_deref() == Some("Bash") {
416 if let Some(response) = &event.tool_response_text {
417 if response.contains("test result:")
418 || response.contains("passed")
419 || response.contains("failed")
420 {
421 return Some("facts".to_string());
422 }
423 }
424 }
425
426 None
427}
428
/// Unit tests for candidate derivation and string truncation.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::claude_payload::normalize_claude_payload;
    use serde_json::json;

    /// A trivial `ls` call scores too low to produce a candidate.
    #[test]
    fn test_noise_event_yields_no_candidates() {
        let raw = json!({
            "tool_name": "Bash",
            "tool_input": {"command": "ls"},
            "tool_response": "file1.txt\nfile2.txt"
        });

        let event = normalize_claude_payload("claude-code", "post-tool-use", &raw);
        let mut seen = HashSet::new();
        let candidates = derive_candidates(&event, &mut seen);

        assert!(candidates.is_empty());
    }

    /// A test run with recognisable result markers yields a Bash candidate.
    #[test]
    fn test_bash_verification_event_yields_candidate() {
        let raw = json!({
            "tool_name": "Bash",
            "tool_input": {"command": "cargo test"},
            "tool_response": "running 12 tests\ntest result: ok. 12 passed; 0 failed",
            "session_id": "sess-123"
        });

        let event = normalize_claude_payload("claude-code", "post-tool-use", &raw);
        let mut seen = HashSet::new();
        let candidates = derive_candidates(&event, &mut seen);

        assert_eq!(candidates.len(), 1);
        let candidate = &candidates[0];
        assert!(candidate.signal_score >= 0.4);
        assert!(candidate.memory_text.contains("Ran"));
        assert!(candidate.labels.iter().any(|l| l == "tool:bash"));
    }

    /// A prompt stating a preference is categorised as `preferences`.
    #[test]
    fn test_user_preference_prompt_yields_candidate() {
        let raw = json!({
            "event_name": "user-prompt-submit",
            "user_message": "I always prefer to use rustfmt with a 4-space indent. Please configure this for all my projects."
        });

        let event = normalize_claude_payload("claude-code", "user-prompt-submit", &raw);
        let mut seen = HashSet::new();
        let candidates = derive_candidates(&event, &mut seen);

        assert_eq!(candidates.len(), 1);
        let candidate = &candidates[0];
        assert!(candidate.signal_score >= 0.5);
        assert_eq!(
            candidate.provisional_category,
            Some("preferences".to_string())
        );
        assert!(candidate.memory_text.contains("prefer"));
    }

    /// Feeding the same event twice yields a candidate only the first time.
    #[test]
    fn test_duplicate_suppression_works() {
        let raw = json!({
            "tool_name": "Bash",
            "tool_input": {"command": "cargo test"},
            "tool_response": "test result: ok",
            "session_id": "sess-456"
        });

        let event = normalize_claude_payload("claude-code", "post-tool-use", &raw);
        let mut seen = HashSet::new();

        let first = derive_candidates(&event, &mut seen);
        assert_eq!(first.len(), 1);

        let second = derive_candidates(&event, &mut seen);
        assert_eq!(second.len(), 0);
    }

    /// Plan/review events are labelled `plan` and categorised as `context`.
    #[test]
    fn test_plan_event_yields_candidate() {
        let raw = json!({
            "event_name": "plan-review",
            "tool_name": "Plan",
            "tool_input": {"plan": "Implement feature X, then test"}
        });

        let event = normalize_claude_payload("claude-code", "plan-review", &raw);
        let mut seen = HashSet::new();
        let candidates = derive_candidates(&event, &mut seen);

        assert_eq!(candidates.len(), 1);
        let candidate = &candidates[0];
        assert!(candidate.signal_score >= 0.3);
        assert!(candidate.labels.contains(&"plan".to_string()));
        assert_eq!(candidate.provisional_category, Some("context".to_string()));
    }

    /// An event without any content yields nothing.
    #[test]
    fn test_empty_event_yields_no_candidates() {
        let raw = json!({});

        let event = normalize_claude_payload("claude-code", "empty", &raw);
        let mut seen = HashSet::new();
        let candidates = derive_candidates(&event, &mut seen);

        assert!(candidates.is_empty());
    }

    /// Long user and assistant messages push the score into the high tier.
    #[test]
    fn test_high_signal_label_added() {
        let raw = json!({
            "event_name": "user-prompt-submit",
            "user_message": "I always prefer using tabs over spaces in my code. This is a strong preference that applies to all languages.",
            "assistant_message": "I'll configure your editor to use tabs by default for all file types."
        });

        let event = normalize_claude_payload("claude-code", "user-prompt-submit", &raw);
        let mut seen = HashSet::new();
        let candidates = derive_candidates(&event, &mut seen);

        assert_eq!(candidates.len(), 1);
        let candidate = &candidates[0];
        assert!(candidate.signal_score >= 0.7);
        assert!(candidate.labels.contains(&"high-signal".to_string()));
    }

    /// Evidence carries the tool name, tool input and a bounded excerpt.
    #[test]
    fn test_evidence_construction() {
        let raw = json!({
            "tool_name": "Read",
            "tool_input": {"file_path": "src/main.rs"},
            "tool_response": "This is a very long response that should be truncated in the evidence because it exceeds the maximum character limit for excerpts.",
            "assistant_message": "The file contains the main function with error handling."
        });

        let event = normalize_claude_payload("claude-code", "post-tool-use", &raw);
        let mut seen = HashSet::new();
        let candidates = derive_candidates(&event, &mut seen);

        assert_eq!(candidates.len(), 1);
        let candidate = &candidates[0];
        assert!(candidate.evidence.get("tool_name").is_some());
        assert!(candidate.evidence.get("tool_input").is_some());

        let excerpt = candidate
            .evidence
            .get("tool_response_excerpt")
            .and_then(|v| v.as_str());
        assert!(excerpt.is_some());
        // `truncate_str` keeps at most 200 characters plus the "..." suffix,
        // so the bound is measured in characters rather than bytes.
        assert!(excerpt.unwrap().chars().count() <= 203);
    }

    /// Truncation counts characters, never splitting a multi-byte sequence.
    #[test]
    fn test_truncate_utf8_multibyte() {
        let s = "日本語テスト文字列";
        assert_eq!(truncate_str(s, 100), s);
        let truncated = truncate_str(s, 4);
        assert_eq!(truncated, "日本語テ...");
        assert!(std::str::from_utf8(truncated.as_bytes()).is_ok());
    }

    #[test]
    fn test_truncate_mixed_ascii_multibyte() {
        let s = "Hello日本語World";
        assert_eq!(truncate_str(s, 100), s);
        let truncated = truncate_str(s, 7);
        assert_eq!(truncated, "Hello日本...");
        assert!(std::str::from_utf8(truncated.as_bytes()).is_ok());
    }

    #[test]
    fn test_truncate_empty_and_short() {
        assert_eq!(truncate_str("", 10), "");
        assert_eq!(truncate_str("hi", 10), "hi");
        assert_eq!(truncate_str("hello", 5), "hello");
    }

    #[test]
    fn test_truncate_exact_boundary() {
        let s = "abcdefghij";
        assert_eq!(truncate_str(s, 10), s);
        let longer = "abcdefghijklmno";
        assert_eq!(truncate_str(longer, 10), "abcdefghij...");
    }

    /// Identical tool calls in different sessions must not suppress each
    /// other. The command has to be a non-trivial one: a low-signal command
    /// such as `echo` is rejected before the fingerprint is ever consulted.
    #[test]
    fn test_different_sessions_different_fingerprints() {
        let raw1 = json!({
            "tool_name": "Bash",
            "tool_input": {"command": "cargo test"},
            "tool_response": "test result: ok",
            "session_id": "sess-A"
        });

        let raw2 = json!({
            "tool_name": "Bash",
            "tool_input": {"command": "cargo test"},
            "tool_response": "test result: ok",
            "session_id": "sess-B"
        });

        let event1 = normalize_claude_payload("claude-code", "post-tool-use", &raw1);
        let event2 = normalize_claude_payload("claude-code", "post-tool-use", &raw2);

        let mut seen = HashSet::new();
        let candidates1 = derive_candidates(&event1, &mut seen);
        let candidates2 = derive_candidates(&event2, &mut seen);

        assert_eq!(candidates1.len(), 1);
        assert_eq!(candidates2.len(), 1);
        assert_eq!(seen.len(), 2);
    }
}