1use serde::{Deserialize, Serialize};
7use serde_json::Value;
8use sha2::{Digest, Sha256};
9use std::collections::HashSet;
10
11use crate::claude_payload::NormalizedHookEvent;
12
/// Limits `s` to at most `max_chars` characters (Unicode scalar values).
///
/// Strings that already fit are returned unchanged. Longer strings are cut
/// after `max_chars` characters and `"..."` is appended, so a truncated
/// result holds `max_chars + 3` characters. The cut always falls on a
/// character boundary, which keeps multi-byte UTF-8 text valid.
pub(crate) fn truncate_str(s: &str, max_chars: usize) -> String {
    // A character at index `max_chars` exists only when the input is too
    // long; its byte offset is exactly where the kept prefix ends.
    match s.char_indices().nth(max_chars) {
        Some((cut, _)) => format!("{}...", &s[..cut]),
        None => s.to_string(),
    }
}
24
/// A single memory candidate produced by `derive_candidates` from one
/// normalized hook event.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryCandidate {
    /// Identifier of this candidate; `derive_candidates` fills it with a
    /// freshly generated UUID v4 string.
    pub candidate_id: String,
    /// Copy of the source event's `event_name`.
    pub source_event_name: String,
    /// Copy of the source event's `agent`.
    pub source_agent: String,
    /// Heuristic score accumulated by `derive_candidates`; candidates are
    /// only emitted when it is at least 0.4.
    pub signal_score: f32,
    /// Tentative category chosen by `derive_provisional_category`
    /// (`"preferences"`, `"context"` or `"facts"`), or `None` when no rule matched.
    pub provisional_category: Option<String>,
    /// Human-readable summary built by `derive_memory_text`.
    pub memory_text: String,
    /// JSON object built by `build_evidence` holding the tool name, tool
    /// input and truncated excerpts of the event's texts.
    pub evidence: Value,
    /// Tags produced by `derive_labels` (signal tier, `tool:<name>`, topic tags).
    pub labels: Vec<String>,
}
37
/// Names of trivial shell commands. `derive_candidates` checks the start of a
/// Bash command line against this list to detect low-signal invocations,
/// which only earn a small score and skip the tool-input bonus.
const SIMPLE_BASH_COMMANDS: &[&str] = &["ls", "pwd", "whoami", "date", "uptime", "echo"];
40
/// Substrings that mark a Bash tool response as containing facts worth
/// remembering.
///
/// `derive_candidates` searches the *lowercased* response for these, so every
/// entry must itself be lowercase.
///
/// NOTE(review): `"/"` and `"."` match almost any output that contains a path
/// or a sentence, so most non-empty responses will count as high-signal —
/// confirm that breadth is intended.
const HIGH_SIGNAL_PATTERNS: &[&str] = &[
    "test result:",
    "passed",
    "failed",
    "error:",
    "warning:",
    "version",
    "/",
    ".",
    "compilation",
    "build",
];
54
55pub fn derive_candidates(
62 event: &NormalizedHookEvent,
63 seen_fingerprints: &mut HashSet<String>,
64) -> Vec<MemoryCandidate> {
65 let mut signal_score = 0.0f32;
67
68 let is_low_signal_bash = if event.tool_name.as_deref() == Some("Bash") {
70 let command = event
71 .tool_input
72 .as_ref()
73 .and_then(|v| v.get("command"))
74 .and_then(|c| c.as_str())
75 .unwrap_or("");
76
77 let is_simple = SIMPLE_BASH_COMMANDS
78 .iter()
79 .any(|&simple| command.trim().starts_with(simple));
80
81 if is_simple {
82 signal_score += 0.1;
84 true
85 } else if let Some(response) = &event.tool_response_text {
86 let has_facts = HIGH_SIGNAL_PATTERNS
88 .iter()
89 .any(|&pattern| response.to_lowercase().contains(pattern));
90 if has_facts {
91 signal_score += 0.3;
92 }
93 false
94 } else {
95 false
96 }
97 } else {
98 false
99 };
100
101 if !is_low_signal_bash {
103 let has_tool_input = event
104 .tool_input
105 .as_ref()
106 .map(|v| !v.is_null() && !v.as_object().map(|o| o.is_empty()).unwrap_or(false))
107 .unwrap_or(false);
108
109 if event.tool_name.is_some() && has_tool_input {
110 signal_score += 0.3;
111 }
112 }
113
114 if event
116 .assistant_message_text
117 .as_ref()
118 .map(|s| s.len() > 20)
119 .unwrap_or(false)
120 {
121 signal_score += 0.2;
122 }
123
124 if event
126 .user_message_text
127 .as_ref()
128 .map(|s| s.len() > 20)
129 .unwrap_or(false)
130 {
131 signal_score += 0.2;
132 }
133
134 if event.event_name == "user-prompt-submit" && event.user_message_text.is_some() {
136 signal_score += 0.3;
137 }
138
139 let event_lower = event.event_name.to_lowercase();
141 if event_lower.contains("plan") || event_lower.contains("review") {
142 signal_score += 0.2;
143 }
144
145 let has_meaningful_tool_input = event.tool_input.as_ref().is_some_and(|v| {
147 !v.is_null() && !v.as_object().is_some_and(|o| o.is_empty()) && v.to_string().len() > 5
148 });
149 let has_any_content = has_meaningful_tool_input
150 || event
151 .tool_response_text
152 .as_ref()
153 .is_some_and(|s| s.len() > 10)
154 || event
155 .assistant_message_text
156 .as_ref()
157 .is_some_and(|s| s.len() > 20)
158 || event
159 .user_message_text
160 .as_ref()
161 .is_some_and(|s| s.len() > 10);
162
163 if !has_any_content {
164 return Vec::new();
165 }
166
167 if signal_score < 0.4 {
169 return Vec::new();
170 }
171
172 let tool_input_hash = if let Some(input) = &event.tool_input {
174 let mut hasher = Sha256::new();
175 hasher.update(input.to_string().as_bytes());
176 format!("{:x}", hasher.finalize())
177 } else {
178 String::new()
179 };
180
181 let fingerprint = format!(
182 "{}|{}|{}|{}",
183 event.session_id.as_deref().unwrap_or(""),
184 event.event_name,
185 event.tool_name.as_deref().unwrap_or(""),
186 tool_input_hash
187 );
188
189 if seen_fingerprints.contains(&fingerprint) {
190 return Vec::new(); }
192
193 seen_fingerprints.insert(fingerprint);
194
195 let memory_text = derive_memory_text(event);
197
198 let evidence = build_evidence(event);
200
201 let labels = derive_labels(event, signal_score);
203
204 let provisional_category = derive_provisional_category(event, signal_score);
206
207 let candidate_id = uuid::Uuid::new_v4().to_string();
208
209 vec![MemoryCandidate {
210 candidate_id,
211 source_event_name: event.event_name.clone(),
212 source_agent: event.agent.clone(),
213 signal_score,
214 provisional_category,
215 memory_text,
216 evidence,
217 labels,
218 }]
219}
220
221fn derive_memory_text(event: &NormalizedHookEvent) -> String {
223 if event.tool_name.as_deref() == Some("Bash") {
225 let command = event
226 .tool_input
227 .as_ref()
228 .and_then(|v| v.get("command"))
229 .and_then(|c| c.as_str())
230 .unwrap_or("");
231
232 let excerpt = event
233 .tool_response_text
234 .as_ref()
235 .map(|s| truncate_str(s, 100))
236 .unwrap_or_default();
237
238 if !excerpt.is_empty() {
239 return format!("Ran `{}` → {}", command, excerpt);
240 }
241 }
242
243 if event.event_name == "user-prompt-submit" {
245 if let Some(msg) = &event.user_message_text {
246 return msg.clone();
247 }
248 }
249
250 let event_lower = event.event_name.to_lowercase();
252 if event_lower.contains("plan") || event_lower.contains("review") {
253 if let Some(input) = &event.tool_input {
254 if let Some(plan) = input.get("plan").and_then(|p| p.as_str()) {
255 return format!("Plan: {}", plan);
256 }
257 }
258 if let Some(name) = &event.tool_name {
259 return format!("Plan: {}", name);
260 }
261 }
262
263 if let Some(msg) = &event.assistant_message_text {
265 if msg.to_lowercase().contains("decision")
266 || msg.to_lowercase().contains("will")
267 || msg.to_lowercase().contains("going to")
268 {
269 let excerpt = truncate_str(msg, 150);
270 return format!("Decision: {}", excerpt);
271 }
272 }
273
274 let parts: Vec<&str> = vec![
276 event.tool_response_text.as_deref(),
277 event.assistant_message_text.as_deref(),
278 event.user_message_text.as_deref(),
279 ]
280 .into_iter()
281 .flatten()
282 .collect();
283
284 if parts.is_empty() {
285 format!("Event: {}", event.event_name)
286 } else {
287 parts.join(" | ")
288 }
289}
290
291fn build_evidence(event: &NormalizedHookEvent) -> Value {
293 let mut evidence = serde_json::Map::new();
294
295 if let Some(name) = &event.tool_name {
296 evidence.insert("tool_name".to_string(), Value::String(name.clone()));
297 }
298
299 if let Some(input) = &event.tool_input {
300 evidence.insert("tool_input".to_string(), input.clone());
301 }
302
303 if let Some(response) = &event.tool_response_text {
304 let excerpt = truncate_str(response, 200);
305 evidence.insert("tool_response_excerpt".to_string(), Value::String(excerpt));
306 }
307
308 if let Some(msg) = &event.assistant_message_text {
309 let excerpt = truncate_str(msg, 200);
310 evidence.insert(
311 "assistant_message_excerpt".to_string(),
312 Value::String(excerpt),
313 );
314 }
315
316 if let Some(msg) = &event.user_message_text {
317 let excerpt = truncate_str(msg, 200);
318 evidence.insert("user_message_excerpt".to_string(), Value::String(excerpt));
319 }
320
321 Value::Object(evidence)
322}
323
324fn derive_labels(event: &NormalizedHookEvent, signal_score: f32) -> Vec<String> {
326 let mut labels = Vec::new();
327
328 if signal_score >= 0.7 {
330 labels.push("high-signal".to_string());
331 } else if signal_score >= 0.5 {
332 labels.push("medium-signal".to_string());
333 }
334
335 if let Some(name) = &event.tool_name {
337 labels.push(format!("tool:{}", name.to_lowercase()));
338 }
339
340 let event_lower = event.event_name.to_lowercase();
342 if event_lower.contains("plan") {
343 labels.push("plan".to_string());
344 }
345 if event_lower.contains("review") {
346 labels.push("review".to_string());
347 }
348 if event_lower.contains("error") {
349 labels.push("error".to_string());
350 }
351
352 if event_lower.contains("test") || event_lower.contains("verify") {
354 labels.push("verification".to_string());
355 }
356
357 labels
358}
359
360fn derive_provisional_category(event: &NormalizedHookEvent, signal_score: f32) -> Option<String> {
362 let event_lower = event.event_name.to_lowercase();
363
364 if event_lower.contains("user-prompt") {
366 if let Some(msg) = &event.user_message_text {
367 if msg.to_lowercase().contains("prefer")
368 || msg.to_lowercase().contains("always")
369 || msg.to_lowercase().contains("never")
370 {
371 return Some("preferences".to_string());
372 }
373 }
374 }
375
376 if event_lower.contains("plan") || event_lower.contains("review") {
378 return Some("context".to_string());
379 }
380
381 if (event_lower.contains("test") || event_lower.contains("verify")) && signal_score > 0.6 {
383 return Some("facts".to_string());
384 }
385
386 if event.tool_name.as_deref() == Some("Bash") {
388 if let Some(response) = &event.tool_response_text {
389 if response.contains("test result:")
390 || response.contains("passed")
391 || response.contains("failed")
392 {
393 return Some("facts".to_string());
394 }
395 }
396 }
397
398 None
399}
400
// Unit tests for candidate derivation and for `truncate_str`.
// Events are built by feeding raw JSON through `normalize_claude_payload`.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::claude_payload::normalize_claude_payload;
    use serde_json::json;

    // A trivial `ls` invocation is expected to be filtered out as noise.
    #[test]
    fn test_noise_event_yields_no_candidates() {
        let raw = json!({
            "tool_name": "Bash",
            "tool_input": {"command": "ls"},
            "tool_response": "file1.txt\nfile2.txt"
        });

        let event = normalize_claude_payload("claude-code", "post-tool-use", &raw);
        let mut seen = HashSet::new();
        let candidates = derive_candidates(&event, &mut seen);

        assert!(candidates.is_empty());
    }

    // A `cargo test` run with a test-result summary should produce one
    // candidate whose text starts from the "Ran ..." template.
    #[test]
    fn test_bash_verification_event_yields_candidate() {
        let raw = json!({
            "tool_name": "Bash",
            "tool_input": {"command": "cargo test"},
            "tool_response": "running 12 tests\ntest result: ok. 12 passed; 0 failed",
            "session_id": "sess-123"
        });

        let event = normalize_claude_payload("claude-code", "post-tool-use", &raw);
        let mut seen = HashSet::new();
        let candidates = derive_candidates(&event, &mut seen);

        assert_eq!(candidates.len(), 1);
        let candidate = &candidates[0];
        assert!(candidate.signal_score >= 0.4);
        assert!(candidate.memory_text.contains("Ran"));
        assert!(candidate.labels.iter().any(|l| l == "tool:bash"));
    }

    // A user prompt stating a preference should be categorised as "preferences"
    // and keep the prompt text as the memory text.
    #[test]
    fn test_user_preference_prompt_yields_candidate() {
        let raw = json!({
            "event_name": "user-prompt-submit",
            "user_message": "I always prefer to use rustfmt with a 4-space indent. Please configure this for all my projects."
        });

        let event = normalize_claude_payload("claude-code", "user-prompt-submit", &raw);
        let mut seen = HashSet::new();
        let candidates = derive_candidates(&event, &mut seen);

        assert_eq!(candidates.len(), 1);
        let candidate = &candidates[0];
        assert!(candidate.signal_score >= 0.5);
        assert_eq!(
            candidate.provisional_category,
            Some("preferences".to_string())
        );
        assert!(candidate.memory_text.contains("prefer"));
    }

    // Feeding the same event twice with a shared fingerprint set must only
    // yield a candidate the first time.
    #[test]
    fn test_duplicate_suppression_works() {
        let raw = json!({
            "tool_name": "Bash",
            "tool_input": {"command": "cargo test"},
            "tool_response": "test result: ok",
            "session_id": "sess-456"
        });

        let event = normalize_claude_payload("claude-code", "post-tool-use", &raw);
        let mut seen = HashSet::new();

        let first = derive_candidates(&event, &mut seen);
        assert_eq!(first.len(), 1);

        // Second pass hits the fingerprint recorded by the first.
        let second = derive_candidates(&event, &mut seen);
        assert_eq!(second.len(), 0);
    }

    // Plan/review events should be labelled "plan" and categorised as "context".
    #[test]
    fn test_plan_event_yields_candidate() {
        let raw = json!({
            "event_name": "plan-review",
            "tool_name": "Plan",
            "tool_input": {"plan": "Implement feature X, then test"}
        });

        let event = normalize_claude_payload("claude-code", "plan-review", &raw);
        let mut seen = HashSet::new();
        let candidates = derive_candidates(&event, &mut seen);

        assert_eq!(candidates.len(), 1);
        let candidate = &candidates[0];
        assert!(candidate.signal_score >= 0.3);
        assert!(candidate.labels.contains(&"plan".to_string()));
        assert_eq!(candidate.provisional_category, Some("context".to_string()));
    }

    // An event built from an empty payload carries nothing to remember.
    #[test]
    fn test_empty_event_yields_no_candidates() {
        let raw = json!({});

        let event = normalize_claude_payload("claude-code", "empty", &raw);
        let mut seen = HashSet::new();
        let candidates = derive_candidates(&event, &mut seen);

        assert!(candidates.is_empty());
    }

    // Long user and assistant messages on a user-prompt-submit event should
    // push the score into the "high-signal" tier.
    #[test]
    fn test_high_signal_label_added() {
        let raw = json!({
            "event_name": "user-prompt-submit",
            "user_message": "I always prefer using tabs over spaces in my code. This is a strong preference that applies to all languages.",
            "assistant_message": "I'll configure your editor to use tabs by default for all file types."
        });

        let event = normalize_claude_payload("claude-code", "user-prompt-submit", &raw);
        let mut seen = HashSet::new();
        let candidates = derive_candidates(&event, &mut seen);

        assert_eq!(candidates.len(), 1);
        let candidate = &candidates[0];
        assert!(candidate.signal_score >= 0.7);
        assert!(candidate.labels.contains(&"high-signal".to_string()));
    }

    // The evidence object should expose the tool name, the tool input and a
    // bounded excerpt of the tool response.
    #[test]
    fn test_evidence_construction() {
        let raw = json!({
            "tool_name": "Read",
            "tool_input": {"file_path": "src/main.rs"},
            "tool_response": "This is a very long response that should be truncated in the evidence because it exceeds the maximum character limit for excerpts.",
            "assistant_message": "The file contains the main function with error handling."
        });

        let event = normalize_claude_payload("claude-code", "post-tool-use", &raw);
        let mut seen = HashSet::new();
        let candidates = derive_candidates(&event, &mut seen);

        assert_eq!(candidates.len(), 1);
        let candidate = &candidates[0];
        assert!(candidate.evidence.get("tool_name").is_some());
        assert!(candidate.evidence.get("tool_input").is_some());

        let excerpt = candidate
            .evidence
            .get("tool_response_excerpt")
            .and_then(|v| v.as_str());
        assert!(excerpt.is_some());
        // 200 kept characters plus the 3-byte "..." suffix.
        // NOTE(review): the sample response appears to be shorter than 200
        // characters, so the truncation path is probably not exercised here.
        assert!(excerpt.unwrap().len() <= 203);
    }

    // Truncation counts characters, not bytes, so multi-byte text stays valid.
    #[test]
    fn test_truncate_utf8_multibyte() {
        let s = "日本語テスト文字列";
        // Under the limit: returned unchanged.
        assert_eq!(truncate_str(s, 100), s);
        let truncated = truncate_str(s, 4);
        assert_eq!(truncated, "日本語テ...");
        assert!(std::str::from_utf8(truncated.as_bytes()).is_ok());
    }

    // Same check with ASCII and multi-byte characters mixed.
    #[test]
    fn test_truncate_mixed_ascii_multibyte() {
        let s = "Hello日本語World";
        assert_eq!(truncate_str(s, 100), s);
        let truncated = truncate_str(s, 7);
        assert_eq!(truncated, "Hello日本...");
        assert!(std::str::from_utf8(truncated.as_bytes()).is_ok());
    }

    // Empty and short inputs are returned as-is, including at exactly the limit.
    #[test]
    fn test_truncate_empty_and_short() {
        assert_eq!(truncate_str("", 10), "");
        assert_eq!(truncate_str("hi", 10), "hi");
        assert_eq!(truncate_str("hello", 5), "hello");
    }

    // Exactly `max_chars` characters: no suffix. One or more beyond: cut + "...".
    #[test]
    fn test_truncate_exact_boundary() {
        let s = "abcdefghij";
        assert_eq!(truncate_str(s, 10), s);
        let longer = "abcdefghijklmno";
        assert_eq!(truncate_str(longer, 10), "abcdefghij...");
    }

    // Two otherwise identical events from different sessions.
    #[test]
    fn test_different_sessions_different_fingerprints() {
        let raw1 = json!({
            "tool_name": "Bash",
            "tool_input": {"command": "echo test"},
            "tool_response": "test",
            "session_id": "sess-A"
        });

        let raw2 = json!({
            "tool_name": "Bash",
            "tool_input": {"command": "echo test"},
            "tool_response": "test",
            "session_id": "sess-B"
        });

        let event1 = normalize_claude_payload("claude-code", "post-tool-use", &raw1);
        let event2 = normalize_claude_payload("claude-code", "post-tool-use", &raw2);

        let mut seen = HashSet::new();
        let candidates1 = derive_candidates(&event1, &mut seen);
        let candidates2 = derive_candidates(&event2, &mut seen);

        let total: usize = candidates1.len() + candidates2.len();
        // NOTE(review): `derive_candidates` returns at most one candidate per
        // call, so this bound always holds and does not verify that the two
        // sessions produce distinct fingerprints. `echo` is also in
        // SIMPLE_BASH_COMMANDS, so both events are likely filtered before
        // fingerprinting — consider a higher-signal command and asserting
        // one candidate per session.
        assert!(total <= 2);
    }
}