Skip to main content

engram/intelligence/
memory_update.rs

1//! Historical Memory Update Detection — RML-1213
2//!
3//! A-Mem-inspired automatic memory update detection. When new information
4//! contradicts or supplements existing memories, this module detects the
5//! relationship and suggests an appropriate action.
6//!
7//! ## How it works
8//!
9//! 1. Fetch recent memories from the target workspace.
10//! 2. For each existing memory, compute keyword overlap and entity matching
11//!    with the new content.
12//! 3. Classify the relationship: Contradiction, Supplement, Correction,
13//!    or Obsolescence.
//! 4. Return `UpdateCandidate` structs for every pair whose confidence
//!    is at least the threshold (0.3).
//! 5. The caller may then call `apply_update` to commit a chosen action and
//!    `create_update_log` to record it in the `update_log` table.
18//!
19//! ## Invariants
20//!
21//! - Detection never panics on any input.
22//! - Empty workspace returns an empty candidate list.
//! - `apply_update` never writes to `update_log` itself; `create_update_log`
//!   writes exactly one row per call.
24//! - Confidence scores are in the range [0.0, 1.0].
25
26use std::collections::HashSet;
27
28use chrono::Utc;
29use rusqlite::{params, Connection};
30use serde::{Deserialize, Serialize};
31
32use crate::error::{EngramError, Result};
33
34// =============================================================================
35// Public types
36// =============================================================================
37
38/// Classifies the relationship between new content and an existing memory.
39#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
40#[serde(rename_all = "snake_case")]
41pub enum ConflictType {
42    /// New content directly contradicts the existing memory
43    /// (e.g., negation keywords + shared entities).
44    Contradiction,
45    /// New content adds new predicates about the same entities without
46    /// contradicting them.
47    Supplement,
48    /// New content explicitly corrects the existing memory
49    /// (e.g., "actually", "correction", "update").
50    Correction,
51    /// The existing memory references old dates while new content uses
52    /// temporal markers like "now" or "currently".
53    Obsolescence,
54}
55
56impl ConflictType {
57    pub fn as_str(self) -> &'static str {
58        match self {
59            ConflictType::Contradiction => "contradiction",
60            ConflictType::Supplement => "supplement",
61            ConflictType::Correction => "correction",
62            ConflictType::Obsolescence => "obsolescence",
63        }
64    }
65}
66
67impl std::str::FromStr for ConflictType {
68    type Err = EngramError;
69
70    fn from_str(s: &str) -> Result<Self> {
71        match s.to_lowercase().as_str() {
72            "contradiction" => Ok(ConflictType::Contradiction),
73            "supplement" => Ok(ConflictType::Supplement),
74            "correction" => Ok(ConflictType::Correction),
75            "obsolescence" => Ok(ConflictType::Obsolescence),
76            _ => Err(EngramError::InvalidInput(format!(
77                "Unknown conflict type: {}",
78                s
79            ))),
80        }
81    }
82}
83
84/// The action to take when an update is detected.
85#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
86#[serde(rename_all = "snake_case")]
87pub enum UpdateAction {
88    /// Overwrite the existing memory content with the new content.
89    Replace,
90    /// Append the new content to the existing memory.
91    Merge,
92    /// Change the memory type to `archived` so it is preserved but deprioritised.
93    Archive,
94    /// Add a `needs-review` tag so a human can inspect the conflict.
95    Flag,
96}
97
98impl UpdateAction {
99    pub fn as_str(self) -> &'static str {
100        match self {
101            UpdateAction::Replace => "replace",
102            UpdateAction::Merge => "merge",
103            UpdateAction::Archive => "archive",
104            UpdateAction::Flag => "flag",
105        }
106    }
107}
108
109impl std::str::FromStr for UpdateAction {
110    type Err = EngramError;
111
112    fn from_str(s: &str) -> Result<Self> {
113        match s.to_lowercase().as_str() {
114            "replace" => Ok(UpdateAction::Replace),
115            "merge" => Ok(UpdateAction::Merge),
116            "archive" => Ok(UpdateAction::Archive),
117            "flag" => Ok(UpdateAction::Flag),
118            _ => Err(EngramError::InvalidInput(format!(
119                "Unknown update action: {}",
120                s
121            ))),
122        }
123    }
124}
125
126/// A candidate memory that may need to be updated.
127#[derive(Debug, Clone, Serialize, Deserialize)]
128pub struct UpdateCandidate {
129    /// ID of the existing memory that may need updating.
130    pub existing_id: i64,
131    /// How the new content relates to the existing memory.
132    pub conflict_type: ConflictType,
133    /// Confidence score in the range [0.0, 1.0].
134    pub confidence: f32,
135    /// Suggested action to resolve the detected conflict.
136    pub suggested_action: UpdateAction,
137    /// Human-readable explanation for the suggestion.
138    pub reason: String,
139}
140
141/// Result of applying an update to an existing memory.
142#[derive(Debug, Clone, Serialize, Deserialize)]
143pub struct UpdateResult {
144    /// ID of the memory that was updated.
145    pub memory_id: i64,
146    /// The action that was applied.
147    pub action_taken: UpdateAction,
148    /// SHA-256 hex digest of the content *before* the update.
149    pub old_content_hash: String,
150    /// SHA-256 hex digest of the content *after* the update.
151    pub new_content_hash: String,
152}
153
154/// A stored entry in the `update_log` table.
155#[derive(Debug, Clone, Serialize, Deserialize)]
156pub struct UpdateLogEntry {
157    /// Database-assigned id.
158    pub id: i64,
159    /// Memory that was updated.
160    pub memory_id: i64,
161    /// Action that was applied.
162    pub action: UpdateAction,
163    /// Content hash before the update.
164    pub old_hash: String,
165    /// Content hash after the update.
166    pub new_hash: String,
167    /// Human-readable reason for the update.
168    pub reason: String,
169    /// RFC3339 UTC timestamp.
170    pub timestamp: String,
171}
172
173// =============================================================================
174// DDL
175// =============================================================================
176
/// Schema for the `update_log` audit table plus its `memory_id` index.
///
/// The string holds two statements (`CREATE TABLE` and `CREATE INDEX`), so
/// run it with a batch API such as `Connection::execute_batch`. Both
/// statements use `IF NOT EXISTS`, so re-running the DDL is harmless.
/// Intended to run during schema setup (e.g., alongside `CREATE_FACTS_TABLE`).
pub const CREATE_UPDATE_LOG_TABLE: &str = r#"
    CREATE TABLE IF NOT EXISTS update_log (
        id         INTEGER PRIMARY KEY AUTOINCREMENT,
        memory_id  INTEGER NOT NULL,
        action     TEXT    NOT NULL,
        old_hash   TEXT    NOT NULL,
        new_hash   TEXT    NOT NULL,
        reason     TEXT    NOT NULL DEFAULT '',
        timestamp  TEXT    NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now'))
    );
    CREATE INDEX IF NOT EXISTS idx_update_log_memory ON update_log(memory_id);
"#;
192
193// =============================================================================
194// Storage helpers
195// =============================================================================
196
197/// Insert one row into `update_log` and return the stored entry.
198pub fn create_update_log(
199    conn: &Connection,
200    result: &UpdateResult,
201    reason: &str,
202) -> Result<UpdateLogEntry> {
203    let now = Utc::now().format("%Y-%m-%dT%H:%M:%SZ").to_string();
204
205    conn.execute(
206        "INSERT INTO update_log (memory_id, action, old_hash, new_hash, reason, timestamp)
207         VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
208        params![
209            result.memory_id,
210            result.action_taken.as_str(),
211            result.old_content_hash,
212            result.new_content_hash,
213            reason,
214            now,
215        ],
216    )?;
217
218    let id = conn.last_insert_rowid();
219
220    Ok(UpdateLogEntry {
221        id,
222        memory_id: result.memory_id,
223        action: result.action_taken,
224        old_hash: result.old_content_hash.clone(),
225        new_hash: result.new_content_hash.clone(),
226        reason: reason.to_string(),
227        timestamp: now,
228    })
229}
230
231/// List update log entries, optionally filtered to a specific memory.
232///
233/// `limit = 0` means unlimited.
234pub fn list_update_logs(
235    conn: &Connection,
236    memory_id: Option<i64>,
237    limit: usize,
238) -> Result<Vec<UpdateLogEntry>> {
239    let effective_limit: i64 = if limit == 0 { i64::MAX } else { limit as i64 };
240
241    let rows = match memory_id {
242        Some(mid) => {
243            let mut stmt = conn.prepare(
244                "SELECT id, memory_id, action, old_hash, new_hash, reason, timestamp
245                 FROM update_log
246                 WHERE memory_id = ?1
247                 ORDER BY id ASC
248                 LIMIT ?2",
249            )?;
250            let x = stmt
251                .query_map(params![mid, effective_limit], map_log_row)?
252                .collect::<std::result::Result<Vec<_>, _>>()?;
253            x
254        }
255        None => {
256            let mut stmt = conn.prepare(
257                "SELECT id, memory_id, action, old_hash, new_hash, reason, timestamp
258                 FROM update_log
259                 ORDER BY id ASC
260                 LIMIT ?1",
261            )?;
262            let x = stmt
263                .query_map(params![effective_limit], map_log_row)?
264                .collect::<std::result::Result<Vec<_>, _>>()?;
265            x
266        }
267    };
268
269    Ok(rows)
270}
271
272fn map_log_row(row: &rusqlite::Row<'_>) -> rusqlite::Result<UpdateLogEntry> {
273    let action_str: String = row.get(2)?;
274    let action = action_str
275        .parse::<UpdateAction>()
276        .unwrap_or(UpdateAction::Flag);
277    Ok(UpdateLogEntry {
278        id: row.get(0)?,
279        memory_id: row.get(1)?,
280        action,
281        old_hash: row.get(3)?,
282        new_hash: row.get(4)?,
283        reason: row.get(5)?,
284        timestamp: row.get(6)?,
285    })
286}
287
288// =============================================================================
289// Detection engine
290// =============================================================================
291
/// Candidates scoring below this confidence are dropped by the classifiers.
const MIN_CONFIDENCE: f32 = 0.3;

/// Upper bound on how many workspace memories a single detection pass loads.
const MAX_RECENT_MEMORIES: i64 = 200;

/// Words and phrases treated as negation / contradiction signals.
///
/// The classifiers look for these with plain substring search on lowercased
/// text, so a short entry such as "not" also fires inside longer words.
static NEGATION_WORDS: &[&str] = &[
    "not", "no longer", "never", "incorrect", "wrong", "false", "untrue",
    "doesn't", "don't", "isn't", "aren't", "wasn't", "weren't",
];

/// Words and phrases that mark the new content as an explicit correction.
static CORRECTION_WORDS: &[&str] = &[
    "actually", "correction", "update", "correcting", "in fact",
    "to clarify", "clarification", "erratum", "revised",
];

/// Present-tense temporal markers suggesting the new content supersedes
/// older information.
static NOW_WORDS: &[&str] = &[
    "now", "currently", "today", "as of", "at present", "present", "latest", "recent",
];

/// First year (inclusive) accepted by the 4-digit year check.
static YEAR_RANGE_START: u32 = 1900;
/// Last year (inclusive) accepted by the 4-digit year check.
static YEAR_RANGE_END: u32 = 2099;
343
/// Core update-detection engine.
///
/// A stateless unit struct: all inputs are passed to `detect_updates` per
/// call. It derives `Debug`, `Clone` and `Copy` so it can be embedded in
/// larger `Debug` types and passed around by value at no cost.
#[derive(Debug, Clone, Copy)]
pub struct UpdateDetector;
346
347impl UpdateDetector {
348    pub fn new() -> Self {
349        Self
350    }
351
352    /// Detect update candidates for `new_content` against memories in `workspace`.
353    ///
354    /// Fetches at most `MAX_RECENT_MEMORIES` memories from the workspace and
355    /// computes a confidence score for each one. Returns candidates whose
356    /// confidence exceeds `MIN_CONFIDENCE`, sorted descending.
357    pub fn detect_updates(
358        &self,
359        conn: &Connection,
360        new_content: &str,
361        workspace: &str,
362    ) -> Result<Vec<UpdateCandidate>> {
363        if new_content.trim().is_empty() || workspace.trim().is_empty() {
364            return Ok(Vec::new());
365        }
366
367        // Fetch recent memories from the workspace.
368        let memories = fetch_workspace_memories(conn, workspace)?;
369        if memories.is_empty() {
370            return Ok(Vec::new());
371        }
372
373        let new_lower = new_content.to_lowercase();
374        let new_keywords = extract_keywords(&new_lower);
375
376        let mut candidates: Vec<UpdateCandidate> = Vec::new();
377
378        for (id, content, memory_type, tags) in &memories {
379            let existing_lower = content.to_lowercase();
380            let existing_keywords = extract_keywords(&existing_lower);
381
382            let overlap = keyword_overlap(&new_keywords, &existing_keywords);
383            if overlap == 0.0 {
384                // No shared vocabulary — skip entirely.
385                continue;
386            }
387
388            // Try each conflict class in priority order.
389            // The first one that fires wins.
390            if let Some(cand) = detect_correction(&new_lower, &existing_lower, *id, overlap) {
391                candidates.push(cand);
392            } else if let Some(cand) =
393                detect_contradiction(&new_lower, &existing_lower, *id, overlap)
394            {
395                candidates.push(cand);
396            } else if let Some(cand) =
397                detect_obsolescence(&new_lower, &existing_lower, *id, overlap)
398            {
399                candidates.push(cand);
400            } else if let Some(cand) =
401                detect_supplement(&new_lower, &existing_lower, *id, overlap, memory_type, tags)
402            {
403                candidates.push(cand);
404            }
405        }
406
407        // Sort by confidence descending, then by id ascending for determinism.
408        candidates.sort_by(|a, b| {
409            b.confidence
410                .partial_cmp(&a.confidence)
411                .unwrap_or(std::cmp::Ordering::Equal)
412                .then(a.existing_id.cmp(&b.existing_id))
413        });
414
415        Ok(candidates)
416    }
417}
418
419impl Default for UpdateDetector {
420    fn default() -> Self {
421        Self::new()
422    }
423}
424
425// =============================================================================
426// Conflict classifiers
427// =============================================================================
428
429fn detect_contradiction(
430    new_lower: &str,
431    existing_lower: &str,
432    id: i64,
433    overlap: f32,
434) -> Option<UpdateCandidate> {
435    if overlap < 0.15 {
436        return None;
437    }
438
439    let has_negation = NEGATION_WORDS.iter().any(|w| new_lower.contains(w));
440
441    if !has_negation {
442        return None;
443    }
444
445    // Both texts must share some entity-like tokens.
446    let shared = shared_entity_count(new_lower, existing_lower);
447    if shared == 0 {
448        return None;
449    }
450
451    let confidence = (overlap * 0.5 + 0.3).min(1.0);
452    if confidence < MIN_CONFIDENCE {
453        return None;
454    }
455
456    Some(UpdateCandidate {
457        existing_id: id,
458        conflict_type: ConflictType::Contradiction,
459        confidence,
460        suggested_action: UpdateAction::Flag,
461        reason: format!(
462            "New content contains negation signals ('not', 'no longer', etc.) \
463             and shares {} entity tokens with the existing memory (keyword overlap {:.0}%).",
464            shared,
465            overlap * 100.0
466        ),
467    })
468}
469
470fn detect_correction(
471    new_lower: &str,
472    existing_lower: &str,
473    id: i64,
474    overlap: f32,
475) -> Option<UpdateCandidate> {
476    if overlap < 0.10 {
477        return None;
478    }
479
480    let has_correction = CORRECTION_WORDS.iter().any(|w| new_lower.contains(w));
481
482    if !has_correction {
483        return None;
484    }
485
486    let _ = existing_lower; // kept for API symmetry
487
488    let confidence = (overlap * 0.6 + 0.35).min(1.0);
489    if confidence < MIN_CONFIDENCE {
490        return None;
491    }
492
493    Some(UpdateCandidate {
494        existing_id: id,
495        conflict_type: ConflictType::Correction,
496        confidence,
497        suggested_action: UpdateAction::Replace,
498        reason: format!(
499            "New content starts with an explicit correction signal ('actually', \
500             'correction', etc.) and overlaps with the existing memory at {:.0}%.",
501            overlap * 100.0
502        ),
503    })
504}
505
506fn detect_obsolescence(
507    new_lower: &str,
508    existing_lower: &str,
509    id: i64,
510    overlap: f32,
511) -> Option<UpdateCandidate> {
512    if overlap < 0.10 {
513        return None;
514    }
515
516    let existing_has_old_date = contains_old_year(existing_lower);
517    let new_has_now = NOW_WORDS.iter().any(|w| new_lower.contains(w));
518
519    if !(existing_has_old_date && new_has_now) {
520        return None;
521    }
522
523    let confidence = (overlap * 0.5 + 0.25).min(1.0);
524    if confidence < MIN_CONFIDENCE {
525        return None;
526    }
527
528    Some(UpdateCandidate {
529        existing_id: id,
530        conflict_type: ConflictType::Obsolescence,
531        confidence,
532        suggested_action: UpdateAction::Archive,
533        reason: format!(
534            "Existing memory references old dates while the new content uses \
535             temporal markers ('now', 'currently', etc.) at {:.0}% keyword overlap.",
536            overlap * 100.0
537        ),
538    })
539}
540
541fn detect_supplement(
542    new_lower: &str,
543    existing_lower: &str,
544    id: i64,
545    overlap: f32,
546    _memory_type: &str,
547    _tags: &[String],
548) -> Option<UpdateCandidate> {
549    if overlap < 0.20 {
550        return None;
551    }
552
553    // No negation or correction signals — pure additive information.
554    let has_negation = NEGATION_WORDS.iter().any(|w| new_lower.contains(w));
555    let has_correction = CORRECTION_WORDS.iter().any(|w| new_lower.contains(w));
556    if has_negation || has_correction {
557        return None;
558    }
559
560    // New content should have tokens not present in existing content.
561    let new_keywords = extract_keywords(new_lower);
562    let existing_keywords = extract_keywords(existing_lower);
563    let new_unique: usize = new_keywords
564        .iter()
565        .filter(|k| !existing_keywords.contains(*k))
566        .count();
567
568    if new_unique == 0 {
569        return None;
570    }
571
572    // Supplement confidence: base 0.15 so even moderate overlap (0.25+) clears the 0.3 threshold.
573    let confidence = (overlap * 0.6 + 0.15).min(1.0);
574    if confidence < MIN_CONFIDENCE {
575        return None;
576    }
577
578    Some(UpdateCandidate {
579        existing_id: id,
580        conflict_type: ConflictType::Supplement,
581        confidence,
582        suggested_action: UpdateAction::Merge,
583        reason: format!(
584            "New content shares {:.0}% keywords with the existing memory and adds \
585             {} new unique tokens — supplementary information detected.",
586            overlap * 100.0,
587            new_unique
588        ),
589    })
590}
591
592// =============================================================================
593// Apply update
594// =============================================================================
595
596/// Apply `action` to an existing memory and return the result.
597///
598/// The caller is responsible for passing the `new_content` that triggered
599/// the update; it is used for `Replace` and `Merge` actions.
600///
601/// **Note:** this function does NOT write to `update_log` itself. Call
602/// `create_update_log` separately so the caller controls reason text.
603pub fn apply_update(
604    conn: &Connection,
605    candidate: &UpdateCandidate,
606    action: UpdateAction,
607    new_content: &str,
608) -> Result<UpdateResult> {
609    // Fetch current content.
610    let (old_content, tags_json): (String, String) = conn.query_row(
611        "SELECT content, tags FROM memories WHERE id = ?1",
612        params![candidate.existing_id],
613        |row| Ok((row.get(0)?, row.get(1).unwrap_or_else(|_| "[]".to_string()))),
614    )?;
615
616    let old_hash = sha256_hex(&old_content);
617
618    let new_stored_content = match action {
619        UpdateAction::Replace => new_content.to_string(),
620        UpdateAction::Merge => format!("{}\n\n{}", old_content.trim(), new_content.trim()),
621        UpdateAction::Archive => old_content.clone(),
622        UpdateAction::Flag => old_content.clone(),
623    };
624
625    let new_hash = sha256_hex(&new_stored_content);
626
627    match action {
628        UpdateAction::Replace => {
629            conn.execute(
630                "UPDATE memories SET content = ?1, updated_at = ?2 WHERE id = ?3",
631                params![
632                    new_stored_content,
633                    Utc::now().format("%Y-%m-%dT%H:%M:%SZ").to_string(),
634                    candidate.existing_id
635                ],
636            )?;
637        }
638        UpdateAction::Merge => {
639            conn.execute(
640                "UPDATE memories SET content = ?1, updated_at = ?2 WHERE id = ?3",
641                params![
642                    new_stored_content,
643                    Utc::now().format("%Y-%m-%dT%H:%M:%SZ").to_string(),
644                    candidate.existing_id
645                ],
646            )?;
647        }
648        UpdateAction::Archive => {
649            conn.execute(
650                "UPDATE memories SET memory_type = 'archived', updated_at = ?1 WHERE id = ?2",
651                params![
652                    Utc::now().format("%Y-%m-%dT%H:%M:%SZ").to_string(),
653                    candidate.existing_id
654                ],
655            )?;
656        }
657        UpdateAction::Flag => {
658            // Add 'needs-review' to the JSON tag array.
659            let updated_tags = add_tag_to_json(&tags_json, "needs-review");
660            conn.execute(
661                "UPDATE memories SET tags = ?1, updated_at = ?2 WHERE id = ?3",
662                params![
663                    updated_tags,
664                    Utc::now().format("%Y-%m-%dT%H:%M:%SZ").to_string(),
665                    candidate.existing_id
666                ],
667            )?;
668        }
669    }
670
671    Ok(UpdateResult {
672        memory_id: candidate.existing_id,
673        action_taken: action,
674        old_content_hash: old_hash,
675        new_content_hash: new_hash,
676    })
677}
678
679// =============================================================================
680// Internal helpers
681// =============================================================================
682
683/// Fetch (id, content, memory_type, tags) for recent memories in a workspace.
684fn fetch_workspace_memories(
685    conn: &Connection,
686    workspace: &str,
687) -> Result<Vec<(i64, String, String, Vec<String>)>> {
688    let mut stmt = conn.prepare(
689        "SELECT id, content, memory_type, tags
690         FROM memories
691         WHERE workspace = ?1
692         ORDER BY id DESC
693         LIMIT ?2",
694    )?;
695
696    let rows = stmt
697        .query_map(params![workspace, MAX_RECENT_MEMORIES], |row| {
698            let tags_raw: String = row.get::<_, String>(3).unwrap_or_else(|_| "[]".to_string());
699            let tags: Vec<String> = serde_json::from_str(&tags_raw).unwrap_or_default();
700            Ok((
701                row.get::<_, i64>(0)?,
702                row.get::<_, String>(1)?,
703                row.get::<_, String>(2)
704                    .unwrap_or_else(|_| "note".to_string()),
705                tags,
706            ))
707        })?
708        .collect::<std::result::Result<Vec<_>, _>>()?;
709
710    Ok(rows)
711}
712
/// Reduce lowercase text to its set of distinct keywords.
///
/// Tokens are the maximal runs of alphanumeric characters. A token is kept
/// when it is at least 3 bytes long and is not a stop-word.
fn extract_keywords(text: &str) -> HashSet<String> {
    const STOP_WORDS: &[&str] = &[
        "a", "an", "the", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had",
        "do", "does", "did", "will", "would", "could", "should", "may", "might", "shall", "to",
        "of", "in", "on", "at", "by", "for", "with", "from", "as", "it", "its", "this", "that",
        "and", "or", "but", "not", "so", "if", "then", "than", "when", "i", "me", "my", "we",
        "our", "you", "your", "he", "she", "they",
    ];

    let mut keywords = HashSet::new();
    for token in text.split(|c: char| !c.is_alphanumeric()) {
        let too_short = token.len() < 3;
        if too_short || STOP_WORDS.contains(&token) {
            continue;
        }
        keywords.insert(token.to_string());
    }
    keywords
}
731
/// Jaccard similarity of two keyword sets: |A ∩ B| / |A ∪ B|.
///
/// Returns 0.0 when either set is empty; otherwise a value in (0.0, 1.0]
/// for overlapping sets and exactly 0.0 for disjoint ones.
fn keyword_overlap(a: &HashSet<String>, b: &HashSet<String>) -> f32 {
    if a.is_empty() || b.is_empty() {
        return 0.0;
    }

    let shared = a.iter().filter(|keyword| b.contains(*keyword)).count() as f32;
    // |A ∪ B| = |A| + |B| - |A ∩ B|
    let combined = (a.len() + b.len()) as f32 - shared;

    if combined == 0.0 {
        return 0.0;
    }
    shared / combined
}
745
746/// Count capitalised tokens shared between two lowercase texts.
747///
748/// We use a simple heuristic: tokens that start with an uppercase letter in the
749/// *original* (non-lowercased) text are likely named entities. Since we receive
750/// already-lowercased text here, we instead count tokens with length >= 4 that
751/// appear in both texts as a proxy for entity-like shared nouns.
752fn shared_entity_count(new_lower: &str, existing_lower: &str) -> usize {
753    let a = extract_keywords(new_lower);
754    let b = extract_keywords(existing_lower);
755    a.intersection(&b).filter(|t| t.len() >= 4).count()
756}
757
758/// Return `true` if the text contains a 4-digit year in [1900, 2099].
759fn contains_old_year(text: &str) -> bool {
760    let mut chars = text.chars().peekable();
761    while let Some(c) = chars.next() {
762        if c.is_ascii_digit() {
763            let mut num_str = String::with_capacity(4);
764            num_str.push(c);
765            for _ in 0..3 {
766                match chars.peek() {
767                    Some(d) if d.is_ascii_digit() => {
768                        num_str.push(*d);
769                        chars.next();
770                    }
771                    _ => break,
772                }
773            }
774            if num_str.len() == 4 {
775                if let Ok(year) = num_str.parse::<u32>() {
776                    if year >= YEAR_RANGE_START && year <= YEAR_RANGE_END {
777                        return true;
778                    }
779                }
780            }
781        }
782    }
783    false
784}
785
/// Compute a short, deterministic content fingerprint as 16 lowercase hex
/// characters.
///
/// **This is not SHA-256**, despite the name, which is kept so existing
/// call sites keep working. The value is the 64-bit FNV-1a hash of the
/// UTF-8 bytes of `content`. That is sufficient for recording whether content
/// changed, but it is not collision-resistant and must not be used where
/// cryptographic strength matters. Using FNV-1a keeps the module free of a
/// hashing dependency.
fn sha256_hex(content: &str) -> String {
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;

    // FNV-1a: XOR the byte in first, then multiply by the prime.
    let digest = content.bytes().fold(FNV_OFFSET_BASIS, |acc, byte| {
        (acc ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    });

    format!("{:016x}", digest)
}
800
801/// Append a tag to a JSON array string (e.g., `["existing"]` → `["existing","needs-review"]`).
802fn add_tag_to_json(tags_json: &str, tag: &str) -> String {
803    let mut tags: Vec<String> = serde_json::from_str(tags_json).unwrap_or_default();
804    if !tags.iter().any(|t| t == tag) {
805        tags.push(tag.to_string());
806    }
807    serde_json::to_string(&tags).unwrap_or_else(|_| format!("[\"{}\"]", tag))
808}
809
810// =============================================================================
811// Tests
812// =============================================================================
813
814#[cfg(test)]
815mod tests {
816    use super::*;
817    use rusqlite::Connection;
818
819    // -------------------------------------------------------------------------
820    // Helpers
821    // -------------------------------------------------------------------------
822
823    /// Create an in-memory database with the minimal `memories` table schema
824    /// and the `update_log` table.
825    fn in_memory_conn() -> Connection {
826        let conn = Connection::open_in_memory().expect("open in-memory db");
827        conn.execute_batch(
828            "CREATE TABLE IF NOT EXISTS memories (
829                id           INTEGER PRIMARY KEY AUTOINCREMENT,
830                content      TEXT    NOT NULL,
831                memory_type  TEXT    NOT NULL DEFAULT 'note',
832                tags         TEXT    NOT NULL DEFAULT '[]',
833                workspace    TEXT    NOT NULL DEFAULT 'default',
834                created_at   TEXT    NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ','now')),
835                updated_at   TEXT    NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ','now'))
836            );",
837        )
838        .expect("create memories table");
839        conn.execute_batch(CREATE_UPDATE_LOG_TABLE)
840            .expect("create update_log table");
841        conn
842    }
843
844    fn insert_memory(conn: &Connection, content: &str, workspace: &str) -> i64 {
845        conn.execute(
846            "INSERT INTO memories (content, workspace) VALUES (?1, ?2)",
847            params![content, workspace],
848        )
849        .expect("insert memory");
850        conn.last_insert_rowid()
851    }
852
853    fn get_content(conn: &Connection, id: i64) -> String {
854        conn.query_row(
855            "SELECT content FROM memories WHERE id = ?1",
856            params![id],
857            |row| row.get(0),
858        )
859        .expect("get content")
860    }
861
862    fn get_memory_type(conn: &Connection, id: i64) -> String {
863        conn.query_row(
864            "SELECT memory_type FROM memories WHERE id = ?1",
865            params![id],
866            |row| row.get(0),
867        )
868        .expect("get memory_type")
869    }
870
871    fn get_tags(conn: &Connection, id: i64) -> Vec<String> {
872        let raw: String = conn
873            .query_row(
874                "SELECT tags FROM memories WHERE id = ?1",
875                params![id],
876                |row| row.get(0),
877            )
878            .expect("get tags");
879        serde_json::from_str(&raw).unwrap_or_default()
880    }
881
882    // -------------------------------------------------------------------------
883    // Detection tests — one per conflict type
884    // -------------------------------------------------------------------------
885
886    #[test]
887    fn test_detect_contradiction() {
888        let conn = in_memory_conn();
889        let _id = insert_memory(
890            &conn,
891            "Alice works at Anthropic as a senior engineer.",
892            "work",
893        );
894
895        let detector = UpdateDetector::new();
896        let candidates = detector
897            .detect_updates(&conn, "Alice no longer works at Anthropic.", "work")
898            .expect("detect_updates should succeed");
899
900        assert!(
901            !candidates.is_empty(),
902            "Expected at least one contradiction candidate"
903        );
904        let cand = candidates
905            .iter()
906            .find(|c| c.conflict_type == ConflictType::Contradiction);
907        assert!(
908            cand.is_some(),
909            "Expected a Contradiction candidate, got: {:?}",
910            candidates
911        );
912        assert!(
913            cand.unwrap().confidence >= MIN_CONFIDENCE,
914            "Confidence too low"
915        );
916    }
917
918    #[test]
919    fn test_detect_supplement() {
920        let conn = in_memory_conn();
921        let _id = insert_memory(
922            &conn,
923            "Alice works at Anthropic as a senior engineer.",
924            "work",
925        );
926
927        let detector = UpdateDetector::new();
928        let candidates = detector
929            .detect_updates(
930                &conn,
931                "Alice works at Anthropic and also leads the safety team.",
932                "work",
933            )
934            .expect("detect_updates should succeed");
935
936        let cand = candidates
937            .iter()
938            .find(|c| c.conflict_type == ConflictType::Supplement);
939        assert!(
940            cand.is_some(),
941            "Expected a Supplement candidate, got: {:?}",
942            candidates
943        );
944    }
945
946    #[test]
947    fn test_detect_correction() {
948        let conn = in_memory_conn();
949        let _id = insert_memory(
950            &conn,
951            "The project deadline is Friday the 20th.",
952            "schedule",
953        );
954
955        let detector = UpdateDetector::new();
956        let candidates = detector
957            .detect_updates(
958                &conn,
959                "Actually, the project deadline is Thursday the 19th.",
960                "schedule",
961            )
962            .expect("detect_updates should succeed");
963
964        let cand = candidates
965            .iter()
966            .find(|c| c.conflict_type == ConflictType::Correction);
967        assert!(
968            cand.is_some(),
969            "Expected a Correction candidate, got: {:?}",
970            candidates
971        );
972        assert_eq!(
973            cand.unwrap().suggested_action,
974            UpdateAction::Replace,
975            "Correction should suggest Replace"
976        );
977    }
978
979    #[test]
980    fn test_detect_obsolescence() {
981        let conn = in_memory_conn();
982        let _id = insert_memory(
983            &conn,
984            "In 2020, the team was using Python 3.6 for all services.",
985            "tech",
986        );
987
988        let detector = UpdateDetector::new();
989        let candidates = detector
990            .detect_updates(
991                &conn,
992                "The team is currently using Python 3.12 for all services.",
993                "tech",
994            )
995            .expect("detect_updates should succeed");
996
997        let cand = candidates
998            .iter()
999            .find(|c| c.conflict_type == ConflictType::Obsolescence);
1000        assert!(
1001            cand.is_some(),
1002            "Expected an Obsolescence candidate, got: {:?}",
1003            candidates
1004        );
1005        assert_eq!(
1006            cand.unwrap().suggested_action,
1007            UpdateAction::Archive,
1008            "Obsolescence should suggest Archive"
1009        );
1010    }
1011
1012    // -------------------------------------------------------------------------
1013    // Apply-action tests — one per UpdateAction variant
1014    // -------------------------------------------------------------------------
1015
1016    #[test]
1017    fn test_apply_replace() {
1018        let conn = in_memory_conn();
1019        let id = insert_memory(&conn, "Old content about the project.", "notes");
1020
1021        let candidate = UpdateCandidate {
1022            existing_id: id,
1023            conflict_type: ConflictType::Correction,
1024            confidence: 0.8,
1025            suggested_action: UpdateAction::Replace,
1026            reason: "test".to_string(),
1027        };
1028
1029        let result = apply_update(
1030            &conn,
1031            &candidate,
1032            UpdateAction::Replace,
1033            "New content about the project.",
1034        )
1035        .expect("apply_update should succeed");
1036
1037        assert_eq!(result.memory_id, id);
1038        assert_eq!(result.action_taken, UpdateAction::Replace);
1039        assert_ne!(result.old_content_hash, result.new_content_hash);
1040        assert_eq!(get_content(&conn, id), "New content about the project.");
1041    }
1042
1043    #[test]
1044    fn test_apply_merge() {
1045        let conn = in_memory_conn();
1046        let id = insert_memory(&conn, "Alice works at Anthropic.", "notes");
1047
1048        let candidate = UpdateCandidate {
1049            existing_id: id,
1050            conflict_type: ConflictType::Supplement,
1051            confidence: 0.6,
1052            suggested_action: UpdateAction::Merge,
1053            reason: "test".to_string(),
1054        };
1055
1056        let result = apply_update(
1057            &conn,
1058            &candidate,
1059            UpdateAction::Merge,
1060            "She leads the safety team.",
1061        )
1062        .expect("apply_update should succeed");
1063
1064        assert_eq!(result.action_taken, UpdateAction::Merge);
1065        let merged = get_content(&conn, id);
1066        assert!(
1067            merged.contains("Alice works at Anthropic."),
1068            "Merged content should retain old content"
1069        );
1070        assert!(
1071            merged.contains("She leads the safety team."),
1072            "Merged content should include new content"
1073        );
1074    }
1075
1076    #[test]
1077    fn test_apply_archive() {
1078        let conn = in_memory_conn();
1079        let id = insert_memory(&conn, "We use Python 3.6.", "tech");
1080
1081        let candidate = UpdateCandidate {
1082            existing_id: id,
1083            conflict_type: ConflictType::Obsolescence,
1084            confidence: 0.7,
1085            suggested_action: UpdateAction::Archive,
1086            reason: "test".to_string(),
1087        };
1088
1089        let result = apply_update(
1090            &conn,
1091            &candidate,
1092            UpdateAction::Archive,
1093            "We now use Python 3.12.",
1094        )
1095        .expect("apply_update should succeed");
1096
1097        assert_eq!(result.action_taken, UpdateAction::Archive);
1098        assert_eq!(get_memory_type(&conn, id), "archived");
1099    }
1100
1101    #[test]
1102    fn test_apply_flag() {
1103        let conn = in_memory_conn();
1104        let id = insert_memory(&conn, "The budget is $50k.", "finance");
1105
1106        let candidate = UpdateCandidate {
1107            existing_id: id,
1108            conflict_type: ConflictType::Contradiction,
1109            confidence: 0.65,
1110            suggested_action: UpdateAction::Flag,
1111            reason: "test".to_string(),
1112        };
1113
1114        let result = apply_update(
1115            &conn,
1116            &candidate,
1117            UpdateAction::Flag,
1118            "The budget is not $50k.",
1119        )
1120        .expect("apply_update should succeed");
1121
1122        assert_eq!(result.action_taken, UpdateAction::Flag);
1123        let tags = get_tags(&conn, id);
1124        assert!(
1125            tags.contains(&"needs-review".to_string()),
1126            "Tagged memory should contain 'needs-review'"
1127        );
1128    }
1129
1130    // -------------------------------------------------------------------------
1131    // Edge-case tests
1132    // -------------------------------------------------------------------------
1133
1134    #[test]
1135    fn test_no_conflict_when_unrelated() {
1136        let conn = in_memory_conn();
1137        // Insert a memory about cooking — completely unrelated to software.
1138        let _id = insert_memory(
1139            &conn,
1140            "The best way to make pasta is to boil water and add salt.",
1141            "kitchen",
1142        );
1143
1144        let detector = UpdateDetector::new();
1145        let candidates = detector
1146            .detect_updates(
1147                &conn,
1148                "Alice no longer works at Anthropic as an engineer.",
1149                "kitchen",
1150            )
1151            .expect("detect_updates should succeed");
1152
1153        // No significant overlap → no candidates above threshold.
1154        assert!(
1155            candidates.is_empty(),
1156            "Expected no candidates for unrelated content, got: {:?}",
1157            candidates
1158        );
1159    }
1160
1161    #[test]
1162    fn test_empty_workspace_returns_empty() {
1163        let conn = in_memory_conn();
1164        // No memories in "empty-ws".
1165        let detector = UpdateDetector::new();
1166        let candidates = detector
1167            .detect_updates(&conn, "Some new information.", "empty-ws")
1168            .expect("detect_updates should succeed");
1169
1170        assert!(
1171            candidates.is_empty(),
1172            "Empty workspace must return empty candidates"
1173        );
1174    }
1175
1176    // -------------------------------------------------------------------------
1177    // Log storage tests
1178    // -------------------------------------------------------------------------
1179
1180    #[test]
1181    fn test_create_and_list_update_log() {
1182        let conn = in_memory_conn();
1183        let id = insert_memory(&conn, "Original content.", "notes");
1184
1185        let candidate = UpdateCandidate {
1186            existing_id: id,
1187            conflict_type: ConflictType::Correction,
1188            confidence: 0.9,
1189            suggested_action: UpdateAction::Replace,
1190            reason: "explicit correction".to_string(),
1191        };
1192
1193        let result = apply_update(
1194            &conn,
1195            &candidate,
1196            UpdateAction::Replace,
1197            "Corrected content.",
1198        )
1199        .expect("apply_update should succeed");
1200
1201        let log_entry = create_update_log(&conn, &result, "explicit correction")
1202            .expect("create_update_log should succeed");
1203
1204        assert_eq!(log_entry.memory_id, id);
1205        assert_eq!(log_entry.action, UpdateAction::Replace);
1206        assert!(!log_entry.old_hash.is_empty());
1207        assert!(!log_entry.new_hash.is_empty());
1208        assert_ne!(log_entry.old_hash, log_entry.new_hash);
1209
1210        // list_update_logs filtered by memory_id
1211        let logs = list_update_logs(&conn, Some(id), 10).expect("list_update_logs should succeed");
1212        assert_eq!(logs.len(), 1);
1213        assert_eq!(logs[0].id, log_entry.id);
1214
1215        // list_update_logs unfiltered
1216        let all_logs = list_update_logs(&conn, None, 0).expect("list_update_logs should succeed");
1217        assert_eq!(all_logs.len(), 1);
1218    }
1219}