spool-memory 0.1.1

Local-first developer memory system — persistent, structured knowledge for AI coding tools
Documentation
//! Heuristic contradiction detection for lifecycle memories.
//!
//! Identifies when a new memory's summary conflicts with existing
//! accepted/canonical memories of the same type. Detection is
//! conservative — better to miss a contradiction than false-positive.

use crate::domain::note::tokenize;
use crate::domain::{MemoryLifecycleState, MemoryRecord};
use serde::Serialize;
use std::collections::BTreeSet;

/// One potential contradiction between a new summary and an existing record.
///
/// Produced by [`detect`]; one hit is emitted per conflicting existing record.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct ContradictionHit {
    /// Identifier of the existing record the new summary appears to conflict with.
    pub existing_record_id: String,
    /// Title of that existing record (cloned from the record).
    pub existing_title: String,
    /// Which kind of marker in the new summary triggered this hit.
    pub signal: ContradictionSignal,
}

/// Kind of textual marker that caused a contradiction to be reported.
///
/// Variant names are serialized in `snake_case` per the serde attribute below.
#[derive(Debug, Clone, Copy, Serialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum ContradictionSignal {
    /// The new summary contains a negation marker (see `ZH_NEGATION` / `EN_NEGATION`).
    Negation,
    /// The new summary contains a replacement marker (see `ZH_REPLACEMENT` /
    /// `EN_REPLACEMENT`). Takes precedence over `Negation` when both match.
    Replacement,
}

/// Minimum Jaccard similarity between two summaries' token sets for them
/// to be considered topically related enough to check for contradiction.
/// Pairs scoring strictly below this value are skipped.
const OVERLAP_THRESHOLD: f64 = 0.3;

/// Minimum number of tokens a summary must have before overlap is meaningful.
/// Both summaries in a comparison must meet this minimum; a summary with
/// fewer tokens is never compared.
const MIN_TOKENS: usize = 2;

/// Chinese negation markers.
///
/// Every entry must be non-empty: markers are matched with `str::contains`,
/// and an empty pattern is contained in every string, which would make the
/// negation check succeed for every summary.
///
/// NOTE(review): the first four entries were empty strings in the reviewed
/// source (the original characters appear to have been lost). The values
/// below are a conservative reconstruction — confirm against the intended
/// marker list.
const ZH_NEGATION: &[&str] = &[
    "不用", "别用", "不使用", "避免", "停止", "取消", "禁止", "不要", "不再",
];

/// English negation markers.
///
/// All entries are lowercase because they are searched for in the
/// lowercased new summary. Matching is plain substring containment, so a
/// short marker such as "not" also matches inside longer words.
const EN_NEGATION: &[&str] = &[
    "not",
    "don't",
    "never",
    "stop",
    "cancel",
    "disable",
    "remove",
    "no longer",
];

/// Chinese replacement markers, matched as substrings of the lowercased new
/// summary. A match here yields `ContradictionSignal::Replacement`.
const ZH_REPLACEMENT: &[&str] = &["替代", "改用", "换成", "替换", "而不是", "弃用"];

/// English replacement markers.
///
/// All entries are lowercase because they are searched for (as substrings)
/// in the lowercased new summary. A match here yields
/// `ContradictionSignal::Replacement`.
const EN_REPLACEMENT: &[&str] = &[
    "instead of",
    "replace",
    "switch to",
    "migrate to",
    "move from",
    "rather than",
];

/// Detect contradictions between a new memory's summary and existing records.
///
/// A hit is reported for an existing record when all of the following hold:
///
/// * the record has the same `memory_type` as the new memory,
/// * the record is in an active state (`Accepted` or `Canonical`),
/// * both summaries tokenize to at least `MIN_TOKENS` tokens,
/// * the Jaccard similarity of the two token sets is at least
///   `OVERLAP_THRESHOLD`, and
/// * the new summary (lowercased) contains a replacement or negation marker.
///
/// Markers are looked up only in the new summary, so every hit from one call
/// carries the same signal. `Replacement` wins over `Negation` when both kinds
/// of marker are present.
///
/// Returns an empty vec when no contradictions are found. Hits are returned in
/// the order of `existing`.
pub fn detect(
    new_summary: &str,
    new_memory_type: &str,
    existing: &[(String, MemoryRecord)],
) -> Vec<ContradictionHit> {
    let new_tokens = tokenize(new_summary);
    if new_tokens.len() < MIN_TOKENS {
        return Vec::new();
    }

    // The signal depends only on the new summary, so classify it once up
    // front instead of re-scanning the markers for every existing record.
    let new_lower = new_summary.to_lowercase();
    let has_marker = |markers: &[&str]| contains_any_marker(&new_lower, markers);
    let signal = if has_marker(ZH_REPLACEMENT) || has_marker(EN_REPLACEMENT) {
        // Prefer Replacement over Negation when both match.
        ContradictionSignal::Replacement
    } else if has_marker(ZH_NEGATION) || has_marker(EN_NEGATION) {
        ContradictionSignal::Negation
    } else {
        // Without a marker nothing can be reported; skip tokenizing records.
        return Vec::new();
    };

    existing
        .iter()
        // Cheap structural checks first: same type, active lifecycle state.
        .filter(|(_, record)| {
            record.memory_type == new_memory_type
                && matches!(
                    record.state,
                    MemoryLifecycleState::Accepted | MemoryLifecycleState::Canonical
                )
        })
        // Then the topical-overlap check, which requires tokenizing the record.
        .filter(|(_, record)| {
            let existing_tokens = tokenize(&record.summary);
            existing_tokens.len() >= MIN_TOKENS
                && jaccard(&new_tokens, &existing_tokens) >= OVERLAP_THRESHOLD
        })
        .map(|(record_id, record)| ContradictionHit {
            existing_record_id: record_id.clone(),
            existing_title: record.title.clone(),
            signal,
        })
        .collect()
}

/// Jaccard similarity of two token sets: `|A ∩ B| / |A ∪ B|`.
///
/// Returns a value in `0.0..=1.0`; two empty sets score `0.0` rather than
/// dividing by zero.
fn jaccard(a: &BTreeSet<String>, b: &BTreeSet<String>) -> f64 {
    let shared = a.intersection(b).count();
    // Inclusion–exclusion: |A ∪ B| = |A| + |B| − |A ∩ B|.
    let total = a.len() + b.len() - shared;
    match total {
        0 => 0.0,
        _ => shared as f64 / total as f64,
    }
}

/// Returns `true` when `text` contains at least one of `markers` as a substring.
///
/// Matching is case-sensitive, so callers pass already-lowercased text. An
/// empty `markers` slice yields `false`; an empty marker string matches any
/// text.
fn contains_any_marker(text: &str, markers: &[&str]) -> bool {
    for &marker in markers {
        if text.contains(marker) {
            return true;
        }
    }
    false
}

/// A pair of active records whose summaries look like near-duplicates.
///
/// Produced by [`find_duplicates`]; `a` is the record that appears earlier
/// in the input slice.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct DedupSuggestion {
    /// Identifier of the first record of the pair.
    pub record_id_a: String,
    /// Identifier of the second record of the pair.
    pub record_id_b: String,
    /// Title of the first record.
    pub title_a: String,
    /// Title of the second record.
    pub title_b: String,
    /// Jaccard similarity of the two summaries as a whole-number percentage
    /// (the fractional part is truncated, not rounded).
    pub similarity: u32,
}

/// Suggest pairs of active records whose summaries are near-duplicates.
///
/// Only records in an active state (`Accepted` or `Canonical`) whose summary
/// tokenizes to at least `MIN_TOKENS` tokens are considered, and two records
/// are only compared when they share the same `memory_type`.
///
/// `threshold` is compared directly against the Jaccard similarity of the
/// two token sets (a value in `0.0..=1.0`); pairs scoring at or above it are
/// reported.
///
/// Suggestions are sorted by descending similarity percentage. The sort is
/// stable, so ties keep the order in which the pairs occur in `records`.
pub fn find_duplicates(records: &[(String, MemoryRecord)], threshold: f64) -> Vec<DedupSuggestion> {
    // Tokenize every eligible record exactly once; the pairwise loop below
    // would otherwise re-tokenize each summary once per partner.
    let candidates: Vec<_> = records
        .iter()
        .filter(|(_, r)| {
            matches!(
                r.state,
                MemoryLifecycleState::Accepted | MemoryLifecycleState::Canonical
            )
        })
        .filter_map(|(id, record)| {
            let tokens = tokenize(&record.summary);
            if tokens.len() >= MIN_TOKENS {
                Some((id, record, tokens))
            } else {
                None
            }
        })
        .collect();

    let mut suggestions = Vec::new();
    for (idx, (id_a, rec_a, tokens_a)) in candidates.iter().enumerate() {
        // Compare only against later candidates so each unordered pair is
        // visited once. `idx + 1` never exceeds the length, so the slice is
        // at worst empty.
        for (id_b, rec_b, tokens_b) in &candidates[idx + 1..] {
            if rec_a.memory_type != rec_b.memory_type {
                continue;
            }
            let sim = jaccard(tokens_a, tokens_b);
            if sim >= threshold {
                suggestions.push(DedupSuggestion {
                    record_id_a: (*id_a).clone(),
                    record_id_b: (*id_b).clone(),
                    title_a: rec_a.title.clone(),
                    title_b: rec_b.title.clone(),
                    // Truncating cast: `sim` is in 0.0..=1.0, so the result
                    // is a whole percentage in 0..=100.
                    similarity: (sim * 100.0) as u32,
                });
            }
        }
    }
    suggestions.sort_by_key(|s| std::cmp::Reverse(s.similarity));
    suggestions
}

// Unit tests for `detect`. Fixture summaries are chosen so that their token
// overlap with the new summary lands on the intended side of
// `OVERLAP_THRESHOLD`; change them with care.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::domain::{MemoryLifecycleState, MemoryPromotionAction, MemoryRecord, MemoryScope};

    // Builds a `MemoryRecord` fixture intended to be in the requested
    // lifecycle state.
    // NOTE(review): relies on `new_manual` yielding an Accepted record and
    // `new_ai_proposal` yielding a Candidate record — confirm against
    // `crate::domain`.
    fn record(
        title: &str,
        summary: &str,
        memory_type: &str,
        state: MemoryLifecycleState,
    ) -> MemoryRecord {
        let base = MemoryRecord::new_manual(title, summary, memory_type, MemoryScope::User, "test");
        match state {
            MemoryLifecycleState::Accepted => base,
            MemoryLifecycleState::Canonical => {
                base.apply(MemoryPromotionAction::PromoteToCanonical)
            }
            MemoryLifecycleState::Candidate => MemoryRecord::new_ai_proposal(
                title,
                summary,
                memory_type,
                MemoryScope::User,
                "test",
            ),
            MemoryLifecycleState::Archived => base.apply(MemoryPromotionAction::Archive),
            MemoryLifecycleState::Draft => {
                // The state is set directly on an AI proposal for this case.
                let mut r = MemoryRecord::new_ai_proposal(
                    title,
                    summary,
                    memory_type,
                    MemoryScope::User,
                    "test",
                );
                r.state = MemoryLifecycleState::Draft;
                r
            }
        }
    }

    // Converts `(&str, MemoryRecord)` pairs into the owned-id form `detect` takes.
    fn existing_list(items: Vec<(&str, MemoryRecord)>) -> Vec<(String, MemoryRecord)> {
        items
            .into_iter()
            .map(|(id, r)| (id.to_string(), r))
            .collect()
    }

    // A Chinese negation of an accepted record of the same type is reported
    // with the Negation signal.
    #[test]
    fn detect_finds_negation_same_type() {
        let existing = existing_list(vec![(
            "rec-1",
            record(
                "用 cargo install",
                "用 cargo install 安装 binary 到 ~/.cargo/bin",
                "preference",
                MemoryLifecycleState::Accepted,
            ),
        )]);

        let hits = detect("不用 cargo install 安装 binary", "preference", &existing);
        assert_eq!(hits.len(), 1);
        assert_eq!(hits[0].existing_record_id, "rec-1");
        assert_eq!(hits[0].signal, ContradictionSignal::Negation);
    }

    // Chinese replacement markers ("改用", "替代") yield the Replacement signal.
    #[test]
    fn detect_finds_replacement() {
        let existing = existing_list(vec![(
            "rec-2",
            record(
                "用 React",
                "前端框架用 React 构建 UI 组件",
                "preference",
                MemoryLifecycleState::Accepted,
            ),
        )]);

        let hits = detect("改用 Vue 替代 React 构建 UI 组件", "preference", &existing);
        assert_eq!(hits.len(), 1);
        assert_eq!(hits[0].existing_record_id, "rec-2");
        assert_eq!(hits[0].signal, ContradictionSignal::Replacement);
    }

    // Records with a different memory_type are never compared.
    #[test]
    fn detect_skips_different_type() {
        let existing = existing_list(vec![(
            "rec-3",
            record(
                "用 cargo install",
                "用 cargo install 安装 binary 到 ~/.cargo/bin",
                "workflow",
                MemoryLifecycleState::Accepted,
            ),
        )]);

        let hits = detect("不用 cargo install 安装 binary", "preference", &existing);
        assert!(hits.is_empty());
    }

    // A negation marker alone is not enough: the summaries must also overlap
    // topically.
    #[test]
    fn detect_skips_low_overlap() {
        let existing = existing_list(vec![(
            "rec-4",
            record(
                "用 cargo install",
                "用 cargo install 安装 binary 到 ~/.cargo/bin",
                "preference",
                MemoryLifecycleState::Accepted,
            ),
        )]);

        let hits = detect("不要在周末加班写代码", "preference", &existing);
        assert!(hits.is_empty());
    }

    // Archived records are not active and must be ignored.
    #[test]
    fn detect_skips_archived() {
        let existing = existing_list(vec![(
            "rec-5",
            record(
                "用 cargo install",
                "用 cargo install 安装 binary 到 ~/.cargo/bin",
                "preference",
                MemoryLifecycleState::Archived,
            ),
        )]);

        let hits = detect("不用 cargo install 安装 binary", "preference", &existing);
        assert!(hits.is_empty());
    }

    // Candidate records are not active and must be ignored.
    #[test]
    fn detect_skips_candidate_state() {
        let existing = existing_list(vec![(
            "rec-6",
            record(
                "用 cargo install",
                "用 cargo install 安装 binary 到 ~/.cargo/bin",
                "preference",
                MemoryLifecycleState::Candidate,
            ),
        )]);

        let hits = detect("不用 cargo install 安装 binary", "preference", &existing);
        assert!(hits.is_empty());
    }

    // With nothing to compare against, no hits are produced.
    #[test]
    fn detect_handles_empty_existing() {
        let hits = detect("不用 cargo install", "preference", &[]);
        assert!(hits.is_empty());
    }

    // English negation ("don't") against an accepted record.
    #[test]
    fn detect_english_negation() {
        let existing = existing_list(vec![(
            "rec-7",
            record(
                "Use JWT",
                "use JWT tokens for API authentication",
                "preference",
                MemoryLifecycleState::Accepted,
            ),
        )]);

        let hits = detect(
            "don't use JWT tokens for API authentication, use sessions",
            "preference",
            &existing,
        );
        assert_eq!(hits.len(), 1);
        assert_eq!(hits[0].existing_record_id, "rec-7");
        assert_eq!(hits[0].signal, ContradictionSignal::Negation);
    }

    // English replacement ("migrate to", "instead of") against a canonical
    // record; canonical counts as active.
    #[test]
    fn detect_english_replacement() {
        let existing = existing_list(vec![(
            "rec-8",
            record(
                "Deploy to AWS",
                "deploy all services to AWS infrastructure",
                "preference",
                MemoryLifecycleState::Canonical,
            ),
        )]);

        let hits = detect(
            "migrate to GCP instead of AWS for all services infrastructure",
            "preference",
            &existing,
        );
        assert_eq!(hits.len(), 1);
        assert_eq!(hits[0].existing_record_id, "rec-8");
        assert_eq!(hits[0].signal, ContradictionSignal::Replacement);
    }
}