Skip to main content

spool/
contradiction.rs

//! Heuristic contradiction detection for lifecycle memories.
//!
//! Identifies when a new memory's summary conflicts with existing
//! accepted/canonical memories of the same type. Detection is
//! conservative: it is better to miss a contradiction than to report a
//! false positive.
6
7use crate::domain::note::tokenize;
8use crate::domain::{MemoryLifecycleState, MemoryRecord};
9use serde::Serialize;
10use std::collections::BTreeSet;
11
12#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
13pub struct ContradictionHit {
14    pub existing_record_id: String,
15    pub existing_title: String,
16    pub signal: ContradictionSignal,
17}
18
19#[derive(Debug, Clone, Copy, Serialize, PartialEq, Eq)]
20#[serde(rename_all = "snake_case")]
21pub enum ContradictionSignal {
22    Negation,
23    Replacement,
24}
25
/// Lowest Jaccard similarity at which two summaries are treated as being
/// about the same topic and therefore worth checking for a contradiction.
const OVERLAP_THRESHOLD: f64 = 0.3;

/// Smallest token-set size for which an overlap score is considered
/// meaningful; summaries with fewer tokens are skipped.
const MIN_TOKENS: usize = 2;

/// Negation markers looked for in Chinese text.
const ZH_NEGATION: &[&str] = &[
    "不", "没", "别", "勿", "停止", "取消", "禁止", "不要", "不再",
];

/// Negation markers looked for in English text (lowercase).
const EN_NEGATION: &[&str] = &[
    "not",
    "don't",
    "never",
    "stop",
    "cancel",
    "disable",
    "remove",
    "no longer",
];

/// Replacement markers looked for in Chinese text.
const ZH_REPLACEMENT: &[&str] = &["替代", "改用", "换成", "替换", "而不是", "弃用"];

/// Replacement markers looked for in English text (lowercase).
const EN_REPLACEMENT: &[&str] = &[
    "instead of",
    "replace",
    "switch to",
    "migrate to",
    "move from",
    "rather than",
];
62
63/// Detect contradictions between a new memory's summary and existing records.
64/// Only checks records with the same memory_type and active states (Accepted/Canonical).
65/// Returns empty vec when no contradictions found.
66pub fn detect(
67    new_summary: &str,
68    new_memory_type: &str,
69    existing: &[(String, MemoryRecord)],
70) -> Vec<ContradictionHit> {
71    let new_tokens = tokenize(new_summary);
72    if new_tokens.len() < MIN_TOKENS {
73        return Vec::new();
74    }
75    let new_lower = new_summary.to_lowercase();
76
77    let mut hits = Vec::new();
78
79    for (record_id, record) in existing {
80        if record.memory_type != new_memory_type {
81            continue;
82        }
83        if !matches!(
84            record.state,
85            MemoryLifecycleState::Accepted | MemoryLifecycleState::Canonical
86        ) {
87            continue;
88        }
89
90        let existing_tokens = tokenize(&record.summary);
91        if existing_tokens.len() < MIN_TOKENS {
92            continue;
93        }
94
95        let similarity = jaccard(&new_tokens, &existing_tokens);
96        if similarity < OVERLAP_THRESHOLD {
97            continue;
98        }
99
100        let has_replacement = contains_any_marker(&new_lower, ZH_REPLACEMENT)
101            || contains_any_marker(&new_lower, EN_REPLACEMENT);
102        let has_negation = contains_any_marker(&new_lower, ZH_NEGATION)
103            || contains_any_marker(&new_lower, EN_NEGATION);
104
105        // Prefer Replacement over Negation when both match.
106        let signal = if has_replacement {
107            Some(ContradictionSignal::Replacement)
108        } else if has_negation {
109            Some(ContradictionSignal::Negation)
110        } else {
111            None
112        };
113
114        if let Some(signal) = signal {
115            hits.push(ContradictionHit {
116                existing_record_id: record_id.clone(),
117                existing_title: record.title.clone(),
118                signal,
119            });
120        }
121    }
122
123    hits
124}
125
/// Jaccard similarity `|a ∩ b| / |a ∪ b|` of two token sets.
///
/// The result lies in `[0.0, 1.0]`. When both sets are empty the ratio is
/// undefined and `0.0` is returned instead.
fn jaccard(a: &BTreeSet<String>, b: &BTreeSet<String>) -> f64 {
    let shared = a.intersection(b).count();
    // Inclusion–exclusion: |a ∪ b| = |a| + |b| - |a ∩ b|.
    let combined = a.len() + b.len() - shared;
    match combined {
        0 => 0.0,
        _ => shared as f64 / combined as f64,
    }
}
134
/// Report whether `text` contains at least one of `markers`.
///
/// Matching is word-aware for Latin markers so that short markers do not fire
/// inside unrelated words: `"not"` must not match "notes" or "another", and
/// `"move from"` must not match inside "remove from". Markers that begin or
/// end with a non-ASCII character (the Chinese tables) are matched as plain
/// substrings on that side, since that text has no word separators.
///
/// `text` is expected to be lowercased already; markers are compared as-is.
/// Because whole words are required, inflected forms such as "disabled" do not
/// match the marker "disable"; list them explicitly if they should count.
fn contains_any_marker(text: &str, markers: &[&str]) -> bool {
    markers.iter().any(|marker| contains_marker(text, marker))
}

/// Report whether `marker` occurs in `text` without being glued to an
/// adjacent ASCII letter or digit on any side where the marker itself starts
/// or ends with an ASCII letter or digit.
fn contains_marker(text: &str, marker: &str) -> bool {
    let is_word_char = |c: char| c.is_ascii_alphanumeric();
    let guard_start = marker.starts_with(is_word_char);
    let guard_end = marker.ends_with(is_word_char);

    text.match_indices(marker).any(|(start, found)| {
        let end = start + found.len();
        // `start` and `end` are match boundaries, hence valid char boundaries.
        let glued_before = guard_start && text[..start].ends_with(is_word_char);
        let glued_after = guard_end && text[end..].starts_with(is_word_char);
        !glued_before && !glued_after
    })
}
138
139#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
140pub struct DedupSuggestion {
141    pub record_id_a: String,
142    pub record_id_b: String,
143    pub title_a: String,
144    pub title_b: String,
145    pub similarity: u32,
146}
147
148pub fn find_duplicates(records: &[(String, MemoryRecord)], threshold: f64) -> Vec<DedupSuggestion> {
149    let mut suggestions = Vec::new();
150    let active: Vec<_> = records
151        .iter()
152        .filter(|(_, r)| {
153            matches!(
154                r.state,
155                MemoryLifecycleState::Accepted | MemoryLifecycleState::Canonical
156            )
157        })
158        .collect();
159
160    for i in 0..active.len() {
161        let tokens_a = tokenize(&active[i].1.summary);
162        if tokens_a.len() < MIN_TOKENS {
163            continue;
164        }
165        for j in (i + 1)..active.len() {
166            if active[i].1.memory_type != active[j].1.memory_type {
167                continue;
168            }
169            let tokens_b = tokenize(&active[j].1.summary);
170            if tokens_b.len() < MIN_TOKENS {
171                continue;
172            }
173            let sim = jaccard(&tokens_a, &tokens_b);
174            if sim >= threshold {
175                suggestions.push(DedupSuggestion {
176                    record_id_a: active[i].0.clone(),
177                    record_id_b: active[j].0.clone(),
178                    title_a: active[i].1.title.clone(),
179                    title_b: active[j].1.title.clone(),
180                    similarity: (sim * 100.0) as u32,
181                });
182            }
183        }
184    }
185    suggestions.sort_by_key(|s| std::cmp::Reverse(s.similarity));
186    suggestions
187}
188
#[cfg(test)]
mod tests {
    use super::*;
    use crate::domain::{MemoryLifecycleState, MemoryPromotionAction, MemoryRecord, MemoryScope};

    /// Title of the shared "cargo install" fixture record.
    const CARGO_TITLE: &str = "用 cargo install";
    /// Summary of the shared "cargo install" fixture record.
    const CARGO_SUMMARY: &str = "用 cargo install 安装 binary 到 ~/.cargo/bin";
    /// New summary that negates the "cargo install" fixture.
    const CARGO_NEGATED: &str = "不用 cargo install 安装 binary";

    /// Build a user-scoped record in the requested lifecycle state.
    fn record(
        title: &str,
        summary: &str,
        memory_type: &str,
        state: MemoryLifecycleState,
    ) -> MemoryRecord {
        let manual =
            || MemoryRecord::new_manual(title, summary, memory_type, MemoryScope::User, "test");
        let proposal = || {
            MemoryRecord::new_ai_proposal(title, summary, memory_type, MemoryScope::User, "test")
        };

        match state {
            MemoryLifecycleState::Accepted => manual(),
            MemoryLifecycleState::Canonical => {
                manual().apply(MemoryPromotionAction::PromoteToCanonical)
            }
            MemoryLifecycleState::Archived => manual().apply(MemoryPromotionAction::Archive),
            MemoryLifecycleState::Candidate => proposal(),
            MemoryLifecycleState::Draft => {
                let mut draft = proposal();
                draft.state = MemoryLifecycleState::Draft;
                draft
            }
        }
    }

    /// Wrap one record into the `(id, record)` list shape that `detect` takes.
    fn single(id: &str, rec: MemoryRecord) -> Vec<(String, MemoryRecord)> {
        vec![(id.to_string(), rec)]
    }

    /// One-element list holding the "cargo install" fixture.
    fn cargo_fixture(
        id: &str,
        memory_type: &str,
        state: MemoryLifecycleState,
    ) -> Vec<(String, MemoryRecord)> {
        single(id, record(CARGO_TITLE, CARGO_SUMMARY, memory_type, state))
    }

    /// Assert that exactly one hit was produced, for `id`, with `signal`.
    #[track_caller]
    fn assert_single_hit(hits: &[ContradictionHit], id: &str, signal: ContradictionSignal) {
        assert_eq!(hits.len(), 1);
        assert_eq!(hits[0].existing_record_id, id);
        assert_eq!(hits[0].signal, signal);
    }

    #[test]
    fn detect_finds_negation_same_type() {
        let existing = cargo_fixture("rec-1", "preference", MemoryLifecycleState::Accepted);

        let hits = detect(CARGO_NEGATED, "preference", &existing);

        assert_single_hit(&hits, "rec-1", ContradictionSignal::Negation);
    }

    #[test]
    fn detect_finds_replacement() {
        let existing = single(
            "rec-2",
            record(
                "用 React",
                "前端框架用 React 构建 UI 组件",
                "preference",
                MemoryLifecycleState::Accepted,
            ),
        );

        let hits = detect("改用 Vue 替代 React 构建 UI 组件", "preference", &existing);

        assert_single_hit(&hits, "rec-2", ContradictionSignal::Replacement);
    }

    #[test]
    fn detect_skips_different_type() {
        let existing = cargo_fixture("rec-3", "workflow", MemoryLifecycleState::Accepted);

        let hits = detect(CARGO_NEGATED, "preference", &existing);

        assert!(hits.is_empty());
    }

    #[test]
    fn detect_skips_low_overlap() {
        let existing = cargo_fixture("rec-4", "preference", MemoryLifecycleState::Accepted);

        let hits = detect("不要在周末加班写代码", "preference", &existing);

        assert!(hits.is_empty());
    }

    #[test]
    fn detect_skips_archived() {
        let existing = cargo_fixture("rec-5", "preference", MemoryLifecycleState::Archived);

        let hits = detect(CARGO_NEGATED, "preference", &existing);

        assert!(hits.is_empty());
    }

    #[test]
    fn detect_skips_candidate_state() {
        let existing = cargo_fixture("rec-6", "preference", MemoryLifecycleState::Candidate);

        let hits = detect(CARGO_NEGATED, "preference", &existing);

        assert!(hits.is_empty());
    }

    #[test]
    fn detect_handles_empty_existing() {
        let hits = detect("不用 cargo install", "preference", &[]);

        assert!(hits.is_empty());
    }

    #[test]
    fn detect_english_negation() {
        let existing = single(
            "rec-7",
            record(
                "Use JWT",
                "use JWT tokens for API authentication",
                "preference",
                MemoryLifecycleState::Accepted,
            ),
        );

        let hits = detect(
            "don't use JWT tokens for API authentication, use sessions",
            "preference",
            &existing,
        );

        assert_single_hit(&hits, "rec-7", ContradictionSignal::Negation);
    }

    #[test]
    fn detect_english_replacement() {
        let existing = single(
            "rec-8",
            record(
                "Deploy to AWS",
                "deploy all services to AWS infrastructure",
                "preference",
                MemoryLifecycleState::Canonical,
            ),
        );

        let hits = detect(
            "migrate to GCP instead of AWS for all services infrastructure",
            "preference",
            &existing,
        );

        assert_single_hit(&hits, "rec-8", ContradictionSignal::Replacement);
    }
}