Skip to main content

spool/knowledge/
cluster.rs

1//! Knowledge consolidation: detect fragmented memories that can be merged,
2//! and detect stale/superseded records that should be archived.
3//!
4//! ## Consolidation
5//! Scans accepted/canonical records, clusters by entities/tags Jaccard
6//! similarity > 0.5, and suggests merging clusters of 3+ records.
7//!
8//! ## Pruning
9//! Detects records that should be archived:
10//! - Superseded: record A supersedes B, and A is accepted/canonical → B should be archived
11//! - Expired: `valid_until` date has passed
12//! - Stale: not referenced in 180+ days AND not constraint/preference type
13
14use crate::domain::{MemoryLifecycleState, MemoryRecord, MemoryScope};
15use crate::lifecycle_service::{LifecycleAction, LifecycleService};
16use crate::lifecycle_store::{
17    LedgerEntry, LifecycleStore, RecordMemoryRequest, TransitionMetadata, latest_state_entries,
18    lifecycle_root_from_config,
19};
20use crate::reference_tracker;
21use serde::Serialize;
22use std::collections::{BTreeSet, HashSet};
23use std::path::Path;
24use ts_rs::TS;
25
26// ─── Public types ────────────────────────────────────────────────────
27
/// A proposed merge of several related records into one.
///
/// Built by `detect_consolidation_candidates` and executed by
/// `apply_consolidation`.
#[derive(Debug, Clone, Serialize)]
pub struct ConsolidationSuggestion {
    /// IDs of the records that form the cluster.
    pub cluster_records: Vec<String>,
    /// Title proposed for the merged record, derived from the shared
    /// entities, else the shared tags, else the cluster size.
    pub suggested_title: String,
    /// Entities that appear on every record in the cluster.
    pub shared_entities: Vec<String>,
    /// Tags that appear on every record in the cluster.
    pub shared_tags: Vec<String>,
}
35
/// A single record proposed for archival, together with the reason.
///
/// Exported to TypeScript via `ts-rs` for the frontend.
#[derive(Debug, Clone, Serialize, TS)]
#[ts(export, export_to = "../frontend/src/lib/types/generated/")]
pub struct PruneSuggestion {
    /// ID of the record that should be archived.
    pub record_id: String,
    /// Title of that record, carried along for display.
    pub title: String,
    /// Why the record was selected.
    pub reason: PruneReason,
}
43
/// Why a record was selected for pruning.
///
/// Serialized as an internally tagged object: the variant name goes in a
/// `kind` field using snake_case.
#[derive(Debug, Clone, Serialize, TS)]
#[serde(tag = "kind", rename_all = "snake_case")]
#[ts(export, export_to = "../frontend/src/lib/types/generated/")]
pub enum PruneReason {
    /// Another active record declares that it supersedes this one;
    /// `by` is the ID of that newer record.
    Superseded { by: String },
    /// The record's `valid_until` date has passed; the original string is
    /// kept verbatim.
    Expired { valid_until: String },
    /// The reference tracker reports this many days since the record was
    /// last referenced.
    Stale { days_since_reference: u64 },
}
52
/// Outcome of a successful `apply_consolidation` call.
#[derive(Debug, Clone, Serialize)]
pub struct ConsolidateApplyResult {
    /// ID of the newly created merged record.
    pub merged_record_id: String,
    /// IDs of the fragment records that were archived.
    pub archived_record_ids: Vec<String>,
}
58
/// Outcome of a successful `apply_prune` call.
#[derive(Debug, Clone, Serialize)]
pub struct PruneApplyResult {
    /// IDs of the records that were archived, in the order they were processed.
    pub archived_record_ids: Vec<String>,
}
63
64// ─── Detection ───────────────────────────────────────────────────────
65
/// Jaccard similarity that two records must strictly exceed (`>`) to be
/// considered related.
const SIMILARITY_THRESHOLD: f64 = 0.5;

/// Minimum number of records in a cluster before consolidation is suggested.
const MIN_CLUSTER_SIZE: usize = 3;

/// Days without reference at which a record is considered stale (inclusive:
/// a record referenced exactly this many days ago already counts).
const STALENESS_DAYS: u64 = 180;

/// Memory types exempt from staleness pruning.
const STALENESS_EXEMPT_TYPES: &[&str] = &["constraint", "preference"];
77
78/// Detect clusters of related accepted/canonical records that could be merged.
79pub fn detect_consolidation_candidates(entries: &[LedgerEntry]) -> Vec<ConsolidationSuggestion> {
80    let active: Vec<&LedgerEntry> = entries
81        .iter()
82        .filter(|e| {
83            matches!(
84                e.record.state,
85                MemoryLifecycleState::Accepted | MemoryLifecycleState::Canonical
86            )
87        })
88        .collect();
89
90    if active.len() < MIN_CLUSTER_SIZE {
91        return Vec::new();
92    }
93
94    // Build adjacency: two records are "related" if Jaccard(entities+tags) > threshold
95    // and they share the same scope context (same project_id or both user-scope).
96    let n = active.len();
97    let mut adjacency: Vec<Vec<bool>> = vec![vec![false; n]; n];
98
99    for i in 0..n {
100        for j in (i + 1)..n {
101            if !scope_compatible(&active[i].record, &active[j].record) {
102                continue;
103            }
104            let sim = entity_tag_jaccard(&active[i].record, &active[j].record);
105            if sim > SIMILARITY_THRESHOLD {
106                adjacency[i][j] = true;
107                adjacency[j][i] = true;
108            }
109        }
110    }
111
112    // Greedy clustering: find connected components via BFS
113    let mut visited = vec![false; n];
114    let mut suggestions = Vec::new();
115
116    for start in 0..n {
117        if visited[start] {
118            continue;
119        }
120        let mut cluster = Vec::new();
121        let mut queue = vec![start];
122        while let Some(node) = queue.pop() {
123            if visited[node] {
124                continue;
125            }
126            visited[node] = true;
127            cluster.push(node);
128            for neighbor in 0..n {
129                if !visited[neighbor] && adjacency[node][neighbor] {
130                    queue.push(neighbor);
131                }
132            }
133        }
134
135        if cluster.len() >= MIN_CLUSTER_SIZE {
136            let records: Vec<&LedgerEntry> = cluster.iter().map(|&idx| active[idx]).collect();
137            suggestions.push(build_suggestion(&records));
138        }
139    }
140
141    suggestions
142}
143
144/// Detect records that should be archived (superseded, expired, or stale).
145pub fn detect_prune_candidates(
146    entries: &[LedgerEntry],
147    lifecycle_root: &Path,
148) -> Vec<PruneSuggestion> {
149    let active: Vec<&LedgerEntry> = entries
150        .iter()
151        .filter(|e| {
152            matches!(
153                e.record.state,
154                MemoryLifecycleState::Accepted | MemoryLifecycleState::Canonical
155            )
156        })
157        .collect();
158
159    let active_ids: HashSet<&str> = active.iter().map(|e| e.record_id.as_str()).collect();
160    let ref_map = reference_tracker::read(lifecycle_root);
161    let mut suggestions = Vec::new();
162    let mut already_suggested: HashSet<String> = HashSet::new();
163
164    // 1. Supersedes chain: if A supersedes B and A is active, B should be archived
165    for entry in &active {
166        if let Some(ref superseded_id) = entry.record.supersedes
167            && active_ids.contains(superseded_id.as_str())
168            && !already_suggested.contains(superseded_id)
169        {
170            let title = active
171                .iter()
172                .find(|e| e.record_id == *superseded_id)
173                .map(|e| e.record.title.clone())
174                .unwrap_or_default();
175            suggestions.push(PruneSuggestion {
176                record_id: superseded_id.clone(),
177                title,
178                reason: PruneReason::Superseded {
179                    by: entry.record_id.clone(),
180                },
181            });
182            already_suggested.insert(superseded_id.clone());
183        }
184    }
185
186    // 2. Expired: valid_until < now
187    let now_secs = std::time::SystemTime::now()
188        .duration_since(std::time::UNIX_EPOCH)
189        .unwrap_or_default()
190        .as_secs();
191
192    for entry in &active {
193        if already_suggested.contains(&entry.record_id) {
194            continue;
195        }
196        if let Some(ref valid_until) = entry.record.valid_until
197            && is_expired(valid_until, now_secs)
198        {
199            suggestions.push(PruneSuggestion {
200                record_id: entry.record_id.clone(),
201                title: entry.record.title.clone(),
202                reason: PruneReason::Expired {
203                    valid_until: valid_until.clone(),
204                },
205            });
206            already_suggested.insert(entry.record_id.clone());
207        }
208    }
209
210    // 3. Stale: not referenced in 180+ days, non-exempt type
211    for entry in &active {
212        if already_suggested.contains(&entry.record_id) {
213            continue;
214        }
215        if STALENESS_EXEMPT_TYPES.contains(&entry.record.memory_type.as_str()) {
216            continue;
217        }
218        let age = ref_map
219            .records
220            .get(&entry.record_id)
221            .and_then(reference_tracker::age_days);
222        if let Some(days) = age
223            && days >= STALENESS_DAYS
224        {
225            suggestions.push(PruneSuggestion {
226                record_id: entry.record_id.clone(),
227                title: entry.record.title.clone(),
228                reason: PruneReason::Stale {
229                    days_since_reference: days,
230                },
231            });
232        }
233    }
234
235    suggestions
236}
237
238// ─── Apply functions ─────────────────────────────────────────────────
239
240/// Execute a consolidation: create merged record + archive fragments.
241pub fn apply_consolidation(
242    config_path: &Path,
243    suggestion: &ConsolidationSuggestion,
244    entries: &[LedgerEntry],
245) -> anyhow::Result<ConsolidateApplyResult> {
246    let service = LifecycleService::new();
247
248    // Gather fragment records
249    let fragments: Vec<&LedgerEntry> = entries
250        .iter()
251        .filter(|e| suggestion.cluster_records.contains(&e.record_id))
252        .collect();
253
254    // Build merged summary
255    let summary = fragments
256        .iter()
257        .map(|e| e.record.summary.as_str())
258        .collect::<Vec<_>>()
259        .join("\n---\n");
260
261    // Union of entities/tags/triggers
262    let entities: Vec<String> = union_strings(fragments.iter().map(|e| &e.record.entities));
263    let tags: Vec<String> = union_strings(fragments.iter().map(|e| &e.record.tags));
264    let triggers: Vec<String> = union_strings(fragments.iter().map(|e| &e.record.triggers));
265
266    // Determine scope from first fragment
267    let scope = fragments
268        .first()
269        .map(|e| e.record.scope)
270        .unwrap_or(MemoryScope::User);
271    let project_id = fragments.first().and_then(|e| e.record.project_id.clone());
272    let user_id = fragments.first().and_then(|e| e.record.user_id.clone());
273    let memory_type = fragments
274        .first()
275        .map(|e| e.record.memory_type.clone())
276        .unwrap_or_else(|| "knowledge".to_string());
277
278    // Create merged record
279    let result = service.record_manual(
280        config_path,
281        RecordMemoryRequest {
282            title: suggestion.suggested_title.clone(),
283            summary,
284            memory_type,
285            scope,
286            source_ref: "consolidation:merge".to_string(),
287            project_id,
288            user_id,
289            sensitivity: None,
290            metadata: TransitionMetadata {
291                actor: Some("spool-consolidate".to_string()),
292                reason: Some("merged from fragmented records".to_string()),
293                evidence_refs: suggestion.cluster_records.clone(),
294            },
295            entities,
296            tags,
297            triggers,
298            related_files: union_strings(fragments.iter().map(|e| &e.record.related_files)),
299            related_records: suggestion.cluster_records.clone(),
300            supersedes: None,
301            applies_to: union_strings(fragments.iter().map(|e| &e.record.applies_to)),
302            valid_until: None,
303        },
304    )?;
305
306    let merged_id = result.entry.record_id.clone();
307
308    // Archive each fragment
309    let mut archived_ids = Vec::new();
310    for record_id in &suggestion.cluster_records {
311        service.apply_action_with_metadata(
312            config_path,
313            record_id,
314            LifecycleAction::Archive,
315            TransitionMetadata {
316                actor: Some("spool-consolidate".to_string()),
317                reason: Some(format!("consolidated into {merged_id}")),
318                evidence_refs: Vec::new(),
319            },
320        )?;
321        archived_ids.push(record_id.clone());
322    }
323
324    Ok(ConsolidateApplyResult {
325        merged_record_id: merged_id,
326        archived_record_ids: archived_ids,
327    })
328}
329
330/// Execute pruning: archive each prunable record.
331pub fn apply_prune(
332    config_path: &Path,
333    suggestions: &[PruneSuggestion],
334) -> anyhow::Result<PruneApplyResult> {
335    let service = LifecycleService::new();
336    let mut archived_ids = Vec::new();
337
338    for suggestion in suggestions {
339        let reason = match &suggestion.reason {
340            PruneReason::Superseded { by } => format!("superseded by {by}"),
341            PruneReason::Expired { valid_until } => format!("expired (valid_until: {valid_until})"),
342            PruneReason::Stale {
343                days_since_reference,
344            } => format!("stale ({days_since_reference} days without reference)"),
345        };
346        service.apply_action_with_metadata(
347            config_path,
348            &suggestion.record_id,
349            LifecycleAction::Archive,
350            TransitionMetadata {
351                actor: Some("spool-prune".to_string()),
352                reason: Some(reason),
353                evidence_refs: Vec::new(),
354            },
355        )?;
356        archived_ids.push(suggestion.record_id.clone());
357    }
358
359    Ok(PruneApplyResult {
360        archived_record_ids: archived_ids,
361    })
362}
363
364// ─── Helpers ─────────────────────────────────────────────────────────
365
366fn scope_compatible(a: &MemoryRecord, b: &MemoryRecord) -> bool {
367    match (&a.project_id, &b.project_id) {
368        (Some(pa), Some(pb)) => pa == pb,
369        (None, None) => a.scope == b.scope,
370        _ => false,
371    }
372}
373
374fn entity_tag_jaccard(a: &MemoryRecord, b: &MemoryRecord) -> f64 {
375    let set_a: BTreeSet<&str> = a
376        .entities
377        .iter()
378        .chain(a.tags.iter())
379        .map(String::as_str)
380        .collect();
381    let set_b: BTreeSet<&str> = b
382        .entities
383        .iter()
384        .chain(b.tags.iter())
385        .map(String::as_str)
386        .collect();
387
388    if set_a.is_empty() && set_b.is_empty() {
389        return 0.0;
390    }
391
392    let intersection = set_a.intersection(&set_b).count();
393    let union = set_a.union(&set_b).count();
394    if union == 0 {
395        return 0.0;
396    }
397    intersection as f64 / union as f64
398}
399
400fn build_suggestion(records: &[&LedgerEntry]) -> ConsolidationSuggestion {
401    let cluster_records: Vec<String> = records.iter().map(|e| e.record_id.clone()).collect();
402
403    // Find shared entities and tags
404    let all_entities: Vec<BTreeSet<&str>> = records
405        .iter()
406        .map(|e| e.record.entities.iter().map(String::as_str).collect())
407        .collect();
408    let all_tags: Vec<BTreeSet<&str>> = records
409        .iter()
410        .map(|e| e.record.tags.iter().map(String::as_str).collect())
411        .collect();
412
413    let shared_entities = intersect_all(&all_entities);
414    let shared_tags = intersect_all(&all_tags);
415
416    // Build suggested title from shared entities/tags
417    let suggested_title = if !shared_entities.is_empty() {
418        format!("Consolidated: {}", shared_entities.join(", "))
419    } else if !shared_tags.is_empty() {
420        format!("Consolidated: {}", shared_tags.join(", "))
421    } else {
422        format!("Consolidated ({} records)", records.len())
423    };
424
425    ConsolidationSuggestion {
426        cluster_records,
427        suggested_title,
428        shared_entities,
429        shared_tags,
430    }
431}
432
/// Return the elements present in every set, as owned strings in sorted order.
///
/// An empty slice of sets yields an empty result.
fn intersect_all(sets: &[BTreeSet<&str>]) -> Vec<String> {
    let Some((head, tail)) = sets.split_first() else {
        return Vec::new();
    };

    // Start from the first set and drop whatever any later set lacks.
    let mut common = head.clone();
    for other in tail {
        common.retain(|item| other.contains(item));
    }

    common.into_iter().map(str::to_owned).collect()
}
443
/// Merge several string lists into one sorted list without duplicates.
fn union_strings<'a>(iter: impl Iterator<Item = &'a Vec<String>>) -> Vec<String> {
    let merged: BTreeSet<String> = iter.flatten().cloned().collect();
    merged.into_iter().collect()
}
453
/// Report whether the ISO 8601 value `valid_until` lies in the past relative
/// to `now_secs` (seconds since the Unix epoch).
///
/// Both `YYYY-MM-DD` and `YYYY-MM-DDTHH:MM:SSZ` are accepted, but only the
/// leading date is interpreted: a record stays valid through the end of its
/// expiry day and any time-of-day component is ignored. Anything that cannot
/// be read as a date — too short, wrong separators, non-numeric fields, month
/// or day out of range, year before 1970 — is treated as "not expired".
fn is_expired(valid_until: &str, now_secs: u64) -> bool {
    // `get` instead of `[..10]`: byte offset 10 may fall inside a multi-byte
    // character, and slicing there would panic on malformed input.
    let Some(date_part) = valid_until.get(..10) else {
        return false;
    };

    let mut fields = date_part.split('-');
    let (Some(year), Some(month), Some(day), None) =
        (fields.next(), fields.next(), fields.next(), fields.next())
    else {
        return false;
    };
    let (Ok(year), Ok(month), Ok(day)) = (
        year.parse::<u64>(),
        month.parse::<u64>(),
        day.parse::<u64>(),
    ) else {
        return false;
    };

    match ymd_to_approx_secs(year, month, day) {
        // Expiry takes effect once the whole expiry day has elapsed.
        Some(start_of_day) => now_secs > start_of_day.saturating_add(86_400),
        None => false,
    }
}

/// Convert a calendar date to Unix seconds at the start of that day.
///
/// Returns `None` for years before 1970, months outside `1..=12`, days outside
/// `1..=31`, or results that do not fit in a `u64`. The day is not checked
/// against the length of the month, so an over-long day such as February 31
/// rolls forward into the following month — hence "approx".
fn ymd_to_approx_secs(year: u64, month: u64, day: u64) -> Option<u64> {
    if year < 1970 || !(1..=12).contains(&month) || !(1..=31).contains(&day) {
        return None;
    }

    // Days-from-civil computation with the year shifted to start in March, so
    // the leap day falls at the end of the shifted year.
    let (y, m) = if month <= 2 {
        (year - 1, month + 9)
    } else {
        (year, month - 3)
    };
    let era = y / 400;
    let yoe = y % 400;
    let doy = (153 * m + 2) / 5 + day - 1;
    let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy;

    era.checked_mul(146_097)?
        .checked_add(doe)?
        // 719_468 = days from the algorithm's origin to 1970-01-01.
        .checked_sub(719_468)?
        .checked_mul(86_400)
}
508
509/// Load entries from the lifecycle store for a given config path.
510pub fn load_entries(config_path: &Path) -> anyhow::Result<Vec<LedgerEntry>> {
511    let config_dir = config_path.parent().unwrap_or_else(|| Path::new("."));
512    let lifecycle_root = lifecycle_root_from_config(config_dir);
513    let store = LifecycleStore::new(lifecycle_root.as_path());
514    latest_state_entries(&store)
515}
516
517/// Resolve lifecycle root from config path.
518pub fn resolve_lifecycle_root(config_path: &Path) -> std::path::PathBuf {
519    let config_dir = config_path.parent().unwrap_or_else(|| Path::new("."));
520    lifecycle_root_from_config(config_dir)
521}
522
523// ─── Tests ───────────────────────────────────────────────────────────
524
#[cfg(test)]
mod tests {
    use super::*;
    use crate::domain::{
        MemoryLedgerAction, MemoryLifecycleState, MemoryOrigin, MemoryRecord, MemoryScope,
        MemorySourceKind,
    };
    use crate::lifecycle_store::{LedgerEntry, TransitionMetadata};

    /// Build a user-scoped, manually recorded `workflow` entry with the given
    /// id, title, entities, tags and lifecycle state.
    fn make_entry(
        record_id: &str,
        title: &str,
        entities: Vec<&str>,
        tags: Vec<&str>,
        state: MemoryLifecycleState,
    ) -> LedgerEntry {
        LedgerEntry {
            schema_version: "memory-ledger.v1".to_string(),
            recorded_at: "2026-05-10T00:00:00Z".to_string(),
            record_id: record_id.to_string(),
            scope_key: "user:long".to_string(),
            action: MemoryLedgerAction::RecordManual,
            source_kind: MemorySourceKind::Manual,
            metadata: TransitionMetadata::default(),
            record: MemoryRecord {
                title: title.to_string(),
                summary: format!("Summary for {title}"),
                memory_type: "workflow".to_string(),
                scope: MemoryScope::User,
                state,
                origin: MemoryOrigin {
                    source_kind: MemorySourceKind::Manual,
                    source_ref: "test".to_string(),
                },
                project_id: None,
                user_id: Some("long".to_string()),
                sensitivity: None,
                entities: entities.into_iter().map(String::from).collect(),
                tags: tags.into_iter().map(String::from).collect(),
                triggers: Vec::new(),
                related_files: Vec::new(),
                related_records: Vec::new(),
                supersedes: None,
                applies_to: Vec::new(),
                valid_until: None,
            },
        }
    }

    /// Write a `reference-tracker.json` into `dir` whose single entry says
    /// `record_id` was last referenced `days_ago` days before now.
    fn write_reference_tracker(dir: &std::path::Path, record_id: &str, days_ago: u64) {
        let now_secs = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .as_secs();
        let old_secs = now_secs - days_ago * 86400;
        let old_ts = crate::reference_tracker::tests::unix_secs_to_iso8601_for_test(old_secs);
        let map = reference_tracker::ReferenceMap {
            schema_version: "reference-tracker.v1".to_string(),
            records: std::collections::BTreeMap::from([(
                record_id.to_string(),
                reference_tracker::ReferenceEntry {
                    last_referenced_at: old_ts,
                    count: 1,
                },
            )]),
        };
        std::fs::write(
            dir.join("reference-tracker.json"),
            serde_json::to_string_pretty(&map).unwrap(),
        )
        .unwrap();
    }

    #[test]
    fn should_detect_cluster_of_related_records() {
        let entries = vec![
            make_entry(
                "r1",
                "Rust error handling",
                vec!["rust", "error"],
                vec!["coding"],
                MemoryLifecycleState::Accepted,
            ),
            make_entry(
                "r2",
                "Rust error patterns",
                vec!["rust", "error"],
                vec!["coding", "patterns"],
                MemoryLifecycleState::Accepted,
            ),
            make_entry(
                "r3",
                "Rust error recovery",
                vec!["rust", "error"],
                vec!["coding"],
                MemoryLifecycleState::Canonical,
            ),
        ];

        let suggestions = detect_consolidation_candidates(&entries);
        assert_eq!(suggestions.len(), 1);
        assert_eq!(suggestions[0].cluster_records.len(), 3);
        assert!(suggestions[0].shared_entities.contains(&"rust".to_string()));
        assert!(
            suggestions[0]
                .shared_entities
                .contains(&"error".to_string())
        );
    }

    #[test]
    fn should_not_cluster_unrelated_records() {
        let entries = vec![
            make_entry(
                "r1",
                "Rust error handling",
                vec!["rust", "error"],
                vec!["coding"],
                MemoryLifecycleState::Accepted,
            ),
            make_entry(
                "r2",
                "Python testing",
                vec!["python", "testing"],
                vec!["qa"],
                MemoryLifecycleState::Accepted,
            ),
            make_entry(
                "r3",
                "Go concurrency",
                vec!["go", "concurrency"],
                vec!["performance"],
                MemoryLifecycleState::Accepted,
            ),
        ];

        let suggestions = detect_consolidation_candidates(&entries);
        assert!(suggestions.is_empty());
    }

    #[test]
    fn should_skip_archived_records_in_clustering() {
        let entries = vec![
            make_entry(
                "r1",
                "Rust error handling",
                vec!["rust", "error"],
                vec!["coding"],
                MemoryLifecycleState::Accepted,
            ),
            make_entry(
                "r2",
                "Rust error patterns",
                vec!["rust", "error"],
                vec!["coding"],
                MemoryLifecycleState::Archived,
            ),
            make_entry(
                "r3",
                "Rust error recovery",
                vec!["rust", "error"],
                vec!["coding"],
                MemoryLifecycleState::Accepted,
            ),
        ];

        let suggestions = detect_consolidation_candidates(&entries);
        // Only 2 active records, below MIN_CLUSTER_SIZE
        assert!(suggestions.is_empty());
    }

    #[test]
    fn should_detect_superseded_records_for_pruning() {
        let mut entries = vec![
            make_entry(
                "old-1",
                "Old approach",
                vec!["rust"],
                vec![],
                MemoryLifecycleState::Accepted,
            ),
            make_entry(
                "new-1",
                "New approach",
                vec!["rust"],
                vec![],
                MemoryLifecycleState::Accepted,
            ),
        ];
        entries[1].record.supersedes = Some("old-1".to_string());

        let temp = tempfile::tempdir().unwrap();
        let suggestions = detect_prune_candidates(&entries, temp.path());
        assert_eq!(suggestions.len(), 1);
        assert_eq!(suggestions[0].record_id, "old-1");
        assert!(matches!(
            suggestions[0].reason,
            PruneReason::Superseded { .. }
        ));
    }

    #[test]
    fn should_detect_expired_records_for_pruning() {
        let mut entries = vec![make_entry(
            "exp-1",
            "Temporary rule",
            vec!["temp"],
            vec![],
            MemoryLifecycleState::Accepted,
        )];
        entries[0].record.valid_until = Some("2020-01-01".to_string());

        let temp = tempfile::tempdir().unwrap();
        let suggestions = detect_prune_candidates(&entries, temp.path());
        assert_eq!(suggestions.len(), 1);
        assert_eq!(suggestions[0].record_id, "exp-1");
        assert!(matches!(suggestions[0].reason, PruneReason::Expired { .. }));
    }

    #[test]
    fn should_detect_stale_records_for_pruning() {
        let entries = vec![make_entry(
            "stale-1",
            "Old workflow",
            vec!["workflow"],
            vec![],
            MemoryLifecycleState::Accepted,
        )];

        let temp = tempfile::tempdir().unwrap();
        // Last reference 200 days ago, beyond the staleness cut-off.
        write_reference_tracker(temp.path(), "stale-1", 200);

        let suggestions = detect_prune_candidates(&entries, temp.path());
        assert_eq!(suggestions.len(), 1);
        assert_eq!(suggestions[0].record_id, "stale-1");
        assert!(matches!(suggestions[0].reason, PruneReason::Stale { .. }));
    }

    #[test]
    fn should_not_prune_preference_type_for_staleness() {
        let mut entries = vec![make_entry(
            "pref-1",
            "My preference",
            vec!["style"],
            vec![],
            MemoryLifecycleState::Accepted,
        )];
        entries[0].record.memory_type = "preference".to_string();

        let temp = tempfile::tempdir().unwrap();
        // Old enough to be stale, but the type is exempt.
        write_reference_tracker(temp.path(), "pref-1", 200);

        let suggestions = detect_prune_candidates(&entries, temp.path());
        assert!(suggestions.is_empty());
    }

    #[test]
    fn is_expired_should_handle_various_formats() {
        let now = 1_800_000_000; // ~2027
        assert!(is_expired("2026-01-01", now));
        assert!(!is_expired("2028-01-01", now));
        assert!(is_expired("2025-12-31T23:59:59Z", now));
        assert!(!is_expired("invalid", now));
        assert!(!is_expired("", now));
    }

    #[test]
    fn entity_tag_jaccard_should_compute_correctly() {
        let a = MemoryRecord::new_manual("a", "a", "workflow", MemoryScope::User, "test");
        let mut b = MemoryRecord::new_manual("b", "b", "workflow", MemoryScope::User, "test");

        // Both empty → 0.0
        assert_eq!(entity_tag_jaccard(&a, &b), 0.0);

        // Identical sets
        let mut a2 = a.clone();
        a2.entities = vec!["rust".to_string(), "error".to_string()];
        b.entities = vec!["rust".to_string(), "error".to_string()];
        assert_eq!(entity_tag_jaccard(&a2, &b), 1.0);

        // Partial overlap
        b.entities = vec!["rust".to_string(), "testing".to_string()];
        let sim = entity_tag_jaccard(&a2, &b);
        // intersection=1 (rust), union=3 (rust, error, testing) → 1/3
        assert!((sim - 1.0 / 3.0).abs() < 0.01);
    }
}