use crate::domain::{MemoryLifecycleState, MemoryRecord, MemoryScope};
use crate::lifecycle_service::{LifecycleAction, LifecycleService};
use crate::lifecycle_store::{
LedgerEntry, LifecycleStore, RecordMemoryRequest, TransitionMetadata, latest_state_entries,
lifecycle_root_from_config,
};
use crate::reference_tracker;
use serde::Serialize;
use std::collections::{BTreeSet, HashSet};
use std::path::Path;
use ts_rs::TS;
/// A proposed merge of several closely related memory records into a single record.
///
/// Built by `build_suggestion` for each cluster found by `detect_consolidation_candidates`
/// and consumed by `apply_consolidation`.
#[derive(Debug, Clone, Serialize)]
pub struct ConsolidationSuggestion {
/// `record_id` of every ledger entry that belongs to the cluster.
pub cluster_records: Vec<String>,
/// Title proposed for the merged record: `"Consolidated: ..."` listing the shared entities
/// (or, when there are none, the shared tags), otherwise a record-count fallback.
pub suggested_title: String,
/// Entities that appear on every record in the cluster.
pub shared_entities: Vec<String>,
/// Tags that appear on every record in the cluster.
pub shared_tags: Vec<String>,
}
/// A single record proposed for archival, together with the reason it was selected.
///
/// Produced by `detect_prune_candidates`; `apply_prune` archives `record_id` for each one.
#[derive(Debug, Clone, Serialize, TS)]
#[ts(export, export_to = "../frontend/src/lib/types/generated/")]
pub struct PruneSuggestion {
/// Id of the record that should be archived.
pub record_id: String,
/// Title of that record, copied from the ledger entry.
pub title: String,
/// Why the record was selected.
pub reason: PruneReason,
}
/// Why a record was suggested for pruning.
///
/// Serialized as an internally tagged object: the variant name goes in a `kind` field,
/// written in snake_case.
#[derive(Debug, Clone, Serialize, TS)]
#[serde(tag = "kind", rename_all = "snake_case")]
#[ts(export, export_to = "../frontend/src/lib/types/generated/")]
pub enum PruneReason {
/// Another active record declares that it supersedes this one; `by` is that record's id.
Superseded { by: String },
/// The record's `valid_until` date has passed; carries the original `valid_until` string.
Expired { valid_until: String },
/// The reference tracker reports the record's age (in days) as at least `STALENESS_DAYS`.
Stale { days_since_reference: u64 },
}
/// Outcome of a successful `apply_consolidation` call.
#[derive(Debug, Clone, Serialize)]
pub struct ConsolidateApplyResult {
/// Id of the newly recorded merged record.
pub merged_record_id: String,
/// Ids of the cluster records that were archived, in the order they were processed.
pub archived_record_ids: Vec<String>,
}
/// Outcome of a successful `apply_prune` call.
#[derive(Debug, Clone, Serialize)]
pub struct PruneApplyResult {
/// Ids of the records that were archived, in the order the suggestions were given.
pub archived_record_ids: Vec<String>,
}
/// Two records are linked for consolidation only when the Jaccard similarity of their combined
/// entity/tag sets is strictly greater than this value.
const SIMILARITY_THRESHOLD: f64 = 0.5;
/// Minimum number of active records needed before clustering runs, and the minimum size a
/// cluster must reach to be reported.
const MIN_CLUSTER_SIZE: usize = 3;
/// A record becomes a staleness candidate once the age reported by
/// `reference_tracker::age_days` reaches this many days.
const STALENESS_DAYS: u64 = 180;
/// Memory types that are never suggested for pruning on staleness grounds.
const STALENESS_EXEMPT_TYPES: &[&str] = &["constraint", "preference"];
/// Finds clusters of active records that are similar enough to be merged into one.
///
/// Only entries in the `Accepted` or `Canonical` state take part. Two records are linked when
/// `scope_compatible` accepts the pair and the Jaccard similarity of their combined
/// entity/tag sets is strictly greater than `SIMILARITY_THRESHOLD`. Every connected component
/// of the resulting graph that has at least `MIN_CLUSTER_SIZE` members yields one suggestion.
///
/// Links are kept as per-node adjacency lists rather than a dense `n x n` matrix, so memory is
/// proportional to the number of similar pairs and each traversal step only touches real
/// neighbors. Traversal order (and therefore the order of `cluster_records`) is unchanged.
///
/// Returns an empty vector when fewer than `MIN_CLUSTER_SIZE` records are active.
pub fn detect_consolidation_candidates(entries: &[LedgerEntry]) -> Vec<ConsolidationSuggestion> {
    let active: Vec<&LedgerEntry> = entries
        .iter()
        .filter(|entry| {
            matches!(
                entry.record.state,
                MemoryLifecycleState::Accepted | MemoryLifecycleState::Canonical
            )
        })
        .collect();
    let n = active.len();
    if n < MIN_CLUSTER_SIZE {
        return Vec::new();
    }

    // Pairs are visited with i < j in ascending order, so every neighbor list ends up sorted
    // ascending -- the same order in which a dense matrix row would be scanned.
    let mut neighbors: Vec<Vec<usize>> = vec![Vec::new(); n];
    for i in 0..n {
        for j in (i + 1)..n {
            let (left, right) = (&active[i].record, &active[j].record);
            if scope_compatible(left, right)
                && entity_tag_jaccard(left, right) > SIMILARITY_THRESHOLD
            {
                neighbors[i].push(j);
                neighbors[j].push(i);
            }
        }
    }

    let mut visited = vec![false; n];
    let mut suggestions = Vec::new();
    for start in 0..n {
        if visited[start] {
            continue;
        }
        // Depth-first walk of the component containing `start`.
        let mut cluster: Vec<&LedgerEntry> = Vec::new();
        let mut stack = vec![start];
        while let Some(node) = stack.pop() {
            // A node can be pushed by several neighbors before it is first popped.
            if visited[node] {
                continue;
            }
            visited[node] = true;
            cluster.push(active[node]);
            stack.extend(
                neighbors[node]
                    .iter()
                    .copied()
                    .filter(|&next| !visited[next]),
            );
        }
        if cluster.len() >= MIN_CLUSTER_SIZE {
            suggestions.push(build_suggestion(&cluster));
        }
    }
    suggestions
}
pub fn detect_prune_candidates(
entries: &[LedgerEntry],
lifecycle_root: &Path,
) -> Vec<PruneSuggestion> {
let active: Vec<&LedgerEntry> = entries
.iter()
.filter(|e| {
matches!(
e.record.state,
MemoryLifecycleState::Accepted | MemoryLifecycleState::Canonical
)
})
.collect();
let active_ids: HashSet<&str> = active.iter().map(|e| e.record_id.as_str()).collect();
let ref_map = reference_tracker::read(lifecycle_root);
let mut suggestions = Vec::new();
let mut already_suggested: HashSet<String> = HashSet::new();
for entry in &active {
if let Some(ref superseded_id) = entry.record.supersedes
&& active_ids.contains(superseded_id.as_str())
&& !already_suggested.contains(superseded_id)
{
let title = active
.iter()
.find(|e| e.record_id == *superseded_id)
.map(|e| e.record.title.clone())
.unwrap_or_default();
suggestions.push(PruneSuggestion {
record_id: superseded_id.clone(),
title,
reason: PruneReason::Superseded {
by: entry.record_id.clone(),
},
});
already_suggested.insert(superseded_id.clone());
}
}
let now_secs = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_default()
.as_secs();
for entry in &active {
if already_suggested.contains(&entry.record_id) {
continue;
}
if let Some(ref valid_until) = entry.record.valid_until
&& is_expired(valid_until, now_secs)
{
suggestions.push(PruneSuggestion {
record_id: entry.record_id.clone(),
title: entry.record.title.clone(),
reason: PruneReason::Expired {
valid_until: valid_until.clone(),
},
});
already_suggested.insert(entry.record_id.clone());
}
}
for entry in &active {
if already_suggested.contains(&entry.record_id) {
continue;
}
if STALENESS_EXEMPT_TYPES.contains(&entry.record.memory_type.as_str()) {
continue;
}
let age = ref_map
.records
.get(&entry.record_id)
.and_then(reference_tracker::age_days);
if let Some(days) = age
&& days >= STALENESS_DAYS
{
suggestions.push(PruneSuggestion {
record_id: entry.record_id.clone(),
title: entry.record.title.clone(),
reason: PruneReason::Stale {
days_since_reference: days,
},
});
}
}
suggestions
}
pub fn apply_consolidation(
config_path: &Path,
suggestion: &ConsolidationSuggestion,
entries: &[LedgerEntry],
) -> anyhow::Result<ConsolidateApplyResult> {
let service = LifecycleService::new();
let fragments: Vec<&LedgerEntry> = entries
.iter()
.filter(|e| suggestion.cluster_records.contains(&e.record_id))
.collect();
let summary = fragments
.iter()
.map(|e| e.record.summary.as_str())
.collect::<Vec<_>>()
.join("\n---\n");
let entities: Vec<String> = union_strings(fragments.iter().map(|e| &e.record.entities));
let tags: Vec<String> = union_strings(fragments.iter().map(|e| &e.record.tags));
let triggers: Vec<String> = union_strings(fragments.iter().map(|e| &e.record.triggers));
let scope = fragments
.first()
.map(|e| e.record.scope)
.unwrap_or(MemoryScope::User);
let project_id = fragments.first().and_then(|e| e.record.project_id.clone());
let user_id = fragments.first().and_then(|e| e.record.user_id.clone());
let memory_type = fragments
.first()
.map(|e| e.record.memory_type.clone())
.unwrap_or_else(|| "knowledge".to_string());
let result = service.record_manual(
config_path,
RecordMemoryRequest {
title: suggestion.suggested_title.clone(),
summary,
memory_type,
scope,
source_ref: "consolidation:merge".to_string(),
project_id,
user_id,
sensitivity: None,
metadata: TransitionMetadata {
actor: Some("spool-consolidate".to_string()),
reason: Some("merged from fragmented records".to_string()),
evidence_refs: suggestion.cluster_records.clone(),
},
entities,
tags,
triggers,
related_files: union_strings(fragments.iter().map(|e| &e.record.related_files)),
related_records: suggestion.cluster_records.clone(),
supersedes: None,
applies_to: union_strings(fragments.iter().map(|e| &e.record.applies_to)),
valid_until: None,
},
)?;
let merged_id = result.entry.record_id.clone();
let mut archived_ids = Vec::new();
for record_id in &suggestion.cluster_records {
service.apply_action_with_metadata(
config_path,
record_id,
LifecycleAction::Archive,
TransitionMetadata {
actor: Some("spool-consolidate".to_string()),
reason: Some(format!("consolidated into {merged_id}")),
evidence_refs: Vec::new(),
},
)?;
archived_ids.push(record_id.clone());
}
Ok(ConsolidateApplyResult {
merged_record_id: merged_id,
archived_record_ids: archived_ids,
})
}
/// Archives every record named in `suggestions`, attaching a human-readable reason to each
/// transition.
///
/// Suggestions are processed in order. The first archive call that fails aborts the run and its
/// error is returned; records archived before that point stay archived but are not reported.
pub fn apply_prune(
    config_path: &Path,
    suggestions: &[PruneSuggestion],
) -> anyhow::Result<PruneApplyResult> {
    let service = LifecycleService::new();
    let archived_record_ids = suggestions
        .iter()
        .map(|item| -> anyhow::Result<String> {
            let why = match &item.reason {
                PruneReason::Stale {
                    days_since_reference,
                } => format!("stale ({days_since_reference} days without reference)"),
                PruneReason::Expired { valid_until } => {
                    format!("expired (valid_until: {valid_until})")
                }
                PruneReason::Superseded { by } => format!("superseded by {by}"),
            };
            service.apply_action_with_metadata(
                config_path,
                &item.record_id,
                LifecycleAction::Archive,
                TransitionMetadata {
                    actor: Some("spool-prune".to_string()),
                    reason: Some(why),
                    evidence_refs: Vec::new(),
                },
            )?;
            Ok(item.record_id.clone())
        })
        // Collecting into `Result` stops at the first error, just like an early `?` in a loop.
        .collect::<anyhow::Result<Vec<String>>>()?;
    Ok(PruneApplyResult {
        archived_record_ids,
    })
}
/// Decides whether two records may be clustered together.
///
/// Records that both carry a project id must carry the same one; records that both lack a
/// project id must share the same scope; a record with a project id never matches one without.
fn scope_compatible(a: &MemoryRecord, b: &MemoryRecord) -> bool {
    match (a.project_id.as_ref(), b.project_id.as_ref()) {
        (None, None) => a.scope == b.scope,
        (Some(left), Some(right)) => left == right,
        (Some(_), None) | (None, Some(_)) => false,
    }
}
/// Jaccard similarity (`|A intersect B| / |A union B|`) of two records' labels, where a
/// record's labels are its entities and tags pooled into one set.
///
/// Returns `0.0` when neither record has any label.
fn entity_tag_jaccard(a: &MemoryRecord, b: &MemoryRecord) -> f64 {
    /// Pools a record's entities and tags into a single de-duplicated set.
    fn labels(record: &MemoryRecord) -> BTreeSet<&str> {
        record
            .entities
            .iter()
            .chain(&record.tags)
            .map(String::as_str)
            .collect()
    }

    let (left, right) = (labels(a), labels(b));
    let union_size = left.union(&right).count();
    // The union is empty exactly when both label sets are empty.
    if union_size == 0 {
        return 0.0;
    }
    let shared = left.intersection(&right).count();
    shared as f64 / union_size as f64
}
/// Turns one cluster of ledger entries into a `ConsolidationSuggestion`.
///
/// The shared entities and tags are those present on every record. The suggested title lists
/// the shared entities when there are any, otherwise the shared tags, otherwise it just states
/// how many records are in the cluster.
fn build_suggestion(records: &[&LedgerEntry]) -> ConsolidationSuggestion {
    let mut cluster_records: Vec<String> = Vec::with_capacity(records.len());
    let mut entity_sets: Vec<BTreeSet<&str>> = Vec::with_capacity(records.len());
    let mut tag_sets: Vec<BTreeSet<&str>> = Vec::with_capacity(records.len());
    for entry in records {
        cluster_records.push(entry.record_id.clone());
        entity_sets.push(entry.record.entities.iter().map(String::as_str).collect());
        tag_sets.push(entry.record.tags.iter().map(String::as_str).collect());
    }

    let shared_entities = intersect_all(&entity_sets);
    let shared_tags = intersect_all(&tag_sets);

    let suggested_title = match (shared_entities.is_empty(), shared_tags.is_empty()) {
        (false, _) => format!("Consolidated: {}", shared_entities.join(", ")),
        (true, false) => format!("Consolidated: {}", shared_tags.join(", ")),
        (true, true) => format!("Consolidated ({} records)", records.len()),
    };

    ConsolidationSuggestion {
        cluster_records,
        suggested_title,
        shared_entities,
        shared_tags,
    }
}
/// Returns the items common to every set in `sets`, in ascending order.
///
/// An empty slice yields an empty vector.
fn intersect_all(sets: &[BTreeSet<&str>]) -> Vec<String> {
    match sets.split_first() {
        None => Vec::new(),
        // Walk the first set in order and keep what every remaining set also contains.
        Some((first, rest)) => first
            .iter()
            .copied()
            .filter(|item| rest.iter().all(|other| other.contains(item)))
            .map(String::from)
            .collect(),
    }
}
/// Merges several string lists into one de-duplicated list sorted in ascending order.
fn union_strings<'a>(iter: impl Iterator<Item = &'a Vec<String>>) -> Vec<String> {
    // Passing through a `BTreeSet` both removes duplicates and sorts.
    let merged: BTreeSet<String> = iter.flatten().cloned().collect();
    merged.into_iter().collect()
}
/// Returns `true` once the calendar day named at the start of `valid_until` is fully over.
///
/// Only the first ten bytes are inspected and must look like `YYYY-MM-DD`; anything after them
/// (for example a `T23:59:59Z` suffix) is ignored. The date stays valid through the end of that
/// day, so expiry is reported only when `now_secs` (seconds since the Unix epoch) is greater
/// than midnight of the following day.
///
/// Anything that cannot be read as such a date -- too short, non-numeric, out-of-range month or
/// day, a year before 1970, or non-ASCII text -- is treated as "not expired".
fn is_expired(valid_until: &str, now_secs: u64) -> bool {
    // `get` returns `None` instead of panicking when the string is shorter than ten bytes or
    // when byte 10 falls inside a multi-byte character.
    let Some(date_str) = valid_until.get(..10) else {
        return false;
    };

    // Exactly three numeric, dash-separated fields are required.
    let mut fields = date_str.split('-').map(str::parse::<u64>);
    let (Some(Ok(year)), Some(Ok(month)), Some(Ok(day)), None) =
        (fields.next(), fields.next(), fields.next(), fields.next())
    else {
        return false;
    };

    // Expiry is the start of the named day plus one full day.
    match ymd_to_approx_secs(year, month, day).and_then(|start| start.checked_add(86_400)) {
        Some(expiry_secs) => now_secs > expiry_secs,
        None => false,
    }
}

/// Converts a calendar date to seconds since the Unix epoch at the start of that day.
///
/// Uses the usual era / day-of-era decomposition for the proleptic Gregorian calendar. The
/// result is "approximate" only in that `day` is not checked against the length of `month`
/// (for example February 31 rolls over into March).
///
/// Returns `None` for years before 1970, months outside `1..=12`, days outside `1..=31`, or
/// when the result does not fit in a `u64`.
fn ymd_to_approx_secs(year: u64, month: u64, day: u64) -> Option<u64> {
    // Guarding the ranges here keeps `day - 1` and `month - 3` below from underflowing.
    if year < 1970 || !(1..=12).contains(&month) || !(1..=31).contains(&day) {
        return None;
    }
    // Shift the year so it starts in March; the leap day then falls at the end of the year.
    let y = if month <= 2 { year - 1 } else { year };
    let m = if month <= 2 { month + 9 } else { month - 3 };
    let era = y / 400;
    let yoe = y - era * 400;
    let doy = (153 * m + 2) / 5 + day - 1;
    let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy;
    // 719_468 is the number of days from 0000-03-01 to 1970-01-01.
    era.checked_mul(146_097)?
        .checked_add(doe)?
        .checked_sub(719_468)?
        .checked_mul(86_400)
}
pub fn load_entries(config_path: &Path) -> anyhow::Result<Vec<LedgerEntry>> {
let config_dir = config_path.parent().unwrap_or_else(|| Path::new("."));
let lifecycle_root = lifecycle_root_from_config(config_dir);
let store = LifecycleStore::new(lifecycle_root.as_path());
latest_state_entries(&store)
}
/// Resolves the lifecycle root directory for a config file.
///
/// The config file's parent directory is handed to `lifecycle_root_from_config`; when the path
/// has no parent, `"."` is used instead.
pub fn resolve_lifecycle_root(config_path: &Path) -> std::path::PathBuf {
    let config_dir = match config_path.parent() {
        Some(dir) => dir,
        None => Path::new("."),
    };
    lifecycle_root_from_config(config_dir)
}
// Unit tests for cluster detection, prune detection, and the small date/similarity helpers.
#[cfg(test)]
mod tests {
use super::*;
use crate::domain::{
MemoryLedgerAction, MemoryLifecycleState, MemoryOrigin, MemoryRecord, MemoryScope,
MemorySourceKind,
};
use crate::lifecycle_store::{LedgerEntry, TransitionMetadata};
// Builds a user-scoped "workflow" ledger entry with no project id, no `supersedes` and no
// `valid_until`; tests mutate the returned entry when they need those fields.
fn make_entry(
record_id: &str,
title: &str,
entities: Vec<&str>,
tags: Vec<&str>,
state: MemoryLifecycleState,
) -> LedgerEntry {
LedgerEntry {
schema_version: "memory-ledger.v1".to_string(),
recorded_at: "2026-05-10T00:00:00Z".to_string(),
record_id: record_id.to_string(),
scope_key: "user:long".to_string(),
action: MemoryLedgerAction::RecordManual,
source_kind: MemorySourceKind::Manual,
metadata: TransitionMetadata::default(),
record: MemoryRecord {
title: title.to_string(),
summary: format!("Summary for {title}"),
memory_type: "workflow".to_string(),
scope: MemoryScope::User,
state,
origin: MemoryOrigin {
source_kind: MemorySourceKind::Manual,
source_ref: "test".to_string(),
},
project_id: None,
user_id: Some("long".to_string()),
sensitivity: None,
entities: entities.into_iter().map(String::from).collect(),
tags: tags.into_iter().map(String::from).collect(),
triggers: Vec::new(),
related_files: Vec::new(),
related_records: Vec::new(),
supersedes: None,
applies_to: Vec::new(),
valid_until: None,
},
}
}
// Three active records sharing {rust, error, coding}; the lowest pairwise similarity is
// 3/4 (r2 adds "patterns"), which is above the 0.5 threshold, so they form one cluster.
#[test]
fn should_detect_cluster_of_related_records() {
let entries = vec![
make_entry(
"r1",
"Rust error handling",
vec!["rust", "error"],
vec!["coding"],
MemoryLifecycleState::Accepted,
),
make_entry(
"r2",
"Rust error patterns",
vec!["rust", "error"],
vec!["coding", "patterns"],
MemoryLifecycleState::Accepted,
),
make_entry(
"r3",
"Rust error recovery",
vec!["rust", "error"],
vec!["coding"],
MemoryLifecycleState::Canonical,
),
];
let suggestions = detect_consolidation_candidates(&entries);
assert_eq!(suggestions.len(), 1);
assert_eq!(suggestions[0].cluster_records.len(), 3);
assert!(suggestions[0].shared_entities.contains(&"rust".to_string()));
assert!(
suggestions[0]
.shared_entities
.contains(&"error".to_string())
);
}
// Records with no entity or tag in common have similarity 0 and must not be linked.
#[test]
fn should_not_cluster_unrelated_records() {
let entries = vec![
make_entry(
"r1",
"Rust error handling",
vec!["rust", "error"],
vec!["coding"],
MemoryLifecycleState::Accepted,
),
make_entry(
"r2",
"Python testing",
vec!["python", "testing"],
vec!["qa"],
MemoryLifecycleState::Accepted,
),
make_entry(
"r3",
"Go concurrency",
vec!["go", "concurrency"],
vec!["performance"],
MemoryLifecycleState::Accepted,
),
];
let suggestions = detect_consolidation_candidates(&entries);
assert!(suggestions.is_empty());
}
// With r2 archived only two active records remain, which is below MIN_CLUSTER_SIZE.
#[test]
fn should_skip_archived_records_in_clustering() {
let entries = vec![
make_entry(
"r1",
"Rust error handling",
vec!["rust", "error"],
vec!["coding"],
MemoryLifecycleState::Accepted,
),
make_entry(
"r2",
"Rust error patterns",
vec!["rust", "error"],
vec!["coding"],
MemoryLifecycleState::Archived,
),
make_entry(
"r3",
"Rust error recovery",
vec!["rust", "error"],
vec!["coding"],
MemoryLifecycleState::Accepted,
),
];
let suggestions = detect_consolidation_candidates(&entries);
assert!(suggestions.is_empty());
}
// new-1 declares that it supersedes old-1, so only old-1 is suggested for pruning.
#[test]
fn should_detect_superseded_records_for_pruning() {
let mut entries = vec![
make_entry(
"old-1",
"Old approach",
vec!["rust"],
vec![],
MemoryLifecycleState::Accepted,
),
make_entry(
"new-1",
"New approach",
vec!["rust"],
vec![],
MemoryLifecycleState::Accepted,
),
];
entries[1].record.supersedes = Some("old-1".to_string());
let temp = tempfile::tempdir().unwrap();
let suggestions = detect_prune_candidates(&entries, temp.path());
assert_eq!(suggestions.len(), 1);
assert_eq!(suggestions[0].record_id, "old-1");
assert!(matches!(
suggestions[0].reason,
PruneReason::Superseded { .. }
));
}
// A `valid_until` date well in the past must be reported as expired.
#[test]
fn should_detect_expired_records_for_pruning() {
let mut entries = vec![make_entry(
"exp-1",
"Temporary rule",
vec!["temp"],
vec![],
MemoryLifecycleState::Accepted,
)];
entries[0].record.valid_until = Some("2020-01-01".to_string());
let temp = tempfile::tempdir().unwrap();
let suggestions = detect_prune_candidates(&entries, temp.path());
assert_eq!(suggestions.len(), 1);
assert_eq!(suggestions[0].record_id, "exp-1");
assert!(matches!(suggestions[0].reason, PruneReason::Expired { .. }));
}
// Writes a reference-tracker file whose only entry was last referenced 200 days ago, which
// is past the 180-day staleness limit.
#[test]
fn should_detect_stale_records_for_pruning() {
let entries = vec![make_entry(
"stale-1",
"Old workflow",
vec!["workflow"],
vec![],
MemoryLifecycleState::Accepted,
)];
let temp = tempfile::tempdir().unwrap();
let now_secs = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_secs();
// 200 days before now, rendered as a timestamp string by the reference tracker's test helper.
let old_secs = now_secs - 200 * 86400; let old_ts = crate::reference_tracker::tests::unix_secs_to_iso8601_for_test(old_secs);
let map = reference_tracker::ReferenceMap {
schema_version: "reference-tracker.v1".to_string(),
records: std::collections::BTreeMap::from([(
"stale-1".to_string(),
reference_tracker::ReferenceEntry {
last_referenced_at: old_ts,
count: 1,
},
)]),
};
std::fs::write(
temp.path().join("reference-tracker.json"),
serde_json::to_string_pretty(&map).unwrap(),
)
.unwrap();
let suggestions = detect_prune_candidates(&entries, temp.path());
assert_eq!(suggestions.len(), 1);
assert_eq!(suggestions[0].record_id, "stale-1");
assert!(matches!(suggestions[0].reason, PruneReason::Stale { .. }));
}
// Same 200-day-old reference as above, but "preference" is a staleness-exempt memory type.
#[test]
fn should_not_prune_preference_type_for_staleness() {
let mut entries = vec![make_entry(
"pref-1",
"My preference",
vec!["style"],
vec![],
MemoryLifecycleState::Accepted,
)];
entries[0].record.memory_type = "preference".to_string();
let temp = tempfile::tempdir().unwrap();
let now_secs = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_secs();
let old_secs = now_secs - 200 * 86400;
let old_ts = crate::reference_tracker::tests::unix_secs_to_iso8601_for_test(old_secs);
let map = reference_tracker::ReferenceMap {
schema_version: "reference-tracker.v1".to_string(),
records: std::collections::BTreeMap::from([(
"pref-1".to_string(),
reference_tracker::ReferenceEntry {
last_referenced_at: old_ts,
count: 1,
},
)]),
};
std::fs::write(
temp.path().join("reference-tracker.json"),
serde_json::to_string_pretty(&map).unwrap(),
)
.unwrap();
let suggestions = detect_prune_candidates(&entries, temp.path());
assert!(suggestions.is_empty());
}
// Uses a fixed "now" so the expectations do not depend on the wall clock.
#[test]
fn is_expired_should_handle_various_formats() {
// 1_800_000_000 seconds after the epoch falls in mid-January 2027.
let now = 1_800_000_000; assert!(is_expired("2026-01-01", now));
assert!(!is_expired("2028-01-01", now));
assert!(is_expired("2025-12-31T23:59:59Z", now));
assert!(!is_expired("invalid", now));
assert!(!is_expired("", now));
}
// Covers the three Jaccard cases: no labels at all, identical labels, and partial overlap.
#[test]
fn entity_tag_jaccard_should_compute_correctly() {
let a = MemoryRecord::new_manual("a", "a", "workflow", MemoryScope::User, "test");
let mut b = MemoryRecord::new_manual("b", "b", "workflow", MemoryScope::User, "test");
assert_eq!(entity_tag_jaccard(&a, &b), 0.0);
let mut a2 = a.clone();
a2.entities = vec!["rust".to_string(), "error".to_string()];
b.entities = vec!["rust".to_string(), "error".to_string()];
assert_eq!(entity_tag_jaccard(&a2, &b), 1.0);
// {rust, error} vs {rust, testing}: one shared label out of three distinct ones.
b.entities = vec!["rust".to_string(), "testing".to_string()];
let sim = entity_tag_jaccard(&a2, &b);
assert!((sim - 1.0 / 3.0).abs() < 0.01);
}
}