use crate::domain::MemoryLifecycleState;
use crate::knowledge::cluster::{self as consolidation, PruneSuggestion};
use crate::lifecycle_store::{LedgerEntry, LifecycleStore, latest_state_entries};
use crate::vault_writer::{MEMORY_LEDGER_COMPILED_DIR, MEMORY_LEDGER_DIR};
use anyhow::{Context, Result};
use serde::Serialize;
use std::collections::HashSet;
use std::fs;
use std::path::Path;
use ts_rs::TS;
/// A reference held by an active record whose target id does not appear among the
/// ledger entries that were linted.
#[derive(Debug, Clone, Serialize, TS)]
#[ts(export, export_to = "../frontend/src/lib/types/generated/")]
pub struct BrokenCrossRef {
/// Id of the record that holds the dangling reference.
pub record_id: String,
/// Title of the record that holds the dangling reference.
pub title: String,
/// The referenced record id that could not be found.
pub missing_target: String,
/// Name of the record field the reference came from (`"related_records"` or
/// `"supersedes"`). Exported to TypeScript as a plain `string`.
#[ts(type = "string")]
pub field: &'static str,
}
/// A markdown file found in one of the vault's ledger directories whose file stem does
/// not match the id of any linted ledger entry.
#[derive(Debug, Clone, Serialize, TS)]
#[ts(export, export_to = "../frontend/src/lib/types/generated/")]
pub struct OrphanNote {
/// Path of the file relative to the vault root, using `/` as the separator.
pub relative_path: String,
/// The file stem, i.e. the record id the note's file name implies.
pub record_id: String,
}
/// Aggregated result of one lint pass over the ledger entries and the vault.
#[derive(Debug, Clone, Serialize, TS)]
#[ts(export, export_to = "../frontend/src/lib/types/generated/")]
pub struct LintReport {
/// Number of entries whose state is `Accepted` or `Canonical`.
pub total_active_records: usize,
/// Prune candidates reported by the consolidation module.
pub prune_suggestions: Vec<PruneSuggestion>,
/// References on active records that point at unknown record ids.
pub broken_cross_refs: Vec<BrokenCrossRef>,
/// Markdown notes in the vault with no matching ledger entry.
pub orphan_notes: Vec<OrphanNote>,
}
impl LintReport {
    /// Returns `true` when the report carries no findings in any category.
    ///
    /// The active-record count is informational only and does not affect the result.
    pub fn is_clean(&self) -> bool {
        let finding_counts = [
            self.prune_suggestions.len(),
            self.broken_cross_refs.len(),
            self.orphan_notes.len(),
        ];
        finding_counts.iter().all(|&count| count == 0)
    }

    /// Formats a one-line, human-readable summary of the report's counters.
    pub fn summary_line(&self) -> String {
        let active = self.total_active_records;
        let prune = self.prune_suggestions.len();
        let broken = self.broken_cross_refs.len();
        let orphans = self.orphan_notes.len();
        format!(
            "lint: {} active records, {} prune, {} broken cross-refs, {} orphan notes",
            active, prune, broken, orphans,
        )
    }
}
/// Runs a lint pass using the configuration file at `config_path`.
///
/// The vault root is resolved from the loaded config, and the lifecycle root is derived
/// from the directory containing the config file (falling back to `.` when the path has
/// no parent). The latest-state ledger entries are then read and handed to [`run_lint`].
///
/// # Errors
///
/// Returns an error when the config cannot be loaded, the vault root cannot be resolved,
/// or the ledger entries cannot be read.
pub fn run_lint_from_config(config_path: &Path) -> Result<LintReport> {
    let loaded = crate::app::load(config_path)
        .with_context(|| format!("failed to load config {}", config_path.display()))?;
    let vault_root = crate::app::resolve_override_path(&loaded.vault.root, config_path)
        .context("failed to resolve vault root")?;

    let config_dir = match config_path.parent() {
        Some(parent) => parent,
        None => Path::new("."),
    };
    let ledger_root = crate::lifecycle_store::lifecycle_root_from_config(config_dir);
    let store = LifecycleStore::new(&ledger_root);

    latest_state_entries(&store)
        .context("failed to read ledger entries")
        .map(|latest| run_lint(&latest, &ledger_root, &vault_root))
}
/// Builds a [`LintReport`] from already-loaded ledger entries.
///
/// Entries in the `Accepted` or `Canonical` state are counted as active. Prune
/// suggestions come from the consolidation module, broken cross-references from the
/// entries themselves, and orphan notes from scanning the vault under `vault_root`.
/// If the orphan scan reports an error, the orphan list is left empty.
pub fn run_lint(entries: &[LedgerEntry], lifecycle_root: &Path, vault_root: &Path) -> LintReport {
    let mut total_active_records = 0usize;
    for entry in entries {
        let is_active = matches!(
            entry.record.state,
            MemoryLifecycleState::Accepted | MemoryLifecycleState::Canonical
        );
        if is_active {
            total_active_records += 1;
        }
    }

    // Struct fields are evaluated top to bottom, so the sub-checks run in this order.
    LintReport {
        total_active_records,
        prune_suggestions: consolidation::detect_prune_candidates(entries, lifecycle_root),
        broken_cross_refs: detect_broken_cross_refs(entries),
        orphan_notes: detect_orphan_notes(entries, vault_root).unwrap_or_default(),
    }
}
fn detect_broken_cross_refs(entries: &[LedgerEntry]) -> Vec<BrokenCrossRef> {
let known_ids: HashSet<&str> = entries.iter().map(|e| e.record_id.as_str()).collect();
let mut broken = Vec::new();
for entry in entries {
if !matches!(
entry.record.state,
MemoryLifecycleState::Accepted | MemoryLifecycleState::Canonical
) {
continue;
}
for target in &entry.record.related_records {
if !known_ids.contains(target.as_str()) {
broken.push(BrokenCrossRef {
record_id: entry.record_id.clone(),
title: entry.record.title.clone(),
missing_target: target.clone(),
field: "related_records",
});
}
}
if let Some(ref target) = entry.record.supersedes
&& !known_ids.contains(target.as_str())
{
broken.push(BrokenCrossRef {
record_id: entry.record_id.clone(),
title: entry.record.title.clone(),
missing_target: target.clone(),
field: "supersedes",
});
}
}
broken
}
/// Scans the vault's ledger directories for markdown notes that have no ledger entry.
///
/// Both `MEMORY_LEDGER_DIR` and `MEMORY_LEDGER_COMPILED_DIR` (relative to `vault_root`)
/// are listed non-recursively. An entry with the `md` extension is reported when its file
/// stem does not equal the `record_id` of any entry in `entries`.
///
/// The scan is best-effort: a directory that is missing or cannot be read, and individual
/// directory entries that fail to load, are skipped silently. The function therefore
/// currently always returns `Ok`.
///
/// Results are grouped by directory in the order listed above and sorted by relative path
/// within each directory, so the report does not depend on filesystem listing order.
fn detect_orphan_notes(entries: &[LedgerEntry], vault_root: &Path) -> Result<Vec<OrphanNote>> {
    let known_ids: HashSet<&str> = entries.iter().map(|e| e.record_id.as_str()).collect();
    let mut orphans = Vec::new();

    for rel_dir in [MEMORY_LEDGER_DIR, MEMORY_LEDGER_COMPILED_DIR] {
        let dir = vault_root.join(rel_dir);
        // `read_dir` fails for a missing path or a non-directory, which covers the
        // "nothing to scan here" case without a separate existence check.
        let Ok(reader) = fs::read_dir(&dir) else {
            continue;
        };

        let first_in_dir = orphans.len();
        for entry in reader.flatten() {
            let path = entry.path();
            if path.extension().and_then(|s| s.to_str()) != Some("md") {
                continue;
            }
            // A directory that happens to be named `*.md` is not a note.
            if path.is_dir() {
                continue;
            }
            let Some(record_id) = path.file_stem().and_then(|s| s.to_str()) else {
                continue;
            };
            if known_ids.contains(record_id) {
                continue;
            }
            orphans.push(OrphanNote {
                relative_path: relative_path(vault_root, &path),
                record_id: record_id.to_string(),
            });
        }

        // `read_dir` yields entries in a platform- and filesystem-dependent order; sort
        // this directory's findings so repeated runs produce identical reports.
        orphans[first_in_dir..].sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
    }

    Ok(orphans)
}
/// Renders `absolute` relative to `base` as a `/`-separated string.
///
/// When `absolute` does not start with `base`, the full `absolute` path is rendered
/// instead. The platform's main path separator is replaced with `/` in either case.
fn relative_path(base: &Path, absolute: &Path) -> String {
    let shown = match absolute.strip_prefix(base) {
        Ok(tail) => tail,
        Err(_) => absolute,
    };
    let rendered = shown.display().to_string();
    rendered.replace(std::path::MAIN_SEPARATOR, "/")
}
/// Renders a [`LintReport`] as a markdown document.
///
/// The output always begins with a title and the report's summary line. A clean report
/// gets a single confirmation line and nothing else. Otherwise every non-empty category
/// becomes its own section of bullet points, in this order: prune suggestions, broken
/// cross-references, orphan notes.
pub fn render_lint_markdown(report: &LintReport) -> String {
    /// Appends `heading`, the bullet lines and a trailing blank line to `doc`; appends
    /// nothing at all when there are no bullets.
    fn append_section(doc: &mut String, heading: &str, bullets: Vec<String>) {
        if bullets.is_empty() {
            return;
        }
        doc.push_str(heading);
        for bullet in &bullets {
            doc.push_str(bullet);
        }
        doc.push('\n');
    }

    let mut doc = String::from("# Wiki Lint Report\n\n");
    doc += &format!("{}\n\n", report.summary_line());

    if report.is_clean() {
        doc += "✓ 知识库干净,无需清理。\n";
        return doc;
    }

    let prune_bullets: Vec<String> = report
        .prune_suggestions
        .iter()
        .map(|item| {
            format!(
                "- `{}` {} — {:?}\n",
                item.record_id, item.title, item.reason
            )
        })
        .collect();
    append_section(&mut doc, "## 可归档\n\n", prune_bullets);

    let broken_bullets: Vec<String> = report
        .broken_cross_refs
        .iter()
        .map(|item| {
            format!(
                "- `{}` {} → `{}` 缺失 (字段 {})\n",
                item.record_id, item.title, item.missing_target, item.field
            )
        })
        .collect();
    append_section(&mut doc, "## 断链\n\n", broken_bullets);

    let orphan_bullets: Vec<String> = report
        .orphan_notes
        .iter()
        .map(|item| {
            format!(
                "- `{}` (record_id `{}`)\n",
                item.relative_path, item.record_id
            )
        })
        .collect();
    append_section(&mut doc, "## 孤儿 note\n\n", orphan_bullets);

    doc
}
// Unit tests for the lint checks, the report composition and the markdown rendering.
#[cfg(test)]
mod tests {
use super::*;
use crate::domain::{
MemoryLedgerAction, MemoryLifecycleState, MemoryOrigin, MemoryRecord, MemoryScope,
MemorySourceKind,
};
use crate::lifecycle_store::TransitionMetadata;
use std::fs;
use tempfile::tempdir;
// Builds a minimal manually-recorded ledger entry with the given id and lifecycle
// state. Every list on the record is empty and every optional field is `None`, so
// tests only need to set the fields they care about.
fn entry_with(record_id: &str, state: MemoryLifecycleState) -> LedgerEntry {
LedgerEntry {
schema_version: "memory-ledger.v1".to_string(),
recorded_at: "unix:1".to_string(),
record_id: record_id.to_string(),
scope_key: "user:long".to_string(),
action: MemoryLedgerAction::RecordManual,
source_kind: MemorySourceKind::Manual,
metadata: TransitionMetadata::default(),
record: MemoryRecord {
title: format!("title-{record_id}"),
summary: "s".to_string(),
memory_type: "preference".to_string(),
scope: MemoryScope::User,
state,
origin: MemoryOrigin {
source_kind: MemorySourceKind::Manual,
source_ref: "m".to_string(),
},
project_id: None,
user_id: None,
sensitivity: None,
entities: Vec::new(),
tags: Vec::new(),
triggers: Vec::new(),
related_files: Vec::new(),
related_records: Vec::new(),
supersedes: None,
applies_to: Vec::new(),
valid_until: None,
},
}
}
// Dangling `related_records` and `supersedes` targets on an accepted record are
// reported; a reference to an existing id is not, and neither are references held by
// a `Candidate` record.
#[test]
fn detect_broken_cross_refs_should_flag_missing_related_records_and_supersedes() {
let mut a = entry_with("rec-a", MemoryLifecycleState::Accepted);
a.record.related_records = vec!["rec-b".to_string(), "rec-missing".to_string()];
a.record.supersedes = Some("rec-also-missing".to_string());
let b = entry_with("rec-b", MemoryLifecycleState::Accepted);
let mut c = entry_with("rec-c", MemoryLifecycleState::Candidate);
c.record.related_records = vec!["rec-never".to_string()];
let entries = vec![a, b, c];
let broken = detect_broken_cross_refs(&entries);
let missing: HashSet<_> = broken.iter().map(|b| b.missing_target.clone()).collect();
assert!(missing.contains("rec-missing"));
assert!(missing.contains("rec-also-missing"));
assert!(!missing.contains("rec-never"));
assert!(!missing.contains("rec-b"));
}
// Markdown files in either ledger directory whose stem matches no record id are
// reported; files with a matching record and non-markdown files are ignored.
#[test]
fn detect_orphan_notes_should_find_md_files_without_matching_record() {
let temp = tempdir().unwrap();
let extracted = temp.path().join(MEMORY_LEDGER_DIR);
let compiled = temp.path().join(MEMORY_LEDGER_COMPILED_DIR);
fs::create_dir_all(&extracted).unwrap();
fs::create_dir_all(&compiled).unwrap();
fs::write(extracted.join("rec-known.md"), "# known").unwrap();
fs::write(extracted.join("rec-orphan-a.md"), "# orphan").unwrap();
fs::write(compiled.join("wiki-orphan.md"), "# compiled orphan").unwrap();
fs::write(extracted.join("not-markdown.txt"), "skip").unwrap();
let entries = vec![entry_with("rec-known", MemoryLifecycleState::Accepted)];
let orphans = detect_orphan_notes(&entries, temp.path()).unwrap();
let ids: HashSet<_> = orphans.iter().map(|o| o.record_id.clone()).collect();
assert!(ids.contains("rec-orphan-a"));
assert!(ids.contains("wiki-orphan"));
assert!(!ids.contains("rec-known"));
assert!(!ids.contains("not-markdown"));
}
// `Accepted` and `Canonical` entries count as active while `Candidate` does not; with
// no references and an empty vault every finding list is expected to be empty.
#[test]
fn run_lint_should_count_active_and_compose_sub_reports() {
let temp = tempdir().unwrap();
let lifecycle_root = temp.path().join(".spool");
fs::create_dir_all(&lifecycle_root).unwrap();
let a = entry_with("rec-a", MemoryLifecycleState::Accepted);
let b = entry_with("rec-b", MemoryLifecycleState::Canonical);
let c = entry_with("rec-c", MemoryLifecycleState::Candidate);
let report = run_lint(&[a, b, c], &lifecycle_root, temp.path());
assert_eq!(report.total_active_records, 2);
assert!(report.prune_suggestions.is_empty());
assert!(report.broken_cross_refs.is_empty());
assert!(report.orphan_notes.is_empty());
assert!(report.is_clean());
}
// A dangling related record must appear in the rendered markdown under the
// broken-link section heading, together with the missing target id.
#[test]
fn render_lint_markdown_should_highlight_issues() {
let temp = tempdir().unwrap();
let mut a = entry_with("rec-a", MemoryLifecycleState::Accepted);
a.record.related_records = vec!["rec-gone".to_string()];
let report = run_lint(&[a], temp.path(), temp.path());
let md = render_lint_markdown(&report);
assert!(md.contains("断链"));
assert!(md.contains("rec-gone"));
}
}