use anyhow::Result;
use regex::Regex;
use serde::{Deserialize, Serialize};
use serde_json::{json, Value};
use std::collections::HashSet;
use std::path::Path;
use std::sync::OnceLock;
/// A single anchored file: a path plus the content hash recorded for it.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct PathAnchor {
/// Path of the anchored file; the functions in this file join it onto the project root.
pub path: String,
/// Content hash recorded for `path` (produced by `super::hash::hash_file` when seeded or refreshed).
pub hash: String,
}
/// Deserialized contents of an anchor sidecar file: the list of anchored paths.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct AnchorFile {
/// Anchored paths. `#[serde(default)]` makes a document without an `anchors` key deserialize as an empty list.
#[serde(default)]
pub anchors: Vec<PathAnchor>,
}
/// Comment header that `write_anchor_file` places in front of the serialized TOML body.
const HEADER_COMMENT: &str = "\
# Anchor sidecar — tracks content hashes for referenced paths.\n\
# Auto-generated by codescout. Edit anchors list manually if needed.\n\n";
/// Loads the anchor sidecar stored at `path`.
///
/// A missing file is not treated as an error: it yields an empty
/// [`AnchorFile`].
///
/// # Errors
/// Returns an error when the file cannot be read for any reason other than
/// not existing, or when its contents do not parse as TOML for
/// [`AnchorFile`].
pub fn read_anchor_file(path: &Path) -> Result<AnchorFile> {
    let raw = match std::fs::read_to_string(path) {
        Ok(raw) => raw,
        // No sidecar yet: behave as if it held no anchors.
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
            return Ok(AnchorFile::default());
        }
        Err(err) => return Err(err.into()),
    };
    let parsed: AnchorFile = toml::from_str(&raw)?;
    Ok(parsed)
}
/// Serializes `anchor_file` to pretty TOML, prefixes it with
/// [`HEADER_COMMENT`], and hands the result to
/// `crate::util::fs::atomic_write` for `path`.
///
/// # Errors
/// Returns an error when serialization fails or when the write fails.
pub fn write_anchor_file(path: &Path, anchor_file: &AnchorFile) -> Result<()> {
    let mut content = String::from(HEADER_COMMENT);
    content.push_str(&toml::to_string_pretty(anchor_file)?);
    crate::util::fs::atomic_write(path, &content)?;
    Ok(())
}
/// Returns the lazily compiled regex used to spot project paths in text.
///
/// A match must begin at the start of the text or right after a backtick,
/// whitespace, `|`, or `(`. Capture group 1 holds the path itself: a file
/// with an extension under `src/`, `.codescout/`, or `docs/`, or one of the
/// literal names `Cargo.toml` / `CLAUDE.md`.
fn path_re() -> &'static Regex {
    const PATTERN: &str = r"(?:^|[`\s\|(])((src/[\w/._-]+\.\w+|\.codescout/[\w/._-]+\.\w+|Cargo\.toml|CLAUDE\.md|docs/[\w/._-]+\.\w+))";
    static COMPILED: OnceLock<Regex> = OnceLock::new();
    // The pattern is a fixed literal, so the regex is built once and reused.
    COMPILED.get_or_init(|| Regex::new(PATTERN).unwrap())
}
/// Collects the distinct project paths mentioned in `content`, in order of
/// first appearance.
///
/// A path is whatever capture group 1 of [`path_re`] matches. That group's
/// character class does not include `:`, so a `:line` suffix (for example
/// the `:228` in `src/tools/mod.rs:228`) is never part of a captured path
/// and no separate stripping step is needed.
pub fn extract_paths(content: &str) -> Vec<String> {
    // Matches borrow from `content`, so deduplicate on the borrowed slices
    // and allocate a `String` only for the first occurrence of each path.
    let mut seen: HashSet<&str> = HashSet::new();
    path_re()
        .captures_iter(content)
        .filter_map(|caps| caps.get(1))
        .map(|m| m.as_str())
        .filter(|path| seen.insert(*path))
        .map(str::to_owned)
        .collect()
}
/// Builds an [`AnchorFile`] from the paths mentioned in `content`.
///
/// Each path found by [`extract_paths`] is joined onto `project_root`;
/// paths for which `Path::is_file` is false are skipped, and every
/// remaining file is hashed with `super::hash::hash_file`.
///
/// # Errors
/// Returns an error if hashing one of the existing files fails.
pub fn seed_anchors(project_root: &Path, content: &str) -> Result<AnchorFile> {
    let mut anchors = Vec::new();
    for rel in extract_paths(content) {
        let absolute = project_root.join(&rel);
        if !absolute.is_file() {
            continue;
        }
        let hash = super::hash::hash_file(&absolute)?;
        anchors.push(PathAnchor { path: rel, hash });
    }
    Ok(AnchorFile { anchors })
}
pub fn merge_anchors(
project_root: &Path,
existing: &AnchorFile,
new_seed: &AnchorFile,
) -> Result<AnchorFile> {
let mut seen = HashSet::new();
let mut anchors = Vec::new();
for a in &new_seed.anchors {
if seen.insert(a.path.clone()) {
anchors.push(a.clone());
}
}
for a in &existing.anchors {
if seen.insert(a.path.clone()) {
let full = project_root.join(&a.path);
if let Ok(hash) = super::hash::hash_file(&full) {
anchors.push(PathAnchor {
path: a.path.clone(),
hash,
});
}
}
}
Ok(AnchorFile { anchors })
}
/// Why an anchored file is considered stale. Serialized in lowercase (`"changed"`, `"deleted"`).
#[derive(Debug, Clone, PartialEq, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum AnchorStatus {
/// The path exists but its current hash differs from the recorded one.
Changed,
/// Nothing exists at the anchored path.
Deleted,
}
/// One anchored path that failed the staleness check, together with the reason.
#[derive(Debug, Clone, Serialize)]
pub struct StaleFile {
/// The anchor's path, copied from the anchor entry.
pub path: String,
/// Whether the file was changed or deleted.
pub status: AnchorStatus,
}
/// Result of checking every anchor of one sidecar against the files on disk.
#[derive(Debug)]
pub struct StalenessReport {
/// Anchors found to be changed or deleted; empty when everything still matches.
pub stale_files: Vec<StaleFile>,
}
impl StalenessReport {
/// Returns `true` when no anchored file was reported as stale.
pub fn is_fresh(&self) -> bool {
self.stale_files.is_empty()
}
}
/// Compares every anchor in `anchor_file` with the file currently on disk.
///
/// An anchor whose path (joined onto `project_root`) does not exist is
/// reported as [`AnchorStatus::Deleted`]; one whose current hash differs
/// from the recorded hash is reported as [`AnchorStatus::Changed`].
/// Anchors that still match are left out of the report.
///
/// # Errors
/// Returns an error if hashing an existing path fails.
pub fn check_path_staleness(
    project_root: &Path,
    anchor_file: &AnchorFile,
) -> Result<StalenessReport> {
    let mut stale_files = Vec::new();
    for anchor in &anchor_file.anchors {
        let on_disk = project_root.join(&anchor.path);
        let verdict = if !on_disk.exists() {
            Some(AnchorStatus::Deleted)
        } else if super::hash::hash_file(&on_disk)? != anchor.hash {
            Some(AnchorStatus::Changed)
        } else {
            None
        };
        if let Some(status) = verdict {
            stale_files.push(StaleFile {
                path: anchor.path.clone(),
                status,
            });
        }
    }
    Ok(StalenessReport { stale_files })
}
/// Classifies every `.md` memory under `memories_dir` as stale, fresh, or
/// untracked and returns the result as JSON.
///
/// The returned object has three arrays, each sorted by topic so the output
/// does not depend on directory-walk order:
///
/// * `"stale"` — objects with `topic`, `reason`, and, when non-empty,
///   `changed_files` / `deleted_files`. The count in `reason` covers both
///   changed and deleted files.
/// * `"fresh"` — topic names whose anchors all still match.
/// * `"untracked"` — topic names that have no anchor sidecar.
///
/// A topic is the memory's path relative to `memories_dir`, without the
/// extension and with `\` replaced by `/`. If `memories_dir` does not exist
/// all three arrays are empty. Directory entries that fail to be read are
/// skipped.
///
/// # Errors
/// Returns an error if a sidecar cannot be read or parsed, or if the
/// staleness check for a sidecar fails.
pub fn check_all_memories(project_root: &Path, memories_dir: &Path) -> Result<Value> {
    let mut stale: Vec<Value> = Vec::new();
    let mut fresh: Vec<Value> = Vec::new();
    let mut untracked: Vec<String> = Vec::new();

    if !memories_dir.exists() {
        return Ok(json!({ "stale": stale, "fresh": fresh, "untracked": untracked }));
    }

    for dir_entry in walkdir::WalkDir::new(memories_dir).into_iter().flatten() {
        let path = dir_entry.path();
        if !dir_entry.file_type().is_file() || path.extension().is_none_or(|e| e != "md") {
            continue;
        }
        let Ok(rel) = path.strip_prefix(memories_dir) else {
            continue;
        };
        let topic = rel.with_extension("").to_string_lossy().replace('\\', "/");

        let sidecar = anchor_path_for_topic(memories_dir, &topic);
        if !sidecar.exists() {
            untracked.push(topic);
            continue;
        }

        let anchor_file = read_anchor_file(&sidecar)?;
        let report = check_path_staleness(project_root, &anchor_file)?;
        if report.is_fresh() {
            fresh.push(json!(topic));
            continue;
        }

        // Split the stale files by status in a single pass.
        let mut changed: Vec<&str> = Vec::new();
        let mut deleted: Vec<&str> = Vec::new();
        for file in &report.stale_files {
            match file.status {
                AnchorStatus::Changed => changed.push(file.path.as_str()),
                AnchorStatus::Deleted => deleted.push(file.path.as_str()),
            }
        }

        let total_anchored = anchor_file.anchors.len();
        let total_stale = report.stale_files.len();
        let mut item = json!({
            "topic": topic,
            "reason": format!("{} of {} anchored files changed", total_stale, total_anchored),
        });
        if !changed.is_empty() {
            item["changed_files"] = json!(changed);
        }
        if !deleted.is_empty() {
            item["deleted_files"] = json!(deleted);
        }
        stale.push(item);
    }

    // Directory-walk order is platform dependent; sort every list by topic
    // so the report is deterministic.
    stale.sort_by(|a, b| a["topic"].as_str().cmp(&b["topic"].as_str()));
    fresh.sort_by(|a, b| a.as_str().cmp(&b.as_str()));
    untracked.sort();

    Ok(json!({
        "stale": stale,
        "fresh": fresh,
        "untracked": untracked,
    }))
}
/// Returns the sidecar path for `topic`: `<memories_dir>/<name>.anchors.toml`,
/// where `<name>` is the topic after passing through `super::sanitize_topic`.
pub fn anchor_path_for_topic(memories_dir: &Path, topic: &str) -> std::path::PathBuf {
    let file_name = format!("{}.anchors.toml", super::sanitize_topic(topic));
    memories_dir.join(file_name)
}
/// Updates the anchor sidecar of `topic` after its memory `content` was
/// written.
///
/// Anchors are seeded from the paths mentioned in `content`. If the sidecar
/// already holds anchors, those are merged in via [`merge_anchors`];
/// otherwise the seeded set is used on its own. Nothing is written when the
/// resulting anchor list is empty, so an existing sidecar is left untouched
/// in that case.
///
/// # Errors
/// Returns an error if reading the sidecar, seeding, merging, or writing
/// fails.
pub fn update_anchors_on_write(
    project_root: &Path,
    memories_dir: &Path,
    topic: &str,
    content: &str,
) -> Result<()> {
    let sidecar_path = anchor_path_for_topic(memories_dir, topic);
    let existing = read_anchor_file(&sidecar_path)?;
    let new_seed = seed_anchors(project_root, content)?;

    let merged = match existing.anchors.as_slice() {
        [] => new_seed,
        _ => merge_anchors(project_root, &existing, &new_seed)?,
    };

    if merged.anchors.is_empty() {
        return Ok(());
    }
    write_anchor_file(&sidecar_path, &merged)
}
/// Re-hashes every anchor in the sidecar of `topic` and writes it back.
///
/// Anchors whose file can no longer be hashed are removed from the sidecar.
/// When the sidecar is missing or holds no anchors there is nothing to
/// refresh and the function returns without writing. This avoids creating
/// an empty sidecar for a topic that had none, which would make
/// [`check_all_memories`] report the topic as fresh instead of untracked.
///
/// # Errors
/// Returns an error if the sidecar cannot be read or parsed, or if writing
/// the refreshed sidecar fails.
pub fn refresh_hashes(project_root: &Path, memories_dir: &Path, topic: &str) -> Result<()> {
    let sidecar_path = anchor_path_for_topic(memories_dir, topic);
    let mut anchor_file = read_anchor_file(&sidecar_path)?;

    // `read_anchor_file` maps a missing sidecar to an empty list; do not
    // materialize a file out of that.
    if anchor_file.anchors.is_empty() {
        return Ok(());
    }

    anchor_file.anchors.retain_mut(|anchor| {
        match super::hash::hash_file(&project_root.join(&anchor.path)) {
            Ok(hash) => {
                anchor.hash = hash;
                true
            }
            // The file cannot be hashed any more: drop the anchor.
            Err(_) => false,
        }
    });

    write_anchor_file(&sidecar_path, &anchor_file)
}
// Unit tests for the anchor sidecar helpers. Each test works inside its own
// temporary directory (or uses no filesystem at all).
#[cfg(test)]
mod tests {
use super::*;
use tempfile::tempdir;
// Writing an AnchorFile and reading it back yields the same anchors.
#[test]
fn roundtrip_anchor_file() {
let dir = tempdir().unwrap();
let anchors_path = dir.path().join("architecture.anchors.toml");
let anchors = AnchorFile {
anchors: vec![
PathAnchor {
path: "src/server.rs".to_string(),
hash: "abc123".to_string(),
},
PathAnchor {
path: "src/tools/mod.rs".to_string(),
hash: "def456".to_string(),
},
],
};
write_anchor_file(&anchors_path, &anchors).unwrap();
let loaded = read_anchor_file(&anchors_path).unwrap();
assert_eq!(loaded.anchors.len(), 2);
assert_eq!(loaded.anchors[0].path, "src/server.rs");
assert_eq!(loaded.anchors[0].hash, "abc123");
}
// A sidecar that does not exist reads as an empty anchor list, not an error.
#[test]
fn read_missing_returns_empty() {
let dir = tempdir().unwrap();
let anchors_path = dir.path().join("nonexistent.anchors.toml");
let loaded = read_anchor_file(&anchors_path).unwrap();
assert!(loaded.anchors.is_empty());
}
// Paths inside a markdown table and prose are found; a `:228` line suffix is
// not part of the extracted path.
#[test]
fn extract_paths_from_content() {
let content = "## Key Abstractions\n\
| `Tool` trait | `src/tools/mod.rs:228` | Core tool abstraction |\n\
| `OutputGuard` | `src/tools/output.rs` | Progressive disclosure |\n\
See also `Cargo.toml` and `docs/ARCHITECTURE.md`.\n\
Not a path: src without extension or random text.";
let paths = extract_paths(content);
assert!(paths.contains(&"src/tools/mod.rs".to_string()));
assert!(paths.contains(&"src/tools/output.rs".to_string()));
assert!(paths.contains(&"Cargo.toml".to_string()));
assert!(paths.contains(&"docs/ARCHITECTURE.md".to_string()));
assert!(!paths.contains(&"src/tools/mod.rs:228".to_string()));
}
// The same path mentioned twice is returned once.
#[test]
fn extract_paths_deduplicates() {
let content = "See `src/server.rs` and also `src/server.rs` again.";
let paths = extract_paths(content);
assert_eq!(paths.len(), 1);
}
// Seeding only produces anchors for paths that exist as files under the root.
#[test]
fn seed_anchors_only_for_existing_files() {
let dir = tempdir().unwrap();
let root = dir.path();
std::fs::create_dir_all(root.join("src/tools")).unwrap();
std::fs::write(root.join("src/tools/mod.rs"), "fn main() {}").unwrap();
let content = "Uses `src/tools/mod.rs` and `src/nonexistent.rs`.";
let anchors = seed_anchors(root, content).unwrap();
assert_eq!(anchors.anchors.len(), 1);
assert_eq!(anchors.anchors[0].path, "src/tools/mod.rs");
assert!(!anchors.anchors[0].hash.is_empty());
}
// Merging keeps an existing anchor that the new content no longer mentions
// (src/b.rs), adds newly mentioned ones, and replaces outdated hashes.
#[test]
fn merge_preserves_user_added_paths() {
let dir = tempdir().unwrap();
let root = dir.path();
std::fs::create_dir_all(root.join("src")).unwrap();
std::fs::write(root.join("src/a.rs"), "a").unwrap();
std::fs::write(root.join("src/b.rs"), "b").unwrap();
std::fs::write(root.join("src/c.rs"), "c").unwrap();
let existing = AnchorFile {
anchors: vec![
PathAnchor {
path: "src/a.rs".into(),
hash: "old_hash".into(),
},
PathAnchor {
path: "src/b.rs".into(),
hash: "user_added".into(),
},
],
};
let new_seed = seed_anchors(root, "Uses `src/a.rs` and `src/c.rs`.").unwrap();
let merged = merge_anchors(root, &existing, &new_seed).unwrap();
let paths: Vec<&str> = merged.anchors.iter().map(|a| a.path.as_str()).collect();
assert!(paths.contains(&"src/a.rs"));
assert!(paths.contains(&"src/b.rs"));
assert!(paths.contains(&"src/c.rs"));
let a = merged
.anchors
.iter()
.find(|a| a.path == "src/a.rs")
.unwrap();
assert_ne!(a.hash, "old_hash");
}
// One anchor goes through all three states: fresh, changed after a rewrite,
// deleted after removal.
#[test]
fn check_staleness_detects_changes() {
let dir = tempdir().unwrap();
let root = dir.path();
std::fs::create_dir_all(root.join("src")).unwrap();
std::fs::write(root.join("src/a.rs"), "version 1").unwrap();
let anchors = seed_anchors(root, "Uses `src/a.rs`.").unwrap();
let report = check_path_staleness(root, &anchors).unwrap();
assert!(report.stale_files.is_empty());
std::fs::write(root.join("src/a.rs"), "version 2").unwrap();
let report = check_path_staleness(root, &anchors).unwrap();
assert_eq!(report.stale_files.len(), 1);
assert_eq!(report.stale_files[0].status, AnchorStatus::Changed);
std::fs::remove_file(root.join("src/a.rs")).unwrap();
let report = check_path_staleness(root, &anchors).unwrap();
assert_eq!(report.stale_files[0].status, AnchorStatus::Deleted);
}
// An untouched file yields a fresh report.
#[test]
fn check_staleness_all_fresh() {
let dir = tempdir().unwrap();
let root = dir.path();
std::fs::create_dir_all(root.join("src")).unwrap();
std::fs::write(root.join("src/a.rs"), "stable").unwrap();
let anchors = seed_anchors(root, "Uses `src/a.rs`.").unwrap();
let report = check_path_staleness(root, &anchors).unwrap();
assert!(report.is_fresh());
}
// A memory without a sidecar is listed under "untracked".
#[test]
fn check_all_memories_untracked() {
let dir = tempdir().unwrap();
let root = dir.path();
let memories_dir = root.join("memories");
std::fs::create_dir_all(&memories_dir).unwrap();
std::fs::write(memories_dir.join("arch.md"), "# Arch").unwrap();
let result = check_all_memories(root, &memories_dir).unwrap();
let untracked = result["untracked"].as_array().unwrap();
assert_eq!(untracked.len(), 1);
assert_eq!(untracked[0].as_str().unwrap(), "arch");
assert!(result["fresh"].as_array().unwrap().is_empty());
assert!(result["stale"].as_array().unwrap().is_empty());
}
// A memory whose anchored file is unchanged is listed under "fresh".
#[test]
fn check_all_memories_fresh() {
let dir = tempdir().unwrap();
let root = dir.path();
let memories_dir = root.join("memories");
std::fs::create_dir_all(&memories_dir).unwrap();
std::fs::create_dir_all(root.join("src")).unwrap();
std::fs::write(root.join("src/lib.rs"), "stable content").unwrap();
std::fs::write(memories_dir.join("overview.md"), "References `src/lib.rs`.").unwrap();
let anchors = seed_anchors(root, "References `src/lib.rs`.").unwrap();
let sidecar_path = anchor_path_for_topic(&memories_dir, "overview");
write_anchor_file(&sidecar_path, &anchors).unwrap();
let result = check_all_memories(root, &memories_dir).unwrap();
let fresh = result["fresh"].as_array().unwrap();
assert_eq!(fresh.len(), 1);
assert_eq!(fresh[0].as_str().unwrap(), "overview");
assert!(result["untracked"].as_array().unwrap().is_empty());
assert!(result["stale"].as_array().unwrap().is_empty());
}
// Rewriting the anchored file after the sidecar was written makes the memory
// "stale" and lists the file under changed_files.
#[test]
fn check_all_memories_stale() {
let dir = tempdir().unwrap();
let root = dir.path();
let memories_dir = root.join("memories");
std::fs::create_dir_all(&memories_dir).unwrap();
std::fs::create_dir_all(root.join("src")).unwrap();
std::fs::write(root.join("src/lib.rs"), "version 1").unwrap();
let anchors = seed_anchors(root, "References `src/lib.rs`.").unwrap();
std::fs::write(memories_dir.join("overview.md"), "References `src/lib.rs`.").unwrap();
let sidecar_path = anchor_path_for_topic(&memories_dir, "overview");
write_anchor_file(&sidecar_path, &anchors).unwrap();
std::fs::write(root.join("src/lib.rs"), "version 2").unwrap();
let result = check_all_memories(root, &memories_dir).unwrap();
let stale = result["stale"].as_array().unwrap();
assert_eq!(stale.len(), 1);
assert_eq!(stale[0]["topic"].as_str().unwrap(), "overview");
let changed = stale[0]["changed_files"].as_array().unwrap();
assert_eq!(changed.len(), 1);
assert!(changed.iter().any(|v| v.as_str().unwrap() == "src/lib.rs"));
assert!(result["fresh"].as_array().unwrap().is_empty());
assert!(result["untracked"].as_array().unwrap().is_empty());
}
// A memory in a subdirectory gets a topic that includes the directory,
// separated by `/`.
#[test]
fn check_all_memories_nested_untracked() {
let dir = tempdir().unwrap();
let root = dir.path();
let memories_dir = root.join("memories");
std::fs::create_dir_all(memories_dir.join("debugging")).unwrap();
std::fs::write(memories_dir.join("debugging/async-patterns.md"), "# Async").unwrap();
let result = check_all_memories(root, &memories_dir).unwrap();
let untracked = result["untracked"].as_array().unwrap();
assert_eq!(untracked.len(), 1);
assert_eq!(untracked[0].as_str().unwrap(), "debugging/async-patterns");
assert!(result["fresh"].as_array().unwrap().is_empty());
assert!(result["stale"].as_array().unwrap().is_empty());
}
// Nested memory with a matching sidecar is reported as fresh.
#[test]
fn check_all_memories_nested_fresh() {
let dir = tempdir().unwrap();
let root = dir.path();
let memories_dir = root.join("memories");
std::fs::create_dir_all(memories_dir.join("debugging")).unwrap();
std::fs::create_dir_all(root.join("src")).unwrap();
std::fs::write(root.join("src/lib.rs"), "stable content").unwrap();
std::fs::write(
memories_dir.join("debugging/async-patterns.md"),
"References `src/lib.rs`.",
)
.unwrap();
let anchors = seed_anchors(root, "References `src/lib.rs`.").unwrap();
let sidecar = anchor_path_for_topic(&memories_dir, "debugging/async-patterns");
write_anchor_file(&sidecar, &anchors).unwrap();
let result = check_all_memories(root, &memories_dir).unwrap();
let fresh = result["fresh"].as_array().unwrap();
assert_eq!(fresh.len(), 1);
assert_eq!(fresh[0].as_str().unwrap(), "debugging/async-patterns");
assert!(result["untracked"].as_array().unwrap().is_empty());
assert!(result["stale"].as_array().unwrap().is_empty());
}
// Nested memory whose anchored file changed is reported as stale.
#[test]
fn check_all_memories_nested_stale() {
let dir = tempdir().unwrap();
let root = dir.path();
let memories_dir = root.join("memories");
std::fs::create_dir_all(memories_dir.join("debugging")).unwrap();
std::fs::create_dir_all(root.join("src")).unwrap();
std::fs::write(root.join("src/lib.rs"), "version 1").unwrap();
let anchors = seed_anchors(root, "References `src/lib.rs`.").unwrap();
std::fs::write(
memories_dir.join("debugging/async-patterns.md"),
"References `src/lib.rs`.",
)
.unwrap();
let sidecar = anchor_path_for_topic(&memories_dir, "debugging/async-patterns");
write_anchor_file(&sidecar, &anchors).unwrap();
std::fs::write(root.join("src/lib.rs"), "version 2").unwrap();
let result = check_all_memories(root, &memories_dir).unwrap();
let stale = result["stale"].as_array().unwrap();
assert_eq!(stale.len(), 1);
assert_eq!(
stale[0]["topic"].as_str().unwrap(),
"debugging/async-patterns"
);
let changed = stale[0]["changed_files"].as_array().unwrap();
assert_eq!(changed.len(), 1);
assert!(changed.iter().any(|v| v.as_str().unwrap() == "src/lib.rs"));
assert!(result["fresh"].as_array().unwrap().is_empty());
assert!(result["untracked"].as_array().unwrap().is_empty());
}
// Files without an `.md` extension (here a dotfile) are ignored entirely.
#[test]
fn check_all_memories_skips_non_md_files() {
let dir = tempdir().unwrap();
let root = dir.path();
let memories_dir = root.join("memories");
std::fs::create_dir_all(&memories_dir).unwrap();
std::fs::write(memories_dir.join("topics.md"), "# Topics").unwrap();
std::fs::write(memories_dir.join(".hidden"), "should be ignored").unwrap();
let result = check_all_memories(root, &memories_dir).unwrap();
let untracked = result["untracked"].as_array().unwrap();
let fresh = result["fresh"].as_array().unwrap();
let stale = result["stale"].as_array().unwrap();
assert!(
!untracked
.iter()
.any(|v| v.as_str().unwrap_or("") == ".hidden"),
"dotfile must not appear in untracked"
);
assert!(fresh.is_empty(), "dotfile must not appear in fresh");
assert!(stale.is_empty(), "dotfile must not appear in stale");
assert_eq!(untracked.len(), 1);
assert_eq!(untracked[0].as_str().unwrap(), "topics");
}
// StaleFile serializes with a lowercase status string.
#[test]
fn stale_file_serializes_to_json() {
let sf = super::StaleFile {
path: "src/foo.rs".to_string(),
status: super::AnchorStatus::Changed,
};
let json = serde_json::to_value(&sf).unwrap();
assert_eq!(json["path"], "src/foo.rs");
assert_eq!(json["status"], "changed");
let sf_deleted = super::StaleFile {
path: "src/bar.rs".to_string(),
status: super::AnchorStatus::Deleted,
};
let json = serde_json::to_value(&sf_deleted).unwrap();
assert_eq!(json["status"], "deleted");
}
// A topic containing `..` components must still map to a path inside the
// memories directory.
#[test]
fn anchor_path_blocks_traversal() {
let memories_dir = std::path::PathBuf::from("/tmp/test_memories");
let path = anchor_path_for_topic(&memories_dir, "../../etc/passwd");
assert!(
path.starts_with(&memories_dir),
"anchor path {:?} must be inside {:?}",
path,
memories_dir,
);
}
}