#![allow(
clippy::same_name_method,
reason = "rust-embed derive generates conflicting method names"
)]
use std::collections::BTreeMap;
#[cfg(test)]
use std::fmt;
use std::fs;
use std::io::{self, ErrorKind, Write};
use std::path::{Path, PathBuf};
use anyhow::{Context, Result};
use rust_embed::RustEmbed;
use crate::git::AnchorKind;
use crate::memory::{MEMORY_SHIPPED_DIR, Memory};
/// Handle onto the files embedded from the `memory/` folder by the
/// `rust-embed` derive; this file reads them through `CorpusAssets::iter`
/// and `CorpusAssets::get`.
#[derive(RustEmbed)]
#[folder = "memory/"]
struct CorpusAssets;
/// One embedded master: a uid together with the text of its two files.
///
/// `gather_assets` only produces an `Asset` for a uid that has both a
/// `memory.toml` and a `memory.md`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub(crate) struct Asset {
    /// Uid of the master; `apply` also uses it as the directory name.
    pub(crate) uid: String,
    /// Full text of the master's `memory.toml`.
    pub(crate) toml: String,
    /// Full text of the master's `memory.md`.
    pub(crate) md: String,
}
/// Snapshot of one direct child of the shipped directory, as collected by
/// `gather_children` and consumed by `plan_corpus`.
#[derive(Clone, Debug)]
pub(crate) struct RawChild {
    /// File name of the directory entry.
    pub(crate) name: String,
    /// Whether the entry's file type is a directory.
    pub(crate) is_dir: bool,
    /// Text of `<name>/memory.toml`, or `None` when it was not read.
    pub(crate) toml: Option<String>,
    /// Text of `<name>/memory.md`, or `None` when it was not read.
    pub(crate) md: Option<String>,
}
/// Why `plan_corpus` left an on-disk child out of the plan's actions.
#[derive(Clone, Debug, Eq, PartialEq)]
pub(crate) enum SkipReason {
    /// A directory whose `memory.toml` is missing, does not parse, or does
    /// not pass `is_inv`.
    ForeignDir,
    /// A child that is not a directory.
    StrayFile,
}
/// An on-disk child that the plan reports but never writes or prunes.
#[derive(Clone, Debug, Eq, PartialEq)]
pub(crate) struct Skipped {
    /// File name of the skipped child.
    pub(crate) name: String,
    /// Why the child was skipped.
    pub(crate) reason: SkipReason,
}
/// Result of comparing the embedded assets with the shipped directory.
///
/// "Managed" below means an on-disk directory whose `memory.toml` parses and
/// passes `is_inv`; see `plan_corpus`.
#[derive(Default, Debug, Eq, PartialEq)]
pub(crate) struct CorpusPlan {
    /// Embedded assets with no managed directory of the same name.
    pub(crate) new: Vec<Asset>,
    /// Embedded assets whose managed directory holds different toml or md text.
    pub(crate) changed: Vec<Asset>,
    /// Uids whose managed directory already matches the embedded text.
    pub(crate) unchanged: Vec<String>,
    /// Names of managed directories that have no embedded asset.
    pub(crate) prune: Vec<String>,
    /// Children reported but left alone (stray files, foreign directories).
    pub(crate) skipped: Vec<Skipped>,
}
impl CorpusPlan {
    /// Returns `true` when the plan contains nothing to write and nothing to
    /// prune. `unchanged` and `skipped` entries do not count.
    pub(crate) fn is_inert(&self) -> bool {
        let has_writes = !self.new.is_empty() || !self.changed.is_empty();
        let has_prunes = !self.prune.is_empty();
        !(has_writes || has_prunes)
    }
}
/// Returns `true` when the memory has an empty repo coordinate and an anchor
/// kind of `AnchorKind::None`. `plan_corpus` only manages directories whose
/// memory passes this check.
fn is_inv(m: &Memory) -> bool {
    if !m.scope.repo.is_empty() {
        return false;
    }
    m.anchor.kind == AnchorKind::None
}
/// A single finding reported by `lint_master` (test builds only).
#[cfg(test)]
#[derive(Clone, Debug, Eq, PartialEq)]
pub(crate) enum Violation {
    /// `scope.repo` is not empty; carries the offending value.
    NonEmptyRepo(String),
    /// The anchor kind is not `AnchorKind::None`; carries the kind's string form.
    Anchored(&'static str),
    /// The raw `memory_type` key is the literal `"reference"`.
    ReferenceType,
    /// `Memory::parse` rejected the document; carries the error text.
    Schema(String),
    /// `scope.paths`, `scope.globs` and `scope.commands` are all empty.
    ScopeFloor,
}
#[cfg(test)]
impl fmt::Display for Violation {
    /// Writes the human-readable message for this violation.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            // Variants that carry data interpolate it into the message.
            Self::NonEmptyRepo(r) => write!(
                f,
                "repo coordinate must be empty for a global master (found '{r}')"
            ),
            Self::Anchored(k) => write!(
                f,
                "master must be unanchored (anchor_kind=none), found '{k}'"
            ),
            Self::Schema(e) => write!(f, "master fails schema validation: {e}"),
            // Constant messages are written verbatim.
            Self::ReferenceType => f.write_str(
                "memory_type \"reference\" is forbidden — author references as \"signpost\"",
            ),
            Self::ScopeFloor => f.write_str(
                "scope floor unmet — a master needs >=1 of paths/globs/commands (never tag-only)",
            ),
        }
    }
}
/// Lints the text of one master `memory.toml` (test builds only).
///
/// Returns `Ok(())` when nothing is wrong, otherwise every violation found:
/// - `ReferenceType` when the raw `memory_type` key is `"reference"`;
/// - `Schema` when `Memory::parse` fails, unless `ReferenceType` was already
///   reported for the same document;
/// - `NonEmptyRepo`, `Anchored` and `ScopeFloor` from the parsed memory.
#[cfg(test)]
pub(crate) fn lint_master(toml: &str) -> Result<(), Vec<Violation>> {
    let mut found: Vec<Violation> = Vec::new();

    // Checked on the raw document so the literal gets its own signal even
    // when the full parse rejects it.
    let is_reference = raw_memory_type(toml).as_deref() == Some("reference");
    if is_reference {
        found.push(Violation::ReferenceType);
    }

    match Memory::parse(toml) {
        Err(e) => {
            // A parse failure on a "reference" document is already covered above.
            if !is_reference {
                found.push(Violation::Schema(e.to_string()));
            }
        }
        Ok(m) => {
            if !m.scope.repo.is_empty() {
                found.push(Violation::NonEmptyRepo(m.scope.repo.clone()));
            }
            if m.anchor.kind != AnchorKind::None {
                found.push(Violation::Anchored(m.anchor.kind.as_str()));
            }
            let has_scope_floor = !m.scope.paths.is_empty()
                || !m.scope.globs.is_empty()
                || !m.scope.commands.is_empty();
            if !has_scope_floor {
                found.push(Violation::ScopeFloor);
            }
        }
    }

    if found.is_empty() {
        return Ok(());
    }
    Err(found)
}
/// Reads the top-level `memory_type` key from a TOML document without going
/// through `Memory::parse` (test builds only).
///
/// Returns `None` when the text is not valid TOML, the key is absent, or its
/// value is not a string.
#[cfg(test)]
fn raw_memory_type(toml: &str) -> Option<String> {
    let document = toml.parse::<toml::Value>().ok()?;
    let kind = document.get("memory_type")?.as_str()?;
    Some(kind.to_owned())
}
/// Compares the embedded `assets` with the on-disk `children` and returns the
/// resulting [`CorpusPlan`]. Pure: it performs no I/O.
///
/// On-disk children are classified first:
/// - a child that is not a directory is skipped as `StrayFile`;
/// - a directory whose `memory.toml` is missing, fails `Memory::parse`, or
///   does not pass `is_inv` is skipped as `ForeignDir`;
/// - every other directory is "managed" and is keyed by its name.
///
/// Each asset is then `unchanged` (managed directory with identical toml and
/// md text; a missing file counts as empty text), `changed` (managed
/// directory with different text) or `new` (no managed directory). Managed
/// directories without a matching asset end up in `prune`, in sorted order.
///
/// Note: assets are matched against managed directories only, so an asset
/// whose directory exists but was skipped as `ForeignDir` is still reported
/// as `new`.
pub(crate) fn plan_corpus(assets: &[Asset], children: &[RawChild]) -> CorpusPlan {
    let mut plan = CorpusPlan::default();

    // Managed directories: name -> (toml text, md text). The texts are
    // borrowed from `children`, so nothing is copied just to compare it.
    let mut inv: BTreeMap<&str, (&str, &str)> = BTreeMap::new();
    for child in children {
        if !child.is_dir {
            plan.skipped.push(Skipped {
                name: child.name.clone(),
                reason: SkipReason::StrayFile,
            });
            continue;
        }
        let managed = child
            .toml
            .as_deref()
            .and_then(|text| Memory::parse(text).ok())
            .is_some_and(|m| is_inv(&m));
        if managed {
            inv.insert(
                child.name.as_str(),
                (
                    child.toml.as_deref().unwrap_or_default(),
                    child.md.as_deref().unwrap_or_default(),
                ),
            );
        } else {
            plan.skipped.push(Skipped {
                name: child.name.clone(),
                reason: SkipReason::ForeignDir,
            });
        }
    }

    for asset in assets {
        match inv.get(asset.uid.as_str()) {
            Some(&(toml, md)) if toml == asset.toml && md == asset.md => {
                plan.unchanged.push(asset.uid.clone());
            }
            Some(_) => plan.changed.push(asset.clone()),
            None => plan.new.push(asset.clone()),
        }
    }

    // Sort the embedded uids once so each orphan check is a binary search
    // rather than a scan over every asset.
    let mut embedded: Vec<&str> = assets.iter().map(|a| a.uid.as_str()).collect();
    embedded.sort_unstable();
    plan.prune.extend(
        inv.keys()
            .copied()
            .filter(|uid| embedded.binary_search(uid).is_err())
            .map(str::to_owned),
    );

    plan
}
/// Collects every file embedded in `CorpusAssets` and pairs them into
/// [`Asset`]s via `gather_assets`.
///
/// A path that `iter` lists but `get` cannot return is skipped.
pub(crate) fn embedded_assets() -> Vec<Asset> {
    let mut files: Vec<(String, Vec<u8>)> = Vec::new();
    for key in CorpusAssets::iter() {
        let path = key.as_ref().to_owned();
        if let Some(file) = CorpusAssets::get(&path) {
            files.push((path, file.data.into_owned()));
        }
    }
    gather_assets(files)
}
/// Pairs `(path, bytes)` entries into [`Asset`]s.
///
/// Only paths of the exact form `<uid>/memory.toml` or `<uid>/memory.md` are
/// considered, where `<uid>` passes `crate::memory::is_uid`. Entries without
/// a `/`, with another file name, or whose bytes are not valid UTF-8 are
/// ignored. A uid yields an asset only when both files are present; the
/// result is ordered by uid.
fn gather_assets<I>(files: I) -> Vec<Asset>
where
    I: IntoIterator<Item = (String, Vec<u8>)>,
{
    let mut tomls: BTreeMap<String, String> = BTreeMap::new();
    let mut mds: BTreeMap<String, String> = BTreeMap::new();
    for (path, data) in files {
        // Split at the first '/', so a nested path keeps its sub-path in
        // `file` and fails the name match below.
        let Some((uid, file)) = path.split_once('/') else {
            continue;
        };
        if !crate::memory::is_uid(uid) {
            continue;
        }
        // Pick the destination before decoding so unrelated files are never
        // UTF-8 validated.
        let slot = match file {
            "memory.toml" => &mut tomls,
            "memory.md" => &mut mds,
            _ => continue,
        };
        let Ok(text) = String::from_utf8(data) else {
            continue;
        };
        slot.insert(uid.to_owned(), text);
    }
    tomls
        .into_iter()
        .filter_map(|(uid, toml)| {
            // Each uid occurs once in `tomls`, so its markdown can be moved
            // out of `mds` rather than cloned.
            let md = mds.remove(&uid)?;
            Some(Asset { uid, toml, md })
        })
        .collect()
}
fn gather_children(shipped: &Path) -> Result<Vec<RawChild>> {
let mut out = Vec::new();
let entries = match fs::read_dir(shipped) {
Ok(entries) => entries,
Err(e) if e.kind() == ErrorKind::NotFound => return Ok(out),
Err(e) => {
return Err(e).with_context(|| format!("Failed to read {}", shipped.display()));
}
};
for entry in entries {
let entry = entry?;
let Ok(name) = entry.file_name().into_string() else {
continue;
};
if !entry.file_type()?.is_dir() {
out.push(RawChild {
name,
is_dir: false,
toml: None,
md: None,
});
continue;
}
let dir = entry.path();
out.push(RawChild {
name,
is_dir: true,
toml: fs::read_to_string(dir.join("memory.toml")).ok(),
md: fs::read_to_string(dir.join("memory.md")).ok(),
});
}
Ok(out)
}
/// What `sync_corpus` returns: the plan it computed, which it has also
/// applied unless the call was a dry run.
#[derive(Debug)]
pub(crate) struct SyncReport {
    /// The computed plan.
    pub(crate) plan: CorpusPlan,
}
/// Plans a sync of `assets` into `root`/`MEMORY_SHIPPED_DIR` and, unless
/// `dry_run` is set, applies it.
///
/// # Errors
///
/// Propagates failures from listing the shipped directory and from applying
/// the plan.
pub(crate) fn sync_corpus(root: &Path, assets: &[Asset], dry_run: bool) -> Result<SyncReport> {
    let shipped = root.join(MEMORY_SHIPPED_DIR);
    let plan = plan_corpus(assets, &gather_children(&shipped)?);
    if dry_run {
        return Ok(SyncReport { plan });
    }
    apply(&shipped, &plan)?;
    Ok(SyncReport { plan })
}
fn apply(shipped: &Path, plan: &CorpusPlan) -> Result<()> {
for asset in plan.new.iter().chain(plan.changed.iter()) {
let dir = shipped.join(&asset.uid);
fs::create_dir_all(&dir).with_context(|| format!("Failed to create {}", dir.display()))?;
let toml_path = dir.join("memory.toml");
fs::write(&toml_path, &asset.toml)
.with_context(|| format!("Failed to write {}", toml_path.display()))?;
let md_path = dir.join("memory.md");
fs::write(&md_path, &asset.md)
.with_context(|| format!("Failed to write {}", md_path.display()))?;
}
for uid in &plan.prune {
let dir = shipped.join(uid);
fs::remove_dir_all(&dir).with_context(|| format!("Failed to prune {}", dir.display()))?;
}
Ok(())
}
pub(crate) fn run_sync(path: Option<PathBuf>, dry_run: bool, yes: bool) -> Result<()> {
let Ok(root) = crate::root::find(path, &crate::root::default_markers()) else {
writeln!(io::stdout(), "Not in a doctrine repo — nothing to sync.")?;
return Ok(());
};
let shipped = root.join(MEMORY_SHIPPED_DIR);
let assets = embedded_assets();
let preview = sync_corpus(&root, &assets, true)?;
print_plan(&preview.plan, &shipped, dry_run)?;
if dry_run || preview.plan.is_inert() {
return Ok(());
}
if !yes
&& !crate::install::prompt_confirm(&format!(
"Apply corpus sync to {}? [y/N] ",
shipped.display()
))?
{
writeln!(io::stdout(), "Aborted.")?;
return Ok(());
}
sync_corpus(&root, &assets, false)?;
Ok(())
}
fn print_plan(plan: &CorpusPlan, shipped: &Path, dry_run: bool) -> Result<()> {
let mut out = io::stdout();
let tag = if dry_run { "[dry-run] " } else { "" };
writeln!(
out,
"{tag}corpus sync → {}: {} new, {} changed, {} unchanged, {} prune",
shipped.display(),
plan.new.len(),
plan.changed.len(),
plan.unchanged.len(),
plan.prune.len(),
)?;
for skip in &plan.skipped {
let what = match skip.reason {
SkipReason::ForeignDir => "foreign dir (not a doctrine master)",
SkipReason::StrayFile => "stray file",
};
writeln!(out, " left untouched: {} — {what}", skip.name)?;
}
Ok(())
}
pub(crate) fn run_sync_install(path: Option<PathBuf>, dry_run: bool, yes: bool) -> Result<()> {
let root = crate::root::find(path, &crate::root::default_markers())?;
let exec = std::env::current_exe().context("Failed to resolve the doctrine executable path")?;
let spec = crate::boot::HookSpec::sync(&exec);
if !yes && !dry_run {
let proceed = crate::install::prompt_confirm(&format!(
"Wire the doctrine memory-sync session hook into {}? [y/N] ",
root.display()
))?;
if !proceed {
writeln!(io::stdout(), "Aborted.")?;
return Ok(());
}
}
let mut out = io::stdout();
let tag = if dry_run { "[dry-run] " } else { "" };
match crate::boot::install_claude_hook(&root, &spec, dry_run)? {
crate::boot::RefreshOutcome::Wired(cmd) => {
writeln!(out, " {tag}claude: wired sync hook: {cmd}")?;
}
crate::boot::RefreshOutcome::Refreshed(cmd) => {
writeln!(out, " {tag}claude: refreshed sync hook: {cmd}")?;
}
crate::boot::RefreshOutcome::None => {
writeln!(out, " {tag}claude: sync hook already current")?;
}
crate::boot::RefreshOutcome::PrintedFallback => {
writeln!(
out,
" claude: settings are malformed — add this hook manually:"
)?;
writeln!(out, "{}", crate::boot::fallback_for(&spec))?;
}
}
Ok(())
}
// Unit tests for the planner, the asset pairing, the on-disk sync and the
// master lint.
#[cfg(test)]
mod tests {
use super::*;
// Builds an asset whose TOML has an empty `repo` and an empty `anchor_kind`;
// `body` only affects the markdown text.
fn inv_master(uid: &str, body: &str) -> Asset {
let toml = format!(
r#"memory_uid = "{uid}"
schema_version = 1
memory_type = "fact"
status = "active"
title = "t"
summary = "s"
created = "2026-01-01"
updated = "2026-01-01"
[scope]
workspace = "global"
repo = ""
[git]
anchor_kind = ""
[review]
verification_state = "unverified"
reviewed = ""
review_by = ""
[trust]
trust_level = "standard"
[ranking]
severity = "info"
weight = 0
"#
);
Asset {
uid: uid.to_owned(),
toml,
md: format!("# {uid}\n\n{body}\n"),
}
}
// Mirrors an asset as the on-disk directory that would hold exactly its files.
fn inv_child(asset: &Asset) -> RawChild {
RawChild {
name: asset.uid.clone(),
is_dir: true,
toml: Some(asset.toml.clone()),
md: Some(asset.md.clone()),
}
}
// Two distinct uids shared by the tests below.
const UID_A: &str = "mem_00000000000000000000000000000001";
const UID_B: &str = "mem_00000000000000000000000000000002";
// No assets and no children: the plan equals the default, empty plan.
#[test]
fn empty_embed_and_empty_disk_is_inert() {
let plan = plan_corpus(&[], &[]);
assert!(plan.is_inert());
assert_eq!(plan, CorpusPlan::default());
}
// An asset with nothing on disk is classified as new.
#[test]
fn absent_on_disk_is_new() {
let a = inv_master(UID_A, "alpha");
let plan = plan_corpus(&[a.clone()], &[]);
assert_eq!(plan.new, vec![a]);
assert!(plan.changed.is_empty() && plan.prune.is_empty());
}
// Identical text on disk is classified as unchanged and leaves the plan inert.
#[test]
fn identical_on_disk_is_unchanged_and_inert() {
let a = inv_master(UID_A, "alpha");
let plan = plan_corpus(&[a.clone()], &[inv_child(&a)]);
assert_eq!(plan.unchanged, vec![UID_A.to_owned()]);
assert!(plan.is_inert(), "identical input must produce zero writes");
}
// Same uid with a different markdown body is classified as changed.
#[test]
fn differing_body_is_changed() {
let asset = inv_master(UID_A, "new-body");
let stale = inv_master(UID_A, "old-body");
let plan = plan_corpus(&[asset.clone()], &[inv_child(&stale)]);
assert_eq!(plan.changed, vec![asset]);
assert!(plan.unchanged.is_empty());
}
// A managed directory with no matching asset is scheduled for pruning.
#[test]
fn inv_orphan_absent_from_embed_is_pruned() {
let orphan = inv_master(UID_B, "orphan");
let plan = plan_corpus(&[], &[inv_child(&orphan)]);
assert_eq!(plan.prune, vec![UID_B.to_owned()]);
}
// Uids in the write lists contain no path separator.
#[test]
fn plan_never_names_items_path() {
let a = inv_master(UID_A, "alpha");
let plan = plan_corpus(&[a.clone()], &[]);
for entry in plan.new.iter().chain(plan.changed.iter()) {
assert!(!entry.uid.contains('/'), "uid must be a bare dir name");
}
}
// A non-directory child is reported as StrayFile and never pruned.
#[test]
fn stray_file_survives_and_is_reported() {
let stray = RawChild {
name: "README".to_owned(),
is_dir: false,
toml: None,
md: None,
};
let plan = plan_corpus(&[], &[stray]);
assert!(plan.prune.is_empty(), "a stray file must never be pruned");
assert_eq!(
plan.skipped,
vec![Skipped {
name: "README".to_owned(),
reason: SkipReason::StrayFile,
}]
);
}
// A directory whose TOML does not parse is reported as ForeignDir and never pruned.
#[test]
fn unparseable_dir_survives_and_is_reported() {
let junk = RawChild {
name: "mem_garbage".to_owned(),
is_dir: true,
toml: Some("not valid toml {{{".to_owned()),
md: None,
};
let plan = plan_corpus(&[], &[junk]);
assert!(plan.prune.is_empty(), "an unparseable dir must survive");
assert_eq!(plan.skipped[0].reason, SkipReason::ForeignDir);
}
// A directory given a non-empty repo and a commit anchor is reported as
// ForeignDir and never pruned.
#[test]
fn parseable_non_inv_dir_survives() {
let mut scoped = inv_master(UID_A, "scoped");
scoped.toml = scoped
.toml
.replace(r#"repo = """#, r#"repo = "github.com/x/y""#);
scoped.toml = scoped.toml.replace(
r#"anchor_kind = """#,
"anchor_kind = \"commit\"\ncommit = \"abc\"\ntree = \"def\"",
);
let child = RawChild {
name: UID_A.to_owned(),
is_dir: true,
toml: Some(scoped.toml.clone()),
md: Some(scoped.md.clone()),
};
let plan = plan_corpus(&[], &[child]);
assert!(
plan.prune.is_empty(),
"a parseable non-INV dir must survive prune"
);
assert_eq!(plan.skipped[0].reason, SkipReason::ForeignDir);
}
// A top-level file without a '/' is ignored; the toml/md pair becomes one asset.
#[test]
fn gather_assets_pairs_toml_and_md_and_ignores_gitkeep() {
let files = vec![
(".gitkeep".to_owned(), Vec::new()),
(format!("{UID_A}/memory.toml"), b"toml-a".to_vec()),
(format!("{UID_A}/memory.md"), b"md-a".to_vec()),
];
let assets = gather_assets(files);
assert_eq!(assets.len(), 1);
assert_eq!(assets[0].uid, UID_A);
assert_eq!(assets[0].toml, "toml-a");
assert_eq!(assets[0].md, "md-a");
}
// A toml without its md produces no asset.
#[test]
fn gather_assets_drops_incomplete_pair() {
let files = vec![(format!("{UID_A}/memory.toml"), b"lonely".to_vec())];
assert!(gather_assets(files).is_empty());
}
// The second directory name below is expected not to produce an asset, so
// only the UID_A pair survives.
#[test]
fn gather_assets_skips_key_symlink_aliases() {
let files = vec![
(format!("{UID_A}/memory.toml"), b"toml-a".to_vec()),
(format!("{UID_A}/memory.md"), b"md-a".to_vec()),
(
"mem.signpost.doctrine.overview/memory.toml".to_owned(),
b"toml-a".to_vec(),
),
(
"mem.signpost.doctrine.overview/memory.md".to_owned(),
b"md-a".to_vec(),
),
];
let assets = gather_assets(files);
assert_eq!(assets.len(), 1, "the alias must not double the master");
assert_eq!(assets[0].uid, UID_A);
}
// Path of the shipped directory under a test root.
fn shipped_dir(root: &Path) -> PathBuf {
root.join(MEMORY_SHIPPED_DIR)
}
// The first sync writes both masters; a second sync with the same assets is inert.
#[test]
fn sync_populates_then_is_idempotent() {
let tmp = tempfile::tempdir().unwrap();
let a = inv_master(UID_A, "alpha");
let b = inv_master(UID_B, "beta");
let assets = vec![a.clone(), b.clone()];
let first = sync_corpus(tmp.path(), &assets, false).unwrap();
assert_eq!(first.plan.new.len(), 2);
let toml = shipped_dir(tmp.path()).join(UID_A).join("memory.toml");
assert_eq!(fs::read_to_string(&toml).unwrap(), a.toml);
let second = sync_corpus(tmp.path(), &assets, false).unwrap();
assert!(
second.plan.is_inert(),
"re-sync of identical assets must write nothing"
);
assert_eq!(second.plan.unchanged.len(), 2);
}
// A real sync removes the orphaned managed directory, keeps the stray file
// and writes the new master.
#[test]
fn sync_prunes_inv_orphan_but_spares_foreign_file() {
let tmp = tempfile::tempdir().unwrap();
let shipped = shipped_dir(tmp.path());
let orphan = inv_master(UID_B, "orphan");
let odir = shipped.join(UID_B);
fs::create_dir_all(&odir).unwrap();
fs::write(odir.join("memory.toml"), &orphan.toml).unwrap();
fs::write(odir.join("memory.md"), &orphan.md).unwrap();
let foreign = shipped.join("KEEP_ME");
fs::write(&foreign, "hands off").unwrap();
let a = inv_master(UID_A, "alpha");
let report = sync_corpus(tmp.path(), &[a.clone()], false).unwrap();
assert_eq!(report.plan.prune, vec![UID_B.to_owned()]);
assert!(!odir.exists(), "the INV orphan must be pruned");
assert!(foreign.exists(), "the foreign file must survive");
assert!(shipped.join(UID_A).join("memory.toml").exists());
}
// A dry run reports the plan but creates nothing on disk.
#[test]
fn dry_run_writes_nothing() {
let tmp = tempfile::tempdir().unwrap();
let a = inv_master(UID_A, "alpha");
let report = sync_corpus(tmp.path(), &[a], true).unwrap();
assert_eq!(report.plan.new.len(), 1);
assert!(!shipped_dir(tmp.path()).join(UID_A).exists());
}
// A missing shipped directory is not an error.
#[test]
fn sync_with_no_shipped_root_is_clean() {
let tmp = tempfile::tempdir().unwrap();
let report = sync_corpus(tmp.path(), &[], false).unwrap();
assert!(report.plan.is_inert());
}
// TOML for a master that is expected to lint clean: empty repo, empty
// anchor_kind and one scope path.
fn clean_master_toml() -> String {
r#"memory_uid = "mem_00000000000000000000000000000009"
schema_version = 1
memory_type = "fact"
status = "active"
title = "t"
summary = "s"
created = "2026-01-01"
updated = "2026-01-01"
[scope]
workspace = "global"
repo = ""
paths = [".doctrine/spec/tech/"]
[git]
anchor_kind = ""
[review]
verification_state = "unverified"
reviewed = ""
review_by = ""
[trust]
trust_level = "standard"
[ranking]
severity = "info"
weight = 0
"#
.to_owned()
}
// The baseline fixture produces no violations.
#[test]
fn lint_passes_a_clean_master() {
assert!(
lint_master(&clean_master_toml()).is_ok(),
"a repo=\"\"/anchor-none master with a path scope must lint clean"
);
}
// A non-empty repo is reported together with its value.
#[test]
fn lint_flags_a_non_empty_repo() {
let toml = clean_master_toml().replace(r#"repo = """#, r#"repo = "github.com/x/y""#);
let v = lint_master(&toml).unwrap_err();
assert!(
v.iter()
.any(|x| matches!(x, Violation::NonEmptyRepo(r) if r == "github.com/x/y")),
"expected NonEmptyRepo, got {v:?}"
);
}
// A commit anchor is reported as Anchored("commit").
#[test]
fn lint_flags_a_present_anchor() {
let toml = clean_master_toml().replace(
r#"anchor_kind = """#,
"anchor_kind = \"commit\"\ncommit = \"abc\"\ntree = \"def\"",
);
let v = lint_master(&toml).unwrap_err();
assert!(
v.iter().any(|x| matches!(x, Violation::Anchored("commit"))),
"expected Anchored(commit), got {v:?}"
);
}
// memory_type "reference" yields ReferenceType and no accompanying Schema violation.
#[test]
fn lint_flags_a_reference_type_with_a_dedicated_signal() {
let toml =
clean_master_toml().replace(r#"memory_type = "fact""#, r#"memory_type = "reference""#);
let v = lint_master(&toml).unwrap_err();
assert!(
v.contains(&Violation::ReferenceType),
"expected ReferenceType, got {v:?}"
);
assert!(
!v.iter().any(|x| matches!(x, Violation::Schema(_))),
"the reference literal must not also surface as a generic Schema bail: {v:?}"
);
}
// Replacing the path scope with tags only trips the scope floor.
#[test]
fn lint_flags_a_tag_only_scope() {
let toml = clean_master_toml().replace(
r#"paths = [".doctrine/spec/tech/"]"#,
r#"tags = ["doctrine"]"#,
);
let v = lint_master(&toml).unwrap_err();
assert!(
v.contains(&Violation::ScopeFloor),
"expected ScopeFloor, got {v:?}"
);
}
// An unknown memory_type other than "reference" surfaces as a Schema violation.
#[test]
fn lint_flags_an_unknown_type_as_schema() {
let toml =
clean_master_toml().replace(r#"memory_type = "fact""#, r#"memory_type = "bogus""#);
let v = lint_master(&toml).unwrap_err();
assert!(
v.iter().any(|x| matches!(x, Violation::Schema(_))),
"an unknown (non-reference) type must surface as Schema, got {v:?}"
);
}
// Lints every master actually embedded in the binary; panics with the uid
// and all violation messages on the first failure.
#[test]
fn every_embedded_master_lints_clean() {
for asset in embedded_assets() {
if let Err(violations) = lint_master(&asset.toml) {
let detail: Vec<String> = violations.iter().map(ToString::to_string).collect();
panic!(
"embedded master {} fails master-lint: {}",
asset.uid,
detail.join("; ")
);
}
}
}
}