use anyhow::Result;
use serde::{Deserialize, Serialize};
use serde_json::{json, Value};
use super::{RecoverableError, ToolContext};
/// A single integrity finding produced by the catalog scans in this module.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Violation {
/// Name of the check that fired (e.g. `"missing_file"`, `"backslash_in_abs_path"`).
pub check: String,
/// Id of the offending `artifact` row; `None` when the finding is not tied to
/// an artifact (the `git_root` scan of `commits` reports `None`).
pub artifact_id: Option<String>,
/// The stored path value the finding is about.
pub path: String,
/// Human-readable explanation of what is wrong with `path`.
pub detail: String,
}
impl Violation {
fn new(
check: &str,
artifact_id: Option<String>,
path: impl Into<String>,
detail: impl Into<String>,
) -> Self {
Self {
check: check.into(),
artifact_id,
path: path.into(),
detail: detail.into(),
}
}
}
/// Entry point of the doctor tool.
///
/// If `args["fix"]` is a string, the request is handed to `run_fix` together
/// with the optional string `args["root"]`. Otherwise the catalog is scanned
/// (artifact paths first, then commit git roots) and a JSON report is
/// returned:
///
/// ```text
/// { "violations": [...], "summary": { "total": <n>, "by_check": { <check>: <count> } } }
/// ```
///
/// # Errors
/// Propagates any error from the scans or from `run_fix`.
pub async fn call(ctx: &ToolContext, args: Value) -> Result<Value> {
    let requested_fix = args.get("fix").and_then(Value::as_str);
    if let Some(fix) = requested_fix {
        let root = args.get("root").and_then(Value::as_str);
        return run_fix(ctx, fix, root).await;
    }

    // The catalog lock is held only for the two scans; the guard is released
    // at the end of this block, before the report is assembled.
    let all_violations: Vec<Violation> = {
        let cat = ctx.catalog.lock();
        let mut found = scan_artifact_paths(&cat.conn)?;
        found.extend(scan_commits_git_root(&cat.conn)?);
        found
    };

    // Tally findings per check name; BTreeMap keeps the keys sorted.
    let mut by_check = std::collections::BTreeMap::<String, usize>::new();
    for violation in &all_violations {
        *by_check.entry(violation.check.clone()).or_default() += 1;
    }

    let total = all_violations.len();
    Ok(json!({
        "violations": all_violations,
        "summary": {
            "total": total,
            "by_check": by_check,
        },
    }))
}
/// Validates a fix request and returns the root whose rows may be pruned.
///
/// The gates are applied in this order:
/// 1. `fix` must be `"prune_missing"` (the only supported fix).
/// 2. `root` must be supplied.
/// 3. `root` must be an absolute path.
/// 4. `root` must be *verifiably* absent from disk.
///
/// Gate 4 uses `Path::try_exists` rather than `Path::exists`: `exists()`
/// reports `false` for permission and other I/O errors too, which would let a
/// live-but-inaccessible root pass the gate in front of a destructive delete.
/// When existence cannot be determined the request is refused.
///
/// # Errors
/// Returns an error built with `RecoverableError::new` describing the first
/// gate that failed.
fn validate_prune_request<'a>(fix: &str, root: Option<&'a str>) -> Result<&'a std::path::Path> {
    if fix != "prune_missing" {
        return Err(RecoverableError::new(format!(
            "unknown fix '{fix}' — supported: prune_missing (requires root=<absolute path of the dead/renamed repo root>)"
        )));
    }
    let root = root.ok_or_else(|| {
        RecoverableError::new(
            "fix=prune_missing requires root=<absolute path of the dead/renamed repo root to prune>",
        )
    })?;
    let root_path = std::path::Path::new(root);
    if !root_path.is_absolute() {
        return Err(RecoverableError::new(format!(
            "root must be an absolute path, got '{root}'"
        )));
    }
    match root_path.try_exists() {
        // Confirmed gone: safe to prune.
        Ok(false) => Ok(root_path),
        Ok(true) => Err(RecoverableError::new(format!(
            "root '{root}' still exists on disk — prune_missing only removes rows under a dead/renamed root; nothing pruned"
        ))),
        // Fail closed: an unreadable path is not proof of a dead root.
        Err(err) => Err(RecoverableError::new(format!(
            "cannot verify that root '{root}' is gone ({err}) — refusing to prune; nothing pruned"
        ))),
    }
}
/// Executes a fix request against the catalog.
///
/// The request is first vetted by `validate_prune_request`. On success the
/// rows at or under the validated root are deleted via `prune_dead_root`
/// while the catalog lock is held, and a JSON summary with the number of
/// deleted `artifact` and `commits` rows is returned.
///
/// # Errors
/// Propagates validation failures and database errors.
async fn run_fix(ctx: &ToolContext, fix: &str, root: Option<&str>) -> Result<Value> {
    let root_path = validate_prune_request(fix, root)?;

    let cat = ctx.catalog.lock();
    let outcome = prune_dead_root(&cat.conn, root_path);
    // Release the lock before inspecting the result or building the response.
    drop(cat);
    let (artifact_rows, commit_rows) = outcome?;

    let pruned = json!({ "artifact_rows": artifact_rows, "commit_rows": commit_rows });
    Ok(json!({
        "fix": "prune_missing",
        "root": root_path.to_string_lossy(),
        "pruned": pruned,
    }))
}
/// Deletes every catalog row located at or under `root`.
///
/// Rows are removed from `artifact` (matched on `abs_path`) and from
/// `commits` (matched on `git_root`). A row matches when its path equals the
/// rendered root exactly or starts with `<root>/`, so a sibling such as
/// `<root>-other` is left alone.
///
/// The root is embedded in a `LIKE` pattern, so the LIKE metacharacters `%`
/// and `_` (and the escape character itself) are escaped first. Without that,
/// a root such as `/gone/my_repo` would also match `/gone/myXrepo/...` and
/// delete rows belonging to an unrelated repository.
///
/// NOTE(review): SQLite's `LIKE` is ASCII case-insensitive unless
/// `case_sensitive_like` is enabled, so the prefix branch ignores ASCII case
/// while the equality branch does not — confirm this is acceptable.
///
/// Returns `(artifact_rows_deleted, commit_rows_deleted)`.
///
/// # Errors
/// Propagates any database error from either `DELETE`.
fn prune_dead_root(conn: &rusqlite::Connection, root: &std::path::Path) -> Result<(usize, usize)> {
    // Presumably the forward-slash rendering used for stored paths — confirm
    // against `RepoPath`.
    let root_fwd = crate::util::fs::RepoPath::from_path(root).to_string();

    // Build `<escaped root>/%`, treating every character of the root literally.
    let mut under = String::with_capacity(root_fwd.len() + 2);
    for ch in root_fwd.chars() {
        if matches!(ch, '\\' | '%' | '_') {
            under.push('\\');
        }
        under.push(ch);
    }
    under.push_str("/%");

    let artifact_rows = conn.execute(
        "DELETE FROM artifact WHERE abs_path = ?1 OR abs_path LIKE ?2 ESCAPE '\\'",
        rusqlite::params![root_fwd, under],
    )?;
    let commit_rows = conn.execute(
        "DELETE FROM commits WHERE git_root = ?1 OR git_root LIKE ?2 ESCAPE '\\'",
        rusqlite::params![root_fwd, under],
    )?;
    Ok((artifact_rows, commit_rows))
}
/// Runs every per-artifact path check over all rows of the `artifact` table.
///
/// All `(id, abs_path)` pairs are loaded first; then, per row, the checks run
/// in a fixed order — absolute form, backslash, ADS colon, `..` segment,
/// missing file — and each check that fires contributes one [`Violation`].
///
/// # Errors
/// Propagates any database error from preparing or reading the query.
fn scan_artifact_paths(conn: &rusqlite::Connection) -> Result<Vec<Violation>> {
    let mut stmt = conn.prepare("SELECT id, abs_path FROM artifact")?;
    let mapped = stmt.query_map([], |row| {
        let id: String = row.get(0)?;
        let abs_path: String = row.get(1)?;
        Ok((id, abs_path))
    })?;
    let rows = mapped.collect::<rusqlite::Result<Vec<(String, String)>>>()?;

    let mut violations = Vec::new();
    for (id, abs_path) in &rows {
        // Each check yields at most one finding; chaining the `Option`s keeps
        // the per-row ordering of the report stable.
        let findings = check_abs_path_must_be_absolute(id, abs_path)
            .into_iter()
            .chain(check_backslash(id, abs_path, "backslash_in_abs_path"))
            .chain(check_ads_colon(id, abs_path))
            .chain(check_dotdot_segment(id, abs_path))
            .chain(check_missing_file(id, abs_path));
        violations.extend(findings);
    }
    Ok(violations)
}
/// Flags every distinct `commits.git_root` value that contains a backslash.
///
/// Findings use the check name `"backslash_in_git_root"`, carry no artifact
/// id, and report the byte offset of the first backslash.
///
/// # Errors
/// Propagates any database error from preparing or reading the query.
fn scan_commits_git_root(conn: &rusqlite::Connection) -> Result<Vec<Violation>> {
    let mut stmt = conn.prepare("SELECT DISTINCT git_root FROM commits")?;
    let git_roots = stmt
        .query_map([], |row| row.get::<_, String>(0))?
        .collect::<rusqlite::Result<Vec<String>>>()?;

    let violations: Vec<Violation> = git_roots
        .into_iter()
        .filter_map(|git_root| {
            let pos = git_root.find('\\')?;
            Some(Violation::new(
                "backslash_in_git_root",
                None,
                git_root,
                format!("backslash at byte position {pos}"),
            ))
        })
        .collect();
    Ok(violations)
}
/// Reports a violation named `check_name` when `abs_path` contains a
/// backslash; the detail carries the byte offset of the first one.
///
/// Returns `None` for paths without any backslash.
fn check_backslash(id: &str, abs_path: &str, check_name: &str) -> Option<Violation> {
    let pos = abs_path.find('\\')?;
    let detail = format!("backslash at byte position {pos}");
    Some(Violation::new(
        check_name,
        Some(id.to_owned()),
        abs_path,
        detail,
    ))
}
/// Reports an `"ads_colon_in_abs_path"` violation when `abs_path` contains a
/// colon anywhere other than in a leading `<letter>:` drive prefix.
///
/// The reported position is the byte offset of the first such colon within
/// the full `abs_path` (not within the part after the drive prefix).
fn check_ads_colon(id: &str, abs_path: &str) -> Option<Violation> {
    // A leading ASCII letter followed by ':' is a drive prefix and is exempt.
    // Both bytes are ASCII, so slicing at 2 lands on a char boundary.
    let drive_prefix_len = match abs_path.as_bytes() {
        [letter, b':', ..] if letter.is_ascii_alphabetic() => 2,
        _ => 0,
    };
    let absolute_pos = abs_path[drive_prefix_len..].find(':')? + drive_prefix_len;
    Some(Violation::new(
        "ads_colon_in_abs_path",
        Some(id.to_owned()),
        abs_path,
        format!("colon at byte position {absolute_pos} (outside drive prefix)"),
    ))
}
/// Reports a `"dotdot_segment_in_abs_path"` violation when any `/`-separated
/// segment of `abs_path` is exactly `..`.
///
/// Names that merely contain two dots (e.g. `foo..bar.md`) do not match.
fn check_dotdot_segment(id: &str, abs_path: &str) -> Option<Violation> {
    let has_dotdot = abs_path.split('/').any(|segment| segment == "..");
    if !has_dotdot {
        return None;
    }
    Some(Violation::new(
        "dotdot_segment_in_abs_path",
        Some(id.to_owned()),
        abs_path,
        "path contains a '..' segment",
    ))
}
/// Reports a `"missing_file"` violation when `abs_path` does not exist on
/// disk according to `Path::exists` (which also reports `false` when the path
/// cannot be accessed).
fn check_missing_file(id: &str, abs_path: &str) -> Option<Violation> {
    let on_disk = std::path::Path::new(abs_path).exists();
    (!on_disk).then(|| {
        Violation::new(
            "missing_file",
            Some(id.to_owned()),
            abs_path,
            "file does not exist on disk",
        )
    })
}
/// Reports an `"abs_path_must_be_absolute"` violation unless `abs_path`
/// starts with `/` or with an ASCII letter followed by `:`.
///
/// This is a purely textual test on the stored string; the filesystem is not
/// consulted. The empty string is reported as relative.
fn check_abs_path_must_be_absolute(id: &str, abs_path: &str) -> Option<Violation> {
    let looks_absolute = match abs_path.as_bytes() {
        // POSIX-style root.
        [b'/', ..] => true,
        // `<drive>:` prefix.
        [letter, b':', ..] => letter.is_ascii_alphabetic(),
        _ => false,
    };
    if looks_absolute {
        return None;
    }
    Some(Violation::new(
        "abs_path_must_be_absolute",
        Some(id.to_owned()),
        abs_path,
        "abs_path is relative — schema requires absolute form (leading '/' or '<drive>:')",
    ))
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::librarian::catalog::Catalog;
    use rusqlite::params;

    /// Inserts a minimal `artifact` row with the given id and stored path.
    fn seed_artifact(cat: &Catalog, id: &str, abs_path: &str) {
        cat.conn
            .execute(
                "INSERT INTO artifact \
                 (id, abs_path, kind, status, created_at, updated_at, file_mtime, file_sha256) \
                 VALUES (?1, ?2, 'spec', 'active', 0, 0, 0, '')",
                params![id, abs_path],
            )
            .unwrap();
    }

    /// Inserts a `commits` row with the given hash and git root.
    fn seed_commit(cat: &Catalog, hash: &str, git_root: &str) {
        cat.conn
            .execute(
                "INSERT INTO commits (hash, git_root) VALUES (?1, ?2)",
                params![hash, git_root],
            )
            .unwrap();
    }

    #[test]
    fn check_backslash_finds_byte_position() {
        let v = check_backslash("a1", "C:/foo\\bar.md", "backslash_in_abs_path").unwrap();
        assert_eq!(v.check, "backslash_in_abs_path");
        assert_eq!(v.artifact_id.as_deref(), Some("a1"));
        assert_eq!(v.path, "C:/foo\\bar.md");
        assert!(v.detail.contains("position 6"));
    }

    #[test]
    fn check_backslash_skips_clean_path() {
        assert!(check_backslash("a1", "/home/x/foo.md", "backslash_in_abs_path").is_none());
        assert!(check_backslash("a1", "C:/users/x/foo.md", "backslash_in_abs_path").is_none());
    }

    #[test]
    fn check_ads_colon_exempts_drive_prefix() {
        assert!(check_ads_colon("a1", "C:/Users/marius/foo.md").is_none());
        assert!(check_ads_colon("a1", "/home/marius/foo.md").is_none());
    }

    #[test]
    fn check_ads_colon_flags_post_drive_colon() {
        let v = check_ads_colon("a1", "C:/foo.txt:stream").unwrap();
        assert_eq!(v.check, "ads_colon_in_abs_path");
        // The offset is reported relative to the full path, drive prefix included.
        assert!(v.detail.contains("position 10"));
    }

    #[test]
    fn check_ads_colon_flags_colon_without_drive_prefix() {
        let v = check_ads_colon("a1", "/home/foo:bar").unwrap();
        assert_eq!(v.check, "ads_colon_in_abs_path");
    }

    #[test]
    fn check_dotdot_segment_flags_only_segment_dotdot() {
        assert!(check_dotdot_segment("a1", "/home/x/../etc").is_some());
        assert!(check_dotdot_segment("a1", "/home/x/..").is_some());
        assert!(check_dotdot_segment("a1", "..").is_some());
        assert!(check_dotdot_segment("a1", "/home/x/foo..bar.md").is_none());
        assert!(check_dotdot_segment("a1", "/home/x/.hidden").is_none());
    }

    #[test]
    fn check_missing_file_for_obviously_absent_path() {
        let v = check_missing_file("a1", "/nonexistent/path/that/will/never/exist.md").unwrap();
        assert_eq!(v.check, "missing_file");
    }

    #[test]
    fn check_abs_path_must_be_absolute_accepts_posix_and_drive() {
        assert!(check_abs_path_must_be_absolute("a1", "/home/x/foo.md").is_none());
        assert!(check_abs_path_must_be_absolute("a1", "/").is_none());
        assert!(check_abs_path_must_be_absolute("a1", "C:/Users/x/foo.md").is_none());
        assert!(check_abs_path_must_be_absolute("a1", "z:/").is_none());
    }

    #[test]
    fn check_abs_path_must_be_absolute_flags_relative() {
        let v = check_abs_path_must_be_absolute("a1", "docs/foo.md").unwrap();
        assert_eq!(v.check, "abs_path_must_be_absolute");
        assert_eq!(v.path, "docs/foo.md");
        assert!(v.detail.contains("relative"));
        assert!(check_abs_path_must_be_absolute("a1", "Cusers/foo.md").is_some());
        assert!(check_abs_path_must_be_absolute("a1", "").is_some());
    }

    // Exercises the two scan functions directly. Nothing here awaits, so a
    // plain synchronous test is sufficient.
    #[test]
    fn doctor_call_surfaces_seeded_drift() {
        let cat = Catalog::open_in_memory().unwrap();
        seed_artifact(&cat, "bad-backslash", "C:/users\\marius\\foo.md");
        seed_artifact(&cat, "bad-ads", "C:/users/foo.txt:stream");
        seed_artifact(&cat, "bad-dotdot", "/home/marius/../etc/passwd");
        seed_artifact(&cat, "bad-missing", "/definitely/not/a/real/path.md");
        seed_artifact(&cat, "bad-relative", "docs/issues/foo.md");
        // A path expected to exist on the host, so it trips no check.
        #[cfg(unix)]
        let clean_path = "/tmp";
        #[cfg(windows)]
        let clean_path = "C:/Windows";
        seed_artifact(&cat, "clean", clean_path);
        seed_commit(&cat, "abc123", "C:/users\\marius");

        let v = scan_artifact_paths(&cat.conn).unwrap();
        let mut by_check: std::collections::BTreeMap<&str, usize> = Default::default();
        for x in &v {
            *by_check.entry(x.check.as_str()).or_insert(0) += 1;
        }
        assert_eq!(by_check.get("backslash_in_abs_path").copied(), Some(1));
        assert_eq!(by_check.get("ads_colon_in_abs_path").copied(), Some(1));
        assert_eq!(by_check.get("dotdot_segment_in_abs_path").copied(), Some(1));
        assert_eq!(by_check.get("abs_path_must_be_absolute").copied(), Some(1));
        // Every seeded row except "clean" points at a path that is absent.
        assert_eq!(by_check.get("missing_file").copied(), Some(5));

        let r = scan_commits_git_root(&cat.conn).unwrap();
        assert_eq!(r.len(), 1);
        assert_eq!(r[0].check, "backslash_in_git_root");
        assert_eq!(r[0].artifact_id, None);
        assert_eq!(r[0].path, "C:/users\\marius");
        assert!(r[0].detail.contains("position 8"));
    }

    #[test]
    fn validate_prune_request_gates() {
        assert!(validate_prune_request("zap", Some("/gone")).is_err());
        assert!(validate_prune_request("prune_missing", None).is_err());
        assert!(validate_prune_request("prune_missing", Some("relative/path")).is_err());
        assert!(validate_prune_request("prune_missing", Some("/tmp")).is_err());
        assert!(
            validate_prune_request("prune_missing", Some("/definitely/not/a/real/root/xyz"))
                .is_ok()
        );
    }

    #[test]
    fn prune_dead_root_removes_rows_under_root_only() {
        let cat = Catalog::open_in_memory().unwrap();
        seed_artifact(&cat, "g1", "/gone/repo");
        seed_artifact(&cat, "g2", "/gone/repo/a.md");
        seed_artifact(&cat, "g3", "/gone/repo/docs/b.md");
        seed_artifact(&cat, "sib", "/gone/repo-other/c.md");
        seed_artifact(&cat, "keep", "/tmp/keep.md");
        seed_commit(&cat, "deadc0de", "/gone/repo");
        seed_commit(&cat, "livecdef", "/tmp");

        let (arts, commits) =
            prune_dead_root(&cat.conn, std::path::Path::new("/gone/repo")).unwrap();
        assert_eq!(arts, 3, "the 3 rows at/under /gone/repo are removed");
        assert_eq!(commits, 1, "the /gone/repo commit is removed");

        let exists = |id: &str| -> i64 {
            cat.conn
                .query_row("SELECT COUNT(*) FROM artifact WHERE id = ?1", [id], |r| {
                    r.get(0)
                })
                .unwrap()
        };
        assert_eq!(
            exists("sib"),
            1,
            "/gone/repo-other not matched by the prefix"
        );
        assert_eq!(exists("keep"), 1);

        let n_com: i64 = cat
            .conn
            .query_row("SELECT COUNT(*) FROM commits", [], |r| r.get(0))
            .unwrap();
        assert_eq!(n_com, 1, "only the /tmp commit remains");
    }
}