#![cfg_attr(
not(test),
expect(
dead_code,
reason = "pure git seam; consumers wired by capture (PHASE-02), record (PHASE-04), verify (PHASE-05)"
)
)]
use std::path::Path;
use std::process::Command;
use serde_json::{Number, Value};
use sha2::{Digest, Sha256};
/// Version tag for the remote-URL normalization rules.
/// NOTE(review): only referenced from tests in this view — presumably recorded by a consumer elsewhere; confirm.
pub(crate) const REMOTE_NORMALIZER: &str = "forget.remote.v1";
/// Version tag embedded as the `normalizer` field of the payload hashed by
/// `checkout_state_id`, so the tag is part of every checkout-state id.
pub(crate) const CHECKOUT_NORMALIZER: &str = "forget.checkout.v1";
/// What a captured `Frame` is pinned to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum AnchorKind {
/// Clean checkout: `capture` fills `Frame::commit` with HEAD.
Commit,
/// Dirty checkout: `capture` fills `Frame::checkout_state_id` and leaves `commit` empty.
CheckoutState,
/// No anchor: outside a work tree, or HEAD does not resolve to a commit.
None,
}
/// Which source a `RepoIdentity::repo_id` was derived from.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum RepoIdKind {
/// Taken from an explicitly supplied value (see `explicit_identity`).
Explicit,
/// Derived from the normalized URL of the selected git remote.
Remote,
/// Fallback `repo:git-root:…` id built from the root commit (or `unborn`).
LocalRoot,
}
/// How much trust to place in a derived `RepoIdentity`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum Confidence {
/// Assigned to explicit and remote-derived identities.
High,
/// Assigned to the local-root fallback when HEAD resolves to a commit.
Medium,
/// Assigned to the local-root fallback on an unborn HEAD and to the empty frame.
Low,
}
impl AnchorKind {
    /// Parses the textual token produced by [`AnchorKind::as_str`].
    ///
    /// # Errors
    /// Returns a message naming the offending token when it is not one of
    /// `commit`, `checkout_state` or `none`.
    pub(crate) fn parse(s: &str) -> Result<Self, String> {
        match s {
            "commit" => Ok(Self::Commit),
            "checkout_state" => Ok(Self::CheckoutState),
            "none" => Ok(Self::None),
            other => Err(format!("unknown anchor_kind {other:?}")),
        }
    }

    /// Returns the stable textual token for this variant.
    pub(crate) fn as_str(self) -> &'static str {
        match self {
            Self::None => "none",
            Self::CheckoutState => "checkout_state",
            Self::Commit => "commit",
        }
    }
}
impl RepoIdKind {
    /// Parses the textual token produced by [`RepoIdKind::as_str`].
    ///
    /// # Errors
    /// Returns a message naming the offending token when it is not one of
    /// `explicit`, `remote` or `local_root`.
    pub(crate) fn parse(s: &str) -> Result<Self, String> {
        match s {
            "explicit" => Ok(Self::Explicit),
            "remote" => Ok(Self::Remote),
            "local_root" => Ok(Self::LocalRoot),
            other => Err(format!("unknown repo_id_kind {other:?}")),
        }
    }

    /// Returns the stable textual token for this variant.
    pub(crate) fn as_str(self) -> &'static str {
        match self {
            Self::LocalRoot => "local_root",
            Self::Remote => "remote",
            Self::Explicit => "explicit",
        }
    }
}
impl Confidence {
    /// Parses the textual token produced by [`Confidence::as_str`].
    ///
    /// # Errors
    /// Returns a message naming the offending token when it is not one of
    /// `high`, `medium` or `low`.
    pub(crate) fn parse(s: &str) -> Result<Self, String> {
        match s {
            "high" => Ok(Self::High),
            "medium" => Ok(Self::Medium),
            "low" => Ok(Self::Low),
            other => Err(format!("unknown confidence {other:?}")),
        }
    }

    /// Returns the stable textual token for this variant.
    pub(crate) fn as_str(self) -> &'static str {
        match self {
            Self::Low => "low",
            Self::Medium => "medium",
            Self::High => "high",
        }
    }
}
/// Identity of a repository together with how it was derived.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct RepoIdentity {
/// The identifier itself; empty in the frame returned for a non-repository.
pub repo_id: String,
/// Which derivation produced `repo_id`.
pub kind: RepoIdKind,
/// Trust level attached to that derivation.
pub confidence: Confidence,
}
/// Snapshot of a checkout as produced by `capture`.
///
/// String fields use the empty string for "not applicable".
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct Frame {
/// Which of `commit` / `checkout_state_id` (or neither) anchors this frame.
pub anchor_kind: AnchorKind,
/// Identity of the repository the frame was captured from.
pub repo: RepoIdentity,
/// HEAD commit when the checkout is clean; empty when dirty or unanchored.
pub commit: String,
/// Tree of HEAD (`HEAD^{tree}`); empty when unanchored.
pub tree: String,
/// Symbolic ref HEAD points at; empty when `git symbolic-ref` fails (e.g. detached HEAD).
pub ref_name: String,
/// Digest of index tree + worktree diff + untracked files; set only when dirty.
pub checkout_state_id: String,
/// HEAD commit for both clean and dirty frames; empty when unanchored.
pub base_commit: String,
}
/// Lowercase hexadecimal digits, indexed by nibble value; used by `write_control_escape`.
const HEX: &[u8; 16] = b"0123456789abcdef";
/// Error raised by `canonical_bytes` when a JSON number is representable as
/// neither `i64` nor `u64`; carries the number's textual form.
#[derive(Debug, thiserror::Error)]
#[error("non-integer number in canonical payload: {0}")]
pub(crate) struct NonIntegerNumber(pub String);
/// Serializes `value` into its canonical byte form: no insignificant
/// whitespace, object keys ordered by their UTF-8 bytes, integers only.
///
/// # Errors
/// Returns [`NonIntegerNumber`] if any number in `value` is not an `i64`/`u64`.
pub(crate) fn canonical_bytes(value: &Value) -> Result<Vec<u8>, NonIntegerNumber> {
    let mut encoded = Vec::new();
    write_value(value, &mut encoded).map(|()| encoded)
}
/// Appends the canonical encoding of `value` to `out`, recursing into arrays
/// and objects.
///
/// Arrays keep their element order; object members are emitted sorted by the
/// raw bytes of their keys.
///
/// # Errors
/// Propagates [`NonIntegerNumber`] from `write_number`; `out` may then hold a
/// partial encoding.
fn write_value(value: &Value, out: &mut Vec<u8>) -> Result<(), NonIntegerNumber> {
    match value {
        Value::Null => out.extend_from_slice(b"null"),
        Value::Bool(flag) => {
            let literal = if *flag { "true" } else { "false" };
            out.extend_from_slice(literal.as_bytes());
        }
        Value::Number(n) => write_number(n, out)?,
        Value::String(s) => write_string(s, out),
        Value::Array(items) => {
            out.push(b'[');
            let mut remaining = items.iter();
            if let Some(first) = remaining.next() {
                write_value(first, out)?;
                for item in remaining {
                    out.push(b',');
                    write_value(item, out)?;
                }
            }
            out.push(b']');
        }
        Value::Object(map) => {
            // Sort the (key, value) pairs together so no second lookup is needed.
            let mut members: Vec<(&String, &Value)> = map.iter().collect();
            members.sort_by(|(a, _), (b, _)| a.as_bytes().cmp(b.as_bytes()));
            out.push(b'{');
            for (position, (key, member)) in members.into_iter().enumerate() {
                if position != 0 {
                    out.push(b',');
                }
                write_string(key, out);
                out.push(b':');
                write_value(member, out)?;
            }
            out.push(b'}');
        }
    }
    Ok(())
}
/// Appends the decimal form of an integral JSON number to `out`.
///
/// # Errors
/// Returns [`NonIntegerNumber`] (carrying the number's text) when `n` is
/// representable as neither `i64` nor `u64`.
fn write_number(n: &Number, out: &mut Vec<u8>) -> Result<(), NonIntegerNumber> {
    let digits = match (n.as_i64(), n.as_u64()) {
        (Some(i), _) => i.to_string(),
        (None, Some(u)) => u.to_string(),
        (None, None) => return Err(NonIntegerNumber(n.to_string())),
    };
    out.extend_from_slice(digits.as_bytes());
    Ok(())
}
/// Appends `s` to `out` as a quoted JSON string.
///
/// Only `"`, `\`, and characters below U+0020 are escaped (the latter using
/// the short forms `\b \t \n \f \r` where one exists, `\u00XX` otherwise);
/// everything else is written as raw UTF-8.
fn write_string(s: &str, out: &mut Vec<u8>) {
    out.push(b'"');
    for ch in s.chars() {
        let short_escape: Option<&str> = match ch {
            '"' => Some("\\\""),
            '\\' => Some("\\\\"),
            '\u{08}' => Some("\\b"),
            '\u{09}' => Some("\\t"),
            '\u{0A}' => Some("\\n"),
            '\u{0C}' => Some("\\f"),
            '\u{0D}' => Some("\\r"),
            _ => None,
        };
        match short_escape {
            Some(sequence) => out.extend_from_slice(sequence.as_bytes()),
            None if u32::from(ch) < 0x20 => write_control_escape(ch, out),
            None => {
                let mut utf8 = [0_u8; 4];
                out.extend_from_slice(ch.encode_utf8(&mut utf8).as_bytes());
            }
        }
    }
    out.push(b'"');
}
fn write_control_escape(c: char, out: &mut Vec<u8>) {
let code = u32::from(c);
let hi = usize::try_from((code >> 4) & 0xf).unwrap_or(0);
let lo = usize::try_from(code & 0xf).unwrap_or(0);
out.extend_from_slice(b"\\u00");
if let Some(&h) = HEX.get(hi) {
out.push(h);
}
if let Some(&l) = HEX.get(lo) {
out.push(l);
}
}
/// Returns the SHA-256 digest of `bytes` as a hex string (as produced by `hex::encode`).
pub(crate) fn sha256(bytes: &[u8]) -> String {
    hex::encode(Sha256::digest(bytes))
}
/// Derives the identifier of a dirty checkout.
///
/// The three inputs and the `CHECKOUT_NORMALIZER` tag are placed in a JSON
/// object, encoded with `canonical_bytes`, and hashed with `sha256`.
pub(crate) fn checkout_state_id(
    index_tree: &str,
    worktree_fingerprint: &str,
    untracked_fingerprint: &str,
) -> String {
    let payload = serde_json::json!({
        "index_tree": index_tree,
        "normalizer": CHECKOUT_NORMALIZER,
        "untracked_fingerprint": untracked_fingerprint,
        "worktree_fingerprint": worktree_fingerprint,
    });
    // The payload holds only strings, so the integer-only check cannot trip.
    let encoded = canonical_bytes(&payload).unwrap_or_default();
    sha256(&encoded)
}
/// Result of `normalize_remote_url`: the parsed components plus the derived id.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct NormalizedRemote {
/// Lowercased host name.
pub host: String,
/// Port rendered into `repo_id`; `None` when it is omitted there.
pub port: Option<u16>,
/// Repository path with surrounding slashes and a trailing `.git` removed (case preserved).
pub path: String,
/// `host/path`, or `host:port/path` when `port` is `Some`.
pub repo_id: String,
}
/// Port-handling rules for one URL scheme.
#[derive(Debug, Clone, Copy)]
struct SchemeInfo {
    /// Port assumed when the URL does not spell one out.
    default_port: u16,
    /// Whether the default port is omitted from the normalized form.
    drop_default: bool,
}

/// Looks up the port rules for a URL scheme.
///
/// Recognizes exactly `ssh`, `https`, `http` and `git` (case-sensitive);
/// anything else yields `None`.
fn scheme_info(scheme: &str) -> Option<SchemeInfo> {
    let (default_port, drop_default) = match scheme {
        "ssh" => (22, true),
        "https" => (443, true),
        "http" => (80, true),
        "git" => (9418, false),
        _ => return None,
    };
    Some(SchemeInfo {
        default_port,
        drop_default,
    })
}
/// Normalizes a repository path: strips leading/trailing `/`, then one
/// trailing `.git`, then any `/` left dangling by that removal.
fn clean_path(path: &str) -> String {
    let core = path.trim_matches('/');
    let core = match core.strip_suffix(".git") {
        Some(stem) => stem,
        None => core,
    };
    core.trim_end_matches('/').to_owned()
}
/// Splits a `host[:port]` authority into a lowercased host and the port that
/// should appear in the normalized id.
///
/// * A missing or unparseable port falls back to `scheme.default_port`.
/// * The port is returned as `None` when it equals the default and the scheme
///   asks for defaults to be dropped.
/// * A bracketed IPv6 literal without a port (`[2001:db8::1]`) is kept whole:
///   its last `:` belongs to the address, not to a port separator.
fn host_and_port(hostport: &str, scheme: SchemeInfo) -> (String, Option<u16>) {
    let (host, explicit_port) = match hostport.rsplit_once(':') {
        // A suffix ending in `]` means the colon sits inside `[...]`.
        Some((h, p)) if !p.ends_with(']') => (h, p.parse::<u16>().ok()),
        _ => (hostport, None),
    };
    let port = explicit_port.unwrap_or(scheme.default_port);
    let rendered = if scheme.drop_default && port == scheme.default_port {
        None
    } else {
        Some(port)
    };
    (host.to_lowercase(), rendered)
}
/// Normalizes a git remote URL into a host/port/path triple and a `repo_id`.
///
/// Two input shapes are understood:
///
/// * `scheme://[userinfo@]host[:port]/path` for the schemes known to
///   `scheme_info`. Userinfo is stripped from the authority only (the part
///   before the first `/`), so an `@` inside the path is preserved.
/// * scp-like `[user@]host:path`. As in git, this form is recognized only when
///   no `/` appears before the first `:`; otherwise the input is a local path.
///
/// The host is lowercased, the path is cleaned with `clean_path`, and the id
/// is `host/path` or `host:port/path`.
///
/// Returns `None` for unknown schemes, local paths, and inputs whose host or
/// cleaned path is empty.
pub(crate) fn normalize_remote_url(raw: &str) -> Option<NormalizedRemote> {
    let raw = raw.trim();
    let (host, port, path) = if let Some((scheme, rest)) = raw.split_once("://") {
        let scheme = scheme_info(scheme)?;
        // Isolate the authority first; only it may carry `userinfo@`.
        let (authority, path) = rest.split_once('/').unwrap_or((rest, ""));
        let hostport = authority.rsplit_once('@').map_or(authority, |(_, h)| h);
        let (host, port) = host_and_port(hostport, scheme);
        (host, port, clean_path(path))
    } else if let Some((hostpart, path)) = raw.split_once(':') {
        // A slash before the first colon marks a local path, not `host:path`.
        if hostpart.contains('/') {
            return None;
        }
        let host = hostpart.rsplit_once('@').map_or(hostpart, |(_, h)| h);
        (host.to_lowercase(), None, clean_path(path))
    } else {
        return None;
    };
    if host.is_empty() || path.is_empty() {
        return None;
    }
    let repo_id = match port {
        Some(p) => format!("{host}:{p}/{path}"),
        None => format!("{host}/{path}"),
    };
    Some(NormalizedRemote {
        host,
        port,
        path,
        repo_id,
    })
}
/// `-c key=value` overrides that `run_git` passes to every git invocation.
/// NOTE(review): presumably pinned so captured output does not depend on the
/// user's line-ending / file-mode configuration — confirm.
const NORMATIVE_FLAGS: &[&str] = &[
"-c",
"core.autocrlf=false",
"-c",
"core.eol=lf",
"-c",
"core.fileMode=true",
];
/// Git config key whose non-empty value overrides every other repo-id derivation.
const CONFIG_EXPLICIT_REPO_ID: &str = "doctrine.repo.id";
/// Git config key naming the remote to prefer when selecting one for the repo id.
const CONFIG_PREFERRED_REMOTE: &str = "doctrine.repo.preferredremote";
/// Failures that can occur while capturing a `Frame`.
#[derive(Debug, thiserror::Error)]
pub(crate) enum CaptureError {
/// HEAD's history has more than one root commit; carries the count.
#[error("unsupported: multi-root repository ({0} root commits)")]
MultiRoot(usize),
/// The index contains a gitlink (mode 160000) entry.
#[error("unsupported: submodule entry (gitlink mode 160000)")]
Submodule,
/// Several remotes exist, none is `origin` or the configured preference; carries their names.
#[error("ambiguous remote selection: multiple remotes without origin: {0:?}")]
AmbiguousRemote(Vec<String>),
/// git could not be spawned, exited unsuccessfully, or produced undecodable output.
#[error("git command failed: {0}")]
Git(String),
/// A filesystem call made during capture failed (currently only `read_link`).
#[error("io error during capture: {0}")]
Io(String),
}
/// Runs `git -C <root> <NORMATIVE_FLAGS> <args>` and returns its raw output,
/// regardless of exit status.
///
/// # Errors
/// Returns [`CaptureError::Git`] only when the process cannot be spawned.
fn run_git(root: &Path, args: &[&str]) -> Result<std::process::Output, CaptureError> {
    let mut command = Command::new("git");
    command.arg("-C").arg(root).args(NORMATIVE_FLAGS).args(args);
    match command.output() {
        Ok(output) => Ok(output),
        Err(e) => Err(CaptureError::Git(format!(
            "spawn git {}: {e}",
            args.join(" ")
        ))),
    }
}
/// Runs git and returns its stdout verbatim.
///
/// # Errors
/// Returns [`CaptureError::Git`] when git cannot be spawned or exits
/// unsuccessfully; in the latter case the message carries the arguments and
/// the trimmed stderr.
pub(crate) fn git_bytes(root: &Path, args: &[&str]) -> Result<Vec<u8>, CaptureError> {
    let output = run_git(root, args)?;
    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Err(CaptureError::Git(format!(
            "{}: {}",
            args.join(" "),
            stderr.trim()
        )));
    }
    Ok(output.stdout)
}
/// Runs git and returns its stdout as trimmed UTF-8 text.
///
/// # Errors
/// Propagates failures from `git_bytes`, and returns [`CaptureError::Git`]
/// when stdout is not valid UTF-8.
pub(crate) fn git_text(root: &Path, args: &[&str]) -> Result<String, CaptureError> {
    let stdout = git_bytes(root, args)?;
    match String::from_utf8(stdout) {
        Ok(text) => Ok(text.trim().to_string()),
        Err(_ignored) => Err(CaptureError::Git(format!(
            "non-utf8 output: {}",
            args.join(" ")
        ))),
    }
}
/// Runs git where an unsuccessful exit is an expected answer rather than an error.
///
/// Returns `Ok(None)` when git exits unsuccessfully, otherwise the trimmed
/// stdout.
///
/// # Errors
/// Returns [`CaptureError::Git`] when git cannot be spawned or a successful
/// run prints non-UTF-8 output.
fn git_opt(root: &Path, args: &[&str]) -> Result<Option<String>, CaptureError> {
    let output = run_git(root, args)?;
    if output.status.success() {
        match String::from_utf8(output.stdout) {
            Ok(text) => Ok(Some(text.trim().to_string())),
            Err(_ignored) => Err(CaptureError::Git(format!(
                "non-utf8 output: {}",
                args.join(" ")
            ))),
        }
    } else {
        Ok(None)
    }
}
/// Resolves the trunk commit, honouring the `DOCTRINE_TRUNK_REF` environment
/// variable when it is set; see `trunk_ladder` for the resolution order.
fn trunk_tree_ish(root: &Path) -> anyhow::Result<Option<String>> {
    let from_env = std::env::var_os("DOCTRINE_TRUNK_REF");
    trunk_ladder(root, from_env.as_deref())
}
fn trunk_ladder(root: &Path, explicit: Option<&std::ffi::OsStr>) -> anyhow::Result<Option<String>> {
let peel = |r: &str| -> anyhow::Result<Option<String>> {
let spec = format!("{r}^{{commit}}");
Ok(git_opt(root, &["rev-parse", "--verify", "--quiet", &spec])?)
};
if let Some(explicit) = explicit {
let explicit = explicit.to_string_lossy();
return match peel(&explicit)? {
Some(sha) => Ok(Some(sha)),
None => anyhow::bail!("DOCTRINE_TRUNK_REF={explicit} does not resolve to a commit"),
};
}
for candidate in ["origin/HEAD", "main", "master"] {
if let Some(sha) = peel(candidate)? {
return Ok(Some(sha));
}
}
Ok(None)
}
pub(crate) fn trunk_entity_ids(root: &Path, kind_dir: &str) -> anyhow::Result<Vec<u32>> {
let Some(tree_ish) = trunk_tree_ish(root)? else {
return Ok(Vec::new());
};
let pathspec = format!("{kind_dir}/");
let listing = git_opt(
root,
&["ls-tree", "-d", "--name-only", &tree_ish, "--", &pathspec],
)?;
let Some(listing) = listing else {
return Ok(Vec::new());
};
let ids = listing
.lines()
.filter_map(|line| line.rsplit('/').next())
.filter_map(|base| base.parse::<u32>().ok())
.collect();
Ok(ids)
}
/// Captures the current state of the checkout at `repo_root` as a [`Frame`].
///
/// * Outside a work tree: the empty frame from `none_frame`.
/// * Unborn HEAD: an `AnchorKind::None` frame that still carries the derived
///   repo identity.
/// * Clean checkout: `AnchorKind::Commit`, with `commit == base_commit == HEAD`.
/// * Dirty checkout (index tree differs from HEAD's tree, non-empty
///   `git diff HEAD`, or any untracked file): `AnchorKind::CheckoutState`, with
///   an empty `commit`, HEAD in `base_commit`, and a `checkout_state_id`.
///
/// # Errors
/// Returns [`CaptureError::MultiRoot`] for histories with several root
/// commits, [`CaptureError::Submodule`] when the index holds a gitlink, and
/// otherwise whatever the underlying git / filesystem calls report.
pub(crate) fn capture(repo_root: &Path) -> Result<Frame, CaptureError> {
    let inside = git_opt(repo_root, &["rev-parse", "--is-inside-work-tree"])?;
    if inside.as_deref() != Some("true") {
        return Ok(none_frame());
    }

    let head_commit = git_opt(repo_root, &["rev-parse", "--verify", "HEAD^{commit}"])?;
    let repo = derive_repo_identity(repo_root, head_commit.is_some())?;
    let Some(head) = head_commit else {
        // Unborn HEAD: nothing to anchor on, but the identity is still known.
        return Ok(Frame {
            repo,
            ..none_frame()
        });
    };

    let root_count = git_text(repo_root, &["rev-list", "--max-parents=0", "HEAD"])?
        .lines()
        .filter(|l| !l.is_empty())
        .count();
    if root_count > 1 {
        return Err(CaptureError::MultiRoot(root_count));
    }

    let tree = git_text(repo_root, &["rev-parse", "HEAD^{tree}"])?;
    let ref_name = match git_opt(repo_root, &["symbolic-ref", "--quiet", "HEAD"])? {
        Some(name) => name,
        None => String::new(),
    };
    reject_submodules(repo_root)?;

    let index_tree = git_text(repo_root, &["write-tree"])?;
    let diff_bytes = git_bytes(
        repo_root,
        &["diff", "HEAD", "--binary", "--no-textconv", "--no-ext-diff"],
    )?;
    let untracked_fp = untracked_fingerprint(repo_root)?;

    let clean = index_tree == tree && diff_bytes.is_empty() && untracked_fp.is_none();
    if clean {
        return Ok(Frame {
            anchor_kind: AnchorKind::Commit,
            repo,
            commit: head.clone(),
            tree,
            ref_name,
            checkout_state_id: String::new(),
            base_commit: head,
        });
    }

    let worktree_fp = sha256(&diff_bytes);
    let untracked = untracked_fp.unwrap_or_else(|| sha256(b""));
    let state_id = checkout_state_id(&index_tree, &worktree_fp, &untracked);
    Ok(Frame {
        anchor_kind: AnchorKind::CheckoutState,
        repo,
        commit: String::new(),
        tree,
        ref_name,
        checkout_state_id: state_id,
        base_commit: head,
    })
}
/// Crate-visible accessor for the fully empty frame; delegates to `none_frame`.
pub(crate) fn unanchored_frame() -> Frame {
none_frame()
}
/// Builds the frame used when there is nothing to anchor on: every string
/// field empty, `AnchorKind::None`, and a low-confidence local-root identity
/// with an empty id.
fn none_frame() -> Frame {
    let repo = RepoIdentity {
        repo_id: String::new(),
        kind: RepoIdKind::LocalRoot,
        confidence: Confidence::Low,
    };
    Frame {
        anchor_kind: AnchorKind::None,
        repo,
        commit: String::new(),
        tree: String::new(),
        ref_name: String::new(),
        checkout_state_id: String::new(),
        base_commit: String::new(),
    }
}
/// Builds a high-confidence explicit identity from a user-supplied value.
///
/// If the trimmed value parses as a remote URL it is stored in normalized form
/// (which also drops any embedded credentials); otherwise the trimmed value is
/// kept verbatim.
pub(crate) fn explicit_identity(raw: &str) -> RepoIdentity {
    let trimmed = raw.trim();
    let repo_id = match normalize_remote_url(trimmed) {
        Some(normalized) => normalized.repo_id,
        None => trimmed.to_string(),
    };
    RepoIdentity {
        repo_id,
        kind: RepoIdKind::Explicit,
        confidence: Confidence::High,
    }
}
/// Derives the repository identity, trying sources from most to least
/// authoritative:
///
/// 1. a non-empty `doctrine.repo.id` git config value (explicit, high);
/// 2. the URL of the remote chosen by `select_remote`, if it normalizes
///    (remote, high);
/// 3. `repo:git-root:<root commit>` (local root, medium), or
///    `repo:git-root:unborn` with low confidence when `born` is false.
///
/// The root commit is looked up only when step 3 is reached:
/// `rev-list --max-parents=0 HEAD` walks the whole history, and a failure there
/// should not mask an identity that steps 1 or 2 can already provide.
///
/// # Errors
/// Returns [`CaptureError::AmbiguousRemote`] from remote selection, or
/// [`CaptureError::Git`] when a required git call fails.
fn derive_repo_identity(root: &Path, born: bool) -> Result<RepoIdentity, CaptureError> {
    let explicit = git_opt(root, &["config", "--get", CONFIG_EXPLICIT_REPO_ID])?
        .filter(|value| !value.is_empty());
    if let Some(explicit) = explicit {
        return Ok(explicit_identity(&explicit));
    }

    let remotes = list_remotes(root)?;
    if let Some(selected) = select_remote(root, &remotes)? {
        let raw = git_text(root, &["remote", "get-url", &selected])?;
        if let Some(normalized) = normalize_remote_url(&raw) {
            return Ok(RepoIdentity {
                repo_id: normalized.repo_id,
                kind: RepoIdKind::Remote,
                confidence: Confidence::High,
            });
        }
    }

    if !born {
        return Ok(RepoIdentity {
            repo_id: "repo:git-root:unborn".to_string(),
            kind: RepoIdKind::LocalRoot,
            confidence: Confidence::Low,
        });
    }

    // Local-root fallback: only now pay for the history walk.
    let roots = git_text(root, &["rev-list", "--max-parents=0", "HEAD"])?;
    let repo_id = match roots.lines().next() {
        Some(sha) => format!("repo:git-root:{sha}"),
        None => "repo:git-root:unborn".to_string(),
    };
    Ok(RepoIdentity {
        repo_id,
        kind: RepoIdKind::LocalRoot,
        confidence: Confidence::Medium,
    })
}
/// Returns the names printed by `git remote`, trimmed, without blank lines,
/// in sorted order.
///
/// # Errors
/// Propagates the failure when `git remote` cannot be run or exits unsuccessfully.
fn list_remotes(root: &Path) -> Result<Vec<String>, CaptureError> {
    let listing = git_text(root, &["remote"])?;
    let mut names = Vec::new();
    for line in listing.lines() {
        let name = line.trim();
        if !name.is_empty() {
            names.push(name.to_string());
        }
    }
    names.sort();
    Ok(names)
}
/// Chooses which remote drives the repository identity.
///
/// Order of preference: the remote named by `doctrine.repo.preferredremote`
/// (if it exists among `remotes`), then `origin`, then the sole remote.
/// `Ok(None)` means there are no remotes at all.
///
/// # Errors
/// Returns [`CaptureError::AmbiguousRemote`] when several remotes exist and
/// none of the preferences applies.
fn select_remote(root: &Path, remotes: &[String]) -> Result<Option<String>, CaptureError> {
    if remotes.is_empty() {
        return Ok(None);
    }

    let preferred = git_opt(root, &["config", "--get", CONFIG_PREFERRED_REMOTE])?;
    if let Some(name) = preferred.filter(|candidate| remotes.contains(candidate)) {
        return Ok(Some(name));
    }

    if remotes.iter().any(|r| r == "origin") {
        return Ok(Some("origin".to_string()));
    }

    match remotes {
        [only] => Ok(Some(only.clone())),
        _ => Err(CaptureError::AmbiguousRemote(remotes.to_vec())),
    }
}
/// Fails when the index contains a gitlink entry, i.e. a line of
/// `git ls-files --stage` whose first field (the mode) is `160000`.
///
/// # Errors
/// Returns [`CaptureError::Submodule`] on the first such entry, or the git
/// failure if the listing cannot be produced.
fn reject_submodules(root: &Path) -> Result<(), CaptureError> {
    let staged = git_text(root, &["ls-files", "--stage"])?;
    let has_gitlink = staged
        .lines()
        .any(|entry| entry.split_whitespace().next() == Some("160000"));
    if has_gitlink {
        Err(CaptureError::Submodule)
    } else {
        Ok(())
    }
}
/// Fingerprints the untracked, non-ignored files of the checkout.
///
/// Returns `Ok(None)` when there are none. Otherwise the paths are sorted
/// bytewise and, for each, `path NUL hash LF` is appended to a manifest whose
/// SHA-256 is returned. The per-file hash is `git hash-object` for ordinary
/// entries and the hash of the link target (not followed) for symlinks.
///
/// # Errors
/// Returns [`CaptureError::Git`] for a non-UTF-8 path or a failing git call,
/// and [`CaptureError::Io`] when a symlink cannot be read.
fn untracked_fingerprint(root: &Path) -> Result<Option<String>, CaptureError> {
    let listing = git_bytes(root, &["ls-files", "--others", "--exclude-standard", "-z"])?;
    let mut entries: Vec<&[u8]> = listing
        .split(|byte| *byte == 0)
        .filter(|entry| !entry.is_empty())
        .collect();
    if entries.is_empty() {
        return Ok(None);
    }
    entries.sort_unstable();

    let mut manifest: Vec<u8> = Vec::new();
    for entry in entries {
        let Ok(rel) = std::str::from_utf8(entry) else {
            return Err(CaptureError::Git("non-utf8 untracked path".to_string()));
        };
        let full = root.join(rel);
        // `symlink_metadata` does not follow the link, so dangling links are fine.
        let digest = match std::fs::symlink_metadata(&full) {
            Ok(meta) if meta.file_type().is_symlink() => symlink_target_hash(&full)?,
            _ => git_text(root, &["hash-object", "--", rel])?,
        };
        manifest.extend_from_slice(entry);
        manifest.push(0);
        manifest.extend_from_slice(digest.as_bytes());
        manifest.push(b'\n');
    }
    Ok(Some(sha256(&manifest)))
}
/// Hashes the raw bytes of a symlink's target path (unix), without following
/// the link.
///
/// # Errors
/// Returns [`CaptureError::Io`] when the link cannot be read.
#[cfg(unix)]
fn symlink_target_hash(full: &Path) -> Result<String, CaptureError> {
    use std::os::unix::ffi::OsStrExt;
    match std::fs::read_link(full) {
        Ok(target) => Ok(sha256(target.as_os_str().as_bytes())),
        Err(e) => Err(CaptureError::Io(format!(
            "readlink {}: {e}",
            full.display()
        ))),
    }
}
/// Hashes a symlink's target path (non-unix), using its lossy UTF-8 rendering,
/// without following the link.
///
/// # Errors
/// Returns [`CaptureError::Io`] when the link cannot be read.
#[cfg(not(unix))]
fn symlink_target_hash(full: &Path) -> Result<String, CaptureError> {
    match std::fs::read_link(full) {
        Ok(target) => Ok(sha256(target.to_string_lossy().as_bytes())),
        Err(e) => Err(CaptureError::Io(format!(
            "readlink {}: {e}",
            full.display()
        ))),
    }
}
/// Counts the commits in `since..target` that touch any of `paths`.
///
/// Returns `None` when any input is empty, when `since` is not an ancestor of
/// `target`, or when git fails or prints something that is not a `u32`.
pub(crate) fn commits_touching(
    root: &Path,
    paths: &[String],
    since: &str,
    target: &str,
) -> Option<u32> {
    if paths.is_empty() || [since, target].iter().any(|rev| rev.is_empty()) {
        return None;
    }

    let ancestry = run_git(root, &["merge-base", "--is-ancestor", since, target]).ok()?;
    if !ancestry.status.success() {
        return None;
    }

    let range = format!("{since}..{target}");
    let mut args: Vec<&str> = Vec::with_capacity(paths.len() + 4);
    args.extend(["rev-list", "--count", range.as_str(), "--"]);
    args.extend(paths.iter().map(String::as_str));

    let count = git_opt(root, &args).ok()??;
    count.parse::<u32>().ok()
}
/// Returns the commit id HEAD resolves to, or `None` when it does not resolve
/// or git cannot be run.
#[cfg_attr(
not(test),
expect(
dead_code,
reason = "SL-042 P3 reconcile-reader seam: head_sha feeds coverage_scan's \
staleness resolution; no bins/lib consumer until the CLI reader \
slice wires it"
)
)]
pub(crate) fn head_sha(root: &Path) -> Option<String> {
    match git_opt(root, &["rev-parse", "--verify", "HEAD^{commit}"]) {
        Ok(resolved) => resolved,
        Err(_) => None,
    }
}
#[cfg(test)]
mod tests {
use serde_json::{Value, json};
use std::path::{Path, PathBuf};
use std::process::Command;
use super::{
AnchorKind, CHECKOUT_NORMALIZER, CaptureError, Confidence, Frame, REMOTE_NORMALIZER,
RepoIdKind, RepoIdentity, canonical_bytes, capture, checkout_state_id, commits_touching,
explicit_identity, normalize_remote_url, sha256,
};
fn canon(v: &Value) -> String {
let bytes = canonical_bytes(v).unwrap_or_default();
String::from_utf8(bytes).unwrap_or_default()
}
#[test]
fn anchor_kind_token_round_trips() {
for k in [
AnchorKind::Commit,
AnchorKind::CheckoutState,
AnchorKind::None,
] {
assert_eq!(AnchorKind::parse(k.as_str()).unwrap(), k);
}
assert_eq!(
AnchorKind::as_str(AnchorKind::CheckoutState),
"checkout_state"
);
assert!(AnchorKind::parse("bogus").is_err());
}
#[test]
fn repo_id_kind_token_round_trips() {
for k in [
RepoIdKind::Explicit,
RepoIdKind::Remote,
RepoIdKind::LocalRoot,
] {
assert_eq!(RepoIdKind::parse(k.as_str()).unwrap(), k);
}
assert_eq!(RepoIdKind::as_str(RepoIdKind::LocalRoot), "local_root");
assert!(RepoIdKind::parse("bogus").is_err());
}
#[test]
fn confidence_token_round_trips() {
for c in [Confidence::High, Confidence::Medium, Confidence::Low] {
assert_eq!(Confidence::parse(c.as_str()).unwrap(), c);
}
assert!(Confidence::parse("bogus").is_err());
}
fn sample_frame() -> Frame {
Frame {
anchor_kind: AnchorKind::Commit,
repo: RepoIdentity {
repo_id: "github.com/org/repo".to_string(),
kind: RepoIdKind::Remote,
confidence: Confidence::High,
},
commit: "abc123".to_string(),
tree: "tree123".to_string(),
ref_name: "refs/heads/main".to_string(),
checkout_state_id: String::new(),
base_commit: "abc123".to_string(),
}
}
#[test]
fn frame_carries_anchor_and_identity() {
let f = sample_frame();
assert_eq!(f.anchor_kind, AnchorKind::Commit);
assert_eq!(f.repo.repo_id, "github.com/org/repo");
assert_eq!(f.repo.kind, RepoIdKind::Remote);
assert_eq!(f.repo.confidence, Confidence::High);
assert_eq!(f.commit, f.base_commit);
assert_eq!(f.tree, "tree123");
assert_eq!(f.ref_name, "refs/heads/main");
assert!(f.checkout_state_id.is_empty());
}
#[test]
fn frame_variants_are_distinct() {
assert_ne!(AnchorKind::Commit, AnchorKind::CheckoutState);
assert_ne!(AnchorKind::CheckoutState, AnchorKind::None);
assert_ne!(RepoIdKind::Explicit, RepoIdKind::LocalRoot);
assert_ne!(Confidence::Medium, Confidence::Low);
}
#[test]
fn canonical_primitives_encode_to_literals() {
assert_eq!(canon(&json!(null)), "null");
assert_eq!(canon(&json!(true)), "true");
assert_eq!(canon(&json!(false)), "false");
assert_eq!(canon(&json!(0)), "0");
assert_eq!(canon(&json!(-1)), "-1");
assert_eq!(canon(&json!(42)), "42");
}
#[test]
fn canonical_sorts_object_keys_bytewise_and_keeps_array_order() {
assert_eq!(canon(&json!({})), "{}");
assert_eq!(canon(&json!([])), "[]");
assert_eq!(canon(&json!({ "b": 1, "a": 2 })), "{\"a\":2,\"b\":1}");
assert_eq!(canon(&json!([3, 1, 2])), "[3,1,2]");
let nested = json!({ "z": [1, { "b": 2, "a": 3 }], "a": null });
assert_eq!(canon(&nested), "{\"a\":null,\"z\":[1,{\"a\":3,\"b\":2}]}");
}
#[test]
fn canonical_escapes_only_the_minimal_set() {
assert_eq!(canon(&json!("a\"b\\c")), "\"a\\\"b\\\\c\"");
assert_eq!(canon(&json!("\n\t")), "\"\\n\\t\"");
assert_eq!(canon(&Value::String("\u{01}".to_owned())), "\"\\u0001\"");
assert_eq!(canon(&json!("é→")), "\"é→\"");
}
#[test]
fn canonical_rejects_floats_and_exponent_forms() {
assert!(canonical_bytes(&json!(1.5)).is_err(), "fractional rejected");
let exp: Value = serde_json::from_str("1e3").unwrap_or(Value::Null);
assert!(canonical_bytes(&exp).is_err(), "exponent form rejected");
let dot_zero: Value = serde_json::from_str("1.0").unwrap_or(Value::Null);
assert!(
canonical_bytes(&dot_zero).is_err(),
"1.0 float-form rejected"
);
}
#[test]
fn sha256_is_lowercase_hex_of_known_vector() {
assert_eq!(
sha256(b""),
"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
);
}
#[test]
fn checkout_state_id_is_deterministic() {
assert_eq!(
checkout_state_id("tree", "wf", "uf"),
checkout_state_id("tree", "wf", "uf")
);
}
#[test]
fn checkout_state_id_changes_with_each_input() {
let base = checkout_state_id("tree", "wf", "uf");
assert_ne!(base, checkout_state_id("tree2", "wf", "uf"));
assert_ne!(base, checkout_state_id("tree", "wf2", "uf"));
assert_ne!(base, checkout_state_id("tree", "wf", "uf2"));
}
#[test]
fn checkout_state_id_binds_the_normalizer_tag() {
let value = json!({
"normalizer": CHECKOUT_NORMALIZER,
"index_tree": "t",
"worktree_fingerprint": "w",
"untracked_fingerprint": "u",
});
let expected = sha256(&canonical_bytes(&value).unwrap_or_default());
assert_eq!(checkout_state_id("t", "w", "u"), expected);
}
#[test]
fn normalize_remote_url_table() {
let cases = [
("https://github.com/org/repo.git", "github.com/org/repo"),
("https://github.com/org/repo", "github.com/org/repo"),
("git@github.com:org/repo.git", "github.com/org/repo"),
("ssh://git@github.com/org/repo.git", "github.com/org/repo"),
("ssh://git@github.com:22/org/repo", "github.com/org/repo"),
("https://github.com:443/org/repo", "github.com/org/repo"),
(
"ssh://git@git.example.com:2222/org/repo",
"git.example.com:2222/org/repo",
),
(
"https://git.example.com:8443/org/repo",
"git.example.com:8443/org/repo",
),
(
"git://git.example.com/org/repo",
"git.example.com:9418/org/repo",
),
("https://GitHub.com/Org/Repo.git", "github.com/Org/Repo"),
(
"https://user:token@github.com/org/repo",
"github.com/org/repo",
),
("https://github.com/org/repo/", "github.com/org/repo"),
];
for (raw, expected) in cases {
assert_eq!(
normalize_remote_url(raw).map(|g| g.repo_id).as_deref(),
Some(expected),
"input: {raw}"
);
}
}
#[test]
fn normalize_remote_url_rejects_garbage() {
assert!(normalize_remote_url("not a url").is_none());
assert!(normalize_remote_url("").is_none());
assert!(normalize_remote_url("https://").is_none());
}
#[test]
fn normalize_remote_url_exposes_components() {
let n = normalize_remote_url("ssh://git@git.example.com:2222/Org/Repo.git");
assert!(n.is_some(), "should normalize");
if let Some(n) = n {
assert_eq!(n.host, "git.example.com");
assert_eq!(n.port, Some(2222));
assert_eq!(n.path, "Org/Repo");
assert_eq!(n.repo_id, "git.example.com:2222/Org/Repo");
}
}
#[test]
fn remote_normalizer_tag_is_frozen() {
assert_eq!(REMOTE_NORMALIZER, "forget.remote.v1");
}
const FIXED_DATE: &str = "2026-01-01T00:00:00 +0000";
struct ScratchRepo {
_dir: tempfile::TempDir,
path: PathBuf,
}
impl ScratchRepo {
fn new() -> Self {
let dir = tempfile::tempdir().expect("tempdir");
let path = dir.path().to_path_buf();
let repo = Self { _dir: dir, path };
repo.git(&["init", "-b", "main"]);
repo.git(&["config", "user.name", "Doctrine Test"]);
repo.git(&["config", "user.email", "test@doctrine.invalid"]);
repo
}
fn path(&self) -> &Path {
&self.path
}
fn git(&self, args: &[&str]) -> String {
let output = Command::new("git")
.arg("-C")
.arg(&self.path)
.args(args)
.env("GIT_AUTHOR_DATE", FIXED_DATE)
.env("GIT_COMMITTER_DATE", FIXED_DATE)
.output()
.expect("spawn git");
assert!(
output.status.success(),
"git {args:?} failed: {}",
String::from_utf8_lossy(&output.stderr).trim()
);
String::from_utf8_lossy(&output.stdout).trim().to_string()
}
fn write(&self, rel: &str, contents: &str) {
let full = self.path.join(rel);
if let Some(parent) = full.parent() {
std::fs::create_dir_all(parent).expect("create parent");
}
std::fs::write(&full, contents).expect("write file");
}
fn commit(&self, rel: &str, contents: &str, message: &str) -> String {
self.write(rel, contents);
self.git(&["add", rel]);
self.git(&["commit", "-m", message]);
self.git(&["rev-parse", "HEAD"])
}
}
#[test]
fn clean_checkout_anchors_to_head_commit() {
let repo = ScratchRepo::new();
let head = repo.commit("a.txt", "hello", "init");
let tree = repo.git(&["rev-parse", "HEAD^{tree}"]);
let frame = capture(repo.path()).expect("capture clean");
assert_eq!(frame.anchor_kind, AnchorKind::Commit);
assert_eq!(frame.commit, head);
assert_eq!(frame.base_commit, head);
assert_eq!(frame.tree, tree);
assert_eq!(frame.ref_name, "refs/heads/main");
assert!(
frame.checkout_state_id.is_empty(),
"clean tree carries no checkout_state_id"
);
}
#[test]
fn dirty_tracked_change_anchors_to_checkout_state() {
let repo = ScratchRepo::new();
let head = repo.commit("a.txt", "hello", "init");
repo.write("a.txt", "hello world");
let frame = capture(repo.path()).expect("capture dirty");
assert_eq!(frame.anchor_kind, AnchorKind::CheckoutState);
assert!(frame.commit.is_empty(), "commit empty iff dirty");
assert!(!frame.checkout_state_id.is_empty());
assert_eq!(
frame.base_commit, head,
"base_commit carries HEAD when dirty"
);
}
#[test]
fn untracked_only_is_dirty() {
let repo = ScratchRepo::new();
repo.commit("a.txt", "hello", "init");
repo.write("untracked.txt", "new");
let frame = capture(repo.path()).expect("capture untracked");
assert_eq!(frame.anchor_kind, AnchorKind::CheckoutState);
assert!(frame.commit.is_empty());
assert!(!frame.checkout_state_id.is_empty());
}
#[test]
fn detached_head_is_anchored_with_empty_ref() {
let repo = ScratchRepo::new();
let first = repo.commit("a.txt", "1", "first");
repo.commit("b.txt", "2", "second");
repo.git(&["checkout", &first]);
let frame = capture(repo.path()).expect("capture detached");
assert_eq!(frame.anchor_kind, AnchorKind::Commit, "still anchored");
assert_eq!(frame.commit, first);
assert!(
frame.ref_name.is_empty(),
"detached HEAD has empty ref_name"
);
}
#[test]
fn unborn_repo_is_none_anchor() {
let repo = ScratchRepo::new(); let frame = capture(repo.path()).expect("capture unborn");
assert_eq!(frame.anchor_kind, AnchorKind::None);
assert!(frame.commit.is_empty());
assert!(frame.base_commit.is_empty());
}
#[test]
fn non_repo_is_none_anchor_not_error() {
let dir = tempfile::tempdir().expect("tempdir"); let frame = capture(dir.path()).expect("non-repo must not error");
assert_eq!(frame.anchor_kind, AnchorKind::None);
assert_eq!(frame.repo.repo_id, "");
}
#[test]
fn recapture_of_unchanged_dirty_tree_is_stable() {
let repo = ScratchRepo::new();
repo.commit("a.txt", "hello", "init");
repo.write("a.txt", "changed");
let a = capture(repo.path()).expect("capture a");
let b = capture(repo.path()).expect("capture b");
assert_eq!(a.checkout_state_id, b.checkout_state_id);
}
#[test]
fn editing_worktree_changes_checkout_state_id() {
let repo = ScratchRepo::new();
repo.commit("a.txt", "hello", "init");
repo.write("a.txt", "first edit");
let first = capture(repo.path()).expect("capture first");
repo.write("a.txt", "second edit");
let second = capture(repo.path()).expect("capture second");
assert_ne!(first.checkout_state_id, second.checkout_state_id);
}
#[test]
fn origin_remote_drives_high_confidence_repo_id() {
let repo = ScratchRepo::new();
repo.commit("a.txt", "hello", "init");
repo.git(&["remote", "add", "origin", "https://github.com/org/repo.git"]);
let frame = capture(repo.path()).expect("capture remote");
assert_eq!(frame.repo.kind, RepoIdKind::Remote);
assert_eq!(frame.repo.confidence, Confidence::High);
assert_eq!(frame.repo.repo_id, "github.com/org/repo");
}
#[test]
fn two_remotes_without_origin_are_ambiguous() {
let repo = ScratchRepo::new();
repo.commit("a.txt", "hello", "init");
repo.git(&["remote", "add", "alpha", "https://github.com/org/alpha.git"]);
repo.git(&["remote", "add", "beta", "https://github.com/org/beta.git"]);
let result = capture(repo.path());
assert!(
matches!(result, Err(CaptureError::AmbiguousRemote(_))),
"got {result:?}"
);
}
#[test]
fn no_remote_falls_back_to_local_root_medium() {
let repo = ScratchRepo::new();
repo.commit("a.txt", "hello", "init");
let root = repo.git(&["rev-list", "--max-parents=0", "HEAD"]);
let frame = capture(repo.path()).expect("capture local-root");
assert_eq!(frame.repo.kind, RepoIdKind::LocalRoot);
assert_eq!(frame.repo.confidence, Confidence::Medium);
assert_eq!(frame.repo.repo_id, format!("repo:git-root:{root}"));
}
#[test]
fn explicit_config_repo_id_wins_over_remote() {
let repo = ScratchRepo::new();
repo.commit("a.txt", "hello", "init");
repo.git(&["remote", "add", "origin", "https://github.com/org/repo.git"]);
repo.git(&["config", "doctrine.repo.id", "custom/identity"]);
let frame = capture(repo.path()).expect("capture explicit");
assert_eq!(frame.repo.kind, RepoIdKind::Explicit);
assert_eq!(frame.repo.confidence, Confidence::High);
assert_eq!(frame.repo.repo_id, "custom/identity");
}
#[test]
fn preferred_remote_config_overrides_origin() {
let repo = ScratchRepo::new();
repo.commit("a.txt", "hello", "init");
repo.git(&[
"remote",
"add",
"origin",
"https://github.com/org/origin.git",
]);
repo.git(&["remote", "add", "fork", "https://github.com/me/fork.git"]);
repo.git(&["config", "doctrine.repo.preferredremote", "fork"]);
let frame = capture(repo.path()).expect("capture preferred");
assert_eq!(frame.repo.repo_id, "github.com/me/fork");
}
#[test]
fn explicit_identity_strips_userinfo_from_credentialed_repo() {
let id = explicit_identity("https://user:token@github.com/org/repo.git");
assert_eq!(id.kind, RepoIdKind::Explicit);
assert_eq!(id.confidence, Confidence::High);
assert_eq!(id.repo_id, "github.com/org/repo", "userinfo dropped");
}
#[test]
fn explicit_identity_keeps_non_url_value_verbatim() {
let id = explicit_identity("org/project");
assert_eq!(id.repo_id, "org/project");
assert_eq!(id.kind, RepoIdKind::Explicit);
}
#[test]
fn submodule_gitlink_entry_is_rejected() {
let repo = ScratchRepo::new();
let head = repo.commit("a.txt", "hello", "init");
repo.git(&[
"update-index",
"--add",
"--cacheinfo",
&format!("160000,{head},sub"),
]);
let result = capture(repo.path());
assert!(
matches!(result, Err(CaptureError::Submodule)),
"got {result:?}"
);
}
#[cfg(unix)]
#[test]
fn symlink_repo_captures_clean() {
let repo = ScratchRepo::new();
repo.commit("a.txt", "hello", "init");
std::os::unix::fs::symlink("a.txt", repo.path().join("link")).expect("symlink");
repo.git(&["add", "link"]);
repo.git(&["commit", "-m", "add symlink"]);
let frame = capture(repo.path()).expect("capture symlink repo");
assert_eq!(
frame.anchor_kind,
AnchorKind::Commit,
"clean symlink tree anchors on its commit"
);
assert!(frame.checkout_state_id.is_empty());
}
#[cfg(unix)]
#[test]
fn tracked_symlink_repoint_is_dirty() {
let repo = ScratchRepo::new();
repo.commit("a.txt", "hello", "init");
std::os::unix::fs::symlink("a.txt", repo.path().join("link")).expect("symlink");
repo.git(&["add", "link"]);
repo.git(&["commit", "-m", "add symlink"]);
let link = repo.path().join("link");
std::fs::remove_file(&link).expect("rm link");
std::os::unix::fs::symlink("a.txt.other", &link).expect("re-symlink");
let a = capture(repo.path()).expect("capture repointed");
let b = capture(repo.path()).expect("recapture");
assert_eq!(
a.anchor_kind,
AnchorKind::CheckoutState,
"a changed tracked symlink makes the tree dirty"
);
assert!(
!a.checkout_state_id.is_empty(),
"the dirty tracked symlink carries a checkout_state_id"
);
assert_eq!(a, b, "tracked-symlink-repoint capture is deterministic");
}
/// Rewriting the file an untracked symlink points at (outside the repo) must
/// not change the `checkout_state_id`: only the link is considered, not the
/// content behind it.
#[cfg(unix)]
#[test]
fn untracked_symlink_ignores_pointee_content() {
    let repo = ScratchRepo::new();
    repo.commit("a.txt", "hello", "init");

    // The pointee lives in its own temp dir, outside the repository.
    let outside = tempfile::tempdir().expect("ext tempdir");
    let pointee = outside.path().join("pointee");
    std::fs::write(&pointee, "original").expect("write pointee");
    std::os::unix::fs::symlink(&pointee, repo.path().join("link")).expect("symlink");

    let before = capture(repo.path()).expect("capture 1").checkout_state_id;
    std::fs::write(&pointee, "mutated content, a different length entirely")
        .expect("rewrite pointee");
    let after = capture(repo.path()).expect("capture 2").checkout_state_id;

    assert!(!before.is_empty(), "untracked symlink makes the tree dirty");
    assert_eq!(
        before, after,
        "csid must be invariant to symlink target *content* (no-follow)"
    );
}
/// Re-creating an untracked symlink with a different target string must
/// produce a different `checkout_state_id`.
#[cfg(unix)]
#[test]
fn untracked_symlink_tracks_target_path() {
    use std::os::unix::fs::symlink;

    let repo = ScratchRepo::new();
    repo.commit("a.txt", "hello", "init");
    let link = repo.path().join("link");

    symlink("first", &link).expect("symlink first");
    let with_first = capture(repo.path())
        .expect("capture first")
        .checkout_state_id;

    std::fs::remove_file(&link).expect("rm link");
    symlink("second", &link).expect("symlink second");
    let with_second = capture(repo.path())
        .expect("capture second")
        .checkout_state_id;

    assert_ne!(
        with_first, with_second,
        "repointing the symlink must change the csid (link text captured)"
    );
}
/// An untracked symlink whose target does not exist must still capture
/// successfully, count as checkout state, and capture identically twice.
#[cfg(unix)]
#[test]
fn dangling_untracked_symlink_ok() {
    let repo = ScratchRepo::new();
    repo.commit("a.txt", "hello", "init");

    let link = repo.path().join("link");
    std::os::unix::fs::symlink("does/not/exist", link).expect("symlink");

    let first = capture(repo.path()).expect("capture dangling symlink");
    let second = capture(repo.path()).expect("recapture");
    assert_eq!(
        first.anchor_kind,
        AnchorKind::CheckoutState,
        "untracked symlink makes the tree dirty"
    );
    assert_eq!(first, second, "dangling-symlink capture is deterministic");
}
/// An untracked symlink whose target is the non-UTF-8 byte pair `FF FE` must
/// capture successfully as checkout state and capture identically twice.
#[cfg(unix)]
#[test]
fn untracked_symlink_non_utf8_target_bytes() {
    use std::os::unix::ffi::OsStrExt;

    let repo = ScratchRepo::new();
    repo.commit("a.txt", "hello", "init");

    // 0xFF 0xFE is not valid UTF-8, so the target only exists as raw bytes.
    let raw_target = std::ffi::OsStr::from_bytes(b"\xFF\xFE");
    std::os::unix::fs::symlink(raw_target, repo.path().join("link")).expect("symlink");

    let first = capture(repo.path()).expect("capture non-utf8 symlink target");
    let second = capture(repo.path()).expect("recapture");
    assert_eq!(first.anchor_kind, AnchorKind::CheckoutState);
    assert_eq!(
        first, second,
        "non-utf8 symlink target capture is deterministic"
    );
}
/// An untracked file whose name contains a newline byte must capture as
/// checkout state with a non-empty `checkout_state_id`, identically on a
/// second capture.
#[cfg(unix)]
#[test]
fn untracked_newline_in_name_is_deterministic() {
    use std::os::unix::ffi::OsStrExt;

    let repo = ScratchRepo::new();
    repo.commit("a.txt", "hello", "init");

    let odd_name = std::ffi::OsStr::from_bytes(b"wei\nrd.txt");
    let odd_path = repo.path().join(odd_name);
    std::fs::write(odd_path, "contents").expect("write newline file");

    let first = capture(repo.path()).expect("capture newline-name file");
    let second = capture(repo.path()).expect("recapture");
    assert_eq!(
        first.anchor_kind,
        AnchorKind::CheckoutState,
        "an untracked newline-named file makes the tree dirty"
    );
    assert!(!first.checkout_state_id.is_empty());
    assert_eq!(first, second, "newline-in-name capture is deterministic");
}
/// Builds a history with two root commits by merging an orphan branch into
/// `main` with `--allow-unrelated-histories`, then expects `capture` to fail
/// with `CaptureError::MultiRoot(2)`.
#[test]
fn multi_root_repository_is_rejected() {
    let repo = ScratchRepo::new();
    repo.commit("a.txt", "hello", "init");

    // Start a second, parentless line of history.
    repo.git(&["checkout", "--orphan", "other"]);
    // Clearing the tree is best-effort: the outcome of `git rm` is
    // intentionally ignored here.
    let mut clear_tree = Command::new("git");
    clear_tree
        .arg("-C")
        .arg(repo.path())
        .args(["rm", "-rf", "."]);
    let _ = clear_tree.output();
    repo.commit("b.txt", "world", "second root");

    // Join both roots on `main`.
    repo.git(&["checkout", "main"]);
    repo.git(&[
        "merge",
        "other",
        "--allow-unrelated-histories",
        "-m",
        "merge roots",
    ]);

    let result = capture(repo.path());
    assert!(
        matches!(result, Err(CaptureError::MultiRoot(2))),
        "got {result:?}"
    );
}
/// Pins the captured frame for a fixed fixture: one commit, an `origin`
/// remote, and one untracked file. The expected `checkout_state_id` is a
/// golden value and must not be edited casually.
#[test]
fn conformance_golden_vector() {
    let expected_csid = "88d9489028e302700c2e6430e6df1d06539dccfd283d2ed99995258482ccf86c";

    let repo = ScratchRepo::new();
    repo.commit("a.txt", "hello", "init");
    repo.git(&["remote", "add", "origin", "https://github.com/org/repo.git"]);
    repo.write("untracked.txt", "world");

    let golden = capture(repo.path()).expect("capture golden");
    assert_eq!(golden.repo.repo_id, "github.com/org/repo");
    assert_eq!(golden.repo.kind, RepoIdKind::Remote);
    assert_eq!(golden.anchor_kind, AnchorKind::CheckoutState);
    assert_eq!(
        golden.checkout_state_id, expected_csid,
        "conformance golden checkout_state_id"
    );
}
/// Wraps a single path string in a one-element `Vec<String>`, the shape the
/// `commits_touching` tests pass as their path list.
fn p(s: &str) -> Vec<String> {
    vec![String::from(s)]
}
/// With an empty path list `commits_touching` must return `None`. The
/// directory is a bare temp dir and the endpoints are arbitrary strings.
#[test]
fn empty_paths_returns_none_without_spawning() {
    let scratch = tempfile::tempdir().expect("tempdir");
    let count = commits_touching(scratch.path(), &[], "deadbeef", "cafebabe");
    assert_eq!(count, None);
}
/// An empty string in either endpoint position must make `commits_touching`
/// return `None`.
#[test]
fn empty_endpoints_return_none() {
    let repo = ScratchRepo::new();
    let head = repo.commit("a.txt", "x", "init");
    let paths = p("a.txt");

    assert_eq!(commits_touching(repo.path(), &paths, "", &head), None);
    assert_eq!(commits_touching(repo.path(), &paths, &head, ""), None);
}
/// When both endpoints are the same commit the count must be `Some(0)`.
#[test]
fn no_commits_since_anchor_is_zero() {
    let repo = ScratchRepo::new();
    let head = repo.commit("a.txt", "x", "init");

    let count = commits_touching(repo.path(), &p("a.txt"), &head, &head);
    assert_eq!(count, Some(0));
}
/// Two later commits that both edit `a.txt` must be counted as `Some(2)`
/// between the first commit and the tip.
#[test]
fn counts_commits_touching_scoped_path() {
    let repo = ScratchRepo::new();
    let base = repo.commit("a.txt", "1", "init");
    repo.commit("a.txt", "2", "edit");
    let tip = repo.commit("a.txt", "3", "edit again");

    let count = commits_touching(repo.path(), &p("a.txt"), &base, &tip);
    assert_eq!(count, Some(2));
}
/// A later commit that only touches `b.txt` must not be counted when the
/// path list is `a.txt`: the result is `Some(0)`.
#[test]
fn pathspec_narrows_out_other_paths() {
    let repo = ScratchRepo::new();
    let base = repo.commit("a.txt", "1", "init");
    let tip = repo.commit("b.txt", "1", "unrelated");

    let count = commits_touching(repo.path(), &p("a.txt"), &base, &tip);
    assert_eq!(count, Some(0));
}
/// Passing the endpoints in reverse (newer commit first, older commit
/// second) must yield `None` rather than a count.
#[test]
fn non_ancestor_since_returns_none_not_overcount() {
    let repo = ScratchRepo::new();
    let older = repo.commit("a.txt", "1", "init");
    let newer = repo.commit("a.txt", "2", "edit");

    let count = commits_touching(repo.path(), &p("a.txt"), &newer, &older);
    assert_eq!(count, None);
}
/// An all-zero object id in either endpoint position must make
/// `commits_touching` return `None`.
#[test]
fn missing_object_returns_none() {
    let repo = ScratchRepo::new();
    let head = repo.commit("a.txt", "1", "init");
    let bogus = "0000000000000000000000000000000000000000";
    let paths = p("a.txt");

    let bogus_first = commits_touching(repo.path(), &paths, bogus, &head);
    assert_eq!(bogus_first, None);

    let bogus_second = commits_touching(repo.path(), &paths, &head, bogus);
    assert_eq!(bogus_second, None);
}
/// After checking out the base commit directly, the count between the two
/// explicit commit ids must still be `Some(1)`.
#[test]
fn detached_head_with_frozen_target_still_counts() {
    let repo = ScratchRepo::new();
    let base = repo.commit("a.txt", "1", "init");
    let tip = repo.commit("a.txt", "2", "edit");

    // Move HEAD back to `base`; `tip` is still passed explicitly below.
    repo.git(&["checkout", &base]);

    let count = commits_touching(repo.path(), &p("a.txt"), &base, &tip);
    assert_eq!(count, Some(1));
}
use std::ffi::OsStr;
/// Writes one `.doctrine/slice/NNN/slice.toml` per id (zero-padded to three
/// digits), adds a non-numeric `scratch-notes` sibling directory, and commits
/// everything in a single commit.
fn commit_slice_dirs(repo: &ScratchRepo, ids: &[u32]) {
    for &id in ids {
        let manifest = format!(".doctrine/slice/{id:03}/slice.toml");
        repo.write(&manifest, "x = 1\n");
    }
    // A directory whose name is not a number, seeded next to the numeric ones.
    repo.write(".doctrine/slice/scratch-notes/n.md", "ignore me\n");
    repo.git(&["add", "-A"]);
    repo.git(&["commit", "-m", "seed slices"]);
}
/// `trunk_entity_ids` must return exactly the committed numeric directory
/// ids (1, 2, 4); the seeded `scratch-notes` directory must not show up.
#[test]
fn trunk_entity_ids_reads_committed_numeric_dirs() {
    let repo = ScratchRepo::new();
    commit_slice_dirs(&repo, &[1, 2, 4]);

    let mut found = super::trunk_entity_ids(repo.path(), ".doctrine/slice").unwrap();
    // Sort before comparing so the assertion does not depend on result order.
    found.sort_unstable();
    assert_eq!(found, vec![1, 2, 4]);
}
/// Passing a `kind_dir` that already starts with `.doctrine/` must still
/// resolve the seeded id 7.
#[test]
fn trunk_entity_ids_does_not_reprepend_doctrine() {
    let repo = ScratchRepo::new();
    commit_slice_dirs(&repo, &[7]);

    let found = super::trunk_entity_ids(repo.path(), ".doctrine/slice").unwrap();
    assert_eq!(found, vec![7], "prefixed kind_dir must not be re-prepended");
}
/// On a scratch repo where this test makes no commits, `trunk_tree_ish` must
/// resolve to `None` and `trunk_entity_ids` must return an empty list.
#[test]
fn trunk_entity_ids_empty_without_trunk() {
    let repo = ScratchRepo::new();

    let tree_ish = super::trunk_tree_ish(repo.path()).unwrap();
    assert_eq!(tree_ish, None);

    let found = super::trunk_entity_ids(repo.path(), ".doctrine/slice").unwrap();
    assert_eq!(found, Vec::<u32>::new());
}
/// An explicit override naming a ref that does not exist must make
/// `trunk_ladder` return an error whose text mentions `DOCTRINE_TRUNK_REF`.
#[test]
fn trunk_ladder_explicit_unpeelable_ref_is_hard_error() {
    let repo = ScratchRepo::new();
    repo.commit("a.txt", "hello", "init");

    let missing_ref = OsStr::new("refs/heads/does-not-exist");
    let err = super::trunk_ladder(repo.path(), Some(missing_ref)).unwrap_err();
    let rendered = err.to_string();
    assert!(
        rendered.contains("DOCTRINE_TRUNK_REF"),
        "error names the offending override: {err}"
    );
}
/// An explicit override of `main` must resolve to the id of the commit just
/// made on the scratch repo.
#[test]
fn trunk_ladder_explicit_valid_ref_wins() {
    let repo = ScratchRepo::new();
    let head = repo.commit("a.txt", "hello", "init");

    let explicit = OsStr::new("main");
    let resolved = super::trunk_ladder(repo.path(), Some(explicit)).unwrap();
    assert_eq!(resolved, Some(head));
}
}