#![cfg_attr(
not(test),
expect(
dead_code,
reason = "pure git seam; consumers wired by capture (PHASE-02), record (PHASE-04), verify (PHASE-05)"
)
)]
use std::path::Path;
use std::process::Command;
use serde_json::{Number, Value};
use sha2::{Digest, Sha256};
// Version tag naming the remote-URL normalization scheme.
// NOTE(review): only a test referencing this constant is visible here; confirm where it is persisted.
pub(crate) const REMOTE_NORMALIZER: &str = "forget.remote.v1";
// Version tag embedded in the payload hashed by `checkout_state_id`, so the id
// changes whenever the checkout normalization scheme is re-versioned.
pub(crate) const CHECKOUT_NORMALIZER: &str = "forget.checkout.v1";
/// How a captured [`Frame`] is pinned to repository state.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum AnchorKind {
/// Clean checkout: the frame is anchored at the `HEAD` commit.
Commit,
/// Dirty checkout: the frame is anchored by a `checkout_state_id` digest.
CheckoutState,
/// No anchor: not inside a work tree, or `HEAD` does not resolve to a commit.
None,
}
/// Which source a repository identifier was derived from.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum RepoIdKind {
/// Taken from an explicitly supplied identifier (see `explicit_identity`).
Explicit,
/// Derived from the normalized URL of the selected remote.
Remote,
/// Fallback derived from the repository's root commit (or the unborn marker).
LocalRoot,
}
/// How much trust to place in a derived repository identity.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum Confidence {
/// Assigned to explicit and remote-derived identities.
High,
/// Assigned to the local-root fallback when `HEAD` resolves to a commit.
Medium,
/// Assigned to the local-root fallback on an unborn `HEAD` and to the unanchored frame.
Low,
}
impl AnchorKind {
    /// Decodes the wire token produced by [`AnchorKind::as_str`].
    ///
    /// # Errors
    /// Returns a message naming the offending token when it is not recognised.
    pub(crate) fn parse(s: &str) -> Result<Self, String> {
        match s {
            "commit" => Ok(Self::Commit),
            "checkout_state" => Ok(Self::CheckoutState),
            "none" => Ok(Self::None),
            unknown => Err(format!("unknown anchor_kind {unknown:?}")),
        }
    }

    /// Returns the stable wire token for this anchor kind.
    pub(crate) fn as_str(self) -> &'static str {
        match self {
            Self::None => "none",
            Self::CheckoutState => "checkout_state",
            Self::Commit => "commit",
        }
    }
}
impl RepoIdKind {
    /// Decodes the wire token produced by [`RepoIdKind::as_str`].
    ///
    /// # Errors
    /// Returns a message naming the offending token when it is not recognised.
    pub(crate) fn parse(s: &str) -> Result<Self, String> {
        match s {
            "explicit" => Ok(Self::Explicit),
            "remote" => Ok(Self::Remote),
            "local_root" => Ok(Self::LocalRoot),
            unknown => Err(format!("unknown repo_id_kind {unknown:?}")),
        }
    }

    /// Returns the stable wire token for this identity kind.
    pub(crate) fn as_str(self) -> &'static str {
        match self {
            Self::LocalRoot => "local_root",
            Self::Remote => "remote",
            Self::Explicit => "explicit",
        }
    }
}
impl Confidence {
    /// Decodes the wire token produced by [`Confidence::as_str`].
    ///
    /// # Errors
    /// Returns a message naming the offending token when it is not recognised.
    pub(crate) fn parse(s: &str) -> Result<Self, String> {
        match s {
            "high" => Ok(Self::High),
            "medium" => Ok(Self::Medium),
            "low" => Ok(Self::Low),
            unknown => Err(format!("unknown confidence {unknown:?}")),
        }
    }

    /// Returns the stable wire token for this confidence level.
    pub(crate) fn as_str(self) -> &'static str {
        match self {
            Self::Low => "low",
            Self::Medium => "medium",
            Self::High => "high",
        }
    }
}
/// A repository identifier together with how it was obtained and how far to trust it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct RepoIdentity {
/// The identifier itself: a normalized remote (`host/path`), a raw explicit
/// value, or a `repo:git-root:…` fallback; empty in the unanchored frame.
pub repo_id: String,
/// Which source produced `repo_id`.
pub kind: RepoIdKind,
/// Trust level assigned to `repo_id`.
pub confidence: Confidence,
}
/// Snapshot of where a checkout stands, as produced by `capture`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct Frame {
/// Which of the fields below carry the anchor.
pub anchor_kind: AnchorKind,
/// Identity of the repository the frame was captured in.
pub repo: RepoIdentity,
/// `HEAD` commit for a clean checkout; empty when dirty or unanchored.
pub commit: String,
/// Tree of `HEAD` (`HEAD^{tree}`); empty when unanchored.
pub tree: String,
/// Symbolic ref `HEAD` points at; empty when `git symbolic-ref` yields nothing.
pub ref_name: String,
/// Digest of the dirty checkout state; empty unless `anchor_kind` is `CheckoutState`.
pub checkout_state_id: String,
/// `HEAD` commit for both clean and dirty checkouts; empty when unanchored.
pub base_commit: String,
}
// Lower-case hex digits, indexed by nibble, used when emitting `\u00XX` escapes.
const HEX: &[u8; 16] = b"0123456789abcdef";
/// Error raised when a canonical payload contains a number that is neither an
/// `i64` nor a `u64`; carries the number's textual rendering.
#[derive(Debug, thiserror::Error)]
#[error("non-integer number in canonical payload: {0}")]
pub(crate) struct NonIntegerNumber(pub String);
/// Serializes `value` into its canonical byte form: no insignificant
/// whitespace, object keys ordered bytewise, integers only.
///
/// # Errors
/// Returns [`NonIntegerNumber`] if any number in `value` is not an integer.
pub(crate) fn canonical_bytes(value: &Value) -> Result<Vec<u8>, NonIntegerNumber> {
    let mut encoded = Vec::new();
    write_value(value, &mut encoded).map(|()| encoded)
}
/// Appends the canonical encoding of `value` to `out`, recursing into arrays
/// (order preserved) and objects (members ordered by the bytes of their key).
///
/// # Errors
/// Propagates [`NonIntegerNumber`] from the first non-integer number encountered;
/// `out` may already hold a partial encoding at that point.
fn write_value(value: &Value, out: &mut Vec<u8>) -> Result<(), NonIntegerNumber> {
    match value {
        Value::Null => out.extend_from_slice(b"null"),
        Value::Bool(flag) => {
            let literal = if *flag { "true" } else { "false" };
            out.extend_from_slice(literal.as_bytes());
        }
        Value::Number(number) => write_number(number, out)?,
        Value::String(text) => write_string(text, out),
        Value::Array(items) => {
            out.push(b'[');
            let mut elements = items.iter();
            if let Some(first) = elements.next() {
                write_value(first, out)?;
                for element in elements {
                    out.push(b',');
                    write_value(element, out)?;
                }
            }
            out.push(b']');
        }
        Value::Object(map) => {
            // Sort explicitly so the output does not depend on the map's iteration order.
            let mut members: Vec<(&String, &Value)> = map.iter().collect();
            members.sort_by(|(left, _), (right, _)| left.as_bytes().cmp(right.as_bytes()));
            out.push(b'{');
            for (position, (key, member)) in members.into_iter().enumerate() {
                if position > 0 {
                    out.push(b',');
                }
                write_string(key, out);
                out.push(b':');
                write_value(member, out)?;
            }
            out.push(b'}');
        }
    }
    Ok(())
}
/// Appends the decimal rendering of an integer `n` to `out`.
///
/// # Errors
/// Returns [`NonIntegerNumber`] when `n` is representable as neither `i64` nor `u64`.
fn write_number(n: &Number, out: &mut Vec<u8>) -> Result<(), NonIntegerNumber> {
    let digits = match (n.as_i64(), n.as_u64()) {
        (Some(signed), _) => signed.to_string(),
        (None, Some(unsigned)) => unsigned.to_string(),
        (None, None) => return Err(NonIntegerNumber(n.to_string())),
    };
    out.extend_from_slice(digits.as_bytes());
    Ok(())
}
/// Appends `s` to `out` as a double-quoted string with minimal escaping:
/// `"` and `\` are backslash-escaped, the five short control escapes are used
/// where they exist, any other character below U+0020 becomes `\u00XX`, and
/// everything else is emitted as raw UTF-8.
fn write_string(s: &str, out: &mut Vec<u8>) {
    out.push(b'"');
    let mut utf8 = [0_u8; 4];
    for ch in s.chars() {
        let short_escape: Option<&str> = match ch {
            '"' => Some("\\\""),
            '\\' => Some("\\\\"),
            '\u{08}' => Some("\\b"),
            '\u{09}' => Some("\\t"),
            '\u{0A}' => Some("\\n"),
            '\u{0C}' => Some("\\f"),
            '\u{0D}' => Some("\\r"),
            _ => None,
        };
        if let Some(escape) = short_escape {
            out.extend_from_slice(escape.as_bytes());
        } else if u32::from(ch) < 0x20 {
            write_control_escape(ch, out);
        } else {
            out.extend_from_slice(ch.encode_utf8(&mut utf8).as_bytes());
        }
    }
    out.push(b'"');
}
fn write_control_escape(c: char, out: &mut Vec<u8>) {
let code = u32::from(c);
let hi = usize::try_from((code >> 4) & 0xf).unwrap_or(0);
let lo = usize::try_from(code & 0xf).unwrap_or(0);
out.extend_from_slice(b"\\u00");
if let Some(&h) = HEX.get(hi) {
out.push(h);
}
if let Some(&l) = HEX.get(lo) {
out.push(l);
}
}
/// Returns the SHA-256 digest of `bytes`, hex-encoded via `hex::encode`.
pub(crate) fn sha256(bytes: &[u8]) -> String {
    hex::encode(Sha256::digest(bytes))
}
/// Derives the identifier of a dirty checkout: the SHA-256 of the canonical
/// encoding of the three fingerprints plus the [`CHECKOUT_NORMALIZER`] tag.
///
/// The payload holds only strings, so canonical encoding cannot hit the
/// non-integer error path; the `unwrap_or_default` fallback exists for that reason.
pub(crate) fn checkout_state_id(
    index_tree: &str,
    worktree_fingerprint: &str,
    untracked_fingerprint: &str,
) -> String {
    let payload = serde_json::json!({
        "normalizer": CHECKOUT_NORMALIZER,
        "index_tree": index_tree,
        "worktree_fingerprint": worktree_fingerprint,
        "untracked_fingerprint": untracked_fingerprint,
    });
    let encoded = canonical_bytes(&payload).unwrap_or_default();
    sha256(&encoded)
}
/// A remote URL reduced to the components that identify the repository.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct NormalizedRemote {
    /// Lower-cased host with any userinfo removed.
    pub host: String,
    /// Port that is part of the identity; `None` when it was omitted or equals
    /// a scheme default that is dropped.
    pub port: Option<u16>,
    /// Repository path without surrounding slashes or a trailing `.git`.
    pub path: String,
    /// `host/path`, or `host:port/path` when `port` is set.
    pub repo_id: String,
}

/// Port conventions of a supported URL scheme.
#[derive(Debug, Clone, Copy)]
struct SchemeInfo {
    /// Port assumed when the URL names none.
    default_port: u16,
    /// Whether the default port is omitted from the normalized identity.
    drop_default: bool,
}

/// Looks up the port conventions for `scheme`; `None` for unsupported schemes.
/// Matching is case-sensitive.
fn scheme_info(scheme: &str) -> Option<SchemeInfo> {
    let (default_port, drop_default) = match scheme {
        "ssh" => (22, true),
        "https" => (443, true),
        "http" => (80, true),
        // `git://` always keeps its port in the identity, even the default one.
        "git" => (9418, false),
        _ => return None,
    };
    Some(SchemeInfo {
        default_port,
        drop_default,
    })
}

/// Strips surrounding slashes and one trailing `.git` from a repository path.
fn clean_path(path: &str) -> String {
    let trimmed = path.trim_matches('/');
    let without_git = trimmed.strip_suffix(".git").unwrap_or(trimmed);
    without_git.trim_end_matches('/').to_string()
}

/// Splits `host[:port]` into a lower-cased host and the port to render.
///
/// A port that fails to parse is treated as absent. The rendered port is
/// `None` when it equals the scheme default and the scheme drops defaults.
fn host_and_port(hostport: &str, scheme: SchemeInfo) -> (String, Option<u16>) {
    let (host, explicit_port) = match hostport.rsplit_once(':') {
        // The last colon of a bare bracketed IPv6 literal (`[::1]`) belongs to
        // the address itself, not to a port separator.
        Some((_, tail)) if tail.ends_with(']') => (hostport, None),
        Some((h, p)) => (h, p.parse::<u16>().ok()),
        None => (hostport, None),
    };
    let port = explicit_port.unwrap_or(scheme.default_port);
    let rendered = if scheme.drop_default && port == scheme.default_port {
        None
    } else {
        Some(port)
    };
    (host.to_lowercase(), rendered)
}

/// Normalizes a git remote URL into a [`NormalizedRemote`].
///
/// Accepts `scheme://[user[:password]@]host[:port]/path` for the `ssh`,
/// `https`, `http` and `git` schemes, and the scp-like `[user@]host:path`
/// form. Returns `None` for anything else, or when the host or the cleaned
/// path is empty.
pub(crate) fn normalize_remote_url(raw: &str) -> Option<NormalizedRemote> {
    let raw = raw.trim();
    let (host, port, path) = if let Some((scheme, rest)) = raw.split_once("://") {
        let scheme = scheme_info(scheme)?;
        // Separate the authority from the path first: userinfo can only appear
        // in the authority, so an `@` inside the path must be left alone.
        let (authority, path) = rest.split_once('/').unwrap_or((rest, ""));
        let hostport = authority.rsplit_once('@').map_or(authority, |(_, h)| h);
        let (host, port) = host_and_port(hostport, scheme);
        (host, port, clean_path(path))
    } else if let Some((hostpart, path)) = raw.split_once(':') {
        // scp-like form: everything before the first colon is `[user@]host`.
        let host = hostpart.rsplit_once('@').map_or(hostpart, |(_, h)| h);
        (host.to_lowercase(), None, clean_path(path))
    } else {
        return None;
    };
    if host.is_empty() || path.is_empty() {
        return None;
    }
    let repo_id = match port {
        Some(p) => format!("{host}:{p}/{path}"),
        None => format!("{host}/{path}"),
    };
    Some(NormalizedRemote {
        host,
        port,
        path,
        repo_id,
    })
}
// `-c` overrides passed on every git invocation made by this file, pinning
// line-ending and file-mode handling regardless of the user's git config.
const NORMATIVE_FLAGS: &[&str] = &[
"-c",
"core.autocrlf=false",
"-c",
"core.eol=lf",
"-c",
"core.fileMode=true",
];
// git config key holding an explicitly configured repository id.
const CONFIG_EXPLICIT_REPO_ID: &str = "doctrine.repo.id";
// git config key naming the remote to prefer when deriving the repository id.
const CONFIG_PREFERRED_REMOTE: &str = "doctrine.repo.preferredremote";
/// Failures raised by the git helpers and by `capture`.
#[derive(Debug, thiserror::Error)]
pub(crate) enum CaptureError {
/// History reachable from `HEAD` has more than one root commit; carries the count.
#[error("unsupported: multi-root repository ({0} root commits)")]
MultiRoot(usize),
/// The index contains a gitlink (mode `160000`) entry.
#[error("unsupported: submodule entry (gitlink mode 160000)")]
Submodule,
/// Several remotes exist, none is preferred and none is `origin`; carries their names.
#[error("ambiguous remote selection: multiple remotes without origin: {0:?}")]
AmbiguousRemote(Vec<String>),
/// git could not be spawned, exited unsuccessfully, or produced non-UTF-8 output.
#[error("git command failed: {0}")]
Git(String),
/// A filesystem operation performed during capture failed.
#[error("io error during capture: {0}")]
Io(String),
}
/// Runs `git -C <root> <normative flags> <args…>` with no extra environment
/// variables and returns the raw process output, whatever the exit status.
fn run_git(root: &Path, args: &[&str]) -> Result<std::process::Output, CaptureError> {
run_git_env(root, args, &[])
}
/// Runs `git -C <root> <normative flags> <args…>` with `envs` added to the
/// child's environment and returns the raw output, whatever the exit status.
///
/// # Errors
/// Returns [`CaptureError::Git`] only when the process cannot be spawned.
fn run_git_env(
    root: &Path,
    args: &[&str],
    envs: &[(&str, &std::ffi::OsStr)],
) -> Result<std::process::Output, CaptureError> {
    let mut git = Command::new("git");
    git.arg("-C").arg(root);
    git.args(NORMATIVE_FLAGS);
    git.args(args);
    git.envs(envs.iter().copied());
    match git.output() {
        Ok(output) => Ok(output),
        Err(e) => Err(CaptureError::Git(format!(
            "spawn git {}: {e}",
            args.join(" ")
        ))),
    }
}
/// Runs git and returns its stdout as raw, untrimmed bytes.
///
/// # Errors
/// Returns [`CaptureError::Git`] carrying the arguments and trimmed stderr
/// when git cannot be spawned or exits unsuccessfully.
pub(crate) fn git_bytes(root: &Path, args: &[&str]) -> Result<Vec<u8>, CaptureError> {
    let output = run_git(root, args)?;
    if !output.status.success() {
        let diagnostic = String::from_utf8_lossy(&output.stderr);
        return Err(CaptureError::Git(format!(
            "{}: {}",
            args.join(" "),
            diagnostic.trim()
        )));
    }
    Ok(output.stdout)
}
/// Runs git and returns its stdout as UTF-8 text with surrounding whitespace trimmed.
///
/// # Errors
/// Returns [`CaptureError::Git`] when git fails or its output is not valid UTF-8.
pub(crate) fn git_text(root: &Path, args: &[&str]) -> Result<String, CaptureError> {
    let raw = git_bytes(root, args)?;
    match String::from_utf8(raw) {
        Ok(text) => Ok(text.trim().to_string()),
        Err(_ignored) => Err(CaptureError::Git(format!(
            "non-utf8 output: {}",
            args.join(" ")
        ))),
    }
}
/// Runs git and returns its trimmed stdout, or `None` when git exits
/// unsuccessfully (stderr is discarded in that case).
///
/// # Errors
/// Returns [`CaptureError::Git`] when git cannot be spawned or a successful
/// run produced non-UTF-8 output.
pub(crate) fn git_opt(root: &Path, args: &[&str]) -> Result<Option<String>, CaptureError> {
    let output = run_git(root, args)?;
    if output.status.success() {
        match String::from_utf8(output.stdout) {
            Ok(text) => Ok(Some(text.trim().to_string())),
            Err(_ignored) => Err(CaptureError::Git(format!(
                "non-utf8 output: {}",
                args.join(" ")
            ))),
        }
    } else {
        Ok(None)
    }
}
/// Feeds `patch` to `git apply --3way --index` in `root`, updating both the
/// index and the working tree.
///
/// # Errors
/// Returns [`CaptureError::Git`] when git cannot be spawned, the patch cannot
/// be written to its stdin, or git exits unsuccessfully. The message includes
/// git's trimmed stderr whenever the child produced any.
pub(crate) fn git_apply_index(root: &Path, patch: &str) -> Result<(), CaptureError> {
    use std::io::Write as _;
    use std::process::Stdio;
    let mut child = Command::new("git")
        .arg("-C")
        .arg(root)
        .args(NORMATIVE_FLAGS)
        .args(["apply", "--3way", "--index"])
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .map_err(|e| CaptureError::Git(format!("spawn git apply: {e}")))?;
    let Some(mut stdin) = child.stdin.take() else {
        // Not expected with `Stdio::piped()`, but never leave the child unreaped.
        drop(child.kill());
        drop(child.wait());
        return Err(CaptureError::Git("git apply: no stdin pipe".to_owned()));
    };
    let written = stdin.write_all(patch.as_bytes());
    // Close the pipe so git sees end-of-input before we wait on it.
    drop(stdin);
    // Always wait, even after a failed write: this reaps the child and collects
    // the stderr that usually explains why git stopped reading.
    let output = child
        .wait_with_output()
        .map_err(|e| CaptureError::Git(format!("git apply: wait: {e}")))?;
    let stderr = String::from_utf8_lossy(&output.stderr);
    let stderr = stderr.trim();
    if let Err(e) = written {
        let message = if stderr.is_empty() {
            format!("git apply: write stdin: {e}")
        } else {
            format!("git apply: write stdin: {e}: {stderr}")
        };
        return Err(CaptureError::Git(message));
    }
    if output.status.success() {
        Ok(())
    } else {
        Err(CaptureError::Git(format!(
            "apply --3way --index: {stderr}"
        )))
    }
}
/// Runs `git cherry <upstream> <head>` and returns its output lines unchanged
/// (apart from the whitespace trimming applied to the output as a whole).
///
/// # Errors
/// Propagates any [`CaptureError`] from running git.
pub(crate) fn git_cherry(
    root: &Path,
    upstream: &str,
    head: &str,
) -> Result<Vec<String>, CaptureError> {
    git_text(root, &["cherry", upstream, head])
        .map(|listing| listing.lines().map(str::to_owned).collect())
}
/// Runs git with `args` and reports only whether it exited successfully;
/// stdout and stderr are discarded. Errors only when git cannot be spawned.
pub(crate) fn git_status_ok(root: &Path, args: &[&str]) -> Result<bool, CaptureError> {
Ok(run_git(root, args)?.status.success())
}
/// Like `git_text`, but runs git with `envs` added to its environment.
///
/// # Errors
/// Returns [`CaptureError::Git`] when git cannot be spawned, exits
/// unsuccessfully (message carries trimmed stderr), or prints non-UTF-8 output.
fn git_env_text(
    root: &Path,
    args: &[&str],
    envs: &[(&str, &std::ffi::OsStr)],
) -> Result<String, CaptureError> {
    let output = run_git_env(root, args, envs)?;
    if !output.status.success() {
        let diagnostic = String::from_utf8_lossy(&output.stderr);
        return Err(CaptureError::Git(format!(
            "{}: {}",
            args.join(" "),
            diagnostic.trim()
        )));
    }
    match String::from_utf8(output.stdout) {
        Ok(text) => Ok(text.trim().to_string()),
        Err(_ignored) => Err(CaptureError::Git(format!(
            "non-utf8 output: {}",
            args.join(" ")
        ))),
    }
}
/// Guard owning the path of a throwaway git index file inside the git
/// directory; the file is removed again when the guard is dropped.
struct ScratchIndex {
// Absolute path handed to git via `GIT_INDEX_FILE`.
path: std::path::PathBuf,
}
impl ScratchIndex {
    /// Reserves a scratch index path named after the current process id inside
    /// the repository's git directory, after sweeping any leftover scratch files.
    ///
    /// The file itself is not created here; git creates it on first use.
    ///
    /// # Errors
    /// Returns [`CaptureError`] when the git directory cannot be resolved.
    fn new(root: &Path) -> Result<Self, CaptureError> {
        const PREFIX: &str = "doctrine-filter-index.";
        let git_dir =
            std::path::PathBuf::from(git_text(root, &["rev-parse", "--absolute-git-dir"])?);
        // Best-effort sweep: unreadable directories/entries and failed removals are ignored.
        // NOTE(review): this removes every file with the prefix, including one that
        // may belong to another live process working in the same repository — confirm
        // that concurrent use is not expected.
        let leftovers = std::fs::read_dir(&git_dir)
            .into_iter()
            .flatten()
            .flatten()
            .filter(|entry| entry.file_name().to_string_lossy().starts_with(PREFIX));
        for leftover in leftovers {
            drop(std::fs::remove_file(leftover.path()));
        }
        let path = git_dir.join(format!("{PREFIX}{}", std::process::id()));
        Ok(Self { path })
    }
}
// Removes the scratch index file when the guard goes out of scope.
impl Drop for ScratchIndex {
fn drop(&mut self) {
// Best effort: a removal error (e.g. the file was never created) is discarded.
drop(std::fs::remove_file(&self.path));
}
}
/// Builds a tree equal to `source_tree` minus every path matched by `exclude`
/// and returns the new tree's id.
///
/// Works in a scratch index (`GIT_INDEX_FILE`), so the repository's real
/// index is not touched. Pathspecs that match nothing are ignored.
///
/// # Errors
/// Propagates any [`CaptureError`] from the underlying git commands.
pub(crate) fn filter_tree(
    root: &Path,
    source_tree: &str,
    exclude: &[&str],
) -> Result<String, CaptureError> {
    const RM_PREFIX: [&str; 7] = [
        "rm",
        "--cached",
        "-r",
        "-f",
        "--ignore-unmatch",
        "--quiet",
        "--",
    ];
    let scratch = ScratchIndex::new(root)?;
    let index_env = [("GIT_INDEX_FILE", scratch.path.as_os_str())];
    git_env_text(root, &["read-tree", source_tree], &index_env)?;
    if !exclude.is_empty() {
        let rm_args: Vec<&str> = RM_PREFIX
            .iter()
            .copied()
            .chain(exclude.iter().copied())
            .collect();
        git_env_text(root, &rm_args, &index_env)?;
    }
    git_env_text(root, &["write-tree"], &index_env)
}
/// Writes `content` into the object database as a blob via
/// `git hash-object -w --stdin` and returns the blob's id.
///
/// # Errors
/// Returns [`CaptureError::Git`] when git cannot be spawned, the content
/// cannot be written to its stdin, git exits unsuccessfully, or the reported
/// id is not UTF-8. The message includes git's trimmed stderr when available.
fn hash_object_stdin(root: &Path, content: &str) -> Result<String, CaptureError> {
    use std::io::Write as _;
    use std::process::Stdio;
    let mut child = Command::new("git")
        .arg("-C")
        .arg(root)
        .args(NORMATIVE_FLAGS)
        .args(["hash-object", "-w", "--stdin"])
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .map_err(|e| CaptureError::Git(format!("spawn git hash-object: {e}")))?;
    let Some(mut stdin) = child.stdin.take() else {
        // Not expected with `Stdio::piped()`, but never leave the child unreaped.
        drop(child.kill());
        drop(child.wait());
        return Err(CaptureError::Git(
            "git hash-object: no stdin pipe".to_owned(),
        ));
    };
    let written = stdin.write_all(content.as_bytes());
    // Close the pipe so git sees end-of-input before we wait on it.
    drop(stdin);
    // Always wait, even after a failed write: this reaps the child and collects
    // the stderr that usually explains why git stopped reading.
    let output = child
        .wait_with_output()
        .map_err(|e| CaptureError::Git(format!("git hash-object: wait: {e}")))?;
    if let Err(e) = written {
        let stderr = String::from_utf8_lossy(&output.stderr);
        let stderr = stderr.trim();
        let message = if stderr.is_empty() {
            format!("git hash-object: write stdin: {e}")
        } else {
            format!("git hash-object: write stdin: {e}: {stderr}")
        };
        return Err(CaptureError::Git(message));
    }
    if output.status.success() {
        let text = String::from_utf8(output.stdout)
            .map_err(|_ignored| CaptureError::Git("hash-object: non-utf8 oid".to_owned()))?;
        Ok(text.trim().to_string())
    } else {
        Err(CaptureError::Git(format!(
            "hash-object: {}",
            String::from_utf8_lossy(&output.stderr).trim()
        )))
    }
}
/// Builds a tree equal to `base_tree` with `path` added or replaced by a
/// regular file (mode `100644`) holding `content`, and returns the tree's id.
///
/// Works in a scratch index, so the repository's real index is not touched;
/// the blob itself is written to the object database.
///
/// # Errors
/// Propagates any [`CaptureError`] from the underlying git commands.
pub(crate) fn tree_with_file(
    root: &Path,
    base_tree: &str,
    path: &str,
    content: &str,
) -> Result<String, CaptureError> {
    let scratch = ScratchIndex::new(root)?;
    let index_env = [("GIT_INDEX_FILE", scratch.path.as_os_str())];
    git_env_text(root, &["read-tree", base_tree], &index_env)?;
    let blob_oid = hash_object_stdin(root, content)?;
    let entry = format!("100644,{blob_oid},{path}");
    let stage_args = ["update-index", "--add", "--cacheinfo", entry.as_str()];
    git_env_text(root, &stage_args, &index_env)?;
    git_env_text(root, &["write-tree"], &index_env)
}
/// Reads the object at `<refish>:<path>` with `git cat-file -p`.
///
/// Returns the output with surrounding whitespace trimmed, or `None` when git
/// exits unsuccessfully (for example because the path does not exist there).
pub(crate) fn read_path_at(
root: &Path,
refish: &str,
path: &str,
) -> Result<Option<String>, CaptureError> {
git_opt(root, &["cat-file", "-p", &format!("{refish}:{path}")])
}
/// Creates a single-parent commit object for `tree` with message `msg` via
/// `git commit-tree` and returns git's trimmed output (the new commit id).
/// No ref is updated.
pub(crate) fn commit_tree(
root: &Path,
tree: &str,
parent: &str,
msg: &str,
) -> Result<String, CaptureError> {
git_text(root, &["commit-tree", tree, "-p", parent, "-m", msg])
}
/// Result of an in-memory merge performed by `merge_tree`.
pub(crate) enum MergeTree {
/// git exited with status 0; `tree` is the trimmed output it printed.
Clean { tree: String },
/// git exited with status 1, which `merge_tree` treats as a conflicted merge.
Conflict,
}
/// Merges `ours` and `theirs` against `merge_base` with
/// `git merge-tree --write-tree`, without touching the index or work tree.
///
/// Exit status 0 yields [`MergeTree::Clean`], status 1 yields [`MergeTree::Conflict`].
///
/// # Errors
/// Returns [`CaptureError::Git`] for any other exit status, a spawn failure,
/// or a non-UTF-8 tree id.
pub(crate) fn merge_tree(
    root: &Path,
    merge_base: &str,
    ours: &str,
    theirs: &str,
) -> Result<MergeTree, CaptureError> {
    let base_flag = format!("--merge-base={merge_base}");
    let merge_args = ["merge-tree", "--write-tree", base_flag.as_str(), ours, theirs];
    let output = run_git(root, &merge_args)?;
    let exit = output.status.code();
    if exit == Some(0) {
        let Ok(printed) = String::from_utf8(output.stdout) else {
            return Err(CaptureError::Git("merge-tree: non-utf8 oid".to_owned()));
        };
        Ok(MergeTree::Clean {
            tree: printed.trim().to_string(),
        })
    } else if exit == Some(1) {
        Ok(MergeTree::Conflict)
    } else {
        let diagnostic = String::from_utf8_lossy(&output.stderr);
        Err(CaptureError::Git(format!(
            "merge-tree --merge-base={merge_base} {ours} {theirs}: {}",
            diagnostic.trim()
        )))
    }
}
/// Creates a two-parent (merge) commit object for `tree` with message `msg`
/// via `git commit-tree` and returns git's trimmed output (the new commit id).
/// Parent order is preserved: `first_parent`, then `second_parent`. No ref is updated.
pub(crate) fn commit_tree_merge(
root: &Path,
tree: &str,
first_parent: &str,
second_parent: &str,
msg: &str,
) -> Result<String, CaptureError> {
git_text(
root,
&[
"commit-tree",
tree,
"-p",
first_parent,
"-p",
second_parent,
"-m",
msg,
],
)
}
/// Outcome of the compare-and-swap ref update performed by `update_ref_cas`.
pub(crate) enum RefCas {
/// `git update-ref` succeeded.
Updated,
/// `git update-ref` failed; `actual` is what the ref resolved to afterwards
/// (`None` when it does not resolve).
Moved { actual: Option<String> },
}
/// Points `refname` at `new_oid` only if it currently equals `expected_old`,
/// using `git update-ref <ref> <new> <old>`.
///
/// Any unsuccessful exit of `update-ref` is reported as [`RefCas::Moved`],
/// together with the value the ref resolves to afterwards.
///
/// # Errors
/// Returns [`CaptureError`] only when git cannot be run or prints non-UTF-8 output.
pub(crate) fn update_ref_cas(
    root: &Path,
    refname: &str,
    new_oid: &str,
    expected_old: &str,
) -> Result<RefCas, CaptureError> {
    let swapped = run_git(root, &["update-ref", refname, new_oid, expected_old])?
        .status
        .success();
    if swapped {
        return Ok(RefCas::Updated);
    }
    git_opt(root, &["rev-parse", "--verify", "--quiet", refname])
        .map(|actual| RefCas::Moved { actual })
}
// Forty zeros: the value `replay_ref` substitutes for a ref that does not resolve.
pub(crate) const ZERO_OID: &str = "0000000000000000000000000000000000000000";
/// Outcome of `replay_ref`.
#[derive(Debug)]
pub(crate) enum ReplayOutcome {
/// The ref already pointed at the planned value; nothing was written.
NoOp,
/// The ref was at the expected old value and has been moved to the planned value.
Applied,
/// The ref was at neither value, or moved while updating; `actual` is what was observed.
Moved { actual: Option<String> },
}
/// Idempotently moves `refname` from `expected_old` to `planned`.
///
/// A ref that does not resolve is compared as [`ZERO_OID`]. Already at
/// `planned` → `NoOp`; at `expected_old` → compare-and-swap update; anything
/// else → `Moved` carrying the observed value.
///
/// # Errors
/// Propagates any [`CaptureError`] from running git.
pub(crate) fn replay_ref(
    root: &Path,
    refname: &str,
    expected_old: &str,
    planned: &str,
) -> Result<ReplayOutcome, CaptureError> {
    let observed = git_opt(root, &["rev-parse", "--verify", "--quiet", refname])?;
    let current = observed.as_deref().unwrap_or(ZERO_OID);
    if current == planned {
        return Ok(ReplayOutcome::NoOp);
    }
    if current != expected_old {
        return Ok(ReplayOutcome::Moved { actual: observed });
    }
    let outcome = match update_ref_cas(root, refname, planned, expected_old)? {
        RefCas::Updated => ReplayOutcome::Applied,
        // Lost a race between the read above and the update.
        RefCas::Moved { actual } => ReplayOutcome::Moved { actual },
    };
    Ok(outcome)
}
/// Reports whether `ancestor` is an ancestor of `descendant`, using
/// `git merge-base --is-ancestor` (exit 0 → `true`, exit 1 → `false`).
///
/// # Errors
/// Returns [`CaptureError::Git`] with git's stderr for any other exit status,
/// or when git cannot be spawned.
pub(crate) fn is_ancestor(
    root: &Path,
    ancestor: &str,
    descendant: &str,
) -> Result<bool, CaptureError> {
    let probe = run_git(root, &["merge-base", "--is-ancestor", ancestor, descendant])?;
    let exit = probe.status.code();
    if exit == Some(0) {
        Ok(true)
    } else if exit == Some(1) {
        Ok(false)
    } else {
        let diagnostic = String::from_utf8_lossy(&probe.stderr);
        Err(CaptureError::Git(format!(
            "merge-base --is-ancestor {ancestor} {descendant}: {}",
            diagnostic.trim()
        )))
    }
}
/// Returns the parent ids of `commit`, in the order git lists them.
///
/// Parses `git rev-list --parents -n 1`, whose single line is the commit id
/// followed by its parents; the first token is skipped.
///
/// # Errors
/// Propagates any [`CaptureError`] from running git.
pub(crate) fn parents(root: &Path, commit: &str) -> Result<Vec<String>, CaptureError> {
    let listing = git_text(root, &["rev-list", "--parents", "-n", "1", commit])?;
    let mut tokens = listing.split_whitespace();
    // First token is the commit itself; it may be absent if the output is empty.
    drop(tokens.next());
    Ok(tokens.map(str::to_owned).collect())
}
/// Returns the merge base `git merge-base` reports for `a` and `b`, or `None`
/// when git exits with status 1 (no common ancestor).
///
/// # Errors
/// Returns [`CaptureError::Git`] for any other exit status, a spawn failure,
/// or non-UTF-8 output.
pub(crate) fn merge_base(root: &Path, a: &str, b: &str) -> Result<Option<String>, CaptureError> {
    let output = run_git(root, &["merge-base", a, b])?;
    let exit = output.status.code();
    if exit == Some(1) {
        return Ok(None);
    }
    if exit != Some(0) {
        let diagnostic = String::from_utf8_lossy(&output.stderr);
        return Err(CaptureError::Git(format!(
            "merge-base {a} {b}: {}",
            diagnostic.trim()
        )));
    }
    match String::from_utf8(output.stdout) {
        Ok(printed) => Ok(Some(printed.trim().to_string())),
        Err(_ignored) => Err(CaptureError::Git("merge-base: non-utf8 oid".to_owned())),
    }
}
/// Resolves the trunk commit via `trunk_ladder`, taking the explicit
/// override from the `DOCTRINE_TRUNK_REF` environment variable when it is set.
fn trunk_tree_ish(root: &Path) -> anyhow::Result<Option<String>> {
trunk_ladder(root, std::env::var_os("DOCTRINE_TRUNK_REF").as_deref())
}
fn trunk_ladder(root: &Path, explicit: Option<&std::ffi::OsStr>) -> anyhow::Result<Option<String>> {
let peel = |r: &str| -> anyhow::Result<Option<String>> {
let spec = format!("{r}^{{commit}}");
Ok(git_opt(root, &["rev-parse", "--verify", "--quiet", &spec])?)
};
if let Some(explicit) = explicit {
let explicit = explicit.to_string_lossy();
return match peel(&explicit)? {
Some(sha) => Ok(Some(sha)),
None => anyhow::bail!("DOCTRINE_TRUNK_REF={explicit} does not resolve to a commit"),
};
}
for candidate in ["origin/HEAD", "main", "master"] {
if let Some(sha) = peel(candidate)? {
return Ok(Some(sha));
}
}
Ok(None)
}
/// Returns the commit id of the trunk, or `None` when no trunk ref resolves.
/// Crate-visible wrapper around `trunk_tree_ish`.
pub(crate) fn trunk_commit(root: &Path) -> anyhow::Result<Option<String>> {
trunk_tree_ish(root)
}
/// Lists the numeric ids of the directories directly under `kind_dir` on trunk.
///
/// Runs `git ls-tree -d --name-only <trunk> -- <kind_dir>/` and keeps every
/// entry whose final path component parses as a `u32`. Returns an empty list
/// when there is no trunk or the listing fails.
///
/// # Errors
/// Fails when the trunk cannot be resolved or git cannot be run.
pub(crate) fn trunk_entity_ids(root: &Path, kind_dir: &str) -> anyhow::Result<Vec<u32>> {
    let Some(tree_ish) = trunk_tree_ish(root)? else {
        return Ok(Vec::new());
    };
    let pathspec = format!("{kind_dir}/");
    let ls_args = ["ls-tree", "-d", "--name-only", &tree_ish, "--", &pathspec];
    // A failed listing is treated as empty output.
    let listing = git_opt(root, &ls_args)?.unwrap_or_default();
    let ids = listing
        .lines()
        .filter_map(|line| line.rsplit('/').next()?.parse::<u32>().ok())
        .collect();
    Ok(ids)
}
/// Captures a [`Frame`] describing the checkout at `repo_root`.
///
/// * Not inside a work tree → the unanchored frame.
/// * `HEAD` does not resolve to a commit → `AnchorKind::None` with a derived
///   repo identity and all other fields empty.
/// * Clean checkout → `AnchorKind::Commit` with `commit == base_commit == HEAD`.
/// * Dirty checkout (staged changes, tracked modifications, or untracked
///   files) → `AnchorKind::CheckoutState` with a `checkout_state_id` and an
///   empty `commit`; `base_commit` still carries `HEAD`.
///
/// # Errors
/// Returns [`CaptureError::MultiRoot`] for histories with several root
/// commits, [`CaptureError::Submodule`] when the index holds a gitlink, and
/// propagates git / io failures from the helpers.
pub(crate) fn capture(repo_root: &Path) -> Result<Frame, CaptureError> {
// Anything other than a literal "true" (including a failed command) means
// there is no work tree to anchor to.
match git_opt(repo_root, &["rev-parse", "--is-inside-work-tree"])? {
Some(ref v) if v == "true" => {}
_ => return Ok(none_frame()),
}
// `None` here means HEAD is unborn (no commit yet).
let head_commit = git_opt(repo_root, &["rev-parse", "--verify", "HEAD^{commit}"])?;
let born = head_commit.is_some();
let repo = derive_repo_identity(repo_root, born)?;
let Some(commit) = head_commit else {
return Ok(Frame {
anchor_kind: AnchorKind::None,
repo,
commit: String::new(),
tree: String::new(),
ref_name: String::new(),
checkout_state_id: String::new(),
base_commit: String::new(),
});
};
// Histories with more than one root commit are rejected as unsupported.
let roots = git_text(repo_root, &["rev-list", "--max-parents=0", "HEAD"])?;
let root_count = roots.lines().filter(|l| !l.is_empty()).count();
if root_count > 1 {
return Err(CaptureError::MultiRoot(root_count));
}
let tree = git_text(repo_root, &["rev-parse", "HEAD^{tree}"])?;
// Empty when `symbolic-ref` fails, e.g. on a detached HEAD.
let ref_name = git_opt(repo_root, &["symbolic-ref", "--quiet", "HEAD"])?.unwrap_or_default();
reject_submodules(repo_root)?;
// Three independent dirtiness signals: the staged tree, the tracked diff
// against HEAD, and the set of untracked files.
let index_tree = git_text(repo_root, &["write-tree"])?;
let diff_bytes = git_bytes(
repo_root,
&["diff", "HEAD", "--binary", "--no-textconv", "--no-ext-diff"],
)?;
let untracked_fp = untracked_fingerprint(repo_root)?;
let dirty = index_tree != tree || !diff_bytes.is_empty() || untracked_fp.is_some();
if dirty {
let worktree_fp = sha256(&diff_bytes);
// With no untracked files the fingerprint is the hash of empty input.
let untracked = untracked_fp.unwrap_or_else(|| sha256(b""));
Ok(Frame {
anchor_kind: AnchorKind::CheckoutState,
repo,
commit: String::new(), tree,
ref_name,
checkout_state_id: checkout_state_id(&index_tree, &worktree_fp, &untracked),
base_commit: commit, })
} else {
Ok(Frame {
anchor_kind: AnchorKind::Commit,
repo,
commit: commit.clone(),
tree,
ref_name,
checkout_state_id: String::new(),
base_commit: commit,
})
}
}
/// Returns the frame used when there is nothing to anchor to: `AnchorKind::None`,
/// an empty low-confidence local-root identity, and every other field empty.
pub(crate) fn unanchored_frame() -> Frame {
none_frame()
}
/// Builds the unanchored frame: `AnchorKind::None`, an empty low-confidence
/// local-root identity, and every string field empty.
fn none_frame() -> Frame {
    let empty = String::new;
    let repo = RepoIdentity {
        repo_id: empty(),
        kind: RepoIdKind::LocalRoot,
        confidence: Confidence::Low,
    };
    Frame {
        anchor_kind: AnchorKind::None,
        repo,
        commit: empty(),
        tree: empty(),
        ref_name: empty(),
        checkout_state_id: empty(),
        base_commit: empty(),
    }
}
/// Builds a high-confidence explicit identity from a user-supplied value.
///
/// The trimmed value is normalized as a remote URL when it parses as one;
/// otherwise it is used verbatim.
pub(crate) fn explicit_identity(raw: &str) -> RepoIdentity {
    let trimmed = raw.trim();
    let repo_id = match normalize_remote_url(trimmed) {
        Some(normalized) => normalized.repo_id,
        None => trimmed.to_string(),
    };
    RepoIdentity {
        repo_id,
        kind: RepoIdKind::Explicit,
        confidence: Confidence::High,
    }
}
fn derive_repo_identity(root: &Path, born: bool) -> Result<RepoIdentity, CaptureError> {
let root_commit = if born {
git_text(root, &["rev-list", "--max-parents=0", "HEAD"])?
.lines()
.next()
.map(str::to_string)
} else {
None
};
if let Some(explicit) = git_opt(root, &["config", "--get", CONFIG_EXPLICIT_REPO_ID])?
&& !explicit.is_empty()
{
return Ok(explicit_identity(&explicit));
}
let remotes = list_remotes(root)?;
if let Some(selected) = select_remote(root, &remotes)? {
let raw = git_text(root, &["remote", "get-url", &selected])?;
if let Some(normalized) = normalize_remote_url(&raw) {
return Ok(RepoIdentity {
repo_id: normalized.repo_id,
kind: RepoIdKind::Remote,
confidence: Confidence::High,
});
}
}
let repo_id = match &root_commit {
Some(sha) => format!("repo:git-root:{sha}"),
None => "repo:git-root:unborn".to_string(),
};
Ok(RepoIdentity {
repo_id,
kind: RepoIdKind::LocalRoot,
confidence: if born {
Confidence::Medium
} else {
Confidence::Low
},
})
}
/// Returns the names printed by `git remote`, trimmed, without blanks, sorted.
///
/// # Errors
/// Propagates any [`CaptureError`] from running git.
fn list_remotes(root: &Path) -> Result<Vec<String>, CaptureError> {
    let listing = git_text(root, &["remote"])?;
    let mut names = Vec::new();
    for line in listing.lines() {
        let name = line.trim();
        if !name.is_empty() {
            names.push(name.to_string());
        }
    }
    names.sort();
    Ok(names)
}
/// Chooses which remote should identify the repository.
///
/// Priority: the remote named by `doctrine.repo.preferredremote` (if it
/// exists among `remotes`), then `origin`, then the only remote when there is
/// exactly one. Returns `None` when `remotes` is empty.
///
/// # Errors
/// Returns [`CaptureError::AmbiguousRemote`] when several remotes exist and
/// none of the rules above picks one; propagates git failures.
fn select_remote(root: &Path, remotes: &[String]) -> Result<Option<String>, CaptureError> {
    if remotes.is_empty() {
        return Ok(None);
    }
    let preferred = git_opt(root, &["config", "--get", CONFIG_PREFERRED_REMOTE])?
        .filter(|name| remotes.contains(name));
    if preferred.is_some() {
        return Ok(preferred);
    }
    if let Some(origin) = remotes.iter().find(|name| name.as_str() == "origin") {
        return Ok(Some(origin.clone()));
    }
    match remotes {
        [only] => Ok(Some(only.clone())),
        _ => Err(CaptureError::AmbiguousRemote(remotes.to_vec())),
    }
}
/// Fails when the index contains a gitlink entry.
///
/// Scans `git ls-files --stage`, whose lines start with the entry's mode.
///
/// # Errors
/// Returns [`CaptureError::Submodule`] on the first entry with mode `160000`;
/// propagates git failures.
fn reject_submodules(root: &Path) -> Result<(), CaptureError> {
    let staged = git_text(root, &["ls-files", "--stage"])?;
    let has_gitlink = staged
        .lines()
        .any(|entry| entry.split_whitespace().next() == Some("160000"));
    if has_gitlink {
        Err(CaptureError::Submodule)
    } else {
        Ok(())
    }
}
/// Fingerprints the untracked, non-ignored files under `root`.
///
/// Returns `None` when there are none. Otherwise hashes a manifest holding,
/// for each path in bytewise order, `<path> NUL <content hash> LF`. Symlinks
/// contribute the hash of their target path; other files contribute
/// `git hash-object` of their content.
///
/// # Errors
/// Returns [`CaptureError`] for a non-UTF-8 path, an unreadable symlink, or a
/// git failure.
fn untracked_fingerprint(root: &Path) -> Result<Option<String>, CaptureError> {
    let listing = git_bytes(root, &["ls-files", "--others", "--exclude-standard", "-z"])?;
    let mut entries: Vec<&[u8]> = listing
        .split(|byte| *byte == 0)
        .filter(|entry| !entry.is_empty())
        .collect();
    if entries.is_empty() {
        return Ok(None);
    }
    // Sort so the fingerprint does not depend on git's listing order.
    entries.sort_unstable();
    let mut manifest: Vec<u8> = Vec::new();
    for entry in entries {
        let Ok(relative) = std::str::from_utf8(entry) else {
            return Err(CaptureError::Git("non-utf8 untracked path".to_string()));
        };
        let absolute = root.join(relative);
        // A failed metadata lookup is treated as "not a symlink".
        let digest = match std::fs::symlink_metadata(&absolute) {
            Ok(meta) if meta.file_type().is_symlink() => symlink_target_hash(&absolute)?,
            _ => git_text(root, &["hash-object", "--", relative])?,
        };
        manifest.extend_from_slice(entry);
        manifest.push(0);
        manifest.extend_from_slice(digest.as_bytes());
        manifest.push(b'\n');
    }
    Ok(Some(sha256(&manifest)))
}
/// Hashes the raw bytes of the path a symlink points at (Unix).
///
/// # Errors
/// Returns [`CaptureError::Io`] when the link cannot be read.
#[cfg(unix)]
fn symlink_target_hash(full: &Path) -> Result<String, CaptureError> {
    use std::os::unix::ffi::OsStrExt;
    match std::fs::read_link(full) {
        Ok(target) => Ok(sha256(target.as_os_str().as_bytes())),
        Err(e) => Err(CaptureError::Io(format!(
            "readlink {}: {e}",
            full.display()
        ))),
    }
}
/// Hashes the lossily UTF-8-converted path a symlink points at (non-Unix).
///
/// # Errors
/// Returns [`CaptureError::Io`] when the link cannot be read.
#[cfg(not(unix))]
fn symlink_target_hash(full: &Path) -> Result<String, CaptureError> {
    match std::fs::read_link(full) {
        Ok(target) => Ok(sha256(target.to_string_lossy().as_bytes())),
        Err(e) => Err(CaptureError::Io(format!(
            "readlink {}: {e}",
            full.display()
        ))),
    }
}
/// Counts the commits in `since..target` that touch any of `paths`.
///
/// Returns `None` when any input is empty, when `since` is not an ancestor of
/// `target`, or when git fails or prints something that is not a `u32`.
pub(crate) fn commits_touching(
    root: &Path,
    paths: &[String],
    since: &str,
    target: &str,
) -> Option<u32> {
    if paths.is_empty() || since.is_empty() || target.is_empty() {
        return None;
    }
    let reachable = run_git(root, &["merge-base", "--is-ancestor", since, target])
        .ok()?
        .status
        .success();
    if !reachable {
        return None;
    }
    let range = format!("{since}..{target}");
    let count_args: Vec<&str> = ["rev-list", "--count", range.as_str(), "--"]
        .iter()
        .copied()
        .chain(paths.iter().map(String::as_str))
        .collect();
    let count = git_opt(root, &count_args).ok()??;
    count.parse().ok()
}
/// Returns the commit id `HEAD` resolves to, or `None` when it does not
/// resolve or git cannot be run.
pub(crate) fn head_sha(root: &Path) -> Option<String> {
    let head_args = ["rev-parse", "--verify", "HEAD^{commit}"];
    git_opt(root, &head_args).unwrap_or(None)
}
#[cfg(test)]
mod tests {
use serde_json::{Value, json};
use std::path::{Path, PathBuf};
use std::process::Command;
use super::{
AnchorKind, CHECKOUT_NORMALIZER, CaptureError, Confidence, Frame, REMOTE_NORMALIZER,
RepoIdKind, RepoIdentity, canonical_bytes, capture, checkout_state_id, commits_touching,
explicit_identity, normalize_remote_url, sha256,
};
fn canon(v: &Value) -> String {
let bytes = canonical_bytes(v).unwrap_or_default();
String::from_utf8(bytes).unwrap_or_default()
}
#[test]
fn anchor_kind_token_round_trips() {
for k in [
AnchorKind::Commit,
AnchorKind::CheckoutState,
AnchorKind::None,
] {
assert_eq!(AnchorKind::parse(k.as_str()).unwrap(), k);
}
assert_eq!(
AnchorKind::as_str(AnchorKind::CheckoutState),
"checkout_state"
);
assert!(AnchorKind::parse("bogus").is_err());
}
#[test]
fn repo_id_kind_token_round_trips() {
for k in [
RepoIdKind::Explicit,
RepoIdKind::Remote,
RepoIdKind::LocalRoot,
] {
assert_eq!(RepoIdKind::parse(k.as_str()).unwrap(), k);
}
assert_eq!(RepoIdKind::as_str(RepoIdKind::LocalRoot), "local_root");
assert!(RepoIdKind::parse("bogus").is_err());
}
#[test]
fn confidence_token_round_trips() {
for c in [Confidence::High, Confidence::Medium, Confidence::Low] {
assert_eq!(Confidence::parse(c.as_str()).unwrap(), c);
}
assert!(Confidence::parse("bogus").is_err());
}
fn sample_frame() -> Frame {
Frame {
anchor_kind: AnchorKind::Commit,
repo: RepoIdentity {
repo_id: "github.com/org/repo".to_string(),
kind: RepoIdKind::Remote,
confidence: Confidence::High,
},
commit: "abc123".to_string(),
tree: "tree123".to_string(),
ref_name: "refs/heads/main".to_string(),
checkout_state_id: String::new(),
base_commit: "abc123".to_string(),
}
}
#[test]
fn frame_carries_anchor_and_identity() {
let f = sample_frame();
assert_eq!(f.anchor_kind, AnchorKind::Commit);
assert_eq!(f.repo.repo_id, "github.com/org/repo");
assert_eq!(f.repo.kind, RepoIdKind::Remote);
assert_eq!(f.repo.confidence, Confidence::High);
assert_eq!(f.commit, f.base_commit);
assert_eq!(f.tree, "tree123");
assert_eq!(f.ref_name, "refs/heads/main");
assert!(f.checkout_state_id.is_empty());
}
#[test]
fn frame_variants_are_distinct() {
assert_ne!(AnchorKind::Commit, AnchorKind::CheckoutState);
assert_ne!(AnchorKind::CheckoutState, AnchorKind::None);
assert_ne!(RepoIdKind::Explicit, RepoIdKind::LocalRoot);
assert_ne!(Confidence::Medium, Confidence::Low);
}
#[test]
fn canonical_primitives_encode_to_literals() {
assert_eq!(canon(&json!(null)), "null");
assert_eq!(canon(&json!(true)), "true");
assert_eq!(canon(&json!(false)), "false");
assert_eq!(canon(&json!(0)), "0");
assert_eq!(canon(&json!(-1)), "-1");
assert_eq!(canon(&json!(42)), "42");
}
#[test]
fn canonical_sorts_object_keys_bytewise_and_keeps_array_order() {
assert_eq!(canon(&json!({})), "{}");
assert_eq!(canon(&json!([])), "[]");
assert_eq!(canon(&json!({ "b": 1, "a": 2 })), "{\"a\":2,\"b\":1}");
assert_eq!(canon(&json!([3, 1, 2])), "[3,1,2]");
let nested = json!({ "z": [1, { "b": 2, "a": 3 }], "a": null });
assert_eq!(canon(&nested), "{\"a\":null,\"z\":[1,{\"a\":3,\"b\":2}]}");
}
#[test]
fn canonical_escapes_only_the_minimal_set() {
assert_eq!(canon(&json!("a\"b\\c")), "\"a\\\"b\\\\c\"");
assert_eq!(canon(&json!("\n\t")), "\"\\n\\t\"");
assert_eq!(canon(&Value::String("\u{01}".to_owned())), "\"\\u0001\"");
assert_eq!(canon(&json!("é→")), "\"é→\"");
}
#[test]
fn canonical_rejects_floats_and_exponent_forms() {
assert!(canonical_bytes(&json!(1.5)).is_err(), "fractional rejected");
let exp: Value = serde_json::from_str("1e3").unwrap_or(Value::Null);
assert!(canonical_bytes(&exp).is_err(), "exponent form rejected");
let dot_zero: Value = serde_json::from_str("1.0").unwrap_or(Value::Null);
assert!(
canonical_bytes(&dot_zero).is_err(),
"1.0 float-form rejected"
);
}
#[test]
fn sha256_is_lowercase_hex_of_known_vector() {
assert_eq!(
sha256(b""),
"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
);
}
#[test]
fn checkout_state_id_is_deterministic() {
assert_eq!(
checkout_state_id("tree", "wf", "uf"),
checkout_state_id("tree", "wf", "uf")
);
}
#[test]
fn checkout_state_id_changes_with_each_input() {
let base = checkout_state_id("tree", "wf", "uf");
assert_ne!(base, checkout_state_id("tree2", "wf", "uf"));
assert_ne!(base, checkout_state_id("tree", "wf2", "uf"));
assert_ne!(base, checkout_state_id("tree", "wf", "uf2"));
}
#[test]
fn checkout_state_id_binds_the_normalizer_tag() {
let value = json!({
"normalizer": CHECKOUT_NORMALIZER,
"index_tree": "t",
"worktree_fingerprint": "w",
"untracked_fingerprint": "u",
});
let expected = sha256(&canonical_bytes(&value).unwrap_or_default());
assert_eq!(checkout_state_id("t", "w", "u"), expected);
}
#[test]
fn normalize_remote_url_table() {
let cases = [
("https://github.com/org/repo.git", "github.com/org/repo"),
("https://github.com/org/repo", "github.com/org/repo"),
("git@github.com:org/repo.git", "github.com/org/repo"),
("ssh://git@github.com/org/repo.git", "github.com/org/repo"),
("ssh://git@github.com:22/org/repo", "github.com/org/repo"),
("https://github.com:443/org/repo", "github.com/org/repo"),
(
"ssh://git@git.example.com:2222/org/repo",
"git.example.com:2222/org/repo",
),
(
"https://git.example.com:8443/org/repo",
"git.example.com:8443/org/repo",
),
(
"git://git.example.com/org/repo",
"git.example.com:9418/org/repo",
),
("https://GitHub.com/Org/Repo.git", "github.com/Org/Repo"),
(
"https://user:token@github.com/org/repo",
"github.com/org/repo",
),
("https://github.com/org/repo/", "github.com/org/repo"),
];
for (raw, expected) in cases {
assert_eq!(
normalize_remote_url(raw).map(|g| g.repo_id).as_deref(),
Some(expected),
"input: {raw}"
);
}
}
#[test]
fn normalize_remote_url_rejects_garbage() {
assert!(normalize_remote_url("not a url").is_none());
assert!(normalize_remote_url("").is_none());
assert!(normalize_remote_url("https://").is_none());
}
#[test]
fn normalize_remote_url_exposes_components() {
let n = normalize_remote_url("ssh://git@git.example.com:2222/Org/Repo.git");
assert!(n.is_some(), "should normalize");
if let Some(n) = n {
assert_eq!(n.host, "git.example.com");
assert_eq!(n.port, Some(2222));
assert_eq!(n.path, "Org/Repo");
assert_eq!(n.repo_id, "git.example.com:2222/Org/Repo");
}
}
#[test]
fn remote_normalizer_tag_is_frozen() {
assert_eq!(REMOTE_NORMALIZER, "forget.remote.v1");
}
const FIXED_DATE: &str = "2026-01-01T00:00:00 +0000";
struct ScratchRepo {
_dir: tempfile::TempDir,
path: PathBuf,
}
/// Construction and mutation helpers for the throwaway repository.
impl ScratchRepo {
    /// Creates a fresh repository in a new temporary directory, on branch
    /// `main`, with a repo-local committer identity.
    ///
    /// # Panics
    /// Panics if the temporary directory cannot be created or any of the
    /// setup git commands fails.
    fn new() -> Self {
        let dir = tempfile::tempdir().expect("tempdir");
        let path = dir.path().to_path_buf();
        let repo = Self { _dir: dir, path };
        repo.git(&["init", "-b", "main"]);
        repo.git(&["config", "user.name", "Doctrine Test"]);
        repo.git(&["config", "user.email", "test@doctrine.invalid"]);
        repo
    }

    /// Root directory of the scratch repository.
    fn path(&self) -> &Path {
        &self.path
    }

    /// Runs `git -C <repo> <args>` and returns its stdout, trimmed.
    ///
    /// Author and committer dates are pinned to `FIXED_DATE`. The invocation
    /// is also isolated from the caller's environment: user-level and
    /// system-level git configuration is skipped (so settings such as
    /// `commit.gpgsign` or `core.hooksPath` cannot alter or break fixture
    /// commits), and repository-selecting variables are removed so the
    /// command cannot be redirected away from the scratch directory when the
    /// tests run from inside a git hook.
    ///
    /// # Panics
    /// Panics if git cannot be spawned or exits unsuccessfully; the message
    /// carries the arguments and the trimmed stderr.
    fn git(&self, args: &[&str]) -> String {
        let mut cmd = Command::new("git");
        cmd.arg("-C")
            .arg(&self.path)
            .args(args)
            .env("GIT_AUTHOR_DATE", FIXED_DATE)
            .env("GIT_COMMITTER_DATE", FIXED_DATE)
            // Ignore the developer's ~/.gitconfig and the system gitconfig.
            .env("GIT_CONFIG_GLOBAL", "/dev/null")
            .env("GIT_CONFIG_NOSYSTEM", "1");
        // These override `-C` discovery when inherited (e.g. from a hook).
        for leaked in ["GIT_DIR", "GIT_WORK_TREE", "GIT_INDEX_FILE"] {
            cmd.env_remove(leaked);
        }
        let output = cmd.output().expect("spawn git");
        assert!(
            output.status.success(),
            "git {args:?} failed: {}",
            String::from_utf8_lossy(&output.stderr).trim()
        );
        String::from_utf8_lossy(&output.stdout).trim().to_string()
    }

    /// Writes `contents` to `rel` (relative to the repository root), creating
    /// any missing parent directories first.
    ///
    /// # Panics
    /// Panics if the directories or the file cannot be written.
    fn write(&self, rel: &str, contents: &str) {
        let full = self.path.join(rel);
        if let Some(parent) = full.parent() {
            std::fs::create_dir_all(parent).expect("create parent");
        }
        std::fs::write(&full, contents).expect("write file");
    }

    /// Writes `rel`, stages just that path, commits with `message`, and
    /// returns the output of `git rev-parse HEAD` for the new commit.
    fn commit(&self, rel: &str, contents: &str, message: &str) -> String {
        self.write(rel, contents);
        self.git(&["add", rel]);
        self.git(&["commit", "-m", message]);
        self.git(&["rev-parse", "HEAD"])
    }
}
/// A committed tree with no local edits anchors on its HEAD commit.
#[test]
fn clean_checkout_anchors_to_head_commit() {
    let scratch = ScratchRepo::new();
    let head_sha = scratch.commit("a.txt", "hello", "init");
    let tree_sha = scratch.git(&["rev-parse", "HEAD^{tree}"]);

    let captured = capture(scratch.path()).expect("capture clean");

    // Commit-anchored: both `commit` and `base_commit` equal HEAD.
    assert_eq!(captured.anchor_kind, AnchorKind::Commit);
    assert_eq!(captured.commit, head_sha);
    assert_eq!(captured.base_commit, head_sha);
    assert_eq!(captured.tree, tree_sha);
    assert_eq!(captured.ref_name, "refs/heads/main");
    assert!(
        captured.checkout_state_id.is_empty(),
        "clean tree carries no checkout_state_id"
    );
}
/// An uncommitted edit to a tracked file switches the anchor to the checkout
/// state while `base_commit` still records HEAD.
#[test]
fn dirty_tracked_change_anchors_to_checkout_state() {
    let scratch = ScratchRepo::new();
    let head_sha = scratch.commit("a.txt", "hello", "init");
    // Rewrite the tracked file without committing.
    scratch.write("a.txt", "hello world");

    let captured = capture(scratch.path()).expect("capture dirty");

    assert_eq!(captured.anchor_kind, AnchorKind::CheckoutState);
    assert!(captured.commit.is_empty(), "commit empty iff dirty");
    assert!(!captured.checkout_state_id.is_empty());
    assert_eq!(
        captured.base_commit, head_sha,
        "base_commit carries HEAD when dirty"
    );
}
/// An untracked file alone, with no tracked edits, counts as a dirty tree.
#[test]
fn untracked_only_is_dirty() {
    let scratch = ScratchRepo::new();
    scratch.commit("a.txt", "hello", "init");
    // Never staged, so it stays untracked.
    scratch.write("untracked.txt", "new");

    let captured = capture(scratch.path()).expect("capture untracked");

    assert_eq!(captured.anchor_kind, AnchorKind::CheckoutState);
    assert!(captured.commit.is_empty());
    assert!(!captured.checkout_state_id.is_empty());
}
/// Checking out a commit directly keeps the commit anchor but leaves
/// `ref_name` empty.
#[test]
fn detached_head_is_anchored_with_empty_ref() {
    let scratch = ScratchRepo::new();
    let first_sha = scratch.commit("a.txt", "1", "first");
    scratch.commit("b.txt", "2", "second");
    // Check out the older commit by id rather than by branch name.
    scratch.git(&["checkout", &first_sha]);

    let captured = capture(scratch.path()).expect("capture detached");

    assert_eq!(captured.anchor_kind, AnchorKind::Commit, "still anchored");
    assert_eq!(captured.commit, first_sha);
    assert!(
        captured.ref_name.is_empty(),
        "detached HEAD has empty ref_name"
    );
}
/// A repository with no commits yet captures successfully with no anchor.
#[test]
fn unborn_repo_is_none_anchor() {
    // Initialised but never committed to.
    let scratch = ScratchRepo::new();

    let captured = capture(scratch.path()).expect("capture unborn");

    assert_eq!(captured.anchor_kind, AnchorKind::None);
    assert!(captured.commit.is_empty());
    assert!(captured.base_commit.is_empty());
}
/// Capturing a plain directory (no `git init`) succeeds with a `None` anchor
/// and an empty repo id instead of returning an error.
#[test]
fn non_repo_is_none_anchor_not_error() {
    let plain_dir = tempfile::tempdir().expect("tempdir");

    let captured = capture(plain_dir.path()).expect("non-repo must not error");

    assert_eq!(captured.anchor_kind, AnchorKind::None);
    assert_eq!(captured.repo.repo_id, "");
}
/// Two captures of the same dirty tree yield the same `checkout_state_id`.
#[test]
fn recapture_of_unchanged_dirty_tree_is_stable() {
    let scratch = ScratchRepo::new();
    scratch.commit("a.txt", "hello", "init");
    scratch.write("a.txt", "changed");

    // Nothing touches the worktree between the two captures.
    let earlier = capture(scratch.path()).expect("capture a");
    let later = capture(scratch.path()).expect("capture b");

    assert_eq!(earlier.checkout_state_id, later.checkout_state_id);
}
/// Different uncommitted contents of the same file produce different
/// `checkout_state_id` values.
#[test]
fn editing_worktree_changes_checkout_state_id() {
    let scratch = ScratchRepo::new();
    scratch.commit("a.txt", "hello", "init");

    scratch.write("a.txt", "first edit");
    let before = capture(scratch.path()).expect("capture first");

    scratch.write("a.txt", "second edit");
    let after = capture(scratch.path()).expect("capture second");

    assert_ne!(before.checkout_state_id, after.checkout_state_id);
}
/// With an `origin` remote configured, the repo id is the normalized remote
/// URL, reported as `Remote` with `High` confidence.
#[test]
fn origin_remote_drives_high_confidence_repo_id() {
    let scratch = ScratchRepo::new();
    scratch.commit("a.txt", "hello", "init");
    scratch.git(&["remote", "add", "origin", "https://github.com/org/repo.git"]);

    let identity = capture(scratch.path()).expect("capture remote").repo;

    assert_eq!(identity.kind, RepoIdKind::Remote);
    assert_eq!(identity.confidence, Confidence::High);
    assert_eq!(identity.repo_id, "github.com/org/repo");
}
/// Two remotes, neither named `origin`, make capture fail with
/// `CaptureError::AmbiguousRemote`.
#[test]
fn two_remotes_without_origin_are_ambiguous() {
    let scratch = ScratchRepo::new();
    scratch.commit("a.txt", "hello", "init");
    for (name, url) in [
        ("alpha", "https://github.com/org/alpha.git"),
        ("beta", "https://github.com/org/beta.git"),
    ] {
        scratch.git(&["remote", "add", name, url]);
    }

    let result = capture(scratch.path());

    assert!(
        matches!(result, Err(CaptureError::AmbiguousRemote(_))),
        "got {result:?}"
    );
}
/// Without any remote, the repo id is derived from the root commit and
/// reported as `LocalRoot` with `Medium` confidence.
#[test]
fn no_remote_falls_back_to_local_root_medium() {
    let scratch = ScratchRepo::new();
    scratch.commit("a.txt", "hello", "init");
    // The parentless commit reachable from HEAD.
    let root = scratch.git(&["rev-list", "--max-parents=0", "HEAD"]);
    let expected_id = format!("repo:git-root:{root}");

    let identity = capture(scratch.path()).expect("capture local-root").repo;

    assert_eq!(identity.kind, RepoIdKind::LocalRoot);
    assert_eq!(identity.confidence, Confidence::Medium);
    assert_eq!(identity.repo_id, expected_id);
}
/// A `doctrine.repo.id` config value takes precedence over the `origin`
/// remote when both are present.
#[test]
fn explicit_config_repo_id_wins_over_remote() {
    let scratch = ScratchRepo::new();
    scratch.commit("a.txt", "hello", "init");
    scratch.git(&["remote", "add", "origin", "https://github.com/org/repo.git"]);
    scratch.git(&["config", "doctrine.repo.id", "custom/identity"]);

    let identity = capture(scratch.path()).expect("capture explicit").repo;

    assert_eq!(identity.kind, RepoIdKind::Explicit);
    assert_eq!(identity.confidence, Confidence::High);
    assert_eq!(identity.repo_id, "custom/identity");
}
/// `doctrine.repo.preferredremote` selects which remote supplies the repo id,
/// even when `origin` exists.
#[test]
fn preferred_remote_config_overrides_origin() {
    let scratch = ScratchRepo::new();
    scratch.commit("a.txt", "hello", "init");

    let origin_url = "https://github.com/org/origin.git";
    let fork_url = "https://github.com/me/fork.git";
    scratch.git(&["remote", "add", "origin", origin_url]);
    scratch.git(&["remote", "add", "fork", fork_url]);
    scratch.git(&["config", "doctrine.repo.preferredremote", "fork"]);

    let captured = capture(scratch.path()).expect("capture preferred");

    assert_eq!(captured.repo.repo_id, "github.com/me/fork");
}
/// A URL-shaped explicit identity is normalized, so embedded credentials do
/// not end up in the repo id.
#[test]
fn explicit_identity_strips_userinfo_from_credentialed_repo() {
    let identity = explicit_identity("https://user:token@github.com/org/repo.git");

    assert_eq!(identity.kind, RepoIdKind::Explicit);
    assert_eq!(identity.confidence, Confidence::High);
    assert_eq!(identity.repo_id, "github.com/org/repo", "userinfo dropped");
}
/// An explicit identity that is not a URL is passed through unchanged.
#[test]
fn explicit_identity_keeps_non_url_value_verbatim() {
    let identity = explicit_identity("org/project");

    assert_eq!(identity.repo_id, "org/project");
    assert_eq!(identity.kind, RepoIdKind::Explicit);
}
/// An index entry with mode 160000 (git's gitlink mode, used for submodules)
/// makes capture fail with `CaptureError::Submodule`.
#[test]
fn submodule_gitlink_entry_is_rejected() {
    let scratch = ScratchRepo::new();
    let head_sha = scratch.commit("a.txt", "hello", "init");
    // Register the gitlink straight in the index; no real submodule is set up.
    let gitlink = format!("160000,{head_sha},sub");
    scratch.git(&["update-index", "--add", "--cacheinfo", &gitlink]);

    let result = capture(scratch.path());

    assert!(
        matches!(result, Err(CaptureError::Submodule)),
        "got {result:?}"
    );
}
/// A committed symlink with no local edits still counts as a clean tree.
#[cfg(unix)]
#[test]
fn symlink_repo_captures_clean() {
    let scratch = ScratchRepo::new();
    scratch.commit("a.txt", "hello", "init");

    // Track and commit a relative symlink pointing at the committed file.
    let link_path = scratch.path().join("link");
    std::os::unix::fs::symlink("a.txt", link_path).expect("symlink");
    scratch.git(&["add", "link"]);
    scratch.git(&["commit", "-m", "add symlink"]);

    let captured = capture(scratch.path()).expect("capture symlink repo");

    assert_eq!(
        captured.anchor_kind,
        AnchorKind::Commit,
        "clean symlink tree anchors on its commit"
    );
    assert!(captured.checkout_state_id.is_empty());
}
/// Re-pointing a committed symlink at a different target makes the tree
/// dirty, and capturing that state twice gives equal results.
#[cfg(unix)]
#[test]
fn tracked_symlink_repoint_is_dirty() {
    let scratch = ScratchRepo::new();
    scratch.commit("a.txt", "hello", "init");
    let link_path = scratch.path().join("link");

    // Commit the link with its original target.
    std::os::unix::fs::symlink("a.txt", scratch.path().join("link")).expect("symlink");
    scratch.git(&["add", "link"]);
    scratch.git(&["commit", "-m", "add symlink"]);

    // Replace it with a link to a different target, uncommitted.
    std::fs::remove_file(&link_path).expect("rm link");
    std::os::unix::fs::symlink("a.txt.other", &link_path).expect("re-symlink");

    let first = capture(scratch.path()).expect("capture repointed");
    let second = capture(scratch.path()).expect("recapture");

    assert_eq!(
        first.anchor_kind,
        AnchorKind::CheckoutState,
        "a changed tracked symlink makes the tree dirty"
    );
    assert!(
        !first.checkout_state_id.is_empty(),
        "the dirty tracked symlink carries a checkout_state_id"
    );
    assert_eq!(first, second, "tracked-symlink-repoint capture is deterministic");
}
/// Rewriting the file an untracked symlink points at must not change the
/// `checkout_state_id`.
#[cfg(unix)]
#[test]
fn untracked_symlink_ignores_pointee_content() {
    let scratch = ScratchRepo::new();
    scratch.commit("a.txt", "hello", "init");

    // The pointee lives in a separate temp dir, outside the repository.
    let outside = tempfile::tempdir().expect("ext tempdir");
    let pointee = outside.path().join("pointee");
    std::fs::write(&pointee, "original").expect("write pointee");
    std::os::unix::fs::symlink(&pointee, scratch.path().join("link")).expect("symlink");

    let before = capture(scratch.path()).expect("capture 1").checkout_state_id;
    std::fs::write(&pointee, "mutated content, a different length entirely")
        .expect("rewrite pointee");
    let after = capture(scratch.path()).expect("capture 2").checkout_state_id;

    assert!(!before.is_empty(), "untracked symlink makes the tree dirty");
    assert_eq!(
        before, after,
        "csid must be invariant to symlink target *content* (no-follow)"
    );
}
/// Re-pointing an untracked symlink at a different target changes the
/// `checkout_state_id`.
#[cfg(unix)]
#[test]
fn untracked_symlink_tracks_target_path() {
    let scratch = ScratchRepo::new();
    scratch.commit("a.txt", "hello", "init");
    let link_path = scratch.path().join("link");

    std::os::unix::fs::symlink("first", &link_path).expect("symlink first");
    let captured_first = capture(scratch.path()).expect("capture first");
    let before = captured_first.checkout_state_id;

    // Same link name, different target text.
    std::fs::remove_file(&link_path).expect("rm link");
    std::os::unix::fs::symlink("second", &link_path).expect("symlink second");
    let captured_second = capture(scratch.path()).expect("capture second");
    let after = captured_second.checkout_state_id;

    assert_ne!(
        before, after,
        "repointing the symlink must change the csid (link text captured)"
    );
}
/// An untracked symlink whose target does not exist is captured without
/// error, and repeated captures agree.
#[cfg(unix)]
#[test]
fn dangling_untracked_symlink_ok() {
    let scratch = ScratchRepo::new();
    scratch.commit("a.txt", "hello", "init");
    let link_path = scratch.path().join("link");
    std::os::unix::fs::symlink("does/not/exist", link_path).expect("symlink");

    let first = capture(scratch.path()).expect("capture dangling symlink");
    let second = capture(scratch.path()).expect("recapture");

    assert_eq!(
        first.anchor_kind,
        AnchorKind::CheckoutState,
        "untracked symlink makes the tree dirty"
    );
    assert_eq!(first, second, "dangling-symlink capture is deterministic");
}
/// A symlink whose target is not valid UTF-8 is captured without error, and
/// repeated captures agree.
#[cfg(unix)]
#[test]
fn untracked_symlink_non_utf8_target_bytes() {
    use std::os::unix::ffi::OsStrExt;

    let scratch = ScratchRepo::new();
    scratch.commit("a.txt", "hello", "init");

    // 0xFF 0xFE is not a valid UTF-8 sequence.
    let raw_target = std::ffi::OsStr::from_bytes(&[0xFF, 0xFE]);
    std::os::unix::fs::symlink(raw_target, scratch.path().join("link")).expect("symlink");

    let first = capture(scratch.path()).expect("capture non-utf8 symlink target");
    let second = capture(scratch.path()).expect("recapture");

    assert_eq!(first.anchor_kind, AnchorKind::CheckoutState);
    assert_eq!(first, second, "non-utf8 symlink target capture is deterministic");
}
/// An untracked file with a newline in its name makes the tree dirty, and
/// repeated captures agree.
#[cfg(unix)]
#[test]
fn untracked_newline_in_name_is_deterministic() {
    use std::os::unix::ffi::OsStrExt;

    let scratch = ScratchRepo::new();
    scratch.commit("a.txt", "hello", "init");

    // File name with an embedded line feed.
    let odd_name = std::ffi::OsStr::from_bytes(b"wei\nrd.txt");
    let odd_path = scratch.path().join(odd_name);
    std::fs::write(odd_path, "contents").expect("write newline file");

    let first = capture(scratch.path()).expect("capture newline-name file");
    let second = capture(scratch.path()).expect("recapture");

    assert_eq!(
        first.anchor_kind,
        AnchorKind::CheckoutState,
        "an untracked newline-named file makes the tree dirty"
    );
    assert!(!first.checkout_state_id.is_empty());
    assert_eq!(first, second, "newline-in-name capture is deterministic");
}
/// Builds a history with two parentless commits merged together and expects
/// capture to fail with `CaptureError::MultiRoot(2)`.
#[test]
fn multi_root_repository_is_rejected() {
let repo = ScratchRepo::new();
repo.commit("a.txt", "hello", "init");
// Start a second, unrelated history on a new branch named `other`.
repo.git(&["checkout", "--orphan", "other"]);
// Best-effort removal of everything still tracked. This bypasses `repo.git`
// and discards the result on purpose, so a failing `git rm` does not abort
// the test. NOTE(review): presumably clears what the orphan checkout carried
// over from `main` — confirm.
let _ = Command::new("git")
.arg("-C")
.arg(repo.path())
.args(["rm", "-rf", "."])
.output();
repo.commit("b.txt", "world", "second root");
// Join the two histories so both roots are reachable from `main`.
repo.git(&["checkout", "main"]);
repo.git(&[
"merge",
"other",
"--allow-unrelated-histories",
"-m",
"merge roots",
]);
let result = capture(repo.path());
assert!(
matches!(result, Err(CaptureError::MultiRoot(2))),
"got {result:?}"
);
}
/// Pins the exact `checkout_state_id` for a fixed scenario: one commit, an
/// `origin` remote, and one untracked file.
#[test]
fn conformance_golden_vector() {
    // Changing this value means the captured id for the scenario has changed.
    let golden_csid = "88d9489028e302700c2e6430e6df1d06539dccfd283d2ed99995258482ccf86c";

    let scratch = ScratchRepo::new();
    scratch.commit("a.txt", "hello", "init");
    scratch.git(&["remote", "add", "origin", "https://github.com/org/repo.git"]);
    scratch.write("untracked.txt", "world");

    let captured = capture(scratch.path()).expect("capture golden");

    assert_eq!(captured.repo.repo_id, "github.com/org/repo");
    assert_eq!(captured.repo.kind, RepoIdKind::Remote);
    assert_eq!(captured.anchor_kind, AnchorKind::CheckoutState);
    assert_eq!(
        captured.checkout_state_id, golden_csid,
        "conformance golden checkout_state_id"
    );
}
/// Wraps a single path in a one-element owned vector, the shape the
/// `commits_touching` tests pass as their path list.
fn p(s: &str) -> Vec<String> {
    let mut paths = Vec::with_capacity(1);
    paths.push(String::from(s));
    paths
}
/// An empty path list yields `None`.
///
/// The directory is not a repository and the endpoints are not real commit
/// ids. The "without spawning" in the name is not observed directly here;
/// only the `None` result is checked.
#[test]
fn empty_paths_returns_none_without_spawning() {
    let plain_dir = tempfile::tempdir().expect("tempdir");

    let counted = commits_touching(plain_dir.path(), &[], "deadbeef", "cafebabe");

    assert_eq!(counted, None);
}
/// An empty string for either endpoint yields `None`.
#[test]
fn empty_endpoints_return_none() {
    let scratch = ScratchRepo::new();
    let head_sha = scratch.commit("a.txt", "x", "init");
    let paths = p("a.txt");

    // Empty first endpoint, then empty second endpoint.
    assert_eq!(commits_touching(scratch.path(), &paths, "", &head_sha), None);
    assert_eq!(commits_touching(scratch.path(), &paths, &head_sha, ""), None);
}
/// Identical endpoints give a count of zero rather than `None`.
#[test]
fn no_commits_since_anchor_is_zero() {
    let scratch = ScratchRepo::new();
    let head_sha = scratch.commit("a.txt", "x", "init");

    let counted = commits_touching(scratch.path(), &p("a.txt"), &head_sha, &head_sha);

    assert_eq!(counted, Some(0));
}
/// Two later commits that edit the scoped path are both counted; the base
/// commit itself is not.
#[test]
fn counts_commits_touching_scoped_path() {
    let scratch = ScratchRepo::new();
    let base_sha = scratch.commit("a.txt", "1", "init");
    scratch.commit("a.txt", "2", "edit");
    let tip_sha = scratch.commit("a.txt", "3", "edit again");

    let counted = commits_touching(scratch.path(), &p("a.txt"), &base_sha, &tip_sha);

    assert_eq!(counted, Some(2));
}
/// A commit that only touches a different file is not counted for the
/// scoped path.
#[test]
fn pathspec_narrows_out_other_paths() {
    let scratch = ScratchRepo::new();
    let base_sha = scratch.commit("a.txt", "1", "init");
    // The only later commit touches `b.txt`, not `a.txt`.
    let tip_sha = scratch.commit("b.txt", "1", "unrelated");

    let counted = commits_touching(scratch.path(), &p("a.txt"), &base_sha, &tip_sha);

    assert_eq!(counted, Some(0));
}
/// Passing the endpoints in reverse order (newer commit first) yields `None`
/// instead of a count.
#[test]
fn non_ancestor_since_returns_none_not_overcount() {
    let scratch = ScratchRepo::new();
    let older_sha = scratch.commit("a.txt", "1", "init");
    let newer_sha = scratch.commit("a.txt", "2", "edit");

    // Endpoints deliberately swapped.
    let counted = commits_touching(scratch.path(), &p("a.txt"), &newer_sha, &older_sha);

    assert_eq!(counted, None);
}
/// An all-zero object id in either endpoint position yields `None`.
#[test]
fn missing_object_returns_none() {
    let scratch = ScratchRepo::new();
    let head_sha = scratch.commit("a.txt", "1", "init");
    let bogus = "0000000000000000000000000000000000000000";
    let paths = p("a.txt");

    let bogus_first = commits_touching(scratch.path(), &paths, bogus, &head_sha);
    assert_eq!(bogus_first, None);

    let bogus_second = commits_touching(scratch.path(), &paths, &head_sha, bogus);
    assert_eq!(bogus_second, None);
}
/// The count uses the explicit endpoints: with HEAD detached at the base
/// commit, the later commit is still counted.
#[test]
fn detached_head_with_frozen_target_still_counts() {
    let scratch = ScratchRepo::new();
    let base_sha = scratch.commit("a.txt", "1", "init");
    let tip_sha = scratch.commit("a.txt", "2", "edit");
    // Move HEAD back to the base commit, behind the tip being counted to.
    scratch.git(&["checkout", &base_sha]);

    let counted = commits_touching(scratch.path(), &p("a.txt"), &base_sha, &tip_sha);

    assert_eq!(counted, Some(1));
}
use std::ffi::OsStr;
/// Seeds `.doctrine/slice/` with one zero-padded three-digit directory per id
/// (each holding a `slice.toml`), plus one non-numeric directory, then
/// commits everything in a single commit.
fn commit_slice_dirs(repo: &ScratchRepo, ids: &[u32]) {
    for &id in ids {
        let slice_file = format!(".doctrine/slice/{id:03}/slice.toml");
        repo.write(&slice_file, "x = 1\n");
    }
    // Non-numeric sibling directory, committed alongside the numbered ones.
    repo.write(".doctrine/slice/scratch-notes/n.md", "ignore me\n");
    repo.git(&["add", "-A"]);
    repo.git(&["commit", "-m", "seed slices"]);
}
/// The committed numeric directories come back as ids; the non-numeric
/// `scratch-notes` directory does not appear.
#[test]
fn trunk_entity_ids_reads_committed_numeric_dirs() {
    let scratch = ScratchRepo::new();
    commit_slice_dirs(&scratch, &[1, 2, 4]);

    let mut found = super::trunk_entity_ids(scratch.path(), ".doctrine/slice").unwrap();
    // Sorted before comparing, so the returned order does not matter.
    found.sort_unstable();

    assert_eq!(found, vec![1, 2, 4]);
}
/// A `kind_dir` that already starts with `.doctrine/` is used as given.
#[test]
fn trunk_entity_ids_does_not_reprepend_doctrine() {
    let scratch = ScratchRepo::new();
    commit_slice_dirs(&scratch, &[7]);

    let found = super::trunk_entity_ids(scratch.path(), ".doctrine/slice").unwrap();

    assert_eq!(found, vec![7], "prefixed kind_dir must not be re-prepended");
}
/// In a repository with no commits, `trunk_tree_ish` is `None` and
/// `trunk_entity_ids` is an empty list.
#[test]
fn trunk_entity_ids_empty_without_trunk() {
    // Initialised but never committed to.
    let scratch = ScratchRepo::new();

    assert_eq!(super::trunk_tree_ish(scratch.path()).unwrap(), None);

    let found = super::trunk_entity_ids(scratch.path(), ".doctrine/slice").unwrap();
    assert_eq!(found, Vec::<u32>::new());
}
/// An explicit override naming a ref that does not exist is an error whose
/// message mentions `DOCTRINE_TRUNK_REF`.
#[test]
fn trunk_ladder_explicit_unpeelable_ref_is_hard_error() {
    let scratch = ScratchRepo::new();
    scratch.commit("a.txt", "hello", "init");
    // No branch with this name was ever created.
    let missing_ref = OsStr::new("refs/heads/does-not-exist");

    let err = super::trunk_ladder(scratch.path(), Some(missing_ref)).unwrap_err();

    assert!(
        err.to_string().contains("DOCTRINE_TRUNK_REF"),
        "error names the offending override: {err}"
    );
}
/// An explicit override naming an existing branch resolves to that branch's
/// commit.
#[test]
fn trunk_ladder_explicit_valid_ref_wins() {
    let scratch = ScratchRepo::new();
    let head_sha = scratch.commit("a.txt", "hello", "init");
    let override_ref = OsStr::new("main");

    let resolved = super::trunk_ladder(scratch.path(), Some(override_ref)).unwrap();

    assert_eq!(resolved, Some(head_sha));
}
/// Filtering drops the excluded directory from the resulting tree, keeps
/// everything else, and leaves the `.git/index` file byte-identical.
#[test]
fn filter_tree_excludes_paths_and_leaves_live_index_untouched() {
    let scratch = ScratchRepo::new();
    scratch.commit("keep.txt", "k", "init");
    scratch.write(".doctrine/dispatch/64/journal.toml", "rows");
    scratch.git(&["add", "."]);
    scratch.git(&["commit", "-m", "add ledger"]);
    let tree = scratch.git(&["rev-parse", "HEAD^{tree}"]);

    // Snapshot the index file so it can be compared after the call.
    let index_path = scratch.path().join(".git/index");
    let index_before = std::fs::read(&index_path).expect("read index");

    let filtered =
        super::filter_tree(scratch.path(), &tree, &[".doctrine/dispatch/64"]).expect("filter");

    let listing = scratch.git(&["ls-tree", "-r", "--name-only", &filtered]);
    assert!(
        listing.contains("keep.txt"),
        "kept path survives: {listing}"
    );
    assert!(
        !listing.contains("journal.toml"),
        "excluded path dropped: {listing}"
    );

    let index_after = std::fs::read(&index_path).expect("read index");
    assert_eq!(
        index_before, index_after,
        "live index byte-for-byte unchanged"
    );
}
/// With nothing to exclude, filtering returns the input tree id.
#[test]
fn filter_tree_empty_exclude_is_identity() {
    let scratch = ScratchRepo::new();
    scratch.commit("a.txt", "1", "init");
    let original_tree = scratch.git(&["rev-parse", "HEAD^{tree}"]);

    let filtered = super::filter_tree(scratch.path(), &original_tree, &[]).expect("filter");

    assert_eq!(
        filtered, original_tree,
        "identity filter re-emits the same tree"
    );
}
/// `commit_tree` creates a commit with the given tree and parent while
/// leaving HEAD and the working tree as they were.
#[test]
fn commit_tree_against_parent_without_checkout() {
    let scratch = ScratchRepo::new();
    let parent_sha = scratch.commit("a.txt", "1", "first");
    scratch.commit("b.txt", "2", "second");
    let tip_tree = scratch.git(&["rev-parse", "HEAD^{tree}"]);

    // State to compare against after the call.
    let head_before = scratch.git(&["rev-parse", "HEAD"]);
    let a_path = scratch.path().join("a.txt");
    let a_before = std::fs::read_to_string(&a_path).expect("read a");

    let synthesized =
        super::commit_tree(scratch.path(), &tip_tree, &parent_sha, "synth").expect("commit");

    let first_parent_rev = format!("{synthesized}^");
    assert_eq!(
        scratch.git(&["rev-parse", &first_parent_rev]),
        parent_sha,
        "parent is as supplied"
    );
    assert_eq!(
        scratch.git(&["diff", "--name-only", &parent_sha, &synthesized]),
        "b.txt",
        "diff parent..commit is exactly the second-commit delta"
    );
    assert_eq!(
        scratch.git(&["rev-parse", "HEAD"]),
        head_before,
        "HEAD unmoved"
    );
    assert_eq!(
        std::fs::read_to_string(&a_path).expect("read a"),
        a_before,
        "working tree untouched"
    );
}
/// Walks `update_ref_cas` through create, a rejected update with a stale
/// expected-old value, and an accepted update with the correct one.
#[test]
fn update_ref_cas_succeeds_only_at_expected_old() {
let repo = ScratchRepo::new();
let c1 = repo.commit("a.txt", "1", "first");
let c2 = repo.commit("b.txt", "2", "second");
// Forty zeros, passed as the expected-old value while the ref does not exist yet.
let zero = "0".repeat(40);
let refname = "refs/review/x";
// Step 1: the ref is absent, so expecting `zero` creates it at c1.
assert!(matches!(
super::update_ref_cas(repo.path(), refname, &c1, &zero).expect("create"),
super::RefCas::Updated
));
assert_eq!(repo.git(&["rev-parse", refname]), c1);
// Step 2: the ref now sits at c1, so expecting `zero` again must report
// `Moved` with the actual value and leave the ref alone.
match super::update_ref_cas(repo.path(), refname, &c2, &zero).expect("cas") {
super::RefCas::Moved { actual } => assert_eq!(actual.as_deref(), Some(c1.as_str())),
super::RefCas::Updated => panic!("expected Moved at wrong expected-old"),
}
assert_eq!(repo.git(&["rev-parse", refname]), c1, "ref not clobbered");
// Step 3: expecting the true current value (c1) lets the update to c2 through.
assert!(matches!(
super::update_ref_cas(repo.path(), refname, &c2, &c1).expect("cas2"),
super::RefCas::Updated
));
assert_eq!(repo.git(&["rev-parse", refname]), c2);
}
/// `tree_with_file` returns a tree containing both the base entries and the
/// new file, and leaves the `.git/index` file byte-identical.
#[test]
fn tree_with_file_splices_blob_without_touching_index() {
    let spliced_path = ".doctrine/dispatch/064/journal.toml";

    let scratch = ScratchRepo::new();
    scratch.commit("keep.txt", "k", "init");
    let base_tree = scratch.git(&["rev-parse", "HEAD^{tree}"]);

    // Snapshot the index file so it can be compared after the call.
    let index_path = scratch.path().join(".git/index");
    let index_before = std::fs::read(&index_path).expect("read index");

    let tree = super::tree_with_file(scratch.path(), &base_tree, spliced_path, "rows = []\n")
        .expect("splice");

    let listing = scratch.git(&["ls-tree", "-r", "--name-only", &tree]);
    assert!(
        listing.contains("keep.txt"),
        "base path retained: {listing}"
    );
    assert!(
        listing.contains(spliced_path),
        "spliced path present: {listing}"
    );

    // The helper trims stdout, so the trailing newline is absent here.
    let blob_spec = format!("{tree}:{spliced_path}");
    let blob = scratch.git(&["cat-file", "-p", &blob_spec]);
    assert_eq!(blob, "rows = []", "spliced content readable at path");

    let index_after = std::fs::read(&index_path).expect("read index");
    assert_eq!(index_before, index_after, "live index untouched");
}
/// Walks `replay_ref` through its three outcomes on one ref: `Applied`,
/// `NoOp`, and `Moved`.
#[test]
fn replay_ref_no_op_apply_refuse() {
let repo = ScratchRepo::new();
let c1 = repo.commit("a.txt", "1", "first");
let c2 = repo.commit("b.txt", "2", "second");
let c3 = repo.commit("c.txt", "3", "third");
// Forty zeros, passed as the old value while the ref does not exist yet.
let zero = "0".repeat(40);
let refname = "refs/heads/trunk-x";
// Step 1: the ref is absent; replaying zero -> c1 creates it.
assert!(matches!(
super::replay_ref(repo.path(), refname, &zero, &c1).expect("create"),
super::ReplayOutcome::Applied
));
assert_eq!(repo.git(&["rev-parse", refname]), c1);
// Step 2: the same transition again, with the ref already at c1, is a no-op.
assert!(matches!(
super::replay_ref(repo.path(), refname, &zero, &c1).expect("replay"),
super::ReplayOutcome::NoOp
));
assert_eq!(repo.git(&["rev-parse", refname]), c1);
// Step 3: a c2 -> c3 transition does not match the ref (still at c1), so it
// is refused and the actual value is reported.
match super::replay_ref(repo.path(), refname, &c2, &c3).expect("diverge") {
super::ReplayOutcome::Moved { actual } => {
assert_eq!(actual.as_deref(), Some(c1.as_str()));
}
other => panic!("expected Moved, got {other:?}"),
}
assert_eq!(repo.git(&["rev-parse", refname]), c1, "not clobbered");
// Step 4: a transition starting from the true current value (c1) is applied.
assert!(matches!(
super::replay_ref(repo.path(), refname, &c1, &c2).expect("apply"),
super::ReplayOutcome::Applied
));
assert_eq!(repo.git(&["rev-parse", refname]), c2);
}
/// `is_ancestor` is true for parent-of-child and for a commit against
/// itself, and false for child-of-parent.
#[test]
fn is_ancestor_reads_exit_code() {
    let scratch = ScratchRepo::new();
    let parent_sha = scratch.commit("a.txt", "1", "first");
    let child_sha = scratch.commit("b.txt", "2", "second");

    let forward = super::is_ancestor(scratch.path(), &parent_sha, &child_sha).expect("c1<c2");
    assert!(forward);

    let backward = super::is_ancestor(scratch.path(), &child_sha, &parent_sha).expect("c2!<c1");
    assert!(!backward);

    let same = super::is_ancestor(scratch.path(), &parent_sha, &parent_sha).expect("reflexive");
    assert!(same);
}
/// `merge_base` returns the shared commit for two diverged branches and
/// `None` for histories that share nothing.
#[test]
fn merge_base_returns_fork_point_or_none() {
    let scratch = ScratchRepo::new();
    let base = scratch.commit("a.txt", "1", "base");

    // Branch off at `base`, then advance both sides independently.
    scratch.git(&["branch", "feature"]);
    scratch.commit("main.txt", "m", "main advances");
    scratch.git(&["checkout", "feature"]);
    let feature_tip = scratch.commit("feat.txt", "f", "feature commit");

    let shared = super::merge_base(scratch.path(), &feature_tip, "main").expect("merge-base");
    assert_eq!(
        shared,
        Some(base.clone()),
        "fork-point is the shared base, not either tip"
    );

    // A branch started with `--orphan` gets a commit with no link to `base`.
    scratch.git(&["checkout", "--orphan", "island"]);
    let island_tip = scratch.commit("island.txt", "i", "unrelated root");

    let unrelated =
        super::merge_base(scratch.path(), &island_tip, &base).expect("merge-base unrelated");
    assert_eq!(
        unrelated,
        None,
        "unrelated histories share no merge-base"
    );
}
/// `read_path_at` returns the blob content for a path that exists at the
/// commit and `Ok(None)` for one that does not.
#[test]
fn read_path_at_present_some_absent_none() {
    let present_path = ".doctrine/dispatch/064/journal.toml";
    let absent_path = ".doctrine/dispatch/064/absent.toml";

    let scratch = ScratchRepo::new();
    let head_sha = scratch.commit(present_path, "rows = []\n", "ledger");

    // Expected without the trailing newline that was committed.
    let present = super::read_path_at(scratch.path(), &head_sha, present_path).expect("present");
    assert_eq!(
        present,
        Some("rows = []".to_owned()),
        "present path yields its blob content"
    );

    let absent = super::read_path_at(scratch.path(), &head_sha, absent_path).expect("absent");
    assert_eq!(
        absent,
        None,
        "absent path yields None, not an error"
    );
}
}