use std::collections::HashSet;
use objects::{
error::HeddleError,
object::{ChangeId, ContentHash, FileMode, MarkerName, Principal, State, ThreadName},
store::ObjectStore,
};
use repo::{AudienceTier, Repository as HeddleRepository, visible};
use sley::{
CommitObject, EntryKind, GitObjectType, ObjectFormat, ObjectId, RefPrecondition,
ReferenceTarget, Repository as SleyRepository, Signature, plumbing::sley_object::EncodedObject,
};
use crate::bridge::{
git_core::{
GitBridge, GitBridgeError, GitResult, LocalGitIdentity, SyncMapping,
count_exported_commits, delete_reference_if_present,
git_config_identity_with_global_fallback, git_err, principal_is_default_unknown,
read_or_seed_mirror_managed_refs, set_reference, write_mirror_managed_refs,
},
git_notes,
git_reconstruct::{commit_object_id, reconstruct_commit_bytes, write_commit_object},
git_sync::{sync_marker_to_tag, sync_track_to_branch},
git_util::{ExportStats, ExportedRef},
};
/// Textual prefix of a blob that stands in for a Git submodule entry: the text after the prefix
/// is parsed as a hex object id by `submodule_oid_from_blob`, and `export_tree` then writes the
/// entry as `EntryKind::Commit` instead of a blob.
const SUBMODULE_PREFIX: &str = "heddle-submodule:";
/// Reports whether `state` stores a raw commit message.
///
/// Callers in this file use the answer to decide whether a commit is rebuilt through
/// `reconstruct_commit_bytes` rather than synthesized from scratch.
// NOTE(review): `raw_message` is presumably captured when a state is imported from Git — confirm.
fn has_git_fidelity(state: &State) -> bool {
    let raw_message = &state.raw_message;
    raw_message.is_some()
}
/// Returns `true` when neither the name nor the email of `who` contains U+FFFD, the Unicode
/// replacement character.
// NOTE(review): U+FFFD presumably marks bytes lost in a lossy UTF-8 conversion — confirm.
fn identity_is_byte_faithful(who: &Principal) -> bool {
    const REPLACEMENT: char = char::REPLACEMENT_CHARACTER;
    !(who.name.contains(REPLACEMENT) || who.email.contains(REPLACEMENT))
}
/// Decides whether `state` passes every byte-fidelity check in this file.
///
/// All of the following must hold:
/// - the state stores a raw message (`has_git_fidelity`),
/// - the state is not flagged `git_lossy`,
/// - the author identity contains no U+FFFD,
/// - the committer identity, when present, contains no U+FFFD.
fn commit_is_byte_faithful(state: &State) -> bool {
    if !has_git_fidelity(state) || state.git_lossy {
        return false;
    }
    if !identity_is_byte_faithful(&state.attribution.principal) {
        return false;
    }
    match state.committer.as_ref() {
        Some(committer) => identity_is_byte_faithful(committer),
        // A state without a separate committer has nothing further to check.
        None => true,
    }
}
/// Writes the Git commit object for one Heddle state and returns its object id.
///
/// Returns `Ok(None)` when the state's effective visibility tier is not visible to `audience`.
/// A state that stores a raw message is rebuilt via `reconstruct_commit_bytes`. Every other state
/// gets a synthesized commit: its tree is exported, a footer-bearing message is built (using
/// `message_override` as the body when given), and one signature serves as both author and
/// committer.
///
/// # Errors
/// - `GitBridgeError::StateNotFound` if `state_id` is not in the store, or if a parent of a
///   synthesized commit has no Git mapping yet.
/// - `GitBridgeError::Git` if the visibility tier cannot be resolved, or if the state's principal
///   is the default "unknown" one and no local `identity` was supplied.
/// - Any error surfaced by tree export, commit reconstruction or the object write.
pub(crate) fn export_state(
    mapping: &mut SyncMapping,
    heddle_repo: &HeddleRepository,
    repo: &SleyRepository,
    state_id: &ChangeId,
    identity: Option<&LocalGitIdentity>,
    message_override: Option<&str>,
    audience: &AudienceTier,
) -> GitResult<Option<ObjectId>> {
    let Some(state) = heddle_repo.store().get_state(state_id)? else {
        return Err(GitBridgeError::StateNotFound(*state_id));
    };

    // States the audience may not see are never written to the mirror.
    let tier = heddle_repo
        .effective_visibility_tier(state_id)
        .map_err(|e| GitBridgeError::Git(format!("resolve visibility for {state_id}: {e:#}")))?;
    if !visible(&tier, audience) {
        return Ok(None);
    }

    // NOTE(review): this path is gated on `has_git_fidelity`, not `commit_is_byte_faithful`, so
    // states flagged `git_lossy` are also reconstructed here — confirm that is intended.
    if has_git_fidelity(&state) {
        let content = reconstruct_commit_bytes(heddle_repo, repo, mapping, &state)?;
        let oid = write_commit_object(repo, &content)?;
        return Ok(Some(oid));
    }

    let git_tree_oid = export_tree(heddle_repo, repo, &state.tree)?;

    // An empty upstream URL is treated the same as an absent one.
    let hosted_url = heddle_repo
        .config()
        .hosted
        .upstream_url
        .as_deref()
        .filter(|url| !url.is_empty());
    let message = if let Some(body) = message_override {
        GitBridge::build_commit_message_with_footer_with_body(&state, body, hosted_url, 0)
    } else {
        GitBridge::build_commit_message_with_footer(&state, hosted_url, 0)
    };

    // Every parent must already have a Git commit; the first unmapped one aborts the export.
    let mut parent_oids: Vec<ObjectId> = Vec::new();
    for parent_id in state.parents.iter() {
        match mapping.get_git(parent_id) {
            Some(oid) => parent_oids.push(oid),
            None => return Err(GitBridgeError::StateNotFound(*parent_id)),
        }
    }

    // Prefer the state's own principal; fall back to the local Git identity only for the
    // default "unknown" principal, and refuse to write the commit when neither is usable.
    let sig = if !principal_is_default_unknown(&state.attribution.principal) {
        state_to_signature(&state)
    } else if let Some(local) = identity {
        local.to_signature(state.created_at.timestamp())
    } else {
        return Err(GitBridgeError::Git(
            "refusing to write a Git commit with Unknown <unknown@example.com>; configure user.name/user.email, HEDDLE_PRINCIPAL_NAME/HEDDLE_PRINCIPAL_EMAIL, or .heddle principal".to_string(),
        ));
    };

    let commit = CommitObject {
        tree: git_tree_oid,
        parents: parent_oids,
        author: sig.to_ident_bytes(),
        committer: sig.to_ident_bytes(),
        encoding: None,
        message: message.into_bytes(),
    };
    let encoded = EncodedObject::new(GitObjectType::Commit, commit.write());
    let oid = repo.write_object(encoded).map_err(git_err)?;
    Ok(Some(oid))
}
pub fn export_tree(
heddle_repo: &HeddleRepository,
repo: &SleyRepository,
tree_hash: &ContentHash,
) -> GitResult<ObjectId> {
let tree = heddle_repo
.store()
.get_tree(tree_hash)?
.ok_or_else(|| HeddleError::NotFound(format!("tree {}", tree_hash)))?;
let empty_tree = ObjectId::empty_tree(repo.object_format());
let mut editor = repo.edit_tree(&empty_tree).map_err(git_err)?;
for entry in tree.entries() {
let (kind, id) = if entry.is_tree() {
(
EntryKind::Tree,
export_tree(heddle_repo, repo, &entry.hash)?,
)
} else {
let stub = heddle_repo
.redaction_stub_for_blob(&entry.hash)
.map_err(|err| HeddleError::Config(format!("redaction lookup failed: {err}")))?;
if let Some(stub_text) = stub {
let kind = match entry.mode {
FileMode::Symlink => EntryKind::Symlink,
FileMode::Executable => EntryKind::BlobExecutable,
_ => EntryKind::Blob,
};
let oid = repo.write_blob(stub_text.as_bytes()).map_err(git_err)?;
(kind, oid)
} else {
let blob = heddle_repo
.store()
.get_blob(&entry.hash)?
.ok_or_else(|| HeddleError::NotFound(format!("blob {}", entry.hash)))?;
if entry.mode == FileMode::Normal
&& let Some(oid) = submodule_oid_from_blob(blob.content())
{
(EntryKind::Commit, oid)
} else {
let kind = match entry.mode {
FileMode::Normal => EntryKind::Blob,
FileMode::Executable => EntryKind::BlobExecutable,
FileMode::Symlink => EntryKind::Symlink,
};
let oid = repo.write_blob(blob.content()).map_err(git_err)?;
(kind, oid)
}
}
};
editor.upsert(entry.name.as_str(), kind, id);
}
repo.write_tree(editor).map_err(git_err)
}
/// Exports every state in the store (no thread scope), running the export inside the bridge's
/// `with_mapping_rollback` wrapper.
pub fn export_all(bridge: &mut GitBridge) -> GitResult<ExportStats> {
    let scope: Option<&str> = None;
    bridge.with_mapping_rollback(|inner| export_scoped(inner, scope))
}
/// Exports only the states reachable from `thread`, running the export inside the bridge's
/// `with_mapping_rollback` wrapper.
pub fn export_current_thread(bridge: &mut GitBridge, thread: &str) -> GitResult<ExportStats> {
    let scope = Some(thread);
    bridge.with_mapping_rollback(|inner| export_scoped(inner, scope))
}
/// Exports Heddle states into the Git mirror, then reconciles the mirror's notes, branches and
/// tags with what the public audience is allowed to see.
///
/// With `thread = Some(name)` only the states reachable from that thread's tip are exported and
/// only that thread's branch counts as "in scope"; tags are out of scope. With `thread = None`
/// every state listed by the store is exported and all branches and tags are in scope.
///
/// # Errors
/// Returns `GitBridgeError::Git` when the requested thread has no tip, and propagates any error
/// from the store, the ref store, the Git repository, note handling or mapping persistence.
///
/// # Panics
/// Panics if `reconcile_ref` returns `Write`/`ForceRewind` without a desired target; the match in
/// `reconcile_ref` only yields those operations when a desired target exists.
fn export_scoped(bridge: &mut GitBridge, thread: Option<&str>) -> GitResult<ExportStats> {
    bridge.init_mirror()?;

    // Candidate states: the ancestry of the requested thread, or everything in the store.
    let states = match thread {
        Some(thread) => {
            let Some(state_id) = bridge
                .heddle_repo
                .refs()
                .get_thread(&ThreadName::new(thread))?
            else {
                return Err(GitBridgeError::Git(format!(
                    "thread '{thread}' has no state to export"
                )));
            };
            reachable_states(bridge.heddle_repo, &[state_id])?
        }
        None => bridge.heddle_repo.store().list_states()?,
    };
    let mut stats = ExportStats::default();
    bridge.build_existing_mapping(None)?;
    let identity = git_config_identity_with_global_fallback(bridge.heddle_repo.root())?;
    // The mirror is always projected for the public audience.
    let audience = AudienceTier::Public;
    let sorted_states = bridge.sort_states_topologically(&states)?;
    let reachable: HashSet<ChangeId> = sorted_states.iter().copied().collect();
    let repo = bridge.open_git_repo()?;
    bridge.mapping.retain_git_objects(&repo);
    bridge.seed_git_checkpoint_mappings_from_checkout(&repo)?;
    bridge.seed_ingest_identity_mappings_from_mirror(&repo)?;

    // Threads to project as branches: everything that is not named `<remote>/<branch>` for a
    // known Git remote, plus the explicitly requested thread even if that filter dropped it.
    let remote_names = git_remote_names(bridge.heddle_repo);
    let threads: Vec<String> = {
        let mut all: Vec<String> = bridge
            .heddle_repo
            .refs()
            .list_threads()?
            .into_iter()
            .filter(|thread| !is_remote_tracking_thread_name(thread, &remote_names))
            .map(|t| t.to_string())
            .collect();
        if let Some(t) = thread
            && !all.iter().any(|x| x == t)
        {
            all.push(t.to_string());
        }
        all
    };
    let markers: Vec<MarkerName> = bridge.heddle_repo.refs().list_markers()?;

    // Frontier roots are the tips of all projected threads and the targets of all markers.
    let mut frontier_roots: Vec<ChangeId> = Vec::new();
    for track_name in &threads {
        if let Some(tip) = bridge
            .heddle_repo
            .refs()
            .get_thread(&ThreadName::new(track_name))?
        {
            frontier_roots.push(tip);
        }
    }
    for marker_name in &markers {
        if let Some(state_id) = bridge.heddle_repo.refs().get_marker(marker_name)? {
            frontier_roots.push(state_id);
        }
    }
    let frontier_reachable = reachable_states(bridge.heddle_repo, &frontier_roots)?;

    // Snapshot the mapping before purging so notes on purged commits can be retracted later.
    let pre_purge_targets: Vec<(ChangeId, ObjectId)> =
        bridge.mapping.iter().map(|(c, o)| (*c, *o)).collect();
    let purge_reachable: HashSet<ChangeId> = sorted_states
        .iter()
        .copied()
        .chain(frontier_reachable.iter().copied())
        .collect();
    let purge_sorted =
        bridge.sort_states_topologically(&purge_reachable.iter().copied().collect::<Vec<_>>())?;
    // Drop mappings for states that are not served to the audience.
    purge_unserved_mappings(
        bridge.heddle_repo,
        &mut bridge.mapping,
        &purge_sorted,
        &purge_reachable,
        &audience,
    )?;

    let mut newly_minted: HashSet<ObjectId> = HashSet::new();
    for state_id in sorted_states {
        if bridge.mapping.has_heddle(&state_id) {
            // Already mapped: nothing new is minted. For byte-faithful states the commit is
            // rebuilt and written again, but only when the rebuilt id equals the mapped id (or
            // no id is mapped), so a diverging reconstruction is never written.
            if let Some(state) = bridge.heddle_repo.store().get_state(&state_id)?
                && has_git_fidelity(&state)
            {
                let mapped = bridge.mapping.get_git(&state_id);
                if commit_is_byte_faithful(&state) {
                    let content = reconstruct_commit_bytes(
                        bridge.heddle_repo,
                        &repo,
                        &bridge.mapping,
                        &state,
                    )?;
                    let reconstructed = commit_object_id(&content);
                    if mapped.map(|m| m == reconstructed).unwrap_or(true) {
                        write_commit_object(&repo, &content)?;
                    }
                }
            }
            continue;
        }
        // A parent that is in scope but has no Git commit was withheld; exporting this state
        // would need that parent, so it is withheld as well.
        let parent_withheld = bridge
            .heddle_repo
            .store()
            .get_state(&state_id)?
            .map(|state| {
                state
                    .parents
                    .iter()
                    .any(|p| reachable.contains(p) && bridge.mapping.get_git(p).is_none())
            })
            .unwrap_or(false);
        if parent_withheld {
            continue;
        }
        let message_override = bridge
            .commit_message_overrides
            .get(&state_id)
            .map(String::as_str);
        // `None` means the state is not visible to the audience; skip it.
        let Some(git_oid) = export_state(
            &mut bridge.mapping,
            bridge.heddle_repo,
            &repo,
            &state_id,
            identity.as_ref(),
            message_override,
            &audience,
        )?
        else {
            continue;
        };
        bridge.mapping.insert(state_id, git_oid);
        newly_minted.insert(git_oid);
        if let Some(state) = bridge.heddle_repo.store().get_state(&state_id)? {
            let note = git_notes::HeddleNote::from_state(&state);
            git_notes::write_note(&repo, git_oid, &note)?;
        }
    }

    // Note reconciliation covers everything reachable from states mapped before the purge or
    // mapped now.
    let note_target_roots: Vec<ChangeId> = pre_purge_targets
        .iter()
        .map(|(c, _)| *c)
        .chain(bridge.mapping.iter().map(|(c, _)| *c))
        .collect();
    let note_reachable_vec = reachable_states(bridge.heddle_repo, &note_target_roots)?;
    let note_reachable: HashSet<ChangeId> = note_reachable_vec.iter().copied().collect();
    let note_sorted = bridge.sort_states_topologically(&note_reachable_vec)?;
    let note_served =
        served_change_ids(bridge.heddle_repo, &note_sorted, &note_reachable, &audience)?;
    // Backfill notes for served, mapped commits that do not have one yet.
    let note_targets: Vec<(ChangeId, ObjectId)> =
        bridge.mapping.iter().map(|(c, o)| (*c, *o)).collect();
    for (change_id, git_oid) in note_targets {
        if note_served.contains(&change_id)
            && git_notes::read_note(&repo, git_oid)?.is_none()
            && let Some(state) = bridge.heddle_repo.store().get_state(&change_id)?
        {
            let note = git_notes::HeddleNote::from_state(&state);
            git_notes::write_note(&repo, git_oid, &note)?;
        }
    }
    // Retract notes on commits that were mapped before the purge and belong to no served state.
    // An object id that is also the target of a served state keeps its note.
    let served_note_oids: HashSet<ObjectId> = pre_purge_targets
        .iter()
        .copied()
        .chain(bridge.mapping.iter().map(|(c, o)| (*c, *o)))
        .filter(|(c, _)| note_served.contains(c))
        .map(|(_, oid)| oid)
        .collect();
    let notes_to_retract: HashSet<ObjectId> = pre_purge_targets
        .iter()
        .filter(|(c, _)| !note_served.contains(c))
        .map(|(_, oid)| *oid)
        .filter(|oid| !served_note_oids.contains(oid))
        .collect();
    git_notes::remove_notes(&repo, &notes_to_retract)?;

    // Desired ref targets derived from the post-export mapping.
    let desired = project_desired_refs(bridge.heddle_repo, &bridge.mapping, &threads, &markers)?;
    let frontier_served = {
        let reachable_set: HashSet<ChangeId> = frontier_reachable.iter().copied().collect();
        let sorted = bridge.sort_states_topologically(&frontier_reachable)?;
        served_change_ids(bridge.heddle_repo, &sorted, &reachable_set, &audience)?
    };
    let served_oids: HashSet<ObjectId> = frontier_served
        .iter()
        .filter_map(|state| bridge.mapping.get_git(state))
        .collect();
    let mut managed_record = read_or_seed_mirror_managed_refs(&repo)?;

    // Branch reconciliation, one thread at a time.
    for track_name in &threads {
        if bridge
            .heddle_repo
            .refs()
            .get_thread(&ThreadName::new(track_name))?
            .is_none()
        {
            continue;
        }
        let branch_ref = format!("refs/heads/{track_name}");
        let in_scope = thread.is_none() || thread == Some(track_name.as_str());
        let desired_oid = desired.get(&branch_ref).copied();
        let existing_oid = branch_tip_oid(&repo, &branch_ref);
        match reconcile_ref(
            ReconcileNs::Head,
            desired_oid,
            existing_oid,
            in_scope,
            false,
            &served_oids,
        ) {
            ReconcileOp::Write => {
                let git_oid = desired_oid.expect("Write implies a desired target");
                sync_track_to_branch(&repo, track_name, git_oid)?;
                managed_record.insert(branch_ref.clone(), git_oid);
                stats.threads_synced += 1;
                stats.branches.push(ExportedRef {
                    name: track_name.clone(),
                    tip: git_oid,
                });
            }
            ReconcileOp::ForceRewind => {
                // The existing tip is not a served commit: overwrite the ref unconditionally.
                let git_oid = desired_oid.expect("ForceRewind implies a desired target");
                set_reference(
                    &repo,
                    &branch_ref,
                    git_oid,
                    RefPrecondition::Any,
                    "heddle: retract embargoed thread frontier",
                )?;
                managed_record.insert(branch_ref.clone(), git_oid);
                stats.threads_synced += 1;
                stats.branches.push(ExportedRef {
                    name: track_name.clone(),
                    tip: git_oid,
                });
            }
            ReconcileOp::Delete => {
                delete_reference_if_present(&repo, &branch_ref)?;
                managed_record.remove(&branch_ref);
            }
            ReconcileOp::Skip | ReconcileOp::Preserve => {}
        }
    }

    // Tag reconciliation covers current markers plus every tag already in the managed record,
    // so tags whose marker has disappeared are still visited.
    let mut tag_names: std::collections::BTreeSet<String> =
        markers.iter().map(|m| m.to_string()).collect();
    for full_name in managed_record.keys() {
        if let Some(tag) = full_name.strip_prefix("refs/tags/") {
            tag_names.insert(tag.to_string());
        }
    }
    for name in &tag_names {
        let tag_ref = format!("refs/tags/{name}");
        let existing_raw_oid = direct_ref_oid(&repo, &tag_ref);
        let existing_oid = existing_raw_oid.and_then(|oid| peel_to_commit_oid(&repo, oid));
        let desired_oid = desired.get(&tag_ref).copied();
        let in_scope = thread.is_none();
        // True when the marker's state is served but has no Git commit in the mapping.
        let marker_served_unminted = match bridge
            .heddle_repo
            .refs()
            .get_marker(&MarkerName::new(name.as_str()))?
        {
            Some(state) => {
                bridge.mapping.get_git(&state).is_none() && frontier_served.contains(&state)
            }
            None => false,
        };
        // The ref points at a non-commit object (e.g. a tag object) that peels to the desired
        // commit: keep the ref as it is and record the raw target.
        if let (Some(desired), Some(raw), Some(peeled)) =
            (desired_oid, existing_raw_oid, existing_oid)
            && raw != desired
            && peeled == desired
        {
            managed_record.insert(tag_ref.clone(), raw);
            stats.markers_synced += 1;
            stats.tags.push(ExportedRef {
                name: name.clone(),
                tip: raw,
            });
            continue;
        }
        match reconcile_ref(
            ReconcileNs::Tag,
            desired_oid,
            existing_oid,
            in_scope,
            marker_served_unminted,
            &served_oids,
        ) {
            ReconcileOp::Write => {
                let git_oid = desired_oid.expect("Write implies a desired target");
                sync_marker_to_tag(&repo, name, git_oid)?;
                managed_record.insert(tag_ref.clone(), git_oid);
                stats.markers_synced += 1;
                stats.tags.push(ExportedRef {
                    name: name.clone(),
                    tip: git_oid,
                });
            }
            ReconcileOp::Delete => {
                delete_reference_if_present(&repo, &tag_ref)?;
                managed_record.remove(&tag_ref);
            }
            ReconcileOp::Preserve | ReconcileOp::Skip | ReconcileOp::ForceRewind => {}
        }
    }

    write_mirror_managed_refs(&repo, &managed_record)?;
    let counts = count_exported_commits(&repo, &newly_minted)?;
    stats.commits_total = counts.total;
    stats.states_exported = counts.newly;
    bridge.save_mapping_to_disk()?;
    Ok(stats)
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ReconcileNs {
Head,
Tag,
}
/// Action chosen by `reconcile_ref` for a single mirror ref.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ReconcileOp {
/// Nothing to do; the caller leaves the ref and the managed-ref record untouched.
Skip,
/// Point the ref at the desired target through the regular sync helper.
Write,
/// Overwrite a branch ref with `RefPrecondition::Any`; the tag loop treats this as a no-op.
ForceRewind,
/// Keep the existing ref as it is; the caller takes no action.
Preserve,
/// Delete the ref if present and drop it from the managed-ref record.
Delete,
}
/// Chooses the operation that brings one mirror ref in line with its desired target.
///
/// | desired | existing | result |
/// |---------|----------|--------|
/// | some    | none     | `Write` when `in_scope`, otherwise `Skip` |
/// | some    | some     | branches: `Write` if the existing target is served, else `ForceRewind`; tags: `Write` |
/// | none    | none     | `Skip` |
/// | none    | some     | `Preserve` when `marker_served_unminted` and the existing target is served, else `Delete` |
///
/// "Served" means the existing object id is contained in `served_oids`.
fn reconcile_ref(
    ns: ReconcileNs,
    desired_oid: Option<ObjectId>,
    existing_oid: Option<ObjectId>,
    in_scope: bool,
    marker_served_unminted: bool,
    served_oids: &HashSet<ObjectId>,
) -> ReconcileOp {
    // No ref exists yet: create it only when there is a target and the ref is in scope.
    let Some(existing) = existing_oid else {
        return if desired_oid.is_some() && in_scope {
            ReconcileOp::Write
        } else {
            ReconcileOp::Skip
        };
    };
    let existing_served = served_oids.contains(&existing);

    // A ref exists but nothing is desired: keep it only in the served-but-unminted marker case.
    if desired_oid.is_none() {
        return if marker_served_unminted && existing_served {
            ReconcileOp::Preserve
        } else {
            ReconcileOp::Delete
        };
    }

    // Both exist: only a branch whose current tip is not served needs a forced rewind.
    match ns {
        ReconcileNs::Head if !existing_served => ReconcileOp::ForceRewind,
        ReconcileNs::Head | ReconcileNs::Tag => ReconcileOp::Write,
    }
}
/// Collects the non-blank remote names of the Git repository discovered from the Heddle root.
///
/// Best effort: if no repository can be discovered or the remotes cannot be listed, the result
/// is an empty set rather than an error.
fn git_remote_names(heddle_repo: &HeddleRepository) -> HashSet<String> {
    let mut names = HashSet::new();
    if let Ok(repo) = SleyRepository::discover(heddle_repo.root()) {
        for name in repo.remote_names().unwrap_or_default() {
            if !name.trim().is_empty() {
                names.insert(name);
            }
        }
    }
    names
}
/// Returns `true` when `thread` has the form `<remote>/<branch>` with a non-empty branch part and
/// `<remote>` is one of `remote_names`.
///
/// The name is split at its first `/`, so a remote name that itself contains a slash never
/// matches.
fn is_remote_tracking_thread_name(thread: &str, remote_names: &HashSet<String>) -> bool {
    thread
        .split_once('/')
        .is_some_and(|(remote, branch)| !branch.is_empty() && remote_names.contains(remote))
}
/// Removes from `mapping` every state in `sorted_states` that is not served to `audience` and
/// returns the Git object ids those removed entries pointed at.
///
/// "Served" is decided by `served_change_ids` over the same `sorted_states` / `reachable` pair.
fn purge_unserved_mappings(
    heddle_repo: &HeddleRepository,
    mapping: &mut SyncMapping,
    sorted_states: &[ChangeId],
    reachable: &HashSet<ChangeId>,
    audience: &AudienceTier,
) -> GitResult<HashSet<ObjectId>> {
    let served = served_change_ids(heddle_repo, sorted_states, reachable, audience)?;
    let purged: HashSet<ObjectId> = sorted_states
        .iter()
        .filter(|id| !served.contains(*id))
        // States that were never mapped yield `None` and contribute nothing.
        .filter_map(|id| mapping.remove(id))
        .collect();
    Ok(purged)
}
/// Computes which of `sorted_states` are served to `audience`.
///
/// A state is served when its effective visibility tier is visible to `audience` and every parent
/// that lies inside `reachable` has itself been served. Parents outside `reachable` are ignored,
/// and a state missing from the store is treated as having no parents.
///
/// The parent check only consults states processed earlier in the loop, so `sorted_states` must
/// list parents before children (callers pass a topologically sorted slice).
fn served_change_ids(
    heddle_repo: &HeddleRepository,
    sorted_states: &[ChangeId],
    reachable: &HashSet<ChangeId>,
    audience: &AudienceTier,
) -> GitResult<HashSet<ChangeId>> {
    let mut served = HashSet::new();
    for state_id in sorted_states {
        let tier = heddle_repo
            .effective_visibility_tier(state_id)
            .map_err(|e| {
                GitBridgeError::Git(format!("resolve visibility for {state_id}: {e:#}"))
            })?;
        let state = heddle_repo.store().get_state(state_id)?;
        // An in-scope parent that was not served blocks this state too.
        let has_withheld_parent = state.is_some_and(|state| {
            state
                .parents
                .iter()
                .any(|p| reachable.contains(p) && !served.contains(p))
        });
        if visible(&tier, audience) && !has_withheld_parent {
            served.insert(*state_id);
        }
    }
    Ok(served)
}
/// Resolves `ref_name` to the commit it ultimately points at.
///
/// Returns `None` when the ref is missing, cannot be read or peeled, or does not end at a commit.
fn branch_tip_oid(repo: &SleyRepository, ref_name: &str) -> Option<ObjectId> {
    let Ok(Some(reference)) = repo.find_reference(ref_name) else {
        return None;
    };
    let Ok(Some(peeled)) = reference.peeled_oid(repo) else {
        return None;
    };
    peel_to_commit_oid(repo, peeled)
}
/// Returns the object id stored directly in `ref_name`, without peeling.
///
/// Yields `None` for a missing or unreadable ref and for a symbolic ref.
fn direct_ref_oid(repo: &SleyRepository, ref_name: &str) -> Option<ObjectId> {
    let Ok(Some(reference)) = repo.find_reference(ref_name) else {
        return None;
    };
    if let ReferenceTarget::Direct(oid) = reference.target {
        Some(oid)
    } else {
        None
    }
}
/// Follows a chain of tag objects starting at `oid` until a commit is reached.
///
/// Returns the commit's id, or `None` when an object cannot be read or the chain ends at
/// something that is neither a commit nor a tag.
fn peel_to_commit_oid(repo: &SleyRepository, oid: ObjectId) -> Option<ObjectId> {
    let mut current = oid;
    loop {
        let object = repo.read_object(&current).ok()?;
        if matches!(object.object_type, GitObjectType::Commit) {
            return Some(current);
        }
        if !matches!(object.object_type, GitObjectType::Tag) {
            return None;
        }
        // Step to the object the tag points at and inspect that next.
        current = repo.read_tag(&current).ok()?.object;
    }
}
/// Builds the map from full Git ref name to the object id the mirror should point that ref at.
///
/// - `refs/heads/<thread>` maps to the commit chosen by `frontier_git_oid` for the thread's tip;
///   threads without a tip or without any mapped ancestor are left out.
/// - `refs/tags/<marker>` maps to the commit mapped for the marker's state; markers whose state
///   is unset or unmapped are left out.
fn project_desired_refs(
    heddle_repo: &HeddleRepository,
    mapping: &SyncMapping,
    threads: &[String],
    markers: &[MarkerName],
) -> GitResult<std::collections::HashMap<String, ObjectId>> {
    let mut desired = std::collections::HashMap::new();

    for track_name in threads {
        let tip = heddle_repo
            .refs()
            .get_thread(&ThreadName::new(track_name))?;
        let git_oid = match tip {
            Some(tip) => frontier_git_oid(heddle_repo, mapping, tip)?,
            None => None,
        };
        if let Some(git_oid) = git_oid {
            desired.insert(format!("refs/heads/{track_name}"), git_oid);
        }
    }

    for marker_name in markers {
        let target = heddle_repo.refs().get_marker(marker_name)?;
        if let Some(git_oid) = target.and_then(|state_id| mapping.get_git(&state_id)) {
            desired.insert(format!("refs/tags/{marker_name}"), git_oid);
        }
    }

    Ok(desired)
}
/// Finds the Git commit that represents `tip` in the mirror.
///
/// Walks the ancestry from `tip`, stopping each branch of the walk at the first state that has a
/// Git mapping. Among those mapped states, the one whose full change-id string sorts lowest is
/// chosen, so the result does not depend on traversal order. Returns `Ok(None)` when no visited
/// state is mapped.
fn frontier_git_oid(
    heddle_repo: &HeddleRepository,
    mapping: &SyncMapping,
    tip: ChangeId,
) -> GitResult<Option<ObjectId>> {
    let mut seen = HashSet::new();
    let mut pending = vec![tip];
    let mut mapped_frontier: Vec<(ChangeId, ObjectId)> = Vec::new();

    while let Some(id) = pending.pop() {
        if !seen.insert(id) {
            continue;
        }
        match mapping.get_git(&id) {
            // A mapped state ends this branch of the walk; its ancestors are not visited.
            Some(oid) => mapped_frontier.push((id, oid)),
            None => {
                if let Some(state) = heddle_repo.store().get_state(&id)? {
                    pending.extend(state.parents.iter().copied());
                }
            }
        }
    }

    let chosen = mapped_frontier
        .into_iter()
        .min_by_key(|(id, _)| id.to_string_full())
        .map(|(_, oid)| oid);
    Ok(chosen)
}
/// Lists every state reachable from `roots` by following parent links, roots included.
///
/// Each state appears once, in the order a stack-based walk first visits it. Ids missing from
/// the store are still listed but contribute no parents.
fn reachable_states(
    heddle_repo: &HeddleRepository,
    roots: &[ChangeId],
) -> GitResult<Vec<ChangeId>> {
    let mut pending: Vec<ChangeId> = roots.to_vec();
    let mut visited = HashSet::new();
    let mut ordered = Vec::new();

    while let Some(current) = pending.pop() {
        // `insert` returns false for ids that were already handled.
        if visited.insert(current) {
            ordered.push(current);
            if let Some(state) = heddle_repo.store().get_state(&current)? {
                pending.extend(state.parents.iter().copied());
            }
        }
    }

    Ok(ordered)
}
/// Builds a Git signature from the state's attributed principal and creation time.
///
/// The timestamp is the creation time in seconds with a fixed `+0000` offset; `raw` holds the
/// matching `name <email> seconds +0000` ident line.
fn state_to_signature(state: &objects::object::State) -> Signature {
    use sley::plumbing::sley_core::ByteString;

    let principal = &state.attribution.principal;
    let seconds = state.created_at.timestamp();
    let raw = format!(
        "{} <{}> {} +0000",
        principal.name, principal.email, seconds
    )
    .into_bytes();

    Signature {
        name: ByteString::new(principal.name.as_bytes().to_vec()),
        email: ByteString::new(principal.email.as_bytes().to_vec()),
        time: sley::GitTime::new(seconds, 0),
        raw,
    }
}
/// Parses a submodule pointer blob of the form `heddle-submodule:<hex object id>`.
///
/// Surrounding whitespace, and whitespace between the prefix and the id, is ignored. Returns
/// `None` when the content is not UTF-8, lacks the prefix, or the remainder is not a valid
/// SHA-1 hex id.
// NOTE(review): the id is always parsed as `ObjectFormat::Sha1`, independent of the target
// repository's object format — confirm this is intended for SHA-256 mirrors.
fn submodule_oid_from_blob(content: &[u8]) -> Option<ObjectId> {
    let hex = std::str::from_utf8(content)
        .ok()?
        .trim()
        .strip_prefix(SUBMODULE_PREFIX)?
        .trim();
    ObjectId::from_hex(ObjectFormat::Sha1, hex).ok()
}
#[cfg(test)]
mod tests {
    use objects::object::{Attribution, ContentHash, Principal, State};

    use super::*;

    /// Builds a parentless state with a clean ASCII author and a stored raw message, i.e. one
    /// that passes every check in `commit_is_byte_faithful`.
    fn imported_state() -> State {
        let tree = ContentHash::from_bytes([7u8; 32]);
        let author = Attribution::human(Principal::new("Alice", "alice@example.com"));
        State::new(tree, vec![], author).with_raw_message("an imported commit\n")
    }

    /// A state with a raw message and no lossy marker is byte-faithful.
    #[test]
    fn byte_faithful_when_fidelity_present_and_not_lossy() {
        let state = imported_state();
        assert!(commit_is_byte_faithful(&state));
    }

    /// Setting `git_lossy` alone is enough to fail the byte-fidelity check.
    #[test]
    fn lossy_marker_blocks_reconstruction() {
        let lossy = imported_state().with_git_lossy(true);
        let faithful = commit_is_byte_faithful(&lossy);
        assert!(
            !faithful,
            "a state carrying the canonical git_lossy marker must NOT be \
             reconstructed from state, regardless of import surface"
        );
    }
}