cli/bridge/git_export.rs
1// SPDX-License-Identifier: Apache-2.0
//! Exports Heddle states as Git commits.
3
4use std::collections::HashSet;
5
6use tracing::debug;
7
8use objects::{
9 error::HeddleError,
10 object::{ChangeId, ContentHash, MarkerName, Principal, State, ThreadName, TreeEntryTarget},
11 store::ObjectStore,
12};
13use repo::{AudienceTier, Repository as HeddleRepository, visible};
14use sley::{
15 CommitObject, EntryKind, GitObjectType, ObjectId, RefPrecondition, ReferenceTarget,
16 Repository as SleyRepository, Signature, plumbing::sley_object::EncodedObject,
17};
18
19use crate::bridge::{
20 git_core::{
21 GitBridge, GitBridgeError, GitResult, LocalGitIdentity, SyncMapping,
22 copy_reachable_objects, count_exported_commits, delete_reference_if_present,
23 git_config_identity_with_global_fallback, git_err, principal_is_default_unknown,
24 read_or_seed_mirror_managed_refs, set_reference, write_mirror_managed_refs,
25 },
26 git_notes,
27 git_reconstruct::{commit_object_id, reconstruct_commit_bytes, write_commit_object},
28 git_sync::{sync_marker_to_tag, sync_track_to_branch},
29 git_util::{ExportStats, ExportedRef},
30};
31
32/// Whether `state` carries a captured original git commit to reconstruct
33/// byte-exactly (the #565 de-lossy fidelity fields). When true, export
34/// regenerates the commit object from state via [`reconstruct_commit_bytes`]
35/// with NO W2 footer and NO `"No intent specified"` placeholder — any injected
36/// byte would push the minted object off the original SHA (#567). When false
37/// (a native heddle commit, no original to preserve), export mints with the
38/// footer/placeholder as before.
39///
40/// `raw_message` is the load-bearing signal: the git importer always records it
41/// (even as an empty body for an empty-message commit) for an imported commit,
42/// and never for a native one.
43fn has_git_fidelity(state: &State) -> bool {
44 state.raw_message.is_some()
45}
46
47/// Whether `who`'s name/email round-trip byte-exactly through reconstruction.
48/// `Principal.name/email` are `String`, so the git importer replaced any non-UTF8
49/// identity byte with U+FFFD when it called `to_string()` on the raw actor bytes
50/// (the #565-deferred gap; `Principal` is still `String`, see #564). Those
51/// replaced bytes can't be regenerated, so reconstruction would hash off the
52/// original SHA. A literal U+FFFD that was itself valid UTF-8 in the original
53/// survives fine — so this can only FALSE-POSITIVE into the safe verbatim
54/// fallback, never a wrong-SHA mint.
55fn identity_is_byte_faithful(who: &Principal) -> bool {
56 !who.name.contains('\u{FFFD}') && !who.email.contains('\u{FFFD}')
57}
58
59/// Whether reconstructing `state`'s commit object from Heddle state alone is
60/// guaranteed byte-exact to the original commit — the precondition for the #567
61/// reconstruct-from-state path. False for the two #564 lossy gaps:
62/// 1. non-UTF8 author/committer identity bytes (see [`identity_is_byte_faithful`]);
63/// 2. lossy imports, where unrepresentable tree entries were dropped/converted
64/// so the rebuilt tree — hence commit — OID diverges.
65///
66/// (2) is read off ONE canonical signal — [`State::git_lossy`] — that lossy
67/// import population paths set, rather than enumerating import surfaces or
68/// relying on bridge mapping sidecar state. The state flag closes the whole
69/// class, including any future lossy entry point.
70///
71/// When false the caller MUST keep the verbatim mirror bytes / preserved mapped
72/// OID (or fall through to the native mint) rather than mint a wrong-SHA
73/// reconstructed object.
74///
75/// `pub(crate)` so the checkout write-through path (#568 P1,
76/// `git_core::write_thread_state_checkout_from_existing_mirror`) reads the SAME
77/// single faithful-or-lossy discriminator the export path does — reconstruct
78/// faithful commits from state, mirror-backstop the lossy residual. Keeping ONE
79/// chokepoint for the decision means a new consumer cannot drift to a different
80/// (wrong-SHA) rule.
81pub(crate) fn commit_is_byte_faithful(state: &State) -> bool {
82 has_git_fidelity(state)
83 && !state.git_lossy
84 && identity_is_byte_faithful(&state.attribution.principal)
85 && state
86 .committer
87 .as_ref()
88 .map(identity_is_byte_faithful)
89 .unwrap_or(true)
90}
91
92pub(crate) struct ExportStateOptions<'a> {
93 pub(crate) identity: Option<&'a LocalGitIdentity>,
94 pub(crate) message_override: Option<&'a str>,
95 pub(crate) parent_override: Option<&'a [ObjectId]>,
96 pub(crate) audience: &'a AudienceTier,
97}
98
99/// Export a single state to Git for `audience`.
100///
101/// Returns `Ok(None)` — **absence** — when the state's effective visibility
102/// tier is not visible to `audience`: the public mirror never mints a Git
103/// commit (no stub, no partial tree) for an embargoed state (spike §5.0/§5.3).
104/// The caller realizes downward-closure by also withholding any state whose
105/// parent was withheld, so an embargoed commit *and its descendants* stay
106/// absent from the mirror.
107pub(crate) fn export_state(
108 mapping: &mut SyncMapping,
109 heddle_repo: &HeddleRepository,
110 repo: &SleyRepository,
111 state_id: &ChangeId,
112 options: ExportStateOptions<'_>,
113) -> GitResult<Option<ObjectId>> {
114 let state = heddle_repo
115 .store()
116 .get_state(state_id)?
117 .ok_or(GitBridgeError::StateNotFound(*state_id))?;
118
119 // Audience-aware minting. The visibility decision lives here, at the state
120 // walk where the `ChangeId` is in scope — never in the blob-keyed
121 // `export_tree` (no `ChangeId`/audience).
122 let tier = heddle_repo
123 .effective_visibility_tier(state_id)
124 .map_err(|e| GitBridgeError::Git(format!("resolve visibility for {state_id}: {e:#}")))?;
125 if !visible(&tier, options.audience) {
126 return Ok(None);
127 }
128
129 // Fidelity mint (#567): the state carries a captured original git commit
130 // (#565 fields — `raw_message` is the load-bearing signal). MINT the commit
131 // object from that raw metadata via `reconstruct_commit_bytes` — NO footer,
132 // NO placeholder, NO message override — so the minted bytes preserve the
133 // original message/identities/headers rather than the native intent+footer.
134 // This is the path that lets the git mirror be dropped (#568): a correct
135 // export no longer depends on the mirror holding the verbatim imported bytes.
136 //
137 // Routing (#567 round 3): export keys off (is byte-faithful?) AND (does a
138 // bridge mapping exist?). The verbatim / mapped-OID fallback for a lossy
139 // commit applies ONLY when a bridge mapping holds a TRACKED original OID to
140 // preserve — and that branch lives in `export_scoped`'s already-mapped path.
141 // `export_state` is only ever reached for an UNMAPPED state (the caller's
142 // `has_heddle` guard), so there is NO original OID to match and NO verbatim
143 // mirror bytes to fall back to. Every unmapped fidelity state therefore MINTS
144 // from its own raw metadata — a `--lossy` one is NOT rejected into a
145 // nonexistent verbatim source (the r2 over-correction, #567 round 3):
146 // * byte-faithful (a clean ingest-backed import, native heddle commit with
147 // fidelity, ...) -> the derived OID coincides with the original commit SHA;
148 // * lossy / non-UTF8 (ingest-backed import with lossy tree conversion) -> a
149 // DERIVED OID that still preserves raw_message/identities/headers. With
150 // no original to match this is correct, not the wrong-SHA bug the r2
151 // `git_lossy` guard (rightly) blocks ONLY for a MAPPED commit.
152 if has_git_fidelity(&state) {
153 let content = reconstruct_commit_bytes(heddle_repo, repo, mapping, &state)?;
154 return Ok(Some(write_commit_object(repo, &content)?));
155 }
156
157 // Native heddle commit: no original to preserve. Mint a raw commit object
158 // and inject the durable W2 footer (and the "No intent specified"
159 // placeholder for an empty intent) — these ride ONLY native commits.
160 let git_tree_oid = export_tree(heddle_repo, repo, &state.tree)?;
161 // R6: emit the W2 footer on every exported commit. The footer is
162 // durable across remotes; per-scope breakdowns ride on the opt-in
163 // git note. For first-pass we audit nothing about the state's
164 // annotation set (the audience defaults to "public"); a follow-up
165 // landed with `bridge git export --audience` threads the count
166 // through here. See `git_util::build_commit_message_with_footer`.
167 let hosted_url = heddle_repo
168 .config()
169 .hosted
170 .upstream_url
171 .as_deref()
172 .filter(|s| !s.is_empty());
173 let message = match options.message_override {
174 Some(message) => GitBridge::build_commit_message_with_footer_with_body(
175 &state, message, hosted_url, /*omitted=*/ 0,
176 ),
177 None => {
178 GitBridge::build_commit_message_with_footer(&state, hosted_url, /*omitted=*/ 0)
179 }
180 };
181 let parent_oids: Vec<ObjectId> = if let Some(parents) = options.parent_override {
182 parents.to_vec()
183 } else {
184 state
185 .parents
186 .iter()
187 .map(|parent_id| {
188 mapping
189 .get_git(parent_id)
190 .ok_or(GitBridgeError::StateNotFound(*parent_id))
191 })
192 .collect::<GitResult<Vec<_>>>()?
193 };
194
195 let sig = if principal_is_default_unknown(&state.attribution.principal) {
196 let Some(identity) = options.identity else {
197 return Err(GitBridgeError::Git(
198 "refusing to write a Git commit with Unknown <unknown@example.com>; configure user.name/user.email, HEDDLE_PRINCIPAL_NAME/HEDDLE_PRINCIPAL_EMAIL, or .heddle principal".to_string(),
199 ));
200 };
201 identity.to_signature(state.created_at.timestamp())
202 } else {
203 state_to_signature(&state)
204 };
205 let commit = CommitObject {
206 tree: git_tree_oid,
207 parents: parent_oids,
208 author: sig.to_ident_bytes(),
209 committer: sig.to_ident_bytes(),
210 encoding: None,
211 message: message.into_bytes(),
212 };
213 Ok(Some(
214 repo.write_object(EncodedObject::new(GitObjectType::Commit, commit.write()))
215 .map_err(git_err)?,
216 ))
217}
218
219/// Export a Heddle tree to Git.
220pub fn export_tree(
221 heddle_repo: &HeddleRepository,
222 repo: &SleyRepository,
223 tree_hash: &ContentHash,
224) -> GitResult<ObjectId> {
225 let tree = heddle_repo
226 .store()
227 .get_tree(tree_hash)?
228 .ok_or_else(|| HeddleError::NotFound(format!("tree {}", tree_hash)))?;
229
230 let empty_tree = ObjectId::empty_tree(repo.object_format());
231 let mut editor = repo.edit_tree(&empty_tree).map_err(git_err)?;
232
233 for entry in tree.entries() {
234 let write_blob_entry = |hash: &ContentHash| -> GitResult<ObjectId> {
235 // Redaction safety: if the blob carries an active redaction
236 // record, export the stub instead of the bytes. This is the
237 // single chokepoint between Heddle-side redactions and any
238 // downstream Git remote (GitHub, internal mirrors, ...).
239 // Bytes that escape via the bridge are bytes that escape,
240 // full stop — we cannot retroactively scrub them from
241 // outside repos. The check sits *here*, not in
242 // `materialize_blob`, because export reads `blob.content()`
243 // directly (we never touch the materialize path) and writes
244 // the raw bytes through `repo.write_blob`.
245 let stub = heddle_repo
246 .redaction_stub_for_blob(hash)
247 .map_err(|err| HeddleError::Config(format!("redaction lookup failed: {err}")))?;
248
249 if let Some(stub_text) = stub {
250 // Stubs are text-only and ASCII safe across newline/BOM quirks.
251 return repo.write_blob(stub_text.as_bytes()).map_err(git_err);
252 }
253
254 let blob = heddle_repo
255 .store()
256 .get_blob(hash)?
257 .ok_or_else(|| HeddleError::NotFound(format!("blob {}", hash)))?;
258 repo.write_blob(blob.content()).map_err(git_err)
259 };
260 let (kind, id) = match entry.target() {
261 TreeEntryTarget::Tree { hash } => {
262 (EntryKind::Tree, export_tree(heddle_repo, repo, hash)?)
263 }
264 TreeEntryTarget::Blob { hash, executable } => {
265 let kind = if *executable {
266 EntryKind::BlobExecutable
267 } else {
268 EntryKind::Blob
269 };
270 (kind, write_blob_entry(hash)?)
271 }
272 TreeEntryTarget::Symlink { hash } => (EntryKind::Symlink, write_blob_entry(hash)?),
273 TreeEntryTarget::Gitlink { target } => (EntryKind::Commit, *target),
274 // A native child-spool edge points at a spool-id + state-id, NOT a
275 // git commit OID, so it has no valid git submodule (mode 160000)
276 // representation. Emitting a `Commit` entry here would fabricate a
277 // bogus submodule pointer, so we deliberately SKIP the entry on
278 // git-export. (Git-import never produces spoollinks — only native
279 // spool operations do — so nothing round-trips back through here.)
280 TreeEntryTarget::Spoollink { spool_id, state_id } => {
281 debug!(
282 name = entry.name(),
283 %spool_id,
284 %state_id,
285 "skipping SPOOLLINK entry on git-export: no valid git submodule representation"
286 );
287 continue;
288 }
289 };
290
291 editor.upsert(entry.name(), kind, id);
292 }
293
294 repo.write_tree(editor).map_err(git_err)
295}
296
297/// Export all Heddle states to Git commits.
298pub fn export_all(bridge: &mut GitBridge) -> GitResult<ExportStats> {
299 bridge.with_mapping_rollback(|bridge| export_scoped(bridge, None))
300}
301
302/// Export one Heddle thread to its matching Git branch.
303pub fn export_current_thread(bridge: &mut GitBridge, thread: &str) -> GitResult<ExportStats> {
304 bridge.with_mapping_rollback(|bridge| export_scoped(bridge, Some(thread)))
305}
306
307fn export_scoped(bridge: &mut GitBridge, thread: Option<&str>) -> GitResult<ExportStats> {
308 bridge.init_mirror()?;
309
310 let states = match thread {
311 Some(thread) => {
312 let Some(state_id) = bridge
313 .heddle_repo
314 .refs()
315 .get_thread(&ThreadName::new(thread))?
316 else {
317 return Err(GitBridgeError::Git(format!(
318 "thread '{thread}' has no state to export"
319 )));
320 };
321 reachable_states(bridge.heddle_repo, &[state_id])?
322 }
323 None => bridge.heddle_repo.store().list_states()?,
324 };
325 let mut stats = ExportStats::default();
326
327 bridge.build_existing_mapping(None)?;
328 let identity = git_config_identity_with_global_fallback(bridge.heddle_repo.root())?;
329
330 // The Git bridge publishes the PUBLIC mirror — the export audience is
331 // always `Public`. Per-commit visibility is enforced here, in the OSS
332 // bridge, by emitting absence (the authoritative wire serve gate is weft's
333 // job, spike §10 #4).
334 let audience = AudienceTier::Public;
335
336 let sorted_states = bridge.sort_states_topologically(&states)?;
337 // Reachable set, used to tell a withheld parent (absent from the mapping
338 // but present in this export) apart from a genuinely-missing shallow
339 // boundary (absent from both).
340 let reachable: HashSet<ChangeId> = sorted_states.iter().copied().collect();
341 let repo = bridge.open_git_repo()?;
342 bridge.mapping.retain_git_objects(&repo);
343 bridge.seed_git_checkpoint_mappings_from_checkout(&repo)?;
344 bridge.seed_ingest_identity_mappings_from_mirror(&repo)?;
345
346 // The desired/actual ref sets span the WHOLE mirror, not just this export's
347 // scoped thread: a prior all-thread export can leave `refs/heads`/`refs/tags`
348 // for OTHER threads/markers whose commits — or their ancestors — were later
349 // marked Private. Reconciling only the scoped thread would keep serving those
350 // now-embargoed commits via the other thread's branch (heddle#316 cross-thread
351 // embargo leak). So purge + project + reconcile over every heddle-managed
352 // thread/marker regardless of scope; the mint loop below stays scoped (only the
353 // requested thread's new commits are minted), so widening changes WHICH refs
354 // are reconciled, never what gets created.
355 let remote_names = git_remote_names(bridge.heddle_repo);
356 let threads: Vec<String> = {
357 let mut all: Vec<String> = bridge
358 .heddle_repo
359 .refs()
360 .list_threads()?
361 .into_iter()
362 .filter(|thread| !is_remote_tracking_thread_name(thread, &remote_names))
363 .map(|t| t.to_string())
364 .collect();
365 // A scoped export's own thread may be a remote-tracking name the filter
366 // drops; keep it so the requested thread is always reconciled.
367 if let Some(t) = thread
368 && !all.iter().any(|x| x == t)
369 {
370 all.push(t.to_string());
371 }
372 all
373 };
374 let markers: Vec<MarkerName> = bridge.heddle_repo.refs().list_markers()?;
375
376 // Roots of the whole-mirror served frontier: every reconciled thread's tip and
377 // every marker's state. Purging over their reachable closure (below) drops any
378 // out-of-scope commit whose tier — or an ancestor's — is now unserved, so
379 // `project_desired_refs` lags those branches/tags correctly even on a scoped
380 // export (heddle#316).
381 let mut frontier_roots: Vec<ChangeId> = Vec::new();
382 for track_name in &threads {
383 if let Some(tip) = bridge
384 .heddle_repo
385 .refs()
386 .get_thread(&ThreadName::new(track_name))?
387 {
388 frontier_roots.push(tip);
389 }
390 }
391 for marker_name in &markers {
392 if let Some(state_id) = bridge.heddle_repo.refs().get_marker(marker_name)? {
393 frontier_roots.push(state_id);
394 }
395 }
396 let frontier_reachable = reachable_states(bridge.heddle_repo, &frontier_roots)?;
397
398 // Re-validate the served set against CURRENT visibility before anything treats
399 // a mapping as "already served". A state minted while public in a prior export
400 // can be marked under-tier later; `build_existing_mapping` rebuilds its stale
401 // ChangeId→OID mapping from the notes/sidecar every run, so without this purge
402 // the frontier walk, the note re-write, and the tag sync would all keep serving
403 // the now-embargoed commit. Purging is downward-closed: a still-visible state
404 // whose ancestor is embargoed is withheld too (its Git commit chains to the
405 // embargoed one). The purge spans the mint set UNION the whole-mirror frontier,
406 // so a scoped export still drops an out-of-scope thread's now-embargoed tip; for
407 // an all-thread export the frontier ⊆ the mint set and this reduces to the prior
408 // behavior. After this, `mapping` == the served set across every reconciled ref,
409 // exactly what `frontier_git_oid` assumes.
410 // Snapshot EVERY mapped target before the purge mutates the mapping: these are
411 // exactly the commits that may already carry a `refs/notes/*` entry in the
412 // mirror, so the notes-ref retraction below must consider all of them —
413 // including the states the purge is about to drop AND any orphaned mapping a
414 // deleted thread left behind, which no current-ref frontier reaches (heddle#316).
415 let pre_purge_targets: Vec<(ChangeId, ObjectId)> =
416 bridge.mapping.iter().map(|(c, o)| (*c, *o)).collect();
417
418 let purge_reachable: HashSet<ChangeId> = sorted_states
419 .iter()
420 .copied()
421 .chain(frontier_reachable.iter().copied())
422 .collect();
423 let purge_sorted =
424 bridge.sort_states_topologically(&purge_reachable.iter().copied().collect::<Vec<_>>())?;
425 // The purge MUTATES the mapping down to the served set. Its returned drop-set
426 // (the OIDs THIS run withheld) is deliberately NOT used to classify EXISTING
427 // mirror tips: a scoped run's purge omits a tip embargoed in a PRIOR run, or
428 // out of this run's purge reach, so classifying by it misreads such a tip as
429 // served and keeps serving it. Existing-tip served classification (heads + tags
430 // below) uses the whole-mirror served-OID set (`served_oids`) instead
431 // (heddle#316).
432 purge_unserved_mappings(
433 bridge.heddle_repo,
434 &mut bridge.mapping,
435 &purge_sorted,
436 &purge_reachable,
437 &audience,
438 )?;
439
440 // Git OIDs minted during this run. Used below to partition the copied
441 // ref set into newly-written vs already-mapped — so the "newly" count
442 // is a subset of the same walk that produces the total, never a
443 // parallel tally over `list_states()` that could include an orphan
444 // state reachable from no copied ref.
445 let mut newly_minted: HashSet<ObjectId> = HashSet::new();
446
447 for state_id in sorted_states {
448 // Already mapped to a git object — the common case for git-imported
449 // states (the import populated the ChangeId→OID mapping) and for
450 // native commits a prior export already minted. Not re-counted as
451 // "newly minted" (the total is decided below by ref-reachability).
452 if bridge.mapping.has_heddle(&state_id) {
453 // For an IMPORTED commit (#565 fidelity fields present),
454 // REGENERATE the object from state into the mirror rather than
455 // leaning on the verbatim imported bytes still being there (#567).
456 // Byte-identical, so the OID is unchanged and the write is
457 // idempotent today; what changes is that a correct export no
458 // longer DEPENDS on the mirror's verbatim copy — the step that
459 // lets the mirror be dropped (#568). Native already-mapped commits
460 // have no original to reconstruct (raw_message is None), so they
461 // are left to their prior mint; re-minting those is out of scope.
462 if let Some(state) = bridge.heddle_repo.store().get_state(&state_id)?
463 && has_git_fidelity(&state)
464 {
465 let mapped = bridge.mapping.get_git(&state_id);
466 // Incremental-export fast path (perf, latent O(history) fix): the
467 // regenerate-from-state step below is purely a #567 idempotent
468 // re-write — it rebuilds the commit object from state and writes it
469 // so a correct export no longer DEPENDS on the mirror's verbatim
470 // bytes. But when the mapped commit object is ALREADY in the mirror,
471 // that re-write is a no-op (sley hashes-then-skips), preceded by
472 // `reconstruct_commit_bytes`'s FULL recursive tree re-walk + re-hash.
473 // On a deep imported history every already-mapped commit hits this
474 // branch on every export, so that re-walk is paid once per historical
475 // commit per export — O(total history). Skipping when the object is
476 // present makes it O(commits whose object is missing), with
477 // byte-identical output: the served object, its OID, and the mapping
478 // are all unchanged. The reconstruct still runs (and the safety net
479 // still guards the write) for any mapped commit whose object is NOT
480 // yet in the mirror — the case #567/#568 actually need it for.
481 if mapped.is_some_and(|oid| repo.read_object(&oid).is_ok()) {
482 continue;
483 }
484 // mirror still required for non-byte-faithful commits (non-UTF8
485 // identities, --lossy); #568 mirror elimination must account for
486 // these, and full de-lossy needs byte-preserving identities (#564
487 // follow-up).
488 // Fidelity guard (#567): regenerate from state ONLY when the
489 // state is fully byte-faithful to the original import. A
490 // non-byte-faithful commit (non-UTF8 identity, or a `--lossy`
491 // import — both import-lossy and ingest-lossy carry the canonical
492 // `git_lossy` flag) would reconstruct to a WRONG SHA, so leave it
493 // on the preserved mapped OID — the verbatim mirror bytes stay the
494 // served object (the pre-#567 behavior for that commit).
495 if commit_is_byte_faithful(&state) {
496 let content = reconstruct_commit_bytes(
497 bridge.heddle_repo,
498 &repo,
499 &bridge.mapping,
500 &state,
501 )?;
502 // Safety net: the regenerated object MUST hash to the mapped
503 // OID. A mismatch means reconstruction diverged from the
504 // imported bytes (an undetected fidelity gap), so fall back to
505 // the verbatim mirror / mapped OID rather than write a
506 // wrong-SHA object.
507 let reconstructed = commit_object_id(&content);
508 if mapped.map(|m| m == reconstructed).unwrap_or(true) {
509 write_commit_object(&repo, &content)?;
510 }
511 }
512 }
513 continue;
514 }
515
516 // Downward-closure (spike §5.0): withhold a state whose parent was
517 // itself withheld for this audience. Processed in topo order, so a
518 // parent's mapped-ness is already decided. A parent absent from the
519 // mapping but present in `reachable` was withheld → withhold this
520 // child too (and, transitively, all its descendants). A parent absent
521 // from both is a shallow boundary (public-by-absence) — let the mint
522 // proceed exactly as before.
523 let parent_withheld = bridge
524 .heddle_repo
525 .store()
526 .get_state(&state_id)?
527 .map(|state| {
528 state
529 .parents
530 .iter()
531 .any(|p| reachable.contains(p) && bridge.mapping.get_git(p).is_none())
532 })
533 .unwrap_or(false);
534 if parent_withheld {
535 continue;
536 }
537
538 let message_override = bridge
539 .commit_message_overrides
540 .get(&state_id)
541 .map(String::as_str);
542 let parent_override = bridge
543 .commit_parent_overrides
544 .get(&state_id)
545 .map(Vec::as_slice);
546 if let Some(parents) = parent_override
547 && !parents.is_empty()
548 {
549 let checkout_repo =
550 SleyRepository::discover(bridge.heddle_repo.root()).map_err(git_err)?;
551 copy_reachable_objects(&checkout_repo, &repo, parents.iter().copied())?;
552 }
553 let Some(git_oid) = export_state(
554 &mut bridge.mapping,
555 bridge.heddle_repo,
556 &repo,
557 &state_id,
558 ExportStateOptions {
559 identity: identity.as_ref(),
560 message_override,
561 parent_override,
562 audience: &audience,
563 },
564 )?
565 else {
566 // Embargoed for this audience — emit absence (no commit minted).
567 continue;
568 };
569 bridge.mapping.insert(state_id, git_oid);
570 newly_minted.insert(git_oid);
571
572 // Attach a heddle note to the freshly-created commit so the
573 // change_id survives a fresh `git clone` of the destination
574 // (when only the git side travels, without our sidecar).
575 if let Some(state) = bridge.heddle_repo.store().get_state(&state_id)? {
576 let note = git_notes::HeddleNote::from_state(&state);
577 git_notes::write_note(&repo, git_oid, ¬e)?;
578 }
579 }
580
581 // The downward-closure served set across EVERY note target — the pre-purge
582 // mapping (commits that may already carry a note in the mirror) UNION the
583 // current post-mint mapping (served states + freshly minted commits),
584 // computed over the FULL ancestry of all of them. The branch purge is
585 // ref-rooted (it walks the whole-mirror frontier of current thread tips +
586 // markers), so it never examines an ORPHANED mapping a deleted thread left
587 // behind; without this closure such a commit's note — public-tier but with a
588 // now-Private ancestor — would slip past both the backfill gate and the
589 // retraction below. This is the SAME served rule the branch frontier uses,
590 // applied to notes (heddle#316). For an all-states export it reduces to the
591 // post-purge served set, so behavior there is unchanged.
592 let note_target_roots: Vec<ChangeId> = pre_purge_targets
593 .iter()
594 .map(|(c, _)| *c)
595 .chain(bridge.mapping.iter().map(|(c, _)| *c))
596 .collect();
597 let note_reachable_vec = reachable_states(bridge.heddle_repo, ¬e_target_roots)?;
598 let note_reachable: HashSet<ChangeId> = note_reachable_vec.iter().copied().collect();
599 let note_sorted = bridge.sort_states_topologically(¬e_reachable_vec)?;
600 let note_served =
601 served_change_ids(bridge.heddle_repo, ¬e_sorted, ¬e_reachable, &audience)?;
602
603 // For states whose git_oid was already in the mapping (the SHA-stable
604 // path above), make sure the note is present too. This covers two
605 // cases: (a) the state was imported from a non-heddle git source and
606 // never had a note, and (b) the note was deleted from the mirror.
607 let note_targets: Vec<(ChangeId, ObjectId)> =
608 bridge.mapping.iter().map(|(c, o)| (*c, *o)).collect();
609 for (change_id, git_oid) in note_targets {
610 // Gate the backfill on the downward-closure served set, not the commit's
611 // DIRECT tier. The mapping can carry orphaned entries (a deleted thread's
612 // commits) the ref-rooted purge never examined; gating on direct
613 // visibility alone would re-publish a note for a public commit whose
614 // ancestor became Private — a commit the branch downward-closure
615 // withholds. `note_served` is the same served notion the branch frontier
616 // uses, so no note-write site can emit metadata for an unserved commit
617 // (heddle#316).
618 if note_served.contains(&change_id)
619 && git_notes::read_note(&repo, git_oid)?.is_none()
620 && let Some(state) = bridge.heddle_repo.store().get_state(&change_id)?
621 {
622 let note = git_notes::HeddleNote::from_state(&state);
623 git_notes::write_note(&repo, git_oid, ¬e)?;
624 }
625 }
626
627 // Retract the notes for every mapped target that is NOT served under the
628 // downward-closure rule. The mirror copies `refs/notes/*`
629 // (`collect_ref_updates`) alongside branches and tags, so a note left for an
630 // unserved commit keeps leaking its metadata even after its branch/tag were
631 // retracted. This is the notes-ref sibling of the branch/tag retraction
632 // above (heddle#316). Considering EVERY pre-purge target — not just the
633 // `embargoed_oids` the ref-rooted purge dropped — catches an orphaned note an
634 // ancestor embargo stranded on a deleted thread's commit. Guard the
635 // degenerate case where a still-served state maps to the same git OID by
636 // keeping any OID a served target maps to.
637 let served_note_oids: HashSet<ObjectId> = pre_purge_targets
638 .iter()
639 .copied()
640 .chain(bridge.mapping.iter().map(|(c, o)| (*c, *o)))
641 .filter(|(c, _)| note_served.contains(c))
642 .map(|(_, oid)| oid)
643 .collect();
644 let notes_to_retract: HashSet<ObjectId> = pre_purge_targets
645 .iter()
646 .filter(|(c, _)| !note_served.contains(c))
647 .map(|(_, oid)| *oid)
648 .filter(|oid| !served_note_oids.contains(oid))
649 .collect();
650 git_notes::remove_notes(&repo, ¬es_to_retract)?;
651
652 // THE PROJECTION (heddle#316 r13): the desired heddle-owned ref-set for this
653 // audience — heads lagged to the served frontier, tags at served markers — as
654 // a pure function of the post-purge served `mapping` + audience + ownership.
655 // Every mirror ref op below (set / forced embargo retract / delete) is DERIVED
656 // from this ONE map, so a ref surface can never drift out of one enforcement
657 // pass while another keeps serving it. The mirror MATERIALIZES this desired
658 // set; downstream `plan_destination_reconcile` then reconciles each
659 // destination against it — one projection, one reconcile, all destinations.
660 let desired = project_desired_refs(bridge.heddle_repo, &bridge.mapping, &threads, &markers)?;
661
662 // The downward-closure served set over the WHOLE-MIRROR frontier — the SAME
663 // closure the purge ran over (every thread tip + every marker state). A state is
664 // served iff visible to this audience AND every reachable ancestor is served.
665 // Drives BOTH the served-OID set just below AND (further down) the tag
666 // classifier's served-but-unminted axis.
667 let frontier_served = {
668 let reachable_set: HashSet<ChangeId> = frontier_reachable.iter().copied().collect();
669 let sorted = bridge.sort_states_topologically(&frontier_reachable)?;
670 served_change_ids(bridge.heddle_repo, &sorted, &reachable_set, &audience)?
671 };
672
673 // The whole-mirror SERVED-OID set: the git OID of every served frontier state.
674 // An EXISTING mirror tip (head or tag) is "served" iff it is one of these — an
675 // actually-served commit RIGHT NOW — independent of whether THIS run's purge
676 // happened to drop it. `frontier_served` is downward-closed at the ChangeId
677 // level (served ⟹ every reachable ancestor served) and every minted commit's
678 // parents are themselves mapped, so the mapped OIDs of `frontier_served` already
679 // form the downward-closed git-ancestry set — no separate git walk is needed
680 // (heddle#316). Replaces the prior `embargoed_oids` (this-run-only purge
681 // drop-set) classification that leaked a prior-run / out-of-scope embargo.
682 let served_oids: HashSet<ObjectId> = frontier_served
683 .iter()
684 .filter_map(|state| bridge.mapping.get_git(state))
685 .collect();
686
687 // The mirror's NAME-KEYED ownership record (heddle#316): a mirror ref is
688 // MANAGED iff heddle recorded WRITING it under that full name — NEVER by OID
689 // membership (the r20c bug that classified a foreign ref at a heddle OID as
690 // heddle's). The mirror analog of the destination's `heddle-exported-refs`
691 // record. Read BEFORE the head/tag loops mutate any ref so a genuine first run
692 // (absent record) seeds from the prior-run ref set rather than misreading every
693 // pre-existing ref as foreign — which would silently stop embargo retraction.
694 let mut managed_record = read_or_seed_mirror_managed_refs(&repo)?;
695
696 // Reconcile the mirror's HEADS via the shared `reconcile_ref` decision. Iterate
697 // the CURRENT threads: a dropped thread's stale branch is intentionally NOT
698 // pruned (the #289 dropped-thread contract) — it is never iterated, survives in
699 // the mirror, and stays in the managed record so the push still copies it. The
700 // desired head target is the maximal served ancestor-or-self of the thread tip
701 // (`frontier_git_oid`, via `project_desired_refs`). The existing tip is
702 // classified against the whole-mirror served-OID set, so a still-served tip
703 // fast-forwards, an embargoed tip force-rewinds to its served ancestor, and a
704 // whole-line-embargoed head is deleted. A scoped export reconciles every current
705 // thread but MATERIALIZES (creates) only the one it was scoped to.
706 for track_name in &threads {
707 if bridge
708 .heddle_repo
709 .refs()
710 .get_thread(&ThreadName::new(track_name))?
711 .is_none()
712 {
713 // A listed thread name with no tip is neither synced nor pruned.
714 continue;
715 }
716 let branch_ref = format!("refs/heads/{track_name}");
717 let in_scope = thread.is_none() || thread == Some(track_name.as_str());
718 let desired_oid = desired.get(&branch_ref).copied();
719 let existing_oid = branch_tip_oid(&repo, &branch_ref);
720 match reconcile_ref(
721 ReconcileNs::Head,
722 desired_oid,
723 existing_oid,
724 in_scope,
725 /* marker_served_unminted */ false,
726 &served_oids,
727 ) {
728 ReconcileOp::Write => {
729 let git_oid = desired_oid.expect("Write implies a desired target");
730 sync_track_to_branch(&repo, track_name, git_oid)?;
731 managed_record.insert(branch_ref.clone(), git_oid);
732 stats.threads_synced += 1;
733 stats.branches.push(ExportedRef {
734 name: track_name.clone(),
735 tip: git_oid,
736 });
737 }
738 ReconcileOp::ForceRewind => {
739 let git_oid = desired_oid.expect("ForceRewind implies a desired target");
740 set_reference(
741 &repo,
742 &branch_ref,
743 git_oid,
744 RefPrecondition::Any,
745 "heddle: retract embargoed thread frontier",
746 )?;
747 managed_record.insert(branch_ref.clone(), git_oid);
748 stats.threads_synced += 1;
749 stats.branches.push(ExportedRef {
750 name: track_name.clone(),
751 tip: git_oid,
752 });
753 }
754 ReconcileOp::Delete => {
755 delete_reference_if_present(&repo, &branch_ref)?;
756 managed_record.remove(&branch_ref);
757 }
758 // A head has no preserve path — `frontier_git_oid` recomputes the
759 // target every run, so a head is always rewound/deleted, never kept at
760 // a stale tip (Preserve is unreachable for `ReconcileNs::Head`).
761 ReconcileOp::Skip | ReconcileOp::Preserve => {}
762 }
763 }
764
765 // Reconcile the mirror's TAGS via the SAME `reconcile_ref` decision as heads.
766 // Iterate the UNION of current markers AND the managed-record tag names: a
767 // DELETED marker drops out of `markers`, so its stale managed mirror tag is
768 // reachable only via the managed-record side (heddle#316 S3 — a deleted marker
769 // must delete its tag). A FOREIGN tag heddle never wrote is in NEITHER set, so
770 // it is never visited: it survives untouched and stays out of the push frontier
771 // (`collect_managed_ref_updates`). The desired tag target comes from the
772 // projection (a marker minted this run); the served-but-unminted vs embargoed
773 // split (r18 PRESERVE vs r19 DELETE) is the existing tag's served-ness combined
774 // with `marker_served_unminted`.
775 let mut tag_names: std::collections::BTreeSet<String> =
776 markers.iter().map(|m| m.to_string()).collect();
777 for full_name in managed_record.keys() {
778 if let Some(tag) = full_name.strip_prefix("refs/tags/") {
779 tag_names.insert(tag.to_string());
780 }
781 }
782
783 for name in &tag_names {
784 let tag_ref = format!("refs/tags/{name}");
785 let existing_raw_oid = direct_ref_oid(&repo, &tag_ref);
786 let existing_oid = existing_raw_oid.and_then(|oid| peel_to_commit_oid(&repo, oid));
787 let desired_oid = desired.get(&tag_ref).copied();
788 let in_scope = thread.is_none();
789 // A live marker whose served target was NOT minted into the mapping this
790 // run (a scoped export that didn't reach it). The desired projection omits
791 // such a tag (it only publishes minted markers), so the reconcile sees
792 // `desired_oid == None`; this flag plus the existing tag's served-ness is
793 // the sole axis splitting r18-PRESERVE from r19-DELETE.
794 let marker_served_unminted = match bridge
795 .heddle_repo
796 .refs()
797 .get_marker(&MarkerName::new(name.as_str()))?
798 {
799 Some(state) => {
800 bridge.mapping.get_git(&state).is_none() && frontier_served.contains(&state)
801 }
802 None => false,
803 };
804 if let (Some(desired), Some(raw), Some(peeled)) =
805 (desired_oid, existing_raw_oid, existing_oid)
806 && raw != desired
807 && peeled == desired
808 {
809 managed_record.insert(tag_ref.clone(), raw);
810 stats.markers_synced += 1;
811 stats.tags.push(ExportedRef {
812 name: name.clone(),
813 tip: raw,
814 });
815 continue;
816 }
817 match reconcile_ref(
818 ReconcileNs::Tag,
819 desired_oid,
820 existing_oid,
821 in_scope,
822 marker_served_unminted,
823 &served_oids,
824 ) {
825 ReconcileOp::Write => {
826 let git_oid = desired_oid.expect("Write implies a desired target");
827 sync_marker_to_tag(&repo, name, git_oid)?;
828 managed_record.insert(tag_ref.clone(), git_oid);
829 stats.markers_synced += 1;
830 stats.tags.push(ExportedRef {
831 name: name.clone(),
832 tip: git_oid,
833 });
834 }
835 ReconcileOp::Delete => {
836 delete_reference_if_present(&repo, &tag_ref)?;
837 managed_record.remove(&tag_ref);
838 }
839 // PRESERVE keeps the existing served tag (still managed → stays in the
840 // record); SKIP is a no-op. A tag is free-move and never force-rewinds
841 // (ForceRewind is unreachable for `ReconcileNs::Tag`).
842 ReconcileOp::Preserve | ReconcileOp::Skip | ReconcileOp::ForceRewind => {}
843 }
844 }
845
846 // Persist the updated ownership record so the next reconcile — and the push
847 // frontier (`collect_managed_ref_updates`) — read heddle's managed set by name.
848 write_mirror_managed_refs(&repo, &managed_record)?;
849
850 // Every count in the summary is a partition of the SINGLE copied ref
851 // set: `total` is unique commits reachable from the mirror's branch/tag
852 // tips (the exact ref set `copy_mirror_to_path` writes via
853 // `collect_ref_updates`), and `states_exported` ("newly") is the subset
854 // of THAT walk minted this run. Deriving both from one walk — rather
855 // than tallying `states_exported` inline over `list_states()` — makes
856 // `newly + already == total` hold by construction: a state minted into
857 // the mirror but reachable from no copied ref (e.g. a dropped thread's
858 // orphan history) is in neither count, so the impossible
859 // "1 total (2 newly written)" summary cannot occur.
860 let counts = count_exported_commits(&repo, &newly_minted)?;
861 stats.commits_total = counts.total;
862 stats.states_exported = counts.newly;
863
864 bridge.save_mapping_to_disk()?;
865
866 Ok(stats)
867}
868
/// The ref namespace a mirror reconcile decision is being made for.
///
/// [`reconcile_ref`] takes this so heads and tags share one decision function.
/// The namespace only changes the outcome when a desired target AND an existing
/// tip are both present: a head whose existing tip is not served is force-rewound,
/// whereas a tag is always simply rewritten to the desired target.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum ReconcileNs {
    /// A branch ref (`refs/heads/<thread>`).
    Head,
    /// A tag ref (`refs/tags/<marker>`).
    Tag,
}
878
/// The action the mirror reconcile applies to one ref, as decided by
/// [`reconcile_ref`] for heads and tags alike (heddle#316).
///
/// The reconcile only iterates current threads/markers and heddle-managed ref
/// names, so a foreign ref is never handed one of these ops.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum ReconcileOp {
    /// Leave the mirror untouched: either a scoped export declining to create a
    /// ref it was not asked for, or there is neither a desired target nor an
    /// existing ref.
    Skip,
    /// Write the desired target through the namespace's normal path — a head is
    /// created or fast-forwarded, a tag is created or retargeted.
    Write,
    /// Force a head onto the desired target, bypassing the fast-forward guard.
    /// Used for the embargo retract that rewinds a no-longer-served tip back to
    /// its served ancestor.
    ForceRewind,
    /// Keep an existing, still-served tag whose marker target is served but was
    /// not minted this run (r18); a later all-thread export re-mints and moves it.
    Preserve,
    /// Remove the ref: nothing is served for it any more (whole-line embargo, an
    /// r19 embargoed existing tag, or the stale tag of a deleted marker).
    Delete,
}
901
902/// The mirror reconcile decision — IDENTICAL in shape for heads and tags
903/// (heddle#316). `desired_oid` is the served target the projection wants published
904/// (`None` ⇒ nothing served for this ref this run); `existing_oid` is the mirror
905/// ref's CURRENT tip, already PEELED to a commit by [`branch_tip_oid`] (so an
906/// annotated foreign tag colliding with a marker name is tested by its commit, not
907/// its tag-object OID — heddle#316 risk #2). `in_scope` gates only
908/// MATERIALIZATION: a scoped export reconciles existing refs but never CREATES a
909/// brand-new one the caller did not ask for. `marker_served_unminted` is set only
910/// for a tag whose live marker target is served but was not minted this run — the
911/// sole axis that, combined with `existing_served`, splits r18-PRESERVE from
912/// r19-DELETE. `served_oids` is the whole-mirror served-OID set classifying the
913/// existing tip (NOT this run's purge drop-set, which omits a prior-run /
914/// out-of-scope embargo).
915fn reconcile_ref(
916 ns: ReconcileNs,
917 desired_oid: Option<ObjectId>,
918 existing_oid: Option<ObjectId>,
919 in_scope: bool,
920 marker_served_unminted: bool,
921 served_oids: &HashSet<ObjectId>,
922) -> ReconcileOp {
923 // `existing_oid` is already the peeled commit OID (`branch_tip_oid`), so this
924 // membership test compares commit-against-commit (risk #2).
925 let existing_served = existing_oid
926 .map(|oid| served_oids.contains(&oid))
927 .unwrap_or(false);
928 match (desired_oid, existing_oid) {
929 // Scoped export, would-create: never materialize a ref the caller did not
930 // ask to export.
931 (Some(_), None) if !in_scope => ReconcileOp::Skip,
932 // Create a fresh ref at the served target.
933 (Some(_), None) => ReconcileOp::Write,
934 // Head with an existing tip: a still-served tip fast-forwards (r17 FF guard
935 // applies); an embargoed tip is force-rewound to its served ancestor.
936 (Some(_), Some(_)) if ns == ReconcileNs::Head => {
937 if existing_served {
938 ReconcileOp::Write
939 } else {
940 ReconcileOp::ForceRewind
941 }
942 }
943 // Tag with an existing tip: free-move force-retarget to the served target.
944 (Some(_), Some(_)) => ReconcileOp::Write,
945 // Nothing served, nothing present.
946 (None, None) => ReconcileOp::Skip,
947 // Nothing served, but a tag exists whose marker target is served-but-
948 // unminted AND the existing tag is itself served: PRESERVE (r18).
949 (None, Some(_)) if marker_served_unminted && existing_served => ReconcileOp::Preserve,
950 // Nothing served, an existing ref remains: DELETE (whole-line embargo, r19
951 // embargoed existing tag, or a deleted marker's stale tag).
952 (None, Some(_)) => ReconcileOp::Delete,
953 }
954}
955
956pub(crate) fn git_remote_names(heddle_repo: &HeddleRepository) -> HashSet<String> {
957 let Ok(repo) = SleyRepository::discover(heddle_repo.root()) else {
958 return HashSet::new();
959 };
960 repo.remote_names()
961 .unwrap_or_default()
962 .into_iter()
963 .filter(|name| !name.trim().is_empty())
964 .collect()
965}
966
/// Whether `thread` has the shape `<remote>/<branch>` for a known remote.
///
/// The name is split at its FIRST `/`; it qualifies when the part before the
/// slash is one of `remote_names` and the part after it is non-empty. A name
/// without any `/` never qualifies.
pub(crate) fn is_remote_tracking_thread_name(thread: &str, remote_names: &HashSet<String>) -> bool {
    match thread.split_once('/') {
        Some((remote, branch)) => remote_names.contains(remote) && !branch.is_empty(),
        None => false,
    }
}
973
974/// Purge from `mapping` every reachable state whose effective visibility is no
975/// longer served by `audience`, and return the Git OIDs that were dropped so
976/// the caller can retract any ref still pointing at them.
977///
978/// A state can be minted while public and only later marked under-tier; its
979/// stale ChangeId→OID mapping is rebuilt from the notes/sidecar on every
980/// export, so the served set must be re-derived against CURRENT visibility
981/// here rather than trusted from the mapping. The purge is downward-closed: a
982/// still-visible state is unserved if any reachable ancestor is unserved,
983/// because its minted Git commit chains to the ancestor's (now-embargoed)
984/// commit. `sorted_states` is topological (parents before children), so a
985/// parent's served-ness is decided before its child is examined.
986fn purge_unserved_mappings(
987 heddle_repo: &HeddleRepository,
988 mapping: &mut SyncMapping,
989 sorted_states: &[ChangeId],
990 reachable: &HashSet<ChangeId>,
991 audience: &AudienceTier,
992) -> GitResult<HashSet<ObjectId>> {
993 let served = served_change_ids(heddle_repo, sorted_states, reachable, audience)?;
994 let mut purged: HashSet<ObjectId> = HashSet::new();
995 for state_id in sorted_states {
996 if !served.contains(state_id)
997 && let Some(oid) = mapping.remove(state_id)
998 {
999 purged.insert(oid);
1000 }
1001 }
1002 Ok(purged)
1003}
1004
1005/// The downward-closure served set (spike §5.0): a state is served iff it is
1006/// visible to `audience` AND every *reachable* parent is itself served. The
1007/// topo order of `sorted_states` guarantees a parent's servedness is already
1008/// decided when its child is visited. A parent outside `reachable` is a shallow
1009/// boundary (public-by-absence, treated as served).
1010///
1011/// The single notion of "served" shared by the branch-frontier purge and the
1012/// notes-ref retraction — so a note can never be published for a commit whose
1013/// branch the same rule would withhold (heddle#316).
1014fn served_change_ids(
1015 heddle_repo: &HeddleRepository,
1016 sorted_states: &[ChangeId],
1017 reachable: &HashSet<ChangeId>,
1018 audience: &AudienceTier,
1019) -> GitResult<HashSet<ChangeId>> {
1020 let mut served: HashSet<ChangeId> = HashSet::new();
1021 for state_id in sorted_states {
1022 let tier = heddle_repo
1023 .effective_visibility_tier(state_id)
1024 .map_err(|e| {
1025 GitBridgeError::Git(format!("resolve visibility for {state_id}: {e:#}"))
1026 })?;
1027 let parents_served = match heddle_repo.store().get_state(state_id)? {
1028 Some(state) => state
1029 .parents
1030 .iter()
1031 .all(|p| !reachable.contains(p) || served.contains(p)),
1032 None => true,
1033 };
1034 if visible(&tier, audience) && parents_served {
1035 served.insert(*state_id);
1036 }
1037 }
1038 Ok(served)
1039}
1040
1041/// Resolve `ref_name` to its tip commit OID in the mirror, or `None` when the
1042/// ref is absent or unpeelable.
1043fn branch_tip_oid(repo: &SleyRepository, ref_name: &str) -> Option<ObjectId> {
1044 let oid = repo
1045 .find_reference(ref_name)
1046 .ok()
1047 .flatten()?
1048 .peeled_oid(repo)
1049 .ok()
1050 .flatten()?;
1051 peel_to_commit_oid(repo, oid)
1052}
1053
1054fn direct_ref_oid(repo: &SleyRepository, ref_name: &str) -> Option<ObjectId> {
1055 match repo.find_reference(ref_name).ok()??.target {
1056 ReferenceTarget::Direct(oid) => Some(oid),
1057 ReferenceTarget::Symbolic(_) => None,
1058 }
1059}
1060
1061fn peel_to_commit_oid(repo: &SleyRepository, mut oid: ObjectId) -> Option<ObjectId> {
1062 loop {
1063 let object = repo.read_object(&oid).ok()?;
1064 match object.object_type {
1065 GitObjectType::Commit => return Some(oid),
1066 GitObjectType::Tag => {
1067 oid = repo.read_tag(&oid).ok()?.object;
1068 }
1069 _ => return None,
1070 }
1071 }
1072}
1073
1074/// Project the DESIRED heddle-owned ref-set for an export: full ref name → its
1075/// served target OID. A ref appears iff heddle should publish it now; a ref the
1076/// projection omits is one the mirror reconcile must DELETE (its prior export is
1077/// stale). This is the single place that decides WHICH refs exist and at WHAT
1078/// target — the mirror reconcile, and downstream every destination reconcile,
1079/// derive their ops (create / fast-forward / forced rewind / delete / skip) from
1080/// this set, so a surface can never silently drop out of one enforcement pass
1081/// while another keeps serving it (heddle#316 r13).
1082///
1083/// * heads — `refs/heads/<thread>` at the maximal SERVED ancestor-or-self of the
1084/// thread tip ([`frontier_git_oid`]); a thread whose whole line is unserved is
1085/// ABSENT (downward-closed: an embargoed commit and its descendants stay off
1086/// the public branch).
1087/// * tags — `refs/tags/<marker>` at the marker's served state; a marker whose
1088/// state is not served (embargoed, withheld for a withheld ancestor, or
1089/// retargeted to a never-minted Private state) is ABSENT.
1090///
1091/// Notes (`refs/notes/heddle`) are the history-bearing member of the desired set
1092/// and are projected by content rebuild (backfill + [`git_notes::remove_notes`])
1093/// upstream rather than a target swap, so they are not enumerated here.
1094fn project_desired_refs(
1095 heddle_repo: &HeddleRepository,
1096 mapping: &SyncMapping,
1097 threads: &[String],
1098 markers: &[MarkerName],
1099) -> GitResult<std::collections::HashMap<String, ObjectId>> {
1100 let mut desired = std::collections::HashMap::new();
1101 for track_name in threads {
1102 let Some(tip) = heddle_repo
1103 .refs()
1104 .get_thread(&ThreadName::new(track_name))?
1105 else {
1106 continue;
1107 };
1108 if let Some(git_oid) = frontier_git_oid(heddle_repo, mapping, tip)? {
1109 desired.insert(format!("refs/heads/{track_name}"), git_oid);
1110 }
1111 }
1112 for marker_name in markers {
1113 let Some(state_id) = heddle_repo.refs().get_marker(marker_name)? else {
1114 continue;
1115 };
1116 if let Some(git_oid) = mapping.get_git(&state_id) {
1117 desired.insert(format!("refs/tags/{marker_name}"), git_oid);
1118 }
1119 }
1120 Ok(desired)
1121}
1122
1123/// The Git OID the public branch should lag to for a thread whose raw tip is
1124/// `tip`: the maximal **served** ancestor-or-self of `tip`. A state is served
1125/// iff it is present in the mapping — `purge_unserved_mappings` runs first to
1126/// drop any mapped-but-now-embargoed state (and its descendants), so the mapped
1127/// set is exactly the served set. Returns `None` when no ancestor of `tip` is
1128/// served (the whole line is embargoed to its root → absence).
1129fn frontier_git_oid(
1130 heddle_repo: &HeddleRepository,
1131 mapping: &SyncMapping,
1132 tip: ChangeId,
1133) -> GitResult<Option<ObjectId>> {
1134 let mut visited = HashSet::new();
1135 let mut stack = vec![tip];
1136 let mut frontier: Vec<ChangeId> = Vec::new();
1137 while let Some(id) = stack.pop() {
1138 if !visited.insert(id) {
1139 continue;
1140 }
1141 // Stop at the first served (mapped) state on each downward path: that
1142 // is a maximal served ancestor — its own served ancestors are
1143 // dominated by it, so we do not descend past it.
1144 if mapping.get_git(&id).is_some() {
1145 frontier.push(id);
1146 continue;
1147 }
1148 if let Some(state) = heddle_repo.store().get_state(&id)? {
1149 stack.extend(state.parents.iter().copied());
1150 }
1151 }
1152 // A linear thread yields exactly one maximal served state. A merge whose
1153 // embargo splits the DAG can leave an antichain of ≥2 maximal served
1154 // states; advertising each sibling line under its own ref is the
1155 // multi-root work deferred to issues #4/#5. Until then the branch lags
1156 // deterministically (lowest ChangeId) — never published from a raw
1157 // embargoed tip — and the other lines are absent from this branch.
1158 let chosen = frontier.into_iter().min_by_key(|c| c.to_string_full());
1159 Ok(chosen.and_then(|c| mapping.get_git(&c)))
1160}
1161
1162fn reachable_states(
1163 heddle_repo: &HeddleRepository,
1164 roots: &[ChangeId],
1165) -> GitResult<Vec<ChangeId>> {
1166 let mut stack = roots.to_vec();
1167 let mut seen = HashSet::new();
1168 let mut states = Vec::new();
1169 while let Some(state_id) = stack.pop() {
1170 if !seen.insert(state_id) {
1171 continue;
1172 }
1173 states.push(state_id);
1174 if let Some(state) = heddle_repo.store().get_state(&state_id)? {
1175 stack.extend(state.parents.iter().copied());
1176 }
1177 }
1178 Ok(states)
1179}
1180
1181fn state_to_signature(state: &objects::object::State) -> Signature {
1182 let seconds = state.created_at.timestamp();
1183 let raw = format!(
1184 "{} <{}> {} +0000",
1185 state.attribution.principal.name, state.attribution.principal.email, seconds
1186 )
1187 .into_bytes();
1188 Signature {
1189 name: sley::plumbing::sley_core::ByteString::new(
1190 state.attribution.principal.name.as_bytes().to_vec(),
1191 ),
1192 email: sley::plumbing::sley_core::ByteString::new(
1193 state.attribution.principal.email.as_bytes().to_vec(),
1194 ),
1195 time: sley::GitTime::new(seconds, 0),
1196 raw,
1197 }
1198}
1199
1200#[cfg(test)]
1201mod tests {
1202 use objects::object::{Attribution, ContentHash, Principal, State};
1203
1204 use super::*;
1205
1206 fn fidelity_state() -> State {
1207 State::new(
1208 ContentHash::from_bytes([7u8; 32]),
1209 vec![],
1210 Attribution::human(Principal::new("Alice", "alice@example.com")),
1211 )
1212 .with_raw_message("an imported commit\n")
1213 }
1214
1215 /// The fidelity guard reconstructs a byte-faithful imported commit.
1216 #[test]
1217 fn byte_faithful_when_fidelity_present_and_not_lossy() {
1218 assert!(commit_is_byte_faithful(&fidelity_state()));
1219 }
1220
1221 /// The canonical `git_lossy` marker — set by BOTH `import --lossy` and
1222 /// `ingest --lossy` — routes the commit OFF the reconstruct path regardless
1223 /// of which import surface produced it. A lossy import drops/converts tree
1224 /// entries, so reconstructing from state would mint a wrong SHA.
1225 #[test]
1226 fn lossy_marker_blocks_reconstruction() {
1227 let lossy = fidelity_state().with_git_lossy(true);
1228 assert!(
1229 !commit_is_byte_faithful(&lossy),
1230 "a state carrying the canonical git_lossy marker must NOT be \
1231 reconstructed from state, regardless of import surface"
1232 );
1233 }
1234}