cli/bridge/git_export.rs
1// SPDX-License-Identifier: Apache-2.0
2//! Export Heddle states to Git commits functionality.
3
4use std::collections::HashSet;
5
6use objects::{
7 error::HeddleError,
8 object::{ChangeId, ContentHash, FileMode, MarkerName, Principal, State, ThreadName},
9 store::ObjectStore,
10};
11use repo::{AudienceTier, Repository as HeddleRepository, visible};
12use sley::{
13 CommitObject, EntryKind, GitObjectType, ObjectFormat, ObjectId, RefPrecondition,
14 ReferenceTarget, Repository as SleyRepository, Signature, plumbing::sley_object::EncodedObject,
15};
16
17use crate::bridge::{
18 git_core::{
19 GitBridge, GitBridgeError, GitResult, LocalGitIdentity, SyncMapping,
20 count_exported_commits, delete_reference_if_present,
21 git_config_identity_with_global_fallback, git_err, principal_is_default_unknown,
22 read_or_seed_mirror_managed_refs, set_reference, write_mirror_managed_refs,
23 },
24 git_notes,
25 git_reconstruct::{commit_object_id, reconstruct_commit_bytes, write_commit_object},
26 git_sync::{sync_marker_to_tag, sync_track_to_branch},
27 git_util::{ExportStats, ExportedRef},
28};
29
30const SUBMODULE_PREFIX: &str = "heddle-submodule:";
31
32/// Whether `state` carries a captured original git commit to reconstruct
33/// byte-exactly (the #565 de-lossy fidelity fields). When true, export
34/// regenerates the commit object from state via [`reconstruct_commit_bytes`]
35/// with NO W2 footer and NO `"No intent specified"` placeholder — any injected
36/// byte would push the minted object off the original SHA (#567). When false
37/// (a native heddle commit, no original to preserve), export mints with the
38/// footer/placeholder as before.
39///
40/// `raw_message` is the load-bearing signal: the git importer always records it
41/// (even as an empty body for an empty-message commit) for an imported commit,
42/// and never for a native one.
43fn has_git_fidelity(state: &State) -> bool {
44 state.raw_message.is_some()
45}
46
47/// Whether `who`'s name/email round-trip byte-exactly through reconstruction.
48/// `Principal.name/email` are `String`, so the git importer replaced any non-UTF8
49/// identity byte with U+FFFD when it called `to_string()` on the raw actor bytes
50/// (the #565-deferred gap; `Principal` is still `String`, see #564). Those
51/// replaced bytes can't be regenerated, so reconstruction would hash off the
52/// original SHA. A literal U+FFFD that was itself valid UTF-8 in the original
53/// survives fine — so this can only FALSE-POSITIVE into the safe verbatim
54/// fallback, never a wrong-SHA mint.
55fn identity_is_byte_faithful(who: &Principal) -> bool {
56 !who.name.contains('\u{FFFD}') && !who.email.contains('\u{FFFD}')
57}
58
59/// Whether reconstructing `state`'s commit object from Heddle state alone is
60/// guaranteed byte-exact to the original commit — the precondition for the #567
61/// reconstruct-from-state path. False for the two #564 lossy gaps:
62/// 1. non-UTF8 author/committer identity bytes (see [`identity_is_byte_faithful`]);
63/// 2. lossy imports, where unrepresentable tree entries were dropped/converted
64/// so the rebuilt tree — hence commit — OID diverges.
65///
66/// (2) is read off ONE canonical signal — [`State::git_lossy`] — that lossy
67/// import population paths set, rather than enumerating import surfaces or
68/// relying on bridge mapping sidecar state. The state flag closes the whole
69/// class, including any future lossy entry point.
70///
71/// When false the caller MUST keep the verbatim mirror bytes / preserved mapped
72/// OID (or fall through to the native mint) rather than mint a wrong-SHA
73/// reconstructed object.
74fn commit_is_byte_faithful(state: &State) -> bool {
75 has_git_fidelity(state)
76 && !state.git_lossy
77 && identity_is_byte_faithful(&state.attribution.principal)
78 && state
79 .committer
80 .as_ref()
81 .map(identity_is_byte_faithful)
82 .unwrap_or(true)
83}
84
85/// Export a single state to Git for `audience`.
86///
87/// Returns `Ok(None)` — **absence** — when the state's effective visibility
88/// tier is not visible to `audience`: the public mirror never mints a Git
89/// commit (no stub, no partial tree) for an embargoed state (spike §5.0/§5.3).
90/// The caller realizes downward-closure by also withholding any state whose
91/// parent was withheld, so an embargoed commit *and its descendants* stay
92/// absent from the mirror.
93pub(crate) fn export_state(
94 mapping: &mut SyncMapping,
95 heddle_repo: &HeddleRepository,
96 repo: &SleyRepository,
97 state_id: &ChangeId,
98 identity: Option<&LocalGitIdentity>,
99 message_override: Option<&str>,
100 audience: &AudienceTier,
101) -> GitResult<Option<ObjectId>> {
102 let state = heddle_repo
103 .store()
104 .get_state(state_id)?
105 .ok_or(GitBridgeError::StateNotFound(*state_id))?;
106
107 // Audience-aware minting. The visibility decision lives here, at the state
108 // walk where the `ChangeId` is in scope — never in the blob-keyed
109 // `export_tree` (no `ChangeId`/audience).
110 let tier = heddle_repo
111 .effective_visibility_tier(state_id)
112 .map_err(|e| GitBridgeError::Git(format!("resolve visibility for {state_id}: {e:#}")))?;
113 if !visible(&tier, audience) {
114 return Ok(None);
115 }
116
117 // Fidelity mint (#567): the state carries a captured original git commit
118 // (#565 fields — `raw_message` is the load-bearing signal). MINT the commit
119 // object from that raw metadata via `reconstruct_commit_bytes` — NO footer,
120 // NO placeholder, NO message override — so the minted bytes preserve the
121 // original message/identities/headers rather than the native intent+footer.
122 // This is the path that lets the git mirror be dropped (#568): a correct
123 // export no longer depends on the mirror holding the verbatim imported bytes.
124 //
125 // Routing (#567 round 3): export keys off (is byte-faithful?) AND (does a
126 // bridge mapping exist?). The verbatim / mapped-OID fallback for a lossy
127 // commit applies ONLY when a bridge mapping holds a TRACKED original OID to
128 // preserve — and that branch lives in `export_scoped`'s already-mapped path.
129 // `export_state` is only ever reached for an UNMAPPED state (the caller's
130 // `has_heddle` guard), so there is NO original OID to match and NO verbatim
131 // mirror bytes to fall back to. Every unmapped fidelity state therefore MINTS
132 // from its own raw metadata — a `--lossy` one is NOT rejected into a
133 // nonexistent verbatim source (the r2 over-correction, #567 round 3):
134 // * byte-faithful (a clean ingest-backed import, native heddle commit with
135 // fidelity, ...) -> the derived OID coincides with the original commit SHA;
136 // * lossy / non-UTF8 (ingest-backed import with lossy tree conversion) -> a
137 // DERIVED OID that still preserves raw_message/identities/headers. With
138 // no original to match this is correct, not the wrong-SHA bug the r2
139 // `git_lossy` guard (rightly) blocks ONLY for a MAPPED commit.
140 if has_git_fidelity(&state) {
141 let content = reconstruct_commit_bytes(heddle_repo, repo, mapping, &state)?;
142 return Ok(Some(write_commit_object(repo, &content)?));
143 }
144
145 // Native heddle commit: no original to preserve. Mint a raw commit object
146 // and inject the durable W2 footer (and the "No intent specified"
147 // placeholder for an empty intent) — these ride ONLY native commits.
148 let git_tree_oid = export_tree(heddle_repo, repo, &state.tree)?;
149 // R6: emit the W2 footer on every exported commit. The footer is
150 // durable across remotes; per-scope breakdowns ride on the opt-in
151 // git note. For first-pass we audit nothing about the state's
152 // annotation set (the audience defaults to "public"); a follow-up
153 // landed with `bridge git export --audience` threads the count
154 // through here. See `git_util::build_commit_message_with_footer`.
155 let hosted_url = heddle_repo
156 .config()
157 .hosted
158 .upstream_url
159 .as_deref()
160 .filter(|s| !s.is_empty());
161 let message = match message_override {
162 Some(message) => GitBridge::build_commit_message_with_footer_with_body(
163 &state, message, hosted_url, /*omitted=*/ 0,
164 ),
165 None => {
166 GitBridge::build_commit_message_with_footer(&state, hosted_url, /*omitted=*/ 0)
167 }
168 };
169 let parent_oids: Vec<ObjectId> = state
170 .parents
171 .iter()
172 .map(|parent_id| {
173 mapping
174 .get_git(parent_id)
175 .ok_or(GitBridgeError::StateNotFound(*parent_id))
176 })
177 .collect::<GitResult<Vec<_>>>()?;
178
179 let sig = if principal_is_default_unknown(&state.attribution.principal) {
180 let Some(identity) = identity else {
181 return Err(GitBridgeError::Git(
182 "refusing to write a Git commit with Unknown <unknown@example.com>; configure user.name/user.email, HEDDLE_PRINCIPAL_NAME/HEDDLE_PRINCIPAL_EMAIL, or .heddle principal".to_string(),
183 ));
184 };
185 identity.to_signature(state.created_at.timestamp())
186 } else {
187 state_to_signature(&state)
188 };
189 let commit = CommitObject {
190 tree: git_tree_oid,
191 parents: parent_oids,
192 author: sig.to_ident_bytes(),
193 committer: sig.to_ident_bytes(),
194 encoding: None,
195 message: message.into_bytes(),
196 };
197 Ok(Some(
198 repo.write_object(EncodedObject::new(GitObjectType::Commit, commit.write()))
199 .map_err(git_err)?,
200 ))
201}
202
203/// Export a Heddle tree to Git.
204pub fn export_tree(
205 heddle_repo: &HeddleRepository,
206 repo: &SleyRepository,
207 tree_hash: &ContentHash,
208) -> GitResult<ObjectId> {
209 let tree = heddle_repo
210 .store()
211 .get_tree(tree_hash)?
212 .ok_or_else(|| HeddleError::NotFound(format!("tree {}", tree_hash)))?;
213
214 let empty_tree = ObjectId::empty_tree(repo.object_format());
215 let mut editor = repo.edit_tree(&empty_tree).map_err(git_err)?;
216
217 for entry in tree.entries() {
218 let (kind, id) = if entry.is_tree() {
219 (
220 EntryKind::Tree,
221 export_tree(heddle_repo, repo, &entry.hash)?,
222 )
223 } else {
224 // Redaction safety: if the blob carries an active redaction
225 // record, export the stub instead of the bytes. This is the
226 // single chokepoint between Heddle-side redactions and any
227 // downstream Git remote (GitHub, internal mirrors, ...).
228 // Bytes that escape via the bridge are bytes that escape,
229 // full stop — we cannot retroactively scrub them from
230 // outside repos. The check sits *here*, not in
231 // `materialize_blob`, because export reads `blob.content()`
232 // directly (we never touch the materialize path) and writes
233 // the raw bytes through `repo.write_blob`.
234 let stub = heddle_repo
235 .redaction_stub_for_blob(&entry.hash)
236 .map_err(|err| HeddleError::Config(format!("redaction lookup failed: {err}")))?;
237
238 if let Some(stub_text) = stub {
239 // Stubs are text-only; ASCII safe across newline/BOM
240 // quirks and submodule-pointer detection.
241 let kind = match entry.mode {
242 FileMode::Symlink => EntryKind::Symlink,
243 FileMode::Executable => EntryKind::BlobExecutable,
244 _ => EntryKind::Blob,
245 };
246 let oid = repo.write_blob(stub_text.as_bytes()).map_err(git_err)?;
247 (kind, oid)
248 } else {
249 let blob = heddle_repo
250 .store()
251 .get_blob(&entry.hash)?
252 .ok_or_else(|| HeddleError::NotFound(format!("blob {}", entry.hash)))?;
253
254 if entry.mode == FileMode::Normal
255 && let Some(oid) = submodule_oid_from_blob(blob.content())
256 {
257 (EntryKind::Commit, oid)
258 } else {
259 let kind = match entry.mode {
260 FileMode::Normal => EntryKind::Blob,
261 FileMode::Executable => EntryKind::BlobExecutable,
262 FileMode::Symlink => EntryKind::Symlink,
263 };
264 let oid = repo.write_blob(blob.content()).map_err(git_err)?;
265 (kind, oid)
266 }
267 }
268 };
269
270 editor.upsert(entry.name.as_str(), kind, id);
271 }
272
273 repo.write_tree(editor).map_err(git_err)
274}
275
276/// Export all Heddle states to Git commits.
277pub fn export_all(bridge: &mut GitBridge) -> GitResult<ExportStats> {
278 bridge.with_mapping_rollback(|bridge| export_scoped(bridge, None))
279}
280
281/// Export one Heddle thread to its matching Git branch.
282pub fn export_current_thread(bridge: &mut GitBridge, thread: &str) -> GitResult<ExportStats> {
283 bridge.with_mapping_rollback(|bridge| export_scoped(bridge, Some(thread)))
284}
285
286fn export_scoped(bridge: &mut GitBridge, thread: Option<&str>) -> GitResult<ExportStats> {
287 bridge.init_mirror()?;
288
289 let states = match thread {
290 Some(thread) => {
291 let Some(state_id) = bridge
292 .heddle_repo
293 .refs()
294 .get_thread(&ThreadName::new(thread))?
295 else {
296 return Err(GitBridgeError::Git(format!(
297 "thread '{thread}' has no state to export"
298 )));
299 };
300 reachable_states(bridge.heddle_repo, &[state_id])?
301 }
302 None => bridge.heddle_repo.store().list_states()?,
303 };
304 let mut stats = ExportStats::default();
305
306 bridge.build_existing_mapping(None)?;
307 let identity = git_config_identity_with_global_fallback(bridge.heddle_repo.root())?;
308
309 // The Git bridge publishes the PUBLIC mirror — the export audience is
310 // always `Public`. Per-commit visibility is enforced here, in the OSS
311 // bridge, by emitting absence (the authoritative wire serve gate is weft's
312 // job, spike §10 #4).
313 let audience = AudienceTier::Public;
314
315 let sorted_states = bridge.sort_states_topologically(&states)?;
316 // Reachable set, used to tell a withheld parent (absent from the mapping
317 // but present in this export) apart from a genuinely-missing shallow
318 // boundary (absent from both).
319 let reachable: HashSet<ChangeId> = sorted_states.iter().copied().collect();
320 let repo = bridge.open_git_repo()?;
321 bridge.mapping.retain_git_objects(&repo);
322 bridge.seed_git_checkpoint_mappings_from_checkout(&repo)?;
323 bridge.seed_ingest_identity_mappings_from_mirror(&repo)?;
324
325 // The desired/actual ref sets span the WHOLE mirror, not just this export's
326 // scoped thread: a prior all-thread export can leave `refs/heads`/`refs/tags`
327 // for OTHER threads/markers whose commits — or their ancestors — were later
328 // marked Private. Reconciling only the scoped thread would keep serving those
329 // now-embargoed commits via the other thread's branch (heddle#316 cross-thread
330 // embargo leak). So purge + project + reconcile over every heddle-managed
331 // thread/marker regardless of scope; the mint loop below stays scoped (only the
332 // requested thread's new commits are minted), so widening changes WHICH refs
333 // are reconciled, never what gets created.
334 let remote_names = git_remote_names(bridge.heddle_repo);
335 let threads: Vec<String> = {
336 let mut all: Vec<String> = bridge
337 .heddle_repo
338 .refs()
339 .list_threads()?
340 .into_iter()
341 .filter(|thread| !is_remote_tracking_thread_name(thread, &remote_names))
342 .map(|t| t.to_string())
343 .collect();
344 // A scoped export's own thread may be a remote-tracking name the filter
345 // drops; keep it so the requested thread is always reconciled.
346 if let Some(t) = thread
347 && !all.iter().any(|x| x == t)
348 {
349 all.push(t.to_string());
350 }
351 all
352 };
353 let markers: Vec<MarkerName> = bridge.heddle_repo.refs().list_markers()?;
354
355 // Roots of the whole-mirror served frontier: every reconciled thread's tip and
356 // every marker's state. Purging over their reachable closure (below) drops any
357 // out-of-scope commit whose tier — or an ancestor's — is now unserved, so
358 // `project_desired_refs` lags those branches/tags correctly even on a scoped
359 // export (heddle#316).
360 let mut frontier_roots: Vec<ChangeId> = Vec::new();
361 for track_name in &threads {
362 if let Some(tip) = bridge
363 .heddle_repo
364 .refs()
365 .get_thread(&ThreadName::new(track_name))?
366 {
367 frontier_roots.push(tip);
368 }
369 }
370 for marker_name in &markers {
371 if let Some(state_id) = bridge.heddle_repo.refs().get_marker(marker_name)? {
372 frontier_roots.push(state_id);
373 }
374 }
375 let frontier_reachable = reachable_states(bridge.heddle_repo, &frontier_roots)?;
376
377 // Re-validate the served set against CURRENT visibility before anything treats
378 // a mapping as "already served". A state minted while public in a prior export
379 // can be marked under-tier later; `build_existing_mapping` rebuilds its stale
380 // ChangeId→OID mapping from the notes/sidecar every run, so without this purge
381 // the frontier walk, the note re-write, and the tag sync would all keep serving
382 // the now-embargoed commit. Purging is downward-closed: a still-visible state
383 // whose ancestor is embargoed is withheld too (its Git commit chains to the
384 // embargoed one). The purge spans the mint set UNION the whole-mirror frontier,
385 // so a scoped export still drops an out-of-scope thread's now-embargoed tip; for
386 // an all-thread export the frontier ⊆ the mint set and this reduces to the prior
387 // behavior. After this, `mapping` == the served set across every reconciled ref,
388 // exactly what `frontier_git_oid` assumes.
389 // Snapshot EVERY mapped target before the purge mutates the mapping: these are
390 // exactly the commits that may already carry a `refs/notes/*` entry in the
391 // mirror, so the notes-ref retraction below must consider all of them —
392 // including the states the purge is about to drop AND any orphaned mapping a
393 // deleted thread left behind, which no current-ref frontier reaches (heddle#316).
394 let pre_purge_targets: Vec<(ChangeId, ObjectId)> =
395 bridge.mapping.iter().map(|(c, o)| (*c, *o)).collect();
396
397 let purge_reachable: HashSet<ChangeId> = sorted_states
398 .iter()
399 .copied()
400 .chain(frontier_reachable.iter().copied())
401 .collect();
402 let purge_sorted =
403 bridge.sort_states_topologically(&purge_reachable.iter().copied().collect::<Vec<_>>())?;
404 // The purge MUTATES the mapping down to the served set. Its returned drop-set
405 // (the OIDs THIS run withheld) is deliberately NOT used to classify EXISTING
406 // mirror tips: a scoped run's purge omits a tip embargoed in a PRIOR run, or
407 // out of this run's purge reach, so classifying by it misreads such a tip as
408 // served and keeps serving it. Existing-tip served classification (heads + tags
409 // below) uses the whole-mirror served-OID set (`served_oids`) instead
410 // (heddle#316).
411 purge_unserved_mappings(
412 bridge.heddle_repo,
413 &mut bridge.mapping,
414 &purge_sorted,
415 &purge_reachable,
416 &audience,
417 )?;
418
419 // Git OIDs minted during this run. Used below to partition the copied
420 // ref set into newly-written vs already-mapped — so the "newly" count
421 // is a subset of the same walk that produces the total, never a
422 // parallel tally over `list_states()` that could include an orphan
423 // state reachable from no copied ref.
424 let mut newly_minted: HashSet<ObjectId> = HashSet::new();
425
426 for state_id in sorted_states {
427 // Already mapped to a git object — the common case for git-imported
428 // states (the import populated the ChangeId→OID mapping) and for
429 // native commits a prior export already minted. Not re-counted as
430 // "newly minted" (the total is decided below by ref-reachability).
431 if bridge.mapping.has_heddle(&state_id) {
432 // For an IMPORTED commit (#565 fidelity fields present),
433 // REGENERATE the object from state into the mirror rather than
434 // leaning on the verbatim imported bytes still being there (#567).
435 // Byte-identical, so the OID is unchanged and the write is
436 // idempotent today; what changes is that a correct export no
437 // longer DEPENDS on the mirror's verbatim copy — the step that
438 // lets the mirror be dropped (#568). Native already-mapped commits
439 // have no original to reconstruct (raw_message is None), so they
440 // are left to their prior mint; re-minting those is out of scope.
441 if let Some(state) = bridge.heddle_repo.store().get_state(&state_id)?
442 && has_git_fidelity(&state)
443 {
444 let mapped = bridge.mapping.get_git(&state_id);
445 // mirror still required for non-byte-faithful commits (non-UTF8
446 // identities, --lossy); #568 mirror elimination must account for
447 // these, and full de-lossy needs byte-preserving identities (#564
448 // follow-up).
449 // Fidelity guard (#567): regenerate from state ONLY when the
450 // state is fully byte-faithful to the original import. A
451 // non-byte-faithful commit (non-UTF8 identity, or a `--lossy`
452 // import — both import-lossy and ingest-lossy carry the canonical
453 // `git_lossy` flag) would reconstruct to a WRONG SHA, so leave it
454 // on the preserved mapped OID — the verbatim mirror bytes stay the
455 // served object (the pre-#567 behavior for that commit).
456 if commit_is_byte_faithful(&state) {
457 let content = reconstruct_commit_bytes(
458 bridge.heddle_repo,
459 &repo,
460 &bridge.mapping,
461 &state,
462 )?;
463 // Safety net: the regenerated object MUST hash to the mapped
464 // OID. A mismatch means reconstruction diverged from the
465 // imported bytes (an undetected fidelity gap), so fall back to
466 // the verbatim mirror / mapped OID rather than write a
467 // wrong-SHA object.
468 let reconstructed = commit_object_id(&content);
469 if mapped.map(|m| m == reconstructed).unwrap_or(true) {
470 write_commit_object(&repo, &content)?;
471 }
472 }
473 }
474 continue;
475 }
476
477 // Downward-closure (spike §5.0): withhold a state whose parent was
478 // itself withheld for this audience. Processed in topo order, so a
479 // parent's mapped-ness is already decided. A parent absent from the
480 // mapping but present in `reachable` was withheld → withhold this
481 // child too (and, transitively, all its descendants). A parent absent
482 // from both is a shallow boundary (public-by-absence) — let the mint
483 // proceed exactly as before.
484 let parent_withheld = bridge
485 .heddle_repo
486 .store()
487 .get_state(&state_id)?
488 .map(|state| {
489 state
490 .parents
491 .iter()
492 .any(|p| reachable.contains(p) && bridge.mapping.get_git(p).is_none())
493 })
494 .unwrap_or(false);
495 if parent_withheld {
496 continue;
497 }
498
499 let message_override = bridge
500 .commit_message_overrides
501 .get(&state_id)
502 .map(String::as_str);
503 let Some(git_oid) = export_state(
504 &mut bridge.mapping,
505 bridge.heddle_repo,
506 &repo,
507 &state_id,
508 identity.as_ref(),
509 message_override,
510 &audience,
511 )?
512 else {
513 // Embargoed for this audience — emit absence (no commit minted).
514 continue;
515 };
516 bridge.mapping.insert(state_id, git_oid);
517 newly_minted.insert(git_oid);
518
519 // Attach a heddle note to the freshly-created commit so the
520 // change_id survives a fresh `git clone` of the destination
521 // (when only the git side travels, without our sidecar).
522 if let Some(state) = bridge.heddle_repo.store().get_state(&state_id)? {
523 let note = git_notes::HeddleNote::from_state(&state);
524 git_notes::write_note(&repo, git_oid, ¬e)?;
525 }
526 }
527
528 // The downward-closure served set across EVERY note target — the pre-purge
529 // mapping (commits that may already carry a note in the mirror) UNION the
530 // current post-mint mapping (served states + freshly minted commits),
531 // computed over the FULL ancestry of all of them. The branch purge is
532 // ref-rooted (it walks the whole-mirror frontier of current thread tips +
533 // markers), so it never examines an ORPHANED mapping a deleted thread left
534 // behind; without this closure such a commit's note — public-tier but with a
535 // now-Private ancestor — would slip past both the backfill gate and the
536 // retraction below. This is the SAME served rule the branch frontier uses,
537 // applied to notes (heddle#316). For an all-states export it reduces to the
538 // post-purge served set, so behavior there is unchanged.
539 let note_target_roots: Vec<ChangeId> = pre_purge_targets
540 .iter()
541 .map(|(c, _)| *c)
542 .chain(bridge.mapping.iter().map(|(c, _)| *c))
543 .collect();
544 let note_reachable_vec = reachable_states(bridge.heddle_repo, ¬e_target_roots)?;
545 let note_reachable: HashSet<ChangeId> = note_reachable_vec.iter().copied().collect();
546 let note_sorted = bridge.sort_states_topologically(¬e_reachable_vec)?;
547 let note_served =
548 served_change_ids(bridge.heddle_repo, ¬e_sorted, ¬e_reachable, &audience)?;
549
550 // For states whose git_oid was already in the mapping (the SHA-stable
551 // path above), make sure the note is present too. This covers two
552 // cases: (a) the state was imported from a non-heddle git source and
553 // never had a note, and (b) the note was deleted from the mirror.
554 let note_targets: Vec<(ChangeId, ObjectId)> =
555 bridge.mapping.iter().map(|(c, o)| (*c, *o)).collect();
556 for (change_id, git_oid) in note_targets {
557 // Gate the backfill on the downward-closure served set, not the commit's
558 // DIRECT tier. The mapping can carry orphaned entries (a deleted thread's
559 // commits) the ref-rooted purge never examined; gating on direct
560 // visibility alone would re-publish a note for a public commit whose
561 // ancestor became Private — a commit the branch downward-closure
562 // withholds. `note_served` is the same served notion the branch frontier
563 // uses, so no note-write site can emit metadata for an unserved commit
564 // (heddle#316).
565 if note_served.contains(&change_id)
566 && git_notes::read_note(&repo, git_oid)?.is_none()
567 && let Some(state) = bridge.heddle_repo.store().get_state(&change_id)?
568 {
569 let note = git_notes::HeddleNote::from_state(&state);
570 git_notes::write_note(&repo, git_oid, ¬e)?;
571 }
572 }
573
574 // Retract the notes for every mapped target that is NOT served under the
575 // downward-closure rule. The mirror copies `refs/notes/*`
576 // (`collect_ref_updates`) alongside branches and tags, so a note left for an
577 // unserved commit keeps leaking its metadata even after its branch/tag were
578 // retracted. This is the notes-ref sibling of the branch/tag retraction
579 // above (heddle#316). Considering EVERY pre-purge target — not just the
580 // `embargoed_oids` the ref-rooted purge dropped — catches an orphaned note an
581 // ancestor embargo stranded on a deleted thread's commit. Guard the
582 // degenerate case where a still-served state maps to the same git OID by
583 // keeping any OID a served target maps to.
584 let served_note_oids: HashSet<ObjectId> = pre_purge_targets
585 .iter()
586 .copied()
587 .chain(bridge.mapping.iter().map(|(c, o)| (*c, *o)))
588 .filter(|(c, _)| note_served.contains(c))
589 .map(|(_, oid)| oid)
590 .collect();
591 let notes_to_retract: HashSet<ObjectId> = pre_purge_targets
592 .iter()
593 .filter(|(c, _)| !note_served.contains(c))
594 .map(|(_, oid)| *oid)
595 .filter(|oid| !served_note_oids.contains(oid))
596 .collect();
597 git_notes::remove_notes(&repo, ¬es_to_retract)?;
598
599 // THE PROJECTION (heddle#316 r13): the desired heddle-owned ref-set for this
600 // audience — heads lagged to the served frontier, tags at served markers — as
601 // a pure function of the post-purge served `mapping` + audience + ownership.
602 // Every mirror ref op below (set / forced embargo retract / delete) is DERIVED
603 // from this ONE map, so a ref surface can never drift out of one enforcement
604 // pass while another keeps serving it. The mirror MATERIALIZES this desired
605 // set; downstream `plan_destination_reconcile` then reconciles each
606 // destination against it — one projection, one reconcile, all destinations.
607 let desired = project_desired_refs(bridge.heddle_repo, &bridge.mapping, &threads, &markers)?;
608
609 // The downward-closure served set over the WHOLE-MIRROR frontier — the SAME
610 // closure the purge ran over (every thread tip + every marker state). A state is
611 // served iff visible to this audience AND every reachable ancestor is served.
612 // Drives BOTH the served-OID set just below AND (further down) the tag
613 // classifier's served-but-unminted axis.
614 let frontier_served = {
615 let reachable_set: HashSet<ChangeId> = frontier_reachable.iter().copied().collect();
616 let sorted = bridge.sort_states_topologically(&frontier_reachable)?;
617 served_change_ids(bridge.heddle_repo, &sorted, &reachable_set, &audience)?
618 };
619
620 // The whole-mirror SERVED-OID set: the git OID of every served frontier state.
621 // An EXISTING mirror tip (head or tag) is "served" iff it is one of these — an
622 // actually-served commit RIGHT NOW — independent of whether THIS run's purge
623 // happened to drop it. `frontier_served` is downward-closed at the ChangeId
624 // level (served ⟹ every reachable ancestor served) and every minted commit's
625 // parents are themselves mapped, so the mapped OIDs of `frontier_served` already
626 // form the downward-closed git-ancestry set — no separate git walk is needed
627 // (heddle#316). Replaces the prior `embargoed_oids` (this-run-only purge
628 // drop-set) classification that leaked a prior-run / out-of-scope embargo.
629 let served_oids: HashSet<ObjectId> = frontier_served
630 .iter()
631 .filter_map(|state| bridge.mapping.get_git(state))
632 .collect();
633
634 // The mirror's NAME-KEYED ownership record (heddle#316): a mirror ref is
635 // MANAGED iff heddle recorded WRITING it under that full name — NEVER by OID
636 // membership (the r20c bug that classified a foreign ref at a heddle OID as
637 // heddle's). The mirror analog of the destination's `heddle-exported-refs`
638 // record. Read BEFORE the head/tag loops mutate any ref so a genuine first run
639 // (absent record) seeds from the prior-run ref set rather than misreading every
640 // pre-existing ref as foreign — which would silently stop embargo retraction.
641 let mut managed_record = read_or_seed_mirror_managed_refs(&repo)?;
642
643 // Reconcile the mirror's HEADS via the shared `reconcile_ref` decision. Iterate
644 // the CURRENT threads: a dropped thread's stale branch is intentionally NOT
645 // pruned (the #289 dropped-thread contract) — it is never iterated, survives in
646 // the mirror, and stays in the managed record so the push still copies it. The
647 // desired head target is the maximal served ancestor-or-self of the thread tip
648 // (`frontier_git_oid`, via `project_desired_refs`). The existing tip is
649 // classified against the whole-mirror served-OID set, so a still-served tip
650 // fast-forwards, an embargoed tip force-rewinds to its served ancestor, and a
651 // whole-line-embargoed head is deleted. A scoped export reconciles every current
652 // thread but MATERIALIZES (creates) only the one it was scoped to.
653 for track_name in &threads {
654 if bridge
655 .heddle_repo
656 .refs()
657 .get_thread(&ThreadName::new(track_name))?
658 .is_none()
659 {
660 // A listed thread name with no tip is neither synced nor pruned.
661 continue;
662 }
663 let branch_ref = format!("refs/heads/{track_name}");
664 let in_scope = thread.is_none() || thread == Some(track_name.as_str());
665 let desired_oid = desired.get(&branch_ref).copied();
666 let existing_oid = branch_tip_oid(&repo, &branch_ref);
667 match reconcile_ref(
668 ReconcileNs::Head,
669 desired_oid,
670 existing_oid,
671 in_scope,
672 /* marker_served_unminted */ false,
673 &served_oids,
674 ) {
675 ReconcileOp::Write => {
676 let git_oid = desired_oid.expect("Write implies a desired target");
677 sync_track_to_branch(&repo, track_name, git_oid)?;
678 managed_record.insert(branch_ref.clone(), git_oid);
679 stats.threads_synced += 1;
680 stats.branches.push(ExportedRef {
681 name: track_name.clone(),
682 tip: git_oid,
683 });
684 }
685 ReconcileOp::ForceRewind => {
686 let git_oid = desired_oid.expect("ForceRewind implies a desired target");
687 set_reference(
688 &repo,
689 &branch_ref,
690 git_oid,
691 RefPrecondition::Any,
692 "heddle: retract embargoed thread frontier",
693 )?;
694 managed_record.insert(branch_ref.clone(), git_oid);
695 stats.threads_synced += 1;
696 stats.branches.push(ExportedRef {
697 name: track_name.clone(),
698 tip: git_oid,
699 });
700 }
701 ReconcileOp::Delete => {
702 delete_reference_if_present(&repo, &branch_ref)?;
703 managed_record.remove(&branch_ref);
704 }
705 // A head has no preserve path — `frontier_git_oid` recomputes the
706 // target every run, so a head is always rewound/deleted, never kept at
707 // a stale tip (Preserve is unreachable for `ReconcileNs::Head`).
708 ReconcileOp::Skip | ReconcileOp::Preserve => {}
709 }
710 }
711
712 // Reconcile the mirror's TAGS via the SAME `reconcile_ref` decision as heads.
713 // Iterate the UNION of current markers AND the managed-record tag names: a
714 // DELETED marker drops out of `markers`, so its stale managed mirror tag is
715 // reachable only via the managed-record side (heddle#316 S3 — a deleted marker
716 // must delete its tag). A FOREIGN tag heddle never wrote is in NEITHER set, so
717 // it is never visited: it survives untouched and stays out of the push frontier
718 // (`collect_managed_ref_updates`). The desired tag target comes from the
719 // projection (a marker minted this run); the served-but-unminted vs embargoed
720 // split (r18 PRESERVE vs r19 DELETE) is the existing tag's served-ness combined
721 // with `marker_served_unminted`.
722 let mut tag_names: std::collections::BTreeSet<String> =
723 markers.iter().map(|m| m.to_string()).collect();
724 for full_name in managed_record.keys() {
725 if let Some(tag) = full_name.strip_prefix("refs/tags/") {
726 tag_names.insert(tag.to_string());
727 }
728 }
729
730 for name in &tag_names {
731 let tag_ref = format!("refs/tags/{name}");
732 let existing_raw_oid = direct_ref_oid(&repo, &tag_ref);
733 let existing_oid = existing_raw_oid.and_then(|oid| peel_to_commit_oid(&repo, oid));
734 let desired_oid = desired.get(&tag_ref).copied();
735 let in_scope = thread.is_none();
736 // A live marker whose served target was NOT minted into the mapping this
737 // run (a scoped export that didn't reach it). The desired projection omits
738 // such a tag (it only publishes minted markers), so the reconcile sees
739 // `desired_oid == None`; this flag plus the existing tag's served-ness is
740 // the sole axis splitting r18-PRESERVE from r19-DELETE.
741 let marker_served_unminted = match bridge
742 .heddle_repo
743 .refs()
744 .get_marker(&MarkerName::new(name.as_str()))?
745 {
746 Some(state) => {
747 bridge.mapping.get_git(&state).is_none() && frontier_served.contains(&state)
748 }
749 None => false,
750 };
751 if let (Some(desired), Some(raw), Some(peeled)) =
752 (desired_oid, existing_raw_oid, existing_oid)
753 && raw != desired
754 && peeled == desired
755 {
756 managed_record.insert(tag_ref.clone(), raw);
757 stats.markers_synced += 1;
758 stats.tags.push(ExportedRef {
759 name: name.clone(),
760 tip: raw,
761 });
762 continue;
763 }
764 match reconcile_ref(
765 ReconcileNs::Tag,
766 desired_oid,
767 existing_oid,
768 in_scope,
769 marker_served_unminted,
770 &served_oids,
771 ) {
772 ReconcileOp::Write => {
773 let git_oid = desired_oid.expect("Write implies a desired target");
774 sync_marker_to_tag(&repo, name, git_oid)?;
775 managed_record.insert(tag_ref.clone(), git_oid);
776 stats.markers_synced += 1;
777 stats.tags.push(ExportedRef {
778 name: name.clone(),
779 tip: git_oid,
780 });
781 }
782 ReconcileOp::Delete => {
783 delete_reference_if_present(&repo, &tag_ref)?;
784 managed_record.remove(&tag_ref);
785 }
786 // PRESERVE keeps the existing served tag (still managed → stays in the
787 // record); SKIP is a no-op. A tag is free-move and never force-rewinds
788 // (ForceRewind is unreachable for `ReconcileNs::Tag`).
789 ReconcileOp::Preserve | ReconcileOp::Skip | ReconcileOp::ForceRewind => {}
790 }
791 }
792
793 // Persist the updated ownership record so the next reconcile — and the push
794 // frontier (`collect_managed_ref_updates`) — read heddle's managed set by name.
795 write_mirror_managed_refs(&repo, &managed_record)?;
796
797 // Every count in the summary is a partition of the SINGLE copied ref
798 // set: `total` is unique commits reachable from the mirror's branch/tag
799 // tips (the exact ref set `copy_mirror_to_path` writes via
800 // `collect_ref_updates`), and `states_exported` ("newly") is the subset
801 // of THAT walk minted this run. Deriving both from one walk — rather
802 // than tallying `states_exported` inline over `list_states()` — makes
803 // `newly + already == total` hold by construction: a state minted into
804 // the mirror but reachable from no copied ref (e.g. a dropped thread's
805 // orphan history) is in neither count, so the impossible
806 // "1 total (2 newly written)" summary cannot occur.
807 let counts = count_exported_commits(&repo, &newly_minted)?;
808 stats.commits_total = counts.total;
809 stats.states_exported = counts.newly;
810
811 bridge.save_mapping_to_disk()?;
812
813 Ok(stats)
814}
815
/// Which namespace a reconciled mirror ref lives in. The reconcile DECISION
/// ([`reconcile_ref`]) is one shape for both; the only namespace-specific axis is
/// how "write the desired target" lands — a head is fast-forward-guarded (and
/// force-rewound for an embargo retract), a tag is free-move.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ReconcileNs {
    /// A branch ref (`refs/heads/<thread>`), reconciled once per current thread.
    Head,
    /// A tag ref (`refs/tags/<name>`), reconciled once per current marker or
    /// managed-record tag name.
    Tag,
}
825
/// The op the mirror reconcile applies to a single ref. The SINGLE decision the
/// head and tag reconciles share (heddle#316): a foreign ref never reaches here
/// (the iteration set is current threads/markers ∪ heddle-managed names), so every
/// arm acts on a ref heddle owns.
///
/// Produced by [`reconcile_ref`]; the caller maps each variant onto the concrete
/// ref write/delete for the namespace it is reconciling.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ReconcileOp {
    /// Nothing to do — a scoped export declining to materialize an out-of-scope
    /// ref, or a genuine no-op (no desired target and nothing to retract).
    Skip,
    /// Write the desired target through the namespace's guarded path: a head
    /// fast-forwards (or creates); a tag force-retargets (or creates).
    Write,
    /// Force-set a head to the desired target past the fast-forward guard — the
    /// embargo retract that rewinds an embargoed tip to its served ancestor.
    /// Only ever produced for [`ReconcileNs::Head`].
    ForceRewind,
    /// Keep an existing served tag whose marker target is served-but-unminted this
    /// run (r18). A later all-thread export re-mints and advances it. Requires the
    /// `marker_served_unminted` input, which the head reconcile always passes as
    /// `false`.
    Preserve,
    /// Delete the ref — its line/marker has no served frontier (whole-line embargo,
    /// r19 embargoed-existing tag, or a deleted marker's stale tag).
    Delete,
}
848
849/// The mirror reconcile decision — IDENTICAL in shape for heads and tags
850/// (heddle#316). `desired_oid` is the served target the projection wants published
851/// (`None` ⇒ nothing served for this ref this run); `existing_oid` is the mirror
852/// ref's CURRENT tip, already PEELED to a commit by [`branch_tip_oid`] (so an
853/// annotated foreign tag colliding with a marker name is tested by its commit, not
854/// its tag-object OID — heddle#316 risk #2). `in_scope` gates only
855/// MATERIALIZATION: a scoped export reconciles existing refs but never CREATES a
856/// brand-new one the caller did not ask for. `marker_served_unminted` is set only
857/// for a tag whose live marker target is served but was not minted this run — the
858/// sole axis that, combined with `existing_served`, splits r18-PRESERVE from
859/// r19-DELETE. `served_oids` is the whole-mirror served-OID set classifying the
860/// existing tip (NOT this run's purge drop-set, which omits a prior-run /
861/// out-of-scope embargo).
862fn reconcile_ref(
863 ns: ReconcileNs,
864 desired_oid: Option<ObjectId>,
865 existing_oid: Option<ObjectId>,
866 in_scope: bool,
867 marker_served_unminted: bool,
868 served_oids: &HashSet<ObjectId>,
869) -> ReconcileOp {
870 // `existing_oid` is already the peeled commit OID (`branch_tip_oid`), so this
871 // membership test compares commit-against-commit (risk #2).
872 let existing_served = existing_oid
873 .map(|oid| served_oids.contains(&oid))
874 .unwrap_or(false);
875 match (desired_oid, existing_oid) {
876 // Scoped export, would-create: never materialize a ref the caller did not
877 // ask to export.
878 (Some(_), None) if !in_scope => ReconcileOp::Skip,
879 // Create a fresh ref at the served target.
880 (Some(_), None) => ReconcileOp::Write,
881 // Head with an existing tip: a still-served tip fast-forwards (r17 FF guard
882 // applies); an embargoed tip is force-rewound to its served ancestor.
883 (Some(_), Some(_)) if ns == ReconcileNs::Head => {
884 if existing_served {
885 ReconcileOp::Write
886 } else {
887 ReconcileOp::ForceRewind
888 }
889 }
890 // Tag with an existing tip: free-move force-retarget to the served target.
891 (Some(_), Some(_)) => ReconcileOp::Write,
892 // Nothing served, nothing present.
893 (None, None) => ReconcileOp::Skip,
894 // Nothing served, but a tag exists whose marker target is served-but-
895 // unminted AND the existing tag is itself served: PRESERVE (r18).
896 (None, Some(_)) if marker_served_unminted && existing_served => ReconcileOp::Preserve,
897 // Nothing served, an existing ref remains: DELETE (whole-line embargo, r19
898 // embargoed existing tag, or a deleted marker's stale tag).
899 (None, Some(_)) => ReconcileOp::Delete,
900 }
901}
902
903fn git_remote_names(heddle_repo: &HeddleRepository) -> HashSet<String> {
904 let Ok(repo) = SleyRepository::discover(heddle_repo.root()) else {
905 return HashSet::new();
906 };
907 repo.remote_names()
908 .unwrap_or_default()
909 .into_iter()
910 .filter(|name| !name.trim().is_empty())
911 .collect()
912}
913
/// Whether `thread` has the `<remote>/<branch>` shape of a remote-tracking name:
/// the text before the FIRST `/` must be one of `remote_names`, and the text after
/// it must be non-empty. A name without any `/` never matches.
fn is_remote_tracking_thread_name(thread: &str, remote_names: &HashSet<String>) -> bool {
    match thread.split_once('/') {
        Some((prefix, rest)) => !rest.is_empty() && remote_names.contains(prefix),
        None => false,
    }
}
920
921/// Purge from `mapping` every reachable state whose effective visibility is no
922/// longer served by `audience`, and return the Git OIDs that were dropped so
923/// the caller can retract any ref still pointing at them.
924///
925/// A state can be minted while public and only later marked under-tier; its
926/// stale ChangeId→OID mapping is rebuilt from the notes/sidecar on every
927/// export, so the served set must be re-derived against CURRENT visibility
928/// here rather than trusted from the mapping. The purge is downward-closed: a
929/// still-visible state is unserved if any reachable ancestor is unserved,
930/// because its minted Git commit chains to the ancestor's (now-embargoed)
931/// commit. `sorted_states` is topological (parents before children), so a
932/// parent's served-ness is decided before its child is examined.
933fn purge_unserved_mappings(
934 heddle_repo: &HeddleRepository,
935 mapping: &mut SyncMapping,
936 sorted_states: &[ChangeId],
937 reachable: &HashSet<ChangeId>,
938 audience: &AudienceTier,
939) -> GitResult<HashSet<ObjectId>> {
940 let served = served_change_ids(heddle_repo, sorted_states, reachable, audience)?;
941 let mut purged: HashSet<ObjectId> = HashSet::new();
942 for state_id in sorted_states {
943 if !served.contains(state_id)
944 && let Some(oid) = mapping.remove(state_id)
945 {
946 purged.insert(oid);
947 }
948 }
949 Ok(purged)
950}
951
952/// The downward-closure served set (spike §5.0): a state is served iff it is
953/// visible to `audience` AND every *reachable* parent is itself served. The
954/// topo order of `sorted_states` guarantees a parent's servedness is already
955/// decided when its child is visited. A parent outside `reachable` is a shallow
956/// boundary (public-by-absence, treated as served).
957///
958/// The single notion of "served" shared by the branch-frontier purge and the
959/// notes-ref retraction — so a note can never be published for a commit whose
960/// branch the same rule would withhold (heddle#316).
961fn served_change_ids(
962 heddle_repo: &HeddleRepository,
963 sorted_states: &[ChangeId],
964 reachable: &HashSet<ChangeId>,
965 audience: &AudienceTier,
966) -> GitResult<HashSet<ChangeId>> {
967 let mut served: HashSet<ChangeId> = HashSet::new();
968 for state_id in sorted_states {
969 let tier = heddle_repo
970 .effective_visibility_tier(state_id)
971 .map_err(|e| {
972 GitBridgeError::Git(format!("resolve visibility for {state_id}: {e:#}"))
973 })?;
974 let parents_served = match heddle_repo.store().get_state(state_id)? {
975 Some(state) => state
976 .parents
977 .iter()
978 .all(|p| !reachable.contains(p) || served.contains(p)),
979 None => true,
980 };
981 if visible(&tier, audience) && parents_served {
982 served.insert(*state_id);
983 }
984 }
985 Ok(served)
986}
987
988/// Resolve `ref_name` to its tip commit OID in the mirror, or `None` when the
989/// ref is absent or unpeelable.
990fn branch_tip_oid(repo: &SleyRepository, ref_name: &str) -> Option<ObjectId> {
991 let oid = repo
992 .find_reference(ref_name)
993 .ok()
994 .flatten()?
995 .peeled_oid(repo)
996 .ok()
997 .flatten()?;
998 peel_to_commit_oid(repo, oid)
999}
1000
1001fn direct_ref_oid(repo: &SleyRepository, ref_name: &str) -> Option<ObjectId> {
1002 match repo.find_reference(ref_name).ok()??.target {
1003 ReferenceTarget::Direct(oid) => Some(oid),
1004 ReferenceTarget::Symbolic(_) => None,
1005 }
1006}
1007
1008fn peel_to_commit_oid(repo: &SleyRepository, mut oid: ObjectId) -> Option<ObjectId> {
1009 loop {
1010 let object = repo.read_object(&oid).ok()?;
1011 match object.object_type {
1012 GitObjectType::Commit => return Some(oid),
1013 GitObjectType::Tag => {
1014 oid = repo.read_tag(&oid).ok()?.object;
1015 }
1016 _ => return None,
1017 }
1018 }
1019}
1020
1021/// Project the DESIRED heddle-owned ref-set for an export: full ref name → its
1022/// served target OID. A ref appears iff heddle should publish it now; a ref the
1023/// projection omits is one the mirror reconcile must DELETE (its prior export is
1024/// stale). This is the single place that decides WHICH refs exist and at WHAT
1025/// target — the mirror reconcile, and downstream every destination reconcile,
1026/// derive their ops (create / fast-forward / forced rewind / delete / skip) from
1027/// this set, so a surface can never silently drop out of one enforcement pass
1028/// while another keeps serving it (heddle#316 r13).
1029///
1030/// * heads — `refs/heads/<thread>` at the maximal SERVED ancestor-or-self of the
1031/// thread tip ([`frontier_git_oid`]); a thread whose whole line is unserved is
1032/// ABSENT (downward-closed: an embargoed commit and its descendants stay off
1033/// the public branch).
1034/// * tags — `refs/tags/<marker>` at the marker's served state; a marker whose
1035/// state is not served (embargoed, withheld for a withheld ancestor, or
1036/// retargeted to a never-minted Private state) is ABSENT.
1037///
1038/// Notes (`refs/notes/heddle`) are the history-bearing member of the desired set
1039/// and are projected by content rebuild (backfill + [`git_notes::remove_notes`])
1040/// upstream rather than a target swap, so they are not enumerated here.
1041fn project_desired_refs(
1042 heddle_repo: &HeddleRepository,
1043 mapping: &SyncMapping,
1044 threads: &[String],
1045 markers: &[MarkerName],
1046) -> GitResult<std::collections::HashMap<String, ObjectId>> {
1047 let mut desired = std::collections::HashMap::new();
1048 for track_name in threads {
1049 let Some(tip) = heddle_repo
1050 .refs()
1051 .get_thread(&ThreadName::new(track_name))?
1052 else {
1053 continue;
1054 };
1055 if let Some(git_oid) = frontier_git_oid(heddle_repo, mapping, tip)? {
1056 desired.insert(format!("refs/heads/{track_name}"), git_oid);
1057 }
1058 }
1059 for marker_name in markers {
1060 let Some(state_id) = heddle_repo.refs().get_marker(marker_name)? else {
1061 continue;
1062 };
1063 if let Some(git_oid) = mapping.get_git(&state_id) {
1064 desired.insert(format!("refs/tags/{marker_name}"), git_oid);
1065 }
1066 }
1067 Ok(desired)
1068}
1069
1070/// The Git OID the public branch should lag to for a thread whose raw tip is
1071/// `tip`: the maximal **served** ancestor-or-self of `tip`. A state is served
1072/// iff it is present in the mapping — `purge_unserved_mappings` runs first to
1073/// drop any mapped-but-now-embargoed state (and its descendants), so the mapped
1074/// set is exactly the served set. Returns `None` when no ancestor of `tip` is
1075/// served (the whole line is embargoed to its root → absence).
1076fn frontier_git_oid(
1077 heddle_repo: &HeddleRepository,
1078 mapping: &SyncMapping,
1079 tip: ChangeId,
1080) -> GitResult<Option<ObjectId>> {
1081 let mut visited = HashSet::new();
1082 let mut stack = vec![tip];
1083 let mut frontier: Vec<ChangeId> = Vec::new();
1084 while let Some(id) = stack.pop() {
1085 if !visited.insert(id) {
1086 continue;
1087 }
1088 // Stop at the first served (mapped) state on each downward path: that
1089 // is a maximal served ancestor — its own served ancestors are
1090 // dominated by it, so we do not descend past it.
1091 if mapping.get_git(&id).is_some() {
1092 frontier.push(id);
1093 continue;
1094 }
1095 if let Some(state) = heddle_repo.store().get_state(&id)? {
1096 stack.extend(state.parents.iter().copied());
1097 }
1098 }
1099 // A linear thread yields exactly one maximal served state. A merge whose
1100 // embargo splits the DAG can leave an antichain of ≥2 maximal served
1101 // states; advertising each sibling line under its own ref is the
1102 // multi-root work deferred to issues #4/#5. Until then the branch lags
1103 // deterministically (lowest ChangeId) — never published from a raw
1104 // embargoed tip — and the other lines are absent from this branch.
1105 let chosen = frontier.into_iter().min_by_key(|c| c.to_string_full());
1106 Ok(chosen.and_then(|c| mapping.get_git(&c)))
1107}
1108
1109fn reachable_states(
1110 heddle_repo: &HeddleRepository,
1111 roots: &[ChangeId],
1112) -> GitResult<Vec<ChangeId>> {
1113 let mut stack = roots.to_vec();
1114 let mut seen = HashSet::new();
1115 let mut states = Vec::new();
1116 while let Some(state_id) = stack.pop() {
1117 if !seen.insert(state_id) {
1118 continue;
1119 }
1120 states.push(state_id);
1121 if let Some(state) = heddle_repo.store().get_state(&state_id)? {
1122 stack.extend(state.parents.iter().copied());
1123 }
1124 }
1125 Ok(states)
1126}
1127
1128fn state_to_signature(state: &objects::object::State) -> Signature {
1129 let seconds = state.created_at.timestamp();
1130 let raw = format!(
1131 "{} <{}> {} +0000",
1132 state.attribution.principal.name, state.attribution.principal.email, seconds
1133 )
1134 .into_bytes();
1135 Signature {
1136 name: sley::plumbing::sley_core::ByteString::new(
1137 state.attribution.principal.name.as_bytes().to_vec(),
1138 ),
1139 email: sley::plumbing::sley_core::ByteString::new(
1140 state.attribution.principal.email.as_bytes().to_vec(),
1141 ),
1142 time: sley::GitTime::new(seconds, 0),
1143 raw,
1144 }
1145}
1146
1147fn submodule_oid_from_blob(content: &[u8]) -> Option<ObjectId> {
1148 let text = std::str::from_utf8(content).ok()?;
1149 let text = text.trim();
1150 let trimmed = text.strip_prefix(SUBMODULE_PREFIX)?.trim();
1151
1152 ObjectId::from_hex(ObjectFormat::Sha1, trimmed).ok()
1153}
1154
#[cfg(test)]
mod tests {
    use objects::object::{Attribution, ContentHash, Principal, State};

    use super::*;

    /// A state shaped like an imported git commit: human-attributed, no parents,
    /// and carrying a `raw_message`.
    fn fidelity_state() -> State {
        let author = Attribution::human(Principal::new("Alice", "alice@example.com"));
        State::new(ContentHash::from_bytes([7u8; 32]), vec![], author)
            .with_raw_message("an imported commit\n")
    }

    /// A state with fidelity data and no lossy marker is byte-faithful.
    #[test]
    fn byte_faithful_when_fidelity_present_and_not_lossy() {
        let imported = fidelity_state();
        assert!(commit_is_byte_faithful(&imported));
    }

    /// The canonical `git_lossy` marker — set by BOTH `import --lossy` and
    /// `ingest --lossy` — must route the commit OFF the reconstruct path no
    /// matter which import surface produced it: a lossy import drops/converts
    /// tree entries, so reconstructing from state would mint a wrong SHA.
    #[test]
    fn lossy_marker_blocks_reconstruction() {
        let lossy = fidelity_state().with_git_lossy(true);
        let faithful = commit_is_byte_faithful(&lossy);
        assert!(
            !faithful,
            "a state carrying the canonical git_lossy marker must NOT be \
             reconstructed from state, regardless of import surface"
        );
    }
}