// cli/bridge/git_export.rs

1// SPDX-License-Identifier: Apache-2.0
//! Export Heddle states to Git commits.
3
4use std::collections::HashSet;
5
6use objects::{
7    error::HeddleError,
8    object::{ChangeId, ContentHash, FileMode, MarkerName, Principal, State, ThreadName},
9    store::ObjectStore,
10};
11use repo::{AudienceTier, Repository as HeddleRepository, visible};
12use sley::{
13    CommitObject, EntryKind, GitObjectType, ObjectFormat, ObjectId, RefPrecondition,
14    ReferenceTarget, Repository as SleyRepository, Signature, plumbing::sley_object::EncodedObject,
15};
16
17use crate::bridge::{
18    git_core::{
19        GitBridge, GitBridgeError, GitResult, LocalGitIdentity, SyncMapping,
20        count_exported_commits, delete_reference_if_present,
21        git_config_identity_with_global_fallback, git_err, principal_is_default_unknown,
22        read_or_seed_mirror_managed_refs, set_reference, write_mirror_managed_refs,
23    },
24    git_notes,
25    git_reconstruct::{commit_object_id, reconstruct_commit_bytes, write_commit_object},
26    git_sync::{sync_marker_to_tag, sync_track_to_branch},
27    git_util::{ExportStats, ExportedRef},
28};
29
/// Textual prefix associated with submodule pointers stored in Heddle blobs.
///
/// NOTE(review): presumably the marker that `submodule_oid_from_blob` looks for
/// when `export_tree` decides whether a normal-mode blob is really a gitlink —
/// that helper is outside this chunk, so confirm the exact encoding there.
const SUBMODULE_PREFIX: &str = "heddle-submodule:";
31
32/// Whether `state` carries a captured original git commit to reconstruct
33/// byte-exactly (the #565 de-lossy fidelity fields). When true, export
34/// regenerates the commit object from state via [`reconstruct_commit_bytes`]
35/// with NO W2 footer and NO `"No intent specified"` placeholder — any injected
36/// byte would push the minted object off the original SHA (#567). When false
37/// (a native heddle commit, no original to preserve), export mints with the
38/// footer/placeholder as before.
39///
40/// `raw_message` is the load-bearing signal: the git importer always records it
41/// (even as an empty body for an empty-message commit) for an imported commit,
42/// and never for a native one.
43fn has_git_fidelity(state: &State) -> bool {
44    state.raw_message.is_some()
45}
46
47/// Whether `who`'s name/email round-trip byte-exactly through reconstruction.
48/// `Principal.name/email` are `String`, so the git importer replaced any non-UTF8
49/// identity byte with U+FFFD when it called `to_string()` on the raw actor bytes
50/// (the #565-deferred gap; `Principal` is still `String`, see #564). Those
51/// replaced bytes can't be regenerated, so reconstruction would hash off the
52/// original SHA. A literal U+FFFD that was itself valid UTF-8 in the original
53/// survives fine — so this can only FALSE-POSITIVE into the safe verbatim
54/// fallback, never a wrong-SHA mint.
55fn identity_is_byte_faithful(who: &Principal) -> bool {
56    !who.name.contains('\u{FFFD}') && !who.email.contains('\u{FFFD}')
57}
58
59/// Whether reconstructing `state`'s commit object from Heddle state alone is
60/// guaranteed byte-exact to the original commit — the precondition for the #567
61/// reconstruct-from-state path. False for the two #564 lossy gaps:
62///   1. non-UTF8 author/committer identity bytes (see [`identity_is_byte_faithful`]);
63///   2. lossy imports, where unrepresentable tree entries were dropped/converted
64///      so the rebuilt tree — hence commit — OID diverges.
65///
66/// (2) is read off ONE canonical signal — [`State::git_lossy`] — that lossy
67/// import population paths set, rather than enumerating import surfaces or
68/// relying on bridge mapping sidecar state. The state flag closes the whole
69/// class, including any future lossy entry point.
70///
71/// When false the caller MUST keep the verbatim mirror bytes / preserved mapped
72/// OID (or fall through to the native mint) rather than mint a wrong-SHA
73/// reconstructed object.
74fn commit_is_byte_faithful(state: &State) -> bool {
75    has_git_fidelity(state)
76        && !state.git_lossy
77        && identity_is_byte_faithful(&state.attribution.principal)
78        && state
79            .committer
80            .as_ref()
81            .map(identity_is_byte_faithful)
82            .unwrap_or(true)
83}
84
85/// Export a single state to Git for `audience`.
86///
87/// Returns `Ok(None)` — **absence** — when the state's effective visibility
88/// tier is not visible to `audience`: the public mirror never mints a Git
89/// commit (no stub, no partial tree) for an embargoed state (spike §5.0/§5.3).
90/// The caller realizes downward-closure by also withholding any state whose
91/// parent was withheld, so an embargoed commit *and its descendants* stay
92/// absent from the mirror.
93pub(crate) fn export_state(
94    mapping: &mut SyncMapping,
95    heddle_repo: &HeddleRepository,
96    repo: &SleyRepository,
97    state_id: &ChangeId,
98    identity: Option<&LocalGitIdentity>,
99    message_override: Option<&str>,
100    audience: &AudienceTier,
101) -> GitResult<Option<ObjectId>> {
102    let state = heddle_repo
103        .store()
104        .get_state(state_id)?
105        .ok_or(GitBridgeError::StateNotFound(*state_id))?;
106
107    // Audience-aware minting. The visibility decision lives here, at the state
108    // walk where the `ChangeId` is in scope — never in the blob-keyed
109    // `export_tree` (no `ChangeId`/audience).
110    let tier = heddle_repo
111        .effective_visibility_tier(state_id)
112        .map_err(|e| GitBridgeError::Git(format!("resolve visibility for {state_id}: {e:#}")))?;
113    if !visible(&tier, audience) {
114        return Ok(None);
115    }
116
117    // Fidelity mint (#567): the state carries a captured original git commit
118    // (#565 fields — `raw_message` is the load-bearing signal). MINT the commit
119    // object from that raw metadata via `reconstruct_commit_bytes` — NO footer,
120    // NO placeholder, NO message override — so the minted bytes preserve the
121    // original message/identities/headers rather than the native intent+footer.
122    // This is the path that lets the git mirror be dropped (#568): a correct
123    // export no longer depends on the mirror holding the verbatim imported bytes.
124    //
125    // Routing (#567 round 3): export keys off (is byte-faithful?) AND (does a
126    // bridge mapping exist?). The verbatim / mapped-OID fallback for a lossy
127    // commit applies ONLY when a bridge mapping holds a TRACKED original OID to
128    // preserve — and that branch lives in `export_scoped`'s already-mapped path.
129    // `export_state` is only ever reached for an UNMAPPED state (the caller's
130    // `has_heddle` guard), so there is NO original OID to match and NO verbatim
131    // mirror bytes to fall back to. Every unmapped fidelity state therefore MINTS
132    // from its own raw metadata — a `--lossy` one is NOT rejected into a
133    // nonexistent verbatim source (the r2 over-correction, #567 round 3):
134    //   * byte-faithful (a clean ingest-backed import, native heddle commit with
135    //     fidelity, ...) -> the derived OID coincides with the original commit SHA;
136    //   * lossy / non-UTF8 (ingest-backed import with lossy tree conversion) -> a
137    //     DERIVED OID that still preserves raw_message/identities/headers. With
138    //     no original to match this is correct, not the wrong-SHA bug the r2
139    //     `git_lossy` guard (rightly) blocks ONLY for a MAPPED commit.
140    if has_git_fidelity(&state) {
141        let content = reconstruct_commit_bytes(heddle_repo, repo, mapping, &state)?;
142        return Ok(Some(write_commit_object(repo, &content)?));
143    }
144
145    // Native heddle commit: no original to preserve. Mint a raw commit object
146    // and inject the durable W2 footer (and the "No intent specified"
147    // placeholder for an empty intent) — these ride ONLY native commits.
148    let git_tree_oid = export_tree(heddle_repo, repo, &state.tree)?;
149    // R6: emit the W2 footer on every exported commit. The footer is
150    // durable across remotes; per-scope breakdowns ride on the opt-in
151    // git note. For first-pass we audit nothing about the state's
152    // annotation set (the audience defaults to "public"); a follow-up
153    // landed with `bridge git export --audience` threads the count
154    // through here. See `git_util::build_commit_message_with_footer`.
155    let hosted_url = heddle_repo
156        .config()
157        .hosted
158        .upstream_url
159        .as_deref()
160        .filter(|s| !s.is_empty());
161    let message = match message_override {
162        Some(message) => GitBridge::build_commit_message_with_footer_with_body(
163            &state, message, hosted_url, /*omitted=*/ 0,
164        ),
165        None => {
166            GitBridge::build_commit_message_with_footer(&state, hosted_url, /*omitted=*/ 0)
167        }
168    };
169    let parent_oids: Vec<ObjectId> = state
170        .parents
171        .iter()
172        .map(|parent_id| {
173            mapping
174                .get_git(parent_id)
175                .ok_or(GitBridgeError::StateNotFound(*parent_id))
176        })
177        .collect::<GitResult<Vec<_>>>()?;
178
179    let sig = if principal_is_default_unknown(&state.attribution.principal) {
180        let Some(identity) = identity else {
181            return Err(GitBridgeError::Git(
182                "refusing to write a Git commit with Unknown <unknown@example.com>; configure user.name/user.email, HEDDLE_PRINCIPAL_NAME/HEDDLE_PRINCIPAL_EMAIL, or .heddle principal".to_string(),
183            ));
184        };
185        identity.to_signature(state.created_at.timestamp())
186    } else {
187        state_to_signature(&state)
188    };
189    let commit = CommitObject {
190        tree: git_tree_oid,
191        parents: parent_oids,
192        author: sig.to_ident_bytes(),
193        committer: sig.to_ident_bytes(),
194        encoding: None,
195        message: message.into_bytes(),
196    };
197    Ok(Some(
198        repo.write_object(EncodedObject::new(GitObjectType::Commit, commit.write()))
199            .map_err(git_err)?,
200    ))
201}
202
203/// Export a Heddle tree to Git.
204pub fn export_tree(
205    heddle_repo: &HeddleRepository,
206    repo: &SleyRepository,
207    tree_hash: &ContentHash,
208) -> GitResult<ObjectId> {
209    let tree = heddle_repo
210        .store()
211        .get_tree(tree_hash)?
212        .ok_or_else(|| HeddleError::NotFound(format!("tree {}", tree_hash)))?;
213
214    let empty_tree = ObjectId::empty_tree(repo.object_format());
215    let mut editor = repo.edit_tree(&empty_tree).map_err(git_err)?;
216
217    for entry in tree.entries() {
218        let (kind, id) = if entry.is_tree() {
219            (
220                EntryKind::Tree,
221                export_tree(heddle_repo, repo, &entry.hash)?,
222            )
223        } else {
224            // Redaction safety: if the blob carries an active redaction
225            // record, export the stub instead of the bytes. This is the
226            // single chokepoint between Heddle-side redactions and any
227            // downstream Git remote (GitHub, internal mirrors, ...).
228            // Bytes that escape via the bridge are bytes that escape,
229            // full stop — we cannot retroactively scrub them from
230            // outside repos. The check sits *here*, not in
231            // `materialize_blob`, because export reads `blob.content()`
232            // directly (we never touch the materialize path) and writes
233            // the raw bytes through `repo.write_blob`.
234            let stub = heddle_repo
235                .redaction_stub_for_blob(&entry.hash)
236                .map_err(|err| HeddleError::Config(format!("redaction lookup failed: {err}")))?;
237
238            if let Some(stub_text) = stub {
239                // Stubs are text-only; ASCII safe across newline/BOM
240                // quirks and submodule-pointer detection.
241                let kind = match entry.mode {
242                    FileMode::Symlink => EntryKind::Symlink,
243                    FileMode::Executable => EntryKind::BlobExecutable,
244                    _ => EntryKind::Blob,
245                };
246                let oid = repo.write_blob(stub_text.as_bytes()).map_err(git_err)?;
247                (kind, oid)
248            } else {
249                let blob = heddle_repo
250                    .store()
251                    .get_blob(&entry.hash)?
252                    .ok_or_else(|| HeddleError::NotFound(format!("blob {}", entry.hash)))?;
253
254                if entry.mode == FileMode::Normal
255                    && let Some(oid) = submodule_oid_from_blob(blob.content())
256                {
257                    (EntryKind::Commit, oid)
258                } else {
259                    let kind = match entry.mode {
260                        FileMode::Normal => EntryKind::Blob,
261                        FileMode::Executable => EntryKind::BlobExecutable,
262                        FileMode::Symlink => EntryKind::Symlink,
263                    };
264                    let oid = repo.write_blob(blob.content()).map_err(git_err)?;
265                    (kind, oid)
266                }
267            }
268        };
269
270        editor.upsert(entry.name.as_str(), kind, id);
271    }
272
273    repo.write_tree(editor).map_err(git_err)
274}
275
276/// Export all Heddle states to Git commits.
277pub fn export_all(bridge: &mut GitBridge) -> GitResult<ExportStats> {
278    bridge.with_mapping_rollback(|bridge| export_scoped(bridge, None))
279}
280
281/// Export one Heddle thread to its matching Git branch.
282pub fn export_current_thread(bridge: &mut GitBridge, thread: &str) -> GitResult<ExportStats> {
283    bridge.with_mapping_rollback(|bridge| export_scoped(bridge, Some(thread)))
284}
285
286fn export_scoped(bridge: &mut GitBridge, thread: Option<&str>) -> GitResult<ExportStats> {
287    bridge.init_mirror()?;
288
289    let states = match thread {
290        Some(thread) => {
291            let Some(state_id) = bridge
292                .heddle_repo
293                .refs()
294                .get_thread(&ThreadName::new(thread))?
295            else {
296                return Err(GitBridgeError::Git(format!(
297                    "thread '{thread}' has no state to export"
298                )));
299            };
300            reachable_states(bridge.heddle_repo, &[state_id])?
301        }
302        None => bridge.heddle_repo.store().list_states()?,
303    };
304    let mut stats = ExportStats::default();
305
306    bridge.build_existing_mapping(None)?;
307    let identity = git_config_identity_with_global_fallback(bridge.heddle_repo.root())?;
308
309    // The Git bridge publishes the PUBLIC mirror — the export audience is
310    // always `Public`. Per-commit visibility is enforced here, in the OSS
311    // bridge, by emitting absence (the authoritative wire serve gate is weft's
312    // job, spike §10 #4).
313    let audience = AudienceTier::Public;
314
315    let sorted_states = bridge.sort_states_topologically(&states)?;
316    // Reachable set, used to tell a withheld parent (absent from the mapping
317    // but present in this export) apart from a genuinely-missing shallow
318    // boundary (absent from both).
319    let reachable: HashSet<ChangeId> = sorted_states.iter().copied().collect();
320    let repo = bridge.open_git_repo()?;
321    bridge.mapping.retain_git_objects(&repo);
322    bridge.seed_git_checkpoint_mappings_from_checkout(&repo)?;
323    bridge.seed_ingest_identity_mappings_from_mirror(&repo)?;
324
325    // The desired/actual ref sets span the WHOLE mirror, not just this export's
326    // scoped thread: a prior all-thread export can leave `refs/heads`/`refs/tags`
327    // for OTHER threads/markers whose commits — or their ancestors — were later
328    // marked Private. Reconciling only the scoped thread would keep serving those
329    // now-embargoed commits via the other thread's branch (heddle#316 cross-thread
330    // embargo leak). So purge + project + reconcile over every heddle-managed
331    // thread/marker regardless of scope; the mint loop below stays scoped (only the
332    // requested thread's new commits are minted), so widening changes WHICH refs
333    // are reconciled, never what gets created.
334    let remote_names = git_remote_names(bridge.heddle_repo);
335    let threads: Vec<String> = {
336        let mut all: Vec<String> = bridge
337            .heddle_repo
338            .refs()
339            .list_threads()?
340            .into_iter()
341            .filter(|thread| !is_remote_tracking_thread_name(thread, &remote_names))
342            .map(|t| t.to_string())
343            .collect();
344        // A scoped export's own thread may be a remote-tracking name the filter
345        // drops; keep it so the requested thread is always reconciled.
346        if let Some(t) = thread
347            && !all.iter().any(|x| x == t)
348        {
349            all.push(t.to_string());
350        }
351        all
352    };
353    let markers: Vec<MarkerName> = bridge.heddle_repo.refs().list_markers()?;
354
355    // Roots of the whole-mirror served frontier: every reconciled thread's tip and
356    // every marker's state. Purging over their reachable closure (below) drops any
357    // out-of-scope commit whose tier — or an ancestor's — is now unserved, so
358    // `project_desired_refs` lags those branches/tags correctly even on a scoped
359    // export (heddle#316).
360    let mut frontier_roots: Vec<ChangeId> = Vec::new();
361    for track_name in &threads {
362        if let Some(tip) = bridge
363            .heddle_repo
364            .refs()
365            .get_thread(&ThreadName::new(track_name))?
366        {
367            frontier_roots.push(tip);
368        }
369    }
370    for marker_name in &markers {
371        if let Some(state_id) = bridge.heddle_repo.refs().get_marker(marker_name)? {
372            frontier_roots.push(state_id);
373        }
374    }
375    let frontier_reachable = reachable_states(bridge.heddle_repo, &frontier_roots)?;
376
377    // Re-validate the served set against CURRENT visibility before anything treats
378    // a mapping as "already served". A state minted while public in a prior export
379    // can be marked under-tier later; `build_existing_mapping` rebuilds its stale
380    // ChangeId→OID mapping from the notes/sidecar every run, so without this purge
381    // the frontier walk, the note re-write, and the tag sync would all keep serving
382    // the now-embargoed commit. Purging is downward-closed: a still-visible state
383    // whose ancestor is embargoed is withheld too (its Git commit chains to the
384    // embargoed one). The purge spans the mint set UNION the whole-mirror frontier,
385    // so a scoped export still drops an out-of-scope thread's now-embargoed tip; for
386    // an all-thread export the frontier ⊆ the mint set and this reduces to the prior
387    // behavior. After this, `mapping` == the served set across every reconciled ref,
388    // exactly what `frontier_git_oid` assumes.
389    // Snapshot EVERY mapped target before the purge mutates the mapping: these are
390    // exactly the commits that may already carry a `refs/notes/*` entry in the
391    // mirror, so the notes-ref retraction below must consider all of them —
392    // including the states the purge is about to drop AND any orphaned mapping a
393    // deleted thread left behind, which no current-ref frontier reaches (heddle#316).
394    let pre_purge_targets: Vec<(ChangeId, ObjectId)> =
395        bridge.mapping.iter().map(|(c, o)| (*c, *o)).collect();
396
397    let purge_reachable: HashSet<ChangeId> = sorted_states
398        .iter()
399        .copied()
400        .chain(frontier_reachable.iter().copied())
401        .collect();
402    let purge_sorted =
403        bridge.sort_states_topologically(&purge_reachable.iter().copied().collect::<Vec<_>>())?;
404    // The purge MUTATES the mapping down to the served set. Its returned drop-set
405    // (the OIDs THIS run withheld) is deliberately NOT used to classify EXISTING
406    // mirror tips: a scoped run's purge omits a tip embargoed in a PRIOR run, or
407    // out of this run's purge reach, so classifying by it misreads such a tip as
408    // served and keeps serving it. Existing-tip served classification (heads + tags
409    // below) uses the whole-mirror served-OID set (`served_oids`) instead
410    // (heddle#316).
411    purge_unserved_mappings(
412        bridge.heddle_repo,
413        &mut bridge.mapping,
414        &purge_sorted,
415        &purge_reachable,
416        &audience,
417    )?;
418
419    // Git OIDs minted during this run. Used below to partition the copied
420    // ref set into newly-written vs already-mapped — so the "newly" count
421    // is a subset of the same walk that produces the total, never a
422    // parallel tally over `list_states()` that could include an orphan
423    // state reachable from no copied ref.
424    let mut newly_minted: HashSet<ObjectId> = HashSet::new();
425
426    for state_id in sorted_states {
427        // Already mapped to a git object — the common case for git-imported
428        // states (the import populated the ChangeId→OID mapping) and for
429        // native commits a prior export already minted. Not re-counted as
430        // "newly minted" (the total is decided below by ref-reachability).
431        if bridge.mapping.has_heddle(&state_id) {
432            // For an IMPORTED commit (#565 fidelity fields present),
433            // REGENERATE the object from state into the mirror rather than
434            // leaning on the verbatim imported bytes still being there (#567).
435            // Byte-identical, so the OID is unchanged and the write is
436            // idempotent today; what changes is that a correct export no
437            // longer DEPENDS on the mirror's verbatim copy — the step that
438            // lets the mirror be dropped (#568). Native already-mapped commits
439            // have no original to reconstruct (raw_message is None), so they
440            // are left to their prior mint; re-minting those is out of scope.
441            if let Some(state) = bridge.heddle_repo.store().get_state(&state_id)?
442                && has_git_fidelity(&state)
443            {
444                let mapped = bridge.mapping.get_git(&state_id);
445                // mirror still required for non-byte-faithful commits (non-UTF8
446                // identities, --lossy); #568 mirror elimination must account for
447                // these, and full de-lossy needs byte-preserving identities (#564
448                // follow-up).
449                // Fidelity guard (#567): regenerate from state ONLY when the
450                // state is fully byte-faithful to the original import. A
451                // non-byte-faithful commit (non-UTF8 identity, or a `--lossy`
452                // import — both import-lossy and ingest-lossy carry the canonical
453                // `git_lossy` flag) would reconstruct to a WRONG SHA, so leave it
454                // on the preserved mapped OID — the verbatim mirror bytes stay the
455                // served object (the pre-#567 behavior for that commit).
456                if commit_is_byte_faithful(&state) {
457                    let content = reconstruct_commit_bytes(
458                        bridge.heddle_repo,
459                        &repo,
460                        &bridge.mapping,
461                        &state,
462                    )?;
463                    // Safety net: the regenerated object MUST hash to the mapped
464                    // OID. A mismatch means reconstruction diverged from the
465                    // imported bytes (an undetected fidelity gap), so fall back to
466                    // the verbatim mirror / mapped OID rather than write a
467                    // wrong-SHA object.
468                    let reconstructed = commit_object_id(&content);
469                    if mapped.map(|m| m == reconstructed).unwrap_or(true) {
470                        write_commit_object(&repo, &content)?;
471                    }
472                }
473            }
474            continue;
475        }
476
477        // Downward-closure (spike §5.0): withhold a state whose parent was
478        // itself withheld for this audience. Processed in topo order, so a
479        // parent's mapped-ness is already decided. A parent absent from the
480        // mapping but present in `reachable` was withheld → withhold this
481        // child too (and, transitively, all its descendants). A parent absent
482        // from both is a shallow boundary (public-by-absence) — let the mint
483        // proceed exactly as before.
484        let parent_withheld = bridge
485            .heddle_repo
486            .store()
487            .get_state(&state_id)?
488            .map(|state| {
489                state
490                    .parents
491                    .iter()
492                    .any(|p| reachable.contains(p) && bridge.mapping.get_git(p).is_none())
493            })
494            .unwrap_or(false);
495        if parent_withheld {
496            continue;
497        }
498
499        let message_override = bridge
500            .commit_message_overrides
501            .get(&state_id)
502            .map(String::as_str);
503        let Some(git_oid) = export_state(
504            &mut bridge.mapping,
505            bridge.heddle_repo,
506            &repo,
507            &state_id,
508            identity.as_ref(),
509            message_override,
510            &audience,
511        )?
512        else {
513            // Embargoed for this audience — emit absence (no commit minted).
514            continue;
515        };
516        bridge.mapping.insert(state_id, git_oid);
517        newly_minted.insert(git_oid);
518
519        // Attach a heddle note to the freshly-created commit so the
520        // change_id survives a fresh `git clone` of the destination
521        // (when only the git side travels, without our sidecar).
522        if let Some(state) = bridge.heddle_repo.store().get_state(&state_id)? {
523            let note = git_notes::HeddleNote::from_state(&state);
524            git_notes::write_note(&repo, git_oid, &note)?;
525        }
526    }
527
528    // The downward-closure served set across EVERY note target — the pre-purge
529    // mapping (commits that may already carry a note in the mirror) UNION the
530    // current post-mint mapping (served states + freshly minted commits),
531    // computed over the FULL ancestry of all of them. The branch purge is
532    // ref-rooted (it walks the whole-mirror frontier of current thread tips +
533    // markers), so it never examines an ORPHANED mapping a deleted thread left
534    // behind; without this closure such a commit's note — public-tier but with a
535    // now-Private ancestor — would slip past both the backfill gate and the
536    // retraction below. This is the SAME served rule the branch frontier uses,
537    // applied to notes (heddle#316). For an all-states export it reduces to the
538    // post-purge served set, so behavior there is unchanged.
539    let note_target_roots: Vec<ChangeId> = pre_purge_targets
540        .iter()
541        .map(|(c, _)| *c)
542        .chain(bridge.mapping.iter().map(|(c, _)| *c))
543        .collect();
544    let note_reachable_vec = reachable_states(bridge.heddle_repo, &note_target_roots)?;
545    let note_reachable: HashSet<ChangeId> = note_reachable_vec.iter().copied().collect();
546    let note_sorted = bridge.sort_states_topologically(&note_reachable_vec)?;
547    let note_served =
548        served_change_ids(bridge.heddle_repo, &note_sorted, &note_reachable, &audience)?;
549
550    // For states whose git_oid was already in the mapping (the SHA-stable
551    // path above), make sure the note is present too. This covers two
552    // cases: (a) the state was imported from a non-heddle git source and
553    // never had a note, and (b) the note was deleted from the mirror.
554    let note_targets: Vec<(ChangeId, ObjectId)> =
555        bridge.mapping.iter().map(|(c, o)| (*c, *o)).collect();
556    for (change_id, git_oid) in note_targets {
557        // Gate the backfill on the downward-closure served set, not the commit's
558        // DIRECT tier. The mapping can carry orphaned entries (a deleted thread's
559        // commits) the ref-rooted purge never examined; gating on direct
560        // visibility alone would re-publish a note for a public commit whose
561        // ancestor became Private — a commit the branch downward-closure
562        // withholds. `note_served` is the same served notion the branch frontier
563        // uses, so no note-write site can emit metadata for an unserved commit
564        // (heddle#316).
565        if note_served.contains(&change_id)
566            && git_notes::read_note(&repo, git_oid)?.is_none()
567            && let Some(state) = bridge.heddle_repo.store().get_state(&change_id)?
568        {
569            let note = git_notes::HeddleNote::from_state(&state);
570            git_notes::write_note(&repo, git_oid, &note)?;
571        }
572    }
573
574    // Retract the notes for every mapped target that is NOT served under the
575    // downward-closure rule. The mirror copies `refs/notes/*`
576    // (`collect_ref_updates`) alongside branches and tags, so a note left for an
577    // unserved commit keeps leaking its metadata even after its branch/tag were
578    // retracted. This is the notes-ref sibling of the branch/tag retraction
579    // above (heddle#316). Considering EVERY pre-purge target — not just the
580    // `embargoed_oids` the ref-rooted purge dropped — catches an orphaned note an
581    // ancestor embargo stranded on a deleted thread's commit. Guard the
582    // degenerate case where a still-served state maps to the same git OID by
583    // keeping any OID a served target maps to.
584    let served_note_oids: HashSet<ObjectId> = pre_purge_targets
585        .iter()
586        .copied()
587        .chain(bridge.mapping.iter().map(|(c, o)| (*c, *o)))
588        .filter(|(c, _)| note_served.contains(c))
589        .map(|(_, oid)| oid)
590        .collect();
591    let notes_to_retract: HashSet<ObjectId> = pre_purge_targets
592        .iter()
593        .filter(|(c, _)| !note_served.contains(c))
594        .map(|(_, oid)| *oid)
595        .filter(|oid| !served_note_oids.contains(oid))
596        .collect();
597    git_notes::remove_notes(&repo, &notes_to_retract)?;
598
599    // THE PROJECTION (heddle#316 r13): the desired heddle-owned ref-set for this
600    // audience — heads lagged to the served frontier, tags at served markers — as
601    // a pure function of the post-purge served `mapping` + audience + ownership.
602    // Every mirror ref op below (set / forced embargo retract / delete) is DERIVED
603    // from this ONE map, so a ref surface can never drift out of one enforcement
604    // pass while another keeps serving it. The mirror MATERIALIZES this desired
605    // set; downstream `plan_destination_reconcile` then reconciles each
606    // destination against it — one projection, one reconcile, all destinations.
607    let desired = project_desired_refs(bridge.heddle_repo, &bridge.mapping, &threads, &markers)?;
608
609    // The downward-closure served set over the WHOLE-MIRROR frontier — the SAME
610    // closure the purge ran over (every thread tip + every marker state). A state is
611    // served iff visible to this audience AND every reachable ancestor is served.
612    // Drives BOTH the served-OID set just below AND (further down) the tag
613    // classifier's served-but-unminted axis.
614    let frontier_served = {
615        let reachable_set: HashSet<ChangeId> = frontier_reachable.iter().copied().collect();
616        let sorted = bridge.sort_states_topologically(&frontier_reachable)?;
617        served_change_ids(bridge.heddle_repo, &sorted, &reachable_set, &audience)?
618    };
619
620    // The whole-mirror SERVED-OID set: the git OID of every served frontier state.
621    // An EXISTING mirror tip (head or tag) is "served" iff it is one of these — an
622    // actually-served commit RIGHT NOW — independent of whether THIS run's purge
623    // happened to drop it. `frontier_served` is downward-closed at the ChangeId
624    // level (served ⟹ every reachable ancestor served) and every minted commit's
625    // parents are themselves mapped, so the mapped OIDs of `frontier_served` already
626    // form the downward-closed git-ancestry set — no separate git walk is needed
627    // (heddle#316). Replaces the prior `embargoed_oids` (this-run-only purge
628    // drop-set) classification that leaked a prior-run / out-of-scope embargo.
629    let served_oids: HashSet<ObjectId> = frontier_served
630        .iter()
631        .filter_map(|state| bridge.mapping.get_git(state))
632        .collect();
633
634    // The mirror's NAME-KEYED ownership record (heddle#316): a mirror ref is
635    // MANAGED iff heddle recorded WRITING it under that full name — NEVER by OID
636    // membership (the r20c bug that classified a foreign ref at a heddle OID as
637    // heddle's). The mirror analog of the destination's `heddle-exported-refs`
638    // record. Read BEFORE the head/tag loops mutate any ref so a genuine first run
639    // (absent record) seeds from the prior-run ref set rather than misreading every
640    // pre-existing ref as foreign — which would silently stop embargo retraction.
641    let mut managed_record = read_or_seed_mirror_managed_refs(&repo)?;
642
643    // Reconcile the mirror's HEADS via the shared `reconcile_ref` decision. Iterate
644    // the CURRENT threads: a dropped thread's stale branch is intentionally NOT
645    // pruned (the #289 dropped-thread contract) — it is never iterated, survives in
646    // the mirror, and stays in the managed record so the push still copies it. The
647    // desired head target is the maximal served ancestor-or-self of the thread tip
648    // (`frontier_git_oid`, via `project_desired_refs`). The existing tip is
649    // classified against the whole-mirror served-OID set, so a still-served tip
650    // fast-forwards, an embargoed tip force-rewinds to its served ancestor, and a
651    // whole-line-embargoed head is deleted. A scoped export reconciles every current
652    // thread but MATERIALIZES (creates) only the one it was scoped to.
653    for track_name in &threads {
654        if bridge
655            .heddle_repo
656            .refs()
657            .get_thread(&ThreadName::new(track_name))?
658            .is_none()
659        {
660            // A listed thread name with no tip is neither synced nor pruned.
661            continue;
662        }
663        let branch_ref = format!("refs/heads/{track_name}");
664        let in_scope = thread.is_none() || thread == Some(track_name.as_str());
665        let desired_oid = desired.get(&branch_ref).copied();
666        let existing_oid = branch_tip_oid(&repo, &branch_ref);
667        match reconcile_ref(
668            ReconcileNs::Head,
669            desired_oid,
670            existing_oid,
671            in_scope,
672            /* marker_served_unminted */ false,
673            &served_oids,
674        ) {
675            ReconcileOp::Write => {
676                let git_oid = desired_oid.expect("Write implies a desired target");
677                sync_track_to_branch(&repo, track_name, git_oid)?;
678                managed_record.insert(branch_ref.clone(), git_oid);
679                stats.threads_synced += 1;
680                stats.branches.push(ExportedRef {
681                    name: track_name.clone(),
682                    tip: git_oid,
683                });
684            }
685            ReconcileOp::ForceRewind => {
686                let git_oid = desired_oid.expect("ForceRewind implies a desired target");
687                set_reference(
688                    &repo,
689                    &branch_ref,
690                    git_oid,
691                    RefPrecondition::Any,
692                    "heddle: retract embargoed thread frontier",
693                )?;
694                managed_record.insert(branch_ref.clone(), git_oid);
695                stats.threads_synced += 1;
696                stats.branches.push(ExportedRef {
697                    name: track_name.clone(),
698                    tip: git_oid,
699                });
700            }
701            ReconcileOp::Delete => {
702                delete_reference_if_present(&repo, &branch_ref)?;
703                managed_record.remove(&branch_ref);
704            }
705            // A head has no preserve path — `frontier_git_oid` recomputes the
706            // target every run, so a head is always rewound/deleted, never kept at
707            // a stale tip (Preserve is unreachable for `ReconcileNs::Head`).
708            ReconcileOp::Skip | ReconcileOp::Preserve => {}
709        }
710    }
711
712    // Reconcile the mirror's TAGS via the SAME `reconcile_ref` decision as heads.
713    // Iterate the UNION of current markers AND the managed-record tag names: a
714    // DELETED marker drops out of `markers`, so its stale managed mirror tag is
715    // reachable only via the managed-record side (heddle#316 S3 — a deleted marker
716    // must delete its tag). A FOREIGN tag heddle never wrote is in NEITHER set, so
717    // it is never visited: it survives untouched and stays out of the push frontier
718    // (`collect_managed_ref_updates`). The desired tag target comes from the
719    // projection (a marker minted this run); the served-but-unminted vs embargoed
720    // split (r18 PRESERVE vs r19 DELETE) is the existing tag's served-ness combined
721    // with `marker_served_unminted`.
722    let mut tag_names: std::collections::BTreeSet<String> =
723        markers.iter().map(|m| m.to_string()).collect();
724    for full_name in managed_record.keys() {
725        if let Some(tag) = full_name.strip_prefix("refs/tags/") {
726            tag_names.insert(tag.to_string());
727        }
728    }
729
730    for name in &tag_names {
731        let tag_ref = format!("refs/tags/{name}");
732        let existing_raw_oid = direct_ref_oid(&repo, &tag_ref);
733        let existing_oid = existing_raw_oid.and_then(|oid| peel_to_commit_oid(&repo, oid));
734        let desired_oid = desired.get(&tag_ref).copied();
735        let in_scope = thread.is_none();
736        // A live marker whose served target was NOT minted into the mapping this
737        // run (a scoped export that didn't reach it). The desired projection omits
738        // such a tag (it only publishes minted markers), so the reconcile sees
739        // `desired_oid == None`; this flag plus the existing tag's served-ness is
740        // the sole axis splitting r18-PRESERVE from r19-DELETE.
741        let marker_served_unminted = match bridge
742            .heddle_repo
743            .refs()
744            .get_marker(&MarkerName::new(name.as_str()))?
745        {
746            Some(state) => {
747                bridge.mapping.get_git(&state).is_none() && frontier_served.contains(&state)
748            }
749            None => false,
750        };
751        if let (Some(desired), Some(raw), Some(peeled)) =
752            (desired_oid, existing_raw_oid, existing_oid)
753            && raw != desired
754            && peeled == desired
755        {
756            managed_record.insert(tag_ref.clone(), raw);
757            stats.markers_synced += 1;
758            stats.tags.push(ExportedRef {
759                name: name.clone(),
760                tip: raw,
761            });
762            continue;
763        }
764        match reconcile_ref(
765            ReconcileNs::Tag,
766            desired_oid,
767            existing_oid,
768            in_scope,
769            marker_served_unminted,
770            &served_oids,
771        ) {
772            ReconcileOp::Write => {
773                let git_oid = desired_oid.expect("Write implies a desired target");
774                sync_marker_to_tag(&repo, name, git_oid)?;
775                managed_record.insert(tag_ref.clone(), git_oid);
776                stats.markers_synced += 1;
777                stats.tags.push(ExportedRef {
778                    name: name.clone(),
779                    tip: git_oid,
780                });
781            }
782            ReconcileOp::Delete => {
783                delete_reference_if_present(&repo, &tag_ref)?;
784                managed_record.remove(&tag_ref);
785            }
786            // PRESERVE keeps the existing served tag (still managed → stays in the
787            // record); SKIP is a no-op. A tag is free-move and never force-rewinds
788            // (ForceRewind is unreachable for `ReconcileNs::Tag`).
789            ReconcileOp::Preserve | ReconcileOp::Skip | ReconcileOp::ForceRewind => {}
790        }
791    }
792
793    // Persist the updated ownership record so the next reconcile — and the push
794    // frontier (`collect_managed_ref_updates`) — read heddle's managed set by name.
795    write_mirror_managed_refs(&repo, &managed_record)?;
796
797    // Every count in the summary is a partition of the SINGLE copied ref
798    // set: `total` is unique commits reachable from the mirror's branch/tag
799    // tips (the exact ref set `copy_mirror_to_path` writes via
800    // `collect_ref_updates`), and `states_exported` ("newly") is the subset
801    // of THAT walk minted this run. Deriving both from one walk — rather
802    // than tallying `states_exported` inline over `list_states()` — makes
803    // `newly + already == total` hold by construction: a state minted into
804    // the mirror but reachable from no copied ref (e.g. a dropped thread's
805    // orphan history) is in neither count, so the impossible
806    // "1 total (2 newly written)" summary cannot occur.
807    let counts = count_exported_commits(&repo, &newly_minted)?;
808    stats.commits_total = counts.total;
809    stats.states_exported = counts.newly;
810
811    bridge.save_mapping_to_disk()?;
812
813    Ok(stats)
814}
815
/// The ref namespace a mirror reconcile decision applies to.
///
/// Heads and tags share one decision function ([`reconcile_ref`]); the
/// namespace only selects how an existing ref with a desired target is
/// updated: a head is either written through the guarded path or force-rewound
/// when its current tip is not served, while a tag is always plainly written.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum ReconcileNs {
    /// A branch ref under `refs/heads/`.
    Head,
    /// A tag ref under `refs/tags/`.
    Tag,
}
825
/// The action the mirror reconcile takes on one ref.
///
/// This is the single outcome type shared by the head and tag reconciles
/// (heddle#316). The refs that reach the decision are the current
/// threads/markers plus the names heddle recorded as managed, so every variant
/// acts on a ref heddle owns — a foreign ref is never visited.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum ReconcileOp {
    /// Leave the ref alone: either a scoped export declining to create an
    /// out-of-scope ref, or a true no-op (no desired target and no existing ref).
    Skip,
    /// Publish the desired target via the namespace's normal write path — a
    /// head fast-forwards (or is created), a tag is retargeted (or created).
    Write,
    /// Force a head onto the desired target, bypassing the fast-forward guard.
    /// Used for the embargo retract that rewinds an unserved tip to its served
    /// ancestor.
    ForceRewind,
    /// Keep an existing served tag whose marker target is served but was not
    /// minted this run (r18); a later all-thread export re-mints and moves it.
    Preserve,
    /// Remove the ref because nothing served remains for it (whole-line
    /// embargo, r19 embargoed existing tag, or a deleted marker's stale tag).
    Delete,
}
848
849/// The mirror reconcile decision — IDENTICAL in shape for heads and tags
850/// (heddle#316). `desired_oid` is the served target the projection wants published
851/// (`None` ⇒ nothing served for this ref this run); `existing_oid` is the mirror
852/// ref's CURRENT tip, already PEELED to a commit by [`branch_tip_oid`] (so an
853/// annotated foreign tag colliding with a marker name is tested by its commit, not
854/// its tag-object OID — heddle#316 risk #2). `in_scope` gates only
855/// MATERIALIZATION: a scoped export reconciles existing refs but never CREATES a
856/// brand-new one the caller did not ask for. `marker_served_unminted` is set only
857/// for a tag whose live marker target is served but was not minted this run — the
858/// sole axis that, combined with `existing_served`, splits r18-PRESERVE from
859/// r19-DELETE. `served_oids` is the whole-mirror served-OID set classifying the
860/// existing tip (NOT this run's purge drop-set, which omits a prior-run /
861/// out-of-scope embargo).
862fn reconcile_ref(
863    ns: ReconcileNs,
864    desired_oid: Option<ObjectId>,
865    existing_oid: Option<ObjectId>,
866    in_scope: bool,
867    marker_served_unminted: bool,
868    served_oids: &HashSet<ObjectId>,
869) -> ReconcileOp {
870    // `existing_oid` is already the peeled commit OID (`branch_tip_oid`), so this
871    // membership test compares commit-against-commit (risk #2).
872    let existing_served = existing_oid
873        .map(|oid| served_oids.contains(&oid))
874        .unwrap_or(false);
875    match (desired_oid, existing_oid) {
876        // Scoped export, would-create: never materialize a ref the caller did not
877        // ask to export.
878        (Some(_), None) if !in_scope => ReconcileOp::Skip,
879        // Create a fresh ref at the served target.
880        (Some(_), None) => ReconcileOp::Write,
881        // Head with an existing tip: a still-served tip fast-forwards (r17 FF guard
882        // applies); an embargoed tip is force-rewound to its served ancestor.
883        (Some(_), Some(_)) if ns == ReconcileNs::Head => {
884            if existing_served {
885                ReconcileOp::Write
886            } else {
887                ReconcileOp::ForceRewind
888            }
889        }
890        // Tag with an existing tip: free-move force-retarget to the served target.
891        (Some(_), Some(_)) => ReconcileOp::Write,
892        // Nothing served, nothing present.
893        (None, None) => ReconcileOp::Skip,
894        // Nothing served, but a tag exists whose marker target is served-but-
895        // unminted AND the existing tag is itself served: PRESERVE (r18).
896        (None, Some(_)) if marker_served_unminted && existing_served => ReconcileOp::Preserve,
897        // Nothing served, an existing ref remains: DELETE (whole-line embargo, r19
898        // embargoed existing tag, or a deleted marker's stale tag).
899        (None, Some(_)) => ReconcileOp::Delete,
900    }
901}
902
903fn git_remote_names(heddle_repo: &HeddleRepository) -> HashSet<String> {
904    let Ok(repo) = SleyRepository::discover(heddle_repo.root()) else {
905        return HashSet::new();
906    };
907    repo.remote_names()
908        .unwrap_or_default()
909        .into_iter()
910        .filter(|name| !name.trim().is_empty())
911        .collect()
912}
913
/// Whether `thread` has the shape `<remote>/<branch>` for a known remote.
///
/// The name is split at its FIRST `/`: the part before it must be one of
/// `remote_names`, and the part after it must be non-empty. A name without any
/// `/` is never a remote-tracking name.
fn is_remote_tracking_thread_name(thread: &str, remote_names: &HashSet<String>) -> bool {
    match thread.split_once('/') {
        Some((remote, branch)) => remote_names.contains(remote) && !branch.is_empty(),
        None => false,
    }
}
920
921/// Purge from `mapping` every reachable state whose effective visibility is no
922/// longer served by `audience`, and return the Git OIDs that were dropped so
923/// the caller can retract any ref still pointing at them.
924///
925/// A state can be minted while public and only later marked under-tier; its
926/// stale ChangeId→OID mapping is rebuilt from the notes/sidecar on every
927/// export, so the served set must be re-derived against CURRENT visibility
928/// here rather than trusted from the mapping. The purge is downward-closed: a
929/// still-visible state is unserved if any reachable ancestor is unserved,
930/// because its minted Git commit chains to the ancestor's (now-embargoed)
931/// commit. `sorted_states` is topological (parents before children), so a
932/// parent's served-ness is decided before its child is examined.
933fn purge_unserved_mappings(
934    heddle_repo: &HeddleRepository,
935    mapping: &mut SyncMapping,
936    sorted_states: &[ChangeId],
937    reachable: &HashSet<ChangeId>,
938    audience: &AudienceTier,
939) -> GitResult<HashSet<ObjectId>> {
940    let served = served_change_ids(heddle_repo, sorted_states, reachable, audience)?;
941    let mut purged: HashSet<ObjectId> = HashSet::new();
942    for state_id in sorted_states {
943        if !served.contains(state_id)
944            && let Some(oid) = mapping.remove(state_id)
945        {
946            purged.insert(oid);
947        }
948    }
949    Ok(purged)
950}
951
952/// The downward-closure served set (spike §5.0): a state is served iff it is
953/// visible to `audience` AND every *reachable* parent is itself served. The
954/// topo order of `sorted_states` guarantees a parent's servedness is already
955/// decided when its child is visited. A parent outside `reachable` is a shallow
956/// boundary (public-by-absence, treated as served).
957///
958/// The single notion of "served" shared by the branch-frontier purge and the
959/// notes-ref retraction — so a note can never be published for a commit whose
960/// branch the same rule would withhold (heddle#316).
961fn served_change_ids(
962    heddle_repo: &HeddleRepository,
963    sorted_states: &[ChangeId],
964    reachable: &HashSet<ChangeId>,
965    audience: &AudienceTier,
966) -> GitResult<HashSet<ChangeId>> {
967    let mut served: HashSet<ChangeId> = HashSet::new();
968    for state_id in sorted_states {
969        let tier = heddle_repo
970            .effective_visibility_tier(state_id)
971            .map_err(|e| {
972                GitBridgeError::Git(format!("resolve visibility for {state_id}: {e:#}"))
973            })?;
974        let parents_served = match heddle_repo.store().get_state(state_id)? {
975            Some(state) => state
976                .parents
977                .iter()
978                .all(|p| !reachable.contains(p) || served.contains(p)),
979            None => true,
980        };
981        if visible(&tier, audience) && parents_served {
982            served.insert(*state_id);
983        }
984    }
985    Ok(served)
986}
987
988/// Resolve `ref_name` to its tip commit OID in the mirror, or `None` when the
989/// ref is absent or unpeelable.
990fn branch_tip_oid(repo: &SleyRepository, ref_name: &str) -> Option<ObjectId> {
991    let oid = repo
992        .find_reference(ref_name)
993        .ok()
994        .flatten()?
995        .peeled_oid(repo)
996        .ok()
997        .flatten()?;
998    peel_to_commit_oid(repo, oid)
999}
1000
1001fn direct_ref_oid(repo: &SleyRepository, ref_name: &str) -> Option<ObjectId> {
1002    match repo.find_reference(ref_name).ok()??.target {
1003        ReferenceTarget::Direct(oid) => Some(oid),
1004        ReferenceTarget::Symbolic(_) => None,
1005    }
1006}
1007
1008fn peel_to_commit_oid(repo: &SleyRepository, mut oid: ObjectId) -> Option<ObjectId> {
1009    loop {
1010        let object = repo.read_object(&oid).ok()?;
1011        match object.object_type {
1012            GitObjectType::Commit => return Some(oid),
1013            GitObjectType::Tag => {
1014                oid = repo.read_tag(&oid).ok()?.object;
1015            }
1016            _ => return None,
1017        }
1018    }
1019}
1020
1021/// Project the DESIRED heddle-owned ref-set for an export: full ref name → its
1022/// served target OID. A ref appears iff heddle should publish it now; a ref the
1023/// projection omits is one the mirror reconcile must DELETE (its prior export is
1024/// stale). This is the single place that decides WHICH refs exist and at WHAT
1025/// target — the mirror reconcile, and downstream every destination reconcile,
1026/// derive their ops (create / fast-forward / forced rewind / delete / skip) from
1027/// this set, so a surface can never silently drop out of one enforcement pass
1028/// while another keeps serving it (heddle#316 r13).
1029///
1030/// * heads — `refs/heads/<thread>` at the maximal SERVED ancestor-or-self of the
1031///   thread tip ([`frontier_git_oid`]); a thread whose whole line is unserved is
1032///   ABSENT (downward-closed: an embargoed commit and its descendants stay off
1033///   the public branch).
1034/// * tags — `refs/tags/<marker>` at the marker's served state; a marker whose
1035///   state is not served (embargoed, withheld for a withheld ancestor, or
1036///   retargeted to a never-minted Private state) is ABSENT.
1037///
1038/// Notes (`refs/notes/heddle`) are the history-bearing member of the desired set
1039/// and are projected by content rebuild (backfill + [`git_notes::remove_notes`])
1040/// upstream rather than a target swap, so they are not enumerated here.
1041fn project_desired_refs(
1042    heddle_repo: &HeddleRepository,
1043    mapping: &SyncMapping,
1044    threads: &[String],
1045    markers: &[MarkerName],
1046) -> GitResult<std::collections::HashMap<String, ObjectId>> {
1047    let mut desired = std::collections::HashMap::new();
1048    for track_name in threads {
1049        let Some(tip) = heddle_repo
1050            .refs()
1051            .get_thread(&ThreadName::new(track_name))?
1052        else {
1053            continue;
1054        };
1055        if let Some(git_oid) = frontier_git_oid(heddle_repo, mapping, tip)? {
1056            desired.insert(format!("refs/heads/{track_name}"), git_oid);
1057        }
1058    }
1059    for marker_name in markers {
1060        let Some(state_id) = heddle_repo.refs().get_marker(marker_name)? else {
1061            continue;
1062        };
1063        if let Some(git_oid) = mapping.get_git(&state_id) {
1064            desired.insert(format!("refs/tags/{marker_name}"), git_oid);
1065        }
1066    }
1067    Ok(desired)
1068}
1069
1070/// The Git OID the public branch should lag to for a thread whose raw tip is
1071/// `tip`: the maximal **served** ancestor-or-self of `tip`. A state is served
1072/// iff it is present in the mapping — `purge_unserved_mappings` runs first to
1073/// drop any mapped-but-now-embargoed state (and its descendants), so the mapped
1074/// set is exactly the served set. Returns `None` when no ancestor of `tip` is
1075/// served (the whole line is embargoed to its root → absence).
1076fn frontier_git_oid(
1077    heddle_repo: &HeddleRepository,
1078    mapping: &SyncMapping,
1079    tip: ChangeId,
1080) -> GitResult<Option<ObjectId>> {
1081    let mut visited = HashSet::new();
1082    let mut stack = vec![tip];
1083    let mut frontier: Vec<ChangeId> = Vec::new();
1084    while let Some(id) = stack.pop() {
1085        if !visited.insert(id) {
1086            continue;
1087        }
1088        // Stop at the first served (mapped) state on each downward path: that
1089        // is a maximal served ancestor — its own served ancestors are
1090        // dominated by it, so we do not descend past it.
1091        if mapping.get_git(&id).is_some() {
1092            frontier.push(id);
1093            continue;
1094        }
1095        if let Some(state) = heddle_repo.store().get_state(&id)? {
1096            stack.extend(state.parents.iter().copied());
1097        }
1098    }
1099    // A linear thread yields exactly one maximal served state. A merge whose
1100    // embargo splits the DAG can leave an antichain of ≥2 maximal served
1101    // states; advertising each sibling line under its own ref is the
1102    // multi-root work deferred to issues #4/#5. Until then the branch lags
1103    // deterministically (lowest ChangeId) — never published from a raw
1104    // embargoed tip — and the other lines are absent from this branch.
1105    let chosen = frontier.into_iter().min_by_key(|c| c.to_string_full());
1106    Ok(chosen.and_then(|c| mapping.get_git(&c)))
1107}
1108
1109fn reachable_states(
1110    heddle_repo: &HeddleRepository,
1111    roots: &[ChangeId],
1112) -> GitResult<Vec<ChangeId>> {
1113    let mut stack = roots.to_vec();
1114    let mut seen = HashSet::new();
1115    let mut states = Vec::new();
1116    while let Some(state_id) = stack.pop() {
1117        if !seen.insert(state_id) {
1118            continue;
1119        }
1120        states.push(state_id);
1121        if let Some(state) = heddle_repo.store().get_state(&state_id)? {
1122            stack.extend(state.parents.iter().copied());
1123        }
1124    }
1125    Ok(states)
1126}
1127
1128fn state_to_signature(state: &objects::object::State) -> Signature {
1129    let seconds = state.created_at.timestamp();
1130    let raw = format!(
1131        "{} <{}> {} +0000",
1132        state.attribution.principal.name, state.attribution.principal.email, seconds
1133    )
1134    .into_bytes();
1135    Signature {
1136        name: sley::plumbing::sley_core::ByteString::new(
1137            state.attribution.principal.name.as_bytes().to_vec(),
1138        ),
1139        email: sley::plumbing::sley_core::ByteString::new(
1140            state.attribution.principal.email.as_bytes().to_vec(),
1141        ),
1142        time: sley::GitTime::new(seconds, 0),
1143        raw,
1144    }
1145}
1146
1147fn submodule_oid_from_blob(content: &[u8]) -> Option<ObjectId> {
1148    let text = std::str::from_utf8(content).ok()?;
1149    let text = text.trim();
1150    let trimmed = text.strip_prefix(SUBMODULE_PREFIX)?.trim();
1151
1152    ObjectId::from_hex(ObjectFormat::Sha1, trimmed).ok()
1153}
1154
#[cfg(test)]
mod tests {
    use objects::object::{Attribution, ContentHash, Principal, State};

    use super::*;

    /// A parentless, human-attributed state that carries a raw git message —
    /// the field the export treats as the "imported from git" fidelity signal.
    fn fidelity_state() -> State {
        let author = Principal::new("Alice", "alice@example.com");
        let root = State::new(
            ContentHash::from_bytes([7u8; 32]),
            Vec::new(),
            Attribution::human(author),
        );
        root.with_raw_message("an imported commit\n")
    }

    /// An imported state that is not flagged lossy is eligible for byte-exact
    /// reconstruction.
    #[test]
    fn byte_faithful_when_fidelity_present_and_not_lossy() {
        let imported = fidelity_state();
        assert!(commit_is_byte_faithful(&imported));
    }

    /// The canonical `git_lossy` marker — set by BOTH `import --lossy` and
    /// `ingest --lossy` — must route the commit OFF the reconstruct path no
    /// matter which import surface produced it: a lossy import drops/converts
    /// tree entries, so reconstructing from state would mint a wrong SHA.
    #[test]
    fn lossy_marker_blocks_reconstruction() {
        let lossy_import = fidelity_state().with_git_lossy(true);
        let faithful = commit_is_byte_faithful(&lossy_import);
        assert!(
            !faithful,
            "a state carrying the canonical git_lossy marker must NOT be \
             reconstructed from state, regardless of import surface"
        );
    }
}