Skip to main content

cli/bridge/
git_import.rs

1// SPDX-License-Identifier: Apache-2.0
//! Importing Git commits into Heddle as states.
3
4use std::{collections::HashSet, path::Path};
5
6use chrono::{TimeZone, Utc};
7use objects::object::{Agent, Attribution, ChangeId, Principal, State, Status};
8use repo::Repository as HeddleRepository;
9use tracing::warn;
10
11pub use super::git_import_tree::{GitTreeImporter, import_git_tree};
12use crate::bridge::{
13    git_core::{
14        GitBridge, GitBridgeError, GitResult, RefNamespace, RefUpdate, SyncMapping,
15        apply_ref_updates, copy_reachable_objects, git_err, open_repo,
16        thread_is_unclaimed_bootstrap,
17    },
18    git_notes,
19    git_util::{ImportStats, PartialMirrorRef, SkippedRef},
20};
21
/// One source ref the import will consider, with both its immediate target
/// (the OID stored on disk for that ref — for annotated tags this is the
/// tag *object* OID) and the peeled commit OID we use to walk ancestry.
///
/// Keeping both is what lets the bridge round-trip annotated tags as actual
/// tag objects: we copy the tag object into the mirror and write the
/// mirror's ref pointing at it, and later `sync_marker_to_tag`'s
/// already-exists check sees the existing ref peel to the right commit and
/// preserves the annotated form unchanged.
struct RefPlan {
    /// Shortened ref name as returned by `reference.name().shorten()`.
    /// Used as the mirror ref name, as the Heddle thread name for
    /// branch plans, and to match against an explicit ref filter.
    short_name: String,
    /// Which ref namespace the plan belongs to. Local branches and
    /// remote-tracking branches are both planned as
    /// `RefNamespace::Branch`; tags are planned as `RefNamespace::Tag`.
    namespace: RefNamespace,
    /// The OID the source ref points at directly. For lightweight tags
    /// and branches this is a commit; for annotated tags it's a tag
    /// object that wraps a commit.
    immediate_oid: gix::hash::ObjectId,
    /// The commit reachable by peeling `immediate_oid` through any tag
    /// chain. Used as a tip for ancestry walking.
    peeled_commit_oid: gix::hash::ObjectId,
}
42
43/// Peel `reference` to its final OID and confirm the OID is a commit. If
44/// it's a blob (e.g. `git/git`'s `refs/tags/junio-gpg-pub` pointing at a
45/// GPG public key), a tree (e.g. `git-lfs`'s `refs/tags/core-gpg-keys`),
46/// or anything else, return `Ok(None)`. The caller is expected to log
47/// and record the skip via `SkippedRef`.
48///
49/// Heddle's marker model currently requires the target to be a commit;
50/// the long-term fix is a `MarkerTarget::NonCommitRef { peeled_oid,
51/// peeled_kind }` variant that round-trips losslessly. Until that lands,
52/// this guard prevents the import from crashing on the very common
53/// "tag-points-at-non-commit-blob" pattern in mature OSS repos.
54fn peel_to_commit_oid(
55    repo: &gix::Repository,
56    reference: &mut gix::Reference,
57) -> GitResult<Result<gix::hash::ObjectId, gix::objs::Kind>> {
58    let oid = reference.peel_to_id().map_err(git_err)?.detach();
59    let object = repo.find_object(oid).map_err(git_err)?;
60    if object.kind == gix::objs::Kind::Commit {
61        Ok(Ok(oid))
62    } else {
63        Ok(Err(object.kind))
64    }
65}
66
67fn remote_tracking_ref_suggestions(
68    repo: &gix::Repository,
69    missing: &[String],
70) -> GitResult<Vec<String>> {
71    let missing = missing.iter().map(String::as_str).collect::<HashSet<_>>();
72    let mut suggestions = Vec::new();
73
74    for reference in repo
75        .references()
76        .map_err(git_err)?
77        .prefixed("refs/remotes/")
78        .map_err(git_err)?
79    {
80        let mut reference = reference.map_err(git_err)?;
81        let Some(_) = reference.target().try_id() else {
82            continue;
83        };
84        let short = reference.name().shorten().to_string();
85        if short.ends_with("/HEAD") {
86            continue;
87        }
88        if peel_to_commit_oid(repo, &mut reference)?.is_err() {
89            continue;
90        }
91        let Some((_remote, branch)) = short.split_once('/') else {
92            continue;
93        };
94        if missing.contains(branch) {
95            suggestions.push(format!(
96                "Remote-tracking branch '{short}' exists. Import it with `heddle bridge git import --ref {short}`. If you want a local branch with the shorter name later, create it in Heddle and sync it back through `heddle push`."
97            ));
98        }
99    }
100
101    suggestions.sort();
102    suggestions.dedup();
103    Ok(suggestions)
104}
105
106/// Resolve a heddle change_id for a git commit. Tried in order:
107///   1. **Sidecar mapping** (already loaded into `mapping`): if the git_oid
108///      is already known, reuse the change_id without scanning anything.
109///   2. **`refs/notes/heddle`**: if a note attached to this commit carries
110///      a change_id, adopt it. This is how identity survives a fresh
111///      `git clone` of a heddle-exported repo.
112///   3. **Legacy `Heddle-Change-Id:` trailer**: kept for backward
113///      compatibility with commits exported by pre-Phase-B builds.
114///   4. **Deterministic from git SHA**: the original heddle behavior —
115///      take the first 16 bytes of the git SHA. Two heddle repos that
116///      independently import the same git commit get the same change_id,
117///      which is what we want.
118fn resolve_identity(
119    mapping: &SyncMapping,
120    repo: &gix::Repository,
121    git_oid: gix::hash::ObjectId,
122    trailers: &std::collections::HashMap<String, String>,
123) -> GitResult<(ChangeId, Option<git_notes::HeddleNote>)> {
124    if let Some(existing) = mapping.get_heddle(git_oid) {
125        return Ok((existing, None));
126    }
127    if let Some(note) = git_notes::read_note(repo, git_oid)? {
128        let change_id = ChangeId::parse(&note.change_id)?;
129        return Ok((change_id, Some(note)));
130    }
131    if let Some(id_str) = trailers.get(GitBridge::TRAILER_CHANGE_ID) {
132        return Ok((ChangeId::parse(id_str)?, None));
133    }
134    let oid_hex = git_oid.to_hex_with_len(40).to_string();
135    let bytes = hex::decode(&oid_hex[..32])
136        .map_err(|err| GitBridgeError::InvalidMapping(err.to_string()))?;
137    let mut change_id_bytes = [0u8; 16];
138    change_id_bytes.copy_from_slice(&bytes);
139    Ok((ChangeId::from_bytes(change_id_bytes), None))
140}
141
142/// Import a single Git commit as a Heddle state.
143pub fn import_commit(
144    mapping: &mut SyncMapping,
145    heddle_repo: &HeddleRepository,
146    repo: &gix::Repository,
147    tree_importer: &mut GitTreeImporter<'_>,
148    git_oid: gix::hash::ObjectId,
149) -> GitResult<ChangeId> {
150    let commit = repo.find_commit(git_oid).map_err(git_err)?;
151    let message = commit.message_raw_sloppy().to_string();
152    let author = commit.author().map_err(git_err)?;
153    let author_name = author.name.to_string();
154    let author_email = author.email.to_string();
155    let timestamp = author.time().map_err(git_err)?.seconds;
156    let tree_id = commit.tree_id().map_err(git_err)?.detach();
157    let parent_git_oids: Vec<gix::hash::ObjectId> =
158        commit.parent_ids().map(|id| id.detach()).collect();
159
160    let trailers = GitBridge::parse_trailers(&message);
161    let (change_id, note) = resolve_identity(mapping, repo, git_oid, &trailers)?;
162
163    let parent_oids: Vec<ChangeId> = parent_git_oids
164        .iter()
165        .map(|parent_oid| {
166            mapping
167                .get_heddle(*parent_oid)
168                .ok_or_else(|| GitBridgeError::CommitNotFound(parent_oid.to_string()))
169        })
170        .collect::<GitResult<Vec<_>>>()?;
171
172    let tree_hash = tree_importer.import_tree(tree_id)?;
173
174    let principal = Principal::new(author_name, author_email);
175
176    // Agent / confidence / status: prefer the note (Phase-B-and-later format)
177    // and fall back to legacy trailers for pre-Phase-B history.
178    let agent = note
179        .as_ref()
180        .and_then(|n| n.agent.as_ref())
181        .map(|a| Agent::new(a.provider.clone(), a.model.clone()))
182        .or_else(|| {
183            trailers
184                .get(GitBridge::TRAILER_AGENT)
185                .and_then(|agent_str| {
186                    let parts: Vec<&str> = agent_str.split('/').collect();
187                    if parts.len() == 2 {
188                        Some(Agent::new(parts[0], parts[1]))
189                    } else {
190                        None
191                    }
192                })
193        });
194
195    let attribution = if let Some(agent) = agent {
196        Attribution::with_agent(principal, agent)
197    } else {
198        Attribution::human(principal)
199    };
200
201    let intent = GitBridge::extract_intent(&message);
202    let confidence = note.as_ref().and_then(|n| n.confidence).or_else(|| {
203        trailers
204            .get(GitBridge::TRAILER_CONFIDENCE)
205            .and_then(|c| c.parse::<f32>().ok())
206            .map(|c| c.clamp(0.0, 1.0))
207    });
208    let status = note
209        .as_ref()
210        .map(|n| match n.status.as_str() {
211            "published" => Status::Published,
212            _ => Status::Draft,
213        })
214        .or_else(|| {
215            trailers
216                .get(GitBridge::TRAILER_STATUS)
217                .map(|s| match s.as_str() {
218                    "published" => Status::Published,
219                    _ => Status::Draft,
220                })
221        })
222        .unwrap_or(Status::Draft);
223
224    let created_at = Utc.timestamp_opt(timestamp, 0).single().ok_or_else(|| {
225        GitBridgeError::InvalidMapping(format!("invalid Git timestamp: {}", timestamp))
226    })?;
227
228    let state = State::new(tree_hash, parent_oids, attribution)
229        .with_change_id(change_id)
230        .with_intent(intent.unwrap_or_else(|| "Imported from Git".to_string()))
231        .with_timestamp(created_at)
232        .with_status(status);
233
234    let state = if let Some(c) = confidence {
235        state.with_confidence(c)
236    } else {
237        state
238    };
239
240    heddle_repo.store().put_state(&state)?;
241
242    Ok(change_id)
243}
244
245/// Import Git commits into Heddle states.
246pub fn import_all(bridge: &mut GitBridge, git_path: Option<&Path>) -> GitResult<ImportStats> {
247    import_with_ref_filter(bridge, git_path, None)
248}
249
250pub fn import_selected_refs(
251    bridge: &mut GitBridge,
252    git_path: Option<&Path>,
253    refs: &[String],
254) -> GitResult<ImportStats> {
255    let wanted = refs.iter().cloned().collect::<HashSet<_>>();
256    import_with_ref_filter(bridge, git_path, Some(&wanted))
257}
258
259fn import_with_ref_filter(
260    bridge: &mut GitBridge,
261    git_path: Option<&Path>,
262    wanted_refs: Option<&HashSet<String>>,
263) -> GitResult<ImportStats> {
264    let repo = if let Some(path) = git_path {
265        open_repo(path)?
266    } else {
267        bridge.open_git_repo()?
268    };
269
270    let mut stats = ImportStats::default();
271    let mut plans: Vec<RefPlan> = Vec::new();
272
273    // Build per-ref plans for branches and tags. Each plan captures the
274    // immediate target (annotated-tag-aware) and the peeled commit (for
275    // ancestry walking). Non-commit-pointing refs are recorded in
276    // `skipped_non_commit_refs` and excluded from the plan list.
277    for reference in repo
278        .references()
279        .map_err(git_err)?
280        .local_branches()
281        .map_err(git_err)?
282    {
283        let mut reference = reference.map_err(git_err)?;
284        let short = reference.name().shorten().to_string();
285        if wanted_refs.is_some_and(|wanted| !wanted.contains(&short)) {
286            continue;
287        }
288        let immediate = match reference.target().try_id() {
289            Some(id) => id.to_owned(),
290            None => continue, // symbolic ref (e.g. HEAD) — not a real ref to import
291        };
292        match peel_to_commit_oid(&repo, &mut reference)? {
293            Ok(commit_oid) => plans.push(RefPlan {
294                short_name: short,
295                namespace: RefNamespace::Branch,
296                immediate_oid: immediate,
297                peeled_commit_oid: commit_oid,
298            }),
299            Err(kind) => {
300                // A *branch* pointing at a non-commit is exceedingly rare
301                // and strongly suggests upstream corruption. Record + skip.
302                warn!(
303                    "skipping local branch {} -> {} (not a commit, kind={:?})",
304                    short, immediate, kind
305                );
306                stats.skipped_non_commit_refs.push(SkippedRef {
307                    name: format!("refs/heads/{short}"),
308                    peeled_oid: immediate.to_string(),
309                    peeled_kind: format!("{kind:?}"),
310                });
311            }
312        }
313    }
314    if wanted_refs.is_some() {
315        for reference in repo
316            .references()
317            .map_err(git_err)?
318            .prefixed("refs/remotes/")
319            .map_err(git_err)?
320        {
321            let mut reference = reference.map_err(git_err)?;
322            let short = reference.name().shorten().to_string();
323            if short.ends_with("/HEAD") {
324                continue;
325            }
326            if wanted_refs.is_some_and(|wanted| !wanted.contains(&short)) {
327                continue;
328            }
329            let immediate = match reference.target().try_id() {
330                Some(id) => id.to_owned(),
331                None => continue,
332            };
333            match peel_to_commit_oid(&repo, &mut reference)? {
334                Ok(commit_oid) => plans.push(RefPlan {
335                    short_name: short,
336                    namespace: RefNamespace::Branch,
337                    immediate_oid: immediate,
338                    peeled_commit_oid: commit_oid,
339                }),
340                Err(kind) => {
341                    warn!(
342                        "skipping remote-tracking branch {} -> {} (not a commit, kind={:?})",
343                        short, immediate, kind
344                    );
345                    stats.skipped_non_commit_refs.push(SkippedRef {
346                        name: format!("refs/remotes/{short}"),
347                        peeled_oid: immediate.to_string(),
348                        peeled_kind: format!("{kind:?}"),
349                    });
350                }
351            }
352        }
353    }
354    for reference in repo
355        .references()
356        .map_err(git_err)?
357        .tags()
358        .map_err(git_err)?
359    {
360        let mut reference = reference.map_err(git_err)?;
361        let short = reference.name().shorten().to_string();
362        if wanted_refs.is_some_and(|wanted| !wanted.contains(&short)) {
363            continue;
364        }
365        let immediate = match reference.target().try_id() {
366            Some(id) => id.to_owned(),
367            None => continue,
368        };
369        match peel_to_commit_oid(&repo, &mut reference)? {
370            Ok(commit_oid) => plans.push(RefPlan {
371                short_name: short,
372                namespace: RefNamespace::Tag,
373                immediate_oid: immediate,
374                peeled_commit_oid: commit_oid,
375            }),
376            Err(kind) => {
377                // A tag pointing at a non-commit IS a real-world pattern
378                // (junio-gpg-pub, core-gpg-keys, etc.). Skip with a
379                // record so we don't lose track that this ref existed
380                // upstream.
381                warn!(
382                    "skipping tag {} -> {} (not a commit, kind={:?}); \
383                     non-commit-pointing tags are not yet representable in heddle's \
384                     marker model",
385                    short, immediate, kind
386                );
387                stats.skipped_non_commit_refs.push(SkippedRef {
388                    name: format!("refs/tags/{short}"),
389                    peeled_oid: immediate.to_string(),
390                    peeled_kind: format!("{kind:?}"),
391                });
392            }
393        }
394    }
395
396    if let Some(wanted_refs) = wanted_refs {
397        let planned = plans
398            .iter()
399            .map(|plan| plan.short_name.clone())
400            .collect::<HashSet<_>>();
401        let mut missing = wanted_refs
402            .iter()
403            .filter(|name| !planned.contains(*name))
404            .cloned()
405            .collect::<Vec<_>>();
406        missing.sort();
407        if !missing.is_empty() {
408            let mut message = format!(
409                "requested ref(s) not found or not commit-pointing: {}",
410                missing.join(", ")
411            );
412            let suggestions = remote_tracking_ref_suggestions(&repo, &missing)?;
413            if !suggestions.is_empty() {
414                message.push_str("\n\n");
415                message.push_str(&suggestions.join("\n"));
416            }
417            return Err(GitBridgeError::CommitNotFound(message));
418        }
419    }
420
421    // Populate the bridge mirror with the source's reachable objects AND
422    // its refs verbatim (when we're importing from an external path
423    // rather than the mirror itself).
424    //
425    // Mirror population enables two things downstream:
426    //   1. **SHA-stable export**: `bridge export --destination Y`
427    //      copies the original commit bytes verbatim from the mirror,
428    //      so destination commits keep their original SHAs.
429    //   2. **Annotated tag preservation**: writing the source ref into
430    //      the mirror at its IMMEDIATE target (the tag object OID, not
431    //      the peeled commit) makes the existing-ref check in
432    //      `sync_marker_to_tag` skip the rewrite — leaving the
433    //      annotated tag intact through to the destination push.
434    //
435    // We do this **per ref** rather than as a single bulk copy. A ref
436    // whose ancestry references a missing object (a known failure mode
437    // in real-world repos like git-lfs, where pack data carries dangling
438    // references that `git fsck` doesn't catch) doesn't poison the rest
439    // of the mirror — only that one ref loses SHA stability.
440    if git_path.is_some() {
441        bridge.init_mirror()?;
442        let mirror_repo = bridge.open_git_repo()?;
443        if mirror_repo.git_dir() != repo.git_dir() {
444            let mut successful_updates: Vec<RefUpdate> = Vec::new();
445            for plan in &plans {
446                // Roots include both the immediate target (tag object for
447                // annotated tags) and the peeled commit (so the walker
448                // descends through commit→tree→blob even when the
449                // immediate object is a tag).
450                let roots = [plan.immediate_oid, plan.peeled_commit_oid];
451                match copy_reachable_objects(&repo, &mirror_repo, roots) {
452                    Ok(()) => successful_updates.push(RefUpdate {
453                        name: plan.short_name.clone(),
454                        target: plan.immediate_oid,
455                        namespace: plan.namespace,
456                    }),
457                    Err(err) => {
458                        let full = match plan.namespace {
459                            RefNamespace::Branch => format!("refs/heads/{}", plan.short_name),
460                            RefNamespace::Tag => format!("refs/tags/{}", plan.short_name),
461                            RefNamespace::Note => format!("refs/notes/{}", plan.short_name),
462                        };
463                        warn!(
464                            "partial mirror for {} (target {}): {}; \
465                             SHA-stable export degraded for commits reachable only \
466                             from this ref",
467                            full, plan.immediate_oid, err
468                        );
469                        stats.partial_mirror_refs.push(PartialMirrorRef {
470                            name: full,
471                            error: err.to_string(),
472                        });
473                    }
474                }
475            }
476            // Write source refs into the mirror. For annotated tags this
477            // points refs/tags/<name> at the tag object (not the peeled
478            // commit), which is what preserves the annotated form across
479            // export.
480            apply_ref_updates(
481                &mirror_repo,
482                &successful_updates,
483                "heddle: import refs from source",
484            )?;
485        }
486    }
487
488    bridge.build_existing_mapping(Some(repo.path()))?;
489
490    let mut tree_importer = GitTreeImporter::new(bridge.heddle_repo, &repo);
491    bridge.heddle_repo.store().begin_snapshot_write_batch()?;
492    let import_result = (|| -> GitResult<()> {
493        let mut visiting = HashSet::new();
494        let mut imported = HashSet::new();
495        for plan in &plans {
496            let tip = plan.peeled_commit_oid;
497            import_commit_ancestry(
498                bridge,
499                &repo,
500                &mut tree_importer,
501                tip,
502                &mut visiting,
503                &mut imported,
504                &mut stats,
505            )?;
506        }
507        Ok(())
508    })();
509    match import_result {
510        Ok(()) => {
511            bridge.write_mapping_tmp_to_disk()?;
512            bridge.heddle_repo.store().flush_snapshot_write_batch()?;
513            bridge.commit_mapping_tmp_to_disk()?;
514        }
515        Err(error) => {
516            bridge.heddle_repo.store().abort_snapshot_write_batch();
517            return Err(error);
518        }
519    }
520
521    for plan in plans
522        .iter()
523        .filter(|plan| plan.namespace == RefNamespace::Branch)
524    {
525        let name = &plan.short_name;
526        if wanted_refs.is_some_and(|wanted| !wanted.contains(name.as_str())) {
527            continue;
528        }
529        if let Some(change_id) = bridge.mapping.get_heddle(plan.peeled_commit_oid) {
530            if let Some(existing) = bridge.heddle_repo.refs().get_thread(name.as_str())?
531                && !thread_can_adopt_change(bridge.heddle_repo, &existing, &change_id)?
532            {
533                return Err(GitBridgeError::Conflict(format!(
534                    "thread {} at {} differs from branch {} at {}. \
535                     To recover, switch to '{}' and run `heddle sync` after \
536                     resolving the divergent history, or explicitly reset the \
537                     Heddle thread if the Git branch should replace it.",
538                    name, existing, name, change_id, name
539                )));
540            }
541
542            bridge
543                .heddle_repo
544                .refs()
545                .set_thread(name.as_str(), &change_id)
546                .map_err(|e| {
547                    GitBridgeError::InvalidMapping(format!(
548                        "set_thread failed for '{}': {}",
549                        name, e
550                    ))
551                })?;
552            stats.branches_synced += 1;
553        }
554    }
555
556    for tag in repo
557        .references()
558        .map_err(git_err)?
559        .tags()
560        .map_err(git_err)?
561    {
562        let mut tag = tag.map_err(git_err)?;
563        let name = tag.name().shorten().to_string();
564        if wanted_refs.is_some_and(|wanted| !wanted.contains(&name)) {
565            continue;
566        }
567        // Skip non-commit-pointing tags here too; the tips loop already
568        // recorded them in `skipped_non_commit_refs`.
569        let oid = match peel_to_commit_oid(&repo, &mut tag)? {
570            Ok(oid) => oid,
571            Err(_) => continue,
572        };
573        if let Some(change_id) = bridge.mapping.get_heddle(oid) {
574            if let Ok(Some(existing)) = bridge.heddle_repo.refs().get_marker(&name)
575                && existing != change_id
576            {
577                return Err(GitBridgeError::Conflict(format!(
578                    "marker {} at {} differs from tag {} at {}",
579                    name, existing, name, change_id
580                )));
581            }
582
583            if let Err(e) = bridge.heddle_repo.refs().create_marker(&name, &change_id) {
584                warn!(
585                    "Failed to create marker '{}' during git import: {}",
586                    name, e
587                );
588            }
589            stats.tags_synced += 1;
590        }
591    }
592
593    Ok(stats)
594}
595
596pub(crate) fn thread_can_adopt_change(
597    heddle_repo: &HeddleRepository,
598    existing: &ChangeId,
599    change_id: &ChangeId,
600) -> GitResult<bool> {
601    if existing == change_id {
602        return Ok(true);
603    }
604    if thread_is_unclaimed_bootstrap(heddle_repo, existing)? {
605        return Ok(true);
606    }
607    proto::is_ancestor(heddle_repo.store(), *existing, *change_id)
608        .map_err(|err| GitBridgeError::InvalidMapping(err.to_string()))
609}
610
/// Phase work for the iterative ancestry walker.
///
/// `Enter(oid)` schedules a commit for visit: discover its parents and
/// queue them. `Emit(oid)` finalizes a commit: import it as a heddle
/// state once all its parents have already been emitted.
///
/// We separate the phases because we need post-order traversal (parents
/// before children), and a single-marker stack can't express "I've
/// queued this commit's parents but haven't emitted the commit itself
/// yet" without keeping per-node state outside the stack.
enum WalkPhase {
    /// First visit of a commit: look up its parents, push this commit's
    /// `Emit`, then push an `Enter` for each parent on top of it.
    Enter(gix::hash::ObjectId),
    /// Second visit of a commit, popped only after everything pushed
    /// above it has been processed: import the commit if its state is
    /// missing and mark it as done.
    Emit(gix::hash::ObjectId),
}
625
626/// Iterative ancestry walk — post-order DFS using an explicit stack
627/// instead of recursion.
628///
629/// **Why this matters:** the previous version recursed once per parent
630/// hop, so the call stack grew as deep as the longest chain in the
631/// commit DAG. On `git/git` (84k commits) this overflowed the main
632/// thread's 8MB stack after ~1 second and aborted with SIGABRT before
633/// any state was written. With the explicit stack we're bounded only by
634/// heap memory, which scales with the DAG's total node count rather
635/// than its depth.
636///
637/// Behavior is otherwise unchanged: parents are processed before their
638/// children, already-imported nodes are skipped, and re-entering a node
639/// that's still in flight (a merge with two paths to the same ancestor)
640/// is a no-op.
641fn import_commit_ancestry(
642    bridge: &mut GitBridge<'_>,
643    repo: &gix::Repository,
644    tree_importer: &mut GitTreeImporter<'_>,
645    git_oid: gix::hash::ObjectId,
646    visiting: &mut HashSet<gix::hash::ObjectId>,
647    imported: &mut HashSet<gix::hash::ObjectId>,
648    stats: &mut ImportStats,
649) -> GitResult<()> {
650    let mut stack: Vec<WalkPhase> = vec![WalkPhase::Enter(git_oid)];
651
652    while let Some(phase) = stack.pop() {
653        match phase {
654            WalkPhase::Enter(oid) => {
655                // Skip only if we've fully processed this OID earlier in
656                // the same walk. We deliberately do NOT skip on
657                // `mapping.has_git(oid)` here — even when the mapping
658                // already knows the change_id (e.g. recovered from
659                // refs/notes/heddle on a fresh re-import of an exported
660                // repo), the heddle state for this commit may not yet
661                // exist in the store. Letting the walk continue ensures
662                // `import_commit` runs and writes the state.
663                if imported.contains(&oid) {
664                    continue;
665                }
666                if !visiting.insert(oid) {
667                    // Already in flight via another merge path — its Emit
668                    // is already scheduled, no need to re-queue.
669                    continue;
670                }
671
672                let commit = repo.find_commit(oid).map_err(git_err)?;
673                let parent_git_oids: Vec<gix::hash::ObjectId> =
674                    commit.parent_ids().map(|id| id.detach()).collect();
675
676                // Schedule emit AFTER all parents are processed. Stack is
677                // LIFO so the Emit goes on first; then parents on top of
678                // it pop first. Reverse so the original parent order is
679                // preserved.
680                stack.push(WalkPhase::Emit(oid));
681                for parent_oid in parent_git_oids.into_iter().rev() {
682                    stack.push(WalkPhase::Enter(parent_oid));
683                }
684            }
685            WalkPhase::Emit(oid) => {
686                // Decide whether to call import_commit by checking the
687                // *store*, not the mapping: the mapping can carry an
688                // entry recovered from a note that has no matching state
689                // object yet. `import_commit` is idempotent — if the
690                // change_id (from mapping or trailer or derived) already
691                // has a state in the store, `put_state` overwrites it
692                // with identical bytes.
693                let existing_change_id = bridge.mapping.get_heddle(oid);
694                let needs_state = match existing_change_id {
695                    Some(cid) => bridge.heddle_repo.store().get_state(&cid)?.is_none(),
696                    None => true,
697                };
698                if needs_state {
699                    let change_id = import_commit(
700                        &mut bridge.mapping,
701                        bridge.heddle_repo,
702                        repo,
703                        tree_importer,
704                        oid,
705                    )?;
706                    bridge.mapping.insert(change_id, oid);
707                    stats.commits_imported += 1;
708                    stats.states_created += 1;
709                }
710                visiting.remove(&oid);
711                imported.insert(oid);
712            }
713        }
714    }
715
716    Ok(())
717}