Skip to main content

cli/bridge/
git_mapping.rs

1// SPDX-License-Identifier: Apache-2.0
2//! Persistence and discovery for Git bridge mappings.
3
4use std::{
5    collections::HashSet,
6    fs::{self, File},
7    io::Write,
8    path::{Path, PathBuf},
9};
10
11use objects::{object::ChangeId, store::ObjectStore};
12use serde::{Deserialize, Serialize};
13use sley::{ObjectFormat, ObjectId as SleyObjectId, ReferenceTarget, Repository as SleyRepository};
14
15use super::git_core::{GitBridge, GitBridgeError, GitResult, SyncMapping, git_err};
16
/// One serialized identity pair inside the on-disk mapping file.
///
/// Both sides are stored as strings. `change_id` is produced by
/// `ChangeId::to_string_full` and read back with `ChangeId::parse`;
/// `git_oid` is a hex object id whose length (40 or 64 characters) selects
/// the hash format when it is parsed again.
#[derive(Debug, Serialize, Deserialize)]
struct MappingEntry {
    /// Full textual form of the change id.
    change_id: String,
    /// Hex-encoded Git object id.
    git_oid: String,
}
22
/// Top-level JSON document stored at the bridge mapping path.
///
/// The derived `Default` is a document with no entries.
#[derive(Debug, Serialize, Deserialize, Default)]
struct MappingFile {
    /// Every persisted change-id / Git-oid pair.
    entries: Vec<MappingEntry>,
}
27
/// Scratch index used while assembling the export identity mapping.
///
/// It is seeded from the Git notes and then topped up from cache mappings
/// without overriding anything the notes already established.
#[derive(Debug, Default)]
struct GitIdentityIndex {
    /// The mapping being assembled.
    mapping: SyncMapping,
}
32
33impl GitIdentityIndex {
34    fn from_notes(repo: &SleyRepository) -> GitResult<Self> {
35        let mut index = Self::default();
36        for (change_id, git_oid) in super::git_notes::read_identity_mappings(repo)? {
37            index.mapping.insert_checked(change_id, git_oid)?;
38        }
39        Ok(index)
40    }
41
42    fn fill_gaps_from_cache(&mut self, cache: &SyncMapping) {
43        for (change_id, git_oid) in cache.iter() {
44            if self.mapping.get_git(change_id) == Some(*git_oid) {
45                continue;
46            }
47            if self.mapping.has_heddle(change_id) || self.mapping.has_git(*git_oid) {
48                continue;
49            }
50            self.mapping.insert(*change_id, *git_oid);
51        }
52    }
53
54    fn into_mapping(self) -> SyncMapping {
55        self.mapping
56    }
57}
58
59impl<'a> GitBridge<'a> {
60    pub(crate) fn mapping_path(&self) -> PathBuf {
61        self.heddle_repo
62            .heddle_dir()
63            .join("git-bridge")
64            .join("bridge-mapping.json")
65    }
66
67    pub(crate) fn mapping_tmp_path(&self) -> PathBuf {
68        self.mapping_path().with_extension("json.tmp")
69    }
70
71    fn read_mapping_cache_from_disk(&self) -> GitResult<SyncMapping> {
72        self.recover_mapping_tmp()?;
73        let path = self.mapping_path();
74        if !path.exists() {
75            return Ok(SyncMapping::new());
76        }
77
78        let data = fs::read_to_string(&path)?;
79        let file: MappingFile = serde_json::from_str(&data)
80            .map_err(|err| GitBridgeError::InvalidMapping(err.to_string()))?;
81
82        let mut mapping = SyncMapping::new();
83        for entry in file.entries {
84            let change_id = ChangeId::parse(&entry.change_id)?;
85            let git_oid = parse_stored_git_oid(&entry.git_oid)?;
86            mapping.insert_checked(change_id, git_oid)?;
87        }
88
89        Ok(mapping)
90    }
91
92    fn recover_mapping_tmp(&self) -> GitResult<()> {
93        let path = self.mapping_path();
94        let tmp_path = self.mapping_tmp_path();
95        if !tmp_path.exists() {
96            return Ok(());
97        }
98        if !path.exists() {
99            fs::rename(&tmp_path, &path)?;
100        } else {
101            fs::remove_file(&tmp_path)?;
102        }
103        Ok(())
104    }
105
106    fn mapping_bytes(mapping: &SyncMapping) -> GitResult<Vec<u8>> {
107        let entries = mapping
108            .iter()
109            .map(|(change_id, git_oid)| MappingEntry {
110                change_id: change_id.to_string_full(),
111                git_oid: git_oid.to_string(),
112            })
113            .collect();
114
115        let file = MappingFile { entries };
116        serde_json::to_vec_pretty(&file)
117            .map_err(|err| GitBridgeError::InvalidMapping(err.to_string()))
118    }
119
120    pub(crate) fn write_mapping_tmp_to_disk(&self) -> GitResult<PathBuf> {
121        self.write_mapping_tmp_value_to_disk(&self.mapping)
122    }
123
124    fn write_mapping_tmp_value_to_disk(&self, mapping: &SyncMapping) -> GitResult<PathBuf> {
125        let path = self.mapping_path();
126        let tmp_path = self.mapping_tmp_path();
127        if let Some(parent) = path.parent() {
128            fs::create_dir_all(parent)?;
129            let parent_file = File::open(parent)?;
130            parent_file.sync_all()?;
131        }
132
133        let data = Self::mapping_bytes(mapping)?;
134        let mut file = File::create(&tmp_path)?;
135        file.write_all(&data)?;
136        file.sync_all()?;
137        Ok(tmp_path)
138    }
139
140    pub(crate) fn commit_mapping_tmp_to_disk(&self) -> GitResult<()> {
141        let path = self.mapping_path();
142        let tmp_path = self.mapping_tmp_path();
143        if !tmp_path.exists() {
144            return Err(GitBridgeError::InvalidMapping(format!(
145                "mapping temp file is missing: {}",
146                tmp_path.display()
147            )));
148        }
149        fs::rename(&tmp_path, &path)?;
150        if let Some(parent) = path.parent() {
151            let parent_file = File::open(parent)?;
152            parent_file.sync_all()?;
153        }
154        Ok(())
155    }
156
157    pub(crate) fn save_mapping_to_disk(&self) -> GitResult<()> {
158        self.write_mapping_tmp_to_disk()?;
159        // Fault-injection checkpoint: a crash here leaves the
160        // sidecar in tmp form (`bridge-mapping.json.tmp`) without a
161        // committed `bridge-mapping.json`. The next mapping-cache read
162        // atomically renames the tmp into place. Tested by
163        // `bridge_recovers_from_crash_after_tmp_before_commit`.
164        objects::fault_inject::maybe_panic_at("mapping_after_tmp_before_commit");
165        self.commit_mapping_tmp_to_disk()
166    }
167
168    /// Build the export identity mapping from portable metadata and the served
169    /// bridge cache. `refs/notes/heddle` is authoritative because it travels
170    /// with Git history; `bridge-mapping.json` is the local served/export cache
171    /// after visibility filtering. Ingest identity lives separately at
172    /// `.heddle/ingest/sha_map.sqlite` and is intentionally not folded in here.
173    pub(crate) fn build_existing_mapping(&mut self, git_repo_path: Option<&Path>) -> GitResult<()> {
174        let repo = match git_repo_path {
175            Some(path) => super::git_core::open_repo(path)?,
176            None => self.open_git_repo()?,
177        };
178
179        let cache = self.read_mapping_cache_from_disk()?;
180        let live_cache = self.mapping.clone();
181        let mut index = GitIdentityIndex::from_notes(&repo)?;
182        index.fill_gaps_from_cache(&live_cache);
183        index.fill_gaps_from_cache(&cache);
184        self.mapping = index.into_mapping();
185        Ok(())
186    }
187
188    pub(crate) fn seed_ingest_identity_mappings_from_mirror(
189        &mut self,
190        repo: &SleyRepository,
191    ) -> GitResult<()> {
192        let ingest = self.heddle_repo.git_overlay_ingest_commit_mapping()?;
193        for (git_sha, change_id) in ingest {
194            let change_id = ChangeId::parse(&change_id)?;
195            if self.heddle_repo.store().get_state(&change_id)?.is_none() {
196                continue;
197            }
198            if self.mapping.has_heddle(&change_id) {
199                continue;
200            }
201            let git_oid = parse_stored_git_oid(&git_sha)?;
202            if self.mapping.has_git(git_oid) || repo.read_object(&git_oid).is_err() {
203                continue;
204            }
205            self.mapping.insert(change_id, git_oid);
206        }
207        Ok(())
208    }
209
210    #[cfg_attr(not(feature = "git-overlay"), allow(dead_code))]
211    pub(crate) fn prune_unreachable_mapping_entries(&mut self) -> GitResult<usize> {
212        let repo = self.open_git_repo()?;
213        self.mapping = self.read_mapping_cache_from_disk()?;
214        let reachable: HashSet<_> = collect_commit_oids(&repo)?.into_iter().collect();
215        let removed = self.mapping.retain_git_object_set(&reachable);
216        if removed > 0 {
217            self.save_mapping_to_disk()?;
218        }
219        Ok(removed)
220    }
221
222    /// Consolidate the git-overlay MIRROR (`.heddle/git`) — heddle's canonical
223    /// git object store, a bare sley repo — by packing every on-disk object into
224    /// a single pack and dropping the now-redundant loose copies.
225    ///
226    /// The mirror accumulates one loose object per minted/imported commit, tree,
227    /// and blob (thousands on a real clone). Loose-object reads dominate the
228    /// uninstrumented cost of `git_overlay_worktree_status`, which every read
229    /// command (status/diff/verify) and write command (capture/commit) pays.
230    /// `heddle maintenance gc` already consolidates heddle's NATIVE store; this
231    /// brings the mirror to parity.
232    ///
233    /// Correctness: this uses [`repack_all_objects`], which gathers EVERY object
234    /// on disk (every loose object and every pack), not the reachability closure
235    /// of any ref set. That matters because the mirror holds more than the
236    /// current checkout — every thread's `refs/heads/*`, markers, `refs/notes/heddle`,
237    /// and the served-frontier record — AND because some lossy/non-UTF8 imports'
238    /// verbatim bytes live ONLY in the mirror and cannot be re-minted from heddle
239    /// state (see `git_export.rs` `commit_is_byte_faithful`). Packing everything
240    /// on disk preserves all of them and is content-addressed, so OIDs are
241    /// byte-for-byte unchanged. The prune only drops loose objects whose canonical
242    /// copy is now in the new pack, so it is lossless and fsck stays clean.
243    /// Idempotent: a second run finds nothing new loose and is a no-op.
244    ///
245    /// Returns the number of loose objects consolidated into the pack (and thus
246    /// removed from disk). `Ok(0)` when the mirror has no objects to pack.
247    #[cfg_attr(not(feature = "git-overlay"), allow(dead_code))]
248    pub(crate) fn consolidate_mirror(&self) -> GitResult<usize> {
249        use sley::plumbing::sley_odb::{install_repack_result, repack_all_objects};
250
251        let repo = self.open_git_repo()?;
252        let git_dir = repo.git_dir().to_path_buf();
253        let format = repo.object_format();
254
255        let Some(result) = repack_all_objects(&git_dir, format).map_err(git_err)? else {
256            return Ok(0);
257        };
258        let pruned_loose = result.packed_loose.len();
259        // prune = true: write the new pack, then drop the loose objects and
260        // superseded packs the new pack now serves (install-before-delete; the
261        // installer validates the new pack's checksum before removing anything).
262        install_repack_result(&git_dir, format, &result, true).map_err(git_err)?;
263        Ok(pruned_loose)
264    }
265}
266
267/// Walk all branch- and tag-tipped commit ancestry. Skips refs that peel
268/// to non-commit objects (annotated-tag-points-at-blob/tree), matching the
269/// marker model's current commit-target-only constraint.
270fn collect_commit_oids(repo: &SleyRepository) -> GitResult<Vec<SleyObjectId>> {
271    let mut tips = Vec::new();
272
273    for reference in repo.references().list_refs().map_err(git_err)? {
274        if !(reference.name.starts_with("refs/heads/") || reference.name.starts_with("refs/tags/"))
275        {
276            continue;
277        }
278        let oid = match reference.target {
279            ReferenceTarget::Direct(oid) => oid,
280            ReferenceTarget::Symbolic(_) => {
281                let Some(reference) = repo.find_reference(&reference.name).map_err(git_err)? else {
282                    continue;
283                };
284                let Some(oid) = reference.peeled_oid(repo).map_err(git_err)? else {
285                    continue;
286                };
287                oid
288            }
289        };
290        if let Ok(commit_oid) = sley::plumbing::sley_rev::peel_to_commit(
291            repo.objects().as_ref(),
292            repo.object_format(),
293            &oid,
294        ) {
295            tips.push(commit_oid);
296        }
297    }
298
299    let mut seen = HashSet::new();
300    let mut stack = tips;
301    while let Some(oid) = stack.pop() {
302        if !seen.insert(oid) {
303            continue;
304        }
305        let commit = repo.read_commit(&oid).map_err(git_err)?;
306        stack.extend(commit.parents);
307    }
308
309    Ok(seen.into_iter().collect())
310}
311
312fn parse_stored_git_oid(value: &str) -> GitResult<SleyObjectId> {
313    let format = match value.len() {
314        40 => ObjectFormat::Sha1,
315        64 => ObjectFormat::Sha256,
316        _ => {
317            return Err(GitBridgeError::InvalidMapping(format!(
318                "invalid git oid length for {value}"
319            )));
320        }
321    };
322    SleyObjectId::from_hex(format, value)
323        .map_err(|err| GitBridgeError::InvalidMapping(err.to_string()))
324}