Skip to main content

mkit_git_bridge/
map.rs

1//! blake3↔sha1 mapping cache and per-remote export state
2//! (SPEC-GIT-BRIDGE §12.3).
3//!
4//! Everything here is a **disposable cache**: translation is
5//! deterministic, so a missing or corrupt file means "rebuild", never
6//! an error. The map file is append-only text (`<64hex> <40hex>\n`);
7//! [`load_map`] skips lines that do not parse (so a partially-written
8//! file still loads), and [`map_is_intact`] reports ANY malformed or
9//! blank line so the import driver can trigger the full rebuild —
10//! surviving lines of a damaged file are not evidence the rest
11//! exists. Ref state is rewritten whole via temp-file + rename.
12
13use crate::error::BridgeError;
14use crate::gitobj::{Sha1Id, sha1_from_hex, sha1_hex};
15use mkit_core::Hash;
16use mkit_core::hash::{from_hex, to_hex};
17use std::collections::HashMap;
18use std::io::Write as _;
19use std::path::{Path, PathBuf};
20
21/// `.mkit/git/<remote>/` — the per-remote bridge state directory.
22/// Remote names are restricted to the mkit ref-segment charset so the
23/// directory name is always safe.
24pub fn state_dir(mkit_dir: &Path, remote: &str) -> Result<PathBuf, BridgeError> {
25    if remote.is_empty()
26        || !remote
27            .bytes()
28            .all(|b| b.is_ascii_alphanumeric() || b == b'.' || b == b'_' || b == b'-')
29        || remote == "."
30        || remote == ".."
31    {
32        return Err(BridgeError::Source(format!(
33            "remote name {remote:?} is not a valid bridge state name"
34        )));
35    }
36    Ok(mkit_dir.join("git").join(remote))
37}
38
// File names inside one per-remote state dir.

/// Append-only blake3↔sha1 mapping cache, one `<64hex> <40hex>` pair per line.
const MAP_FILE: &str = "map";
/// Per-ref export state, rewritten whole by `store_ref_state`.
const REFS_FILE: &str = "refs";
/// Per-ref import state, rewritten whole by `store_import_ref_state`.
const IMPORT_REFS_FILE: &str = "refs-import";
42
/// Recorded direction of a state dir (SPEC-GIT-IMPORT §6): one dir
/// serves one direction; `fork` couples an import source with
/// passthrough export. Immutable once stamped.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Direction {
    /// Stamped on disk as `import`.
    Import,
    /// Stamped on disk as `export`.
    Export,
    /// Stamped on disk as `fork` (import source + passthrough export).
    Fork,
}
52
53impl Direction {
54    /// Stable on-disk / display token for this direction.
55    #[must_use]
56    pub fn as_str(self) -> &'static str {
57        match self {
58            Self::Import => "import",
59            Self::Export => "export",
60            Self::Fork => "fork",
61        }
62    }
63
64    fn parse(s: &str) -> Option<Self> {
65        Some(match s {
66            "import" => Self::Import,
67            "export" => Self::Export,
68            "fork" => Self::Fork,
69            _ => return None,
70        })
71    }
72}
73
74fn read_stamp(dir: &Path, name: &str) -> Result<Option<String>, BridgeError> {
75    match std::fs::read_to_string(dir.join(name)) {
76        Ok(v) => Ok(Some(v.trim().to_owned())),
77        Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(None),
78        Err(e) => Err(e.into()),
79    }
80}
81
82fn write_stamp(dir: &Path, name: &str, value: &str) -> Result<(), BridgeError> {
83    std::fs::create_dir_all(dir)?;
84    // Temp + content-fsync + rename + dir-fsync: stamps are bindings
85    // (direction, signer, source, …) — a torn or vanished stamp after
86    // power loss either wedges the state dir or silently unbinds it.
87    let tmp = dir.join(format!(".{name}.tmp"));
88    {
89        let mut f = std::fs::File::create(&tmp)?;
90        f.write_all(format!("{value}\n").as_bytes())?;
91        f.sync_all()?;
92    }
93    std::fs::rename(&tmp, dir.join(name))?;
94    if let Ok(d) = std::fs::File::open(dir) {
95        let _ = d.sync_all();
96    }
97    Ok(())
98}
99
/// Durable write of a named binding file (`source`, `dest`) — same
/// guarantees as the internal stamps.
///
/// Thin public wrapper over `write_stamp`: `value` is stored followed
/// by a newline in `dir/name`, via temp file + fsync + rename.
///
/// # Errors
/// Propagates whatever `write_stamp` reports (directory creation,
/// temp-file write/sync, or rename failure).
pub fn write_binding(dir: &Path, name: &str, value: &str) -> Result<(), BridgeError> {
    write_stamp(dir, name, value)
}
105
106/// Read the recorded direction, if stamped.
107pub fn read_direction(dir: &Path) -> Result<Option<Direction>, BridgeError> {
108    match read_stamp(dir, "direction")? {
109        None => Ok(None),
110        Some(v) => Direction::parse(&v).map(Some).ok_or_else(|| {
111            // A present-but-unparsable stamp must NOT read as absent:
112            // bind_direction would silently rebind a state dir whose
113            // direction the spec pins as immutable (§6).
114            BridgeError::Source(format!(
115                "direction stamp is corrupt ({v:?}); refusing to guess — \
116                 restore or remove the state dir"
117            ))
118        }),
119    }
120}
121
122/// Stamp the direction, or verify it matches an existing stamp.
123/// `Export → Fork` upgrades are refused like any other mismatch (the
124/// map semantics differ); `Import → Fork` is the supported upgrade
125/// (fork = import + passthrough export over the same source).
126pub fn bind_direction(dir: &Path, want: Direction) -> Result<(), BridgeError> {
127    match read_direction(dir)? {
128        None => write_stamp(dir, "direction", want.as_str()),
129        Some(have) if have == want => Ok(()),
130        Some(Direction::Import) if want == Direction::Fork => {
131            write_stamp(dir, "direction", want.as_str())
132        }
133        Some(have) => Err(BridgeError::Source(format!(
134            "state dir is bound to direction '{}'; '{}' is not allowed here \
135             (one direction per state dir — use a different --remote-name)",
136            have.as_str(),
137            want.as_str()
138        ))),
139    }
140}
141
142/// Read the pinned importer pubkey (64 lowercase hex), if stamped.
143pub fn read_signer(dir: &Path) -> Result<Option<[u8; 32]>, BridgeError> {
144    match read_stamp(dir, "signer")? {
145        None => Ok(None),
146        Some(v) => crate::gitobj::bytes_from_hex(&v, 32)
147            .map(|b| {
148                let mut k = [0u8; 32];
149                k.copy_from_slice(&b);
150                Some(k)
151            })
152            .ok_or_else(|| {
153                // Same rule as the direction stamp: corruption must
154                // not silently unpin the importer key (§4).
155                BridgeError::Source(
156                    "signer stamp is corrupt; refusing to re-pin — restore or \
157                     remove the state dir"
158                        .into(),
159                )
160            }),
161    }
162}
163
164/// Pin the importer key, or refuse a mismatch (SPEC-GIT-IMPORT §4).
165pub fn bind_signer(dir: &Path, key: &[u8; 32]) -> Result<(), BridgeError> {
166    match read_signer(dir)? {
167        None => write_stamp(dir, "signer", &crate::gitobj::bytes_hex(key)),
168        Some(have) if have == *key => Ok(()),
169        Some(have) => Err(BridgeError::Source(format!(
170            "this import is pinned to importer key {}…; the available key is {}…. \
171             Designated-importer model: pull this history over mkit transport from \
172             the importer, or install the pinned key (SPEC-GIT-IMPORT §4)",
173            &crate::gitobj::bytes_hex(&have)[..16],
174            &crate::gitobj::bytes_hex(key)[..16]
175        ))),
176    }
177}
178
/// Record that this state dir's imported history contains
/// historic-mode-normalized trees (SPEC-GIT-IMPORT §3.3). Sticky: a
/// normalized tree cannot reproduce its original sha1, so a later
/// import→fork upgrade must refuse (SPEC-GIT-BRIDGE §14.3 fork audit
/// would otherwise report false tampering forever).
///
/// Implemented as a `normalized` stamp holding `1`; nothing in this
/// module removes it again.
///
/// # Errors
/// Propagates any failure from writing the stamp.
pub fn mark_normalized(dir: &Path) -> Result<(), BridgeError> {
    write_stamp(dir, "normalized", "1")
}
187
188/// Whether [`mark_normalized`] was ever stamped.
189pub fn read_normalized(dir: &Path) -> Result<bool, BridgeError> {
190    Ok(read_stamp(dir, "normalized")?.is_some())
191}
192
193/// Read / pin the import-spec version (SPEC-GIT-IMPORT §1.2).
194pub fn bind_import_spec(dir: &Path, version: u32) -> Result<(), BridgeError> {
195    match read_stamp(dir, "import-spec")? {
196        None => write_stamp(dir, "import-spec", &version.to_string()),
197        Some(v) if v == version.to_string() => Ok(()),
198        Some(v) => Err(BridgeError::Source(format!(
199            "state recorded import-spec {v}, this build implements {version}; \
200             incremental pulls across mapping versions are refused — re-import \
201             under a new --remote-name (SPEC-GIT-IMPORT §1.2)"
202        ))),
203    }
204}
205
206/// Whether every non-empty line of the map file parses. A missing
207/// file is intact (nothing to distrust). Any malformed line —
208/// torn tail or mid-file corruption — means the cache may be
209/// MISSING entries that recorded refs rely on, so callers must
210/// rebuild rather than trust the surviving lines alone (§12.3).
211pub fn map_is_intact(dir: &Path) -> Result<bool, BridgeError> {
212    let path = dir.join(MAP_FILE);
213    let data = match std::fs::read(&path) {
214        Ok(d) => d,
215        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(true),
216        Err(e) => return Err(e.into()),
217    };
218    let Ok(text) = std::str::from_utf8(&data) else {
219        return Ok(false);
220    };
221    for line in text.lines() {
222        if line.is_empty() {
223            // The format has no blank-line record: an internal blank
224            // is a dropped mapping, not noise.
225            return Ok(false);
226        }
227        let Some((b3, s1)) = line.split_once(' ') else {
228            return Ok(false);
229        };
230        if from_hex(b3).is_err() || sha1_from_hex(s1).is_none() {
231            return Ok(false);
232        }
233    }
234    Ok(true)
235}
236
237/// Load the map inverted (sha1 → blake3) for the import direction.
238/// Parsed directly from the file lines: translation is many-to-one
239/// (two historic-mode spellings of a tree normalize to ONE mkit
240/// tree), so inverting the blake3-keyed [`load_map`] would drop a
241/// sha1 and force a pointless re-translation every fetch.
242pub fn load_map_inverse(dir: &Path) -> Result<HashMap<Sha1Id, Hash>, BridgeError> {
243    let path = dir.join(MAP_FILE);
244    let data = match std::fs::read(&path) {
245        Ok(d) => String::from_utf8_lossy(&d).into_owned(),
246        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(HashMap::new()),
247        Err(e) => return Err(e.into()),
248    };
249    let mut map = HashMap::new();
250    for line in data.lines() {
251        let Some((b3, s1)) = line.split_once(' ') else {
252            continue;
253        };
254        let (Ok(h), Some(id)) = (from_hex(b3), sha1_from_hex(s1)) else {
255            continue;
256        };
257        map.insert(id, h);
258    }
259    Ok(map)
260}
261
262/// Append pairs given in import orientation (sha1, blake3) — the file
263/// format stays blake3-first either way.
264pub fn append_map_import(dir: &Path, pairs: &[(Sha1Id, Hash)]) -> Result<(), BridgeError> {
265    let flipped: Vec<(Hash, Sha1Id)> = pairs.iter().map(|(s, b)| (*b, *s)).collect();
266    append_map(dir, &flipped)
267}
268
269/// Load the blake3→sha1 map. Missing file = empty map. Lines that do
270/// not parse (torn tail from a crash) are ignored.
271pub fn load_map(dir: &Path) -> Result<HashMap<Hash, Sha1Id>, BridgeError> {
272    let path = dir.join(MAP_FILE);
273    // §12.3: corruption (including undecodable bytes) means "rebuild",
274    // never an error — lossy decoding turns garbage into skipped lines.
275    let data = match std::fs::read(&path) {
276        Ok(d) => String::from_utf8_lossy(&d).into_owned(),
277        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(HashMap::new()),
278        Err(e) => return Err(e.into()),
279    };
280    let mut map = HashMap::new();
281    for line in data.lines() {
282        let Some((b3, s1)) = line.split_once(' ') else {
283            continue;
284        };
285        let (Ok(h), Some(id)) = (from_hex(b3), sha1_from_hex(s1)) else {
286            continue;
287        };
288        map.insert(h, id);
289    }
290    Ok(map)
291}
292
293/// Append newly translated pairs. Append-only by design: entries for
294/// rewritten-away commits stay valid forever (determinism), so no
295/// compaction or invalidation exists (§12.2).
296pub fn append_map(dir: &Path, pairs: &[(Hash, Sha1Id)]) -> Result<(), BridgeError> {
297    if pairs.is_empty() {
298        return Ok(());
299    }
300    std::fs::create_dir_all(dir)?;
301    let mut out = String::new();
302    for (h, id) in pairs {
303        out.push_str(&to_hex(h));
304        out.push(' ');
305        out.push_str(&sha1_hex(id));
306        out.push('\n');
307    }
308    let mut f = std::fs::OpenOptions::new()
309        .create(true)
310        .append(true)
311        .open(dir.join(MAP_FILE))?;
312    f.write_all(out.as_bytes())?;
313    f.sync_all()?;
314    // Dir fsync so the FIRST append's file creation is as durable as
315    // the stamps' (later appends find it a no-op-cost write).
316    if let Ok(d) = std::fs::File::open(dir) {
317        let _ = d.sync_all();
318    }
319    Ok(())
320}
321
/// Last-exported state for one ref: what the bridge last pushed.
/// Used as the `--force-with-lease` expectation (§12.2).
///
/// The same record shape is also stored in the import ref-state file
/// (see `load_import_ref_state`). On disk one record is one line:
/// `<ref_name> <blake3 hex> <sha1 hex>`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RefState {
    /// Ref name, first field of the line. It is written verbatim, and
    /// the loader splits on the first space.
    pub ref_name: String,
    /// mkit-side hash recorded for the ref (hex-encoded on disk).
    pub mkit_hash: Hash,
    /// git-side sha1 recorded for the ref (hex-encoded on disk).
    pub git_id: Sha1Id,
}
330
/// Load per-ref EXPORT state (push leases). Missing file = empty.
///
/// Reads the `refs` file of the state dir; lines that do not parse are
/// skipped by the shared loader.
///
/// # Errors
/// I/O errors other than "not found" are propagated.
pub fn load_ref_state(dir: &Path) -> Result<Vec<RefState>, BridgeError> {
    load_ref_state_file(dir, REFS_FILE)
}
335
/// Load per-ref IMPORT state (last-seen upstream tips). Kept separate
/// from the export leases: in a fork-mode state dir both directions
/// track the same ref names against different remotes.
///
/// Reads the `refs-import` file of the state dir; a missing file
/// yields an empty list and lines that do not parse are skipped by the
/// shared loader.
///
/// # Errors
/// I/O errors other than "not found" are propagated.
pub fn load_import_ref_state(dir: &Path) -> Result<Vec<RefState>, BridgeError> {
    load_ref_state_file(dir, IMPORT_REFS_FILE)
}
342
343fn load_ref_state_file(dir: &Path, file: &str) -> Result<Vec<RefState>, BridgeError> {
344    let path = dir.join(file);
345    let data = match std::fs::read(&path) {
346        Ok(d) => String::from_utf8_lossy(&d).into_owned(),
347        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(Vec::new()),
348        Err(e) => return Err(e.into()),
349    };
350    let mut out = Vec::new();
351    for line in data.lines() {
352        let mut parts = line.splitn(3, ' ');
353        let (Some(name), Some(b3), Some(s1)) = (parts.next(), parts.next(), parts.next()) else {
354            continue;
355        };
356        let (Ok(h), Some(id)) = (from_hex(b3), sha1_from_hex(s1)) else {
357            continue;
358        };
359        out.push(RefState {
360            ref_name: name.to_owned(),
361            mkit_hash: h,
362            git_id: id,
363        });
364    }
365    Ok(out)
366}
367
/// Rewrite the whole export ref-state file atomically (temp + rename).
///
/// `states` replaces the previous content of the `refs` file entirely;
/// an empty slice produces an empty file.
///
/// # Errors
/// Propagates failures from creating the directory, writing or syncing
/// the temp file, or renaming it into place.
pub fn store_ref_state(dir: &Path, states: &[RefState]) -> Result<(), BridgeError> {
    store_ref_state_file(dir, REFS_FILE, states)
}
372
/// Rewrite the import ref-state file (see [`load_import_ref_state`]).
///
/// `states` replaces the previous content of the `refs-import` file
/// entirely, via temp file + rename.
///
/// # Errors
/// Propagates failures from creating the directory, writing or syncing
/// the temp file, or renaming it into place.
pub fn store_import_ref_state(dir: &Path, states: &[RefState]) -> Result<(), BridgeError> {
    store_ref_state_file(dir, IMPORT_REFS_FILE, states)
}
377
378fn store_ref_state_file(dir: &Path, file: &str, states: &[RefState]) -> Result<(), BridgeError> {
379    std::fs::create_dir_all(dir)?;
380    let mut out = String::new();
381    for s in states {
382        out.push_str(&s.ref_name);
383        out.push(' ');
384        out.push_str(&to_hex(&s.mkit_hash));
385        out.push(' ');
386        out.push_str(&sha1_hex(&s.git_id));
387        out.push('\n');
388    }
389    // Per-target temp name: `refs` and `refs-import` rewrites must
390    // not race each other onto one temp path (fetch + export can run
391    // concurrently against a fork state dir).
392    let tmp = dir.join(format!(".{file}.tmp"));
393    {
394        use std::io::Write as _;
395        let mut f = std::fs::File::create(&tmp)?;
396        f.write_all(out.as_bytes())?;
397        // Content fsync before rename: this file is the lease /
398        // tracking source of truth, and a durable name over torn
399        // pages would be worse than the old file.
400        f.sync_all()?;
401    }
402    std::fs::rename(&tmp, dir.join(file))?;
403    if let Ok(d) = std::fs::File::open(dir) {
404        let _ = d.sync_all();
405    }
406    Ok(())
407}
408
#[cfg(test)]
mod tests {
    use super::*;

    /// Two well-formed (blake3, sha1) pairs shared by the map tests.
    fn sample_pairs() -> Vec<(Hash, Sha1Id)> {
        vec![([1u8; 32], [2u8; 20]), ([3u8; 32], [4u8; 20])]
    }

    #[test]
    fn map_round_trips_and_tolerates_torn_tail() {
        let tmp = tempfile::tempdir().unwrap();
        append_map(tmp.path(), &sample_pairs()).unwrap();

        // Simulate a torn append: a fragment with no space and no newline.
        {
            let mut file = std::fs::OpenOptions::new()
                .append(true)
                .open(tmp.path().join(MAP_FILE))
                .unwrap();
            file.write_all(b"deadbeef").unwrap();
        }

        let loaded = load_map(tmp.path()).unwrap();
        assert_eq!(loaded.len(), 2);
        assert_eq!(loaded[&[1u8; 32]], [2u8; 20]);
    }

    #[test]
    fn map_intact_detection() {
        let tmp = tempfile::tempdir().unwrap();
        let map_path = tmp.path().join(MAP_FILE);

        // Missing file: intact (nothing to distrust).
        assert!(map_is_intact(tmp.path()).unwrap());

        append_map(tmp.path(), &sample_pairs()).unwrap();
        assert!(map_is_intact(tmp.path()).unwrap());

        // Malformed line appended after the good records.
        let good = std::fs::read_to_string(&map_path).unwrap();
        std::fs::write(&map_path, format!("{good}GARBAGE\n")).unwrap();
        assert!(!map_is_intact(tmp.path()).unwrap());

        // Internal blank line (a dropped record, not noise).
        let records: Vec<&str> = good.lines().collect();
        let with_gap = format!("{}\n\n{}\n", records[0], records[1]);
        std::fs::write(&map_path, with_gap).unwrap();
        assert!(!map_is_intact(tmp.path()).unwrap());
    }

    #[test]
    fn ref_state_round_trips() {
        let tmp = tempfile::tempdir().unwrap();
        let main_ref = RefState {
            ref_name: "refs/heads/main".into(),
            mkit_hash: [7; 32],
            git_id: [9; 20],
        };
        let expected = vec![main_ref];

        store_ref_state(tmp.path(), &expected).unwrap();
        assert_eq!(load_ref_state(tmp.path()).unwrap(), expected);
    }

    #[test]
    fn state_dir_rejects_traversal() {
        let root = Path::new("/tmp/.mkit");
        assert!(state_dir(root, "origin").is_ok());
        assert!(state_dir(root, "..").is_err());
        assert!(state_dir(root, "a/b").is_err());
        assert!(state_dir(root, "").is_err());
    }
}