Skip to main content

mkit_git_bridge/
reconstruct.rs

//! Verification-grade inverse of [`crate::translate`]
//! (SPEC-GIT-BRIDGE §9).
//!
//! **Not an import path.** Every function here is defined only on git
//! objects the v1 mapping can emit. Two mechanisms enforce that, and
//! it matters which catches what. Parse-time rejections surface as
//! [`BridgeError::NotBridgeObject`]: the reserved `mkit-remix-source`
//! carrier, a malformed `mkit-object-type`, a missing or foreign
//! `mkit-schema`, a missing, duplicated, or malformed single-valued
//! header, and a tree mode with no mkit equivalent (e.g. `160000`).
//! EVERYTHING ELSE — header order, unknown `mkit-*` headers, any
//! other off-spec shape — fails closed through the *re-translation
//! equality check* (rebuild the mkit object, translate it forward,
//! require byte equality with the input), surfacing as
//! [`BridgeError::Integrity`]. There is NO header whitelist: the
//! equality check IS the load-bearing guard, so do not weaken it
//! expecting one.
16
17use crate::author;
18use crate::error::BridgeError;
19use crate::gitobj::{GitObject, GitType, Sha1Id, sha1_from_hex};
20use crate::headers;
21use crate::translate;
22use mkit_core::object::{
23    Blob, ChunkedBlob, Commit, EntryMode, Object, ObjectType, Tag, Tree, TreeEntry,
24};
25use mkit_core::worktree::CHUNK_THRESHOLD;
26use mkit_core::{ChunkIterator, FastCdc, Hash};
27use std::collections::HashMap;
28
29/// A reconstructed mkit object: its serialized v1 bytes and BLAKE3
30/// hash. `extras` carries the chunk blobs a large flattened blob
31/// re-chunks into (empty for everything else).
32#[derive(Debug, Clone, PartialEq, Eq)]
33pub struct Reconstructed {
34    pub hash: Hash,
35    pub bytes: Vec<u8>,
36    pub object: Object,
37    pub extras: Vec<(Hash, Vec<u8>)>,
38}
39
40fn finish(object: Object, extras: Vec<(Hash, Vec<u8>)>) -> Result<Reconstructed, BridgeError> {
41    let bytes = mkit_core::serialize(&object)
42        .map_err(|e| BridgeError::Integrity(format!("reserialize: {e}")))?;
43    // §9 validity clause: the rebuilt bytes MUST deserialize under
44    // SPEC-OBJECTS. Re-translation byte-equality proves bridge shape,
45    // but only this proves the result is a legal mkit object (it
46    // catches e.g. forged git trees with `.git`/duplicate entry
47    // names, which no mkit source could have produced).
48    match mkit_core::deserialize(&bytes) {
49        Ok(round) if round == object => {}
50        Ok(_) => {
51            return Err(BridgeError::Integrity(
52                "reconstructed bytes round-trip to a different object".into(),
53            ));
54        }
55        Err(e) => {
56            return Err(BridgeError::NotBridgeObject(format!(
57                "reconstructed object is not legal under SPEC-OBJECTS: {e}"
58            )));
59        }
60    }
61    let hash = mkit_core::hash::hash(&bytes);
62    Ok(Reconstructed {
63        hash,
64        bytes,
65        object,
66        extras,
67    })
68}
69
70/// §9 blob rule: ≤ 1 MiB → plain blob; larger → pinned-FastCDC
71/// chunk blobs + manifest (mirrors `worktree::store_file_object`).
72pub fn reconstruct_blob(body: &[u8]) -> Result<Reconstructed, BridgeError> {
73    if body.len() as u64 <= CHUNK_THRESHOLD {
74        return finish(
75            Object::Blob(Blob {
76                data: body.to_vec(),
77            }),
78            Vec::new(),
79        );
80    }
81    let mut extras = Vec::new();
82    let chunks: Vec<Hash> = ChunkIterator::new(FastCdc::v1(), body)
83        .map(|b| {
84            let chunk = Object::Blob(Blob {
85                data: body[b.offset..b.offset + b.length].to_vec(),
86            });
87            let bytes = mkit_core::serialize(&chunk)
88                .map_err(|e| BridgeError::Integrity(format!("chunk serialize: {e}")))?;
89            let h = mkit_core::hash::hash(&bytes);
90            extras.push((h, bytes));
91            Ok::<_, BridgeError>(h)
92        })
93        .collect::<Result<_, _>>()?;
94    let manifest = Object::ChunkedBlob(ChunkedBlob {
95        total_size: body.len() as u64,
96        chunk_size: 0,
97        chunks,
98    });
99    finish(manifest, extras)
100}
101
102/// §9 tree rule. `resolve` maps a child git id back to the BLAKE3 of
103/// the already-reconstructed child (bottom-up order is the caller's
104/// job).
105pub fn reconstruct_tree(
106    body: &[u8],
107    resolve: &impl Fn(&Sha1Id) -> Option<Hash>,
108) -> Result<Reconstructed, BridgeError> {
109    let mut entries = Vec::new();
110    let mut local: HashMap<Hash, Sha1Id> = HashMap::new();
111    let mut rest = body;
112    while !rest.is_empty() {
113        let sp = rest
114            .iter()
115            .position(|&b| b == b' ')
116            .ok_or_else(|| not_bridge("tree entry missing mode terminator"))?;
117        let mode = match &rest[..sp] {
118            b"100644" => EntryMode::Blob,
119            b"40000" => EntryMode::Tree,
120            b"120000" => EntryMode::Symlink,
121            b"100755" => EntryMode::Executable,
122            other => {
123                return Err(not_bridge(&format!(
124                    "git tree mode {:?} has no mkit equivalent",
125                    String::from_utf8_lossy(other)
126                )));
127            }
128        };
129        rest = &rest[sp + 1..];
130        let nul = rest
131            .iter()
132            .position(|&b| b == 0)
133            .ok_or_else(|| not_bridge("tree entry missing NUL"))?;
134        let name = rest[..nul].to_vec();
135        rest = &rest[nul + 1..];
136        if rest.len() < 20 {
137            return Err(not_bridge("tree entry truncated id"));
138        }
139        let mut id = [0u8; 20];
140        id.copy_from_slice(&rest[..20]);
141        rest = &rest[20..];
142        let child = resolve(&id).ok_or_else(|| not_bridge("tree child id not reconstructible"))?;
143        local.insert(child, id);
144        entries.push(TreeEntry {
145            name,
146            mode,
147            object_hash: child,
148        });
149    }
150    // mkit canonical order: byte-lex on the raw name.
151    entries.sort_by(|a, b| a.name.cmp(&b.name));
152    let tree = Tree { entries };
153    // Prove exact bridge shape by re-translation.
154    let retrans = translate::translate_tree(&tree, &|h| local.get(h).copied())?;
155    if retrans.body != body {
156        return Err(BridgeError::Integrity(
157            "tree re-translation mismatch (not a bridge-emitted tree)".into(),
158        ));
159    }
160    finish(Object::Tree(tree), Vec::new())
161}
162
163/// §9 commit rule: self-contained (everything rides in headers).
164pub fn reconstruct_commit(body: &[u8]) -> Result<Reconstructed, BridgeError> {
165    let parsed = ParsedBody::parse(body)?;
166    parsed.check_schema()?;
167    let tree_id = parsed.required_git_id("tree")?;
168    let parent_ids = parsed.all_git_ids("parent")?;
169    let author_line = parsed.required(b"author")?;
170    let timestamp = author::parse_timestamp(author_line)
171        .ok_or_else(|| not_bridge("author line is not bridge-synthesized"))?;
172    let identity = headers::parse_identity(parsed.required_str(headers::MKIT_AUTHOR)?)
173        .ok_or_else(|| not_bridge("mkit-author header malformed"))?;
174    let commit = Commit {
175        tree_hash: parsed.required_hash(headers::MKIT_TREE)?,
176        parents: parsed.all_hashes(headers::MKIT_PARENT)?,
177        author: identity,
178        signer: parsed.required_hash(headers::MKIT_SIGNER)?,
179        message: parsed.message.to_vec(),
180        timestamp,
181        message_hash: parsed
182            .optional_hash(headers::MKIT_MESSAGE_HASH)?
183            .unwrap_or(mkit_core::hash::ZERO),
184        content_digest: parsed
185            .optional_hash(headers::MKIT_CONTENT_DIGEST)?
186            .unwrap_or(mkit_core::hash::ZERO),
187        signature: parsed.required_signature(headers::MKIT_SIGNATURE)?,
188    };
189    if commit.parents.len() != parent_ids.len() {
190        return Err(not_bridge("parent / mkit-parent count mismatch"));
191    }
192    let probe = mkit_core::hash::ZERO; // hash input unused by translate_commit checks
193    let retrans = translate::translate_commit(&probe, &commit, &tree_id, &parent_ids)?;
194    if retrans.body != body {
195        return Err(BridgeError::Integrity(
196            "commit re-translation mismatch (not a bridge-emitted commit)".into(),
197        ));
198    }
199    finish(Object::Commit(commit), Vec::new())
200}
201
202/// §9 tag rule: self-contained like commits.
203pub fn reconstruct_tag(body: &[u8]) -> Result<Reconstructed, BridgeError> {
204    let parsed = ParsedBody::parse(body)?;
205    parsed.check_schema()?;
206    let target_id = parsed.required_git_id("object")?;
207    let name = parsed.required(b"tag")?.to_vec();
208    let tagger_line = parsed.required(b"tagger")?;
209    let timestamp = author::parse_timestamp(tagger_line)
210        .ok_or_else(|| not_bridge("tagger line is not bridge-synthesized"))?;
211    let tagger = headers::parse_identity(parsed.required_str(headers::MKIT_TAGGER)?)
212        .ok_or_else(|| not_bridge("mkit-tagger header malformed"))?;
213    let tt_hex = parsed.required_str(headers::MKIT_TARGET_TYPE)?;
214    let tt_byte = crate::gitobj::bytes_from_hex(tt_hex, 1)
215        .ok_or_else(|| not_bridge("mkit-target-type malformed"))?[0];
216    // Only target types the v1 mapping can emit (§7.1): remix/delta
217    // targets are refused at translation time and unknown bytes are
218    // future formats.
219    let target_type = match tt_byte {
220        0x01 => ObjectType::Blob,
221        0x02 => ObjectType::Tree,
222        0x03 => ObjectType::Commit,
223        0x05 => ObjectType::ChunkedBlob,
224        0x07 => ObjectType::Tag,
225        _ => return Err(not_bridge("mkit-target-type not bridge-emittable")),
226    };
227    let tag = Tag {
228        target: parsed.required_hash(headers::MKIT_TARGET)?,
229        target_type,
230        name,
231        tagger,
232        signer: parsed.required_hash(headers::MKIT_SIGNER)?,
233        message: parsed.message.to_vec(),
234        timestamp,
235        signature: parsed.required_signature(headers::MKIT_SIGNATURE)?,
236    };
237    let probe = mkit_core::hash::ZERO;
238    let retrans = translate::translate_tag(&probe, &tag, &target_id)?;
239    if retrans.body != body {
240        return Err(BridgeError::Integrity(
241            "tag re-translation mismatch (not a bridge-emitted tag)".into(),
242        ));
243    }
244    finish(Object::Tag(tag), Vec::new())
245}
246
247/// Dispatch on git type.
248pub fn reconstruct(
249    obj: &GitObject,
250    resolve: &impl Fn(&Sha1Id) -> Option<Hash>,
251) -> Result<Reconstructed, BridgeError> {
252    match obj.gtype {
253        GitType::Blob => reconstruct_blob(&obj.body),
254        GitType::Tree => reconstruct_tree(&obj.body, resolve),
255        GitType::Commit => reconstruct_commit(&obj.body),
256        GitType::Tag => reconstruct_tag(&obj.body),
257    }
258}
259
260fn not_bridge(msg: &str) -> BridgeError {
261    BridgeError::NotBridgeObject(msg.to_owned())
262}
263
264// ─── header-block parsing ───────────────────────────────────────────
265
/// Borrowed view of a git commit/tag body, split into its header lines
/// and the message that follows the first blank line.
struct ParsedBody<'a> {
    /// `(key, value)` per header line, in the order they appear.
    headers: Vec<(&'a [u8], &'a [u8])>,
    /// Everything after the header/message separator.
    message: &'a [u8],
}
270
271impl<'a> ParsedBody<'a> {
272    fn parse(body: &'a [u8]) -> Result<Self, BridgeError> {
273        let split = body
274            .windows(2)
275            .position(|w| w == b"\n\n")
276            .ok_or_else(|| not_bridge("no header/message separator"))?;
277        let (head, message) = (&body[..=split], &body[split + 2..]);
278        let mut headers = Vec::new();
279        for line in head.split(|&b| b == b'\n').filter(|l| !l.is_empty()) {
280            if line.starts_with(b" ") {
281                // The bridge never emits continuation lines (§6.1).
282                return Err(not_bridge("continuation header line"));
283            }
284            let sp = line
285                .iter()
286                .position(|&b| b == b' ')
287                .ok_or_else(|| not_bridge("header line without value"))?;
288            let key = &line[..sp];
289            if headers::RESERVED.iter().any(|r| r.as_bytes() == key) {
290                return Err(not_bridge("reserved mkit-* header present"));
291            }
292            headers.push((key, &line[sp + 1..]));
293        }
294        Ok(Self { headers, message })
295    }
296
297    /// §1.2: an actionable error for missing/foreign schema versions
298    /// (instead of the generic re-translation mismatch).
299    fn check_schema(&self) -> Result<(), BridgeError> {
300        match self.required_str(headers::MKIT_SCHEMA) {
301            Ok(v) if v == headers::SCHEMA_VALUE => Ok(()),
302            Ok(v) => Err(not_bridge(&format!(
303                "mkit-schema {v} is not covered by bridge mapping v1"
304            ))),
305            Err(_) => Err(not_bridge("missing mkit-schema header")),
306        }
307    }
308
309    fn all(&self, key: &[u8]) -> Vec<&'a [u8]> {
310        self.headers
311            .iter()
312            .filter(|(k, _)| *k == key)
313            .map(|(_, v)| *v)
314            .collect()
315    }
316
317    fn required(&self, key: &[u8]) -> Result<&'a [u8], BridgeError> {
318        match self.all(key).as_slice() {
319            [v] => Ok(v),
320            [] => Err(not_bridge(&format!(
321                "missing {}",
322                String::from_utf8_lossy(key)
323            ))),
324            _ => Err(not_bridge(&format!(
325                "duplicate {}",
326                String::from_utf8_lossy(key)
327            ))),
328        }
329    }
330
331    fn required_str(&self, key: &str) -> Result<&'a str, BridgeError> {
332        std::str::from_utf8(self.required(key.as_bytes())?)
333            .map_err(|_| not_bridge(&format!("{key} not UTF-8")))
334    }
335
336    fn required_git_id(&self, key: &str) -> Result<Sha1Id, BridgeError> {
337        sha1_from_hex(self.required_str(key)?)
338            .ok_or_else(|| not_bridge(&format!("{key} is not a 40-hex id")))
339    }
340
341    fn all_git_ids(&self, key: &str) -> Result<Vec<Sha1Id>, BridgeError> {
342        self.all(key.as_bytes())
343            .into_iter()
344            .map(|v| {
345                std::str::from_utf8(v)
346                    .ok()
347                    .and_then(sha1_from_hex)
348                    .ok_or_else(|| not_bridge(&format!("{key} is not a 40-hex id")))
349            })
350            .collect()
351    }
352
353    fn required_hash(&self, key: &str) -> Result<[u8; 32], BridgeError> {
354        headers::parse_hash(self.required_str(key)?)
355            .ok_or_else(|| not_bridge(&format!("{key} is not a 64-hex hash")))
356    }
357
358    fn optional_hash(&self, key: &str) -> Result<Option<[u8; 32]>, BridgeError> {
359        match self.all(key.as_bytes()).as_slice() {
360            [] => Ok(None),
361            [v] => std::str::from_utf8(v)
362                .ok()
363                .and_then(headers::parse_hash)
364                .map(Some)
365                .ok_or_else(|| not_bridge(&format!("{key} is not a 64-hex hash"))),
366            _ => Err(not_bridge(&format!("duplicate {key}"))),
367        }
368    }
369
370    fn required_signature(&self, key: &str) -> Result<[u8; 64], BridgeError> {
371        headers::parse_signature(self.required_str(key)?)
372            .ok_or_else(|| not_bridge(&format!("{key} is not a 128-hex signature")))
373    }
374
375    fn all_hashes(&self, key: &str) -> Result<Vec<[u8; 32]>, BridgeError> {
376        self.all(key.as_bytes())
377            .into_iter()
378            .map(|v| {
379                std::str::from_utf8(v)
380                    .ok()
381                    .and_then(headers::parse_hash)
382                    .ok_or_else(|| not_bridge(&format!("{key} is not a 64-hex hash")))
383            })
384            .collect()
385    }
386}