1use crate::author;
18use crate::error::BridgeError;
19use crate::gitobj::{GitObject, GitType, Sha1Id, sha1_from_hex};
20use crate::headers;
21use crate::translate;
22use mkit_core::object::{
23 Blob, ChunkedBlob, Commit, EntryMode, Object, ObjectType, Tag, Tree, TreeEntry,
24};
25use mkit_core::worktree::CHUNK_THRESHOLD;
26use mkit_core::{ChunkIterator, FastCdc, Hash};
27use std::collections::HashMap;
28
/// Outcome of rebuilding an mkit object from a git object body.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Reconstructed {
    /// Hash of `bytes`, computed with `mkit_core::hash::hash`.
    pub hash: Hash,
    /// Serialized form of `object`, as returned by `mkit_core::serialize`.
    pub bytes: Vec<u8>,
    /// The reconstructed object itself.
    pub object: Object,
    /// Extra `(hash, serialized bytes)` pairs produced alongside the main
    /// object: the individual chunk blobs when a large blob is split, and
    /// empty for every other reconstruction path in this file.
    pub extras: Vec<(Hash, Vec<u8>)>,
}
39
40fn finish(object: Object, extras: Vec<(Hash, Vec<u8>)>) -> Result<Reconstructed, BridgeError> {
41 let bytes = mkit_core::serialize(&object)
42 .map_err(|e| BridgeError::Integrity(format!("reserialize: {e}")))?;
43 match mkit_core::deserialize(&bytes) {
49 Ok(round) if round == object => {}
50 Ok(_) => {
51 return Err(BridgeError::Integrity(
52 "reconstructed bytes round-trip to a different object".into(),
53 ));
54 }
55 Err(e) => {
56 return Err(BridgeError::NotBridgeObject(format!(
57 "reconstructed object is not legal under SPEC-OBJECTS: {e}"
58 )));
59 }
60 }
61 let hash = mkit_core::hash::hash(&bytes);
62 Ok(Reconstructed {
63 hash,
64 bytes,
65 object,
66 extras,
67 })
68}
69
70pub fn reconstruct_blob(body: &[u8]) -> Result<Reconstructed, BridgeError> {
73 if body.len() as u64 <= CHUNK_THRESHOLD {
74 return finish(
75 Object::Blob(Blob {
76 data: body.to_vec(),
77 }),
78 Vec::new(),
79 );
80 }
81 let mut extras = Vec::new();
82 let chunks: Vec<Hash> = ChunkIterator::new(FastCdc::v1(), body)
83 .map(|b| {
84 let chunk = Object::Blob(Blob {
85 data: body[b.offset..b.offset + b.length].to_vec(),
86 });
87 let bytes = mkit_core::serialize(&chunk)
88 .map_err(|e| BridgeError::Integrity(format!("chunk serialize: {e}")))?;
89 let h = mkit_core::hash::hash(&bytes);
90 extras.push((h, bytes));
91 Ok::<_, BridgeError>(h)
92 })
93 .collect::<Result<_, _>>()?;
94 let manifest = Object::ChunkedBlob(ChunkedBlob {
95 total_size: body.len() as u64,
96 chunk_size: 0,
97 chunks,
98 });
99 finish(manifest, extras)
100}
101
102pub fn reconstruct_tree(
106 body: &[u8],
107 resolve: &impl Fn(&Sha1Id) -> Option<Hash>,
108) -> Result<Reconstructed, BridgeError> {
109 let mut entries = Vec::new();
110 let mut local: HashMap<Hash, Sha1Id> = HashMap::new();
111 let mut rest = body;
112 while !rest.is_empty() {
113 let sp = rest
114 .iter()
115 .position(|&b| b == b' ')
116 .ok_or_else(|| not_bridge("tree entry missing mode terminator"))?;
117 let mode = match &rest[..sp] {
118 b"100644" => EntryMode::Blob,
119 b"40000" => EntryMode::Tree,
120 b"120000" => EntryMode::Symlink,
121 b"100755" => EntryMode::Executable,
122 other => {
123 return Err(not_bridge(&format!(
124 "git tree mode {:?} has no mkit equivalent",
125 String::from_utf8_lossy(other)
126 )));
127 }
128 };
129 rest = &rest[sp + 1..];
130 let nul = rest
131 .iter()
132 .position(|&b| b == 0)
133 .ok_or_else(|| not_bridge("tree entry missing NUL"))?;
134 let name = rest[..nul].to_vec();
135 rest = &rest[nul + 1..];
136 if rest.len() < 20 {
137 return Err(not_bridge("tree entry truncated id"));
138 }
139 let mut id = [0u8; 20];
140 id.copy_from_slice(&rest[..20]);
141 rest = &rest[20..];
142 let child = resolve(&id).ok_or_else(|| not_bridge("tree child id not reconstructible"))?;
143 local.insert(child, id);
144 entries.push(TreeEntry {
145 name,
146 mode,
147 object_hash: child,
148 });
149 }
150 entries.sort_by(|a, b| a.name.cmp(&b.name));
152 let tree = Tree { entries };
153 let retrans = translate::translate_tree(&tree, &|h| local.get(h).copied())?;
155 if retrans.body != body {
156 return Err(BridgeError::Integrity(
157 "tree re-translation mismatch (not a bridge-emitted tree)".into(),
158 ));
159 }
160 finish(Object::Tree(tree), Vec::new())
161}
162
163pub fn reconstruct_commit(body: &[u8]) -> Result<Reconstructed, BridgeError> {
165 let parsed = ParsedBody::parse(body)?;
166 parsed.check_schema()?;
167 let tree_id = parsed.required_git_id("tree")?;
168 let parent_ids = parsed.all_git_ids("parent")?;
169 let author_line = parsed.required(b"author")?;
170 let timestamp = author::parse_timestamp(author_line)
171 .ok_or_else(|| not_bridge("author line is not bridge-synthesized"))?;
172 let identity = headers::parse_identity(parsed.required_str(headers::MKIT_AUTHOR)?)
173 .ok_or_else(|| not_bridge("mkit-author header malformed"))?;
174 let commit = Commit {
175 tree_hash: parsed.required_hash(headers::MKIT_TREE)?,
176 parents: parsed.all_hashes(headers::MKIT_PARENT)?,
177 author: identity,
178 signer: parsed.required_hash(headers::MKIT_SIGNER)?,
179 message: parsed.message.to_vec(),
180 timestamp,
181 message_hash: parsed
182 .optional_hash(headers::MKIT_MESSAGE_HASH)?
183 .unwrap_or(mkit_core::hash::ZERO),
184 content_digest: parsed
185 .optional_hash(headers::MKIT_CONTENT_DIGEST)?
186 .unwrap_or(mkit_core::hash::ZERO),
187 signature: parsed.required_signature(headers::MKIT_SIGNATURE)?,
188 };
189 if commit.parents.len() != parent_ids.len() {
190 return Err(not_bridge("parent / mkit-parent count mismatch"));
191 }
192 let probe = mkit_core::hash::ZERO; let retrans = translate::translate_commit(&probe, &commit, &tree_id, &parent_ids)?;
194 if retrans.body != body {
195 return Err(BridgeError::Integrity(
196 "commit re-translation mismatch (not a bridge-emitted commit)".into(),
197 ));
198 }
199 finish(Object::Commit(commit), Vec::new())
200}
201
202pub fn reconstruct_tag(body: &[u8]) -> Result<Reconstructed, BridgeError> {
204 let parsed = ParsedBody::parse(body)?;
205 parsed.check_schema()?;
206 let target_id = parsed.required_git_id("object")?;
207 let name = parsed.required(b"tag")?.to_vec();
208 let tagger_line = parsed.required(b"tagger")?;
209 let timestamp = author::parse_timestamp(tagger_line)
210 .ok_or_else(|| not_bridge("tagger line is not bridge-synthesized"))?;
211 let tagger = headers::parse_identity(parsed.required_str(headers::MKIT_TAGGER)?)
212 .ok_or_else(|| not_bridge("mkit-tagger header malformed"))?;
213 let tt_hex = parsed.required_str(headers::MKIT_TARGET_TYPE)?;
214 let tt_byte = crate::gitobj::bytes_from_hex(tt_hex, 1)
215 .ok_or_else(|| not_bridge("mkit-target-type malformed"))?[0];
216 let target_type = match tt_byte {
220 0x01 => ObjectType::Blob,
221 0x02 => ObjectType::Tree,
222 0x03 => ObjectType::Commit,
223 0x05 => ObjectType::ChunkedBlob,
224 0x07 => ObjectType::Tag,
225 _ => return Err(not_bridge("mkit-target-type not bridge-emittable")),
226 };
227 let tag = Tag {
228 target: parsed.required_hash(headers::MKIT_TARGET)?,
229 target_type,
230 name,
231 tagger,
232 signer: parsed.required_hash(headers::MKIT_SIGNER)?,
233 message: parsed.message.to_vec(),
234 timestamp,
235 signature: parsed.required_signature(headers::MKIT_SIGNATURE)?,
236 };
237 let probe = mkit_core::hash::ZERO;
238 let retrans = translate::translate_tag(&probe, &tag, &target_id)?;
239 if retrans.body != body {
240 return Err(BridgeError::Integrity(
241 "tag re-translation mismatch (not a bridge-emitted tag)".into(),
242 ));
243 }
244 finish(Object::Tag(tag), Vec::new())
245}
246
247pub fn reconstruct(
249 obj: &GitObject,
250 resolve: &impl Fn(&Sha1Id) -> Option<Hash>,
251) -> Result<Reconstructed, BridgeError> {
252 match obj.gtype {
253 GitType::Blob => reconstruct_blob(&obj.body),
254 GitType::Tree => reconstruct_tree(&obj.body, resolve),
255 GitType::Commit => reconstruct_commit(&obj.body),
256 GitType::Tag => reconstruct_tag(&obj.body),
257 }
258}
259
260fn not_bridge(msg: &str) -> BridgeError {
261 BridgeError::NotBridgeObject(msg.to_owned())
262}
263
/// Borrowed view of a commit/tag body split into header lines and message.
struct ParsedBody<'a> {
    /// `(key, value)` pairs in the order they appear; the key is the text
    /// before the first space of a header line, the value everything after it.
    headers: Vec<(&'a [u8], &'a [u8])>,
    /// Everything after the first blank line of the body.
    message: &'a [u8],
}
270
271impl<'a> ParsedBody<'a> {
272 fn parse(body: &'a [u8]) -> Result<Self, BridgeError> {
273 let split = body
274 .windows(2)
275 .position(|w| w == b"\n\n")
276 .ok_or_else(|| not_bridge("no header/message separator"))?;
277 let (head, message) = (&body[..=split], &body[split + 2..]);
278 let mut headers = Vec::new();
279 for line in head.split(|&b| b == b'\n').filter(|l| !l.is_empty()) {
280 if line.starts_with(b" ") {
281 return Err(not_bridge("continuation header line"));
283 }
284 let sp = line
285 .iter()
286 .position(|&b| b == b' ')
287 .ok_or_else(|| not_bridge("header line without value"))?;
288 let key = &line[..sp];
289 if headers::RESERVED.iter().any(|r| r.as_bytes() == key) {
290 return Err(not_bridge("reserved mkit-* header present"));
291 }
292 headers.push((key, &line[sp + 1..]));
293 }
294 Ok(Self { headers, message })
295 }
296
297 fn check_schema(&self) -> Result<(), BridgeError> {
300 match self.required_str(headers::MKIT_SCHEMA) {
301 Ok(v) if v == headers::SCHEMA_VALUE => Ok(()),
302 Ok(v) => Err(not_bridge(&format!(
303 "mkit-schema {v} is not covered by bridge mapping v1"
304 ))),
305 Err(_) => Err(not_bridge("missing mkit-schema header")),
306 }
307 }
308
309 fn all(&self, key: &[u8]) -> Vec<&'a [u8]> {
310 self.headers
311 .iter()
312 .filter(|(k, _)| *k == key)
313 .map(|(_, v)| *v)
314 .collect()
315 }
316
317 fn required(&self, key: &[u8]) -> Result<&'a [u8], BridgeError> {
318 match self.all(key).as_slice() {
319 [v] => Ok(v),
320 [] => Err(not_bridge(&format!(
321 "missing {}",
322 String::from_utf8_lossy(key)
323 ))),
324 _ => Err(not_bridge(&format!(
325 "duplicate {}",
326 String::from_utf8_lossy(key)
327 ))),
328 }
329 }
330
331 fn required_str(&self, key: &str) -> Result<&'a str, BridgeError> {
332 std::str::from_utf8(self.required(key.as_bytes())?)
333 .map_err(|_| not_bridge(&format!("{key} not UTF-8")))
334 }
335
336 fn required_git_id(&self, key: &str) -> Result<Sha1Id, BridgeError> {
337 sha1_from_hex(self.required_str(key)?)
338 .ok_or_else(|| not_bridge(&format!("{key} is not a 40-hex id")))
339 }
340
341 fn all_git_ids(&self, key: &str) -> Result<Vec<Sha1Id>, BridgeError> {
342 self.all(key.as_bytes())
343 .into_iter()
344 .map(|v| {
345 std::str::from_utf8(v)
346 .ok()
347 .and_then(sha1_from_hex)
348 .ok_or_else(|| not_bridge(&format!("{key} is not a 40-hex id")))
349 })
350 .collect()
351 }
352
353 fn required_hash(&self, key: &str) -> Result<[u8; 32], BridgeError> {
354 headers::parse_hash(self.required_str(key)?)
355 .ok_or_else(|| not_bridge(&format!("{key} is not a 64-hex hash")))
356 }
357
358 fn optional_hash(&self, key: &str) -> Result<Option<[u8; 32]>, BridgeError> {
359 match self.all(key.as_bytes()).as_slice() {
360 [] => Ok(None),
361 [v] => std::str::from_utf8(v)
362 .ok()
363 .and_then(headers::parse_hash)
364 .map(Some)
365 .ok_or_else(|| not_bridge(&format!("{key} is not a 64-hex hash"))),
366 _ => Err(not_bridge(&format!("duplicate {key}"))),
367 }
368 }
369
370 fn required_signature(&self, key: &str) -> Result<[u8; 64], BridgeError> {
371 headers::parse_signature(self.required_str(key)?)
372 .ok_or_else(|| not_bridge(&format!("{key} is not a 128-hex signature")))
373 }
374
375 fn all_hashes(&self, key: &str) -> Result<Vec<[u8; 32]>, BridgeError> {
376 self.all(key.as_bytes())
377 .into_iter()
378 .map(|v| {
379 std::str::from_utf8(v)
380 .ok()
381 .and_then(headers::parse_hash)
382 .ok_or_else(|| not_bridge(&format!("{key} is not a 64-hex hash")))
383 })
384 .collect()
385 }
386}