1use crate::author;
12use crate::error::{BridgeError, Refusal};
13use crate::gitobj::{GitObject, GitType, Sha1Id, bytes_hex};
14use crate::headers;
15use crate::refname;
16use mkit_core::object::{ChunkedBlob, Commit, EntryMode, Object, ObjectType, Tag, Tree};
17use mkit_core::{Hash, ObjectStore};
18use std::collections::HashMap;
19
20pub trait ObjectSource {
22 fn read_object(&self, h: &Hash) -> Result<Object, BridgeError>;
23}
24
25impl ObjectSource for ObjectStore {
26 fn read_object(&self, h: &Hash) -> Result<Object, BridgeError> {
27 ObjectStore::read_object(self, h).map_err(|e| match e {
28 mkit_core::store::StoreError::Decode(
29 mkit_core::MkitError::UnsupportedObjectVersion,
30 ) => Refusal::SchemaVersion { object: *h }.into(),
31 other => BridgeError::Source(format!("{}: {other}", mkit_core::to_hex(h))),
32 })
33 }
34}
35
36#[derive(Debug, Clone, Copy, PartialEq, Eq)]
39pub struct TranslationBatch {
40 pub root: Sha1Id,
41 pub emitted: usize,
42}
43
44#[must_use]
46pub fn git_mode(mode: EntryMode) -> &'static [u8] {
47 match mode {
48 EntryMode::Blob => b"100644",
49 EntryMode::Tree => b"40000",
50 EntryMode::Symlink => b"120000",
51 EntryMode::Executable => b"100755",
52 }
53}
54
55#[must_use]
57pub fn git_type_of(t: ObjectType) -> Option<GitType> {
58 Some(match t {
59 ObjectType::Blob | ObjectType::ChunkedBlob => GitType::Blob,
60 ObjectType::Tree => GitType::Tree,
61 ObjectType::Commit => GitType::Commit,
62 ObjectType::Tag => GitType::Tag,
63 ObjectType::Remix | ObjectType::Delta => return None,
64 })
65}
66
67#[must_use]
71pub fn translate_blob(data: &[u8]) -> GitObject {
72 GitObject {
73 gtype: GitType::Blob,
74 body: data.to_vec(),
75 }
76}
77
78pub fn translate_chunked<S: ObjectSource>(
85 hash: &Hash,
86 manifest: &ChunkedBlob,
87 source: &S,
88) -> Result<GitObject, BridgeError> {
89 if manifest.chunk_size != 0 {
90 return Err(Refusal::FixedSizeChunking {
91 object: *hash,
92 chunk_size: manifest.chunk_size,
93 }
94 .into());
95 }
96 if manifest.total_size <= mkit_core::worktree::CHUNK_THRESHOLD {
97 return Err(Refusal::NonCanonicalChunking {
99 object: *hash,
100 detail: "total size at or below the 1 MiB chunking threshold",
101 }
102 .into());
103 }
104 let total = usize::try_from(manifest.total_size)
105 .map_err(|_| BridgeError::Source("manifest total_size exceeds usize".into()))?;
106 let mut body = Vec::with_capacity(total);
107 let mut lengths = Vec::with_capacity(manifest.chunks.len());
108 for chunk_hash in &manifest.chunks {
109 match source.read_object(chunk_hash)? {
110 Object::Blob(b) => {
111 lengths.push(b.data.len());
112 body.extend_from_slice(&b.data);
113 }
114 other => {
115 return Err(BridgeError::Source(format!(
116 "chunk {} is a {}, not a blob",
117 mkit_core::to_hex(chunk_hash),
118 other.object_type().name()
119 )));
120 }
121 }
122 }
123 if body.len() as u64 != manifest.total_size {
124 return Err(BridgeError::Source(format!(
125 "chunked blob {}: concatenated {} bytes, manifest says {}",
126 mkit_core::to_hex(hash),
127 body.len(),
128 manifest.total_size
129 )));
130 }
131 let canonical: Vec<usize> = mkit_core::ChunkIterator::new(mkit_core::FastCdc::v1(), &body)
134 .map(|b| b.length)
135 .collect();
136 if canonical != lengths {
137 return Err(Refusal::NonCanonicalChunking {
138 object: *hash,
139 detail: "chunk boundaries differ from the pinned FastCDC output",
140 }
141 .into());
142 }
143 Ok(GitObject {
144 gtype: GitType::Blob,
145 body,
146 })
147}
148
149pub fn translate_tree(
151 tree: &Tree,
152 resolve: &impl Fn(&Hash) -> Option<Sha1Id>,
153) -> Result<GitObject, BridgeError> {
154 let mut entries: Vec<(&mkit_core::object::TreeEntry, Sha1Id)> = tree
155 .entries
156 .iter()
157 .map(|e| {
158 resolve(&e.object_hash).map(|id| (e, id)).ok_or_else(|| {
159 BridgeError::Source(format!(
160 "tree entry {:?} child not translated",
161 String::from_utf8_lossy(&e.name)
162 ))
163 })
164 })
165 .collect::<Result<_, _>>()?;
166 let mut keyed: Vec<(Vec<u8>, &mkit_core::object::TreeEntry, Sha1Id)> = entries
169 .drain(..)
170 .map(|(e, id)| {
171 let mut k = e.name.clone();
172 if e.mode == EntryMode::Tree {
173 k.push(b'/');
174 }
175 (k, e, id)
176 })
177 .collect();
178 keyed.sort_by(|a, b| a.0.cmp(&b.0));
179 let mut body = Vec::new();
180 for (_, e, id) in keyed {
181 body.extend_from_slice(git_mode(e.mode));
182 body.push(b' ');
183 body.extend_from_slice(&e.name);
184 body.push(0);
185 body.extend_from_slice(&id);
186 }
187 Ok(GitObject {
188 gtype: GitType::Tree,
189 body,
190 })
191}
192
193pub fn translate_commit(
195 hash: &Hash,
196 c: &Commit,
197 tree_id: &Sha1Id,
198 parent_ids: &[Sha1Id],
199) -> Result<GitObject, BridgeError> {
200 if c.timestamp > i64::MAX as u64 {
201 return Err(Refusal::TimestampOverflow {
202 object: *hash,
203 timestamp: c.timestamp,
204 }
205 .into());
206 }
207 let mut body = Vec::new();
208 push_line(
209 &mut body,
210 b"tree",
211 crate::gitobj::sha1_hex(tree_id).as_bytes(),
212 );
213 for pid in parent_ids {
214 push_line(
215 &mut body,
216 b"parent",
217 crate::gitobj::sha1_hex(pid).as_bytes(),
218 );
219 }
220 let person = author::line(&c.author, c.timestamp);
221 push_line(&mut body, b"author", &person);
222 push_line(&mut body, b"committer", &person);
223 push_line(
224 &mut body,
225 headers::MKIT_SCHEMA.as_bytes(),
226 headers::SCHEMA_VALUE.as_bytes(),
227 );
228 push_line(
229 &mut body,
230 headers::MKIT_AUTHOR.as_bytes(),
231 headers::identity_value(&c.author).as_bytes(),
232 );
233 push_line(
234 &mut body,
235 headers::MKIT_SIGNER.as_bytes(),
236 bytes_hex(&c.signer).as_bytes(),
237 );
238 push_line(
239 &mut body,
240 headers::MKIT_SIGNATURE.as_bytes(),
241 bytes_hex(&c.signature).as_bytes(),
242 );
243 push_line(
244 &mut body,
245 headers::MKIT_TREE.as_bytes(),
246 headers::hash_value(&c.tree_hash).as_bytes(),
247 );
248 for p in &c.parents {
249 push_line(
250 &mut body,
251 headers::MKIT_PARENT.as_bytes(),
252 headers::hash_value(p).as_bytes(),
253 );
254 }
255 if c.message_hash != mkit_core::hash::ZERO {
256 push_line(
257 &mut body,
258 headers::MKIT_MESSAGE_HASH.as_bytes(),
259 headers::hash_value(&c.message_hash).as_bytes(),
260 );
261 }
262 if c.content_digest != mkit_core::hash::ZERO {
263 push_line(
264 &mut body,
265 headers::MKIT_CONTENT_DIGEST.as_bytes(),
266 headers::hash_value(&c.content_digest).as_bytes(),
267 );
268 }
269 body.push(b'\n');
270 body.extend_from_slice(&c.message);
271 Ok(GitObject {
272 gtype: GitType::Commit,
273 body,
274 })
275}
276
277pub fn translate_tag(hash: &Hash, t: &Tag, target_id: &Sha1Id) -> Result<GitObject, BridgeError> {
279 if t.timestamp > i64::MAX as u64 {
280 return Err(Refusal::TimestampOverflow {
281 object: *hash,
282 timestamp: t.timestamp,
283 }
284 .into());
285 }
286 if refname::check_tag_name(&t.name).is_err() {
287 return Err(Refusal::TagName { object: *hash }.into());
288 }
289 let Some(target_gtype) = git_type_of(t.target_type) else {
290 return Err(Refusal::Remix { object: t.target }.into());
292 };
293 let mut body = Vec::new();
294 push_line(
295 &mut body,
296 b"object",
297 crate::gitobj::sha1_hex(target_id).as_bytes(),
298 );
299 push_line(&mut body, b"type", target_gtype.name().as_bytes());
300 push_line(&mut body, b"tag", &t.name);
301 let person = author::line(&t.tagger, t.timestamp);
302 push_line(&mut body, b"tagger", &person);
303 push_line(
304 &mut body,
305 headers::MKIT_SCHEMA.as_bytes(),
306 headers::SCHEMA_VALUE.as_bytes(),
307 );
308 push_line(
309 &mut body,
310 headers::MKIT_TAGGER.as_bytes(),
311 headers::identity_value(&t.tagger).as_bytes(),
312 );
313 push_line(
314 &mut body,
315 headers::MKIT_SIGNER.as_bytes(),
316 bytes_hex(&t.signer).as_bytes(),
317 );
318 push_line(
319 &mut body,
320 headers::MKIT_SIGNATURE.as_bytes(),
321 bytes_hex(&t.signature).as_bytes(),
322 );
323 push_line(
324 &mut body,
325 headers::MKIT_TARGET.as_bytes(),
326 headers::hash_value(&t.target).as_bytes(),
327 );
328 push_line(
329 &mut body,
330 headers::MKIT_TARGET_TYPE.as_bytes(),
331 format!("{:02x}", t.target_type as u8).as_bytes(),
332 );
333 body.push(b'\n');
334 body.extend_from_slice(&t.message);
335 Ok(GitObject {
336 gtype: GitType::Tag,
337 body,
338 })
339}
340
/// Appends one `key SP value LF` header line to `body`.
///
/// Neither `key` nor `value` is escaped or validated.
fn push_line(body: &mut Vec<u8>, key: &[u8], value: &[u8]) {
    body.extend(key.iter().chain(b" ").chain(value).chain(b"\n"));
}
347
348#[allow(clippy::implicit_hasher)]
357pub fn translate_closure<S: ObjectSource>(
358 source: &S,
359 root: &Hash,
360 known: &mut HashMap<Hash, Sha1Id>,
361 sink: &mut dyn FnMut(&Hash, &GitObject) -> Result<(), BridgeError>,
362) -> Result<TranslationBatch, BridgeError> {
363 let mut emitted = 0usize;
364 let mut stack: Vec<(Hash, bool)> = vec![(*root, false)];
368 let mut parsed: HashMap<Hash, Object> = HashMap::new();
369
370 while let Some((h, expanded)) = stack.pop() {
371 if known.contains_key(&h) {
372 continue;
373 }
374 if !expanded {
375 let obj = match parsed.get(&h) {
376 Some(_) => continue, None => source.read_object(&h)?,
378 };
379 let deps = dependencies(&h, &obj)?;
380 stack.push((h, true));
381 parsed.insert(h, obj);
382 for d in deps {
383 if !known.contains_key(&d) && !parsed.contains_key(&d) {
384 stack.push((d, false));
385 }
386 }
387 continue;
388 }
389 let obj = parsed
390 .remove(&h)
391 .ok_or_else(|| BridgeError::Source("post-visit without parse".into()))?;
392 let git = translate_one(source, &h, &obj, &|child| known.get(child).copied())?;
393 let id = git.id();
394 sink(&h, &git)?;
395 known.insert(h, id);
396 emitted += 1;
397 }
398
399 let root_id = known
400 .get(root)
401 .copied()
402 .ok_or_else(|| BridgeError::Source("root not translated".into()))?;
403 Ok(TranslationBatch {
404 root: root_id,
405 emitted,
406 })
407}
408
409fn dependencies(hash: &Hash, obj: &Object) -> Result<Vec<Hash>, BridgeError> {
413 Ok(match obj {
414 Object::Blob(_) | Object::ChunkedBlob(_) => Vec::new(),
415 Object::Tree(t) => t.entries.iter().map(|e| e.object_hash).collect(),
416 Object::Commit(c) => {
417 let mut v = Vec::with_capacity(1 + c.parents.len());
418 v.push(c.tree_hash);
419 v.extend_from_slice(&c.parents);
420 v
421 }
422 Object::Tag(t) => vec![t.target],
423 Object::Remix(_) => return Err(Refusal::Remix { object: *hash }.into()),
424 Object::Delta(_) => {
425 return Err(BridgeError::Source(format!(
426 "delta object {} in store (pack-only type)",
427 mkit_core::to_hex(hash)
428 )));
429 }
430 })
431}
432
433fn translate_one<S: ObjectSource>(
434 source: &S,
435 hash: &Hash,
436 obj: &Object,
437 resolve: &impl Fn(&Hash) -> Option<Sha1Id>,
438) -> Result<GitObject, BridgeError> {
439 match obj {
440 Object::Blob(b) => {
441 if b.data.len() as u64 > mkit_core::worktree::CHUNK_THRESHOLD {
445 return Err(Refusal::NonCanonicalChunking {
446 object: *hash,
447 detail: "plain blob above the 1 MiB chunking threshold",
448 }
449 .into());
450 }
451 Ok(translate_blob(&b.data))
452 }
453 Object::ChunkedBlob(m) => translate_chunked(hash, m, source),
454 Object::Tree(t) => translate_tree(t, resolve),
455 Object::Commit(c) => {
456 let tree_id = resolve(&c.tree_hash)
457 .ok_or_else(|| BridgeError::Source("commit tree not translated".into()))?;
458 let parent_ids: Vec<Sha1Id> = c
459 .parents
460 .iter()
461 .map(|p| {
462 resolve(p)
463 .ok_or_else(|| BridgeError::Source("commit parent not translated".into()))
464 })
465 .collect::<Result<_, _>>()?;
466 translate_commit(hash, c, &tree_id, &parent_ids)
467 }
468 Object::Tag(t) => {
469 let target_id = resolve(&t.target)
470 .ok_or_else(|| BridgeError::Source("tag target not translated".into()))?;
471 translate_tag(hash, t, &target_id)
472 }
473 Object::Remix(_) => Err(Refusal::Remix { object: *hash }.into()),
474 Object::Delta(_) => Err(BridgeError::Source("delta is pack-only".into())),
475 }
476}