1use crate::gitobj::{Sha1Id, sha1_from_hex};
20use std::fmt;
21
/// Size cap (10 MiB) applied to the header block of a commit or tag body.
///
/// `split_headers` compares the header block length plus one against this
/// value and fails with `GitParseError::Malformed("header block over cap")`
/// when it is exceeded.
pub const MAX_HEADER_BLOCK: usize = 10 * 1024 * 1024;
26
/// Errors returned by the git object parsers in this file.
///
/// The `&'static str` payloads are fixed descriptions chosen at the failure
/// site; they are rendered by the `Display` impl.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum GitParseError {
    /// The object is structurally broken (missing separator, truncated tree
    /// entry, bad continuation line, ...). The payload says what was wrong.
    Malformed(&'static str),
    /// A required header is missing or appears more than once. The payload
    /// names the header.
    Header(&'static str),
    /// A header value that should be a hex object id could not be decoded.
    /// The payload names the field.
    BadId(&'static str),
    /// A person value (author/committer/tagger) has no parseable timestamp.
    PersonTimestamp,
}
40
41impl fmt::Display for GitParseError {
42 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
43 match self {
44 Self::Malformed(d) => write!(f, "malformed git object: {d}"),
45 Self::Header(d) => write!(f, "git header: {d}"),
46 Self::BadId(d) => write!(f, "git id field: {d}"),
47 Self::PersonTimestamp => write!(f, "person line has no parseable timestamp"),
48 }
49 }
50}
51
// Empty impl: the trait's default methods are used, so no underlying source
// error is exposed.
impl std::error::Error for GitParseError {}
53
/// A parsed author/committer/tagger value, as produced by `parse_person`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Person {
    /// Identity bytes copied verbatim from the header value: everything up to
    /// and including the closing `>`, or (for bracketless values) everything
    /// before the space that precedes the timestamp.
    pub identity: Vec<u8>,
    /// The timestamp token parsed as a signed integer; negative values are
    /// accepted. Presumably seconds since the Unix epoch — confirm with callers.
    pub timestamp: i64,
    /// The timezone token (a sign followed by four ASCII digits), kept as raw
    /// bytes. `None` when no such token follows the timestamp.
    pub timezone: Option<Vec<u8>>,
}
67
68pub fn parse_person(value: &[u8]) -> Result<Person, GitParseError> {
80 let identity_end = value
82 .iter()
83 .rposition(|&b| b == b'>')
84 .filter(|>| value[..gt].contains(&b'<'))
85 .map(|gt| gt + 1);
86
87 if let Some(end) = identity_end {
88 let identity = value[..end].to_vec();
89 let rest = &value[end..];
90 let (timestamp, timezone) = parse_ts_tz(rest).ok_or(GitParseError::PersonTimestamp)?;
91 return Ok(Person {
92 identity,
93 timestamp,
94 timezone,
95 });
96 }
97
98 if let Some((cut, timestamp, timezone)) = trailing_ts_tz(value) {
100 return Ok(Person {
101 identity: value[..cut].to_vec(),
102 timestamp,
103 timezone: Some(timezone),
104 });
105 }
106 Err(GitParseError::PersonTimestamp)
107}
108
109fn parse_ts_tz(rest: &[u8]) -> Option<(i64, Option<Vec<u8>>)> {
112 let mut tokens = rest.split(|&b| b == b' ').filter(|t| !t.is_empty());
113 let ts_tok = tokens.next()?;
114 let ts = parse_i64(ts_tok)?;
115 let tz = tokens.next().filter(|t| is_tz(t)).map(<[u8]>::to_vec);
116 Some((ts, tz))
117}
118
119fn trailing_ts_tz(value: &[u8]) -> Option<(usize, i64, Vec<u8>)> {
122 let last_sp = value.iter().rposition(|&b| b == b' ')?;
123 let tz = &value[last_sp + 1..];
124 if !is_tz(tz) {
125 return None;
126 }
127 let prev_sp = value[..last_sp].iter().rposition(|&b| b == b' ')?;
128 let secs = &value[prev_sp + 1..last_sp];
129 let ts = parse_i64(secs)?;
130 Some((prev_sp, ts, tz.to_vec()))
131}
132
/// Returns `true` when `t` is exactly a `+` or `-` followed by four ASCII digits.
fn is_tz(t: &[u8]) -> bool {
    match t {
        [b'+' | b'-', digits @ ..] => {
            digits.len() == 4 && digits.iter().all(|d| d.is_ascii_digit())
        }
        _ => false,
    }
}
136
/// Parses `t` as a decimal `i64`.
///
/// Returns `None` for non-UTF-8 input, for anything `str::parse::<i64>`
/// rejects (empty, non-numeric, out of range), and for input with a leading
/// `+`, which `str::parse` alone would accept.
fn parse_i64(t: &[u8]) -> Option<i64> {
    match t.first() {
        // Reject an explicit plus sign up front.
        Some(b'+') => None,
        _ => std::str::from_utf8(t).ok()?.parse().ok(),
    }
}
146
/// Ordered `(key, value)` header pairs as produced by `split_headers`.
/// Duplicate keys are kept as separate entries; continuation lines are folded
/// into the preceding value, joined by `\n`.
type Headers = Vec<(Vec<u8>, Vec<u8>)>;
152
153fn split_headers(body: &[u8]) -> Result<(Headers, &[u8]), GitParseError> {
156 let sep = body
157 .windows(2)
158 .position(|w| w == b"\n\n")
159 .ok_or(GitParseError::Malformed("no header/message separator"))?;
160 if sep + 1 > MAX_HEADER_BLOCK {
161 return Err(GitParseError::Malformed("header block over cap"));
162 }
163 let (head, message) = (&body[..sep], &body[sep + 2..]);
164 let mut headers: Headers = Vec::new();
165 for line in head.split(|&b| b == b'\n') {
166 if let Some(cont) = line.strip_prefix(b" ") {
167 match headers.last_mut() {
169 Some((_, v)) => {
170 v.push(b'\n');
171 v.extend_from_slice(cont);
172 }
173 None => return Err(GitParseError::Malformed("leading continuation line")),
174 }
175 continue;
176 }
177 let sp = line
178 .iter()
179 .position(|&b| b == b' ')
180 .ok_or(GitParseError::Malformed("header line without value"))?;
181 headers.push((line[..sp].to_vec(), line[sp + 1..].to_vec()));
182 }
183 Ok((headers, message))
184}
185
186fn one(headers: &Headers, key: &[u8], what: &'static str) -> Result<Vec<u8>, GitParseError> {
187 let mut found = None;
188 for (k, v) in headers {
189 if k == key {
190 if found.is_some() {
191 return Err(GitParseError::Header(what));
192 }
193 found = Some(v.clone());
194 }
195 }
196 found.ok_or(GitParseError::Header(what))
197}
198
199fn id_of(value: &[u8], what: &'static str) -> Result<Sha1Id, GitParseError> {
200 std::str::from_utf8(value)
201 .ok()
202 .map(str::to_ascii_lowercase)
203 .as_deref()
204 .and_then(sha1_from_hex)
205 .ok_or(GitParseError::BadId(what))
206}
207
/// A parsed git commit object, as produced by `parse_commit`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GitCommit {
    /// Id from the single required `tree` header.
    pub tree: Sha1Id,
    /// Ids from every `parent` header, in header order (may be empty).
    pub parents: Vec<Sha1Id>,
    /// Parsed value of the single required `author` header.
    pub author: Person,
    /// Parsed value of the single required `committer` header.
    pub committer: Person,
    /// Raw bytes after the blank line that ends the header block; not decoded.
    pub message: Vec<u8>,
    /// `true` when a `gpgsig` or `gpgsig-sha256` header is present.
    pub has_gpgsig: bool,
}
221
222pub fn parse_commit(body: &[u8]) -> Result<GitCommit, GitParseError> {
224 let (headers, message) = split_headers(body)?;
225 let tree = id_of(
226 &one(&headers, b"tree", "tree missing or duplicated")?,
227 "tree",
228 )?;
229 let mut parents = Vec::new();
230 for (k, v) in &headers {
231 if k == b"parent" {
232 parents.push(id_of(v, "parent")?);
233 }
234 }
235 let author = parse_person(&one(&headers, b"author", "author missing or duplicated")?)?;
236 let committer = parse_person(&one(
237 &headers,
238 b"committer",
239 "committer missing or duplicated",
240 )?)?;
241 let has_gpgsig = headers
242 .iter()
243 .any(|(k, _)| k == b"gpgsig" || k == b"gpgsig-sha256");
244 Ok(GitCommit {
245 tree,
246 parents,
247 author,
248 committer,
249 message: message.to_vec(),
250 has_gpgsig,
251 })
252}
253
/// A parsed git annotated-tag object, as produced by `parse_tag`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GitTag {
    /// Id from the single required `object` header.
    pub object: Sha1Id,
    /// Raw value of the single required `type` header (not validated here).
    pub target_type: Vec<u8>,
    /// Raw value of the single required `tag` header.
    pub name: Vec<u8>,
    /// Parsed `tagger` header; `None` when the tag has no `tagger` header.
    pub tagger: Option<Person>,
    /// Raw bytes after the blank line that ends the header block; not decoded.
    pub message: Vec<u8>,
    /// Whether `parse_tag` found an armored signature marker in the message.
    pub has_signature: bool,
}
267
268pub fn parse_tag(body: &[u8]) -> Result<GitTag, GitParseError> {
270 let (headers, message) = split_headers(body)?;
271 let object = id_of(
272 &one(&headers, b"object", "object missing or duplicated")?,
273 "object",
274 )?;
275 let target_type = one(&headers, b"type", "type missing or duplicated")?;
276 let name = one(&headers, b"tag", "tag name missing or duplicated")?;
277 let tagger = match headers.iter().find(|(k, _)| k == b"tagger") {
278 Some((_, v)) => Some(parse_person(v)?),
279 None => None,
280 };
281 let has_signature = message
282 .windows(b"-----BEGIN PGP SIGNATURE-----".len())
283 .any(|w| w == b"-----BEGIN PGP SIGNATURE-----");
284 Ok(GitTag {
285 object,
286 target_type,
287 name,
288 tagger,
289 message: message.to_vec(),
290 has_signature,
291 })
292}
293
/// One entry of a git tree object, as produced by `parse_tree`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GitTreeEntry {
    /// Mode exactly as spelled in the object (ASCII digits, e.g. `100644` or
    /// `40000`); classify it with `map_mode`.
    pub mode: Vec<u8>,
    /// Entry name: the bytes between the mode's space and the NUL; never empty.
    pub name: Vec<u8>,
    /// The 20 raw id bytes that follow the NUL.
    pub id: Sha1Id,
}
301
302pub fn parse_tree(body: &[u8]) -> Result<Vec<GitTreeEntry>, GitParseError> {
305 let mut entries = Vec::new();
306 let mut rest = body;
307 while !rest.is_empty() {
308 let sp = rest
309 .iter()
310 .position(|&b| b == b' ')
311 .ok_or(GitParseError::Malformed(
312 "tree entry missing mode terminator",
313 ))?;
314 let mode = rest[..sp].to_vec();
315 if mode.is_empty() || mode.len() > 7 || !mode.iter().all(u8::is_ascii_digit) {
316 return Err(GitParseError::Malformed("tree entry mode not octal"));
317 }
318 rest = &rest[sp + 1..];
319 let nul = rest
320 .iter()
321 .position(|&b| b == 0)
322 .ok_or(GitParseError::Malformed("tree entry missing NUL"))?;
323 let name = rest[..nul].to_vec();
324 if name.is_empty() {
325 return Err(GitParseError::Malformed("tree entry with empty name"));
326 }
327 rest = &rest[nul + 1..];
328 if rest.len() < 20 {
329 return Err(GitParseError::Malformed("tree entry truncated id"));
330 }
331 let mut id = [0u8; 20];
332 id.copy_from_slice(&rest[..20]);
333 rest = &rest[20..];
334 entries.push(GitTreeEntry { mode, name, id });
335 }
336 Ok(entries)
337}
338
/// Result of classifying a git tree-entry mode with `map_mode`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ModeMapping {
    /// The mode is one of the spellings `map_mode` maps directly:
    /// `100644`, `40000`, `120000`, `100755`.
    Canonical(mkit_core::object::EntryMode),
    /// The mode is a variant spelling that `map_mode` folds onto an entry
    /// mode: `100664`/`100640`/`100600` onto blob, `040000` onto tree.
    Normalized(mkit_core::object::EntryMode),
    /// The mode is `160000`.
    Gitlink,
    /// Any other mode.
    Unknown,
}
352
353#[must_use]
355pub fn map_mode(mode: &[u8]) -> ModeMapping {
356 use mkit_core::object::EntryMode;
357 match mode {
358 b"100644" => ModeMapping::Canonical(EntryMode::Blob),
359 b"40000" => ModeMapping::Canonical(EntryMode::Tree),
360 b"120000" => ModeMapping::Canonical(EntryMode::Symlink),
361 b"100755" => ModeMapping::Canonical(EntryMode::Executable),
362 b"100664" | b"100640" | b"100600" => ModeMapping::Normalized(EntryMode::Blob),
363 b"040000" => ModeMapping::Normalized(EntryMode::Tree),
364 b"160000" => ModeMapping::Gitlink,
365 _ => ModeMapping::Unknown,
366 }
367}
368
// Unit tests for the person, commit, tag and tree parsers and for `map_mode`.
#[cfg(test)]
mod tests {
    use super::*;
    use mkit_core::object::EntryMode;

    // Well-formed person value: identity, timestamp and timezone are split out.
    #[test]
    fn person_plain() {
        let p = parse_person(b"Alice Example <alice@example.com> 1700000000 +0200").unwrap();
        assert_eq!(p.identity, b"Alice Example <alice@example.com>");
        assert_eq!(p.timestamp, 1_700_000_000);
        assert_eq!(p.timezone.as_deref(), Some(b"+0200".as_slice()));
    }

    // Odd identities are kept byte-for-byte; the split point is the last `>`.
    #[test]
    fn person_malformations_preserved_verbatim() {
        // No space between the name and `<`.
        let p = parse_person(b"Weird Name<a@b> 5 +0000").unwrap();
        assert_eq!(p.identity, b"Weird Name<a@b>");
        // Two bracket pairs: the identity runs through the second one.
        let p = parse_person(b"A <b> C <d@e> 5 +0000").unwrap();
        assert_eq!(p.identity, b"A <b> C <d@e>");
    }

    // A leading `-` on the timestamp is accepted.
    #[test]
    fn person_negative_timestamp_parses() {
        let p = parse_person(b"Old Soul <o@s> -86400 +0000").unwrap();
        assert_eq!(p.timestamp, -86400);
    }

    // Without brackets the value must end in ` <ts> <tz>`; otherwise it is an error.
    #[test]
    fn person_bracketless_rules() {
        let p = parse_person(b"Just A Name 1700000000 +0000").unwrap();
        assert_eq!(p.identity, b"Just A Name");
        assert_eq!(p.timestamp, 1_700_000_000);
        assert_eq!(
            parse_person(b"no timestamp here"),
            Err(GitParseError::PersonTimestamp)
        );
    }

    // A multi-line `gpgsig` header (continuation lines start with a space) is
    // folded into one header, and the message keeps its own blank line.
    #[test]
    fn commit_with_gpgsig_continuation() {
        let lines: &[&[u8]] = &[
            b"tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904",
            b"parent ce013625030ba8dba906f756967f9e9ca394464a",
            b"author A <a@x> 1700000000 +0000",
            b"committer B <b@x> 1700000001 -0500",
            b"gpgsig -----BEGIN SSH SIGNATURE-----",
            b" U1NIU0lHbGluZTI=",
            b" -----END SSH SIGNATURE-----",
            b"",
            b"msg body",
            b"",
            b"with blank line",
        ];
        let mut body = lines.join(&b"\n"[..]);
        body.push(b'\n');
        let c = parse_commit(&body).unwrap();
        assert_eq!(c.parents.len(), 1);
        assert!(c.has_gpgsig);
        assert_eq!(c.author.identity, b"A <a@x>");
        assert_eq!(c.committer.timestamp, 1_700_000_001);
        assert_eq!(c.message, b"msg body\n\nwith blank line\n");
    }

    // A commit with no `tree` header, or with two, is rejected.
    #[test]
    fn commit_rejects_missing_or_duplicate_required() {
        assert!(parse_commit(b"author A <a@x> 5 +0000\ncommitter A <a@x> 5 +0000\n\nx").is_err());
        let dup = b"tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904\n\
tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904\n\
author A <a@x> 5 +0000\ncommitter A <a@x> 5 +0000\n\nx";
        assert!(parse_commit(dup).is_err());
    }

    // Unrecognized headers are ignored and a non-UTF-8 message survives as bytes.
    #[test]
    fn commit_tolerates_unknown_and_encoding_headers() {
        let body = b"tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904\n\
author A <a@x> 5 +0000\n\
committer A <a@x> 5 +0000\n\
encoding ISO-8859-1\n\
x-custom whatever\n\
\n\
Ren\xe9\n";
        let c = parse_commit(body).unwrap();
        assert_eq!(c.message, b"Ren\xe9\n");
    }

    // The `tagger` header is optional on tags.
    #[test]
    fn tag_with_and_without_tagger() {
        let body = b"object ce013625030ba8dba906f756967f9e9ca394464a\n\
type commit\ntag v1.0.0\ntagger T <t@x> 5 +0000\n\nrelease\n";
        let t = parse_tag(body).unwrap();
        assert_eq!(t.name, b"v1.0.0");
        assert!(t.tagger.is_some());
        // Same tag shape without a `tagger` line.
        let body = b"object ce013625030ba8dba906f756967f9e9ca394464a\n\
type commit\ntag old\n\nancient\n";
        let t = parse_tag(body).unwrap();
        assert!(t.tagger.is_none());
    }

    // Builds a three-entry tree by hand and checks how each mode is classified.
    #[test]
    fn tree_parses_and_modes_classify() {
        let mut body = Vec::new();
        for (mode, name) in [
            (&b"100644"[..], &b"a.txt"[..]),
            (b"040000", b"olddir"),
            (b"160000", b"sub"),
        ] {
            // Entry layout: mode, space, name, NUL, 20 id bytes.
            body.extend_from_slice(mode);
            body.push(b' ');
            body.extend_from_slice(name);
            body.push(0);
            body.extend_from_slice(&[7u8; 20]);
        }
        let entries = parse_tree(&body).unwrap();
        assert_eq!(entries.len(), 3);
        assert_eq!(
            map_mode(&entries[0].mode),
            ModeMapping::Canonical(EntryMode::Blob)
        );
        assert_eq!(
            map_mode(&entries[1].mode),
            ModeMapping::Normalized(EntryMode::Tree)
        );
        assert_eq!(map_mode(&entries[2].mode), ModeMapping::Gitlink);
        assert_eq!(map_mode(b"777777"), ModeMapping::Unknown);
    }

    // Smoke test: every parser must return (Ok or Err) rather than panic on
    // empty, truncated or binary input. Results are deliberately discarded.
    #[test]
    fn parsers_never_panic_on_junk() {
        for junk in [
            &b""[..],
            b"\n\n",
            b" leading continuation\n\nx",
            b"tree short\n\nx",
            b"\x00\xff\xfe",
        ] {
            let _ = parse_commit(junk);
            let _ = parse_tag(junk);
            let _ = parse_tree(junk);
            let _ = parse_person(junk);
        }
    }
}