1use std::collections::{HashMap, HashSet};
8use std::io::Write;
9
10use crate::diff::{diff_trees, DiffEntry, DiffStatus};
11use crate::error::{Error, Result};
12use crate::objects::{parse_commit, parse_tag, CommitData, ObjectId, ObjectKind};
13use crate::pathspec::matches_pathspec_list;
14use crate::refs;
15use crate::repo::Repository;
16use crate::rev_list::{rev_list, OrderingMode, RevListOptions};
17
18use crate::index::{MODE_GITLINK, MODE_TREE};
19
/// Settings that control what `export_stream` writes.
#[derive(Clone, Debug, Default)]
pub struct FastExportOptions {
    /// Export every ref: forwarded to the revision walk as `all_refs`; `revisions` is ignored
    /// and each commit is written under a ref derived from the branch/tag tips.
    pub all: bool,
    /// Replace ref names, paths, identities, messages and blob contents with generated tokens.
    pub anonymize: bool,
    /// Raw `from[:to]` anonymization seeds; rejected unless `anonymize` is set.
    pub anonymize_maps: Vec<String>,
    /// Write `feature done` before the stream and `done` after it.
    pub use_done_feature: bool,
    /// Skip blob records and reference file contents by object id instead of by mark.
    pub no_data: bool,
    /// Revision specs handed to the revision walk when `all` is not set.
    pub revisions: Vec<String>,
    /// Pathspecs that limit both the revision walk and the emitted file changes.
    pub paths: Vec<String>,
}
38
/// Bookkeeping for anonymized output.
///
/// Each map remembers the replacement already handed out for an input so that repeated inputs
/// produce the same output; each counter numbers the next freshly generated replacement.
struct AnonState<'a> {
    /// Caller-provided replacements; always consulted before the generated ones.
    seeds: &'a HashMap<String, String>,
    /// Path component -> replacement.
    paths: HashMap<String, String>,
    /// Ref name component -> replacement.
    refs: HashMap<String, String>,
    /// Object id (hex) -> replacement.
    objs: HashMap<String, String>,
    /// `Name <email>` -> replacement.
    idents: HashMap<String, String>,
    /// Tag message -> replacement.
    tag_msgs: HashMap<String, String>,
    path_n: u32,
    ref_n: u32,
    oid_n: u32,
    ident_n: u32,
    subject_n: u32,
    tag_msg_n: u32,
    blob_n: u32,
}

impl<'a> AnonState<'a> {
    /// Creates an empty state whose lookups fall back to `seeds` first.
    fn new(seeds: &'a HashMap<String, String>) -> Self {
        AnonState {
            seeds,
            paths: HashMap::new(),
            refs: HashMap::new(),
            objs: HashMap::new(),
            idents: HashMap::new(),
            tag_msgs: HashMap::new(),
            path_n: 0,
            ref_n: 0,
            oid_n: 0,
            ident_n: 0,
            subject_n: 0,
            tag_msg_n: 0,
            blob_n: 0,
        }
    }

    /// Resolves `key` to its replacement.
    ///
    /// Lookup order: `seeds`, then `map`. Only when both miss is `generate` invoked, and its
    /// result is stored in `map` so the next lookup of `key` returns the same value.
    fn map_token(
        map: &mut HashMap<String, String>,
        seeds: &HashMap<String, String>,
        key: &str,
        generate: impl FnOnce() -> String,
    ) -> String {
        if let Some(known) = seeds.get(key).or_else(|| map.get(key)) {
            return known.clone();
        }
        let fresh = generate();
        map.insert(key.to_owned(), fresh.clone());
        fresh
    }

    /// Looks a path component up in `seeds`, either verbatim or by the part before its first
    /// `.` (in which case everything from that dot onwards is appended to the seed value).
    fn path_seed_lookup(comp: &str, seeds: &HashMap<String, String>) -> Option<String> {
        seeds.get(comp).cloned().or_else(|| {
            let (stem, extension) = comp.split_at(comp.find('.')?);
            seeds.get(stem).map(|mapped| format!("{mapped}{extension}"))
        })
    }

    /// Maps one path component to a seeded value or to a generated `path<N>` token.
    fn anonymize_path_component(&mut self, comp: &str) -> String {
        let seeds = self.seeds;
        match Self::path_seed_lookup(comp, seeds) {
            // Seed-derived names are remembered in `paths` too; the empty seed table makes
            // `map_token` look only at `paths` before storing the value.
            Some(seeded) => {
                Self::map_token(&mut self.paths, &HashMap::new(), comp, move || seeded)
            }
            None => {
                let counter = &mut self.path_n;
                Self::map_token(&mut self.paths, seeds, comp, || {
                    let index = *counter;
                    *counter += 1;
                    format!("path{index}")
                })
            }
        }
    }

    /// Anonymizes a `/`-separated path, component by component, unless the whole path is a
    /// seed key, in which case the seed value is returned as is.
    fn anonymize_path(&mut self, path: &str) -> String {
        if !path.is_empty() {
            if let Some(whole) = self.seeds.get(path) {
                return whole.clone();
            }
        }
        let pieces: Vec<String> = path
            .split('/')
            .map(|part| self.anonymize_path_component(part))
            .collect();
        pieces.join("/")
    }

    /// Anonymizes a ref name while keeping a recognised `refs/...` prefix intact; the remaining
    /// `/`-separated components become seeded values or `ref<N>` tokens.
    fn anonymize_refname(&mut self, refname: &str) -> String {
        const PREFIXES: &[&str] = &["refs/heads/", "refs/tags/", "refs/remotes/", "refs/"];

        // First matching prefix wins, so the more specific ones are listed first.
        let (prefix, rest) = PREFIXES
            .iter()
            .find_map(|candidate| {
                refname
                    .strip_prefix(*candidate)
                    .map(|stripped| (*candidate, stripped))
            })
            .unwrap_or(("", refname));
        if rest.is_empty() {
            return prefix.to_string();
        }

        let seeds = self.seeds;
        let counter = &mut self.ref_n;
        let mut pieces: Vec<String> = Vec::new();
        for comp in rest.split('/') {
            pieces.push(Self::map_token(&mut self.refs, seeds, comp, || {
                let index = *counter;
                *counter += 1;
                format!("ref{index}")
            }));
        }
        format!("{prefix}{}", pieces.join("/"))
    }

    /// Maps an object id in hex to a seeded value or to a counter rendered as 40 hex digits
    /// (the first generated id is `…0001`).
    fn anonymize_oid_hex(&mut self, hex: &str) -> String {
        let seeds = self.seeds;
        let counter = &mut self.oid_n;
        Self::map_token(&mut self.objs, seeds, hex, || {
            *counter += 1;
            format!("{:040x}", u128::from(*counter))
        })
    }

    /// Anonymizes a `<header> Name <email> <date>` line.
    ///
    /// The header (up to and including the first space) and the text after the last `>` are
    /// kept; the `Name <email>` part is replaced. A line without any space is returned
    /// unchanged, and a line without `>` gets a fixed "Malformed Ident" placeholder.
    fn anonymize_ident_line(&mut self, line: &str) -> String {
        let (header, tail) = match line.find(' ') {
            Some(space) => line.split_at(space + 1),
            None => return line.to_owned(),
        };
        let tail = tail.trim_end();
        let (name_email, when) = match tail.rfind('>') {
            Some(close) => tail.split_at(close + 1),
            None => {
                return format!("{header}Malformed Ident <malformed@example.com> 0 -0000");
            }
        };
        let when = when.trim_start();

        let seeds = self.seeds;
        let counter = &mut self.ident_n;
        let ident = Self::map_token(&mut self.idents, seeds, name_email, || {
            let index = *counter;
            *counter += 1;
            format!("User {index} <user{index}@example.com>")
        });
        format!("{header}{ident} {when}")
    }

    /// Produces the next numbered placeholder commit message.
    fn anonymize_commit_message(&mut self) -> String {
        let message = format!("subject {}\n\nbody\n", self.subject_n);
        self.subject_n += 1;
        message
    }

    /// Maps a tag message to a seeded value or to a `tag message <N>` token.
    fn anonymize_tag_message(&mut self, msg: &str) -> String {
        let seeds = self.seeds;
        let counter = &mut self.tag_msg_n;
        Self::map_token(&mut self.tag_msgs, seeds, msg, || {
            let index = *counter;
            *counter += 1;
            format!("tag message {index}")
        })
    }

    /// Produces the next numbered placeholder blob content.
    fn anonymize_blob_payload(&mut self) -> Vec<u8> {
        let payload = format!("anonymous blob {}", self.blob_n);
        self.blob_n += 1;
        payload.into_bytes()
    }
}
206
207fn parse_anonymize_maps(entries: &[String]) -> Result<HashMap<String, String>> {
208 let mut out = HashMap::new();
209 for raw in entries {
210 let raw = raw.trim();
211 if raw.is_empty() {
212 return Err(Error::InvalidRef(
213 "--anonymize-map token cannot be empty".to_owned(),
214 ));
215 }
216 if let Some((k, v)) = raw.split_once(':') {
217 if k.is_empty() || v.is_empty() {
218 return Err(Error::InvalidRef(
219 "--anonymize-map token cannot be empty".to_owned(),
220 ));
221 }
222 out.insert(k.to_string(), v.to_string());
223 } else {
224 out.insert(raw.to_string(), raw.to_string());
225 }
226 }
227 Ok(out)
228}
229
230fn revision_source_tips(repo: &Repository) -> Result<Vec<(String, ObjectId)>> {
236 let mut tips = refs::list_refs(&repo.git_dir, "refs/heads/")?;
237 for (name, oid) in refs::list_refs(&repo.git_dir, "refs/tags/")? {
238 let tip = match peel_tag_to_commit_oid(repo, oid) {
239 Ok(c) => c,
240 Err(_) => continue,
241 };
242 tips.push((name, tip));
243 }
244 Ok(tips)
245}
246
247fn ref_source_for_commit(
248 repo: &Repository,
249 oid: ObjectId,
250 head_branches: &[(String, ObjectId)],
251) -> Result<String> {
252 let mut best: Option<(&str, (u8, usize))> = None;
253 for (name, tip) in head_branches {
254 if *tip != oid {
255 continue;
256 }
257 let score = (
258 if name.starts_with("refs/heads/") {
259 0
260 } else {
261 1
262 },
263 name.len(),
264 );
265 if best.is_none_or(|(_, s)| score < s) {
266 best = Some((name.as_str(), score));
267 }
268 }
269 if let Some((n, _)) = best {
270 return Ok(n.to_string());
271 }
272 let mut source: HashMap<ObjectId, String> = HashMap::new();
274 let mut queue: std::collections::VecDeque<ObjectId> = std::collections::VecDeque::new();
275 for (name, tip) in head_branches {
276 if source.insert(*tip, name.clone()).is_none() {
277 queue.push_back(*tip);
278 }
279 }
280 while let Some(c) = queue.pop_front() {
281 let pname = source.get(&c).cloned().unwrap_or_default();
282 let commit = load_commit(repo, c)?;
283 for p in commit.parents {
284 if source.contains_key(&p) {
285 continue;
286 }
287 source.insert(p, pname.clone());
288 queue.push_back(p);
289 }
290 }
291 source
292 .get(&oid)
293 .cloned()
294 .ok_or_else(|| Error::InvalidRef(format!("no ref source for commit {oid}")))
295}
296
297fn load_commit(repo: &Repository, oid: ObjectId) -> Result<CommitData> {
298 let obj = repo.odb.read(&oid)?;
299 if obj.kind != ObjectKind::Commit {
300 return Err(Error::CorruptObject(format!(
301 "expected commit, got {}",
302 obj.kind.as_str()
303 )));
304 }
305 parse_commit(&obj.data)
306}
307
308fn peel_tag_to_commit_oid(repo: &Repository, mut oid: ObjectId) -> Result<ObjectId> {
309 loop {
310 let obj = repo.odb.read(&oid)?;
311 match obj.kind {
312 ObjectKind::Commit => return Ok(oid),
313 ObjectKind::Tag => {
314 let t = parse_tag(&obj.data)?;
315 oid = t.object;
316 }
317 _ => {
318 return Err(Error::CorruptObject(
319 "tag does not point to a commit".to_owned(),
320 ));
321 }
322 }
323 }
324}
325
326fn depth_first_diff_sort(entries: &mut [DiffEntry]) {
327 entries.sort_by(|a, b| {
328 let pa = a.path();
329 let pb = b.path();
330 let la = pa.len();
331 let lb = pb.len();
332 let minlen = la.min(lb);
333 let cmp = pa.as_bytes()[..minlen].cmp(&pb.as_bytes()[..minlen]);
334 if cmp != std::cmp::Ordering::Equal {
335 return cmp;
336 }
337 let len_cmp = lb.cmp(&la);
338 if len_cmp != std::cmp::Ordering::Equal {
339 return len_cmp;
340 }
341 let ar = matches!(a.status, DiffStatus::Renamed);
342 let br = matches!(b.status, DiffStatus::Renamed);
343 ar.cmp(&br)
344 });
345}
346
347fn diff_entry_matches_paths(entry: &DiffEntry, paths: &[String]) -> bool {
348 if paths.is_empty() {
349 return true;
350 }
351 matches_pathspec_list(entry.path(), paths)
352 || entry
353 .old_path
354 .as_deref()
355 .is_some_and(|path| matches_pathspec_list(path, paths))
356}
357
358fn export_ref_for_non_all(repo: &Repository) -> Result<String> {
359 refs::read_head(&repo.git_dir)?.ok_or_else(|| {
360 Error::InvalidRef("fast-export: detached HEAD export not implemented".to_owned())
361 })
362}
363
364pub fn export_stream(
370 repo: &Repository,
371 mut writer: impl Write,
372 options: &FastExportOptions,
373) -> Result<()> {
374 let seeds = if options.anonymize {
375 parse_anonymize_maps(&options.anonymize_maps)?
376 } else {
377 HashMap::new()
378 };
379
380 if !options.anonymize && !options.anonymize_maps.is_empty() {
381 return Err(Error::InvalidRef(
382 "the option '--anonymize-map' requires '--anonymize'".to_owned(),
383 ));
384 }
385
386 let head_branches = revision_source_tips(repo)?;
387 let non_all_export_ref = if options.all {
388 None
389 } else {
390 Some(export_ref_for_non_all(repo)?)
391 };
392
393 let opts = RevListOptions {
394 all_refs: options.all,
395 ordering: OrderingMode::Topo,
396 reverse: true,
397 paths: options.paths.clone(),
398 ..RevListOptions::default()
399 };
400 let positive_specs = if options.all {
401 &[] as &[String]
402 } else {
403 options.revisions.as_slice()
404 };
405 let rev_result = rev_list(repo, positive_specs, &[] as &[String], &opts)?;
406 let commits: Vec<ObjectId> = rev_result.commits;
407
408 let commit_set: HashSet<ObjectId> = commits.iter().copied().collect();
409
410 let mut marks: HashMap<ObjectId, u32> = HashMap::new();
411 let mut next_mark: u32 = 0;
412
413 let mut anon = if options.anonymize {
414 Some(AnonState::new(&seeds))
415 } else {
416 None
417 };
418
419 if options.use_done_feature {
420 writeln!(writer, "feature done")?;
421 }
422
423 for oid in &commits {
424 let raw_commit = load_commit(repo, *oid)?;
425 let parent_tree = if let Some(p) = raw_commit.parents.first() {
426 let pc = load_commit(repo, *p)?;
427 Some(pc.tree)
428 } else {
429 None
430 };
431 let diffs = diff_trees(&repo.odb, parent_tree.as_ref(), Some(&raw_commit.tree), "")?;
432 let mut diff_vec: Vec<DiffEntry> = diffs
433 .into_iter()
434 .filter(|e| {
435 matches!(
436 e.status,
437 DiffStatus::Added
438 | DiffStatus::Deleted
439 | DiffStatus::Modified
440 | DiffStatus::Renamed
441 | DiffStatus::Copied
442 | DiffStatus::TypeChanged
443 ) && diff_entry_matches_paths(e, &options.paths)
444 })
445 .collect();
446 depth_first_diff_sort(&mut diff_vec);
447
448 if !options.no_data {
449 for e in &diff_vec {
450 if e.status == DiffStatus::Deleted {
451 continue;
452 }
453 let mode = u32::from_str_radix(e.new_mode.trim(), 8).unwrap_or(0);
454 if mode == MODE_TREE || mode == MODE_GITLINK {
455 continue;
456 }
457 let blob_oid = e.new_oid;
458 if marks.contains_key(&blob_oid) {
459 continue;
460 }
461 next_mark += 1;
462 marks.insert(blob_oid, next_mark);
463 writeln!(writer, "blob")?;
464 writeln!(writer, "mark :{next_mark}")?;
465 let payload = if let Some(a) = anon.as_mut() {
466 a.anonymize_blob_payload()
467 } else {
468 let o = repo.odb.read(&blob_oid)?;
469 if o.kind != ObjectKind::Blob {
470 return Err(Error::CorruptObject("expected blob".to_owned()));
471 }
472 o.data
473 };
474 writeln!(writer, "data {}", payload.len())?;
475 writer.write_all(&payload)?;
476 writeln!(writer)?;
477 }
478 }
479
480 let refname = if let Some(export_ref) = non_all_export_ref.as_deref() {
481 export_ref.to_owned()
482 } else {
483 ref_source_for_commit(repo, *oid, &head_branches)?
484 };
485 let export_ref = if let Some(a) = anon.as_mut() {
486 a.anonymize_refname(&refname)
487 } else {
488 refname.clone()
489 };
490
491 if raw_commit.parents.is_empty() {
492 writeln!(writer, "reset {export_ref}")?;
493 }
494
495 next_mark += 1;
496 let commit_mark = next_mark;
497 marks.insert(*oid, commit_mark);
498
499 writeln!(writer, "commit {export_ref}")?;
500 writeln!(writer, "mark :{commit_mark}")?;
501
502 let author_line = if let Some(a) = anon.as_mut() {
503 a.anonymize_ident_line(&format!("author {}", raw_commit.author))
504 } else {
505 format!("author {}", raw_commit.author)
506 };
507 let committer_line = if let Some(a) = anon.as_mut() {
508 a.anonymize_ident_line(&format!("committer {}", raw_commit.committer))
509 } else {
510 format!("committer {}", raw_commit.committer)
511 };
512 writeln!(writer, "{author_line}")?;
513 writeln!(writer, "{committer_line}")?;
514
515 let message = if let Some(a) = anon.as_mut() {
516 a.anonymize_commit_message()
517 } else {
518 raw_commit.message.clone()
519 };
520 let msg_bytes = message.as_bytes();
521 writeln!(writer, "data {}", msg_bytes.len())?;
522 writer.write_all(msg_bytes)?;
523 writeln!(writer)?;
524
525 let exported_parents = raw_commit
526 .parents
527 .iter()
528 .filter_map(|p| marks.get(p).copied())
529 .collect::<Vec<_>>();
530 for (i, m) in exported_parents.iter().enumerate() {
531 let label = if i == 0 { "from" } else { "merge" };
532 write!(writer, "{label} ")?;
533 writeln!(writer, ":{m}")?;
534 }
535 if !options.paths.is_empty() && exported_parents.is_empty() {
536 writeln!(writer, "deleteall")?;
537 }
538
539 let mut changed: HashSet<String> = HashSet::new();
540 for e in &diff_vec {
541 match e.status {
542 DiffStatus::Deleted => {
543 let path = if let Some(a) = anon.as_mut() {
544 a.anonymize_path(e.path())
545 } else {
546 e.path().to_string()
547 };
548 writeln!(writer, "D {path}")?;
549 changed.insert(e.path().to_string());
550 }
551 DiffStatus::Renamed | DiffStatus::Copied => {
552 let old_p = e.old_path.as_deref().unwrap_or("");
553 let skip_modify = e.old_oid == e.new_oid
554 && e.old_mode == e.new_mode
555 && !changed.contains(old_p);
556 if !changed.contains(old_p) {
557 let op = if let Some(a) = anon.as_mut() {
558 a.anonymize_path(old_p)
559 } else {
560 old_p.to_string()
561 };
562 let np = if let Some(a) = anon.as_mut() {
563 a.anonymize_path(e.path())
564 } else {
565 e.path().to_string()
566 };
567 writeln!(writer, "{} {op} {np}", e.status.letter())?;
568 }
569 if !skip_modify {
570 fallthrough_modify(
571 repo,
572 &mut writer,
573 e,
574 &marks,
575 anon.as_mut(),
576 options.anonymize,
577 options.no_data,
578 )?;
579 }
580 changed.insert(old_p.to_string());
581 changed.insert(e.path().to_string());
582 }
583 DiffStatus::Added | DiffStatus::Modified | DiffStatus::TypeChanged => {
584 fallthrough_modify(
585 repo,
586 &mut writer,
587 e,
588 &marks,
589 anon.as_mut(),
590 options.anonymize,
591 options.no_data,
592 )?;
593 changed.insert(e.path().to_string());
594 }
595 _ => {}
596 }
597 }
598 writeln!(writer)?;
599 }
600
601 let tag_refs = refs::list_refs(&repo.git_dir, "refs/tags/")?;
603 for (full_name, tag_oid) in tag_refs {
604 let tag_obj = repo.odb.read(&tag_oid)?;
605 if tag_obj.kind != ObjectKind::Tag {
606 continue;
607 }
608 let tag_data = parse_tag(&tag_obj.data)?;
609 let Ok(target_commit) = peel_tag_to_commit_oid(repo, tag_data.object) else {
610 continue;
611 };
612 if !commit_set.contains(&target_commit) {
613 continue;
614 }
615 let Some(&tip_mark) = marks.get(&target_commit) else {
616 continue;
617 };
618
619 let export_name = if let Some(a) = anon.as_mut() {
620 a.anonymize_refname(&full_name)
621 } else {
622 full_name.clone()
623 };
624 let short_name = export_name
625 .strip_prefix("refs/tags/")
626 .unwrap_or(&export_name)
627 .to_string();
628
629 let tagger_line = if let Some(t) = tag_data.tagger.as_deref() {
630 if let Some(a) = anon.as_mut() {
631 a.anonymize_ident_line(&format!("tagger {t}"))
632 } else {
633 format!("tagger {t}")
634 }
635 } else {
636 String::new()
637 };
638
639 let msg = if options.anonymize {
640 anon.as_mut()
641 .map(|a| a.anonymize_tag_message(&tag_data.message))
642 .unwrap_or_default()
643 } else {
644 tag_data.message.clone()
645 };
646
647 writeln!(writer, "tag {short_name}")?;
648 writeln!(writer, "from :{tip_mark}")?;
649 if !tagger_line.is_empty() {
650 writeln!(writer, "{tagger_line}")?;
651 }
652 let msg_bytes = msg.as_bytes();
653 writeln!(writer, "data {}", msg_bytes.len())?;
654 writer.write_all(msg_bytes)?;
655 writeln!(writer)?;
656 }
657
658 if options.use_done_feature {
659 writeln!(writer, "done")?;
660 }
661
662 Ok(())
663}
664
665fn fallthrough_modify(
666 _repo: &Repository,
667 writer: &mut impl Write,
668 e: &DiffEntry,
669 marks: &HashMap<ObjectId, u32>,
670 mut anon: Option<&mut AnonState>,
671 _anonymize: bool,
672 no_data: bool,
673) -> Result<()> {
674 let mode = u32::from_str_radix(e.new_mode.trim(), 8).unwrap_or(0);
675 let path = if let Some(a) = anon.as_mut() {
676 a.anonymize_path(e.path())
677 } else {
678 e.path().to_string()
679 };
680 if mode == MODE_GITLINK {
681 let hex = e.new_oid.to_hex();
682 let oid_out = if let Some(a) = anon {
683 a.anonymize_oid_hex(&hex)
684 } else {
685 hex
686 };
687 writeln!(writer, "M {:06o} {oid_out} {path}", mode)?;
688 return Ok(());
689 }
690 if no_data {
691 let hex = e.new_oid.to_hex();
692 let oid_out = if let Some(a) = anon.as_mut() {
693 a.anonymize_oid_hex(&hex)
694 } else {
695 hex
696 };
697 writeln!(writer, "M {:06o} {oid_out} {path}", mode)?;
698 return Ok(());
699 }
700 let Some(&bm) = marks.get(&e.new_oid) else {
701 return Err(Error::IndexError(format!(
702 "fast-export: missing mark for blob {}",
703 e.new_oid
704 )));
705 };
706 writeln!(writer, "M {:06o} :{bm} {path}", mode)?;
707 Ok(())
708}