1use std::collections::{HashMap, HashSet};
8use std::io::Write;
9
10use crate::diff::{diff_trees, DiffEntry, DiffStatus};
11use crate::error::{Error, Result};
12use crate::objects::{parse_commit, parse_tag, CommitData, ObjectId, ObjectKind};
13use crate::refs;
14use crate::repo::Repository;
15use crate::rev_list::{rev_list, OrderingMode, RevListOptions};
16
17use crate::index::{MODE_GITLINK, MODE_TREE};
18
/// Options controlling [`export_stream`].
#[derive(Debug, Clone, Default)]
pub struct FastExportOptions {
    /// Export everything; `export_stream` returns an error when this is `false`.
    pub all: bool,
    /// Replace ref names, identities, messages, paths and blob contents with
    /// generated placeholders.
    pub anonymize: bool,
    /// `from[:to]` tokens that pin specific replacements while anonymizing; a
    /// bare `from` maps to itself. Rejected unless `anonymize` is set.
    pub anonymize_maps: Vec<String>,
    /// Emit a leading `feature done` line and a trailing `done` line.
    pub use_done_feature: bool,
    /// Skip `blob` records; file changes then name blobs by object id instead
    /// of by mark.
    pub no_data: bool,
}
33
/// Bookkeeping for an anonymized export.
///
/// Each kind of sensitive text has its own cache, so a given input always maps
/// to the same replacement, and its own counter used to mint the next
/// replacement. Caller-supplied `seeds` take precedence over generated tokens.
struct AnonState<'a> {
    /// Caller-supplied `from -> to` replacements.
    seeds: &'a HashMap<String, String>,
    /// Path component -> replacement.
    paths: HashMap<String, String>,
    /// Ref name component -> replacement.
    refs: HashMap<String, String>,
    /// Object id (hex) -> replacement.
    objs: HashMap<String, String>,
    /// `Name <email>` -> replacement.
    idents: HashMap<String, String>,
    /// Tag message -> replacement.
    tag_msgs: HashMap<String, String>,
    path_n: u32,
    ref_n: u32,
    oid_n: u32,
    ident_n: u32,
    subject_n: u32,
    tag_msg_n: u32,
    blob_n: u32,
}

impl<'a> AnonState<'a> {
    /// Creates an empty state that consults `seeds` before generating tokens.
    fn new(seeds: &'a HashMap<String, String>) -> Self {
        Self {
            seeds,
            paths: HashMap::new(),
            refs: HashMap::new(),
            objs: HashMap::new(),
            idents: HashMap::new(),
            tag_msgs: HashMap::new(),
            path_n: 0,
            ref_n: 0,
            oid_n: 0,
            ident_n: 0,
            subject_n: 0,
            tag_msg_n: 0,
            blob_n: 0,
        }
    }

    /// Returns the current value of `counter` and advances it by one.
    fn bump(counter: &mut u32) -> u32 {
        let current = *counter;
        *counter += 1;
        current
    }

    /// Resolves `key` to its replacement.
    ///
    /// Lookup order is `seeds`, then `map`; when both miss, `make` produces a
    /// fresh replacement which is cached in `map`. `make` is not called on a hit.
    fn map_token(
        map: &mut HashMap<String, String>,
        seeds: &HashMap<String, String>,
        key: &str,
        make: impl FnOnce() -> String,
    ) -> String {
        if let Some(known) = seeds.get(key).or_else(|| map.get(key)) {
            return known.clone();
        }
        let fresh = make();
        map.insert(key.to_string(), fresh.clone());
        fresh
    }

    /// Looks up a path component in `seeds`, either verbatim or by the part
    /// before its first `.` (in which case the remainder, dot included, is
    /// appended to the seeded value).
    fn path_seed_lookup(comp: &str, seeds: &HashMap<String, String>) -> Option<String> {
        if let Some(exact) = seeds.get(comp) {
            return Some(exact.clone());
        }
        let (stem, _) = comp.split_once('.')?;
        let ext = &comp[stem.len()..];
        seeds.get(stem).map(|seeded| format!("{seeded}{ext}"))
    }

    /// Anonymizes one `/`-separated path component.
    fn anonymize_path_component(&mut self, comp: &str) -> String {
        if let Some(seeded) = Self::path_seed_lookup(comp, self.seeds) {
            // Seeds never change, so a cached entry for `comp` can only be this
            // same value; record it and hand it back.
            return self.paths.entry(comp.to_string()).or_insert(seeded).clone();
        }
        // Borrow the counter separately from the cache so the closure does not
        // need to capture all of `self`.
        let counter = &mut self.path_n;
        Self::map_token(&mut self.paths, self.seeds, comp, || {
            format!("path{}", Self::bump(counter))
        })
    }

    /// Anonymizes a full path. A seed for the entire path wins; otherwise each
    /// component is mapped independently.
    fn anonymize_path(&mut self, path: &str) -> String {
        if !path.is_empty() {
            if let Some(whole) = self.seeds.get(path) {
                return whole.clone();
            }
        }
        path.split('/')
            .map(|part| self.anonymize_path_component(part))
            .collect::<Vec<_>>()
            .join("/")
    }

    /// Anonymizes a ref name, keeping a recognised `refs/...` prefix intact
    /// and mapping each remaining component.
    fn anonymize_refname(&mut self, refname: &str) -> String {
        const PREFIXES: &[&str] = &["refs/heads/", "refs/tags/", "refs/remotes/", "refs/"];
        // First matching prefix wins; the list is ordered most-specific first.
        let (prefix, rest) = PREFIXES
            .iter()
            .find_map(|p| refname.strip_prefix(*p).map(|tail| (*p, tail)))
            .unwrap_or(("", refname));

        let mut out = String::from(prefix);
        if rest.is_empty() {
            return out;
        }
        let counter = &mut self.ref_n;
        let cache = &mut self.refs;
        let seeds = self.seeds;
        for (idx, comp) in rest.split('/').enumerate() {
            if idx > 0 {
                out.push('/');
            }
            let token = Self::map_token(cache, seeds, comp, || {
                format!("ref{}", Self::bump(counter))
            });
            out.push_str(&token);
        }
        out
    }

    /// Maps an object id to a 40-digit zero-padded hex counter (starting at 1).
    fn anonymize_oid_hex(&mut self, hex: &str) -> String {
        let counter = &mut self.oid_n;
        Self::map_token(&mut self.objs, self.seeds, hex, || {
            *counter += 1;
            format!("{:040x}", *counter)
        })
    }

    /// Anonymizes a `<header> Name <email> <rest>` line.
    ///
    /// The header word and whatever follows the last `>` are kept; the
    /// `Name <email>` part is replaced. A line without a space is returned
    /// unchanged, and one without `>` gets a fixed "malformed" identity.
    fn anonymize_ident_line(&mut self, line: &str) -> String {
        let Some(space) = line.find(' ') else {
            return line.to_owned();
        };
        let (header, rest) = line.split_at(space + 1);
        let rest = rest.trim_end();
        let Some(gt) = rest.rfind('>') else {
            return format!("{header}Malformed Ident <malformed@example.com> 0 -0000");
        };
        let (name_email, after) = rest.split_at(gt + 1);
        let after = after.trim_start();
        let counter = &mut self.ident_n;
        let ident = Self::map_token(&mut self.idents, self.seeds, name_email, || {
            let n = Self::bump(counter);
            format!("User {n} <user{n}@example.com>")
        });
        format!("{header}{ident} {after}")
    }

    /// Produces the next placeholder commit message (never cached).
    fn anonymize_commit_message(&mut self) -> String {
        let n = Self::bump(&mut self.subject_n);
        format!("subject {n}\n\nbody\n")
    }

    /// Maps a tag message to a stable placeholder.
    fn anonymize_tag_message(&mut self, msg: &str) -> String {
        let counter = &mut self.tag_msg_n;
        Self::map_token(&mut self.tag_msgs, self.seeds, msg, || {
            format!("tag message {}", Self::bump(counter))
        })
    }

    /// Produces the next placeholder blob body (never cached).
    fn anonymize_blob_payload(&mut self) -> Vec<u8> {
        let n = Self::bump(&mut self.blob_n);
        format!("anonymous blob {n}").into_bytes()
    }
}
201
202fn parse_anonymize_maps(entries: &[String]) -> Result<HashMap<String, String>> {
203 let mut out = HashMap::new();
204 for raw in entries {
205 let raw = raw.trim();
206 if raw.is_empty() {
207 return Err(Error::InvalidRef(
208 "--anonymize-map token cannot be empty".to_owned(),
209 ));
210 }
211 if let Some((k, v)) = raw.split_once(':') {
212 if k.is_empty() || v.is_empty() {
213 return Err(Error::InvalidRef(
214 "--anonymize-map token cannot be empty".to_owned(),
215 ));
216 }
217 out.insert(k.to_string(), v.to_string());
218 } else {
219 out.insert(raw.to_string(), raw.to_string());
220 }
221 }
222 Ok(out)
223}
224
225fn revision_source_tips(repo: &Repository) -> Result<Vec<(String, ObjectId)>> {
231 let mut tips = refs::list_refs(&repo.git_dir, "refs/heads/")?;
232 for (name, oid) in refs::list_refs(&repo.git_dir, "refs/tags/")? {
233 let tip = match peel_tag_to_commit_oid(repo, oid) {
234 Ok(c) => c,
235 Err(_) => continue,
236 };
237 tips.push((name, tip));
238 }
239 Ok(tips)
240}
241
242fn ref_source_for_commit(
243 repo: &Repository,
244 oid: ObjectId,
245 head_branches: &[(String, ObjectId)],
246) -> Result<String> {
247 let mut best: Option<(&str, (u8, usize))> = None;
248 for (name, tip) in head_branches {
249 if *tip != oid {
250 continue;
251 }
252 let score = (
253 if name.starts_with("refs/heads/") {
254 0
255 } else {
256 1
257 },
258 name.len(),
259 );
260 if best.is_none_or(|(_, s)| score < s) {
261 best = Some((name.as_str(), score));
262 }
263 }
264 if let Some((n, _)) = best {
265 return Ok(n.to_string());
266 }
267 let mut source: HashMap<ObjectId, String> = HashMap::new();
269 let mut queue: std::collections::VecDeque<ObjectId> = std::collections::VecDeque::new();
270 for (name, tip) in head_branches {
271 if source.insert(*tip, name.clone()).is_none() {
272 queue.push_back(*tip);
273 }
274 }
275 while let Some(c) = queue.pop_front() {
276 let pname = source.get(&c).cloned().unwrap_or_default();
277 let commit = load_commit(repo, c)?;
278 for p in commit.parents {
279 if source.contains_key(&p) {
280 continue;
281 }
282 source.insert(p, pname.clone());
283 queue.push_back(p);
284 }
285 }
286 source
287 .get(&oid)
288 .cloned()
289 .ok_or_else(|| Error::InvalidRef(format!("no ref source for commit {oid}")))
290}
291
292fn load_commit(repo: &Repository, oid: ObjectId) -> Result<CommitData> {
293 let obj = repo.odb.read(&oid)?;
294 if obj.kind != ObjectKind::Commit {
295 return Err(Error::CorruptObject(format!(
296 "expected commit, got {}",
297 obj.kind.as_str()
298 )));
299 }
300 parse_commit(&obj.data)
301}
302
303fn peel_tag_to_commit_oid(repo: &Repository, mut oid: ObjectId) -> Result<ObjectId> {
304 loop {
305 let obj = repo.odb.read(&oid)?;
306 match obj.kind {
307 ObjectKind::Commit => return Ok(oid),
308 ObjectKind::Tag => {
309 let t = parse_tag(&obj.data)?;
310 oid = t.object;
311 }
312 _ => {
313 return Err(Error::CorruptObject(
314 "tag does not point to a commit".to_owned(),
315 ));
316 }
317 }
318 }
319}
320
321fn depth_first_diff_sort(entries: &mut [DiffEntry]) {
322 entries.sort_by(|a, b| {
323 let pa = a.path();
324 let pb = b.path();
325 let la = pa.len();
326 let lb = pb.len();
327 let minlen = la.min(lb);
328 let cmp = pa.as_bytes()[..minlen].cmp(&pb.as_bytes()[..minlen]);
329 if cmp != std::cmp::Ordering::Equal {
330 return cmp;
331 }
332 let len_cmp = lb.cmp(&la);
333 if len_cmp != std::cmp::Ordering::Equal {
334 return len_cmp;
335 }
336 let ar = matches!(a.status, DiffStatus::Renamed);
337 let br = matches!(b.status, DiffStatus::Renamed);
338 ar.cmp(&br)
339 });
340}
341
/// Writes a fast-export style stream for the whole repository to `writer`.
///
/// For every commit returned by `rev_list` the function emits, in order: any
/// blobs not yet seen (unless `options.no_data`), an optional `reset` for
/// parentless commits, the `commit` record with its mark, author, committer,
/// message and parent links, and one line per file change. After all commits,
/// a `tag` record is written for each tag object under `refs/tags/` whose
/// peeled commit was exported.
///
/// When `options.anonymize` is set, ref names, identities, messages, paths,
/// blob contents and unmarked object ids are replaced via `AnonState`.
///
/// # Errors
///
/// Returns `Error::InvalidRef` when `options.all` is not set, or when
/// `anonymize_maps` is non-empty without `anonymize`. Errors from parsing the
/// maps, reading objects, diffing, ref lookup and writing are propagated.
pub fn export_stream(
    repo: &Repository,
    mut writer: impl Write,
    options: &FastExportOptions,
) -> Result<()> {
    if !options.all {
        return Err(Error::InvalidRef(
            "fast-export: only --all is implemented".to_owned(),
        ));
    }

    // Seeds are only parsed (and validated) when anonymizing.
    let seeds = if options.anonymize {
        parse_anonymize_maps(&options.anonymize_maps)?
    } else {
        HashMap::new()
    };

    if !options.anonymize && !options.anonymize_maps.is_empty() {
        return Err(Error::InvalidRef(
            "the option '--anonymize-map' requires '--anonymize'".to_owned(),
        ));
    }

    // Ref tips used to decide which ref name each commit is exported under.
    let head_branches = revision_source_tips(repo)?;

    // All refs, topological ordering, reversed — presumably so that parents are
    // listed before their children (confirm against `rev_list`).
    let opts = RevListOptions {
        all_refs: true,
        ordering: OrderingMode::Topo,
        reverse: true,
        ..RevListOptions::default()
    };
    let rev_result = rev_list(repo, &[] as &[String], &[] as &[String], &opts)?;
    let commits: Vec<ObjectId> = rev_result.commits;

    let commit_set: HashSet<ObjectId> = commits.iter().copied().collect();

    // Blobs and commits share one mark sequence; the first mark issued is 1.
    let mut marks: HashMap<ObjectId, u32> = HashMap::new();
    let mut next_mark: u32 = 0;

    let mut anon = if options.anonymize {
        Some(AnonState::new(&seeds))
    } else {
        None
    };

    if options.use_done_feature {
        writeln!(writer, "feature done")?;
    }

    for oid in &commits {
        let raw_commit = load_commit(repo, *oid)?;
        // Changes are computed against the first parent only (or against
        // nothing for a parentless commit).
        let parent_tree = if let Some(p) = raw_commit.parents.first() {
            let pc = load_commit(repo, *p)?;
            Some(pc.tree)
        } else {
            None
        };
        let diffs = diff_trees(&repo.odb, parent_tree.as_ref(), Some(&raw_commit.tree), "")?;
        let mut diff_vec: Vec<DiffEntry> = diffs
            .into_iter()
            .filter(|e| {
                matches!(
                    e.status,
                    DiffStatus::Added
                        | DiffStatus::Deleted
                        | DiffStatus::Modified
                        | DiffStatus::Renamed
                        | DiffStatus::Copied
                        | DiffStatus::TypeChanged
                )
            })
            .collect();
        depth_first_diff_sort(&mut diff_vec);

        // Emit each blob this commit introduces, once per object id, before the
        // commit record that refers to it by mark.
        if !options.no_data {
            for e in &diff_vec {
                if e.status == DiffStatus::Deleted {
                    continue;
                }
                // An unparsable mode becomes 0 and is treated as a regular blob.
                let mode = u32::from_str_radix(e.new_mode.trim(), 8).unwrap_or(0);
                if mode == MODE_TREE || mode == MODE_GITLINK {
                    continue;
                }
                let blob_oid = e.new_oid;
                if marks.contains_key(&blob_oid) {
                    continue;
                }
                next_mark += 1;
                marks.insert(blob_oid, next_mark);
                writeln!(writer, "blob")?;
                writeln!(writer, "mark :{next_mark}")?;
                let payload = if let Some(a) = anon.as_mut() {
                    a.anonymize_blob_payload()
                } else {
                    let o = repo.odb.read(&blob_oid)?;
                    if o.kind != ObjectKind::Blob {
                        return Err(Error::CorruptObject("expected blob".to_owned()));
                    }
                    o.data
                };
                writeln!(writer, "data {}", payload.len())?;
                writer.write_all(&payload)?;
                writeln!(writer)?;
            }
        }

        let refname = ref_source_for_commit(repo, *oid, &head_branches)?;
        let export_ref = if let Some(a) = anon.as_mut() {
            a.anonymize_refname(&refname)
        } else {
            refname.clone()
        };

        // A parentless commit is preceded by a `reset` of its ref.
        if raw_commit.parents.is_empty() {
            writeln!(writer, "reset {export_ref}")?;
        }

        next_mark += 1;
        let commit_mark = next_mark;
        marks.insert(*oid, commit_mark);

        writeln!(writer, "commit {export_ref}")?;
        writeln!(writer, "mark :{commit_mark}")?;

        let author_line = if let Some(a) = anon.as_mut() {
            a.anonymize_ident_line(&format!("author {}", raw_commit.author))
        } else {
            format!("author {}", raw_commit.author)
        };
        let committer_line = if let Some(a) = anon.as_mut() {
            a.anonymize_ident_line(&format!("committer {}", raw_commit.committer))
        } else {
            format!("committer {}", raw_commit.committer)
        };
        writeln!(writer, "{author_line}")?;
        writeln!(writer, "{committer_line}")?;

        let message = if let Some(a) = anon.as_mut() {
            a.anonymize_commit_message()
        } else {
            raw_commit.message.clone()
        };
        let msg_bytes = message.as_bytes();
        writeln!(writer, "data {}", msg_bytes.len())?;
        writer.write_all(msg_bytes)?;
        writeln!(writer)?;

        // First parent is written as `from`, the rest as `merge`. A parent with
        // a mark is referenced by it; otherwise its (possibly anonymized) hex
        // object id is written.
        for (i, p) in raw_commit.parents.iter().enumerate() {
            let label = if i == 0 { "from" } else { "merge" };
            write!(writer, "{label} ")?;
            if let Some(&m) = marks.get(p) {
                writeln!(writer, ":{m}")?;
            } else {
                let hex = p.to_hex();
                let out = if let Some(a) = anon.as_mut() {
                    a.anonymize_oid_hex(&hex)
                } else {
                    hex
                };
                writeln!(writer, "{out}")?;
            }
        }

        // Original (un-anonymized) paths already written for this commit. A
        // rename/copy line is suppressed when its source is in this set.
        let mut changed: HashSet<String> = HashSet::new();
        for e in &diff_vec {
            match e.status {
                DiffStatus::Deleted => {
                    let path = if let Some(a) = anon.as_mut() {
                        a.anonymize_path(e.path())
                    } else {
                        e.path().to_string()
                    };
                    writeln!(writer, "D {path}")?;
                    changed.insert(e.path().to_string());
                }
                DiffStatus::Renamed | DiffStatus::Copied => {
                    let old_p = e.old_path.as_deref().unwrap_or("");
                    // The rename/copy line alone suffices only when content and
                    // mode are unchanged and that line is actually written.
                    let skip_modify = e.old_oid == e.new_oid
                        && e.old_mode == e.new_mode
                        && !changed.contains(old_p);
                    if !changed.contains(old_p) {
                        let op = if let Some(a) = anon.as_mut() {
                            a.anonymize_path(old_p)
                        } else {
                            old_p.to_string()
                        };
                        let np = if let Some(a) = anon.as_mut() {
                            a.anonymize_path(e.path())
                        } else {
                            e.path().to_string()
                        };
                        writeln!(writer, "{} {op} {np}", e.status.letter())?;
                    }
                    if !skip_modify {
                        fallthrough_modify(
                            repo,
                            &mut writer,
                            e,
                            &marks,
                            anon.as_mut(),
                            options.anonymize,
                            options.no_data,
                        )?;
                    }
                    changed.insert(old_p.to_string());
                    changed.insert(e.path().to_string());
                }
                DiffStatus::Added | DiffStatus::Modified | DiffStatus::TypeChanged => {
                    fallthrough_modify(
                        repo,
                        &mut writer,
                        e,
                        &marks,
                        anon.as_mut(),
                        options.anonymize,
                        options.no_data,
                    )?;
                    changed.insert(e.path().to_string());
                }
                _ => {}
            }
        }
        writeln!(writer)?;
    }

    // Tag objects are written after all commits so they can refer to commit
    // marks. Refs under `refs/tags/` that do not point at a tag object are
    // skipped here.
    let tag_refs = refs::list_refs(&repo.git_dir, "refs/tags/")?;
    for (full_name, tag_oid) in tag_refs {
        let tag_obj = repo.odb.read(&tag_oid)?;
        if tag_obj.kind != ObjectKind::Tag {
            continue;
        }
        let tag_data = parse_tag(&tag_obj.data)?;
        // Tags that do not peel to an exported commit are dropped.
        let Ok(target_commit) = peel_tag_to_commit_oid(repo, tag_data.object) else {
            continue;
        };
        if !commit_set.contains(&target_commit) {
            continue;
        }
        let Some(&tip_mark) = marks.get(&target_commit) else {
            continue;
        };

        let export_name = if let Some(a) = anon.as_mut() {
            a.anonymize_refname(&full_name)
        } else {
            full_name.clone()
        };
        let short_name = export_name
            .strip_prefix("refs/tags/")
            .unwrap_or(&export_name)
            .to_string();

        // An empty string means "no tagger line".
        let tagger_line = if let Some(t) = tag_data.tagger.as_deref() {
            if let Some(a) = anon.as_mut() {
                a.anonymize_ident_line(&format!("tagger {t}"))
            } else {
                format!("tagger {t}")
            }
        } else {
            String::new()
        };

        let msg = if options.anonymize {
            anon.as_mut()
                .map(|a| a.anonymize_tag_message(&tag_data.message))
                .unwrap_or_default()
        } else {
            tag_data.message.clone()
        };

        writeln!(writer, "tag {short_name}")?;
        writeln!(writer, "from :{tip_mark}")?;
        if !tagger_line.is_empty() {
            writeln!(writer, "{tagger_line}")?;
        }
        let msg_bytes = msg.as_bytes();
        writeln!(writer, "data {}", msg_bytes.len())?;
        writer.write_all(msg_bytes)?;
        writeln!(writer)?;
    }

    if options.use_done_feature {
        writeln!(writer, "done")?;
    }

    Ok(())
}
635
636fn fallthrough_modify(
637 _repo: &Repository,
638 writer: &mut impl Write,
639 e: &DiffEntry,
640 marks: &HashMap<ObjectId, u32>,
641 mut anon: Option<&mut AnonState>,
642 _anonymize: bool,
643 no_data: bool,
644) -> Result<()> {
645 let mode = u32::from_str_radix(e.new_mode.trim(), 8).unwrap_or(0);
646 let path = if let Some(a) = anon.as_mut() {
647 a.anonymize_path(e.path())
648 } else {
649 e.path().to_string()
650 };
651 if mode == MODE_GITLINK {
652 let hex = e.new_oid.to_hex();
653 let oid_out = if let Some(a) = anon {
654 a.anonymize_oid_hex(&hex)
655 } else {
656 hex
657 };
658 writeln!(writer, "M {:06o} {oid_out} {path}", mode)?;
659 return Ok(());
660 }
661 if no_data {
662 let hex = e.new_oid.to_hex();
663 let oid_out = if let Some(a) = anon.as_mut() {
664 a.anonymize_oid_hex(&hex)
665 } else {
666 hex
667 };
668 writeln!(writer, "M {:06o} {oid_out} {path}", mode)?;
669 return Ok(());
670 }
671 let Some(&bm) = marks.get(&e.new_oid) else {
672 return Err(Error::IndexError(format!(
673 "fast-export: missing mark for blob {}",
674 e.new_oid
675 )));
676 };
677 writeln!(writer, "M {:06o} :{bm} {path}", mode)?;
678 Ok(())
679}