Skip to main content

grit_lib/
fast_export.rs

1//! [`git fast-export`](https://git-scm.com/docs/git-fast-export) stream generation.
2//!
3//! Supports the subset needed by upstream tests: `--all`, `--anonymize`,
4//! `--anonymize-map`, topological commit order with `reverse` (oldest first),
5//! blob/commit marks, per-commit tree diffs, and annotated tags on commits.
6
7use std::collections::{HashMap, HashSet};
8use std::io::Write;
9
10use crate::diff::{diff_trees, DiffEntry, DiffStatus};
11use crate::error::{Error, Result};
12use crate::objects::{parse_commit, parse_tag, CommitData, ObjectId, ObjectKind};
13use crate::pathspec::matches_pathspec_list;
14use crate::refs;
15use crate::repo::Repository;
16use crate::rev_list::{rev_list, OrderingMode, RevListOptions};
17
18use crate::index::{MODE_GITLINK, MODE_TREE};
19
/// Settings that drive [`export_stream`].
///
/// The derived [`Default`] turns every flag off and leaves every list empty.
#[derive(Clone, Debug, Default)]
pub struct FastExportOptions {
    /// Export all heads under `refs/heads/` together with their reachable history.
    /// When set, `revisions` is not consulted.
    pub all: bool,
    /// Swap paths, identities, messages, and non-mark object ids for stable placeholders.
    pub anonymize: bool,
    /// Seed replacements, each either `from:to` or a bare `token` that maps to itself.
    /// A later entry with the same key overrides an earlier one (as in Git).
    /// Supplying any entry without `anonymize` is rejected.
    pub anonymize_maps: Vec<String>,
    /// Write `feature done` at the start of the stream and `done` at its end
    /// (what `git fast-import` expects once the feature is negotiated).
    pub use_done_feature: bool,
    /// Skip `blob` commands and reference full object ids on `M` lines instead of marks
    /// (the behaviour of `git fast-export --no-data`).
    pub no_data: bool,
    /// Positive revision arguments; only used while `all` is false.
    pub revisions: Vec<String>,
    /// Pathspecs restricting both the exported commits and their file commands.
    pub paths: Vec<String>,
}
38
/// Replacement tables and counters backing `--anonymize` output.
///
/// Every namespace (path components, ref components, object ids, identities,
/// tag messages) keeps its own cache, so one input always maps to the same
/// placeholder within a single export. Caller-supplied `seeds` win over
/// generated placeholders.
struct AnonState<'a> {
    /// Caller-provided token replacements, checked before any cache.
    seeds: &'a HashMap<String, String>,
    /// Path component -> replacement.
    paths: HashMap<String, String>,
    /// Ref name component -> replacement.
    refs: HashMap<String, String>,
    /// Object id (hex) -> replacement hex.
    objs: HashMap<String, String>,
    /// `Name <email>` -> replacement identity.
    idents: HashMap<String, String>,
    /// Tag message -> replacement message.
    tag_msgs: HashMap<String, String>,
    /// Index used for the next generated `pathN`.
    path_n: u32,
    /// Index used for the next generated `refN`.
    ref_n: u32,
    /// Number of object ids generated so far (the first generated id is 1).
    oid_n: u32,
    /// Index used for the next generated `User N`.
    ident_n: u32,
    /// Index used for the next generated `subject N`.
    subject_n: u32,
    /// Index used for the next generated `tag message N`.
    tag_msg_n: u32,
    /// Index used for the next generated `anonymous blob N`.
    blob_n: u32,
}

impl<'a> AnonState<'a> {
    /// Creates an empty state that consults `seeds` before generating placeholders.
    fn new(seeds: &'a HashMap<String, String>) -> Self {
        AnonState {
            seeds,
            paths: HashMap::default(),
            refs: HashMap::default(),
            objs: HashMap::default(),
            idents: HashMap::default(),
            tag_msgs: HashMap::default(),
            path_n: 0,
            ref_n: 0,
            oid_n: 0,
            ident_n: 0,
            subject_n: 0,
            tag_msg_n: 0,
            blob_n: 0,
        }
    }

    /// Returns the current counter value and advances the counter by one.
    fn bump(counter: &mut u32) -> u32 {
        let current = *counter;
        *counter += 1;
        current
    }

    /// Resolves `key` to its replacement.
    ///
    /// A seed wins (and is not recorded in `map`); otherwise a previously
    /// cached value is reused; otherwise `make` produces a new value which is
    /// cached in `map` before being returned.
    fn map_token(
        map: &mut HashMap<String, String>,
        seeds: &HashMap<String, String>,
        key: &str,
        make: impl FnOnce() -> String,
    ) -> String {
        if let Some(known) = seeds.get(key).or_else(|| map.get(key)) {
            return known.clone();
        }
        let fresh = make();
        map.insert(key.to_owned(), fresh.clone());
        fresh
    }

    /// Looks up a seed for a path component.
    ///
    /// An exact match is preferred. Failing that, the text before the first
    /// `.` is tried as the key and everything from that `.` onwards is
    /// appended unchanged to the seeded value.
    fn path_seed_lookup(comp: &str, seeds: &HashMap<String, String>) -> Option<String> {
        if let Some(exact) = seeds.get(comp) {
            return Some(exact.clone());
        }
        let (stem, _) = comp.split_once('.')?;
        let ext = &comp[stem.len()..];
        seeds.get(stem).map(|seeded| format!("{seeded}{ext}"))
    }

    /// Anonymizes a single path component, generating `pathN` when no seed applies.
    fn anonymize_path_component(&mut self, comp: &str) -> String {
        match Self::path_seed_lookup(comp, self.seeds) {
            Some(seeded) => {
                // The seed lookup already happened; cache the result without consulting seeds again.
                let no_seeds = HashMap::new();
                Self::map_token(&mut self.paths, &no_seeds, comp, move || seeded)
            }
            None => {
                let counter = &mut self.path_n;
                Self::map_token(&mut self.paths, self.seeds, comp, || {
                    format!("path{}", Self::bump(counter))
                })
            }
        }
    }

    /// Anonymizes a `/`-separated path.
    ///
    /// A non-empty path that is itself a seed key is replaced wholesale;
    /// otherwise each component is anonymized independently.
    fn anonymize_path(&mut self, path: &str) -> String {
        if !path.is_empty() {
            if let Some(whole) = self.seeds.get(path) {
                return whole.clone();
            }
        }
        let mut pieces = Vec::new();
        for part in path.split('/') {
            pieces.push(self.anonymize_path_component(part));
        }
        pieces.join("/")
    }

    /// Anonymizes a ref name while keeping a recognised `refs/...` prefix intact.
    ///
    /// The remainder is split on `/` and each component becomes a seed value
    /// or a generated `refN`.
    fn anonymize_refname(&mut self, refname: &str) -> String {
        const PREFIXES: &[&str] = &["refs/heads/", "refs/tags/", "refs/remotes/", "refs/"];
        let (prefix, rest) = PREFIXES
            .iter()
            .find_map(|p| refname.strip_prefix(p).map(|tail| (*p, tail)))
            .unwrap_or(("", refname));
        if rest.is_empty() {
            return prefix.to_string();
        }
        let mut pieces = Vec::new();
        for comp in rest.split('/') {
            let counter = &mut self.ref_n;
            pieces.push(Self::map_token(&mut self.refs, self.seeds, comp, || {
                format!("ref{}", Self::bump(counter))
            }));
        }
        format!("{prefix}{}", pieces.join("/"))
    }

    /// Maps an object id to a 40-digit hex counter value (the first generated id is 1).
    fn anonymize_oid_hex(&mut self, hex: &str) -> String {
        let counter = &mut self.oid_n;
        Self::map_token(&mut self.objs, self.seeds, hex, || {
            *counter += 1;
            format!("{:040x}", u128::from(*counter))
        })
    }

    /// Anonymizes a `<header> NAME <EMAIL> DATE TZ` line.
    ///
    /// The header word and everything after the last `>` are preserved. A
    /// line without a space is returned untouched; a line without `>` gets a
    /// fixed "malformed" identity.
    fn anonymize_ident_line(&mut self, line: &str) -> String {
        let Some((keyword, rest)) = line.split_once(' ') else {
            return line.to_owned();
        };
        let rest = rest.trim_end();
        let Some(close) = rest.rfind('>') else {
            return format!("{keyword} Malformed Ident <malformed@example.com> 0 -0000");
        };
        let (name_email, tail) = rest.split_at(close + 1);
        let date = tail.trim_start();
        let counter = &mut self.ident_n;
        let ident = Self::map_token(&mut self.idents, self.seeds, name_email, || {
            let n = Self::bump(counter);
            format!("User {n} <user{n}@example.com>")
        });
        format!("{keyword} {ident} {date}")
    }

    /// Produces the next numbered placeholder commit message.
    fn anonymize_commit_message(&mut self) -> String {
        format!("subject {}\n\nbody\n", Self::bump(&mut self.subject_n))
    }

    /// Maps a tag message to a seed value or a generated `tag message N`.
    fn anonymize_tag_message(&mut self, msg: &str) -> String {
        let counter = &mut self.tag_msg_n;
        Self::map_token(&mut self.tag_msgs, self.seeds, msg, || {
            format!("tag message {}", Self::bump(counter))
        })
    }

    /// Produces the next numbered placeholder blob content.
    fn anonymize_blob_payload(&mut self) -> Vec<u8> {
        format!("anonymous blob {}", Self::bump(&mut self.blob_n)).into_bytes()
    }
}
206
207fn parse_anonymize_maps(entries: &[String]) -> Result<HashMap<String, String>> {
208    let mut out = HashMap::new();
209    for raw in entries {
210        let raw = raw.trim();
211        if raw.is_empty() {
212            return Err(Error::InvalidRef(
213                "--anonymize-map token cannot be empty".to_owned(),
214            ));
215        }
216        if let Some((k, v)) = raw.split_once(':') {
217            if k.is_empty() || v.is_empty() {
218                return Err(Error::InvalidRef(
219                    "--anonymize-map token cannot be empty".to_owned(),
220                ));
221            }
222            out.insert(k.to_string(), v.to_string());
223        } else {
224            out.insert(raw.to_string(), raw.to_string());
225        }
226    }
227    Ok(out)
228}
229
230/// Ref tips used to assign each exported commit a `commit <ref>` line (Git `revision_sources`).
231///
232/// Includes `refs/heads/*` and peeled `refs/tags/*` so tagged-only commits (e.g. `git tag E` with no
233/// branch) still get a valid source ref. Without tags, `fast-export --all` can fail with
234/// `no ref source for commit` when the walk reaches a commit reachable only via tags.
235fn revision_source_tips(repo: &Repository) -> Result<Vec<(String, ObjectId)>> {
236    let mut tips = refs::list_refs(&repo.git_dir, "refs/heads/")?;
237    for (name, oid) in refs::list_refs(&repo.git_dir, "refs/tags/")? {
238        let tip = match peel_tag_to_commit_oid(repo, oid) {
239            Ok(c) => c,
240            Err(_) => continue,
241        };
242        tips.push((name, tip));
243    }
244    Ok(tips)
245}
246
247fn ref_source_for_commit(
248    repo: &Repository,
249    oid: ObjectId,
250    head_branches: &[(String, ObjectId)],
251) -> Result<String> {
252    let mut best: Option<(&str, (u8, usize))> = None;
253    for (name, tip) in head_branches {
254        if *tip != oid {
255            continue;
256        }
257        let score = (
258            if name.starts_with("refs/heads/") {
259                0
260            } else {
261                1
262            },
263            name.len(),
264        );
265        if best.is_none_or(|(_, s)| score < s) {
266            best = Some((name.as_str(), score));
267        }
268    }
269    if let Some((n, _)) = best {
270        return Ok(n.to_string());
271    }
272    // Propagate first-seen ref name along parents (matches Git `revision_sources`).
273    let mut source: HashMap<ObjectId, String> = HashMap::new();
274    let mut queue: std::collections::VecDeque<ObjectId> = std::collections::VecDeque::new();
275    for (name, tip) in head_branches {
276        if source.insert(*tip, name.clone()).is_none() {
277            queue.push_back(*tip);
278        }
279    }
280    while let Some(c) = queue.pop_front() {
281        let pname = source.get(&c).cloned().unwrap_or_default();
282        let commit = load_commit(repo, c)?;
283        for p in commit.parents {
284            if source.contains_key(&p) {
285                continue;
286            }
287            source.insert(p, pname.clone());
288            queue.push_back(p);
289        }
290    }
291    source
292        .get(&oid)
293        .cloned()
294        .ok_or_else(|| Error::InvalidRef(format!("no ref source for commit {oid}")))
295}
296
297fn load_commit(repo: &Repository, oid: ObjectId) -> Result<CommitData> {
298    let obj = repo.odb.read(&oid)?;
299    if obj.kind != ObjectKind::Commit {
300        return Err(Error::CorruptObject(format!(
301            "expected commit, got {}",
302            obj.kind.as_str()
303        )));
304    }
305    parse_commit(&obj.data)
306}
307
308fn peel_tag_to_commit_oid(repo: &Repository, mut oid: ObjectId) -> Result<ObjectId> {
309    loop {
310        let obj = repo.odb.read(&oid)?;
311        match obj.kind {
312            ObjectKind::Commit => return Ok(oid),
313            ObjectKind::Tag => {
314                let t = parse_tag(&obj.data)?;
315                oid = t.object;
316            }
317            _ => {
318                return Err(Error::CorruptObject(
319                    "tag does not point to a commit".to_owned(),
320                ));
321            }
322        }
323    }
324}
325
326fn depth_first_diff_sort(entries: &mut [DiffEntry]) {
327    entries.sort_by(|a, b| {
328        let pa = a.path();
329        let pb = b.path();
330        let la = pa.len();
331        let lb = pb.len();
332        let minlen = la.min(lb);
333        let cmp = pa.as_bytes()[..minlen].cmp(&pb.as_bytes()[..minlen]);
334        if cmp != std::cmp::Ordering::Equal {
335            return cmp;
336        }
337        let len_cmp = lb.cmp(&la);
338        if len_cmp != std::cmp::Ordering::Equal {
339            return len_cmp;
340        }
341        let ar = matches!(a.status, DiffStatus::Renamed);
342        let br = matches!(b.status, DiffStatus::Renamed);
343        ar.cmp(&br)
344    });
345}
346
347fn diff_entry_matches_paths(entry: &DiffEntry, paths: &[String]) -> bool {
348    if paths.is_empty() {
349        return true;
350    }
351    matches_pathspec_list(entry.path(), paths)
352        || entry
353            .old_path
354            .as_deref()
355            .is_some_and(|path| matches_pathspec_list(path, paths))
356}
357
358fn export_ref_for_non_all(repo: &Repository) -> Result<String> {
359    refs::read_head(&repo.git_dir)?.ok_or_else(|| {
360        Error::InvalidRef("fast-export: detached HEAD export not implemented".to_owned())
361    })
362}
363
364/// Write a fast-import stream for the repository to `writer`.
365///
366/// # Errors
367///
368/// Propagates object database, ref, and revision walk errors.
369pub fn export_stream(
370    repo: &Repository,
371    mut writer: impl Write,
372    options: &FastExportOptions,
373) -> Result<()> {
374    let seeds = if options.anonymize {
375        parse_anonymize_maps(&options.anonymize_maps)?
376    } else {
377        HashMap::new()
378    };
379
380    if !options.anonymize && !options.anonymize_maps.is_empty() {
381        return Err(Error::InvalidRef(
382            "the option '--anonymize-map' requires '--anonymize'".to_owned(),
383        ));
384    }
385
386    let head_branches = revision_source_tips(repo)?;
387    let non_all_export_ref = if options.all {
388        None
389    } else {
390        Some(export_ref_for_non_all(repo)?)
391    };
392
393    let opts = RevListOptions {
394        all_refs: options.all,
395        ordering: OrderingMode::Topo,
396        reverse: true,
397        paths: options.paths.clone(),
398        ..RevListOptions::default()
399    };
400    let positive_specs = if options.all {
401        &[] as &[String]
402    } else {
403        options.revisions.as_slice()
404    };
405    let rev_result = rev_list(repo, positive_specs, &[] as &[String], &opts)?;
406    let commits: Vec<ObjectId> = rev_result.commits;
407
408    let commit_set: HashSet<ObjectId> = commits.iter().copied().collect();
409
410    let mut marks: HashMap<ObjectId, u32> = HashMap::new();
411    let mut next_mark: u32 = 0;
412
413    let mut anon = if options.anonymize {
414        Some(AnonState::new(&seeds))
415    } else {
416        None
417    };
418
419    if options.use_done_feature {
420        writeln!(writer, "feature done")?;
421    }
422
423    for oid in &commits {
424        let raw_commit = load_commit(repo, *oid)?;
425        let parent_tree = if let Some(p) = raw_commit.parents.first() {
426            let pc = load_commit(repo, *p)?;
427            Some(pc.tree)
428        } else {
429            None
430        };
431        let diffs = diff_trees(&repo.odb, parent_tree.as_ref(), Some(&raw_commit.tree), "")?;
432        let mut diff_vec: Vec<DiffEntry> = diffs
433            .into_iter()
434            .filter(|e| {
435                matches!(
436                    e.status,
437                    DiffStatus::Added
438                        | DiffStatus::Deleted
439                        | DiffStatus::Modified
440                        | DiffStatus::Renamed
441                        | DiffStatus::Copied
442                        | DiffStatus::TypeChanged
443                ) && diff_entry_matches_paths(e, &options.paths)
444            })
445            .collect();
446        depth_first_diff_sort(&mut diff_vec);
447
448        if !options.no_data {
449            for e in &diff_vec {
450                if e.status == DiffStatus::Deleted {
451                    continue;
452                }
453                let mode = u32::from_str_radix(e.new_mode.trim(), 8).unwrap_or(0);
454                if mode == MODE_TREE || mode == MODE_GITLINK {
455                    continue;
456                }
457                let blob_oid = e.new_oid;
458                if marks.contains_key(&blob_oid) {
459                    continue;
460                }
461                next_mark += 1;
462                marks.insert(blob_oid, next_mark);
463                writeln!(writer, "blob")?;
464                writeln!(writer, "mark :{next_mark}")?;
465                let payload = if let Some(a) = anon.as_mut() {
466                    a.anonymize_blob_payload()
467                } else {
468                    let o = repo.odb.read(&blob_oid)?;
469                    if o.kind != ObjectKind::Blob {
470                        return Err(Error::CorruptObject("expected blob".to_owned()));
471                    }
472                    o.data
473                };
474                writeln!(writer, "data {}", payload.len())?;
475                writer.write_all(&payload)?;
476                writeln!(writer)?;
477            }
478        }
479
480        let refname = if let Some(export_ref) = non_all_export_ref.as_deref() {
481            export_ref.to_owned()
482        } else {
483            ref_source_for_commit(repo, *oid, &head_branches)?
484        };
485        let export_ref = if let Some(a) = anon.as_mut() {
486            a.anonymize_refname(&refname)
487        } else {
488            refname.clone()
489        };
490
491        if raw_commit.parents.is_empty() {
492            writeln!(writer, "reset {export_ref}")?;
493        }
494
495        next_mark += 1;
496        let commit_mark = next_mark;
497        marks.insert(*oid, commit_mark);
498
499        writeln!(writer, "commit {export_ref}")?;
500        writeln!(writer, "mark :{commit_mark}")?;
501
502        let author_line = if let Some(a) = anon.as_mut() {
503            a.anonymize_ident_line(&format!("author {}", raw_commit.author))
504        } else {
505            format!("author {}", raw_commit.author)
506        };
507        let committer_line = if let Some(a) = anon.as_mut() {
508            a.anonymize_ident_line(&format!("committer {}", raw_commit.committer))
509        } else {
510            format!("committer {}", raw_commit.committer)
511        };
512        writeln!(writer, "{author_line}")?;
513        writeln!(writer, "{committer_line}")?;
514
515        let message = if let Some(a) = anon.as_mut() {
516            a.anonymize_commit_message()
517        } else {
518            raw_commit.message.clone()
519        };
520        let msg_bytes = message.as_bytes();
521        writeln!(writer, "data {}", msg_bytes.len())?;
522        writer.write_all(msg_bytes)?;
523        writeln!(writer)?;
524
525        let exported_parents = raw_commit
526            .parents
527            .iter()
528            .filter_map(|p| marks.get(p).copied())
529            .collect::<Vec<_>>();
530        for (i, m) in exported_parents.iter().enumerate() {
531            let label = if i == 0 { "from" } else { "merge" };
532            write!(writer, "{label} ")?;
533            writeln!(writer, ":{m}")?;
534        }
535        if !options.paths.is_empty() && exported_parents.is_empty() {
536            writeln!(writer, "deleteall")?;
537        }
538
539        let mut changed: HashSet<String> = HashSet::new();
540        for e in &diff_vec {
541            match e.status {
542                DiffStatus::Deleted => {
543                    let path = if let Some(a) = anon.as_mut() {
544                        a.anonymize_path(e.path())
545                    } else {
546                        e.path().to_string()
547                    };
548                    writeln!(writer, "D {path}")?;
549                    changed.insert(e.path().to_string());
550                }
551                DiffStatus::Renamed | DiffStatus::Copied => {
552                    let old_p = e.old_path.as_deref().unwrap_or("");
553                    let skip_modify = e.old_oid == e.new_oid
554                        && e.old_mode == e.new_mode
555                        && !changed.contains(old_p);
556                    if !changed.contains(old_p) {
557                        let op = if let Some(a) = anon.as_mut() {
558                            a.anonymize_path(old_p)
559                        } else {
560                            old_p.to_string()
561                        };
562                        let np = if let Some(a) = anon.as_mut() {
563                            a.anonymize_path(e.path())
564                        } else {
565                            e.path().to_string()
566                        };
567                        writeln!(writer, "{} {op} {np}", e.status.letter())?;
568                    }
569                    if !skip_modify {
570                        fallthrough_modify(
571                            repo,
572                            &mut writer,
573                            e,
574                            &marks,
575                            anon.as_mut(),
576                            options.anonymize,
577                            options.no_data,
578                        )?;
579                    }
580                    changed.insert(old_p.to_string());
581                    changed.insert(e.path().to_string());
582                }
583                DiffStatus::Added | DiffStatus::Modified | DiffStatus::TypeChanged => {
584                    fallthrough_modify(
585                        repo,
586                        &mut writer,
587                        e,
588                        &marks,
589                        anon.as_mut(),
590                        options.anonymize,
591                        options.no_data,
592                    )?;
593                    changed.insert(e.path().to_string());
594                }
595                _ => {}
596            }
597        }
598        writeln!(writer)?;
599    }
600
601    // Annotated tags that point at exported commits
602    let tag_refs = refs::list_refs(&repo.git_dir, "refs/tags/")?;
603    for (full_name, tag_oid) in tag_refs {
604        let tag_obj = repo.odb.read(&tag_oid)?;
605        if tag_obj.kind != ObjectKind::Tag {
606            continue;
607        }
608        let tag_data = parse_tag(&tag_obj.data)?;
609        let Ok(target_commit) = peel_tag_to_commit_oid(repo, tag_data.object) else {
610            continue;
611        };
612        if !commit_set.contains(&target_commit) {
613            continue;
614        }
615        let Some(&tip_mark) = marks.get(&target_commit) else {
616            continue;
617        };
618
619        let export_name = if let Some(a) = anon.as_mut() {
620            a.anonymize_refname(&full_name)
621        } else {
622            full_name.clone()
623        };
624        let short_name = export_name
625            .strip_prefix("refs/tags/")
626            .unwrap_or(&export_name)
627            .to_string();
628
629        let tagger_line = if let Some(t) = tag_data.tagger.as_deref() {
630            if let Some(a) = anon.as_mut() {
631                a.anonymize_ident_line(&format!("tagger {t}"))
632            } else {
633                format!("tagger {t}")
634            }
635        } else {
636            String::new()
637        };
638
639        let msg = if options.anonymize {
640            anon.as_mut()
641                .map(|a| a.anonymize_tag_message(&tag_data.message))
642                .unwrap_or_default()
643        } else {
644            tag_data.message.clone()
645        };
646
647        writeln!(writer, "tag {short_name}")?;
648        writeln!(writer, "from :{tip_mark}")?;
649        if !tagger_line.is_empty() {
650            writeln!(writer, "{tagger_line}")?;
651        }
652        let msg_bytes = msg.as_bytes();
653        writeln!(writer, "data {}", msg_bytes.len())?;
654        writer.write_all(msg_bytes)?;
655        writeln!(writer)?;
656    }
657
658    if options.use_done_feature {
659        writeln!(writer, "done")?;
660    }
661
662    Ok(())
663}
664
665fn fallthrough_modify(
666    _repo: &Repository,
667    writer: &mut impl Write,
668    e: &DiffEntry,
669    marks: &HashMap<ObjectId, u32>,
670    mut anon: Option<&mut AnonState>,
671    _anonymize: bool,
672    no_data: bool,
673) -> Result<()> {
674    let mode = u32::from_str_radix(e.new_mode.trim(), 8).unwrap_or(0);
675    let path = if let Some(a) = anon.as_mut() {
676        a.anonymize_path(e.path())
677    } else {
678        e.path().to_string()
679    };
680    if mode == MODE_GITLINK {
681        let hex = e.new_oid.to_hex();
682        let oid_out = if let Some(a) = anon {
683            a.anonymize_oid_hex(&hex)
684        } else {
685            hex
686        };
687        writeln!(writer, "M {:06o} {oid_out} {path}", mode)?;
688        return Ok(());
689    }
690    if no_data {
691        let hex = e.new_oid.to_hex();
692        let oid_out = if let Some(a) = anon.as_mut() {
693            a.anonymize_oid_hex(&hex)
694        } else {
695            hex
696        };
697        writeln!(writer, "M {:06o} {oid_out} {path}", mode)?;
698        return Ok(());
699    }
700    let Some(&bm) = marks.get(&e.new_oid) else {
701        return Err(Error::IndexError(format!(
702            "fast-export: missing mark for blob {}",
703            e.new_oid
704        )));
705    };
706    writeln!(writer, "M {:06o} :{bm} {path}", mode)?;
707    Ok(())
708}