Skip to main content

panproto_git/
import.rs

1//! Import git repositories into panproto-vcs.
2//!
3//! Walks the git commit DAG topologically, parses each commit's file tree
4//! into a panproto project schema, and creates panproto-vcs commits that
5//! preserve authorship, timestamps, and parent structure.
6
7use std::collections::HashMap;
8use std::hash::BuildHasher;
9use std::path::PathBuf;
10
11use panproto_project::ProjectBuilder;
12use panproto_vcs::{CommitObject, Object, ObjectId, Store};
13use rustc_hash::FxHashMap;
14
15use crate::error::GitBridgeError;
16
17/// Result of importing a git repository.
18#[derive(Debug)]
19pub struct ImportResult {
20    /// Number of commits imported.
21    pub commit_count: usize,
22    /// The panproto-vcs object ID of the HEAD commit after import.
23    pub head_id: ObjectId,
24    /// Mapping from git commit OIDs to panproto-vcs object IDs.
25    pub oid_map: Vec<(git2::Oid, ObjectId)>,
26}
27
28/// Import a range of git commits into a panproto-vcs store.
29///
30/// Walks the git commit DAG starting from `revspec` (e.g. "HEAD", "main",
31/// "HEAD~10..HEAD") in topological order. For each commit:
32///
33/// 1. Reads all files from the git tree
34/// 2. Parses them into a project schema via `panproto-project`
35/// 3. Stores the schema as a panproto-vcs object
36/// 4. Creates a panproto-vcs commit preserving author, timestamp, message, parents
37///
38/// This is a convenience wrapper around [`import_git_repo_incremental`] with
39/// an empty `known` map, which re-imports the entire history reachable from
40/// `revspec`. For repeated imports against a persistent store, prefer
41/// [`import_git_repo_incremental`] to avoid walking already-imported ancestors.
42///
43/// # Errors
44///
45/// Returns [`GitBridgeError`] if git operations, parsing, or VCS operations fail.
46pub fn import_git_repo<S: Store>(
47    git_repo: &git2::Repository,
48    panproto_store: &mut S,
49    revspec: &str,
50) -> Result<ImportResult, GitBridgeError> {
51    import_git_repo_incremental(git_repo, panproto_store, revspec, &FxHashMap::default())
52}
53
54/// Incrementally import a range of git commits into a panproto-vcs store.
55///
56/// Like [`import_git_repo`], but skips commits whose git OID appears in
57/// `known`. The `known` map provides the panproto-vcs [`ObjectId`] that
58/// each already-imported git commit was translated to, so that children
59/// of skipped commits can be wired up to the correct panproto parent.
60///
61/// Skipping is performed via `git2`'s revwalk `hide`, so the walker never
62/// visits ancestors of known commits either. This makes repeated imports
63/// against a persistent store run in time proportional to the *new*
64/// commits, not the full history.
65///
66/// # Edge cases
67///
68/// - If `revspec` itself resolves to a commit in `known`, no commits are
69///   imported and [`ImportResult::head_id`] is set from the `known` map.
70/// - If a new commit has a parent that is neither in `known` nor walked
71///   (i.e. the `known` map is inconsistent with the actual DAG), that
72///   parent is dropped from the panproto commit's parents, matching the
73///   behavior of the non-incremental path.
74///
75/// # Errors
76///
77/// Returns [`GitBridgeError`] if git operations, parsing, or VCS operations fail.
78pub fn import_git_repo_incremental<S: Store, H: BuildHasher>(
79    git_repo: &git2::Repository,
80    panproto_store: &mut S,
81    revspec: &str,
82    known: &HashMap<git2::Oid, ObjectId, H>,
83) -> Result<ImportResult, GitBridgeError> {
84    // Resolve the revspec to a commit.
85    let obj = git_repo.revparse_single(revspec)?;
86    let head_commit = obj
87        .peel_to_commit()
88        .map_err(|e| GitBridgeError::ObjectRead {
89            oid: obj.id().to_string(),
90            reason: format!("not a commit: {e}"),
91        })?;
92    let head_git_oid = head_commit.id();
93
94    // Collect new commits in topological order (parents before children),
95    // skipping any commit reachable from a `known` entry.
96    let mut commits = Vec::new();
97    collect_new_ancestors(git_repo, head_git_oid, known, &mut commits)?;
98
99    // Seed the git→panproto map with already-known entries so that new
100    // commits can resolve parents that live on the "known" side of the cut.
101    let mut git_to_panproto: FxHashMap<git2::Oid, ObjectId> =
102        known.iter().map(|(&k, &v)| (k, v)).collect();
103    let mut oid_map: Vec<(git2::Oid, ObjectId)> = Vec::new();
104    let mut last_id = ObjectId::ZERO;
105
106    for git_oid in &commits {
107        let git_commit = git_repo.find_commit(*git_oid)?;
108        let tree = git_commit.tree()?;
109
110        // Parse all files in the tree into a project schema.
111        let mut project_builder = ProjectBuilder::new();
112        walk_git_tree(git_repo, &tree, &PathBuf::new(), &mut project_builder)?;
113
114        // Build the project schema.
115        let project = if project_builder.file_count() == 0 {
116            // Empty tree (initial commit with no files). Create a minimal schema.
117            let proto = panproto_protocols::raw_file::protocol();
118            let builder = panproto_schema::SchemaBuilder::new(&proto);
119
120            builder
121                .vertex("root", "file", None)
122                .map_err(|e| {
123                    GitBridgeError::Project(panproto_project::ProjectError::CoproductFailed {
124                        reason: format!("empty tree schema: {e}"),
125                    })
126                })?
127                .build()
128                .map_err(|e| {
129                    GitBridgeError::Project(panproto_project::ProjectError::CoproductFailed {
130                        reason: format!("empty tree build: {e}"),
131                    })
132                })?
133        } else {
134            project_builder.build()?.schema
135        };
136
137        // Store the schema.
138        let schema_id = panproto_store.put(&Object::Schema(Box::new(project)))?;
139
140        // Map parent git OIDs to panproto-vcs parent IDs.
141        let parents: Vec<ObjectId> = git_commit
142            .parent_ids()
143            .filter_map(|parent_oid| git_to_panproto.get(&parent_oid).copied())
144            .collect();
145
146        // Extract author info.
147        let author_sig = git_commit.author();
148        let author = author_sig.name().unwrap_or("unknown").to_owned();
149        let timestamp = u64::try_from(author_sig.when().seconds()).unwrap_or(0);
150        let message = git_commit.message().unwrap_or("(no message)").to_owned();
151
152        // Create panproto-vcs commit.
153        let commit = CommitObject::builder(schema_id, "project", &author, &message)
154            .parents(parents)
155            .timestamp(timestamp)
156            .build();
157
158        let commit_id = panproto_store.put(&Object::Commit(commit))?;
159
160        git_to_panproto.insert(*git_oid, commit_id);
161        oid_map.push((*git_oid, commit_id));
162        last_id = commit_id;
163    }
164
165    // Determine the head panproto ID. If no new commits were imported,
166    // the requested head must already live in `known`; fall back to that.
167    if commits.is_empty() {
168        if let Some(&id) = known.get(&head_git_oid) {
169            last_id = id;
170        }
171    }
172
173    // Note: this function does not set any local refs. Naming the result
174    // (e.g. `refs/heads/<branch>`) is the caller's responsibility because
175    // only the caller knows which branch it is importing.
176
177    Ok(ImportResult {
178        commit_count: commits.len(),
179        head_id: last_id,
180        oid_map,
181    })
182}
183
184/// Collect ancestor commits in topological order (parents first), skipping
185/// any commit reachable from an entry in `known`.
186fn collect_new_ancestors<H: BuildHasher>(
187    repo: &git2::Repository,
188    head: git2::Oid,
189    known: &HashMap<git2::Oid, ObjectId, H>,
190    result: &mut Vec<git2::Oid>,
191) -> Result<(), GitBridgeError> {
192    let mut revwalk = repo.revwalk()?;
193    revwalk.push(head)?;
194    revwalk.set_sorting(git2::Sort::TOPOLOGICAL | git2::Sort::REVERSE)?;
195
196    // Hide known commits and all their ancestors from the walk.
197    for git_oid in known.keys() {
198        // A known OID may not correspond to a commit reachable from `head`
199        // (e.g. leftover mapping from a deleted branch). `hide` errors in
200        // that case; ignore so an out-of-date map doesn't break imports.
201        let _ = revwalk.hide(*git_oid);
202    }
203
204    for oid_result in revwalk {
205        result.push(oid_result?);
206    }
207
208    Ok(())
209}
210
211/// Recursively walk a git tree, adding each file to the project builder.
212fn walk_git_tree(
213    repo: &git2::Repository,
214    tree: &git2::Tree<'_>,
215    prefix: &std::path::Path,
216    builder: &mut ProjectBuilder,
217) -> Result<(), GitBridgeError> {
218    for entry in tree {
219        let name = entry.name().unwrap_or("(unnamed)");
220        let path = prefix.join(name);
221
222        match entry.kind() {
223            Some(git2::ObjectType::Blob) => {
224                let blob = repo.find_blob(entry.id())?;
225                let content = blob.content();
226                builder.add_file(&path, content)?;
227            }
228            Some(git2::ObjectType::Tree) => {
229                let subtree = repo.find_tree(entry.id())?;
230                walk_git_tree(repo, &subtree, &path, builder)?;
231            }
232            _ => {
233                // Skip submodules, symbolic links, etc.
234            }
235        }
236    }
237
238    Ok(())
239}