Skip to main content

panproto_git/
import.rs

1//! Import git repositories into panproto-vcs.
2//!
3//! Walks the git commit DAG topologically, parses each commit's file tree
4//! into a panproto project schema, and creates panproto-vcs commits that
5//! preserve authorship, timestamps, and parent structure.
6
7use std::path::PathBuf;
8
9use panproto_project::ProjectBuilder;
10use panproto_vcs::{CommitObject, Object, ObjectId, Store};
11
12use crate::error::GitBridgeError;
13
14/// Result of importing a git repository.
15#[derive(Debug)]
16pub struct ImportResult {
17    /// Number of commits imported.
18    pub commit_count: usize,
19    /// The panproto-vcs object ID of the HEAD commit after import.
20    pub head_id: ObjectId,
21    /// Mapping from git commit OIDs to panproto-vcs object IDs.
22    pub oid_map: Vec<(git2::Oid, ObjectId)>,
23}
24
25/// Import a range of git commits into a panproto-vcs store.
26///
27/// Walks the git commit DAG starting from `revspec` (e.g. "HEAD", "main",
28/// "HEAD~10..HEAD") in topological order. For each commit:
29///
30/// 1. Reads all files from the git tree
31/// 2. Parses them into a project schema via `panproto-project`
32/// 3. Stores the schema as a panproto-vcs object
33/// 4. Creates a panproto-vcs commit preserving author, timestamp, message, parents
34///
35/// # Errors
36///
37/// Returns [`GitBridgeError`] if git operations, parsing, or VCS operations fail.
38pub fn import_git_repo<S: Store>(
39    git_repo: &git2::Repository,
40    panproto_store: &mut S,
41    revspec: &str,
42) -> Result<ImportResult, GitBridgeError> {
43    // Resolve the revspec to a commit.
44    let obj = git_repo.revparse_single(revspec)?;
45    let head_commit = obj
46        .peel_to_commit()
47        .map_err(|e| GitBridgeError::ObjectRead {
48            oid: obj.id().to_string(),
49            reason: format!("not a commit: {e}"),
50        })?;
51
52    // Collect commits in topological order (parents before children).
53    let mut commits = Vec::new();
54    collect_ancestors(git_repo, head_commit.id(), &mut commits)?;
55
56    // Import each commit.
57    let mut oid_map: Vec<(git2::Oid, ObjectId)> = Vec::new();
58    let mut git_to_panproto: rustc_hash::FxHashMap<git2::Oid, ObjectId> =
59        rustc_hash::FxHashMap::default();
60    let mut last_id = ObjectId::ZERO;
61
62    for git_oid in &commits {
63        let git_commit = git_repo.find_commit(*git_oid)?;
64        let tree = git_commit.tree()?;
65
66        // Parse all files in the tree into a project schema.
67        let mut project_builder = ProjectBuilder::new();
68        walk_git_tree(git_repo, &tree, &PathBuf::new(), &mut project_builder)?;
69
70        // Build the project schema.
71        let project = if project_builder.file_count() == 0 {
72            // Empty tree (initial commit with no files). Create a minimal schema.
73            let proto = panproto_protocols::raw_file::protocol();
74            let builder = panproto_schema::SchemaBuilder::new(&proto);
75
76            builder
77                .vertex("root", "file", None)
78                .map_err(|e| {
79                    GitBridgeError::Project(panproto_project::ProjectError::CoproductFailed {
80                        reason: format!("empty tree schema: {e}"),
81                    })
82                })?
83                .build()
84                .map_err(|e| {
85                    GitBridgeError::Project(panproto_project::ProjectError::CoproductFailed {
86                        reason: format!("empty tree build: {e}"),
87                    })
88                })?
89        } else {
90            project_builder.build()?.schema
91        };
92
93        // Store the schema.
94        let schema_id = panproto_store.put(&Object::Schema(Box::new(project)))?;
95
96        // Map parent git OIDs to panproto-vcs parent IDs.
97        let parents: Vec<ObjectId> = git_commit
98            .parent_ids()
99            .filter_map(|parent_oid| git_to_panproto.get(&parent_oid).copied())
100            .collect();
101
102        // Extract author info.
103        let author_sig = git_commit.author();
104        let author = author_sig.name().unwrap_or("unknown").to_owned();
105        let timestamp = u64::try_from(author_sig.when().seconds()).unwrap_or(0);
106        let message = git_commit.message().unwrap_or("(no message)").to_owned();
107
108        // Create panproto-vcs commit.
109        let commit = CommitObject::builder(schema_id, "project", &author, &message)
110            .parents(parents)
111            .timestamp(timestamp)
112            .build();
113
114        let commit_id = panproto_store.put(&Object::Commit(commit))?;
115
116        git_to_panproto.insert(*git_oid, commit_id);
117        oid_map.push((*git_oid, commit_id));
118        last_id = commit_id;
119    }
120
121    // Set HEAD to the last imported commit.
122    if !commits.is_empty() {
123        panproto_store.set_ref("refs/heads/main", last_id)?;
124    }
125
126    Ok(ImportResult {
127        commit_count: commits.len(),
128        head_id: last_id,
129        oid_map,
130    })
131}
132
133/// Collect all ancestor commits in topological order (parents first).
134fn collect_ancestors(
135    repo: &git2::Repository,
136    head: git2::Oid,
137    result: &mut Vec<git2::Oid>,
138) -> Result<(), GitBridgeError> {
139    let mut revwalk = repo.revwalk()?;
140    revwalk.push(head)?;
141    revwalk.set_sorting(git2::Sort::TOPOLOGICAL | git2::Sort::REVERSE)?;
142
143    for oid_result in revwalk {
144        result.push(oid_result?);
145    }
146
147    Ok(())
148}
149
150/// Recursively walk a git tree, adding each file to the project builder.
151fn walk_git_tree(
152    repo: &git2::Repository,
153    tree: &git2::Tree<'_>,
154    prefix: &std::path::Path,
155    builder: &mut ProjectBuilder,
156) -> Result<(), GitBridgeError> {
157    for entry in tree {
158        let name = entry.name().unwrap_or("(unnamed)");
159        let path = prefix.join(name);
160
161        match entry.kind() {
162            Some(git2::ObjectType::Blob) => {
163                let blob = repo.find_blob(entry.id())?;
164                let content = blob.content();
165                builder.add_file(&path, content)?;
166            }
167            Some(git2::ObjectType::Tree) => {
168                let subtree = repo.find_tree(entry.id())?;
169                walk_git_tree(repo, &subtree, &path, builder)?;
170            }
171            _ => {
172                // Skip submodules, symbolic links, etc.
173            }
174        }
175    }
176
177    Ok(())
178}