Skip to main content

cai_ingest/
git.rs

1//! Git repository scanner
2
3use crate::error::IngestError;
4use cai_core::{Entry, Metadata, Source};
5use chrono::{DateTime, Utc};
6use git2::{Repository, Time};
7use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9use tracing::debug;
10
/// Scanner for Git repository commits.
///
/// Stores only the path of the repository to read. Cloning or debug-printing
/// a scanner therefore copies or prints just that path.
#[derive(Debug, Clone)]
pub struct GitScanner {
    /// Repository path
    repo_path: PathBuf,
}
16
17impl GitScanner {
18    /// Create a new Git scanner
19    ///
20    /// # Arguments
21    /// * `repo_path` - Path to the Git repository
22    pub fn new<P: AsRef<Path>>(repo_path: P) -> Self {
23        Self {
24            repo_path: repo_path.as_ref().to_path_buf(),
25        }
26    }
27
28    /// Scan repository and convert commits to entries
29    pub fn scan(&self) -> Result<Vec<Entry>, IngestError> {
30        let repo = Repository::open(&self.repo_path).map_err(IngestError::GitError)?;
31
32        // Get repo URL if available
33        let repo_url = get_remote_url(&repo);
34
35        let mut revwalk = repo.revwalk().map_err(IngestError::GitError)?;
36
37        // Try to push HEAD - this will fail on empty repositories
38        match revwalk.push_head() {
39            Ok(_) => {}
40            Err(_) => {
41                // Empty repository or unborn HEAD
42                return Err(IngestError::NoFilesFound(
43                    self.repo_path.display().to_string(),
44                ));
45            }
46        }
47
48        let mut entries = Vec::new();
49
50        for oid in revwalk {
51            let oid = oid.map_err(IngestError::GitError)?;
52            let commit = repo.find_commit(oid).map_err(IngestError::GitError)?;
53
54            debug!("Scanning commit: {}", commit.id());
55
56            let entry = self.commit_to_entry(&commit, &repo_url)?;
57            entries.push(entry);
58        }
59
60        if entries.is_empty() {
61            return Err(IngestError::NoFilesFound(
62                self.repo_path.display().to_string(),
63            ));
64        }
65
66        Ok(entries)
67    }
68
69    /// Scan only commits since a given date
70    pub fn scan_since(&self, since: DateTime<Utc>) -> Result<Vec<Entry>, IngestError> {
71        let all_entries = self.scan()?;
72        Ok(all_entries
73            .into_iter()
74            .filter(|e| e.timestamp >= since)
75            .collect())
76    }
77
78    fn commit_to_entry(
79        &self,
80        commit: &git2::Commit,
81        repo_url: &Option<String>,
82    ) -> Result<Entry, IngestError> {
83        let id = format!("git-{}", commit.id());
84        let timestamp = git_time_to_datetime(commit.time());
85
86        // Build prompt from commit message (first line) and author
87        let prompt = format!(
88            "{}\n\nAuthor: {} <{}>",
89            commit.summary().unwrap_or(""),
90            commit.author().name().unwrap_or(""),
91            commit.author().email().unwrap_or("")
92        );
93
94        // Use full message as response
95        let response = commit.message().unwrap_or("").to_string();
96
97        let mut extra = HashMap::new();
98        extra.insert(
99            "author_name".to_string(),
100            commit.author().name().unwrap_or("").to_string(),
101        );
102        extra.insert(
103            "author_email".to_string(),
104            commit.author().email().unwrap_or("").to_string(),
105        );
106        extra.insert(
107            "committer_name".to_string(),
108            commit.committer().name().unwrap_or("").to_string(),
109        );
110        extra.insert(
111            "committer_email".to_string(),
112            commit.committer().email().unwrap_or("").to_string(),
113        );
114
115        // Add parent commit IDs
116        if let Ok(parent_id) = commit.parent_id(0) {
117            extra.insert("parent_commit".to_string(), parent_id.to_string());
118        }
119
120        Ok(Entry {
121            id,
122            source: Source::Git,
123            timestamp,
124            prompt,
125            response,
126            metadata: Metadata {
127                file_path: None,
128                repo_url: repo_url.clone(),
129                commit_hash: Some(commit.id().to_string()),
130                language: None,
131                extra,
132            },
133        })
134    }
135}
136
137/// Get the remote URL of a repository
138fn get_remote_url(repo: &Repository) -> Option<String> {
139    repo.find_remote("origin")
140        .ok()
141        .and_then(|r| r.url().map(|u| u.to_string()))
142}
143
144/// Convert git2 Time to DateTime<Utc>
145fn git_time_to_datetime(time: Time) -> DateTime<Utc> {
146    DateTime::from_timestamp(time.seconds(), 0).unwrap_or_else(Utc::now)
147}
148
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// A repository holding one commit scans to exactly one entry that
    /// mirrors that commit.
    #[test]
    fn test_scan_git_repo() {
        let workdir = TempDir::new().unwrap();
        let root = workdir.path();

        // Fresh repository with a single tracked file.
        let repo = Repository::init(root).unwrap();
        fs::write(root.join("test.txt"), "test content").unwrap();

        // Stage the file and turn the index into a tree.
        let mut index = repo.index().unwrap();
        index.add_path(Path::new("test.txt")).unwrap();
        index.write().unwrap();
        let tree_id = index.write_tree().unwrap();
        let tree = repo.find_tree(tree_id).unwrap();

        // Root commit: no parents, same signature as author and committer.
        let signature = git2::Signature::now("Test User", "test@example.com").unwrap();
        let message = "Test commit message\n\nThis is a test commit with more details.";
        let commit_id = repo
            .commit(Some("HEAD"), &signature, &signature, message, &tree, &[])
            .unwrap();

        let entries = GitScanner::new(root).scan().unwrap();

        assert_eq!(entries.len(), 1);
        let only = &entries[0];
        assert!(only.id.starts_with("git-"));
        assert_eq!(only.source, Source::Git);
        assert!(only.prompt.contains("Test commit message"));
        assert!(only.response.contains("more details"));
        assert_eq!(only.metadata.commit_hash, Some(commit_id.to_string()));
    }

    /// A freshly initialised repository (no commits) is reported as
    /// `NoFilesFound`.
    #[test]
    fn test_scan_empty_repo() {
        let workdir = TempDir::new().unwrap();

        // Initialize empty git repo (no commits)
        Repository::init(workdir.path()).unwrap();

        let outcome = GitScanner::new(workdir.path()).scan();

        assert!(outcome.is_err());
        assert!(matches!(outcome, Err(IngestError::NoFilesFound(_))));
    }
}
213}