1use crate::error::IngestError;
4use cai_core::{Entry, Metadata, Source};
5use chrono::{DateTime, Utc};
6use git2::{Repository, Time};
7use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9use tracing::debug;
10
/// Scanner bound to a single Git repository on disk.
///
/// The scanner only remembers where the repository lives; nothing is opened
/// or read until one of its scanning methods is invoked.
#[derive(Debug, Clone)]
pub struct GitScanner {
    /// Filesystem path of the repository that will be opened when scanning.
    repo_path: PathBuf,
}
16
17impl GitScanner {
18 pub fn new<P: AsRef<Path>>(repo_path: P) -> Self {
23 Self {
24 repo_path: repo_path.as_ref().to_path_buf(),
25 }
26 }
27
28 pub fn scan(&self) -> Result<Vec<Entry>, IngestError> {
30 let repo = Repository::open(&self.repo_path).map_err(IngestError::GitError)?;
31
32 let repo_url = get_remote_url(&repo);
34
35 let mut revwalk = repo.revwalk().map_err(IngestError::GitError)?;
36
37 match revwalk.push_head() {
39 Ok(_) => {}
40 Err(_) => {
41 return Err(IngestError::NoFilesFound(
43 self.repo_path.display().to_string(),
44 ));
45 }
46 }
47
48 let mut entries = Vec::new();
49
50 for oid in revwalk {
51 let oid = oid.map_err(IngestError::GitError)?;
52 let commit = repo.find_commit(oid).map_err(IngestError::GitError)?;
53
54 debug!("Scanning commit: {}", commit.id());
55
56 let entry = self.commit_to_entry(&commit, &repo_url)?;
57 entries.push(entry);
58 }
59
60 if entries.is_empty() {
61 return Err(IngestError::NoFilesFound(
62 self.repo_path.display().to_string(),
63 ));
64 }
65
66 Ok(entries)
67 }
68
69 pub fn scan_since(&self, since: DateTime<Utc>) -> Result<Vec<Entry>, IngestError> {
71 let all_entries = self.scan()?;
72 Ok(all_entries
73 .into_iter()
74 .filter(|e| e.timestamp >= since)
75 .collect())
76 }
77
78 fn commit_to_entry(
79 &self,
80 commit: &git2::Commit,
81 repo_url: &Option<String>,
82 ) -> Result<Entry, IngestError> {
83 let id = format!("git-{}", commit.id());
84 let timestamp = git_time_to_datetime(commit.time());
85
86 let prompt = format!(
88 "{}\n\nAuthor: {} <{}>",
89 commit.summary().unwrap_or(""),
90 commit.author().name().unwrap_or(""),
91 commit.author().email().unwrap_or("")
92 );
93
94 let response = commit.message().unwrap_or("").to_string();
96
97 let mut extra = HashMap::new();
98 extra.insert(
99 "author_name".to_string(),
100 commit.author().name().unwrap_or("").to_string(),
101 );
102 extra.insert(
103 "author_email".to_string(),
104 commit.author().email().unwrap_or("").to_string(),
105 );
106 extra.insert(
107 "committer_name".to_string(),
108 commit.committer().name().unwrap_or("").to_string(),
109 );
110 extra.insert(
111 "committer_email".to_string(),
112 commit.committer().email().unwrap_or("").to_string(),
113 );
114
115 if let Ok(parent_id) = commit.parent_id(0) {
117 extra.insert("parent_commit".to_string(), parent_id.to_string());
118 }
119
120 Ok(Entry {
121 id,
122 source: Source::Git,
123 timestamp,
124 prompt,
125 response,
126 metadata: Metadata {
127 file_path: None,
128 repo_url: repo_url.clone(),
129 commit_hash: Some(commit.id().to_string()),
130 language: None,
131 extra,
132 },
133 })
134 }
135}
136
137fn get_remote_url(repo: &Repository) -> Option<String> {
139 repo.find_remote("origin")
140 .ok()
141 .and_then(|r| r.url().map(|u| u.to_string()))
142}
143
144fn git_time_to_datetime(time: Time) -> DateTime<Utc> {
146 DateTime::from_timestamp(time.seconds(), 0).unwrap_or_else(Utc::now)
147}
148
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Writes `test.txt` into `workdir`, stages it, and records it as the
    /// root commit on `HEAD`. Returns the id of the new commit.
    fn commit_test_file(repo: &Repository, workdir: &Path) -> git2::Oid {
        let file_on_disk = workdir.join("test.txt");
        fs::write(&file_on_disk, "test content").unwrap();

        let mut index = repo.index().unwrap();
        index.add_path(Path::new("test.txt")).unwrap();
        index.write().unwrap();

        let tree = repo.find_tree(index.write_tree().unwrap()).unwrap();
        let signature = git2::Signature::now("Test User", "test@example.com").unwrap();

        repo.commit(
            Some("HEAD"),
            &signature,
            &signature,
            "Test commit message\n\nThis is a test commit with more details.",
            &tree,
            &[],
        )
        .unwrap()
    }

    /// A repository with exactly one commit produces exactly one entry whose
    /// fields mirror that commit.
    #[test]
    fn test_scan_git_repo() {
        let workspace = TempDir::new().unwrap();
        let workdir = workspace.path();

        let repo = Repository::init(workdir).unwrap();
        let expected_oid = commit_test_file(&repo, workdir);

        let entries = GitScanner::new(workdir).scan().unwrap();

        assert_eq!(entries.len(), 1);
        let only = &entries[0];
        assert!(only.id.starts_with("git-"));
        assert_eq!(only.source, Source::Git);
        assert!(only.prompt.contains("Test commit message"));
        assert!(only.response.contains("more details"));
        assert_eq!(only.metadata.commit_hash, Some(expected_oid.to_string()));
    }

    /// A freshly initialised repository without commits is reported as
    /// `NoFilesFound`.
    #[test]
    fn test_scan_empty_repo() {
        let workspace = TempDir::new().unwrap();
        let workdir = workspace.path();

        Repository::init(workdir).unwrap();

        let outcome = GitScanner::new(workdir).scan();

        assert!(outcome.is_err());
        assert!(matches!(outcome.unwrap_err(), IngestError::NoFilesFound(_)));
    }
}