1use crate::error::{GitError, Result};
7use crate::models::{BranchInfo, ChangeType, CommitInfo, DiffHunk, FileChange, GitInfo, TagInfo};
8use gix::objs::tree::EntryKind;
9use gix::{Repository, discover};
10use imara_diff::{Algorithm, UnifiedDiffBuilder, diff};
11use std::collections::{HashMap, HashSet};
12use std::path::Path;
13
/// Upper bound, in bytes (10 MiB), on the size of a blob that will be diffed.
/// `get_blob_diff` returns no hunks for any blob larger than this.
const MAX_FILE_SIZE: u64 = 10 * 1024 * 1024;
16
17pub fn extract(repo_path: &Path) -> Result<GitInfo> {
38 let repo = discover(repo_path).map_err(|e| GitError::RepositoryNotFound {
39 path: repo_path.to_path_buf(),
40 message: e.to_string(),
41 })?;
42
43 let repo: Repository = repo.into();
44 let branches = get_branches(&repo)?;
45 let tags = get_tags(&repo)?;
46 let commits = get_all_commits(&repo, &branches)?;
47
48 Ok(GitInfo { branches, tags, commits })
49}
50
51pub fn organize_info(git_info: &GitInfo) -> Result<String> {
61 let commit_map: HashMap<&str, &CommitInfo> = git_info
63 .commits
64 .iter()
65 .map(|c| (c.id.as_str(), c))
66 .collect();
67
68 let mut branch_commits: HashMap<String, Vec<&CommitInfo>> = HashMap::new();
70
71 for branch in &git_info.branches {
72 let mut commits_for_branch = Vec::new();
73 let mut visited = HashSet::new();
74 let mut to_visit = Vec::new();
75
76 to_visit.push(branch.commit_id.as_str());
78
79 while let Some(commit_id) = to_visit.pop() {
81 if visited.contains(commit_id) {
82 continue;
83 }
84 visited.insert(commit_id);
85
86 if let Some(commit) = commit_map.get(commit_id) {
87 commits_for_branch.push(*commit);
88
89 for parent_id in &commit.parent_ids {
91 if !visited.contains(parent_id.as_str()) {
92 to_visit.push(parent_id.as_str());
93 }
94 }
95 }
96 }
97
98 commits_for_branch.sort_by(|a, b| b.timestamp.cmp(&a.timestamp));
100
101 branch_commits.insert(branch.name.clone(), commits_for_branch);
102 }
103
104 Ok(serde_json::to_string_pretty(&branch_commits)?)
106}
107
108fn get_branches(repo: &Repository) -> Result<Vec<BranchInfo>> {
110 let mut branches = Vec::new();
111
112 let head = repo
113 .head()
114 .map_err(|e| GitError::ReferencesError(format!("Failed to get HEAD: {}", e)))?;
115 let head_name = head.referent_name().map(|n| n.as_bstr().to_string());
116
117 let refs = repo
118 .references()
119 .map_err(|e| GitError::ReferencesError(e.to_string()))?;
120
121 for reference in refs
122 .all()
123 .map_err(|e| GitError::ReferencesError(e.to_string()))?
124 .flatten()
125 {
126 let ref_name = reference.name().as_bstr().to_string();
127
128 if ref_name.starts_with("refs/heads/") {
130 let branch_name = ref_name.strip_prefix("refs/heads/").unwrap().to_string();
131 let is_head = head_name.as_ref().map_or(false, |h| h == &ref_name);
132 let commit_id = reference.id().to_hex().to_string();
133
134 branches.push(BranchInfo {
135 name: branch_name,
136 is_head,
137 commit_id,
138 is_remote: false,
139 });
140 }
141 else if ref_name.starts_with("refs/remotes/") && !ref_name.ends_with("/HEAD") {
143 let branch_name = ref_name.strip_prefix("refs/remotes/").unwrap().to_string();
144 let commit_id = reference.id().to_hex().to_string();
145
146 branches.push(BranchInfo {
147 name: branch_name,
148 is_head: false,
149 commit_id,
150 is_remote: true,
151 });
152 }
153 }
154
155 Ok(branches)
156}
157
158fn get_tags(repo: &Repository) -> Result<Vec<TagInfo>> {
160 let mut tags = Vec::new();
161
162 let refs = repo
163 .references()
164 .map_err(|e| GitError::ReferencesError(e.to_string()))?;
165
166 for reference in refs
167 .prefixed("refs/tags/")
168 .map_err(|e| GitError::ReferencesError(e.to_string()))?
169 .flatten()
170 {
171 let ref_name = reference.name().as_bstr().to_string();
172 let tag_name = ref_name
173 .strip_prefix("refs/tags/")
174 .unwrap_or(&ref_name)
175 .to_string();
176
177 let target_id = reference.id().to_hex().to_string();
178
179 let (is_annotated, tagger, message) = match repo.find_object(reference.id().detach()) {
181 Ok(obj) => {
182 if let Ok(tag) = obj.try_into_tag() {
183 if let Ok(decoded) = tag.decode() {
184 let tagger_str = decoded.tagger.map(|sig| {
185 format!("{} <{}>", sig.name, sig.email)
186 });
187 let msg = Some(decoded.message.to_string());
188 (true, tagger_str, msg)
189 } else {
190 (true, None, None)
191 }
192 } else {
193 (false, None, None)
194 }
195 }
196 Err(_) => (false, None, None),
197 };
198
199 tags.push(TagInfo {
200 name: tag_name,
201 target_id,
202 is_annotated,
203 tagger,
204 message,
205 });
206 }
207
208 Ok(tags)
209}
210
211fn get_all_commits(repo: &Repository, branches: &[BranchInfo]) -> Result<Vec<CommitInfo>> {
213 let mut seen_commits: HashSet<String> = HashSet::new();
214 let mut commits: Vec<CommitInfo> = Vec::new();
215 let mut to_process: Vec<gix::ObjectId> = Vec::new();
216
217 for branch in branches {
219 if let Ok(oid) = gix::ObjectId::from_hex(branch.commit_id.as_bytes()) {
220 if !seen_commits.contains(&branch.commit_id) {
221 to_process.push(oid);
222 }
223 }
224 }
225
226 while let Some(commit_id) = to_process.pop() {
228 let commit_id_str = commit_id.to_hex().to_string();
229
230 if seen_commits.contains(&commit_id_str) {
231 continue;
232 }
233 seen_commits.insert(commit_id_str.clone());
234
235 let commit_obj = match repo.find_object(commit_id) {
237 Ok(obj) => obj,
238 Err(_) => continue,
239 };
240
241 let commit = match commit_obj.try_into_commit() {
242 Ok(c) => c,
243 Err(_) => continue,
244 };
245
246 let commit_decoded = match commit.decode() {
247 Ok(d) => d,
248 Err(_) => continue,
249 };
250
251 let parent_ids: Vec<String> = commit
253 .parent_ids()
254 .map(|id| id.to_hex().to_string())
255 .collect();
256
257 for parent_id in commit.parent_ids() {
259 let parent_id_str = parent_id.to_hex().to_string();
260 if !seen_commits.contains(&parent_id_str) {
261 to_process.push(parent_id.detach());
262 }
263 }
264
265 let file_changes = get_commit_diff(repo, &commit, commit.parent_ids().next())?;
267
268 let tree_id = match commit.tree() {
271 Ok(tree) => tree.id().to_hex().to_string(),
272 Err(_) => String::new(),
273 };
274
275 let message = commit_decoded.message();
276 let author = commit_decoded.author();
277
278 commits.push(CommitInfo {
279 id: commit_id_str,
280 tree_id,
281 message: message.title.to_string(),
282 author: format!("{} <{}>", author.name, author.email),
283 timestamp: author.seconds() as i64,
284 parent_ids,
285 file_changes,
286 });
287 }
288
289 commits.sort_by(|a, b| b.timestamp.cmp(&a.timestamp));
291
292 Ok(commits)
293}
294
295fn get_commit_diff(
297 repo: &Repository,
298 commit: &gix::Commit,
299 parent_id: Option<gix::Id>,
300) -> Result<Vec<FileChange>> {
301 let mut file_changes = Vec::new();
302
303 let commit_tree = commit
305 .tree()
306 .map_err(|e| GitError::TreeReadError(e.to_string()))?;
307
308 let parent_tree = if let Some(pid) = parent_id {
310 let parent_obj = repo
311 .find_object(pid.detach())
312 .map_err(|e| GitError::TreeReadError(e.to_string()))?;
313 let parent_commit = parent_obj
314 .try_into_commit()
315 .map_err(|e| GitError::TreeReadError(e.to_string()))?;
316 Some(
317 parent_commit
318 .tree()
319 .map_err(|e| GitError::TreeReadError(e.to_string()))?,
320 )
321 } else {
322 None
323 };
324
325 let mut old_entries: HashMap<String, (gix::ObjectId, EntryKind)> = HashMap::new();
327 let mut new_entries: HashMap<String, (gix::ObjectId, EntryKind)> = HashMap::new();
328
329 if let Some(ref tree) = parent_tree {
331 collect_tree_entries(repo, tree, String::new(), &mut old_entries)?;
332 }
333
334 collect_tree_entries(repo, &commit_tree, String::new(), &mut new_entries)?;
336
337 for (path, (new_oid, new_kind)) in &new_entries {
339 if *new_kind != EntryKind::Blob {
340 continue; }
342
343 if let Some((old_oid, _)) = old_entries.get(path) {
344 if old_oid != new_oid {
346 let hunks = get_blob_diff(repo, Some(*old_oid), *new_oid)?;
348 file_changes.push(FileChange {
349 path: path.clone(),
350 old_path: None,
351 change_type: ChangeType::Modified,
352 old_blob_sha: Some(old_oid.to_hex().to_string()),
353 new_blob_sha: Some(new_oid.to_hex().to_string()),
354 hunks,
355 });
356 }
357 } else {
359 let hunks = get_blob_diff(repo, None, *new_oid)?;
361 file_changes.push(FileChange {
362 path: path.clone(),
363 old_path: None,
364 change_type: ChangeType::Added,
365 old_blob_sha: None,
366 new_blob_sha: Some(new_oid.to_hex().to_string()),
367 hunks,
368 });
369 }
370 }
371
372 for (path, (old_oid, old_kind)) in &old_entries {
374 if *old_kind != EntryKind::Blob {
375 continue;
376 }
377
378 if !new_entries.contains_key(path) {
379 let hunks = get_blob_diff(
380 repo,
381 Some(*old_oid),
382 gix::ObjectId::empty_blob(gix::hash::Kind::Sha1),
383 )?;
384 file_changes.push(FileChange {
385 path: path.clone(),
386 old_path: None,
387 change_type: ChangeType::Deleted,
388 old_blob_sha: Some(old_oid.to_hex().to_string()),
389 new_blob_sha: None,
390 hunks,
391 });
392 }
393 }
394
395 Ok(file_changes)
396}
397
398fn collect_tree_entries(
400 repo: &Repository,
401 tree: &gix::Tree,
402 prefix: String,
403 entries: &mut HashMap<String, (gix::ObjectId, EntryKind)>,
404) -> Result<()> {
405 for entry in tree.iter() {
406 let entry = entry.map_err(|e| GitError::TreeReadError(e.to_string()))?;
407 let name = entry.filename().to_string();
408 let path = if prefix.is_empty() {
409 name.clone()
410 } else {
411 format!("{}/{}", prefix, name)
412 };
413
414 let mode = entry.mode();
415 let kind = mode.kind();
416 let oid = entry.oid();
417
418 match kind {
419 EntryKind::Blob | EntryKind::BlobExecutable => {
420 entries.insert(path, (oid.into(), kind));
421 }
422 EntryKind::Tree => {
423 if let Ok(subtree_obj) = repo.find_object(oid) {
425 if let Ok(subtree) = subtree_obj.try_into_tree() {
426 collect_tree_entries(repo, &subtree, path, entries)?;
427 }
428 }
429 }
430 _ => {
431 }
433 }
434 }
435 Ok(())
436}
437
438fn get_blob_diff(
440 repo: &Repository,
441 old_oid: Option<gix::ObjectId>,
442 new_oid: gix::ObjectId,
443) -> Result<Vec<DiffHunk>> {
444 let old_content = if let Some(oid) = old_oid {
446 if oid == gix::ObjectId::empty_blob(gix::hash::Kind::Sha1) {
447 String::new()
448 } else {
449 match repo.find_object(oid) {
450 Ok(obj) => {
451 let data = obj.data.clone();
452 if data.len() as u64 > MAX_FILE_SIZE {
454 return Ok(vec![]);
455 }
456 if is_binary(&data) {
458 return Ok(vec![]);
459 }
460 String::from_utf8_lossy(&data).to_string()
461 }
462 Err(_) => String::new(),
463 }
464 }
465 } else {
466 String::new()
467 };
468
469 let new_content = if new_oid == gix::ObjectId::empty_blob(gix::hash::Kind::Sha1) {
471 String::new()
472 } else {
473 match repo.find_object(new_oid) {
474 Ok(obj) => {
475 let data = obj.data.clone();
476 if data.len() as u64 > MAX_FILE_SIZE {
478 return Ok(vec![]);
479 }
480 if is_binary(&data) {
482 return Ok(vec![]);
483 }
484 String::from_utf8_lossy(&data).to_string()
485 }
486 Err(_) => String::new(),
487 }
488 };
489
490 let input = imara_diff::intern::InternedInput::new(old_content.as_str(), new_content.as_str());
492 let diff_output = diff(
493 Algorithm::Histogram,
494 &input,
495 UnifiedDiffBuilder::new(&input),
496 );
497
498 let hunks = parse_unified_diff(&diff_output);
500
501 Ok(hunks)
502}
503
/// Heuristic binary check: reports `true` when a NUL byte occurs within the
/// first 8192 bytes of `data`. Bytes beyond that window are not inspected.
fn is_binary(data: &[u8]) -> bool {
    data.iter().take(8192).any(|&byte| byte == 0)
}
509
510fn parse_unified_diff(diff_text: &str) -> Vec<DiffHunk> {
512 let mut hunks = Vec::new();
513 let mut current_hunk: Option<DiffHunk> = None;
514 let mut content_lines: Vec<String> = Vec::new();
515
516 for line in diff_text.lines() {
517 if line.starts_with("@@") {
518 if let Some(mut hunk) = current_hunk.take() {
520 hunk.content = content_lines.join("\n");
521 hunks.push(hunk);
522 content_lines.clear();
523 }
524
525 if let Some((old_start, old_lines, new_start, new_lines)) = parse_hunk_header(line) {
527 current_hunk = Some(DiffHunk {
528 old_start,
529 old_lines,
530 new_start,
531 new_lines,
532 content: String::new(),
533 });
534 }
535 } else if current_hunk.is_some() {
536 content_lines.push(line.to_string());
538 }
539 }
540
541 if let Some(mut hunk) = current_hunk.take() {
543 hunk.content = content_lines.join("\n");
544 hunks.push(hunk);
545 }
546
547 hunks
548}
549
550fn parse_hunk_header(line: &str) -> Option<(u32, u32, u32, u32)> {
552 let line = line.trim_start_matches("@@").trim_end_matches("@@").trim();
553 let parts: Vec<&str> = line.split_whitespace().collect();
554
555 if parts.len() < 2 {
556 return None;
557 }
558
559 let old_part = parts[0].trim_start_matches('-');
560 let new_part = parts[1].trim_start_matches('+');
561
562 let (old_start, old_lines) = parse_range(old_part)?;
563 let (new_start, new_lines) = parse_range(new_part)?;
564
565 Some((old_start, old_lines, new_start, new_lines))
566}
567
/// Parses a hunk range of the form `start,count` or just `start`.
///
/// When the count is omitted it is taken to be 1. Returns `None` if either
/// number fails to parse as `u32`.
fn parse_range(s: &str) -> Option<(u32, u32)> {
    match s.split_once(',') {
        Some((start, count)) => {
            let start = start.parse::<u32>().ok()?;
            let count = count.parse::<u32>().ok()?;
            Some((start, count))
        }
        None => s.parse::<u32>().ok().map(|start| (start, 1)),
    }
}