Skip to main content

rustloclib/data/
diff.rs

1//! Git diff analysis for LOC changes between commits and working directory.
2//!
3//! This module provides functionality to compute LOC differences between:
4//! - Two git commits (using `diff_commits`)
5//! - Working directory and HEAD or index (using `diff_workdir`)
6//!
7//! ## Design Principle
8//!
9//! **Filtering (glob patterns, crate names) is done centrally using `FilterConfig`
10//! and `WorkspaceInfo`, not re-implemented here.** This module:
11//!
12//! 1. Gets changed file paths from git
13//! 2. Delegates to `FilterConfig::matches()` for glob filtering
14//! 3. Uses `WorkspaceInfo::crate_for_path()` for crate mapping
15//! 4. Applies crate filter via workspace's existing mechanisms
16
17use std::collections::HashMap;
18use std::path::{Path, PathBuf};
19
20use serde::{Deserialize, Serialize};
21
22use crate::error::RustlocError;
23use crate::query::options::{Aggregation, LineTypes};
24use crate::source::filter::FilterConfig;
25use crate::source::workspace::WorkspaceInfo;
26use crate::Result;
27
28use super::stats::Locs;
29use super::visitor::{gather_stats, VisitorContext};
30
31/// Lines of code diff (added vs removed).
32///
33/// Tracks additions and removals for each of the 6 line types.
34#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
35pub struct LocsDiff {
36    /// Lines added
37    pub added: Locs,
38    /// Lines removed
39    pub removed: Locs,
40}
41
42impl LocsDiff {
43    /// Create a new empty diff.
44    pub fn new() -> Self {
45        Self::default()
46    }
47
48    /// Net change for code lines.
49    pub fn net_code(&self) -> i64 {
50        self.added.code as i64 - self.removed.code as i64
51    }
52
53    /// Net change for test lines.
54    pub fn net_tests(&self) -> i64 {
55        self.added.tests as i64 - self.removed.tests as i64
56    }
57
58    /// Net change for example lines.
59    pub fn net_examples(&self) -> i64 {
60        self.added.examples as i64 - self.removed.examples as i64
61    }
62
63    /// Net change for doc comment lines.
64    pub fn net_docs(&self) -> i64 {
65        self.added.docs as i64 - self.removed.docs as i64
66    }
67
68    /// Net change for regular comment lines.
69    pub fn net_comments(&self) -> i64 {
70        self.added.comments as i64 - self.removed.comments as i64
71    }
72
73    /// Net change for blank lines.
74    pub fn net_blanks(&self) -> i64 {
75        self.added.blanks as i64 - self.removed.blanks as i64
76    }
77
78    /// Net change for total lines.
79    pub fn net_total(&self) -> i64 {
80        self.added.total() as i64 - self.removed.total() as i64
81    }
82
83    /// Return a filtered copy with only the specified line types included.
84    pub fn filter(&self, types: LineTypes) -> Self {
85        Self {
86            added: self.added.filter(types),
87            removed: self.removed.filter(types),
88        }
89    }
90}
91
92impl std::ops::Add for LocsDiff {
93    type Output = Self;
94
95    fn add(self, other: Self) -> Self {
96        Self {
97            added: self.added + other.added,
98            removed: self.removed + other.removed,
99        }
100    }
101}
102
103impl std::ops::AddAssign for LocsDiff {
104    fn add_assign(&mut self, other: Self) {
105        self.added += other.added;
106        self.removed += other.removed;
107    }
108}
109
110/// Diff statistics for a single file.
111#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
112pub struct FileDiffStats {
113    /// Path to the file (relative to repo root).
114    pub path: PathBuf,
115    /// The type of change.
116    pub change_type: FileChangeType,
117    /// LOC diff for this file.
118    pub diff: LocsDiff,
119}
120
121impl FileDiffStats {
122    /// Return a filtered copy with only the specified line types included.
123    pub fn filter(&self, types: LineTypes) -> Self {
124        Self {
125            path: self.path.clone(),
126            change_type: self.change_type,
127            diff: self.diff.filter(types),
128        }
129    }
130}
131
132/// Type of file change in the diff.
133#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
134pub enum FileChangeType {
135    /// File was added.
136    Added,
137    /// File was deleted.
138    Deleted,
139    /// File was modified.
140    Modified,
141}
142
143/// Diff statistics for a crate.
144#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
145pub struct CrateDiffStats {
146    /// Name of the crate.
147    pub name: String,
148    /// Root path of the crate.
149    pub path: PathBuf,
150    /// Aggregated LOC diff.
151    pub diff: LocsDiff,
152    /// Per-file diff (optional, for detailed output).
153    pub files: Vec<FileDiffStats>,
154}
155
156impl CrateDiffStats {
157    /// Create new crate diff stats.
158    pub fn new(name: String, path: PathBuf) -> Self {
159        Self {
160            name,
161            path,
162            diff: LocsDiff::new(),
163            files: Vec::new(),
164        }
165    }
166
167    /// Add file diff to this crate.
168    pub fn add_file(&mut self, file_diff: FileDiffStats) {
169        self.diff += file_diff.diff;
170        self.files.push(file_diff);
171    }
172
173    /// Return a filtered copy with only the specified line types included.
174    pub fn filter(&self, types: LineTypes) -> Self {
175        Self {
176            name: self.name.clone(),
177            path: self.path.clone(),
178            diff: self.diff.filter(types),
179            files: self.files.iter().map(|f| f.filter(types)).collect(),
180        }
181    }
182}
183
184/// Result of a diff operation between two commits.
185#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
186pub struct DiffResult {
187    /// Root path of the repository analyzed.
188    pub root: PathBuf,
189    /// Base commit (from).
190    pub from_commit: String,
191    /// Target commit (to).
192    pub to_commit: String,
193    /// Total diff across all files.
194    pub total: LocsDiff,
195    /// Per-crate diff breakdown.
196    pub crates: Vec<CrateDiffStats>,
197    /// Per-file diff (optional, for detailed output).
198    pub files: Vec<FileDiffStats>,
199}
200
201impl DiffResult {
202    /// Return a filtered copy with only the specified line types included.
203    pub fn filter(&self, types: LineTypes) -> Self {
204        Self {
205            root: self.root.clone(),
206            from_commit: self.from_commit.clone(),
207            to_commit: self.to_commit.clone(),
208            total: self.total.filter(types),
209            crates: self.crates.iter().map(|c| c.filter(types)).collect(),
210            files: self.files.iter().map(|f| f.filter(types)).collect(),
211        }
212    }
213}
214
215/// Options for diff computation.
216#[derive(Debug, Clone)]
217pub struct DiffOptions {
218    /// Crate names to include (empty = all crates).
219    pub crate_filter: Vec<String>,
220    /// File filter configuration.
221    pub file_filter: FilterConfig,
222    /// Aggregation level for results.
223    pub aggregation: Aggregation,
224    /// Which line types to include in results.
225    pub line_types: LineTypes,
226}
227
228impl Default for DiffOptions {
229    fn default() -> Self {
230        Self {
231            crate_filter: Vec::new(),
232            file_filter: FilterConfig::new(),
233            aggregation: Aggregation::Total,
234            line_types: LineTypes::default(),
235        }
236    }
237}
238
239impl DiffOptions {
240    /// Create new default options.
241    pub fn new() -> Self {
242        Self::default()
243    }
244
245    /// Filter to specific crates.
246    pub fn crates(mut self, names: Vec<String>) -> Self {
247        self.crate_filter = names;
248        self
249    }
250
251    /// Set file filter.
252    pub fn filter(mut self, config: FilterConfig) -> Self {
253        self.file_filter = config;
254        self
255    }
256
257    /// Set aggregation level.
258    pub fn aggregation(mut self, level: Aggregation) -> Self {
259        self.aggregation = level;
260        self
261    }
262
263    /// Set which line types to include.
264    pub fn line_types(mut self, types: LineTypes) -> Self {
265        self.line_types = types;
266        self
267    }
268}
269
/// Which uncommitted state a working-directory diff compares HEAD against.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum WorkdirDiffMode {
    /// HEAD versus the working directory, i.e. every uncommitted change.
    /// Corresponds to `git diff HEAD`.
    #[default]
    All,
    /// HEAD versus the staging area/index, i.e. staged changes only.
    /// Corresponds to `git diff --cached` / `git diff --staged`.
    Staged,
}
281
282/// Compute LOC diff for working directory changes.
283pub fn diff_workdir(
284    repo_path: impl AsRef<Path>,
285    mode: WorkdirDiffMode,
286    options: DiffOptions,
287) -> Result<DiffResult> {
288    let repo_path = repo_path.as_ref();
289
290    // Open the git repository
291    let repo = gix::discover(repo_path)
292        .map_err(|e| RustlocError::GitError(format!("Failed to discover git repository: {}", e)))?;
293
294    let repo_root = repo
295        .work_dir()
296        .ok_or_else(|| RustlocError::GitError("Repository has no work directory".to_string()))?
297        .to_path_buf();
298
299    // Get HEAD commit and its tree
300    let head_commit = repo
301        .head_commit()
302        .map_err(|e| RustlocError::GitError(format!("Failed to get HEAD commit: {}", e)))?;
303
304    let head_tree = head_commit
305        .tree()
306        .map_err(|e| RustlocError::GitError(format!("Failed to get HEAD tree: {}", e)))?;
307
308    // Get the index
309    let index = repo
310        .index()
311        .map_err(|e| RustlocError::GitError(format!("Failed to read index: {}", e)))?;
312
313    // Collect changes based on mode
314    let changes = match mode {
315        WorkdirDiffMode::Staged => collect_staged_changes(&repo, &head_tree, &index)?,
316        WorkdirDiffMode::All => collect_workdir_changes(&repo, &head_tree, &repo_root)?,
317    };
318
319    // Try to discover workspace info for crate grouping
320    let workspace = WorkspaceInfo::discover(&repo_root).ok();
321
322    // Apply crate filter if specified
323    let filtered_workspace = workspace.as_ref().map(|ws| {
324        if options.crate_filter.is_empty() {
325            ws.clone()
326        } else {
327            let names: Vec<&str> = options.crate_filter.iter().map(|s| s.as_str()).collect();
328            ws.filter_by_names(&names)
329        }
330    });
331
332    // Process changes
333    let mut total = LocsDiff::new();
334    let mut files = Vec::new();
335    let mut crate_stats: HashMap<String, CrateDiffStats> = HashMap::new();
336
337    // Determine what to include based on aggregation level
338    let include_files = matches!(options.aggregation, Aggregation::ByFile);
339    let include_crates = matches!(
340        options.aggregation,
341        Aggregation::ByCrate | Aggregation::ByFile
342    );
343
344    for change in changes {
345        let path = change.path.clone();
346
347        // Apply glob filter
348        if !options.file_filter.matches(&path) {
349            continue;
350        }
351
352        // Determine which crate this file belongs to
353        let crate_info = filtered_workspace
354            .as_ref()
355            .and_then(|ws| ws.crate_for_path(&path));
356
357        // If crate filter is active and file doesn't belong to a filtered crate, skip
358        if !options.crate_filter.is_empty() && crate_info.is_none() {
359            continue;
360        }
361
362        // Compute file diff
363        let file_diff = compute_workdir_file_diff(&change, &path)?;
364
365        // Aggregate into total
366        total += file_diff.diff;
367
368        // Aggregate into crate stats if applicable
369        if include_crates {
370            if let Some(crate_info) = crate_info {
371                let crate_stats_entry =
372                    crate_stats
373                        .entry(crate_info.name.clone())
374                        .or_insert_with(|| {
375                            CrateDiffStats::new(crate_info.name.clone(), crate_info.root.clone())
376                        });
377
378                if include_files {
379                    crate_stats_entry.add_file(file_diff.clone());
380                } else {
381                    crate_stats_entry.diff += file_diff.diff;
382                }
383            }
384        }
385
386        // Collect file stats if requested
387        if include_files {
388            files.push(file_diff);
389        }
390    }
391
392    // Convert crate stats map to vec
393    let crates: Vec<CrateDiffStats> = crate_stats.into_values().collect();
394
395    // Build result and apply line type filter
396    let (from_label, to_label) = match mode {
397        WorkdirDiffMode::All => ("HEAD", "working tree"),
398        WorkdirDiffMode::Staged => ("HEAD", "index"),
399    };
400
401    let result = DiffResult {
402        root: repo_root,
403        from_commit: from_label.to_string(),
404        to_commit: to_label.to_string(),
405        total,
406        crates,
407        files,
408    };
409
410    Ok(result.filter(options.line_types))
411}
412
413/// Internal representation of a working directory file change
414struct WorkdirFileChange {
415    path: PathBuf,
416    change_type: FileChangeType,
417    old_content: Option<String>,
418    new_content: Option<String>,
419}
420
421/// Collect staged changes (HEAD vs index)
422fn collect_staged_changes(
423    repo: &gix::Repository,
424    head_tree: &gix::Tree<'_>,
425    index: &gix::worktree::Index,
426) -> Result<Vec<WorkdirFileChange>> {
427    use std::collections::HashSet;
428
429    let mut changes = Vec::new();
430    let mut seen_paths: HashSet<PathBuf> = HashSet::new();
431
432    // Build a map of HEAD tree entries
433    let mut head_entries: HashMap<PathBuf, gix::ObjectId> = HashMap::new();
434    collect_tree_entries(repo, head_tree, PathBuf::new(), &mut head_entries)?;
435
436    // Check each entry in the index against HEAD
437    for entry in index.entries() {
438        let path = PathBuf::from(gix::path::from_bstr(entry.path(index)));
439
440        // Only process .rs files
441        if path.extension().and_then(|e| e.to_str()) != Some("rs") {
442            continue;
443        }
444
445        seen_paths.insert(path.clone());
446        let index_oid = entry.id;
447
448        if let Some(&head_oid) = head_entries.get(&path) {
449            if head_oid != index_oid {
450                let old_content = read_blob(repo, head_oid)?;
451                let new_content = read_blob(repo, index_oid)?;
452                changes.push(WorkdirFileChange {
453                    path,
454                    change_type: FileChangeType::Modified,
455                    old_content: Some(old_content),
456                    new_content: Some(new_content),
457                });
458            }
459        } else {
460            let new_content = read_blob(repo, index_oid)?;
461            changes.push(WorkdirFileChange {
462                path,
463                change_type: FileChangeType::Added,
464                old_content: None,
465                new_content: Some(new_content),
466            });
467        }
468    }
469
470    // Check for deleted files
471    for (path, head_oid) in head_entries {
472        if path.extension().and_then(|e| e.to_str()) != Some("rs") {
473            continue;
474        }
475        if !seen_paths.contains(&path) {
476            let old_content = read_blob(repo, head_oid)?;
477            changes.push(WorkdirFileChange {
478                path,
479                change_type: FileChangeType::Deleted,
480                old_content: Some(old_content),
481                new_content: None,
482            });
483        }
484    }
485
486    Ok(changes)
487}
488
489/// Collect all uncommitted changes (HEAD vs working directory)
490fn collect_workdir_changes(
491    repo: &gix::Repository,
492    head_tree: &gix::Tree<'_>,
493    repo_root: &Path,
494) -> Result<Vec<WorkdirFileChange>> {
495    use std::collections::HashSet;
496
497    let mut changes = Vec::new();
498    let mut seen_paths: HashSet<PathBuf> = HashSet::new();
499
500    // Build a map of HEAD tree entries
501    let mut head_entries: HashMap<PathBuf, gix::ObjectId> = HashMap::new();
502    collect_tree_entries(repo, head_tree, PathBuf::new(), &mut head_entries)?;
503
504    // Get tracked files from index
505    let index = repo
506        .index()
507        .map_err(|e| RustlocError::GitError(format!("Failed to read index: {}", e)))?;
508    let tracked_paths: HashSet<PathBuf> = index
509        .entries()
510        .iter()
511        .map(|e| PathBuf::from(gix::path::from_bstr(e.path(&index))))
512        .collect();
513
514    // Walk the working directory
515    let walker = walkdir::WalkDir::new(repo_root)
516        .into_iter()
517        .filter_entry(|e| {
518            let name = e.file_name().to_str();
519            name.is_none_or(|s| s != ".git" && s != "target")
520        });
521
522    for entry in walker.filter_map(|e| e.ok()) {
523        if !entry.file_type().is_file() {
524            continue;
525        }
526
527        let abs_path = entry.path();
528        if abs_path.extension().and_then(|e| e.to_str()) != Some("rs") {
529            continue;
530        }
531
532        let rel_path = abs_path
533            .strip_prefix(repo_root)
534            .unwrap_or(abs_path)
535            .to_path_buf();
536
537        // Skip untracked files
538        if !tracked_paths.contains(&rel_path) && !head_entries.contains_key(&rel_path) {
539            continue;
540        }
541
542        seen_paths.insert(rel_path.clone());
543
544        let workdir_content = match std::fs::read_to_string(abs_path) {
545            Ok(content) => content,
546            Err(_) => continue,
547        };
548
549        if let Some(&head_oid) = head_entries.get(&rel_path) {
550            let head_content = read_blob(repo, head_oid)?;
551            if head_content != workdir_content {
552                changes.push(WorkdirFileChange {
553                    path: rel_path,
554                    change_type: FileChangeType::Modified,
555                    old_content: Some(head_content),
556                    new_content: Some(workdir_content),
557                });
558            }
559        } else {
560            changes.push(WorkdirFileChange {
561                path: rel_path,
562                change_type: FileChangeType::Added,
563                old_content: None,
564                new_content: Some(workdir_content),
565            });
566        }
567    }
568
569    // Check for deleted files
570    for (path, head_oid) in head_entries {
571        if path.extension().and_then(|e| e.to_str()) != Some("rs") {
572            continue;
573        }
574        if !seen_paths.contains(&path) {
575            let old_content = read_blob(repo, head_oid)?;
576            changes.push(WorkdirFileChange {
577                path,
578                change_type: FileChangeType::Deleted,
579                old_content: Some(old_content),
580                new_content: None,
581            });
582        }
583    }
584
585    Ok(changes)
586}
587
588/// Recursively collect all blob entries from a tree
589fn collect_tree_entries(
590    repo: &gix::Repository,
591    tree: &gix::Tree<'_>,
592    prefix: PathBuf,
593    entries: &mut HashMap<PathBuf, gix::ObjectId>,
594) -> Result<()> {
595    for entry in tree.iter() {
596        let entry = entry
597            .map_err(|e| RustlocError::GitError(format!("Failed to read tree entry: {}", e)))?;
598
599        let name = gix::path::from_bstr(entry.filename());
600        let path = prefix.join(name);
601
602        if entry.mode().is_blob() {
603            entries.insert(path, entry.oid().to_owned());
604        } else if entry.mode().is_tree() {
605            let subtree = repo
606                .find_object(entry.oid())
607                .map_err(|e| RustlocError::GitError(format!("Failed to find tree: {}", e)))?
608                .try_into_tree()
609                .map_err(|_| RustlocError::GitError("Object is not a tree".to_string()))?;
610            collect_tree_entries(repo, &subtree, path, entries)?;
611        }
612    }
613    Ok(())
614}
615
616/// Compute the LOC diff for a working directory file change
617fn compute_workdir_file_diff(change: &WorkdirFileChange, path: &Path) -> Result<FileDiffStats> {
618    let context = VisitorContext::from_file_path(path);
619
620    let (old_stats, new_stats) = match change.change_type {
621        FileChangeType::Added => {
622            let stats = gather_stats(change.new_content.as_ref().unwrap(), context);
623            (Locs::new(), stats)
624        }
625        FileChangeType::Deleted => {
626            let stats = gather_stats(change.old_content.as_ref().unwrap(), context);
627            (stats, Locs::new())
628        }
629        FileChangeType::Modified => {
630            let old_stats = gather_stats(change.old_content.as_ref().unwrap(), context);
631            let new_stats = gather_stats(change.new_content.as_ref().unwrap(), context);
632            (old_stats, new_stats)
633        }
634    };
635
636    let diff = compute_locs_diff(&old_stats, &new_stats);
637
638    Ok(FileDiffStats {
639        path: path.to_path_buf(),
640        change_type: change.change_type,
641        diff,
642    })
643}
644
645/// Compute LOC diff between two git commits.
646pub fn diff_commits(
647    repo_path: impl AsRef<Path>,
648    from: &str,
649    to: &str,
650    options: DiffOptions,
651) -> Result<DiffResult> {
652    let repo_path = repo_path.as_ref();
653
654    // Open the git repository
655    let repo = gix::discover(repo_path)
656        .map_err(|e| RustlocError::GitError(format!("Failed to discover git repository: {}", e)))?;
657
658    let repo_root = repo
659        .work_dir()
660        .ok_or_else(|| RustlocError::GitError("Repository has no work directory".to_string()))?
661        .to_path_buf();
662
663    // Resolve commit references
664    let from_commit = resolve_commit(&repo, from)?;
665    let to_commit = resolve_commit(&repo, to)?;
666
667    // Get the trees for both commits
668    let from_tree = from_commit
669        .tree()
670        .map_err(|e| RustlocError::GitError(format!("Failed to get tree for '{}': {}", from, e)))?;
671    let to_tree = to_commit
672        .tree()
673        .map_err(|e| RustlocError::GitError(format!("Failed to get tree for '{}': {}", to, e)))?;
674
675    // Compute the diff between trees
676    let changes = compute_tree_diff(&from_tree, &to_tree)?;
677
678    // Try to discover workspace info
679    let workspace = WorkspaceInfo::discover(&repo_root).ok();
680
681    // Apply crate filter
682    let filtered_workspace = workspace.as_ref().map(|ws| {
683        if options.crate_filter.is_empty() {
684            ws.clone()
685        } else {
686            let names: Vec<&str> = options.crate_filter.iter().map(|s| s.as_str()).collect();
687            ws.filter_by_names(&names)
688        }
689    });
690
691    // Process changes
692    let mut total = LocsDiff::new();
693    let mut files = Vec::new();
694    let mut crate_stats: HashMap<String, CrateDiffStats> = HashMap::new();
695
696    let include_files = matches!(options.aggregation, Aggregation::ByFile);
697    let include_crates = matches!(
698        options.aggregation,
699        Aggregation::ByCrate | Aggregation::ByFile
700    );
701
702    for change in changes {
703        let path = change.path.clone();
704
705        if !options.file_filter.matches(&path) {
706            continue;
707        }
708
709        let crate_info = filtered_workspace
710            .as_ref()
711            .and_then(|ws| ws.crate_for_path(&path));
712
713        if !options.crate_filter.is_empty() && crate_info.is_none() {
714            continue;
715        }
716
717        let file_diff = compute_file_diff(&repo, &change, &path)?;
718
719        total += file_diff.diff;
720
721        if include_crates {
722            if let Some(crate_info) = crate_info {
723                let crate_stats_entry =
724                    crate_stats
725                        .entry(crate_info.name.clone())
726                        .or_insert_with(|| {
727                            CrateDiffStats::new(crate_info.name.clone(), crate_info.root.clone())
728                        });
729
730                if include_files {
731                    crate_stats_entry.add_file(file_diff.clone());
732                } else {
733                    crate_stats_entry.diff += file_diff.diff;
734                }
735            }
736        }
737
738        if include_files {
739            files.push(file_diff);
740        }
741    }
742
743    let crates: Vec<CrateDiffStats> = crate_stats.into_values().collect();
744
745    let result = DiffResult {
746        root: repo_root,
747        from_commit: from.to_string(),
748        to_commit: to.to_string(),
749        total,
750        crates,
751        files,
752    };
753
754    Ok(result.filter(options.line_types))
755}
756
757/// Internal representation of a file change
758struct FileChange {
759    path: PathBuf,
760    change_type: FileChangeType,
761    old_oid: Option<gix::ObjectId>,
762    new_oid: Option<gix::ObjectId>,
763}
764
765/// Resolve a commit reference to a commit object
766fn resolve_commit<'repo>(
767    repo: &'repo gix::Repository,
768    reference: &str,
769) -> Result<gix::Commit<'repo>> {
770    let id = repo
771        .rev_parse_single(reference.as_bytes())
772        .map_err(|e| RustlocError::GitError(format!("Failed to resolve '{}': {}", reference, e)))?
773        .detach();
774
775    repo.find_commit(id).map_err(|e| {
776        RustlocError::GitError(format!("Failed to find commit '{}': {}", reference, e))
777    })
778}
779
780/// Compute the diff between two trees
781fn compute_tree_diff(
782    from_tree: &gix::Tree<'_>,
783    to_tree: &gix::Tree<'_>,
784) -> Result<Vec<FileChange>> {
785    use gix::object::tree::diff::Action;
786
787    let mut changes = Vec::new();
788
789    from_tree
790        .changes()
791        .map_err(|e| RustlocError::GitError(format!("Failed to get tree changes: {}", e)))?
792        .for_each_to_obtain_tree(to_tree, |change| {
793            use gix::object::tree::diff::Change;
794
795            let file_change = match change {
796                Change::Addition {
797                    entry_mode,
798                    id,
799                    location,
800                    ..
801                } => {
802                    if entry_mode.is_blob() {
803                        Some(FileChange {
804                            path: PathBuf::from(gix::path::from_bstr(location)),
805                            change_type: FileChangeType::Added,
806                            old_oid: None,
807                            new_oid: Some(id.detach()),
808                        })
809                    } else {
810                        None
811                    }
812                }
813                Change::Deletion {
814                    entry_mode,
815                    id,
816                    location,
817                    ..
818                } => {
819                    if entry_mode.is_blob() {
820                        Some(FileChange {
821                            path: PathBuf::from(gix::path::from_bstr(location)),
822                            change_type: FileChangeType::Deleted,
823                            old_oid: Some(id.detach()),
824                            new_oid: None,
825                        })
826                    } else {
827                        None
828                    }
829                }
830                Change::Modification {
831                    previous_entry_mode,
832                    entry_mode,
833                    previous_id,
834                    id,
835                    location,
836                    ..
837                } => {
838                    if entry_mode.is_blob() && previous_entry_mode.is_blob() {
839                        Some(FileChange {
840                            path: PathBuf::from(gix::path::from_bstr(location)),
841                            change_type: FileChangeType::Modified,
842                            old_oid: Some(previous_id.detach()),
843                            new_oid: Some(id.detach()),
844                        })
845                    } else {
846                        None
847                    }
848                }
849                Change::Rewrite { .. } => None,
850            };
851
852            if let Some(fc) = file_change {
853                changes.push(fc);
854            }
855            Ok::<_, std::convert::Infallible>(Action::Continue)
856        })
857        .map_err(|e| RustlocError::GitError(format!("Failed to compute tree diff: {}", e)))?;
858
859    Ok(changes)
860}
861
862/// Compute the LOC diff for a single file
863fn compute_file_diff(
864    repo: &gix::Repository,
865    change: &FileChange,
866    path: &Path,
867) -> Result<FileDiffStats> {
868    let context = VisitorContext::from_file_path(path);
869
870    let (old_stats, new_stats) = match change.change_type {
871        FileChangeType::Added => {
872            let content = read_blob(repo, change.new_oid.unwrap())?;
873            let stats = gather_stats(&content, context);
874            (Locs::new(), stats)
875        }
876        FileChangeType::Deleted => {
877            let content = read_blob(repo, change.old_oid.unwrap())?;
878            let stats = gather_stats(&content, context);
879            (stats, Locs::new())
880        }
881        FileChangeType::Modified => {
882            let old_content = read_blob(repo, change.old_oid.unwrap())?;
883            let new_content = read_blob(repo, change.new_oid.unwrap())?;
884            let old_stats = gather_stats(&old_content, context);
885            let new_stats = gather_stats(&new_content, context);
886            (old_stats, new_stats)
887        }
888    };
889
890    let diff = compute_locs_diff(&old_stats, &new_stats);
891
892    Ok(FileDiffStats {
893        path: path.to_path_buf(),
894        change_type: change.change_type,
895        diff,
896    })
897}
898
899/// Compute the diff between two Locs
900fn compute_locs_diff(old: &Locs, new: &Locs) -> LocsDiff {
901    LocsDiff {
902        added: Locs {
903            code: new.code.saturating_sub(old.code),
904            tests: new.tests.saturating_sub(old.tests),
905            examples: new.examples.saturating_sub(old.examples),
906            docs: new.docs.saturating_sub(old.docs),
907            comments: new.comments.saturating_sub(old.comments),
908            blanks: new.blanks.saturating_sub(old.blanks),
909            all: new.all.saturating_sub(old.all),
910        },
911        removed: Locs {
912            code: old.code.saturating_sub(new.code),
913            tests: old.tests.saturating_sub(new.tests),
914            examples: old.examples.saturating_sub(new.examples),
915            docs: old.docs.saturating_sub(new.docs),
916            comments: old.comments.saturating_sub(new.comments),
917            blanks: old.blanks.saturating_sub(new.blanks),
918            all: old.all.saturating_sub(new.all),
919        },
920    }
921}
922
923/// Read a blob's content as a UTF-8 string
924fn read_blob(repo: &gix::Repository, oid: gix::ObjectId) -> Result<String> {
925    let object = repo
926        .find_object(oid)
927        .map_err(|e| RustlocError::GitError(format!("Failed to find object {}: {}", oid, e)))?;
928
929    let blob = object
930        .try_into_blob()
931        .map_err(|_| RustlocError::GitError(format!("Object {} is not a blob", oid)))?;
932
933    String::from_utf8(blob.data.to_vec())
934        .or_else(|e| Ok(String::from_utf8_lossy(&e.into_bytes()).into_owned()))
935}
936
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed diff reports no additions, removals, or net change.
    #[test]
    fn test_locs_diff_default() {
        let diff = LocsDiff::new();
        assert_eq!(diff.added.code, 0);
        assert_eq!(diff.removed.code, 0);
        assert_eq!(diff.net_code(), 0);
    }

    /// Each `net_*` accessor is `added - removed` for its own category.
    #[test]
    fn test_locs_diff_net() {
        let diff = LocsDiff {
            added: Locs {
                code: 100,
                tests: 50,
                examples: 20,
                docs: 10,
                comments: 5,
                blanks: 15,
                all: 200,
            },
            removed: Locs {
                code: 30,
                tests: 20,
                examples: 10,
                docs: 2,
                comments: 1,
                blanks: 5,
                all: 68,
            },
        };

        assert_eq!(diff.net_code(), 70);
        assert_eq!(diff.net_tests(), 30);
        assert_eq!(diff.net_examples(), 10);
        assert_eq!(diff.net_docs(), 8);
        assert_eq!(diff.net_comments(), 4);
        assert_eq!(diff.net_blanks(), 10);
        assert_eq!(diff.net_total(), 132); // 200 - 68
    }

    /// Adding two diffs sums their added and removed counts.
    #[test]
    fn test_locs_diff_add() {
        let a = LocsDiff {
            added: Locs {
                code: 10,
                tests: 5,
                examples: 2,
                docs: 1,
                comments: 1,
                blanks: 1,
                all: 20,
            },
            removed: Locs {
                code: 5,
                tests: 2,
                examples: 1,
                docs: 0,
                comments: 0,
                blanks: 0,
                all: 8,
            },
        };
        let b = LocsDiff {
            added: Locs {
                code: 20,
                tests: 10,
                examples: 4,
                docs: 2,
                comments: 2,
                blanks: 2,
                all: 40,
            },
            removed: Locs {
                code: 10,
                tests: 5,
                examples: 2,
                docs: 1,
                comments: 1,
                blanks: 1,
                all: 20,
            },
        };

        let sum = a + b;
        assert_eq!(sum.added.code, 30);
        assert_eq!(sum.removed.code, 15);
        assert_eq!(sum.net_code(), 15);
    }

    /// The builder methods store the crate filter and aggregation they are given.
    #[test]
    fn test_diff_options_builder() {
        let options = DiffOptions::new()
            .crates(vec!["my-crate".to_string()])
            .aggregation(Aggregation::ByFile);

        assert_eq!(options.crate_filter, vec!["my-crate"]);
        assert_eq!(options.aggregation, Aggregation::ByFile);
    }

    /// Going from an empty file to a populated one counts everything as added.
    #[test]
    fn test_compute_locs_diff_added_file() {
        let old = Locs::new();
        let new = Locs {
            code: 100,
            tests: 0,
            examples: 0,
            docs: 10,
            comments: 5,
            blanks: 20,
            all: 135,
        };

        let diff = compute_locs_diff(&old, &new);
        assert_eq!(diff.added.code, 100);
        assert_eq!(diff.removed.code, 0);
        assert_eq!(diff.added.docs, 10);
        assert_eq!(diff.removed.docs, 0);
        // Every category, including the `all` total, lands on the added side.
        assert_eq!(diff.added, new);
        assert_eq!(diff.removed, Locs::new());
    }

    /// Going from a populated file to an empty one counts everything as removed.
    #[test]
    fn test_compute_locs_diff_deleted_file() {
        let old = Locs {
            code: 0,
            tests: 50,
            examples: 0,
            docs: 5,
            comments: 2,
            blanks: 10,
            all: 67,
        };
        let new = Locs::new();

        let diff = compute_locs_diff(&old, &new);
        assert_eq!(diff.added.tests, 0);
        assert_eq!(diff.removed.tests, 50);
        // Every category, including the `all` total, lands on the removed side.
        assert_eq!(diff.added, Locs::new());
        assert_eq!(diff.removed, old);
    }

    /// A modified file reports growth and shrinkage independently per category.
    #[test]
    fn test_compute_locs_diff_modified_file() {
        let old = Locs {
            code: 100,
            tests: 0,
            examples: 0,
            docs: 10,
            comments: 5,
            blanks: 20,
            all: 135,
        };
        let new = Locs {
            code: 120,
            tests: 0,
            examples: 0,
            docs: 8,
            comments: 5,
            blanks: 25,
            all: 158,
        };

        let diff = compute_locs_diff(&old, &new);
        assert_eq!(diff.added.code, 20);
        assert_eq!(diff.removed.code, 0);
        assert_eq!(diff.added.docs, 0);
        assert_eq!(diff.removed.docs, 2);
        // Unchanged categories appear on neither side.
        assert_eq!(diff.added.comments, 0);
        assert_eq!(diff.removed.comments, 0);
        assert_eq!(diff.added.blanks, 5);
        // `all` is diffed from the two totals, not summed from the categories.
        assert_eq!(diff.added.all, 23); // 158 - 135
        assert_eq!(diff.removed.all, 0);
    }

    /// The default working-directory mode is `All`.
    #[test]
    fn test_workdir_diff_mode_default() {
        assert_eq!(WorkdirDiffMode::default(), WorkdirDiffMode::All);
    }

    /// Diffing a commit against itself yields no net change.
    #[test]
    fn test_diff_commits_same_commit() {
        // Use `HEAD` on both sides instead of a hard-coded abbreviated hash: a
        // fixed hash is missing from shallow clones, forks, and rewritten
        // history, which made this test fail for reasons unrelated to the code.
        let diff = diff_commits(".", "HEAD", "HEAD", DiffOptions::new())
            .expect("diffing HEAD against itself should succeed");
        assert_eq!(diff.total.net_total(), 0);
    }

    /// A revision that cannot be resolved is reported as an error.
    #[test]
    fn test_diff_commits_invalid_commit() {
        let result = diff_commits(".", "invalid_commit_hash", "HEAD", DiffOptions::new());
        assert!(result.is_err());
    }
}