Skip to main content

rustloclib/data/
diff.rs

1//! Git diff analysis for LOC changes between commits and working directory.
2//!
3//! This module provides functionality to compute LOC differences between:
4//! - Two git commits (using `diff_commits`)
5//! - Working directory and HEAD or index (using `diff_workdir`)
6//!
7//! ## Design Principle
8//!
9//! **Filtering (glob patterns, crate names) is done centrally using `FilterConfig`
10//! and `WorkspaceInfo`, not re-implemented here.** This module:
11//!
12//! 1. Gets changed file paths from git
13//! 2. Delegates to `FilterConfig::matches()` for glob filtering
14//! 3. Uses `WorkspaceInfo::crate_for_path()` for crate mapping
15//! 4. Applies crate filter via workspace's existing mechanisms
16
17use std::collections::HashMap;
18use std::path::{Path, PathBuf};
19
20use serde::{Deserialize, Serialize};
21
22use crate::error::RustlocError;
23use crate::query::options::{Aggregation, LineTypes};
24use crate::source::filter::FilterConfig;
25use crate::source::workspace::WorkspaceInfo;
26use crate::Result;
27
28use super::stats::Locs;
29use super::visitor::{gather_stats, VisitorContext};
30
/// Lines of code diff (added vs removed).
///
/// Holds two [`Locs`] tallies, one for added lines and one for removed lines.
/// The `net_*` methods on this type report the signed difference for each of
/// the 6 line types (code, tests, examples, docs, comments, blanks) and for
/// the total.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct LocsDiff {
    /// Lines added, broken down by line type.
    pub added: Locs,
    /// Lines removed, broken down by line type.
    pub removed: Locs,
}
41
42impl LocsDiff {
43    /// Create a new empty diff.
44    pub fn new() -> Self {
45        Self::default()
46    }
47
48    /// Net change for code lines.
49    pub fn net_code(&self) -> i64 {
50        self.added.code as i64 - self.removed.code as i64
51    }
52
53    /// Net change for test lines.
54    pub fn net_tests(&self) -> i64 {
55        self.added.tests as i64 - self.removed.tests as i64
56    }
57
58    /// Net change for example lines.
59    pub fn net_examples(&self) -> i64 {
60        self.added.examples as i64 - self.removed.examples as i64
61    }
62
63    /// Net change for doc comment lines.
64    pub fn net_docs(&self) -> i64 {
65        self.added.docs as i64 - self.removed.docs as i64
66    }
67
68    /// Net change for regular comment lines.
69    pub fn net_comments(&self) -> i64 {
70        self.added.comments as i64 - self.removed.comments as i64
71    }
72
73    /// Net change for blank lines.
74    pub fn net_blanks(&self) -> i64 {
75        self.added.blanks as i64 - self.removed.blanks as i64
76    }
77
78    /// Net change for total lines.
79    pub fn net_total(&self) -> i64 {
80        self.added.total() as i64 - self.removed.total() as i64
81    }
82
83    /// Return a filtered copy with only the specified line types included.
84    pub fn filter(&self, types: LineTypes) -> Self {
85        Self {
86            added: self.added.filter(types),
87            removed: self.removed.filter(types),
88        }
89    }
90}
91
92impl std::ops::Add for LocsDiff {
93    type Output = Self;
94
95    fn add(self, other: Self) -> Self {
96        Self {
97            added: self.added + other.added,
98            removed: self.removed + other.removed,
99        }
100    }
101}
102
103impl std::ops::AddAssign for LocsDiff {
104    fn add_assign(&mut self, other: Self) {
105        self.added += other.added;
106        self.removed += other.removed;
107    }
108}
109
/// Diff statistics for a single file.
///
/// Pairs a changed file's path and kind of change with the LOC diff
/// computed for it.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct FileDiffStats {
    /// Path to the file (relative to repo root).
    pub path: PathBuf,
    /// The type of change (added, deleted, or modified).
    pub change_type: FileChangeType,
    /// LOC diff for this file.
    pub diff: LocsDiff,
}
120
121impl FileDiffStats {
122    /// Return a filtered copy with only the specified line types included.
123    pub fn filter(&self, types: LineTypes) -> Self {
124        Self {
125            path: self.path.clone(),
126            change_type: self.change_type,
127            diff: self.diff.filter(types),
128        }
129    }
130}
131
/// Type of file change in the diff.
///
/// The diff routines in this module use the variant to decide which side(s)
/// of a change have content to analyze.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum FileChangeType {
    /// File was added (only the new side has content).
    Added,
    /// File was deleted (only the old side has content).
    Deleted,
    /// File was modified (both sides have content).
    Modified,
}
142
/// Diff statistics for a crate.
///
/// Aggregates the LOC diff of the changed files that belong to one crate.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CrateDiffStats {
    /// Name of the crate.
    pub name: String,
    /// Root path of the crate.
    pub path: PathBuf,
    /// Aggregated LOC diff.
    pub diff: LocsDiff,
    /// Per-file diff (optional, for detailed output).
    ///
    /// Stays empty unless files are recorded through `add_file`.
    pub files: Vec<FileDiffStats>,
}
155
156impl CrateDiffStats {
157    /// Create new crate diff stats.
158    pub fn new(name: String, path: PathBuf) -> Self {
159        Self {
160            name,
161            path,
162            diff: LocsDiff::new(),
163            files: Vec::new(),
164        }
165    }
166
167    /// Add file diff to this crate.
168    pub fn add_file(&mut self, file_diff: FileDiffStats) {
169        self.diff += file_diff.diff;
170        self.files.push(file_diff);
171    }
172
173    /// Return a filtered copy with only the specified line types included.
174    pub fn filter(&self, types: LineTypes) -> Self {
175        Self {
176            name: self.name.clone(),
177            path: self.path.clone(),
178            diff: self.diff.filter(types),
179            files: self.files.iter().map(|f| f.filter(types)).collect(),
180        }
181    }
182}
183
/// Result of a diff operation between two commits.
///
/// Also returned by `diff_workdir`, in which case `from_commit` and
/// `to_commit` hold the labels `"HEAD"` and `"working tree"` / `"index"`
/// instead of commit references.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DiffResult {
    /// Root path of the repository analyzed.
    pub root: PathBuf,
    /// Base commit (from).
    pub from_commit: String,
    /// Target commit (to).
    pub to_commit: String,
    /// Total diff across all files.
    pub total: LocsDiff,
    /// Per-crate diff breakdown.
    pub crates: Vec<CrateDiffStats>,
    /// Per-file diff (optional, for detailed output).
    pub files: Vec<FileDiffStats>,
    /// Lines added in non-Rust files.
    ///
    /// Not affected by `filter`; falls back to the type's default when the
    /// field is missing during deserialization.
    #[serde(default)]
    pub non_rust_added: u64,
    /// Lines removed in non-Rust files.
    ///
    /// Not affected by `filter`; falls back to the type's default when the
    /// field is missing during deserialization.
    #[serde(default)]
    pub non_rust_removed: u64,
}
206
207impl DiffResult {
208    /// Return a filtered copy with only the specified line types included.
209    pub fn filter(&self, types: LineTypes) -> Self {
210        Self {
211            root: self.root.clone(),
212            from_commit: self.from_commit.clone(),
213            to_commit: self.to_commit.clone(),
214            total: self.total.filter(types),
215            crates: self.crates.iter().map(|c| c.filter(types)).collect(),
216            files: self.files.iter().map(|f| f.filter(types)).collect(),
217            non_rust_added: self.non_rust_added,
218            non_rust_removed: self.non_rust_removed,
219        }
220    }
221}
222
/// Options for diff computation.
///
/// Consumed by `diff_commits` and `diff_workdir`. Use the builder-style
/// methods on this type to adjust individual settings.
#[derive(Debug, Clone)]
pub struct DiffOptions {
    /// Crate names to include (empty = all crates).
    pub crate_filter: Vec<String>,
    /// File filter configuration; changed files are tested with its `matches` method.
    pub file_filter: FilterConfig,
    /// Aggregation level for results; decides whether per-crate and per-file
    /// entries are collected in addition to the total.
    pub aggregation: Aggregation,
    /// Which line types to include in results.
    pub line_types: LineTypes,
}
235
236impl Default for DiffOptions {
237    fn default() -> Self {
238        Self {
239            crate_filter: Vec::new(),
240            file_filter: FilterConfig::new(),
241            aggregation: Aggregation::Total,
242            line_types: LineTypes::default(),
243        }
244    }
245}
246
247impl DiffOptions {
248    /// Create new default options.
249    pub fn new() -> Self {
250        Self::default()
251    }
252
253    /// Filter to specific crates.
254    pub fn crates(mut self, names: Vec<String>) -> Self {
255        self.crate_filter = names;
256        self
257    }
258
259    /// Set file filter.
260    pub fn filter(mut self, config: FilterConfig) -> Self {
261        self.file_filter = config;
262        self
263    }
264
265    /// Set aggregation level.
266    pub fn aggregation(mut self, level: Aggregation) -> Self {
267        self.aggregation = level;
268        self
269    }
270
271    /// Set which line types to include.
272    pub fn line_types(mut self, types: LineTypes) -> Self {
273        self.line_types = types;
274        self
275    }
276}
277
/// Mode for working directory diff.
///
/// Selects what `diff_workdir` compares HEAD against.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum WorkdirDiffMode {
    /// Compare HEAD with working directory (all uncommitted changes).
    /// This is equivalent to `git diff HEAD`.
    ///
    /// This is the default mode.
    #[default]
    All,
    /// Compare HEAD with the staging area/index (staged changes only).
    /// This is equivalent to `git diff --cached` or `git diff --staged`.
    Staged,
}
289
290/// Compute LOC diff for working directory changes.
291pub fn diff_workdir(
292    repo_path: impl AsRef<Path>,
293    mode: WorkdirDiffMode,
294    options: DiffOptions,
295) -> Result<DiffResult> {
296    let repo_path = repo_path.as_ref();
297
298    // Open the git repository
299    let repo = gix::discover(repo_path)
300        .map_err(|e| RustlocError::GitError(format!("Failed to discover git repository: {}", e)))?;
301
302    let repo_root = repo
303        .work_dir()
304        .ok_or_else(|| RustlocError::GitError("Repository has no work directory".to_string()))?
305        .to_path_buf();
306
307    // Get HEAD commit and its tree
308    let head_commit = repo
309        .head_commit()
310        .map_err(|e| RustlocError::GitError(format!("Failed to get HEAD commit: {}", e)))?;
311
312    let head_tree = head_commit
313        .tree()
314        .map_err(|e| RustlocError::GitError(format!("Failed to get HEAD tree: {}", e)))?;
315
316    // Get the index
317    let index = repo
318        .index()
319        .map_err(|e| RustlocError::GitError(format!("Failed to read index: {}", e)))?;
320
321    // Collect changes based on mode
322    let (changes, non_rust_added, non_rust_removed) = match mode {
323        WorkdirDiffMode::Staged => collect_staged_changes(&repo, &head_tree, &index)?,
324        WorkdirDiffMode::All => collect_workdir_changes(&repo, &head_tree, &repo_root)?,
325    };
326
327    // Try to discover workspace info for crate grouping
328    let workspace = WorkspaceInfo::discover(&repo_root).ok();
329
330    // Apply crate filter if specified
331    let filtered_workspace = workspace.as_ref().map(|ws| {
332        if options.crate_filter.is_empty() {
333            ws.clone()
334        } else {
335            let names: Vec<&str> = options.crate_filter.iter().map(|s| s.as_str()).collect();
336            ws.filter_by_names(&names)
337        }
338    });
339
340    // Process changes
341    let mut total = LocsDiff::new();
342    let mut files = Vec::new();
343    let mut crate_stats: HashMap<String, CrateDiffStats> = HashMap::new();
344
345    // Determine what to include based on aggregation level
346    let include_files = matches!(
347        options.aggregation,
348        Aggregation::ByFile | Aggregation::ByModule
349    );
350    let include_crates = matches!(
351        options.aggregation,
352        Aggregation::ByCrate | Aggregation::ByModule | Aggregation::ByFile
353    );
354
355    for change in changes {
356        let path = change.path.clone();
357
358        // Apply glob filter
359        if !options.file_filter.matches(&path) {
360            continue;
361        }
362
363        // Determine which crate this file belongs to
364        let crate_info = filtered_workspace
365            .as_ref()
366            .and_then(|ws| ws.crate_for_path(&path));
367
368        // If crate filter is active and file doesn't belong to a filtered crate, skip
369        if !options.crate_filter.is_empty() && crate_info.is_none() {
370            continue;
371        }
372
373        // Compute file diff
374        let file_diff = compute_workdir_file_diff(&change, &path)?;
375
376        // Aggregate into total
377        total += file_diff.diff;
378
379        // Aggregate into crate stats if applicable
380        if include_crates {
381            if let Some(crate_info) = crate_info {
382                let crate_stats_entry =
383                    crate_stats
384                        .entry(crate_info.name.clone())
385                        .or_insert_with(|| {
386                            CrateDiffStats::new(crate_info.name.clone(), crate_info.root.clone())
387                        });
388
389                if include_files {
390                    crate_stats_entry.add_file(file_diff.clone());
391                } else {
392                    crate_stats_entry.diff += file_diff.diff;
393                }
394            }
395        }
396
397        // Collect file stats if requested
398        if include_files {
399            files.push(file_diff);
400        }
401    }
402
403    // Convert crate stats map to vec
404    let crates: Vec<CrateDiffStats> = crate_stats.into_values().collect();
405
406    // Build result and apply line type filter
407    let (from_label, to_label) = match mode {
408        WorkdirDiffMode::All => ("HEAD", "working tree"),
409        WorkdirDiffMode::Staged => ("HEAD", "index"),
410    };
411
412    let result = DiffResult {
413        root: repo_root,
414        from_commit: from_label.to_string(),
415        to_commit: to_label.to_string(),
416        total,
417        crates,
418        files,
419        non_rust_added,
420        non_rust_removed,
421    };
422
423    Ok(result.filter(options.line_types))
424}
425
/// Internal representation of a working directory file change.
///
/// Unlike a commit-to-commit change, the file contents are carried inline
/// because the new side may not exist as a git object.
struct WorkdirFileChange {
    /// Path of the file, relative to the repository root.
    path: PathBuf,
    /// Kind of change; determines which of the content fields are populated.
    change_type: FileChangeType,
    /// Content from HEAD; `None` for added files.
    old_content: Option<String>,
    /// Content from the index or working directory; `None` for deleted files.
    new_content: Option<String>,
}
433
434/// Collect staged changes (HEAD vs index)
435fn collect_staged_changes(
436    repo: &gix::Repository,
437    head_tree: &gix::Tree<'_>,
438    index: &gix::worktree::Index,
439) -> Result<(Vec<WorkdirFileChange>, u64, u64)> {
440    use std::collections::HashSet;
441
442    let mut changes = Vec::new();
443    let mut seen_paths: HashSet<PathBuf> = HashSet::new();
444    let mut non_rust_added: u64 = 0;
445    let mut non_rust_removed: u64 = 0;
446
447    // Build a map of HEAD tree entries
448    let mut head_entries: HashMap<PathBuf, gix::ObjectId> = HashMap::new();
449    collect_tree_entries(repo, head_tree, PathBuf::new(), &mut head_entries)?;
450
451    // Check each entry in the index against HEAD
452    for entry in index.entries() {
453        let path = PathBuf::from(gix::path::from_bstr(entry.path(index)));
454
455        if path.extension().and_then(|e| e.to_str()) != Some("rs") {
456            // Track non-Rust file line changes
457            let index_oid = entry.id;
458            if let Some(&head_oid) = head_entries.get(&path) {
459                if head_oid != index_oid {
460                    let old_lines = count_lines(&read_blob(repo, head_oid)?);
461                    let new_lines = count_lines(&read_blob(repo, index_oid)?);
462                    non_rust_added += new_lines.saturating_sub(old_lines);
463                    non_rust_removed += old_lines.saturating_sub(new_lines);
464                }
465            } else {
466                non_rust_added += count_lines(&read_blob(repo, index_oid)?);
467            }
468            seen_paths.insert(path);
469            continue;
470        }
471
472        seen_paths.insert(path.clone());
473        let index_oid = entry.id;
474
475        if let Some(&head_oid) = head_entries.get(&path) {
476            if head_oid != index_oid {
477                let old_content = read_blob(repo, head_oid)?;
478                let new_content = read_blob(repo, index_oid)?;
479                changes.push(WorkdirFileChange {
480                    path,
481                    change_type: FileChangeType::Modified,
482                    old_content: Some(old_content),
483                    new_content: Some(new_content),
484                });
485            }
486        } else {
487            let new_content = read_blob(repo, index_oid)?;
488            changes.push(WorkdirFileChange {
489                path,
490                change_type: FileChangeType::Added,
491                old_content: None,
492                new_content: Some(new_content),
493            });
494        }
495    }
496
497    // Check for deleted files
498    for (path, head_oid) in head_entries {
499        if path.extension().and_then(|e| e.to_str()) != Some("rs") {
500            if !seen_paths.contains(&path) {
501                non_rust_removed += count_lines(&read_blob(repo, head_oid)?);
502            }
503            continue;
504        }
505        if !seen_paths.contains(&path) {
506            let old_content = read_blob(repo, head_oid)?;
507            changes.push(WorkdirFileChange {
508                path,
509                change_type: FileChangeType::Deleted,
510                old_content: Some(old_content),
511                new_content: None,
512            });
513        }
514    }
515
516    Ok((changes, non_rust_added, non_rust_removed))
517}
518
519/// Collect all uncommitted changes (HEAD vs working directory)
520fn collect_workdir_changes(
521    repo: &gix::Repository,
522    head_tree: &gix::Tree<'_>,
523    repo_root: &Path,
524) -> Result<(Vec<WorkdirFileChange>, u64, u64)> {
525    use std::collections::HashSet;
526
527    let mut changes = Vec::new();
528    let mut seen_paths: HashSet<PathBuf> = HashSet::new();
529    let mut non_rust_added: u64 = 0;
530    let mut non_rust_removed: u64 = 0;
531
532    // Build a map of HEAD tree entries
533    let mut head_entries: HashMap<PathBuf, gix::ObjectId> = HashMap::new();
534    collect_tree_entries(repo, head_tree, PathBuf::new(), &mut head_entries)?;
535
536    // Get tracked files from index
537    let index = repo
538        .index()
539        .map_err(|e| RustlocError::GitError(format!("Failed to read index: {}", e)))?;
540    let tracked_paths: HashSet<PathBuf> = index
541        .entries()
542        .iter()
543        .map(|e| PathBuf::from(gix::path::from_bstr(e.path(&index))))
544        .collect();
545
546    // Walk the working directory
547    let walker = walkdir::WalkDir::new(repo_root)
548        .into_iter()
549        .filter_entry(|e| {
550            let name = e.file_name().to_str();
551            name.is_none_or(|s| s != ".git" && s != "target")
552        });
553
554    for entry in walker.filter_map(|e| e.ok()) {
555        if !entry.file_type().is_file() {
556            continue;
557        }
558
559        let abs_path = entry.path();
560        let rel_path = abs_path
561            .strip_prefix(repo_root)
562            .unwrap_or(abs_path)
563            .to_path_buf();
564
565        // Skip untracked files
566        if !tracked_paths.contains(&rel_path) && !head_entries.contains_key(&rel_path) {
567            continue;
568        }
569
570        if abs_path.extension().and_then(|e| e.to_str()) != Some("rs") {
571            // Track non-Rust file line changes
572            seen_paths.insert(rel_path.clone());
573            let workdir_content = match std::fs::read_to_string(abs_path) {
574                Ok(content) => content,
575                Err(_) => continue,
576            };
577            let new_lines = count_lines(&workdir_content);
578            if let Some(&head_oid) = head_entries.get(&rel_path) {
579                let old_lines = count_lines(&read_blob(repo, head_oid)?);
580                if old_lines != new_lines {
581                    non_rust_added += new_lines.saturating_sub(old_lines);
582                    non_rust_removed += old_lines.saturating_sub(new_lines);
583                }
584            } else {
585                non_rust_added += new_lines;
586            }
587            continue;
588        }
589
590        seen_paths.insert(rel_path.clone());
591
592        let workdir_content = match std::fs::read_to_string(abs_path) {
593            Ok(content) => content,
594            Err(_) => continue,
595        };
596
597        if let Some(&head_oid) = head_entries.get(&rel_path) {
598            let head_content = read_blob(repo, head_oid)?;
599            if head_content != workdir_content {
600                changes.push(WorkdirFileChange {
601                    path: rel_path,
602                    change_type: FileChangeType::Modified,
603                    old_content: Some(head_content),
604                    new_content: Some(workdir_content),
605                });
606            }
607        } else {
608            changes.push(WorkdirFileChange {
609                path: rel_path,
610                change_type: FileChangeType::Added,
611                old_content: None,
612                new_content: Some(workdir_content),
613            });
614        }
615    }
616
617    // Check for deleted files
618    for (path, head_oid) in head_entries {
619        if path.extension().and_then(|e| e.to_str()) != Some("rs") {
620            if !seen_paths.contains(&path) {
621                non_rust_removed += count_lines(&read_blob(repo, head_oid)?);
622            }
623            continue;
624        }
625        if !seen_paths.contains(&path) {
626            let old_content = read_blob(repo, head_oid)?;
627            changes.push(WorkdirFileChange {
628                path,
629                change_type: FileChangeType::Deleted,
630                old_content: Some(old_content),
631                new_content: None,
632            });
633        }
634    }
635
636    Ok((changes, non_rust_added, non_rust_removed))
637}
638
639/// Recursively collect all blob entries from a tree
640fn collect_tree_entries(
641    repo: &gix::Repository,
642    tree: &gix::Tree<'_>,
643    prefix: PathBuf,
644    entries: &mut HashMap<PathBuf, gix::ObjectId>,
645) -> Result<()> {
646    for entry in tree.iter() {
647        let entry = entry
648            .map_err(|e| RustlocError::GitError(format!("Failed to read tree entry: {}", e)))?;
649
650        let name = gix::path::from_bstr(entry.filename());
651        let path = prefix.join(name);
652
653        if entry.mode().is_blob() {
654            entries.insert(path, entry.oid().to_owned());
655        } else if entry.mode().is_tree() {
656            let subtree = repo
657                .find_object(entry.oid())
658                .map_err(|e| RustlocError::GitError(format!("Failed to find tree: {}", e)))?
659                .try_into_tree()
660                .map_err(|_| RustlocError::GitError("Object is not a tree".to_string()))?;
661            collect_tree_entries(repo, &subtree, path, entries)?;
662        }
663    }
664    Ok(())
665}
666
667/// Compute the LOC diff for a working directory file change
668fn compute_workdir_file_diff(change: &WorkdirFileChange, path: &Path) -> Result<FileDiffStats> {
669    let context = VisitorContext::from_file_path(path);
670
671    let (old_stats, new_stats) = match change.change_type {
672        FileChangeType::Added => {
673            let stats = gather_stats(change.new_content.as_ref().unwrap(), context);
674            (Locs::new(), stats)
675        }
676        FileChangeType::Deleted => {
677            let stats = gather_stats(change.old_content.as_ref().unwrap(), context);
678            (stats, Locs::new())
679        }
680        FileChangeType::Modified => {
681            let old_stats = gather_stats(change.old_content.as_ref().unwrap(), context);
682            let new_stats = gather_stats(change.new_content.as_ref().unwrap(), context);
683            (old_stats, new_stats)
684        }
685    };
686
687    let diff = compute_locs_diff(&old_stats, &new_stats);
688
689    Ok(FileDiffStats {
690        path: path.to_path_buf(),
691        change_type: change.change_type,
692        diff,
693    })
694}
695
696/// Compute LOC diff between two git commits.
697pub fn diff_commits(
698    repo_path: impl AsRef<Path>,
699    from: &str,
700    to: &str,
701    options: DiffOptions,
702) -> Result<DiffResult> {
703    let repo_path = repo_path.as_ref();
704
705    // Open the git repository
706    let repo = gix::discover(repo_path)
707        .map_err(|e| RustlocError::GitError(format!("Failed to discover git repository: {}", e)))?;
708
709    let repo_root = repo
710        .work_dir()
711        .ok_or_else(|| RustlocError::GitError("Repository has no work directory".to_string()))?
712        .to_path_buf();
713
714    // Resolve commit references
715    let from_commit = resolve_commit(&repo, from)?;
716    let to_commit = resolve_commit(&repo, to)?;
717
718    // Get the trees for both commits
719    let from_tree = from_commit
720        .tree()
721        .map_err(|e| RustlocError::GitError(format!("Failed to get tree for '{}': {}", from, e)))?;
722    let to_tree = to_commit
723        .tree()
724        .map_err(|e| RustlocError::GitError(format!("Failed to get tree for '{}': {}", to, e)))?;
725
726    // Compute the diff between trees
727    let changes = compute_tree_diff(&from_tree, &to_tree)?;
728
729    // Try to discover workspace info
730    let workspace = WorkspaceInfo::discover(&repo_root).ok();
731
732    // Apply crate filter
733    let filtered_workspace = workspace.as_ref().map(|ws| {
734        if options.crate_filter.is_empty() {
735            ws.clone()
736        } else {
737            let names: Vec<&str> = options.crate_filter.iter().map(|s| s.as_str()).collect();
738            ws.filter_by_names(&names)
739        }
740    });
741
742    // Process changes
743    let mut total = LocsDiff::new();
744    let mut files = Vec::new();
745    let mut crate_stats: HashMap<String, CrateDiffStats> = HashMap::new();
746    let mut non_rust_added: u64 = 0;
747    let mut non_rust_removed: u64 = 0;
748
749    let include_files = matches!(
750        options.aggregation,
751        Aggregation::ByFile | Aggregation::ByModule
752    );
753    let include_crates = matches!(
754        options.aggregation,
755        Aggregation::ByCrate | Aggregation::ByModule | Aggregation::ByFile
756    );
757
758    for change in changes {
759        let path = change.path.clone();
760
761        // Track non-Rust file line changes
762        if path.extension().and_then(|e| e.to_str()) != Some("rs") {
763            let old_lines = change
764                .old_oid
765                .and_then(|oid| read_blob(&repo, oid).ok().map(|c| count_lines(&c)))
766                .unwrap_or(0);
767            let new_lines = change
768                .new_oid
769                .and_then(|oid| read_blob(&repo, oid).ok().map(|c| count_lines(&c)))
770                .unwrap_or(0);
771            non_rust_added += new_lines.saturating_sub(old_lines);
772            non_rust_removed += old_lines.saturating_sub(new_lines);
773            continue;
774        }
775
776        if !options.file_filter.matches(&path) {
777            continue;
778        }
779
780        let crate_info = filtered_workspace
781            .as_ref()
782            .and_then(|ws| ws.crate_for_path(&path));
783
784        if !options.crate_filter.is_empty() && crate_info.is_none() {
785            continue;
786        }
787
788        let file_diff = compute_file_diff(&repo, &change, &path)?;
789
790        total += file_diff.diff;
791
792        if include_crates {
793            if let Some(crate_info) = crate_info {
794                let crate_stats_entry =
795                    crate_stats
796                        .entry(crate_info.name.clone())
797                        .or_insert_with(|| {
798                            CrateDiffStats::new(crate_info.name.clone(), crate_info.root.clone())
799                        });
800
801                if include_files {
802                    crate_stats_entry.add_file(file_diff.clone());
803                } else {
804                    crate_stats_entry.diff += file_diff.diff;
805                }
806            }
807        }
808
809        if include_files {
810            files.push(file_diff);
811        }
812    }
813
814    let crates: Vec<CrateDiffStats> = crate_stats.into_values().collect();
815
816    let result = DiffResult {
817        root: repo_root,
818        from_commit: from.to_string(),
819        to_commit: to.to_string(),
820        total,
821        crates,
822        files,
823        non_rust_added,
824        non_rust_removed,
825    };
826
827    Ok(result.filter(options.line_types))
828}
829
/// Internal representation of a file change between two trees.
///
/// Contents are referenced by object id and read on demand.
struct FileChange {
    /// Path of the changed file as reported by the tree diff.
    path: PathBuf,
    /// Kind of change; determines which of the object ids are populated.
    change_type: FileChangeType,
    /// Blob id on the old side; `None` for added files.
    old_oid: Option<gix::ObjectId>,
    /// Blob id on the new side; `None` for deleted files.
    new_oid: Option<gix::ObjectId>,
}
837
838/// Resolve a commit reference to a commit object
839fn resolve_commit<'repo>(
840    repo: &'repo gix::Repository,
841    reference: &str,
842) -> Result<gix::Commit<'repo>> {
843    let id = repo
844        .rev_parse_single(reference.as_bytes())
845        .map_err(|e| RustlocError::GitError(format!("Failed to resolve '{}': {}", reference, e)))?
846        .detach();
847
848    repo.find_commit(id).map_err(|e| {
849        RustlocError::GitError(format!("Failed to find commit '{}': {}", reference, e))
850    })
851}
852
853/// Compute the diff between two trees
854fn compute_tree_diff(
855    from_tree: &gix::Tree<'_>,
856    to_tree: &gix::Tree<'_>,
857) -> Result<Vec<FileChange>> {
858    use gix::object::tree::diff::Action;
859
860    let mut changes = Vec::new();
861
862    from_tree
863        .changes()
864        .map_err(|e| RustlocError::GitError(format!("Failed to get tree changes: {}", e)))?
865        .for_each_to_obtain_tree(to_tree, |change| {
866            use gix::object::tree::diff::Change;
867
868            let file_change = match change {
869                Change::Addition {
870                    entry_mode,
871                    id,
872                    location,
873                    ..
874                } => {
875                    if entry_mode.is_blob() {
876                        Some(FileChange {
877                            path: PathBuf::from(gix::path::from_bstr(location)),
878                            change_type: FileChangeType::Added,
879                            old_oid: None,
880                            new_oid: Some(id.detach()),
881                        })
882                    } else {
883                        None
884                    }
885                }
886                Change::Deletion {
887                    entry_mode,
888                    id,
889                    location,
890                    ..
891                } => {
892                    if entry_mode.is_blob() {
893                        Some(FileChange {
894                            path: PathBuf::from(gix::path::from_bstr(location)),
895                            change_type: FileChangeType::Deleted,
896                            old_oid: Some(id.detach()),
897                            new_oid: None,
898                        })
899                    } else {
900                        None
901                    }
902                }
903                Change::Modification {
904                    previous_entry_mode,
905                    entry_mode,
906                    previous_id,
907                    id,
908                    location,
909                    ..
910                } => {
911                    if entry_mode.is_blob() && previous_entry_mode.is_blob() {
912                        Some(FileChange {
913                            path: PathBuf::from(gix::path::from_bstr(location)),
914                            change_type: FileChangeType::Modified,
915                            old_oid: Some(previous_id.detach()),
916                            new_oid: Some(id.detach()),
917                        })
918                    } else {
919                        None
920                    }
921                }
922                Change::Rewrite { .. } => None,
923            };
924
925            if let Some(fc) = file_change {
926                changes.push(fc);
927            }
928            Ok::<_, std::convert::Infallible>(Action::Continue)
929        })
930        .map_err(|e| RustlocError::GitError(format!("Failed to compute tree diff: {}", e)))?;
931
932    Ok(changes)
933}
934
935/// Compute the LOC diff for a single file
936fn compute_file_diff(
937    repo: &gix::Repository,
938    change: &FileChange,
939    path: &Path,
940) -> Result<FileDiffStats> {
941    let context = VisitorContext::from_file_path(path);
942
943    let (old_stats, new_stats) = match change.change_type {
944        FileChangeType::Added => {
945            let content = read_blob(repo, change.new_oid.unwrap())?;
946            let stats = gather_stats(&content, context);
947            (Locs::new(), stats)
948        }
949        FileChangeType::Deleted => {
950            let content = read_blob(repo, change.old_oid.unwrap())?;
951            let stats = gather_stats(&content, context);
952            (stats, Locs::new())
953        }
954        FileChangeType::Modified => {
955            let old_content = read_blob(repo, change.old_oid.unwrap())?;
956            let new_content = read_blob(repo, change.new_oid.unwrap())?;
957            let old_stats = gather_stats(&old_content, context);
958            let new_stats = gather_stats(&new_content, context);
959            (old_stats, new_stats)
960        }
961    };
962
963    let diff = compute_locs_diff(&old_stats, &new_stats);
964
965    Ok(FileDiffStats {
966        path: path.to_path_buf(),
967        change_type: change.change_type,
968        diff,
969    })
970}
971
972/// Compute the diff between two Locs
973fn compute_locs_diff(old: &Locs, new: &Locs) -> LocsDiff {
974    let added = Locs {
975        code: new.code.saturating_sub(old.code),
976        tests: new.tests.saturating_sub(old.tests),
977        examples: new.examples.saturating_sub(old.examples),
978        docs: new.docs.saturating_sub(old.docs),
979        comments: new.comments.saturating_sub(old.comments),
980        blanks: new.blanks.saturating_sub(old.blanks),
981        total: 0,
982    };
983    let removed = Locs {
984        code: old.code.saturating_sub(new.code),
985        tests: old.tests.saturating_sub(new.tests),
986        examples: old.examples.saturating_sub(new.examples),
987        docs: old.docs.saturating_sub(new.docs),
988        comments: old.comments.saturating_sub(new.comments),
989        blanks: old.blanks.saturating_sub(new.blanks),
990        total: 0,
991    };
992    LocsDiff {
993        added: Locs {
994            total: added.code
995                + added.tests
996                + added.examples
997                + added.docs
998                + added.comments
999                + added.blanks,
1000            ..added
1001        },
1002        removed: Locs {
1003            total: removed.code
1004                + removed.tests
1005                + removed.examples
1006                + removed.docs
1007                + removed.comments
1008                + removed.blanks,
1009            ..removed
1010        },
1011    }
1012}
1013
/// Number of lines in `content`, using the line boundaries of [`str::lines`].
///
/// An empty string has zero lines, and a trailing newline does not start an extra line.
fn count_lines(content: &str) -> u64 {
    content.lines().fold(0, |seen, _line| seen + 1)
}
1018
1019/// Read a blob's content as a UTF-8 string
1020fn read_blob(repo: &gix::Repository, oid: gix::ObjectId) -> Result<String> {
1021    let object = repo
1022        .find_object(oid)
1023        .map_err(|e| RustlocError::GitError(format!("Failed to find object {}: {}", oid, e)))?;
1024
1025    let blob = object
1026        .try_into_blob()
1027        .map_err(|_| RustlocError::GitError(format!("Object {} is not a blob", oid)))?;
1028
1029    String::from_utf8(blob.data.to_vec())
1030        .or_else(|e| Ok(String::from_utf8_lossy(&e.into_bytes()).into_owned()))
1031}
1032
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly created diff has no additions, removals, or net change.
    #[test]
    fn test_locs_diff_default() {
        let diff = LocsDiff::new();
        assert_eq!(diff.added.code, 0);
        assert_eq!(diff.removed.code, 0);
        assert_eq!(diff.net_code(), 0);
    }

    /// Each `net_*` accessor is `added - removed` for its category.
    #[test]
    fn test_locs_diff_net() {
        let diff = LocsDiff {
            added: Locs {
                code: 100,
                tests: 50,
                examples: 20,
                docs: 10,
                comments: 5,
                blanks: 15,
                total: 200,
            },
            removed: Locs {
                code: 30,
                tests: 20,
                examples: 10,
                docs: 2,
                comments: 1,
                blanks: 5,
                total: 68,
            },
        };

        assert_eq!(diff.net_code(), 70);
        assert_eq!(diff.net_tests(), 30);
        assert_eq!(diff.net_examples(), 10);
        assert_eq!(diff.net_docs(), 8);
        assert_eq!(diff.net_comments(), 4);
        assert_eq!(diff.net_blanks(), 10);
        assert_eq!(diff.net_total(), 132); // 200 - 68
    }

    /// Adding two diffs sums their added and removed counts.
    #[test]
    fn test_locs_diff_add() {
        let a = LocsDiff {
            added: Locs {
                code: 10,
                tests: 5,
                examples: 2,
                docs: 1,
                comments: 1,
                blanks: 1,
                total: 20,
            },
            removed: Locs {
                code: 5,
                tests: 2,
                examples: 1,
                docs: 0,
                comments: 0,
                blanks: 0,
                total: 8,
            },
        };
        let b = LocsDiff {
            added: Locs {
                code: 20,
                tests: 10,
                examples: 4,
                docs: 2,
                comments: 2,
                blanks: 2,
                total: 40,
            },
            removed: Locs {
                code: 10,
                tests: 5,
                examples: 2,
                docs: 1,
                comments: 1,
                blanks: 1,
                total: 20,
            },
        };

        let sum = a + b;
        assert_eq!(sum.added.code, 30);
        assert_eq!(sum.removed.code, 15);
        assert_eq!(sum.net_code(), 15);
    }

    /// The builder methods store the crate filter and aggregation they are given.
    #[test]
    fn test_diff_options_builder() {
        let options = DiffOptions::new()
            .crates(vec!["my-crate".to_string()])
            .aggregation(Aggregation::ByFile);

        assert_eq!(options.crate_filter, vec!["my-crate"]);
        assert_eq!(options.aggregation, Aggregation::ByFile);
    }

    /// An added file (empty old side) reports everything as added.
    #[test]
    fn test_compute_locs_diff_added_file() {
        let old = Locs::new();
        let new = Locs {
            code: 100,
            tests: 0,
            examples: 0,
            docs: 10,
            comments: 5,
            blanks: 20,
            total: 135,
        };

        let diff = compute_locs_diff(&old, &new);
        assert_eq!(diff.added.code, 100);
        assert_eq!(diff.removed.code, 0);
        assert_eq!(diff.added.docs, 10);
        assert_eq!(diff.removed.docs, 0);
        // Totals are derived from the per-category values.
        assert_eq!(diff.added.total, 135);
        assert_eq!(diff.removed.total, 0);
    }

    /// A deleted file (empty new side) reports everything as removed.
    #[test]
    fn test_compute_locs_diff_deleted_file() {
        let old = Locs {
            code: 0,
            tests: 50,
            examples: 0,
            docs: 5,
            comments: 2,
            blanks: 10,
            total: 67,
        };
        let new = Locs::new();

        let diff = compute_locs_diff(&old, &new);
        assert_eq!(diff.added.tests, 0);
        assert_eq!(diff.removed.tests, 50);
        assert_eq!(diff.added.total, 0);
        assert_eq!(diff.removed.total, 67);
    }

    /// A modified file reports growth as added and shrinkage as removed, per category.
    #[test]
    fn test_compute_locs_diff_modified_file() {
        let old = Locs {
            code: 100,
            tests: 0,
            examples: 0,
            docs: 10,
            comments: 5,
            blanks: 20,
            total: 135,
        };
        let new = Locs {
            code: 120,
            tests: 0,
            examples: 0,
            docs: 8,
            comments: 5,
            blanks: 25,
            total: 158,
        };

        let diff = compute_locs_diff(&old, &new);
        assert_eq!(diff.added.code, 20);
        assert_eq!(diff.removed.code, 0);
        assert_eq!(diff.added.docs, 0);
        assert_eq!(diff.removed.docs, 2);
        // 20 code + 5 blanks added; 2 docs removed.
        assert_eq!(diff.added.total, 25);
        assert_eq!(diff.removed.total, 2);
    }

    /// The default working-directory diff mode is `All`.
    #[test]
    fn test_workdir_diff_mode_default() {
        assert_eq!(WorkdirDiffMode::default(), WorkdirDiffMode::All);
    }

    /// Diffing a commit against itself yields no net change.
    ///
    /// Uses `HEAD` rather than a pinned hash so the test does not depend on a specific
    /// commit being present (e.g. in shallow clones or forks with rewritten history).
    #[test]
    fn test_diff_commits_same_commit() {
        let result = diff_commits(".", "HEAD", "HEAD", DiffOptions::new());
        assert!(result.is_ok());
        let diff = result.unwrap();
        assert_eq!(diff.total.net_total(), 0);
    }

    /// An unresolvable revision is reported as an error.
    #[test]
    fn test_diff_commits_invalid_commit() {
        let result = diff_commits(".", "invalid_commit_hash", "HEAD", DiffOptions::new());
        assert!(result.is_err());
    }
}