Skip to main content

rustloclib/data/
diff.rs

1//! Git diff analysis for LOC changes between commits and working directory.
2//!
3//! This module provides functionality to compute LOC differences between:
4//! - Two git commits (using `diff_commits`)
5//! - Working directory and HEAD or index (using `diff_workdir`)
6//!
7//! ## Design Principle
8//!
9//! **Filtering (glob patterns, crate names) is done centrally using `FilterConfig`
10//! and `WorkspaceInfo`, not re-implemented here.** This module:
11//!
12//! 1. Gets changed file paths from git
13//! 2. Delegates to `FilterConfig::matches()` for glob filtering
14//! 3. Uses `WorkspaceInfo::crate_for_path()` for crate mapping
15//! 4. Applies crate filter via workspace's existing mechanisms
16
17use std::collections::HashMap;
18use std::path::{Path, PathBuf};
19
20use serde::{Deserialize, Serialize};
21
22use crate::error::RustlocError;
23use crate::query::options::{Aggregation, LineTypes};
24use crate::source::filter::FilterConfig;
25use crate::source::workspace::WorkspaceInfo;
26use crate::Result;
27
28use super::stats::Locs;
29use super::visitor::{gather_stats, VisitorContext};
30
31/// Lines of code diff (added vs removed).
32///
33/// Tracks additions and removals for each of the 6 line types.
34#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
35pub struct LocsDiff {
36    /// Lines added
37    pub added: Locs,
38    /// Lines removed
39    pub removed: Locs,
40}
41
42impl LocsDiff {
43    /// Create a new empty diff.
44    pub fn new() -> Self {
45        Self::default()
46    }
47
48    /// Net change for code lines.
49    pub fn net_code(&self) -> i64 {
50        self.added.code as i64 - self.removed.code as i64
51    }
52
53    /// Net change for test lines.
54    pub fn net_tests(&self) -> i64 {
55        self.added.tests as i64 - self.removed.tests as i64
56    }
57
58    /// Net change for example lines.
59    pub fn net_examples(&self) -> i64 {
60        self.added.examples as i64 - self.removed.examples as i64
61    }
62
63    /// Net change for doc comment lines.
64    pub fn net_docs(&self) -> i64 {
65        self.added.docs as i64 - self.removed.docs as i64
66    }
67
68    /// Net change for regular comment lines.
69    pub fn net_comments(&self) -> i64 {
70        self.added.comments as i64 - self.removed.comments as i64
71    }
72
73    /// Net change for blank lines.
74    pub fn net_blanks(&self) -> i64 {
75        self.added.blanks as i64 - self.removed.blanks as i64
76    }
77
78    /// Net change for total lines.
79    pub fn net_total(&self) -> i64 {
80        self.added.total() as i64 - self.removed.total() as i64
81    }
82
83    /// Return a filtered copy with only the specified line types included.
84    pub fn filter(&self, types: LineTypes) -> Self {
85        Self {
86            added: self.added.filter(types),
87            removed: self.removed.filter(types),
88        }
89    }
90}
91
92impl std::ops::Add for LocsDiff {
93    type Output = Self;
94
95    fn add(self, other: Self) -> Self {
96        Self {
97            added: self.added + other.added,
98            removed: self.removed + other.removed,
99        }
100    }
101}
102
103impl std::ops::AddAssign for LocsDiff {
104    fn add_assign(&mut self, other: Self) {
105        self.added += other.added;
106        self.removed += other.removed;
107    }
108}
109
110/// Diff statistics for a single file.
111#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
112pub struct FileDiffStats {
113    /// Path to the file (relative to repo root).
114    pub path: PathBuf,
115    /// The type of change.
116    pub change_type: FileChangeType,
117    /// LOC diff for this file.
118    pub diff: LocsDiff,
119}
120
121impl FileDiffStats {
122    /// Return a filtered copy with only the specified line types included.
123    pub fn filter(&self, types: LineTypes) -> Self {
124        Self {
125            path: self.path.clone(),
126            change_type: self.change_type,
127            diff: self.diff.filter(types),
128        }
129    }
130}
131
/// Type of file change in the diff.
///
/// Stored on every [`FileDiffStats`] to record how the file differs between
/// the old and the new side of the comparison.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum FileChangeType {
    /// File was added (present on the new side only).
    Added,
    /// File was deleted (present on the old side only).
    Deleted,
    /// File was modified (present on both sides with different content).
    Modified,
}
142
143/// Diff statistics for a crate.
144#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
145pub struct CrateDiffStats {
146    /// Name of the crate.
147    pub name: String,
148    /// Root path of the crate.
149    pub path: PathBuf,
150    /// Aggregated LOC diff.
151    pub diff: LocsDiff,
152    /// Per-file diff (optional, for detailed output).
153    pub files: Vec<FileDiffStats>,
154}
155
156impl CrateDiffStats {
157    /// Create new crate diff stats.
158    pub fn new(name: String, path: PathBuf) -> Self {
159        Self {
160            name,
161            path,
162            diff: LocsDiff::new(),
163            files: Vec::new(),
164        }
165    }
166
167    /// Add file diff to this crate.
168    pub fn add_file(&mut self, file_diff: FileDiffStats) {
169        self.diff += file_diff.diff;
170        self.files.push(file_diff);
171    }
172
173    /// Return a filtered copy with only the specified line types included.
174    pub fn filter(&self, types: LineTypes) -> Self {
175        Self {
176            name: self.name.clone(),
177            path: self.path.clone(),
178            diff: self.diff.filter(types),
179            files: self.files.iter().map(|f| f.filter(types)).collect(),
180        }
181    }
182}
183
184/// Result of a diff operation between two commits.
185#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
186pub struct DiffResult {
187    /// Root path of the repository analyzed.
188    pub root: PathBuf,
189    /// Base commit (from).
190    pub from_commit: String,
191    /// Target commit (to).
192    pub to_commit: String,
193    /// Total diff across all files.
194    pub total: LocsDiff,
195    /// Per-crate diff breakdown.
196    pub crates: Vec<CrateDiffStats>,
197    /// Per-file diff (optional, for detailed output).
198    pub files: Vec<FileDiffStats>,
199    /// Lines added in non-Rust files.
200    #[serde(default)]
201    pub non_rust_added: u64,
202    /// Lines removed in non-Rust files.
203    #[serde(default)]
204    pub non_rust_removed: u64,
205}
206
207impl DiffResult {
208    /// Return a filtered copy with only the specified line types included.
209    pub fn filter(&self, types: LineTypes) -> Self {
210        Self {
211            root: self.root.clone(),
212            from_commit: self.from_commit.clone(),
213            to_commit: self.to_commit.clone(),
214            total: self.total.filter(types),
215            crates: self.crates.iter().map(|c| c.filter(types)).collect(),
216            files: self.files.iter().map(|f| f.filter(types)).collect(),
217            non_rust_added: self.non_rust_added,
218            non_rust_removed: self.non_rust_removed,
219        }
220    }
221}
222
223/// Options for diff computation.
224#[derive(Debug, Clone)]
225pub struct DiffOptions {
226    /// Crate names to include (empty = all crates).
227    pub crate_filter: Vec<String>,
228    /// File filter configuration.
229    pub file_filter: FilterConfig,
230    /// Aggregation level for results.
231    pub aggregation: Aggregation,
232    /// Which line types to include in results.
233    pub line_types: LineTypes,
234}
235
236impl Default for DiffOptions {
237    fn default() -> Self {
238        Self {
239            crate_filter: Vec::new(),
240            file_filter: FilterConfig::new(),
241            aggregation: Aggregation::Total,
242            line_types: LineTypes::default(),
243        }
244    }
245}
246
247impl DiffOptions {
248    /// Create new default options.
249    pub fn new() -> Self {
250        Self::default()
251    }
252
253    /// Filter to specific crates.
254    pub fn crates(mut self, names: Vec<String>) -> Self {
255        self.crate_filter = names;
256        self
257    }
258
259    /// Set file filter.
260    pub fn filter(mut self, config: FilterConfig) -> Self {
261        self.file_filter = config;
262        self
263    }
264
265    /// Set aggregation level.
266    pub fn aggregation(mut self, level: Aggregation) -> Self {
267        self.aggregation = level;
268        self
269    }
270
271    /// Set which line types to include.
272    pub fn line_types(mut self, types: LineTypes) -> Self {
273        self.line_types = types;
274        self
275    }
276}
277
/// Mode for working directory diff.
///
/// Selects what HEAD is compared against in `diff_workdir`. `All` is the
/// default.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum WorkdirDiffMode {
    /// Compare HEAD with working directory (all uncommitted changes).
    /// This is equivalent to `git diff HEAD`.
    #[default]
    All,
    /// Compare HEAD with the staging area/index (staged changes only).
    /// This is equivalent to `git diff --cached` or `git diff --staged`.
    Staged,
}
289
290/// Compute LOC diff for working directory changes.
291pub fn diff_workdir(
292    repo_path: impl AsRef<Path>,
293    mode: WorkdirDiffMode,
294    options: DiffOptions,
295) -> Result<DiffResult> {
296    let repo_path = repo_path.as_ref();
297
298    // Open the git repository
299    let repo = gix::discover(repo_path)
300        .map_err(|e| RustlocError::GitError(format!("Failed to discover git repository: {}", e)))?;
301
302    let repo_root = repo
303        .work_dir()
304        .ok_or_else(|| RustlocError::GitError("Repository has no work directory".to_string()))?
305        .to_path_buf();
306
307    // Get HEAD commit and its tree
308    let head_commit = repo
309        .head_commit()
310        .map_err(|e| RustlocError::GitError(format!("Failed to get HEAD commit: {}", e)))?;
311
312    let head_tree = head_commit
313        .tree()
314        .map_err(|e| RustlocError::GitError(format!("Failed to get HEAD tree: {}", e)))?;
315
316    // Get the index
317    let index = repo
318        .index()
319        .map_err(|e| RustlocError::GitError(format!("Failed to read index: {}", e)))?;
320
321    // Collect changes based on mode
322    let (changes, non_rust_added, non_rust_removed) = match mode {
323        WorkdirDiffMode::Staged => collect_staged_changes(&repo, &head_tree, &index)?,
324        WorkdirDiffMode::All => collect_workdir_changes(&repo, &head_tree, &repo_root)?,
325    };
326
327    // Try to discover workspace info for crate grouping
328    let workspace = WorkspaceInfo::discover(&repo_root).ok();
329
330    // Apply crate filter if specified
331    let filtered_workspace = workspace.as_ref().map(|ws| {
332        if options.crate_filter.is_empty() {
333            ws.clone()
334        } else {
335            let names: Vec<&str> = options.crate_filter.iter().map(|s| s.as_str()).collect();
336            ws.filter_by_names(&names)
337        }
338    });
339
340    // Process changes
341    let mut total = LocsDiff::new();
342    let mut files = Vec::new();
343    let mut crate_stats: HashMap<String, CrateDiffStats> = HashMap::new();
344
345    // Determine what to include based on aggregation level
346    let include_files = matches!(options.aggregation, Aggregation::ByFile);
347    let include_crates = matches!(
348        options.aggregation,
349        Aggregation::ByCrate | Aggregation::ByFile
350    );
351
352    for change in changes {
353        let path = change.path.clone();
354
355        // Apply glob filter
356        if !options.file_filter.matches(&path) {
357            continue;
358        }
359
360        // Determine which crate this file belongs to
361        let crate_info = filtered_workspace
362            .as_ref()
363            .and_then(|ws| ws.crate_for_path(&path));
364
365        // If crate filter is active and file doesn't belong to a filtered crate, skip
366        if !options.crate_filter.is_empty() && crate_info.is_none() {
367            continue;
368        }
369
370        // Compute file diff
371        let file_diff = compute_workdir_file_diff(&change, &path)?;
372
373        // Aggregate into total
374        total += file_diff.diff;
375
376        // Aggregate into crate stats if applicable
377        if include_crates {
378            if let Some(crate_info) = crate_info {
379                let crate_stats_entry =
380                    crate_stats
381                        .entry(crate_info.name.clone())
382                        .or_insert_with(|| {
383                            CrateDiffStats::new(crate_info.name.clone(), crate_info.root.clone())
384                        });
385
386                if include_files {
387                    crate_stats_entry.add_file(file_diff.clone());
388                } else {
389                    crate_stats_entry.diff += file_diff.diff;
390                }
391            }
392        }
393
394        // Collect file stats if requested
395        if include_files {
396            files.push(file_diff);
397        }
398    }
399
400    // Convert crate stats map to vec
401    let crates: Vec<CrateDiffStats> = crate_stats.into_values().collect();
402
403    // Build result and apply line type filter
404    let (from_label, to_label) = match mode {
405        WorkdirDiffMode::All => ("HEAD", "working tree"),
406        WorkdirDiffMode::Staged => ("HEAD", "index"),
407    };
408
409    let result = DiffResult {
410        root: repo_root,
411        from_commit: from_label.to_string(),
412        to_commit: to_label.to_string(),
413        total,
414        crates,
415        files,
416        non_rust_added,
417        non_rust_removed,
418    };
419
420    Ok(result.filter(options.line_types))
421}
422
/// Internal representation of a working directory file change.
///
/// Carries the text of both sides so the LOC diff can be computed from this
/// value alone.
struct WorkdirFileChange {
    /// Path of the file, relative to the repository root.
    path: PathBuf,
    /// Whether the file was added, deleted or modified.
    change_type: FileChangeType,
    /// Content on the HEAD side; `None` for added files.
    old_content: Option<String>,
    /// Content on the index / working-tree side; `None` for deleted files.
    new_content: Option<String>,
}
430
431/// Collect staged changes (HEAD vs index)
432fn collect_staged_changes(
433    repo: &gix::Repository,
434    head_tree: &gix::Tree<'_>,
435    index: &gix::worktree::Index,
436) -> Result<(Vec<WorkdirFileChange>, u64, u64)> {
437    use std::collections::HashSet;
438
439    let mut changes = Vec::new();
440    let mut seen_paths: HashSet<PathBuf> = HashSet::new();
441    let mut non_rust_added: u64 = 0;
442    let mut non_rust_removed: u64 = 0;
443
444    // Build a map of HEAD tree entries
445    let mut head_entries: HashMap<PathBuf, gix::ObjectId> = HashMap::new();
446    collect_tree_entries(repo, head_tree, PathBuf::new(), &mut head_entries)?;
447
448    // Check each entry in the index against HEAD
449    for entry in index.entries() {
450        let path = PathBuf::from(gix::path::from_bstr(entry.path(index)));
451
452        if path.extension().and_then(|e| e.to_str()) != Some("rs") {
453            // Track non-Rust file line changes
454            let index_oid = entry.id;
455            if let Some(&head_oid) = head_entries.get(&path) {
456                if head_oid != index_oid {
457                    let old_lines = count_lines(&read_blob(repo, head_oid)?);
458                    let new_lines = count_lines(&read_blob(repo, index_oid)?);
459                    non_rust_added += new_lines.saturating_sub(old_lines);
460                    non_rust_removed += old_lines.saturating_sub(new_lines);
461                }
462            } else {
463                non_rust_added += count_lines(&read_blob(repo, index_oid)?);
464            }
465            seen_paths.insert(path);
466            continue;
467        }
468
469        seen_paths.insert(path.clone());
470        let index_oid = entry.id;
471
472        if let Some(&head_oid) = head_entries.get(&path) {
473            if head_oid != index_oid {
474                let old_content = read_blob(repo, head_oid)?;
475                let new_content = read_blob(repo, index_oid)?;
476                changes.push(WorkdirFileChange {
477                    path,
478                    change_type: FileChangeType::Modified,
479                    old_content: Some(old_content),
480                    new_content: Some(new_content),
481                });
482            }
483        } else {
484            let new_content = read_blob(repo, index_oid)?;
485            changes.push(WorkdirFileChange {
486                path,
487                change_type: FileChangeType::Added,
488                old_content: None,
489                new_content: Some(new_content),
490            });
491        }
492    }
493
494    // Check for deleted files
495    for (path, head_oid) in head_entries {
496        if path.extension().and_then(|e| e.to_str()) != Some("rs") {
497            if !seen_paths.contains(&path) {
498                non_rust_removed += count_lines(&read_blob(repo, head_oid)?);
499            }
500            continue;
501        }
502        if !seen_paths.contains(&path) {
503            let old_content = read_blob(repo, head_oid)?;
504            changes.push(WorkdirFileChange {
505                path,
506                change_type: FileChangeType::Deleted,
507                old_content: Some(old_content),
508                new_content: None,
509            });
510        }
511    }
512
513    Ok((changes, non_rust_added, non_rust_removed))
514}
515
516/// Collect all uncommitted changes (HEAD vs working directory)
517fn collect_workdir_changes(
518    repo: &gix::Repository,
519    head_tree: &gix::Tree<'_>,
520    repo_root: &Path,
521) -> Result<(Vec<WorkdirFileChange>, u64, u64)> {
522    use std::collections::HashSet;
523
524    let mut changes = Vec::new();
525    let mut seen_paths: HashSet<PathBuf> = HashSet::new();
526    let mut non_rust_added: u64 = 0;
527    let mut non_rust_removed: u64 = 0;
528
529    // Build a map of HEAD tree entries
530    let mut head_entries: HashMap<PathBuf, gix::ObjectId> = HashMap::new();
531    collect_tree_entries(repo, head_tree, PathBuf::new(), &mut head_entries)?;
532
533    // Get tracked files from index
534    let index = repo
535        .index()
536        .map_err(|e| RustlocError::GitError(format!("Failed to read index: {}", e)))?;
537    let tracked_paths: HashSet<PathBuf> = index
538        .entries()
539        .iter()
540        .map(|e| PathBuf::from(gix::path::from_bstr(e.path(&index))))
541        .collect();
542
543    // Walk the working directory
544    let walker = walkdir::WalkDir::new(repo_root)
545        .into_iter()
546        .filter_entry(|e| {
547            let name = e.file_name().to_str();
548            name.is_none_or(|s| s != ".git" && s != "target")
549        });
550
551    for entry in walker.filter_map(|e| e.ok()) {
552        if !entry.file_type().is_file() {
553            continue;
554        }
555
556        let abs_path = entry.path();
557        let rel_path = abs_path
558            .strip_prefix(repo_root)
559            .unwrap_or(abs_path)
560            .to_path_buf();
561
562        // Skip untracked files
563        if !tracked_paths.contains(&rel_path) && !head_entries.contains_key(&rel_path) {
564            continue;
565        }
566
567        if abs_path.extension().and_then(|e| e.to_str()) != Some("rs") {
568            // Track non-Rust file line changes
569            seen_paths.insert(rel_path.clone());
570            let workdir_content = match std::fs::read_to_string(abs_path) {
571                Ok(content) => content,
572                Err(_) => continue,
573            };
574            let new_lines = count_lines(&workdir_content);
575            if let Some(&head_oid) = head_entries.get(&rel_path) {
576                let old_lines = count_lines(&read_blob(repo, head_oid)?);
577                if old_lines != new_lines {
578                    non_rust_added += new_lines.saturating_sub(old_lines);
579                    non_rust_removed += old_lines.saturating_sub(new_lines);
580                }
581            } else {
582                non_rust_added += new_lines;
583            }
584            continue;
585        }
586
587        seen_paths.insert(rel_path.clone());
588
589        let workdir_content = match std::fs::read_to_string(abs_path) {
590            Ok(content) => content,
591            Err(_) => continue,
592        };
593
594        if let Some(&head_oid) = head_entries.get(&rel_path) {
595            let head_content = read_blob(repo, head_oid)?;
596            if head_content != workdir_content {
597                changes.push(WorkdirFileChange {
598                    path: rel_path,
599                    change_type: FileChangeType::Modified,
600                    old_content: Some(head_content),
601                    new_content: Some(workdir_content),
602                });
603            }
604        } else {
605            changes.push(WorkdirFileChange {
606                path: rel_path,
607                change_type: FileChangeType::Added,
608                old_content: None,
609                new_content: Some(workdir_content),
610            });
611        }
612    }
613
614    // Check for deleted files
615    for (path, head_oid) in head_entries {
616        if path.extension().and_then(|e| e.to_str()) != Some("rs") {
617            if !seen_paths.contains(&path) {
618                non_rust_removed += count_lines(&read_blob(repo, head_oid)?);
619            }
620            continue;
621        }
622        if !seen_paths.contains(&path) {
623            let old_content = read_blob(repo, head_oid)?;
624            changes.push(WorkdirFileChange {
625                path,
626                change_type: FileChangeType::Deleted,
627                old_content: Some(old_content),
628                new_content: None,
629            });
630        }
631    }
632
633    Ok((changes, non_rust_added, non_rust_removed))
634}
635
636/// Recursively collect all blob entries from a tree
637fn collect_tree_entries(
638    repo: &gix::Repository,
639    tree: &gix::Tree<'_>,
640    prefix: PathBuf,
641    entries: &mut HashMap<PathBuf, gix::ObjectId>,
642) -> Result<()> {
643    for entry in tree.iter() {
644        let entry = entry
645            .map_err(|e| RustlocError::GitError(format!("Failed to read tree entry: {}", e)))?;
646
647        let name = gix::path::from_bstr(entry.filename());
648        let path = prefix.join(name);
649
650        if entry.mode().is_blob() {
651            entries.insert(path, entry.oid().to_owned());
652        } else if entry.mode().is_tree() {
653            let subtree = repo
654                .find_object(entry.oid())
655                .map_err(|e| RustlocError::GitError(format!("Failed to find tree: {}", e)))?
656                .try_into_tree()
657                .map_err(|_| RustlocError::GitError("Object is not a tree".to_string()))?;
658            collect_tree_entries(repo, &subtree, path, entries)?;
659        }
660    }
661    Ok(())
662}
663
664/// Compute the LOC diff for a working directory file change
665fn compute_workdir_file_diff(change: &WorkdirFileChange, path: &Path) -> Result<FileDiffStats> {
666    let context = VisitorContext::from_file_path(path);
667
668    let (old_stats, new_stats) = match change.change_type {
669        FileChangeType::Added => {
670            let stats = gather_stats(change.new_content.as_ref().unwrap(), context);
671            (Locs::new(), stats)
672        }
673        FileChangeType::Deleted => {
674            let stats = gather_stats(change.old_content.as_ref().unwrap(), context);
675            (stats, Locs::new())
676        }
677        FileChangeType::Modified => {
678            let old_stats = gather_stats(change.old_content.as_ref().unwrap(), context);
679            let new_stats = gather_stats(change.new_content.as_ref().unwrap(), context);
680            (old_stats, new_stats)
681        }
682    };
683
684    let diff = compute_locs_diff(&old_stats, &new_stats);
685
686    Ok(FileDiffStats {
687        path: path.to_path_buf(),
688        change_type: change.change_type,
689        diff,
690    })
691}
692
693/// Compute LOC diff between two git commits.
694pub fn diff_commits(
695    repo_path: impl AsRef<Path>,
696    from: &str,
697    to: &str,
698    options: DiffOptions,
699) -> Result<DiffResult> {
700    let repo_path = repo_path.as_ref();
701
702    // Open the git repository
703    let repo = gix::discover(repo_path)
704        .map_err(|e| RustlocError::GitError(format!("Failed to discover git repository: {}", e)))?;
705
706    let repo_root = repo
707        .work_dir()
708        .ok_or_else(|| RustlocError::GitError("Repository has no work directory".to_string()))?
709        .to_path_buf();
710
711    // Resolve commit references
712    let from_commit = resolve_commit(&repo, from)?;
713    let to_commit = resolve_commit(&repo, to)?;
714
715    // Get the trees for both commits
716    let from_tree = from_commit
717        .tree()
718        .map_err(|e| RustlocError::GitError(format!("Failed to get tree for '{}': {}", from, e)))?;
719    let to_tree = to_commit
720        .tree()
721        .map_err(|e| RustlocError::GitError(format!("Failed to get tree for '{}': {}", to, e)))?;
722
723    // Compute the diff between trees
724    let changes = compute_tree_diff(&from_tree, &to_tree)?;
725
726    // Try to discover workspace info
727    let workspace = WorkspaceInfo::discover(&repo_root).ok();
728
729    // Apply crate filter
730    let filtered_workspace = workspace.as_ref().map(|ws| {
731        if options.crate_filter.is_empty() {
732            ws.clone()
733        } else {
734            let names: Vec<&str> = options.crate_filter.iter().map(|s| s.as_str()).collect();
735            ws.filter_by_names(&names)
736        }
737    });
738
739    // Process changes
740    let mut total = LocsDiff::new();
741    let mut files = Vec::new();
742    let mut crate_stats: HashMap<String, CrateDiffStats> = HashMap::new();
743    let mut non_rust_added: u64 = 0;
744    let mut non_rust_removed: u64 = 0;
745
746    let include_files = matches!(options.aggregation, Aggregation::ByFile);
747    let include_crates = matches!(
748        options.aggregation,
749        Aggregation::ByCrate | Aggregation::ByFile
750    );
751
752    for change in changes {
753        let path = change.path.clone();
754
755        // Track non-Rust file line changes
756        if path.extension().and_then(|e| e.to_str()) != Some("rs") {
757            let old_lines = change
758                .old_oid
759                .and_then(|oid| read_blob(&repo, oid).ok().map(|c| count_lines(&c)))
760                .unwrap_or(0);
761            let new_lines = change
762                .new_oid
763                .and_then(|oid| read_blob(&repo, oid).ok().map(|c| count_lines(&c)))
764                .unwrap_or(0);
765            non_rust_added += new_lines.saturating_sub(old_lines);
766            non_rust_removed += old_lines.saturating_sub(new_lines);
767            continue;
768        }
769
770        if !options.file_filter.matches(&path) {
771            continue;
772        }
773
774        let crate_info = filtered_workspace
775            .as_ref()
776            .and_then(|ws| ws.crate_for_path(&path));
777
778        if !options.crate_filter.is_empty() && crate_info.is_none() {
779            continue;
780        }
781
782        let file_diff = compute_file_diff(&repo, &change, &path)?;
783
784        total += file_diff.diff;
785
786        if include_crates {
787            if let Some(crate_info) = crate_info {
788                let crate_stats_entry =
789                    crate_stats
790                        .entry(crate_info.name.clone())
791                        .or_insert_with(|| {
792                            CrateDiffStats::new(crate_info.name.clone(), crate_info.root.clone())
793                        });
794
795                if include_files {
796                    crate_stats_entry.add_file(file_diff.clone());
797                } else {
798                    crate_stats_entry.diff += file_diff.diff;
799                }
800            }
801        }
802
803        if include_files {
804            files.push(file_diff);
805        }
806    }
807
808    let crates: Vec<CrateDiffStats> = crate_stats.into_values().collect();
809
810    let result = DiffResult {
811        root: repo_root,
812        from_commit: from.to_string(),
813        to_commit: to.to_string(),
814        total,
815        crates,
816        files,
817        non_rust_added,
818        non_rust_removed,
819    };
820
821    Ok(result.filter(options.line_types))
822}
823
/// Internal representation of a file change between two trees.
///
/// Only object ids are stored; the blob contents are read when the LOC diff
/// for the file is computed.
struct FileChange {
    /// Path of the file as reported by the tree diff.
    path: PathBuf,
    /// Whether the file was added, deleted or modified.
    change_type: FileChangeType,
    /// Blob id on the old side; `None` for added files.
    old_oid: Option<gix::ObjectId>,
    /// Blob id on the new side; `None` for deleted files.
    new_oid: Option<gix::ObjectId>,
}
831
832/// Resolve a commit reference to a commit object
833fn resolve_commit<'repo>(
834    repo: &'repo gix::Repository,
835    reference: &str,
836) -> Result<gix::Commit<'repo>> {
837    let id = repo
838        .rev_parse_single(reference.as_bytes())
839        .map_err(|e| RustlocError::GitError(format!("Failed to resolve '{}': {}", reference, e)))?
840        .detach();
841
842    repo.find_commit(id).map_err(|e| {
843        RustlocError::GitError(format!("Failed to find commit '{}': {}", reference, e))
844    })
845}
846
847/// Compute the diff between two trees
848fn compute_tree_diff(
849    from_tree: &gix::Tree<'_>,
850    to_tree: &gix::Tree<'_>,
851) -> Result<Vec<FileChange>> {
852    use gix::object::tree::diff::Action;
853
854    let mut changes = Vec::new();
855
856    from_tree
857        .changes()
858        .map_err(|e| RustlocError::GitError(format!("Failed to get tree changes: {}", e)))?
859        .for_each_to_obtain_tree(to_tree, |change| {
860            use gix::object::tree::diff::Change;
861
862            let file_change = match change {
863                Change::Addition {
864                    entry_mode,
865                    id,
866                    location,
867                    ..
868                } => {
869                    if entry_mode.is_blob() {
870                        Some(FileChange {
871                            path: PathBuf::from(gix::path::from_bstr(location)),
872                            change_type: FileChangeType::Added,
873                            old_oid: None,
874                            new_oid: Some(id.detach()),
875                        })
876                    } else {
877                        None
878                    }
879                }
880                Change::Deletion {
881                    entry_mode,
882                    id,
883                    location,
884                    ..
885                } => {
886                    if entry_mode.is_blob() {
887                        Some(FileChange {
888                            path: PathBuf::from(gix::path::from_bstr(location)),
889                            change_type: FileChangeType::Deleted,
890                            old_oid: Some(id.detach()),
891                            new_oid: None,
892                        })
893                    } else {
894                        None
895                    }
896                }
897                Change::Modification {
898                    previous_entry_mode,
899                    entry_mode,
900                    previous_id,
901                    id,
902                    location,
903                    ..
904                } => {
905                    if entry_mode.is_blob() && previous_entry_mode.is_blob() {
906                        Some(FileChange {
907                            path: PathBuf::from(gix::path::from_bstr(location)),
908                            change_type: FileChangeType::Modified,
909                            old_oid: Some(previous_id.detach()),
910                            new_oid: Some(id.detach()),
911                        })
912                    } else {
913                        None
914                    }
915                }
916                Change::Rewrite { .. } => None,
917            };
918
919            if let Some(fc) = file_change {
920                changes.push(fc);
921            }
922            Ok::<_, std::convert::Infallible>(Action::Continue)
923        })
924        .map_err(|e| RustlocError::GitError(format!("Failed to compute tree diff: {}", e)))?;
925
926    Ok(changes)
927}
928
929/// Compute the LOC diff for a single file
930fn compute_file_diff(
931    repo: &gix::Repository,
932    change: &FileChange,
933    path: &Path,
934) -> Result<FileDiffStats> {
935    let context = VisitorContext::from_file_path(path);
936
937    let (old_stats, new_stats) = match change.change_type {
938        FileChangeType::Added => {
939            let content = read_blob(repo, change.new_oid.unwrap())?;
940            let stats = gather_stats(&content, context);
941            (Locs::new(), stats)
942        }
943        FileChangeType::Deleted => {
944            let content = read_blob(repo, change.old_oid.unwrap())?;
945            let stats = gather_stats(&content, context);
946            (stats, Locs::new())
947        }
948        FileChangeType::Modified => {
949            let old_content = read_blob(repo, change.old_oid.unwrap())?;
950            let new_content = read_blob(repo, change.new_oid.unwrap())?;
951            let old_stats = gather_stats(&old_content, context);
952            let new_stats = gather_stats(&new_content, context);
953            (old_stats, new_stats)
954        }
955    };
956
957    let diff = compute_locs_diff(&old_stats, &new_stats);
958
959    Ok(FileDiffStats {
960        path: path.to_path_buf(),
961        change_type: change.change_type,
962        diff,
963    })
964}
965
966/// Compute the diff between two Locs
967fn compute_locs_diff(old: &Locs, new: &Locs) -> LocsDiff {
968    LocsDiff {
969        added: Locs {
970            code: new.code.saturating_sub(old.code),
971            tests: new.tests.saturating_sub(old.tests),
972            examples: new.examples.saturating_sub(old.examples),
973            docs: new.docs.saturating_sub(old.docs),
974            comments: new.comments.saturating_sub(old.comments),
975            blanks: new.blanks.saturating_sub(old.blanks),
976            total: new.total.saturating_sub(old.total),
977        },
978        removed: Locs {
979            code: old.code.saturating_sub(new.code),
980            tests: old.tests.saturating_sub(new.tests),
981            examples: old.examples.saturating_sub(new.examples),
982            docs: old.docs.saturating_sub(new.docs),
983            comments: old.comments.saturating_sub(new.comments),
984            blanks: old.blanks.saturating_sub(new.blanks),
985            total: old.total.saturating_sub(new.total),
986        },
987    }
988}
989
/// Return the number of lines in `content`.
///
/// Lines are delimited exactly as [`str::lines`] delimits them, so a trailing
/// line terminator does not start an extra line and an empty string has zero
/// lines.
fn count_lines(content: &str) -> u64 {
    content.lines().fold(0, |seen, _line| seen + 1)
}
994
995/// Read a blob's content as a UTF-8 string
996fn read_blob(repo: &gix::Repository, oid: gix::ObjectId) -> Result<String> {
997    let object = repo
998        .find_object(oid)
999        .map_err(|e| RustlocError::GitError(format!("Failed to find object {}: {}", oid, e)))?;
1000
1001    let blob = object
1002        .try_into_blob()
1003        .map_err(|_| RustlocError::GitError(format!("Object {} is not a blob", oid)))?;
1004
1005    String::from_utf8(blob.data.to_vec())
1006        .or_else(|e| Ok(String::from_utf8_lossy(&e.into_bytes()).into_owned()))
1007}
1008
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed diff has no additions, no removals, and zero net change.
    #[test]
    fn test_locs_diff_default() {
        let diff = LocsDiff::new();
        assert_eq!(diff.added.code, 0);
        assert_eq!(diff.removed.code, 0);
        assert_eq!(diff.net_code(), 0);
    }

    /// Each `net_*` accessor is `added - removed` for its own category.
    #[test]
    fn test_locs_diff_net() {
        let diff = LocsDiff {
            added: Locs {
                code: 100,
                tests: 50,
                examples: 20,
                docs: 10,
                comments: 5,
                blanks: 15,
                total: 200,
            },
            removed: Locs {
                code: 30,
                tests: 20,
                examples: 10,
                docs: 2,
                comments: 1,
                blanks: 5,
                total: 68,
            },
        };

        assert_eq!(diff.net_code(), 70);
        assert_eq!(diff.net_tests(), 30);
        assert_eq!(diff.net_examples(), 10);
        assert_eq!(diff.net_docs(), 8);
        assert_eq!(diff.net_comments(), 4);
        assert_eq!(diff.net_blanks(), 10);
        assert_eq!(diff.net_total(), 132); // 200 - 68
    }

    /// Adding two diffs sums their `added` and `removed` sides independently.
    #[test]
    fn test_locs_diff_add() {
        let a = LocsDiff {
            added: Locs {
                code: 10,
                tests: 5,
                examples: 2,
                docs: 1,
                comments: 1,
                blanks: 1,
                total: 20,
            },
            removed: Locs {
                code: 5,
                tests: 2,
                examples: 1,
                docs: 0,
                comments: 0,
                blanks: 0,
                total: 8,
            },
        };
        let b = LocsDiff {
            added: Locs {
                code: 20,
                tests: 10,
                examples: 4,
                docs: 2,
                comments: 2,
                blanks: 2,
                total: 40,
            },
            removed: Locs {
                code: 10,
                tests: 5,
                examples: 2,
                docs: 1,
                comments: 1,
                blanks: 1,
                total: 20,
            },
        };

        let sum = a + b;
        assert_eq!(sum.added.code, 30);
        assert_eq!(sum.removed.code, 15);
        assert_eq!(sum.net_code(), 15);
    }

    /// The builder methods store the crate filter and aggregation they are given.
    #[test]
    fn test_diff_options_builder() {
        let options = DiffOptions::new()
            .crates(vec!["my-crate".to_string()])
            .aggregation(Aggregation::ByFile);

        assert_eq!(options.crate_filter, vec!["my-crate"]);
        assert_eq!(options.aggregation, Aggregation::ByFile);
    }

    /// Against an empty "old" side, everything in the new file counts as added.
    #[test]
    fn test_compute_locs_diff_added_file() {
        let old = Locs::new();
        let new = Locs {
            code: 100,
            tests: 0,
            examples: 0,
            docs: 10,
            comments: 5,
            blanks: 20,
            total: 135,
        };

        let diff = compute_locs_diff(&old, &new);
        assert_eq!(diff.added.code, 100);
        assert_eq!(diff.removed.code, 0);
        assert_eq!(diff.added.docs, 10);
        assert_eq!(diff.removed.docs, 0);
    }

    /// Against an empty "new" side, everything in the old file counts as removed.
    #[test]
    fn test_compute_locs_diff_deleted_file() {
        let old = Locs {
            code: 0,
            tests: 50,
            examples: 0,
            docs: 5,
            comments: 2,
            blanks: 10,
            total: 67,
        };
        let new = Locs::new();

        let diff = compute_locs_diff(&old, &new);
        assert_eq!(diff.added.tests, 0);
        assert_eq!(diff.removed.tests, 50);
    }

    /// A modified file reports growth and shrinkage per category, never both at once.
    #[test]
    fn test_compute_locs_diff_modified_file() {
        let old = Locs {
            code: 100,
            tests: 0,
            examples: 0,
            docs: 10,
            comments: 5,
            blanks: 20,
            total: 135,
        };
        let new = Locs {
            code: 120,
            tests: 0,
            examples: 0,
            docs: 8,
            comments: 5,
            blanks: 25,
            total: 158,
        };

        let diff = compute_locs_diff(&old, &new);
        assert_eq!(diff.added.code, 20);
        assert_eq!(diff.removed.code, 0);
        assert_eq!(diff.added.docs, 0);
        assert_eq!(diff.removed.docs, 2);
    }

    /// `WorkdirDiffMode` defaults to `All`.
    #[test]
    fn test_workdir_diff_mode_default() {
        assert_eq!(WorkdirDiffMode::default(), WorkdirDiffMode::All);
    }

    /// Diffing a commit against itself yields no net change.
    ///
    /// `HEAD` is used on both sides rather than a fixed hash so the test does
    /// not depend on one particular commit being present in the checkout
    /// (shallow clones, forks, rewritten history).
    #[test]
    fn test_diff_commits_same_commit() {
        let diff = diff_commits(".", "HEAD", "HEAD", DiffOptions::new())
            .expect("diffing HEAD against itself should succeed");
        assert_eq!(diff.total.net_total(), 0);
    }

    /// An unresolvable revision is reported as an error.
    #[test]
    fn test_diff_commits_invalid_commit() {
        let result = diff_commits(".", "invalid_commit_hash", "HEAD", DiffOptions::new());
        assert!(result.is_err());
    }
}