sloc_core/
lib.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2// Copyright (C) 2026 Nima Shafie <nimzshafie@gmail.com>
3#![allow(clippy::multiple_crate_versions)]
4
5pub mod baseline;
6pub mod coverage;
7pub mod delta;
8pub mod history;
9pub use baseline::{check_against_baseline, resolve_baselines_path, BaselineEntry, BaselineStore};
10pub use coverage::{aggregate_line_coverage, lookup_coverage, parse_lcov, FileCoverage};
11pub use delta::{
12    compute_delta, compute_multi_delta, FileChangeStatus, FileDelta, MultiFileDelta,
13    MultiScanComparison, MultiScanPoint, ScanComparison, SummaryDelta,
14};
15pub use history::{
16    CleanupPolicy, CleanupPolicyStore, RegistryEntry, ScanRegistry, ScanSummarySnapshot,
17    WatchedDirsStore,
18};
19
20use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
21use std::fs;
22use std::path::{Path, PathBuf};
23use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
24use std::sync::Arc;
25
26use anyhow::{Context, Result};
27use chrono::{DateTime, Utc};
28use encoding_rs::{UTF_16BE, UTF_16LE, WINDOWS_1252};
29use globset::{Glob, GlobSet, GlobSetBuilder};
30use ignore::WalkBuilder;
31use serde::{Deserialize, Serialize};
32use uuid::Uuid;
33
34use sloc_config::{
35    AppConfig, BinaryFileBehavior, BlankInBlockCommentPolicy, ContinuationLinePolicy,
36    FailureBehavior, MixedLinePolicy,
37};
38use sloc_languages::style::IndentStyle;
39use sloc_languages::{
40    analyze_text, detect_language, supported_languages, AnalysisOptions, Language, ParseMode,
41    RawLineCounts, StyleAnalysis, StyleLangScope,
42};
43
// ── Analysis thread limits, detection sample sizes and thresholds ────────────
45
/// Upper bound on the worker threads used for parallel file analysis.
const MAX_ANALYSIS_THREADS: usize = 16;
/// Thread count to fall back on when `available_parallelism` is unavailable.
const DEFAULT_ANALYSIS_THREADS: usize = 4;
/// Size in bytes of the sample inspected for `@generated` markers.
const GENERATED_SAMPLE_BYTES: usize = 1024;
/// Size in bytes of the sample fed to the line-length minification heuristic.
const MINIFIED_SAMPLE_BYTES: usize = 4096;
/// A file whose longest line exceeds this length is considered minified.
const MINIFIED_LINE_THRESHOLD: usize = 2000;
/// Size in bytes of the sample scanned for null bytes to detect binary files.
const BINARY_SAMPLE_BYTES: usize = 8192;
58
/// Shared atomic counters that let the caller of `analyze()` poll scan progress
/// while the scan is running.
pub struct ProgressCounters {
    /// Candidate files processed so far; incremented once per file, from any thread.
    pub files_done: Arc<AtomicUsize>,
    /// Total candidate files discovered; set before parallel analysis begins.
    pub files_total: Arc<AtomicUsize>,
}
66
67/// Three-way outcome for metadata-level policy checks.
68enum MetadataPolicyOutcome {
69    /// Skip this file — include the record in output.
70    Skip(Box<FileRecord>),
71    /// Exclude this file entirely — no record in output (include-glob miss).
72    Exclude,
73    /// Continue to content checks.
74    Continue,
75}
76
77#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
78#[serde(rename_all = "snake_case")]
79pub enum FileStatus {
80    AnalyzedExact,
81    AnalyzedBestEffort,
82    SkippedBinary,
83    SkippedDecodeError,
84    SkippedUnsupported,
85    SkippedByPolicy,
86    ErrorInternal,
87}
88
89/// COCOMO I (Basic) project mode — determines the a/b/c/d exponent coefficients.
90#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default, PartialEq, Eq)]
91#[serde(rename_all = "snake_case")]
92pub enum CocomoMode {
93    /// Small team, familiar domain. Effort = 2.4 × KSLOC^1.05.
94    #[default]
95    Organic,
96    /// Mixed constraints. Effort = 3.0 × KSLOC^1.12.
97    SemiDetached,
98    /// Tight hardware/OS constraints. Effort = 3.6 × KSLOC^1.20.
99    Embedded,
100}
101
102/// COCOMO I (Basic) cost-estimation result derived from total code SLOC.
103#[derive(Debug, Clone, Serialize, Deserialize)]
104pub struct CocomoEstimate {
105    pub mode: CocomoMode,
106    /// Input: code lines in thousands (KSLOC).
107    pub ksloc: f64,
108    /// Estimated development effort in person-months.
109    pub effort_person_months: f64,
110    /// Estimated schedule duration in months.
111    pub duration_months: f64,
112    /// Average team size (effort ÷ duration).
113    pub avg_staff: f64,
114}
115
116#[derive(Debug, Clone, Serialize, Deserialize, Default)]
117pub struct EffectiveCounts {
118    pub code_lines: u64,
119    pub comment_lines: u64,
120    pub blank_lines: u64,
121    pub mixed_lines_separate: u64,
122}
123
124#[derive(Debug, Clone, Serialize, Deserialize)]
125pub struct ToolMetadata {
126    pub name: String,
127    pub version: String,
128    pub run_id: String,
129    pub timestamp_utc: DateTime<Utc>,
130}
131
132#[derive(Debug, Clone, Serialize, Deserialize)]
133pub struct EnvironmentMetadata {
134    pub operating_system: String,
135    pub architecture: String,
136    pub runtime_mode: String,
137    pub initiator_username: String,
138    pub initiator_hostname: String,
139    /// CI system name when the scan runs inside a known CI environment (Jenkins,
140    /// GitHub Actions, GitLab CI, …). `None` for interactive / local runs.
141    #[serde(default, skip_serializing_if = "Option::is_none")]
142    pub ci_name: Option<String>,
143}
144
145#[derive(Debug, Clone, Serialize, Deserialize, Default)]
146pub struct SummaryTotals {
147    pub files_considered: u64,
148    pub files_analyzed: u64,
149    pub files_skipped: u64,
150    pub total_physical_lines: u64,
151    pub code_lines: u64,
152    pub comment_lines: u64,
153    pub blank_lines: u64,
154    pub mixed_lines_separate: u64,
155    #[serde(default)]
156    pub functions: u64,
157    #[serde(default)]
158    pub classes: u64,
159    #[serde(default)]
160    pub variables: u64,
161    #[serde(default)]
162    pub imports: u64,
163    #[serde(default)]
164    pub test_count: u64,
165    /// Lexically detected test assertion call lines across all analyzed files.
166    #[serde(default)]
167    pub test_assertion_count: u64,
168    /// Lexically detected test suite / fixture / group declaration lines across all analyzed files.
169    #[serde(default)]
170    pub test_suite_count: u64,
171    /// Aggregated from LCOV data when provided.
172    #[serde(default)]
173    pub coverage_lines_found: u64,
174    #[serde(default)]
175    pub coverage_lines_hit: u64,
176    #[serde(default)]
177    pub coverage_functions_found: u64,
178    #[serde(default)]
179    pub coverage_functions_hit: u64,
180    #[serde(default)]
181    pub coverage_branches_found: u64,
182    #[serde(default)]
183    pub coverage_branches_hit: u64,
184    /// Sum of per-file cyclomatic complexity scores across all analyzed files.
185    #[serde(default)]
186    pub cyclomatic_complexity: u64,
187    /// Total logical SLOC across files that support it; `None` if no files produced LSLOC.
188    #[serde(default, skip_serializing_if = "Option::is_none")]
189    pub lsloc: Option<u64>,
190}
191
192#[derive(Debug, Clone, Serialize, Deserialize)]
193pub struct LanguageSummary {
194    pub language: Language,
195    pub files: u64,
196    pub total_physical_lines: u64,
197    pub code_lines: u64,
198    pub comment_lines: u64,
199    pub blank_lines: u64,
200    pub mixed_lines_separate: u64,
201    #[serde(default)]
202    pub functions: u64,
203    #[serde(default)]
204    pub classes: u64,
205    #[serde(default)]
206    pub variables: u64,
207    #[serde(default)]
208    pub imports: u64,
209    #[serde(default)]
210    pub test_count: u64,
211    #[serde(default)]
212    pub test_assertion_count: u64,
213    #[serde(default)]
214    pub test_suite_count: u64,
215    #[serde(default)]
216    pub coverage_lines_found: u64,
217    #[serde(default)]
218    pub coverage_lines_hit: u64,
219    #[serde(default)]
220    pub coverage_functions_found: u64,
221    #[serde(default)]
222    pub coverage_functions_hit: u64,
223    #[serde(default)]
224    pub coverage_branches_found: u64,
225    #[serde(default)]
226    pub coverage_branches_hit: u64,
227    #[serde(default)]
228    pub cyclomatic_complexity: u64,
229    #[serde(default, skip_serializing_if = "Option::is_none")]
230    pub lsloc: Option<u64>,
231}
232
233#[derive(Debug, Clone, Serialize, Deserialize)]
234pub struct FileRecord {
235    pub path: String,
236    pub relative_path: String,
237    pub language: Option<Language>,
238    pub size_bytes: u64,
239    pub detected_encoding: Option<String>,
240    pub raw_line_categories: RawLineCounts,
241    pub effective_counts: EffectiveCounts,
242    pub status: FileStatus,
243    pub warnings: Vec<String>,
244    pub generated: bool,
245    pub minified: bool,
246    pub vendor: bool,
247    pub parse_mode: Option<ParseMode>,
248    #[serde(skip_serializing_if = "Option::is_none")]
249    pub submodule: Option<String>,
250    /// Line/function/branch coverage from an external LCOV file, when provided.
251    #[serde(default, skip_serializing_if = "Option::is_none")]
252    pub coverage: Option<FileCoverage>,
253    /// Lexical style-guide adherence analysis; `None` for unsupported languages.
254    #[serde(default, skip_serializing_if = "Option::is_none")]
255    pub style_analysis: Option<StyleAnalysis>,
256    /// Cyclomatic complexity approximation for this file (sum of branch decision keywords).
257    #[serde(default, skip_serializing_if = "Option::is_none")]
258    pub cyclomatic_complexity: Option<u32>,
259    /// Logical SLOC estimate; `None` when the language does not support lexical LSLOC.
260    #[serde(default, skip_serializing_if = "Option::is_none")]
261    pub lsloc: Option<u32>,
262    /// Git commit-count in the configured activity window that touched this file.
263    /// `None` unless `analysis.activity_window_days` is set and the root is a git repo.
264    /// Powers the hotspots view; distinct from the web layer's scan-to-scan churn rate.
265    #[serde(default, skip_serializing_if = "Option::is_none")]
266    pub commit_count: Option<u32>,
267    /// ISO-8601 date of the most recent commit touching this file within the window.
268    #[serde(default, skip_serializing_if = "Option::is_none")]
269    pub last_commit_date: Option<String>,
270    /// SHA-256 (first 8 bytes as u64) of raw file bytes — used for duplicate detection.
271    /// Not serialized; consumed in-process during `assemble_run`.
272    #[serde(skip)]
273    pub content_hash: u64,
274}
275
276/// Per-language-family style aggregation within a `StyleSummary`.
277#[derive(Debug, Clone, Serialize, Deserialize)]
278pub struct LanguageStyleGroup {
279    /// Display label, e.g. `"C / C++"`, `"Python"`, `"JavaScript"`.
280    pub language_family: String,
281    /// Number of files in this group.
282    pub files_count: u32,
283    /// Name of the guide with the highest average adherence.
284    pub dominant_guide: String,
285    /// Average adherence of the dominant guide (0–100).
286    pub dominant_score_pct: u8,
287    /// Most common indent style across the group.
288    pub common_indent_style: String,
289    /// Average guide adherence scores (guide name, 0–100) sorted descending.
290    pub guide_avg_scores: Vec<(String, u8)>,
291    /// Percentage of files (0–100) where ≤ 5 % of lines exceed the configured column threshold.
292    pub line80_compliant_pct: u8,
293    /// Same as `line80_compliant_pct` but named for the actual configured threshold.
294    pub line_col_compliant_pct: u8,
295}
296
297/// Aggregate multi-language style-guide adherence across all analysed files.
298#[derive(Debug, Clone, Serialize, Deserialize)]
299pub struct StyleSummary {
300    /// Total files for which style data was produced.
301    pub files_analyzed: u32,
302    /// Most common indent style across *all* analysed files.
303    pub common_indent_style: String,
304    /// Percentage of all analysed files (0–100) with ≤ 5 % of lines over 80 chars (legacy, always 80).
305    pub line80_compliant_pct: u8,
306    /// Percentage of all analysed files (0–100) with ≤ 5 % of lines over `col_threshold` chars.
307    pub line_col_compliant_pct: u8,
308    /// Column-width threshold used for `line_col_compliant_pct` (from `analysis.style_col_threshold`).
309    pub col_threshold: u16,
310    /// Per-language-family breakdown, sorted by `files_count` descending.
311    pub by_language: Vec<LanguageStyleGroup>,
312}
313
314/// Backward-compatible alias kept so that `sloc-report` and `sloc-web` can migrate
315/// incrementally without a breaking change on the same release.
316pub type CppStyleSummary = StyleSummary;
317
318/// Per-submodule aggregated stats produced when `submodule_breakdown` is enabled.
319#[derive(Debug, Clone, Serialize, Deserialize)]
320pub struct SubmoduleSummary {
321    pub name: String,
322    pub relative_path: String,
323    pub files_analyzed: u64,
324    pub total_physical_lines: u64,
325    pub code_lines: u64,
326    pub comment_lines: u64,
327    pub blank_lines: u64,
328    pub language_summaries: Vec<LanguageSummary>,
329    /// Short commit SHA (7 chars) of the submodule's own HEAD at scan time.
330    #[serde(default, skip_serializing_if = "Option::is_none")]
331    pub git_commit_short: Option<String>,
332    /// Full commit SHA of the submodule's own HEAD at scan time.
333    #[serde(default, skip_serializing_if = "Option::is_none")]
334    pub git_commit_long: Option<String>,
335    /// Branch name active in the submodule at scan time.
336    #[serde(default, skip_serializing_if = "Option::is_none")]
337    pub git_branch: Option<String>,
338    /// Author of the submodule's most recent commit at scan time.
339    #[serde(default, skip_serializing_if = "Option::is_none")]
340    pub git_commit_author: Option<String>,
341    /// ISO 8601 author-date of the submodule's most recent commit.
342    #[serde(default, skip_serializing_if = "Option::is_none")]
343    pub git_commit_date: Option<String>,
344    /// URL of the submodule's `origin` remote as recorded in its `.git/config`.
345    #[serde(default, skip_serializing_if = "Option::is_none")]
346    pub git_remote_url: Option<String>,
347}
348
349#[derive(Debug, Clone, Serialize, Deserialize)]
350pub struct AnalysisRun {
351    pub tool: ToolMetadata,
352    pub environment: EnvironmentMetadata,
353    pub effective_configuration: AppConfig,
354    pub input_roots: Vec<String>,
355    pub summary_totals: SummaryTotals,
356    pub totals_by_language: Vec<LanguageSummary>,
357    pub per_file_records: Vec<FileRecord>,
358    pub skipped_file_records: Vec<FileRecord>,
359    pub warnings: Vec<String>,
360    /// Non-empty only when `discovery.submodule_breakdown` is enabled.
361    #[serde(default, skip_serializing_if = "Vec::is_empty")]
362    pub submodule_summaries: Vec<SubmoduleSummary>,
363    /// Short git commit SHA (7 chars) at scan time, if the project is a git repo.
364    #[serde(default, skip_serializing_if = "Option::is_none")]
365    pub git_commit_short: Option<String>,
366    /// Full git commit SHA at scan time, if the project is a git repo.
367    #[serde(default, skip_serializing_if = "Option::is_none")]
368    pub git_commit_long: Option<String>,
369    /// Git branch active at scan time, if the project is a git repo.
370    #[serde(default, skip_serializing_if = "Option::is_none")]
371    pub git_branch: Option<String>,
372    /// Author of the last git commit at scan time.
373    #[serde(default, skip_serializing_if = "Option::is_none")]
374    pub git_commit_author: Option<String>,
375    /// Comma-separated git tags pointing at HEAD at scan time.
376    #[serde(default, skip_serializing_if = "Option::is_none")]
377    pub git_tags: Option<String>,
378    /// Nearest ancestor release tag (output of `git describe --tags --abbrev=0`).
379    #[serde(default, skip_serializing_if = "Option::is_none")]
380    pub git_nearest_tag: Option<String>,
381    /// ISO 8601 author-date of the last git commit at scan time.
382    #[serde(default, skip_serializing_if = "Option::is_none")]
383    pub git_commit_date: Option<String>,
384    /// URL of the `origin` remote as recorded in `.git/config` at scan time.
385    #[serde(default, skip_serializing_if = "Option::is_none")]
386    pub git_remote_url: Option<String>,
387    /// Multi-language style-guide adherence; `None` when no supported files were analysed.
388    #[serde(default, skip_serializing_if = "Option::is_none")]
389    pub style_summary: Option<StyleSummary>,
390    /// COCOMO I (Basic) effort/schedule estimate derived from total code SLOC.
391    #[serde(default, skip_serializing_if = "Option::is_none")]
392    pub cocomo: Option<CocomoEstimate>,
393    /// Unique Lines of Code: count of distinct non-blank code lines across all analyzed files.
394    #[serde(default)]
395    pub uloc: u64,
396    /// `DRYness` percentage: `uloc / total_code_lines × 100`. `None` when code lines = 0.
397    #[serde(default, skip_serializing_if = "Option::is_none")]
398    pub dryness_pct: Option<f32>,
399    /// Groups of files with identical content (relative paths). Only non-singleton groups included.
400    #[serde(default, skip_serializing_if = "Vec::is_empty")]
401    pub duplicate_groups: Vec<Vec<String>>,
402    /// Number of duplicate files excluded from SLOC totals (when `exclude_duplicates` is set).
403    #[serde(default)]
404    pub duplicates_excluded: usize,
405}
406
/// Git metadata gathered for a run by `detect_git_for_run`.
///
/// Every field is optional, and `Default` yields all `None`.
#[derive(Default)]
struct GitInfo {
    /// First 7 characters of `commit_long`.
    commit_short: Option<String>,
    /// Commit SHA resolved from HEAD.
    commit_long: Option<String>,
    /// Branch name from HEAD, or from CI environment variables as a fallback.
    branch: Option<String>,
    /// Output of `git log -1 --format=%an HEAD`.
    author: Option<String>,
    /// Output lines of `git tag --points-at HEAD`, joined with `", "`.
    tags: Option<String>,
    /// Output of `git describe --tags --abbrev=0 HEAD`.
    nearest_tag: Option<String>,
    /// Output of `git log -1 --format=%aI HEAD`.
    commit_date: Option<String>,
    /// URL of the `origin` remote read from the git dir's `config` file.
    remote_url: Option<String>,
}
418
419/// Locate the `.git` directory by walking up from `start`.
420/// Handles plain repos, worktrees (`.git` is a file with `gitdir:` pointer), and
421/// submodules. Returns `None` if no git repo is found.
422fn find_git_dir(start: &Path) -> Option<PathBuf> {
423    let mut current = Some(start);
424    while let Some(dir) = current {
425        let candidate = dir.join(".git");
426        if candidate.is_dir() {
427            return Some(candidate);
428        }
429        if candidate.is_file() {
430            if let Some(resolved) = resolve_git_file_pointer(&candidate, dir) {
431                return Some(resolved);
432            }
433        }
434        current = dir.parent();
435    }
436    None
437}
438
/// Follow a `.git` *file* (worktree/submodule pointer) to the directory it names.
///
/// `file` is the pointer file and `base_dir` is the directory a relative
/// pointer is resolved against. Returns `None` when the file cannot be read,
/// when its trimmed content does not start with `gitdir: `, or when the
/// resulting path is not an existing directory.
fn resolve_git_file_pointer(file: &Path, base_dir: &Path) -> Option<PathBuf> {
    let text = fs::read_to_string(file).ok()?;
    let target = text.trim().strip_prefix("gitdir: ")?;

    // Pointers are written with forward slashes; switch to the OS separator so
    // the Path operations below behave on Windows too.
    let native = target.replace('/', std::path::MAIN_SEPARATOR_STR);
    let native_path = Path::new(&native);
    let joined = if native_path.is_absolute() {
        native_path.to_path_buf()
    } else {
        base_dir.join(native_path)
    };

    // Canonicalizing resolves ".." components and symlinks. If it fails, keep
    // the joined path and let the directory check below decide.
    let candidate = match joined.canonicalize() {
        Ok(canonical) => canonical,
        Err(_) => joined,
    };
    candidate.is_dir().then_some(candidate)
}
463
/// Resolve a git ref name (e.g. `refs/heads/main`) to its commit SHA.
///
/// The loose ref file under `git_dir` is tried first. Its trimmed content is
/// accepted when it is at least 40 characters long and entirely hexadecimal.
/// Otherwise `packed-refs` is searched for a `<sha> <refname>` line whose name
/// equals `refname`. Returns `None` when neither source yields a match.
///
/// Lines in `packed-refs` that are not of the `<sha> <refname>` form (blank or
/// otherwise malformed) are skipped rather than aborting the search.
fn resolve_ref(git_dir: &Path, refname: &str) -> Option<String> {
    // Join each forward-slash component separately so the loose-ref path uses
    // the correct separator on every platform.
    let ref_path = refname
        .split('/')
        .fold(git_dir.to_path_buf(), |p, c| p.join(c));
    if let Ok(raw) = fs::read_to_string(&ref_path) {
        let sha = raw.trim();
        if sha.len() >= 40 && sha.chars().all(|c| c.is_ascii_hexdigit()) {
            return Some(sha.to_string());
        }
    }

    // Packed refs: '#' lines are comments and '^' lines are peeled tag objects;
    // both are ignored. `str::lines()` copes with \n and \r\n alike.
    let packed = fs::read_to_string(git_dir.join("packed-refs")).ok()?;
    packed
        .lines()
        .filter(|line| !line.starts_with('#') && !line.starts_with('^'))
        .filter_map(|line| line.split_once(' '))
        .find(|(_, name)| name.trim() == refname)
        .map(|(sha, _)| sha.to_string())
}
499
/// Extract the value from a `url = <value>` git-config line.
///
/// The key is matched ASCII-case-insensitively, since git config variable names
/// are case-insensitive (`URL = …` is equivalent to `url = …`). Spaces and tabs
/// between the key and `=` are allowed, and the value is trimmed. Returns `None`
/// when the line is not a `url` assignment or the value is empty.
///
/// `line` is expected to have its leading whitespace already removed.
fn parse_url_line(line: &str) -> Option<&str> {
    const KEY: &str = "url";
    // `get` returns None for short lines and for non-char-boundary cuts, so the
    // slice below cannot panic.
    let key = line.get(..KEY.len())?;
    if !key.eq_ignore_ascii_case(KEY) {
        return None;
    }
    let rest = line[KEY.len()..].trim_start_matches([' ', '\t']);
    let url = rest.strip_prefix('=')?.trim();
    (!url.is_empty()).then_some(url)
}
511
512/// Parse `.git/config` and return the URL of the `origin` remote, if present.
513fn read_git_remote_url(git_dir: &Path) -> Option<String> {
514    let config = fs::read_to_string(git_dir.join("config")).ok()?;
515    let mut in_origin = false;
516    for line in config.lines() {
517        let trimmed = line.trim();
518        if trimmed.starts_with('[') {
519            in_origin = trimmed == r#"[remote "origin"]"#;
520        } else if in_origin {
521            if let Some(url) = parse_url_line(trimmed) {
522                return Some(url.to_owned());
523            }
524        }
525    }
526    None
527}
528
529/// Detect git metadata by reading `.git/` files directly — no `git` executable
530/// needed. Falls back gracefully for detached HEADs, shallow clones, and missing
531/// reflogs.
532fn detect_git_for_run(project_path: &Path) -> GitInfo {
533    // Resolve the CI branch early so it can fill in any gap in git metadata.
534    let ci_branch = ci_branch_from_env();
535
536    let Some(git_dir) = find_git_dir(project_path) else {
537        // No .git directory (e.g. scanning a non-repo path in CI). Use whatever
538        // the CI system tells us about the branch.
539        return GitInfo {
540            branch: ci_branch,
541            ..GitInfo::default()
542        };
543    };
544
545    let head_raw = match fs::read_to_string(git_dir.join("HEAD")) {
546        Ok(s) => s.trim().to_string(),
547        Err(_) => {
548            return GitInfo {
549                branch: ci_branch,
550                ..GitInfo::default()
551            }
552        }
553    };
554
555    let (branch_from_head, commit_long) = head_raw.strip_prefix("ref: ").map_or_else(
556        || {
557            if head_raw.len() >= 40 && head_raw.chars().all(|c| c.is_ascii_hexdigit()) {
558                // Detached HEAD — HEAD file is the commit SHA (common in CI checkouts).
559                (None, Some(head_raw[..40].to_string()))
560            } else {
561                (None, None)
562            }
563        },
564        |refname| {
565            let branch = refname
566                .strip_prefix("refs/heads/")
567                .map(|b| b.trim().to_string());
568            let sha = resolve_ref(&git_dir, refname.trim());
569            (branch, sha)
570        },
571    );
572    // Prefer the branch name derived from the HEAD ref; fall back to the CI
573    // env var (covers detached-HEAD checkouts done by Jenkins, GitHub Actions, etc.).
574    let branch = branch_from_head.or(ci_branch);
575
576    let commit_short = commit_long
577        .as_deref()
578        .map(|s| s.chars().take(7).collect::<String>());
579
580    let author = run_git_cmd(project_path, &["log", "-1", "--format=%an", "HEAD"]);
581    let commit_date = run_git_cmd(project_path, &["log", "-1", "--format=%aI", "HEAD"]);
582    let remote_url = read_git_remote_url(&git_dir);
583
584    // Tags and nearest-tag still require git CLI — try it as a best-effort bonus
585    // but don't block on it. If git isn't available these will simply be None.
586    let tags = run_git_cmd(project_path, &["tag", "--points-at", "HEAD"]).map(|t| {
587        t.lines()
588            .filter(|l| !l.is_empty())
589            .collect::<Vec<_>>()
590            .join(", ")
591    });
592    let nearest_tag = run_git_cmd(project_path, &["describe", "--tags", "--abbrev=0", "HEAD"]);
593
594    GitInfo {
595        commit_short,
596        commit_long,
597        branch,
598        author,
599        tags,
600        nearest_tag,
601        commit_date,
602        remote_url,
603    }
604}
605
/// Run a git command in `dir` as a best-effort supplemental source.
///
/// Returns the trimmed stdout when the command exits successfully with
/// non-empty UTF-8 output, and `None` otherwise.
///
/// The bare `git` name is tried first (works when git is on PATH), followed by
/// well-known absolute install paths for service accounts running with a
/// stripped PATH. A fallback is used only when the previous candidate could
/// not be *started*. Once an executable has run, its result is final, so a
/// command that legitimately fails (e.g. `describe` with no tags) is not
/// re-executed against every other path.
fn run_git_cmd(dir: &Path, args: &[&str]) -> Option<String> {
    // Unix paths simply fail to spawn on Windows and vice-versa.
    const CANDIDATES: &[&str] = &[
        // Works on all platforms when git is on PATH
        "git",
        // Common Linux / macOS install locations
        "/usr/bin/git",
        "/usr/local/bin/git",
        "/opt/homebrew/bin/git",
        // Git for Windows default installation paths
        r"C:\Program Files\Git\cmd\git.exe",
        r"C:\Program Files\Git\bin\git.exe",
        r"C:\Program Files (x86)\Git\cmd\git.exe",
    ];
    for &exe in CANDIDATES {
        // NOTE(review): `safe.directory=*` turns off git's repository-ownership
        // check for this invocation; presumably intentional so scans run by
        // service accounts work. Confirm this is acceptable for your deployment.
        let spawned = std::process::Command::new(exe)
            .args(["-c", "safe.directory=*"])
            .args(args)
            .current_dir(dir)
            .output();
        let Ok(output) = spawned else {
            // Executable missing or not startable: try the next location.
            continue;
        };
        if !output.status.success() {
            return None;
        }
        return String::from_utf8(output.stdout)
            .ok()
            .map(|s| s.trim().to_string())
            .filter(|s| !s.is_empty());
    }
    None
}
640
641/// Per-file git activity (commit-count + last-change date) over `window_days`, computed
642/// with a single `git log --name-status` pass. Keys are paths relative to `project_path`
643/// (via `--relative`), matching `FileRecord::relative_path`. Best-effort: returns an empty
644/// map when git is unavailable or the path is not a repository — a scan never fails on this.
645fn detect_file_activity(
646    project_path: &Path,
647    window_days: u32,
648) -> HashMap<String, (u32, Option<String>)> {
649    let since = format!("--since={window_days} days ago");
650    // `--relative` limits output to (and reports paths relative to) the scan directory, so
651    // the keys line up with FileRecord::relative_path even when scanning a repo subdirectory.
652    // %x00 prefixes each commit header with a NUL, distinguishing it from name-status lines.
653    let out = run_git_cmd(
654        project_path,
655        &[
656            "-c",
657            "core.quotepath=false",
658            "log",
659            since.as_str(),
660            "--no-merges",
661            "--name-status",
662            "--relative",
663            "--pretty=format:%x00%aI",
664        ],
665    );
666    out.map(|s| parse_activity_log(&s)).unwrap_or_default()
667}
668
/// Turn `git log --name-status` output with NUL-prefixed commit headers into a
/// path → (`commit_count`, `last_commit_date`) map.
///
/// `git log` lists commits newest-first, so the date stored for a path is the
/// one in effect the first time that path is seen. Rename and copy entries are
/// attributed to the new (last) path. Lines without a usable path are ignored.
fn parse_activity_log(out: &str) -> HashMap<String, (u32, Option<String>)> {
    let mut activity = HashMap::new();
    let mut commit_date: Option<String> = None;

    for line in out.lines() {
        // Commit header: NUL followed by the author date.
        if let Some(header) = line.strip_prefix('\u{0}') {
            let stamp = header.trim();
            commit_date = if stamp.is_empty() {
                None
            } else {
                Some(stamp.to_owned())
            };
            continue;
        }
        if line.trim().is_empty() {
            continue;
        }

        // Name-status line: "STATUS\tpath", or "Rxxx\told\tnew" / "Cxxx\told\tnew".
        let mut columns = line.split('\t');
        let status = columns.next().unwrap_or_default();
        let is_rename_or_copy = matches!(status.chars().next(), Some('R' | 'C'));
        let raw_path = if is_rename_or_copy {
            columns.last()
        } else {
            columns.next()
        };
        let path = match raw_path.map(str::trim) {
            Some(p) if !p.is_empty() => p,
            _ => continue,
        };

        let slot = activity.entry(path.to_owned()).or_insert((0u32, None));
        slot.0 += 1;
        if slot.1.is_none() {
            slot.1 = commit_date.clone();
        }
    }
    activity
}
703
/// Name the CI system this process is running under, if one is recognised.
///
/// Checks run in a fixed order and the first match wins: Jenkins (any of its
/// variables set), then the systems that export a flag equal to `"true"`, then
/// TeamCity (version variable set). Returns `None` when nothing matches.
fn detect_ci_system() -> Option<&'static str> {
    fn is_set(name: &str) -> bool {
        std::env::var(name).is_ok()
    }
    fn is_true(name: &str) -> bool {
        matches!(std::env::var(name).as_deref(), Ok("true"))
    }

    // (flag variable, display name), in priority order.
    const TRUE_FLAGS: [(&str, &str); 5] = [
        ("GITHUB_ACTIONS", "GitHub Actions"),
        ("GITLAB_CI", "GitLab CI"),
        ("CIRCLECI", "CircleCI"),
        ("TRAVIS", "Travis CI"),
        ("TF_BUILD", "Azure DevOps"),
    ];

    if ["JENKINS_URL", "JENKINS_HOME", "BUILD_URL"]
        .iter()
        .any(|name| is_set(name))
    {
        return Some("Jenkins");
    }
    if let Some(&(_, label)) = TRUE_FLAGS.iter().find(|(flag, _)| is_true(flag)) {
        return Some(label);
    }
    if is_set("TEAMCITY_VERSION") {
        Some("TeamCity")
    } else {
        None
    }
}
731
/// Look up the current branch name in well-known CI environment variables.
///
/// Serves as a fallback when the git HEAD is detached (common in CI checkouts).
/// Variables are consulted in the order listed. Each value is trimmed and has a
/// leading `refs/heads/` (or, failing that, `origin/`) removed. The first
/// result that is neither empty nor `HEAD` is returned.
fn ci_branch_from_env() -> Option<String> {
    const VARS: &[&str] = &[
        "BRANCH_NAME",        // Jenkins Pipeline
        "GIT_BRANCH",         // Jenkins Freestyle (may carry "origin/<branch>")
        "GITHUB_REF_NAME",    // GitHub Actions
        "CI_COMMIT_BRANCH",   // GitLab CI
        "CIRCLE_BRANCH",      // CircleCI
        "TRAVIS_BRANCH",      // Travis CI
        "BUILD_SOURCEBRANCH", // Azure DevOps (may carry "refs/heads/<branch>")
    ];
    VARS.iter()
        .filter_map(|name| std::env::var(name).ok())
        .find_map(|raw| {
            let trimmed = raw.trim();
            let branch = if let Some(rest) = trimmed.strip_prefix("refs/heads/") {
                rest
            } else if let Some(rest) = trimmed.strip_prefix("origin/") {
                rest
            } else {
                trimmed
            };
            (!branch.is_empty() && branch != "HEAD").then(|| branch.to_string())
        })
}
758
/// Return the current user name from the environment: `USERNAME` if readable,
/// otherwise `USER`, otherwise the literal `"unknown"`.
fn get_current_username() -> String {
    ["USERNAME", "USER"]
        .iter()
        .find_map(|key| std::env::var(key).ok())
        .unwrap_or_else(|| "unknown".to_string())
}
764
/// Read environment variable `var`, treating an unreadable or empty value as
/// absent.
fn non_empty_env(var: &str) -> Option<String> {
    std::env::var(var).ok().filter(|value| !value.is_empty())
}
773
/// Report whether any of the Jenkins marker variables (`JENKINS_URL`,
/// `JENKINS_HOME`, `BUILD_URL`) is readable from the environment.
fn is_jenkins_env() -> bool {
    ["JENKINS_URL", "JENKINS_HOME", "BUILD_URL"]
        .iter()
        .any(|name| std::env::var(name).is_ok())
}
779
780fn get_hostname() -> String {
781    // In CI environments prefer a human-readable agent/runner identifier over
782    // whatever hostname the container was assigned.
783    if is_jenkins_env() {
784        if let Some(n) = non_empty_env("NODE_NAME") {
785            return n;
786        }
787    }
788    if std::env::var("GITHUB_ACTIONS").as_deref() == Ok("true") {
789        if let Some(r) = non_empty_env("RUNNER_NAME") {
790            return r;
791        }
792    }
793    if std::env::var("GITLAB_CI").as_deref() == Ok("true") {
794        if let Some(r) = non_empty_env("CI_RUNNER_DESCRIPTION") {
795            return r;
796        }
797    }
798    std::env::var("COMPUTERNAME")
799        .or_else(|_| std::env::var("HOSTNAME"))
800        .or_else(|_| std::fs::read_to_string("/etc/hostname").map(|s| s.trim().to_string()))
801        .unwrap_or_else(|_| "unknown".to_string())
802}
803
804/// Walk a single directory root and collect file records into the output vectors.
805#[allow(clippy::too_many_arguments)]
806fn walk_root(
807    root: &Path,
808    config: &AppConfig,
809    include_globs: Option<&GlobSet>,
810    exclude_globs: Option<&GlobSet>,
811    enabled_languages: Option<&BTreeSet<Language>>,
812    seen_paths: &mut HashSet<PathBuf>,
813    analyzed: &mut Vec<FileRecord>,
814    skipped: &mut Vec<FileRecord>,
815    warnings: &mut Vec<String>,
816    cancel: Option<&AtomicBool>,
817    progress: Option<&ProgressCounters>,
818) -> Result<()> {
819    let mut builder = WalkBuilder::new(root);
820    builder
821        .follow_links(config.discovery.follow_symlinks)
822        .hidden(config.discovery.ignore_hidden_files)
823        .ignore(config.discovery.honor_ignore_files)
824        .parents(config.discovery.honor_ignore_files)
825        .git_ignore(config.discovery.honor_ignore_files)
826        .git_global(config.discovery.honor_ignore_files)
827        .git_exclude(config.discovery.honor_ignore_files);
828
829    let paths = collect_walk_paths(&builder, seen_paths, warnings);
830    if paths.is_empty() {
831        return Ok(());
832    }
833
834    if let Some(p) = progress {
835        p.files_total.fetch_add(paths.len(), Ordering::Relaxed);
836    }
837
838    let chunk_results = run_parallel_analysis(
839        &paths,
840        root,
841        config,
842        include_globs,
843        exclude_globs,
844        enabled_languages,
845        cancel,
846        progress,
847    )?;
848    merge_chunk_results(chunk_results, analyzed, skipped, warnings)
849}
850
851fn collect_walk_paths(
852    builder: &WalkBuilder,
853    seen_paths: &mut HashSet<PathBuf>,
854    warnings: &mut Vec<String>,
855) -> Vec<PathBuf> {
856    // build_parallel() walks the directory tree across multiple threads (work-stealing
857    // internally), which is meaningfully faster for deeply nested repos with many directories.
858    // We collect results via an MPSC channel so each walker thread sends without contention.
859    let (tx, rx) = std::sync::mpsc::channel::<std::result::Result<PathBuf, String>>();
860
861    builder.build_parallel().run(|| {
862        let tx = tx.clone();
863        Box::new(move |entry| {
864            match entry {
865                Err(e) => {
866                    let _ = tx.send(Err(format!("discovery warning: {e}")));
867                }
868                Ok(e) => {
869                    let path = e.into_path();
870                    if !path.is_dir() {
871                        let _ = tx.send(Ok(path));
872                    }
873                }
874            }
875            ignore::WalkState::Continue
876        })
877    });
878
879    // Drop the sender that the outer scope holds; the per-thread clones were dropped when
880    // run() returned (all threads finished). Dropping this last sender closes the channel.
881    drop(tx);
882
883    rx.into_iter()
884        .filter_map(|msg| match msg {
885            Ok(path) => {
886                if seen_paths.insert(path.clone()) {
887                    Some(path)
888                } else {
889                    None
890                }
891            }
892            Err(warn) => {
893                warnings.push(warn);
894                None
895            }
896        })
897        .collect()
898}
899
900/// Inner work loop executed by each analysis thread.
901#[allow(clippy::too_many_arguments)]
902fn worker_loop(
903    paths: &[PathBuf],
904    root: &Path,
905    config: &AppConfig,
906    include_globs: Option<&GlobSet>,
907    exclude_globs: Option<&GlobSet>,
908    enabled_languages: Option<&BTreeSet<Language>>,
909    cancel: Option<&AtomicBool>,
910    next_index: &AtomicUsize,
911    files_done: Option<&AtomicUsize>,
912) -> Vec<Result<Option<FileRecord>>> {
913    let mut results = Vec::new();
914    loop {
915        if cancel.is_some_and(|c| c.load(Ordering::Relaxed)) {
916            results.push(Err(anyhow::anyhow!("analysis cancelled")));
917            break;
918        }
919        let i = next_index.fetch_add(1, Ordering::Relaxed);
920        if i >= paths.len() {
921            break;
922        }
923        results.push(analyze_candidate_file(
924            &paths[i],
925            root,
926            config,
927            include_globs,
928            exclude_globs,
929            enabled_languages,
930        ));
931        if let Some(fd) = files_done {
932            fd.fetch_add(1, Ordering::Relaxed);
933        }
934    }
935    results
936}
937
938#[allow(clippy::too_many_arguments)]
939fn run_parallel_analysis(
940    paths: &[PathBuf],
941    root: &Path,
942    config: &AppConfig,
943    include_globs: Option<&GlobSet>,
944    exclude_globs: Option<&GlobSet>,
945    enabled_languages: Option<&BTreeSet<Language>>,
946    cancel: Option<&AtomicBool>,
947    progress: Option<&ProgressCounters>,
948) -> Result<Vec<Vec<Result<Option<FileRecord>>>>> {
949    let thread_count = std::thread::available_parallelism().map_or(DEFAULT_ANALYSIS_THREADS, |n| {
950        n.get().min(MAX_ANALYSIS_THREADS)
951    });
952    // Shared work-queue index: each thread atomically claims the next path to process.
953    // This eliminates static-chunk load imbalance — threads that finish early immediately
954    // pick up more work instead of sitting idle while one overloaded chunk finishes.
955    let next_index = AtomicUsize::new(0);
956    let files_done: Option<&AtomicUsize> = progress.map(|p| p.files_done.as_ref());
957
958    std::thread::scope(|s| -> Result<Vec<Vec<Result<Option<FileRecord>>>>> {
959        // IMPORTANT: collect ALL handles before joining any of them.
960        // A lazy spawn-then-join chain would serialize threads one at a time.
961        let mut handles = Vec::with_capacity(thread_count);
962        for _ in 0..thread_count {
963            handles.push(s.spawn(|| {
964                worker_loop(
965                    paths,
966                    root,
967                    config,
968                    include_globs,
969                    exclude_globs,
970                    enabled_languages,
971                    cancel,
972                    &next_index,
973                    files_done,
974                )
975            }));
976        }
977        handles
978            .into_iter()
979            .map(|h| {
980                h.join()
981                    .map_err(|_| anyhow::anyhow!("analysis thread panicked"))
982            })
983            .collect()
984    })
985}
986
987fn merge_chunk_results(
988    chunk_results: Vec<Vec<Result<Option<FileRecord>>>>,
989    analyzed: &mut Vec<FileRecord>,
990    skipped: &mut Vec<FileRecord>,
991    warnings: &mut Vec<String>,
992) -> Result<()> {
993    for chunk in chunk_results {
994        for result in chunk {
995            if let Some(record) = result? {
996                push_record(record, analyzed, skipped, warnings);
997            }
998        }
999    }
1000    Ok(())
1001}
1002
1003/// Label each analyzed file with its submodule and build per-submodule summaries.
1004fn process_submodules(config: &AppConfig, analyzed: &mut [FileRecord]) -> Vec<SubmoduleSummary> {
1005    let root = config.discovery.root_paths[0]
1006        .canonicalize()
1007        .unwrap_or_else(|_| config.discovery.root_paths[0].clone());
1008    let submodules = detect_submodules(&root);
1009    if submodules.is_empty() {
1010        return Vec::new();
1011    }
1012
1013    for file in analyzed.iter_mut() {
1014        for (name, sub_path) in &submodules {
1015            let prefix = sub_path.to_string_lossy().replace('\\', "/");
1016            let rel = &file.relative_path;
1017            if rel == &prefix || rel.starts_with(&format!("{prefix}/")) {
1018                file.submodule = Some(name.clone());
1019                break;
1020            }
1021        }
1022    }
1023
1024    build_submodule_summaries(analyzed, &submodules, &root)
1025}
1026
1027/// Compute Basic COCOMO I cost estimate from total code SLOC.
1028#[allow(clippy::cast_precision_loss)] // COCOMO formula: line counts at f64 precision are sufficient
1029fn compute_cocomo(code_lines: u64, mode: CocomoMode) -> CocomoEstimate {
1030    let ksloc = code_lines as f64 / 1_000.0;
1031    let (a, b, c, d): (f64, f64, f64, f64) = match mode {
1032        CocomoMode::Organic => (2.4, 1.05, 2.5, 0.38),
1033        CocomoMode::SemiDetached => (3.0, 1.12, 2.5, 0.35),
1034        CocomoMode::Embedded => (3.6, 1.20, 2.5, 0.32),
1035    };
1036    let effort = a * ksloc.powf(b);
1037    let duration = c * effort.powf(d);
1038    let avg_staff = if duration > 0.0 {
1039        effort / duration
1040    } else {
1041        0.0
1042    };
1043    // Round to 2 decimal places for readability.
1044    CocomoEstimate {
1045        mode,
1046        ksloc: (ksloc * 100.0).round() / 100.0,
1047        effort_person_months: (effort * 100.0).round() / 100.0,
1048        duration_months: (duration * 100.0).round() / 100.0,
1049        avg_staff: (avg_staff * 100.0).round() / 100.0,
1050    }
1051}
1052
1053/// Collect ULOC hashes across all analyzed files, compute ULOC and `DRYness`.
1054#[allow(clippy::cast_precision_loss)] // DRYness is a display percentage; f32 precision is adequate
1055fn compute_uloc(analyzed: &[FileRecord]) -> (u64, Option<f32>) {
1056    use std::collections::HashSet as StdHashSet;
1057    let mut unique: StdHashSet<u64> = StdHashSet::new();
1058    let mut total_code: u64 = 0;
1059    for record in analyzed {
1060        total_code += record.effective_counts.code_lines;
1061        for &hash in &record.raw_line_categories.code_line_hashes {
1062            unique.insert(hash);
1063        }
1064    }
1065    let uloc = unique.len() as u64;
1066    let dryness = if total_code > 0 {
1067        Some((uloc as f32 / total_code as f32) * 100.0)
1068    } else {
1069        None
1070    };
1071    (uloc, dryness)
1072}
1073
1074/// Group files by content hash and return groups of duplicates (relative paths).
1075/// Only groups with ≥ 2 files are returned.
1076fn find_duplicate_groups(analyzed: &[FileRecord]) -> Vec<Vec<String>> {
1077    let mut by_hash: std::collections::HashMap<u64, Vec<&str>> = std::collections::HashMap::new();
1078    for record in analyzed {
1079        if record.content_hash != 0 {
1080            by_hash
1081                .entry(record.content_hash)
1082                .or_default()
1083                .push(&record.relative_path);
1084        }
1085    }
1086    let mut groups: Vec<Vec<String>> = by_hash
1087        .into_values()
1088        .filter(|v| v.len() >= 2)
1089        .map(|v| {
1090            let mut paths: Vec<String> = v.into_iter().map(str::to_owned).collect();
1091            paths.sort();
1092            paths
1093        })
1094        .collect();
1095    groups.sort_by(|a, b| a[0].cmp(&b[0]));
1096    groups
1097}
1098
1099/// Assemble the final `AnalysisRun` from collected records and metadata.
1100fn assemble_run(
1101    config: &AppConfig,
1102    runtime_mode: &str,
1103    mut analyzed: Vec<FileRecord>,
1104    skipped: Vec<FileRecord>,
1105    warnings: Vec<String>,
1106    submodule_summaries: Vec<SubmoduleSummary>,
1107) -> AnalysisRun {
1108    let summary = build_summary(&analyzed, &skipped);
1109    let language_summaries = build_language_summaries(&analyzed);
1110    let col_threshold = config.analysis.style_col_threshold;
1111    let style_summary = build_style_summary(&analyzed, col_threshold);
1112
1113    // Compute ULOC, DRYness, duplicates, and COCOMO from the aggregated records.
1114    let (uloc, dryness_pct) = compute_uloc(&analyzed);
1115    let duplicate_groups = find_duplicate_groups(&analyzed);
1116    let cocomo = if summary.code_lines > 0 {
1117        Some(compute_cocomo(summary.code_lines, CocomoMode::Organic))
1118    } else {
1119        None
1120    };
1121
1122    let first_root = config
1123        .discovery
1124        .root_paths
1125        .first()
1126        .map(|p| p.canonicalize().unwrap_or_else(|_| p.clone()));
1127    let git = first_root
1128        .as_deref()
1129        .map(detect_git_for_run)
1130        .unwrap_or_default();
1131
1132    // Per-file git activity for the hotspots view (on by default, single `git log` pass,
1133    // best-effort). A window of 0 (or None) disables it; a non-git path yields an empty result.
1134    let activity_window = config.analysis.activity_window_days.unwrap_or(0);
1135    if let (true, Some(root)) = (activity_window > 0, first_root.as_deref()) {
1136        let activity = detect_file_activity(root, activity_window);
1137        if !activity.is_empty() {
1138            for rec in &mut analyzed {
1139                if let Some((count, date)) = activity.get(&rec.relative_path) {
1140                    rec.commit_count = Some(*count);
1141                    rec.last_commit_date.clone_from(date);
1142                }
1143            }
1144        }
1145    }
1146
1147    let now = Utc::now();
1148    let run_id = {
1149        let uuid_suffix = Uuid::new_v4().simple().to_string();
1150        format!("{}-{}", now.format("%Y%m%d-%H%M"), uuid_suffix)
1151    };
1152
1153    AnalysisRun {
1154        tool: ToolMetadata {
1155            name: "sloc".into(),
1156            version: env!("CARGO_PKG_VERSION").into(),
1157            run_id,
1158            timestamp_utc: now,
1159        },
1160        environment: EnvironmentMetadata {
1161            operating_system: std::env::consts::OS.into(),
1162            architecture: std::env::consts::ARCH.into(),
1163            runtime_mode: runtime_mode.into(),
1164            initiator_username: get_current_username(),
1165            initiator_hostname: get_hostname(),
1166            ci_name: if is_jenkins_env() {
1167                Some(format!("Jenkins\t{}", get_hostname()))
1168            } else {
1169                detect_ci_system().map(str::to_string)
1170            },
1171        },
1172        effective_configuration: config.clone(),
1173        input_roots: config
1174            .discovery
1175            .root_paths
1176            .iter()
1177            .map(|p| path_to_string(p))
1178            .collect(),
1179        summary_totals: summary,
1180        totals_by_language: language_summaries,
1181        per_file_records: analyzed,
1182        skipped_file_records: skipped,
1183        warnings,
1184        submodule_summaries,
1185        git_commit_short: git.commit_short,
1186        git_commit_long: git.commit_long,
1187        git_branch: git.branch,
1188        git_commit_author: git.author,
1189        git_tags: git.tags,
1190        git_nearest_tag: git.nearest_tag,
1191        git_commit_date: git.commit_date,
1192        git_remote_url: git.remote_url,
1193        style_summary,
1194        cocomo,
1195        uloc,
1196        dryness_pct,
1197        duplicate_groups,
1198        duplicates_excluded: 0,
1199    }
1200}
1201
1202/// # Errors
1203///
1204/// Returns an error if the config is invalid, root paths cannot be walked, or any file
1205/// analysis step fails in a way that cannot be recovered from.
1206#[allow(clippy::too_many_lines)]
1207pub fn analyze(
1208    config: &AppConfig,
1209    runtime_mode: &str,
1210    cancel: Option<&AtomicBool>,
1211    progress: Option<&ProgressCounters>,
1212) -> Result<AnalysisRun> {
1213    config.validate()?;
1214
1215    if config.discovery.root_paths.is_empty() {
1216        anyhow::bail!("no input paths were provided");
1217    }
1218
1219    let include_globs = compile_globset(&config.discovery.include_globs)?;
1220    let exclude_globs = compile_globset(&config.discovery.exclude_globs)?;
1221    let enabled_languages = parse_enabled_languages(&config.analysis.enabled_languages)?;
1222
1223    let mut analyzed = Vec::new();
1224    let mut skipped = Vec::new();
1225    let mut warnings = Vec::new();
1226    let mut seen_paths = HashSet::new();
1227
1228    for root in &config.discovery.root_paths {
1229        if cancel.is_some_and(|c| c.load(Ordering::Relaxed)) {
1230            anyhow::bail!("analysis cancelled");
1231        }
1232
1233        let root = root.canonicalize().unwrap_or_else(|_| root.clone());
1234
1235        if root.is_file() {
1236            if let Some(record) = analyze_candidate_file(
1237                &root,
1238                root.parent().unwrap_or_else(|| Path::new(".")),
1239                config,
1240                include_globs.as_ref(),
1241                exclude_globs.as_ref(),
1242                enabled_languages.as_ref(),
1243            )? {
1244                push_record(record, &mut analyzed, &mut skipped, &mut warnings);
1245            }
1246            continue;
1247        }
1248
1249        walk_root(
1250            &root,
1251            config,
1252            include_globs.as_ref(),
1253            exclude_globs.as_ref(),
1254            enabled_languages.as_ref(),
1255            &mut seen_paths,
1256            &mut analyzed,
1257            &mut skipped,
1258            &mut warnings,
1259            cancel,
1260            progress,
1261        )?;
1262    }
1263
1264    analyzed.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
1265    skipped.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
1266
1267    // Submodule detection: label each file with its submodule and build per-submodule summaries.
1268    let submodule_summaries = if config.discovery.submodule_breakdown {
1269        process_submodules(config, &mut analyzed)
1270    } else {
1271        Vec::new()
1272    };
1273
1274    attach_coverage(config, &mut analyzed, &mut warnings);
1275
1276    Ok(assemble_run(
1277        config,
1278        runtime_mode,
1279        analyzed,
1280        skipped,
1281        warnings,
1282        submodule_summaries,
1283    ))
1284}
1285
1286fn attach_coverage(config: &AppConfig, analyzed: &mut [FileRecord], warnings: &mut Vec<String>) {
1287    let Some(cov_path) = coverage::resolve_coverage_file(config.analysis.coverage_file.as_deref())
1288    else {
1289        return;
1290    };
1291    tracing::debug!(path = %cov_path.display(), "loading coverage file");
1292    match fs::read_to_string(&cov_path) {
1293        Ok(content) => {
1294            let cov_map = coverage::parse_coverage_auto(&cov_path, &content);
1295            let mut matched: u32 = 0;
1296            let mut unmatched: u32 = 0;
1297            for record in analyzed.iter_mut() {
1298                record.coverage =
1299                    coverage::lookup_coverage(&cov_map, &record.relative_path).cloned();
1300                if record.coverage.is_some() {
1301                    matched += 1;
1302                } else {
1303                    unmatched += 1;
1304                }
1305            }
1306            tracing::debug!(
1307                path = %cov_path.display(),
1308                coverage_entries = cov_map.len(),
1309                files_matched = matched,
1310                files_unmatched = unmatched,
1311                "coverage attached"
1312            );
1313            if unmatched > 0 && matched == 0 {
1314                tracing::warn!(
1315                    path = %cov_path.display(),
1316                    "coverage file loaded but no source files could be matched — check that paths in the coverage report match the scanned directory"
1317                );
1318            }
1319        }
1320        Err(e) => {
1321            tracing::warn!(path = %cov_path.display(), error = %e, "coverage file could not be read");
1322            warnings.push(format!(
1323                "coverage file '{}' could not be read: {e}",
1324                cov_path.display()
1325            ));
1326        }
1327    }
1328}
1329
1330fn push_record(
1331    record: FileRecord,
1332    analyzed: &mut Vec<FileRecord>,
1333    skipped: &mut Vec<FileRecord>,
1334    warnings: &mut Vec<String>,
1335) {
1336    warnings.extend(
1337        record
1338            .warnings
1339            .iter()
1340            .map(|warning| format!("{}: {warning}", record.relative_path)),
1341    );
1342
1343    match record.status {
1344        FileStatus::AnalyzedExact | FileStatus::AnalyzedBestEffort => analyzed.push(record),
1345        _ => skipped.push(record),
1346    }
1347}
1348
1349/// Convenience wrapper: build a boxed `Skip` outcome with a single-item warning message.
1350#[inline]
1351fn skip_with_reason(
1352    path: &Path,
1353    root: &Path,
1354    size: u64,
1355    reason: impl Into<String>,
1356) -> MetadataPolicyOutcome {
1357    MetadataPolicyOutcome::Skip(Box::new(skipped_record(
1358        path,
1359        root,
1360        size,
1361        FileStatus::SkippedByPolicy,
1362        vec![reason.into()],
1363    )))
1364}
1365
1366/// Apply metadata-level policy checks (symlink, name, dir exclusion, size, globs, lockfile).
1367/// Returns `Skip(record)` to skip, `Exclude` to omit from output entirely (include-glob miss),
1368/// or `Continue` to proceed to content checks.
1369#[allow(clippy::too_many_arguments)]
1370fn check_metadata_policy(
1371    path: &Path,
1372    root: &Path,
1373    relative_path: &str,
1374    metadata: &fs::Metadata,
1375    config: &AppConfig,
1376    include_globs: Option<&GlobSet>,
1377    exclude_globs: Option<&GlobSet>,
1378) -> MetadataPolicyOutcome {
1379    let size = metadata.len();
1380
1381    if metadata.file_type().is_symlink() && !config.discovery.follow_symlinks {
1382        return skip_with_reason(path, root, size, "symlink skipped by policy");
1383    }
1384    if file_name_eq(path, ".gitignore") {
1385        return skip_with_reason(path, root, size, ".gitignore is always excluded");
1386    }
1387    if is_excluded_dir_path(path, &config.discovery.excluded_directories) {
1388        return skip_with_reason(path, root, size, "path matched excluded directory setting");
1389    }
1390    if size > config.discovery.max_file_size_bytes {
1391        return skip_with_reason(
1392            path,
1393            root,
1394            size,
1395            format!(
1396                "file exceeded max_file_size_bytes ({})",
1397                config.discovery.max_file_size_bytes
1398            ),
1399        );
1400    }
1401    if let Some(globs) = include_globs {
1402        if !globs.is_match(Path::new(relative_path)) && !globs.is_match(path) {
1403            return MetadataPolicyOutcome::Exclude;
1404        }
1405    }
1406    if let Some(globs) = exclude_globs {
1407        if globs.is_match(Path::new(relative_path)) || globs.is_match(path) {
1408            return skip_with_reason(path, root, size, "path matched exclude glob");
1409        }
1410    }
1411    if is_known_lockfile(path) && !config.analysis.include_lockfiles {
1412        return skip_with_reason(path, root, size, "lockfile skipped by default policy");
1413    }
1414
1415    MetadataPolicyOutcome::Continue
1416}
1417
/// Outcome of the content-level policy checks produced by `check_content_policy`.
struct ContentPolicyResult {
    /// Whether the path was classified as a vendor path.
    vendor: bool,
    /// Whether generated-file detection is enabled and flagged the file.
    generated: bool,
    /// Whether minified-file detection is enabled and flagged the file.
    minified: bool,
    /// `Some(record)` when a policy decided the file must be skipped; `None` otherwise.
    skip_record: Option<FileRecord>,
}
1424
1425/// Apply content-level policy checks (vendor, generated, minified).
1426/// `skip_record` is `Some` when the file should be skipped.
1427fn check_content_policy(
1428    path: &Path,
1429    root: &Path,
1430    size_bytes: u64,
1431    bytes: &[u8],
1432    config: &AppConfig,
1433) -> ContentPolicyResult {
1434    let vendor = is_vendor_path(path);
1435    if vendor && config.analysis.vendor_directory_detection {
1436        return ContentPolicyResult {
1437            vendor,
1438            generated: false,
1439            minified: false,
1440            skip_record: Some(skipped_record(
1441                path,
1442                root,
1443                size_bytes,
1444                FileStatus::SkippedByPolicy,
1445                vec!["vendor file skipped by policy".into()],
1446            )),
1447        };
1448    }
1449
1450    let generated = config.analysis.generated_file_detection && looks_generated(path, bytes);
1451    if generated {
1452        return ContentPolicyResult {
1453            vendor,
1454            generated,
1455            minified: false,
1456            skip_record: Some(skipped_record(
1457                path,
1458                root,
1459                size_bytes,
1460                FileStatus::SkippedByPolicy,
1461                vec!["generated file skipped by policy".into()],
1462            )),
1463        };
1464    }
1465
1466    let minified = config.analysis.minified_file_detection && looks_minified(path, bytes);
1467    if minified {
1468        return ContentPolicyResult {
1469            vendor,
1470            generated,
1471            minified,
1472            skip_record: Some(skipped_record(
1473                path,
1474                root,
1475                size_bytes,
1476                FileStatus::SkippedByPolicy,
1477                vec!["minified file skipped by policy".into()],
1478            )),
1479        };
1480    }
1481
1482    ContentPolicyResult {
1483        vendor,
1484        generated,
1485        minified,
1486        skip_record: None,
1487    }
1488}
1489
1490/// Decode file bytes to a UTF-8 string, handling binary detection and decode failures.
1491fn decode_file_contents(
1492    path: &Path,
1493    root: &Path,
1494    size_bytes: u64,
1495    bytes: &[u8],
1496    config: &AppConfig,
1497) -> Result<Option<(String, String, Vec<String>)>> {
1498    if is_binary(bytes) {
1499        return match config.analysis.binary_file_behavior {
1500            BinaryFileBehavior::Skip => Ok(None),
1501            BinaryFileBehavior::Fail => {
1502                anyhow::bail!("binary file encountered: {}", path.display())
1503            }
1504        };
1505    }
1506
1507    match decode_bytes(bytes) {
1508        Ok(result) => Ok(Some(result)),
1509        Err(err) => match config.analysis.decode_failure_behavior {
1510            FailureBehavior::WarnSkip => {
1511                // Caller will handle the None as a SkippedDecodeError record.
1512                // We use a sentinel: return Ok(None) but encode the error into a field.
1513                // Instead, propagate as a skipped record via the caller.
1514                let _ = (path, root, size_bytes); // suppress unused warnings
1515                Err(anyhow::anyhow!("__decode_warn__: {err}"))
1516            }
1517            FailureBehavior::Fail => {
1518                anyhow::bail!("decode failure for {}: {err}", path.display())
1519            }
1520        },
1521    }
1522}
1523
1524#[allow(clippy::too_many_lines)]
1525fn analyze_candidate_file(
1526    path: &Path,
1527    root: &Path,
1528    config: &AppConfig,
1529    include_globs: Option<&GlobSet>,
1530    exclude_globs: Option<&GlobSet>,
1531    enabled_languages: Option<&BTreeSet<Language>>,
1532) -> Result<Option<FileRecord>> {
1533    let metadata = match fs::symlink_metadata(path) {
1534        Ok(metadata) => metadata,
1535        Err(err) => {
1536            return Ok(Some(skipped_record(
1537                path,
1538                root,
1539                0,
1540                FileStatus::ErrorInternal,
1541                vec![format!("failed to read metadata: {err}")],
1542            )));
1543        }
1544    };
1545
1546    let relative_path = relative_path_string(path, root);
1547
1548    // Metadata-level policy checks.
1549    match check_metadata_policy(
1550        path,
1551        root,
1552        &relative_path,
1553        &metadata,
1554        config,
1555        include_globs,
1556        exclude_globs,
1557    ) {
1558        MetadataPolicyOutcome::Skip(record) => return Ok(Some(*record)),
1559        MetadataPolicyOutcome::Exclude => return Ok(None),
1560        MetadataPolicyOutcome::Continue => {}
1561    }
1562
1563    let bytes = match fs::read(path) {
1564        Ok(bytes) => bytes,
1565        Err(err) => {
1566            return Ok(Some(skipped_record(
1567                path,
1568                root,
1569                metadata.len(),
1570                FileStatus::ErrorInternal,
1571                vec![format!("failed to read file: {err}")],
1572            )));
1573        }
1574    };
1575
1576    // Content-level policy checks (vendor, generated, minified).
1577    let content_policy = check_content_policy(path, root, metadata.len(), &bytes, config);
1578    if let Some(record) = content_policy.skip_record {
1579        return Ok(Some(record));
1580    }
1581    let (vendor, generated, minified) = (
1582        content_policy.vendor,
1583        content_policy.generated,
1584        content_policy.minified,
1585    );
1586
1587    // Decode content, handling binary and decode failures.
1588    let (text, encoding, decode_warnings) =
1589        match decode_file_contents(path, root, metadata.len(), &bytes, config) {
1590            Ok(Some(result)) => result,
1591            Ok(None) => {
1592                return Ok(Some(skipped_record(
1593                    path,
1594                    root,
1595                    metadata.len(),
1596                    FileStatus::SkippedBinary,
1597                    vec!["binary file skipped by default".into()],
1598                )));
1599            }
1600            Err(err) => {
1601                let msg = err.to_string();
1602                if let Some(warn_msg) = msg.strip_prefix("__decode_warn__: ") {
1603                    return Ok(Some(skipped_record(
1604                        path,
1605                        root,
1606                        metadata.len(),
1607                        FileStatus::SkippedDecodeError,
1608                        vec![warn_msg.to_string()],
1609                    )));
1610                }
1611                return Err(err);
1612            }
1613        };
1614
1615    let first_line = text.lines().next();
1616    let language = detect_language(
1617        path,
1618        first_line,
1619        &config.analysis.extension_overrides,
1620        config.analysis.shebang_detection,
1621    );
1622
1623    let Some(language) = language else {
1624        return Ok(Some(skipped_record(
1625            path,
1626            root,
1627            metadata.len(),
1628            FileStatus::SkippedUnsupported,
1629            vec!["unsupported or undetected language".into()],
1630        )));
1631    };
1632
1633    if let Some(enabled) = enabled_languages {
1634        if !enabled.contains(&language) {
1635            return Ok(Some(skipped_record(
1636                path,
1637                root,
1638                metadata.len(),
1639                FileStatus::SkippedByPolicy,
1640                vec![format!(
1641                    "language {} disabled by configuration",
1642                    language.display_name()
1643                )],
1644            )));
1645        }
1646    }
1647
1648    let style_scope = match config.analysis.style_lang_scope.as_str() {
1649        "c_family" => StyleLangScope::CFamilyOnly,
1650        _ => StyleLangScope::All,
1651    };
1652    let ieee_opts = AnalysisOptions {
1653        blank_in_block_comment_as_comment: config.analysis.blank_in_block_comment_policy
1654            == BlankInBlockCommentPolicy::CountAsComment,
1655        collapse_continuation_lines: config.analysis.continuation_line_policy
1656            == ContinuationLinePolicy::CollapseToLogical,
1657        enable_style: config.analysis.style_analysis_enabled,
1658        style_lang_scope: style_scope,
1659    };
1660    let analysis = analyze_text(language, &text, ieee_opts);
1661    let effective_counts = compute_effective_counts(
1662        &analysis.raw,
1663        config.analysis.mixed_line_policy,
1664        config.analysis.python_docstrings_as_comments,
1665        config.analysis.count_compiler_directives,
1666    );
1667
1668    let mut warnings = decode_warnings;
1669    warnings.extend(analysis.warnings.clone());
1670
1671    // Compute a fast 64-bit content fingerprint for duplicate-file detection.
1672    let content_hash = {
1673        use std::hash::{DefaultHasher, Hash, Hasher};
1674        let mut h = DefaultHasher::new();
1675        bytes.hash(&mut h);
1676        h.finish()
1677    };
1678
1679    // Extract fields from analysis.raw before it is moved into FileRecord.
1680    let cyclomatic_complexity = if analysis.raw.cyclomatic_complexity > 0 {
1681        Some(analysis.raw.cyclomatic_complexity)
1682    } else {
1683        None
1684    };
1685    let lsloc = analysis.raw.lsloc;
1686
1687    Ok(Some(FileRecord {
1688        path: path_to_string(path),
1689        relative_path,
1690        language: Some(language),
1691        size_bytes: metadata.len(),
1692        detected_encoding: Some(encoding),
1693        raw_line_categories: analysis.raw,
1694        effective_counts,
1695        status: match analysis.parse_mode {
1696            ParseMode::Lexical | ParseMode::TreeSitter => FileStatus::AnalyzedExact,
1697            ParseMode::LexicalBestEffort => FileStatus::AnalyzedBestEffort,
1698        },
1699        warnings,
1700        generated,
1701        minified,
1702        vendor,
1703        parse_mode: Some(analysis.parse_mode),
1704        submodule: None,
1705        coverage: None,
1706        style_analysis: analysis.style_analysis,
1707        cyclomatic_complexity,
1708        lsloc,
1709        commit_count: None,
1710        last_commit_date: None,
1711        content_hash,
1712    }))
1713}
1714
1715const fn compute_effective_counts(
1716    raw: &RawLineCounts,
1717    mixed_line_policy: MixedLinePolicy,
1718    python_docstrings_as_comments: bool,
1719    count_compiler_directives: bool,
1720) -> EffectiveCounts {
1721    let mut effective = EffectiveCounts {
1722        code_lines: raw.code_only_lines,
1723        comment_lines: raw.single_comment_only_lines + raw.multi_comment_only_lines,
1724        blank_lines: raw.blank_only_lines,
1725        mixed_lines_separate: 0,
1726    };
1727
1728    if python_docstrings_as_comments {
1729        effective.comment_lines += raw.docstring_comment_lines;
1730    } else {
1731        effective.code_lines += raw.docstring_comment_lines;
1732    }
1733
1734    let mixed_total = raw.mixed_code_single_comment_lines + raw.mixed_code_multi_comment_lines;
1735    match mixed_line_policy {
1736        MixedLinePolicy::CodeOnly => effective.code_lines += mixed_total,
1737        MixedLinePolicy::CodeAndComment => {
1738            effective.code_lines += mixed_total;
1739            effective.comment_lines += mixed_total;
1740        }
1741        MixedLinePolicy::CommentOnly => effective.comment_lines += mixed_total,
1742        MixedLinePolicy::SeparateMixedCategory => effective.mixed_lines_separate += mixed_total,
1743    }
1744
1745    // IEEE 1045-1992 §4.2: optionally exclude preprocessor/compiler directives from code SLOC.
1746    // compiler_directive_lines is a subset of code_only_lines, so subtract it directly.
1747    if !count_compiler_directives {
1748        effective.code_lines = effective
1749            .code_lines
1750            .saturating_sub(raw.compiler_directive_lines);
1751    }
1752
1753    effective
1754}
1755
1756fn build_summary(analyzed: &[FileRecord], skipped: &[FileRecord]) -> SummaryTotals {
1757    let mut summary = SummaryTotals {
1758        files_considered: (analyzed.len() + skipped.len()) as u64,
1759        files_analyzed: analyzed.len() as u64,
1760        files_skipped: skipped.len() as u64,
1761        ..Default::default()
1762    };
1763
1764    for record in analyzed {
1765        summary.total_physical_lines += record.raw_line_categories.total_physical_lines;
1766        summary.code_lines += record.effective_counts.code_lines;
1767        summary.comment_lines += record.effective_counts.comment_lines;
1768        summary.blank_lines += record.effective_counts.blank_lines;
1769        summary.mixed_lines_separate += record.effective_counts.mixed_lines_separate;
1770        summary.functions += record.raw_line_categories.functions;
1771        summary.classes += record.raw_line_categories.classes;
1772        summary.variables += record.raw_line_categories.variables;
1773        summary.imports += record.raw_line_categories.imports;
1774        summary.test_count += record.raw_line_categories.test_count;
1775        summary.test_assertion_count += record.raw_line_categories.test_assertion_count;
1776        summary.test_suite_count += record.raw_line_categories.test_suite_count;
1777        summary.cyclomatic_complexity +=
1778            u64::from(record.raw_line_categories.cyclomatic_complexity);
1779        if let Some(lsloc) = record.raw_line_categories.lsloc {
1780            *summary.lsloc.get_or_insert(0) += u64::from(lsloc);
1781        }
1782        if let Some(cov) = &record.coverage {
1783            summary.coverage_lines_found += u64::from(cov.lines_found);
1784            summary.coverage_lines_hit += u64::from(cov.lines_hit);
1785            summary.coverage_functions_found += u64::from(cov.functions_found);
1786            summary.coverage_functions_hit += u64::from(cov.functions_hit);
1787            summary.coverage_branches_found += u64::from(cov.branches_found);
1788            summary.coverage_branches_hit += u64::from(cov.branches_hit);
1789        }
1790    }
1791
1792    summary
1793}
1794
1795/// Construct a zero-filled `LanguageSummary` for the given language.
1796const fn zeroed_summary(language: Language) -> LanguageSummary {
1797    LanguageSummary {
1798        language,
1799        files: 0,
1800        total_physical_lines: 0,
1801        code_lines: 0,
1802        comment_lines: 0,
1803        blank_lines: 0,
1804        mixed_lines_separate: 0,
1805        functions: 0,
1806        classes: 0,
1807        variables: 0,
1808        imports: 0,
1809        test_count: 0,
1810        test_assertion_count: 0,
1811        test_suite_count: 0,
1812        coverage_lines_found: 0,
1813        coverage_lines_hit: 0,
1814        coverage_functions_found: 0,
1815        coverage_functions_hit: 0,
1816        coverage_branches_found: 0,
1817        coverage_branches_hit: 0,
1818        cyclomatic_complexity: 0,
1819        lsloc: None,
1820    }
1821}
1822
1823/// Accumulate all per-file counters from `record` into an existing `LanguageSummary`.
1824fn accumulate_record_into_summary(entry: &mut LanguageSummary, record: &FileRecord) {
1825    entry.files += 1;
1826    let r = &record.raw_line_categories;
1827    entry.total_physical_lines += r.total_physical_lines;
1828    entry.code_lines += record.effective_counts.code_lines;
1829    entry.comment_lines += record.effective_counts.comment_lines;
1830    entry.blank_lines += record.effective_counts.blank_lines;
1831    entry.mixed_lines_separate += record.effective_counts.mixed_lines_separate;
1832    entry.functions += r.functions;
1833    entry.classes += r.classes;
1834    entry.variables += r.variables;
1835    entry.imports += r.imports;
1836    entry.test_count += r.test_count;
1837    entry.test_assertion_count += r.test_assertion_count;
1838    entry.test_suite_count += r.test_suite_count;
1839    entry.cyclomatic_complexity += u64::from(r.cyclomatic_complexity);
1840    if let Some(lsloc) = r.lsloc {
1841        *entry.lsloc.get_or_insert(0) += u64::from(lsloc);
1842    }
1843    if let Some(cov) = &record.coverage {
1844        entry.coverage_lines_found += u64::from(cov.lines_found);
1845        entry.coverage_lines_hit += u64::from(cov.lines_hit);
1846        entry.coverage_functions_found += u64::from(cov.functions_found);
1847        entry.coverage_functions_hit += u64::from(cov.functions_hit);
1848        entry.coverage_branches_found += u64::from(cov.branches_found);
1849        entry.coverage_branches_hit += u64::from(cov.branches_hit);
1850    }
1851}
1852
1853fn build_language_summaries(analyzed: &[FileRecord]) -> Vec<LanguageSummary> {
1854    let mut by_language: BTreeMap<Language, LanguageSummary> = BTreeMap::new();
1855    for record in analyzed {
1856        let Some(language) = record.language else {
1857            continue;
1858        };
1859        let entry = by_language
1860            .entry(language)
1861            .or_insert_with(|| zeroed_summary(language));
1862        accumulate_record_into_summary(entry, record);
1863    }
1864    by_language.into_values().collect()
1865}
1866
1867fn skipped_record(
1868    path: &Path,
1869    root: &Path,
1870    size_bytes: u64,
1871    status: FileStatus,
1872    warnings: Vec<String>,
1873) -> FileRecord {
1874    FileRecord {
1875        path: path_to_string(path),
1876        relative_path: relative_path_string(path, root),
1877        language: None,
1878        size_bytes,
1879        detected_encoding: None,
1880        raw_line_categories: RawLineCounts::default(),
1881        effective_counts: EffectiveCounts::default(),
1882        status,
1883        warnings,
1884        generated: false,
1885        minified: false,
1886        vendor: false,
1887        parse_mode: None,
1888        submodule: None,
1889        coverage: None,
1890        style_analysis: None,
1891        cyclomatic_complexity: None,
1892        lsloc: None,
1893        commit_count: None,
1894        last_commit_date: None,
1895        content_hash: 0,
1896    }
1897}
1898
/// Render `path` relative to `root` with forward slashes.
///
/// When `path` does not start with `root` the whole path is rendered instead.
/// Non-UTF-8 bytes are replaced lossily and every backslash becomes `/`.
fn relative_path_string(path: &Path, root: &Path) -> String {
    let relative = path.strip_prefix(root).unwrap_or(path);
    relative
        .to_string_lossy()
        .chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect()
}
1905
/// Render `path` as a string with forward slashes: non-UTF-8 bytes are replaced
/// lossily and every backslash becomes `/`.
fn path_to_string(path: &Path) -> String {
    path.to_string_lossy()
        .chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect()
}
1909
/// Parse `.gitmodules` in `root` and return `(name, relative_path)` for each submodule found.
///
/// Returns an empty list when the file is missing, is not a regular file, or cannot be
/// read as UTF-8 text. Parsing is line based:
///
/// * a line of the form `[submodule "NAME"]` starts a new entry;
/// * a `path = VALUE` line (key compared case-insensitively, as git does for variable
///   names) sets the path of the entry being read.
///
/// An entry is reported only when both a name and a path were seen. Lines that look like
/// a header but are malformed (for example `[submodule "]`) are ignored instead of
/// causing a panic, and keys that merely start with `path` are not treated as the path.
#[must_use]
pub fn detect_submodules(root: &Path) -> Vec<(String, PathBuf)> {
    let gitmodules = root.join(".gitmodules");
    if !gitmodules.is_file() {
        return Vec::new();
    }
    let Ok(content) = fs::read_to_string(&gitmodules) else {
        return Vec::new();
    };

    let mut result = Vec::new();
    let mut current_name: Option<String> = None;
    let mut current_path: Option<PathBuf> = None;

    for line in content.lines() {
        let trimmed = line.trim();

        // Strip prefix and suffix independently so the two quotes can never overlap;
        // slicing by computed offsets would panic on `[submodule "]`.
        let header = trimmed
            .strip_prefix("[submodule \"")
            .and_then(|rest| rest.strip_suffix("\"]"));

        if let Some(name) = header {
            // A new section begins: flush the previous one if it was complete. Both
            // halves are taken unconditionally so a path never leaks into the next entry.
            if let (Some(prev_name), Some(prev_path)) = (current_name.take(), current_path.take())
            {
                result.push((prev_name, prev_path));
            }
            current_name = Some(name.to_string());
        } else if let Some((key, value)) = trimmed.split_once('=') {
            // Require the key to be exactly `path`, not just a `path…` prefix.
            if key.trim().eq_ignore_ascii_case("path") {
                current_path = Some(PathBuf::from(value.trim()));
            }
        }
    }

    // Flush the final section.
    if let (Some(name), Some(path)) = (current_name, current_path) {
        result.push((name, path));
    }

    result
}
1946
1947fn build_submodule_summaries(
1948    analyzed: &[FileRecord],
1949    submodules: &[(String, PathBuf)],
1950    root: &Path,
1951) -> Vec<SubmoduleSummary> {
1952    submodules
1953        .iter()
1954        .map(|(name, path)| {
1955            let files: Vec<&FileRecord> = analyzed
1956                .iter()
1957                .filter(|f| f.submodule.as_deref() == Some(name.as_str()))
1958                .collect();
1959
1960            let files_analyzed = files.len() as u64;
1961            let total_physical_lines = files
1962                .iter()
1963                .map(|f| f.raw_line_categories.total_physical_lines)
1964                .sum();
1965            let code_lines = files.iter().map(|f| f.effective_counts.code_lines).sum();
1966            let comment_lines = files.iter().map(|f| f.effective_counts.comment_lines).sum();
1967            let blank_lines = files.iter().map(|f| f.effective_counts.blank_lines).sum();
1968            let language_summaries = build_language_summaries_from_slice(&files);
1969
1970            let git = detect_git_for_run(&root.join(path));
1971
1972            SubmoduleSummary {
1973                name: name.clone(),
1974                relative_path: path.to_string_lossy().replace('\\', "/"),
1975                files_analyzed,
1976                total_physical_lines,
1977                code_lines,
1978                comment_lines,
1979                blank_lines,
1980                language_summaries,
1981                git_commit_short: git.commit_short,
1982                git_commit_long: git.commit_long,
1983                git_branch: git.branch,
1984                git_commit_author: git.author,
1985                git_commit_date: git.commit_date,
1986                git_remote_url: git.remote_url,
1987            }
1988        })
1989        .filter(|s| s.files_analyzed > 0)
1990        .collect()
1991}
1992
1993/// Dominant indent label from vote counts.
1994#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
1995fn dominant_indent_label(files: &[&StyleAnalysis]) -> String {
1996    let mut votes = [0u32; 6];
1997    for f in files {
1998        let idx = match f.indent_style {
1999            IndentStyle::Tabs => 0,
2000            IndentStyle::Spaces2 => 1,
2001            IndentStyle::Spaces4 => 2,
2002            IndentStyle::Spaces8 => 3,
2003            IndentStyle::Mixed => 4,
2004            IndentStyle::Unknown => 5,
2005        };
2006        votes[idx] += 1;
2007    }
2008    let labels = ["Tabs", "2-Space", "4-Space", "8-Space", "Mixed", "\u{2014}"];
2009    labels[votes
2010        .iter()
2011        .enumerate()
2012        .max_by_key(|(_, v)| *v)
2013        .map_or(5, |(i, _)| i)]
2014    .to_string()
2015}
2016
2017/// Line-80 compliance percentage for a slice of style analyses.
2018#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
2019fn line80_pct(files: &[&StyleAnalysis]) -> u8 {
2020    if files.is_empty() {
2021        return 0;
2022    }
2023    let compliant = files
2024        .iter()
2025        .filter(|f| f.total_lines == 0 || (f.lines_over_80 as f32 / f.total_lines as f32) <= 0.05)
2026        .count() as u32;
2027    ((compliant * 100) / files.len() as u32) as u8
2028}
2029
2030/// Column-N compliance percentage using the configured threshold (80, 100, or 120).
2031/// Falls back to the 80-col bucket for any threshold ≤ 80.
2032#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
2033fn line_col_pct(files: &[&StyleAnalysis], threshold: u16) -> u8 {
2034    if files.is_empty() {
2035        return 0;
2036    }
2037    let compliant = files
2038        .iter()
2039        .filter(|f| {
2040            let over = if threshold <= 80 {
2041                f.lines_over_80
2042            } else if threshold <= 100 {
2043                f.lines_over_100
2044            } else {
2045                f.lines_over_120
2046            };
2047            f.total_lines == 0 || (over as f32 / f.total_lines as f32) <= 0.05
2048        })
2049        .count() as u32;
2050    ((compliant * 100) / files.len() as u32) as u8
2051}
2052
2053/// Build a `LanguageStyleGroup` from a non-empty slice of `StyleAnalysis` for one family.
2054#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
2055fn build_language_group(
2056    family: &str,
2057    files: &[&StyleAnalysis],
2058    col_threshold: u16,
2059) -> LanguageStyleGroup {
2060    let count = files.len() as u32;
2061
2062    // Collect every unique guide name across all files in this group.
2063    let mut all_names: Vec<String> = Vec::new();
2064    for f in files {
2065        for g in &f.guide_scores {
2066            if !all_names.contains(&g.name) {
2067                all_names.push(g.name.clone());
2068            }
2069        }
2070    }
2071
2072    let mut guide_avg_scores: Vec<(String, u8)> = all_names
2073        .into_iter()
2074        .map(|name| {
2075            let sum: u32 = files
2076                .iter()
2077                .filter_map(|f| f.guide_scores.iter().find(|g| g.name == name))
2078                .map(|g| u32::from(g.score_pct))
2079                .sum();
2080            let avg = (sum / count) as u8;
2081            (name, avg)
2082        })
2083        .collect();
2084    guide_avg_scores.sort_by_key(|s| std::cmp::Reverse(s.1));
2085
2086    let (dominant_guide, dominant_score_pct) = guide_avg_scores
2087        .first()
2088        .map(|(n, s)| (n.clone(), *s))
2089        .unwrap_or_default();
2090
2091    let lcp = line_col_pct(files, col_threshold);
2092    LanguageStyleGroup {
2093        language_family: family.to_string(),
2094        files_count: count,
2095        dominant_guide,
2096        dominant_score_pct,
2097        common_indent_style: dominant_indent_label(files),
2098        guide_avg_scores,
2099        line80_compliant_pct: line80_pct(files),
2100        line_col_compliant_pct: lcp,
2101    }
2102}
2103
2104/// Build aggregate multi-language style-guide adherence.
2105/// Returns `None` when no files had style data.
2106#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
2107fn build_style_summary(analyzed: &[FileRecord], col_threshold: u16) -> Option<StyleSummary> {
2108    let all_style: Vec<&StyleAnalysis> = analyzed
2109        .iter()
2110        .filter_map(|f| f.style_analysis.as_ref())
2111        .collect();
2112
2113    if all_style.is_empty() {
2114        return None;
2115    }
2116
2117    // Group by language_family.
2118    let mut families: std::collections::BTreeMap<&str, Vec<&StyleAnalysis>> =
2119        std::collections::BTreeMap::new();
2120    for sa in &all_style {
2121        families
2122            .entry(sa.language_family.as_str())
2123            .or_default()
2124            .push(sa);
2125    }
2126
2127    let mut by_language: Vec<LanguageStyleGroup> = families
2128        .iter()
2129        .map(|(family, files)| build_language_group(family, files, col_threshold))
2130        .collect();
2131    by_language.sort_by_key(|g| std::cmp::Reverse(g.files_count));
2132
2133    let files_analyzed = all_style.len() as u32;
2134    let common_indent_style = dominant_indent_label(&all_style);
2135    let line80_compliant_pct = line80_pct(&all_style);
2136    let line_col_compliant_pct = line_col_pct(&all_style, col_threshold);
2137
2138    Some(StyleSummary {
2139        files_analyzed,
2140        common_indent_style,
2141        line80_compliant_pct,
2142        line_col_compliant_pct,
2143        col_threshold,
2144        by_language,
2145    })
2146}
2147
2148fn build_language_summaries_from_slice(files: &[&FileRecord]) -> Vec<LanguageSummary> {
2149    let mut map: BTreeMap<String, LanguageSummary> = BTreeMap::new();
2150    for file in files {
2151        let Some(lang) = file.language else { continue };
2152        let entry = map
2153            .entry(lang.display_name().to_string())
2154            .or_insert_with(|| zeroed_summary(lang));
2155        accumulate_record_into_summary(entry, file);
2156    }
2157    map.into_values().collect()
2158}
2159
/// `true` when the final component of `path` is valid UTF-8 and equals `expected`
/// exactly (case-sensitive).
fn file_name_eq(path: &Path, expected: &str) -> bool {
    matches!(
        path.file_name().and_then(|name| name.to_str()),
        Some(name) if name == expected
    )
}
2165
/// `true` when any component of `path` exactly matches one of `excluded_dirs`.
///
/// Components that are not valid UTF-8 never match.
fn is_excluded_dir_path(path: &Path, excluded_dirs: &[String]) -> bool {
    for component in path.components() {
        let Some(part) = component.as_os_str().to_str() else {
            continue;
        };
        if excluded_dirs.iter().any(|excluded| excluded.as_str() == part) {
            return true;
        }
    }
    false
}
2174
/// `true` when any component of `path` is exactly `vendor`, `node_modules` or `packages`.
///
/// Components that are not valid UTF-8 never match.
fn is_vendor_path(path: &Path) -> bool {
    const VENDOR_DIRS: [&str; 3] = ["vendor", "node_modules", "packages"];

    path.components()
        .filter_map(|component| component.as_os_str().to_str())
        .any(|part| VENDOR_DIRS.contains(&part))
}
2183
/// `true` when the file name of `path` is one of the recognized dependency lockfiles.
///
/// The comparison is exact and case-sensitive; a path without a UTF-8 file name is
/// never a lockfile.
fn is_known_lockfile(path: &Path) -> bool {
    const LOCKFILES: [&str; 7] = [
        "Cargo.lock",
        "package-lock.json",
        "yarn.lock",
        "pnpm-lock.yaml",
        "Pipfile.lock",
        "poetry.lock",
        "composer.lock",
    ];

    let Some(name) = path.file_name().and_then(|name| name.to_str()) else {
        return false;
    };
    LOCKFILES.contains(&name)
}
2200
2201fn looks_generated(path: &Path, bytes: &[u8]) -> bool {
2202    let file_name = path
2203        .file_name()
2204        .and_then(|name| name.to_str())
2205        .unwrap_or_default();
2206    if file_name.contains(".generated.") || file_name.contains(".g.") {
2207        return true;
2208    }
2209
2210    let sample = String::from_utf8_lossy(&bytes[..bytes.len().min(GENERATED_SAMPLE_BYTES)])
2211        .to_ascii_lowercase();
2212    sample.contains("@generated") || sample.contains("generated by")
2213}
2214
2215fn looks_minified(path: &Path, bytes: &[u8]) -> bool {
2216    let file_name = path
2217        .file_name()
2218        .and_then(|name| name.to_str())
2219        .unwrap_or_default();
2220    if file_name.contains(".min.") {
2221        return true;
2222    }
2223
2224    let sample = String::from_utf8_lossy(&bytes[..bytes.len().min(MINIFIED_SAMPLE_BYTES)]);
2225    let longest_line = sample.lines().map(str::len).max().unwrap_or(0);
2226    let whitespace = sample.chars().filter(|c| c.is_whitespace()).count();
2227    longest_line > MINIFIED_LINE_THRESHOLD && whitespace * 100 < sample.len().max(1)
2228}
2229
2230fn is_binary(bytes: &[u8]) -> bool {
2231    if bytes.starts_with(&[0xEF, 0xBB, 0xBF])
2232        || bytes.starts_with(&[0xFF, 0xFE])
2233        || bytes.starts_with(&[0xFE, 0xFF])
2234    {
2235        return false;
2236    }
2237
2238    let sample = &bytes[..bytes.len().min(BINARY_SAMPLE_BYTES)];
2239    sample.contains(&0)
2240}
2241
2242/// Decode a BOM-stripped UTF-16 byte slice using the given encoding.
2243/// Returns `(text, encoding_label, warnings)`.
2244fn decode_utf16_bom(
2245    bom_stripped: &[u8],
2246    encoding: &'static encoding_rs::Encoding,
2247    label: &str,
2248) -> (String, String, Vec<String>) {
2249    let (cow, _, had_errors) = encoding.decode(bom_stripped);
2250    let mut warnings = Vec::new();
2251    if had_errors {
2252        warnings.push(format!("{label} decode contained replacement characters"));
2253    }
2254    (cow.into_owned(), label.into(), warnings)
2255}
2256
2257fn decode_bytes(bytes: &[u8]) -> std::result::Result<(String, String, Vec<String>), String> {
2258    if bytes.starts_with(&[0xEF, 0xBB, 0xBF]) {
2259        let text = String::from_utf8(bytes[3..].to_vec()).map_err(|err| err.to_string())?;
2260        return Ok((text, "utf-8-bom".into(), vec![]));
2261    }
2262    if bytes.starts_with(&[0xFF, 0xFE]) {
2263        return Ok(decode_utf16_bom(&bytes[2..], UTF_16LE, "utf-16le"));
2264    }
2265    if bytes.starts_with(&[0xFE, 0xFF]) {
2266        return Ok(decode_utf16_bom(&bytes[2..], UTF_16BE, "utf-16be"));
2267    }
2268
2269    // Multiple statements in the else branch make map_or_else awkward here.
2270    #[allow(clippy::option_if_let_else)]
2271    if let Ok(text) = String::from_utf8(bytes.to_vec()) {
2272        Ok((text, "utf-8".into(), vec![]))
2273    } else {
2274        let (cow, _, had_errors) = WINDOWS_1252.decode(bytes);
2275        let mut warnings = vec!["decoded using windows-1252 fallback".into()];
2276        if had_errors {
2277            warnings.push("fallback decode contained replacement characters".into());
2278        }
2279        Ok((cow.into_owned(), "windows-1252".into(), warnings))
2280    }
2281}
2282
2283fn compile_globset(patterns: &[String]) -> Result<Option<GlobSet>> {
2284    if patterns.is_empty() {
2285        return Ok(None);
2286    }
2287
2288    let mut builder = GlobSetBuilder::new();
2289    for pattern in patterns {
2290        builder
2291            .add(Glob::new(pattern).with_context(|| format!("invalid glob pattern: {pattern}"))?);
2292    }
2293    Ok(Some(
2294        builder.build().context("failed to compile glob filters")?,
2295    ))
2296}
2297
2298fn parse_enabled_languages(enabled: &[String]) -> Result<Option<BTreeSet<Language>>> {
2299    if enabled.is_empty() {
2300        return Ok(None);
2301    }
2302
2303    let supported = supported_languages();
2304    let mut set = BTreeSet::new();
2305    for name in enabled {
2306        let language = Language::from_name(name)
2307            .with_context(|| format!("unsupported language in config: {name}"))?;
2308        if !supported.contains(&language) {
2309            anyhow::bail!("language {name} is not supported in this build");
2310        }
2311        set.insert(language);
2312    }
2313    Ok(Some(set))
2314}
2315
2316/// # Errors
2317///
2318/// Returns an error if serialization fails or the output file cannot be written.
2319pub fn write_json(run: &AnalysisRun, output_path: &Path) -> Result<()> {
2320    let json = serde_json::to_string_pretty(run).context("failed to serialize analysis run")?;
2321    fs::write(output_path, json)
2322        .with_context(|| format!("failed to write JSON output to {}", output_path.display()))
2323}
2324
2325/// # Errors
2326///
2327/// Returns an error if the file cannot be read or the JSON cannot be parsed.
2328pub fn read_json(path: &Path) -> Result<AnalysisRun> {
2329    let contents = fs::read_to_string(path)
2330        .with_context(|| format!("failed to read result file {}", path.display()))?;
2331    serde_json::from_str(&contents)
2332        .with_context(|| format!("failed to parse JSON result {}", path.display()))
2333}
2334
2335#[cfg(test)]
2336mod tests {
2337    use super::*;
2338
2339    #[test]
2340    fn effective_counts_respect_code_only_policy() {
2341        let raw = RawLineCounts {
2342            code_only_lines: 2,
2343            single_comment_only_lines: 1,
2344            mixed_code_single_comment_lines: 3,
2345            docstring_comment_lines: 2,
2346            ..RawLineCounts::default()
2347        };
2348        let counts = compute_effective_counts(&raw, MixedLinePolicy::CodeOnly, true, true);
2349        assert_eq!(counts.code_lines, 5);
2350        assert_eq!(counts.comment_lines, 3);
2351    }
2352
2353    #[test]
2354    fn effective_counts_can_separate_mixed() {
2355        let raw = RawLineCounts {
2356            mixed_code_single_comment_lines: 2,
2357            mixed_code_multi_comment_lines: 1,
2358            ..RawLineCounts::default()
2359        };
2360        let counts =
2361            compute_effective_counts(&raw, MixedLinePolicy::SeparateMixedCategory, true, true);
2362        assert_eq!(counts.mixed_lines_separate, 3);
2363        assert_eq!(counts.code_lines, 0);
2364        assert_eq!(counts.comment_lines, 0);
2365    }
2366
2367    #[test]
2368    fn windows_1252_fallback_decodes() {
2369        let bytes = vec![0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x96, 0x57];
2370        let (text, encoding, warnings) = decode_bytes(&bytes).unwrap();
2371        assert_eq!(encoding, "windows-1252");
2372        assert!(text.contains('–'));
2373        assert!(!warnings.is_empty());
2374    }
2375
2376    // ── Pure predicate tests ─────────────────────────────────────────────────
2377
2378    #[test]
2379    fn is_binary_detects_null_byte() {
2380        let bytes = b"hello\x00world";
2381        assert!(is_binary(bytes));
2382    }
2383
2384    #[test]
2385    fn is_binary_clean_text_is_not_binary() {
2386        let bytes = b"fn main() { println!(\"hello\"); }";
2387        assert!(!is_binary(bytes));
2388    }
2389
2390    #[test]
2391    fn is_binary_utf8_bom_not_binary() {
2392        let bytes = b"\xef\xbb\xbffn main() {}";
2393        assert!(!is_binary(bytes));
2394    }
2395
2396    #[test]
2397    fn looks_generated_at_generated_marker() {
2398        let bytes = b"// @generated by protoc-gen-rust\nfn foo() {}";
2399        assert!(looks_generated(Path::new("foo.rs"), bytes));
2400    }
2401
2402    #[test]
2403    fn looks_generated_do_not_edit_marker() {
2404        // "Code generated by" triggers detection (contains the "generated by" substring).
2405        let bytes = b"// Code generated by build.rs. DO NOT EDIT.\nuse foo;";
2406        assert!(looks_generated(Path::new("foo.rs"), bytes));
2407        // @generated also triggers detection independently.
2408        let bytes2 = b"// @generated\nuse foo;";
2409        assert!(looks_generated(Path::new("foo.rs"), bytes2));
2410    }
2411
2412    #[test]
2413    fn looks_generated_normal_file_not_generated() {
2414        let bytes = b"fn main() {\n    println!(\"hello\");\n}\n";
2415        assert!(!looks_generated(Path::new("main.rs"), bytes));
2416    }
2417
2418    #[test]
2419    fn looks_minified_dot_min_filename() {
2420        let bytes = b"function a(){return 1}";
2421        assert!(looks_minified(Path::new("bundle.min.js"), bytes));
2422    }
2423
2424    #[test]
2425    fn looks_minified_normal_file_not_minified() {
2426        let bytes = b"function hello() {\n    return 1;\n}\n";
2427        assert!(!looks_minified(Path::new("app.js"), bytes));
2428    }
2429
2430    #[test]
2431    fn looks_minified_very_long_line() {
2432        let long_line: Vec<u8> = b"x".repeat(MINIFIED_LINE_THRESHOLD + 1);
2433        assert!(looks_minified(Path::new("app.js"), &long_line));
2434    }
2435
2436    #[test]
2437    fn is_known_lockfile_cargo_lock() {
2438        assert!(is_known_lockfile(Path::new("Cargo.lock")));
2439    }
2440
2441    #[test]
2442    fn is_known_lockfile_package_lock_json() {
2443        assert!(is_known_lockfile(Path::new("package-lock.json")));
2444    }
2445
2446    #[test]
2447    fn is_known_lockfile_yarn_lock() {
2448        assert!(is_known_lockfile(Path::new("yarn.lock")));
2449    }
2450
2451    #[test]
2452    fn is_known_lockfile_normal_file_is_not_lockfile() {
2453        assert!(!is_known_lockfile(Path::new("src/lib.rs")));
2454    }
2455
2456    #[test]
2457    fn is_vendor_path_node_modules() {
2458        assert!(is_vendor_path(Path::new("node_modules/react/index.js")));
2459    }
2460
2461    #[test]
2462    fn is_vendor_path_vendor_dir() {
2463        assert!(is_vendor_path(Path::new("vendor/anyhow/src/lib.rs")));
2464    }
2465
2466    #[test]
2467    fn is_vendor_path_normal_src_is_not_vendor() {
2468        assert!(!is_vendor_path(Path::new("src/lib.rs")));
2469    }
2470
2471    #[test]
2472    fn is_excluded_dir_path_matches_excluded() {
2473        let excluded = vec![".git".into(), "target".into()];
2474        assert!(is_excluded_dir_path(Path::new(".git/config"), &excluded));
2475    }
2476
2477    #[test]
2478    fn is_excluded_dir_path_non_excluded_is_ok() {
2479        let excluded = vec![".git".into(), "target".into()];
2480        assert!(!is_excluded_dir_path(Path::new("src/main.rs"), &excluded));
2481    }
2482
#[test]
fn decode_bytes_utf8_bom_stripped() {
    // Input begins with the three-byte UTF-8 BOM (EF BB BF).
    let input = b"\xef\xbb\xbffn main() {}";
    let (decoded, encoding, _) = decode_bytes(input).unwrap();

    // The reported label only has to be some UTF-8 variant.
    assert!(
        encoding.contains("utf-8"),
        "should be utf-8 variant, got {encoding}"
    );
    // The decoded text must start with the source itself, not with the BOM.
    assert!(decoded.starts_with("fn"));
}
2494
#[test]
fn decode_bytes_plain_utf8() {
    // Plain ASCII without a BOM: decoded verbatim, labelled utf-8, no warnings.
    let (decoded, label, notes) = decode_bytes(b"hello world").unwrap();
    assert_eq!(label, "utf-8");
    assert_eq!(decoded, "hello world");
    assert!(notes.is_empty());
}
2503
2504    // ── UTF-16 BOM decoding ──────────────────────────────────────────────────
2505
#[test]
fn decode_bytes_utf16le_bom() {
    // UTF-16 LE BOM (FF FE) followed by "hi\n" as little-endian code units.
    let mut input = vec![0xFF, 0xFE];
    input.extend("hi\n".encode_utf16().flat_map(u16::to_le_bytes));

    let (decoded, label, _notes) = decode_bytes(&input).unwrap();
    assert_eq!(label, "utf-16le");
    assert!(decoded.contains('h'));
    assert!(decoded.contains('i'));
}
2517
#[test]
fn decode_bytes_utf16be_bom() {
    // UTF-16 BE BOM (FE FF) followed by "ok\n" as big-endian code units.
    let mut input = vec![0xFE, 0xFF];
    input.extend("ok\n".encode_utf16().flat_map(u16::to_be_bytes));

    let (decoded, label, _notes) = decode_bytes(&input).unwrap();
    assert_eq!(label, "utf-16be");
    assert!(decoded.contains('o'));
    assert!(decoded.contains('k'));
}
2529
#[test]
fn is_binary_utf16le_bom_not_binary() {
    // A UTF-16 LE BOM followed by data containing a NUL byte must not be
    // classified as binary.
    let sample = &[0xFF, 0xFE, 0x68, 0x00];
    let binary = is_binary(sample);
    assert!(!binary);
}
2536
#[test]
fn is_binary_utf16be_bom_not_binary() {
    // A UTF-16 BE BOM followed by data containing a NUL byte must not be
    // classified as binary.
    let sample = &[0xFE, 0xFF, 0x00, 0x68];
    let binary = is_binary(sample);
    assert!(!binary);
}
2542
2543    // ── MixedLinePolicy branches ─────────────────────────────────────────────
2544
#[test]
fn effective_counts_code_and_comment_policy() {
    // 3 + 2 mixed lines and nothing else.
    let raw_counts = RawLineCounts {
        mixed_code_multi_comment_lines: 2,
        mixed_code_single_comment_lines: 3,
        ..RawLineCounts::default()
    };
    let policy = MixedLinePolicy::CodeAndComment;
    let effective = compute_effective_counts(&raw_counts, policy, true, true);

    // Under this policy the 5 mixed lines land in both the code and the comment
    // totals, and none are reported separately.
    assert_eq!(effective.code_lines, 5);
    assert_eq!(effective.comment_lines, 5);
    assert_eq!(effective.mixed_lines_separate, 0);
}
2558
#[test]
fn effective_counts_comment_only_policy() {
    // 4 + 1 mixed lines and nothing else.
    let raw_counts = RawLineCounts {
        mixed_code_multi_comment_lines: 1,
        mixed_code_single_comment_lines: 4,
        ..RawLineCounts::default()
    };
    let policy = MixedLinePolicy::CommentOnly;
    let effective = compute_effective_counts(&raw_counts, policy, true, true);

    // All 5 mixed lines are attributed to comments only.
    assert_eq!(effective.code_lines, 0);
    assert_eq!(effective.comment_lines, 5);
    assert_eq!(effective.mixed_lines_separate, 0);
}
2571
#[test]
fn effective_counts_docstrings_as_code_when_flag_false() {
    let raw_counts = RawLineCounts {
        docstring_comment_lines: 3,
        code_only_lines: 10,
        ..RawLineCounts::default()
    };
    // Third argument `false`: docstrings are not treated as comments, so the
    // 3 docstring lines are expected to be added to the 10 code lines.
    let effective =
        compute_effective_counts(&raw_counts, MixedLinePolicy::CodeOnly, false, true);
    assert_eq!(effective.code_lines, 13);
    assert_eq!(effective.comment_lines, 0);
}
2584
#[test]
fn effective_counts_exclude_compiler_directives() {
    let raw_counts = RawLineCounts {
        compiler_directive_lines: 3,
        code_only_lines: 10,
        ..RawLineCounts::default()
    };
    // Fourth argument `false`: compiler directives are not counted, so the
    // 3 directive lines are expected to be removed from the 10 code lines.
    let effective =
        compute_effective_counts(&raw_counts, MixedLinePolicy::CodeOnly, true, false);
    assert_eq!(effective.code_lines, 7);
}
2596
#[test]
fn effective_counts_directives_not_subtracted_below_zero() {
    // More directive lines (5) than code lines (2): the result must clamp at
    // zero rather than underflow.
    let raw_counts = RawLineCounts {
        compiler_directive_lines: 5,
        code_only_lines: 2,
        ..RawLineCounts::default()
    };
    let effective =
        compute_effective_counts(&raw_counts, MixedLinePolicy::CodeOnly, true, false);
    assert_eq!(effective.code_lines, 0);
}
2607
2608    // ── COCOMO modes ─────────────────────────────────────────────────────────
2609
#[test]
fn cocomo_organic_computes_positive_values() {
    // 5 000 lines in organic mode: every derived figure must be strictly positive.
    let estimate = compute_cocomo(5_000, CocomoMode::Organic);
    assert_eq!(estimate.mode, CocomoMode::Organic);
    assert!(estimate.ksloc > 0.0);
    assert!(estimate.effort_person_months > 0.0);
    assert!(estimate.duration_months > 0.0);
    assert!(estimate.avg_staff > 0.0);
}
2619
#[test]
fn cocomo_semi_detached_computes_positive_values() {
    // 20 000 lines in semi-detached mode: size, effort and duration are positive.
    let estimate = compute_cocomo(20_000, CocomoMode::SemiDetached);
    assert_eq!(estimate.mode, CocomoMode::SemiDetached);
    assert!(estimate.ksloc > 0.0);
    assert!(estimate.effort_person_months > 0.0);
    assert!(estimate.duration_months > 0.0);
}
2628
#[test]
fn cocomo_embedded_computes_positive_values() {
    // 100 000 lines in embedded mode: effort is positive and the mode is echoed back.
    let estimate = compute_cocomo(100_000, CocomoMode::Embedded);
    assert_eq!(estimate.mode, CocomoMode::Embedded);
    assert!(estimate.effort_person_months > 0.0);
}
2635
#[test]
fn cocomo_zero_lines_produces_zero_effort() {
    let estimate = compute_cocomo(0, CocomoMode::Organic);
    // No lines means zero KSLOC ...
    assert!(estimate.ksloc.abs() < f64::EPSILON);
    // ... and therefore (to within 0.01) zero effort.
    assert!(estimate.effort_person_months.abs() < 0.01);
}
2643
2644    // ── parse_activity_log (git hotspots) ─────────────────────────────────────
2645
#[test]
fn parse_activity_log_counts_and_dates_per_file() {
    // Two NUL-prefixed date headers, newest first; a.rs is listed under both.
    let log = concat!(
        "\u{0}2024-03-02T10:00:00+00:00\n",
        "M\tsrc/a.rs\n",
        "A\tsrc/b.rs\n",
        "\u{0}2024-03-01T09:00:00+00:00\n",
        "M\tsrc/a.rs\n",
    );
    let activity = parse_activity_log(log);

    let a_entry = &activity["src/a.rs"];
    assert_eq!(a_entry.0, 2, "a.rs touched in two commits");
    let b_entry = &activity["src/b.rs"];
    assert_eq!(b_entry.0, 1, "b.rs touched once");
    // The date kept for a.rs is the one from the first (most recent) header.
    let newest = Some("2024-03-02T10:00:00+00:00");
    assert_eq!(a_entry.1.as_deref(), newest);
}
2662
#[test]
fn parse_activity_log_attributes_rename_to_new_path() {
    // A rename record carries two paths: the old one, then the new one.
    let log = concat!(
        "\u{0}2024-03-02T10:00:00+00:00\n",
        "R100\tsrc/old.rs\tsrc/new.rs\n",
    );
    let activity = parse_activity_log(log);
    // The commit is credited to the new path; the old path gets no entry.
    assert_eq!(activity["src/new.rs"].0, 1);
    let old_path_present = activity.contains_key("src/old.rs");
    assert!(!old_path_present);
}
2670
#[test]
fn parse_activity_log_empty_is_empty() {
    // Empty input yields an empty map.
    let activity = parse_activity_log("");
    assert!(activity.is_empty());
}
2675
2676    // ── Path / git helpers ────────────────────────────────────────────────────
2677
#[test]
fn parse_url_line_extracts_url() {
    // A `url = <value>` line yields the value.
    let parsed = parse_url_line("url = https://example.com/repo.git");
    assert_eq!(parsed, Some("https://example.com/repo.git"));
}
2685
#[test]
fn parse_url_line_returns_none_for_non_url_key() {
    // A `fetch = ...` line is not a URL line.
    let parsed = parse_url_line("fetch = +refs/heads/*:refs/remotes/origin/*");
    assert_eq!(parsed, None);
}
2693
#[test]
fn parse_url_line_returns_none_for_empty_url() {
    // A `url` key with nothing after the `=` yields no URL.
    let parsed = parse_url_line("url = ");
    assert_eq!(parsed, None);
}
2698
#[test]
fn looks_generated_generated_filename_extension() {
    // The content is unremarkable; only the ".generated." in the file name
    // can be what triggers detection here.
    let content = b"// normal code\n";
    let path = Path::new("schema.generated.ts");
    assert!(looks_generated(path, content));
}
2705
#[test]
fn looks_generated_dot_g_extension() {
    // Unremarkable content with a `.g.cs` file name must be flagged as generated.
    let content = b"// normal code\n";
    let path = Path::new("parser.g.cs");
    assert!(looks_generated(path, content));
}
2711
#[test]
fn looks_minified_whitespace_ratio_is_ok() {
    // Dense code with almost no whitespace, but on a short line: not minified.
    let dense_but_short = b"var x=1,y=2,z=3;\n";
    let flagged = looks_minified(Path::new("app.js"), dense_but_short);
    assert!(!flagged);
}
2718
#[test]
fn is_known_lockfile_pnpm() {
    // `pnpm-lock.yaml` must be recognised as a lockfile.
    let candidate = Path::new("pnpm-lock.yaml");
    assert!(is_known_lockfile(candidate));
}
2723
#[test]
fn is_known_lockfile_pipfile() {
    // `Pipfile.lock` must be recognised as a lockfile.
    let candidate = Path::new("Pipfile.lock");
    assert!(is_known_lockfile(candidate));
}
2728
#[test]
fn is_known_lockfile_poetry() {
    // `poetry.lock` must be recognised as a lockfile.
    let candidate = Path::new("poetry.lock");
    assert!(is_known_lockfile(candidate));
}
2733
#[test]
fn is_known_lockfile_composer() {
    // `composer.lock` must be recognised as a lockfile.
    let candidate = Path::new("composer.lock");
    assert!(is_known_lockfile(candidate));
}
2738
2739    // ── relative_path_string and path_to_string ──────────────────────────────
2740
#[test]
fn relative_path_string_strips_root_prefix() {
    // A file below the root is reported relative to that root.
    let root = Path::new("/tmp/project");
    let file = Path::new("/tmp/project/src/lib.rs");
    assert_eq!(relative_path_string(file, root), "src/lib.rs");
}
2748
#[test]
fn relative_path_string_falls_back_to_full_path() {
    // The file is not below the root, so no prefix can be stripped.
    let root = Path::new("/tmp/project");
    let outside = Path::new("/other/dir/file.rs");
    let rendered = relative_path_string(outside, root);
    // Only requirement: the call does not panic and yields some non-empty text.
    let is_blank = rendered.is_empty();
    assert!(!is_blank);
}
2758
2759    // ── find_duplicate_groups ────────────────────────────────────────────────
2760
2761    #[test]
2762    fn find_duplicate_groups_returns_empty_for_unique_hashes() {
2763        use sloc_languages::{Language, ParseMode, RawLineCounts};
2764        let make_rec = |hash: u64, path: &str| FileRecord {
2765            path: path.into(),
2766            relative_path: path.into(),
2767            language: Some(Language::Rust),
2768            size_bytes: 10,
2769            detected_encoding: Some("utf-8".into()),
2770            raw_line_categories: RawLineCounts::default(),
2771            effective_counts: EffectiveCounts::default(),
2772            status: FileStatus::AnalyzedExact,
2773            warnings: vec![],
2774            generated: false,
2775            minified: false,
2776            vendor: false,
2777            parse_mode: Some(ParseMode::Lexical),
2778            submodule: None,
2779            coverage: None,
2780            style_analysis: None,
2781            cyclomatic_complexity: None,
2782            lsloc: None,
2783            commit_count: None,
2784            last_commit_date: None,
2785            content_hash: hash,
2786        };
2787        let analyzed = vec![make_rec(111, "a.rs"), make_rec(222, "b.rs")];
2788        let groups = find_duplicate_groups(&analyzed);
2789        assert!(groups.is_empty());
2790    }
2791
2792    #[test]
2793    fn find_duplicate_groups_returns_group_for_same_hash() {
2794        use sloc_languages::{Language, ParseMode, RawLineCounts};
2795        let make_rec = |hash: u64, path: &str| FileRecord {
2796            path: path.into(),
2797            relative_path: path.into(),
2798            language: Some(Language::Rust),
2799            size_bytes: 10,
2800            detected_encoding: Some("utf-8".into()),
2801            raw_line_categories: RawLineCounts::default(),
2802            effective_counts: EffectiveCounts::default(),
2803            status: FileStatus::AnalyzedExact,
2804            warnings: vec![],
2805            generated: false,
2806            minified: false,
2807            vendor: false,
2808            parse_mode: Some(ParseMode::Lexical),
2809            submodule: None,
2810            coverage: None,
2811            style_analysis: None,
2812            cyclomatic_complexity: None,
2813            lsloc: None,
2814            commit_count: None,
2815            last_commit_date: None,
2816            content_hash: hash,
2817        };
2818        let analyzed = vec![
2819            make_rec(999, "a.rs"),
2820            make_rec(999, "b.rs"),
2821            make_rec(123, "c.rs"),
2822        ];
2823        let groups = find_duplicate_groups(&analyzed);
2824        assert_eq!(groups.len(), 1);
2825        assert_eq!(groups[0].len(), 2);
2826    }
2827
2828    #[test]
2829    fn find_duplicate_groups_ignores_zero_hash() {
2830        use sloc_languages::{Language, ParseMode, RawLineCounts};
2831        let make_rec = |hash: u64, path: &str| FileRecord {
2832            path: path.into(),
2833            relative_path: path.into(),
2834            language: Some(Language::Rust),
2835            size_bytes: 10,
2836            detected_encoding: Some("utf-8".into()),
2837            raw_line_categories: RawLineCounts::default(),
2838            effective_counts: EffectiveCounts::default(),
2839            status: FileStatus::AnalyzedExact,
2840            warnings: vec![],
2841            generated: false,
2842            minified: false,
2843            vendor: false,
2844            parse_mode: Some(ParseMode::Lexical),
2845            submodule: None,
2846            coverage: None,
2847            style_analysis: None,
2848            cyclomatic_complexity: None,
2849            lsloc: None,
2850            commit_count: None,
2851            last_commit_date: None,
2852            content_hash: hash,
2853        };
2854        // hash=0 means "not computed" — must be excluded from duplicate detection
2855        let analyzed = vec![make_rec(0, "a.rs"), make_rec(0, "b.rs")];
2856        let groups = find_duplicate_groups(&analyzed);
2857        assert!(
2858            groups.is_empty(),
2859            "zero-hash files must not be grouped as duplicates"
2860        );
2861    }
2862
2863    // ── detect_submodules ────────────────────────────────────────────────────
2864
#[test]
fn detect_submodules_no_gitmodules_returns_empty() {
    // A fresh, empty directory contains no `.gitmodules` file.
    let repo = tempfile::tempdir().unwrap();
    let submodules = detect_submodules(repo.path());
    assert!(submodules.is_empty());
}
2871
#[test]
fn detect_submodules_parses_gitmodules_file() {
    let repo = tempfile::tempdir().unwrap();
    // One submodule section with a path and a url entry.
    let gitmodules = concat!(
        "[submodule \"vendor/lib\"]\n",
        "\tpath = vendor/lib\n",
        "\turl = https://github.com/example/lib.git\n",
    );
    let gitmodules_path = repo.path().join(".gitmodules");
    std::fs::write(gitmodules_path, gitmodules).unwrap();

    let submodules = detect_submodules(repo.path());
    assert_eq!(submodules.len(), 1);
    assert_eq!(submodules[0].0, "vendor/lib");
}
2881
2882    // ── write_json / read_json roundtrip ─────────────────────────────────────
2883
#[test]
fn write_json_read_json_roundtrip() {
    use chrono::Utc;
    use sloc_config::AppConfig;
    use sloc_languages::{Language, ParseMode, RawLineCounts};

    let scratch = tempfile::tempdir().unwrap();

    // Pieces of the run, built separately so the final literal stays readable.
    let tool = ToolMetadata {
        name: "sloc".into(),
        version: "0.0.1".into(),
        run_id: "test-roundtrip".into(),
        timestamp_utc: Utc::now(),
    };
    let environment = EnvironmentMetadata {
        operating_system: "test".into(),
        architecture: "x86_64".into(),
        runtime_mode: "test".into(),
        initiator_username: "tester".into(),
        initiator_hostname: "testhost".into(),
        ci_name: None,
    };
    let summary_totals = SummaryTotals {
        files_analyzed: 1,
        code_lines: 5,
        ..SummaryTotals::default()
    };
    // The single analyzed file: five code lines, nothing else.
    let only_record = FileRecord {
        path: "a.rs".into(),
        relative_path: "a.rs".into(),
        language: Some(Language::Rust),
        size_bytes: 50,
        detected_encoding: Some("utf-8".into()),
        raw_line_categories: RawLineCounts {
            code_only_lines: 5,
            ..RawLineCounts::default()
        },
        effective_counts: EffectiveCounts {
            code_lines: 5,
            ..EffectiveCounts::default()
        },
        status: FileStatus::AnalyzedExact,
        warnings: vec![],
        generated: false,
        minified: false,
        vendor: false,
        parse_mode: Some(ParseMode::Lexical),
        submodule: None,
        coverage: None,
        style_analysis: None,
        cyclomatic_complexity: None,
        lsloc: None,
        commit_count: None,
        last_commit_date: None,
        content_hash: 0,
    };

    let run = AnalysisRun {
        tool,
        environment,
        effective_configuration: AppConfig::default(),
        input_roots: vec!["/tmp/test".into()],
        summary_totals,
        totals_by_language: vec![],
        per_file_records: vec![only_record],
        skipped_file_records: vec![],
        warnings: vec![],
        submodule_summaries: vec![],
        git_commit_short: Some("abc1234".into()),
        git_branch: Some("main".into()),
        git_commit_long: None,
        git_commit_author: None,
        git_tags: None,
        git_nearest_tag: None,
        git_commit_date: None,
        git_remote_url: None,
        style_summary: None,
        cocomo: None,
        uloc: 0,
        dryness_pct: None,
        duplicate_groups: vec![],
        duplicates_excluded: 0,
    };

    // Write the run to disk, read it back, and spot-check the fields set above.
    let json_path = scratch.path().join("test.json");
    write_json(&run, &json_path).unwrap();
    let reloaded = read_json(&json_path).unwrap();

    let totals = &reloaded.summary_totals;
    assert_eq!(totals.files_analyzed, 1);
    assert_eq!(totals.code_lines, 5);
    assert_eq!(reloaded.git_commit_short.as_deref(), Some("abc1234"));
    assert_eq!(reloaded.git_branch.as_deref(), Some("main"));
    assert_eq!(reloaded.per_file_records.len(), 1);
}
2969
2970    // ── detect_ci_system ─────────────────────────────────────────────────────
2971
#[test]
fn detect_ci_system_returns_none_without_env_vars() {
    // Known CI marker variables, removed before calling the detector.
    let ci_markers = [
        "JENKINS_URL",
        "JENKINS_HOME",
        "BUILD_URL",
        "GITHUB_ACTIONS",
        "GITLAB_CI",
        "CIRCLECI",
        "TRAVIS",
        "TF_BUILD",
        "TEAMCITY_VERSION",
    ];
    ci_markers
        .iter()
        .for_each(|name| std::env::remove_var(name));

    // The outcome still depends on the test runner's environment, so the value
    // is deliberately discarded: this only checks that the call does not panic.
    let _ = detect_ci_system();
}
2991
2992    // ── resolve_git_file_pointer ──────────────────────────────────────────────
2993
#[test]
fn resolve_git_file_pointer_valid_absolute_gitdir() {
    let scratch = tempfile::tempdir().unwrap();
    let root = scratch.path();

    // The directory the pointer file will refer to.
    let target_dir = root.join("real.git");
    fs::create_dir_all(&target_dir).unwrap();

    // A `.git` *file* whose single line names the target by absolute path.
    let pointer_file = root.join(".git");
    let pointer_line = format!("gitdir: {}\n", target_dir.display());
    fs::write(&pointer_file, pointer_line).unwrap();

    let resolved = resolve_git_file_pointer(&pointer_file, root);
    assert!(
        resolved.is_some(),
        "should resolve a valid absolute gitdir pointer"
    );
    // Whatever form the path comes back in, it must name an existing directory.
    assert!(resolved.unwrap().is_dir());
}
3012
#[test]
fn resolve_git_file_pointer_missing_gitdir_prefix_returns_none() {
    let scratch = tempfile::tempdir().unwrap();
    let pointer_file = scratch.path().join(".git");
    // The file content does not start with the `gitdir:` key.
    fs::write(&pointer_file, "not a gitdir line\n").unwrap();

    let resolved = resolve_git_file_pointer(&pointer_file, scratch.path());
    assert!(resolved.is_none());
}
3020
#[test]
fn resolve_git_file_pointer_unreadable_path_returns_none() {
    // Neither the pointer file nor its base directory is expected to exist.
    let pointer_file = Path::new("/nonexistent/__sloc_test_git_file__");
    let base_dir = Path::new("/nonexistent");
    let resolved = resolve_git_file_pointer(pointer_file, base_dir);
    assert!(resolved.is_none());
}
3029
#[test]
fn resolve_git_file_pointer_nonexistent_target_returns_none() {
    let scratch = tempfile::tempdir().unwrap();
    let pointer_file = scratch.path().join(".git");
    // Well-formed pointer line, but the directory it names does not exist.
    let pointer_line = "gitdir: /nonexistent/__sloc_fake_gitdir_xyz__\n";
    fs::write(&pointer_file, pointer_line).unwrap();

    let resolved = resolve_git_file_pointer(&pointer_file, scratch.path());
    assert!(resolved.is_none());
}
3038
#[test]
fn resolve_git_file_pointer_relative_path() {
    let scratch = tempfile::tempdir().unwrap();
    let root = scratch.path();
    fs::create_dir_all(root.join("real_git_dir")).unwrap();

    // The pointer names the directory relative to the base directory passed in.
    let pointer_file = root.join(".git");
    fs::write(&pointer_file, "gitdir: real_git_dir\n").unwrap();

    let resolved = resolve_git_file_pointer(&pointer_file, root);
    assert!(resolved.is_some());
}
3050
3051    // ── resolve_ref ──────────────────────────────────────────────────────────
3052
#[test]
fn resolve_ref_from_loose_file() {
    let dir = tempfile::tempdir().unwrap();
    let git_dir = dir.path();
    fs::create_dir_all(git_dir.join("refs/heads")).unwrap();

    // A loose ref file holds one full object name followed by a newline.
    // The fixture previously had 41 characters, which is not a valid SHA-1;
    // keep it at exactly 40 hex digits so the test exercises realistic data.
    let sha = "abc1234567890abcdef1234567890abcdef12345";
    assert_eq!(sha.len(), 40, "fixture must be a full 40-hex SHA-1");
    fs::write(git_dir.join("refs/heads/main"), format!("{sha}\n")).unwrap();

    // The value comes back without the trailing newline.
    let result = resolve_ref(git_dir, "refs/heads/main");
    assert_eq!(result.as_deref(), Some(sha));
}
3064
#[test]
fn resolve_ref_from_packed_refs() {
    let scratch = tempfile::tempdir().unwrap();
    let git_dir = scratch.path();
    let sha = "def5678def5678def5678def5678def5678def56";

    // No loose ref exists; the only source is a packed-refs file with a header
    // comment and one `<sha> <refname>` entry.
    let packed =
        format!("# pack-refs with: peeled fully-peeled sorted\n{sha} refs/heads/feature\n");
    fs::write(git_dir.join("packed-refs"), packed).unwrap();

    let resolved = resolve_ref(git_dir, "refs/heads/feature");
    assert_eq!(resolved.as_deref(), Some(sha));
}
3079
#[test]
fn resolve_ref_not_found_returns_none() {
    // Empty directory: neither a loose ref nor a packed-refs file exists.
    let scratch = tempfile::tempdir().unwrap();
    let resolved = resolve_ref(scratch.path(), "refs/heads/nonexistent-branch-xyz");
    assert!(resolved.is_none());
}
3086
#[test]
fn resolve_ref_packed_refs_skips_comment_and_peeled() {
    let scratch = tempfile::tempdir().unwrap();
    let git_dir = scratch.path();
    let sha = "aaa1111aaa1111aaa1111aaa1111aaa1111aaa11";

    // A `#` comment line and a `^` peeled line precede the real entry; the
    // lookup must still find the tag.
    let packed = format!("# comment\n^peeled-object-sha\n{sha} refs/tags/v1.0\n");
    fs::write(git_dir.join("packed-refs"), packed).unwrap();

    let resolved = resolve_ref(git_dir, "refs/tags/v1.0");
    assert_eq!(resolved.as_deref(), Some(sha));
}
3101
#[test]
fn resolve_ref_loose_sha_too_short_falls_through_to_packed() {
    let scratch = tempfile::tempdir().unwrap();
    let git_dir = scratch.path();
    let heads = git_dir.join("refs/heads");
    fs::create_dir_all(&heads).unwrap();

    // The loose ref file exists but its content is far too short to be a SHA.
    fs::write(heads.join("main"), "short\n").unwrap();

    // With no packed-refs file to fall back on, the lookup yields nothing.
    let resolved = resolve_ref(git_dir, "refs/heads/main");
    assert!(resolved.is_none());
}
3113
3114    // ── read_git_remote_url ───────────────────────────────────────────────────
3115
#[test]
fn read_git_remote_url_parses_origin_url() {
    let scratch = tempfile::tempdir().unwrap();
    let git_dir = scratch.path().join(".git");
    fs::create_dir_all(&git_dir).unwrap();

    // A config with a `[core]` section followed by the `origin` remote.
    let config = concat!(
        "[core]\n",
        "\trepositoryformatversion = 0\n",
        "[remote \"origin\"]\n",
        "\turl = https://github.com/org/repo.git\n",
        "\tfetch = +refs/heads/*:refs/remotes/origin/*\n",
    );
    fs::write(git_dir.join("config"), config).unwrap();

    let remote = read_git_remote_url(&git_dir);
    assert_eq!(remote.as_deref(), Some("https://github.com/org/repo.git"));
}
3129
#[test]
fn read_git_remote_url_no_config_returns_none() {
    let scratch = tempfile::tempdir().unwrap();
    let git_dir = scratch.path().join(".git");
    // The `.git` directory exists but holds no `config` file.
    fs::create_dir_all(&git_dir).unwrap();

    let remote = read_git_remote_url(&git_dir);
    assert!(remote.is_none());
}
3139
3140    // ── detect_git_for_run — HEAD edge cases ──────────────────────────────────
3141
#[test]
fn detect_git_for_run_no_git_dir_returns_default() {
    // An empty directory has neither a `.git` directory nor a `.git` file.
    let scratch = tempfile::tempdir().unwrap();
    let git_info = detect_git_for_run(scratch.path());
    assert!(git_info.commit_long.is_none());
}
3149
#[test]
fn detect_git_for_run_unreadable_head_returns_default() {
    let scratch = tempfile::tempdir().unwrap();
    // `.git` exists as a directory but contains no HEAD file to read.
    fs::create_dir_all(scratch.path().join(".git")).unwrap();

    let git_info = detect_git_for_run(scratch.path());
    assert!(git_info.commit_long.is_none());
}
3159
#[test]
fn detect_git_for_run_detached_head_with_sha() {
    let scratch = tempfile::tempdir().unwrap();
    let git_dir = scratch.path().join(".git");
    fs::create_dir_all(&git_dir).unwrap();

    // HEAD holds a bare 40-character object name instead of a `ref:` line.
    let sha = "abc1234567890abcdef1234567890abcdef12345";
    fs::write(git_dir.join("HEAD"), sha).unwrap();

    let git_info = detect_git_for_run(scratch.path());
    // The long form is the whole SHA; the short form is its first seven characters.
    assert_eq!(git_info.commit_long.as_deref(), Some(sha));
    assert_eq!(git_info.commit_short.as_deref(), Some("abc1234"));
}
3173
#[test]
fn detect_git_for_run_with_packed_ref() {
    let scratch = tempfile::tempdir().unwrap();
    let git_dir = scratch.path().join(".git");
    fs::create_dir_all(&git_dir).unwrap();

    // HEAD is symbolic, and the branch it names is recorded only in packed-refs.
    let sha = "deadbeef00000000000000000000000000000000";
    fs::write(git_dir.join("HEAD"), "ref: refs/heads/main\n").unwrap();
    let packed = format!("# pack-refs\n{sha} refs/heads/main\n");
    fs::write(git_dir.join("packed-refs"), packed).unwrap();

    let git_info = detect_git_for_run(scratch.path());
    assert_eq!(git_info.commit_long.as_deref(), Some(sha));
    assert_eq!(git_info.branch.as_deref(), Some("main"));
}
3191
3192    // ── ci_branch_from_env ───────────────────────────────────────────────────
3193
// Note: ci_branch_from_env env-var tests share a mutex to avoid parallel interference.
use std::sync::{Mutex, OnceLock};
static CI_ENV_LOCK: OnceLock<Mutex<()>> = OnceLock::new();

// Acquires the process-wide lock that serialises tests mutating CI-related
// environment variables. Hold the returned guard for the whole test.
//
// A test that panics while holding the guard poisons the mutex. The mutex
// protects no data (its payload is `()`), so poisoning carries no meaning here;
// the guard is recovered instead of unwrapping, otherwise one failing test
// would make every later env test fail with an unrelated `PoisonError` panic.
fn ci_env_lock() -> std::sync::MutexGuard<'static, ()> {
    CI_ENV_LOCK
        .get_or_init(|| Mutex::new(()))
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner)
}
3200
// Removes every branch-related environment variable used by the
// `ci_branch_from_env` tests so each test starts from, and leaves behind,
// a clean environment.
fn clear_branch_env_vars() {
    let branch_vars = [
        "BRANCH_NAME",
        "GIT_BRANCH",
        "GITHUB_REF_NAME",
        "CI_COMMIT_BRANCH",
        "CIRCLE_BRANCH",
        "TRAVIS_BRANCH",
        "BUILD_SOURCEBRANCH",
    ];
    branch_vars
        .iter()
        .for_each(|name| std::env::remove_var(name));
}
3214
#[test]
fn ci_branch_from_env_strips_refs_heads_prefix() {
    // Hold the shared lock for the whole test; env vars are process-global.
    let _lock = ci_env_lock();
    clear_branch_env_vars();

    // BUILD_SOURCEBRANCH carries a fully qualified ref name.
    std::env::set_var("BUILD_SOURCEBRANCH", "refs/heads/my-branch");
    let detected = ci_branch_from_env();
    // Clean up before asserting so a failure does not leak the variable.
    clear_branch_env_vars();

    assert_eq!(detected.as_deref(), Some("my-branch"));
}
3225
#[test]
fn ci_branch_from_env_strips_origin_prefix() {
    // Hold the shared lock for the whole test; env vars are process-global.
    let _lock = ci_env_lock();
    clear_branch_env_vars();

    // GIT_BRANCH carries a remote-qualified branch name.
    std::env::set_var("GIT_BRANCH", "origin/develop");
    let detected = ci_branch_from_env();
    // Clean up before asserting so a failure does not leak the variable.
    clear_branch_env_vars();

    assert_eq!(detected.as_deref(), Some("develop"));
}
3235
#[test]
fn ci_branch_from_env_returns_none_for_head() {
    // Hold the shared lock for the whole test; env vars are process-global.
    let _lock = ci_env_lock();
    clear_branch_env_vars();

    // The literal value "HEAD" is not a usable branch name, and no other
    // branch variable is set, so nothing should be reported.
    std::env::set_var("BRANCH_NAME", "HEAD");
    let branch = ci_branch_from_env();
    // Clean up before asserting so a failure does not leak the variable.
    clear_branch_env_vars();

    assert!(branch.is_none(), "HEAD should be filtered, got: {branch:?}");
}
3247}
sloc_core/lib.rs

sloc_core/
lib.rs