Skip to main content

sloc_core/
lib.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2// Copyright (C) 2026 Nima Shafie <nimzshafie@gmail.com>
3#![allow(clippy::multiple_crate_versions)]
4
5pub mod baseline;
6pub mod coverage;
7pub mod delta;
8pub mod history;
9pub use baseline::{check_against_baseline, resolve_baselines_path, BaselineEntry, BaselineStore};
10pub use coverage::{aggregate_line_coverage, lookup_coverage, parse_lcov, FileCoverage};
11pub use delta::{compute_delta, FileChangeStatus, FileDelta, ScanComparison, SummaryDelta};
12pub use history::{RegistryEntry, ScanRegistry, ScanSummarySnapshot, WatchedDirsStore};
13
14use std::collections::{BTreeMap, BTreeSet, HashSet};
15use std::fs;
16use std::path::{Path, PathBuf};
17use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
18use std::sync::Arc;
19
20use anyhow::{Context, Result};
21use chrono::{DateTime, Utc};
22use encoding_rs::{UTF_16BE, UTF_16LE, WINDOWS_1252};
23use globset::{Glob, GlobSet, GlobSetBuilder};
24use ignore::WalkBuilder;
25use serde::{Deserialize, Serialize};
26use uuid::Uuid;
27
28use sloc_config::{
29    AppConfig, BinaryFileBehavior, BlankInBlockCommentPolicy, ContinuationLinePolicy,
30    FailureBehavior, MixedLinePolicy,
31};
32use sloc_languages::style::IndentStyle;
33use sloc_languages::{
34    analyze_text, detect_language, supported_languages, AnalysisOptions, Language, ParseMode,
35    RawLineCounts, StyleAnalysis,
36};
37
// ── Analysis thread limits, detection sample sizes and thresholds ────────────

/// Maximum number of worker threads used for parallel file analysis.
const MAX_ANALYSIS_THREADS: usize = 16;
/// Fallback thread count when `available_parallelism` is unavailable.
const DEFAULT_ANALYSIS_THREADS: usize = 4;
/// Byte sample used to detect `@generated` markers.
const GENERATED_SAMPLE_BYTES: usize = 1024;
/// Byte sample used to detect minified files via line-length heuristic.
const MINIFIED_SAMPLE_BYTES: usize = 4096;
/// Longest line length above which a file is considered minified.
const MINIFIED_LINE_THRESHOLD: usize = 2000;
/// Byte sample used to detect binary files via null-byte scan.
const BINARY_SAMPLE_BYTES: usize = 8192;
52
/// Atomics shared between `analyze()` and the caller so the caller can poll scan progress.
///
/// Both counters are updated with `Ordering::Relaxed`, so a poller sees
/// monotonically growing values but no ordering relative to other memory.
pub struct ProgressCounters {
    /// Number of candidate files processed so far (incremented per file, across all threads).
    pub files_done: Arc<AtomicUsize>,
    /// Total candidate files discovered (set before parallel analysis begins).
    /// `walk_root` adds each root's discovered path count to this value.
    pub files_total: Arc<AtomicUsize>,
}
60
/// Three-way outcome for metadata-level policy checks.
///
/// Private to this module; the code that produces and consumes it is not
/// visible in this part of the file.
enum MetadataPolicyOutcome {
    /// Skip this file — include the record in output.
    /// The record is boxed (presumably to keep this enum small — TODO confirm).
    Skip(Box<FileRecord>),
    /// Exclude this file entirely — no record in output (include-glob miss).
    Exclude,
    /// Continue to content checks.
    Continue,
}
70
/// Outcome recorded for a single file in [`FileRecord::status`].
///
/// Serialized in `snake_case` (e.g. `analyzed_exact`, `skipped_binary`).
/// NOTE(review): the variant descriptions below are inferred from the variant
/// names; the code that assigns them is outside this view.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FileStatus {
    /// File was analyzed (presumably with an exact parse — confirm against the analyzer).
    AnalyzedExact,
    /// File was analyzed (presumably with a fallback / best-effort parse).
    AnalyzedBestEffort,
    /// File was skipped (presumably because it was detected as binary).
    SkippedBinary,
    /// File was skipped (presumably because its bytes could not be decoded).
    SkippedDecodeError,
    /// File was skipped (presumably because its language is not supported).
    SkippedUnsupported,
    /// File was skipped (presumably because a configured policy excluded it).
    SkippedByPolicy,
    /// An internal error occurred while handling the file (presumably).
    ErrorInternal,
}
82
/// Line counts stored per file in [`FileRecord::effective_counts`], alongside the
/// raw categories in `raw_line_categories`.
///
/// NOTE(review): presumably the raw categories after the configured line
/// policies (mixed-line, continuation, …) are applied — confirm against the
/// code that builds this value. `Default` yields all zeros.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct EffectiveCounts {
    /// Lines counted as code.
    pub code_lines: u64,
    /// Lines counted as comments.
    pub comment_lines: u64,
    /// Lines counted as blank.
    pub blank_lines: u64,
    /// Mixed code/comment lines reported separately (assumed: only under a
    /// "separate" mixed-line policy — TODO confirm).
    pub mixed_lines_separate: u64,
}
90
/// Identification of the tool invocation that produced an [`AnalysisRun`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolMetadata {
    /// Tool name.
    pub name: String,
    /// Tool version string.
    pub version: String,
    /// Identifier of this run (presumably a UUID, given the `uuid` import — TODO confirm).
    pub run_id: String,
    /// UTC timestamp associated with the run.
    pub timestamp_utc: DateTime<Utc>,
}
98
/// Description of the machine and context a scan ran in.
///
/// NOTE(review): the code that fills this struct is outside this view; the
/// username / hostname fields presumably come from `get_current_username` and
/// `get_hostname`, and `ci_name` from `detect_ci_system` — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnvironmentMetadata {
    /// Operating system name.
    pub operating_system: String,
    /// CPU architecture name.
    pub architecture: String,
    /// Mode the tool was run in (free-form string supplied by the caller).
    pub runtime_mode: String,
    /// Name of the user that started the scan.
    pub initiator_username: String,
    /// Host (or CI agent/runner) the scan ran on.
    pub initiator_hostname: String,
    /// CI system name when the scan runs inside a known CI environment (Jenkins,
    /// GitHub Actions, GitLab CI, …). `None` for interactive / local runs.
    /// Omitted from serialized output when `None`; defaults to `None` when absent on input.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub ci_name: Option<String>,
}
111
/// Run-wide totals stored in [`AnalysisRun::summary_totals`].
///
/// Every field marked `#[serde(default)]` deserializes to `0` when missing, so
/// serialized runs that lack those fields can still be loaded.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct SummaryTotals {
    /// Number of files considered by the scan.
    pub files_considered: u64,
    /// Number of files that were analyzed.
    pub files_analyzed: u64,
    /// Number of files that were skipped.
    pub files_skipped: u64,
    /// Total physical lines.
    pub total_physical_lines: u64,
    /// Total code lines.
    pub code_lines: u64,
    /// Total comment lines.
    pub comment_lines: u64,
    /// Total blank lines.
    pub blank_lines: u64,
    /// Total mixed code/comment lines reported separately.
    pub mixed_lines_separate: u64,
    /// Function count across analyzed files.
    #[serde(default)]
    pub functions: u64,
    /// Class count across analyzed files.
    #[serde(default)]
    pub classes: u64,
    /// Variable count across analyzed files.
    #[serde(default)]
    pub variables: u64,
    /// Import count across analyzed files.
    #[serde(default)]
    pub imports: u64,
    /// Test count across analyzed files.
    #[serde(default)]
    pub test_count: u64,
    /// Lexically detected test assertion call lines across all analyzed files.
    #[serde(default)]
    pub test_assertion_count: u64,
    /// Lexically detected test suite / fixture / group declaration lines across all analyzed files.
    #[serde(default)]
    pub test_suite_count: u64,
    /// Aggregated from LCOV data when provided.
    #[serde(default)]
    pub coverage_lines_found: u64,
    /// Coverage: lines hit (companion to `coverage_lines_found`).
    #[serde(default)]
    pub coverage_lines_hit: u64,
    /// Coverage: functions found.
    #[serde(default)]
    pub coverage_functions_found: u64,
    /// Coverage: functions hit.
    #[serde(default)]
    pub coverage_functions_hit: u64,
    /// Coverage: branches found.
    #[serde(default)]
    pub coverage_branches_found: u64,
    /// Coverage: branches hit.
    #[serde(default)]
    pub coverage_branches_hit: u64,
}
152
/// Totals for one language, as stored in [`AnalysisRun::totals_by_language`] and
/// [`SubmoduleSummary::language_summaries`].
///
/// The counters mirror those of [`SummaryTotals`], restricted to `language`.
/// Fields marked `#[serde(default)]` deserialize to `0` when missing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LanguageSummary {
    /// Language these totals belong to.
    pub language: Language,
    /// Number of files in this language.
    pub files: u64,
    /// Total physical lines.
    pub total_physical_lines: u64,
    /// Total code lines.
    pub code_lines: u64,
    /// Total comment lines.
    pub comment_lines: u64,
    /// Total blank lines.
    pub blank_lines: u64,
    /// Total mixed code/comment lines reported separately.
    pub mixed_lines_separate: u64,
    /// Function count.
    #[serde(default)]
    pub functions: u64,
    /// Class count.
    #[serde(default)]
    pub classes: u64,
    /// Variable count.
    #[serde(default)]
    pub variables: u64,
    /// Import count.
    #[serde(default)]
    pub imports: u64,
    /// Test count.
    #[serde(default)]
    pub test_count: u64,
    /// Test assertion line count.
    #[serde(default)]
    pub test_assertion_count: u64,
    /// Test suite / fixture / group declaration line count.
    #[serde(default)]
    pub test_suite_count: u64,
    /// Coverage: lines found.
    #[serde(default)]
    pub coverage_lines_found: u64,
    /// Coverage: lines hit.
    #[serde(default)]
    pub coverage_lines_hit: u64,
    /// Coverage: functions found.
    #[serde(default)]
    pub coverage_functions_found: u64,
    /// Coverage: functions hit.
    #[serde(default)]
    pub coverage_functions_hit: u64,
    /// Coverage: branches found.
    #[serde(default)]
    pub coverage_branches_found: u64,
    /// Coverage: branches hit.
    #[serde(default)]
    pub coverage_branches_hit: u64,
}
189
/// Everything recorded about one discovered file, whether analyzed or skipped.
///
/// Records end up in either [`AnalysisRun::per_file_records`] or
/// [`AnalysisRun::skipped_file_records`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileRecord {
    /// Path of the file as a string.
    pub path: String,
    /// Path relative to the scan root. `process_submodules` compares it against
    /// `/`-separated submodule prefixes, so it is expected to use forward slashes.
    pub relative_path: String,
    /// Detected language, if any.
    pub language: Option<Language>,
    /// File size in bytes.
    pub size_bytes: u64,
    /// Name of the detected text encoding, if any.
    pub detected_encoding: Option<String>,
    /// Raw per-category line counts from the language analyzer.
    pub raw_line_categories: RawLineCounts,
    /// Line counts reported for this file (see [`EffectiveCounts`]).
    pub effective_counts: EffectiveCounts,
    /// Outcome of processing this file.
    pub status: FileStatus,
    /// Warnings attached to this file.
    pub warnings: Vec<String>,
    /// Whether the file was flagged as generated.
    pub generated: bool,
    /// Whether the file was flagged as minified.
    pub minified: bool,
    /// Whether the file was flagged as vendored.
    pub vendor: bool,
    /// Parse mode used by the analyzer, if the file was parsed.
    pub parse_mode: Option<ParseMode>,
    /// Name of the git submodule containing this file; set by `process_submodules`
    /// when the file's `relative_path` lies under a submodule path. Omitted from
    /// serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub submodule: Option<String>,
    /// Line/function/branch coverage from an external LCOV file, when provided.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub coverage: Option<FileCoverage>,
    /// Lexical style-guide adherence analysis; `None` for unsupported languages.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub style_analysis: Option<StyleAnalysis>,
}
214
/// Per-language-family style aggregation within a `StyleSummary`.
///
/// NOTE(review): `line80_compliant_pct` is documented here as using the
/// *configured* column threshold, whereas the field of the same name on
/// `StyleSummary` is documented as "legacy, always 80". One of the two
/// descriptions is likely stale — confirm against the code that fills them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LanguageStyleGroup {
    /// Display label, e.g. `"C / C++"`, `"Python"`, `"JavaScript"`.
    pub language_family: String,
    /// Number of files in this group.
    pub files_count: u32,
    /// Name of the guide with the highest average adherence.
    pub dominant_guide: String,
    /// Average adherence of the dominant guide (0–100).
    pub dominant_score_pct: u8,
    /// Most common indent style across the group.
    pub common_indent_style: String,
    /// Average guide adherence scores (guide name, 0–100) sorted descending.
    pub guide_avg_scores: Vec<(String, u8)>,
    /// Percentage of files (0–100) where ≤ 5 % of lines exceed the configured column threshold.
    pub line80_compliant_pct: u8,
    /// Same as `line80_compliant_pct` but named for the actual configured threshold.
    pub line_col_compliant_pct: u8,
}
235
/// Aggregate multi-language style-guide adherence across all analysed files.
///
/// Stored in [`AnalysisRun::style_summary`]; also reachable through the
/// `CppStyleSummary` alias.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StyleSummary {
    /// Total files for which style data was produced.
    pub files_analyzed: u32,
    /// Most common indent style across *all* analysed files.
    pub common_indent_style: String,
    /// Percentage of all analysed files (0–100) with ≤ 5 % of lines over 80 chars (legacy, always 80).
    pub line80_compliant_pct: u8,
    /// Percentage of all analysed files (0–100) with ≤ 5 % of lines over `col_threshold` chars.
    pub line_col_compliant_pct: u8,
    /// Column-width threshold used for `line_col_compliant_pct` (from `analysis.style_col_threshold`).
    pub col_threshold: u16,
    /// Per-language-family breakdown, sorted by `files_count` descending.
    pub by_language: Vec<LanguageStyleGroup>,
}
252
/// Backward-compatible alias kept so that `sloc-report` and `sloc-web` can migrate
/// incrementally without a breaking change on the same release.
///
/// It is a plain type alias, so `CppStyleSummary` and `StyleSummary` are the
/// same type and can be used interchangeably.
pub type CppStyleSummary = StyleSummary;
256
/// Per-submodule aggregated stats produced when `submodule_breakdown` is enabled.
///
/// Returned by `process_submodules` and stored in
/// [`AnalysisRun::submodule_summaries`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SubmoduleSummary {
    /// Submodule name.
    pub name: String,
    /// Submodule path (presumably relative to the scan root — TODO confirm).
    pub relative_path: String,
    /// Number of analyzed files inside the submodule.
    pub files_analyzed: u64,
    /// Total physical lines inside the submodule.
    pub total_physical_lines: u64,
    /// Total code lines inside the submodule.
    pub code_lines: u64,
    /// Total comment lines inside the submodule.
    pub comment_lines: u64,
    /// Total blank lines inside the submodule.
    pub blank_lines: u64,
    /// Per-language totals for the submodule's files.
    pub language_summaries: Vec<LanguageSummary>,
}
269
/// Complete, serializable result of one scan: metadata, configuration, totals,
/// per-file records and optional git / style information.
///
/// All optional trailing fields use `#[serde(default)]` and are skipped when
/// empty, so serialized runs that lack them can still be deserialized.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalysisRun {
    /// Tool identification for this run.
    pub tool: ToolMetadata,
    /// Machine / CI context of this run.
    pub environment: EnvironmentMetadata,
    /// Configuration the run was executed with.
    pub effective_configuration: AppConfig,
    /// Scan roots, as strings.
    pub input_roots: Vec<String>,
    /// Run-wide totals.
    pub summary_totals: SummaryTotals,
    /// Per-language totals.
    pub totals_by_language: Vec<LanguageSummary>,
    /// Records of analyzed files.
    pub per_file_records: Vec<FileRecord>,
    /// Records of skipped files.
    pub skipped_file_records: Vec<FileRecord>,
    /// Run-level warnings (e.g. discovery warnings collected while walking).
    pub warnings: Vec<String>,
    /// Non-empty only when `discovery.submodule_breakdown` is enabled.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub submodule_summaries: Vec<SubmoduleSummary>,
    /// Short git commit SHA (7 chars) at scan time, if the project is a git repo.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_short: Option<String>,
    /// Full git commit SHA at scan time, if the project is a git repo.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_long: Option<String>,
    /// Git branch active at scan time, if the project is a git repo.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_branch: Option<String>,
    /// Author of the last git commit at scan time.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_author: Option<String>,
    /// Comma-separated git tags pointing at HEAD at scan time.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_tags: Option<String>,
    /// Nearest ancestor release tag (output of `git describe --tags --abbrev=0`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_nearest_tag: Option<String>,
    /// ISO 8601 author-date of the last git commit at scan time.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_date: Option<String>,
    /// URL of the `origin` remote as recorded in `.git/config` at scan time.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_remote_url: Option<String>,
    /// Multi-language style-guide adherence; `None` when no supported files were analysed.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub style_summary: Option<StyleSummary>,
}
312
/// Git metadata gathered by `detect_git_for_run`. Every field is optional and
/// `Default` yields all `None`.
#[derive(Default)]
struct GitInfo {
    /// First 7 characters of `commit_long`.
    commit_short: Option<String>,
    /// Commit SHA that HEAD resolves to (from the ref files, or HEAD itself when detached).
    commit_long: Option<String>,
    /// Branch name from the HEAD ref, falling back to CI environment variables.
    branch: Option<String>,
    /// Output of `git log -1 --format=%an HEAD`.
    author: Option<String>,
    /// Output lines of `git tag --points-at HEAD`, joined with `", "`.
    tags: Option<String>,
    /// Output of `git describe --tags --abbrev=0 HEAD`.
    nearest_tag: Option<String>,
    /// Output of `git log -1 --format=%aI HEAD`.
    commit_date: Option<String>,
    /// URL of the `origin` remote read from the repository's `config` file.
    remote_url: Option<String>,
}
324
325/// Locate the `.git` directory by walking up from `start`.
326/// Handles plain repos, worktrees (`.git` is a file with `gitdir:` pointer), and
327/// submodules. Returns `None` if no git repo is found.
328fn find_git_dir(start: &Path) -> Option<PathBuf> {
329    let mut current = Some(start);
330    while let Some(dir) = current {
331        let candidate = dir.join(".git");
332        if candidate.is_dir() {
333            return Some(candidate);
334        }
335        if candidate.is_file() {
336            if let Some(resolved) = resolve_git_file_pointer(&candidate, dir) {
337                return Some(resolved);
338            }
339        }
340        current = dir.parent();
341    }
342    None
343}
344
/// Follow a `.git` pointer *file* (as written for worktrees and submodules) to
/// the directory it names.
///
/// `file` is the pointer file and `base_dir` is the directory that relative
/// targets are resolved against. Returns `None` when the file cannot be read,
/// when its trimmed content does not start with `gitdir: `, or when the
/// resulting path is not an existing directory.
fn resolve_git_file_pointer(file: &Path, base_dir: &Path) -> Option<PathBuf> {
    let text = fs::read_to_string(file).ok()?;
    // Pointer files use forward slashes; convert them to the OS separator so
    // that the path operations below behave correctly on Windows.
    let target = text
        .trim()
        .strip_prefix("gitdir: ")?
        .replace('/', std::path::MAIN_SEPARATOR_STR);
    let target = Path::new(&target);

    let joined = if target.is_absolute() {
        target.to_path_buf()
    } else {
        base_dir.join(target)
    };
    // Prefer the canonical form (".." and symlinks resolved), but keep the
    // plain joined path when canonicalization is not possible.
    let final_path = match joined.canonicalize() {
        Ok(canonical) => canonical,
        Err(_) => joined,
    };
    final_path.is_dir().then_some(final_path)
}
369
/// Resolve a git ref name (e.g. `refs/heads/main`) to a full commit SHA.
///
/// Lookup order:
/// 1. the loose ref file `<git_dir>/<refname>`; it is accepted only when its
///    trimmed content is at least 40 characters of ASCII hex digits;
/// 2. the `packed-refs` file, whose entries have the form `<sha> <refname>`.
///
/// Returns `None` when neither source contains the ref. Lines in `packed-refs`
/// that are comments (`#`), peeled-tag lines (`^`), blank, or otherwise lack a
/// space separator are skipped rather than aborting the search.
fn resolve_ref(git_dir: &Path, refname: &str) -> Option<String> {
    let is_full_sha = |s: &str| s.len() >= 40 && s.chars().all(|c| c.is_ascii_hexdigit());

    // Join each forward-slash component individually so the loose-ref path
    // uses the OS-native separator on every platform.
    let ref_path = refname
        .split('/')
        .fold(git_dir.to_path_buf(), |path, part| path.join(part));
    if let Ok(raw) = fs::read_to_string(&ref_path) {
        let sha = raw.trim();
        if is_full_sha(sha) {
            return Some(sha.to_string());
        }
    }

    // `str::lines()` handles both \n and \r\n, so Windows line endings are fine.
    let packed = fs::read_to_string(git_dir.join("packed-refs")).ok()?;
    packed
        .lines()
        .filter(|line| !line.starts_with('#') && !line.starts_with('^'))
        // A line without a space is malformed: skip it and keep searching.
        .filter_map(|line| line.split_once(' '))
        .find(|(_, name)| name.trim() == refname)
        .map(|(sha, _)| sha.to_string())
}
405
/// Extract the URL value from a `url = <value>` git-config line.
///
/// The key is matched case-insensitively, because git-config variable names are
/// case-insensitive (`URL = …` and `Url = …` are valid). Spaces and tabs
/// between the key and `=` are allowed. Returns `None` when the line is not a
/// `url` assignment or when the value is empty after trimming.
///
/// The caller is expected to pass an already-trimmed line.
fn parse_url_line(line: &str) -> Option<&str> {
    // `get` returns `None` for short lines and for non-char-boundary splits,
    // so multi-byte input can never panic here.
    let key = line.get(..3)?;
    let rest = line.get(3..)?;
    if !key.eq_ignore_ascii_case("url") {
        return None;
    }
    let url = rest
        .trim_start_matches([' ', '\t'])
        .strip_prefix('=')?
        .trim();
    (!url.is_empty()).then_some(url)
}
417
418/// Parse `.git/config` and return the URL of the `origin` remote, if present.
419fn read_git_remote_url(git_dir: &Path) -> Option<String> {
420    let config = fs::read_to_string(git_dir.join("config")).ok()?;
421    let mut in_origin = false;
422    for line in config.lines() {
423        let trimmed = line.trim();
424        if trimmed.starts_with('[') {
425            in_origin = trimmed == r#"[remote "origin"]"#;
426        } else if in_origin {
427            if let Some(url) = parse_url_line(trimmed) {
428                return Some(url.to_owned());
429            }
430        }
431    }
432    None
433}
434
435/// Detect git metadata by reading `.git/` files directly — no `git` executable
436/// needed. Falls back gracefully for detached HEADs, shallow clones, and missing
437/// reflogs.
438fn detect_git_for_run(project_path: &Path) -> GitInfo {
439    // Resolve the CI branch early so it can fill in any gap in git metadata.
440    let ci_branch = ci_branch_from_env();
441
442    let Some(git_dir) = find_git_dir(project_path) else {
443        // No .git directory (e.g. scanning a non-repo path in CI). Use whatever
444        // the CI system tells us about the branch.
445        return GitInfo {
446            branch: ci_branch,
447            ..GitInfo::default()
448        };
449    };
450
451    let head_raw = match fs::read_to_string(git_dir.join("HEAD")) {
452        Ok(s) => s.trim().to_string(),
453        Err(_) => {
454            return GitInfo {
455                branch: ci_branch,
456                ..GitInfo::default()
457            }
458        }
459    };
460
461    let (branch_from_head, commit_long) = head_raw.strip_prefix("ref: ").map_or_else(
462        || {
463            if head_raw.len() >= 40 && head_raw.chars().all(|c| c.is_ascii_hexdigit()) {
464                // Detached HEAD — HEAD file is the commit SHA (common in CI checkouts).
465                (None, Some(head_raw[..40].to_string()))
466            } else {
467                (None, None)
468            }
469        },
470        |refname| {
471            let branch = refname
472                .strip_prefix("refs/heads/")
473                .map(|b| b.trim().to_string());
474            let sha = resolve_ref(&git_dir, refname.trim());
475            (branch, sha)
476        },
477    );
478    // Prefer the branch name derived from the HEAD ref; fall back to the CI
479    // env var (covers detached-HEAD checkouts done by Jenkins, GitHub Actions, etc.).
480    let branch = branch_from_head.or(ci_branch);
481
482    let commit_short = commit_long
483        .as_deref()
484        .map(|s| s.chars().take(7).collect::<String>());
485
486    let author = run_git_cmd(project_path, &["log", "-1", "--format=%an", "HEAD"]);
487    let commit_date = run_git_cmd(project_path, &["log", "-1", "--format=%aI", "HEAD"]);
488    let remote_url = read_git_remote_url(&git_dir);
489
490    // Tags and nearest-tag still require git CLI — try it as a best-effort bonus
491    // but don't block on it. If git isn't available these will simply be None.
492    let tags = run_git_cmd(project_path, &["tag", "--points-at", "HEAD"]).map(|t| {
493        t.lines()
494            .filter(|l| !l.is_empty())
495            .collect::<Vec<_>>()
496            .join(", ")
497    });
498    let nearest_tag = run_git_cmd(project_path, &["describe", "--tags", "--abbrev=0", "HEAD"]);
499
500    GitInfo {
501        commit_short,
502        commit_long,
503        branch,
504        author,
505        tags,
506        nearest_tag,
507        commit_date,
508        remote_url,
509    }
510}
511
/// Run `git <args>` inside `dir` and return its trimmed standard output.
///
/// Several executable locations are tried in order: the bare name `git`
/// (resolved through `PATH`), then well-known absolute install paths for
/// Linux/macOS and Git for Windows, which helps service accounts running with
/// a stripped `PATH`. The first candidate that exits successfully with
/// non-empty UTF-8 output wins. Returns `None` when every candidate fails.
///
/// Each invocation is prefixed with `-c safe.directory=*`.
fn run_git_cmd(dir: &Path, args: &[&str]) -> Option<String> {
    // Paths for the "wrong" platform simply fail to spawn and are skipped.
    const GIT_CANDIDATES: [&str; 7] = [
        "git",
        "/usr/bin/git",
        "/usr/local/bin/git",
        "/opt/homebrew/bin/git",
        r"C:\Program Files\Git\cmd\git.exe",
        r"C:\Program Files\Git\bin\git.exe",
        r"C:\Program Files (x86)\Git\cmd\git.exe",
    ];

    GIT_CANDIDATES.iter().find_map(|&exe| {
        let output = std::process::Command::new(exe)
            .args(["-c", "safe.directory=*"])
            .args(args)
            .current_dir(dir)
            .output()
            .ok()?;
        if !output.status.success() {
            return None;
        }
        let stdout = String::from_utf8(output.stdout).ok()?;
        let text = stdout.trim();
        if text.is_empty() {
            None
        } else {
            Some(text.to_string())
        }
    })
}
546
/// Identify the CI system the process is running under, based on environment
/// variables.
///
/// Checks are made in a fixed order and the first match wins: Jenkins (any of
/// `JENKINS_URL`, `JENKINS_HOME`, `BUILD_URL` set), then the systems that
/// export a flag equal to `"true"` (GitHub Actions, GitLab CI, CircleCI,
/// Travis CI, Azure DevOps), then TeamCity (`TEAMCITY_VERSION` set). Returns
/// `None` when nothing matches.
fn detect_ci_system() -> Option<&'static str> {
    // (flag variable, CI name) pairs whose flag must equal exactly "true".
    const TRUE_FLAGS: [(&str, &str); 5] = [
        ("GITHUB_ACTIONS", "GitHub Actions"),
        ("GITLAB_CI", "GitLab CI"),
        ("CIRCLECI", "CircleCI"),
        ("TRAVIS", "Travis CI"),
        ("TF_BUILD", "Azure DevOps"),
    ];
    let is_set = |key: &str| std::env::var(key).is_ok();
    let is_true = |key: &str| matches!(std::env::var(key).as_deref(), Ok("true"));

    if ["JENKINS_URL", "JENKINS_HOME", "BUILD_URL"]
        .iter()
        .any(|&key| is_set(key))
    {
        return Some("Jenkins");
    }
    for &(flag, name) in &TRUE_FLAGS {
        if is_true(flag) {
            return Some(name);
        }
    }
    if is_set("TEAMCITY_VERSION") {
        Some("TeamCity")
    } else {
        None
    }
}
574
/// Look up the branch name in the environment variables that common CI systems
/// export, for use when the git HEAD is detached (typical for CI checkouts).
///
/// Variables are consulted in a fixed order. Each value is trimmed, then a
/// leading `refs/heads/` (or, failing that, `origin/`) is removed. Values that
/// end up empty or equal to `HEAD` are ignored and the next variable is tried.
fn ci_branch_from_env() -> Option<String> {
    const VARS: [&str; 7] = [
        "BRANCH_NAME",        // Jenkins Pipeline
        "GIT_BRANCH",         // Jenkins Freestyle (may carry "origin/<branch>")
        "GITHUB_REF_NAME",    // GitHub Actions
        "CI_COMMIT_BRANCH",   // GitLab CI
        "CIRCLE_BRANCH",      // CircleCI
        "TRAVIS_BRANCH",      // Travis CI
        "BUILD_SOURCEBRANCH", // Azure DevOps (may carry "refs/heads/<branch>")
    ];

    VARS.iter().find_map(|&var| {
        let raw = std::env::var(var).ok()?;
        let trimmed = raw.trim();
        let branch = if let Some(rest) = trimmed.strip_prefix("refs/heads/") {
            rest
        } else if let Some(rest) = trimmed.strip_prefix("origin/") {
            rest
        } else {
            trimmed
        };
        if branch.is_empty() || branch == "HEAD" {
            None
        } else {
            Some(branch.to_string())
        }
    })
}
601
/// Return the current user name from the environment: `USERNAME` first, then
/// `USER`, and the literal `"unknown"` when neither can be read.
fn get_current_username() -> String {
    ["USERNAME", "USER"]
        .iter()
        .find_map(|&key| std::env::var(key).ok())
        .unwrap_or_else(|| "unknown".to_string())
}
607
/// Read environment variable `var`, treating an unset, unreadable or empty
/// value as absent.
fn non_empty_env(var: &str) -> Option<String> {
    std::env::var(var).ok().filter(|value| !value.is_empty())
}
616
/// Report whether any of the Jenkins marker variables (`JENKINS_URL`,
/// `JENKINS_HOME`, `BUILD_URL`) is set in the environment.
fn is_jenkins_env() -> bool {
    const JENKINS_MARKERS: [&str; 3] = ["JENKINS_URL", "JENKINS_HOME", "BUILD_URL"];
    JENKINS_MARKERS
        .iter()
        .any(|&key| std::env::var(key).is_ok())
}
622
623fn get_hostname() -> String {
624    // In CI environments prefer a human-readable agent/runner identifier over
625    // whatever hostname the container was assigned.
626    if is_jenkins_env() {
627        if let Some(n) = non_empty_env("NODE_NAME") {
628            return n;
629        }
630    }
631    if std::env::var("GITHUB_ACTIONS").as_deref() == Ok("true") {
632        if let Some(r) = non_empty_env("RUNNER_NAME") {
633            return r;
634        }
635    }
636    if std::env::var("GITLAB_CI").as_deref() == Ok("true") {
637        if let Some(r) = non_empty_env("CI_RUNNER_DESCRIPTION") {
638            return r;
639        }
640    }
641    std::env::var("COMPUTERNAME")
642        .or_else(|_| std::env::var("HOSTNAME"))
643        .or_else(|_| std::fs::read_to_string("/etc/hostname").map(|s| s.trim().to_string()))
644        .unwrap_or_else(|_| "unknown".to_string())
645}
646
647/// Walk a single directory root and collect file records into the output vectors.
648#[allow(clippy::too_many_arguments)]
649fn walk_root(
650    root: &Path,
651    config: &AppConfig,
652    include_globs: Option<&GlobSet>,
653    exclude_globs: Option<&GlobSet>,
654    enabled_languages: Option<&BTreeSet<Language>>,
655    seen_paths: &mut HashSet<PathBuf>,
656    analyzed: &mut Vec<FileRecord>,
657    skipped: &mut Vec<FileRecord>,
658    warnings: &mut Vec<String>,
659    cancel: Option<&AtomicBool>,
660    progress: Option<&ProgressCounters>,
661) -> Result<()> {
662    let mut builder = WalkBuilder::new(root);
663    builder
664        .follow_links(config.discovery.follow_symlinks)
665        .hidden(config.discovery.ignore_hidden_files)
666        .ignore(config.discovery.honor_ignore_files)
667        .parents(config.discovery.honor_ignore_files)
668        .git_ignore(config.discovery.honor_ignore_files)
669        .git_global(config.discovery.honor_ignore_files)
670        .git_exclude(config.discovery.honor_ignore_files);
671
672    let paths = collect_walk_paths(&builder, seen_paths, warnings);
673    if paths.is_empty() {
674        return Ok(());
675    }
676
677    if let Some(p) = progress {
678        p.files_total.fetch_add(paths.len(), Ordering::Relaxed);
679    }
680
681    let chunk_results = run_parallel_analysis(
682        &paths,
683        root,
684        config,
685        include_globs,
686        exclude_globs,
687        enabled_languages,
688        cancel,
689        progress,
690    )?;
691    merge_chunk_results(chunk_results, analyzed, skipped, warnings)
692}
693
694fn collect_walk_paths(
695    builder: &WalkBuilder,
696    seen_paths: &mut HashSet<PathBuf>,
697    warnings: &mut Vec<String>,
698) -> Vec<PathBuf> {
699    // build_parallel() walks the directory tree across multiple threads (work-stealing
700    // internally), which is meaningfully faster for deeply nested repos with many directories.
701    // We collect results via an MPSC channel so each walker thread sends without contention.
702    let (tx, rx) = std::sync::mpsc::channel::<std::result::Result<PathBuf, String>>();
703
704    builder.build_parallel().run(|| {
705        let tx = tx.clone();
706        Box::new(move |entry| {
707            match entry {
708                Err(e) => {
709                    let _ = tx.send(Err(format!("discovery warning: {e}")));
710                }
711                Ok(e) => {
712                    let path = e.into_path();
713                    if !path.is_dir() {
714                        let _ = tx.send(Ok(path));
715                    }
716                }
717            }
718            ignore::WalkState::Continue
719        })
720    });
721
722    // Drop the sender that the outer scope holds; the per-thread clones were dropped when
723    // run() returned (all threads finished). Dropping this last sender closes the channel.
724    drop(tx);
725
726    rx.into_iter()
727        .filter_map(|msg| match msg {
728            Ok(path) => {
729                if seen_paths.insert(path.clone()) {
730                    Some(path)
731                } else {
732                    None
733                }
734            }
735            Err(warn) => {
736                warnings.push(warn);
737                None
738            }
739        })
740        .collect()
741}
742
743/// Inner work loop executed by each analysis thread.
744#[allow(clippy::too_many_arguments)]
745fn worker_loop(
746    paths: &[PathBuf],
747    root: &Path,
748    config: &AppConfig,
749    include_globs: Option<&GlobSet>,
750    exclude_globs: Option<&GlobSet>,
751    enabled_languages: Option<&BTreeSet<Language>>,
752    cancel: Option<&AtomicBool>,
753    next_index: &AtomicUsize,
754    files_done: Option<&AtomicUsize>,
755) -> Vec<Result<Option<FileRecord>>> {
756    let mut results = Vec::new();
757    loop {
758        if cancel.is_some_and(|c| c.load(Ordering::Relaxed)) {
759            results.push(Err(anyhow::anyhow!("analysis cancelled")));
760            break;
761        }
762        let i = next_index.fetch_add(1, Ordering::Relaxed);
763        if i >= paths.len() {
764            break;
765        }
766        results.push(analyze_candidate_file(
767            &paths[i],
768            root,
769            config,
770            include_globs,
771            exclude_globs,
772            enabled_languages,
773        ));
774        if let Some(fd) = files_done {
775            fd.fetch_add(1, Ordering::Relaxed);
776        }
777    }
778    results
779}
780
781#[allow(clippy::too_many_arguments)]
782fn run_parallel_analysis(
783    paths: &[PathBuf],
784    root: &Path,
785    config: &AppConfig,
786    include_globs: Option<&GlobSet>,
787    exclude_globs: Option<&GlobSet>,
788    enabled_languages: Option<&BTreeSet<Language>>,
789    cancel: Option<&AtomicBool>,
790    progress: Option<&ProgressCounters>,
791) -> Result<Vec<Vec<Result<Option<FileRecord>>>>> {
792    let thread_count = std::thread::available_parallelism().map_or(DEFAULT_ANALYSIS_THREADS, |n| {
793        n.get().min(MAX_ANALYSIS_THREADS)
794    });
795    // Shared work-queue index: each thread atomically claims the next path to process.
796    // This eliminates static-chunk load imbalance — threads that finish early immediately
797    // pick up more work instead of sitting idle while one overloaded chunk finishes.
798    let next_index = AtomicUsize::new(0);
799    let files_done: Option<&AtomicUsize> = progress.map(|p| p.files_done.as_ref());
800
801    std::thread::scope(|s| -> Result<Vec<Vec<Result<Option<FileRecord>>>>> {
802        // IMPORTANT: collect ALL handles before joining any of them.
803        // A lazy spawn-then-join chain would serialize threads one at a time.
804        let mut handles = Vec::with_capacity(thread_count);
805        for _ in 0..thread_count {
806            handles.push(s.spawn(|| {
807                worker_loop(
808                    paths,
809                    root,
810                    config,
811                    include_globs,
812                    exclude_globs,
813                    enabled_languages,
814                    cancel,
815                    &next_index,
816                    files_done,
817                )
818            }));
819        }
820        handles
821            .into_iter()
822            .map(|h| {
823                h.join()
824                    .map_err(|_| anyhow::anyhow!("analysis thread panicked"))
825            })
826            .collect()
827    })
828}
829
830fn merge_chunk_results(
831    chunk_results: Vec<Vec<Result<Option<FileRecord>>>>,
832    analyzed: &mut Vec<FileRecord>,
833    skipped: &mut Vec<FileRecord>,
834    warnings: &mut Vec<String>,
835) -> Result<()> {
836    for chunk in chunk_results {
837        for result in chunk {
838            if let Some(record) = result? {
839                push_record(record, analyzed, skipped, warnings);
840            }
841        }
842    }
843    Ok(())
844}
845
846/// Label each analyzed file with its submodule and build per-submodule summaries.
847fn process_submodules(config: &AppConfig, analyzed: &mut [FileRecord]) -> Vec<SubmoduleSummary> {
848    let root = config.discovery.root_paths[0]
849        .canonicalize()
850        .unwrap_or_else(|_| config.discovery.root_paths[0].clone());
851    let submodules = detect_submodules(&root);
852    if submodules.is_empty() {
853        return Vec::new();
854    }
855
856    for file in analyzed.iter_mut() {
857        for (name, sub_path) in &submodules {
858            let prefix = sub_path.to_string_lossy().replace('\\', "/");
859            let rel = &file.relative_path;
860            if rel == &prefix || rel.starts_with(&format!("{prefix}/")) {
861                file.submodule = Some(name.clone());
862                break;
863            }
864        }
865    }
866
867    build_submodule_summaries(analyzed, &submodules)
868}
869
870/// Assemble the final `AnalysisRun` from collected records and metadata.
871fn assemble_run(
872    config: &AppConfig,
873    runtime_mode: &str,
874    analyzed: Vec<FileRecord>,
875    skipped: Vec<FileRecord>,
876    warnings: Vec<String>,
877    submodule_summaries: Vec<SubmoduleSummary>,
878) -> AnalysisRun {
879    let summary = build_summary(&analyzed, &skipped);
880    let language_summaries = build_language_summaries(&analyzed);
881    let col_threshold = config.analysis.style_col_threshold;
882    let style_summary = build_style_summary(&analyzed, col_threshold);
883
884    let first_root = config
885        .discovery
886        .root_paths
887        .first()
888        .map(|p| p.canonicalize().unwrap_or_else(|_| p.clone()));
889    let git = first_root
890        .as_deref()
891        .map(detect_git_for_run)
892        .unwrap_or_default();
893
894    let now = Utc::now();
895    let run_id = {
896        let uuid_suffix = Uuid::new_v4().simple().to_string();
897        format!("{}-{}", now.format("%Y%m%d-%H%M"), uuid_suffix)
898    };
899
900    AnalysisRun {
901        tool: ToolMetadata {
902            name: "sloc".into(),
903            version: env!("CARGO_PKG_VERSION").into(),
904            run_id,
905            timestamp_utc: now,
906        },
907        environment: EnvironmentMetadata {
908            operating_system: std::env::consts::OS.into(),
909            architecture: std::env::consts::ARCH.into(),
910            runtime_mode: runtime_mode.into(),
911            initiator_username: get_current_username(),
912            initiator_hostname: get_hostname(),
913            ci_name: if is_jenkins_env() {
914                Some(format!("Jenkins\t{}", get_hostname()))
915            } else {
916                detect_ci_system().map(str::to_string)
917            },
918        },
919        effective_configuration: config.clone(),
920        input_roots: config
921            .discovery
922            .root_paths
923            .iter()
924            .map(|p| path_to_string(p))
925            .collect(),
926        summary_totals: summary,
927        totals_by_language: language_summaries,
928        per_file_records: analyzed,
929        skipped_file_records: skipped,
930        warnings,
931        submodule_summaries,
932        git_commit_short: git.commit_short,
933        git_commit_long: git.commit_long,
934        git_branch: git.branch,
935        git_commit_author: git.author,
936        git_tags: git.tags,
937        git_nearest_tag: git.nearest_tag,
938        git_commit_date: git.commit_date,
939        git_remote_url: git.remote_url,
940        style_summary,
941    }
942}
943
944/// # Errors
945///
946/// Returns an error if the config is invalid, root paths cannot be walked, or any file
947/// analysis step fails in a way that cannot be recovered from.
948#[allow(clippy::too_many_lines)]
949pub fn analyze(
950    config: &AppConfig,
951    runtime_mode: &str,
952    cancel: Option<&AtomicBool>,
953    progress: Option<&ProgressCounters>,
954) -> Result<AnalysisRun> {
955    config.validate()?;
956
957    if config.discovery.root_paths.is_empty() {
958        anyhow::bail!("no input paths were provided");
959    }
960
961    let include_globs = compile_globset(&config.discovery.include_globs)?;
962    let exclude_globs = compile_globset(&config.discovery.exclude_globs)?;
963    let enabled_languages = parse_enabled_languages(&config.analysis.enabled_languages)?;
964
965    let mut analyzed = Vec::new();
966    let mut skipped = Vec::new();
967    let mut warnings = Vec::new();
968    let mut seen_paths = HashSet::new();
969
970    for root in &config.discovery.root_paths {
971        if cancel.is_some_and(|c| c.load(Ordering::Relaxed)) {
972            anyhow::bail!("analysis cancelled");
973        }
974
975        let root = root.canonicalize().unwrap_or_else(|_| root.clone());
976
977        if root.is_file() {
978            if let Some(record) = analyze_candidate_file(
979                &root,
980                root.parent().unwrap_or_else(|| Path::new(".")),
981                config,
982                include_globs.as_ref(),
983                exclude_globs.as_ref(),
984                enabled_languages.as_ref(),
985            )? {
986                push_record(record, &mut analyzed, &mut skipped, &mut warnings);
987            }
988            continue;
989        }
990
991        walk_root(
992            &root,
993            config,
994            include_globs.as_ref(),
995            exclude_globs.as_ref(),
996            enabled_languages.as_ref(),
997            &mut seen_paths,
998            &mut analyzed,
999            &mut skipped,
1000            &mut warnings,
1001            cancel,
1002            progress,
1003        )?;
1004    }
1005
1006    analyzed.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
1007    skipped.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
1008
1009    // Submodule detection: label each file with its submodule and build per-submodule summaries.
1010    let submodule_summaries = if config.discovery.submodule_breakdown {
1011        process_submodules(config, &mut analyzed)
1012    } else {
1013        Vec::new()
1014    };
1015
1016    attach_coverage(config, &mut analyzed, &mut warnings);
1017
1018    Ok(assemble_run(
1019        config,
1020        runtime_mode,
1021        analyzed,
1022        skipped,
1023        warnings,
1024        submodule_summaries,
1025    ))
1026}
1027
1028fn attach_coverage(config: &AppConfig, analyzed: &mut [FileRecord], warnings: &mut Vec<String>) {
1029    let Some(cov_path) = coverage::resolve_coverage_file(config.analysis.coverage_file.as_deref())
1030    else {
1031        return;
1032    };
1033    tracing::debug!(path = %cov_path.display(), "loading coverage file");
1034    match fs::read_to_string(&cov_path) {
1035        Ok(content) => {
1036            let cov_map = coverage::parse_coverage_auto(&cov_path, &content);
1037            let mut matched: u32 = 0;
1038            let mut unmatched: u32 = 0;
1039            for record in analyzed.iter_mut() {
1040                record.coverage =
1041                    coverage::lookup_coverage(&cov_map, &record.relative_path).cloned();
1042                if record.coverage.is_some() {
1043                    matched += 1;
1044                } else {
1045                    unmatched += 1;
1046                }
1047            }
1048            tracing::debug!(
1049                path = %cov_path.display(),
1050                coverage_entries = cov_map.len(),
1051                files_matched = matched,
1052                files_unmatched = unmatched,
1053                "coverage attached"
1054            );
1055            if unmatched > 0 && matched == 0 {
1056                tracing::warn!(
1057                    path = %cov_path.display(),
1058                    "coverage file loaded but no source files could be matched — check that paths in the coverage report match the scanned directory"
1059                );
1060            }
1061        }
1062        Err(e) => {
1063            tracing::warn!(path = %cov_path.display(), error = %e, "coverage file could not be read");
1064            warnings.push(format!(
1065                "coverage file '{}' could not be read: {e}",
1066                cov_path.display()
1067            ));
1068        }
1069    }
1070}
1071
1072fn push_record(
1073    record: FileRecord,
1074    analyzed: &mut Vec<FileRecord>,
1075    skipped: &mut Vec<FileRecord>,
1076    warnings: &mut Vec<String>,
1077) {
1078    warnings.extend(
1079        record
1080            .warnings
1081            .iter()
1082            .map(|warning| format!("{}: {warning}", record.relative_path)),
1083    );
1084
1085    match record.status {
1086        FileStatus::AnalyzedExact | FileStatus::AnalyzedBestEffort => analyzed.push(record),
1087        _ => skipped.push(record),
1088    }
1089}
1090
1091/// Convenience wrapper: build a boxed `Skip` outcome with a single-item warning message.
1092#[inline]
1093fn skip_with_reason(
1094    path: &Path,
1095    root: &Path,
1096    size: u64,
1097    reason: impl Into<String>,
1098) -> MetadataPolicyOutcome {
1099    MetadataPolicyOutcome::Skip(Box::new(skipped_record(
1100        path,
1101        root,
1102        size,
1103        FileStatus::SkippedByPolicy,
1104        vec![reason.into()],
1105    )))
1106}
1107
1108/// Apply metadata-level policy checks (symlink, name, dir exclusion, size, globs, lockfile).
1109/// Returns `Skip(record)` to skip, `Exclude` to omit from output entirely (include-glob miss),
1110/// or `Continue` to proceed to content checks.
1111#[allow(clippy::too_many_arguments)]
1112fn check_metadata_policy(
1113    path: &Path,
1114    root: &Path,
1115    relative_path: &str,
1116    metadata: &fs::Metadata,
1117    config: &AppConfig,
1118    include_globs: Option<&GlobSet>,
1119    exclude_globs: Option<&GlobSet>,
1120) -> MetadataPolicyOutcome {
1121    let size = metadata.len();
1122
1123    if metadata.file_type().is_symlink() && !config.discovery.follow_symlinks {
1124        return skip_with_reason(path, root, size, "symlink skipped by policy");
1125    }
1126    if file_name_eq(path, ".gitignore") {
1127        return skip_with_reason(path, root, size, ".gitignore is always excluded");
1128    }
1129    if is_excluded_dir_path(path, &config.discovery.excluded_directories) {
1130        return skip_with_reason(path, root, size, "path matched excluded directory setting");
1131    }
1132    if size > config.discovery.max_file_size_bytes {
1133        return skip_with_reason(
1134            path,
1135            root,
1136            size,
1137            format!(
1138                "file exceeded max_file_size_bytes ({})",
1139                config.discovery.max_file_size_bytes
1140            ),
1141        );
1142    }
1143    if let Some(globs) = include_globs {
1144        if !globs.is_match(Path::new(relative_path)) && !globs.is_match(path) {
1145            return MetadataPolicyOutcome::Exclude;
1146        }
1147    }
1148    if let Some(globs) = exclude_globs {
1149        if globs.is_match(Path::new(relative_path)) || globs.is_match(path) {
1150            return skip_with_reason(path, root, size, "path matched exclude glob");
1151        }
1152    }
1153    if is_known_lockfile(path) && !config.analysis.include_lockfiles {
1154        return skip_with_reason(path, root, size, "lockfile skipped by default policy");
1155    }
1156
1157    MetadataPolicyOutcome::Continue
1158}
1159
/// Outcome of the content-level policy checks performed by `check_content_policy`.
struct ContentPolicyResult {
    /// Whether `is_vendor_path` classified the file's path as vendored.
    vendor: bool,
    /// Whether generated-file detection was enabled and flagged the file.
    generated: bool,
    /// Whether minified-file detection was enabled and flagged the file.
    minified: bool,
    /// `Some(record)` when the file should be skipped; `None` to continue analysis.
    skip_record: Option<FileRecord>,
}
1166
1167/// Apply content-level policy checks (vendor, generated, minified).
1168/// `skip_record` is `Some` when the file should be skipped.
1169fn check_content_policy(
1170    path: &Path,
1171    root: &Path,
1172    size_bytes: u64,
1173    bytes: &[u8],
1174    config: &AppConfig,
1175) -> ContentPolicyResult {
1176    let vendor = is_vendor_path(path);
1177    if vendor && config.analysis.vendor_directory_detection {
1178        return ContentPolicyResult {
1179            vendor,
1180            generated: false,
1181            minified: false,
1182            skip_record: Some(skipped_record(
1183                path,
1184                root,
1185                size_bytes,
1186                FileStatus::SkippedByPolicy,
1187                vec!["vendor file skipped by policy".into()],
1188            )),
1189        };
1190    }
1191
1192    let generated = config.analysis.generated_file_detection && looks_generated(path, bytes);
1193    if generated {
1194        return ContentPolicyResult {
1195            vendor,
1196            generated,
1197            minified: false,
1198            skip_record: Some(skipped_record(
1199                path,
1200                root,
1201                size_bytes,
1202                FileStatus::SkippedByPolicy,
1203                vec!["generated file skipped by policy".into()],
1204            )),
1205        };
1206    }
1207
1208    let minified = config.analysis.minified_file_detection && looks_minified(path, bytes);
1209    if minified {
1210        return ContentPolicyResult {
1211            vendor,
1212            generated,
1213            minified,
1214            skip_record: Some(skipped_record(
1215                path,
1216                root,
1217                size_bytes,
1218                FileStatus::SkippedByPolicy,
1219                vec!["minified file skipped by policy".into()],
1220            )),
1221        };
1222    }
1223
1224    ContentPolicyResult {
1225        vendor,
1226        generated,
1227        minified,
1228        skip_record: None,
1229    }
1230}
1231
1232/// Decode file bytes to a UTF-8 string, handling binary detection and decode failures.
1233fn decode_file_contents(
1234    path: &Path,
1235    root: &Path,
1236    size_bytes: u64,
1237    bytes: &[u8],
1238    config: &AppConfig,
1239) -> Result<Option<(String, String, Vec<String>)>> {
1240    if is_binary(bytes) {
1241        return match config.analysis.binary_file_behavior {
1242            BinaryFileBehavior::Skip => Ok(None),
1243            BinaryFileBehavior::Fail => {
1244                anyhow::bail!("binary file encountered: {}", path.display())
1245            }
1246        };
1247    }
1248
1249    match decode_bytes(bytes) {
1250        Ok(result) => Ok(Some(result)),
1251        Err(err) => match config.analysis.decode_failure_behavior {
1252            FailureBehavior::WarnSkip => {
1253                // Caller will handle the None as a SkippedDecodeError record.
1254                // We use a sentinel: return Ok(None) but encode the error into a field.
1255                // Instead, propagate as a skipped record via the caller.
1256                let _ = (path, root, size_bytes); // suppress unused warnings
1257                Err(anyhow::anyhow!("__decode_warn__: {err}"))
1258            }
1259            FailureBehavior::Fail => {
1260                anyhow::bail!("decode failure for {}: {err}", path.display())
1261            }
1262        },
1263    }
1264}
1265
1266#[allow(clippy::too_many_lines)]
1267fn analyze_candidate_file(
1268    path: &Path,
1269    root: &Path,
1270    config: &AppConfig,
1271    include_globs: Option<&GlobSet>,
1272    exclude_globs: Option<&GlobSet>,
1273    enabled_languages: Option<&BTreeSet<Language>>,
1274) -> Result<Option<FileRecord>> {
1275    let metadata = match fs::symlink_metadata(path) {
1276        Ok(metadata) => metadata,
1277        Err(err) => {
1278            return Ok(Some(skipped_record(
1279                path,
1280                root,
1281                0,
1282                FileStatus::ErrorInternal,
1283                vec![format!("failed to read metadata: {err}")],
1284            )));
1285        }
1286    };
1287
1288    let relative_path = relative_path_string(path, root);
1289
1290    // Metadata-level policy checks.
1291    match check_metadata_policy(
1292        path,
1293        root,
1294        &relative_path,
1295        &metadata,
1296        config,
1297        include_globs,
1298        exclude_globs,
1299    ) {
1300        MetadataPolicyOutcome::Skip(record) => return Ok(Some(*record)),
1301        MetadataPolicyOutcome::Exclude => return Ok(None),
1302        MetadataPolicyOutcome::Continue => {}
1303    }
1304
1305    let bytes = match fs::read(path) {
1306        Ok(bytes) => bytes,
1307        Err(err) => {
1308            return Ok(Some(skipped_record(
1309                path,
1310                root,
1311                metadata.len(),
1312                FileStatus::ErrorInternal,
1313                vec![format!("failed to read file: {err}")],
1314            )));
1315        }
1316    };
1317
1318    // Content-level policy checks (vendor, generated, minified).
1319    let content_policy = check_content_policy(path, root, metadata.len(), &bytes, config);
1320    if let Some(record) = content_policy.skip_record {
1321        return Ok(Some(record));
1322    }
1323    let (vendor, generated, minified) = (
1324        content_policy.vendor,
1325        content_policy.generated,
1326        content_policy.minified,
1327    );
1328
1329    // Decode content, handling binary and decode failures.
1330    let (text, encoding, decode_warnings) =
1331        match decode_file_contents(path, root, metadata.len(), &bytes, config) {
1332            Ok(Some(result)) => result,
1333            Ok(None) => {
1334                return Ok(Some(skipped_record(
1335                    path,
1336                    root,
1337                    metadata.len(),
1338                    FileStatus::SkippedBinary,
1339                    vec!["binary file skipped by default".into()],
1340                )));
1341            }
1342            Err(err) => {
1343                let msg = err.to_string();
1344                if let Some(warn_msg) = msg.strip_prefix("__decode_warn__: ") {
1345                    return Ok(Some(skipped_record(
1346                        path,
1347                        root,
1348                        metadata.len(),
1349                        FileStatus::SkippedDecodeError,
1350                        vec![warn_msg.to_string()],
1351                    )));
1352                }
1353                return Err(err);
1354            }
1355        };
1356
1357    let first_line = text.lines().next();
1358    let language = detect_language(
1359        path,
1360        first_line,
1361        &config.analysis.extension_overrides,
1362        config.analysis.shebang_detection,
1363    );
1364
1365    let Some(language) = language else {
1366        return Ok(Some(skipped_record(
1367            path,
1368            root,
1369            metadata.len(),
1370            FileStatus::SkippedUnsupported,
1371            vec!["unsupported or undetected language".into()],
1372        )));
1373    };
1374
1375    if let Some(enabled) = enabled_languages {
1376        if !enabled.contains(&language) {
1377            return Ok(Some(skipped_record(
1378                path,
1379                root,
1380                metadata.len(),
1381                FileStatus::SkippedByPolicy,
1382                vec![format!(
1383                    "language {} disabled by configuration",
1384                    language.display_name()
1385                )],
1386            )));
1387        }
1388    }
1389
1390    let ieee_opts = AnalysisOptions {
1391        blank_in_block_comment_as_comment: config.analysis.blank_in_block_comment_policy
1392            == BlankInBlockCommentPolicy::CountAsComment,
1393        collapse_continuation_lines: config.analysis.continuation_line_policy
1394            == ContinuationLinePolicy::CollapseToLogical,
1395    };
1396    let analysis = analyze_text(language, &text, ieee_opts);
1397    let effective_counts = compute_effective_counts(
1398        &analysis.raw,
1399        config.analysis.mixed_line_policy,
1400        config.analysis.python_docstrings_as_comments,
1401        config.analysis.count_compiler_directives,
1402    );
1403
1404    let mut warnings = decode_warnings;
1405    warnings.extend(analysis.warnings.clone());
1406
1407    Ok(Some(FileRecord {
1408        path: path_to_string(path),
1409        relative_path,
1410        language: Some(language),
1411        size_bytes: metadata.len(),
1412        detected_encoding: Some(encoding),
1413        raw_line_categories: analysis.raw,
1414        effective_counts,
1415        status: match analysis.parse_mode {
1416            ParseMode::Lexical | ParseMode::TreeSitter => FileStatus::AnalyzedExact,
1417            ParseMode::LexicalBestEffort => FileStatus::AnalyzedBestEffort,
1418        },
1419        warnings,
1420        generated,
1421        minified,
1422        vendor,
1423        parse_mode: Some(analysis.parse_mode),
1424        submodule: None,
1425        coverage: None,
1426        style_analysis: analysis.style_analysis,
1427    }))
1428}
1429
1430const fn compute_effective_counts(
1431    raw: &RawLineCounts,
1432    mixed_line_policy: MixedLinePolicy,
1433    python_docstrings_as_comments: bool,
1434    count_compiler_directives: bool,
1435) -> EffectiveCounts {
1436    let mut effective = EffectiveCounts {
1437        code_lines: raw.code_only_lines,
1438        comment_lines: raw.single_comment_only_lines + raw.multi_comment_only_lines,
1439        blank_lines: raw.blank_only_lines,
1440        mixed_lines_separate: 0,
1441    };
1442
1443    if python_docstrings_as_comments {
1444        effective.comment_lines += raw.docstring_comment_lines;
1445    } else {
1446        effective.code_lines += raw.docstring_comment_lines;
1447    }
1448
1449    let mixed_total = raw.mixed_code_single_comment_lines + raw.mixed_code_multi_comment_lines;
1450    match mixed_line_policy {
1451        MixedLinePolicy::CodeOnly => effective.code_lines += mixed_total,
1452        MixedLinePolicy::CodeAndComment => {
1453            effective.code_lines += mixed_total;
1454            effective.comment_lines += mixed_total;
1455        }
1456        MixedLinePolicy::CommentOnly => effective.comment_lines += mixed_total,
1457        MixedLinePolicy::SeparateMixedCategory => effective.mixed_lines_separate += mixed_total,
1458    }
1459
1460    // IEEE 1045-1992 §4.2: optionally exclude preprocessor/compiler directives from code SLOC.
1461    // compiler_directive_lines is a subset of code_only_lines, so subtract it directly.
1462    if !count_compiler_directives {
1463        effective.code_lines = effective
1464            .code_lines
1465            .saturating_sub(raw.compiler_directive_lines);
1466    }
1467
1468    effective
1469}
1470
1471fn build_summary(analyzed: &[FileRecord], skipped: &[FileRecord]) -> SummaryTotals {
1472    let mut summary = SummaryTotals {
1473        files_considered: (analyzed.len() + skipped.len()) as u64,
1474        files_analyzed: analyzed.len() as u64,
1475        files_skipped: skipped.len() as u64,
1476        ..Default::default()
1477    };
1478
1479    for record in analyzed {
1480        summary.total_physical_lines += record.raw_line_categories.total_physical_lines;
1481        summary.code_lines += record.effective_counts.code_lines;
1482        summary.comment_lines += record.effective_counts.comment_lines;
1483        summary.blank_lines += record.effective_counts.blank_lines;
1484        summary.mixed_lines_separate += record.effective_counts.mixed_lines_separate;
1485        summary.functions += record.raw_line_categories.functions;
1486        summary.classes += record.raw_line_categories.classes;
1487        summary.variables += record.raw_line_categories.variables;
1488        summary.imports += record.raw_line_categories.imports;
1489        summary.test_count += record.raw_line_categories.test_count;
1490        summary.test_assertion_count += record.raw_line_categories.test_assertion_count;
1491        summary.test_suite_count += record.raw_line_categories.test_suite_count;
1492        if let Some(cov) = &record.coverage {
1493            summary.coverage_lines_found += u64::from(cov.lines_found);
1494            summary.coverage_lines_hit += u64::from(cov.lines_hit);
1495            summary.coverage_functions_found += u64::from(cov.functions_found);
1496            summary.coverage_functions_hit += u64::from(cov.functions_hit);
1497            summary.coverage_branches_found += u64::from(cov.branches_found);
1498            summary.coverage_branches_hit += u64::from(cov.branches_hit);
1499        }
1500    }
1501
1502    summary
1503}
1504
/// Construct a `LanguageSummary` for `language` with every counter set to zero.
///
/// Used as the starting value when a language is seen for the first time while
/// accumulating per-language totals. Every field is spelled out explicitly, so
/// adding a field to `LanguageSummary` requires updating this literal.
const fn zeroed_summary(language: Language) -> LanguageSummary {
    LanguageSummary {
        language,
        files: 0,
        total_physical_lines: 0,
        code_lines: 0,
        comment_lines: 0,
        blank_lines: 0,
        mixed_lines_separate: 0,
        functions: 0,
        classes: 0,
        variables: 0,
        imports: 0,
        test_count: 0,
        test_assertion_count: 0,
        test_suite_count: 0,
        coverage_lines_found: 0,
        coverage_lines_hit: 0,
        coverage_functions_found: 0,
        coverage_functions_hit: 0,
        coverage_branches_found: 0,
        coverage_branches_hit: 0,
    }
}
1530
1531/// Accumulate all per-file counters from `record` into an existing `LanguageSummary`.
1532fn accumulate_record_into_summary(entry: &mut LanguageSummary, record: &FileRecord) {
1533    entry.files += 1;
1534    let r = &record.raw_line_categories;
1535    entry.total_physical_lines += r.total_physical_lines;
1536    entry.code_lines += record.effective_counts.code_lines;
1537    entry.comment_lines += record.effective_counts.comment_lines;
1538    entry.blank_lines += record.effective_counts.blank_lines;
1539    entry.mixed_lines_separate += record.effective_counts.mixed_lines_separate;
1540    entry.functions += r.functions;
1541    entry.classes += r.classes;
1542    entry.variables += r.variables;
1543    entry.imports += r.imports;
1544    entry.test_count += r.test_count;
1545    entry.test_assertion_count += r.test_assertion_count;
1546    entry.test_suite_count += r.test_suite_count;
1547    if let Some(cov) = &record.coverage {
1548        entry.coverage_lines_found += u64::from(cov.lines_found);
1549        entry.coverage_lines_hit += u64::from(cov.lines_hit);
1550        entry.coverage_functions_found += u64::from(cov.functions_found);
1551        entry.coverage_functions_hit += u64::from(cov.functions_hit);
1552        entry.coverage_branches_found += u64::from(cov.branches_found);
1553        entry.coverage_branches_hit += u64::from(cov.branches_hit);
1554    }
1555}
1556
1557fn build_language_summaries(analyzed: &[FileRecord]) -> Vec<LanguageSummary> {
1558    let mut by_language: BTreeMap<Language, LanguageSummary> = BTreeMap::new();
1559    for record in analyzed {
1560        let Some(language) = record.language else {
1561            continue;
1562        };
1563        let entry = by_language
1564            .entry(language)
1565            .or_insert_with(|| zeroed_summary(language));
1566        accumulate_record_into_summary(entry, record);
1567    }
1568    by_language.into_values().collect()
1569}
1570
1571fn skipped_record(
1572    path: &Path,
1573    root: &Path,
1574    size_bytes: u64,
1575    status: FileStatus,
1576    warnings: Vec<String>,
1577) -> FileRecord {
1578    FileRecord {
1579        path: path_to_string(path),
1580        relative_path: relative_path_string(path, root),
1581        language: None,
1582        size_bytes,
1583        detected_encoding: None,
1584        raw_line_categories: RawLineCounts::default(),
1585        effective_counts: EffectiveCounts::default(),
1586        status,
1587        warnings,
1588        generated: false,
1589        minified: false,
1590        vendor: false,
1591        parse_mode: None,
1592        submodule: None,
1593        coverage: None,
1594        style_analysis: None,
1595    }
1596}
1597
/// Render `path` relative to `root` using forward slashes.
///
/// Falls back to the full `path` when it does not start with `root`.
/// Non-UTF-8 components are converted lossily.
fn relative_path_string(path: &Path, root: &Path) -> String {
    let shown = match path.strip_prefix(root) {
        Ok(tail) => tail,
        Err(_) => path,
    };
    shown.to_string_lossy().replace('\\', "/")
}
1604
/// Render `path` as a string with forward slashes, converting non-UTF-8 parts lossily.
fn path_to_string(path: &Path) -> String {
    let lossy = path.to_string_lossy();
    lossy.replace('\\', "/")
}
1608
/// Parse `.gitmodules` in `root` and return `(name, relative_path)` for each submodule found.
///
/// A submodule is reported once both its `[submodule "<name>"]` header and a
/// `path = <value>` entry have been seen; sections without a path are dropped.
/// Returns an empty list when the file is missing or unreadable. Malformed
/// lines are ignored rather than causing a panic.
#[must_use]
pub fn detect_submodules(root: &Path) -> Vec<(String, PathBuf)> {
    let gitmodules = root.join(".gitmodules");
    if !gitmodules.is_file() {
        return Vec::new();
    }
    let Ok(content) = fs::read_to_string(&gitmodules) else {
        return Vec::new();
    };

    let mut result = Vec::new();
    let mut current_name: Option<String> = None;
    let mut current_path: Option<PathBuf> = None;

    for line in content.lines() {
        let trimmed = line.trim();

        // Strip prefix and suffix in sequence so they can never overlap: a line such as
        // `[submodule "]` shares one quote between both markers and is not a valid header.
        let header = trimmed
            .strip_prefix("[submodule \"")
            .and_then(|rest| rest.strip_suffix("\"]"));
        if let Some(name) = header {
            // A new section starts: flush the previous one if it was complete.
            if let (Some(prev_name), Some(prev_path)) = (current_name.take(), current_path.take())
            {
                result.push((prev_name, prev_path));
            }
            current_name = Some(name.to_string());
            continue;
        }

        // Only the exact key `path` counts; keys that merely begin with "path" are ignored.
        let path_value = trimmed
            .strip_prefix("path")
            .and_then(|rest| rest.trim_start().strip_prefix('='));
        if let Some(value) = path_value {
            current_path = Some(PathBuf::from(value.trim()));
        }
    }
    if let (Some(name), Some(path)) = (current_name, current_path) {
        result.push((name, path));
    }

    result
}
1645
1646fn build_submodule_summaries(
1647    analyzed: &[FileRecord],
1648    submodules: &[(String, PathBuf)],
1649) -> Vec<SubmoduleSummary> {
1650    submodules
1651        .iter()
1652        .map(|(name, path)| {
1653            let files: Vec<&FileRecord> = analyzed
1654                .iter()
1655                .filter(|f| f.submodule.as_deref() == Some(name.as_str()))
1656                .collect();
1657
1658            let files_analyzed = files.len() as u64;
1659            let total_physical_lines = files
1660                .iter()
1661                .map(|f| f.raw_line_categories.total_physical_lines)
1662                .sum();
1663            let code_lines = files.iter().map(|f| f.effective_counts.code_lines).sum();
1664            let comment_lines = files.iter().map(|f| f.effective_counts.comment_lines).sum();
1665            let blank_lines = files.iter().map(|f| f.effective_counts.blank_lines).sum();
1666            let language_summaries = build_language_summaries_from_slice(&files);
1667
1668            SubmoduleSummary {
1669                name: name.clone(),
1670                relative_path: path.to_string_lossy().replace('\\', "/"),
1671                files_analyzed,
1672                total_physical_lines,
1673                code_lines,
1674                comment_lines,
1675                blank_lines,
1676                language_summaries,
1677            }
1678        })
1679        .filter(|s| s.files_analyzed > 0)
1680        .collect()
1681}
1682
1683/// Dominant indent label from vote counts.
1684#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
1685fn dominant_indent_label(files: &[&StyleAnalysis]) -> String {
1686    let mut votes = [0u32; 6];
1687    for f in files {
1688        let idx = match f.indent_style {
1689            IndentStyle::Tabs => 0,
1690            IndentStyle::Spaces2 => 1,
1691            IndentStyle::Spaces4 => 2,
1692            IndentStyle::Spaces8 => 3,
1693            IndentStyle::Mixed => 4,
1694            IndentStyle::Unknown => 5,
1695        };
1696        votes[idx] += 1;
1697    }
1698    let labels = ["Tabs", "2-Space", "4-Space", "8-Space", "Mixed", "\u{2014}"];
1699    labels[votes
1700        .iter()
1701        .enumerate()
1702        .max_by_key(|(_, v)| *v)
1703        .map(|(i, _)| i)
1704        .unwrap_or(5)]
1705    .to_string()
1706}
1707
1708/// Line-80 compliance percentage for a slice of style analyses.
1709#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
1710fn line80_pct(files: &[&StyleAnalysis]) -> u8 {
1711    if files.is_empty() {
1712        return 0;
1713    }
1714    let compliant = files
1715        .iter()
1716        .filter(|f| f.total_lines == 0 || (f.lines_over_80 as f32 / f.total_lines as f32) <= 0.05)
1717        .count() as u32;
1718    ((compliant * 100) / files.len() as u32) as u8
1719}
1720
1721/// Column-N compliance percentage using the configured threshold (80, 100, or 120).
1722/// Falls back to the 80-col bucket for any threshold ≤ 80.
1723#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
1724fn line_col_pct(files: &[&StyleAnalysis], threshold: u16) -> u8 {
1725    if files.is_empty() {
1726        return 0;
1727    }
1728    let compliant = files
1729        .iter()
1730        .filter(|f| {
1731            let over = if threshold <= 80 {
1732                f.lines_over_80
1733            } else if threshold <= 100 {
1734                f.lines_over_100
1735            } else {
1736                f.lines_over_120
1737            };
1738            f.total_lines == 0 || (over as f32 / f.total_lines as f32) <= 0.05
1739        })
1740        .count() as u32;
1741    ((compliant * 100) / files.len() as u32) as u8
1742}
1743
1744/// Build a `LanguageStyleGroup` from a non-empty slice of `StyleAnalysis` for one family.
1745#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
1746fn build_language_group(
1747    family: &str,
1748    files: &[&StyleAnalysis],
1749    col_threshold: u16,
1750) -> LanguageStyleGroup {
1751    let count = files.len() as u32;
1752
1753    // Collect every unique guide name across all files in this group.
1754    let mut all_names: Vec<String> = Vec::new();
1755    for f in files {
1756        for g in &f.guide_scores {
1757            if !all_names.contains(&g.name) {
1758                all_names.push(g.name.clone());
1759            }
1760        }
1761    }
1762
1763    let mut guide_avg_scores: Vec<(String, u8)> = all_names
1764        .into_iter()
1765        .map(|name| {
1766            let sum: u32 = files
1767                .iter()
1768                .filter_map(|f| f.guide_scores.iter().find(|g| g.name == name))
1769                .map(|g| u32::from(g.score_pct))
1770                .sum();
1771            let avg = (sum / count) as u8;
1772            (name, avg)
1773        })
1774        .collect();
1775    guide_avg_scores.sort_by_key(|s| std::cmp::Reverse(s.1));
1776
1777    let (dominant_guide, dominant_score_pct) = guide_avg_scores
1778        .first()
1779        .map(|(n, s)| (n.clone(), *s))
1780        .unwrap_or_default();
1781
1782    let lcp = line_col_pct(files, col_threshold);
1783    LanguageStyleGroup {
1784        language_family: family.to_string(),
1785        files_count: count,
1786        dominant_guide,
1787        dominant_score_pct,
1788        common_indent_style: dominant_indent_label(files),
1789        guide_avg_scores,
1790        line80_compliant_pct: line80_pct(files),
1791        line_col_compliant_pct: lcp,
1792    }
1793}
1794
1795/// Build aggregate multi-language style-guide adherence.
1796/// Returns `None` when no files had style data.
1797#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
1798fn build_style_summary(analyzed: &[FileRecord], col_threshold: u16) -> Option<StyleSummary> {
1799    let all_style: Vec<&StyleAnalysis> = analyzed
1800        .iter()
1801        .filter_map(|f| f.style_analysis.as_ref())
1802        .collect();
1803
1804    if all_style.is_empty() {
1805        return None;
1806    }
1807
1808    // Group by language_family.
1809    let mut families: std::collections::BTreeMap<&str, Vec<&StyleAnalysis>> =
1810        std::collections::BTreeMap::new();
1811    for sa in &all_style {
1812        families
1813            .entry(sa.language_family.as_str())
1814            .or_default()
1815            .push(sa);
1816    }
1817
1818    let mut by_language: Vec<LanguageStyleGroup> = families
1819        .iter()
1820        .map(|(family, files)| build_language_group(family, files, col_threshold))
1821        .collect();
1822    by_language.sort_by_key(|g| std::cmp::Reverse(g.files_count));
1823
1824    let files_analyzed = all_style.len() as u32;
1825    let common_indent_style = dominant_indent_label(&all_style);
1826    let line80_compliant_pct = line80_pct(&all_style);
1827    let line_col_compliant_pct = line_col_pct(&all_style, col_threshold);
1828
1829    Some(StyleSummary {
1830        files_analyzed,
1831        common_indent_style,
1832        line80_compliant_pct,
1833        line_col_compliant_pct,
1834        col_threshold,
1835        by_language,
1836    })
1837}
1838
1839fn build_language_summaries_from_slice(files: &[&FileRecord]) -> Vec<LanguageSummary> {
1840    let mut map: BTreeMap<String, LanguageSummary> = BTreeMap::new();
1841    for file in files {
1842        let Some(lang) = file.language else { continue };
1843        let entry = map
1844            .entry(lang.display_name().to_string())
1845            .or_insert_with(|| zeroed_summary(lang));
1846        accumulate_record_into_summary(entry, file);
1847    }
1848    map.into_values().collect()
1849}
1850
/// Whether the final component of `path` is exactly `expected`.
///
/// Paths without a final file name, or whose name is not valid UTF-8, never
/// match.
fn file_name_eq(path: &Path, expected: &str) -> bool {
    match path.file_name().and_then(|name| name.to_str()) {
        Some(name) => name == expected,
        None => false,
    }
}
1856
/// Whether any component of `path` is exactly equal to an entry of
/// `excluded_dirs`.
///
/// Components that are not valid UTF-8 are skipped.
fn is_excluded_dir_path(path: &Path, excluded_dirs: &[String]) -> bool {
    for component in path.components() {
        let Some(segment) = component.as_os_str().to_str() else {
            continue;
        };
        if excluded_dirs.iter().any(|dir| dir == segment) {
            return true;
        }
    }
    false
}
1865
/// Whether any component of `path` is named `vendor`, `node_modules`, or
/// `packages`.
///
/// Components that are not valid UTF-8 are skipped.
fn is_vendor_path(path: &Path) -> bool {
    const VENDOR_DIRS: [&str; 3] = ["vendor", "node_modules", "packages"];

    path.components()
        .filter_map(|component| component.as_os_str().to_str())
        .any(|segment| VENDOR_DIRS.contains(&segment))
}
1874
/// Whether the file name of `path` is one of the recognized lockfile names.
///
/// The comparison is exact and case-sensitive; paths without a UTF-8 file
/// name never match.
fn is_known_lockfile(path: &Path) -> bool {
    const LOCKFILE_NAMES: [&str; 7] = [
        "Cargo.lock",
        "package-lock.json",
        "yarn.lock",
        "pnpm-lock.yaml",
        "Pipfile.lock",
        "poetry.lock",
        "composer.lock",
    ];

    match path.file_name().and_then(|name| name.to_str()) {
        Some(name) => LOCKFILE_NAMES.contains(&name),
        None => false,
    }
}
1891
1892fn looks_generated(path: &Path, bytes: &[u8]) -> bool {
1893    let file_name = path
1894        .file_name()
1895        .and_then(|name| name.to_str())
1896        .unwrap_or_default();
1897    if file_name.contains(".generated.") || file_name.contains(".g.") {
1898        return true;
1899    }
1900
1901    let sample = String::from_utf8_lossy(&bytes[..bytes.len().min(GENERATED_SAMPLE_BYTES)])
1902        .to_ascii_lowercase();
1903    sample.contains("@generated") || sample.contains("generated by")
1904}
1905
1906fn looks_minified(path: &Path, bytes: &[u8]) -> bool {
1907    let file_name = path
1908        .file_name()
1909        .and_then(|name| name.to_str())
1910        .unwrap_or_default();
1911    if file_name.contains(".min.") {
1912        return true;
1913    }
1914
1915    let sample = String::from_utf8_lossy(&bytes[..bytes.len().min(MINIFIED_SAMPLE_BYTES)]);
1916    let longest_line = sample.lines().map(str::len).max().unwrap_or(0);
1917    let whitespace = sample.chars().filter(|c| c.is_whitespace()).count();
1918    longest_line > MINIFIED_LINE_THRESHOLD && whitespace * 100 < sample.len().max(1)
1919}
1920
1921fn is_binary(bytes: &[u8]) -> bool {
1922    if bytes.starts_with(&[0xEF, 0xBB, 0xBF])
1923        || bytes.starts_with(&[0xFF, 0xFE])
1924        || bytes.starts_with(&[0xFE, 0xFF])
1925    {
1926        return false;
1927    }
1928
1929    let sample = &bytes[..bytes.len().min(BINARY_SAMPLE_BYTES)];
1930    sample.contains(&0)
1931}
1932
1933/// Decode a BOM-stripped UTF-16 byte slice using the given encoding.
1934/// Returns `(text, encoding_label, warnings)`.
1935fn decode_utf16_bom(
1936    bom_stripped: &[u8],
1937    encoding: &'static encoding_rs::Encoding,
1938    label: &str,
1939) -> (String, String, Vec<String>) {
1940    let (cow, _, had_errors) = encoding.decode(bom_stripped);
1941    let mut warnings = Vec::new();
1942    if had_errors {
1943        warnings.push(format!("{label} decode contained replacement characters"));
1944    }
1945    (cow.into_owned(), label.into(), warnings)
1946}
1947
1948fn decode_bytes(bytes: &[u8]) -> std::result::Result<(String, String, Vec<String>), String> {
1949    if bytes.starts_with(&[0xEF, 0xBB, 0xBF]) {
1950        let text = String::from_utf8(bytes[3..].to_vec()).map_err(|err| err.to_string())?;
1951        return Ok((text, "utf-8-bom".into(), vec![]));
1952    }
1953    if bytes.starts_with(&[0xFF, 0xFE]) {
1954        return Ok(decode_utf16_bom(&bytes[2..], UTF_16LE, "utf-16le"));
1955    }
1956    if bytes.starts_with(&[0xFE, 0xFF]) {
1957        return Ok(decode_utf16_bom(&bytes[2..], UTF_16BE, "utf-16be"));
1958    }
1959
1960    // Multiple statements in the else branch make map_or_else awkward here.
1961    #[allow(clippy::option_if_let_else)]
1962    if let Ok(text) = String::from_utf8(bytes.to_vec()) {
1963        Ok((text, "utf-8".into(), vec![]))
1964    } else {
1965        let (cow, _, had_errors) = WINDOWS_1252.decode(bytes);
1966        let mut warnings = vec!["decoded using windows-1252 fallback".into()];
1967        if had_errors {
1968            warnings.push("fallback decode contained replacement characters".into());
1969        }
1970        Ok((cow.into_owned(), "windows-1252".into(), warnings))
1971    }
1972}
1973
1974fn compile_globset(patterns: &[String]) -> Result<Option<GlobSet>> {
1975    if patterns.is_empty() {
1976        return Ok(None);
1977    }
1978
1979    let mut builder = GlobSetBuilder::new();
1980    for pattern in patterns {
1981        builder
1982            .add(Glob::new(pattern).with_context(|| format!("invalid glob pattern: {pattern}"))?);
1983    }
1984    Ok(Some(
1985        builder.build().context("failed to compile glob filters")?,
1986    ))
1987}
1988
1989fn parse_enabled_languages(enabled: &[String]) -> Result<Option<BTreeSet<Language>>> {
1990    if enabled.is_empty() {
1991        return Ok(None);
1992    }
1993
1994    let supported = supported_languages();
1995    let mut set = BTreeSet::new();
1996    for name in enabled {
1997        let language = Language::from_name(name)
1998            .with_context(|| format!("unsupported language in config: {name}"))?;
1999        if !supported.contains(&language) {
2000            anyhow::bail!("language {name} is not supported in this build");
2001        }
2002        set.insert(language);
2003    }
2004    Ok(Some(set))
2005}
2006
2007/// # Errors
2008///
2009/// Returns an error if serialization fails or the output file cannot be written.
2010pub fn write_json(run: &AnalysisRun, output_path: &Path) -> Result<()> {
2011    let json = serde_json::to_string_pretty(run).context("failed to serialize analysis run")?;
2012    fs::write(output_path, json)
2013        .with_context(|| format!("failed to write JSON output to {}", output_path.display()))
2014}
2015
2016/// # Errors
2017///
2018/// Returns an error if the file cannot be read or the JSON cannot be parsed.
2019pub fn read_json(path: &Path) -> Result<AnalysisRun> {
2020    let contents = fs::read_to_string(path)
2021        .with_context(|| format!("failed to read result file {}", path.display()))?;
2022    serde_json::from_str(&contents)
2023        .with_context(|| format!("failed to parse JSON result {}", path.display()))
2024}
2025
#[cfg(test)]
mod tests {
    use super::*;

    /// With the code-only policy the result has 5 code lines and 3 comment lines.
    #[test]
    fn effective_counts_respect_code_only_policy() {
        let input = RawLineCounts {
            code_only_lines: 2,
            single_comment_only_lines: 1,
            mixed_code_single_comment_lines: 3,
            docstring_comment_lines: 2,
            ..RawLineCounts::default()
        };

        let effective = compute_effective_counts(&input, MixedLinePolicy::CodeOnly, true, true);

        assert_eq!(effective.code_lines, 5);
        assert_eq!(effective.comment_lines, 3);
    }

    /// With the separate-mixed policy, mixed lines get their own bucket and
    /// neither the code nor the comment total is touched.
    #[test]
    fn effective_counts_can_separate_mixed() {
        let input = RawLineCounts {
            mixed_code_single_comment_lines: 2,
            mixed_code_multi_comment_lines: 1,
            ..RawLineCounts::default()
        };

        let effective =
            compute_effective_counts(&input, MixedLinePolicy::SeparateMixedCategory, true, true);

        assert_eq!(effective.mixed_lines_separate, 3);
        assert_eq!(effective.code_lines, 0);
        assert_eq!(effective.comment_lines, 0);
    }

    /// Byte 0x96 is not valid UTF-8 here, so decoding falls back to
    /// windows-1252, where it maps to an en dash.
    #[test]
    fn windows_1252_fallback_decodes() {
        let input: &[u8] = b"Hello\x96W";

        let (decoded, label, notes) = decode_bytes(input).unwrap();

        assert_eq!(label, "windows-1252");
        assert!(decoded.contains('\u{2013}'));
        assert!(!notes.is_empty());
    }
}