Skip to main content

sloc_core/
lib.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2// Copyright (C) 2026 Nima Shafie <nimzshafie@gmail.com>
3#![allow(clippy::multiple_crate_versions)]
4
5pub mod baseline;
6pub mod coverage;
7pub mod delta;
8pub mod history;
9pub use baseline::{check_against_baseline, resolve_baselines_path, BaselineEntry, BaselineStore};
10pub use coverage::{aggregate_line_coverage, lookup_coverage, parse_lcov, FileCoverage};
11pub use delta::{compute_delta, FileChangeStatus, FileDelta, ScanComparison, SummaryDelta};
12pub use history::{RegistryEntry, ScanRegistry, ScanSummarySnapshot, WatchedDirsStore};
13
14use std::collections::{BTreeMap, BTreeSet, HashSet};
15use std::fs;
16use std::path::{Path, PathBuf};
17use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
18use std::sync::Arc;
19
20use anyhow::{Context, Result};
21use chrono::{DateTime, Utc};
22use encoding_rs::{UTF_16BE, UTF_16LE, WINDOWS_1252};
23use globset::{Glob, GlobSet, GlobSetBuilder};
24use ignore::WalkBuilder;
25use serde::{Deserialize, Serialize};
26use uuid::Uuid;
27
28use sloc_config::{
29    AppConfig, BinaryFileBehavior, BlankInBlockCommentPolicy, ContinuationLinePolicy,
30    FailureBehavior, MixedLinePolicy,
31};
32use sloc_languages::style::IndentStyle;
33use sloc_languages::{
34    analyze_text, detect_language, supported_languages, AnalysisOptions, Language, ParseMode,
35    RawLineCounts, StyleAnalysis,
36};
37
// ── Analysis thread limits, detection sample sizes and thresholds ────────────

/// Upper bound on the number of worker threads spawned for parallel file analysis.
const MAX_ANALYSIS_THREADS: usize = 16;
/// Thread count used when `available_parallelism` cannot report a value.
const DEFAULT_ANALYSIS_THREADS: usize = 4;
/// Size, in bytes, of the sample inspected for `@generated` markers.
const GENERATED_SAMPLE_BYTES: usize = 1_024;
/// Size, in bytes, of the sample fed to the minified-file line-length heuristic.
const MINIFIED_SAMPLE_BYTES: usize = 4_096;
/// A file whose longest line is above this length is considered minified.
const MINIFIED_LINE_THRESHOLD: usize = 2_000;
/// Size, in bytes, of the sample scanned for null bytes when detecting binary files.
const BINARY_SAMPLE_BYTES: usize = 8_192;
52
/// Shared atomic counters that let the caller of `analyze()` poll scan progress
/// while the scan is still running.
pub struct ProgressCounters {
    /// Candidate files processed so far; every worker thread bumps it once per file.
    pub files_done: Arc<AtomicUsize>,
    /// Candidate files discovered; grown by each walked root's path count before
    /// that root's parallel analysis starts.
    pub files_total: Arc<AtomicUsize>,
}
60
61/// Three-way outcome for metadata-level policy checks.
62enum MetadataPolicyOutcome {
63    /// Skip this file — include the record in output.
64    Skip(Box<FileRecord>),
65    /// Exclude this file entirely — no record in output (include-glob miss).
66    Exclude,
67    /// Continue to content checks.
68    Continue,
69}
70
71#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
72#[serde(rename_all = "snake_case")]
73pub enum FileStatus {
74    AnalyzedExact,
75    AnalyzedBestEffort,
76    SkippedBinary,
77    SkippedDecodeError,
78    SkippedUnsupported,
79    SkippedByPolicy,
80    ErrorInternal,
81}
82
83#[derive(Debug, Clone, Serialize, Deserialize, Default)]
84pub struct EffectiveCounts {
85    pub code_lines: u64,
86    pub comment_lines: u64,
87    pub blank_lines: u64,
88    pub mixed_lines_separate: u64,
89}
90
91#[derive(Debug, Clone, Serialize, Deserialize)]
92pub struct ToolMetadata {
93    pub name: String,
94    pub version: String,
95    pub run_id: String,
96    pub timestamp_utc: DateTime<Utc>,
97}
98
99#[derive(Debug, Clone, Serialize, Deserialize)]
100pub struct EnvironmentMetadata {
101    pub operating_system: String,
102    pub architecture: String,
103    pub runtime_mode: String,
104    pub initiator_username: String,
105    pub initiator_hostname: String,
106    /// CI system name when the scan runs inside a known CI environment (Jenkins,
107    /// GitHub Actions, GitLab CI, …). `None` for interactive / local runs.
108    #[serde(default, skip_serializing_if = "Option::is_none")]
109    pub ci_name: Option<String>,
110}
111
112#[derive(Debug, Clone, Serialize, Deserialize, Default)]
113pub struct SummaryTotals {
114    pub files_considered: u64,
115    pub files_analyzed: u64,
116    pub files_skipped: u64,
117    pub total_physical_lines: u64,
118    pub code_lines: u64,
119    pub comment_lines: u64,
120    pub blank_lines: u64,
121    pub mixed_lines_separate: u64,
122    #[serde(default)]
123    pub functions: u64,
124    #[serde(default)]
125    pub classes: u64,
126    #[serde(default)]
127    pub variables: u64,
128    #[serde(default)]
129    pub imports: u64,
130    #[serde(default)]
131    pub test_count: u64,
132    /// Lexically detected test assertion call lines across all analyzed files.
133    #[serde(default)]
134    pub test_assertion_count: u64,
135    /// Lexically detected test suite / fixture / group declaration lines across all analyzed files.
136    #[serde(default)]
137    pub test_suite_count: u64,
138    /// Aggregated from LCOV data when provided.
139    #[serde(default)]
140    pub coverage_lines_found: u64,
141    #[serde(default)]
142    pub coverage_lines_hit: u64,
143    #[serde(default)]
144    pub coverage_functions_found: u64,
145    #[serde(default)]
146    pub coverage_functions_hit: u64,
147    #[serde(default)]
148    pub coverage_branches_found: u64,
149    #[serde(default)]
150    pub coverage_branches_hit: u64,
151}
152
153#[derive(Debug, Clone, Serialize, Deserialize)]
154pub struct LanguageSummary {
155    pub language: Language,
156    pub files: u64,
157    pub total_physical_lines: u64,
158    pub code_lines: u64,
159    pub comment_lines: u64,
160    pub blank_lines: u64,
161    pub mixed_lines_separate: u64,
162    #[serde(default)]
163    pub functions: u64,
164    #[serde(default)]
165    pub classes: u64,
166    #[serde(default)]
167    pub variables: u64,
168    #[serde(default)]
169    pub imports: u64,
170    #[serde(default)]
171    pub test_count: u64,
172    #[serde(default)]
173    pub test_assertion_count: u64,
174    #[serde(default)]
175    pub test_suite_count: u64,
176    #[serde(default)]
177    pub coverage_lines_found: u64,
178    #[serde(default)]
179    pub coverage_lines_hit: u64,
180    #[serde(default)]
181    pub coverage_functions_found: u64,
182    #[serde(default)]
183    pub coverage_functions_hit: u64,
184    #[serde(default)]
185    pub coverage_branches_found: u64,
186    #[serde(default)]
187    pub coverage_branches_hit: u64,
188}
189
190#[derive(Debug, Clone, Serialize, Deserialize)]
191pub struct FileRecord {
192    pub path: String,
193    pub relative_path: String,
194    pub language: Option<Language>,
195    pub size_bytes: u64,
196    pub detected_encoding: Option<String>,
197    pub raw_line_categories: RawLineCounts,
198    pub effective_counts: EffectiveCounts,
199    pub status: FileStatus,
200    pub warnings: Vec<String>,
201    pub generated: bool,
202    pub minified: bool,
203    pub vendor: bool,
204    pub parse_mode: Option<ParseMode>,
205    #[serde(skip_serializing_if = "Option::is_none")]
206    pub submodule: Option<String>,
207    /// Line/function/branch coverage from an external LCOV file, when provided.
208    #[serde(default, skip_serializing_if = "Option::is_none")]
209    pub coverage: Option<FileCoverage>,
210    /// Lexical style-guide adherence analysis; `None` for unsupported languages.
211    #[serde(default, skip_serializing_if = "Option::is_none")]
212    pub style_analysis: Option<StyleAnalysis>,
213}
214
215/// Per-language-family style aggregation within a `StyleSummary`.
216#[derive(Debug, Clone, Serialize, Deserialize)]
217pub struct LanguageStyleGroup {
218    /// Display label, e.g. `"C / C++"`, `"Python"`, `"JavaScript"`.
219    pub language_family: String,
220    /// Number of files in this group.
221    pub files_count: u32,
222    /// Name of the guide with the highest average adherence.
223    pub dominant_guide: String,
224    /// Average adherence of the dominant guide (0–100).
225    pub dominant_score_pct: u8,
226    /// Most common indent style across the group.
227    pub common_indent_style: String,
228    /// Average guide adherence scores (guide name, 0–100) sorted descending.
229    pub guide_avg_scores: Vec<(String, u8)>,
230    /// Percentage of files (0–100) where ≤ 5 % of lines exceed 80 chars.
231    pub line80_compliant_pct: u8,
232}
233
234/// Aggregate multi-language style-guide adherence across all analysed files.
235#[derive(Debug, Clone, Serialize, Deserialize)]
236pub struct StyleSummary {
237    /// Total files for which style data was produced.
238    pub files_analyzed: u32,
239    /// Most common indent style across *all* analysed files.
240    pub common_indent_style: String,
241    /// Percentage of all analysed files (0–100) with ≤ 5 % of lines over 80 chars.
242    pub line80_compliant_pct: u8,
243    /// Per-language-family breakdown, sorted by `files_count` descending.
244    pub by_language: Vec<LanguageStyleGroup>,
245}
246
247/// Backward-compatible alias kept so that `sloc-report` and `sloc-web` can migrate
248/// incrementally without a breaking change on the same release.
249pub type CppStyleSummary = StyleSummary;
250
251/// Per-submodule aggregated stats produced when `submodule_breakdown` is enabled.
252#[derive(Debug, Clone, Serialize, Deserialize)]
253pub struct SubmoduleSummary {
254    pub name: String,
255    pub relative_path: String,
256    pub files_analyzed: u64,
257    pub total_physical_lines: u64,
258    pub code_lines: u64,
259    pub comment_lines: u64,
260    pub blank_lines: u64,
261    pub language_summaries: Vec<LanguageSummary>,
262}
263
264#[derive(Debug, Clone, Serialize, Deserialize)]
265pub struct AnalysisRun {
266    pub tool: ToolMetadata,
267    pub environment: EnvironmentMetadata,
268    pub effective_configuration: AppConfig,
269    pub input_roots: Vec<String>,
270    pub summary_totals: SummaryTotals,
271    pub totals_by_language: Vec<LanguageSummary>,
272    pub per_file_records: Vec<FileRecord>,
273    pub skipped_file_records: Vec<FileRecord>,
274    pub warnings: Vec<String>,
275    /// Non-empty only when `discovery.submodule_breakdown` is enabled.
276    #[serde(default, skip_serializing_if = "Vec::is_empty")]
277    pub submodule_summaries: Vec<SubmoduleSummary>,
278    /// Short git commit SHA (7 chars) at scan time, if the project is a git repo.
279    #[serde(default, skip_serializing_if = "Option::is_none")]
280    pub git_commit_short: Option<String>,
281    /// Full git commit SHA at scan time, if the project is a git repo.
282    #[serde(default, skip_serializing_if = "Option::is_none")]
283    pub git_commit_long: Option<String>,
284    /// Git branch active at scan time, if the project is a git repo.
285    #[serde(default, skip_serializing_if = "Option::is_none")]
286    pub git_branch: Option<String>,
287    /// Author of the last git commit at scan time.
288    #[serde(default, skip_serializing_if = "Option::is_none")]
289    pub git_commit_author: Option<String>,
290    /// Comma-separated git tags pointing at HEAD at scan time.
291    #[serde(default, skip_serializing_if = "Option::is_none")]
292    pub git_tags: Option<String>,
293    /// Nearest ancestor release tag (output of `git describe --tags --abbrev=0`).
294    #[serde(default, skip_serializing_if = "Option::is_none")]
295    pub git_nearest_tag: Option<String>,
296    /// ISO 8601 author-date of the last git commit at scan time.
297    #[serde(default, skip_serializing_if = "Option::is_none")]
298    pub git_commit_date: Option<String>,
299    /// URL of the `origin` remote as recorded in `.git/config` at scan time.
300    #[serde(default, skip_serializing_if = "Option::is_none")]
301    pub git_remote_url: Option<String>,
302    /// Multi-language style-guide adherence; `None` when no supported files were analysed.
303    #[serde(default, skip_serializing_if = "Option::is_none")]
304    pub style_summary: Option<StyleSummary>,
305}
306
/// Git metadata gathered for a run; every field is `None` when it could not be
/// determined, and `Default` yields the all-`None` value.
#[derive(Default)]
struct GitInfo {
    /// First seven characters of `commit_long`.
    commit_short: Option<String>,
    /// Commit SHA that HEAD resolves to.
    commit_long: Option<String>,
    /// Branch named by HEAD, or the branch reported by CI environment variables.
    branch: Option<String>,
    /// Output of `git log -1 --format=%an HEAD`.
    author: Option<String>,
    /// Tags pointing at HEAD, joined with `", "`.
    tags: Option<String>,
    /// Output of `git describe --tags --abbrev=0 HEAD`.
    nearest_tag: Option<String>,
    /// Output of `git log -1 --format=%aI HEAD`.
    commit_date: Option<String>,
    /// URL of the `origin` remote read from the repository's `config` file.
    remote_url: Option<String>,
}
318
319/// Locate the `.git` directory by walking up from `start`.
320/// Handles plain repos, worktrees (`.git` is a file with `gitdir:` pointer), and
321/// submodules. Returns `None` if no git repo is found.
322fn find_git_dir(start: &Path) -> Option<PathBuf> {
323    let mut current = Some(start);
324    while let Some(dir) = current {
325        let candidate = dir.join(".git");
326        if candidate.is_dir() {
327            return Some(candidate);
328        }
329        if candidate.is_file() {
330            if let Some(resolved) = resolve_git_file_pointer(&candidate, dir) {
331                return Some(resolved);
332            }
333        }
334        current = dir.parent();
335    }
336    None
337}
338
/// Follow a `.git` *file* (the pointer used by worktrees and submodules) to the
/// directory it names.
///
/// `file` is the pointer file and `base_dir` the directory that relative pointers
/// are resolved against. Returns `None` when the file cannot be read, does not
/// start with `gitdir: `, or the target is not an existing directory.
fn resolve_git_file_pointer(file: &Path, base_dir: &Path) -> Option<PathBuf> {
    let raw = fs::read_to_string(file).ok()?;
    let target = raw.trim().strip_prefix("gitdir: ")?;

    // Git writes forward slashes; convert them to the platform separator so the
    // Path operations below behave correctly on Windows.
    let native = target.replace('/', std::path::MAIN_SEPARATOR_STR);
    let native_path = Path::new(&native);
    let joined = if native_path.is_absolute() {
        native_path.to_path_buf()
    } else {
        base_dir.join(native_path)
    };

    // Canonicalizing removes ".." components and symlinks. If it fails, keep the
    // plain joined path instead of giving up.
    let git_dir = match joined.canonicalize() {
        Ok(canonical) => canonical,
        Err(_) => joined,
    };
    git_dir.is_dir().then_some(git_dir)
}
363
/// Resolve a git ref name (e.g. `refs/heads/main`) to its commit SHA.
///
/// The loose ref file under `git_dir` is consulted first, then `packed-refs`.
/// In both places the value is accepted only when it is at least 40 characters
/// of ASCII hex digits. Returns `None` when the ref is found in neither place.
///
/// Lines in `packed-refs` that are malformed (no `<sha> <refname>` separator,
/// e.g. a blank line) are skipped rather than aborting the whole lookup.
fn resolve_ref(git_dir: &Path, refname: &str) -> Option<String> {
    /// True when `s` looks like a full object id (≥ 40 hex digits).
    fn is_full_sha(s: &str) -> bool {
        s.len() >= 40 && s.chars().all(|c| c.is_ascii_hexdigit())
    }

    // Build the OS-native path to the loose ref file by joining each
    // forward-slash component individually, so the separator is right on
    // every platform.
    let loose_path = refname
        .split('/')
        .fold(git_dir.to_path_buf(), |path, part| path.join(part));
    if let Ok(raw) = fs::read_to_string(&loose_path) {
        let sha = raw.trim();
        if is_full_sha(sha) {
            return Some(sha.to_string());
        }
    }

    // Packed refs: each entry is "<sha> <refname>". Lines starting with '#'
    // are comments and lines starting with '^' are peeled tag objects.
    // str::lines() copes with both \n and \r\n endings.
    let packed = fs::read_to_string(git_dir.join("packed-refs")).ok()?;
    packed
        .lines()
        .filter(|line| !line.starts_with('#') && !line.starts_with('^'))
        // A line without a space is malformed: skip it and keep searching.
        .filter_map(|line| line.split_once(' '))
        .find(|&(sha, name)| name.trim() == refname && is_full_sha(sha))
        .map(|(sha, _)| sha.to_string())
}
399
/// Extract the value from a `url = <value>` git-config line.
///
/// The key is matched case-insensitively (`url`, `URL`, `Url`, …) because git
/// config variable names are case-insensitive. Spaces and tabs are allowed
/// between the key and `=`, and the value is trimmed.
/// Returns `None` when the line is not a `url` assignment or the value is empty.
fn parse_url_line(line: &str) -> Option<&str> {
    // `get` returns None for lines shorter than three bytes or when byte 3 is
    // not a character boundary, so the slice below cannot panic.
    let key = line.get(..3)?;
    if !key.eq_ignore_ascii_case("url") {
        return None;
    }
    let after_key = line[3..].trim_start_matches([' ', '\t']);
    // Requiring '=' right after the optional whitespace rejects longer keys
    // such as `urlfoo`.
    let url = after_key.strip_prefix('=')?.trim();
    if url.is_empty() {
        None
    } else {
        Some(url)
    }
}
411
412/// Parse `.git/config` and return the URL of the `origin` remote, if present.
413fn read_git_remote_url(git_dir: &Path) -> Option<String> {
414    let config = fs::read_to_string(git_dir.join("config")).ok()?;
415    let mut in_origin = false;
416    for line in config.lines() {
417        let trimmed = line.trim();
418        if trimmed.starts_with('[') {
419            in_origin = trimmed == r#"[remote "origin"]"#;
420        } else if in_origin {
421            if let Some(url) = parse_url_line(trimmed) {
422                return Some(url.to_owned());
423            }
424        }
425    }
426    None
427}
428
429/// Detect git metadata by reading `.git/` files directly — no `git` executable
430/// needed. Falls back gracefully for detached HEADs, shallow clones, and missing
431/// reflogs.
432fn detect_git_for_run(project_path: &Path) -> GitInfo {
433    // Resolve the CI branch early so it can fill in any gap in git metadata.
434    let ci_branch = ci_branch_from_env();
435
436    let Some(git_dir) = find_git_dir(project_path) else {
437        // No .git directory (e.g. scanning a non-repo path in CI). Use whatever
438        // the CI system tells us about the branch.
439        return GitInfo {
440            branch: ci_branch,
441            ..GitInfo::default()
442        };
443    };
444
445    let head_raw = match fs::read_to_string(git_dir.join("HEAD")) {
446        Ok(s) => s.trim().to_string(),
447        Err(_) => {
448            return GitInfo {
449                branch: ci_branch,
450                ..GitInfo::default()
451            }
452        }
453    };
454
455    let (branch_from_head, commit_long) = head_raw.strip_prefix("ref: ").map_or_else(
456        || {
457            if head_raw.len() >= 40 && head_raw.chars().all(|c| c.is_ascii_hexdigit()) {
458                // Detached HEAD — HEAD file is the commit SHA (common in CI checkouts).
459                (None, Some(head_raw[..40].to_string()))
460            } else {
461                (None, None)
462            }
463        },
464        |refname| {
465            let branch = refname
466                .strip_prefix("refs/heads/")
467                .map(|b| b.trim().to_string());
468            let sha = resolve_ref(&git_dir, refname.trim());
469            (branch, sha)
470        },
471    );
472    // Prefer the branch name derived from the HEAD ref; fall back to the CI
473    // env var (covers detached-HEAD checkouts done by Jenkins, GitHub Actions, etc.).
474    let branch = branch_from_head.or(ci_branch);
475
476    let commit_short = commit_long
477        .as_deref()
478        .map(|s| s.chars().take(7).collect::<String>());
479
480    let author = run_git_cmd(project_path, &["log", "-1", "--format=%an", "HEAD"]);
481    let commit_date = run_git_cmd(project_path, &["log", "-1", "--format=%aI", "HEAD"]);
482    let remote_url = read_git_remote_url(&git_dir);
483
484    // Tags and nearest-tag still require git CLI — try it as a best-effort bonus
485    // but don't block on it. If git isn't available these will simply be None.
486    let tags = run_git_cmd(project_path, &["tag", "--points-at", "HEAD"]).map(|t| {
487        t.lines()
488            .filter(|l| !l.is_empty())
489            .collect::<Vec<_>>()
490            .join(", ")
491    });
492    let nearest_tag = run_git_cmd(project_path, &["describe", "--tags", "--abbrev=0", "HEAD"]);
493
494    GitInfo {
495        commit_short,
496        commit_long,
497        branch,
498        author,
499        tags,
500        nearest_tag,
501        commit_date,
502        remote_url,
503    }
504}
505
/// Run `git <args>` inside `dir` as a best-effort, supplemental data source.
///
/// Several executable locations are tried in order. The first invocation that
/// exits successfully with non-empty UTF-8 output supplies the result (trimmed).
/// Returns `None` when every candidate fails.
fn run_git_cmd(dir: &Path, args: &[&str]) -> Option<String> {
    // The bare name covers the normal "git is on PATH" case; the absolute paths
    // help service accounts that run with a stripped PATH. Unix paths simply
    // fail to spawn on Windows and vice-versa.
    const GIT_CANDIDATES: [&str; 7] = [
        "git",
        // Common Linux / macOS install locations
        "/usr/bin/git",
        "/usr/local/bin/git",
        "/opt/homebrew/bin/git",
        // Git for Windows default installation paths
        r"C:\Program Files\Git\cmd\git.exe",
        r"C:\Program Files\Git\bin\git.exe",
        r"C:\Program Files (x86)\Git\cmd\git.exe",
    ];

    GIT_CANDIDATES.iter().find_map(|&exe| {
        let output = std::process::Command::new(exe)
            .args(["-c", "safe.directory=*"])
            .args(args)
            .current_dir(dir)
            .output()
            .ok()?;
        if !output.status.success() {
            return None;
        }
        let stdout = String::from_utf8(output.stdout).ok()?;
        let text = stdout.trim();
        if text.is_empty() {
            None
        } else {
            Some(text.to_string())
        }
    })
}
540
/// Identify the CI system the process is running under, based on environment
/// variables, or return `None` when none of the known markers is present.
///
/// Checks run in a fixed order: Jenkins (any of `JENKINS_URL`, `JENKINS_HOME`,
/// `BUILD_URL` set), then the systems that export a flag equal to `"true"`,
/// then TeamCity (`TEAMCITY_VERSION` set).
fn detect_ci_system() -> Option<&'static str> {
    /// The variable exists and holds valid Unicode.
    fn is_set(name: &str) -> bool {
        std::env::var(name).is_ok()
    }
    /// The variable's value is exactly `"true"`.
    fn is_true(name: &str) -> bool {
        matches!(std::env::var(name).as_deref(), Ok("true"))
    }

    /// `(flag variable, CI display name)`, in detection order.
    const FLAGGED: [(&str, &str); 5] = [
        ("GITHUB_ACTIONS", "GitHub Actions"),
        ("GITLAB_CI", "GitLab CI"),
        ("CIRCLECI", "CircleCI"),
        ("TRAVIS", "Travis CI"),
        ("TF_BUILD", "Azure DevOps"),
    ];

    if ["JENKINS_URL", "JENKINS_HOME", "BUILD_URL"]
        .iter()
        .any(|name| is_set(name))
    {
        return Some("Jenkins");
    }
    if let Some(&(_, label)) = FLAGGED.iter().find(|(flag, _)| is_true(flag)) {
        return Some(label);
    }
    if is_set("TEAMCITY_VERSION") {
        return Some("TeamCity");
    }
    None
}
568
/// Look up the current branch name in well-known CI environment variables.
///
/// Serves as a fallback for detached-HEAD checkouts, which are common in CI.
/// Variables are tried in a fixed order; each value is trimmed and a leading
/// `refs/heads/` (or otherwise `origin/`) is removed. Values that end up empty
/// or equal to `HEAD` are ignored. Returns `None` when nothing usable is set.
fn ci_branch_from_env() -> Option<String> {
    const VARS: &[&str] = &[
        "BRANCH_NAME",        // Jenkins Pipeline
        "GIT_BRANCH",         // Jenkins Freestyle (may carry "origin/<branch>")
        "GITHUB_REF_NAME",    // GitHub Actions
        "CI_COMMIT_BRANCH",   // GitLab CI
        "CIRCLE_BRANCH",      // CircleCI
        "TRAVIS_BRANCH",      // Travis CI
        "BUILD_SOURCEBRANCH", // Azure DevOps (may carry "refs/heads/<branch>")
    ];

    VARS.iter().find_map(|name| {
        let raw = std::env::var(name).ok()?;
        let trimmed = raw.trim();
        let branch = match trimmed.strip_prefix("refs/heads/") {
            Some(rest) => rest,
            None => trimmed.strip_prefix("origin/").unwrap_or(trimmed),
        };
        if branch.is_empty() || branch == "HEAD" {
            None
        } else {
            Some(branch.to_string())
        }
    })
}
595
/// Name of the user running the process: the `USERNAME` environment variable,
/// else `USER`, else the literal `"unknown"`.
fn get_current_username() -> String {
    match std::env::var("USERNAME") {
        Ok(name) => name,
        Err(_) => std::env::var("USER").unwrap_or_else(|_| "unknown".to_string()),
    }
}
601
/// Value of the environment variable `var`, or `None` when it is unset, not
/// valid Unicode, or the empty string.
fn non_empty_env(var: &str) -> Option<String> {
    std::env::var(var).ok().filter(|value| !value.is_empty())
}
610
/// True when any of the environment variables `JENKINS_URL`, `JENKINS_HOME`
/// or `BUILD_URL` is set.
fn is_jenkins_env() -> bool {
    ["JENKINS_URL", "JENKINS_HOME", "BUILD_URL"]
        .iter()
        .any(|name| std::env::var(name).is_ok())
}
616
617fn get_hostname() -> String {
618    // In CI environments prefer a human-readable agent/runner identifier over
619    // whatever hostname the container was assigned.
620    if is_jenkins_env() {
621        if let Some(n) = non_empty_env("NODE_NAME") {
622            return n;
623        }
624    }
625    if std::env::var("GITHUB_ACTIONS").as_deref() == Ok("true") {
626        if let Some(r) = non_empty_env("RUNNER_NAME") {
627            return r;
628        }
629    }
630    if std::env::var("GITLAB_CI").as_deref() == Ok("true") {
631        if let Some(r) = non_empty_env("CI_RUNNER_DESCRIPTION") {
632            return r;
633        }
634    }
635    std::env::var("COMPUTERNAME")
636        .or_else(|_| std::env::var("HOSTNAME"))
637        .or_else(|_| std::fs::read_to_string("/etc/hostname").map(|s| s.trim().to_string()))
638        .unwrap_or_else(|_| "unknown".to_string())
639}
640
641/// Walk a single directory root and collect file records into the output vectors.
642#[allow(clippy::too_many_arguments)]
643fn walk_root(
644    root: &Path,
645    config: &AppConfig,
646    include_globs: Option<&GlobSet>,
647    exclude_globs: Option<&GlobSet>,
648    enabled_languages: Option<&BTreeSet<Language>>,
649    seen_paths: &mut HashSet<PathBuf>,
650    analyzed: &mut Vec<FileRecord>,
651    skipped: &mut Vec<FileRecord>,
652    warnings: &mut Vec<String>,
653    cancel: Option<&AtomicBool>,
654    progress: Option<&ProgressCounters>,
655) -> Result<()> {
656    let mut builder = WalkBuilder::new(root);
657    builder
658        .follow_links(config.discovery.follow_symlinks)
659        .hidden(config.discovery.ignore_hidden_files)
660        .ignore(config.discovery.honor_ignore_files)
661        .parents(config.discovery.honor_ignore_files)
662        .git_ignore(config.discovery.honor_ignore_files)
663        .git_global(config.discovery.honor_ignore_files)
664        .git_exclude(config.discovery.honor_ignore_files);
665
666    let paths = collect_walk_paths(&builder, seen_paths, warnings);
667    if paths.is_empty() {
668        return Ok(());
669    }
670
671    if let Some(p) = progress {
672        p.files_total.fetch_add(paths.len(), Ordering::Relaxed);
673    }
674
675    let chunk_results = run_parallel_analysis(
676        &paths,
677        root,
678        config,
679        include_globs,
680        exclude_globs,
681        enabled_languages,
682        cancel,
683        progress,
684    )?;
685    merge_chunk_results(chunk_results, analyzed, skipped, warnings)
686}
687
688fn collect_walk_paths(
689    builder: &WalkBuilder,
690    seen_paths: &mut HashSet<PathBuf>,
691    warnings: &mut Vec<String>,
692) -> Vec<PathBuf> {
693    // build_parallel() walks the directory tree across multiple threads (work-stealing
694    // internally), which is meaningfully faster for deeply nested repos with many directories.
695    // We collect results via an MPSC channel so each walker thread sends without contention.
696    let (tx, rx) = std::sync::mpsc::channel::<std::result::Result<PathBuf, String>>();
697
698    builder.build_parallel().run(|| {
699        let tx = tx.clone();
700        Box::new(move |entry| {
701            match entry {
702                Err(e) => {
703                    let _ = tx.send(Err(format!("discovery warning: {e}")));
704                }
705                Ok(e) => {
706                    let path = e.into_path();
707                    if !path.is_dir() {
708                        let _ = tx.send(Ok(path));
709                    }
710                }
711            }
712            ignore::WalkState::Continue
713        })
714    });
715
716    // Drop the sender that the outer scope holds; the per-thread clones were dropped when
717    // run() returned (all threads finished). Dropping this last sender closes the channel.
718    drop(tx);
719
720    rx.into_iter()
721        .filter_map(|msg| match msg {
722            Ok(path) => {
723                if seen_paths.insert(path.clone()) {
724                    Some(path)
725                } else {
726                    None
727                }
728            }
729            Err(warn) => {
730                warnings.push(warn);
731                None
732            }
733        })
734        .collect()
735}
736
737/// Inner work loop executed by each analysis thread.
738#[allow(clippy::too_many_arguments)]
739fn worker_loop(
740    paths: &[PathBuf],
741    root: &Path,
742    config: &AppConfig,
743    include_globs: Option<&GlobSet>,
744    exclude_globs: Option<&GlobSet>,
745    enabled_languages: Option<&BTreeSet<Language>>,
746    cancel: Option<&AtomicBool>,
747    next_index: &AtomicUsize,
748    files_done: Option<&AtomicUsize>,
749) -> Vec<Result<Option<FileRecord>>> {
750    let mut results = Vec::new();
751    loop {
752        if cancel.is_some_and(|c| c.load(Ordering::Relaxed)) {
753            results.push(Err(anyhow::anyhow!("analysis cancelled")));
754            break;
755        }
756        let i = next_index.fetch_add(1, Ordering::Relaxed);
757        if i >= paths.len() {
758            break;
759        }
760        results.push(analyze_candidate_file(
761            &paths[i],
762            root,
763            config,
764            include_globs,
765            exclude_globs,
766            enabled_languages,
767        ));
768        if let Some(fd) = files_done {
769            fd.fetch_add(1, Ordering::Relaxed);
770        }
771    }
772    results
773}
774
775#[allow(clippy::too_many_arguments)]
776fn run_parallel_analysis(
777    paths: &[PathBuf],
778    root: &Path,
779    config: &AppConfig,
780    include_globs: Option<&GlobSet>,
781    exclude_globs: Option<&GlobSet>,
782    enabled_languages: Option<&BTreeSet<Language>>,
783    cancel: Option<&AtomicBool>,
784    progress: Option<&ProgressCounters>,
785) -> Result<Vec<Vec<Result<Option<FileRecord>>>>> {
786    let thread_count = std::thread::available_parallelism().map_or(DEFAULT_ANALYSIS_THREADS, |n| {
787        n.get().min(MAX_ANALYSIS_THREADS)
788    });
789    // Shared work-queue index: each thread atomically claims the next path to process.
790    // This eliminates static-chunk load imbalance — threads that finish early immediately
791    // pick up more work instead of sitting idle while one overloaded chunk finishes.
792    let next_index = AtomicUsize::new(0);
793    let files_done: Option<&AtomicUsize> = progress.map(|p| p.files_done.as_ref());
794
795    std::thread::scope(|s| -> Result<Vec<Vec<Result<Option<FileRecord>>>>> {
796        // IMPORTANT: collect ALL handles before joining any of them.
797        // A lazy spawn-then-join chain would serialize threads one at a time.
798        let mut handles = Vec::with_capacity(thread_count);
799        for _ in 0..thread_count {
800            handles.push(s.spawn(|| {
801                worker_loop(
802                    paths,
803                    root,
804                    config,
805                    include_globs,
806                    exclude_globs,
807                    enabled_languages,
808                    cancel,
809                    &next_index,
810                    files_done,
811                )
812            }));
813        }
814        handles
815            .into_iter()
816            .map(|h| {
817                h.join()
818                    .map_err(|_| anyhow::anyhow!("analysis thread panicked"))
819            })
820            .collect()
821    })
822}
823
824fn merge_chunk_results(
825    chunk_results: Vec<Vec<Result<Option<FileRecord>>>>,
826    analyzed: &mut Vec<FileRecord>,
827    skipped: &mut Vec<FileRecord>,
828    warnings: &mut Vec<String>,
829) -> Result<()> {
830    for chunk in chunk_results {
831        for result in chunk {
832            if let Some(record) = result? {
833                push_record(record, analyzed, skipped, warnings);
834            }
835        }
836    }
837    Ok(())
838}
839
840/// Label each analyzed file with its submodule and build per-submodule summaries.
841fn process_submodules(config: &AppConfig, analyzed: &mut [FileRecord]) -> Vec<SubmoduleSummary> {
842    let root = config.discovery.root_paths[0]
843        .canonicalize()
844        .unwrap_or_else(|_| config.discovery.root_paths[0].clone());
845    let submodules = detect_submodules(&root);
846    if submodules.is_empty() {
847        return Vec::new();
848    }
849
850    for file in analyzed.iter_mut() {
851        for (name, sub_path) in &submodules {
852            let prefix = sub_path.to_string_lossy().replace('\\', "/");
853            let rel = &file.relative_path;
854            if rel == &prefix || rel.starts_with(&format!("{prefix}/")) {
855                file.submodule = Some(name.clone());
856                break;
857            }
858        }
859    }
860
861    build_submodule_summaries(analyzed, &submodules)
862}
863
864/// Assemble the final `AnalysisRun` from collected records and metadata.
865fn assemble_run(
866    config: &AppConfig,
867    runtime_mode: &str,
868    analyzed: Vec<FileRecord>,
869    skipped: Vec<FileRecord>,
870    warnings: Vec<String>,
871    submodule_summaries: Vec<SubmoduleSummary>,
872) -> AnalysisRun {
873    let summary = build_summary(&analyzed, &skipped);
874    let language_summaries = build_language_summaries(&analyzed);
875    let style_summary = build_style_summary(&analyzed);
876
877    let first_root = config
878        .discovery
879        .root_paths
880        .first()
881        .map(|p| p.canonicalize().unwrap_or_else(|_| p.clone()));
882    let git = first_root
883        .as_deref()
884        .map(detect_git_for_run)
885        .unwrap_or_default();
886
887    let now = Utc::now();
888    let run_id = {
889        let uuid_suffix = Uuid::new_v4().simple().to_string();
890        format!("{}-{}", now.format("%Y%m%d-%H%M"), uuid_suffix)
891    };
892
893    AnalysisRun {
894        tool: ToolMetadata {
895            name: "sloc".into(),
896            version: env!("CARGO_PKG_VERSION").into(),
897            run_id,
898            timestamp_utc: now,
899        },
900        environment: EnvironmentMetadata {
901            operating_system: std::env::consts::OS.into(),
902            architecture: std::env::consts::ARCH.into(),
903            runtime_mode: runtime_mode.into(),
904            initiator_username: get_current_username(),
905            initiator_hostname: get_hostname(),
906            ci_name: detect_ci_system().map(str::to_string),
907        },
908        effective_configuration: config.clone(),
909        input_roots: config
910            .discovery
911            .root_paths
912            .iter()
913            .map(|p| path_to_string(p))
914            .collect(),
915        summary_totals: summary,
916        totals_by_language: language_summaries,
917        per_file_records: analyzed,
918        skipped_file_records: skipped,
919        warnings,
920        submodule_summaries,
921        git_commit_short: git.commit_short,
922        git_commit_long: git.commit_long,
923        git_branch: git.branch,
924        git_commit_author: git.author,
925        git_tags: git.tags,
926        git_nearest_tag: git.nearest_tag,
927        git_commit_date: git.commit_date,
928        git_remote_url: git.remote_url,
929        style_summary,
930    }
931}
932
933/// # Errors
934///
935/// Returns an error if the config is invalid, root paths cannot be walked, or any file
936/// analysis step fails in a way that cannot be recovered from.
937#[allow(clippy::too_many_lines)]
938pub fn analyze(
939    config: &AppConfig,
940    runtime_mode: &str,
941    cancel: Option<&AtomicBool>,
942    progress: Option<&ProgressCounters>,
943) -> Result<AnalysisRun> {
944    config.validate()?;
945
946    if config.discovery.root_paths.is_empty() {
947        anyhow::bail!("no input paths were provided");
948    }
949
950    let include_globs = compile_globset(&config.discovery.include_globs)?;
951    let exclude_globs = compile_globset(&config.discovery.exclude_globs)?;
952    let enabled_languages = parse_enabled_languages(&config.analysis.enabled_languages)?;
953
954    let mut analyzed = Vec::new();
955    let mut skipped = Vec::new();
956    let mut warnings = Vec::new();
957    let mut seen_paths = HashSet::new();
958
959    for root in &config.discovery.root_paths {
960        if cancel.is_some_and(|c| c.load(Ordering::Relaxed)) {
961            anyhow::bail!("analysis cancelled");
962        }
963
964        let root = root.canonicalize().unwrap_or_else(|_| root.clone());
965
966        if root.is_file() {
967            if let Some(record) = analyze_candidate_file(
968                &root,
969                root.parent().unwrap_or_else(|| Path::new(".")),
970                config,
971                include_globs.as_ref(),
972                exclude_globs.as_ref(),
973                enabled_languages.as_ref(),
974            )? {
975                push_record(record, &mut analyzed, &mut skipped, &mut warnings);
976            }
977            continue;
978        }
979
980        walk_root(
981            &root,
982            config,
983            include_globs.as_ref(),
984            exclude_globs.as_ref(),
985            enabled_languages.as_ref(),
986            &mut seen_paths,
987            &mut analyzed,
988            &mut skipped,
989            &mut warnings,
990            cancel,
991            progress,
992        )?;
993    }
994
995    analyzed.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
996    skipped.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
997
998    // Submodule detection: label each file with its submodule and build per-submodule summaries.
999    let submodule_summaries = if config.discovery.submodule_breakdown {
1000        process_submodules(config, &mut analyzed)
1001    } else {
1002        Vec::new()
1003    };
1004
1005    attach_coverage(config, &mut analyzed, &mut warnings);
1006
1007    Ok(assemble_run(
1008        config,
1009        runtime_mode,
1010        analyzed,
1011        skipped,
1012        warnings,
1013        submodule_summaries,
1014    ))
1015}
1016
1017fn attach_coverage(config: &AppConfig, analyzed: &mut [FileRecord], warnings: &mut Vec<String>) {
1018    let Some(cov_path) = coverage::resolve_coverage_file(config.analysis.coverage_file.as_deref())
1019    else {
1020        return;
1021    };
1022    tracing::debug!(path = %cov_path.display(), "loading coverage file");
1023    match fs::read_to_string(&cov_path) {
1024        Ok(content) => {
1025            let cov_map = coverage::parse_coverage_auto(&cov_path, &content);
1026            let mut matched: u32 = 0;
1027            let mut unmatched: u32 = 0;
1028            for record in analyzed.iter_mut() {
1029                record.coverage =
1030                    coverage::lookup_coverage(&cov_map, &record.relative_path).cloned();
1031                if record.coverage.is_some() {
1032                    matched += 1;
1033                } else {
1034                    unmatched += 1;
1035                }
1036            }
1037            tracing::debug!(
1038                path = %cov_path.display(),
1039                coverage_entries = cov_map.len(),
1040                files_matched = matched,
1041                files_unmatched = unmatched,
1042                "coverage attached"
1043            );
1044            if unmatched > 0 && matched == 0 {
1045                tracing::warn!(
1046                    path = %cov_path.display(),
1047                    "coverage file loaded but no source files could be matched — check that paths in the coverage report match the scanned directory"
1048                );
1049            }
1050        }
1051        Err(e) => {
1052            tracing::warn!(path = %cov_path.display(), error = %e, "coverage file could not be read");
1053            warnings.push(format!(
1054                "coverage file '{}' could not be read: {e}",
1055                cov_path.display()
1056            ));
1057        }
1058    }
1059}
1060
1061fn push_record(
1062    record: FileRecord,
1063    analyzed: &mut Vec<FileRecord>,
1064    skipped: &mut Vec<FileRecord>,
1065    warnings: &mut Vec<String>,
1066) {
1067    warnings.extend(
1068        record
1069            .warnings
1070            .iter()
1071            .map(|warning| format!("{}: {warning}", record.relative_path)),
1072    );
1073
1074    match record.status {
1075        FileStatus::AnalyzedExact | FileStatus::AnalyzedBestEffort => analyzed.push(record),
1076        _ => skipped.push(record),
1077    }
1078}
1079
1080/// Convenience wrapper: build a boxed `Skip` outcome with a single-item warning message.
1081#[inline]
1082fn skip_with_reason(
1083    path: &Path,
1084    root: &Path,
1085    size: u64,
1086    reason: impl Into<String>,
1087) -> MetadataPolicyOutcome {
1088    MetadataPolicyOutcome::Skip(Box::new(skipped_record(
1089        path,
1090        root,
1091        size,
1092        FileStatus::SkippedByPolicy,
1093        vec![reason.into()],
1094    )))
1095}
1096
1097/// Apply metadata-level policy checks (symlink, name, dir exclusion, size, globs, lockfile).
1098/// Returns `Skip(record)` to skip, `Exclude` to omit from output entirely (include-glob miss),
1099/// or `Continue` to proceed to content checks.
1100#[allow(clippy::too_many_arguments)]
1101fn check_metadata_policy(
1102    path: &Path,
1103    root: &Path,
1104    relative_path: &str,
1105    metadata: &fs::Metadata,
1106    config: &AppConfig,
1107    include_globs: Option<&GlobSet>,
1108    exclude_globs: Option<&GlobSet>,
1109) -> MetadataPolicyOutcome {
1110    let size = metadata.len();
1111
1112    if metadata.file_type().is_symlink() && !config.discovery.follow_symlinks {
1113        return skip_with_reason(path, root, size, "symlink skipped by policy");
1114    }
1115    if file_name_eq(path, ".gitignore") {
1116        return skip_with_reason(path, root, size, ".gitignore is always excluded");
1117    }
1118    if is_excluded_dir_path(path, &config.discovery.excluded_directories) {
1119        return skip_with_reason(path, root, size, "path matched excluded directory setting");
1120    }
1121    if size > config.discovery.max_file_size_bytes {
1122        return skip_with_reason(
1123            path,
1124            root,
1125            size,
1126            format!(
1127                "file exceeded max_file_size_bytes ({})",
1128                config.discovery.max_file_size_bytes
1129            ),
1130        );
1131    }
1132    if let Some(globs) = include_globs {
1133        if !globs.is_match(Path::new(relative_path)) && !globs.is_match(path) {
1134            return MetadataPolicyOutcome::Exclude;
1135        }
1136    }
1137    if let Some(globs) = exclude_globs {
1138        if globs.is_match(Path::new(relative_path)) || globs.is_match(path) {
1139            return skip_with_reason(path, root, size, "path matched exclude glob");
1140        }
1141    }
1142    if is_known_lockfile(path) && !config.analysis.include_lockfiles {
1143        return skip_with_reason(path, root, size, "lockfile skipped by default policy");
1144    }
1145
1146    MetadataPolicyOutcome::Continue
1147}
1148
/// Outcome of the content-level policy checks performed by `check_content_policy`.
struct ContentPolicyResult {
    /// Whether `is_vendor_path` flagged the file's path.
    vendor: bool,
    /// Whether the file was detected as generated; always `false` when the
    /// vendor check already caused a skip or generated-file detection is off.
    generated: bool,
    /// Whether the file was detected as minified; always `false` when an
    /// earlier check already caused a skip or minified-file detection is off.
    minified: bool,
    /// `Some(record)` when the file should be skipped, `None` to keep analyzing.
    skip_record: Option<FileRecord>,
}
1155
1156/// Apply content-level policy checks (vendor, generated, minified).
1157/// `skip_record` is `Some` when the file should be skipped.
1158fn check_content_policy(
1159    path: &Path,
1160    root: &Path,
1161    size_bytes: u64,
1162    bytes: &[u8],
1163    config: &AppConfig,
1164) -> ContentPolicyResult {
1165    let vendor = is_vendor_path(path);
1166    if vendor && config.analysis.vendor_directory_detection {
1167        return ContentPolicyResult {
1168            vendor,
1169            generated: false,
1170            minified: false,
1171            skip_record: Some(skipped_record(
1172                path,
1173                root,
1174                size_bytes,
1175                FileStatus::SkippedByPolicy,
1176                vec!["vendor file skipped by policy".into()],
1177            )),
1178        };
1179    }
1180
1181    let generated = config.analysis.generated_file_detection && looks_generated(path, bytes);
1182    if generated {
1183        return ContentPolicyResult {
1184            vendor,
1185            generated,
1186            minified: false,
1187            skip_record: Some(skipped_record(
1188                path,
1189                root,
1190                size_bytes,
1191                FileStatus::SkippedByPolicy,
1192                vec!["generated file skipped by policy".into()],
1193            )),
1194        };
1195    }
1196
1197    let minified = config.analysis.minified_file_detection && looks_minified(path, bytes);
1198    if minified {
1199        return ContentPolicyResult {
1200            vendor,
1201            generated,
1202            minified,
1203            skip_record: Some(skipped_record(
1204                path,
1205                root,
1206                size_bytes,
1207                FileStatus::SkippedByPolicy,
1208                vec!["minified file skipped by policy".into()],
1209            )),
1210        };
1211    }
1212
1213    ContentPolicyResult {
1214        vendor,
1215        generated,
1216        minified,
1217        skip_record: None,
1218    }
1219}
1220
1221/// Decode file bytes to a UTF-8 string, handling binary detection and decode failures.
1222fn decode_file_contents(
1223    path: &Path,
1224    root: &Path,
1225    size_bytes: u64,
1226    bytes: &[u8],
1227    config: &AppConfig,
1228) -> Result<Option<(String, String, Vec<String>)>> {
1229    if is_binary(bytes) {
1230        return match config.analysis.binary_file_behavior {
1231            BinaryFileBehavior::Skip => Ok(None),
1232            BinaryFileBehavior::Fail => {
1233                anyhow::bail!("binary file encountered: {}", path.display())
1234            }
1235        };
1236    }
1237
1238    match decode_bytes(bytes) {
1239        Ok(result) => Ok(Some(result)),
1240        Err(err) => match config.analysis.decode_failure_behavior {
1241            FailureBehavior::WarnSkip => {
1242                // Caller will handle the None as a SkippedDecodeError record.
1243                // We use a sentinel: return Ok(None) but encode the error into a field.
1244                // Instead, propagate as a skipped record via the caller.
1245                let _ = (path, root, size_bytes); // suppress unused warnings
1246                Err(anyhow::anyhow!("__decode_warn__: {err}"))
1247            }
1248            FailureBehavior::Fail => {
1249                anyhow::bail!("decode failure for {}: {err}", path.display())
1250            }
1251        },
1252    }
1253}
1254
1255#[allow(clippy::too_many_lines)]
1256fn analyze_candidate_file(
1257    path: &Path,
1258    root: &Path,
1259    config: &AppConfig,
1260    include_globs: Option<&GlobSet>,
1261    exclude_globs: Option<&GlobSet>,
1262    enabled_languages: Option<&BTreeSet<Language>>,
1263) -> Result<Option<FileRecord>> {
1264    let metadata = match fs::symlink_metadata(path) {
1265        Ok(metadata) => metadata,
1266        Err(err) => {
1267            return Ok(Some(skipped_record(
1268                path,
1269                root,
1270                0,
1271                FileStatus::ErrorInternal,
1272                vec![format!("failed to read metadata: {err}")],
1273            )));
1274        }
1275    };
1276
1277    let relative_path = relative_path_string(path, root);
1278
1279    // Metadata-level policy checks.
1280    match check_metadata_policy(
1281        path,
1282        root,
1283        &relative_path,
1284        &metadata,
1285        config,
1286        include_globs,
1287        exclude_globs,
1288    ) {
1289        MetadataPolicyOutcome::Skip(record) => return Ok(Some(*record)),
1290        MetadataPolicyOutcome::Exclude => return Ok(None),
1291        MetadataPolicyOutcome::Continue => {}
1292    }
1293
1294    let bytes = match fs::read(path) {
1295        Ok(bytes) => bytes,
1296        Err(err) => {
1297            return Ok(Some(skipped_record(
1298                path,
1299                root,
1300                metadata.len(),
1301                FileStatus::ErrorInternal,
1302                vec![format!("failed to read file: {err}")],
1303            )));
1304        }
1305    };
1306
1307    // Content-level policy checks (vendor, generated, minified).
1308    let content_policy = check_content_policy(path, root, metadata.len(), &bytes, config);
1309    if let Some(record) = content_policy.skip_record {
1310        return Ok(Some(record));
1311    }
1312    let (vendor, generated, minified) = (
1313        content_policy.vendor,
1314        content_policy.generated,
1315        content_policy.minified,
1316    );
1317
1318    // Decode content, handling binary and decode failures.
1319    let (text, encoding, decode_warnings) =
1320        match decode_file_contents(path, root, metadata.len(), &bytes, config) {
1321            Ok(Some(result)) => result,
1322            Ok(None) => {
1323                return Ok(Some(skipped_record(
1324                    path,
1325                    root,
1326                    metadata.len(),
1327                    FileStatus::SkippedBinary,
1328                    vec!["binary file skipped by default".into()],
1329                )));
1330            }
1331            Err(err) => {
1332                let msg = err.to_string();
1333                if let Some(warn_msg) = msg.strip_prefix("__decode_warn__: ") {
1334                    return Ok(Some(skipped_record(
1335                        path,
1336                        root,
1337                        metadata.len(),
1338                        FileStatus::SkippedDecodeError,
1339                        vec![warn_msg.to_string()],
1340                    )));
1341                }
1342                return Err(err);
1343            }
1344        };
1345
1346    let first_line = text.lines().next();
1347    let language = detect_language(
1348        path,
1349        first_line,
1350        &config.analysis.extension_overrides,
1351        config.analysis.shebang_detection,
1352    );
1353
1354    let Some(language) = language else {
1355        return Ok(Some(skipped_record(
1356            path,
1357            root,
1358            metadata.len(),
1359            FileStatus::SkippedUnsupported,
1360            vec!["unsupported or undetected language".into()],
1361        )));
1362    };
1363
1364    if let Some(enabled) = enabled_languages {
1365        if !enabled.contains(&language) {
1366            return Ok(Some(skipped_record(
1367                path,
1368                root,
1369                metadata.len(),
1370                FileStatus::SkippedByPolicy,
1371                vec![format!(
1372                    "language {} disabled by configuration",
1373                    language.display_name()
1374                )],
1375            )));
1376        }
1377    }
1378
1379    let ieee_opts = AnalysisOptions {
1380        blank_in_block_comment_as_comment: config.analysis.blank_in_block_comment_policy
1381            == BlankInBlockCommentPolicy::CountAsComment,
1382        collapse_continuation_lines: config.analysis.continuation_line_policy
1383            == ContinuationLinePolicy::CollapseToLogical,
1384    };
1385    let analysis = analyze_text(language, &text, ieee_opts);
1386    let effective_counts = compute_effective_counts(
1387        &analysis.raw,
1388        config.analysis.mixed_line_policy,
1389        config.analysis.python_docstrings_as_comments,
1390        config.analysis.count_compiler_directives,
1391    );
1392
1393    let mut warnings = decode_warnings;
1394    warnings.extend(analysis.warnings.clone());
1395
1396    Ok(Some(FileRecord {
1397        path: path_to_string(path),
1398        relative_path,
1399        language: Some(language),
1400        size_bytes: metadata.len(),
1401        detected_encoding: Some(encoding),
1402        raw_line_categories: analysis.raw,
1403        effective_counts,
1404        status: match analysis.parse_mode {
1405            ParseMode::Lexical | ParseMode::TreeSitter => FileStatus::AnalyzedExact,
1406            ParseMode::LexicalBestEffort => FileStatus::AnalyzedBestEffort,
1407        },
1408        warnings,
1409        generated,
1410        minified,
1411        vendor,
1412        parse_mode: Some(analysis.parse_mode),
1413        submodule: None,
1414        coverage: None,
1415        style_analysis: analysis.style_analysis,
1416    }))
1417}
1418
1419const fn compute_effective_counts(
1420    raw: &RawLineCounts,
1421    mixed_line_policy: MixedLinePolicy,
1422    python_docstrings_as_comments: bool,
1423    count_compiler_directives: bool,
1424) -> EffectiveCounts {
1425    let mut effective = EffectiveCounts {
1426        code_lines: raw.code_only_lines,
1427        comment_lines: raw.single_comment_only_lines + raw.multi_comment_only_lines,
1428        blank_lines: raw.blank_only_lines,
1429        mixed_lines_separate: 0,
1430    };
1431
1432    if python_docstrings_as_comments {
1433        effective.comment_lines += raw.docstring_comment_lines;
1434    } else {
1435        effective.code_lines += raw.docstring_comment_lines;
1436    }
1437
1438    let mixed_total = raw.mixed_code_single_comment_lines + raw.mixed_code_multi_comment_lines;
1439    match mixed_line_policy {
1440        MixedLinePolicy::CodeOnly => effective.code_lines += mixed_total,
1441        MixedLinePolicy::CodeAndComment => {
1442            effective.code_lines += mixed_total;
1443            effective.comment_lines += mixed_total;
1444        }
1445        MixedLinePolicy::CommentOnly => effective.comment_lines += mixed_total,
1446        MixedLinePolicy::SeparateMixedCategory => effective.mixed_lines_separate += mixed_total,
1447    }
1448
1449    // IEEE 1045-1992 §4.2: optionally exclude preprocessor/compiler directives from code SLOC.
1450    // compiler_directive_lines is a subset of code_only_lines, so subtract it directly.
1451    if !count_compiler_directives {
1452        effective.code_lines = effective
1453            .code_lines
1454            .saturating_sub(raw.compiler_directive_lines);
1455    }
1456
1457    effective
1458}
1459
1460fn build_summary(analyzed: &[FileRecord], skipped: &[FileRecord]) -> SummaryTotals {
1461    let mut summary = SummaryTotals {
1462        files_considered: (analyzed.len() + skipped.len()) as u64,
1463        files_analyzed: analyzed.len() as u64,
1464        files_skipped: skipped.len() as u64,
1465        ..Default::default()
1466    };
1467
1468    for record in analyzed {
1469        summary.total_physical_lines += record.raw_line_categories.total_physical_lines;
1470        summary.code_lines += record.effective_counts.code_lines;
1471        summary.comment_lines += record.effective_counts.comment_lines;
1472        summary.blank_lines += record.effective_counts.blank_lines;
1473        summary.mixed_lines_separate += record.effective_counts.mixed_lines_separate;
1474        summary.functions += record.raw_line_categories.functions;
1475        summary.classes += record.raw_line_categories.classes;
1476        summary.variables += record.raw_line_categories.variables;
1477        summary.imports += record.raw_line_categories.imports;
1478        summary.test_count += record.raw_line_categories.test_count;
1479        summary.test_assertion_count += record.raw_line_categories.test_assertion_count;
1480        summary.test_suite_count += record.raw_line_categories.test_suite_count;
1481        if let Some(cov) = &record.coverage {
1482            summary.coverage_lines_found += u64::from(cov.lines_found);
1483            summary.coverage_lines_hit += u64::from(cov.lines_hit);
1484            summary.coverage_functions_found += u64::from(cov.functions_found);
1485            summary.coverage_functions_hit += u64::from(cov.functions_hit);
1486            summary.coverage_branches_found += u64::from(cov.branches_found);
1487            summary.coverage_branches_hit += u64::from(cov.branches_hit);
1488        }
1489    }
1490
1491    summary
1492}
1493
1494/// Construct a zero-filled `LanguageSummary` for the given language.
1495const fn zeroed_summary(language: Language) -> LanguageSummary {
1496    LanguageSummary {
1497        language,
1498        files: 0,
1499        total_physical_lines: 0,
1500        code_lines: 0,
1501        comment_lines: 0,
1502        blank_lines: 0,
1503        mixed_lines_separate: 0,
1504        functions: 0,
1505        classes: 0,
1506        variables: 0,
1507        imports: 0,
1508        test_count: 0,
1509        test_assertion_count: 0,
1510        test_suite_count: 0,
1511        coverage_lines_found: 0,
1512        coverage_lines_hit: 0,
1513        coverage_functions_found: 0,
1514        coverage_functions_hit: 0,
1515        coverage_branches_found: 0,
1516        coverage_branches_hit: 0,
1517    }
1518}
1519
1520/// Accumulate all per-file counters from `record` into an existing `LanguageSummary`.
1521fn accumulate_record_into_summary(entry: &mut LanguageSummary, record: &FileRecord) {
1522    entry.files += 1;
1523    let r = &record.raw_line_categories;
1524    entry.total_physical_lines += r.total_physical_lines;
1525    entry.code_lines += record.effective_counts.code_lines;
1526    entry.comment_lines += record.effective_counts.comment_lines;
1527    entry.blank_lines += record.effective_counts.blank_lines;
1528    entry.mixed_lines_separate += record.effective_counts.mixed_lines_separate;
1529    entry.functions += r.functions;
1530    entry.classes += r.classes;
1531    entry.variables += r.variables;
1532    entry.imports += r.imports;
1533    entry.test_count += r.test_count;
1534    entry.test_assertion_count += r.test_assertion_count;
1535    entry.test_suite_count += r.test_suite_count;
1536    if let Some(cov) = &record.coverage {
1537        entry.coverage_lines_found += u64::from(cov.lines_found);
1538        entry.coverage_lines_hit += u64::from(cov.lines_hit);
1539        entry.coverage_functions_found += u64::from(cov.functions_found);
1540        entry.coverage_functions_hit += u64::from(cov.functions_hit);
1541        entry.coverage_branches_found += u64::from(cov.branches_found);
1542        entry.coverage_branches_hit += u64::from(cov.branches_hit);
1543    }
1544}
1545
1546fn build_language_summaries(analyzed: &[FileRecord]) -> Vec<LanguageSummary> {
1547    let mut by_language: BTreeMap<Language, LanguageSummary> = BTreeMap::new();
1548    for record in analyzed {
1549        let Some(language) = record.language else {
1550            continue;
1551        };
1552        let entry = by_language
1553            .entry(language)
1554            .or_insert_with(|| zeroed_summary(language));
1555        accumulate_record_into_summary(entry, record);
1556    }
1557    by_language.into_values().collect()
1558}
1559
1560fn skipped_record(
1561    path: &Path,
1562    root: &Path,
1563    size_bytes: u64,
1564    status: FileStatus,
1565    warnings: Vec<String>,
1566) -> FileRecord {
1567    FileRecord {
1568        path: path_to_string(path),
1569        relative_path: relative_path_string(path, root),
1570        language: None,
1571        size_bytes,
1572        detected_encoding: None,
1573        raw_line_categories: RawLineCounts::default(),
1574        effective_counts: EffectiveCounts::default(),
1575        status,
1576        warnings,
1577        generated: false,
1578        minified: false,
1579        vendor: false,
1580        parse_mode: None,
1581        submodule: None,
1582        coverage: None,
1583        style_analysis: None,
1584    }
1585}
1586
/// Render `path` relative to `root` with forward slashes.
///
/// Falls back to the full `path` when it does not start with `root`.
fn relative_path_string(path: &Path, root: &Path) -> String {
    let shown = match path.strip_prefix(root) {
        Ok(remainder) => remainder,
        Err(_) => path,
    };
    let lossy = shown.to_string_lossy();
    lossy.replace('\\', "/")
}
1593
/// Render `path` as a (lossy) string with backslashes replaced by forward slashes.
fn path_to_string(path: &Path) -> String {
    let lossy = path.to_string_lossy();
    lossy.replace('\\', "/")
}
1597
/// Parse `.gitmodules` in `root` and return `(name, relative_path)` for each submodule found.
///
/// Only `[submodule "<name>"]` section headers and their `path = <value>` entries
/// are interpreted; every other line is ignored. A section is reported only if it
/// has both a name and a path. If a section contains several `path` entries the
/// last one wins. The `path` key is matched exactly (case-insensitively), so keys
/// that merely start with `path` are not mistaken for it.
///
/// Returns an empty list when `.gitmodules` is missing or cannot be read.
/// Malformed headers are skipped rather than causing a panic.
#[must_use]
pub fn detect_submodules(root: &Path) -> Vec<(String, PathBuf)> {
    let gitmodules = root.join(".gitmodules");
    if !gitmodules.is_file() {
        return Vec::new();
    }
    let Ok(content) = fs::read_to_string(&gitmodules) else {
        return Vec::new();
    };

    let mut result = Vec::new();
    let mut current_name: Option<String> = None;
    let mut current_path: Option<PathBuf> = None;

    for line in content.lines() {
        let trimmed = line.trim();

        // Strip the prefix first, then the suffix from what remains. Checking both
        // ends against the whole line and slicing by length panics on `[submodule "]`,
        // where the two delimiters overlap on the same quote character.
        let header = trimmed
            .strip_prefix("[submodule \"")
            .and_then(|rest| rest.strip_suffix("\"]"));

        if let Some(name) = header {
            // A new section starts: flush the previous one if it was complete.
            if let (Some(prev_name), Some(prev_path)) = (current_name.take(), current_path.take())
            {
                result.push((prev_name, prev_path));
            }
            current_name = Some(name.to_string());
        } else if let Some((key, value)) = trimmed.split_once('=') {
            if key.trim().eq_ignore_ascii_case("path") {
                current_path = Some(PathBuf::from(value.trim()));
            }
        }
    }
    if let (Some(name), Some(path)) = (current_name, current_path) {
        result.push((name, path));
    }

    result
}
1634
1635fn build_submodule_summaries(
1636    analyzed: &[FileRecord],
1637    submodules: &[(String, PathBuf)],
1638) -> Vec<SubmoduleSummary> {
1639    submodules
1640        .iter()
1641        .map(|(name, path)| {
1642            let files: Vec<&FileRecord> = analyzed
1643                .iter()
1644                .filter(|f| f.submodule.as_deref() == Some(name.as_str()))
1645                .collect();
1646
1647            let files_analyzed = files.len() as u64;
1648            let total_physical_lines = files
1649                .iter()
1650                .map(|f| f.raw_line_categories.total_physical_lines)
1651                .sum();
1652            let code_lines = files.iter().map(|f| f.effective_counts.code_lines).sum();
1653            let comment_lines = files.iter().map(|f| f.effective_counts.comment_lines).sum();
1654            let blank_lines = files.iter().map(|f| f.effective_counts.blank_lines).sum();
1655            let language_summaries = build_language_summaries_from_slice(&files);
1656
1657            SubmoduleSummary {
1658                name: name.clone(),
1659                relative_path: path.to_string_lossy().replace('\\', "/"),
1660                files_analyzed,
1661                total_physical_lines,
1662                code_lines,
1663                comment_lines,
1664                blank_lines,
1665                language_summaries,
1666            }
1667        })
1668        .filter(|s| s.files_analyzed > 0)
1669        .collect()
1670}
1671
1672/// Dominant indent label from vote counts.
1673#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
1674fn dominant_indent_label(files: &[&StyleAnalysis]) -> String {
1675    let mut votes = [0u32; 6];
1676    for f in files {
1677        let idx = match f.indent_style {
1678            IndentStyle::Tabs => 0,
1679            IndentStyle::Spaces2 => 1,
1680            IndentStyle::Spaces4 => 2,
1681            IndentStyle::Spaces8 => 3,
1682            IndentStyle::Mixed => 4,
1683            IndentStyle::Unknown => 5,
1684        };
1685        votes[idx] += 1;
1686    }
1687    let labels = ["Tabs", "2-Space", "4-Space", "8-Space", "Mixed", "\u{2014}"];
1688    labels[votes
1689        .iter()
1690        .enumerate()
1691        .max_by_key(|(_, v)| *v)
1692        .map(|(i, _)| i)
1693        .unwrap_or(5)]
1694    .to_string()
1695}
1696
1697/// Line-80 compliance percentage for a slice of style analyses.
1698#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
1699fn line80_pct(files: &[&StyleAnalysis]) -> u8 {
1700    if files.is_empty() {
1701        return 0;
1702    }
1703    let compliant = files
1704        .iter()
1705        .filter(|f| f.total_lines == 0 || (f.lines_over_80 as f32 / f.total_lines as f32) <= 0.05)
1706        .count() as u32;
1707    ((compliant * 100) / files.len() as u32) as u8
1708}
1709
1710/// Build a `LanguageStyleGroup` from a non-empty slice of `StyleAnalysis` for one family.
1711#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
1712fn build_language_group(family: &str, files: &[&StyleAnalysis]) -> LanguageStyleGroup {
1713    let count = files.len() as u32;
1714
1715    // Collect every unique guide name across all files in this group.
1716    let mut all_names: Vec<String> = Vec::new();
1717    for f in files {
1718        for g in &f.guide_scores {
1719            if !all_names.contains(&g.name) {
1720                all_names.push(g.name.clone());
1721            }
1722        }
1723    }
1724
1725    let mut guide_avg_scores: Vec<(String, u8)> = all_names
1726        .into_iter()
1727        .map(|name| {
1728            let sum: u32 = files
1729                .iter()
1730                .filter_map(|f| f.guide_scores.iter().find(|g| g.name == name))
1731                .map(|g| u32::from(g.score_pct))
1732                .sum();
1733            let avg = (sum / count) as u8;
1734            (name, avg)
1735        })
1736        .collect();
1737    guide_avg_scores.sort_by_key(|s| std::cmp::Reverse(s.1));
1738
1739    let (dominant_guide, dominant_score_pct) = guide_avg_scores
1740        .first()
1741        .map(|(n, s)| (n.clone(), *s))
1742        .unwrap_or_default();
1743
1744    LanguageStyleGroup {
1745        language_family: family.to_string(),
1746        files_count: count,
1747        dominant_guide,
1748        dominant_score_pct,
1749        common_indent_style: dominant_indent_label(files),
1750        guide_avg_scores,
1751        line80_compliant_pct: line80_pct(files),
1752    }
1753}
1754
1755/// Build aggregate multi-language style-guide adherence.
1756/// Returns `None` when no files had style data.
1757#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
1758fn build_style_summary(analyzed: &[FileRecord]) -> Option<StyleSummary> {
1759    let all_style: Vec<&StyleAnalysis> = analyzed
1760        .iter()
1761        .filter_map(|f| f.style_analysis.as_ref())
1762        .collect();
1763
1764    if all_style.is_empty() {
1765        return None;
1766    }
1767
1768    // Group by language_family.
1769    let mut families: std::collections::BTreeMap<&str, Vec<&StyleAnalysis>> =
1770        std::collections::BTreeMap::new();
1771    for sa in &all_style {
1772        families
1773            .entry(sa.language_family.as_str())
1774            .or_default()
1775            .push(sa);
1776    }
1777
1778    let mut by_language: Vec<LanguageStyleGroup> = families
1779        .iter()
1780        .map(|(family, files)| build_language_group(family, files))
1781        .collect();
1782    by_language.sort_by_key(|g| std::cmp::Reverse(g.files_count));
1783
1784    let files_analyzed = all_style.len() as u32;
1785    let common_indent_style = dominant_indent_label(&all_style);
1786    let line80_compliant_pct = line80_pct(&all_style);
1787
1788    Some(StyleSummary {
1789        files_analyzed,
1790        common_indent_style,
1791        line80_compliant_pct,
1792        by_language,
1793    })
1794}
1795
1796fn build_language_summaries_from_slice(files: &[&FileRecord]) -> Vec<LanguageSummary> {
1797    let mut map: BTreeMap<String, LanguageSummary> = BTreeMap::new();
1798    for file in files {
1799        let Some(lang) = file.language else { continue };
1800        let entry = map
1801            .entry(lang.display_name().to_string())
1802            .or_insert_with(|| zeroed_summary(lang));
1803        accumulate_record_into_summary(entry, file);
1804    }
1805    map.into_values().collect()
1806}
1807
/// Returns `true` when the final component of `path` is valid UTF-8 and equals
/// `expected` exactly (case-sensitive).
fn file_name_eq(path: &Path, expected: &str) -> bool {
    let actual = path.file_name().and_then(|raw| raw.to_str());
    actual == Some(expected)
}
1813
/// Returns `true` when any component of `path` exactly matches one of the
/// names in `excluded_dirs`. Components that are not valid UTF-8 never match.
fn is_excluded_dir_path(path: &Path, excluded_dirs: &[String]) -> bool {
    for component in path.components() {
        let Some(part) = component.as_os_str().to_str() else {
            continue;
        };
        if excluded_dirs.iter().any(|dir| dir.as_str() == part) {
            return true;
        }
    }
    false
}
1822
/// Returns `true` when any component of `path` is named `vendor`,
/// `node_modules` or `packages` (exact, case-sensitive match).
fn is_vendor_path(path: &Path) -> bool {
    const VENDOR_DIRS: [&str; 3] = ["vendor", "node_modules", "packages"];

    path.components()
        .filter_map(|component| component.as_os_str().to_str())
        .any(|part| VENDOR_DIRS.contains(&part))
}
1831
/// Returns `true` when the file name of `path` is one of the recognised
/// dependency lockfiles (exact, case-sensitive match).
fn is_known_lockfile(path: &Path) -> bool {
    const LOCKFILES: [&str; 7] = [
        "Cargo.lock",
        "package-lock.json",
        "yarn.lock",
        "pnpm-lock.yaml",
        "Pipfile.lock",
        "poetry.lock",
        "composer.lock",
    ];

    let Some(name) = path.file_name().and_then(|raw| raw.to_str()) else {
        return false;
    };
    LOCKFILES.contains(&name)
}
1848
1849fn looks_generated(path: &Path, bytes: &[u8]) -> bool {
1850    let file_name = path
1851        .file_name()
1852        .and_then(|name| name.to_str())
1853        .unwrap_or_default();
1854    if file_name.contains(".generated.") || file_name.contains(".g.") {
1855        return true;
1856    }
1857
1858    let sample = String::from_utf8_lossy(&bytes[..bytes.len().min(GENERATED_SAMPLE_BYTES)])
1859        .to_ascii_lowercase();
1860    sample.contains("@generated") || sample.contains("generated by")
1861}
1862
1863fn looks_minified(path: &Path, bytes: &[u8]) -> bool {
1864    let file_name = path
1865        .file_name()
1866        .and_then(|name| name.to_str())
1867        .unwrap_or_default();
1868    if file_name.contains(".min.") {
1869        return true;
1870    }
1871
1872    let sample = String::from_utf8_lossy(&bytes[..bytes.len().min(MINIFIED_SAMPLE_BYTES)]);
1873    let longest_line = sample.lines().map(str::len).max().unwrap_or(0);
1874    let whitespace = sample.chars().filter(|c| c.is_whitespace()).count();
1875    longest_line > MINIFIED_LINE_THRESHOLD && whitespace * 100 < sample.len().max(1)
1876}
1877
1878fn is_binary(bytes: &[u8]) -> bool {
1879    if bytes.starts_with(&[0xEF, 0xBB, 0xBF])
1880        || bytes.starts_with(&[0xFF, 0xFE])
1881        || bytes.starts_with(&[0xFE, 0xFF])
1882    {
1883        return false;
1884    }
1885
1886    let sample = &bytes[..bytes.len().min(BINARY_SAMPLE_BYTES)];
1887    sample.contains(&0)
1888}
1889
1890/// Decode a BOM-stripped UTF-16 byte slice using the given encoding.
1891/// Returns `(text, encoding_label, warnings)`.
1892fn decode_utf16_bom(
1893    bom_stripped: &[u8],
1894    encoding: &'static encoding_rs::Encoding,
1895    label: &str,
1896) -> (String, String, Vec<String>) {
1897    let (cow, _, had_errors) = encoding.decode(bom_stripped);
1898    let mut warnings = Vec::new();
1899    if had_errors {
1900        warnings.push(format!("{label} decode contained replacement characters"));
1901    }
1902    (cow.into_owned(), label.into(), warnings)
1903}
1904
1905fn decode_bytes(bytes: &[u8]) -> std::result::Result<(String, String, Vec<String>), String> {
1906    if bytes.starts_with(&[0xEF, 0xBB, 0xBF]) {
1907        let text = String::from_utf8(bytes[3..].to_vec()).map_err(|err| err.to_string())?;
1908        return Ok((text, "utf-8-bom".into(), vec![]));
1909    }
1910    if bytes.starts_with(&[0xFF, 0xFE]) {
1911        return Ok(decode_utf16_bom(&bytes[2..], UTF_16LE, "utf-16le"));
1912    }
1913    if bytes.starts_with(&[0xFE, 0xFF]) {
1914        return Ok(decode_utf16_bom(&bytes[2..], UTF_16BE, "utf-16be"));
1915    }
1916
1917    // Multiple statements in the else branch make map_or_else awkward here.
1918    #[allow(clippy::option_if_let_else)]
1919    if let Ok(text) = String::from_utf8(bytes.to_vec()) {
1920        Ok((text, "utf-8".into(), vec![]))
1921    } else {
1922        let (cow, _, had_errors) = WINDOWS_1252.decode(bytes);
1923        let mut warnings = vec!["decoded using windows-1252 fallback".into()];
1924        if had_errors {
1925            warnings.push("fallback decode contained replacement characters".into());
1926        }
1927        Ok((cow.into_owned(), "windows-1252".into(), warnings))
1928    }
1929}
1930
1931fn compile_globset(patterns: &[String]) -> Result<Option<GlobSet>> {
1932    if patterns.is_empty() {
1933        return Ok(None);
1934    }
1935
1936    let mut builder = GlobSetBuilder::new();
1937    for pattern in patterns {
1938        builder
1939            .add(Glob::new(pattern).with_context(|| format!("invalid glob pattern: {pattern}"))?);
1940    }
1941    Ok(Some(
1942        builder.build().context("failed to compile glob filters")?,
1943    ))
1944}
1945
1946fn parse_enabled_languages(enabled: &[String]) -> Result<Option<BTreeSet<Language>>> {
1947    if enabled.is_empty() {
1948        return Ok(None);
1949    }
1950
1951    let supported = supported_languages();
1952    let mut set = BTreeSet::new();
1953    for name in enabled {
1954        let language = Language::from_name(name)
1955            .with_context(|| format!("unsupported language in config: {name}"))?;
1956        if !supported.contains(&language) {
1957            anyhow::bail!("language {name} is not supported in this build");
1958        }
1959        set.insert(language);
1960    }
1961    Ok(Some(set))
1962}
1963
1964/// # Errors
1965///
1966/// Returns an error if serialization fails or the output file cannot be written.
1967pub fn write_json(run: &AnalysisRun, output_path: &Path) -> Result<()> {
1968    let json = serde_json::to_string_pretty(run).context("failed to serialize analysis run")?;
1969    fs::write(output_path, json)
1970        .with_context(|| format!("failed to write JSON output to {}", output_path.display()))
1971}
1972
1973/// # Errors
1974///
1975/// Returns an error if the file cannot be read or the JSON cannot be parsed.
1976pub fn read_json(path: &Path) -> Result<AnalysisRun> {
1977    let contents = fs::read_to_string(path)
1978        .with_context(|| format!("failed to read result file {}", path.display()))?;
1979    serde_json::from_str(&contents)
1980        .with_context(|| format!("failed to parse JSON result {}", path.display()))
1981}
1982
#[cfg(test)]
mod tests {
    use super::*;

    /// `MixedLinePolicy::CodeOnly` totals for a small hand-built line count.
    #[test]
    fn effective_counts_respect_code_only_policy() {
        let input = RawLineCounts {
            docstring_comment_lines: 2,
            mixed_code_single_comment_lines: 3,
            single_comment_only_lines: 1,
            code_only_lines: 2,
            ..RawLineCounts::default()
        };

        let effective = compute_effective_counts(&input, MixedLinePolicy::CodeOnly, true, true);

        // Expected totals match 2 code-only + 3 mixed lines for code, and
        // 1 single-comment + 2 docstring lines for comments.
        assert_eq!(effective.code_lines, 5);
        assert_eq!(effective.comment_lines, 3);
    }

    /// `MixedLinePolicy::SeparateMixedCategory` reports mixed lines on their own.
    #[test]
    fn effective_counts_can_separate_mixed() {
        let input = RawLineCounts {
            mixed_code_multi_comment_lines: 1,
            mixed_code_single_comment_lines: 2,
            ..RawLineCounts::default()
        };

        let effective =
            compute_effective_counts(&input, MixedLinePolicy::SeparateMixedCategory, true, true);

        assert_eq!(effective.mixed_lines_separate, 3);
        assert_eq!(effective.code_lines, 0);
        assert_eq!(effective.comment_lines, 0);
    }

    /// Bytes that are not valid UTF-8 are decoded through the windows-1252 fallback.
    #[test]
    fn windows_1252_fallback_decodes() {
        // "Hello", then 0x96 (not valid UTF-8 on its own), then "W".
        let input = b"Hello\x96W";

        let (decoded, label, notes) = decode_bytes(input).unwrap();

        assert_eq!(label, "windows-1252");
        assert!(decoded.contains('–'));
        assert!(!notes.is_empty());
    }
}