Skip to main content

sloc_core/
lib.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2// Copyright (C) 2026 Nima Shafie <nimzshafie@gmail.com>
3#![allow(clippy::multiple_crate_versions)]
4
5pub mod baseline;
6pub mod coverage;
7pub mod delta;
8pub mod history;
9pub use baseline::{check_against_baseline, resolve_baselines_path, BaselineEntry, BaselineStore};
10pub use coverage::{aggregate_line_coverage, lookup_coverage, parse_lcov, FileCoverage};
11pub use delta::{compute_delta, FileChangeStatus, FileDelta, ScanComparison, SummaryDelta};
12pub use history::{RegistryEntry, ScanRegistry, ScanSummarySnapshot, WatchedDirsStore};
13
14use std::collections::{BTreeMap, BTreeSet, HashSet};
15use std::fs;
16use std::path::{Path, PathBuf};
17use std::sync::atomic::{AtomicBool, Ordering};
18
19use anyhow::{Context, Result};
20use chrono::{DateTime, Utc};
21use encoding_rs::{UTF_16BE, UTF_16LE, WINDOWS_1252};
22use globset::{Glob, GlobSet, GlobSetBuilder};
23use ignore::WalkBuilder;
24use serde::{Deserialize, Serialize};
25use uuid::Uuid;
26
27use sloc_config::{
28    AppConfig, BinaryFileBehavior, BlankInBlockCommentPolicy, ContinuationLinePolicy,
29    FailureBehavior, MixedLinePolicy,
30};
31use sloc_languages::{
32    analyze_text, detect_language, supported_languages, AnalysisOptions, Language, ParseMode,
33    RawLineCounts,
34};
35
36// ── Detection sample sizes and thresholds ────────────────────────────────────
37
/// Upper bound on the number of worker threads spawned for parallel file analysis.
const MAX_ANALYSIS_THREADS: usize = 16;
/// Worker thread count used when `available_parallelism` cannot report a value.
const DEFAULT_ANALYSIS_THREADS: usize = 4;
/// Number of bytes (1 KiB) sampled when looking for `@generated` markers.
const GENERATED_SAMPLE_BYTES: usize = 1 << 10;
/// Number of bytes (4 KiB) sampled by the line-length heuristic that detects minified files.
const MINIFIED_SAMPLE_BYTES: usize = 4 * 1024;
/// A file whose longest line exceeds this length is considered minified.
const MINIFIED_LINE_THRESHOLD: usize = 2_000;
/// Number of bytes (8 KiB) scanned for null bytes when detecting binary files.
const BINARY_SAMPLE_BYTES: usize = 8 * 1024;
50
/// Three-way outcome for metadata-level policy checks.
///
/// Returned by `check_metadata_policy`. `analyze_candidate_file` maps `Skip` to
/// `Ok(Some(record))`, `Exclude` to `Ok(None)`, and carries on with the file on `Continue`.
enum MetadataPolicyOutcome {
    /// Skip this file — include the record in output.
    Skip(Box<FileRecord>),
    /// Exclude this file entirely — no record in output (include-glob miss).
    Exclude,
    /// Continue to content checks.
    Continue,
}
60
/// Final disposition of a candidate file.
///
/// Serialized in `snake_case` (for example `analyzed_exact`). `push_record` files the two
/// `Analyzed*` variants under the analyzed records and every other variant under the
/// skipped records.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FileStatus {
    // NOTE(review): how exact vs. best-effort is chosen is decided outside this view —
    // presumably it follows the analyzer's `ParseMode`; confirm.
    AnalyzedExact,
    AnalyzedBestEffort,
    /// Binary content was detected and the configured binary behavior is `Skip`.
    SkippedBinary,
    /// Decoding failed and the configured decode-failure behavior is `WarnSkip`.
    SkippedDecodeError,
    /// No language could be detected for the file.
    SkippedUnsupported,
    /// Skipped by a metadata/content policy check, or because its language is disabled.
    SkippedByPolicy,
    /// Reading the file's metadata or contents failed.
    ErrorInternal,
}
72
/// Line counts stored per file in `FileRecord::effective_counts`.
///
/// NOTE(review): presumably these are the raw categories after the configured counting
/// policies (mixed-line policy, docstrings, compiler directives) have been applied by
/// `compute_effective_counts` — confirm against that function.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct EffectiveCounts {
    pub code_lines: u64,
    pub comment_lines: u64,
    pub blank_lines: u64,
    pub mixed_lines_separate: u64,
}
80
/// Identifies the tool build and the individual run that produced an `AnalysisRun`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolMetadata {
    /// Tool name; `assemble_run` sets this to `"sloc"`.
    pub name: String,
    /// Crate version taken from `CARGO_PKG_VERSION` at compile time.
    pub version: String,
    /// Run identifier of the form `<YYYYMMDD-HHMM>-<uuid>`, built in `assemble_run`.
    pub run_id: String,
    /// UTC time captured when the run was assembled.
    pub timestamp_utc: DateTime<Utc>,
}
88
/// Describes the machine and invocation context of an analysis run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnvironmentMetadata {
    /// Value of `std::env::consts::OS`.
    pub operating_system: String,
    /// Value of `std::env::consts::ARCH`.
    pub architecture: String,
    /// Caller-supplied label passed to `analyze` as `runtime_mode`.
    pub runtime_mode: String,
    /// Result of `get_current_username` (`"unknown"` when it cannot be determined).
    pub initiator_username: String,
    /// Result of `get_hostname` (`"unknown"` when it cannot be determined).
    pub initiator_hostname: String,
}
97
/// Run-wide totals stored in `AnalysisRun::summary_totals` (built by `build_summary`
/// from the analyzed and skipped records).
///
/// Fields marked `#[serde(default)]` deserialize to `0` when they are missing from the
/// input — presumably so reports written before those fields existed still load; confirm.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct SummaryTotals {
    pub files_considered: u64,
    pub files_analyzed: u64,
    pub files_skipped: u64,
    pub total_physical_lines: u64,
    pub code_lines: u64,
    pub comment_lines: u64,
    pub blank_lines: u64,
    pub mixed_lines_separate: u64,
    #[serde(default)]
    pub functions: u64,
    #[serde(default)]
    pub classes: u64,
    #[serde(default)]
    pub variables: u64,
    #[serde(default)]
    pub imports: u64,
    #[serde(default)]
    pub test_count: u64,
    /// Lexically detected test assertion call lines across all analyzed files.
    #[serde(default)]
    pub test_assertion_count: u64,
    /// Lexically detected test suite / fixture / group declaration lines across all analyzed files.
    #[serde(default)]
    pub test_suite_count: u64,
    /// Aggregated from LCOV data when provided.
    #[serde(default)]
    pub coverage_lines_found: u64,
    #[serde(default)]
    pub coverage_lines_hit: u64,
    #[serde(default)]
    pub coverage_functions_found: u64,
    #[serde(default)]
    pub coverage_functions_hit: u64,
    #[serde(default)]
    pub coverage_branches_found: u64,
    #[serde(default)]
    pub coverage_branches_hit: u64,
}
138
/// Totals for a single language.
///
/// Used both for the run-wide `AnalysisRun::totals_by_language` list (built by
/// `build_language_summaries`) and inside each `SubmoduleSummary`. Fields marked
/// `#[serde(default)]` deserialize to `0` when they are missing from the input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LanguageSummary {
    pub language: Language,
    pub files: u64,
    pub total_physical_lines: u64,
    pub code_lines: u64,
    pub comment_lines: u64,
    pub blank_lines: u64,
    pub mixed_lines_separate: u64,
    #[serde(default)]
    pub functions: u64,
    #[serde(default)]
    pub classes: u64,
    #[serde(default)]
    pub variables: u64,
    #[serde(default)]
    pub imports: u64,
    #[serde(default)]
    pub test_count: u64,
    #[serde(default)]
    pub test_assertion_count: u64,
    #[serde(default)]
    pub test_suite_count: u64,
    #[serde(default)]
    pub coverage_lines_found: u64,
    #[serde(default)]
    pub coverage_lines_hit: u64,
    #[serde(default)]
    pub coverage_functions_found: u64,
    #[serde(default)]
    pub coverage_functions_hit: u64,
    #[serde(default)]
    pub coverage_branches_found: u64,
    #[serde(default)]
    pub coverage_branches_hit: u64,
}
175
/// Everything recorded about one candidate file, whether it was analyzed or skipped.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileRecord {
    /// The file's path rendered with `path_to_string`.
    pub path: String,
    /// The file's path relative to its scan root, as produced by `relative_path_string`.
    pub relative_path: String,
    /// Detected language; `Some` for analyzed files.
    pub language: Option<Language>,
    /// File length in bytes as reported by the file's metadata.
    pub size_bytes: u64,
    pub detected_encoding: Option<String>,
    pub raw_line_categories: RawLineCounts,
    pub effective_counts: EffectiveCounts,
    /// Decides whether `push_record` files this record as analyzed or skipped.
    pub status: FileStatus,
    /// Per-file warnings; `push_record` also copies each one into the run-level warnings,
    /// prefixed with `relative_path`.
    pub warnings: Vec<String>,
    pub generated: bool,
    pub minified: bool,
    pub vendor: bool,
    pub parse_mode: Option<ParseMode>,
    /// Name of the containing submodule, set by `process_submodules`; omitted from
    /// serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub submodule: Option<String>,
    /// Line/function/branch coverage from an external LCOV file, when provided.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub coverage: Option<FileCoverage>,
}
197
/// Per-submodule aggregated stats produced when `submodule_breakdown` is enabled.
///
/// Instances are built by `build_submodule_summaries`, which `process_submodules` calls
/// after labelling each analyzed file with its submodule.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SubmoduleSummary {
    pub name: String,
    pub relative_path: String,
    pub files_analyzed: u64,
    pub total_physical_lines: u64,
    pub code_lines: u64,
    pub comment_lines: u64,
    pub blank_lines: u64,
    pub language_summaries: Vec<LanguageSummary>,
}
210
/// Complete result of one `analyze` call: metadata, totals, per-file records and warnings.
///
/// The git fields are detected from the first configured root path and are omitted from
/// serialized output when they are `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalysisRun {
    pub tool: ToolMetadata,
    pub environment: EnvironmentMetadata,
    /// Clone of the configuration the run was executed with.
    pub effective_configuration: AppConfig,
    /// The configured root paths, each rendered with `path_to_string`.
    pub input_roots: Vec<String>,
    pub summary_totals: SummaryTotals,
    pub totals_by_language: Vec<LanguageSummary>,
    /// Records whose status is `AnalyzedExact` or `AnalyzedBestEffort`.
    pub per_file_records: Vec<FileRecord>,
    /// Records with any other status.
    pub skipped_file_records: Vec<FileRecord>,
    /// Discovery, coverage and per-file warnings collected during the run.
    pub warnings: Vec<String>,
    /// Non-empty only when `discovery.submodule_breakdown` is enabled.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub submodule_summaries: Vec<SubmoduleSummary>,
    /// Short git commit SHA (7 chars) at scan time, if the project is a git repo.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_short: Option<String>,
    /// Full git commit SHA at scan time, if the project is a git repo.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_long: Option<String>,
    /// Git branch active at scan time, if the project is a git repo.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_branch: Option<String>,
    /// Author of the last git commit at scan time.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_author: Option<String>,
    /// Comma-separated git tags pointing at HEAD at scan time.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_tags: Option<String>,
    /// Nearest ancestor release tag (output of `git describe --tags --abbrev=0`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_nearest_tag: Option<String>,
    /// ISO 8601 author-date of the last git commit at scan time.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_date: Option<String>,
}
247
/// Run `git <args>` with `dir` as the working directory and return its trimmed stdout.
///
/// Returns `None` when the process cannot be started, exits unsuccessfully, prints
/// non-UTF-8 output, or prints nothing but whitespace.
fn run_git_in(dir: &Path, args: &[&str]) -> Option<String> {
    let output = std::process::Command::new("git")
        .args(args)
        .current_dir(dir)
        .output()
        .ok()?;
    if !output.status.success() {
        return None;
    }

    let stdout = String::from_utf8(output.stdout).ok()?;
    let trimmed = stdout.trim();
    if trimmed.is_empty() {
        None
    } else {
        Some(trimmed.to_string())
    }
}
259
/// Git metadata gathered by `detect_git_for_run`.
///
/// Each field is `None` when the corresponding git command fails or prints nothing;
/// the `Default` value has every field set to `None`.
#[derive(Default)]
struct GitInfo {
    // Output of `git rev-parse --short HEAD`.
    commit_short: Option<String>,
    // Output of `git rev-parse HEAD`.
    commit_long: Option<String>,
    // Output of `git branch --show-current`.
    branch: Option<String>,
    // Output of `git log --format=%an -1`.
    author: Option<String>,
    // Tags from `git tag --points-at HEAD`, joined with ", ".
    tags: Option<String>,
    // Output of `git describe --tags --abbrev=0 HEAD`.
    nearest_tag: Option<String>,
    // Output of `git log --format=%aI -1`.
    commit_date: Option<String>,
}
270
271fn detect_git_for_run(project_path: &Path) -> GitInfo {
272    GitInfo {
273        commit_short: run_git_in(project_path, &["rev-parse", "--short", "HEAD"]),
274        commit_long: run_git_in(project_path, &["rev-parse", "HEAD"]),
275        branch: run_git_in(project_path, &["branch", "--show-current"]),
276        author: run_git_in(project_path, &["log", "--format=%an", "-1"]),
277        tags: run_git_in(project_path, &["tag", "--points-at", "HEAD"]).map(|t| {
278            t.lines()
279                .filter(|l| !l.is_empty())
280                .collect::<Vec<_>>()
281                .join(", ")
282        }),
283        nearest_tag: run_git_in(project_path, &["describe", "--tags", "--abbrev=0", "HEAD"]),
284        commit_date: run_git_in(project_path, &["log", "--format=%aI", "-1"]),
285    }
286}
287
/// Name of the user running the tool: the `USERNAME` environment variable, else `USER`,
/// else the literal `"unknown"`.
fn get_current_username() -> String {
    for key in ["USERNAME", "USER"] {
        if let Ok(value) = std::env::var(key) {
            return value;
        }
    }
    "unknown".to_string()
}
293
/// Host name of the machine running the tool.
///
/// Tries the `COMPUTERNAME` and `HOSTNAME` environment variables in that order, then the
/// trimmed contents of `/etc/hostname`, and finally falls back to `"unknown"`.
fn get_hostname() -> String {
    if let Ok(name) = std::env::var("COMPUTERNAME") {
        return name;
    }
    if let Ok(name) = std::env::var("HOSTNAME") {
        return name;
    }
    match std::fs::read_to_string("/etc/hostname") {
        Ok(contents) => contents.trim().to_string(),
        Err(_) => "unknown".to_string(),
    }
}
300
301/// Walk a single directory root and collect file records into the output vectors.
302#[allow(clippy::too_many_arguments)]
303fn walk_root(
304    root: &Path,
305    config: &AppConfig,
306    include_globs: Option<&GlobSet>,
307    exclude_globs: Option<&GlobSet>,
308    enabled_languages: Option<&BTreeSet<Language>>,
309    seen_paths: &mut HashSet<PathBuf>,
310    analyzed: &mut Vec<FileRecord>,
311    skipped: &mut Vec<FileRecord>,
312    warnings: &mut Vec<String>,
313    cancel: Option<&AtomicBool>,
314) -> Result<()> {
315    let mut builder = WalkBuilder::new(root);
316    builder
317        .follow_links(config.discovery.follow_symlinks)
318        .hidden(config.discovery.ignore_hidden_files)
319        .ignore(config.discovery.honor_ignore_files)
320        .parents(config.discovery.honor_ignore_files)
321        .git_ignore(config.discovery.honor_ignore_files)
322        .git_global(config.discovery.honor_ignore_files)
323        .git_exclude(config.discovery.honor_ignore_files);
324
325    let paths = collect_walk_paths(&builder, seen_paths, warnings);
326    if paths.is_empty() {
327        return Ok(());
328    }
329
330    let chunk_results = run_parallel_analysis(
331        &paths,
332        root,
333        config,
334        include_globs,
335        exclude_globs,
336        enabled_languages,
337        cancel,
338    )?;
339    merge_chunk_results(chunk_results, analyzed, skipped, warnings)
340}
341
342fn collect_walk_paths(
343    builder: &WalkBuilder,
344    seen_paths: &mut HashSet<PathBuf>,
345    warnings: &mut Vec<String>,
346) -> Vec<PathBuf> {
347    let mut paths = Vec::new();
348    for entry in builder.build() {
349        let entry = match entry {
350            Ok(e) => e,
351            Err(err) => {
352                warnings.push(format!("discovery warning: {err}"));
353                continue;
354            }
355        };
356        let path = entry.into_path();
357        if path.is_dir() || !seen_paths.insert(path.clone()) {
358            continue;
359        }
360        paths.push(path);
361    }
362    paths
363}
364
365#[allow(clippy::too_many_arguments)]
366fn run_parallel_analysis(
367    paths: &[PathBuf],
368    root: &Path,
369    config: &AppConfig,
370    include_globs: Option<&GlobSet>,
371    exclude_globs: Option<&GlobSet>,
372    enabled_languages: Option<&BTreeSet<Language>>,
373    cancel: Option<&AtomicBool>,
374) -> Result<Vec<Vec<Result<Option<FileRecord>>>>> {
375    let thread_count = std::thread::available_parallelism().map_or(DEFAULT_ANALYSIS_THREADS, |n| {
376        n.get().min(MAX_ANALYSIS_THREADS)
377    });
378    let chunk_size = paths.len().div_ceil(thread_count);
379    std::thread::scope(|s| -> Result<Vec<Vec<Result<Option<FileRecord>>>>> {
380        paths
381            .chunks(chunk_size)
382            .map(|chunk| {
383                s.spawn(move || -> Vec<Result<Option<FileRecord>>> {
384                    let mut results = Vec::with_capacity(chunk.len());
385                    for path in chunk {
386                        if cancel.is_some_and(|c| c.load(Ordering::Relaxed)) {
387                            results.push(Err(anyhow::anyhow!("analysis cancelled")));
388                            break;
389                        }
390                        results.push(analyze_candidate_file(
391                            path,
392                            root,
393                            config,
394                            include_globs,
395                            exclude_globs,
396                            enabled_languages,
397                        ));
398                    }
399                    results
400                })
401            })
402            .map(|h| {
403                h.join()
404                    .map_err(|_| anyhow::anyhow!("analysis thread panicked"))
405            })
406            .collect()
407    })
408}
409
410fn merge_chunk_results(
411    chunk_results: Vec<Vec<Result<Option<FileRecord>>>>,
412    analyzed: &mut Vec<FileRecord>,
413    skipped: &mut Vec<FileRecord>,
414    warnings: &mut Vec<String>,
415) -> Result<()> {
416    for chunk in chunk_results {
417        for result in chunk {
418            if let Some(record) = result? {
419                push_record(record, analyzed, skipped, warnings);
420            }
421        }
422    }
423    Ok(())
424}
425
426/// Label each analyzed file with its submodule and build per-submodule summaries.
427fn process_submodules(config: &AppConfig, analyzed: &mut [FileRecord]) -> Vec<SubmoduleSummary> {
428    let root = config.discovery.root_paths[0]
429        .canonicalize()
430        .unwrap_or_else(|_| config.discovery.root_paths[0].clone());
431    let submodules = detect_submodules(&root);
432    if submodules.is_empty() {
433        return Vec::new();
434    }
435
436    for file in analyzed.iter_mut() {
437        for (name, sub_path) in &submodules {
438            let prefix = sub_path.to_string_lossy().replace('\\', "/");
439            let rel = &file.relative_path;
440            if rel == &prefix || rel.starts_with(&format!("{prefix}/")) {
441                file.submodule = Some(name.clone());
442                break;
443            }
444        }
445    }
446
447    build_submodule_summaries(analyzed, &submodules)
448}
449
450/// Assemble the final `AnalysisRun` from collected records and metadata.
451fn assemble_run(
452    config: &AppConfig,
453    runtime_mode: &str,
454    analyzed: Vec<FileRecord>,
455    skipped: Vec<FileRecord>,
456    warnings: Vec<String>,
457    submodule_summaries: Vec<SubmoduleSummary>,
458) -> AnalysisRun {
459    let summary = build_summary(&analyzed, &skipped);
460    let language_summaries = build_language_summaries(&analyzed);
461
462    let first_root = config
463        .discovery
464        .root_paths
465        .first()
466        .map(|p| p.canonicalize().unwrap_or_else(|_| p.clone()));
467    let git = first_root
468        .as_deref()
469        .map(detect_git_for_run)
470        .unwrap_or_default();
471
472    let now = Utc::now();
473    let run_id = {
474        let uuid_suffix = Uuid::new_v4().simple().to_string();
475        format!("{}-{}", now.format("%Y%m%d-%H%M"), uuid_suffix)
476    };
477
478    AnalysisRun {
479        tool: ToolMetadata {
480            name: "sloc".into(),
481            version: env!("CARGO_PKG_VERSION").into(),
482            run_id,
483            timestamp_utc: now,
484        },
485        environment: EnvironmentMetadata {
486            operating_system: std::env::consts::OS.into(),
487            architecture: std::env::consts::ARCH.into(),
488            runtime_mode: runtime_mode.into(),
489            initiator_username: get_current_username(),
490            initiator_hostname: get_hostname(),
491        },
492        effective_configuration: config.clone(),
493        input_roots: config
494            .discovery
495            .root_paths
496            .iter()
497            .map(|p| path_to_string(p))
498            .collect(),
499        summary_totals: summary,
500        totals_by_language: language_summaries,
501        per_file_records: analyzed,
502        skipped_file_records: skipped,
503        warnings,
504        submodule_summaries,
505        git_commit_short: git.commit_short,
506        git_commit_long: git.commit_long,
507        git_branch: git.branch,
508        git_commit_author: git.author,
509        git_tags: git.tags,
510        git_nearest_tag: git.nearest_tag,
511        git_commit_date: git.commit_date,
512    }
513}
514
515/// # Errors
516///
517/// Returns an error if the config is invalid, root paths cannot be walked, or any file
518/// analysis step fails in a way that cannot be recovered from.
519#[allow(clippy::too_many_lines)]
520pub fn analyze(
521    config: &AppConfig,
522    runtime_mode: &str,
523    cancel: Option<&AtomicBool>,
524) -> Result<AnalysisRun> {
525    config.validate()?;
526
527    if config.discovery.root_paths.is_empty() {
528        anyhow::bail!("no input paths were provided");
529    }
530
531    let include_globs = compile_globset(&config.discovery.include_globs)?;
532    let exclude_globs = compile_globset(&config.discovery.exclude_globs)?;
533    let enabled_languages = parse_enabled_languages(&config.analysis.enabled_languages)?;
534
535    let mut analyzed = Vec::new();
536    let mut skipped = Vec::new();
537    let mut warnings = Vec::new();
538    let mut seen_paths = HashSet::new();
539
540    for root in &config.discovery.root_paths {
541        if cancel.is_some_and(|c| c.load(Ordering::Relaxed)) {
542            anyhow::bail!("analysis cancelled");
543        }
544
545        let root = root.canonicalize().unwrap_or_else(|_| root.clone());
546
547        if root.is_file() {
548            if let Some(record) = analyze_candidate_file(
549                &root,
550                root.parent().unwrap_or_else(|| Path::new(".")),
551                config,
552                include_globs.as_ref(),
553                exclude_globs.as_ref(),
554                enabled_languages.as_ref(),
555            )? {
556                push_record(record, &mut analyzed, &mut skipped, &mut warnings);
557            }
558            continue;
559        }
560
561        walk_root(
562            &root,
563            config,
564            include_globs.as_ref(),
565            exclude_globs.as_ref(),
566            enabled_languages.as_ref(),
567            &mut seen_paths,
568            &mut analyzed,
569            &mut skipped,
570            &mut warnings,
571            cancel,
572        )?;
573    }
574
575    analyzed.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
576    skipped.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
577
578    // Submodule detection: label each file with its submodule and build per-submodule summaries.
579    let submodule_summaries = if config.discovery.submodule_breakdown {
580        process_submodules(config, &mut analyzed)
581    } else {
582        Vec::new()
583    };
584
585    attach_coverage(config, &mut analyzed, &mut warnings);
586
587    Ok(assemble_run(
588        config,
589        runtime_mode,
590        analyzed,
591        skipped,
592        warnings,
593        submodule_summaries,
594    ))
595}
596
597fn attach_coverage(config: &AppConfig, analyzed: &mut [FileRecord], warnings: &mut Vec<String>) {
598    let Some(cov_path) = coverage::resolve_coverage_file(config.analysis.coverage_file.as_deref())
599    else {
600        return;
601    };
602    match fs::read_to_string(&cov_path) {
603        Ok(content) => {
604            let cov_map = coverage::parse_coverage_auto(&cov_path, &content);
605            for record in analyzed.iter_mut() {
606                record.coverage =
607                    coverage::lookup_coverage(&cov_map, &record.relative_path).cloned();
608            }
609        }
610        Err(e) => {
611            warnings.push(format!(
612                "coverage file '{}' could not be read: {e}",
613                cov_path.display()
614            ));
615        }
616    }
617}
618
619fn push_record(
620    record: FileRecord,
621    analyzed: &mut Vec<FileRecord>,
622    skipped: &mut Vec<FileRecord>,
623    warnings: &mut Vec<String>,
624) {
625    warnings.extend(
626        record
627            .warnings
628            .iter()
629            .map(|warning| format!("{}: {warning}", record.relative_path)),
630    );
631
632    match record.status {
633        FileStatus::AnalyzedExact | FileStatus::AnalyzedBestEffort => analyzed.push(record),
634        _ => skipped.push(record),
635    }
636}
637
638/// Convenience wrapper: build a boxed `Skip` outcome with a single-item warning message.
639#[inline]
640fn skip_with_reason(
641    path: &Path,
642    root: &Path,
643    size: u64,
644    reason: impl Into<String>,
645) -> MetadataPolicyOutcome {
646    MetadataPolicyOutcome::Skip(Box::new(skipped_record(
647        path,
648        root,
649        size,
650        FileStatus::SkippedByPolicy,
651        vec![reason.into()],
652    )))
653}
654
655/// Apply metadata-level policy checks (symlink, name, dir exclusion, size, globs, lockfile).
656/// Returns `Skip(record)` to skip, `Exclude` to omit from output entirely (include-glob miss),
657/// or `Continue` to proceed to content checks.
658#[allow(clippy::too_many_arguments)]
659fn check_metadata_policy(
660    path: &Path,
661    root: &Path,
662    relative_path: &str,
663    metadata: &fs::Metadata,
664    config: &AppConfig,
665    include_globs: Option<&GlobSet>,
666    exclude_globs: Option<&GlobSet>,
667) -> MetadataPolicyOutcome {
668    let size = metadata.len();
669
670    if metadata.file_type().is_symlink() && !config.discovery.follow_symlinks {
671        return skip_with_reason(path, root, size, "symlink skipped by policy");
672    }
673    if file_name_eq(path, ".gitignore") {
674        return skip_with_reason(path, root, size, ".gitignore is always excluded");
675    }
676    if is_excluded_dir_path(path, &config.discovery.excluded_directories) {
677        return skip_with_reason(path, root, size, "path matched excluded directory setting");
678    }
679    if size > config.discovery.max_file_size_bytes {
680        return skip_with_reason(
681            path,
682            root,
683            size,
684            format!(
685                "file exceeded max_file_size_bytes ({})",
686                config.discovery.max_file_size_bytes
687            ),
688        );
689    }
690    if let Some(globs) = include_globs {
691        if !globs.is_match(Path::new(relative_path)) && !globs.is_match(path) {
692            return MetadataPolicyOutcome::Exclude;
693        }
694    }
695    if let Some(globs) = exclude_globs {
696        if globs.is_match(Path::new(relative_path)) || globs.is_match(path) {
697            return skip_with_reason(path, root, size, "path matched exclude glob");
698        }
699    }
700    if is_known_lockfile(path) && !config.analysis.include_lockfiles {
701        return skip_with_reason(path, root, size, "lockfile skipped by default policy");
702    }
703
704    MetadataPolicyOutcome::Continue
705}
706
/// Result of `check_content_policy`: the detection flags plus an optional skip record.
struct ContentPolicyResult {
    // Whether `is_vendor_path` matched the file's path.
    vendor: bool,
    // Whether generated-file detection is enabled and flagged the file.
    generated: bool,
    // Whether minified-file detection is enabled and flagged the file.
    minified: bool,
    // `Some` when the file must be skipped; the record carries the reason as a warning.
    skip_record: Option<FileRecord>,
}
713
714/// Apply content-level policy checks (vendor, generated, minified).
715/// `skip_record` is `Some` when the file should be skipped.
716fn check_content_policy(
717    path: &Path,
718    root: &Path,
719    size_bytes: u64,
720    bytes: &[u8],
721    config: &AppConfig,
722) -> ContentPolicyResult {
723    let vendor = is_vendor_path(path);
724    if vendor && config.analysis.vendor_directory_detection {
725        return ContentPolicyResult {
726            vendor,
727            generated: false,
728            minified: false,
729            skip_record: Some(skipped_record(
730                path,
731                root,
732                size_bytes,
733                FileStatus::SkippedByPolicy,
734                vec!["vendor file skipped by policy".into()],
735            )),
736        };
737    }
738
739    let generated = config.analysis.generated_file_detection && looks_generated(path, bytes);
740    if generated {
741        return ContentPolicyResult {
742            vendor,
743            generated,
744            minified: false,
745            skip_record: Some(skipped_record(
746                path,
747                root,
748                size_bytes,
749                FileStatus::SkippedByPolicy,
750                vec!["generated file skipped by policy".into()],
751            )),
752        };
753    }
754
755    let minified = config.analysis.minified_file_detection && looks_minified(path, bytes);
756    if minified {
757        return ContentPolicyResult {
758            vendor,
759            generated,
760            minified,
761            skip_record: Some(skipped_record(
762                path,
763                root,
764                size_bytes,
765                FileStatus::SkippedByPolicy,
766                vec!["minified file skipped by policy".into()],
767            )),
768        };
769    }
770
771    ContentPolicyResult {
772        vendor,
773        generated,
774        minified,
775        skip_record: None,
776    }
777}
778
779/// Decode file bytes to a UTF-8 string, handling binary detection and decode failures.
780fn decode_file_contents(
781    path: &Path,
782    root: &Path,
783    size_bytes: u64,
784    bytes: &[u8],
785    config: &AppConfig,
786) -> Result<Option<(String, String, Vec<String>)>> {
787    if is_binary(bytes) {
788        return match config.analysis.binary_file_behavior {
789            BinaryFileBehavior::Skip => Ok(None),
790            BinaryFileBehavior::Fail => {
791                anyhow::bail!("binary file encountered: {}", path.display())
792            }
793        };
794    }
795
796    match decode_bytes(bytes) {
797        Ok(result) => Ok(Some(result)),
798        Err(err) => match config.analysis.decode_failure_behavior {
799            FailureBehavior::WarnSkip => {
800                // Caller will handle the None as a SkippedDecodeError record.
801                // We use a sentinel: return Ok(None) but encode the error into a field.
802                // Instead, propagate as a skipped record via the caller.
803                let _ = (path, root, size_bytes); // suppress unused warnings
804                Err(anyhow::anyhow!("__decode_warn__: {err}"))
805            }
806            FailureBehavior::Fail => {
807                anyhow::bail!("decode failure for {}: {err}", path.display())
808            }
809        },
810    }
811}
812
813#[allow(clippy::too_many_lines)]
814fn analyze_candidate_file(
815    path: &Path,
816    root: &Path,
817    config: &AppConfig,
818    include_globs: Option<&GlobSet>,
819    exclude_globs: Option<&GlobSet>,
820    enabled_languages: Option<&BTreeSet<Language>>,
821) -> Result<Option<FileRecord>> {
822    let metadata = match fs::symlink_metadata(path) {
823        Ok(metadata) => metadata,
824        Err(err) => {
825            return Ok(Some(skipped_record(
826                path,
827                root,
828                0,
829                FileStatus::ErrorInternal,
830                vec![format!("failed to read metadata: {err}")],
831            )));
832        }
833    };
834
835    let relative_path = relative_path_string(path, root);
836
837    // Metadata-level policy checks.
838    match check_metadata_policy(
839        path,
840        root,
841        &relative_path,
842        &metadata,
843        config,
844        include_globs,
845        exclude_globs,
846    ) {
847        MetadataPolicyOutcome::Skip(record) => return Ok(Some(*record)),
848        MetadataPolicyOutcome::Exclude => return Ok(None),
849        MetadataPolicyOutcome::Continue => {}
850    }
851
852    let bytes = match fs::read(path) {
853        Ok(bytes) => bytes,
854        Err(err) => {
855            return Ok(Some(skipped_record(
856                path,
857                root,
858                metadata.len(),
859                FileStatus::ErrorInternal,
860                vec![format!("failed to read file: {err}")],
861            )));
862        }
863    };
864
865    // Content-level policy checks (vendor, generated, minified).
866    let content_policy = check_content_policy(path, root, metadata.len(), &bytes, config);
867    if let Some(record) = content_policy.skip_record {
868        return Ok(Some(record));
869    }
870    let (vendor, generated, minified) = (
871        content_policy.vendor,
872        content_policy.generated,
873        content_policy.minified,
874    );
875
876    // Decode content, handling binary and decode failures.
877    let (text, encoding, decode_warnings) =
878        match decode_file_contents(path, root, metadata.len(), &bytes, config) {
879            Ok(Some(result)) => result,
880            Ok(None) => {
881                return Ok(Some(skipped_record(
882                    path,
883                    root,
884                    metadata.len(),
885                    FileStatus::SkippedBinary,
886                    vec!["binary file skipped by default".into()],
887                )));
888            }
889            Err(err) => {
890                let msg = err.to_string();
891                if let Some(warn_msg) = msg.strip_prefix("__decode_warn__: ") {
892                    return Ok(Some(skipped_record(
893                        path,
894                        root,
895                        metadata.len(),
896                        FileStatus::SkippedDecodeError,
897                        vec![warn_msg.to_string()],
898                    )));
899                }
900                return Err(err);
901            }
902        };
903
904    let first_line = text.lines().next();
905    let language = detect_language(
906        path,
907        first_line,
908        &config.analysis.extension_overrides,
909        config.analysis.shebang_detection,
910    );
911
912    let Some(language) = language else {
913        return Ok(Some(skipped_record(
914            path,
915            root,
916            metadata.len(),
917            FileStatus::SkippedUnsupported,
918            vec!["unsupported or undetected language".into()],
919        )));
920    };
921
922    if let Some(enabled) = enabled_languages {
923        if !enabled.contains(&language) {
924            return Ok(Some(skipped_record(
925                path,
926                root,
927                metadata.len(),
928                FileStatus::SkippedByPolicy,
929                vec![format!(
930                    "language {} disabled by configuration",
931                    language.display_name()
932                )],
933            )));
934        }
935    }
936
937    let ieee_opts = AnalysisOptions {
938        blank_in_block_comment_as_comment: config.analysis.blank_in_block_comment_policy
939            == BlankInBlockCommentPolicy::CountAsComment,
940        collapse_continuation_lines: config.analysis.continuation_line_policy
941            == ContinuationLinePolicy::CollapseToLogical,
942    };
943    let analysis = analyze_text(language, &text, ieee_opts);
944    let effective_counts = compute_effective_counts(
945        &analysis.raw,
946        config.analysis.mixed_line_policy,
947        config.analysis.python_docstrings_as_comments,
948        config.analysis.count_compiler_directives,
949    );
950
951    let mut warnings = decode_warnings;
952    warnings.extend(analysis.warnings.clone());
953
954    Ok(Some(FileRecord {
955        path: path_to_string(path),
956        relative_path,
957        language: Some(language),
958        size_bytes: metadata.len(),
959        detected_encoding: Some(encoding),
960        raw_line_categories: analysis.raw,
961        effective_counts,
962        status: match analysis.parse_mode {
963            ParseMode::Lexical | ParseMode::TreeSitter => FileStatus::AnalyzedExact,
964            ParseMode::LexicalBestEffort => FileStatus::AnalyzedBestEffort,
965        },
966        warnings,
967        generated,
968        minified,
969        vendor,
970        parse_mode: Some(analysis.parse_mode),
971        submodule: None,
972        coverage: None,
973    }))
974}
975
976const fn compute_effective_counts(
977    raw: &RawLineCounts,
978    mixed_line_policy: MixedLinePolicy,
979    python_docstrings_as_comments: bool,
980    count_compiler_directives: bool,
981) -> EffectiveCounts {
982    let mut effective = EffectiveCounts {
983        code_lines: raw.code_only_lines,
984        comment_lines: raw.single_comment_only_lines + raw.multi_comment_only_lines,
985        blank_lines: raw.blank_only_lines,
986        mixed_lines_separate: 0,
987    };
988
989    if python_docstrings_as_comments {
990        effective.comment_lines += raw.docstring_comment_lines;
991    } else {
992        effective.code_lines += raw.docstring_comment_lines;
993    }
994
995    let mixed_total = raw.mixed_code_single_comment_lines + raw.mixed_code_multi_comment_lines;
996    match mixed_line_policy {
997        MixedLinePolicy::CodeOnly => effective.code_lines += mixed_total,
998        MixedLinePolicy::CodeAndComment => {
999            effective.code_lines += mixed_total;
1000            effective.comment_lines += mixed_total;
1001        }
1002        MixedLinePolicy::CommentOnly => effective.comment_lines += mixed_total,
1003        MixedLinePolicy::SeparateMixedCategory => effective.mixed_lines_separate += mixed_total,
1004    }
1005
1006    // IEEE 1045-1992 §4.2: optionally exclude preprocessor/compiler directives from code SLOC.
1007    // compiler_directive_lines is a subset of code_only_lines, so subtract it directly.
1008    if !count_compiler_directives {
1009        effective.code_lines = effective
1010            .code_lines
1011            .saturating_sub(raw.compiler_directive_lines);
1012    }
1013
1014    effective
1015}
1016
1017fn build_summary(analyzed: &[FileRecord], skipped: &[FileRecord]) -> SummaryTotals {
1018    let mut summary = SummaryTotals {
1019        files_considered: (analyzed.len() + skipped.len()) as u64,
1020        files_analyzed: analyzed.len() as u64,
1021        files_skipped: skipped.len() as u64,
1022        ..Default::default()
1023    };
1024
1025    for record in analyzed {
1026        summary.total_physical_lines += record.raw_line_categories.total_physical_lines;
1027        summary.code_lines += record.effective_counts.code_lines;
1028        summary.comment_lines += record.effective_counts.comment_lines;
1029        summary.blank_lines += record.effective_counts.blank_lines;
1030        summary.mixed_lines_separate += record.effective_counts.mixed_lines_separate;
1031        summary.functions += record.raw_line_categories.functions;
1032        summary.classes += record.raw_line_categories.classes;
1033        summary.variables += record.raw_line_categories.variables;
1034        summary.imports += record.raw_line_categories.imports;
1035        summary.test_count += record.raw_line_categories.test_count;
1036        summary.test_assertion_count += record.raw_line_categories.test_assertion_count;
1037        summary.test_suite_count += record.raw_line_categories.test_suite_count;
1038        if let Some(cov) = &record.coverage {
1039            summary.coverage_lines_found += u64::from(cov.lines_found);
1040            summary.coverage_lines_hit += u64::from(cov.lines_hit);
1041            summary.coverage_functions_found += u64::from(cov.functions_found);
1042            summary.coverage_functions_hit += u64::from(cov.functions_hit);
1043            summary.coverage_branches_found += u64::from(cov.branches_found);
1044            summary.coverage_branches_hit += u64::from(cov.branches_hit);
1045        }
1046    }
1047
1048    summary
1049}
1050
1051/// Construct a zero-filled `LanguageSummary` for the given language.
1052const fn zeroed_summary(language: Language) -> LanguageSummary {
1053    LanguageSummary {
1054        language,
1055        files: 0,
1056        total_physical_lines: 0,
1057        code_lines: 0,
1058        comment_lines: 0,
1059        blank_lines: 0,
1060        mixed_lines_separate: 0,
1061        functions: 0,
1062        classes: 0,
1063        variables: 0,
1064        imports: 0,
1065        test_count: 0,
1066        test_assertion_count: 0,
1067        test_suite_count: 0,
1068        coverage_lines_found: 0,
1069        coverage_lines_hit: 0,
1070        coverage_functions_found: 0,
1071        coverage_functions_hit: 0,
1072        coverage_branches_found: 0,
1073        coverage_branches_hit: 0,
1074    }
1075}
1076
1077/// Accumulate all per-file counters from `record` into an existing `LanguageSummary`.
1078fn accumulate_record_into_summary(entry: &mut LanguageSummary, record: &FileRecord) {
1079    entry.files += 1;
1080    let r = &record.raw_line_categories;
1081    entry.total_physical_lines += r.total_physical_lines;
1082    entry.code_lines += record.effective_counts.code_lines;
1083    entry.comment_lines += record.effective_counts.comment_lines;
1084    entry.blank_lines += record.effective_counts.blank_lines;
1085    entry.mixed_lines_separate += record.effective_counts.mixed_lines_separate;
1086    entry.functions += r.functions;
1087    entry.classes += r.classes;
1088    entry.variables += r.variables;
1089    entry.imports += r.imports;
1090    entry.test_count += r.test_count;
1091    entry.test_assertion_count += r.test_assertion_count;
1092    entry.test_suite_count += r.test_suite_count;
1093    if let Some(cov) = &record.coverage {
1094        entry.coverage_lines_found += u64::from(cov.lines_found);
1095        entry.coverage_lines_hit += u64::from(cov.lines_hit);
1096        entry.coverage_functions_found += u64::from(cov.functions_found);
1097        entry.coverage_functions_hit += u64::from(cov.functions_hit);
1098        entry.coverage_branches_found += u64::from(cov.branches_found);
1099        entry.coverage_branches_hit += u64::from(cov.branches_hit);
1100    }
1101}
1102
1103fn build_language_summaries(analyzed: &[FileRecord]) -> Vec<LanguageSummary> {
1104    let mut by_language: BTreeMap<Language, LanguageSummary> = BTreeMap::new();
1105    for record in analyzed {
1106        let Some(language) = record.language else {
1107            continue;
1108        };
1109        let entry = by_language
1110            .entry(language)
1111            .or_insert_with(|| zeroed_summary(language));
1112        accumulate_record_into_summary(entry, record);
1113    }
1114    by_language.into_values().collect()
1115}
1116
1117fn skipped_record(
1118    path: &Path,
1119    root: &Path,
1120    size_bytes: u64,
1121    status: FileStatus,
1122    warnings: Vec<String>,
1123) -> FileRecord {
1124    FileRecord {
1125        path: path_to_string(path),
1126        relative_path: relative_path_string(path, root),
1127        language: None,
1128        size_bytes,
1129        detected_encoding: None,
1130        raw_line_categories: RawLineCounts::default(),
1131        effective_counts: EffectiveCounts::default(),
1132        status,
1133        warnings,
1134        generated: false,
1135        minified: false,
1136        vendor: false,
1137        parse_mode: None,
1138        submodule: None,
1139        coverage: None,
1140    }
1141}
1142
/// Render `path` relative to `root` with forward slashes.
///
/// When `path` does not start with `root`, the whole `path` is rendered instead.
/// Non-UTF-8 portions are converted lossily.
fn relative_path_string(path: &Path, root: &Path) -> String {
    let shown = path.strip_prefix(root).unwrap_or(path);
    let lossy = shown.to_string_lossy();
    lossy.replace('\\', "/")
}
1149
/// Render `path` as a string (lossily for non-UTF-8 data) with every backslash turned
/// into a forward slash.
fn path_to_string(path: &Path) -> String {
    let lossy = path.to_string_lossy();
    lossy.replace('\\', "/")
}
1153
/// Parse `.gitmodules` in `root` and return `(name, relative_path)` for each submodule found.
///
/// Only `[submodule "<name>"]` section headers and `path = <value>` entries are interpreted;
/// every other line is ignored. A submodule is reported once both its header and a path have
/// been seen, so a section without a `path` entry is dropped. Entries are returned in file
/// order. A missing or unreadable `.gitmodules` yields an empty list.
///
/// Malformed headers (for example `[submodule "]`) are skipped rather than sliced, and the
/// `path` key is matched exactly (ignoring ASCII case), so keys that merely start with
/// `path` cannot overwrite the submodule path.
#[must_use]
pub fn detect_submodules(root: &Path) -> Vec<(String, PathBuf)> {
    let gitmodules = root.join(".gitmodules");
    if !gitmodules.is_file() {
        return Vec::new();
    }
    let Ok(content) = fs::read_to_string(&gitmodules) else {
        return Vec::new();
    };

    let mut result = Vec::new();
    let mut current_name: Option<String> = None;
    let mut current_path: Option<PathBuf> = None;

    for line in content.lines() {
        let trimmed = line.trim();

        // Stripping prefix then suffix cannot overlap, unlike index arithmetic on the line.
        let header = trimmed
            .strip_prefix("[submodule \"")
            .and_then(|rest| rest.strip_suffix("\"]"));

        if let Some(name) = header {
            // A new section starts: flush the previous one if it was complete.
            // Both slots are cleared either way so nothing leaks into the new section.
            if let (Some(prev_name), Some(prev_path)) = (current_name.take(), current_path.take())
            {
                result.push((prev_name, prev_path));
            }
            current_name = Some(name.to_string());
        } else if let Some((key, value)) = trimmed.split_once('=') {
            if key.trim().eq_ignore_ascii_case("path") {
                current_path = Some(PathBuf::from(value.trim()));
            }
        }
    }

    // Flush the final section, which has no following header to trigger it.
    if let (Some(name), Some(path)) = (current_name, current_path) {
        result.push((name, path));
    }

    result
}
1190
1191fn build_submodule_summaries(
1192    analyzed: &[FileRecord],
1193    submodules: &[(String, PathBuf)],
1194) -> Vec<SubmoduleSummary> {
1195    submodules
1196        .iter()
1197        .map(|(name, path)| {
1198            let files: Vec<&FileRecord> = analyzed
1199                .iter()
1200                .filter(|f| f.submodule.as_deref() == Some(name.as_str()))
1201                .collect();
1202
1203            let files_analyzed = files.len() as u64;
1204            let total_physical_lines = files
1205                .iter()
1206                .map(|f| f.raw_line_categories.total_physical_lines)
1207                .sum();
1208            let code_lines = files.iter().map(|f| f.effective_counts.code_lines).sum();
1209            let comment_lines = files.iter().map(|f| f.effective_counts.comment_lines).sum();
1210            let blank_lines = files.iter().map(|f| f.effective_counts.blank_lines).sum();
1211            let language_summaries = build_language_summaries_from_slice(&files);
1212
1213            SubmoduleSummary {
1214                name: name.clone(),
1215                relative_path: path.to_string_lossy().replace('\\', "/"),
1216                files_analyzed,
1217                total_physical_lines,
1218                code_lines,
1219                comment_lines,
1220                blank_lines,
1221                language_summaries,
1222            }
1223        })
1224        .filter(|s| s.files_analyzed > 0)
1225        .collect()
1226}
1227
1228fn build_language_summaries_from_slice(files: &[&FileRecord]) -> Vec<LanguageSummary> {
1229    let mut map: BTreeMap<String, LanguageSummary> = BTreeMap::new();
1230    for file in files {
1231        let Some(lang) = file.language else { continue };
1232        let entry = map
1233            .entry(lang.display_name().to_string())
1234            .or_insert_with(|| zeroed_summary(lang));
1235        accumulate_record_into_summary(entry, file);
1236    }
1237    map.into_values().collect()
1238}
1239
/// Report whether the final component of `path` is exactly `expected`.
///
/// Paths with no file name, or whose name is not valid UTF-8, never match.
fn file_name_eq(path: &Path, expected: &str) -> bool {
    let actual = path.file_name().and_then(|name| name.to_str());
    actual == Some(expected)
}
1245
/// Report whether any component of `path` is exactly one of `excluded_dirs`.
///
/// Components that are not valid UTF-8 are never treated as excluded.
fn is_excluded_dir_path(path: &Path, excluded_dirs: &[String]) -> bool {
    path.components()
        .filter_map(|component| component.as_os_str().to_str())
        .any(|part| excluded_dirs.iter().any(|excluded| excluded == part))
}
1254
/// Report whether any component of `path` is named `vendor`, `node_modules` or `packages`.
///
/// Components that are not valid UTF-8 never match.
fn is_vendor_path(path: &Path) -> bool {
    const VENDOR_DIR_NAMES: [&str; 3] = ["vendor", "node_modules", "packages"];

    path.components()
        .filter_map(|component| component.as_os_str().to_str())
        .any(|part| VENDOR_DIR_NAMES.contains(&part))
}
1263
/// Report whether the file name of `path` is one of the recognized dependency lockfiles.
///
/// The comparison is exact and case-sensitive; paths without a UTF-8 file name never match.
fn is_known_lockfile(path: &Path) -> bool {
    const LOCKFILE_NAMES: [&str; 7] = [
        "Cargo.lock",
        "package-lock.json",
        "yarn.lock",
        "pnpm-lock.yaml",
        "Pipfile.lock",
        "poetry.lock",
        "composer.lock",
    ];

    let Some(name) = path.file_name().and_then(|name| name.to_str()) else {
        return false;
    };
    LOCKFILE_NAMES.contains(&name)
}
1280
1281fn looks_generated(path: &Path, bytes: &[u8]) -> bool {
1282    let file_name = path
1283        .file_name()
1284        .and_then(|name| name.to_str())
1285        .unwrap_or_default();
1286    if file_name.contains(".generated.") || file_name.contains(".g.") {
1287        return true;
1288    }
1289
1290    let sample = String::from_utf8_lossy(&bytes[..bytes.len().min(GENERATED_SAMPLE_BYTES)])
1291        .to_ascii_lowercase();
1292    sample.contains("@generated") || sample.contains("generated by")
1293}
1294
1295fn looks_minified(path: &Path, bytes: &[u8]) -> bool {
1296    let file_name = path
1297        .file_name()
1298        .and_then(|name| name.to_str())
1299        .unwrap_or_default();
1300    if file_name.contains(".min.") {
1301        return true;
1302    }
1303
1304    let sample = String::from_utf8_lossy(&bytes[..bytes.len().min(MINIFIED_SAMPLE_BYTES)]);
1305    let longest_line = sample.lines().map(str::len).max().unwrap_or(0);
1306    let whitespace = sample.chars().filter(|c| c.is_whitespace()).count();
1307    longest_line > MINIFIED_LINE_THRESHOLD && whitespace * 100 < sample.len().max(1)
1308}
1309
1310fn is_binary(bytes: &[u8]) -> bool {
1311    if bytes.starts_with(&[0xEF, 0xBB, 0xBF])
1312        || bytes.starts_with(&[0xFF, 0xFE])
1313        || bytes.starts_with(&[0xFE, 0xFF])
1314    {
1315        return false;
1316    }
1317
1318    let sample = &bytes[..bytes.len().min(BINARY_SAMPLE_BYTES)];
1319    sample.contains(&0)
1320}
1321
1322/// Decode a BOM-stripped UTF-16 byte slice using the given encoding.
1323/// Returns `(text, encoding_label, warnings)`.
1324fn decode_utf16_bom(
1325    bom_stripped: &[u8],
1326    encoding: &'static encoding_rs::Encoding,
1327    label: &str,
1328) -> (String, String, Vec<String>) {
1329    let (cow, _, had_errors) = encoding.decode(bom_stripped);
1330    let mut warnings = Vec::new();
1331    if had_errors {
1332        warnings.push(format!("{label} decode contained replacement characters"));
1333    }
1334    (cow.into_owned(), label.into(), warnings)
1335}
1336
1337fn decode_bytes(bytes: &[u8]) -> std::result::Result<(String, String, Vec<String>), String> {
1338    if bytes.starts_with(&[0xEF, 0xBB, 0xBF]) {
1339        let text = String::from_utf8(bytes[3..].to_vec()).map_err(|err| err.to_string())?;
1340        return Ok((text, "utf-8-bom".into(), vec![]));
1341    }
1342    if bytes.starts_with(&[0xFF, 0xFE]) {
1343        return Ok(decode_utf16_bom(&bytes[2..], UTF_16LE, "utf-16le"));
1344    }
1345    if bytes.starts_with(&[0xFE, 0xFF]) {
1346        return Ok(decode_utf16_bom(&bytes[2..], UTF_16BE, "utf-16be"));
1347    }
1348
1349    // Multiple statements in the else branch make map_or_else awkward here.
1350    #[allow(clippy::option_if_let_else)]
1351    if let Ok(text) = String::from_utf8(bytes.to_vec()) {
1352        Ok((text, "utf-8".into(), vec![]))
1353    } else {
1354        let (cow, _, had_errors) = WINDOWS_1252.decode(bytes);
1355        let mut warnings = vec!["decoded using windows-1252 fallback".into()];
1356        if had_errors {
1357            warnings.push("fallback decode contained replacement characters".into());
1358        }
1359        Ok((cow.into_owned(), "windows-1252".into(), warnings))
1360    }
1361}
1362
1363fn compile_globset(patterns: &[String]) -> Result<Option<GlobSet>> {
1364    if patterns.is_empty() {
1365        return Ok(None);
1366    }
1367
1368    let mut builder = GlobSetBuilder::new();
1369    for pattern in patterns {
1370        builder
1371            .add(Glob::new(pattern).with_context(|| format!("invalid glob pattern: {pattern}"))?);
1372    }
1373    Ok(Some(
1374        builder.build().context("failed to compile glob filters")?,
1375    ))
1376}
1377
1378fn parse_enabled_languages(enabled: &[String]) -> Result<Option<BTreeSet<Language>>> {
1379    if enabled.is_empty() {
1380        return Ok(None);
1381    }
1382
1383    let supported = supported_languages();
1384    let mut set = BTreeSet::new();
1385    for name in enabled {
1386        let language = Language::from_name(name)
1387            .with_context(|| format!("unsupported language in config: {name}"))?;
1388        if !supported.contains(&language) {
1389            anyhow::bail!("language {name} is not supported in this build");
1390        }
1391        set.insert(language);
1392    }
1393    Ok(Some(set))
1394}
1395
1396/// # Errors
1397///
1398/// Returns an error if serialization fails or the output file cannot be written.
1399pub fn write_json(run: &AnalysisRun, output_path: &Path) -> Result<()> {
1400    let json = serde_json::to_string_pretty(run).context("failed to serialize analysis run")?;
1401    fs::write(output_path, json)
1402        .with_context(|| format!("failed to write JSON output to {}", output_path.display()))
1403}
1404
1405/// # Errors
1406///
1407/// Returns an error if the file cannot be read or the JSON cannot be parsed.
1408pub fn read_json(path: &Path) -> Result<AnalysisRun> {
1409    let contents = fs::read_to_string(path)
1410        .with_context(|| format!("failed to read result file {}", path.display()))?;
1411    serde_json::from_str(&contents)
1412        .with_context(|| format!("failed to parse JSON result {}", path.display()))
1413}
1414
#[cfg(test)]
mod tests {
    use super::*;

    /// With `CodeOnly`, mixed lines are added to code and docstrings go to comments.
    #[test]
    fn effective_counts_respect_code_only_policy() {
        let input = RawLineCounts {
            code_only_lines: 2,
            single_comment_only_lines: 1,
            mixed_code_single_comment_lines: 3,
            docstring_comment_lines: 2,
            ..RawLineCounts::default()
        };

        let result = compute_effective_counts(&input, MixedLinePolicy::CodeOnly, true, true);

        // code: 2 code-only + 3 mixed; comments: 1 single + 2 docstring.
        assert_eq!(result.code_lines, 5);
        assert_eq!(result.comment_lines, 3);
    }

    /// With `SeparateMixedCategory`, mixed lines land only in their own bucket.
    #[test]
    fn effective_counts_can_separate_mixed() {
        let input = RawLineCounts {
            mixed_code_single_comment_lines: 2,
            mixed_code_multi_comment_lines: 1,
            ..RawLineCounts::default()
        };

        let result =
            compute_effective_counts(&input, MixedLinePolicy::SeparateMixedCategory, true, true);

        assert_eq!(result.mixed_lines_separate, 3);
        assert_eq!(result.code_lines, 0);
        assert_eq!(result.comment_lines, 0);
    }

    /// Bytes that are not valid UTF-8 are decoded as windows-1252 with a warning.
    #[test]
    fn windows_1252_fallback_decodes() {
        // "Hello", then 0x96 (an en dash in windows-1252), then "W".
        let input: Vec<u8> = vec![0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x96, 0x57];

        let (decoded, label, notes) = decode_bytes(&input).unwrap();

        assert_eq!(label, "windows-1252");
        assert!(decoded.contains('–'));
        assert!(!notes.is_empty());
    }
}