1#![allow(clippy::multiple_crate_versions)]
4
5pub mod baseline;
6pub mod coverage;
7pub mod delta;
8pub mod history;
9pub use baseline::{check_against_baseline, resolve_baselines_path, BaselineEntry, BaselineStore};
10pub use coverage::{aggregate_line_coverage, lookup_coverage, parse_lcov, FileCoverage};
11pub use delta::{compute_delta, FileChangeStatus, FileDelta, ScanComparison, SummaryDelta};
12pub use history::{RegistryEntry, ScanRegistry, ScanSummarySnapshot, WatchedDirsStore};
13
14use std::collections::{BTreeMap, BTreeSet, HashSet};
15use std::fs;
16use std::path::{Path, PathBuf};
17use std::sync::atomic::{AtomicBool, Ordering};
18
19use anyhow::{Context, Result};
20use chrono::{DateTime, Utc};
21use encoding_rs::{UTF_16BE, UTF_16LE, WINDOWS_1252};
22use globset::{Glob, GlobSet, GlobSetBuilder};
23use ignore::WalkBuilder;
24use serde::{Deserialize, Serialize};
25use uuid::Uuid;
26
27use sloc_config::{
28 AppConfig, BinaryFileBehavior, BlankInBlockCommentPolicy, ContinuationLinePolicy,
29 FailureBehavior, MixedLinePolicy,
30};
31use sloc_languages::{
32 analyze_text, detect_language, supported_languages, AnalysisOptions, Language, ParseMode,
33 RawLineCounts,
34};
35
/// Upper bound on the number of worker threads used for per-file analysis.
const MAX_ANALYSIS_THREADS: usize = 16;
/// Worker-thread count used when the available parallelism cannot be queried.
const DEFAULT_ANALYSIS_THREADS: usize = 4;
// NOTE(review): presumably the number of leading bytes sampled when looking
// for generated-file markers; the usage is outside this view, so confirm.
const GENERATED_SAMPLE_BYTES: usize = 1024;
// NOTE(review): presumably the number of leading bytes sampled by the
// minified-file heuristic; the usage is outside this view, so confirm.
const MINIFIED_SAMPLE_BYTES: usize = 4096;
// NOTE(review): presumably the line length above which a file is considered
// minified; the usage is outside this view, so confirm.
const MINIFIED_LINE_THRESHOLD: usize = 2000;
// NOTE(review): presumably the number of leading bytes inspected by the
// binary-content check; the usage is outside this view, so confirm.
const BINARY_SAMPLE_BYTES: usize = 8192;
50
/// Result of the metadata-only policy checks that run on a candidate file
/// before its contents are read.
enum MetadataPolicyOutcome {
    /// The file is rejected; the boxed record is reported to the caller as-is.
    Skip(Box<FileRecord>),
    /// The file is dropped without producing any record (it did not match the
    /// include globs).
    Exclude,
    /// No policy rejected the file; analysis continues.
    Continue,
}
60
/// Final disposition of a file that was considered during a run.
///
/// Serialized in `snake_case`. Only the two `Analyzed*` variants place a
/// record in the analyzed list; every other variant goes to the skipped list.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FileStatus {
    /// Analyzed with a `Lexical` or `TreeSitter` parse mode.
    AnalyzedExact,
    /// Analyzed with the `LexicalBestEffort` parse mode.
    AnalyzedBestEffort,
    /// Content was detected as binary and binary files are configured to be skipped.
    SkippedBinary,
    /// Content could not be decoded and decode failures are configured to warn and skip.
    SkippedDecodeError,
    /// No language could be detected for the file.
    SkippedUnsupported,
    /// Rejected by a discovery or analysis policy (symlink, size, glob, vendor, ...).
    SkippedByPolicy,
    /// Metadata or contents could not be read.
    ErrorInternal,
}
72
/// Line counts for one file after the configured counting policies (mixed
/// lines, docstrings, compiler directives) have been applied to the raw counts.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct EffectiveCounts {
    /// Lines counted as code.
    pub code_lines: u64,
    /// Lines counted as comments.
    pub comment_lines: u64,
    /// Blank lines.
    pub blank_lines: u64,
    /// Mixed code/comment lines; non-zero only under the
    /// `SeparateMixedCategory` mixed-line policy.
    pub mixed_lines_separate: u64,
}
80
/// Identifies the tool build and the individual run that produced a report.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolMetadata {
    /// Tool name.
    pub name: String,
    /// Crate version, taken from `CARGO_PKG_VERSION` at build time.
    pub version: String,
    /// Run identifier: a `%Y%m%d-%H%M` timestamp followed by `-` and a random UUID.
    pub run_id: String,
    /// Time at which the run record was assembled.
    pub timestamp_utc: DateTime<Utc>,
}
88
/// Describes the machine and user context in which a run was executed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnvironmentMetadata {
    /// Value of `std::env::consts::OS`.
    pub operating_system: String,
    /// Value of `std::env::consts::ARCH`.
    pub architecture: String,
    /// Caller-supplied label describing how the analysis was started.
    pub runtime_mode: String,
    /// User name read from the environment, or `"unknown"`.
    pub initiator_username: String,
    /// Host name read from the environment or `/etc/hostname`, or `"unknown"`.
    pub initiator_hostname: String,
}
97
/// Run-wide totals accumulated over every analyzed file.
///
/// Fields marked `#[serde(default)]` fall back to zero when they are missing
/// from the data being deserialized.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct SummaryTotals {
    /// Number of analyzed plus skipped files.
    pub files_considered: u64,
    /// Number of files that were analyzed.
    pub files_analyzed: u64,
    /// Number of files that were skipped.
    pub files_skipped: u64,
    /// Sum of the raw physical line counts.
    pub total_physical_lines: u64,
    /// Sum of the effective code-line counts.
    pub code_lines: u64,
    /// Sum of the effective comment-line counts.
    pub comment_lines: u64,
    /// Sum of the effective blank-line counts.
    pub blank_lines: u64,
    /// Sum of the effective separately-counted mixed lines.
    pub mixed_lines_separate: u64,
    /// Sum of the per-file function counts.
    #[serde(default)]
    pub functions: u64,
    /// Sum of the per-file class counts.
    #[serde(default)]
    pub classes: u64,
    /// Sum of the per-file variable counts.
    #[serde(default)]
    pub variables: u64,
    /// Sum of the per-file import counts.
    #[serde(default)]
    pub imports: u64,
    /// Sum of the per-file test counts.
    #[serde(default)]
    pub test_count: u64,
    /// Sum of the per-file test assertion counts.
    #[serde(default)]
    pub test_assertion_count: u64,
    /// Sum of the per-file test suite counts.
    #[serde(default)]
    pub test_suite_count: u64,
    /// Coverage: lines found, summed over files that have coverage attached.
    #[serde(default)]
    pub coverage_lines_found: u64,
    /// Coverage: lines hit, summed over files that have coverage attached.
    #[serde(default)]
    pub coverage_lines_hit: u64,
    /// Coverage: functions found, summed over files that have coverage attached.
    #[serde(default)]
    pub coverage_functions_found: u64,
    /// Coverage: functions hit, summed over files that have coverage attached.
    #[serde(default)]
    pub coverage_functions_hit: u64,
    /// Coverage: branches found, summed over files that have coverage attached.
    #[serde(default)]
    pub coverage_branches_found: u64,
    /// Coverage: branches hit, summed over files that have coverage attached.
    #[serde(default)]
    pub coverage_branches_hit: u64,
}
138
/// Totals for a single language.
///
/// Fields marked `#[serde(default)]` fall back to zero when they are missing
/// from the data being deserialized.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LanguageSummary {
    /// Language these totals belong to.
    pub language: Language,
    /// Number of file records folded into this summary.
    pub files: u64,
    /// Sum of the raw physical line counts.
    pub total_physical_lines: u64,
    /// Sum of the effective code-line counts.
    pub code_lines: u64,
    /// Sum of the effective comment-line counts.
    pub comment_lines: u64,
    /// Sum of the effective blank-line counts.
    pub blank_lines: u64,
    /// Separately-counted mixed lines.
    pub mixed_lines_separate: u64,
    /// Function count.
    #[serde(default)]
    pub functions: u64,
    /// Class count.
    #[serde(default)]
    pub classes: u64,
    /// Variable count.
    #[serde(default)]
    pub variables: u64,
    /// Import count.
    #[serde(default)]
    pub imports: u64,
    /// Test count.
    #[serde(default)]
    pub test_count: u64,
    /// Test assertion count.
    #[serde(default)]
    pub test_assertion_count: u64,
    /// Test suite count.
    #[serde(default)]
    pub test_suite_count: u64,
    /// Coverage: lines found.
    #[serde(default)]
    pub coverage_lines_found: u64,
    /// Coverage: lines hit.
    #[serde(default)]
    pub coverage_lines_hit: u64,
    /// Coverage: functions found.
    #[serde(default)]
    pub coverage_functions_found: u64,
    /// Coverage: functions hit.
    #[serde(default)]
    pub coverage_functions_hit: u64,
    /// Coverage: branches found.
    #[serde(default)]
    pub coverage_branches_found: u64,
    /// Coverage: branches hit.
    #[serde(default)]
    pub coverage_branches_hit: u64,
}
175
/// Everything recorded about one candidate file, whether analyzed or skipped.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileRecord {
    /// Path of the file as a string.
    pub path: String,
    /// Path of the file relative to the root it was discovered under.
    pub relative_path: String,
    /// Detected language, when one was detected.
    pub language: Option<Language>,
    /// File size in bytes as reported by the file metadata.
    pub size_bytes: u64,
    /// Name of the encoding the contents were decoded with, when decoded.
    pub detected_encoding: Option<String>,
    /// Raw per-category line counts produced by the language analyzer.
    pub raw_line_categories: RawLineCounts,
    /// Counts after the configured counting policies were applied.
    pub effective_counts: EffectiveCounts,
    /// Final disposition of the file.
    pub status: FileStatus,
    /// Warnings collected while decoding and analyzing the file, or the
    /// reason the file was skipped.
    pub warnings: Vec<String>,
    /// Whether the content-policy check flagged the file as generated.
    pub generated: bool,
    /// Whether the content-policy check flagged the file as minified.
    pub minified: bool,
    /// Whether the file lives under a vendor path.
    pub vendor: bool,
    /// Parse mode reported by the analyzer, when the file was analyzed.
    pub parse_mode: Option<ParseMode>,
    /// Name of the submodule containing the file; omitted from the output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub submodule: Option<String>,
    /// Coverage data matched to this file; omitted from the output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub coverage: Option<FileCoverage>,
}
197
/// Totals for the analyzed files that belong to one submodule.
// NOTE(review): the aggregation itself lives in `build_submodule_summaries`,
// which is outside this view; field notes below follow the field names.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SubmoduleSummary {
    /// Submodule name.
    pub name: String,
    /// Submodule path; presumably relative to the first input root (confirm).
    pub relative_path: String,
    /// Number of analyzed files in the submodule.
    pub files_analyzed: u64,
    /// Physical line total.
    pub total_physical_lines: u64,
    /// Code line total.
    pub code_lines: u64,
    /// Comment line total.
    pub comment_lines: u64,
    /// Blank line total.
    pub blank_lines: u64,
    /// Per-language breakdown for the submodule.
    pub language_summaries: Vec<LanguageSummary>,
}
210
/// Complete result of one analysis run: metadata, configuration, totals,
/// per-file records, warnings and optional git information.
///
/// Optional fields are omitted from the serialized output when they are
/// `None` (or empty, for `submodule_summaries`) and default when missing on input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalysisRun {
    /// Tool name, version and run identity.
    pub tool: ToolMetadata,
    /// Host and user context of the run.
    pub environment: EnvironmentMetadata,
    /// Copy of the configuration the run was executed with.
    pub effective_configuration: AppConfig,
    /// The configured root paths, as strings.
    pub input_roots: Vec<String>,
    /// Run-wide totals.
    pub summary_totals: SummaryTotals,
    /// Totals broken down by language.
    pub totals_by_language: Vec<LanguageSummary>,
    /// Records of the files that were analyzed.
    pub per_file_records: Vec<FileRecord>,
    /// Records of the files that were skipped.
    pub skipped_file_records: Vec<FileRecord>,
    /// Run-level warnings, including per-file warnings prefixed with the file's relative path.
    pub warnings: Vec<String>,
    /// Per-submodule totals; empty unless the submodule breakdown is enabled.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub submodule_summaries: Vec<SubmoduleSummary>,
    /// First seven characters of the HEAD commit id.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_short: Option<String>,
    /// HEAD commit id.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_long: Option<String>,
    /// Checked-out branch name, when HEAD points at a branch.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_branch: Option<String>,
    /// Author name taken from the last HEAD reflog entry.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_author: Option<String>,
    /// Tags pointing at HEAD, joined with `", "`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_tags: Option<String>,
    /// Nearest tag as reported by `git describe --tags --abbrev=0 HEAD`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_nearest_tag: Option<String>,
    /// Timestamp string taken from the last HEAD reflog entry.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_date: Option<String>,
}
247
/// Git details gathered for a run; every field is `None` when the
/// corresponding piece of information could not be determined.
#[derive(Default)]
struct GitInfo {
    /// First seven characters of `commit_long`.
    commit_short: Option<String>,
    /// Commit id HEAD resolves to.
    commit_long: Option<String>,
    /// Branch name when HEAD is a `refs/heads/...` reference.
    branch: Option<String>,
    /// Author name parsed from the last HEAD reflog entry.
    author: Option<String>,
    /// Output of `git tag --points-at HEAD`, joined with `", "`.
    tags: Option<String>,
    /// Output of `git describe --tags --abbrev=0 HEAD`.
    nearest_tag: Option<String>,
    /// Timestamp string parsed from the last HEAD reflog entry.
    commit_date: Option<String>,
}
258
259fn find_git_dir(start: &Path) -> Option<PathBuf> {
263 let mut current = Some(start);
264 while let Some(dir) = current {
265 let candidate = dir.join(".git");
266 if candidate.is_dir() {
267 return Some(candidate);
268 }
269 if candidate.is_file() {
270 if let Some(resolved) = resolve_git_file_pointer(&candidate, dir) {
271 return Some(resolved);
272 }
273 }
274 current = dir.parent();
275 }
276 None
277}
278
/// Follows a `.git` pointer file (`gitdir: <path>`) to the directory it names.
///
/// `file` is the pointer file and `base_dir` the directory containing it;
/// relative pointers are resolved against `base_dir`. Forward slashes in the
/// pointer are converted to the platform separator first. Returns `None` when
/// the file cannot be read, lacks the `gitdir: ` prefix, or the target is not
/// a directory.
fn resolve_git_file_pointer(file: &Path, base_dir: &Path) -> Option<PathBuf> {
    let raw = fs::read_to_string(file).ok()?;
    let pointer = raw.trim().strip_prefix("gitdir: ")?;
    let native = pointer.replace('/', std::path::MAIN_SEPARATOR_STR);

    // `Path::join` discards the base when handed an absolute path, so one
    // join covers both the absolute and the relative pointer forms.
    let joined = base_dir.join(&native);

    // Prefer the canonical form, but keep the joined path if that fails.
    let target = match joined.canonicalize() {
        Ok(canonical) => canonical,
        Err(_) => joined,
    };
    target.is_dir().then_some(target)
}
303
/// Resolves `refname` (for example `refs/heads/main`) to a commit id using
/// only the files inside `git_dir`.
///
/// The loose ref file is tried first and accepted when its trimmed content is
/// at least 40 hexadecimal characters. Otherwise `packed-refs` is searched for
/// an entry with a matching name. Returns `None` when neither source yields
/// the ref.
fn resolve_ref(git_dir: &Path, refname: &str) -> Option<String> {
    // Join component by component so the separator is right on every platform.
    let ref_path = refname
        .split('/')
        .fold(git_dir.to_path_buf(), |p, c| p.join(c));

    // A missing or unreadable loose ref simply falls through to packed-refs.
    let loose = fs::read_to_string(&ref_path)
        .ok()
        .map(|s| s.trim().to_string())
        .filter(|s| s.len() >= 40 && s.chars().all(|c| c.is_ascii_hexdigit()));
    if loose.is_some() {
        return loose;
    }

    let packed = fs::read_to_string(git_dir.join("packed-refs")).ok()?;
    packed
        .lines()
        // `#` lines are headers, `^` lines are peeled tag targets.
        .filter(|line| !line.starts_with('#') && !line.starts_with('^'))
        // Lines without a "<sha> <name>" shape (blank or malformed) are
        // skipped rather than aborting the whole lookup.
        .filter_map(|line| line.split_once(' '))
        .find(|(_, name)| name.trim() == refname)
        .map(|(sha, _)| sha.to_string())
}
339
340fn parse_last_reflog_entry(git_dir: &Path) -> (Option<String>, Option<String>) {
346 let log_path = git_dir.join("logs").join("HEAD");
347 let Ok(content) = fs::read_to_string(&log_path) else {
348 return (None, None);
349 };
350 let Some(last) = content.lines().rfind(|l| !l.trim().is_empty()) else {
351 return (None, None);
352 };
353
354 let Some(after_shas) = last.splitn(3, ' ').nth(2) else {
357 return (None, None);
358 };
359
360 let author = after_shas.find(" <").map(|i| after_shas[..i].to_string());
362
363 let date = (|| {
365 use chrono::TimeZone as _;
366 let close = after_shas.find("> ")?;
367 let rest = after_shas[close + 2..].trim_start();
368 let mut tokens = rest.splitn(3, ' ');
369 let unix_str = tokens.next()?;
370 let offset_str = tokens.next().map(|s| s.split('\t').next().unwrap_or(s))?;
371 let ts: i64 = unix_str.parse().ok()?;
372 let dt = chrono::Utc.timestamp_opt(ts, 0).single()?;
373 let tz_display = if offset_str.len() == 5 {
375 format!("{}:{}", &offset_str[..3], &offset_str[3..])
376 } else {
377 offset_str.to_string()
378 };
379 Some(format!("{}{}", dt.format("%Y-%m-%dT%H:%M:%S"), tz_display))
380 })();
381
382 (author, date)
383}
384
385fn detect_git_for_run(project_path: &Path) -> GitInfo {
389 let Some(git_dir) = find_git_dir(project_path) else {
390 return GitInfo::default();
391 };
392
393 let head_raw = match fs::read_to_string(git_dir.join("HEAD")) {
394 Ok(s) => s.trim().to_string(),
395 Err(_) => return GitInfo::default(),
396 };
397
398 let (branch, commit_long) = head_raw.strip_prefix("ref: ").map_or_else(
399 || {
400 if head_raw.len() >= 40 && head_raw.chars().all(|c| c.is_ascii_hexdigit()) {
401 (None, Some(head_raw[..40].to_string()))
403 } else {
404 (None, None)
405 }
406 },
407 |refname| {
408 let branch = refname
409 .strip_prefix("refs/heads/")
410 .map(|b| b.trim().to_string());
411 let sha = resolve_ref(&git_dir, refname.trim());
412 (branch, sha)
413 },
414 );
415
416 let commit_short = commit_long
417 .as_deref()
418 .map(|s| s.chars().take(7).collect::<String>());
419
420 let (author, commit_date) = parse_last_reflog_entry(&git_dir);
421
422 let tags = run_git_cmd(project_path, &["tag", "--points-at", "HEAD"]).map(|t| {
425 t.lines()
426 .filter(|l| !l.is_empty())
427 .collect::<Vec<_>>()
428 .join(", ")
429 });
430 let nearest_tag = run_git_cmd(project_path, &["describe", "--tags", "--abbrev=0", "HEAD"]);
431
432 GitInfo {
433 commit_short,
434 commit_long,
435 branch,
436 author,
437 tags,
438 nearest_tag,
439 commit_date,
440 }
441}
442
/// Runs `git <args>` in `dir` and returns its trimmed standard output.
///
/// Several executable locations are tried in order: `git` from `PATH` first,
/// then a list of common install paths. The first attempt that exits
/// successfully with non-empty UTF-8 output wins. Every invocation passes
/// `-c safe.directory=*`. Returns `None` when no attempt produces output.
fn run_git_cmd(dir: &Path, args: &[&str]) -> Option<String> {
    const GIT_EXECUTABLES: [&str; 7] = [
        "git",
        "/usr/bin/git",
        "/usr/local/bin/git",
        "/opt/homebrew/bin/git",
        r"C:\Program Files\Git\cmd\git.exe",
        r"C:\Program Files\Git\bin\git.exe",
        r"C:\Program Files (x86)\Git\cmd\git.exe",
    ];

    GIT_EXECUTABLES.iter().copied().find_map(|exe| {
        let output = std::process::Command::new(exe)
            .args(["-c", "safe.directory=*"])
            .args(args)
            .current_dir(dir)
            .output()
            .ok()?;
        if !output.status.success() {
            return None;
        }
        let stdout = String::from_utf8(output.stdout).ok()?;
        let trimmed = stdout.trim();
        if trimmed.is_empty() {
            None
        } else {
            Some(trimmed.to_string())
        }
    })
}
478
/// Returns the current user name from the `USERNAME` environment variable,
/// falling back to `USER`, and finally to `"unknown"`.
fn get_current_username() -> String {
    for key in ["USERNAME", "USER"] {
        if let Ok(value) = std::env::var(key) {
            return value;
        }
    }
    "unknown".to_string()
}
484
/// Returns the host name from the `COMPUTERNAME` environment variable,
/// falling back to `HOSTNAME`, then to the trimmed contents of
/// `/etc/hostname`, and finally to `"unknown"`.
fn get_hostname() -> String {
    if let Ok(name) = std::env::var("COMPUTERNAME") {
        return name;
    }
    if let Ok(name) = std::env::var("HOSTNAME") {
        return name;
    }
    match std::fs::read_to_string("/etc/hostname") {
        Ok(contents) => contents.trim().to_string(),
        Err(_) => "unknown".to_string(),
    }
}
491
492#[allow(clippy::too_many_arguments)]
494fn walk_root(
495 root: &Path,
496 config: &AppConfig,
497 include_globs: Option<&GlobSet>,
498 exclude_globs: Option<&GlobSet>,
499 enabled_languages: Option<&BTreeSet<Language>>,
500 seen_paths: &mut HashSet<PathBuf>,
501 analyzed: &mut Vec<FileRecord>,
502 skipped: &mut Vec<FileRecord>,
503 warnings: &mut Vec<String>,
504 cancel: Option<&AtomicBool>,
505) -> Result<()> {
506 let mut builder = WalkBuilder::new(root);
507 builder
508 .follow_links(config.discovery.follow_symlinks)
509 .hidden(config.discovery.ignore_hidden_files)
510 .ignore(config.discovery.honor_ignore_files)
511 .parents(config.discovery.honor_ignore_files)
512 .git_ignore(config.discovery.honor_ignore_files)
513 .git_global(config.discovery.honor_ignore_files)
514 .git_exclude(config.discovery.honor_ignore_files);
515
516 let paths = collect_walk_paths(&builder, seen_paths, warnings);
517 if paths.is_empty() {
518 return Ok(());
519 }
520
521 let chunk_results = run_parallel_analysis(
522 &paths,
523 root,
524 config,
525 include_globs,
526 exclude_globs,
527 enabled_languages,
528 cancel,
529 )?;
530 merge_chunk_results(chunk_results, analyzed, skipped, warnings)
531}
532
533fn collect_walk_paths(
534 builder: &WalkBuilder,
535 seen_paths: &mut HashSet<PathBuf>,
536 warnings: &mut Vec<String>,
537) -> Vec<PathBuf> {
538 let mut paths = Vec::new();
539 for entry in builder.build() {
540 let entry = match entry {
541 Ok(e) => e,
542 Err(err) => {
543 warnings.push(format!("discovery warning: {err}"));
544 continue;
545 }
546 };
547 let path = entry.into_path();
548 if path.is_dir() || !seen_paths.insert(path.clone()) {
549 continue;
550 }
551 paths.push(path);
552 }
553 paths
554}
555
556#[allow(clippy::too_many_arguments)]
557fn run_parallel_analysis(
558 paths: &[PathBuf],
559 root: &Path,
560 config: &AppConfig,
561 include_globs: Option<&GlobSet>,
562 exclude_globs: Option<&GlobSet>,
563 enabled_languages: Option<&BTreeSet<Language>>,
564 cancel: Option<&AtomicBool>,
565) -> Result<Vec<Vec<Result<Option<FileRecord>>>>> {
566 let thread_count = std::thread::available_parallelism().map_or(DEFAULT_ANALYSIS_THREADS, |n| {
567 n.get().min(MAX_ANALYSIS_THREADS)
568 });
569 let chunk_size = paths.len().div_ceil(thread_count);
570 std::thread::scope(|s| -> Result<Vec<Vec<Result<Option<FileRecord>>>>> {
571 paths
572 .chunks(chunk_size)
573 .map(|chunk| {
574 s.spawn(move || -> Vec<Result<Option<FileRecord>>> {
575 let mut results = Vec::with_capacity(chunk.len());
576 for path in chunk {
577 if cancel.is_some_and(|c| c.load(Ordering::Relaxed)) {
578 results.push(Err(anyhow::anyhow!("analysis cancelled")));
579 break;
580 }
581 results.push(analyze_candidate_file(
582 path,
583 root,
584 config,
585 include_globs,
586 exclude_globs,
587 enabled_languages,
588 ));
589 }
590 results
591 })
592 })
593 .map(|h| {
594 h.join()
595 .map_err(|_| anyhow::anyhow!("analysis thread panicked"))
596 })
597 .collect()
598 })
599}
600
601fn merge_chunk_results(
602 chunk_results: Vec<Vec<Result<Option<FileRecord>>>>,
603 analyzed: &mut Vec<FileRecord>,
604 skipped: &mut Vec<FileRecord>,
605 warnings: &mut Vec<String>,
606) -> Result<()> {
607 for chunk in chunk_results {
608 for result in chunk {
609 if let Some(record) = result? {
610 push_record(record, analyzed, skipped, warnings);
611 }
612 }
613 }
614 Ok(())
615}
616
617fn process_submodules(config: &AppConfig, analyzed: &mut [FileRecord]) -> Vec<SubmoduleSummary> {
619 let root = config.discovery.root_paths[0]
620 .canonicalize()
621 .unwrap_or_else(|_| config.discovery.root_paths[0].clone());
622 let submodules = detect_submodules(&root);
623 if submodules.is_empty() {
624 return Vec::new();
625 }
626
627 for file in analyzed.iter_mut() {
628 for (name, sub_path) in &submodules {
629 let prefix = sub_path.to_string_lossy().replace('\\', "/");
630 let rel = &file.relative_path;
631 if rel == &prefix || rel.starts_with(&format!("{prefix}/")) {
632 file.submodule = Some(name.clone());
633 break;
634 }
635 }
636 }
637
638 build_submodule_summaries(analyzed, &submodules)
639}
640
641fn assemble_run(
643 config: &AppConfig,
644 runtime_mode: &str,
645 analyzed: Vec<FileRecord>,
646 skipped: Vec<FileRecord>,
647 warnings: Vec<String>,
648 submodule_summaries: Vec<SubmoduleSummary>,
649) -> AnalysisRun {
650 let summary = build_summary(&analyzed, &skipped);
651 let language_summaries = build_language_summaries(&analyzed);
652
653 let first_root = config
654 .discovery
655 .root_paths
656 .first()
657 .map(|p| p.canonicalize().unwrap_or_else(|_| p.clone()));
658 let git = first_root
659 .as_deref()
660 .map(detect_git_for_run)
661 .unwrap_or_default();
662
663 let now = Utc::now();
664 let run_id = {
665 let uuid_suffix = Uuid::new_v4().simple().to_string();
666 format!("{}-{}", now.format("%Y%m%d-%H%M"), uuid_suffix)
667 };
668
669 AnalysisRun {
670 tool: ToolMetadata {
671 name: "sloc".into(),
672 version: env!("CARGO_PKG_VERSION").into(),
673 run_id,
674 timestamp_utc: now,
675 },
676 environment: EnvironmentMetadata {
677 operating_system: std::env::consts::OS.into(),
678 architecture: std::env::consts::ARCH.into(),
679 runtime_mode: runtime_mode.into(),
680 initiator_username: get_current_username(),
681 initiator_hostname: get_hostname(),
682 },
683 effective_configuration: config.clone(),
684 input_roots: config
685 .discovery
686 .root_paths
687 .iter()
688 .map(|p| path_to_string(p))
689 .collect(),
690 summary_totals: summary,
691 totals_by_language: language_summaries,
692 per_file_records: analyzed,
693 skipped_file_records: skipped,
694 warnings,
695 submodule_summaries,
696 git_commit_short: git.commit_short,
697 git_commit_long: git.commit_long,
698 git_branch: git.branch,
699 git_commit_author: git.author,
700 git_tags: git.tags,
701 git_nearest_tag: git.nearest_tag,
702 git_commit_date: git.commit_date,
703 }
704}
705
706#[allow(clippy::too_many_lines)]
711pub fn analyze(
712 config: &AppConfig,
713 runtime_mode: &str,
714 cancel: Option<&AtomicBool>,
715) -> Result<AnalysisRun> {
716 config.validate()?;
717
718 if config.discovery.root_paths.is_empty() {
719 anyhow::bail!("no input paths were provided");
720 }
721
722 let include_globs = compile_globset(&config.discovery.include_globs)?;
723 let exclude_globs = compile_globset(&config.discovery.exclude_globs)?;
724 let enabled_languages = parse_enabled_languages(&config.analysis.enabled_languages)?;
725
726 let mut analyzed = Vec::new();
727 let mut skipped = Vec::new();
728 let mut warnings = Vec::new();
729 let mut seen_paths = HashSet::new();
730
731 for root in &config.discovery.root_paths {
732 if cancel.is_some_and(|c| c.load(Ordering::Relaxed)) {
733 anyhow::bail!("analysis cancelled");
734 }
735
736 let root = root.canonicalize().unwrap_or_else(|_| root.clone());
737
738 if root.is_file() {
739 if let Some(record) = analyze_candidate_file(
740 &root,
741 root.parent().unwrap_or_else(|| Path::new(".")),
742 config,
743 include_globs.as_ref(),
744 exclude_globs.as_ref(),
745 enabled_languages.as_ref(),
746 )? {
747 push_record(record, &mut analyzed, &mut skipped, &mut warnings);
748 }
749 continue;
750 }
751
752 walk_root(
753 &root,
754 config,
755 include_globs.as_ref(),
756 exclude_globs.as_ref(),
757 enabled_languages.as_ref(),
758 &mut seen_paths,
759 &mut analyzed,
760 &mut skipped,
761 &mut warnings,
762 cancel,
763 )?;
764 }
765
766 analyzed.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
767 skipped.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
768
769 let submodule_summaries = if config.discovery.submodule_breakdown {
771 process_submodules(config, &mut analyzed)
772 } else {
773 Vec::new()
774 };
775
776 attach_coverage(config, &mut analyzed, &mut warnings);
777
778 Ok(assemble_run(
779 config,
780 runtime_mode,
781 analyzed,
782 skipped,
783 warnings,
784 submodule_summaries,
785 ))
786}
787
788fn attach_coverage(config: &AppConfig, analyzed: &mut [FileRecord], warnings: &mut Vec<String>) {
789 let Some(cov_path) = coverage::resolve_coverage_file(config.analysis.coverage_file.as_deref())
790 else {
791 return;
792 };
793 match fs::read_to_string(&cov_path) {
794 Ok(content) => {
795 let cov_map = coverage::parse_coverage_auto(&cov_path, &content);
796 for record in analyzed.iter_mut() {
797 record.coverage =
798 coverage::lookup_coverage(&cov_map, &record.relative_path).cloned();
799 }
800 }
801 Err(e) => {
802 warnings.push(format!(
803 "coverage file '{}' could not be read: {e}",
804 cov_path.display()
805 ));
806 }
807 }
808}
809
810fn push_record(
811 record: FileRecord,
812 analyzed: &mut Vec<FileRecord>,
813 skipped: &mut Vec<FileRecord>,
814 warnings: &mut Vec<String>,
815) {
816 warnings.extend(
817 record
818 .warnings
819 .iter()
820 .map(|warning| format!("{}: {warning}", record.relative_path)),
821 );
822
823 match record.status {
824 FileStatus::AnalyzedExact | FileStatus::AnalyzedBestEffort => analyzed.push(record),
825 _ => skipped.push(record),
826 }
827}
828
829#[inline]
831fn skip_with_reason(
832 path: &Path,
833 root: &Path,
834 size: u64,
835 reason: impl Into<String>,
836) -> MetadataPolicyOutcome {
837 MetadataPolicyOutcome::Skip(Box::new(skipped_record(
838 path,
839 root,
840 size,
841 FileStatus::SkippedByPolicy,
842 vec![reason.into()],
843 )))
844}
845
846#[allow(clippy::too_many_arguments)]
850fn check_metadata_policy(
851 path: &Path,
852 root: &Path,
853 relative_path: &str,
854 metadata: &fs::Metadata,
855 config: &AppConfig,
856 include_globs: Option<&GlobSet>,
857 exclude_globs: Option<&GlobSet>,
858) -> MetadataPolicyOutcome {
859 let size = metadata.len();
860
861 if metadata.file_type().is_symlink() && !config.discovery.follow_symlinks {
862 return skip_with_reason(path, root, size, "symlink skipped by policy");
863 }
864 if file_name_eq(path, ".gitignore") {
865 return skip_with_reason(path, root, size, ".gitignore is always excluded");
866 }
867 if is_excluded_dir_path(path, &config.discovery.excluded_directories) {
868 return skip_with_reason(path, root, size, "path matched excluded directory setting");
869 }
870 if size > config.discovery.max_file_size_bytes {
871 return skip_with_reason(
872 path,
873 root,
874 size,
875 format!(
876 "file exceeded max_file_size_bytes ({})",
877 config.discovery.max_file_size_bytes
878 ),
879 );
880 }
881 if let Some(globs) = include_globs {
882 if !globs.is_match(Path::new(relative_path)) && !globs.is_match(path) {
883 return MetadataPolicyOutcome::Exclude;
884 }
885 }
886 if let Some(globs) = exclude_globs {
887 if globs.is_match(Path::new(relative_path)) || globs.is_match(path) {
888 return skip_with_reason(path, root, size, "path matched exclude glob");
889 }
890 }
891 if is_known_lockfile(path) && !config.analysis.include_lockfiles {
892 return skip_with_reason(path, root, size, "lockfile skipped by default policy");
893 }
894
895 MetadataPolicyOutcome::Continue
896}
897
/// Outcome of the content-based policy checks for one file.
struct ContentPolicyResult {
    /// Whether the path was recognised as a vendor path.
    vendor: bool,
    /// Whether the generated-file check flagged the file.
    generated: bool,
    /// Whether the minified-file check flagged the file.
    minified: bool,
    /// Record to return when a policy rejected the file; `None` means the
    /// file should be analyzed.
    skip_record: Option<FileRecord>,
}
904
905fn check_content_policy(
908 path: &Path,
909 root: &Path,
910 size_bytes: u64,
911 bytes: &[u8],
912 config: &AppConfig,
913) -> ContentPolicyResult {
914 let vendor = is_vendor_path(path);
915 if vendor && config.analysis.vendor_directory_detection {
916 return ContentPolicyResult {
917 vendor,
918 generated: false,
919 minified: false,
920 skip_record: Some(skipped_record(
921 path,
922 root,
923 size_bytes,
924 FileStatus::SkippedByPolicy,
925 vec!["vendor file skipped by policy".into()],
926 )),
927 };
928 }
929
930 let generated = config.analysis.generated_file_detection && looks_generated(path, bytes);
931 if generated {
932 return ContentPolicyResult {
933 vendor,
934 generated,
935 minified: false,
936 skip_record: Some(skipped_record(
937 path,
938 root,
939 size_bytes,
940 FileStatus::SkippedByPolicy,
941 vec!["generated file skipped by policy".into()],
942 )),
943 };
944 }
945
946 let minified = config.analysis.minified_file_detection && looks_minified(path, bytes);
947 if minified {
948 return ContentPolicyResult {
949 vendor,
950 generated,
951 minified,
952 skip_record: Some(skipped_record(
953 path,
954 root,
955 size_bytes,
956 FileStatus::SkippedByPolicy,
957 vec!["minified file skipped by policy".into()],
958 )),
959 };
960 }
961
962 ContentPolicyResult {
963 vendor,
964 generated,
965 minified,
966 skip_record: None,
967 }
968}
969
970fn decode_file_contents(
972 path: &Path,
973 root: &Path,
974 size_bytes: u64,
975 bytes: &[u8],
976 config: &AppConfig,
977) -> Result<Option<(String, String, Vec<String>)>> {
978 if is_binary(bytes) {
979 return match config.analysis.binary_file_behavior {
980 BinaryFileBehavior::Skip => Ok(None),
981 BinaryFileBehavior::Fail => {
982 anyhow::bail!("binary file encountered: {}", path.display())
983 }
984 };
985 }
986
987 match decode_bytes(bytes) {
988 Ok(result) => Ok(Some(result)),
989 Err(err) => match config.analysis.decode_failure_behavior {
990 FailureBehavior::WarnSkip => {
991 let _ = (path, root, size_bytes); Err(anyhow::anyhow!("__decode_warn__: {err}"))
996 }
997 FailureBehavior::Fail => {
998 anyhow::bail!("decode failure for {}: {err}", path.display())
999 }
1000 },
1001 }
1002}
1003
1004#[allow(clippy::too_many_lines)]
1005fn analyze_candidate_file(
1006 path: &Path,
1007 root: &Path,
1008 config: &AppConfig,
1009 include_globs: Option<&GlobSet>,
1010 exclude_globs: Option<&GlobSet>,
1011 enabled_languages: Option<&BTreeSet<Language>>,
1012) -> Result<Option<FileRecord>> {
1013 let metadata = match fs::symlink_metadata(path) {
1014 Ok(metadata) => metadata,
1015 Err(err) => {
1016 return Ok(Some(skipped_record(
1017 path,
1018 root,
1019 0,
1020 FileStatus::ErrorInternal,
1021 vec![format!("failed to read metadata: {err}")],
1022 )));
1023 }
1024 };
1025
1026 let relative_path = relative_path_string(path, root);
1027
1028 match check_metadata_policy(
1030 path,
1031 root,
1032 &relative_path,
1033 &metadata,
1034 config,
1035 include_globs,
1036 exclude_globs,
1037 ) {
1038 MetadataPolicyOutcome::Skip(record) => return Ok(Some(*record)),
1039 MetadataPolicyOutcome::Exclude => return Ok(None),
1040 MetadataPolicyOutcome::Continue => {}
1041 }
1042
1043 let bytes = match fs::read(path) {
1044 Ok(bytes) => bytes,
1045 Err(err) => {
1046 return Ok(Some(skipped_record(
1047 path,
1048 root,
1049 metadata.len(),
1050 FileStatus::ErrorInternal,
1051 vec![format!("failed to read file: {err}")],
1052 )));
1053 }
1054 };
1055
1056 let content_policy = check_content_policy(path, root, metadata.len(), &bytes, config);
1058 if let Some(record) = content_policy.skip_record {
1059 return Ok(Some(record));
1060 }
1061 let (vendor, generated, minified) = (
1062 content_policy.vendor,
1063 content_policy.generated,
1064 content_policy.minified,
1065 );
1066
1067 let (text, encoding, decode_warnings) =
1069 match decode_file_contents(path, root, metadata.len(), &bytes, config) {
1070 Ok(Some(result)) => result,
1071 Ok(None) => {
1072 return Ok(Some(skipped_record(
1073 path,
1074 root,
1075 metadata.len(),
1076 FileStatus::SkippedBinary,
1077 vec!["binary file skipped by default".into()],
1078 )));
1079 }
1080 Err(err) => {
1081 let msg = err.to_string();
1082 if let Some(warn_msg) = msg.strip_prefix("__decode_warn__: ") {
1083 return Ok(Some(skipped_record(
1084 path,
1085 root,
1086 metadata.len(),
1087 FileStatus::SkippedDecodeError,
1088 vec![warn_msg.to_string()],
1089 )));
1090 }
1091 return Err(err);
1092 }
1093 };
1094
1095 let first_line = text.lines().next();
1096 let language = detect_language(
1097 path,
1098 first_line,
1099 &config.analysis.extension_overrides,
1100 config.analysis.shebang_detection,
1101 );
1102
1103 let Some(language) = language else {
1104 return Ok(Some(skipped_record(
1105 path,
1106 root,
1107 metadata.len(),
1108 FileStatus::SkippedUnsupported,
1109 vec!["unsupported or undetected language".into()],
1110 )));
1111 };
1112
1113 if let Some(enabled) = enabled_languages {
1114 if !enabled.contains(&language) {
1115 return Ok(Some(skipped_record(
1116 path,
1117 root,
1118 metadata.len(),
1119 FileStatus::SkippedByPolicy,
1120 vec![format!(
1121 "language {} disabled by configuration",
1122 language.display_name()
1123 )],
1124 )));
1125 }
1126 }
1127
1128 let ieee_opts = AnalysisOptions {
1129 blank_in_block_comment_as_comment: config.analysis.blank_in_block_comment_policy
1130 == BlankInBlockCommentPolicy::CountAsComment,
1131 collapse_continuation_lines: config.analysis.continuation_line_policy
1132 == ContinuationLinePolicy::CollapseToLogical,
1133 };
1134 let analysis = analyze_text(language, &text, ieee_opts);
1135 let effective_counts = compute_effective_counts(
1136 &analysis.raw,
1137 config.analysis.mixed_line_policy,
1138 config.analysis.python_docstrings_as_comments,
1139 config.analysis.count_compiler_directives,
1140 );
1141
1142 let mut warnings = decode_warnings;
1143 warnings.extend(analysis.warnings.clone());
1144
1145 Ok(Some(FileRecord {
1146 path: path_to_string(path),
1147 relative_path,
1148 language: Some(language),
1149 size_bytes: metadata.len(),
1150 detected_encoding: Some(encoding),
1151 raw_line_categories: analysis.raw,
1152 effective_counts,
1153 status: match analysis.parse_mode {
1154 ParseMode::Lexical | ParseMode::TreeSitter => FileStatus::AnalyzedExact,
1155 ParseMode::LexicalBestEffort => FileStatus::AnalyzedBestEffort,
1156 },
1157 warnings,
1158 generated,
1159 minified,
1160 vendor,
1161 parse_mode: Some(analysis.parse_mode),
1162 submodule: None,
1163 coverage: None,
1164 }))
1165}
1166
1167const fn compute_effective_counts(
1168 raw: &RawLineCounts,
1169 mixed_line_policy: MixedLinePolicy,
1170 python_docstrings_as_comments: bool,
1171 count_compiler_directives: bool,
1172) -> EffectiveCounts {
1173 let mut effective = EffectiveCounts {
1174 code_lines: raw.code_only_lines,
1175 comment_lines: raw.single_comment_only_lines + raw.multi_comment_only_lines,
1176 blank_lines: raw.blank_only_lines,
1177 mixed_lines_separate: 0,
1178 };
1179
1180 if python_docstrings_as_comments {
1181 effective.comment_lines += raw.docstring_comment_lines;
1182 } else {
1183 effective.code_lines += raw.docstring_comment_lines;
1184 }
1185
1186 let mixed_total = raw.mixed_code_single_comment_lines + raw.mixed_code_multi_comment_lines;
1187 match mixed_line_policy {
1188 MixedLinePolicy::CodeOnly => effective.code_lines += mixed_total,
1189 MixedLinePolicy::CodeAndComment => {
1190 effective.code_lines += mixed_total;
1191 effective.comment_lines += mixed_total;
1192 }
1193 MixedLinePolicy::CommentOnly => effective.comment_lines += mixed_total,
1194 MixedLinePolicy::SeparateMixedCategory => effective.mixed_lines_separate += mixed_total,
1195 }
1196
1197 if !count_compiler_directives {
1200 effective.code_lines = effective
1201 .code_lines
1202 .saturating_sub(raw.compiler_directive_lines);
1203 }
1204
1205 effective
1206}
1207
1208fn build_summary(analyzed: &[FileRecord], skipped: &[FileRecord]) -> SummaryTotals {
1209 let mut summary = SummaryTotals {
1210 files_considered: (analyzed.len() + skipped.len()) as u64,
1211 files_analyzed: analyzed.len() as u64,
1212 files_skipped: skipped.len() as u64,
1213 ..Default::default()
1214 };
1215
1216 for record in analyzed {
1217 summary.total_physical_lines += record.raw_line_categories.total_physical_lines;
1218 summary.code_lines += record.effective_counts.code_lines;
1219 summary.comment_lines += record.effective_counts.comment_lines;
1220 summary.blank_lines += record.effective_counts.blank_lines;
1221 summary.mixed_lines_separate += record.effective_counts.mixed_lines_separate;
1222 summary.functions += record.raw_line_categories.functions;
1223 summary.classes += record.raw_line_categories.classes;
1224 summary.variables += record.raw_line_categories.variables;
1225 summary.imports += record.raw_line_categories.imports;
1226 summary.test_count += record.raw_line_categories.test_count;
1227 summary.test_assertion_count += record.raw_line_categories.test_assertion_count;
1228 summary.test_suite_count += record.raw_line_categories.test_suite_count;
1229 if let Some(cov) = &record.coverage {
1230 summary.coverage_lines_found += u64::from(cov.lines_found);
1231 summary.coverage_lines_hit += u64::from(cov.lines_hit);
1232 summary.coverage_functions_found += u64::from(cov.functions_found);
1233 summary.coverage_functions_hit += u64::from(cov.functions_hit);
1234 summary.coverage_branches_found += u64::from(cov.branches_found);
1235 summary.coverage_branches_hit += u64::from(cov.branches_hit);
1236 }
1237 }
1238
1239 summary
1240}
1241
1242const fn zeroed_summary(language: Language) -> LanguageSummary {
1244 LanguageSummary {
1245 language,
1246 files: 0,
1247 total_physical_lines: 0,
1248 code_lines: 0,
1249 comment_lines: 0,
1250 blank_lines: 0,
1251 mixed_lines_separate: 0,
1252 functions: 0,
1253 classes: 0,
1254 variables: 0,
1255 imports: 0,
1256 test_count: 0,
1257 test_assertion_count: 0,
1258 test_suite_count: 0,
1259 coverage_lines_found: 0,
1260 coverage_lines_hit: 0,
1261 coverage_functions_found: 0,
1262 coverage_functions_hit: 0,
1263 coverage_branches_found: 0,
1264 coverage_branches_hit: 0,
1265 }
1266}
1267
1268fn accumulate_record_into_summary(entry: &mut LanguageSummary, record: &FileRecord) {
1270 entry.files += 1;
1271 let r = &record.raw_line_categories;
1272 entry.total_physical_lines += r.total_physical_lines;
1273 entry.code_lines += record.effective_counts.code_lines;
1274 entry.comment_lines += record.effective_counts.comment_lines;
1275 entry.blank_lines += record.effective_counts.blank_lines;
1276 entry.mixed_lines_separate += record.effective_counts.mixed_lines_separate;
1277 entry.functions += r.functions;
1278 entry.classes += r.classes;
1279 entry.variables += r.variables;
1280 entry.imports += r.imports;
1281 entry.test_count += r.test_count;
1282 entry.test_assertion_count += r.test_assertion_count;
1283 entry.test_suite_count += r.test_suite_count;
1284 if let Some(cov) = &record.coverage {
1285 entry.coverage_lines_found += u64::from(cov.lines_found);
1286 entry.coverage_lines_hit += u64::from(cov.lines_hit);
1287 entry.coverage_functions_found += u64::from(cov.functions_found);
1288 entry.coverage_functions_hit += u64::from(cov.functions_hit);
1289 entry.coverage_branches_found += u64::from(cov.branches_found);
1290 entry.coverage_branches_hit += u64::from(cov.branches_hit);
1291 }
1292}
1293
1294fn build_language_summaries(analyzed: &[FileRecord]) -> Vec<LanguageSummary> {
1295 let mut by_language: BTreeMap<Language, LanguageSummary> = BTreeMap::new();
1296 for record in analyzed {
1297 let Some(language) = record.language else {
1298 continue;
1299 };
1300 let entry = by_language
1301 .entry(language)
1302 .or_insert_with(|| zeroed_summary(language));
1303 accumulate_record_into_summary(entry, record);
1304 }
1305 by_language.into_values().collect()
1306}
1307
1308fn skipped_record(
1309 path: &Path,
1310 root: &Path,
1311 size_bytes: u64,
1312 status: FileStatus,
1313 warnings: Vec<String>,
1314) -> FileRecord {
1315 FileRecord {
1316 path: path_to_string(path),
1317 relative_path: relative_path_string(path, root),
1318 language: None,
1319 size_bytes,
1320 detected_encoding: None,
1321 raw_line_categories: RawLineCounts::default(),
1322 effective_counts: EffectiveCounts::default(),
1323 status,
1324 warnings,
1325 generated: false,
1326 minified: false,
1327 vendor: false,
1328 parse_mode: None,
1329 submodule: None,
1330 coverage: None,
1331 }
1332}
1333
/// Renders `path` relative to `root` with forward slashes.
///
/// When `path` does not start with `root`, the full `path` is rendered instead. Non-UTF-8
/// sequences are replaced lossily and every backslash becomes `/`.
fn relative_path_string(path: &Path, root: &Path) -> String {
    let shown = match path.strip_prefix(root) {
        Ok(inside_root) => inside_root,
        Err(_) => path,
    };
    let lossy = shown.to_string_lossy();
    lossy.replace('\\', "/")
}
1340
/// Renders `path` as a string with forward slashes: non-UTF-8 sequences are replaced
/// lossily and every backslash becomes `/`.
fn path_to_string(path: &Path) -> String {
    path.to_string_lossy()
        .chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect()
}
1344
/// Reads `<root>/.gitmodules` and returns `(name, path)` for every `[submodule "<name>"]`
/// section that declares a `path` entry, in file order.
///
/// Returns an empty list when `.gitmodules` is missing, is not a regular file, or cannot
/// be read as UTF-8 text. Paths are returned exactly as written in the file; they are not
/// resolved against `root` or checked for existence.
///
/// Malformed section headers (for example `[submodule "]`) are ignored rather than
/// parsed, and only the exact key `path` is recognised — keys that merely start with
/// `path` (such as `pathspec`) are not treated as the submodule path.
#[must_use]
pub fn detect_submodules(root: &Path) -> Vec<(String, PathBuf)> {
    let gitmodules = root.join(".gitmodules");
    if !gitmodules.is_file() {
        return Vec::new();
    }
    let Ok(content) = fs::read_to_string(&gitmodules) else {
        return Vec::new();
    };

    let mut result = Vec::new();
    let mut current_name: Option<String> = None;
    let mut current_path: Option<PathBuf> = None;

    for line in content.lines() {
        let trimmed = line.trim();

        // Stripping prefix and suffix in sequence guarantees they do not overlap, so a
        // truncated header can never produce an out-of-range slice.
        let header_name = trimmed
            .strip_prefix("[submodule \"")
            .and_then(|rest| rest.strip_suffix("\"]"));
        if let Some(section_name) = header_name {
            // A new section starts: flush the previous one if it was complete. Both
            // halves are reset either way so state never leaks across sections.
            if let (Some(done_name), Some(done_path)) = (current_name.take(), current_path.take())
            {
                result.push((done_name, done_path));
            }
            current_name = Some(section_name.to_string());
            continue;
        }

        // Accept `path = value` / `path=value`; the `=` must follow the key directly
        // (ignoring whitespace) so longer keys with the same prefix are rejected.
        let path_value = trimmed
            .strip_prefix("path")
            .and_then(|rest| rest.trim_start().strip_prefix('='));
        if let Some(value) = path_value {
            current_path = Some(PathBuf::from(value.trim()));
        }
    }

    // Flush the final section.
    if let (Some(name), Some(path)) = (current_name, current_path) {
        result.push((name, path));
    }

    result
}
1381
1382fn build_submodule_summaries(
1383 analyzed: &[FileRecord],
1384 submodules: &[(String, PathBuf)],
1385) -> Vec<SubmoduleSummary> {
1386 submodules
1387 .iter()
1388 .map(|(name, path)| {
1389 let files: Vec<&FileRecord> = analyzed
1390 .iter()
1391 .filter(|f| f.submodule.as_deref() == Some(name.as_str()))
1392 .collect();
1393
1394 let files_analyzed = files.len() as u64;
1395 let total_physical_lines = files
1396 .iter()
1397 .map(|f| f.raw_line_categories.total_physical_lines)
1398 .sum();
1399 let code_lines = files.iter().map(|f| f.effective_counts.code_lines).sum();
1400 let comment_lines = files.iter().map(|f| f.effective_counts.comment_lines).sum();
1401 let blank_lines = files.iter().map(|f| f.effective_counts.blank_lines).sum();
1402 let language_summaries = build_language_summaries_from_slice(&files);
1403
1404 SubmoduleSummary {
1405 name: name.clone(),
1406 relative_path: path.to_string_lossy().replace('\\', "/"),
1407 files_analyzed,
1408 total_physical_lines,
1409 code_lines,
1410 comment_lines,
1411 blank_lines,
1412 language_summaries,
1413 }
1414 })
1415 .filter(|s| s.files_analyzed > 0)
1416 .collect()
1417}
1418
1419fn build_language_summaries_from_slice(files: &[&FileRecord]) -> Vec<LanguageSummary> {
1420 let mut map: BTreeMap<String, LanguageSummary> = BTreeMap::new();
1421 for file in files {
1422 let Some(lang) = file.language else { continue };
1423 let entry = map
1424 .entry(lang.display_name().to_string())
1425 .or_insert_with(|| zeroed_summary(lang));
1426 accumulate_record_into_summary(entry, file);
1427 }
1428 map.into_values().collect()
1429}
1430
/// Returns `true` when the final component of `path` is valid UTF-8 and equals `expected`
/// exactly (case-sensitive). Paths without a file name never match.
fn file_name_eq(path: &Path, expected: &str) -> bool {
    match path.file_name().and_then(|name| name.to_str()) {
        Some(actual) => actual == expected,
        None => false,
    }
}
1436
/// Returns `true` when any component of `path` is exactly equal to one of
/// `excluded_dirs`. Components that are not valid UTF-8 never match.
fn is_excluded_dir_path(path: &Path, excluded_dirs: &[String]) -> bool {
    for component in path.components() {
        let Some(part) = component.as_os_str().to_str() else {
            continue;
        };
        if excluded_dirs.iter().any(|excluded| excluded.as_str() == part) {
            return true;
        }
    }
    false
}
1445
/// Returns `true` when any component of `path` is exactly `vendor`, `node_modules` or
/// `packages`. Components that are not valid UTF-8 never match.
fn is_vendor_path(path: &Path) -> bool {
    const VENDOR_DIRS: [&str; 3] = ["vendor", "node_modules", "packages"];

    path.components()
        .filter_map(|component| component.as_os_str().to_str())
        .any(|part| VENDOR_DIRS.contains(&part))
}
1454
/// Returns `true` when the file name of `path` is one of the recognised dependency
/// lockfile names (exact, case-sensitive match).
fn is_known_lockfile(path: &Path) -> bool {
    const LOCKFILES: [&str; 7] = [
        "Cargo.lock",
        "package-lock.json",
        "yarn.lock",
        "pnpm-lock.yaml",
        "Pipfile.lock",
        "poetry.lock",
        "composer.lock",
    ];

    let Some(name) = path.file_name().and_then(|name| name.to_str()) else {
        return false;
    };
    LOCKFILES.contains(&name)
}
1471
1472fn looks_generated(path: &Path, bytes: &[u8]) -> bool {
1473 let file_name = path
1474 .file_name()
1475 .and_then(|name| name.to_str())
1476 .unwrap_or_default();
1477 if file_name.contains(".generated.") || file_name.contains(".g.") {
1478 return true;
1479 }
1480
1481 let sample = String::from_utf8_lossy(&bytes[..bytes.len().min(GENERATED_SAMPLE_BYTES)])
1482 .to_ascii_lowercase();
1483 sample.contains("@generated") || sample.contains("generated by")
1484}
1485
1486fn looks_minified(path: &Path, bytes: &[u8]) -> bool {
1487 let file_name = path
1488 .file_name()
1489 .and_then(|name| name.to_str())
1490 .unwrap_or_default();
1491 if file_name.contains(".min.") {
1492 return true;
1493 }
1494
1495 let sample = String::from_utf8_lossy(&bytes[..bytes.len().min(MINIFIED_SAMPLE_BYTES)]);
1496 let longest_line = sample.lines().map(str::len).max().unwrap_or(0);
1497 let whitespace = sample.chars().filter(|c| c.is_whitespace()).count();
1498 longest_line > MINIFIED_LINE_THRESHOLD && whitespace * 100 < sample.len().max(1)
1499}
1500
1501fn is_binary(bytes: &[u8]) -> bool {
1502 if bytes.starts_with(&[0xEF, 0xBB, 0xBF])
1503 || bytes.starts_with(&[0xFF, 0xFE])
1504 || bytes.starts_with(&[0xFE, 0xFF])
1505 {
1506 return false;
1507 }
1508
1509 let sample = &bytes[..bytes.len().min(BINARY_SAMPLE_BYTES)];
1510 sample.contains(&0)
1511}
1512
1513fn decode_utf16_bom(
1516 bom_stripped: &[u8],
1517 encoding: &'static encoding_rs::Encoding,
1518 label: &str,
1519) -> (String, String, Vec<String>) {
1520 let (cow, _, had_errors) = encoding.decode(bom_stripped);
1521 let mut warnings = Vec::new();
1522 if had_errors {
1523 warnings.push(format!("{label} decode contained replacement characters"));
1524 }
1525 (cow.into_owned(), label.into(), warnings)
1526}
1527
1528fn decode_bytes(bytes: &[u8]) -> std::result::Result<(String, String, Vec<String>), String> {
1529 if bytes.starts_with(&[0xEF, 0xBB, 0xBF]) {
1530 let text = String::from_utf8(bytes[3..].to_vec()).map_err(|err| err.to_string())?;
1531 return Ok((text, "utf-8-bom".into(), vec![]));
1532 }
1533 if bytes.starts_with(&[0xFF, 0xFE]) {
1534 return Ok(decode_utf16_bom(&bytes[2..], UTF_16LE, "utf-16le"));
1535 }
1536 if bytes.starts_with(&[0xFE, 0xFF]) {
1537 return Ok(decode_utf16_bom(&bytes[2..], UTF_16BE, "utf-16be"));
1538 }
1539
1540 #[allow(clippy::option_if_let_else)]
1542 if let Ok(text) = String::from_utf8(bytes.to_vec()) {
1543 Ok((text, "utf-8".into(), vec![]))
1544 } else {
1545 let (cow, _, had_errors) = WINDOWS_1252.decode(bytes);
1546 let mut warnings = vec!["decoded using windows-1252 fallback".into()];
1547 if had_errors {
1548 warnings.push("fallback decode contained replacement characters".into());
1549 }
1550 Ok((cow.into_owned(), "windows-1252".into(), warnings))
1551 }
1552}
1553
1554fn compile_globset(patterns: &[String]) -> Result<Option<GlobSet>> {
1555 if patterns.is_empty() {
1556 return Ok(None);
1557 }
1558
1559 let mut builder = GlobSetBuilder::new();
1560 for pattern in patterns {
1561 builder
1562 .add(Glob::new(pattern).with_context(|| format!("invalid glob pattern: {pattern}"))?);
1563 }
1564 Ok(Some(
1565 builder.build().context("failed to compile glob filters")?,
1566 ))
1567}
1568
1569fn parse_enabled_languages(enabled: &[String]) -> Result<Option<BTreeSet<Language>>> {
1570 if enabled.is_empty() {
1571 return Ok(None);
1572 }
1573
1574 let supported = supported_languages();
1575 let mut set = BTreeSet::new();
1576 for name in enabled {
1577 let language = Language::from_name(name)
1578 .with_context(|| format!("unsupported language in config: {name}"))?;
1579 if !supported.contains(&language) {
1580 anyhow::bail!("language {name} is not supported in this build");
1581 }
1582 set.insert(language);
1583 }
1584 Ok(Some(set))
1585}
1586
1587pub fn write_json(run: &AnalysisRun, output_path: &Path) -> Result<()> {
1591 let json = serde_json::to_string_pretty(run).context("failed to serialize analysis run")?;
1592 fs::write(output_path, json)
1593 .with_context(|| format!("failed to write JSON output to {}", output_path.display()))
1594}
1595
1596pub fn read_json(path: &Path) -> Result<AnalysisRun> {
1600 let contents = fs::read_to_string(path)
1601 .with_context(|| format!("failed to read result file {}", path.display()))?;
1602 serde_json::from_str(&contents)
1603 .with_context(|| format!("failed to parse JSON result {}", path.display()))
1604}
1605
#[cfg(test)]
mod tests {
    use super::*;

    /// `CodeOnly` books mixed lines as code; docstrings are booked as comments here.
    #[test]
    fn effective_counts_respect_code_only_policy() {
        let tallies = RawLineCounts {
            code_only_lines: 2,
            single_comment_only_lines: 1,
            mixed_code_single_comment_lines: 3,
            docstring_comment_lines: 2,
            ..RawLineCounts::default()
        };

        let EffectiveCounts {
            code_lines,
            comment_lines,
            ..
        } = compute_effective_counts(&tallies, MixedLinePolicy::CodeOnly, true, true);

        // 2 code-only + 3 mixed.
        assert_eq!(code_lines, 5);
        // 1 comment-only + 2 docstring.
        assert_eq!(comment_lines, 3);
    }

    /// `SeparateMixedCategory` keeps mixed lines out of both the code and comment totals.
    #[test]
    fn effective_counts_can_separate_mixed() {
        let tallies = RawLineCounts {
            mixed_code_single_comment_lines: 2,
            mixed_code_multi_comment_lines: 1,
            ..RawLineCounts::default()
        };

        let EffectiveCounts {
            code_lines,
            comment_lines,
            mixed_lines_separate,
            ..
        } = compute_effective_counts(&tallies, MixedLinePolicy::SeparateMixedCategory, true, true);

        assert_eq!(mixed_lines_separate, 3);
        assert_eq!(code_lines, 0);
        assert_eq!(comment_lines, 0);
    }

    /// Bytes that are not valid UTF-8 fall back to windows-1252 and produce a warning.
    #[test]
    fn windows_1252_fallback_decodes() {
        // "Hello", 0x96 (en dash in windows-1252, invalid as UTF-8), "W".
        let input: [u8; 7] = *b"Hello\x96W";

        let (decoded, label, notes) = decode_bytes(&input).unwrap();

        assert_eq!(label, "windows-1252");
        assert!(decoded.contains('–'));
        assert!(!notes.is_empty());
    }
}