1#![allow(clippy::multiple_crate_versions)]
4
5pub mod baseline;
6pub mod coverage;
7pub mod delta;
8pub mod history;
9pub use baseline::{check_against_baseline, resolve_baselines_path, BaselineEntry, BaselineStore};
10pub use coverage::{aggregate_line_coverage, lookup_coverage, parse_lcov, FileCoverage};
11pub use delta::{compute_delta, FileChangeStatus, FileDelta, ScanComparison, SummaryDelta};
12pub use history::{RegistryEntry, ScanRegistry, ScanSummarySnapshot, WatchedDirsStore};
13
14use std::collections::{BTreeMap, BTreeSet, HashSet};
15use std::fs;
16use std::path::{Path, PathBuf};
17use std::sync::atomic::{AtomicBool, Ordering};
18
19use anyhow::{Context, Result};
20use chrono::{DateTime, Utc};
21use encoding_rs::{UTF_16BE, UTF_16LE, WINDOWS_1252};
22use globset::{Glob, GlobSet, GlobSetBuilder};
23use ignore::WalkBuilder;
24use serde::{Deserialize, Serialize};
25use uuid::Uuid;
26
27use sloc_config::{
28 AppConfig, BinaryFileBehavior, BlankInBlockCommentPolicy, ContinuationLinePolicy,
29 FailureBehavior, MixedLinePolicy,
30};
31use sloc_languages::{
32 analyze_text, detect_language, supported_languages, AnalysisOptions, Language, ParseMode,
33 RawLineCounts,
34};
35
/// Upper bound on the number of worker threads used for file analysis.
const MAX_ANALYSIS_THREADS: usize = 16;
/// Worker-thread count used when the available parallelism cannot be queried.
const DEFAULT_ANALYSIS_THREADS: usize = 4;
/// Sample size in bytes (1 KiB).
/// NOTE(review): presumably the prefix inspected by generated-file detection;
/// the consumer is outside this chunk — confirm.
const GENERATED_SAMPLE_BYTES: usize = 1 << 10;
/// Sample size in bytes (4 KiB).
/// NOTE(review): presumably the prefix inspected by minified-file detection — confirm.
const MINIFIED_SAMPLE_BYTES: usize = 4 << 10;
/// Line-related threshold used by minified-file detection.
/// NOTE(review): exact semantics live in the detector, which is not visible here.
const MINIFIED_LINE_THRESHOLD: usize = 2_000;
/// Sample size in bytes (8 KiB).
/// NOTE(review): presumably the prefix inspected by the binary-content check — confirm.
const BINARY_SAMPLE_BYTES: usize = 8 << 10;
50
51enum MetadataPolicyOutcome {
53 Skip(Box<FileRecord>),
55 Exclude,
57 Continue,
59}
60
61#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
62#[serde(rename_all = "snake_case")]
63pub enum FileStatus {
64 AnalyzedExact,
65 AnalyzedBestEffort,
66 SkippedBinary,
67 SkippedDecodeError,
68 SkippedUnsupported,
69 SkippedByPolicy,
70 ErrorInternal,
71}
72
73#[derive(Debug, Clone, Serialize, Deserialize, Default)]
74pub struct EffectiveCounts {
75 pub code_lines: u64,
76 pub comment_lines: u64,
77 pub blank_lines: u64,
78 pub mixed_lines_separate: u64,
79}
80
81#[derive(Debug, Clone, Serialize, Deserialize)]
82pub struct ToolMetadata {
83 pub name: String,
84 pub version: String,
85 pub run_id: String,
86 pub timestamp_utc: DateTime<Utc>,
87}
88
89#[derive(Debug, Clone, Serialize, Deserialize)]
90pub struct EnvironmentMetadata {
91 pub operating_system: String,
92 pub architecture: String,
93 pub runtime_mode: String,
94 pub initiator_username: String,
95 pub initiator_hostname: String,
96}
97
98#[derive(Debug, Clone, Serialize, Deserialize, Default)]
99pub struct SummaryTotals {
100 pub files_considered: u64,
101 pub files_analyzed: u64,
102 pub files_skipped: u64,
103 pub total_physical_lines: u64,
104 pub code_lines: u64,
105 pub comment_lines: u64,
106 pub blank_lines: u64,
107 pub mixed_lines_separate: u64,
108 #[serde(default)]
109 pub functions: u64,
110 #[serde(default)]
111 pub classes: u64,
112 #[serde(default)]
113 pub variables: u64,
114 #[serde(default)]
115 pub imports: u64,
116 #[serde(default)]
117 pub test_count: u64,
118 #[serde(default)]
120 pub test_assertion_count: u64,
121 #[serde(default)]
123 pub test_suite_count: u64,
124 #[serde(default)]
126 pub coverage_lines_found: u64,
127 #[serde(default)]
128 pub coverage_lines_hit: u64,
129 #[serde(default)]
130 pub coverage_functions_found: u64,
131 #[serde(default)]
132 pub coverage_functions_hit: u64,
133 #[serde(default)]
134 pub coverage_branches_found: u64,
135 #[serde(default)]
136 pub coverage_branches_hit: u64,
137}
138
139#[derive(Debug, Clone, Serialize, Deserialize)]
140pub struct LanguageSummary {
141 pub language: Language,
142 pub files: u64,
143 pub total_physical_lines: u64,
144 pub code_lines: u64,
145 pub comment_lines: u64,
146 pub blank_lines: u64,
147 pub mixed_lines_separate: u64,
148 #[serde(default)]
149 pub functions: u64,
150 #[serde(default)]
151 pub classes: u64,
152 #[serde(default)]
153 pub variables: u64,
154 #[serde(default)]
155 pub imports: u64,
156 #[serde(default)]
157 pub test_count: u64,
158 #[serde(default)]
159 pub test_assertion_count: u64,
160 #[serde(default)]
161 pub test_suite_count: u64,
162 #[serde(default)]
163 pub coverage_lines_found: u64,
164 #[serde(default)]
165 pub coverage_lines_hit: u64,
166 #[serde(default)]
167 pub coverage_functions_found: u64,
168 #[serde(default)]
169 pub coverage_functions_hit: u64,
170 #[serde(default)]
171 pub coverage_branches_found: u64,
172 #[serde(default)]
173 pub coverage_branches_hit: u64,
174}
175
176#[derive(Debug, Clone, Serialize, Deserialize)]
177pub struct FileRecord {
178 pub path: String,
179 pub relative_path: String,
180 pub language: Option<Language>,
181 pub size_bytes: u64,
182 pub detected_encoding: Option<String>,
183 pub raw_line_categories: RawLineCounts,
184 pub effective_counts: EffectiveCounts,
185 pub status: FileStatus,
186 pub warnings: Vec<String>,
187 pub generated: bool,
188 pub minified: bool,
189 pub vendor: bool,
190 pub parse_mode: Option<ParseMode>,
191 #[serde(skip_serializing_if = "Option::is_none")]
192 pub submodule: Option<String>,
193 #[serde(default, skip_serializing_if = "Option::is_none")]
195 pub coverage: Option<FileCoverage>,
196}
197
198#[derive(Debug, Clone, Serialize, Deserialize)]
200pub struct SubmoduleSummary {
201 pub name: String,
202 pub relative_path: String,
203 pub files_analyzed: u64,
204 pub total_physical_lines: u64,
205 pub code_lines: u64,
206 pub comment_lines: u64,
207 pub blank_lines: u64,
208 pub language_summaries: Vec<LanguageSummary>,
209}
210
211#[derive(Debug, Clone, Serialize, Deserialize)]
212pub struct AnalysisRun {
213 pub tool: ToolMetadata,
214 pub environment: EnvironmentMetadata,
215 pub effective_configuration: AppConfig,
216 pub input_roots: Vec<String>,
217 pub summary_totals: SummaryTotals,
218 pub totals_by_language: Vec<LanguageSummary>,
219 pub per_file_records: Vec<FileRecord>,
220 pub skipped_file_records: Vec<FileRecord>,
221 pub warnings: Vec<String>,
222 #[serde(default, skip_serializing_if = "Vec::is_empty")]
224 pub submodule_summaries: Vec<SubmoduleSummary>,
225 #[serde(default, skip_serializing_if = "Option::is_none")]
227 pub git_commit_short: Option<String>,
228 #[serde(default, skip_serializing_if = "Option::is_none")]
230 pub git_commit_long: Option<String>,
231 #[serde(default, skip_serializing_if = "Option::is_none")]
233 pub git_branch: Option<String>,
234 #[serde(default, skip_serializing_if = "Option::is_none")]
236 pub git_commit_author: Option<String>,
237 #[serde(default, skip_serializing_if = "Option::is_none")]
239 pub git_tags: Option<String>,
240 #[serde(default, skip_serializing_if = "Option::is_none")]
242 pub git_nearest_tag: Option<String>,
243 #[serde(default, skip_serializing_if = "Option::is_none")]
245 pub git_commit_date: Option<String>,
246}
247
/// Git details gathered for a run; every field is `None` when it could not be
/// determined.
#[derive(Default)]
struct GitInfo {
    /// First seven characters of `commit_long`.
    commit_short: Option<String>,
    /// Commit hash that `HEAD` resolves to.
    commit_long: Option<String>,
    /// Branch name when `HEAD` is a `refs/heads/...` reference.
    branch: Option<String>,
    /// Name from the most recent `HEAD` reflog entry.
    author: Option<String>,
    /// Comma-separated output of `git tag --points-at HEAD`.
    tags: Option<String>,
    /// Output of `git describe --tags --abbrev=0 HEAD`.
    nearest_tag: Option<String>,
    /// Timestamp from the most recent `HEAD` reflog entry.
    commit_date: Option<String>,
}
258
259fn find_git_dir(start: &Path) -> Option<PathBuf> {
263 let mut current = Some(start);
264 while let Some(dir) = current {
265 let candidate = dir.join(".git");
266 if candidate.is_dir() {
267 return Some(candidate);
268 }
269 if candidate.is_file() {
270 if let Some(resolved) = resolve_git_file_pointer(&candidate, dir) {
271 return Some(resolved);
272 }
273 }
274 current = dir.parent();
275 }
276 None
277}
278
/// Resolves a `.git` pointer file (content `gitdir: <path>`) to the directory it names.
///
/// Forward slashes in the pointer are converted to the platform separator and
/// relative pointers are resolved against `base_dir`. The result is
/// canonicalized when possible. Returns `None` if the file cannot be read, does
/// not start with `gitdir: `, or the target is not a directory.
fn resolve_git_file_pointer(file: &Path, base_dir: &Path) -> Option<PathBuf> {
    let raw = fs::read_to_string(file).ok()?;
    let target = raw
        .trim()
        .strip_prefix("gitdir: ")?
        .replace('/', std::path::MAIN_SEPARATOR_STR);
    let target = Path::new(&target);

    let joined = if target.is_absolute() {
        target.to_path_buf()
    } else {
        base_dir.join(target)
    };
    // Fall back to the un-canonicalized path when canonicalization fails.
    let located = match joined.canonicalize() {
        Ok(canonical) => canonical,
        Err(_) => joined,
    };

    located.is_dir().then_some(located)
}
303
/// Resolves a fully qualified ref name (e.g. `refs/heads/main`) to a commit hash.
///
/// The loose ref file under `git_dir` is tried first and accepted only when its
/// trimmed content is at least 40 characters of hexadecimal digits. Otherwise
/// `packed-refs` is scanned for a `<sha> <refname>` line.
///
/// Returns `None` when the ref is found in neither place.
fn resolve_ref(git_dir: &Path, refname: &str) -> Option<String> {
    // Join component by component so the ref's '/' maps to the platform separator.
    let ref_path = refname
        .split('/')
        .fold(git_dir.to_path_buf(), |p, c| p.join(c));
    if let Ok(content) = fs::read_to_string(&ref_path) {
        let sha = content.trim();
        if sha.len() >= 40 && sha.chars().all(|c| c.is_ascii_hexdigit()) {
            return Some(sha.to_string());
        }
    }

    let packed = fs::read_to_string(git_dir.join("packed-refs")).ok()?;
    packed
        .lines()
        // '#' lines are headers, '^' lines are peeled-tag annotations.
        .filter(|line| !line.starts_with('#') && !line.starts_with('^'))
        // A line without a space (e.g. a blank line) is skipped rather than
        // ending the whole lookup.
        .filter_map(|line| line.split_once(' '))
        .find(|(_, name)| name.trim() == refname)
        .map(|(sha, _)| sha.to_string())
}
339
340fn parse_last_reflog_entry(git_dir: &Path) -> (Option<String>, Option<String>) {
346 let log_path = git_dir.join("logs").join("HEAD");
347 let Ok(content) = fs::read_to_string(&log_path) else {
348 return (None, None);
349 };
350 let Some(last) = content.lines().rfind(|l| !l.trim().is_empty()) else {
351 return (None, None);
352 };
353
354 let Some(after_shas) = last.splitn(3, ' ').nth(2) else {
357 return (None, None);
358 };
359
360 let author = after_shas.find(" <").map(|i| after_shas[..i].to_string());
362
363 let date = (|| {
365 use chrono::TimeZone as _;
366 let close = after_shas.find("> ")?;
367 let rest = after_shas[close + 2..].trim_start();
368 let mut tokens = rest.splitn(3, ' ');
369 let unix_str = tokens.next()?;
370 let offset_str = tokens.next().map(|s| s.split('\t').next().unwrap_or(s))?;
371 let ts: i64 = unix_str.parse().ok()?;
372 let dt = chrono::Utc.timestamp_opt(ts, 0).single()?;
373 let tz_display = if offset_str.len() == 5 {
375 format!("{}:{}", &offset_str[..3], &offset_str[3..])
376 } else {
377 offset_str.to_string()
378 };
379 Some(format!("{}{}", dt.format("%Y-%m-%dT%H:%M:%S"), tz_display))
380 })();
381
382 (author, date)
383}
384
385fn detect_git_for_run(project_path: &Path) -> GitInfo {
389 let Some(git_dir) = find_git_dir(project_path) else {
390 return GitInfo::default();
391 };
392
393 let head_raw = match fs::read_to_string(git_dir.join("HEAD")) {
394 Ok(s) => s.trim().to_string(),
395 Err(_) => return GitInfo::default(),
396 };
397
398 let (branch, commit_long) = head_raw.strip_prefix("ref: ").map_or_else(
399 || {
400 if head_raw.len() >= 40 && head_raw.chars().all(|c| c.is_ascii_hexdigit()) {
401 (None, Some(head_raw[..40].to_string()))
403 } else {
404 (None, None)
405 }
406 },
407 |refname| {
408 let branch = refname
409 .strip_prefix("refs/heads/")
410 .map(|b| b.trim().to_string());
411 let sha = resolve_ref(&git_dir, refname.trim());
412 (branch, sha)
413 },
414 );
415
416 let commit_short = commit_long
417 .as_deref()
418 .map(|s| s.chars().take(7).collect::<String>());
419
420 let (author, commit_date) = parse_last_reflog_entry(&git_dir);
421
422 let tags = run_git_cmd(project_path, &["tag", "--points-at", "HEAD"]).map(|t| {
425 t.lines()
426 .filter(|l| !l.is_empty())
427 .collect::<Vec<_>>()
428 .join(", ")
429 });
430 let nearest_tag = run_git_cmd(project_path, &["describe", "--tags", "--abbrev=0", "HEAD"]);
431
432 GitInfo {
433 commit_short,
434 commit_long,
435 branch,
436 author,
437 tags,
438 nearest_tag,
439 commit_date,
440 }
441}
442
/// Runs `git <args>` in `dir` and returns its trimmed stdout.
///
/// Several well-known executable locations are tried, but only to find a git
/// binary that can be started. Once a process has run, its result is final, so
/// a command that legitimately fails (for example `describe` in a repository
/// without tags) is not re-run against every other candidate.
///
/// Returns `None` when no candidate could be started, the command exited
/// unsuccessfully, stdout was not valid UTF-8, or the trimmed output was empty.
///
/// NOTE(review): `-c safe.directory=*` disables git's repository-ownership
/// check for this invocation — confirm this is intended for all callers.
fn run_git_cmd(dir: &Path, args: &[&str]) -> Option<String> {
    const CANDIDATES: [&str; 7] = [
        "git",
        "/usr/bin/git",
        "/usr/local/bin/git",
        "/opt/homebrew/bin/git",
        r"C:\Program Files\Git\cmd\git.exe",
        r"C:\Program Files\Git\bin\git.exe",
        r"C:\Program Files (x86)\Git\cmd\git.exe",
    ];

    // `output()` errs only when the process could not be spawned; that is the
    // one case where trying the next location is useful.
    let output = CANDIDATES.iter().find_map(|exe| {
        std::process::Command::new(exe)
            .args(["-c", "safe.directory=*"])
            .args(args)
            .current_dir(dir)
            .output()
            .ok()
    })?;

    if !output.status.success() {
        return None;
    }
    let text = String::from_utf8(output.stdout).ok()?;
    let trimmed = text.trim();
    (!trimmed.is_empty()).then(|| trimmed.to_string())
}
478
/// Returns the current user name from `USERNAME`, then `USER`, or `"unknown"`
/// when neither variable can be read.
fn get_current_username() -> String {
    for key in ["USERNAME", "USER"] {
        if let Ok(value) = std::env::var(key) {
            return value;
        }
    }
    "unknown".to_string()
}
484
/// Returns the host name from `COMPUTERNAME`, then `HOSTNAME`, then the trimmed
/// content of `/etc/hostname`, or `"unknown"` when none of them can be read.
fn get_hostname() -> String {
    if let Ok(name) = std::env::var("COMPUTERNAME") {
        return name;
    }
    if let Ok(name) = std::env::var("HOSTNAME") {
        return name;
    }
    match std::fs::read_to_string("/etc/hostname") {
        Ok(contents) => contents.trim().to_string(),
        Err(_) => "unknown".to_string(),
    }
}
491
492#[allow(clippy::too_many_arguments)]
494fn walk_root(
495 root: &Path,
496 config: &AppConfig,
497 include_globs: Option<&GlobSet>,
498 exclude_globs: Option<&GlobSet>,
499 enabled_languages: Option<&BTreeSet<Language>>,
500 seen_paths: &mut HashSet<PathBuf>,
501 analyzed: &mut Vec<FileRecord>,
502 skipped: &mut Vec<FileRecord>,
503 warnings: &mut Vec<String>,
504 cancel: Option<&AtomicBool>,
505) -> Result<()> {
506 let mut builder = WalkBuilder::new(root);
507 builder
508 .follow_links(config.discovery.follow_symlinks)
509 .hidden(config.discovery.ignore_hidden_files)
510 .ignore(config.discovery.honor_ignore_files)
511 .parents(config.discovery.honor_ignore_files)
512 .git_ignore(config.discovery.honor_ignore_files)
513 .git_global(config.discovery.honor_ignore_files)
514 .git_exclude(config.discovery.honor_ignore_files);
515
516 let paths = collect_walk_paths(&builder, seen_paths, warnings);
517 if paths.is_empty() {
518 return Ok(());
519 }
520
521 let chunk_results = run_parallel_analysis(
522 &paths,
523 root,
524 config,
525 include_globs,
526 exclude_globs,
527 enabled_languages,
528 cancel,
529 )?;
530 merge_chunk_results(chunk_results, analyzed, skipped, warnings)
531}
532
533fn collect_walk_paths(
534 builder: &WalkBuilder,
535 seen_paths: &mut HashSet<PathBuf>,
536 warnings: &mut Vec<String>,
537) -> Vec<PathBuf> {
538 let mut paths = Vec::new();
539 for entry in builder.build() {
540 let entry = match entry {
541 Ok(e) => e,
542 Err(err) => {
543 warnings.push(format!("discovery warning: {err}"));
544 continue;
545 }
546 };
547 let path = entry.into_path();
548 if path.is_dir() || !seen_paths.insert(path.clone()) {
549 continue;
550 }
551 paths.push(path);
552 }
553 paths
554}
555
556#[allow(clippy::too_many_arguments)]
557fn run_parallel_analysis(
558 paths: &[PathBuf],
559 root: &Path,
560 config: &AppConfig,
561 include_globs: Option<&GlobSet>,
562 exclude_globs: Option<&GlobSet>,
563 enabled_languages: Option<&BTreeSet<Language>>,
564 cancel: Option<&AtomicBool>,
565) -> Result<Vec<Vec<Result<Option<FileRecord>>>>> {
566 let thread_count = std::thread::available_parallelism().map_or(DEFAULT_ANALYSIS_THREADS, |n| {
567 n.get().min(MAX_ANALYSIS_THREADS)
568 });
569 let chunk_size = paths.len().div_ceil(thread_count);
570 std::thread::scope(|s| -> Result<Vec<Vec<Result<Option<FileRecord>>>>> {
571 paths
572 .chunks(chunk_size)
573 .map(|chunk| {
574 s.spawn(move || -> Vec<Result<Option<FileRecord>>> {
575 let mut results = Vec::with_capacity(chunk.len());
576 for path in chunk {
577 if cancel.is_some_and(|c| c.load(Ordering::Relaxed)) {
578 results.push(Err(anyhow::anyhow!("analysis cancelled")));
579 break;
580 }
581 results.push(analyze_candidate_file(
582 path,
583 root,
584 config,
585 include_globs,
586 exclude_globs,
587 enabled_languages,
588 ));
589 }
590 results
591 })
592 })
593 .map(|h| {
594 h.join()
595 .map_err(|_| anyhow::anyhow!("analysis thread panicked"))
596 })
597 .collect()
598 })
599}
600
601fn merge_chunk_results(
602 chunk_results: Vec<Vec<Result<Option<FileRecord>>>>,
603 analyzed: &mut Vec<FileRecord>,
604 skipped: &mut Vec<FileRecord>,
605 warnings: &mut Vec<String>,
606) -> Result<()> {
607 for chunk in chunk_results {
608 for result in chunk {
609 if let Some(record) = result? {
610 push_record(record, analyzed, skipped, warnings);
611 }
612 }
613 }
614 Ok(())
615}
616
617fn process_submodules(config: &AppConfig, analyzed: &mut [FileRecord]) -> Vec<SubmoduleSummary> {
619 let root = config.discovery.root_paths[0]
620 .canonicalize()
621 .unwrap_or_else(|_| config.discovery.root_paths[0].clone());
622 let submodules = detect_submodules(&root);
623 if submodules.is_empty() {
624 return Vec::new();
625 }
626
627 for file in analyzed.iter_mut() {
628 for (name, sub_path) in &submodules {
629 let prefix = sub_path.to_string_lossy().replace('\\', "/");
630 let rel = &file.relative_path;
631 if rel == &prefix || rel.starts_with(&format!("{prefix}/")) {
632 file.submodule = Some(name.clone());
633 break;
634 }
635 }
636 }
637
638 build_submodule_summaries(analyzed, &submodules)
639}
640
641fn assemble_run(
643 config: &AppConfig,
644 runtime_mode: &str,
645 analyzed: Vec<FileRecord>,
646 skipped: Vec<FileRecord>,
647 warnings: Vec<String>,
648 submodule_summaries: Vec<SubmoduleSummary>,
649) -> AnalysisRun {
650 let summary = build_summary(&analyzed, &skipped);
651 let language_summaries = build_language_summaries(&analyzed);
652
653 let first_root = config
654 .discovery
655 .root_paths
656 .first()
657 .map(|p| p.canonicalize().unwrap_or_else(|_| p.clone()));
658 let git = first_root
659 .as_deref()
660 .map(detect_git_for_run)
661 .unwrap_or_default();
662
663 let now = Utc::now();
664 let run_id = {
665 let uuid_suffix = Uuid::new_v4().simple().to_string();
666 format!("{}-{}", now.format("%Y%m%d-%H%M"), uuid_suffix)
667 };
668
669 AnalysisRun {
670 tool: ToolMetadata {
671 name: "sloc".into(),
672 version: env!("CARGO_PKG_VERSION").into(),
673 run_id,
674 timestamp_utc: now,
675 },
676 environment: EnvironmentMetadata {
677 operating_system: std::env::consts::OS.into(),
678 architecture: std::env::consts::ARCH.into(),
679 runtime_mode: runtime_mode.into(),
680 initiator_username: get_current_username(),
681 initiator_hostname: get_hostname(),
682 },
683 effective_configuration: config.clone(),
684 input_roots: config
685 .discovery
686 .root_paths
687 .iter()
688 .map(|p| path_to_string(p))
689 .collect(),
690 summary_totals: summary,
691 totals_by_language: language_summaries,
692 per_file_records: analyzed,
693 skipped_file_records: skipped,
694 warnings,
695 submodule_summaries,
696 git_commit_short: git.commit_short,
697 git_commit_long: git.commit_long,
698 git_branch: git.branch,
699 git_commit_author: git.author,
700 git_tags: git.tags,
701 git_nearest_tag: git.nearest_tag,
702 git_commit_date: git.commit_date,
703 }
704}
705
706#[allow(clippy::too_many_lines)]
711pub fn analyze(
712 config: &AppConfig,
713 runtime_mode: &str,
714 cancel: Option<&AtomicBool>,
715) -> Result<AnalysisRun> {
716 config.validate()?;
717
718 if config.discovery.root_paths.is_empty() {
719 anyhow::bail!("no input paths were provided");
720 }
721
722 let include_globs = compile_globset(&config.discovery.include_globs)?;
723 let exclude_globs = compile_globset(&config.discovery.exclude_globs)?;
724 let enabled_languages = parse_enabled_languages(&config.analysis.enabled_languages)?;
725
726 let mut analyzed = Vec::new();
727 let mut skipped = Vec::new();
728 let mut warnings = Vec::new();
729 let mut seen_paths = HashSet::new();
730
731 for root in &config.discovery.root_paths {
732 if cancel.is_some_and(|c| c.load(Ordering::Relaxed)) {
733 anyhow::bail!("analysis cancelled");
734 }
735
736 let root = root.canonicalize().unwrap_or_else(|_| root.clone());
737
738 if root.is_file() {
739 if let Some(record) = analyze_candidate_file(
740 &root,
741 root.parent().unwrap_or_else(|| Path::new(".")),
742 config,
743 include_globs.as_ref(),
744 exclude_globs.as_ref(),
745 enabled_languages.as_ref(),
746 )? {
747 push_record(record, &mut analyzed, &mut skipped, &mut warnings);
748 }
749 continue;
750 }
751
752 walk_root(
753 &root,
754 config,
755 include_globs.as_ref(),
756 exclude_globs.as_ref(),
757 enabled_languages.as_ref(),
758 &mut seen_paths,
759 &mut analyzed,
760 &mut skipped,
761 &mut warnings,
762 cancel,
763 )?;
764 }
765
766 analyzed.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
767 skipped.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
768
769 let submodule_summaries = if config.discovery.submodule_breakdown {
771 process_submodules(config, &mut analyzed)
772 } else {
773 Vec::new()
774 };
775
776 attach_coverage(config, &mut analyzed, &mut warnings);
777
778 Ok(assemble_run(
779 config,
780 runtime_mode,
781 analyzed,
782 skipped,
783 warnings,
784 submodule_summaries,
785 ))
786}
787
788fn attach_coverage(config: &AppConfig, analyzed: &mut [FileRecord], warnings: &mut Vec<String>) {
789 let Some(cov_path) = coverage::resolve_coverage_file(config.analysis.coverage_file.as_deref())
790 else {
791 return;
792 };
793 tracing::debug!(path = %cov_path.display(), "loading coverage file");
794 match fs::read_to_string(&cov_path) {
795 Ok(content) => {
796 let cov_map = coverage::parse_coverage_auto(&cov_path, &content);
797 let mut matched: u32 = 0;
798 let mut unmatched: u32 = 0;
799 for record in analyzed.iter_mut() {
800 record.coverage =
801 coverage::lookup_coverage(&cov_map, &record.relative_path).cloned();
802 if record.coverage.is_some() {
803 matched += 1;
804 } else {
805 unmatched += 1;
806 }
807 }
808 tracing::debug!(
809 path = %cov_path.display(),
810 coverage_entries = cov_map.len(),
811 files_matched = matched,
812 files_unmatched = unmatched,
813 "coverage attached"
814 );
815 if unmatched > 0 && matched == 0 {
816 tracing::warn!(
817 path = %cov_path.display(),
818 "coverage file loaded but no source files could be matched — check that paths in the coverage report match the scanned directory"
819 );
820 }
821 }
822 Err(e) => {
823 tracing::warn!(path = %cov_path.display(), error = %e, "coverage file could not be read");
824 warnings.push(format!(
825 "coverage file '{}' could not be read: {e}",
826 cov_path.display()
827 ));
828 }
829 }
830}
831
832fn push_record(
833 record: FileRecord,
834 analyzed: &mut Vec<FileRecord>,
835 skipped: &mut Vec<FileRecord>,
836 warnings: &mut Vec<String>,
837) {
838 warnings.extend(
839 record
840 .warnings
841 .iter()
842 .map(|warning| format!("{}: {warning}", record.relative_path)),
843 );
844
845 match record.status {
846 FileStatus::AnalyzedExact | FileStatus::AnalyzedBestEffort => analyzed.push(record),
847 _ => skipped.push(record),
848 }
849}
850
851#[inline]
853fn skip_with_reason(
854 path: &Path,
855 root: &Path,
856 size: u64,
857 reason: impl Into<String>,
858) -> MetadataPolicyOutcome {
859 MetadataPolicyOutcome::Skip(Box::new(skipped_record(
860 path,
861 root,
862 size,
863 FileStatus::SkippedByPolicy,
864 vec![reason.into()],
865 )))
866}
867
868#[allow(clippy::too_many_arguments)]
872fn check_metadata_policy(
873 path: &Path,
874 root: &Path,
875 relative_path: &str,
876 metadata: &fs::Metadata,
877 config: &AppConfig,
878 include_globs: Option<&GlobSet>,
879 exclude_globs: Option<&GlobSet>,
880) -> MetadataPolicyOutcome {
881 let size = metadata.len();
882
883 if metadata.file_type().is_symlink() && !config.discovery.follow_symlinks {
884 return skip_with_reason(path, root, size, "symlink skipped by policy");
885 }
886 if file_name_eq(path, ".gitignore") {
887 return skip_with_reason(path, root, size, ".gitignore is always excluded");
888 }
889 if is_excluded_dir_path(path, &config.discovery.excluded_directories) {
890 return skip_with_reason(path, root, size, "path matched excluded directory setting");
891 }
892 if size > config.discovery.max_file_size_bytes {
893 return skip_with_reason(
894 path,
895 root,
896 size,
897 format!(
898 "file exceeded max_file_size_bytes ({})",
899 config.discovery.max_file_size_bytes
900 ),
901 );
902 }
903 if let Some(globs) = include_globs {
904 if !globs.is_match(Path::new(relative_path)) && !globs.is_match(path) {
905 return MetadataPolicyOutcome::Exclude;
906 }
907 }
908 if let Some(globs) = exclude_globs {
909 if globs.is_match(Path::new(relative_path)) || globs.is_match(path) {
910 return skip_with_reason(path, root, size, "path matched exclude glob");
911 }
912 }
913 if is_known_lockfile(path) && !config.analysis.include_lockfiles {
914 return skip_with_reason(path, root, size, "lockfile skipped by default policy");
915 }
916
917 MetadataPolicyOutcome::Continue
918}
919
920struct ContentPolicyResult {
921 vendor: bool,
922 generated: bool,
923 minified: bool,
924 skip_record: Option<FileRecord>,
925}
926
927fn check_content_policy(
930 path: &Path,
931 root: &Path,
932 size_bytes: u64,
933 bytes: &[u8],
934 config: &AppConfig,
935) -> ContentPolicyResult {
936 let vendor = is_vendor_path(path);
937 if vendor && config.analysis.vendor_directory_detection {
938 return ContentPolicyResult {
939 vendor,
940 generated: false,
941 minified: false,
942 skip_record: Some(skipped_record(
943 path,
944 root,
945 size_bytes,
946 FileStatus::SkippedByPolicy,
947 vec!["vendor file skipped by policy".into()],
948 )),
949 };
950 }
951
952 let generated = config.analysis.generated_file_detection && looks_generated(path, bytes);
953 if generated {
954 return ContentPolicyResult {
955 vendor,
956 generated,
957 minified: false,
958 skip_record: Some(skipped_record(
959 path,
960 root,
961 size_bytes,
962 FileStatus::SkippedByPolicy,
963 vec!["generated file skipped by policy".into()],
964 )),
965 };
966 }
967
968 let minified = config.analysis.minified_file_detection && looks_minified(path, bytes);
969 if minified {
970 return ContentPolicyResult {
971 vendor,
972 generated,
973 minified,
974 skip_record: Some(skipped_record(
975 path,
976 root,
977 size_bytes,
978 FileStatus::SkippedByPolicy,
979 vec!["minified file skipped by policy".into()],
980 )),
981 };
982 }
983
984 ContentPolicyResult {
985 vendor,
986 generated,
987 minified,
988 skip_record: None,
989 }
990}
991
992fn decode_file_contents(
994 path: &Path,
995 root: &Path,
996 size_bytes: u64,
997 bytes: &[u8],
998 config: &AppConfig,
999) -> Result<Option<(String, String, Vec<String>)>> {
1000 if is_binary(bytes) {
1001 return match config.analysis.binary_file_behavior {
1002 BinaryFileBehavior::Skip => Ok(None),
1003 BinaryFileBehavior::Fail => {
1004 anyhow::bail!("binary file encountered: {}", path.display())
1005 }
1006 };
1007 }
1008
1009 match decode_bytes(bytes) {
1010 Ok(result) => Ok(Some(result)),
1011 Err(err) => match config.analysis.decode_failure_behavior {
1012 FailureBehavior::WarnSkip => {
1013 let _ = (path, root, size_bytes); Err(anyhow::anyhow!("__decode_warn__: {err}"))
1018 }
1019 FailureBehavior::Fail => {
1020 anyhow::bail!("decode failure for {}: {err}", path.display())
1021 }
1022 },
1023 }
1024}
1025
1026#[allow(clippy::too_many_lines)]
1027fn analyze_candidate_file(
1028 path: &Path,
1029 root: &Path,
1030 config: &AppConfig,
1031 include_globs: Option<&GlobSet>,
1032 exclude_globs: Option<&GlobSet>,
1033 enabled_languages: Option<&BTreeSet<Language>>,
1034) -> Result<Option<FileRecord>> {
1035 let metadata = match fs::symlink_metadata(path) {
1036 Ok(metadata) => metadata,
1037 Err(err) => {
1038 return Ok(Some(skipped_record(
1039 path,
1040 root,
1041 0,
1042 FileStatus::ErrorInternal,
1043 vec![format!("failed to read metadata: {err}")],
1044 )));
1045 }
1046 };
1047
1048 let relative_path = relative_path_string(path, root);
1049
1050 match check_metadata_policy(
1052 path,
1053 root,
1054 &relative_path,
1055 &metadata,
1056 config,
1057 include_globs,
1058 exclude_globs,
1059 ) {
1060 MetadataPolicyOutcome::Skip(record) => return Ok(Some(*record)),
1061 MetadataPolicyOutcome::Exclude => return Ok(None),
1062 MetadataPolicyOutcome::Continue => {}
1063 }
1064
1065 let bytes = match fs::read(path) {
1066 Ok(bytes) => bytes,
1067 Err(err) => {
1068 return Ok(Some(skipped_record(
1069 path,
1070 root,
1071 metadata.len(),
1072 FileStatus::ErrorInternal,
1073 vec![format!("failed to read file: {err}")],
1074 )));
1075 }
1076 };
1077
1078 let content_policy = check_content_policy(path, root, metadata.len(), &bytes, config);
1080 if let Some(record) = content_policy.skip_record {
1081 return Ok(Some(record));
1082 }
1083 let (vendor, generated, minified) = (
1084 content_policy.vendor,
1085 content_policy.generated,
1086 content_policy.minified,
1087 );
1088
1089 let (text, encoding, decode_warnings) =
1091 match decode_file_contents(path, root, metadata.len(), &bytes, config) {
1092 Ok(Some(result)) => result,
1093 Ok(None) => {
1094 return Ok(Some(skipped_record(
1095 path,
1096 root,
1097 metadata.len(),
1098 FileStatus::SkippedBinary,
1099 vec!["binary file skipped by default".into()],
1100 )));
1101 }
1102 Err(err) => {
1103 let msg = err.to_string();
1104 if let Some(warn_msg) = msg.strip_prefix("__decode_warn__: ") {
1105 return Ok(Some(skipped_record(
1106 path,
1107 root,
1108 metadata.len(),
1109 FileStatus::SkippedDecodeError,
1110 vec![warn_msg.to_string()],
1111 )));
1112 }
1113 return Err(err);
1114 }
1115 };
1116
1117 let first_line = text.lines().next();
1118 let language = detect_language(
1119 path,
1120 first_line,
1121 &config.analysis.extension_overrides,
1122 config.analysis.shebang_detection,
1123 );
1124
1125 let Some(language) = language else {
1126 return Ok(Some(skipped_record(
1127 path,
1128 root,
1129 metadata.len(),
1130 FileStatus::SkippedUnsupported,
1131 vec!["unsupported or undetected language".into()],
1132 )));
1133 };
1134
1135 if let Some(enabled) = enabled_languages {
1136 if !enabled.contains(&language) {
1137 return Ok(Some(skipped_record(
1138 path,
1139 root,
1140 metadata.len(),
1141 FileStatus::SkippedByPolicy,
1142 vec![format!(
1143 "language {} disabled by configuration",
1144 language.display_name()
1145 )],
1146 )));
1147 }
1148 }
1149
1150 let ieee_opts = AnalysisOptions {
1151 blank_in_block_comment_as_comment: config.analysis.blank_in_block_comment_policy
1152 == BlankInBlockCommentPolicy::CountAsComment,
1153 collapse_continuation_lines: config.analysis.continuation_line_policy
1154 == ContinuationLinePolicy::CollapseToLogical,
1155 };
1156 let analysis = analyze_text(language, &text, ieee_opts);
1157 let effective_counts = compute_effective_counts(
1158 &analysis.raw,
1159 config.analysis.mixed_line_policy,
1160 config.analysis.python_docstrings_as_comments,
1161 config.analysis.count_compiler_directives,
1162 );
1163
1164 let mut warnings = decode_warnings;
1165 warnings.extend(analysis.warnings.clone());
1166
1167 Ok(Some(FileRecord {
1168 path: path_to_string(path),
1169 relative_path,
1170 language: Some(language),
1171 size_bytes: metadata.len(),
1172 detected_encoding: Some(encoding),
1173 raw_line_categories: analysis.raw,
1174 effective_counts,
1175 status: match analysis.parse_mode {
1176 ParseMode::Lexical | ParseMode::TreeSitter => FileStatus::AnalyzedExact,
1177 ParseMode::LexicalBestEffort => FileStatus::AnalyzedBestEffort,
1178 },
1179 warnings,
1180 generated,
1181 minified,
1182 vendor,
1183 parse_mode: Some(analysis.parse_mode),
1184 submodule: None,
1185 coverage: None,
1186 }))
1187}
1188
1189const fn compute_effective_counts(
1190 raw: &RawLineCounts,
1191 mixed_line_policy: MixedLinePolicy,
1192 python_docstrings_as_comments: bool,
1193 count_compiler_directives: bool,
1194) -> EffectiveCounts {
1195 let mut effective = EffectiveCounts {
1196 code_lines: raw.code_only_lines,
1197 comment_lines: raw.single_comment_only_lines + raw.multi_comment_only_lines,
1198 blank_lines: raw.blank_only_lines,
1199 mixed_lines_separate: 0,
1200 };
1201
1202 if python_docstrings_as_comments {
1203 effective.comment_lines += raw.docstring_comment_lines;
1204 } else {
1205 effective.code_lines += raw.docstring_comment_lines;
1206 }
1207
1208 let mixed_total = raw.mixed_code_single_comment_lines + raw.mixed_code_multi_comment_lines;
1209 match mixed_line_policy {
1210 MixedLinePolicy::CodeOnly => effective.code_lines += mixed_total,
1211 MixedLinePolicy::CodeAndComment => {
1212 effective.code_lines += mixed_total;
1213 effective.comment_lines += mixed_total;
1214 }
1215 MixedLinePolicy::CommentOnly => effective.comment_lines += mixed_total,
1216 MixedLinePolicy::SeparateMixedCategory => effective.mixed_lines_separate += mixed_total,
1217 }
1218
1219 if !count_compiler_directives {
1222 effective.code_lines = effective
1223 .code_lines
1224 .saturating_sub(raw.compiler_directive_lines);
1225 }
1226
1227 effective
1228}
1229
1230fn build_summary(analyzed: &[FileRecord], skipped: &[FileRecord]) -> SummaryTotals {
1231 let mut summary = SummaryTotals {
1232 files_considered: (analyzed.len() + skipped.len()) as u64,
1233 files_analyzed: analyzed.len() as u64,
1234 files_skipped: skipped.len() as u64,
1235 ..Default::default()
1236 };
1237
1238 for record in analyzed {
1239 summary.total_physical_lines += record.raw_line_categories.total_physical_lines;
1240 summary.code_lines += record.effective_counts.code_lines;
1241 summary.comment_lines += record.effective_counts.comment_lines;
1242 summary.blank_lines += record.effective_counts.blank_lines;
1243 summary.mixed_lines_separate += record.effective_counts.mixed_lines_separate;
1244 summary.functions += record.raw_line_categories.functions;
1245 summary.classes += record.raw_line_categories.classes;
1246 summary.variables += record.raw_line_categories.variables;
1247 summary.imports += record.raw_line_categories.imports;
1248 summary.test_count += record.raw_line_categories.test_count;
1249 summary.test_assertion_count += record.raw_line_categories.test_assertion_count;
1250 summary.test_suite_count += record.raw_line_categories.test_suite_count;
1251 if let Some(cov) = &record.coverage {
1252 summary.coverage_lines_found += u64::from(cov.lines_found);
1253 summary.coverage_lines_hit += u64::from(cov.lines_hit);
1254 summary.coverage_functions_found += u64::from(cov.functions_found);
1255 summary.coverage_functions_hit += u64::from(cov.functions_hit);
1256 summary.coverage_branches_found += u64::from(cov.branches_found);
1257 summary.coverage_branches_hit += u64::from(cov.branches_hit);
1258 }
1259 }
1260
1261 summary
1262}
1263
1264const fn zeroed_summary(language: Language) -> LanguageSummary {
1266 LanguageSummary {
1267 language,
1268 files: 0,
1269 total_physical_lines: 0,
1270 code_lines: 0,
1271 comment_lines: 0,
1272 blank_lines: 0,
1273 mixed_lines_separate: 0,
1274 functions: 0,
1275 classes: 0,
1276 variables: 0,
1277 imports: 0,
1278 test_count: 0,
1279 test_assertion_count: 0,
1280 test_suite_count: 0,
1281 coverage_lines_found: 0,
1282 coverage_lines_hit: 0,
1283 coverage_functions_found: 0,
1284 coverage_functions_hit: 0,
1285 coverage_branches_found: 0,
1286 coverage_branches_hit: 0,
1287 }
1288}
1289
1290fn accumulate_record_into_summary(entry: &mut LanguageSummary, record: &FileRecord) {
1292 entry.files += 1;
1293 let r = &record.raw_line_categories;
1294 entry.total_physical_lines += r.total_physical_lines;
1295 entry.code_lines += record.effective_counts.code_lines;
1296 entry.comment_lines += record.effective_counts.comment_lines;
1297 entry.blank_lines += record.effective_counts.blank_lines;
1298 entry.mixed_lines_separate += record.effective_counts.mixed_lines_separate;
1299 entry.functions += r.functions;
1300 entry.classes += r.classes;
1301 entry.variables += r.variables;
1302 entry.imports += r.imports;
1303 entry.test_count += r.test_count;
1304 entry.test_assertion_count += r.test_assertion_count;
1305 entry.test_suite_count += r.test_suite_count;
1306 if let Some(cov) = &record.coverage {
1307 entry.coverage_lines_found += u64::from(cov.lines_found);
1308 entry.coverage_lines_hit += u64::from(cov.lines_hit);
1309 entry.coverage_functions_found += u64::from(cov.functions_found);
1310 entry.coverage_functions_hit += u64::from(cov.functions_hit);
1311 entry.coverage_branches_found += u64::from(cov.branches_found);
1312 entry.coverage_branches_hit += u64::from(cov.branches_hit);
1313 }
1314}
1315
1316fn build_language_summaries(analyzed: &[FileRecord]) -> Vec<LanguageSummary> {
1317 let mut by_language: BTreeMap<Language, LanguageSummary> = BTreeMap::new();
1318 for record in analyzed {
1319 let Some(language) = record.language else {
1320 continue;
1321 };
1322 let entry = by_language
1323 .entry(language)
1324 .or_insert_with(|| zeroed_summary(language));
1325 accumulate_record_into_summary(entry, record);
1326 }
1327 by_language.into_values().collect()
1328}
1329
1330fn skipped_record(
1331 path: &Path,
1332 root: &Path,
1333 size_bytes: u64,
1334 status: FileStatus,
1335 warnings: Vec<String>,
1336) -> FileRecord {
1337 FileRecord {
1338 path: path_to_string(path),
1339 relative_path: relative_path_string(path, root),
1340 language: None,
1341 size_bytes,
1342 detected_encoding: None,
1343 raw_line_categories: RawLineCounts::default(),
1344 effective_counts: EffectiveCounts::default(),
1345 status,
1346 warnings,
1347 generated: false,
1348 minified: false,
1349 vendor: false,
1350 parse_mode: None,
1351 submodule: None,
1352 coverage: None,
1353 }
1354}
1355
/// Renders `path` relative to `root` as a forward-slash string.
///
/// When `path` does not start with `root`, the whole `path` is rendered instead. Non-UTF-8
/// parts are converted lossily and every backslash becomes `/`.
fn relative_path_string(path: &Path, root: &Path) -> String {
    let stripped = path.strip_prefix(root);
    let shown = stripped.unwrap_or(path);
    let lossy = shown.to_string_lossy();
    lossy.replace('\\', "/")
}
1362
/// Renders `path` as a string with every backslash replaced by `/`.
///
/// Non-UTF-8 parts are converted lossily.
fn path_to_string(path: &Path) -> String {
    let lossy = path.to_string_lossy();
    lossy.replace('\\', "/")
}
1366
/// Reads `<root>/.gitmodules` and returns `(name, path)` for each submodule section that
/// declares a `path` key, in file order.
///
/// Returns an empty list when `.gitmodules` is not a regular file or cannot be read as text.
/// A section is recognised by a header line of the form `[submodule "<name>"]`; a section
/// without a `path` entry is dropped. When a section has several `path` lines the last one
/// wins.
#[must_use]
pub fn detect_submodules(root: &Path) -> Vec<(String, PathBuf)> {
    let gitmodules = root.join(".gitmodules");
    if !gitmodules.is_file() {
        return Vec::new();
    }
    let Ok(content) = fs::read_to_string(&gitmodules) else {
        return Vec::new();
    };

    let mut result = Vec::new();
    let mut current_name: Option<String> = None;
    let mut current_path: Option<PathBuf> = None;

    for line in content.lines() {
        let trimmed = line.trim();

        // Strip the prefix first and the suffix from what remains. A degenerate header such
        // as `[submodule "]`, where prefix and suffix overlap, is then rejected instead of
        // producing an out-of-order slice range.
        let header_name = trimmed
            .strip_prefix("[submodule \"")
            .and_then(|rest| rest.strip_suffix("\"]"));

        if let Some(name) = header_name {
            // A new section starts: flush the previous one if it was complete.
            if let (Some(done_name), Some(done_path)) = (current_name.take(), current_path.take())
            {
                result.push((done_name, done_path));
            }
            current_name = Some(name.to_string());
            continue;
        }

        // Accept only the exact key `path`: optional whitespace, then `=`. A key that merely
        // starts with "path" is not treated as the submodule path.
        let path_value = trimmed
            .strip_prefix("path")
            .and_then(|rest| rest.trim_start().strip_prefix('='));
        if let Some(value) = path_value {
            current_path = Some(PathBuf::from(value.trim()));
        }
    }

    // Flush the final section.
    if let (Some(name), Some(path)) = (current_name, current_path) {
        result.push((name, path));
    }

    result
}
1403
1404fn build_submodule_summaries(
1405 analyzed: &[FileRecord],
1406 submodules: &[(String, PathBuf)],
1407) -> Vec<SubmoduleSummary> {
1408 submodules
1409 .iter()
1410 .map(|(name, path)| {
1411 let files: Vec<&FileRecord> = analyzed
1412 .iter()
1413 .filter(|f| f.submodule.as_deref() == Some(name.as_str()))
1414 .collect();
1415
1416 let files_analyzed = files.len() as u64;
1417 let total_physical_lines = files
1418 .iter()
1419 .map(|f| f.raw_line_categories.total_physical_lines)
1420 .sum();
1421 let code_lines = files.iter().map(|f| f.effective_counts.code_lines).sum();
1422 let comment_lines = files.iter().map(|f| f.effective_counts.comment_lines).sum();
1423 let blank_lines = files.iter().map(|f| f.effective_counts.blank_lines).sum();
1424 let language_summaries = build_language_summaries_from_slice(&files);
1425
1426 SubmoduleSummary {
1427 name: name.clone(),
1428 relative_path: path.to_string_lossy().replace('\\', "/"),
1429 files_analyzed,
1430 total_physical_lines,
1431 code_lines,
1432 comment_lines,
1433 blank_lines,
1434 language_summaries,
1435 }
1436 })
1437 .filter(|s| s.files_analyzed > 0)
1438 .collect()
1439}
1440
1441fn build_language_summaries_from_slice(files: &[&FileRecord]) -> Vec<LanguageSummary> {
1442 let mut map: BTreeMap<String, LanguageSummary> = BTreeMap::new();
1443 for file in files {
1444 let Some(lang) = file.language else { continue };
1445 let entry = map
1446 .entry(lang.display_name().to_string())
1447 .or_insert_with(|| zeroed_summary(lang));
1448 accumulate_record_into_summary(entry, file);
1449 }
1450 map.into_values().collect()
1451}
1452
/// Returns `true` when the final component of `path` is valid UTF-8 and equals `expected`
/// exactly (case-sensitive).
fn file_name_eq(path: &Path, expected: &str) -> bool {
    let actual = path.file_name().and_then(|name| name.to_str());
    actual == Some(expected)
}
1458
/// Returns `true` when any component of `path` equals one of `excluded_dirs`.
///
/// Every component is compared, including the last one. Components that are not valid
/// UTF-8 never match.
fn is_excluded_dir_path(path: &Path, excluded_dirs: &[String]) -> bool {
    path.components()
        .filter_map(|component| component.as_os_str().to_str())
        .any(|part| excluded_dirs.iter().any(|excluded| excluded == part))
}
1467
/// Returns `true` when any component of `path` is exactly `vendor`, `node_modules` or
/// `packages`.
///
/// Components that are not valid UTF-8 never match.
fn is_vendor_path(path: &Path) -> bool {
    const VENDOR_DIRS: [&str; 3] = ["vendor", "node_modules", "packages"];

    path.components()
        .filter_map(|component| component.as_os_str().to_str())
        .any(|part| VENDOR_DIRS.contains(&part))
}
1476
/// Returns `true` when the file name of `path` is one of the recognised dependency lockfiles.
///
/// The comparison is exact and case-sensitive; a file name that is not valid UTF-8 never
/// matches.
fn is_known_lockfile(path: &Path) -> bool {
    const LOCKFILE_NAMES: [&str; 7] = [
        "Cargo.lock",
        "package-lock.json",
        "yarn.lock",
        "pnpm-lock.yaml",
        "Pipfile.lock",
        "poetry.lock",
        "composer.lock",
    ];

    let file_name = path.file_name().and_then(|name| name.to_str());
    file_name.is_some_and(|name| LOCKFILE_NAMES.contains(&name))
}
1493
1494fn looks_generated(path: &Path, bytes: &[u8]) -> bool {
1495 let file_name = path
1496 .file_name()
1497 .and_then(|name| name.to_str())
1498 .unwrap_or_default();
1499 if file_name.contains(".generated.") || file_name.contains(".g.") {
1500 return true;
1501 }
1502
1503 let sample = String::from_utf8_lossy(&bytes[..bytes.len().min(GENERATED_SAMPLE_BYTES)])
1504 .to_ascii_lowercase();
1505 sample.contains("@generated") || sample.contains("generated by")
1506}
1507
1508fn looks_minified(path: &Path, bytes: &[u8]) -> bool {
1509 let file_name = path
1510 .file_name()
1511 .and_then(|name| name.to_str())
1512 .unwrap_or_default();
1513 if file_name.contains(".min.") {
1514 return true;
1515 }
1516
1517 let sample = String::from_utf8_lossy(&bytes[..bytes.len().min(MINIFIED_SAMPLE_BYTES)]);
1518 let longest_line = sample.lines().map(str::len).max().unwrap_or(0);
1519 let whitespace = sample.chars().filter(|c| c.is_whitespace()).count();
1520 longest_line > MINIFIED_LINE_THRESHOLD && whitespace * 100 < sample.len().max(1)
1521}
1522
1523fn is_binary(bytes: &[u8]) -> bool {
1524 if bytes.starts_with(&[0xEF, 0xBB, 0xBF])
1525 || bytes.starts_with(&[0xFF, 0xFE])
1526 || bytes.starts_with(&[0xFE, 0xFF])
1527 {
1528 return false;
1529 }
1530
1531 let sample = &bytes[..bytes.len().min(BINARY_SAMPLE_BYTES)];
1532 sample.contains(&0)
1533}
1534
1535fn decode_utf16_bom(
1538 bom_stripped: &[u8],
1539 encoding: &'static encoding_rs::Encoding,
1540 label: &str,
1541) -> (String, String, Vec<String>) {
1542 let (cow, _, had_errors) = encoding.decode(bom_stripped);
1543 let mut warnings = Vec::new();
1544 if had_errors {
1545 warnings.push(format!("{label} decode contained replacement characters"));
1546 }
1547 (cow.into_owned(), label.into(), warnings)
1548}
1549
1550fn decode_bytes(bytes: &[u8]) -> std::result::Result<(String, String, Vec<String>), String> {
1551 if bytes.starts_with(&[0xEF, 0xBB, 0xBF]) {
1552 let text = String::from_utf8(bytes[3..].to_vec()).map_err(|err| err.to_string())?;
1553 return Ok((text, "utf-8-bom".into(), vec![]));
1554 }
1555 if bytes.starts_with(&[0xFF, 0xFE]) {
1556 return Ok(decode_utf16_bom(&bytes[2..], UTF_16LE, "utf-16le"));
1557 }
1558 if bytes.starts_with(&[0xFE, 0xFF]) {
1559 return Ok(decode_utf16_bom(&bytes[2..], UTF_16BE, "utf-16be"));
1560 }
1561
1562 #[allow(clippy::option_if_let_else)]
1564 if let Ok(text) = String::from_utf8(bytes.to_vec()) {
1565 Ok((text, "utf-8".into(), vec![]))
1566 } else {
1567 let (cow, _, had_errors) = WINDOWS_1252.decode(bytes);
1568 let mut warnings = vec!["decoded using windows-1252 fallback".into()];
1569 if had_errors {
1570 warnings.push("fallback decode contained replacement characters".into());
1571 }
1572 Ok((cow.into_owned(), "windows-1252".into(), warnings))
1573 }
1574}
1575
1576fn compile_globset(patterns: &[String]) -> Result<Option<GlobSet>> {
1577 if patterns.is_empty() {
1578 return Ok(None);
1579 }
1580
1581 let mut builder = GlobSetBuilder::new();
1582 for pattern in patterns {
1583 builder
1584 .add(Glob::new(pattern).with_context(|| format!("invalid glob pattern: {pattern}"))?);
1585 }
1586 Ok(Some(
1587 builder.build().context("failed to compile glob filters")?,
1588 ))
1589}
1590
1591fn parse_enabled_languages(enabled: &[String]) -> Result<Option<BTreeSet<Language>>> {
1592 if enabled.is_empty() {
1593 return Ok(None);
1594 }
1595
1596 let supported = supported_languages();
1597 let mut set = BTreeSet::new();
1598 for name in enabled {
1599 let language = Language::from_name(name)
1600 .with_context(|| format!("unsupported language in config: {name}"))?;
1601 if !supported.contains(&language) {
1602 anyhow::bail!("language {name} is not supported in this build");
1603 }
1604 set.insert(language);
1605 }
1606 Ok(Some(set))
1607}
1608
1609pub fn write_json(run: &AnalysisRun, output_path: &Path) -> Result<()> {
1613 let json = serde_json::to_string_pretty(run).context("failed to serialize analysis run")?;
1614 fs::write(output_path, json)
1615 .with_context(|| format!("failed to write JSON output to {}", output_path.display()))
1616}
1617
1618pub fn read_json(path: &Path) -> Result<AnalysisRun> {
1622 let contents = fs::read_to_string(path)
1623 .with_context(|| format!("failed to read result file {}", path.display()))?;
1624 serde_json::from_str(&contents)
1625 .with_context(|| format!("failed to parse JSON result {}", path.display()))
1626}
1627
#[cfg(test)]
mod tests {
    use super::*;

    /// With `CodeOnly`, mixed lines are added to code; docstrings go to comments when the
    /// docstring flag is set.
    #[test]
    fn effective_counts_respect_code_only_policy() {
        let input = RawLineCounts {
            code_only_lines: 2,
            single_comment_only_lines: 1,
            mixed_code_single_comment_lines: 3,
            docstring_comment_lines: 2,
            ..RawLineCounts::default()
        };

        let effective = compute_effective_counts(&input, MixedLinePolicy::CodeOnly, true, true);

        // code: 2 code-only + 3 mixed; comments: 1 single + 2 docstring.
        assert_eq!(effective.code_lines, 5);
        assert_eq!(effective.comment_lines, 3);
    }

    /// With `SeparateMixedCategory`, mixed lines land only in the separate bucket.
    #[test]
    fn effective_counts_can_separate_mixed() {
        let input = RawLineCounts {
            mixed_code_single_comment_lines: 2,
            mixed_code_multi_comment_lines: 1,
            ..RawLineCounts::default()
        };

        let effective =
            compute_effective_counts(&input, MixedLinePolicy::SeparateMixedCategory, true, true);

        assert_eq!(effective.mixed_lines_separate, 3);
        assert_eq!(effective.code_lines, 0);
        assert_eq!(effective.comment_lines, 0);
    }

    /// Bytes that are not valid UTF-8 and carry no byte-order mark use the windows-1252
    /// fallback and produce a warning.
    #[test]
    fn windows_1252_fallback_decodes() {
        // "Hello", then 0x96 (an en dash in windows-1252), then "W".
        let input: [u8; 7] = [0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x96, 0x57];

        let (text, encoding, warnings) = decode_bytes(&input).unwrap();

        assert_eq!(encoding, "windows-1252");
        assert!(text.contains('\u{2013}'));
        assert!(!warnings.is_empty());
    }
}