1#![allow(clippy::multiple_crate_versions)]
4
5pub mod baseline;
6pub mod coverage;
7pub mod delta;
8pub mod history;
9pub use baseline::{check_against_baseline, resolve_baselines_path, BaselineEntry, BaselineStore};
10pub use coverage::{aggregate_line_coverage, lookup_coverage, parse_lcov, FileCoverage};
11pub use delta::{compute_delta, FileChangeStatus, FileDelta, ScanComparison, SummaryDelta};
12pub use history::{RegistryEntry, ScanRegistry, ScanSummarySnapshot, WatchedDirsStore};
13
14use std::collections::{BTreeMap, BTreeSet, HashSet};
15use std::fs;
16use std::path::{Path, PathBuf};
17use std::sync::atomic::{AtomicBool, Ordering};
18
19use anyhow::{Context, Result};
20use chrono::{DateTime, Utc};
21use encoding_rs::{UTF_16BE, UTF_16LE, WINDOWS_1252};
22use globset::{Glob, GlobSet, GlobSetBuilder};
23use ignore::WalkBuilder;
24use serde::{Deserialize, Serialize};
25use uuid::Uuid;
26
27use sloc_config::{
28 AppConfig, BinaryFileBehavior, BlankInBlockCommentPolicy, ContinuationLinePolicy,
29 FailureBehavior, MixedLinePolicy,
30};
31use sloc_languages::{
32 analyze_text, detect_language, supported_languages, AnalysisOptions, Language, ParseMode,
33 RawLineCounts,
34};
35
/// Upper bound on the number of worker threads used by the parallel file analysis.
const MAX_ANALYSIS_THREADS: usize = 16;
/// Worker thread count used when the available parallelism cannot be queried.
const DEFAULT_ANALYSIS_THREADS: usize = 4;
// NOTE(review): the four constants below are not referenced in the visible part of this
// file. Presumably they are byte-sample sizes / a line-length threshold consumed by the
// generated-file, minified-file and binary-file detection helpers — confirm at the use sites.
const GENERATED_SAMPLE_BYTES: usize = 1024;
const MINIFIED_SAMPLE_BYTES: usize = 4096;
const MINIFIED_LINE_THRESHOLD: usize = 2000;
const BINARY_SAMPLE_BYTES: usize = 8192;
50
/// Outcome of the metadata-only policy checks applied to a candidate file before it is read.
enum MetadataPolicyOutcome {
    /// The file is skipped; the boxed record (reported to the caller) carries the reason.
    Skip(Box<FileRecord>),
    /// The file is dropped without producing any record at all.
    Exclude,
    /// No metadata policy objected; the caller continues with content-level processing.
    Continue,
}
60
/// Final disposition of a file that was considered during a scan.
///
/// Serialized in `snake_case`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FileStatus {
    /// Analyzed with the `Lexical` or `TreeSitter` parse mode.
    AnalyzedExact,
    /// Analyzed with the `LexicalBestEffort` parse mode.
    AnalyzedBestEffort,
    /// Content was classified as binary and binary files are configured to be skipped.
    SkippedBinary,
    /// Content could not be decoded and decode failures are configured to warn-and-skip.
    SkippedDecodeError,
    /// No language could be detected for the file.
    SkippedUnsupported,
    /// Skipped because a discovery or analysis policy applied to it.
    SkippedByPolicy,
    /// The file's metadata or contents could not be read.
    ErrorInternal,
}
72
/// Line counts for one file after the configured counting policies have been applied
/// to the raw line categories.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct EffectiveCounts {
    /// Lines counted as code.
    pub code_lines: u64,
    /// Lines counted as comments.
    pub comment_lines: u64,
    /// Blank lines.
    pub blank_lines: u64,
    /// Mixed code+comment lines; non-zero only under the "separate mixed category" policy.
    pub mixed_lines_separate: u64,
}
80
/// Identification of the tool invocation that produced an [`AnalysisRun`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolMetadata {
    /// Tool name.
    pub name: String,
    /// Tool version (taken from the crate's package version when a run is assembled).
    pub version: String,
    /// Unique identifier of the run.
    pub run_id: String,
    /// UTC time at which the run record was assembled.
    pub timestamp_utc: DateTime<Utc>,
}
88
/// Description of the environment in which an analysis was executed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnvironmentMetadata {
    /// Operating system name.
    pub operating_system: String,
    /// CPU architecture name.
    pub architecture: String,
    /// Caller-supplied label for how the analysis was started.
    pub runtime_mode: String,
    /// Name of the user who started the run, or `"unknown"`.
    pub initiator_username: String,
    /// Host name of the machine that ran the analysis, or `"unknown"`.
    pub initiator_hostname: String,
}
97
/// Scan-wide totals.
///
/// Fields marked `#[serde(default)]` fall back to `0` when absent from deserialized input.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct SummaryTotals {
    /// Number of analyzed plus skipped files.
    pub files_considered: u64,
    /// Number of files that were analyzed.
    pub files_analyzed: u64,
    /// Number of files that were skipped.
    pub files_skipped: u64,
    /// Sum of physical line counts over the analyzed files.
    pub total_physical_lines: u64,
    /// Sum of effective code lines over the analyzed files.
    pub code_lines: u64,
    /// Sum of effective comment lines over the analyzed files.
    pub comment_lines: u64,
    /// Sum of effective blank lines over the analyzed files.
    pub blank_lines: u64,
    /// Sum of separately counted mixed lines over the analyzed files.
    pub mixed_lines_separate: u64,
    /// Sum of the per-file function counts.
    #[serde(default)]
    pub functions: u64,
    /// Sum of the per-file class counts.
    #[serde(default)]
    pub classes: u64,
    /// Sum of the per-file variable counts.
    #[serde(default)]
    pub variables: u64,
    /// Sum of the per-file import counts.
    #[serde(default)]
    pub imports: u64,
    // NOTE(review): the aggregation of the test and coverage fields below is not visible
    // in this part of the file; the names suggest per-file sums — confirm.
    #[serde(default)]
    pub test_count: u64,
    #[serde(default)]
    pub test_assertion_count: u64,
    #[serde(default)]
    pub test_suite_count: u64,
    #[serde(default)]
    pub coverage_lines_found: u64,
    #[serde(default)]
    pub coverage_lines_hit: u64,
    #[serde(default)]
    pub coverage_functions_found: u64,
    #[serde(default)]
    pub coverage_functions_hit: u64,
    #[serde(default)]
    pub coverage_branches_found: u64,
    #[serde(default)]
    pub coverage_branches_hit: u64,
}
138
/// Totals for a single language.
///
/// Fields marked `#[serde(default)]` fall back to `0` when absent from deserialized input.
// NOTE(review): the code that fills this struct is outside the visible part of the file;
// the field meanings presumably mirror `SummaryTotals`, restricted to one language — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LanguageSummary {
    /// Language these totals belong to.
    pub language: Language,
    /// Number of files attributed to the language.
    pub files: u64,
    pub total_physical_lines: u64,
    pub code_lines: u64,
    pub comment_lines: u64,
    pub blank_lines: u64,
    pub mixed_lines_separate: u64,
    #[serde(default)]
    pub functions: u64,
    #[serde(default)]
    pub classes: u64,
    #[serde(default)]
    pub variables: u64,
    #[serde(default)]
    pub imports: u64,
    #[serde(default)]
    pub test_count: u64,
    #[serde(default)]
    pub test_assertion_count: u64,
    #[serde(default)]
    pub test_suite_count: u64,
    #[serde(default)]
    pub coverage_lines_found: u64,
    #[serde(default)]
    pub coverage_lines_hit: u64,
    #[serde(default)]
    pub coverage_functions_found: u64,
    #[serde(default)]
    pub coverage_functions_hit: u64,
    #[serde(default)]
    pub coverage_branches_found: u64,
    #[serde(default)]
    pub coverage_branches_hit: u64,
}
175
/// Everything recorded about one file that was considered during a scan,
/// whether it was analyzed or skipped.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileRecord {
    /// Path of the file as a string.
    pub path: String,
    /// Path of the file relative to the scan root it was found under.
    pub relative_path: String,
    /// Detected language; set for analyzed files.
    pub language: Option<Language>,
    /// File size in bytes as reported by the file's metadata.
    pub size_bytes: u64,
    /// Name of the encoding the contents were decoded with; set for analyzed files.
    pub detected_encoding: Option<String>,
    /// Raw per-category line counts produced by the language analyzer.
    pub raw_line_categories: RawLineCounts,
    /// Counts after the configured counting policies were applied.
    pub effective_counts: EffectiveCounts,
    /// Final disposition of the file.
    pub status: FileStatus,
    /// Warnings or skip reasons collected for this file.
    pub warnings: Vec<String>,
    /// Whether the file was classified as generated.
    pub generated: bool,
    /// Whether the file was classified as minified.
    pub minified: bool,
    /// Whether the file lives on a vendor path.
    pub vendor: bool,
    /// Parse mode used by the analyzer; set for analyzed files.
    pub parse_mode: Option<ParseMode>,
    /// Name of the submodule containing the file; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub submodule: Option<String>,
    /// Coverage data matched to this file; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub coverage: Option<FileCoverage>,
}
197
/// Totals for the analyzed files that belong to one submodule.
// NOTE(review): the builder for this struct is outside the visible part of the file;
// field meanings are taken from their names — confirm against `build_submodule_summaries`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SubmoduleSummary {
    /// Submodule name.
    pub name: String,
    /// Submodule path relative to the scan root.
    pub relative_path: String,
    pub files_analyzed: u64,
    pub total_physical_lines: u64,
    pub code_lines: u64,
    pub comment_lines: u64,
    pub blank_lines: u64,
    /// Per-language breakdown for the submodule.
    pub language_summaries: Vec<LanguageSummary>,
}
210
/// Complete result of one analysis: metadata, configuration, totals and per-file records.
///
/// The optional git and submodule fields are omitted from serialized output when empty
/// and default to empty when absent from deserialized input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalysisRun {
    /// Tool identification and run id.
    pub tool: ToolMetadata,
    /// Environment the analysis ran in.
    pub environment: EnvironmentMetadata,
    /// Copy of the configuration the analysis ran with.
    pub effective_configuration: AppConfig,
    /// Configured root paths, as strings.
    pub input_roots: Vec<String>,
    /// Scan-wide totals.
    pub summary_totals: SummaryTotals,
    /// Per-language totals.
    pub totals_by_language: Vec<LanguageSummary>,
    /// Records of the files that were analyzed.
    pub per_file_records: Vec<FileRecord>,
    /// Records of the files that were skipped.
    pub skipped_file_records: Vec<FileRecord>,
    /// Run-level warnings, including per-file warnings prefixed with the file's relative path.
    pub warnings: Vec<String>,
    /// Per-submodule totals; empty unless the submodule breakdown is enabled and submodules exist.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub submodule_summaries: Vec<SubmoduleSummary>,
    /// First seven characters of the resolved HEAD commit id.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_short: Option<String>,
    /// Resolved HEAD commit id.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_long: Option<String>,
    /// Branch name when HEAD points at `refs/heads/<branch>`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_branch: Option<String>,
    /// Identity name taken from the last HEAD reflog entry.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_author: Option<String>,
    /// Comma-separated tags pointing at HEAD.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_tags: Option<String>,
    /// Nearest tag reachable from HEAD as reported by `git describe`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_nearest_tag: Option<String>,
    /// Timestamp taken from the last HEAD reflog entry.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_date: Option<String>,
    /// URL of the `origin` remote read from the repository config.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_remote_url: Option<String>,
}
250
/// Git facts gathered for a run; every field is `None` when it could not be determined.
#[derive(Default)]
struct GitInfo {
    /// First seven characters of `commit_long`.
    commit_short: Option<String>,
    /// Commit id HEAD resolves to.
    commit_long: Option<String>,
    /// Branch name when HEAD is a symbolic ref under `refs/heads/`.
    branch: Option<String>,
    /// Identity name from the last HEAD reflog entry.
    author: Option<String>,
    /// Comma-separated list of tags pointing at HEAD.
    tags: Option<String>,
    /// Output of `git describe --tags --abbrev=0 HEAD`.
    nearest_tag: Option<String>,
    /// Timestamp from the last HEAD reflog entry.
    commit_date: Option<String>,
    /// URL of the `origin` remote.
    remote_url: Option<String>,
}
262
263fn find_git_dir(start: &Path) -> Option<PathBuf> {
267 let mut current = Some(start);
268 while let Some(dir) = current {
269 let candidate = dir.join(".git");
270 if candidate.is_dir() {
271 return Some(candidate);
272 }
273 if candidate.is_file() {
274 if let Some(resolved) = resolve_git_file_pointer(&candidate, dir) {
275 return Some(resolved);
276 }
277 }
278 current = dir.parent();
279 }
280 None
281}
282
/// Resolves a `.git` pointer file (`gitdir: <path>`) to the directory it names.
///
/// * `file` – the pointer file to read.
/// * `base_dir` – directory against which a relative pointer is resolved.
///
/// Returns `None` when the file cannot be read, does not start with `gitdir: `, or the
/// target is not an existing directory. The target is canonicalized when possible.
fn resolve_git_file_pointer(file: &Path, base_dir: &Path) -> Option<PathBuf> {
    let raw = fs::read_to_string(file).ok()?;
    let target = raw.trim().strip_prefix("gitdir: ")?;

    // The pointer uses forward slashes; convert to the platform separator.
    let native = target.replace('/', std::path::MAIN_SEPARATOR_STR);
    let native = Path::new(&native);
    let joined = if native.is_absolute() {
        native.to_path_buf()
    } else {
        base_dir.join(native)
    };

    // Canonicalization is best effort; fall back to the joined path.
    let git_dir = match joined.canonicalize() {
        Ok(canonical) => canonical,
        Err(_) => joined,
    };
    git_dir.is_dir().then_some(git_dir)
}
307
/// Resolves `refname` (e.g. `refs/heads/main`) inside `git_dir` to a commit id.
///
/// The loose ref file is tried first and accepted only if its trimmed content is at
/// least 40 characters of hexadecimal digits. Otherwise `packed-refs` is searched for a
/// line `<sha> <refname>`; comment (`#`) and peeled (`^`) lines are ignored.
///
/// Lines in `packed-refs` that do not have the `<sha> <name>` shape (for example blank
/// lines) are skipped instead of aborting the lookup.
///
/// Returns `None` when the ref cannot be resolved either way.
fn resolve_ref(git_dir: &Path, refname: &str) -> Option<String> {
    // Build the loose-ref path component by component so separators are platform-correct.
    let ref_path = refname
        .split('/')
        .fold(git_dir.to_path_buf(), |p, c| p.join(c));
    if let Ok(raw) = fs::read_to_string(&ref_path) {
        let sha = raw.trim();
        if sha.len() >= 40 && sha.chars().all(|c| c.is_ascii_hexdigit()) {
            return Some(sha.to_string());
        }
    }

    let packed = fs::read_to_string(git_dir.join("packed-refs")).ok()?;
    packed
        .lines()
        .filter(|line| !line.starts_with('#') && !line.starts_with('^'))
        .filter_map(|line| line.split_once(' '))
        .find(|(_, name)| name.trim() == refname)
        .map(|(sha, _)| sha.to_string())
}
343
344fn parse_last_reflog_entry(git_dir: &Path) -> (Option<String>, Option<String>) {
350 let log_path = git_dir.join("logs").join("HEAD");
351 let Ok(content) = fs::read_to_string(&log_path) else {
352 return (None, None);
353 };
354 let Some(last) = content.lines().rfind(|l| !l.trim().is_empty()) else {
355 return (None, None);
356 };
357
358 let Some(after_shas) = last.splitn(3, ' ').nth(2) else {
361 return (None, None);
362 };
363
364 let author = after_shas.find(" <").map(|i| after_shas[..i].to_string());
366
367 let date = (|| {
369 use chrono::TimeZone as _;
370 let close = after_shas.find("> ")?;
371 let rest = after_shas[close + 2..].trim_start();
372 let mut tokens = rest.splitn(3, ' ');
373 let unix_str = tokens.next()?;
374 let offset_str = tokens.next().map(|s| s.split('\t').next().unwrap_or(s))?;
375 let ts: i64 = unix_str.parse().ok()?;
376 let dt = chrono::Utc.timestamp_opt(ts, 0).single()?;
377 let tz_display = if offset_str.len() == 5 {
379 format!("{}:{}", &offset_str[..3], &offset_str[3..])
380 } else {
381 offset_str.to_string()
382 };
383 Some(format!("{}{}", dt.format("%Y-%m-%dT%H:%M:%S"), tz_display))
384 })();
385
386 (author, date)
387}
388
/// Reads the URL of the `origin` remote from `<git_dir>/config`.
///
/// Only the section whose header line is exactly `[remote "origin"]` is considered, and
/// the first `url = <value>` entry with a non-empty value wins. Returns `None` when the
/// config cannot be read or no such entry exists.
///
/// NOTE(review): the URL is returned verbatim, so any credentials embedded in it end up
/// wherever the caller stores it — consider whether that is acceptable.
fn read_git_remote_url(git_dir: &Path) -> Option<String> {
    let text = fs::read_to_string(git_dir.join("config")).ok()?;
    let mut inside_origin = false;
    for raw_line in text.lines() {
        let line = raw_line.trim();
        if line.starts_with('[') {
            // Every section header switches the "inside origin" state on or off.
            inside_origin = line == r#"[remote "origin"]"#;
            continue;
        }
        if !inside_origin {
            continue;
        }
        let value = line
            .strip_prefix("url")
            .map(|after_key| after_key.trim_start_matches([' ', '\t']))
            .and_then(|after_ws| after_ws.strip_prefix('='))
            .map(str::trim)
            .filter(|candidate| !candidate.is_empty());
        if let Some(url) = value {
            return Some(url.to_owned());
        }
    }
    None
}
411
412fn detect_git_for_run(project_path: &Path) -> GitInfo {
416 let Some(git_dir) = find_git_dir(project_path) else {
417 return GitInfo::default();
418 };
419
420 let head_raw = match fs::read_to_string(git_dir.join("HEAD")) {
421 Ok(s) => s.trim().to_string(),
422 Err(_) => return GitInfo::default(),
423 };
424
425 let (branch, commit_long) = head_raw.strip_prefix("ref: ").map_or_else(
426 || {
427 if head_raw.len() >= 40 && head_raw.chars().all(|c| c.is_ascii_hexdigit()) {
428 (None, Some(head_raw[..40].to_string()))
430 } else {
431 (None, None)
432 }
433 },
434 |refname| {
435 let branch = refname
436 .strip_prefix("refs/heads/")
437 .map(|b| b.trim().to_string());
438 let sha = resolve_ref(&git_dir, refname.trim());
439 (branch, sha)
440 },
441 );
442
443 let commit_short = commit_long
444 .as_deref()
445 .map(|s| s.chars().take(7).collect::<String>());
446
447 let (author, commit_date) = parse_last_reflog_entry(&git_dir);
448 let remote_url = read_git_remote_url(&git_dir);
449
450 let tags = run_git_cmd(project_path, &["tag", "--points-at", "HEAD"]).map(|t| {
453 t.lines()
454 .filter(|l| !l.is_empty())
455 .collect::<Vec<_>>()
456 .join(", ")
457 });
458 let nearest_tag = run_git_cmd(project_path, &["describe", "--tags", "--abbrev=0", "HEAD"]);
459
460 GitInfo {
461 commit_short,
462 commit_long,
463 branch,
464 author,
465 tags,
466 nearest_tag,
467 commit_date,
468 remote_url,
469 }
470}
471
/// Runs `git <args>` in `dir` and returns its trimmed standard output.
///
/// Several well-known locations of the git executable are tried in order. The next
/// candidate is tried only when the current one could not be started at all; once a git
/// executable has actually run, its answer is final. This avoids re-running the same
/// query through every candidate when git legitimately reports failure or prints nothing
/// (for example `git describe` in a repository without tags).
///
/// Returns `None` when no candidate can be started, git exits unsuccessfully, the output
/// is not valid UTF-8, or the trimmed output is empty.
///
/// NOTE(review): `-c safe.directory=*` disables git's repository-ownership check for
/// these invocations — confirm that is acceptable for directories the tool may scan.
fn run_git_cmd(dir: &Path, args: &[&str]) -> Option<String> {
    const CANDIDATES: [&str; 7] = [
        "git",
        "/usr/bin/git",
        "/usr/local/bin/git",
        "/opt/homebrew/bin/git",
        r"C:\Program Files\Git\cmd\git.exe",
        r"C:\Program Files\Git\bin\git.exe",
        r"C:\Program Files (x86)\Git\cmd\git.exe",
    ];
    for exe in CANDIDATES {
        let Ok(output) = std::process::Command::new(exe)
            .args(["-c", "safe.directory=*"])
            .args(args)
            .current_dir(dir)
            .output()
        else {
            // Executable missing or not startable here: try the next location.
            continue;
        };
        if !output.status.success() {
            return None;
        }
        return String::from_utf8(output.stdout)
            .ok()
            .map(|s| s.trim().to_string())
            .filter(|s| !s.is_empty());
    }
    None
}
507
/// Returns the current user's name from the environment.
///
/// `USERNAME` is consulted first, then `USER`; `"unknown"` is returned when neither
/// variable can be read.
fn get_current_username() -> String {
    for key in ["USERNAME", "USER"] {
        if let Ok(value) = std::env::var(key) {
            return value;
        }
    }
    "unknown".to_string()
}
513
/// Returns the machine's host name.
///
/// Sources, in order: the `COMPUTERNAME` variable, the `HOSTNAME` variable, the trimmed
/// contents of `/etc/hostname`. `"unknown"` is returned when none of them can be read.
fn get_hostname() -> String {
    if let Ok(name) = std::env::var("COMPUTERNAME") {
        return name;
    }
    if let Ok(name) = std::env::var("HOSTNAME") {
        return name;
    }
    match std::fs::read_to_string("/etc/hostname") {
        Ok(contents) => contents.trim().to_string(),
        Err(_) => "unknown".to_string(),
    }
}
520
521#[allow(clippy::too_many_arguments)]
523fn walk_root(
524 root: &Path,
525 config: &AppConfig,
526 include_globs: Option<&GlobSet>,
527 exclude_globs: Option<&GlobSet>,
528 enabled_languages: Option<&BTreeSet<Language>>,
529 seen_paths: &mut HashSet<PathBuf>,
530 analyzed: &mut Vec<FileRecord>,
531 skipped: &mut Vec<FileRecord>,
532 warnings: &mut Vec<String>,
533 cancel: Option<&AtomicBool>,
534) -> Result<()> {
535 let mut builder = WalkBuilder::new(root);
536 builder
537 .follow_links(config.discovery.follow_symlinks)
538 .hidden(config.discovery.ignore_hidden_files)
539 .ignore(config.discovery.honor_ignore_files)
540 .parents(config.discovery.honor_ignore_files)
541 .git_ignore(config.discovery.honor_ignore_files)
542 .git_global(config.discovery.honor_ignore_files)
543 .git_exclude(config.discovery.honor_ignore_files);
544
545 let paths = collect_walk_paths(&builder, seen_paths, warnings);
546 if paths.is_empty() {
547 return Ok(());
548 }
549
550 let chunk_results = run_parallel_analysis(
551 &paths,
552 root,
553 config,
554 include_globs,
555 exclude_globs,
556 enabled_languages,
557 cancel,
558 )?;
559 merge_chunk_results(chunk_results, analyzed, skipped, warnings)
560}
561
562fn collect_walk_paths(
563 builder: &WalkBuilder,
564 seen_paths: &mut HashSet<PathBuf>,
565 warnings: &mut Vec<String>,
566) -> Vec<PathBuf> {
567 let mut paths = Vec::new();
568 for entry in builder.build() {
569 let entry = match entry {
570 Ok(e) => e,
571 Err(err) => {
572 warnings.push(format!("discovery warning: {err}"));
573 continue;
574 }
575 };
576 let path = entry.into_path();
577 if path.is_dir() || !seen_paths.insert(path.clone()) {
578 continue;
579 }
580 paths.push(path);
581 }
582 paths
583}
584
585#[allow(clippy::too_many_arguments)]
586fn run_parallel_analysis(
587 paths: &[PathBuf],
588 root: &Path,
589 config: &AppConfig,
590 include_globs: Option<&GlobSet>,
591 exclude_globs: Option<&GlobSet>,
592 enabled_languages: Option<&BTreeSet<Language>>,
593 cancel: Option<&AtomicBool>,
594) -> Result<Vec<Vec<Result<Option<FileRecord>>>>> {
595 let thread_count = std::thread::available_parallelism().map_or(DEFAULT_ANALYSIS_THREADS, |n| {
596 n.get().min(MAX_ANALYSIS_THREADS)
597 });
598 let chunk_size = paths.len().div_ceil(thread_count);
599 std::thread::scope(|s| -> Result<Vec<Vec<Result<Option<FileRecord>>>>> {
600 paths
601 .chunks(chunk_size)
602 .map(|chunk| {
603 s.spawn(move || -> Vec<Result<Option<FileRecord>>> {
604 let mut results = Vec::with_capacity(chunk.len());
605 for path in chunk {
606 if cancel.is_some_and(|c| c.load(Ordering::Relaxed)) {
607 results.push(Err(anyhow::anyhow!("analysis cancelled")));
608 break;
609 }
610 results.push(analyze_candidate_file(
611 path,
612 root,
613 config,
614 include_globs,
615 exclude_globs,
616 enabled_languages,
617 ));
618 }
619 results
620 })
621 })
622 .map(|h| {
623 h.join()
624 .map_err(|_| anyhow::anyhow!("analysis thread panicked"))
625 })
626 .collect()
627 })
628}
629
630fn merge_chunk_results(
631 chunk_results: Vec<Vec<Result<Option<FileRecord>>>>,
632 analyzed: &mut Vec<FileRecord>,
633 skipped: &mut Vec<FileRecord>,
634 warnings: &mut Vec<String>,
635) -> Result<()> {
636 for chunk in chunk_results {
637 for result in chunk {
638 if let Some(record) = result? {
639 push_record(record, analyzed, skipped, warnings);
640 }
641 }
642 }
643 Ok(())
644}
645
646fn process_submodules(config: &AppConfig, analyzed: &mut [FileRecord]) -> Vec<SubmoduleSummary> {
648 let root = config.discovery.root_paths[0]
649 .canonicalize()
650 .unwrap_or_else(|_| config.discovery.root_paths[0].clone());
651 let submodules = detect_submodules(&root);
652 if submodules.is_empty() {
653 return Vec::new();
654 }
655
656 for file in analyzed.iter_mut() {
657 for (name, sub_path) in &submodules {
658 let prefix = sub_path.to_string_lossy().replace('\\', "/");
659 let rel = &file.relative_path;
660 if rel == &prefix || rel.starts_with(&format!("{prefix}/")) {
661 file.submodule = Some(name.clone());
662 break;
663 }
664 }
665 }
666
667 build_submodule_summaries(analyzed, &submodules)
668}
669
670fn assemble_run(
672 config: &AppConfig,
673 runtime_mode: &str,
674 analyzed: Vec<FileRecord>,
675 skipped: Vec<FileRecord>,
676 warnings: Vec<String>,
677 submodule_summaries: Vec<SubmoduleSummary>,
678) -> AnalysisRun {
679 let summary = build_summary(&analyzed, &skipped);
680 let language_summaries = build_language_summaries(&analyzed);
681
682 let first_root = config
683 .discovery
684 .root_paths
685 .first()
686 .map(|p| p.canonicalize().unwrap_or_else(|_| p.clone()));
687 let git = first_root
688 .as_deref()
689 .map(detect_git_for_run)
690 .unwrap_or_default();
691
692 let now = Utc::now();
693 let run_id = {
694 let uuid_suffix = Uuid::new_v4().simple().to_string();
695 format!("{}-{}", now.format("%Y%m%d-%H%M"), uuid_suffix)
696 };
697
698 AnalysisRun {
699 tool: ToolMetadata {
700 name: "sloc".into(),
701 version: env!("CARGO_PKG_VERSION").into(),
702 run_id,
703 timestamp_utc: now,
704 },
705 environment: EnvironmentMetadata {
706 operating_system: std::env::consts::OS.into(),
707 architecture: std::env::consts::ARCH.into(),
708 runtime_mode: runtime_mode.into(),
709 initiator_username: get_current_username(),
710 initiator_hostname: get_hostname(),
711 },
712 effective_configuration: config.clone(),
713 input_roots: config
714 .discovery
715 .root_paths
716 .iter()
717 .map(|p| path_to_string(p))
718 .collect(),
719 summary_totals: summary,
720 totals_by_language: language_summaries,
721 per_file_records: analyzed,
722 skipped_file_records: skipped,
723 warnings,
724 submodule_summaries,
725 git_commit_short: git.commit_short,
726 git_commit_long: git.commit_long,
727 git_branch: git.branch,
728 git_commit_author: git.author,
729 git_tags: git.tags,
730 git_nearest_tag: git.nearest_tag,
731 git_commit_date: git.commit_date,
732 git_remote_url: git.remote_url,
733 }
734}
735
736#[allow(clippy::too_many_lines)]
741pub fn analyze(
742 config: &AppConfig,
743 runtime_mode: &str,
744 cancel: Option<&AtomicBool>,
745) -> Result<AnalysisRun> {
746 config.validate()?;
747
748 if config.discovery.root_paths.is_empty() {
749 anyhow::bail!("no input paths were provided");
750 }
751
752 let include_globs = compile_globset(&config.discovery.include_globs)?;
753 let exclude_globs = compile_globset(&config.discovery.exclude_globs)?;
754 let enabled_languages = parse_enabled_languages(&config.analysis.enabled_languages)?;
755
756 let mut analyzed = Vec::new();
757 let mut skipped = Vec::new();
758 let mut warnings = Vec::new();
759 let mut seen_paths = HashSet::new();
760
761 for root in &config.discovery.root_paths {
762 if cancel.is_some_and(|c| c.load(Ordering::Relaxed)) {
763 anyhow::bail!("analysis cancelled");
764 }
765
766 let root = root.canonicalize().unwrap_or_else(|_| root.clone());
767
768 if root.is_file() {
769 if let Some(record) = analyze_candidate_file(
770 &root,
771 root.parent().unwrap_or_else(|| Path::new(".")),
772 config,
773 include_globs.as_ref(),
774 exclude_globs.as_ref(),
775 enabled_languages.as_ref(),
776 )? {
777 push_record(record, &mut analyzed, &mut skipped, &mut warnings);
778 }
779 continue;
780 }
781
782 walk_root(
783 &root,
784 config,
785 include_globs.as_ref(),
786 exclude_globs.as_ref(),
787 enabled_languages.as_ref(),
788 &mut seen_paths,
789 &mut analyzed,
790 &mut skipped,
791 &mut warnings,
792 cancel,
793 )?;
794 }
795
796 analyzed.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
797 skipped.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
798
799 let submodule_summaries = if config.discovery.submodule_breakdown {
801 process_submodules(config, &mut analyzed)
802 } else {
803 Vec::new()
804 };
805
806 attach_coverage(config, &mut analyzed, &mut warnings);
807
808 Ok(assemble_run(
809 config,
810 runtime_mode,
811 analyzed,
812 skipped,
813 warnings,
814 submodule_summaries,
815 ))
816}
817
818fn attach_coverage(config: &AppConfig, analyzed: &mut [FileRecord], warnings: &mut Vec<String>) {
819 let Some(cov_path) = coverage::resolve_coverage_file(config.analysis.coverage_file.as_deref())
820 else {
821 return;
822 };
823 tracing::debug!(path = %cov_path.display(), "loading coverage file");
824 match fs::read_to_string(&cov_path) {
825 Ok(content) => {
826 let cov_map = coverage::parse_coverage_auto(&cov_path, &content);
827 let mut matched: u32 = 0;
828 let mut unmatched: u32 = 0;
829 for record in analyzed.iter_mut() {
830 record.coverage =
831 coverage::lookup_coverage(&cov_map, &record.relative_path).cloned();
832 if record.coverage.is_some() {
833 matched += 1;
834 } else {
835 unmatched += 1;
836 }
837 }
838 tracing::debug!(
839 path = %cov_path.display(),
840 coverage_entries = cov_map.len(),
841 files_matched = matched,
842 files_unmatched = unmatched,
843 "coverage attached"
844 );
845 if unmatched > 0 && matched == 0 {
846 tracing::warn!(
847 path = %cov_path.display(),
848 "coverage file loaded but no source files could be matched — check that paths in the coverage report match the scanned directory"
849 );
850 }
851 }
852 Err(e) => {
853 tracing::warn!(path = %cov_path.display(), error = %e, "coverage file could not be read");
854 warnings.push(format!(
855 "coverage file '{}' could not be read: {e}",
856 cov_path.display()
857 ));
858 }
859 }
860}
861
862fn push_record(
863 record: FileRecord,
864 analyzed: &mut Vec<FileRecord>,
865 skipped: &mut Vec<FileRecord>,
866 warnings: &mut Vec<String>,
867) {
868 warnings.extend(
869 record
870 .warnings
871 .iter()
872 .map(|warning| format!("{}: {warning}", record.relative_path)),
873 );
874
875 match record.status {
876 FileStatus::AnalyzedExact | FileStatus::AnalyzedBestEffort => analyzed.push(record),
877 _ => skipped.push(record),
878 }
879}
880
881#[inline]
883fn skip_with_reason(
884 path: &Path,
885 root: &Path,
886 size: u64,
887 reason: impl Into<String>,
888) -> MetadataPolicyOutcome {
889 MetadataPolicyOutcome::Skip(Box::new(skipped_record(
890 path,
891 root,
892 size,
893 FileStatus::SkippedByPolicy,
894 vec![reason.into()],
895 )))
896}
897
898#[allow(clippy::too_many_arguments)]
902fn check_metadata_policy(
903 path: &Path,
904 root: &Path,
905 relative_path: &str,
906 metadata: &fs::Metadata,
907 config: &AppConfig,
908 include_globs: Option<&GlobSet>,
909 exclude_globs: Option<&GlobSet>,
910) -> MetadataPolicyOutcome {
911 let size = metadata.len();
912
913 if metadata.file_type().is_symlink() && !config.discovery.follow_symlinks {
914 return skip_with_reason(path, root, size, "symlink skipped by policy");
915 }
916 if file_name_eq(path, ".gitignore") {
917 return skip_with_reason(path, root, size, ".gitignore is always excluded");
918 }
919 if is_excluded_dir_path(path, &config.discovery.excluded_directories) {
920 return skip_with_reason(path, root, size, "path matched excluded directory setting");
921 }
922 if size > config.discovery.max_file_size_bytes {
923 return skip_with_reason(
924 path,
925 root,
926 size,
927 format!(
928 "file exceeded max_file_size_bytes ({})",
929 config.discovery.max_file_size_bytes
930 ),
931 );
932 }
933 if let Some(globs) = include_globs {
934 if !globs.is_match(Path::new(relative_path)) && !globs.is_match(path) {
935 return MetadataPolicyOutcome::Exclude;
936 }
937 }
938 if let Some(globs) = exclude_globs {
939 if globs.is_match(Path::new(relative_path)) || globs.is_match(path) {
940 return skip_with_reason(path, root, size, "path matched exclude glob");
941 }
942 }
943 if is_known_lockfile(path) && !config.analysis.include_lockfiles {
944 return skip_with_reason(path, root, size, "lockfile skipped by default policy");
945 }
946
947 MetadataPolicyOutcome::Continue
948}
949
/// Result of the content-level policy checks for one file.
struct ContentPolicyResult {
    /// Whether the path was classified as a vendor path.
    vendor: bool,
    /// Whether the file was classified as generated.
    generated: bool,
    /// Whether the file was classified as minified.
    minified: bool,
    /// `Some(record)` when a policy decided the file must be skipped; `None` to proceed.
    skip_record: Option<FileRecord>,
}
956
957fn check_content_policy(
960 path: &Path,
961 root: &Path,
962 size_bytes: u64,
963 bytes: &[u8],
964 config: &AppConfig,
965) -> ContentPolicyResult {
966 let vendor = is_vendor_path(path);
967 if vendor && config.analysis.vendor_directory_detection {
968 return ContentPolicyResult {
969 vendor,
970 generated: false,
971 minified: false,
972 skip_record: Some(skipped_record(
973 path,
974 root,
975 size_bytes,
976 FileStatus::SkippedByPolicy,
977 vec!["vendor file skipped by policy".into()],
978 )),
979 };
980 }
981
982 let generated = config.analysis.generated_file_detection && looks_generated(path, bytes);
983 if generated {
984 return ContentPolicyResult {
985 vendor,
986 generated,
987 minified: false,
988 skip_record: Some(skipped_record(
989 path,
990 root,
991 size_bytes,
992 FileStatus::SkippedByPolicy,
993 vec!["generated file skipped by policy".into()],
994 )),
995 };
996 }
997
998 let minified = config.analysis.minified_file_detection && looks_minified(path, bytes);
999 if minified {
1000 return ContentPolicyResult {
1001 vendor,
1002 generated,
1003 minified,
1004 skip_record: Some(skipped_record(
1005 path,
1006 root,
1007 size_bytes,
1008 FileStatus::SkippedByPolicy,
1009 vec!["minified file skipped by policy".into()],
1010 )),
1011 };
1012 }
1013
1014 ContentPolicyResult {
1015 vendor,
1016 generated,
1017 minified,
1018 skip_record: None,
1019 }
1020}
1021
1022fn decode_file_contents(
1024 path: &Path,
1025 root: &Path,
1026 size_bytes: u64,
1027 bytes: &[u8],
1028 config: &AppConfig,
1029) -> Result<Option<(String, String, Vec<String>)>> {
1030 if is_binary(bytes) {
1031 return match config.analysis.binary_file_behavior {
1032 BinaryFileBehavior::Skip => Ok(None),
1033 BinaryFileBehavior::Fail => {
1034 anyhow::bail!("binary file encountered: {}", path.display())
1035 }
1036 };
1037 }
1038
1039 match decode_bytes(bytes) {
1040 Ok(result) => Ok(Some(result)),
1041 Err(err) => match config.analysis.decode_failure_behavior {
1042 FailureBehavior::WarnSkip => {
1043 let _ = (path, root, size_bytes); Err(anyhow::anyhow!("__decode_warn__: {err}"))
1048 }
1049 FailureBehavior::Fail => {
1050 anyhow::bail!("decode failure for {}: {err}", path.display())
1051 }
1052 },
1053 }
1054}
1055
/// Runs the full per-file pipeline for one candidate path.
///
/// Stages, in order: read metadata → metadata policies → read bytes → content policies
/// → binary check and decoding → language detection → enabled-language filter → line
/// analysis → effective counts.
///
/// Returns `Ok(Some(record))` for an analyzed or skipped file, and `Ok(None)` when the
/// file is excluded without a record (it did not match the include globs).
///
/// # Errors
/// Returns an error only for hard failures reported by decoding (binary or undecodable
/// content with the `Fail` behavior configured). I/O failures on metadata or contents are
/// reported as `ErrorInternal` records instead.
#[allow(clippy::too_many_lines)]
fn analyze_candidate_file(
    path: &Path,
    root: &Path,
    config: &AppConfig,
    include_globs: Option<&GlobSet>,
    exclude_globs: Option<&GlobSet>,
    enabled_languages: Option<&BTreeSet<Language>>,
) -> Result<Option<FileRecord>> {
    // `symlink_metadata` does not follow links, so symlinks can be detected by the policy check.
    let metadata = match fs::symlink_metadata(path) {
        Ok(metadata) => metadata,
        Err(err) => {
            return Ok(Some(skipped_record(
                path,
                root,
                0,
                FileStatus::ErrorInternal,
                vec![format!("failed to read metadata: {err}")],
            )));
        }
    };

    let relative_path = relative_path_string(path, root);

    // Policies that need no file contents run before the file is read.
    match check_metadata_policy(
        path,
        root,
        &relative_path,
        &metadata,
        config,
        include_globs,
        exclude_globs,
    ) {
        MetadataPolicyOutcome::Skip(record) => return Ok(Some(*record)),
        MetadataPolicyOutcome::Exclude => return Ok(None),
        MetadataPolicyOutcome::Continue => {}
    }

    let bytes = match fs::read(path) {
        Ok(bytes) => bytes,
        Err(err) => {
            return Ok(Some(skipped_record(
                path,
                root,
                metadata.len(),
                FileStatus::ErrorInternal,
                vec![format!("failed to read file: {err}")],
            )));
        }
    };

    // Vendor / generated / minified checks.
    let content_policy = check_content_policy(path, root, metadata.len(), &bytes, config);
    if let Some(record) = content_policy.skip_record {
        return Ok(Some(record));
    }
    let (vendor, generated, minified) = (
        content_policy.vendor,
        content_policy.generated,
        content_policy.minified,
    );

    let (text, encoding, decode_warnings) =
        match decode_file_contents(path, root, metadata.len(), &bytes, config) {
            Ok(Some(result)) => result,
            // `None` means binary content with the skip behavior configured.
            Ok(None) => {
                return Ok(Some(skipped_record(
                    path,
                    root,
                    metadata.len(),
                    FileStatus::SkippedBinary,
                    vec!["binary file skipped by default".into()],
                )));
            }
            Err(err) => {
                // A message with the sentinel prefix signals "warn and skip" rather than
                // a hard failure; anything else is propagated.
                let msg = err.to_string();
                if let Some(warn_msg) = msg.strip_prefix("__decode_warn__: ") {
                    return Ok(Some(skipped_record(
                        path,
                        root,
                        metadata.len(),
                        FileStatus::SkippedDecodeError,
                        vec![warn_msg.to_string()],
                    )));
                }
                return Err(err);
            }
        };

    // The first line is handed to language detection together with the shebang setting.
    let first_line = text.lines().next();
    let language = detect_language(
        path,
        first_line,
        &config.analysis.extension_overrides,
        config.analysis.shebang_detection,
    );

    let Some(language) = language else {
        return Ok(Some(skipped_record(
            path,
            root,
            metadata.len(),
            FileStatus::SkippedUnsupported,
            vec!["unsupported or undetected language".into()],
        )));
    };

    // When a language allow-list is configured, anything outside it is skipped by policy.
    if let Some(enabled) = enabled_languages {
        if !enabled.contains(&language) {
            return Ok(Some(skipped_record(
                path,
                root,
                metadata.len(),
                FileStatus::SkippedByPolicy,
                vec![format!(
                    "language {} disabled by configuration",
                    language.display_name()
                )],
            )));
        }
    }

    let ieee_opts = AnalysisOptions {
        blank_in_block_comment_as_comment: config.analysis.blank_in_block_comment_policy
            == BlankInBlockCommentPolicy::CountAsComment,
        collapse_continuation_lines: config.analysis.continuation_line_policy
            == ContinuationLinePolicy::CollapseToLogical,
    };
    let analysis = analyze_text(language, &text, ieee_opts);
    let effective_counts = compute_effective_counts(
        &analysis.raw,
        config.analysis.mixed_line_policy,
        config.analysis.python_docstrings_as_comments,
        config.analysis.count_compiler_directives,
    );

    // Decode warnings come first, followed by the analyzer's warnings.
    let mut warnings = decode_warnings;
    warnings.extend(analysis.warnings.clone());

    Ok(Some(FileRecord {
        path: path_to_string(path),
        relative_path,
        language: Some(language),
        size_bytes: metadata.len(),
        detected_encoding: Some(encoding),
        raw_line_categories: analysis.raw,
        effective_counts,
        status: match analysis.parse_mode {
            ParseMode::Lexical | ParseMode::TreeSitter => FileStatus::AnalyzedExact,
            ParseMode::LexicalBestEffort => FileStatus::AnalyzedBestEffort,
        },
        warnings,
        generated,
        minified,
        vendor,
        parse_mode: Some(analysis.parse_mode),
        submodule: None,
        coverage: None,
    }))
}
1218
1219const fn compute_effective_counts(
1220 raw: &RawLineCounts,
1221 mixed_line_policy: MixedLinePolicy,
1222 python_docstrings_as_comments: bool,
1223 count_compiler_directives: bool,
1224) -> EffectiveCounts {
1225 let mut effective = EffectiveCounts {
1226 code_lines: raw.code_only_lines,
1227 comment_lines: raw.single_comment_only_lines + raw.multi_comment_only_lines,
1228 blank_lines: raw.blank_only_lines,
1229 mixed_lines_separate: 0,
1230 };
1231
1232 if python_docstrings_as_comments {
1233 effective.comment_lines += raw.docstring_comment_lines;
1234 } else {
1235 effective.code_lines += raw.docstring_comment_lines;
1236 }
1237
1238 let mixed_total = raw.mixed_code_single_comment_lines + raw.mixed_code_multi_comment_lines;
1239 match mixed_line_policy {
1240 MixedLinePolicy::CodeOnly => effective.code_lines += mixed_total,
1241 MixedLinePolicy::CodeAndComment => {
1242 effective.code_lines += mixed_total;
1243 effective.comment_lines += mixed_total;
1244 }
1245 MixedLinePolicy::CommentOnly => effective.comment_lines += mixed_total,
1246 MixedLinePolicy::SeparateMixedCategory => effective.mixed_lines_separate += mixed_total,
1247 }
1248
1249 if !count_compiler_directives {
1252 effective.code_lines = effective
1253 .code_lines
1254 .saturating_sub(raw.compiler_directive_lines);
1255 }
1256
1257 effective
1258}
1259
1260fn build_summary(analyzed: &[FileRecord], skipped: &[FileRecord]) -> SummaryTotals {
1261 let mut summary = SummaryTotals {
1262 files_considered: (analyzed.len() + skipped.len()) as u64,
1263 files_analyzed: analyzed.len() as u64,
1264 files_skipped: skipped.len() as u64,
1265 ..Default::default()
1266 };
1267
1268 for record in analyzed {
1269 summary.total_physical_lines += record.raw_line_categories.total_physical_lines;
1270 summary.code_lines += record.effective_counts.code_lines;
1271 summary.comment_lines += record.effective_counts.comment_lines;
1272 summary.blank_lines += record.effective_counts.blank_lines;
1273 summary.mixed_lines_separate += record.effective_counts.mixed_lines_separate;
1274 summary.functions += record.raw_line_categories.functions;
1275 summary.classes += record.raw_line_categories.classes;
1276 summary.variables += record.raw_line_categories.variables;
1277 summary.imports += record.raw_line_categories.imports;
1278 summary.test_count += record.raw_line_categories.test_count;
1279 summary.test_assertion_count += record.raw_line_categories.test_assertion_count;
1280 summary.test_suite_count += record.raw_line_categories.test_suite_count;
1281 if let Some(cov) = &record.coverage {
1282 summary.coverage_lines_found += u64::from(cov.lines_found);
1283 summary.coverage_lines_hit += u64::from(cov.lines_hit);
1284 summary.coverage_functions_found += u64::from(cov.functions_found);
1285 summary.coverage_functions_hit += u64::from(cov.functions_hit);
1286 summary.coverage_branches_found += u64::from(cov.branches_found);
1287 summary.coverage_branches_hit += u64::from(cov.branches_hit);
1288 }
1289 }
1290
1291 summary
1292}
1293
1294const fn zeroed_summary(language: Language) -> LanguageSummary {
1296 LanguageSummary {
1297 language,
1298 files: 0,
1299 total_physical_lines: 0,
1300 code_lines: 0,
1301 comment_lines: 0,
1302 blank_lines: 0,
1303 mixed_lines_separate: 0,
1304 functions: 0,
1305 classes: 0,
1306 variables: 0,
1307 imports: 0,
1308 test_count: 0,
1309 test_assertion_count: 0,
1310 test_suite_count: 0,
1311 coverage_lines_found: 0,
1312 coverage_lines_hit: 0,
1313 coverage_functions_found: 0,
1314 coverage_functions_hit: 0,
1315 coverage_branches_found: 0,
1316 coverage_branches_hit: 0,
1317 }
1318}
1319
1320fn accumulate_record_into_summary(entry: &mut LanguageSummary, record: &FileRecord) {
1322 entry.files += 1;
1323 let r = &record.raw_line_categories;
1324 entry.total_physical_lines += r.total_physical_lines;
1325 entry.code_lines += record.effective_counts.code_lines;
1326 entry.comment_lines += record.effective_counts.comment_lines;
1327 entry.blank_lines += record.effective_counts.blank_lines;
1328 entry.mixed_lines_separate += record.effective_counts.mixed_lines_separate;
1329 entry.functions += r.functions;
1330 entry.classes += r.classes;
1331 entry.variables += r.variables;
1332 entry.imports += r.imports;
1333 entry.test_count += r.test_count;
1334 entry.test_assertion_count += r.test_assertion_count;
1335 entry.test_suite_count += r.test_suite_count;
1336 if let Some(cov) = &record.coverage {
1337 entry.coverage_lines_found += u64::from(cov.lines_found);
1338 entry.coverage_lines_hit += u64::from(cov.lines_hit);
1339 entry.coverage_functions_found += u64::from(cov.functions_found);
1340 entry.coverage_functions_hit += u64::from(cov.functions_hit);
1341 entry.coverage_branches_found += u64::from(cov.branches_found);
1342 entry.coverage_branches_hit += u64::from(cov.branches_hit);
1343 }
1344}
1345
1346fn build_language_summaries(analyzed: &[FileRecord]) -> Vec<LanguageSummary> {
1347 let mut by_language: BTreeMap<Language, LanguageSummary> = BTreeMap::new();
1348 for record in analyzed {
1349 let Some(language) = record.language else {
1350 continue;
1351 };
1352 let entry = by_language
1353 .entry(language)
1354 .or_insert_with(|| zeroed_summary(language));
1355 accumulate_record_into_summary(entry, record);
1356 }
1357 by_language.into_values().collect()
1358}
1359
1360fn skipped_record(
1361 path: &Path,
1362 root: &Path,
1363 size_bytes: u64,
1364 status: FileStatus,
1365 warnings: Vec<String>,
1366) -> FileRecord {
1367 FileRecord {
1368 path: path_to_string(path),
1369 relative_path: relative_path_string(path, root),
1370 language: None,
1371 size_bytes,
1372 detected_encoding: None,
1373 raw_line_categories: RawLineCounts::default(),
1374 effective_counts: EffectiveCounts::default(),
1375 status,
1376 warnings,
1377 generated: false,
1378 minified: false,
1379 vendor: false,
1380 parse_mode: None,
1381 submodule: None,
1382 coverage: None,
1383 }
1384}
1385
/// Renders `path` relative to `root` with forward slashes.
///
/// When `path` does not start with `root`, the whole `path` is rendered
/// instead. Non-UTF-8 parts are converted lossily.
fn relative_path_string(path: &Path, root: &Path) -> String {
    let relative = match path.strip_prefix(root) {
        Ok(stripped) => stripped,
        Err(_) => path,
    };
    let rendered = relative.to_string_lossy();
    rendered.replace('\\', "/")
}
1392
/// Renders `path` as a string (lossily for non-UTF-8 data) with every
/// backslash turned into a forward slash.
fn path_to_string(path: &Path) -> String {
    path.to_string_lossy()
        .chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect()
}
1396
/// Reads `<root>/.gitmodules` and returns `(name, path)` for every
/// `[submodule "name"]` section that declares a `path` entry, in file order.
///
/// Returns an empty list when the file is missing or cannot be read as text.
/// This is a lenient line scanner, not a full git-config parser: lines that are
/// neither a well-formed submodule header nor a `path = …` entry are ignored,
/// and a section without a `path` entry is dropped.
#[must_use]
pub fn detect_submodules(root: &Path) -> Vec<(String, PathBuf)> {
    let gitmodules = root.join(".gitmodules");
    if !gitmodules.is_file() {
        return Vec::new();
    }
    let Ok(content) = fs::read_to_string(&gitmodules) else {
        return Vec::new();
    };

    let mut result = Vec::new();
    let mut current_name: Option<String> = None;
    let mut current_path: Option<PathBuf> = None;

    for line in content.lines() {
        let trimmed = line.trim();

        // Peel the prefix first and the suffix from what remains. Slicing by
        // fixed offsets would panic on a malformed header such as
        // `[submodule "]`, where the prefix and suffix overlap.
        let header_name = trimmed
            .strip_prefix("[submodule \"")
            .and_then(|rest| rest.strip_suffix("\"]"));

        if let Some(name) = header_name {
            // A new section begins: flush the previous one if it was complete.
            if let (Some(done_name), Some(done_path)) = (current_name.take(), current_path.take())
            {
                result.push((done_name, done_path));
            }
            current_name = Some(name.to_string());
            continue;
        }

        // Accept only the exact key `path` (optionally followed by spaces)
        // before `=`, so other keys that merely start with "path" cannot
        // overwrite the submodule path.
        let path_value = trimmed
            .strip_prefix("path")
            .and_then(|rest| rest.trim_start().strip_prefix('='));
        if let Some(value) = path_value {
            current_path = Some(PathBuf::from(value.trim()));
        }
    }

    // Flush the final section.
    if let (Some(name), Some(path)) = (current_name, current_path) {
        result.push((name, path));
    }

    result
}
1433
1434fn build_submodule_summaries(
1435 analyzed: &[FileRecord],
1436 submodules: &[(String, PathBuf)],
1437) -> Vec<SubmoduleSummary> {
1438 submodules
1439 .iter()
1440 .map(|(name, path)| {
1441 let files: Vec<&FileRecord> = analyzed
1442 .iter()
1443 .filter(|f| f.submodule.as_deref() == Some(name.as_str()))
1444 .collect();
1445
1446 let files_analyzed = files.len() as u64;
1447 let total_physical_lines = files
1448 .iter()
1449 .map(|f| f.raw_line_categories.total_physical_lines)
1450 .sum();
1451 let code_lines = files.iter().map(|f| f.effective_counts.code_lines).sum();
1452 let comment_lines = files.iter().map(|f| f.effective_counts.comment_lines).sum();
1453 let blank_lines = files.iter().map(|f| f.effective_counts.blank_lines).sum();
1454 let language_summaries = build_language_summaries_from_slice(&files);
1455
1456 SubmoduleSummary {
1457 name: name.clone(),
1458 relative_path: path.to_string_lossy().replace('\\', "/"),
1459 files_analyzed,
1460 total_physical_lines,
1461 code_lines,
1462 comment_lines,
1463 blank_lines,
1464 language_summaries,
1465 }
1466 })
1467 .filter(|s| s.files_analyzed > 0)
1468 .collect()
1469}
1470
1471fn build_language_summaries_from_slice(files: &[&FileRecord]) -> Vec<LanguageSummary> {
1472 let mut map: BTreeMap<String, LanguageSummary> = BTreeMap::new();
1473 for file in files {
1474 let Some(lang) = file.language else { continue };
1475 let entry = map
1476 .entry(lang.display_name().to_string())
1477 .or_insert_with(|| zeroed_summary(lang));
1478 accumulate_record_into_summary(entry, file);
1479 }
1480 map.into_values().collect()
1481}
1482
/// Returns `true` when the final component of `path` is valid UTF-8 and equals
/// `expected` exactly (case-sensitive).
fn file_name_eq(path: &Path, expected: &str) -> bool {
    matches!(
        path.file_name().and_then(|name| name.to_str()),
        Some(name) if name == expected
    )
}
1488
/// Returns `true` when any UTF-8 component of `path` is exactly equal to one
/// of the names in `excluded_dirs`.
fn is_excluded_dir_path(path: &Path, excluded_dirs: &[String]) -> bool {
    path.components()
        .filter_map(|component| component.as_os_str().to_str())
        .any(|part| excluded_dirs.iter().any(|excluded| excluded.as_str() == part))
}
1497
/// Returns `true` when any component of `path` is exactly `vendor`,
/// `node_modules` or `packages`.
fn is_vendor_path(path: &Path) -> bool {
    const VENDOR_DIR_NAMES: [&str; 3] = ["vendor", "node_modules", "packages"];

    path.components()
        .filter_map(|component| component.as_os_str().to_str())
        .any(|part| VENDOR_DIR_NAMES.contains(&part))
}
1506
/// Returns `true` when the file name of `path` is one of the recognised
/// package-manager lockfile names (exact, case-sensitive match).
fn is_known_lockfile(path: &Path) -> bool {
    const LOCKFILE_NAMES: [&str; 7] = [
        "Cargo.lock",
        "package-lock.json",
        "yarn.lock",
        "pnpm-lock.yaml",
        "Pipfile.lock",
        "poetry.lock",
        "composer.lock",
    ];

    let Some(name) = path.file_name().and_then(|name| name.to_str()) else {
        return false;
    };
    LOCKFILE_NAMES.contains(&name)
}
1523
1524fn looks_generated(path: &Path, bytes: &[u8]) -> bool {
1525 let file_name = path
1526 .file_name()
1527 .and_then(|name| name.to_str())
1528 .unwrap_or_default();
1529 if file_name.contains(".generated.") || file_name.contains(".g.") {
1530 return true;
1531 }
1532
1533 let sample = String::from_utf8_lossy(&bytes[..bytes.len().min(GENERATED_SAMPLE_BYTES)])
1534 .to_ascii_lowercase();
1535 sample.contains("@generated") || sample.contains("generated by")
1536}
1537
1538fn looks_minified(path: &Path, bytes: &[u8]) -> bool {
1539 let file_name = path
1540 .file_name()
1541 .and_then(|name| name.to_str())
1542 .unwrap_or_default();
1543 if file_name.contains(".min.") {
1544 return true;
1545 }
1546
1547 let sample = String::from_utf8_lossy(&bytes[..bytes.len().min(MINIFIED_SAMPLE_BYTES)]);
1548 let longest_line = sample.lines().map(str::len).max().unwrap_or(0);
1549 let whitespace = sample.chars().filter(|c| c.is_whitespace()).count();
1550 longest_line > MINIFIED_LINE_THRESHOLD && whitespace * 100 < sample.len().max(1)
1551}
1552
1553fn is_binary(bytes: &[u8]) -> bool {
1554 if bytes.starts_with(&[0xEF, 0xBB, 0xBF])
1555 || bytes.starts_with(&[0xFF, 0xFE])
1556 || bytes.starts_with(&[0xFE, 0xFF])
1557 {
1558 return false;
1559 }
1560
1561 let sample = &bytes[..bytes.len().min(BINARY_SAMPLE_BYTES)];
1562 sample.contains(&0)
1563}
1564
1565fn decode_utf16_bom(
1568 bom_stripped: &[u8],
1569 encoding: &'static encoding_rs::Encoding,
1570 label: &str,
1571) -> (String, String, Vec<String>) {
1572 let (cow, _, had_errors) = encoding.decode(bom_stripped);
1573 let mut warnings = Vec::new();
1574 if had_errors {
1575 warnings.push(format!("{label} decode contained replacement characters"));
1576 }
1577 (cow.into_owned(), label.into(), warnings)
1578}
1579
1580fn decode_bytes(bytes: &[u8]) -> std::result::Result<(String, String, Vec<String>), String> {
1581 if bytes.starts_with(&[0xEF, 0xBB, 0xBF]) {
1582 let text = String::from_utf8(bytes[3..].to_vec()).map_err(|err| err.to_string())?;
1583 return Ok((text, "utf-8-bom".into(), vec![]));
1584 }
1585 if bytes.starts_with(&[0xFF, 0xFE]) {
1586 return Ok(decode_utf16_bom(&bytes[2..], UTF_16LE, "utf-16le"));
1587 }
1588 if bytes.starts_with(&[0xFE, 0xFF]) {
1589 return Ok(decode_utf16_bom(&bytes[2..], UTF_16BE, "utf-16be"));
1590 }
1591
1592 #[allow(clippy::option_if_let_else)]
1594 if let Ok(text) = String::from_utf8(bytes.to_vec()) {
1595 Ok((text, "utf-8".into(), vec![]))
1596 } else {
1597 let (cow, _, had_errors) = WINDOWS_1252.decode(bytes);
1598 let mut warnings = vec!["decoded using windows-1252 fallback".into()];
1599 if had_errors {
1600 warnings.push("fallback decode contained replacement characters".into());
1601 }
1602 Ok((cow.into_owned(), "windows-1252".into(), warnings))
1603 }
1604}
1605
1606fn compile_globset(patterns: &[String]) -> Result<Option<GlobSet>> {
1607 if patterns.is_empty() {
1608 return Ok(None);
1609 }
1610
1611 let mut builder = GlobSetBuilder::new();
1612 for pattern in patterns {
1613 builder
1614 .add(Glob::new(pattern).with_context(|| format!("invalid glob pattern: {pattern}"))?);
1615 }
1616 Ok(Some(
1617 builder.build().context("failed to compile glob filters")?,
1618 ))
1619}
1620
1621fn parse_enabled_languages(enabled: &[String]) -> Result<Option<BTreeSet<Language>>> {
1622 if enabled.is_empty() {
1623 return Ok(None);
1624 }
1625
1626 let supported = supported_languages();
1627 let mut set = BTreeSet::new();
1628 for name in enabled {
1629 let language = Language::from_name(name)
1630 .with_context(|| format!("unsupported language in config: {name}"))?;
1631 if !supported.contains(&language) {
1632 anyhow::bail!("language {name} is not supported in this build");
1633 }
1634 set.insert(language);
1635 }
1636 Ok(Some(set))
1637}
1638
1639pub fn write_json(run: &AnalysisRun, output_path: &Path) -> Result<()> {
1643 let json = serde_json::to_string_pretty(run).context("failed to serialize analysis run")?;
1644 fs::write(output_path, json)
1645 .with_context(|| format!("failed to write JSON output to {}", output_path.display()))
1646}
1647
1648pub fn read_json(path: &Path) -> Result<AnalysisRun> {
1652 let contents = fs::read_to_string(path)
1653 .with_context(|| format!("failed to read result file {}", path.display()))?;
1654 serde_json::from_str(&contents)
1655 .with_context(|| format!("failed to parse JSON result {}", path.display()))
1656}
1657
#[cfg(test)]
mod tests {
    use super::*;

    /// With the code-only policy, mixed lines count as code, and docstrings
    /// count as comments when that option is enabled.
    #[test]
    fn effective_counts_respect_code_only_policy() {
        let input = RawLineCounts {
            docstring_comment_lines: 2,
            mixed_code_single_comment_lines: 3,
            single_comment_only_lines: 1,
            code_only_lines: 2,
            ..RawLineCounts::default()
        };

        let effective = compute_effective_counts(&input, MixedLinePolicy::CodeOnly, true, true);

        // 2 code-only + 3 mixed.
        assert_eq!(effective.code_lines, 5);
        // 1 comment-only + 2 docstring.
        assert_eq!(effective.comment_lines, 3);
    }

    /// With the separate-category policy, mixed lines land only in the
    /// dedicated bucket.
    #[test]
    fn effective_counts_can_separate_mixed() {
        let input = RawLineCounts {
            mixed_code_multi_comment_lines: 1,
            mixed_code_single_comment_lines: 2,
            ..RawLineCounts::default()
        };

        let effective =
            compute_effective_counts(&input, MixedLinePolicy::SeparateMixedCategory, true, true);

        assert_eq!(effective.mixed_lines_separate, 3);
        assert_eq!(effective.code_lines, 0);
        assert_eq!(effective.comment_lines, 0);
    }

    /// Bytes that are not valid UTF-8 fall back to windows-1252 and produce a
    /// warning.
    #[test]
    fn windows_1252_fallback_decodes() {
        // "Hello", byte 0x96 (an en dash in windows-1252), "W".
        let input: [u8; 7] = [0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x96, 0x57];

        let (decoded, label, notes) = decode_bytes(&input).unwrap();

        assert_eq!(label, "windows-1252");
        assert!(decoded.contains('\u{2013}'));
        assert!(!notes.is_empty());
    }
}