1#![allow(clippy::multiple_crate_versions)]
4
5pub mod baseline;
6pub mod coverage;
7pub mod delta;
8pub mod history;
9pub use baseline::{check_against_baseline, resolve_baselines_path, BaselineEntry, BaselineStore};
10pub use coverage::{aggregate_line_coverage, lookup_coverage, parse_lcov, FileCoverage};
11pub use delta::{compute_delta, FileChangeStatus, FileDelta, ScanComparison, SummaryDelta};
12pub use history::{
13 CleanupPolicy, CleanupPolicyStore, RegistryEntry, ScanRegistry, ScanSummarySnapshot,
14 WatchedDirsStore,
15};
16
17use std::collections::{BTreeMap, BTreeSet, HashSet};
18use std::fs;
19use std::path::{Path, PathBuf};
20use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
21use std::sync::Arc;
22
23use anyhow::{Context, Result};
24use chrono::{DateTime, Utc};
25use encoding_rs::{UTF_16BE, UTF_16LE, WINDOWS_1252};
26use globset::{Glob, GlobSet, GlobSetBuilder};
27use ignore::WalkBuilder;
28use serde::{Deserialize, Serialize};
29use uuid::Uuid;
30
31use sloc_config::{
32 AppConfig, BinaryFileBehavior, BlankInBlockCommentPolicy, ContinuationLinePolicy,
33 FailureBehavior, MixedLinePolicy,
34};
35use sloc_languages::style::IndentStyle;
36use sloc_languages::{
37 analyze_text, detect_language, supported_languages, AnalysisOptions, Language, ParseMode,
38 RawLineCounts, StyleAnalysis, StyleLangScope,
39};
40
/// Upper bound applied to the detected parallelism when sizing the analysis worker pool.
const MAX_ANALYSIS_THREADS: usize = 16;
/// Worker count used when the available parallelism cannot be queried.
const DEFAULT_ANALYSIS_THREADS: usize = 4;
// NOTE(review): the four sampling constants below are not referenced in this part of
// the file; presumably they bound how much of a file the generated / minified / binary
// heuristics inspect — confirm against the helpers that use them.
const GENERATED_SAMPLE_BYTES: usize = 1024;
const MINIFIED_SAMPLE_BYTES: usize = 4096;
const MINIFIED_LINE_THRESHOLD: usize = 2000;
const BINARY_SAMPLE_BYTES: usize = 8192;
55
/// Shared atomic counters a caller can poll while a scan is running.
///
/// `walk_root` adds the number of discovered paths to `files_total`; each analysis
/// worker increments `files_done` after it has processed one path.
pub struct ProgressCounters {
    /// Number of discovered paths that workers have finished processing.
    pub files_done: Arc<AtomicUsize>,
    /// Number of paths discovered so far by directory walks.
    pub files_total: Arc<AtomicUsize>,
}
63
/// Result of the metadata-only policy checks performed before a file is read.
enum MetadataPolicyOutcome {
    /// The file is rejected and reported through the boxed skipped-file record.
    Skip(Box<FileRecord>),
    /// The file is dropped without producing any record (include globs did not match).
    Exclude,
    /// No policy objected; the caller goes on to read and analyze the file.
    Continue,
}
73
/// Final disposition of a candidate file; serialized in `snake_case`.
///
/// `push_record` files records with one of the two `Analyzed*` statuses under the
/// analyzed list and every other status under the skipped list.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FileStatus {
    AnalyzedExact,
    AnalyzedBestEffort,
    SkippedBinary,
    SkippedDecodeError,
    SkippedUnsupported,
    /// Used for every policy-based rejection (symlink, excluded directory, size limit,
    /// exclude glob, lockfile, vendor, generated, minified).
    SkippedByPolicy,
    /// Used when file metadata or contents could not be read.
    ErrorInternal,
}
85
/// Per-file line counts as stored on a [`FileRecord`].
// NOTE(review): presumably these are the raw categories after the configured
// mixed-line / continuation policies have been applied — confirm where they are computed.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct EffectiveCounts {
    pub code_lines: u64,
    pub comment_lines: u64,
    pub blank_lines: u64,
    pub mixed_lines_separate: u64,
}
93
/// Identity of the tool and of one particular run, as filled in by `assemble_run`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolMetadata {
    /// Tool name (`"sloc"`).
    pub name: String,
    /// Crate version taken from `CARGO_PKG_VERSION` at build time.
    pub version: String,
    /// `<YYYYMMDD-HHMM>-<uuid v4, simple form>` identifier for the run.
    pub run_id: String,
    /// UTC time at which the run record was assembled.
    pub timestamp_utc: DateTime<Utc>,
}
101
/// Description of the machine and invocation context that produced a run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnvironmentMetadata {
    /// Value of `std::env::consts::OS`.
    pub operating_system: String,
    /// Value of `std::env::consts::ARCH`.
    pub architecture: String,
    /// Caller-supplied runtime mode string passed to `analyze`.
    pub runtime_mode: String,
    /// User name read from the environment, or `"unknown"`.
    pub initiator_username: String,
    /// Host or CI runner name, or `"unknown"`.
    pub initiator_hostname: String,
    /// Detected CI system, when any; omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub ci_name: Option<String>,
}
114
/// Run-wide totals stored in [`AnalysisRun::summary_totals`].
///
/// Fields marked `#[serde(default)]` fall back to `0` when absent from the input
/// being deserialized.
// NOTE(review): presumably the defaults exist so reports written before those
// metrics were added still load — confirm.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct SummaryTotals {
    pub files_considered: u64,
    pub files_analyzed: u64,
    pub files_skipped: u64,
    pub total_physical_lines: u64,
    pub code_lines: u64,
    pub comment_lines: u64,
    pub blank_lines: u64,
    pub mixed_lines_separate: u64,
    #[serde(default)]
    pub functions: u64,
    #[serde(default)]
    pub classes: u64,
    #[serde(default)]
    pub variables: u64,
    #[serde(default)]
    pub imports: u64,
    #[serde(default)]
    pub test_count: u64,
    #[serde(default)]
    pub test_assertion_count: u64,
    #[serde(default)]
    pub test_suite_count: u64,
    #[serde(default)]
    pub coverage_lines_found: u64,
    #[serde(default)]
    pub coverage_lines_hit: u64,
    #[serde(default)]
    pub coverage_functions_found: u64,
    #[serde(default)]
    pub coverage_functions_hit: u64,
    #[serde(default)]
    pub coverage_branches_found: u64,
    #[serde(default)]
    pub coverage_branches_hit: u64,
}
155
/// Totals for a single language, used in [`AnalysisRun::totals_by_language`] and in
/// [`SubmoduleSummary::language_summaries`].
///
/// Fields marked `#[serde(default)]` fall back to `0` when absent from the input
/// being deserialized.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LanguageSummary {
    pub language: Language,
    pub files: u64,
    pub total_physical_lines: u64,
    pub code_lines: u64,
    pub comment_lines: u64,
    pub blank_lines: u64,
    pub mixed_lines_separate: u64,
    #[serde(default)]
    pub functions: u64,
    #[serde(default)]
    pub classes: u64,
    #[serde(default)]
    pub variables: u64,
    #[serde(default)]
    pub imports: u64,
    #[serde(default)]
    pub test_count: u64,
    #[serde(default)]
    pub test_assertion_count: u64,
    #[serde(default)]
    pub test_suite_count: u64,
    #[serde(default)]
    pub coverage_lines_found: u64,
    #[serde(default)]
    pub coverage_lines_hit: u64,
    #[serde(default)]
    pub coverage_functions_found: u64,
    #[serde(default)]
    pub coverage_functions_hit: u64,
    #[serde(default)]
    pub coverage_branches_found: u64,
    #[serde(default)]
    pub coverage_branches_hit: u64,
}
192
/// Everything recorded about one candidate file, whether it was analyzed or skipped.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileRecord {
    pub path: String,
    /// Path relative to the scan root; used for sorting, warning prefixes,
    /// submodule matching and coverage lookup.
    pub relative_path: String,
    pub language: Option<Language>,
    pub size_bytes: u64,
    pub detected_encoding: Option<String>,
    pub raw_line_categories: RawLineCounts,
    pub effective_counts: EffectiveCounts,
    /// Decides whether the record lands in the analyzed or the skipped list.
    pub status: FileStatus,
    /// Per-file warnings; `push_record` also copies them, prefixed with
    /// `relative_path`, into the run-level warning list.
    pub warnings: Vec<String>,
    pub generated: bool,
    pub minified: bool,
    pub vendor: bool,
    pub parse_mode: Option<ParseMode>,
    /// Name of the submodule containing the file, set by `process_submodules`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub submodule: Option<String>,
    /// Coverage entry matched by `attach_coverage`, if any.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub coverage: Option<FileCoverage>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub style_analysis: Option<StyleAnalysis>,
}
217
/// Style statistics for one language family inside a [`StyleSummary`].
// NOTE(review): the code that fills these fields is not in this part of the file;
// the `_pct` fields are presumably percentages in 0..=100 and `guide_avg_scores`
// presumably pairs a style-guide name with its average score — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LanguageStyleGroup {
    pub language_family: String,
    pub files_count: u32,
    pub dominant_guide: String,
    pub dominant_score_pct: u8,
    pub common_indent_style: String,
    pub guide_avg_scores: Vec<(String, u8)>,
    pub line80_compliant_pct: u8,
    pub line_col_compliant_pct: u8,
}
238
/// Run-wide style statistics, stored in [`AnalysisRun::style_summary`].
///
/// `assemble_run` builds it from the analyzed records and the configured
/// `style_col_threshold`.
// NOTE(review): `col_threshold` presumably echoes that configured threshold and the
// `_pct` fields are presumably percentages in 0..=100 — confirm in `build_style_summary`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StyleSummary {
    pub files_analyzed: u32,
    pub common_indent_style: String,
    pub line80_compliant_pct: u8,
    pub line_col_compliant_pct: u8,
    pub col_threshold: u16,
    pub by_language: Vec<LanguageStyleGroup>,
}
255
/// Alias of [`StyleSummary`].
// NOTE(review): presumably kept so code written against an older, C++-specific name
// keeps compiling — confirm before removing.
pub type CppStyleSummary = StyleSummary;
259
/// Totals and git metadata for one submodule, produced when
/// `discovery.submodule_breakdown` is enabled.
///
/// All `git_*` fields are optional and are omitted from serialized output when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SubmoduleSummary {
    pub name: String,
    pub relative_path: String,
    pub files_analyzed: u64,
    pub total_physical_lines: u64,
    pub code_lines: u64,
    pub comment_lines: u64,
    pub blank_lines: u64,
    pub language_summaries: Vec<LanguageSummary>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_short: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_long: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_branch: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_author: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_date: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_remote_url: Option<String>,
}
290
/// Complete, serializable result of one call to [`analyze`].
///
/// Optional fields are omitted from serialized output when empty / `None` and
/// default to empty / `None` when missing on input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalysisRun {
    pub tool: ToolMetadata,
    pub environment: EnvironmentMetadata,
    /// Clone of the configuration the run was executed with.
    pub effective_configuration: AppConfig,
    /// The configured root paths, converted to strings.
    pub input_roots: Vec<String>,
    pub summary_totals: SummaryTotals,
    pub totals_by_language: Vec<LanguageSummary>,
    /// Records with an `Analyzed*` status, sorted by relative path.
    pub per_file_records: Vec<FileRecord>,
    /// Records with any other status, sorted by relative path.
    pub skipped_file_records: Vec<FileRecord>,
    /// Discovery warnings, per-file warnings (prefixed with the file's relative
    /// path) and coverage-loading warnings.
    pub warnings: Vec<String>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub submodule_summaries: Vec<SubmoduleSummary>,
    /// First seven characters of `git_commit_long`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_short: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_long: Option<String>,
    /// Branch read from `HEAD`, falling back to CI environment variables.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_branch: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_author: Option<String>,
    /// Tags pointing at `HEAD`, joined with `", "`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_tags: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_nearest_tag: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_date: Option<String>,
    /// `url` of the `origin` remote from the repository's git config.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_remote_url: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub style_summary: Option<StyleSummary>,
}
333
/// Git metadata gathered by `detect_git_for_run`; every field is best-effort and
/// stays `None` when it cannot be determined.
#[derive(Default)]
struct GitInfo {
    /// First seven characters of `commit_long`.
    commit_short: Option<String>,
    commit_long: Option<String>,
    /// Branch named by `HEAD`, or the CI-provided branch as a fallback.
    branch: Option<String>,
    author: Option<String>,
    /// Tags pointing at `HEAD`, joined with `", "`.
    tags: Option<String>,
    nearest_tag: Option<String>,
    commit_date: Option<String>,
    remote_url: Option<String>,
}
345
346fn find_git_dir(start: &Path) -> Option<PathBuf> {
350 let mut current = Some(start);
351 while let Some(dir) = current {
352 let candidate = dir.join(".git");
353 if candidate.is_dir() {
354 return Some(candidate);
355 }
356 if candidate.is_file() {
357 if let Some(resolved) = resolve_git_file_pointer(&candidate, dir) {
358 return Some(resolved);
359 }
360 }
361 current = dir.parent();
362 }
363 None
364}
365
/// Follows a `.git` pointer file (`gitdir: <path>`) to the directory it names.
///
/// `file` is the pointer file and `base_dir` the directory containing it; a relative
/// target is resolved against `base_dir`. The result is canonicalized when possible.
/// Returns `None` if the file cannot be read, lacks the `gitdir: ` prefix, or the
/// target is not an existing directory.
fn resolve_git_file_pointer(file: &Path, base_dir: &Path) -> Option<PathBuf> {
    let text = fs::read_to_string(file).ok()?;
    // Git writes forward slashes even on Windows; convert to the native separator.
    let target = text
        .trim()
        .strip_prefix("gitdir: ")?
        .replace('/', std::path::MAIN_SEPARATOR_STR);
    let target = Path::new(&target);

    let joined = if target.is_absolute() {
        target.to_path_buf()
    } else {
        base_dir.join(target)
    };
    let git_dir = match joined.canonicalize() {
        Ok(canonical) => canonical,
        Err(_) => joined,
    };

    if git_dir.is_dir() {
        Some(git_dir)
    } else {
        None
    }
}
390
/// Resolves `refname` (e.g. `refs/heads/main`) to a commit hash without running git.
///
/// A loose ref file under `git_dir` is consulted first and accepted only when its
/// trimmed content is at least 40 characters of hexadecimal digits. Otherwise
/// `packed-refs` is scanned for an entry with exactly that name. Returns `None` when
/// neither source has the ref.
fn resolve_ref(git_dir: &Path, refname: &str) -> Option<String> {
    // Join component by component so the path uses the platform separator.
    let ref_path = refname
        .split('/')
        .fold(git_dir.to_path_buf(), |p, c| p.join(c));
    let loose = fs::read_to_string(&ref_path)
        .ok()
        .map(|s| s.trim().to_string())
        .filter(|s| s.len() >= 40 && s.chars().all(|c| c.is_ascii_hexdigit()));
    if loose.is_some() {
        return loose;
    }

    let packed = fs::read_to_string(git_dir.join("packed-refs")).ok()?;
    packed
        .lines()
        // `#` lines are headers, `^` lines are peeled tag targets.
        .filter(|line| !line.starts_with('#') && !line.starts_with('^'))
        // A line without a space (e.g. a blank line) is skipped; it must not abort
        // the whole lookup.
        .filter_map(|line| line.split_once(' '))
        .find(|(_, name)| name.trim() == refname)
        .map(|(sha, _)| sha.to_string())
}
426
/// Extracts the value from a trimmed git-config line of the form `url = <value>`.
///
/// Spaces and tabs are allowed between the key and `=`. Returns `None` when the line
/// does not start with `url`, has no `=` right after the key, or the value is empty.
fn parse_url_line(line: &str) -> Option<&str> {
    let after_key = line.strip_prefix("url")?;
    let after_gap = after_key.trim_start_matches(|c: char| c == ' ' || c == '\t');
    let value = after_gap.strip_prefix('=')?.trim();
    Some(value).filter(|v| !v.is_empty())
}
438
439fn read_git_remote_url(git_dir: &Path) -> Option<String> {
441 let config = fs::read_to_string(git_dir.join("config")).ok()?;
442 let mut in_origin = false;
443 for line in config.lines() {
444 let trimmed = line.trim();
445 if trimmed.starts_with('[') {
446 in_origin = trimmed == r#"[remote "origin"]"#;
447 } else if in_origin {
448 if let Some(url) = parse_url_line(trimmed) {
449 return Some(url.to_owned());
450 }
451 }
452 }
453 None
454}
455
456fn detect_git_for_run(project_path: &Path) -> GitInfo {
460 let ci_branch = ci_branch_from_env();
462
463 let Some(git_dir) = find_git_dir(project_path) else {
464 return GitInfo {
467 branch: ci_branch,
468 ..GitInfo::default()
469 };
470 };
471
472 let head_raw = match fs::read_to_string(git_dir.join("HEAD")) {
473 Ok(s) => s.trim().to_string(),
474 Err(_) => {
475 return GitInfo {
476 branch: ci_branch,
477 ..GitInfo::default()
478 }
479 }
480 };
481
482 let (branch_from_head, commit_long) = head_raw.strip_prefix("ref: ").map_or_else(
483 || {
484 if head_raw.len() >= 40 && head_raw.chars().all(|c| c.is_ascii_hexdigit()) {
485 (None, Some(head_raw[..40].to_string()))
487 } else {
488 (None, None)
489 }
490 },
491 |refname| {
492 let branch = refname
493 .strip_prefix("refs/heads/")
494 .map(|b| b.trim().to_string());
495 let sha = resolve_ref(&git_dir, refname.trim());
496 (branch, sha)
497 },
498 );
499 let branch = branch_from_head.or(ci_branch);
502
503 let commit_short = commit_long
504 .as_deref()
505 .map(|s| s.chars().take(7).collect::<String>());
506
507 let author = run_git_cmd(project_path, &["log", "-1", "--format=%an", "HEAD"]);
508 let commit_date = run_git_cmd(project_path, &["log", "-1", "--format=%aI", "HEAD"]);
509 let remote_url = read_git_remote_url(&git_dir);
510
511 let tags = run_git_cmd(project_path, &["tag", "--points-at", "HEAD"]).map(|t| {
514 t.lines()
515 .filter(|l| !l.is_empty())
516 .collect::<Vec<_>>()
517 .join(", ")
518 });
519 let nearest_tag = run_git_cmd(project_path, &["describe", "--tags", "--abbrev=0", "HEAD"]);
520
521 GitInfo {
522 commit_short,
523 commit_long,
524 branch,
525 author,
526 tags,
527 nearest_tag,
528 commit_date,
529 remote_url,
530 }
531}
532
/// Runs `git <args>` in `dir` and returns its trimmed standard output.
///
/// The executable is looked up on `PATH` first and then in a few well-known install
/// locations. The fallback applies only when a candidate cannot be launched: once a
/// git binary has run, its outcome is final, so a failing or silent command (for
/// example `describe` in a repository without tags) is not re-run with every other
/// candidate. Returns `None` if no candidate can be launched, the command exits
/// unsuccessfully, or the output is empty or not valid UTF-8.
fn run_git_cmd(dir: &Path, args: &[&str]) -> Option<String> {
    const CANDIDATES: &[&str] = &[
        "git",
        "/usr/bin/git",
        "/usr/local/bin/git",
        "/opt/homebrew/bin/git",
        r"C:\Program Files\Git\cmd\git.exe",
        r"C:\Program Files\Git\bin\git.exe",
        r"C:\Program Files (x86)\Git\cmd\git.exe",
    ];
    for &exe in CANDIDATES {
        let launched = std::process::Command::new(exe)
            // Allow reading repositories owned by another user (common in CI).
            .args(["-c", "safe.directory=*"])
            .args(args)
            .current_dir(dir)
            .output();
        let Ok(output) = launched else {
            // This candidate could not be started; try the next location.
            continue;
        };
        if !output.status.success() {
            return None;
        }
        return String::from_utf8(output.stdout)
            .ok()
            .map(|s| s.trim().to_string())
            .filter(|s| !s.is_empty());
    }
    None
}
567
/// Names the CI system the process appears to be running under, if any.
///
/// Jenkins is recognised by the mere presence of `JENKINS_URL`, `JENKINS_HOME` or
/// `BUILD_URL` and takes precedence. The flag-style systems are recognised only when
/// their variable equals exactly `"true"`. TeamCity is recognised by the presence of
/// `TEAMCITY_VERSION`.
fn detect_ci_system() -> Option<&'static str> {
    const TRUE_FLAGS: &[(&str, &str)] = &[
        ("GITHUB_ACTIONS", "GitHub Actions"),
        ("GITLAB_CI", "GitLab CI"),
        ("CIRCLECI", "CircleCI"),
        ("TRAVIS", "Travis CI"),
        ("TF_BUILD", "Azure DevOps"),
    ];
    let is_set = |key: &str| std::env::var(key).is_ok();
    let is_true = |key: &str| matches!(std::env::var(key).as_deref(), Ok("true"));

    if ["JENKINS_URL", "JENKINS_HOME", "BUILD_URL"]
        .iter()
        .any(|&key| is_set(key))
    {
        return Some("Jenkins");
    }
    for &(key, label) in TRUE_FLAGS {
        if is_true(key) {
            return Some(label);
        }
    }
    if is_set("TEAMCITY_VERSION") {
        Some("TeamCity")
    } else {
        None
    }
}
595
/// Reads the current branch name from well-known CI environment variables.
///
/// Variables are tried in a fixed order. Each value is trimmed and a leading
/// `refs/heads/` (or, failing that, `origin/`) is removed. Values that end up empty
/// or equal to `HEAD` are ignored and the next variable is tried.
fn ci_branch_from_env() -> Option<String> {
    const VARS: &[&str] = &[
        "BRANCH_NAME",
        "GIT_BRANCH",
        "GITHUB_REF_NAME",
        "CI_COMMIT_BRANCH",
        "CIRCLE_BRANCH",
        "TRAVIS_BRANCH",
        "BUILD_SOURCEBRANCH",
    ];
    VARS.iter().find_map(|&var| {
        let raw = std::env::var(var).ok()?;
        let trimmed = raw.trim();
        let name = match trimmed.strip_prefix("refs/heads/") {
            Some(rest) => rest,
            None => trimmed.strip_prefix("origin/").unwrap_or(trimmed),
        };
        if name.is_empty() || name == "HEAD" {
            None
        } else {
            Some(name.to_string())
        }
    })
}
622
/// Returns the invoking user's name from `USERNAME`, then `USER`, or `"unknown"` when
/// neither variable can be read.
fn get_current_username() -> String {
    ["USERNAME", "USER"]
        .iter()
        .find_map(|&key| std::env::var(key).ok())
        .unwrap_or_else(|| "unknown".to_string())
}
628
/// Reads environment variable `var`, treating an unset, unreadable or empty value as
/// absent.
fn non_empty_env(var: &str) -> Option<String> {
    std::env::var(var).ok().filter(|value| !value.is_empty())
}
637
/// Reports whether any of the Jenkins marker variables (`JENKINS_URL`,
/// `JENKINS_HOME`, `BUILD_URL`) is present, regardless of its value.
fn is_jenkins_env() -> bool {
    ["JENKINS_URL", "JENKINS_HOME", "BUILD_URL"]
        .iter()
        .any(|&key| std::env::var(key).is_ok())
}
643
644fn get_hostname() -> String {
645 if is_jenkins_env() {
648 if let Some(n) = non_empty_env("NODE_NAME") {
649 return n;
650 }
651 }
652 if std::env::var("GITHUB_ACTIONS").as_deref() == Ok("true") {
653 if let Some(r) = non_empty_env("RUNNER_NAME") {
654 return r;
655 }
656 }
657 if std::env::var("GITLAB_CI").as_deref() == Ok("true") {
658 if let Some(r) = non_empty_env("CI_RUNNER_DESCRIPTION") {
659 return r;
660 }
661 }
662 std::env::var("COMPUTERNAME")
663 .or_else(|_| std::env::var("HOSTNAME"))
664 .or_else(|_| std::fs::read_to_string("/etc/hostname").map(|s| s.trim().to_string()))
665 .unwrap_or_else(|_| "unknown".to_string())
666}
667
668#[allow(clippy::too_many_arguments)]
670fn walk_root(
671 root: &Path,
672 config: &AppConfig,
673 include_globs: Option<&GlobSet>,
674 exclude_globs: Option<&GlobSet>,
675 enabled_languages: Option<&BTreeSet<Language>>,
676 seen_paths: &mut HashSet<PathBuf>,
677 analyzed: &mut Vec<FileRecord>,
678 skipped: &mut Vec<FileRecord>,
679 warnings: &mut Vec<String>,
680 cancel: Option<&AtomicBool>,
681 progress: Option<&ProgressCounters>,
682) -> Result<()> {
683 let mut builder = WalkBuilder::new(root);
684 builder
685 .follow_links(config.discovery.follow_symlinks)
686 .hidden(config.discovery.ignore_hidden_files)
687 .ignore(config.discovery.honor_ignore_files)
688 .parents(config.discovery.honor_ignore_files)
689 .git_ignore(config.discovery.honor_ignore_files)
690 .git_global(config.discovery.honor_ignore_files)
691 .git_exclude(config.discovery.honor_ignore_files);
692
693 let paths = collect_walk_paths(&builder, seen_paths, warnings);
694 if paths.is_empty() {
695 return Ok(());
696 }
697
698 if let Some(p) = progress {
699 p.files_total.fetch_add(paths.len(), Ordering::Relaxed);
700 }
701
702 let chunk_results = run_parallel_analysis(
703 &paths,
704 root,
705 config,
706 include_globs,
707 exclude_globs,
708 enabled_languages,
709 cancel,
710 progress,
711 )?;
712 merge_chunk_results(chunk_results, analyzed, skipped, warnings)
713}
714
715fn collect_walk_paths(
716 builder: &WalkBuilder,
717 seen_paths: &mut HashSet<PathBuf>,
718 warnings: &mut Vec<String>,
719) -> Vec<PathBuf> {
720 let (tx, rx) = std::sync::mpsc::channel::<std::result::Result<PathBuf, String>>();
724
725 builder.build_parallel().run(|| {
726 let tx = tx.clone();
727 Box::new(move |entry| {
728 match entry {
729 Err(e) => {
730 let _ = tx.send(Err(format!("discovery warning: {e}")));
731 }
732 Ok(e) => {
733 let path = e.into_path();
734 if !path.is_dir() {
735 let _ = tx.send(Ok(path));
736 }
737 }
738 }
739 ignore::WalkState::Continue
740 })
741 });
742
743 drop(tx);
746
747 rx.into_iter()
748 .filter_map(|msg| match msg {
749 Ok(path) => {
750 if seen_paths.insert(path.clone()) {
751 Some(path)
752 } else {
753 None
754 }
755 }
756 Err(warn) => {
757 warnings.push(warn);
758 None
759 }
760 })
761 .collect()
762}
763
764#[allow(clippy::too_many_arguments)]
766fn worker_loop(
767 paths: &[PathBuf],
768 root: &Path,
769 config: &AppConfig,
770 include_globs: Option<&GlobSet>,
771 exclude_globs: Option<&GlobSet>,
772 enabled_languages: Option<&BTreeSet<Language>>,
773 cancel: Option<&AtomicBool>,
774 next_index: &AtomicUsize,
775 files_done: Option<&AtomicUsize>,
776) -> Vec<Result<Option<FileRecord>>> {
777 let mut results = Vec::new();
778 loop {
779 if cancel.is_some_and(|c| c.load(Ordering::Relaxed)) {
780 results.push(Err(anyhow::anyhow!("analysis cancelled")));
781 break;
782 }
783 let i = next_index.fetch_add(1, Ordering::Relaxed);
784 if i >= paths.len() {
785 break;
786 }
787 results.push(analyze_candidate_file(
788 &paths[i],
789 root,
790 config,
791 include_globs,
792 exclude_globs,
793 enabled_languages,
794 ));
795 if let Some(fd) = files_done {
796 fd.fetch_add(1, Ordering::Relaxed);
797 }
798 }
799 results
800}
801
802#[allow(clippy::too_many_arguments)]
803fn run_parallel_analysis(
804 paths: &[PathBuf],
805 root: &Path,
806 config: &AppConfig,
807 include_globs: Option<&GlobSet>,
808 exclude_globs: Option<&GlobSet>,
809 enabled_languages: Option<&BTreeSet<Language>>,
810 cancel: Option<&AtomicBool>,
811 progress: Option<&ProgressCounters>,
812) -> Result<Vec<Vec<Result<Option<FileRecord>>>>> {
813 let thread_count = std::thread::available_parallelism().map_or(DEFAULT_ANALYSIS_THREADS, |n| {
814 n.get().min(MAX_ANALYSIS_THREADS)
815 });
816 let next_index = AtomicUsize::new(0);
820 let files_done: Option<&AtomicUsize> = progress.map(|p| p.files_done.as_ref());
821
822 std::thread::scope(|s| -> Result<Vec<Vec<Result<Option<FileRecord>>>>> {
823 let mut handles = Vec::with_capacity(thread_count);
826 for _ in 0..thread_count {
827 handles.push(s.spawn(|| {
828 worker_loop(
829 paths,
830 root,
831 config,
832 include_globs,
833 exclude_globs,
834 enabled_languages,
835 cancel,
836 &next_index,
837 files_done,
838 )
839 }));
840 }
841 handles
842 .into_iter()
843 .map(|h| {
844 h.join()
845 .map_err(|_| anyhow::anyhow!("analysis thread panicked"))
846 })
847 .collect()
848 })
849}
850
851fn merge_chunk_results(
852 chunk_results: Vec<Vec<Result<Option<FileRecord>>>>,
853 analyzed: &mut Vec<FileRecord>,
854 skipped: &mut Vec<FileRecord>,
855 warnings: &mut Vec<String>,
856) -> Result<()> {
857 for chunk in chunk_results {
858 for result in chunk {
859 if let Some(record) = result? {
860 push_record(record, analyzed, skipped, warnings);
861 }
862 }
863 }
864 Ok(())
865}
866
867fn process_submodules(config: &AppConfig, analyzed: &mut [FileRecord]) -> Vec<SubmoduleSummary> {
869 let root = config.discovery.root_paths[0]
870 .canonicalize()
871 .unwrap_or_else(|_| config.discovery.root_paths[0].clone());
872 let submodules = detect_submodules(&root);
873 if submodules.is_empty() {
874 return Vec::new();
875 }
876
877 for file in analyzed.iter_mut() {
878 for (name, sub_path) in &submodules {
879 let prefix = sub_path.to_string_lossy().replace('\\', "/");
880 let rel = &file.relative_path;
881 if rel == &prefix || rel.starts_with(&format!("{prefix}/")) {
882 file.submodule = Some(name.clone());
883 break;
884 }
885 }
886 }
887
888 build_submodule_summaries(analyzed, &submodules, &root)
889}
890
891fn assemble_run(
893 config: &AppConfig,
894 runtime_mode: &str,
895 analyzed: Vec<FileRecord>,
896 skipped: Vec<FileRecord>,
897 warnings: Vec<String>,
898 submodule_summaries: Vec<SubmoduleSummary>,
899) -> AnalysisRun {
900 let summary = build_summary(&analyzed, &skipped);
901 let language_summaries = build_language_summaries(&analyzed);
902 let col_threshold = config.analysis.style_col_threshold;
903 let style_summary = build_style_summary(&analyzed, col_threshold);
904
905 let first_root = config
906 .discovery
907 .root_paths
908 .first()
909 .map(|p| p.canonicalize().unwrap_or_else(|_| p.clone()));
910 let git = first_root
911 .as_deref()
912 .map(detect_git_for_run)
913 .unwrap_or_default();
914
915 let now = Utc::now();
916 let run_id = {
917 let uuid_suffix = Uuid::new_v4().simple().to_string();
918 format!("{}-{}", now.format("%Y%m%d-%H%M"), uuid_suffix)
919 };
920
921 AnalysisRun {
922 tool: ToolMetadata {
923 name: "sloc".into(),
924 version: env!("CARGO_PKG_VERSION").into(),
925 run_id,
926 timestamp_utc: now,
927 },
928 environment: EnvironmentMetadata {
929 operating_system: std::env::consts::OS.into(),
930 architecture: std::env::consts::ARCH.into(),
931 runtime_mode: runtime_mode.into(),
932 initiator_username: get_current_username(),
933 initiator_hostname: get_hostname(),
934 ci_name: if is_jenkins_env() {
935 Some(format!("Jenkins\t{}", get_hostname()))
936 } else {
937 detect_ci_system().map(str::to_string)
938 },
939 },
940 effective_configuration: config.clone(),
941 input_roots: config
942 .discovery
943 .root_paths
944 .iter()
945 .map(|p| path_to_string(p))
946 .collect(),
947 summary_totals: summary,
948 totals_by_language: language_summaries,
949 per_file_records: analyzed,
950 skipped_file_records: skipped,
951 warnings,
952 submodule_summaries,
953 git_commit_short: git.commit_short,
954 git_commit_long: git.commit_long,
955 git_branch: git.branch,
956 git_commit_author: git.author,
957 git_tags: git.tags,
958 git_nearest_tag: git.nearest_tag,
959 git_commit_date: git.commit_date,
960 git_remote_url: git.remote_url,
961 style_summary,
962 }
963}
964
965#[allow(clippy::too_many_lines)]
970pub fn analyze(
971 config: &AppConfig,
972 runtime_mode: &str,
973 cancel: Option<&AtomicBool>,
974 progress: Option<&ProgressCounters>,
975) -> Result<AnalysisRun> {
976 config.validate()?;
977
978 if config.discovery.root_paths.is_empty() {
979 anyhow::bail!("no input paths were provided");
980 }
981
982 let include_globs = compile_globset(&config.discovery.include_globs)?;
983 let exclude_globs = compile_globset(&config.discovery.exclude_globs)?;
984 let enabled_languages = parse_enabled_languages(&config.analysis.enabled_languages)?;
985
986 let mut analyzed = Vec::new();
987 let mut skipped = Vec::new();
988 let mut warnings = Vec::new();
989 let mut seen_paths = HashSet::new();
990
991 for root in &config.discovery.root_paths {
992 if cancel.is_some_and(|c| c.load(Ordering::Relaxed)) {
993 anyhow::bail!("analysis cancelled");
994 }
995
996 let root = root.canonicalize().unwrap_or_else(|_| root.clone());
997
998 if root.is_file() {
999 if let Some(record) = analyze_candidate_file(
1000 &root,
1001 root.parent().unwrap_or_else(|| Path::new(".")),
1002 config,
1003 include_globs.as_ref(),
1004 exclude_globs.as_ref(),
1005 enabled_languages.as_ref(),
1006 )? {
1007 push_record(record, &mut analyzed, &mut skipped, &mut warnings);
1008 }
1009 continue;
1010 }
1011
1012 walk_root(
1013 &root,
1014 config,
1015 include_globs.as_ref(),
1016 exclude_globs.as_ref(),
1017 enabled_languages.as_ref(),
1018 &mut seen_paths,
1019 &mut analyzed,
1020 &mut skipped,
1021 &mut warnings,
1022 cancel,
1023 progress,
1024 )?;
1025 }
1026
1027 analyzed.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
1028 skipped.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
1029
1030 let submodule_summaries = if config.discovery.submodule_breakdown {
1032 process_submodules(config, &mut analyzed)
1033 } else {
1034 Vec::new()
1035 };
1036
1037 attach_coverage(config, &mut analyzed, &mut warnings);
1038
1039 Ok(assemble_run(
1040 config,
1041 runtime_mode,
1042 analyzed,
1043 skipped,
1044 warnings,
1045 submodule_summaries,
1046 ))
1047}
1048
1049fn attach_coverage(config: &AppConfig, analyzed: &mut [FileRecord], warnings: &mut Vec<String>) {
1050 let Some(cov_path) = coverage::resolve_coverage_file(config.analysis.coverage_file.as_deref())
1051 else {
1052 return;
1053 };
1054 tracing::debug!(path = %cov_path.display(), "loading coverage file");
1055 match fs::read_to_string(&cov_path) {
1056 Ok(content) => {
1057 let cov_map = coverage::parse_coverage_auto(&cov_path, &content);
1058 let mut matched: u32 = 0;
1059 let mut unmatched: u32 = 0;
1060 for record in analyzed.iter_mut() {
1061 record.coverage =
1062 coverage::lookup_coverage(&cov_map, &record.relative_path).cloned();
1063 if record.coverage.is_some() {
1064 matched += 1;
1065 } else {
1066 unmatched += 1;
1067 }
1068 }
1069 tracing::debug!(
1070 path = %cov_path.display(),
1071 coverage_entries = cov_map.len(),
1072 files_matched = matched,
1073 files_unmatched = unmatched,
1074 "coverage attached"
1075 );
1076 if unmatched > 0 && matched == 0 {
1077 tracing::warn!(
1078 path = %cov_path.display(),
1079 "coverage file loaded but no source files could be matched — check that paths in the coverage report match the scanned directory"
1080 );
1081 }
1082 }
1083 Err(e) => {
1084 tracing::warn!(path = %cov_path.display(), error = %e, "coverage file could not be read");
1085 warnings.push(format!(
1086 "coverage file '{}' could not be read: {e}",
1087 cov_path.display()
1088 ));
1089 }
1090 }
1091}
1092
1093fn push_record(
1094 record: FileRecord,
1095 analyzed: &mut Vec<FileRecord>,
1096 skipped: &mut Vec<FileRecord>,
1097 warnings: &mut Vec<String>,
1098) {
1099 warnings.extend(
1100 record
1101 .warnings
1102 .iter()
1103 .map(|warning| format!("{}: {warning}", record.relative_path)),
1104 );
1105
1106 match record.status {
1107 FileStatus::AnalyzedExact | FileStatus::AnalyzedBestEffort => analyzed.push(record),
1108 _ => skipped.push(record),
1109 }
1110}
1111
1112#[inline]
1114fn skip_with_reason(
1115 path: &Path,
1116 root: &Path,
1117 size: u64,
1118 reason: impl Into<String>,
1119) -> MetadataPolicyOutcome {
1120 MetadataPolicyOutcome::Skip(Box::new(skipped_record(
1121 path,
1122 root,
1123 size,
1124 FileStatus::SkippedByPolicy,
1125 vec![reason.into()],
1126 )))
1127}
1128
1129#[allow(clippy::too_many_arguments)]
1133fn check_metadata_policy(
1134 path: &Path,
1135 root: &Path,
1136 relative_path: &str,
1137 metadata: &fs::Metadata,
1138 config: &AppConfig,
1139 include_globs: Option<&GlobSet>,
1140 exclude_globs: Option<&GlobSet>,
1141) -> MetadataPolicyOutcome {
1142 let size = metadata.len();
1143
1144 if metadata.file_type().is_symlink() && !config.discovery.follow_symlinks {
1145 return skip_with_reason(path, root, size, "symlink skipped by policy");
1146 }
1147 if file_name_eq(path, ".gitignore") {
1148 return skip_with_reason(path, root, size, ".gitignore is always excluded");
1149 }
1150 if is_excluded_dir_path(path, &config.discovery.excluded_directories) {
1151 return skip_with_reason(path, root, size, "path matched excluded directory setting");
1152 }
1153 if size > config.discovery.max_file_size_bytes {
1154 return skip_with_reason(
1155 path,
1156 root,
1157 size,
1158 format!(
1159 "file exceeded max_file_size_bytes ({})",
1160 config.discovery.max_file_size_bytes
1161 ),
1162 );
1163 }
1164 if let Some(globs) = include_globs {
1165 if !globs.is_match(Path::new(relative_path)) && !globs.is_match(path) {
1166 return MetadataPolicyOutcome::Exclude;
1167 }
1168 }
1169 if let Some(globs) = exclude_globs {
1170 if globs.is_match(Path::new(relative_path)) || globs.is_match(path) {
1171 return skip_with_reason(path, root, size, "path matched exclude glob");
1172 }
1173 }
1174 if is_known_lockfile(path) && !config.analysis.include_lockfiles {
1175 return skip_with_reason(path, root, size, "lockfile skipped by default policy");
1176 }
1177
1178 MetadataPolicyOutcome::Continue
1179}
1180
/// Outcome of the content-based policy checks done by `check_content_policy`.
struct ContentPolicyResult {
    /// The path was classified as a vendor path.
    vendor: bool,
    /// Generated-file detection is enabled and the file looked generated.
    generated: bool,
    /// Minified-file detection is enabled and the file looked minified.
    minified: bool,
    /// `Some` when one of the checks rejected the file; the caller returns it as the
    /// file's record.
    skip_record: Option<FileRecord>,
}
1187
1188fn check_content_policy(
1191 path: &Path,
1192 root: &Path,
1193 size_bytes: u64,
1194 bytes: &[u8],
1195 config: &AppConfig,
1196) -> ContentPolicyResult {
1197 let vendor = is_vendor_path(path);
1198 if vendor && config.analysis.vendor_directory_detection {
1199 return ContentPolicyResult {
1200 vendor,
1201 generated: false,
1202 minified: false,
1203 skip_record: Some(skipped_record(
1204 path,
1205 root,
1206 size_bytes,
1207 FileStatus::SkippedByPolicy,
1208 vec!["vendor file skipped by policy".into()],
1209 )),
1210 };
1211 }
1212
1213 let generated = config.analysis.generated_file_detection && looks_generated(path, bytes);
1214 if generated {
1215 return ContentPolicyResult {
1216 vendor,
1217 generated,
1218 minified: false,
1219 skip_record: Some(skipped_record(
1220 path,
1221 root,
1222 size_bytes,
1223 FileStatus::SkippedByPolicy,
1224 vec!["generated file skipped by policy".into()],
1225 )),
1226 };
1227 }
1228
1229 let minified = config.analysis.minified_file_detection && looks_minified(path, bytes);
1230 if minified {
1231 return ContentPolicyResult {
1232 vendor,
1233 generated,
1234 minified,
1235 skip_record: Some(skipped_record(
1236 path,
1237 root,
1238 size_bytes,
1239 FileStatus::SkippedByPolicy,
1240 vec!["minified file skipped by policy".into()],
1241 )),
1242 };
1243 }
1244
1245 ContentPolicyResult {
1246 vendor,
1247 generated,
1248 minified,
1249 skip_record: None,
1250 }
1251}
1252
1253fn decode_file_contents(
1255 path: &Path,
1256 root: &Path,
1257 size_bytes: u64,
1258 bytes: &[u8],
1259 config: &AppConfig,
1260) -> Result<Option<(String, String, Vec<String>)>> {
1261 if is_binary(bytes) {
1262 return match config.analysis.binary_file_behavior {
1263 BinaryFileBehavior::Skip => Ok(None),
1264 BinaryFileBehavior::Fail => {
1265 anyhow::bail!("binary file encountered: {}", path.display())
1266 }
1267 };
1268 }
1269
1270 match decode_bytes(bytes) {
1271 Ok(result) => Ok(Some(result)),
1272 Err(err) => match config.analysis.decode_failure_behavior {
1273 FailureBehavior::WarnSkip => {
1274 let _ = (path, root, size_bytes); Err(anyhow::anyhow!("__decode_warn__: {err}"))
1279 }
1280 FailureBehavior::Fail => {
1281 anyhow::bail!("decode failure for {}: {err}", path.display())
1282 }
1283 },
1284 }
1285}
1286
1287#[allow(clippy::too_many_lines)]
1288fn analyze_candidate_file(
1289 path: &Path,
1290 root: &Path,
1291 config: &AppConfig,
1292 include_globs: Option<&GlobSet>,
1293 exclude_globs: Option<&GlobSet>,
1294 enabled_languages: Option<&BTreeSet<Language>>,
1295) -> Result<Option<FileRecord>> {
1296 let metadata = match fs::symlink_metadata(path) {
1297 Ok(metadata) => metadata,
1298 Err(err) => {
1299 return Ok(Some(skipped_record(
1300 path,
1301 root,
1302 0,
1303 FileStatus::ErrorInternal,
1304 vec![format!("failed to read metadata: {err}")],
1305 )));
1306 }
1307 };
1308
1309 let relative_path = relative_path_string(path, root);
1310
1311 match check_metadata_policy(
1313 path,
1314 root,
1315 &relative_path,
1316 &metadata,
1317 config,
1318 include_globs,
1319 exclude_globs,
1320 ) {
1321 MetadataPolicyOutcome::Skip(record) => return Ok(Some(*record)),
1322 MetadataPolicyOutcome::Exclude => return Ok(None),
1323 MetadataPolicyOutcome::Continue => {}
1324 }
1325
1326 let bytes = match fs::read(path) {
1327 Ok(bytes) => bytes,
1328 Err(err) => {
1329 return Ok(Some(skipped_record(
1330 path,
1331 root,
1332 metadata.len(),
1333 FileStatus::ErrorInternal,
1334 vec![format!("failed to read file: {err}")],
1335 )));
1336 }
1337 };
1338
1339 let content_policy = check_content_policy(path, root, metadata.len(), &bytes, config);
1341 if let Some(record) = content_policy.skip_record {
1342 return Ok(Some(record));
1343 }
1344 let (vendor, generated, minified) = (
1345 content_policy.vendor,
1346 content_policy.generated,
1347 content_policy.minified,
1348 );
1349
1350 let (text, encoding, decode_warnings) =
1352 match decode_file_contents(path, root, metadata.len(), &bytes, config) {
1353 Ok(Some(result)) => result,
1354 Ok(None) => {
1355 return Ok(Some(skipped_record(
1356 path,
1357 root,
1358 metadata.len(),
1359 FileStatus::SkippedBinary,
1360 vec!["binary file skipped by default".into()],
1361 )));
1362 }
1363 Err(err) => {
1364 let msg = err.to_string();
1365 if let Some(warn_msg) = msg.strip_prefix("__decode_warn__: ") {
1366 return Ok(Some(skipped_record(
1367 path,
1368 root,
1369 metadata.len(),
1370 FileStatus::SkippedDecodeError,
1371 vec![warn_msg.to_string()],
1372 )));
1373 }
1374 return Err(err);
1375 }
1376 };
1377
1378 let first_line = text.lines().next();
1379 let language = detect_language(
1380 path,
1381 first_line,
1382 &config.analysis.extension_overrides,
1383 config.analysis.shebang_detection,
1384 );
1385
1386 let Some(language) = language else {
1387 return Ok(Some(skipped_record(
1388 path,
1389 root,
1390 metadata.len(),
1391 FileStatus::SkippedUnsupported,
1392 vec!["unsupported or undetected language".into()],
1393 )));
1394 };
1395
1396 if let Some(enabled) = enabled_languages {
1397 if !enabled.contains(&language) {
1398 return Ok(Some(skipped_record(
1399 path,
1400 root,
1401 metadata.len(),
1402 FileStatus::SkippedByPolicy,
1403 vec![format!(
1404 "language {} disabled by configuration",
1405 language.display_name()
1406 )],
1407 )));
1408 }
1409 }
1410
1411 let style_scope = match config.analysis.style_lang_scope.as_str() {
1412 "c_family" => StyleLangScope::CFamilyOnly,
1413 _ => StyleLangScope::All,
1414 };
1415 let ieee_opts = AnalysisOptions {
1416 blank_in_block_comment_as_comment: config.analysis.blank_in_block_comment_policy
1417 == BlankInBlockCommentPolicy::CountAsComment,
1418 collapse_continuation_lines: config.analysis.continuation_line_policy
1419 == ContinuationLinePolicy::CollapseToLogical,
1420 enable_style: config.analysis.style_analysis_enabled,
1421 style_lang_scope: style_scope,
1422 };
1423 let analysis = analyze_text(language, &text, ieee_opts);
1424 let effective_counts = compute_effective_counts(
1425 &analysis.raw,
1426 config.analysis.mixed_line_policy,
1427 config.analysis.python_docstrings_as_comments,
1428 config.analysis.count_compiler_directives,
1429 );
1430
1431 let mut warnings = decode_warnings;
1432 warnings.extend(analysis.warnings.clone());
1433
1434 Ok(Some(FileRecord {
1435 path: path_to_string(path),
1436 relative_path,
1437 language: Some(language),
1438 size_bytes: metadata.len(),
1439 detected_encoding: Some(encoding),
1440 raw_line_categories: analysis.raw,
1441 effective_counts,
1442 status: match analysis.parse_mode {
1443 ParseMode::Lexical | ParseMode::TreeSitter => FileStatus::AnalyzedExact,
1444 ParseMode::LexicalBestEffort => FileStatus::AnalyzedBestEffort,
1445 },
1446 warnings,
1447 generated,
1448 minified,
1449 vendor,
1450 parse_mode: Some(analysis.parse_mode),
1451 submodule: None,
1452 coverage: None,
1453 style_analysis: analysis.style_analysis,
1454 }))
1455}
1456
1457const fn compute_effective_counts(
1458 raw: &RawLineCounts,
1459 mixed_line_policy: MixedLinePolicy,
1460 python_docstrings_as_comments: bool,
1461 count_compiler_directives: bool,
1462) -> EffectiveCounts {
1463 let mut effective = EffectiveCounts {
1464 code_lines: raw.code_only_lines,
1465 comment_lines: raw.single_comment_only_lines + raw.multi_comment_only_lines,
1466 blank_lines: raw.blank_only_lines,
1467 mixed_lines_separate: 0,
1468 };
1469
1470 if python_docstrings_as_comments {
1471 effective.comment_lines += raw.docstring_comment_lines;
1472 } else {
1473 effective.code_lines += raw.docstring_comment_lines;
1474 }
1475
1476 let mixed_total = raw.mixed_code_single_comment_lines + raw.mixed_code_multi_comment_lines;
1477 match mixed_line_policy {
1478 MixedLinePolicy::CodeOnly => effective.code_lines += mixed_total,
1479 MixedLinePolicy::CodeAndComment => {
1480 effective.code_lines += mixed_total;
1481 effective.comment_lines += mixed_total;
1482 }
1483 MixedLinePolicy::CommentOnly => effective.comment_lines += mixed_total,
1484 MixedLinePolicy::SeparateMixedCategory => effective.mixed_lines_separate += mixed_total,
1485 }
1486
1487 if !count_compiler_directives {
1490 effective.code_lines = effective
1491 .code_lines
1492 .saturating_sub(raw.compiler_directive_lines);
1493 }
1494
1495 effective
1496}
1497
1498fn build_summary(analyzed: &[FileRecord], skipped: &[FileRecord]) -> SummaryTotals {
1499 let mut summary = SummaryTotals {
1500 files_considered: (analyzed.len() + skipped.len()) as u64,
1501 files_analyzed: analyzed.len() as u64,
1502 files_skipped: skipped.len() as u64,
1503 ..Default::default()
1504 };
1505
1506 for record in analyzed {
1507 summary.total_physical_lines += record.raw_line_categories.total_physical_lines;
1508 summary.code_lines += record.effective_counts.code_lines;
1509 summary.comment_lines += record.effective_counts.comment_lines;
1510 summary.blank_lines += record.effective_counts.blank_lines;
1511 summary.mixed_lines_separate += record.effective_counts.mixed_lines_separate;
1512 summary.functions += record.raw_line_categories.functions;
1513 summary.classes += record.raw_line_categories.classes;
1514 summary.variables += record.raw_line_categories.variables;
1515 summary.imports += record.raw_line_categories.imports;
1516 summary.test_count += record.raw_line_categories.test_count;
1517 summary.test_assertion_count += record.raw_line_categories.test_assertion_count;
1518 summary.test_suite_count += record.raw_line_categories.test_suite_count;
1519 if let Some(cov) = &record.coverage {
1520 summary.coverage_lines_found += u64::from(cov.lines_found);
1521 summary.coverage_lines_hit += u64::from(cov.lines_hit);
1522 summary.coverage_functions_found += u64::from(cov.functions_found);
1523 summary.coverage_functions_hit += u64::from(cov.functions_hit);
1524 summary.coverage_branches_found += u64::from(cov.branches_found);
1525 summary.coverage_branches_hit += u64::from(cov.branches_hit);
1526 }
1527 }
1528
1529 summary
1530}
1531
1532const fn zeroed_summary(language: Language) -> LanguageSummary {
1534 LanguageSummary {
1535 language,
1536 files: 0,
1537 total_physical_lines: 0,
1538 code_lines: 0,
1539 comment_lines: 0,
1540 blank_lines: 0,
1541 mixed_lines_separate: 0,
1542 functions: 0,
1543 classes: 0,
1544 variables: 0,
1545 imports: 0,
1546 test_count: 0,
1547 test_assertion_count: 0,
1548 test_suite_count: 0,
1549 coverage_lines_found: 0,
1550 coverage_lines_hit: 0,
1551 coverage_functions_found: 0,
1552 coverage_functions_hit: 0,
1553 coverage_branches_found: 0,
1554 coverage_branches_hit: 0,
1555 }
1556}
1557
1558fn accumulate_record_into_summary(entry: &mut LanguageSummary, record: &FileRecord) {
1560 entry.files += 1;
1561 let r = &record.raw_line_categories;
1562 entry.total_physical_lines += r.total_physical_lines;
1563 entry.code_lines += record.effective_counts.code_lines;
1564 entry.comment_lines += record.effective_counts.comment_lines;
1565 entry.blank_lines += record.effective_counts.blank_lines;
1566 entry.mixed_lines_separate += record.effective_counts.mixed_lines_separate;
1567 entry.functions += r.functions;
1568 entry.classes += r.classes;
1569 entry.variables += r.variables;
1570 entry.imports += r.imports;
1571 entry.test_count += r.test_count;
1572 entry.test_assertion_count += r.test_assertion_count;
1573 entry.test_suite_count += r.test_suite_count;
1574 if let Some(cov) = &record.coverage {
1575 entry.coverage_lines_found += u64::from(cov.lines_found);
1576 entry.coverage_lines_hit += u64::from(cov.lines_hit);
1577 entry.coverage_functions_found += u64::from(cov.functions_found);
1578 entry.coverage_functions_hit += u64::from(cov.functions_hit);
1579 entry.coverage_branches_found += u64::from(cov.branches_found);
1580 entry.coverage_branches_hit += u64::from(cov.branches_hit);
1581 }
1582}
1583
1584fn build_language_summaries(analyzed: &[FileRecord]) -> Vec<LanguageSummary> {
1585 let mut by_language: BTreeMap<Language, LanguageSummary> = BTreeMap::new();
1586 for record in analyzed {
1587 let Some(language) = record.language else {
1588 continue;
1589 };
1590 let entry = by_language
1591 .entry(language)
1592 .or_insert_with(|| zeroed_summary(language));
1593 accumulate_record_into_summary(entry, record);
1594 }
1595 by_language.into_values().collect()
1596}
1597
1598fn skipped_record(
1599 path: &Path,
1600 root: &Path,
1601 size_bytes: u64,
1602 status: FileStatus,
1603 warnings: Vec<String>,
1604) -> FileRecord {
1605 FileRecord {
1606 path: path_to_string(path),
1607 relative_path: relative_path_string(path, root),
1608 language: None,
1609 size_bytes,
1610 detected_encoding: None,
1611 raw_line_categories: RawLineCounts::default(),
1612 effective_counts: EffectiveCounts::default(),
1613 status,
1614 warnings,
1615 generated: false,
1616 minified: false,
1617 vendor: false,
1618 parse_mode: None,
1619 submodule: None,
1620 coverage: None,
1621 style_analysis: None,
1622 }
1623}
1624
/// Renders `path` relative to `root` using forward slashes.
///
/// When `path` does not start with `root` the whole path is rendered
/// instead. Non-UTF-8 parts are converted lossily.
fn relative_path_string(path: &Path, root: &Path) -> String {
    let shown = match path.strip_prefix(root) {
        Ok(relative) => relative,
        Err(_) => path,
    };
    shown
        .to_string_lossy()
        .chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect()
}
1631
/// Renders `path` as a string with every backslash replaced by a forward
/// slash. Non-UTF-8 parts are converted lossily.
fn path_to_string(path: &Path) -> String {
    path.to_string_lossy()
        .chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect()
}
1635
/// Lists the submodules declared in `<root>/.gitmodules`.
///
/// Each returned pair is `(name, path)`: the name comes from a
/// `[submodule "<name>"]` header and the path from the `path = <value>` entry
/// that follows it. Sections without a `path` entry are dropped. A missing or
/// unreadable `.gitmodules` yields an empty list.
///
/// Malformed header lines (for example `[submodule "]`) are ignored instead
/// of causing a panic, and only a key that is exactly `path` is treated as
/// the path entry.
#[must_use]
pub fn detect_submodules(root: &Path) -> Vec<(String, PathBuf)> {
    let gitmodules = root.join(".gitmodules");
    if !gitmodules.is_file() {
        return Vec::new();
    }
    let Ok(content) = fs::read_to_string(&gitmodules) else {
        return Vec::new();
    };

    let mut result = Vec::new();
    let mut current_name: Option<String> = None;
    let mut current_path: Option<PathBuf> = None;

    for line in content.lines() {
        let trimmed = line.trim();

        // `strip_prefix` + `strip_suffix` cannot overlap, unlike manual
        // index arithmetic, so a truncated header cannot panic.
        let header_name = trimmed
            .strip_prefix("[submodule \"")
            .and_then(|rest| rest.strip_suffix("\"]"));
        // Require `=` right after the key (ignoring spaces) so that keys
        // merely starting with "path" are not mistaken for it.
        let path_value = trimmed
            .strip_prefix("path")
            .and_then(|rest| rest.trim_start().strip_prefix('='));

        if let Some(name) = header_name {
            // A new section begins: flush the previous one if it is complete.
            if let (Some(prev_name), Some(prev_path)) = (current_name.take(), current_path.take())
            {
                result.push((prev_name, prev_path));
            }
            current_name = Some(name.to_string());
        } else if let Some(value) = path_value {
            current_path = Some(PathBuf::from(value.trim()));
        }
    }

    // Flush the final section.
    if let (Some(name), Some(path)) = (current_name, current_path) {
        result.push((name, path));
    }

    result
}
1672
1673fn build_submodule_summaries(
1674 analyzed: &[FileRecord],
1675 submodules: &[(String, PathBuf)],
1676 root: &Path,
1677) -> Vec<SubmoduleSummary> {
1678 submodules
1679 .iter()
1680 .map(|(name, path)| {
1681 let files: Vec<&FileRecord> = analyzed
1682 .iter()
1683 .filter(|f| f.submodule.as_deref() == Some(name.as_str()))
1684 .collect();
1685
1686 let files_analyzed = files.len() as u64;
1687 let total_physical_lines = files
1688 .iter()
1689 .map(|f| f.raw_line_categories.total_physical_lines)
1690 .sum();
1691 let code_lines = files.iter().map(|f| f.effective_counts.code_lines).sum();
1692 let comment_lines = files.iter().map(|f| f.effective_counts.comment_lines).sum();
1693 let blank_lines = files.iter().map(|f| f.effective_counts.blank_lines).sum();
1694 let language_summaries = build_language_summaries_from_slice(&files);
1695
1696 let git = detect_git_for_run(&root.join(path));
1697
1698 SubmoduleSummary {
1699 name: name.clone(),
1700 relative_path: path.to_string_lossy().replace('\\', "/"),
1701 files_analyzed,
1702 total_physical_lines,
1703 code_lines,
1704 comment_lines,
1705 blank_lines,
1706 language_summaries,
1707 git_commit_short: git.commit_short,
1708 git_commit_long: git.commit_long,
1709 git_branch: git.branch,
1710 git_commit_author: git.author,
1711 git_commit_date: git.commit_date,
1712 git_remote_url: git.remote_url,
1713 }
1714 })
1715 .filter(|s| s.files_analyzed > 0)
1716 .collect()
1717}
1718
1719#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
1721fn dominant_indent_label(files: &[&StyleAnalysis]) -> String {
1722 let mut votes = [0u32; 6];
1723 for f in files {
1724 let idx = match f.indent_style {
1725 IndentStyle::Tabs => 0,
1726 IndentStyle::Spaces2 => 1,
1727 IndentStyle::Spaces4 => 2,
1728 IndentStyle::Spaces8 => 3,
1729 IndentStyle::Mixed => 4,
1730 IndentStyle::Unknown => 5,
1731 };
1732 votes[idx] += 1;
1733 }
1734 let labels = ["Tabs", "2-Space", "4-Space", "8-Space", "Mixed", "\u{2014}"];
1735 labels[votes
1736 .iter()
1737 .enumerate()
1738 .max_by_key(|(_, v)| *v)
1739 .map_or(5, |(i, _)| i)]
1740 .to_string()
1741}
1742
1743#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
1745fn line80_pct(files: &[&StyleAnalysis]) -> u8 {
1746 if files.is_empty() {
1747 return 0;
1748 }
1749 let compliant = files
1750 .iter()
1751 .filter(|f| f.total_lines == 0 || (f.lines_over_80 as f32 / f.total_lines as f32) <= 0.05)
1752 .count() as u32;
1753 ((compliant * 100) / files.len() as u32) as u8
1754}
1755
1756#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
1759fn line_col_pct(files: &[&StyleAnalysis], threshold: u16) -> u8 {
1760 if files.is_empty() {
1761 return 0;
1762 }
1763 let compliant = files
1764 .iter()
1765 .filter(|f| {
1766 let over = if threshold <= 80 {
1767 f.lines_over_80
1768 } else if threshold <= 100 {
1769 f.lines_over_100
1770 } else {
1771 f.lines_over_120
1772 };
1773 f.total_lines == 0 || (over as f32 / f.total_lines as f32) <= 0.05
1774 })
1775 .count() as u32;
1776 ((compliant * 100) / files.len() as u32) as u8
1777}
1778
1779#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
1781fn build_language_group(
1782 family: &str,
1783 files: &[&StyleAnalysis],
1784 col_threshold: u16,
1785) -> LanguageStyleGroup {
1786 let count = files.len() as u32;
1787
1788 let mut all_names: Vec<String> = Vec::new();
1790 for f in files {
1791 for g in &f.guide_scores {
1792 if !all_names.contains(&g.name) {
1793 all_names.push(g.name.clone());
1794 }
1795 }
1796 }
1797
1798 let mut guide_avg_scores: Vec<(String, u8)> = all_names
1799 .into_iter()
1800 .map(|name| {
1801 let sum: u32 = files
1802 .iter()
1803 .filter_map(|f| f.guide_scores.iter().find(|g| g.name == name))
1804 .map(|g| u32::from(g.score_pct))
1805 .sum();
1806 let avg = (sum / count) as u8;
1807 (name, avg)
1808 })
1809 .collect();
1810 guide_avg_scores.sort_by_key(|s| std::cmp::Reverse(s.1));
1811
1812 let (dominant_guide, dominant_score_pct) = guide_avg_scores
1813 .first()
1814 .map(|(n, s)| (n.clone(), *s))
1815 .unwrap_or_default();
1816
1817 let lcp = line_col_pct(files, col_threshold);
1818 LanguageStyleGroup {
1819 language_family: family.to_string(),
1820 files_count: count,
1821 dominant_guide,
1822 dominant_score_pct,
1823 common_indent_style: dominant_indent_label(files),
1824 guide_avg_scores,
1825 line80_compliant_pct: line80_pct(files),
1826 line_col_compliant_pct: lcp,
1827 }
1828}
1829
1830#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
1833fn build_style_summary(analyzed: &[FileRecord], col_threshold: u16) -> Option<StyleSummary> {
1834 let all_style: Vec<&StyleAnalysis> = analyzed
1835 .iter()
1836 .filter_map(|f| f.style_analysis.as_ref())
1837 .collect();
1838
1839 if all_style.is_empty() {
1840 return None;
1841 }
1842
1843 let mut families: std::collections::BTreeMap<&str, Vec<&StyleAnalysis>> =
1845 std::collections::BTreeMap::new();
1846 for sa in &all_style {
1847 families
1848 .entry(sa.language_family.as_str())
1849 .or_default()
1850 .push(sa);
1851 }
1852
1853 let mut by_language: Vec<LanguageStyleGroup> = families
1854 .iter()
1855 .map(|(family, files)| build_language_group(family, files, col_threshold))
1856 .collect();
1857 by_language.sort_by_key(|g| std::cmp::Reverse(g.files_count));
1858
1859 let files_analyzed = all_style.len() as u32;
1860 let common_indent_style = dominant_indent_label(&all_style);
1861 let line80_compliant_pct = line80_pct(&all_style);
1862 let line_col_compliant_pct = line_col_pct(&all_style, col_threshold);
1863
1864 Some(StyleSummary {
1865 files_analyzed,
1866 common_indent_style,
1867 line80_compliant_pct,
1868 line_col_compliant_pct,
1869 col_threshold,
1870 by_language,
1871 })
1872}
1873
1874fn build_language_summaries_from_slice(files: &[&FileRecord]) -> Vec<LanguageSummary> {
1875 let mut map: BTreeMap<String, LanguageSummary> = BTreeMap::new();
1876 for file in files {
1877 let Some(lang) = file.language else { continue };
1878 let entry = map
1879 .entry(lang.display_name().to_string())
1880 .or_insert_with(|| zeroed_summary(lang));
1881 accumulate_record_into_summary(entry, file);
1882 }
1883 map.into_values().collect()
1884}
1885
/// Returns `true` when the final component of `path` is valid UTF-8 and
/// equals `expected` exactly (case-sensitive).
fn file_name_eq(path: &Path, expected: &str) -> bool {
    matches!(
        path.file_name().and_then(|name| name.to_str()),
        Some(name) if name == expected
    )
}
1891
/// Returns `true` when any component of `path` is valid UTF-8 and equals one
/// of the names in `excluded_dirs` exactly.
fn is_excluded_dir_path(path: &Path, excluded_dirs: &[String]) -> bool {
    for component in path.components() {
        let Some(part) = component.as_os_str().to_str() else {
            continue;
        };
        if excluded_dirs.iter().any(|excluded| excluded == part) {
            return true;
        }
    }
    false
}
1900
/// Returns `true` when any component of `path` is exactly `vendor`,
/// `node_modules` or `packages`.
fn is_vendor_path(path: &Path) -> bool {
    const VENDOR_DIR_NAMES: [&str; 3] = ["vendor", "node_modules", "packages"];

    path.components()
        .filter_map(|component| component.as_os_str().to_str())
        .any(|part| VENDOR_DIR_NAMES.contains(&part))
}
1909
/// Returns `true` when the file name of `path` is one of the recognised
/// dependency lockfile names (Cargo, npm, Yarn, pnpm, Pipenv, Poetry,
/// Composer).
fn is_known_lockfile(path: &Path) -> bool {
    const LOCKFILE_NAMES: [&str; 7] = [
        "Cargo.lock",
        "package-lock.json",
        "yarn.lock",
        "pnpm-lock.yaml",
        "Pipfile.lock",
        "poetry.lock",
        "composer.lock",
    ];

    let Some(name) = path.file_name().and_then(|name| name.to_str()) else {
        return false;
    };
    LOCKFILE_NAMES.contains(&name)
}
1926
1927fn looks_generated(path: &Path, bytes: &[u8]) -> bool {
1928 let file_name = path
1929 .file_name()
1930 .and_then(|name| name.to_str())
1931 .unwrap_or_default();
1932 if file_name.contains(".generated.") || file_name.contains(".g.") {
1933 return true;
1934 }
1935
1936 let sample = String::from_utf8_lossy(&bytes[..bytes.len().min(GENERATED_SAMPLE_BYTES)])
1937 .to_ascii_lowercase();
1938 sample.contains("@generated") || sample.contains("generated by")
1939}
1940
1941fn looks_minified(path: &Path, bytes: &[u8]) -> bool {
1942 let file_name = path
1943 .file_name()
1944 .and_then(|name| name.to_str())
1945 .unwrap_or_default();
1946 if file_name.contains(".min.") {
1947 return true;
1948 }
1949
1950 let sample = String::from_utf8_lossy(&bytes[..bytes.len().min(MINIFIED_SAMPLE_BYTES)]);
1951 let longest_line = sample.lines().map(str::len).max().unwrap_or(0);
1952 let whitespace = sample.chars().filter(|c| c.is_whitespace()).count();
1953 longest_line > MINIFIED_LINE_THRESHOLD && whitespace * 100 < sample.len().max(1)
1954}
1955
1956fn is_binary(bytes: &[u8]) -> bool {
1957 if bytes.starts_with(&[0xEF, 0xBB, 0xBF])
1958 || bytes.starts_with(&[0xFF, 0xFE])
1959 || bytes.starts_with(&[0xFE, 0xFF])
1960 {
1961 return false;
1962 }
1963
1964 let sample = &bytes[..bytes.len().min(BINARY_SAMPLE_BYTES)];
1965 sample.contains(&0)
1966}
1967
1968fn decode_utf16_bom(
1971 bom_stripped: &[u8],
1972 encoding: &'static encoding_rs::Encoding,
1973 label: &str,
1974) -> (String, String, Vec<String>) {
1975 let (cow, _, had_errors) = encoding.decode(bom_stripped);
1976 let mut warnings = Vec::new();
1977 if had_errors {
1978 warnings.push(format!("{label} decode contained replacement characters"));
1979 }
1980 (cow.into_owned(), label.into(), warnings)
1981}
1982
1983fn decode_bytes(bytes: &[u8]) -> std::result::Result<(String, String, Vec<String>), String> {
1984 if bytes.starts_with(&[0xEF, 0xBB, 0xBF]) {
1985 let text = String::from_utf8(bytes[3..].to_vec()).map_err(|err| err.to_string())?;
1986 return Ok((text, "utf-8-bom".into(), vec![]));
1987 }
1988 if bytes.starts_with(&[0xFF, 0xFE]) {
1989 return Ok(decode_utf16_bom(&bytes[2..], UTF_16LE, "utf-16le"));
1990 }
1991 if bytes.starts_with(&[0xFE, 0xFF]) {
1992 return Ok(decode_utf16_bom(&bytes[2..], UTF_16BE, "utf-16be"));
1993 }
1994
1995 #[allow(clippy::option_if_let_else)]
1997 if let Ok(text) = String::from_utf8(bytes.to_vec()) {
1998 Ok((text, "utf-8".into(), vec![]))
1999 } else {
2000 let (cow, _, had_errors) = WINDOWS_1252.decode(bytes);
2001 let mut warnings = vec!["decoded using windows-1252 fallback".into()];
2002 if had_errors {
2003 warnings.push("fallback decode contained replacement characters".into());
2004 }
2005 Ok((cow.into_owned(), "windows-1252".into(), warnings))
2006 }
2007}
2008
2009fn compile_globset(patterns: &[String]) -> Result<Option<GlobSet>> {
2010 if patterns.is_empty() {
2011 return Ok(None);
2012 }
2013
2014 let mut builder = GlobSetBuilder::new();
2015 for pattern in patterns {
2016 builder
2017 .add(Glob::new(pattern).with_context(|| format!("invalid glob pattern: {pattern}"))?);
2018 }
2019 Ok(Some(
2020 builder.build().context("failed to compile glob filters")?,
2021 ))
2022}
2023
2024fn parse_enabled_languages(enabled: &[String]) -> Result<Option<BTreeSet<Language>>> {
2025 if enabled.is_empty() {
2026 return Ok(None);
2027 }
2028
2029 let supported = supported_languages();
2030 let mut set = BTreeSet::new();
2031 for name in enabled {
2032 let language = Language::from_name(name)
2033 .with_context(|| format!("unsupported language in config: {name}"))?;
2034 if !supported.contains(&language) {
2035 anyhow::bail!("language {name} is not supported in this build");
2036 }
2037 set.insert(language);
2038 }
2039 Ok(Some(set))
2040}
2041
2042pub fn write_json(run: &AnalysisRun, output_path: &Path) -> Result<()> {
2046 let json = serde_json::to_string_pretty(run).context("failed to serialize analysis run")?;
2047 fs::write(output_path, json)
2048 .with_context(|| format!("failed to write JSON output to {}", output_path.display()))
2049}
2050
2051pub fn read_json(path: &Path) -> Result<AnalysisRun> {
2055 let contents = fs::read_to_string(path)
2056 .with_context(|| format!("failed to read result file {}", path.display()))?;
2057 serde_json::from_str(&contents)
2058 .with_context(|| format!("failed to parse JSON result {}", path.display()))
2059}
2060
#[cfg(test)]
mod tests {
    use super::*;

    /// Exclusion list shared by the `is_excluded_dir_path` tests.
    fn sample_excluded_dirs() -> Vec<String> {
        vec![".git".to_string(), "target".to_string()]
    }

    // ----- compute_effective_counts -----

    #[test]
    fn effective_counts_respect_code_only_policy() {
        let raw = RawLineCounts {
            docstring_comment_lines: 2,
            mixed_code_single_comment_lines: 3,
            single_comment_only_lines: 1,
            code_only_lines: 2,
            ..RawLineCounts::default()
        };
        let effective = compute_effective_counts(&raw, MixedLinePolicy::CodeOnly, true, true);
        // Mixed lines count as code; docstrings count as comments.
        assert_eq!(effective.code_lines, 5);
        assert_eq!(effective.comment_lines, 3);
    }

    #[test]
    fn effective_counts_can_separate_mixed() {
        let raw = RawLineCounts {
            mixed_code_multi_comment_lines: 1,
            mixed_code_single_comment_lines: 2,
            ..RawLineCounts::default()
        };
        let effective =
            compute_effective_counts(&raw, MixedLinePolicy::SeparateMixedCategory, true, true);
        assert_eq!(effective.mixed_lines_separate, 3);
        assert_eq!(effective.code_lines, 0);
        assert_eq!(effective.comment_lines, 0);
    }

    // ----- decode_bytes -----

    #[test]
    fn windows_1252_fallback_decodes() {
        // "Hello", byte 0x96 (an en dash in Windows-1252), "W".
        let input: &[u8] = &[0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x96, 0x57];
        let (text, encoding, warnings) = decode_bytes(input).unwrap();
        assert_eq!(encoding, "windows-1252");
        assert!(text.contains('\u{2013}'));
        assert!(!warnings.is_empty());
    }

    #[test]
    fn decode_bytes_utf8_bom_stripped() {
        let (text, encoding, _) = decode_bytes(b"\xef\xbb\xbffn main() {}").unwrap();
        assert!(
            encoding.contains("utf-8"),
            "should be utf-8 variant, got {encoding}"
        );
        assert!(text.starts_with("fn"));
    }

    #[test]
    fn decode_bytes_plain_utf8() {
        let (text, encoding, warnings) = decode_bytes(b"hello world").unwrap();
        assert_eq!(encoding, "utf-8");
        assert_eq!(text, "hello world");
        assert!(warnings.is_empty());
    }

    // ----- is_binary -----

    #[test]
    fn is_binary_detects_null_byte() {
        assert!(is_binary(b"hello\x00world"));
    }

    #[test]
    fn is_binary_clean_text_is_not_binary() {
        assert!(!is_binary(b"fn main() { println!(\"hello\"); }"));
    }

    #[test]
    fn is_binary_utf8_bom_not_binary() {
        assert!(!is_binary(b"\xef\xbb\xbffn main() {}"));
    }

    // ----- looks_generated -----

    #[test]
    fn looks_generated_at_generated_marker() {
        let source = b"// @generated by protoc-gen-rust\nfn foo() {}";
        assert!(looks_generated(Path::new("foo.rs"), source));
    }

    #[test]
    fn looks_generated_do_not_edit_marker() {
        let target = Path::new("foo.rs");

        let with_generated_by = b"// Code generated by build.rs. DO NOT EDIT.\nuse foo;";
        assert!(looks_generated(target, with_generated_by));

        let with_at_generated = b"// @generated\nuse foo;";
        assert!(looks_generated(target, with_at_generated));
    }

    #[test]
    fn looks_generated_normal_file_not_generated() {
        let source = b"fn main() {\n    println!(\"hello\");\n}\n";
        assert!(!looks_generated(Path::new("main.rs"), source));
    }

    // ----- looks_minified -----

    #[test]
    fn looks_minified_dot_min_filename() {
        assert!(looks_minified(
            Path::new("bundle.min.js"),
            b"function a(){return 1}"
        ));
    }

    #[test]
    fn looks_minified_normal_file_not_minified() {
        let source = b"function hello() {\n  return 1;\n}\n";
        assert!(!looks_minified(Path::new("app.js"), source));
    }

    #[test]
    fn looks_minified_very_long_line() {
        let single_long_line = vec![b'x'; MINIFIED_LINE_THRESHOLD + 1];
        assert!(looks_minified(Path::new("app.js"), &single_long_line));
    }

    // ----- is_known_lockfile -----

    #[test]
    fn is_known_lockfile_cargo_lock() {
        assert!(is_known_lockfile(Path::new("Cargo.lock")));
    }

    #[test]
    fn is_known_lockfile_package_lock_json() {
        assert!(is_known_lockfile(Path::new("package-lock.json")));
    }

    #[test]
    fn is_known_lockfile_yarn_lock() {
        assert!(is_known_lockfile(Path::new("yarn.lock")));
    }

    #[test]
    fn is_known_lockfile_normal_file_is_not_lockfile() {
        assert!(!is_known_lockfile(Path::new("src/lib.rs")));
    }

    // ----- is_vendor_path -----

    #[test]
    fn is_vendor_path_node_modules() {
        assert!(is_vendor_path(Path::new("node_modules/react/index.js")));
    }

    #[test]
    fn is_vendor_path_vendor_dir() {
        assert!(is_vendor_path(Path::new("vendor/anyhow/src/lib.rs")));
    }

    #[test]
    fn is_vendor_path_normal_src_is_not_vendor() {
        assert!(!is_vendor_path(Path::new("src/lib.rs")));
    }

    // ----- is_excluded_dir_path -----

    #[test]
    fn is_excluded_dir_path_matches_excluded() {
        let excluded = sample_excluded_dirs();
        assert!(is_excluded_dir_path(Path::new(".git/config"), &excluded));
    }

    #[test]
    fn is_excluded_dir_path_non_excluded_is_ok() {
        let excluded = sample_excluded_dirs();
        assert!(!is_excluded_dir_path(Path::new("src/main.rs"), &excluded));
    }
}