1#![allow(clippy::multiple_crate_versions)]
4
5pub mod baseline;
6pub mod coverage;
7pub mod delta;
8pub mod history;
9pub use baseline::{check_against_baseline, resolve_baselines_path, BaselineEntry, BaselineStore};
10pub use coverage::{aggregate_line_coverage, lookup_coverage, parse_lcov, FileCoverage};
11pub use delta::{compute_delta, FileChangeStatus, FileDelta, ScanComparison, SummaryDelta};
12pub use history::{RegistryEntry, ScanRegistry, ScanSummarySnapshot, WatchedDirsStore};
13
14use std::collections::{BTreeMap, BTreeSet, HashSet};
15use std::fs;
16use std::path::{Path, PathBuf};
17use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
18use std::sync::Arc;
19
20use anyhow::{Context, Result};
21use chrono::{DateTime, Utc};
22use encoding_rs::{UTF_16BE, UTF_16LE, WINDOWS_1252};
23use globset::{Glob, GlobSet, GlobSetBuilder};
24use ignore::WalkBuilder;
25use serde::{Deserialize, Serialize};
26use uuid::Uuid;
27
28use sloc_config::{
29 AppConfig, BinaryFileBehavior, BlankInBlockCommentPolicy, ContinuationLinePolicy,
30 FailureBehavior, MixedLinePolicy,
31};
32use sloc_languages::style::IndentStyle;
33use sloc_languages::{
34 analyze_text, detect_language, supported_languages, AnalysisOptions, Language, ParseMode,
35 RawLineCounts, StyleAnalysis,
36};
37
/// Upper bound on the number of worker threads used for parallel file analysis.
const MAX_ANALYSIS_THREADS: usize = 16;
/// Worker thread count used when the available parallelism cannot be queried.
const DEFAULT_ANALYSIS_THREADS: usize = 4;
// NOTE(review): the four constants below are not referenced in the part of the
// file visible here; judging by their names they bound how much of a file is
// sampled by the generated/minified/binary detectors — confirm at the use sites.
const GENERATED_SAMPLE_BYTES: usize = 1024;
const MINIFIED_SAMPLE_BYTES: usize = 4096;
const MINIFIED_LINE_THRESHOLD: usize = 2000;
const BINARY_SAMPLE_BYTES: usize = 8192;
52
/// Shared counters a caller can poll to observe scan progress.
pub struct ProgressCounters {
    /// Incremented by the analysis workers each time a file has been processed.
    pub files_done: Arc<AtomicUsize>,
    /// Increased by the number of discovered paths each time a directory root
    /// has been walked.
    pub files_total: Arc<AtomicUsize>,
}
60
/// Result of the metadata-only policy checks that run before a file is read.
enum MetadataPolicyOutcome {
    /// The file is skipped and the boxed record is reported among the skipped files.
    Skip(Box<FileRecord>),
    /// The file is dropped without producing any record.
    Exclude,
    /// No policy matched; analysis proceeds to reading the file contents.
    Continue,
}
70
/// Final disposition of a candidate file; serialized in `snake_case`.
///
/// Records with one of the two `Analyzed*` variants are collected as analyzed
/// files; every other variant places the record in the skipped list.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FileStatus {
    AnalyzedExact,
    AnalyzedBestEffort,
    /// The contents were detected as binary and the configured behavior is to skip.
    SkippedBinary,
    /// The bytes could not be decoded and the configured behavior is warn-and-skip.
    SkippedDecodeError,
    // NOTE(review): not produced in the visible part of the file; presumably
    // used when no supported language is detected — confirm.
    SkippedUnsupported,
    /// A discovery or analysis policy (symlink, size limit, exclude rule,
    /// lockfile, vendor, generated, minified, ...) excluded the file.
    SkippedByPolicy,
    /// Reading the file's metadata or contents failed.
    ErrorInternal,
}
82
/// Per-file line totals by category.
///
/// NOTE(review): presumably these are the counts after the configured
/// mixed-line / continuation-line policies have been applied to the raw
/// categories — confirm where the struct is populated.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct EffectiveCounts {
    pub code_lines: u64,
    pub comment_lines: u64,
    pub blank_lines: u64,
    pub mixed_lines_separate: u64,
}
90
/// Identifies the tool build and the individual run that produced a report.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolMetadata {
    /// Tool name; `assemble_run` sets this to `"sloc"`.
    pub name: String,
    /// Crate version taken from `CARGO_PKG_VERSION` at compile time.
    pub version: String,
    /// Run identifier: `YYYYMMDD-HHMM` of the run timestamp followed by `-`
    /// and a random UUID v4 in simple (undashed) form.
    pub run_id: String,
    /// Moment the run record was assembled.
    pub timestamp_utc: DateTime<Utc>,
}
98
/// Describes the machine and context in which a run was executed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnvironmentMetadata {
    /// Value of `std::env::consts::OS`.
    pub operating_system: String,
    /// Value of `std::env::consts::ARCH`.
    pub architecture: String,
    /// Caller-supplied label passed to `analyze` as `runtime_mode`.
    pub runtime_mode: String,
    /// User name from `USERNAME` / `USER`, or `"unknown"`.
    pub initiator_username: String,
    /// Host name, preferring CI-provided node/runner names when available.
    pub initiator_hostname: String,
    /// Name of the detected CI system, if any. Omitted from serialized output
    /// when `None` and defaulted to `None` when missing on input.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub ci_name: Option<String>,
}
111
/// Totals aggregated over an entire run.
///
/// Fields marked `#[serde(default)]` deserialize to `0` when they are absent
/// from the input, so reports lacking those fields still load.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct SummaryTotals {
    pub files_considered: u64,
    pub files_analyzed: u64,
    pub files_skipped: u64,
    pub total_physical_lines: u64,
    pub code_lines: u64,
    pub comment_lines: u64,
    pub blank_lines: u64,
    pub mixed_lines_separate: u64,
    #[serde(default)]
    pub functions: u64,
    #[serde(default)]
    pub classes: u64,
    #[serde(default)]
    pub variables: u64,
    #[serde(default)]
    pub imports: u64,
    #[serde(default)]
    pub test_count: u64,
    #[serde(default)]
    pub test_assertion_count: u64,
    #[serde(default)]
    pub test_suite_count: u64,
    // Coverage totals come in found/hit pairs for lines, functions and branches.
    #[serde(default)]
    pub coverage_lines_found: u64,
    #[serde(default)]
    pub coverage_lines_hit: u64,
    #[serde(default)]
    pub coverage_functions_found: u64,
    #[serde(default)]
    pub coverage_functions_hit: u64,
    #[serde(default)]
    pub coverage_branches_found: u64,
    #[serde(default)]
    pub coverage_branches_hit: u64,
}
152
/// Totals for a single language.
///
/// Carries the same counters as [`SummaryTotals`], keyed by `language` and
/// with a `files` count. Fields marked `#[serde(default)]` deserialize to `0`
/// when absent from the input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LanguageSummary {
    pub language: Language,
    pub files: u64,
    pub total_physical_lines: u64,
    pub code_lines: u64,
    pub comment_lines: u64,
    pub blank_lines: u64,
    pub mixed_lines_separate: u64,
    #[serde(default)]
    pub functions: u64,
    #[serde(default)]
    pub classes: u64,
    #[serde(default)]
    pub variables: u64,
    #[serde(default)]
    pub imports: u64,
    #[serde(default)]
    pub test_count: u64,
    #[serde(default)]
    pub test_assertion_count: u64,
    #[serde(default)]
    pub test_suite_count: u64,
    // Coverage totals come in found/hit pairs for lines, functions and branches.
    #[serde(default)]
    pub coverage_lines_found: u64,
    #[serde(default)]
    pub coverage_lines_hit: u64,
    #[serde(default)]
    pub coverage_functions_found: u64,
    #[serde(default)]
    pub coverage_functions_hit: u64,
    #[serde(default)]
    pub coverage_branches_found: u64,
    #[serde(default)]
    pub coverage_branches_hit: u64,
}
189
/// Everything recorded about one candidate file, whether analyzed or skipped.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileRecord {
    pub path: String,
    /// Path used for sorting records, for prefixing run-level warnings, for
    /// submodule matching and for coverage lookup.
    pub relative_path: String,
    pub language: Option<Language>,
    pub size_bytes: u64,
    pub detected_encoding: Option<String>,
    pub raw_line_categories: RawLineCounts,
    pub effective_counts: EffectiveCounts,
    /// Decides whether the record lands in the analyzed or the skipped list.
    pub status: FileStatus,
    /// File-level warnings; each is also copied into the run-level warning
    /// list prefixed with `relative_path`.
    pub warnings: Vec<String>,
    pub generated: bool,
    pub minified: bool,
    pub vendor: bool,
    pub parse_mode: Option<ParseMode>,
    /// Name of the containing submodule, set when submodule breakdown is
    /// enabled and `relative_path` lies under a detected submodule path.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub submodule: Option<String>,
    /// Coverage data matched to this file from the configured coverage report.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub coverage: Option<FileCoverage>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub style_analysis: Option<StyleAnalysis>,
}
214
/// Style statistics for one language family.
///
/// NOTE(review): the code that fills this struct is outside the visible part
/// of the file; the field notes below are read off the names and types only.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LanguageStyleGroup {
    pub language_family: String,
    pub files_count: u32,
    pub dominant_guide: String,
    // presumably a 0-100 percentage, like the other `*_pct` fields — confirm
    pub dominant_score_pct: u8,
    pub common_indent_style: String,
    // presumably (style guide name, average score) pairs — confirm
    pub guide_avg_scores: Vec<(String, u8)>,
    pub line80_compliant_pct: u8,
    pub line_col_compliant_pct: u8,
}
235
/// Run-wide style statistics with a per-language breakdown.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StyleSummary {
    pub files_analyzed: u32,
    pub common_indent_style: String,
    pub line80_compliant_pct: u8,
    pub line_col_compliant_pct: u8,
    // NOTE(review): presumably the column limit that `line_col_compliant_pct`
    // is measured against (`assemble_run` passes
    // `config.analysis.style_col_threshold` to the builder) — confirm.
    pub col_threshold: u16,
    pub by_language: Vec<LanguageStyleGroup>,
}
252
/// Alias for [`StyleSummary`].
// NOTE(review): presumably kept so callers using the older C++-specific name
// keep compiling — confirm before removing.
pub type CppStyleSummary = StyleSummary;
256
/// Totals for the analyzed files that belong to one detected submodule.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SubmoduleSummary {
    pub name: String,
    pub relative_path: String,
    pub files_analyzed: u64,
    pub total_physical_lines: u64,
    pub code_lines: u64,
    pub comment_lines: u64,
    pub blank_lines: u64,
    pub language_summaries: Vec<LanguageSummary>,
}
269
/// Complete result of one scan: metadata, configuration, totals and per-file
/// records.
///
/// Optional fields are omitted from serialized output when empty / `None` and
/// fall back to their defaults when missing on input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalysisRun {
    pub tool: ToolMetadata,
    pub environment: EnvironmentMetadata,
    /// Copy of the configuration the run was executed with.
    pub effective_configuration: AppConfig,
    /// The configured root paths, rendered as strings.
    pub input_roots: Vec<String>,
    pub summary_totals: SummaryTotals,
    pub totals_by_language: Vec<LanguageSummary>,
    /// Records whose status is one of the `Analyzed*` variants.
    pub per_file_records: Vec<FileRecord>,
    /// Records with any other status.
    pub skipped_file_records: Vec<FileRecord>,
    /// Run-level warnings, including per-file warnings prefixed with the
    /// file's relative path.
    pub warnings: Vec<String>,
    /// Populated only when submodule breakdown is enabled in the configuration.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub submodule_summaries: Vec<SubmoduleSummary>,
    // The `git_*` fields are detected from the first configured root path.
    /// First seven characters of `git_commit_long`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_short: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_long: Option<String>,
    /// Branch named by `HEAD`, otherwise a branch taken from CI environment
    /// variables.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_branch: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_author: Option<String>,
    /// Tags pointing at `HEAD`, joined with `", "`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_tags: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_nearest_tag: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_date: Option<String>,
    /// `url` of the `origin` remote from the repository's config file.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_remote_url: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub style_summary: Option<StyleSummary>,
}
312
/// Git details gathered for a run; every field is `None` when it could not be
/// determined.
#[derive(Default)]
struct GitInfo {
    /// First seven characters of `commit_long`.
    commit_short: Option<String>,
    /// Commit hash read from `HEAD` (detached) or from the ref `HEAD` names.
    commit_long: Option<String>,
    /// Branch named by `HEAD`, otherwise a branch from CI environment variables.
    branch: Option<String>,
    /// Output of `git log -1 --format=%an HEAD`.
    author: Option<String>,
    /// Output lines of `git tag --points-at HEAD`, joined with `", "`.
    tags: Option<String>,
    /// Output of `git describe --tags --abbrev=0 HEAD`.
    nearest_tag: Option<String>,
    /// Output of `git log -1 --format=%aI HEAD`.
    commit_date: Option<String>,
    /// `url` entry of the `[remote "origin"]` section in the git config file.
    remote_url: Option<String>,
}
324
325fn find_git_dir(start: &Path) -> Option<PathBuf> {
329 let mut current = Some(start);
330 while let Some(dir) = current {
331 let candidate = dir.join(".git");
332 if candidate.is_dir() {
333 return Some(candidate);
334 }
335 if candidate.is_file() {
336 if let Some(resolved) = resolve_git_file_pointer(&candidate, dir) {
337 return Some(resolved);
338 }
339 }
340 current = dir.parent();
341 }
342 None
343}
344
/// Resolves a `.git` *file* (as used by worktrees and submodules) to the
/// directory it points at.
///
/// The file must contain `gitdir: <path>`. Forward slashes in `<path>` are
/// converted to the platform separator, and a relative path is interpreted
/// against `base_dir`. The result is canonicalized when possible. Returns
/// `None` if the file cannot be read, lacks the `gitdir: ` prefix, or the
/// target is not a directory.
fn resolve_git_file_pointer(file: &Path, base_dir: &Path) -> Option<PathBuf> {
    let raw = fs::read_to_string(file).ok()?;
    let native = raw
        .trim()
        .strip_prefix("gitdir: ")?
        .replace('/', std::path::MAIN_SEPARATOR_STR);
    let target = Path::new(&native);

    let joined = if target.is_absolute() {
        target.to_path_buf()
    } else {
        base_dir.join(target)
    };
    // Fall back to the un-canonicalized path if canonicalization fails.
    let location = match joined.canonicalize() {
        Ok(real) => real,
        Err(_) => joined,
    };
    location.is_dir().then_some(location)
}
369
/// Resolves a ref name such as `refs/heads/main` to a commit hash.
///
/// The loose ref file under `git_dir` is tried first; its trimmed contents are
/// accepted when they are at least 40 characters long and consist solely of
/// hexadecimal digits. Otherwise `packed-refs` is scanned for an entry whose
/// name equals `refname`.
///
/// Comment (`#`) and peeled-tag (`^`) lines in `packed-refs` are ignored, and
/// so is any line that is not of the form `<sha> <name>` (for example a blank
/// line) — such a line must not abort the lookup.
fn resolve_ref(git_dir: &Path, refname: &str) -> Option<String> {
    // Join component by component so the platform separator is used.
    let ref_path = refname
        .split('/')
        .fold(git_dir.to_path_buf(), |p, c| p.join(c));
    let loose = fs::read_to_string(&ref_path)
        .ok()
        .map(|s| s.trim().to_string())
        .filter(|s| s.len() >= 40 && s.chars().all(|c| c.is_ascii_hexdigit()));
    if loose.is_some() {
        return loose;
    }

    let packed = fs::read_to_string(git_dir.join("packed-refs")).ok()?;
    packed
        .lines()
        .filter(|line| !line.starts_with('#') && !line.starts_with('^'))
        .filter_map(|line| line.split_once(' '))
        .find(|(_, name)| name.trim() == refname)
        .map(|(sha, _)| sha.to_string())
}
405
/// Extracts the value from a git-config `url = <value>` line.
///
/// `line` must start with `url`, optionally followed by spaces or tabs, then
/// `=`. The value is trimmed; an empty value or any other line yields `None`.
fn parse_url_line(line: &str) -> Option<&str> {
    let after_key = line.strip_prefix("url")?;
    let after_eq = after_key
        .trim_start_matches(|c: char| c == ' ' || c == '\t')
        .strip_prefix('=')?;
    Some(after_eq.trim()).filter(|value| !value.is_empty())
}
417
418fn read_git_remote_url(git_dir: &Path) -> Option<String> {
420 let config = fs::read_to_string(git_dir.join("config")).ok()?;
421 let mut in_origin = false;
422 for line in config.lines() {
423 let trimmed = line.trim();
424 if trimmed.starts_with('[') {
425 in_origin = trimmed == r#"[remote "origin"]"#;
426 } else if in_origin {
427 if let Some(url) = parse_url_line(trimmed) {
428 return Some(url.to_owned());
429 }
430 }
431 }
432 None
433}
434
435fn detect_git_for_run(project_path: &Path) -> GitInfo {
439 let ci_branch = ci_branch_from_env();
441
442 let Some(git_dir) = find_git_dir(project_path) else {
443 return GitInfo {
446 branch: ci_branch,
447 ..GitInfo::default()
448 };
449 };
450
451 let head_raw = match fs::read_to_string(git_dir.join("HEAD")) {
452 Ok(s) => s.trim().to_string(),
453 Err(_) => {
454 return GitInfo {
455 branch: ci_branch,
456 ..GitInfo::default()
457 }
458 }
459 };
460
461 let (branch_from_head, commit_long) = head_raw.strip_prefix("ref: ").map_or_else(
462 || {
463 if head_raw.len() >= 40 && head_raw.chars().all(|c| c.is_ascii_hexdigit()) {
464 (None, Some(head_raw[..40].to_string()))
466 } else {
467 (None, None)
468 }
469 },
470 |refname| {
471 let branch = refname
472 .strip_prefix("refs/heads/")
473 .map(|b| b.trim().to_string());
474 let sha = resolve_ref(&git_dir, refname.trim());
475 (branch, sha)
476 },
477 );
478 let branch = branch_from_head.or(ci_branch);
481
482 let commit_short = commit_long
483 .as_deref()
484 .map(|s| s.chars().take(7).collect::<String>());
485
486 let author = run_git_cmd(project_path, &["log", "-1", "--format=%an", "HEAD"]);
487 let commit_date = run_git_cmd(project_path, &["log", "-1", "--format=%aI", "HEAD"]);
488 let remote_url = read_git_remote_url(&git_dir);
489
490 let tags = run_git_cmd(project_path, &["tag", "--points-at", "HEAD"]).map(|t| {
493 t.lines()
494 .filter(|l| !l.is_empty())
495 .collect::<Vec<_>>()
496 .join(", ")
497 });
498 let nearest_tag = run_git_cmd(project_path, &["describe", "--tags", "--abbrev=0", "HEAD"]);
499
500 GitInfo {
501 commit_short,
502 commit_long,
503 branch,
504 author,
505 tags,
506 nearest_tag,
507 commit_date,
508 remote_url,
509 }
510}
511
/// Runs `git <args>` in `dir` and returns its trimmed standard output.
///
/// `git` is looked up on `PATH` first and then at a handful of well-known
/// install locations. The fallback locations are tried only when an executable
/// cannot be launched at all; once a git binary has actually run, its outcome
/// is final. This avoids spawning up to six further processes whenever git
/// legitimately reports failure (for example `git describe` in a repository
/// without tags).
///
/// Returns `None` if no executable could be launched, if git exited with a
/// non-zero status, or if its output is not valid UTF-8 or is empty after
/// trimming.
fn run_git_cmd(dir: &Path, args: &[&str]) -> Option<String> {
    const CANDIDATES: &[&str] = &[
        "git",
        "/usr/bin/git",
        "/usr/local/bin/git",
        "/opt/homebrew/bin/git",
        r"C:\Program Files\Git\cmd\git.exe",
        r"C:\Program Files\Git\bin\git.exe",
        r"C:\Program Files (x86)\Git\cmd\git.exe",
    ];
    for &exe in CANDIDATES {
        let Ok(output) = std::process::Command::new(exe)
            // Let git operate on repositories owned by a different user.
            .args(["-c", "safe.directory=*"])
            .args(args)
            .current_dir(dir)
            .output()
        else {
            // Could not be launched (not installed here): try the next location.
            continue;
        };
        if !output.status.success() {
            return None;
        }
        return String::from_utf8(output.stdout)
            .ok()
            .map(|s| s.trim().to_string())
            .filter(|s| !s.is_empty());
    }
    None
}
546
/// Identifies the CI system from well-known environment variables.
///
/// Jenkins is recognised by the mere presence of `JENKINS_URL`, `JENKINS_HOME`
/// or `BUILD_URL`; the flag-style variables must equal exactly `"true"`;
/// TeamCity is recognised by the presence of `TEAMCITY_VERSION`. Checks run in
/// that order and the first match wins.
fn detect_ci_system() -> Option<&'static str> {
    const FLAGGED: [(&str, &str); 5] = [
        ("GITHUB_ACTIONS", "GitHub Actions"),
        ("GITLAB_CI", "GitLab CI"),
        ("CIRCLECI", "CircleCI"),
        ("TRAVIS", "Travis CI"),
        ("TF_BUILD", "Azure DevOps"),
    ];

    let jenkins = ["JENKINS_URL", "JENKINS_HOME", "BUILD_URL"]
        .iter()
        .any(|key| std::env::var(key).is_ok());
    if jenkins {
        return Some("Jenkins");
    }

    let flagged = FLAGGED
        .iter()
        .find(|(key, _)| std::env::var(key).as_deref() == Ok("true"));
    if let Some(&(_, label)) = flagged {
        return Some(label);
    }

    if std::env::var("TEAMCITY_VERSION").is_ok() {
        Some("TeamCity")
    } else {
        None
    }
}
574
/// Derives the branch name from CI-provided environment variables.
///
/// The variables are consulted in a fixed order. Each value is trimmed and a
/// leading `refs/heads/` (or, failing that, `origin/`) is removed. The first
/// value that is then neither empty nor `HEAD` is returned.
fn ci_branch_from_env() -> Option<String> {
    const VARS: &[&str] = &[
        "BRANCH_NAME",
        "GIT_BRANCH",
        "GITHUB_REF_NAME",
        "CI_COMMIT_BRANCH",
        "CIRCLE_BRANCH",
        "TRAVIS_BRANCH",
        "BUILD_SOURCEBRANCH",
    ];
    VARS.iter().find_map(|name| {
        let raw = std::env::var(name).ok()?;
        let trimmed = raw.trim();
        let branch = match trimmed.strip_prefix("refs/heads/") {
            Some(rest) => rest,
            None => trimmed.strip_prefix("origin/").unwrap_or(trimmed),
        };
        if branch.is_empty() || branch == "HEAD" {
            None
        } else {
            Some(branch.to_string())
        }
    })
}
601
/// Returns the invoking user's name from `USERNAME`, then `USER`, falling back
/// to `"unknown"` when neither variable can be read.
fn get_current_username() -> String {
    ["USERNAME", "USER"]
        .iter()
        .find_map(|key| std::env::var(key).ok())
        .unwrap_or_else(|| "unknown".to_string())
}
607
/// Reads environment variable `var`, treating an unset, unreadable or empty
/// value as absent.
fn non_empty_env(var: &str) -> Option<String> {
    std::env::var(var).ok().filter(|value| !value.is_empty())
}
616
/// Reports whether any of the Jenkins marker variables (`JENKINS_URL`,
/// `JENKINS_HOME`, `BUILD_URL`) is set.
fn is_jenkins_env() -> bool {
    ["JENKINS_URL", "JENKINS_HOME", "BUILD_URL"]
        .iter()
        .any(|key| std::env::var(key).is_ok())
}
622
623fn get_hostname() -> String {
624 if is_jenkins_env() {
627 if let Some(n) = non_empty_env("NODE_NAME") {
628 return n;
629 }
630 }
631 if std::env::var("GITHUB_ACTIONS").as_deref() == Ok("true") {
632 if let Some(r) = non_empty_env("RUNNER_NAME") {
633 return r;
634 }
635 }
636 if std::env::var("GITLAB_CI").as_deref() == Ok("true") {
637 if let Some(r) = non_empty_env("CI_RUNNER_DESCRIPTION") {
638 return r;
639 }
640 }
641 std::env::var("COMPUTERNAME")
642 .or_else(|_| std::env::var("HOSTNAME"))
643 .or_else(|_| std::fs::read_to_string("/etc/hostname").map(|s| s.trim().to_string()))
644 .unwrap_or_else(|_| "unknown".to_string())
645}
646
647#[allow(clippy::too_many_arguments)]
649fn walk_root(
650 root: &Path,
651 config: &AppConfig,
652 include_globs: Option<&GlobSet>,
653 exclude_globs: Option<&GlobSet>,
654 enabled_languages: Option<&BTreeSet<Language>>,
655 seen_paths: &mut HashSet<PathBuf>,
656 analyzed: &mut Vec<FileRecord>,
657 skipped: &mut Vec<FileRecord>,
658 warnings: &mut Vec<String>,
659 cancel: Option<&AtomicBool>,
660 progress: Option<&ProgressCounters>,
661) -> Result<()> {
662 let mut builder = WalkBuilder::new(root);
663 builder
664 .follow_links(config.discovery.follow_symlinks)
665 .hidden(config.discovery.ignore_hidden_files)
666 .ignore(config.discovery.honor_ignore_files)
667 .parents(config.discovery.honor_ignore_files)
668 .git_ignore(config.discovery.honor_ignore_files)
669 .git_global(config.discovery.honor_ignore_files)
670 .git_exclude(config.discovery.honor_ignore_files);
671
672 let paths = collect_walk_paths(&builder, seen_paths, warnings);
673 if paths.is_empty() {
674 return Ok(());
675 }
676
677 if let Some(p) = progress {
678 p.files_total.fetch_add(paths.len(), Ordering::Relaxed);
679 }
680
681 let chunk_results = run_parallel_analysis(
682 &paths,
683 root,
684 config,
685 include_globs,
686 exclude_globs,
687 enabled_languages,
688 cancel,
689 progress,
690 )?;
691 merge_chunk_results(chunk_results, analyzed, skipped, warnings)
692}
693
694fn collect_walk_paths(
695 builder: &WalkBuilder,
696 seen_paths: &mut HashSet<PathBuf>,
697 warnings: &mut Vec<String>,
698) -> Vec<PathBuf> {
699 let (tx, rx) = std::sync::mpsc::channel::<std::result::Result<PathBuf, String>>();
703
704 builder.build_parallel().run(|| {
705 let tx = tx.clone();
706 Box::new(move |entry| {
707 match entry {
708 Err(e) => {
709 let _ = tx.send(Err(format!("discovery warning: {e}")));
710 }
711 Ok(e) => {
712 let path = e.into_path();
713 if !path.is_dir() {
714 let _ = tx.send(Ok(path));
715 }
716 }
717 }
718 ignore::WalkState::Continue
719 })
720 });
721
722 drop(tx);
725
726 rx.into_iter()
727 .filter_map(|msg| match msg {
728 Ok(path) => {
729 if seen_paths.insert(path.clone()) {
730 Some(path)
731 } else {
732 None
733 }
734 }
735 Err(warn) => {
736 warnings.push(warn);
737 None
738 }
739 })
740 .collect()
741}
742
743#[allow(clippy::too_many_arguments)]
745fn worker_loop(
746 paths: &[PathBuf],
747 root: &Path,
748 config: &AppConfig,
749 include_globs: Option<&GlobSet>,
750 exclude_globs: Option<&GlobSet>,
751 enabled_languages: Option<&BTreeSet<Language>>,
752 cancel: Option<&AtomicBool>,
753 next_index: &AtomicUsize,
754 files_done: Option<&AtomicUsize>,
755) -> Vec<Result<Option<FileRecord>>> {
756 let mut results = Vec::new();
757 loop {
758 if cancel.is_some_and(|c| c.load(Ordering::Relaxed)) {
759 results.push(Err(anyhow::anyhow!("analysis cancelled")));
760 break;
761 }
762 let i = next_index.fetch_add(1, Ordering::Relaxed);
763 if i >= paths.len() {
764 break;
765 }
766 results.push(analyze_candidate_file(
767 &paths[i],
768 root,
769 config,
770 include_globs,
771 exclude_globs,
772 enabled_languages,
773 ));
774 if let Some(fd) = files_done {
775 fd.fetch_add(1, Ordering::Relaxed);
776 }
777 }
778 results
779}
780
781#[allow(clippy::too_many_arguments)]
782fn run_parallel_analysis(
783 paths: &[PathBuf],
784 root: &Path,
785 config: &AppConfig,
786 include_globs: Option<&GlobSet>,
787 exclude_globs: Option<&GlobSet>,
788 enabled_languages: Option<&BTreeSet<Language>>,
789 cancel: Option<&AtomicBool>,
790 progress: Option<&ProgressCounters>,
791) -> Result<Vec<Vec<Result<Option<FileRecord>>>>> {
792 let thread_count = std::thread::available_parallelism().map_or(DEFAULT_ANALYSIS_THREADS, |n| {
793 n.get().min(MAX_ANALYSIS_THREADS)
794 });
795 let next_index = AtomicUsize::new(0);
799 let files_done: Option<&AtomicUsize> = progress.map(|p| p.files_done.as_ref());
800
801 std::thread::scope(|s| -> Result<Vec<Vec<Result<Option<FileRecord>>>>> {
802 let mut handles = Vec::with_capacity(thread_count);
805 for _ in 0..thread_count {
806 handles.push(s.spawn(|| {
807 worker_loop(
808 paths,
809 root,
810 config,
811 include_globs,
812 exclude_globs,
813 enabled_languages,
814 cancel,
815 &next_index,
816 files_done,
817 )
818 }));
819 }
820 handles
821 .into_iter()
822 .map(|h| {
823 h.join()
824 .map_err(|_| anyhow::anyhow!("analysis thread panicked"))
825 })
826 .collect()
827 })
828}
829
830fn merge_chunk_results(
831 chunk_results: Vec<Vec<Result<Option<FileRecord>>>>,
832 analyzed: &mut Vec<FileRecord>,
833 skipped: &mut Vec<FileRecord>,
834 warnings: &mut Vec<String>,
835) -> Result<()> {
836 for chunk in chunk_results {
837 for result in chunk {
838 if let Some(record) = result? {
839 push_record(record, analyzed, skipped, warnings);
840 }
841 }
842 }
843 Ok(())
844}
845
846fn process_submodules(config: &AppConfig, analyzed: &mut [FileRecord]) -> Vec<SubmoduleSummary> {
848 let root = config.discovery.root_paths[0]
849 .canonicalize()
850 .unwrap_or_else(|_| config.discovery.root_paths[0].clone());
851 let submodules = detect_submodules(&root);
852 if submodules.is_empty() {
853 return Vec::new();
854 }
855
856 for file in analyzed.iter_mut() {
857 for (name, sub_path) in &submodules {
858 let prefix = sub_path.to_string_lossy().replace('\\', "/");
859 let rel = &file.relative_path;
860 if rel == &prefix || rel.starts_with(&format!("{prefix}/")) {
861 file.submodule = Some(name.clone());
862 break;
863 }
864 }
865 }
866
867 build_submodule_summaries(analyzed, &submodules)
868}
869
870fn assemble_run(
872 config: &AppConfig,
873 runtime_mode: &str,
874 analyzed: Vec<FileRecord>,
875 skipped: Vec<FileRecord>,
876 warnings: Vec<String>,
877 submodule_summaries: Vec<SubmoduleSummary>,
878) -> AnalysisRun {
879 let summary = build_summary(&analyzed, &skipped);
880 let language_summaries = build_language_summaries(&analyzed);
881 let col_threshold = config.analysis.style_col_threshold;
882 let style_summary = build_style_summary(&analyzed, col_threshold);
883
884 let first_root = config
885 .discovery
886 .root_paths
887 .first()
888 .map(|p| p.canonicalize().unwrap_or_else(|_| p.clone()));
889 let git = first_root
890 .as_deref()
891 .map(detect_git_for_run)
892 .unwrap_or_default();
893
894 let now = Utc::now();
895 let run_id = {
896 let uuid_suffix = Uuid::new_v4().simple().to_string();
897 format!("{}-{}", now.format("%Y%m%d-%H%M"), uuid_suffix)
898 };
899
900 AnalysisRun {
901 tool: ToolMetadata {
902 name: "sloc".into(),
903 version: env!("CARGO_PKG_VERSION").into(),
904 run_id,
905 timestamp_utc: now,
906 },
907 environment: EnvironmentMetadata {
908 operating_system: std::env::consts::OS.into(),
909 architecture: std::env::consts::ARCH.into(),
910 runtime_mode: runtime_mode.into(),
911 initiator_username: get_current_username(),
912 initiator_hostname: get_hostname(),
913 ci_name: if is_jenkins_env() {
914 Some(format!("Jenkins\t{}", get_hostname()))
915 } else {
916 detect_ci_system().map(str::to_string)
917 },
918 },
919 effective_configuration: config.clone(),
920 input_roots: config
921 .discovery
922 .root_paths
923 .iter()
924 .map(|p| path_to_string(p))
925 .collect(),
926 summary_totals: summary,
927 totals_by_language: language_summaries,
928 per_file_records: analyzed,
929 skipped_file_records: skipped,
930 warnings,
931 submodule_summaries,
932 git_commit_short: git.commit_short,
933 git_commit_long: git.commit_long,
934 git_branch: git.branch,
935 git_commit_author: git.author,
936 git_tags: git.tags,
937 git_nearest_tag: git.nearest_tag,
938 git_commit_date: git.commit_date,
939 git_remote_url: git.remote_url,
940 style_summary,
941 }
942}
943
944#[allow(clippy::too_many_lines)]
949pub fn analyze(
950 config: &AppConfig,
951 runtime_mode: &str,
952 cancel: Option<&AtomicBool>,
953 progress: Option<&ProgressCounters>,
954) -> Result<AnalysisRun> {
955 config.validate()?;
956
957 if config.discovery.root_paths.is_empty() {
958 anyhow::bail!("no input paths were provided");
959 }
960
961 let include_globs = compile_globset(&config.discovery.include_globs)?;
962 let exclude_globs = compile_globset(&config.discovery.exclude_globs)?;
963 let enabled_languages = parse_enabled_languages(&config.analysis.enabled_languages)?;
964
965 let mut analyzed = Vec::new();
966 let mut skipped = Vec::new();
967 let mut warnings = Vec::new();
968 let mut seen_paths = HashSet::new();
969
970 for root in &config.discovery.root_paths {
971 if cancel.is_some_and(|c| c.load(Ordering::Relaxed)) {
972 anyhow::bail!("analysis cancelled");
973 }
974
975 let root = root.canonicalize().unwrap_or_else(|_| root.clone());
976
977 if root.is_file() {
978 if let Some(record) = analyze_candidate_file(
979 &root,
980 root.parent().unwrap_or_else(|| Path::new(".")),
981 config,
982 include_globs.as_ref(),
983 exclude_globs.as_ref(),
984 enabled_languages.as_ref(),
985 )? {
986 push_record(record, &mut analyzed, &mut skipped, &mut warnings);
987 }
988 continue;
989 }
990
991 walk_root(
992 &root,
993 config,
994 include_globs.as_ref(),
995 exclude_globs.as_ref(),
996 enabled_languages.as_ref(),
997 &mut seen_paths,
998 &mut analyzed,
999 &mut skipped,
1000 &mut warnings,
1001 cancel,
1002 progress,
1003 )?;
1004 }
1005
1006 analyzed.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
1007 skipped.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
1008
1009 let submodule_summaries = if config.discovery.submodule_breakdown {
1011 process_submodules(config, &mut analyzed)
1012 } else {
1013 Vec::new()
1014 };
1015
1016 attach_coverage(config, &mut analyzed, &mut warnings);
1017
1018 Ok(assemble_run(
1019 config,
1020 runtime_mode,
1021 analyzed,
1022 skipped,
1023 warnings,
1024 submodule_summaries,
1025 ))
1026}
1027
1028fn attach_coverage(config: &AppConfig, analyzed: &mut [FileRecord], warnings: &mut Vec<String>) {
1029 let Some(cov_path) = coverage::resolve_coverage_file(config.analysis.coverage_file.as_deref())
1030 else {
1031 return;
1032 };
1033 tracing::debug!(path = %cov_path.display(), "loading coverage file");
1034 match fs::read_to_string(&cov_path) {
1035 Ok(content) => {
1036 let cov_map = coverage::parse_coverage_auto(&cov_path, &content);
1037 let mut matched: u32 = 0;
1038 let mut unmatched: u32 = 0;
1039 for record in analyzed.iter_mut() {
1040 record.coverage =
1041 coverage::lookup_coverage(&cov_map, &record.relative_path).cloned();
1042 if record.coverage.is_some() {
1043 matched += 1;
1044 } else {
1045 unmatched += 1;
1046 }
1047 }
1048 tracing::debug!(
1049 path = %cov_path.display(),
1050 coverage_entries = cov_map.len(),
1051 files_matched = matched,
1052 files_unmatched = unmatched,
1053 "coverage attached"
1054 );
1055 if unmatched > 0 && matched == 0 {
1056 tracing::warn!(
1057 path = %cov_path.display(),
1058 "coverage file loaded but no source files could be matched — check that paths in the coverage report match the scanned directory"
1059 );
1060 }
1061 }
1062 Err(e) => {
1063 tracing::warn!(path = %cov_path.display(), error = %e, "coverage file could not be read");
1064 warnings.push(format!(
1065 "coverage file '{}' could not be read: {e}",
1066 cov_path.display()
1067 ));
1068 }
1069 }
1070}
1071
1072fn push_record(
1073 record: FileRecord,
1074 analyzed: &mut Vec<FileRecord>,
1075 skipped: &mut Vec<FileRecord>,
1076 warnings: &mut Vec<String>,
1077) {
1078 warnings.extend(
1079 record
1080 .warnings
1081 .iter()
1082 .map(|warning| format!("{}: {warning}", record.relative_path)),
1083 );
1084
1085 match record.status {
1086 FileStatus::AnalyzedExact | FileStatus::AnalyzedBestEffort => analyzed.push(record),
1087 _ => skipped.push(record),
1088 }
1089}
1090
1091#[inline]
1093fn skip_with_reason(
1094 path: &Path,
1095 root: &Path,
1096 size: u64,
1097 reason: impl Into<String>,
1098) -> MetadataPolicyOutcome {
1099 MetadataPolicyOutcome::Skip(Box::new(skipped_record(
1100 path,
1101 root,
1102 size,
1103 FileStatus::SkippedByPolicy,
1104 vec![reason.into()],
1105 )))
1106}
1107
1108#[allow(clippy::too_many_arguments)]
1112fn check_metadata_policy(
1113 path: &Path,
1114 root: &Path,
1115 relative_path: &str,
1116 metadata: &fs::Metadata,
1117 config: &AppConfig,
1118 include_globs: Option<&GlobSet>,
1119 exclude_globs: Option<&GlobSet>,
1120) -> MetadataPolicyOutcome {
1121 let size = metadata.len();
1122
1123 if metadata.file_type().is_symlink() && !config.discovery.follow_symlinks {
1124 return skip_with_reason(path, root, size, "symlink skipped by policy");
1125 }
1126 if file_name_eq(path, ".gitignore") {
1127 return skip_with_reason(path, root, size, ".gitignore is always excluded");
1128 }
1129 if is_excluded_dir_path(path, &config.discovery.excluded_directories) {
1130 return skip_with_reason(path, root, size, "path matched excluded directory setting");
1131 }
1132 if size > config.discovery.max_file_size_bytes {
1133 return skip_with_reason(
1134 path,
1135 root,
1136 size,
1137 format!(
1138 "file exceeded max_file_size_bytes ({})",
1139 config.discovery.max_file_size_bytes
1140 ),
1141 );
1142 }
1143 if let Some(globs) = include_globs {
1144 if !globs.is_match(Path::new(relative_path)) && !globs.is_match(path) {
1145 return MetadataPolicyOutcome::Exclude;
1146 }
1147 }
1148 if let Some(globs) = exclude_globs {
1149 if globs.is_match(Path::new(relative_path)) || globs.is_match(path) {
1150 return skip_with_reason(path, root, size, "path matched exclude glob");
1151 }
1152 }
1153 if is_known_lockfile(path) && !config.analysis.include_lockfiles {
1154 return skip_with_reason(path, root, size, "lockfile skipped by default policy");
1155 }
1156
1157 MetadataPolicyOutcome::Continue
1158}
1159
/// Outcome of the content-based policy checks (vendor / generated / minified).
struct ContentPolicyResult {
    /// The path was classified as a vendor path.
    vendor: bool,
    /// Generated-file detection is enabled and the file looks generated.
    generated: bool,
    /// Minified-file detection is enabled and the file looks minified.
    minified: bool,
    /// `Some` when one of the policies requires the file to be skipped; the
    /// record is then returned to the caller instead of analyzing the file.
    skip_record: Option<FileRecord>,
}
1166
1167fn check_content_policy(
1170 path: &Path,
1171 root: &Path,
1172 size_bytes: u64,
1173 bytes: &[u8],
1174 config: &AppConfig,
1175) -> ContentPolicyResult {
1176 let vendor = is_vendor_path(path);
1177 if vendor && config.analysis.vendor_directory_detection {
1178 return ContentPolicyResult {
1179 vendor,
1180 generated: false,
1181 minified: false,
1182 skip_record: Some(skipped_record(
1183 path,
1184 root,
1185 size_bytes,
1186 FileStatus::SkippedByPolicy,
1187 vec!["vendor file skipped by policy".into()],
1188 )),
1189 };
1190 }
1191
1192 let generated = config.analysis.generated_file_detection && looks_generated(path, bytes);
1193 if generated {
1194 return ContentPolicyResult {
1195 vendor,
1196 generated,
1197 minified: false,
1198 skip_record: Some(skipped_record(
1199 path,
1200 root,
1201 size_bytes,
1202 FileStatus::SkippedByPolicy,
1203 vec!["generated file skipped by policy".into()],
1204 )),
1205 };
1206 }
1207
1208 let minified = config.analysis.minified_file_detection && looks_minified(path, bytes);
1209 if minified {
1210 return ContentPolicyResult {
1211 vendor,
1212 generated,
1213 minified,
1214 skip_record: Some(skipped_record(
1215 path,
1216 root,
1217 size_bytes,
1218 FileStatus::SkippedByPolicy,
1219 vec!["minified file skipped by policy".into()],
1220 )),
1221 };
1222 }
1223
1224 ContentPolicyResult {
1225 vendor,
1226 generated,
1227 minified,
1228 skip_record: None,
1229 }
1230}
1231
1232fn decode_file_contents(
1234 path: &Path,
1235 root: &Path,
1236 size_bytes: u64,
1237 bytes: &[u8],
1238 config: &AppConfig,
1239) -> Result<Option<(String, String, Vec<String>)>> {
1240 if is_binary(bytes) {
1241 return match config.analysis.binary_file_behavior {
1242 BinaryFileBehavior::Skip => Ok(None),
1243 BinaryFileBehavior::Fail => {
1244 anyhow::bail!("binary file encountered: {}", path.display())
1245 }
1246 };
1247 }
1248
1249 match decode_bytes(bytes) {
1250 Ok(result) => Ok(Some(result)),
1251 Err(err) => match config.analysis.decode_failure_behavior {
1252 FailureBehavior::WarnSkip => {
1253 let _ = (path, root, size_bytes); Err(anyhow::anyhow!("__decode_warn__: {err}"))
1258 }
1259 FailureBehavior::Fail => {
1260 anyhow::bail!("decode failure for {}: {err}", path.display())
1261 }
1262 },
1263 }
1264}
1265
/// Runs the per-file pipeline for one candidate path and produces its record.
///
/// The stages run in this order, each able to end the pipeline early:
/// metadata lookup, metadata policy, file read, content policy, decoding,
/// language detection, enabled-language filter, and finally line analysis.
///
/// Returns `Ok(None)` when the metadata policy excludes the file outright,
/// `Ok(Some(record))` for both analyzed files and files recorded as skipped,
/// and `Err` only when `decode_file_contents` fails with an error that does
/// not carry the `__decode_warn__: ` marker.
#[allow(clippy::too_many_lines)]
fn analyze_candidate_file(
    path: &Path,
    root: &Path,
    config: &AppConfig,
    include_globs: Option<&GlobSet>,
    exclude_globs: Option<&GlobSet>,
    enabled_languages: Option<&BTreeSet<Language>>,
) -> Result<Option<FileRecord>> {
    // `symlink_metadata` does not follow symlinks, so this describes the link itself.
    let metadata = match fs::symlink_metadata(path) {
        Ok(metadata) => metadata,
        Err(err) => {
            // Size is unknown at this point, so the record reports 0 bytes.
            return Ok(Some(skipped_record(
                path,
                root,
                0,
                FileStatus::ErrorInternal,
                vec![format!("failed to read metadata: {err}")],
            )));
        }
    };

    let relative_path = relative_path_string(path, root);

    // Checks that need only path and metadata run before the file is read.
    match check_metadata_policy(
        path,
        root,
        &relative_path,
        &metadata,
        config,
        include_globs,
        exclude_globs,
    ) {
        MetadataPolicyOutcome::Skip(record) => return Ok(Some(*record)),
        MetadataPolicyOutcome::Exclude => return Ok(None),
        MetadataPolicyOutcome::Continue => {}
    }

    let bytes = match fs::read(path) {
        Ok(bytes) => bytes,
        Err(err) => {
            return Ok(Some(skipped_record(
                path,
                root,
                metadata.len(),
                FileStatus::ErrorInternal,
                vec![format!("failed to read file: {err}")],
            )));
        }
    };

    // Content-based policy; a populated `skip_record` ends the pipeline here.
    let content_policy = check_content_policy(path, root, metadata.len(), &bytes, config);
    if let Some(record) = content_policy.skip_record {
        return Ok(Some(record));
    }
    // Keep the flags so they can be copied onto the final record.
    let (vendor, generated, minified) = (
        content_policy.vendor,
        content_policy.generated,
        content_policy.minified,
    );

    let (text, encoding, decode_warnings) =
        match decode_file_contents(path, root, metadata.len(), &bytes, config) {
            Ok(Some(result)) => result,
            // `None` means the bytes were classified as binary and skipped.
            Ok(None) => {
                return Ok(Some(skipped_record(
                    path,
                    root,
                    metadata.len(),
                    FileStatus::SkippedBinary,
                    vec!["binary file skipped by default".into()],
                )));
            }
            Err(err) => {
                // An error carrying the marker prefix is a warn-and-skip decode
                // failure; anything else is propagated to the caller.
                let msg = err.to_string();
                if let Some(warn_msg) = msg.strip_prefix("__decode_warn__: ") {
                    return Ok(Some(skipped_record(
                        path,
                        root,
                        metadata.len(),
                        FileStatus::SkippedDecodeError,
                        vec![warn_msg.to_string()],
                    )));
                }
                return Err(err);
            }
        };

    // The first line is handed to language detection along with the path.
    let first_line = text.lines().next();
    let language = detect_language(
        path,
        first_line,
        &config.analysis.extension_overrides,
        config.analysis.shebang_detection,
    );

    let Some(language) = language else {
        return Ok(Some(skipped_record(
            path,
            root,
            metadata.len(),
            FileStatus::SkippedUnsupported,
            vec!["unsupported or undetected language".into()],
        )));
    };

    // `None` means no language filter; otherwise the language must be listed.
    if let Some(enabled) = enabled_languages {
        if !enabled.contains(&language) {
            return Ok(Some(skipped_record(
                path,
                root,
                metadata.len(),
                FileStatus::SkippedByPolicy,
                vec![format!(
                    "language {} disabled by configuration",
                    language.display_name()
                )],
            )));
        }
    }

    // Translate the two policy enums into the boolean switches the analyzer takes.
    let ieee_opts = AnalysisOptions {
        blank_in_block_comment_as_comment: config.analysis.blank_in_block_comment_policy
            == BlankInBlockCommentPolicy::CountAsComment,
        collapse_continuation_lines: config.analysis.continuation_line_policy
            == ContinuationLinePolicy::CollapseToLogical,
    };
    let analysis = analyze_text(language, &text, ieee_opts);
    let effective_counts = compute_effective_counts(
        &analysis.raw,
        config.analysis.mixed_line_policy,
        config.analysis.python_docstrings_as_comments,
        config.analysis.count_compiler_directives,
    );

    // Decode warnings come first, followed by the analyzer's own warnings.
    let mut warnings = decode_warnings;
    warnings.extend(analysis.warnings.clone());

    Ok(Some(FileRecord {
        path: path_to_string(path),
        relative_path,
        language: Some(language),
        size_bytes: metadata.len(),
        detected_encoding: Some(encoding),
        raw_line_categories: analysis.raw,
        effective_counts,
        // Only the best-effort lexical mode is reported as a best-effort result.
        status: match analysis.parse_mode {
            ParseMode::Lexical | ParseMode::TreeSitter => FileStatus::AnalyzedExact,
            ParseMode::LexicalBestEffort => FileStatus::AnalyzedBestEffort,
        },
        warnings,
        generated,
        minified,
        vendor,
        parse_mode: Some(analysis.parse_mode),
        // Not populated by this function.
        submodule: None,
        coverage: None,
        style_analysis: analysis.style_analysis,
    }))
}
1429
1430const fn compute_effective_counts(
1431 raw: &RawLineCounts,
1432 mixed_line_policy: MixedLinePolicy,
1433 python_docstrings_as_comments: bool,
1434 count_compiler_directives: bool,
1435) -> EffectiveCounts {
1436 let mut effective = EffectiveCounts {
1437 code_lines: raw.code_only_lines,
1438 comment_lines: raw.single_comment_only_lines + raw.multi_comment_only_lines,
1439 blank_lines: raw.blank_only_lines,
1440 mixed_lines_separate: 0,
1441 };
1442
1443 if python_docstrings_as_comments {
1444 effective.comment_lines += raw.docstring_comment_lines;
1445 } else {
1446 effective.code_lines += raw.docstring_comment_lines;
1447 }
1448
1449 let mixed_total = raw.mixed_code_single_comment_lines + raw.mixed_code_multi_comment_lines;
1450 match mixed_line_policy {
1451 MixedLinePolicy::CodeOnly => effective.code_lines += mixed_total,
1452 MixedLinePolicy::CodeAndComment => {
1453 effective.code_lines += mixed_total;
1454 effective.comment_lines += mixed_total;
1455 }
1456 MixedLinePolicy::CommentOnly => effective.comment_lines += mixed_total,
1457 MixedLinePolicy::SeparateMixedCategory => effective.mixed_lines_separate += mixed_total,
1458 }
1459
1460 if !count_compiler_directives {
1463 effective.code_lines = effective
1464 .code_lines
1465 .saturating_sub(raw.compiler_directive_lines);
1466 }
1467
1468 effective
1469}
1470
1471fn build_summary(analyzed: &[FileRecord], skipped: &[FileRecord]) -> SummaryTotals {
1472 let mut summary = SummaryTotals {
1473 files_considered: (analyzed.len() + skipped.len()) as u64,
1474 files_analyzed: analyzed.len() as u64,
1475 files_skipped: skipped.len() as u64,
1476 ..Default::default()
1477 };
1478
1479 for record in analyzed {
1480 summary.total_physical_lines += record.raw_line_categories.total_physical_lines;
1481 summary.code_lines += record.effective_counts.code_lines;
1482 summary.comment_lines += record.effective_counts.comment_lines;
1483 summary.blank_lines += record.effective_counts.blank_lines;
1484 summary.mixed_lines_separate += record.effective_counts.mixed_lines_separate;
1485 summary.functions += record.raw_line_categories.functions;
1486 summary.classes += record.raw_line_categories.classes;
1487 summary.variables += record.raw_line_categories.variables;
1488 summary.imports += record.raw_line_categories.imports;
1489 summary.test_count += record.raw_line_categories.test_count;
1490 summary.test_assertion_count += record.raw_line_categories.test_assertion_count;
1491 summary.test_suite_count += record.raw_line_categories.test_suite_count;
1492 if let Some(cov) = &record.coverage {
1493 summary.coverage_lines_found += u64::from(cov.lines_found);
1494 summary.coverage_lines_hit += u64::from(cov.lines_hit);
1495 summary.coverage_functions_found += u64::from(cov.functions_found);
1496 summary.coverage_functions_hit += u64::from(cov.functions_hit);
1497 summary.coverage_branches_found += u64::from(cov.branches_found);
1498 summary.coverage_branches_hit += u64::from(cov.branches_hit);
1499 }
1500 }
1501
1502 summary
1503}
1504
1505const fn zeroed_summary(language: Language) -> LanguageSummary {
1507 LanguageSummary {
1508 language,
1509 files: 0,
1510 total_physical_lines: 0,
1511 code_lines: 0,
1512 comment_lines: 0,
1513 blank_lines: 0,
1514 mixed_lines_separate: 0,
1515 functions: 0,
1516 classes: 0,
1517 variables: 0,
1518 imports: 0,
1519 test_count: 0,
1520 test_assertion_count: 0,
1521 test_suite_count: 0,
1522 coverage_lines_found: 0,
1523 coverage_lines_hit: 0,
1524 coverage_functions_found: 0,
1525 coverage_functions_hit: 0,
1526 coverage_branches_found: 0,
1527 coverage_branches_hit: 0,
1528 }
1529}
1530
1531fn accumulate_record_into_summary(entry: &mut LanguageSummary, record: &FileRecord) {
1533 entry.files += 1;
1534 let r = &record.raw_line_categories;
1535 entry.total_physical_lines += r.total_physical_lines;
1536 entry.code_lines += record.effective_counts.code_lines;
1537 entry.comment_lines += record.effective_counts.comment_lines;
1538 entry.blank_lines += record.effective_counts.blank_lines;
1539 entry.mixed_lines_separate += record.effective_counts.mixed_lines_separate;
1540 entry.functions += r.functions;
1541 entry.classes += r.classes;
1542 entry.variables += r.variables;
1543 entry.imports += r.imports;
1544 entry.test_count += r.test_count;
1545 entry.test_assertion_count += r.test_assertion_count;
1546 entry.test_suite_count += r.test_suite_count;
1547 if let Some(cov) = &record.coverage {
1548 entry.coverage_lines_found += u64::from(cov.lines_found);
1549 entry.coverage_lines_hit += u64::from(cov.lines_hit);
1550 entry.coverage_functions_found += u64::from(cov.functions_found);
1551 entry.coverage_functions_hit += u64::from(cov.functions_hit);
1552 entry.coverage_branches_found += u64::from(cov.branches_found);
1553 entry.coverage_branches_hit += u64::from(cov.branches_hit);
1554 }
1555}
1556
1557fn build_language_summaries(analyzed: &[FileRecord]) -> Vec<LanguageSummary> {
1558 let mut by_language: BTreeMap<Language, LanguageSummary> = BTreeMap::new();
1559 for record in analyzed {
1560 let Some(language) = record.language else {
1561 continue;
1562 };
1563 let entry = by_language
1564 .entry(language)
1565 .or_insert_with(|| zeroed_summary(language));
1566 accumulate_record_into_summary(entry, record);
1567 }
1568 by_language.into_values().collect()
1569}
1570
1571fn skipped_record(
1572 path: &Path,
1573 root: &Path,
1574 size_bytes: u64,
1575 status: FileStatus,
1576 warnings: Vec<String>,
1577) -> FileRecord {
1578 FileRecord {
1579 path: path_to_string(path),
1580 relative_path: relative_path_string(path, root),
1581 language: None,
1582 size_bytes,
1583 detected_encoding: None,
1584 raw_line_categories: RawLineCounts::default(),
1585 effective_counts: EffectiveCounts::default(),
1586 status,
1587 warnings,
1588 generated: false,
1589 minified: false,
1590 vendor: false,
1591 parse_mode: None,
1592 submodule: None,
1593 coverage: None,
1594 style_analysis: None,
1595 }
1596}
1597
/// Renders `path` relative to `root` as a forward-slash string.
///
/// When `path` does not start with `root`, the whole `path` is rendered
/// instead. Non-UTF-8 sequences are replaced lossily.
fn relative_path_string(path: &Path, root: &Path) -> String {
    let shown = match path.strip_prefix(root) {
        Ok(tail) => tail,
        Err(_) => path,
    };
    let lossy = shown.to_string_lossy();
    lossy.replace('\\', "/")
}
1604
/// Renders `path` as a string with every backslash turned into a forward
/// slash. Non-UTF-8 sequences are replaced lossily.
fn path_to_string(path: &Path) -> String {
    let lossy = path.to_string_lossy();
    lossy.replace('\\', "/")
}
1608
/// Reads `<root>/.gitmodules` and returns one `(name, path)` pair for every
/// `[submodule "<name>"]` section that declares a `path` key.
///
/// Returns an empty vector when the file is missing, is not a regular file, or
/// cannot be read as UTF-8 text. Sections without a `path` are dropped, and a
/// `path` line that appears before any section header is ignored.
///
/// Parsing is line based and deliberately lenient:
///
/// * A header must have the exact shape `[submodule "<name>"]`. A malformed
///   line such as `[submodule "]` is not treated as a header (and does not
///   panic).
/// * Only the key `path` (compared case-insensitively, as git does for
///   variable names) sets the path; other keys that merely start with `path`
///   are ignored.
/// * The value is everything after the first `=`, trimmed.
#[must_use]
pub fn detect_submodules(root: &Path) -> Vec<(String, PathBuf)> {
    let gitmodules = root.join(".gitmodules");
    if !gitmodules.is_file() {
        return Vec::new();
    }
    let Ok(content) = fs::read_to_string(&gitmodules) else {
        return Vec::new();
    };

    let mut result = Vec::new();
    let mut current_name: Option<String> = None;
    let mut current_path: Option<PathBuf> = None;

    for line in content.lines() {
        let trimmed = line.trim();

        // Stripping the prefix first and the suffix from what remains means
        // the two can never overlap, unlike a manual index-range slice.
        let header = trimmed
            .strip_prefix("[submodule \"")
            .and_then(|rest| rest.strip_suffix("\"]"));

        if let Some(name) = header {
            // A new section starts: flush the previous one if it was complete.
            // Both options are taken so no stale state leaks into this section.
            if let (Some(prev_name), Some(prev_path)) = (current_name.take(), current_path.take())
            {
                result.push((prev_name, prev_path));
            }
            current_name = Some(name.to_string());
        } else if let Some((key, value)) = trimmed.split_once('=') {
            if key.trim().eq_ignore_ascii_case("path") {
                current_path = Some(PathBuf::from(value.trim()));
            }
        }
    }

    // Flush the final section.
    if let (Some(name), Some(path)) = (current_name, current_path) {
        result.push((name, path));
    }

    result
}
1645
1646fn build_submodule_summaries(
1647 analyzed: &[FileRecord],
1648 submodules: &[(String, PathBuf)],
1649) -> Vec<SubmoduleSummary> {
1650 submodules
1651 .iter()
1652 .map(|(name, path)| {
1653 let files: Vec<&FileRecord> = analyzed
1654 .iter()
1655 .filter(|f| f.submodule.as_deref() == Some(name.as_str()))
1656 .collect();
1657
1658 let files_analyzed = files.len() as u64;
1659 let total_physical_lines = files
1660 .iter()
1661 .map(|f| f.raw_line_categories.total_physical_lines)
1662 .sum();
1663 let code_lines = files.iter().map(|f| f.effective_counts.code_lines).sum();
1664 let comment_lines = files.iter().map(|f| f.effective_counts.comment_lines).sum();
1665 let blank_lines = files.iter().map(|f| f.effective_counts.blank_lines).sum();
1666 let language_summaries = build_language_summaries_from_slice(&files);
1667
1668 SubmoduleSummary {
1669 name: name.clone(),
1670 relative_path: path.to_string_lossy().replace('\\', "/"),
1671 files_analyzed,
1672 total_physical_lines,
1673 code_lines,
1674 comment_lines,
1675 blank_lines,
1676 language_summaries,
1677 }
1678 })
1679 .filter(|s| s.files_analyzed > 0)
1680 .collect()
1681}
1682
1683#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
1685fn dominant_indent_label(files: &[&StyleAnalysis]) -> String {
1686 let mut votes = [0u32; 6];
1687 for f in files {
1688 let idx = match f.indent_style {
1689 IndentStyle::Tabs => 0,
1690 IndentStyle::Spaces2 => 1,
1691 IndentStyle::Spaces4 => 2,
1692 IndentStyle::Spaces8 => 3,
1693 IndentStyle::Mixed => 4,
1694 IndentStyle::Unknown => 5,
1695 };
1696 votes[idx] += 1;
1697 }
1698 let labels = ["Tabs", "2-Space", "4-Space", "8-Space", "Mixed", "\u{2014}"];
1699 labels[votes
1700 .iter()
1701 .enumerate()
1702 .max_by_key(|(_, v)| *v)
1703 .map(|(i, _)| i)
1704 .unwrap_or(5)]
1705 .to_string()
1706}
1707
1708#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
1710fn line80_pct(files: &[&StyleAnalysis]) -> u8 {
1711 if files.is_empty() {
1712 return 0;
1713 }
1714 let compliant = files
1715 .iter()
1716 .filter(|f| f.total_lines == 0 || (f.lines_over_80 as f32 / f.total_lines as f32) <= 0.05)
1717 .count() as u32;
1718 ((compliant * 100) / files.len() as u32) as u8
1719}
1720
1721#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
1724fn line_col_pct(files: &[&StyleAnalysis], threshold: u16) -> u8 {
1725 if files.is_empty() {
1726 return 0;
1727 }
1728 let compliant = files
1729 .iter()
1730 .filter(|f| {
1731 let over = if threshold <= 80 {
1732 f.lines_over_80
1733 } else if threshold <= 100 {
1734 f.lines_over_100
1735 } else {
1736 f.lines_over_120
1737 };
1738 f.total_lines == 0 || (over as f32 / f.total_lines as f32) <= 0.05
1739 })
1740 .count() as u32;
1741 ((compliant * 100) / files.len() as u32) as u8
1742}
1743
1744#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
1746fn build_language_group(
1747 family: &str,
1748 files: &[&StyleAnalysis],
1749 col_threshold: u16,
1750) -> LanguageStyleGroup {
1751 let count = files.len() as u32;
1752
1753 let mut all_names: Vec<String> = Vec::new();
1755 for f in files {
1756 for g in &f.guide_scores {
1757 if !all_names.contains(&g.name) {
1758 all_names.push(g.name.clone());
1759 }
1760 }
1761 }
1762
1763 let mut guide_avg_scores: Vec<(String, u8)> = all_names
1764 .into_iter()
1765 .map(|name| {
1766 let sum: u32 = files
1767 .iter()
1768 .filter_map(|f| f.guide_scores.iter().find(|g| g.name == name))
1769 .map(|g| u32::from(g.score_pct))
1770 .sum();
1771 let avg = (sum / count) as u8;
1772 (name, avg)
1773 })
1774 .collect();
1775 guide_avg_scores.sort_by_key(|s| std::cmp::Reverse(s.1));
1776
1777 let (dominant_guide, dominant_score_pct) = guide_avg_scores
1778 .first()
1779 .map(|(n, s)| (n.clone(), *s))
1780 .unwrap_or_default();
1781
1782 let lcp = line_col_pct(files, col_threshold);
1783 LanguageStyleGroup {
1784 language_family: family.to_string(),
1785 files_count: count,
1786 dominant_guide,
1787 dominant_score_pct,
1788 common_indent_style: dominant_indent_label(files),
1789 guide_avg_scores,
1790 line80_compliant_pct: line80_pct(files),
1791 line_col_compliant_pct: lcp,
1792 }
1793}
1794
1795#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
1798fn build_style_summary(analyzed: &[FileRecord], col_threshold: u16) -> Option<StyleSummary> {
1799 let all_style: Vec<&StyleAnalysis> = analyzed
1800 .iter()
1801 .filter_map(|f| f.style_analysis.as_ref())
1802 .collect();
1803
1804 if all_style.is_empty() {
1805 return None;
1806 }
1807
1808 let mut families: std::collections::BTreeMap<&str, Vec<&StyleAnalysis>> =
1810 std::collections::BTreeMap::new();
1811 for sa in &all_style {
1812 families
1813 .entry(sa.language_family.as_str())
1814 .or_default()
1815 .push(sa);
1816 }
1817
1818 let mut by_language: Vec<LanguageStyleGroup> = families
1819 .iter()
1820 .map(|(family, files)| build_language_group(family, files, col_threshold))
1821 .collect();
1822 by_language.sort_by_key(|g| std::cmp::Reverse(g.files_count));
1823
1824 let files_analyzed = all_style.len() as u32;
1825 let common_indent_style = dominant_indent_label(&all_style);
1826 let line80_compliant_pct = line80_pct(&all_style);
1827 let line_col_compliant_pct = line_col_pct(&all_style, col_threshold);
1828
1829 Some(StyleSummary {
1830 files_analyzed,
1831 common_indent_style,
1832 line80_compliant_pct,
1833 line_col_compliant_pct,
1834 col_threshold,
1835 by_language,
1836 })
1837}
1838
1839fn build_language_summaries_from_slice(files: &[&FileRecord]) -> Vec<LanguageSummary> {
1840 let mut map: BTreeMap<String, LanguageSummary> = BTreeMap::new();
1841 for file in files {
1842 let Some(lang) = file.language else { continue };
1843 let entry = map
1844 .entry(lang.display_name().to_string())
1845 .or_insert_with(|| zeroed_summary(lang));
1846 accumulate_record_into_summary(entry, file);
1847 }
1848 map.into_values().collect()
1849}
1850
/// Returns `true` when the final component of `path` is valid UTF-8 and equals
/// `expected` exactly (case-sensitive).
fn file_name_eq(path: &Path, expected: &str) -> bool {
    let name = path.file_name().and_then(|raw| raw.to_str());
    name == Some(expected)
}
1856
/// Returns `true` when any component of `path` exactly equals one of the names
/// in `excluded_dirs`.
///
/// Components that are not valid UTF-8 never match.
fn is_excluded_dir_path(path: &Path, excluded_dirs: &[String]) -> bool {
    for component in path.components() {
        let Some(part) = component.as_os_str().to_str() else {
            continue;
        };
        if excluded_dirs.iter().any(|dir| dir.as_str() == part) {
            return true;
        }
    }
    false
}
1865
/// Returns `true` when any component of `path` is exactly `vendor`,
/// `node_modules` or `packages`.
///
/// Components that are not valid UTF-8 never match.
fn is_vendor_path(path: &Path) -> bool {
    const VENDOR_DIRS: [&str; 3] = ["vendor", "node_modules", "packages"];

    path.components()
        .filter_map(|component| component.as_os_str().to_str())
        .any(|part| VENDOR_DIRS.contains(&part))
}
1874
/// Returns `true` when the file name of `path` is one of the recognised
/// dependency lockfiles. The comparison is exact and case-sensitive.
fn is_known_lockfile(path: &Path) -> bool {
    const LOCKFILES: [&str; 7] = [
        "Cargo.lock",
        "package-lock.json",
        "yarn.lock",
        "pnpm-lock.yaml",
        "Pipfile.lock",
        "poetry.lock",
        "composer.lock",
    ];

    match path.file_name().and_then(|raw| raw.to_str()) {
        Some(name) => LOCKFILES.contains(&name),
        None => false,
    }
}
1891
1892fn looks_generated(path: &Path, bytes: &[u8]) -> bool {
1893 let file_name = path
1894 .file_name()
1895 .and_then(|name| name.to_str())
1896 .unwrap_or_default();
1897 if file_name.contains(".generated.") || file_name.contains(".g.") {
1898 return true;
1899 }
1900
1901 let sample = String::from_utf8_lossy(&bytes[..bytes.len().min(GENERATED_SAMPLE_BYTES)])
1902 .to_ascii_lowercase();
1903 sample.contains("@generated") || sample.contains("generated by")
1904}
1905
1906fn looks_minified(path: &Path, bytes: &[u8]) -> bool {
1907 let file_name = path
1908 .file_name()
1909 .and_then(|name| name.to_str())
1910 .unwrap_or_default();
1911 if file_name.contains(".min.") {
1912 return true;
1913 }
1914
1915 let sample = String::from_utf8_lossy(&bytes[..bytes.len().min(MINIFIED_SAMPLE_BYTES)]);
1916 let longest_line = sample.lines().map(str::len).max().unwrap_or(0);
1917 let whitespace = sample.chars().filter(|c| c.is_whitespace()).count();
1918 longest_line > MINIFIED_LINE_THRESHOLD && whitespace * 100 < sample.len().max(1)
1919}
1920
1921fn is_binary(bytes: &[u8]) -> bool {
1922 if bytes.starts_with(&[0xEF, 0xBB, 0xBF])
1923 || bytes.starts_with(&[0xFF, 0xFE])
1924 || bytes.starts_with(&[0xFE, 0xFF])
1925 {
1926 return false;
1927 }
1928
1929 let sample = &bytes[..bytes.len().min(BINARY_SAMPLE_BYTES)];
1930 sample.contains(&0)
1931}
1932
1933fn decode_utf16_bom(
1936 bom_stripped: &[u8],
1937 encoding: &'static encoding_rs::Encoding,
1938 label: &str,
1939) -> (String, String, Vec<String>) {
1940 let (cow, _, had_errors) = encoding.decode(bom_stripped);
1941 let mut warnings = Vec::new();
1942 if had_errors {
1943 warnings.push(format!("{label} decode contained replacement characters"));
1944 }
1945 (cow.into_owned(), label.into(), warnings)
1946}
1947
1948fn decode_bytes(bytes: &[u8]) -> std::result::Result<(String, String, Vec<String>), String> {
1949 if bytes.starts_with(&[0xEF, 0xBB, 0xBF]) {
1950 let text = String::from_utf8(bytes[3..].to_vec()).map_err(|err| err.to_string())?;
1951 return Ok((text, "utf-8-bom".into(), vec![]));
1952 }
1953 if bytes.starts_with(&[0xFF, 0xFE]) {
1954 return Ok(decode_utf16_bom(&bytes[2..], UTF_16LE, "utf-16le"));
1955 }
1956 if bytes.starts_with(&[0xFE, 0xFF]) {
1957 return Ok(decode_utf16_bom(&bytes[2..], UTF_16BE, "utf-16be"));
1958 }
1959
1960 #[allow(clippy::option_if_let_else)]
1962 if let Ok(text) = String::from_utf8(bytes.to_vec()) {
1963 Ok((text, "utf-8".into(), vec![]))
1964 } else {
1965 let (cow, _, had_errors) = WINDOWS_1252.decode(bytes);
1966 let mut warnings = vec!["decoded using windows-1252 fallback".into()];
1967 if had_errors {
1968 warnings.push("fallback decode contained replacement characters".into());
1969 }
1970 Ok((cow.into_owned(), "windows-1252".into(), warnings))
1971 }
1972}
1973
1974fn compile_globset(patterns: &[String]) -> Result<Option<GlobSet>> {
1975 if patterns.is_empty() {
1976 return Ok(None);
1977 }
1978
1979 let mut builder = GlobSetBuilder::new();
1980 for pattern in patterns {
1981 builder
1982 .add(Glob::new(pattern).with_context(|| format!("invalid glob pattern: {pattern}"))?);
1983 }
1984 Ok(Some(
1985 builder.build().context("failed to compile glob filters")?,
1986 ))
1987}
1988
1989fn parse_enabled_languages(enabled: &[String]) -> Result<Option<BTreeSet<Language>>> {
1990 if enabled.is_empty() {
1991 return Ok(None);
1992 }
1993
1994 let supported = supported_languages();
1995 let mut set = BTreeSet::new();
1996 for name in enabled {
1997 let language = Language::from_name(name)
1998 .with_context(|| format!("unsupported language in config: {name}"))?;
1999 if !supported.contains(&language) {
2000 anyhow::bail!("language {name} is not supported in this build");
2001 }
2002 set.insert(language);
2003 }
2004 Ok(Some(set))
2005}
2006
2007pub fn write_json(run: &AnalysisRun, output_path: &Path) -> Result<()> {
2011 let json = serde_json::to_string_pretty(run).context("failed to serialize analysis run")?;
2012 fs::write(output_path, json)
2013 .with_context(|| format!("failed to write JSON output to {}", output_path.display()))
2014}
2015
2016pub fn read_json(path: &Path) -> Result<AnalysisRun> {
2020 let contents = fs::read_to_string(path)
2021 .with_context(|| format!("failed to read result file {}", path.display()))?;
2022 serde_json::from_str(&contents)
2023 .with_context(|| format!("failed to parse JSON result {}", path.display()))
2024}
2025
#[cfg(test)]
mod tests {
    use super::*;

    /// Under `CodeOnly`, mixed lines are added to code; with docstrings counted
    /// as comments they are added to the comment total.
    #[test]
    fn effective_counts_respect_code_only_policy() {
        let effective = compute_effective_counts(
            &RawLineCounts {
                code_only_lines: 2,
                single_comment_only_lines: 1,
                mixed_code_single_comment_lines: 3,
                docstring_comment_lines: 2,
                ..RawLineCounts::default()
            },
            MixedLinePolicy::CodeOnly,
            true,
            true,
        );

        assert_eq!(effective.code_lines, 5);
        assert_eq!(effective.comment_lines, 3);
    }

    /// Under `SeparateMixedCategory`, mixed lines go to their own bucket and
    /// leave the code and comment totals untouched.
    #[test]
    fn effective_counts_can_separate_mixed() {
        let effective = compute_effective_counts(
            &RawLineCounts {
                mixed_code_single_comment_lines: 2,
                mixed_code_multi_comment_lines: 1,
                ..RawLineCounts::default()
            },
            MixedLinePolicy::SeparateMixedCategory,
            true,
            true,
        );

        assert_eq!(effective.mixed_lines_separate, 3);
        assert_eq!(effective.code_lines, 0);
        assert_eq!(effective.comment_lines, 0);
    }

    /// Byte 0x96 is not valid UTF-8 here, so decoding falls back to
    /// windows-1252, where it maps to an en dash.
    #[test]
    fn windows_1252_fallback_decodes() {
        let input: [u8; 7] = [0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x96, 0x57];

        let (text, encoding, warnings) = decode_bytes(&input).unwrap();

        assert_eq!(encoding, "windows-1252");
        assert!(text.contains('–'));
        assert!(!warnings.is_empty());
    }
}