1#![allow(clippy::multiple_crate_versions)]
4
5pub mod baseline;
6pub mod coverage;
7pub mod delta;
8pub mod history;
9pub use baseline::{check_against_baseline, resolve_baselines_path, BaselineEntry, BaselineStore};
10pub use coverage::{aggregate_line_coverage, lookup_coverage, parse_lcov, FileCoverage};
11pub use delta::{compute_delta, FileChangeStatus, FileDelta, ScanComparison, SummaryDelta};
12pub use history::{RegistryEntry, ScanRegistry, ScanSummarySnapshot, WatchedDirsStore};
13
14use std::collections::{BTreeMap, BTreeSet, HashSet};
15use std::fs;
16use std::path::{Path, PathBuf};
17use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
18use std::sync::Arc;
19
20use anyhow::{Context, Result};
21use chrono::{DateTime, Utc};
22use encoding_rs::{UTF_16BE, UTF_16LE, WINDOWS_1252};
23use globset::{Glob, GlobSet, GlobSetBuilder};
24use ignore::WalkBuilder;
25use serde::{Deserialize, Serialize};
26use uuid::Uuid;
27
28use sloc_config::{
29 AppConfig, BinaryFileBehavior, BlankInBlockCommentPolicy, ContinuationLinePolicy,
30 FailureBehavior, MixedLinePolicy,
31};
32use sloc_languages::style::IndentStyle;
33use sloc_languages::{
34 analyze_text, detect_language, supported_languages, AnalysisOptions, Language, ParseMode,
35 RawLineCounts, StyleAnalysis,
36};
37
/// Upper bound on the number of worker threads spawned by `run_parallel_analysis`.
const MAX_ANALYSIS_THREADS: usize = 16;
/// Worker-thread count used when `std::thread::available_parallelism` returns an error.
const DEFAULT_ANALYSIS_THREADS: usize = 4;
// NOTE(review): the four sampling constants below are not referenced in the
// portion of the file reviewed here. By name they presumably bound how much of
// a file the generated-, minified- and binary-file heuristics inspect —
// confirm at their use sites.
const GENERATED_SAMPLE_BYTES: usize = 1024;
const MINIFIED_SAMPLE_BYTES: usize = 4096;
const MINIFIED_LINE_THRESHOLD: usize = 2000;
const BINARY_SAMPLE_BYTES: usize = 8192;
52
/// Shared counters that `analyze` updates while a scan is running.
pub struct ProgressCounters {
    /// Incremented by one each time a worker has finished processing one discovered path.
    pub files_done: Arc<AtomicUsize>,
    /// Increased by the number of newly discovered paths after each root directory is walked.
    pub files_total: Arc<AtomicUsize>,
}
60
/// Decision returned by `check_metadata_policy` before a file's contents are read.
enum MetadataPolicyOutcome {
    /// The file is not analyzed; the boxed record is returned to the caller as the file's result.
    Skip(Box<FileRecord>),
    /// The file is dropped without producing any record.
    Exclude,
    /// No metadata rule applied; processing continues with reading the file.
    Continue,
}
70
/// Outcome recorded for a file. Serialized in `snake_case`.
///
/// `push_record` files the two `Analyzed*` variants under the analyzed list and
/// every other variant under the skipped list.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FileStatus {
    AnalyzedExact,
    AnalyzedBestEffort,
    /// Content was classified as binary and the configured behavior is to skip.
    SkippedBinary,
    /// Decoding failed and the configured behavior is warn-and-skip.
    SkippedDecodeError,
    // NOTE(review): not assigned in the reviewed portion of the file; presumably
    // used when no language could be detected — confirm.
    SkippedUnsupported,
    /// A discovery or content policy rule (symlink, size, glob, vendor, ...) excluded the file.
    SkippedByPolicy,
    /// Reading the file's metadata or contents failed.
    ErrorInternal,
}
82
/// Line counts stored per file in `FileRecord::effective_counts`.
// NOTE(review): how these are derived from `RawLineCounts` (e.g. the effect of
// the mixed-line policy) is decided where records are built, outside the
// reviewed portion of the file — confirm before relying on exact semantics.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct EffectiveCounts {
    pub code_lines: u64,
    pub comment_lines: u64,
    pub blank_lines: u64,
    pub mixed_lines_separate: u64,
}
90
/// Identifies the tool and the individual run; filled in by `assemble_run`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolMetadata {
    /// Tool name (`assemble_run` writes `"sloc"`).
    pub name: String,
    /// Crate version taken from `CARGO_PKG_VERSION` at compile time.
    pub version: String,
    /// `<YYYYMMDD-HHMM>-<uuid v4, simple form>` built from the run timestamp.
    pub run_id: String,
    /// UTC time captured when the run result was assembled.
    pub timestamp_utc: DateTime<Utc>,
}
98
/// Describes the machine and context a run executed in; filled in by `assemble_run`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnvironmentMetadata {
    /// Value of `std::env::consts::OS`.
    pub operating_system: String,
    /// Value of `std::env::consts::ARCH`.
    pub architecture: String,
    /// The `runtime_mode` string the caller passed to `analyze`.
    pub runtime_mode: String,
    /// Result of `get_current_username` (`"unknown"` when no variable is set).
    pub initiator_username: String,
    /// Result of `get_hostname` (`"unknown"` when nothing could be determined).
    pub initiator_hostname: String,
    /// CI system name from `detect_ci_system`; omitted from output when `None`
    /// and defaulted to `None` when absent from input.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub ci_name: Option<String>,
}
111
/// Run-wide totals stored in `AnalysisRun::summary_totals`; produced by
/// `build_summary` from the analyzed and skipped records.
///
/// Fields marked `#[serde(default)]` deserialize as `0` when missing from the input.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct SummaryTotals {
    pub files_considered: u64,
    pub files_analyzed: u64,
    pub files_skipped: u64,
    pub total_physical_lines: u64,
    pub code_lines: u64,
    pub comment_lines: u64,
    pub blank_lines: u64,
    pub mixed_lines_separate: u64,
    #[serde(default)]
    pub functions: u64,
    #[serde(default)]
    pub classes: u64,
    #[serde(default)]
    pub variables: u64,
    #[serde(default)]
    pub imports: u64,
    #[serde(default)]
    pub test_count: u64,
    #[serde(default)]
    pub test_assertion_count: u64,
    #[serde(default)]
    pub test_suite_count: u64,
    #[serde(default)]
    pub coverage_lines_found: u64,
    #[serde(default)]
    pub coverage_lines_hit: u64,
    #[serde(default)]
    pub coverage_functions_found: u64,
    #[serde(default)]
    pub coverage_functions_hit: u64,
    #[serde(default)]
    pub coverage_branches_found: u64,
    #[serde(default)]
    pub coverage_branches_hit: u64,
}
152
/// Totals for a single language; used in `AnalysisRun::totals_by_language`
/// and `SubmoduleSummary::language_summaries`.
///
/// Fields marked `#[serde(default)]` deserialize as `0` when missing from the input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LanguageSummary {
    pub language: Language,
    pub files: u64,
    pub total_physical_lines: u64,
    pub code_lines: u64,
    pub comment_lines: u64,
    pub blank_lines: u64,
    pub mixed_lines_separate: u64,
    #[serde(default)]
    pub functions: u64,
    #[serde(default)]
    pub classes: u64,
    #[serde(default)]
    pub variables: u64,
    #[serde(default)]
    pub imports: u64,
    #[serde(default)]
    pub test_count: u64,
    #[serde(default)]
    pub test_assertion_count: u64,
    #[serde(default)]
    pub test_suite_count: u64,
    #[serde(default)]
    pub coverage_lines_found: u64,
    #[serde(default)]
    pub coverage_lines_hit: u64,
    #[serde(default)]
    pub coverage_functions_found: u64,
    #[serde(default)]
    pub coverage_functions_hit: u64,
    #[serde(default)]
    pub coverage_branches_found: u64,
    #[serde(default)]
    pub coverage_branches_hit: u64,
}
189
/// Result for a single discovered file, whether it was analyzed or skipped.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileRecord {
    pub path: String,
    /// Key used to sort records, to match submodule prefixes and to look up coverage.
    pub relative_path: String,
    pub language: Option<Language>,
    pub size_bytes: u64,
    pub detected_encoding: Option<String>,
    pub raw_line_categories: RawLineCounts,
    pub effective_counts: EffectiveCounts,
    /// Decides whether `push_record` files this record as analyzed or skipped.
    pub status: FileStatus,
    /// Per-file warnings; `push_record` also copies them into the run-level
    /// warning list, prefixed with `relative_path`.
    pub warnings: Vec<String>,
    pub generated: bool,
    pub minified: bool,
    pub vendor: bool,
    pub parse_mode: Option<ParseMode>,
    /// Name of the containing submodule, set by `process_submodules`; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub submodule: Option<String>,
    /// Coverage data attached by `attach_coverage`; omitted when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub coverage: Option<FileCoverage>,
    /// Omitted from output when `None`; defaults to `None` when absent from input.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub style_analysis: Option<StyleAnalysis>,
}
214
/// Style statistics for one language family; an element of `StyleSummary::by_language`.
// NOTE(review): the code that fills these fields is outside the reviewed
// portion of the file; the `_pct` fields are presumably percentages in
// 0..=100 and `guide_avg_scores` presumably pairs a guide name with its
// average score — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LanguageStyleGroup {
    pub language_family: String,
    pub files_count: u32,
    pub dominant_guide: String,
    pub dominant_score_pct: u8,
    pub common_indent_style: String,
    pub guide_avg_scores: Vec<(String, u8)>,
    pub line80_compliant_pct: u8,
}
233
/// Run-wide style statistics stored in `AnalysisRun::style_summary`; produced
/// by `build_style_summary` from the analyzed records.
// NOTE(review): `line80_compliant_pct` is presumably a percentage in 0..=100 — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StyleSummary {
    pub files_analyzed: u32,
    pub common_indent_style: String,
    pub line80_compliant_pct: u8,
    /// One entry per language family.
    pub by_language: Vec<LanguageStyleGroup>,
}
246
/// Alias for [`StyleSummary`].
// NOTE(review): presumably retained so callers using an earlier, C++-specific
// name keep compiling — confirm.
pub type CppStyleSummary = StyleSummary;
250
/// Totals for one detected submodule; returned by `process_submodules` and
/// stored in `AnalysisRun::submodule_summaries`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SubmoduleSummary {
    pub name: String,
    pub relative_path: String,
    pub files_analyzed: u64,
    pub total_physical_lines: u64,
    pub code_lines: u64,
    pub comment_lines: u64,
    pub blank_lines: u64,
    pub language_summaries: Vec<LanguageSummary>,
}
263
/// Complete result of one call to `analyze`; built by `assemble_run`.
///
/// Optional fields are omitted from serialized output when empty/`None` and
/// default to empty/`None` when absent from deserialized input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalysisRun {
    pub tool: ToolMetadata,
    pub environment: EnvironmentMetadata,
    /// Clone of the configuration the run was executed with.
    pub effective_configuration: AppConfig,
    /// The configured root paths, converted to strings.
    pub input_roots: Vec<String>,
    pub summary_totals: SummaryTotals,
    pub totals_by_language: Vec<LanguageSummary>,
    /// Records with an `Analyzed*` status, sorted by `relative_path`.
    pub per_file_records: Vec<FileRecord>,
    /// Records with any other status, sorted by `relative_path`.
    pub skipped_file_records: Vec<FileRecord>,
    /// Discovery, per-file and coverage warnings collected during the run.
    pub warnings: Vec<String>,
    /// Filled only when `discovery.submodule_breakdown` is enabled.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub submodule_summaries: Vec<SubmoduleSummary>,
    // The `git_*` fields below are copied from the `GitInfo` detected for the
    // first configured root path.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_short: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_long: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_branch: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_author: Option<String>,
    /// Tags pointing at `HEAD`, joined with `", "`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_tags: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_nearest_tag: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_date: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_remote_url: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub style_summary: Option<StyleSummary>,
}
306
/// Git details gathered by `detect_git_for_run`; every field is `None` when
/// it could not be determined. `assemble_run` copies them into the `git_*`
/// fields of `AnalysisRun`.
#[derive(Default)]
struct GitInfo {
    /// First seven characters of `commit_long`.
    commit_short: Option<String>,
    /// Commit id read from `HEAD` or resolved from the ref it points to.
    commit_long: Option<String>,
    /// Branch from `HEAD`, falling back to CI environment variables.
    branch: Option<String>,
    /// Output of `git log -1 --format=%an HEAD`.
    author: Option<String>,
    /// Output lines of `git tag --points-at HEAD`, joined with `", "`.
    tags: Option<String>,
    /// Output of `git describe --tags --abbrev=0 HEAD`.
    nearest_tag: Option<String>,
    /// Output of `git log -1 --format=%aI HEAD`.
    commit_date: Option<String>,
    /// `url` of the `[remote "origin"]` section of the repository config.
    remote_url: Option<String>,
}
318
319fn find_git_dir(start: &Path) -> Option<PathBuf> {
323 let mut current = Some(start);
324 while let Some(dir) = current {
325 let candidate = dir.join(".git");
326 if candidate.is_dir() {
327 return Some(candidate);
328 }
329 if candidate.is_file() {
330 if let Some(resolved) = resolve_git_file_pointer(&candidate, dir) {
331 return Some(resolved);
332 }
333 }
334 current = dir.parent();
335 }
336 None
337}
338
/// Resolves a `.git` *file* containing a `gitdir: <path>` line to the
/// directory it points at.
///
/// `file` is the pointer file and `base_dir` is the directory relative
/// pointers are resolved against. Forward slashes in the pointer are converted
/// to the platform separator. The result is canonicalized when possible and is
/// only returned if it is an existing directory; an unreadable file or a
/// missing `gitdir: ` prefix yields `None`.
fn resolve_git_file_pointer(file: &Path, base_dir: &Path) -> Option<PathBuf> {
    let raw = fs::read_to_string(file).ok()?;
    let target = raw.trim().strip_prefix("gitdir: ")?;
    let native = target.replace('/', std::path::MAIN_SEPARATOR_STR);
    let native_path = Path::new(&native);

    let joined = if native_path.is_absolute() {
        native_path.to_path_buf()
    } else {
        base_dir.join(native_path)
    };

    // Fall back to the uncanonicalized path when canonicalization fails.
    let git_dir = match joined.canonicalize() {
        Ok(real) => real,
        Err(_) => joined,
    };
    Some(git_dir).filter(|dir| dir.is_dir())
}
363
/// Resolves `refname` (e.g. `refs/heads/main`) to the object id it points at.
///
/// The loose ref file under `git_dir` is tried first; its trimmed content is
/// accepted only if it is at least 40 hexadecimal characters. Otherwise
/// `packed-refs` is searched for an `<object id> <refname>` line. Header (`#`)
/// and peeled-tag (`^`) lines are ignored, and blank or malformed lines are
/// skipped rather than ending the search.
///
/// Returns `None` when neither source yields a valid object id.
fn resolve_ref(git_dir: &Path, refname: &str) -> Option<String> {
    let is_object_id = |s: &str| s.len() >= 40 && s.chars().all(|c| c.is_ascii_hexdigit());

    // Join component by component so the platform path separator is used.
    let ref_path = refname
        .split('/')
        .fold(git_dir.to_path_buf(), |p, c| p.join(c));
    if let Ok(content) = fs::read_to_string(&ref_path) {
        let sha = content.trim();
        if is_object_id(sha) {
            return Some(sha.to_string());
        }
    }

    let packed = fs::read_to_string(git_dir.join("packed-refs")).ok()?;
    packed
        .lines()
        .filter(|line| !line.starts_with('#') && !line.starts_with('^'))
        // A line without a space separator is not a ref entry; skip it.
        .filter_map(|line| line.split_once(' '))
        .find(|(sha, name)| name.trim() == refname && is_object_id(sha))
        .map(|(sha, _)| sha.to_string())
}
399
/// Extracts the value from a git-config `url = <value>` line.
///
/// `line` must start with `url`, optionally followed by spaces or tabs and
/// then `=`. The value is returned trimmed; `None` is returned when the line
/// has a different shape or the value is empty.
fn parse_url_line(line: &str) -> Option<&str> {
    let after_key = line.strip_prefix("url")?;
    let after_ws = after_key.trim_start_matches(|c: char| c == ' ' || c == '\t');
    after_ws
        .strip_prefix('=')
        .map(str::trim)
        .filter(|value| !value.is_empty())
}
411
412fn read_git_remote_url(git_dir: &Path) -> Option<String> {
414 let config = fs::read_to_string(git_dir.join("config")).ok()?;
415 let mut in_origin = false;
416 for line in config.lines() {
417 let trimmed = line.trim();
418 if trimmed.starts_with('[') {
419 in_origin = trimmed == r#"[remote "origin"]"#;
420 } else if in_origin {
421 if let Some(url) = parse_url_line(trimmed) {
422 return Some(url.to_owned());
423 }
424 }
425 }
426 None
427}
428
429fn detect_git_for_run(project_path: &Path) -> GitInfo {
433 let ci_branch = ci_branch_from_env();
435
436 let Some(git_dir) = find_git_dir(project_path) else {
437 return GitInfo {
440 branch: ci_branch,
441 ..GitInfo::default()
442 };
443 };
444
445 let head_raw = match fs::read_to_string(git_dir.join("HEAD")) {
446 Ok(s) => s.trim().to_string(),
447 Err(_) => {
448 return GitInfo {
449 branch: ci_branch,
450 ..GitInfo::default()
451 }
452 }
453 };
454
455 let (branch_from_head, commit_long) = head_raw.strip_prefix("ref: ").map_or_else(
456 || {
457 if head_raw.len() >= 40 && head_raw.chars().all(|c| c.is_ascii_hexdigit()) {
458 (None, Some(head_raw[..40].to_string()))
460 } else {
461 (None, None)
462 }
463 },
464 |refname| {
465 let branch = refname
466 .strip_prefix("refs/heads/")
467 .map(|b| b.trim().to_string());
468 let sha = resolve_ref(&git_dir, refname.trim());
469 (branch, sha)
470 },
471 );
472 let branch = branch_from_head.or(ci_branch);
475
476 let commit_short = commit_long
477 .as_deref()
478 .map(|s| s.chars().take(7).collect::<String>());
479
480 let author = run_git_cmd(project_path, &["log", "-1", "--format=%an", "HEAD"]);
481 let commit_date = run_git_cmd(project_path, &["log", "-1", "--format=%aI", "HEAD"]);
482 let remote_url = read_git_remote_url(&git_dir);
483
484 let tags = run_git_cmd(project_path, &["tag", "--points-at", "HEAD"]).map(|t| {
487 t.lines()
488 .filter(|l| !l.is_empty())
489 .collect::<Vec<_>>()
490 .join(", ")
491 });
492 let nearest_tag = run_git_cmd(project_path, &["describe", "--tags", "--abbrev=0", "HEAD"]);
493
494 GitInfo {
495 commit_short,
496 commit_long,
497 branch,
498 author,
499 tags,
500 nearest_tag,
501 commit_date,
502 remote_url,
503 }
504}
505
/// Runs `git <args>` in `dir` and returns its trimmed standard output.
///
/// `git` is looked up on `PATH` first and then at a few common install
/// locations. Only a failure to *start* a candidate moves on to the next one:
/// once a binary has run, its result is final, so a command that legitimately
/// fails or prints nothing (for example `git describe` in a repository without
/// tags) is not re-executed with every other candidate.
///
/// Every invocation passes `-c safe.directory=*`.
///
/// Returns `None` when no binary could be started, the command exited
/// unsuccessfully, the output was not valid UTF-8, or the output was empty.
fn run_git_cmd(dir: &Path, args: &[&str]) -> Option<String> {
    const CANDIDATES: &[&str] = &[
        "git",
        "/usr/bin/git",
        "/usr/local/bin/git",
        "/opt/homebrew/bin/git",
        r"C:\Program Files\Git\cmd\git.exe",
        r"C:\Program Files\Git\bin\git.exe",
        r"C:\Program Files (x86)\Git\cmd\git.exe",
    ];
    for &exe in CANDIDATES {
        let Ok(output) = std::process::Command::new(exe)
            .args(["-c", "safe.directory=*"])
            .args(args)
            .current_dir(dir)
            .output()
        else {
            // This candidate could not be spawned; try the next location.
            continue;
        };
        if !output.status.success() {
            return None;
        }
        return String::from_utf8(output.stdout)
            .ok()
            .map(|s| s.trim().to_string())
            .filter(|s| !s.is_empty());
    }
    None
}
540
/// Identifies the CI system the process is running under, from environment
/// variables.
///
/// Jenkins is checked first and is reported when any of `JENKINS_URL`,
/// `JENKINS_HOME` or `BUILD_URL` is set. The flag-based systems are then
/// checked in a fixed order, and TeamCity (`TEAMCITY_VERSION` set) last.
/// Flag values are compared to `true` ignoring ASCII case, because Azure
/// Pipelines sets `TF_BUILD=True`.
///
/// Returns `None` when no known CI variable is present.
fn detect_ci_system() -> Option<&'static str> {
    // (flag variable, reported name), in detection order.
    const FLAG_VARS: &[(&str, &str)] = &[
        ("GITHUB_ACTIONS", "GitHub Actions"),
        ("GITLAB_CI", "GitLab CI"),
        ("CIRCLECI", "CircleCI"),
        ("TRAVIS", "Travis CI"),
        ("TF_BUILD", "Azure DevOps"),
    ];

    let is_set = |key: &str| std::env::var(key).is_ok();
    let is_true =
        |key: &str| std::env::var(key).map_or(false, |v| v.eq_ignore_ascii_case("true"));

    if is_set("JENKINS_URL") || is_set("JENKINS_HOME") || is_set("BUILD_URL") {
        return Some("Jenkins");
    }
    if let Some(&(_, name)) = FLAG_VARS.iter().find(|&&(key, _)| is_true(key)) {
        return Some(name);
    }
    if is_set("TEAMCITY_VERSION") {
        return Some("TeamCity");
    }
    None
}
568
/// Derives the current branch name from CI environment variables.
///
/// The variables are consulted in the order listed in `VARS`. Each value is
/// trimmed and has a leading `refs/heads/` (or otherwise `origin/`) removed.
/// The first value that is neither empty nor `HEAD` is returned; `None` when
/// no variable supplies one.
fn ci_branch_from_env() -> Option<String> {
    const VARS: &[&str] = &[
        "BRANCH_NAME",
        "GIT_BRANCH",
        "GITHUB_REF_NAME",
        "CI_COMMIT_BRANCH",
        "CIRCLE_BRANCH",
        "TRAVIS_BRANCH",
        "BUILD_SOURCEBRANCH",
    ];

    VARS.iter().find_map(|name| {
        let raw = std::env::var(name).ok()?;
        let trimmed = raw.trim();
        let branch = trimmed
            .strip_prefix("refs/heads/")
            .or_else(|| trimmed.strip_prefix("origin/"))
            .unwrap_or(trimmed);
        let usable = !branch.is_empty() && branch != "HEAD";
        usable.then(|| branch.to_string())
    })
}
595
/// Returns the invoking user's name from the `USERNAME` environment variable,
/// falling back to `USER`, and to `"unknown"` when neither can be read.
fn get_current_username() -> String {
    ["USERNAME", "USER"]
        .iter()
        .find_map(|key| std::env::var(key).ok())
        .unwrap_or_else(|| "unknown".to_string())
}
601
/// Reads environment variable `var`, treating an unset, unreadable or empty
/// value as absent.
fn non_empty_env(var: &str) -> Option<String> {
    std::env::var(var).ok().filter(|value| !value.is_empty())
}
610
/// Reports whether any of the Jenkins marker variables (`JENKINS_URL`,
/// `JENKINS_HOME`, `BUILD_URL`) is set in the environment.
fn is_jenkins_env() -> bool {
    ["JENKINS_URL", "JENKINS_HOME", "BUILD_URL"]
        .iter()
        .any(|key| std::env::var(key).is_ok())
}
616
617fn get_hostname() -> String {
618 if is_jenkins_env() {
621 if let Some(n) = non_empty_env("NODE_NAME") {
622 return n;
623 }
624 }
625 if std::env::var("GITHUB_ACTIONS").as_deref() == Ok("true") {
626 if let Some(r) = non_empty_env("RUNNER_NAME") {
627 return r;
628 }
629 }
630 if std::env::var("GITLAB_CI").as_deref() == Ok("true") {
631 if let Some(r) = non_empty_env("CI_RUNNER_DESCRIPTION") {
632 return r;
633 }
634 }
635 std::env::var("COMPUTERNAME")
636 .or_else(|_| std::env::var("HOSTNAME"))
637 .or_else(|_| std::fs::read_to_string("/etc/hostname").map(|s| s.trim().to_string()))
638 .unwrap_or_else(|_| "unknown".to_string())
639}
640
641#[allow(clippy::too_many_arguments)]
643fn walk_root(
644 root: &Path,
645 config: &AppConfig,
646 include_globs: Option<&GlobSet>,
647 exclude_globs: Option<&GlobSet>,
648 enabled_languages: Option<&BTreeSet<Language>>,
649 seen_paths: &mut HashSet<PathBuf>,
650 analyzed: &mut Vec<FileRecord>,
651 skipped: &mut Vec<FileRecord>,
652 warnings: &mut Vec<String>,
653 cancel: Option<&AtomicBool>,
654 progress: Option<&ProgressCounters>,
655) -> Result<()> {
656 let mut builder = WalkBuilder::new(root);
657 builder
658 .follow_links(config.discovery.follow_symlinks)
659 .hidden(config.discovery.ignore_hidden_files)
660 .ignore(config.discovery.honor_ignore_files)
661 .parents(config.discovery.honor_ignore_files)
662 .git_ignore(config.discovery.honor_ignore_files)
663 .git_global(config.discovery.honor_ignore_files)
664 .git_exclude(config.discovery.honor_ignore_files);
665
666 let paths = collect_walk_paths(&builder, seen_paths, warnings);
667 if paths.is_empty() {
668 return Ok(());
669 }
670
671 if let Some(p) = progress {
672 p.files_total.fetch_add(paths.len(), Ordering::Relaxed);
673 }
674
675 let chunk_results = run_parallel_analysis(
676 &paths,
677 root,
678 config,
679 include_globs,
680 exclude_globs,
681 enabled_languages,
682 cancel,
683 progress,
684 )?;
685 merge_chunk_results(chunk_results, analyzed, skipped, warnings)
686}
687
688fn collect_walk_paths(
689 builder: &WalkBuilder,
690 seen_paths: &mut HashSet<PathBuf>,
691 warnings: &mut Vec<String>,
692) -> Vec<PathBuf> {
693 let (tx, rx) = std::sync::mpsc::channel::<std::result::Result<PathBuf, String>>();
697
698 builder.build_parallel().run(|| {
699 let tx = tx.clone();
700 Box::new(move |entry| {
701 match entry {
702 Err(e) => {
703 let _ = tx.send(Err(format!("discovery warning: {e}")));
704 }
705 Ok(e) => {
706 let path = e.into_path();
707 if !path.is_dir() {
708 let _ = tx.send(Ok(path));
709 }
710 }
711 }
712 ignore::WalkState::Continue
713 })
714 });
715
716 drop(tx);
719
720 rx.into_iter()
721 .filter_map(|msg| match msg {
722 Ok(path) => {
723 if seen_paths.insert(path.clone()) {
724 Some(path)
725 } else {
726 None
727 }
728 }
729 Err(warn) => {
730 warnings.push(warn);
731 None
732 }
733 })
734 .collect()
735}
736
737#[allow(clippy::too_many_arguments)]
739fn worker_loop(
740 paths: &[PathBuf],
741 root: &Path,
742 config: &AppConfig,
743 include_globs: Option<&GlobSet>,
744 exclude_globs: Option<&GlobSet>,
745 enabled_languages: Option<&BTreeSet<Language>>,
746 cancel: Option<&AtomicBool>,
747 next_index: &AtomicUsize,
748 files_done: Option<&AtomicUsize>,
749) -> Vec<Result<Option<FileRecord>>> {
750 let mut results = Vec::new();
751 loop {
752 if cancel.is_some_and(|c| c.load(Ordering::Relaxed)) {
753 results.push(Err(anyhow::anyhow!("analysis cancelled")));
754 break;
755 }
756 let i = next_index.fetch_add(1, Ordering::Relaxed);
757 if i >= paths.len() {
758 break;
759 }
760 results.push(analyze_candidate_file(
761 &paths[i],
762 root,
763 config,
764 include_globs,
765 exclude_globs,
766 enabled_languages,
767 ));
768 if let Some(fd) = files_done {
769 fd.fetch_add(1, Ordering::Relaxed);
770 }
771 }
772 results
773}
774
775#[allow(clippy::too_many_arguments)]
776fn run_parallel_analysis(
777 paths: &[PathBuf],
778 root: &Path,
779 config: &AppConfig,
780 include_globs: Option<&GlobSet>,
781 exclude_globs: Option<&GlobSet>,
782 enabled_languages: Option<&BTreeSet<Language>>,
783 cancel: Option<&AtomicBool>,
784 progress: Option<&ProgressCounters>,
785) -> Result<Vec<Vec<Result<Option<FileRecord>>>>> {
786 let thread_count = std::thread::available_parallelism().map_or(DEFAULT_ANALYSIS_THREADS, |n| {
787 n.get().min(MAX_ANALYSIS_THREADS)
788 });
789 let next_index = AtomicUsize::new(0);
793 let files_done: Option<&AtomicUsize> = progress.map(|p| p.files_done.as_ref());
794
795 std::thread::scope(|s| -> Result<Vec<Vec<Result<Option<FileRecord>>>>> {
796 let mut handles = Vec::with_capacity(thread_count);
799 for _ in 0..thread_count {
800 handles.push(s.spawn(|| {
801 worker_loop(
802 paths,
803 root,
804 config,
805 include_globs,
806 exclude_globs,
807 enabled_languages,
808 cancel,
809 &next_index,
810 files_done,
811 )
812 }));
813 }
814 handles
815 .into_iter()
816 .map(|h| {
817 h.join()
818 .map_err(|_| anyhow::anyhow!("analysis thread panicked"))
819 })
820 .collect()
821 })
822}
823
824fn merge_chunk_results(
825 chunk_results: Vec<Vec<Result<Option<FileRecord>>>>,
826 analyzed: &mut Vec<FileRecord>,
827 skipped: &mut Vec<FileRecord>,
828 warnings: &mut Vec<String>,
829) -> Result<()> {
830 for chunk in chunk_results {
831 for result in chunk {
832 if let Some(record) = result? {
833 push_record(record, analyzed, skipped, warnings);
834 }
835 }
836 }
837 Ok(())
838}
839
840fn process_submodules(config: &AppConfig, analyzed: &mut [FileRecord]) -> Vec<SubmoduleSummary> {
842 let root = config.discovery.root_paths[0]
843 .canonicalize()
844 .unwrap_or_else(|_| config.discovery.root_paths[0].clone());
845 let submodules = detect_submodules(&root);
846 if submodules.is_empty() {
847 return Vec::new();
848 }
849
850 for file in analyzed.iter_mut() {
851 for (name, sub_path) in &submodules {
852 let prefix = sub_path.to_string_lossy().replace('\\', "/");
853 let rel = &file.relative_path;
854 if rel == &prefix || rel.starts_with(&format!("{prefix}/")) {
855 file.submodule = Some(name.clone());
856 break;
857 }
858 }
859 }
860
861 build_submodule_summaries(analyzed, &submodules)
862}
863
864fn assemble_run(
866 config: &AppConfig,
867 runtime_mode: &str,
868 analyzed: Vec<FileRecord>,
869 skipped: Vec<FileRecord>,
870 warnings: Vec<String>,
871 submodule_summaries: Vec<SubmoduleSummary>,
872) -> AnalysisRun {
873 let summary = build_summary(&analyzed, &skipped);
874 let language_summaries = build_language_summaries(&analyzed);
875 let style_summary = build_style_summary(&analyzed);
876
877 let first_root = config
878 .discovery
879 .root_paths
880 .first()
881 .map(|p| p.canonicalize().unwrap_or_else(|_| p.clone()));
882 let git = first_root
883 .as_deref()
884 .map(detect_git_for_run)
885 .unwrap_or_default();
886
887 let now = Utc::now();
888 let run_id = {
889 let uuid_suffix = Uuid::new_v4().simple().to_string();
890 format!("{}-{}", now.format("%Y%m%d-%H%M"), uuid_suffix)
891 };
892
893 AnalysisRun {
894 tool: ToolMetadata {
895 name: "sloc".into(),
896 version: env!("CARGO_PKG_VERSION").into(),
897 run_id,
898 timestamp_utc: now,
899 },
900 environment: EnvironmentMetadata {
901 operating_system: std::env::consts::OS.into(),
902 architecture: std::env::consts::ARCH.into(),
903 runtime_mode: runtime_mode.into(),
904 initiator_username: get_current_username(),
905 initiator_hostname: get_hostname(),
906 ci_name: detect_ci_system().map(str::to_string),
907 },
908 effective_configuration: config.clone(),
909 input_roots: config
910 .discovery
911 .root_paths
912 .iter()
913 .map(|p| path_to_string(p))
914 .collect(),
915 summary_totals: summary,
916 totals_by_language: language_summaries,
917 per_file_records: analyzed,
918 skipped_file_records: skipped,
919 warnings,
920 submodule_summaries,
921 git_commit_short: git.commit_short,
922 git_commit_long: git.commit_long,
923 git_branch: git.branch,
924 git_commit_author: git.author,
925 git_tags: git.tags,
926 git_nearest_tag: git.nearest_tag,
927 git_commit_date: git.commit_date,
928 git_remote_url: git.remote_url,
929 style_summary,
930 }
931}
932
933#[allow(clippy::too_many_lines)]
938pub fn analyze(
939 config: &AppConfig,
940 runtime_mode: &str,
941 cancel: Option<&AtomicBool>,
942 progress: Option<&ProgressCounters>,
943) -> Result<AnalysisRun> {
944 config.validate()?;
945
946 if config.discovery.root_paths.is_empty() {
947 anyhow::bail!("no input paths were provided");
948 }
949
950 let include_globs = compile_globset(&config.discovery.include_globs)?;
951 let exclude_globs = compile_globset(&config.discovery.exclude_globs)?;
952 let enabled_languages = parse_enabled_languages(&config.analysis.enabled_languages)?;
953
954 let mut analyzed = Vec::new();
955 let mut skipped = Vec::new();
956 let mut warnings = Vec::new();
957 let mut seen_paths = HashSet::new();
958
959 for root in &config.discovery.root_paths {
960 if cancel.is_some_and(|c| c.load(Ordering::Relaxed)) {
961 anyhow::bail!("analysis cancelled");
962 }
963
964 let root = root.canonicalize().unwrap_or_else(|_| root.clone());
965
966 if root.is_file() {
967 if let Some(record) = analyze_candidate_file(
968 &root,
969 root.parent().unwrap_or_else(|| Path::new(".")),
970 config,
971 include_globs.as_ref(),
972 exclude_globs.as_ref(),
973 enabled_languages.as_ref(),
974 )? {
975 push_record(record, &mut analyzed, &mut skipped, &mut warnings);
976 }
977 continue;
978 }
979
980 walk_root(
981 &root,
982 config,
983 include_globs.as_ref(),
984 exclude_globs.as_ref(),
985 enabled_languages.as_ref(),
986 &mut seen_paths,
987 &mut analyzed,
988 &mut skipped,
989 &mut warnings,
990 cancel,
991 progress,
992 )?;
993 }
994
995 analyzed.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
996 skipped.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
997
998 let submodule_summaries = if config.discovery.submodule_breakdown {
1000 process_submodules(config, &mut analyzed)
1001 } else {
1002 Vec::new()
1003 };
1004
1005 attach_coverage(config, &mut analyzed, &mut warnings);
1006
1007 Ok(assemble_run(
1008 config,
1009 runtime_mode,
1010 analyzed,
1011 skipped,
1012 warnings,
1013 submodule_summaries,
1014 ))
1015}
1016
1017fn attach_coverage(config: &AppConfig, analyzed: &mut [FileRecord], warnings: &mut Vec<String>) {
1018 let Some(cov_path) = coverage::resolve_coverage_file(config.analysis.coverage_file.as_deref())
1019 else {
1020 return;
1021 };
1022 tracing::debug!(path = %cov_path.display(), "loading coverage file");
1023 match fs::read_to_string(&cov_path) {
1024 Ok(content) => {
1025 let cov_map = coverage::parse_coverage_auto(&cov_path, &content);
1026 let mut matched: u32 = 0;
1027 let mut unmatched: u32 = 0;
1028 for record in analyzed.iter_mut() {
1029 record.coverage =
1030 coverage::lookup_coverage(&cov_map, &record.relative_path).cloned();
1031 if record.coverage.is_some() {
1032 matched += 1;
1033 } else {
1034 unmatched += 1;
1035 }
1036 }
1037 tracing::debug!(
1038 path = %cov_path.display(),
1039 coverage_entries = cov_map.len(),
1040 files_matched = matched,
1041 files_unmatched = unmatched,
1042 "coverage attached"
1043 );
1044 if unmatched > 0 && matched == 0 {
1045 tracing::warn!(
1046 path = %cov_path.display(),
1047 "coverage file loaded but no source files could be matched — check that paths in the coverage report match the scanned directory"
1048 );
1049 }
1050 }
1051 Err(e) => {
1052 tracing::warn!(path = %cov_path.display(), error = %e, "coverage file could not be read");
1053 warnings.push(format!(
1054 "coverage file '{}' could not be read: {e}",
1055 cov_path.display()
1056 ));
1057 }
1058 }
1059}
1060
1061fn push_record(
1062 record: FileRecord,
1063 analyzed: &mut Vec<FileRecord>,
1064 skipped: &mut Vec<FileRecord>,
1065 warnings: &mut Vec<String>,
1066) {
1067 warnings.extend(
1068 record
1069 .warnings
1070 .iter()
1071 .map(|warning| format!("{}: {warning}", record.relative_path)),
1072 );
1073
1074 match record.status {
1075 FileStatus::AnalyzedExact | FileStatus::AnalyzedBestEffort => analyzed.push(record),
1076 _ => skipped.push(record),
1077 }
1078}
1079
1080#[inline]
1082fn skip_with_reason(
1083 path: &Path,
1084 root: &Path,
1085 size: u64,
1086 reason: impl Into<String>,
1087) -> MetadataPolicyOutcome {
1088 MetadataPolicyOutcome::Skip(Box::new(skipped_record(
1089 path,
1090 root,
1091 size,
1092 FileStatus::SkippedByPolicy,
1093 vec![reason.into()],
1094 )))
1095}
1096
1097#[allow(clippy::too_many_arguments)]
1101fn check_metadata_policy(
1102 path: &Path,
1103 root: &Path,
1104 relative_path: &str,
1105 metadata: &fs::Metadata,
1106 config: &AppConfig,
1107 include_globs: Option<&GlobSet>,
1108 exclude_globs: Option<&GlobSet>,
1109) -> MetadataPolicyOutcome {
1110 let size = metadata.len();
1111
1112 if metadata.file_type().is_symlink() && !config.discovery.follow_symlinks {
1113 return skip_with_reason(path, root, size, "symlink skipped by policy");
1114 }
1115 if file_name_eq(path, ".gitignore") {
1116 return skip_with_reason(path, root, size, ".gitignore is always excluded");
1117 }
1118 if is_excluded_dir_path(path, &config.discovery.excluded_directories) {
1119 return skip_with_reason(path, root, size, "path matched excluded directory setting");
1120 }
1121 if size > config.discovery.max_file_size_bytes {
1122 return skip_with_reason(
1123 path,
1124 root,
1125 size,
1126 format!(
1127 "file exceeded max_file_size_bytes ({})",
1128 config.discovery.max_file_size_bytes
1129 ),
1130 );
1131 }
1132 if let Some(globs) = include_globs {
1133 if !globs.is_match(Path::new(relative_path)) && !globs.is_match(path) {
1134 return MetadataPolicyOutcome::Exclude;
1135 }
1136 }
1137 if let Some(globs) = exclude_globs {
1138 if globs.is_match(Path::new(relative_path)) || globs.is_match(path) {
1139 return skip_with_reason(path, root, size, "path matched exclude glob");
1140 }
1141 }
1142 if is_known_lockfile(path) && !config.analysis.include_lockfiles {
1143 return skip_with_reason(path, root, size, "lockfile skipped by default policy");
1144 }
1145
1146 MetadataPolicyOutcome::Continue
1147}
1148
/// Outcome of `check_content_policy` for a single file.
struct ContentPolicyResult {
    /// Whether the path was classified as a vendor path.
    vendor: bool,
    /// Whether generated-file detection is enabled and flagged the file.
    generated: bool,
    /// Whether minified-file detection is enabled and flagged the file.
    minified: bool,
    /// `Some` when a policy decided the file must be skipped; the record explains which one.
    skip_record: Option<FileRecord>,
}
1155
1156fn check_content_policy(
1159 path: &Path,
1160 root: &Path,
1161 size_bytes: u64,
1162 bytes: &[u8],
1163 config: &AppConfig,
1164) -> ContentPolicyResult {
1165 let vendor = is_vendor_path(path);
1166 if vendor && config.analysis.vendor_directory_detection {
1167 return ContentPolicyResult {
1168 vendor,
1169 generated: false,
1170 minified: false,
1171 skip_record: Some(skipped_record(
1172 path,
1173 root,
1174 size_bytes,
1175 FileStatus::SkippedByPolicy,
1176 vec!["vendor file skipped by policy".into()],
1177 )),
1178 };
1179 }
1180
1181 let generated = config.analysis.generated_file_detection && looks_generated(path, bytes);
1182 if generated {
1183 return ContentPolicyResult {
1184 vendor,
1185 generated,
1186 minified: false,
1187 skip_record: Some(skipped_record(
1188 path,
1189 root,
1190 size_bytes,
1191 FileStatus::SkippedByPolicy,
1192 vec!["generated file skipped by policy".into()],
1193 )),
1194 };
1195 }
1196
1197 let minified = config.analysis.minified_file_detection && looks_minified(path, bytes);
1198 if minified {
1199 return ContentPolicyResult {
1200 vendor,
1201 generated,
1202 minified,
1203 skip_record: Some(skipped_record(
1204 path,
1205 root,
1206 size_bytes,
1207 FileStatus::SkippedByPolicy,
1208 vec!["minified file skipped by policy".into()],
1209 )),
1210 };
1211 }
1212
1213 ContentPolicyResult {
1214 vendor,
1215 generated,
1216 minified,
1217 skip_record: None,
1218 }
1219}
1220
1221fn decode_file_contents(
1223 path: &Path,
1224 root: &Path,
1225 size_bytes: u64,
1226 bytes: &[u8],
1227 config: &AppConfig,
1228) -> Result<Option<(String, String, Vec<String>)>> {
1229 if is_binary(bytes) {
1230 return match config.analysis.binary_file_behavior {
1231 BinaryFileBehavior::Skip => Ok(None),
1232 BinaryFileBehavior::Fail => {
1233 anyhow::bail!("binary file encountered: {}", path.display())
1234 }
1235 };
1236 }
1237
1238 match decode_bytes(bytes) {
1239 Ok(result) => Ok(Some(result)),
1240 Err(err) => match config.analysis.decode_failure_behavior {
1241 FailureBehavior::WarnSkip => {
1242 let _ = (path, root, size_bytes); Err(anyhow::anyhow!("__decode_warn__: {err}"))
1247 }
1248 FailureBehavior::Fail => {
1249 anyhow::bail!("decode failure for {}: {err}", path.display())
1250 }
1251 },
1252 }
1253}
1254
1255#[allow(clippy::too_many_lines)]
1256fn analyze_candidate_file(
1257 path: &Path,
1258 root: &Path,
1259 config: &AppConfig,
1260 include_globs: Option<&GlobSet>,
1261 exclude_globs: Option<&GlobSet>,
1262 enabled_languages: Option<&BTreeSet<Language>>,
1263) -> Result<Option<FileRecord>> {
1264 let metadata = match fs::symlink_metadata(path) {
1265 Ok(metadata) => metadata,
1266 Err(err) => {
1267 return Ok(Some(skipped_record(
1268 path,
1269 root,
1270 0,
1271 FileStatus::ErrorInternal,
1272 vec![format!("failed to read metadata: {err}")],
1273 )));
1274 }
1275 };
1276
1277 let relative_path = relative_path_string(path, root);
1278
1279 match check_metadata_policy(
1281 path,
1282 root,
1283 &relative_path,
1284 &metadata,
1285 config,
1286 include_globs,
1287 exclude_globs,
1288 ) {
1289 MetadataPolicyOutcome::Skip(record) => return Ok(Some(*record)),
1290 MetadataPolicyOutcome::Exclude => return Ok(None),
1291 MetadataPolicyOutcome::Continue => {}
1292 }
1293
1294 let bytes = match fs::read(path) {
1295 Ok(bytes) => bytes,
1296 Err(err) => {
1297 return Ok(Some(skipped_record(
1298 path,
1299 root,
1300 metadata.len(),
1301 FileStatus::ErrorInternal,
1302 vec![format!("failed to read file: {err}")],
1303 )));
1304 }
1305 };
1306
1307 let content_policy = check_content_policy(path, root, metadata.len(), &bytes, config);
1309 if let Some(record) = content_policy.skip_record {
1310 return Ok(Some(record));
1311 }
1312 let (vendor, generated, minified) = (
1313 content_policy.vendor,
1314 content_policy.generated,
1315 content_policy.minified,
1316 );
1317
1318 let (text, encoding, decode_warnings) =
1320 match decode_file_contents(path, root, metadata.len(), &bytes, config) {
1321 Ok(Some(result)) => result,
1322 Ok(None) => {
1323 return Ok(Some(skipped_record(
1324 path,
1325 root,
1326 metadata.len(),
1327 FileStatus::SkippedBinary,
1328 vec!["binary file skipped by default".into()],
1329 )));
1330 }
1331 Err(err) => {
1332 let msg = err.to_string();
1333 if let Some(warn_msg) = msg.strip_prefix("__decode_warn__: ") {
1334 return Ok(Some(skipped_record(
1335 path,
1336 root,
1337 metadata.len(),
1338 FileStatus::SkippedDecodeError,
1339 vec![warn_msg.to_string()],
1340 )));
1341 }
1342 return Err(err);
1343 }
1344 };
1345
1346 let first_line = text.lines().next();
1347 let language = detect_language(
1348 path,
1349 first_line,
1350 &config.analysis.extension_overrides,
1351 config.analysis.shebang_detection,
1352 );
1353
1354 let Some(language) = language else {
1355 return Ok(Some(skipped_record(
1356 path,
1357 root,
1358 metadata.len(),
1359 FileStatus::SkippedUnsupported,
1360 vec!["unsupported or undetected language".into()],
1361 )));
1362 };
1363
1364 if let Some(enabled) = enabled_languages {
1365 if !enabled.contains(&language) {
1366 return Ok(Some(skipped_record(
1367 path,
1368 root,
1369 metadata.len(),
1370 FileStatus::SkippedByPolicy,
1371 vec![format!(
1372 "language {} disabled by configuration",
1373 language.display_name()
1374 )],
1375 )));
1376 }
1377 }
1378
1379 let ieee_opts = AnalysisOptions {
1380 blank_in_block_comment_as_comment: config.analysis.blank_in_block_comment_policy
1381 == BlankInBlockCommentPolicy::CountAsComment,
1382 collapse_continuation_lines: config.analysis.continuation_line_policy
1383 == ContinuationLinePolicy::CollapseToLogical,
1384 };
1385 let analysis = analyze_text(language, &text, ieee_opts);
1386 let effective_counts = compute_effective_counts(
1387 &analysis.raw,
1388 config.analysis.mixed_line_policy,
1389 config.analysis.python_docstrings_as_comments,
1390 config.analysis.count_compiler_directives,
1391 );
1392
1393 let mut warnings = decode_warnings;
1394 warnings.extend(analysis.warnings.clone());
1395
1396 Ok(Some(FileRecord {
1397 path: path_to_string(path),
1398 relative_path,
1399 language: Some(language),
1400 size_bytes: metadata.len(),
1401 detected_encoding: Some(encoding),
1402 raw_line_categories: analysis.raw,
1403 effective_counts,
1404 status: match analysis.parse_mode {
1405 ParseMode::Lexical | ParseMode::TreeSitter => FileStatus::AnalyzedExact,
1406 ParseMode::LexicalBestEffort => FileStatus::AnalyzedBestEffort,
1407 },
1408 warnings,
1409 generated,
1410 minified,
1411 vendor,
1412 parse_mode: Some(analysis.parse_mode),
1413 submodule: None,
1414 coverage: None,
1415 style_analysis: analysis.style_analysis,
1416 }))
1417}
1418
1419const fn compute_effective_counts(
1420 raw: &RawLineCounts,
1421 mixed_line_policy: MixedLinePolicy,
1422 python_docstrings_as_comments: bool,
1423 count_compiler_directives: bool,
1424) -> EffectiveCounts {
1425 let mut effective = EffectiveCounts {
1426 code_lines: raw.code_only_lines,
1427 comment_lines: raw.single_comment_only_lines + raw.multi_comment_only_lines,
1428 blank_lines: raw.blank_only_lines,
1429 mixed_lines_separate: 0,
1430 };
1431
1432 if python_docstrings_as_comments {
1433 effective.comment_lines += raw.docstring_comment_lines;
1434 } else {
1435 effective.code_lines += raw.docstring_comment_lines;
1436 }
1437
1438 let mixed_total = raw.mixed_code_single_comment_lines + raw.mixed_code_multi_comment_lines;
1439 match mixed_line_policy {
1440 MixedLinePolicy::CodeOnly => effective.code_lines += mixed_total,
1441 MixedLinePolicy::CodeAndComment => {
1442 effective.code_lines += mixed_total;
1443 effective.comment_lines += mixed_total;
1444 }
1445 MixedLinePolicy::CommentOnly => effective.comment_lines += mixed_total,
1446 MixedLinePolicy::SeparateMixedCategory => effective.mixed_lines_separate += mixed_total,
1447 }
1448
1449 if !count_compiler_directives {
1452 effective.code_lines = effective
1453 .code_lines
1454 .saturating_sub(raw.compiler_directive_lines);
1455 }
1456
1457 effective
1458}
1459
1460fn build_summary(analyzed: &[FileRecord], skipped: &[FileRecord]) -> SummaryTotals {
1461 let mut summary = SummaryTotals {
1462 files_considered: (analyzed.len() + skipped.len()) as u64,
1463 files_analyzed: analyzed.len() as u64,
1464 files_skipped: skipped.len() as u64,
1465 ..Default::default()
1466 };
1467
1468 for record in analyzed {
1469 summary.total_physical_lines += record.raw_line_categories.total_physical_lines;
1470 summary.code_lines += record.effective_counts.code_lines;
1471 summary.comment_lines += record.effective_counts.comment_lines;
1472 summary.blank_lines += record.effective_counts.blank_lines;
1473 summary.mixed_lines_separate += record.effective_counts.mixed_lines_separate;
1474 summary.functions += record.raw_line_categories.functions;
1475 summary.classes += record.raw_line_categories.classes;
1476 summary.variables += record.raw_line_categories.variables;
1477 summary.imports += record.raw_line_categories.imports;
1478 summary.test_count += record.raw_line_categories.test_count;
1479 summary.test_assertion_count += record.raw_line_categories.test_assertion_count;
1480 summary.test_suite_count += record.raw_line_categories.test_suite_count;
1481 if let Some(cov) = &record.coverage {
1482 summary.coverage_lines_found += u64::from(cov.lines_found);
1483 summary.coverage_lines_hit += u64::from(cov.lines_hit);
1484 summary.coverage_functions_found += u64::from(cov.functions_found);
1485 summary.coverage_functions_hit += u64::from(cov.functions_hit);
1486 summary.coverage_branches_found += u64::from(cov.branches_found);
1487 summary.coverage_branches_hit += u64::from(cov.branches_hit);
1488 }
1489 }
1490
1491 summary
1492}
1493
1494const fn zeroed_summary(language: Language) -> LanguageSummary {
1496 LanguageSummary {
1497 language,
1498 files: 0,
1499 total_physical_lines: 0,
1500 code_lines: 0,
1501 comment_lines: 0,
1502 blank_lines: 0,
1503 mixed_lines_separate: 0,
1504 functions: 0,
1505 classes: 0,
1506 variables: 0,
1507 imports: 0,
1508 test_count: 0,
1509 test_assertion_count: 0,
1510 test_suite_count: 0,
1511 coverage_lines_found: 0,
1512 coverage_lines_hit: 0,
1513 coverage_functions_found: 0,
1514 coverage_functions_hit: 0,
1515 coverage_branches_found: 0,
1516 coverage_branches_hit: 0,
1517 }
1518}
1519
1520fn accumulate_record_into_summary(entry: &mut LanguageSummary, record: &FileRecord) {
1522 entry.files += 1;
1523 let r = &record.raw_line_categories;
1524 entry.total_physical_lines += r.total_physical_lines;
1525 entry.code_lines += record.effective_counts.code_lines;
1526 entry.comment_lines += record.effective_counts.comment_lines;
1527 entry.blank_lines += record.effective_counts.blank_lines;
1528 entry.mixed_lines_separate += record.effective_counts.mixed_lines_separate;
1529 entry.functions += r.functions;
1530 entry.classes += r.classes;
1531 entry.variables += r.variables;
1532 entry.imports += r.imports;
1533 entry.test_count += r.test_count;
1534 entry.test_assertion_count += r.test_assertion_count;
1535 entry.test_suite_count += r.test_suite_count;
1536 if let Some(cov) = &record.coverage {
1537 entry.coverage_lines_found += u64::from(cov.lines_found);
1538 entry.coverage_lines_hit += u64::from(cov.lines_hit);
1539 entry.coverage_functions_found += u64::from(cov.functions_found);
1540 entry.coverage_functions_hit += u64::from(cov.functions_hit);
1541 entry.coverage_branches_found += u64::from(cov.branches_found);
1542 entry.coverage_branches_hit += u64::from(cov.branches_hit);
1543 }
1544}
1545
1546fn build_language_summaries(analyzed: &[FileRecord]) -> Vec<LanguageSummary> {
1547 let mut by_language: BTreeMap<Language, LanguageSummary> = BTreeMap::new();
1548 for record in analyzed {
1549 let Some(language) = record.language else {
1550 continue;
1551 };
1552 let entry = by_language
1553 .entry(language)
1554 .or_insert_with(|| zeroed_summary(language));
1555 accumulate_record_into_summary(entry, record);
1556 }
1557 by_language.into_values().collect()
1558}
1559
1560fn skipped_record(
1561 path: &Path,
1562 root: &Path,
1563 size_bytes: u64,
1564 status: FileStatus,
1565 warnings: Vec<String>,
1566) -> FileRecord {
1567 FileRecord {
1568 path: path_to_string(path),
1569 relative_path: relative_path_string(path, root),
1570 language: None,
1571 size_bytes,
1572 detected_encoding: None,
1573 raw_line_categories: RawLineCounts::default(),
1574 effective_counts: EffectiveCounts::default(),
1575 status,
1576 warnings,
1577 generated: false,
1578 minified: false,
1579 vendor: false,
1580 parse_mode: None,
1581 submodule: None,
1582 coverage: None,
1583 style_analysis: None,
1584 }
1585}
1586
/// Renders `path` relative to `root` with forward slashes.
///
/// When `path` does not start with `root`, the whole `path` is rendered
/// instead. Non-UTF-8 parts are converted lossily and every backslash becomes
/// a forward slash.
fn relative_path_string(path: &Path, root: &Path) -> String {
    let relative = path.strip_prefix(root).unwrap_or(path);
    let lossy = relative.to_string_lossy();
    lossy.replace('\\', "/")
}
1593
/// Renders `path` as a string with forward slashes; non-UTF-8 parts are
/// converted lossily and every backslash becomes a forward slash.
fn path_to_string(path: &Path) -> String {
    path.to_string_lossy()
        .chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect()
}
1597
/// Lists the submodules declared in `<root>/.gitmodules`.
///
/// Each returned pair is `(name, path)`: the name comes from a
/// `[submodule "<name>"]` header and the path from the `path = <value>` entry
/// of that section. Sections that never provide a path are dropped.
///
/// Returns an empty list when `.gitmodules` is missing, is not a regular
/// file, or cannot be read as text. Malformed lines are ignored rather than
/// causing a panic.
#[must_use]
pub fn detect_submodules(root: &Path) -> Vec<(String, PathBuf)> {
    let gitmodules = root.join(".gitmodules");
    if !gitmodules.is_file() {
        return Vec::new();
    }
    let Ok(content) = fs::read_to_string(&gitmodules) else {
        return Vec::new();
    };

    let mut result = Vec::new();
    let mut current_name: Option<String> = None;
    let mut current_path: Option<PathBuf> = None;

    for line in content.lines() {
        let trimmed = line.trim();

        // Strip prefix and suffix in sequence so they can never overlap: a
        // truncated header such as `[submodule "]` is simply not a header.
        let header = trimmed
            .strip_prefix("[submodule \"")
            .and_then(|rest| rest.strip_suffix("\"]"));

        if let Some(name) = header {
            // A new section starts: flush the previous one if it is complete.
            if let (Some(name), Some(path)) = (current_name.take(), current_path.take()) {
                result.push((name, path));
            }
            current_name = Some(name.to_string());
        } else if let Some(value) = trimmed
            .strip_prefix("path")
            // Require `=` right after the key (optional whitespace allowed) so
            // that other keys merely starting with "path" are not mistaken for it.
            .and_then(|rest| rest.trim_start().strip_prefix('='))
        {
            current_path = Some(PathBuf::from(value.trim()));
        }
    }
    if let (Some(name), Some(path)) = (current_name, current_path) {
        result.push((name, path));
    }

    result
}
1634
1635fn build_submodule_summaries(
1636 analyzed: &[FileRecord],
1637 submodules: &[(String, PathBuf)],
1638) -> Vec<SubmoduleSummary> {
1639 submodules
1640 .iter()
1641 .map(|(name, path)| {
1642 let files: Vec<&FileRecord> = analyzed
1643 .iter()
1644 .filter(|f| f.submodule.as_deref() == Some(name.as_str()))
1645 .collect();
1646
1647 let files_analyzed = files.len() as u64;
1648 let total_physical_lines = files
1649 .iter()
1650 .map(|f| f.raw_line_categories.total_physical_lines)
1651 .sum();
1652 let code_lines = files.iter().map(|f| f.effective_counts.code_lines).sum();
1653 let comment_lines = files.iter().map(|f| f.effective_counts.comment_lines).sum();
1654 let blank_lines = files.iter().map(|f| f.effective_counts.blank_lines).sum();
1655 let language_summaries = build_language_summaries_from_slice(&files);
1656
1657 SubmoduleSummary {
1658 name: name.clone(),
1659 relative_path: path.to_string_lossy().replace('\\', "/"),
1660 files_analyzed,
1661 total_physical_lines,
1662 code_lines,
1663 comment_lines,
1664 blank_lines,
1665 language_summaries,
1666 }
1667 })
1668 .filter(|s| s.files_analyzed > 0)
1669 .collect()
1670}
1671
1672#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
1674fn dominant_indent_label(files: &[&StyleAnalysis]) -> String {
1675 let mut votes = [0u32; 6];
1676 for f in files {
1677 let idx = match f.indent_style {
1678 IndentStyle::Tabs => 0,
1679 IndentStyle::Spaces2 => 1,
1680 IndentStyle::Spaces4 => 2,
1681 IndentStyle::Spaces8 => 3,
1682 IndentStyle::Mixed => 4,
1683 IndentStyle::Unknown => 5,
1684 };
1685 votes[idx] += 1;
1686 }
1687 let labels = ["Tabs", "2-Space", "4-Space", "8-Space", "Mixed", "\u{2014}"];
1688 labels[votes
1689 .iter()
1690 .enumerate()
1691 .max_by_key(|(_, v)| *v)
1692 .map(|(i, _)| i)
1693 .unwrap_or(5)]
1694 .to_string()
1695}
1696
1697#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
1699fn line80_pct(files: &[&StyleAnalysis]) -> u8 {
1700 if files.is_empty() {
1701 return 0;
1702 }
1703 let compliant = files
1704 .iter()
1705 .filter(|f| f.total_lines == 0 || (f.lines_over_80 as f32 / f.total_lines as f32) <= 0.05)
1706 .count() as u32;
1707 ((compliant * 100) / files.len() as u32) as u8
1708}
1709
1710#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
1712fn build_language_group(family: &str, files: &[&StyleAnalysis]) -> LanguageStyleGroup {
1713 let count = files.len() as u32;
1714
1715 let mut all_names: Vec<String> = Vec::new();
1717 for f in files {
1718 for g in &f.guide_scores {
1719 if !all_names.contains(&g.name) {
1720 all_names.push(g.name.clone());
1721 }
1722 }
1723 }
1724
1725 let mut guide_avg_scores: Vec<(String, u8)> = all_names
1726 .into_iter()
1727 .map(|name| {
1728 let sum: u32 = files
1729 .iter()
1730 .filter_map(|f| f.guide_scores.iter().find(|g| g.name == name))
1731 .map(|g| u32::from(g.score_pct))
1732 .sum();
1733 let avg = (sum / count) as u8;
1734 (name, avg)
1735 })
1736 .collect();
1737 guide_avg_scores.sort_by_key(|s| std::cmp::Reverse(s.1));
1738
1739 let (dominant_guide, dominant_score_pct) = guide_avg_scores
1740 .first()
1741 .map(|(n, s)| (n.clone(), *s))
1742 .unwrap_or_default();
1743
1744 LanguageStyleGroup {
1745 language_family: family.to_string(),
1746 files_count: count,
1747 dominant_guide,
1748 dominant_score_pct,
1749 common_indent_style: dominant_indent_label(files),
1750 guide_avg_scores,
1751 line80_compliant_pct: line80_pct(files),
1752 }
1753}
1754
1755#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
1758fn build_style_summary(analyzed: &[FileRecord]) -> Option<StyleSummary> {
1759 let all_style: Vec<&StyleAnalysis> = analyzed
1760 .iter()
1761 .filter_map(|f| f.style_analysis.as_ref())
1762 .collect();
1763
1764 if all_style.is_empty() {
1765 return None;
1766 }
1767
1768 let mut families: std::collections::BTreeMap<&str, Vec<&StyleAnalysis>> =
1770 std::collections::BTreeMap::new();
1771 for sa in &all_style {
1772 families
1773 .entry(sa.language_family.as_str())
1774 .or_default()
1775 .push(sa);
1776 }
1777
1778 let mut by_language: Vec<LanguageStyleGroup> = families
1779 .iter()
1780 .map(|(family, files)| build_language_group(family, files))
1781 .collect();
1782 by_language.sort_by_key(|g| std::cmp::Reverse(g.files_count));
1783
1784 let files_analyzed = all_style.len() as u32;
1785 let common_indent_style = dominant_indent_label(&all_style);
1786 let line80_compliant_pct = line80_pct(&all_style);
1787
1788 Some(StyleSummary {
1789 files_analyzed,
1790 common_indent_style,
1791 line80_compliant_pct,
1792 by_language,
1793 })
1794}
1795
1796fn build_language_summaries_from_slice(files: &[&FileRecord]) -> Vec<LanguageSummary> {
1797 let mut map: BTreeMap<String, LanguageSummary> = BTreeMap::new();
1798 for file in files {
1799 let Some(lang) = file.language else { continue };
1800 let entry = map
1801 .entry(lang.display_name().to_string())
1802 .or_insert_with(|| zeroed_summary(lang));
1803 accumulate_record_into_summary(entry, file);
1804 }
1805 map.into_values().collect()
1806}
1807
/// Reports whether the final component of `path` is exactly `expected`.
///
/// The comparison is case-sensitive. Paths without a final file name, or
/// whose name is not valid UTF-8, never match.
fn file_name_eq(path: &Path, expected: &str) -> bool {
    let Some(name) = path.file_name() else {
        return false;
    };
    name.to_str() == Some(expected)
}
1813
/// Reports whether any component of `path` equals one of `excluded_dirs`.
///
/// Components are compared whole and case-sensitively; components that are
/// not valid UTF-8 never match.
fn is_excluded_dir_path(path: &Path, excluded_dirs: &[String]) -> bool {
    for component in path.components() {
        let Some(part) = component.as_os_str().to_str() else {
            continue;
        };
        if excluded_dirs.iter().any(|excluded| excluded.as_str() == part) {
            return true;
        }
    }
    false
}
1822
/// Reports whether any component of `path` is named `vendor`,
/// `node_modules`, or `packages`.
///
/// Components are compared whole and case-sensitively; components that are
/// not valid UTF-8 never match.
fn is_vendor_path(path: &Path) -> bool {
    const VENDOR_DIRS: [&str; 3] = ["vendor", "node_modules", "packages"];

    path.components()
        .filter_map(|component| component.as_os_str().to_str())
        .any(|part| VENDOR_DIRS.contains(&part))
}
1831
/// Reports whether the final component of `path` is one of the recognized
/// dependency lockfile names. The comparison is case-sensitive.
fn is_known_lockfile(path: &Path) -> bool {
    const LOCKFILES: [&str; 7] = [
        "Cargo.lock",
        "package-lock.json",
        "yarn.lock",
        "pnpm-lock.yaml",
        "Pipfile.lock",
        "poetry.lock",
        "composer.lock",
    ];

    path.file_name()
        .and_then(|name| name.to_str())
        .is_some_and(|name| LOCKFILES.contains(&name))
}
1848
1849fn looks_generated(path: &Path, bytes: &[u8]) -> bool {
1850 let file_name = path
1851 .file_name()
1852 .and_then(|name| name.to_str())
1853 .unwrap_or_default();
1854 if file_name.contains(".generated.") || file_name.contains(".g.") {
1855 return true;
1856 }
1857
1858 let sample = String::from_utf8_lossy(&bytes[..bytes.len().min(GENERATED_SAMPLE_BYTES)])
1859 .to_ascii_lowercase();
1860 sample.contains("@generated") || sample.contains("generated by")
1861}
1862
1863fn looks_minified(path: &Path, bytes: &[u8]) -> bool {
1864 let file_name = path
1865 .file_name()
1866 .and_then(|name| name.to_str())
1867 .unwrap_or_default();
1868 if file_name.contains(".min.") {
1869 return true;
1870 }
1871
1872 let sample = String::from_utf8_lossy(&bytes[..bytes.len().min(MINIFIED_SAMPLE_BYTES)]);
1873 let longest_line = sample.lines().map(str::len).max().unwrap_or(0);
1874 let whitespace = sample.chars().filter(|c| c.is_whitespace()).count();
1875 longest_line > MINIFIED_LINE_THRESHOLD && whitespace * 100 < sample.len().max(1)
1876}
1877
1878fn is_binary(bytes: &[u8]) -> bool {
1879 if bytes.starts_with(&[0xEF, 0xBB, 0xBF])
1880 || bytes.starts_with(&[0xFF, 0xFE])
1881 || bytes.starts_with(&[0xFE, 0xFF])
1882 {
1883 return false;
1884 }
1885
1886 let sample = &bytes[..bytes.len().min(BINARY_SAMPLE_BYTES)];
1887 sample.contains(&0)
1888}
1889
1890fn decode_utf16_bom(
1893 bom_stripped: &[u8],
1894 encoding: &'static encoding_rs::Encoding,
1895 label: &str,
1896) -> (String, String, Vec<String>) {
1897 let (cow, _, had_errors) = encoding.decode(bom_stripped);
1898 let mut warnings = Vec::new();
1899 if had_errors {
1900 warnings.push(format!("{label} decode contained replacement characters"));
1901 }
1902 (cow.into_owned(), label.into(), warnings)
1903}
1904
1905fn decode_bytes(bytes: &[u8]) -> std::result::Result<(String, String, Vec<String>), String> {
1906 if bytes.starts_with(&[0xEF, 0xBB, 0xBF]) {
1907 let text = String::from_utf8(bytes[3..].to_vec()).map_err(|err| err.to_string())?;
1908 return Ok((text, "utf-8-bom".into(), vec![]));
1909 }
1910 if bytes.starts_with(&[0xFF, 0xFE]) {
1911 return Ok(decode_utf16_bom(&bytes[2..], UTF_16LE, "utf-16le"));
1912 }
1913 if bytes.starts_with(&[0xFE, 0xFF]) {
1914 return Ok(decode_utf16_bom(&bytes[2..], UTF_16BE, "utf-16be"));
1915 }
1916
1917 #[allow(clippy::option_if_let_else)]
1919 if let Ok(text) = String::from_utf8(bytes.to_vec()) {
1920 Ok((text, "utf-8".into(), vec![]))
1921 } else {
1922 let (cow, _, had_errors) = WINDOWS_1252.decode(bytes);
1923 let mut warnings = vec!["decoded using windows-1252 fallback".into()];
1924 if had_errors {
1925 warnings.push("fallback decode contained replacement characters".into());
1926 }
1927 Ok((cow.into_owned(), "windows-1252".into(), warnings))
1928 }
1929}
1930
1931fn compile_globset(patterns: &[String]) -> Result<Option<GlobSet>> {
1932 if patterns.is_empty() {
1933 return Ok(None);
1934 }
1935
1936 let mut builder = GlobSetBuilder::new();
1937 for pattern in patterns {
1938 builder
1939 .add(Glob::new(pattern).with_context(|| format!("invalid glob pattern: {pattern}"))?);
1940 }
1941 Ok(Some(
1942 builder.build().context("failed to compile glob filters")?,
1943 ))
1944}
1945
1946fn parse_enabled_languages(enabled: &[String]) -> Result<Option<BTreeSet<Language>>> {
1947 if enabled.is_empty() {
1948 return Ok(None);
1949 }
1950
1951 let supported = supported_languages();
1952 let mut set = BTreeSet::new();
1953 for name in enabled {
1954 let language = Language::from_name(name)
1955 .with_context(|| format!("unsupported language in config: {name}"))?;
1956 if !supported.contains(&language) {
1957 anyhow::bail!("language {name} is not supported in this build");
1958 }
1959 set.insert(language);
1960 }
1961 Ok(Some(set))
1962}
1963
1964pub fn write_json(run: &AnalysisRun, output_path: &Path) -> Result<()> {
1968 let json = serde_json::to_string_pretty(run).context("failed to serialize analysis run")?;
1969 fs::write(output_path, json)
1970 .with_context(|| format!("failed to write JSON output to {}", output_path.display()))
1971}
1972
1973pub fn read_json(path: &Path) -> Result<AnalysisRun> {
1977 let contents = fs::read_to_string(path)
1978 .with_context(|| format!("failed to read result file {}", path.display()))?;
1979 serde_json::from_str(&contents)
1980 .with_context(|| format!("failed to parse JSON result {}", path.display()))
1981}
1982
#[cfg(test)]
mod tests {
    use super::*;

    /// Under `CodeOnly`, mixed lines count as code only; with docstrings
    /// treated as comments they land on the comment side.
    #[test]
    fn effective_counts_respect_code_only_policy() {
        let input = RawLineCounts {
            code_only_lines: 2,
            single_comment_only_lines: 1,
            mixed_code_single_comment_lines: 3,
            docstring_comment_lines: 2,
            ..RawLineCounts::default()
        };

        let effective = compute_effective_counts(&input, MixedLinePolicy::CodeOnly, true, true);

        // 2 code-only + 3 mixed.
        assert_eq!(effective.code_lines, 5);
        // 1 comment-only + 2 docstring.
        assert_eq!(effective.comment_lines, 3);
    }

    /// Under `SeparateMixedCategory`, mixed lines go to their own bucket and
    /// touch neither code nor comments.
    #[test]
    fn effective_counts_can_separate_mixed() {
        let input = RawLineCounts {
            mixed_code_single_comment_lines: 2,
            mixed_code_multi_comment_lines: 1,
            ..RawLineCounts::default()
        };

        let effective =
            compute_effective_counts(&input, MixedLinePolicy::SeparateMixedCategory, true, true);

        assert_eq!(effective.mixed_lines_separate, 3);
        assert_eq!(effective.code_lines, 0);
        assert_eq!(effective.comment_lines, 0);
    }

    /// Bytes that are not valid UTF-8 fall back to windows-1252, where 0x96
    /// is an en dash, and the fallback is reported as a warning.
    #[test]
    fn windows_1252_fallback_decodes() {
        let input: Vec<u8> = vec![0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x96, 0x57];

        let (decoded, label, notes) = decode_bytes(&input).unwrap();

        assert_eq!(label, "windows-1252");
        assert!(decoded.contains('–'));
        assert!(!notes.is_empty());
    }
}