1#![allow(clippy::multiple_crate_versions)]
4
5pub mod baseline;
6pub mod coverage;
7pub mod delta;
8pub mod history;
9pub use baseline::{check_against_baseline, resolve_baselines_path, BaselineEntry, BaselineStore};
10pub use coverage::{aggregate_line_coverage, lookup_coverage, parse_lcov, FileCoverage};
11pub use delta::{compute_delta, FileChangeStatus, FileDelta, ScanComparison, SummaryDelta};
12pub use history::{
13 CleanupPolicy, CleanupPolicyStore, RegistryEntry, ScanRegistry, ScanSummarySnapshot,
14 WatchedDirsStore,
15};
16
17use std::collections::{BTreeMap, BTreeSet, HashSet};
18use std::fs;
19use std::path::{Path, PathBuf};
20use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
21use std::sync::Arc;
22
23use anyhow::{Context, Result};
24use chrono::{DateTime, Utc};
25use encoding_rs::{UTF_16BE, UTF_16LE, WINDOWS_1252};
26use globset::{Glob, GlobSet, GlobSetBuilder};
27use ignore::WalkBuilder;
28use serde::{Deserialize, Serialize};
29use uuid::Uuid;
30
31use sloc_config::{
32 AppConfig, BinaryFileBehavior, BlankInBlockCommentPolicy, ContinuationLinePolicy,
33 FailureBehavior, MixedLinePolicy,
34};
35use sloc_languages::style::IndentStyle;
36use sloc_languages::{
37 analyze_text, detect_language, supported_languages, AnalysisOptions, Language, ParseMode,
38 RawLineCounts, StyleAnalysis, StyleLangScope,
39};
40
/// Upper bound on the number of worker threads spawned by `run_parallel_analysis`.
const MAX_ANALYSIS_THREADS: usize = 16;
/// Worker-thread count used when `std::thread::available_parallelism` fails.
const DEFAULT_ANALYSIS_THREADS: usize = 4;
/// NOTE(review): not referenced in the visible part of this file; presumably the
/// number of leading bytes sampled by generated-file detection — confirm.
const GENERATED_SAMPLE_BYTES: usize = 1024;
/// NOTE(review): not referenced in the visible part of this file; presumably the
/// number of leading bytes sampled by minified-file detection — confirm.
const MINIFIED_SAMPLE_BYTES: usize = 4096;
/// NOTE(review): not referenced in the visible part of this file; presumably a
/// line-length threshold used by minified-file detection — confirm.
const MINIFIED_LINE_THRESHOLD: usize = 2000;
/// NOTE(review): not referenced in the visible part of this file; presumably the
/// number of leading bytes sampled by binary detection — confirm.
const BINARY_SAMPLE_BYTES: usize = 8192;
55
/// Shared counters a caller can poll to observe analysis progress.
pub struct ProgressCounters {
    /// Incremented by one each time a worker finishes a candidate file.
    pub files_done: Arc<AtomicUsize>,
    /// Increased by the number of discovered paths each time a directory root
    /// has been walked.
    pub files_total: Arc<AtomicUsize>,
}
63
/// Result of the metadata-only policy checks performed before a file is read.
enum MetadataPolicyOutcome {
    /// The file is skipped and reported; the boxed record is returned to the caller.
    Skip(Box<FileRecord>),
    /// The file is dropped without producing any record.
    Exclude,
    /// No policy matched; analysis proceeds to reading the file.
    Continue,
}
73
/// Final disposition of a candidate file.
///
/// Serialized in `snake_case`. The two `Analyzed*` variants route a record to
/// the analyzed list in `push_record`; every other variant routes it to the
/// skipped list.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FileStatus {
    /// Analyzed. NOTE(review): presumably with an exact parser — confirm.
    AnalyzedExact,
    /// Analyzed. NOTE(review): presumably with a best-effort parser — confirm.
    AnalyzedBestEffort,
    /// Skipped because the content was classified as binary.
    SkippedBinary,
    /// Skipped because the bytes could not be decoded to text.
    SkippedDecodeError,
    /// NOTE(review): presumably used when no supported language matches — confirm.
    SkippedUnsupported,
    /// Skipped by a discovery or content policy (symlink, size, glob, vendor, ...).
    SkippedByPolicy,
    /// Reading the file's metadata or contents failed.
    ErrorInternal,
}
85
/// Per-file line counts.
///
/// NOTE(review): presumably the counts after the configured mixed-line and
/// continuation policies have been applied — confirm against the producer.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct EffectiveCounts {
    /// Lines counted as code.
    pub code_lines: u64,
    /// Lines counted as comments.
    pub comment_lines: u64,
    /// Lines counted as blank.
    pub blank_lines: u64,
    /// Mixed code/comment lines tracked separately.
    pub mixed_lines_separate: u64,
}
93
/// Identification of the tool and of one particular run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolMetadata {
    /// Tool name (`assemble_run` sets this to `"sloc"`).
    pub name: String,
    /// Crate version taken from `CARGO_PKG_VERSION` at build time.
    pub version: String,
    /// Run identifier: a `%Y%m%d-%H%M` timestamp followed by `-` and a v4 UUID.
    pub run_id: String,
    /// UTC time at which the run record was assembled.
    pub timestamp_utc: DateTime<Utc>,
}
101
/// Description of the environment a run executed in.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnvironmentMetadata {
    /// Value of `std::env::consts::OS`.
    pub operating_system: String,
    /// Value of `std::env::consts::ARCH`.
    pub architecture: String,
    /// Caller-supplied runtime mode string.
    pub runtime_mode: String,
    /// User name taken from the environment, or `"unknown"`.
    pub initiator_username: String,
    /// Host or CI runner name, or `"unknown"`.
    pub initiator_hostname: String,
    /// Detected CI system, if any; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub ci_name: Option<String>,
}
114
/// Totals across a whole run.
///
/// Fields marked `#[serde(default)]` fall back to `0` when they are missing
/// from deserialized input.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct SummaryTotals {
    /// Number of files considered.
    pub files_considered: u64,
    /// Number of files analyzed.
    pub files_analyzed: u64,
    /// Number of files skipped.
    pub files_skipped: u64,
    /// Total physical lines.
    pub total_physical_lines: u64,
    /// Total code lines.
    pub code_lines: u64,
    /// Total comment lines.
    pub comment_lines: u64,
    /// Total blank lines.
    pub blank_lines: u64,
    /// Total mixed code/comment lines tracked separately.
    pub mixed_lines_separate: u64,
    /// Function count.
    #[serde(default)]
    pub functions: u64,
    /// Class count.
    #[serde(default)]
    pub classes: u64,
    /// Variable count.
    #[serde(default)]
    pub variables: u64,
    /// Import count.
    #[serde(default)]
    pub imports: u64,
    /// Test count.
    #[serde(default)]
    pub test_count: u64,
    /// Test assertion count.
    #[serde(default)]
    pub test_assertion_count: u64,
    /// Test suite count.
    #[serde(default)]
    pub test_suite_count: u64,
    /// Coverage: lines found.
    #[serde(default)]
    pub coverage_lines_found: u64,
    /// Coverage: lines hit.
    #[serde(default)]
    pub coverage_lines_hit: u64,
    /// Coverage: functions found.
    #[serde(default)]
    pub coverage_functions_found: u64,
    /// Coverage: functions hit.
    #[serde(default)]
    pub coverage_functions_hit: u64,
    /// Coverage: branches found.
    #[serde(default)]
    pub coverage_branches_found: u64,
    /// Coverage: branches hit.
    #[serde(default)]
    pub coverage_branches_hit: u64,
}
155
/// Totals for a single language.
///
/// Fields marked `#[serde(default)]` fall back to `0` when they are missing
/// from deserialized input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LanguageSummary {
    /// Language these totals belong to.
    pub language: Language,
    /// Number of files in this language.
    pub files: u64,
    /// Total physical lines.
    pub total_physical_lines: u64,
    /// Total code lines.
    pub code_lines: u64,
    /// Total comment lines.
    pub comment_lines: u64,
    /// Total blank lines.
    pub blank_lines: u64,
    /// Total mixed code/comment lines tracked separately.
    pub mixed_lines_separate: u64,
    /// Function count.
    #[serde(default)]
    pub functions: u64,
    /// Class count.
    #[serde(default)]
    pub classes: u64,
    /// Variable count.
    #[serde(default)]
    pub variables: u64,
    /// Import count.
    #[serde(default)]
    pub imports: u64,
    /// Test count.
    #[serde(default)]
    pub test_count: u64,
    /// Test assertion count.
    #[serde(default)]
    pub test_assertion_count: u64,
    /// Test suite count.
    #[serde(default)]
    pub test_suite_count: u64,
    /// Coverage: lines found.
    #[serde(default)]
    pub coverage_lines_found: u64,
    /// Coverage: lines hit.
    #[serde(default)]
    pub coverage_lines_hit: u64,
    /// Coverage: functions found.
    #[serde(default)]
    pub coverage_functions_found: u64,
    /// Coverage: functions hit.
    #[serde(default)]
    pub coverage_functions_hit: u64,
    /// Coverage: branches found.
    #[serde(default)]
    pub coverage_branches_found: u64,
    /// Coverage: branches hit.
    #[serde(default)]
    pub coverage_branches_hit: u64,
}
192
/// Everything recorded about one candidate file, whether analyzed or skipped.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileRecord {
    /// Path of the file as a string.
    pub path: String,
    /// Path relative to the root it was discovered under; records are sorted
    /// by this field and coverage / submodule lookups key on it.
    pub relative_path: String,
    /// Detected language, if any.
    pub language: Option<Language>,
    /// File size in bytes.
    pub size_bytes: u64,
    /// Name of the encoding the content was decoded with, if it was decoded.
    pub detected_encoding: Option<String>,
    /// Raw per-category line counts reported by the language analyzer.
    pub raw_line_categories: RawLineCounts,
    /// Line counts attributed to the file (see [`EffectiveCounts`]).
    pub effective_counts: EffectiveCounts,
    /// Final disposition of the file.
    pub status: FileStatus,
    /// File-level warnings; `push_record` copies these into the run warnings
    /// prefixed with `relative_path`.
    pub warnings: Vec<String>,
    /// Whether the file was classified as generated.
    pub generated: bool,
    /// Whether the file was classified as minified.
    pub minified: bool,
    /// Whether the file lies on a vendor path.
    pub vendor: bool,
    /// Parse mode used by the analyzer, if the file was analyzed.
    pub parse_mode: Option<ParseMode>,
    /// Name of the git submodule containing the file; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub submodule: Option<String>,
    /// Coverage data matched to this file; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub coverage: Option<FileCoverage>,
    /// Style analysis for this file; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub style_analysis: Option<StyleAnalysis>,
}
217
/// Style statistics for one language family.
///
/// NOTE(review): field meanings below are inferred from names; the code that
/// fills them is outside the visible part of this file — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LanguageStyleGroup {
    /// Name of the language family.
    pub language_family: String,
    /// Number of files in the group.
    pub files_count: u32,
    /// Name of the dominant style guide.
    pub dominant_guide: String,
    /// Score of the dominant guide, as a percentage.
    pub dominant_score_pct: u8,
    /// Most common indentation style, as text.
    pub common_indent_style: String,
    /// `(guide name, average score)` pairs.
    pub guide_avg_scores: Vec<(String, u8)>,
    /// Percentage compliant with an 80-column limit.
    pub line80_compliant_pct: u8,
    /// Percentage compliant with the configured column threshold.
    pub line_col_compliant_pct: u8,
}
238
/// Run-wide style statistics, with a per-language breakdown.
///
/// NOTE(review): field meanings below are inferred from names; the code that
/// fills them is outside the visible part of this file — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StyleSummary {
    /// Number of files that contributed style data.
    pub files_analyzed: u32,
    /// Most common indentation style, as text.
    pub common_indent_style: String,
    /// Percentage compliant with an 80-column limit.
    pub line80_compliant_pct: u8,
    /// Percentage compliant with `col_threshold`.
    pub line_col_compliant_pct: u8,
    /// Column threshold used for `line_col_compliant_pct`.
    pub col_threshold: u16,
    /// Per-language-family breakdown.
    pub by_language: Vec<LanguageStyleGroup>,
}
255
/// Alias for [`StyleSummary`].
///
/// NOTE(review): presumably kept so older callers using the C++-specific name
/// still compile — confirm.
pub type CppStyleSummary = StyleSummary;
259
/// Totals for the files that belong to one git submodule.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SubmoduleSummary {
    /// Submodule name.
    pub name: String,
    /// Submodule path relative to the scanned root.
    pub relative_path: String,
    /// Number of analyzed files inside the submodule.
    pub files_analyzed: u64,
    /// Total physical lines.
    pub total_physical_lines: u64,
    /// Total code lines.
    pub code_lines: u64,
    /// Total comment lines.
    pub comment_lines: u64,
    /// Total blank lines.
    pub blank_lines: u64,
    /// Per-language totals within the submodule.
    pub language_summaries: Vec<LanguageSummary>,
}
272
/// Complete, serializable result of one analysis run.
///
/// Optional fields are omitted from serialized output when unset and default
/// to `None` / empty when missing from deserialized input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalysisRun {
    /// Tool identification and run id.
    pub tool: ToolMetadata,
    /// Environment the run executed in.
    pub environment: EnvironmentMetadata,
    /// Copy of the configuration the run used.
    pub effective_configuration: AppConfig,
    /// Configured root paths, as strings.
    pub input_roots: Vec<String>,
    /// Run-wide totals.
    pub summary_totals: SummaryTotals,
    /// Totals per language.
    pub totals_by_language: Vec<LanguageSummary>,
    /// Records of analyzed files.
    pub per_file_records: Vec<FileRecord>,
    /// Records of skipped files.
    pub skipped_file_records: Vec<FileRecord>,
    /// Run-level warnings, including per-file warnings prefixed with the file path.
    pub warnings: Vec<String>,
    /// Per-submodule totals; empty unless submodule breakdown is enabled.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub submodule_summaries: Vec<SubmoduleSummary>,
    /// First seven characters of the commit hash.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_short: Option<String>,
    /// Commit hash HEAD resolves to.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_long: Option<String>,
    /// Branch name from HEAD, or from CI environment variables as a fallback.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_branch: Option<String>,
    /// Author name of the HEAD commit (`git log -1 --format=%an`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_author: Option<String>,
    /// Tags pointing at HEAD, joined with `", "`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_tags: Option<String>,
    /// Output of `git describe --tags --abbrev=0 HEAD`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_nearest_tag: Option<String>,
    /// Author date of the HEAD commit (`git log -1 --format=%aI`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_date: Option<String>,
    /// URL of the `origin` remote read from the repository config.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_remote_url: Option<String>,
    /// Run-wide style summary, if one was produced.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub style_summary: Option<StyleSummary>,
}
315
/// Git details gathered for a run; every field is `None` when unavailable.
#[derive(Default)]
struct GitInfo {
    /// First seven characters of `commit_long`.
    commit_short: Option<String>,
    /// Commit hash HEAD resolves to.
    commit_long: Option<String>,
    /// Branch from HEAD, falling back to CI environment variables.
    branch: Option<String>,
    /// Author name of the HEAD commit.
    author: Option<String>,
    /// Tags pointing at HEAD, joined with `", "`.
    tags: Option<String>,
    /// Nearest tag as reported by `git describe`.
    nearest_tag: Option<String>,
    /// Author date of the HEAD commit.
    commit_date: Option<String>,
    /// URL of the `origin` remote.
    remote_url: Option<String>,
}
327
328fn find_git_dir(start: &Path) -> Option<PathBuf> {
332 let mut current = Some(start);
333 while let Some(dir) = current {
334 let candidate = dir.join(".git");
335 if candidate.is_dir() {
336 return Some(candidate);
337 }
338 if candidate.is_file() {
339 if let Some(resolved) = resolve_git_file_pointer(&candidate, dir) {
340 return Some(resolved);
341 }
342 }
343 current = dir.parent();
344 }
345 None
346}
347
/// Resolves a `.git` pointer file (content `gitdir: <path>`) to the directory
/// it names.
///
/// Forward slashes in the pointer are converted to the platform separator, and
/// a relative pointer is interpreted relative to `base_dir`. The result is
/// canonicalized when possible. Returns `None` if the file cannot be read,
/// lacks the `gitdir: ` prefix, or the target is not a directory.
fn resolve_git_file_pointer(file: &Path, base_dir: &Path) -> Option<PathBuf> {
    let raw = fs::read_to_string(file).ok()?;
    let target = raw
        .trim()
        .strip_prefix("gitdir: ")?
        .replace('/', std::path::MAIN_SEPARATOR_STR);
    let target = Path::new(&target);

    let joined = if target.is_absolute() {
        target.to_path_buf()
    } else {
        base_dir.join(target)
    };

    // Fall back to the un-canonicalized path when canonicalization fails.
    let located = match joined.canonicalize() {
        Ok(canonical) => canonical,
        Err(_) => joined,
    };
    located.is_dir().then_some(located)
}
372
/// Resolves `refname` (e.g. `refs/heads/main`) to a commit hash using only the
/// files inside `git_dir`.
///
/// The loose ref file is consulted first; its trimmed content is accepted when
/// it is at least 40 characters long and consists solely of hex digits.
/// Otherwise `packed-refs` is scanned for a `<sha> <refname>` entry.
///
/// Returns `None` when neither source yields a hash.
fn resolve_ref(git_dir: &Path, refname: &str) -> Option<String> {
    // Join component by component so the path uses the native separator.
    let ref_path = refname
        .split('/')
        .fold(git_dir.to_path_buf(), |p, c| p.join(c));

    let loose = fs::read_to_string(&ref_path)
        .ok()
        .map(|s| s.trim().to_string())
        .filter(|s| s.len() >= 40 && s.chars().all(|c| c.is_ascii_hexdigit()));
    if loose.is_some() {
        return loose;
    }

    let packed = fs::read_to_string(git_dir.join("packed-refs")).ok()?;
    packed
        .lines()
        // `#` lines are headers, `^` lines are peeled-tag annotations.
        .filter(|line| !line.starts_with('#') && !line.starts_with('^'))
        // Blank or malformed lines are skipped rather than aborting the scan.
        .filter_map(|line| line.split_once(' '))
        .find(|(_, name)| name.trim() == refname)
        .map(|(sha, _)| sha.to_string())
}
408
/// Extracts the value from a git-config line of the form `url = <value>`.
///
/// The line must start with `url`, optionally followed by spaces or tabs and
/// then `=`. Returns the trimmed value, or `None` if the line has a different
/// shape or the value is empty.
fn parse_url_line(line: &str) -> Option<&str> {
    let after_key = line
        .strip_prefix("url")?
        .trim_start_matches(|c: char| c == ' ' || c == '\t');
    let value = after_key.strip_prefix('=')?.trim();
    Some(value).filter(|v| !v.is_empty())
}
420
421fn read_git_remote_url(git_dir: &Path) -> Option<String> {
423 let config = fs::read_to_string(git_dir.join("config")).ok()?;
424 let mut in_origin = false;
425 for line in config.lines() {
426 let trimmed = line.trim();
427 if trimmed.starts_with('[') {
428 in_origin = trimmed == r#"[remote "origin"]"#;
429 } else if in_origin {
430 if let Some(url) = parse_url_line(trimmed) {
431 return Some(url.to_owned());
432 }
433 }
434 }
435 None
436}
437
438fn detect_git_for_run(project_path: &Path) -> GitInfo {
442 let ci_branch = ci_branch_from_env();
444
445 let Some(git_dir) = find_git_dir(project_path) else {
446 return GitInfo {
449 branch: ci_branch,
450 ..GitInfo::default()
451 };
452 };
453
454 let head_raw = match fs::read_to_string(git_dir.join("HEAD")) {
455 Ok(s) => s.trim().to_string(),
456 Err(_) => {
457 return GitInfo {
458 branch: ci_branch,
459 ..GitInfo::default()
460 }
461 }
462 };
463
464 let (branch_from_head, commit_long) = head_raw.strip_prefix("ref: ").map_or_else(
465 || {
466 if head_raw.len() >= 40 && head_raw.chars().all(|c| c.is_ascii_hexdigit()) {
467 (None, Some(head_raw[..40].to_string()))
469 } else {
470 (None, None)
471 }
472 },
473 |refname| {
474 let branch = refname
475 .strip_prefix("refs/heads/")
476 .map(|b| b.trim().to_string());
477 let sha = resolve_ref(&git_dir, refname.trim());
478 (branch, sha)
479 },
480 );
481 let branch = branch_from_head.or(ci_branch);
484
485 let commit_short = commit_long
486 .as_deref()
487 .map(|s| s.chars().take(7).collect::<String>());
488
489 let author = run_git_cmd(project_path, &["log", "-1", "--format=%an", "HEAD"]);
490 let commit_date = run_git_cmd(project_path, &["log", "-1", "--format=%aI", "HEAD"]);
491 let remote_url = read_git_remote_url(&git_dir);
492
493 let tags = run_git_cmd(project_path, &["tag", "--points-at", "HEAD"]).map(|t| {
496 t.lines()
497 .filter(|l| !l.is_empty())
498 .collect::<Vec<_>>()
499 .join(", ")
500 });
501 let nearest_tag = run_git_cmd(project_path, &["describe", "--tags", "--abbrev=0", "HEAD"]);
502
503 GitInfo {
504 commit_short,
505 commit_long,
506 branch,
507 author,
508 tags,
509 nearest_tag,
510 commit_date,
511 remote_url,
512 }
513}
514
/// Runs `git <args>` in `dir` and returns its trimmed standard output.
///
/// Several executable locations are tried in order (`git` on `PATH`, then
/// common Unix, Homebrew and Windows install paths). The first invocation that
/// exits successfully with non-empty UTF-8 output wins. Returns `None` if no
/// candidate produced such output.
///
/// NOTE(review): every invocation passes `-c safe.directory=*`, which relaxes
/// git's repository-ownership check for that call — confirm this is intended
/// for all deployment environments.
fn run_git_cmd(dir: &Path, args: &[&str]) -> Option<String> {
    const CANDIDATES: [&str; 7] = [
        "git",
        "/usr/bin/git",
        "/usr/local/bin/git",
        "/opt/homebrew/bin/git",
        r"C:\Program Files\Git\cmd\git.exe",
        r"C:\Program Files\Git\bin\git.exe",
        r"C:\Program Files (x86)\Git\cmd\git.exe",
    ];

    CANDIDATES.iter().find_map(|exe| {
        let output = std::process::Command::new(exe)
            .args(["-c", "safe.directory=*"])
            .args(args)
            .current_dir(dir)
            .output()
            .ok()?;
        if !output.status.success() {
            return None;
        }
        let stdout = String::from_utf8(output.stdout).ok()?;
        let text = stdout.trim();
        if text.is_empty() {
            None
        } else {
            Some(text.to_string())
        }
    })
}
549
/// Identifies the CI system the process is running under from well-known
/// environment variables.
///
/// Jenkins is recognized by the mere presence of `JENKINS_URL`, `JENKINS_HOME`
/// or `BUILD_URL`, and TeamCity by `TEAMCITY_VERSION`. The remaining systems
/// are recognized by a flag variable whose value is `true`, compared
/// case-insensitively because Azure Pipelines sets `TF_BUILD` to `True`.
///
/// Returns `None` when no known CI system is detected.
fn detect_ci_system() -> Option<&'static str> {
    let is_set = |key: &str| std::env::var(key).is_ok();
    let is_true =
        |key: &str| std::env::var(key).map_or(false, |v| v.eq_ignore_ascii_case("true"));

    if is_set("JENKINS_URL") || is_set("JENKINS_HOME") || is_set("BUILD_URL") {
        return Some("Jenkins");
    }
    if is_true("GITHUB_ACTIONS") {
        return Some("GitHub Actions");
    }
    if is_true("GITLAB_CI") {
        return Some("GitLab CI");
    }
    if is_true("CIRCLECI") {
        return Some("CircleCI");
    }
    if is_true("TRAVIS") {
        return Some("Travis CI");
    }
    if is_true("TF_BUILD") {
        return Some("Azure DevOps");
    }
    if is_set("TEAMCITY_VERSION") {
        return Some("TeamCity");
    }
    None
}
577
/// Returns the branch name advertised by CI environment variables, if any.
///
/// The variables are consulted in a fixed order. Each value is trimmed and a
/// leading `refs/heads/` (or, failing that, `origin/`) is removed. A value
/// that is empty or equal to `HEAD` after that is ignored and the next
/// variable is tried.
fn ci_branch_from_env() -> Option<String> {
    const VARS: &[&str] = &[
        "BRANCH_NAME",
        "GIT_BRANCH",
        "GITHUB_REF_NAME",
        "CI_COMMIT_BRANCH",
        "CIRCLE_BRANCH",
        "TRAVIS_BRANCH",
        "BUILD_SOURCEBRANCH",
    ];

    VARS.iter().copied().find_map(|name| {
        let raw = std::env::var(name).ok()?;
        let trimmed = raw.trim();
        let branch = trimmed
            .strip_prefix("refs/heads/")
            .or_else(|| trimmed.strip_prefix("origin/"))
            .unwrap_or(trimmed);
        if branch.is_empty() || branch == "HEAD" {
            None
        } else {
            Some(branch.to_string())
        }
    })
}
604
/// Returns the current user name from `USERNAME`, then `USER`, or `"unknown"`
/// when neither variable can be read.
fn get_current_username() -> String {
    ["USERNAME", "USER"]
        .iter()
        .find_map(|key| std::env::var(key).ok())
        .unwrap_or_else(|| "unknown".to_string())
}
610
/// Returns the value of environment variable `var`, or `None` if it is unset,
/// unreadable, or empty.
fn non_empty_env(var: &str) -> Option<String> {
    std::env::var(var).ok().filter(|value| !value.is_empty())
}
619
/// Reports whether any of `JENKINS_URL`, `JENKINS_HOME` or `BUILD_URL` is set
/// in the environment.
fn is_jenkins_env() -> bool {
    ["JENKINS_URL", "JENKINS_HOME", "BUILD_URL"]
        .iter()
        .any(|key| std::env::var(key).is_ok())
}
625
626fn get_hostname() -> String {
627 if is_jenkins_env() {
630 if let Some(n) = non_empty_env("NODE_NAME") {
631 return n;
632 }
633 }
634 if std::env::var("GITHUB_ACTIONS").as_deref() == Ok("true") {
635 if let Some(r) = non_empty_env("RUNNER_NAME") {
636 return r;
637 }
638 }
639 if std::env::var("GITLAB_CI").as_deref() == Ok("true") {
640 if let Some(r) = non_empty_env("CI_RUNNER_DESCRIPTION") {
641 return r;
642 }
643 }
644 std::env::var("COMPUTERNAME")
645 .or_else(|_| std::env::var("HOSTNAME"))
646 .or_else(|_| std::fs::read_to_string("/etc/hostname").map(|s| s.trim().to_string()))
647 .unwrap_or_else(|_| "unknown".to_string())
648}
649
650#[allow(clippy::too_many_arguments)]
652fn walk_root(
653 root: &Path,
654 config: &AppConfig,
655 include_globs: Option<&GlobSet>,
656 exclude_globs: Option<&GlobSet>,
657 enabled_languages: Option<&BTreeSet<Language>>,
658 seen_paths: &mut HashSet<PathBuf>,
659 analyzed: &mut Vec<FileRecord>,
660 skipped: &mut Vec<FileRecord>,
661 warnings: &mut Vec<String>,
662 cancel: Option<&AtomicBool>,
663 progress: Option<&ProgressCounters>,
664) -> Result<()> {
665 let mut builder = WalkBuilder::new(root);
666 builder
667 .follow_links(config.discovery.follow_symlinks)
668 .hidden(config.discovery.ignore_hidden_files)
669 .ignore(config.discovery.honor_ignore_files)
670 .parents(config.discovery.honor_ignore_files)
671 .git_ignore(config.discovery.honor_ignore_files)
672 .git_global(config.discovery.honor_ignore_files)
673 .git_exclude(config.discovery.honor_ignore_files);
674
675 let paths = collect_walk_paths(&builder, seen_paths, warnings);
676 if paths.is_empty() {
677 return Ok(());
678 }
679
680 if let Some(p) = progress {
681 p.files_total.fetch_add(paths.len(), Ordering::Relaxed);
682 }
683
684 let chunk_results = run_parallel_analysis(
685 &paths,
686 root,
687 config,
688 include_globs,
689 exclude_globs,
690 enabled_languages,
691 cancel,
692 progress,
693 )?;
694 merge_chunk_results(chunk_results, analyzed, skipped, warnings)
695}
696
697fn collect_walk_paths(
698 builder: &WalkBuilder,
699 seen_paths: &mut HashSet<PathBuf>,
700 warnings: &mut Vec<String>,
701) -> Vec<PathBuf> {
702 let (tx, rx) = std::sync::mpsc::channel::<std::result::Result<PathBuf, String>>();
706
707 builder.build_parallel().run(|| {
708 let tx = tx.clone();
709 Box::new(move |entry| {
710 match entry {
711 Err(e) => {
712 let _ = tx.send(Err(format!("discovery warning: {e}")));
713 }
714 Ok(e) => {
715 let path = e.into_path();
716 if !path.is_dir() {
717 let _ = tx.send(Ok(path));
718 }
719 }
720 }
721 ignore::WalkState::Continue
722 })
723 });
724
725 drop(tx);
728
729 rx.into_iter()
730 .filter_map(|msg| match msg {
731 Ok(path) => {
732 if seen_paths.insert(path.clone()) {
733 Some(path)
734 } else {
735 None
736 }
737 }
738 Err(warn) => {
739 warnings.push(warn);
740 None
741 }
742 })
743 .collect()
744}
745
746#[allow(clippy::too_many_arguments)]
748fn worker_loop(
749 paths: &[PathBuf],
750 root: &Path,
751 config: &AppConfig,
752 include_globs: Option<&GlobSet>,
753 exclude_globs: Option<&GlobSet>,
754 enabled_languages: Option<&BTreeSet<Language>>,
755 cancel: Option<&AtomicBool>,
756 next_index: &AtomicUsize,
757 files_done: Option<&AtomicUsize>,
758) -> Vec<Result<Option<FileRecord>>> {
759 let mut results = Vec::new();
760 loop {
761 if cancel.is_some_and(|c| c.load(Ordering::Relaxed)) {
762 results.push(Err(anyhow::anyhow!("analysis cancelled")));
763 break;
764 }
765 let i = next_index.fetch_add(1, Ordering::Relaxed);
766 if i >= paths.len() {
767 break;
768 }
769 results.push(analyze_candidate_file(
770 &paths[i],
771 root,
772 config,
773 include_globs,
774 exclude_globs,
775 enabled_languages,
776 ));
777 if let Some(fd) = files_done {
778 fd.fetch_add(1, Ordering::Relaxed);
779 }
780 }
781 results
782}
783
784#[allow(clippy::too_many_arguments)]
785fn run_parallel_analysis(
786 paths: &[PathBuf],
787 root: &Path,
788 config: &AppConfig,
789 include_globs: Option<&GlobSet>,
790 exclude_globs: Option<&GlobSet>,
791 enabled_languages: Option<&BTreeSet<Language>>,
792 cancel: Option<&AtomicBool>,
793 progress: Option<&ProgressCounters>,
794) -> Result<Vec<Vec<Result<Option<FileRecord>>>>> {
795 let thread_count = std::thread::available_parallelism().map_or(DEFAULT_ANALYSIS_THREADS, |n| {
796 n.get().min(MAX_ANALYSIS_THREADS)
797 });
798 let next_index = AtomicUsize::new(0);
802 let files_done: Option<&AtomicUsize> = progress.map(|p| p.files_done.as_ref());
803
804 std::thread::scope(|s| -> Result<Vec<Vec<Result<Option<FileRecord>>>>> {
805 let mut handles = Vec::with_capacity(thread_count);
808 for _ in 0..thread_count {
809 handles.push(s.spawn(|| {
810 worker_loop(
811 paths,
812 root,
813 config,
814 include_globs,
815 exclude_globs,
816 enabled_languages,
817 cancel,
818 &next_index,
819 files_done,
820 )
821 }));
822 }
823 handles
824 .into_iter()
825 .map(|h| {
826 h.join()
827 .map_err(|_| anyhow::anyhow!("analysis thread panicked"))
828 })
829 .collect()
830 })
831}
832
833fn merge_chunk_results(
834 chunk_results: Vec<Vec<Result<Option<FileRecord>>>>,
835 analyzed: &mut Vec<FileRecord>,
836 skipped: &mut Vec<FileRecord>,
837 warnings: &mut Vec<String>,
838) -> Result<()> {
839 for chunk in chunk_results {
840 for result in chunk {
841 if let Some(record) = result? {
842 push_record(record, analyzed, skipped, warnings);
843 }
844 }
845 }
846 Ok(())
847}
848
849fn process_submodules(config: &AppConfig, analyzed: &mut [FileRecord]) -> Vec<SubmoduleSummary> {
851 let root = config.discovery.root_paths[0]
852 .canonicalize()
853 .unwrap_or_else(|_| config.discovery.root_paths[0].clone());
854 let submodules = detect_submodules(&root);
855 if submodules.is_empty() {
856 return Vec::new();
857 }
858
859 for file in analyzed.iter_mut() {
860 for (name, sub_path) in &submodules {
861 let prefix = sub_path.to_string_lossy().replace('\\', "/");
862 let rel = &file.relative_path;
863 if rel == &prefix || rel.starts_with(&format!("{prefix}/")) {
864 file.submodule = Some(name.clone());
865 break;
866 }
867 }
868 }
869
870 build_submodule_summaries(analyzed, &submodules)
871}
872
873fn assemble_run(
875 config: &AppConfig,
876 runtime_mode: &str,
877 analyzed: Vec<FileRecord>,
878 skipped: Vec<FileRecord>,
879 warnings: Vec<String>,
880 submodule_summaries: Vec<SubmoduleSummary>,
881) -> AnalysisRun {
882 let summary = build_summary(&analyzed, &skipped);
883 let language_summaries = build_language_summaries(&analyzed);
884 let col_threshold = config.analysis.style_col_threshold;
885 let style_summary = build_style_summary(&analyzed, col_threshold);
886
887 let first_root = config
888 .discovery
889 .root_paths
890 .first()
891 .map(|p| p.canonicalize().unwrap_or_else(|_| p.clone()));
892 let git = first_root
893 .as_deref()
894 .map(detect_git_for_run)
895 .unwrap_or_default();
896
897 let now = Utc::now();
898 let run_id = {
899 let uuid_suffix = Uuid::new_v4().simple().to_string();
900 format!("{}-{}", now.format("%Y%m%d-%H%M"), uuid_suffix)
901 };
902
903 AnalysisRun {
904 tool: ToolMetadata {
905 name: "sloc".into(),
906 version: env!("CARGO_PKG_VERSION").into(),
907 run_id,
908 timestamp_utc: now,
909 },
910 environment: EnvironmentMetadata {
911 operating_system: std::env::consts::OS.into(),
912 architecture: std::env::consts::ARCH.into(),
913 runtime_mode: runtime_mode.into(),
914 initiator_username: get_current_username(),
915 initiator_hostname: get_hostname(),
916 ci_name: if is_jenkins_env() {
917 Some(format!("Jenkins\t{}", get_hostname()))
918 } else {
919 detect_ci_system().map(str::to_string)
920 },
921 },
922 effective_configuration: config.clone(),
923 input_roots: config
924 .discovery
925 .root_paths
926 .iter()
927 .map(|p| path_to_string(p))
928 .collect(),
929 summary_totals: summary,
930 totals_by_language: language_summaries,
931 per_file_records: analyzed,
932 skipped_file_records: skipped,
933 warnings,
934 submodule_summaries,
935 git_commit_short: git.commit_short,
936 git_commit_long: git.commit_long,
937 git_branch: git.branch,
938 git_commit_author: git.author,
939 git_tags: git.tags,
940 git_nearest_tag: git.nearest_tag,
941 git_commit_date: git.commit_date,
942 git_remote_url: git.remote_url,
943 style_summary,
944 }
945}
946
947#[allow(clippy::too_many_lines)]
952pub fn analyze(
953 config: &AppConfig,
954 runtime_mode: &str,
955 cancel: Option<&AtomicBool>,
956 progress: Option<&ProgressCounters>,
957) -> Result<AnalysisRun> {
958 config.validate()?;
959
960 if config.discovery.root_paths.is_empty() {
961 anyhow::bail!("no input paths were provided");
962 }
963
964 let include_globs = compile_globset(&config.discovery.include_globs)?;
965 let exclude_globs = compile_globset(&config.discovery.exclude_globs)?;
966 let enabled_languages = parse_enabled_languages(&config.analysis.enabled_languages)?;
967
968 let mut analyzed = Vec::new();
969 let mut skipped = Vec::new();
970 let mut warnings = Vec::new();
971 let mut seen_paths = HashSet::new();
972
973 for root in &config.discovery.root_paths {
974 if cancel.is_some_and(|c| c.load(Ordering::Relaxed)) {
975 anyhow::bail!("analysis cancelled");
976 }
977
978 let root = root.canonicalize().unwrap_or_else(|_| root.clone());
979
980 if root.is_file() {
981 if let Some(record) = analyze_candidate_file(
982 &root,
983 root.parent().unwrap_or_else(|| Path::new(".")),
984 config,
985 include_globs.as_ref(),
986 exclude_globs.as_ref(),
987 enabled_languages.as_ref(),
988 )? {
989 push_record(record, &mut analyzed, &mut skipped, &mut warnings);
990 }
991 continue;
992 }
993
994 walk_root(
995 &root,
996 config,
997 include_globs.as_ref(),
998 exclude_globs.as_ref(),
999 enabled_languages.as_ref(),
1000 &mut seen_paths,
1001 &mut analyzed,
1002 &mut skipped,
1003 &mut warnings,
1004 cancel,
1005 progress,
1006 )?;
1007 }
1008
1009 analyzed.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
1010 skipped.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
1011
1012 let submodule_summaries = if config.discovery.submodule_breakdown {
1014 process_submodules(config, &mut analyzed)
1015 } else {
1016 Vec::new()
1017 };
1018
1019 attach_coverage(config, &mut analyzed, &mut warnings);
1020
1021 Ok(assemble_run(
1022 config,
1023 runtime_mode,
1024 analyzed,
1025 skipped,
1026 warnings,
1027 submodule_summaries,
1028 ))
1029}
1030
1031fn attach_coverage(config: &AppConfig, analyzed: &mut [FileRecord], warnings: &mut Vec<String>) {
1032 let Some(cov_path) = coverage::resolve_coverage_file(config.analysis.coverage_file.as_deref())
1033 else {
1034 return;
1035 };
1036 tracing::debug!(path = %cov_path.display(), "loading coverage file");
1037 match fs::read_to_string(&cov_path) {
1038 Ok(content) => {
1039 let cov_map = coverage::parse_coverage_auto(&cov_path, &content);
1040 let mut matched: u32 = 0;
1041 let mut unmatched: u32 = 0;
1042 for record in analyzed.iter_mut() {
1043 record.coverage =
1044 coverage::lookup_coverage(&cov_map, &record.relative_path).cloned();
1045 if record.coverage.is_some() {
1046 matched += 1;
1047 } else {
1048 unmatched += 1;
1049 }
1050 }
1051 tracing::debug!(
1052 path = %cov_path.display(),
1053 coverage_entries = cov_map.len(),
1054 files_matched = matched,
1055 files_unmatched = unmatched,
1056 "coverage attached"
1057 );
1058 if unmatched > 0 && matched == 0 {
1059 tracing::warn!(
1060 path = %cov_path.display(),
1061 "coverage file loaded but no source files could be matched — check that paths in the coverage report match the scanned directory"
1062 );
1063 }
1064 }
1065 Err(e) => {
1066 tracing::warn!(path = %cov_path.display(), error = %e, "coverage file could not be read");
1067 warnings.push(format!(
1068 "coverage file '{}' could not be read: {e}",
1069 cov_path.display()
1070 ));
1071 }
1072 }
1073}
1074
1075fn push_record(
1076 record: FileRecord,
1077 analyzed: &mut Vec<FileRecord>,
1078 skipped: &mut Vec<FileRecord>,
1079 warnings: &mut Vec<String>,
1080) {
1081 warnings.extend(
1082 record
1083 .warnings
1084 .iter()
1085 .map(|warning| format!("{}: {warning}", record.relative_path)),
1086 );
1087
1088 match record.status {
1089 FileStatus::AnalyzedExact | FileStatus::AnalyzedBestEffort => analyzed.push(record),
1090 _ => skipped.push(record),
1091 }
1092}
1093
1094#[inline]
1096fn skip_with_reason(
1097 path: &Path,
1098 root: &Path,
1099 size: u64,
1100 reason: impl Into<String>,
1101) -> MetadataPolicyOutcome {
1102 MetadataPolicyOutcome::Skip(Box::new(skipped_record(
1103 path,
1104 root,
1105 size,
1106 FileStatus::SkippedByPolicy,
1107 vec![reason.into()],
1108 )))
1109}
1110
1111#[allow(clippy::too_many_arguments)]
1115fn check_metadata_policy(
1116 path: &Path,
1117 root: &Path,
1118 relative_path: &str,
1119 metadata: &fs::Metadata,
1120 config: &AppConfig,
1121 include_globs: Option<&GlobSet>,
1122 exclude_globs: Option<&GlobSet>,
1123) -> MetadataPolicyOutcome {
1124 let size = metadata.len();
1125
1126 if metadata.file_type().is_symlink() && !config.discovery.follow_symlinks {
1127 return skip_with_reason(path, root, size, "symlink skipped by policy");
1128 }
1129 if file_name_eq(path, ".gitignore") {
1130 return skip_with_reason(path, root, size, ".gitignore is always excluded");
1131 }
1132 if is_excluded_dir_path(path, &config.discovery.excluded_directories) {
1133 return skip_with_reason(path, root, size, "path matched excluded directory setting");
1134 }
1135 if size > config.discovery.max_file_size_bytes {
1136 return skip_with_reason(
1137 path,
1138 root,
1139 size,
1140 format!(
1141 "file exceeded max_file_size_bytes ({})",
1142 config.discovery.max_file_size_bytes
1143 ),
1144 );
1145 }
1146 if let Some(globs) = include_globs {
1147 if !globs.is_match(Path::new(relative_path)) && !globs.is_match(path) {
1148 return MetadataPolicyOutcome::Exclude;
1149 }
1150 }
1151 if let Some(globs) = exclude_globs {
1152 if globs.is_match(Path::new(relative_path)) || globs.is_match(path) {
1153 return skip_with_reason(path, root, size, "path matched exclude glob");
1154 }
1155 }
1156 if is_known_lockfile(path) && !config.analysis.include_lockfiles {
1157 return skip_with_reason(path, root, size, "lockfile skipped by default policy");
1158 }
1159
1160 MetadataPolicyOutcome::Continue
1161}
1162
/// Outcome of the content-based policy checks for one file.
struct ContentPolicyResult {
    /// Whether the path was classified as a vendor path.
    vendor: bool,
    /// Whether the file was classified as generated.
    generated: bool,
    /// Whether the file was classified as minified.
    minified: bool,
    /// Record to report when a policy decided to skip the file; `None` means
    /// analysis should continue.
    skip_record: Option<FileRecord>,
}
1169
1170fn check_content_policy(
1173 path: &Path,
1174 root: &Path,
1175 size_bytes: u64,
1176 bytes: &[u8],
1177 config: &AppConfig,
1178) -> ContentPolicyResult {
1179 let vendor = is_vendor_path(path);
1180 if vendor && config.analysis.vendor_directory_detection {
1181 return ContentPolicyResult {
1182 vendor,
1183 generated: false,
1184 minified: false,
1185 skip_record: Some(skipped_record(
1186 path,
1187 root,
1188 size_bytes,
1189 FileStatus::SkippedByPolicy,
1190 vec!["vendor file skipped by policy".into()],
1191 )),
1192 };
1193 }
1194
1195 let generated = config.analysis.generated_file_detection && looks_generated(path, bytes);
1196 if generated {
1197 return ContentPolicyResult {
1198 vendor,
1199 generated,
1200 minified: false,
1201 skip_record: Some(skipped_record(
1202 path,
1203 root,
1204 size_bytes,
1205 FileStatus::SkippedByPolicy,
1206 vec!["generated file skipped by policy".into()],
1207 )),
1208 };
1209 }
1210
1211 let minified = config.analysis.minified_file_detection && looks_minified(path, bytes);
1212 if minified {
1213 return ContentPolicyResult {
1214 vendor,
1215 generated,
1216 minified,
1217 skip_record: Some(skipped_record(
1218 path,
1219 root,
1220 size_bytes,
1221 FileStatus::SkippedByPolicy,
1222 vec!["minified file skipped by policy".into()],
1223 )),
1224 };
1225 }
1226
1227 ContentPolicyResult {
1228 vendor,
1229 generated,
1230 minified,
1231 skip_record: None,
1232 }
1233}
1234
1235fn decode_file_contents(
1237 path: &Path,
1238 root: &Path,
1239 size_bytes: u64,
1240 bytes: &[u8],
1241 config: &AppConfig,
1242) -> Result<Option<(String, String, Vec<String>)>> {
1243 if is_binary(bytes) {
1244 return match config.analysis.binary_file_behavior {
1245 BinaryFileBehavior::Skip => Ok(None),
1246 BinaryFileBehavior::Fail => {
1247 anyhow::bail!("binary file encountered: {}", path.display())
1248 }
1249 };
1250 }
1251
1252 match decode_bytes(bytes) {
1253 Ok(result) => Ok(Some(result)),
1254 Err(err) => match config.analysis.decode_failure_behavior {
1255 FailureBehavior::WarnSkip => {
1256 let _ = (path, root, size_bytes); Err(anyhow::anyhow!("__decode_warn__: {err}"))
1261 }
1262 FailureBehavior::Fail => {
1263 anyhow::bail!("decode failure for {}: {err}", path.display())
1264 }
1265 },
1266 }
1267}
1268
1269#[allow(clippy::too_many_lines)]
1270fn analyze_candidate_file(
1271 path: &Path,
1272 root: &Path,
1273 config: &AppConfig,
1274 include_globs: Option<&GlobSet>,
1275 exclude_globs: Option<&GlobSet>,
1276 enabled_languages: Option<&BTreeSet<Language>>,
1277) -> Result<Option<FileRecord>> {
1278 let metadata = match fs::symlink_metadata(path) {
1279 Ok(metadata) => metadata,
1280 Err(err) => {
1281 return Ok(Some(skipped_record(
1282 path,
1283 root,
1284 0,
1285 FileStatus::ErrorInternal,
1286 vec![format!("failed to read metadata: {err}")],
1287 )));
1288 }
1289 };
1290
1291 let relative_path = relative_path_string(path, root);
1292
1293 match check_metadata_policy(
1295 path,
1296 root,
1297 &relative_path,
1298 &metadata,
1299 config,
1300 include_globs,
1301 exclude_globs,
1302 ) {
1303 MetadataPolicyOutcome::Skip(record) => return Ok(Some(*record)),
1304 MetadataPolicyOutcome::Exclude => return Ok(None),
1305 MetadataPolicyOutcome::Continue => {}
1306 }
1307
1308 let bytes = match fs::read(path) {
1309 Ok(bytes) => bytes,
1310 Err(err) => {
1311 return Ok(Some(skipped_record(
1312 path,
1313 root,
1314 metadata.len(),
1315 FileStatus::ErrorInternal,
1316 vec![format!("failed to read file: {err}")],
1317 )));
1318 }
1319 };
1320
1321 let content_policy = check_content_policy(path, root, metadata.len(), &bytes, config);
1323 if let Some(record) = content_policy.skip_record {
1324 return Ok(Some(record));
1325 }
1326 let (vendor, generated, minified) = (
1327 content_policy.vendor,
1328 content_policy.generated,
1329 content_policy.minified,
1330 );
1331
1332 let (text, encoding, decode_warnings) =
1334 match decode_file_contents(path, root, metadata.len(), &bytes, config) {
1335 Ok(Some(result)) => result,
1336 Ok(None) => {
1337 return Ok(Some(skipped_record(
1338 path,
1339 root,
1340 metadata.len(),
1341 FileStatus::SkippedBinary,
1342 vec!["binary file skipped by default".into()],
1343 )));
1344 }
1345 Err(err) => {
1346 let msg = err.to_string();
1347 if let Some(warn_msg) = msg.strip_prefix("__decode_warn__: ") {
1348 return Ok(Some(skipped_record(
1349 path,
1350 root,
1351 metadata.len(),
1352 FileStatus::SkippedDecodeError,
1353 vec![warn_msg.to_string()],
1354 )));
1355 }
1356 return Err(err);
1357 }
1358 };
1359
1360 let first_line = text.lines().next();
1361 let language = detect_language(
1362 path,
1363 first_line,
1364 &config.analysis.extension_overrides,
1365 config.analysis.shebang_detection,
1366 );
1367
1368 let Some(language) = language else {
1369 return Ok(Some(skipped_record(
1370 path,
1371 root,
1372 metadata.len(),
1373 FileStatus::SkippedUnsupported,
1374 vec!["unsupported or undetected language".into()],
1375 )));
1376 };
1377
1378 if let Some(enabled) = enabled_languages {
1379 if !enabled.contains(&language) {
1380 return Ok(Some(skipped_record(
1381 path,
1382 root,
1383 metadata.len(),
1384 FileStatus::SkippedByPolicy,
1385 vec![format!(
1386 "language {} disabled by configuration",
1387 language.display_name()
1388 )],
1389 )));
1390 }
1391 }
1392
1393 let style_scope = match config.analysis.style_lang_scope.as_str() {
1394 "c_family" => StyleLangScope::CFamilyOnly,
1395 _ => StyleLangScope::All,
1396 };
1397 let ieee_opts = AnalysisOptions {
1398 blank_in_block_comment_as_comment: config.analysis.blank_in_block_comment_policy
1399 == BlankInBlockCommentPolicy::CountAsComment,
1400 collapse_continuation_lines: config.analysis.continuation_line_policy
1401 == ContinuationLinePolicy::CollapseToLogical,
1402 enable_style: config.analysis.style_analysis_enabled,
1403 style_lang_scope: style_scope,
1404 };
1405 let analysis = analyze_text(language, &text, ieee_opts);
1406 let effective_counts = compute_effective_counts(
1407 &analysis.raw,
1408 config.analysis.mixed_line_policy,
1409 config.analysis.python_docstrings_as_comments,
1410 config.analysis.count_compiler_directives,
1411 );
1412
1413 let mut warnings = decode_warnings;
1414 warnings.extend(analysis.warnings.clone());
1415
1416 Ok(Some(FileRecord {
1417 path: path_to_string(path),
1418 relative_path,
1419 language: Some(language),
1420 size_bytes: metadata.len(),
1421 detected_encoding: Some(encoding),
1422 raw_line_categories: analysis.raw,
1423 effective_counts,
1424 status: match analysis.parse_mode {
1425 ParseMode::Lexical | ParseMode::TreeSitter => FileStatus::AnalyzedExact,
1426 ParseMode::LexicalBestEffort => FileStatus::AnalyzedBestEffort,
1427 },
1428 warnings,
1429 generated,
1430 minified,
1431 vendor,
1432 parse_mode: Some(analysis.parse_mode),
1433 submodule: None,
1434 coverage: None,
1435 style_analysis: analysis.style_analysis,
1436 }))
1437}
1438
1439const fn compute_effective_counts(
1440 raw: &RawLineCounts,
1441 mixed_line_policy: MixedLinePolicy,
1442 python_docstrings_as_comments: bool,
1443 count_compiler_directives: bool,
1444) -> EffectiveCounts {
1445 let mut effective = EffectiveCounts {
1446 code_lines: raw.code_only_lines,
1447 comment_lines: raw.single_comment_only_lines + raw.multi_comment_only_lines,
1448 blank_lines: raw.blank_only_lines,
1449 mixed_lines_separate: 0,
1450 };
1451
1452 if python_docstrings_as_comments {
1453 effective.comment_lines += raw.docstring_comment_lines;
1454 } else {
1455 effective.code_lines += raw.docstring_comment_lines;
1456 }
1457
1458 let mixed_total = raw.mixed_code_single_comment_lines + raw.mixed_code_multi_comment_lines;
1459 match mixed_line_policy {
1460 MixedLinePolicy::CodeOnly => effective.code_lines += mixed_total,
1461 MixedLinePolicy::CodeAndComment => {
1462 effective.code_lines += mixed_total;
1463 effective.comment_lines += mixed_total;
1464 }
1465 MixedLinePolicy::CommentOnly => effective.comment_lines += mixed_total,
1466 MixedLinePolicy::SeparateMixedCategory => effective.mixed_lines_separate += mixed_total,
1467 }
1468
1469 if !count_compiler_directives {
1472 effective.code_lines = effective
1473 .code_lines
1474 .saturating_sub(raw.compiler_directive_lines);
1475 }
1476
1477 effective
1478}
1479
1480fn build_summary(analyzed: &[FileRecord], skipped: &[FileRecord]) -> SummaryTotals {
1481 let mut summary = SummaryTotals {
1482 files_considered: (analyzed.len() + skipped.len()) as u64,
1483 files_analyzed: analyzed.len() as u64,
1484 files_skipped: skipped.len() as u64,
1485 ..Default::default()
1486 };
1487
1488 for record in analyzed {
1489 summary.total_physical_lines += record.raw_line_categories.total_physical_lines;
1490 summary.code_lines += record.effective_counts.code_lines;
1491 summary.comment_lines += record.effective_counts.comment_lines;
1492 summary.blank_lines += record.effective_counts.blank_lines;
1493 summary.mixed_lines_separate += record.effective_counts.mixed_lines_separate;
1494 summary.functions += record.raw_line_categories.functions;
1495 summary.classes += record.raw_line_categories.classes;
1496 summary.variables += record.raw_line_categories.variables;
1497 summary.imports += record.raw_line_categories.imports;
1498 summary.test_count += record.raw_line_categories.test_count;
1499 summary.test_assertion_count += record.raw_line_categories.test_assertion_count;
1500 summary.test_suite_count += record.raw_line_categories.test_suite_count;
1501 if let Some(cov) = &record.coverage {
1502 summary.coverage_lines_found += u64::from(cov.lines_found);
1503 summary.coverage_lines_hit += u64::from(cov.lines_hit);
1504 summary.coverage_functions_found += u64::from(cov.functions_found);
1505 summary.coverage_functions_hit += u64::from(cov.functions_hit);
1506 summary.coverage_branches_found += u64::from(cov.branches_found);
1507 summary.coverage_branches_hit += u64::from(cov.branches_hit);
1508 }
1509 }
1510
1511 summary
1512}
1513
1514const fn zeroed_summary(language: Language) -> LanguageSummary {
1516 LanguageSummary {
1517 language,
1518 files: 0,
1519 total_physical_lines: 0,
1520 code_lines: 0,
1521 comment_lines: 0,
1522 blank_lines: 0,
1523 mixed_lines_separate: 0,
1524 functions: 0,
1525 classes: 0,
1526 variables: 0,
1527 imports: 0,
1528 test_count: 0,
1529 test_assertion_count: 0,
1530 test_suite_count: 0,
1531 coverage_lines_found: 0,
1532 coverage_lines_hit: 0,
1533 coverage_functions_found: 0,
1534 coverage_functions_hit: 0,
1535 coverage_branches_found: 0,
1536 coverage_branches_hit: 0,
1537 }
1538}
1539
1540fn accumulate_record_into_summary(entry: &mut LanguageSummary, record: &FileRecord) {
1542 entry.files += 1;
1543 let r = &record.raw_line_categories;
1544 entry.total_physical_lines += r.total_physical_lines;
1545 entry.code_lines += record.effective_counts.code_lines;
1546 entry.comment_lines += record.effective_counts.comment_lines;
1547 entry.blank_lines += record.effective_counts.blank_lines;
1548 entry.mixed_lines_separate += record.effective_counts.mixed_lines_separate;
1549 entry.functions += r.functions;
1550 entry.classes += r.classes;
1551 entry.variables += r.variables;
1552 entry.imports += r.imports;
1553 entry.test_count += r.test_count;
1554 entry.test_assertion_count += r.test_assertion_count;
1555 entry.test_suite_count += r.test_suite_count;
1556 if let Some(cov) = &record.coverage {
1557 entry.coverage_lines_found += u64::from(cov.lines_found);
1558 entry.coverage_lines_hit += u64::from(cov.lines_hit);
1559 entry.coverage_functions_found += u64::from(cov.functions_found);
1560 entry.coverage_functions_hit += u64::from(cov.functions_hit);
1561 entry.coverage_branches_found += u64::from(cov.branches_found);
1562 entry.coverage_branches_hit += u64::from(cov.branches_hit);
1563 }
1564}
1565
1566fn build_language_summaries(analyzed: &[FileRecord]) -> Vec<LanguageSummary> {
1567 let mut by_language: BTreeMap<Language, LanguageSummary> = BTreeMap::new();
1568 for record in analyzed {
1569 let Some(language) = record.language else {
1570 continue;
1571 };
1572 let entry = by_language
1573 .entry(language)
1574 .or_insert_with(|| zeroed_summary(language));
1575 accumulate_record_into_summary(entry, record);
1576 }
1577 by_language.into_values().collect()
1578}
1579
1580fn skipped_record(
1581 path: &Path,
1582 root: &Path,
1583 size_bytes: u64,
1584 status: FileStatus,
1585 warnings: Vec<String>,
1586) -> FileRecord {
1587 FileRecord {
1588 path: path_to_string(path),
1589 relative_path: relative_path_string(path, root),
1590 language: None,
1591 size_bytes,
1592 detected_encoding: None,
1593 raw_line_categories: RawLineCounts::default(),
1594 effective_counts: EffectiveCounts::default(),
1595 status,
1596 warnings,
1597 generated: false,
1598 minified: false,
1599 vendor: false,
1600 parse_mode: None,
1601 submodule: None,
1602 coverage: None,
1603 style_analysis: None,
1604 }
1605}
1606
/// Renders `path` relative to `root` with forward slashes.
///
/// When `path` does not start with `root`, the whole `path` is rendered
/// instead. Non-UTF-8 parts are converted lossily.
fn relative_path_string(path: &Path, root: &Path) -> String {
    let relative = match path.strip_prefix(root) {
        Ok(stripped) => stripped,
        Err(_) => path,
    };
    relative.to_string_lossy().replace('\\', "/")
}
1613
/// Renders `path` as a string (lossily for non-UTF-8 parts) with every
/// backslash turned into a forward slash.
fn path_to_string(path: &Path) -> String {
    path.to_string_lossy()
        .chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect()
}
1617
/// Reads `<root>/.gitmodules` and returns `(name, path)` for every submodule
/// section that declares a `path`.
///
/// Returns an empty list when the file is missing, is not a regular file, or
/// cannot be read as UTF-8 text.
///
/// Parsing is line based:
/// * `[submodule "NAME"]` starts a new section. A header with no closing
///   quote before the bracket (for example `[submodule "]`) is ignored
///   rather than sliced, which previously panicked.
/// * `path = VALUE` sets the current section's path. Only the exact key
///   `path` is accepted; other keys that merely start with `path` are ignored.
/// * A section is emitted only when both its name and its path were seen.
#[must_use]
pub fn detect_submodules(root: &Path) -> Vec<(String, PathBuf)> {
    let gitmodules = root.join(".gitmodules");
    if !gitmodules.is_file() {
        return Vec::new();
    }
    let Ok(content) = fs::read_to_string(&gitmodules) else {
        return Vec::new();
    };

    let mut result = Vec::new();
    let mut current_name: Option<String> = None;
    let mut current_path: Option<PathBuf> = None;

    for line in content.lines() {
        let trimmed = line.trim();

        // Strip prefix and suffix in sequence so they can never overlap.
        let header = trimmed
            .strip_prefix("[submodule \"")
            .and_then(|rest| rest.strip_suffix("\"]"));
        if let Some(name) = header {
            // Both `take()` calls run, so a section without a path is dropped.
            if let (Some(prev_name), Some(prev_path)) = (current_name.take(), current_path.take())
            {
                result.push((prev_name, prev_path));
            }
            current_name = Some(name.to_string());
            continue;
        }

        // Accept `path=VALUE` and `path = VALUE`, but not `pathfoo = VALUE`.
        let path_value = trimmed
            .strip_prefix("path")
            .and_then(|rest| rest.trim_start().strip_prefix('='));
        if let Some(value) = path_value {
            current_path = Some(PathBuf::from(value.trim()));
        }
    }

    // Flush the final section.
    if let (Some(name), Some(path)) = (current_name, current_path) {
        result.push((name, path));
    }

    result
}
1654
1655fn build_submodule_summaries(
1656 analyzed: &[FileRecord],
1657 submodules: &[(String, PathBuf)],
1658) -> Vec<SubmoduleSummary> {
1659 submodules
1660 .iter()
1661 .map(|(name, path)| {
1662 let files: Vec<&FileRecord> = analyzed
1663 .iter()
1664 .filter(|f| f.submodule.as_deref() == Some(name.as_str()))
1665 .collect();
1666
1667 let files_analyzed = files.len() as u64;
1668 let total_physical_lines = files
1669 .iter()
1670 .map(|f| f.raw_line_categories.total_physical_lines)
1671 .sum();
1672 let code_lines = files.iter().map(|f| f.effective_counts.code_lines).sum();
1673 let comment_lines = files.iter().map(|f| f.effective_counts.comment_lines).sum();
1674 let blank_lines = files.iter().map(|f| f.effective_counts.blank_lines).sum();
1675 let language_summaries = build_language_summaries_from_slice(&files);
1676
1677 SubmoduleSummary {
1678 name: name.clone(),
1679 relative_path: path.to_string_lossy().replace('\\', "/"),
1680 files_analyzed,
1681 total_physical_lines,
1682 code_lines,
1683 comment_lines,
1684 blank_lines,
1685 language_summaries,
1686 }
1687 })
1688 .filter(|s| s.files_analyzed > 0)
1689 .collect()
1690}
1691
1692#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
1694fn dominant_indent_label(files: &[&StyleAnalysis]) -> String {
1695 let mut votes = [0u32; 6];
1696 for f in files {
1697 let idx = match f.indent_style {
1698 IndentStyle::Tabs => 0,
1699 IndentStyle::Spaces2 => 1,
1700 IndentStyle::Spaces4 => 2,
1701 IndentStyle::Spaces8 => 3,
1702 IndentStyle::Mixed => 4,
1703 IndentStyle::Unknown => 5,
1704 };
1705 votes[idx] += 1;
1706 }
1707 let labels = ["Tabs", "2-Space", "4-Space", "8-Space", "Mixed", "\u{2014}"];
1708 labels[votes
1709 .iter()
1710 .enumerate()
1711 .max_by_key(|(_, v)| *v)
1712 .map(|(i, _)| i)
1713 .unwrap_or(5)]
1714 .to_string()
1715}
1716
1717#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
1719fn line80_pct(files: &[&StyleAnalysis]) -> u8 {
1720 if files.is_empty() {
1721 return 0;
1722 }
1723 let compliant = files
1724 .iter()
1725 .filter(|f| f.total_lines == 0 || (f.lines_over_80 as f32 / f.total_lines as f32) <= 0.05)
1726 .count() as u32;
1727 ((compliant * 100) / files.len() as u32) as u8
1728}
1729
1730#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
1733fn line_col_pct(files: &[&StyleAnalysis], threshold: u16) -> u8 {
1734 if files.is_empty() {
1735 return 0;
1736 }
1737 let compliant = files
1738 .iter()
1739 .filter(|f| {
1740 let over = if threshold <= 80 {
1741 f.lines_over_80
1742 } else if threshold <= 100 {
1743 f.lines_over_100
1744 } else {
1745 f.lines_over_120
1746 };
1747 f.total_lines == 0 || (over as f32 / f.total_lines as f32) <= 0.05
1748 })
1749 .count() as u32;
1750 ((compliant * 100) / files.len() as u32) as u8
1751}
1752
1753#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
1755fn build_language_group(
1756 family: &str,
1757 files: &[&StyleAnalysis],
1758 col_threshold: u16,
1759) -> LanguageStyleGroup {
1760 let count = files.len() as u32;
1761
1762 let mut all_names: Vec<String> = Vec::new();
1764 for f in files {
1765 for g in &f.guide_scores {
1766 if !all_names.contains(&g.name) {
1767 all_names.push(g.name.clone());
1768 }
1769 }
1770 }
1771
1772 let mut guide_avg_scores: Vec<(String, u8)> = all_names
1773 .into_iter()
1774 .map(|name| {
1775 let sum: u32 = files
1776 .iter()
1777 .filter_map(|f| f.guide_scores.iter().find(|g| g.name == name))
1778 .map(|g| u32::from(g.score_pct))
1779 .sum();
1780 let avg = (sum / count) as u8;
1781 (name, avg)
1782 })
1783 .collect();
1784 guide_avg_scores.sort_by_key(|s| std::cmp::Reverse(s.1));
1785
1786 let (dominant_guide, dominant_score_pct) = guide_avg_scores
1787 .first()
1788 .map(|(n, s)| (n.clone(), *s))
1789 .unwrap_or_default();
1790
1791 let lcp = line_col_pct(files, col_threshold);
1792 LanguageStyleGroup {
1793 language_family: family.to_string(),
1794 files_count: count,
1795 dominant_guide,
1796 dominant_score_pct,
1797 common_indent_style: dominant_indent_label(files),
1798 guide_avg_scores,
1799 line80_compliant_pct: line80_pct(files),
1800 line_col_compliant_pct: lcp,
1801 }
1802}
1803
1804#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
1807fn build_style_summary(analyzed: &[FileRecord], col_threshold: u16) -> Option<StyleSummary> {
1808 let all_style: Vec<&StyleAnalysis> = analyzed
1809 .iter()
1810 .filter_map(|f| f.style_analysis.as_ref())
1811 .collect();
1812
1813 if all_style.is_empty() {
1814 return None;
1815 }
1816
1817 let mut families: std::collections::BTreeMap<&str, Vec<&StyleAnalysis>> =
1819 std::collections::BTreeMap::new();
1820 for sa in &all_style {
1821 families
1822 .entry(sa.language_family.as_str())
1823 .or_default()
1824 .push(sa);
1825 }
1826
1827 let mut by_language: Vec<LanguageStyleGroup> = families
1828 .iter()
1829 .map(|(family, files)| build_language_group(family, files, col_threshold))
1830 .collect();
1831 by_language.sort_by_key(|g| std::cmp::Reverse(g.files_count));
1832
1833 let files_analyzed = all_style.len() as u32;
1834 let common_indent_style = dominant_indent_label(&all_style);
1835 let line80_compliant_pct = line80_pct(&all_style);
1836 let line_col_compliant_pct = line_col_pct(&all_style, col_threshold);
1837
1838 Some(StyleSummary {
1839 files_analyzed,
1840 common_indent_style,
1841 line80_compliant_pct,
1842 line_col_compliant_pct,
1843 col_threshold,
1844 by_language,
1845 })
1846}
1847
1848fn build_language_summaries_from_slice(files: &[&FileRecord]) -> Vec<LanguageSummary> {
1849 let mut map: BTreeMap<String, LanguageSummary> = BTreeMap::new();
1850 for file in files {
1851 let Some(lang) = file.language else { continue };
1852 let entry = map
1853 .entry(lang.display_name().to_string())
1854 .or_insert_with(|| zeroed_summary(lang));
1855 accumulate_record_into_summary(entry, file);
1856 }
1857 map.into_values().collect()
1858}
1859
/// Returns `true` when the final component of `path` is valid UTF-8 and
/// equals `expected` exactly (case-sensitive).
fn file_name_eq(path: &Path, expected: &str) -> bool {
    let file_name = path.file_name().and_then(|name| name.to_str());
    file_name == Some(expected)
}
1865
/// Returns `true` when any UTF-8 component of `path` is exactly equal to one
/// of `excluded_dirs`. Components that are not valid UTF-8 never match.
fn is_excluded_dir_path(path: &Path, excluded_dirs: &[String]) -> bool {
    for component in path.components() {
        let Some(part) = component.as_os_str().to_str() else {
            continue;
        };
        if excluded_dirs.iter().any(|excluded| excluded == part) {
            return true;
        }
    }
    false
}
1874
/// Returns `true` when any UTF-8 component of `path` is one of the
/// directory names treated as third-party code: `vendor`, `node_modules` or
/// `packages`.
fn is_vendor_path(path: &Path) -> bool {
    const VENDOR_DIRS: [&str; 3] = ["vendor", "node_modules", "packages"];

    path.components()
        .filter_map(|component| component.as_os_str().to_str())
        .any(|part| VENDOR_DIRS.contains(&part))
}
1883
/// Returns `true` when the file name of `path` is one of the recognised
/// package-manager lockfiles. The comparison is exact and case-sensitive.
fn is_known_lockfile(path: &Path) -> bool {
    const LOCKFILES: [&str; 7] = [
        "Cargo.lock",
        "package-lock.json",
        "yarn.lock",
        "pnpm-lock.yaml",
        "Pipfile.lock",
        "poetry.lock",
        "composer.lock",
    ];

    match path.file_name().and_then(|name| name.to_str()) {
        Some(name) => LOCKFILES.contains(&name),
        None => false,
    }
}
1900
1901fn looks_generated(path: &Path, bytes: &[u8]) -> bool {
1902 let file_name = path
1903 .file_name()
1904 .and_then(|name| name.to_str())
1905 .unwrap_or_default();
1906 if file_name.contains(".generated.") || file_name.contains(".g.") {
1907 return true;
1908 }
1909
1910 let sample = String::from_utf8_lossy(&bytes[..bytes.len().min(GENERATED_SAMPLE_BYTES)])
1911 .to_ascii_lowercase();
1912 sample.contains("@generated") || sample.contains("generated by")
1913}
1914
1915fn looks_minified(path: &Path, bytes: &[u8]) -> bool {
1916 let file_name = path
1917 .file_name()
1918 .and_then(|name| name.to_str())
1919 .unwrap_or_default();
1920 if file_name.contains(".min.") {
1921 return true;
1922 }
1923
1924 let sample = String::from_utf8_lossy(&bytes[..bytes.len().min(MINIFIED_SAMPLE_BYTES)]);
1925 let longest_line = sample.lines().map(str::len).max().unwrap_or(0);
1926 let whitespace = sample.chars().filter(|c| c.is_whitespace()).count();
1927 longest_line > MINIFIED_LINE_THRESHOLD && whitespace * 100 < sample.len().max(1)
1928}
1929
1930fn is_binary(bytes: &[u8]) -> bool {
1931 if bytes.starts_with(&[0xEF, 0xBB, 0xBF])
1932 || bytes.starts_with(&[0xFF, 0xFE])
1933 || bytes.starts_with(&[0xFE, 0xFF])
1934 {
1935 return false;
1936 }
1937
1938 let sample = &bytes[..bytes.len().min(BINARY_SAMPLE_BYTES)];
1939 sample.contains(&0)
1940}
1941
1942fn decode_utf16_bom(
1945 bom_stripped: &[u8],
1946 encoding: &'static encoding_rs::Encoding,
1947 label: &str,
1948) -> (String, String, Vec<String>) {
1949 let (cow, _, had_errors) = encoding.decode(bom_stripped);
1950 let mut warnings = Vec::new();
1951 if had_errors {
1952 warnings.push(format!("{label} decode contained replacement characters"));
1953 }
1954 (cow.into_owned(), label.into(), warnings)
1955}
1956
1957fn decode_bytes(bytes: &[u8]) -> std::result::Result<(String, String, Vec<String>), String> {
1958 if bytes.starts_with(&[0xEF, 0xBB, 0xBF]) {
1959 let text = String::from_utf8(bytes[3..].to_vec()).map_err(|err| err.to_string())?;
1960 return Ok((text, "utf-8-bom".into(), vec![]));
1961 }
1962 if bytes.starts_with(&[0xFF, 0xFE]) {
1963 return Ok(decode_utf16_bom(&bytes[2..], UTF_16LE, "utf-16le"));
1964 }
1965 if bytes.starts_with(&[0xFE, 0xFF]) {
1966 return Ok(decode_utf16_bom(&bytes[2..], UTF_16BE, "utf-16be"));
1967 }
1968
1969 #[allow(clippy::option_if_let_else)]
1971 if let Ok(text) = String::from_utf8(bytes.to_vec()) {
1972 Ok((text, "utf-8".into(), vec![]))
1973 } else {
1974 let (cow, _, had_errors) = WINDOWS_1252.decode(bytes);
1975 let mut warnings = vec!["decoded using windows-1252 fallback".into()];
1976 if had_errors {
1977 warnings.push("fallback decode contained replacement characters".into());
1978 }
1979 Ok((cow.into_owned(), "windows-1252".into(), warnings))
1980 }
1981}
1982
1983fn compile_globset(patterns: &[String]) -> Result<Option<GlobSet>> {
1984 if patterns.is_empty() {
1985 return Ok(None);
1986 }
1987
1988 let mut builder = GlobSetBuilder::new();
1989 for pattern in patterns {
1990 builder
1991 .add(Glob::new(pattern).with_context(|| format!("invalid glob pattern: {pattern}"))?);
1992 }
1993 Ok(Some(
1994 builder.build().context("failed to compile glob filters")?,
1995 ))
1996}
1997
1998fn parse_enabled_languages(enabled: &[String]) -> Result<Option<BTreeSet<Language>>> {
1999 if enabled.is_empty() {
2000 return Ok(None);
2001 }
2002
2003 let supported = supported_languages();
2004 let mut set = BTreeSet::new();
2005 for name in enabled {
2006 let language = Language::from_name(name)
2007 .with_context(|| format!("unsupported language in config: {name}"))?;
2008 if !supported.contains(&language) {
2009 anyhow::bail!("language {name} is not supported in this build");
2010 }
2011 set.insert(language);
2012 }
2013 Ok(Some(set))
2014}
2015
2016pub fn write_json(run: &AnalysisRun, output_path: &Path) -> Result<()> {
2020 let json = serde_json::to_string_pretty(run).context("failed to serialize analysis run")?;
2021 fs::write(output_path, json)
2022 .with_context(|| format!("failed to write JSON output to {}", output_path.display()))
2023}
2024
2025pub fn read_json(path: &Path) -> Result<AnalysisRun> {
2029 let contents = fs::read_to_string(path)
2030 .with_context(|| format!("failed to read result file {}", path.display()))?;
2031 serde_json::from_str(&contents)
2032 .with_context(|| format!("failed to parse JSON result {}", path.display()))
2033}
2034
#[cfg(test)]
mod tests {
    use super::*;

    // ── compute_effective_counts ────────────────────────────────────────

    #[test]
    fn effective_counts_respect_code_only_policy() {
        let raw = RawLineCounts {
            docstring_comment_lines: 2,
            mixed_code_single_comment_lines: 3,
            single_comment_only_lines: 1,
            code_only_lines: 2,
            ..RawLineCounts::default()
        };
        let effective = compute_effective_counts(&raw, MixedLinePolicy::CodeOnly, true, true);
        // 2 code-only + 3 mixed lines.
        assert_eq!(effective.code_lines, 5);
        // 1 comment-only + 2 docstring lines.
        assert_eq!(effective.comment_lines, 3);
    }

    #[test]
    fn effective_counts_can_separate_mixed() {
        let raw = RawLineCounts {
            mixed_code_multi_comment_lines: 1,
            mixed_code_single_comment_lines: 2,
            ..RawLineCounts::default()
        };
        let effective =
            compute_effective_counts(&raw, MixedLinePolicy::SeparateMixedCategory, true, true);
        assert_eq!(effective.mixed_lines_separate, 3);
        assert_eq!(effective.code_lines, 0);
        assert_eq!(effective.comment_lines, 0);
    }

    // ── decode_bytes ────────────────────────────────────────────────────

    #[test]
    fn windows_1252_fallback_decodes() {
        // "Hello", byte 0x96 (en dash in windows-1252), "W".
        let input: &[u8] = &[0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x96, 0x57];
        let (decoded, label, notes) = decode_bytes(input).unwrap();
        assert_eq!(label, "windows-1252");
        assert!(decoded.contains('\u{2013}'));
        assert!(!notes.is_empty());
    }

    // ── is_binary ───────────────────────────────────────────────────────

    #[test]
    fn is_binary_detects_null_byte() {
        assert!(is_binary(b"hello\x00world"));
    }

    #[test]
    fn is_binary_clean_text_is_not_binary() {
        let source = b"fn main() { println!(\"hello\"); }";
        assert!(!is_binary(source));
    }

    #[test]
    fn is_binary_utf8_bom_not_binary() {
        assert!(!is_binary(b"\xef\xbb\xbffn main() {}"));
    }

    // ── looks_generated ─────────────────────────────────────────────────

    #[test]
    fn looks_generated_at_generated_marker() {
        let content = b"// @generated by protoc-gen-rust\nfn foo() {}";
        assert!(looks_generated(Path::new("foo.rs"), content));
    }

    #[test]
    fn looks_generated_do_not_edit_marker() {
        let target = Path::new("foo.rs");

        let with_generated_by = b"// Code generated by build.rs. DO NOT EDIT.\nuse foo;";
        assert!(looks_generated(target, with_generated_by));

        let with_at_generated = b"// @generated\nuse foo;";
        assert!(looks_generated(target, with_at_generated));
    }

    #[test]
    fn looks_generated_normal_file_not_generated() {
        let content = b"fn main() {\n println!(\"hello\");\n}\n";
        assert!(!looks_generated(Path::new("main.rs"), content));
    }

    // ── looks_minified ──────────────────────────────────────────────────

    #[test]
    fn looks_minified_dot_min_filename() {
        let content = b"function a(){return 1}";
        assert!(looks_minified(Path::new("bundle.min.js"), content));
    }

    #[test]
    fn looks_minified_normal_file_not_minified() {
        let content = b"function hello() {\n return 1;\n}\n";
        assert!(!looks_minified(Path::new("app.js"), content));
    }

    #[test]
    fn looks_minified_very_long_line() {
        // One byte past the threshold, with no whitespace at all.
        let content = vec![b'x'; MINIFIED_LINE_THRESHOLD + 1];
        assert!(looks_minified(Path::new("app.js"), &content));
    }

    // ── is_known_lockfile ───────────────────────────────────────────────

    #[test]
    fn is_known_lockfile_cargo_lock() {
        let candidate = Path::new("Cargo.lock");
        assert!(is_known_lockfile(candidate));
    }

    #[test]
    fn is_known_lockfile_package_lock_json() {
        let candidate = Path::new("package-lock.json");
        assert!(is_known_lockfile(candidate));
    }

    #[test]
    fn is_known_lockfile_yarn_lock() {
        let candidate = Path::new("yarn.lock");
        assert!(is_known_lockfile(candidate));
    }

    #[test]
    fn is_known_lockfile_normal_file_is_not_lockfile() {
        let candidate = Path::new("src/lib.rs");
        assert!(!is_known_lockfile(candidate));
    }

    // ── is_vendor_path ──────────────────────────────────────────────────

    #[test]
    fn is_vendor_path_node_modules() {
        let candidate = Path::new("node_modules/react/index.js");
        assert!(is_vendor_path(candidate));
    }

    #[test]
    fn is_vendor_path_vendor_dir() {
        let candidate = Path::new("vendor/anyhow/src/lib.rs");
        assert!(is_vendor_path(candidate));
    }

    #[test]
    fn is_vendor_path_normal_src_is_not_vendor() {
        let candidate = Path::new("src/lib.rs");
        assert!(!is_vendor_path(candidate));
    }

    // ── is_excluded_dir_path ────────────────────────────────────────────

    #[test]
    fn is_excluded_dir_path_matches_excluded() {
        let excluded = vec![String::from(".git"), String::from("target")];
        assert!(is_excluded_dir_path(Path::new(".git/config"), &excluded));
    }

    #[test]
    fn is_excluded_dir_path_non_excluded_is_ok() {
        let excluded = vec![String::from(".git"), String::from("target")];
        assert!(!is_excluded_dir_path(Path::new("src/main.rs"), &excluded));
    }

    // ── decode_bytes (UTF-8 paths) ──────────────────────────────────────

    #[test]
    fn decode_bytes_utf8_bom_stripped() {
        let input = b"\xef\xbb\xbffn main() {}";
        let (decoded, label, _) = decode_bytes(input).unwrap();
        assert!(
            label.contains("utf-8"),
            "should be utf-8 variant, got {label}"
        );
        assert!(decoded.starts_with("fn"));
    }

    #[test]
    fn decode_bytes_plain_utf8() {
        let (decoded, label, notes) = decode_bytes(b"hello world").unwrap();
        assert_eq!(label, "utf-8");
        assert_eq!(decoded, "hello world");
        assert!(notes.is_empty());
    }
}