1#![allow(clippy::multiple_crate_versions)]
4
5pub mod baseline;
6pub mod coverage;
7pub mod delta;
8pub mod history;
9pub use baseline::{check_against_baseline, resolve_baselines_path, BaselineEntry, BaselineStore};
10pub use coverage::{aggregate_line_coverage, lookup_coverage, parse_lcov, FileCoverage};
11pub use delta::{compute_delta, FileChangeStatus, FileDelta, ScanComparison, SummaryDelta};
12pub use history::{RegistryEntry, ScanRegistry, ScanSummarySnapshot, WatchedDirsStore};
13
14use std::collections::{BTreeMap, BTreeSet, HashSet};
15use std::fs;
16use std::path::{Path, PathBuf};
17use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
18use std::sync::Arc;
19
20use anyhow::{Context, Result};
21use chrono::{DateTime, Utc};
22use encoding_rs::{UTF_16BE, UTF_16LE, WINDOWS_1252};
23use globset::{Glob, GlobSet, GlobSetBuilder};
24use ignore::WalkBuilder;
25use serde::{Deserialize, Serialize};
26use uuid::Uuid;
27
28use sloc_config::{
29 AppConfig, BinaryFileBehavior, BlankInBlockCommentPolicy, ContinuationLinePolicy,
30 FailureBehavior, MixedLinePolicy,
31};
32use sloc_languages::{
33 analyze_text, detect_language, supported_languages, AnalysisOptions, Language, ParseMode,
34 RawLineCounts,
35};
36
/// Upper bound on the number of worker threads used by `run_parallel_analysis`.
const MAX_ANALYSIS_THREADS: usize = 16;
/// Worker thread count used when `std::thread::available_parallelism` fails.
const DEFAULT_ANALYSIS_THREADS: usize = 4;
// NOTE(review): the sampling constants below are not referenced in this part
// of the file; judging by their names they bound how many bytes the
// generated / minified / binary detectors inspect — confirm at the use sites.
const GENERATED_SAMPLE_BYTES: usize = 1024;
const MINIFIED_SAMPLE_BYTES: usize = 4096;
// NOTE(review): presumably a line-length threshold for minified detection — confirm.
const MINIFIED_LINE_THRESHOLD: usize = 2000;
const BINARY_SAMPLE_BYTES: usize = 8192;
51
/// Shared counters a caller can poll to follow the progress of an analysis.
pub struct ProgressCounters {
    /// Incremented by one each time a worker finishes a candidate file.
    pub files_done: Arc<AtomicUsize>,
    /// Increased by the number of newly discovered paths for every walked root.
    pub files_total: Arc<AtomicUsize>,
}
59
/// Result of the metadata-only policy checks performed before a file is read.
enum MetadataPolicyOutcome {
    /// The file is reported as skipped using the boxed record.
    Skip(Box<FileRecord>),
    /// The file is dropped silently (no record is produced for it).
    Exclude,
    /// No policy matched; analysis proceeds to reading the file.
    Continue,
}
69
/// Final disposition of a candidate file.
///
/// The two `Analyzed*` variants land in the analyzed list; every other
/// variant lands in the skipped list (see `push_record`).
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FileStatus {
    AnalyzedExact,
    AnalyzedBestEffort,
    /// Content was classified as binary and the configured behavior is to skip.
    SkippedBinary,
    /// Decoding failed and the configured behavior is warn-and-skip.
    SkippedDecodeError,
    /// No language could be detected for the file.
    SkippedUnsupported,
    /// A discovery or analysis policy excluded the file.
    SkippedByPolicy,
    /// Reading the file or its metadata failed.
    ErrorInternal,
}
81
/// Line counts for one file, as produced by `compute_effective_counts` from
/// the raw line categories and the configured counting policies.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct EffectiveCounts {
    pub code_lines: u64,
    pub comment_lines: u64,
    pub blank_lines: u64,
    // NOTE(review): presumably mixed code/comment lines when the policy counts
    // them separately — confirm against `compute_effective_counts`.
    pub mixed_lines_separate: u64,
}
89
/// Identifies the tool build and the individual run that produced a report.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolMetadata {
    /// Tool name; `assemble_run` sets this to `"sloc"`.
    pub name: String,
    /// Crate version taken from `CARGO_PKG_VERSION` at compile time.
    pub version: String,
    /// `<YYYYMMDD-HHMM>-<uuid v4, no dashes>` as built by `assemble_run`.
    pub run_id: String,
    /// UTC time at which the run record was assembled.
    pub timestamp_utc: DateTime<Utc>,
}
97
/// Describes the machine and context in which a run was executed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnvironmentMetadata {
    /// Value of `std::env::consts::OS`.
    pub operating_system: String,
    /// Value of `std::env::consts::ARCH`.
    pub architecture: String,
    /// Caller-supplied runtime mode string passed to `analyze`.
    pub runtime_mode: String,
    /// Result of `get_current_username` (falls back to `"unknown"`).
    pub initiator_username: String,
    /// Result of `get_hostname` (falls back to `"unknown"`).
    pub initiator_hostname: String,
    /// Name of the detected CI system, if any; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub ci_name: Option<String>,
}
110
/// Run-wide totals, produced by `build_summary` from the analyzed and skipped
/// file records.
///
/// Fields marked `#[serde(default)]` deserialize to `0` when absent from the
/// input document.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct SummaryTotals {
    pub files_considered: u64,
    pub files_analyzed: u64,
    pub files_skipped: u64,
    pub total_physical_lines: u64,
    pub code_lines: u64,
    pub comment_lines: u64,
    pub blank_lines: u64,
    pub mixed_lines_separate: u64,
    #[serde(default)]
    pub functions: u64,
    #[serde(default)]
    pub classes: u64,
    #[serde(default)]
    pub variables: u64,
    #[serde(default)]
    pub imports: u64,
    #[serde(default)]
    pub test_count: u64,
    #[serde(default)]
    pub test_assertion_count: u64,
    #[serde(default)]
    pub test_suite_count: u64,
    #[serde(default)]
    pub coverage_lines_found: u64,
    #[serde(default)]
    pub coverage_lines_hit: u64,
    #[serde(default)]
    pub coverage_functions_found: u64,
    #[serde(default)]
    pub coverage_functions_hit: u64,
    #[serde(default)]
    pub coverage_branches_found: u64,
    #[serde(default)]
    pub coverage_branches_hit: u64,
}
151
/// Totals for a single language, produced by `build_language_summaries`.
///
/// Fields marked `#[serde(default)]` deserialize to `0` when absent from the
/// input document.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LanguageSummary {
    pub language: Language,
    pub files: u64,
    pub total_physical_lines: u64,
    pub code_lines: u64,
    pub comment_lines: u64,
    pub blank_lines: u64,
    pub mixed_lines_separate: u64,
    #[serde(default)]
    pub functions: u64,
    #[serde(default)]
    pub classes: u64,
    #[serde(default)]
    pub variables: u64,
    #[serde(default)]
    pub imports: u64,
    #[serde(default)]
    pub test_count: u64,
    #[serde(default)]
    pub test_assertion_count: u64,
    #[serde(default)]
    pub test_suite_count: u64,
    #[serde(default)]
    pub coverage_lines_found: u64,
    #[serde(default)]
    pub coverage_lines_hit: u64,
    #[serde(default)]
    pub coverage_functions_found: u64,
    #[serde(default)]
    pub coverage_functions_hit: u64,
    #[serde(default)]
    pub coverage_branches_found: u64,
    #[serde(default)]
    pub coverage_branches_hit: u64,
}
188
/// Everything recorded about one candidate file, whether analyzed or skipped.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileRecord {
    pub path: String,
    /// Path used for sorting, submodule matching and coverage lookup.
    pub relative_path: String,
    pub language: Option<Language>,
    pub size_bytes: u64,
    pub detected_encoding: Option<String>,
    pub raw_line_categories: RawLineCounts,
    pub effective_counts: EffectiveCounts,
    /// Decides whether the record is listed as analyzed or skipped.
    pub status: FileStatus,
    /// Per-file warnings; `push_record` copies them into the run-level list
    /// prefixed with `relative_path`.
    pub warnings: Vec<String>,
    pub generated: bool,
    pub minified: bool,
    pub vendor: bool,
    pub parse_mode: Option<ParseMode>,
    /// Name of the submodule containing the file, set by `process_submodules`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub submodule: Option<String>,
    /// Coverage entry matched by `attach_coverage`, if a coverage file was loaded.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub coverage: Option<FileCoverage>,
}
210
/// Totals for one detected submodule, produced by `build_submodule_summaries`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SubmoduleSummary {
    pub name: String,
    pub relative_path: String,
    pub files_analyzed: u64,
    pub total_physical_lines: u64,
    pub code_lines: u64,
    pub comment_lines: u64,
    pub blank_lines: u64,
    pub language_summaries: Vec<LanguageSummary>,
}
223
/// Complete result of one `analyze` invocation, assembled by `assemble_run`.
///
/// All `git_*` fields and `submodule_summaries` are omitted from serialized
/// output when empty and default to empty when missing on input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalysisRun {
    pub tool: ToolMetadata,
    pub environment: EnvironmentMetadata,
    /// Clone of the configuration the run was executed with.
    pub effective_configuration: AppConfig,
    /// The configured root paths rendered as strings.
    pub input_roots: Vec<String>,
    pub summary_totals: SummaryTotals,
    pub totals_by_language: Vec<LanguageSummary>,
    /// Records whose status is one of the `Analyzed*` variants.
    pub per_file_records: Vec<FileRecord>,
    /// Records with any other status.
    pub skipped_file_records: Vec<FileRecord>,
    pub warnings: Vec<String>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub submodule_summaries: Vec<SubmoduleSummary>,
    /// First seven characters of `git_commit_long`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_short: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_long: Option<String>,
    /// Branch read from `HEAD`, else taken from CI environment variables.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_branch: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_author: Option<String>,
    /// Tags pointing at `HEAD`, joined with `", "`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_tags: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_nearest_tag: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_date: Option<String>,
    /// `url` of the `origin` remote from the repository config file.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_remote_url: Option<String>,
}
263
/// Git details gathered by `detect_git_for_run`; every field is best-effort.
#[derive(Default)]
struct GitInfo {
    /// First seven characters of `commit_long`.
    commit_short: Option<String>,
    /// SHA read from `HEAD` (detached) or resolved from the ref it names.
    commit_long: Option<String>,
    /// Branch from `HEAD`, falling back to CI environment variables.
    branch: Option<String>,
    /// Output of `git log -1 --format=%an HEAD`.
    author: Option<String>,
    /// Output lines of `git tag --points-at HEAD`, joined with `", "`.
    tags: Option<String>,
    /// Output of `git describe --tags --abbrev=0 HEAD`.
    nearest_tag: Option<String>,
    /// Output of `git log -1 --format=%aI HEAD`.
    commit_date: Option<String>,
    /// `url` value of the `[remote "origin"]` section in the git config file.
    remote_url: Option<String>,
}
275
276fn find_git_dir(start: &Path) -> Option<PathBuf> {
280 let mut current = Some(start);
281 while let Some(dir) = current {
282 let candidate = dir.join(".git");
283 if candidate.is_dir() {
284 return Some(candidate);
285 }
286 if candidate.is_file() {
287 if let Some(resolved) = resolve_git_file_pointer(&candidate, dir) {
288 return Some(resolved);
289 }
290 }
291 current = dir.parent();
292 }
293 None
294}
295
/// Follows a `.git` pointer file (`gitdir: <path>`) to the directory it names.
///
/// Relative targets are resolved against `base_dir`. Returns `None` when the
/// file cannot be read, lacks the `gitdir: ` prefix, or the target is not an
/// existing directory.
fn resolve_git_file_pointer(file: &Path, base_dir: &Path) -> Option<PathBuf> {
    let raw = fs::read_to_string(file).ok()?;
    // Pointer files use forward slashes; convert to the platform separator.
    let target = raw
        .trim()
        .strip_prefix("gitdir: ")?
        .replace('/', std::path::MAIN_SEPARATOR_STR);
    let target = Path::new(&target);

    let joined = if target.is_absolute() {
        target.to_path_buf()
    } else {
        base_dir.join(target)
    };
    // Prefer the canonical form but keep the plain path if that fails.
    let git_dir = match joined.canonicalize() {
        Ok(canonical) => canonical,
        Err(_) => joined,
    };

    if git_dir.is_dir() {
        Some(git_dir)
    } else {
        None
    }
}
320
/// Resolves `refname` (e.g. `refs/heads/main`) to a commit SHA.
///
/// A loose ref file under `git_dir` is tried first and accepted only if its
/// trimmed content is at least 40 characters of hex. Otherwise `packed-refs`
/// is scanned for an entry whose name equals `refname`.
///
/// Returns `None` when neither source yields a match.
fn resolve_ref(git_dir: &Path, refname: &str) -> Option<String> {
    // Join component by component so the ref name maps onto native separators.
    let ref_path = refname
        .split('/')
        .fold(git_dir.to_path_buf(), |p, c| p.join(c));
    let loose = fs::read_to_string(&ref_path)
        .ok()
        .map(|s| s.trim().to_string())
        .filter(|s| s.len() >= 40 && s.chars().all(|c| c.is_ascii_hexdigit()));
    if loose.is_some() {
        return loose;
    }

    let packed = fs::read_to_string(git_dir.join("packed-refs")).ok()?;
    packed
        .lines()
        // '#' lines are headers, '^' lines are peeled tag targets.
        .filter(|line| !line.starts_with('#') && !line.starts_with('^'))
        // Lines without a "<sha> <name>" shape (e.g. blank lines) are skipped
        // rather than aborting the whole lookup.
        .filter_map(|line| line.split_once(' '))
        .find(|(_, name)| name.trim() == refname)
        .map(|(sha, _)| sha.to_string())
}
356
/// Extracts the value from a git-config `url = <value>` line.
///
/// The key is matched case-insensitively because git config variable names
/// are case-insensitive. Spaces and tabs are allowed between the key and `=`.
/// Returns `None` for any other key or an empty value.
fn parse_url_line(line: &str) -> Option<&str> {
    // `get` returns None for lines shorter than the key or when byte 3 is not
    // a character boundary, so non-ASCII input cannot panic here.
    let key = line.get(..3)?;
    if !key.eq_ignore_ascii_case("url") {
        return None;
    }
    let rest = line.get(3..)?.trim_start_matches([' ', '\t']);
    // Requiring '=' right after the key rejects longer keys such as `urlx`.
    let url = rest.strip_prefix('=')?.trim();
    if url.is_empty() {
        None
    } else {
        Some(url)
    }
}
368
369fn read_git_remote_url(git_dir: &Path) -> Option<String> {
371 let config = fs::read_to_string(git_dir.join("config")).ok()?;
372 let mut in_origin = false;
373 for line in config.lines() {
374 let trimmed = line.trim();
375 if trimmed.starts_with('[') {
376 in_origin = trimmed == r#"[remote "origin"]"#;
377 } else if in_origin {
378 if let Some(url) = parse_url_line(trimmed) {
379 return Some(url.to_owned());
380 }
381 }
382 }
383 None
384}
385
386fn detect_git_for_run(project_path: &Path) -> GitInfo {
390 let ci_branch = ci_branch_from_env();
392
393 let Some(git_dir) = find_git_dir(project_path) else {
394 return GitInfo {
397 branch: ci_branch,
398 ..GitInfo::default()
399 };
400 };
401
402 let head_raw = match fs::read_to_string(git_dir.join("HEAD")) {
403 Ok(s) => s.trim().to_string(),
404 Err(_) => {
405 return GitInfo {
406 branch: ci_branch,
407 ..GitInfo::default()
408 }
409 }
410 };
411
412 let (branch_from_head, commit_long) = head_raw.strip_prefix("ref: ").map_or_else(
413 || {
414 if head_raw.len() >= 40 && head_raw.chars().all(|c| c.is_ascii_hexdigit()) {
415 (None, Some(head_raw[..40].to_string()))
417 } else {
418 (None, None)
419 }
420 },
421 |refname| {
422 let branch = refname
423 .strip_prefix("refs/heads/")
424 .map(|b| b.trim().to_string());
425 let sha = resolve_ref(&git_dir, refname.trim());
426 (branch, sha)
427 },
428 );
429 let branch = branch_from_head.or(ci_branch);
432
433 let commit_short = commit_long
434 .as_deref()
435 .map(|s| s.chars().take(7).collect::<String>());
436
437 let author = run_git_cmd(project_path, &["log", "-1", "--format=%an", "HEAD"]);
438 let commit_date = run_git_cmd(project_path, &["log", "-1", "--format=%aI", "HEAD"]);
439 let remote_url = read_git_remote_url(&git_dir);
440
441 let tags = run_git_cmd(project_path, &["tag", "--points-at", "HEAD"]).map(|t| {
444 t.lines()
445 .filter(|l| !l.is_empty())
446 .collect::<Vec<_>>()
447 .join(", ")
448 });
449 let nearest_tag = run_git_cmd(project_path, &["describe", "--tags", "--abbrev=0", "HEAD"]);
450
451 GitInfo {
452 commit_short,
453 commit_long,
454 branch,
455 author,
456 tags,
457 nearest_tag,
458 commit_date,
459 remote_url,
460 }
461}
462
/// Runs `git <args>` in `dir` and returns its trimmed, non-empty stdout.
///
/// Several executable locations are tried in order; the first one that runs
/// successfully and prints non-empty UTF-8 output wins. Returns `None` when
/// none does.
fn run_git_cmd(dir: &Path, args: &[&str]) -> Option<String> {
    const GIT_CANDIDATES: [&str; 7] = [
        "git",
        "/usr/bin/git",
        "/usr/local/bin/git",
        "/opt/homebrew/bin/git",
        r"C:\Program Files\Git\cmd\git.exe",
        r"C:\Program Files\Git\bin\git.exe",
        r"C:\Program Files (x86)\Git\cmd\git.exe",
    ];

    GIT_CANDIDATES.iter().find_map(|exe| {
        let output = std::process::Command::new(exe)
            .args(["-c", "safe.directory=*"])
            .args(args)
            .current_dir(dir)
            .output()
            .ok()?;
        if !output.status.success() {
            return None;
        }
        let stdout = String::from_utf8(output.stdout).ok()?;
        let text = stdout.trim();
        if text.is_empty() {
            None
        } else {
            Some(text.to_string())
        }
    })
}
497
/// Names the CI system the process appears to be running under, if any.
///
/// Detection is based on well-known environment variables, checked in table
/// order; the first matching rule wins.
fn detect_ci_system() -> Option<&'static str> {
    // (variable, must equal "true", reported name). Rules with `false` only
    // require the variable to be set to a valid Unicode value.
    const RULES: &[(&str, bool, &str)] = &[
        ("JENKINS_URL", false, "Jenkins"),
        ("JENKINS_HOME", false, "Jenkins"),
        ("BUILD_URL", false, "Jenkins"),
        ("GITHUB_ACTIONS", true, "GitHub Actions"),
        ("GITLAB_CI", true, "GitLab CI"),
        ("CIRCLECI", true, "CircleCI"),
        ("TRAVIS", true, "Travis CI"),
        ("TF_BUILD", true, "Azure DevOps"),
        ("TEAMCITY_VERSION", false, "TeamCity"),
    ];

    RULES
        .iter()
        .find(|(var, needs_true, _)| match std::env::var(var) {
            Ok(value) => !*needs_true || value == "true",
            Err(_) => false,
        })
        .map(|(_, _, name)| *name)
}
525
/// Derives a branch name from common CI environment variables.
///
/// Variables are consulted in a fixed order. Each value is trimmed and has a
/// leading `refs/heads/` (or otherwise `origin/`) removed; empty results and
/// the literal `HEAD` are ignored and the next variable is tried.
fn ci_branch_from_env() -> Option<String> {
    const VARS: [&str; 7] = [
        "BRANCH_NAME",
        "GIT_BRANCH",
        "GITHUB_REF_NAME",
        "CI_COMMIT_BRANCH",
        "CIRCLE_BRANCH",
        "TRAVIS_BRANCH",
        "BUILD_SOURCEBRANCH",
    ];

    VARS.iter()
        .filter_map(|name| std::env::var(name).ok())
        .find_map(|raw| {
            let trimmed = raw.trim();
            let branch = match trimmed.strip_prefix("refs/heads/") {
                Some(short) => short,
                None => trimmed.strip_prefix("origin/").unwrap_or(trimmed),
            };
            if branch.is_empty() || branch == "HEAD" {
                None
            } else {
                Some(branch.to_string())
            }
        })
}
552
/// Returns the invoking user's name from `USERNAME`, then `USER`, or
/// `"unknown"` when neither variable can be read.
fn get_current_username() -> String {
    match std::env::var("USERNAME") {
        Ok(name) => name,
        Err(_) => match std::env::var("USER") {
            Ok(name) => name,
            Err(_) => "unknown".to_string(),
        },
    }
}
558
/// Reads environment variable `var`, treating an empty value the same as an
/// unset (or unreadable) one.
fn non_empty_env(var: &str) -> Option<String> {
    std::env::var(var).ok().filter(|value| !value.is_empty())
}
567
/// Reports whether any of the Jenkins marker variables (`JENKINS_URL`,
/// `JENKINS_HOME`, `BUILD_URL`) is set to a readable value.
fn is_jenkins_env() -> bool {
    ["JENKINS_URL", "JENKINS_HOME", "BUILD_URL"]
        .iter()
        .any(|key| std::env::var(key).is_ok())
}
573
574fn get_hostname() -> String {
575 if is_jenkins_env() {
578 if let Some(n) = non_empty_env("NODE_NAME") {
579 return n;
580 }
581 }
582 if std::env::var("GITHUB_ACTIONS").as_deref() == Ok("true") {
583 if let Some(r) = non_empty_env("RUNNER_NAME") {
584 return r;
585 }
586 }
587 if std::env::var("GITLAB_CI").as_deref() == Ok("true") {
588 if let Some(r) = non_empty_env("CI_RUNNER_DESCRIPTION") {
589 return r;
590 }
591 }
592 std::env::var("COMPUTERNAME")
593 .or_else(|_| std::env::var("HOSTNAME"))
594 .or_else(|_| std::fs::read_to_string("/etc/hostname").map(|s| s.trim().to_string()))
595 .unwrap_or_else(|_| "unknown".to_string())
596}
597
598#[allow(clippy::too_many_arguments)]
600fn walk_root(
601 root: &Path,
602 config: &AppConfig,
603 include_globs: Option<&GlobSet>,
604 exclude_globs: Option<&GlobSet>,
605 enabled_languages: Option<&BTreeSet<Language>>,
606 seen_paths: &mut HashSet<PathBuf>,
607 analyzed: &mut Vec<FileRecord>,
608 skipped: &mut Vec<FileRecord>,
609 warnings: &mut Vec<String>,
610 cancel: Option<&AtomicBool>,
611 progress: Option<&ProgressCounters>,
612) -> Result<()> {
613 let mut builder = WalkBuilder::new(root);
614 builder
615 .follow_links(config.discovery.follow_symlinks)
616 .hidden(config.discovery.ignore_hidden_files)
617 .ignore(config.discovery.honor_ignore_files)
618 .parents(config.discovery.honor_ignore_files)
619 .git_ignore(config.discovery.honor_ignore_files)
620 .git_global(config.discovery.honor_ignore_files)
621 .git_exclude(config.discovery.honor_ignore_files);
622
623 let paths = collect_walk_paths(&builder, seen_paths, warnings);
624 if paths.is_empty() {
625 return Ok(());
626 }
627
628 if let Some(p) = progress {
629 p.files_total.fetch_add(paths.len(), Ordering::Relaxed);
630 }
631
632 let chunk_results = run_parallel_analysis(
633 &paths,
634 root,
635 config,
636 include_globs,
637 exclude_globs,
638 enabled_languages,
639 cancel,
640 progress,
641 )?;
642 merge_chunk_results(chunk_results, analyzed, skipped, warnings)
643}
644
645fn collect_walk_paths(
646 builder: &WalkBuilder,
647 seen_paths: &mut HashSet<PathBuf>,
648 warnings: &mut Vec<String>,
649) -> Vec<PathBuf> {
650 let (tx, rx) = std::sync::mpsc::channel::<std::result::Result<PathBuf, String>>();
654
655 builder.build_parallel().run(|| {
656 let tx = tx.clone();
657 Box::new(move |entry| {
658 match entry {
659 Err(e) => {
660 let _ = tx.send(Err(format!("discovery warning: {e}")));
661 }
662 Ok(e) => {
663 let path = e.into_path();
664 if !path.is_dir() {
665 let _ = tx.send(Ok(path));
666 }
667 }
668 }
669 ignore::WalkState::Continue
670 })
671 });
672
673 drop(tx);
676
677 rx.into_iter()
678 .filter_map(|msg| match msg {
679 Ok(path) => {
680 if seen_paths.insert(path.clone()) {
681 Some(path)
682 } else {
683 None
684 }
685 }
686 Err(warn) => {
687 warnings.push(warn);
688 None
689 }
690 })
691 .collect()
692}
693
694#[allow(clippy::too_many_arguments)]
696fn worker_loop(
697 paths: &[PathBuf],
698 root: &Path,
699 config: &AppConfig,
700 include_globs: Option<&GlobSet>,
701 exclude_globs: Option<&GlobSet>,
702 enabled_languages: Option<&BTreeSet<Language>>,
703 cancel: Option<&AtomicBool>,
704 next_index: &AtomicUsize,
705 files_done: Option<&AtomicUsize>,
706) -> Vec<Result<Option<FileRecord>>> {
707 let mut results = Vec::new();
708 loop {
709 if cancel.is_some_and(|c| c.load(Ordering::Relaxed)) {
710 results.push(Err(anyhow::anyhow!("analysis cancelled")));
711 break;
712 }
713 let i = next_index.fetch_add(1, Ordering::Relaxed);
714 if i >= paths.len() {
715 break;
716 }
717 results.push(analyze_candidate_file(
718 &paths[i],
719 root,
720 config,
721 include_globs,
722 exclude_globs,
723 enabled_languages,
724 ));
725 if let Some(fd) = files_done {
726 fd.fetch_add(1, Ordering::Relaxed);
727 }
728 }
729 results
730}
731
732#[allow(clippy::too_many_arguments)]
733fn run_parallel_analysis(
734 paths: &[PathBuf],
735 root: &Path,
736 config: &AppConfig,
737 include_globs: Option<&GlobSet>,
738 exclude_globs: Option<&GlobSet>,
739 enabled_languages: Option<&BTreeSet<Language>>,
740 cancel: Option<&AtomicBool>,
741 progress: Option<&ProgressCounters>,
742) -> Result<Vec<Vec<Result<Option<FileRecord>>>>> {
743 let thread_count = std::thread::available_parallelism().map_or(DEFAULT_ANALYSIS_THREADS, |n| {
744 n.get().min(MAX_ANALYSIS_THREADS)
745 });
746 let next_index = AtomicUsize::new(0);
750 let files_done: Option<&AtomicUsize> = progress.map(|p| p.files_done.as_ref());
751
752 std::thread::scope(|s| -> Result<Vec<Vec<Result<Option<FileRecord>>>>> {
753 let mut handles = Vec::with_capacity(thread_count);
756 for _ in 0..thread_count {
757 handles.push(s.spawn(|| {
758 worker_loop(
759 paths,
760 root,
761 config,
762 include_globs,
763 exclude_globs,
764 enabled_languages,
765 cancel,
766 &next_index,
767 files_done,
768 )
769 }));
770 }
771 handles
772 .into_iter()
773 .map(|h| {
774 h.join()
775 .map_err(|_| anyhow::anyhow!("analysis thread panicked"))
776 })
777 .collect()
778 })
779}
780
781fn merge_chunk_results(
782 chunk_results: Vec<Vec<Result<Option<FileRecord>>>>,
783 analyzed: &mut Vec<FileRecord>,
784 skipped: &mut Vec<FileRecord>,
785 warnings: &mut Vec<String>,
786) -> Result<()> {
787 for chunk in chunk_results {
788 for result in chunk {
789 if let Some(record) = result? {
790 push_record(record, analyzed, skipped, warnings);
791 }
792 }
793 }
794 Ok(())
795}
796
797fn process_submodules(config: &AppConfig, analyzed: &mut [FileRecord]) -> Vec<SubmoduleSummary> {
799 let root = config.discovery.root_paths[0]
800 .canonicalize()
801 .unwrap_or_else(|_| config.discovery.root_paths[0].clone());
802 let submodules = detect_submodules(&root);
803 if submodules.is_empty() {
804 return Vec::new();
805 }
806
807 for file in analyzed.iter_mut() {
808 for (name, sub_path) in &submodules {
809 let prefix = sub_path.to_string_lossy().replace('\\', "/");
810 let rel = &file.relative_path;
811 if rel == &prefix || rel.starts_with(&format!("{prefix}/")) {
812 file.submodule = Some(name.clone());
813 break;
814 }
815 }
816 }
817
818 build_submodule_summaries(analyzed, &submodules)
819}
820
821fn assemble_run(
823 config: &AppConfig,
824 runtime_mode: &str,
825 analyzed: Vec<FileRecord>,
826 skipped: Vec<FileRecord>,
827 warnings: Vec<String>,
828 submodule_summaries: Vec<SubmoduleSummary>,
829) -> AnalysisRun {
830 let summary = build_summary(&analyzed, &skipped);
831 let language_summaries = build_language_summaries(&analyzed);
832
833 let first_root = config
834 .discovery
835 .root_paths
836 .first()
837 .map(|p| p.canonicalize().unwrap_or_else(|_| p.clone()));
838 let git = first_root
839 .as_deref()
840 .map(detect_git_for_run)
841 .unwrap_or_default();
842
843 let now = Utc::now();
844 let run_id = {
845 let uuid_suffix = Uuid::new_v4().simple().to_string();
846 format!("{}-{}", now.format("%Y%m%d-%H%M"), uuid_suffix)
847 };
848
849 AnalysisRun {
850 tool: ToolMetadata {
851 name: "sloc".into(),
852 version: env!("CARGO_PKG_VERSION").into(),
853 run_id,
854 timestamp_utc: now,
855 },
856 environment: EnvironmentMetadata {
857 operating_system: std::env::consts::OS.into(),
858 architecture: std::env::consts::ARCH.into(),
859 runtime_mode: runtime_mode.into(),
860 initiator_username: get_current_username(),
861 initiator_hostname: get_hostname(),
862 ci_name: detect_ci_system().map(str::to_string),
863 },
864 effective_configuration: config.clone(),
865 input_roots: config
866 .discovery
867 .root_paths
868 .iter()
869 .map(|p| path_to_string(p))
870 .collect(),
871 summary_totals: summary,
872 totals_by_language: language_summaries,
873 per_file_records: analyzed,
874 skipped_file_records: skipped,
875 warnings,
876 submodule_summaries,
877 git_commit_short: git.commit_short,
878 git_commit_long: git.commit_long,
879 git_branch: git.branch,
880 git_commit_author: git.author,
881 git_tags: git.tags,
882 git_nearest_tag: git.nearest_tag,
883 git_commit_date: git.commit_date,
884 git_remote_url: git.remote_url,
885 }
886}
887
888#[allow(clippy::too_many_lines)]
893pub fn analyze(
894 config: &AppConfig,
895 runtime_mode: &str,
896 cancel: Option<&AtomicBool>,
897 progress: Option<&ProgressCounters>,
898) -> Result<AnalysisRun> {
899 config.validate()?;
900
901 if config.discovery.root_paths.is_empty() {
902 anyhow::bail!("no input paths were provided");
903 }
904
905 let include_globs = compile_globset(&config.discovery.include_globs)?;
906 let exclude_globs = compile_globset(&config.discovery.exclude_globs)?;
907 let enabled_languages = parse_enabled_languages(&config.analysis.enabled_languages)?;
908
909 let mut analyzed = Vec::new();
910 let mut skipped = Vec::new();
911 let mut warnings = Vec::new();
912 let mut seen_paths = HashSet::new();
913
914 for root in &config.discovery.root_paths {
915 if cancel.is_some_and(|c| c.load(Ordering::Relaxed)) {
916 anyhow::bail!("analysis cancelled");
917 }
918
919 let root = root.canonicalize().unwrap_or_else(|_| root.clone());
920
921 if root.is_file() {
922 if let Some(record) = analyze_candidate_file(
923 &root,
924 root.parent().unwrap_or_else(|| Path::new(".")),
925 config,
926 include_globs.as_ref(),
927 exclude_globs.as_ref(),
928 enabled_languages.as_ref(),
929 )? {
930 push_record(record, &mut analyzed, &mut skipped, &mut warnings);
931 }
932 continue;
933 }
934
935 walk_root(
936 &root,
937 config,
938 include_globs.as_ref(),
939 exclude_globs.as_ref(),
940 enabled_languages.as_ref(),
941 &mut seen_paths,
942 &mut analyzed,
943 &mut skipped,
944 &mut warnings,
945 cancel,
946 progress,
947 )?;
948 }
949
950 analyzed.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
951 skipped.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
952
953 let submodule_summaries = if config.discovery.submodule_breakdown {
955 process_submodules(config, &mut analyzed)
956 } else {
957 Vec::new()
958 };
959
960 attach_coverage(config, &mut analyzed, &mut warnings);
961
962 Ok(assemble_run(
963 config,
964 runtime_mode,
965 analyzed,
966 skipped,
967 warnings,
968 submodule_summaries,
969 ))
970}
971
972fn attach_coverage(config: &AppConfig, analyzed: &mut [FileRecord], warnings: &mut Vec<String>) {
973 let Some(cov_path) = coverage::resolve_coverage_file(config.analysis.coverage_file.as_deref())
974 else {
975 return;
976 };
977 tracing::debug!(path = %cov_path.display(), "loading coverage file");
978 match fs::read_to_string(&cov_path) {
979 Ok(content) => {
980 let cov_map = coverage::parse_coverage_auto(&cov_path, &content);
981 let mut matched: u32 = 0;
982 let mut unmatched: u32 = 0;
983 for record in analyzed.iter_mut() {
984 record.coverage =
985 coverage::lookup_coverage(&cov_map, &record.relative_path).cloned();
986 if record.coverage.is_some() {
987 matched += 1;
988 } else {
989 unmatched += 1;
990 }
991 }
992 tracing::debug!(
993 path = %cov_path.display(),
994 coverage_entries = cov_map.len(),
995 files_matched = matched,
996 files_unmatched = unmatched,
997 "coverage attached"
998 );
999 if unmatched > 0 && matched == 0 {
1000 tracing::warn!(
1001 path = %cov_path.display(),
1002 "coverage file loaded but no source files could be matched — check that paths in the coverage report match the scanned directory"
1003 );
1004 }
1005 }
1006 Err(e) => {
1007 tracing::warn!(path = %cov_path.display(), error = %e, "coverage file could not be read");
1008 warnings.push(format!(
1009 "coverage file '{}' could not be read: {e}",
1010 cov_path.display()
1011 ));
1012 }
1013 }
1014}
1015
1016fn push_record(
1017 record: FileRecord,
1018 analyzed: &mut Vec<FileRecord>,
1019 skipped: &mut Vec<FileRecord>,
1020 warnings: &mut Vec<String>,
1021) {
1022 warnings.extend(
1023 record
1024 .warnings
1025 .iter()
1026 .map(|warning| format!("{}: {warning}", record.relative_path)),
1027 );
1028
1029 match record.status {
1030 FileStatus::AnalyzedExact | FileStatus::AnalyzedBestEffort => analyzed.push(record),
1031 _ => skipped.push(record),
1032 }
1033}
1034
1035#[inline]
1037fn skip_with_reason(
1038 path: &Path,
1039 root: &Path,
1040 size: u64,
1041 reason: impl Into<String>,
1042) -> MetadataPolicyOutcome {
1043 MetadataPolicyOutcome::Skip(Box::new(skipped_record(
1044 path,
1045 root,
1046 size,
1047 FileStatus::SkippedByPolicy,
1048 vec![reason.into()],
1049 )))
1050}
1051
1052#[allow(clippy::too_many_arguments)]
1056fn check_metadata_policy(
1057 path: &Path,
1058 root: &Path,
1059 relative_path: &str,
1060 metadata: &fs::Metadata,
1061 config: &AppConfig,
1062 include_globs: Option<&GlobSet>,
1063 exclude_globs: Option<&GlobSet>,
1064) -> MetadataPolicyOutcome {
1065 let size = metadata.len();
1066
1067 if metadata.file_type().is_symlink() && !config.discovery.follow_symlinks {
1068 return skip_with_reason(path, root, size, "symlink skipped by policy");
1069 }
1070 if file_name_eq(path, ".gitignore") {
1071 return skip_with_reason(path, root, size, ".gitignore is always excluded");
1072 }
1073 if is_excluded_dir_path(path, &config.discovery.excluded_directories) {
1074 return skip_with_reason(path, root, size, "path matched excluded directory setting");
1075 }
1076 if size > config.discovery.max_file_size_bytes {
1077 return skip_with_reason(
1078 path,
1079 root,
1080 size,
1081 format!(
1082 "file exceeded max_file_size_bytes ({})",
1083 config.discovery.max_file_size_bytes
1084 ),
1085 );
1086 }
1087 if let Some(globs) = include_globs {
1088 if !globs.is_match(Path::new(relative_path)) && !globs.is_match(path) {
1089 return MetadataPolicyOutcome::Exclude;
1090 }
1091 }
1092 if let Some(globs) = exclude_globs {
1093 if globs.is_match(Path::new(relative_path)) || globs.is_match(path) {
1094 return skip_with_reason(path, root, size, "path matched exclude glob");
1095 }
1096 }
1097 if is_known_lockfile(path) && !config.analysis.include_lockfiles {
1098 return skip_with_reason(path, root, size, "lockfile skipped by default policy");
1099 }
1100
1101 MetadataPolicyOutcome::Continue
1102}
1103
/// Outcome of the content-based policy checks in `check_content_policy`.
struct ContentPolicyResult {
    /// Whether the path was classified as a vendor path.
    vendor: bool,
    /// Whether generated-file detection is enabled and flagged the file.
    generated: bool,
    /// Whether minified-file detection is enabled and flagged the file.
    minified: bool,
    /// Present when one of the checks decided the file must be skipped.
    skip_record: Option<FileRecord>,
}
1110
1111fn check_content_policy(
1114 path: &Path,
1115 root: &Path,
1116 size_bytes: u64,
1117 bytes: &[u8],
1118 config: &AppConfig,
1119) -> ContentPolicyResult {
1120 let vendor = is_vendor_path(path);
1121 if vendor && config.analysis.vendor_directory_detection {
1122 return ContentPolicyResult {
1123 vendor,
1124 generated: false,
1125 minified: false,
1126 skip_record: Some(skipped_record(
1127 path,
1128 root,
1129 size_bytes,
1130 FileStatus::SkippedByPolicy,
1131 vec!["vendor file skipped by policy".into()],
1132 )),
1133 };
1134 }
1135
1136 let generated = config.analysis.generated_file_detection && looks_generated(path, bytes);
1137 if generated {
1138 return ContentPolicyResult {
1139 vendor,
1140 generated,
1141 minified: false,
1142 skip_record: Some(skipped_record(
1143 path,
1144 root,
1145 size_bytes,
1146 FileStatus::SkippedByPolicy,
1147 vec!["generated file skipped by policy".into()],
1148 )),
1149 };
1150 }
1151
1152 let minified = config.analysis.minified_file_detection && looks_minified(path, bytes);
1153 if minified {
1154 return ContentPolicyResult {
1155 vendor,
1156 generated,
1157 minified,
1158 skip_record: Some(skipped_record(
1159 path,
1160 root,
1161 size_bytes,
1162 FileStatus::SkippedByPolicy,
1163 vec!["minified file skipped by policy".into()],
1164 )),
1165 };
1166 }
1167
1168 ContentPolicyResult {
1169 vendor,
1170 generated,
1171 minified,
1172 skip_record: None,
1173 }
1174}
1175
1176fn decode_file_contents(
1178 path: &Path,
1179 root: &Path,
1180 size_bytes: u64,
1181 bytes: &[u8],
1182 config: &AppConfig,
1183) -> Result<Option<(String, String, Vec<String>)>> {
1184 if is_binary(bytes) {
1185 return match config.analysis.binary_file_behavior {
1186 BinaryFileBehavior::Skip => Ok(None),
1187 BinaryFileBehavior::Fail => {
1188 anyhow::bail!("binary file encountered: {}", path.display())
1189 }
1190 };
1191 }
1192
1193 match decode_bytes(bytes) {
1194 Ok(result) => Ok(Some(result)),
1195 Err(err) => match config.analysis.decode_failure_behavior {
1196 FailureBehavior::WarnSkip => {
1197 let _ = (path, root, size_bytes); Err(anyhow::anyhow!("__decode_warn__: {err}"))
1202 }
1203 FailureBehavior::Fail => {
1204 anyhow::bail!("decode failure for {}: {err}", path.display())
1205 }
1206 },
1207 }
1208}
1209
1210#[allow(clippy::too_many_lines)]
1211fn analyze_candidate_file(
1212 path: &Path,
1213 root: &Path,
1214 config: &AppConfig,
1215 include_globs: Option<&GlobSet>,
1216 exclude_globs: Option<&GlobSet>,
1217 enabled_languages: Option<&BTreeSet<Language>>,
1218) -> Result<Option<FileRecord>> {
1219 let metadata = match fs::symlink_metadata(path) {
1220 Ok(metadata) => metadata,
1221 Err(err) => {
1222 return Ok(Some(skipped_record(
1223 path,
1224 root,
1225 0,
1226 FileStatus::ErrorInternal,
1227 vec![format!("failed to read metadata: {err}")],
1228 )));
1229 }
1230 };
1231
1232 let relative_path = relative_path_string(path, root);
1233
1234 match check_metadata_policy(
1236 path,
1237 root,
1238 &relative_path,
1239 &metadata,
1240 config,
1241 include_globs,
1242 exclude_globs,
1243 ) {
1244 MetadataPolicyOutcome::Skip(record) => return Ok(Some(*record)),
1245 MetadataPolicyOutcome::Exclude => return Ok(None),
1246 MetadataPolicyOutcome::Continue => {}
1247 }
1248
1249 let bytes = match fs::read(path) {
1250 Ok(bytes) => bytes,
1251 Err(err) => {
1252 return Ok(Some(skipped_record(
1253 path,
1254 root,
1255 metadata.len(),
1256 FileStatus::ErrorInternal,
1257 vec![format!("failed to read file: {err}")],
1258 )));
1259 }
1260 };
1261
1262 let content_policy = check_content_policy(path, root, metadata.len(), &bytes, config);
1264 if let Some(record) = content_policy.skip_record {
1265 return Ok(Some(record));
1266 }
1267 let (vendor, generated, minified) = (
1268 content_policy.vendor,
1269 content_policy.generated,
1270 content_policy.minified,
1271 );
1272
1273 let (text, encoding, decode_warnings) =
1275 match decode_file_contents(path, root, metadata.len(), &bytes, config) {
1276 Ok(Some(result)) => result,
1277 Ok(None) => {
1278 return Ok(Some(skipped_record(
1279 path,
1280 root,
1281 metadata.len(),
1282 FileStatus::SkippedBinary,
1283 vec!["binary file skipped by default".into()],
1284 )));
1285 }
1286 Err(err) => {
1287 let msg = err.to_string();
1288 if let Some(warn_msg) = msg.strip_prefix("__decode_warn__: ") {
1289 return Ok(Some(skipped_record(
1290 path,
1291 root,
1292 metadata.len(),
1293 FileStatus::SkippedDecodeError,
1294 vec![warn_msg.to_string()],
1295 )));
1296 }
1297 return Err(err);
1298 }
1299 };
1300
1301 let first_line = text.lines().next();
1302 let language = detect_language(
1303 path,
1304 first_line,
1305 &config.analysis.extension_overrides,
1306 config.analysis.shebang_detection,
1307 );
1308
1309 let Some(language) = language else {
1310 return Ok(Some(skipped_record(
1311 path,
1312 root,
1313 metadata.len(),
1314 FileStatus::SkippedUnsupported,
1315 vec!["unsupported or undetected language".into()],
1316 )));
1317 };
1318
1319 if let Some(enabled) = enabled_languages {
1320 if !enabled.contains(&language) {
1321 return Ok(Some(skipped_record(
1322 path,
1323 root,
1324 metadata.len(),
1325 FileStatus::SkippedByPolicy,
1326 vec![format!(
1327 "language {} disabled by configuration",
1328 language.display_name()
1329 )],
1330 )));
1331 }
1332 }
1333
1334 let ieee_opts = AnalysisOptions {
1335 blank_in_block_comment_as_comment: config.analysis.blank_in_block_comment_policy
1336 == BlankInBlockCommentPolicy::CountAsComment,
1337 collapse_continuation_lines: config.analysis.continuation_line_policy
1338 == ContinuationLinePolicy::CollapseToLogical,
1339 };
1340 let analysis = analyze_text(language, &text, ieee_opts);
1341 let effective_counts = compute_effective_counts(
1342 &analysis.raw,
1343 config.analysis.mixed_line_policy,
1344 config.analysis.python_docstrings_as_comments,
1345 config.analysis.count_compiler_directives,
1346 );
1347
1348 let mut warnings = decode_warnings;
1349 warnings.extend(analysis.warnings.clone());
1350
1351 Ok(Some(FileRecord {
1352 path: path_to_string(path),
1353 relative_path,
1354 language: Some(language),
1355 size_bytes: metadata.len(),
1356 detected_encoding: Some(encoding),
1357 raw_line_categories: analysis.raw,
1358 effective_counts,
1359 status: match analysis.parse_mode {
1360 ParseMode::Lexical | ParseMode::TreeSitter => FileStatus::AnalyzedExact,
1361 ParseMode::LexicalBestEffort => FileStatus::AnalyzedBestEffort,
1362 },
1363 warnings,
1364 generated,
1365 minified,
1366 vendor,
1367 parse_mode: Some(analysis.parse_mode),
1368 submodule: None,
1369 coverage: None,
1370 }))
1371}
1372
1373const fn compute_effective_counts(
1374 raw: &RawLineCounts,
1375 mixed_line_policy: MixedLinePolicy,
1376 python_docstrings_as_comments: bool,
1377 count_compiler_directives: bool,
1378) -> EffectiveCounts {
1379 let mut effective = EffectiveCounts {
1380 code_lines: raw.code_only_lines,
1381 comment_lines: raw.single_comment_only_lines + raw.multi_comment_only_lines,
1382 blank_lines: raw.blank_only_lines,
1383 mixed_lines_separate: 0,
1384 };
1385
1386 if python_docstrings_as_comments {
1387 effective.comment_lines += raw.docstring_comment_lines;
1388 } else {
1389 effective.code_lines += raw.docstring_comment_lines;
1390 }
1391
1392 let mixed_total = raw.mixed_code_single_comment_lines + raw.mixed_code_multi_comment_lines;
1393 match mixed_line_policy {
1394 MixedLinePolicy::CodeOnly => effective.code_lines += mixed_total,
1395 MixedLinePolicy::CodeAndComment => {
1396 effective.code_lines += mixed_total;
1397 effective.comment_lines += mixed_total;
1398 }
1399 MixedLinePolicy::CommentOnly => effective.comment_lines += mixed_total,
1400 MixedLinePolicy::SeparateMixedCategory => effective.mixed_lines_separate += mixed_total,
1401 }
1402
1403 if !count_compiler_directives {
1406 effective.code_lines = effective
1407 .code_lines
1408 .saturating_sub(raw.compiler_directive_lines);
1409 }
1410
1411 effective
1412}
1413
1414fn build_summary(analyzed: &[FileRecord], skipped: &[FileRecord]) -> SummaryTotals {
1415 let mut summary = SummaryTotals {
1416 files_considered: (analyzed.len() + skipped.len()) as u64,
1417 files_analyzed: analyzed.len() as u64,
1418 files_skipped: skipped.len() as u64,
1419 ..Default::default()
1420 };
1421
1422 for record in analyzed {
1423 summary.total_physical_lines += record.raw_line_categories.total_physical_lines;
1424 summary.code_lines += record.effective_counts.code_lines;
1425 summary.comment_lines += record.effective_counts.comment_lines;
1426 summary.blank_lines += record.effective_counts.blank_lines;
1427 summary.mixed_lines_separate += record.effective_counts.mixed_lines_separate;
1428 summary.functions += record.raw_line_categories.functions;
1429 summary.classes += record.raw_line_categories.classes;
1430 summary.variables += record.raw_line_categories.variables;
1431 summary.imports += record.raw_line_categories.imports;
1432 summary.test_count += record.raw_line_categories.test_count;
1433 summary.test_assertion_count += record.raw_line_categories.test_assertion_count;
1434 summary.test_suite_count += record.raw_line_categories.test_suite_count;
1435 if let Some(cov) = &record.coverage {
1436 summary.coverage_lines_found += u64::from(cov.lines_found);
1437 summary.coverage_lines_hit += u64::from(cov.lines_hit);
1438 summary.coverage_functions_found += u64::from(cov.functions_found);
1439 summary.coverage_functions_hit += u64::from(cov.functions_hit);
1440 summary.coverage_branches_found += u64::from(cov.branches_found);
1441 summary.coverage_branches_hit += u64::from(cov.branches_hit);
1442 }
1443 }
1444
1445 summary
1446}
1447
1448const fn zeroed_summary(language: Language) -> LanguageSummary {
1450 LanguageSummary {
1451 language,
1452 files: 0,
1453 total_physical_lines: 0,
1454 code_lines: 0,
1455 comment_lines: 0,
1456 blank_lines: 0,
1457 mixed_lines_separate: 0,
1458 functions: 0,
1459 classes: 0,
1460 variables: 0,
1461 imports: 0,
1462 test_count: 0,
1463 test_assertion_count: 0,
1464 test_suite_count: 0,
1465 coverage_lines_found: 0,
1466 coverage_lines_hit: 0,
1467 coverage_functions_found: 0,
1468 coverage_functions_hit: 0,
1469 coverage_branches_found: 0,
1470 coverage_branches_hit: 0,
1471 }
1472}
1473
1474fn accumulate_record_into_summary(entry: &mut LanguageSummary, record: &FileRecord) {
1476 entry.files += 1;
1477 let r = &record.raw_line_categories;
1478 entry.total_physical_lines += r.total_physical_lines;
1479 entry.code_lines += record.effective_counts.code_lines;
1480 entry.comment_lines += record.effective_counts.comment_lines;
1481 entry.blank_lines += record.effective_counts.blank_lines;
1482 entry.mixed_lines_separate += record.effective_counts.mixed_lines_separate;
1483 entry.functions += r.functions;
1484 entry.classes += r.classes;
1485 entry.variables += r.variables;
1486 entry.imports += r.imports;
1487 entry.test_count += r.test_count;
1488 entry.test_assertion_count += r.test_assertion_count;
1489 entry.test_suite_count += r.test_suite_count;
1490 if let Some(cov) = &record.coverage {
1491 entry.coverage_lines_found += u64::from(cov.lines_found);
1492 entry.coverage_lines_hit += u64::from(cov.lines_hit);
1493 entry.coverage_functions_found += u64::from(cov.functions_found);
1494 entry.coverage_functions_hit += u64::from(cov.functions_hit);
1495 entry.coverage_branches_found += u64::from(cov.branches_found);
1496 entry.coverage_branches_hit += u64::from(cov.branches_hit);
1497 }
1498}
1499
1500fn build_language_summaries(analyzed: &[FileRecord]) -> Vec<LanguageSummary> {
1501 let mut by_language: BTreeMap<Language, LanguageSummary> = BTreeMap::new();
1502 for record in analyzed {
1503 let Some(language) = record.language else {
1504 continue;
1505 };
1506 let entry = by_language
1507 .entry(language)
1508 .or_insert_with(|| zeroed_summary(language));
1509 accumulate_record_into_summary(entry, record);
1510 }
1511 by_language.into_values().collect()
1512}
1513
1514fn skipped_record(
1515 path: &Path,
1516 root: &Path,
1517 size_bytes: u64,
1518 status: FileStatus,
1519 warnings: Vec<String>,
1520) -> FileRecord {
1521 FileRecord {
1522 path: path_to_string(path),
1523 relative_path: relative_path_string(path, root),
1524 language: None,
1525 size_bytes,
1526 detected_encoding: None,
1527 raw_line_categories: RawLineCounts::default(),
1528 effective_counts: EffectiveCounts::default(),
1529 status,
1530 warnings,
1531 generated: false,
1532 minified: false,
1533 vendor: false,
1534 parse_mode: None,
1535 submodule: None,
1536 coverage: None,
1537 }
1538}
1539
/// Renders `path` relative to `root` with forward slashes.
///
/// Falls back to the full `path` when it does not start with `root`.
fn relative_path_string(path: &Path, root: &Path) -> String {
    let relative = match path.strip_prefix(root) {
        Ok(rest) => rest,
        Err(_) => path,
    };
    let rendered = relative.to_string_lossy();
    rendered.replace('\\', "/")
}
1546
/// Renders `path` as a (lossy) string with backslashes replaced by forward slashes.
fn path_to_string(path: &Path) -> String {
    let rendered = path.to_string_lossy();
    rendered.replace('\\', "/")
}
1550
/// Reads `<root>/.gitmodules` and returns `(name, path)` for each submodule
/// section that declares a `path` key.
///
/// Returns an empty list when the file is missing or unreadable. Malformed
/// lines are ignored rather than causing a panic: a section header is only
/// recognised when it has both the `[submodule "` prefix and a separate `"]`
/// suffix, and only the exact key `path` (not merely a key starting with
/// `path`) sets the section's path.
#[must_use]
pub fn detect_submodules(root: &Path) -> Vec<(String, PathBuf)> {
    let gitmodules = root.join(".gitmodules");
    if !gitmodules.is_file() {
        return Vec::new();
    }
    let Ok(content) = fs::read_to_string(&gitmodules) else {
        return Vec::new();
    };

    let mut result = Vec::new();
    let mut current_name: Option<String> = None;
    let mut current_path: Option<PathBuf> = None;

    for line in content.lines() {
        let trimmed = line.trim();

        // Stripping prefix then suffix cannot overlap, unlike manual slicing,
        // so a truncated header such as `[submodule "]` is simply not a header.
        let header_name = trimmed
            .strip_prefix("[submodule \"")
            .and_then(|rest| rest.strip_suffix("\"]"));

        if let Some(name) = header_name {
            // A new section starts: flush the previous one if it was complete.
            if let (Some(prev_name), Some(prev_path)) = (current_name.take(), current_path.take())
            {
                result.push((prev_name, prev_path));
            }
            current_name = Some(name.to_string());
        } else if let Some(rest) = trimmed.strip_prefix("path") {
            // Require `=` right after the key (ignoring whitespace) so that
            // other keys starting with "path" are not mistaken for it.
            if let Some(value) = rest.trim_start().strip_prefix('=') {
                current_path = Some(PathBuf::from(value.trim()));
            }
        }
    }
    if let (Some(name), Some(path)) = (current_name, current_path) {
        result.push((name, path));
    }

    result
}
1587
1588fn build_submodule_summaries(
1589 analyzed: &[FileRecord],
1590 submodules: &[(String, PathBuf)],
1591) -> Vec<SubmoduleSummary> {
1592 submodules
1593 .iter()
1594 .map(|(name, path)| {
1595 let files: Vec<&FileRecord> = analyzed
1596 .iter()
1597 .filter(|f| f.submodule.as_deref() == Some(name.as_str()))
1598 .collect();
1599
1600 let files_analyzed = files.len() as u64;
1601 let total_physical_lines = files
1602 .iter()
1603 .map(|f| f.raw_line_categories.total_physical_lines)
1604 .sum();
1605 let code_lines = files.iter().map(|f| f.effective_counts.code_lines).sum();
1606 let comment_lines = files.iter().map(|f| f.effective_counts.comment_lines).sum();
1607 let blank_lines = files.iter().map(|f| f.effective_counts.blank_lines).sum();
1608 let language_summaries = build_language_summaries_from_slice(&files);
1609
1610 SubmoduleSummary {
1611 name: name.clone(),
1612 relative_path: path.to_string_lossy().replace('\\', "/"),
1613 files_analyzed,
1614 total_physical_lines,
1615 code_lines,
1616 comment_lines,
1617 blank_lines,
1618 language_summaries,
1619 }
1620 })
1621 .filter(|s| s.files_analyzed > 0)
1622 .collect()
1623}
1624
1625fn build_language_summaries_from_slice(files: &[&FileRecord]) -> Vec<LanguageSummary> {
1626 let mut map: BTreeMap<String, LanguageSummary> = BTreeMap::new();
1627 for file in files {
1628 let Some(lang) = file.language else { continue };
1629 let entry = map
1630 .entry(lang.display_name().to_string())
1631 .or_insert_with(|| zeroed_summary(lang));
1632 accumulate_record_into_summary(entry, file);
1633 }
1634 map.into_values().collect()
1635}
1636
/// Returns `true` when the final component of `path` is valid UTF-8 and equals
/// `expected` exactly (case-sensitive).
fn file_name_eq(path: &Path, expected: &str) -> bool {
    match path.file_name().and_then(|name| name.to_str()) {
        Some(actual) => actual == expected,
        None => false,
    }
}
1642
/// Returns `true` when any component of `path` (including the last one) is
/// exactly equal to one of `excluded_dirs`.
///
/// Components that are not valid UTF-8 never match.
fn is_excluded_dir_path(path: &Path, excluded_dirs: &[String]) -> bool {
    for component in path.components() {
        let Some(part) = component.as_os_str().to_str() else {
            continue;
        };
        if excluded_dirs.iter().any(|excluded| excluded.as_str() == part) {
            return true;
        }
    }
    false
}
1651
/// Returns `true` when any component of `path` is exactly `vendor`,
/// `node_modules` or `packages`.
fn is_vendor_path(path: &Path) -> bool {
    for component in path.components() {
        let is_vendor_dir = matches!(
            component.as_os_str().to_str(),
            Some("vendor" | "node_modules" | "packages")
        );
        if is_vendor_dir {
            return true;
        }
    }
    false
}
1660
/// Returns `true` when the file name of `path` is one of the recognised
/// package-manager lockfiles (exact, case-sensitive match).
fn is_known_lockfile(path: &Path) -> bool {
    const KNOWN_LOCKFILES: [&str; 7] = [
        "Cargo.lock",
        "package-lock.json",
        "yarn.lock",
        "pnpm-lock.yaml",
        "Pipfile.lock",
        "poetry.lock",
        "composer.lock",
    ];

    let Some(name) = path.file_name().and_then(|name| name.to_str()) else {
        return false;
    };
    KNOWN_LOCKFILES.contains(&name)
}
1677
1678fn looks_generated(path: &Path, bytes: &[u8]) -> bool {
1679 let file_name = path
1680 .file_name()
1681 .and_then(|name| name.to_str())
1682 .unwrap_or_default();
1683 if file_name.contains(".generated.") || file_name.contains(".g.") {
1684 return true;
1685 }
1686
1687 let sample = String::from_utf8_lossy(&bytes[..bytes.len().min(GENERATED_SAMPLE_BYTES)])
1688 .to_ascii_lowercase();
1689 sample.contains("@generated") || sample.contains("generated by")
1690}
1691
1692fn looks_minified(path: &Path, bytes: &[u8]) -> bool {
1693 let file_name = path
1694 .file_name()
1695 .and_then(|name| name.to_str())
1696 .unwrap_or_default();
1697 if file_name.contains(".min.") {
1698 return true;
1699 }
1700
1701 let sample = String::from_utf8_lossy(&bytes[..bytes.len().min(MINIFIED_SAMPLE_BYTES)]);
1702 let longest_line = sample.lines().map(str::len).max().unwrap_or(0);
1703 let whitespace = sample.chars().filter(|c| c.is_whitespace()).count();
1704 longest_line > MINIFIED_LINE_THRESHOLD && whitespace * 100 < sample.len().max(1)
1705}
1706
1707fn is_binary(bytes: &[u8]) -> bool {
1708 if bytes.starts_with(&[0xEF, 0xBB, 0xBF])
1709 || bytes.starts_with(&[0xFF, 0xFE])
1710 || bytes.starts_with(&[0xFE, 0xFF])
1711 {
1712 return false;
1713 }
1714
1715 let sample = &bytes[..bytes.len().min(BINARY_SAMPLE_BYTES)];
1716 sample.contains(&0)
1717}
1718
1719fn decode_utf16_bom(
1722 bom_stripped: &[u8],
1723 encoding: &'static encoding_rs::Encoding,
1724 label: &str,
1725) -> (String, String, Vec<String>) {
1726 let (cow, _, had_errors) = encoding.decode(bom_stripped);
1727 let mut warnings = Vec::new();
1728 if had_errors {
1729 warnings.push(format!("{label} decode contained replacement characters"));
1730 }
1731 (cow.into_owned(), label.into(), warnings)
1732}
1733
1734fn decode_bytes(bytes: &[u8]) -> std::result::Result<(String, String, Vec<String>), String> {
1735 if bytes.starts_with(&[0xEF, 0xBB, 0xBF]) {
1736 let text = String::from_utf8(bytes[3..].to_vec()).map_err(|err| err.to_string())?;
1737 return Ok((text, "utf-8-bom".into(), vec![]));
1738 }
1739 if bytes.starts_with(&[0xFF, 0xFE]) {
1740 return Ok(decode_utf16_bom(&bytes[2..], UTF_16LE, "utf-16le"));
1741 }
1742 if bytes.starts_with(&[0xFE, 0xFF]) {
1743 return Ok(decode_utf16_bom(&bytes[2..], UTF_16BE, "utf-16be"));
1744 }
1745
1746 #[allow(clippy::option_if_let_else)]
1748 if let Ok(text) = String::from_utf8(bytes.to_vec()) {
1749 Ok((text, "utf-8".into(), vec![]))
1750 } else {
1751 let (cow, _, had_errors) = WINDOWS_1252.decode(bytes);
1752 let mut warnings = vec!["decoded using windows-1252 fallback".into()];
1753 if had_errors {
1754 warnings.push("fallback decode contained replacement characters".into());
1755 }
1756 Ok((cow.into_owned(), "windows-1252".into(), warnings))
1757 }
1758}
1759
1760fn compile_globset(patterns: &[String]) -> Result<Option<GlobSet>> {
1761 if patterns.is_empty() {
1762 return Ok(None);
1763 }
1764
1765 let mut builder = GlobSetBuilder::new();
1766 for pattern in patterns {
1767 builder
1768 .add(Glob::new(pattern).with_context(|| format!("invalid glob pattern: {pattern}"))?);
1769 }
1770 Ok(Some(
1771 builder.build().context("failed to compile glob filters")?,
1772 ))
1773}
1774
1775fn parse_enabled_languages(enabled: &[String]) -> Result<Option<BTreeSet<Language>>> {
1776 if enabled.is_empty() {
1777 return Ok(None);
1778 }
1779
1780 let supported = supported_languages();
1781 let mut set = BTreeSet::new();
1782 for name in enabled {
1783 let language = Language::from_name(name)
1784 .with_context(|| format!("unsupported language in config: {name}"))?;
1785 if !supported.contains(&language) {
1786 anyhow::bail!("language {name} is not supported in this build");
1787 }
1788 set.insert(language);
1789 }
1790 Ok(Some(set))
1791}
1792
1793pub fn write_json(run: &AnalysisRun, output_path: &Path) -> Result<()> {
1797 let json = serde_json::to_string_pretty(run).context("failed to serialize analysis run")?;
1798 fs::write(output_path, json)
1799 .with_context(|| format!("failed to write JSON output to {}", output_path.display()))
1800}
1801
1802pub fn read_json(path: &Path) -> Result<AnalysisRun> {
1806 let contents = fs::read_to_string(path)
1807 .with_context(|| format!("failed to read result file {}", path.display()))?;
1808 serde_json::from_str(&contents)
1809 .with_context(|| format!("failed to parse JSON result {}", path.display()))
1810}
1811
#[cfg(test)]
mod tests {
    use super::*;

    /// With `CodeOnly`, mixed lines count as code (2 + 3 = 5); with docstrings
    /// treated as comments, they join the single-line comments (1 + 2 = 3).
    #[test]
    fn effective_counts_respect_code_only_policy() {
        let raw_counts = RawLineCounts {
            code_only_lines: 2,
            single_comment_only_lines: 1,
            mixed_code_single_comment_lines: 3,
            docstring_comment_lines: 2,
            ..RawLineCounts::default()
        };

        let effective =
            compute_effective_counts(&raw_counts, MixedLinePolicy::CodeOnly, true, true);

        assert_eq!(effective.code_lines, 5);
        assert_eq!(effective.comment_lines, 3);
    }

    /// With `SeparateMixedCategory`, mixed lines land only in their own bucket.
    #[test]
    fn effective_counts_can_separate_mixed() {
        let raw_counts = RawLineCounts {
            mixed_code_single_comment_lines: 2,
            mixed_code_multi_comment_lines: 1,
            ..RawLineCounts::default()
        };

        let effective = compute_effective_counts(
            &raw_counts,
            MixedLinePolicy::SeparateMixedCategory,
            true,
            true,
        );

        assert_eq!(effective.mixed_lines_separate, 3);
        assert_eq!(effective.code_lines, 0);
        assert_eq!(effective.comment_lines, 0);
    }

    /// 0x96 is invalid as UTF-8 here, so decoding falls back to windows-1252,
    /// where it maps to an en dash, and a fallback warning is recorded.
    #[test]
    fn windows_1252_fallback_decodes() {
        let input: Vec<u8> = vec![0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x96, 0x57];

        let (decoded, label, notes) = decode_bytes(&input).unwrap();

        assert_eq!(label, "windows-1252");
        assert!(decoded.contains('–'));
        assert!(!notes.is_empty());
    }
}