1#![allow(clippy::multiple_crate_versions)]
4
5pub mod baseline;
6pub mod coverage;
7pub mod delta;
8pub mod history;
9pub use baseline::{check_against_baseline, resolve_baselines_path, BaselineEntry, BaselineStore};
10pub use coverage::{aggregate_line_coverage, lookup_coverage, parse_lcov, FileCoverage};
11pub use delta::{
12 compute_delta, compute_multi_delta, FileChangeStatus, FileDelta, MultiFileDelta,
13 MultiScanComparison, MultiScanPoint, ScanComparison, SummaryDelta,
14};
15pub use history::{
16 CleanupPolicy, CleanupPolicyStore, RegistryEntry, ScanRegistry, ScanSummarySnapshot,
17 WatchedDirsStore,
18};
19
20use std::collections::{BTreeMap, BTreeSet, HashSet};
21use std::fs;
22use std::path::{Path, PathBuf};
23use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
24use std::sync::Arc;
25
26use anyhow::{Context, Result};
27use chrono::{DateTime, Utc};
28use encoding_rs::{UTF_16BE, UTF_16LE, WINDOWS_1252};
29use globset::{Glob, GlobSet, GlobSetBuilder};
30use ignore::WalkBuilder;
31use serde::{Deserialize, Serialize};
32use uuid::Uuid;
33
34use sloc_config::{
35 AppConfig, BinaryFileBehavior, BlankInBlockCommentPolicy, ContinuationLinePolicy,
36 FailureBehavior, MixedLinePolicy,
37};
38use sloc_languages::style::IndentStyle;
39use sloc_languages::{
40 analyze_text, detect_language, supported_languages, AnalysisOptions, Language, ParseMode,
41 RawLineCounts, StyleAnalysis, StyleLangScope,
42};
43
/// Upper bound applied to the worker-thread count derived from
/// `std::thread::available_parallelism`.
const MAX_ANALYSIS_THREADS: usize = 16;
/// Worker-thread count used when the available parallelism cannot be queried.
const DEFAULT_ANALYSIS_THREADS: usize = 4;
/// 1 KiB. NOTE(review): presumably the size of the leading sample inspected for
/// generated-file markers; the consumer is outside this excerpt — confirm.
const GENERATED_SAMPLE_BYTES: usize = 1_024;
/// 4 KiB. NOTE(review): presumably the size of the sample inspected by the
/// minified-file heuristic; the consumer is outside this excerpt — confirm.
const MINIFIED_SAMPLE_BYTES: usize = 4 * 1_024;
/// NOTE(review): presumably the line length above which a line counts as
/// evidence of minification; the consumer is outside this excerpt — confirm.
const MINIFIED_LINE_THRESHOLD: usize = 2_000;
/// 8 KiB. NOTE(review): presumably the size of the sample inspected by the
/// binary-file heuristic; the consumer is outside this excerpt — confirm.
const BINARY_SAMPLE_BYTES: usize = 8 * 1_024;
58
/// Shared atomic counters through which a caller can observe scan progress
/// while an analysis is running.
pub struct ProgressCounters {
    /// Incremented by one each time a worker finishes processing a file.
    pub files_done: Arc<AtomicUsize>,
    /// Increased by the number of discovered paths each time a directory root
    /// has been walked.
    pub files_total: Arc<AtomicUsize>,
}
66
67enum MetadataPolicyOutcome {
69 Skip(Box<FileRecord>),
71 Exclude,
73 Continue,
75}
76
77#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
78#[serde(rename_all = "snake_case")]
79pub enum FileStatus {
80 AnalyzedExact,
81 AnalyzedBestEffort,
82 SkippedBinary,
83 SkippedDecodeError,
84 SkippedUnsupported,
85 SkippedByPolicy,
86 ErrorInternal,
87}
88
89#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default, PartialEq, Eq)]
91#[serde(rename_all = "snake_case")]
92pub enum CocomoMode {
93 #[default]
95 Organic,
96 SemiDetached,
98 Embedded,
100}
101
102#[derive(Debug, Clone, Serialize, Deserialize)]
104pub struct CocomoEstimate {
105 pub mode: CocomoMode,
106 pub ksloc: f64,
108 pub effort_person_months: f64,
110 pub duration_months: f64,
112 pub avg_staff: f64,
114}
115
116#[derive(Debug, Clone, Serialize, Deserialize, Default)]
117pub struct EffectiveCounts {
118 pub code_lines: u64,
119 pub comment_lines: u64,
120 pub blank_lines: u64,
121 pub mixed_lines_separate: u64,
122}
123
124#[derive(Debug, Clone, Serialize, Deserialize)]
125pub struct ToolMetadata {
126 pub name: String,
127 pub version: String,
128 pub run_id: String,
129 pub timestamp_utc: DateTime<Utc>,
130}
131
132#[derive(Debug, Clone, Serialize, Deserialize)]
133pub struct EnvironmentMetadata {
134 pub operating_system: String,
135 pub architecture: String,
136 pub runtime_mode: String,
137 pub initiator_username: String,
138 pub initiator_hostname: String,
139 #[serde(default, skip_serializing_if = "Option::is_none")]
142 pub ci_name: Option<String>,
143}
144
145#[derive(Debug, Clone, Serialize, Deserialize, Default)]
146pub struct SummaryTotals {
147 pub files_considered: u64,
148 pub files_analyzed: u64,
149 pub files_skipped: u64,
150 pub total_physical_lines: u64,
151 pub code_lines: u64,
152 pub comment_lines: u64,
153 pub blank_lines: u64,
154 pub mixed_lines_separate: u64,
155 #[serde(default)]
156 pub functions: u64,
157 #[serde(default)]
158 pub classes: u64,
159 #[serde(default)]
160 pub variables: u64,
161 #[serde(default)]
162 pub imports: u64,
163 #[serde(default)]
164 pub test_count: u64,
165 #[serde(default)]
167 pub test_assertion_count: u64,
168 #[serde(default)]
170 pub test_suite_count: u64,
171 #[serde(default)]
173 pub coverage_lines_found: u64,
174 #[serde(default)]
175 pub coverage_lines_hit: u64,
176 #[serde(default)]
177 pub coverage_functions_found: u64,
178 #[serde(default)]
179 pub coverage_functions_hit: u64,
180 #[serde(default)]
181 pub coverage_branches_found: u64,
182 #[serde(default)]
183 pub coverage_branches_hit: u64,
184 #[serde(default)]
186 pub cyclomatic_complexity: u64,
187 #[serde(default, skip_serializing_if = "Option::is_none")]
189 pub lsloc: Option<u64>,
190}
191
192#[derive(Debug, Clone, Serialize, Deserialize)]
193pub struct LanguageSummary {
194 pub language: Language,
195 pub files: u64,
196 pub total_physical_lines: u64,
197 pub code_lines: u64,
198 pub comment_lines: u64,
199 pub blank_lines: u64,
200 pub mixed_lines_separate: u64,
201 #[serde(default)]
202 pub functions: u64,
203 #[serde(default)]
204 pub classes: u64,
205 #[serde(default)]
206 pub variables: u64,
207 #[serde(default)]
208 pub imports: u64,
209 #[serde(default)]
210 pub test_count: u64,
211 #[serde(default)]
212 pub test_assertion_count: u64,
213 #[serde(default)]
214 pub test_suite_count: u64,
215 #[serde(default)]
216 pub coverage_lines_found: u64,
217 #[serde(default)]
218 pub coverage_lines_hit: u64,
219 #[serde(default)]
220 pub coverage_functions_found: u64,
221 #[serde(default)]
222 pub coverage_functions_hit: u64,
223 #[serde(default)]
224 pub coverage_branches_found: u64,
225 #[serde(default)]
226 pub coverage_branches_hit: u64,
227 #[serde(default)]
228 pub cyclomatic_complexity: u64,
229 #[serde(default, skip_serializing_if = "Option::is_none")]
230 pub lsloc: Option<u64>,
231}
232
233#[derive(Debug, Clone, Serialize, Deserialize)]
234pub struct FileRecord {
235 pub path: String,
236 pub relative_path: String,
237 pub language: Option<Language>,
238 pub size_bytes: u64,
239 pub detected_encoding: Option<String>,
240 pub raw_line_categories: RawLineCounts,
241 pub effective_counts: EffectiveCounts,
242 pub status: FileStatus,
243 pub warnings: Vec<String>,
244 pub generated: bool,
245 pub minified: bool,
246 pub vendor: bool,
247 pub parse_mode: Option<ParseMode>,
248 #[serde(skip_serializing_if = "Option::is_none")]
249 pub submodule: Option<String>,
250 #[serde(default, skip_serializing_if = "Option::is_none")]
252 pub coverage: Option<FileCoverage>,
253 #[serde(default, skip_serializing_if = "Option::is_none")]
255 pub style_analysis: Option<StyleAnalysis>,
256 #[serde(default, skip_serializing_if = "Option::is_none")]
258 pub cyclomatic_complexity: Option<u32>,
259 #[serde(default, skip_serializing_if = "Option::is_none")]
261 pub lsloc: Option<u32>,
262 #[serde(skip)]
265 pub content_hash: u64,
266}
267
268#[derive(Debug, Clone, Serialize, Deserialize)]
270pub struct LanguageStyleGroup {
271 pub language_family: String,
273 pub files_count: u32,
275 pub dominant_guide: String,
277 pub dominant_score_pct: u8,
279 pub common_indent_style: String,
281 pub guide_avg_scores: Vec<(String, u8)>,
283 pub line80_compliant_pct: u8,
285 pub line_col_compliant_pct: u8,
287}
288
289#[derive(Debug, Clone, Serialize, Deserialize)]
291pub struct StyleSummary {
292 pub files_analyzed: u32,
294 pub common_indent_style: String,
296 pub line80_compliant_pct: u8,
298 pub line_col_compliant_pct: u8,
300 pub col_threshold: u16,
302 pub by_language: Vec<LanguageStyleGroup>,
304}
305
306pub type CppStyleSummary = StyleSummary;
309
310#[derive(Debug, Clone, Serialize, Deserialize)]
312pub struct SubmoduleSummary {
313 pub name: String,
314 pub relative_path: String,
315 pub files_analyzed: u64,
316 pub total_physical_lines: u64,
317 pub code_lines: u64,
318 pub comment_lines: u64,
319 pub blank_lines: u64,
320 pub language_summaries: Vec<LanguageSummary>,
321 #[serde(default, skip_serializing_if = "Option::is_none")]
323 pub git_commit_short: Option<String>,
324 #[serde(default, skip_serializing_if = "Option::is_none")]
326 pub git_commit_long: Option<String>,
327 #[serde(default, skip_serializing_if = "Option::is_none")]
329 pub git_branch: Option<String>,
330 #[serde(default, skip_serializing_if = "Option::is_none")]
332 pub git_commit_author: Option<String>,
333 #[serde(default, skip_serializing_if = "Option::is_none")]
335 pub git_commit_date: Option<String>,
336 #[serde(default, skip_serializing_if = "Option::is_none")]
338 pub git_remote_url: Option<String>,
339}
340
341#[derive(Debug, Clone, Serialize, Deserialize)]
342pub struct AnalysisRun {
343 pub tool: ToolMetadata,
344 pub environment: EnvironmentMetadata,
345 pub effective_configuration: AppConfig,
346 pub input_roots: Vec<String>,
347 pub summary_totals: SummaryTotals,
348 pub totals_by_language: Vec<LanguageSummary>,
349 pub per_file_records: Vec<FileRecord>,
350 pub skipped_file_records: Vec<FileRecord>,
351 pub warnings: Vec<String>,
352 #[serde(default, skip_serializing_if = "Vec::is_empty")]
354 pub submodule_summaries: Vec<SubmoduleSummary>,
355 #[serde(default, skip_serializing_if = "Option::is_none")]
357 pub git_commit_short: Option<String>,
358 #[serde(default, skip_serializing_if = "Option::is_none")]
360 pub git_commit_long: Option<String>,
361 #[serde(default, skip_serializing_if = "Option::is_none")]
363 pub git_branch: Option<String>,
364 #[serde(default, skip_serializing_if = "Option::is_none")]
366 pub git_commit_author: Option<String>,
367 #[serde(default, skip_serializing_if = "Option::is_none")]
369 pub git_tags: Option<String>,
370 #[serde(default, skip_serializing_if = "Option::is_none")]
372 pub git_nearest_tag: Option<String>,
373 #[serde(default, skip_serializing_if = "Option::is_none")]
375 pub git_commit_date: Option<String>,
376 #[serde(default, skip_serializing_if = "Option::is_none")]
378 pub git_remote_url: Option<String>,
379 #[serde(default, skip_serializing_if = "Option::is_none")]
381 pub style_summary: Option<StyleSummary>,
382 #[serde(default, skip_serializing_if = "Option::is_none")]
384 pub cocomo: Option<CocomoEstimate>,
385 #[serde(default)]
387 pub uloc: u64,
388 #[serde(default, skip_serializing_if = "Option::is_none")]
390 pub dryness_pct: Option<f32>,
391 #[serde(default, skip_serializing_if = "Vec::is_empty")]
393 pub duplicate_groups: Vec<Vec<String>>,
394 #[serde(default)]
396 pub duplicates_excluded: usize,
397}
398
/// Git metadata gathered for a run by `detect_git_for_run`; every field is
/// `None` by default.
#[derive(Default)]
struct GitInfo {
    /// First seven characters of `commit_long`.
    commit_short: Option<String>,
    /// Commit id resolved from `HEAD`.
    commit_long: Option<String>,
    /// Branch named by `HEAD`, or the branch reported by CI variables.
    branch: Option<String>,
    /// Author name of the `HEAD` commit.
    author: Option<String>,
    /// Tags pointing at `HEAD`, joined with `", "`.
    tags: Option<String>,
    /// Nearest tag reachable from `HEAD`.
    nearest_tag: Option<String>,
    /// Author date of the `HEAD` commit.
    commit_date: Option<String>,
    /// URL of the `origin` remote.
    remote_url: Option<String>,
}
410
411fn find_git_dir(start: &Path) -> Option<PathBuf> {
415 let mut current = Some(start);
416 while let Some(dir) = current {
417 let candidate = dir.join(".git");
418 if candidate.is_dir() {
419 return Some(candidate);
420 }
421 if candidate.is_file() {
422 if let Some(resolved) = resolve_git_file_pointer(&candidate, dir) {
423 return Some(resolved);
424 }
425 }
426 current = dir.parent();
427 }
428 None
429}
430
/// Resolves a `.git` pointer file (`gitdir: <path>`) to the directory it names.
///
/// * `file` – the pointer file to read.
/// * `base_dir` – directory against which a relative pointer is resolved.
///
/// Forward slashes in the pointer are converted to the platform separator
/// first. The result is canonicalized when possible (the un-canonicalized path
/// is kept otherwise) and only returned if it is an existing directory.
/// Returns `None` if the file is unreadable or lacks the `gitdir: ` prefix.
fn resolve_git_file_pointer(file: &Path, base_dir: &Path) -> Option<PathBuf> {
    let text = fs::read_to_string(file).ok()?;
    let pointer = text.trim().strip_prefix("gitdir: ")?;
    let target = PathBuf::from(pointer.replace('/', std::path::MAIN_SEPARATOR_STR));

    let joined = if target.is_absolute() {
        target
    } else {
        base_dir.join(&target)
    };
    let git_dir = match joined.canonicalize() {
        Ok(canonical) => canonical,
        Err(_) => joined,
    };

    git_dir.is_dir().then_some(git_dir)
}
455
/// Resolves `refname` (e.g. `refs/heads/main`) to a commit id using only the
/// files inside `git_dir`.
///
/// Lookup order:
/// 1. the loose ref file `git_dir/<refname>`, accepted when its trimmed content
///    is at least 40 characters long and entirely hexadecimal;
/// 2. the `packed-refs` file, scanned for a `<sha> <refname>` line.
///
/// Comment (`#`) and peeled-tag (`^`) lines in `packed-refs` are ignored, and
/// so is any line that has no space separator: a blank or malformed line must
/// not end the search before later entries have been examined.
///
/// Returns `None` when neither source yields the ref.
fn resolve_ref(git_dir: &Path, refname: &str) -> Option<String> {
    // Join component by component so the platform separator is used.
    let loose_path = refname
        .split('/')
        .fold(git_dir.to_path_buf(), |path, part| path.join(part));

    // A missing or unreadable loose ref just falls through to packed-refs.
    let loose = fs::read_to_string(&loose_path)
        .ok()
        .map(|text| text.trim().to_string())
        .filter(|sha| sha.len() >= 40 && sha.chars().all(|c| c.is_ascii_hexdigit()));
    if loose.is_some() {
        return loose;
    }

    let packed = fs::read_to_string(git_dir.join("packed-refs")).ok()?;
    packed
        .lines()
        .filter(|line| !line.starts_with('#') && !line.starts_with('^'))
        .filter_map(|line| line.split_once(' '))
        .find(|(_, name)| name.trim() == refname)
        .map(|(sha, _)| sha.to_string())
}
491
/// Extracts the value from a git-config `url = <value>` line.
///
/// The line must start with the literal `url`, optionally followed by spaces
/// or tabs, then `=`. The value is trimmed; an empty value, or any line that
/// does not have this shape, yields `None`.
fn parse_url_line(line: &str) -> Option<&str> {
    let after_key = line.strip_prefix("url")?;
    let after_gap = after_key.trim_start_matches(|c: char| c == ' ' || c == '\t');
    let value = after_gap.strip_prefix('=')?.trim();
    (!value.is_empty()).then_some(value)
}
503
504fn read_git_remote_url(git_dir: &Path) -> Option<String> {
506 let config = fs::read_to_string(git_dir.join("config")).ok()?;
507 let mut in_origin = false;
508 for line in config.lines() {
509 let trimmed = line.trim();
510 if trimmed.starts_with('[') {
511 in_origin = trimmed == r#"[remote "origin"]"#;
512 } else if in_origin {
513 if let Some(url) = parse_url_line(trimmed) {
514 return Some(url.to_owned());
515 }
516 }
517 }
518 None
519}
520
521fn detect_git_for_run(project_path: &Path) -> GitInfo {
525 let ci_branch = ci_branch_from_env();
527
528 let Some(git_dir) = find_git_dir(project_path) else {
529 return GitInfo {
532 branch: ci_branch,
533 ..GitInfo::default()
534 };
535 };
536
537 let head_raw = match fs::read_to_string(git_dir.join("HEAD")) {
538 Ok(s) => s.trim().to_string(),
539 Err(_) => {
540 return GitInfo {
541 branch: ci_branch,
542 ..GitInfo::default()
543 }
544 }
545 };
546
547 let (branch_from_head, commit_long) = head_raw.strip_prefix("ref: ").map_or_else(
548 || {
549 if head_raw.len() >= 40 && head_raw.chars().all(|c| c.is_ascii_hexdigit()) {
550 (None, Some(head_raw[..40].to_string()))
552 } else {
553 (None, None)
554 }
555 },
556 |refname| {
557 let branch = refname
558 .strip_prefix("refs/heads/")
559 .map(|b| b.trim().to_string());
560 let sha = resolve_ref(&git_dir, refname.trim());
561 (branch, sha)
562 },
563 );
564 let branch = branch_from_head.or(ci_branch);
567
568 let commit_short = commit_long
569 .as_deref()
570 .map(|s| s.chars().take(7).collect::<String>());
571
572 let author = run_git_cmd(project_path, &["log", "-1", "--format=%an", "HEAD"]);
573 let commit_date = run_git_cmd(project_path, &["log", "-1", "--format=%aI", "HEAD"]);
574 let remote_url = read_git_remote_url(&git_dir);
575
576 let tags = run_git_cmd(project_path, &["tag", "--points-at", "HEAD"]).map(|t| {
579 t.lines()
580 .filter(|l| !l.is_empty())
581 .collect::<Vec<_>>()
582 .join(", ")
583 });
584 let nearest_tag = run_git_cmd(project_path, &["describe", "--tags", "--abbrev=0", "HEAD"]);
585
586 GitInfo {
587 commit_short,
588 commit_long,
589 branch,
590 author,
591 tags,
592 nearest_tag,
593 commit_date,
594 remote_url,
595 }
596}
597
/// Runs `git <args>` in `dir` and returns its trimmed standard output.
///
/// The executable is looked up on `PATH` first and then at a list of common
/// install locations. The fallback locations are tried **only** when the
/// previous candidate could not be started at all: once some `git` has actually
/// run, its result is final, so a command that legitimately fails (for example
/// `git describe` in a repository without tags) is not re-executed once per
/// candidate.
///
/// `-c safe.directory=*` is passed on every invocation.
///
/// Returns `None` when no candidate could be started, when the command exits
/// unsuccessfully, when the output is not valid UTF-8, or when the trimmed
/// output is empty.
fn run_git_cmd(dir: &Path, args: &[&str]) -> Option<String> {
    const CANDIDATES: &[&str] = &[
        "git",
        "/usr/bin/git",
        "/usr/local/bin/git",
        "/opt/homebrew/bin/git",
        r"C:\Program Files\Git\cmd\git.exe",
        r"C:\Program Files\Git\bin\git.exe",
        r"C:\Program Files (x86)\Git\cmd\git.exe",
    ];

    for &exe in CANDIDATES {
        let Ok(output) = std::process::Command::new(exe)
            .args(["-c", "safe.directory=*"])
            .args(args)
            .current_dir(dir)
            .output()
        else {
            // This candidate could not be spawned; try the next location.
            continue;
        };

        if !output.status.success() {
            return None;
        }
        return String::from_utf8(output.stdout)
            .ok()
            .map(|text| text.trim().to_string())
            .filter(|text| !text.is_empty());
    }
    None
}
632
/// Identifies the CI system from well-known environment variables.
///
/// Checked in this order, first match wins:
/// 1. Jenkins – any of `JENKINS_URL`, `JENKINS_HOME`, `BUILD_URL` is set;
/// 2. GitHub Actions, GitLab CI, CircleCI, Travis CI, Azure DevOps – the
///    respective flag variable equals exactly `"true"`;
/// 3. TeamCity – `TEAMCITY_VERSION` is set.
///
/// Returns `None` when nothing matches.
fn detect_ci_system() -> Option<&'static str> {
    fn is_set(key: &str) -> bool {
        std::env::var(key).is_ok()
    }
    fn is_true(key: &str) -> bool {
        matches!(std::env::var(key).as_deref(), Ok("true"))
    }

    // (flag variable, display name) for systems announcing themselves with "true".
    const FLAGGED: [(&str, &str); 5] = [
        ("GITHUB_ACTIONS", "GitHub Actions"),
        ("GITLAB_CI", "GitLab CI"),
        ("CIRCLECI", "CircleCI"),
        ("TRAVIS", "Travis CI"),
        ("TF_BUILD", "Azure DevOps"),
    ];

    let jenkins = ["JENKINS_URL", "JENKINS_HOME", "BUILD_URL"]
        .iter()
        .any(|key| is_set(key));
    if jenkins {
        return Some("Jenkins");
    }

    let flagged = FLAGGED
        .iter()
        .find(|(flag, _)| is_true(flag))
        .map(|&(_, label)| label);
    if flagged.is_some() {
        return flagged;
    }

    is_set("TEAMCITY_VERSION").then_some("TeamCity")
}
660
/// Reads the current branch name from CI environment variables.
///
/// The variables are consulted in a fixed order and the first usable value
/// wins. A value is trimmed, then stripped of a leading `refs/heads/` (or,
/// failing that, a leading `origin/`). Values that end up empty or equal to
/// `HEAD` are not usable and the next variable is tried.
fn ci_branch_from_env() -> Option<String> {
    const VARS: [&str; 7] = [
        "BRANCH_NAME",
        "GIT_BRANCH",
        "GITHUB_REF_NAME",
        "CI_COMMIT_BRANCH",
        "CIRCLE_BRANCH",
        "TRAVIS_BRANCH",
        "BUILD_SOURCEBRANCH",
    ];

    VARS.iter().find_map(|name| {
        let raw = std::env::var(name).ok()?;
        let trimmed = raw.trim();
        let branch = match trimmed.strip_prefix("refs/heads/") {
            Some(rest) => rest,
            None => trimmed.strip_prefix("origin/").unwrap_or(trimmed),
        };
        (!branch.is_empty() && branch != "HEAD").then(|| branch.to_string())
    })
}
687
/// Returns the name of the user who started the run.
///
/// Taken from `USERNAME`, else from `USER`; `"unknown"` when neither variable
/// can be read.
fn get_current_username() -> String {
    for key in ["USERNAME", "USER"] {
        if let Ok(name) = std::env::var(key) {
            return name;
        }
    }
    "unknown".to_string()
}
693
/// Returns the value of environment variable `var`, treating an unset,
/// unreadable or empty variable as `None`.
fn non_empty_env(var: &str) -> Option<String> {
    std::env::var(var).ok().filter(|value| !value.is_empty())
}
702
/// Reports whether the process appears to run under Jenkins, i.e. whether any
/// of `JENKINS_URL`, `JENKINS_HOME` or `BUILD_URL` is set (even to an empty
/// value).
fn is_jenkins_env() -> bool {
    ["JENKINS_URL", "JENKINS_HOME", "BUILD_URL"]
        .iter()
        .any(|key| std::env::var(key).is_ok())
}
708
709fn get_hostname() -> String {
710 if is_jenkins_env() {
713 if let Some(n) = non_empty_env("NODE_NAME") {
714 return n;
715 }
716 }
717 if std::env::var("GITHUB_ACTIONS").as_deref() == Ok("true") {
718 if let Some(r) = non_empty_env("RUNNER_NAME") {
719 return r;
720 }
721 }
722 if std::env::var("GITLAB_CI").as_deref() == Ok("true") {
723 if let Some(r) = non_empty_env("CI_RUNNER_DESCRIPTION") {
724 return r;
725 }
726 }
727 std::env::var("COMPUTERNAME")
728 .or_else(|_| std::env::var("HOSTNAME"))
729 .or_else(|_| std::fs::read_to_string("/etc/hostname").map(|s| s.trim().to_string()))
730 .unwrap_or_else(|_| "unknown".to_string())
731}
732
733#[allow(clippy::too_many_arguments)]
735fn walk_root(
736 root: &Path,
737 config: &AppConfig,
738 include_globs: Option<&GlobSet>,
739 exclude_globs: Option<&GlobSet>,
740 enabled_languages: Option<&BTreeSet<Language>>,
741 seen_paths: &mut HashSet<PathBuf>,
742 analyzed: &mut Vec<FileRecord>,
743 skipped: &mut Vec<FileRecord>,
744 warnings: &mut Vec<String>,
745 cancel: Option<&AtomicBool>,
746 progress: Option<&ProgressCounters>,
747) -> Result<()> {
748 let mut builder = WalkBuilder::new(root);
749 builder
750 .follow_links(config.discovery.follow_symlinks)
751 .hidden(config.discovery.ignore_hidden_files)
752 .ignore(config.discovery.honor_ignore_files)
753 .parents(config.discovery.honor_ignore_files)
754 .git_ignore(config.discovery.honor_ignore_files)
755 .git_global(config.discovery.honor_ignore_files)
756 .git_exclude(config.discovery.honor_ignore_files);
757
758 let paths = collect_walk_paths(&builder, seen_paths, warnings);
759 if paths.is_empty() {
760 return Ok(());
761 }
762
763 if let Some(p) = progress {
764 p.files_total.fetch_add(paths.len(), Ordering::Relaxed);
765 }
766
767 let chunk_results = run_parallel_analysis(
768 &paths,
769 root,
770 config,
771 include_globs,
772 exclude_globs,
773 enabled_languages,
774 cancel,
775 progress,
776 )?;
777 merge_chunk_results(chunk_results, analyzed, skipped, warnings)
778}
779
780fn collect_walk_paths(
781 builder: &WalkBuilder,
782 seen_paths: &mut HashSet<PathBuf>,
783 warnings: &mut Vec<String>,
784) -> Vec<PathBuf> {
785 let (tx, rx) = std::sync::mpsc::channel::<std::result::Result<PathBuf, String>>();
789
790 builder.build_parallel().run(|| {
791 let tx = tx.clone();
792 Box::new(move |entry| {
793 match entry {
794 Err(e) => {
795 let _ = tx.send(Err(format!("discovery warning: {e}")));
796 }
797 Ok(e) => {
798 let path = e.into_path();
799 if !path.is_dir() {
800 let _ = tx.send(Ok(path));
801 }
802 }
803 }
804 ignore::WalkState::Continue
805 })
806 });
807
808 drop(tx);
811
812 rx.into_iter()
813 .filter_map(|msg| match msg {
814 Ok(path) => {
815 if seen_paths.insert(path.clone()) {
816 Some(path)
817 } else {
818 None
819 }
820 }
821 Err(warn) => {
822 warnings.push(warn);
823 None
824 }
825 })
826 .collect()
827}
828
829#[allow(clippy::too_many_arguments)]
831fn worker_loop(
832 paths: &[PathBuf],
833 root: &Path,
834 config: &AppConfig,
835 include_globs: Option<&GlobSet>,
836 exclude_globs: Option<&GlobSet>,
837 enabled_languages: Option<&BTreeSet<Language>>,
838 cancel: Option<&AtomicBool>,
839 next_index: &AtomicUsize,
840 files_done: Option<&AtomicUsize>,
841) -> Vec<Result<Option<FileRecord>>> {
842 let mut results = Vec::new();
843 loop {
844 if cancel.is_some_and(|c| c.load(Ordering::Relaxed)) {
845 results.push(Err(anyhow::anyhow!("analysis cancelled")));
846 break;
847 }
848 let i = next_index.fetch_add(1, Ordering::Relaxed);
849 if i >= paths.len() {
850 break;
851 }
852 results.push(analyze_candidate_file(
853 &paths[i],
854 root,
855 config,
856 include_globs,
857 exclude_globs,
858 enabled_languages,
859 ));
860 if let Some(fd) = files_done {
861 fd.fetch_add(1, Ordering::Relaxed);
862 }
863 }
864 results
865}
866
867#[allow(clippy::too_many_arguments)]
868fn run_parallel_analysis(
869 paths: &[PathBuf],
870 root: &Path,
871 config: &AppConfig,
872 include_globs: Option<&GlobSet>,
873 exclude_globs: Option<&GlobSet>,
874 enabled_languages: Option<&BTreeSet<Language>>,
875 cancel: Option<&AtomicBool>,
876 progress: Option<&ProgressCounters>,
877) -> Result<Vec<Vec<Result<Option<FileRecord>>>>> {
878 let thread_count = std::thread::available_parallelism().map_or(DEFAULT_ANALYSIS_THREADS, |n| {
879 n.get().min(MAX_ANALYSIS_THREADS)
880 });
881 let next_index = AtomicUsize::new(0);
885 let files_done: Option<&AtomicUsize> = progress.map(|p| p.files_done.as_ref());
886
887 std::thread::scope(|s| -> Result<Vec<Vec<Result<Option<FileRecord>>>>> {
888 let mut handles = Vec::with_capacity(thread_count);
891 for _ in 0..thread_count {
892 handles.push(s.spawn(|| {
893 worker_loop(
894 paths,
895 root,
896 config,
897 include_globs,
898 exclude_globs,
899 enabled_languages,
900 cancel,
901 &next_index,
902 files_done,
903 )
904 }));
905 }
906 handles
907 .into_iter()
908 .map(|h| {
909 h.join()
910 .map_err(|_| anyhow::anyhow!("analysis thread panicked"))
911 })
912 .collect()
913 })
914}
915
916fn merge_chunk_results(
917 chunk_results: Vec<Vec<Result<Option<FileRecord>>>>,
918 analyzed: &mut Vec<FileRecord>,
919 skipped: &mut Vec<FileRecord>,
920 warnings: &mut Vec<String>,
921) -> Result<()> {
922 for chunk in chunk_results {
923 for result in chunk {
924 if let Some(record) = result? {
925 push_record(record, analyzed, skipped, warnings);
926 }
927 }
928 }
929 Ok(())
930}
931
932fn process_submodules(config: &AppConfig, analyzed: &mut [FileRecord]) -> Vec<SubmoduleSummary> {
934 let root = config.discovery.root_paths[0]
935 .canonicalize()
936 .unwrap_or_else(|_| config.discovery.root_paths[0].clone());
937 let submodules = detect_submodules(&root);
938 if submodules.is_empty() {
939 return Vec::new();
940 }
941
942 for file in analyzed.iter_mut() {
943 for (name, sub_path) in &submodules {
944 let prefix = sub_path.to_string_lossy().replace('\\', "/");
945 let rel = &file.relative_path;
946 if rel == &prefix || rel.starts_with(&format!("{prefix}/")) {
947 file.submodule = Some(name.clone());
948 break;
949 }
950 }
951 }
952
953 build_submodule_summaries(analyzed, &submodules, &root)
954}
955
956#[allow(clippy::cast_precision_loss)] fn compute_cocomo(code_lines: u64, mode: CocomoMode) -> CocomoEstimate {
959 let ksloc = code_lines as f64 / 1_000.0;
960 let (a, b, c, d): (f64, f64, f64, f64) = match mode {
961 CocomoMode::Organic => (2.4, 1.05, 2.5, 0.38),
962 CocomoMode::SemiDetached => (3.0, 1.12, 2.5, 0.35),
963 CocomoMode::Embedded => (3.6, 1.20, 2.5, 0.32),
964 };
965 let effort = a * ksloc.powf(b);
966 let duration = c * effort.powf(d);
967 let avg_staff = if duration > 0.0 {
968 effort / duration
969 } else {
970 0.0
971 };
972 CocomoEstimate {
974 mode,
975 ksloc: (ksloc * 100.0).round() / 100.0,
976 effort_person_months: (effort * 100.0).round() / 100.0,
977 duration_months: (duration * 100.0).round() / 100.0,
978 avg_staff: (avg_staff * 100.0).round() / 100.0,
979 }
980}
981
982#[allow(clippy::cast_precision_loss)] fn compute_uloc(analyzed: &[FileRecord]) -> (u64, Option<f32>) {
985 use std::collections::HashSet as StdHashSet;
986 let mut unique: StdHashSet<u64> = StdHashSet::new();
987 let mut total_code: u64 = 0;
988 for record in analyzed {
989 total_code += record.effective_counts.code_lines;
990 for &hash in &record.raw_line_categories.code_line_hashes {
991 unique.insert(hash);
992 }
993 }
994 let uloc = unique.len() as u64;
995 let dryness = if total_code > 0 {
996 Some((uloc as f32 / total_code as f32) * 100.0)
997 } else {
998 None
999 };
1000 (uloc, dryness)
1001}
1002
1003fn find_duplicate_groups(analyzed: &[FileRecord]) -> Vec<Vec<String>> {
1006 let mut by_hash: std::collections::HashMap<u64, Vec<&str>> = std::collections::HashMap::new();
1007 for record in analyzed {
1008 if record.content_hash != 0 {
1009 by_hash
1010 .entry(record.content_hash)
1011 .or_default()
1012 .push(&record.relative_path);
1013 }
1014 }
1015 let mut groups: Vec<Vec<String>> = by_hash
1016 .into_values()
1017 .filter(|v| v.len() >= 2)
1018 .map(|v| {
1019 let mut paths: Vec<String> = v.into_iter().map(str::to_owned).collect();
1020 paths.sort();
1021 paths
1022 })
1023 .collect();
1024 groups.sort_by(|a, b| a[0].cmp(&b[0]));
1025 groups
1026}
1027
1028fn assemble_run(
1030 config: &AppConfig,
1031 runtime_mode: &str,
1032 analyzed: Vec<FileRecord>,
1033 skipped: Vec<FileRecord>,
1034 warnings: Vec<String>,
1035 submodule_summaries: Vec<SubmoduleSummary>,
1036) -> AnalysisRun {
1037 let summary = build_summary(&analyzed, &skipped);
1038 let language_summaries = build_language_summaries(&analyzed);
1039 let col_threshold = config.analysis.style_col_threshold;
1040 let style_summary = build_style_summary(&analyzed, col_threshold);
1041
1042 let (uloc, dryness_pct) = compute_uloc(&analyzed);
1044 let duplicate_groups = find_duplicate_groups(&analyzed);
1045 let cocomo = if summary.code_lines > 0 {
1046 Some(compute_cocomo(summary.code_lines, CocomoMode::Organic))
1047 } else {
1048 None
1049 };
1050
1051 let first_root = config
1052 .discovery
1053 .root_paths
1054 .first()
1055 .map(|p| p.canonicalize().unwrap_or_else(|_| p.clone()));
1056 let git = first_root
1057 .as_deref()
1058 .map(detect_git_for_run)
1059 .unwrap_or_default();
1060
1061 let now = Utc::now();
1062 let run_id = {
1063 let uuid_suffix = Uuid::new_v4().simple().to_string();
1064 format!("{}-{}", now.format("%Y%m%d-%H%M"), uuid_suffix)
1065 };
1066
1067 AnalysisRun {
1068 tool: ToolMetadata {
1069 name: "sloc".into(),
1070 version: env!("CARGO_PKG_VERSION").into(),
1071 run_id,
1072 timestamp_utc: now,
1073 },
1074 environment: EnvironmentMetadata {
1075 operating_system: std::env::consts::OS.into(),
1076 architecture: std::env::consts::ARCH.into(),
1077 runtime_mode: runtime_mode.into(),
1078 initiator_username: get_current_username(),
1079 initiator_hostname: get_hostname(),
1080 ci_name: if is_jenkins_env() {
1081 Some(format!("Jenkins\t{}", get_hostname()))
1082 } else {
1083 detect_ci_system().map(str::to_string)
1084 },
1085 },
1086 effective_configuration: config.clone(),
1087 input_roots: config
1088 .discovery
1089 .root_paths
1090 .iter()
1091 .map(|p| path_to_string(p))
1092 .collect(),
1093 summary_totals: summary,
1094 totals_by_language: language_summaries,
1095 per_file_records: analyzed,
1096 skipped_file_records: skipped,
1097 warnings,
1098 submodule_summaries,
1099 git_commit_short: git.commit_short,
1100 git_commit_long: git.commit_long,
1101 git_branch: git.branch,
1102 git_commit_author: git.author,
1103 git_tags: git.tags,
1104 git_nearest_tag: git.nearest_tag,
1105 git_commit_date: git.commit_date,
1106 git_remote_url: git.remote_url,
1107 style_summary,
1108 cocomo,
1109 uloc,
1110 dryness_pct,
1111 duplicate_groups,
1112 duplicates_excluded: 0,
1113 }
1114}
1115
1116#[allow(clippy::too_many_lines)]
1121pub fn analyze(
1122 config: &AppConfig,
1123 runtime_mode: &str,
1124 cancel: Option<&AtomicBool>,
1125 progress: Option<&ProgressCounters>,
1126) -> Result<AnalysisRun> {
1127 config.validate()?;
1128
1129 if config.discovery.root_paths.is_empty() {
1130 anyhow::bail!("no input paths were provided");
1131 }
1132
1133 let include_globs = compile_globset(&config.discovery.include_globs)?;
1134 let exclude_globs = compile_globset(&config.discovery.exclude_globs)?;
1135 let enabled_languages = parse_enabled_languages(&config.analysis.enabled_languages)?;
1136
1137 let mut analyzed = Vec::new();
1138 let mut skipped = Vec::new();
1139 let mut warnings = Vec::new();
1140 let mut seen_paths = HashSet::new();
1141
1142 for root in &config.discovery.root_paths {
1143 if cancel.is_some_and(|c| c.load(Ordering::Relaxed)) {
1144 anyhow::bail!("analysis cancelled");
1145 }
1146
1147 let root = root.canonicalize().unwrap_or_else(|_| root.clone());
1148
1149 if root.is_file() {
1150 if let Some(record) = analyze_candidate_file(
1151 &root,
1152 root.parent().unwrap_or_else(|| Path::new(".")),
1153 config,
1154 include_globs.as_ref(),
1155 exclude_globs.as_ref(),
1156 enabled_languages.as_ref(),
1157 )? {
1158 push_record(record, &mut analyzed, &mut skipped, &mut warnings);
1159 }
1160 continue;
1161 }
1162
1163 walk_root(
1164 &root,
1165 config,
1166 include_globs.as_ref(),
1167 exclude_globs.as_ref(),
1168 enabled_languages.as_ref(),
1169 &mut seen_paths,
1170 &mut analyzed,
1171 &mut skipped,
1172 &mut warnings,
1173 cancel,
1174 progress,
1175 )?;
1176 }
1177
1178 analyzed.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
1179 skipped.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
1180
1181 let submodule_summaries = if config.discovery.submodule_breakdown {
1183 process_submodules(config, &mut analyzed)
1184 } else {
1185 Vec::new()
1186 };
1187
1188 attach_coverage(config, &mut analyzed, &mut warnings);
1189
1190 Ok(assemble_run(
1191 config,
1192 runtime_mode,
1193 analyzed,
1194 skipped,
1195 warnings,
1196 submodule_summaries,
1197 ))
1198}
1199
1200fn attach_coverage(config: &AppConfig, analyzed: &mut [FileRecord], warnings: &mut Vec<String>) {
1201 let Some(cov_path) = coverage::resolve_coverage_file(config.analysis.coverage_file.as_deref())
1202 else {
1203 return;
1204 };
1205 tracing::debug!(path = %cov_path.display(), "loading coverage file");
1206 match fs::read_to_string(&cov_path) {
1207 Ok(content) => {
1208 let cov_map = coverage::parse_coverage_auto(&cov_path, &content);
1209 let mut matched: u32 = 0;
1210 let mut unmatched: u32 = 0;
1211 for record in analyzed.iter_mut() {
1212 record.coverage =
1213 coverage::lookup_coverage(&cov_map, &record.relative_path).cloned();
1214 if record.coverage.is_some() {
1215 matched += 1;
1216 } else {
1217 unmatched += 1;
1218 }
1219 }
1220 tracing::debug!(
1221 path = %cov_path.display(),
1222 coverage_entries = cov_map.len(),
1223 files_matched = matched,
1224 files_unmatched = unmatched,
1225 "coverage attached"
1226 );
1227 if unmatched > 0 && matched == 0 {
1228 tracing::warn!(
1229 path = %cov_path.display(),
1230 "coverage file loaded but no source files could be matched — check that paths in the coverage report match the scanned directory"
1231 );
1232 }
1233 }
1234 Err(e) => {
1235 tracing::warn!(path = %cov_path.display(), error = %e, "coverage file could not be read");
1236 warnings.push(format!(
1237 "coverage file '{}' could not be read: {e}",
1238 cov_path.display()
1239 ));
1240 }
1241 }
1242}
1243
1244fn push_record(
1245 record: FileRecord,
1246 analyzed: &mut Vec<FileRecord>,
1247 skipped: &mut Vec<FileRecord>,
1248 warnings: &mut Vec<String>,
1249) {
1250 warnings.extend(
1251 record
1252 .warnings
1253 .iter()
1254 .map(|warning| format!("{}: {warning}", record.relative_path)),
1255 );
1256
1257 match record.status {
1258 FileStatus::AnalyzedExact | FileStatus::AnalyzedBestEffort => analyzed.push(record),
1259 _ => skipped.push(record),
1260 }
1261}
1262
1263#[inline]
1265fn skip_with_reason(
1266 path: &Path,
1267 root: &Path,
1268 size: u64,
1269 reason: impl Into<String>,
1270) -> MetadataPolicyOutcome {
1271 MetadataPolicyOutcome::Skip(Box::new(skipped_record(
1272 path,
1273 root,
1274 size,
1275 FileStatus::SkippedByPolicy,
1276 vec![reason.into()],
1277 )))
1278}
1279
1280#[allow(clippy::too_many_arguments)]
1284fn check_metadata_policy(
1285 path: &Path,
1286 root: &Path,
1287 relative_path: &str,
1288 metadata: &fs::Metadata,
1289 config: &AppConfig,
1290 include_globs: Option<&GlobSet>,
1291 exclude_globs: Option<&GlobSet>,
1292) -> MetadataPolicyOutcome {
1293 let size = metadata.len();
1294
1295 if metadata.file_type().is_symlink() && !config.discovery.follow_symlinks {
1296 return skip_with_reason(path, root, size, "symlink skipped by policy");
1297 }
1298 if file_name_eq(path, ".gitignore") {
1299 return skip_with_reason(path, root, size, ".gitignore is always excluded");
1300 }
1301 if is_excluded_dir_path(path, &config.discovery.excluded_directories) {
1302 return skip_with_reason(path, root, size, "path matched excluded directory setting");
1303 }
1304 if size > config.discovery.max_file_size_bytes {
1305 return skip_with_reason(
1306 path,
1307 root,
1308 size,
1309 format!(
1310 "file exceeded max_file_size_bytes ({})",
1311 config.discovery.max_file_size_bytes
1312 ),
1313 );
1314 }
1315 if let Some(globs) = include_globs {
1316 if !globs.is_match(Path::new(relative_path)) && !globs.is_match(path) {
1317 return MetadataPolicyOutcome::Exclude;
1318 }
1319 }
1320 if let Some(globs) = exclude_globs {
1321 if globs.is_match(Path::new(relative_path)) || globs.is_match(path) {
1322 return skip_with_reason(path, root, size, "path matched exclude glob");
1323 }
1324 }
1325 if is_known_lockfile(path) && !config.analysis.include_lockfiles {
1326 return skip_with_reason(path, root, size, "lockfile skipped by default policy");
1327 }
1328
1329 MetadataPolicyOutcome::Continue
1330}
1331
/// Outcome of the content-based policy checks performed by `check_content_policy`.
struct ContentPolicyResult {
    /// Whether the path contains a vendor directory component.
    vendor: bool,
    /// Whether the file was detected as generated (only set when that check ran).
    generated: bool,
    /// Whether the file was detected as minified (only set when that check ran).
    minified: bool,
    /// Ready-made skip record when a policy rejected the file; `None` to continue.
    skip_record: Option<FileRecord>,
}
1338
1339fn check_content_policy(
1342 path: &Path,
1343 root: &Path,
1344 size_bytes: u64,
1345 bytes: &[u8],
1346 config: &AppConfig,
1347) -> ContentPolicyResult {
1348 let vendor = is_vendor_path(path);
1349 if vendor && config.analysis.vendor_directory_detection {
1350 return ContentPolicyResult {
1351 vendor,
1352 generated: false,
1353 minified: false,
1354 skip_record: Some(skipped_record(
1355 path,
1356 root,
1357 size_bytes,
1358 FileStatus::SkippedByPolicy,
1359 vec!["vendor file skipped by policy".into()],
1360 )),
1361 };
1362 }
1363
1364 let generated = config.analysis.generated_file_detection && looks_generated(path, bytes);
1365 if generated {
1366 return ContentPolicyResult {
1367 vendor,
1368 generated,
1369 minified: false,
1370 skip_record: Some(skipped_record(
1371 path,
1372 root,
1373 size_bytes,
1374 FileStatus::SkippedByPolicy,
1375 vec!["generated file skipped by policy".into()],
1376 )),
1377 };
1378 }
1379
1380 let minified = config.analysis.minified_file_detection && looks_minified(path, bytes);
1381 if minified {
1382 return ContentPolicyResult {
1383 vendor,
1384 generated,
1385 minified,
1386 skip_record: Some(skipped_record(
1387 path,
1388 root,
1389 size_bytes,
1390 FileStatus::SkippedByPolicy,
1391 vec!["minified file skipped by policy".into()],
1392 )),
1393 };
1394 }
1395
1396 ContentPolicyResult {
1397 vendor,
1398 generated,
1399 minified,
1400 skip_record: None,
1401 }
1402}
1403
1404fn decode_file_contents(
1406 path: &Path,
1407 root: &Path,
1408 size_bytes: u64,
1409 bytes: &[u8],
1410 config: &AppConfig,
1411) -> Result<Option<(String, String, Vec<String>)>> {
1412 if is_binary(bytes) {
1413 return match config.analysis.binary_file_behavior {
1414 BinaryFileBehavior::Skip => Ok(None),
1415 BinaryFileBehavior::Fail => {
1416 anyhow::bail!("binary file encountered: {}", path.display())
1417 }
1418 };
1419 }
1420
1421 match decode_bytes(bytes) {
1422 Ok(result) => Ok(Some(result)),
1423 Err(err) => match config.analysis.decode_failure_behavior {
1424 FailureBehavior::WarnSkip => {
1425 let _ = (path, root, size_bytes); Err(anyhow::anyhow!("__decode_warn__: {err}"))
1430 }
1431 FailureBehavior::Fail => {
1432 anyhow::bail!("decode failure for {}: {err}", path.display())
1433 }
1434 },
1435 }
1436}
1437
1438#[allow(clippy::too_many_lines)]
1439fn analyze_candidate_file(
1440 path: &Path,
1441 root: &Path,
1442 config: &AppConfig,
1443 include_globs: Option<&GlobSet>,
1444 exclude_globs: Option<&GlobSet>,
1445 enabled_languages: Option<&BTreeSet<Language>>,
1446) -> Result<Option<FileRecord>> {
1447 let metadata = match fs::symlink_metadata(path) {
1448 Ok(metadata) => metadata,
1449 Err(err) => {
1450 return Ok(Some(skipped_record(
1451 path,
1452 root,
1453 0,
1454 FileStatus::ErrorInternal,
1455 vec![format!("failed to read metadata: {err}")],
1456 )));
1457 }
1458 };
1459
1460 let relative_path = relative_path_string(path, root);
1461
1462 match check_metadata_policy(
1464 path,
1465 root,
1466 &relative_path,
1467 &metadata,
1468 config,
1469 include_globs,
1470 exclude_globs,
1471 ) {
1472 MetadataPolicyOutcome::Skip(record) => return Ok(Some(*record)),
1473 MetadataPolicyOutcome::Exclude => return Ok(None),
1474 MetadataPolicyOutcome::Continue => {}
1475 }
1476
1477 let bytes = match fs::read(path) {
1478 Ok(bytes) => bytes,
1479 Err(err) => {
1480 return Ok(Some(skipped_record(
1481 path,
1482 root,
1483 metadata.len(),
1484 FileStatus::ErrorInternal,
1485 vec![format!("failed to read file: {err}")],
1486 )));
1487 }
1488 };
1489
1490 let content_policy = check_content_policy(path, root, metadata.len(), &bytes, config);
1492 if let Some(record) = content_policy.skip_record {
1493 return Ok(Some(record));
1494 }
1495 let (vendor, generated, minified) = (
1496 content_policy.vendor,
1497 content_policy.generated,
1498 content_policy.minified,
1499 );
1500
1501 let (text, encoding, decode_warnings) =
1503 match decode_file_contents(path, root, metadata.len(), &bytes, config) {
1504 Ok(Some(result)) => result,
1505 Ok(None) => {
1506 return Ok(Some(skipped_record(
1507 path,
1508 root,
1509 metadata.len(),
1510 FileStatus::SkippedBinary,
1511 vec!["binary file skipped by default".into()],
1512 )));
1513 }
1514 Err(err) => {
1515 let msg = err.to_string();
1516 if let Some(warn_msg) = msg.strip_prefix("__decode_warn__: ") {
1517 return Ok(Some(skipped_record(
1518 path,
1519 root,
1520 metadata.len(),
1521 FileStatus::SkippedDecodeError,
1522 vec![warn_msg.to_string()],
1523 )));
1524 }
1525 return Err(err);
1526 }
1527 };
1528
1529 let first_line = text.lines().next();
1530 let language = detect_language(
1531 path,
1532 first_line,
1533 &config.analysis.extension_overrides,
1534 config.analysis.shebang_detection,
1535 );
1536
1537 let Some(language) = language else {
1538 return Ok(Some(skipped_record(
1539 path,
1540 root,
1541 metadata.len(),
1542 FileStatus::SkippedUnsupported,
1543 vec!["unsupported or undetected language".into()],
1544 )));
1545 };
1546
1547 if let Some(enabled) = enabled_languages {
1548 if !enabled.contains(&language) {
1549 return Ok(Some(skipped_record(
1550 path,
1551 root,
1552 metadata.len(),
1553 FileStatus::SkippedByPolicy,
1554 vec![format!(
1555 "language {} disabled by configuration",
1556 language.display_name()
1557 )],
1558 )));
1559 }
1560 }
1561
1562 let style_scope = match config.analysis.style_lang_scope.as_str() {
1563 "c_family" => StyleLangScope::CFamilyOnly,
1564 _ => StyleLangScope::All,
1565 };
1566 let ieee_opts = AnalysisOptions {
1567 blank_in_block_comment_as_comment: config.analysis.blank_in_block_comment_policy
1568 == BlankInBlockCommentPolicy::CountAsComment,
1569 collapse_continuation_lines: config.analysis.continuation_line_policy
1570 == ContinuationLinePolicy::CollapseToLogical,
1571 enable_style: config.analysis.style_analysis_enabled,
1572 style_lang_scope: style_scope,
1573 };
1574 let analysis = analyze_text(language, &text, ieee_opts);
1575 let effective_counts = compute_effective_counts(
1576 &analysis.raw,
1577 config.analysis.mixed_line_policy,
1578 config.analysis.python_docstrings_as_comments,
1579 config.analysis.count_compiler_directives,
1580 );
1581
1582 let mut warnings = decode_warnings;
1583 warnings.extend(analysis.warnings.clone());
1584
1585 let content_hash = {
1587 use std::hash::{DefaultHasher, Hash, Hasher};
1588 let mut h = DefaultHasher::new();
1589 bytes.hash(&mut h);
1590 h.finish()
1591 };
1592
1593 let cyclomatic_complexity = if analysis.raw.cyclomatic_complexity > 0 {
1595 Some(analysis.raw.cyclomatic_complexity)
1596 } else {
1597 None
1598 };
1599 let lsloc = analysis.raw.lsloc;
1600
1601 Ok(Some(FileRecord {
1602 path: path_to_string(path),
1603 relative_path,
1604 language: Some(language),
1605 size_bytes: metadata.len(),
1606 detected_encoding: Some(encoding),
1607 raw_line_categories: analysis.raw,
1608 effective_counts,
1609 status: match analysis.parse_mode {
1610 ParseMode::Lexical | ParseMode::TreeSitter => FileStatus::AnalyzedExact,
1611 ParseMode::LexicalBestEffort => FileStatus::AnalyzedBestEffort,
1612 },
1613 warnings,
1614 generated,
1615 minified,
1616 vendor,
1617 parse_mode: Some(analysis.parse_mode),
1618 submodule: None,
1619 coverage: None,
1620 style_analysis: analysis.style_analysis,
1621 cyclomatic_complexity,
1622 lsloc,
1623 content_hash,
1624 }))
1625}
1626
1627const fn compute_effective_counts(
1628 raw: &RawLineCounts,
1629 mixed_line_policy: MixedLinePolicy,
1630 python_docstrings_as_comments: bool,
1631 count_compiler_directives: bool,
1632) -> EffectiveCounts {
1633 let mut effective = EffectiveCounts {
1634 code_lines: raw.code_only_lines,
1635 comment_lines: raw.single_comment_only_lines + raw.multi_comment_only_lines,
1636 blank_lines: raw.blank_only_lines,
1637 mixed_lines_separate: 0,
1638 };
1639
1640 if python_docstrings_as_comments {
1641 effective.comment_lines += raw.docstring_comment_lines;
1642 } else {
1643 effective.code_lines += raw.docstring_comment_lines;
1644 }
1645
1646 let mixed_total = raw.mixed_code_single_comment_lines + raw.mixed_code_multi_comment_lines;
1647 match mixed_line_policy {
1648 MixedLinePolicy::CodeOnly => effective.code_lines += mixed_total,
1649 MixedLinePolicy::CodeAndComment => {
1650 effective.code_lines += mixed_total;
1651 effective.comment_lines += mixed_total;
1652 }
1653 MixedLinePolicy::CommentOnly => effective.comment_lines += mixed_total,
1654 MixedLinePolicy::SeparateMixedCategory => effective.mixed_lines_separate += mixed_total,
1655 }
1656
1657 if !count_compiler_directives {
1660 effective.code_lines = effective
1661 .code_lines
1662 .saturating_sub(raw.compiler_directive_lines);
1663 }
1664
1665 effective
1666}
1667
1668fn build_summary(analyzed: &[FileRecord], skipped: &[FileRecord]) -> SummaryTotals {
1669 let mut summary = SummaryTotals {
1670 files_considered: (analyzed.len() + skipped.len()) as u64,
1671 files_analyzed: analyzed.len() as u64,
1672 files_skipped: skipped.len() as u64,
1673 ..Default::default()
1674 };
1675
1676 for record in analyzed {
1677 summary.total_physical_lines += record.raw_line_categories.total_physical_lines;
1678 summary.code_lines += record.effective_counts.code_lines;
1679 summary.comment_lines += record.effective_counts.comment_lines;
1680 summary.blank_lines += record.effective_counts.blank_lines;
1681 summary.mixed_lines_separate += record.effective_counts.mixed_lines_separate;
1682 summary.functions += record.raw_line_categories.functions;
1683 summary.classes += record.raw_line_categories.classes;
1684 summary.variables += record.raw_line_categories.variables;
1685 summary.imports += record.raw_line_categories.imports;
1686 summary.test_count += record.raw_line_categories.test_count;
1687 summary.test_assertion_count += record.raw_line_categories.test_assertion_count;
1688 summary.test_suite_count += record.raw_line_categories.test_suite_count;
1689 summary.cyclomatic_complexity +=
1690 u64::from(record.raw_line_categories.cyclomatic_complexity);
1691 if let Some(lsloc) = record.raw_line_categories.lsloc {
1692 *summary.lsloc.get_or_insert(0) += u64::from(lsloc);
1693 }
1694 if let Some(cov) = &record.coverage {
1695 summary.coverage_lines_found += u64::from(cov.lines_found);
1696 summary.coverage_lines_hit += u64::from(cov.lines_hit);
1697 summary.coverage_functions_found += u64::from(cov.functions_found);
1698 summary.coverage_functions_hit += u64::from(cov.functions_hit);
1699 summary.coverage_branches_found += u64::from(cov.branches_found);
1700 summary.coverage_branches_hit += u64::from(cov.branches_hit);
1701 }
1702 }
1703
1704 summary
1705}
1706
/// Returns an empty `LanguageSummary` for `language`: every counter is zero and
/// `lsloc` is `None`, ready to be filled by `accumulate_record_into_summary`.
const fn zeroed_summary(language: Language) -> LanguageSummary {
    LanguageSummary {
        language,
        files: 0,
        total_physical_lines: 0,
        code_lines: 0,
        comment_lines: 0,
        blank_lines: 0,
        mixed_lines_separate: 0,
        functions: 0,
        classes: 0,
        variables: 0,
        imports: 0,
        test_count: 0,
        test_assertion_count: 0,
        test_suite_count: 0,
        coverage_lines_found: 0,
        coverage_lines_hit: 0,
        coverage_functions_found: 0,
        coverage_functions_hit: 0,
        coverage_branches_found: 0,
        coverage_branches_hit: 0,
        cyclomatic_complexity: 0,
        // `None` rather than zero: stays unset until a file reports an LSLOC value.
        lsloc: None,
    }
}
1734
1735fn accumulate_record_into_summary(entry: &mut LanguageSummary, record: &FileRecord) {
1737 entry.files += 1;
1738 let r = &record.raw_line_categories;
1739 entry.total_physical_lines += r.total_physical_lines;
1740 entry.code_lines += record.effective_counts.code_lines;
1741 entry.comment_lines += record.effective_counts.comment_lines;
1742 entry.blank_lines += record.effective_counts.blank_lines;
1743 entry.mixed_lines_separate += record.effective_counts.mixed_lines_separate;
1744 entry.functions += r.functions;
1745 entry.classes += r.classes;
1746 entry.variables += r.variables;
1747 entry.imports += r.imports;
1748 entry.test_count += r.test_count;
1749 entry.test_assertion_count += r.test_assertion_count;
1750 entry.test_suite_count += r.test_suite_count;
1751 entry.cyclomatic_complexity += u64::from(r.cyclomatic_complexity);
1752 if let Some(lsloc) = r.lsloc {
1753 *entry.lsloc.get_or_insert(0) += u64::from(lsloc);
1754 }
1755 if let Some(cov) = &record.coverage {
1756 entry.coverage_lines_found += u64::from(cov.lines_found);
1757 entry.coverage_lines_hit += u64::from(cov.lines_hit);
1758 entry.coverage_functions_found += u64::from(cov.functions_found);
1759 entry.coverage_functions_hit += u64::from(cov.functions_hit);
1760 entry.coverage_branches_found += u64::from(cov.branches_found);
1761 entry.coverage_branches_hit += u64::from(cov.branches_hit);
1762 }
1763}
1764
1765fn build_language_summaries(analyzed: &[FileRecord]) -> Vec<LanguageSummary> {
1766 let mut by_language: BTreeMap<Language, LanguageSummary> = BTreeMap::new();
1767 for record in analyzed {
1768 let Some(language) = record.language else {
1769 continue;
1770 };
1771 let entry = by_language
1772 .entry(language)
1773 .or_insert_with(|| zeroed_summary(language));
1774 accumulate_record_into_summary(entry, record);
1775 }
1776 by_language.into_values().collect()
1777}
1778
1779fn skipped_record(
1780 path: &Path,
1781 root: &Path,
1782 size_bytes: u64,
1783 status: FileStatus,
1784 warnings: Vec<String>,
1785) -> FileRecord {
1786 FileRecord {
1787 path: path_to_string(path),
1788 relative_path: relative_path_string(path, root),
1789 language: None,
1790 size_bytes,
1791 detected_encoding: None,
1792 raw_line_categories: RawLineCounts::default(),
1793 effective_counts: EffectiveCounts::default(),
1794 status,
1795 warnings,
1796 generated: false,
1797 minified: false,
1798 vendor: false,
1799 parse_mode: None,
1800 submodule: None,
1801 coverage: None,
1802 style_analysis: None,
1803 cyclomatic_complexity: None,
1804 lsloc: None,
1805 content_hash: 0,
1806 }
1807}
1808
/// Renders `path` relative to `root` with forward slashes.
///
/// Falls back to the full `path` when it does not start with `root`. Invalid
/// UTF-8 is replaced lossily and every backslash becomes `/`.
fn relative_path_string(path: &Path, root: &Path) -> String {
    let relative = match path.strip_prefix(root) {
        Ok(stripped) => stripped,
        Err(_) => path,
    };
    let lossy = relative.to_string_lossy();
    lossy.replace('\\', "/")
}
1815
/// Renders `path` as a string with forward slashes; invalid UTF-8 is replaced lossily.
fn path_to_string(path: &Path) -> String {
    let lossy = path.to_string_lossy();
    lossy.replace('\\', "/")
}
1819
/// Lists the submodules declared in `<root>/.gitmodules` as `(name, path)` pairs.
///
/// Returns an empty list when the file is missing or cannot be read as UTF-8
/// text. A section contributes a pair only if it has both a
/// `[submodule "<name>"]` header and a `path = <value>` entry. Malformed lines
/// are ignored rather than causing a panic.
#[must_use]
pub fn detect_submodules(root: &Path) -> Vec<(String, PathBuf)> {
    let gitmodules = root.join(".gitmodules");
    if !gitmodules.is_file() {
        return Vec::new();
    }
    fs::read_to_string(&gitmodules)
        .map(|content| parse_gitmodules(&content))
        .unwrap_or_default()
}

/// Extracts `(name, path)` pairs from the text of a `.gitmodules` file.
fn parse_gitmodules(content: &str) -> Vec<(String, PathBuf)> {
    let mut result = Vec::new();
    let mut current_name: Option<String> = None;
    let mut current_path: Option<PathBuf> = None;

    for line in content.lines() {
        let trimmed = line.trim();

        // Stripping prefix then suffix (instead of slicing by fixed offsets)
        // keeps a degenerate header such as `[submodule "]` from panicking.
        let header = trimmed
            .strip_prefix("[submodule \"")
            .and_then(|rest| rest.strip_suffix("\"]"));
        if let Some(name) = header {
            // A new section starts: flush the previous one if it was complete.
            if let (Some(prev_name), Some(prev_path)) = (current_name.take(), current_path.take())
            {
                result.push((prev_name, prev_path));
            }
            current_name = Some(name.to_string());
            continue;
        }

        // Only the exact key `path` counts; other keys that merely start with
        // "path" must not overwrite the submodule path.
        let path_value = trimmed
            .strip_prefix("path")
            .and_then(|rest| rest.trim_start().strip_prefix('='));
        if let Some(value) = path_value {
            current_path = Some(PathBuf::from(value.trim()));
        }
    }

    if let (Some(name), Some(path)) = (current_name, current_path) {
        result.push((name, path));
    }
    result
}
1856
1857fn build_submodule_summaries(
1858 analyzed: &[FileRecord],
1859 submodules: &[(String, PathBuf)],
1860 root: &Path,
1861) -> Vec<SubmoduleSummary> {
1862 submodules
1863 .iter()
1864 .map(|(name, path)| {
1865 let files: Vec<&FileRecord> = analyzed
1866 .iter()
1867 .filter(|f| f.submodule.as_deref() == Some(name.as_str()))
1868 .collect();
1869
1870 let files_analyzed = files.len() as u64;
1871 let total_physical_lines = files
1872 .iter()
1873 .map(|f| f.raw_line_categories.total_physical_lines)
1874 .sum();
1875 let code_lines = files.iter().map(|f| f.effective_counts.code_lines).sum();
1876 let comment_lines = files.iter().map(|f| f.effective_counts.comment_lines).sum();
1877 let blank_lines = files.iter().map(|f| f.effective_counts.blank_lines).sum();
1878 let language_summaries = build_language_summaries_from_slice(&files);
1879
1880 let git = detect_git_for_run(&root.join(path));
1881
1882 SubmoduleSummary {
1883 name: name.clone(),
1884 relative_path: path.to_string_lossy().replace('\\', "/"),
1885 files_analyzed,
1886 total_physical_lines,
1887 code_lines,
1888 comment_lines,
1889 blank_lines,
1890 language_summaries,
1891 git_commit_short: git.commit_short,
1892 git_commit_long: git.commit_long,
1893 git_branch: git.branch,
1894 git_commit_author: git.author,
1895 git_commit_date: git.commit_date,
1896 git_remote_url: git.remote_url,
1897 }
1898 })
1899 .filter(|s| s.files_analyzed > 0)
1900 .collect()
1901}
1902
1903#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
1905fn dominant_indent_label(files: &[&StyleAnalysis]) -> String {
1906 let mut votes = [0u32; 6];
1907 for f in files {
1908 let idx = match f.indent_style {
1909 IndentStyle::Tabs => 0,
1910 IndentStyle::Spaces2 => 1,
1911 IndentStyle::Spaces4 => 2,
1912 IndentStyle::Spaces8 => 3,
1913 IndentStyle::Mixed => 4,
1914 IndentStyle::Unknown => 5,
1915 };
1916 votes[idx] += 1;
1917 }
1918 let labels = ["Tabs", "2-Space", "4-Space", "8-Space", "Mixed", "\u{2014}"];
1919 labels[votes
1920 .iter()
1921 .enumerate()
1922 .max_by_key(|(_, v)| *v)
1923 .map_or(5, |(i, _)| i)]
1924 .to_string()
1925}
1926
1927#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
1929fn line80_pct(files: &[&StyleAnalysis]) -> u8 {
1930 if files.is_empty() {
1931 return 0;
1932 }
1933 let compliant = files
1934 .iter()
1935 .filter(|f| f.total_lines == 0 || (f.lines_over_80 as f32 / f.total_lines as f32) <= 0.05)
1936 .count() as u32;
1937 ((compliant * 100) / files.len() as u32) as u8
1938}
1939
1940#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
1943fn line_col_pct(files: &[&StyleAnalysis], threshold: u16) -> u8 {
1944 if files.is_empty() {
1945 return 0;
1946 }
1947 let compliant = files
1948 .iter()
1949 .filter(|f| {
1950 let over = if threshold <= 80 {
1951 f.lines_over_80
1952 } else if threshold <= 100 {
1953 f.lines_over_100
1954 } else {
1955 f.lines_over_120
1956 };
1957 f.total_lines == 0 || (over as f32 / f.total_lines as f32) <= 0.05
1958 })
1959 .count() as u32;
1960 ((compliant * 100) / files.len() as u32) as u8
1961}
1962
1963#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
1965fn build_language_group(
1966 family: &str,
1967 files: &[&StyleAnalysis],
1968 col_threshold: u16,
1969) -> LanguageStyleGroup {
1970 let count = files.len() as u32;
1971
1972 let mut all_names: Vec<String> = Vec::new();
1974 for f in files {
1975 for g in &f.guide_scores {
1976 if !all_names.contains(&g.name) {
1977 all_names.push(g.name.clone());
1978 }
1979 }
1980 }
1981
1982 let mut guide_avg_scores: Vec<(String, u8)> = all_names
1983 .into_iter()
1984 .map(|name| {
1985 let sum: u32 = files
1986 .iter()
1987 .filter_map(|f| f.guide_scores.iter().find(|g| g.name == name))
1988 .map(|g| u32::from(g.score_pct))
1989 .sum();
1990 let avg = (sum / count) as u8;
1991 (name, avg)
1992 })
1993 .collect();
1994 guide_avg_scores.sort_by_key(|s| std::cmp::Reverse(s.1));
1995
1996 let (dominant_guide, dominant_score_pct) = guide_avg_scores
1997 .first()
1998 .map(|(n, s)| (n.clone(), *s))
1999 .unwrap_or_default();
2000
2001 let lcp = line_col_pct(files, col_threshold);
2002 LanguageStyleGroup {
2003 language_family: family.to_string(),
2004 files_count: count,
2005 dominant_guide,
2006 dominant_score_pct,
2007 common_indent_style: dominant_indent_label(files),
2008 guide_avg_scores,
2009 line80_compliant_pct: line80_pct(files),
2010 line_col_compliant_pct: lcp,
2011 }
2012}
2013
2014#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
2017fn build_style_summary(analyzed: &[FileRecord], col_threshold: u16) -> Option<StyleSummary> {
2018 let all_style: Vec<&StyleAnalysis> = analyzed
2019 .iter()
2020 .filter_map(|f| f.style_analysis.as_ref())
2021 .collect();
2022
2023 if all_style.is_empty() {
2024 return None;
2025 }
2026
2027 let mut families: std::collections::BTreeMap<&str, Vec<&StyleAnalysis>> =
2029 std::collections::BTreeMap::new();
2030 for sa in &all_style {
2031 families
2032 .entry(sa.language_family.as_str())
2033 .or_default()
2034 .push(sa);
2035 }
2036
2037 let mut by_language: Vec<LanguageStyleGroup> = families
2038 .iter()
2039 .map(|(family, files)| build_language_group(family, files, col_threshold))
2040 .collect();
2041 by_language.sort_by_key(|g| std::cmp::Reverse(g.files_count));
2042
2043 let files_analyzed = all_style.len() as u32;
2044 let common_indent_style = dominant_indent_label(&all_style);
2045 let line80_compliant_pct = line80_pct(&all_style);
2046 let line_col_compliant_pct = line_col_pct(&all_style, col_threshold);
2047
2048 Some(StyleSummary {
2049 files_analyzed,
2050 common_indent_style,
2051 line80_compliant_pct,
2052 line_col_compliant_pct,
2053 col_threshold,
2054 by_language,
2055 })
2056}
2057
2058fn build_language_summaries_from_slice(files: &[&FileRecord]) -> Vec<LanguageSummary> {
2059 let mut map: BTreeMap<String, LanguageSummary> = BTreeMap::new();
2060 for file in files {
2061 let Some(lang) = file.language else { continue };
2062 let entry = map
2063 .entry(lang.display_name().to_string())
2064 .or_insert_with(|| zeroed_summary(lang));
2065 accumulate_record_into_summary(entry, file);
2066 }
2067 map.into_values().collect()
2068}
2069
/// Returns `true` when the final component of `path` is valid UTF-8 and equals
/// `expected` exactly.
fn file_name_eq(path: &Path, expected: &str) -> bool {
    let file_name = path.file_name().and_then(|name| name.to_str());
    matches!(file_name, Some(name) if name == expected)
}
2075
/// Returns `true` when any component of `path` exactly equals one of
/// `excluded_dirs`. Components that are not valid UTF-8 never match.
fn is_excluded_dir_path(path: &Path, excluded_dirs: &[String]) -> bool {
    for component in path.components() {
        let Some(part) = component.as_os_str().to_str() else {
            continue;
        };
        if excluded_dirs.iter().any(|excluded| excluded.as_str() == part) {
            return true;
        }
    }
    false
}
2084
/// Returns `true` when any component of `path` is exactly `vendor`,
/// `node_modules` or `packages`.
fn is_vendor_path(path: &Path) -> bool {
    const VENDOR_DIR_NAMES: [&str; 3] = ["vendor", "node_modules", "packages"];

    path.components()
        .filter_map(|component| component.as_os_str().to_str())
        .any(|part| VENDOR_DIR_NAMES.contains(&part))
}
2093
/// Returns `true` when the file name of `path` is one of the recognised
/// dependency lockfiles.
fn is_known_lockfile(path: &Path) -> bool {
    const LOCKFILE_NAMES: [&str; 7] = [
        "Cargo.lock",
        "package-lock.json",
        "yarn.lock",
        "pnpm-lock.yaml",
        "Pipfile.lock",
        "poetry.lock",
        "composer.lock",
    ];

    let Some(name) = path.file_name().and_then(|name| name.to_str()) else {
        return false;
    };
    LOCKFILE_NAMES.contains(&name)
}
2110
2111fn looks_generated(path: &Path, bytes: &[u8]) -> bool {
2112 let file_name = path
2113 .file_name()
2114 .and_then(|name| name.to_str())
2115 .unwrap_or_default();
2116 if file_name.contains(".generated.") || file_name.contains(".g.") {
2117 return true;
2118 }
2119
2120 let sample = String::from_utf8_lossy(&bytes[..bytes.len().min(GENERATED_SAMPLE_BYTES)])
2121 .to_ascii_lowercase();
2122 sample.contains("@generated") || sample.contains("generated by")
2123}
2124
2125fn looks_minified(path: &Path, bytes: &[u8]) -> bool {
2126 let file_name = path
2127 .file_name()
2128 .and_then(|name| name.to_str())
2129 .unwrap_or_default();
2130 if file_name.contains(".min.") {
2131 return true;
2132 }
2133
2134 let sample = String::from_utf8_lossy(&bytes[..bytes.len().min(MINIFIED_SAMPLE_BYTES)]);
2135 let longest_line = sample.lines().map(str::len).max().unwrap_or(0);
2136 let whitespace = sample.chars().filter(|c| c.is_whitespace()).count();
2137 longest_line > MINIFIED_LINE_THRESHOLD && whitespace * 100 < sample.len().max(1)
2138}
2139
2140fn is_binary(bytes: &[u8]) -> bool {
2141 if bytes.starts_with(&[0xEF, 0xBB, 0xBF])
2142 || bytes.starts_with(&[0xFF, 0xFE])
2143 || bytes.starts_with(&[0xFE, 0xFF])
2144 {
2145 return false;
2146 }
2147
2148 let sample = &bytes[..bytes.len().min(BINARY_SAMPLE_BYTES)];
2149 sample.contains(&0)
2150}
2151
2152fn decode_utf16_bom(
2155 bom_stripped: &[u8],
2156 encoding: &'static encoding_rs::Encoding,
2157 label: &str,
2158) -> (String, String, Vec<String>) {
2159 let (cow, _, had_errors) = encoding.decode(bom_stripped);
2160 let mut warnings = Vec::new();
2161 if had_errors {
2162 warnings.push(format!("{label} decode contained replacement characters"));
2163 }
2164 (cow.into_owned(), label.into(), warnings)
2165}
2166
2167fn decode_bytes(bytes: &[u8]) -> std::result::Result<(String, String, Vec<String>), String> {
2168 if bytes.starts_with(&[0xEF, 0xBB, 0xBF]) {
2169 let text = String::from_utf8(bytes[3..].to_vec()).map_err(|err| err.to_string())?;
2170 return Ok((text, "utf-8-bom".into(), vec![]));
2171 }
2172 if bytes.starts_with(&[0xFF, 0xFE]) {
2173 return Ok(decode_utf16_bom(&bytes[2..], UTF_16LE, "utf-16le"));
2174 }
2175 if bytes.starts_with(&[0xFE, 0xFF]) {
2176 return Ok(decode_utf16_bom(&bytes[2..], UTF_16BE, "utf-16be"));
2177 }
2178
2179 #[allow(clippy::option_if_let_else)]
2181 if let Ok(text) = String::from_utf8(bytes.to_vec()) {
2182 Ok((text, "utf-8".into(), vec![]))
2183 } else {
2184 let (cow, _, had_errors) = WINDOWS_1252.decode(bytes);
2185 let mut warnings = vec!["decoded using windows-1252 fallback".into()];
2186 if had_errors {
2187 warnings.push("fallback decode contained replacement characters".into());
2188 }
2189 Ok((cow.into_owned(), "windows-1252".into(), warnings))
2190 }
2191}
2192
2193fn compile_globset(patterns: &[String]) -> Result<Option<GlobSet>> {
2194 if patterns.is_empty() {
2195 return Ok(None);
2196 }
2197
2198 let mut builder = GlobSetBuilder::new();
2199 for pattern in patterns {
2200 builder
2201 .add(Glob::new(pattern).with_context(|| format!("invalid glob pattern: {pattern}"))?);
2202 }
2203 Ok(Some(
2204 builder.build().context("failed to compile glob filters")?,
2205 ))
2206}
2207
2208fn parse_enabled_languages(enabled: &[String]) -> Result<Option<BTreeSet<Language>>> {
2209 if enabled.is_empty() {
2210 return Ok(None);
2211 }
2212
2213 let supported = supported_languages();
2214 let mut set = BTreeSet::new();
2215 for name in enabled {
2216 let language = Language::from_name(name)
2217 .with_context(|| format!("unsupported language in config: {name}"))?;
2218 if !supported.contains(&language) {
2219 anyhow::bail!("language {name} is not supported in this build");
2220 }
2221 set.insert(language);
2222 }
2223 Ok(Some(set))
2224}
2225
2226pub fn write_json(run: &AnalysisRun, output_path: &Path) -> Result<()> {
2230 let json = serde_json::to_string_pretty(run).context("failed to serialize analysis run")?;
2231 fs::write(output_path, json)
2232 .with_context(|| format!("failed to write JSON output to {}", output_path.display()))
2233}
2234
2235pub fn read_json(path: &Path) -> Result<AnalysisRun> {
2239 let contents = fs::read_to_string(path)
2240 .with_context(|| format!("failed to read result file {}", path.display()))?;
2241 serde_json::from_str(&contents)
2242 .with_context(|| format!("failed to parse JSON result {}", path.display()))
2243}
2244
2245#[cfg(test)]
2246mod tests {
2247 use super::*;
2248
2249 #[test]
2250 fn effective_counts_respect_code_only_policy() {
2251 let raw = RawLineCounts {
2252 code_only_lines: 2,
2253 single_comment_only_lines: 1,
2254 mixed_code_single_comment_lines: 3,
2255 docstring_comment_lines: 2,
2256 ..RawLineCounts::default()
2257 };
2258 let counts = compute_effective_counts(&raw, MixedLinePolicy::CodeOnly, true, true);
2259 assert_eq!(counts.code_lines, 5);
2260 assert_eq!(counts.comment_lines, 3);
2261 }
2262
2263 #[test]
2264 fn effective_counts_can_separate_mixed() {
2265 let raw = RawLineCounts {
2266 mixed_code_single_comment_lines: 2,
2267 mixed_code_multi_comment_lines: 1,
2268 ..RawLineCounts::default()
2269 };
2270 let counts =
2271 compute_effective_counts(&raw, MixedLinePolicy::SeparateMixedCategory, true, true);
2272 assert_eq!(counts.mixed_lines_separate, 3);
2273 assert_eq!(counts.code_lines, 0);
2274 assert_eq!(counts.comment_lines, 0);
2275 }
2276
2277 #[test]
2278 fn windows_1252_fallback_decodes() {
2279 let bytes = vec![0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x96, 0x57];
2280 let (text, encoding, warnings) = decode_bytes(&bytes).unwrap();
2281 assert_eq!(encoding, "windows-1252");
2282 assert!(text.contains('–'));
2283 assert!(!warnings.is_empty());
2284 }
2285
2286 #[test]
2289 fn is_binary_detects_null_byte() {
2290 let bytes = b"hello\x00world";
2291 assert!(is_binary(bytes));
2292 }
2293
2294 #[test]
2295 fn is_binary_clean_text_is_not_binary() {
2296 let bytes = b"fn main() { println!(\"hello\"); }";
2297 assert!(!is_binary(bytes));
2298 }
2299
2300 #[test]
2301 fn is_binary_utf8_bom_not_binary() {
2302 let bytes = b"\xef\xbb\xbffn main() {}";
2303 assert!(!is_binary(bytes));
2304 }
2305
2306 #[test]
2307 fn looks_generated_at_generated_marker() {
2308 let bytes = b"// @generated by protoc-gen-rust\nfn foo() {}";
2309 assert!(looks_generated(Path::new("foo.rs"), bytes));
2310 }
2311
2312 #[test]
2313 fn looks_generated_do_not_edit_marker() {
2314 let bytes = b"// Code generated by build.rs. DO NOT EDIT.\nuse foo;";
2316 assert!(looks_generated(Path::new("foo.rs"), bytes));
2317 let bytes2 = b"// @generated\nuse foo;";
2319 assert!(looks_generated(Path::new("foo.rs"), bytes2));
2320 }
2321
2322 #[test]
2323 fn looks_generated_normal_file_not_generated() {
2324 let bytes = b"fn main() {\n println!(\"hello\");\n}\n";
2325 assert!(!looks_generated(Path::new("main.rs"), bytes));
2326 }
2327
2328 #[test]
2329 fn looks_minified_dot_min_filename() {
2330 let bytes = b"function a(){return 1}";
2331 assert!(looks_minified(Path::new("bundle.min.js"), bytes));
2332 }
2333
2334 #[test]
2335 fn looks_minified_normal_file_not_minified() {
2336 let bytes = b"function hello() {\n return 1;\n}\n";
2337 assert!(!looks_minified(Path::new("app.js"), bytes));
2338 }
2339
/// A single line one byte longer than `MINIFIED_LINE_THRESHOLD` is treated as
/// minified even though the file name is ordinary.
#[test]
fn looks_minified_very_long_line() {
    let oversized: Vec<u8> = vec![b'x'; MINIFIED_LINE_THRESHOLD + 1];
    let path = Path::new("app.js");
    assert!(looks_minified(path, &oversized));
}
2345
/// `Cargo.lock` is a known lockfile.
#[test]
fn is_known_lockfile_cargo_lock() {
    let path = Path::new("Cargo.lock");
    assert!(is_known_lockfile(path));
}
2350
/// `package-lock.json` is a known lockfile.
#[test]
fn is_known_lockfile_package_lock_json() {
    let path = Path::new("package-lock.json");
    assert!(is_known_lockfile(path));
}
2355
/// `yarn.lock` is a known lockfile.
#[test]
fn is_known_lockfile_yarn_lock() {
    let path = Path::new("yarn.lock");
    assert!(is_known_lockfile(path));
}
2360
/// An ordinary source file path is not a lockfile.
#[test]
fn is_known_lockfile_normal_file_is_not_lockfile() {
    let path = Path::new("src/lib.rs");
    assert!(!is_known_lockfile(path));
}
2365
/// A path under `node_modules/` is vendored code.
#[test]
fn is_vendor_path_node_modules() {
    let path = Path::new("node_modules/react/index.js");
    assert!(is_vendor_path(path));
}
2370
/// A path under `vendor/` is vendored code.
#[test]
fn is_vendor_path_vendor_dir() {
    let path = Path::new("vendor/anyhow/src/lib.rs");
    assert!(is_vendor_path(path));
}
2375
/// A path under `src/` is not vendored code.
#[test]
fn is_vendor_path_normal_src_is_not_vendor() {
    let path = Path::new("src/lib.rs");
    assert!(!is_vendor_path(path));
}
2380
/// A file inside `.git` is excluded when `.git` is in the excluded list.
#[test]
fn is_excluded_dir_path_matches_excluded() {
    let skip_dirs = vec![".git".into(), "target".into()];
    let candidate = Path::new(".git/config");
    assert!(is_excluded_dir_path(candidate, &skip_dirs));
}
2386
/// A file under `src/` is kept when only `.git` and `target` are excluded.
#[test]
fn is_excluded_dir_path_non_excluded_is_ok() {
    let skip_dirs = vec![".git".into(), "target".into()];
    let candidate = Path::new("src/main.rs");
    assert!(!is_excluded_dir_path(candidate, &skip_dirs));
}
2392
/// A leading UTF-8 byte-order mark must not survive into the decoded text,
/// and the reported encoding label must mention "utf-8".
#[test]
fn decode_bytes_utf8_bom_stripped() {
    let (decoded, encoding, _) = decode_bytes(b"\xef\xbb\xbffn main() {}").unwrap();
    // The decoded text begins with the first real token, not the BOM.
    assert!(decoded.starts_with("fn"));
    assert!(
        encoding.contains("utf-8"),
        "should be utf-8 variant, got {encoding}"
    );
}
2404
/// Plain ASCII input decodes unchanged as "utf-8" with no warnings.
#[test]
fn decode_bytes_plain_utf8() {
    let (decoded, label, notes) = decode_bytes(b"hello world").unwrap();
    assert_eq!(decoded, "hello world");
    assert_eq!(label, "utf-8");
    assert!(notes.is_empty());
}
2413
/// Input starting with the UTF-16LE byte-order mark (FF FE) is reported as
/// "utf-16le" and its characters are recovered.
#[test]
fn decode_bytes_utf16le_bom() {
    // BOM followed by "hi\n" as little-endian UTF-16 code units.
    let mut payload = vec![0xFF, 0xFE];
    payload.extend("hi\n".encode_utf16().flat_map(u16::to_le_bytes));

    let (decoded, label, _notes) = decode_bytes(&payload).unwrap();
    assert_eq!(label, "utf-16le");
    assert!(decoded.contains('h') && decoded.contains('i'));
}
2427
/// Input starting with the UTF-16BE byte-order mark (FE FF) is reported as
/// "utf-16be" and its characters are recovered.
#[test]
fn decode_bytes_utf16be_bom() {
    // BOM followed by "ok\n" as big-endian UTF-16 code units.
    let mut payload = vec![0xFE, 0xFF];
    payload.extend("ok\n".encode_utf16().flat_map(u16::to_be_bytes));

    let (decoded, label, _notes) = decode_bytes(&payload).unwrap();
    assert_eq!(label, "utf-16be");
    assert!(decoded.contains('o') && decoded.contains('k'));
}
2439
/// A UTF-16LE BOM followed by "h" (68 00) must not be classified as binary,
/// even though the sample contains a zero byte.
#[test]
fn is_binary_utf16le_bom_not_binary() {
    let sample: [u8; 4] = [0xFF, 0xFE, 0x68, 0x00];
    assert!(!is_binary(&sample));
}
2446
/// A UTF-16BE BOM followed by "h" (00 68) must not be classified as binary,
/// even though the sample contains a zero byte.
#[test]
fn is_binary_utf16be_bom_not_binary() {
    let sample: [u8; 4] = [0xFE, 0xFF, 0x00, 0x68];
    assert!(!is_binary(&sample));
}
2452
/// With `MixedLinePolicy::CodeAndComment`, the 3 + 2 mixed lines are counted
/// in both the code and comment totals and none are reported separately.
#[test]
fn effective_counts_code_and_comment_policy() {
    let mixed_only = RawLineCounts {
        mixed_code_single_comment_lines: 3,
        mixed_code_multi_comment_lines: 2,
        ..RawLineCounts::default()
    };
    let effective =
        compute_effective_counts(&mixed_only, MixedLinePolicy::CodeAndComment, true, true);
    assert_eq!(effective.mixed_lines_separate, 0);
    assert_eq!(effective.comment_lines, 5);
    assert_eq!(effective.code_lines, 5);
}
2468
/// With `MixedLinePolicy::CommentOnly`, the 4 + 1 mixed lines count only as
/// comments: code stays at zero and nothing is reported separately.
#[test]
fn effective_counts_comment_only_policy() {
    let mixed_only = RawLineCounts {
        mixed_code_single_comment_lines: 4,
        mixed_code_multi_comment_lines: 1,
        ..RawLineCounts::default()
    };
    let effective =
        compute_effective_counts(&mixed_only, MixedLinePolicy::CommentOnly, true, true);
    assert_eq!(effective.mixed_lines_separate, 0);
    assert_eq!(effective.comment_lines, 5);
    assert_eq!(effective.code_lines, 0);
}
2481
/// When the third argument is `false`, the 3 docstring lines are added to the
/// 10 code lines (13 total) and the comment total stays at zero.
// NOTE(review): the third argument is presumably the "docstrings count as
// comments" switch — confirm against `compute_effective_counts`.
#[test]
fn effective_counts_docstrings_as_code_when_flag_false() {
    let with_docstrings = RawLineCounts {
        code_only_lines: 10,
        docstring_comment_lines: 3,
        ..RawLineCounts::default()
    };
    let effective =
        compute_effective_counts(&with_docstrings, MixedLinePolicy::CodeOnly, false, true);
    assert_eq!(effective.comment_lines, 0);
    assert_eq!(effective.code_lines, 13);
}
2494
/// When the fourth argument is `false`, the 3 compiler-directive lines are
/// subtracted from the 10 code lines, leaving 7.
// NOTE(review): the fourth argument is presumably the "include compiler
// directives" switch — confirm against `compute_effective_counts`.
#[test]
fn effective_counts_exclude_compiler_directives() {
    let with_directives = RawLineCounts {
        code_only_lines: 10,
        compiler_directive_lines: 3,
        ..RawLineCounts::default()
    };
    let effective =
        compute_effective_counts(&with_directives, MixedLinePolicy::CodeOnly, true, false);
    assert_eq!(effective.code_lines, 7);
}
2506
/// With more directive lines (5) than code lines (2), the resulting code
/// total is expected to be 0 rather than wrapping or going negative.
#[test]
fn effective_counts_directives_not_subtracted_below_zero() {
    let more_directives_than_code = RawLineCounts {
        code_only_lines: 2,
        compiler_directive_lines: 5,
        ..RawLineCounts::default()
    };
    let effective = compute_effective_counts(
        &more_directives_than_code,
        MixedLinePolicy::CodeOnly,
        true,
        false,
    );
    assert_eq!(effective.code_lines, 0);
}
2517
/// For 5,000 lines in organic mode every estimate field is strictly positive
/// and the mode is echoed back.
#[test]
fn cocomo_organic_computes_positive_values() {
    let estimate = compute_cocomo(5_000, CocomoMode::Organic);
    assert_eq!(estimate.mode, CocomoMode::Organic);
    assert!(estimate.ksloc > 0.0);
    assert!(estimate.effort_person_months > 0.0);
    assert!(estimate.duration_months > 0.0);
    assert!(estimate.avg_staff > 0.0);
}
2529
/// For 20,000 lines in semi-detached mode the size, effort and duration are
/// strictly positive and the mode is echoed back.
#[test]
fn cocomo_semi_detached_computes_positive_values() {
    let estimate = compute_cocomo(20_000, CocomoMode::SemiDetached);
    assert_eq!(estimate.mode, CocomoMode::SemiDetached);
    assert!(estimate.ksloc > 0.0);
    assert!(estimate.effort_person_months > 0.0);
    assert!(estimate.duration_months > 0.0);
}
2538
/// For 100,000 lines in embedded mode the effort is strictly positive and the
/// mode is echoed back.
#[test]
fn cocomo_embedded_computes_positive_values() {
    let estimate = compute_cocomo(100_000, CocomoMode::Embedded);
    assert_eq!(estimate.mode, CocomoMode::Embedded);
    assert!(estimate.effort_person_months > 0.0);
}
2545
/// Zero input lines yield a size of zero (within `f64::EPSILON`) and an
/// effort within 0.01 of zero.
#[test]
fn cocomo_zero_lines_produces_zero_effort() {
    let estimate = compute_cocomo(0, CocomoMode::Organic);
    let size_is_zero = estimate.ksloc.abs() < f64::EPSILON;
    let effort_is_zero = estimate.effort_person_months.abs() < 0.01;
    assert!(size_is_zero);
    assert!(effort_is_zero);
}
2553
/// A `url = <value>` line yields the value.
#[test]
fn parse_url_line_extracts_url() {
    let parsed = parse_url_line("url = https://example.com/repo.git");
    assert_eq!(parsed, Some("https://example.com/repo.git"));
}
2563
/// A line whose key is not `url` (here `fetch`) yields `None`.
#[test]
fn parse_url_line_returns_none_for_non_url_key() {
    let parsed = parse_url_line("fetch = +refs/heads/*:refs/remotes/origin/*");
    assert_eq!(parsed, None);
}
2571
/// A `url =` line with nothing after the equals sign yields `None`.
#[test]
fn parse_url_line_returns_none_for_empty_url() {
    let parsed = parse_url_line("url = ");
    assert_eq!(parsed, None);
}
2576
/// A `.generated.` segment in the file name marks the file as generated even
/// when the content carries no marker.
#[test]
fn looks_generated_generated_filename_extension() {
    let path = Path::new("schema.generated.ts");
    assert!(looks_generated(path, b"// normal code\n"));
}
2583
/// A `.g.` segment in the file name marks the file as generated even when the
/// content carries no marker.
#[test]
fn looks_generated_dot_g_extension() {
    let path = Path::new("parser.g.cs");
    assert!(looks_generated(path, b"// normal code\n"));
}
2589
/// A short, dense one-line statement under an ordinary name is not reported
/// as minified.
#[test]
fn looks_minified_whitespace_ratio_is_ok() {
    let path = Path::new("app.js");
    let dense_but_short = b"var x=1,y=2,z=3;\n";
    assert!(!looks_minified(path, dense_but_short));
}
2596
/// `pnpm-lock.yaml` is a known lockfile.
#[test]
fn is_known_lockfile_pnpm() {
    let path = Path::new("pnpm-lock.yaml");
    assert!(is_known_lockfile(path));
}
2601
/// `Pipfile.lock` is a known lockfile.
#[test]
fn is_known_lockfile_pipfile() {
    let path = Path::new("Pipfile.lock");
    assert!(is_known_lockfile(path));
}
2606
/// `poetry.lock` is a known lockfile.
#[test]
fn is_known_lockfile_poetry() {
    let path = Path::new("poetry.lock");
    assert!(is_known_lockfile(path));
}
2611
/// `composer.lock` is a known lockfile.
#[test]
fn is_known_lockfile_composer() {
    let path = Path::new("composer.lock");
    assert!(is_known_lockfile(path));
}
2616
/// A path below the root is rendered relative to that root.
#[test]
fn relative_path_string_strips_root_prefix() {
    let root = Path::new("/tmp/project");
    let inside = Path::new("/tmp/project/src/lib.rs");
    assert_eq!(relative_path_string(inside, root), "src/lib.rs");
}
2626
/// A path outside the root still produces a non-empty string.
#[test]
fn relative_path_string_falls_back_to_full_path() {
    let root = Path::new("/tmp/project");
    let outside = Path::new("/other/dir/file.rs");
    assert!(!relative_path_string(outside, root).is_empty());
}
2636
2637 #[test]
2640 fn find_duplicate_groups_returns_empty_for_unique_hashes() {
2641 use sloc_languages::{Language, ParseMode, RawLineCounts};
2642 let make_rec = |hash: u64, path: &str| FileRecord {
2643 path: path.into(),
2644 relative_path: path.into(),
2645 language: Some(Language::Rust),
2646 size_bytes: 10,
2647 detected_encoding: Some("utf-8".into()),
2648 raw_line_categories: RawLineCounts::default(),
2649 effective_counts: EffectiveCounts::default(),
2650 status: FileStatus::AnalyzedExact,
2651 warnings: vec![],
2652 generated: false,
2653 minified: false,
2654 vendor: false,
2655 parse_mode: Some(ParseMode::Lexical),
2656 submodule: None,
2657 coverage: None,
2658 style_analysis: None,
2659 cyclomatic_complexity: None,
2660 lsloc: None,
2661 content_hash: hash,
2662 };
2663 let analyzed = vec![make_rec(111, "a.rs"), make_rec(222, "b.rs")];
2664 let groups = find_duplicate_groups(&analyzed);
2665 assert!(groups.is_empty());
2666 }
2667
2668 #[test]
2669 fn find_duplicate_groups_returns_group_for_same_hash() {
2670 use sloc_languages::{Language, ParseMode, RawLineCounts};
2671 let make_rec = |hash: u64, path: &str| FileRecord {
2672 path: path.into(),
2673 relative_path: path.into(),
2674 language: Some(Language::Rust),
2675 size_bytes: 10,
2676 detected_encoding: Some("utf-8".into()),
2677 raw_line_categories: RawLineCounts::default(),
2678 effective_counts: EffectiveCounts::default(),
2679 status: FileStatus::AnalyzedExact,
2680 warnings: vec![],
2681 generated: false,
2682 minified: false,
2683 vendor: false,
2684 parse_mode: Some(ParseMode::Lexical),
2685 submodule: None,
2686 coverage: None,
2687 style_analysis: None,
2688 cyclomatic_complexity: None,
2689 lsloc: None,
2690 content_hash: hash,
2691 };
2692 let analyzed = vec![
2693 make_rec(999, "a.rs"),
2694 make_rec(999, "b.rs"),
2695 make_rec(123, "c.rs"),
2696 ];
2697 let groups = find_duplicate_groups(&analyzed);
2698 assert_eq!(groups.len(), 1);
2699 assert_eq!(groups[0].len(), 2);
2700 }
2701
2702 #[test]
2703 fn find_duplicate_groups_ignores_zero_hash() {
2704 use sloc_languages::{Language, ParseMode, RawLineCounts};
2705 let make_rec = |hash: u64, path: &str| FileRecord {
2706 path: path.into(),
2707 relative_path: path.into(),
2708 language: Some(Language::Rust),
2709 size_bytes: 10,
2710 detected_encoding: Some("utf-8".into()),
2711 raw_line_categories: RawLineCounts::default(),
2712 effective_counts: EffectiveCounts::default(),
2713 status: FileStatus::AnalyzedExact,
2714 warnings: vec![],
2715 generated: false,
2716 minified: false,
2717 vendor: false,
2718 parse_mode: Some(ParseMode::Lexical),
2719 submodule: None,
2720 coverage: None,
2721 style_analysis: None,
2722 cyclomatic_complexity: None,
2723 lsloc: None,
2724 content_hash: hash,
2725 };
2726 let analyzed = vec![make_rec(0, "a.rs"), make_rec(0, "b.rs")];
2728 let groups = find_duplicate_groups(&analyzed);
2729 assert!(
2730 groups.is_empty(),
2731 "zero-hash files must not be grouped as duplicates"
2732 );
2733 }
2734
/// An empty directory (no `.gitmodules` file) yields no submodules.
#[test]
fn detect_submodules_no_gitmodules_returns_empty() {
    let repo = tempfile::tempdir().unwrap();
    assert!(detect_submodules(repo.path()).is_empty());
}
2743
/// A `.gitmodules` file declaring one submodule yields one entry whose first
/// element is the declared `vendor/lib`.
#[test]
fn detect_submodules_parses_gitmodules_file() {
    let repo = tempfile::tempdir().unwrap();
    let gitmodules = repo.path().join(".gitmodules");
    std::fs::write(
        gitmodules,
        "[submodule \"vendor/lib\"]\n\tpath = vendor/lib\n\turl = https://github.com/example/lib.git\n",
    )
    .unwrap();

    let submodules = detect_submodules(repo.path());
    assert_eq!(submodules.len(), 1);
    assert_eq!(submodules[0].0, "vendor/lib");
}
2753
/// Writes an `AnalysisRun` with `write_json`, reads it back with `read_json`,
/// and checks that the summary totals, git metadata and per-file record count
/// survive the round trip.
#[test]
fn write_json_read_json_roundtrip() {
    use chrono::Utc;
    use sloc_config::AppConfig;
    use sloc_languages::{Language, ParseMode, RawLineCounts};

    // The single analyzed file carried by the run: five code lines of Rust.
    let record = FileRecord {
        path: "a.rs".into(),
        relative_path: "a.rs".into(),
        language: Some(Language::Rust),
        size_bytes: 50,
        detected_encoding: Some("utf-8".into()),
        raw_line_categories: RawLineCounts {
            code_only_lines: 5,
            ..RawLineCounts::default()
        },
        effective_counts: EffectiveCounts {
            code_lines: 5,
            ..EffectiveCounts::default()
        },
        status: FileStatus::AnalyzedExact,
        warnings: vec![],
        generated: false,
        minified: false,
        vendor: false,
        parse_mode: Some(ParseMode::Lexical),
        submodule: None,
        coverage: None,
        style_analysis: None,
        cyclomatic_complexity: None,
        lsloc: None,
        content_hash: 0,
    };

    let tool = ToolMetadata {
        name: "sloc".into(),
        version: "0.0.1".into(),
        run_id: "test-roundtrip".into(),
        timestamp_utc: Utc::now(),
    };

    let environment = EnvironmentMetadata {
        operating_system: "test".into(),
        architecture: "x86_64".into(),
        runtime_mode: "test".into(),
        initiator_username: "tester".into(),
        initiator_hostname: "testhost".into(),
        ci_name: None,
    };

    let summary_totals = SummaryTotals {
        files_analyzed: 1,
        code_lines: 5,
        ..SummaryTotals::default()
    };

    let run = AnalysisRun {
        tool,
        environment,
        effective_configuration: AppConfig::default(),
        input_roots: vec!["/tmp/test".into()],
        summary_totals,
        totals_by_language: vec![],
        per_file_records: vec![record],
        skipped_file_records: vec![],
        warnings: vec![],
        submodule_summaries: vec![],
        git_commit_short: Some("abc1234".into()),
        git_branch: Some("main".into()),
        git_commit_long: None,
        git_commit_author: None,
        git_tags: None,
        git_nearest_tag: None,
        git_commit_date: None,
        git_remote_url: None,
        style_summary: None,
        cocomo: None,
        uloc: 0,
        dryness_pct: None,
        duplicate_groups: vec![],
        duplicates_excluded: 0,
    };

    // Persist to a scratch directory and load the file back.
    let scratch = tempfile::tempdir().unwrap();
    let json_path = scratch.path().join("test.json");
    write_json(&run, &json_path).unwrap();
    let reloaded = read_json(&json_path).unwrap();

    assert_eq!(reloaded.summary_totals.files_analyzed, 1);
    assert_eq!(reloaded.summary_totals.code_lines, 5);
    assert_eq!(reloaded.git_commit_short.as_deref(), Some("abc1234"));
    assert_eq!(reloaded.git_branch.as_deref(), Some("main"));
    assert_eq!(reloaded.per_file_records.len(), 1);
}
2839
/// Removes the listed CI-related environment variables and then calls
/// `detect_ci_system`.
///
/// The return value is discarded, so this only checks that detection runs to
/// completion without panicking. The environment lock is held for the whole
/// test because removing variables mutates process-global state that other
/// tests in this module also modify, and tests run in parallel by default.
#[test]
fn detect_ci_system_returns_none_without_env_vars() {
    let _env_guard = ci_env_lock();
    for var in &[
        "JENKINS_URL",
        "JENKINS_HOME",
        "BUILD_URL",
        "GITHUB_ACTIONS",
        "GITLAB_CI",
        "CIRCLECI",
        "TRAVIS",
        "TF_BUILD",
        "TEAMCITY_VERSION",
    ] {
        std::env::remove_var(var);
    }
    let _ = detect_ci_system();
}
2861
/// A `.git` file containing `gitdir: <absolute path>` that points at an
/// existing directory resolves to that directory.
#[test]
fn resolve_git_file_pointer_valid_absolute_gitdir() {
    let worktree = tempfile::tempdir().unwrap();

    // The real git directory the pointer file refers to.
    let target = worktree.path().join("real.git");
    fs::create_dir_all(&target).unwrap();

    // The `.git` pointer file itself.
    let pointer = worktree.path().join(".git");
    let pointer_contents = format!("gitdir: {}\n", target.display());
    fs::write(&pointer, pointer_contents).unwrap();

    let resolved = resolve_git_file_pointer(&pointer, worktree.path());
    assert!(
        resolved.is_some(),
        "should resolve a valid absolute gitdir pointer"
    );
    assert!(resolved.unwrap().is_dir());
}
2882
/// A `.git` file whose content does not start with `gitdir:` yields `None`.
#[test]
fn resolve_git_file_pointer_missing_gitdir_prefix_returns_none() {
    let worktree = tempfile::tempdir().unwrap();
    let pointer = worktree.path().join(".git");
    fs::write(&pointer, "not a gitdir line\n").unwrap();

    let resolved = resolve_git_file_pointer(&pointer, worktree.path());
    assert!(resolved.is_none());
}
2890
/// A pointer path that does not exist on disk yields `None`.
#[test]
fn resolve_git_file_pointer_unreadable_path_returns_none() {
    let pointer = Path::new("/nonexistent/__sloc_test_git_file__");
    let base = Path::new("/nonexistent");
    let resolved = resolve_git_file_pointer(pointer, base);
    assert!(resolved.is_none());
}
2899
/// A well-formed `gitdir:` pointer whose target does not exist yields `None`.
#[test]
fn resolve_git_file_pointer_nonexistent_target_returns_none() {
    let worktree = tempfile::tempdir().unwrap();
    let pointer = worktree.path().join(".git");
    fs::write(&pointer, "gitdir: /nonexistent/__sloc_fake_gitdir_xyz__\n").unwrap();

    let resolved = resolve_git_file_pointer(&pointer, worktree.path());
    assert!(resolved.is_none());
}
2908
/// A relative `gitdir:` value naming a directory that exists beside the
/// pointer file, with that directory's parent passed as the base, resolves
/// to `Some`.
#[test]
fn resolve_git_file_pointer_relative_path() {
    let worktree = tempfile::tempdir().unwrap();
    let target = worktree.path().join("real_git_dir");
    fs::create_dir_all(&target).unwrap();

    let pointer = worktree.path().join(".git");
    fs::write(&pointer, "gitdir: real_git_dir\n").unwrap();

    let resolved = resolve_git_file_pointer(&pointer, worktree.path());
    assert!(resolved.is_some());
}
2920
/// A loose ref file under `refs/heads/` containing a commit id followed by a
/// newline resolves to that id with the newline removed.
#[test]
fn resolve_ref_from_loose_file() {
    let dir = tempfile::tempdir().unwrap();
    let git_dir = dir.path();
    fs::create_dir_all(git_dir.join("refs/heads")).unwrap();

    // A full SHA-1 object name is exactly 40 hex digits; pin the fixture's
    // length so it stays a realistic commit id.
    let sha = "abc1234567890abcdef1234567890abcdef12345";
    assert_eq!(sha.len(), 40, "fixture must be a full 40-character SHA-1");
    fs::write(git_dir.join("refs/heads/main"), format!("{sha}\n")).unwrap();

    let result = resolve_ref(git_dir, "refs/heads/main");
    assert_eq!(result.as_deref(), Some(sha));
}
2934
/// With no loose ref file present, the ref is looked up in `packed-refs`;
/// the header comment line there is not mistaken for an entry.
#[test]
fn resolve_ref_from_packed_refs() {
    let repo = tempfile::tempdir().unwrap();
    let git_dir = repo.path();
    let sha = "def5678def5678def5678def5678def5678def56";

    let packed =
        format!("# pack-refs with: peeled fully-peeled sorted\n{sha} refs/heads/feature\n");
    fs::write(git_dir.join("packed-refs"), packed).unwrap();

    let resolved = resolve_ref(git_dir, "refs/heads/feature");
    assert_eq!(resolved.as_deref(), Some(sha));
}
2949
/// In an empty git directory an unknown ref resolves to `None`.
#[test]
fn resolve_ref_not_found_returns_none() {
    let repo = tempfile::tempdir().unwrap();
    let resolved = resolve_ref(repo.path(), "refs/heads/nonexistent-branch-xyz");
    assert!(resolved.is_none());
}
2956
/// Lines in `packed-refs` starting with `#` (comments) or `^` (peeled
/// entries) do not prevent the following ref entry from being resolved.
#[test]
fn resolve_ref_packed_refs_skips_comment_and_peeled() {
    let repo = tempfile::tempdir().unwrap();
    let git_dir = repo.path();
    let sha = "aaa1111aaa1111aaa1111aaa1111aaa1111aaa11";

    let packed = format!("# comment\n^peeled-object-sha\n{sha} refs/tags/v1.0\n");
    fs::write(git_dir.join("packed-refs"), packed).unwrap();

    let resolved = resolve_ref(git_dir, "refs/tags/v1.0");
    assert_eq!(resolved.as_deref(), Some(sha));
}
2971
/// A loose ref file holding a too-short value ("short") is not accepted as a
/// commit id; with no `packed-refs` file present the lookup ends in `None`.
#[test]
fn resolve_ref_loose_sha_too_short_falls_through_to_packed() {
    let repo = tempfile::tempdir().unwrap();
    let git_dir = repo.path();
    let heads = git_dir.join("refs/heads");
    fs::create_dir_all(&heads).unwrap();
    fs::write(heads.join("main"), "short\n").unwrap();

    let resolved = resolve_ref(git_dir, "refs/heads/main");
    assert!(resolved.is_none());
}
2983
/// The `url` entry of the `[remote "origin"]` section in the git `config`
/// file is returned as the remote URL.
#[test]
fn read_git_remote_url_parses_origin_url() {
    let repo = tempfile::tempdir().unwrap();
    let git_dir = repo.path().join(".git");
    fs::create_dir_all(&git_dir).unwrap();

    let config_path = git_dir.join("config");
    fs::write(
        config_path,
        "[core]\n\trepositoryformatversion = 0\n[remote \"origin\"]\n\turl = https://github.com/org/repo.git\n\tfetch = +refs/heads/*:refs/remotes/origin/*\n",
    )
    .unwrap();

    let remote = read_git_remote_url(&git_dir);
    assert_eq!(remote.as_deref(), Some("https://github.com/org/repo.git"));
}
2999
/// A git directory without a `config` file yields no remote URL.
#[test]
fn read_git_remote_url_no_config_returns_none() {
    let repo = tempfile::tempdir().unwrap();
    let git_dir = repo.path().join(".git");
    fs::create_dir_all(&git_dir).unwrap();

    assert!(read_git_remote_url(&git_dir).is_none());
}
3009
/// A directory with no `.git` entry produces git info without a commit id.
#[test]
fn detect_git_for_run_no_git_dir_returns_default() {
    let project = tempfile::tempdir().unwrap();
    let git_info = detect_git_for_run(project.path());
    assert!(git_info.commit_long.is_none());
}
3019
/// A `.git` directory that contains no `HEAD` file produces git info without
/// a commit id.
#[test]
fn detect_git_for_run_unreadable_head_returns_default() {
    let project = tempfile::tempdir().unwrap();
    // Create `.git` but deliberately leave it empty (no HEAD).
    fs::create_dir_all(project.path().join(".git")).unwrap();

    let git_info = detect_git_for_run(project.path());
    assert!(git_info.commit_long.is_none());
}
3029
/// When `HEAD` holds a raw 40-character commit id (detached HEAD), that id is
/// reported as the long commit and its first seven characters as the short.
#[test]
fn detect_git_for_run_detached_head_with_sha() {
    let project = tempfile::tempdir().unwrap();
    let git_dir = project.path().join(".git");
    fs::create_dir_all(&git_dir).unwrap();

    let sha = "abc1234567890abcdef1234567890abcdef12345";
    fs::write(git_dir.join("HEAD"), sha).unwrap();

    let git_info = detect_git_for_run(project.path());
    assert_eq!(git_info.commit_short.as_deref(), Some("abc1234"));
    assert_eq!(git_info.commit_long.as_deref(), Some(sha));
}
3043
/// When `HEAD` is a symbolic ref to `refs/heads/main` and that ref exists
/// only in `packed-refs`, both the commit id and the branch name are reported.
#[test]
fn detect_git_for_run_with_packed_ref() {
    let project = tempfile::tempdir().unwrap();
    let git_dir = project.path().join(".git");
    fs::create_dir_all(&git_dir).unwrap();

    // Symbolic HEAD; the branch tip lives only in packed-refs.
    fs::write(git_dir.join("HEAD"), "ref: refs/heads/main\n").unwrap();
    let sha = "deadbeef00000000000000000000000000000000";
    let packed = format!("# pack-refs\n{sha} refs/heads/main\n");
    fs::write(git_dir.join("packed-refs"), packed).unwrap();

    let git_info = detect_git_for_run(project.path());
    assert_eq!(git_info.branch.as_deref(), Some("main"));
    assert_eq!(git_info.commit_long.as_deref(), Some(sha));
}
3061
3062 use std::sync::{Mutex, OnceLock};
/// Process-wide lock used by tests that modify environment variables, so
/// that they do not interleave when the test harness runs them in parallel.
static CI_ENV_LOCK: OnceLock<Mutex<()>> = OnceLock::new();

/// Acquires the environment lock, initialising it on first use.
///
/// A poisoned lock is recovered instead of propagated: the mutex guards no
/// data (`()`), so a panic in one test while holding the guard (for example a
/// failed assertion) must not make every later environment test fail as well.
fn ci_env_lock() -> std::sync::MutexGuard<'static, ()> {
    CI_ENV_LOCK
        .get_or_init(|| Mutex::new(()))
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner)
}
3070
/// Removes every environment variable in the list below from the current
/// process, so a test can start from (and leave behind) a clean state.
fn clear_branch_env_vars() {
    const BRANCH_VARS: [&str; 7] = [
        "BRANCH_NAME",
        "GIT_BRANCH",
        "GITHUB_REF_NAME",
        "CI_COMMIT_BRANCH",
        "CIRCLE_BRANCH",
        "TRAVIS_BRANCH",
        "BUILD_SOURCEBRANCH",
    ];
    for name in BRANCH_VARS.iter() {
        std::env::remove_var(name);
    }
}
3084
/// A `BUILD_SOURCEBRANCH` value of `refs/heads/<name>` is reported as
/// `<name>`.
#[test]
fn ci_branch_from_env_strips_refs_heads_prefix() {
    // Hold the env lock for the whole test; the environment is process-global.
    let _env_guard = ci_env_lock();
    clear_branch_env_vars();
    std::env::set_var("BUILD_SOURCEBRANCH", "refs/heads/my-branch");

    let detected = ci_branch_from_env();

    // Clean up before asserting so a failure does not leak the variable.
    clear_branch_env_vars();
    assert_eq!(detected.as_deref(), Some("my-branch"));
}
3095
/// A `GIT_BRANCH` value of `origin/<name>` is reported as `<name>`.
#[test]
fn ci_branch_from_env_strips_origin_prefix() {
    // Hold the env lock for the whole test; the environment is process-global.
    let _env_guard = ci_env_lock();
    clear_branch_env_vars();
    std::env::set_var("GIT_BRANCH", "origin/develop");

    let detected = ci_branch_from_env();

    // Clean up before asserting so a failure does not leak the variable.
    clear_branch_env_vars();
    assert_eq!(detected.as_deref(), Some("develop"));
}
3105
/// A `BRANCH_NAME` value of the literal `HEAD` is not reported as a branch.
#[test]
fn ci_branch_from_env_returns_none_for_head() {
    // Hold the env lock for the whole test; the environment is process-global.
    let _env_guard = ci_env_lock();
    clear_branch_env_vars();
    std::env::set_var("BRANCH_NAME", "HEAD");

    let branch = ci_branch_from_env();

    // Clean up before asserting so a failure does not leak the variable.
    clear_branch_env_vars();
    assert!(branch.is_none(), "HEAD should be filtered, got: {branch:?}");
}
3117}