1#![allow(clippy::multiple_crate_versions)]
4
5pub mod baseline;
6pub mod coverage;
7pub mod delta;
8pub mod history;
9pub use baseline::{check_against_baseline, resolve_baselines_path, BaselineEntry, BaselineStore};
10pub use coverage::{aggregate_line_coverage, lookup_coverage, parse_lcov, FileCoverage};
11pub use delta::{
12 compute_delta, compute_multi_delta, FileChangeStatus, FileDelta, MultiFileDelta,
13 MultiScanComparison, MultiScanPoint, ScanComparison, SummaryDelta,
14};
15pub use history::{
16 CleanupPolicy, CleanupPolicyStore, RegistryEntry, ScanRegistry, ScanSummarySnapshot,
17 WatchedDirsStore,
18};
19
20use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
21use std::fs;
22use std::path::{Path, PathBuf};
23use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
24use std::sync::Arc;
25
26use anyhow::{Context, Result};
27use chrono::{DateTime, Utc};
28use encoding_rs::{UTF_16BE, UTF_16LE, WINDOWS_1252};
29use globset::{Glob, GlobSet, GlobSetBuilder};
30use ignore::WalkBuilder;
31use serde::{Deserialize, Serialize};
32use uuid::Uuid;
33
34use sloc_config::{
35 AppConfig, BinaryFileBehavior, BlankInBlockCommentPolicy, ContinuationLinePolicy,
36 FailureBehavior, MixedLinePolicy,
37};
38use sloc_languages::style::IndentStyle;
39use sloc_languages::{
40 analyze_text, detect_language, supported_languages, AnalysisOptions, Language, ParseMode,
41 RawLineCounts, StyleAnalysis, StyleLangScope,
42};
43
/// Upper bound on the number of worker threads spawned for parallel file analysis.
const MAX_ANALYSIS_THREADS: usize = 16;
/// Worker-thread count used when the available parallelism cannot be determined.
const DEFAULT_ANALYSIS_THREADS: usize = 4;
/// NOTE(review): presumably the number of leading bytes sampled when looking for
/// generated-file markers; the consumer is not visible in this part of the file — confirm.
const GENERATED_SAMPLE_BYTES: usize = 1024;
/// NOTE(review): presumably the number of leading bytes sampled for minified-file
/// detection — confirm against the consumer.
const MINIFIED_SAMPLE_BYTES: usize = 4096;
/// NOTE(review): presumably the line-length threshold used by minified-file
/// detection — confirm against the consumer.
const MINIFIED_LINE_THRESHOLD: usize = 2000;
/// NOTE(review): presumably the number of leading bytes sampled for binary-file
/// detection — confirm against the consumer.
const BINARY_SAMPLE_BYTES: usize = 8192;
58
/// Shared counters that let an observer follow the progress of a running scan.
///
/// Both counters are atomics behind `Arc`s, so handles can be cloned and read
/// while the analysis updates them.
pub struct ProgressCounters {
    /// Incremented by one each time a worker finishes processing a candidate file.
    pub files_done: Arc<AtomicUsize>,
    /// Increased by the number of discovered paths each time a root directory is walked.
    pub files_total: Arc<AtomicUsize>,
}
66
/// Result of a metadata-based policy check on a candidate file.
///
/// NOTE(review): the producer and consumer of this enum are not visible in this
/// part of the file; the variant descriptions below are inferred from the names
/// and payloads only — confirm against the call sites.
enum MetadataPolicyOutcome {
    /// Carries a boxed `FileRecord`; presumably the file is reported as skipped.
    Skip(Box<FileRecord>),
    /// Presumably the file is dropped without producing any record.
    Exclude,
    /// Presumably the file proceeds to further analysis.
    Continue,
}
76
/// Outcome category stored in [`FileRecord::status`].
///
/// Serialized using snake_case variant names (e.g. `analyzed_exact`).
/// NOTE(review): the precise conditions that produce each variant are decided
/// outside this part of the file; the names are the only guide here.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FileStatus {
    AnalyzedExact,
    AnalyzedBestEffort,
    SkippedBinary,
    SkippedDecodeError,
    SkippedUnsupported,
    SkippedByPolicy,
    ErrorInternal,
}
88
/// Project class selecting the coefficient set used by `compute_cocomo`.
///
/// Serialized using snake_case variant names; `Organic` is the default.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum CocomoMode {
    /// Uses coefficients (2.4, 1.05) for effort and (2.5, 0.38) for duration.
    #[default]
    Organic,
    /// Uses coefficients (3.0, 1.12) for effort and (2.5, 0.35) for duration.
    SemiDetached,
    /// Uses coefficients (3.6, 1.20) for effort and (2.5, 0.32) for duration.
    Embedded,
}
101
/// Effort/schedule estimate produced by `compute_cocomo`.
///
/// All floating-point values are rounded to two decimal places by the producer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CocomoEstimate {
    /// Coefficient set the estimate was computed with.
    pub mode: CocomoMode,
    /// Code lines divided by 1000.
    pub ksloc: f64,
    /// Estimated effort: `a * ksloc^b`.
    pub effort_person_months: f64,
    /// Estimated schedule: `c * effort^d`.
    pub duration_months: f64,
    /// Effort divided by duration, or 0 when the duration is not positive.
    pub avg_staff: f64,
}
115
/// Per-file line counts used for summaries and derived metrics.
///
/// NOTE(review): presumably these are the raw line categories after the
/// configured mixed-line/continuation policies have been applied; the
/// computation is not visible in this part of the file — confirm.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct EffectiveCounts {
    /// Lines counted as code; summed by `compute_uloc` as the total code size.
    pub code_lines: u64,
    pub comment_lines: u64,
    pub blank_lines: u64,
    /// NOTE(review): presumably mixed code+comment lines reported separately — confirm.
    pub mixed_lines_separate: u64,
}
123
/// Identity of the tool invocation that produced an [`AnalysisRun`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolMetadata {
    /// Tool name; `assemble_run` sets this to `"sloc"`.
    pub name: String,
    /// Crate version taken from `CARGO_PKG_VERSION` at build time.
    pub version: String,
    /// Run identifier of the form `<YYYYmmdd-HHMM>-<uuid v4, simple hex>`.
    pub run_id: String,
    /// UTC time at which the run was assembled.
    pub timestamp_utc: DateTime<Utc>,
}
131
/// Description of the machine and context in which a run was executed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnvironmentMetadata {
    /// Value of `std::env::consts::OS`.
    pub operating_system: String,
    /// Value of `std::env::consts::ARCH`.
    pub architecture: String,
    /// Caller-supplied runtime mode string, stored as given.
    pub runtime_mode: String,
    /// User name resolved by `get_current_username`.
    pub initiator_username: String,
    /// Host or CI-runner name resolved by `get_hostname`.
    pub initiator_hostname: String,
    /// Name of the detected CI system, if any; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub ci_name: Option<String>,
}
144
/// Totals aggregated over a whole run.
///
/// Fields marked `#[serde(default)]` fall back to their default value when
/// missing from deserialized input.
/// NOTE(review): the aggregation itself (`build_summary`) is not visible in
/// this part of the file; field meanings are taken from their names.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct SummaryTotals {
    pub files_considered: u64,
    pub files_analyzed: u64,
    pub files_skipped: u64,
    pub total_physical_lines: u64,
    /// Total code lines; `assemble_run` feeds this into the COCOMO estimate.
    pub code_lines: u64,
    pub comment_lines: u64,
    pub blank_lines: u64,
    pub mixed_lines_separate: u64,
    #[serde(default)]
    pub functions: u64,
    #[serde(default)]
    pub classes: u64,
    #[serde(default)]
    pub variables: u64,
    #[serde(default)]
    pub imports: u64,
    #[serde(default)]
    pub test_count: u64,
    #[serde(default)]
    pub test_assertion_count: u64,
    #[serde(default)]
    pub test_suite_count: u64,
    #[serde(default)]
    pub coverage_lines_found: u64,
    #[serde(default)]
    pub coverage_lines_hit: u64,
    #[serde(default)]
    pub coverage_functions_found: u64,
    #[serde(default)]
    pub coverage_functions_hit: u64,
    #[serde(default)]
    pub coverage_branches_found: u64,
    #[serde(default)]
    pub coverage_branches_hit: u64,
    #[serde(default)]
    pub cyclomatic_complexity: u64,
    /// NOTE(review): presumably logical source lines of code — confirm.
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub lsloc: Option<u64>,
}
191
/// Totals for the files of a single language.
///
/// Fields marked `#[serde(default)]` fall back to their default value when
/// missing from deserialized input.
/// NOTE(review): the aggregation (`build_language_summaries`) is not visible in
/// this part of the file; field meanings are taken from their names.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LanguageSummary {
    /// Language these totals belong to.
    pub language: Language,
    pub files: u64,
    pub total_physical_lines: u64,
    pub code_lines: u64,
    pub comment_lines: u64,
    pub blank_lines: u64,
    pub mixed_lines_separate: u64,
    #[serde(default)]
    pub functions: u64,
    #[serde(default)]
    pub classes: u64,
    #[serde(default)]
    pub variables: u64,
    #[serde(default)]
    pub imports: u64,
    #[serde(default)]
    pub test_count: u64,
    #[serde(default)]
    pub test_assertion_count: u64,
    #[serde(default)]
    pub test_suite_count: u64,
    #[serde(default)]
    pub coverage_lines_found: u64,
    #[serde(default)]
    pub coverage_lines_hit: u64,
    #[serde(default)]
    pub coverage_functions_found: u64,
    #[serde(default)]
    pub coverage_functions_hit: u64,
    #[serde(default)]
    pub coverage_branches_found: u64,
    #[serde(default)]
    pub coverage_branches_hit: u64,
    #[serde(default)]
    pub cyclomatic_complexity: u64,
    /// NOTE(review): presumably logical source lines of code — confirm.
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub lsloc: Option<u64>,
}
232
/// Everything recorded about one candidate file, whether analyzed or skipped.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileRecord {
    pub path: String,
    /// Path used as the sort key for results and as the lookup key for
    /// coverage, git activity and submodule matching.
    pub relative_path: String,
    pub language: Option<Language>,
    pub size_bytes: u64,
    pub detected_encoding: Option<String>,
    /// Raw per-category line data; its `code_line_hashes` feed `compute_uloc`.
    pub raw_line_categories: RawLineCounts,
    pub effective_counts: EffectiveCounts,
    pub status: FileStatus,
    /// Per-file warnings; also copied into the run-level warning list,
    /// prefixed with the file's relative path.
    pub warnings: Vec<String>,
    pub generated: bool,
    pub minified: bool,
    pub vendor: bool,
    pub parse_mode: Option<ParseMode>,
    /// Name of the submodule containing this file, set by `process_submodules`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub submodule: Option<String>,
    /// Coverage data matched to this file by `attach_coverage`, if any.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub coverage: Option<FileCoverage>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub style_analysis: Option<StyleAnalysis>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cyclomatic_complexity: Option<u32>,
    /// NOTE(review): presumably logical source lines of code — confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub lsloc: Option<u32>,
    /// Number of git log entries touching this file within the configured
    /// activity window; filled in by `assemble_run` when the window is enabled.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub commit_count: Option<u32>,
    /// Author date of the first log entry seen for this file in the activity log.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_commit_date: Option<String>,
    /// Content hash used by `find_duplicate_groups`; `0` means "no hash".
    /// Never serialized or deserialized.
    #[serde(skip)]
    pub content_hash: u64,
}
275
/// Style statistics for one language family within a [`StyleSummary`].
///
/// NOTE(review): the producer (`build_style_summary`) is not visible in this
/// part of the file; field descriptions are inferred from names and types.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LanguageStyleGroup {
    pub language_family: String,
    pub files_count: u32,
    /// Presumably the style guide with the best average score for this group.
    pub dominant_guide: String,
    /// Presumably a percentage in 0..=100 — confirm.
    pub dominant_score_pct: u8,
    pub common_indent_style: String,
    /// Pairs of (guide name, score); presumably average percentages — confirm.
    pub guide_avg_scores: Vec<(String, u8)>,
    /// Presumably the share of files/lines within an 80-column limit — confirm.
    pub line80_compliant_pct: u8,
    /// Presumably the share complying with the configured column threshold — confirm.
    pub line_col_compliant_pct: u8,
}
296
/// Run-level style statistics, stored in [`AnalysisRun::style_summary`].
///
/// NOTE(review): the producer (`build_style_summary`) is not visible in this
/// part of the file; field descriptions are inferred from names and types.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StyleSummary {
    pub files_analyzed: u32,
    pub common_indent_style: String,
    /// Presumably the share complying with an 80-column limit — confirm.
    pub line80_compliant_pct: u8,
    /// Presumably the share complying with `col_threshold` — confirm.
    pub line_col_compliant_pct: u8,
    /// Column threshold; `assemble_run` passes `config.analysis.style_col_threshold`
    /// to the builder.
    pub col_threshold: u16,
    /// Per-language-family breakdown.
    pub by_language: Vec<LanguageStyleGroup>,
}
313
/// Alias for [`StyleSummary`].
///
/// NOTE(review): presumably kept so code written against an older,
/// C++-specific name keeps compiling — confirm before removing.
pub type CppStyleSummary = StyleSummary;
317
/// Totals and git metadata for one detected submodule.
///
/// NOTE(review): built by `build_submodule_summaries`, which is not visible in
/// this part of the file. All optional git fields are omitted from serialized
/// output when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SubmoduleSummary {
    /// Submodule name; matches the value stored in `FileRecord::submodule`.
    pub name: String,
    pub relative_path: String,
    pub files_analyzed: u64,
    pub total_physical_lines: u64,
    pub code_lines: u64,
    pub comment_lines: u64,
    pub blank_lines: u64,
    pub language_summaries: Vec<LanguageSummary>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_short: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_long: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_branch: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_author: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_date: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_remote_url: Option<String>,
}
348
/// Complete result of one analysis run, as assembled by `assemble_run`.
///
/// Optional fields and empty vectors carrying `skip_serializing_if` are
/// omitted from serialized output; fields with `#[serde(default)]` fall back
/// to their default when missing from deserialized input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalysisRun {
    pub tool: ToolMetadata,
    pub environment: EnvironmentMetadata,
    /// Copy of the configuration the run was executed with.
    pub effective_configuration: AppConfig,
    /// The configured root paths, converted to strings.
    pub input_roots: Vec<String>,
    pub summary_totals: SummaryTotals,
    pub totals_by_language: Vec<LanguageSummary>,
    /// Records of analyzed files, sorted by relative path.
    pub per_file_records: Vec<FileRecord>,
    /// Records of skipped files, sorted by relative path.
    pub skipped_file_records: Vec<FileRecord>,
    pub warnings: Vec<String>,
    /// Per-submodule breakdown; empty unless submodule breakdown is enabled.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub submodule_summaries: Vec<SubmoduleSummary>,
    /// First seven characters of `git_commit_long`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_short: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_long: Option<String>,
    /// Branch read from `HEAD`, or from CI environment variables as a fallback.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_branch: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_author: Option<String>,
    /// Tags pointing at `HEAD`, joined with `", "`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_tags: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_nearest_tag: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_date: Option<String>,
    /// URL of the `origin` remote read from the repository's `config` file.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_remote_url: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub style_summary: Option<StyleSummary>,
    /// COCOMO estimate; present only when the run has at least one code line.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cocomo: Option<CocomoEstimate>,
    /// Number of distinct code-line hashes across all analyzed files.
    #[serde(default)]
    pub uloc: u64,
    /// `uloc` as a percentage of total code lines; `None` when there is no code.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub dryness_pct: Option<f32>,
    /// Groups of relative paths whose files share the same content hash.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub duplicate_groups: Vec<Vec<String>>,
    /// `assemble_run` always sets this to 0.
    /// NOTE(review): presumably updated by a later de-duplication step — confirm.
    #[serde(default)]
    pub duplicates_excluded: usize,
}
406
/// Git metadata gathered by `detect_git_for_run`; every field is optional
/// because each lookup can fail independently.
#[derive(Default)]
struct GitInfo {
    /// First seven characters of `commit_long`.
    commit_short: Option<String>,
    /// Commit hash that `HEAD` resolves to.
    commit_long: Option<String>,
    /// Branch from `HEAD`, falling back to CI environment variables.
    branch: Option<String>,
    /// Output of `git log -1 --format=%an HEAD`.
    author: Option<String>,
    /// Tags pointing at `HEAD`, joined with `", "`.
    tags: Option<String>,
    /// Output of `git describe --tags --abbrev=0 HEAD`.
    nearest_tag: Option<String>,
    /// Output of `git log -1 --format=%aI HEAD`.
    commit_date: Option<String>,
    /// URL of the `origin` remote from the repository's `config` file.
    remote_url: Option<String>,
}
418
419fn find_git_dir(start: &Path) -> Option<PathBuf> {
423 let mut current = Some(start);
424 while let Some(dir) = current {
425 let candidate = dir.join(".git");
426 if candidate.is_dir() {
427 return Some(candidate);
428 }
429 if candidate.is_file() {
430 if let Some(resolved) = resolve_git_file_pointer(&candidate, dir) {
431 return Some(resolved);
432 }
433 }
434 current = dir.parent();
435 }
436 None
437}
438
/// Resolves a `.git` *file* of the form `gitdir: <path>` to the directory it
/// points at.
///
/// `file` is the pointer file and `base_dir` is the directory relative
/// pointers are resolved against. Forward slashes in the pointer are converted
/// to the platform separator first. The result is canonicalized when possible
/// and returned only if it is an existing directory; otherwise `None`.
fn resolve_git_file_pointer(file: &Path, base_dir: &Path) -> Option<PathBuf> {
    let text = fs::read_to_string(file).ok()?;
    let target = text.trim().strip_prefix("gitdir: ")?;
    let native = target.replace('/', std::path::MAIN_SEPARATOR_STR);
    let native_path = Path::new(&native);

    let joined = if native_path.is_absolute() {
        native_path.to_path_buf()
    } else {
        base_dir.join(native_path)
    };
    // Fall back to the un-canonicalized path if canonicalization fails.
    let git_dir = joined.canonicalize().unwrap_or(joined);

    git_dir.is_dir().then_some(git_dir)
}
463
/// Resolves `refname` (e.g. `refs/heads/main`) to a commit hash inside `git_dir`.
///
/// The loose ref file is tried first: its trimmed content is accepted when it
/// is at least 40 characters long and consists only of hex digits. Otherwise
/// the `packed-refs` file is scanned for a `<sha> <refname>` line.
///
/// Comment lines (`#`), peeled-tag lines (`^`) and any line that does not have
/// the `<sha> <name>` shape are skipped, so a stray blank or malformed line
/// does not abort the lookup. Returns `None` when the ref cannot be found.
fn resolve_ref(git_dir: &Path, refname: &str) -> Option<String> {
    // Join component by component so the ref name's '/' separators become
    // native path separators.
    let ref_path = refname
        .split('/')
        .fold(git_dir.to_path_buf(), |p, c| p.join(c));
    if let Ok(raw) = fs::read_to_string(&ref_path) {
        let sha = raw.trim();
        if sha.len() >= 40 && sha.chars().all(|c| c.is_ascii_hexdigit()) {
            return Some(sha.to_string());
        }
    }

    let packed = fs::read_to_string(git_dir.join("packed-refs")).ok()?;
    packed
        .lines()
        .filter(|line| !line.starts_with('#') && !line.starts_with('^'))
        .filter_map(|line| line.split_once(' '))
        .find(|(_, name)| name.trim() == refname)
        .map(|(sha, _)| sha.to_string())
}
499
/// Extracts the value from a git-config `url = <value>` line.
///
/// `line` must start with the key `url`, optionally followed by spaces or tabs
/// and then `=`. The value is returned trimmed; `None` is returned when the
/// line has a different key, lacks the `=`, or has an empty value.
fn parse_url_line(line: &str) -> Option<&str> {
    line.strip_prefix("url")
        .map(|after_key| after_key.trim_start_matches([' ', '\t']))
        .and_then(|after_ws| after_ws.strip_prefix('='))
        .map(str::trim)
        .filter(|value| !value.is_empty())
}
511
512fn read_git_remote_url(git_dir: &Path) -> Option<String> {
514 let config = fs::read_to_string(git_dir.join("config")).ok()?;
515 let mut in_origin = false;
516 for line in config.lines() {
517 let trimmed = line.trim();
518 if trimmed.starts_with('[') {
519 in_origin = trimmed == r#"[remote "origin"]"#;
520 } else if in_origin {
521 if let Some(url) = parse_url_line(trimmed) {
522 return Some(url.to_owned());
523 }
524 }
525 }
526 None
527}
528
529fn detect_git_for_run(project_path: &Path) -> GitInfo {
533 let ci_branch = ci_branch_from_env();
535
536 let Some(git_dir) = find_git_dir(project_path) else {
537 return GitInfo {
540 branch: ci_branch,
541 ..GitInfo::default()
542 };
543 };
544
545 let head_raw = match fs::read_to_string(git_dir.join("HEAD")) {
546 Ok(s) => s.trim().to_string(),
547 Err(_) => {
548 return GitInfo {
549 branch: ci_branch,
550 ..GitInfo::default()
551 }
552 }
553 };
554
555 let (branch_from_head, commit_long) = head_raw.strip_prefix("ref: ").map_or_else(
556 || {
557 if head_raw.len() >= 40 && head_raw.chars().all(|c| c.is_ascii_hexdigit()) {
558 (None, Some(head_raw[..40].to_string()))
560 } else {
561 (None, None)
562 }
563 },
564 |refname| {
565 let branch = refname
566 .strip_prefix("refs/heads/")
567 .map(|b| b.trim().to_string());
568 let sha = resolve_ref(&git_dir, refname.trim());
569 (branch, sha)
570 },
571 );
572 let branch = branch_from_head.or(ci_branch);
575
576 let commit_short = commit_long
577 .as_deref()
578 .map(|s| s.chars().take(7).collect::<String>());
579
580 let author = run_git_cmd(project_path, &["log", "-1", "--format=%an", "HEAD"]);
581 let commit_date = run_git_cmd(project_path, &["log", "-1", "--format=%aI", "HEAD"]);
582 let remote_url = read_git_remote_url(&git_dir);
583
584 let tags = run_git_cmd(project_path, &["tag", "--points-at", "HEAD"]).map(|t| {
587 t.lines()
588 .filter(|l| !l.is_empty())
589 .collect::<Vec<_>>()
590 .join(", ")
591 });
592 let nearest_tag = run_git_cmd(project_path, &["describe", "--tags", "--abbrev=0", "HEAD"]);
593
594 GitInfo {
595 commit_short,
596 commit_long,
597 branch,
598 author,
599 tags,
600 nearest_tag,
601 commit_date,
602 remote_url,
603 }
604}
605
/// Runs `git -c safe.directory=* <args>` in `dir` and returns its trimmed stdout.
///
/// Several executable locations are tried in order (`git` on `PATH`, then
/// common Unix, Homebrew and Windows install paths). The first invocation that
/// exits successfully with non-empty UTF-8 output wins; `None` is returned
/// when every candidate fails.
fn run_git_cmd(dir: &Path, args: &[&str]) -> Option<String> {
    const GIT_CANDIDATES: [&str; 7] = [
        "git",
        "/usr/bin/git",
        "/usr/local/bin/git",
        "/opt/homebrew/bin/git",
        r"C:\Program Files\Git\cmd\git.exe",
        r"C:\Program Files\Git\bin\git.exe",
        r"C:\Program Files (x86)\Git\cmd\git.exe",
    ];

    GIT_CANDIDATES.iter().find_map(|&exe| {
        let output = std::process::Command::new(exe)
            .args(["-c", "safe.directory=*"])
            .args(args)
            .current_dir(dir)
            .output()
            .ok()?;
        if !output.status.success() {
            return None;
        }
        let stdout = String::from_utf8(output.stdout).ok()?;
        let text = stdout.trim();
        (!text.is_empty()).then(|| text.to_string())
    })
}
640
641fn detect_file_activity(
646 project_path: &Path,
647 window_days: u32,
648) -> HashMap<String, (u32, Option<String>)> {
649 let since = format!("--since={window_days} days ago");
650 let out = run_git_cmd(
654 project_path,
655 &[
656 "-c",
657 "core.quotepath=false",
658 "log",
659 since.as_str(),
660 "--no-merges",
661 "--name-status",
662 "--relative",
663 "--pretty=format:%x00%aI",
664 ],
665 );
666 out.map(|s| parse_activity_log(&s)).unwrap_or_default()
667}
668
/// Parses `git log --name-status --pretty=format:%x00%aI` output into a map of
/// `path -> (number of log entries touching the path, date of the first entry seen)`.
///
/// Lines starting with a NUL byte open a new commit and carry its author date;
/// every following non-blank line is a tab-separated name-status record. For
/// renames and copies (status starting with `R` or `C`) the last field — the
/// destination path — is used; otherwise the first path field. Because the
/// date is recorded only the first time a path is seen, it reflects the
/// earliest-listed commit in `out` for that path.
fn parse_activity_log(out: &str) -> HashMap<String, (u32, Option<String>)> {
    let mut map: HashMap<String, (u32, Option<String>)> = HashMap::new();
    let mut current_date: Option<String> = None;
    for line in out.lines() {
        if let Some(date) = line.strip_prefix('\u{0}') {
            let d = date.trim();
            current_date = (!d.is_empty()).then(|| d.to_owned());
            continue;
        }
        if line.trim().is_empty() {
            continue;
        }
        let mut fields = line.split('\t');
        let status = fields.next().unwrap_or("");
        let path = if status.starts_with('R') || status.starts_with('C') {
            fields.next_back()
        } else {
            fields.next()
        };
        // Records without a usable path (e.g. a status with no tab) are ignored.
        let Some(path) = path.map(str::trim).filter(|p| !p.is_empty()) else {
            continue;
        };
        let entry = map.entry(path.to_owned()).or_insert((0, None));
        entry.0 += 1;
        if entry.1.is_none() {
            entry.1.clone_from(&current_date);
        }
    }
    map
}
703
/// Identifies the CI system the process is running under from well-known
/// environment variables.
///
/// Checks are made in a fixed order and the first match wins: Jenkins
/// (`JENKINS_URL`, `JENKINS_HOME` or `BUILD_URL` set), GitHub Actions,
/// GitLab CI, CircleCI, Travis CI, Azure DevOps, then TeamCity
/// (`TEAMCITY_VERSION` set). Returns `None` when nothing matches.
///
/// Boolean flags are compared case-insensitively because Azure Pipelines sets
/// `TF_BUILD` to `True` rather than `true`.
fn detect_ci_system() -> Option<&'static str> {
    let ev = |k: &str| std::env::var(k).is_ok();
    let ev_true = |k: &str| std::env::var(k).is_ok_and(|v| v.eq_ignore_ascii_case("true"));
    if ev("JENKINS_URL") || ev("JENKINS_HOME") || ev("BUILD_URL") {
        return Some("Jenkins");
    }
    if ev_true("GITHUB_ACTIONS") {
        return Some("GitHub Actions");
    }
    if ev_true("GITLAB_CI") {
        return Some("GitLab CI");
    }
    if ev_true("CIRCLECI") {
        return Some("CircleCI");
    }
    if ev_true("TRAVIS") {
        return Some("Travis CI");
    }
    if ev_true("TF_BUILD") {
        return Some("Azure DevOps");
    }
    if ev("TEAMCITY_VERSION") {
        return Some("TeamCity");
    }
    None
}
731
/// Derives the current branch name from CI-provided environment variables.
///
/// The variables in `VARS` are consulted in order. Each value is trimmed and a
/// leading `refs/heads/` (or, failing that, `origin/`) is removed; the first
/// result that is neither empty nor the literal `HEAD` is returned.
fn ci_branch_from_env() -> Option<String> {
    const VARS: &[&str] = &[
        "BRANCH_NAME",
        "GIT_BRANCH",
        "GITHUB_REF_NAME",
        "CI_COMMIT_BRANCH",
        "CIRCLE_BRANCH",
        "TRAVIS_BRANCH",
        "BUILD_SOURCEBRANCH",
    ];
    const REF_PREFIXES: [&str; 2] = ["refs/heads/", "origin/"];

    VARS.iter()
        .filter_map(|name| std::env::var(name).ok())
        .find_map(|raw| {
            let trimmed = raw.trim();
            let branch = REF_PREFIXES
                .iter()
                .find_map(|prefix| trimmed.strip_prefix(*prefix))
                .unwrap_or(trimmed);
            (!branch.is_empty() && branch != "HEAD").then(|| branch.to_string())
        })
}
758
/// Returns the invoking user's name from the `USERNAME` environment variable,
/// falling back to `USER`, and finally to the literal `"unknown"`.
fn get_current_username() -> String {
    ["USERNAME", "USER"]
        .iter()
        .find_map(|key| std::env::var(key).ok())
        .unwrap_or_else(|| String::from("unknown"))
}
764
/// Returns the value of environment variable `var`, or `None` when it is
/// unset, not valid Unicode, or the empty string.
fn non_empty_env(var: &str) -> Option<String> {
    std::env::var(var).ok().filter(|value| !value.is_empty())
}
773
/// Reports whether any of the Jenkins marker variables (`JENKINS_URL`,
/// `JENKINS_HOME`, `BUILD_URL`) is set in the environment.
fn is_jenkins_env() -> bool {
    ["JENKINS_URL", "JENKINS_HOME", "BUILD_URL"]
        .iter()
        .any(|key| std::env::var(key).is_ok())
}
779
780fn get_hostname() -> String {
781 if is_jenkins_env() {
784 if let Some(n) = non_empty_env("NODE_NAME") {
785 return n;
786 }
787 }
788 if std::env::var("GITHUB_ACTIONS").as_deref() == Ok("true") {
789 if let Some(r) = non_empty_env("RUNNER_NAME") {
790 return r;
791 }
792 }
793 if std::env::var("GITLAB_CI").as_deref() == Ok("true") {
794 if let Some(r) = non_empty_env("CI_RUNNER_DESCRIPTION") {
795 return r;
796 }
797 }
798 std::env::var("COMPUTERNAME")
799 .or_else(|_| std::env::var("HOSTNAME"))
800 .or_else(|_| std::fs::read_to_string("/etc/hostname").map(|s| s.trim().to_string()))
801 .unwrap_or_else(|_| "unknown".to_string())
802}
803
804#[allow(clippy::too_many_arguments)]
806fn walk_root(
807 root: &Path,
808 config: &AppConfig,
809 include_globs: Option<&GlobSet>,
810 exclude_globs: Option<&GlobSet>,
811 enabled_languages: Option<&BTreeSet<Language>>,
812 seen_paths: &mut HashSet<PathBuf>,
813 analyzed: &mut Vec<FileRecord>,
814 skipped: &mut Vec<FileRecord>,
815 warnings: &mut Vec<String>,
816 cancel: Option<&AtomicBool>,
817 progress: Option<&ProgressCounters>,
818) -> Result<()> {
819 let mut builder = WalkBuilder::new(root);
820 builder
821 .follow_links(config.discovery.follow_symlinks)
822 .hidden(config.discovery.ignore_hidden_files)
823 .ignore(config.discovery.honor_ignore_files)
824 .parents(config.discovery.honor_ignore_files)
825 .git_ignore(config.discovery.honor_ignore_files)
826 .git_global(config.discovery.honor_ignore_files)
827 .git_exclude(config.discovery.honor_ignore_files);
828
829 let paths = collect_walk_paths(&builder, seen_paths, warnings);
830 if paths.is_empty() {
831 return Ok(());
832 }
833
834 if let Some(p) = progress {
835 p.files_total.fetch_add(paths.len(), Ordering::Relaxed);
836 }
837
838 let chunk_results = run_parallel_analysis(
839 &paths,
840 root,
841 config,
842 include_globs,
843 exclude_globs,
844 enabled_languages,
845 cancel,
846 progress,
847 )?;
848 merge_chunk_results(chunk_results, analyzed, skipped, warnings)
849}
850
851fn collect_walk_paths(
852 builder: &WalkBuilder,
853 seen_paths: &mut HashSet<PathBuf>,
854 warnings: &mut Vec<String>,
855) -> Vec<PathBuf> {
856 let (tx, rx) = std::sync::mpsc::channel::<std::result::Result<PathBuf, String>>();
860
861 builder.build_parallel().run(|| {
862 let tx = tx.clone();
863 Box::new(move |entry| {
864 match entry {
865 Err(e) => {
866 let _ = tx.send(Err(format!("discovery warning: {e}")));
867 }
868 Ok(e) => {
869 let path = e.into_path();
870 if !path.is_dir() {
871 let _ = tx.send(Ok(path));
872 }
873 }
874 }
875 ignore::WalkState::Continue
876 })
877 });
878
879 drop(tx);
882
883 rx.into_iter()
884 .filter_map(|msg| match msg {
885 Ok(path) => {
886 if seen_paths.insert(path.clone()) {
887 Some(path)
888 } else {
889 None
890 }
891 }
892 Err(warn) => {
893 warnings.push(warn);
894 None
895 }
896 })
897 .collect()
898}
899
900#[allow(clippy::too_many_arguments)]
902fn worker_loop(
903 paths: &[PathBuf],
904 root: &Path,
905 config: &AppConfig,
906 include_globs: Option<&GlobSet>,
907 exclude_globs: Option<&GlobSet>,
908 enabled_languages: Option<&BTreeSet<Language>>,
909 cancel: Option<&AtomicBool>,
910 next_index: &AtomicUsize,
911 files_done: Option<&AtomicUsize>,
912) -> Vec<Result<Option<FileRecord>>> {
913 let mut results = Vec::new();
914 loop {
915 if cancel.is_some_and(|c| c.load(Ordering::Relaxed)) {
916 results.push(Err(anyhow::anyhow!("analysis cancelled")));
917 break;
918 }
919 let i = next_index.fetch_add(1, Ordering::Relaxed);
920 if i >= paths.len() {
921 break;
922 }
923 results.push(analyze_candidate_file(
924 &paths[i],
925 root,
926 config,
927 include_globs,
928 exclude_globs,
929 enabled_languages,
930 ));
931 if let Some(fd) = files_done {
932 fd.fetch_add(1, Ordering::Relaxed);
933 }
934 }
935 results
936}
937
938#[allow(clippy::too_many_arguments)]
939fn run_parallel_analysis(
940 paths: &[PathBuf],
941 root: &Path,
942 config: &AppConfig,
943 include_globs: Option<&GlobSet>,
944 exclude_globs: Option<&GlobSet>,
945 enabled_languages: Option<&BTreeSet<Language>>,
946 cancel: Option<&AtomicBool>,
947 progress: Option<&ProgressCounters>,
948) -> Result<Vec<Vec<Result<Option<FileRecord>>>>> {
949 let thread_count = std::thread::available_parallelism().map_or(DEFAULT_ANALYSIS_THREADS, |n| {
950 n.get().min(MAX_ANALYSIS_THREADS)
951 });
952 let next_index = AtomicUsize::new(0);
956 let files_done: Option<&AtomicUsize> = progress.map(|p| p.files_done.as_ref());
957
958 std::thread::scope(|s| -> Result<Vec<Vec<Result<Option<FileRecord>>>>> {
959 let mut handles = Vec::with_capacity(thread_count);
962 for _ in 0..thread_count {
963 handles.push(s.spawn(|| {
964 worker_loop(
965 paths,
966 root,
967 config,
968 include_globs,
969 exclude_globs,
970 enabled_languages,
971 cancel,
972 &next_index,
973 files_done,
974 )
975 }));
976 }
977 handles
978 .into_iter()
979 .map(|h| {
980 h.join()
981 .map_err(|_| anyhow::anyhow!("analysis thread panicked"))
982 })
983 .collect()
984 })
985}
986
987fn merge_chunk_results(
988 chunk_results: Vec<Vec<Result<Option<FileRecord>>>>,
989 analyzed: &mut Vec<FileRecord>,
990 skipped: &mut Vec<FileRecord>,
991 warnings: &mut Vec<String>,
992) -> Result<()> {
993 for chunk in chunk_results {
994 for result in chunk {
995 if let Some(record) = result? {
996 push_record(record, analyzed, skipped, warnings);
997 }
998 }
999 }
1000 Ok(())
1001}
1002
1003fn process_submodules(config: &AppConfig, analyzed: &mut [FileRecord]) -> Vec<SubmoduleSummary> {
1005 let root = config.discovery.root_paths[0]
1006 .canonicalize()
1007 .unwrap_or_else(|_| config.discovery.root_paths[0].clone());
1008 let submodules = detect_submodules(&root);
1009 if submodules.is_empty() {
1010 return Vec::new();
1011 }
1012
1013 for file in analyzed.iter_mut() {
1014 for (name, sub_path) in &submodules {
1015 let prefix = sub_path.to_string_lossy().replace('\\', "/");
1016 let rel = &file.relative_path;
1017 if rel == &prefix || rel.starts_with(&format!("{prefix}/")) {
1018 file.submodule = Some(name.clone());
1019 break;
1020 }
1021 }
1022 }
1023
1024 build_submodule_summaries(analyzed, &submodules, &root)
1025}
1026
1027#[allow(clippy::cast_precision_loss)] fn compute_cocomo(code_lines: u64, mode: CocomoMode) -> CocomoEstimate {
1030 let ksloc = code_lines as f64 / 1_000.0;
1031 let (a, b, c, d): (f64, f64, f64, f64) = match mode {
1032 CocomoMode::Organic => (2.4, 1.05, 2.5, 0.38),
1033 CocomoMode::SemiDetached => (3.0, 1.12, 2.5, 0.35),
1034 CocomoMode::Embedded => (3.6, 1.20, 2.5, 0.32),
1035 };
1036 let effort = a * ksloc.powf(b);
1037 let duration = c * effort.powf(d);
1038 let avg_staff = if duration > 0.0 {
1039 effort / duration
1040 } else {
1041 0.0
1042 };
1043 CocomoEstimate {
1045 mode,
1046 ksloc: (ksloc * 100.0).round() / 100.0,
1047 effort_person_months: (effort * 100.0).round() / 100.0,
1048 duration_months: (duration * 100.0).round() / 100.0,
1049 avg_staff: (avg_staff * 100.0).round() / 100.0,
1050 }
1051}
1052
1053#[allow(clippy::cast_precision_loss)] fn compute_uloc(analyzed: &[FileRecord]) -> (u64, Option<f32>) {
1056 use std::collections::HashSet as StdHashSet;
1057 let mut unique: StdHashSet<u64> = StdHashSet::new();
1058 let mut total_code: u64 = 0;
1059 for record in analyzed {
1060 total_code += record.effective_counts.code_lines;
1061 for &hash in &record.raw_line_categories.code_line_hashes {
1062 unique.insert(hash);
1063 }
1064 }
1065 let uloc = unique.len() as u64;
1066 let dryness = if total_code > 0 {
1067 Some((uloc as f32 / total_code as f32) * 100.0)
1068 } else {
1069 None
1070 };
1071 (uloc, dryness)
1072}
1073
1074fn find_duplicate_groups(analyzed: &[FileRecord]) -> Vec<Vec<String>> {
1077 let mut by_hash: std::collections::HashMap<u64, Vec<&str>> = std::collections::HashMap::new();
1078 for record in analyzed {
1079 if record.content_hash != 0 {
1080 by_hash
1081 .entry(record.content_hash)
1082 .or_default()
1083 .push(&record.relative_path);
1084 }
1085 }
1086 let mut groups: Vec<Vec<String>> = by_hash
1087 .into_values()
1088 .filter(|v| v.len() >= 2)
1089 .map(|v| {
1090 let mut paths: Vec<String> = v.into_iter().map(str::to_owned).collect();
1091 paths.sort();
1092 paths
1093 })
1094 .collect();
1095 groups.sort_by(|a, b| a[0].cmp(&b[0]));
1096 groups
1097}
1098
1099fn assemble_run(
1101 config: &AppConfig,
1102 runtime_mode: &str,
1103 mut analyzed: Vec<FileRecord>,
1104 skipped: Vec<FileRecord>,
1105 warnings: Vec<String>,
1106 submodule_summaries: Vec<SubmoduleSummary>,
1107) -> AnalysisRun {
1108 let summary = build_summary(&analyzed, &skipped);
1109 let language_summaries = build_language_summaries(&analyzed);
1110 let col_threshold = config.analysis.style_col_threshold;
1111 let style_summary = build_style_summary(&analyzed, col_threshold);
1112
1113 let (uloc, dryness_pct) = compute_uloc(&analyzed);
1115 let duplicate_groups = find_duplicate_groups(&analyzed);
1116 let cocomo = if summary.code_lines > 0 {
1117 Some(compute_cocomo(summary.code_lines, CocomoMode::Organic))
1118 } else {
1119 None
1120 };
1121
1122 let first_root = config
1123 .discovery
1124 .root_paths
1125 .first()
1126 .map(|p| p.canonicalize().unwrap_or_else(|_| p.clone()));
1127 let git = first_root
1128 .as_deref()
1129 .map(detect_git_for_run)
1130 .unwrap_or_default();
1131
1132 let activity_window = config.analysis.activity_window_days.unwrap_or(0);
1135 if let (true, Some(root)) = (activity_window > 0, first_root.as_deref()) {
1136 let activity = detect_file_activity(root, activity_window);
1137 if !activity.is_empty() {
1138 for rec in &mut analyzed {
1139 if let Some((count, date)) = activity.get(&rec.relative_path) {
1140 rec.commit_count = Some(*count);
1141 rec.last_commit_date.clone_from(date);
1142 }
1143 }
1144 }
1145 }
1146
1147 let now = Utc::now();
1148 let run_id = {
1149 let uuid_suffix = Uuid::new_v4().simple().to_string();
1150 format!("{}-{}", now.format("%Y%m%d-%H%M"), uuid_suffix)
1151 };
1152
1153 AnalysisRun {
1154 tool: ToolMetadata {
1155 name: "sloc".into(),
1156 version: env!("CARGO_PKG_VERSION").into(),
1157 run_id,
1158 timestamp_utc: now,
1159 },
1160 environment: EnvironmentMetadata {
1161 operating_system: std::env::consts::OS.into(),
1162 architecture: std::env::consts::ARCH.into(),
1163 runtime_mode: runtime_mode.into(),
1164 initiator_username: get_current_username(),
1165 initiator_hostname: get_hostname(),
1166 ci_name: if is_jenkins_env() {
1167 Some(format!("Jenkins\t{}", get_hostname()))
1168 } else {
1169 detect_ci_system().map(str::to_string)
1170 },
1171 },
1172 effective_configuration: config.clone(),
1173 input_roots: config
1174 .discovery
1175 .root_paths
1176 .iter()
1177 .map(|p| path_to_string(p))
1178 .collect(),
1179 summary_totals: summary,
1180 totals_by_language: language_summaries,
1181 per_file_records: analyzed,
1182 skipped_file_records: skipped,
1183 warnings,
1184 submodule_summaries,
1185 git_commit_short: git.commit_short,
1186 git_commit_long: git.commit_long,
1187 git_branch: git.branch,
1188 git_commit_author: git.author,
1189 git_tags: git.tags,
1190 git_nearest_tag: git.nearest_tag,
1191 git_commit_date: git.commit_date,
1192 git_remote_url: git.remote_url,
1193 style_summary,
1194 cocomo,
1195 uloc,
1196 dryness_pct,
1197 duplicate_groups,
1198 duplicates_excluded: 0,
1199 }
1200}
1201
#[allow(clippy::too_many_lines)]
/// Runs one analysis pass over every configured root path and assembles the result.
///
/// The configuration is validated first, then the include/exclude globs and the
/// enabled-language filter are compiled once and shared by all roots. Each root is
/// canonicalized when possible; a root that is a regular file is analyzed directly
/// (using its parent directory as the base for relative paths), anything else is
/// handed to `walk_root`. Analyzed and skipped records are sorted by relative path
/// before the optional submodule breakdown and coverage attachment run.
///
/// `cancel`, when provided, is polled before each root is processed (and is also
/// forwarded to `walk_root`); `progress` is only forwarded to `walk_root`.
///
/// # Errors
///
/// Returns an error when the configuration is invalid, when no root paths are
/// configured, when a glob or language name cannot be parsed, when cancellation
/// is observed, or when analyzing a file or walking a root fails.
pub fn analyze(
    config: &AppConfig,
    runtime_mode: &str,
    cancel: Option<&AtomicBool>,
    progress: Option<&ProgressCounters>,
) -> Result<AnalysisRun> {
    config.validate()?;

    if config.discovery.root_paths.is_empty() {
        anyhow::bail!("no input paths were provided");
    }

    // Filters are compiled once up front; `None` means "no filter configured".
    let include_globs = compile_globset(&config.discovery.include_globs)?;
    let exclude_globs = compile_globset(&config.discovery.exclude_globs)?;
    let enabled_languages = parse_enabled_languages(&config.analysis.enabled_languages)?;

    let mut analyzed = Vec::new();
    let mut skipped = Vec::new();
    let mut warnings = Vec::new();
    let mut seen_paths = HashSet::new();

    for root in &config.discovery.root_paths {
        if cancel.is_some_and(|c| c.load(Ordering::Relaxed)) {
            anyhow::bail!("analysis cancelled");
        }

        // Fall back to the path as given when it cannot be canonicalized.
        let root = root.canonicalize().unwrap_or_else(|_| root.clone());

        if root.is_file() {
            // A single-file root is analyzed directly; its parent acts as the base
            // directory for the relative path.
            // NOTE(review): this branch does not consult `seen_paths` or `progress`.
            if let Some(record) = analyze_candidate_file(
                &root,
                root.parent().unwrap_or_else(|| Path::new(".")),
                config,
                include_globs.as_ref(),
                exclude_globs.as_ref(),
                enabled_languages.as_ref(),
            )? {
                push_record(record, &mut analyzed, &mut skipped, &mut warnings);
            }
            continue;
        }

        walk_root(
            &root,
            config,
            include_globs.as_ref(),
            exclude_globs.as_ref(),
            enabled_languages.as_ref(),
            &mut seen_paths,
            &mut analyzed,
            &mut skipped,
            &mut warnings,
            cancel,
            progress,
        )?;
    }

    // Sort by relative path so the output order does not depend on walk order.
    analyzed.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
    skipped.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));

    let submodule_summaries = if config.discovery.submodule_breakdown {
        process_submodules(config, &mut analyzed)
    } else {
        Vec::new()
    };

    attach_coverage(config, &mut analyzed, &mut warnings);

    Ok(assemble_run(
        config,
        runtime_mode,
        analyzed,
        skipped,
        warnings,
        submodule_summaries,
    ))
}
1285
1286fn attach_coverage(config: &AppConfig, analyzed: &mut [FileRecord], warnings: &mut Vec<String>) {
1287 let Some(cov_path) = coverage::resolve_coverage_file(config.analysis.coverage_file.as_deref())
1288 else {
1289 return;
1290 };
1291 tracing::debug!(path = %cov_path.display(), "loading coverage file");
1292 match fs::read_to_string(&cov_path) {
1293 Ok(content) => {
1294 let cov_map = coverage::parse_coverage_auto(&cov_path, &content);
1295 let mut matched: u32 = 0;
1296 let mut unmatched: u32 = 0;
1297 for record in analyzed.iter_mut() {
1298 record.coverage =
1299 coverage::lookup_coverage(&cov_map, &record.relative_path).cloned();
1300 if record.coverage.is_some() {
1301 matched += 1;
1302 } else {
1303 unmatched += 1;
1304 }
1305 }
1306 tracing::debug!(
1307 path = %cov_path.display(),
1308 coverage_entries = cov_map.len(),
1309 files_matched = matched,
1310 files_unmatched = unmatched,
1311 "coverage attached"
1312 );
1313 if unmatched > 0 && matched == 0 {
1314 tracing::warn!(
1315 path = %cov_path.display(),
1316 "coverage file loaded but no source files could be matched — check that paths in the coverage report match the scanned directory"
1317 );
1318 }
1319 }
1320 Err(e) => {
1321 tracing::warn!(path = %cov_path.display(), error = %e, "coverage file could not be read");
1322 warnings.push(format!(
1323 "coverage file '{}' could not be read: {e}",
1324 cov_path.display()
1325 ));
1326 }
1327 }
1328}
1329
1330fn push_record(
1331 record: FileRecord,
1332 analyzed: &mut Vec<FileRecord>,
1333 skipped: &mut Vec<FileRecord>,
1334 warnings: &mut Vec<String>,
1335) {
1336 warnings.extend(
1337 record
1338 .warnings
1339 .iter()
1340 .map(|warning| format!("{}: {warning}", record.relative_path)),
1341 );
1342
1343 match record.status {
1344 FileStatus::AnalyzedExact | FileStatus::AnalyzedBestEffort => analyzed.push(record),
1345 _ => skipped.push(record),
1346 }
1347}
1348
1349#[inline]
1351fn skip_with_reason(
1352 path: &Path,
1353 root: &Path,
1354 size: u64,
1355 reason: impl Into<String>,
1356) -> MetadataPolicyOutcome {
1357 MetadataPolicyOutcome::Skip(Box::new(skipped_record(
1358 path,
1359 root,
1360 size,
1361 FileStatus::SkippedByPolicy,
1362 vec![reason.into()],
1363 )))
1364}
1365
1366#[allow(clippy::too_many_arguments)]
1370fn check_metadata_policy(
1371 path: &Path,
1372 root: &Path,
1373 relative_path: &str,
1374 metadata: &fs::Metadata,
1375 config: &AppConfig,
1376 include_globs: Option<&GlobSet>,
1377 exclude_globs: Option<&GlobSet>,
1378) -> MetadataPolicyOutcome {
1379 let size = metadata.len();
1380
1381 if metadata.file_type().is_symlink() && !config.discovery.follow_symlinks {
1382 return skip_with_reason(path, root, size, "symlink skipped by policy");
1383 }
1384 if file_name_eq(path, ".gitignore") {
1385 return skip_with_reason(path, root, size, ".gitignore is always excluded");
1386 }
1387 if is_excluded_dir_path(path, &config.discovery.excluded_directories) {
1388 return skip_with_reason(path, root, size, "path matched excluded directory setting");
1389 }
1390 if size > config.discovery.max_file_size_bytes {
1391 return skip_with_reason(
1392 path,
1393 root,
1394 size,
1395 format!(
1396 "file exceeded max_file_size_bytes ({})",
1397 config.discovery.max_file_size_bytes
1398 ),
1399 );
1400 }
1401 if let Some(globs) = include_globs {
1402 if !globs.is_match(Path::new(relative_path)) && !globs.is_match(path) {
1403 return MetadataPolicyOutcome::Exclude;
1404 }
1405 }
1406 if let Some(globs) = exclude_globs {
1407 if globs.is_match(Path::new(relative_path)) || globs.is_match(path) {
1408 return skip_with_reason(path, root, size, "path matched exclude glob");
1409 }
1410 }
1411 if is_known_lockfile(path) && !config.analysis.include_lockfiles {
1412 return skip_with_reason(path, root, size, "lockfile skipped by default policy");
1413 }
1414
1415 MetadataPolicyOutcome::Continue
1416}
1417
/// Result of the content-based policy checks performed by `check_content_policy`.
struct ContentPolicyResult {
    /// Whether the path contains a vendor directory component.
    vendor: bool,
    /// Whether generated-file detection is enabled and flagged this file.
    generated: bool,
    /// Whether minified-file detection is enabled and flagged this file.
    minified: bool,
    /// Pre-built skip record when a policy rejected the file; `None` means
    /// analysis should proceed.
    skip_record: Option<FileRecord>,
}
1424
1425fn check_content_policy(
1428 path: &Path,
1429 root: &Path,
1430 size_bytes: u64,
1431 bytes: &[u8],
1432 config: &AppConfig,
1433) -> ContentPolicyResult {
1434 let vendor = is_vendor_path(path);
1435 if vendor && config.analysis.vendor_directory_detection {
1436 return ContentPolicyResult {
1437 vendor,
1438 generated: false,
1439 minified: false,
1440 skip_record: Some(skipped_record(
1441 path,
1442 root,
1443 size_bytes,
1444 FileStatus::SkippedByPolicy,
1445 vec!["vendor file skipped by policy".into()],
1446 )),
1447 };
1448 }
1449
1450 let generated = config.analysis.generated_file_detection && looks_generated(path, bytes);
1451 if generated {
1452 return ContentPolicyResult {
1453 vendor,
1454 generated,
1455 minified: false,
1456 skip_record: Some(skipped_record(
1457 path,
1458 root,
1459 size_bytes,
1460 FileStatus::SkippedByPolicy,
1461 vec!["generated file skipped by policy".into()],
1462 )),
1463 };
1464 }
1465
1466 let minified = config.analysis.minified_file_detection && looks_minified(path, bytes);
1467 if minified {
1468 return ContentPolicyResult {
1469 vendor,
1470 generated,
1471 minified,
1472 skip_record: Some(skipped_record(
1473 path,
1474 root,
1475 size_bytes,
1476 FileStatus::SkippedByPolicy,
1477 vec!["minified file skipped by policy".into()],
1478 )),
1479 };
1480 }
1481
1482 ContentPolicyResult {
1483 vendor,
1484 generated,
1485 minified,
1486 skip_record: None,
1487 }
1488}
1489
1490fn decode_file_contents(
1492 path: &Path,
1493 root: &Path,
1494 size_bytes: u64,
1495 bytes: &[u8],
1496 config: &AppConfig,
1497) -> Result<Option<(String, String, Vec<String>)>> {
1498 if is_binary(bytes) {
1499 return match config.analysis.binary_file_behavior {
1500 BinaryFileBehavior::Skip => Ok(None),
1501 BinaryFileBehavior::Fail => {
1502 anyhow::bail!("binary file encountered: {}", path.display())
1503 }
1504 };
1505 }
1506
1507 match decode_bytes(bytes) {
1508 Ok(result) => Ok(Some(result)),
1509 Err(err) => match config.analysis.decode_failure_behavior {
1510 FailureBehavior::WarnSkip => {
1511 let _ = (path, root, size_bytes); Err(anyhow::anyhow!("__decode_warn__: {err}"))
1516 }
1517 FailureBehavior::Fail => {
1518 anyhow::bail!("decode failure for {}: {err}", path.display())
1519 }
1520 },
1521 }
1522}
1523
#[allow(clippy::too_many_lines)]
/// Runs the full per-file pipeline for one candidate path.
///
/// Stages, in order: read metadata, apply metadata policies, read the bytes,
/// apply content policies, decode to text, detect the language, apply the
/// enabled-language filter, analyze the text and build the `FileRecord`.
///
/// Returns `Ok(None)` when the file is excluded by the include globs (no record
/// at all), `Ok(Some(record))` for both analyzed and skipped files, and `Err`
/// only when a configured "fail" behaviour is triggered while decoding.
fn analyze_candidate_file(
    path: &Path,
    root: &Path,
    config: &AppConfig,
    include_globs: Option<&GlobSet>,
    exclude_globs: Option<&GlobSet>,
    enabled_languages: Option<&BTreeSet<Language>>,
) -> Result<Option<FileRecord>> {
    // `symlink_metadata` does not follow links, so the symlink policy can see them.
    let metadata = match fs::symlink_metadata(path) {
        Ok(metadata) => metadata,
        Err(err) => {
            return Ok(Some(skipped_record(
                path,
                root,
                0,
                FileStatus::ErrorInternal,
                vec![format!("failed to read metadata: {err}")],
            )));
        }
    };

    let relative_path = relative_path_string(path, root);

    match check_metadata_policy(
        path,
        root,
        &relative_path,
        &metadata,
        config,
        include_globs,
        exclude_globs,
    ) {
        MetadataPolicyOutcome::Skip(record) => return Ok(Some(*record)),
        MetadataPolicyOutcome::Exclude => return Ok(None),
        MetadataPolicyOutcome::Continue => {}
    }

    // An unreadable file is recorded as an internal error rather than failing the run.
    let bytes = match fs::read(path) {
        Ok(bytes) => bytes,
        Err(err) => {
            return Ok(Some(skipped_record(
                path,
                root,
                metadata.len(),
                FileStatus::ErrorInternal,
                vec![format!("failed to read file: {err}")],
            )));
        }
    };

    let content_policy = check_content_policy(path, root, metadata.len(), &bytes, config);
    if let Some(record) = content_policy.skip_record {
        return Ok(Some(record));
    }
    let (vendor, generated, minified) = (
        content_policy.vendor,
        content_policy.generated,
        content_policy.minified,
    );

    let (text, encoding, decode_warnings) =
        match decode_file_contents(path, root, metadata.len(), &bytes, config) {
            Ok(Some(result)) => result,
            Ok(None) => {
                return Ok(Some(skipped_record(
                    path,
                    root,
                    metadata.len(),
                    FileStatus::SkippedBinary,
                    vec!["binary file skipped by default".into()],
                )));
            }
            Err(err) => {
                // `decode_file_contents` marks warn-and-skip failures with this
                // sentinel prefix; anything else is a genuine hard error.
                let msg = err.to_string();
                if let Some(warn_msg) = msg.strip_prefix("__decode_warn__: ") {
                    return Ok(Some(skipped_record(
                        path,
                        root,
                        metadata.len(),
                        FileStatus::SkippedDecodeError,
                        vec![warn_msg.to_string()],
                    )));
                }
                return Err(err);
            }
        };

    // The first line is handed to language detection alongside the shebang setting.
    let first_line = text.lines().next();
    let language = detect_language(
        path,
        first_line,
        &config.analysis.extension_overrides,
        config.analysis.shebang_detection,
    );

    let Some(language) = language else {
        return Ok(Some(skipped_record(
            path,
            root,
            metadata.len(),
            FileStatus::SkippedUnsupported,
            vec!["unsupported or undetected language".into()],
        )));
    };

    if let Some(enabled) = enabled_languages {
        if !enabled.contains(&language) {
            return Ok(Some(skipped_record(
                path,
                root,
                metadata.len(),
                FileStatus::SkippedByPolicy,
                vec![format!(
                    "language {} disabled by configuration",
                    language.display_name()
                )],
            )));
        }
    }

    // Any scope value other than "c_family" selects all languages.
    let style_scope = match config.analysis.style_lang_scope.as_str() {
        "c_family" => StyleLangScope::CFamilyOnly,
        _ => StyleLangScope::All,
    };
    let ieee_opts = AnalysisOptions {
        blank_in_block_comment_as_comment: config.analysis.blank_in_block_comment_policy
            == BlankInBlockCommentPolicy::CountAsComment,
        collapse_continuation_lines: config.analysis.continuation_line_policy
            == ContinuationLinePolicy::CollapseToLogical,
        enable_style: config.analysis.style_analysis_enabled,
        style_lang_scope: style_scope,
    };
    let analysis = analyze_text(language, &text, ieee_opts);
    let effective_counts = compute_effective_counts(
        &analysis.raw,
        config.analysis.mixed_line_policy,
        config.analysis.python_docstrings_as_comments,
        config.analysis.count_compiler_directives,
    );

    // Decode warnings come first, followed by warnings from the analyzer.
    let mut warnings = decode_warnings;
    warnings.extend(analysis.warnings.clone());

    // Hash of the raw (undecoded) bytes.
    // NOTE(review): `DefaultHasher`'s algorithm is unspecified by std, so these
    // values should not be compared across builds made with different toolchains.
    let content_hash = {
        use std::hash::{DefaultHasher, Hash, Hasher};
        let mut h = DefaultHasher::new();
        bytes.hash(&mut h);
        h.finish()
    };

    // A complexity of zero is reported as "not available".
    let cyclomatic_complexity = if analysis.raw.cyclomatic_complexity > 0 {
        Some(analysis.raw.cyclomatic_complexity)
    } else {
        None
    };
    let lsloc = analysis.raw.lsloc;

    Ok(Some(FileRecord {
        path: path_to_string(path),
        relative_path,
        language: Some(language),
        size_bytes: metadata.len(),
        detected_encoding: Some(encoding),
        raw_line_categories: analysis.raw,
        effective_counts,
        status: match analysis.parse_mode {
            ParseMode::Lexical | ParseMode::TreeSitter => FileStatus::AnalyzedExact,
            ParseMode::LexicalBestEffort => FileStatus::AnalyzedBestEffort,
        },
        warnings,
        generated,
        minified,
        vendor,
        parse_mode: Some(analysis.parse_mode),
        submodule: None,
        coverage: None,
        style_analysis: analysis.style_analysis,
        cyclomatic_complexity,
        lsloc,
        commit_count: None,
        last_commit_date: None,
        content_hash,
    }))
}
1714
1715const fn compute_effective_counts(
1716 raw: &RawLineCounts,
1717 mixed_line_policy: MixedLinePolicy,
1718 python_docstrings_as_comments: bool,
1719 count_compiler_directives: bool,
1720) -> EffectiveCounts {
1721 let mut effective = EffectiveCounts {
1722 code_lines: raw.code_only_lines,
1723 comment_lines: raw.single_comment_only_lines + raw.multi_comment_only_lines,
1724 blank_lines: raw.blank_only_lines,
1725 mixed_lines_separate: 0,
1726 };
1727
1728 if python_docstrings_as_comments {
1729 effective.comment_lines += raw.docstring_comment_lines;
1730 } else {
1731 effective.code_lines += raw.docstring_comment_lines;
1732 }
1733
1734 let mixed_total = raw.mixed_code_single_comment_lines + raw.mixed_code_multi_comment_lines;
1735 match mixed_line_policy {
1736 MixedLinePolicy::CodeOnly => effective.code_lines += mixed_total,
1737 MixedLinePolicy::CodeAndComment => {
1738 effective.code_lines += mixed_total;
1739 effective.comment_lines += mixed_total;
1740 }
1741 MixedLinePolicy::CommentOnly => effective.comment_lines += mixed_total,
1742 MixedLinePolicy::SeparateMixedCategory => effective.mixed_lines_separate += mixed_total,
1743 }
1744
1745 if !count_compiler_directives {
1748 effective.code_lines = effective
1749 .code_lines
1750 .saturating_sub(raw.compiler_directive_lines);
1751 }
1752
1753 effective
1754}
1755
1756fn build_summary(analyzed: &[FileRecord], skipped: &[FileRecord]) -> SummaryTotals {
1757 let mut summary = SummaryTotals {
1758 files_considered: (analyzed.len() + skipped.len()) as u64,
1759 files_analyzed: analyzed.len() as u64,
1760 files_skipped: skipped.len() as u64,
1761 ..Default::default()
1762 };
1763
1764 for record in analyzed {
1765 summary.total_physical_lines += record.raw_line_categories.total_physical_lines;
1766 summary.code_lines += record.effective_counts.code_lines;
1767 summary.comment_lines += record.effective_counts.comment_lines;
1768 summary.blank_lines += record.effective_counts.blank_lines;
1769 summary.mixed_lines_separate += record.effective_counts.mixed_lines_separate;
1770 summary.functions += record.raw_line_categories.functions;
1771 summary.classes += record.raw_line_categories.classes;
1772 summary.variables += record.raw_line_categories.variables;
1773 summary.imports += record.raw_line_categories.imports;
1774 summary.test_count += record.raw_line_categories.test_count;
1775 summary.test_assertion_count += record.raw_line_categories.test_assertion_count;
1776 summary.test_suite_count += record.raw_line_categories.test_suite_count;
1777 summary.cyclomatic_complexity +=
1778 u64::from(record.raw_line_categories.cyclomatic_complexity);
1779 if let Some(lsloc) = record.raw_line_categories.lsloc {
1780 *summary.lsloc.get_or_insert(0) += u64::from(lsloc);
1781 }
1782 if let Some(cov) = &record.coverage {
1783 summary.coverage_lines_found += u64::from(cov.lines_found);
1784 summary.coverage_lines_hit += u64::from(cov.lines_hit);
1785 summary.coverage_functions_found += u64::from(cov.functions_found);
1786 summary.coverage_functions_hit += u64::from(cov.functions_hit);
1787 summary.coverage_branches_found += u64::from(cov.branches_found);
1788 summary.coverage_branches_hit += u64::from(cov.branches_hit);
1789 }
1790 }
1791
1792 summary
1793}
1794
1795const fn zeroed_summary(language: Language) -> LanguageSummary {
1797 LanguageSummary {
1798 language,
1799 files: 0,
1800 total_physical_lines: 0,
1801 code_lines: 0,
1802 comment_lines: 0,
1803 blank_lines: 0,
1804 mixed_lines_separate: 0,
1805 functions: 0,
1806 classes: 0,
1807 variables: 0,
1808 imports: 0,
1809 test_count: 0,
1810 test_assertion_count: 0,
1811 test_suite_count: 0,
1812 coverage_lines_found: 0,
1813 coverage_lines_hit: 0,
1814 coverage_functions_found: 0,
1815 coverage_functions_hit: 0,
1816 coverage_branches_found: 0,
1817 coverage_branches_hit: 0,
1818 cyclomatic_complexity: 0,
1819 lsloc: None,
1820 }
1821}
1822
1823fn accumulate_record_into_summary(entry: &mut LanguageSummary, record: &FileRecord) {
1825 entry.files += 1;
1826 let r = &record.raw_line_categories;
1827 entry.total_physical_lines += r.total_physical_lines;
1828 entry.code_lines += record.effective_counts.code_lines;
1829 entry.comment_lines += record.effective_counts.comment_lines;
1830 entry.blank_lines += record.effective_counts.blank_lines;
1831 entry.mixed_lines_separate += record.effective_counts.mixed_lines_separate;
1832 entry.functions += r.functions;
1833 entry.classes += r.classes;
1834 entry.variables += r.variables;
1835 entry.imports += r.imports;
1836 entry.test_count += r.test_count;
1837 entry.test_assertion_count += r.test_assertion_count;
1838 entry.test_suite_count += r.test_suite_count;
1839 entry.cyclomatic_complexity += u64::from(r.cyclomatic_complexity);
1840 if let Some(lsloc) = r.lsloc {
1841 *entry.lsloc.get_or_insert(0) += u64::from(lsloc);
1842 }
1843 if let Some(cov) = &record.coverage {
1844 entry.coverage_lines_found += u64::from(cov.lines_found);
1845 entry.coverage_lines_hit += u64::from(cov.lines_hit);
1846 entry.coverage_functions_found += u64::from(cov.functions_found);
1847 entry.coverage_functions_hit += u64::from(cov.functions_hit);
1848 entry.coverage_branches_found += u64::from(cov.branches_found);
1849 entry.coverage_branches_hit += u64::from(cov.branches_hit);
1850 }
1851}
1852
1853fn build_language_summaries(analyzed: &[FileRecord]) -> Vec<LanguageSummary> {
1854 let mut by_language: BTreeMap<Language, LanguageSummary> = BTreeMap::new();
1855 for record in analyzed {
1856 let Some(language) = record.language else {
1857 continue;
1858 };
1859 let entry = by_language
1860 .entry(language)
1861 .or_insert_with(|| zeroed_summary(language));
1862 accumulate_record_into_summary(entry, record);
1863 }
1864 by_language.into_values().collect()
1865}
1866
1867fn skipped_record(
1868 path: &Path,
1869 root: &Path,
1870 size_bytes: u64,
1871 status: FileStatus,
1872 warnings: Vec<String>,
1873) -> FileRecord {
1874 FileRecord {
1875 path: path_to_string(path),
1876 relative_path: relative_path_string(path, root),
1877 language: None,
1878 size_bytes,
1879 detected_encoding: None,
1880 raw_line_categories: RawLineCounts::default(),
1881 effective_counts: EffectiveCounts::default(),
1882 status,
1883 warnings,
1884 generated: false,
1885 minified: false,
1886 vendor: false,
1887 parse_mode: None,
1888 submodule: None,
1889 coverage: None,
1890 style_analysis: None,
1891 cyclomatic_complexity: None,
1892 lsloc: None,
1893 commit_count: None,
1894 last_commit_date: None,
1895 content_hash: 0,
1896 }
1897}
1898
/// Renders `path` relative to `root` using forward slashes.
///
/// When `path` does not start with `root` the full path is rendered instead.
/// Non-UTF-8 portions are replaced lossily.
fn relative_path_string(path: &Path, root: &Path) -> String {
    let relative = match path.strip_prefix(root) {
        Ok(stripped) => stripped,
        Err(_) => path,
    };
    let rendered = relative.to_string_lossy();
    rendered.replace('\\', "/")
}
1905
/// Renders a path as a (lossy) UTF-8 string with backslashes turned into forward slashes.
fn path_to_string(path: &Path) -> String {
    let rendered = path.to_string_lossy();
    rendered.replace('\\', "/")
}
1909
/// Reads `<root>/.gitmodules` and returns `(name, path)` for every submodule section
/// that declares a `path`.
///
/// Returns an empty list when the file is missing or unreadable. Parsing is
/// deliberately lenient and line-based:
///
/// * a section starts at a line of the form `[submodule "<name>"]`;
/// * within a section, the value of the `path` key (matched case-insensitively,
///   as git configuration keys are) is taken as the submodule path;
/// * sections without a `path` are dropped.
///
/// Malformed header lines such as `[submodule "]` are ignored rather than
/// causing a panic, and keys that merely start with `path` (for example
/// `pathspec`) are not mistaken for the path entry.
#[must_use]
pub fn detect_submodules(root: &Path) -> Vec<(String, PathBuf)> {
    let gitmodules = root.join(".gitmodules");
    if !gitmodules.is_file() {
        return Vec::new();
    }
    let Ok(content) = fs::read_to_string(&gitmodules) else {
        return Vec::new();
    };

    let mut result = Vec::new();
    let mut current_name: Option<String> = None;
    let mut current_path: Option<PathBuf> = None;

    for line in content.lines() {
        let trimmed = line.trim();

        // `strip_prefix` + `strip_suffix` cannot overlap, unlike manual index
        // arithmetic, so a truncated header simply fails to match.
        let header = trimmed
            .strip_prefix("[submodule \"")
            .and_then(|rest| rest.strip_suffix("\"]"));

        if let Some(name) = header {
            // Flush the previous section; both slots are reset either way.
            if let (Some(prev_name), Some(prev_path)) = (current_name.take(), current_path.take())
            {
                result.push((prev_name, prev_path));
            }
            current_name = Some(name.to_string());
        } else if let Some((key, value)) = trimmed.split_once('=') {
            if key.trim_end().eq_ignore_ascii_case("path") {
                current_path = Some(PathBuf::from(value.trim()));
            }
        }
    }

    // Flush the final section, which has no following header to trigger it.
    if let (Some(name), Some(path)) = (current_name, current_path) {
        result.push((name, path));
    }

    result
}
1946
1947fn build_submodule_summaries(
1948 analyzed: &[FileRecord],
1949 submodules: &[(String, PathBuf)],
1950 root: &Path,
1951) -> Vec<SubmoduleSummary> {
1952 submodules
1953 .iter()
1954 .map(|(name, path)| {
1955 let files: Vec<&FileRecord> = analyzed
1956 .iter()
1957 .filter(|f| f.submodule.as_deref() == Some(name.as_str()))
1958 .collect();
1959
1960 let files_analyzed = files.len() as u64;
1961 let total_physical_lines = files
1962 .iter()
1963 .map(|f| f.raw_line_categories.total_physical_lines)
1964 .sum();
1965 let code_lines = files.iter().map(|f| f.effective_counts.code_lines).sum();
1966 let comment_lines = files.iter().map(|f| f.effective_counts.comment_lines).sum();
1967 let blank_lines = files.iter().map(|f| f.effective_counts.blank_lines).sum();
1968 let language_summaries = build_language_summaries_from_slice(&files);
1969
1970 let git = detect_git_for_run(&root.join(path));
1971
1972 SubmoduleSummary {
1973 name: name.clone(),
1974 relative_path: path.to_string_lossy().replace('\\', "/"),
1975 files_analyzed,
1976 total_physical_lines,
1977 code_lines,
1978 comment_lines,
1979 blank_lines,
1980 language_summaries,
1981 git_commit_short: git.commit_short,
1982 git_commit_long: git.commit_long,
1983 git_branch: git.branch,
1984 git_commit_author: git.author,
1985 git_commit_date: git.commit_date,
1986 git_remote_url: git.remote_url,
1987 }
1988 })
1989 .filter(|s| s.files_analyzed > 0)
1990 .collect()
1991}
1992
1993#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
1995fn dominant_indent_label(files: &[&StyleAnalysis]) -> String {
1996 let mut votes = [0u32; 6];
1997 for f in files {
1998 let idx = match f.indent_style {
1999 IndentStyle::Tabs => 0,
2000 IndentStyle::Spaces2 => 1,
2001 IndentStyle::Spaces4 => 2,
2002 IndentStyle::Spaces8 => 3,
2003 IndentStyle::Mixed => 4,
2004 IndentStyle::Unknown => 5,
2005 };
2006 votes[idx] += 1;
2007 }
2008 let labels = ["Tabs", "2-Space", "4-Space", "8-Space", "Mixed", "\u{2014}"];
2009 labels[votes
2010 .iter()
2011 .enumerate()
2012 .max_by_key(|(_, v)| *v)
2013 .map_or(5, |(i, _)| i)]
2014 .to_string()
2015}
2016
2017#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
2019fn line80_pct(files: &[&StyleAnalysis]) -> u8 {
2020 if files.is_empty() {
2021 return 0;
2022 }
2023 let compliant = files
2024 .iter()
2025 .filter(|f| f.total_lines == 0 || (f.lines_over_80 as f32 / f.total_lines as f32) <= 0.05)
2026 .count() as u32;
2027 ((compliant * 100) / files.len() as u32) as u8
2028}
2029
2030#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
2033fn line_col_pct(files: &[&StyleAnalysis], threshold: u16) -> u8 {
2034 if files.is_empty() {
2035 return 0;
2036 }
2037 let compliant = files
2038 .iter()
2039 .filter(|f| {
2040 let over = if threshold <= 80 {
2041 f.lines_over_80
2042 } else if threshold <= 100 {
2043 f.lines_over_100
2044 } else {
2045 f.lines_over_120
2046 };
2047 f.total_lines == 0 || (over as f32 / f.total_lines as f32) <= 0.05
2048 })
2049 .count() as u32;
2050 ((compliant * 100) / files.len() as u32) as u8
2051}
2052
2053#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
2055fn build_language_group(
2056 family: &str,
2057 files: &[&StyleAnalysis],
2058 col_threshold: u16,
2059) -> LanguageStyleGroup {
2060 let count = files.len() as u32;
2061
2062 let mut all_names: Vec<String> = Vec::new();
2064 for f in files {
2065 for g in &f.guide_scores {
2066 if !all_names.contains(&g.name) {
2067 all_names.push(g.name.clone());
2068 }
2069 }
2070 }
2071
2072 let mut guide_avg_scores: Vec<(String, u8)> = all_names
2073 .into_iter()
2074 .map(|name| {
2075 let sum: u32 = files
2076 .iter()
2077 .filter_map(|f| f.guide_scores.iter().find(|g| g.name == name))
2078 .map(|g| u32::from(g.score_pct))
2079 .sum();
2080 let avg = (sum / count) as u8;
2081 (name, avg)
2082 })
2083 .collect();
2084 guide_avg_scores.sort_by_key(|s| std::cmp::Reverse(s.1));
2085
2086 let (dominant_guide, dominant_score_pct) = guide_avg_scores
2087 .first()
2088 .map(|(n, s)| (n.clone(), *s))
2089 .unwrap_or_default();
2090
2091 let lcp = line_col_pct(files, col_threshold);
2092 LanguageStyleGroup {
2093 language_family: family.to_string(),
2094 files_count: count,
2095 dominant_guide,
2096 dominant_score_pct,
2097 common_indent_style: dominant_indent_label(files),
2098 guide_avg_scores,
2099 line80_compliant_pct: line80_pct(files),
2100 line_col_compliant_pct: lcp,
2101 }
2102}
2103
2104#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
2107fn build_style_summary(analyzed: &[FileRecord], col_threshold: u16) -> Option<StyleSummary> {
2108 let all_style: Vec<&StyleAnalysis> = analyzed
2109 .iter()
2110 .filter_map(|f| f.style_analysis.as_ref())
2111 .collect();
2112
2113 if all_style.is_empty() {
2114 return None;
2115 }
2116
2117 let mut families: std::collections::BTreeMap<&str, Vec<&StyleAnalysis>> =
2119 std::collections::BTreeMap::new();
2120 for sa in &all_style {
2121 families
2122 .entry(sa.language_family.as_str())
2123 .or_default()
2124 .push(sa);
2125 }
2126
2127 let mut by_language: Vec<LanguageStyleGroup> = families
2128 .iter()
2129 .map(|(family, files)| build_language_group(family, files, col_threshold))
2130 .collect();
2131 by_language.sort_by_key(|g| std::cmp::Reverse(g.files_count));
2132
2133 let files_analyzed = all_style.len() as u32;
2134 let common_indent_style = dominant_indent_label(&all_style);
2135 let line80_compliant_pct = line80_pct(&all_style);
2136 let line_col_compliant_pct = line_col_pct(&all_style, col_threshold);
2137
2138 Some(StyleSummary {
2139 files_analyzed,
2140 common_indent_style,
2141 line80_compliant_pct,
2142 line_col_compliant_pct,
2143 col_threshold,
2144 by_language,
2145 })
2146}
2147
2148fn build_language_summaries_from_slice(files: &[&FileRecord]) -> Vec<LanguageSummary> {
2149 let mut map: BTreeMap<String, LanguageSummary> = BTreeMap::new();
2150 for file in files {
2151 let Some(lang) = file.language else { continue };
2152 let entry = map
2153 .entry(lang.display_name().to_string())
2154 .or_insert_with(|| zeroed_summary(lang));
2155 accumulate_record_into_summary(entry, file);
2156 }
2157 map.into_values().collect()
2158}
2159
/// Returns `true` when the final component of `path` is exactly `expected`.
///
/// Paths without a file name, or whose file name is not valid UTF-8, never match.
fn file_name_eq(path: &Path, expected: &str) -> bool {
    let file_name = path.file_name().and_then(|name| name.to_str());
    file_name == Some(expected)
}
2165
/// Returns `true` when any component of `path` equals one of `excluded_dirs`.
///
/// Every component is considered, including the final one; components that
/// are not valid UTF-8 never match.
fn is_excluded_dir_path(path: &Path, excluded_dirs: &[String]) -> bool {
    for component in path.components() {
        let Some(part) = component.as_os_str().to_str() else {
            continue;
        };
        if excluded_dirs.iter().any(|excluded| excluded.as_str() == part) {
            return true;
        }
    }
    false
}
2174
/// Returns `true` when any component of `path` is a recognised vendor directory
/// name (`vendor`, `node_modules` or `packages`).
fn is_vendor_path(path: &Path) -> bool {
    const VENDOR_DIRS: [&str; 3] = ["vendor", "node_modules", "packages"];

    path.components()
        .filter_map(|component| component.as_os_str().to_str())
        .any(|part| VENDOR_DIRS.contains(&part))
}
2183
/// Returns `true` when the file name of `path` is one of the recognised
/// dependency lockfile names.
fn is_known_lockfile(path: &Path) -> bool {
    const LOCKFILE_NAMES: [&str; 7] = [
        "Cargo.lock",
        "package-lock.json",
        "yarn.lock",
        "pnpm-lock.yaml",
        "Pipfile.lock",
        "poetry.lock",
        "composer.lock",
    ];

    match path.file_name().and_then(|name| name.to_str()) {
        Some(name) => LOCKFILE_NAMES.contains(&name),
        None => false,
    }
}
2200
2201fn looks_generated(path: &Path, bytes: &[u8]) -> bool {
2202 let file_name = path
2203 .file_name()
2204 .and_then(|name| name.to_str())
2205 .unwrap_or_default();
2206 if file_name.contains(".generated.") || file_name.contains(".g.") {
2207 return true;
2208 }
2209
2210 let sample = String::from_utf8_lossy(&bytes[..bytes.len().min(GENERATED_SAMPLE_BYTES)])
2211 .to_ascii_lowercase();
2212 sample.contains("@generated") || sample.contains("generated by")
2213}
2214
2215fn looks_minified(path: &Path, bytes: &[u8]) -> bool {
2216 let file_name = path
2217 .file_name()
2218 .and_then(|name| name.to_str())
2219 .unwrap_or_default();
2220 if file_name.contains(".min.") {
2221 return true;
2222 }
2223
2224 let sample = String::from_utf8_lossy(&bytes[..bytes.len().min(MINIFIED_SAMPLE_BYTES)]);
2225 let longest_line = sample.lines().map(str::len).max().unwrap_or(0);
2226 let whitespace = sample.chars().filter(|c| c.is_whitespace()).count();
2227 longest_line > MINIFIED_LINE_THRESHOLD && whitespace * 100 < sample.len().max(1)
2228}
2229
2230fn is_binary(bytes: &[u8]) -> bool {
2231 if bytes.starts_with(&[0xEF, 0xBB, 0xBF])
2232 || bytes.starts_with(&[0xFF, 0xFE])
2233 || bytes.starts_with(&[0xFE, 0xFF])
2234 {
2235 return false;
2236 }
2237
2238 let sample = &bytes[..bytes.len().min(BINARY_SAMPLE_BYTES)];
2239 sample.contains(&0)
2240}
2241
2242fn decode_utf16_bom(
2245 bom_stripped: &[u8],
2246 encoding: &'static encoding_rs::Encoding,
2247 label: &str,
2248) -> (String, String, Vec<String>) {
2249 let (cow, _, had_errors) = encoding.decode(bom_stripped);
2250 let mut warnings = Vec::new();
2251 if had_errors {
2252 warnings.push(format!("{label} decode contained replacement characters"));
2253 }
2254 (cow.into_owned(), label.into(), warnings)
2255}
2256
2257fn decode_bytes(bytes: &[u8]) -> std::result::Result<(String, String, Vec<String>), String> {
2258 if bytes.starts_with(&[0xEF, 0xBB, 0xBF]) {
2259 let text = String::from_utf8(bytes[3..].to_vec()).map_err(|err| err.to_string())?;
2260 return Ok((text, "utf-8-bom".into(), vec![]));
2261 }
2262 if bytes.starts_with(&[0xFF, 0xFE]) {
2263 return Ok(decode_utf16_bom(&bytes[2..], UTF_16LE, "utf-16le"));
2264 }
2265 if bytes.starts_with(&[0xFE, 0xFF]) {
2266 return Ok(decode_utf16_bom(&bytes[2..], UTF_16BE, "utf-16be"));
2267 }
2268
2269 #[allow(clippy::option_if_let_else)]
2271 if let Ok(text) = String::from_utf8(bytes.to_vec()) {
2272 Ok((text, "utf-8".into(), vec![]))
2273 } else {
2274 let (cow, _, had_errors) = WINDOWS_1252.decode(bytes);
2275 let mut warnings = vec!["decoded using windows-1252 fallback".into()];
2276 if had_errors {
2277 warnings.push("fallback decode contained replacement characters".into());
2278 }
2279 Ok((cow.into_owned(), "windows-1252".into(), warnings))
2280 }
2281}
2282
2283fn compile_globset(patterns: &[String]) -> Result<Option<GlobSet>> {
2284 if patterns.is_empty() {
2285 return Ok(None);
2286 }
2287
2288 let mut builder = GlobSetBuilder::new();
2289 for pattern in patterns {
2290 builder
2291 .add(Glob::new(pattern).with_context(|| format!("invalid glob pattern: {pattern}"))?);
2292 }
2293 Ok(Some(
2294 builder.build().context("failed to compile glob filters")?,
2295 ))
2296}
2297
2298fn parse_enabled_languages(enabled: &[String]) -> Result<Option<BTreeSet<Language>>> {
2299 if enabled.is_empty() {
2300 return Ok(None);
2301 }
2302
2303 let supported = supported_languages();
2304 let mut set = BTreeSet::new();
2305 for name in enabled {
2306 let language = Language::from_name(name)
2307 .with_context(|| format!("unsupported language in config: {name}"))?;
2308 if !supported.contains(&language) {
2309 anyhow::bail!("language {name} is not supported in this build");
2310 }
2311 set.insert(language);
2312 }
2313 Ok(Some(set))
2314}
2315
2316pub fn write_json(run: &AnalysisRun, output_path: &Path) -> Result<()> {
2320 let json = serde_json::to_string_pretty(run).context("failed to serialize analysis run")?;
2321 fs::write(output_path, json)
2322 .with_context(|| format!("failed to write JSON output to {}", output_path.display()))
2323}
2324
2325pub fn read_json(path: &Path) -> Result<AnalysisRun> {
2329 let contents = fs::read_to_string(path)
2330 .with_context(|| format!("failed to read result file {}", path.display()))?;
2331 serde_json::from_str(&contents)
2332 .with_context(|| format!("failed to parse JSON result {}", path.display()))
2333}
2334
2335#[cfg(test)]
2336mod tests {
2337 use super::*;
2338
2339 #[test]
2340 fn effective_counts_respect_code_only_policy() {
2341 let raw = RawLineCounts {
2342 code_only_lines: 2,
2343 single_comment_only_lines: 1,
2344 mixed_code_single_comment_lines: 3,
2345 docstring_comment_lines: 2,
2346 ..RawLineCounts::default()
2347 };
2348 let counts = compute_effective_counts(&raw, MixedLinePolicy::CodeOnly, true, true);
2349 assert_eq!(counts.code_lines, 5);
2350 assert_eq!(counts.comment_lines, 3);
2351 }
2352
/// With `MixedLinePolicy::SeparateMixedCategory`, mixed lines land in their own bucket and
/// contribute to neither the code nor the comment total.
#[test]
fn effective_counts_can_separate_mixed() {
    let tallies = RawLineCounts {
        mixed_code_multi_comment_lines: 1,
        mixed_code_single_comment_lines: 2,
        ..RawLineCounts::default()
    };
    let effective =
        compute_effective_counts(&tallies, MixedLinePolicy::SeparateMixedCategory, true, true);
    assert_eq!(
        (
            effective.mixed_lines_separate,
            effective.code_lines,
            effective.comment_lines
        ),
        (3, 0, 0)
    );
}
2366
/// Input that is not valid UTF-8 is expected to be decoded as Windows-1252 and to produce
/// at least one warning.
#[test]
fn windows_1252_fallback_decodes() {
    // "Hello", then 0x96 (expected to decode to an en dash), then "W".
    let raw: [u8; 7] = *b"Hello\x96W";
    let (decoded, label, notes) = decode_bytes(&raw).unwrap();
    assert_eq!(label, "windows-1252");
    assert!(decoded.contains('–'));
    assert!(!notes.is_empty());
}
2375
/// A sample containing a NUL byte must be classified as binary.
#[test]
fn is_binary_detects_null_byte() {
    let classified = is_binary(b"hello\x00world");
    assert!(classified);
}
2383
/// Plain ASCII source text must not be classified as binary.
#[test]
fn is_binary_clean_text_is_not_binary() {
    let classified = is_binary(b"fn main() { println!(\"hello\"); }");
    assert!(!classified);
}
2389
/// Text that starts with a UTF-8 byte-order mark must not be classified as binary.
#[test]
fn is_binary_utf8_bom_not_binary() {
    let classified = is_binary(b"\xef\xbb\xbffn main() {}");
    assert!(!classified);
}
2395
/// A leading `@generated` comment marks the file as generated.
#[test]
fn looks_generated_at_generated_marker() {
    let file = Path::new("foo.rs");
    let flagged = looks_generated(file, b"// @generated by protoc-gen-rust\nfn foo() {}");
    assert!(flagged);
}
2401
/// Both a "DO NOT EDIT" header and a bare `@generated` header mark the file as generated.
#[test]
fn looks_generated_do_not_edit_marker() {
    let file = Path::new("foo.rs");
    assert!(looks_generated(
        file,
        b"// Code generated by build.rs. DO NOT EDIT.\nuse foo;"
    ));
    assert!(looks_generated(file, b"// @generated\nuse foo;"));
}
2411
/// Ordinary hand-written source without any marker must not be flagged as generated.
#[test]
fn looks_generated_normal_file_not_generated() {
    let file = Path::new("main.rs");
    let flagged = looks_generated(file, b"fn main() {\n println!(\"hello\");\n}\n");
    assert!(!flagged);
}
2417
/// A `.min.js` file name is flagged as minified even though the content here is short.
#[test]
fn looks_minified_dot_min_filename() {
    let file = Path::new("bundle.min.js");
    let flagged = looks_minified(file, b"function a(){return 1}");
    assert!(flagged);
}
2423
/// Normally formatted JavaScript with short lines must not be flagged as minified.
#[test]
fn looks_minified_normal_file_not_minified() {
    let file = Path::new("app.js");
    let flagged = looks_minified(file, b"function hello() {\n return 1;\n}\n");
    assert!(!flagged);
}
2429
/// A single line one byte longer than `MINIFIED_LINE_THRESHOLD` is flagged as minified.
#[test]
fn looks_minified_very_long_line() {
    let payload: Vec<u8> = vec![b'x'; MINIFIED_LINE_THRESHOLD + 1];
    let flagged = looks_minified(Path::new("app.js"), &payload);
    assert!(flagged);
}
2435
/// `Cargo.lock` is recognised as a lockfile.
#[test]
fn is_known_lockfile_cargo_lock() {
    let candidate = Path::new("Cargo.lock");
    assert!(is_known_lockfile(candidate));
}
2440
/// `package-lock.json` is recognised as a lockfile.
#[test]
fn is_known_lockfile_package_lock_json() {
    let candidate = Path::new("package-lock.json");
    assert!(is_known_lockfile(candidate));
}
2445
/// `yarn.lock` is recognised as a lockfile.
#[test]
fn is_known_lockfile_yarn_lock() {
    let candidate = Path::new("yarn.lock");
    assert!(is_known_lockfile(candidate));
}
2450
/// An ordinary source file is not treated as a lockfile.
#[test]
fn is_known_lockfile_normal_file_is_not_lockfile() {
    let candidate = Path::new("src/lib.rs");
    assert!(!is_known_lockfile(candidate));
}
2455
/// Paths under `node_modules/` count as vendored.
#[test]
fn is_vendor_path_node_modules() {
    let candidate = Path::new("node_modules/react/index.js");
    assert!(is_vendor_path(candidate));
}
2460
/// Paths under `vendor/` count as vendored.
#[test]
fn is_vendor_path_vendor_dir() {
    let candidate = Path::new("vendor/anyhow/src/lib.rs");
    assert!(is_vendor_path(candidate));
}
2465
/// A regular `src/` path is not vendored.
#[test]
fn is_vendor_path_normal_src_is_not_vendor() {
    let candidate = Path::new("src/lib.rs");
    assert!(!is_vendor_path(candidate));
}
2470
/// A path inside one of the excluded directories is reported as excluded.
#[test]
fn is_excluded_dir_path_matches_excluded() {
    let deny_list = vec![".git".into(), "target".into()];
    let candidate = Path::new(".git/config");
    assert!(is_excluded_dir_path(candidate, &deny_list));
}
2476
/// A path outside every excluded directory is not reported as excluded.
#[test]
fn is_excluded_dir_path_non_excluded_is_ok() {
    let deny_list = vec![".git".into(), "target".into()];
    let candidate = Path::new("src/main.rs");
    assert!(!is_excluded_dir_path(candidate, &deny_list));
}
2482
/// A UTF-8 byte-order mark is removed from the decoded text and the reported encoding is a
/// UTF-8 variant.
#[test]
fn decode_bytes_utf8_bom_stripped() {
    let with_bom = b"\xef\xbb\xbffn main() {}";
    let (decoded, encoding, _) = decode_bytes(with_bom).unwrap();
    let is_utf8_variant = encoding.contains("utf-8");
    assert!(is_utf8_variant, "should be utf-8 variant, got {encoding}");
    // The text must begin with the first real character, not the BOM.
    assert!(decoded.starts_with("fn"));
}
2494
/// Plain ASCII decodes unchanged as `utf-8` with no warnings.
#[test]
fn decode_bytes_plain_utf8() {
    let (decoded, label, notes) = decode_bytes(b"hello world").unwrap();
    assert_eq!(label, "utf-8");
    assert_eq!(decoded, "hello world");
    assert!(notes.is_empty());
}
2503
/// Input starting with the UTF-16LE byte-order mark (`FF FE`) is decoded as `utf-16le`.
#[test]
fn decode_bytes_utf16le_bom() {
    let mut encoded: Vec<u8> = vec![0xFF, 0xFE];
    // Append "hi\n" as little-endian UTF-16 code units.
    encoded.extend("hi\n".encode_utf16().flat_map(u16::to_le_bytes));
    let (decoded, label, _) = decode_bytes(&encoded).unwrap();
    assert_eq!(label, "utf-16le");
    assert!(decoded.contains('h'));
    assert!(decoded.contains('i'));
}
2517
/// Input starting with the UTF-16BE byte-order mark (`FE FF`) is decoded as `utf-16be`.
#[test]
fn decode_bytes_utf16be_bom() {
    let mut encoded: Vec<u8> = vec![0xFE, 0xFF];
    // Append "ok\n" as big-endian UTF-16 code units.
    encoded.extend("ok\n".encode_utf16().flat_map(u16::to_be_bytes));
    let (decoded, label, _) = decode_bytes(&encoded).unwrap();
    assert_eq!(label, "utf-16be");
    assert!(decoded.contains('o'));
    assert!(decoded.contains('k'));
}
2529
/// UTF-16LE text contains NUL bytes, but the leading BOM must keep it from being
/// classified as binary.
#[test]
fn is_binary_utf16le_bom_not_binary() {
    // BOM `FF FE` followed by 'h' as a little-endian code unit.
    let classified = is_binary(&[0xFF, 0xFE, 0x68, 0x00]);
    assert!(!classified);
}
2536
/// UTF-16BE text contains NUL bytes, but the leading BOM must keep it from being
/// classified as binary.
#[test]
fn is_binary_utf16be_bom_not_binary() {
    // BOM `FE FF` followed by 'h' as a big-endian code unit.
    let classified = is_binary(&[0xFE, 0xFF, 0x00, 0x68]);
    assert!(!classified);
}
2542
/// With `MixedLinePolicy::CodeAndComment`, every mixed line counts once as code and once as
/// a comment, and nothing goes to the separate mixed bucket.
#[test]
fn effective_counts_code_and_comment_policy() {
    let tallies = RawLineCounts {
        mixed_code_multi_comment_lines: 2,
        mixed_code_single_comment_lines: 3,
        ..RawLineCounts::default()
    };
    let effective =
        compute_effective_counts(&tallies, MixedLinePolicy::CodeAndComment, true, true);
    assert_eq!(
        (
            effective.code_lines,
            effective.comment_lines,
            effective.mixed_lines_separate
        ),
        (5, 5, 0)
    );
}
2558
/// With `MixedLinePolicy::CommentOnly`, mixed lines count only as comments.
#[test]
fn effective_counts_comment_only_policy() {
    let tallies = RawLineCounts {
        mixed_code_multi_comment_lines: 1,
        mixed_code_single_comment_lines: 4,
        ..RawLineCounts::default()
    };
    let effective = compute_effective_counts(&tallies, MixedLinePolicy::CommentOnly, true, true);
    assert_eq!(
        (
            effective.code_lines,
            effective.comment_lines,
            effective.mixed_lines_separate
        ),
        (0, 5, 0)
    );
}
2571
/// When the third argument is `false`, docstring lines are added to the code total rather
/// than the comment total (presumably the "docstrings as comments" switch — confirm against
/// `compute_effective_counts`).
#[test]
fn effective_counts_docstrings_as_code_when_flag_false() {
    let tallies = RawLineCounts {
        docstring_comment_lines: 3,
        code_only_lines: 10,
        ..RawLineCounts::default()
    };
    let effective = compute_effective_counts(&tallies, MixedLinePolicy::CodeOnly, false, true);
    assert_eq!((effective.code_lines, effective.comment_lines), (13, 0));
}
2584
/// When the fourth argument is `false`, compiler-directive lines are subtracted from the
/// code total (presumably the "count compiler directives" switch — confirm against
/// `compute_effective_counts`).
#[test]
fn effective_counts_exclude_compiler_directives() {
    let tallies = RawLineCounts {
        compiler_directive_lines: 3,
        code_only_lines: 10,
        ..RawLineCounts::default()
    };
    let effective = compute_effective_counts(&tallies, MixedLinePolicy::CodeOnly, true, false);
    let code_total = effective.code_lines;
    assert_eq!(code_total, 7);
}
2596
/// Excluding more directive lines than there are code lines must clamp the code total at
/// zero instead of underflowing.
#[test]
fn effective_counts_directives_not_subtracted_below_zero() {
    let tallies = RawLineCounts {
        compiler_directive_lines: 5,
        code_only_lines: 2,
        ..RawLineCounts::default()
    };
    let effective = compute_effective_counts(&tallies, MixedLinePolicy::CodeOnly, true, false);
    let code_total = effective.code_lines;
    assert_eq!(code_total, 0);
}
2607
/// A 5 000-line organic project yields strictly positive size, effort, duration and staffing.
#[test]
fn cocomo_organic_computes_positive_values() {
    let estimate = compute_cocomo(5_000, CocomoMode::Organic);
    assert_eq!(estimate.mode, CocomoMode::Organic);
    let figures = [
        estimate.ksloc,
        estimate.effort_person_months,
        estimate.duration_months,
        estimate.avg_staff,
    ];
    for figure in figures.iter() {
        assert!(*figure > 0.0);
    }
}
2619
/// A 20 000-line semi-detached project yields strictly positive size, effort and duration.
#[test]
fn cocomo_semi_detached_computes_positive_values() {
    let estimate = compute_cocomo(20_000, CocomoMode::SemiDetached);
    assert_eq!(estimate.mode, CocomoMode::SemiDetached);
    let figures = [
        estimate.ksloc,
        estimate.effort_person_months,
        estimate.duration_months,
    ];
    for figure in figures.iter() {
        assert!(*figure > 0.0);
    }
}
2628
/// A 100 000-line embedded project yields strictly positive effort and keeps its mode.
#[test]
fn cocomo_embedded_computes_positive_values() {
    let estimate = compute_cocomo(100_000, CocomoMode::Embedded);
    let effort = estimate.effort_person_months;
    assert!(effort > 0.0);
    assert_eq!(estimate.mode, CocomoMode::Embedded);
}
2635
/// Zero lines of code produce (approximately) zero size and zero effort.
#[test]
fn cocomo_zero_lines_produces_zero_effort() {
    let estimate = compute_cocomo(0, CocomoMode::Organic);
    let size_magnitude = estimate.ksloc.abs();
    let effort_magnitude = estimate.effort_person_months.abs();
    assert!(size_magnitude < f64::EPSILON);
    assert!(effort_magnitude < 0.01);
}
2643
/// Each file gets one count per commit that touched it, and the recorded date is the one
/// from the first commit in the log that mentions the file.
#[test]
fn parse_activity_log_counts_and_dates_per_file() {
    // Two commits, each introduced by a NUL-prefixed timestamp line.
    let log = concat!(
        "\u{0}2024-03-02T10:00:00+00:00\n",
        "M\tsrc/a.rs\n",
        "A\tsrc/b.rs\n",
        "\u{0}2024-03-01T09:00:00+00:00\n",
        "M\tsrc/a.rs\n",
    );
    let activity = parse_activity_log(log);
    let file_a = &activity["src/a.rs"];
    let file_b = &activity["src/b.rs"];
    assert_eq!(file_a.0, 2, "a.rs touched in two commits");
    assert_eq!(file_b.0, 1, "b.rs touched once");
    assert_eq!(file_a.1.as_deref(), Some("2024-03-02T10:00:00+00:00"));
}
2662
/// A rename entry (`R100 old new`) is credited to the new path only.
#[test]
fn parse_activity_log_attributes_rename_to_new_path() {
    let log = concat!(
        "\u{0}2024-03-02T10:00:00+00:00\n",
        "R100\tsrc/old.rs\tsrc/new.rs\n",
    );
    let activity = parse_activity_log(log);
    assert_eq!(activity["src/new.rs"].0, 1);
    let old_path_present = activity.contains_key("src/old.rs");
    assert!(!old_path_present);
}
2670
/// Empty log output yields an empty activity map.
#[test]
fn parse_activity_log_empty_is_empty() {
    let activity = parse_activity_log("");
    assert!(activity.is_empty());
}
2675
/// A `url = …` line yields the URL value.
#[test]
fn parse_url_line_extracts_url() {
    let extracted = parse_url_line("url = https://example.com/repo.git");
    assert_eq!(extracted, Some("https://example.com/repo.git"));
}
2685
/// A line whose key is not `url` yields `None`.
#[test]
fn parse_url_line_returns_none_for_non_url_key() {
    let extracted = parse_url_line("fetch = +refs/heads/*:refs/remotes/origin/*");
    assert_eq!(extracted, None);
}
2693
/// A `url =` line with no value yields `None`.
#[test]
fn parse_url_line_returns_none_for_empty_url() {
    let extracted = parse_url_line("url = ");
    assert_eq!(extracted, None);
}
2698
/// A `.generated.` file name marks the file as generated even when the content has no marker.
#[test]
fn looks_generated_generated_filename_extension() {
    let file = Path::new("schema.generated.ts");
    let flagged = looks_generated(file, b"// normal code\n");
    assert!(flagged);
}
2705
/// A `.g.` file name marks the file as generated even when the content has no marker.
#[test]
fn looks_generated_dot_g_extension() {
    let file = Path::new("parser.g.cs");
    let flagged = looks_generated(file, b"// normal code\n");
    assert!(flagged);
}
2711
/// A short, dense one-line statement is not enough to be flagged as minified.
#[test]
fn looks_minified_whitespace_ratio_is_ok() {
    let file = Path::new("app.js");
    let flagged = looks_minified(file, b"var x=1,y=2,z=3;\n");
    assert!(!flagged);
}
2718
/// `pnpm-lock.yaml` is recognised as a lockfile.
#[test]
fn is_known_lockfile_pnpm() {
    let candidate = Path::new("pnpm-lock.yaml");
    assert!(is_known_lockfile(candidate));
}
2723
/// `Pipfile.lock` is recognised as a lockfile.
#[test]
fn is_known_lockfile_pipfile() {
    let candidate = Path::new("Pipfile.lock");
    assert!(is_known_lockfile(candidate));
}
2728
/// `poetry.lock` is recognised as a lockfile.
#[test]
fn is_known_lockfile_poetry() {
    let candidate = Path::new("poetry.lock");
    assert!(is_known_lockfile(candidate));
}
2733
/// `composer.lock` is recognised as a lockfile.
#[test]
fn is_known_lockfile_composer() {
    let candidate = Path::new("composer.lock");
    assert!(is_known_lockfile(candidate));
}
2738
/// A path below the root is rendered relative to that root.
#[test]
fn relative_path_string_strips_root_prefix() {
    let root = Path::new("/tmp/project");
    let file = Path::new("/tmp/project/src/lib.rs");
    assert_eq!(relative_path_string(file, root), "src/lib.rs");
}
2748
/// A path outside the root still produces a non-empty string.
#[test]
fn relative_path_string_falls_back_to_full_path() {
    let root = Path::new("/tmp/project");
    let outsider = Path::new("/other/dir/file.rs");
    let rendered = relative_path_string(outsider, root);
    assert!(!rendered.is_empty());
}
2758
2759 #[test]
2762 fn find_duplicate_groups_returns_empty_for_unique_hashes() {
2763 use sloc_languages::{Language, ParseMode, RawLineCounts};
2764 let make_rec = |hash: u64, path: &str| FileRecord {
2765 path: path.into(),
2766 relative_path: path.into(),
2767 language: Some(Language::Rust),
2768 size_bytes: 10,
2769 detected_encoding: Some("utf-8".into()),
2770 raw_line_categories: RawLineCounts::default(),
2771 effective_counts: EffectiveCounts::default(),
2772 status: FileStatus::AnalyzedExact,
2773 warnings: vec![],
2774 generated: false,
2775 minified: false,
2776 vendor: false,
2777 parse_mode: Some(ParseMode::Lexical),
2778 submodule: None,
2779 coverage: None,
2780 style_analysis: None,
2781 cyclomatic_complexity: None,
2782 lsloc: None,
2783 commit_count: None,
2784 last_commit_date: None,
2785 content_hash: hash,
2786 };
2787 let analyzed = vec![make_rec(111, "a.rs"), make_rec(222, "b.rs")];
2788 let groups = find_duplicate_groups(&analyzed);
2789 assert!(groups.is_empty());
2790 }
2791
2792 #[test]
2793 fn find_duplicate_groups_returns_group_for_same_hash() {
2794 use sloc_languages::{Language, ParseMode, RawLineCounts};
2795 let make_rec = |hash: u64, path: &str| FileRecord {
2796 path: path.into(),
2797 relative_path: path.into(),
2798 language: Some(Language::Rust),
2799 size_bytes: 10,
2800 detected_encoding: Some("utf-8".into()),
2801 raw_line_categories: RawLineCounts::default(),
2802 effective_counts: EffectiveCounts::default(),
2803 status: FileStatus::AnalyzedExact,
2804 warnings: vec![],
2805 generated: false,
2806 minified: false,
2807 vendor: false,
2808 parse_mode: Some(ParseMode::Lexical),
2809 submodule: None,
2810 coverage: None,
2811 style_analysis: None,
2812 cyclomatic_complexity: None,
2813 lsloc: None,
2814 commit_count: None,
2815 last_commit_date: None,
2816 content_hash: hash,
2817 };
2818 let analyzed = vec![
2819 make_rec(999, "a.rs"),
2820 make_rec(999, "b.rs"),
2821 make_rec(123, "c.rs"),
2822 ];
2823 let groups = find_duplicate_groups(&analyzed);
2824 assert_eq!(groups.len(), 1);
2825 assert_eq!(groups[0].len(), 2);
2826 }
2827
2828 #[test]
2829 fn find_duplicate_groups_ignores_zero_hash() {
2830 use sloc_languages::{Language, ParseMode, RawLineCounts};
2831 let make_rec = |hash: u64, path: &str| FileRecord {
2832 path: path.into(),
2833 relative_path: path.into(),
2834 language: Some(Language::Rust),
2835 size_bytes: 10,
2836 detected_encoding: Some("utf-8".into()),
2837 raw_line_categories: RawLineCounts::default(),
2838 effective_counts: EffectiveCounts::default(),
2839 status: FileStatus::AnalyzedExact,
2840 warnings: vec![],
2841 generated: false,
2842 minified: false,
2843 vendor: false,
2844 parse_mode: Some(ParseMode::Lexical),
2845 submodule: None,
2846 coverage: None,
2847 style_analysis: None,
2848 cyclomatic_complexity: None,
2849 lsloc: None,
2850 commit_count: None,
2851 last_commit_date: None,
2852 content_hash: hash,
2853 };
2854 let analyzed = vec![make_rec(0, "a.rs"), make_rec(0, "b.rs")];
2856 let groups = find_duplicate_groups(&analyzed);
2857 assert!(
2858 groups.is_empty(),
2859 "zero-hash files must not be grouped as duplicates"
2860 );
2861 }
2862
/// A directory without a `.gitmodules` file reports no submodules.
#[test]
fn detect_submodules_no_gitmodules_returns_empty() {
    let sandbox = tempfile::tempdir().unwrap();
    let found = detect_submodules(sandbox.path());
    assert!(found.is_empty());
}
2871
/// A `.gitmodules` file with one section yields one entry whose first field is the
/// submodule name.
#[test]
fn detect_submodules_parses_gitmodules_file() {
    let sandbox = tempfile::tempdir().unwrap();
    let manifest = concat!(
        "[submodule \"vendor/lib\"]\n",
        "\tpath = vendor/lib\n",
        "\turl = https://github.com/example/lib.git\n",
    );
    std::fs::write(sandbox.path().join(".gitmodules"), manifest).unwrap();

    let found = detect_submodules(sandbox.path());
    assert_eq!(found.len(), 1);
    assert_eq!(found[0].0, "vendor/lib");
}
2881
/// An `AnalysisRun` written with `write_json` and loaded back with `read_json` keeps its
/// summary totals, git metadata and per-file records.
#[test]
fn write_json_read_json_roundtrip() {
    use chrono::Utc;
    use sloc_config::AppConfig;
    use sloc_languages::{Language, ParseMode, RawLineCounts};

    // The single analyzed file carried by the run.
    let record = FileRecord {
        path: "a.rs".into(),
        relative_path: "a.rs".into(),
        language: Some(Language::Rust),
        size_bytes: 50,
        detected_encoding: Some("utf-8".into()),
        raw_line_categories: RawLineCounts {
            code_only_lines: 5,
            ..RawLineCounts::default()
        },
        effective_counts: EffectiveCounts {
            code_lines: 5,
            ..EffectiveCounts::default()
        },
        status: FileStatus::AnalyzedExact,
        warnings: vec![],
        generated: false,
        minified: false,
        vendor: false,
        parse_mode: Some(ParseMode::Lexical),
        submodule: None,
        coverage: None,
        style_analysis: None,
        cyclomatic_complexity: None,
        lsloc: None,
        commit_count: None,
        last_commit_date: None,
        content_hash: 0,
    };

    let tool = ToolMetadata {
        name: "sloc".into(),
        version: "0.0.1".into(),
        run_id: "test-roundtrip".into(),
        timestamp_utc: Utc::now(),
    };
    let environment = EnvironmentMetadata {
        operating_system: "test".into(),
        architecture: "x86_64".into(),
        runtime_mode: "test".into(),
        initiator_username: "tester".into(),
        initiator_hostname: "testhost".into(),
        ci_name: None,
    };
    let summary_totals = SummaryTotals {
        files_analyzed: 1,
        code_lines: 5,
        ..SummaryTotals::default()
    };

    let run = AnalysisRun {
        tool,
        environment,
        effective_configuration: AppConfig::default(),
        input_roots: vec!["/tmp/test".into()],
        summary_totals,
        totals_by_language: vec![],
        per_file_records: vec![record],
        skipped_file_records: vec![],
        warnings: vec![],
        submodule_summaries: vec![],
        git_commit_short: Some("abc1234".into()),
        git_branch: Some("main".into()),
        git_commit_long: None,
        git_commit_author: None,
        git_tags: None,
        git_nearest_tag: None,
        git_commit_date: None,
        git_remote_url: None,
        style_summary: None,
        cocomo: None,
        uloc: 0,
        dryness_pct: None,
        duplicate_groups: vec![],
        duplicates_excluded: 0,
    };

    let sandbox = tempfile::tempdir().unwrap();
    let json_path = sandbox.path().join("test.json");
    write_json(&run, &json_path).unwrap();
    let loaded = read_json(&json_path).unwrap();

    let totals = &loaded.summary_totals;
    assert_eq!(totals.files_analyzed, 1);
    assert_eq!(totals.code_lines, 5);
    assert_eq!(loaded.git_commit_short.as_deref(), Some("abc1234"));
    assert_eq!(loaded.git_branch.as_deref(), Some("main"));
    assert_eq!(loaded.per_file_records.len(), 1);
}
2969
/// Smoke test: with the CI marker variables below removed, `detect_ci_system` must run
/// without panicking. The result itself is deliberately not asserted.
#[test]
fn detect_ci_system_returns_none_without_env_vars() {
    const CI_MARKERS: [&str; 9] = [
        "JENKINS_URL",
        "JENKINS_HOME",
        "BUILD_URL",
        "GITHUB_ACTIONS",
        "GITLAB_CI",
        "CIRCLECI",
        "TRAVIS",
        "TF_BUILD",
        "TEAMCITY_VERSION",
    ];
    CI_MARKERS
        .iter()
        .for_each(|marker| std::env::remove_var(marker));
    let _ = detect_ci_system();
}
2991
/// A `.git` file containing `gitdir: <absolute path>` resolves to that existing directory.
#[test]
fn resolve_git_file_pointer_valid_absolute_gitdir() {
    let sandbox = tempfile::tempdir().unwrap();
    let root = sandbox.path();
    let target = root.join("real.git");
    fs::create_dir_all(&target).unwrap();
    let pointer = root.join(".git");
    let contents = format!("gitdir: {}\n", target.display());
    fs::write(&pointer, contents).unwrap();

    let resolved = resolve_git_file_pointer(&pointer, root)
        .expect("should resolve a valid absolute gitdir pointer");
    assert!(resolved.is_dir());
}
3012
/// A `.git` file that does not start with `gitdir:` resolves to nothing.
#[test]
fn resolve_git_file_pointer_missing_gitdir_prefix_returns_none() {
    let sandbox = tempfile::tempdir().unwrap();
    let pointer = sandbox.path().join(".git");
    fs::write(&pointer, "not a gitdir line\n").unwrap();
    let resolved = resolve_git_file_pointer(&pointer, sandbox.path());
    assert!(resolved.is_none());
}
3020
/// A pointer file that cannot be read resolves to nothing.
#[test]
fn resolve_git_file_pointer_unreadable_path_returns_none() {
    let pointer = Path::new("/nonexistent/__sloc_test_git_file__");
    let root = Path::new("/nonexistent");
    let resolved = resolve_git_file_pointer(pointer, root);
    assert!(resolved.is_none());
}
3029
/// A well-formed pointer whose target directory does not exist resolves to nothing.
#[test]
fn resolve_git_file_pointer_nonexistent_target_returns_none() {
    let sandbox = tempfile::tempdir().unwrap();
    let pointer = sandbox.path().join(".git");
    fs::write(&pointer, "gitdir: /nonexistent/__sloc_fake_gitdir_xyz__\n").unwrap();
    let resolved = resolve_git_file_pointer(&pointer, sandbox.path());
    assert!(resolved.is_none());
}
3038
/// A relative `gitdir:` target is resolved against the supplied root directory.
#[test]
fn resolve_git_file_pointer_relative_path() {
    let sandbox = tempfile::tempdir().unwrap();
    let root = sandbox.path();
    fs::create_dir_all(root.join("real_git_dir")).unwrap();
    let pointer = root.join(".git");
    fs::write(&pointer, "gitdir: real_git_dir\n").unwrap();
    let resolved = resolve_git_file_pointer(&pointer, root);
    assert!(resolved.is_some());
}
3050
/// A loose ref file under `refs/heads/` yields its SHA with the trailing newline removed.
#[test]
fn resolve_ref_from_loose_file() {
    let sandbox = tempfile::tempdir().unwrap();
    let git_dir = sandbox.path();
    let sha = "abc1234567890abcdef1234567890abcdef123456";
    fs::create_dir_all(git_dir.join("refs/heads")).unwrap();
    let loose_ref = git_dir.join("refs/heads/main");
    fs::write(loose_ref, format!("{sha}\n")).unwrap();

    let resolved = resolve_ref(git_dir, "refs/heads/main");
    assert_eq!(resolved.as_deref(), Some(sha));
}
3064
/// When no loose ref exists, the SHA is taken from the matching `packed-refs` entry.
#[test]
fn resolve_ref_from_packed_refs() {
    let sandbox = tempfile::tempdir().unwrap();
    let git_dir = sandbox.path();
    let sha = "def5678def5678def5678def5678def5678def56";
    let packed =
        format!("# pack-refs with: peeled fully-peeled sorted\n{sha} refs/heads/feature\n");
    fs::write(git_dir.join("packed-refs"), packed).unwrap();

    let resolved = resolve_ref(git_dir, "refs/heads/feature");
    assert_eq!(resolved.as_deref(), Some(sha));
}
3079
/// A ref that exists neither as a loose file nor in `packed-refs` resolves to nothing.
#[test]
fn resolve_ref_not_found_returns_none() {
    let sandbox = tempfile::tempdir().unwrap();
    let resolved = resolve_ref(sandbox.path(), "refs/heads/nonexistent-branch-xyz");
    assert!(resolved.is_none());
}
3086
/// Comment lines (`#`) and peeled-object lines (`^`) in `packed-refs` are skipped while
/// searching for the requested ref.
#[test]
fn resolve_ref_packed_refs_skips_comment_and_peeled() {
    let sandbox = tempfile::tempdir().unwrap();
    let git_dir = sandbox.path();
    let sha = "aaa1111aaa1111aaa1111aaa1111aaa1111aaa11";
    let packed = format!("# comment\n^peeled-object-sha\n{sha} refs/tags/v1.0\n");
    fs::write(git_dir.join("packed-refs"), packed).unwrap();

    let resolved = resolve_ref(git_dir, "refs/tags/v1.0");
    assert_eq!(resolved.as_deref(), Some(sha));
}
3101
/// A loose ref file whose content is too short to be a SHA is rejected; with no
/// `packed-refs` file to fall back on, the lookup yields nothing.
#[test]
fn resolve_ref_loose_sha_too_short_falls_through_to_packed() {
    let sandbox = tempfile::tempdir().unwrap();
    let git_dir = sandbox.path();
    fs::create_dir_all(git_dir.join("refs/heads")).unwrap();
    let loose_ref = git_dir.join("refs/heads/main");
    fs::write(loose_ref, "short\n").unwrap();
    let resolved = resolve_ref(git_dir, "refs/heads/main");
    assert!(resolved.is_none());
}
3113
/// The `url` value of the `[remote "origin"]` section in `.git/config` is returned.
#[test]
fn read_git_remote_url_parses_origin_url() {
    let sandbox = tempfile::tempdir().unwrap();
    let git_dir = sandbox.path().join(".git");
    fs::create_dir_all(&git_dir).unwrap();
    let config = concat!(
        "[core]\n",
        "\trepositoryformatversion = 0\n",
        "[remote \"origin\"]\n",
        "\turl = https://github.com/org/repo.git\n",
        "\tfetch = +refs/heads/*:refs/remotes/origin/*\n",
    );
    fs::write(git_dir.join("config"), config).unwrap();

    let remote = read_git_remote_url(&git_dir);
    assert_eq!(remote.as_deref(), Some("https://github.com/org/repo.git"));
}
3129
/// A git directory without a `config` file yields no remote URL.
#[test]
fn read_git_remote_url_no_config_returns_none() {
    let sandbox = tempfile::tempdir().unwrap();
    let git_dir = sandbox.path().join(".git");
    fs::create_dir_all(&git_dir).unwrap();
    let remote = read_git_remote_url(&git_dir);
    assert!(remote.is_none());
}
3139
/// A directory with no `.git` entry produces git info without a commit hash.
#[test]
fn detect_git_for_run_no_git_dir_returns_default() {
    let sandbox = tempfile::tempdir().unwrap();
    let git_info = detect_git_for_run(sandbox.path());
    assert!(git_info.commit_long.is_none());
}
3149
/// A `.git` directory that has no `HEAD` file produces git info without a commit hash.
#[test]
fn detect_git_for_run_unreadable_head_returns_default() {
    let sandbox = tempfile::tempdir().unwrap();
    fs::create_dir_all(sandbox.path().join(".git")).unwrap();
    let git_info = detect_git_for_run(sandbox.path());
    assert!(git_info.commit_long.is_none());
}
3159
/// A detached `HEAD` holding a raw SHA yields that SHA as the long commit and its first
/// seven characters as the short commit.
#[test]
fn detect_git_for_run_detached_head_with_sha() {
    let sandbox = tempfile::tempdir().unwrap();
    let git_dir = sandbox.path().join(".git");
    fs::create_dir_all(&git_dir).unwrap();
    let sha = "abc1234567890abcdef1234567890abcdef12345";
    fs::write(git_dir.join("HEAD"), sha).unwrap();

    let git_info = detect_git_for_run(sandbox.path());
    assert_eq!(git_info.commit_long.as_deref(), Some(sha));
    assert_eq!(git_info.commit_short.as_deref(), Some("abc1234"));
}
3173
/// A symbolic `HEAD` pointing at `refs/heads/main` is resolved through `packed-refs`,
/// yielding both the commit SHA and the branch name.
#[test]
fn detect_git_for_run_with_packed_ref() {
    let sandbox = tempfile::tempdir().unwrap();
    let git_dir = sandbox.path().join(".git");
    fs::create_dir_all(&git_dir).unwrap();
    fs::write(git_dir.join("HEAD"), "ref: refs/heads/main\n").unwrap();
    let sha = "deadbeef00000000000000000000000000000000";
    let packed = format!("# pack-refs\n{sha} refs/heads/main\n");
    fs::write(git_dir.join("packed-refs"), packed).unwrap();

    let git_info = detect_git_for_run(sandbox.path());
    assert_eq!(git_info.commit_long.as_deref(), Some(sha));
    assert_eq!(git_info.branch.as_deref(), Some("main"));
}
3191
3192 use std::sync::{Mutex, OnceLock};
/// Process-wide lock used to serialise tests that mutate CI-related environment variables.
static CI_ENV_LOCK: OnceLock<Mutex<()>> = OnceLock::new();

/// Acquires [`CI_ENV_LOCK`], creating the mutex on first use.
///
/// The mutex protects no data (`()`), so a poisoned lock carries no broken invariant.
/// Poisoning is therefore ignored: a test that panics while holding the guard must not
/// make every other environment test fail as well.
fn ci_env_lock() -> std::sync::MutexGuard<'static, ()> {
    CI_ENV_LOCK
        .get_or_init(|| Mutex::new(()))
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner)
}
3200
/// Removes every branch-related CI environment variable the `ci_branch_from_env_*` tests
/// set, so each test starts from and leaves behind a clean environment.
fn clear_branch_env_vars() {
    const BRANCH_VARS: [&str; 7] = [
        "BRANCH_NAME",
        "GIT_BRANCH",
        "GITHUB_REF_NAME",
        "CI_COMMIT_BRANCH",
        "CIRCLE_BRANCH",
        "TRAVIS_BRANCH",
        "BUILD_SOURCEBRANCH",
    ];
    BRANCH_VARS
        .iter()
        .for_each(|name| std::env::remove_var(name));
}
3214
/// A `refs/heads/` prefix on the CI-provided branch is stripped.
#[test]
fn ci_branch_from_env_strips_refs_heads_prefix() {
    let _serialized = ci_env_lock();
    clear_branch_env_vars();
    std::env::set_var("BUILD_SOURCEBRANCH", "refs/heads/my-branch");
    let detected = ci_branch_from_env();
    // Clean up before asserting so a failure does not leak the variable.
    clear_branch_env_vars();
    assert_eq!(detected.as_deref(), Some("my-branch"));
}
3225
/// An `origin/` prefix on the CI-provided branch is stripped.
#[test]
fn ci_branch_from_env_strips_origin_prefix() {
    let _serialized = ci_env_lock();
    clear_branch_env_vars();
    std::env::set_var("GIT_BRANCH", "origin/develop");
    let detected = ci_branch_from_env();
    // Clean up before asserting so a failure does not leak the variable.
    clear_branch_env_vars();
    assert_eq!(detected.as_deref(), Some("develop"));
}
3235
/// A branch value of literally `HEAD` is treated as "no branch".
#[test]
fn ci_branch_from_env_returns_none_for_head() {
    let _serialized = ci_env_lock();
    clear_branch_env_vars();
    std::env::set_var("BRANCH_NAME", "HEAD");
    let branch = ci_branch_from_env();
    // Clean up before asserting so a failure does not leak the variable.
    clear_branch_env_vars();
    let filtered = branch.is_none();
    assert!(filtered, "HEAD should be filtered, got: {branch:?}");
}
3247}