#![allow(clippy::multiple_crate_versions)]
pub mod baseline;
pub mod coverage;
pub mod delta;
pub mod history;
pub use baseline::{check_against_baseline, resolve_baselines_path, BaselineEntry, BaselineStore};
pub use coverage::{aggregate_line_coverage, lookup_coverage, parse_lcov, FileCoverage};
pub use delta::{
compute_delta, compute_multi_delta, FileChangeStatus, FileDelta, MultiFileDelta,
MultiScanComparison, MultiScanPoint, ScanComparison, SummaryDelta,
};
pub use history::{
CleanupPolicy, CleanupPolicyStore, RegistryEntry, ScanRegistry, ScanSummarySnapshot,
WatchedDirsStore,
};
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::sync::Arc;
use anyhow::{Context, Result};
use chrono::{DateTime, Utc};
use encoding_rs::{UTF_16BE, UTF_16LE, WINDOWS_1252};
use globset::{Glob, GlobSet, GlobSetBuilder};
use ignore::WalkBuilder;
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use sloc_config::{
AppConfig, BinaryFileBehavior, BlankInBlockCommentPolicy, ContinuationLinePolicy,
FailureBehavior, MixedLinePolicy,
};
use sloc_languages::style::IndentStyle;
use sloc_languages::{
analyze_text, detect_language, supported_languages, AnalysisOptions, Language, ParseMode,
RawLineCounts, StyleAnalysis, StyleLangScope,
};
/// Upper bound on the number of analysis worker threads (applied in `run_parallel_analysis`).
const MAX_ANALYSIS_THREADS: usize = 16;
/// Worker-thread count used when `std::thread::available_parallelism()` returns an error.
const DEFAULT_ANALYSIS_THREADS: usize = 4;
// NOTE(review): the four limits below are not referenced in the reviewed section of this file.
// Judging by their names they bound the byte samples / line length inspected by the
// generated-file, minified-file and binary detectors — confirm at their use sites.
const GENERATED_SAMPLE_BYTES: usize = 1024;
const MINIFIED_SAMPLE_BYTES: usize = 4096;
const MINIFIED_LINE_THRESHOLD: usize = 2000;
const BINARY_SAMPLE_BYTES: usize = 8192;
/// Shared atomic counters through which a caller can observe scan progress.
pub struct ProgressCounters {
/// Incremented by one after each candidate file has been processed by a worker (`worker_loop`).
pub files_done: Arc<AtomicUsize>,
/// Incremented by the number of discovered paths each time a directory root is walked (`walk_root`).
pub files_total: Arc<AtomicUsize>,
}
/// Result of the metadata-level policy check performed by `check_metadata_policy`.
enum MetadataPolicyOutcome {
/// The file is reported as a skipped record (boxed to keep the enum small).
Skip(Box<FileRecord>),
/// The file is dropped without producing any record (include globs did not match).
Exclude,
/// No metadata policy applies; analysis of the file continues.
Continue,
}
/// Outcome recorded for a candidate file; serialized in `snake_case`.
///
/// `push_record` files the two `Analyzed*` variants under the analyzed records and every
/// other variant under the skipped records.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FileStatus {
// NOTE(review): the distinction between "exact" and "best effort" is decided outside the
// reviewed section — presumably it follows the parse mode; confirm.
AnalyzedExact,
AnalyzedBestEffort,
// NOTE(review): the next three variants are assigned outside the reviewed section.
SkippedBinary,
SkippedDecodeError,
SkippedUnsupported,
/// Skipped by a discovery or content policy (symlink, excluded directory, size limit,
/// exclude glob, lockfile, vendor / generated / minified detection).
SkippedByPolicy,
/// The file could not be processed, e.g. its metadata could not be read.
ErrorInternal,
}
/// COCOMO project class; selects the coefficient set used by `compute_cocomo`.
///
/// Serialized in `snake_case`; the default is `Organic`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum CocomoMode {
/// Coefficients (2.4, 1.05, 2.5, 0.38).
#[default]
Organic,
/// Coefficients (3.0, 1.12, 2.5, 0.35).
SemiDetached,
/// Coefficients (3.6, 1.20, 2.5, 0.32).
Embedded,
}
/// Effort estimate produced by `compute_cocomo`; every numeric field is rounded to two decimals there.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CocomoEstimate {
/// Coefficient set the estimate was computed with.
pub mode: CocomoMode,
/// Code lines divided by 1000.
pub ksloc: f64,
/// `a * ksloc^b` for the mode's coefficients.
pub effort_person_months: f64,
/// `c * effort^d` for the mode's coefficients.
pub duration_months: f64,
/// Effort divided by duration, or 0 when the duration is not positive.
pub avg_staff: f64,
}
/// Per-file line counts by category; all fields default to zero.
///
/// NOTE(review): these are populated outside the reviewed section — presumably the raw
/// counts after the configured mixed-line / continuation / blank-in-comment policies were
/// applied; confirm.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct EffectiveCounts {
/// Lines counted as code; summed by `compute_uloc` as the dryness denominator.
pub code_lines: u64,
pub comment_lines: u64,
pub blank_lines: u64,
pub mixed_lines_separate: u64,
}
/// Identification of the tool invocation that produced a run (filled in by `assemble_run`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolMetadata {
/// Tool name; `assemble_run` sets it to `"sloc"`.
pub name: String,
/// Crate version taken from `CARGO_PKG_VERSION` at compile time.
pub version: String,
/// `<YYYYMMDD-HHMM>-<uuid v4 in simple form>`, derived from the run timestamp.
pub run_id: String,
/// UTC time at which the run was assembled.
pub timestamp_utc: DateTime<Utc>,
}
/// Description of the machine and context a run was executed in (filled in by `assemble_run`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnvironmentMetadata {
/// `std::env::consts::OS` of the running binary.
pub operating_system: String,
/// `std::env::consts::ARCH` of the running binary.
pub architecture: String,
/// Caller-supplied label passed to `analyze` as `runtime_mode`.
pub runtime_mode: String,
/// Result of `get_current_username` (`USERNAME`, then `USER`, else `"unknown"`).
pub initiator_username: String,
/// Result of `get_hostname` (CI runner name when available, else the host name).
pub initiator_hostname: String,
/// CI system name; under Jenkins it is `"Jenkins"`, a tab, and the host name.
/// Omitted from the serialized form when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub ci_name: Option<String>,
}
/// Run-wide totals stored in `AnalysisRun::summary_totals`.
///
/// Fields marked `#[serde(default)]` deserialize to zero when absent from the input.
/// NOTE(review): the values are computed by `build_summary`, which is outside the reviewed
/// section; the per-field meaning below is taken from the field names only.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct SummaryTotals {
pub files_considered: u64,
pub files_analyzed: u64,
pub files_skipped: u64,
pub total_physical_lines: u64,
/// Also the input to the COCOMO estimate in `assemble_run` (skipped when zero).
pub code_lines: u64,
pub comment_lines: u64,
pub blank_lines: u64,
pub mixed_lines_separate: u64,
#[serde(default)]
pub functions: u64,
#[serde(default)]
pub classes: u64,
#[serde(default)]
pub variables: u64,
#[serde(default)]
pub imports: u64,
#[serde(default)]
pub test_count: u64,
#[serde(default)]
pub test_assertion_count: u64,
#[serde(default)]
pub test_suite_count: u64,
#[serde(default)]
pub coverage_lines_found: u64,
#[serde(default)]
pub coverage_lines_hit: u64,
#[serde(default)]
pub coverage_functions_found: u64,
#[serde(default)]
pub coverage_functions_hit: u64,
#[serde(default)]
pub coverage_branches_found: u64,
#[serde(default)]
pub coverage_branches_hit: u64,
#[serde(default)]
pub cyclomatic_complexity: u64,
/// Omitted from the serialized form when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub lsloc: Option<u64>,
}
/// Totals for a single language; used in `AnalysisRun::totals_by_language` and
/// `SubmoduleSummary::language_summaries`.
///
/// Fields marked `#[serde(default)]` deserialize to zero when absent from the input.
/// NOTE(review): the values are computed by `build_language_summaries`, which is outside the
/// reviewed section; the fields mirror those of `SummaryTotals`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LanguageSummary {
/// Language these totals belong to.
pub language: Language,
pub files: u64,
pub total_physical_lines: u64,
pub code_lines: u64,
pub comment_lines: u64,
pub blank_lines: u64,
pub mixed_lines_separate: u64,
#[serde(default)]
pub functions: u64,
#[serde(default)]
pub classes: u64,
#[serde(default)]
pub variables: u64,
#[serde(default)]
pub imports: u64,
#[serde(default)]
pub test_count: u64,
#[serde(default)]
pub test_assertion_count: u64,
#[serde(default)]
pub test_suite_count: u64,
#[serde(default)]
pub coverage_lines_found: u64,
#[serde(default)]
pub coverage_lines_hit: u64,
#[serde(default)]
pub coverage_functions_found: u64,
#[serde(default)]
pub coverage_functions_hit: u64,
#[serde(default)]
pub coverage_branches_found: u64,
#[serde(default)]
pub coverage_branches_hit: u64,
#[serde(default)]
pub cyclomatic_complexity: u64,
/// Omitted from the serialized form when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub lsloc: Option<u64>,
}
/// Everything recorded about one candidate file, whether it was analyzed or skipped.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileRecord {
pub path: String,
/// Path relative to the scanned root; used as the sort key for records, as the prefix for
/// per-file warnings, and as the lookup key for coverage, git activity and submodules.
pub relative_path: String,
pub language: Option<Language>,
pub size_bytes: u64,
pub detected_encoding: Option<String>,
/// Raw per-line classification; its `code_line_hashes` feed `compute_uloc`.
pub raw_line_categories: RawLineCounts,
pub effective_counts: EffectiveCounts,
/// Decides whether `push_record` files this record as analyzed or skipped.
pub status: FileStatus,
/// File-level warnings; `push_record` copies them into the run warnings.
pub warnings: Vec<String>,
pub generated: bool,
pub minified: bool,
pub vendor: bool,
pub parse_mode: Option<ParseMode>,
/// Name of the owning git submodule, set by `process_submodules`; omitted when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub submodule: Option<String>,
/// Coverage entry matched by `attach_coverage`; omitted when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub coverage: Option<FileCoverage>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub style_analysis: Option<StyleAnalysis>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub cyclomatic_complexity: Option<u32>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub lsloc: Option<u32>,
/// Number of git log entries touching the file within the activity window
/// (set by `assemble_run` when the window is enabled).
#[serde(default, skip_serializing_if = "Option::is_none")]
pub commit_count: Option<u32>,
/// Author date of the first log entry seen for the file in `git log` output
/// (set together with `commit_count`).
#[serde(default, skip_serializing_if = "Option::is_none")]
pub last_commit_date: Option<String>,
/// Content hash used by `find_duplicate_groups`; `0` means "no hash" there.
/// Never serialized or deserialized.
#[serde(skip)]
pub content_hash: u64,
}
/// Style statistics for one language family; an element of `StyleSummary::by_language`.
///
/// NOTE(review): populated by `build_style_summary`, which is outside the reviewed section;
/// the `*_pct` fields are presumably percentages in 0..=100 — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LanguageStyleGroup {
pub language_family: String,
pub files_count: u32,
pub dominant_guide: String,
pub dominant_score_pct: u8,
pub common_indent_style: String,
// NOTE(review): presumably (style guide name, average score) pairs — confirm.
pub guide_avg_scores: Vec<(String, u8)>,
pub line80_compliant_pct: u8,
pub line_col_compliant_pct: u8,
}
/// Run-wide style statistics stored in `AnalysisRun::style_summary`.
///
/// NOTE(review): populated by `build_style_summary`, which is outside the reviewed section.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StyleSummary {
pub files_analyzed: u32,
pub common_indent_style: String,
pub line80_compliant_pct: u8,
pub line_col_compliant_pct: u8,
// NOTE(review): `assemble_run` passes `config.analysis.style_col_threshold` to the builder;
// presumably this field echoes that value — confirm.
pub col_threshold: u16,
/// Per-language-family breakdown.
pub by_language: Vec<LanguageStyleGroup>,
}
/// Alias of [`StyleSummary`].
// NOTE(review): presumably kept so that code using the older C++-specific name keeps compiling — confirm.
pub type CppStyleSummary = StyleSummary;
/// Totals and git metadata for one git submodule, as returned by `process_submodules`.
///
/// NOTE(review): instances are built by `build_submodule_summaries`, which is outside the
/// reviewed section. All `git_*` fields are omitted from the serialized form when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SubmoduleSummary {
pub name: String,
pub relative_path: String,
pub files_analyzed: u64,
pub total_physical_lines: u64,
pub code_lines: u64,
pub comment_lines: u64,
pub blank_lines: u64,
pub language_summaries: Vec<LanguageSummary>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub git_commit_short: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub git_commit_long: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub git_branch: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub git_commit_author: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub git_commit_date: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub git_remote_url: Option<String>,
}
/// Complete result of one `analyze` call, assembled by `assemble_run`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalysisRun {
pub tool: ToolMetadata,
pub environment: EnvironmentMetadata,
/// Clone of the configuration the run was executed with.
pub effective_configuration: AppConfig,
/// The configured root paths converted to strings.
pub input_roots: Vec<String>,
pub summary_totals: SummaryTotals,
pub totals_by_language: Vec<LanguageSummary>,
/// Records whose status is `AnalyzedExact` or `AnalyzedBestEffort`, sorted by relative path.
pub per_file_records: Vec<FileRecord>,
/// Records with any other status, sorted by relative path.
pub skipped_file_records: Vec<FileRecord>,
/// Run-level warnings, including per-file warnings prefixed with the file's relative path.
pub warnings: Vec<String>,
/// Per-submodule breakdown; empty unless `discovery.submodule_breakdown` is enabled.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub submodule_summaries: Vec<SubmoduleSummary>,
// The `git_*` fields below come from `detect_git_for_run` on the first root path and are
// omitted from the serialized form when `None`.
/// First seven characters of `git_commit_long`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub git_commit_short: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub git_commit_long: Option<String>,
/// Branch read from `HEAD`, or from CI environment variables when `HEAD` names no branch.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub git_branch: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub git_commit_author: Option<String>,
/// Tags pointing at `HEAD`, joined with `", "`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub git_tags: Option<String>,
/// Output of `git describe --tags --abbrev=0 HEAD`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub git_nearest_tag: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub git_commit_date: Option<String>,
/// `url` of the `origin` remote from the repository's `config` file.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub git_remote_url: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub style_summary: Option<StyleSummary>,
/// COCOMO estimate (always computed in `Organic` mode); `None` when there are no code lines.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub cocomo: Option<CocomoEstimate>,
/// Number of distinct code-line hashes across all analyzed files (see `compute_uloc`).
#[serde(default)]
pub uloc: u64,
/// `uloc / total code lines * 100`; `None` when there are no code lines.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub dryness_pct: Option<f32>,
/// Groups of two or more relative paths sharing a non-zero content hash.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub duplicate_groups: Vec<Vec<String>>,
/// `assemble_run` always stores 0 here.
// NOTE(review): presumably updated later by a de-duplication step outside the reviewed section — confirm.
#[serde(default)]
pub duplicates_excluded: usize,
}
/// Git metadata gathered by `detect_git_for_run`; every field is `None` by default.
#[derive(Default)]
struct GitInfo {
/// First seven characters of `commit_long`.
commit_short: Option<String>,
/// Commit id `HEAD` resolves to.
commit_long: Option<String>,
/// Branch named by `HEAD`, else the branch reported by CI environment variables.
branch: Option<String>,
/// Output of `git log -1 --format=%an HEAD`.
author: Option<String>,
/// Tags pointing at `HEAD`, joined with `", "`.
tags: Option<String>,
/// Output of `git describe --tags --abbrev=0 HEAD`.
nearest_tag: Option<String>,
/// Output of `git log -1 --format=%aI HEAD`.
commit_date: Option<String>,
/// `url` of the `origin` remote read from the repository's `config` file.
remote_url: Option<String>,
}
/// Locates the git directory governing `start` by walking up its ancestors.
///
/// For each directory from `start` upward, a `.git` directory is returned as-is, and a
/// `.git` file is followed through its `gitdir:` pointer. A `.git` file whose pointer does
/// not resolve is ignored and the search continues with the parent. Returns `None` when no
/// ancestor yields a git directory.
fn find_git_dir(start: &Path) -> Option<PathBuf> {
    start.ancestors().find_map(|dir| {
        let dot_git = dir.join(".git");
        if dot_git.is_dir() {
            Some(dot_git)
        } else if dot_git.is_file() {
            resolve_git_file_pointer(&dot_git, dir)
        } else {
            None
        }
    })
}
/// Follows a `.git` *file* (`gitdir: <path>`) to the directory it points at.
///
/// `file` is the pointer file and `base_dir` the directory relative pointers are resolved
/// against. Forward slashes in the pointer are converted to the platform separator. The
/// result is canonicalized when possible. Returns `None` when the file cannot be read,
/// lacks the `gitdir: ` prefix, or the target is not a directory.
fn resolve_git_file_pointer(file: &Path, base_dir: &Path) -> Option<PathBuf> {
    let text = fs::read_to_string(file).ok()?;
    let native = text
        .trim()
        .strip_prefix("gitdir: ")?
        .replace('/', std::path::MAIN_SEPARATOR_STR);
    let target = Path::new(&native);
    let joined = if target.is_absolute() {
        target.to_path_buf()
    } else {
        base_dir.join(target)
    };
    // Fall back to the un-canonicalized path when canonicalization fails.
    let git_dir = match joined.canonicalize() {
        Ok(canonical) => canonical,
        Err(_) => joined,
    };
    if git_dir.is_dir() {
        Some(git_dir)
    } else {
        None
    }
}
/// Resolves `refname` (e.g. `refs/heads/main`) to a commit id inside `git_dir`.
///
/// The loose ref file is tried first; its trimmed content is accepted when it looks like an
/// object id. Otherwise `packed-refs` is scanned for an entry whose name equals `refname`.
/// Comment (`#`) and peeled-tag (`^`) lines are ignored, and so are blank or malformed
/// lines — a single bad line no longer aborts the whole lookup.
///
/// Returns `None` when the ref is found in neither place or `packed-refs` cannot be read.
fn resolve_ref(git_dir: &Path, refname: &str) -> Option<String> {
    // At least 40 hex digits: a SHA-1 id; longer ids (e.g. SHA-256) are accepted as well.
    fn looks_like_object_id(s: &str) -> bool {
        s.len() >= 40 && s.chars().all(|c| c.is_ascii_hexdigit())
    }

    let ref_path = refname
        .split('/')
        .fold(git_dir.to_path_buf(), |p, c| p.join(c));
    if let Ok(content) = fs::read_to_string(&ref_path) {
        let sha = content.trim();
        if looks_like_object_id(sha) {
            return Some(sha.to_string());
        }
    }

    let packed = fs::read_to_string(git_dir.join("packed-refs")).ok()?;
    for line in packed.lines() {
        if line.starts_with('#') || line.starts_with('^') {
            continue;
        }
        // Lines without a "<id> <name>" shape are skipped rather than ending the search.
        let Some((sha, name)) = line.split_once(' ') else {
            continue;
        };
        if name.trim() == refname && looks_like_object_id(sha) {
            return Some(sha.to_string());
        }
    }
    None
}
/// Extracts the value from a `url = <value>` git-config line.
///
/// `line` must start with `url`, followed by optional spaces/tabs and `=`. The value is
/// returned trimmed. Returns `None` for any other key or for an empty value.
fn parse_url_line(line: &str) -> Option<&str> {
    let after_key = line.strip_prefix("url")?;
    let after_padding = after_key.trim_start_matches(|c: char| c == ' ' || c == '\t');
    let value = after_padding.strip_prefix('=')?.trim();
    match value {
        "" => None,
        url => Some(url),
    }
}
/// Reads the `url` of the `origin` remote from `<git_dir>/config`.
///
/// Only a section header spelled exactly `[remote "origin"]` is recognised. The first `url`
/// entry inside such a section wins. Returns `None` when the file cannot be read or no such
/// entry exists.
fn read_git_remote_url(git_dir: &Path) -> Option<String> {
    let text = fs::read_to_string(git_dir.join("config")).ok()?;
    let mut inside_origin = false;
    for raw in text.lines() {
        let line = raw.trim();
        if line.starts_with('[') {
            // Every section header switches the state on or off.
            inside_origin = line == r#"[remote "origin"]"#;
            continue;
        }
        if !inside_origin {
            continue;
        }
        if let Some(url) = parse_url_line(line) {
            return Some(url.to_owned());
        }
    }
    None
}
/// Collects git metadata for the repository containing `project_path`.
///
/// Branch and commit id are read directly from the git directory (`HEAD`, refs,
/// `packed-refs`). Author, commit date, tags and nearest tag come from the `git` executable
/// and are `None` when it is unavailable or fails. When no git directory or readable `HEAD`
/// is found, only the branch reported by CI environment variables (if any) is returned.
fn detect_git_for_run(project_path: &Path) -> GitInfo {
    let ci_branch = ci_branch_from_env();

    let head = find_git_dir(project_path).and_then(|git_dir| {
        fs::read_to_string(git_dir.join("HEAD"))
            .ok()
            .map(|raw| (git_dir, raw.trim().to_string()))
    });
    let Some((git_dir, head_raw)) = head else {
        return GitInfo {
            branch: ci_branch,
            ..GitInfo::default()
        };
    };

    let (branch_from_head, commit_long) = match head_raw.strip_prefix("ref: ") {
        // Symbolic HEAD: the branch is the ref name, the commit is what the ref resolves to.
        Some(refname) => {
            let branch = refname
                .strip_prefix("refs/heads/")
                .map(|b| b.trim().to_string());
            let sha = resolve_ref(&git_dir, refname.trim());
            (branch, sha)
        }
        // Detached HEAD: the file holds the commit id itself.
        None => {
            let is_object_id =
                head_raw.len() >= 40 && head_raw.chars().all(|c| c.is_ascii_hexdigit());
            let sha = if is_object_id {
                Some(head_raw[..40].to_string())
            } else {
                None
            };
            (None, sha)
        }
    };

    let branch = branch_from_head.or(ci_branch);
    let commit_short = commit_long
        .as_deref()
        .map(|full| full.chars().take(7).collect::<String>());
    let author = run_git_cmd(project_path, &["log", "-1", "--format=%an", "HEAD"]);
    let commit_date = run_git_cmd(project_path, &["log", "-1", "--format=%aI", "HEAD"]);
    let remote_url = read_git_remote_url(&git_dir);
    let tags = run_git_cmd(project_path, &["tag", "--points-at", "HEAD"]).map(|listing| {
        let names: Vec<&str> = listing.lines().filter(|l| !l.is_empty()).collect();
        names.join(", ")
    });
    let nearest_tag = run_git_cmd(project_path, &["describe", "--tags", "--abbrev=0", "HEAD"]);

    GitInfo {
        commit_short,
        commit_long,
        branch,
        author,
        tags,
        nearest_tag,
        commit_date,
        remote_url,
    }
}
/// Runs `git <args>` in `dir` and returns its trimmed standard output.
///
/// Several well-known locations of the git executable are tried in order, but only a
/// *launch* failure (executable missing or not runnable) moves on to the next candidate.
/// Once a git executable has actually run, its result is final. This avoids spawning the
/// same failing command once per candidate, e.g. `git describe` in a repository without tags.
///
/// Every invocation passes `-c safe.directory=*`, so repositories owned by other users are
/// accepted.
///
/// Returns `None` when no executable could be launched, the command exited unsuccessfully,
/// its output was not valid UTF-8, or the trimmed output was empty.
fn run_git_cmd(dir: &Path, args: &[&str]) -> Option<String> {
    const CANDIDATES: &[&str] = &[
        "git",
        "/usr/bin/git",
        "/usr/local/bin/git",
        "/opt/homebrew/bin/git",
        r"C:\Program Files\Git\cmd\git.exe",
        r"C:\Program Files\Git\bin\git.exe",
        r"C:\Program Files (x86)\Git\cmd\git.exe",
    ];
    for &exe in CANDIDATES {
        let Ok(output) = std::process::Command::new(exe)
            .args(["-c", "safe.directory=*"])
            .args(args)
            .current_dir(dir)
            .output()
        else {
            // This candidate could not be started; try the next location.
            continue;
        };
        if !output.status.success() {
            return None;
        }
        return String::from_utf8(output.stdout)
            .ok()
            .map(|s| s.trim().to_string())
            .filter(|s| !s.is_empty());
    }
    None
}
/// Queries `git log` for per-file activity within the last `window_days` days.
///
/// Returns a map from path (as printed by `git log --name-status --relative`) to the number
/// of log entries touching it and the author date of the first entry seen. The map is empty
/// when git is unavailable, fails, or prints nothing.
fn detect_file_activity(
    project_path: &Path,
    window_days: u32,
) -> HashMap<String, (u32, Option<String>)> {
    let since = format!("--since={window_days} days ago");
    let git_args = [
        "-c",
        "core.quotepath=false",
        "log",
        since.as_str(),
        "--no-merges",
        "--name-status",
        "--relative",
        "--pretty=format:%x00%aI",
    ];
    match run_git_cmd(project_path, &git_args) {
        Some(log) => parse_activity_log(&log),
        None => HashMap::new(),
    }
}
/// Parses `git log --name-status --pretty=format:%x00%aI` output into per-file activity.
///
/// Lines starting with a NUL byte carry the author date of the commit whose file entries
/// follow. Every other non-blank line is a tab-separated `status<TAB>path` entry; for rename
/// (`R…`) and copy (`C…`) entries the last field (the new path) is used.
///
/// Returns a map from path to `(number of entries, date of the first entry seen)`. Because
/// `git log` lists newest commits first, that date is the most recent one.
fn parse_activity_log(out: &str) -> HashMap<String, (u32, Option<String>)> {
    let mut map: HashMap<String, (u32, Option<String>)> = HashMap::new();
    let mut current_date: Option<String> = None;
    for line in out.lines() {
        if let Some(date) = line.strip_prefix('\u{0}') {
            let d = date.trim();
            current_date = (!d.is_empty()).then(|| d.to_owned());
            continue;
        }
        if line.trim().is_empty() {
            continue;
        }
        let mut fields = line.split('\t');
        let status = fields.next().unwrap_or("");
        let path = if status.starts_with('R') || status.starts_with('C') {
            // Rename/copy entries are "status<TAB>old<TAB>new"; count the new path.
            fields.next_back()
        } else {
            fields.next()
        };
        let Some(path) = path.map(str::trim).filter(|p| !p.is_empty()) else {
            continue;
        };
        let entry = map.entry(path.to_owned()).or_insert((0, None));
        entry.0 += 1;
        // Keep only the first (newest) date seen for the path.
        if entry.1.is_none() {
            entry.1.clone_from(&current_date);
        }
    }
    map
}
/// Identifies the CI system the process runs under from well-known environment variables.
///
/// Jenkins is detected by the mere presence of `JENKINS_URL`, `JENKINS_HOME` or `BUILD_URL`
/// and takes precedence. The flag-style variables must equal `"true"` exactly. TeamCity is
/// detected by the presence of `TEAMCITY_VERSION`. Returns `None` when nothing matches.
fn detect_ci_system() -> Option<&'static str> {
    fn is_set(key: &str) -> bool {
        std::env::var(key).is_ok()
    }
    fn is_true(key: &str) -> bool {
        matches!(std::env::var(key).as_deref(), Ok("true"))
    }
    // Checked in this order, after Jenkins and before TeamCity.
    const TRUE_FLAGS: [(&str, &str); 5] = [
        ("GITHUB_ACTIONS", "GitHub Actions"),
        ("GITLAB_CI", "GitLab CI"),
        ("CIRCLECI", "CircleCI"),
        ("TRAVIS", "Travis CI"),
        ("TF_BUILD", "Azure DevOps"),
    ];

    if is_set("JENKINS_URL") || is_set("JENKINS_HOME") || is_set("BUILD_URL") {
        return Some("Jenkins");
    }
    for &(flag, system) in TRUE_FLAGS.iter() {
        if is_true(flag) {
            return Some(system);
        }
    }
    if is_set("TEAMCITY_VERSION") {
        Some("TeamCity")
    } else {
        None
    }
}
/// Determines the current branch from CI-provided environment variables.
///
/// The variables are consulted in a fixed order. Each value is trimmed and stripped of a
/// leading `refs/heads/` (or, failing that, `origin/`). Empty values and the literal `HEAD`
/// are skipped. Returns the first usable branch name, or `None`.
fn ci_branch_from_env() -> Option<String> {
    const VARS: &[&str] = &[
        "BRANCH_NAME",
        "GIT_BRANCH",
        "GITHUB_REF_NAME",
        "CI_COMMIT_BRANCH",
        "CIRCLE_BRANCH",
        "TRAVIS_BRANCH",
        "BUILD_SOURCEBRANCH",
    ];
    VARS.iter().find_map(|var| {
        let raw = std::env::var(var).ok()?;
        let trimmed = raw.trim();
        let branch = match trimmed.strip_prefix("refs/heads/") {
            Some(rest) => rest,
            None => trimmed.strip_prefix("origin/").unwrap_or(trimmed),
        };
        if branch.is_empty() || branch == "HEAD" {
            None
        } else {
            Some(branch.to_string())
        }
    })
}
/// Returns the invoking user's name from `USERNAME`, then `USER`, else `"unknown"`.
fn get_current_username() -> String {
    ["USERNAME", "USER"]
        .iter()
        .find_map(|key| std::env::var(key).ok())
        .unwrap_or_else(|| "unknown".to_string())
}
/// Returns the value of environment variable `var` unless it is unset, not valid Unicode, or empty.
fn non_empty_env(var: &str) -> Option<String> {
    std::env::var(var).ok().filter(|value| !value.is_empty())
}
/// Reports whether any of `JENKINS_URL`, `JENKINS_HOME` or `BUILD_URL` is set.
fn is_jenkins_env() -> bool {
    ["JENKINS_URL", "JENKINS_HOME", "BUILD_URL"]
        .iter()
        .any(|key| std::env::var(key).is_ok())
}
/// Returns a name for the machine (or CI runner) executing the scan.
///
/// CI-specific names win, in this order: Jenkins `NODE_NAME`, GitHub Actions `RUNNER_NAME`,
/// GitLab CI `CI_RUNNER_DESCRIPTION`. Each is used only when its CI system is detected and
/// the value is non-empty. Otherwise `COMPUTERNAME`, then `HOSTNAME`, then the trimmed
/// content of `/etc/hostname` is used, and finally `"unknown"`.
fn get_hostname() -> String {
    let flag_is_true = |key: &str| std::env::var(key).as_deref() == Ok("true");

    let runner_name = is_jenkins_env()
        .then(|| non_empty_env("NODE_NAME"))
        .flatten()
        .or_else(|| {
            flag_is_true("GITHUB_ACTIONS")
                .then(|| non_empty_env("RUNNER_NAME"))
                .flatten()
        })
        .or_else(|| {
            flag_is_true("GITLAB_CI")
                .then(|| non_empty_env("CI_RUNNER_DESCRIPTION"))
                .flatten()
        });
    if let Some(name) = runner_name {
        return name;
    }

    if let Ok(name) = std::env::var("COMPUTERNAME") {
        return name;
    }
    if let Ok(name) = std::env::var("HOSTNAME") {
        return name;
    }
    match std::fs::read_to_string("/etc/hostname") {
        Ok(text) => text.trim().to_string(),
        Err(_) => "unknown".to_string(),
    }
}
/// Discovers all files below the directory `root`, analyzes them in parallel, and merges the
/// outcomes into `analyzed`, `skipped` and `warnings`.
///
/// The walker follows the discovery settings in `config` (symlinks, hidden files, ignore
/// files). Paths already present in `seen_paths` are not analyzed again. When `progress` is
/// given, the number of newly discovered paths is added to `files_total`.
///
/// # Errors
/// Propagates errors from the analysis workers, including cancellation.
#[allow(clippy::too_many_arguments)]
fn walk_root(
    root: &Path,
    config: &AppConfig,
    include_globs: Option<&GlobSet>,
    exclude_globs: Option<&GlobSet>,
    enabled_languages: Option<&BTreeSet<Language>>,
    seen_paths: &mut HashSet<PathBuf>,
    analyzed: &mut Vec<FileRecord>,
    skipped: &mut Vec<FileRecord>,
    warnings: &mut Vec<String>,
    cancel: Option<&AtomicBool>,
    progress: Option<&ProgressCounters>,
) -> Result<()> {
    let discovery = &config.discovery;
    let honor_ignores = discovery.honor_ignore_files;

    let mut walker = WalkBuilder::new(root);
    walker
        .follow_links(discovery.follow_symlinks)
        .hidden(discovery.ignore_hidden_files)
        .ignore(honor_ignores)
        .parents(honor_ignores)
        .git_ignore(honor_ignores)
        .git_global(honor_ignores)
        .git_exclude(honor_ignores);

    let discovered = collect_walk_paths(&walker, seen_paths, warnings);
    if discovered.is_empty() {
        return Ok(());
    }
    if let Some(counters) = progress {
        counters
            .files_total
            .fetch_add(discovered.len(), Ordering::Relaxed);
    }

    let per_worker = run_parallel_analysis(
        &discovered,
        root,
        config,
        include_globs,
        exclude_globs,
        enabled_languages,
        cancel,
        progress,
    )?;
    merge_chunk_results(per_worker, analyzed, skipped, warnings)
}
/// Runs the parallel directory walk and returns the non-directory paths not seen before.
///
/// Walker threads send their findings over a channel. Walk errors are appended to
/// `warnings` as `discovery warning: …`. Each returned path is also inserted into
/// `seen_paths`; paths already in that set are dropped.
fn collect_walk_paths(
    builder: &WalkBuilder,
    seen_paths: &mut HashSet<PathBuf>,
    warnings: &mut Vec<String>,
) -> Vec<PathBuf> {
    let (tx, rx) = std::sync::mpsc::channel::<std::result::Result<PathBuf, String>>();
    builder.build_parallel().run(|| {
        let tx = tx.clone();
        Box::new(move |entry| {
            let message = match entry {
                Ok(found) => {
                    let path = found.into_path();
                    if path.is_dir() {
                        None
                    } else {
                        Some(Ok(path))
                    }
                }
                Err(e) => Some(Err(format!("discovery warning: {e}"))),
            };
            if let Some(message) = message {
                let _ = tx.send(message);
            }
            ignore::WalkState::Continue
        })
    });
    // Drop the original sender so the receiving loop ends once all walkers are done.
    drop(tx);

    let mut fresh_paths = Vec::new();
    for message in rx {
        match message {
            Ok(path) => {
                if seen_paths.insert(path.clone()) {
                    fresh_paths.push(path);
                }
            }
            Err(warn) => warnings.push(warn),
        }
    }
    fresh_paths
}
/// Body of one analysis worker: repeatedly claims the next unprocessed index from
/// `next_index` and analyzes `paths[index]` until the paths are exhausted.
///
/// The cancel flag is checked before each claim. When it is set, an
/// `"analysis cancelled"` error is appended and the worker stops. `files_done`, when given,
/// is incremented after every processed file.
///
/// Returns the outcomes of the files this worker processed, in processing order.
#[allow(clippy::too_many_arguments)]
fn worker_loop(
    paths: &[PathBuf],
    root: &Path,
    config: &AppConfig,
    include_globs: Option<&GlobSet>,
    exclude_globs: Option<&GlobSet>,
    enabled_languages: Option<&BTreeSet<Language>>,
    cancel: Option<&AtomicBool>,
    next_index: &AtomicUsize,
    files_done: Option<&AtomicUsize>,
) -> Vec<Result<Option<FileRecord>>> {
    let cancelled = || cancel.is_some_and(|flag| flag.load(Ordering::Relaxed));
    let mut outcomes = Vec::new();

    while !cancelled() {
        let index = next_index.fetch_add(1, Ordering::Relaxed);
        let Some(path) = paths.get(index) else {
            // Every path has been claimed by some worker.
            return outcomes;
        };
        outcomes.push(analyze_candidate_file(
            path,
            root,
            config,
            include_globs,
            exclude_globs,
            enabled_languages,
        ));
        if let Some(done) = files_done {
            done.fetch_add(1, Ordering::Relaxed);
        }
    }

    // The loop only ends here when cancellation was requested.
    outcomes.push(Err(anyhow::anyhow!("analysis cancelled")));
    outcomes
}
/// Analyzes `paths` on a pool of scoped worker threads and returns one result list per worker.
///
/// The worker count is the available parallelism (falling back to
/// `DEFAULT_ANALYSIS_THREADS`), capped at `MAX_ANALYSIS_THREADS` and at the number of paths,
/// so small inputs do not spawn idle threads. At least one worker always runs.
///
/// # Errors
/// Returns `"analysis thread panicked"` when any worker panicked. All workers are joined
/// explicitly before the results are inspected. Otherwise a handle left unjoined would be
/// auto-joined by `std::thread::scope`, which itself panics if such a thread panicked.
#[allow(clippy::too_many_arguments)]
fn run_parallel_analysis(
    paths: &[PathBuf],
    root: &Path,
    config: &AppConfig,
    include_globs: Option<&GlobSet>,
    exclude_globs: Option<&GlobSet>,
    enabled_languages: Option<&BTreeSet<Language>>,
    cancel: Option<&AtomicBool>,
    progress: Option<&ProgressCounters>,
) -> Result<Vec<Vec<Result<Option<FileRecord>>>>> {
    let available = std::thread::available_parallelism().map_or(DEFAULT_ANALYSIS_THREADS, |n| {
        n.get().min(MAX_ANALYSIS_THREADS)
    });
    // Never more workers than files, but always at least one.
    let thread_count = available.min(paths.len()).max(1);
    let next_index = AtomicUsize::new(0);
    let files_done: Option<&AtomicUsize> = progress.map(|p| p.files_done.as_ref());
    std::thread::scope(|s| -> Result<Vec<Vec<Result<Option<FileRecord>>>>> {
        let mut handles = Vec::with_capacity(thread_count);
        for _ in 0..thread_count {
            handles.push(s.spawn(|| {
                worker_loop(
                    paths,
                    root,
                    config,
                    include_globs,
                    exclude_globs,
                    enabled_languages,
                    cancel,
                    &next_index,
                    files_done,
                )
            }));
        }
        // Join every worker first so that no panicked thread is left for the scope to
        // auto-join (which would turn a reportable error into a panic).
        let joined: Vec<_> = handles.into_iter().map(|h| h.join()).collect();
        joined
            .into_iter()
            .map(|outcome| outcome.map_err(|_| anyhow::anyhow!("analysis thread panicked")))
            .collect()
    })
}
/// Folds the per-worker outcome lists into the shared record and warning collections.
///
/// Outcomes are processed worker by worker, in order. `Ok(None)` outcomes (files excluded
/// without a record) are ignored.
///
/// # Errors
/// Returns the first error encountered; records merged before it remain in place.
fn merge_chunk_results(
    chunk_results: Vec<Vec<Result<Option<FileRecord>>>>,
    analyzed: &mut Vec<FileRecord>,
    skipped: &mut Vec<FileRecord>,
    warnings: &mut Vec<String>,
) -> Result<()> {
    for outcome in chunk_results.into_iter().flatten() {
        match outcome? {
            Some(record) => push_record(record, analyzed, skipped, warnings),
            None => {}
        }
    }
    Ok(())
}
/// Tags analyzed files with the git submodule they belong to and builds per-submodule summaries.
///
/// Submodules are detected below the first configured root path. A file belongs to the first
/// submodule whose slash-normalized path equals the file's relative path or is a directory
/// prefix of it.
///
/// Returns an empty list when no root path is configured or no submodule is detected.
fn process_submodules(config: &AppConfig, analyzed: &mut [FileRecord]) -> Vec<SubmoduleSummary> {
    let Some(first_root) = config.discovery.root_paths.first() else {
        return Vec::new();
    };
    let root = first_root
        .canonicalize()
        .unwrap_or_else(|_| first_root.clone());
    let submodules = detect_submodules(&root);
    if submodules.is_empty() {
        return Vec::new();
    }

    // Normalize each submodule path once instead of once per (file, submodule) pair.
    let mut prefixes = Vec::new();
    for (name, sub_path) in &submodules {
        let exact = sub_path.to_string_lossy().replace('\\', "/");
        let nested = format!("{exact}/");
        prefixes.push((name, exact, nested));
    }

    for file in analyzed.iter_mut() {
        for (name, exact, nested) in &prefixes {
            if file.relative_path == *exact || file.relative_path.starts_with(nested.as_str()) {
                file.submodule = Some((*name).clone());
                break;
            }
        }
    }
    build_submodule_summaries(analyzed, &submodules, &root)
}
/// Computes a basic COCOMO estimate for `code_lines` lines of code.
///
/// Effort is `a * ksloc^b` and duration is `c * effort^d`, with the coefficients selected by
/// `mode`. Average staff is effort divided by duration (0 when the duration is not
/// positive). All reported numbers are rounded to two decimals.
#[allow(clippy::cast_precision_loss)]
fn compute_cocomo(code_lines: u64, mode: CocomoMode) -> CocomoEstimate {
    let round2 = |value: f64| (value * 100.0).round() / 100.0;

    let (effort_coeff, effort_exp, schedule_coeff, schedule_exp): (f64, f64, f64, f64) =
        match mode {
            CocomoMode::Organic => (2.4, 1.05, 2.5, 0.38),
            CocomoMode::SemiDetached => (3.0, 1.12, 2.5, 0.35),
            CocomoMode::Embedded => (3.6, 1.20, 2.5, 0.32),
        };

    let ksloc = code_lines as f64 / 1_000.0;
    let effort = effort_coeff * ksloc.powf(effort_exp);
    let duration = schedule_coeff * effort.powf(schedule_exp);
    let avg_staff = if duration > 0.0 {
        effort / duration
    } else {
        0.0
    };

    CocomoEstimate {
        mode,
        ksloc: round2(ksloc),
        effort_person_months: round2(effort),
        duration_months: round2(duration),
        avg_staff: round2(avg_staff),
    }
}
/// Computes the unique-lines-of-code count and the "dryness" percentage.
///
/// ULOC is the number of distinct code-line hashes across all `analyzed` records. Dryness is
/// `uloc / total effective code lines * 100`, or `None` when there are no code lines.
#[allow(clippy::cast_precision_loss)]
fn compute_uloc(analyzed: &[FileRecord]) -> (u64, Option<f32>) {
    use std::collections::HashSet as StdHashSet;

    let mut distinct_hashes: StdHashSet<u64> = StdHashSet::new();
    let mut code_line_total: u64 = 0;
    for file in analyzed {
        code_line_total += file.effective_counts.code_lines;
        distinct_hashes.extend(&file.raw_line_categories.code_line_hashes);
    }

    let uloc = distinct_hashes.len() as u64;
    if code_line_total == 0 {
        return (uloc, None);
    }
    let dryness = (uloc as f32 / code_line_total as f32) * 100.0;
    (uloc, Some(dryness))
}
fn find_duplicate_groups(analyzed: &[FileRecord]) -> Vec<Vec<String>> {
let mut by_hash: std::collections::HashMap<u64, Vec<&str>> = std::collections::HashMap::new();
for record in analyzed {
if record.content_hash != 0 {
by_hash
.entry(record.content_hash)
.or_default()
.push(&record.relative_path);
}
}
let mut groups: Vec<Vec<String>> = by_hash
.into_values()
.filter(|v| v.len() >= 2)
.map(|v| {
let mut paths: Vec<String> = v.into_iter().map(str::to_owned).collect();
paths.sort();
paths
})
.collect();
groups.sort_by(|a, b| a[0].cmp(&b[0]));
groups
}
fn assemble_run(
config: &AppConfig,
runtime_mode: &str,
mut analyzed: Vec<FileRecord>,
skipped: Vec<FileRecord>,
warnings: Vec<String>,
submodule_summaries: Vec<SubmoduleSummary>,
) -> AnalysisRun {
let summary = build_summary(&analyzed, &skipped);
let language_summaries = build_language_summaries(&analyzed);
let col_threshold = config.analysis.style_col_threshold;
let style_summary = build_style_summary(&analyzed, col_threshold);
let (uloc, dryness_pct) = compute_uloc(&analyzed);
let duplicate_groups = find_duplicate_groups(&analyzed);
let cocomo = if summary.code_lines > 0 {
Some(compute_cocomo(summary.code_lines, CocomoMode::Organic))
} else {
None
};
let first_root = config
.discovery
.root_paths
.first()
.map(|p| p.canonicalize().unwrap_or_else(|_| p.clone()));
let git = first_root
.as_deref()
.map(detect_git_for_run)
.unwrap_or_default();
let activity_window = config.analysis.activity_window_days.unwrap_or(0);
if let (true, Some(root)) = (activity_window > 0, first_root.as_deref()) {
let activity = detect_file_activity(root, activity_window);
if !activity.is_empty() {
for rec in &mut analyzed {
if let Some((count, date)) = activity.get(&rec.relative_path) {
rec.commit_count = Some(*count);
rec.last_commit_date.clone_from(date);
}
}
}
}
let now = Utc::now();
let run_id = {
let uuid_suffix = Uuid::new_v4().simple().to_string();
format!("{}-{}", now.format("%Y%m%d-%H%M"), uuid_suffix)
};
AnalysisRun {
tool: ToolMetadata {
name: "sloc".into(),
version: env!("CARGO_PKG_VERSION").into(),
run_id,
timestamp_utc: now,
},
environment: EnvironmentMetadata {
operating_system: std::env::consts::OS.into(),
architecture: std::env::consts::ARCH.into(),
runtime_mode: runtime_mode.into(),
initiator_username: get_current_username(),
initiator_hostname: get_hostname(),
ci_name: if is_jenkins_env() {
Some(format!("Jenkins\t{}", get_hostname()))
} else {
detect_ci_system().map(str::to_string)
},
},
effective_configuration: config.clone(),
input_roots: config
.discovery
.root_paths
.iter()
.map(|p| path_to_string(p))
.collect(),
summary_totals: summary,
totals_by_language: language_summaries,
per_file_records: analyzed,
skipped_file_records: skipped,
warnings,
submodule_summaries,
git_commit_short: git.commit_short,
git_commit_long: git.commit_long,
git_branch: git.branch,
git_commit_author: git.author,
git_tags: git.tags,
git_nearest_tag: git.nearest_tag,
git_commit_date: git.commit_date,
git_remote_url: git.remote_url,
style_summary,
cocomo,
uloc,
dryness_pct,
duplicate_groups,
duplicates_excluded: 0,
}
}
/// Runs a complete analysis over the root paths in `config` and returns the assembled run.
///
/// Each root is canonicalized when possible. A root that is a file is analyzed directly; a
/// directory root is walked and its files are analyzed in parallel. Every path is analyzed
/// at most once: a file root that repeats, or that also lies below a directory root, is not
/// counted again. Records are sorted by relative path, optionally tagged with submodule
/// information, and matched against a coverage report if one is configured.
///
/// * `runtime_mode` – label stored in the run's environment metadata.
/// * `cancel` – when set to `true`, the analysis stops with an error.
/// * `progress` – counters updated while directory roots are processed.
///
/// # Errors
/// Fails when the configuration is invalid, no input path is given, a glob or language
/// setting cannot be compiled, the analysis is cancelled, or a file analysis reports a
/// fatal error.
#[allow(clippy::too_many_lines)]
pub fn analyze(
    config: &AppConfig,
    runtime_mode: &str,
    cancel: Option<&AtomicBool>,
    progress: Option<&ProgressCounters>,
) -> Result<AnalysisRun> {
    config.validate()?;
    if config.discovery.root_paths.is_empty() {
        anyhow::bail!("no input paths were provided");
    }
    let include_globs = compile_globset(&config.discovery.include_globs)?;
    let exclude_globs = compile_globset(&config.discovery.exclude_globs)?;
    let enabled_languages = parse_enabled_languages(&config.analysis.enabled_languages)?;
    let mut analyzed = Vec::new();
    let mut skipped = Vec::new();
    let mut warnings = Vec::new();
    let mut seen_paths = HashSet::new();
    for root in &config.discovery.root_paths {
        if cancel.is_some_and(|c| c.load(Ordering::Relaxed)) {
            anyhow::bail!("analysis cancelled");
        }
        let root = root.canonicalize().unwrap_or_else(|_| root.clone());
        if root.is_file() {
            // Register file roots in the same set the directory walk uses, so a file that is
            // listed twice or also lives under a directory root is analyzed only once.
            if !seen_paths.insert(root.clone()) {
                continue;
            }
            if let Some(record) = analyze_candidate_file(
                &root,
                root.parent().unwrap_or_else(|| Path::new(".")),
                config,
                include_globs.as_ref(),
                exclude_globs.as_ref(),
                enabled_languages.as_ref(),
            )? {
                push_record(record, &mut analyzed, &mut skipped, &mut warnings);
            }
            continue;
        }
        walk_root(
            &root,
            config,
            include_globs.as_ref(),
            exclude_globs.as_ref(),
            enabled_languages.as_ref(),
            &mut seen_paths,
            &mut analyzed,
            &mut skipped,
            &mut warnings,
            cancel,
            progress,
        )?;
    }
    // Sort for stable, reproducible output regardless of worker scheduling.
    analyzed.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
    skipped.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
    let submodule_summaries = if config.discovery.submodule_breakdown {
        process_submodules(config, &mut analyzed)
    } else {
        Vec::new()
    };
    attach_coverage(config, &mut analyzed, &mut warnings);
    Ok(assemble_run(
        config,
        runtime_mode,
        analyzed,
        skipped,
        warnings,
        submodule_summaries,
    ))
}
/// Loads the configured coverage report, if any, and attaches per-file coverage to `analyzed`.
///
/// Does nothing when no coverage file is resolved. When the file cannot be read, a warning
/// is logged and appended to `warnings`. Otherwise every record's `coverage` is set to the
/// entry matching its relative path (or `None`). If no record matches, a warning is logged.
fn attach_coverage(config: &AppConfig, analyzed: &mut [FileRecord], warnings: &mut Vec<String>) {
    let Some(cov_path) = coverage::resolve_coverage_file(config.analysis.coverage_file.as_deref())
    else {
        return;
    };
    tracing::debug!(path = %cov_path.display(), "loading coverage file");

    let content = match fs::read_to_string(&cov_path) {
        Ok(content) => content,
        Err(e) => {
            tracing::warn!(path = %cov_path.display(), error = %e, "coverage file could not be read");
            warnings.push(format!(
                "coverage file '{}' could not be read: {e}",
                cov_path.display()
            ));
            return;
        }
    };

    let cov_map = coverage::parse_coverage_auto(&cov_path, &content);
    let mut matched: u32 = 0;
    let mut unmatched: u32 = 0;
    for record in analyzed.iter_mut() {
        record.coverage = coverage::lookup_coverage(&cov_map, &record.relative_path).cloned();
        match record.coverage {
            Some(_) => matched += 1,
            None => unmatched += 1,
        }
    }
    tracing::debug!(
        path = %cov_path.display(),
        coverage_entries = cov_map.len(),
        files_matched = matched,
        files_unmatched = unmatched,
        "coverage attached"
    );
    let nothing_matched = unmatched > 0 && matched == 0;
    if nothing_matched {
        tracing::warn!(
            path = %cov_path.display(),
            "coverage file loaded but no source files could be matched — check that paths in the coverage report match the scanned directory"
        );
    }
}
/// Files `record` under `analyzed` or `skipped` according to its status and copies its
/// warnings, each prefixed with the record's relative path, into `warnings`.
fn push_record(
    record: FileRecord,
    analyzed: &mut Vec<FileRecord>,
    skipped: &mut Vec<FileRecord>,
    warnings: &mut Vec<String>,
) {
    for warning in &record.warnings {
        warnings.push(format!("{}: {warning}", record.relative_path));
    }
    let destination = match record.status {
        FileStatus::AnalyzedExact | FileStatus::AnalyzedBestEffort => analyzed,
        FileStatus::SkippedBinary
        | FileStatus::SkippedDecodeError
        | FileStatus::SkippedUnsupported
        | FileStatus::SkippedByPolicy
        | FileStatus::ErrorInternal => skipped,
    };
    destination.push(record);
}
/// Builds a `Skip` outcome holding a `SkippedByPolicy` record whose only warning is `reason`.
#[inline]
fn skip_with_reason(
    path: &Path,
    root: &Path,
    size: u64,
    reason: impl Into<String>,
) -> MetadataPolicyOutcome {
    let reasons = vec![reason.into()];
    let record = skipped_record(path, root, size, FileStatus::SkippedByPolicy, reasons);
    MetadataPolicyOutcome::Skip(Box::new(record))
}
/// Applies the policies that can be decided from a file's path and metadata alone.
///
/// Checks run in this order and the first hit wins: symlink (when symlinks are not
/// followed), `.gitignore`, excluded directory, maximum file size, include globs, exclude
/// globs, lockfile. Globs are matched against both the relative and the full path. A file
/// not matching the include globs is `Exclude`d without a record; every other hit yields a
/// `Skip` record. `Continue` means no policy applied.
#[allow(clippy::too_many_arguments)]
fn check_metadata_policy(
    path: &Path,
    root: &Path,
    relative_path: &str,
    metadata: &fs::Metadata,
    config: &AppConfig,
    include_globs: Option<&GlobSet>,
    exclude_globs: Option<&GlobSet>,
) -> MetadataPolicyOutcome {
    let discovery = &config.discovery;
    let size = metadata.len();

    if metadata.file_type().is_symlink() && !discovery.follow_symlinks {
        return skip_with_reason(path, root, size, "symlink skipped by policy");
    }
    if file_name_eq(path, ".gitignore") {
        return skip_with_reason(path, root, size, ".gitignore is always excluded");
    }
    if is_excluded_dir_path(path, &discovery.excluded_directories) {
        return skip_with_reason(path, root, size, "path matched excluded directory setting");
    }
    if size > discovery.max_file_size_bytes {
        let reason = format!(
            "file exceeded max_file_size_bytes ({})",
            discovery.max_file_size_bytes
        );
        return skip_with_reason(path, root, size, reason);
    }

    // A glob set matches when either the relative or the full path matches.
    let relative = Path::new(relative_path);
    let matches_either = |globs: &GlobSet| globs.is_match(relative) || globs.is_match(path);

    if include_globs.is_some_and(|globs| !matches_either(globs)) {
        return MetadataPolicyOutcome::Exclude;
    }
    if exclude_globs.is_some_and(|globs| matches_either(globs)) {
        return skip_with_reason(path, root, size, "path matched exclude glob");
    }
    if is_known_lockfile(path) && !config.analysis.include_lockfiles {
        return skip_with_reason(path, root, size, "lockfile skipped by default policy");
    }
    MetadataPolicyOutcome::Continue
}
/// Result of the content-level policy check performed by `check_content_policy`.
struct ContentPolicyResult {
/// Whether the path was classified as a vendor path.
vendor: bool,
/// Whether generated-file detection is enabled and flagged the file.
generated: bool,
/// Whether minified-file detection is enabled and flagged the file.
minified: bool,
/// `Some` when the file must be skipped by policy; `None` when analysis may proceed.
skip_record: Option<FileRecord>,
}
/// Applies the vendor / generated / minified policies to a file whose bytes have been read.
///
/// The checks run in that order and stop at the first hit, so flags belonging to later
/// checks stay `false` in the result. Generated and minified detection are evaluated only
/// when enabled in `config`. A vendor path causes a skip only when vendor detection is
/// enabled. The returned flags describe what was detected; `skip_record` is `Some` when the
/// file must be skipped.
fn check_content_policy(
    path: &Path,
    root: &Path,
    size_bytes: u64,
    bytes: &[u8],
    config: &AppConfig,
) -> ContentPolicyResult {
    let policy_skip = |reason: &str| {
        skipped_record(
            path,
            root,
            size_bytes,
            FileStatus::SkippedByPolicy,
            vec![reason.into()],
        )
    };
    let analysis = &config.analysis;

    let vendor = is_vendor_path(path);
    if vendor && analysis.vendor_directory_detection {
        return ContentPolicyResult {
            vendor,
            generated: false,
            minified: false,
            skip_record: Some(policy_skip("vendor file skipped by policy")),
        };
    }

    let generated = analysis.generated_file_detection && looks_generated(path, bytes);
    if generated {
        return ContentPolicyResult {
            vendor,
            generated,
            minified: false,
            skip_record: Some(policy_skip("generated file skipped by policy")),
        };
    }

    let minified = analysis.minified_file_detection && looks_minified(path, bytes);
    let skip_record = if minified {
        Some(policy_skip("minified file skipped by policy"))
    } else {
        None
    };
    ContentPolicyResult {
        vendor,
        generated,
        minified,
        skip_record,
    }
}
/// Decodes the raw bytes of a file into text, honouring the configured
/// binary-file and decode-failure behaviours.
///
/// Returns `Ok(Some((text, encoding, warnings)))` on success and `Ok(None)`
/// when the file is binary and binary files are configured to be skipped.
///
/// # Errors
///
/// Fails when a binary file is met under `BinaryFileBehavior::Fail`, or when
/// decoding fails. Under `FailureBehavior::WarnSkip` the error message starts
/// with the `__decode_warn__: ` sentinel so the caller can turn it into a
/// skipped record instead of aborting.
fn decode_file_contents(
    path: &Path,
    root: &Path,
    size_bytes: u64,
    bytes: &[u8],
    config: &AppConfig,
) -> Result<Option<(String, String, Vec<String>)>> {
    // These parameters are part of the signature but not needed here.
    let _ = (root, size_bytes);

    if is_binary(bytes) {
        return match config.analysis.binary_file_behavior {
            BinaryFileBehavior::Skip => Ok(None),
            BinaryFileBehavior::Fail => Err(anyhow::anyhow!(
                "binary file encountered: {}",
                path.display()
            )),
        };
    }

    let err = match decode_bytes(bytes) {
        Ok(decoded) => return Ok(Some(decoded)),
        Err(err) => err,
    };

    match config.analysis.decode_failure_behavior {
        FailureBehavior::WarnSkip => Err(anyhow::anyhow!("__decode_warn__: {err}")),
        FailureBehavior::Fail => Err(anyhow::anyhow!(
            "decode failure for {}: {err}",
            path.display()
        )),
    }
}
/// Runs the full per-file pipeline for one candidate path: metadata policy,
/// read, content policy, decoding, language detection and line analysis.
///
/// Returns `Ok(Some(record))` for both analyzed and skipped files (the
/// record's `status` tells them apart) and `Ok(None)` when the metadata
/// policy reports `Exclude`, in which case no record is produced at all.
///
/// # Errors
///
/// Propagates errors from `decode_file_contents` unless their message starts
/// with the `__decode_warn__: ` sentinel; those become `SkippedDecodeError`
/// records instead.
#[allow(clippy::too_many_lines)]
fn analyze_candidate_file(
path: &Path,
root: &Path,
config: &AppConfig,
include_globs: Option<&GlobSet>,
exclude_globs: Option<&GlobSet>,
enabled_languages: Option<&BTreeSet<Language>>,
) -> Result<Option<FileRecord>> {
// `symlink_metadata` does not follow symlinks, so the policy check below can
// see whether the path itself is a symlink.
let metadata = match fs::symlink_metadata(path) {
Ok(metadata) => metadata,
Err(err) => {
// Metadata failures are reported as a record (size unknown, so 0), not as an error.
return Ok(Some(skipped_record(
path,
root,
0,
FileStatus::ErrorInternal,
vec![format!("failed to read metadata: {err}")],
)));
}
};
let relative_path = relative_path_string(path, root);
match check_metadata_policy(
path,
root,
&relative_path,
&metadata,
config,
include_globs,
exclude_globs,
) {
MetadataPolicyOutcome::Skip(record) => return Ok(Some(*record)),
MetadataPolicyOutcome::Exclude => return Ok(None),
MetadataPolicyOutcome::Continue => {}
}
let bytes = match fs::read(path) {
Ok(bytes) => bytes,
Err(err) => {
return Ok(Some(skipped_record(
path,
root,
metadata.len(),
FileStatus::ErrorInternal,
vec![format!("failed to read file: {err}")],
)));
}
};
// Content-based policies (vendor / generated / minified) need the bytes.
let content_policy = check_content_policy(path, root, metadata.len(), &bytes, config);
if let Some(record) = content_policy.skip_record {
return Ok(Some(record));
}
let (vendor, generated, minified) = (
content_policy.vendor,
content_policy.generated,
content_policy.minified,
);
let (text, encoding, decode_warnings) =
match decode_file_contents(path, root, metadata.len(), &bytes, config) {
Ok(Some(result)) => result,
// `Ok(None)` means the file was binary and configured to be skipped.
Ok(None) => {
return Ok(Some(skipped_record(
path,
root,
metadata.len(),
FileStatus::SkippedBinary,
vec!["binary file skipped by default".into()],
)));
}
Err(err) => {
// The sentinel prefix marks a decode failure that should only warn
// and skip; any other error is propagated to the caller.
let msg = err.to_string();
if let Some(warn_msg) = msg.strip_prefix("__decode_warn__: ") {
return Ok(Some(skipped_record(
path,
root,
metadata.len(),
FileStatus::SkippedDecodeError,
vec![warn_msg.to_string()],
)));
}
return Err(err);
}
};
// The first line is handed to language detection together with the
// shebang-detection setting.
let first_line = text.lines().next();
let language = detect_language(
path,
first_line,
&config.analysis.extension_overrides,
config.analysis.shebang_detection,
);
let Some(language) = language else {
return Ok(Some(skipped_record(
path,
root,
metadata.len(),
FileStatus::SkippedUnsupported,
vec!["unsupported or undetected language".into()],
)));
};
// `None` means no language filter is configured.
if let Some(enabled) = enabled_languages {
if !enabled.contains(&language) {
return Ok(Some(skipped_record(
path,
root,
metadata.len(),
FileStatus::SkippedByPolicy,
vec![format!(
"language {} disabled by configuration",
language.display_name()
)],
)));
}
}
// Any value other than "c_family" selects style analysis for all languages.
let style_scope = match config.analysis.style_lang_scope.as_str() {
"c_family" => StyleLangScope::CFamilyOnly,
_ => StyleLangScope::All,
};
let ieee_opts = AnalysisOptions {
blank_in_block_comment_as_comment: config.analysis.blank_in_block_comment_policy
== BlankInBlockCommentPolicy::CountAsComment,
collapse_continuation_lines: config.analysis.continuation_line_policy
== ContinuationLinePolicy::CollapseToLogical,
enable_style: config.analysis.style_analysis_enabled,
style_lang_scope: style_scope,
};
let analysis = analyze_text(language, &text, ieee_opts);
let effective_counts = compute_effective_counts(
&analysis.raw,
config.analysis.mixed_line_policy,
config.analysis.python_docstrings_as_comments,
config.analysis.count_compiler_directives,
);
// Decode warnings come first, followed by the analyzer's warnings.
let mut warnings = decode_warnings;
warnings.extend(analysis.warnings.clone());
// Hash of the raw (undecoded) bytes. Skipped records use 0 for this field.
let content_hash = {
use std::hash::{DefaultHasher, Hash, Hasher};
let mut h = DefaultHasher::new();
bytes.hash(&mut h);
h.finish()
};
// A complexity of zero is reported as `None` on the record.
let cyclomatic_complexity = if analysis.raw.cyclomatic_complexity > 0 {
Some(analysis.raw.cyclomatic_complexity)
} else {
None
};
// Copied out before `analysis.raw` is moved into the record below.
let lsloc = analysis.raw.lsloc;
Ok(Some(FileRecord {
path: path_to_string(path),
relative_path,
language: Some(language),
size_bytes: metadata.len(),
detected_encoding: Some(encoding),
raw_line_categories: analysis.raw,
effective_counts,
status: match analysis.parse_mode {
ParseMode::Lexical | ParseMode::TreeSitter => FileStatus::AnalyzedExact,
ParseMode::LexicalBestEffort => FileStatus::AnalyzedBestEffort,
},
warnings,
generated,
minified,
vendor,
parse_mode: Some(analysis.parse_mode),
submodule: None,
coverage: None,
style_analysis: analysis.style_analysis,
cyclomatic_complexity,
lsloc,
commit_count: None,
last_commit_date: None,
content_hash,
}))
}
/// Turns raw per-category line counts into the effective code / comment /
/// blank / mixed totals according to the configured policies.
///
/// * `mixed_line_policy` decides where lines holding both code and a comment
///   are counted.
/// * `python_docstrings_as_comments` routes docstring lines to comments when
///   `true`, to code otherwise.
/// * `count_compiler_directives` — when `false`, directive lines are removed
///   from the code total (saturating at zero).
const fn compute_effective_counts(
    raw: &RawLineCounts,
    mixed_line_policy: MixedLinePolicy,
    python_docstrings_as_comments: bool,
    count_compiler_directives: bool,
) -> EffectiveCounts {
    let mut code_lines = raw.code_only_lines;
    let mut comment_lines = raw.single_comment_only_lines + raw.multi_comment_only_lines;
    let mut mixed_lines_separate = 0;

    if python_docstrings_as_comments {
        comment_lines += raw.docstring_comment_lines;
    } else {
        code_lines += raw.docstring_comment_lines;
    }

    let mixed_total = raw.mixed_code_single_comment_lines + raw.mixed_code_multi_comment_lines;
    match mixed_line_policy {
        MixedLinePolicy::CodeOnly => code_lines += mixed_total,
        MixedLinePolicy::CommentOnly => comment_lines += mixed_total,
        MixedLinePolicy::CodeAndComment => {
            code_lines += mixed_total;
            comment_lines += mixed_total;
        }
        MixedLinePolicy::SeparateMixedCategory => mixed_lines_separate += mixed_total,
    }

    // Applied last so directives are subtracted from the final code total.
    if !count_compiler_directives {
        code_lines = code_lines.saturating_sub(raw.compiler_directive_lines);
    }

    EffectiveCounts {
        code_lines,
        comment_lines,
        blank_lines: raw.blank_only_lines,
        mixed_lines_separate,
    }
}
/// Aggregates run-wide totals over all analyzed files.
///
/// Skipped files only contribute to the file counters. `lsloc` stays `None`
/// unless at least one analyzed file reports a value, and coverage totals
/// only include files that carry coverage data.
fn build_summary(analyzed: &[FileRecord], skipped: &[FileRecord]) -> SummaryTotals {
    let analyzed_count = analyzed.len();
    let skipped_count = skipped.len();
    let mut totals = SummaryTotals {
        files_considered: (analyzed_count + skipped_count) as u64,
        files_analyzed: analyzed_count as u64,
        files_skipped: skipped_count as u64,
        ..Default::default()
    };

    for record in analyzed {
        let raw = &record.raw_line_categories;
        let effective = &record.effective_counts;

        totals.total_physical_lines += raw.total_physical_lines;
        totals.code_lines += effective.code_lines;
        totals.comment_lines += effective.comment_lines;
        totals.blank_lines += effective.blank_lines;
        totals.mixed_lines_separate += effective.mixed_lines_separate;
        totals.functions += raw.functions;
        totals.classes += raw.classes;
        totals.variables += raw.variables;
        totals.imports += raw.imports;
        totals.test_count += raw.test_count;
        totals.test_assertion_count += raw.test_assertion_count;
        totals.test_suite_count += raw.test_suite_count;
        totals.cyclomatic_complexity += u64::from(raw.cyclomatic_complexity);

        if let Some(lsloc) = raw.lsloc {
            totals.lsloc = Some(totals.lsloc.unwrap_or(0) + u64::from(lsloc));
        }

        if let Some(coverage) = record.coverage.as_ref() {
            totals.coverage_lines_found += u64::from(coverage.lines_found);
            totals.coverage_lines_hit += u64::from(coverage.lines_hit);
            totals.coverage_functions_found += u64::from(coverage.functions_found);
            totals.coverage_functions_hit += u64::from(coverage.functions_hit);
            totals.coverage_branches_found += u64::from(coverage.branches_found);
            totals.coverage_branches_hit += u64::from(coverage.branches_hit);
        }
    }

    totals
}
/// Creates the starting `LanguageSummary` for `language`: every counter is
/// zero and `lsloc` is unset, ready for `accumulate_record_into_summary`.
const fn zeroed_summary(language: Language) -> LanguageSummary {
LanguageSummary {
language,
files: 0,
total_physical_lines: 0,
code_lines: 0,
comment_lines: 0,
blank_lines: 0,
mixed_lines_separate: 0,
functions: 0,
classes: 0,
variables: 0,
imports: 0,
test_count: 0,
test_assertion_count: 0,
test_suite_count: 0,
coverage_lines_found: 0,
coverage_lines_hit: 0,
coverage_functions_found: 0,
coverage_functions_hit: 0,
coverage_branches_found: 0,
coverage_branches_hit: 0,
cyclomatic_complexity: 0,
// Stays `None` until a record that reports an `lsloc` value is accumulated.
lsloc: None,
}
}
/// Adds one analyzed file's counts to a per-language running total.
///
/// Increments the file counter, adds the effective and raw counts, and folds
/// in `lsloc` and coverage figures only when the record carries them.
fn accumulate_record_into_summary(entry: &mut LanguageSummary, record: &FileRecord) {
    let raw = &record.raw_line_categories;
    let effective = &record.effective_counts;

    entry.files += 1;
    entry.total_physical_lines += raw.total_physical_lines;

    entry.code_lines += effective.code_lines;
    entry.comment_lines += effective.comment_lines;
    entry.blank_lines += effective.blank_lines;
    entry.mixed_lines_separate += effective.mixed_lines_separate;

    entry.functions += raw.functions;
    entry.classes += raw.classes;
    entry.variables += raw.variables;
    entry.imports += raw.imports;
    entry.test_count += raw.test_count;
    entry.test_assertion_count += raw.test_assertion_count;
    entry.test_suite_count += raw.test_suite_count;
    entry.cyclomatic_complexity += u64::from(raw.cyclomatic_complexity);

    if let Some(lsloc) = raw.lsloc {
        entry.lsloc = Some(entry.lsloc.unwrap_or(0) + u64::from(lsloc));
    }

    if let Some(coverage) = record.coverage.as_ref() {
        entry.coverage_lines_found += u64::from(coverage.lines_found);
        entry.coverage_lines_hit += u64::from(coverage.lines_hit);
        entry.coverage_functions_found += u64::from(coverage.functions_found);
        entry.coverage_functions_hit += u64::from(coverage.functions_hit);
        entry.coverage_branches_found += u64::from(coverage.branches_found);
        entry.coverage_branches_hit += u64::from(coverage.branches_hit);
    }
}
/// Groups analyzed files by language and returns one summary per language,
/// ordered by `Language`'s own ordering. Records without a language are
/// ignored.
fn build_language_summaries(analyzed: &[FileRecord]) -> Vec<LanguageSummary> {
    let mut totals: BTreeMap<Language, LanguageSummary> = BTreeMap::new();

    let with_language = analyzed
        .iter()
        .filter_map(|record| record.language.map(|language| (language, record)));
    for (language, record) in with_language {
        let summary = totals
            .entry(language)
            .or_insert_with(|| zeroed_summary(language));
        accumulate_record_into_summary(summary, record);
    }

    totals.into_values().collect()
}
/// Builds the `FileRecord` for a file that was not analyzed.
///
/// Only the paths, `size_bytes`, `status` and `warnings` carry information;
/// every analysis-derived field is left empty, default, or `false`.
fn skipped_record(
path: &Path,
root: &Path,
size_bytes: u64,
status: FileStatus,
warnings: Vec<String>,
) -> FileRecord {
FileRecord {
path: path_to_string(path),
relative_path: relative_path_string(path, root),
language: None,
size_bytes,
detected_encoding: None,
raw_line_categories: RawLineCounts::default(),
effective_counts: EffectiveCounts::default(),
status,
warnings,
generated: false,
minified: false,
vendor: false,
parse_mode: None,
submodule: None,
coverage: None,
style_analysis: None,
cyclomatic_complexity: None,
lsloc: None,
commit_count: None,
last_commit_date: None,
// No content hash is computed for skipped files.
content_hash: 0,
}
}
/// Renders `path` relative to `root` with forward slashes as separators.
///
/// When `path` is not located under `root`, the full path is rendered
/// instead.
fn relative_path_string(path: &Path, root: &Path) -> String {
    let shown = match path.strip_prefix(root) {
        Ok(relative) => relative,
        Err(_) => path,
    };
    let text = shown.to_string_lossy();
    text.replace('\\', "/")
}
/// Renders `path` as a (lossy) string with every backslash replaced by a
/// forward slash.
fn path_to_string(path: &Path) -> String {
    path.to_string_lossy()
        .chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect()
}
/// Reads `<root>/.gitmodules` and returns `(name, path)` for every submodule
/// section that declares a `path`.
///
/// Returns an empty list when the file is missing or unreadable. Sections
/// without a `path` entry and `path` entries outside any section are ignored.
/// Malformed section headers are skipped rather than causing a panic, and
/// only a key that is exactly `path` (compared case-insensitively, as git
/// does for variable names) is treated as the submodule path.
#[must_use]
pub fn detect_submodules(root: &Path) -> Vec<(String, PathBuf)> {
    let gitmodules = root.join(".gitmodules");
    if !gitmodules.is_file() {
        return Vec::new();
    }
    let Ok(content) = fs::read_to_string(&gitmodules) else {
        return Vec::new();
    };

    let mut result = Vec::new();
    let mut current_name: Option<String> = None;
    let mut current_path: Option<PathBuf> = None;

    for line in content.lines() {
        let trimmed = line.trim();

        // Stripping prefix then suffix cannot overlap, unlike separate
        // `starts_with` / `ends_with` tests followed by slicing, which
        // panicked on a line such as `[submodule "]`.
        let header = trimmed
            .strip_prefix("[submodule \"")
            .and_then(|rest| rest.strip_suffix("\"]"));

        if let Some(name) = header {
            // A new section begins: flush the previous one if it was complete.
            if let (Some(prev_name), Some(prev_path)) = (current_name.take(), current_path.take())
            {
                result.push((prev_name, prev_path));
            }
            current_name = Some(name.to_string());
        } else if let Some((key, value)) = trimmed.split_once('=') {
            // Require the key to be exactly `path` so that unrelated keys
            // that merely start with "path" are not mistaken for it.
            if key.trim().eq_ignore_ascii_case("path") {
                current_path = Some(PathBuf::from(value.trim()));
            }
        }
    }

    if let (Some(name), Some(path)) = (current_name, current_path) {
        result.push((name, path));
    }
    result
}
/// Builds one summary per submodule that has at least one analyzed file.
///
/// Files are attributed to a submodule through `FileRecord::submodule`
/// matching the submodule name. Submodules with no analyzed files are left
/// out of the result, and they are dropped *before* git metadata and
/// per-language summaries are computed so that no work is spent on entries
/// that would be discarded anyway.
fn build_submodule_summaries(
    analyzed: &[FileRecord],
    submodules: &[(String, PathBuf)],
    root: &Path,
) -> Vec<SubmoduleSummary> {
    submodules
        .iter()
        .filter_map(|(name, path)| {
            let files: Vec<&FileRecord> = analyzed
                .iter()
                .filter(|f| f.submodule.as_deref() == Some(name.as_str()))
                .collect();
            // NOTE(review): assumes `detect_git_for_run` has no side effects
            // that callers rely on for empty submodules — confirm.
            if files.is_empty() {
                return None;
            }

            let language_summaries = build_language_summaries_from_slice(&files);
            let git = detect_git_for_run(&root.join(path));
            Some(SubmoduleSummary {
                name: name.clone(),
                relative_path: path.to_string_lossy().replace('\\', "/"),
                files_analyzed: files.len() as u64,
                total_physical_lines: files
                    .iter()
                    .map(|f| f.raw_line_categories.total_physical_lines)
                    .sum(),
                code_lines: files.iter().map(|f| f.effective_counts.code_lines).sum(),
                comment_lines: files.iter().map(|f| f.effective_counts.comment_lines).sum(),
                blank_lines: files.iter().map(|f| f.effective_counts.blank_lines).sum(),
                language_summaries,
                git_commit_short: git.commit_short,
                git_commit_long: git.commit_long,
                git_branch: git.branch,
                git_commit_author: git.author,
                git_commit_date: git.commit_date,
                git_remote_url: git.remote_url,
            })
        })
        .collect()
}
/// Returns the display label of the indent style used by the most files.
///
/// On a tie the style listed last (Tabs, 2-Space, 4-Space, 8-Space, Mixed,
/// unknown) wins, so an empty input yields the em-dash "unknown" label.
#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
fn dominant_indent_label(files: &[&StyleAnalysis]) -> String {
    const LABELS: [&str; 6] = ["Tabs", "2-Space", "4-Space", "8-Space", "Mixed", "\u{2014}"];

    let mut tally = [0u32; 6];
    for analysis in files {
        let slot = match analysis.indent_style {
            IndentStyle::Tabs => 0,
            IndentStyle::Spaces2 => 1,
            IndentStyle::Spaces4 => 2,
            IndentStyle::Spaces8 => 3,
            IndentStyle::Mixed => 4,
            IndentStyle::Unknown => 5,
        };
        tally[slot] += 1;
    }

    // `>=` keeps the last maximum, so ties resolve towards the higher slot.
    let mut winner = 0;
    for (slot, &count) in tally.iter().enumerate() {
        if count >= tally[winner] {
            winner = slot;
        }
    }
    LABELS[winner].to_string()
}
/// Percentage (rounded down) of files in which at most 5% of the lines are
/// longer than 80 columns. Files with no lines count as compliant; an empty
/// input yields 0.
#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
fn line80_pct(files: &[&StyleAnalysis]) -> u8 {
    if files.is_empty() {
        return 0;
    }

    let mut compliant = 0u32;
    for analysis in files {
        let within_budget = analysis.total_lines == 0
            || (analysis.lines_over_80 as f32 / analysis.total_lines as f32) <= 0.05;
        if within_budget {
            compliant += 1;
        }
    }

    let total = files.len() as u32;
    ((compliant * 100) / total) as u8
}
/// Percentage (rounded down) of files in which at most 5% of the lines
/// exceed the column limit selected by `threshold`.
///
/// The limit is bucketed: thresholds up to 80 use the over-80 counter, up to
/// 100 the over-100 counter, and anything larger the over-120 counter. Files
/// with no lines count as compliant; an empty input yields 0.
#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
fn line_col_pct(files: &[&StyleAnalysis], threshold: u16) -> u8 {
    if files.is_empty() {
        return 0;
    }

    let mut compliant = 0u32;
    for analysis in files {
        let over = match threshold {
            0..=80 => analysis.lines_over_80,
            81..=100 => analysis.lines_over_100,
            _ => analysis.lines_over_120,
        };
        let within_budget =
            analysis.total_lines == 0 || (over as f32 / analysis.total_lines as f32) <= 0.05;
        if within_budget {
            compliant += 1;
        }
    }

    let total = files.len() as u32;
    ((compliant * 100) / total) as u8
}
/// Builds the style summary for one language family.
///
/// Guide names are collected in order of first appearance. Each guide's
/// average is its summed score divided by the number of files in the group
/// (files lacking that guide contribute nothing to the sum). Guides are then
/// ordered by descending average, and the first one is the dominant guide.
#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
fn build_language_group(
    family: &str,
    files: &[&StyleAnalysis],
    col_threshold: u16,
) -> LanguageStyleGroup {
    let files_count = files.len() as u32;

    let mut guide_names: Vec<String> = Vec::new();
    let every_name = files
        .iter()
        .flat_map(|analysis| analysis.guide_scores.iter().map(|guide| &guide.name));
    for name in every_name {
        if !guide_names.contains(name) {
            guide_names.push(name.clone());
        }
    }

    let mut guide_avg_scores: Vec<(String, u8)> = Vec::with_capacity(guide_names.len());
    for name in guide_names {
        let mut total: u32 = 0;
        for analysis in files {
            if let Some(guide) = analysis.guide_scores.iter().find(|g| g.name == name) {
                total += u32::from(guide.score_pct);
            }
        }
        guide_avg_scores.push((name, (total / files_count) as u8));
    }
    // Stable sort, highest average first.
    guide_avg_scores.sort_by(|left, right| right.1.cmp(&left.1));

    let (dominant_guide, dominant_score_pct) = match guide_avg_scores.first() {
        Some((name, score)) => (name.clone(), *score),
        None => (String::new(), 0),
    };

    let line_col_compliant_pct = line_col_pct(files, col_threshold);
    let common_indent_style = dominant_indent_label(files);
    let line80_compliant_pct = line80_pct(files);

    LanguageStyleGroup {
        language_family: family.to_string(),
        files_count,
        dominant_guide,
        dominant_score_pct,
        common_indent_style,
        guide_avg_scores,
        line80_compliant_pct,
        line_col_compliant_pct,
    }
}
#[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
fn build_style_summary(analyzed: &[FileRecord], col_threshold: u16) -> Option<StyleSummary> {
let all_style: Vec<&StyleAnalysis> = analyzed
.iter()
.filter_map(|f| f.style_analysis.as_ref())
.collect();
if all_style.is_empty() {
return None;
}
let mut families: std::collections::BTreeMap<&str, Vec<&StyleAnalysis>> =
std::collections::BTreeMap::new();
for sa in &all_style {
families
.entry(sa.language_family.as_str())
.or_default()
.push(sa);
}
let mut by_language: Vec<LanguageStyleGroup> = families
.iter()
.map(|(family, files)| build_language_group(family, files, col_threshold))
.collect();
by_language.sort_by_key(|g| std::cmp::Reverse(g.files_count));
let files_analyzed = all_style.len() as u32;
let common_indent_style = dominant_indent_label(&all_style);
let line80_compliant_pct = line80_pct(&all_style);
let line_col_compliant_pct = line_col_pct(&all_style, col_threshold);
Some(StyleSummary {
files_analyzed,
common_indent_style,
line80_compliant_pct,
line_col_compliant_pct,
col_threshold,
by_language,
})
}
/// Same aggregation as `build_language_summaries`, but over borrowed records
/// and ordered by the language's display name. Records without a language
/// are ignored.
fn build_language_summaries_from_slice(files: &[&FileRecord]) -> Vec<LanguageSummary> {
    let mut by_display_name: BTreeMap<String, LanguageSummary> = BTreeMap::new();

    let with_language = files
        .iter()
        .filter_map(|file| file.language.map(|lang| (lang, file)));
    for (lang, file) in with_language {
        let summary = by_display_name
            .entry(lang.display_name().to_string())
            .or_insert_with(|| zeroed_summary(lang));
        accumulate_record_into_summary(summary, file);
    }

    by_display_name.into_values().collect()
}
/// Returns `true` when the final component of `path` is exactly `expected`.
/// Paths without a file name, or with a non-UTF-8 one, never match.
fn file_name_eq(path: &Path, expected: &str) -> bool {
    let Some(name) = path.file_name() else {
        return false;
    };
    name.to_str() == Some(expected)
}
/// Returns `true` when any component of `path` (including the final one)
/// equals one of the names in `excluded_dirs`. Non-UTF-8 components never
/// match.
fn is_excluded_dir_path(path: &Path, excluded_dirs: &[String]) -> bool {
    for component in path.components() {
        let Some(part) = component.as_os_str().to_str() else {
            continue;
        };
        if excluded_dirs.iter().any(|excluded| excluded.as_str() == part) {
            return true;
        }
    }
    false
}
/// Returns `true` when any component of `path` is one of the recognised
/// third-party directory names: `vendor`, `node_modules` or `packages`.
fn is_vendor_path(path: &Path) -> bool {
    const VENDOR_DIRS: [&str; 3] = ["vendor", "node_modules", "packages"];

    path.components()
        .filter_map(|component| component.as_os_str().to_str())
        .any(|part| VENDOR_DIRS.contains(&part))
}
/// Returns `true` when the file name of `path` is one of the recognised
/// package-manager lockfiles. The comparison is exact and case-sensitive.
fn is_known_lockfile(path: &Path) -> bool {
    const LOCKFILES: [&str; 7] = [
        "Cargo.lock",
        "package-lock.json",
        "yarn.lock",
        "pnpm-lock.yaml",
        "Pipfile.lock",
        "poetry.lock",
        "composer.lock",
    ];

    match path.file_name().and_then(|name| name.to_str()) {
        Some(name) => LOCKFILES.contains(&name),
        None => false,
    }
}
/// Heuristically decides whether a file is machine-generated.
///
/// A file qualifies when its name contains `.generated.` or `.g.`, or when
/// the first `GENERATED_SAMPLE_BYTES` bytes contain `@generated` or
/// `generated by` (matched ASCII case-insensitively).
fn looks_generated(path: &Path, bytes: &[u8]) -> bool {
    const NAME_MARKERS: [&str; 2] = [".generated.", ".g."];
    const CONTENT_MARKERS: [&str; 2] = ["@generated", "generated by"];

    let name = path
        .file_name()
        .and_then(|name| name.to_str())
        .unwrap_or("");
    if NAME_MARKERS.iter().any(|marker| name.contains(*marker)) {
        return true;
    }

    let head_len = bytes.len().min(GENERATED_SAMPLE_BYTES);
    let head = String::from_utf8_lossy(&bytes[..head_len]).to_ascii_lowercase();
    CONTENT_MARKERS.iter().any(|marker| head.contains(*marker))
}
/// Heuristically decides whether a file is minified.
///
/// A file qualifies when its name contains `.min.`, or when the first
/// `MINIFIED_SAMPLE_BYTES` bytes contain a line longer than
/// `MINIFIED_LINE_THRESHOLD` and whitespace makes up less than 1% of that
/// sample.
fn looks_minified(path: &Path, bytes: &[u8]) -> bool {
    let name = path
        .file_name()
        .and_then(|name| name.to_str())
        .unwrap_or("");
    if name.contains(".min.") {
        return true;
    }

    let head_len = bytes.len().min(MINIFIED_SAMPLE_BYTES);
    let head = String::from_utf8_lossy(&bytes[..head_len]);

    let mut longest_line = 0usize;
    for line in head.lines() {
        longest_line = longest_line.max(line.len());
    }
    if longest_line <= MINIFIED_LINE_THRESHOLD {
        return false;
    }

    let whitespace = head.chars().filter(|ch| ch.is_whitespace()).count();
    whitespace * 100 < head.len().max(1)
}
/// Heuristically decides whether `bytes` are binary content.
///
/// Input starting with a UTF-8 or UTF-16 byte-order mark is always treated
/// as text. Otherwise the content is binary when a NUL byte occurs within
/// the first `BINARY_SAMPLE_BYTES` bytes.
fn is_binary(bytes: &[u8]) -> bool {
    const TEXT_BOMS: [&[u8]; 3] = [&[0xEF, 0xBB, 0xBF], &[0xFF, 0xFE], &[0xFE, 0xFF]];

    if TEXT_BOMS.iter().any(|bom| bytes.starts_with(bom)) {
        return false;
    }
    bytes
        .iter()
        .take(BINARY_SAMPLE_BYTES)
        .any(|&byte| byte == 0)
}
/// Decodes BOM-stripped UTF-16 bytes with the given `encoding`.
///
/// Returns the decoded text, `label` as the reported encoding name, and a
/// single warning when the decoder reported errors.
fn decode_utf16_bom(
    bom_stripped: &[u8],
    encoding: &'static encoding_rs::Encoding,
    label: &str,
) -> (String, String, Vec<String>) {
    let (decoded, _, had_errors) = encoding.decode(bom_stripped);
    let warnings = if had_errors {
        vec![format!("{label} decode contained replacement characters")]
    } else {
        Vec::new()
    };
    (decoded.into_owned(), label.to_string(), warnings)
}
/// Decodes file bytes into text, returning `(text, encoding label, warnings)`.
///
/// Detection order: UTF-8 BOM, UTF-16LE BOM, UTF-16BE BOM, plain UTF-8, and
/// finally a windows-1252 fallback (which always adds a warning).
///
/// # Errors
///
/// Only a UTF-8 BOM followed by invalid UTF-8 produces an error; its message
/// is the UTF-8 validation error.
fn decode_bytes(bytes: &[u8]) -> std::result::Result<(String, String, Vec<String>), String> {
    if let Some(body) = bytes.strip_prefix(b"\xEF\xBB\xBF") {
        return match std::str::from_utf8(body) {
            Ok(text) => Ok((text.to_owned(), "utf-8-bom".into(), Vec::new())),
            Err(err) => Err(err.to_string()),
        };
    }
    if let Some(body) = bytes.strip_prefix(b"\xFF\xFE") {
        return Ok(decode_utf16_bom(body, UTF_16LE, "utf-16le"));
    }
    if let Some(body) = bytes.strip_prefix(b"\xFE\xFF") {
        return Ok(decode_utf16_bom(body, UTF_16BE, "utf-16be"));
    }

    match std::str::from_utf8(bytes) {
        Ok(text) => Ok((text.to_owned(), "utf-8".into(), Vec::new())),
        Err(_) => {
            let (decoded, _, had_errors) = WINDOWS_1252.decode(bytes);
            let mut warnings = vec!["decoded using windows-1252 fallback".into()];
            if had_errors {
                warnings.push("fallback decode contained replacement characters".into());
            }
            Ok((decoded.into_owned(), "windows-1252".into(), warnings))
        }
    }
}
/// Compiles glob patterns into a `GlobSet`.
///
/// Returns `Ok(None)` when `patterns` is empty, meaning no filter applies.
///
/// # Errors
///
/// Fails when a pattern is not a valid glob or the set cannot be built.
fn compile_globset(patterns: &[String]) -> Result<Option<GlobSet>> {
    if patterns.is_empty() {
        return Ok(None);
    }

    let mut builder = GlobSetBuilder::new();
    for pattern in patterns {
        let glob =
            Glob::new(pattern).with_context(|| format!("invalid glob pattern: {pattern}"))?;
        builder.add(glob);
    }

    let set = builder.build().context("failed to compile glob filters")?;
    Ok(Some(set))
}
/// Resolves configured language names into a set of `Language` values.
///
/// Returns `Ok(None)` when `enabled` is empty, meaning no language filter
/// applies.
///
/// # Errors
///
/// Fails on the first name that `Language::from_name` does not recognise or
/// that is not among `supported_languages()`.
fn parse_enabled_languages(enabled: &[String]) -> Result<Option<BTreeSet<Language>>> {
    if enabled.is_empty() {
        return Ok(None);
    }

    let supported = supported_languages();
    let selected = enabled
        .iter()
        .map(|name| -> Result<Language> {
            let language = Language::from_name(name)
                .with_context(|| format!("unsupported language in config: {name}"))?;
            if supported.contains(&language) {
                Ok(language)
            } else {
                Err(anyhow::anyhow!(
                    "language {name} is not supported in this build"
                ))
            }
        })
        .collect::<Result<BTreeSet<Language>>>()?;

    Ok(Some(selected))
}
pub fn write_json(run: &AnalysisRun, output_path: &Path) -> Result<()> {
let json = serde_json::to_string_pretty(run).context("failed to serialize analysis run")?;
fs::write(output_path, json)
.with_context(|| format!("failed to write JSON output to {}", output_path.display()))
}
/// Loads an `AnalysisRun` from a JSON result file.
///
/// # Errors
///
/// Fails when the file cannot be read or its contents do not parse as an
/// `AnalysisRun`.
pub fn read_json(path: &Path) -> Result<AnalysisRun> {
    fs::read_to_string(path)
        .with_context(|| format!("failed to read result file {}", path.display()))
        .and_then(|contents| {
            serde_json::from_str(&contents)
                .with_context(|| format!("failed to parse JSON result {}", path.display()))
        })
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn effective_counts_respect_code_only_policy() {
let raw = RawLineCounts {
code_only_lines: 2,
single_comment_only_lines: 1,
mixed_code_single_comment_lines: 3,
docstring_comment_lines: 2,
..RawLineCounts::default()
};
let counts = compute_effective_counts(&raw, MixedLinePolicy::CodeOnly, true, true);
assert_eq!(counts.code_lines, 5);
assert_eq!(counts.comment_lines, 3);
}
#[test]
fn effective_counts_can_separate_mixed() {
let raw = RawLineCounts {
mixed_code_single_comment_lines: 2,
mixed_code_multi_comment_lines: 1,
..RawLineCounts::default()
};
let counts =
compute_effective_counts(&raw, MixedLinePolicy::SeparateMixedCategory, true, true);
assert_eq!(counts.mixed_lines_separate, 3);
assert_eq!(counts.code_lines, 0);
assert_eq!(counts.comment_lines, 0);
}
#[test]
fn windows_1252_fallback_decodes() {
let bytes = vec![0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x96, 0x57];
let (text, encoding, warnings) = decode_bytes(&bytes).unwrap();
assert_eq!(encoding, "windows-1252");
assert!(text.contains('–'));
assert!(!warnings.is_empty());
}
#[test]
fn is_binary_detects_null_byte() {
let bytes = b"hello\x00world";
assert!(is_binary(bytes));
}
#[test]
fn is_binary_clean_text_is_not_binary() {
let bytes = b"fn main() { println!(\"hello\"); }";
assert!(!is_binary(bytes));
}
#[test]
fn is_binary_utf8_bom_not_binary() {
let bytes = b"\xef\xbb\xbffn main() {}";
assert!(!is_binary(bytes));
}
#[test]
fn looks_generated_at_generated_marker() {
let bytes = b"// @generated by protoc-gen-rust\nfn foo() {}";
assert!(looks_generated(Path::new("foo.rs"), bytes));
}
#[test]
fn looks_generated_do_not_edit_marker() {
let bytes = b"// Code generated by build.rs. DO NOT EDIT.\nuse foo;";
assert!(looks_generated(Path::new("foo.rs"), bytes));
let bytes2 = b"// @generated\nuse foo;";
assert!(looks_generated(Path::new("foo.rs"), bytes2));
}
#[test]
fn looks_generated_normal_file_not_generated() {
let bytes = b"fn main() {\n println!(\"hello\");\n}\n";
assert!(!looks_generated(Path::new("main.rs"), bytes));
}
#[test]
fn looks_minified_dot_min_filename() {
let bytes = b"function a(){return 1}";
assert!(looks_minified(Path::new("bundle.min.js"), bytes));
}
#[test]
fn looks_minified_normal_file_not_minified() {
let bytes = b"function hello() {\n return 1;\n}\n";
assert!(!looks_minified(Path::new("app.js"), bytes));
}
#[test]
fn looks_minified_very_long_line() {
let long_line: Vec<u8> = b"x".repeat(MINIFIED_LINE_THRESHOLD + 1);
assert!(looks_minified(Path::new("app.js"), &long_line));
}
#[test]
fn is_known_lockfile_cargo_lock() {
assert!(is_known_lockfile(Path::new("Cargo.lock")));
}
#[test]
fn is_known_lockfile_package_lock_json() {
assert!(is_known_lockfile(Path::new("package-lock.json")));
}
#[test]
fn is_known_lockfile_yarn_lock() {
assert!(is_known_lockfile(Path::new("yarn.lock")));
}
#[test]
fn is_known_lockfile_normal_file_is_not_lockfile() {
assert!(!is_known_lockfile(Path::new("src/lib.rs")));
}
#[test]
fn is_vendor_path_node_modules() {
assert!(is_vendor_path(Path::new("node_modules/react/index.js")));
}
#[test]
fn is_vendor_path_vendor_dir() {
assert!(is_vendor_path(Path::new("vendor/anyhow/src/lib.rs")));
}
#[test]
fn is_vendor_path_normal_src_is_not_vendor() {
assert!(!is_vendor_path(Path::new("src/lib.rs")));
}
#[test]
fn is_excluded_dir_path_matches_excluded() {
let excluded = vec![".git".into(), "target".into()];
assert!(is_excluded_dir_path(Path::new(".git/config"), &excluded));
}
#[test]
fn is_excluded_dir_path_non_excluded_is_ok() {
let excluded = vec![".git".into(), "target".into()];
assert!(!is_excluded_dir_path(Path::new("src/main.rs"), &excluded));
}
#[test]
fn decode_bytes_utf8_bom_stripped() {
let bytes = b"\xef\xbb\xbffn main() {}";
let (text, encoding, _) = decode_bytes(bytes).unwrap();
assert!(
encoding.contains("utf-8"),
"should be utf-8 variant, got {encoding}"
);
assert!(text.starts_with("fn"));
}
#[test]
fn decode_bytes_plain_utf8() {
let bytes = b"hello world";
let (text, encoding, warnings) = decode_bytes(bytes).unwrap();
assert_eq!(encoding, "utf-8");
assert_eq!(text, "hello world");
assert!(warnings.is_empty());
}
#[test]
fn decode_bytes_utf16le_bom() {
let mut bytes = vec![0xFF, 0xFE];
for ch in "hi\n".encode_utf16() {
bytes.extend_from_slice(&ch.to_le_bytes());
}
let (text, encoding, _warnings) = decode_bytes(&bytes).unwrap();
assert_eq!(encoding, "utf-16le");
assert!(text.contains('h') && text.contains('i'));
}
#[test]
fn decode_bytes_utf16be_bom() {
let mut bytes = vec![0xFE, 0xFF];
for ch in "ok\n".encode_utf16() {
bytes.extend_from_slice(&ch.to_be_bytes());
}
let (text, encoding, _warnings) = decode_bytes(&bytes).unwrap();
assert_eq!(encoding, "utf-16be");
assert!(text.contains('o') && text.contains('k'));
}
#[test]
fn is_binary_utf16le_bom_not_binary() {
let bytes = &[0xFF, 0xFE, 0x68, 0x00];
assert!(!is_binary(bytes));
}
#[test]
fn is_binary_utf16be_bom_not_binary() {
let bytes = &[0xFE, 0xFF, 0x00, 0x68];
assert!(!is_binary(bytes));
}
#[test]
fn effective_counts_code_and_comment_policy() {
let raw = RawLineCounts {
mixed_code_single_comment_lines: 3,
mixed_code_multi_comment_lines: 2,
..RawLineCounts::default()
};
let counts = compute_effective_counts(&raw, MixedLinePolicy::CodeAndComment, true, true);
assert_eq!(counts.code_lines, 5);
assert_eq!(counts.comment_lines, 5);
assert_eq!(counts.mixed_lines_separate, 0);
}
#[test]
fn effective_counts_comment_only_policy() {
let raw = RawLineCounts {
mixed_code_single_comment_lines: 4,
mixed_code_multi_comment_lines: 1,
..RawLineCounts::default()
};
let counts = compute_effective_counts(&raw, MixedLinePolicy::CommentOnly, true, true);
assert_eq!(counts.code_lines, 0);
assert_eq!(counts.comment_lines, 5);
assert_eq!(counts.mixed_lines_separate, 0);
}
#[test]
fn effective_counts_docstrings_as_code_when_flag_false() {
let raw = RawLineCounts {
code_only_lines: 10,
docstring_comment_lines: 3,
..RawLineCounts::default()
};
let counts = compute_effective_counts(&raw, MixedLinePolicy::CodeOnly, false, true);
assert_eq!(counts.code_lines, 13);
assert_eq!(counts.comment_lines, 0);
}
#[test]
fn effective_counts_exclude_compiler_directives() {
let raw = RawLineCounts {
code_only_lines: 10,
compiler_directive_lines: 3,
..RawLineCounts::default()
};
let counts = compute_effective_counts(&raw, MixedLinePolicy::CodeOnly, true, false);
assert_eq!(counts.code_lines, 7);
}
#[test]
fn effective_counts_directives_not_subtracted_below_zero() {
let raw = RawLineCounts {
code_only_lines: 2,
compiler_directive_lines: 5, ..RawLineCounts::default()
};
let counts = compute_effective_counts(&raw, MixedLinePolicy::CodeOnly, true, false);
assert_eq!(counts.code_lines, 0); }
#[test]
fn cocomo_organic_computes_positive_values() {
let est = compute_cocomo(5_000, CocomoMode::Organic);
assert!(est.ksloc > 0.0);
assert!(est.effort_person_months > 0.0);
assert!(est.duration_months > 0.0);
assert!(est.avg_staff > 0.0);
assert_eq!(est.mode, CocomoMode::Organic);
}
#[test]
fn cocomo_semi_detached_computes_positive_values() {
let est = compute_cocomo(20_000, CocomoMode::SemiDetached);
assert!(est.ksloc > 0.0);
assert!(est.effort_person_months > 0.0);
assert!(est.duration_months > 0.0);
assert_eq!(est.mode, CocomoMode::SemiDetached);
}
#[test]
fn cocomo_embedded_computes_positive_values() {
let est = compute_cocomo(100_000, CocomoMode::Embedded);
assert!(est.effort_person_months > 0.0);
assert_eq!(est.mode, CocomoMode::Embedded);
}
#[test]
fn cocomo_zero_lines_produces_zero_effort() {
let est = compute_cocomo(0, CocomoMode::Organic);
assert!((est.ksloc).abs() < f64::EPSILON);
assert!((est.effort_person_months - 0.0).abs() < 0.01);
}
#[test]
fn parse_activity_log_counts_and_dates_per_file() {
let out = "\u{0}2024-03-02T10:00:00+00:00\n\
M\tsrc/a.rs\n\
A\tsrc/b.rs\n\
\u{0}2024-03-01T09:00:00+00:00\n\
M\tsrc/a.rs\n";
let map = parse_activity_log(out);
assert_eq!(map["src/a.rs"].0, 2, "a.rs touched in two commits");
assert_eq!(map["src/b.rs"].0, 1, "b.rs touched once");
assert_eq!(
map["src/a.rs"].1.as_deref(),
Some("2024-03-02T10:00:00+00:00")
);
}
#[test]
fn parse_activity_log_attributes_rename_to_new_path() {
let out = "\u{0}2024-03-02T10:00:00+00:00\nR100\tsrc/old.rs\tsrc/new.rs\n";
let map = parse_activity_log(out);
assert_eq!(map["src/new.rs"].0, 1);
assert!(!map.contains_key("src/old.rs"));
}
#[test]
fn parse_activity_log_empty_is_empty() {
assert!(parse_activity_log("").is_empty());
}
#[test]
fn parse_url_line_extracts_url() {
assert_eq!(
parse_url_line("url = https://example.com/repo.git"),
Some("https://example.com/repo.git")
);
}
#[test]
fn parse_url_line_returns_none_for_non_url_key() {
assert_eq!(
parse_url_line("fetch = +refs/heads/*:refs/remotes/origin/*"),
None
);
}
#[test]
fn parse_url_line_returns_none_for_empty_url() {
assert_eq!(parse_url_line("url = "), None);
}
#[test]
fn looks_generated_generated_filename_extension() {
let bytes = b"// normal code\n";
assert!(looks_generated(Path::new("schema.generated.ts"), bytes));
}
#[test]
fn looks_generated_dot_g_extension() {
let bytes = b"// normal code\n";
assert!(looks_generated(Path::new("parser.g.cs"), bytes));
}
#[test]
fn looks_minified_whitespace_ratio_is_ok() {
let normal = b"var x=1,y=2,z=3;\n";
assert!(!looks_minified(Path::new("app.js"), normal));
}
#[test]
fn is_known_lockfile_pnpm() {
assert!(is_known_lockfile(Path::new("pnpm-lock.yaml")));
}
#[test]
fn is_known_lockfile_pipfile() {
assert!(is_known_lockfile(Path::new("Pipfile.lock")));
}
#[test]
fn is_known_lockfile_poetry() {
assert!(is_known_lockfile(Path::new("poetry.lock")));
}
#[test]
fn is_known_lockfile_composer() {
assert!(is_known_lockfile(Path::new("composer.lock")));
}
#[test]
fn relative_path_string_strips_root_prefix() {
let path = Path::new("/tmp/project/src/lib.rs");
let root = Path::new("/tmp/project");
let rel = relative_path_string(path, root);
assert_eq!(rel, "src/lib.rs");
}
/// A path outside the root still produces a non-empty string.
#[test]
fn relative_path_string_falls_back_to_full_path() {
    let root = Path::new("/tmp/project");
    let outside = Path::new("/other/dir/file.rs");
    let rendered = relative_path_string(outside, root);
    // Only non-emptiness is checked; the exact fallback text is not pinned here.
    assert!(!rendered.is_empty());
}
#[test]
fn find_duplicate_groups_returns_empty_for_unique_hashes() {
use sloc_languages::{Language, ParseMode, RawLineCounts};
let make_rec = |hash: u64, path: &str| FileRecord {
path: path.into(),
relative_path: path.into(),
language: Some(Language::Rust),
size_bytes: 10,
detected_encoding: Some("utf-8".into()),
raw_line_categories: RawLineCounts::default(),
effective_counts: EffectiveCounts::default(),
status: FileStatus::AnalyzedExact,
warnings: vec![],
generated: false,
minified: false,
vendor: false,
parse_mode: Some(ParseMode::Lexical),
submodule: None,
coverage: None,
style_analysis: None,
cyclomatic_complexity: None,
lsloc: None,
commit_count: None,
last_commit_date: None,
content_hash: hash,
};
let analyzed = vec![make_rec(111, "a.rs"), make_rec(222, "b.rs")];
let groups = find_duplicate_groups(&analyzed);
assert!(groups.is_empty());
}
#[test]
fn find_duplicate_groups_returns_group_for_same_hash() {
use sloc_languages::{Language, ParseMode, RawLineCounts};
let make_rec = |hash: u64, path: &str| FileRecord {
path: path.into(),
relative_path: path.into(),
language: Some(Language::Rust),
size_bytes: 10,
detected_encoding: Some("utf-8".into()),
raw_line_categories: RawLineCounts::default(),
effective_counts: EffectiveCounts::default(),
status: FileStatus::AnalyzedExact,
warnings: vec![],
generated: false,
minified: false,
vendor: false,
parse_mode: Some(ParseMode::Lexical),
submodule: None,
coverage: None,
style_analysis: None,
cyclomatic_complexity: None,
lsloc: None,
commit_count: None,
last_commit_date: None,
content_hash: hash,
};
let analyzed = vec![
make_rec(999, "a.rs"),
make_rec(999, "b.rs"),
make_rec(123, "c.rs"),
];
let groups = find_duplicate_groups(&analyzed);
assert_eq!(groups.len(), 1);
assert_eq!(groups[0].len(), 2);
}
#[test]
fn find_duplicate_groups_ignores_zero_hash() {
use sloc_languages::{Language, ParseMode, RawLineCounts};
let make_rec = |hash: u64, path: &str| FileRecord {
path: path.into(),
relative_path: path.into(),
language: Some(Language::Rust),
size_bytes: 10,
detected_encoding: Some("utf-8".into()),
raw_line_categories: RawLineCounts::default(),
effective_counts: EffectiveCounts::default(),
status: FileStatus::AnalyzedExact,
warnings: vec![],
generated: false,
minified: false,
vendor: false,
parse_mode: Some(ParseMode::Lexical),
submodule: None,
coverage: None,
style_analysis: None,
cyclomatic_complexity: None,
lsloc: None,
commit_count: None,
last_commit_date: None,
content_hash: hash,
};
let analyzed = vec![make_rec(0, "a.rs"), make_rec(0, "b.rs")];
let groups = find_duplicate_groups(&analyzed);
assert!(
groups.is_empty(),
"zero-hash files must not be grouped as duplicates"
);
}
/// A directory without a `.gitmodules` file reports no submodules.
#[test]
fn detect_submodules_no_gitmodules_returns_empty() {
    let workspace = tempfile::tempdir().unwrap();
    let submodules = detect_submodules(workspace.path());
    assert!(submodules.is_empty());
}
/// A single `[submodule]` section in `.gitmodules` is reported as one entry.
#[test]
fn detect_submodules_parses_gitmodules_file() {
    let workspace = tempfile::tempdir().unwrap();
    let gitmodules = "[submodule \"vendor/lib\"]\n\tpath = vendor/lib\n\turl = https://github.com/example/lib.git\n";
    std::fs::write(workspace.path().join(".gitmodules"), gitmodules).unwrap();

    let submodules = detect_submodules(workspace.path());

    assert_eq!(submodules.len(), 1);
    // The first tuple field of the entry is expected to be "vendor/lib".
    assert_eq!(submodules[0].0, "vendor/lib");
}
/// Serialises an `AnalysisRun` with `write_json`, reloads it with `read_json`,
/// and checks that a handful of representative fields survive the round trip.
#[test]
fn write_json_read_json_roundtrip() {
    use chrono::Utc;
    use sloc_config::AppConfig;
    use sloc_languages::{Language, ParseMode, RawLineCounts};

    let dir = tempfile::tempdir().unwrap();

    let tool = ToolMetadata {
        name: "sloc".into(),
        version: "0.0.1".into(),
        run_id: "test-roundtrip".into(),
        timestamp_utc: Utc::now(),
    };
    let environment = EnvironmentMetadata {
        operating_system: "test".into(),
        architecture: "x86_64".into(),
        runtime_mode: "test".into(),
        initiator_username: "tester".into(),
        initiator_hostname: "testhost".into(),
        ci_name: None,
    };
    // One analyzed Rust file with five code lines, matching the summary totals below.
    let record = FileRecord {
        path: "a.rs".into(),
        relative_path: "a.rs".into(),
        language: Some(Language::Rust),
        size_bytes: 50,
        detected_encoding: Some("utf-8".into()),
        raw_line_categories: RawLineCounts {
            code_only_lines: 5,
            ..RawLineCounts::default()
        },
        effective_counts: EffectiveCounts {
            code_lines: 5,
            ..EffectiveCounts::default()
        },
        status: FileStatus::AnalyzedExact,
        warnings: vec![],
        generated: false,
        minified: false,
        vendor: false,
        parse_mode: Some(ParseMode::Lexical),
        submodule: None,
        coverage: None,
        style_analysis: None,
        cyclomatic_complexity: None,
        lsloc: None,
        commit_count: None,
        last_commit_date: None,
        content_hash: 0,
    };
    let summary_totals = SummaryTotals {
        files_analyzed: 1,
        code_lines: 5,
        ..SummaryTotals::default()
    };

    let run = AnalysisRun {
        tool,
        environment,
        effective_configuration: AppConfig::default(),
        input_roots: vec!["/tmp/test".into()],
        summary_totals,
        totals_by_language: vec![],
        per_file_records: vec![record],
        skipped_file_records: vec![],
        warnings: vec![],
        submodule_summaries: vec![],
        git_commit_short: Some("abc1234".into()),
        git_branch: Some("main".into()),
        git_commit_long: None,
        git_commit_author: None,
        git_tags: None,
        git_nearest_tag: None,
        git_commit_date: None,
        git_remote_url: None,
        style_summary: None,
        cocomo: None,
        uloc: 0,
        dryness_pct: None,
        duplicate_groups: vec![],
        duplicates_excluded: 0,
    };

    let json_path = dir.path().join("test.json");
    write_json(&run, &json_path).unwrap();
    let reloaded = read_json(&json_path).unwrap();

    assert_eq!(reloaded.summary_totals.files_analyzed, 1);
    assert_eq!(reloaded.summary_totals.code_lines, 5);
    assert_eq!(reloaded.git_commit_short.as_deref(), Some("abc1234"));
    assert_eq!(reloaded.git_branch.as_deref(), Some("main"));
    assert_eq!(reloaded.per_file_records.len(), 1);
}
/// Removes a set of CI marker variables and then calls `detect_ci_system`.
///
/// NOTE(review): the result is discarded, so despite the name this only checks
/// that the call does not panic. Consider asserting on the result once the
/// full list of variables probed by `detect_ci_system` is confirmed.
#[test]
fn detect_ci_system_returns_none_without_env_vars() {
    const CI_MARKER_VARS: [&str; 9] = [
        "JENKINS_URL",
        "JENKINS_HOME",
        "BUILD_URL",
        "GITHUB_ACTIONS",
        "GITLAB_CI",
        "CIRCLECI",
        "TRAVIS",
        "TF_BUILD",
        "TEAMCITY_VERSION",
    ];
    for name in CI_MARKER_VARS {
        std::env::remove_var(name);
    }
    let _ = detect_ci_system();
}
/// A `.git` file containing `gitdir: <absolute dir>` resolves to that existing directory.
#[test]
fn resolve_git_file_pointer_valid_absolute_gitdir() {
    let workspace = tempfile::tempdir().unwrap();
    let target_dir = workspace.path().join("real.git");
    fs::create_dir_all(&target_dir).unwrap();

    let pointer_file = workspace.path().join(".git");
    let pointer_text = format!("gitdir: {}\n", target_dir.display());
    fs::write(&pointer_file, pointer_text).unwrap();

    let resolved = resolve_git_file_pointer(&pointer_file, workspace.path());
    assert!(
        resolved.is_some(),
        "should resolve a valid absolute gitdir pointer"
    );
    assert!(resolved.unwrap().is_dir());
}
/// A `.git` file that does not start with the `gitdir:` prefix is rejected.
#[test]
fn resolve_git_file_pointer_missing_gitdir_prefix_returns_none() {
    let workspace = tempfile::tempdir().unwrap();
    let pointer_file = workspace.path().join(".git");
    fs::write(&pointer_file, "not a gitdir line\n").unwrap();

    let resolved = resolve_git_file_pointer(&pointer_file, workspace.path());
    assert!(resolved.is_none());
}
/// A pointer file that cannot be read (it does not exist) yields `None`.
#[test]
fn resolve_git_file_pointer_unreadable_path_returns_none() {
    let missing_file = Path::new("/nonexistent/__sloc_test_git_file__");
    let base_dir = Path::new("/nonexistent");
    assert!(resolve_git_file_pointer(missing_file, base_dir).is_none());
}
/// A well-formed pointer whose target directory does not exist yields `None`.
#[test]
fn resolve_git_file_pointer_nonexistent_target_returns_none() {
    let workspace = tempfile::tempdir().unwrap();
    let pointer_file = workspace.path().join(".git");
    fs::write(
        &pointer_file,
        "gitdir: /nonexistent/__sloc_fake_gitdir_xyz__\n",
    )
    .unwrap();

    let resolved = resolve_git_file_pointer(&pointer_file, workspace.path());
    assert!(resolved.is_none());
}
/// A relative `gitdir:` target that exists next to the `.git` file is resolved.
#[test]
fn resolve_git_file_pointer_relative_path() {
    let workspace = tempfile::tempdir().unwrap();
    let target_dir = workspace.path().join("real_git_dir");
    fs::create_dir_all(&target_dir).unwrap();

    let pointer_file = workspace.path().join(".git");
    fs::write(&pointer_file, "gitdir: real_git_dir\n").unwrap();

    let resolved = resolve_git_file_pointer(&pointer_file, workspace.path());
    assert!(resolved.is_some());
}
/// A loose ref file under `refs/heads/` resolves to the object id it contains,
/// with the trailing newline stripped.
#[test]
fn resolve_ref_from_loose_file() {
    let dir = tempfile::tempdir().unwrap();
    let git_dir = dir.path();
    fs::create_dir_all(git_dir.join("refs/heads")).unwrap();

    // A SHA-1 object id is exactly 40 hex characters. The previous fixture had
    // 41, so the realistic exact-length case was never exercised on this path.
    let sha = "abc1234567890abcdef1234567890abcdef12345";
    assert_eq!(sha.len(), 40, "fixture must be a full-length SHA-1");
    fs::write(git_dir.join("refs/heads/main"), format!("{sha}\n")).unwrap();

    let result = resolve_ref(git_dir, "refs/heads/main");
    assert_eq!(result.as_deref(), Some(sha));
}
/// With no loose ref file present, the ref is looked up in `packed-refs`.
#[test]
fn resolve_ref_from_packed_refs() {
    let workspace = tempfile::tempdir().unwrap();
    let git_dir = workspace.path();
    let sha = "def5678def5678def5678def5678def5678def56";

    let packed =
        format!("# pack-refs with: peeled fully-peeled sorted\n{sha} refs/heads/feature\n");
    fs::write(git_dir.join("packed-refs"), packed).unwrap();

    let resolved = resolve_ref(git_dir, "refs/heads/feature");
    assert_eq!(resolved.as_deref(), Some(sha));
}
/// An empty git directory has neither a loose nor a packed entry for the ref.
#[test]
fn resolve_ref_not_found_returns_none() {
    let workspace = tempfile::tempdir().unwrap();
    let resolved = resolve_ref(workspace.path(), "refs/heads/nonexistent-branch-xyz");
    assert!(resolved.is_none());
}
/// `#` comment lines and `^` peeled lines in `packed-refs` do not prevent the
/// following regular entry from being found.
#[test]
fn resolve_ref_packed_refs_skips_comment_and_peeled() {
    let workspace = tempfile::tempdir().unwrap();
    let git_dir = workspace.path();
    let sha = "aaa1111aaa1111aaa1111aaa1111aaa1111aaa11";

    let packed = format!("# comment\n^peeled-object-sha\n{sha} refs/tags/v1.0\n");
    fs::write(git_dir.join("packed-refs"), packed).unwrap();

    let resolved = resolve_ref(git_dir, "refs/tags/v1.0");
    assert_eq!(resolved.as_deref(), Some(sha));
}
/// A loose ref file whose content is too short to be an object id is not
/// accepted; with no `packed-refs` file either, the lookup yields `None`.
#[test]
fn resolve_ref_loose_sha_too_short_falls_through_to_packed() {
    let workspace = tempfile::tempdir().unwrap();
    let git_dir = workspace.path();
    let heads_dir = git_dir.join("refs/heads");
    fs::create_dir_all(&heads_dir).unwrap();
    fs::write(git_dir.join("refs/heads/main"), "short\n").unwrap();

    let resolved = resolve_ref(git_dir, "refs/heads/main");
    assert!(resolved.is_none());
}
/// The `url` of the `[remote "origin"]` section in `config` is returned.
#[test]
fn read_git_remote_url_parses_origin_url() {
    let workspace = tempfile::tempdir().unwrap();
    let git_dir = workspace.path().join(".git");
    fs::create_dir_all(&git_dir).unwrap();

    let config_text = "[core]\n\trepositoryformatversion = 0\n[remote \"origin\"]\n\turl = https://github.com/org/repo.git\n\tfetch = +refs/heads/*:refs/remotes/origin/*\n";
    fs::write(git_dir.join("config"), config_text).unwrap();

    let remote = read_git_remote_url(&git_dir);
    assert_eq!(remote.as_deref(), Some("https://github.com/org/repo.git"));
}
/// A git directory without a `config` file has no remote URL.
#[test]
fn read_git_remote_url_no_config_returns_none() {
    let workspace = tempfile::tempdir().unwrap();
    let git_dir = workspace.path().join(".git");
    fs::create_dir_all(&git_dir).unwrap();

    let remote = read_git_remote_url(&git_dir);
    assert!(remote.is_none());
}
/// A directory with no `.git` entry yields git info without a commit id.
#[test]
fn detect_git_for_run_no_git_dir_returns_default() {
    let workspace = tempfile::tempdir().unwrap();
    let git_info = detect_git_for_run(workspace.path());
    assert!(git_info.commit_long.is_none());
}
/// A `.git` directory that contains no `HEAD` file yields git info without a commit id.
#[test]
fn detect_git_for_run_unreadable_head_returns_default() {
    let workspace = tempfile::tempdir().unwrap();
    // Create the directory but deliberately leave HEAD missing.
    fs::create_dir_all(workspace.path().join(".git")).unwrap();

    let git_info = detect_git_for_run(workspace.path());
    assert!(git_info.commit_long.is_none());
}
/// When `HEAD` holds a raw object id, it is reported as the long commit and
/// its first seven characters as the short commit.
#[test]
fn detect_git_for_run_detached_head_with_sha() {
    let workspace = tempfile::tempdir().unwrap();
    let git_dir = workspace.path().join(".git");
    fs::create_dir_all(&git_dir).unwrap();

    let sha = "abc1234567890abcdef1234567890abcdef12345";
    fs::write(git_dir.join("HEAD"), sha).unwrap();

    let git_info = detect_git_for_run(workspace.path());
    assert_eq!(git_info.commit_long.as_deref(), Some(sha));
    assert_eq!(git_info.commit_short.as_deref(), Some("abc1234"));
}
/// A symbolic `HEAD` pointing at `refs/heads/main` is resolved through
/// `packed-refs`, and the branch name is reported as `main`.
#[test]
fn detect_git_for_run_with_packed_ref() {
    let workspace = tempfile::tempdir().unwrap();
    let git_dir = workspace.path().join(".git");
    fs::create_dir_all(&git_dir).unwrap();
    fs::write(git_dir.join("HEAD"), "ref: refs/heads/main\n").unwrap();

    let sha = "deadbeef00000000000000000000000000000000";
    let packed = format!("# pack-refs\n{sha} refs/heads/main\n");
    fs::write(git_dir.join("packed-refs"), packed).unwrap();

    let git_info = detect_git_for_run(workspace.path());
    assert_eq!(git_info.commit_long.as_deref(), Some(sha));
    assert_eq!(git_info.branch.as_deref(), Some("main"));
}
use std::sync::{Mutex, OnceLock};
/// Process-wide lock serialising tests that mutate environment variables.
static CI_ENV_LOCK: OnceLock<Mutex<()>> = OnceLock::new();

/// Acquires the environment lock, returning a guard that releases it on drop.
///
/// A poisoned mutex is recovered rather than unwrapped: the lock protects no
/// data (`()`), so poisoning carries no information. Panicking on it would make
/// every later environment test fail just because one earlier test failed.
fn ci_env_lock() -> std::sync::MutexGuard<'static, ()> {
    CI_ENV_LOCK
        .get_or_init(|| Mutex::new(()))
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner)
}
/// Removes every branch-related environment variable the `ci_branch_from_env`
/// tests set, so each test starts from and leaves behind a clean environment.
fn clear_branch_env_vars() {
    const BRANCH_VARS: [&str; 7] = [
        "BRANCH_NAME",
        "GIT_BRANCH",
        "GITHUB_REF_NAME",
        "CI_COMMIT_BRANCH",
        "CIRCLE_BRANCH",
        "TRAVIS_BRANCH",
        "BUILD_SOURCEBRANCH",
    ];
    for name in BRANCH_VARS {
        std::env::remove_var(name);
    }
}
/// A `refs/heads/` prefix in `BUILD_SOURCEBRANCH` is stripped from the branch name.
#[test]
fn ci_branch_from_env_strips_refs_heads_prefix() {
    let _guard = ci_env_lock();
    clear_branch_env_vars();
    std::env::set_var("BUILD_SOURCEBRANCH", "refs/heads/my-branch");

    let resolved = ci_branch_from_env();
    // Clean up before asserting so a failure does not leak the variable.
    clear_branch_env_vars();

    assert_eq!(resolved.as_deref(), Some("my-branch"));
}
/// An `origin/` prefix in `GIT_BRANCH` is stripped from the branch name.
#[test]
fn ci_branch_from_env_strips_origin_prefix() {
    let _guard = ci_env_lock();
    clear_branch_env_vars();
    std::env::set_var("GIT_BRANCH", "origin/develop");

    let resolved = ci_branch_from_env();
    // Clean up before asserting so a failure does not leak the variable.
    clear_branch_env_vars();

    assert_eq!(resolved.as_deref(), Some("develop"));
}
/// The literal value `HEAD` in `BRANCH_NAME` is not accepted as a branch name.
#[test]
fn ci_branch_from_env_returns_none_for_head() {
    let _guard = ci_env_lock();
    clear_branch_env_vars();
    std::env::set_var("BRANCH_NAME", "HEAD");

    let branch = ci_branch_from_env();
    // Clean up before asserting so a failure does not leak the variable.
    clear_branch_env_vars();

    assert!(branch.is_none(), "HEAD should be filtered, got: {branch:?}");
}
}