1#![allow(clippy::multiple_crate_versions)]
4
5pub mod delta;
6pub mod history;
7pub use delta::{compute_delta, FileChangeStatus, FileDelta, ScanComparison, SummaryDelta};
8pub use history::{RegistryEntry, ScanRegistry, ScanSummarySnapshot};
9
10use std::collections::{BTreeMap, BTreeSet, HashSet};
11use std::fs;
12use std::path::{Path, PathBuf};
13
14use anyhow::{Context, Result};
15use chrono::{DateTime, Utc};
16use encoding_rs::{UTF_16BE, UTF_16LE, WINDOWS_1252};
17use globset::{Glob, GlobSet, GlobSetBuilder};
18use ignore::WalkBuilder;
19use serde::{Deserialize, Serialize};
20use uuid::Uuid;
21
22use sloc_config::{
23 AppConfig, BinaryFileBehavior, BlankInBlockCommentPolicy, ContinuationLinePolicy,
24 FailureBehavior, MixedLinePolicy,
25};
26use sloc_languages::{
27 analyze_text, detect_language, supported_languages, AnalysisOptions, Language, ParseMode,
28 RawLineCounts,
29};
30
/// Result of the metadata-only checks performed by `check_metadata_policy`.
enum MetadataPolicyOutcome {
    /// The file is reported as skipped, using the carried (boxed) record.
    Skip(Box<FileRecord>),
    /// The file is dropped without producing any record; returned when include
    /// globs are configured and none of them match.
    Exclude,
    /// No metadata policy matched; the caller goes on to read the file.
    Continue,
}
40
/// Final disposition of a single candidate file.
///
/// Serialized in `snake_case` (for example `analyzed_exact`).
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FileStatus {
    /// Analysis ran in the `Lexical` or `TreeSitter` parse mode.
    AnalyzedExact,
    /// Analysis ran in the `LexicalBestEffort` parse mode.
    AnalyzedBestEffort,
    /// The content was classified as binary and the configured behavior is to skip.
    SkippedBinary,
    /// The bytes could not be decoded and the configured behavior is warn-and-skip.
    SkippedDecodeError,
    /// No language could be detected for the file.
    SkippedUnsupported,
    /// A discovery or analysis policy excluded the file (symlink, `.gitignore`,
    /// excluded directory, size limit, exclude glob, lockfile, vendor, generated,
    /// minified, or disabled language).
    SkippedByPolicy,
    /// Reading the file's metadata or contents failed.
    ErrorInternal,
}
52
/// Per-file line counts after the configured counting policies have been applied
/// to the raw line categories (see `compute_effective_counts`).
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct EffectiveCounts {
    /// Lines counted as code.
    pub code_lines: u64,
    /// Lines counted as comments.
    pub comment_lines: u64,
    /// Blank-only lines.
    pub blank_lines: u64,
    /// Mixed code-and-comment lines; only populated under
    /// `MixedLinePolicy::SeparateMixedCategory`.
    pub mixed_lines_separate: u64,
}
60
/// Identifies the tool build and the individual run that produced a report.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolMetadata {
    /// Tool name; `assemble_run` sets this to `"sloc"`.
    pub name: String,
    /// Crate version, taken from `CARGO_PKG_VERSION` at compile time.
    pub version: String,
    /// Run identifier: a `%Y%m%d-%H%M` timestamp followed by `-` and a v4 UUID
    /// in simple (hyphen-free) form.
    pub run_id: String,
    /// UTC time captured when the run was assembled.
    pub timestamp_utc: DateTime<Utc>,
}
68
/// Describes the environment in which a run was executed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnvironmentMetadata {
    /// Value of `std::env::consts::OS`.
    pub operating_system: String,
    /// Value of `std::env::consts::ARCH`.
    pub architecture: String,
    /// Caller-supplied runtime mode string passed to `analyze`.
    pub runtime_mode: String,
    /// User name from the `USERNAME` or `USER` environment variable, or `"unknown"`.
    pub initiator_username: String,
    /// Host name from `COMPUTERNAME`, `HOSTNAME`, or `/etc/hostname`, or `"unknown"`.
    pub initiator_hostname: String,
}
77
/// Totals aggregated over every file of a run (see `build_summary`).
///
/// Fields marked `#[serde(default)]` fall back to `0` when absent from the
/// deserialized input.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct SummaryTotals {
    /// Number of analyzed plus skipped records.
    pub files_considered: u64,
    /// Number of analyzed records.
    pub files_analyzed: u64,
    /// Number of skipped records.
    pub files_skipped: u64,
    /// Sum of raw physical line counts over analyzed files.
    pub total_physical_lines: u64,
    /// Sum of effective code lines over analyzed files.
    pub code_lines: u64,
    /// Sum of effective comment lines over analyzed files.
    pub comment_lines: u64,
    /// Sum of effective blank lines over analyzed files.
    pub blank_lines: u64,
    /// Sum of separately-counted mixed lines over analyzed files.
    pub mixed_lines_separate: u64,
    /// Sum of the raw `functions` counter over analyzed files.
    #[serde(default)]
    pub functions: u64,
    /// Sum of the raw `classes` counter over analyzed files.
    #[serde(default)]
    pub classes: u64,
    /// Sum of the raw `variables` counter over analyzed files.
    #[serde(default)]
    pub variables: u64,
    /// Sum of the raw `imports` counter over analyzed files.
    #[serde(default)]
    pub imports: u64,
}
97
/// Totals for all analyzed files of one language.
///
/// Fields marked `#[serde(default)]` fall back to `0` when absent from the
/// deserialized input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LanguageSummary {
    /// Language these totals belong to.
    pub language: Language,
    /// Number of analyzed files in this language.
    pub files: u64,
    /// Sum of raw physical line counts.
    pub total_physical_lines: u64,
    /// Sum of effective code lines.
    pub code_lines: u64,
    /// Sum of effective comment lines.
    pub comment_lines: u64,
    /// Sum of effective blank lines.
    pub blank_lines: u64,
    /// Sum of separately-counted mixed lines.
    pub mixed_lines_separate: u64,
    /// Sum of the raw `functions` counter.
    #[serde(default)]
    pub functions: u64,
    /// Sum of the raw `classes` counter.
    #[serde(default)]
    pub classes: u64,
    /// Sum of the raw `variables` counter.
    #[serde(default)]
    pub variables: u64,
    /// Sum of the raw `imports` counter.
    #[serde(default)]
    pub imports: u64,
}
116
/// Outcome for one candidate file, whether it was analyzed or skipped.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileRecord {
    /// Path of the file with backslashes replaced by `/`.
    pub path: String,
    /// Path with the scan root stripped when the file lies under it, using `/`
    /// separators.
    pub relative_path: String,
    /// Detected language; `None` on skipped records.
    pub language: Option<Language>,
    /// File size in bytes as reported by the file's metadata (`0` when the
    /// metadata could not be read).
    pub size_bytes: u64,
    /// Label of the encoding used to decode the file; `None` on skipped records.
    pub detected_encoding: Option<String>,
    /// Raw per-category counts produced by the language analyzer.
    pub raw_line_categories: RawLineCounts,
    /// Counts after the configured policies were applied.
    pub effective_counts: EffectiveCounts,
    /// Final disposition of the file.
    pub status: FileStatus,
    /// Warnings collected while decoding and analyzing, or the skip reason.
    pub warnings: Vec<String>,
    /// Result of the generated-file heuristic for analyzed files.
    pub generated: bool,
    /// Result of the minified-file heuristic for analyzed files.
    pub minified: bool,
    /// Whether the path was classified as lying in a vendor directory.
    pub vendor: bool,
    /// Parse mode used by the analyzer; `None` on skipped records.
    pub parse_mode: Option<ParseMode>,
    /// Name of the git submodule that owns this file, when submodule breakdown
    /// assigned one; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub submodule: Option<String>,
}
135
/// Totals for the analyzed files attributed to one git submodule.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SubmoduleSummary {
    /// Submodule name as written in the `[submodule "<name>"]` header.
    pub name: String,
    /// Submodule `path` value from `.gitmodules`, with `/` separators.
    pub relative_path: String,
    /// Number of analyzed files attributed to the submodule.
    pub files_analyzed: u64,
    /// Sum of raw physical line counts.
    pub total_physical_lines: u64,
    /// Sum of effective code lines.
    pub code_lines: u64,
    /// Sum of effective comment lines.
    pub comment_lines: u64,
    /// Sum of effective blank lines.
    pub blank_lines: u64,
    /// Per-language totals restricted to this submodule's files.
    pub language_summaries: Vec<LanguageSummary>,
}
148
/// Complete result of one analysis run; this is the document written by
/// `write_json` and read back by `read_json`.
///
/// The optional git fields and `submodule_summaries` default when absent from
/// the input and are omitted from the output when empty / `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalysisRun {
    /// Tool name, version, run id and timestamp.
    pub tool: ToolMetadata,
    /// Operating system, architecture, runtime mode, user and host.
    pub environment: EnvironmentMetadata,
    /// Copy of the configuration the run was executed with.
    pub effective_configuration: AppConfig,
    /// Configured root paths, rendered with `/` separators.
    pub input_roots: Vec<String>,
    /// Totals over all files.
    pub summary_totals: SummaryTotals,
    /// Totals grouped by language.
    pub totals_by_language: Vec<LanguageSummary>,
    /// Records of analyzed files, sorted by relative path.
    pub per_file_records: Vec<FileRecord>,
    /// Records of skipped files, sorted by relative path.
    pub skipped_file_records: Vec<FileRecord>,
    /// Discovery warnings plus per-file warnings prefixed with the relative path.
    pub warnings: Vec<String>,
    /// Per-submodule totals; empty unless submodule breakdown is enabled.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub submodule_summaries: Vec<SubmoduleSummary>,
    /// Output of `git rev-parse --short HEAD` in the first root, if available.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_short: Option<String>,
    /// Output of `git rev-parse HEAD` in the first root, if available.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_long: Option<String>,
    /// Output of `git branch --show-current`, if available.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_branch: Option<String>,
    /// Output of `git log --format=%an -1`, if available.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_author: Option<String>,
    /// Tags pointing at `HEAD`, joined with `", "`, if any.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_tags: Option<String>,
    /// Output of `git log --format=%aI -1`, if available.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub git_commit_date: Option<String>,
}
182
/// Runs `git <args>` with `dir` as the working directory and returns its trimmed
/// standard output.
///
/// Yields `None` when the process cannot be started, exits unsuccessfully,
/// prints non-UTF-8 bytes, or prints only whitespace.
fn run_git_in(dir: &Path, args: &[&str]) -> Option<String> {
    let output = std::process::Command::new("git")
        .args(args)
        .current_dir(dir)
        .output()
        .ok()?;
    if !output.status.success() {
        return None;
    }

    let stdout = String::from_utf8(output.stdout).ok()?;
    let trimmed = stdout.trim();
    if trimmed.is_empty() {
        None
    } else {
        Some(trimmed.to_string())
    }
}
194
/// Git details gathered for a run by `detect_git_for_run`; each field is `None`
/// when the corresponding git command yielded nothing.
#[derive(Default)]
struct GitInfo {
    /// Abbreviated hash of `HEAD`.
    commit_short: Option<String>,
    /// Full hash of `HEAD`.
    commit_long: Option<String>,
    /// Currently checked-out branch name.
    branch: Option<String>,
    /// Author name of the latest commit.
    author: Option<String>,
    /// Tags pointing at `HEAD`, joined with `", "`.
    tags: Option<String>,
    /// Author date of the latest commit (`%aI` format).
    commit_date: Option<String>,
}
204
205fn detect_git_for_run(project_path: &Path) -> GitInfo {
206 GitInfo {
207 commit_short: run_git_in(project_path, &["rev-parse", "--short", "HEAD"]),
208 commit_long: run_git_in(project_path, &["rev-parse", "HEAD"]),
209 branch: run_git_in(project_path, &["branch", "--show-current"]),
210 author: run_git_in(project_path, &["log", "--format=%an", "-1"]),
211 tags: run_git_in(project_path, &["tag", "--points-at", "HEAD"]).map(|t| {
212 t.lines()
213 .filter(|l| !l.is_empty())
214 .collect::<Vec<_>>()
215 .join(", ")
216 }),
217 commit_date: run_git_in(project_path, &["log", "--format=%aI", "-1"]),
218 }
219}
220
/// Returns the invoking user's name from `USERNAME`, then `USER`, falling back
/// to `"unknown"` when neither variable can be read.
fn get_current_username() -> String {
    if let Ok(name) = std::env::var("USERNAME") {
        return name;
    }
    if let Ok(name) = std::env::var("USER") {
        return name;
    }
    "unknown".to_string()
}
226
/// Returns the machine's host name from `COMPUTERNAME`, then `HOSTNAME`, then the
/// trimmed contents of `/etc/hostname`, falling back to `"unknown"`.
fn get_hostname() -> String {
    if let Ok(name) = std::env::var("COMPUTERNAME") {
        return name;
    }
    if let Ok(name) = std::env::var("HOSTNAME") {
        return name;
    }
    match std::fs::read_to_string("/etc/hostname") {
        Ok(contents) => contents.trim().to_string(),
        Err(_) => "unknown".to_string(),
    }
}
233
234#[allow(clippy::too_many_arguments)]
236fn walk_root(
237 root: &Path,
238 config: &AppConfig,
239 include_globs: Option<&GlobSet>,
240 exclude_globs: Option<&GlobSet>,
241 enabled_languages: Option<&BTreeSet<Language>>,
242 seen_paths: &mut HashSet<PathBuf>,
243 analyzed: &mut Vec<FileRecord>,
244 skipped: &mut Vec<FileRecord>,
245 warnings: &mut Vec<String>,
246) -> Result<()> {
247 let mut builder = WalkBuilder::new(root);
248 builder
249 .follow_links(config.discovery.follow_symlinks)
250 .hidden(config.discovery.ignore_hidden_files)
251 .ignore(config.discovery.honor_ignore_files)
252 .parents(config.discovery.honor_ignore_files)
253 .git_ignore(config.discovery.honor_ignore_files)
254 .git_global(config.discovery.honor_ignore_files)
255 .git_exclude(config.discovery.honor_ignore_files);
256
257 for entry in builder.build() {
258 let entry = match entry {
259 Ok(entry) => entry,
260 Err(err) => {
261 warnings.push(format!("discovery warning: {err}"));
262 continue;
263 }
264 };
265
266 let path = entry.into_path();
267 if path.is_dir() || !seen_paths.insert(path.clone()) {
268 continue;
269 }
270
271 if let Some(record) = analyze_candidate_file(
272 &path,
273 root,
274 config,
275 include_globs,
276 exclude_globs,
277 enabled_languages,
278 )? {
279 push_record(record, analyzed, skipped, warnings);
280 }
281 }
282
283 Ok(())
284}
285
286fn process_submodules(config: &AppConfig, analyzed: &mut [FileRecord]) -> Vec<SubmoduleSummary> {
288 let root = config.discovery.root_paths[0]
289 .canonicalize()
290 .unwrap_or_else(|_| config.discovery.root_paths[0].clone());
291 let submodules = detect_submodules(&root);
292 if submodules.is_empty() {
293 return Vec::new();
294 }
295
296 for file in analyzed.iter_mut() {
297 for (name, sub_path) in &submodules {
298 let prefix = sub_path.to_string_lossy().replace('\\', "/");
299 let rel = &file.relative_path;
300 if rel == &prefix || rel.starts_with(&format!("{prefix}/")) {
301 file.submodule = Some(name.clone());
302 break;
303 }
304 }
305 }
306
307 build_submodule_summaries(analyzed, &submodules)
308}
309
310fn assemble_run(
312 config: &AppConfig,
313 runtime_mode: &str,
314 analyzed: Vec<FileRecord>,
315 skipped: Vec<FileRecord>,
316 warnings: Vec<String>,
317 submodule_summaries: Vec<SubmoduleSummary>,
318) -> AnalysisRun {
319 let summary = build_summary(&analyzed, &skipped);
320 let language_summaries = build_language_summaries(&analyzed);
321
322 let first_root = config
323 .discovery
324 .root_paths
325 .first()
326 .map(|p| p.canonicalize().unwrap_or_else(|_| p.clone()));
327 let git = first_root
328 .as_deref()
329 .map(detect_git_for_run)
330 .unwrap_or_default();
331
332 let now = Utc::now();
333 let run_id = {
334 let uuid_suffix = Uuid::new_v4().simple().to_string();
335 format!("{}-{}", now.format("%Y%m%d-%H%M"), uuid_suffix)
336 };
337
338 AnalysisRun {
339 tool: ToolMetadata {
340 name: "sloc".into(),
341 version: env!("CARGO_PKG_VERSION").into(),
342 run_id,
343 timestamp_utc: now,
344 },
345 environment: EnvironmentMetadata {
346 operating_system: std::env::consts::OS.into(),
347 architecture: std::env::consts::ARCH.into(),
348 runtime_mode: runtime_mode.into(),
349 initiator_username: get_current_username(),
350 initiator_hostname: get_hostname(),
351 },
352 effective_configuration: config.clone(),
353 input_roots: config
354 .discovery
355 .root_paths
356 .iter()
357 .map(|p| path_to_string(p))
358 .collect(),
359 summary_totals: summary,
360 totals_by_language: language_summaries,
361 per_file_records: analyzed,
362 skipped_file_records: skipped,
363 warnings,
364 submodule_summaries,
365 git_commit_short: git.commit_short,
366 git_commit_long: git.commit_long,
367 git_branch: git.branch,
368 git_commit_author: git.author,
369 git_tags: git.tags,
370 git_commit_date: git.commit_date,
371 }
372}
373
374#[allow(clippy::too_many_lines)]
379pub fn analyze(config: &AppConfig, runtime_mode: &str) -> Result<AnalysisRun> {
380 config.validate()?;
381
382 if config.discovery.root_paths.is_empty() {
383 anyhow::bail!("no input paths were provided");
384 }
385
386 let include_globs = compile_globset(&config.discovery.include_globs)?;
387 let exclude_globs = compile_globset(&config.discovery.exclude_globs)?;
388 let enabled_languages = parse_enabled_languages(&config.analysis.enabled_languages)?;
389
390 let mut analyzed = Vec::new();
391 let mut skipped = Vec::new();
392 let mut warnings = Vec::new();
393 let mut seen_paths = HashSet::new();
394
395 for root in &config.discovery.root_paths {
396 let root = root.canonicalize().unwrap_or_else(|_| root.clone());
397
398 if root.is_file() {
399 if let Some(record) = analyze_candidate_file(
400 &root,
401 root.parent().unwrap_or_else(|| Path::new(".")),
402 config,
403 include_globs.as_ref(),
404 exclude_globs.as_ref(),
405 enabled_languages.as_ref(),
406 )? {
407 push_record(record, &mut analyzed, &mut skipped, &mut warnings);
408 }
409 continue;
410 }
411
412 walk_root(
413 &root,
414 config,
415 include_globs.as_ref(),
416 exclude_globs.as_ref(),
417 enabled_languages.as_ref(),
418 &mut seen_paths,
419 &mut analyzed,
420 &mut skipped,
421 &mut warnings,
422 )?;
423 }
424
425 analyzed.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
426 skipped.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
427
428 let submodule_summaries = if config.discovery.submodule_breakdown {
430 process_submodules(config, &mut analyzed)
431 } else {
432 Vec::new()
433 };
434
435 Ok(assemble_run(
436 config,
437 runtime_mode,
438 analyzed,
439 skipped,
440 warnings,
441 submodule_summaries,
442 ))
443}
444
445fn push_record(
446 record: FileRecord,
447 analyzed: &mut Vec<FileRecord>,
448 skipped: &mut Vec<FileRecord>,
449 warnings: &mut Vec<String>,
450) {
451 warnings.extend(
452 record
453 .warnings
454 .iter()
455 .map(|warning| format!("{}: {warning}", record.relative_path)),
456 );
457
458 match record.status {
459 FileStatus::AnalyzedExact | FileStatus::AnalyzedBestEffort => analyzed.push(record),
460 _ => skipped.push(record),
461 }
462}
463
464#[allow(clippy::too_many_arguments)]
468fn check_metadata_policy(
469 path: &Path,
470 root: &Path,
471 relative_path: &str,
472 metadata: &fs::Metadata,
473 config: &AppConfig,
474 include_globs: Option<&GlobSet>,
475 exclude_globs: Option<&GlobSet>,
476) -> MetadataPolicyOutcome {
477 if metadata.file_type().is_symlink() && !config.discovery.follow_symlinks {
478 return MetadataPolicyOutcome::Skip(Box::new(skipped_record(
479 path,
480 root,
481 metadata.len(),
482 FileStatus::SkippedByPolicy,
483 vec!["symlink skipped by policy".into()],
484 )));
485 }
486
487 if file_name_eq(path, ".gitignore") {
488 return MetadataPolicyOutcome::Skip(Box::new(skipped_record(
489 path,
490 root,
491 metadata.len(),
492 FileStatus::SkippedByPolicy,
493 vec![".gitignore is always excluded".into()],
494 )));
495 }
496
497 if is_excluded_dir_path(path, &config.discovery.excluded_directories) {
498 return MetadataPolicyOutcome::Skip(Box::new(skipped_record(
499 path,
500 root,
501 metadata.len(),
502 FileStatus::SkippedByPolicy,
503 vec!["path matched excluded directory setting".into()],
504 )));
505 }
506
507 if metadata.len() > config.discovery.max_file_size_bytes {
508 return MetadataPolicyOutcome::Skip(Box::new(skipped_record(
509 path,
510 root,
511 metadata.len(),
512 FileStatus::SkippedByPolicy,
513 vec![format!(
514 "file exceeded max_file_size_bytes ({})",
515 config.discovery.max_file_size_bytes
516 )],
517 )));
518 }
519
520 if let Some(globs) = include_globs {
521 if !globs.is_match(Path::new(relative_path)) && !globs.is_match(path) {
522 return MetadataPolicyOutcome::Exclude;
523 }
524 }
525
526 if let Some(globs) = exclude_globs {
527 if globs.is_match(Path::new(relative_path)) || globs.is_match(path) {
528 return MetadataPolicyOutcome::Skip(Box::new(skipped_record(
529 path,
530 root,
531 metadata.len(),
532 FileStatus::SkippedByPolicy,
533 vec!["path matched exclude glob".into()],
534 )));
535 }
536 }
537
538 if is_known_lockfile(path) && !config.analysis.include_lockfiles {
539 return MetadataPolicyOutcome::Skip(Box::new(skipped_record(
540 path,
541 root,
542 metadata.len(),
543 FileStatus::SkippedByPolicy,
544 vec!["lockfile skipped by default policy".into()],
545 )));
546 }
547
548 MetadataPolicyOutcome::Continue
549}
550
551fn check_content_policy(
555 path: &Path,
556 root: &Path,
557 size_bytes: u64,
558 bytes: &[u8],
559 config: &AppConfig,
560) -> (bool, bool, bool, Option<FileRecord>) {
561 let vendor = is_vendor_path(path);
562 if vendor && config.analysis.vendor_directory_detection {
563 return (
564 vendor,
565 false,
566 false,
567 Some(skipped_record(
568 path,
569 root,
570 size_bytes,
571 FileStatus::SkippedByPolicy,
572 vec!["vendor file skipped by policy".into()],
573 )),
574 );
575 }
576
577 let generated = config.analysis.generated_file_detection && looks_generated(path, bytes);
578 if generated {
579 return (
580 vendor,
581 generated,
582 false,
583 Some(skipped_record(
584 path,
585 root,
586 size_bytes,
587 FileStatus::SkippedByPolicy,
588 vec!["generated file skipped by policy".into()],
589 )),
590 );
591 }
592
593 let minified = config.analysis.minified_file_detection && looks_minified(path, bytes);
594 if minified {
595 return (
596 vendor,
597 generated,
598 minified,
599 Some(skipped_record(
600 path,
601 root,
602 size_bytes,
603 FileStatus::SkippedByPolicy,
604 vec!["minified file skipped by policy".into()],
605 )),
606 );
607 }
608
609 (vendor, generated, minified, None)
610}
611
612fn decode_file_contents(
614 path: &Path,
615 root: &Path,
616 size_bytes: u64,
617 bytes: &[u8],
618 config: &AppConfig,
619) -> Result<Option<(String, String, Vec<String>)>> {
620 if is_binary(bytes) {
621 return match config.analysis.binary_file_behavior {
622 BinaryFileBehavior::Skip => Ok(None),
623 BinaryFileBehavior::Fail => {
624 anyhow::bail!("binary file encountered: {}", path.display())
625 }
626 };
627 }
628
629 match decode_bytes(bytes) {
630 Ok(result) => Ok(Some(result)),
631 Err(err) => match config.analysis.decode_failure_behavior {
632 FailureBehavior::WarnSkip => {
633 let _ = (path, root, size_bytes); Err(anyhow::anyhow!("__decode_warn__: {err}"))
638 }
639 FailureBehavior::Fail => {
640 anyhow::bail!("decode failure for {}: {err}", path.display())
641 }
642 },
643 }
644}
645
646#[allow(clippy::too_many_lines)]
647fn analyze_candidate_file(
648 path: &Path,
649 root: &Path,
650 config: &AppConfig,
651 include_globs: Option<&GlobSet>,
652 exclude_globs: Option<&GlobSet>,
653 enabled_languages: Option<&BTreeSet<Language>>,
654) -> Result<Option<FileRecord>> {
655 let metadata = match fs::symlink_metadata(path) {
656 Ok(metadata) => metadata,
657 Err(err) => {
658 return Ok(Some(skipped_record(
659 path,
660 root,
661 0,
662 FileStatus::ErrorInternal,
663 vec![format!("failed to read metadata: {err}")],
664 )));
665 }
666 };
667
668 let relative_path = relative_path_string(path, root);
669
670 match check_metadata_policy(
672 path,
673 root,
674 &relative_path,
675 &metadata,
676 config,
677 include_globs,
678 exclude_globs,
679 ) {
680 MetadataPolicyOutcome::Skip(record) => return Ok(Some(*record)),
681 MetadataPolicyOutcome::Exclude => return Ok(None),
682 MetadataPolicyOutcome::Continue => {}
683 }
684
685 let bytes = match fs::read(path) {
686 Ok(bytes) => bytes,
687 Err(err) => {
688 return Ok(Some(skipped_record(
689 path,
690 root,
691 metadata.len(),
692 FileStatus::ErrorInternal,
693 vec![format!("failed to read file: {err}")],
694 )));
695 }
696 };
697
698 let (vendor, generated, minified, skip_record) =
700 check_content_policy(path, root, metadata.len(), &bytes, config);
701 if let Some(record) = skip_record {
702 return Ok(Some(record));
703 }
704
705 let (text, encoding, decode_warnings) =
707 match decode_file_contents(path, root, metadata.len(), &bytes, config) {
708 Ok(Some(result)) => result,
709 Ok(None) => {
710 return Ok(Some(skipped_record(
711 path,
712 root,
713 metadata.len(),
714 FileStatus::SkippedBinary,
715 vec!["binary file skipped by default".into()],
716 )));
717 }
718 Err(err) => {
719 let msg = err.to_string();
720 if let Some(warn_msg) = msg.strip_prefix("__decode_warn__: ") {
721 return Ok(Some(skipped_record(
722 path,
723 root,
724 metadata.len(),
725 FileStatus::SkippedDecodeError,
726 vec![warn_msg.to_string()],
727 )));
728 }
729 return Err(err);
730 }
731 };
732
733 let first_line = text.lines().next();
734 let language = detect_language(
735 path,
736 first_line,
737 &config.analysis.extension_overrides,
738 config.analysis.shebang_detection,
739 );
740
741 let Some(language) = language else {
742 return Ok(Some(skipped_record(
743 path,
744 root,
745 metadata.len(),
746 FileStatus::SkippedUnsupported,
747 vec!["unsupported or undetected language".into()],
748 )));
749 };
750
751 if let Some(enabled) = enabled_languages {
752 if !enabled.contains(&language) {
753 return Ok(Some(skipped_record(
754 path,
755 root,
756 metadata.len(),
757 FileStatus::SkippedByPolicy,
758 vec![format!(
759 "language {} disabled by configuration",
760 language.display_name()
761 )],
762 )));
763 }
764 }
765
766 let ieee_opts = AnalysisOptions {
767 blank_in_block_comment_as_comment: config.analysis.blank_in_block_comment_policy
768 == BlankInBlockCommentPolicy::CountAsComment,
769 collapse_continuation_lines: config.analysis.continuation_line_policy
770 == ContinuationLinePolicy::CollapseToLogical,
771 };
772 let analysis = analyze_text(language, &text, ieee_opts);
773 let effective_counts = compute_effective_counts(
774 &analysis.raw,
775 config.analysis.mixed_line_policy,
776 config.analysis.python_docstrings_as_comments,
777 config.analysis.count_compiler_directives,
778 );
779
780 let mut warnings = decode_warnings;
781 warnings.extend(analysis.warnings.clone());
782
783 Ok(Some(FileRecord {
784 path: path_to_string(path),
785 relative_path,
786 language: Some(language),
787 size_bytes: metadata.len(),
788 detected_encoding: Some(encoding),
789 raw_line_categories: analysis.raw,
790 effective_counts,
791 status: match analysis.parse_mode {
792 ParseMode::Lexical | ParseMode::TreeSitter => FileStatus::AnalyzedExact,
793 ParseMode::LexicalBestEffort => FileStatus::AnalyzedBestEffort,
794 },
795 warnings,
796 generated,
797 minified,
798 vendor,
799 parse_mode: Some(analysis.parse_mode),
800 submodule: None,
801 }))
802}
803
804const fn compute_effective_counts(
805 raw: &RawLineCounts,
806 mixed_line_policy: MixedLinePolicy,
807 python_docstrings_as_comments: bool,
808 count_compiler_directives: bool,
809) -> EffectiveCounts {
810 let mut effective = EffectiveCounts {
811 code_lines: raw.code_only_lines,
812 comment_lines: raw.single_comment_only_lines + raw.multi_comment_only_lines,
813 blank_lines: raw.blank_only_lines,
814 mixed_lines_separate: 0,
815 };
816
817 if python_docstrings_as_comments {
818 effective.comment_lines += raw.docstring_comment_lines;
819 } else {
820 effective.code_lines += raw.docstring_comment_lines;
821 }
822
823 let mixed_total = raw.mixed_code_single_comment_lines + raw.mixed_code_multi_comment_lines;
824 match mixed_line_policy {
825 MixedLinePolicy::CodeOnly => effective.code_lines += mixed_total,
826 MixedLinePolicy::CodeAndComment => {
827 effective.code_lines += mixed_total;
828 effective.comment_lines += mixed_total;
829 }
830 MixedLinePolicy::CommentOnly => effective.comment_lines += mixed_total,
831 MixedLinePolicy::SeparateMixedCategory => effective.mixed_lines_separate += mixed_total,
832 }
833
834 if !count_compiler_directives {
837 effective.code_lines = effective
838 .code_lines
839 .saturating_sub(raw.compiler_directive_lines);
840 }
841
842 effective
843}
844
845fn build_summary(analyzed: &[FileRecord], skipped: &[FileRecord]) -> SummaryTotals {
846 let mut summary = SummaryTotals {
847 files_considered: (analyzed.len() + skipped.len()) as u64,
848 files_analyzed: analyzed.len() as u64,
849 files_skipped: skipped.len() as u64,
850 ..Default::default()
851 };
852
853 for record in analyzed {
854 summary.total_physical_lines += record.raw_line_categories.total_physical_lines;
855 summary.code_lines += record.effective_counts.code_lines;
856 summary.comment_lines += record.effective_counts.comment_lines;
857 summary.blank_lines += record.effective_counts.blank_lines;
858 summary.mixed_lines_separate += record.effective_counts.mixed_lines_separate;
859 summary.functions += record.raw_line_categories.functions;
860 summary.classes += record.raw_line_categories.classes;
861 summary.variables += record.raw_line_categories.variables;
862 summary.imports += record.raw_line_categories.imports;
863 }
864
865 summary
866}
867
868fn build_language_summaries(analyzed: &[FileRecord]) -> Vec<LanguageSummary> {
869 let mut by_language: BTreeMap<Language, LanguageSummary> = BTreeMap::new();
870 for record in analyzed {
871 let Some(language) = record.language else {
872 continue;
873 };
874 let entry = by_language.entry(language).or_insert(LanguageSummary {
875 language,
876 files: 0,
877 total_physical_lines: 0,
878 code_lines: 0,
879 comment_lines: 0,
880 blank_lines: 0,
881 mixed_lines_separate: 0,
882 functions: 0,
883 classes: 0,
884 variables: 0,
885 imports: 0,
886 });
887 entry.files += 1;
888 entry.total_physical_lines += record.raw_line_categories.total_physical_lines;
889 entry.code_lines += record.effective_counts.code_lines;
890 entry.comment_lines += record.effective_counts.comment_lines;
891 entry.blank_lines += record.effective_counts.blank_lines;
892 entry.mixed_lines_separate += record.effective_counts.mixed_lines_separate;
893 entry.functions += record.raw_line_categories.functions;
894 entry.classes += record.raw_line_categories.classes;
895 entry.variables += record.raw_line_categories.variables;
896 entry.imports += record.raw_line_categories.imports;
897 }
898
899 by_language.into_values().collect()
900}
901
902fn skipped_record(
903 path: &Path,
904 root: &Path,
905 size_bytes: u64,
906 status: FileStatus,
907 warnings: Vec<String>,
908) -> FileRecord {
909 FileRecord {
910 path: path_to_string(path),
911 relative_path: relative_path_string(path, root),
912 language: None,
913 size_bytes,
914 detected_encoding: None,
915 raw_line_categories: RawLineCounts::default(),
916 effective_counts: EffectiveCounts::default(),
917 status,
918 warnings,
919 generated: false,
920 minified: false,
921 vendor: false,
922 parse_mode: None,
923 submodule: None,
924 }
925}
926
/// Renders `path` relative to `root` using `/` separators. When `path` does not
/// start with `root`, the whole path is rendered instead.
fn relative_path_string(path: &Path, root: &Path) -> String {
    let relative = match path.strip_prefix(root) {
        Ok(stripped) => stripped,
        Err(_) => path,
    };
    let rendered = relative.to_string_lossy();
    rendered.replace('\\', "/")
}
933
/// Renders `path` as a (lossy) string with every backslash turned into `/`.
fn path_to_string(path: &Path) -> String {
    path.to_string_lossy()
        .chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect()
}
937
/// Reads `<root>/.gitmodules` and returns `(name, path)` for every submodule
/// section that declares a `path`.
///
/// Returns an empty vector when the file is missing or unreadable. Lines that
/// are neither a well-formed `[submodule "<name>"]` header nor a `path = ...`
/// entry are ignored; sections without a `path` are dropped.
#[must_use]
pub fn detect_submodules(root: &Path) -> Vec<(String, PathBuf)> {
    let gitmodules = root.join(".gitmodules");
    if !gitmodules.is_file() {
        return Vec::new();
    }
    let Ok(content) = fs::read_to_string(&gitmodules) else {
        return Vec::new();
    };

    let mut result = Vec::new();
    let mut current_name: Option<String> = None;
    let mut current_path: Option<PathBuf> = None;

    for line in content.lines() {
        let trimmed = line.trim();

        // Strip prefix and suffix in sequence so they can never overlap: a
        // malformed `[submodule "]` is rejected instead of producing an
        // inverted slice range (which would panic).
        let header = trimmed
            .strip_prefix("[submodule \"")
            .and_then(|rest| rest.strip_suffix("\"]"));

        if let Some(name) = header {
            // A new section begins: flush the previous one if it was complete.
            if let (Some(name), Some(path)) = (current_name.take(), current_path.take()) {
                result.push((name, path));
            }
            current_name = Some(name.to_string());
        } else if let Some((key, value)) = trimmed.split_once('=') {
            // Require the key to be exactly `path` (git config keys are
            // case-insensitive) so keys that merely start with it, such as
            // `pathspec`, do not overwrite the submodule path.
            if key.trim().eq_ignore_ascii_case("path") {
                current_path = Some(PathBuf::from(value.trim()));
            }
        }
    }
    if let (Some(name), Some(path)) = (current_name, current_path) {
        result.push((name, path));
    }

    result
}
974
975fn build_submodule_summaries(
976 analyzed: &[FileRecord],
977 submodules: &[(String, PathBuf)],
978) -> Vec<SubmoduleSummary> {
979 submodules
980 .iter()
981 .map(|(name, path)| {
982 let files: Vec<&FileRecord> = analyzed
983 .iter()
984 .filter(|f| f.submodule.as_deref() == Some(name.as_str()))
985 .collect();
986
987 let files_analyzed = files.len() as u64;
988 let total_physical_lines = files
989 .iter()
990 .map(|f| f.raw_line_categories.total_physical_lines)
991 .sum();
992 let code_lines = files.iter().map(|f| f.effective_counts.code_lines).sum();
993 let comment_lines = files.iter().map(|f| f.effective_counts.comment_lines).sum();
994 let blank_lines = files.iter().map(|f| f.effective_counts.blank_lines).sum();
995 let language_summaries = build_language_summaries_from_slice(&files);
996
997 SubmoduleSummary {
998 name: name.clone(),
999 relative_path: path.to_string_lossy().replace('\\', "/"),
1000 files_analyzed,
1001 total_physical_lines,
1002 code_lines,
1003 comment_lines,
1004 blank_lines,
1005 language_summaries,
1006 }
1007 })
1008 .filter(|s| s.files_analyzed > 0)
1009 .collect()
1010}
1011
1012fn build_language_summaries_from_slice(files: &[&FileRecord]) -> Vec<LanguageSummary> {
1013 let mut map: BTreeMap<String, LanguageSummary> = BTreeMap::new();
1014 for file in files {
1015 if let Some(lang) = file.language {
1016 let entry = map
1017 .entry(lang.display_name().to_string())
1018 .or_insert_with(|| LanguageSummary {
1019 language: lang,
1020 files: 0,
1021 total_physical_lines: 0,
1022 code_lines: 0,
1023 comment_lines: 0,
1024 blank_lines: 0,
1025 mixed_lines_separate: 0,
1026 functions: 0,
1027 classes: 0,
1028 variables: 0,
1029 imports: 0,
1030 });
1031 entry.files += 1;
1032 let r = &file.raw_line_categories;
1033 entry.total_physical_lines += r.total_physical_lines;
1034 entry.code_lines += file.effective_counts.code_lines;
1035 entry.comment_lines += file.effective_counts.comment_lines;
1036 entry.blank_lines += file.effective_counts.blank_lines;
1037 entry.mixed_lines_separate += file.effective_counts.mixed_lines_separate;
1038 }
1039 }
1040 map.into_values().collect()
1041}
1042
/// Returns `true` when the final component of `path` is valid UTF-8 and equals
/// `expected`.
fn file_name_eq(path: &Path, expected: &str) -> bool {
    let file_name = path.file_name().and_then(|name| name.to_str());
    file_name == Some(expected)
}
1048
/// Returns `true` when any UTF-8 component of `path` exactly equals one of the
/// names in `excluded_dirs`.
fn is_excluded_dir_path(path: &Path, excluded_dirs: &[String]) -> bool {
    for component in path.components() {
        let Some(part) = component.as_os_str().to_str() else {
            continue;
        };
        if excluded_dirs.iter().any(|excluded| excluded == part) {
            return true;
        }
    }
    false
}
1057
/// Returns `true` when any component of `path` is named `vendor`,
/// `node_modules` or `packages`.
fn is_vendor_path(path: &Path) -> bool {
    const VENDOR_DIRS: [&str; 3] = ["vendor", "node_modules", "packages"];

    path.components()
        .filter_map(|component| component.as_os_str().to_str())
        .any(|part| VENDOR_DIRS.contains(&part))
}
1066
/// Returns `true` when the file name of `path` is one of the recognized
/// package-manager lockfiles.
fn is_known_lockfile(path: &Path) -> bool {
    const LOCKFILES: [&str; 7] = [
        "Cargo.lock",
        "package-lock.json",
        "yarn.lock",
        "pnpm-lock.yaml",
        "Pipfile.lock",
        "poetry.lock",
        "composer.lock",
    ];

    let Some(name) = path.file_name().and_then(|name| name.to_str()) else {
        return false;
    };
    LOCKFILES.contains(&name)
}
1083
/// Heuristic for generated files: the file name contains `.generated.` or `.g.`,
/// or the first 1024 bytes contain `@generated` or `generated by` (compared
/// ASCII case-insensitively).
fn looks_generated(path: &Path, bytes: &[u8]) -> bool {
    let name = path
        .file_name()
        .and_then(|name| name.to_str())
        .unwrap_or_default();
    let name_marked = [".generated.", ".g."]
        .iter()
        .any(|marker| name.contains(*marker));
    if name_marked {
        return true;
    }

    let head = &bytes[..bytes.len().min(1024)];
    let lowered = String::from_utf8_lossy(head).to_ascii_lowercase();
    ["@generated", "generated by"]
        .iter()
        .any(|marker| lowered.contains(*marker))
}
1096
/// Heuristic for minified files: the file name contains `.min.`, or within the
/// first 4096 bytes the longest line exceeds 2000 bytes and whitespace makes up
/// less than 1% of the sample.
fn looks_minified(path: &Path, bytes: &[u8]) -> bool {
    let name = path
        .file_name()
        .and_then(|name| name.to_str())
        .unwrap_or_default();
    if name.contains(".min.") {
        return true;
    }

    let window = &bytes[..bytes.len().min(4096)];
    let sample = String::from_utf8_lossy(window);

    let mut longest_line = 0;
    for line in sample.lines() {
        longest_line = longest_line.max(line.len());
    }
    if longest_line <= 2000 {
        return false;
    }

    let whitespace = sample.chars().filter(|ch| ch.is_whitespace()).count();
    whitespace * 100 < sample.len().max(1)
}
1111
/// Heuristic binary check: content starting with a UTF-8 or UTF-16 byte-order
/// mark is treated as text; otherwise the content is binary when a NUL byte
/// occurs within the first 8192 bytes.
fn is_binary(bytes: &[u8]) -> bool {
    const TEXT_BOMS: [&[u8]; 3] = [&[0xEF, 0xBB, 0xBF], &[0xFF, 0xFE], &[0xFE, 0xFF]];

    if TEXT_BOMS.iter().any(|bom| bytes.starts_with(bom)) {
        return false;
    }

    bytes.iter().take(8192).any(|&byte| byte == 0)
}
1123
1124fn decode_bytes(bytes: &[u8]) -> std::result::Result<(String, String, Vec<String>), String> {
1125 if bytes.starts_with(&[0xEF, 0xBB, 0xBF]) {
1126 let text = String::from_utf8(bytes[3..].to_vec()).map_err(|err| err.to_string())?;
1127 return Ok((text, "utf-8-bom".into(), vec![]));
1128 }
1129
1130 if bytes.starts_with(&[0xFF, 0xFE]) {
1131 let (cow, _, had_errors) = UTF_16LE.decode(&bytes[2..]);
1132 let mut warnings = Vec::new();
1133 if had_errors {
1134 warnings.push("utf-16le decode contained replacement characters".into());
1135 }
1136 return Ok((cow.into_owned(), "utf-16le".into(), warnings));
1137 }
1138
1139 if bytes.starts_with(&[0xFE, 0xFF]) {
1140 let (cow, _, had_errors) = UTF_16BE.decode(&bytes[2..]);
1141 let mut warnings = Vec::new();
1142 if had_errors {
1143 warnings.push("utf-16be decode contained replacement characters".into());
1144 }
1145 return Ok((cow.into_owned(), "utf-16be".into(), warnings));
1146 }
1147
1148 #[allow(clippy::option_if_let_else)]
1150 if let Ok(text) = String::from_utf8(bytes.to_vec()) {
1151 Ok((text, "utf-8".into(), vec![]))
1152 } else {
1153 let (cow, _, had_errors) = WINDOWS_1252.decode(bytes);
1154 let mut warnings = vec!["decoded using windows-1252 fallback".into()];
1155 if had_errors {
1156 warnings.push("fallback decode contained replacement characters".into());
1157 }
1158 Ok((cow.into_owned(), "windows-1252".into(), warnings))
1159 }
1160}
1161
1162fn compile_globset(patterns: &[String]) -> Result<Option<GlobSet>> {
1163 if patterns.is_empty() {
1164 return Ok(None);
1165 }
1166
1167 let mut builder = GlobSetBuilder::new();
1168 for pattern in patterns {
1169 builder
1170 .add(Glob::new(pattern).with_context(|| format!("invalid glob pattern: {pattern}"))?);
1171 }
1172 Ok(Some(
1173 builder.build().context("failed to compile glob filters")?,
1174 ))
1175}
1176
1177fn parse_enabled_languages(enabled: &[String]) -> Result<Option<BTreeSet<Language>>> {
1178 if enabled.is_empty() {
1179 return Ok(None);
1180 }
1181
1182 let supported = supported_languages();
1183 let mut set = BTreeSet::new();
1184 for name in enabled {
1185 let language = Language::from_name(name)
1186 .with_context(|| format!("unsupported language in config: {name}"))?;
1187 if !supported.contains(&language) {
1188 anyhow::bail!("language {name} is not supported in this build");
1189 }
1190 set.insert(language);
1191 }
1192 Ok(Some(set))
1193}
1194
1195pub fn write_json(run: &AnalysisRun, output_path: &Path) -> Result<()> {
1199 let json = serde_json::to_string_pretty(run).context("failed to serialize analysis run")?;
1200 fs::write(output_path, json)
1201 .with_context(|| format!("failed to write JSON output to {}", output_path.display()))
1202}
1203
1204pub fn read_json(path: &Path) -> Result<AnalysisRun> {
1208 let contents = fs::read_to_string(path)
1209 .with_context(|| format!("failed to read result file {}", path.display()))?;
1210 serde_json::from_str(&contents)
1211 .with_context(|| format!("failed to parse JSON result {}", path.display()))
1212}
1213
#[cfg(test)]
mod tests {
    use super::*;

    /// Under `CodeOnly`, mixed lines are added to code and docstrings (counted as
    /// comments here) to comment lines.
    #[test]
    fn effective_counts_respect_code_only_policy() {
        let raw = RawLineCounts {
            code_only_lines: 2,
            single_comment_only_lines: 1,
            mixed_code_single_comment_lines: 3,
            docstring_comment_lines: 2,
            ..RawLineCounts::default()
        };
        let counts = compute_effective_counts(&raw, MixedLinePolicy::CodeOnly, true, true);
        assert_eq!(counts.code_lines, 5);
        assert_eq!(counts.comment_lines, 3);
    }

    /// Under `SeparateMixedCategory`, mixed lines land only in their own bucket.
    #[test]
    fn effective_counts_can_separate_mixed() {
        let raw = RawLineCounts {
            mixed_code_single_comment_lines: 2,
            mixed_code_multi_comment_lines: 1,
            ..RawLineCounts::default()
        };
        let counts =
            compute_effective_counts(&raw, MixedLinePolicy::SeparateMixedCategory, true, true);
        assert_eq!(counts.mixed_lines_separate, 3);
        assert_eq!(counts.code_lines, 0);
        assert_eq!(counts.comment_lines, 0);
    }

    /// Bytes that are not valid UTF-8 fall back to windows-1252 with a warning.
    #[test]
    fn windows_1252_fallback_decodes() {
        let bytes = vec![0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x96, 0x57];
        let (text, encoding, warnings) = decode_bytes(&bytes).unwrap();
        assert_eq!(encoding, "windows-1252");
        // 0x96 is the en dash (U+2013) in windows-1252. Written as an escape so the
        // expectation cannot be corrupted by a re-encoding of this source file.
        assert!(text.contains('\u{2013}'));
        assert!(!warnings.is_empty());
    }
}