use std::collections::HashMap;
use std::ffi::OsStr;
use std::path::{Path, PathBuf};
/// Source language of a file, used to select which comment syntax applies
/// when lines are classified.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)]
pub enum LocLanguage {
    /// Rust (`rs`).
    Rust,
    /// Python (`py`, `pyi`).
    Python,
    /// JavaScript (`js`, `jsx`, `mjs`, `cjs`).
    JavaScript,
    /// TypeScript (`ts`, `tsx`, `mts`, `cts`).
    TypeScript,
    /// Any other, missing, or non-UTF-8 extension. This is the `Default` value.
    #[default]
    Unknown,
}

impl LocLanguage {
    /// Maps a file extension (without the leading dot) to a language.
    ///
    /// Matching is ASCII case-insensitive, so `RS` and `Py` are recognised the
    /// same way as `rs` and `py`; file systems that ignore case routinely
    /// produce such names. `None`, non-UTF-8 extensions and unrecognised
    /// extensions all yield [`LocLanguage::Unknown`].
    pub fn from_extension(ext: Option<&OsStr>) -> Self {
        let ext = ext.and_then(OsStr::to_str).map(str::to_ascii_lowercase);
        match ext.as_deref() {
            Some("rs") => Self::Rust,
            Some("py" | "pyi") => Self::Python,
            Some("js" | "jsx" | "mjs" | "cjs") => Self::JavaScript,
            Some("ts" | "tsx" | "mts" | "cts") => Self::TypeScript,
            _ => Self::Unknown,
        }
    }

    /// Detects the language of `path` from its extension.
    ///
    /// Paths without an extension yield [`LocLanguage::Unknown`].
    pub fn from_path(path: &Path) -> Self {
        Self::from_extension(path.extension())
    }
}
/// Counts physical, code, comment and blank lines, and decides which files
/// take part in a count.
#[derive(Clone, Debug, Default)]
pub struct LocCounter {
    /// Options consulted when deciding whether a file is included.
    config: LocCountingConfig,
}
impl LocCounter {
pub fn new(config: LocCountingConfig) -> Self {
Self { config }
}
/// Reads `path` as UTF-8 text and counts its lines, using the language
/// implied by the file extension.
///
/// # Errors
///
/// Returns the error produced by `std::fs::read_to_string` when the file
/// cannot be read.
pub fn count_file(&self, path: &Path) -> Result<LocCount, std::io::Error> {
    std::fs::read_to_string(path).map(|text| {
        let detected = LocLanguage::from_path(path);
        self.count_content_with_language(&text, Some(detected))
    })
}
pub fn count_content(&self, content: &str) -> LocCount {
self.count_content_with_language(content, None)
}
pub fn count_content_with_language(
&self,
content: &str,
language: Option<LocLanguage>,
) -> LocCount {
let language = language.unwrap_or_default();
let physical_lines = content.lines().count();
let mut code_lines = 0;
let mut comment_lines = 0;
let mut blank_lines = 0;
let mut comment_state = CommentState::default();
for line in content.lines() {
let trimmed = line.trim();
let line_type = classify_line(trimmed, &mut comment_state, language);
match line_type {
LineType::Blank => blank_lines += 1,
LineType::Comment => comment_lines += 1,
LineType::Code => code_lines += 1,
}
}
LocCount {
physical_lines,
code_lines,
comment_lines,
blank_lines,
}
}
/// Decides whether `path` takes part in the count.
///
/// Checks run in this order and the first match rejects the file: test
/// files (unless `include_tests`), generated files (unless
/// `include_generated`), then the configured exclude patterns. The outcome
/// is reported at debug log level.
pub fn should_include(&self, path: &Path) -> bool {
    let skip_tests = !self.config.include_tests;
    let skip_generated = !self.config.include_generated;

    if skip_tests && self.is_test_file(path) {
        log::debug!("Excluding test file: {}", path.display());
        false
    } else if skip_generated && self.is_generated(path) {
        log::debug!("Excluding generated file: {}", path.display());
        false
    } else if self.is_excluded_by_pattern(path) {
        log::debug!("Excluding file by pattern: {}", path.display());
        false
    } else {
        log::debug!("Including file in LOC count: {}", path.display());
        true
    }
}
pub fn is_test_file(&self, path: &Path) -> bool {
let path_str = path.to_string_lossy();
if path_str.contains("/tests/") || path_str.contains("/test/") {
return true;
}
if let Some(file_name) = path.file_stem() {
let name = file_name.to_string_lossy();
if name.ends_with("_test") || name.ends_with("_tests") {
return true;
}
}
false
}
/// Returns `true` when `path` looks like generated code.
///
/// The path name is checked first because it needs no I/O: a path containing
/// `.generated.` or ending in `.g.rs` is treated as generated. Otherwise the
/// first 100 lines of the file are searched for one of the markers
/// `@generated`, `DO NOT EDIT` or `automatically generated`.
///
/// Only the file header is read, not the whole file. A file that cannot be
/// opened, or whose header cannot be decoded as UTF-8 before a marker is
/// found, is treated as not generated.
pub fn is_generated(&self, path: &Path) -> bool {
    const HEADER_LINES: usize = 100;
    const MARKERS: [&str; 3] = ["@generated", "DO NOT EDIT", "automatically generated"];

    let path_str = path.to_string_lossy();
    if path_str.contains(".generated.") || path_str.ends_with(".g.rs") {
        return true;
    }

    let file = match std::fs::File::open(path) {
        Ok(file) => file,
        Err(_) => return false,
    };

    // `map_while` stops at the first read/decoding error instead of retrying it.
    std::io::BufRead::lines(std::io::BufReader::new(file))
        .take(HEADER_LINES)
        .map_while(Result::ok)
        .any(|line| MARKERS.iter().any(|marker| line.contains(*marker)))
}
/// Returns `true` when the lossily-decoded path contains any of the
/// configured exclude patterns as a plain substring.
fn is_excluded_by_pattern(&self, path: &Path) -> bool {
    let haystack = path.to_string_lossy();
    self.config
        .exclude_patterns
        .iter()
        .any(|needle| haystack.contains(needle.as_str()))
}
pub fn count_from_file_paths(&self, files: &[PathBuf]) -> ProjectLocCount {
let mut file_counts = HashMap::new();
for file_path in files {
if !self.should_include(file_path) {
continue;
}
if let Ok(count) = self.count_file(file_path) {
file_counts.insert(file_path.clone(), count);
}
}
let total = self.aggregate_counts(&file_counts);
ProjectLocCount {
total,
by_file: file_counts,
}
}
fn aggregate_counts(&self, file_counts: &HashMap<PathBuf, LocCount>) -> LocCount {
let mut total = LocCount::default();
for count in file_counts.values() {
total.physical_lines += count.physical_lines;
total.code_lines += count.code_lines;
total.comment_lines += count.comment_lines;
total.blank_lines += count.blank_lines;
}
total
}
}
/// Line tallies for one piece of text, or the sum over several files.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct LocCount {
    /// Total number of lines examined.
    pub physical_lines: usize,
    /// Lines classified as code.
    pub code_lines: usize,
    /// Lines classified as comment.
    pub comment_lines: usize,
    /// Lines that are empty after trimming whitespace.
    pub blank_lines: usize,
}
/// Counts for a set of files: one entry per file plus an overall total.
#[derive(Clone, Debug)]
pub struct ProjectLocCount {
    /// Overall tallies; `count_from_file_paths` fills this with the sum of
    /// `by_file`.
    pub total: LocCount,
    /// Tallies for each counted file, keyed by its path.
    pub by_file: HashMap<PathBuf, LocCount>,
}
/// Options controlling which files are counted.
///
/// Every field carries `#[serde(default)]`, so a key missing from the
/// serialized form deserializes to `false` or an empty list.
#[derive(Clone, Debug, Default, serde::Serialize, serde::Deserialize)]
pub struct LocCountingConfig {
    /// When `false`, files recognised as test files are excluded.
    #[serde(default)]
    pub include_tests: bool,
    /// When `false`, files recognised as generated are excluded.
    #[serde(default)]
    pub include_generated: bool,
    /// NOTE(review): not read by the counting code visible here; confirm the
    /// intended effect with the callers.
    #[serde(default)]
    pub count_comments: bool,
    /// NOTE(review): not read by the counting code visible here; confirm the
    /// intended effect with the callers.
    #[serde(default)]
    pub count_blanks: bool,
    /// Plain substrings; a path containing any of them is excluded.
    #[serde(default)]
    pub exclude_patterns: Vec<String>,
}
/// Classification assigned to a single line.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum LineType {
    /// Empty after trimming whitespace.
    Blank,
    /// Consists only of comment text.
    Comment,
    /// Anything else.
    Code,
}
/// Block-comment state carried from one line to the next while classifying.
#[derive(Debug, Default, Clone)]
struct CommentState {
    /// `true` while a `/* ... */` comment opened on an earlier line is still open.
    in_block_comment: bool,
    /// Number of block comments currently open; zero when outside a comment.
    block_depth: usize,
}
fn classify_line(trimmed: &str, state: &mut CommentState, language: LocLanguage) -> LineType {
if trimmed.is_empty() {
return LineType::Blank;
}
if state.in_block_comment {
update_block_comment_state(trimmed, state, language);
return LineType::Comment;
}
if let Some(start_idx) = trimmed.find("/*") {
let before_comment = &trimmed[..start_idx];
let has_code_before = !before_comment.trim().is_empty()
&& !is_single_line_comment_start(before_comment.trim(), language);
enter_block_comment(trimmed, start_idx, state, language);
if has_code_before {
return LineType::Code;
}
if !state.in_block_comment {
if let Some(end_idx) = trimmed.rfind("*/") {
let after_comment = &trimmed[end_idx + 2..];
if after_comment.trim().is_empty() {
return LineType::Comment;
}
return LineType::Code;
}
}
return LineType::Comment;
}
if is_single_line_comment(trimmed, language) {
return LineType::Comment;
}
LineType::Code
}
/// Returns `true` when `trimmed` begins with a single-line comment marker.
///
/// `//` counts for every language. `#` counts only for Python; in Rust a
/// leading `#` introduces an attribute, and for the remaining languages it is
/// not treated as a comment either.
fn is_single_line_comment_start(trimmed: &str, language: LocLanguage) -> bool {
    let hash_is_comment = matches!(language, LocLanguage::Python);
    trimmed.starts_with("//") || (hash_is_comment && trimmed.starts_with('#'))
}
/// Returns `true` when the whole line is a single-line comment, i.e. it
/// starts with a single-line comment marker for `language`.
///
/// A line that merely starts with `*` is not a comment by itself; such
/// continuation lines are only comments while a block comment is open, which
/// is tracked separately through the block-comment state.
fn is_single_line_comment(trimmed: &str, language: LocLanguage) -> bool {
    is_single_line_comment_start(trimmed, language)
}
/// Scans `bytes` from `start_idx` and returns the block-comment depth that
/// remains after the scan.
///
/// `initial_depth` is the depth on entry. A `*/` lowers the depth (never below
/// zero); a `/*` raises it only for Rust, whose block comments nest. Scanning
/// stops as soon as the depth reaches zero, so anything after that point on
/// the line is not examined.
fn scan_comment_depth(
    bytes: &[u8],
    start_idx: usize,
    initial_depth: usize,
    language: LocLanguage,
) -> usize {
    let comments_nest = language == LocLanguage::Rust;
    let mut depth = initial_depth;
    let mut cursor = start_idx;

    loop {
        if cursor >= bytes.len() {
            break depth;
        }
        match detect_comment_marker(bytes, cursor) {
            None => cursor += 1,
            Some(CommentMarker::Open) => {
                if comments_nest {
                    depth += 1;
                }
                // Markers are two bytes wide; step past both.
                cursor += 2;
            }
            Some(CommentMarker::Close) => {
                depth = depth.saturating_sub(1);
                if depth == 0 {
                    break 0;
                }
                cursor += 2;
            }
        }
    }
}
/// Two-byte block-comment delimiter found in a line.
enum CommentMarker {
    /// The opening delimiter `/*`.
    Open,
    /// The closing delimiter `*/`.
    Close,
}

/// Reports which block-comment delimiter, if any, starts at byte `idx`.
///
/// Returns `None` when fewer than two bytes remain at `idx` or when the two
/// bytes there are neither `/*` nor `*/`.
fn detect_comment_marker(bytes: &[u8], idx: usize) -> Option<CommentMarker> {
    match bytes.get(idx..)? {
        [b'/', b'*', ..] => Some(CommentMarker::Open),
        [b'*', b'/', ..] => Some(CommentMarker::Close),
        _ => None,
    }
}
fn enter_block_comment(
trimmed: &str,
start_idx: usize,
state: &mut CommentState,
language: LocLanguage,
) {
let depth = scan_comment_depth(trimmed.as_bytes(), start_idx + 2, 1, language);
state.in_block_comment = depth > 0;
state.block_depth = depth;
}
/// Advances `state` across a line that starts inside a block comment.
///
/// The whole line is scanned starting from the current depth; `state` stays
/// inside a comment only while the resulting depth is non-zero.
fn update_block_comment_state(trimmed: &str, state: &mut CommentState, language: LocLanguage) {
    state.block_depth = scan_comment_depth(trimmed.as_bytes(), 0, state.block_depth, language);
    state.in_block_comment = state.block_depth != 0;
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_loc_counter_consistent() {
let counter = LocCounter::default();
let content = "fn main() {\n println!(\"Hello\");\n}\n";
let count1 = counter.count_content(content);
let count2 = counter.count_content(content);
assert_eq!(count1, count2);
}
#[test]
fn test_count_content() {
let counter = LocCounter::default();
let content = "// Comment\nfn main() {\n\n println!(\"Hello\");\n}\n";
let count = counter.count_content(content);
assert_eq!(count.physical_lines, 5);
assert_eq!(count.comment_lines, 1);
assert_eq!(count.blank_lines, 1);
assert_eq!(count.code_lines, 3); }
#[test]
fn test_test_file_exclusion() {
let counter = LocCounter::default();
assert!(!counter.should_include(Path::new("tests/integration_test.rs")));
assert!(!counter.should_include(Path::new("src/foo_test.rs")));
assert!(counter.should_include(Path::new("src/main.rs")));
}
#[test]
fn test_single_line_comments() {
let counter = LocCounter::default();
let rust_content = "// This is a comment\nlet x = 5;";
let count = counter.count_content_with_language(rust_content, Some(LocLanguage::Rust));
assert_eq!(count.comment_lines, 1);
assert_eq!(count.code_lines, 1);
let python_content = "# This is a comment\nx = 5";
let count = counter.count_content_with_language(python_content, Some(LocLanguage::Python));
assert_eq!(count.comment_lines, 1);
assert_eq!(count.code_lines, 1);
}
#[test]
fn test_count_from_unique_files() {
let counter = LocCounter::default();
let files = vec![PathBuf::from("src/main.rs"), PathBuf::from("src/lib.rs")];
let result = counter.count_from_file_paths(&files);
assert_eq!(result.by_file.len(), files.len());
}
#[test]
fn test_multiline_block_comment() {
let counter = LocCounter::default();
let code = "/* comment\nstill comment\nend */\ncode";
let count = counter.count_content_with_language(code, Some(LocLanguage::Rust));
assert_eq!(count.comment_lines, 3);
assert_eq!(count.code_lines, 1);
assert_eq!(count.physical_lines, 4);
}
#[test]
fn test_multiline_block_comment_no_asterisk_prefix() {
let counter = LocCounter::default();
let code = "/*\nThis line starts with text, not *\nso it should still be a comment\n*/\nfn main() {}";
let count = counter.count_content_with_language(code, Some(LocLanguage::Rust));
assert_eq!(count.comment_lines, 4);
assert_eq!(count.code_lines, 1);
}
#[test]
fn test_nested_block_comments_rust() {
let counter = LocCounter::default();
let code = "/* outer /* inner */ still outer */\ncode";
let count = counter.count_content_with_language(code, Some(LocLanguage::Rust));
assert_eq!(count.comment_lines, 1);
assert_eq!(count.code_lines, 1);
}
#[test]
fn test_nested_block_comments_multiline_rust() {
let counter = LocCounter::default();
let code = "/*\n/* nested\n*/\nstill in outer\n*/\ncode";
let count = counter.count_content_with_language(code, Some(LocLanguage::Rust));
assert_eq!(count.comment_lines, 5);
assert_eq!(count.code_lines, 1);
}
#[test]
fn test_rust_attributes_are_code() {
let counter = LocCounter::default();
let code = "#[derive(Debug)]\nstruct Foo;";
let count = counter.count_content_with_language(code, Some(LocLanguage::Rust));
assert_eq!(count.code_lines, 2);
assert_eq!(count.comment_lines, 0);
}
#[test]
fn test_rust_inner_attributes_are_code() {
let counter = LocCounter::default();
let code = "#![allow(unused)]\nfn main() {}";
let count = counter.count_content_with_language(code, Some(LocLanguage::Rust));
assert_eq!(count.code_lines, 2);
assert_eq!(count.comment_lines, 0);
}
#[test]
fn test_rust_cfg_attributes_are_code() {
let counter = LocCounter::default();
let code = "#[cfg(test)]\nmod tests {\n #[test]\n fn test_foo() {}\n}";
let count = counter.count_content_with_language(code, Some(LocLanguage::Rust));
assert_eq!(count.code_lines, 5);
assert_eq!(count.comment_lines, 0);
}
#[test]
fn test_python_hash_still_comments() {
let counter = LocCounter::default();
let code = "# This is a Python comment\ndef foo():\n pass";
let count = counter.count_content_with_language(code, Some(LocLanguage::Python));
assert_eq!(count.comment_lines, 1);
assert_eq!(count.code_lines, 2);
}
#[test]
fn test_loc_invariant_simple() {
let counter = LocCounter::default();
let code = "fn main() {\n // comment\n\n let x = 5;\n}";
let count = counter.count_content_with_language(code, Some(LocLanguage::Rust));
assert_eq!(
count.physical_lines,
count.code_lines + count.comment_lines + count.blank_lines,
"Invariant: physical = code + comment + blank"
);
}
#[test]
fn test_loc_invariant_multiline_comments() {
let counter = LocCounter::default();
let code = "/*\nMulti\nline\ncomment\n*/\nfn foo() {}\n\n// single\nlet x = 1;";
let count = counter.count_content_with_language(code, Some(LocLanguage::Rust));
assert_eq!(
count.physical_lines,
count.code_lines + count.comment_lines + count.blank_lines,
"Invariant: physical = code + comment + blank"
);
}
#[test]
fn test_loc_invariant_mixed_content() {
let counter = LocCounter::default();
let code = r#"
// Header comment
#[derive(Debug)]
struct Foo {
/* inline */ x: i32,
}
/*
Block
comment
*/
fn main() {
println!("hello");
}
"#;
let count = counter.count_content_with_language(code, Some(LocLanguage::Rust));
assert_eq!(
count.physical_lines,
count.code_lines + count.comment_lines + count.blank_lines,
"Invariant: physical = code + comment + blank"
);
}
#[test]
fn test_language_from_extension() {
assert_eq!(
LocLanguage::from_path(Path::new("foo.rs")),
LocLanguage::Rust
);
assert_eq!(
LocLanguage::from_path(Path::new("bar.py")),
LocLanguage::Python
);
assert_eq!(
LocLanguage::from_path(Path::new("baz.js")),
LocLanguage::JavaScript
);
assert_eq!(
LocLanguage::from_path(Path::new("qux.ts")),
LocLanguage::TypeScript
);
assert_eq!(
LocLanguage::from_path(Path::new("file.tsx")),
LocLanguage::TypeScript
);
assert_eq!(
LocLanguage::from_path(Path::new("unknown.xyz")),
LocLanguage::Unknown
);
}
#[test]
fn test_inline_block_comment() {
let counter = LocCounter::default();
let code = "let x = /* inline */ 5;";
let count = counter.count_content_with_language(code, Some(LocLanguage::Rust));
assert_eq!(count.code_lines, 1);
assert_eq!(count.comment_lines, 0);
}
#[test]
fn test_block_comment_single_line() {
let counter = LocCounter::default();
let code = "/* just a comment */";
let count = counter.count_content_with_language(code, Some(LocLanguage::Rust));
assert_eq!(count.comment_lines, 1);
assert_eq!(count.code_lines, 0);
}
#[test]
fn test_code_before_block_comment() {
let counter = LocCounter::default();
let code = "let x = 5; /* comment";
let count = counter.count_content_with_language(code, Some(LocLanguage::Rust));
assert_eq!(count.code_lines, 1);
assert_eq!(count.comment_lines, 0);
}
#[test]
fn test_doc_comments_are_comments() {
let counter = LocCounter::default();
let code = "/// This is a doc comment\n//! Module doc\nfn foo() {}";
let count = counter.count_content_with_language(code, Some(LocLanguage::Rust));
assert_eq!(count.comment_lines, 2);
assert_eq!(count.code_lines, 1);
}
}