use std::collections::HashMap;
use std::path::{Path, PathBuf};
use serde::{Deserialize, Serialize};
use crate::metrics::file_utils::{
check_file_size, has_binary_extension, is_binary_file, should_exclude, should_skip_path,
DEFAULT_MAX_FILE_SIZE_MB,
};
use crate::metrics::types::LocInfo;
use crate::types::Language;
use crate::TldrError;
/// Complete lines-of-code report produced by the analysis functions in this file.
///
/// The optional breakdowns are omitted from the serialized output when they are `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LocReport {
/// Totals and percentages across every counted file.
pub summary: LocSummary,
/// One entry per language that had at least one counted file.
pub by_language: Vec<LanguageLocEntry>,
/// Per-file breakdown; only populated when `LocOptions::by_file` is set.
#[serde(skip_serializing_if = "Option::is_none")]
pub by_file: Option<Vec<FileLocEntry>>,
/// Per-directory breakdown; only populated by `analyze_directory` when `LocOptions::by_dir` is set.
#[serde(skip_serializing_if = "Option::is_none")]
pub by_directory: Option<Vec<DirectoryLocEntry>>,
/// Non-fatal problems collected while analyzing (walk errors, skipped files, read errors).
pub warnings: Vec<String>,
}
/// Overall totals for a report, plus the share of each line category.
///
/// `LocSummary::from_totals` is the constructor that keeps these fields consistent with
/// each other; the derived `Default` yields all zeros.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct LocSummary {
/// Number of files that contributed to the totals.
pub total_files: usize,
/// Sum of code, comment and blank lines (as computed by `from_totals`).
pub total_lines: usize,
/// Lines classified as code.
pub code_lines: usize,
/// Lines classified as comments.
pub comment_lines: usize,
/// Lines that are empty after trimming whitespace.
pub blank_lines: usize,
/// Code lines as a percentage (0.0-100.0) of `total_lines`.
pub code_percent: f64,
/// Comment lines as a percentage (0.0-100.0) of `total_lines`.
pub comment_percent: f64,
/// Blank lines as a percentage (0.0-100.0) of `total_lines`.
pub blank_percent: f64,
}
impl LocSummary {
    /// Builds a summary from raw per-category counts.
    ///
    /// `total_lines` is the sum of the three categories, and each `*_percent` field is
    /// that category's share of `total_lines` on a 0-100 scale. When there are no lines
    /// at all, every percentage is `0.0` rather than the result of a division by zero.
    pub fn from_totals(
        total_files: usize,
        code_lines: usize,
        comment_lines: usize,
        blank_lines: usize,
    ) -> Self {
        let total_lines = code_lines + comment_lines + blank_lines;

        // Share of `part` in the grand total, guarded against an empty input.
        let share = |part: usize| -> f64 {
            if total_lines == 0 {
                0.0
            } else {
                (part as f64 / total_lines as f64) * 100.0
            }
        };

        Self {
            total_files,
            total_lines,
            code_lines,
            comment_lines,
            blank_lines,
            code_percent: share(code_lines),
            comment_percent: share(comment_lines),
            blank_percent: share(blank_lines),
        }
    }
}
/// Aggregated line counts for all counted files of one language.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LanguageLocEntry {
/// Language name, taken from `Language::as_str`.
pub language: String,
/// Number of files of this language that were counted.
pub files: usize,
/// Lines classified as code.
pub code_lines: usize,
/// Lines classified as comments.
pub comment_lines: usize,
/// Lines that are empty after trimming whitespace.
pub blank_lines: usize,
/// Total line count, copied from the underlying `LocInfo`.
pub total_lines: usize,
}
/// Line counts for a single analyzed file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileLocEntry {
/// File path: relative to the analyzed root when produced by `analyze_directory`,
/// or the path exactly as supplied when `analyze_loc` is given a single file.
pub path: PathBuf,
/// Language name, taken from `Language::as_str`.
pub language: String,
/// Lines classified as code.
pub code_lines: usize,
/// Lines classified as comments.
pub comment_lines: usize,
/// Lines that are empty after trimming whitespace.
pub blank_lines: usize,
/// Total line count, copied from the underlying `LocInfo`.
pub total_lines: usize,
}
/// Aggregated line counts for the files located directly inside one directory.
///
/// `analyze_directory` credits each file only to its immediate parent directory, so
/// these totals do not include files from nested subdirectories.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DirectoryLocEntry {
/// Directory path relative to the analyzed root; `.` denotes the root itself.
pub path: PathBuf,
/// Lines classified as code.
pub code_lines: usize,
/// Lines classified as comments.
pub comment_lines: usize,
/// Lines that are empty after trimming whitespace.
pub blank_lines: usize,
/// Total line count, copied from the underlying `LocInfo`.
pub total_lines: usize,
}
/// Options controlling which files are analyzed and which breakdowns are reported.
///
/// Note that the derived `Default` leaves `gitignore` as `false` and `max_file_size_mb`
/// as `0`, whereas `LocOptions::new` enables `gitignore` and applies
/// `DEFAULT_MAX_FILE_SIZE_MB`.
#[derive(Debug, Clone, Default)]
pub struct LocOptions {
/// When set, `analyze_directory` only counts files whose detected language equals it,
/// and `analyze_loc` uses it as the language of a single-file input.
pub lang: Option<Language>,
/// Include a per-file breakdown in the report.
pub by_file: bool,
/// Include a per-directory breakdown in the report (directory analysis only).
pub by_dir: bool,
/// Exclusion patterns handed to `should_exclude` together with each relative path.
pub exclude: Vec<String>,
/// When `false`, the directory walker's hidden-file filter is enabled.
pub include_hidden: bool,
/// Enables the walker's `git_ignore` and `git_global` filters.
pub gitignore: bool,
/// Maximum number of files to analyze in a directory; `0` means no limit.
pub max_files: usize,
/// Size limit in megabytes forwarded to `check_file_size` for every file.
pub max_file_size_mb: usize,
}
impl LocOptions {
    /// Creates options with the recommended starting values.
    ///
    /// Compared with the derived `Default`, this turns `gitignore` on and sets
    /// `max_file_size_mb` to `DEFAULT_MAX_FILE_SIZE_MB`; every other field keeps its
    /// default value.
    pub fn new() -> Self {
        let base = Self::default();
        Self {
            max_file_size_mb: DEFAULT_MAX_FILE_SIZE_MB,
            gitignore: true,
            ..base
        }
    }
}
/// Cross-line state carried by the line classifier from one line to the next.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ParseState {
/// Not inside any multi-line construct.
Normal,
/// Inside a block comment that has not been closed yet.
InMultiLineComment,
/// Inside a triple-quoted string opened with the given quote style.
InTripleQuotedString(TripleQuoteType),
}
/// Which delimiter opened the triple-quoted string currently being scanned.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TripleQuoteType {
/// Opened with three double quotes (`"""`).
Double,
/// Opened with three single quotes (`'''`).
Single,
}
/// Classifies one source line and returns the parser state for the following line.
///
/// A line that is empty after trimming is always `Blank` and leaves `state` unchanged,
/// even inside a block comment or triple-quoted string. Any other line is handed, in
/// trimmed form, to the classifier that matches the current `state`.
fn classify_line(line: &str, lang: Language, state: ParseState) -> (LineType, ParseState) {
    let content = line.trim();
    match state {
        _ if content.is_empty() => (LineType::Blank, state),
        ParseState::Normal => classify_normal_line(content, lang),
        ParseState::InMultiLineComment => classify_in_multiline_comment(content, lang),
        ParseState::InTripleQuotedString(quote_type) => {
            classify_in_triple_quoted_string(content, quote_type)
        }
    }
}
/// Classifies a non-blank, trimmed line encountered while inside a block comment.
///
/// The terminator is `=end` for Ruby and `*/` for every other language. If the line
/// does not contain the terminator, it is a comment line and the block continues.
/// Otherwise the block ends on this line: the line counts as code when something other
/// than whitespace or a single-line comment follows the first terminator, and as a
/// comment line when nothing else follows.
fn classify_in_multiline_comment(trimmed: &str, lang: Language) -> (LineType, ParseState) {
    let terminator = if lang == Language::Ruby { "=end" } else { "*/" };

    match trimmed.find(terminator) {
        None => (LineType::Comment, ParseState::InMultiLineComment),
        Some(pos) => {
            let tail = trimmed[pos + terminator.len()..].trim();
            let only_comment_left = tail.is_empty() || is_single_line_comment(tail, lang);
            let kind = if only_comment_left {
                LineType::Comment
            } else {
                LineType::Code
            };
            (kind, ParseState::Normal)
        }
    }
}
/// Classifies a non-blank, trimmed line encountered inside a triple-quoted string.
///
/// Only the delimiter that opened the string (`quote_type`) can close it:
/// - no delimiter on the line: comment line, the string continues;
/// - the line ends with the delimiter: comment line, the string is closed;
/// - the delimiter appears earlier in the line: the string is closed, and the line is
///   code unless the remainder after the first delimiter is empty or a `#` comment.
fn classify_in_triple_quoted_string(
    trimmed: &str,
    quote_type: TripleQuoteType,
) -> (LineType, ParseState) {
    let delimiter = match quote_type {
        TripleQuoteType::Single => "'''",
        TripleQuoteType::Double => "\"\"\"",
    };

    match trimmed.find(delimiter) {
        None => (
            LineType::Comment,
            ParseState::InTripleQuotedString(quote_type),
        ),
        Some(_) if trimmed.ends_with(delimiter) => (LineType::Comment, ParseState::Normal),
        Some(pos) => {
            let tail = trimmed[pos + delimiter.len()..].trim();
            let kind = if tail.is_empty() || is_single_line_comment(tail, Language::Python) {
                LineType::Comment
            } else {
                LineType::Code
            };
            (kind, ParseState::Normal)
        }
    }
}
/// Classifies a non-blank, trimmed line encountered outside any multi-line construct.
///
/// Checks are applied in this order:
/// 1. Python only: a line starting with `"""` or `'''` opens a triple-quoted string,
///    which is counted as a comment. If the same delimiter closes it on this line, the
///    line is code only when something other than a `#` comment follows the close.
/// 2. A line starting with the language's single-line comment prefix is a comment.
/// 3. A line starting with a block-comment opener (`=begin` for Ruby, `/*` for all
///    languages except Python, which has none) is a comment. If the block closes on the
///    same line, the line is code only when real code follows the close; otherwise the
///    next line starts in `ParseState::InMultiLineComment`.
/// 4. Everything else is code. This includes lines where a block-comment opener appears
///    after other text: such lines never change the state (presumably so that openers
///    inside string literals such as `"src/*"` are not mistaken for comments).
///
/// The closing marker is searched for only *after* the opener, so `/*/` is treated as
/// an unclosed block comment rather than a comment that opens and closes at once.
fn classify_normal_line(trimmed: &str, lang: Language) -> (LineType, ParseState) {
    // Verdict for a line on which a comment/docstring was both opened and closed:
    // `tail` is whatever follows the closing marker.
    let closed_on_this_line = |tail: &str| {
        let tail = tail.trim();
        if tail.is_empty() || is_single_line_comment(tail, lang) {
            (LineType::Comment, ParseState::Normal)
        } else {
            (LineType::Code, ParseState::Normal)
        }
    };

    if lang == Language::Python {
        let opener = if trimmed.starts_with("\"\"\"") {
            Some(("\"\"\"", TripleQuoteType::Double))
        } else if trimmed.starts_with("'''") {
            Some(("'''", TripleQuoteType::Single))
        } else {
            None
        };

        if let Some((marker, quote_type)) = opener {
            let rest = &trimmed[marker.len()..];
            return match rest.find(marker) {
                Some(pos) => closed_on_this_line(&rest[pos + marker.len()..]),
                None => (
                    LineType::Comment,
                    ParseState::InTripleQuotedString(quote_type),
                ),
            };
        }
    }

    if is_single_line_comment(trimmed, lang) {
        return (LineType::Comment, ParseState::Normal);
    }

    let (start_marker, end_marker) = match lang {
        Language::Ruby => ("=begin", "=end"),
        Language::Python => ("", ""),
        _ => ("/*", "*/"),
    };

    if !start_marker.is_empty() && trimmed.starts_with(start_marker) {
        // Skip past the opener before looking for the terminator; otherwise the `*`
        // of `/*` could be paired with a following `/` and read as `*/`.
        let rest = &trimmed[start_marker.len()..];
        return match rest.find(end_marker) {
            Some(pos) => closed_on_this_line(&rest[pos + end_marker.len()..]),
            None => (LineType::Comment, ParseState::InMultiLineComment),
        };
    }

    (LineType::Code, ParseState::Normal)
}
/// Reports whether `trimmed` begins with a single-line comment prefix of `lang`.
///
/// Prefixes by language family:
/// - `#` for Python, Ruby and Elixir;
/// - `//` for Rust, Go, TypeScript, JavaScript, Java, C, C++, Swift, Kotlin, C#, Scala
///   and PHP;
/// - `--` for Lua and Luau;
/// - `(*` or a leading `*` for OCaml.
///
/// The caller is expected to pass a line with leading whitespace already removed; no
/// trimming happens here.
fn is_single_line_comment(trimmed: &str, lang: Language) -> bool {
    let prefixes: &[&str] = match lang {
        Language::Python | Language::Ruby | Language::Elixir => &["#"],
        Language::Lua | Language::Luau => &["--"],
        Language::Ocaml => &["(*", "*"],
        Language::Rust
        | Language::Go
        | Language::TypeScript
        | Language::JavaScript
        | Language::Java
        | Language::C
        | Language::Cpp
        | Language::Swift
        | Language::Kotlin
        | Language::CSharp
        | Language::Scala
        | Language::Php => &["//"],
    };

    prefixes.iter().any(|prefix| trimmed.starts_with(*prefix))
}
/// Category assigned to a single source line by the classifier.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum LineType {
/// The line contains code (possibly alongside a comment).
Code,
/// The line consists only of comment or docstring text.
Comment,
/// The line is empty after trimming whitespace.
Blank,
}
/// Counts code, comment and blank lines in `source`, interpreted as `lang`.
///
/// Lines are taken from `str::lines` and classified one at a time, threading the
/// multi-line parser state (block comments, triple-quoted strings) from each line to
/// the next. The three tallies are handed to `LocInfo::new` in the order
/// code, comment, blank.
pub fn count_lines(source: &str, lang: Language) -> LocInfo {
    let mut state = ParseState::Normal;

    let (code, comments, blanks) =
        source
            .lines()
            .fold((0, 0, 0), |(code, comments, blanks), line| {
                let (kind, next_state) = classify_line(line, lang, state);
                state = next_state;
                match kind {
                    LineType::Code => (code + 1, comments, blanks),
                    LineType::Comment => (code, comments + 1, blanks),
                    LineType::Blank => (code, comments, blanks + 1),
                }
            });

    LocInfo::new(code, comments, blanks)
}
/// Counts the lines of a single file and reports the language that was used.
///
/// `lang` overrides language detection; when it is `None` the language is derived from
/// the path via `Language::from_path`.
///
/// # Errors
///
/// - `TldrError::PathNotFound` if `path` does not exist.
/// - Whatever `check_file_size` returns when the file exceeds `max_file_size_mb`.
/// - `TldrError::UnsupportedLanguage` with a `Binary file: ` prefix if the file has a
///   binary extension or is detected as binary.
/// - `TldrError::UnsupportedLanguage` carrying the path if no language could be
///   determined.
/// - The converted I/O error if the file cannot be read as UTF-8 text.
pub fn analyze_file(
    path: &Path,
    lang: Option<Language>,
    max_file_size_mb: usize,
) -> Result<(LocInfo, Language), TldrError> {
    if !path.exists() {
        return Err(TldrError::PathNotFound(path.to_path_buf()));
    }
    check_file_size(path, max_file_size_mb)?;

    let looks_binary = has_binary_extension(path) || is_binary_file(path);
    if looks_binary {
        let message = format!("Binary file: {}", path.display());
        return Err(TldrError::UnsupportedLanguage(message));
    }

    // An explicit language wins; otherwise fall back to detection from the path.
    let language = lang
        .or_else(|| Language::from_path(path))
        .ok_or_else(|| TldrError::UnsupportedLanguage(path.display().to_string()))?;

    let source = std::fs::read_to_string(path)?;
    Ok((count_lines(&source, language), language))
}
/// Walks `path` and aggregates line counts for every recognized source file under it.
///
/// The walk never follows symlinks. Hidden entries are filtered unless
/// `options.include_hidden` is set, and the walker's `git_ignore`/`git_global` filters
/// follow `options.gitignore`. A file is skipped silently when `should_skip_path` or
/// `should_exclude` rejects its path relative to `path`, when no language can be
/// derived from its path, or when `options.lang` is set and differs from the detected
/// language.
///
/// Problems with individual entries (walk errors, oversized or binary files, read
/// failures) do not abort the analysis; they are recorded in `LocReport::warnings`.
/// When `options.max_files` is non-zero, the walk stops with a warning at the first
/// file encountered after that many files have been counted.
///
/// `by_language` and `by_directory` are sorted by descending `total_lines`; ties are
/// broken by language name and directory path respectively, so the output order is
/// reproducible even though the intermediate maps iterate in arbitrary order.
///
/// # Errors
///
/// Returns `TldrError::PathNotFound` if `path` does not exist.
pub fn analyze_directory(path: &Path, options: &LocOptions) -> Result<LocReport, TldrError> {
    if !path.exists() {
        return Err(TldrError::PathNotFound(path.to_path_buf()));
    }

    let mut by_language: HashMap<Language, (usize, LocInfo)> = HashMap::new();
    let mut by_file: Vec<FileLocEntry> = Vec::new();
    let mut by_directory: HashMap<PathBuf, LocInfo> = HashMap::new();
    let mut warnings: Vec<String> = Vec::new();
    let mut files_processed = 0;

    let mut builder = ignore::WalkBuilder::new(path);
    builder
        .follow_links(false)
        .hidden(!options.include_hidden)
        .git_ignore(options.gitignore)
        .git_global(options.gitignore);

    for entry in builder.build() {
        let entry = match entry {
            Ok(e) => e,
            Err(e) => {
                warnings.push(format!("Walk error: {}", e));
                continue;
            }
        };
        let entry_path = entry.path();
        if entry_path.is_dir() {
            continue;
        }

        if options.max_files > 0 && files_processed >= options.max_files {
            warnings.push(format!(
                "Stopped after {} files (max_files limit)",
                options.max_files
            ));
            break;
        }

        // Skip/exclude rules are evaluated against the path relative to the root.
        let relative_path = entry_path.strip_prefix(path).unwrap_or(entry_path);
        if should_skip_path(relative_path) {
            continue;
        }
        if should_exclude(relative_path, &options.exclude) {
            continue;
        }

        let lang = match Language::from_path(entry_path) {
            Some(l) => l,
            None => continue,
        };
        if let Some(filter_lang) = options.lang {
            if lang != filter_lang {
                continue;
            }
        }

        match analyze_file(entry_path, Some(lang), options.max_file_size_mb) {
            Ok((info, detected_lang)) => {
                files_processed += 1;

                let lang_totals = by_language
                    .entry(detected_lang)
                    .or_insert((0, LocInfo::default()));
                lang_totals.0 += 1;
                lang_totals.1.merge(&info);

                if options.by_file {
                    by_file.push(FileLocEntry {
                        path: relative_path.to_path_buf(),
                        language: detected_lang.as_str().to_string(),
                        code_lines: info.code_lines,
                        comment_lines: info.comment_lines,
                        blank_lines: info.blank_lines,
                        total_lines: info.total_lines,
                    });
                }

                if options.by_dir {
                    if let Some(parent) = relative_path.parent() {
                        // Files directly in the root have an empty parent; report it as ".".
                        let dir_path = if parent.as_os_str().is_empty() {
                            PathBuf::from(".")
                        } else {
                            parent.to_path_buf()
                        };
                        by_directory.entry(dir_path).or_default().merge(&info);
                    }
                }
            }
            Err(TldrError::FileTooLarge {
                path: large_path,
                size_mb,
                max_mb,
            }) => {
                warnings.push(format!(
                    "Skipped large file: {} ({}MB > {}MB)",
                    large_path.display(),
                    size_mb,
                    max_mb
                ));
            }
            Err(TldrError::UnsupportedLanguage(msg)) if msg.contains("Binary file") => {
                warnings.push(format!("Skipped binary file: {}", entry_path.display()));
            }
            // A file without a usable language is skipped silently, not warned about.
            Err(TldrError::UnsupportedLanguage(_)) => {}
            Err(e) => {
                warnings.push(format!("Error reading {}: {}", entry_path.display(), e));
            }
        }
    }

    let mut by_language_vec: Vec<LanguageLocEntry> = by_language
        .into_iter()
        .map(|(lang, (count, info))| LanguageLocEntry {
            language: lang.as_str().to_string(),
            files: count,
            code_lines: info.code_lines,
            comment_lines: info.comment_lines,
            blank_lines: info.blank_lines,
            total_lines: info.total_lines,
        })
        .collect();
    // Largest first; the name tie-breaker keeps the order stable across runs.
    by_language_vec.sort_by(|a, b| {
        b.total_lines
            .cmp(&a.total_lines)
            .then_with(|| a.language.cmp(&b.language))
    });

    let total_code: usize = by_language_vec.iter().map(|e| e.code_lines).sum();
    let total_comment: usize = by_language_vec.iter().map(|e| e.comment_lines).sum();
    let total_blank: usize = by_language_vec.iter().map(|e| e.blank_lines).sum();
    let total_files: usize = by_language_vec.iter().map(|e| e.files).sum();
    let summary = LocSummary::from_totals(total_files, total_code, total_comment, total_blank);

    let by_directory_vec = if options.by_dir {
        let mut dirs: Vec<DirectoryLocEntry> = by_directory
            .into_iter()
            .map(|(path, info)| DirectoryLocEntry {
                path,
                code_lines: info.code_lines,
                comment_lines: info.comment_lines,
                blank_lines: info.blank_lines,
                total_lines: info.total_lines,
            })
            .collect();
        // Largest first; the path tie-breaker keeps the order stable across runs.
        dirs.sort_by(|a, b| {
            b.total_lines
                .cmp(&a.total_lines)
                .then_with(|| a.path.cmp(&b.path))
        });
        Some(dirs)
    } else {
        None
    };

    Ok(LocReport {
        summary,
        by_language: by_language_vec,
        by_file: if options.by_file { Some(by_file) } else { None },
        by_directory: by_directory_vec,
        warnings,
    })
}
/// Entry point for line counting: accepts either a single file or a directory.
///
/// Anything that is not a regular file is delegated to `analyze_directory`. For a file,
/// the report contains one language entry, an optional one-element per-file breakdown
/// (when `options.by_file` is set, using `path` exactly as given), no per-directory
/// breakdown and no warnings.
///
/// # Errors
///
/// Propagates the error of `analyze_file` for a file input, or of `analyze_directory`
/// otherwise.
pub fn analyze_loc(path: &Path, options: &LocOptions) -> Result<LocReport, TldrError> {
    if !path.is_file() {
        return analyze_directory(path, options);
    }

    let (info, lang) = analyze_file(path, options.lang, options.max_file_size_mb)?;
    let language = lang.as_str().to_string();

    let by_file = options.by_file.then(|| {
        vec![FileLocEntry {
            path: path.to_path_buf(),
            language: language.clone(),
            code_lines: info.code_lines,
            comment_lines: info.comment_lines,
            blank_lines: info.blank_lines,
            total_lines: info.total_lines,
        }]
    });

    let by_language = vec![LanguageLocEntry {
        language,
        files: 1,
        code_lines: info.code_lines,
        comment_lines: info.comment_lines,
        blank_lines: info.blank_lines,
        total_lines: info.total_lines,
    }];

    Ok(LocReport {
        summary: LocSummary::from_totals(
            1,
            info.code_lines,
            info.comment_lines,
            info.blank_lines,
        ),
        by_language,
        by_file,
        by_directory: None,
        warnings: Vec::new(),
    })
}
/// Unit tests for the line classifier and `count_lines`.
///
/// Fixture lines are trimmed before classification, so their indentation is cosmetic;
/// blank lines inside a fixture, however, are counted and must match the assertions.
#[cfg(test)]
mod tests {
    use super::*;

    /// A `#` comment followed by two code lines.
    #[test]
    fn test_count_lines_python_simple() {
        let source = r#"# Comment
def foo():
    pass
"#;
        let info = count_lines(source, Language::Python);
        assert_eq!(info.code_lines, 2);
        assert_eq!(info.comment_lines, 1);
        assert_eq!(info.blank_lines, 0);
        assert!(info.is_valid());
    }

    /// Single-line docstrings count as comments; the separating empty line is blank.
    #[test]
    fn test_count_lines_python_docstring() {
        let source = r#""""Module docstring."""

def foo():
    """Function docstring."""
    pass
"#;
        let info = count_lines(source, Language::Python);
        assert_eq!(info.comment_lines, 2);
        assert_eq!(info.blank_lines, 1);
        assert_eq!(info.code_lines, 2);
        assert!(info.is_valid());
    }

    /// Every line of a multi-line docstring, delimiters included, is a comment.
    #[test]
    fn test_count_lines_python_multiline_docstring() {
        let source = r#""""
Multi-line
docstring
"""
def foo():
    pass
"#;
        let info = count_lines(source, Language::Python);
        assert_eq!(info.comment_lines, 4);
        assert_eq!(info.code_lines, 2);
        assert!(info.is_valid());
    }

    /// A `//` comment followed by three code lines.
    #[test]
    fn test_count_lines_rust_simple() {
        let source = r#"// Comment
fn main() {
    println!("Hello");
}
"#;
        let info = count_lines(source, Language::Rust);
        assert_eq!(info.code_lines, 3);
        assert_eq!(info.comment_lines, 1);
        assert!(info.is_valid());
    }

    /// A block comment spanning three lines, plus an inline block comment followed by
    /// code, which counts as a code line.
    #[test]
    fn test_count_lines_rust_multiline_comment() {
        let source = r#"/* Multi
line
comment */
fn main() {
    /* inline */ let x = 1;
}
"#;
        let info = count_lines(source, Language::Rust);
        assert_eq!(info.comment_lines, 3);
        assert_eq!(info.code_lines, 3);
        assert!(info.is_valid());
    }

    /// Empty input produces all-zero counts.
    #[test]
    fn test_count_lines_empty() {
        let source = "";
        let info = count_lines(source, Language::Python);
        assert_eq!(info.code_lines, 0);
        assert_eq!(info.comment_lines, 0);
        assert_eq!(info.blank_lines, 0);
        assert_eq!(info.total_lines, 0);
        assert!(info.is_valid());
    }

    /// Three newline-terminated empty lines are three blank lines.
    #[test]
    fn test_count_lines_blank_only() {
        let source = "\n\n\n";
        let info = count_lines(source, Language::Python);
        assert_eq!(info.blank_lines, 3);
        assert_eq!(info.code_lines, 0);
        assert_eq!(info.comment_lines, 0);
        assert!(info.is_valid());
    }

    /// One `//` comment plus a three-line `/* ... */` block, then three code lines.
    #[test]
    fn test_count_lines_javascript() {
        let source = r#"// Single line comment
/*
 * Multi-line
 */
function hello() {
    console.log("hi");
}
"#;
        let info = count_lines(source, Language::JavaScript);
        assert_eq!(info.comment_lines, 4);
        assert_eq!(info.code_lines, 3);
        assert!(info.is_valid());
    }

    /// Two comments, two blank separator lines and five code lines.
    #[test]
    fn test_count_lines_go() {
        let source = r#"// Package main
package main

import "fmt"

// main is the entry point
func main() {
    fmt.Println("Hello")
}
"#;
        let info = count_lines(source, Language::Go);
        assert_eq!(info.comment_lines, 2);
        assert_eq!(info.blank_lines, 2);
        assert_eq!(info.code_lines, 5);
        assert!(info.is_valid());
    }

    /// The three categories always add up to the total line count.
    #[test]
    fn test_invariant_holds() {
        let source = r#"# Comment
def foo():
    """Docstring"""
    # Another comment
    pass
# End
"#;
        let info = count_lines(source, Language::Python);
        assert!(info.is_valid());
        assert_eq!(
            info.code_lines + info.comment_lines + info.blank_lines,
            info.total_lines
        );
    }

    /// A Python `#` line is a comment and leaves the state untouched.
    #[test]
    fn test_classify_python_hash_comment() {
        let (line_type, state) =
            classify_line("# This is a comment", Language::Python, ParseState::Normal);
        assert_eq!(line_type, LineType::Comment);
        assert_eq!(state, ParseState::Normal);
    }

    /// A Rust `//` line is a comment and leaves the state untouched.
    #[test]
    fn test_classify_rust_slash_comment() {
        let (line_type, state) =
            classify_line("// This is a comment", Language::Rust, ParseState::Normal);
        assert_eq!(line_type, LineType::Comment);
        assert_eq!(state, ParseState::Normal);
    }

    /// Whitespace-only lines are blank.
    #[test]
    fn test_classify_blank_line() {
        let (line_type, _) = classify_line("   ", Language::Python, ParseState::Normal);
        assert_eq!(line_type, LineType::Blank);
    }

    /// An ordinary statement is code.
    #[test]
    fn test_classify_code_line() {
        let (line_type, _) = classify_line("let x = 5;", Language::Rust, ParseState::Normal);
        assert_eq!(line_type, LineType::Code);
    }
}