use std::path::Path;
use serde::{Deserialize, Serialize};
use tokei::{Config as TokeiConfig, LanguageType, Languages};
use crate::core::config::Config;
use crate::observer::{ObservationMeta, Observer};
/// Line tallies broken down into code, comment, and blank lines.
///
/// Used both per language (flattened into [`LanguageStats`]) and as the
/// grand total in [`LocReport`].
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
pub struct LineCounts {
/// Number of lines counted as code.
pub code: usize,
/// Number of lines counted as comments.
pub comments: usize,
/// Number of blank lines.
pub blanks: usize,
}
/// Aggregated statistics for a single language.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct LanguageStats {
/// Language name; `LocObserver::scan` fills it from tokei's `LanguageType::name()`.
pub name: String,
/// Number of files of this language that contributed to `counts`.
pub files: usize,
/// Line tallies for this language. Flattened by serde, so `code`,
/// `comments`, and `blanks` serialize as siblings of `name` and `files`.
#[serde(flatten)]
pub counts: LineCounts,
}
/// Result of a lines-of-code scan.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
pub struct LocReport {
/// Per-language statistics. When produced by `LocObserver::scan`, entries
/// are ordered by code lines (descending), then by name (ascending).
pub languages: Vec<LanguageStats>,
/// Name of the primary language, if any. `LocObserver::scan` picks the
/// first entry of `languages` whose name is not a literate language.
pub primary: Option<String>,
/// Line tallies summed over every entry in `languages` (as computed by
/// `LocObserver::scan`).
pub totals: LineCounts,
}
impl LocReport {
    /// Returns the number of files across all languages in the report.
    ///
    /// This is the sum of `files` over every entry in `languages`; an empty
    /// report yields `0`.
    #[must_use]
    pub fn total_files(&self) -> usize {
        self.languages
            .iter()
            .fold(0, |running, stats| running + stats.files)
    }
}
/// Observer that counts lines of code under a project root using tokei.
#[derive(Debug, Clone, Default)]
pub struct LocObserver {
/// Exclude pattern lines. `scan` compiles all of them into an
/// `ExcludeMatcher` and additionally forwards the plain ones (see
/// `is_tokei_substring_safe`) to tokei's ignore list.
pub excluded: Vec<String>,
/// Language names to drop from the report; compared against tokei's
/// language names ignoring ASCII case.
pub exclude_languages: Vec<String>,
}
impl LocObserver {
    /// Builds an observer whose exclude patterns come from `cfg.exclude_lines()`.
    ///
    /// `exclude_languages` starts empty, so no language is filtered by name.
    #[must_use]
    pub fn from_config(cfg: &Config) -> Self {
        let excluded = cfg.exclude_lines();
        Self {
            excluded,
            exclude_languages: Vec::new(),
        }
    }

    /// Counts lines under `root` and aggregates them per language.
    ///
    /// Files matched by the exclude patterns and languages listed in
    /// `exclude_languages` are left out. Languages with no remaining files
    /// are omitted entirely. The returned entries are ordered by code lines
    /// (descending) with the language name as tie-breaker, and `primary` is
    /// the first entry that is not a literate language.
    ///
    /// # Panics
    ///
    /// Panics if `self.excluded` cannot be compiled into an `ExcludeMatcher`.
    pub fn scan(&self, root: &Path) -> LocReport {
        // Only plain patterns are handed to tokei directly; everything else
        // is enforced afterwards by the matcher on each reported file.
        let plain_patterns: Vec<&str> = self
            .excluded
            .iter()
            .map(String::as_str)
            .filter(|pattern| is_tokei_substring_safe(pattern))
            .collect();

        let roots = [root];
        let mut languages = Languages::new();
        languages.get_statistics(&roots, &plain_patterns, &TokeiConfig::default());

        let matcher = crate::observer::walk::ExcludeMatcher::compile(root, &self.excluded)
            .expect("exclude patterns validated at config load");

        let mut totals = LineCounts::default();
        let mut entries = Vec::with_capacity(languages.len());

        for (lang_type, lang) in &languages {
            let name = lang_type.name();
            let skipped_by_name = self
                .exclude_languages
                .iter()
                .any(|unwanted| unwanted.eq_ignore_ascii_case(name));
            if skipped_by_name {
                continue;
            }

            // Tally only the reports that survive the full exclude list.
            let (files, counts) = lang
                .reports
                .iter()
                .filter(|report| !matcher.is_excluded(&report.name, false))
                .fold(
                    (0usize, LineCounts::default()),
                    |(files, mut counts), report| {
                        counts.code += report.stats.code;
                        counts.comments += report.stats.comments;
                        counts.blanks += report.stats.blanks;
                        (files + 1, counts)
                    },
                );

            // A language whose files were all excluded does not appear at all.
            if files == 0 {
                continue;
            }

            totals.code += counts.code;
            totals.comments += counts.comments;
            totals.blanks += counts.blanks;
            entries.push(LanguageStats {
                name: name.to_string(),
                files,
                counts,
            });
        }

        // Most code first; ties are broken alphabetically by name.
        entries.sort_by(|lhs, rhs| {
            rhs.counts
                .code
                .cmp(&lhs.counts.code)
                .then_with(|| lhs.name.cmp(&rhs.name))
        });

        let primary = entries
            .iter()
            .map(|entry| entry.name.as_str())
            .find(|name| !is_literate_name(name))
            .map(str::to_owned);

        LocReport {
            languages: entries,
            primary,
            totals,
        }
    }
}
fn is_literate_name(name: &str) -> bool {
LanguageType::from_name(name).is_some_and(LanguageType::is_literate)
}
/// Reports whether an exclude line is a plain pattern that `scan` may pass
/// to tokei's ignore list as-is.
///
/// The line is trimmed first. It is rejected when it is empty, starts with
/// `#` (comment), `!` (negation), or `/` (anchor), or contains any of the
/// glob metacharacters `*`, `?`, or `[`.
fn is_tokei_substring_safe(line: &str) -> bool {
    let pattern = line.trim();
    match pattern.chars().next() {
        // Empty lines, comments, negations, and anchored patterns.
        None | Some('#' | '!' | '/') => false,
        Some(_) => !pattern.contains(['*', '?', '[']),
    }
}
impl Observer for LocObserver {
    type Output = LocReport;

    /// Identifies this observer as `"loc"`, version `1`.
    fn meta(&self) -> ObservationMeta {
        let name = "loc";
        let version = 1;
        ObservationMeta { name, version }
    }

    /// Runs [`LocObserver::scan`] on `project_root` and wraps the report in `Ok`.
    ///
    /// This method never returns `Err` itself; a panic inside `scan` is not
    /// converted into an error.
    fn observe(&self, project_root: &Path) -> anyhow::Result<Self::Output> {
        let report = self.scan(project_root);
        Ok(report)
    }
}
#[cfg(test)]
mod tests {
    use super::is_tokei_substring_safe;

    /// Plain relative paths without glob syntax are accepted.
    #[test]
    fn substring_safe_accepts_plain_directory_patterns() {
        for pattern in ["target/", "vendor/", "crates/cli/vendor/", "foo"] {
            assert!(
                is_tokei_substring_safe(pattern),
                "expected {pattern:?} to be accepted"
            );
        }
    }

    /// Any pattern containing `*`, `?`, or `[` is rejected.
    #[test]
    fn substring_safe_rejects_glob_metacharacters() {
        for pattern in ["*.log", "**/generated/**", "file?.tmp", "[abc]"] {
            assert!(
                !is_tokei_substring_safe(pattern),
                "expected {pattern:?} to be rejected"
            );
        }
    }

    /// Anchored, negated, comment, empty, and whitespace-only lines are rejected.
    #[test]
    fn substring_safe_rejects_anchor_negation_comment() {
        for pattern in ["/build", "!keep.log", "# comment line", "", " "] {
            assert!(
                !is_tokei_substring_safe(pattern),
                "expected {pattern:?} to be rejected"
            );
        }
    }
}