use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
use rayon::prelude::*;
use serde::{Deserialize, Serialize};
use walkdir::WalkDir;
use crate::ast::parser::parse;
use crate::error::TldrError;
use crate::metrics::calculate_all_complexities_file;
use crate::types::Language;
use crate::TldrResult;
/// Halstead metrics derived from operator and operand counts.
///
/// The four `distinct_*` / `total_*` counts are the inputs; every other field
/// is overwritten by [`HalsteadMetrics::calculate`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HalsteadMetrics {
/// Number of distinct operators seen.
pub distinct_operators: usize,
/// Number of distinct operands seen.
pub distinct_operands: usize,
/// Total operator occurrences.
pub total_operators: usize,
/// Total operand occurrences.
pub total_operands: usize,
/// `distinct_operators + distinct_operands`.
pub vocabulary: usize,
/// `total_operators + total_operands`.
pub length: usize,
/// Estimated length: `n1 * log2(n1) + n2 * log2(n2)` over the distinct
/// counts, or `0.0` when either distinct count is zero.
pub calculated_length: f64,
/// `length * log2(vocabulary)`, or `1.0` when vocabulary or length is zero.
pub volume: f64,
/// `(distinct_operators / 2) * (total_operands / distinct_operands)`, or
/// `0.0` when there are no distinct operands.
pub difficulty: f64,
/// `difficulty * volume`.
pub effort: f64,
/// `effort / 18.0`.
pub time_to_program: f64,
/// `volume / 3000.0`.
pub bugs: f64,
}
impl Default for HalsteadMetrics {
fn default() -> Self {
Self {
distinct_operators: 0,
distinct_operands: 0,
total_operators: 0,
total_operands: 0,
vocabulary: 0,
length: 0,
calculated_length: 0.0,
volume: 1.0, difficulty: 0.0,
effort: 0.0,
time_to_program: 0.0,
bugs: 0.0,
}
}
}
impl HalsteadMetrics {
    /// Recomputes every derived field from the four operator/operand counts.
    ///
    /// Reads `distinct_operators`, `distinct_operands`, `total_operators` and
    /// `total_operands`; overwrites all remaining fields.
    pub fn calculate(&mut self) {
        let n1 = self.distinct_operators as f64;
        let n2 = self.distinct_operands as f64;

        self.vocabulary = self.distinct_operators + self.distinct_operands;
        self.length = self.total_operators + self.total_operands;

        // x * log2(x) is NaN at x == 0, so the estimate is only formed when
        // both distinct counts are non-zero.
        let both_kinds_present = self.distinct_operators > 0 && self.distinct_operands > 0;
        self.calculated_length = if both_kinds_present {
            n1 * n1.log2() + n2 * n2.log2()
        } else {
            0.0
        };

        // Falls back to 1.0 (not 0.0) when there is nothing to measure.
        let measurable = self.vocabulary > 0 && self.length > 0;
        self.volume = if measurable {
            self.length as f64 * (self.vocabulary as f64).log2()
        } else {
            1.0
        };

        // Guard the division by the distinct operand count.
        self.difficulty = if self.distinct_operands > 0 {
            (n1 / 2.0) * (self.total_operands as f64 / n2)
        } else {
            0.0
        };

        self.effort = self.difficulty * self.volume;
        self.time_to_program = self.effort / 18.0;
        self.bugs = self.volume / 3000.0;
    }
}
/// Maintainability result for a single analyzed file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileMI {
/// Path of the analyzed file.
pub path: PathBuf,
/// Maintainability index, normalized and clamped to `0.0..=100.0`.
pub mi: f64,
/// Letter grade (`'A'`, `'B'`, `'C'`, `'D'` or `'F'`) derived from `mi`.
pub grade: char,
/// Count of non-blank, non-comment lines.
pub loc: usize,
/// Mean cyclomatic complexity over the file's functions (`1.0` when none
/// were found or the complexity analysis failed).
pub avg_complexity: f64,
/// Halstead metrics; present only when they were requested, and omitted
/// from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub halstead: Option<HalsteadMetrics>,
}
/// Aggregate statistics over all files in a report.
///
/// All numeric fields are zero and `by_grade` is empty when no files were
/// analyzed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MISummary {
/// Arithmetic mean of the per-file `mi` values.
pub average_mi: f64,
/// Lowest per-file `mi`.
pub min_mi: f64,
/// Highest per-file `mi`.
pub max_mi: f64,
/// Number of files included in the report.
pub files_analyzed: usize,
/// Number of files per letter grade; grades with no files have no entry.
pub by_grade: HashMap<char, usize>,
/// Sum of the per-file `loc` values.
pub total_loc: usize,
}
/// Result of a maintainability analysis: per-file results plus a summary.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MaintainabilityReport {
/// One entry per file that was analyzed successfully.
pub files: Vec<FileMI>,
/// Aggregate statistics computed from `files`.
pub summary: MISummary,
}
pub fn maintainability_index(
path: &Path,
include_halstead: bool,
language: Option<Language>,
) -> TldrResult<MaintainabilityReport> {
const MAX_FILE_SIZE: u64 = 500 * 1024;
let file_paths: Vec<PathBuf> = if path.is_file() {
vec![path.to_path_buf()]
} else {
WalkDir::new(path)
.into_iter()
.filter_map(|e| e.ok())
.filter(|e| e.file_type().is_file())
.filter(|e| {
let detected = Language::from_path(e.path());
match (detected, language) {
(Some(d), Some(l)) => d == l,
(Some(_), None) => true,
_ => false,
}
})
.filter(|e| {
e.metadata()
.map(|m| m.len() <= MAX_FILE_SIZE)
.unwrap_or(true)
})
.map(|e| e.path().to_path_buf())
.collect()
};
let files: Vec<FileMI> = file_paths
.par_iter()
.filter_map(|file_path| analyze_file_mi(file_path, include_halstead).ok())
.collect();
let summary = calculate_summary(&files);
Ok(MaintainabilityReport { files, summary })
}
/// Reads one file and computes its maintainability index.
///
/// # Errors
///
/// Propagates the I/O error if the file cannot be read as UTF-8 text, and
/// returns `TldrError::UnsupportedLanguage` (carrying the file extension, or
/// `"unknown"`) when no language can be detected from the path.
fn analyze_file_mi(path: &Path, include_halstead: bool) -> TldrResult<FileMI> {
    let source = std::fs::read_to_string(path)?;

    let unsupported = || {
        let extension = path
            .extension()
            .and_then(|ext| ext.to_str())
            .unwrap_or("unknown");
        TldrError::UnsupportedLanguage(extension.to_string())
    };
    let language = Language::from_path(path).ok_or_else(unsupported)?;

    let loc = count_loc(&source, language);
    let halstead = include_halstead.then(|| calculate_halstead(&source, language));
    let avg_complexity = calculate_avg_complexity(path, language);

    // Use the measured Halstead volume when available, else a LOC-based guess.
    let volume = match &halstead {
        Some(metrics) => metrics.volume,
        None => estimate_volume(loc),
    };
    let mi = calculate_mi(volume, avg_complexity, loc);

    Ok(FileMI {
        path: path.to_path_buf(),
        mi,
        grade: mi_to_grade(mi),
        loc,
        avg_complexity,
        halstead,
    })
}
/// Computes the maintainability index on a 0–100 scale.
///
/// The raw score is `171 - 5.2 * ln(volume) - 0.23 * complexity - 16.2 * ln(loc)`,
/// rescaled by `100 / 171` and clamped to `0.0..=100.0`. A file with no lines
/// of code scores `100.0`; a non-positive `volume` contributes nothing.
fn calculate_mi(volume: f64, complexity: f64, loc: usize) -> f64 {
    const RAW_SCALE: f64 = 171.0;
    const VOLUME_WEIGHT: f64 = 5.2;
    const COMPLEXITY_WEIGHT: f64 = 0.23;
    const LOC_WEIGHT: f64 = 16.2;

    match loc {
        0 => 100.0,
        lines => {
            // Only strictly positive volumes are passed to `ln`.
            let ln_volume = Some(volume).filter(|v| *v > 0.0).map_or(0.0, f64::ln);
            let ln_lines = (lines as f64).ln();
            let raw = RAW_SCALE
                - VOLUME_WEIGHT * ln_volume
                - COMPLEXITY_WEIGHT * complexity
                - LOC_WEIGHT * ln_lines;
            ((raw * 100.0) / RAW_SCALE).clamp(0.0, 100.0)
        }
    }
}
/// Maps a maintainability index to a letter grade.
///
/// Thresholds are exclusive: above 85 is `'A'`, above 65 `'B'`, above 45
/// `'C'`, above 25 `'D'`, and everything else (including NaN) is `'F'`.
fn mi_to_grade(mi: f64) -> char {
    // Ordered from best to worst; the first threshold exceeded wins.
    const CUTOFFS: [(f64, char); 4] = [(85.0, 'A'), (65.0, 'B'), (45.0, 'C'), (25.0, 'D')];

    CUTOFFS
        .iter()
        .find(|(threshold, _)| mi > *threshold)
        .map_or('F', |&(_, grade)| grade)
}
/// Estimates a Halstead-style volume from the line count alone.
///
/// The estimated program length is `loc * 5` and the estimated vocabulary is
/// `sqrt(loc)` floored at `1.0`; the result is `length * log2(vocabulary)`.
/// Consequently `loc` of 0 or 1 yields `0.0`.
fn estimate_volume(loc: usize) -> f64 {
    // Multiply in floating point: `loc * 5` in `usize` can overflow (a panic
    // in debug builds, silent wrap-around in release) for very large counts.
    let lines = loc as f64;
    let estimated_length = lines * 5.0;
    let estimated_vocabulary = lines.sqrt().max(1.0);
    estimated_length * estimated_vocabulary.log2()
}
/// Counts lines that are neither blank nor start with a comment marker.
///
/// This is a line-prefix heuristic with no block-comment state:
/// * a line is skipped only if, after trimming, it begins with one of the
///   language's prefixes, so text inside a multi-line comment that does not
///   start with a listed prefix is counted as code;
/// * for the languages whose table contains `"*"`, any line beginning with
///   `*` is skipped, which also excludes code lines that start with `*`;
/// * trailing comments do not affect a line that starts with code.
fn count_loc(source: &str, language: Language) -> usize {
    // Static tables: no allocation is needed for a constant prefix list.
    let comment_prefixes: &[&str] = match language {
        Language::Python | Language::Ruby | Language::Elixir => &["#"],
        Language::TypeScript
        | Language::JavaScript
        | Language::Go
        | Language::Rust
        | Language::Java
        | Language::C
        | Language::Cpp
        | Language::Kotlin
        | Language::Swift
        | Language::CSharp
        | Language::Scala
        | Language::Php => &["//", "/*", "*"],
        Language::Ocaml => &["(*", "*"],
        Language::Lua | Language::Luau => &["--"],
    };

    source
        .lines()
        .map(str::trim)
        .filter(|line| {
            !line.is_empty()
                && !comment_prefixes
                    .iter()
                    .any(|prefix| line.starts_with(*prefix))
        })
        .count()
}
/// Returns the mean cyclomatic complexity of the functions in `path`.
///
/// Falls back to `1.0` when the complexity analysis fails or reports no
/// functions. The `_language` argument is currently unused.
fn calculate_avg_complexity(path: &Path, _language: Language) -> f64 {
    match calculate_all_complexities_file(path) {
        Ok(per_function) if !per_function.is_empty() => {
            let cyclomatic_sum: u32 = per_function
                .values()
                .map(|metrics| metrics.cyclomatic)
                .sum();
            cyclomatic_sum as f64 / per_function.len() as f64
        }
        // Analysis error or nothing to average.
        _ => 1.0,
    }
}
/// Computes Halstead metrics for `source` by walking its syntax tree.
///
/// Every node whose kind satisfies `is_operator` counts as an operator
/// occurrence, otherwise every node whose kind satisfies `is_operand` counts
/// as an operand occurrence. Distinct values are keyed by the node's source
/// text (or `""` if that text is not valid UTF-8). If parsing fails the
/// default metrics are returned.
///
/// NOTE(review): composite kinds accepted by `is_operator` (such as
/// `binary_operator`) are keyed by the text of the whole node, and their
/// operator token is presumably visited again as a child — confirm whether
/// that double counting is intended for the grammars in use.
fn calculate_halstead(source: &str, language: Language) -> HalsteadMetrics {
    let Ok(tree) = parse(source, language) else {
        return HalsteadMetrics::default();
    };

    let bytes = source.as_bytes();
    // Borrowed slices of `source` are enough to track distinct values; no
    // per-node `String` allocation is needed.
    let mut operators: HashSet<&str> = HashSet::new();
    let mut operands: HashSet<&str> = HashSet::new();
    let mut total_operators = 0usize;
    let mut total_operands = 0usize;

    // Explicit stack instead of recursion; one cursor is reused to list the
    // children of each popped node.
    let mut cursor = tree.root_node().walk();
    let mut stack = vec![tree.root_node()];
    while let Some(node) = stack.pop() {
        let kind = node.kind();
        // Text is decoded only for nodes that are actually classified.
        if is_operator(kind, language) {
            operators.insert(node.utf8_text(bytes).unwrap_or(""));
            total_operators += 1;
        } else if is_operand(kind, language) {
            operands.insert(node.utf8_text(bytes).unwrap_or(""));
            total_operands += 1;
        }

        cursor.reset(node);
        if cursor.goto_first_child() {
            loop {
                stack.push(cursor.node());
                if !cursor.goto_next_sibling() {
                    break;
                }
            }
        }
    }

    let mut halstead = HalsteadMetrics {
        distinct_operators: operators.len(),
        distinct_operands: operands.len(),
        total_operators,
        total_operands,
        ..Default::default()
    };
    halstead.calculate();
    halstead
}
/// Reports whether a syntax-node kind is counted as a Halstead operator.
///
/// The same table is used for every language; `_language` is unused.
fn is_operator(kind: &str, _language: Language) -> bool {
    match kind {
        // Arithmetic.
        "+" | "-" | "*" | "/" | "%" | "**" | "//" => true,
        // Comparison.
        "==" | "!=" | "<" | ">" | "<=" | ">=" => true,
        // Assignment forms.
        "=" | "+=" | "-=" | "*=" | "/=" => true,
        // Logical.
        "and" | "or" | "not" | "&&" | "||" | "!" => true,
        // Control-flow keywords.
        "if" | "else" | "elif" | "for" | "while" | "return" | "try" | "except" | "catch"
        | "finally" => true,
        // Definition and import keywords.
        "def" | "class" | "function" | "fn" | "func" | "import" | "from" | "use" | "require" => {
            true
        }
        // Punctuation.
        "(" | ")" | "[" | "]" | "{" | "}" | "." | "," | ":" | ";" | "->" => true,
        // Named (non-token) node kinds.
        "binary_operator" | "unary_operator" | "comparison_operator" | "boolean_operator"
        | "assignment" => true,
        _ => false,
    }
}
/// Reports whether a syntax-node kind is counted as a Halstead operand.
///
/// The same table is used for every language; `_language` is unused.
fn is_operand(kind: &str, _language: Language) -> bool {
    match kind {
        // Names.
        "identifier" | "property_identifier" | "field_identifier" => true,
        // Literal node kinds.
        "string" | "integer" | "float" | "number" => true,
        "string_literal" | "integer_literal" | "float_literal" => true,
        // Keyword constants.
        "true" | "false" | "none" | "null" | "nil" => true,
        _ => false,
    }
}
/// Aggregates per-file results into a summary.
///
/// For an empty slice every numeric field is zero and `by_grade` is empty;
/// otherwise the mean, minimum and maximum index, the total line count and
/// the number of files per grade are computed.
fn calculate_summary(files: &[FileMI]) -> MISummary {
    let files_analyzed = files.len();
    if files_analyzed == 0 {
        return MISummary {
            average_mi: 0.0,
            min_mi: 0.0,
            max_mi: 0.0,
            files_analyzed,
            by_grade: HashMap::new(),
            total_loc: 0,
        };
    }

    let mut min_mi = f64::INFINITY;
    let mut max_mi = f64::NEG_INFINITY;
    let mut total_loc: usize = 0;
    let mut by_grade: HashMap<char, usize> = HashMap::new();
    for entry in files {
        min_mi = min_mi.min(entry.mi);
        max_mi = max_mi.max(entry.mi);
        total_loc += entry.loc;
        *by_grade.entry(entry.grade).or_insert(0) += 1;
    }

    let mi_sum: f64 = files.iter().map(|entry| entry.mi).sum();

    MISummary {
        average_mi: mi_sum / files_analyzed as f64,
        min_mi,
        max_mi,
        files_analyzed,
        by_grade,
        total_loc,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A short, low-complexity input scores in the upper half of the scale.
    #[test]
    fn test_mi_calculation_simple() {
        let score = calculate_mi(100.0, 2.0, 20);
        assert!(score > 50.0, "Simple code should be maintainable");
    }

    /// A long, high-volume, high-complexity input scores in the lower half.
    #[test]
    fn test_mi_calculation_complex() {
        let score = calculate_mi(10000.0, 50.0, 1000);
        assert!(score < 50.0, "Complex code should be less maintainable");
    }

    /// One representative score per grade band.
    #[test]
    fn test_mi_grades() {
        let expectations = [
            (90.0, 'A'),
            (75.0, 'B'),
            (55.0, 'C'),
            (35.0, 'D'),
            (15.0, 'F'),
        ];
        for (score, grade) in expectations {
            assert_eq!(mi_to_grade(score), grade);
        }
    }

    /// Derived Halstead fields are populated from the four counts.
    #[test]
    fn test_halstead_calculation() {
        let mut metrics = HalsteadMetrics {
            distinct_operators: 10,
            distinct_operands: 20,
            total_operators: 50,
            total_operands: 100,
            ..Default::default()
        };
        metrics.calculate();

        assert_eq!(metrics.vocabulary, 30);
        assert_eq!(metrics.length, 150);
        assert!(metrics.volume > 0.0);
        assert!(metrics.difficulty > 0.0);
        assert!(metrics.effort > 0.0);
    }

    /// Blank lines and `#` comment lines are not counted.
    #[test]
    fn test_count_loc_python() {
        let source = "\n# Comment\ndef foo():\npass\n# Another comment\n";
        assert_eq!(count_loc(source, Language::Python), 2);
    }

    /// Zero lines of code short-circuits to a perfect score.
    #[test]
    fn test_empty_file_mi() {
        assert_eq!(calculate_mi(1.0, 1.0, 0), 100.0);
    }
}