1use std::collections::{HashMap, HashSet};
23use std::path::{Path, PathBuf};
24
25use rayon::prelude::*;
26use serde::{Deserialize, Serialize};
27use walkdir::WalkDir;
28
29use crate::ast::parser::parse;
30use crate::error::TldrError;
31use crate::metrics::calculate_all_complexities_file;
32use crate::types::Language;
33use crate::TldrResult;
34
35#[derive(Debug, Clone, Serialize, Deserialize)]
47pub struct HalsteadMetrics {
48 pub distinct_operators: usize,
50 pub distinct_operands: usize,
52 pub total_operators: usize,
54 pub total_operands: usize,
56 pub vocabulary: usize,
58 pub length: usize,
60 pub calculated_length: f64,
62 pub volume: f64,
64 pub difficulty: f64,
66 pub effort: f64,
68 pub time_to_program: f64,
70 pub bugs: f64,
72}
73
74impl Default for HalsteadMetrics {
75 fn default() -> Self {
76 Self {
77 distinct_operators: 0,
78 distinct_operands: 0,
79 total_operators: 0,
80 total_operands: 0,
81 vocabulary: 0,
82 length: 0,
83 calculated_length: 0.0,
84 volume: 1.0, difficulty: 0.0,
86 effort: 0.0,
87 time_to_program: 0.0,
88 bugs: 0.0,
89 }
90 }
91}
92
93impl HalsteadMetrics {
94 pub fn calculate(&mut self) {
96 self.vocabulary = self.distinct_operators + self.distinct_operands;
97 self.length = self.total_operators + self.total_operands;
98
99 self.calculated_length = if self.distinct_operators > 0 && self.distinct_operands > 0 {
101 self.distinct_operators as f64 * (self.distinct_operators as f64).log2()
102 + self.distinct_operands as f64 * (self.distinct_operands as f64).log2()
103 } else {
104 0.0
105 };
106
107 self.volume = if self.vocabulary > 0 && self.length > 0 {
109 self.length as f64 * (self.vocabulary as f64).log2()
110 } else {
111 1.0 };
113
114 self.difficulty = if self.distinct_operands > 0 {
116 (self.distinct_operators as f64 / 2.0)
117 * (self.total_operands as f64 / self.distinct_operands as f64)
118 } else {
119 0.0
120 };
121
122 self.effort = self.difficulty * self.volume;
124
125 self.time_to_program = self.effort / 18.0;
127
128 self.bugs = self.volume / 3000.0;
130 }
131}
132
133#[derive(Debug, Clone, Serialize, Deserialize)]
135pub struct FileMI {
136 pub path: PathBuf,
138 pub mi: f64,
140 pub grade: char,
142 pub loc: usize,
144 pub avg_complexity: f64,
146 #[serde(skip_serializing_if = "Option::is_none")]
148 pub halstead: Option<HalsteadMetrics>,
149}
150
151#[derive(Debug, Clone, Serialize, Deserialize)]
153pub struct MISummary {
154 pub average_mi: f64,
156 pub min_mi: f64,
158 pub max_mi: f64,
160 pub files_analyzed: usize,
162 pub by_grade: HashMap<char, usize>,
164 pub total_loc: usize,
166}
167
168#[derive(Debug, Clone, Serialize, Deserialize)]
170pub struct MaintainabilityReport {
171 pub files: Vec<FileMI>,
173 pub summary: MISummary,
175}
176
177pub fn maintainability_index(
203 path: &Path,
204 include_halstead: bool,
205 language: Option<Language>,
206) -> TldrResult<MaintainabilityReport> {
207 const MAX_FILE_SIZE: u64 = 500 * 1024;
209
210 let file_paths: Vec<PathBuf> = if path.is_file() {
212 vec![path.to_path_buf()]
213 } else {
214 WalkDir::new(path)
215 .into_iter()
216 .filter_map(|e| e.ok())
217 .filter(|e| e.file_type().is_file())
218 .filter(|e| {
219 let detected = Language::from_path(e.path());
220 match (detected, language) {
221 (Some(d), Some(l)) => d == l,
222 (Some(_), None) => true,
223 _ => false,
224 }
225 })
226 .filter(|e| {
227 e.metadata()
228 .map(|m| m.len() <= MAX_FILE_SIZE)
229 .unwrap_or(true)
230 })
231 .map(|e| e.path().to_path_buf())
232 .collect()
233 };
234
235 let files: Vec<FileMI> = file_paths
237 .par_iter()
238 .filter_map(|file_path| analyze_file_mi(file_path, include_halstead).ok())
239 .collect();
240
241 let summary = calculate_summary(&files);
243
244 Ok(MaintainabilityReport { files, summary })
245}
246
247fn analyze_file_mi(path: &Path, include_halstead: bool) -> TldrResult<FileMI> {
253 let source = std::fs::read_to_string(path)?;
255 let language = Language::from_path(path).ok_or_else(|| {
256 TldrError::UnsupportedLanguage(
257 path.extension()
258 .and_then(|e| e.to_str())
259 .unwrap_or("unknown")
260 .to_string(),
261 )
262 })?;
263
264 let loc = count_loc(&source, language);
266
267 let halstead = if include_halstead {
269 Some(calculate_halstead(&source, language))
270 } else {
271 None
272 };
273
274 let avg_complexity = calculate_avg_complexity(path, language);
276
277 let volume = halstead
279 .as_ref()
280 .map(|h| h.volume)
281 .unwrap_or_else(|| estimate_volume(loc));
282
283 let mi = calculate_mi(volume, avg_complexity, loc);
284 let grade = mi_to_grade(mi);
285
286 Ok(FileMI {
287 path: path.to_path_buf(),
288 mi,
289 grade,
290 loc,
291 avg_complexity,
292 halstead,
293 })
294}
295
296fn calculate_mi(volume: f64, complexity: f64, loc: usize) -> f64 {
299 if loc == 0 {
300 return 100.0; }
302
303 let v_ln = if volume > 0.0 { volume.ln() } else { 0.0 };
304 let loc_ln = (loc as f64).ln();
305
306 let raw_mi = 171.0 - 5.2 * v_ln - 0.23 * complexity - 16.2 * loc_ln;
307 let normalized = (raw_mi * 100.0) / 171.0;
308
309 normalized.clamp(0.0, 100.0)
310}
311
312fn mi_to_grade(mi: f64) -> char {
314 if mi > 85.0 {
315 'A'
316 } else if mi > 65.0 {
317 'B'
318 } else if mi > 45.0 {
319 'C'
320 } else if mi > 25.0 {
321 'D'
322 } else {
323 'F'
324 }
325}
326
327fn estimate_volume(loc: usize) -> f64 {
329 let n = loc * 5;
331 let vocab = (loc as f64).sqrt().max(1.0);
332 n as f64 * vocab.log2()
333}
334
335fn count_loc(source: &str, language: Language) -> usize {
337 let comment_prefixes = match language {
338 Language::Python => vec!["#"],
339 Language::TypeScript
340 | Language::JavaScript
341 | Language::Go
342 | Language::Rust
343 | Language::Java
344 | Language::C
345 | Language::Cpp
346 | Language::Kotlin
347 | Language::Swift
348 | Language::CSharp
349 | Language::Scala
350 | Language::Php => vec!["//", "/*", "*"],
351 Language::Ruby | Language::Elixir => vec!["#"],
352 Language::Ocaml => vec!["(*", "*"],
353 Language::Lua | Language::Luau => vec!["--"],
354 };
355
356 source
357 .lines()
358 .filter(|line| {
359 let trimmed = line.trim();
360 !trimmed.is_empty() && !comment_prefixes.iter().any(|p| trimmed.starts_with(p))
361 })
362 .count()
363}
364
365fn calculate_avg_complexity(path: &Path, _language: Language) -> f64 {
370 if let Ok(complexity_map) = calculate_all_complexities_file(path) {
371 if complexity_map.is_empty() {
372 return 1.0; }
374 let total: u32 = complexity_map.values().map(|m| m.cyclomatic).sum();
375 total as f64 / complexity_map.len() as f64
376 } else {
377 1.0
378 }
379}
380
381fn calculate_halstead(source: &str, language: Language) -> HalsteadMetrics {
383 let mut operators: HashSet<String> = HashSet::new();
384 let mut operands: HashSet<String> = HashSet::new();
385 let mut total_operators = 0usize;
386 let mut total_operands = 0usize;
387
388 let Ok(tree) = parse(source, language) else {
390 return HalsteadMetrics::default();
391 };
392
393 let mut cursor = tree.root_node().walk();
395 let mut stack = vec![tree.root_node()];
396
397 while let Some(node) = stack.pop() {
398 let kind = node.kind();
399 let text = node.utf8_text(source.as_bytes()).unwrap_or("");
400
401 if is_operator(kind, language) {
403 operators.insert(text.to_string());
404 total_operators += 1;
405 } else if is_operand(kind, language) {
406 operands.insert(text.to_string());
407 total_operands += 1;
408 }
409
410 cursor.reset(node);
412 if cursor.goto_first_child() {
413 loop {
414 stack.push(cursor.node());
415 if !cursor.goto_next_sibling() {
416 break;
417 }
418 }
419 }
420 }
421
422 let mut halstead = HalsteadMetrics {
423 distinct_operators: operators.len(),
424 distinct_operands: operands.len(),
425 total_operators,
426 total_operands,
427 ..Default::default()
428 };
429
430 halstead.calculate();
431 halstead
432}
433
434fn is_operator(kind: &str, _language: Language) -> bool {
436 matches!(
437 kind,
438 "+" | "-"
439 | "*"
440 | "/"
441 | "%"
442 | "**"
443 | "//"
444 | "=="
445 | "!="
446 | "<"
447 | ">"
448 | "<="
449 | ">="
450 | "="
451 | "+="
452 | "-="
453 | "*="
454 | "/="
455 | "and"
456 | "or"
457 | "not"
458 | "&&"
459 | "||"
460 | "!"
461 | "if"
462 | "else"
463 | "elif"
464 | "for"
465 | "while"
466 | "return"
467 | "try"
468 | "except"
469 | "catch"
470 | "finally"
471 | "def"
472 | "class"
473 | "function"
474 | "fn"
475 | "func"
476 | "import"
477 | "from"
478 | "use"
479 | "require"
480 | "("
481 | ")"
482 | "["
483 | "]"
484 | "{"
485 | "}"
486 | "."
487 | ","
488 | ":"
489 | ";"
490 | "->"
491 | "binary_operator"
492 | "unary_operator"
493 | "comparison_operator"
494 | "boolean_operator"
495 | "assignment"
496 )
497}
498
499fn is_operand(kind: &str, _language: Language) -> bool {
501 matches!(
502 kind,
503 "identifier"
504 | "string"
505 | "integer"
506 | "float"
507 | "number"
508 | "true"
509 | "false"
510 | "none"
511 | "null"
512 | "nil"
513 | "string_literal"
514 | "integer_literal"
515 | "float_literal"
516 | "property_identifier"
517 | "field_identifier"
518 )
519}
520
521fn calculate_summary(files: &[FileMI]) -> MISummary {
523 if files.is_empty() {
524 return MISummary {
525 average_mi: 0.0,
526 min_mi: 0.0,
527 max_mi: 0.0,
528 files_analyzed: 0,
529 by_grade: HashMap::new(),
530 total_loc: 0,
531 };
532 }
533
534 let total_mi: f64 = files.iter().map(|f| f.mi).sum();
535 let min_mi = files.iter().map(|f| f.mi).fold(f64::INFINITY, f64::min);
536 let max_mi = files.iter().map(|f| f.mi).fold(f64::NEG_INFINITY, f64::max);
537 let total_loc: usize = files.iter().map(|f| f.loc).sum();
538
539 let mut by_grade: HashMap<char, usize> = HashMap::new();
540 for file in files {
541 *by_grade.entry(file.grade).or_insert(0) += 1;
542 }
543
544 MISummary {
545 average_mi: total_mi / files.len() as f64,
546 min_mi,
547 max_mi,
548 files_analyzed: files.len(),
549 by_grade,
550 total_loc,
551 }
552}
553
554#[cfg(test)]
555mod tests {
556 use super::*;
557
558 #[test]
559 fn test_mi_calculation_simple() {
560 let mi = calculate_mi(100.0, 2.0, 20);
562 assert!(mi > 50.0, "Simple code should be maintainable");
563 }
564
565 #[test]
566 fn test_mi_calculation_complex() {
567 let mi = calculate_mi(10000.0, 50.0, 1000);
569 assert!(mi < 50.0, "Complex code should be less maintainable");
570 }
571
572 #[test]
573 fn test_mi_grades() {
574 assert_eq!(mi_to_grade(90.0), 'A');
575 assert_eq!(mi_to_grade(75.0), 'B');
576 assert_eq!(mi_to_grade(55.0), 'C');
577 assert_eq!(mi_to_grade(35.0), 'D');
578 assert_eq!(mi_to_grade(15.0), 'F');
579 }
580
581 #[test]
582 fn test_halstead_calculation() {
583 let mut h = HalsteadMetrics {
584 distinct_operators: 10,
585 distinct_operands: 20,
586 total_operators: 50,
587 total_operands: 100,
588 ..Default::default()
589 };
590 h.calculate();
591
592 assert_eq!(h.vocabulary, 30);
593 assert_eq!(h.length, 150);
594 assert!(h.volume > 0.0);
595 assert!(h.difficulty > 0.0);
596 assert!(h.effort > 0.0);
597 }
598
599 #[test]
600 fn test_count_loc_python() {
601 let source = r#"
602# Comment
603def foo():
604 pass
605
606# Another comment
607"#;
608 let loc = count_loc(source, Language::Python);
609 assert_eq!(loc, 2); }
611
612 #[test]
613 fn test_empty_file_mi() {
614 let mi = calculate_mi(1.0, 1.0, 0);
615 assert_eq!(mi, 100.0);
616 }
617}