Skip to main content

tldr_core/quality/
maintainability.rs

1//! Maintainability Index calculation
2//!
3//! Implements Maintainability Index as per spec Section 2.8.2:
4//! MI = max(0, (171 - 5.2*ln(V) - 0.23*G - 16.2*ln(LOC)) * 100/171)
5//!
6//! Where:
7//! - V = Halstead Volume
8//! - G = Cyclomatic Complexity
9//! - LOC = Lines of Code
10//!
11//! # Grading
12//! - A: MI > 85 (highly maintainable)
13//! - B: 65 < MI <= 85 (moderately maintainable)
14//! - C: 45 < MI <= 65 (difficult to maintain)
15//! - D: 25 < MI <= 45 (very difficult to maintain)
16//! - F: MI <= 25 (unmaintainable)
17//!
18//! # References
19//! - Oman, P. and Hagemeister, J. (1992). "Metrics for Assessing a Software System's Maintainability"
20//! - Microsoft Visual Studio Maintainability Index documentation
21
22use std::collections::{HashMap, HashSet};
23use std::path::{Path, PathBuf};
24
25use rayon::prelude::*;
26use serde::{Deserialize, Serialize};
27use walkdir::WalkDir;
28
29use crate::ast::parser::parse;
30use crate::error::TldrError;
31use crate::metrics::calculate_all_complexities_file;
32use crate::types::Language;
33use crate::TldrResult;
34
35// =============================================================================
36// Types
37// =============================================================================
38
39/// Halstead software metrics
40///
41/// Based on distinct operators/operands and their total occurrences:
42/// - n1 = number of distinct operators
43/// - n2 = number of distinct operands
44/// - N1 = total number of operators
45/// - N2 = total number of operands
46#[derive(Debug, Clone, Serialize, Deserialize)]
47pub struct HalsteadMetrics {
48    /// Number of distinct operators (n1)
49    pub distinct_operators: usize,
50    /// Number of distinct operands (n2)
51    pub distinct_operands: usize,
52    /// Total number of operators (N1)
53    pub total_operators: usize,
54    /// Total number of operands (N2)
55    pub total_operands: usize,
56    /// Program vocabulary: n = n1 + n2
57    pub vocabulary: usize,
58    /// Program length: N = N1 + N2
59    pub length: usize,
60    /// Calculated program length: N^ = n1*log2(n1) + n2*log2(n2)
61    pub calculated_length: f64,
62    /// Volume: V = N * log2(n)
63    pub volume: f64,
64    /// Difficulty: D = (n1/2) * (N2/n2)
65    pub difficulty: f64,
66    /// Effort: E = D * V
67    pub effort: f64,
68    /// Time to program: T = E / 18 seconds
69    pub time_to_program: f64,
70    /// Number of delivered bugs: B = V / 3000
71    pub bugs: f64,
72}
73
74impl Default for HalsteadMetrics {
75    fn default() -> Self {
76        Self {
77            distinct_operators: 0,
78            distinct_operands: 0,
79            total_operators: 0,
80            total_operands: 0,
81            vocabulary: 0,
82            length: 0,
83            calculated_length: 0.0,
84            volume: 1.0, // Avoid log(0) issues
85            difficulty: 0.0,
86            effort: 0.0,
87            time_to_program: 0.0,
88            bugs: 0.0,
89        }
90    }
91}
92
93impl HalsteadMetrics {
94    /// Calculate all derived metrics from counts
95    pub fn calculate(&mut self) {
96        self.vocabulary = self.distinct_operators + self.distinct_operands;
97        self.length = self.total_operators + self.total_operands;
98
99        // Calculated length (Halstead's estimate)
100        self.calculated_length = if self.distinct_operators > 0 && self.distinct_operands > 0 {
101            self.distinct_operators as f64 * (self.distinct_operators as f64).log2()
102                + self.distinct_operands as f64 * (self.distinct_operands as f64).log2()
103        } else {
104            0.0
105        };
106
107        // Volume
108        self.volume = if self.vocabulary > 0 && self.length > 0 {
109            self.length as f64 * (self.vocabulary as f64).log2()
110        } else {
111            1.0 // Minimum volume to avoid log(0)
112        };
113
114        // Difficulty
115        self.difficulty = if self.distinct_operands > 0 {
116            (self.distinct_operators as f64 / 2.0)
117                * (self.total_operands as f64 / self.distinct_operands as f64)
118        } else {
119            0.0
120        };
121
122        // Effort
123        self.effort = self.difficulty * self.volume;
124
125        // Time to program (Halstead's estimate: E/18 seconds)
126        self.time_to_program = self.effort / 18.0;
127
128        // Number of delivered bugs (V/3000)
129        self.bugs = self.volume / 3000.0;
130    }
131}
132
133/// Maintainability Index for a single file
134#[derive(Debug, Clone, Serialize, Deserialize)]
135pub struct FileMI {
136    /// File path (relative to project root)
137    pub path: PathBuf,
138    /// Maintainability Index (0-100)
139    pub mi: f64,
140    /// Letter grade (A-F)
141    pub grade: char,
142    /// Lines of code
143    pub loc: usize,
144    /// Average cyclomatic complexity
145    pub avg_complexity: f64,
146    /// Halstead metrics (optional)
147    #[serde(skip_serializing_if = "Option::is_none")]
148    pub halstead: Option<HalsteadMetrics>,
149}
150
151/// Summary of maintainability across files
152#[derive(Debug, Clone, Serialize, Deserialize)]
153pub struct MISummary {
154    /// Average MI across all files
155    pub average_mi: f64,
156    /// Minimum MI found
157    pub min_mi: f64,
158    /// Maximum MI found
159    pub max_mi: f64,
160    /// Number of files analyzed
161    pub files_analyzed: usize,
162    /// Distribution by grade
163    pub by_grade: HashMap<char, usize>,
164    /// Total lines of code
165    pub total_loc: usize,
166}
167
168/// Report from maintainability analysis
169#[derive(Debug, Clone, Serialize, Deserialize)]
170pub struct MaintainabilityReport {
171    /// Per-file maintainability metrics
172    pub files: Vec<FileMI>,
173    /// Summary statistics
174    pub summary: MISummary,
175}
176
177// =============================================================================
178// Main API
179// =============================================================================
180
181/// Calculate Maintainability Index for a file or directory
182///
183/// Uses the Microsoft formula:
184/// MI = max(0, (171 - 5.2*ln(V) - 0.23*G - 16.2*ln(LOC)) * 100/171)
185///
186/// # Arguments
187/// * `path` - File or directory to analyze
188/// * `include_halstead` - Whether to include detailed Halstead metrics
189/// * `language` - Optional language filter
190///
191/// # Returns
192/// * `Ok(MaintainabilityReport)` - Report with MI scores
193/// * `Err(TldrError)` - On file system or parse errors
194///
195/// # Example
196/// ```ignore
197/// use tldr_core::quality::maintainability::maintainability_index;
198///
199/// let report = maintainability_index(Path::new("src/"), true, None)?;
200/// println!("Average MI: {:.1}", report.summary.average_mi);
201/// ```
202pub fn maintainability_index(
203    path: &Path,
204    include_halstead: bool,
205    language: Option<Language>,
206) -> TldrResult<MaintainabilityReport> {
207    // Max file size to analyze (500KB) - skip minified/generated files
208    const MAX_FILE_SIZE: u64 = 500 * 1024;
209
210    // Collect files to analyze
211    let file_paths: Vec<PathBuf> = if path.is_file() {
212        vec![path.to_path_buf()]
213    } else {
214        WalkDir::new(path)
215            .into_iter()
216            .filter_map(|e| e.ok())
217            .filter(|e| e.file_type().is_file())
218            .filter(|e| {
219                let detected = Language::from_path(e.path());
220                match (detected, language) {
221                    (Some(d), Some(l)) => d == l,
222                    (Some(_), None) => true,
223                    _ => false,
224                }
225            })
226            .filter(|e| {
227                e.metadata()
228                    .map(|m| m.len() <= MAX_FILE_SIZE)
229                    .unwrap_or(true)
230            })
231            .map(|e| e.path().to_path_buf())
232            .collect()
233    };
234
235    // Analyze files in parallel using rayon
236    let files: Vec<FileMI> = file_paths
237        .par_iter()
238        .filter_map(|file_path| analyze_file_mi(file_path, include_halstead).ok())
239        .collect();
240
241    // Calculate summary
242    let summary = calculate_summary(&files);
243
244    Ok(MaintainabilityReport { files, summary })
245}
246
247// =============================================================================
248// Internal Implementation
249// =============================================================================
250
251/// Analyze maintainability of a single file
252fn analyze_file_mi(path: &Path, include_halstead: bool) -> TldrResult<FileMI> {
253    // Read and parse file
254    let source = std::fs::read_to_string(path)?;
255    let language = Language::from_path(path).ok_or_else(|| {
256        TldrError::UnsupportedLanguage(
257            path.extension()
258                .and_then(|e| e.to_str())
259                .unwrap_or("unknown")
260                .to_string(),
261        )
262    })?;
263
264    // Count lines of code (non-blank, non-comment)
265    let loc = count_loc(&source, language);
266
267    // Calculate Halstead metrics
268    let halstead = if include_halstead {
269        Some(calculate_halstead(&source, language))
270    } else {
271        None
272    };
273
274    // Get average cyclomatic complexity
275    let avg_complexity = calculate_avg_complexity(path, language);
276
277    // Calculate MI using Halstead volume or estimate
278    let volume = halstead
279        .as_ref()
280        .map(|h| h.volume)
281        .unwrap_or_else(|| estimate_volume(loc));
282
283    let mi = calculate_mi(volume, avg_complexity, loc);
284    let grade = mi_to_grade(mi);
285
286    Ok(FileMI {
287        path: path.to_path_buf(),
288        mi,
289        grade,
290        loc,
291        avg_complexity,
292        halstead,
293    })
294}
295
296/// Calculate Maintainability Index using the Microsoft formula
297/// MI = max(0, (171 - 5.2*ln(V) - 0.23*G - 16.2*ln(LOC)) * 100/171)
298fn calculate_mi(volume: f64, complexity: f64, loc: usize) -> f64 {
299    if loc == 0 {
300        return 100.0; // Empty file is perfectly maintainable
301    }
302
303    let v_ln = if volume > 0.0 { volume.ln() } else { 0.0 };
304    let loc_ln = (loc as f64).ln();
305
306    let raw_mi = 171.0 - 5.2 * v_ln - 0.23 * complexity - 16.2 * loc_ln;
307    let normalized = (raw_mi * 100.0) / 171.0;
308
309    normalized.clamp(0.0, 100.0)
310}
311
312/// Convert MI to letter grade
313fn mi_to_grade(mi: f64) -> char {
314    if mi > 85.0 {
315        'A'
316    } else if mi > 65.0 {
317        'B'
318    } else if mi > 45.0 {
319        'C'
320    } else if mi > 25.0 {
321        'D'
322    } else {
323        'F'
324    }
325}
326
327/// Estimate volume when Halstead metrics not computed
328fn estimate_volume(loc: usize) -> f64 {
329    // Rough estimate: average of 5 tokens per line, vocabulary of sqrt(loc)
330    let n = loc * 5;
331    let vocab = (loc as f64).sqrt().max(1.0);
332    n as f64 * vocab.log2()
333}
334
335/// Count non-blank, non-comment lines of code
336fn count_loc(source: &str, language: Language) -> usize {
337    let comment_prefixes = match language {
338        Language::Python => vec!["#"],
339        Language::TypeScript
340        | Language::JavaScript
341        | Language::Go
342        | Language::Rust
343        | Language::Java
344        | Language::C
345        | Language::Cpp
346        | Language::Kotlin
347        | Language::Swift
348        | Language::CSharp
349        | Language::Scala
350        | Language::Php => vec!["//", "/*", "*"],
351        Language::Ruby | Language::Elixir => vec!["#"],
352        Language::Ocaml => vec!["(*", "*"],
353        Language::Lua | Language::Luau => vec!["--"],
354    };
355
356    source
357        .lines()
358        .filter(|line| {
359            let trimmed = line.trim();
360            !trimmed.is_empty() && !comment_prefixes.iter().any(|p| trimmed.starts_with(p))
361        })
362        .count()
363}
364
365/// Calculate average cyclomatic complexity for all functions in file.
366///
367/// Uses batch complexity calculation to parse the file only once,
368/// instead of re-parsing per function (10-25x faster on large files).
369fn calculate_avg_complexity(path: &Path, _language: Language) -> f64 {
370    if let Ok(complexity_map) = calculate_all_complexities_file(path) {
371        if complexity_map.is_empty() {
372            return 1.0; // Default complexity for files with no functions
373        }
374        let total: u32 = complexity_map.values().map(|m| m.cyclomatic).sum();
375        total as f64 / complexity_map.len() as f64
376    } else {
377        1.0
378    }
379}
380
381/// Calculate Halstead metrics for a source file
382fn calculate_halstead(source: &str, language: Language) -> HalsteadMetrics {
383    let mut operators: HashSet<String> = HashSet::new();
384    let mut operands: HashSet<String> = HashSet::new();
385    let mut total_operators = 0usize;
386    let mut total_operands = 0usize;
387
388    // Parse with tree-sitter
389    let Ok(tree) = parse(source, language) else {
390        return HalsteadMetrics::default();
391    };
392
393    // Walk the tree and categorize tokens
394    let mut cursor = tree.root_node().walk();
395    let mut stack = vec![tree.root_node()];
396
397    while let Some(node) = stack.pop() {
398        let kind = node.kind();
399        let text = node.utf8_text(source.as_bytes()).unwrap_or("");
400
401        // Classify as operator or operand based on node kind
402        if is_operator(kind, language) {
403            operators.insert(text.to_string());
404            total_operators += 1;
405        } else if is_operand(kind, language) {
406            operands.insert(text.to_string());
407            total_operands += 1;
408        }
409
410        // Push children
411        cursor.reset(node);
412        if cursor.goto_first_child() {
413            loop {
414                stack.push(cursor.node());
415                if !cursor.goto_next_sibling() {
416                    break;
417                }
418            }
419        }
420    }
421
422    let mut halstead = HalsteadMetrics {
423        distinct_operators: operators.len(),
424        distinct_operands: operands.len(),
425        total_operators,
426        total_operands,
427        ..Default::default()
428    };
429
430    halstead.calculate();
431    halstead
432}
433
434/// Check if a node kind represents an operator
435fn is_operator(kind: &str, _language: Language) -> bool {
436    matches!(
437        kind,
438        "+" | "-"
439            | "*"
440            | "/"
441            | "%"
442            | "**"
443            | "//"
444            | "=="
445            | "!="
446            | "<"
447            | ">"
448            | "<="
449            | ">="
450            | "="
451            | "+="
452            | "-="
453            | "*="
454            | "/="
455            | "and"
456            | "or"
457            | "not"
458            | "&&"
459            | "||"
460            | "!"
461            | "if"
462            | "else"
463            | "elif"
464            | "for"
465            | "while"
466            | "return"
467            | "try"
468            | "except"
469            | "catch"
470            | "finally"
471            | "def"
472            | "class"
473            | "function"
474            | "fn"
475            | "func"
476            | "import"
477            | "from"
478            | "use"
479            | "require"
480            | "("
481            | ")"
482            | "["
483            | "]"
484            | "{"
485            | "}"
486            | "."
487            | ","
488            | ":"
489            | ";"
490            | "->"
491            | "binary_operator"
492            | "unary_operator"
493            | "comparison_operator"
494            | "boolean_operator"
495            | "assignment"
496    )
497}
498
499/// Check if a node kind represents an operand
500fn is_operand(kind: &str, _language: Language) -> bool {
501    matches!(
502        kind,
503        "identifier"
504            | "string"
505            | "integer"
506            | "float"
507            | "number"
508            | "true"
509            | "false"
510            | "none"
511            | "null"
512            | "nil"
513            | "string_literal"
514            | "integer_literal"
515            | "float_literal"
516            | "property_identifier"
517            | "field_identifier"
518    )
519}
520
521/// Calculate summary statistics
522fn calculate_summary(files: &[FileMI]) -> MISummary {
523    if files.is_empty() {
524        return MISummary {
525            average_mi: 0.0,
526            min_mi: 0.0,
527            max_mi: 0.0,
528            files_analyzed: 0,
529            by_grade: HashMap::new(),
530            total_loc: 0,
531        };
532    }
533
534    let total_mi: f64 = files.iter().map(|f| f.mi).sum();
535    let min_mi = files.iter().map(|f| f.mi).fold(f64::INFINITY, f64::min);
536    let max_mi = files.iter().map(|f| f.mi).fold(f64::NEG_INFINITY, f64::max);
537    let total_loc: usize = files.iter().map(|f| f.loc).sum();
538
539    let mut by_grade: HashMap<char, usize> = HashMap::new();
540    for file in files {
541        *by_grade.entry(file.grade).or_insert(0) += 1;
542    }
543
544    MISummary {
545        average_mi: total_mi / files.len() as f64,
546        min_mi,
547        max_mi,
548        files_analyzed: files.len(),
549        by_grade,
550        total_loc,
551    }
552}
553
554#[cfg(test)]
555mod tests {
556    use super::*;
557
558    #[test]
559    fn test_mi_calculation_simple() {
560        // Simple file: low volume, low complexity, few lines
561        let mi = calculate_mi(100.0, 2.0, 20);
562        assert!(mi > 50.0, "Simple code should be maintainable");
563    }
564
565    #[test]
566    fn test_mi_calculation_complex() {
567        // Complex file: high volume, high complexity, many lines
568        let mi = calculate_mi(10000.0, 50.0, 1000);
569        assert!(mi < 50.0, "Complex code should be less maintainable");
570    }
571
572    #[test]
573    fn test_mi_grades() {
574        assert_eq!(mi_to_grade(90.0), 'A');
575        assert_eq!(mi_to_grade(75.0), 'B');
576        assert_eq!(mi_to_grade(55.0), 'C');
577        assert_eq!(mi_to_grade(35.0), 'D');
578        assert_eq!(mi_to_grade(15.0), 'F');
579    }
580
581    #[test]
582    fn test_halstead_calculation() {
583        let mut h = HalsteadMetrics {
584            distinct_operators: 10,
585            distinct_operands: 20,
586            total_operators: 50,
587            total_operands: 100,
588            ..Default::default()
589        };
590        h.calculate();
591
592        assert_eq!(h.vocabulary, 30);
593        assert_eq!(h.length, 150);
594        assert!(h.volume > 0.0);
595        assert!(h.difficulty > 0.0);
596        assert!(h.effort > 0.0);
597    }
598
599    #[test]
600    fn test_count_loc_python() {
601        let source = r#"
602# Comment
603def foo():
604    pass
605
606# Another comment
607"#;
608        let loc = count_loc(source, Language::Python);
609        assert_eq!(loc, 2); // def foo(): and pass
610    }
611
612    #[test]
613    fn test_empty_file_mi() {
614        let mi = calculate_mi(1.0, 1.0, 0);
615        assert_eq!(mi, 100.0);
616    }
617}