big_code_analysis/metrics/
mi.rs

1// Per-language metric and AST modules deliberately consume the macro-
2// generated tree-sitter token enums via `use crate::*` and `use Foo::*`
3// inside match expressions — explicit imports would list dozens of
4// variants per arm and obscure the per-language token sets that are the
5// point of these files. Allowed at the module level rather than per
6// function so the per-language impl blocks stay readable.
7#![allow(clippy::enum_glob_use, clippy::unused_self, clippy::wildcard_imports)]
8// `u64` integral metric accessors (Halstead length/vocabulary, cyclomatic
9// sum, sloc/cloc) are widened to `f64` for the MI formulas; the casts are
10// bounded by the counts they came from (#530).
11#![allow(clippy::cast_precision_loss)]
12
13use std::fmt;
14
15use super::cyclomatic;
16use super::halstead;
17use super::loc;
18
19use crate::checker::Checker;
20use crate::macros::implement_metric_trait;
21
22use crate::*;
23
/// The `Mi` (Maintainability Index) metric.
///
/// Holds the Halstead, cyclomatic and line-count inputs and derives the
/// three MI variants from them on demand: [`Stats::original`],
/// [`Stats::sei`] and [`Stats::visual_studio`].
#[derive(Default, Clone, Debug, PartialEq)]
#[non_exhaustive]
pub struct Stats {
    /// Halstead program length. Stored alongside the other inputs; not
    /// consumed by the MI formulas below.
    halstead_length: f64,
    /// Halstead vocabulary. Stored alongside the other inputs; not
    /// consumed by the MI formulas below.
    halstead_vocabulary: f64,
    /// Halstead volume `V`, the argument of the logarithm in every variant.
    halstead_volume: f64,
    /// Cyclomatic complexity `G`, weighted by `0.23` in every variant.
    cyclomatic: f64,
    /// Source lines of code, the argument of the second logarithm.
    sloc: f64,
    /// Comment lines as a percentage in [0, 100] (not a ratio in [0, 1]).
    /// Only `sei` consumes this — the SEI MI formula uses `perCM` on
    /// the percentage scale; see issue #241.
    comments_percentage: f64,
}

impl fmt::Display for Stats {
    /// Writes the three MI variants on one line, in the order
    /// `original`, `sei`, `visual_studio`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            f,
            "original: {}, sei: {}, visual_studio: {}",
            self.original(),
            self.sei(),
            self.visual_studio()
        )
    }
}

impl Stats {
    // Intentionally a no-op. MI is a derived metric: the parent space
    // recomputes it from its merged Loc / Cyclomatic / Halstead inputs
    // (`compute_halstead_mi_and_wmc` in `spaces.rs`), so there is nothing
    // to roll up from a child's finalized `Stats`. Combining the fields
    // here would double-apply inputs already captured by the parent's
    // recompute. (Same rationale as `halstead::Stats::merge`.)
    pub(crate) fn merge(&mut self, _other: &Stats) {}

    /// Returns `true` when the logarithmic terms cannot be evaluated.
    ///
    /// Both `halstead_volume` and `sloc` are fed to a logarithm, so each
    /// must be strictly positive. NaN is rejected explicitly: a plain
    /// `<= 0.0` comparison is `false` for NaN and would let it leak
    /// into every MI variant.
    #[inline]
    fn inputs_are_empty(&self) -> bool {
        let unusable = |value: f64| value.is_nan() || value <= 0.0;
        unusable(self.halstead_volume) || unusable(self.sloc)
    }

    /// Returns the `Mi` metric calculated using the original formula:
    /// `171 − 5.2·ln(V) − 0.23·G − 16.2·ln(SLOC)`.
    ///
    /// Returns `0.0` when the inputs are empty. Its value can be negative.
    #[inline]
    #[must_use]
    pub fn original(&self) -> f64 {
        if self.inputs_are_empty() {
            return 0.0;
        }
        // http://www.projectcodemeter.com/cost_estimation/help/GL_maintainability.htm
        171.0 - 5.2 * self.halstead_volume.ln() - 0.23 * self.cyclomatic - 16.2 * self.sloc.ln()
    }

    /// Returns the `Mi` metric calculated using the derivative formula
    /// employed by the Software Engineering Institute (SEI):
    /// `171 − 5.2·log2(V) − 0.23·G − 16.2·log2(SLOC) + 50·sin(√(2.4·CM))`,
    /// where `CM` is the comment percentage in [0, 100].
    ///
    /// Returns `0.0` when the inputs are empty. Its value can be negative.
    #[inline]
    #[must_use]
    pub fn sei(&self) -> f64 {
        if self.inputs_are_empty() {
            return 0.0;
        }
        // http://www.projectcodemeter.com/cost_estimation/help/GL_maintainability.htm
        171.0 - 5.2 * self.halstead_volume.log2() - 0.23 * self.cyclomatic - 16.2 * self.sloc.log2()
            + 50.0 * (self.comments_percentage * 2.4).sqrt().sin()
    }

    /// Returns the `Mi` metric calculated using the derivative formula
    /// employed by Microsoft Visual Studio: the original formula rescaled
    /// by `100 / 171` and floored at `0`.
    ///
    /// Returns `0.0` when the inputs are empty. Never negative.
    #[inline]
    #[must_use]
    pub fn visual_studio(&self) -> f64 {
        // http://www.projectcodemeter.com/cost_estimation/help/GL_maintainability.htm
        // `original()` already reports 0.0 for empty inputs, and 0.0
        // survives the rescale and the floor unchanged, so no separate
        // guard is needed here.
        (self.original() * 100.0 / 171.0).max(0.0)
    }
}
109
110#[doc(hidden)]
111/// Per-language computation of the maintainability index.
112pub(crate) trait Mi
113where
114    Self: Checker,
115{
116    /// Walk `node` and update `stats` with this metric for the language
117    /// implementing the trait.
118    fn compute(
119        loc: &loc::Stats,
120        cyclomatic: &cyclomatic::Stats,
121        halstead: &halstead::Stats,
122        stats: &mut Stats,
123    ) {
124        stats.halstead_length = halstead.length() as f64;
125        stats.halstead_vocabulary = halstead.vocabulary() as f64;
126        stats.halstead_volume = halstead.volume();
127        stats.cyclomatic = cyclomatic.cyclomatic_sum() as f64;
128        stats.sloc = loc.sloc() as f64;
129        // The SEI Maintainability Index expects `perCM` as a percentage
130        // in [0, 100], not a ratio in [0, 1] — `50·sin(√(2.4·CM))` is
131        // nonsensical when CM is two orders of magnitude too small. See
132        // issue #241 and Welker/Oman's original MI definition.
133        stats.comments_percentage = if stats.sloc == 0.0 {
134            0.0
135        } else {
136            // Clamp to [0, 100]: a comment ratio is a percentage of
137            // source lines and cannot exceed 100%. The SEI term
138            // `50·sin(√(2.4·CM))` has no clamp of its own, so an
139            // out-of-range CM (e.g. cloc > sloc) would distort
140            // `sei` by tens of points (issue #461).
141            (loc.cloc() as f64 / stats.sloc * 100.0).clamp(0.0, 100.0)
142        };
143    }
144}
145
146// `Mi` uses the bracketed `[Trait]` arm: this expands to a bare
147// `impl Mi for X {}` which inherits `Mi::compute`'s default trait
148// method body. The default method is fully language-neutral — it
149// combines already-computed Halstead / Cyclomatic / Loc stats into
150// the three MI variants — so this list is NOT a no-op like the named-
151// arm matrices for Abc / Npa / Npm / Wmc. Audited in #188.
152implement_metric_trait!(
153    [Mi],
154    PythonCode,
155    MozjsCode,
156    JavascriptCode,
157    TypescriptCode,
158    TsxCode,
159    RustCode,
160    CppCode,
161    MozcppCode,
162    CCode,
163    ObjcCode,
164    PreprocCode,
165    CcommentCode,
166    JavaCode,
167    KotlinCode,
168    GoCode,
169    PerlCode,
170    BashCode,
171    LuaCode,
172    TclCode,
173    PhpCode,
174    CsharpCode,
175    ElixirCode,
176    RubyCode,
177    GroovyCode,
178    IrulesCode
179);
180
#[cfg(test)]
#[allow(
    clippy::float_cmp,
    clippy::cast_precision_loss,
    clippy::cast_possible_truncation,
    clippy::cast_sign_loss,
    clippy::similar_names,
    clippy::doc_markdown,
    clippy::needless_raw_string_hashes,
    clippy::too_many_lines
)]
mod tests {
    use crate::tools::check_metrics;

    use super::*;

    /// An empty source must report 0 for every MI variant.
    #[test]
    fn mi_empty_file() {
        check_metrics::<PythonParser>("", "empty.py", |metric| {
            assert_eq!(metric.mi.original(), 0.0);
            assert_eq!(metric.mi.sei(), 0.0);
            assert_eq!(metric.mi.visual_studio(), 0.0);
        });
    }

    /// Snapshot of the three MI values for a trivial Python function.
    #[test]
    fn check_mi_metrics() {
        // Only the arithmetic is under test; the snippet itself is
        // arbitrary.
        check_metrics::<PythonParser>(
            "def f():
                 pass",
            "foo.py",
            |metric| {
                insta::assert_json_snapshot!(
                    metric.mi,
                    @r#"
                {
                  "original": 151.2033158832232,
                  "sei": 142.64306171748976,
                  "visual_studio": 88.42299174457497
                }
                "#
                );
            },
        );
    }

    /// Regression test for #241: `sei` must treat `comments_percentage`
    /// as a percentage in [0, 100], not as a ratio in [0, 1].
    #[test]
    fn mi_sei_uses_comments_as_percentage() {
        // Building `Stats` by hand keeps the parsing pipeline out of the
        // picture, so only the formula and its scale are exercised.
        let stats = Stats {
            halstead_length: 4.0,
            halstead_vocabulary: 3.0,
            halstead_volume: 4.0 * f64::log2(3.0),
            cyclomatic: 1.0,
            sloc: 10.0,
            // Half of the lines are comments.
            comments_percentage: 50.0,
        };

        // 171 − 5.2·log2(V) − 0.23·G − 16.2·log2(SLOC): everything in the
        // SEI formula except the comment term.
        let without_comment_term = 171.0
            - 5.2 * stats.halstead_volume.log2()
            - 0.23 * stats.cyclomatic
            - 16.2 * stats.sloc.log2();
        // 50·sin(√(2.4·CM)) for a given CM.
        let comment_term = |cm: f64| 50.0 * (2.4_f64 * cm).sqrt().sin();

        let actual = stats.sei();

        // Percentage scale: 50·sin(√120) ≈ 50·sin(10.954) ≈ −49.96.
        // A tight epsilon catches any change of scale.
        let expected = without_comment_term + comment_term(50.0);
        assert!(
            (actual - expected).abs() < 1e-9,
            "sei = {actual}, expected {expected}",
        );

        // Ratio scale (the pre-fix behaviour): 50·sin(√1.2) ≈ +44.5.
        // The two scalings sit roughly 94 points apart for this input,
        // so demanding a gap above 50 keeps the result well away from
        // the ratio-scaled answer.
        let buggy = without_comment_term + comment_term(0.5);
        assert!(
            (actual - buggy).abs() > 50.0,
            "sei should differ from the ratio-scaled value by >50; got actual={actual}, buggy={buggy}",
        );
    }

    /// Rust gets MI through the default `Mi::compute`; a tiny
    /// straight-line function must yield positive values for all three
    /// variants.
    #[test]
    fn rust_mi_smoke() {
        check_metrics::<RustParser>("fn f() -> i32 { 1 }\n", "foo.rs", |metric| {
            // This is a smoke test: it checks the sign only, not exact
            // values.
            assert!(metric.mi.original() > 0.0);
            assert!(metric.mi.sei() > 0.0);
            assert!(metric.mi.visual_studio() > 0.0);
        });
    }

    /// Go gets MI through the default `Mi::compute`; all three variants
    /// must be positive for a trivial function.
    #[test]
    fn go_mi_smoke() {
        check_metrics::<GoParser>(
            "package main\nfunc f() int { return 1 }\n",
            "foo.go",
            |metric| {
                assert!(metric.mi.original() > 0.0);
                assert!(metric.mi.sei() > 0.0);
                assert!(metric.mi.visual_studio() > 0.0);
            },
        );
    }

    /// Elixir gets MI through the default `Mi::compute`; all three
    /// variants must be positive for a trivial module.
    #[test]
    fn elixir_mi_smoke() {
        check_metrics::<ElixirParser>(
            "defmodule Foo do\n  def f(x), do: x + 1\nend\n",
            "foo.ex",
            |metric| {
                assert!(metric.mi.original() > 0.0);
                assert!(metric.mi.sei() > 0.0);
                assert!(metric.mi.visual_studio() > 0.0);
            },
        );
    }

    /// C++ gets MI through the default `Mi::compute`; all three variants
    /// must be positive for a single-branch function.
    #[test]
    fn cpp_mi_smoke() {
        check_metrics::<CppParser>(
            "int f(int x) { if (x > 0) return 1; return 0; }",
            "foo.cpp",
            |metric| {
                assert!(metric.mi.original() > 0.0);
                assert!(metric.mi.sei() > 0.0);
                assert!(metric.mi.visual_studio() > 0.0);
            },
        );
    }

    /// JavaScript gets MI through the default `Mi::compute`; all three
    /// variants must be positive for a single-branch function.
    #[test]
    fn javascript_mi_smoke() {
        check_metrics::<JavascriptParser>(
            "function f(x) { if (x > 0) return 1; return 0; }",
            "foo.js",
            |metric| {
                assert!(metric.mi.original() > 0.0);
                assert!(metric.mi.sei() > 0.0);
                assert!(metric.mi.visual_studio() > 0.0);
            },
        );
    }

    /// Parity check: Mozjs on the same snippet as the JavaScript smoke
    /// test must also yield positive values.
    #[test]
    fn mozjs_mi_smoke() {
        check_metrics::<MozjsParser>(
            "function f(x) { if (x > 0) return 1; return 0; }",
            "foo.js",
            |metric| {
                assert!(metric.mi.original() > 0.0);
                assert!(metric.mi.sei() > 0.0);
                assert!(metric.mi.visual_studio() > 0.0);
            },
        );
    }

    /// `comments_percentage` feeds the unclamped SEI term
    /// `50·sin(√(2.4·CM))`, so `Mi::compute` must cap it at exactly 100
    /// even for a degenerate `Loc` with `cloc > sloc` (issue #461).
    /// Here `cloc = 2` and `sloc = 1`, i.e. a raw 200%. Dropping the
    /// `.clamp(0.0, 100.0)` in `Mi::compute` makes this test fail.
    #[test]
    fn mi_comments_percentage_clamped() {
        // The fixture's shape is pinned by the two asserts below rather
        // than assumed from the constructor arguments.
        let loc = loc::Stats::with_cloc_sloc(2, 0);
        assert_eq!(loc.cloc(), 2);
        assert_eq!(loc.sloc(), 1);

        let mut mi = Stats::default();
        PythonCode::compute(
            &loc,
            &cyclomatic::Stats::default(),
            &halstead::Stats::default(),
            &mut mi,
        );

        assert!(
            (mi.comments_percentage - 100.0).abs() < f64::EPSILON,
            "comments_percentage must clamp to 100, got {}",
            mi.comments_percentage
        );
    }
}
big_code_analysis/metrics/mi.rs

big_code_analysis/metrics/
mi.rs