Skip to main content

big_code_analysis/metrics/
mi.rs

1// Per-language metric and AST modules deliberately consume the macro-
2// generated tree-sitter token enums via `use crate::*` and `use Foo::*`
3// inside match expressions — explicit imports would list dozens of
4// variants per arm and obscure the per-language token sets that are the
5// point of these files. Allowed at the module level rather than per
6// function so the per-language impl blocks stay readable.
7#![allow(clippy::enum_glob_use, clippy::unused_self, clippy::wildcard_imports)]
8
9use serde::Serialize;
10use serde::ser::{SerializeStruct, Serializer};
11use std::fmt;
12
13use super::cyclomatic;
14use super::halstead;
15use super::loc;
16
17use crate::checker::Checker;
18use crate::macros::implement_metric_trait;
19
20use crate::*;
21
/// The `Mi` metric.
///
/// Stores the inputs of the maintainability-index formulas rather than
/// their results: `mi_original`, `mi_sei` and `mi_visual_studio` are
/// evaluated on demand from these fields.
#[derive(Default, Clone, Debug)]
pub struct Stats {
    /// Value of `halstead::Stats::length()` as recorded by `Mi::compute`.
    /// None of the MI formulas in this module read it.
    halstead_length: f64,
    /// Value of `halstead::Stats::vocabulary()` as recorded by
    /// `Mi::compute`. None of the MI formulas in this module read it.
    halstead_vocabulary: f64,
    /// Value of `halstead::Stats::volume()`; the `V` term of every MI
    /// variant.
    halstead_volume: f64,
    /// Value of `cyclomatic::Stats::cyclomatic_sum()`; the `G` term of
    /// every MI variant.
    cyclomatic: f64,
    /// Value of `loc::Stats::sloc()`; the line-count term of every MI
    /// variant.
    sloc: f64,
    /// Comment lines as a percentage in [0, 100] (not a ratio in [0, 1]).
    /// Only `mi_sei` consumes this — the SEI MI formula uses `perCM` on
    /// the percentage scale; see issue #241.
    comments_percentage: f64,
}
35
36impl Serialize for Stats {
37    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
38    where
39        S: Serializer,
40    {
41        let mut st = serializer.serialize_struct("maintainability_index", 3)?;
42        st.serialize_field("mi_original", &self.mi_original())?;
43        st.serialize_field("mi_sei", &self.mi_sei())?;
44        st.serialize_field("mi_visual_studio", &self.mi_visual_studio())?;
45        st.end()
46    }
47}
48
49impl fmt::Display for Stats {
50    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
51        write!(
52            f,
53            "mi_original: {}, mi_sei: {}, mi_visual_studio: {}",
54            self.mi_original(),
55            self.mi_sei(),
56            self.mi_visual_studio()
57        )
58    }
59}
60
61impl Stats {
62    pub(crate) fn merge(&mut self, _other: &Stats) {}
63
64    #[inline]
65    fn inputs_are_empty(&self) -> bool {
66        self.halstead_volume <= 0.0 || self.sloc <= 0.0
67    }
68
69    /// Returns the `Mi` metric calculated using the original formula.
70    ///
71    /// Its value can be negative.
72    #[inline]
73    #[must_use]
74    pub fn mi_original(&self) -> f64 {
75        if self.inputs_are_empty() {
76            return 0.0;
77        }
78        // http://www.projectcodemeter.com/cost_estimation/help/GL_maintainability.htm
79        171.0 - 5.2 * (self.halstead_volume).ln() - 0.23 * self.cyclomatic - 16.2 * self.sloc.ln()
80    }
81
82    /// Returns the `Mi` metric calculated using the derivative formula
83    /// employed by the Software Engineering Insitute (SEI).
84    ///
85    /// Its value can be negative.
86    #[inline]
87    #[must_use]
88    pub fn mi_sei(&self) -> f64 {
89        if self.inputs_are_empty() {
90            return 0.0;
91        }
92        // http://www.projectcodemeter.com/cost_estimation/help/GL_maintainability.htm
93        171.0 - 5.2 * self.halstead_volume.log2() - 0.23 * self.cyclomatic - 16.2 * self.sloc.log2()
94            + 50.0 * (self.comments_percentage * 2.4).sqrt().sin()
95    }
96
97    /// Returns the `Mi` metric calculated using the derivative formula
98    /// employed by Microsoft Visual Studio.
99    #[inline]
100    #[must_use]
101    pub fn mi_visual_studio(&self) -> f64 {
102        if self.inputs_are_empty() {
103            return 0.0;
104        }
105        // http://www.projectcodemeter.com/cost_estimation/help/GL_maintainability.htm
106        let formula = 171.0
107            - 5.2 * self.halstead_volume.ln()
108            - 0.23 * self.cyclomatic
109            - 16.2 * self.sloc.ln();
110        (formula * 100.0 / 171.0).max(0.)
111    }
112}
113
114#[doc(hidden)]
115/// Per-language computation of the maintainability index.
116pub trait Mi
117where
118    Self: Checker,
119{
120    /// Walk `node` and update `stats` with this metric for the language
121    /// implementing the trait.
122    fn compute(
123        loc: &loc::Stats,
124        cyclomatic: &cyclomatic::Stats,
125        halstead: &halstead::Stats,
126        stats: &mut Stats,
127    ) {
128        stats.halstead_length = halstead.length();
129        stats.halstead_vocabulary = halstead.vocabulary();
130        stats.halstead_volume = halstead.volume();
131        stats.cyclomatic = cyclomatic.cyclomatic_sum();
132        stats.sloc = loc.sloc();
133        // The SEI Maintainability Index expects `perCM` as a percentage
134        // in [0, 100], not a ratio in [0, 1] — `50·sin(√(2.4·CM))` is
135        // nonsensical when CM is two orders of magnitude too small. See
136        // issue #241 and Welker/Oman's original MI definition.
137        stats.comments_percentage = if stats.sloc == 0.0 {
138            0.0
139        } else {
140            loc.cloc() / stats.sloc * 100.0
141        };
142    }
143}
144
// `Mi` uses the bracketed `[Trait]` arm of `implement_metric_trait!`:
// it expands to a bare `impl Mi for X {}`, which inherits the default
// body of `Mi::compute`. That default is fully language-neutral — it
// only combines the already-computed Halstead / Cyclomatic / Loc stats
// into the inputs of the three MI variants — so every type listed here
// gets a working MI, and this list is NOT a no-op like the named-arm
// matrices for Abc / Npa / Npm / Wmc. Audited in #188.
implement_metric_trait!(
    [Mi],
    PythonCode,
    MozjsCode,
    JavascriptCode,
    TypescriptCode,
    TsxCode,
    RustCode,
    CppCode,
    PreprocCode,
    CcommentCode,
    JavaCode,
    KotlinCode,
    GoCode,
    PerlCode,
    BashCode,
    LuaCode,
    TclCode,
    PhpCode,
    CsharpCode,
    ElixirCode,
    RubyCode,
    GroovyCode
);
175
#[cfg(test)]
#[allow(
    clippy::float_cmp,
    clippy::cast_precision_loss,
    clippy::cast_possible_truncation,
    clippy::cast_sign_loss,
    clippy::similar_names,
    clippy::doc_markdown,
    clippy::needless_raw_string_hashes,
    clippy::too_many_lines
)]
mod tests {
    use crate::tools::check_metrics;

    use super::*;

    /// Shared body of the per-language smoke tests: all three MI variants
    /// must be strictly positive. No exact values are pinned here.
    #[track_caller]
    fn assert_all_variants_positive(mi: &Stats) {
        assert!(mi.mi_original() > 0.0);
        assert!(mi.mi_sei() > 0.0);
        assert!(mi.mi_visual_studio() > 0.0);
    }

    #[test]
    fn mi_empty_file() {
        // An empty source has no volume and no lines, so every variant
        // falls back to exactly 0.0.
        check_metrics::<PythonParser>("", "empty.py", |metric| {
            assert_eq!(metric.mi.mi_original(), 0.0);
            assert_eq!(metric.mi.mi_sei(), 0.0);
            assert_eq!(metric.mi.mi_visual_studio(), 0.0);
        });
    }

    #[test]
    fn check_mi_metrics() {
        // Pins the numeric output of the three formulas; the snippet being
        // analysed is incidental.
        check_metrics::<PythonParser>(
            "def f():
                 pass",
            "foo.py",
            |metric| {
                insta::assert_json_snapshot!(
                    metric.mi,
                    @r###"
                    {
                      "mi_original": 151.2033158832232,
                      "mi_sei": 142.64306171748976,
                      "mi_visual_studio": 88.42299174457497
                    }"###
                );
            },
        );
    }

    #[test]
    fn mi_sei_uses_comments_as_percentage() {
        // Regression test for #241. `Stats::comments_percentage` holds a
        // percentage in [0, 100] and `mi_sei` feeds it unchanged into
        // `50·sin(√(2.4·CM))`. Building `Stats` by hand keeps the parsing
        // pipeline out of the picture and pins the scale of `perCM`.
        let stats = Stats {
            halstead_length: 4.0,
            halstead_vocabulary: 3.0,
            halstead_volume: 4.0 * f64::log2(3.0),
            cyclomatic: 1.0,
            sloc: 10.0,
            // Half of the lines are comments.
            comments_percentage: 50.0,
        };
        let actual = stats.mi_sei();

        // Everything except the comment term:
        // 171 − 5.2·log2(V) − 0.23·G − 16.2·log2(SLOC).
        let base = 171.0
            - 5.2 * stats.halstead_volume.log2()
            - 0.23 * stats.cyclomatic
            - 16.2 * stats.sloc.log2();

        // Percentage scale: 50·sin(√120) ≈ 50·sin(10.954) ≈ −49.96.
        let expected = base + 50.0 * (2.4_f64 * 50.0).sqrt().sin();
        assert!(
            (actual - expected).abs() < 1e-9,
            "mi_sei = {actual}, expected {expected}",
        );

        // Ratio scale (the pre-fix behaviour): 50·sin(√1.2) ≈ +44.46. The
        // two comment terms are therefore about 94 apart, so demanding a
        // gap of more than 50 keeps the result well away from the
        // ratio-scaled answer.
        let buggy = base + 50.0 * (2.4_f64 * 0.5).sqrt().sin();
        assert!(
            (actual - buggy).abs() > 50.0,
            "mi_sei should differ from the ratio-scaled value by >50; got actual={actual}, buggy={buggy}",
        );
    }

    #[test]
    fn rust_mi_smoke() {
        // Rust derives MI from its Loc / Cyclomatic / Halstead stats via
        // the default `Mi::compute`. Smoke test on a tiny straight-line
        // function: it only guards against the cascade collapsing to zero
        // or going negative.
        check_metrics::<RustParser>("fn f() -> i32 { 1 }\n", "foo.rs", |metric| {
            assert_all_variants_positive(&metric.mi);
        });
    }

    #[test]
    fn go_mi_smoke() {
        // Go uses the default `Mi::compute`; MI follows from its Loc /
        // Cyclomatic / Halstead stats.
        check_metrics::<GoParser>(
            "package main\nfunc f() int { return 1 }\n",
            "foo.go",
            |metric| assert_all_variants_positive(&metric.mi),
        );
    }

    #[test]
    fn elixir_mi_smoke() {
        // Elixir uses the default `Mi::compute`; MI follows from its Loc /
        // Cyclomatic / Halstead stats.
        check_metrics::<ElixirParser>(
            "defmodule Foo do\n  def f(x), do: x + 1\nend\n",
            "foo.ex",
            |metric| assert_all_variants_positive(&metric.mi),
        );
    }

    #[test]
    fn cpp_mi_smoke() {
        // C++ uses the default `Mi::compute`; MI follows from its Loc /
        // Cyclomatic / Halstead stats.
        check_metrics::<CppParser>(
            "int f(int x) { if (x > 0) return 1; return 0; }",
            "foo.cpp",
            |metric| assert_all_variants_positive(&metric.mi),
        );
    }

    #[test]
    fn javascript_mi_smoke() {
        // JavaScript uses the default `Mi::compute`; MI follows from its
        // Loc / Cyclomatic / Halstead stats.
        check_metrics::<JavascriptParser>(
            "function f(x) { if (x > 0) return 1; return 0; }",
            "foo.js",
            |metric| assert_all_variants_positive(&metric.mi),
        );
    }

    #[test]
    fn mozjs_mi_smoke() {
        // Mozjs is fed the same snippet as the JavaScript smoke test and
        // must satisfy the same positivity check.
        check_metrics::<MozjsParser>(
            "function f(x) { if (x > 0) return 1; return 0; }",
            "foo.js",
            |metric| assert_all_variants_positive(&metric.mi),
        );
    }
}