big_code_analysis/metrics/mi.rs
1// Per-language metric and AST modules deliberately consume the macro-
2// generated tree-sitter token enums via `use crate::*` and `use Foo::*`
3// inside match expressions — explicit imports would list dozens of
4// variants per arm and obscure the per-language token sets that are the
5// point of these files. Allowed at the module level rather than per
6// function so the per-language impl blocks stay readable.
7#![allow(clippy::enum_glob_use, clippy::unused_self, clippy::wildcard_imports)]
8
9use serde::Serialize;
10use serde::ser::{SerializeStruct, Serializer};
11use std::fmt;
12
13use super::cyclomatic;
14use super::halstead;
15use super::loc;
16
17use crate::checker::Checker;
18use crate::macros::implement_metric_trait;
19
20use crate::*;
21
/// The `Mi` metric.
///
/// Stores the raw inputs from which the three maintainability-index
/// variants (`mi_original`, `mi_sei`, `mi_visual_studio`) are derived on
/// demand; the index values themselves are not stored.
#[derive(Default, Clone, Debug)]
pub struct Stats {
    /// Value of `halstead::Stats::length`, recorded by `Mi::compute`.
    /// None of the MI formulas in this file read it.
    halstead_length: f64,
    /// Value of `halstead::Stats::vocabulary`, recorded by `Mi::compute`.
    /// None of the MI formulas in this file read it.
    halstead_vocabulary: f64,
    /// Value of `halstead::Stats::volume`; the `V` term of every MI
    /// formula.
    halstead_volume: f64,
    /// Value of `cyclomatic::Stats::cyclomatic_sum`; the `G` term of every
    /// MI formula.
    cyclomatic: f64,
    /// Value of `loc::Stats::sloc`; the lines-of-code term of every MI
    /// formula.
    sloc: f64,
    /// Comment lines as a percentage in [0, 100] (not a ratio in [0, 1]).
    /// Only `mi_sei` consumes this — the SEI MI formula uses `perCM` on
    /// the percentage scale; see issue #241.
    ///
    /// `Mi::compute` derives it as `cloc / sloc * 100`, or `0` when `sloc`
    /// is zero.
    comments_percentage: f64,
}
35
36impl Serialize for Stats {
37 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
38 where
39 S: Serializer,
40 {
41 let mut st = serializer.serialize_struct("maintainability_index", 3)?;
42 st.serialize_field("mi_original", &self.mi_original())?;
43 st.serialize_field("mi_sei", &self.mi_sei())?;
44 st.serialize_field("mi_visual_studio", &self.mi_visual_studio())?;
45 st.end()
46 }
47}
48
49impl fmt::Display for Stats {
50 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
51 write!(
52 f,
53 "mi_original: {}, mi_sei: {}, mi_visual_studio: {}",
54 self.mi_original(),
55 self.mi_sei(),
56 self.mi_visual_studio()
57 )
58 }
59}
60
61impl Stats {
62 pub(crate) fn merge(&mut self, _other: &Stats) {}
63
64 #[inline]
65 fn inputs_are_empty(&self) -> bool {
66 self.halstead_volume <= 0.0 || self.sloc <= 0.0
67 }
68
69 /// Returns the `Mi` metric calculated using the original formula.
70 ///
71 /// Its value can be negative.
72 #[inline]
73 #[must_use]
74 pub fn mi_original(&self) -> f64 {
75 if self.inputs_are_empty() {
76 return 0.0;
77 }
78 // http://www.projectcodemeter.com/cost_estimation/help/GL_maintainability.htm
79 171.0 - 5.2 * (self.halstead_volume).ln() - 0.23 * self.cyclomatic - 16.2 * self.sloc.ln()
80 }
81
82 /// Returns the `Mi` metric calculated using the derivative formula
83 /// employed by the Software Engineering Insitute (SEI).
84 ///
85 /// Its value can be negative.
86 #[inline]
87 #[must_use]
88 pub fn mi_sei(&self) -> f64 {
89 if self.inputs_are_empty() {
90 return 0.0;
91 }
92 // http://www.projectcodemeter.com/cost_estimation/help/GL_maintainability.htm
93 171.0 - 5.2 * self.halstead_volume.log2() - 0.23 * self.cyclomatic - 16.2 * self.sloc.log2()
94 + 50.0 * (self.comments_percentage * 2.4).sqrt().sin()
95 }
96
97 /// Returns the `Mi` metric calculated using the derivative formula
98 /// employed by Microsoft Visual Studio.
99 #[inline]
100 #[must_use]
101 pub fn mi_visual_studio(&self) -> f64 {
102 if self.inputs_are_empty() {
103 return 0.0;
104 }
105 // http://www.projectcodemeter.com/cost_estimation/help/GL_maintainability.htm
106 let formula = 171.0
107 - 5.2 * self.halstead_volume.ln()
108 - 0.23 * self.cyclomatic
109 - 16.2 * self.sloc.ln();
110 (formula * 100.0 / 171.0).max(0.)
111 }
112}
113
114#[doc(hidden)]
115/// Per-language computation of the maintainability index.
116pub trait Mi
117where
118 Self: Checker,
119{
120 /// Walk `node` and update `stats` with this metric for the language
121 /// implementing the trait.
122 fn compute(
123 loc: &loc::Stats,
124 cyclomatic: &cyclomatic::Stats,
125 halstead: &halstead::Stats,
126 stats: &mut Stats,
127 ) {
128 stats.halstead_length = halstead.length();
129 stats.halstead_vocabulary = halstead.vocabulary();
130 stats.halstead_volume = halstead.volume();
131 stats.cyclomatic = cyclomatic.cyclomatic_sum();
132 stats.sloc = loc.sloc();
133 // The SEI Maintainability Index expects `perCM` as a percentage
134 // in [0, 100], not a ratio in [0, 1] — `50·sin(√(2.4·CM))` is
135 // nonsensical when CM is two orders of magnitude too small. See
136 // issue #241 and Welker/Oman's original MI definition.
137 stats.comments_percentage = if stats.sloc == 0.0 {
138 0.0
139 } else {
140 loc.cloc() / stats.sloc * 100.0
141 };
142 }
143}
144
// `Mi` uses the bracketed `[Trait]` arm of `implement_metric_trait!`:
// for each listed language it expands to a bare `impl Mi for X {}`,
// which inherits the default body of `Mi::compute`. That default is
// fully language-neutral — it combines already-computed Halstead /
// Cyclomatic / Loc stats into the inputs of the three MI variants — so
// this list is NOT a no-op like the named-arm matrices for
// Abc / Npa / Npm / Wmc. Audited in #188.
implement_metric_trait!(
    [Mi],
    PythonCode,
    MozjsCode,
    JavascriptCode,
    TypescriptCode,
    TsxCode,
    RustCode,
    CppCode,
    PreprocCode,
    CcommentCode,
    JavaCode,
    KotlinCode,
    GoCode,
    PerlCode,
    BashCode,
    LuaCode,
    TclCode,
    PhpCode,
    CsharpCode,
    ElixirCode,
    RubyCode,
    GroovyCode
);
175
#[cfg(test)]
#[allow(
    clippy::float_cmp,
    clippy::cast_precision_loss,
    clippy::cast_possible_truncation,
    clippy::cast_sign_loss,
    clippy::similar_names,
    clippy::doc_markdown,
    clippy::needless_raw_string_hashes,
    clippy::too_many_lines
)]
mod tests {
    use crate::tools::check_metrics;

    use super::*;

    #[test]
    fn mi_empty_file() {
        // An empty source file must report 0.0 for every MI variant.
        check_metrics::<PythonParser>("", "empty.py", |metric| {
            let mi = &metric.mi;
            assert_eq!(mi.mi_original(), 0.0);
            assert_eq!(mi.mi_sei(), 0.0);
            assert_eq!(mi.mi_visual_studio(), 0.0);
        });
    }

    #[test]
    fn check_mi_metrics() {
        // This test checks that the MI metric is computed correctly; it
        // verifies the calculations, so the source code used as input is
        // irrelevant.
        check_metrics::<PythonParser>(
            "def f():
                 pass",
            "foo.py",
            |metric| {
                insta::assert_json_snapshot!(
                    metric.mi,
                    @r###"
                    {
                      "mi_original": 151.2033158832232,
                      "mi_sei": 142.64306171748976,
                      "mi_visual_studio": 88.42299174457497
                    }"###
                );
            },
        );
    }

    #[test]
    fn mi_sei_uses_comments_as_percentage() {
        // Regression test for #241. `Stats::comments_percentage` is stored
        // as a percentage in [0, 100], so `mi_sei` plugs it directly into
        // `50·sin(√(2.4·CM))`. Constructing `Stats` directly isolates the
        // formula from the parsing pipeline and pins the scale the SEI
        // formula expects: `perCM` is a percentage, not a ratio. With
        // the pre-fix ratio scaling, the result would be off by ~94.
        let stats = Stats {
            halstead_length: 4.0,
            halstead_vocabulary: 3.0,
            halstead_volume: 4.0 * f64::log2(3.0),
            cyclomatic: 1.0,
            sloc: 10.0,
            // 50% of lines are comments — drives the sin term hard.
            comments_percentage: 50.0,
        };
        // Hand-derived: 171 − 5.2·log2(V) − 0.23·G − 16.2·log2(SLOC)
        // + 50·sin(√(2.4·50)). The fifth term equals
        // 50·sin(√120) ≈ 50·sin(10.954) ≈ −50·0.99915… ≈ −49.96,
        // which only lands in this neighborhood when CM is treated
        // as a percentage; the ratio-scaled bug would put the term
        // near +44.46 instead. Asserting a tight epsilon catches a
        // reintroduction of the ratio-vs-percentage scaling bug.
        let expected = 171.0
            - 5.2 * stats.halstead_volume.log2()
            - 0.23 * stats.cyclomatic
            - 16.2 * stats.sloc.log2()
            + 50.0 * (2.4_f64 * 50.0).sqrt().sin();
        let actual = stats.mi_sei();
        assert!(
            (actual - expected).abs() < 1e-9,
            "mi_sei = {actual}, expected {expected}",
        );
        // Sanity check against the pre-fix (ratio) behaviour: ensure
        // the value is nowhere near the ratio-scaled answer.
        let buggy = 171.0
            - 5.2 * stats.halstead_volume.log2()
            - 0.23 * stats.cyclomatic
            - 16.2 * stats.sloc.log2()
            + 50.0 * (2.4_f64 * 0.5).sqrt().sin();
        // For this input the ratio-vs-percentage flip moves the sin term
        // from ≈ +44.46 to ≈ −49.96, a gap of ≈ 94.4, so the 50.0 bound
        // sits well inside that gap. Partial scaling mistakes (e.g.
        // dividing the percentage by 10, which would still differ from
        // `buggy` by ≈ 60) are caught by the exact-value assertion
        // above, not by this check.
        assert!(
            (actual - buggy).abs() > 50.0,
            "mi_sei should differ from the ratio-scaled value by >50; got actual={actual}, buggy={buggy}",
        );
    }

    #[test]
    fn rust_mi_smoke() {
        // Rust now derives MI from the populated Loc / Cyclomatic /
        // Halstead trios via the default trait method. This smoke test
        // runs the cascade on a tiny straight-line function and checks
        // that every MI variant comes out strictly positive.
        check_metrics::<RustParser>("fn f() -> i32 { 1 }\n", "foo.rs", |metric| {
            let mi = &metric.mi;
            // Expected inputs per the original author: SLOC = 1,
            // cyclomatic = 1 (no branches), and Halstead n1 = 4 (`fn`,
            // `->`, `{`, `}` operators visible at unit level), n2 = 2
            // (`f` identifier, `1` literal). The default `Mi::compute`
            // then folds those into the three MI variants.
            // NOTE: none of those values is asserted here — only the
            // sign of each variant is checked, so this does not pin the
            // exact numbers.
            assert!(mi.mi_original() > 0.0);
            assert!(mi.mi_sei() > 0.0);
            assert!(mi.mi_visual_studio() > 0.0);
        });
    }

    #[test]
    fn go_mi_smoke() {
        // Go uses the default `Mi::compute`; once Loc / Cyclomatic /
        // Halstead are populated (they are for Go), MI is derived
        // automatically. Smoke-check that every variant is positive.
        check_metrics::<GoParser>(
            "package main\nfunc f() int { return 1 }\n",
            "foo.go",
            |metric| {
                let mi = &metric.mi;
                assert!(mi.mi_original() > 0.0);
                assert!(mi.mi_sei() > 0.0);
                assert!(mi.mi_visual_studio() > 0.0);
            },
        );
    }

    #[test]
    fn elixir_mi_smoke() {
        // Elixir uses the default `Mi::compute`; with Loc / Cyclomatic
        // / Halstead populated (and now Cognitive / Abc as well), MI
        // derives automatically. Smoke-check that every variant is
        // positive.
        check_metrics::<ElixirParser>(
            "defmodule Foo do\n  def f(x), do: x + 1\nend\n",
            "foo.ex",
            |metric| {
                let mi = &metric.mi;
                assert!(mi.mi_original() > 0.0);
                assert!(mi.mi_sei() > 0.0);
                assert!(mi.mi_visual_studio() > 0.0);
            },
        );
    }

    #[test]
    fn cpp_mi_smoke() {
        // C++ uses the default `Mi::compute`; Loc / Cyclomatic /
        // Halstead all already populated for C++, and Abc / Npa / Npm
        // / Wmc now contribute too. MI derives from Loc + Cyclomatic
        // + Halstead via the default. Smoke-check that every variant is
        // positive.
        check_metrics::<CppParser>(
            "int f(int x) { if (x > 0) return 1; return 0; }",
            "foo.cpp",
            |metric| {
                let mi = &metric.mi;
                assert!(mi.mi_original() > 0.0);
                assert!(mi.mi_sei() > 0.0);
                assert!(mi.mi_visual_studio() > 0.0);
            },
        );
    }

    #[test]
    fn javascript_mi_smoke() {
        // JavaScript uses the default `Mi::compute`; Loc / Cyclomatic
        // / Halstead were already populated, and Abc / Npa / Npm /
        // Wmc now contribute too. Smoke-check that every variant is
        // positive.
        check_metrics::<JavascriptParser>(
            "function f(x) { if (x > 0) return 1; return 0; }",
            "foo.js",
            |metric| {
                let mi = &metric.mi;
                assert!(mi.mi_original() > 0.0);
                assert!(mi.mi_sei() > 0.0);
                assert!(mi.mi_visual_studio() > 0.0);
            },
        );
    }

    #[test]
    fn mozjs_mi_smoke() {
        // Mozjs shares JavaScript's MI cascade; this runs the same input
        // through the Mozjs parser as a parity smoke check.
        check_metrics::<MozjsParser>(
            "function f(x) { if (x > 0) return 1; return 0; }",
            "foo.js",
            |metric| {
                let mi = &metric.mi;
                assert!(mi.mi_original() > 0.0);
                assert!(mi.mi_sei() > 0.0);
                assert!(mi.mi_visual_studio() > 0.0);
            },
        );
    }
}
380}