rust_code_analysis/metrics/
halstead.rs

1use fxhash::FxHashMap;
2use serde::ser::{SerializeStruct, Serializer};
3use serde::Serialize;
4use std::fmt;
5
6use crate::checker::Checker;
7use crate::getter::Getter;
8
9use crate::*;
10
11/// The `Halstead` metric suite.
12#[derive(Default, Clone, Debug)]
13pub struct Stats {
14    u_operators: u64,
15    operators: u64,
16    u_operands: u64,
17    operands: u64,
18}
19
20/// Specifies the type of nodes accepted by the `Halstead` metric.
21pub enum HalsteadType {
22    /// The node is an `Halstead` operator
23    Operator,
24    /// The node is an `Halstead` operand
25    Operand,
26    /// The node is unknown to the `Halstead` metric
27    Unknown,
28}
29
30#[doc(hidden)]
31#[derive(Debug, Default, Clone)]
32pub struct HalsteadMaps<'a> {
33    pub(crate) operators: FxHashMap<u16, u64>,
34    pub(crate) operands: FxHashMap<&'a [u8], u64>,
35}
36
37impl<'a> HalsteadMaps<'a> {
38    pub(crate) fn new() -> Self {
39        HalsteadMaps {
40            operators: FxHashMap::default(),
41            operands: FxHashMap::default(),
42        }
43    }
44
45    pub(crate) fn merge(&mut self, other: &HalsteadMaps<'a>) {
46        for (k, v) in other.operators.iter() {
47            *self.operators.entry(*k).or_insert(0) += v;
48        }
49        for (k, v) in other.operands.iter() {
50            *self.operands.entry(*k).or_insert(0) += v;
51        }
52    }
53
54    pub(crate) fn finalize(&self, stats: &mut Stats) {
55        stats.u_operators = self.operators.len() as u64;
56        stats.operators = self.operators.values().sum::<u64>();
57        stats.u_operands = self.operands.len() as u64;
58        stats.operands = self.operands.values().sum::<u64>();
59    }
60}
61
62impl Serialize for Stats {
63    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
64    where
65        S: Serializer,
66    {
67        let mut st = serializer.serialize_struct("halstead", 14)?;
68        st.serialize_field("n1", &self.u_operators())?;
69        st.serialize_field("N1", &self.operators())?;
70        st.serialize_field("n2", &self.u_operands())?;
71        st.serialize_field("N2", &self.operands())?;
72        st.serialize_field("length", &self.length())?;
73        st.serialize_field("estimated_program_length", &self.estimated_program_length())?;
74        st.serialize_field("purity_ratio", &self.purity_ratio())?;
75        st.serialize_field("vocabulary", &self.vocabulary())?;
76        st.serialize_field("volume", &self.volume())?;
77        st.serialize_field("difficulty", &self.difficulty())?;
78        st.serialize_field("level", &self.level())?;
79        st.serialize_field("effort", &self.effort())?;
80        st.serialize_field("time", &self.time())?;
81        st.serialize_field("bugs", &self.bugs())?;
82        st.end()
83    }
84}
85
86impl fmt::Display for Stats {
87    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
88        write!(
89            f,
90            "n1: {}, \
91             N1: {}, \
92             n2: {}, \
93             N2: {}, \
94             length: {}, \
95             estimated program length: {}, \
96             purity ratio: {}, \
97             size: {}, \
98             volume: {}, \
99             difficulty: {}, \
100             level: {}, \
101             effort: {}, \
102             time: {}, \
103             bugs: {}",
104            self.u_operators(),
105            self.operators(),
106            self.u_operands(),
107            self.operands(),
108            self.length(),
109            self.estimated_program_length(),
110            self.purity_ratio(),
111            self.vocabulary(),
112            self.volume(),
113            self.difficulty(),
114            self.level(),
115            self.effort(),
116            self.time(),
117            self.bugs(),
118        )
119    }
120}
121
122impl Stats {
123    pub(crate) fn merge(&mut self, _other: &Stats) {}
124
125    /// Returns `η1`, the number of distinct operators
126    #[inline(always)]
127    pub fn u_operators(&self) -> f64 {
128        self.u_operators as f64
129    }
130
131    /// Returns `N1`, the number of total operators
132    #[inline(always)]
133    pub fn operators(&self) -> f64 {
134        self.operators as f64
135    }
136
137    /// Returns `η2`, the number of distinct operands
138    #[inline(always)]
139    pub fn u_operands(&self) -> f64 {
140        self.u_operands as f64
141    }
142
143    /// Returns `N2`, the number of total operands
144    #[inline(always)]
145    pub fn operands(&self) -> f64 {
146        self.operands as f64
147    }
148
149    /// Returns the program length
150    #[inline(always)]
151    pub fn length(&self) -> f64 {
152        self.operands() + self.operators()
153    }
154
155    /// Returns the calculated estimated program length
156    #[inline(always)]
157    pub fn estimated_program_length(&self) -> f64 {
158        self.u_operators() * self.u_operators().log2()
159            + self.u_operands() * self.u_operands().log2()
160    }
161
162    /// Returns the purity ratio
163    #[inline(always)]
164    pub fn purity_ratio(&self) -> f64 {
165        self.estimated_program_length() / self.length()
166    }
167
168    /// Returns the program vocabulary
169    #[inline(always)]
170    pub fn vocabulary(&self) -> f64 {
171        self.u_operands() + self.u_operators()
172    }
173
174    /// Returns the program volume.
175    ///
176    /// Unit of measurement: bits
177    #[inline(always)]
178    pub fn volume(&self) -> f64 {
179        // Assumes a uniform binary encoding for the vocabulary is used.
180        self.length() * self.vocabulary().log2()
181    }
182
183    /// Returns the estimated difficulty required to program
184    #[inline(always)]
185    pub fn difficulty(&self) -> f64 {
186        self.u_operators() / 2. * self.operands() / self.u_operands()
187    }
188
189    /// Returns the estimated level of difficulty required to program
190    #[inline(always)]
191    pub fn level(&self) -> f64 {
192        1. / self.difficulty()
193    }
194
195    /// Returns the estimated effort required to program
196    #[inline(always)]
197    pub fn effort(&self) -> f64 {
198        self.difficulty() * self.volume()
199    }
200
201    /// Returns the estimated time required to program.
202    ///
203    /// Unit of measurement: seconds
204    #[inline(always)]
205    pub fn time(&self) -> f64 {
206        // The floating point `18.` aims to describe the processing rate of the
207        // human brain. It is called Stoud number, S, and its
208        // unit of measurement is moments/seconds.
209        // A moment is the time required by the human brain to carry out the
210        // most elementary decision.
211        // 5 <= S <= 20. Halstead uses 18.
212        // The value of S has been empirically developed from psychological
213        // reasoning, and its recommended value for
214        // programming applications is 18.
215        //
216        // Source: https://www.geeksforgeeks.org/software-engineering-halsteads-software-metrics/
217        self.effort() / 18.
218    }
219
220    /// Returns the estimated number of delivered bugs.
221    ///
222    /// This metric represents the average amount of work a programmer can do
223    /// without introducing an error.
224    #[inline(always)]
225    pub fn bugs(&self) -> f64 {
226        // The floating point `3000.` represents the number of elementary
227        // mental discriminations.
228        // A mental discrimination, in psychology, is the ability to perceive
229        // and respond to differences among stimuli.
230        //
231        // The value above is obtained starting from a constant that
232        // is different for every language and assumes that natural language is
233        // the language of the brain.
234        // For programming languages, the English language constant
235        // has been considered.
236        //
237        // After every 3000 mental discriminations a result is produced.
238        // This result, whether correct or incorrect, is more than likely
239        // either used as an input for the next operation or is output to the
240        // environment.
241        // If incorrect the error should become apparent.
242        // Thus, an opportunity for error occurs every 3000
243        // mental discriminations.
244        //
245        // Source: https://docs.lib.purdue.edu/cgi/viewcontent.cgi?article=1145&context=cstech
246        self.effort().powf(2. / 3.) / 3000.
247    }
248}
249
250#[doc(hidden)]
251pub trait Halstead
252where
253    Self: Checker,
254{
255    fn compute<'a>(_node: &Node<'a>, _code: &'a [u8], _halstead_maps: &mut HalsteadMaps<'a>) {}
256}
257
258#[inline(always)]
259fn get_id<'a>(node: &Node<'a>, code: &'a [u8]) -> &'a [u8] {
260    &code[node.object().start_byte()..node.object().end_byte()]
261}
262
263#[inline(always)]
264fn compute_halstead<'a, T: Getter>(
265    node: &Node<'a>,
266    code: &'a [u8],
267    halstead_maps: &mut HalsteadMaps<'a>,
268) {
269    match T::get_op_type(node) {
270        HalsteadType::Operator => {
271            *halstead_maps
272                .operators
273                .entry(node.object().kind_id())
274                .or_insert(0) += 1;
275        }
276        HalsteadType::Operand => {
277            *halstead_maps
278                .operands
279                .entry(get_id(node, code))
280                .or_insert(0) += 1;
281        }
282        _ => {}
283    }
284}
285
286impl Halstead for PythonCode {
287    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
288        compute_halstead::<Self>(node, code, halstead_maps);
289    }
290}
291
292impl Halstead for MozjsCode {
293    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
294        compute_halstead::<Self>(node, code, halstead_maps);
295    }
296}
297
298impl Halstead for JavascriptCode {
299    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
300        compute_halstead::<Self>(node, code, halstead_maps);
301    }
302}
303
304impl Halstead for TypescriptCode {
305    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
306        compute_halstead::<Self>(node, code, halstead_maps);
307    }
308}
309
310impl Halstead for TsxCode {
311    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
312        compute_halstead::<Self>(node, code, halstead_maps);
313    }
314}
315
316impl Halstead for RustCode {
317    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
318        compute_halstead::<Self>(node, code, halstead_maps);
319    }
320}
321
322impl Halstead for CppCode {
323    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
324        compute_halstead::<Self>(node, code, halstead_maps);
325    }
326}
327
328impl Halstead for JavaCode {
329    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
330        compute_halstead::<Self>(node, code, halstead_maps);
331    }
332}
333
334impl Halstead for PreprocCode {}
335impl Halstead for CcommentCode {}
336
337#[cfg(test)]
338mod tests {
339    use std::path::PathBuf;
340
341    use super::*;
342
343    #[test]
344    fn python_operators_and_operands() {
345        check_metrics!(
346            "def foo():
347                 def bar():
348                     def toto():
349                        a = 1 + 1
350                     b = 2 + a
351                 c = 3 + 3",
352            "foo.py",
353            PythonParser,
354            halstead,
355            [
356                (u_operators, 3, usize), // def, =, +
357                (operators, 9, usize),   // def, def, def, =, =, =, +, +, +
358                (u_operands, 9, usize),  // foo, bar, toto, a, b, c, 1, 2, 3
359                (operands, 12, usize)    // foo, bar, toto, a, b, c, 1, 1, 2, a, 3, 3
360            ]
361        );
362    }
363
364    #[test]
365    fn cpp_operators_and_operands() {
366        // Define operators and operands for C/C++ grammar according to this specification:
367        // https://www.verifysoft.com/en_halstead_metrics.html
368        // The only difference with the specification above is that
369        // primitive types are treated as operators, since the definition of a
370        // primitive type can be seen as the creation of a slot of a certain size.
371        // i.e. The `int a;` definition creates a n-bytes slot.
372        check_metrics!(
373            "main()
374            {
375              int a, b, c, avg;
376              scanf(\"%d %d %d\", &a, &b, &c);
377              avg = (a + b + c) / 3;
378              printf(\"avg = %d\", avg);
379            }",
380            "foo.c",
381            CppParser,
382            halstead,
383            [
384                (u_operators, 9, usize), // (), {}, int, &, =, +, /, ,, ;
385                (operators, 24, usize),
386                (u_operands, 10, usize), // main, a, b, c, avg, scanf, "%d %d %d", 3, printf, "avg = %d"
387                (operands, 18, usize)
388            ]
389        );
390    }
391
392    #[test]
393    fn rust_operators_and_operands() {
394        check_metrics!(
395            "fn main() {
396              let a = 5; let b = 5; let c = 5;
397              let avg = (a + b + c) / 3;
398              println!(\"{}\", avg);
399            }",
400            "foo.rs",
401            RustParser,
402            halstead,
403            [
404                // FIXME tree-sitter-rust does not parse the comma inside the println! macro
405                (u_operators, 9, usize), // fn, (), {}, let, =, +, /, ;, !
406                (operators, 22, usize),
407                (u_operands, 9, usize), // main, a, b, c, avg, 5, 3, println, "{}"
408                (operands, 15, usize)
409            ]
410        );
411    }
412
413    #[test]
414    fn javascript_operators_and_operands() {
415        check_metrics!(
416            "function main() {
417              var a, b, c, avg;
418              a = 5; b = 5; c = 5;
419              avg = (a + b + c) / 3;
420              console.log(\"{}\", avg);
421            }",
422            "foo.js",
423            JavascriptParser,
424            halstead,
425            [
426                (u_operators, 10, usize), // function, (), {}, var, =, +, /, ,, ., ;
427                (operators, 24, usize),
428                (u_operands, 11, usize), // main, a, b, c, avg, 3, 5, console.log, console, log, "{}"
429                (operands, 21, usize)
430            ]
431        );
432    }
433
434    #[test]
435    fn mozjs_operators_and_operands() {
436        check_metrics!(
437            "function main() {
438              var a, b, c, avg;
439              a = 5; b = 5; c = 5;
440              avg = (a + b + c) / 3;
441              console.log(\"{}\", avg);
442            }",
443            "foo.js",
444            MozjsParser,
445            halstead,
446            [
447                (u_operators, 10, usize), // function, (), {}, var, =, +, /, ,, ., ;
448                (operators, 24, usize),
449                (u_operands, 11, usize), // main, a, b, c, avg, 3, 5, console.log, console, log, "{}"
450                (operands, 21, usize)
451            ]
452        );
453    }
454
455    #[test]
456    fn typescript_operators_and_operands() {
457        check_metrics!(
458            "function main() {
459              var a, b, c, avg;
460              a = 5; b = 5; c = 5;
461              avg = (a + b + c) / 3;
462              console.log(\"{}\", avg);
463            }",
464            "foo.ts",
465            TypescriptParser,
466            halstead,
467            [
468                (u_operators, 10, usize), // function, (), {}, var, =, +, /, ,, ., ;
469                (operators, 24, usize),
470                (u_operands, 11, usize), // main, a, b, c, avg, 3, 5, console.log, console, log, "{}"
471                (operands, 21, usize)
472            ]
473        );
474    }
475
476    #[test]
477    fn tsx_operators_and_operands() {
478        check_metrics!(
479            "function main() {
480              var a, b, c, avg;
481              a = 5; b = 5; c = 5;
482              avg = (a + b + c) / 3;
483              console.log(\"{}\", avg);
484            }",
485            "foo.ts",
486            TsxParser,
487            halstead,
488            [
489                (u_operators, 10, usize), // function, (), {}, var, =, +, /, ,, ., ;
490                (operators, 24, usize),
491                (u_operands, 11, usize), // main, a, b, c, avg, 3, 5, console.log, console, log, "{}"
492                (operands, 21, usize)
493            ]
494        );
495    }
496
497    #[test]
498    fn python_wrong_operators() {
499        check_metrics!(
500            "()[]{}",
501            "foo.py",
502            PythonParser,
503            halstead,
504            [(u_operators, 0, usize), (operators, 0, usize)]
505        );
506    }
507
508    #[test]
509    fn python_check_metrics() {
510        check_metrics!(
511            "def f():
512                 pass",
513            "foo.py",
514            PythonParser,
515            halstead,
516            [(vocabulary, 3, usize), (length, 3, usize)],
517            [
518                (volume, 4.754_887_502_163_468),
519                (estimated_program_length, 2.0),
520                (difficulty, 1.0),
521                (effort, 4.754_887_502_163_468),
522                (purity_ratio, 0.666_666_666_666_666_6),
523                (level, 1.0),
524                (time, 0.264_160_416_786_859_36),
525                (bugs, 0.000_942_552_557_372_941_4)
526            ]
527        );
528    }
529
530    #[test]
531    fn java_operators_and_operands() {
532        check_metrics!(
533            "public class Main {
534            public static void main(String args[]) {
535                  int a, b, c, avg;
536                  a = 5; b = 5; c = 5;
537                  avg = (a + b + c) / 3;
538                  MessageFormat.format(\"{0}\", avg);
539                }
540            }",
541            "foo.java",
542            JavaParser,
543            halstead,
544            [
545                (u_operators, 16, usize), // { void ; ( String [ ] ) , int = + / format . }
546                (operators, 34, usize),
547                (u_operands, 12, usize), // Main main args a b c avg 5 3 MessageFormat format "{0}"
548                (operands, 22, usize)
549            ]
550        );
551    }
552}