Skip to main content

rust_code_analysis_code_split/metrics/
halstead.rs

1use std::collections::HashMap;
2
3use serde::Serialize;
4use serde::ser::{SerializeStruct, Serializer};
5use std::fmt;
6
7use crate::checker::Checker;
8use crate::getter::Getter;
9use crate::macros::implement_metric_trait;
10
11use crate::*;
12
/// The `Halstead` metric suite.
///
/// Only the four raw counters are stored; every other measure exposed by
/// this type is derived from them on demand.
#[derive(Clone, Debug, Default)]
pub struct Stats {
    /// Number of distinct operators.
    u_operators: u64,
    /// Total number of operators.
    operators: u64,
    /// Number of distinct operands.
    u_operands: u64,
    /// Total number of operands.
    operands: u64,
}
21
/// Specifies the type of nodes accepted by the `Halstead` metric.
///
/// The enum is a plain, field-less tag, so it is cheap to copy and can be
/// compared and printed directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HalsteadType {
    /// The node is a `Halstead` operator
    Operator,
    /// The node is a `Halstead` operand
    Operand,
    /// The node is unknown to the `Halstead` metric
    Unknown,
}
31
/// Occurrence counters collected for the `Halstead` metric.
///
/// The lifetime `'a` is the lifetime of the source buffer whose byte slices
/// are used as operand keys.
#[derive(Clone, Debug, Default)]
pub struct HalsteadMaps<'a> {
    /// Occurrences of each operator, keyed by a `u16` identifier.
    pub(crate) operators: HashMap<u16, u64>,
    /// Occurrences of each operand, keyed by the operand's bytes.
    pub(crate) operands: HashMap<&'a [u8], u64>,
}
37
38impl<'a> HalsteadMaps<'a> {
39    pub(crate) fn new() -> Self {
40        HalsteadMaps {
41            operators: HashMap::default(),
42            operands: HashMap::default(),
43        }
44    }
45
46    pub(crate) fn merge(&mut self, other: &HalsteadMaps<'a>) {
47        for (k, v) in other.operators.iter() {
48            *self.operators.entry(*k).or_insert(0) += v;
49        }
50        for (k, v) in other.operands.iter() {
51            *self.operands.entry(*k).or_insert(0) += v;
52        }
53    }
54
55    pub(crate) fn finalize(&self, stats: &mut Stats) {
56        stats.u_operators = self.operators.len() as u64;
57        stats.operators = self.operators.values().sum::<u64>();
58        stats.u_operands = self.operands.len() as u64;
59        stats.operands = self.operands.values().sum::<u64>();
60    }
61}
62
63impl Serialize for Stats {
64    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
65    where
66        S: Serializer,
67    {
68        let mut st = serializer.serialize_struct("halstead", 14)?;
69        st.serialize_field("n1", &self.u_operators())?;
70        st.serialize_field("N1", &self.operators())?;
71        st.serialize_field("n2", &self.u_operands())?;
72        st.serialize_field("N2", &self.operands())?;
73        st.serialize_field("length", &self.length())?;
74        st.serialize_field("estimated_program_length", &self.estimated_program_length())?;
75        st.serialize_field("purity_ratio", &self.purity_ratio())?;
76        st.serialize_field("vocabulary", &self.vocabulary())?;
77        st.serialize_field("volume", &self.volume())?;
78        st.serialize_field("difficulty", &self.difficulty())?;
79        st.serialize_field("level", &self.level())?;
80        st.serialize_field("effort", &self.effort())?;
81        st.serialize_field("time", &self.time())?;
82        st.serialize_field("bugs", &self.bugs())?;
83        st.end()
84    }
85}
86
87impl fmt::Display for Stats {
88    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
89        write!(
90            f,
91            "n1: {}, \
92             N1: {}, \
93             n2: {}, \
94             N2: {}, \
95             length: {}, \
96             estimated program length: {}, \
97             purity ratio: {}, \
98             size: {}, \
99             volume: {}, \
100             difficulty: {}, \
101             level: {}, \
102             effort: {}, \
103             time: {}, \
104             bugs: {}",
105            self.u_operators(),
106            self.operators(),
107            self.u_operands(),
108            self.operands(),
109            self.length(),
110            self.estimated_program_length(),
111            self.purity_ratio(),
112            self.vocabulary(),
113            self.volume(),
114            self.difficulty(),
115            self.level(),
116            self.effort(),
117            self.time(),
118            self.bugs(),
119        )
120    }
121}
122
123impl Stats {
124    pub(crate) fn merge(&mut self, _other: &Stats) {}
125
126    /// Returns `η1`, the number of distinct operators
127    #[inline(always)]
128    pub fn u_operators(&self) -> f64 {
129        self.u_operators as f64
130    }
131
132    /// Returns `N1`, the number of total operators
133    #[inline(always)]
134    pub fn operators(&self) -> f64 {
135        self.operators as f64
136    }
137
138    /// Returns `η2`, the number of distinct operands
139    #[inline(always)]
140    pub fn u_operands(&self) -> f64 {
141        self.u_operands as f64
142    }
143
144    /// Returns `N2`, the number of total operands
145    #[inline(always)]
146    pub fn operands(&self) -> f64 {
147        self.operands as f64
148    }
149
150    /// Returns the program length
151    #[inline(always)]
152    pub fn length(&self) -> f64 {
153        self.operands() + self.operators()
154    }
155
156    /// Returns the calculated estimated program length
157    #[inline(always)]
158    pub fn estimated_program_length(&self) -> f64 {
159        self.u_operators() * self.u_operators().log2()
160            + self.u_operands() * self.u_operands().log2()
161    }
162
163    /// Returns the purity ratio
164    #[inline(always)]
165    pub fn purity_ratio(&self) -> f64 {
166        self.estimated_program_length() / self.length()
167    }
168
169    /// Returns the program vocabulary
170    #[inline(always)]
171    pub fn vocabulary(&self) -> f64 {
172        self.u_operands() + self.u_operators()
173    }
174
175    /// Returns the program volume.
176    ///
177    /// Unit of measurement: bits
178    #[inline(always)]
179    pub fn volume(&self) -> f64 {
180        // Assumes a uniform binary encoding for the vocabulary is used.
181        self.length() * self.vocabulary().log2()
182    }
183
184    /// Returns the estimated difficulty required to program
185    #[inline(always)]
186    pub fn difficulty(&self) -> f64 {
187        self.u_operators() / 2. * self.operands() / self.u_operands()
188    }
189
190    /// Returns the estimated level of difficulty required to program
191    #[inline(always)]
192    pub fn level(&self) -> f64 {
193        1. / self.difficulty()
194    }
195
196    /// Returns the estimated effort required to program
197    #[inline(always)]
198    pub fn effort(&self) -> f64 {
199        self.difficulty() * self.volume()
200    }
201
202    /// Returns the estimated time required to program.
203    ///
204    /// Unit of measurement: seconds
205    #[inline(always)]
206    pub fn time(&self) -> f64 {
207        // The floating point `18.` aims to describe the processing rate of the
208        // human brain. It is called Stoud number, S, and its
209        // unit of measurement is moments/seconds.
210        // A moment is the time required by the human brain to carry out the
211        // most elementary decision.
212        // 5 <= S <= 20. Halstead uses 18.
213        // The value of S has been empirically developed from psychological
214        // reasoning, and its recommended value for
215        // programming applications is 18.
216        //
217        // Source: https://www.geeksforgeeks.org/software-engineering-halsteads-software-metrics/
218        self.effort() / 18.
219    }
220
221    /// Returns the estimated number of delivered bugs.
222    ///
223    /// This metric represents the average amount of work a programmer can do
224    /// without introducing an error.
225    #[inline(always)]
226    pub fn bugs(&self) -> f64 {
227        // The floating point `3000.` represents the number of elementary
228        // mental discriminations.
229        // A mental discrimination, in psychology, is the ability to perceive
230        // and respond to differences among stimuli.
231        //
232        // The value above is obtained starting from a constant that
233        // is different for every language and assumes that natural language is
234        // the language of the brain.
235        // For programming languages, the English language constant
236        // has been considered.
237        //
238        // After every 3000 mental discriminations a result is produced.
239        // This result, whether correct or incorrect, is more than likely
240        // either used as an input for the next operation or is output to the
241        // environment.
242        // If incorrect the error should become apparent.
243        // Thus, an opportunity for error occurs every 3000
244        // mental discriminations.
245        //
246        // Source: https://docs.lib.purdue.edu/cgi/viewcontent.cgi?article=1145&context=cstech
247        self.effort().powf(2. / 3.) / 3000.
248    }
249}
250
251pub trait Halstead
252where
253    Self: Checker,
254{
255    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>);
256}
257
258#[inline(always)]
259fn get_id<'a>(node: &Node<'a>, code: &'a [u8]) -> &'a [u8] {
260    &code[node.start_byte()..node.end_byte()]
261}
262
263#[inline(always)]
264fn compute_halstead<'a, T: Getter>(
265    node: &Node<'a>,
266    code: &'a [u8],
267    halstead_maps: &mut HalsteadMaps<'a>,
268) {
269    match T::get_op_type(node) {
270        HalsteadType::Operator => {
271            *halstead_maps.operators.entry(node.kind_id()).or_insert(0) += 1;
272        }
273        HalsteadType::Operand => {
274            *halstead_maps
275                .operands
276                .entry(get_id(node, code))
277                .or_insert(0) += 1;
278        }
279        _ => {}
280    }
281}
282
283impl Halstead for PythonCode {
284    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
285        compute_halstead::<Self>(node, code, halstead_maps);
286    }
287}
288
289impl Halstead for MozjsCode {
290    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
291        compute_halstead::<Self>(node, code, halstead_maps);
292    }
293}
294
295impl Halstead for JavascriptCode {
296    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
297        compute_halstead::<Self>(node, code, halstead_maps);
298    }
299}
300
301impl Halstead for TypescriptCode {
302    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
303        compute_halstead::<Self>(node, code, halstead_maps);
304    }
305}
306
307impl Halstead for TsxCode {
308    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
309        compute_halstead::<Self>(node, code, halstead_maps);
310    }
311}
312
313impl Halstead for RustCode {
314    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
315        compute_halstead::<Self>(node, code, halstead_maps);
316    }
317}
318
319impl Halstead for CppCode {
320    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
321        compute_halstead::<Self>(node, code, halstead_maps);
322    }
323}
324
325impl Halstead for JavaCode {
326    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
327        compute_halstead::<Self>(node, code, halstead_maps);
328    }
329}
330
331implement_metric_trait!(Halstead, KotlinCode, PreprocCode, CcommentCode);
332
#[cfg(test)]
mod tests {
    // Inline-snapshot tests: every case hands a small program to
    // `check_metrics` and compares `metric.halstead`, serialized as JSON,
    // against the snapshot literal stored next to it.
    use crate::tools::check_metrics;

    use super::*;

    /// Nested Python function definitions with simple assignments.
    #[test]
    fn python_operators_and_operands() {
        check_metrics::<PythonParser>(
            "def foo():
                 def bar():
                     def toto():
                        a = 1 + 1
                     b = 2 + a
                 c = 3 + 3",
            "foo.py",
            |metric| {
                // unique operators: def, =, +
                // operators: def, def, def, =, =, =, +, +, +
                // unique operands: foo, bar, toto, a, b, c, 1, 2, 3
                // operands: foo, bar, toto, a, b, c, 1, 1, 2, a, 3, 3
                insta::assert_json_snapshot!(
                    metric.halstead,
                    @r###"
                    {
                      "n1": 3.0,
                      "N1": 9.0,
                      "n2": 9.0,
                      "N2": 12.0,
                      "length": 21.0,
                      "estimated_program_length": 33.284212515144276,
                      "purity_ratio": 1.584962500721156,
                      "vocabulary": 12.0,
                      "volume": 75.28421251514428,
                      "difficulty": 2.0,
                      "level": 0.5,
                      "effort": 150.56842503028855,
                      "time": 8.364912501682698,
                      "bugs": 0.0094341190071077
                    }"###
                );
            },
        );
    }

    /// Small C program computing an average.
    #[test]
    fn cpp_operators_and_operands() {
        // Define operators and operands for C/C++ grammar according to this specification:
        // https://www.verifysoft.com/en_halstead_metrics.html
        // The only difference with the specification above is that
        // primitive types are treated as operators, since the definition of a
        // primitive type can be seen as the creation of a slot of a certain size.
        // i.e. The `int a;` definition creates a n-bytes slot.
        check_metrics::<CppParser>(
            "main()
            {
              int a, b, c, avg;
              scanf(\"%d %d %d\", &a, &b, &c);
              avg = (a + b + c) / 3;
              printf(\"avg = %d\", avg);
            }",
            "foo.c",
            |metric| {
                // unique operators: (), {}, int, &, =, +, /, ,, ;
                // unique operands: main, a, b, c, avg, scanf, "%d %d %d", 3, printf, "avg = %d"
                insta::assert_json_snapshot!(
                    metric.halstead,
                    @r###"
                    {
                      "n1": 9.0,
                      "N1": 24.0,
                      "n2": 10.0,
                      "N2": 18.0,
                      "length": 42.0,
                      "estimated_program_length": 61.74860596185444,
                      "purity_ratio": 1.470204903853677,
                      "vocabulary": 19.0,
                      "volume": 178.41295556463058,
                      "difficulty": 8.1,
                      "level": 0.1234567901234568,
                      "effort": 1445.1449400735075,
                      "time": 80.28583000408375,
                      "bugs": 0.04260752914034329
                    }"###
                );
            },
        );
    }

    /// Small Rust program computing an average.
    #[test]
    fn rust_operators_and_operands() {
        check_metrics::<RustParser>(
            "fn main() {
              let a = 5; let b = 5; let c = 5;
              let avg = (a + b + c) / 3;
              println!(\"{}\", avg);
            }",
            "foo.rs",
            |metric| {
                // unique operators: fn, (), {}, let, =, +, /, ;, !, ,
                // unique operands: main, a, b, c, avg, 5, 3, println, "{}"
                insta::assert_json_snapshot!(
                    metric.halstead,
                    @r###"
                    {
                      "n1": 10.0,
                      "N1": 23.0,
                      "n2": 9.0,
                      "N2": 15.0,
                      "length": 38.0,
                      "estimated_program_length": 61.74860596185444,
                      "purity_ratio": 1.624963314785643,
                      "vocabulary": 19.0,
                      "volume": 161.42124551085624,
                      "difficulty": 8.333333333333334,
                      "level": 0.12,
                      "effort": 1345.177045923802,
                      "time": 74.7320581068779,
                      "bugs": 0.040619232256751396
                    }"###
                );
            },
        );
    }

    /// Small JavaScript program computing an average.
    #[test]
    fn javascript_operators_and_operands() {
        check_metrics::<JavascriptParser>(
            "function main() {
              var a, b, c, avg;
              a = 5; b = 5; c = 5;
              avg = (a + b + c) / 3;
              console.log(\"{}\", avg);
            }",
            "foo.js",
            |metric| {
                // unique operators: function, (), {}, var, =, +, /, ,, ., ;
                // unique operands: main, a, b, c, avg, 3, 5, console.log, console, log, "{}"
                //
                // NOTE(review): the operand list above has 11 entries, but the
                // snapshot below records `n2` = 6, and its `N1`/`N2` values also
                // differ from the MozJS/TypeScript/TSX snapshots taken on the
                // same input. Confirm whether the comment or the snapshot is
                // stale.
                insta::assert_json_snapshot!(
                    metric.halstead,
                    @r#"
                {
                  "n1": 10.0,
                  "N1": 26.0,
                  "n2": 6.0,
                  "N2": 14.0,
                  "length": 40.0,
                  "estimated_program_length": 48.72905595320056,
                  "purity_ratio": 1.218226398830014,
                  "vocabulary": 16.0,
                  "volume": 160.0,
                  "difficulty": 11.666666666666666,
                  "level": 0.08571428571428572,
                  "effort": 1866.6666666666665,
                  "time": 103.7037037037037,
                  "bugs": 0.050534727339581954
                }
                "#
                );
            },
        );
    }

    /// Same program as the JavaScript test, run through the MozJS parser.
    #[test]
    fn mozjs_operators_and_operands() {
        check_metrics::<MozjsParser>(
            "function main() {
              var a, b, c, avg;
              a = 5; b = 5; c = 5;
              avg = (a + b + c) / 3;
              console.log(\"{}\", avg);
            }",
            "foo.js",
            |metric| {
                // unique operators: function, (), {}, var, =, +, /, ,, ., ;
                // unique operands: main, a, b, c, avg, 3, 5, console.log, console, log, "{}"
                insta::assert_json_snapshot!(
                    metric.halstead,
                    @r###"
                    {
                      "n1": 10.0,
                      "N1": 24.0,
                      "n2": 11.0,
                      "N2": 21.0,
                      "length": 45.0,
                      "estimated_program_length": 71.27302875388389,
                      "purity_ratio": 1.583845083419642,
                      "vocabulary": 21.0,
                      "volume": 197.65428402504423,
                      "difficulty": 9.545454545454545,
                      "level": 0.10476190476190476,
                      "effort": 1886.699983875422,
                      "time": 104.81666577085679,
                      "bugs": 0.05089564733125986
                    }"###
                );
            },
        );
    }

    /// Same program as the JavaScript test, run through the TypeScript parser.
    #[test]
    fn typescript_operators_and_operands() {
        check_metrics::<TypescriptParser>(
            "function main() {
              var a, b, c, avg;
              a = 5; b = 5; c = 5;
              avg = (a + b + c) / 3;
              console.log(\"{}\", avg);
            }",
            "foo.ts",
            |metric| {
                // unique operators: function, (), {}, var, =, +, /, ,, ., ;
                // unique operands: main, a, b, c, avg, 3, 5, console.log, console, log, "{}"
                insta::assert_json_snapshot!(
                    metric.halstead,
                    @r###"
                    {
                      "n1": 10.0,
                      "N1": 24.0,
                      "n2": 11.0,
                      "N2": 21.0,
                      "length": 45.0,
                      "estimated_program_length": 71.27302875388389,
                      "purity_ratio": 1.583845083419642,
                      "vocabulary": 21.0,
                      "volume": 197.65428402504423,
                      "difficulty": 9.545454545454545,
                      "level": 0.10476190476190476,
                      "effort": 1886.699983875422,
                      "time": 104.81666577085679,
                      "bugs": 0.05089564733125986
                    }"###
                );
            },
        );
    }

    /// Same program as the JavaScript test, run through the TSX parser.
    // NOTE(review): the file name passed below is `foo.ts` although the TSX
    // parser is used — confirm this is intended.
    #[test]
    fn tsx_operators_and_operands() {
        check_metrics::<TsxParser>(
            "function main() {
              var a, b, c, avg;
              a = 5; b = 5; c = 5;
              avg = (a + b + c) / 3;
              console.log(\"{}\", avg);
            }",
            "foo.ts",
            |metric| {
                // unique operators: function, (), {}, var, =, +, /, ,, ., ;
                // unique operands: main, a, b, c, avg, 3, 5, console.log, console, log, "{}"
                insta::assert_json_snapshot!(
                    metric.halstead,
                    @r###"
                    {
                      "n1": 10.0,
                      "N1": 24.0,
                      "n2": 11.0,
                      "N2": 21.0,
                      "length": 45.0,
                      "estimated_program_length": 71.27302875388389,
                      "purity_ratio": 1.583845083419642,
                      "vocabulary": 21.0,
                      "volume": 197.65428402504423,
                      "difficulty": 9.545454545454545,
                      "level": 0.10476190476190476,
                      "effort": 1886.699983875422,
                      "time": 104.81666577085679,
                      "bugs": 0.05089564733125986
                    }"###
                );
            },
        );
    }

    /// Input made only of brackets: the snapshot records zero operators and
    /// zero operands.
    #[test]
    fn python_wrong_operators() {
        check_metrics::<PythonParser>("()[]{}", "foo.py", |metric| {
            // With all four counters at zero, every measure that takes a
            // logarithm of, or divides by, one of them is not a number and
            // shows up as `null` in the snapshot.
            insta::assert_json_snapshot!(
                metric.halstead,
                @r###"
                    {
                      "n1": 0.0,
                      "N1": 0.0,
                      "n2": 0.0,
                      "N2": 0.0,
                      "length": 0.0,
                      "estimated_program_length": null,
                      "purity_ratio": null,
                      "vocabulary": 0.0,
                      "volume": null,
                      "difficulty": null,
                      "level": null,
                      "effort": null,
                      "time": null,
                      "bugs": null
                    }"###
            );
        });
    }

    /// Minimal Python function whose body is `pass`.
    #[test]
    fn python_check_metrics() {
        check_metrics::<PythonParser>(
            "def f():
                 pass",
            "foo.py",
            |metric| {
                insta::assert_json_snapshot!(
                    metric.halstead,
                    @r###"
                    {
                      "n1": 2.0,
                      "N1": 2.0,
                      "n2": 1.0,
                      "N2": 1.0,
                      "length": 3.0,
                      "estimated_program_length": 2.0,
                      "purity_ratio": 0.6666666666666666,
                      "vocabulary": 3.0,
                      "volume": 4.754887502163468,
                      "difficulty": 1.0,
                      "level": 1.0,
                      "effort": 4.754887502163468,
                      "time": 0.26416041678685936,
                      "bugs": 0.0009425525573729414
                    }"###
                );
            },
        );
    }

    /// Small Java class computing an average.
    #[test]
    fn java_operators_and_operands() {
        check_metrics::<JavaParser>(
            "public class Main {
            public static void main(string args[]) {
                  int a, b, c, avg;
                  a = 5; b = 5; c = 5;
                  avg = (a + b + c) / 3;
                  MessageFormat.format(\"{0}\", avg);
                }
            }",
            "foo.java",
            |metric| {
                // operator tokens:
                // { void ; ( String [ ] ) , int = + / format . }
                // unique operands:
                // Main main args a b c avg 5 3 MessageFormat format "{0}"
                //
                // NOTE(review): the operator list spells `String`, while the
                // program above declares `string args[]` — confirm which one
                // is meant.
                insta::assert_json_snapshot!(
                    metric.halstead,
                    @r###"
                    {
                      "n1": 10.0,
                      "N1": 25.0,
                      "n2": 12.0,
                      "N2": 22.0,
                      "length": 47.0,
                      "estimated_program_length": 76.2388309575275,
                      "purity_ratio": 1.6221027863303723,
                      "vocabulary": 22.0,
                      "volume": 209.59328607595296,
                      "difficulty": 9.166666666666666,
                      "level": 0.1090909090909091,
                      "effort": 1921.2717890295687,
                      "time": 106.73732161275382,
                      "bugs": 0.05151550353617788
                    }"###
                );
            },
        );
    }
}