Skip to main content

big_code_analysis/metrics/
halstead.rs

1// Per-language metric and AST modules deliberately consume the macro-
2// generated tree-sitter token enums via `use crate::*` and `use Foo::*`
3// inside match expressions — explicit imports would list dozens of
4// variants per arm and obscure the per-language token sets that are the
5// point of these files. Allowed at the module level rather than per
6// function so the per-language impl blocks stay readable.
7#![allow(
8    clippy::doc_markdown,
9    clippy::enum_glob_use,
10    clippy::match_wildcard_for_single_variants,
11    clippy::similar_names,
12    clippy::unused_self,
13    clippy::wildcard_imports
14)]
15// Metric counts (token, function, branch, argument, etc.) are stored as
16// `usize` and crossed with `f64` averages, ratios, and Halstead scores
17// across the cyclomatic / MI / Halstead computations. The `usize as f64`
18// and `f64 as usize` casts are intentional and snapshot-anchored — every
19// site is bounded by the count it came from. Allowing the lints at the
20// module level keeps the metric arithmetic legible.
21#![allow(
22    clippy::cast_precision_loss,
23    clippy::cast_possible_truncation,
24    clippy::cast_sign_loss
25)]
26
27use std::collections::HashMap;
28
29use serde::Serialize;
30use serde::ser::{SerializeStruct, Serializer};
31use std::fmt;
32
33use crate::checker::Checker;
34use crate::getter::Getter;
35use crate::macros::implement_metric_trait;
36
37use crate::*;
38
/// The `Halstead` metric suite.
///
/// Only the four raw counts are stored; every derived value (length,
/// vocabulary, volume, difficulty, ...) is computed from them on demand
/// by the accessor methods.
#[derive(Default, Clone, Debug)]
pub struct Stats {
    /// `η1`: number of distinct operators.
    u_operators: u64,
    /// `N1`: total number of operators.
    operators: u64,
    /// `η2`: number of distinct operands.
    u_operands: u64,
    /// `N2`: total number of operands.
    operands: u64,
}
47
/// Specifies the type of nodes accepted by the `Halstead` metric.
///
/// The enum is a plain, field-less classification, so it derives the
/// usual value-type traits: it can be printed in diagnostics, compared in
/// assertions, and copied freely instead of being moved.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HalsteadType {
    /// The node is a `Halstead` operator
    Operator,
    /// The node is a `Halstead` operand
    Operand,
    /// The node is unknown to the `Halstead` metric
    Unknown,
}
57
/// Per-space operator / operand occurrence maps used to compute the
/// Halstead `Stats` struct. Every map goes from a distinct key to the
/// number of times that key occurred: operators are keyed by `kind_id`
/// (or by text for primitive types), operands by their text. The maps
/// of nested spaces are combined with `merge`.
#[derive(Debug, Default, Clone)]
pub struct HalsteadMaps<'a> {
    /// Occurrence count of each non-primitive operator, keyed by the
    /// node's `kind_id`.
    pub(crate) operators: HashMap<u16, u64>,
    /// Primitive-type operators stored by text so each distinct primitive
    /// (e.g. `int` vs `double`) counts as a separate distinct operator,
    /// even when the grammar maps them all to a single kind_id.
    pub(crate) primitive_operators: HashMap<&'a [u8], u64>,
    /// Occurrence count of each operand, keyed by the operand's bytes in
    /// the source buffer.
    pub(crate) operands: HashMap<&'a [u8], u64>,
}
70
71impl<'a> HalsteadMaps<'a> {
72    pub(crate) fn new() -> Self {
73        Self::default()
74    }
75
76    pub(crate) fn merge(&mut self, other: &HalsteadMaps<'a>) {
77        for (k, v) in &other.operators {
78            *self.operators.entry(*k).or_insert(0) += v;
79        }
80        for (k, v) in &other.primitive_operators {
81            *self.primitive_operators.entry(*k).or_insert(0) += v;
82        }
83        for (k, v) in &other.operands {
84            *self.operands.entry(*k).or_insert(0) += v;
85        }
86    }
87
88    pub(crate) fn finalize(&self, stats: &mut Stats) {
89        stats.u_operators = (self.operators.len() + self.primitive_operators.len()) as u64;
90        stats.operators =
91            self.operators.values().sum::<u64>() + self.primitive_operators.values().sum::<u64>();
92        stats.u_operands = self.operands.len() as u64;
93        stats.operands = self.operands.values().sum::<u64>();
94    }
95}
96
97impl Serialize for Stats {
98    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
99    where
100        S: Serializer,
101    {
102        let mut st = serializer.serialize_struct("halstead", 14)?;
103        st.serialize_field("n1", &self.u_operators())?;
104        st.serialize_field("N1", &self.operators())?;
105        st.serialize_field("n2", &self.u_operands())?;
106        st.serialize_field("N2", &self.operands())?;
107        st.serialize_field("length", &self.length())?;
108        st.serialize_field("estimated_program_length", &self.estimated_program_length())?;
109        st.serialize_field("purity_ratio", &self.purity_ratio())?;
110        st.serialize_field("vocabulary", &self.vocabulary())?;
111        st.serialize_field("volume", &self.volume())?;
112        st.serialize_field("difficulty", &self.difficulty())?;
113        st.serialize_field("level", &self.level())?;
114        st.serialize_field("effort", &self.effort())?;
115        st.serialize_field("time", &self.time())?;
116        st.serialize_field("bugs", &self.bugs())?;
117        st.end()
118    }
119}
120
121impl fmt::Display for Stats {
122    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
123        write!(
124            f,
125            "n1: {}, \
126             N1: {}, \
127             n2: {}, \
128             N2: {}, \
129             length: {}, \
130             estimated program length: {}, \
131             purity ratio: {}, \
132             size: {}, \
133             volume: {}, \
134             difficulty: {}, \
135             level: {}, \
136             effort: {}, \
137             time: {}, \
138             bugs: {}",
139            self.u_operators(),
140            self.operators(),
141            self.u_operands(),
142            self.operands(),
143            self.length(),
144            self.estimated_program_length(),
145            self.purity_ratio(),
146            self.vocabulary(),
147            self.volume(),
148            self.difficulty(),
149            self.level(),
150            self.effort(),
151            self.time(),
152            self.bugs(),
153        )
154    }
155}
156
157impl Stats {
158    pub(crate) fn merge(&mut self, _other: &Stats) {}
159
160    /// Returns `η1`, the number of distinct operators
161    #[inline]
162    #[must_use]
163    pub fn u_operators(&self) -> f64 {
164        self.u_operators as f64
165    }
166
167    /// Returns `N1`, the number of total operators
168    #[inline]
169    #[must_use]
170    pub fn operators(&self) -> f64 {
171        self.operators as f64
172    }
173
174    /// Returns `η2`, the number of distinct operands
175    #[inline]
176    #[must_use]
177    pub fn u_operands(&self) -> f64 {
178        self.u_operands as f64
179    }
180
181    /// Returns `N2`, the number of total operands
182    #[inline]
183    #[must_use]
184    pub fn operands(&self) -> f64 {
185        self.operands as f64
186    }
187
188    /// Returns the program length
189    #[inline]
190    #[must_use]
191    pub fn length(&self) -> f64 {
192        self.operands() + self.operators()
193    }
194
195    /// Returns the calculated estimated program length
196    #[inline]
197    #[must_use]
198    pub fn estimated_program_length(&self) -> f64 {
199        let uo = self.u_operators();
200        let ud = self.u_operands();
201        let uo_term = if uo == 0.0 { 0.0 } else { uo * uo.log2() };
202        let ud_term = if ud == 0.0 { 0.0 } else { ud * ud.log2() };
203        uo_term + ud_term
204    }
205
206    /// Returns the purity ratio
207    #[inline]
208    #[must_use]
209    pub fn purity_ratio(&self) -> f64 {
210        let len = self.length();
211        if len == 0.0 {
212            0.0
213        } else {
214            self.estimated_program_length() / len
215        }
216    }
217
218    /// Returns the program vocabulary
219    #[inline]
220    #[must_use]
221    pub fn vocabulary(&self) -> f64 {
222        self.u_operands() + self.u_operators()
223    }
224
225    /// Returns the program volume.
226    ///
227    /// Unit of measurement: bits
228    #[inline]
229    #[must_use]
230    pub fn volume(&self) -> f64 {
231        // Assumes a uniform binary encoding for the vocabulary is used.
232        let vocab = self.vocabulary();
233        if vocab <= 1.0 {
234            0.0
235        } else {
236            self.length() * vocab.log2()
237        }
238    }
239
240    /// Returns the estimated difficulty required to program
241    #[inline]
242    #[must_use]
243    pub fn difficulty(&self) -> f64 {
244        let ud = self.u_operands();
245        if ud == 0.0 {
246            0.0
247        } else {
248            self.u_operators() / 2. * self.operands() / ud
249        }
250    }
251
252    /// Returns the estimated level of difficulty required to program
253    #[inline]
254    #[must_use]
255    pub fn level(&self) -> f64 {
256        let d = self.difficulty();
257        if d == 0.0 { 0.0 } else { 1. / d }
258    }
259
260    /// Returns the estimated effort required to program
261    #[inline]
262    #[must_use]
263    pub fn effort(&self) -> f64 {
264        self.difficulty() * self.volume()
265    }
266
267    /// Returns the estimated time required to program.
268    ///
269    /// Unit of measurement: seconds
270    #[inline]
271    #[must_use]
272    pub fn time(&self) -> f64 {
273        // The floating point `18.` aims to describe the processing rate of the
274        // human brain. It is called Stoud number, S, and its
275        // unit of measurement is moments/seconds.
276        // A moment is the time required by the human brain to carry out the
277        // most elementary decision.
278        // 5 <= S <= 20. Halstead uses 18.
279        // The value of S has been empirically developed from psychological
280        // reasoning, and its recommended value for
281        // programming applications is 18.
282        //
283        // Source: https://www.geeksforgeeks.org/software-engineering-halsteads-software-metrics/
284        self.effort() / 18.
285    }
286
287    /// Returns the estimated number of delivered bugs.
288    ///
289    /// This metric represents the average amount of work a programmer can do
290    /// without introducing an error.
291    #[inline]
292    #[must_use]
293    pub fn bugs(&self) -> f64 {
294        // The floating point `3000.` represents the number of elementary
295        // mental discriminations.
296        // A mental discrimination, in psychology, is the ability to perceive
297        // and respond to differences among stimuli.
298        //
299        // The value above is obtained starting from a constant that
300        // is different for every language and assumes that natural language is
301        // the language of the brain.
302        // For programming languages, the English language constant
303        // has been considered.
304        //
305        // After every 3000 mental discriminations a result is produced.
306        // This result, whether correct or incorrect, is more than likely
307        // either used as an input for the next operation or is output to the
308        // environment.
309        // If incorrect the error should become apparent.
310        // Thus, an opportunity for error occurs every 3000
311        // mental discriminations.
312        //
313        // Source: https://docs.lib.purdue.edu/cgi/viewcontent.cgi?article=1145&context=cstech
314        self.effort().powf(2. / 3.) / 3000.
315    }
316}
317
#[doc(hidden)]
/// Per-language extraction of Halstead operator/operand maps.
///
/// Implementors must also implement `Checker` and `Getter`.
pub trait Halstead
where
    Self: Checker + Getter,
{
    /// Inspect `node` and record it in `halstead_maps` according to the
    /// rules of the language implementing the trait.
    ///
    /// `code` is the source buffer; the maps borrow their text keys from
    /// it, which is why both share the lifetime `'a`.
    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>);
}
328
329#[inline]
330fn get_id<'a>(node: &Node<'a>, code: &'a [u8]) -> &'a [u8] {
331    &code[node.start_byte()..node.end_byte()]
332}
333
334#[inline]
335fn compute_halstead<'a, T: Getter + Checker>(
336    node: &Node<'a>,
337    code: &'a [u8],
338    halstead_maps: &mut HalsteadMaps<'a>,
339) {
340    match T::get_op_type(node) {
341        HalsteadType::Operator => {
342            if T::is_primitive(node.kind_id()) {
343                // Store primitive-type operators by text so distinct
344                // primitives (e.g. `int` vs `double`) that share a
345                // single kind_id are counted separately in n1/N1.
346                *halstead_maps
347                    .primitive_operators
348                    .entry(get_id(node, code))
349                    .or_insert(0) += 1;
350            } else {
351                *halstead_maps.operators.entry(node.kind_id()).or_insert(0) += 1;
352            }
353        }
354        HalsteadType::Operand => {
355            *halstead_maps
356                .operands
357                .entry(get_id(node, code))
358                .or_insert(0) += 1;
359        }
360        _ => {}
361    }
362}
363
/// `PythonCode` has no language-specific counting rules: it delegates to
/// the shared `compute_halstead` routine, so the classification comes
/// entirely from its `Getter` / `Checker` implementations.
impl Halstead for PythonCode {
    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
        compute_halstead::<Self>(node, code, halstead_maps);
    }
}
369
/// `MozjsCode` has no language-specific counting rules: it delegates to
/// the shared `compute_halstead` routine, so the classification comes
/// entirely from its `Getter` / `Checker` implementations.
impl Halstead for MozjsCode {
    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
        compute_halstead::<Self>(node, code, halstead_maps);
    }
}
375
/// `JavascriptCode` has no language-specific counting rules: it delegates
/// to the shared `compute_halstead` routine, so the classification comes
/// entirely from its `Getter` / `Checker` implementations.
impl Halstead for JavascriptCode {
    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
        compute_halstead::<Self>(node, code, halstead_maps);
    }
}
381
/// `TypescriptCode` has no language-specific counting rules: it delegates
/// to the shared `compute_halstead` routine, so the classification comes
/// entirely from its `Getter` / `Checker` implementations.
impl Halstead for TypescriptCode {
    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
        compute_halstead::<Self>(node, code, halstead_maps);
    }
}
387
/// `TsxCode` has no language-specific counting rules: it delegates to the
/// shared `compute_halstead` routine, so the classification comes
/// entirely from its `Getter` / `Checker` implementations.
impl Halstead for TsxCode {
    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
        compute_halstead::<Self>(node, code, halstead_maps);
    }
}
393
/// `RustCode` has no language-specific counting rules: it delegates to
/// the shared `compute_halstead` routine, so the classification comes
/// entirely from its `Getter` / `Checker` implementations.
impl Halstead for RustCode {
    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
        compute_halstead::<Self>(node, code, halstead_maps);
    }
}
399
/// `CppCode` has no language-specific counting rules: it delegates to the
/// shared `compute_halstead` routine, so the classification comes
/// entirely from its `Getter` / `Checker` implementations.
impl Halstead for CppCode {
    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
        compute_halstead::<Self>(node, code, halstead_maps);
    }
}
405
/// `JavaCode` has no language-specific counting rules: it delegates to
/// the shared `compute_halstead` routine, so the classification comes
/// entirely from its `Getter` / `Checker` implementations.
impl Halstead for JavaCode {
    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
        compute_halstead::<Self>(node, code, halstead_maps);
    }
}
411
/// `GroovyCode` has no language-specific counting rules: it delegates to
/// the shared `compute_halstead` routine, so the classification comes
/// entirely from its `Getter` / `Checker` implementations.
impl Halstead for GroovyCode {
    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
        compute_halstead::<Self>(node, code, halstead_maps);
    }
}
417
/// `CsharpCode` has no language-specific counting rules: it delegates to
/// the shared `compute_halstead` routine, so the classification comes
/// entirely from its `Getter` / `Checker` implementations.
impl Halstead for CsharpCode {
    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
        compute_halstead::<Self>(node, code, halstead_maps);
    }
}
423
/// `GoCode` has no language-specific counting rules: it delegates to the
/// shared `compute_halstead` routine, so the classification comes
/// entirely from its `Getter` / `Checker` implementations.
impl Halstead for GoCode {
    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
        compute_halstead::<Self>(node, code, halstead_maps);
    }
}
429
/// `PerlCode` has no language-specific counting rules: it delegates to
/// the shared `compute_halstead` routine, so the classification comes
/// entirely from its `Getter` / `Checker` implementations.
impl Halstead for PerlCode {
    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
        compute_halstead::<Self>(node, code, halstead_maps);
    }
}
435
/// `KotlinCode` has no language-specific counting rules: it delegates to
/// the shared `compute_halstead` routine, so the classification comes
/// entirely from its `Getter` / `Checker` implementations.
impl Halstead for KotlinCode {
    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
        compute_halstead::<Self>(node, code, halstead_maps);
    }
}
441
/// `LuaCode` has no language-specific counting rules: it delegates to the
/// shared `compute_halstead` routine, so the classification comes
/// entirely from its `Getter` / `Checker` implementations.
impl Halstead for LuaCode {
    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
        compute_halstead::<Self>(node, code, halstead_maps);
    }
}
447
/// `PhpCode` has no language-specific counting rules: it delegates to the
/// shared `compute_halstead` routine, so the classification comes
/// entirely from its `Getter` / `Checker` implementations.
impl Halstead for PhpCode {
    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
        compute_halstead::<Self>(node, code, halstead_maps);
    }
}
453
454// Real defaults — no operators / operands to count. Audited in #188.
455implement_metric_trait!(Halstead, PreprocCode, CcommentCode);
456
/// `RubyCode` has no language-specific counting rules: it delegates to
/// the shared `compute_halstead` routine, so the classification comes
/// entirely from its `Getter` / `Checker` implementations.
impl Halstead for RubyCode {
    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
        compute_halstead::<Self>(node, code, halstead_maps);
    }
}
462
/// `ElixirCode` has no language-specific counting rules: it delegates to
/// the shared `compute_halstead` routine, so the classification comes
/// entirely from its `Getter` / `Checker` implementations.
impl Halstead for ElixirCode {
    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
        compute_halstead::<Self>(node, code, halstead_maps);
    }
}
468
/// `BashCode` has no language-specific counting rules: it delegates to
/// the shared `compute_halstead` routine, so the classification comes
/// entirely from its `Getter` / `Checker` implementations.
impl Halstead for BashCode {
    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
        compute_halstead::<Self>(node, code, halstead_maps);
    }
}
474
/// `TclCode` has no language-specific counting rules: it delegates to the
/// shared `compute_halstead` routine, so the classification comes
/// entirely from its `Getter` / `Checker` implementations.
impl Halstead for TclCode {
    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
        compute_halstead::<Self>(node, code, halstead_maps);
    }
}
480
481#[cfg(test)]
482#[allow(
483    clippy::float_cmp,
484    clippy::cast_precision_loss,
485    clippy::cast_possible_truncation,
486    clippy::cast_sign_loss,
487    clippy::similar_names,
488    clippy::doc_markdown,
489    clippy::needless_raw_string_hashes,
490    clippy::too_many_lines
491)]
492mod tests {
493    use crate::tools::check_metrics;
494
495    use super::*;
496
497    #[test]
498    fn python_operators_and_operands() {
499        check_metrics::<PythonParser>(
500            "def foo():
501                 def bar():
502                     def toto():
503                        a = 1 + 1
504                     b = 2 + a
505                 c = 3 + 3",
506            "foo.py",
507            |metric| {
508                // unique operators: def, =, +
509                // operators: def, def, def, =, =, =, +, +, +
510                // unique operands: foo, bar, toto, a, b, c, 1, 2, 3
511                // operands: foo, bar, toto, a, b, c, 1, 1, 2, a, 3, 3
512                insta::assert_json_snapshot!(
513                    metric.halstead,
514                    @r###"
515                    {
516                      "n1": 3.0,
517                      "N1": 9.0,
518                      "n2": 9.0,
519                      "N2": 12.0,
520                      "length": 21.0,
521                      "estimated_program_length": 33.284212515144276,
522                      "purity_ratio": 1.584962500721156,
523                      "vocabulary": 12.0,
524                      "volume": 75.28421251514428,
525                      "difficulty": 2.0,
526                      "level": 0.5,
527                      "effort": 150.56842503028855,
528                      "time": 8.364912501682698,
529                      "bugs": 0.0094341190071077
530                    }"###
531                );
532            },
533        );
534    }
535
536    /// Pointer-arithmetic operators: `*` (dereference), `&` (address-of),
537    /// `->` (member-of-pointer), `+` (pointer + offset). Each is counted
538    /// once in `n1`; multiple uses bump `N1`. The headline integer values
539    /// (`u_operators`, `u_operands`) anchor the snapshot per the
540    /// snapshot-anchor policy.
541    #[test]
542    fn c_pointer_arithmetic_operators() {
543        check_metrics::<CppParser>(
544            "int g(int* p, int* q) {
545                 return *(p + 1) + *q;
546             }",
547            "foo.c",
548            |metric| {
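                // Unique operators: int, *, (), {}, +, ;, return, `,`  (= 8)
                //   `*` covers both pointer-type and dereference; the grammar
                //   does NOT split them.  `,` DOES appear: it separates the
                //   two parameters `p` and `q`.  The matched braces are
                //   presumably counted as a single operator kind, which is
                //   what keeps the total at 8 — TODO confirm against the
                //   stored snapshot.
                // Unique operands: g, p, q, 1                       (= 4)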
554                assert_eq!(metric.halstead.u_operators(), 8.0);
555                assert_eq!(metric.halstead.u_operands(), 4.0);
556                insta::assert_json_snapshot!(metric.halstead);
557            },
558        );
559    }
560
561    /// Bitwise (`&`, `|`, `^`, `~`, `<<`, `>>`) and logical (`&&`, `||`,
562    /// `!`) operators are distinct kind_ids and count as separate unique
563    /// operators in Halstead.  `&` (bitwise-and) and `&&` (logical-and)
564    /// must NOT collapse, even though both render as ampersands.
565    #[test]
566    fn c_bitwise_and_logical_operators() {
567        check_metrics::<CppParser>(
568            "int f(int a, int b) {
569                 int x = (a & b) | (a ^ b);
570                 int y = ~a;
571                 int z = (a << 1) >> 2;
572                 return (a && b) || !x;
573             }",
574            "foo.c",
575            |metric| {
576                // Expect: 6 bitwise op kinds (& | ^ ~ << >>), 3 logical (&& || !).
577                // Plus int, (), {, }, =, ;, return, , — 8 syntactic / arithmetic
578                // operator kinds.  Six bitwise + three logical + eight = 17 unique
579                // operators is the upper bound; actuals depend on grammar collapse,
580                // so we assert a lower-bound and anchor via snapshot below.
581                let s = &metric.halstead;
582                assert!(
583                    s.u_operators() >= 14.0,
584                    "expected >= 14 unique operators (bitwise + logical + syntax), got {}",
585                    s.u_operators(),
586                );
587                assert_eq!(s.u_operands(), 8.0); // f, a, b, x, y, z, 1, 2
588                insta::assert_json_snapshot!(metric.halstead);
589            },
590        );
591    }
592
593    /// Increment / decrement (`++`, `--`) and `sizeof` / cast operators
594    /// each contribute distinct unique operators.  C-style casts in the
595    /// tree-sitter grammar surface as `cast_expression` with the type
596    /// token classified as a primitive_type operator.
597    #[test]
598    fn c_increment_decrement_and_sizeof() {
599        check_metrics::<CppParser>(
600            "void f(int* p) {
601                 int n = sizeof(int);
602                 ++p;
603                 --n;
604                 long w = (long) n;
605             }",
606            "foo.c",
607            |metric| {
608                // Unique operators include: void, int, long, *, =, sizeof, ++, --, (), {, }, ;
609                // Unique operands: f, p, n, w
610                let s = &metric.halstead;
611                assert!(
612                    s.u_operators() >= 10.0,
613                    "expected >= 10 unique operators including ++ / -- / sizeof / cast, got {}",
614                    s.u_operators(),
615                );
616                assert_eq!(s.u_operands(), 4.0);
617                insta::assert_json_snapshot!(metric.halstead);
618            },
619        );
620    }
621
622    #[test]
623    fn cpp_operators_and_operands() {
624        // Define operators and operands for C/C++ grammar according to this specification:
625        // https://www.verifysoft.com/en_halstead_metrics.html
626        // The only difference with the specification above is that
627        // primitive types are treated as operators, since the definition of a
628        // primitive type can be seen as the creation of a slot of a certain size.
629        // i.e. The `int a;` definition creates a n-bytes slot.
630        check_metrics::<CppParser>(
631            "main()
632            {
633              int a, b, c, avg;
634              scanf(\"%d %d %d\", &a, &b, &c);
635              avg = (a + b + c) / 3;
636              printf(\"avg = %d\", avg);
637            }",
638            "foo.c",
639            |metric| {
640                // unique operators: (), {}, int, &, =, +, /, ,, ;
641                // unique operands: main, a, b, c, avg, scanf, "%d %d %d", 3, printf, "avg = %d"
642                insta::assert_json_snapshot!(
643                    metric.halstead,
644                    @r###"
645                    {
646                      "n1": 9.0,
647                      "N1": 24.0,
648                      "n2": 10.0,
649                      "N2": 18.0,
650                      "length": 42.0,
651                      "estimated_program_length": 61.74860596185444,
652                      "purity_ratio": 1.470204903853677,
653                      "vocabulary": 19.0,
654                      "volume": 178.41295556463058,
655                      "difficulty": 8.1,
656                      "level": 0.1234567901234568,
657                      "effort": 1445.1449400735075,
658                      "time": 80.28583000408375,
659                      "bugs": 0.04260752914034329
660                    }"###
661                );
662            },
663        );
664    }
665
666    /// C++20 spaceship operator `<=>` (`Cpp::LTEQGT`) is a comparison
667    /// operator and must be counted in Halstead, like its sibling
668    /// comparison operators `<`, `>`, `<=`, `>=`, `==`, `!=`. Prior to
669    /// this fix it fell through to the `Unknown` arm and was silently
670    /// dropped from `n1` / `N1`, under-reporting volume / effort on any
671    /// C++20+ codebase that defines `operator<=>`. Regression test for
672    /// issue #197.
673    #[test]
674    fn cpp_spaceship_operator_is_halstead_operator() {
675        check_metrics::<CppParser>(
676            "int f(int a, int b) {
677                 return (a <=> b) != 0;
678             }",
679            "foo.cpp",
680            |metric| {
681                // Unique operators (grammar collapses matched delimiters
682                // to a single kind_id): int, (), {}, <=>, !=, return, ;, ,
683                //   `<=>` is the regression target — without the fix it
684                //   would be Unknown and `u_operators` would be 7.
685                // Unique operands: f, a, b, 0
686                let s = &metric.halstead;
687                assert_eq!(s.u_operators(), 8.0);
688                assert_eq!(s.u_operands(), 4.0);
689                insta::assert_json_snapshot!(
690                    s,
691                    @r###"
692                    {
693                      "n1": 8.0,
694                      "N1": 11.0,
695                      "n2": 4.0,
696                      "N2": 6.0,
697                      "length": 17.0,
698                      "estimated_program_length": 32.0,
699                      "purity_ratio": 1.8823529411764706,
700                      "vocabulary": 12.0,
701                      "volume": 60.94436251225965,
702                      "difficulty": 6.0,
703                      "level": 0.16666666666666666,
704                      "effort": 365.6661750735579,
705                      "time": 20.31478750408655,
706                      "bugs": 0.01704519358507665
707                    }"###
708                );
709            },
710        );
711    }
712
713    /// C++ compound subtract-assign `-=` (`Cpp::DASHEQ`) must be counted
714    /// in Halstead like every other compound assignment (`+=`, `*=`,
715    /// `/=`, etc.). Prior to the fix it fell through to the `Unknown`
716    /// arm and was silently dropped from `n1` / `N1` — under-reporting
717    /// volume / effort wherever C++ code subtracts in place. Regression
718    /// test for issue #198.
719    #[test]
720    fn cpp_dash_eq_is_halstead_operator() {
721        check_metrics::<CppParser>("void f(int a, int b) { a -= b; }", "foo.cpp", |metric| {
722            // Unique operators: void, (), {}, int, ,, -=, ;
723            //   `-=` is the regression target — without the fix it
724            //   would be Unknown and `u_operators` would be 6.
725            // Unique operands: f, a, b
726            let s = &metric.halstead;
727            assert_eq!(s.u_operators(), 7.0);
728            assert_eq!(s.u_operands(), 3.0);
729        });
730    }
731
732    /// C++ pointer-to-member access `.*` (`Cpp::DOTSTAR`) must be
733    /// counted in Halstead. Prior to the fix it fell through to the
734    /// `Unknown` arm and was silently dropped from `n1` / `N1`.
735    /// Regression test for issue #198.
736    ///
737    /// The snippet uses an `operator.*` declaration because that is
738    /// where the C++ tree-sitter grammar reliably emits a single
739    /// `DOTSTAR` leaf; in expression position (`a.*b`) some grammar
740    /// versions split the token into `DOT` + `STAR` and the regression
741    /// would be masked.
742    #[test]
743    fn cpp_dot_star_is_halstead_operator() {
744        check_metrics::<CppParser>("struct S { void operator.*(int); };", "foo.cpp", |metric| {
745            // Unique operators with fix: {}, ;, (), int, void, .*
746            //   `.*` is the regression target — without the fix it
747            //   falls through to `Unknown` and `u_operators` is 5.
748            // Unique operands: S
749            let s = &metric.halstead;
750            assert_eq!(s.u_operators(), 6.0);
751            assert_eq!(s.u_operands(), 1.0);
752        });
753    }
754
755    /// C++ pointer-to-member access through pointer `->*`
756    /// (`Cpp::DASHGTSTAR`) must be counted in Halstead. Prior to the
757    /// fix it fell through to the `Unknown` arm and was silently
758    /// dropped from `n1` / `N1`. Regression test for issue #198.
759    ///
760    /// The snippet uses an `operator->*` declaration because that is
761    /// where the C++ tree-sitter grammar reliably emits a single
762    /// `DASHGTSTAR` leaf; in expression position (`a->*b`) the grammar
763    /// splits the token into `DASHGT` + `STAR` and the regression would
764    /// be masked.
765    #[test]
766    fn cpp_dash_gt_star_is_halstead_operator() {
767        check_metrics::<CppParser>(
768            "struct S { void operator->*(int); };",
769            "foo.cpp",
770            |metric| {
771                // Unique operators with fix: {}, ;, (), int, void, ->*
772                //   `->*` is the regression target — without the fix it
773                //   falls through to `Unknown` and `u_operators` is 5.
774                // Unique operands: S
775                let s = &metric.halstead;
776                assert_eq!(s.u_operators(), 6.0);
777                assert_eq!(s.u_operands(), 1.0);
778            },
779        );
780    }
781
782    #[test]
783    fn rust_operators_and_operands() {
784        check_metrics::<RustParser>(
785            "fn main() {
786              let a = 5; let b = 5; let c = 5;
787              let avg = (a + b + c) / 3;
788              println!(\"{}\", avg);
789            }",
790            "foo.rs",
791            |metric| {
792                // unique operators: fn, (), {}, let, =, +, /, ;, !, ,
793                // unique operands: main, a, b, c, avg, 5, 3, println, "{}"
794                insta::assert_json_snapshot!(
795                    metric.halstead,
796                    @r###"
797                    {
798                      "n1": 10.0,
799                      "N1": 23.0,
800                      "n2": 9.0,
801                      "N2": 15.0,
802                      "length": 38.0,
803                      "estimated_program_length": 61.74860596185444,
804                      "purity_ratio": 1.624963314785643,
805                      "vocabulary": 19.0,
806                      "volume": 161.42124551085624,
807                      "difficulty": 8.333333333333334,
808                      "level": 0.12,
809                      "effort": 1345.177045923802,
810                      "time": 74.7320581068779,
811                      "bugs": 0.040619232256751396
812                    }"###
813                );
814            },
815        );
816    }
817
818    #[test]
819    fn rust_aliased_primitive_type_classification() {
820        // Regression for issue #95 (lesson #2): the Rust grammar emits 17
821        // distinct `kind_id`s for `primitive_type` (one base plus 16
822        // numeric-suffixed alias variants). `RustCode::is_primitive` in
823        // `src/checker.rs` must list every variant; if a future regression
824        // omits one, primitive type names emitted in that aliased position
825        // silently drop into the kind_id-keyed operators bucket instead of
826        // the text-keyed primitive_operators map, miscounting Halstead n1.
827        //
828        // The snippet exercises every primitive scalar type across many
829        // syntactic positions (function parameter types, return types,
830        // let-binding annotations, `as` casts, const items, type aliases,
831        // struct fields, function pointer types, tuple types, array types,
832        // reference types, generic type arguments). Empirically, ordinary
833        // Rust source emits the base `Rust::PrimitiveType` variant from
834        // all of these positions; the 16 suffixed alias variants are
835        // produced by specific grammar productions not reachable from
836        // user-written code. Mutation-verified: dropping
837        // `Rust::PrimitiveType` from `is_primitive` fails this test
838        // (u_operators 30→15). Dropping any single suffixed variant
839        // currently leaves the test passing; if a future grammar bump
840        // makes any suffixed variant reachable from idiomatic source,
841        // extend the snippet so the test fires for that variant too.
842        check_metrics::<RustParser>(
843            "const C: u8 = 0;
844            type T = i64;
845            struct S { x: u32, y: u64 }
846            fn g(p: fn(u8) -> u16) -> bool { let _ = p(0); true }
847            fn f(a: u8, b: u16, c: u32, d: u64) -> u128 {
848                let _x: i8 = 0;
849                let _y: i16 = 0;
850                let _z: i32 = 0;
851                let _w: i64 = 0;
852                let _v: i128 = 0;
853                let _p: f32 = 1.0;
854                let _q: f64 = 2.0;
855                let _r: bool = true;
856                let _s: char = 'x';
857                let _t: usize = 0;
858                let _u: isize = 0;
859                let _arr: [u32; 4] = [0; 4];
860                let _ref: &u8 = &0;
861                let _tup: (u32, u64) = (0, 0);
862                let _opt: Option<u32> = None;
863                a as u128 + b as u128 + c as u128 + d
864            }",
865            "foo.rs",
866            |metric| {
867                // Headline: u_operators is the load-bearing assertion —
868                // the 16 distinct primitive type names dedupe by text in
869                // the primitive_operators map. Total operators (N1) and
870                // operand counts pin the rest of the Halstead state.
871                assert_eq!(metric.halstead.u_operators(), 30.0);
872                assert_eq!(metric.halstead.operators(), 118.0);
873                assert_eq!(metric.halstead.u_operands(), 31.0);
874                assert_eq!(metric.halstead.operands(), 50.0);
875            },
876        );
877    }
878
879    #[test]
880    fn javascript_operators_and_operands() {
881        check_metrics::<JavascriptParser>(
882            "function main() {
883              var a, b, c, avg;
884              a = 5; b = 5; c = 5;
885              avg = (a + b + c) / 3;
886              console.log(\"{}\", avg);
887            }",
888            "foo.js",
889            |metric| {
890                // unique operators: function, (), {}, var, =, +, /, ,, ., ;
891                // unique operands: main, a, b, c, avg, 3, 5, console.log, console, log, "{}"
892                insta::assert_json_snapshot!(
893                    metric.halstead,
894                    @r###"
895                    {
896                      "n1": 10.0,
897                      "N1": 24.0,
898                      "n2": 11.0,
899                      "N2": 21.0,
900                      "length": 45.0,
901                      "estimated_program_length": 71.27302875388389,
902                      "purity_ratio": 1.583845083419642,
903                      "vocabulary": 21.0,
904                      "volume": 197.65428402504423,
905                      "difficulty": 9.545454545454545,
906                      "level": 0.10476190476190476,
907                      "effort": 1886.699983875422,
908                      "time": 104.81666577085679,
909                      "bugs": 0.05089564733125986
910                    }"###
911                );
912            },
913        );
914    }
915
916    #[test]
917    fn mozjs_operators_and_operands() {
918        check_metrics::<MozjsParser>(
919            "function main() {
920              var a, b, c, avg;
921              a = 5; b = 5; c = 5;
922              avg = (a + b + c) / 3;
923              console.log(\"{}\", avg);
924            }",
925            "foo.js",
926            |metric| {
927                // unique operators: function, (), {}, var, =, +, /, ,, ., ;
928                // unique operands: main, a, b, c, avg, 3, 5, console.log, console, log, "{}"
929                insta::assert_json_snapshot!(
930                    metric.halstead,
931                    @r###"
932                    {
933                      "n1": 10.0,
934                      "N1": 24.0,
935                      "n2": 11.0,
936                      "N2": 21.0,
937                      "length": 45.0,
938                      "estimated_program_length": 71.27302875388389,
939                      "purity_ratio": 1.583845083419642,
940                      "vocabulary": 21.0,
941                      "volume": 197.65428402504423,
942                      "difficulty": 9.545454545454545,
943                      "level": 0.10476190476190476,
944                      "effort": 1886.699983875422,
945                      "time": 104.81666577085679,
946                      "bugs": 0.05089564733125986
947                    }"###
948                );
949            },
950        );
951    }
952
953    #[test]
954    fn typescript_operators_and_operands() {
955        check_metrics::<TypescriptParser>(
956            "function main() {
957              var a, b, c, avg;
958              a = 5; b = 5; c = 5;
959              avg = (a + b + c) / 3;
960              console.log(\"{}\", avg);
961            }",
962            "foo.ts",
963            |metric| {
964                // unique operators: function, (), {}, var, =, +, /, ,, ., ;
965                // unique operands: main, a, b, c, avg, 3, 5, console.log, console, log, "{}"
966                insta::assert_json_snapshot!(
967                    metric.halstead,
968                    @r###"
969                    {
970                      "n1": 10.0,
971                      "N1": 24.0,
972                      "n2": 11.0,
973                      "N2": 21.0,
974                      "length": 45.0,
975                      "estimated_program_length": 71.27302875388389,
976                      "purity_ratio": 1.583845083419642,
977                      "vocabulary": 21.0,
978                      "volume": 197.65428402504423,
979                      "difficulty": 9.545454545454545,
980                      "level": 0.10476190476190476,
981                      "effort": 1886.699983875422,
982                      "time": 104.81666577085679,
983                      "bugs": 0.05089564733125986
984                    }"###
985                );
986            },
987        );
988    }
989
990    #[test]
991    fn tsx_operators_and_operands() {
992        check_metrics::<TsxParser>(
993            "function main() {
994              var a, b, c, avg;
995              a = 5; b = 5; c = 5;
996              avg = (a + b + c) / 3;
997              console.log(\"{}\", avg);
998            }",
999            "foo.ts",
1000            |metric| {
1001                // unique operators: function, (), {}, var, =, +, /, ,, ., ;
1002                // unique operands: main, a, b, c, avg, 3, 5, console.log, console, log, "{}"
1003                insta::assert_json_snapshot!(
1004                    metric.halstead,
1005                    @r###"
1006                    {
1007                      "n1": 10.0,
1008                      "N1": 24.0,
1009                      "n2": 11.0,
1010                      "N2": 21.0,
1011                      "length": 45.0,
1012                      "estimated_program_length": 71.27302875388389,
1013                      "purity_ratio": 1.583845083419642,
1014                      "vocabulary": 21.0,
1015                      "volume": 197.65428402504423,
1016                      "difficulty": 9.545454545454545,
1017                      "level": 0.10476190476190476,
1018                      "effort": 1886.699983875422,
1019                      "time": 104.81666577085679,
1020                      "bugs": 0.05089564733125986
1021                    }"###
1022                );
1023            },
1024        );
1025    }
1026
1027    #[test]
1028    fn javascript_template_string_plain_is_operand() {
1029        // Regression: issue #192. A backtick-delimited `` `hello` ``
1030        // without `${...}` is semantically identical to `"hello"` /
1031        // `'hello'` and must contribute exactly one operand — before
1032        // the fix `TemplateString` fell through to `HalsteadType::Unknown`
1033        // and contributed zero. expected: operands are `f` (function
1034        // name) and the wrapping `` `hello` `` template literal →
1035        // u_operands = 2, N2 = 2 (matches the equivalent
1036        // `function f() { return "hello"; }` baseline).
1037        check_metrics::<JavascriptParser>("function f() { return `hello`; }", "foo.js", |metric| {
1038            assert_eq!(metric.halstead.u_operands(), 2.0);
1039            assert_eq!(metric.halstead.operands(), 2.0);
1040        });
1041    }
1042
1043    #[test]
1044    fn javascript_template_string_interpolation_no_double_count() {
1045        // Regression: issue #192. An interpolated template literal
1046        // `` `Hi ${name}!` `` used to fall through to `Unknown`,
1047        // dropping the wrapper from the count entirely; the inner
1048        // `name` was still walked and counted via the
1049        // `TemplateSubstitution` child. Mirrors #183 (C#), #191
1050        // (Kotlin), #199 (Perl): the wrapper is skipped when a
1051        // `TemplateSubstitution` child is present so the inner
1052        // expression is not double-counted.
1053        //
1054        // expected: for `function f(name) { return ` + "`Hi ${name}!`"
1055        // + `; }`, operands are `f` and `name` (twice — `name` as the
1056        // parameter, then again inside the interpolation), so
1057        // u_operands = 2 and N2 = 3. Without the wrapper-skip guard
1058        // the wrapping literal would also be counted, lifting
1059        // u_operands to 3 and N2 to 4.
1060        check_metrics::<JavascriptParser>(
1061            "function f(name) { return `Hi ${name}!`; }",
1062            "foo.js",
1063            |metric| {
1064                assert_eq!(metric.halstead.u_operands(), 2.0);
1065                assert_eq!(metric.halstead.operands(), 3.0);
1066            },
1067        );
1068    }
1069
1070    #[test]
1071    fn mozjs_template_string_plain_is_operand() {
1072        // Regression: issue #192. Mirrors
1073        // `javascript_template_string_plain_is_operand` for the
1074        // Firefox-mode dialect — the four JS-family `get_op_type`
1075        // impls share the same template-literal handling.
1076        check_metrics::<MozjsParser>("function f() { return `hello`; }", "foo.js", |metric| {
1077            assert_eq!(metric.halstead.u_operands(), 2.0);
1078            assert_eq!(metric.halstead.operands(), 2.0);
1079        });
1080    }
1081
1082    #[test]
1083    fn mozjs_template_string_interpolation_no_double_count() {
1084        // Regression: issue #192. Mirrors
1085        // `javascript_template_string_interpolation_no_double_count`
1086        // for the Firefox-mode dialect.
1087        check_metrics::<MozjsParser>(
1088            "function f(name) { return `Hi ${name}!`; }",
1089            "foo.js",
1090            |metric| {
1091                assert_eq!(metric.halstead.u_operands(), 2.0);
1092                assert_eq!(metric.halstead.operands(), 3.0);
1093            },
1094        );
1095    }
1096
1097    #[test]
1098    fn typescript_template_string_plain_is_operand() {
1099        // Regression: issue #192. Mirrors
1100        // `javascript_template_string_plain_is_operand` for
1101        // TypeScript — the four JS-family `get_op_type` impls share
1102        // the same template-literal handling.
1103        //
1104        // After #313 the `: string` annotation's `String2` child also
1105        // counts as an operand (text `"string"`), so unique operands
1106        // are `f`, `` `hello` ``, `string` (3 each). The headline of
1107        // this test — that the plain template literal contributes one
1108        // operand — is unaffected.
1109        check_metrics::<TypescriptParser>(
1110            "function f(): string { return `hello`; }",
1111            "foo.ts",
1112            |metric| {
1113                assert_eq!(metric.halstead.u_operands(), 3.0);
1114                assert_eq!(metric.halstead.operands(), 3.0);
1115            },
1116        );
1117    }
1118
1119    #[test]
1120    fn typescript_template_string_interpolation_no_double_count() {
1121        // Regression: issue #192. Mirrors
1122        // `javascript_template_string_interpolation_no_double_count`
1123        // for TypeScript.
1124        //
1125        // After #313 each `: string` annotation contributes one
1126        // `"string"` operand. Unique operands: `f`, `name`, `string`
1127        // (3). Total operands: `f`, `name` (param), `name` (in the
1128        // interpolation), `string`, `string` (5). The interpolation
1129        // guard from #192 still holds — the wrapping `` `Hi ${name}!` ``
1130        // is `Unknown`, not double-counted.
1131        check_metrics::<TypescriptParser>(
1132            "function f(name: string): string { return `Hi ${name}!`; }",
1133            "foo.ts",
1134            |metric| {
1135                assert_eq!(metric.halstead.u_operands(), 3.0);
1136                assert_eq!(metric.halstead.operands(), 5.0);
1137            },
1138        );
1139    }
1140
1141    #[test]
1142    fn tsx_template_string_plain_is_operand() {
1143        // Regression: issue #192. Mirrors
1144        // `javascript_template_string_plain_is_operand` for the
1145        // TSX (TypeScript + JSX) variant.
1146        //
1147        // After #313 TSX's type-keyword `string` (`String3`) also
1148        // counts as an operand, mirroring TS::String2.
1149        check_metrics::<TsxParser>(
1150            "function f(): string { return `hello`; }",
1151            "foo.tsx",
1152            |metric| {
1153                assert_eq!(metric.halstead.u_operands(), 3.0);
1154                assert_eq!(metric.halstead.operands(), 3.0);
1155            },
1156        );
1157    }
1158
1159    #[test]
1160    fn tsx_template_string_interpolation_no_double_count() {
1161        // Regression: issue #192. Mirrors
1162        // `javascript_template_string_interpolation_no_double_count`
1163        // for the TSX (TypeScript + JSX) variant.
1164        //
1165        // After #313 each `: string` annotation contributes one
1166        // `String3` operand; see `typescript_template_string_…` for
1167        // the count derivation.
1168        check_metrics::<TsxParser>(
1169            "function f(name: string): string { return `Hi ${name}!`; }",
1170            "foo.tsx",
1171            |metric| {
1172                assert_eq!(metric.halstead.u_operands(), 3.0);
1173                assert_eq!(metric.halstead.operands(), 5.0);
1174            },
1175        );
1176    }
1177
1178    // Issue #281: optional chaining (`?.`) was double-counted as a
1179    // Halstead operator in TypeScript and TSX because the grammar
1180    // exposes both an `optional_chain` named wrapper AND a child
1181    // `?.` token, and both were classified as `Operator`. The fix
1182    // counts only the bare `?.` token (`QMARKDOT`) in TS/TSX so each
1183    // textual `?.` contributes exactly once, matching JS / MozJS
1184    // (whose grammars expose only `OptionalChain` — the `?.` token
1185    // itself).
1186    //
1187    // The four assertions below all compare against the same totals:
1188    // for `function f(a) { return a?.b?.c; }` the operator stream is
1189    // `function`, `(`, `{`, `return`, `?.`, `?.`, `;` (7 total, 6
1190    // unique — `LPAREN`/`LBRACE` count once, closing tokens are not
1191    // in the operator set). Before the fix, TS/TSX reported 9/7
1192    // instead of 7/6.
1193    #[test]
1194    fn javascript_optional_chain_not_double_counted_in_halstead_281() {
1195        check_metrics::<JavascriptParser>("function f(a) { return a?.b?.c; }", "foo.js", |m| {
1196            assert_eq!(m.halstead.u_operators(), 6.0);
1197            assert_eq!(m.halstead.operators(), 7.0);
1198        });
1199    }
1200
1201    #[test]
1202    fn mozjs_optional_chain_not_double_counted_in_halstead_281() {
1203        check_metrics::<MozjsParser>("function f(a) { return a?.b?.c; }", "foo.js", |m| {
1204            assert_eq!(m.halstead.u_operators(), 6.0);
1205            assert_eq!(m.halstead.operators(), 7.0);
1206        });
1207    }
1208
1209    #[test]
1210    fn typescript_optional_chain_not_double_counted_in_halstead_281() {
1211        // The TS grammar wraps member-expression `?.` in an
1212        // `optional_chain` named node containing the bare `?.`
1213        // token; classifying both as `Operator` double-counted the
1214        // chain. We now count only the bare token, so TS matches JS.
1215        check_metrics::<TypescriptParser>("function f(a) { return a?.b?.c; }", "foo.ts", |m| {
1216            assert_eq!(m.halstead.u_operators(), 6.0);
1217            assert_eq!(m.halstead.operators(), 7.0);
1218        });
1219    }
1220
1221    #[test]
1222    fn tsx_optional_chain_not_double_counted_in_halstead_281() {
1223        check_metrics::<TsxParser>("function f(a) { return a?.b?.c; }", "foo.tsx", |m| {
1224            assert_eq!(m.halstead.u_operators(), 6.0);
1225            assert_eq!(m.halstead.operators(), 7.0);
1226        });
1227    }
1228
1229    // Issue #299: parity guard for the JS-family `get_op_type` macro
1230    // on the optional-chain operator token (#281's prior regression
1231    // surface). All four languages must classify the bare `?.` token
1232    // identically — `OptionalChain` in JS/MozJS, `QMARKDOT` in
1233    // TS/TSX — and emit the same totals for
1234    // `function f(a) { return a?.b?.c; }`:
1235    //
1236    // * Operators: `function`, `(`, `{`, `return`, `?.`, `?.`, `;`
1237    //   (7 total, 6 unique).
1238    // * Operands: `f`, `a`, `a`, `b`, `c`, plus the two wrapping
1239    //   member expressions (`a?.b`, `a?.b?.c`) classified as
1240    //   `MemberExpression*` (7 total, 6 unique).
1241    //
1242    // Verified by test-via-revert: dropping `OptionalChain` from
1243    // JS/MozJS, or `QMARKDOT` from TS/TSX, trips the test
1244    // (u_operators 6→5). This input does NOT exercise every operand
1245    // alias in the per-language `operand_extras` (`Identifier2`,
1246    // `String2`, `NestedIdentifier`, `MemberExpression4`) or the TS
1247    // `PredefinedType` operator; drift in those is out of scope for
1248    // this regression guard and would need a separate fixture.
1249    #[test]
1250    fn js_family_get_op_type_parity_optional_chain_member_299() {
1251        // Non-capturing closure (coerced to the `fn` pointer that
1252        // `check_metrics` accepts) avoids the
1253        // `clippy::needless_pass_by_value` warning that a free `fn`
1254        // taking `CodeMetrics` by value would trigger.
1255        const SRC: &str = "function f(a) { return a?.b?.c; }";
1256        let check = |m: crate::CodeMetrics| {
1257            assert_eq!(m.halstead.u_operators(), 6.0);
1258            assert_eq!(m.halstead.operators(), 7.0);
1259            assert_eq!(m.halstead.u_operands(), 6.0);
1260            assert_eq!(m.halstead.operands(), 7.0);
1261        };
1262
1263        check_metrics::<JavascriptParser>(SRC, "foo.js", check);
1264        check_metrics::<MozjsParser>(SRC, "foo.js", check);
1265        check_metrics::<TypescriptParser>(SRC, "foo.ts", check);
1266        check_metrics::<TsxParser>(SRC, "foo.tsx", check);
1267    }
1268
1269    // Issue #313: parity guard for the `"string"` type-keyword aliases
1270    // that the TS / TSX grammars expose. `Checker::is_string` matches
1271    // these aliases (#283), so `Getter::get_op_type` must also classify
1272    // them — otherwise the same node disagrees between the two
1273    // predicates and Halstead silently undercounts every `: string`
1274    // annotation by one operand.
1275    //
1276    // For the input `let x: string = "y";`:
1277    //
1278    // * TypeScript emits `Typescript::String2` for the `string` type
1279    //   keyword (kind_id 135, in the type-keyword block of the enum).
1280    // * TSX emits `Tsx::String3` for the same role (kind_id 141).
1281    //
1282    // After #313 both kinds are in `operand_extras` and contribute one
1283    // `"string"` operand. Verified by test-via-revert: dropping
1284    // `String2` from TS's `operand_extras` (or `String3` from TSX's)
1285    // trips this test on `u_operands` / `operands` for the affected
1286    // language.
1287    #[test]
1288    fn ts_family_string2_string3_type_keyword_parity_313() {
1289        const SRC: &str = "let x: string = \"y\";";
1290        // Operators (n1 = 5, N1 = 5):
1291        //   `let`, `:`, `=`, `;`, plus `string` (PredefinedType wrapper,
1292        //   routed through `is_primitive` so it's keyed by its lexeme
1293        //   `"string"` in `primitive_operators`).
1294        // Operands (n2 = 3, N2 = 3):
1295        //   `x`, the `"y"` literal, and `string` (the type-keyword
1296        //   child of `predefined_type`, classified via the operand
1297        //   extras added by #313). Pre-fix the TS column reported
1298        //   n2 = 2 / N2 = 2 because String2 fell through to `Unknown`;
1299        //   the TSX column had the same gap for String3.
1300        let check = |m: crate::CodeMetrics| {
1301            assert_eq!(m.halstead.u_operators(), 5.0);
1302            assert_eq!(m.halstead.operators(), 5.0);
1303            assert_eq!(m.halstead.u_operands(), 3.0);
1304            assert_eq!(m.halstead.operands(), 3.0);
1305        };
1306
1307        check_metrics::<TypescriptParser>(SRC, "foo.ts", check);
1308        check_metrics::<TsxParser>(SRC, "foo.tsx", check);
1309    }
1310
1311    #[test]
1312    fn python_wrong_operators() {
1313        check_metrics::<PythonParser>("()[]{}", "foo.py", |metric| {
1314            insta::assert_json_snapshot!(
1315                metric.halstead,
1316                @r###"
1317                    {
1318                      "n1": 0.0,
1319                      "N1": 0.0,
1320                      "n2": 0.0,
1321                      "N2": 0.0,
1322                      "length": 0.0,
1323                      "estimated_program_length": 0.0,
1324                      "purity_ratio": 0.0,
1325                      "vocabulary": 0.0,
1326                      "volume": 0.0,
1327                      "difficulty": 0.0,
1328                      "level": 0.0,
1329                      "effort": 0.0,
1330                      "time": 0.0,
1331                      "bugs": 0.0
1332                    }"###
1333            );
1334        });
1335    }
1336
1337    #[test]
1338    fn python_check_metrics() {
1339        check_metrics::<PythonParser>(
1340            "def f():
1341                 pass",
1342            "foo.py",
1343            |metric| {
1344                insta::assert_json_snapshot!(
1345                    metric.halstead,
1346                    @r###"
1347                    {
1348                      "n1": 2.0,
1349                      "N1": 2.0,
1350                      "n2": 1.0,
1351                      "N2": 1.0,
1352                      "length": 3.0,
1353                      "estimated_program_length": 2.0,
1354                      "purity_ratio": 0.6666666666666666,
1355                      "vocabulary": 3.0,
1356                      "volume": 4.754887502163468,
1357                      "difficulty": 1.0,
1358                      "level": 1.0,
1359                      "effort": 4.754887502163468,
1360                      "time": 0.26416041678685936,
1361                      "bugs": 0.0009425525573729414
1362                    }"###
1363                );
1364            },
1365        );
1366    }
1367
1368    #[test]
1369    fn java_operators_and_operands() {
1370        check_metrics::<JavaParser>(
1371            "public class Main {
1372            public static void main(string args[]) {
1373                  int a, b, c, avg;
1374                  a = 5; b = 5; c = 5;
1375                  avg = (a + b + c) / 3;
1376                  MessageFormat.format(\"{0}\", avg);
1377                }
1378            }",
1379            "foo.java",
1380            |metric| {
1381                // Operators (n1=11): {} void () [] , . ; int = + /
1382                // Operands (n2=12): Main main args a b c avg 5 3 MessageFormat format "{0}"
1383                insta::assert_json_snapshot!(
1384                    metric.halstead,
1385                    @r#"
1386                {
1387                  "n1": 11.0,
1388                  "N1": 26.0,
1389                  "n2": 12.0,
1390                  "N2": 22.0,
1391                  "length": 48.0,
1392                  "estimated_program_length": 81.07329781366414,
1393                  "purity_ratio": 1.6890270377846697,
1394                  "vocabulary": 23.0,
1395                  "volume": 217.13097389073664,
1396                  "difficulty": 10.083333333333334,
1397                  "level": 0.09917355371900825,
1398                  "effort": 2189.4039867315946,
1399                  "time": 121.63355481842193,
1400                  "bugs": 0.05620341201461669
1401                }
1402                "#
1403                );
1404            },
1405        );
1406    }
1407
1408    #[test]
1409    fn java_primitive_types_and_booleans() {
1410        check_metrics::<JavaParser>(
1411            "public class Prims {
1412                byte a = 1;
1413                short b = 2;
1414                int c = 3;
1415                long d = 4;
1416                char e = 'x';
1417                float f = 1.0f;
1418                double g = 2.0;
1419                boolean h = true;
1420                boolean i = false;
1421            }",
1422            "foo.java",
1423            |metric| {
1424                // Verifies all 8 Java primitive-type keywords (byte, short, int, long,
1425                // char, float, double, boolean) are counted as distinct operators, and
1426                // that true/false are counted as operands.
1427                insta::assert_json_snapshot!(
1428                    metric.halstead,
1429                    @r#"
1430                {
1431                  "n1": 11.0,
1432                  "N1": 28.0,
1433                  "n2": 19.0,
1434                  "N2": 19.0,
1435                  "length": 47.0,
1436                  "estimated_program_length": 118.76437056043838,
1437                  "purity_ratio": 2.526901501285923,
1438                  "vocabulary": 30.0,
1439                  "volume": 230.62385799360038,
1440                  "difficulty": 5.5,
1441                  "level": 0.18181818181818182,
1442                  "effort": 1268.4312189648022,
1443                  "time": 70.46840105360012,
1444                  "bugs": 0.03905920146699976
1445                }
1446                "#
1447                );
1448            },
1449        );
1450    }
1451
1452    #[test]
1453    fn groovy_operators_and_operands() {
1454        check_metrics::<GroovyParser>(
1455            "class Main {
1456                static void main(String[] args) {
1457                    int a, b, c, avg;
1458                    a = 5; b = 5; c = 5;
1459                    avg = (a + b + c) / 3;
1460                    println(avg);
1461                }
1462            }",
1463            "foo.groovy",
1464            |metric| {
1465                // Groovy mirror of `java_operators_and_operands`. The juxt
1466                // call `println avg` exercises `juxt_function_call` in
1467                // place of Java's `MessageFormat.format(...)`. amaanq's
1468                // grammar inherits Java's tokenisation, so n1/N1/n2/N2
1469                // shapes match Java up to those substitutions.
1470                // The dekobon grammar parses primitive type names
1471                // (`void`, `int`, `String`) as `type_identifier`
1472                // rather than as distinct keyword tokens, so they
1473                // count as operands here — the prior amaanq grammar
1474                // treated them as operators. Net shift: −2 unique
1475                // operators (`void`, `int`), +2 unique operands
1476                // (`void`, `int` were the only two type_identifiers
1477                // not already counted as operands, since `String`
1478                // was already an identifier in the prior grammar's
1479                // counting).
1480                assert_eq!(metric.halstead.u_operators(), 8.0);
1481                assert_eq!(metric.halstead.u_operands(), 13.0);
1482                insta::assert_json_snapshot!(
1483                    metric.halstead,
1484                    @r#"
1485                {
1486                  "n1": 8.0,
1487                  "N1": 22.0,
1488                  "n2": 13.0,
1489                  "N2": 23.0,
1490                  "length": 45.0,
1491                  "estimated_program_length": 72.10571633583419,
1492                  "purity_ratio": 1.6023492519074265,
1493                  "vocabulary": 21.0,
1494                  "volume": 197.65428402504423,
1495                  "difficulty": 7.076923076923077,
1496                  "level": 0.14130434782608697,
1497                  "effort": 1398.7841638695438,
1498                  "time": 77.71023132608576,
1499                  "bugs": 0.04169134280255714
1500                }
1501                "#
1502                );
1503            },
1504        );
1505    }
1506
1507    #[test]
1508    fn groovy_primitive_types_and_booleans() {
1509        check_metrics::<GroovyParser>(
1510            "class Prims {
1511                byte a = 1
1512                short b = 2
1513                int c = 3
1514                long d = 4
1515                char e = 'x'
1516                float f = 1.0f
1517                double g = 2.0
1518                boolean h = true
1519                boolean i = false
1520            }",
1521            "foo.groovy",
1522            |metric| {
1523                // The dekobon grammar consolidates the 8 primitive
1524                // type names (`byte`, `short`, `int`, `long`, `char`,
1525                // `float`, `double`, `boolean`) under `type_identifier`
1526                // — so they count as operands, not as distinct
1527                // operators. Likewise numeric literals collapse to one
1528                // `NumberLiteral` shape (no Hex/Octal/Binary/Decimal
1529                // split), and `'x'` parses as `StringLiteral` (Groovy
1530                // single-quoted strings) rather than as
1531                // `CharacterLiteral`. Operators remaining in this
1532                // fixture: `=` and `class`-body braces (only `{` is in
1533                // the operator set). True/false collapse under one
1534                // `BooleanLiteral`.
1535                assert_eq!(metric.halstead.u_operators(), 2.0);
1536                assert_eq!(metric.halstead.u_operands(), 27.0);
1537                insta::assert_json_snapshot!(
1538                    metric.halstead,
1539                    @r#"
1540                {
1541                  "n1": 2.0,
1542                  "N1": 10.0,
1543                  "n2": 27.0,
1544                  "N2": 28.0,
1545                  "length": 38.0,
1546                  "estimated_program_length": 130.38196255841365,
1547                  "purity_ratio": 3.4311042778529908,
1548                  "vocabulary": 29.0,
1549                  "volume": 184.60327781484773,
1550                  "difficulty": 1.037037037037037,
1551                  "level": 0.9642857142857143,
1552                  "effort": 191.44043625243467,
1553                  "time": 10.635579791801925,
1554                  "bugs": 0.01107221547116606
1555                }
1556                "#
1557                );
1558            },
1559        );
1560    }
1561
1562    #[test]
1563    fn groovy_closure_operators_and_operands() {
1564        check_metrics::<GroovyParser>("def double = { x -> x * 2 }", "foo.groovy", |metric| {
1565            // Closure with arrow-style parameter list.
1566            // Distinct operators: def, =, {}, ->, * = 5.
1567            // Distinct operands: double, x, 2 = 3.
1568            assert_eq!(metric.halstead.u_operators(), 5.0);
1569            assert_eq!(metric.halstead.u_operands(), 3.0);
1570        });
1571    }
1572
1573    /// Regression for issue #247: every Groovy-specific operator the
1574    /// prior amaanq grammar dropped to ERROR or mis-shaped as a Java
1575    /// node now parses as a distinct lexer token in the dekobon
1576    /// grammar, so Halstead counts each one. The fixture below
1577    /// exercises Elvis `?:`, safe-nav `?.`, safe-chain `??.`,
1578    /// spread-dot `*.`, method-pointer `.&`, direct-field `.@`,
1579    /// identity `===` / `!==`, spaceship `<=>`, regex `=~` / `==~`,
1580    /// exclusive ranges `..<` / `<..` / `<..<`, `as` coercion, and
1581    /// `?[` safe index — every distinct operator kind must appear in
1582    /// `u_operators` (the count grows by exactly the number of new
1583    /// distinct operator tokens introduced).
1584    #[test]
1585    fn groovy_dekobon_operator_coverage_247() {
1586        check_metrics::<GroovyParser>(
1587            "def f(a, b, list, s) {
1588                def x = a ?: b
1589                def y = a?.field
1590                def z = a??.field
1591                def items = list*.size()
1592                def ptr = a.&size
1593                def fld = a.@field
1594                def id1 = a === b
1595                def id2 = a !== b
1596                def ship = a <=> b
1597                def find = s =~ /pat/
1598                def match = s ==~ /^pat\\$/
1599                def r1 = 0..<10
1600                def r2 = 0<..10
1601                def r3 = 0<..<10
1602                def cast = a as String
1603                def safe = list?[0]
1604                return x
1605            }",
1606            "foo.groovy",
1607            |metric| {
1608                // Each Groovy-specific operator kind contributes one
1609                // distinct entry to the operator set. The 20-operator
1610                // floor breaks down as: 16 Groovy-specific tokens
1611                // exercised by the fixture (`?:`, `?.`, `??.`, `*.`,
1612                // `.&`, `.@`, `===`, `!==`, `<=>`, `=~`, `==~`, `..<`,
1613                // `<..`, `<..<`, `as`, `?[`) plus a handful of
1614                // ambient Java-shaped operators the fixture also
1615                // uses (`def`, `=`, `{`, `(`, `,`, `return`). A
1616                // grammar regression that drops one of the 16
1617                // Groovy-specific tokens would push the count below
1618                // this floor.
1619                // Exact pin: with the dekobon Groovy grammar this
1620                // fixture exercises 16 Groovy-specific tokens (`?:`,
1621                // `?.`, `??.`, `*.`, `.&`, `.@`, `===`, `!==`, `<=>`,
1622                // `=~`, `==~`, `..<`, `<..`, `<..<`, `as`, `?[`) plus
1623                // 7 ambient Java-shaped operators the fixture also
1624                // uses (`def`, `=`, `,`, `{`, `(`, `[`, `return`),
1625                // for a total of 23 distinct operator kinds. A
1626                // regression that drops any one of the 16 #247
1627                // operators would push the count below 23 and fail
1628                // this assertion. The complementary AST walk below
1629                // pins each #247 operator's identity individually so
1630                // a grammar change that adds an unrelated operator
1631                // (lifting `u_operators` to 24) still flags the loss
1632                // of a #247 operator at the per-token level.
1633                assert_eq!(
1634                    metric.halstead.u_operators(),
1635                    23.0,
1636                    "u_operators changed; check whether a #247 operator was dropped or an unrelated operator added (and update the comment / token list above accordingly)",
1637                );
1638            },
1639        );
1640    }
1641
1642    #[test]
1643    fn csharp_operators_and_operands() {
1644        // After issue #286, `void`, `string`, and `int` count as three
1645        // distinct Halstead operators rather than collapsing into one
1646        // `PredefinedType` kind_id entry, lifting u_operators from 13
1647        // to 15. Total operators (N1) is unchanged because the same
1648        // nodes are still counted, just keyed by lexeme.
1649        check_metrics::<CsharpParser>(
1650            "public class Main {
1651                public static void Run(string[] args) {
1652                    int a, b, c, avg;
1653                    a = 5; b = 5; c = 5;
1654                    avg = (a + b + c) / 3;
1655                    System.Console.WriteLine(\"{0}\", avg);
1656                }
1657            }",
1658            "foo.cs",
1659            |metric| {
1660                assert_eq!(metric.halstead.u_operators(), 15.0);
1661                assert_eq!(metric.halstead.operators(), 32.0);
1662                assert_eq!(metric.halstead.u_operands(), 13.0);
1663                assert_eq!(metric.halstead.operands(), 23.0);
1664                // Pin every Halstead field; values are whatever the
1665                // classifier produces and become the regression spec.
1666                insta::assert_json_snapshot!(metric.halstead);
1667            },
1668        );
1669    }
1670
1671    #[test]
1672    fn csharp_primitive_types_and_booleans() {
1673        // After issue #286: each of `byte`, `short`, `int`, `long`,
1674        // `char`, `float`, `double`, `bool`, `object` is now a distinct
1675        // Halstead operator (9 primitives) rather than collapsing into
1676        // one `PredefinedType` kind_id entry. u_operators rises from 6
1677        // to 14 (5 non-primitive operators + 9 distinct primitives);
1678        // total operators (N1) is unchanged because the same nodes are
1679        // still counted, just keyed by lexeme.
1680        check_metrics::<CsharpParser>(
1681            "public class Prims {
1682                byte a = 1;
1683                short b = 2;
1684                int c = 3;
1685                long d = 4;
1686                char e = 'x';
1687                float f = 1.0f;
1688                double g = 2.0;
1689                bool h = true;
1690                bool i = false;
1691                object j = null;
1692            }",
1693            "foo.cs",
1694            |metric| {
1695                assert_eq!(metric.halstead.u_operators(), 14.0);
1696                assert_eq!(metric.halstead.operators(), 33.0);
1697                assert_eq!(metric.halstead.u_operands(), 21.0);
1698                assert_eq!(metric.halstead.operands(), 23.0);
1699                insta::assert_json_snapshot!(metric.halstead);
1700            },
1701        );
1702    }
1703
1704    #[test]
1705    fn csharp_predefined_types_keyed_by_lexeme() {
1706        // Regression: issue #286. The C# grammar emits one `PredefinedType`
1707        // kind_id for every keyword type (`int`, `string`, `bool`, …).
1708        // Without keying by source text the entire family collapses into
1709        // a single Halstead operator (n1 += 1) instead of one per distinct
1710        // keyword. This test pins the post-fix behaviour using four
1711        // distinct primitives — `int`, `string`, `bool`, `object` —
1712        // appearing as parameter types so no other operators interact
1713        // with the count.
1714        //
1715        // expected: operators are `class`, `void`, `M`, `{}`, `()`, `,`
1716        // (×3 between 4 params), plus the four distinct predefined types
1717        // → u_operators = 5 + 4 = 9. Without the fix the four primitives
1718        // collapse to one entry, giving u_operators = 6.
1719        check_metrics::<CsharpParser>(
1720            "class C { void M(int a, string b, bool c, object d) {} }",
1721            "foo.cs",
1722            |metric| {
1723                // The headline assertion: four distinct primitive
1724                // keywords contribute four distinct operators, not one.
1725                assert_eq!(metric.halstead.u_operators(), 9.0);
1726            },
1727        );
1728    }
1729
1730    #[test]
1731    fn csharp_interpolated_string_no_double_count() {
1732        // Regression: issue #183. A C# `$"Hi {name}!"` used to be
1733        // classified as a Halstead operand (the wrapping
1734        // `InterpolatedStringExpression`) AND have its inner
1735        // `Interpolation`'s identifier classified as an operand too.
1736        // The fix routes `InterpolatedStringExpression` through a
1737        // conditional: when it has an `Interpolation` child, the inner
1738        // identifier already carries the operand contribution and the
1739        // wrapper is treated as `Unknown`; when it does not (static
1740        // `$"hello"`), the wrapper still counts as one operand.
1741        //
1742        // expected: operand contributions for
1743        //   `class C { void M(string name) { string s = $"Hi {name}!"; } }`
1744        // — `C` (class), `M` (method), `name` (param), `s` (local),
1745        // and the inner `name` (inside `{...}`). With the fix,
1746        // u_operands = 4 (C, M, name, s); N2 = 5 (`name` twice).
1747        // Without the fix, the wrapping `$"Hi {name}!"` would also
1748        // count → u_operands = 5, N2 = 6.
1749        check_metrics::<CsharpParser>(
1750            "class C { void M(string name) { string s = $\"Hi {name}!\"; } }",
1751            "foo.cs",
1752            |metric| {
1753                assert_eq!(metric.halstead.u_operands(), 4.0);
1754                assert_eq!(metric.halstead.operands(), 5.0);
1755            },
1756        );
1757    }
1758
1759    #[test]
1760    fn csharp_static_interpolated_string_is_operand() {
1761        // Regression: issue #183. A `$"..."` with no `{...}` is
1762        // semantically identical to `"..."` and must still contribute
1763        // exactly one operand — the conditional `is_child(Interpolation)`
1764        // check distinguishes it from a true interpolation. expected:
1765        // operands are `C`, `M`, `s`, `$"hello"` → u_operands = 4, N2 = 4.
1766        // A naive "always Unknown" fix would yield u_operands = 3, N2 = 3,
1767        // diverging from the plain-string equivalent below.
1768        check_metrics::<CsharpParser>(
1769            "class C { void M() { string s = $\"hello\"; } }",
1770            "foo.cs",
1771            |metric| {
1772                assert_eq!(metric.halstead.u_operands(), 4.0);
1773                assert_eq!(metric.halstead.operands(), 4.0);
1774            },
1775        );
1776    }
1777
1778    #[test]
1779    fn csharp_plain_string_still_operand() {
1780        // The fix for #183 only changes how `InterpolatedStringExpression`
1781        // is classified; plain `StringLiteral` (and `VerbatimStringLiteral`
1782        // / `RawStringLiteral`) must still contribute exactly one operand
1783        // each. expected: operands are `C`, `M`, `s`, `"hi"` →
1784        // u_operands = 4, N2 = 4.
1785        check_metrics::<CsharpParser>(
1786            "class C { void M() { string s = \"hi\"; } }",
1787            "foo.cs",
1788            |metric| {
1789                assert_eq!(metric.halstead.u_operands(), 4.0);
1790                assert_eq!(metric.halstead.operands(), 4.0);
1791            },
1792        );
1793    }
1794
1795    #[test]
1796    fn go_operators_and_operands() {
1797        check_metrics::<GoParser>(
1798            "package main
1799            func sum(a, b int) int {
1800                return a + b
1801            }",
1802            "foo.go",
1803            |metric| {
1804                insta::assert_json_snapshot!(
1805                    metric.halstead,
1806                    @r###"
1807                    {
1808                      "n1": 7.0,
1809                      "N1": 7.0,
1810                      "n2": 5.0,
1811                      "N2": 8.0,
1812                      "length": 15.0,
1813                      "estimated_program_length": 31.26112492884004,
1814                      "purity_ratio": 2.0840749952560027,
1815                      "vocabulary": 12.0,
1816                      "volume": 53.77443751081734,
1817                      "difficulty": 5.6,
1818                      "level": 0.17857142857142858,
1819                      "effort": 301.1368500605771,
1820                      "time": 16.729825003365395,
1821                      "bugs": 0.014975730436275946
1822                    }"###
1823                );
1824            },
1825        );
1826    }
1827
1828    #[test]
1829    fn perl_operators_and_operands() {
1830        check_metrics::<PerlParser>(
1831            "sub sum {
1832                my ($a, $b) = @_;
1833                return $a + $b;
1834            }",
1835            "foo.pl",
1836            |metric| {
1837                insta::assert_json_snapshot!(
1838                    metric.halstead,
1839                    @r#"
1840                {
1841                  "n1": 10.0,
1842                  "N1": 14.0,
1843                  "n2": 4.0,
1844                  "N2": 6.0,
1845                  "length": 20.0,
1846                  "estimated_program_length": 41.219280948873624,
1847                  "purity_ratio": 2.0609640474436812,
1848                  "vocabulary": 14.0,
1849                  "volume": 76.14709844115208,
1850                  "difficulty": 7.5,
1851                  "level": 0.13333333333333333,
1852                  "effort": 571.1032383086406,
1853                  "time": 31.727957683813365,
1854                  "bugs": 0.02294502281013948
1855                }
1856                "#
1857                );
1858            },
1859        );
1860    }
1861
1862    #[test]
1863    fn perl_interpolated_string_no_double_count() {
1864        // Regression: issue #199. A `string_double_quoted` (and
1865        // `string_qq_quoted` / `backtick_quoted` / `command_qx_quoted`)
1866        // wrapping an `interpolation` child used to be counted as a
1867        // Halstead operand while the inner scalar/array/hash variable
1868        // was also walked and counted — double-counting the inner
1869        // variable's contribution to `N2`. Mirrors #180 (Bash/Elixir),
1870        // #183 (C#), #184 (PHP), #191 (Kotlin).
1871        //
1872        // expected: for
1873        //   sub greet { my $name = shift; my $msg = "Hi $name"; return $msg; }
1874        // — operands are `greet`, `$name`, `shift`, `$msg`. With the
1875        // fix the wrapping `"Hi $name"` is skipped (has `Interpolation`
1876        // child), so u_operands = 4 and N2 = 6 (`$name` x2 from the
1877        // `my` binding and the interpolation; `$msg` x2 from the `my`
1878        // binding and `return`; `greet`, `shift` once each). Without
1879        // the fix the wrapping literal would also be counted, lifting
1880        // u_operands to 5 and N2 to 7.
1881        check_metrics::<PerlParser>(
1882            "sub greet { my $name = shift; my $msg = \"Hi $name\"; return $msg; }",
1883            "foo.pl",
1884            |metric| {
1885                assert_eq!(metric.halstead.u_operands(), 4.0);
1886                assert_eq!(metric.halstead.operands(), 6.0);
1887                insta::assert_json_snapshot!(metric.halstead);
1888            },
1889        );
1890    }
1891
1892    #[test]
1893    fn perl_plain_string_still_operand() {
1894        // The fix for #199 only skips wrapping literals that carry an
1895        // `Interpolation` child; a plain `"hello"` (no `$…` inside)
1896        // must still contribute exactly one operand. expected: operands
1897        // `greet`, `$msg`, `"hello"` → u_operands = 3, N2 = 4 (`$msg`
1898        // appears in the `my` binding and the `return`).
1899        check_metrics::<PerlParser>(
1900            "sub greet { my $msg = \"hello\"; return $msg; }",
1901            "foo.pl",
1902            |metric| {
1903                assert_eq!(metric.halstead.u_operands(), 3.0);
1904                assert_eq!(metric.halstead.operands(), 4.0);
1905            },
1906        );
1907    }
1908
1909    #[test]
1910    fn perl_single_quoted_string_never_interpolates() {
1911        // Single-quoted (`'…'`) and `q{…}` literals are not subject to
1912        // interpolation in Perl, so even when their text contains a
1913        // `$name`-shaped sequence the wrapper is still counted as one
1914        // operand and the inner text is not parsed as a variable.
1915        // expected: operands `greet`, `$msg`, `'Hi $name'` →
1916        // u_operands = 3, N2 = 4 (`$msg` x2).
1917        check_metrics::<PerlParser>(
1918            "sub greet { my $msg = 'Hi $name'; return $msg; }",
1919            "foo.pl",
1920            |metric| {
1921                assert_eq!(metric.halstead.u_operands(), 3.0);
1922                assert_eq!(metric.halstead.operands(), 4.0);
1923            },
1924        );
1925    }
1926
1927    #[test]
1928    fn perl_plain_heredoc_counts_as_one_operand() {
1929        // Regression: issue #287. A plain (non-interpolating) Perl
1930        // heredoc body used to be classified `HalsteadType::Unknown`,
1931        // so its visible `HeredocBodyStatement` node contributed
1932        // nothing to N2 even though it is a string literal. The fix
1933        // adds `HeredocBodyStatement` to the interpolation-aware
1934        // operand arm, so an inert heredoc counts as one operand.
1935        //
1936        // Source (heredoc body lives at the source_file level, not
1937        // inside any sub):
1938        //   my $msg = <<END;
1939        //   hello world
1940        //   END
1941        //
1942        // Operands traversed:
1943        //   * `$msg` (`scalar_variable`)                    × 1
1944        //   * heredoc body (`heredoc_body_statement`)       × 1
1945        // expected: u_operands = 2, N2 = 2.
1946        check_metrics::<PerlParser>("my $msg = <<END;\nhello world\nEND\n", "foo.pl", |metric| {
1947            assert_eq!(metric.halstead.u_operands(), 2.0);
1948            assert_eq!(metric.halstead.operands(), 2.0);
1949        });
1950    }
1951
1952    #[test]
1953    fn perl_interpolated_heredoc_no_double_count() {
1954        // Regression: issue #287. An interpolating Perl heredoc
1955        // (`<<"TAG"` or bare `<<TAG`) carries an `Interpolation` child
1956        // when its body contains a `$var`. The wrapper must drop to
1957        // `Unknown` so the inner scalar variable carries the operand
1958        // count — same dispatch as the existing double-quoted /
1959        // backtick / qx wrappers (issue #199) and the PHP heredoc fix
1960        // (issue #184).
1961        //
1962        // Source:
1963        //   my $name = "x";
1964        //   my $msg = <<"END";
1965        //   hi $name
1966        //   END
1967        //
1968        // Operands by text key:
1969        //   * `$name` × 2 (my-binding + interpolation inside heredoc)
1970        //   * `"x"`  × 1 (inert double-quoted string)
1971        //   * `$msg` × 1
1972        // expected: u_operands = 3, N2 = 4. Without the
1973        // interpolation-aware drop the wrapping heredoc body would
1974        // also count, lifting u_operands to 4 and N2 to 5.
1975        check_metrics::<PerlParser>(
1976            "my $name = \"x\";\nmy $msg = <<\"END\";\nhi $name\nEND\n",
1977            "foo.pl",
1978            |metric| {
1979                assert_eq!(metric.halstead.u_operands(), 3.0);
1980                assert_eq!(metric.halstead.operands(), 4.0);
1981            },
1982        );
1983    }
1984
1985    #[test]
1986    fn lua_operators_and_operands() {
1987        check_metrics::<LuaParser>(
1988            "local function add(a, b)
1989  local result = a + b
1990  if result > 0 then
1991    return result
1992  end
1993  return 0
1994end",
1995            "foo.lua",
1996            |metric| {
1997                // n1=12: local,function,(,,,),=,+,if,>,then,return,end
1998                // n2=5: add,a,b,result,0
1999                insta::assert_json_snapshot!(metric.halstead, @r###"
2000                    {
2001                      "n1": 12.0,
2002                      "N1": 15.0,
2003                      "n2": 5.0,
2004                      "N2": 10.0,
2005                      "length": 25.0,
2006                      "estimated_program_length": 54.62919048309068,
2007                      "purity_ratio": 2.1851676193236274,
2008                      "vocabulary": 17.0,
2009                      "volume": 102.18657103125848,
2010                      "difficulty": 12.0,
2011                      "level": 0.08333333333333333,
2012                      "effort": 1226.2388523751017,
2013                      "time": 68.12438068750565,
2014                      "bugs": 0.03818816527310305
2015                    }
2016                    "###);
2017            },
2018        );
2019    }
2020
2021    #[test]
2022    fn kotlin_halstead_basic() {
2023        check_metrics::<KotlinParser>(
2024            "fun add(a: Int, b: Int): Int {
2025                val result = a + b
2026                return result
2027            }",
2028            "foo.kt",
2029            |metric| {
2030                insta::assert_json_snapshot!(
2031                    metric.halstead,
2032                    @r###"
2033                    {
2034                      "n1": 9.0,
2035                      "N1": 11.0,
2036                      "n2": 5.0,
2037                      "N2": 10.0,
2038                      "length": 21.0,
2039                      "estimated_program_length": 40.13896548741762,
2040                      "purity_ratio": 1.9113793089246487,
2041                      "vocabulary": 14.0,
2042                      "volume": 79.9544533632097,
2043                      "difficulty": 9.0,
2044                      "level": 0.1111111111111111,
2045                      "effort": 719.5900802688873,
2046                      "time": 39.97722668160485,
2047                      "bugs": 0.026767153565498338
2048                    }
2049                    "###
2050                );
2051            },
2052        );
2053    }
2054
2055    #[test]
2056    fn kotlin_string_template_no_double_count() {
2057        // Regression: issue #191. A Kotlin string template (`"Hi $name!"`)
2058        // wraps an `Interpolation` child whose inner expression is
2059        // walked and counted separately. Without the
2060        // `is_child(Interpolation)` guard the wrapping `StringLiteral`
2061        // would also count as an operand, inflating N2. Same pattern as
2062        // #180 (Bash/Elixir) and #184 (PHP).
2063        //
2064        // Source: `fun greet(name: String): String {\n    return "Hi $name!"\n}\n`
2065        // Operands (by source-byte key):
2066        //   Function signature (no body): `greet` × 1, `name` × 1,
2067        //   `String` × 2 (param type + return type) = 3 unique, 4 total.
2068        //   Body adds the short-form interpolation `$name`: tree-sitter
2069        //   kotlin-ng 1.1.0 produces an `identifier` node whose source
2070        //   range includes the leading `$`, so its bytes are `$name` —
2071        //   distinct from the bare `name` operand in the signature.
2072        //   The wrapping `StringLiteral` is skipped (fix working) →
2073        //   u_operands = 4 (`greet`, `name`, `String`, `$name`), N2 = 5.
2074        //   Without the fix the `StringLiteral` text (`"Hi $name!"`)
2075        //   would also be counted → N2 = 6, u_operands = 5.
2076        check_metrics::<KotlinParser>(
2077            "fun greet(name: String): String {\n    return \"Hi $name!\"\n}\n",
2078            "foo.kt",
2079            |metric| {
2080                assert_eq!(metric.halstead.u_operands(), 4.0);
2081                assert_eq!(metric.halstead.operands(), 5.0);
2082            },
2083        );
2084    }
2085
2086    #[test]
2087    fn kotlin_string_template_long_form_no_double_count() {
2088        // The `${expr}` long form of a Kotlin string template also
2089        // produces an `Interpolation` child. The fix must apply to it
2090        // identically.
2091        //
2092        // Source: `fun f(x: Int): String { return "v=${x}" }\n`
2093        // Operands by source-byte key:
2094        //   `f` × 1, `x` × 2 (param + inside `${x}`),
2095        //   `Int` × 1, `String` × 1.
2096        // With the fix u_operands = 4 (`f`, `x`, `Int`, `String`),
2097        // N2 = 5. Without the fix the wrapping `"v=${x}"` would also
2098        // count → u_operands = 5, N2 = 6.
2099        check_metrics::<KotlinParser>(
2100            "fun f(x: Int): String { return \"v=${x}\" }\n",
2101            "foo.kt",
2102            |metric| {
2103                assert_eq!(metric.halstead.u_operands(), 4.0);
2104                assert_eq!(metric.halstead.operands(), 5.0);
2105            },
2106        );
2107    }
2108
2109    #[test]
2110    fn kotlin_plain_string_still_operand() {
2111        // The fix for #191 only skips wrapping templates that contain
2112        // an `Interpolation` child; a plain `"hello"` (no `$` interp)
2113        // must still contribute exactly one operand.
2114        //
2115        // Source: `fun f(): String { return "hello" }\n`
2116        // Operands: `f` × 1, `String` × 1, `"hello"` × 1 →
2117        // u_operands = 3, N2 = 3.
2118        check_metrics::<KotlinParser>(
2119            "fun f(): String { return \"hello\" }\n",
2120            "foo.kt",
2121            |metric| {
2122                assert_eq!(metric.halstead.u_operands(), 3.0);
2123                assert_eq!(metric.halstead.operands(), 3.0);
2124            },
2125        );
2126    }
2127
2128    #[test]
2129    fn python_fstring_no_double_count() {
2130        // Regression: issue #191. A Python f-string (`f"Hi {name}!"`)
2131        // wraps an `Interpolation` child whose inner identifier
2132        // `name` is walked and counted as its own operand. Without
2133        // the `is_child(Interpolation)` guard the wrapping `String`
2134        // would also count, double-counting `name`'s contribution to
2135        // `N2`. Same pattern as #180 (Bash/Elixir) and #184 (PHP).
2136        //
2137        // Source: `def greet(name):\n    return f"Hi {name}!"\n`
2138        // Operands by source-byte key:
2139        //   `greet` × 1, `name` × 2 (param + inside `{name}`).
2140        // With the fix the wrapping `f"Hi {name}!"` is skipped →
2141        // u_operands = 2 (`greet`, `name`), N2 = 3. Without the fix
2142        // the wrapping literal would also count → u_operands = 3,
2143        // N2 = 4.
2144        check_metrics::<PythonParser>(
2145            "def greet(name):\n    return f\"Hi {name}!\"\n",
2146            "foo.py",
2147            |metric| {
2148                assert_eq!(metric.halstead.u_operands(), 2.0);
2149                assert_eq!(metric.halstead.operands(), 3.0);
2150            },
2151        );
2152    }
2153
2154    #[test]
2155    fn python_plain_string_still_operand() {
2156        // The fix for #191 only skips wrapping `String` nodes that
2157        // contain an `Interpolation` child; a plain `"hi"` must still
2158        // contribute exactly one operand.
2159        //
2160        // Source: `def f():\n    return "hi"\n`
2161        // Operands: `f` × 1, `"hi"` × 1 → u_operands = 2, N2 = 2.
2162        // (The previous documentation-string filter is preserved:
2163        // a bare `"hi"` as a top-level `expression_statement` would
2164        // be skipped, but here it appears as `return "hi"`.)
2165        check_metrics::<PythonParser>("def f():\n    return \"hi\"\n", "foo.py", |metric| {
2166            assert_eq!(metric.halstead.u_operands(), 2.0);
2167            assert_eq!(metric.halstead.operands(), 2.0);
2168        });
2169    }
2170
2171    #[test]
2172    fn python_empty_file_halstead() {
2173        check_metrics::<PythonParser>("", "empty.py", |metric| {
2174            let h = &metric.halstead;
2175            assert_eq!(h.u_operators(), 0.0);
2176            assert_eq!(h.operands(), 0.0);
2177            assert_eq!(h.estimated_program_length(), 0.0);
2178            assert_eq!(h.purity_ratio(), 0.0);
2179            assert_eq!(h.volume(), 0.0);
2180            assert_eq!(h.difficulty(), 0.0);
2181            assert_eq!(h.level(), 0.0);
2182            assert_eq!(h.effort(), 0.0);
2183            assert_eq!(h.time(), 0.0);
2184            assert_eq!(h.bugs(), 0.0);
2185        });
2186    }
2187
2188    #[test]
2189    fn bash_operators_and_operands() {
2190        check_metrics::<BashParser>(
2191            "#!/bin/bash
2192f() {
2193    local x=1
2194    if [ $x -eq 1 ]; then
2195        echo 'one'
2196    fi
2197}",
2198            "foo.sh",
2199            |metric| {
2200                // `x` (assignment LHS and inside `$x`) is a `variable_name`
2201                // with aliased kind_id 160 — all three aliases must be in
2202                // the operand list (see lesson 2).
2203                assert_eq!(metric.halstead.u_operators(), 12.0);
2204                assert_eq!(metric.halstead.operators(), 12.0);
2205                assert_eq!(metric.halstead.u_operands(), 6.0);
2206                assert_eq!(metric.halstead.operands(), 9.0);
2207                insta::assert_json_snapshot!(metric.halstead);
2208            },
2209        );
2210    }
2211
2212    #[test]
2213    fn bash_interpolated_string_no_double_count() {
2214        // Regression: issue #180. A double-quoted Bash string containing
2215        // `$name`, `${name[…]}`, or `$(cmd)` used to be classified as a
2216        // Halstead operand AND have its inner `simple_expansion` /
2217        // `expansion` / `command_substitution` children classified as
2218        // operands too. We now skip the wrapping literal when it has an
2219        // expansion child so only the inner expansion contributes.
2220        //
2221        // expected: operands across `a="plain"\nb="$x"\n` —
2222        //   line 1: variable_name `a`, plain string `"plain"` (no
2223        //     expansion, still operand) → 2.
2224        //   line 2: variable_name `b`, wrapping `"$x"` skipped (has
2225        //     expansion), `simple_expansion` `$x`, inner variable_name
2226        //     `x` → 3.
2227        // Total unique operands: 5 (`a`, `b`, `"plain"`, `$x`, `x`),
2228        // each appearing once → N2 = 5. Without the #180 fix, the
2229        // wrapping `"$x"` literal would also be counted, making
2230        // u_operands = 6 and N2 = 6. The `=` is the only operator;
2231        // appears twice (N1 = 2, n1 = 1).
2232        check_metrics::<BashParser>("a=\"plain\"\nb=\"$x\"\n", "foo.sh", |metric| {
2233            assert_eq!(metric.halstead.u_operators(), 1.0);
2234            assert_eq!(metric.halstead.operators(), 2.0);
2235            assert_eq!(metric.halstead.u_operands(), 5.0);
2236            assert_eq!(metric.halstead.operands(), 5.0);
2237            insta::assert_json_snapshot!(metric.halstead);
2238        });
2239    }
2240
2241    #[test]
2242    fn elixir_interpolated_string_no_double_count() {
2243        // Regression: issue #180. Without the fix, an interpolated
2244        // Elixir `String` was classified as a single operand while its
2245        // inner `interpolation` identifier was also walked and
2246        // classified as its own operand — double-counting the
2247        // interpolated identifier's contribution to `N2`.
2248        //
2249        // expected: operand contributions for
2250        //   `def greet(name) do\n  msg = "Hi #{name}"\nend\n` —
2251        // `def`, `greet`, `name` (param), `msg`, and the inner `name`
2252        // (inside `#{...}`). With the fix, the wrapping
2253        // `"Hi #{name}"` literal is skipped (has `Interpolation`
2254        // child), so `name` is the only repeated operand:
2255        // u_operands = 4 (def, greet, name, msg), N2 = 5. Without the
2256        // fix, the wrapping literal would also count → u_operands = 5,
2257        // N2 = 6. Operators (`do`, `end`, `(`, `)`, `=`, `#{`, `}`)
2258        // are unchanged: u = N = 7 (the `#{`/`}` interpolation
2259        // markers stay classified as operators).
2260        check_metrics::<ElixirParser>(
2261            "def greet(name) do\n  msg = \"Hi #{name}\"\nend\n",
2262            "foo.ex",
2263            |metric| {
2264                assert_eq!(metric.halstead.u_operators(), 7.0);
2265                assert_eq!(metric.halstead.operators(), 7.0);
2266                assert_eq!(metric.halstead.u_operands(), 4.0);
2267                assert_eq!(metric.halstead.operands(), 5.0);
2268                insta::assert_json_snapshot!(metric.halstead);
2269            },
2270        );
2271    }
2272
2273    #[test]
2274    fn elixir_plain_string_still_operand() {
2275        // The fix for #180 only skips wrapping literals that contain
2276        // interpolation; a plain `"hello"` must still contribute exactly
2277        // one operand. expected: `def`, `f`, `"hello"` → 3 unique
2278        // operands (n2 = 3), each appearing once (N2 = 3).
2279        check_metrics::<ElixirParser>("def f do\n  \"hello\"\nend\n", "foo.ex", |metric| {
2280            assert_eq!(metric.halstead.u_operands(), 3.0);
2281            assert_eq!(metric.halstead.operands(), 3.0);
2282        });
2283    }
2284
2285    #[test]
2286    fn elixir_interpolated_sigil_no_double_count() {
2287        // Sigils mirror strings under #180. For `~r/foo#{name}/`, the
2288        // wrapping `Sigil` is skipped, but `SigilName` (`r`) and the
2289        // inner `name` identifier each contribute one operand.
2290        // expected: `def`, `f`, `name` (param), `re`, `r` (sigil name),
2291        // `name` (inside `#{...}`) → u_operands = 5, N2 = 6 (`name`
2292        // twice).
2293        check_metrics::<ElixirParser>(
2294            "def f(name) do\n  re = ~r/foo#{name}/\nend\n",
2295            "foo.ex",
2296            |metric| {
2297                assert_eq!(metric.halstead.u_operands(), 5.0);
2298                assert_eq!(metric.halstead.operands(), 6.0);
2299            },
2300        );
2301    }
2302
2303    #[test]
2304    fn elixir_interpolated_charlist_no_double_count() {
2305        // Charlists mirror strings and sigils under #180. The
2306        // `E::String | E::Charlist | E::Sigil` arm in `get_op_type`
2307        // skips any wrapping literal that has an `Interpolation`
2308        // child; this test exercises the `Charlist` branch
2309        // specifically.
2310        //
2311        // expected: for `def f(name) do\n  cl = 'Hi #{name}'\nend\n` —
2312        // `def`, `f`, `name` (param), `cl`, and the inner `name`
2313        // (inside `#{...}`). With the fix, the wrapping
2314        // `'Hi #{name}'` is skipped → u_operands = 4 (def, f, name,
2315        // cl), N2 = 5 (`name` twice).
2316        check_metrics::<ElixirParser>(
2317            "def f(name) do\n  cl = 'Hi #{name}'\nend\n",
2318            "foo.ex",
2319            |metric| {
2320                assert_eq!(metric.halstead.u_operands(), 4.0);
2321                assert_eq!(metric.halstead.operands(), 5.0);
2322            },
2323        );
2324    }
2325
2326    #[test]
2327    fn bash_all_expansion_kinds_skip_wrapper() {
2328        // Exercises every node kind tested by
2329        // `bash_string_has_expansion`: `simple_expansion` (`$v`),
2330        // `expansion` (`${v[0]}`), `command_substitution` (`$(date)`),
2331        // and `arithmetic_expansion` (`$((1+2))`). A typo replacing
2332        // one kind with an aliased neighbour in `language_bash.rs`
2333        // (e.g., `ExpansionBody` instead of `Expansion`) would leave
2334        // the corresponding wrapping string counted as an operand and
2335        // shift the totals.
2336        //
2337        // expected: operands across the four lines —
2338        //   line 1 `a="$v"`: var_name `a`, simple_expansion `$v`,
2339        //     inner var_name `v` (wrapper skipped) → 3
2340        //   line 2 `b="${v[0]}"`: var_name `b`, var_name `v` (inside
2341        //     subscript), number `0` (wrapper skipped, `expansion`
2342        //     itself is not in the operand list) → 3
2343        //   line 3 `c="$(date)"`: var_name `c`, command_name `date`
2344        //     (wrapper skipped, `command_substitution` not in operand
2345        //     list) → 2
2346        //   line 4 `d="$((1+2))"`: var_name `d`, numbers `1` and `2`
2347        //     (wrapper skipped, `arithmetic_expansion` not in operand
2348        //     list) → 3
2349        // Unique operands (`v` shared across lines 1 and 2): a, b, c,
2350        // d, $v, v, 0, date, 1, 2 → 10. Total occurrences: 12 (`v`
2351        // appears twice). Operators include `=` four times plus the
2352        // `${`, `}`, `$(`, `)`, `$((`, `))`, `[`, `]`, `+` punctuation.
2353        check_metrics::<BashParser>(
2354            "a=\"$v\"\nb=\"${v[0]}\"\nc=\"$(date)\"\nd=\"$((1+2))\"\n",
2355            "foo.sh",
2356            |metric| {
2357                assert_eq!(metric.halstead.u_operators(), 6.0);
2358                assert_eq!(metric.halstead.operators(), 9.0);
2359                assert_eq!(metric.halstead.u_operands(), 10.0);
2360                assert_eq!(metric.halstead.operands(), 12.0);
2361            },
2362        );
2363    }
2364
2365    #[test]
2366    fn tcl_operators_and_operands() {
2367        check_metrics::<TclParser>(
2368            "proc f {a b} {
2369    set x [expr {$a + $b}]
2370    if {$x > 0 && $x != 0} {
2371        return $x
2372    }
2373    return 0
2374}",
2375            "foo.tcl",
2376            |metric| {
2377                insta::assert_json_snapshot!(metric.halstead);
2378            },
2379        );
2380    }
2381
2382    #[test]
2383    fn tcl_bitwise_ternary_string_ops() {
2384        // Exercises operator families not covered by tcl_operators_and_operands:
2385        // bitwise (&, |, ^, ~, <<, >>), ternary (?), and string-comparison (eq, ne, in, ni).
2386        check_metrics::<TclParser>(
2387            "proc f {a b} {
2388    set bits [expr {$a & $b | $a ^ ~$b}]
2389    set sh [expr {$a << 1 | $b >> 1}]
2390    set t [expr {$a > 0 ? $a : $b}]
2391    if {$a eq {x} || $a ne {y}} {
2392        return $a
2393    }
2394    return $b
2395}",
2396            "foo.tcl",
2397            |metric| {
2398                insta::assert_json_snapshot!(metric.halstead);
2399            },
2400        );
2401    }
2402
2403    #[test]
2404    fn tcl_bare_variable_operand() {
2405        // Bare `$varname` produces a VariableSubstitution node (already an operand).
2406        // Its anonymous Id2 child must NOT be counted separately; each reference is 1 operand.
2407        check_metrics::<TclParser>(
2408            "proc f {x} {
2409    return $x
2410}",
2411            "foo.tcl",
2412            |metric| {
2413                insta::assert_json_snapshot!(metric.halstead);
2414            },
2415        );
2416    }
2417
2418    #[test]
2419    fn tcl_inert_quoted_word_counts_as_operand() {
2420        // Regression for #277. A `"..."` literal with no `$var` / `[cmd]`
2421        // interpolation must contribute exactly one operand (the wrapping
2422        // `QuotedWord`). The string content `hello world` is exposed as a
2423        // single `_quoted_word_content` token (not itself classified by
2424        // `get_op_type`), so the only operands here are `f`, `s`, and the
2425        // quoted string. `set` is the anonymous `Set2` keyword and is
2426        // classified as an operator, not an operand.
2427        check_metrics::<TclParser>(
2428            "proc f {} {
2429    set s \"hello world\"
2430}",
2431            "foo.tcl",
2432            |metric| {
2433                // Operands: `f`, `s`, `"hello world"` — 3 unique, 3 total.
2434                // The wrapping `QuotedWord` must still contribute exactly
2435                // one operand when it carries no interpolation children;
2436                // dropping to 2 would mean the inert case was over-guarded.
2437                assert_eq!(metric.halstead.u_operands(), 3.0);
2438                assert_eq!(metric.halstead.operands(), 3.0);
2439                insta::assert_json_snapshot!(metric.halstead);
2440            },
2441        );
2442    }
2443
2444    #[test]
2445    fn tcl_interpolated_quoted_word_no_double_count() {
2446        // Regression for #277. Before the fix, `"$x is $y"` produced an
2447        // extra operand for the wrapping `QuotedWord` on top of the two
2448        // inner `VariableSubstitution` operands (`$x`, `$y`), giving 7.
2449        // After the fix, the wrapper is `HalsteadType::Unknown` whenever
2450        // it carries an interpolation child, so operand attribution
2451        // belongs solely to the inner substitutions.
2452        check_metrics::<TclParser>(
2453            "proc f {x y} {
2454    set s \"$x is $y\"
2455}",
2456            "foo.tcl",
2457            |metric| {
2458                // Operands: `f`, `x`, `y` (proc args), `s`, `$x`, `$y` — 6
2459                // unique, 6 total. The wrapping `QuotedWord` contributes
2460                // nothing. Pre-fix this read 7/7 (double-counted wrapper).
2461                assert_eq!(metric.halstead.u_operands(), 6.0);
2462                assert_eq!(metric.halstead.operands(), 6.0);
2463                insta::assert_json_snapshot!(metric.halstead);
2464            },
2465        );
2466    }
2467
2468    #[test]
2469    fn tcl_command_substitution_quoted_word_no_double_count() {
2470        // Regression for #277. A `"...[cmd]..."` literal exposes the
2471        // bracketed command as a `command_substitution` child whose inner
2472        // identifiers/literals contribute their own operands. The wrapping
2473        // `QuotedWord` must not also be classified as an operand, or the
2474        // command's identifier would be counted alongside a phantom
2475        // wrapper operand.
2476        check_metrics::<TclParser>(
2477            "proc f {} {
2478    set s \"result: [foo]\"
2479}",
2480            "foo.tcl",
2481            |metric| {
2482                // Operands: `f`, `s`, `foo` — 3 unique, 3 total. The
2483                // wrapping `QuotedWord` and the inert text `result: ` do
2484                // not contribute extra operands. Pre-fix this read 4/4
2485                // (double-counted wrapper).
2486                assert_eq!(metric.halstead.u_operands(), 3.0);
2487                assert_eq!(metric.halstead.operands(), 3.0);
2488                insta::assert_json_snapshot!(metric.halstead);
2489            },
2490        );
2491    }
2492
2493    #[test]
2494    fn php_operators_and_operands() {
2495        check_metrics::<PhpParser>(
2496            "<?php
2497            function avg(int $a, int $b, int $c): int {
2498                return ($a + $b + $c) / 3;
2499            }",
2500            "foo.php",
2501            |metric| {
2502                assert_eq!(metric.halstead.u_operators(), 11.0);
2503                assert_eq!(metric.halstead.operators(), 15.0);
2504                assert_eq!(metric.halstead.u_operands(), 9.0);
2505                assert_eq!(metric.halstead.operands(), 22.0);
2506                insta::assert_json_snapshot!(metric.halstead);
2507            },
2508        );
2509    }
2510
2511    #[test]
2512    fn php_simple_function() {
2513        check_metrics::<PhpParser>(
2514            "<?php
2515            function inc(int $x): int { return $x + 1; }",
2516            "foo.php",
2517            |metric| {
2518                assert_eq!(metric.halstead.u_operators(), 9.0);
2519                assert_eq!(metric.halstead.operators(), 9.0);
2520                assert_eq!(metric.halstead.u_operands(), 5.0);
2521                assert_eq!(metric.halstead.operands(), 10.0);
2522                insta::assert_json_snapshot!(metric.halstead);
2523            },
2524        );
2525    }
2526
2527    #[test]
2528    fn php_encapsed_string_interpolation_no_double_count() {
2529        // Regression: issue #184. A PHP `"Hello $name!"` used to be
2530        // classified as a Halstead operand (the wrapping
2531        // `encapsed_string`) AND have its inner `variable_name`
2532        // (`$name`) plus the inner `name` token classified as
2533        // operands too. With the fix, the wrapping literal drops to
2534        // `Unknown` when it carries any `$var` / `${name}` / `{$expr}`
2535        // child, so `$name` is counted exactly once at each text
2536        // occurrence.
2537        //
2538        // Source:
2539        //   <?php $name = "world"; echo "Hello $name!";
2540        //
2541        // Inert operand: `"world"` (no interpolation, still operand).
2542        // Operands by text key (`get_id` keys by source bytes):
2543        //   `$name` × 2 (assignment LHS and `$name` inside the
2544        //   interpolated string), `name` × 2 (the `name` token inside
2545        //   each `variable_name`), `"world"` × 1.
2546        // u_operands = 3, N2 = 5.
2547        // Without the fix the wrapping `"Hello $name!"` would also
2548        // count → u_operands = 4, N2 = 6.
2549        check_metrics::<PhpParser>(
2550            "<?php $name = \"world\"; echo \"Hello $name!\";",
2551            "foo.php",
2552            |metric| {
2553                assert_eq!(metric.halstead.u_operands(), 3.0);
2554                assert_eq!(metric.halstead.operands(), 5.0);
2555            },
2556        );
2557    }
2558
2559    #[test]
2560    fn php_encapsed_string_no_interpolation_still_operand() {
2561        // The fix for #184 only drops `EncapsedString`/`Heredoc` from
2562        // the operand arm when interpolation is present. An inert
2563        // double-quoted string must still count as exactly one
2564        // operand, identical to the single-quoted equivalent.
2565        //
2566        // Source: `<?php echo "Hello world!";`
2567        // Operands: `"Hello world!"` × 1 → u_operands = 1, N2 = 1.
2568        check_metrics::<PhpParser>("<?php echo \"Hello world!\";", "foo.php", |metric| {
2569            assert_eq!(metric.halstead.u_operands(), 1.0);
2570            assert_eq!(metric.halstead.operands(), 1.0);
2571        });
2572    }
2573
2574    #[test]
2575    fn php_heredoc_interpolation_no_double_count() {
2576        // Regression: issue #184. A PHP heredoc whose body
2577        // interpolates `$name` previously counted both the wrapping
2578        // `heredoc` node and the inner `$name` as operands; the fix
2579        // drops the wrapper when its `heredoc_body` carries any
2580        // interpolation child.
2581        //
2582        // Source:
2583        //   <?php $name = "x"; echo <<<EOT
2584        //   hi $name
2585        //   EOT;
2586        //
2587        // Operands by text key: `$name` × 2, `name` × 2, `"x"` × 1
2588        // (inert single-interp encapsed string also operand). With
2589        // the fix u_operands = 3, N2 = 5. Without the fix the
2590        // wrapping heredoc text would add one more unique operand.
2591        check_metrics::<PhpParser>(
2592            "<?php $name = \"x\"; echo <<<EOT\nhi $name\nEOT;\n",
2593            "foo.php",
2594            |metric| {
2595                assert_eq!(metric.halstead.u_operands(), 3.0);
2596                assert_eq!(metric.halstead.operands(), 5.0);
2597            },
2598        );
2599    }
2600
2601    #[test]
2602    fn php_nowdoc_unaffected() {
2603        // `Nowdoc` (single-quoted heredoc) never interpolates and is
2604        // never matched by `php_string_has_interpolation`. It must
2605        // continue counting as exactly one operand regardless of the
2606        // text inside, mirroring single-quoted `String`.
2607        //
2608        // Source:
2609        //   <?php echo <<<'EOT'
2610        //   plain $name not interpolated
2611        //   EOT;
2612        //
2613        // Operands: the nowdoc literal × 1 → u_operands = 1, N2 = 1.
2614        check_metrics::<PhpParser>(
2615            "<?php echo <<<'EOT'\nplain $name not interpolated\nEOT;\n",
2616            "foo.php",
2617            |metric| {
2618                assert_eq!(metric.halstead.u_operands(), 1.0);
2619                assert_eq!(metric.halstead.operands(), 1.0);
2620            },
2621        );
2622    }
2623
2624    #[test]
2625    fn php_encapsed_string_bare_member_access_no_double_count() {
2626        // Regression: issue #184 follow-up. The PHP grammar allows
2627        // bare `$obj->prop` interpolation inside `"…"` without
2628        // surrounding `{ … }`; tree-sitter-php emits this as a
2629        // direct `member_access_expression` child of
2630        // `encapsed_string` (kind_id 329 in the current grammar).
2631        // The wrapper must drop to `Unknown` for that form too —
2632        // otherwise the inner `$obj` and `prop` `name` tokens are
2633        // walked as operands while the wrapper also counts,
2634        // double-counting `N2`.
2635        //
2636        // Source:
2637        //   <?php $obj = new stdClass; $obj->prop = "x"; echo "Hi $obj->prop!";
2638        //
2639        // Operands tallied by `get_id` (keyed on source bytes):
2640        //   `$obj`        × 3 (LHS assignment, member-access target,
2641        //                      inside the interpolated string)
2642        //   `obj`  (name) × 3 (one per `variable_name`)
2643        //   `prop` (name) × 2 (member-access RHS twice)
2644        //   `stdClass`    × 1
2645        //   `"x"`         × 1
2646        // ⇒ u_operands = 5, N2 = 10.
2647        // With the bug the wrapping `"Hi $obj->prop!"` text adds one
2648        // more unique operand and one more occurrence ⇒ 6 / 11.
2649        check_metrics::<PhpParser>(
2650            "<?php $obj = new stdClass; $obj->prop = \"x\"; echo \"Hi $obj->prop!\";",
2651            "foo.php",
2652            |metric| {
2653                assert_eq!(metric.halstead.u_operands(), 5.0);
2654                assert_eq!(metric.halstead.operands(), 10.0);
2655            },
2656        );
2657    }
2658
2659    #[test]
2660    fn php_encapsed_string_bare_subscript_no_double_count() {
2661        // Regression: issue #184 follow-up. Bare `$arr[0]` inside
2662        // `"…"` produces a `subscript_expression` child of
2663        // `encapsed_string` (kind_id 351). The wrapper must drop to
2664        // `Unknown` for that form.
2665        //
2666        // Source:
2667        //   <?php $arr = [1]; echo "Hi $arr[0]!";
2668        //
2669        // Operands tallied by `get_id`:
2670        //   `$arr` × 2, `arr` × 2 (inner `name`), `1` × 1, `0` × 1.
2671        // ⇒ u_operands = 4, N2 = 6.
2672        // With the bug the wrapping `"Hi $arr[0]!"` text adds 1 / 1.
2673        check_metrics::<PhpParser>(
2674            "<?php $arr = [1]; echo \"Hi $arr[0]!\";",
2675            "foo.php",
2676            |metric| {
2677                assert_eq!(metric.halstead.u_operands(), 4.0);
2678                assert_eq!(metric.halstead.operands(), 6.0);
2679            },
2680        );
2681    }
2682
2683    #[test]
2684    fn php_shell_command_expression_inert_is_operand() {
2685        // Regression: issue #288. Backtick command literals (PHP's
2686        // `shell_command_expression`) were filtered as strings by
2687        // `Checker::is_string` and `Alterator::alterate`, but never
2688        // classified as Halstead operands — so they contributed
2689        // nothing to N2 / eta2. An inert backtick literal must now
2690        // count as exactly one operand, matching `EncapsedString`
2691        // and `Heredoc`.
2692        //
2693        // Source: `<?php $out = ` + backtick `ls` + backtick + `;`
2694        // Operands tallied by `get_id`:
2695        //   `$out` × 1, `out` × 1 (inner `name`), backtick literal × 1.
2696        // ⇒ u_operands = 3, N2 = 3.
2697        // Before the fix the backtick literal vanished from the count
2698        // ⇒ u_operands = 2, N2 = 2.
2699        check_metrics::<PhpParser>("<?php $out = `ls`;", "foo.php", |metric| {
2700            assert_eq!(metric.halstead.u_operands(), 3.0);
2701            assert_eq!(metric.halstead.operands(), 3.0);
2702        });
2703    }
2704
2705    #[test]
2706    fn php_shell_command_expression_interpolation_no_double_count() {
2707        // Regression: issue #288. PHP backtick literals DO support
2708        // `$var` interpolation (see tree-sitter-php node-types.json:
2709        // `shell_command_expression` children include `variable_name`,
2710        // `dynamic_variable_name`, `member_access_expression`,
2711        // `subscript_expression`). With the fix the wrapper drops to
2712        // `Unknown` when it carries any interpolation child, exactly
2713        // as `EncapsedString` does.
2714        //
2715        // Source: `<?php $dir = "/tmp"; $out = ` + backtick `ls $dir` +
2716        //   backtick + `;`
2717        //
2718        // Operands tallied by `get_id`:
2719        //   `$dir` × 2 (assignment LHS, inside backticks),
2720        //   `dir`  × 2 (inner `name`),
2721        //   `$out` × 1, `out` × 1, `"/tmp"` × 1.
2722        // ⇒ u_operands = 5, N2 = 7.
2723        // Without the interpolation guard the wrapping backtick literal
2724        // would also count ⇒ u_operands = 6, N2 = 8.
2725        check_metrics::<PhpParser>(
2726            "<?php $dir = \"/tmp\"; $out = `ls $dir`;",
2727            "foo.php",
2728            |metric| {
2729                assert_eq!(metric.halstead.u_operands(), 5.0);
2730                assert_eq!(metric.halstead.operands(), 7.0);
2731            },
2732        );
2733    }
2734
2735    #[test]
2736    fn elixir_operators_and_operands() {
2737        // Exercises every Halstead family classified in Elixir's
2738        // `get_op_type`: control-flow keywords (`do`, `end`, `fn`),
2739        // structural punctuation (`(`, `)`, `[`, `]`, `,`, `.`, `@`),
2740        // arithmetic (`+`, `-`, `*`, `/`), comparison (`==`, `>`),
2741        // logical (`&&`, `||`, `and`, `or`, `!`), pipe (`|>`), capture
2742        // (`&`), assignment/match (`=`), and the stab arrow (`->`).
2743        // The body mixes identifiers, integers, atoms, and a string.
2744        check_metrics::<ElixirParser>(
2745            "defmodule Foo do\n  @doc \"add\"\n  def calc(a, b) do\n    result = a + b * 2\n    flag = result > 0 && a == b\n    out = if flag, do: result, else: -result\n    [out, a, b]\n  end\nend\n",
2746            "foo.ex",
2747            |metric| {
2748                // Positive headline assertions on integer counts.
2749                assert_eq!(metric.halstead.u_operators(), 15.0);
2750                assert_eq!(metric.halstead.operators(), 23.0);
2751                assert_eq!(metric.halstead.u_operands(), 16.0);
2752                assert_eq!(metric.halstead.operands(), 27.0);
2753                insta::assert_json_snapshot!(
2754                    metric.halstead,
2755                    @r###"
2756                {
2757                  "n1": 15.0,
2758                  "N1": 23.0,
2759                  "n2": 16.0,
2760                  "N2": 27.0,
2761                  "length": 50.0,
2762                  "estimated_program_length": 122.60335893412778,
2763                  "purity_ratio": 2.452067178682556,
2764                  "vocabulary": 31.0,
2765                  "volume": 247.70981551934375,
2766                  "difficulty": 12.65625,
2767                  "level": 0.07901234567901234,
2768                  "effort": 3135.0773526666944,
2769                  "time": 174.17096403703857,
2770                  "bugs": 0.07140208917738183
2771                }"###
2772                );
2773            },
2774        );
2775    }
2776
2777    #[test]
2778    fn ruby_operators_and_operands() {
2779        // A small Ruby method exercising operators (def/if/end keyword
2780        // tokens, `+`, `==`, `<=`, structural punctuation) and operands
2781        // (`n`, `1`, `factorial`). Anchors the unique/total counts on
2782        // both sides and snapshots the full Halstead derivation.
2783        //
2784        // Lesson 4 invariants: u_operators / u_operands here equal the
2785        // dedupe lengths the `--ops` accessor would emit on the same
2786        // source. Any future grammar bump that adds an aliased kind_id
2787        // to either side will trip this without snapshot drift.
2788        check_metrics::<RubyParser>(
2789            "def factorial(n)\n  return 1 if n <= 1\n  n * factorial(n - 1)\nend\n",
2790            "foo.rb",
2791            |metric| {
2792                assert_eq!(metric.halstead.u_operators(), 9.0);
2793                assert_eq!(metric.halstead.operators(), 11.0);
2794                assert_eq!(metric.halstead.u_operands(), 3.0);
2795                assert_eq!(metric.halstead.operands(), 9.0);
2796                insta::assert_json_snapshot!(metric.halstead);
2797            },
2798        );
2799    }
2800
2801    #[test]
2802    fn ruby_halstead_plain_string_operand() {
2803        // A bare string literal contributes exactly one operand. The
2804        // counterpart to `ruby_halstead_interpolated_string_no_double_count`
2805        // — verifies the "no interpolation" branch of the same arm
2806        // (see `src/getter.rs::get_op_type`'s `R::String | …` case).
2807        // expected: operators = {def, end} = 2; operands = {f, "hello"} = 2.
2808        check_metrics::<RubyParser>("def f\n  \"hello\"\nend\n", "foo.rb", |metric| {
2809            assert_eq!(metric.halstead.u_operators(), 2.0);
2810            assert_eq!(metric.halstead.operators(), 2.0);
2811            assert_eq!(metric.halstead.u_operands(), 2.0);
2812            assert_eq!(metric.halstead.operands(), 2.0);
2813        });
2814    }
2815
2816    #[test]
2817    fn ruby_halstead_interpolated_string_no_double_count() {
2818        // Regression mirror for #180 (Bash) / #183 (C#): when a Ruby
2819        // string literal carries an `Interpolation` child, the
2820        // wrapping `String` node is intentionally classified as
2821        // `Unknown` so the inner expression's identifiers are not
2822        // double-counted as operands.
2823        //
2824        // expected: for `def f(name)\n  "Hi #{name}"\nend\n` —
2825        //   operators: def, (, ), #{, }, end → u_operators = 6.
2826        //   operands: f, name (param), name (inside `#{name}`). The
2827        //   wrapping `"…#{name}"` literal is skipped by the
2828        //   `is_child(R::Interpolation)` guard; the operand store
2829        //   keys by token text so the two `name` occurrences dedupe
2830        //   into one distinct entry → u_operands = 2, operands = 3
2831        //   (`f` once, `name` twice).
2832        // Without the guard, the wrapping literal would also count,
2833        // inflating u_operands to 3 and operands to 4.
2834        check_metrics::<RubyParser>("def f(name)\n  \"Hi #{name}\"\nend\n", "foo.rb", |metric| {
2835            assert_eq!(metric.halstead.u_operands(), 2.0);
2836            assert_eq!(metric.halstead.operands(), 3.0);
2837        });
2838    }
2839
2840    #[test]
2841    fn ruby_halstead_symbol_literal_operand() {
2842        // `:foo` is a `SimpleSymbol` leaf — counts as a single
2843        // operand, no interpolation guard needed (only
2844        // `DelimitedSymbol` (`:"…#{x}…"`) can interpolate).
2845        // expected: operators = {def, end} = 2; operands = {f, :ok} = 2.
2846        check_metrics::<RubyParser>("def f\n  :ok\nend\n", "foo.rb", |metric| {
2847            assert_eq!(metric.halstead.u_operators(), 2.0);
2848            assert_eq!(metric.halstead.u_operands(), 2.0);
2849        });
2850    }
2851
2852    #[test]
2853    fn ruby_halstead_regex_operand() {
2854        // `/foo/` parses as a `Regex` node — one operand. The slash
2855        // delimiters around it are emitted as `SLASH` tokens and
2856        // classified as arithmetic-or-divide operators by the shared
2857        // arm; they count once toward the distinct-operator set.
2858        // expected: u_operators = {def, (, ), =~, /, end} = 6;
2859        // u_operands = {f, s, /foo/} = 3.
2860        check_metrics::<RubyParser>("def f(s)\n  s =~ /foo/\nend\n", "foo.rb", |metric| {
2861            assert_eq!(metric.halstead.u_operators(), 6.0);
2862            assert_eq!(metric.halstead.u_operands(), 3.0);
2863        });
2864    }
2865}