big_code_analysis/metrics/halstead.rs
1// Per-language metric and AST modules deliberately consume the macro-
2// generated tree-sitter token enums via `use crate::*` and `use Foo::*`
3// inside match expressions — explicit imports would list dozens of
4// variants per arm and obscure the per-language token sets that are the
5// point of these files. Allowed at the module level rather than per
6// function so the per-language impl blocks stay readable.
7#![allow(
8 clippy::doc_markdown,
9 clippy::enum_glob_use,
10 clippy::match_wildcard_for_single_variants,
11 clippy::similar_names,
12 clippy::unused_self,
13 clippy::wildcard_imports
14)]
15// Metric counts (token, function, branch, argument, etc.) are stored as
16// `usize` and crossed with `f64` averages, ratios, and Halstead scores
17// across the cyclomatic / MI / Halstead computations. The `usize as f64`
18// and `f64 as usize` casts are intentional and snapshot-anchored — every
19// site is bounded by the count it came from. Allowing the lints at the
20// module level keeps the metric arithmetic legible.
21#![allow(
22 clippy::cast_precision_loss,
23 clippy::cast_possible_truncation,
24 clippy::cast_sign_loss
25)]
26
27use std::collections::HashMap;
28
29use serde::Serialize;
30use serde::ser::{SerializeStruct, Serializer};
31use std::fmt;
32
33use crate::checker::Checker;
34use crate::getter::Getter;
35use crate::macros::implement_metric_trait;
36
37use crate::*;
38
/// The `Halstead` metric suite.
///
/// Stores the four raw counts; every derived value (length, vocabulary,
/// volume, difficulty, ...) is computed on demand by the accessor methods.
#[derive(Default, Clone, Debug)]
pub struct Stats {
    /// Number of distinct operators (serialized as `n1`).
    u_operators: u64,
    /// Total number of operator occurrences (serialized as `N1`).
    operators: u64,
    /// Number of distinct operands (serialized as `n2`).
    u_operands: u64,
    /// Total number of operand occurrences (serialized as `N2`).
    operands: u64,
}
47
/// Specifies the type of nodes accepted by the `Halstead` metric.
///
/// The enum is a plain, field-less tag, so it derives the common traits
/// (`Debug`, `Clone`, `Copy`, `PartialEq`, `Eq`) to be printable, cheaply
/// copyable and comparable.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HalsteadType {
    /// The node is a `Halstead` operator
    Operator,
    /// The node is a `Halstead` operand
    Operand,
    /// The node is unknown to the `Halstead` metric
    Unknown,
}
57
/// Per-space operator / operand occurrence maps used to compute the
/// Halstead `Stats` struct. Each map goes from a distinct key to the number
/// of times that key occurred: operators are keyed by `kind_id`, operands by
/// their source text. Maps are merged across nested spaces with `merge` and
/// reduced to the four `Stats` counts with `finalize`.
#[derive(Debug, Default, Clone)]
pub struct HalsteadMaps<'a> {
    /// Occurrence count per operator `kind_id`.
    pub(crate) operators: HashMap<u16, u64>,
    /// Primitive-type operators stored by text so each distinct primitive
    /// (e.g. `int` vs `double`) counts as a separate distinct operator,
    /// even when the grammar maps them all to a single kind_id.
    pub(crate) primitive_operators: HashMap<&'a [u8], u64>,
    /// Occurrence count per operand, keyed by the operand's source bytes.
    pub(crate) operands: HashMap<&'a [u8], u64>,
}
70
71impl<'a> HalsteadMaps<'a> {
72 pub(crate) fn new() -> Self {
73 Self::default()
74 }
75
76 pub(crate) fn merge(&mut self, other: &HalsteadMaps<'a>) {
77 for (k, v) in &other.operators {
78 *self.operators.entry(*k).or_insert(0) += v;
79 }
80 for (k, v) in &other.primitive_operators {
81 *self.primitive_operators.entry(*k).or_insert(0) += v;
82 }
83 for (k, v) in &other.operands {
84 *self.operands.entry(*k).or_insert(0) += v;
85 }
86 }
87
88 pub(crate) fn finalize(&self, stats: &mut Stats) {
89 stats.u_operators = (self.operators.len() + self.primitive_operators.len()) as u64;
90 stats.operators =
91 self.operators.values().sum::<u64>() + self.primitive_operators.values().sum::<u64>();
92 stats.u_operands = self.operands.len() as u64;
93 stats.operands = self.operands.values().sum::<u64>();
94 }
95}
96
97impl Serialize for Stats {
98 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
99 where
100 S: Serializer,
101 {
102 let mut st = serializer.serialize_struct("halstead", 14)?;
103 st.serialize_field("n1", &self.u_operators())?;
104 st.serialize_field("N1", &self.operators())?;
105 st.serialize_field("n2", &self.u_operands())?;
106 st.serialize_field("N2", &self.operands())?;
107 st.serialize_field("length", &self.length())?;
108 st.serialize_field("estimated_program_length", &self.estimated_program_length())?;
109 st.serialize_field("purity_ratio", &self.purity_ratio())?;
110 st.serialize_field("vocabulary", &self.vocabulary())?;
111 st.serialize_field("volume", &self.volume())?;
112 st.serialize_field("difficulty", &self.difficulty())?;
113 st.serialize_field("level", &self.level())?;
114 st.serialize_field("effort", &self.effort())?;
115 st.serialize_field("time", &self.time())?;
116 st.serialize_field("bugs", &self.bugs())?;
117 st.end()
118 }
119}
120
121impl fmt::Display for Stats {
122 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
123 write!(
124 f,
125 "n1: {}, \
126 N1: {}, \
127 n2: {}, \
128 N2: {}, \
129 length: {}, \
130 estimated program length: {}, \
131 purity ratio: {}, \
132 size: {}, \
133 volume: {}, \
134 difficulty: {}, \
135 level: {}, \
136 effort: {}, \
137 time: {}, \
138 bugs: {}",
139 self.u_operators(),
140 self.operators(),
141 self.u_operands(),
142 self.operands(),
143 self.length(),
144 self.estimated_program_length(),
145 self.purity_ratio(),
146 self.vocabulary(),
147 self.volume(),
148 self.difficulty(),
149 self.level(),
150 self.effort(),
151 self.time(),
152 self.bugs(),
153 )
154 }
155}
156
157impl Stats {
158 pub(crate) fn merge(&mut self, _other: &Stats) {}
159
160 /// Returns `η1`, the number of distinct operators
161 #[inline]
162 #[must_use]
163 pub fn u_operators(&self) -> f64 {
164 self.u_operators as f64
165 }
166
167 /// Returns `N1`, the number of total operators
168 #[inline]
169 #[must_use]
170 pub fn operators(&self) -> f64 {
171 self.operators as f64
172 }
173
174 /// Returns `η2`, the number of distinct operands
175 #[inline]
176 #[must_use]
177 pub fn u_operands(&self) -> f64 {
178 self.u_operands as f64
179 }
180
181 /// Returns `N2`, the number of total operands
182 #[inline]
183 #[must_use]
184 pub fn operands(&self) -> f64 {
185 self.operands as f64
186 }
187
188 /// Returns the program length
189 #[inline]
190 #[must_use]
191 pub fn length(&self) -> f64 {
192 self.operands() + self.operators()
193 }
194
195 /// Returns the calculated estimated program length
196 #[inline]
197 #[must_use]
198 pub fn estimated_program_length(&self) -> f64 {
199 let uo = self.u_operators();
200 let ud = self.u_operands();
201 let uo_term = if uo == 0.0 { 0.0 } else { uo * uo.log2() };
202 let ud_term = if ud == 0.0 { 0.0 } else { ud * ud.log2() };
203 uo_term + ud_term
204 }
205
206 /// Returns the purity ratio
207 #[inline]
208 #[must_use]
209 pub fn purity_ratio(&self) -> f64 {
210 let len = self.length();
211 if len == 0.0 {
212 0.0
213 } else {
214 self.estimated_program_length() / len
215 }
216 }
217
218 /// Returns the program vocabulary
219 #[inline]
220 #[must_use]
221 pub fn vocabulary(&self) -> f64 {
222 self.u_operands() + self.u_operators()
223 }
224
225 /// Returns the program volume.
226 ///
227 /// Unit of measurement: bits
228 #[inline]
229 #[must_use]
230 pub fn volume(&self) -> f64 {
231 // Assumes a uniform binary encoding for the vocabulary is used.
232 let vocab = self.vocabulary();
233 if vocab <= 1.0 {
234 0.0
235 } else {
236 self.length() * vocab.log2()
237 }
238 }
239
240 /// Returns the estimated difficulty required to program
241 #[inline]
242 #[must_use]
243 pub fn difficulty(&self) -> f64 {
244 let ud = self.u_operands();
245 if ud == 0.0 {
246 0.0
247 } else {
248 self.u_operators() / 2. * self.operands() / ud
249 }
250 }
251
252 /// Returns the estimated level of difficulty required to program
253 #[inline]
254 #[must_use]
255 pub fn level(&self) -> f64 {
256 let d = self.difficulty();
257 if d == 0.0 { 0.0 } else { 1. / d }
258 }
259
260 /// Returns the estimated effort required to program
261 #[inline]
262 #[must_use]
263 pub fn effort(&self) -> f64 {
264 self.difficulty() * self.volume()
265 }
266
267 /// Returns the estimated time required to program.
268 ///
269 /// Unit of measurement: seconds
270 #[inline]
271 #[must_use]
272 pub fn time(&self) -> f64 {
273 // The floating point `18.` aims to describe the processing rate of the
274 // human brain. It is called Stoud number, S, and its
275 // unit of measurement is moments/seconds.
276 // A moment is the time required by the human brain to carry out the
277 // most elementary decision.
278 // 5 <= S <= 20. Halstead uses 18.
279 // The value of S has been empirically developed from psychological
280 // reasoning, and its recommended value for
281 // programming applications is 18.
282 //
283 // Source: https://www.geeksforgeeks.org/software-engineering-halsteads-software-metrics/
284 self.effort() / 18.
285 }
286
287 /// Returns the estimated number of delivered bugs.
288 ///
289 /// This metric represents the average amount of work a programmer can do
290 /// without introducing an error.
291 #[inline]
292 #[must_use]
293 pub fn bugs(&self) -> f64 {
294 // The floating point `3000.` represents the number of elementary
295 // mental discriminations.
296 // A mental discrimination, in psychology, is the ability to perceive
297 // and respond to differences among stimuli.
298 //
299 // The value above is obtained starting from a constant that
300 // is different for every language and assumes that natural language is
301 // the language of the brain.
302 // For programming languages, the English language constant
303 // has been considered.
304 //
305 // After every 3000 mental discriminations a result is produced.
306 // This result, whether correct or incorrect, is more than likely
307 // either used as an input for the next operation or is output to the
308 // environment.
309 // If incorrect the error should become apparent.
310 // Thus, an opportunity for error occurs every 3000
311 // mental discriminations.
312 //
313 // Source: https://docs.lib.purdue.edu/cgi/viewcontent.cgi?article=1145&context=cstech
314 self.effort().powf(2. / 3.) / 3000.
315 }
316}
317
#[doc(hidden)]
/// Per-language extraction of Halstead operator/operand maps.
///
/// Implementors must also implement `Checker` and `Getter`.
pub trait Halstead
where
    Self: Checker + Getter,
{
    /// Inspect `node` and record it in `halstead_maps` for the language
    /// implementing the trait.
    ///
    /// `code` is the source buffer the node's byte offsets refer to; map
    /// keys borrowed from it share the `'a` lifetime of `halstead_maps`.
    fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>);
}
328
329#[inline]
330fn get_id<'a>(node: &Node<'a>, code: &'a [u8]) -> &'a [u8] {
331 &code[node.start_byte()..node.end_byte()]
332}
333
334#[inline]
335fn compute_halstead<'a, T: Getter + Checker>(
336 node: &Node<'a>,
337 code: &'a [u8],
338 halstead_maps: &mut HalsteadMaps<'a>,
339) {
340 match T::get_op_type(node) {
341 HalsteadType::Operator => {
342 if T::is_primitive(node.kind_id()) {
343 // Store primitive-type operators by text so distinct
344 // primitives (e.g. `int` vs `double`) that share a
345 // single kind_id are counted separately in n1/N1.
346 *halstead_maps
347 .primitive_operators
348 .entry(get_id(node, code))
349 .or_insert(0) += 1;
350 } else {
351 *halstead_maps.operators.entry(node.kind_id()).or_insert(0) += 1;
352 }
353 }
354 HalsteadType::Operand => {
355 *halstead_maps
356 .operands
357 .entry(get_id(node, code))
358 .or_insert(0) += 1;
359 }
360 _ => {}
361 }
362}
363
364impl Halstead for PythonCode {
365 fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
366 compute_halstead::<Self>(node, code, halstead_maps);
367 }
368}
369
370impl Halstead for MozjsCode {
371 fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
372 compute_halstead::<Self>(node, code, halstead_maps);
373 }
374}
375
376impl Halstead for JavascriptCode {
377 fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
378 compute_halstead::<Self>(node, code, halstead_maps);
379 }
380}
381
382impl Halstead for TypescriptCode {
383 fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
384 compute_halstead::<Self>(node, code, halstead_maps);
385 }
386}
387
388impl Halstead for TsxCode {
389 fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
390 compute_halstead::<Self>(node, code, halstead_maps);
391 }
392}
393
394impl Halstead for RustCode {
395 fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
396 compute_halstead::<Self>(node, code, halstead_maps);
397 }
398}
399
400impl Halstead for CppCode {
401 fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
402 compute_halstead::<Self>(node, code, halstead_maps);
403 }
404}
405
406impl Halstead for JavaCode {
407 fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
408 compute_halstead::<Self>(node, code, halstead_maps);
409 }
410}
411
412impl Halstead for GroovyCode {
413 fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
414 compute_halstead::<Self>(node, code, halstead_maps);
415 }
416}
417
418impl Halstead for CsharpCode {
419 fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
420 compute_halstead::<Self>(node, code, halstead_maps);
421 }
422}
423
424impl Halstead for GoCode {
425 fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
426 compute_halstead::<Self>(node, code, halstead_maps);
427 }
428}
429
430impl Halstead for PerlCode {
431 fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
432 compute_halstead::<Self>(node, code, halstead_maps);
433 }
434}
435
436impl Halstead for KotlinCode {
437 fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
438 compute_halstead::<Self>(node, code, halstead_maps);
439 }
440}
441
442impl Halstead for LuaCode {
443 fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
444 compute_halstead::<Self>(node, code, halstead_maps);
445 }
446}
447
448impl Halstead for PhpCode {
449 fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
450 compute_halstead::<Self>(node, code, halstead_maps);
451 }
452}
453
454// Real defaults — no operators / operands to count. Audited in #188.
455implement_metric_trait!(Halstead, PreprocCode, CcommentCode);
456
457impl Halstead for RubyCode {
458 fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
459 compute_halstead::<Self>(node, code, halstead_maps);
460 }
461}
462
463impl Halstead for ElixirCode {
464 fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
465 compute_halstead::<Self>(node, code, halstead_maps);
466 }
467}
468
469impl Halstead for BashCode {
470 fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
471 compute_halstead::<Self>(node, code, halstead_maps);
472 }
473}
474
475impl Halstead for TclCode {
476 fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) {
477 compute_halstead::<Self>(node, code, halstead_maps);
478 }
479}
480
481#[cfg(test)]
482#[allow(
483 clippy::float_cmp,
484 clippy::cast_precision_loss,
485 clippy::cast_possible_truncation,
486 clippy::cast_sign_loss,
487 clippy::similar_names,
488 clippy::doc_markdown,
489 clippy::needless_raw_string_hashes,
490 clippy::too_many_lines
491)]
492mod tests {
493 use crate::tools::check_metrics;
494
495 use super::*;
496
    /// Nested Python `def`s with simple assignments: pins the raw operator /
    /// operand counts and every derived Halstead value with an inline
    /// snapshot.
    #[test]
    fn python_operators_and_operands() {
        check_metrics::<PythonParser>(
            "def foo():
                 def bar():
                     def toto():
                        a = 1 + 1
                     b = 2 + a
                 c = 3 + 3",
            "foo.py",
            |metric| {
                // unique operators: def, =, +
                // operators: def, def, def, =, =, =, +, +, +
                // unique operands: foo, bar, toto, a, b, c, 1, 2, 3
                // operands: foo, bar, toto, a, b, c, 1, 1, 2, a, 3, 3
                insta::assert_json_snapshot!(
                    metric.halstead,
                    @r###"
                {
                  "n1": 3.0,
                  "N1": 9.0,
                  "n2": 9.0,
                  "N2": 12.0,
                  "length": 21.0,
                  "estimated_program_length": 33.284212515144276,
                  "purity_ratio": 1.584962500721156,
                  "vocabulary": 12.0,
                  "volume": 75.28421251514428,
                  "difficulty": 2.0,
                  "level": 0.5,
                  "effort": 150.56842503028855,
                  "time": 8.364912501682698,
                  "bugs": 0.0094341190071077
                }"###
                );
            },
        );
    }
535
    /// Pointer-arithmetic operators: the snippet exercises `*` (pointer
    /// declarator and dereference) and `+` (pointer + offset). Each is
    /// counted once in `n1`; multiple uses bump `N1`. The headline integer
    /// values (`u_operators`, `u_operands`) anchor the snapshot per the
    /// snapshot-anchor policy.
    ///
    /// NOTE(review): `&` (address-of) and `->` (member-of-pointer) were
    /// listed here originally but do not occur in the snippet — extend the
    /// snippet if they are meant to be covered.
    #[test]
    fn c_pointer_arithmetic_operators() {
        check_metrics::<CppParser>(
            "int g(int* p, int* q) {
                 return *(p + 1) + *q;
             }",
            "foo.c",
            |metric| {
                // Unique operators: int, *, (), {, }, +, ;, return (= 8)
                // `*` covers both pointer-type and dereference; the grammar
                // does NOT split them.
                // NOTE(review): the breakdown above lists `{` and `}`
                // separately and omits `,`, yet the parameter list
                // `int* p, int* q` does contain a comma and sibling tests in
                // this file describe matched delimiters as a single kind.
                // The 8 is presumably int, *, (), {}, +, ;, return, `,` —
                // confirm against the grammar.
                // Unique operands: g, p, q, 1 (= 4)
                assert_eq!(metric.halstead.u_operators(), 8.0);
                assert_eq!(metric.halstead.u_operands(), 4.0);
                insta::assert_json_snapshot!(metric.halstead);
            },
        );
    }
560
    /// Bitwise (`&`, `|`, `^`, `~`, `<<`, `>>`) and logical (`&&`, `||`,
    /// `!`) operators are distinct kind_ids and count as separate unique
    /// operators in Halstead. `&` (bitwise-and) and `&&` (logical-and)
    /// must NOT collapse, even though both render as ampersands.
    #[test]
    fn c_bitwise_and_logical_operators() {
        check_metrics::<CppParser>(
            "int f(int a, int b) {
                 int x = (a & b) | (a ^ b);
                 int y = ~a;
                 int z = (a << 1) >> 2;
                 return (a && b) || !x;
             }",
            "foo.c",
            |metric| {
                // Expect: 6 bitwise op kinds (& | ^ ~ << >>), 3 logical (&& || !).
                // Plus int, (), {, }, =, ;, return, , — 8 syntactic / arithmetic
                // operator kinds. Six bitwise + three logical + eight = 17 unique
                // operators is the upper bound; actuals depend on grammar collapse,
                // so we assert a lower-bound and anchor via snapshot below.
                // NOTE(review): sibling tests count `{}` as one kind, which
                // would make the syntactic group 7 and the bound 16 — the
                // lower-bound assertion holds either way.
                let s = &metric.halstead;
                assert!(
                    s.u_operators() >= 14.0,
                    "expected >= 14 unique operators (bitwise + logical + syntax), got {}",
                    s.u_operators(),
                );
                assert_eq!(s.u_operands(), 8.0); // f, a, b, x, y, z, 1, 2
                insta::assert_json_snapshot!(metric.halstead);
            },
        );
    }
592
    /// Increment / decrement (`++`, `--`) and `sizeof` / cast operators
    /// each contribute distinct unique operators. C-style casts in the
    /// tree-sitter grammar surface as `cast_expression` with the type
    /// token classified as a primitive_type operator.
    #[test]
    fn c_increment_decrement_and_sizeof() {
        check_metrics::<CppParser>(
            "void f(int* p) {
                 int n = sizeof(int);
                 ++p;
                 --n;
                 long w = (long) n;
             }",
            "foo.c",
            |metric| {
                // Unique operators include: void, int, long, *, =, sizeof, ++, --, (), {, }, ;
                // Unique operands: f, p, n, w
                // Only a lower bound is asserted for operators; the exact
                // values are anchored by the file snapshot below.
                let s = &metric.halstead;
                assert!(
                    s.u_operators() >= 10.0,
                    "expected >= 10 unique operators including ++ / -- / sizeof / cast, got {}",
                    s.u_operators(),
                );
                assert_eq!(s.u_operands(), 4.0);
                insta::assert_json_snapshot!(metric.halstead);
            },
        );
    }
621
    /// Small C program (declarations, `scanf` / `printf` calls and an
    /// average computation) pinned with an inline snapshot of every
    /// Halstead value.
    #[test]
    fn cpp_operators_and_operands() {
        // Define operators and operands for C/C++ grammar according to this specification:
        // https://www.verifysoft.com/en_halstead_metrics.html
        // The only difference with the specification above is that
        // primitive types are treated as operators, since the definition of a
        // primitive type can be seen as the creation of a slot of a certain size.
        // i.e. The `int a;` definition creates a n-bytes slot.
        check_metrics::<CppParser>(
            "main()
            {
              int a, b, c, avg;
              scanf(\"%d %d %d\", &a, &b, &c);
              avg = (a + b + c) / 3;
              printf(\"avg = %d\", avg);
            }",
            "foo.c",
            |metric| {
                // unique operators: (), {}, int, &, =, +, /, ,, ;
                // unique operands: main, a, b, c, avg, scanf, "%d %d %d", 3, printf, "avg = %d"
                insta::assert_json_snapshot!(
                    metric.halstead,
                    @r###"
                {
                  "n1": 9.0,
                  "N1": 24.0,
                  "n2": 10.0,
                  "N2": 18.0,
                  "length": 42.0,
                  "estimated_program_length": 61.74860596185444,
                  "purity_ratio": 1.470204903853677,
                  "vocabulary": 19.0,
                  "volume": 178.41295556463058,
                  "difficulty": 8.1,
                  "level": 0.1234567901234568,
                  "effort": 1445.1449400735075,
                  "time": 80.28583000408375,
                  "bugs": 0.04260752914034329
                }"###
                );
            },
        );
    }
665
    /// C++20 spaceship operator `<=>` (`Cpp::LTEQGT`) is a comparison
    /// operator and must be counted in Halstead, like its sibling
    /// comparison operators `<`, `>`, `<=`, `>=`, `==`, `!=`. Prior to
    /// this fix it fell through to the `Unknown` arm and was silently
    /// dropped from `n1` / `N1`, under-reporting volume / effort on any
    /// C++20+ codebase that defines `operator<=>`. Regression test for
    /// issue #197.
    #[test]
    fn cpp_spaceship_operator_is_halstead_operator() {
        check_metrics::<CppParser>(
            "int f(int a, int b) {
                 return (a <=> b) != 0;
             }",
            "foo.cpp",
            |metric| {
                // Unique operators (grammar collapses matched delimiters
                // to a single kind_id): int, (), {}, <=>, !=, return, ;, ,
                // `<=>` is the regression target — without the fix it
                // would be Unknown and `u_operators` would be 7.
                // Unique operands: f, a, b, 0
                // The two explicit assertions give a readable failure
                // before the full inline snapshot is compared.
                let s = &metric.halstead;
                assert_eq!(s.u_operators(), 8.0);
                assert_eq!(s.u_operands(), 4.0);
                insta::assert_json_snapshot!(
                    s,
                    @r###"
                {
                  "n1": 8.0,
                  "N1": 11.0,
                  "n2": 4.0,
                  "N2": 6.0,
                  "length": 17.0,
                  "estimated_program_length": 32.0,
                  "purity_ratio": 1.8823529411764706,
                  "vocabulary": 12.0,
                  "volume": 60.94436251225965,
                  "difficulty": 6.0,
                  "level": 0.16666666666666666,
                  "effort": 365.6661750735579,
                  "time": 20.31478750408655,
                  "bugs": 0.01704519358507665
                }"###
                );
            },
        );
    }
712
    /// C++ compound subtract-assign `-=` (`Cpp::DASHEQ`) must be counted
    /// in Halstead like every other compound assignment (`+=`, `*=`,
    /// `/=`, etc.). Prior to the fix it fell through to the `Unknown`
    /// arm and was silently dropped from `n1` / `N1` — under-reporting
    /// volume / effort wherever C++ code subtracts in place. Regression
    /// test for issue #198.
    #[test]
    fn cpp_dash_eq_is_halstead_operator() {
        check_metrics::<CppParser>("void f(int a, int b) { a -= b; }", "foo.cpp", |metric| {
            // Unique operators: void, (), {}, int, ,, -=, ;
            // `-=` is the regression target — without the fix it
            // would be Unknown and `u_operators` would be 6.
            // Unique operands: f, a, b
            // Only the distinct counts are asserted; there is no snapshot.
            let s = &metric.halstead;
            assert_eq!(s.u_operators(), 7.0);
            assert_eq!(s.u_operands(), 3.0);
        });
    }
731
    /// C++ pointer-to-member access `.*` (`Cpp::DOTSTAR`) must be
    /// counted in Halstead. Prior to the fix it fell through to the
    /// `Unknown` arm and was silently dropped from `n1` / `N1`.
    /// Regression test for issue #198.
    ///
    /// The snippet uses an `operator.*` declaration because that is
    /// where the C++ tree-sitter grammar reliably emits a single
    /// `DOTSTAR` leaf; in expression position (`a.*b`) some grammar
    /// versions split the token into `DOT` + `STAR` and the regression
    /// would be masked.
    #[test]
    fn cpp_dot_star_is_halstead_operator() {
        check_metrics::<CppParser>("struct S { void operator.*(int); };", "foo.cpp", |metric| {
            // Unique operators with fix: {}, ;, (), int, void, .*
            // `.*` is the regression target — without the fix it
            // falls through to `Unknown` and `u_operators` is 5.
            // Unique operands: S
            // Only the distinct counts are asserted; there is no snapshot.
            let s = &metric.halstead;
            assert_eq!(s.u_operators(), 6.0);
            assert_eq!(s.u_operands(), 1.0);
        });
    }
754
    /// C++ pointer-to-member access through pointer `->*`
    /// (`Cpp::DASHGTSTAR`) must be counted in Halstead. Prior to the
    /// fix it fell through to the `Unknown` arm and was silently
    /// dropped from `n1` / `N1`. Regression test for issue #198.
    ///
    /// The snippet uses an `operator->*` declaration because that is
    /// where the C++ tree-sitter grammar reliably emits a single
    /// `DASHGTSTAR` leaf; in expression position (`a->*b`) the grammar
    /// splits the token into `DASHGT` + `STAR` and the regression would
    /// be masked.
    #[test]
    fn cpp_dash_gt_star_is_halstead_operator() {
        check_metrics::<CppParser>(
            "struct S { void operator->*(int); };",
            "foo.cpp",
            |metric| {
                // Unique operators with fix: {}, ;, (), int, void, ->*
                // `->*` is the regression target — without the fix it
                // falls through to `Unknown` and `u_operators` is 5.
                // Unique operands: S
                // Only the distinct counts are asserted; there is no snapshot.
                let s = &metric.halstead;
                assert_eq!(s.u_operators(), 6.0);
                assert_eq!(s.u_operands(), 1.0);
            },
        );
    }
781
    /// Small Rust `main` (three `let` bindings, an average and a
    /// `println!` call) pinned with an inline snapshot of every Halstead
    /// value.
    #[test]
    fn rust_operators_and_operands() {
        check_metrics::<RustParser>(
            "fn main() {
              let a = 5; let b = 5; let c = 5;
              let avg = (a + b + c) / 3;
              println!(\"{}\", avg);
            }",
            "foo.rs",
            |metric| {
                // unique operators: fn, (), {}, let, =, +, /, ;, !, ,
                // unique operands: main, a, b, c, avg, 5, 3, println, "{}"
                insta::assert_json_snapshot!(
                    metric.halstead,
                    @r###"
                {
                  "n1": 10.0,
                  "N1": 23.0,
                  "n2": 9.0,
                  "N2": 15.0,
                  "length": 38.0,
                  "estimated_program_length": 61.74860596185444,
                  "purity_ratio": 1.624963314785643,
                  "vocabulary": 19.0,
                  "volume": 161.42124551085624,
                  "difficulty": 8.333333333333334,
                  "level": 0.12,
                  "effort": 1345.177045923802,
                  "time": 74.7320581068779,
                  "bugs": 0.040619232256751396
                }"###
                );
            },
        );
    }
817
    /// Primitive type names must land in the text-keyed
    /// `primitive_operators` map so that each distinct name counts as its
    /// own operator; the four raw counts are pinned exactly.
    #[test]
    fn rust_aliased_primitive_type_classification() {
        // Regression for issue #95 (lesson #2): the Rust grammar emits 17
        // distinct `kind_id`s for `primitive_type` (one base plus 16
        // numeric-suffixed alias variants). `RustCode::is_primitive` in
        // `src/checker.rs` must list every variant; if a future regression
        // omits one, primitive type names emitted in that aliased position
        // silently drop into the kind_id-keyed operators bucket instead of
        // the text-keyed primitive_operators map, miscounting Halstead n1.
        //
        // The snippet exercises every primitive scalar type across many
        // syntactic positions (function parameter types, return types,
        // let-binding annotations, `as` casts, const items, type aliases,
        // struct fields, function pointer types, tuple types, array types,
        // reference types, generic type arguments). Empirically, ordinary
        // Rust source emits the base `Rust::PrimitiveType` variant from
        // all of these positions; the 16 suffixed alias variants are
        // produced by specific grammar productions not reachable from
        // user-written code. Mutation-verified: dropping
        // `Rust::PrimitiveType` from `is_primitive` fails this test
        // (u_operators 30→15). Dropping any single suffixed variant
        // currently leaves the test passing; if a future grammar bump
        // makes any suffixed variant reachable from idiomatic source,
        // extend the snippet so the test fires for that variant too.
        check_metrics::<RustParser>(
            "const C: u8 = 0;
             type T = i64;
             struct S { x: u32, y: u64 }
             fn g(p: fn(u8) -> u16) -> bool { let _ = p(0); true }
             fn f(a: u8, b: u16, c: u32, d: u64) -> u128 {
                 let _x: i8 = 0;
                 let _y: i16 = 0;
                 let _z: i32 = 0;
                 let _w: i64 = 0;
                 let _v: i128 = 0;
                 let _p: f32 = 1.0;
                 let _q: f64 = 2.0;
                 let _r: bool = true;
                 let _s: char = 'x';
                 let _t: usize = 0;
                 let _u: isize = 0;
                 let _arr: [u32; 4] = [0; 4];
                 let _ref: &u8 = &0;
                 let _tup: (u32, u64) = (0, 0);
                 let _opt: Option<u32> = None;
                 a as u128 + b as u128 + c as u128 + d
             }",
            "foo.rs",
            |metric| {
                // Headline: u_operators is the load-bearing assertion —
                // the 16 distinct primitive type names dedupe by text in
                // the primitive_operators map. Total operators (N1) and
                // operand counts pin the rest of the Halstead state.
                assert_eq!(metric.halstead.u_operators(), 30.0);
                assert_eq!(metric.halstead.operators(), 118.0);
                assert_eq!(metric.halstead.u_operands(), 31.0);
                assert_eq!(metric.halstead.operands(), 50.0);
            },
        );
    }
878
    /// Small JavaScript `main` function parsed with the JavaScript parser
    /// and pinned with an inline snapshot of every Halstead value.
    #[test]
    fn javascript_operators_and_operands() {
        check_metrics::<JavascriptParser>(
            "function main() {
              var a, b, c, avg;
              a = 5; b = 5; c = 5;
              avg = (a + b + c) / 3;
              console.log(\"{}\", avg);
            }",
            "foo.js",
            |metric| {
                // unique operators: function, (), {}, var, =, +, /, ,, ., ;
                // unique operands: main, a, b, c, avg, 3, 5, console.log, console, log, "{}"
                insta::assert_json_snapshot!(
                    metric.halstead,
                    @r###"
                {
                  "n1": 10.0,
                  "N1": 24.0,
                  "n2": 11.0,
                  "N2": 21.0,
                  "length": 45.0,
                  "estimated_program_length": 71.27302875388389,
                  "purity_ratio": 1.583845083419642,
                  "vocabulary": 21.0,
                  "volume": 197.65428402504423,
                  "difficulty": 9.545454545454545,
                  "level": 0.10476190476190476,
                  "effort": 1886.699983875422,
                  "time": 104.81666577085679,
                  "bugs": 0.05089564733125986
                }"###
                );
            },
        );
    }
915
    /// Same snippet and expected values as the JavaScript test, parsed with
    /// the Mozjs parser.
    #[test]
    fn mozjs_operators_and_operands() {
        check_metrics::<MozjsParser>(
            "function main() {
              var a, b, c, avg;
              a = 5; b = 5; c = 5;
              avg = (a + b + c) / 3;
              console.log(\"{}\", avg);
            }",
            "foo.js",
            |metric| {
                // unique operators: function, (), {}, var, =, +, /, ,, ., ;
                // unique operands: main, a, b, c, avg, 3, 5, console.log, console, log, "{}"
                insta::assert_json_snapshot!(
                    metric.halstead,
                    @r###"
                {
                  "n1": 10.0,
                  "N1": 24.0,
                  "n2": 11.0,
                  "N2": 21.0,
                  "length": 45.0,
                  "estimated_program_length": 71.27302875388389,
                  "purity_ratio": 1.583845083419642,
                  "vocabulary": 21.0,
                  "volume": 197.65428402504423,
                  "difficulty": 9.545454545454545,
                  "level": 0.10476190476190476,
                  "effort": 1886.699983875422,
                  "time": 104.81666577085679,
                  "bugs": 0.05089564733125986
                }"###
                );
            },
        );
    }
952
    /// Same snippet and expected values as the JavaScript test, parsed with
    /// the TypeScript parser.
    #[test]
    fn typescript_operators_and_operands() {
        check_metrics::<TypescriptParser>(
            "function main() {
              var a, b, c, avg;
              a = 5; b = 5; c = 5;
              avg = (a + b + c) / 3;
              console.log(\"{}\", avg);
            }",
            "foo.ts",
            |metric| {
                // unique operators: function, (), {}, var, =, +, /, ,, ., ;
                // unique operands: main, a, b, c, avg, 3, 5, console.log, console, log, "{}"
                insta::assert_json_snapshot!(
                    metric.halstead,
                    @r###"
                {
                  "n1": 10.0,
                  "N1": 24.0,
                  "n2": 11.0,
                  "N2": 21.0,
                  "length": 45.0,
                  "estimated_program_length": 71.27302875388389,
                  "purity_ratio": 1.583845083419642,
                  "vocabulary": 21.0,
                  "volume": 197.65428402504423,
                  "difficulty": 9.545454545454545,
                  "level": 0.10476190476190476,
                  "effort": 1886.699983875422,
                  "time": 104.81666577085679,
                  "bugs": 0.05089564733125986
                }"###
                );
            },
        );
    }
989
    /// Same snippet and expected values as the JavaScript test, parsed with
    /// the TSX parser.
    ///
    /// NOTE(review): the file label is `foo.ts` although the TSX parser is
    /// selected explicitly — confirm whether `foo.tsx` was intended.
    #[test]
    fn tsx_operators_and_operands() {
        check_metrics::<TsxParser>(
            "function main() {
              var a, b, c, avg;
              a = 5; b = 5; c = 5;
              avg = (a + b + c) / 3;
              console.log(\"{}\", avg);
            }",
            "foo.ts",
            |metric| {
                // unique operators: function, (), {}, var, =, +, /, ,, ., ;
                // unique operands: main, a, b, c, avg, 3, 5, console.log, console, log, "{}"
                insta::assert_json_snapshot!(
                    metric.halstead,
                    @r###"
                {
                  "n1": 10.0,
                  "N1": 24.0,
                  "n2": 11.0,
                  "N2": 21.0,
                  "length": 45.0,
                  "estimated_program_length": 71.27302875388389,
                  "purity_ratio": 1.583845083419642,
                  "vocabulary": 21.0,
                  "volume": 197.65428402504423,
                  "difficulty": 9.545454545454545,
                  "level": 0.10476190476190476,
                  "effort": 1886.699983875422,
                  "time": 104.81666577085679,
                  "bugs": 0.05089564733125986
                }"###
                );
            },
        );
    }
1026
1027 #[test]
1028 fn javascript_template_string_plain_is_operand() {
1029 // Regression: issue #192. A backtick-delimited `` `hello` ``
1030 // without `${...}` is semantically identical to `"hello"` /
1031 // `'hello'` and must contribute exactly one operand — before
1032 // the fix `TemplateString` fell through to `HalsteadType::Unknown`
1033 // and contributed zero. expected: operands are `f` (function
1034 // name) and the wrapping `` `hello` `` template literal →
1035 // u_operands = 2, N2 = 2 (matches the equivalent
1036 // `function f() { return "hello"; }` baseline).
1037 check_metrics::<JavascriptParser>("function f() { return `hello`; }", "foo.js", |metric| {
1038 assert_eq!(metric.halstead.u_operands(), 2.0);
1039 assert_eq!(metric.halstead.operands(), 2.0);
1040 });
1041 }
1042
1043 #[test]
1044 fn javascript_template_string_interpolation_no_double_count() {
1045 // Regression: issue #192. An interpolated template literal
1046 // `` `Hi ${name}!` `` used to fall through to `Unknown`,
1047 // dropping the wrapper from the count entirely; the inner
1048 // `name` was still walked and counted via the
1049 // `TemplateSubstitution` child. Mirrors #183 (C#), #191
1050 // (Kotlin), #199 (Perl): the wrapper is skipped when a
1051 // `TemplateSubstitution` child is present so the inner
1052 // expression is not double-counted.
1053 //
1054 // expected: for `function f(name) { return ` + "`Hi ${name}!`"
1055 // + `; }`, operands are `f` and `name` (twice — `name` as the
1056 // parameter, then again inside the interpolation), so
1057 // u_operands = 2 and N2 = 3. Without the wrapper-skip guard
1058 // the wrapping literal would also be counted, lifting
1059 // u_operands to 3 and N2 to 4.
1060 check_metrics::<JavascriptParser>(
1061 "function f(name) { return `Hi ${name}!`; }",
1062 "foo.js",
1063 |metric| {
1064 assert_eq!(metric.halstead.u_operands(), 2.0);
1065 assert_eq!(metric.halstead.operands(), 3.0);
1066 },
1067 );
1068 }
1069
1070 #[test]
1071 fn mozjs_template_string_plain_is_operand() {
1072 // Regression: issue #192. Mirrors
1073 // `javascript_template_string_plain_is_operand` for the
1074 // Firefox-mode dialect — the four JS-family `get_op_type`
1075 // impls share the same template-literal handling.
1076 check_metrics::<MozjsParser>("function f() { return `hello`; }", "foo.js", |metric| {
1077 assert_eq!(metric.halstead.u_operands(), 2.0);
1078 assert_eq!(metric.halstead.operands(), 2.0);
1079 });
1080 }
1081
1082 #[test]
1083 fn mozjs_template_string_interpolation_no_double_count() {
1084 // Regression: issue #192. Mirrors
1085 // `javascript_template_string_interpolation_no_double_count`
1086 // for the Firefox-mode dialect.
1087 check_metrics::<MozjsParser>(
1088 "function f(name) { return `Hi ${name}!`; }",
1089 "foo.js",
1090 |metric| {
1091 assert_eq!(metric.halstead.u_operands(), 2.0);
1092 assert_eq!(metric.halstead.operands(), 3.0);
1093 },
1094 );
1095 }
1096
1097 #[test]
1098 fn typescript_template_string_plain_is_operand() {
1099 // Regression: issue #192. Mirrors
1100 // `javascript_template_string_plain_is_operand` for
1101 // TypeScript — the four JS-family `get_op_type` impls share
1102 // the same template-literal handling.
1103 //
1104 // After #313 the `: string` annotation's `String2` child also
1105 // counts as an operand (text `"string"`), so unique operands
1106 // are `f`, `` `hello` ``, `string` (3 each). The headline of
1107 // this test — that the plain template literal contributes one
1108 // operand — is unaffected.
1109 check_metrics::<TypescriptParser>(
1110 "function f(): string { return `hello`; }",
1111 "foo.ts",
1112 |metric| {
1113 assert_eq!(metric.halstead.u_operands(), 3.0);
1114 assert_eq!(metric.halstead.operands(), 3.0);
1115 },
1116 );
1117 }
1118
1119 #[test]
1120 fn typescript_template_string_interpolation_no_double_count() {
1121 // Regression: issue #192. Mirrors
1122 // `javascript_template_string_interpolation_no_double_count`
1123 // for TypeScript.
1124 //
1125 // After #313 each `: string` annotation contributes one
1126 // `"string"` operand. Unique operands: `f`, `name`, `string`
1127 // (3). Total operands: `f`, `name` (param), `name` (in the
1128 // interpolation), `string`, `string` (5). The interpolation
1129 // guard from #192 still holds — the wrapping `` `Hi ${name}!` ``
1130 // is `Unknown`, not double-counted.
1131 check_metrics::<TypescriptParser>(
1132 "function f(name: string): string { return `Hi ${name}!`; }",
1133 "foo.ts",
1134 |metric| {
1135 assert_eq!(metric.halstead.u_operands(), 3.0);
1136 assert_eq!(metric.halstead.operands(), 5.0);
1137 },
1138 );
1139 }
1140
1141 #[test]
1142 fn tsx_template_string_plain_is_operand() {
1143 // Regression: issue #192. Mirrors
1144 // `javascript_template_string_plain_is_operand` for the
1145 // TSX (TypeScript + JSX) variant.
1146 //
1147 // After #313 TSX's type-keyword `string` (`String3`) also
1148 // counts as an operand, mirroring TS::String2.
1149 check_metrics::<TsxParser>(
1150 "function f(): string { return `hello`; }",
1151 "foo.tsx",
1152 |metric| {
1153 assert_eq!(metric.halstead.u_operands(), 3.0);
1154 assert_eq!(metric.halstead.operands(), 3.0);
1155 },
1156 );
1157 }
1158
1159 #[test]
1160 fn tsx_template_string_interpolation_no_double_count() {
1161 // Regression: issue #192. Mirrors
1162 // `javascript_template_string_interpolation_no_double_count`
1163 // for the TSX (TypeScript + JSX) variant.
1164 //
1165 // After #313 each `: string` annotation contributes one
1166 // `String3` operand; see `typescript_template_string_…` for
1167 // the count derivation.
1168 check_metrics::<TsxParser>(
1169 "function f(name: string): string { return `Hi ${name}!`; }",
1170 "foo.tsx",
1171 |metric| {
1172 assert_eq!(metric.halstead.u_operands(), 3.0);
1173 assert_eq!(metric.halstead.operands(), 5.0);
1174 },
1175 );
1176 }
1177
1178 // Issue #281: optional chaining (`?.`) was double-counted as a
1179 // Halstead operator in TypeScript and TSX because the grammar
1180 // exposes both an `optional_chain` named wrapper AND a child
1181 // `?.` token, and both were classified as `Operator`. The fix
1182 // counts only the bare `?.` token (`QMARKDOT`) in TS/TSX so each
1183 // textual `?.` contributes exactly once, matching JS / MozJS
1184 // (whose grammars expose only `OptionalChain` — the `?.` token
1185 // itself).
1186 //
1187 // The four assertions below all compare against the same totals:
1188 // for `function f(a) { return a?.b?.c; }` the operator stream is
1189 // `function`, `(`, `{`, `return`, `?.`, `?.`, `;` (7 total, 6
1190 // unique — `LPAREN`/`LBRACE` count once, closing tokens are not
1191 // in the operator set). Before the fix, TS/TSX reported 9/7
1192 // instead of 7/6.
1193 #[test]
1194 fn javascript_optional_chain_not_double_counted_in_halstead_281() {
1195 check_metrics::<JavascriptParser>("function f(a) { return a?.b?.c; }", "foo.js", |m| {
1196 assert_eq!(m.halstead.u_operators(), 6.0);
1197 assert_eq!(m.halstead.operators(), 7.0);
1198 });
1199 }
1200
1201 #[test]
1202 fn mozjs_optional_chain_not_double_counted_in_halstead_281() {
1203 check_metrics::<MozjsParser>("function f(a) { return a?.b?.c; }", "foo.js", |m| {
1204 assert_eq!(m.halstead.u_operators(), 6.0);
1205 assert_eq!(m.halstead.operators(), 7.0);
1206 });
1207 }
1208
1209 #[test]
1210 fn typescript_optional_chain_not_double_counted_in_halstead_281() {
1211 // The TS grammar wraps member-expression `?.` in an
1212 // `optional_chain` named node containing the bare `?.`
1213 // token; classifying both as `Operator` double-counted the
1214 // chain. We now count only the bare token, so TS matches JS.
1215 check_metrics::<TypescriptParser>("function f(a) { return a?.b?.c; }", "foo.ts", |m| {
1216 assert_eq!(m.halstead.u_operators(), 6.0);
1217 assert_eq!(m.halstead.operators(), 7.0);
1218 });
1219 }
1220
1221 #[test]
1222 fn tsx_optional_chain_not_double_counted_in_halstead_281() {
1223 check_metrics::<TsxParser>("function f(a) { return a?.b?.c; }", "foo.tsx", |m| {
1224 assert_eq!(m.halstead.u_operators(), 6.0);
1225 assert_eq!(m.halstead.operators(), 7.0);
1226 });
1227 }
1228
1229 // Issue #299: parity guard for the JS-family `get_op_type` macro
1230 // on the optional-chain operator token (#281's prior regression
1231 // surface). All four languages must classify the bare `?.` token
1232 // identically — `OptionalChain` in JS/MozJS, `QMARKDOT` in
1233 // TS/TSX — and emit the same totals for
1234 // `function f(a) { return a?.b?.c; }`:
1235 //
1236 // * Operators: `function`, `(`, `{`, `return`, `?.`, `?.`, `;`
1237 // (7 total, 6 unique).
1238 // * Operands: `f`, `a`, `a`, `b`, `c`, plus the two wrapping
1239 // member expressions (`a?.b`, `a?.b?.c`) classified as
1240 // `MemberExpression*` (7 total, 6 unique).
1241 //
1242 // Verified by test-via-revert: dropping `OptionalChain` from
1243 // JS/MozJS, or `QMARKDOT` from TS/TSX, trips the test
1244 // (u_operators 6→5). This input does NOT exercise every operand
1245 // alias in the per-language `operand_extras` (`Identifier2`,
1246 // `String2`, `NestedIdentifier`, `MemberExpression4`) or the TS
1247 // `PredefinedType` operator; drift in those is out of scope for
1248 // this regression guard and would need a separate fixture.
1249 #[test]
1250 fn js_family_get_op_type_parity_optional_chain_member_299() {
1251 // Non-capturing closure (coerced to the `fn` pointer that
1252 // `check_metrics` accepts) avoids the
1253 // `clippy::needless_pass_by_value` warning that a free `fn`
1254 // taking `CodeMetrics` by value would trigger.
1255 const SRC: &str = "function f(a) { return a?.b?.c; }";
1256 let check = |m: crate::CodeMetrics| {
1257 assert_eq!(m.halstead.u_operators(), 6.0);
1258 assert_eq!(m.halstead.operators(), 7.0);
1259 assert_eq!(m.halstead.u_operands(), 6.0);
1260 assert_eq!(m.halstead.operands(), 7.0);
1261 };
1262
1263 check_metrics::<JavascriptParser>(SRC, "foo.js", check);
1264 check_metrics::<MozjsParser>(SRC, "foo.js", check);
1265 check_metrics::<TypescriptParser>(SRC, "foo.ts", check);
1266 check_metrics::<TsxParser>(SRC, "foo.tsx", check);
1267 }
1268
1269 // Issue #313: parity guard for the `"string"` type-keyword aliases
1270 // that the TS / TSX grammars expose. `Checker::is_string` matches
1271 // these aliases (#283), so `Getter::get_op_type` must also classify
1272 // them — otherwise the same node disagrees between the two
1273 // predicates and Halstead silently undercounts every `: string`
1274 // annotation by one operand.
1275 //
1276 // For the input `let x: string = "y";`:
1277 //
1278 // * TypeScript emits `Typescript::String2` for the `string` type
1279 // keyword (kind_id 135, in the type-keyword block of the enum).
1280 // * TSX emits `Tsx::String3` for the same role (kind_id 141).
1281 //
1282 // After #313 both kinds are in `operand_extras` and contribute one
1283 // `"string"` operand. Verified by test-via-revert: dropping
1284 // `String2` from TS's `operand_extras` (or `String3` from TSX's)
1285 // trips this test on `u_operands` / `operands` for the affected
1286 // language.
1287 #[test]
1288 fn ts_family_string2_string3_type_keyword_parity_313() {
1289 const SRC: &str = "let x: string = \"y\";";
1290 // Operators (n1 = 5, N1 = 5):
1291 // `let`, `:`, `=`, `;`, plus `string` (PredefinedType wrapper,
1292 // routed through `is_primitive` so it's keyed by its lexeme
1293 // `"string"` in `primitive_operators`).
1294 // Operands (n2 = 3, N2 = 3):
1295 // `x`, the `"y"` literal, and `string` (the type-keyword
1296 // child of `predefined_type`, classified via the operand
1297 // extras added by #313). Pre-fix the TS column reported
1298 // n2 = 2 / N2 = 2 because String2 fell through to `Unknown`;
1299 // the TSX column had the same gap for String3.
1300 let check = |m: crate::CodeMetrics| {
1301 assert_eq!(m.halstead.u_operators(), 5.0);
1302 assert_eq!(m.halstead.operators(), 5.0);
1303 assert_eq!(m.halstead.u_operands(), 3.0);
1304 assert_eq!(m.halstead.operands(), 3.0);
1305 };
1306
1307 check_metrics::<TypescriptParser>(SRC, "foo.ts", check);
1308 check_metrics::<TsxParser>(SRC, "foo.tsx", check);
1309 }
1310
1311 #[test]
1312 fn python_wrong_operators() {
1313 check_metrics::<PythonParser>("()[]{}", "foo.py", |metric| {
1314 insta::assert_json_snapshot!(
1315 metric.halstead,
1316 @r###"
1317 {
1318 "n1": 0.0,
1319 "N1": 0.0,
1320 "n2": 0.0,
1321 "N2": 0.0,
1322 "length": 0.0,
1323 "estimated_program_length": 0.0,
1324 "purity_ratio": 0.0,
1325 "vocabulary": 0.0,
1326 "volume": 0.0,
1327 "difficulty": 0.0,
1328 "level": 0.0,
1329 "effort": 0.0,
1330 "time": 0.0,
1331 "bugs": 0.0
1332 }"###
1333 );
1334 });
1335 }
1336
    #[test]
    fn python_check_metrics() {
        // Minimal function fixture: a `def` whose body is a lone
        // `pass`. The second line of the literal must stay indented so
        // the body remains inside the function.
        check_metrics::<PythonParser>(
            "def f():
                 pass",
            "foo.py",
            |metric| {
                // Pinned base counts: n1 = N1 = 2 and n2 = N2 = 1, so
                // both `length` and `vocabulary` come out as 3.
                insta::assert_json_snapshot!(
                    metric.halstead,
                    @r###"
                    {
                      "n1": 2.0,
                      "N1": 2.0,
                      "n2": 1.0,
                      "N2": 1.0,
                      "length": 3.0,
                      "estimated_program_length": 2.0,
                      "purity_ratio": 0.6666666666666666,
                      "vocabulary": 3.0,
                      "volume": 4.754887502163468,
                      "difficulty": 1.0,
                      "level": 1.0,
                      "effort": 4.754887502163468,
                      "time": 0.26416041678685936,
                      "bugs": 0.0009425525573729414
                    }"###
                );
            },
        );
    }
1367
    #[test]
    fn java_operators_and_operands() {
        // Pins the complete Halstead suite for a small Java class whose
        // `main` declares four ints, assigns three of them, averages
        // them and formats the result.
        check_metrics::<JavaParser>(
            "public class Main {
              public static void main(string args[]) {
                int a, b, c, avg;
                a = 5; b = 5; c = 5;
                avg = (a + b + c) / 3;
                MessageFormat.format(\"{0}\", avg);
              }
            }",
            "foo.java",
            |metric| {
                // Operators (n1=11): {} void () [] , . ; int = + /
                // Operands  (n2=12): Main main args a b c avg 5 3 MessageFormat format "{0}"
                insta::assert_json_snapshot!(
                    metric.halstead,
                    @r#"
                    {
                      "n1": 11.0,
                      "N1": 26.0,
                      "n2": 12.0,
                      "N2": 22.0,
                      "length": 48.0,
                      "estimated_program_length": 81.07329781366414,
                      "purity_ratio": 1.6890270377846697,
                      "vocabulary": 23.0,
                      "volume": 217.13097389073664,
                      "difficulty": 10.083333333333334,
                      "level": 0.09917355371900825,
                      "effort": 2189.4039867315946,
                      "time": 121.63355481842193,
                      "bugs": 0.05620341201461669
                    }
                    "#
                );
            },
        );
    }
1407
    #[test]
    fn java_primitive_types_and_booleans() {
        // Fixture: one field per Java primitive type, with `boolean`
        // used twice so that both `true` and `false` appear.
        check_metrics::<JavaParser>(
            "public class Prims {
              byte a = 1;
              short b = 2;
              int c = 3;
              long d = 4;
              char e = 'x';
              float f = 1.0f;
              double g = 2.0;
              boolean h = true;
              boolean i = false;
            }",
            "foo.java",
            |metric| {
                // Verifies all 8 Java primitive-type keywords (byte, short, int, long,
                // char, float, double, boolean) are counted as distinct operators, and
                // that true/false are counted as operands.
                insta::assert_json_snapshot!(
                    metric.halstead,
                    @r#"
                    {
                      "n1": 11.0,
                      "N1": 28.0,
                      "n2": 19.0,
                      "N2": 19.0,
                      "length": 47.0,
                      "estimated_program_length": 118.76437056043838,
                      "purity_ratio": 2.526901501285923,
                      "vocabulary": 30.0,
                      "volume": 230.62385799360038,
                      "difficulty": 5.5,
                      "level": 0.18181818181818182,
                      "effort": 1268.4312189648022,
                      "time": 70.46840105360012,
                      "bugs": 0.03905920146699976
                    }
                    "#
                );
            },
        );
    }
1451
    #[test]
    fn groovy_operators_and_operands() {
        check_metrics::<GroovyParser>(
            "class Main {
              static void main(String[] args) {
                int a, b, c, avg;
                a = 5; b = 5; c = 5;
                avg = (a + b + c) / 3;
                println(avg);
              }
            }",
            "foo.groovy",
            |metric| {
                // Groovy mirror of `java_operators_and_operands`; the
                // fixture calls `println(avg)` in place of Java's
                // `MessageFormat.format(...)`.
                // NOTE(review): this comment used to describe a juxt
                // call `println avg` exercising `juxt_function_call`,
                // but the fixture as written uses the parenthesised
                // form — confirm which one was intended.
                // The dekobon grammar parses primitive type names
                // (`void`, `int`, `String`) as `type_identifier`
                // rather than as distinct keyword tokens, so they
                // count as operands here — the prior amaanq grammar
                // (which inherited Java's tokenisation) treated them
                // as operators. Net shift: −2 unique operators
                // (`void`, `int`), +2 unique operands (`void`, `int`
                // were the only two type_identifiers not already
                // counted as operands, since `String` was already an
                // identifier in the prior grammar's counting).
                assert_eq!(metric.halstead.u_operators(), 8.0);
                assert_eq!(metric.halstead.u_operands(), 13.0);
                insta::assert_json_snapshot!(
                    metric.halstead,
                    @r#"
                    {
                      "n1": 8.0,
                      "N1": 22.0,
                      "n2": 13.0,
                      "N2": 23.0,
                      "length": 45.0,
                      "estimated_program_length": 72.10571633583419,
                      "purity_ratio": 1.6023492519074265,
                      "vocabulary": 21.0,
                      "volume": 197.65428402504423,
                      "difficulty": 7.076923076923077,
                      "level": 0.14130434782608697,
                      "effort": 1398.7841638695438,
                      "time": 77.71023132608576,
                      "bugs": 0.04169134280255714
                    }
                    "#
                );
            },
        );
    }
1506
    #[test]
    fn groovy_primitive_types_and_booleans() {
        // Groovy counterpart of `java_primitive_types_and_booleans`:
        // the same nine fields, written without statement-terminating
        // semicolons.
        check_metrics::<GroovyParser>(
            "class Prims {
              byte a = 1
              short b = 2
              int c = 3
              long d = 4
              char e = 'x'
              float f = 1.0f
              double g = 2.0
              boolean h = true
              boolean i = false
            }",
            "foo.groovy",
            |metric| {
                // The dekobon grammar consolidates the 8 primitive
                // type names (`byte`, `short`, `int`, `long`, `char`,
                // `float`, `double`, `boolean`) under `type_identifier`
                // — so they count as operands, not as distinct
                // operators. Likewise numeric literals collapse to one
                // `NumberLiteral` shape (no Hex/Octal/Binary/Decimal
                // split), and `'x'` parses as `StringLiteral` (Groovy
                // single-quoted strings) rather than as
                // `CharacterLiteral`. Operators remaining in this
                // fixture: `=` and `class`-body braces (only `{` is in
                // the operator set). True/false collapse under one
                // `BooleanLiteral`.
                assert_eq!(metric.halstead.u_operators(), 2.0);
                assert_eq!(metric.halstead.u_operands(), 27.0);
                insta::assert_json_snapshot!(
                    metric.halstead,
                    @r#"
                    {
                      "n1": 2.0,
                      "N1": 10.0,
                      "n2": 27.0,
                      "N2": 28.0,
                      "length": 38.0,
                      "estimated_program_length": 130.38196255841365,
                      "purity_ratio": 3.4311042778529908,
                      "vocabulary": 29.0,
                      "volume": 184.60327781484773,
                      "difficulty": 1.037037037037037,
                      "level": 0.9642857142857143,
                      "effort": 191.44043625243467,
                      "time": 10.635579791801925,
                      "bugs": 0.01107221547116606
                    }
                    "#
                );
            },
        );
    }
1561
1562 #[test]
1563 fn groovy_closure_operators_and_operands() {
1564 check_metrics::<GroovyParser>("def double = { x -> x * 2 }", "foo.groovy", |metric| {
1565 // Closure with arrow-style parameter list.
1566 // Distinct operators: def, =, {}, ->, * = 5.
1567 // Distinct operands: double, x, 2 = 3.
1568 assert_eq!(metric.halstead.u_operators(), 5.0);
1569 assert_eq!(metric.halstead.u_operands(), 3.0);
1570 });
1571 }
1572
    /// Regression for issue #247: every Groovy-specific operator the
    /// prior amaanq grammar dropped to ERROR or mis-shaped as a Java
    /// node now parses as a distinct lexer token in the dekobon
    /// grammar, so Halstead counts each one. The fixture below
    /// exercises Elvis `?:`, safe-nav `?.`, safe-chain `??.`,
    /// spread-dot `*.`, method-pointer `.&`, direct-field `.@`,
    /// identity `===` / `!==`, spaceship `<=>`, regex `=~` / `==~`,
    /// exclusive ranges `..<` / `<..` / `<..<`, `as` coercion, and
    /// `?[` safe index — every distinct operator kind must appear in
    /// `u_operators` (the count grows by exactly the number of new
    /// distinct operator tokens introduced).
    #[test]
    fn groovy_dekobon_operator_coverage_247() {
        check_metrics::<GroovyParser>(
            "def f(a, b, list, s) {
                def x = a ?: b
                def y = a?.field
                def z = a??.field
                def items = list*.size()
                def ptr = a.&size
                def fld = a.@field
                def id1 = a === b
                def id2 = a !== b
                def ship = a <=> b
                def find = s =~ /pat/
                def match = s ==~ /^pat\\$/
                def r1 = 0..<10
                def r2 = 0<..10
                def r3 = 0<..<10
                def cast = a as String
                def safe = list?[0]
                return x
            }",
            "foo.groovy",
            |metric| {
                // Exact pin: with the dekobon Groovy grammar this
                // fixture exercises 16 Groovy-specific tokens (`?:`,
                // `?.`, `??.`, `*.`, `.&`, `.@`, `===`, `!==`, `<=>`,
                // `=~`, `==~`, `..<`, `<..`, `<..<`, `as`, `?[`) plus
                // 7 ambient Java-shaped operators the fixture also
                // uses (`def`, `=`, `,`, `{`, `(`, `[`, `return`),
                // for a total of 23 distinct operator kinds. A
                // regression that drops any one of the 16 #247
                // operators would push the count below 23 and fail
                // this assertion.
                //
                // NOTE(review): an older paragraph here described a
                // "20-operator floor" built from only six ambient
                // operators (no `[`); it contradicted the exact pin of
                // 23 asserted below and has been dropped. The old text
                // also pointed at a "complementary AST walk below"
                // that pins each #247 operator individually. No such
                // walk follows the assertion in this test, so losing
                // one #247 operator while gaining an unrelated one
                // would leave the count at 23 — confirm where the
                // per-token check lives.
                assert_eq!(
                    metric.halstead.u_operators(),
                    23.0,
                    "u_operators changed; check whether a #247 operator was dropped or an unrelated operator added (and update the comment / token list above accordingly)",
                );
            },
        );
    }
1641
    #[test]
    fn csharp_operators_and_operands() {
        // After issue #286, `void`, `string`, and `int` count as three
        // distinct Halstead operators rather than collapsing into one
        // `PredefinedType` kind_id entry, lifting u_operators from 13
        // to 15. Total operators (N1) is unchanged because the same
        // nodes are still counted, just keyed by lexeme.
        check_metrics::<CsharpParser>(
            "public class Main {
              public static void Run(string[] args) {
                int a, b, c, avg;
                a = 5; b = 5; c = 5;
                avg = (a + b + c) / 3;
                System.Console.WriteLine(\"{0}\", avg);
              }
            }",
            "foo.cs",
            |metric| {
                // The four base counts are asserted explicitly so a
                // failure names the count that moved.
                assert_eq!(metric.halstead.u_operators(), 15.0);
                assert_eq!(metric.halstead.operators(), 32.0);
                assert_eq!(metric.halstead.u_operands(), 13.0);
                assert_eq!(metric.halstead.operands(), 23.0);
                // Pin every Halstead field; values are whatever the
                // classifier produces and become the regression spec.
                // No inline literal is given, so insta compares against
                // its stored snapshot for this test.
                insta::assert_json_snapshot!(metric.halstead);
            },
        );
    }
1670
    #[test]
    fn csharp_primitive_types_and_booleans() {
        // After issue #286: each of `byte`, `short`, `int`, `long`,
        // `char`, `float`, `double`, `bool`, `object` is now a distinct
        // Halstead operator (9 primitives) rather than collapsing into
        // one `PredefinedType` kind_id entry. u_operators rises from 6
        // to 14 (5 non-primitive operators + 9 distinct primitives);
        // total operators (N1) is unchanged because the same nodes are
        // still counted, just keyed by lexeme.
        check_metrics::<CsharpParser>(
            "public class Prims {
              byte a = 1;
              short b = 2;
              int c = 3;
              long d = 4;
              char e = 'x';
              float f = 1.0f;
              double g = 2.0;
              bool h = true;
              bool i = false;
              object j = null;
            }",
            "foo.cs",
            |metric| {
                // The four base counts are asserted explicitly so a
                // failure names the count that moved.
                assert_eq!(metric.halstead.u_operators(), 14.0);
                assert_eq!(metric.halstead.operators(), 33.0);
                assert_eq!(metric.halstead.u_operands(), 21.0);
                assert_eq!(metric.halstead.operands(), 23.0);
                // No inline literal is given, so insta compares every
                // Halstead field against its stored snapshot for this
                // test.
                insta::assert_json_snapshot!(metric.halstead);
            },
        );
    }
1703
1704 #[test]
1705 fn csharp_predefined_types_keyed_by_lexeme() {
1706 // Regression: issue #286. The C# grammar emits one `PredefinedType`
1707 // kind_id for every keyword type (`int`, `string`, `bool`, …).
1708 // Without keying by source text the entire family collapses into
1709 // a single Halstead operator (n1 += 1) instead of one per distinct
1710 // keyword. This test pins the post-fix behaviour using four
1711 // distinct primitives — `int`, `string`, `bool`, `object` —
1712 // appearing as parameter types so no other operators interact
1713 // with the count.
1714 //
1715 // expected: operators are `class`, `void`, `M`, `{}`, `()`, `,`
1716 // (×3 between 4 params), plus the four distinct predefined types
1717 // → u_operators = 5 + 4 = 9. Without the fix the four primitives
1718 // collapse to one entry, giving u_operators = 6.
1719 check_metrics::<CsharpParser>(
1720 "class C { void M(int a, string b, bool c, object d) {} }",
1721 "foo.cs",
1722 |metric| {
1723 // The headline assertion: four distinct primitive
1724 // keywords contribute four distinct operators, not one.
1725 assert_eq!(metric.halstead.u_operators(), 9.0);
1726 },
1727 );
1728 }
1729
1730 #[test]
1731 fn csharp_interpolated_string_no_double_count() {
1732 // Regression: issue #183. A C# `$"Hi {name}!"` used to be
1733 // classified as a Halstead operand (the wrapping
1734 // `InterpolatedStringExpression`) AND have its inner
1735 // `Interpolation`'s identifier classified as an operand too.
1736 // The fix routes `InterpolatedStringExpression` through a
1737 // conditional: when it has an `Interpolation` child, the inner
1738 // identifier already carries the operand contribution and the
1739 // wrapper is treated as `Unknown`; when it does not (static
1740 // `$"hello"`), the wrapper still counts as one operand.
1741 //
1742 // expected: operand contributions for
1743 // `class C { void M(string name) { string s = $"Hi {name}!"; } }`
1744 // — `C` (class), `M` (method), `name` (param), `s` (local),
1745 // and the inner `name` (inside `{...}`). With the fix,
1746 // u_operands = 4 (C, M, name, s); N2 = 5 (`name` twice).
1747 // Without the fix, the wrapping `$"Hi {name}!"` would also
1748 // count → u_operands = 5, N2 = 6.
1749 check_metrics::<CsharpParser>(
1750 "class C { void M(string name) { string s = $\"Hi {name}!\"; } }",
1751 "foo.cs",
1752 |metric| {
1753 assert_eq!(metric.halstead.u_operands(), 4.0);
1754 assert_eq!(metric.halstead.operands(), 5.0);
1755 },
1756 );
1757 }
1758
1759 #[test]
1760 fn csharp_static_interpolated_string_is_operand() {
1761 // Regression: issue #183. A `$"..."` with no `{...}` is
1762 // semantically identical to `"..."` and must still contribute
1763 // exactly one operand — the conditional `is_child(Interpolation)`
1764 // check distinguishes it from a true interpolation. expected:
1765 // operands are `C`, `M`, `s`, `$"hello"` → u_operands = 4, N2 = 4.
1766 // A naive "always Unknown" fix would yield u_operands = 3, N2 = 3,
1767 // diverging from the plain-string equivalent below.
1768 check_metrics::<CsharpParser>(
1769 "class C { void M() { string s = $\"hello\"; } }",
1770 "foo.cs",
1771 |metric| {
1772 assert_eq!(metric.halstead.u_operands(), 4.0);
1773 assert_eq!(metric.halstead.operands(), 4.0);
1774 },
1775 );
1776 }
1777
1778 #[test]
1779 fn csharp_plain_string_still_operand() {
1780 // The fix for #183 only changes how `InterpolatedStringExpression`
1781 // is classified; plain `StringLiteral` (and `VerbatimStringLiteral`
1782 // / `RawStringLiteral`) must still contribute exactly one operand
1783 // each. expected: operands are `C`, `M`, `s`, `"hi"` →
1784 // u_operands = 4, N2 = 4.
1785 check_metrics::<CsharpParser>(
1786 "class C { void M() { string s = \"hi\"; } }",
1787 "foo.cs",
1788 |metric| {
1789 assert_eq!(metric.halstead.u_operands(), 4.0);
1790 assert_eq!(metric.halstead.operands(), 4.0);
1791 },
1792 );
1793 }
1794
    #[test]
    fn go_operators_and_operands() {
        // Pins the complete Halstead suite for a Go file holding a
        // package clause and one two-parameter function that returns
        // the sum of its arguments.
        check_metrics::<GoParser>(
            "package main
            func sum(a, b int) int {
                return a + b
            }",
            "foo.go",
            |metric| {
                // Pinned base counts: n1 = N1 = 7 (every operator
                // occurs once) and n2 = 5 with N2 = 8.
                insta::assert_json_snapshot!(
                    metric.halstead,
                    @r###"
                    {
                      "n1": 7.0,
                      "N1": 7.0,
                      "n2": 5.0,
                      "N2": 8.0,
                      "length": 15.0,
                      "estimated_program_length": 31.26112492884004,
                      "purity_ratio": 2.0840749952560027,
                      "vocabulary": 12.0,
                      "volume": 53.77443751081734,
                      "difficulty": 5.6,
                      "level": 0.17857142857142858,
                      "effort": 301.1368500605771,
                      "time": 16.729825003365395,
                      "bugs": 0.014975730436275946
                    }"###
                );
            },
        );
    }
1827
    #[test]
    fn perl_operators_and_operands() {
        // Pins the complete Halstead suite for a Perl sub that unpacks
        // two arguments from `@_` and returns their sum.
        check_metrics::<PerlParser>(
            "sub sum {
                my ($a, $b) = @_;
                return $a + $b;
            }",
            "foo.pl",
            |metric| {
                // Pinned base counts: n1 = 10 with N1 = 14, and n2 = 4
                // with N2 = 6.
                insta::assert_json_snapshot!(
                    metric.halstead,
                    @r#"
                    {
                      "n1": 10.0,
                      "N1": 14.0,
                      "n2": 4.0,
                      "N2": 6.0,
                      "length": 20.0,
                      "estimated_program_length": 41.219280948873624,
                      "purity_ratio": 2.0609640474436812,
                      "vocabulary": 14.0,
                      "volume": 76.14709844115208,
                      "difficulty": 7.5,
                      "level": 0.13333333333333333,
                      "effort": 571.1032383086406,
                      "time": 31.727957683813365,
                      "bugs": 0.02294502281013948
                    }
                    "#
                );
            },
        );
    }
1861
1862 #[test]
1863 fn perl_interpolated_string_no_double_count() {
1864 // Regression: issue #199. A `string_double_quoted` (and
1865 // `string_qq_quoted` / `backtick_quoted` / `command_qx_quoted`)
1866 // wrapping an `interpolation` child used to be counted as a
1867 // Halstead operand while the inner scalar/array/hash variable
1868 // was also walked and counted — double-counting the inner
1869 // variable's contribution to `N2`. Mirrors #180 (Bash/Elixir),
1870 // #183 (C#), #184 (PHP), #191 (Kotlin).
1871 //
1872 // expected: for
1873 // sub greet { my $name = shift; my $msg = "Hi $name"; return $msg; }
1874 // — operands are `greet`, `$name`, `shift`, `$msg`. With the
1875 // fix the wrapping `"Hi $name"` is skipped (has `Interpolation`
1876 // child), so u_operands = 4 and N2 = 6 (`$name` x2 from the
1877 // `my` binding and the interpolation; `$msg` x2 from the `my`
1878 // binding and `return`; `greet`, `shift` once each). Without
1879 // the fix the wrapping literal would also be counted, lifting
1880 // u_operands to 5 and N2 to 7.
1881 check_metrics::<PerlParser>(
1882 "sub greet { my $name = shift; my $msg = \"Hi $name\"; return $msg; }",
1883 "foo.pl",
1884 |metric| {
1885 assert_eq!(metric.halstead.u_operands(), 4.0);
1886 assert_eq!(metric.halstead.operands(), 6.0);
1887 insta::assert_json_snapshot!(metric.halstead);
1888 },
1889 );
1890 }
1891
1892 #[test]
1893 fn perl_plain_string_still_operand() {
1894 // The fix for #199 only skips wrapping literals that carry an
1895 // `Interpolation` child; a plain `"hello"` (no `$…` inside)
1896 // must still contribute exactly one operand. expected: operands
1897 // `greet`, `$msg`, `"hello"` → u_operands = 3, N2 = 4 (`$msg`
1898 // appears in the `my` binding and the `return`).
1899 check_metrics::<PerlParser>(
1900 "sub greet { my $msg = \"hello\"; return $msg; }",
1901 "foo.pl",
1902 |metric| {
1903 assert_eq!(metric.halstead.u_operands(), 3.0);
1904 assert_eq!(metric.halstead.operands(), 4.0);
1905 },
1906 );
1907 }
1908
1909 #[test]
1910 fn perl_single_quoted_string_never_interpolates() {
1911 // Single-quoted (`'…'`) and `q{…}` literals are not subject to
1912 // interpolation in Perl, so even when their text contains a
1913 // `$name`-shaped sequence the wrapper is still counted as one
1914 // operand and the inner text is not parsed as a variable.
1915 // expected: operands `greet`, `$msg`, `'Hi $name'` →
1916 // u_operands = 3, N2 = 4 (`$msg` x2).
1917 check_metrics::<PerlParser>(
1918 "sub greet { my $msg = 'Hi $name'; return $msg; }",
1919 "foo.pl",
1920 |metric| {
1921 assert_eq!(metric.halstead.u_operands(), 3.0);
1922 assert_eq!(metric.halstead.operands(), 4.0);
1923 },
1924 );
1925 }
1926
1927 #[test]
1928 fn perl_plain_heredoc_counts_as_one_operand() {
1929 // Regression: issue #287. A plain (non-interpolating) Perl
1930 // heredoc body used to be classified `HalsteadType::Unknown`,
1931 // so its visible `HeredocBodyStatement` node contributed
1932 // nothing to N2 even though it is a string literal. The fix
1933 // adds `HeredocBodyStatement` to the interpolation-aware
1934 // operand arm, so an inert heredoc counts as one operand.
1935 //
1936 // Source (heredoc body lives at the source_file level, not
1937 // inside any sub):
1938 // my $msg = <<END;
1939 // hello world
1940 // END
1941 //
1942 // Operands traversed:
1943 // * `$msg` (`scalar_variable`) × 1
1944 // * heredoc body (`heredoc_body_statement`) × 1
1945 // expected: u_operands = 2, N2 = 2.
1946 check_metrics::<PerlParser>("my $msg = <<END;\nhello world\nEND\n", "foo.pl", |metric| {
1947 assert_eq!(metric.halstead.u_operands(), 2.0);
1948 assert_eq!(metric.halstead.operands(), 2.0);
1949 });
1950 }
1951
1952 #[test]
1953 fn perl_interpolated_heredoc_no_double_count() {
1954 // Regression: issue #287. An interpolating Perl heredoc
1955 // (`<<"TAG"` or bare `<<TAG`) carries an `Interpolation` child
1956 // when its body contains a `$var`. The wrapper must drop to
1957 // `Unknown` so the inner scalar variable carries the operand
1958 // count — same dispatch as the existing double-quoted /
1959 // backtick / qx wrappers (issue #199) and the PHP heredoc fix
1960 // (issue #184).
1961 //
1962 // Source:
1963 // my $name = "x";
1964 // my $msg = <<"END";
1965 // hi $name
1966 // END
1967 //
1968 // Operands by text key:
1969 // * `$name` × 2 (my-binding + interpolation inside heredoc)
1970 // * `"x"` × 1 (inert double-quoted string)
1971 // * `$msg` × 1
1972 // expected: u_operands = 3, N2 = 4. Without the
1973 // interpolation-aware drop the wrapping heredoc body would
1974 // also count, lifting u_operands to 4 and N2 to 5.
1975 check_metrics::<PerlParser>(
1976 "my $name = \"x\";\nmy $msg = <<\"END\";\nhi $name\nEND\n",
1977 "foo.pl",
1978 |metric| {
1979 assert_eq!(metric.halstead.u_operands(), 3.0);
1980 assert_eq!(metric.halstead.operands(), 4.0);
1981 },
1982 );
1983 }
1984
    #[test]
    fn lua_operators_and_operands() {
        // Baseline Halstead check for a small Lua function containing a
        // single `if` branch. The inline snapshot pins the four base
        // counts (n1, N1, n2, N2) and every value derived from them.
        check_metrics::<LuaParser>(
            "local function add(a, b)
 local result = a + b
 if result > 0 then
 return result
 end
 return 0
end",
            "foo.lua",
            |metric| {
                // n1=12: local,function,(,,,),=,+,if,>,then,return,end
                // n2=5: add,a,b,result,0
                // N1=15 is consistent with `local`, `return` and `end`
                // each occurring twice in the source above.
                // N2=10 is consistent with add ×1, a ×2, b ×2,
                // result ×3, 0 ×2.
                insta::assert_json_snapshot!(metric.halstead, @r###"
                {
                  "n1": 12.0,
                  "N1": 15.0,
                  "n2": 5.0,
                  "N2": 10.0,
                  "length": 25.0,
                  "estimated_program_length": 54.62919048309068,
                  "purity_ratio": 2.1851676193236274,
                  "vocabulary": 17.0,
                  "volume": 102.18657103125848,
                  "difficulty": 12.0,
                  "level": 0.08333333333333333,
                  "effort": 1226.2388523751017,
                  "time": 68.12438068750565,
                  "bugs": 0.03818816527310305
                }
                "###);
            },
        );
    }
2020
    #[test]
    fn kotlin_halstead_basic() {
        // Baseline Halstead check for a two-statement Kotlin function.
        // The inline snapshot pins the base counts and all derived
        // scores.
        //
        // The operand side (n2 = 5, N2 = 10) is consistent with the
        // text keys `add` ×1, `a` ×2, `b` ×2, `Int` ×3, `result` ×2.
        // NOTE(review): the operator breakdown (n1 = 9, N1 = 11) is not
        // spelled out here — confirm against `get_op_type` for Kotlin.
        check_metrics::<KotlinParser>(
            "fun add(a: Int, b: Int): Int {
 val result = a + b
 return result
 }",
            "foo.kt",
            |metric| {
                insta::assert_json_snapshot!(
                    metric.halstead,
                    @r###"
                {
                  "n1": 9.0,
                  "N1": 11.0,
                  "n2": 5.0,
                  "N2": 10.0,
                  "length": 21.0,
                  "estimated_program_length": 40.13896548741762,
                  "purity_ratio": 1.9113793089246487,
                  "vocabulary": 14.0,
                  "volume": 79.9544533632097,
                  "difficulty": 9.0,
                  "level": 0.1111111111111111,
                  "effort": 719.5900802688873,
                  "time": 39.97722668160485,
                  "bugs": 0.026767153565498338
                }
                "###
                );
            },
        );
    }
2054
2055 #[test]
2056 fn kotlin_string_template_no_double_count() {
2057 // Regression: issue #191. A Kotlin string template (`"Hi $name!"`)
2058 // wraps an `Interpolation` child whose inner expression is
2059 // walked and counted separately. Without the
2060 // `is_child(Interpolation)` guard the wrapping `StringLiteral`
2061 // would also count as an operand, inflating N2. Same pattern as
2062 // #180 (Bash/Elixir) and #184 (PHP).
2063 //
2064 // Source: `fun greet(name: String): String {\n return "Hi $name!"\n}\n`
2065 // Operands (by source-byte key):
2066 // Function signature (no body): `greet` × 1, `name` × 1,
2067 // `String` × 2 (param type + return type) = 3 unique, 4 total.
2068 // Body adds the short-form interpolation `$name`: tree-sitter
2069 // kotlin-ng 1.1.0 produces an `identifier` node whose source
2070 // range includes the leading `$`, so its bytes are `$name` —
2071 // distinct from the bare `name` operand in the signature.
2072 // The wrapping `StringLiteral` is skipped (fix working) →
2073 // u_operands = 4 (`greet`, `name`, `String`, `$name`), N2 = 5.
2074 // Without the fix the `StringLiteral` text (`"Hi $name!"`)
2075 // would also be counted → N2 = 6, u_operands = 5.
2076 check_metrics::<KotlinParser>(
2077 "fun greet(name: String): String {\n return \"Hi $name!\"\n}\n",
2078 "foo.kt",
2079 |metric| {
2080 assert_eq!(metric.halstead.u_operands(), 4.0);
2081 assert_eq!(metric.halstead.operands(), 5.0);
2082 },
2083 );
2084 }
2085
2086 #[test]
2087 fn kotlin_string_template_long_form_no_double_count() {
2088 // The `${expr}` long form of a Kotlin string template also
2089 // produces an `Interpolation` child. The fix must apply to it
2090 // identically.
2091 //
2092 // Source: `fun f(x: Int): String { return "v=${x}" }\n`
2093 // Operands by source-byte key:
2094 // `f` × 1, `x` × 2 (param + inside `${x}`),
2095 // `Int` × 1, `String` × 1.
2096 // With the fix u_operands = 4 (`f`, `x`, `Int`, `String`),
2097 // N2 = 5. Without the fix the wrapping `"v=${x}"` would also
2098 // count → u_operands = 5, N2 = 6.
2099 check_metrics::<KotlinParser>(
2100 "fun f(x: Int): String { return \"v=${x}\" }\n",
2101 "foo.kt",
2102 |metric| {
2103 assert_eq!(metric.halstead.u_operands(), 4.0);
2104 assert_eq!(metric.halstead.operands(), 5.0);
2105 },
2106 );
2107 }
2108
2109 #[test]
2110 fn kotlin_plain_string_still_operand() {
2111 // The fix for #191 only skips wrapping templates that contain
2112 // an `Interpolation` child; a plain `"hello"` (no `$` interp)
2113 // must still contribute exactly one operand.
2114 //
2115 // Source: `fun f(): String { return "hello" }\n`
2116 // Operands: `f` × 1, `String` × 1, `"hello"` × 1 →
2117 // u_operands = 3, N2 = 3.
2118 check_metrics::<KotlinParser>(
2119 "fun f(): String { return \"hello\" }\n",
2120 "foo.kt",
2121 |metric| {
2122 assert_eq!(metric.halstead.u_operands(), 3.0);
2123 assert_eq!(metric.halstead.operands(), 3.0);
2124 },
2125 );
2126 }
2127
2128 #[test]
2129 fn python_fstring_no_double_count() {
2130 // Regression: issue #191. A Python f-string (`f"Hi {name}!"`)
2131 // wraps an `Interpolation` child whose inner identifier
2132 // `name` is walked and counted as its own operand. Without
2133 // the `is_child(Interpolation)` guard the wrapping `String`
2134 // would also count, double-counting `name`'s contribution to
2135 // `N2`. Same pattern as #180 (Bash/Elixir) and #184 (PHP).
2136 //
2137 // Source: `def greet(name):\n return f"Hi {name}!"\n`
2138 // Operands by source-byte key:
2139 // `greet` × 1, `name` × 2 (param + inside `{name}`).
2140 // With the fix the wrapping `f"Hi {name}!"` is skipped →
2141 // u_operands = 2 (`greet`, `name`), N2 = 3. Without the fix
2142 // the wrapping literal would also count → u_operands = 3,
2143 // N2 = 4.
2144 check_metrics::<PythonParser>(
2145 "def greet(name):\n return f\"Hi {name}!\"\n",
2146 "foo.py",
2147 |metric| {
2148 assert_eq!(metric.halstead.u_operands(), 2.0);
2149 assert_eq!(metric.halstead.operands(), 3.0);
2150 },
2151 );
2152 }
2153
2154 #[test]
2155 fn python_plain_string_still_operand() {
2156 // The fix for #191 only skips wrapping `String` nodes that
2157 // contain an `Interpolation` child; a plain `"hi"` must still
2158 // contribute exactly one operand.
2159 //
2160 // Source: `def f():\n return "hi"\n`
2161 // Operands: `f` × 1, `"hi"` × 1 → u_operands = 2, N2 = 2.
2162 // (The previous documentation-string filter is preserved:
2163 // a bare `"hi"` as a top-level `expression_statement` would
2164 // be skipped, but here it appears as `return "hi"`.)
2165 check_metrics::<PythonParser>("def f():\n return \"hi\"\n", "foo.py", |metric| {
2166 assert_eq!(metric.halstead.u_operands(), 2.0);
2167 assert_eq!(metric.halstead.operands(), 2.0);
2168 });
2169 }
2170
2171 #[test]
2172 fn python_empty_file_halstead() {
2173 check_metrics::<PythonParser>("", "empty.py", |metric| {
2174 let h = &metric.halstead;
2175 assert_eq!(h.u_operators(), 0.0);
2176 assert_eq!(h.operands(), 0.0);
2177 assert_eq!(h.estimated_program_length(), 0.0);
2178 assert_eq!(h.purity_ratio(), 0.0);
2179 assert_eq!(h.volume(), 0.0);
2180 assert_eq!(h.difficulty(), 0.0);
2181 assert_eq!(h.level(), 0.0);
2182 assert_eq!(h.effort(), 0.0);
2183 assert_eq!(h.time(), 0.0);
2184 assert_eq!(h.bugs(), 0.0);
2185 });
2186 }
2187
    #[test]
    fn bash_operators_and_operands() {
        // Baseline Halstead check for a small Bash function: a `local`
        // assignment, a `[ … ]` test inside `if … then … fi`, and an
        // `echo` of a single-quoted string. The four base counts are
        // asserted explicitly and the full derivation is pinned by the
        // snapshot.
        check_metrics::<BashParser>(
            "#!/bin/bash
f() {
 local x=1
 if [ $x -eq 1 ]; then
 echo 'one'
 fi
}",
            "foo.sh",
            |metric| {
                // `x` (assignment LHS and inside `$x`) is a `variable_name`
                // with aliased kind_id 160 — all three aliases must be in
                // the operand list (see lesson 2).
                assert_eq!(metric.halstead.u_operators(), 12.0);
                assert_eq!(metric.halstead.operators(), 12.0);
                assert_eq!(metric.halstead.u_operands(), 6.0);
                assert_eq!(metric.halstead.operands(), 9.0);
                insta::assert_json_snapshot!(metric.halstead);
            },
        );
    }
2211
2212 #[test]
2213 fn bash_interpolated_string_no_double_count() {
2214 // Regression: issue #180. A double-quoted Bash string containing
2215 // `$name`, `${name[…]}`, or `$(cmd)` used to be classified as a
2216 // Halstead operand AND have its inner `simple_expansion` /
2217 // `expansion` / `command_substitution` children classified as
2218 // operands too. We now skip the wrapping literal when it has an
2219 // expansion child so only the inner expansion contributes.
2220 //
2221 // expected: operands across `a="plain"\nb="$x"\n` —
2222 // line 1: variable_name `a`, plain string `"plain"` (no
2223 // expansion, still operand) → 2.
2224 // line 2: variable_name `b`, wrapping `"$x"` skipped (has
2225 // expansion), `simple_expansion` `$x`, inner variable_name
2226 // `x` → 3.
2227 // Total unique operands: 5 (`a`, `b`, `"plain"`, `$x`, `x`),
2228 // each appearing once → N2 = 5. Without the #180 fix, the
2229 // wrapping `"$x"` literal would also be counted, making
2230 // u_operands = 6 and N2 = 6. The `=` is the only operator;
2231 // appears twice (N1 = 2, n1 = 1).
2232 check_metrics::<BashParser>("a=\"plain\"\nb=\"$x\"\n", "foo.sh", |metric| {
2233 assert_eq!(metric.halstead.u_operators(), 1.0);
2234 assert_eq!(metric.halstead.operators(), 2.0);
2235 assert_eq!(metric.halstead.u_operands(), 5.0);
2236 assert_eq!(metric.halstead.operands(), 5.0);
2237 insta::assert_json_snapshot!(metric.halstead);
2238 });
2239 }
2240
2241 #[test]
2242 fn elixir_interpolated_string_no_double_count() {
2243 // Regression: issue #180. Without the fix, an interpolated
2244 // Elixir `String` was classified as a single operand while its
2245 // inner `interpolation` identifier was also walked and
2246 // classified as its own operand — double-counting the
2247 // interpolated identifier's contribution to `N2`.
2248 //
2249 // expected: operand contributions for
2250 // `def greet(name) do\n msg = "Hi #{name}"\nend\n` —
2251 // `def`, `greet`, `name` (param), `msg`, and the inner `name`
2252 // (inside `#{...}`). With the fix, the wrapping
2253 // `"Hi #{name}"` literal is skipped (has `Interpolation`
2254 // child), so `name` is the only repeated operand:
2255 // u_operands = 4 (def, greet, name, msg), N2 = 5. Without the
2256 // fix, the wrapping literal would also count → u_operands = 5,
2257 // N2 = 6. Operators (`do`, `end`, `(`, `)`, `=`, `#{`, `}`)
2258 // are unchanged: u = N = 7 (the `#{`/`}` interpolation
2259 // markers stay classified as operators).
2260 check_metrics::<ElixirParser>(
2261 "def greet(name) do\n msg = \"Hi #{name}\"\nend\n",
2262 "foo.ex",
2263 |metric| {
2264 assert_eq!(metric.halstead.u_operators(), 7.0);
2265 assert_eq!(metric.halstead.operators(), 7.0);
2266 assert_eq!(metric.halstead.u_operands(), 4.0);
2267 assert_eq!(metric.halstead.operands(), 5.0);
2268 insta::assert_json_snapshot!(metric.halstead);
2269 },
2270 );
2271 }
2272
2273 #[test]
2274 fn elixir_plain_string_still_operand() {
2275 // The fix for #180 only skips wrapping literals that contain
2276 // interpolation; a plain `"hello"` must still contribute exactly
2277 // one operand. expected: `def`, `f`, `"hello"` → 3 unique
2278 // operands (n2 = 3), each appearing once (N2 = 3).
2279 check_metrics::<ElixirParser>("def f do\n \"hello\"\nend\n", "foo.ex", |metric| {
2280 assert_eq!(metric.halstead.u_operands(), 3.0);
2281 assert_eq!(metric.halstead.operands(), 3.0);
2282 });
2283 }
2284
2285 #[test]
2286 fn elixir_interpolated_sigil_no_double_count() {
2287 // Sigils mirror strings under #180. For `~r/foo#{name}/`, the
2288 // wrapping `Sigil` is skipped, but `SigilName` (`r`) and the
2289 // inner `name` identifier each contribute one operand.
2290 // expected: `def`, `f`, `name` (param), `re`, `r` (sigil name),
2291 // `name` (inside `#{...}`) → u_operands = 5, N2 = 6 (`name`
2292 // twice).
2293 check_metrics::<ElixirParser>(
2294 "def f(name) do\n re = ~r/foo#{name}/\nend\n",
2295 "foo.ex",
2296 |metric| {
2297 assert_eq!(metric.halstead.u_operands(), 5.0);
2298 assert_eq!(metric.halstead.operands(), 6.0);
2299 },
2300 );
2301 }
2302
2303 #[test]
2304 fn elixir_interpolated_charlist_no_double_count() {
2305 // Charlists mirror strings and sigils under #180. The
2306 // `E::String | E::Charlist | E::Sigil` arm in `get_op_type`
2307 // skips any wrapping literal that has an `Interpolation`
2308 // child; this test exercises the `Charlist` branch
2309 // specifically.
2310 //
2311 // expected: for `def f(name) do\n cl = 'Hi #{name}'\nend\n` —
2312 // `def`, `f`, `name` (param), `cl`, and the inner `name`
2313 // (inside `#{...}`). With the fix, the wrapping
2314 // `'Hi #{name}'` is skipped → u_operands = 4 (def, f, name,
2315 // cl), N2 = 5 (`name` twice).
2316 check_metrics::<ElixirParser>(
2317 "def f(name) do\n cl = 'Hi #{name}'\nend\n",
2318 "foo.ex",
2319 |metric| {
2320 assert_eq!(metric.halstead.u_operands(), 4.0);
2321 assert_eq!(metric.halstead.operands(), 5.0);
2322 },
2323 );
2324 }
2325
2326 #[test]
2327 fn bash_all_expansion_kinds_skip_wrapper() {
2328 // Exercises every node kind tested by
2329 // `bash_string_has_expansion`: `simple_expansion` (`$v`),
2330 // `expansion` (`${v[0]}`), `command_substitution` (`$(date)`),
2331 // and `arithmetic_expansion` (`$((1+2))`). A typo replacing
2332 // one kind with an aliased neighbour in `language_bash.rs`
2333 // (e.g., `ExpansionBody` instead of `Expansion`) would leave
2334 // the corresponding wrapping string counted as an operand and
2335 // shift the totals.
2336 //
2337 // expected: operands across the four lines —
2338 // line 1 `a="$v"`: var_name `a`, simple_expansion `$v`,
2339 // inner var_name `v` (wrapper skipped) → 3
2340 // line 2 `b="${v[0]}"`: var_name `b`, var_name `v` (inside
2341 // subscript), number `0` (wrapper skipped, `expansion`
2342 // itself is not in the operand list) → 3
2343 // line 3 `c="$(date)"`: var_name `c`, command_name `date`
2344 // (wrapper skipped, `command_substitution` not in operand
2345 // list) → 2
2346 // line 4 `d="$((1+2))"`: var_name `d`, numbers `1` and `2`
2347 // (wrapper skipped, `arithmetic_expansion` not in operand
2348 // list) → 3
2349 // Unique operands (`v` shared across lines 1 and 2): a, b, c,
2350 // d, $v, v, 0, date, 1, 2 → 10. Total occurrences: 12 (`v`
2351 // appears twice). Operators include `=` four times plus the
2352 // `${`, `}`, `$(`, `)`, `$((`, `))`, `[`, `]`, `+` punctuation.
2353 check_metrics::<BashParser>(
2354 "a=\"$v\"\nb=\"${v[0]}\"\nc=\"$(date)\"\nd=\"$((1+2))\"\n",
2355 "foo.sh",
2356 |metric| {
2357 assert_eq!(metric.halstead.u_operators(), 6.0);
2358 assert_eq!(metric.halstead.operators(), 9.0);
2359 assert_eq!(metric.halstead.u_operands(), 10.0);
2360 assert_eq!(metric.halstead.operands(), 12.0);
2361 },
2362 );
2363 }
2364
    #[test]
    fn tcl_operators_and_operands() {
        // Snapshot-only baseline for Tcl: a `proc` that uses `set`,
        // `expr`, `if` and `return`, with arithmetic (`+`), comparison
        // (`>`, `!=`) and logical (`&&`) operators. All Halstead values
        // are pinned by the external snapshot file.
        check_metrics::<TclParser>(
            "proc f {a b} {
 set x [expr {$a + $b}]
 if {$x > 0 && $x != 0} {
 return $x
 }
 return 0
}",
            "foo.tcl",
            |metric| {
                insta::assert_json_snapshot!(metric.halstead);
            },
        );
    }
2381
    #[test]
    fn tcl_bitwise_ternary_string_ops() {
        // Exercises operator families not covered by
        // tcl_operators_and_operands: bitwise (&, |, ^, ~, <<, >>),
        // ternary (? :), logical `||`, and string comparison (eq, ne).
        // NOTE(review): `in` / `ni` were previously listed here but do
        // not appear in the source below, so they are not covered by
        // this test.
        check_metrics::<TclParser>(
            "proc f {a b} {
 set bits [expr {$a & $b | $a ^ ~$b}]
 set sh [expr {$a << 1 | $b >> 1}]
 set t [expr {$a > 0 ? $a : $b}]
 if {$a eq {x} || $a ne {y}} {
 return $a
 }
 return $b
}",
            "foo.tcl",
            |metric| {
                insta::assert_json_snapshot!(metric.halstead);
            },
        );
    }
2402
    #[test]
    fn tcl_bare_variable_operand() {
        // Bare `$varname` produces a VariableSubstitution node (already
        // an operand). Its anonymous Id2 child must NOT be counted
        // separately; each reference is 1 operand. The resulting counts
        // are pinned by the external snapshot.
        check_metrics::<TclParser>(
            "proc f {x} {
 return $x
}",
            "foo.tcl",
            |metric| {
                insta::assert_json_snapshot!(metric.halstead);
            },
        );
    }
2417
    #[test]
    fn tcl_inert_quoted_word_counts_as_operand() {
        // Regression for #277. A `"..."` literal with no `$var` / `[cmd]`
        // interpolation must contribute exactly one operand (the wrapping
        // `QuotedWord`). The string content `hello world` is exposed as a
        // single `_quoted_word_content` token (not itself classified by
        // `get_op_type`), so the only operands here are `f`, `s`, and the
        // quoted string. `set` is the anonymous `Set2` keyword and is
        // classified as an operator, not an operand.
        check_metrics::<TclParser>(
            "proc f {} {
 set s \"hello world\"
}",
            "foo.tcl",
            |metric| {
                // Operands: `f`, `s`, `"hello world"` — 3 unique, 3 total.
                // The wrapping `QuotedWord` must still contribute exactly
                // one operand when it carries no interpolation children;
                // dropping to 2 would mean the inert case was over-guarded.
                assert_eq!(metric.halstead.u_operands(), 3.0);
                assert_eq!(metric.halstead.operands(), 3.0);
                // The remaining (operator-side and derived) values are
                // pinned by the external snapshot.
                insta::assert_json_snapshot!(metric.halstead);
            },
        );
    }
2443
    #[test]
    fn tcl_interpolated_quoted_word_no_double_count() {
        // Regression for #277. Before the fix, `"$x is $y"` produced an
        // extra operand for the wrapping `QuotedWord` on top of the two
        // inner `VariableSubstitution` operands (`$x`, `$y`), giving 7.
        // After the fix, the wrapper is `HalsteadType::Unknown` whenever
        // it carries an interpolation child, so operand attribution
        // belongs solely to the inner substitutions.
        check_metrics::<TclParser>(
            "proc f {x y} {
 set s \"$x is $y\"
}",
            "foo.tcl",
            |metric| {
                // Operands: `f`, `x`, `y` (proc args), `s`, `$x`, `$y` — 6
                // unique, 6 total. The wrapping `QuotedWord` contributes
                // nothing. Pre-fix this read 7/7 (double-counted wrapper).
                assert_eq!(metric.halstead.u_operands(), 6.0);
                assert_eq!(metric.halstead.operands(), 6.0);
                // The remaining values are pinned by the external snapshot.
                insta::assert_json_snapshot!(metric.halstead);
            },
        );
    }
2467
    #[test]
    fn tcl_command_substitution_quoted_word_no_double_count() {
        // Regression for #277. A `"...[cmd]..."` literal exposes the
        // bracketed command as a `command_substitution` child whose inner
        // identifiers/literals contribute their own operands. The wrapping
        // `QuotedWord` must not also be classified as an operand, or the
        // command's identifier would be counted alongside a phantom
        // wrapper operand.
        check_metrics::<TclParser>(
            "proc f {} {
 set s \"result: [foo]\"
}",
            "foo.tcl",
            |metric| {
                // Operands: `f`, `s`, `foo` — 3 unique, 3 total. The
                // wrapping `QuotedWord` and the inert text `result: ` do
                // not contribute extra operands. Pre-fix this read 4/4
                // (double-counted wrapper).
                assert_eq!(metric.halstead.u_operands(), 3.0);
                assert_eq!(metric.halstead.operands(), 3.0);
                // The remaining values are pinned by the external snapshot.
                insta::assert_json_snapshot!(metric.halstead);
            },
        );
    }
2492
    #[test]
    fn php_operators_and_operands() {
        // Baseline Halstead check for a typed PHP function with three
        // parameters whose body combines `+`, `/` and a parenthesised
        // sub-expression. The four base counts are asserted explicitly
        // and the full derivation is pinned by the external snapshot.
        check_metrics::<PhpParser>(
            "<?php
 function avg(int $a, int $b, int $c): int {
 return ($a + $b + $c) / 3;
 }",
            "foo.php",
            |metric| {
                assert_eq!(metric.halstead.u_operators(), 11.0);
                assert_eq!(metric.halstead.operators(), 15.0);
                assert_eq!(metric.halstead.u_operands(), 9.0);
                assert_eq!(metric.halstead.operands(), 22.0);
                insta::assert_json_snapshot!(metric.halstead);
            },
        );
    }
2510
    #[test]
    fn php_simple_function() {
        // Minimal PHP case: a one-parameter typed function returning
        // `$x + 1`. Every operator occurs exactly once (n1 = N1 = 9);
        // operands repeat (n2 = 5, N2 = 10). The full derivation is
        // pinned by the external snapshot.
        check_metrics::<PhpParser>(
            "<?php
 function inc(int $x): int { return $x + 1; }",
            "foo.php",
            |metric| {
                assert_eq!(metric.halstead.u_operators(), 9.0);
                assert_eq!(metric.halstead.operators(), 9.0);
                assert_eq!(metric.halstead.u_operands(), 5.0);
                assert_eq!(metric.halstead.operands(), 10.0);
                insta::assert_json_snapshot!(metric.halstead);
            },
        );
    }
2526
2527 #[test]
2528 fn php_encapsed_string_interpolation_no_double_count() {
2529 // Regression: issue #184. A PHP `"Hello $name!"` used to be
2530 // classified as a Halstead operand (the wrapping
2531 // `encapsed_string`) AND have its inner `variable_name`
2532 // (`$name`) plus the inner `name` token classified as
2533 // operands too. With the fix, the wrapping literal drops to
2534 // `Unknown` when it carries any `$var` / `${name}` / `{$expr}`
2535 // child, so `$name` is counted exactly once at each text
2536 // occurrence.
2537 //
2538 // Source:
2539 // <?php $name = "world"; echo "Hello $name!";
2540 //
2541 // Inert operand: `"world"` (no interpolation, still operand).
2542 // Operands by text key (`get_id` keys by source bytes):
2543 // `$name` × 2 (assignment LHS and `$name` inside the
2544 // interpolated string), `name` × 2 (the `name` token inside
2545 // each `variable_name`), `"world"` × 1.
2546 // u_operands = 3, N2 = 5.
2547 // Without the fix the wrapping `"Hello $name!"` would also
2548 // count → u_operands = 4, N2 = 6.
2549 check_metrics::<PhpParser>(
2550 "<?php $name = \"world\"; echo \"Hello $name!\";",
2551 "foo.php",
2552 |metric| {
2553 assert_eq!(metric.halstead.u_operands(), 3.0);
2554 assert_eq!(metric.halstead.operands(), 5.0);
2555 },
2556 );
2557 }
2558
2559 #[test]
2560 fn php_encapsed_string_no_interpolation_still_operand() {
2561 // The fix for #184 only drops `EncapsedString`/`Heredoc` from
2562 // the operand arm when interpolation is present. An inert
2563 // double-quoted string must still count as exactly one
2564 // operand, identical to the single-quoted equivalent.
2565 //
2566 // Source: `<?php echo "Hello world!";`
2567 // Operands: `"Hello world!"` × 1 → u_operands = 1, N2 = 1.
2568 check_metrics::<PhpParser>("<?php echo \"Hello world!\";", "foo.php", |metric| {
2569 assert_eq!(metric.halstead.u_operands(), 1.0);
2570 assert_eq!(metric.halstead.operands(), 1.0);
2571 });
2572 }
2573
2574 #[test]
2575 fn php_heredoc_interpolation_no_double_count() {
2576 // Regression: issue #184. A PHP heredoc whose body
2577 // interpolates `$name` previously counted both the wrapping
2578 // `heredoc` node and the inner `$name` as operands; the fix
2579 // drops the wrapper when its `heredoc_body` carries any
2580 // interpolation child.
2581 //
2582 // Source:
2583 // <?php $name = "x"; echo <<<EOT
2584 // hi $name
2585 // EOT;
2586 //
2587 // Operands by text key: `$name` × 2, `name` × 2, `"x"` × 1
2588 // (inert single-interp encapsed string also operand). With
2589 // the fix u_operands = 3, N2 = 5. Without the fix the
2590 // wrapping heredoc text would add one more unique operand.
2591 check_metrics::<PhpParser>(
2592 "<?php $name = \"x\"; echo <<<EOT\nhi $name\nEOT;\n",
2593 "foo.php",
2594 |metric| {
2595 assert_eq!(metric.halstead.u_operands(), 3.0);
2596 assert_eq!(metric.halstead.operands(), 5.0);
2597 },
2598 );
2599 }
2600
2601 #[test]
2602 fn php_nowdoc_unaffected() {
2603 // `Nowdoc` (single-quoted heredoc) never interpolates and is
2604 // never matched by `php_string_has_interpolation`. It must
2605 // continue counting as exactly one operand regardless of the
2606 // text inside, mirroring single-quoted `String`.
2607 //
2608 // Source:
2609 // <?php echo <<<'EOT'
2610 // plain $name not interpolated
2611 // EOT;
2612 //
2613 // Operands: the nowdoc literal × 1 → u_operands = 1, N2 = 1.
2614 check_metrics::<PhpParser>(
2615 "<?php echo <<<'EOT'\nplain $name not interpolated\nEOT;\n",
2616 "foo.php",
2617 |metric| {
2618 assert_eq!(metric.halstead.u_operands(), 1.0);
2619 assert_eq!(metric.halstead.operands(), 1.0);
2620 },
2621 );
2622 }
2623
2624 #[test]
2625 fn php_encapsed_string_bare_member_access_no_double_count() {
2626 // Regression: issue #184 follow-up. The PHP grammar allows
2627 // bare `$obj->prop` interpolation inside `"…"` without
2628 // surrounding `{ … }`; tree-sitter-php emits this as a
2629 // direct `member_access_expression` child of
2630 // `encapsed_string` (kind_id 329 in the current grammar).
2631 // The wrapper must drop to `Unknown` for that form too —
2632 // otherwise the inner `$obj` and `prop` `name` tokens are
2633 // walked as operands while the wrapper also counts,
2634 // double-counting `N2`.
2635 //
2636 // Source:
2637 // <?php $obj = new stdClass; $obj->prop = "x"; echo "Hi $obj->prop!";
2638 //
2639 // Operands tallied by `get_id` (keyed on source bytes):
2640 // `$obj` × 3 (LHS assignment, member-access target,
2641 // inside the interpolated string)
2642 // `obj` (name) × 3 (one per `variable_name`)
2643 // `prop` (name) × 2 (member-access RHS twice)
2644 // `stdClass` × 1
2645 // `"x"` × 1
2646 // ⇒ u_operands = 5, N2 = 10.
2647 // With the bug the wrapping `"Hi $obj->prop!"` text adds one
2648 // more unique operand and one more occurrence ⇒ 6 / 11.
2649 check_metrics::<PhpParser>(
2650 "<?php $obj = new stdClass; $obj->prop = \"x\"; echo \"Hi $obj->prop!\";",
2651 "foo.php",
2652 |metric| {
2653 assert_eq!(metric.halstead.u_operands(), 5.0);
2654 assert_eq!(metric.halstead.operands(), 10.0);
2655 },
2656 );
2657 }
2658
2659 #[test]
2660 fn php_encapsed_string_bare_subscript_no_double_count() {
2661 // Regression: issue #184 follow-up. Bare `$arr[0]` inside
2662 // `"…"` produces a `subscript_expression` child of
2663 // `encapsed_string` (kind_id 351). The wrapper must drop to
2664 // `Unknown` for that form.
2665 //
2666 // Source:
2667 // <?php $arr = [1]; echo "Hi $arr[0]!";
2668 //
2669 // Operands tallied by `get_id`:
2670 // `$arr` × 2, `arr` × 2 (inner `name`), `1` × 1, `0` × 1.
2671 // ⇒ u_operands = 4, N2 = 6.
2672 // With the bug the wrapping `"Hi $arr[0]!"` text adds 1 / 1.
2673 check_metrics::<PhpParser>(
2674 "<?php $arr = [1]; echo \"Hi $arr[0]!\";",
2675 "foo.php",
2676 |metric| {
2677 assert_eq!(metric.halstead.u_operands(), 4.0);
2678 assert_eq!(metric.halstead.operands(), 6.0);
2679 },
2680 );
2681 }
2682
2683 #[test]
2684 fn php_shell_command_expression_inert_is_operand() {
2685 // Regression: issue #288. Backtick command literals (PHP's
2686 // `shell_command_expression`) were filtered as strings by
2687 // `Checker::is_string` and `Alterator::alterate`, but never
2688 // classified as Halstead operands — so they contributed
2689 // nothing to N2 / eta2. An inert backtick literal must now
2690 // count as exactly one operand, matching `EncapsedString`
2691 // and `Heredoc`.
2692 //
2693 // Source: `<?php $out = ` + backtick `ls` + backtick + `;`
2694 // Operands tallied by `get_id`:
2695 // `$out` × 1, `out` × 1 (inner `name`), backtick literal × 1.
2696 // ⇒ u_operands = 3, N2 = 3.
2697 // Before the fix the backtick literal vanished from the count
2698 // ⇒ u_operands = 2, N2 = 2.
2699 check_metrics::<PhpParser>("<?php $out = `ls`;", "foo.php", |metric| {
2700 assert_eq!(metric.halstead.u_operands(), 3.0);
2701 assert_eq!(metric.halstead.operands(), 3.0);
2702 });
2703 }
2704
2705 #[test]
2706 fn php_shell_command_expression_interpolation_no_double_count() {
2707 // Regression: issue #288. PHP backtick literals DO support
2708 // `$var` interpolation (see tree-sitter-php node-types.json:
2709 // `shell_command_expression` children include `variable_name`,
2710 // `dynamic_variable_name`, `member_access_expression`,
2711 // `subscript_expression`). With the fix the wrapper drops to
2712 // `Unknown` when it carries any interpolation child, exactly
2713 // as `EncapsedString` does.
2714 //
2715 // Source: `<?php $dir = "/tmp"; $out = ` + backtick `ls $dir` +
2716 // backtick + `;`
2717 //
2718 // Operands tallied by `get_id`:
2719 // `$dir` × 2 (assignment LHS, inside backticks),
2720 // `dir` × 2 (inner `name`),
2721 // `$out` × 1, `out` × 1, `"/tmp"` × 1.
2722 // ⇒ u_operands = 5, N2 = 7.
2723 // Without the interpolation guard the wrapping backtick literal
2724 // would also count ⇒ u_operands = 6, N2 = 8.
2725 check_metrics::<PhpParser>(
2726 "<?php $dir = \"/tmp\"; $out = `ls $dir`;",
2727 "foo.php",
2728 |metric| {
2729 assert_eq!(metric.halstead.u_operands(), 5.0);
2730 assert_eq!(metric.halstead.operands(), 7.0);
2731 },
2732 );
2733 }
2734
2735 #[test]
2736 fn elixir_operators_and_operands() {
2737 // Exercises every Halstead family classified in Elixir's
2738 // `get_op_type`: control-flow keywords (`do`, `end`, `fn`),
2739 // structural punctuation (`(`, `)`, `[`, `]`, `,`, `.`, `@`),
2740 // arithmetic (`+`, `-`, `*`, `/`), comparison (`==`, `>`),
2741 // logical (`&&`, `||`, `and`, `or`, `!`), pipe (`|>`), capture
2742 // (`&`), assignment/match (`=`), and the stab arrow (`->`).
2743 // The body mixes identifiers, integers, atoms, and a string.
2744 check_metrics::<ElixirParser>(
2745 "defmodule Foo do\n @doc \"add\"\n def calc(a, b) do\n result = a + b * 2\n flag = result > 0 && a == b\n out = if flag, do: result, else: -result\n [out, a, b]\n end\nend\n",
2746 "foo.ex",
2747 |metric| {
2748 // Positive headline assertions on integer counts.
2749 assert_eq!(metric.halstead.u_operators(), 15.0);
2750 assert_eq!(metric.halstead.operators(), 23.0);
2751 assert_eq!(metric.halstead.u_operands(), 16.0);
2752 assert_eq!(metric.halstead.operands(), 27.0);
2753 insta::assert_json_snapshot!(
2754 metric.halstead,
2755 @r###"
2756 {
2757 "n1": 15.0,
2758 "N1": 23.0,
2759 "n2": 16.0,
2760 "N2": 27.0,
2761 "length": 50.0,
2762 "estimated_program_length": 122.60335893412778,
2763 "purity_ratio": 2.452067178682556,
2764 "vocabulary": 31.0,
2765 "volume": 247.70981551934375,
2766 "difficulty": 12.65625,
2767 "level": 0.07901234567901234,
2768 "effort": 3135.0773526666944,
2769 "time": 174.17096403703857,
2770 "bugs": 0.07140208917738183
2771 }"###
2772 );
2773 },
2774 );
2775 }
2776
#[test]
fn ruby_operators_and_operands() {
    // A recursive Ruby `factorial` used to anchor both the unique and
    // the total operator/operand counts, plus a snapshot of the full
    // Halstead derivation.
    //
    // The asserted counts are consistent with:
    //   operators: def, (, ), return, if, <=, *, -, end
    //              → 9 distinct, 11 occurrences (`(` and `)` twice).
    //   operands:  factorial (×2), n (×4), 1 (×3)
    //              → 3 distinct, 9 occurrences.
    //
    // Lesson 4 invariants: u_operators / u_operands here equal the
    // dedupe lengths the `--ops` accessor would emit on the same
    // source, so a future grammar bump that adds an aliased kind_id
    // to either side trips these assertions without relying on
    // snapshot drift.
    let source = "def factorial(n)\n return 1 if n <= 1\n n * factorial(n - 1)\nend\n";

    check_metrics::<RubyParser>(source, "foo.rb", |metric| {
        let halstead = &metric.halstead;
        assert_eq!(halstead.u_operators(), 9.0);
        assert_eq!(halstead.operators(), 11.0);
        assert_eq!(halstead.u_operands(), 3.0);
        assert_eq!(halstead.operands(), 9.0);

        insta::assert_json_snapshot!(metric.halstead);
    });
}
2800
#[test]
fn ruby_halstead_plain_string_operand() {
    // Counterpart to `ruby_halstead_interpolated_string_no_double_count`:
    // a string literal without interpolation must contribute exactly
    // one operand, covering the "no interpolation" branch of the same
    // arm (see `src/getter.rs::get_op_type`'s `R::String | …` case).
    //
    // expected: operators = {def, end} = 2; operands = {f, "hello"} = 2.
    let source = "def f\n \"hello\"\nend\n";

    check_metrics::<RubyParser>(source, "foo.rb", |space| {
        let halstead = &space.halstead;
        assert_eq!(halstead.u_operators(), 2.0);
        assert_eq!(halstead.operators(), 2.0);
        assert_eq!(halstead.u_operands(), 2.0);
        assert_eq!(halstead.operands(), 2.0);
    });
}
2815
#[test]
fn ruby_halstead_interpolated_string_no_double_count() {
    // Regression mirror for #180 (Bash) / #183 (C#). A Ruby string
    // literal that carries an `Interpolation` child is intentionally
    // classified as `Unknown`, so identifiers inside the interpolated
    // expression are not double-counted as operands.
    //
    // expected, for `def f(name)\n "Hi #{name}"\nend\n`:
    //   operators: def, (, ), #{, }, end → u_operators = 6
    //              (documented here, not asserted below).
    //   operands:  f, name (parameter), name (inside `#{name}`).
    //              The wrapping `"…#{name}"` literal is skipped by the
    //              `is_child(R::Interpolation)` guard, and the operand
    //              store keys by token text, so both `name`
    //              occurrences collapse into one distinct entry
    //              → u_operands = 2, operands = 3 (`f` ×1, `name` ×2).
    //
    // Without the guard the wrapping literal would count as well,
    // inflating u_operands to 3 and operands to 4.
    let source = "def f(name)\n \"Hi #{name}\"\nend\n";

    check_metrics::<RubyParser>(source, "foo.rb", |space| {
        let halstead = &space.halstead;
        assert_eq!(halstead.u_operands(), 2.0);
        assert_eq!(halstead.operands(), 3.0);
    });
}
2839
#[test]
fn ruby_halstead_symbol_literal_operand() {
    // `:ok` is a `SimpleSymbol` leaf and counts as a single operand.
    // No interpolation guard is needed for it: only `DelimitedSymbol`
    // (`:"…#{x}…"`) can interpolate.
    //
    // expected: operators = {def, end} = 2; operands = {f, :ok} = 2.
    let source = "def f\n :ok\nend\n";

    check_metrics::<RubyParser>(source, "foo.rb", |space| {
        let halstead = &space.halstead;
        assert_eq!(halstead.u_operators(), 2.0);
        assert_eq!(halstead.u_operands(), 2.0);
    });
}
2851
#[test]
fn ruby_halstead_regex_operand() {
    // `/foo/` parses as a `Regex` node and counts as one operand. Its
    // surrounding slash delimiters are emitted as `SLASH` tokens and
    // classified as arithmetic-or-divide operators by the shared arm,
    // so they add a single entry to the distinct-operator set.
    //
    // expected: u_operators = {def, (, ), =~, /, end} = 6;
    //           u_operands  = {f, s, /foo/} = 3.
    let source = "def f(s)\n s =~ /foo/\nend\n";

    check_metrics::<RubyParser>(source, "foo.rb", |space| {
        let halstead = &space.halstead;
        assert_eq!(halstead.u_operators(), 6.0);
        assert_eq!(halstead.u_operands(), 3.0);
    });
}
2864 }
2865}