big_code_analysis/spaces.rs
1// Per-language metric and AST modules deliberately consume the macro-
2// generated tree-sitter token enums via `use crate::*` and `use Foo::*`
3// inside match expressions — explicit imports would list dozens of
4// variants per arm and obscure the per-language token sets that are the
5// point of these files. Allowed at the module level rather than per
6// function so the per-language impl blocks stay readable.
7#![allow(clippy::wildcard_imports, clippy::enum_glob_use)]
8// Metric counts (token, function, branch, argument, etc.) are stored as
9// `usize` and crossed with `f64` averages, ratios, and Halstead scores
10// across the cyclomatic / MI / Halstead computations. The `usize as f64`
11// and `f64 as usize` casts are intentional and snapshot-anchored — every
12// site is bounded by the count it came from. Allowing the lints at the
13// module level keeps the metric arithmetic legible.
14#![allow(
15 clippy::cast_precision_loss,
16 clippy::cast_possible_truncation,
17 clippy::cast_sign_loss
18)]
19
20use std::collections::HashMap;
21
22use serde::Serialize;
23use serde::ser::SerializeStruct;
24use std::fmt;
25use std::path::{Path, PathBuf};
26use std::sync::Arc;
27
28use crate::langs::LANG;
29use crate::metric_set::{Metric, MetricSet};
30use crate::preproc::PreprocResults;
31
32use crate::checker::Checker;
33use crate::error::MetricsError;
34use crate::node::Node;
35use crate::suppression::{
36 Suppression, SuppressionKind, SuppressionScope, parse_marker as parse_suppression_marker,
37};
38
39use crate::abc::{self, Abc};
40use crate::cognitive::{self, Cognitive};
41use crate::cyclomatic::{self, Cyclomatic};
42use crate::exit::{self, Exit};
43use crate::getter::Getter;
44use crate::halstead::{self, Halstead, HalsteadMaps};
45use crate::loc::{self, Loc};
46use crate::mi::{self, Mi};
47use crate::nargs::{self, NArgs};
48use crate::nom::{self, Nom};
49use crate::npa::{self, Npa};
50use crate::npm::{self, Npm};
51use crate::tokens::{self, Tokens};
52use crate::wmc::{self, Wmc};
53
54use crate::output::dump_metrics::*;
55use crate::traits::*;
56
57/// The list of supported space kinds.
58#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize)]
59#[serde(rename_all = "lowercase")]
60pub enum SpaceKind {
61 /// An unknown space
62 #[default]
63 Unknown,
64 /// A function space
65 Function,
66 /// A class space
67 Class,
68 /// A struct space
69 Struct,
70 /// A `Rust` trait space
71 Trait,
72 /// A `Rust` implementation space
73 Impl,
74 /// A general space
75 Unit,
76 /// A `C/C++` namespace
77 Namespace,
78 /// An interface
79 Interface,
80}
81
82impl fmt::Display for SpaceKind {
83 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
84 let s = match self {
85 SpaceKind::Unknown => "unknown",
86 SpaceKind::Function => "function",
87 SpaceKind::Class => "class",
88 SpaceKind::Struct => "struct",
89 SpaceKind::Trait => "trait",
90 SpaceKind::Impl => "impl",
91 SpaceKind::Unit => "unit",
92 SpaceKind::Namespace => "namespace",
93 SpaceKind::Interface => "interface",
94 };
95 write!(f, "{s}")
96 }
97}
98
99/// All metrics data.
100///
101/// The set of metrics actually computed is governed by
102/// [`MetricsOptions::with_only`]. By default every metric is
103/// populated; when `with_only` restricts the set, unselected fields
104/// remain at their `Default` value and are elided from
105/// `Serialize` output. The `selected` mask is the source of truth
106/// for which fields are populated — read it via
107/// [`CodeMetrics::selected`].
108#[derive(Default, Debug, Clone)]
109pub struct CodeMetrics {
110 /// `NArgs` data
111 pub nargs: nargs::Stats,
112 /// `NExits` data
113 pub nexits: exit::Stats,
114 /// `Cognitive` data
115 pub cognitive: cognitive::Stats,
116 /// `Cyclomatic` data
117 pub cyclomatic: cyclomatic::Stats,
118 /// `Halstead` data
119 pub halstead: halstead::Stats,
120 /// `Loc` data
121 pub loc: loc::Stats,
122 /// `Nom` data
123 pub nom: nom::Stats,
124 /// `Tokens` data
125 pub tokens: tokens::Stats,
126 /// `Mi` data
127 pub mi: mi::Stats,
128 /// `Abc` data
129 pub abc: abc::Stats,
130 /// `Wmc` data
131 pub wmc: wmc::Stats,
132 /// `Npm` data
133 pub npm: npm::Stats,
134 /// `Npa` data
135 pub npa: npa::Stats,
136 /// Which metrics were actually computed for this space.
137 ///
138 /// Default is [`MetricSet::all`] — every metric was run, matching
139 /// the pre-#257 behaviour. After
140 /// [`MetricsOptions::with_only`] the bitfield is restricted to the
141 /// caller's selection plus auto-added dependencies.
142 ///
143 /// The [`Serialize`] impl consults this set to elide fields the
144 /// caller did not select. The field itself is not serialized.
145 pub selected: MetricSet,
146}
147
148impl Serialize for CodeMetrics {
149 // Per-metric serialization gated by `self.selected`. We
150 // pre-count the number of fields that will be emitted so the
151 // `SerializeStruct` header is accurate (formats like CBOR write
152 // the field count up front and reject mismatches at the end).
153 //
154 // The existing skip-when-disabled predicates for `wmc`, `npm`, and
155 // `npa` are honored alongside the selection mask: a metric is
156 // emitted iff it was selected AND not flagged as disabled by the
157 // metric itself.
158 #[allow(clippy::similar_names)] // wmc / npm / npa are domain terms
159 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
160 where
161 S: serde::Serializer,
162 {
163 let sel = self.selected;
164 let emit_wmc = sel.contains(Metric::Wmc) && !self.wmc.is_disabled();
165 let emit_npm = sel.contains(Metric::Npm) && !self.npm.is_disabled();
166 let emit_npa = sel.contains(Metric::Npa) && !self.npa.is_disabled();
167
168 // 10 always-on metrics (nargs, nexits, cognitive, cyclomatic,
169 // halstead, loc, nom, tokens, mi, abc) plus up to 3 from the
170 // class-only group (wmc, npm, npa). The count must track the
171 // serialize_field arms below 1:1 — CBOR writes the field
172 // count up front and rejects mismatches at end().
173 let always_on = [
174 Metric::NArgs,
175 Metric::Exit,
176 Metric::Cognitive,
177 Metric::Cyclomatic,
178 Metric::Halstead,
179 Metric::Loc,
180 Metric::Nom,
181 Metric::Tokens,
182 Metric::Mi,
183 Metric::Abc,
184 ];
185 let field_count = always_on.iter().filter(|m| sel.contains(**m)).count()
186 + usize::from(emit_wmc)
187 + usize::from(emit_npm)
188 + usize::from(emit_npa);
189
190 let mut st = serializer.serialize_struct("CodeMetrics", field_count)?;
191 // Each arm must match exactly one of the booleans counted into
192 // `field_count` above — drift here will make CBOR reject the
193 // payload at `st.end()`.
194 macro_rules! emit_if {
195 ($cond:expr, $key:literal, $field:expr) => {
196 if $cond {
197 st.serialize_field($key, $field)?;
198 }
199 };
200 }
201 emit_if!(sel.contains(Metric::NArgs), "nargs", &self.nargs);
202 emit_if!(sel.contains(Metric::Exit), "nexits", &self.nexits);
203 emit_if!(
204 sel.contains(Metric::Cognitive),
205 "cognitive",
206 &self.cognitive
207 );
208 emit_if!(
209 sel.contains(Metric::Cyclomatic),
210 "cyclomatic",
211 &self.cyclomatic
212 );
213 emit_if!(sel.contains(Metric::Halstead), "halstead", &self.halstead);
214 emit_if!(sel.contains(Metric::Loc), "loc", &self.loc);
215 emit_if!(sel.contains(Metric::Nom), "nom", &self.nom);
216 emit_if!(sel.contains(Metric::Tokens), "tokens", &self.tokens);
217 emit_if!(sel.contains(Metric::Mi), "mi", &self.mi);
218 emit_if!(sel.contains(Metric::Abc), "abc", &self.abc);
219 emit_if!(emit_wmc, "wmc", &self.wmc);
220 emit_if!(emit_npm, "npm", &self.npm);
221 emit_if!(emit_npa, "npa", &self.npa);
222 st.end()
223 }
224}
225
226impl CodeMetrics {
227 /// Construct a `CodeMetrics` whose `selected` mask is the given
228 /// [`MetricSet`]. All metric fields are at their `Default` value;
229 /// the walker fills them in for whichever metrics the mask
230 /// admits.
231 #[inline]
232 #[must_use]
233 pub fn with_selected(selected: MetricSet) -> Self {
234 Self {
235 selected,
236 ..Self::default()
237 }
238 }
239
240 /// Returns the set of metrics that were computed for this space.
241 #[inline]
242 #[must_use]
243 pub fn selected(&self) -> MetricSet {
244 self.selected
245 }
246}
247
248impl fmt::Display for CodeMetrics {
249 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
250 writeln!(f, "{}", self.nargs)?;
251 writeln!(f, "{}", self.nexits)?;
252 writeln!(f, "{}", self.cognitive)?;
253 writeln!(f, "{}", self.cyclomatic)?;
254 writeln!(f, "{}", self.halstead)?;
255 writeln!(f, "{}", self.loc)?;
256 writeln!(f, "{}", self.nom)?;
257 writeln!(f, "{}", self.tokens)?;
258 write!(f, "{}", self.mi)
259 }
260}
261
262impl CodeMetrics {
263 /// Sum each metric component from `other` into `self` in place. Used to
264 /// roll nested function-space metrics into their parent space.
265 pub fn merge(&mut self, other: &CodeMetrics) {
266 self.cognitive.merge(&other.cognitive);
267 self.cyclomatic.merge(&other.cyclomatic);
268 self.halstead.merge(&other.halstead);
269 self.loc.merge(&other.loc);
270 self.nom.merge(&other.nom);
271 self.tokens.merge(&other.tokens);
272 self.mi.merge(&other.mi);
273 self.nargs.merge(&other.nargs);
274 self.nexits.merge(&other.nexits);
275 self.abc.merge(&other.abc);
276 self.wmc.merge(&other.wmc);
277 self.npm.merge(&other.npm);
278 self.npa.merge(&other.npa);
279 // Union the selection masks so a parent space's emitted
280 // fields are the union of every nested space's selection.
281 // In practice every nested space shares the same mask (set
282 // once from `MetricsOptions::metrics`), so this is the
283 // identity operation; we union rather than assign to keep
284 // `merge` correct under future callers that mix
285 // independently-built `FuncSpace` values.
286 self.selected = self.selected.union(other.selected);
287 }
288}
289
290/// Function space data.
291#[derive(Debug, Clone, Serialize)]
292pub struct FuncSpace {
293 /// The name of a function space.
294 ///
295 /// For the top-level (file-level) `FuncSpace`, this is the value
296 /// supplied via [`Source::name`] to [`analyze`] — typically a file
297 /// path or other display identifier chosen by the caller. The
298 /// library no longer derives this from a `&Path` or applies lossy
299 /// UTF-8 conversion; callers are expected to pass an
300 /// already-stringified identifier (or `None` if they have no
301 /// meaningful name to attach). The deprecated entry points
302 /// `get_function_spaces` / [`metrics_with_options`] continue to
303 /// derive a lossy string from the `&Path` argument for backwards
304 /// compatibility.
305 ///
306 /// For nested spaces, `None` means an error occurred in parsing the
307 /// name of the function space from the AST.
308 pub name: Option<String>,
309 /// The first line of a function space
310 pub start_line: usize,
311 /// The last line of a function space
312 pub end_line: usize,
313 /// The space kind
314 pub kind: SpaceKind,
315 /// All subspaces contained in a function space
316 pub spaces: Vec<FuncSpace>,
317 /// All metrics of a function space
318 pub metrics: CodeMetrics,
319 /// In-source suppression markers that apply to this space.
320 ///
321 /// Populated during the spaces pass from comment-embedded
322 /// directives. Each marker carries a [`SuppressionScope`] naming
323 /// the metrics it silences. The top-level (file-level) `FuncSpace`
324 /// aggregates every file-scoped marker; nested function spaces
325 /// aggregate every function-scoped marker whose comment lies
326 /// inside their source range. Metric computation itself is
327 /// unaffected — this field is consumed by downstream
328 /// *threshold-check* code (e.g. `bca check`), which consults a
329 /// [`crate::SuppressionPolicy`] to decide whether to honour the
330 /// markers or surface every violation regardless.
331 ///
332 /// Defaults to `SuppressionScope::default()` (an empty `Some`), so
333 /// pre-existing code paths that do not honor suppressions see no
334 /// behaviour change. The field is elided from JSON output when
335 /// empty so the existing schema is unchanged for files without
336 /// markers.
337 #[serde(default, skip_serializing_if = "SuppressionScope::is_empty")]
338 pub suppressed: SuppressionScope,
339}
340
341impl FuncSpace {
342 fn new<T: Getter>(node: &Node, code: &[u8], kind: SpaceKind, selected: MetricSet) -> Self {
343 let (start_position, end_position) = match kind {
344 SpaceKind::Unit => {
345 if node.child_count() == 0 {
346 (0, 0)
347 } else {
348 (node.start_row() + 1, node.end_row())
349 }
350 }
351 _ => (node.start_row() + 1, node.end_row() + 1),
352 };
353
354 // The top-level Unit's name is overwritten by `metrics_with_options`
355 // (when called with an explicit name) before returning, so
356 // computing it here is wasted work. Other kinds keep the
357 // AST-derived name.
358 let name = (kind != SpaceKind::Unit)
359 .then(|| {
360 T::get_func_space_name(node, code)
361 .map(|name| name.split_whitespace().collect::<Vec<_>>().join(" "))
362 })
363 .flatten();
364
365 Self {
366 name,
367 spaces: Vec::new(),
368 metrics: CodeMetrics::with_selected(selected),
369 kind,
370 start_line: start_position,
371 end_line: end_position,
372 suppressed: SuppressionScope::default(),
373 }
374 }
375}
376
377#[inline]
378fn compute_halstead_mi_and_wmc<T: ParserTrait>(state: &mut State, selected: MetricSet) {
379 if selected.contains(Metric::Halstead) {
380 state
381 .halstead_maps
382 .finalize(&mut state.space.metrics.halstead);
383 }
384 if selected.contains(Metric::Mi) {
385 // `MetricsOptions::with_only` guarantees Mi's dependencies
386 // (Loc + Cyclomatic + Halstead) are also selected, so the
387 // Stats values feeding into the MI formula here are populated
388 // — not the zero defaults that would silently produce a
389 // garbage MI score.
390 T::Mi::compute(
391 &state.space.metrics.loc,
392 &state.space.metrics.cyclomatic,
393 &state.space.metrics.halstead,
394 &mut state.space.metrics.mi,
395 );
396 }
397 if selected.contains(Metric::Wmc) {
398 T::Wmc::compute(
399 state.space.kind,
400 &state.space.metrics.cyclomatic,
401 &mut state.space.metrics.wmc,
402 );
403 }
404}
405
406#[inline]
407fn compute_averages(state: &mut State, selected: MetricSet) {
408 // `Nom::functions_sum / closures_sum / total` are only meaningful
409 // if Nom was selected; when it isn't, the divisor is the Stats
410 // default (0) and the per-metric `finalize` calls treat that as
411 // "no functions, no closures, no items". Compute the divisors
412 // once and feed them into each gated finalize.
413 let nom_functions = state.space.metrics.nom.functions_sum() as usize;
414 let nom_closures = state.space.metrics.nom.closures_sum() as usize;
415 let nom_total = state.space.metrics.nom.total() as usize;
416 // Cognitive average
417 if selected.contains(Metric::Cognitive) {
418 state.space.metrics.cognitive.finalize(nom_total);
419 }
420 // Nexit average
421 if selected.contains(Metric::Exit) {
422 state.space.metrics.nexits.finalize(nom_total);
423 }
424 // Nargs average
425 if selected.contains(Metric::NArgs) {
426 state
427 .space
428 .metrics
429 .nargs
430 .finalize(nom_functions, nom_closures);
431 }
432}
433
434#[inline]
435fn compute_minmax(state: &mut State, selected: MetricSet) {
436 if selected.contains(Metric::Cyclomatic) {
437 state.space.metrics.cyclomatic.compute_minmax();
438 }
439 if selected.contains(Metric::Exit) {
440 state.space.metrics.nexits.compute_minmax();
441 }
442 if selected.contains(Metric::Cognitive) {
443 state.space.metrics.cognitive.compute_minmax();
444 }
445 if selected.contains(Metric::NArgs) {
446 state.space.metrics.nargs.compute_minmax();
447 }
448 if selected.contains(Metric::Nom) {
449 state.space.metrics.nom.compute_minmax();
450 }
451 if selected.contains(Metric::Loc) {
452 state.space.metrics.loc.compute_minmax();
453 }
454 if selected.contains(Metric::Abc) {
455 state.space.metrics.abc.compute_minmax();
456 }
457 if selected.contains(Metric::Tokens) {
458 state.space.metrics.tokens.compute_minmax();
459 }
460}
461
462#[inline]
463fn compute_sum(state: &mut State, selected: MetricSet) {
464 if selected.contains(Metric::Wmc) {
465 state.space.metrics.wmc.compute_sum();
466 }
467 if selected.contains(Metric::Npm) {
468 state.space.metrics.npm.compute_sum();
469 }
470 if selected.contains(Metric::Npa) {
471 state.space.metrics.npa.compute_sum();
472 }
473}
474
475fn finalize<T: ParserTrait>(state_stack: &mut Vec<State>, diff_level: usize, selected: MetricSet) {
476 if state_stack.is_empty() {
477 return;
478 }
479 for _ in 0..diff_level {
480 if state_stack.len() == 1 {
481 let last_state = state_stack
482 .last_mut()
483 .expect("invariant: state_stack has exactly one element");
484 compute_minmax(last_state, selected);
485 compute_sum(last_state, selected);
486 compute_halstead_mi_and_wmc::<T>(last_state, selected);
487 compute_averages(last_state, selected);
488 break;
489 }
490 let mut state = state_stack
491 .pop()
492 .expect("invariant: state_stack has more than one element");
493 compute_minmax(&mut state, selected);
494 compute_sum(&mut state, selected);
495 compute_halstead_mi_and_wmc::<T>(&mut state, selected);
496 compute_averages(&mut state, selected);
497
498 let last_state = state_stack
499 .last_mut()
500 .expect("invariant: state_stack has remaining elements after pop");
501 last_state.halstead_maps.merge(&state.halstead_maps);
502 compute_halstead_mi_and_wmc::<T>(last_state, selected);
503
504 // Merge function spaces
505 last_state.space.metrics.merge(&state.space.metrics);
506 last_state.space.spaces.push(state.space);
507 }
508}
509
510#[derive(Debug, Clone)]
511struct State<'a> {
512 space: FuncSpace,
513 halstead_maps: HalsteadMaps<'a>,
514}
515
516/// In-memory source bundle handed to [`analyze`].
517///
518/// `Source` decouples the *display name* of the top-level
519/// [`FuncSpace`] (`Source::name`) from the optional *filesystem path*
520/// used by the C++ preprocessor lookup (`Source::preproc_path`). The
521/// older path-positional entry points (`get_function_spaces`,
522/// `metrics_with_options`) conflate the two and derive the name via
523/// lossy UTF-8 conversion of the path; for in-memory snippets, code
524/// fetched over the network, or test fixtures, callers can now pass
525/// `Source` directly without manufacturing a `Path`.
526///
527/// Marked `#[non_exhaustive]` so future input fields can land
528/// additively. Downstream callers must construct via
529/// [`Source::new`] plus the `with_*` builder setters rather than
530/// struct-literal syntax (rustc rejects external struct literals on
531/// non-exhaustive types with E0639).
532///
533/// # Examples
534///
535/// Analysing an in-memory snippet with no on-disk path:
536///
537/// ```
538/// use big_code_analysis::{analyze, MetricsOptions, Source, LANG};
539///
540/// let source = Source::new(LANG::Rust, b"fn main() {}")
541/// .with_name(Some("snippet.rs".to_owned()));
542/// let space = analyze(source, MetricsOptions::default()).unwrap();
543/// assert_eq!(space.name.as_deref(), Some("snippet.rs"));
544/// ```
545#[non_exhaustive]
546#[derive(Debug, Clone)]
547pub struct Source<'a> {
548 /// The source language used to select the parser.
549 pub lang: LANG,
550 /// Raw source bytes. `Source` borrows them so callers retain
551 /// ownership; `analyze` copies into the parser's owned buffer.
552 pub code: &'a [u8],
553 /// Display / identifier name for the top-level [`FuncSpace`].
554 /// If `None`, the top-level [`FuncSpace::name`] is left `None`.
555 pub name: Option<String>,
556 /// Optional path used only by the C++ preprocessor lookup
557 /// (`get_fake_code`) to resolve macro definitions in
558 /// [`PreprocResults`]. For non-C++ languages this is ignored.
559 /// Defaults to `None`.
560 pub preproc_path: Option<&'a Path>,
561 /// Preprocessor results paired with [`Source::preproc_path`].
562 /// Same shape as the `pr` arg on the deprecated entry points.
563 pub preproc: Option<Arc<PreprocResults>>,
564}
565
566impl<'a> Source<'a> {
567 /// Build a `Source` for `lang` and `code` with no name and no
568 /// preprocessor inputs. Chain `with_*` setters to attach a
569 /// display name or preprocessor results.
570 ///
571 /// `Source` is `#[non_exhaustive]`, so external callers cannot
572 /// use struct-literal syntax — this constructor plus the
573 /// builder setters are the supported construction path.
574 #[inline]
575 #[must_use]
576 pub fn new(lang: LANG, code: &'a [u8]) -> Self {
577 Self {
578 lang,
579 code,
580 name: None,
581 preproc_path: None,
582 preproc: None,
583 }
584 }
585
586 /// Builder-style setter for [`Source::name`].
587 #[inline]
588 #[must_use]
589 pub fn with_name(mut self, name: Option<String>) -> Self {
590 self.name = name;
591 self
592 }
593
594 /// Builder-style setter for [`Source::preproc_path`].
595 #[inline]
596 #[must_use]
597 pub fn with_preproc_path(mut self, preproc_path: Option<&'a Path>) -> Self {
598 self.preproc_path = preproc_path;
599 self
600 }
601
602 /// Builder-style setter for [`Source::preproc`].
603 #[inline]
604 #[must_use]
605 pub fn with_preproc(mut self, preproc: Option<Arc<PreprocResults>>) -> Self {
606 self.preproc = preproc;
607 self
608 }
609}
610
611/// Parse-once, compute-many handle.
612///
613/// Owns the parsed [`tree_sitter::Tree`] and the source bytes it was parsed
614/// from, so callers can run [`Ast::metrics`] repeatedly against the same
615/// parse — with different [`MetricsOptions`] subsets, interleaved with
616/// custom `tree_sitter` traversal via [`Ast::as_tree_sitter`], or cached
617/// across configuration changes in an analysis pipeline.
618///
619/// Build one via [`Ast::parse`] (mirrors [`analyze`]) or
620/// [`Ast::from_tree_sitter`] (mirrors [`crate::metrics_from_tree`] but
621/// with an explicit display name instead of a lossy path-to-string
622/// conversion).
623///
624/// `Ast` is a snapshot — it does not pick up changes to the source after
625/// construction. Incremental reparse via [`tree_sitter::InputEdit`] is out
626/// of scope for this seam.
627///
628/// # C++ preprocessor
629///
630/// When [`Ast::parse`] is given a [`Source`] carrying preprocessor inputs
631/// and the language is [`LANG::Cpp`], [`Ast::source`] returns the *expanded*
632/// bytes the parser actually saw (the macro pre-pass runs before
633/// `tree-sitter` does). [`Ast::from_tree_sitter`] adopts whatever tree the
634/// caller supplied; whatever expansion they applied before building it is
635/// what [`Ast::source`] reflects.
636///
637/// # Examples
638///
639/// Parse once, run two disjoint metric subsets without re-parsing:
640///
641/// ```
642/// use big_code_analysis::{Ast, LANG, Metric, MetricsOptions, Source};
643///
644/// let ast = Ast::parse(
645/// Source::new(LANG::Rust, b"fn f() { if true { 1 } else { 2 }; }"),
646/// )
647/// .expect("rust feature enabled");
648///
649/// let loc = ast
650/// .metrics(MetricsOptions::default().with_only(&[Metric::Loc]))
651/// .expect("walker succeeds");
652/// let cyc = ast
653/// .metrics(MetricsOptions::default().with_only(&[Metric::Cyclomatic]))
654/// .expect("walker succeeds");
655/// // Each call's `with_only` filters to its requested family — the other
656/// // metric stays at its `Default` (zero) value, confirming options are
657/// // honored per call rather than carried over.
658/// assert!(loc.metrics.loc.ploc() > 0.0);
659/// assert_eq!(loc.metrics.cyclomatic.cyclomatic_sum(), 0.0);
660/// assert!(cyc.metrics.cyclomatic.cyclomatic_sum() > 0.0);
661/// assert_eq!(cyc.metrics.loc.ploc(), 0.0);
662/// ```
663///
664/// Walk the underlying `tree_sitter::Tree` and then run metrics on the
665/// same parse:
666///
667/// ```
668/// use big_code_analysis::{Ast, LANG, MetricsOptions, Source};
669///
670/// let ast = Ast::parse(Source::new(LANG::Rust, b"fn f() {}"))
671/// .expect("rust feature enabled");
672/// let root = ast.as_tree_sitter().root_node();
673/// assert_eq!(root.kind(), "source_file");
674/// let _ = ast.metrics(MetricsOptions::default()).expect("walker succeeds");
675/// ```
676pub struct Ast {
677 inner: crate::langs::AstInner,
678 name: Option<String>,
679}
680
681impl fmt::Debug for Ast {
682 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
683 // The held parser owns a `tree_sitter::Tree` and a `Vec<u8>`;
684 // neither has a meaningful `Debug` projection (one is an opaque
685 // C handle, the other is raw source). Reporting language + name
686 // keeps the public `Ast: Debug` promise without forcing `Debug`
687 // onto every per-language `*Code` tag.
688 f.debug_struct("Ast")
689 .field("language", &self.language())
690 .field("name", &self.name)
691 .finish_non_exhaustive()
692 }
693}
694
695impl Ast {
696 /// Parse `source` into a reusable [`Ast`]. Equivalent to the parse half
697 /// of [`analyze`]: every [`Ast::metrics`] call on the returned handle
698 /// produces the same [`FuncSpace`] as a freshly-issued
699 /// `analyze(source, options)` would.
700 ///
701 /// # Errors
702 ///
703 /// Returns [`MetricsError::LanguageDisabled`] when the source language's
704 /// per-language Cargo feature is not enabled in this build.
705 pub fn parse(source: Source<'_>) -> Result<Self, MetricsError> {
706 let Source {
707 lang,
708 code,
709 name,
710 preproc_path,
711 preproc,
712 } = source;
713 let inner = crate::langs::ast_parse_dispatch(lang, code, preproc_path, preproc)?;
714 Ok(Self { inner, name })
715 }
716
717 /// Adopt a caller-built [`tree_sitter::Tree`]. The `Source`-flavored
718 /// counterpart of [`crate::metrics_from_tree`]: same tree-reuse semantics, but
719 /// with `name: Option<String>` carried end-to-end instead of derived
720 /// from a path via lossy UTF-8 conversion.
721 ///
722 /// The supplied `tree` must have been produced from `code` with the
723 /// [`tree_sitter::Language`] returned by
724 /// [`LANG::get_tree_sitter_language`] for `lang`; a mismatch is not
725 /// `unsafe` but yields nonsensical metric values.
726 ///
727 /// # Errors
728 ///
729 /// Returns [`MetricsError::LanguageDisabled`] when `lang`'s
730 /// per-language Cargo feature is not enabled in this build.
731 pub fn from_tree_sitter(
732 lang: LANG,
733 tree: tree_sitter::Tree,
734 code: Vec<u8>,
735 name: Option<String>,
736 ) -> Result<Self, MetricsError> {
737 let inner = crate::langs::ast_from_tree_dispatch(lang, tree, code)?;
738 Ok(Self { inner, name })
739 }
740
741 /// Run the metric walker against the held parse. Safe to call
742 /// repeatedly — the tree is reused.
743 ///
744 /// Two `metrics` calls with different [`MetricsOptions::with_only`]
745 /// selections walk the tree twice; the savings versus [`analyze`] come
746 /// from not re-parsing the source.
747 ///
748 /// # Errors
749 ///
750 /// The return type carries [`MetricsError::EmptyRoot`] for forward
751 /// compatibility, but the walker always pushes a synthetic top-level
752 /// [`SpaceKind::Unit`] [`FuncSpace`] before walking, so this method
753 /// does not return `Err` in practice today.
754 pub fn metrics(&self, options: MetricsOptions) -> Result<FuncSpace, MetricsError> {
755 self.inner.run_metrics(self.name.clone(), options)
756 }
757
758 /// Source language of the parsed tree.
759 #[must_use]
760 #[inline]
761 pub fn language(&self) -> LANG {
762 self.inner.language()
763 }
764
765 /// Source bytes the held tree was parsed from. For [`LANG::Cpp`] with
766 /// preprocessor inputs supplied to [`Ast::parse`], these are the
767 /// *expanded* bytes (see the type-level "C++ preprocessor" note).
768 #[must_use]
769 #[inline]
770 pub fn source(&self) -> &[u8] {
771 self.inner.code_bytes()
772 }
773
774 /// Display name carried through to [`FuncSpace::name`] by every
775 /// [`Ast::metrics`] call.
776 #[must_use]
777 #[inline]
778 pub fn name(&self) -> Option<&str> {
779 self.name.as_deref()
780 }
781
782 /// Borrow the underlying [`tree_sitter::Tree`] for callers that want
783 /// to drive their own traversal alongside the metric walker.
784 ///
785 /// The returned reference is valid only while `self` lives; nodes
786 /// obtained from it must be resolved against [`Ast::source`] (the
787 /// `tree_sitter::Tree` is lazy and lifetime-bound to that byte
788 /// buffer).
789 #[must_use]
790 #[inline]
791 pub fn as_tree_sitter(&self) -> &tree_sitter::Tree {
792 self.inner.ts_tree()
793 }
794}
795
796/// Compute every metric for a [`Source`].
797///
798/// This is the recommended library entry point. Unlike the
799/// deprecated [`metrics`] / [`metrics_with_options`] family it does
800/// not conflate the top-level [`FuncSpace::name`] with a filesystem
801/// path: callers supply an explicit `Source::name` and an optional
802/// `Source::preproc_path` for C++ preprocessor lookup.
803///
804/// `options` controls per-traversal flags (e.g.
805/// `MetricsOptions::default().with_exclude_tests(true)` to elide
806/// Rust `#[test]` / `#[cfg(test)]` subtrees).
807///
808/// # Errors
809///
810/// The return type carries [`MetricsError::EmptyRoot`] for forward
811/// compatibility, but the walker always pushes a synthetic top-level
812/// [`SpaceKind::Unit`][crate::SpaceKind] `FuncSpace` before walking,
813/// so this function does not return `Err` in practice today (see
814/// the variant doc).
815///
816/// # Examples
817///
818/// Analysing an in-memory snippet without constructing a `Path`:
819///
820/// ```
821/// use big_code_analysis::{analyze, MetricsOptions, Source, LANG};
822///
823/// let space = analyze(
824/// Source::new(LANG::Rust, b"fn main() { let x = 1 + 2; }")
825/// .with_name(Some("snippet.rs".to_owned())),
826/// MetricsOptions::default(),
827/// )
828/// .expect("snippet has a top-level FuncSpace");
829/// assert_eq!(space.name.as_deref(), Some("snippet.rs"));
830/// ```
831pub fn analyze(source: Source<'_>, options: MetricsOptions) -> Result<FuncSpace, MetricsError> {
832 Ast::parse(source)?.metrics(options)
833}
834
835/// Returns all function spaces data of a code. This function needs a parser to
836/// be created a priori in order to work.
837///
838/// Equivalent to calling [`metrics_with_options`] with
839/// [`MetricsOptions::default`] — every node is visited and counted.
840/// Existing callers (including `get_function_spaces` and the
841/// `Metrics` callback used by the CLI) keep their previous behaviour
842/// through this entry point. Pass an explicit [`MetricsOptions`]
843/// (e.g. `exclude_tests: true`) to opt in to subtree filtering.
844///
845/// # Deprecated
846///
847/// Prefer [`analyze`], which accepts a [`Source`] carrying an explicit
848/// display name distinct from any on-disk path.
849///
850/// # Errors
851///
852/// The return type carries [`MetricsError::EmptyRoot`] for forward
853/// compatibility, but the walker always pushes a synthetic top-level
854/// [`SpaceKind::Unit`][crate::SpaceKind] `FuncSpace` before walking,
855/// so this function does not return `Err` in practice today (see
856/// the variant doc).
857///
858/// # Examples
859///
860/// ```
861/// use std::path::Path;
862///
863/// # #[allow(deprecated)]
864/// use big_code_analysis::{CppParser, metrics, ParserTrait};
865///
866/// let source_code = "int a = 42;";
867///
868/// // The path to a dummy file used to contain the source code
869/// let path = Path::new("foo.c");
870/// let source_as_vec = source_code.as_bytes().to_vec();
871///
872/// // The parser of the code, in this case a CPP parser
873/// let parser = CppParser::new(source_as_vec, &path, None);
874///
875/// // Gets all function spaces data of the code contained in foo.c
876/// # #[allow(deprecated)]
877/// metrics(&parser, &path).unwrap();
878/// ```
879#[deprecated(
880 since = "0.0.26",
881 note = "Use `analyze(Source::new(lang, code).with_name(Some(name)), MetricsOptions::default())` instead — the path-positional shim derives the top-level FuncSpace name via lossy UTF-8 conversion."
882)]
883// Hidden from rustdoc because the signature exposes `ParserTrait` and
884// `Parser<T>` — both demoted to `#[doc(hidden)]` per issue #256. The
885// deprecation note already redirects callers to `analyze` / `Source`,
886// which is the documented surface.
887#[doc(hidden)]
888pub fn metrics<'a, T: ParserTrait>(
889 parser: &'a T,
890 path: &'a Path,
891) -> Result<FuncSpace, MetricsError> {
892 #[allow(deprecated)]
893 metrics_with_options(parser, path, MetricsOptions::default())
894}
895
896/// Like [`metrics`], but consults `options` while walking the AST.
897///
898/// Setting `options.exclude_tests = true` calls the language
899/// [`Checker`]'s `should_skip_subtree` hook on every node and prunes
900/// matching subtrees before any per-metric `compute` runs. The hook
901/// defaults to `false` for every language, so passing
902/// `exclude_tests = true` is a no-op except where a language module
903/// overrides it (today: `RustCode`, which filters Rust `#[test]` /
904/// `#[cfg(test)]` items).
905///
906/// Comment nodes are additionally scanned for in-source suppression
907/// markers (see [`crate::suppression`]); any matches are attached to
908/// the enclosing [`FuncSpace::suppressed`]. Malformed `bca:` markers
909/// produce a warning to stderr — they do not abort the walk, so a
910/// single typo in one file cannot derail a workspace-wide run.
911///
912/// # Deprecated
913///
914/// Prefer [`analyze`], which accepts a [`Source`] carrying an explicit
915/// display name distinct from any on-disk path. This entry point
916/// remains for backwards compatibility for one minor release; it
917/// derives [`FuncSpace::name`] from `path` via lossy UTF-8 conversion.
918///
919/// # Errors
920///
921/// The return type carries [`MetricsError::EmptyRoot`] for forward
922/// compatibility, but the walker always pushes a synthetic top-level
923/// [`SpaceKind::Unit`][crate::SpaceKind] `FuncSpace` before walking,
924/// so this function does not return `Err` in practice today (see
925/// the variant doc).
926#[deprecated(
927 since = "0.0.26",
928 note = "Use `analyze(Source::new(lang, code).with_name(Some(name)), options)` instead — the path-positional shim derives the top-level FuncSpace name via lossy UTF-8 conversion and will be removed in a future release."
929)]
930// Hidden from rustdoc — see `metrics` above for the rationale (#256).
931#[doc(hidden)]
932pub fn metrics_with_options<'a, T: ParserTrait>(
933 parser: &'a T,
934 path: &'a Path,
935 options: MetricsOptions,
936) -> Result<FuncSpace, MetricsError> {
937 // Backwards-compat shim: derive the top-level name from `path` via
938 // lossy UTF-8 conversion, matching pre-#254 behaviour. The new
939 // `analyze` entry point lets callers supply a name explicitly.
940 metrics_inner(parser, Some(path.to_string_lossy().into_owned()), options)
941}
942
943// Per-node metric dispatch. Each `compute` call is paired with a bit
944// check against the caller's selection. The bit tests are cheap
945// (single AND-and-compare on a u16) and an unselected metric saves
946// both the call overhead and any per-node text-slice / token-table
947// work the metric does internally — Halstead in particular owns
948// `HalsteadMaps` allocations and is the headline cost saving for
949// `with_only(&[Metric::Loc])`. Extracted from `metrics_inner` so the
950// walker stays under clippy's 100-line ceiling.
951#[inline]
952fn compute_per_node<'a, T: ParserTrait>(
953 state: &mut State<'a>,
954 node: &Node<'a>,
955 code: &'a [u8],
956 selected: MetricSet,
957 func_space: bool,
958 unit: bool,
959 nesting_map: &mut HashMap<usize, (usize, usize, usize)>,
960) {
961 let last = &mut state.space;
962 if selected.contains(Metric::Cognitive) {
963 T::Cognitive::compute(node, code, &mut last.metrics.cognitive, nesting_map);
964 }
965 if selected.contains(Metric::Cyclomatic) {
966 T::Cyclomatic::compute(node, code, &mut last.metrics.cyclomatic);
967 }
968 if selected.contains(Metric::Halstead) {
969 T::Halstead::compute(node, code, &mut state.halstead_maps);
970 }
971 if selected.contains(Metric::Loc) {
972 T::Loc::compute(node, &mut last.metrics.loc, func_space, unit);
973 }
974 if selected.contains(Metric::Nom) {
975 T::Nom::compute(node, &mut last.metrics.nom);
976 }
977 if selected.contains(Metric::Tokens) {
978 T::Tokens::compute(node, &mut last.metrics.tokens);
979 }
980 if selected.contains(Metric::NArgs) {
981 T::NArgs::compute(node, &mut last.metrics.nargs);
982 }
983 if selected.contains(Metric::Exit) {
984 T::Exit::compute(node, code, &mut last.metrics.nexits);
985 }
986 if selected.contains(Metric::Abc) {
987 T::Abc::compute(node, code, &mut last.metrics.abc);
988 }
989 if selected.contains(Metric::Npm) {
990 T::Npm::compute(node, code, &mut last.metrics.npm);
991 }
992 if selected.contains(Metric::Npa) {
993 T::Npa::compute(node, code, &mut last.metrics.npa);
994 }
995}
996
/// Shared AST walker behind the metrics entry points in this file.
///
/// Walks the parse tree of `parser` iteratively (explicit stack, no
/// recursion) in source order, maintaining a stack of open spaces
/// (`state_stack`). For every visited node it:
///
/// 1. skips the whole subtree when `options.exclude_tests` is set and
///    the language's `should_skip_subtree` hook matches;
/// 2. calls `finalize` when the walk has left one or more spaces;
/// 3. opens a new space when the node promotes to a func space;
/// 4. applies any suppression marker found in a comment node;
/// 5. runs the selected per-node metrics against the innermost open
///    space.
///
/// `name` becomes the `name` of the returned top-level space and is
/// also used (falling back to `"<input>"`) as the location prefix of
/// malformed-marker warnings written to stderr.
///
/// # Errors
///
/// Returns [`MetricsError::EmptyRoot`] if no space is left on the
/// stack once the walk and the final `finalize` call are done.
pub(crate) fn metrics_inner<T: ParserTrait>(
    parser: &T,
    name: Option<String>,
    options: MetricsOptions,
) -> Result<FuncSpace, MetricsError> {
    // The suppression-warning diagnostic uses the caller-supplied
    // name when present; otherwise we fall back to a placeholder so
    // the warning still locates the offending line. All path-based
    // shims pass a lossy-stringified path here, matching pre-#254
    // behaviour byte-for-byte.
    let diagnostic_path = name.as_deref().unwrap_or("<input>");
    let selected = options.metrics;
    let code = parser.get_code();
    let node = parser.get_root();
    let mut cursor = node.cursor();
    // Work list of `(node, level)` pairs still to visit. `level` counts
    // enclosing func spaces, not raw tree depth: it only grows when a
    // node opens a new space (see `new_level` below).
    let mut stack = Vec::new();
    // Scratch buffer reused for each node's children.
    let mut children = Vec::new();
    let mut state_stack: Vec<State> = Vec::new();
    let mut last_level = 0;
    // Initialize nesting_map used for storing nesting information for cognitive.
    // Three kinds of nesting info: conditionals, functions and lambdas.
    let mut nesting_map = HashMap::<usize, (usize, usize, usize)>::default();
    nesting_map.insert(node.id(), (0, 0, 0));

    // Suppression markers are resolved inline during the walk rather
    // than queued for a post-finalize pass. When we visit a comment
    // node, the active `state_stack` already encodes the comment's
    // syntactic context: the topmost `SpaceKind::Function` entry is
    // the *innermost enclosing function* by construction, with no
    // ambiguity when sibling functions share a source line (issue
    // #289). The root `Unit` state — always at index 0 once the walk
    // has visited the AST root — owns file-scoped markers.

    // Some grammars (e.g. tree-sitter-mozcpp on unparseable input) return a
    // non-Unit root. Wrap with a synthetic Unit space spanning the whole
    // file so the top-level FuncSpace upholds the LOC invariant
    // `blank = sloc - ploc - only_comment_lines >= 0`.
    if T::Getter::get_space_kind_with_code(&node, code) != SpaceKind::Unit {
        let mut synthetic = FuncSpace::new::<T::Getter>(&node, code, SpaceKind::Unit, selected);
        synthetic
            .metrics
            .loc
            .init_unit_span(node.start_row(), node.end_row());
        state_stack.push(State {
            space: synthetic,
            halstead_maps: HalsteadMaps::new(),
        });
    }

    stack.push((node, 0));

    while let Some((node, level)) = stack.pop() {
        // Prune test-only subtrees before any per-metric work runs.
        // The hook is gated on `exclude_tests` so the default
        // `metrics()` entry point keeps emitting the pre-#182
        // numbers byte-for-byte.
        if options.exclude_tests && T::Checker::should_skip_subtree(&node, code) {
            continue;
        }

        // A lower level than the previous node means the walk has left
        // `last_level - level` spaces; `finalize` is asked to close them.
        if level < last_level {
            finalize::<T>(&mut state_stack, last_level - level, selected);
            last_level = level;
        }

        let kind = T::Getter::get_space_kind_with_code(&node, code);

        let func_space = T::Checker::promotes_to_func_space_with_code(&node, code);
        let unit = kind == SpaceKind::Unit;

        // Children of a space-opening node live one level deeper;
        // children of any other node stay at the current level.
        let new_level = if func_space {
            let state = State {
                space: FuncSpace::new::<T::Getter>(&node, code, kind, selected),
                halstead_maps: HalsteadMaps::new(),
            };
            state_stack.push(state);
            last_level = level + 1;
            last_level
        } else {
            level
        };

        // Scan comment nodes for suppression markers and apply them
        // immediately against `state_stack`. Doing this inline (rather
        // than queueing for a post-walk pass keyed on line number)
        // pins each marker to the syntactically nearest enclosing
        // function space — the only frame on the stack that the
        // grammar nested the comment inside. Line-only matching was
        // ambiguous when two sibling functions shared a source line
        // and the first-by-source-order won regardless of which body
        // actually contained the comment (issue #289).
        if T::Checker::is_comment(&node)
            && let Some(text) = node.utf8_text(code)
        {
            match parse_suppression_marker(text) {
                Ok(Some(s)) => apply_suppression(&mut state_stack, &s),
                Ok(None) => {}
                Err(e) => {
                    // Logged but non-fatal so a typo in one file
                    // cannot derail a workspace-wide walk. The
                    // malformed marker is dropped (no scope attached),
                    // which is the conservative behaviour: a typo
                    // should not accidentally silence anything. The
                    // `+ 1` converts tree-sitter's 0-based rows to the
                    // 1-based line numbers `FuncSpace::start_line` and
                    // the rest of this module report.
                    eprintln!("warning: {}:{}: {e}", diagnostic_path, node.start_row() + 1);
                }
            }
        }

        // Metrics are always attributed to the innermost open space.
        if let Some(state) = state_stack.last_mut() {
            compute_per_node::<T>(
                state,
                &node,
                code,
                selected,
                func_space,
                unit,
                &mut nesting_map,
            );
        }

        // Collect the children left-to-right, then push them reversed
        // so the first child is the next node popped (source order).
        cursor.reset(&node);
        if cursor.goto_first_child() {
            loop {
                children.push((cursor.node(), new_level));
                if !cursor.goto_next_sibling() {
                    break;
                }
            }
            for child in children.drain(..).rev() {
                stack.push(child);
            }
        }
    }

    // Close every space that is still open at the end of the walk.
    finalize::<T>(&mut state_stack, usize::MAX, selected);

    // Reserved error path: `MetricsError::EmptyRoot` is unreachable
    // today because the synthetic Unit push above (and every
    // language's translation_unit / module / source_file being a
    // `func_space`) keeps the state stack non-empty for every input,
    // including empty / whitespace-only / comment-only sources. The
    // `ok_or` is retained so a future walker change that legitimately
    // drains the stack surfaces a distinct error variant rather than
    // panicking or returning a bare `None`. See `MetricsError::EmptyRoot`
    // for the matching variant doc.
    //
    // NOTE(review): with `exclude_tests` set, a `should_skip_subtree`
    // hook that matched a Unit root node itself would `continue` before
    // any state is pushed and leave the stack empty here — confirm no
    // language hook can match the root.
    let mut state = state_stack.pop().ok_or(MetricsError::EmptyRoot)?;
    state.space.name = name;
    Ok(state.space)
}
1149
1150fn apply_suppression(state_stack: &mut [State], suppression: &Suppression) {
1151 // Both arms ultimately call `merge` on a `FuncSpace::suppressed`;
1152 // they differ only in *which* frame on the stack to target.
1153 //
1154 // - `File`: the topmost `Unit` frame — by construction the root
1155 // `state_stack[0]`, but we match on `SpaceKind::Unit` rather
1156 // than index 0 so the invariant is runtime-checked. The
1157 // synthetic Unit pushed by `metrics_inner` for non-Unit-root
1158 // grammars and every translation-unit/module/source-file being
1159 // a `func_space` keep `state_stack[0]` populated for every
1160 // input; a marker with no Unit frame on the stack would be a
1161 // bug elsewhere and is silently dropped rather than landing on
1162 // an arbitrary frame.
1163 // - `Function`: the topmost `SpaceKind::Function` frame — the
1164 // syntactically nearest enclosing function body. Class / struct
1165 // / trait spaces are skipped so a marker at class scope but
1166 // outside any method does not silence thresholds on the entire
1167 // class; authors who want class-wide suppression use `bca:
1168 // suppress-file` or repeat the marker on each method. A marker
1169 // outside every function body finds no `Function` frame and is
1170 // silently dropped — the issue's "no enclosing function" rule.
1171 let target = match suppression.kind {
1172 SuppressionKind::File => state_stack
1173 .iter_mut()
1174 .find(|s| matches!(s.space.kind, SpaceKind::Unit)),
1175 SuppressionKind::Function => state_stack
1176 .iter_mut()
1177 .rev()
1178 .find(|s| matches!(s.space.kind, SpaceKind::Function)),
1179 };
1180 if let Some(state) = target {
1181 state.space.suppressed.merge(&suppression.scope);
1182 }
1183}
1184
/// Per-traversal options for [`metrics_with_options`].
///
/// Two knobs are available: whether test-only subtrees are pruned
/// ([`MetricsOptions::exclude_tests`]) and which metrics are computed
/// ([`MetricsOptions::metrics`]).
///
/// Marked `#[non_exhaustive]` so future option fields can land
/// additively. Downstream callers must construct via the builder
/// methods rather than struct-literal syntax (rustc rejects external
/// struct literals on non-exhaustive types with E0639, including the
/// `..Default::default()` spread form). The defaults preserve every
/// metric value emitted by the pre-#182 [`metrics`] entry point.
///
/// ```
/// use big_code_analysis::MetricsOptions;
/// let opts = MetricsOptions::default().with_exclude_tests(true);
/// ```
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
#[non_exhaustive]
pub struct MetricsOptions {
    /// When true, the traversal asks the language module to skip
    /// test-only subtrees (e.g. Rust `#[test]` / `#[cfg(test)]`
    /// functions and modules). Only languages that override the
    /// internal `should_skip_subtree` hook honor this; others ignore
    /// the flag. Set via [`MetricsOptions::with_exclude_tests`].
    pub exclude_tests: bool,
    /// Which metrics to compute. Defaults to [`MetricSet::all`] —
    /// every metric is enabled, matching the pre-#257 behaviour.
    /// Restrict via [`MetricsOptions::with_only`] (resolves metric
    /// dependencies) or [`MetricsOptions::with_metric_set`] (takes the
    /// set verbatim).
    pub metrics: MetricSet,
}
1212
1213impl MetricsOptions {
1214 /// Builder-style setter for [`MetricsOptions::exclude_tests`].
1215 ///
1216 /// Provided because `MetricsOptions` is `#[non_exhaustive]` — the
1217 /// struct-literal form is unavailable to downstream crates, so
1218 /// external callers chain `MetricsOptions::default()
1219 /// .with_exclude_tests(true)` instead.
1220 #[inline]
1221 #[must_use]
1222 pub fn with_exclude_tests(mut self, exclude_tests: bool) -> Self {
1223 self.exclude_tests = exclude_tests;
1224 self
1225 }
1226
1227 /// Restrict computation to the given metrics. Metrics outside
1228 /// this set are skipped during the walk; their `Stats` fields on
1229 /// [`CodeMetrics`] remain at their `Default` value and are
1230 /// elided from the [`Serialize`] output. Pass an empty slice to
1231 /// disable every metric (the walker still runs and produces the
1232 /// space tree, but no metric values are populated).
1233 ///
1234 /// # Dependencies
1235 ///
1236 /// Derived metrics implicitly pull in the inputs they require:
1237 ///
1238 /// - [`Metric::Mi`] adds [`Metric::Loc`], [`Metric::Cyclomatic`],
1239 /// [`Metric::Halstead`].
1240 /// - [`Metric::Wmc`] adds [`Metric::Cyclomatic`] and
1241 /// [`Metric::Nom`].
1242 ///
1243 /// This auto-resolution is silent: a caller asking for `Mi`
1244 /// alone gets a populated `Mi` value, not a zero. See
1245 /// [`Metric::dependencies`] for the source of truth.
1246 ///
1247 /// # Examples
1248 ///
1249 /// ```
1250 /// use big_code_analysis::{Metric, MetricsOptions};
1251 ///
1252 /// // Compute LoC only.
1253 /// let _opts = MetricsOptions::default().with_only(&[Metric::Loc]);
1254 ///
1255 /// // Compute Mi: Loc + Cyclomatic + Halstead are auto-added.
1256 /// let _opts = MetricsOptions::default().with_only(&[Metric::Mi]);
1257 /// ```
1258 #[inline]
1259 #[must_use]
1260 pub fn with_only(mut self, metrics: &[Metric]) -> Self {
1261 self.metrics = MetricSet::from_slice_with_deps(metrics);
1262 self
1263 }
1264
1265 /// Restrict computation to an already-resolved [`MetricSet`].
1266 ///
1267 /// # Caller responsibility
1268 ///
1269 /// The input set MUST be closed under
1270 /// [`Metric::dependencies`] before it reaches this builder.
1271 /// Use [`MetricSet::from_slice_with_deps`] to construct a
1272 /// dependency-closed set from a slice of metric names, or call
1273 /// [`MetricsOptions::with_only`] (which performs the closure
1274 /// internally) when you have a `&[Metric]` rather than a
1275 /// pre-built set.
1276 ///
1277 /// This builder is NOT equivalent to
1278 /// [`MetricsOptions::with_only`]: `with_only` runs the closure
1279 /// resolver; `with_metric_set` consumes the set verbatim and
1280 /// trusts the caller. The two methods are interchangeable only
1281 /// when the input is already closed.
1282 ///
1283 /// # Pitfall
1284 ///
1285 /// Passing an unresolved set silently corrupts derived metrics
1286 /// — the walker computes [`Metric::Mi`] or [`Metric::Wmc`]
1287 /// using zero-valued dependency inputs and emits a number with
1288 /// no error. For example:
1289 ///
1290 /// ```
1291 /// use big_code_analysis::{Metric, MetricSet, MetricsOptions};
1292 ///
1293 /// // WRONG: `Mi` selected without its dependencies — the
1294 /// // resulting MI value is garbage (formula divides by a
1295 /// // zero-valued Loc / Halstead).
1296 /// let bad = MetricSet::empty().with(Metric::Mi);
1297 /// let _opts = MetricsOptions::default().with_metric_set(bad);
1298 ///
1299 /// // RIGHT: closure resolved upstream; `with_metric_set`
1300 /// // attaches the already-closed set.
1301 /// let good = MetricSet::from_slice_with_deps(&[Metric::Mi]);
1302 /// let _opts = MetricsOptions::default().with_metric_set(good);
1303 /// ```
1304 ///
1305 /// The closure-resolved form above is equivalent to
1306 /// `MetricsOptions::with_only(&[Metric::Mi])` — prefer that
1307 /// builder if you don't already have a `MetricSet`.
1308 #[inline]
1309 #[must_use]
1310 pub fn with_metric_set(mut self, metrics: MetricSet) -> Self {
1311 self.metrics = metrics;
1312 self
1313 }
1314}
1315
/// Configuration options for computing the metrics of a code.
///
/// Consumed by the [`Metrics`] callback: `path` supplies the name of
/// the top-level space and `options` is forwarded to the walker.
///
/// Marked `#[non_exhaustive]` so future config fields can land
/// additively. Downstream callers must construct via the builder
/// methods rather than struct-literal syntax (rustc rejects external
/// struct literals on non-exhaustive types with E0639, including the
/// `..Default::default()` spread form).
///
/// ```
/// use std::path::PathBuf;
/// use big_code_analysis::{MetricsCfg, MetricsOptions};
///
/// let cfg = MetricsCfg::new(PathBuf::from("lib.rs"))
///     .with_options(MetricsOptions::default().with_exclude_tests(true));
/// ```
#[derive(Debug, Default)]
#[non_exhaustive]
pub struct MetricsCfg {
    /// Path to the file containing the code. The `Metrics` callback
    /// converts it lossily to UTF-8 to name the top-level space.
    pub path: PathBuf,
    /// Per-traversal options forwarded to [`metrics_with_options`].
    pub options: MetricsOptions,
}
1339
1340impl MetricsCfg {
1341 /// Build a `MetricsCfg` for `path` with default options. Chain
1342 /// [`MetricsCfg::with_options`] to override the per-traversal
1343 /// flags. Required because `MetricsCfg` is `#[non_exhaustive]` —
1344 /// downstream crates cannot use the struct-literal form.
1345 #[inline]
1346 #[must_use]
1347 pub fn new(path: PathBuf) -> Self {
1348 Self {
1349 path,
1350 ..Default::default()
1351 }
1352 }
1353
1354 /// Builder-style setter for [`MetricsCfg::options`].
1355 #[inline]
1356 #[must_use]
1357 pub fn with_options(mut self, options: MetricsOptions) -> Self {
1358 self.options = options;
1359 self
1360 }
1361}
1362
/// Type tag identifying the metric-computation action; carries no data.
///
/// Only used at the type level, through its [`Callback`] impl.
pub struct Metrics {
    // Private zero-sized field: keeps the struct from being built with
    // a literal outside this module.
    _guard: (),
}
1367
1368impl Callback for Metrics {
1369 type Res = std::io::Result<()>;
1370 type Cfg = MetricsCfg;
1371
1372 fn call<T: ParserTrait>(cfg: Self::Cfg, parser: &T) -> Self::Res {
1373 // `MetricsCfg::path` is the legacy filesystem-keyed identity
1374 // for this callback. The new `analyze` entry point fully
1375 // supersedes the path-positional API, but this internal
1376 // callback site still has a `&Path` in hand, so use the
1377 // shared `metrics_inner` directly with a lossy-string name —
1378 // matching pre-#254 behaviour byte-for-byte.
1379 let name = Some(cfg.path.to_string_lossy().into_owned());
1380 match metrics_inner(parser, name, cfg.options) {
1381 Ok(space) => dump_root(&space),
1382 Err(_) => Ok(()),
1383 }
1384 }
1385}
1386
1387#[cfg(test)]
1388// The lossy-path / synthetic-Unit tests below intentionally exercise
1389// the deprecated path-positional entry points so we have regression
1390// coverage on the shim even after the recommended seam moved to
1391// `analyze(Source { ... }, ...)`. Scope the deprecation allowance to
1392// the whole module so individual tests do not need per-call
1393// attributes.
1394#[allow(deprecated)]
1395#[allow(
1396 clippy::float_cmp,
1397 clippy::cast_precision_loss,
1398 clippy::cast_possible_truncation,
1399 clippy::cast_sign_loss,
1400 clippy::similar_names,
1401 clippy::doc_markdown,
1402 clippy::needless_raw_string_hashes,
1403 clippy::too_many_lines
1404)]
1405mod tests {
1406 use crate::MetricsOptions;
1407 use crate::metrics;
1408 use crate::{CppParser, ParserTrait, SpaceKind, check_func_space};
1409
1410 /// Positive coverage for the C++ function-space predicates on the
1411 /// only `function_definition` `kind_id` (343) that
1412 /// `tree-sitter-mozcpp` currently emits. The structural
1413 /// `FunctionDefinition*` contract for the aliased kind_ids
1414 /// (489/491/494) that no observed input parses to is documented
1415 /// at the predicate call sites in `src/checker.rs` and
1416 /// `src/getter.rs` — see issue #285.
1417 #[test]
1418 fn cpp_function_definition_is_classified_as_function() {
1419 use crate::Cpp;
1420 use crate::checker::Checker;
1421 use crate::getter::Getter;
1422 use crate::langs::CppCode;
1423 use crate::traits::Search;
1424
1425 let source = "int the_func(int x) { return x; }\n";
1426 let path = std::path::PathBuf::from("fd.cc");
1427 let parser = CppParser::new(source.as_bytes().to_vec(), &path, None);
1428 let root = parser.get_root();
1429
1430 // Walk for any `FunctionDefinition*` variant (FD/FD2/FD3/FD4)
1431 // so the test stays valid if a future grammar bump starts
1432 // emitting one of the higher-numbered aliases.
1433 let fn_node = root
1434 .first_occurrence(|id| {
1435 Cpp::FunctionDefinition == id
1436 || Cpp::FunctionDefinition2 == id
1437 || Cpp::FunctionDefinition3 == id
1438 || Cpp::FunctionDefinition4 == id
1439 })
1440 .expect("parse must produce a function_definition node");
1441
1442 assert!(
1443 CppCode::is_func(&fn_node),
1444 "is_func must return true for a function_definition"
1445 );
1446 assert!(
1447 CppCode::is_func_space(&fn_node),
1448 "is_func_space must return true for a function_definition"
1449 );
1450 assert_eq!(
1451 CppCode::get_space_kind(&fn_node),
1452 SpaceKind::Function,
1453 "get_space_kind must classify function_definition as Function"
1454 );
1455 assert_eq!(
1456 CppCode::get_func_space_name(&fn_node, source.as_bytes()),
1457 Some("the_func"),
1458 "get_func_space_name must extract the declarator identifier"
1459 );
1460 }
1461
1462 #[test]
1463 fn c_scope_resolution_operator() {
1464 check_func_space::<CppParser, _>(
1465 "void Foo::bar(){
1466 return;
1467 }",
1468 "foo.c",
1469 |func_space| {
1470 insta::assert_json_snapshot!(
1471 func_space.spaces[0].name,
1472 @r###""Foo::bar""###
1473 );
1474 },
1475 );
1476 }
1477
1478 /// Regression for issue #80 — when tree-sitter-mozcpp returns a non-Unit
1479 /// root (e.g. an `ERROR` root for code it cannot fully parse, as
1480 /// happens for parts of DeepSpeech's KenLM and OpenFst sources), the
1481 /// top-level `FuncSpace` must still be a `Unit` spanning the whole
1482 /// file, with `blank >= 0` and `sloc >= ploc`.
1483 #[test]
1484 fn cpp_error_root_yields_unit_top_level_space() {
1485 // This snippet (a chunk of kenlm/lm/model.hh shape) is rejected by
1486 // tree-sitter-mozcpp as a clean translation_unit and surfaces as an
1487 // ERROR root node in the parse tree. Verified at the time of writing
1488 // against tree-sitter-mozcpp 0.20.4.
1489 let source = "#ifndef A\n\
1490 namespace a { namespace b { namespace c {\n\
1491 template <class S, class V> class C : publi\n";
1492
1493 let path = std::path::PathBuf::from("error_root.cc");
1494 let parser = CppParser::new(source.as_bytes().to_vec(), &path, None);
1495 // Sanity: the grammar really does fall back to a non-Unit root for
1496 // this snippet — otherwise the synthetic-Unit code path is not
1497 // exercised by this test.
1498 assert!(
1499 parser.get_root().0.is_error(),
1500 "test premise broken: grammar must yield ERROR root for this snippet"
1501 );
1502
1503 let space = metrics(&parser, &path).unwrap();
1504
1505 assert_eq!(
1506 space.kind,
1507 SpaceKind::Unit,
1508 "top-level FuncSpace must be Unit, not {:?}",
1509 space.kind
1510 );
1511
1512 let loc = &space.metrics.loc;
1513 let sloc = loc.sloc();
1514 let ploc = loc.ploc();
1515 let blank = loc.blank();
1516 let line_count = source.lines().count();
1517
1518 assert!(
1519 sloc >= ploc,
1520 "sloc ({sloc}) must be >= ploc ({ploc}) for the file-level space"
1521 );
1522 assert!(blank >= 0.0, "blank ({blank}) must be >= 0");
1523 assert_eq!(
1524 sloc as usize, line_count,
1525 "sloc ({sloc}) should match the file's line count ({line_count})"
1526 );
1527 }
1528
1529 /// Lesson-9 contract (`docs/development/lessons_learned.md` §9,
1530 /// issue #193): for every supported language, parsing any input —
1531 /// including malformed or truncated — must yield a file-level
1532 /// `FuncSpace` whose `kind == SpaceKind::Unit` with `sloc >= ploc`
1533 /// and `blank >= 0`.
1534 ///
1535 /// This helper pins the **contract** at the public API surface
1536 /// (`metrics()` always returns a `Unit` top-level space). For most
1537 /// grammars the parse root is already the canonical translation-
1538 /// unit kind regardless of input, so the synthetic-Unit wrapper
1539 /// (`src/spaces.rs:~385`) is not actually exercised by tests
1540 /// using this helper alone. They serve as future-proofing: a
1541 /// grammar bump that starts promoting an inner kind to root on
1542 /// partial input would fail here before shipping a non-`Unit`
1543 /// top-level space to downstream consumers.
1544 ///
1545 /// Tests that need to exercise the synthetic-Unit wrapper itself
1546 /// (i.e., the path triggered by an `ERROR`-root parse) must also
1547 /// assert `parser.get_root().0.is_error()` before calling this
1548 /// helper. See `cpp_error_root_yields_unit_top_level_space` and
1549 /// `lua_partial_input_yields_synthetic_unit_wrapper` — those two
1550 /// are the only tests in the corpus that today exercise the
1551 /// wrapper path. Issue #220 tracks finding additional per-grammar
1552 /// fixtures that surface ERROR roots so each language can have
1553 /// both a contract test and a wrapper-exercising test.
1554 fn assert_top_level_space_is_unit_contract<P: ParserTrait>(source: &str, filename: &str) {
1555 let path = std::path::PathBuf::from(filename);
1556 let parser = P::new(source.as_bytes().to_vec(), &path, None);
1557 let space = metrics(&parser, &path).expect("metrics must yield a top-level space");
1558 assert_eq!(
1559 space.kind,
1560 SpaceKind::Unit,
1561 "top-level FuncSpace for {filename:?} must be Unit, not {:?}",
1562 space.kind
1563 );
1564 let loc = &space.metrics.loc;
1565 let sloc = loc.sloc();
1566 let ploc = loc.ploc();
1567 let blank = loc.blank();
1568 assert!(
1569 sloc >= ploc,
1570 "sloc ({sloc}) must be >= ploc ({ploc}) for the file-level space of {filename:?}",
1571 );
1572 assert!(
1573 blank >= 0.0,
1574 "blank ({blank}) must be >= 0 for the file-level space of {filename:?}",
1575 );
1576 }
1577
1578 /// Like [`assert_top_level_space_is_unit_contract`] but additionally
1579 /// asserts the parse root is an `ERROR` node, so the test actually
1580 /// exercises the synthetic-Unit wrapper in `metrics()` rather than
1581 /// the contract-only path. Use this for languages where a fixture
1582 /// is known to make the grammar return ERROR (currently: Lua, C++
1583 /// via mozcpp).
1584 fn assert_partial_input_yields_synthetic_unit_wrapper<P: ParserTrait>(
1585 source: &str,
1586 filename: &str,
1587 ) {
1588 let path = std::path::PathBuf::from(filename);
1589 let parser = P::new(source.as_bytes().to_vec(), &path, None);
1590 assert!(
1591 parser.get_root().0.is_error(),
1592 "test premise broken: grammar must yield ERROR root for {filename:?}",
1593 );
1594 assert_top_level_space_is_unit_contract::<P>(source, filename);
1595 }
1596
1597 #[test]
1598 fn python_top_level_space_is_unit_contract() {
1599 assert_top_level_space_is_unit_contract::<crate::PythonParser>(
1600 "def foo(x):\n return x +\n",
1601 "partial.py",
1602 );
1603 }
1604
1605 #[test]
1606 fn javascript_top_level_space_is_unit_contract() {
1607 assert_top_level_space_is_unit_contract::<crate::JavascriptParser>(
1608 "function foo(x) {\n return x +\n",
1609 "partial.js",
1610 );
1611 }
1612
1613 #[test]
1614 fn mozjs_top_level_space_is_unit_contract() {
1615 assert_top_level_space_is_unit_contract::<crate::MozjsParser>(
1616 "function foo(x) {\n return x +\n",
1617 "partial.js",
1618 );
1619 }
1620
1621 #[test]
1622 fn typescript_top_level_space_is_unit_contract() {
1623 assert_top_level_space_is_unit_contract::<crate::TypescriptParser>(
1624 "function foo(x: number): number {\n return x +\n",
1625 "partial.ts",
1626 );
1627 }
1628
1629 #[test]
1630 fn tsx_top_level_space_is_unit_contract() {
1631 assert_top_level_space_is_unit_contract::<crate::TsxParser>(
1632 "function Foo(x: number): JSX.Element {\n return <div>{x +\n",
1633 "partial.tsx",
1634 );
1635 }
1636
1637 #[test]
1638 fn java_top_level_space_is_unit_contract() {
1639 assert_top_level_space_is_unit_contract::<crate::JavaParser>(
1640 "class Foo {\n void bar(int x) {\n return x +\n",
1641 "Partial.java",
1642 );
1643 }
1644
1645 #[test]
1646 fn kotlin_top_level_space_is_unit_contract() {
1647 assert_top_level_space_is_unit_contract::<crate::KotlinParser>(
1648 "class Foo {\n fun bar(x: Int): Int {\n return x +\n",
1649 "Partial.kt",
1650 );
1651 }
1652
1653 #[test]
1654 fn go_top_level_space_is_unit_contract() {
1655 assert_top_level_space_is_unit_contract::<crate::GoParser>(
1656 "package main\nfunc foo(x int) int {\n return x +\n",
1657 "partial.go",
1658 );
1659 }
1660
1661 #[test]
1662 fn rust_top_level_space_is_unit_contract() {
1663 assert_top_level_space_is_unit_contract::<crate::RustParser>(
1664 "fn foo(x: i32) -> i32 {\n return x +\n",
1665 "partial.rs",
1666 );
1667 }
1668
1669 #[test]
1670 fn csharp_top_level_space_is_unit_contract() {
1671 assert_top_level_space_is_unit_contract::<crate::CsharpParser>(
1672 "class Foo {\n void Bar(int x) {\n return x +\n",
1673 "Partial.cs",
1674 );
1675 }
1676
1677 #[test]
1678 fn bash_top_level_space_is_unit_contract() {
1679 assert_top_level_space_is_unit_contract::<crate::BashParser>(
1680 "function foo() {\n echo \"x +\n",
1681 "partial.sh",
1682 );
1683 }
1684
1685 /// Lua's grammar surfaces an `ERROR` root for this fixture
1686 /// (tree-sitter-lua 0.4.x), so this test exercises the
1687 /// synthetic-Unit wrapper directly, on par with the C++
1688 /// regression in `cpp_error_root_yields_unit_top_level_space`.
1689 /// The 16 sibling `*_top_level_space_is_unit_contract` tests
1690 /// only pin the public-API contract; only this and the C++ test
1691 /// actually trigger the wrapper code path. See #220.
1692 #[test]
1693 fn lua_partial_input_yields_synthetic_unit_wrapper() {
1694 assert_partial_input_yields_synthetic_unit_wrapper::<crate::LuaParser>(
1695 "function foo(x)\n return x +\n",
1696 "partial.lua",
1697 );
1698 }
1699
1700 #[test]
1701 fn tcl_top_level_space_is_unit_contract() {
1702 assert_top_level_space_is_unit_contract::<crate::TclParser>(
1703 "proc foo {x} {\n return [expr {$x +\n",
1704 "partial.tcl",
1705 );
1706 }
1707
/// Perl leg of the top-level `Unit` contract check. The fixture is a
/// `sub` that ends in the middle of its `return` expression.
#[test]
fn perl_top_level_space_is_unit_contract() {
    let truncated = "sub foo {\n my $x = shift;\n return $x +\n";
    let file_name = "partial.pl";
    assert_top_level_space_is_unit_contract::<crate::PerlParser>(truncated, file_name);
}
1715
/// PHP leg of the top-level `Unit` contract check. The fixture is a
/// function after the `<?php` tag, truncated inside its `return`.
#[test]
fn php_top_level_space_is_unit_contract() {
    let truncated = "<?php\nfunction foo($x) {\n return $x +\n";
    let file_name = "partial.php";
    assert_top_level_space_is_unit_contract::<crate::PhpParser>(truncated, file_name);
}
1723
/// Elixir leg of the top-level `Unit` contract check. The fixture is a
/// `defmodule` / `def` pair with no closing `end` and a dangling `+`.
#[test]
fn elixir_top_level_space_is_unit_contract() {
    let truncated = "defmodule Foo do\n def bar(x) do\n x +\n";
    let file_name = "partial.ex";
    assert_top_level_space_is_unit_contract::<crate::ElixirParser>(truncated, file_name);
}
1731
// Regression for #275. The source-aware Getter has to pull the
// human-readable head name out of every macro-shaped declaration.
// The first wave 2 implementation searched for an `Identifier` /
// `Alias` / `Call` that was a *direct* child of the outer Call; the
// tree-sitter-elixir grammar, however, puts the head inside an
// `Arguments` node, so each promoted Class / Function space came out
// as `<anonymous>` even though the source named it.
#[test]
fn elixir_func_space_names_resolve_through_arguments_wrapper() {
    let path = std::path::PathBuf::from("foo.ex");
    let code = "defmodule Foo.Bar do\n def hello(x), do: x\n defp helper, do: :ok\n defmodule Inner do\n def i, do: 1\n end\nend\n";
    let parser = crate::ElixirParser::new(code.as_bytes().to_vec(), &path, None);
    let unit = metrics(&parser, &path).expect("metrics must yield a top-level space");

    // The top-level Unit carries the file name.
    assert_eq!(unit.name.as_deref(), Some("foo.ex"));

    // The outer `defmodule` becomes a Class named `Foo.Bar`.
    let module = unit.spaces.first().expect("outer class space");
    assert_eq!(module.kind, SpaceKind::Class);
    assert_eq!(module.name.as_deref(), Some("Foo.Bar"));

    // Its direct children are `hello`, `helper` and `Inner`; a child
    // without a name would show up as "?".
    let mut child_names = Vec::new();
    for child in &module.spaces {
        child_names.push(child.name.as_deref().unwrap_or("?"));
    }
    assert_eq!(child_names, ["hello", "helper", "Inner"]);

    // The `def` inside the nested `defmodule` resolves as well.
    let nested = module
        .spaces
        .iter()
        .find(|candidate| candidate.kind == SpaceKind::Class)
        .expect("nested class");
    let mut nested_names = Vec::new();
    for child in &nested.spaces {
        nested_names.push(child.name.as_deref().unwrap_or("?"));
    }
    assert_eq!(nested_names, ["i"]);
}
1775
/// `Preproc` (preprocessor directives) and `Ccomment` (comments) are
/// auxiliary grammars that expose the same `ParserTrait` API as the
/// language parsers, so the lesson-9 contract applies to them as
/// well: if a grammar bump promoted an inner construct to root, the
/// file-level space would stop being `Unit`. This is the `Preproc`
/// half, fed an unclosed `#ifdef` with a truncated macro body.
#[test]
fn preproc_top_level_space_is_unit_contract() {
    let truncated = "#ifdef FOO\n#define BAR(x) (x +\n";
    let file_name = "partial.h";
    assert_top_level_space_is_unit_contract::<crate::PreprocParser>(truncated, file_name);
}
1788
/// `Ccomment` leg of the top-level `Unit` contract check. The fixture
/// is a block comment that is opened but never terminated.
#[test]
fn ccomment_top_level_space_is_unit_contract() {
    let truncated = "/* unterminated comment\n spanning several\n";
    let file_name = "partial.c";
    assert_top_level_space_is_unit_contract::<crate::CcommentParser>(truncated, file_name);
}
1796
/// tree-sitter-ruby hands back a `program` (Unit) root whatever the
/// input is, which makes the synthetic-Unit fallback unreachable for
/// Ruby today. The contract is pinned anyway: should a later grammar
/// bump begin promoting an inner kind to root, this test breaks.
#[test]
fn ruby_top_level_space_is_unit_contract() {
    // A method definition with no `end` and a parameter list that is
    // never closed; tree-sitter-ruby reports both as ERROR children
    // of `program`.
    let truncated = "class Foo\n def bar(\n x\n ";
    let file_name = "partial.rb";
    assert_top_level_space_is_unit_contract::<crate::RubyParser>(truncated, file_name);
}
1812
/// Regression for issue #128.
///
/// The deprecated path-positional entry point still builds the
/// top-level name from `path` through a lossy UTF-8 conversion. File
/// names that are not valid UTF-8 are legal on Linux (ext4, tmpfs,
/// ...); for such a path the top-level name still has to be
/// `Some(...)` and never `None`, because `None` is the parse-error
/// sentinel and downstream JSON consumers need to tell the two apart.
///
/// Since #254, callers that want to skip the lossy round-trip hand an
/// explicit `Source::name` to [`analyze`]; see
/// `analyze_in_memory_snippet_carries_caller_supplied_name` below.
#[cfg(unix)]
#[test]
fn non_utf8_path_yields_lossy_top_level_name() {
    use std::ffi::OsStr;
    use std::os::unix::ffi::OsStrExt;

    // `0xFF` and `0xFE` never occur in well-formed UTF-8. They sit
    // between two ASCII runs so that, after lossy conversion, the
    // name is guaranteed to contain U+FFFD while the ASCII on both
    // sides stays recognisable.
    let path = std::path::PathBuf::from(OsStr::from_bytes(b"foo_\xFF\xFE_bar.rs"));
    assert!(
        path.to_str().is_none(),
        "test premise broken: path must be non-UTF-8 for this test to be meaningful"
    );

    let parser = CppParser::new("int a = 42;".as_bytes().to_vec(), &path, None);
    #[allow(deprecated)]
    let space = metrics(&parser, &path).expect("metrics must yield a top-level space");

    let name = space
        .name
        .as_deref()
        .expect("top-level FuncSpace name must be Some, not the parse-error sentinel None");

    let has_replacement_char = name.contains('\u{FFFD}');
    assert!(
        has_replacement_char,
        "expected U+FFFD replacement char in lossy name, got {name:?}"
    );

    let keeps_prefix = name.starts_with("foo_");
    let keeps_suffix = name.ends_with("_bar.rs");
    assert!(
        keeps_prefix && keeps_suffix,
        "lossy name must preserve the surrounding ASCII bytes, got {name:?}"
    );
}
1861
/// Post-#254 contract: when the caller supplies `Source::name`,
/// `analyze` performs no lossy round-trip and the top-level name is
/// the caller's string, byte-for-byte. In-memory snippets therefore
/// no longer need a `Path` to identify the resulting `FuncSpace`.
#[test]
fn analyze_in_memory_snippet_carries_caller_supplied_name() {
    use crate::{Source, analyze};

    let snippet = b"int a = 42;";
    let caller_name = String::from("in-memory.cpp");
    let named_source = Source::new(crate::LANG::Cpp, snippet).with_name(Some(caller_name));

    let space =
        analyze(named_source, MetricsOptions::default()).expect("analyze must yield a top-level space");

    assert_eq!(
        space.name.as_deref(),
        Some("in-memory.cpp"),
        "top-level name must be the caller-supplied string, byte-for-byte"
    );
}
1881
/// When `Source::name` is `None`, `analyze` leaves the top-level
/// `FuncSpace::name` as `None` too. The entry points that predate
/// #254 always forced `Some(...)`; with the new API a caller can opt
/// out of naming.
#[test]
fn analyze_without_name_leaves_top_level_name_none() {
    use crate::{Source, analyze};

    let unnamed_source = Source::new(crate::LANG::Cpp, b"int a = 42;");
    let options = MetricsOptions::default();
    let space = analyze(unnamed_source, options).expect("analyze must yield a top-level space");

    assert!(
        space.name.is_none(),
        "top-level name must be None when Source::name is None, got {:?}",
        space.name
    );
}
1900
1901 // --- #306: file-scope suppression requires a Unit target ------
1902 //
1903 // `apply_suppression` historically picked `state_stack.first_mut()`
1904 // for the `File` arm, relying on the convention that the root
1905 // frame is always `SpaceKind::Unit`. The fix tightens that to an
1906 // explicit `SpaceKind::Unit` predicate so an accidentally
1907 // non-Unit root cannot silently swallow a file marker. These
1908 // tests pin the new behaviour: they construct a `State` slice by
1909 // hand (bypassing the parser) so the invariant violation is
1910 // observable in isolation.
1911
1912 fn make_state<'a>(kind: SpaceKind) -> super::State<'a> {
1913 // Synthetic State constructor for `apply_suppression` tests.
1914 // Line spans are zeroed because these tests only inspect
1915 // `space.kind` and `space.suppressed`; do not reuse this helper
1916 // for tests that depend on `start_line` / `end_line` /
1917 // `metrics`.
1918 super::State {
1919 space: super::FuncSpace {
1920 name: None,
1921 start_line: 0,
1922 end_line: 0,
1923 kind,
1924 spaces: Vec::new(),
1925 metrics: super::CodeMetrics::default(),
1926 suppressed: super::SuppressionScope::default(),
1927 },
1928 halstead_maps: crate::metrics::halstead::HalsteadMaps::new(),
1929 }
1930 }
1931
1932 fn file_suppression_all() -> crate::suppression::Suppression {
1933 crate::suppression::Suppression {
1934 kind: crate::suppression::SuppressionKind::File,
1935 scope: crate::suppression::SuppressionScope::All,
1936 source: crate::suppression::SuppressionSource::Native,
1937 }
1938 }
1939
/// Ordinary stack shape: a `Unit` root followed by a `Function`
/// frame. The file marker must end up on the root and only there.
#[test]
fn file_suppression_attaches_to_unit_frame() {
    let marker = file_suppression_all();
    let mut frames = vec![make_state(SpaceKind::Unit), make_state(SpaceKind::Function)];

    super::apply_suppression(&mut frames, &marker);

    let (root, nested) = (&frames[0].space, &frames[1].space);
    assert!(
        root.suppressed.is_all(),
        "file marker (scope=All) must attach to the Unit root frame"
    );
    assert!(
        nested.suppressed.is_empty(),
        "file marker must not attach to a non-Unit frame"
    );
}
1953
/// Broken-invariant case: no frame on the stack is `Unit`, and the
/// root in particular is a `Function`. The old `first_mut()` code
/// would have wrongly attached the file marker to that root; the
/// explicit `Unit` predicate has to leave every frame untouched.
#[test]
fn file_suppression_skips_non_unit_root_frame() {
    let marker = file_suppression_all();
    let mut frames = vec![
        make_state(SpaceKind::Function),
        make_state(SpaceKind::Class),
    ];

    super::apply_suppression(&mut frames, &marker);

    for frame in &frames {
        assert!(
            frame.space.suppressed.is_empty(),
            "file marker must be silently dropped when no Unit frame exists"
        );
    }
}
1970
/// Pins the search semantics of the new predicate, which is "first
/// frame whose kind is `Unit`" rather than "first frame". Here the
/// root invariant is violated: a `Function` frame sits at index 0 and
/// the `Unit` frame at index 1. The marker must reach the `Unit`
/// frame instead of being dropped; the old `first_mut()` code would
/// have let the `Function` root absorb it.
#[test]
fn file_suppression_finds_unit_deeper_in_stack() {
    let marker = file_suppression_all();
    let mut frames = vec![make_state(SpaceKind::Function), make_state(SpaceKind::Unit)];

    super::apply_suppression(&mut frames, &marker);

    let (function_frame, unit_frame) = (&frames[0].space, &frames[1].space);
    assert!(
        function_frame.suppressed.is_empty(),
        "non-Unit frame above the Unit must not absorb the file marker"
    );
    assert!(
        unit_frame.suppressed.is_all(),
        "file marker must land on the Unit frame even when not at index 0"
    );
}
1991
/// With no frames at all, `apply_suppression` must neither panic nor
/// do anything. Returning from this function is the whole proof: the
/// callee only receives `&mut [State]` and so cannot grow the stack,
/// which would make a trailing `is_empty()` check a dead assertion.
#[test]
fn file_suppression_empty_stack_is_silent_noop() {
    let marker = file_suppression_all();
    let mut no_frames: Vec<super::State<'_>> = vec![];
    super::apply_suppression(&mut no_frames, &marker);
}
2002
2003 // --- #182: exclude_tests for Rust -----------------------------
2004 //
2005 // These exercise both flag values (`exclude_tests = false` is
2006 // the documented backward-compatible default; `true` opts in to
2007 // the new pruning). They are anchored on integer-valued
2008 // accessors (`nom_functions_sum`, `cyclomatic_sum`,
2009 // `cognitive_sum`, `n_operators`) rather than float magnitudes,
2010 // because Halstead floats are bit-brittle (lessons_learned.md).
2011
2012 mod exclude_tests_rust {
2013 use crate::metrics_with_options;
2014 use crate::{MetricsOptions, ParserTrait, RustParser};
2015 use std::path::PathBuf;
2016
2017 fn analyse(source: &str, exclude_tests: bool) -> crate::FuncSpace {
2018 let path = PathBuf::from("lib.rs");
2019 let parser = RustParser::new(source.as_bytes().to_vec(), &path, None);
2020 metrics_with_options(
2021 &parser,
2022 &path,
2023 MetricsOptions::default().with_exclude_tests(exclude_tests),
2024 )
2025 .expect("metrics must yield a top-level space")
2026 }
2027
2028 // Production function plus an outer-attribute `#[test]`
2029 // function. With pruning on, the unit-level counts must
2030 // drop to the production function alone.
2031 #[test]
2032 fn outer_test_attribute_elides_function() {
2033 let source = "\
2034fn prod() -> i32 { 1 + 2 }
2035
2036#[test]
2037fn t() { assert_eq!(1 + 1, 2); }
2038";
2039 let baseline = analyse(source, false);
2040 let pruned = analyse(source, true);
2041
2042 // Baseline: both functions counted (2 functions).
2043 assert_eq!(baseline.metrics.nom.functions_sum() as usize, 2);
2044 // Pruned: only the production function (1 function).
2045 assert_eq!(pruned.metrics.nom.functions_sum() as usize, 1);
2046 // Cyclomatic should also drop: prod has 1, test fn body
2047 // adds its own branches via assert_eq!. We use
2048 // non-strict inequality (`pruned <= baseline`) here so
2049 // grammar tweaks that flatten `assert_eq!` expansion to
2050 // zero cyclomatic branches don't make this test brittle;
2051 // the load-bearing pruning check is `functions_sum`
2052 // above.
2053 assert!(
2054 pruned.metrics.cyclomatic.cyclomatic_sum()
2055 <= baseline.metrics.cyclomatic.cyclomatic_sum()
2056 );
2057 }
2058
2059 // `#[cfg(test)] mod tests { fn helper() {} #[test] fn t() {}
2060 // }` — every function inside the gated module disappears.
2061 #[test]
2062 fn cfg_test_mod_elides_entire_module() {
2063 let source = "\
2064fn prod() -> i32 { 1 }
2065
2066#[cfg(test)]
2067mod tests {
2068 fn helper() -> i32 { 2 }
2069 fn another_helper() -> i32 { 3 }
2070 #[test] fn t() { assert_eq!(1, 1); }
2071}
2072";
2073 let baseline = analyse(source, false);
2074 let pruned = analyse(source, true);
2075
2076 // Baseline: prod + helper + another_helper + t = 4 functions.
2077 assert_eq!(baseline.metrics.nom.functions_sum() as usize, 4);
2078 // Pruned: only prod survives.
2079 assert_eq!(pruned.metrics.nom.functions_sum() as usize, 1);
2080 }
2081
2082 // `#[tokio::test]` is the most common async-runtime variant
2083 // and must be elided too. Baseline anchored at 2 so a grammar
2084 // regression that stops counting `async fn` cannot make this
2085 // test pass without pruning actually doing work.
2086 #[test]
2087 fn tokio_test_attribute_is_elided() {
2088 let source = "\
2089fn prod() -> i32 { 1 }
2090
2091#[tokio::test]
2092async fn async_t() { let _x = 1; }
2093";
2094 let baseline = analyse(source, false);
2095 let pruned = analyse(source, true);
2096 assert_eq!(baseline.metrics.nom.functions_sum() as usize, 2);
2097 assert_eq!(pruned.metrics.nom.functions_sum() as usize, 1);
2098 }
2099
2100 // `#[cfg(all(test, target_arch = \"x86_64\"))]` — the
2101 // attribute parser must accept commas inside `all(...)`.
2102 // Baseline anchored at 2 to guard against silent grammar
2103 // regressions (see `tokio_test_attribute_is_elided`).
2104 #[test]
2105 fn cfg_all_test_with_extras_is_elided() {
2106 let source = "\
2107fn prod() -> i32 { 1 }
2108
2109#[cfg(all(test, target_arch = \"x86_64\"))]
2110fn arch_specific_test() { let _x = 1; }
2111";
2112 let baseline = analyse(source, false);
2113 let pruned = analyse(source, true);
2114 assert_eq!(baseline.metrics.nom.functions_sum() as usize, 2);
2115 assert_eq!(pruned.metrics.nom.functions_sum() as usize, 1);
2116 }
2117
2118 // Plain prod-only file must be unchanged by either flag
2119 // value — i.e. the flag is genuinely a no-op when there's
2120 // no test code. Anchor the absolute count (2) so the
2121 // "they're equal" assertion can't be satisfied by both
2122 // values being 0.
2123 #[test]
2124 fn pure_production_unaffected_by_flag() {
2125 let source = "\
2126fn prod() -> i32 { 1 + 2 }
2127fn helper(x: i32) -> i32 { x * 2 }
2128";
2129 let baseline = analyse(source, false);
2130 let pruned = analyse(source, true);
2131 assert_eq!(baseline.metrics.nom.functions_sum() as usize, 2);
2132 assert_eq!(pruned.metrics.nom.functions_sum() as usize, 2);
2133 assert_eq!(
2134 baseline.metrics.cyclomatic.cyclomatic_sum(),
2135 pruned.metrics.cyclomatic.cyclomatic_sum(),
2136 );
2137 }
2138
2139 // Backward compat: with the flag off (the default), every
2140 // node is still counted even when the source contains
2141 // test items.
2142 #[test]
2143 fn default_flag_off_preserves_baseline() {
2144 let source = "\
2145fn prod() -> i32 { 1 }
2146
2147#[test]
2148fn t() { assert_eq!(1, 1); }
2149";
2150 let baseline_default = analyse(source, false);
2151 assert_eq!(baseline_default.metrics.nom.functions_sum() as usize, 2);
2152 }
2153
2154 // Stacked attributes: tree-sitter exposes multiple
2155 // `#[...]` decorations as a chain of `AttributeItem`
2156 // siblings before the decorated item. The matcher must
2157 // walk all of them, not just the immediately-preceding
2158 // one, so a `#[cfg(target_arch = "x86_64")]` on top of
2159 // `#[cfg(test)]` still prunes.
2160 #[test]
2161 fn stacked_attributes_walk_all_siblings() {
2162 let source = "\
2163fn prod() -> i32 { 1 }
2164
2165#[cfg(target_arch = \"x86_64\")]
2166#[cfg(test)]
2167fn t() { let _x = 1; }
2168";
2169 let baseline = analyse(source, false);
2170 let pruned = analyse(source, true);
2171 assert_eq!(baseline.metrics.nom.functions_sum() as usize, 2);
2172 assert_eq!(pruned.metrics.nom.functions_sum() as usize, 1);
2173 }
2174
2175 // Regression for #278. `test` was previously required to be
2176 // the first operand of `all(...)` / `any(...)`; forms like
2177 // `cfg(all(unix, test))` and `cfg(any(feature = "x", test))`
2178 // were silently kept. Baseline anchored at 3 (prod + two
2179 // gated fns) so a grammar regression cannot satisfy the test
2180 // without pruning doing real work.
2181 #[test]
2182 fn cfg_with_test_not_first_is_elided() {
2183 let source = "\
2184fn prod() -> i32 { 1 }
2185
2186#[cfg(all(unix, test))]
2187fn unix_only_test() { let _x = 1; }
2188
2189#[cfg(any(feature = \"slow\", test))]
2190fn slow_or_test() { let _x = 2; }
2191";
2192 let baseline = analyse(source, false);
2193 let pruned = analyse(source, true);
2194 assert_eq!(baseline.metrics.nom.functions_sum() as usize, 3);
2195 assert_eq!(pruned.metrics.nom.functions_sum() as usize, 1);
2196 }
2197
2198 // Negative coverage: attribute shapes that look like "test"
2199 // but must NOT trigger pruning. Production code marked with
2200 // `#[cfg(not(test))]`, a feature flag named "test", or a
2201 // user macro whose path contains "test" must survive
2202 // pruning intact.
2203 #[test]
2204 fn lookalike_attributes_are_not_pruned() {
2205 let source = "\
2206#[cfg(not(test))]
2207fn only_outside_tests() -> i32 { 1 }
2208
2209#[cfg(feature = \"test\")]
2210fn behind_test_feature() -> i32 { 2 }
2211
2212#[my_crate::test_helper]
2213fn decorated_helper() -> i32 { 3 }
2214
2215#[cfg(all(unix, not(test)))]
2216fn unix_prod_only() -> i32 { 4 }
2217";
2218 let pruned = analyse(source, true);
2219 // None of the four attributes mark test-only code.
2220 // All four functions must survive — particularly the
2221 // last one, which combines `not(test)` with another
2222 // operand (regression sibling to #278).
2223 assert_eq!(pruned.metrics.nom.functions_sum() as usize, 4);
2224 }
2225
2226 // Inner attribute on a module: `mod tests { #![cfg(test)] ... }`
2227 // is the idiomatic form when you want to put the gate inside
2228 // the module body rather than on the declaration. Baseline
2229 // anchored at 3 (prod + helper + t) so a grammar regression
2230 // that drops the module body cannot satisfy this test with
2231 // pruning disabled.
2232 #[test]
2233 fn inner_cfg_test_attribute_elides_module() {
2234 let source = "\
2235fn prod() -> i32 { 1 }
2236
2237mod tests {
2238 #![cfg(test)]
2239 fn helper() -> i32 { 2 }
2240 #[test] fn t() { assert_eq!(1, 1); }
2241}
2242";
2243 let baseline = analyse(source, false);
2244 let pruned = analyse(source, true);
2245 assert_eq!(baseline.metrics.nom.functions_sum() as usize, 3);
2246 assert_eq!(pruned.metrics.nom.functions_sum() as usize, 1);
2247 }
2248 }
2249
2250 // Non-Rust languages must ignore `exclude_tests = true` because
2251 // they don't override `should_skip_subtree`. This is the
2252 // "spot-check non-Rust" check from issue #182.
2253 mod exclude_tests_non_rust {
2254 use crate::metrics_with_options;
2255 use crate::{CppParser, MetricsOptions, ParserTrait};
2256 use std::path::PathBuf;
2257
2258 #[test]
2259 fn cpp_ignores_exclude_tests_flag() {
2260 let source = "\
2261int prod() { return 1; }
2262int helper() { return 2; }
2263";
2264 let path = PathBuf::from("foo.cpp");
2265 let parser = CppParser::new(source.as_bytes().to_vec(), &path, None);
2266 let baseline = metrics_with_options(
2267 &parser,
2268 &path,
2269 MetricsOptions::default().with_exclude_tests(false),
2270 )
2271 .expect("baseline must yield a top-level space");
2272 let parser = CppParser::new(source.as_bytes().to_vec(), &path, None);
2273 let pruned = metrics_with_options(
2274 &parser,
2275 &path,
2276 MetricsOptions::default().with_exclude_tests(true),
2277 )
2278 .expect("pruned must yield a top-level space");
2279 // Anchor on the absolute count (2) so a regression that
2280 // dropped all C++ functions wouldn't satisfy a bare
2281 // `baseline == pruned` check.
2282 assert_eq!(baseline.metrics.nom.functions_sum() as usize, 2);
2283 assert_eq!(pruned.metrics.nom.functions_sum() as usize, 2);
2284 }
2285 }
2286
2287 // --- #257: per-metric selection via with_only --------------------
2288 //
2289 // Exercise the gating bitfield through the recommended public
2290 // entry point (`analyze` + `Source`) rather than the deprecated
2291 // path-positional shims, so the tests pin the surface library
2292 // consumers actually use.
2293
2294 mod with_only {
2295 use crate::{LANG, Metric, MetricSet, MetricsOptions, Source, analyze};
2296
2297 const SOURCE: &str = "\
2298fn prod(x: i32) -> i32 {
2299 if x > 0 { x + 1 } else { x - 1 }
2300}
2301";
2302
2303 fn analyse(metrics: &[Metric]) -> crate::FuncSpace {
2304 let opts = MetricsOptions::default().with_only(metrics);
2305 analyze(
2306 Source::new(LANG::Rust, SOURCE.as_bytes()).with_name(Some("lib.rs".to_owned())),
2307 opts,
2308 )
2309 .expect("analyze must yield a top-level space")
2310 }
2311
2312 // `with_only(&[Metric::Loc])` records exactly that bit on
2313 // `CodeMetrics.selected` and leaves the dependent metrics
2314 // (cognitive / cyclomatic / halstead / ...) at their default
2315 // values. The dependent-metric anchors guard against the
2316 // walker silently running them anyway.
2317 #[test]
2318 fn loc_only_skips_other_metrics() {
2319 let full = analyze(
2320 Source::new(LANG::Rust, SOURCE.as_bytes()).with_name(Some("lib.rs".to_owned())),
2321 MetricsOptions::default(),
2322 )
2323 .expect("full analyze must yield a top-level space");
2324 let pruned = analyse(&[Metric::Loc]);
2325
2326 assert_eq!(
2327 pruned.metrics.selected(),
2328 MetricSet::empty().with(Metric::Loc),
2329 "with_only(&[Loc]) must record exactly the Loc bit"
2330 );
2331 // LoC populated: the production function span is >= 1 ploc.
2332 assert!(pruned.metrics.loc.ploc() >= 1.0);
2333 // Full run has > 0 cognitive/cyclomatic; pruned must be
2334 // exactly zero because the compute call is gated off.
2335 assert!(full.metrics.cognitive.cognitive_sum() > 0.0);
2336 assert_eq!(pruned.metrics.cognitive.cognitive_sum(), 0.0);
2337 assert!(full.metrics.cyclomatic.cyclomatic_sum() > 0.0);
2338 assert_eq!(pruned.metrics.cyclomatic.cyclomatic_sum(), 0.0);
2339 // Halstead operators count is at the default (0) — no
2340 // per-node token text was hashed.
2341 assert_eq!(pruned.metrics.halstead.u_operators(), 0.0);
2342 }
2343
2344 // Selecting `Mi` alone must auto-add its dependencies
2345 // (Loc + Cyclomatic + Halstead) — otherwise the MI formula
2346 // would compute against zero inputs and return a meaningless
2347 // score.
2348 #[test]
2349 fn mi_auto_pulls_dependencies() {
2350 let pruned = analyse(&[Metric::Mi]);
2351 let sel = pruned.metrics.selected();
2352 assert!(sel.contains(Metric::Mi));
2353 assert!(sel.contains(Metric::Loc), "Mi depends on Loc");
2354 assert!(sel.contains(Metric::Cyclomatic), "Mi depends on Cyclomatic");
2355 assert!(sel.contains(Metric::Halstead), "Mi depends on Halstead");
2356 // Unrelated metrics must NOT be selected.
2357 assert!(!sel.contains(Metric::Abc));
2358 assert!(!sel.contains(Metric::Tokens));
2359 // The dependencies must actually be populated — not just
2360 // selected. Otherwise the MI formula receives zero inputs
2361 // and `mi_original`'s `inputs_are_empty` short-circuit
2362 // returns 0.0, which would also be `is_finite`. We anchor
2363 // on the dependency values themselves (Loc ploc > 0,
2364 // Cyclomatic sum > 0) so the test would fail if the
2365 // walker silently skipped the dependency compute.
2366 assert!(
2367 pruned.metrics.loc.ploc() > 0.0,
2368 "Loc must have run (Mi dependency); got ploc=0"
2369 );
2370 assert!(
2371 pruned.metrics.cyclomatic.cyclomatic_sum() > 0.0,
2372 "Cyclomatic must have run (Mi dependency); got sum=0"
2373 );
2374 // With non-zero inputs feeding the MI formula, the result
2375 // is a finite non-zero number (the MI for this snippet is
2376 // around 150 — a positive value well above the 0.0 that
2377 // `inputs_are_empty` would short-circuit to).
2378 let mi_value = pruned.metrics.mi.mi_original();
2379 assert!(
2380 mi_value.is_finite() && mi_value != 0.0,
2381 "MI must be finite and non-default when its dependencies were computed; got {mi_value}"
2382 );
2383 }
2384
2385 // `with_only(&[Metric::Wmc])` auto-adds Cyclomatic + Nom.
2386 #[test]
2387 fn wmc_auto_pulls_dependencies() {
2388 let pruned = analyse(&[Metric::Wmc]);
2389 let sel = pruned.metrics.selected();
2390 assert!(sel.contains(Metric::Wmc));
2391 assert!(
2392 sel.contains(Metric::Cyclomatic),
2393 "Wmc depends on Cyclomatic"
2394 );
2395 assert!(sel.contains(Metric::Nom), "Wmc depends on Nom");
2396 assert!(!sel.contains(Metric::Halstead));
2397 // Dependency must actually be computed, not just bit-set:
2398 // selecting Wmc alone must populate Cyclomatic & Nom.
2399 assert!(
2400 pruned.metrics.cyclomatic.cyclomatic_sum() > 0.0,
2401 "Cyclomatic must have run (Wmc dependency); got sum=0"
2402 );
2403 assert!(
2404 pruned.metrics.nom.functions_sum() > 0.0,
2405 "Nom must have run (Wmc dependency); got functions_sum=0"
2406 );
2407 }
2408
2409 // `MetricsOptions::default()` selects every metric (#257's
2410 // default-preservation contract).
2411 #[test]
2412 fn default_options_select_every_metric() {
2413 let full = analyze(
2414 Source::new(LANG::Rust, SOURCE.as_bytes()).with_name(Some("lib.rs".to_owned())),
2415 MetricsOptions::default(),
2416 )
2417 .expect("analyze must yield a top-level space");
2418 assert_eq!(full.metrics.selected(), MetricSet::all());
2419 }
2420
2421 // JSON serialization elides unselected metrics. Anchored on
2422 // the field names emitted at the top level of the
2423 // `metrics` object rather than the full payload so a future
2424 // additive change (new metric, new sub-field) doesn't shift
2425 // unrelated tests.
2426 #[test]
2427 fn unselected_metrics_are_skipped_in_json() {
2428 let pruned = analyse(&[Metric::Loc]);
2429 let json =
2430 serde_json::to_value(&pruned.metrics).expect("CodeMetrics must serialize cleanly");
2431 let metrics = json.as_object().expect("CodeMetrics serializes as object");
2432
2433 assert!(
2434 metrics.contains_key("loc"),
2435 "loc must be serialized when selected"
2436 );
2437 for skipped in [
2438 "cognitive",
2439 "cyclomatic",
2440 "halstead",
2441 "nom",
2442 "tokens",
2443 "nargs",
2444 "nexits",
2445 "abc",
2446 "mi",
2447 "wmc",
2448 "npm",
2449 "npa",
2450 ] {
2451 assert!(
2452 !metrics.contains_key(skipped),
2453 "{skipped} must be elided when not selected"
2454 );
2455 }
2456 }
2457
2458 // Empty slice = nothing selected. Every metric must be
2459 // elided from JSON output; the space tree is still
2460 // produced.
2461 #[test]
2462 fn empty_slice_selects_nothing() {
2463 let pruned = analyse(&[]);
2464 assert_eq!(pruned.metrics.selected(), MetricSet::empty());
2465 let json =
2466 serde_json::to_value(&pruned.metrics).expect("CodeMetrics must serialize cleanly");
2467 let metrics = json.as_object().expect("CodeMetrics serializes as object");
2468 assert!(
2469 metrics.is_empty(),
2470 "with_only(&[]) must elide every metric, got keys {:?}",
2471 metrics.keys().collect::<Vec<_>>()
2472 );
2473 }
2474 }
2475}