Skip to main content

code_ranker_complexity/
lib.rs

1//! Central, language-agnostic complexity pass. Given a structural graph whose
2//! file nodes carry their absolute path as `id`, this reads each file, picks a
3//! `rust-code-analysis` parser by extension, and writes the metrics into the
4//! node's `attrs` as flat keys. It is the single place that knows
5//! rust-code-analysis; plugins emit structure only.
6//!
7//! The metric attribute dictionary it can produce is exposed via
8//! [`metric_specs`] so the orchestrator can declare it in the snapshot.
9
10use code_ranker_graph::attrs::num_attr;
11use code_ranker_plugin_api::{
12    attrs::ValueType,
13    graph::Graph,
14    level::{AttributeGroup, AttributeSpec, Direction, SpecRow, attr_dict, group},
15};
16use rust_code_analysis::{
17    FuncSpace, JavascriptParser, ParserTrait, PythonParser, RustParser, TsxParser,
18    TypescriptParser, metrics,
19};
20use std::collections::BTreeMap;
21use std::path::Path;
22
23/// Annotate every file node (`kind == "file"`) whose `id` is a readable source
24/// file of a known extension with complexity metrics. Returns the number of
25/// nodes annotated. Nodes whose file cannot be read/parsed are left untouched.
26pub fn annotate(graph: &mut Graph) -> usize {
27    let mut annotated = 0usize;
28    for node in &mut graph.nodes {
29        if node.kind != "file" {
30            continue;
31        }
32        let path = Path::new(&node.id);
33        let Ok(src) = std::fs::read(path) else {
34            continue;
35        };
36        let Some((space, tloc)) = parse_metrics(path, src) else {
37            continue;
38        };
39        write_metrics(node, &space, tloc);
40        annotated += 1;
41    }
42    annotated
43}
44
45/// True if any attribute gates an item to tests: `#[test]`, `#[bench]`, or
46/// `#[cfg(test)]` / `#[cfg(all(test, …))]` / `#[cfg(any(test, …))]`. A `test`
47/// **identifier** inside `cfg(...)` is what matches — `cfg(feature = "test")`
48/// (a string literal) does not.
49fn is_test_attr(attr: &syn::Attribute) -> bool {
50    if attr.path().is_ident("test") || attr.path().is_ident("bench") {
51        return true;
52    }
53    if attr.path().is_ident("cfg")
54        && let syn::Meta::List(list) = &attr.meta
55    {
56        return tokens_have_test_ident(list.tokens.clone());
57    }
58    false
59}
60
61/// Recursively scan a token stream for a bare `test` identifier (descends into
62/// `all(...)` / `any(...)` groups).
63fn tokens_have_test_ident(ts: proc_macro2::TokenStream) -> bool {
64    ts.into_iter().any(|t| match t {
65        proc_macro2::TokenTree::Ident(i) => i == "test",
66        proc_macro2::TokenTree::Group(g) => tokens_have_test_ident(g.stream()),
67        _ => false,
68    })
69}
70
71/// Visitor collecting the 1-based, inclusive line ranges of test-only items
72/// (`#[cfg(test)]` modules, `#[test]`/`#[cfg(test)]` fns), attribute line
73/// included. It recurses into ordinary modules to catch nested test modules but
74/// not into a test item it already captured.
75#[derive(Default)]
76struct TestSpans {
77    ranges: Vec<(usize, usize)>,
78}
79
80impl TestSpans {
81    fn record(&mut self, attrs: &[syn::Attribute], span: proc_macro2::Span) {
82        use syn::spanned::Spanned;
83        let start = attrs
84            .iter()
85            .map(|a| a.span().start().line)
86            .chain(std::iter::once(span.start().line))
87            .min()
88            .unwrap_or(0);
89        self.ranges.push((start, span.end().line));
90    }
91}
92
93impl<'ast> syn::visit::Visit<'ast> for TestSpans {
94    fn visit_item_mod(&mut self, m: &'ast syn::ItemMod) {
95        use syn::spanned::Spanned;
96        if m.attrs.iter().any(is_test_attr) {
97            self.record(&m.attrs, m.span());
98        } else {
99            syn::visit::visit_item_mod(self, m);
100        }
101    }
102    fn visit_item_fn(&mut self, f: &'ast syn::ItemFn) {
103        use syn::spanned::Spanned;
104        if f.attrs.iter().any(is_test_attr) {
105            self.record(&f.attrs, f.span());
106        }
107    }
108}
109
110/// Step 1 of the Rust line accounting: remove `#[cfg(test)]` / `#[test]` /
111/// `#[bench]` items so the production metrics (`sloc` / `cloc` / `blank` / `hk` /
112/// complexity) are then measured on production code only. Returns the production
113/// source **and** `tloc` — the number of test lines removed (the whole test
114/// region: attribute, body, braces). Parse failures or no test items return the
115/// source unchanged with `tloc = 0`.
116fn strip_cfg_test(src: &[u8]) -> (Vec<u8>, usize) {
117    use syn::visit::Visit;
118    let Ok(text) = std::str::from_utf8(src) else {
119        return (src.to_vec(), 0);
120    };
121    let Ok(file) = syn::parse_file(text) else {
122        return (src.to_vec(), 0);
123    };
124    let mut spans = TestSpans::default();
125    spans.visit_file(&file);
126    if spans.ranges.is_empty() {
127        return (src.to_vec(), 0);
128    }
129    let drop: std::collections::HashSet<usize> =
130        spans.ranges.iter().flat_map(|&(s, e)| s..=e).collect();
131    let tloc = drop.len();
132    let mut out: String = text
133        .lines()
134        .enumerate()
135        .filter(|(i, _)| !drop.contains(&(i + 1)))
136        .map(|(_, l)| l)
137        .collect::<Vec<_>>()
138        .join("\n");
139    out.push('\n');
140    (out.into_bytes(), tloc)
141}
142
143/// Pick a parser by file extension and compute the file's production `FuncSpace`
144/// plus `tloc` — the number of **test** lines (`#[cfg(test)]` / `#[test]` /
145/// `#[bench]`) removed before measuring. Only Rust strips tests, so `tloc` is
146/// `0.0` for every other language. (Step 1 strips tests; step 2, in
147/// `write_metrics`, counts sloc/cloc/blank on the production remainder.)
148fn parse_metrics(path: &Path, src: Vec<u8>) -> Option<(FuncSpace, f64)> {
149    let ext = path.extension().and_then(|e| e.to_str())?;
150    match ext {
151        "rs" => {
152            let (prod_src, tloc) = strip_cfg_test(&src);
153            let prod = metrics(&RustParser::new(prod_src, path, None), path)?;
154            Some((prod, tloc as f64))
155        }
156        "py" => metrics(&PythonParser::new(src, path, None), path).map(|s| (s, 0.0)),
157        "ts" | "mts" | "cts" => {
158            metrics(&TypescriptParser::new(src, path, None), path).map(|s| (s, 0.0))
159        }
160        "tsx" => metrics(&TsxParser::new(src, path, None), path).map(|s| (s, 0.0)),
161        "js" | "jsx" | "mjs" | "cjs" => {
162            metrics(&JavascriptParser::new(src, path, None), path).map(|s| (s, 0.0))
163        }
164        _ => None,
165    }
166}
167
168/// Write the metric attributes for one file node. Each value is omitted when it
169/// rounds to zero; the LOC block is gated on `sloc > 0` and the Halstead block
170/// on `volume > 0` (matching the historical behavior).
171fn write_metrics(node: &mut code_ranker_plugin_api::node::Node, s: &FuncSpace, tloc: f64) {
172    let m = &s.metrics;
173    let mut put = |key: &str, v: f64| {
174        let a = num_attr(v);
175        if matches!(&a, code_ranker_plugin_api::attrs::AttrValue::Int(0))
176            || matches!(&a, code_ranker_plugin_api::attrs::AttrValue::Float(f) if *f == 0.0)
177        {
178            node.attrs.remove(key);
179        } else {
180            node.attrs.insert(key.to_string(), a);
181        }
182    };
183
184    put("cyclomatic", m.cyclomatic.cyclomatic());
185    put("cognitive", m.cognitive.cognitive());
186    put("exits", m.nexits.exit());
187    let args = if m.nargs.fn_args() > 0.0 {
188        m.nargs.fn_args()
189    } else {
190        m.nargs.closure_args()
191    };
192    put("args", args);
193    put("closures", m.nom.closures());
194
195    put("mi", m.mi.mi_original());
196    put("mi_sei", m.mi.mi_sei());
197
198    // `sloc` here means *physical lines of code* — lines with real code, excluding
199    // blanks and comment-only lines (see this key's spec). rust-code-analysis names
200    // that `ploc()`; its `sloc()` is the total line count (already exposed as `loc`).
201    //
202    // NOTE: for Rust these four — `sloc` (physical), `lloc` (logical), `cloc`
203    // (comments), `blank` — are all measured on the *production* source, i.e.
204    // AFTER `strip_cfg_test` removed `#[cfg(test)]` / `#[test]` / `#[bench]`
205    // items. So none of them count lines from inline tests; those go to `tloc`.
206    let sloc = m.loc.ploc();
207    if sloc > 0.0 {
208        put("sloc", sloc);
209        put("lloc", m.loc.lloc());
210        put("cloc", m.loc.cloc());
211        put("blank", m.loc.blank());
212    }
213    // Test source lines (`#[cfg(test)]`/`#[test]`/`#[bench]`), the complement of
214    // `sloc`. Zero (non-Rust, or no inline tests) is dropped by `put`.
215    put("tloc", tloc);
216
217    let volume = m.halstead.volume();
218    if volume > 0.0 {
219        put("length", m.halstead.length());
220        put(
221            "vocabulary",
222            m.halstead.u_operators() + m.halstead.u_operands(),
223        );
224        put("volume", volume);
225        put("effort", m.halstead.effort());
226        put("time", m.halstead.time());
227        put("bugs", m.halstead.bugs());
228    }
229}
230
231/// The complexity metric attribute dictionary and its groups, fully enriched
232/// (label/name/short/description/formula/calc/direction) so the UI hardcodes no
233/// metric. The orchestrator merges these into each level's `node_attributes` /
234/// `attribute_groups` (then prunes to keys actually present) and overlays
235/// language thresholds. Coupling/cycle specs live in `code-ranker-graph`.
236pub fn metric_specs() -> (
237    BTreeMap<String, AttributeSpec>,
238    BTreeMap<String, AttributeGroup>,
239) {
240    use Direction::{HigherBetter, LowerBetter};
241    use ValueType::Float;
242    let specs = attr_dict(vec![
243        (
244            "cyclomatic",
245            SpecRow {
246                group: "complexity",
247                label: "Cyclomatic",
248                name: "Cyclomatic complexity",
249                short: "Cyclomatic",
250                description: "Number of linearly independent paths through the code. Higher values indicate complex branching logic.",
251                formula: "branches + 1",
252                direction: LowerBetter,
253                ..Default::default()
254            },
255        ),
256        (
257            "cognitive",
258            SpecRow {
259                group: "complexity",
260                label: "Cognitive",
261                name: "Cognitive complexity",
262                short: "Cognitive",
263                description: "Measures how difficult the code is to understand, accounting for nesting depth and non-structural control flow.",
264                direction: LowerBetter,
265                ..Default::default()
266            },
267        ),
268        (
269            "exits",
270            SpecRow {
271                group: "complexity",
272                label: "Exits",
273                name: "Exit points",
274                short: "Exits",
275                description: "Number of exit points (return/throw) in the unit.",
276                direction: LowerBetter,
277                ..Default::default()
278            },
279        ),
280        (
281            "args",
282            SpecRow {
283                group: "complexity",
284                label: "Args",
285                name: "Arguments",
286                short: "Args",
287                description: "Number of function / closure arguments.",
288                direction: LowerBetter,
289                ..Default::default()
290            },
291        ),
292        (
293            "closures",
294            SpecRow {
295                group: "complexity",
296                label: "Closures",
297                name: "Closures",
298                short: "Closures",
299                description: "Number of closures defined in the unit.",
300                direction: LowerBetter,
301                ..Default::default()
302            },
303        ),
304        (
305            "mi",
306            SpecRow {
307                group: "maintainability",
308                value_type: Float,
309                label: "MI",
310                name: "Maintainability index",
311                short: "MI",
312                description: "Maintainability Index (0–100, higher is more maintainable). Derived from Halstead volume, cyclomatic complexity, and SLOC.",
313                formula: "171 − 5.2·ln(volume) − 0.23·cyclomatic − 16.2·ln(sloc)",
314                direction: HigherBetter,
315                ..Default::default()
316            },
317        ),
318        (
319            "mi_sei",
320            SpecRow {
321                group: "maintainability",
322                value_type: Float,
323                label: "MI (SEI)",
324                name: "Maintainability (SEI)",
325                short: "MI SEI",
326                description: "SEI variant of the Maintainability Index — adds a bonus for comment density.",
327                formula: "MI + 50·sin(√(2.4 × comment-ratio))",
328                direction: HigherBetter,
329                ..Default::default()
330            },
331        ),
332        (
333            "sloc",
334            SpecRow {
335                group: "loc",
336                label: "Source",
337                name: "Source lines",
338                short: "SLOC",
339                description: "Source lines of code — lines with at least one non-whitespace, non-comment character. Blank and comment-only lines are not counted. In Rust, lines inside `#[cfg(test)]` / `#[test]` items are excluded too, so this counts production code only (unlike `loc`, the raw file line count).",
340                ..Default::default()
341            },
342        ),
343        (
344            "lloc",
345            SpecRow {
346                group: "loc",
347                label: "Logical",
348                name: "Logical lines",
349                short: "Logical",
350                description: "Logical lines — counts statements, not physical lines. In Rust, measured on production code only (inline `#[cfg(test)]` / `#[test]` tests are excluded, like `sloc`; their lines are `tloc`).",
351                ..Default::default()
352            },
353        ),
354        (
355            "cloc",
356            SpecRow {
357                group: "loc",
358                label: "Comments",
359                name: "Comment lines",
360                short: "Comments",
361                description: "Comment-only lines (inline comments on code lines are not counted). In Rust, measured on production code only (inline `#[cfg(test)]` / `#[test]` tests are excluded, like `sloc`; their lines are `tloc`).",
362                ..Default::default()
363            },
364        ),
365        (
366            "blank",
367            SpecRow {
368                group: "loc",
369                label: "Blank",
370                name: "Blank lines",
371                short: "Blank",
372                description: "Empty or whitespace-only lines. In Rust, measured on production code only (inline `#[cfg(test)]` / `#[test]` tests are excluded, like `sloc`; their lines are `tloc`).",
373                ..Default::default()
374            },
375        ),
376        (
377            "tloc",
378            SpecRow {
379                group: "loc",
380                label: "Test",
381                name: "Test lines",
382                short: "TLOC",
383                description: "Test lines of code — the lines inside `#[cfg(test)]` / `#[test]` / `#[bench]` items (Rust), removed before the production metrics are measured. The complement of `sloc`: test code never inflates a file's size, HK, or complexity.",
384                ..Default::default()
385            },
386        ),
387        (
388            "length",
389            SpecRow {
390                group: "halstead",
391                value_type: Float,
392                label: "Length",
393                name: "Halstead length",
394                short: "H.len",
395                description: "Program length — total operator + operand occurrences.",
396                formula: "N₁ + N₂",
397                direction: LowerBetter,
398                ..Default::default()
399            },
400        ),
401        (
402            "vocabulary",
403            SpecRow {
404                group: "halstead",
405                value_type: Float,
406                label: "Vocabulary",
407                name: "Halstead vocabulary",
408                short: "H.vocab",
409                description: "Vocabulary — distinct operators + operands.",
410                formula: "η₁ + η₂",
411                direction: LowerBetter,
412                ..Default::default()
413            },
414        ),
415        (
416            "volume",
417            SpecRow {
418                group: "halstead",
419                value_type: Float,
420                label: "Volume",
421                name: "Halstead volume",
422                short: "H.vol",
423                description: "Algorithm size in bits, from distinct operators and operands.",
424                formula: "length × log₂(vocabulary)",
425                calc: "length * Math.log2(vocabulary)",
426                direction: LowerBetter,
427                ..Default::default()
428            },
429        ),
430        (
431            "effort",
432            SpecRow {
433                group: "halstead",
434                value_type: Float,
435                label: "Effort",
436                name: "Halstead effort",
437                short: "H.effort",
438                description: "Mental effort to implement the algorithm.",
439                formula: "volume × difficulty",
440                direction: LowerBetter,
441                ..Default::default()
442            },
443        ),
444        (
445            "time",
446            SpecRow {
447                group: "halstead",
448                value_type: Float,
449                label: "Time",
450                name: "Halstead time, s",
451                short: "H.time(s)",
452                description: "Estimated implementation time, in seconds.",
453                formula: "effort ÷ 18",
454                calc: "effort / 18",
455                direction: LowerBetter,
456                ..Default::default()
457            },
458        ),
459        (
460            "bugs",
461            SpecRow {
462                group: "halstead",
463                value_type: Float,
464                label: "Bugs",
465                name: "Halstead bugs",
466                short: "H.bugs",
467                description: "Estimated delivered bugs — a rough predictor of defect density.",
468                formula: "effort^⅔ ÷ 3000",
469                calc: "effort ** (2/3) / 3000",
470                direction: LowerBetter,
471                ..Default::default()
472            },
473        ),
474    ]);
475    let mut groups = BTreeMap::new();
476    groups.insert(
477        "complexity".to_string(),
478        group("Complexity", "Code complexity metrics"),
479    );
480    groups.insert(
481        "halstead".to_string(),
482        group("Halstead", "Halstead software metrics"),
483    );
484    groups.insert(
485        "loc".to_string(),
486        group("Lines of Code", "Lines of code breakdown"),
487    );
488    groups.insert(
489        "maintainability".to_string(),
490        group("Maintainability", "Maintainability index"),
491    );
492    (specs, groups)
493}
494
#[cfg(test)]
mod tests {
    use super::*;

    /// Run `strip_cfg_test` on `src` and return only the production text.
    fn strip(src: &str) -> String {
        let (production, _tloc) = strip_cfg_test(src.as_bytes());
        String::from_utf8(production).unwrap()
    }

    /// A `#[cfg(test)]` module disappears together with its attribute line and
    /// everything inside it, while production code stays.
    #[test]
    fn strips_cfg_test_module_with_its_attribute() {
        let out = strip(concat!(
            "pub fn prod() -> i32 {\n    1\n}\n\n",
            "#[cfg(test)]\nmod tests {\n    use super::*;\n    #[test]\n    fn t() { assert_eq!(prod(), 1); }\n}\n",
        ));
        assert!(out.contains("pub fn prod"), "production kept: {out}");
        assert!(!out.contains("mod tests"), "test mod removed: {out}");
        assert!(
            !out.contains("#[cfg(test)]"),
            "the cfg attr line removed too: {out}"
        );
        assert!(!out.contains("fn t()"), "test fn removed: {out}");
    }

    /// Free-standing `#[test]` and `#[bench]` functions are stripped as well.
    #[test]
    fn strips_standalone_test_and_bench_fns() {
        let src = "fn prod() {}\n#[test]\nfn it_works() {}\n#[bench]\nfn b(_: &mut ()) {}\n";
        let out = strip(src);
        assert!(out.contains("fn prod"));
        assert!(
            !out.contains("it_works") && !out.contains("fn b("),
            "test/bench fns removed: {out}"
        );
    }

    /// `cfg(feature = "test")` holds a string literal rather than a `test`
    /// ident, and `mod tests_data` carries no gate at all; both must survive.
    #[test]
    fn keeps_non_test_cfg_and_similarly_named_items() {
        let src = "#[cfg(feature = \"test\")]\npub mod gated {}\npub mod tests_data {}\n";
        let out = strip(src);
        assert!(out.contains("pub mod gated"), "feature-cfg kept: {out}");
        assert!(
            out.contains("tests_data"),
            "non-gated lookalike kept: {out}"
        );
    }

    /// `test` nested inside `all(...)` still marks the item as test-only.
    #[test]
    fn strips_cfg_all_test_combinations() {
        let src = "fn p() {}\n#[cfg(all(test, feature = \"x\"))]\nmod t {}\n";
        let out = strip(src);
        assert!(out.contains("fn p"));
        assert!(!out.contains("mod t"), "cfg(all(test,…)) removed: {out}");
    }

    /// Sources without test items, and sources that fail to parse, come back
    /// byte-for-byte with a zero test-line count.
    #[test]
    fn unchanged_without_tests_or_on_parse_error() {
        let prod = "pub fn a() {}\n";
        let broken = "@@@ not rust @@@";
        for input in [prod, broken] {
            assert_eq!(
                strip_cfg_test(input.as_bytes()),
                (input.as_bytes().to_vec(), 0)
            );
        }
    }

    /// Four lines are removed here: the `#[cfg(test)]` attribute, `mod tests {`,
    /// the single body line, and the closing `}`.
    #[test]
    fn tloc_counts_the_whole_removed_test_region() {
        let src = "pub fn p() {}\n#[cfg(test)]\nmod tests {\n    fn t() {}\n}\n";
        let (_, tloc) = strip_cfg_test(src.as_bytes());
        assert_eq!(tloc, 4);
    }
}