code_ranker_complexity/
lib.rs

1//! Central, language-agnostic complexity pass. Given a structural graph whose
2//! file nodes carry their absolute path as `id`, this reads each file, picks a
3//! `rust-code-analysis` parser by extension, and writes the metrics into the
4//! node's `attrs` as flat keys. It is the single place that knows
5//! rust-code-analysis; plugins emit structure only.
6//!
7//! The metric attribute dictionary it can produce is exposed via
8//! [`metric_specs`] so the orchestrator can declare it in the snapshot.
9
10use code_ranker_graph::attrs::num_attr;
11use code_ranker_plugin_api::{
12    attrs::ValueType,
13    graph::Graph,
14    level::{AttributeGroup, AttributeSpec},
15};
16use rust_code_analysis::{
17    FuncSpace, JavascriptParser, ParserTrait, PythonParser, RustParser, TsxParser,
18    TypescriptParser, metrics,
19};
20use std::collections::BTreeMap;
21use std::path::Path;
22
23/// Annotate every file node (`kind == "file"`) whose `id` is a readable source
24/// file of a known extension with complexity metrics. Returns the number of
25/// nodes annotated. Nodes whose file cannot be read/parsed are left untouched.
26pub fn annotate(graph: &mut Graph) -> usize {
27    let mut annotated = 0usize;
28    for node in &mut graph.nodes {
29        if node.kind != "file" {
30            continue;
31        }
32        let path = Path::new(&node.id);
33        let Ok(src) = std::fs::read(path) else {
34            continue;
35        };
36        let Some((space, tloc)) = parse_metrics(path, src) else {
37            continue;
38        };
39        write_metrics(node, &space, tloc);
40        annotated += 1;
41    }
42    annotated
43}
44
45/// True if any attribute gates an item to tests: `#[test]`, `#[bench]`, or
46/// `#[cfg(test)]` / `#[cfg(all(test, …))]` / `#[cfg(any(test, …))]`. A `test`
47/// **identifier** inside `cfg(...)` is what matches — `cfg(feature = "test")`
48/// (a string literal) does not.
49fn is_test_attr(attr: &syn::Attribute) -> bool {
50    if attr.path().is_ident("test") || attr.path().is_ident("bench") {
51        return true;
52    }
53    if attr.path().is_ident("cfg")
54        && let syn::Meta::List(list) = &attr.meta
55    {
56        return tokens_have_test_ident(list.tokens.clone());
57    }
58    false
59}
60
61/// Recursively scan a token stream for a bare `test` identifier (descends into
62/// `all(...)` / `any(...)` groups).
63fn tokens_have_test_ident(ts: proc_macro2::TokenStream) -> bool {
64    ts.into_iter().any(|t| match t {
65        proc_macro2::TokenTree::Ident(i) => i == "test",
66        proc_macro2::TokenTree::Group(g) => tokens_have_test_ident(g.stream()),
67        _ => false,
68    })
69}
70
/// Visitor collecting the 1-based, inclusive line ranges of test-only items
/// (`#[cfg(test)]` modules, `#[test]`/`#[cfg(test)]` fns), attribute line
/// included. It recurses into ordinary modules to catch nested test modules but
/// not into a test item it already captured.
#[derive(Default)]
struct TestSpans {
    /// One `(first_line, last_line)` pair per captured test item, in visit
    /// order. Ranges are not merged here, so they may overlap.
    ranges: Vec<(usize, usize)>,
}
79
80impl TestSpans {
81    fn record(&mut self, attrs: &[syn::Attribute], span: proc_macro2::Span) {
82        use syn::spanned::Spanned;
83        let start = attrs
84            .iter()
85            .map(|a| a.span().start().line)
86            .chain(std::iter::once(span.start().line))
87            .min()
88            .unwrap_or(0);
89        self.ranges.push((start, span.end().line));
90    }
91}
92
93impl<'ast> syn::visit::Visit<'ast> for TestSpans {
94    fn visit_item_mod(&mut self, m: &'ast syn::ItemMod) {
95        use syn::spanned::Spanned;
96        if m.attrs.iter().any(is_test_attr) {
97            self.record(&m.attrs, m.span());
98        } else {
99            syn::visit::visit_item_mod(self, m);
100        }
101    }
102    fn visit_item_fn(&mut self, f: &'ast syn::ItemFn) {
103        use syn::spanned::Spanned;
104        if f.attrs.iter().any(is_test_attr) {
105            self.record(&f.attrs, f.span());
106        }
107    }
108}
109
110/// Step 1 of the Rust line accounting: remove `#[cfg(test)]` / `#[test]` /
111/// `#[bench]` items so the production metrics (`sloc` / `cloc` / `blank` / `hk` /
112/// complexity) are then measured on production code only. Returns the production
113/// source **and** `tloc` — the number of test lines removed (the whole test
114/// region: attribute, body, braces). Parse failures or no test items return the
115/// source unchanged with `tloc = 0`.
116fn strip_cfg_test(src: &[u8]) -> (Vec<u8>, usize) {
117    use syn::visit::Visit;
118    let Ok(text) = std::str::from_utf8(src) else {
119        return (src.to_vec(), 0);
120    };
121    let Ok(file) = syn::parse_file(text) else {
122        return (src.to_vec(), 0);
123    };
124    let mut spans = TestSpans::default();
125    spans.visit_file(&file);
126    if spans.ranges.is_empty() {
127        return (src.to_vec(), 0);
128    }
129    let drop: std::collections::HashSet<usize> =
130        spans.ranges.iter().flat_map(|&(s, e)| s..=e).collect();
131    let tloc = drop.len();
132    let mut out: String = text
133        .lines()
134        .enumerate()
135        .filter(|(i, _)| !drop.contains(&(i + 1)))
136        .map(|(_, l)| l)
137        .collect::<Vec<_>>()
138        .join("\n");
139    out.push('\n');
140    (out.into_bytes(), tloc)
141}
142
143/// Pick a parser by file extension and compute the file's production `FuncSpace`
144/// plus `tloc` — the number of **test** lines (`#[cfg(test)]` / `#[test]` /
145/// `#[bench]`) removed before measuring. Only Rust strips tests, so `tloc` is
146/// `0.0` for every other language. (Step 1 strips tests; step 2, in
147/// `write_metrics`, counts sloc/cloc/blank on the production remainder.)
148fn parse_metrics(path: &Path, src: Vec<u8>) -> Option<(FuncSpace, f64)> {
149    let ext = path.extension().and_then(|e| e.to_str())?;
150    match ext {
151        "rs" => {
152            let (prod_src, tloc) = strip_cfg_test(&src);
153            let prod = metrics(&RustParser::new(prod_src, path, None), path)?;
154            Some((prod, tloc as f64))
155        }
156        "py" => metrics(&PythonParser::new(src, path, None), path).map(|s| (s, 0.0)),
157        "ts" | "mts" | "cts" => {
158            metrics(&TypescriptParser::new(src, path, None), path).map(|s| (s, 0.0))
159        }
160        "tsx" => metrics(&TsxParser::new(src, path, None), path).map(|s| (s, 0.0)),
161        "js" | "jsx" | "mjs" | "cjs" => {
162            metrics(&JavascriptParser::new(src, path, None), path).map(|s| (s, 0.0))
163        }
164        _ => None,
165    }
166}
167
168/// Write the metric attributes for one file node. Each value is omitted when it
169/// rounds to zero; the LOC block is gated on `sloc > 0` and the Halstead block
170/// on `volume > 0` (matching the historical behavior).
171fn write_metrics(node: &mut code_ranker_plugin_api::node::Node, s: &FuncSpace, tloc: f64) {
172    let m = &s.metrics;
173    let mut put = |key: &str, v: f64| {
174        let a = num_attr(v);
175        if matches!(&a, code_ranker_plugin_api::attrs::AttrValue::Int(0))
176            || matches!(&a, code_ranker_plugin_api::attrs::AttrValue::Float(f) if *f == 0.0)
177        {
178            node.attrs.remove(key);
179        } else {
180            node.attrs.insert(key.to_string(), a);
181        }
182    };
183
184    put("cyclomatic", m.cyclomatic.cyclomatic());
185    put("cognitive", m.cognitive.cognitive());
186    put("exits", m.nexits.exit());
187    let args = if m.nargs.fn_args() > 0.0 {
188        m.nargs.fn_args()
189    } else {
190        m.nargs.closure_args()
191    };
192    put("args", args);
193    put("closures", m.nom.closures());
194
195    put("mi", m.mi.mi_original());
196    put("mi_sei", m.mi.mi_sei());
197
198    // `sloc` here means *physical lines of code* — lines with real code, excluding
199    // blanks and comment-only lines (see this key's spec). rust-code-analysis names
200    // that `ploc()`; its `sloc()` is the total line count (already exposed as `loc`).
201    //
202    // NOTE: for Rust these four — `sloc` (physical), `lloc` (logical), `cloc`
203    // (comments), `blank` — are all measured on the *production* source, i.e.
204    // AFTER `strip_cfg_test` removed `#[cfg(test)]` / `#[test]` / `#[bench]`
205    // items. So none of them count lines from inline tests; those go to `tloc`.
206    let sloc = m.loc.ploc();
207    if sloc > 0.0 {
208        put("sloc", sloc);
209        put("lloc", m.loc.lloc());
210        put("cloc", m.loc.cloc());
211        put("blank", m.loc.blank());
212    }
213    // Test source lines (`#[cfg(test)]`/`#[test]`/`#[bench]`), the complement of
214    // `sloc`. Zero (non-Rust, or no inline tests) is dropped by `put`.
215    put("tloc", tloc);
216
217    let volume = m.halstead.volume();
218    if volume > 0.0 {
219        put("length", m.halstead.length());
220        put(
221            "vocabulary",
222            m.halstead.u_operators() + m.halstead.u_operands(),
223        );
224        put("volume", volume);
225        put("effort", m.halstead.effort());
226        put("time", m.halstead.time());
227        put("bugs", m.halstead.bugs());
228    }
229}
230
/// One metric row: (key, group, value_type, label, name, short, description,
/// formula, calc, direction). Empty strings become `None`.
///
/// The tuple is positional, so the order below must match the destructuring in
/// `metric_specs`.
type MetricRow = (
    &'static str, // key — attribute key the spec is stored under
    &'static str, // group — key of the attribute group
    ValueType,    // value_type
    &'static str, // label
    &'static str, // name
    &'static str, // short
    &'static str, // description
    &'static str, // formula
    &'static str, // calc
    &'static str, // direction
);
245
246fn group(label: &str, description: &str) -> AttributeGroup {
247    AttributeGroup {
248        label: Some(label.to_string()),
249        description: Some(description.to_string()),
250    }
251}
252
253/// The complexity metric attribute dictionary and its groups, fully enriched
254/// (label/name/short/description/formula/calc/direction) so the UI hardcodes no
255/// metric. The orchestrator merges these into each level's `node_attributes` /
256/// `attribute_groups` (then prunes to keys actually present) and overlays
257/// language thresholds. Coupling/cycle specs live in `code-ranker-graph`.
258pub fn metric_specs() -> (
259    BTreeMap<String, AttributeSpec>,
260    BTreeMap<String, AttributeGroup>,
261) {
262    use ValueType::{Float, Int};
263    let opt = |s: &str| {
264        if s.is_empty() {
265            None
266        } else {
267            Some(s.to_string())
268        }
269    };
270    // (key, group, value_type, label, name, short, description, formula, calc, direction)
271    let rows: &[MetricRow] = &[
272        (
273            "cyclomatic",
274            "complexity",
275            Int,
276            "Cyclomatic",
277            "Cyclomatic complexity",
278            "Cyclomatic",
279            "Number of linearly independent paths through the code. Higher values indicate complex branching logic.",
280            "branches + 1",
281            "",
282            "lower_better",
283        ),
284        (
285            "cognitive",
286            "complexity",
287            Int,
288            "Cognitive",
289            "Cognitive complexity",
290            "Cognitive",
291            "Measures how difficult the code is to understand, accounting for nesting depth and non-structural control flow.",
292            "",
293            "",
294            "lower_better",
295        ),
296        (
297            "exits",
298            "complexity",
299            Int,
300            "Exits",
301            "Exit points",
302            "Exits",
303            "Number of exit points (return/throw) in the unit.",
304            "",
305            "",
306            "lower_better",
307        ),
308        (
309            "args",
310            "complexity",
311            Int,
312            "Args",
313            "Arguments",
314            "Args",
315            "Number of function / closure arguments.",
316            "",
317            "",
318            "lower_better",
319        ),
320        (
321            "closures",
322            "complexity",
323            Int,
324            "Closures",
325            "Closures",
326            "Closures",
327            "Number of closures defined in the unit.",
328            "",
329            "",
330            "lower_better",
331        ),
332        (
333            "mi",
334            "maintainability",
335            Float,
336            "MI",
337            "Maintainability index",
338            "MI",
339            "Maintainability Index (0–100, higher is more maintainable). Derived from Halstead volume, cyclomatic complexity, and SLOC.",
340            "171 − 5.2·ln(volume) − 0.23·cyclomatic − 16.2·ln(sloc)",
341            "",
342            "higher_better",
343        ),
344        (
345            "mi_sei",
346            "maintainability",
347            Float,
348            "MI (SEI)",
349            "Maintainability (SEI)",
350            "MI SEI",
351            "SEI variant of the Maintainability Index — adds a bonus for comment density.",
352            "MI + 50·sin(√(2.4 × comment-ratio))",
353            "",
354            "higher_better",
355        ),
356        (
357            "sloc",
358            "loc",
359            Int,
360            "Source",
361            "Source lines (sloc)",
362            "SLOC",
363            "Source lines of code — lines with at least one non-whitespace, non-comment character. Blank and comment-only lines are not counted. In Rust, lines inside `#[cfg(test)]` / `#[test]` items are excluded too, so this counts production code only (unlike `loc`, the raw file line count).",
364            "",
365            "",
366            "",
367        ),
368        (
369            "lloc",
370            "loc",
371            Int,
372            "Logical",
373            "Logical LOC",
374            "Logical",
375            "Logical lines — counts statements, not physical lines. In Rust, measured on production code only (inline `#[cfg(test)]` / `#[test]` tests are excluded, like `sloc`; their lines are `tloc`).",
376            "",
377            "",
378            "",
379        ),
380        (
381            "cloc",
382            "loc",
383            Int,
384            "Comments",
385            "Comment lines",
386            "Comments",
387            "Comment-only lines (inline comments on code lines are not counted). In Rust, measured on production code only (inline `#[cfg(test)]` / `#[test]` tests are excluded, like `sloc`; their lines are `tloc`).",
388            "",
389            "",
390            "",
391        ),
392        (
393            "blank",
394            "loc",
395            Int,
396            "Blank",
397            "Blank lines",
398            "Blank",
399            "Empty or whitespace-only lines. In Rust, measured on production code only (inline `#[cfg(test)]` / `#[test]` tests are excluded, like `sloc`; their lines are `tloc`).",
400            "",
401            "",
402            "",
403        ),
404        (
405            "tloc",
406            "loc",
407            Int,
408            "Test",
409            "Test lines (tloc)",
410            "TLOC",
411            "Test lines of code — the lines inside `#[cfg(test)]` / `#[test]` / `#[bench]` items (Rust), removed before the production metrics are measured. The complement of `sloc`: test code never inflates a file's size, HK, or complexity.",
412            "",
413            "",
414            "",
415        ),
416        (
417            "length",
418            "halstead",
419            Float,
420            "Length",
421            "Halstead length",
422            "H.len",
423            "Program length — total operator + operand occurrences.",
424            "N₁ + N₂",
425            "",
426            "lower_better",
427        ),
428        (
429            "vocabulary",
430            "halstead",
431            Float,
432            "Vocabulary",
433            "Halstead vocabulary",
434            "H.vocab",
435            "Vocabulary — distinct operators + operands.",
436            "η₁ + η₂",
437            "",
438            "lower_better",
439        ),
440        (
441            "volume",
442            "halstead",
443            Float,
444            "Volume",
445            "Halstead volume",
446            "H.vol",
447            "Algorithm size in bits, from distinct operators and operands.",
448            "length × log₂(vocabulary)",
449            "length * Math.log2(vocabulary)",
450            "lower_better",
451        ),
452        (
453            "effort",
454            "halstead",
455            Float,
456            "Effort",
457            "Halstead effort",
458            "H.effort",
459            "Mental effort to implement the algorithm.",
460            "volume × difficulty",
461            "",
462            "lower_better",
463        ),
464        (
465            "time",
466            "halstead",
467            Float,
468            "Time",
469            "Halstead time, s",
470            "H.time(s)",
471            "Estimated implementation time, in seconds.",
472            "effort ÷ 18",
473            "effort / 18",
474            "lower_better",
475        ),
476        (
477            "bugs",
478            "halstead",
479            Float,
480            "Bugs",
481            "Halstead bugs",
482            "H.bugs",
483            "Estimated delivered bugs — a rough predictor of defect density.",
484            "effort^⅔ ÷ 3000",
485            "effort ** (2/3) / 3000",
486            "lower_better",
487        ),
488    ];
489    let mut specs = BTreeMap::new();
490    for (k, g, vt, label, name, short, desc, formula, calc, dir) in rows {
491        let mut s = AttributeSpec::new(*vt, label);
492        s.group = opt(g);
493        s.name = opt(name);
494        s.short = opt(short);
495        s.description = opt(desc);
496        s.formula = opt(formula);
497        s.calc = opt(calc);
498        s.direction = opt(dir);
499        specs.insert((*k).to_string(), s);
500    }
501    let mut groups = BTreeMap::new();
502    groups.insert(
503        "complexity".to_string(),
504        group("Complexity", "Code complexity metrics"),
505    );
506    groups.insert(
507        "halstead".to_string(),
508        group("Halstead", "Halstead software metrics"),
509    );
510    groups.insert(
511        "loc".to_string(),
512        group("Lines of Code", "Lines of code breakdown"),
513    );
514    groups.insert(
515        "maintainability".to_string(),
516        group("Maintainability", "Maintainability index"),
517    );
518    (specs, groups)
519}
520
#[cfg(test)]
mod tests {
    use super::*;

    /// Run `strip_cfg_test` on `src` and return the surviving source as text.
    fn strip(src: &str) -> String {
        let (bytes, _tloc) = strip_cfg_test(src.as_bytes());
        String::from_utf8(bytes).unwrap()
    }

    /// A `#[cfg(test)]` module disappears together with its attribute line.
    #[test]
    fn strips_cfg_test_module_with_its_attribute() {
        let src = "pub fn prod() -> i32 {\n    1\n}\n\n\
             #[cfg(test)]\nmod tests {\n    use super::*;\n    #[test]\n    fn t() { assert_eq!(prod(), 1); }\n}\n";
        let out = strip(src);
        assert!(out.contains("pub fn prod"), "production kept: {out}");
        assert!(!out.contains("mod tests"), "test mod removed: {out}");
        assert!(
            !out.contains("#[cfg(test)]"),
            "the cfg attr line removed too: {out}"
        );
        assert!(!out.contains("fn t()"), "test fn removed: {out}");
    }

    /// Free-standing `#[test]` and `#[bench]` functions are removed.
    #[test]
    fn strips_standalone_test_and_bench_fns() {
        let src = "fn prod() {}\n#[test]\nfn it_works() {}\n#[bench]\nfn b(_: &mut ()) {}\n";
        let out = strip(src);
        assert!(out.contains("fn prod"));
        let test_fn_gone = !out.contains("it_works");
        let bench_fn_gone = !out.contains("fn b(");
        assert!(
            test_fn_gone && bench_fn_gone,
            "test/bench fns removed: {out}"
        );
    }

    /// `cfg(feature = "test")` is a string literal, not a `test` ident, and a
    /// `mod tests_data` is not gated at all. Both stay.
    #[test]
    fn keeps_non_test_cfg_and_similarly_named_items() {
        let src = "#[cfg(feature = \"test\")]\npub mod gated {}\npub mod tests_data {}\n";
        let out = strip(src);
        assert!(out.contains("pub mod gated"), "feature-cfg kept: {out}");
        assert!(
            out.contains("tests_data"),
            "non-gated lookalike kept: {out}"
        );
    }

    /// `test` nested inside `all(...)` still gates the item.
    #[test]
    fn strips_cfg_all_test_combinations() {
        let src = "fn p() {}\n#[cfg(all(test, feature = \"x\"))]\nmod t {}\n";
        let out = strip(src);
        assert!(out.contains("fn p"));
        assert!(!out.contains("mod t"), "cfg(all(test,…)) removed: {out}");
    }

    /// Sources without test items, and sources that do not parse, come back
    /// byte-identical with a zero test-line count.
    #[test]
    fn unchanged_without_tests_or_on_parse_error() {
        for src in ["pub fn a() {}\n", "@@@ not rust @@@"] {
            let expected = (src.as_bytes().to_vec(), 0);
            assert_eq!(strip_cfg_test(src.as_bytes()), expected);
        }
    }

    /// 4 lines removed: the `#[cfg(test)]` attr, `mod tests {`, the body line,
    /// and the closing `}`.
    #[test]
    fn tloc_counts_the_whole_removed_test_region() {
        let src = "pub fn p() {}\n#[cfg(test)]\nmod tests {\n    fn t() {}\n}\n";
        let (_production, removed) = strip_cfg_test(src.as_bytes());
        assert_eq!(removed, 4);
    }
}
code_ranker_complexity/lib.rs

code_ranker_complexity/
lib.rs